pax_global_header00006660000000000000000000000064142216414770014522gustar00rootroot0000000000000052 comment=787c74ce7ada3dc7a92592c6259cd38d91e3822b compute-runtime-22.14.22890/000077500000000000000000000000001422164147700153525ustar00rootroot00000000000000compute-runtime-22.14.22890/.branch000066400000000000000000000000001422164147700165760ustar00rootroot00000000000000compute-runtime-22.14.22890/.clang-format000066400000000000000000000062251422164147700177320ustar00rootroot00000000000000--- Language: Cpp # BasedOnStyle: LLVM AccessModifierOffset: -2 AlignAfterOpenBracket: Align AlignConsecutiveAssignments: false AlignConsecutiveDeclarations: false AlignEscapedNewlinesLeft: false AlignOperands: true AlignTrailingComments: true AllowAllParametersOfDeclarationOnNextLine: true AllowShortBlocksOnASingleLine: false AllowShortCaseLabelsOnASingleLine: false AllowShortFunctionsOnASingleLine: All AllowShortIfStatementsOnASingleLine: false AllowShortLoopsOnASingleLine: false AlwaysBreakAfterDefinitionReturnType: None AlwaysBreakAfterReturnType: None AlwaysBreakBeforeMultilineStrings: false AlwaysBreakTemplateDeclarations: false BinPackArguments: true BinPackParameters: true BraceWrapping: AfterClass: false AfterControlStatement: false AfterEnum: false AfterFunction: false AfterNamespace: false AfterObjCDeclaration: false AfterStruct: false AfterUnion: false BeforeCatch: false BeforeElse: false IndentBraces: false BreakBeforeBinaryOperators: false BreakBeforeBraces: Attach BreakBeforeTernaryOperators: true BreakConstructorInitializersBeforeComma: false # clang-format > v3.8.0: BreakAfterJavaFieldAnnotations: false # clang-format > v3.8.0: BreakStringLiterals: true ColumnLimit: 0 CommentPragmas: '^ IWYU pragma:' ConstructorInitializerAllOnOneLineOrOnePerLine: false ConstructorInitializerIndentWidth: 4 ContinuationIndentWidth: 4 Cpp11BracedListStyle: true DerivePointerAlignment: false DisableFormat: false ExperimentalAutoDetectBinPacking: false ForEachMacros: [ foreach, 
Q_FOREACH, BOOST_FOREACH ] IncludeBlocks: Regroup IncludeCategories: - Regex: '^.(shared)/' Priority: 1 - Regex: '^.(documentation|manifests|offline_compiler|opencl|scripts)/' Priority: 2 - Regex: '^.(level_zero)/' Priority: 3 - Regex: '(d3d9types|d3d10_1)\.h' Priority: 4 - Regex: '(gfxEscape|windows)\.h' Priority: 5 - Regex: '^.(third_party|llvm|llvm-c|clang|clang-c|gtest|isl|json)/' Priority: 5 - Regex: '^.(gmock|cl|gl|d3d)' Priority: 5 - Regex: '^<.*>$' Priority: 10 - Regex: '.*' Priority: 5 IndentCaseLabels: false IndentWidth: 4 IndentWrappedFunctionNames: false KeepEmptyLinesAtTheStartOfBlocks: true MacroBlockBegin: '' MacroBlockEnd: '' MaxEmptyLinesToKeep: 1 NamespaceIndentation: None ObjCBlockIndentWidth: 2 ObjCSpaceAfterProperty: false ObjCSpaceBeforeProtocolList: true PenaltyBreakBeforeFirstCallParameter: 19 PenaltyBreakComment: 300 PenaltyBreakFirstLessLess: 120 PenaltyBreakString: 1000 PenaltyExcessCharacter: 1000000 PenaltyReturnTypeOnItsOwnLine: 60 PointerAlignment: Right ReflowComments: true SortIncludes: true SpaceAfterCStyleCast: false SpaceBeforeAssignmentOperators: true SpaceBeforeParens: ControlStatements SpaceInEmptyParentheses: false SpacesBeforeTrailingComments: 1 SpacesInAngles: false SpacesInContainerLiterals: true SpacesInCStyleCastParentheses: false SpacesInParentheses: false SpacesInSquareBrackets: false Standard: Cpp11 TabWidth: 8 UseTab: Never ... 
compute-runtime-22.14.22890/.clang-tidy000066400000000000000000000034531422164147700174130ustar00rootroot00000000000000--- Checks: 'clang-diagnostic-*,clang-analyzer-*,google-default-arguments,readability-identifier-naming,modernize-use-override,modernize-use-default-member-init,-clang-analyzer-alpha*' # WarningsAsErrors: '.*' HeaderFilterRegex: '^((?!^third_party\/).+)\.(h|hpp|inl)$' AnalyzeTemporaryDtors: false CheckOptions: - key: google-readability-braces-around-statements.ShortStatementLines value: '1' - key: google-readability-function-size.StatementThreshold value: '800' - key: google-readability-namespace-comments.ShortNamespaceLines value: '10' - key: google-readability-namespace-comments.SpacesBeforeComments value: '2' - key: readability-identifier-naming.MethodCase value: camelBack - key: readability-identifier-naming.ParameterCase value: camelBack - key: readability-identifier-naming.StructMemberCase value: camelBack - key: readability-identifier-naming.ClassMemberCase value: camelBack - key: readability-identifier-naming.ClassMethodCase value: camelBack - key: modernize-loop-convert.MaxCopySize value: '16' - key: modernize-loop-convert.MinConfidence value: reasonable - key: modernize-loop-convert.NamingStyle value: CamelCase - key: modernize-pass-by-value.IncludeStyle value: llvm - key: modernize-replace-auto-ptr.IncludeStyle value: llvm - key: modernize-use-nullptr.NullMacros value: 'NULL' - key: modernize-use-default-member-init.UseAssignment value: '1' ... 
compute-runtime-22.14.22890/.ctags000066400000000000000000000000461422164147700164540ustar00rootroot00000000000000-R -h .inl.h --langmap=c++:.inl.cpp.h compute-runtime-22.14.22890/.gitattributes000066400000000000000000000000551422164147700202450ustar00rootroot00000000000000manifests/manifest.yml filter=repo_converter compute-runtime-22.14.22890/.github/000077500000000000000000000000001422164147700167125ustar00rootroot00000000000000compute-runtime-22.14.22890/.github/actions/000077500000000000000000000000001422164147700203525ustar00rootroot00000000000000compute-runtime-22.14.22890/.github/actions/neo-lint/000077500000000000000000000000001422164147700220775ustar00rootroot00000000000000compute-runtime-22.14.22890/.github/actions/neo-lint/Dockerfile000066400000000000000000000005231422164147700240710ustar00rootroot00000000000000# # Copyright (C) 2021 Intel Corporation # # SPDX-License-Identifier: MIT # FROM docker.io/ubuntu:20.04 RUN apt-get -y update ; apt-get install -y --no-install-recommends gpg software-properties-common RUN apt-get -y update ; apt-get install -y --no-install-recommends clang-format-11 git COPY lint.sh /lint.sh ENTRYPOINT ["/lint.sh"] compute-runtime-22.14.22890/.github/actions/neo-lint/action.yml000066400000000000000000000005301422164147700240750ustar00rootroot00000000000000# action.yml # # Copyright (C) 2021 Intel Corporation # # SPDX-License-Identifier: MIT # name: 'Neo lint' description: 'check coding style' inputs: path: description: 'Relative path under $GITHUB_WORKSPACE to place the repository' iregex: description: 'iregex to pass to clang-format' runs: using: 'docker' image: 'Dockerfile' compute-runtime-22.14.22890/.github/actions/neo-lint/lint.sh000077500000000000000000000007661422164147700234150ustar00rootroot00000000000000#!/bin/bash # # Copyright (C) 2021 Intel Corporation # # SPDX-License-Identifier: MIT # set -e clang-format-11 --version INPUT_PATH="${INPUT_PATH:-.}" INPUT_IREGEX="${INPUT_IREGEX:-.*\.(cpp|h|inl)}" ( cd ${INPUT_PATH} 
git fetch origin ${GITHUB_BASE_REF} set -x git diff -U0 --no-color origin/master..HEAD | clang-format-diff-11 -p1 -i -v -iregex ${INPUT_IREGEX} set +x ) if [ -n "$(git -C ${INPUT_PATH} status --porcelain)" ]; then git -C ${INPUT_PATH} diff exit 1 fi compute-runtime-22.14.22890/.github/neo-lint.yml000066400000000000000000000000211422164147700211530ustar00rootroot00000000000000use-project: neo compute-runtime-22.14.22890/.github/pull-request.yml000066400000000000000000000000421422164147700220730ustar00rootroot00000000000000defaultbranch: master draft: true compute-runtime-22.14.22890/.github/workflows/000077500000000000000000000000001422164147700207475ustar00rootroot00000000000000compute-runtime-22.14.22890/.github/workflows/verify.yml000066400000000000000000000004741422164147700230030ustar00rootroot00000000000000# # Copyright (C) 2021 Intel Corporation # # SPDX-License-Identifier: MIT # name: verify on: - pull_request jobs: lint: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 with: path: neo - name: lint uses: ./neo/.github/actions/neo-lint with: path: neo compute-runtime-22.14.22890/.gitignore000066400000000000000000000000761422164147700173450ustar00rootroot00000000000000.vs/* CMakeSettings.json build/* build_linux/* /debian/ out/* compute-runtime-22.14.22890/.lint-revision000066400000000000000000000000071422164147700201520ustar00rootroot000000000000001.0.16 compute-runtime-22.14.22890/BUILD.md000066400000000000000000000026331422164147700165370ustar00rootroot00000000000000 # Building NEO driver Instructions have been tested on Ubuntu* and CentOS*. They assume a clean installation of a stable version. 1. Download & install required packages Example (Ubuntu): ```shell sudo apt-get install cmake g++ git pkg-config ``` Example (CentOS): ```shell sudo dnf install gcc-c++ cmake git make ``` 2. 
Install required dependencies Neo requires: - [Intel(R) Graphics Compiler for OpenCL(TM)](https://github.com/intel/intel-graphics-compiler) - [Intel(R) Graphics Memory Management](https://github.com/intel/gmmlib) Please visit their repositories for building and instalation instructions. Use versions compatible with selected [Neo release](https://github.com/intel/compute-runtime/releases). 3. Create workspace folder and download sources: Example: ```shell mkdir workspace cd workspace git clone https://github.com/intel/compute-runtime neo ``` 4. Create folder for build: Example: ```shell mkdir build ``` 5. (Optional) Enabling additional extensions * [cl_intel_va_api_media_sharing](https://github.com/intel/compute-runtime/blob/master/opencl/doc/cl_intel_va_api_media_sharing.md) 6. Build and install Example: ```shell cd build cmake -DCMAKE_BUILD_TYPE=Release -DNEO_SKIP_UNIT_TESTS=1 ../neo make -j`nproc` sudo make install ``` ___(*) Other names and brands may be claimed as property of others.___ compute-runtime-22.14.22890/CMakeLists.txt000066400000000000000000000732141422164147700201210ustar00rootroot00000000000000# # Copyright (C) 2018-2022 Intel Corporation # # SPDX-License-Identifier: MIT # if(WIN32) cmake_minimum_required(VERSION 3.13.0 FATAL_ERROR) else() cmake_minimum_required(VERSION 3.2.0 FATAL_ERROR) endif() include(ExternalProject) # Set the runtime source directory if(NOT DEFINED NEO_SOURCE_DIR) set(NEO_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) endif() if(MSVC) set(CMAKE_GENERATOR_TOOLSET "host=x64") endif() if(NOT PROJECT_NAME) set(PROJECT_NAME igdrcl) endif() project(${PROJECT_NAME}) file(READ ".branch" BRANCH_TYPE) string(STRIP "${BRANCH_TYPE}" BRANCH_TYPE) set(BRANCH_DIR_SUFFIX "/${BRANCH_TYPE}") list(APPEND BRANCH_DIR_LIST "/") if(NOT "${BRANCH_TYPE}" STREQUAL "") list(APPEND BRANCH_DIR_LIST "/${BRANCH_TYPE}/") set(BRANCH_DIR_SUFFIX "${BRANCH_DIR_SUFFIX}/") endif() message(STATUS "branch dir list: ${BRANCH_DIR_LIST}") if(${CMAKE_SYSTEM_PROCESSOR} STREQUAL 
"x86_64") set(NEO_TARGET_PROCESSOR "x86_64") elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "AMD64") set(NEO_TARGET_PROCESSOR "x86_64") elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "aarch64") set(NEO_TARGET_PROCESSOR "aarch64") if(NOT ${CMAKE_HOST_SYSTEM_PROCESSOR} STREQUAL ${CMAKE_SYSTEM_PROCESSOR}) set(NEO_DISABLE_LD_LLD TRUE) set(NEO_DISABLE_LD_GOLD TRUE) endif() include_directories(${CMAKE_CURRENT_SOURCE_DIR}/third_party/sse2neon) endif() message(STATUS "Host processor: ${CMAKE_HOST_SYSTEM_PROCESSOR}") message(STATUS "Target processor: ${CMAKE_SYSTEM_PROCESSOR}") message(STATUS "Neo target processor: ${NEO_TARGET_PROCESSOR}") if(NOT DEFINED NEO_TARGET_PROCESSOR) message(FATAL_ERROR "Unsupported target processor: ${CMAKE_SYSTEM_PROCESSOR}") endif() # Include custom configs include("${BRANCH_TYPE}/custom_config.cmake" OPTIONAL) # Include needed macros include(cmake/common_macros.cmake) if(CMAKE_CONFIGURATION_TYPES) # Set custom build types for multi-configuration generators set(CMAKE_CONFIGURATION_TYPES "Release;ReleaseInternal;Debug") if(DEFINED CMAKE_BUILD_TYPE) list(APPEND CMAKE_CONFIGURATION_TYPES ${CMAKE_BUILD_TYPE}) endif() list(REMOVE_DUPLICATES CMAKE_CONFIGURATION_TYPES) endif() include(GNUInstallDirs) # define global property where we will collect component list to package define_property(GLOBAL PROPERTY NEO_OCL_COMPONENTS_LIST BRIEF_DOCS "List of components" FULL_DOCS "List of components to create packages") set(GLOBAL PROPERTY NEO_OCL_COMPONENTS_LIST "") define_property(GLOBAL PROPERTY NEO_L0_COMPONENTS_LIST BRIEF_DOCS "List of components" FULL_DOCS "List of components to create packages") set(GLOBAL PROPERTY NEO_L0_COMPONENTS_LIST "") string(REPLACE " " ";" NEO_RUN_INTERCEPTOR_LIST "${NEO_RUN_INTERCEPTOR}") if(WIN32) set(DISABLE_WDDM_LINUX TRUE) endif() include(cmake/find_wdk.cmake) if(WIN32) set(DRIVER_MODEL wddm) else() if(DISABLE_WDDM_LINUX) set(DRIVER_MODEL drm) else() set(DRIVER_MODEL drm_or_wddm) endif() endif() message(STATUS "Driver model : 
${DRIVER_MODEL}") if(TR_DEPRECATED) add_definitions(-D_SILENCE_TR1_NAMESPACE_DEPRECATION_WARNING=1) endif() if(RELEASE_WITH_REGKEYS) message(STATUS "Enabling RegKey reading in release build") add_definitions(-D_RELEASE_BUILD_WITH_REGKEYS) endif() if(DEFINED NEO_EXTRA_DEFINES) string(REPLACE "," ";" NEO_EXTRA_DEFINES "${NEO_EXTRA_DEFINES}") foreach(_tmp ${NEO_EXTRA_DEFINES}) message(STATUS "adding extra define: ${_tmp}") add_definitions(-D${_tmp}) endforeach() endif() include("${BRANCH_TYPE}/${BRANCH_TYPE}.cmake" OPTIONAL) if(NOT CMAKE_BUILD_TYPE) message(STATUS "CMAKE_BUILD_TYPE not specified, using Release") set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Build type: [Release, ReleaseInternal, Debug]" FORCE) endif() if(CMAKE_SIZEOF_VOID_P EQUAL 8) set(NEO_BITS "64") set(NEO_ARCH "x64") else() set(NEO_BITS "32") set(NEO_ARCH "x86") endif() if(NOT DEFINED NEO_BUILD_WITH_OCL) if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/opencl/CMakeLists.txt) set(NEO_BUILD_WITH_OCL TRUE) endif() endif() if(NOT DEFINED BUILD_WITH_L0) if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/level_zero/CMakeLists.txt) if("${NEO_BITS}" STREQUAL "64") set(BUILD_WITH_L0 TRUE) endif() endif() endif() if(NOT "${NEO_BITS}" STREQUAL "64") set(BUILD_WITH_L0 FALSE) endif() add_definitions( -DCL_USE_DEPRECATED_OPENCL_1_1_APIS -DCL_USE_DEPRECATED_OPENCL_1_2_APIS -DCL_USE_DEPRECATED_OPENCL_2_0_APIS -DCL_USE_DEPRECATED_OPENCL_2_1_APIS -DCL_USE_DEPRECATED_OPENCL_2_2_APIS ) if(WIN32 OR NOT DISABLE_WDDM_LINUX) add_definitions(-D_CRT_SECURE_NO_WARNINGS -DWDDM_VERSION_NUMBER=23 -DNOMINMAX) add_definitions(-DLEGACY_D3DAPI_VERSION=20) if(NOT WIN32) add_definitions(-DWDDM_LINUX=1) endif() endif() set(CMAKE_C_FLAGS_RELEASEINTERNAL "${CMAKE_C_FLAGS_RELEASE}") set(CMAKE_CXX_FLAGS_RELEASEINTERNAL "${CMAKE_CXX_FLAGS_RELEASE}") set(CMAKE_SHARED_LINKER_FLAGS_RELEASEINTERNAL "${CMAKE_SHARED_LINKER_FLAGS_RELEASE}") set(CMAKE_EXE_LINKER_FLAGS_RELEASEINTERNAL "${CMAKE_EXE_LINKER_FLAGS_RELEASE}") if(MSVC AND ENABLE_VS_FASTLINK) string(APPEND 
CMAKE_EXE_LINKER_FLAGS_DEBUG " /debug:FASTLINK") string(APPEND CMAKE_SHARED_LINKER_FLAGS_DEBUG " /debug:FASTLINK") string(APPEND CMAKE_STATIC_LINKER_FLAGS_DEBUG " /debug:FASTLINK") string(APPEND CMAKE_MODULE_LINKER_FLAGS_DEBUG " /debug:FASTLINK") endif() include(CheckCXXCompilerFlag) if(UNIX) find_program(__LD_GOLD_FOUND ld.gold) find_program(__LD_LLD_FOUND ld.lld) check_cxx_compiler_flag(-fuse-ld=lld COMPILER_SUPPORTS_LLD) check_cxx_compiler_flag(-fuse-ld=gold COMPILER_SUPPORTS_GOLD) if(NOT NEO_DISABLE_LD_LLD AND COMPILER_SUPPORTS_LLD AND __LD_LLD_FOUND) message(STATUS "Using linker: ld.lld") set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fuse-ld=lld") elseif(NOT NEO_DISABLE_LD_GOLD AND COMPILER_SUPPORTS_GOLD AND __LD_GOLD_FOUND) message(STATUS "Using linker: ld.gold") set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fuse-ld=gold") else() message(STATUS "Using linker: default") endif() if(NEO_BUILD_DEBUG_SYMBOLS_PACKAGE) set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -g") set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -g") set(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} -g") set(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} -g") endif() set(CMAKE_C_FLAGS_RELEASEINTERNAL "${CMAKE_C_FLAGS_RELEASEINTERNAL} -g") set(CMAKE_CXX_FLAGS_RELEASEINTERNAL "${CMAKE_CXX_FLAGS_RELEASEINTERNAL} -g") set(CMAKE_SHARED_LINKER_FLAGS_RELEASEINTERNAL "${CMAKE_SHARED_LINKER_FLAGS_RELEASEINTERNAL} -g") set(CMAKE_EXE_LINKER_FLAGS_RELEASEINTERNAL "${CMAKE_EXE_LINKER_FLAGS_RELEASEINTERNAL} -g") endif() string(TOLOWER "${CMAKE_BUILD_TYPE}" BUILD_TYPE_lower) if("${BUILD_TYPE_lower}" STREQUAL "releaseinternal") add_definitions(-D_RELEASE_INTERNAL) endif("${BUILD_TYPE_lower}" STREQUAL "releaseinternal") message(STATUS "${CMAKE_BUILD_TYPE} build configuration") # Set the ocloc directory if(NOT DEFINED OCLOC_DIRECTORY) set(OCLOC_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/shared/offline_compiler) endif() # Set the shared source 
directory if(NOT DEFINED NEO_SHARED_DIRECTORY) set(NEO_SHARED_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/shared/source) endif() # Set the shared test directory if(NOT DEFINED NEO_SHARED_TEST_DIRECTORY) set(NEO_SHARED_TEST_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/shared/test) endif() # leave temporarily for compatibitlity if(DEFINED SKIP_UNIT_TESTS) set(NEO_SKIP_UNIT_TESTS ${SKIP_UNIT_TESTS}) endif() if(NEO_SKIP_UNIT_TESTS) set(NEO_SKIP_SHARED_UNIT_TESTS TRUE) set(NEO_SKIP_OCL_UNIT_TESTS TRUE) set(NEO_SKIP_L0_UNIT_TESTS TRUE) set(NEO_SKIP_L0_BLACK_BOX_TESTS TRUE) endif() # Set our build directory if(NOT DEFINED NEO_BUILD_DIR) set(NEO_BUILD_DIR ${CMAKE_BINARY_DIR}) endif() if(NOT NEO_BINARY_DIR) set(NEO_BINARY_DIR ${CMAKE_BINARY_DIR}) endif() # we use c++17 set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) # we force using response files set(CMAKE_NINJA_FORCE_RESPONSE_FILE 1) # set output paths set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${NEO_BINARY_DIR}/bin) set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${NEO_BINARY_DIR}/bin) set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${NEO_BINARY_DIR}/lib) # determine Neo version include(version.cmake) # set variables for symbols stripping if(UNIX AND NEO_BUILD_DEBUG_SYMBOLS_PACKAGE) set_property(GLOBAL PROPERTY DEBUG_SYMBOL_FILES "") set_property(GLOBAL PROPERTY IGDRCL_SYMBOL_FILE "") set_property(GLOBAL APPEND PROPERTY NEO_OCL_COMPONENTS_LIST "opencl-debuginfo") set(STRIP_SYMBOLS_TARGET strip_debug_symbols) set(DEBUG_SYMBOL_INSTALL_DIR "/usr/lib/debug${CMAKE_INSTALL_FULL_LIBDIR}") add_custom_target(${STRIP_SYMBOLS_TARGET} ALL) endif() # We want to build with the static, multithreaded runtime libraries (as opposed # to the multithreaded runtime DLLs) if(MSVC) string(REPLACE "/MDd" "/MTd" CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}") string(REPLACE "/MD" "/MT" CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}") string(REPLACE "/MD" "/MT" CMAKE_CXX_FLAGS_RELEASEINTERNAL "${CMAKE_CXX_FLAGS_RELEASEINTERNAL}") string(REPLACE "/GR" "/GR-" 
CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") add_definitions(-D_HAS_STATIC_RTTI=0) add_definitions(-DPURGE_DEBUG_KEY_NAMES=1) else() add_definitions(-DPURGE_DEBUG_KEY_NAMES=0) endif() if(NOT NEO_SKIP_UNIT_TESTS) if(NOT DEFINED GTEST_REPEAT) set(GTEST_REPEAT 1) endif() message(STATUS "GTest repeat count set to ${GTEST_REPEAT}") if(NOT DEFINED GTEST_SHUFFLE) set(GTEST_SHUFFLE --gtest_shuffle --gtest_random_seed=0) endif() message(STATUS "GTest shuffle set to ${GTEST_SHUFFLE}") endif() if(NOT KMDAF_HEADERS_DIR) if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/../kmdaf/inc/common/kmDaf.h") get_filename_component(KMDAF_HEADERS_DIR "../kmdaf/inc/common/" ABSOLUTE) message(STATUS "KM-DAF headers dir: ${KMDAF_HEADERS_DIR}") endif() endif() if(NOT DEFINED SOURCE_LEVEL_DEBUGGER_HEADERS_DIR) get_filename_component(SOURCE_LEVEL_DEBUGGER_HEADERS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/third_party/source_level_debugger/" ABSOLUTE) message(STATUS "Source Level Debugger headers dir: ${SOURCE_LEVEL_DEBUGGER_HEADERS_DIR}") endif() get_filename_component(AUB_STREAM_HEADERS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/third_party${BRANCH_DIR_SUFFIX}aub_stream/headers" ABSOLUTE) if(IS_DIRECTORY ${AUB_STREAM_HEADERS_DIR}) message(STATUS "Aub Stream Headers dir: ${AUB_STREAM_HEADERS_DIR}") else() message(FATAL_ERROR "Aub Stream headers not available!") endif() if(TARGET aub_stream) set(AUB_STREAM_PROJECT_NAME "aub_stream") else() if(NOT DEFINED AUB_STREAM_DIR) get_filename_component(TEMP_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../aub_stream/aub_mem_dump" ABSOLUTE) if(IS_DIRECTORY ${TEMP_DIR}) get_filename_component(AUB_STREAM_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../aub_stream" ABSOLUTE) message(STATUS "Aub Stream dir: ${AUB_STREAM_DIR}") endif() endif() if(DISABLE_AUB_STREAM) unset(AUB_STREAM_DIR) endif() endif() if(NOT DEFINED KHRONOS_HEADERS_DIR) get_filename_component(DIR_tmp "${CMAKE_CURRENT_SOURCE_DIR}/third_party/opencl_headers" ABSOLUTE) if(IS_DIRECTORY ${DIR_tmp}) set(KHRONOS_HEADERS_DIR ${DIR_tmp}) 
add_definitions(-DCL_TARGET_OPENCL_VERSION=300) else() message(FATAL_ERROR "Khronos OpenCL headers not available!") endif() endif() message(STATUS "Khronos OpenCL headers dir: ${KHRONOS_HEADERS_DIR}") set(OCL_HEADERS_DIR ${KHRONOS_HEADERS_DIR}) if(NOT DEFINED KHRONOS_GL_HEADERS_DIR) get_filename_component(GL_DIR "${CMAKE_CURRENT_SOURCE_DIR}/third_party/opengl_headers" ABSOLUTE) if(IS_DIRECTORY ${GL_DIR}) set(KHRONOS_GL_HEADERS_DIR ${GL_DIR}) add_definitions(-DGL_TARGET_OPENGL_VERSION=210) endif() endif() message(STATUS "Khronos OpenGL headers dir: ${KHRONOS_GL_HEADERS_DIR}") if(NOT THIRD_PARTY_DIR) get_filename_component(THIRD_PARTY_DIR "../third_party/" ABSOLUTE) endif() message(STATUS "Third party dir: ${THIRD_PARTY_DIR}") # Intel Graphics Compiler detection include(cmake/find_igc.cmake) # GmmLib detection include(cmake/find_gmmlib.cmake) # Metrics detection include(cmake/find_metrics.cmake) # LibVA detection if(NOT DISABLE_LIBVA) if(UNIX) set(NEO__LIBVA_IS_REQUIRED "") if(IGDRCL_FORCE_USE_LIBVA) set(NEO__LIBVA_IS_REQUIRED "REQUIRED") endif() if(DEFINED LIBVA_SOURCE_DIR) get_filename_component(LIBVA_SOURCE_DIR "${LIBVA_SOURCE_DIR}" ABSOLUTE) else() get_filename_component(LIBVA_SOURCE_DIR_tmp "${NEO_SOURCE_DIR}/../libva" ABSOLUTE) if(IS_DIRECTORY "${LIBVA_SOURCE_DIR_tmp}") set(LIBVA_SOURCE_DIR "${LIBVA_SOURCE_DIR_tmp}") endif() endif() find_package(PkgConfig) if(DEFINED LIBVA_SOURCE_DIR AND IS_DIRECTORY "${LIBVA_SOURCE_DIR}/lib/pkgconfig/") set(OLD_PKG_CONFIG_PATH $ENV{PKG_CONFIG_PATH}) set(ENV{PKG_CONFIG_PATH} "${LIBVA_SOURCE_DIR}/lib/pkgconfig/") endif() pkg_check_modules(NEO__LIBVA ${NEO__LIBVA_IS_REQUIRED} libva>=1.0.0) include(CheckLibraryExists) if(DEFINED LIBVA_SOURCE_DIR AND IS_DIRECTORY "${LIBVA_SOURCE_DIR}/lib/pkgconfig/") set(ENV{PKG_CONFIG_PATH} ${OLD_PKG_CONFIG_PATH}) endif() if(NEO__LIBVA_FOUND) CHECK_LIBRARY_EXISTS(va vaGetLibFunc ${NEO__LIBVA_LIBDIR} HAVE_VAGETLIBFUNC) add_definitions(-DLIBVA) message(STATUS "Using libva ") if(DEFINED 
LIBVA_SOURCE_DIR AND IS_DIRECTORY "${LIBVA_SOURCE_DIR}/lib/pkgconfig/") string(REPLACE "${NEO__LIBVA_INCLUDEDIR}" "${LIBVA_SOURCE_DIR}/include" NEO__LIBVA_INCLUDE_DIRS "${NEO__LIBVA_INCLUDE_DIRS}") string(REPLACE "${NEO__LIBVA_LIBDIR}" "${LIBVA_SOURCE_DIR}/lib" NEO__LIBVA_LIBDIR "${NEO__LIBVA_LIBDIR}") set(NEO__LIBVA_LIBRARY_PATH "${NEO__LIBVA_LIBDIR}") endif() list(LENGTH NEO__LIBVA_INCLUDE_DIRS __len) if(__len GREATER 0) set(NEO__LIBVA_INCLUDE_DIR ${NEO__LIBVA_INCLUDE_DIRS}) include_directories("${NEO__LIBVA_INCLUDE_DIR}") message(STATUS "LibVA include dirs: ${NEO__LIBVA_INCLUDE_DIR}") endif() endif() endif() endif() set(SLD_LIBRARY_NAME "igfxdbgxchg") if(NEO_BITS STREQUAL "64") set(SLD_LIBRARY_NAME "${SLD_LIBRARY_NAME}${NEO_BITS}") endif() add_subdirectory_unique(third_party/gtest) message(STATUS "AUB_STREAM_DIR = ${AUB_STREAM_DIR}") if(DEFINED AUB_STREAM_DIR) set(AUB_STREAM_PROJECT_NAME "aub_stream") add_subdirectory_unique(${AUB_STREAM_DIR} ${CMAKE_BINARY_DIR}/aub_stream EXCLUDE_FROM_ALL) set(ENGINE_NODE_DIR ${AUB_STREAM_DIR}${BRANCH_DIR_SUFFIX}headers) else() set(ENGINE_NODE_DIR ${AUB_STREAM_HEADERS_DIR}) endif() message(STATUS "Engine node dir: ${ENGINE_NODE_DIR}") # We want to organize our IDE targets into folders set_property(GLOBAL PROPERTY USE_FOLDERS ON) # Get available platfroms include(platforms.cmake) get_filename_component(I915_INCLUDES_DIR "${NEO_SOURCE_DIR}/third_party${BRANCH_DIR_SUFFIX}uapi" ABSOLUTE) message(STATUS "i915 includes dir: ${I915_INCLUDES_DIR}") # Enable/Disable BuiltIns compilation during build set(COMPILE_BUILT_INS TRUE CACHE BOOL "Enable built-in kernels compilation") if(NOT DEFINED NEO_KERNELS_BIN_DIR) get_filename_component(NEO_KERNELS_BIN_DIR "${NEO_SOURCE_DIR}/../kernels_bin/" ABSOLUTE) endif() # Changing the default executable and library output directories set(IGDRCL_OUTPUT_DIR "${IGDRCL_OPTION__OUTPUT_DIR}") # do not add rpath set(CMAKE_SKIP_RPATH YES CACHE BOOL "" FORCE) set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} 
-D_DEBUG") option(APPVERIFIER_ALLOWED "allow use of AppVerifier" OFF) option(CCACHE_ALLOWED "allow use of ccache" TRUE) find_program(CCACHE_EXE_FOUND ccache) if(CCACHE_EXE_FOUND AND CCACHE_ALLOWED) message(STATUS "Found ccache: ${CCACHE_EXE_FOUND}") if(NOT NEO_DONT_SET_CCACHE_BASEDIR AND NOT DEFINED ENV{CCACHE_BASEDIR}) get_filename_component(__tmp_dir "../infra" ABSOLUTE) if(IS_DIRECTORY ${__tmp_dir}) get_filename_component(NEO_CCACHE_BASEDIR ".." ABSOLUTE) list(APPEND NEO_RULE_LAUNCH_LIST "CCACHE_BASEDIR=${NEO_CCACHE_BASEDIR}") endif() endif() list(APPEND NEO_RULE_LAUNCH_LIST "ccache") string(REPLACE ";" " " NEO_RULE_LAUNCH_STR "${NEO_RULE_LAUNCH_LIST}") message(STATUS "Using rule: ${NEO_RULE_LAUNCH_STR}") set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ${NEO_RULE_LAUNCH_STR}) if("${CMAKE_GENERATOR}" STREQUAL "Ninja") set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK ${NEO_RULE_LAUNCH_STR}) else() set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK ccache) endif() endif() # Miscs options option(IGDRCL_GCOV "generate gcov report" OFF) option(USE_CL_CACHE "Use OpenCL program binary cache" ON) set(CL_CACHE_LOCATION "cl_cache" CACHE STRING "OpenCL program binary cache location") option(NEO_SHOW_VERBOSE_ULT_RESULTS "Use the default/verbose test output" OFF) if(NOT NEO_SHOW_VERBOSE_ULT_RESULTS) set(NEO_TESTS_LISTENER_OPTION "--disable_default_listener") else() set(NEO_TESTS_LISTENER_OPTION "--enable_default_listener") endif() # Put profiling enable flag into define if(KMD_PROFILING) add_definitions(-DKMD_PROFILING=${KMD_PROFILING}) endif() if(MSVC) # Force to treat warnings as errors if(NOT CMAKE_CXX_FLAGS MATCHES "/WX") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /WX") if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /we4189") endif() endif() # Disable generating manifest set(LINKER_FLAGS "/MANIFEST:NO") # Disable COMDAT folding optimization set(LINKER_FLAGS "${LINKER_FLAGS} /OPT:NOICF") # Use legacy float rounding set(LINKER_FLAGS 
"${LINKER_FLAGS} legacy_stdio_float_rounding.obj") set(LINKER_FLAGS "${LINKER_FLAGS} /CETCOMPAT") # Support for WUD set(CMAKE_CXX_STANDARD_LIBRARIES "onecore.lib") foreach(IT kernel32.lib;user32.lib;gdi32.lib;advapi32.lib;ole32.lib;) set(LINKER_FLAGS "${LINKER_FLAGS} /NODEFAULTLIB:${IT}") endforeach() set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${LINKER_FLAGS}") set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} ${LINKER_FLAGS}") set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${LINKER_FLAGS}") add_definitions(-DUNICODE -D_UNICODE) else() if(IGDRCL_GCOV) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fprofile-arcs -ftest-coverage --coverage") endif() option(USE_ASAN "Link with address sanitization support" OFF) if(USE_ASAN) if(CMAKE_COMPILER_IS_GNUCC) set(ASAN_FLAGS " -fsanitize=address -fno-omit-frame-pointer -DSANITIZER_BUILD") set(ASAN_LIBS "asan") set(GTEST_ENV ${GTEST_ENV} LSAN_OPTIONS=suppressions=${CMAKE_CURRENT_SOURCE_DIR}/opencl/test/unit_test/lsan_suppressions.txt) else() message(STATUS "Address sanitization with clang not yet support") endif() endif() if(USE_TSAN) if(CMAKE_COMPILER_IS_GNUCC AND USE_ASAN) message(STATUS "Cannot use thread sanitization with address sanitization in gcc") else() set(TSAN_FLAGS " -fsanitize=thread -DSANITIZER_BUILD") set(TSAN_LIBS "tsan") endif() endif() endif() # setup variables needed for custom configuration type # generate PDB files even for release build on MSVC if(MSVC) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /MP") set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Zi") set(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} /DEBUG /OPT:REF") set(CMAKE_CXX_FLAGS_RELEASEINTERNAL "${CMAKE_CXX_FLAGS_RELEASEINTERNAL} /Zi") set(CMAKE_SHARED_LINKER_FLAGS_RELEASEINTERNAL "${CMAKE_SHARED_LINKER_FLAGS_RELEASEINTERNAL} /DEBUG /OPT:REF") if(NO_PDB) string(REGEX REPLACE "/Zi" "" CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}") string(REGEX REPLACE "/Zi" "" 
CMAKE_CXX_FLAGS_RELEASEINTERNAL "${CMAKE_CXX_FLAGS_RELEASEINTERNAL}") string(REGEX REPLACE "/Zi" "/Z7" CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}") endif() endif() # spectre mitigation if(MSVC) check_cxx_compiler_flag(/Qspectre COMPILER_SUPPORTS_QSPECTRE) check_cxx_compiler_flag(/d2guardspecload COMPILER_SUPPORTS_D2GUARDSPECLOAD) if(COMPILER_SUPPORTS_QSPECTRE) SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Qspectre") elseif(COMPILER_SUPPORTS_D2GUARDSPECLOAD) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /d2guardspecload") else() message(WARNING "Spectre mitigation is not supported by the compiler") endif() else() if(NOT NEO_DISABLE_MITIGATIONS) if(${CMAKE_CXX_COMPILER_ID} STREQUAL "Clang") check_cxx_compiler_flag(-mretpoline COMPILER_SUPPORTS_RETPOLINE) if(COMPILER_SUPPORTS_RETPOLINE) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mretpoline") else() message(WARNING "Spectre mitigation -mretpoline flag is not supported by the compiler") endif() else() check_cxx_compiler_flag(-mindirect-branch=thunk COMPILER_SUPPORTS_INDIRECT_BRANCH_THUNK) if(COMPILER_SUPPORTS_INDIRECT_BRANCH_THUNK) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mindirect-branch=thunk") else() message(WARNING "Spectre mitigation -mindirect-branch=thunk flag is not supported by the compiler") endif() check_cxx_compiler_flag(-mfunction-return=thunk COMPILER_SUPPORTS_FUNCTION_RETURN_THUNK) if(COMPILER_SUPPORTS_FUNCTION_RETURN_THUNK) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfunction-return=thunk") else() message(WARNING "Spectre mitigation -mfunction-return=thunk flag is not supported by the compiler") endif() check_cxx_compiler_flag(-mindirect-branch-register COMPILER_SUPPORTS_INDIRECT_BRANCH_REGISTER) if(COMPILER_SUPPORTS_INDIRECT_BRANCH_REGISTER) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mindirect-branch-register") else() message(WARNING "Spectre mitigation -mindirect-branch-register flag is not supported by the compiler") endif() endif() else() message(WARNING "Spectre mitigation DISABLED") endif() 
check_cxx_compiler_flag(-msse4.2 COMPILER_SUPPORTS_SSE42) check_cxx_compiler_flag(-mavx2 COMPILER_SUPPORTS_AVX2) check_cxx_compiler_flag(-march=armv8-a+simd COMPILER_SUPPORTS_NEON) endif() if(NOT MSVC) SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ftemplate-depth=1024") endif() # Compiler warning flags if(NOT MSVC) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wempty-body") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wignored-qualifiers") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wtype-limits") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wuninitialized") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wextra -Wno-unused-parameter -Wno-missing-field-initializers") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Winvalid-pch") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unknown-pragmas") if("${CMAKE_C_COMPILER_ID}" STREQUAL "Clang") # clang only set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wshorten-64-to-32 -Wno-extern-c-compat") if(NOT (CMAKE_C_COMPILER_VERSION VERSION_LESS 3.6)) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-local-typedefs -DSANITIZER_BUILD") endif() if(NOT (CMAKE_C_COMPILER_VERSION VERSION_LESS 4.0)) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-deprecated-register") # Added for htons() endif() if(CMAKE_C_COMPILER_VERSION VERSION_GREATER_EQUAL 10.0) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-deprecated-copy") endif() else() # gcc only set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-local-typedefs -Wno-unused-but-set-variable") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wclobbered") if(CMAKE_C_COMPILER_VERSION VERSION_LESS 7.0) else() set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wimplicit-fallthrough=4") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-noexcept-type") # Added for gtest endif() endif() set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror") if(USE_SANITIZE_UB) check_cxx_compiler_flag(-fsanitize=undefined COMPILER_SUPPORTS_UNDEFINED_BEHAVIOR_SANITIZER) if(COMPILER_SUPPORTS_UNDEFINED_BEHAVIOR_SANITIZER) message(STATUS 
"Enabling undefined behavior sanitizer") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=undefined -fsanitize-recover=vptr -fno-rtti -DSANITIZER_BUILD") if(NOT SANITIZE_UB_ALLOW_CONTINUE) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-sanitize-recover=undefined") endif() endif() endif() endif() # Compile code with defenses enabled (settings to be used for production release code) if("${CMAKE_BUILD_TYPE}" STREQUAL "Release") if(MSVC) set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /GS") set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /sdl") set(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} /NXCompat") set(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} /DynamicBase") if("${NEO_ARCH}" STREQUAL "x86") set(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} /SafeSEH") endif() else() if(${CMAKE_CXX_COMPILER_ID} STREQUAL "Clang") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fstack-protector-strong") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2 -D_FORTIFY_SOURCE=2") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wformat -Wformat-security") else() # gcc, g++ only if(CMAKE_C_COMPILER_VERSION VERSION_LESS 4.9) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fstack-protector") else() set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fstack-protector-strong") endif() set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2 -D_FORTIFY_SOURCE=2") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wformat -Wformat-security") set(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} -Wl,-z,noexecstack") set(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} -Wl,-z,relro") set(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} -Wl,-z,now") endif() endif() endif() # Project-wide include paths # Please keep alphabetical order include_directories(${NEO_BUILD_DIR}) include_directories(${NEO_SOURCE_DIR}) 
# Shared-library definition directories. Each path is suffixed with
# BRANCH_DIR_SUFFIX; one call per directory keeps the list easy to diff.
include_directories(${NEO_SHARED_DIRECTORY}/aub_mem_dump/definitions${BRANCH_DIR_SUFFIX})
include_directories(${NEO_SHARED_DIRECTORY}/built_ins/builtinops${BRANCH_DIR_SUFFIX})
include_directories(${NEO_SHARED_DIRECTORY}/command_container/definitions${BRANCH_DIR_SUFFIX})
include_directories(${NEO_SHARED_DIRECTORY}/command_stream/definitions${BRANCH_DIR_SUFFIX})
include_directories(${NEO_SHARED_DIRECTORY}/compiler_interface/compiler_options${BRANCH_DIR_SUFFIX})
include_directories(${NEO_SHARED_DIRECTORY}/debug_settings/definitions${BRANCH_DIR_SUFFIX})
include_directories(${NEO_SHARED_DIRECTORY}/dll/devices${BRANCH_DIR_SUFFIX})
include_directories(${NEO_SHARED_DIRECTORY}/gen_common${BRANCH_DIR_SUFFIX})

# The gmm_memory directory carries the branch suffix only on WIN32 hosts;
# everywhere else the unsuffixed directory is used.
if(NOT WIN32)
  include_directories(${NEO_SHARED_DIRECTORY}/gmm_helper/windows/gmm_memory)
else()
  include_directories(${NEO_SHARED_DIRECTORY}/gmm_helper/windows/gmm_memory${BRANCH_DIR_SUFFIX})
endif()

include_directories(${NEO_SHARED_DIRECTORY}/helpers/definitions${BRANCH_DIR_SUFFIX})
include_directories(${NEO_SHARED_DIRECTORY}/memory_properties${BRANCH_DIR_SUFFIX})
include_directories(${NEO_SHARED_DIRECTORY}/sku_info/definitions${BRANCH_DIR_SUFFIX})

# OpenCL-specific definition directories.
include_directories(${NEO_SOURCE_DIR}/opencl/source/command_queue/definitions${BRANCH_DIR_SUFFIX})
include_directories(${NEO_SOURCE_DIR}/opencl/source/mem_obj/definitions${BRANCH_DIR_SUFFIX})
include_directories(${NEO_SOURCE_DIR}/opencl/source/memory_manager/definitions${BRANCH_DIR_SUFFIX})

# GL sharing headers are added for MSVC builds only.
if(MSVC)
  include_directories(${NEO_SOURCE_DIR}/opencl/source/sharings/gl/windows/include)
endif()

# Callback invoked by apply_macro_for_each_core_type(): reads CORE_TYPE_LOWER
# and adds the shared and OpenCL per-core "definitions" directories to the
# include path, skipping any that do not exist on disk.
macro(macro_for_each_core_type)
  foreach(PLATFORM_DIRECTORY IN ITEMS
          ${NEO_SHARED_DIRECTORY}/${CORE_TYPE_LOWER}/definitions${BRANCH_DIR_SUFFIX}
          ${NEO_SOURCE_DIR}/opencl/source/${CORE_TYPE_LOWER}/definitions${BRANCH_DIR_SUFFIX}
  )
    if(EXISTS ${PLATFORM_DIRECTORY})
      include_directories(${PLATFORM_DIRECTORY})
    endif()
  endforeach()
endmacro()

apply_macro_for_each_core_type("SUPPORTED")

# Define where to put binaries
set(TargetDir ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})
# With MSVC and any generator other than Ninja, binaries go into a
# per-build-type subdirectory of the runtime output directory.
if(MSVC AND NOT "${CMAKE_GENERATOR}" STREQUAL "Ninja")
  set(TargetDir ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${CMAKE_BUILD_TYPE})
endif()

# Offline compiler (ocloc); MOCKABLE_VIRTUAL is defined as empty for ocloc_lib.
add_subdirectory_unique(${OCLOC_DIRECTORY} ${NEO_BUILD_DIR}/offline_compiler)
target_compile_definitions(ocloc_lib PRIVATE MOCKABLE_VIRTUAL=)
include(cmake/ocloc_cmd_prefix.cmake)

# Shared library names. The release name is the same in every configuration.
# The mockable name aliases the release library when DONT_CARE_OF_VIRTUALS is
# set; otherwise it is a separate library, defined only when unit tests are
# not skipped.
set(NEO_SHARED_RELEASE_LIB_NAME "neo_shared")
if(DONT_CARE_OF_VIRTUALS)
  set(NEO_SHARED_MOCKABLE_LIB_NAME ${NEO_SHARED_RELEASE_LIB_NAME})
elseif(NOT NEO_SKIP_UNIT_TESTS)
  set(NEO_SHARED_MOCKABLE_LIB_NAME "neo_shared_mockable")
endif()

set(BIKSIM_LIB_NAME "biksim")
set(BUILTINS_SOURCES_LIB_NAME "builtins_sources")
set(BUILTINS_BINARIES_BINDFUL_LIB_NAME "builtins_binaries_bindful")
set(BUILTINS_BINARIES_BINDLESS_LIB_NAME "builtins_binaries_bindless")
set(BUILTINS_SPIRV_LIB_NAME "builtins_spirv")
set(BUILTINS_VME_LIB_NAME "builtins_vme")

# Extra system libraries, selected per host platform.
if(NOT WIN32)
  set(NEO_EXTRA_LIBS dl pthread rt)
else()
  set(NEO_EXTRA_LIBS Ws2_32)
endif()

if(UNIX)
  # prelim headers detection
  # A non-empty BRANCH_TYPE forces detection on; otherwise a value already
  # defined by the caller is kept, and FALSE is the fallback.
  if(NOT "${BRANCH_TYPE}" STREQUAL "")
    set(NEO_ENABLE_i915_PRELIM_DETECTION TRUE)
  elseif(NOT DEFINED NEO_ENABLE_i915_PRELIM_DETECTION)
    set(NEO_ENABLE_i915_PRELIM_DETECTION FALSE)
  endif()
  message(STATUS "i915 prelim headers detection: ${NEO_ENABLE_i915_PRELIM_DETECTION}")
endif()

add_subdirectory_unique(shared)

# When OpenCL is not built, hide its directory and flag its unit tests as
# skipped.
if(NOT NEO_BUILD_WITH_OCL)
  hide_subdir(opencl)
  set(NEO_SKIP_OCL_UNIT_TESTS TRUE)
else()
  add_subdirectory_unique(opencl)
endif()

set(DONT_LINK_ELF_STATICALLY TRUE)

# Optional sibling "internal" tree, added only when present next to the sources.
if(EXISTS ${NEO_SOURCE_DIR}/../internal)
  add_subdirectory_unique(${NEO_SOURCE_DIR}/../internal ${NEO_BUILD_DIR}/internal)
endif()

set(NEO_SCRIPT_PROJECTS_FOLDER "neo scripts")

# Generated headers written into the build directory.
configure_file(config.h.in ${NEO_BUILD_DIR}/config.h)
# Put Driver version into define
configure_file(driver_version.h.in ${NEO_BUILD_DIR}/driver_version.h)
configure_file(lib_names.h.in ${NEO_BUILD_DIR}/lib_names.h)

if(BUILD_WITH_L0)
  add_subdirectory_unique(level_zero)
endif()
add_subdirectory_unique(target_unit_tests) add_subdirectory_unique(target_aub_tests) add_subdirectories() include(package.cmake) compute-runtime-22.14.22890/CONTRIBUTING.md000066400000000000000000000062061422164147700176070ustar00rootroot00000000000000 # Contribution guidelines ## Process overview ### 1. Patch creation Start with a patch (we prefer smaller self-contained incremental changes vs. large blobs of code). When adding new code, please also add corresponding unit level tests (ULT). Added ULTs should cover all the decision points introduced by the commit and should fail if executed without the code changes. Make sure it builds and passes _all_ ULT tests. For details about what compilers and build configurations we expect, refer to instructions for [building](https://github.com/intel/compute-runtime/blob/master/BUILD.md) the driver. Make sure you adhere to our [coding standard](https://github.com/intel/compute-runtime/blob/master/GUIDELINES.md); this will be verified by clang-format and clang-tidy (tool configuration is already included in NEO repository). ### 2. Certificate of origin In order to get a clear contribution chain of trust we use the [signed-off-by language](https://01.org/community/signed-process) used by the Linux kernel project. Please make sure your commit message adheres to this guideline. ### 3. Patch submission Create a pull request on github once you are confident that your changes are complete and fulfill the requirements above. Make sure your commit message follows these rules: * each line has 80 character limit * title (first line) should be self-contained (i.e. make sense without looking at the body) * additional description can be provided in the body * title and body need to be separated by an empty line ### 4. Initial (cursory) review One of NEO maintainers will do an initial (brief) review of your code. We will let you know if anything major is missing. ### 5. 
Verification We'll double-check that your code meets all of our minimal quality expectations. Every commit must: * Build under Linux - using clang (8.0) and gcc (7.x ... 9.0) * Build under Windows (this is currently a requirement that cannot be verified externally) * Pass ULTs for all supported platforms * Pass clang-format check with the configuration contained within repository * Pass clang-tidy check with the configuration contained within repository * Pass sanity testing (test content recommendation for the external community will be provided in the future) When all the automated checks are confirmed to be passing, we will start actual code review process. ### 6. Code review We'll make sure that your code fits within the architecture and design of NEO, is readable and maintainable. Please make sure to address our questions and concerns. ### 7. Patch disposition We reserve, upon conclusion of the code review, the right to do one of the following: 1. Merge the patch as submitted 1. Merge the patch (with modifications) 1. Reject the patch If merged, you will be listed as patch author. Your patch may be reverted later in case of major regression that was not detected prior to commit. 
## Intel Employees If you are an Intel Employee *and* you want to contribute to NEO as part of your regular job duties please: * Contact us in advance * Make sure your github account is linked to your intel.com email addresscompute-runtime-22.14.22890/DISTRIBUTIONS.md000066400000000000000000000013331422164147700177360ustar00rootroot00000000000000 # NEO in Linux distributions ## OpenCL specific * [distributions](https://github.com/intel/compute-runtime/blob/master/opencl/doc/DISTRIBUTIONS.md) ## Intel software for General Purpose GPU capabilities * Documentation and instructions for installing, deploying, and updating Intel software to enable general purpose GPU (GPGPU) capabilities for Linux*-based operating system distributions can be found on: [https://dgpu-docs.intel.com](https://dgpu-docs.intel.com) * An Intel brand integrated and global search accessible Linux graphics landing page [https://intel.com/linux-graphics-drivers](https://intel.com/linux-graphics-drivers) compute-runtime-22.14.22890/FAQ.md000066400000000000000000000057441422164147700163150ustar00rootroot00000000000000 # Frequently asked questions For OpenCL specific questions, see the [OpenCL FAQ](https://github.com/intel/compute-runtime/blob/master/opencl/doc/FAQ.md). ## OS support ### Which Linux versions does NEO support? NEO should work on any modern Linux distribution (i.e. Ubuntu, Fedora, etc.) with default / stock configuration (no kernel patches), assuming the underlying kernel's drm subsystem is 4.7 or higher. Newer platforms will require a kernel version that provides support for that platform (e.g. Coffee Lake requires kernel 4.14 or higher). Our default (most frequent) validation config is currently Ubuntu 20.04 LTS (as of Q1'21). ### Does NEO support Microsoft Windows? Our closed-source driver for Windows is using the same codebase. At this time, we do not support compilation of the stack for Windows. It is our long-term intention to offer that option. 
### Does NEO support Windows Subsystem for Linux (WSL)? See [WSL.md](https://github.com/intel/compute-runtime/blob/master/WSL.md). ### Why is the feature set different in latest Windows driver vs. latest NEO on github? Our Windows release process takes up to several weeks before drivers are available through intel.com and/or Windows update. Features available in github will be available on Windows later. Note: Older platforms (e.g. Broadwell) are considered to be in maintenance mode for Windows. ### How can I enable reading debug environment variables on Linux release builds? Reading of debug environment variables on Linux release builds can be enabled by specifying `NEOReadDebugKeys` environment variable with a value of 1. E.g. to rebuild precompiled kernels you need to set both `RebuildPrecompiledKernels` and `NEOReadDebugKeys` to a value of 1. List of all debug keys can be found [here](https://github.com/intel/compute-runtime/blob/master/shared/source/debug_settings/debug_variables_base.inl). ## Platform support ### Which Intel platforms are supported by the driver? See [README.md](https://github.com/intel/compute-runtime/blob/master/README.md). ### How can I check that my specific device is supported by the driver? To check support for any device, you can follow these steps: 1. Go to [Ark]( https://ark.intel.com) and find your Device ID 1. Check if this Device ID is enumerated in the [supported device list](https://github.com/intel/compute-runtime/blob/master/shared/source/dll/devices/devices_base.inl) ### Do you provide binary packages with support for DG1? Yes. Please refer to official [installation guide](https://dgpu-docs.intel.com/installation-guides/index.html). ### When will support for platform X be added? We will start adding platform support after platform is disclosed by Intel. It is our intention to offer full support ahead of platform's market availability. ## Who are we? The Compute Runtime team is part of GSE (Graphics Software Engineering). 
Most of our engineers are located in Poland, United States, and India. compute-runtime-22.14.22890/GUIDELINES.md000066400000000000000000000057231422164147700173330ustar00rootroot00000000000000 File to cover guidelines for NEO project. # C++ usage * use c++ style casts instead of c style casts. * do not use default parameters * prefer using over typedef * avoid defines for constants, use constexpr * prefer forward declarations in headers * avoid includes in headers unless absolutely necessary * use of exceptions in driver code needs strong justification * prefer static create methods returning std::unique_ptr instead of throwing from constructor * inside methods, use an explicit `this->` pointer for referring to non-static class members # Naming conventions * use snake_case for new files * use PascalCase for class, struct, enum, and namespace names * use camelCase for variable and function names * prefer verbose names for variables and functions ``` bad examples : sld, elws, aws good examples : sourceLevelDebugger, enqueuedLocalWorkGroupSize, actualWorkGroupSize ``` * follow givenWhenThen test naming pattern, indicate what is interesting in the test bad examples : ``` TEST(CsrTests, initialize) TEST(CQTests, simple) TEST(CQTests, basic) TEST(CQTests, works) ``` good examples: ``` TEST(CommandStreamReceiverTests, givenCommandStreamReceiverWhenItIsInitializedThenProperFieldsAreSet) TEST(CommandQueueTests, givenCommandQueueWhenEnqueueIsDoneThenTaskLevelIsModified) TEST(CommandQueueTests, givenCommandQueueWithDefaultParametersWhenEnqueueIsDoneThenTaskCountIncreases) TEST(CommandQueueTests, givenCommandQueueWhenEnqueueWithBlockingFlagIsSetThenDriverWaitsUntilAllCommandsAreCompleted) ``` # Testing mindset * Test behaviors instead of implementations, do not focus on adding a test per every function in the class (avoid tests for setters and getters), focus on the functionality you are adding and how it changes the driver behavior, do not bind tests to implementation. 
* Make sure that test is fast, our test suite needs to complete in seconds for efficient development pace, as a general rule test shouldn't be longer than 1ms in Debug driver. # Coding guidelines * Favor the design of a self-explanatory code over the use of comments; if comments are needed, use double slash instead of block comments * HW commands and structures used in NEO must be initialized with constants defined for each Gfx Family: i.e. PIPE_CONTROL cmd = GfxFamily::cmdInitPipeControl * Any new HW command or structure must have its own static constant initializer added to any Gfx Family that is going to use it. * One-line branches use braces * Headers are guarded using `#pragma once` * Do not use `TODO`s in the code * Use `UNRECOVERABLE_IF` and `DEBUG_BREAK_IF` instead of `asserts`: * Use `UNRECOVERABLE_IF` when a failure is found and driver cannot proceed with normal execution. `UNRECOVERABLE_IF` is implemented in Release and Debug builds. * Use `DEBUG_BREAK_IF` when a failure can be handled gracefully by the driver and it can continue with normal execution. `DEBUG_BREAK_IF` is only implemented in Debug builds. compute-runtime-22.14.22890/Jenkinsfile000066400000000000000000000001161422164147700175340ustar00rootroot00000000000000#!groovy dependenciesRevision='5e00a0c1148dfea10b3a934521597896c8e7d32d-1885' compute-runtime-22.14.22890/LICENSE.md000066400000000000000000000001031422164147700167500ustar00rootroot00000000000000Copyright (C) 2021 Intel Corporation SPDX-License-Identifier: MIT compute-runtime-22.14.22890/README.md000066400000000000000000000115001422164147700166260ustar00rootroot00000000000000 # Intel(R) Graphics Compute Runtime for oneAPI Level Zero and OpenCL(TM) Driver ## Introduction The Intel(R) Graphics Compute Runtime for oneAPI Level Zero and OpenCL(TM) Driver is an open source project providing compute API support (Level Zero, OpenCL) for Intel graphics hardware architectures (HD Graphics, Xe). ## What is NEO? 
NEO is the shorthand name for Compute Runtime contained within this repository. It is also a development mindset that we adopted when we first started the implementation effort for OpenCL. The project evolved beyond a single API and NEO no longer implies a specific API. When talking about a specific API, we will mention it by name (e.g. Level Zero, OpenCL). ## License The Intel(R) Graphics Compute Runtime for oneAPI Level Zero and OpenCL(TM) Driver is distributed under the MIT License. You may obtain a copy of the License at: https://opensource.org/licenses/MIT ## Supported Platforms |Platform|OpenCL|Level Zero| |--------|:----:|:--------:| |Intel Core Processors with Gen8 graphics devices (formerly Broadwell)| 3.0 | - | |Intel Core Processors with Gen9 graphics devices (formerly Skylake, Kaby Lake, Coffee Lake)| 3.0 | Y | |Intel Atom Processors with Gen9 graphics devices (formerly Apollo Lake, Gemini Lake)| 3.0 | - | |Intel Core Processors with Gen11 graphics devices (formerly Ice Lake)| 3.0 | Y | |Intel Atom Processors with Gen11 graphics devices (formerly Elkhart Lake)| 3.0 | - | |Intel Core Processors with Gen12 graphics devices (formerly Tiger Lake, Rocket Lake, Alder Lake)| 3.0 | Y | ## Release cadence * Once a week, we run an extended validation cycle on a selected driver. * When the extended validation cycle tests pass, the corresponding commit on github is tagged using the format yy.ww.bbbb (yy - year, ww - work week, bbbb - incremental build number). * Typically for weekly tags we will post a binary release (e.g. deb). * The quality level of the driver (per platform) will be provided in the Release Notes. ## Installation Options To allow NEO to access the GPU device, make sure the user has permissions to the /dev/dri/renderD* files. ### Via system package manager NEO is available for installation on a variety of Linux distributions and can be installed via the distro's package manager. 
For example on Ubuntu* 20.04: ``` apt-get install intel-opencl-icd ``` Procedures for other [distributions](https://github.com/intel/compute-runtime/blob/master/DISTRIBUTIONS.md). ### Manual download .deb packages for Ubuntu are provided along with installation instructions and Release Notes on the [release page](https://github.com/intel/compute-runtime/releases) ## Linking applications Directly linking to the runtime library is not supported: * Level Zero applications should link with [Level Zero loader](https://github.com/oneapi-src/level-zero) * OpenCL applications should link with [ICD loader library (ocl-icd)](https://github.com/OCL-dev/ocl-icd) ## Dependencies * GmmLib - https://github.com/intel/gmmlib * Intel Graphics Compiler - https://github.com/intel/intel-graphics-compiler In addition, to enable performance counters support, the following packages are needed: * Intel Metrics Discovery - https://github.com/intel/metrics-discovery * Intel Metrics Library for MDAPI - https://github.com/intel/metrics-library ## How to provide feedback By default, please submit an issue using native github.com [interface](https://github.com/intel/compute-runtime/issues). ## How to contribute Create a pull request on github.com with your patch. Make sure your change is cleanly building and passing ULTs. A maintainer will contact you if there are questions or concerns. See [contribution guidelines](https://github.com/intel/compute-runtime/blob/master/CONTRIBUTING.md) for more details. 
## See also * [Contribution guidelines](https://github.com/intel/compute-runtime/blob/master/CONTRIBUTING.md) * [Frequently Asked Questions](https://github.com/intel/compute-runtime/blob/master/FAQ.md) ### Level Zero specific * [oneAPI Level Zero specification](https://spec.oneapi.com/versions/latest/elements/l0/source/index.html) * [Intel(R) oneAPI Level Zero Specification API C/C++ header files](https://github.com/oneapi-src/level-zero/) * [oneAPI Level Zero tests](https://github.com/oneapi-src/level-zero-tests/) ### OpenCL specific * [OpenCL on Linux guide](https://github.com/bashbaug/OpenCLPapers/blob/markdown/OpenCLOnLinux.md) * [Intel(R) GPU Compute Samples](https://github.com/intel/compute-samples) * [Frequently Asked Questions](https://github.com/intel/compute-runtime/blob/master/opencl/doc/FAQ.md) * [Interoperability with VTune](https://github.com/intel/compute-runtime/blob/master/opencl/doc/VTUNE.md) * [OpenCL Conformance Tests](https://github.com/KhronosGroup/OpenCL-CTS/) ___(*) Other names and brands may be claimed as property of others.___ compute-runtime-22.14.22890/WSL.md000066400000000000000000000022641422164147700163450ustar00rootroot00000000000000 # WDDM GPU Paravirtualization support for WSL2 ## Introduction This document describes the ingredients required to use the Intel® Graphics Compute Runtime for oneAPI Level Zero and OpenCL™ Driver in a WSL2 environment. ## Host OS Windows 11 or Windows 10 with the [21H2 update](https://blogs.windows.com/windowsexperience/2021/07/15/introducing-the-next-feature-update-to-windows-10-21h2/). ## WSL kernel Tested with [5.10.16.3](https://docs.microsoft.com/en-us/windows/wsl/kernel-release-notes#510163). ## Host Graphics Driver Required driver package (30.0.100.9955) is available [here](https://www.intel.com/content/www/us/en/download/19344/intel-graphics-windows-dch-drivers.html). 
## Guest (WSL2) Intel® Graphics Compute Runtime for oneAPI Level Zero and OpenCL™ Driver Support was enabled at https://github.com/intel/compute-runtime/commit/fad4ee7e246839c36c3f6b0e14ea0c79d9e4758a and it is included in [21.30.20482](https://github.com/intel/compute-runtime/releases/tag/21.30.20482) and beyond - use [latest](https://github.com/intel/compute-runtime/releases) release for best experience. compute-runtime-22.14.22890/cmake/000077500000000000000000000000001422164147700164325ustar00rootroot00000000000000compute-runtime-22.14.22890/cmake/common_macros.cmake000066400000000000000000000061631422164147700222760ustar00rootroot00000000000000# # Copyright (C) 2018-2021 Intel Corporation # # SPDX-License-Identifier: MIT # macro(hide_subdir subdir) file(RELATIVE_PATH subdir_relative ${NEO_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/${subdir}) set(${subdir_relative}_hidden} TRUE) endmacro() macro(add_subdirectory_unique subdir) file(RELATIVE_PATH subdir_relative ${NEO_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/${subdir}) if(NOT ${subdir_relative}_hidden}) add_subdirectory(${subdir} ${ARGN}) endif() hide_subdir(${subdir}) endmacro() macro(add_subdirectories) file(GLOB subdirectories RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/*) foreach(subdir ${subdirectories}) file(RELATIVE_PATH subdir_relative ${NEO_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/${subdir}) if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/CMakeLists.txt AND NOT ${subdir_relative}_hidden}) add_subdirectory(${subdir}) endif() endforeach() endmacro() macro(create_project_source_tree target) if(MSVC) set(prefixes ${CMAKE_CURRENT_SOURCE_DIR} ${ARGN} ${NEO_SOURCE_DIR}) get_target_property(source_list ${target} SOURCES) foreach(source_file ${source_list}) if(NOT ${source_file} MATCHES "\<*\>") string(TOLOWER ${source_file} source_file_relative) foreach(prefix ${prefixes}) if(source_file_relative) string(TOLOWER ${prefix} prefix) string(REPLACE "${prefix}" "" source_file_relative 
${source_file_relative}) endif() endforeach() get_filename_component(source_path_relative ${source_file_relative} PATH) if(source_path_relative) string(REPLACE "/" "\\" source_path_relative ${source_path_relative}) endif() source_group("Source Files\\${source_path_relative}" FILES ${source_file}) endif() endforeach() endif() endmacro() macro(create_project_source_tree_with_exports target exports_filename) create_project_source_tree(${target} ${ARGN}) if(MSVC) if(NOT "${exports_filename}" STREQUAL "") source_group("exports" FILES "${exports_filename}") endif() endif() endmacro() macro(apply_macro_for_each_core_type type) set(given_type ${type}) foreach(CORE_TYPE ${ALL_CORE_TYPES}) string(TOLOWER ${CORE_TYPE} CORE_TYPE_LOWER) CORE_CONTAINS_PLATFORMS(${given_type} ${CORE_TYPE} COREX_HAS_PLATFORMS) if(${COREX_HAS_PLATFORMS}) macro_for_each_core_type() endif() endforeach() endmacro() macro(apply_macro_for_each_platform) GET_PLATFORMS_FOR_CORE_TYPE(${given_type} ${CORE_TYPE} TESTED_COREX_PLATFORMS) foreach(PLATFORM_IT ${TESTED_COREX_PLATFORMS}) string(TOLOWER ${PLATFORM_IT} PLATFORM_IT_LOWER) macro_for_each_platform() endforeach() endmacro() macro(get_family_name_with_type core_type platform_type) string(REPLACE "GEN" "Gen" core_type_capitalized ${core_type}) string(TOLOWER ${platform_type} platform_type_lower) set(family_name_with_type ${core_type_capitalized}${platform_type_lower}) endmacro() macro(append_sources_from_properties list_name) foreach(name ${ARGN}) get_property(${name} GLOBAL PROPERTY ${name}) list(APPEND ${list_name} ${${name}}) endforeach() endmacro() compute-runtime-22.14.22890/cmake/fill_core_types.cmake000066400000000000000000000010451422164147700226160ustar00rootroot00000000000000# # Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT # list(APPEND ALL_CORE_TYPES "GEN8") list(APPEND ALL_CORE_TYPES "GEN9") list(APPEND ALL_CORE_TYPES "GEN11") list(APPEND ALL_CORE_TYPES "GEN12LP") list(APPEND ALL_CORE_TYPES "XE_HP_CORE") 
list(APPEND ALL_CORE_TYPES "XE_HPG_CORE") list(APPEND ALL_CORE_TYPES "XE_HPC_CORE") list(APPEND XEHP_AND_LATER_CORE_TYPES "XE_HP_CORE" "XE_HPG_CORE" "XE_HPC_CORE") list(APPEND DG2_AND_LATER_CORE_TYPES "XE_HPG_CORE" "XE_HPC_CORE") list(APPEND PVC_AND_LATER_CORE_TYPES "XE_HPC_CORE") compute-runtime-22.14.22890/cmake/find_gmmlib.cmake000066400000000000000000000056421422164147700217120ustar00rootroot00000000000000# # Copyright (C) 2021 Intel Corporation # # SPDX-License-Identifier: MIT # # GmmLib detection if(TARGET igfx_gmmumd_dll) set(GMM_TARGET_NAME "igfx_gmmumd_dll") set(GMM_LINK_NAME $) set(NEO__GMM_LIBRARY_PATH $) set(NEO__GMM_INCLUDE_DIR $) else() if(DEFINED GMM_DIR) get_filename_component(GMM_DIR "${GMM_DIR}" ABSOLUTE) else() get_filename_component(GMM_DIR_tmp "${NEO_SOURCE_DIR}/../gmmlib" ABSOLUTE) if(IS_DIRECTORY "${GMM_DIR_tmp}") set(GMM_DIR "${GMM_DIR_tmp}") endif() endif() if(UNIX) if(DEFINED GMM_DIR) if(IS_DIRECTORY "${GMM_DIR}/lib/pkgconfig/") set(__tmp_LIBDIR "lib") elseif(IS_DIRECTORY "${GMM_DIR}/${CMAKE_INSTALL_LIBDIR}/pkgconfig/") set(__tmp_LIBDIR ${CMAKE_INSTALL_LIBDIR}) endif() endif() find_package(PkgConfig) if(DEFINED __tmp_LIBDIR) set(OLD_PKG_CONFIG_PATH $ENV{PKG_CONFIG_PATH}) set(ENV{PKG_CONFIG_PATH} "${GMM_DIR}/${__tmp_LIBDIR}/pkgconfig/") endif() pkg_check_modules(NEO__GMM igdgmm) if(DEFINED __tmp_LIBDIR) set(ENV{PKG_CONFIG_PATH} ${OLD_PKG_CONFIG_PATH}) endif() if(NEO__GMM_FOUND) if(DEFINED __tmp_LIBDIR) string(REPLACE "${NEO__GMM_INCLUDEDIR}" "${GMM_DIR}/include/igdgmm" NEO__GMM_INCLUDE_DIRS "${NEO__GMM_INCLUDE_DIRS}") string(REPLACE "${NEO__GMM_LIBDIR}" "${GMM_DIR}/${__tmp_LIBDIR}" NEO__GMM_LIBDIR "${NEO__GMM_LIBDIR}") set(NEO__GMM_LIBRARY_PATH "${NEO__GMM_LIBDIR}") endif() set(GMM_TARGET_NAME "igfx_gmmumd_dll") set(GMM_LINK_NAME ${NEO__GMM_LIBRARIES}) set(NEO__GMM_INCLUDE_DIR ${NEO__GMM_INCLUDE_DIRS}) message(STATUS "GmmLib include dirs: ${NEO__GMM_INCLUDE_DIR}") else() message(FATAL_ERROR "GmmLib not found!") endif() if(DEFINED 
__tmp_LIBDIR) unset(__tmp_LIBDIR) endif() else() if(EXISTS "${GMM_DIR}/CMakeLists.txt") message(STATUS "GmmLib source dir is: ${GMM_DIR}") add_subdirectory_unique("${GMM_DIR}" "${NEO_BUILD_DIR}/gmmlib") if(NOT DEFINED GMM_TARGET_NAME) set(GMM_TARGET_NAME "igfx_gmmumd_dll") endif() set(NEO__GMM_INCLUDE_DIR $) set(NEO__GMM_LIBRARY_PATH $) set(GMM_LINK_NAME $) else() message(FATAL_ERROR "GmmLib not found!") endif() endif() endif() macro(copy_gmm_dll_for target) if(NOT UNIX) add_custom_command( TARGET ${target} POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different $ $ ) endif() endmacro() link_directories(${NEO__GMM_LIBRARY_PATH}) add_definitions(-DGMM_OCL) compute-runtime-22.14.22890/cmake/find_igc.cmake000066400000000000000000000104131422164147700211750ustar00rootroot00000000000000# # Copyright (C) 2021 Intel Corporation # # SPDX-License-Identifier: MIT # # Intel Graphics Compiler detection if(NOT IGC__IGC_TARGETS) # check whether igc is part of workspace if(DEFINED IGC_DIR) get_filename_component(IGC_DIR "${IGC_DIR}" ABSOLUTE) else() get_filename_component(IGC_DIR_tmp "${NEO_SOURCE_DIR}/../igc" ABSOLUTE) if(IS_DIRECTORY "${IGC_DIR_tmp}") set(IGC_DIR "${IGC_DIR_tmp}") endif() endif() if(UNIX) # on Unix-like use pkg-config find_package(PkgConfig) if(DEFINED IGC_DIR) if(IS_DIRECTORY "${IGC_DIR}/lib/pkgconfig/") set(__tmp_LIBDIR "lib") elseif(IS_DIRECTORY "${IGC_DIR}/${CMAKE_INSTALL_LIBDIR}/pkgconfig/") set(__tmp_LIBDIR ${CMAKE_INSTALL_LIBDIR}) endif() endif() if(DEFINED __tmp_LIBDIR) set(OLD_PKG_CONFIG_PATH $ENV{PKG_CONFIG_PATH}) set(ENV{PKG_CONFIG_PATH} "${IGC_DIR}/${__tmp_LIBDIR}/pkgconfig") endif() pkg_check_modules(NEO__IGC igc-opencl) if(DEFINED __tmp_LIBDIR) set(ENV{PKG_CONFIG_PATH} ${OLD_PKG_CONFIG_PATH}) set(NEO__IGC_LIBRARY_PATH "${IGC_DIR}/${__tmp_LIBDIR}/") endif() if(NEO__IGC_FOUND) if(DEFINED IGC_DIR AND IS_DIRECTORY "${IGC_DIR}/${__tmp_LIBDIR}/pkgconfig/") string(REPLACE "${NEO__IGC_INCLUDEDIR}" "${IGC_DIR}/include/igc" NEO__IGC_INCLUDE_DIRS 
"${NEO__IGC_INCLUDE_DIRS}") endif() set(NEO__IGC_INCLUDE_DIR ${NEO__IGC_INCLUDE_DIRS}) message(STATUS "IGC include dirs: ${NEO__IGC_INCLUDE_DIR}") endif() if(DEFINED __tmp_LIBDIR) unset(__tmp_LIBDIR) endif() endif() if(NEO__IGC_FOUND) # do nothing elseif(EXISTS "${IGC_DIR}/CMakeLists.txt") message(STATUS "IGC source dir is: ${IGC_DIR}") set(IGC_OPTION__OUTPUT_DIR "${NEO_BUILD_DIR}/igc") set(IGC_OPTION__INCLUDE_IGC_COMPILER_TOOLS OFF) add_subdirectory_unique("${IGC_DIR}" "${NEO_BUILD_DIR}/igc" EXCLUDE_FROM_ALL) set(NEO__IGC_TARGETS "${IGC__IGC_TARGETS}") foreach(TARGET_tmp ${NEO__IGC_TARGETS}) list(APPEND NEO__IGC_INCLUDE_DIR $) list(APPEND NEO__IGC_COMPILE_DEFINITIONS $) endforeach() message(STATUS "IGC targets: ${NEO__IGC_TARGETS}") else() message(FATAL_ERROR "Intel Graphics Compiler not found!") endif() else() set(NEO__IGC_TARGETS "${IGC__IGC_TARGETS}") foreach(TARGET_tmp ${NEO__IGC_TARGETS}) list(APPEND NEO__IGC_INCLUDE_DIR $) list(APPEND NEO__IGC_LIBRARY_PATH $) endforeach() string(REPLACE ";" ":" NEO__IGC_LIBRARY_PATH "${NEO__IGC_LIBRARY_PATH}") message(STATUS "IGC targets: ${NEO__IGC_TARGETS}") endif() # VISA headers - always relative to IGC if(IS_DIRECTORY "${IGC_DIR}/../visa") get_filename_component(VISA_DIR "${IGC_DIR}/../visa" ABSOLUTE) elseif(IS_DIRECTORY "${IGC_DIR}/visa") set(VISA_DIR "${IGC_DIR}/visa") elseif(IS_DIRECTORY "${IGC_DIR}/include/visa") set(VISA_DIR "${IGC_DIR}/include/visa") elseif(IS_DIRECTORY "${NEO__IGC_INCLUDEDIR}/../visa") get_filename_component(VISA_DIR "${NEO__IGC_INCLUDEDIR}/../visa" ABSOLUTE) elseif(IS_DIRECTORY "${IGC_OCL_ADAPTOR_DIR}/../../visa") get_filename_component(VISA_DIR "${IGC_OCL_ADAPTOR_DIR}/../../visa" ABSOLUTE) endif() message(STATUS "VISA Dir: ${VISA_DIR}") if(IS_DIRECTORY "${VISA_DIR}/include") set(VISA_INCLUDE_DIR "${VISA_DIR}/include") else() set(VISA_INCLUDE_DIR "${VISA_DIR}") endif() # IGA headers - always relative to VISA if(IS_DIRECTORY "${VISA_DIR}/../iga") get_filename_component(IGA_DIR 
"${VISA_DIR}/../iga" ABSOLUTE) elseif(IS_DIRECTORY "${VISA_DIR}/iga") set(IGA_DIR "${VISA_DIR}/iga") endif() if(IS_DIRECTORY "${IGA_DIR}/IGALibrary/api") set(IGA_INCLUDE_DIR "${IGA_DIR}/IGALibrary/api") else() set(IGA_INCLUDE_DIR "${IGA_DIR}") endif() if(IS_DIRECTORY ${IGA_INCLUDE_DIR}) set(IGA_HEADERS_AVAILABLE TRUE) set(IGA_LIBRARY_NAME "iga${NEO_BITS}") else() set(IGA_HEADERS_AVAILABLE FALSE) endif() message(STATUS "IGA Includes dir: ${IGA_INCLUDE_DIR}") if(WIN32) set(IGC_LIBRARY_NAME "igc${NEO_BITS}") set(FCL_LIBRARY_NAME "igdfcl${NEO_BITS}") endif() if(WIN32 AND NOT NEO__IGC_FOUND) configure_file(igc.opencl.h.in ${NEO_BUILD_DIR}/igc.opencl.h) endif() compute-runtime-22.14.22890/cmake/find_metrics.cmake000066400000000000000000000056241422164147700221110ustar00rootroot00000000000000# # Copyright (C) 2021-2022 Intel Corporation # # SPDX-License-Identifier: MIT # # DEPENDENCY DETECTION function(dependency_detect COMPONENT_NAME DLL_NAME VAR_NAME REL_LOCATION IS_THIRD_PARTY) if(DEFINED ${VAR_NAME}_DIR) get_filename_component(LIBRARY_DIR "${${VAR_NAME}_DIR}" ABSOLUTE) else() get_filename_component(LIBRARY_DIR_tmp "${NEO_SOURCE_DIR}/${REL_LOCATION}" ABSOLUTE) if(IS_DIRECTORY "${LIBRARY_DIR_tmp}") set(LIBRARY_DIR "${LIBRARY_DIR_tmp}") endif() endif() if(UNIX) if(DEFINED LIBRARY_DIR) if(IS_DIRECTORY "${LIBRARY_DIR}/lib/pkgconfig/") set(__tmp_LIBDIR "lib") elseif(IS_DIRECTORY "${LIBRARY_DIR}/${CMAKE_INSTALL_LIBDIR}/pkgconfig/") set(__tmp_LIBDIR ${CMAKE_INSTALL_LIBDIR}) endif() endif() find_package(PkgConfig) if(DEFINED __tmp_LIBDIR) set(OLD_PKG_CONFIG_PATH $ENV{PKG_CONFIG_PATH}) set(ENV{PKG_CONFIG_PATH} "${LIBRARY_DIR}/${__tmp_LIBDIR}/pkgconfig/") endif() if(NOT DLL_NAME STREQUAL "") pkg_check_modules(NEO__${VAR_NAME} ${DLL_NAME}) endif() if(DEFINED __tmp_LIBDIR) set(ENV{PKG_CONFIG_PATH} ${OLD_PKG_CONFIG_PATH}) endif() if(NEO__${VAR_NAME}_FOUND) if(DEFINED __tmp_LIBDIR) if(NOT NEO__${VAR_NAME}_INCLUDE_DIRS STREQUAL "") string(REPLACE "${NEO__${VAR_NAME}_INCLUDEDIR}" 
"${LIBRARY_DIR}/include" NEO__${VAR_NAME}_INCLUDE_DIRS "${NEO__${VAR_NAME}_INCLUDE_DIRS}") else() set(NEO__${VAR_NAME}_INCLUDE_DIRS "${LIBRARY_DIR}/include") endif() endif() set(NEO__${VAR_NAME}_INCLUDE_DIR "${NEO__${VAR_NAME}_INCLUDE_DIRS}") set(NEO__${VAR_NAME}_INCLUDE_DIR "${NEO__${VAR_NAME}_INCLUDE_DIRS}" PARENT_SCOPE) set(NEO__${VAR_NAME}_LIBRARIES ${NEO__${VAR_NAME}_LIBRARIES} PARENT_SCOPE) message(STATUS "${COMPONENT_NAME} include dirs: ${NEO__${VAR_NAME}_INCLUDE_DIR}") return() endif() else() # Windows endif() if(IS_THIRD_PARTY) string(TOLOWER ${VAR_NAME} _VAR_NAME_LOWER) get_filename_component(${VAR_NAME}_HEADERS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/third_party${BRANCH_DIR_SUFFIX}/${_VAR_NAME_LOWER}" ABSOLUTE) if(IS_DIRECTORY ${${VAR_NAME}_HEADERS_DIR}) message(STATUS "${COMPONENT_NAME} dir: ${${VAR_NAME}_HEADERS_DIR}") set(NEO__${VAR_NAME}_INCLUDE_DIR "${${VAR_NAME}_HEADERS_DIR}" PARENT_SCOPE) return() endif() endif() message(FATAL_ERROR "${COMPONENT_NAME} not found!") endfunction() # Metrics Library Detection dependency_detect("Metrics Library" libigdml METRICS_LIBRARY "../metrics/library" TRUE) if(NOT NEO__METRICS_LIBRARY_INCLUDE_DIR STREQUAL "") include_directories("${NEO__METRICS_LIBRARY_INCLUDE_DIR}") endif() # Metrics Discovery Detection dependency_detect("Metrics Discovery" "" METRICS_DISCOVERY "../metrics/discovery" TRUE) if(NOT NEO__METRICS_DISCOVERY_INCLUDE_DIR STREQUAL "") include_directories("${NEO__METRICS_DISCOVERY_INCLUDE_DIR}") endif() compute-runtime-22.14.22890/cmake/find_wdk.cmake000066400000000000000000000053471422164147700212320ustar00rootroot00000000000000# # Copyright (C) 2021 Intel Corporation # # SPDX-License-Identifier: MIT # if(WIN32) # get WDK location and version to use if(NOT WDK_DIR) if(IS_DIRECTORY "${NEO_SOURCE_DIR}/../wdk") get_filename_component(WDK_DIR "../wdk" ABSOLUTE) endif() endif() if(WDK_DIR) if(IS_DIRECTORY "${WDK_DIR}/Win15") get_filename_component(WDK_DIR "${WDK_DIR}/Win15" ABSOLUTE) endif() endif() message(STATUS 
"WDK Directory: ${WDK_DIR}") if(NOT WDK_VERSION) if(WDK_DIR) # Get WDK version from ${WDK_DIR}/WDKVersion.txt file(READ "${WDK_DIR}/WDKVersion.txt" WindowsTargetPlatformVersion) string(REPLACE " " ";" WindowsTargetPlatformVersion ${WindowsTargetPlatformVersion}) list(LENGTH WindowsTargetPlatformVersion versionListLength) if(NOT versionListLength EQUAL 3) if(WIN32) message(ERROR "Error reading content of WDKVersion.txt file") endif() else() list(GET WindowsTargetPlatformVersion 2 WindowsTargetPlatformVersion) endif() else() if(WIN32) message(ERROR "WDK not available") endif() endif() else() set(WindowsTargetPlatformVersion ${WDK_VERSION}) endif() message(STATUS "WDK Version is ${WindowsTargetPlatformVersion}") endif() if(NOT DISABLE_WDDM_LINUX) get_filename_component(LIBDXG_PATH "${NEO_SOURCE_DIR}/third_party/libdxg" ABSOLUTE) set(D3DKMTHK_INCLUDE_PATHS "${LIBDXG_PATH}/include/") endif() if(WIN32) if(${WindowsTargetPlatformVersion} VERSION_LESS "10.0.18328.0") set(CONST_FROM_WDK_10_0_18328_0) else() set(CONST_FROM_WDK_10_0_18328_0 "CONST") endif() add_compile_options(-DCONST_FROM_WDK_10_0_18328_0=${CONST_FROM_WDK_10_0_18328_0}) set(WDK_INCLUDE_PATHS "") list(APPEND WDK_INCLUDE_PATHS "${WDK_DIR}/Include/${WindowsTargetPlatformVersion}/um" "${WDK_DIR}/Include/${WindowsTargetPlatformVersion}/shared" "${WDK_DIR}/Include/${WindowsTargetPlatformVersion}/km" ) message(STATUS "WDK include paths: ${WDK_INCLUDE_PATHS}") elseif(NOT DISABLE_WDDM_LINUX) add_compile_options(-DCONST_FROM_WDK_10_0_18328_0=CONST) add_compile_options(-DWDDM_LINUX=1) set(WDK_INCLUDE_PATHS "") get_filename_component(DX_HEADERS_PATH "${NEO_SOURCE_DIR}/third_party/DirectX-Headers" ABSOLUTE) list(APPEND WDK_INCLUDE_PATHS ${DX_HEADERS_PATH}/include/wsl) list(APPEND WDK_INCLUDE_PATHS ${DX_HEADERS_PATH}/include/wsl/stubs) list(APPEND WDK_INCLUDE_PATHS ${DX_HEADERS_PATH}/include/directx) list(APPEND WDK_INCLUDE_PATHS ${DX_HEADERS_PATH}/include/dxguids) list(APPEND WDK_INCLUDE_PATHS ${D3DKMTHK_INCLUDE_PATHS}) 
message(STATUS "WDK include paths: ${WDK_INCLUDE_PATHS}") endif() if(WIN32) link_directories("${WDK_DIR}/Lib/${WindowsTargetPlatformVersion}/um/${NEO_ARCH}/") endif() compute-runtime-22.14.22890/cmake/ocloc_cmd_prefix.cmake000066400000000000000000000006141422164147700227340ustar00rootroot00000000000000# # Copyright (C) 2021 Intel Corporation # # SPDX-License-Identifier: MIT # if(WIN32) set(ocloc_cmd_prefix ocloc) else() if(DEFINED NEO__IGC_LIBRARY_PATH) set(ocloc_cmd_prefix LD_LIBRARY_PATH=${NEO__IGC_LIBRARY_PATH}:$ $) else() set(ocloc_cmd_prefix LD_LIBRARY_PATH=$ $) endif() endif() compute-runtime-22.14.22890/cmake/run_aub_test_target.cmake000066400000000000000000000101631422164147700234750ustar00rootroot00000000000000# # Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT # string(REPLACE "/" ";" aub_test_config ${aub_test_config}) list(GET aub_test_config 0 product) list(GET aub_test_config 1 slices) list(GET aub_test_config 2 subslices) list(GET aub_test_config 3 eu_per_ss) list(GET aub_test_config 4 revision_id) add_custom_target(run_${product}_${revision_id}_aub_tests ALL) if(NOT NEO_SKIP_OCL_UNIT_TESTS OR NOT NEO_SKIP_L0_UNIT_TESTS) if(NOT NEO_SKIP_OCL_UNIT_TESTS) add_dependencies(run_${product}_${revision_id}_aub_tests copy_test_files_per_product) add_dependencies(run_${product}_${revision_id}_aub_tests prepare_test_kernels_for_ocl) add_dependencies(run_${product}_${revision_id}_aub_tests prepare_test_kernels_for_shared) endif() add_dependencies(run_aub_tests run_${product}_${revision_id}_aub_tests) set_target_properties(run_${product}_${revision_id}_aub_tests PROPERTIES FOLDER "${AUB_TESTS_TARGETS_FOLDER}/${product}/${revision_id}") if(WIN32) add_dependencies(run_${product}_${revision_id}_aub_tests mock_gdi) endif() set(aub_tests_options "") if(NOT ${AUB_DUMP_BUFFER_FORMAT} STREQUAL "") list(APPEND aub_tests_options --dump_buffer_format) list(APPEND aub_tests_options ${AUB_DUMP_BUFFER_FORMAT}) endif() if(NOT ${AUB_DUMP_IMAGE_FORMAT} 
STREQUAL "") list(APPEND aub_tests_options --dump_image_format) list(APPEND aub_tests_options ${AUB_DUMP_IMAGE_FORMAT}) endif() add_custom_command( TARGET run_${product}_${revision_id}_aub_tests POST_BUILD COMMAND WORKING_DIRECTORY ${TargetDir} COMMAND echo re-creating working directory for ${product}/${revision_id} AUBs generation... COMMAND ${CMAKE_COMMAND} -E remove_directory ${TargetDir}/${product}_aub/${revision_id} COMMAND ${CMAKE_COMMAND} -E make_directory ${TargetDir}/${product}_aub/${revision_id} COMMAND ${CMAKE_COMMAND} -E make_directory ${TargetDir}/${product}_aub/${revision_id}/aub_out COMMAND ${CMAKE_COMMAND} -E make_directory ${TargetDir}/${product}_aub/${revision_id}/cl_cache ) endif() if(NOT NEO_SKIP_OCL_UNIT_TESTS) if(WIN32 OR NOT DEFINED NEO__GMM_LIBRARY_PATH) set(aub_test_cmd_prefix $) else() set(aub_test_cmd_prefix LD_LIBRARY_PATH=${NEO__GMM_LIBRARY_PATH} IGDRCL_TEST_SELF_EXEC=off ${NEO_RUN_INTERCEPTOR_LIST} $) endif() add_custom_command( TARGET run_${product}_${revision_id}_aub_tests POST_BUILD COMMAND WORKING_DIRECTORY ${TargetDir} COMMAND echo Running AUB generation for ${product} in ${TargetDir}/${product}_aub COMMAND ${aub_test_cmd_prefix} --product ${product} --slices ${slices} --subslices ${subslices} --eu_per_ss ${eu_per_ss} --gtest_repeat=1 ${aub_tests_options} ${NEO_TESTS_LISTENER_OPTION} --rev_id ${revision_id} ) endif() if(NOT NEO_SKIP_L0_UNIT_TESTS AND BUILD_WITH_L0) add_dependencies(run_${product}_${revision_id}_aub_tests prepare_test_kernels_for_l0) if(WIN32 OR NOT DEFINED NEO__GMM_LIBRARY_PATH) set(l0_aub_test_cmd_prefix $) else() set(l0_aub_test_cmd_prefix LD_LIBRARY_PATH=${NEO__GMM_LIBRARY_PATH} ${NEO_RUN_INTERCEPTOR_LIST} $) endif() add_custom_command( TARGET run_${product}_${revision_id}_aub_tests POST_BUILD COMMAND WORKING_DIRECTORY ${TargetDir} COMMAND echo Running Level Zero AUB generation for ${product} in ${TargetDir}/${product}_aub COMMAND ${l0_aub_test_cmd_prefix} --product ${product} --slices ${slices} --subslices 
${subslices} --eu_per_ss ${eu_per_ss} --gtest_repeat=1 ${aub_tests_options} --rev_id ${revision_id} ) endif() if(DO_NOT_RUN_AUB_TESTS) set_target_properties(run_${product}_${revision_id}_aub_tests PROPERTIES EXCLUDE_FROM_DEFAULT_BUILD TRUE EXCLUDE_FROM_ALL TRUE ) endif() compute-runtime-22.14.22890/cmake/run_ult_target.cmake000066400000000000000000000105651422164147700225010ustar00rootroot00000000000000# # Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT # string(REPLACE "/" ";" unit_test_config ${unit_test_config}) list(GET unit_test_config 0 product) list(GET unit_test_config 1 slices) list(GET unit_test_config 2 subslices) list(GET unit_test_config 3 eu_per_ss) list(GET unit_test_config 4 revision_id) add_custom_target(run_${product}_${revision_id}_unit_tests ALL DEPENDS unit_tests) set_target_properties(run_${product}_${revision_id}_unit_tests PROPERTIES FOLDER "${PLATFORM_SPECIFIC_TEST_TARGETS_FOLDER}/${product}/${revision_id}") if(NOT NEO_SKIP_SHARED_UNIT_TESTS) add_custom_command( TARGET run_${product}_${revision_id}_unit_tests POST_BUILD COMMAND WORKING_DIRECTORY ${TargetDir} COMMAND echo Running neo_shared_tests ${target} ${slices}x${subslices}x${eu_per_ss} in ${TargetDir}/${product} COMMAND ${NEO_RUN_INTERCEPTOR_LIST} $ --product ${product} --slices ${slices} --subslices ${subslices} --eu_per_ss ${eu_per_ss} ${GTEST_EXCEPTION_OPTIONS} --gtest_repeat=${GTEST_REPEAT} ${GTEST_SHUFFLE} ${NEO_TESTS_LISTENER_OPTION} ${GTEST_FILTER_OPTION} --rev_id ${revision_id} ) endif() if(NOT NEO_SKIP_OCL_UNIT_TESTS) add_custom_command( TARGET run_${product}_${revision_id}_unit_tests POST_BUILD COMMAND WORKING_DIRECTORY ${TargetDir} COMMAND echo Running igdrcl_tests ${target} ${slices}x${subslices}x${eu_per_ss} in ${TargetDir}/${product} COMMAND ${GTEST_ENV} ${NEO_RUN_INTERCEPTOR_LIST} $ --product ${product} --slices ${slices} --subslices ${subslices} --eu_per_ss ${eu_per_ss} ${GTEST_EXCEPTION_OPTIONS} --gtest_repeat=${GTEST_REPEAT} ${GTEST_SHUFFLE} 
${NEO_TESTS_LISTENER_OPTION} ${GTEST_FILTER_OPTION} --rev_id ${revision_id} ) if(WIN32 AND ${CMAKE_BUILD_TYPE} STREQUAL "Debug" AND "${IGDRCL_OPTION__BITS}" STREQUAL "64" AND APPVERIFIER_ALLOWED) add_custom_command( TARGET run_${product}_${revision_id}_unit_tests POST_BUILD COMMAND WORKING_DIRECTORY ${TargetDir} COMMAND echo Running igdrcl_tests with App Verifier COMMAND ${NEO_SOURCE_DIR}/scripts/verify.bat $ --product ${product} --slices ${slices} --subslices ${subslices} --eu_per_ss ${eu_per_ss} ${GTEST_EXCEPTION_OPTIONS} ${NEO_TESTS_LISTENER_OPTION} ${GTEST_FILTER_OPTION} --rev_id ${revision_id} COMMAND echo App Verifier returned: %errorLevel% ) endif() endif() if(NOT NEO_SKIP_L0_UNIT_TESTS AND BUILD_WITH_L0) add_custom_command( TARGET run_${product}_${revision_id}_unit_tests POST_BUILD COMMAND WORKING_DIRECTORY ${TargetDir} COMMAND echo Running ze_intel_gpu_core_tests ${target} ${slices}x${subslices}x${eu_per_ss} in ${TargetDir}/${product} COMMAND ${NEO_RUN_INTERCEPTOR_LIST} $ --product ${product} --slices ${slices} --subslices ${subslices} --eu_per_ss ${eu_per_ss} ${GTEST_EXCEPTION_OPTIONS} --gtest_repeat=${GTEST_REPEAT} ${GTEST_SHUFFLE} ${NEO_TESTS_LISTENER_OPTION} ${GTEST_FILTER_OPTION} --rev_id ${revision_id} COMMAND echo Running ze_intel_gpu_tools_tests ${target} ${slices}x${subslices}x${eu_per_ss} in ${TargetDir}/${product} COMMAND ${NEO_RUN_INTERCEPTOR_LIST} $ --product ${product} --slices ${slices} --subslices ${subslices} --eu_per_ss ${eu_per_ss} ${GTEST_EXCEPTION_OPTIONS} --gtest_repeat=${GTEST_REPEAT} ${GTEST_SHUFFLE} ${NEO_TESTS_LISTENER_OPTION} ${GTEST_FILTER_OPTION} --rev_id ${revision_id} COMMAND echo Running ze_intel_gpu_exp_tests ${target} ${slices}x${subslices}x${eu_per_ss} in ${TargetDir}/${product} COMMAND ${NEO_RUN_INTERCEPTOR_LIST} $ --product ${product} --slices ${slices} --subslices ${subslices} --eu_per_ss ${eu_per_ss} ${GTEST_EXCEPTION_OPTIONS} --gtest_repeat=${GTEST_REPEAT} ${GTEST_SHUFFLE} ${NEO_TESTS_LISTENER_OPTION} 
${GTEST_FILTER_OPTION} --rev_id ${revision_id} ) endif() add_dependencies(run_unit_tests run_${product}_${revision_id}_unit_tests) compute-runtime-22.14.22890/cmake/setup_platform_flags.cmake000066400000000000000000000237731422164147700236700ustar00rootroot00000000000000# # Copyright (C) 2020-2022 Intel Corporation # # SPDX-License-Identifier: MIT # SET_FLAGS_FOR("GEN8" "BDW") SET_FLAGS_FOR("GEN9" "SKL" "KBL" "BXT" "GLK" "CFL") SET_FLAGS_FOR("GEN11" "ICLLP" "LKF" "EHL") SET_FLAGS_FOR("GEN12LP" "TGLLP" "RKL" "ADLS" "ADLP" "DG1") SET_FLAGS_FOR("XE_HP_CORE" "XE_HP_SDV") SET_FLAGS_FOR("XE_HPG_CORE" "DG2") SET_FLAGS_FOR("XE_HPC_CORE" "PVC") foreach(CORE_TYPE ${XEHP_AND_LATER_CORE_TYPES}) if(TESTS_${CORE_TYPE}) set(TESTS_XEHP_AND_LATER 1) endif() if(SUPPORT_${CORE_TYPE}) set(SUPPORT_XEHP_AND_LATER 1) endif() endforeach() foreach(CORE_TYPE ${DG2_AND_LATER_CORE_TYPES}) if(TESTS_${CORE_TYPE}) set(TESTS_DG2_AND_LATER 1) endif() if(SUPPORT_${CORE_TYPE}) set(SUPPORT_DG2_AND_LATER 1) endif() endforeach() foreach(CORE_TYPE ${PVC_AND_LATER_CORE_TYPES}) if(TESTS_${CORE_TYPE}) set(TESTS_PVC_AND_LATER 1) endif() if(SUPPORT_${CORE_TYPE}) set(SUPPORT_PVC_AND_LATER 1) endif() endforeach() # Add supported and tested platforms if(SUPPORT_GEN8) set(CORE_GEN8_REVISIONS 0) if(TESTS_GEN8) ADD_ITEM_FOR_CORE_TYPE("FAMILY_NAME" "TESTED" "GEN8" "BDWFamily") endif() if(SUPPORT_BDW) ADD_PRODUCT("SUPPORTED" "BDW" "IGFX_BROADWELL") ADD_PLATFORM_FOR_CORE_TYPE("SUPPORTED" "GEN8" "BDW" "CORE") ADD_PLATFORM_FOR_CORE_TYPE("SUPPORTED_2_0" "GEN8" "BDW" "CORE") ADD_PLATFORM_FOR_CORE_TYPE("SUPPORTED_IMAGES" "GEN8" "BDW" "CORE") if(TESTS_BDW) ADD_ITEM_FOR_CORE_TYPE("PLATFORMS" "TESTED" "GEN8" "BDW") ADD_PRODUCT("TESTED" "BDW" "IGFX_BROADWELL") endif() endif() endif() if(SUPPORT_GEN9) set(CORE_GEN9_REVISIONS 9) set(LP_GEN9_REVISIONS 0) if(TESTS_GEN9) ADD_ITEM_FOR_CORE_TYPE("FAMILY_NAME" "TESTED" "GEN9" "SKLFamily") endif() if(SUPPORT_SKL) ADD_PRODUCT("SUPPORTED" "SKL" "IGFX_SKYLAKE") 
ADD_PLATFORM_FOR_CORE_TYPE("SUPPORTED" "GEN9" "SKL" "CORE") ADD_PLATFORM_FOR_CORE_TYPE("SUPPORTED_2_0" "GEN9" "SKL" "CORE") ADD_PLATFORM_FOR_CORE_TYPE("SUPPORTED_VME" "GEN9" "SKL" "CORE") ADD_PLATFORM_FOR_CORE_TYPE("SUPPORTED_IMAGES" "GEN9" "SKL" "CORE") set(PREFERRED_PLATFORM "SKL") if(TESTS_SKL) set(PREFERRED_FAMILY_NAME "SKLFamily") ADD_ITEM_FOR_CORE_TYPE("PLATFORMS" "TESTED" "GEN9" "SKL") ADD_PRODUCT("TESTED" "SKL" "IGFX_SKYLAKE") endif() endif() if(SUPPORT_KBL) ADD_PRODUCT("SUPPORTED" "KBL" "IGFX_KABYLAKE") ADD_PLATFORM_FOR_CORE_TYPE("SUPPORTED" "GEN9" "KBL" "CORE") ADD_PLATFORM_FOR_CORE_TYPE("SUPPORTED_2_0" "GEN9" "KBL" "CORE") ADD_PLATFORM_FOR_CORE_TYPE("SUPPORTED_VME" "GEN9" "KBL" "CORE") ADD_PLATFORM_FOR_CORE_TYPE("SUPPORTED_IMAGES" "GEN9" "KBL" "CORE") if(TESTS_KBL) ADD_ITEM_FOR_CORE_TYPE("PLATFORMS" "TESTED" "GEN9" "KBL") ADD_PRODUCT("TESTED" "KBL" "IGFX_KABYLAKE") endif() endif() if(SUPPORT_GLK) ADD_PRODUCT("SUPPORTED" "GLK" "IGFX_GEMINILAKE") ADD_PLATFORM_FOR_CORE_TYPE("SUPPORTED" "GEN9" "GLK" "LP") ADD_PLATFORM_FOR_CORE_TYPE("SUPPORTED_VME" "GEN9" "GLK" "LP") ADD_PLATFORM_FOR_CORE_TYPE("SUPPORTED_IMAGES" "GEN9" "GLK" "LP") if(TESTS_GLK) ADD_ITEM_FOR_CORE_TYPE("PLATFORMS" "TESTED" "GEN9" "GLK") ADD_PRODUCT("TESTED" "GLK" "IGFX_GEMINILAKE") endif() endif() if(SUPPORT_CFL) ADD_PRODUCT("SUPPORTED" "CFL" "IGFX_COFFEELAKE") ADD_PLATFORM_FOR_CORE_TYPE("SUPPORTED" "GEN9" "CFL" "CORE") ADD_PLATFORM_FOR_CORE_TYPE("SUPPORTED_2_0" "GEN9" "CFL" "CORE") ADD_PLATFORM_FOR_CORE_TYPE("SUPPORTED_VME" "GEN9" "CFL" "CORE") ADD_PLATFORM_FOR_CORE_TYPE("SUPPORTED_IMAGES" "GEN9" "CFL" "CORE") if(TESTS_CFL) ADD_ITEM_FOR_CORE_TYPE("PLATFORMS" "TESTED" "GEN9" "CFL") ADD_PRODUCT("TESTED" "CFL" "IGFX_COFFEELAKE") endif() endif() if(SUPPORT_BXT) ADD_PRODUCT("SUPPORTED" "BXT" "IGFX_BROXTON") ADD_PLATFORM_FOR_CORE_TYPE("SUPPORTED" "GEN9" "BXT" "LP") ADD_PLATFORM_FOR_CORE_TYPE("SUPPORTED_VME" "GEN9" "BXT" "LP") ADD_PLATFORM_FOR_CORE_TYPE("SUPPORTED_IMAGES" "GEN9" "BXT" "LP") 
if(TESTS_BXT) ADD_ITEM_FOR_CORE_TYPE("PLATFORMS" "TESTED" "GEN9" "BXT") ADD_PRODUCT("TESTED" "BXT" "IGFX_BROXTON") endif() endif() endif() if(SUPPORT_GEN11) set(CORE_GEN11_REVISIONS 0) set(LP_GEN11_REVISIONS 0) if(TESTS_GEN11) ADD_ITEM_FOR_CORE_TYPE("FAMILY_NAME" "TESTED" "GEN11" "ICLFamily") endif() if(SUPPORT_ICLLP) ADD_PRODUCT("SUPPORTED" "ICLLP" "IGFX_ICELAKE_LP") ADD_PLATFORM_FOR_CORE_TYPE("SUPPORTED" "GEN11" "ICLLP" "LP") ADD_PLATFORM_FOR_CORE_TYPE("SUPPORTED_2_0" "GEN11" "ICLLP" "LP") ADD_PLATFORM_FOR_CORE_TYPE("SUPPORTED_VME" "GEN11" "ICLLP" "LP") ADD_PLATFORM_FOR_CORE_TYPE("SUPPORTED_IMAGES" "GEN11" "ICLLP" "LP") if(TESTS_ICLLP) ADD_ITEM_FOR_CORE_TYPE("PLATFORMS" "TESTED" "GEN11" "ICLLP") ADD_PRODUCT("TESTED" "ICLLP" "IGFX_ICELAKE_LP") endif() endif() if(SUPPORT_LKF) ADD_PRODUCT("SUPPORTED" "LKF" "IGFX_LAKEFIELD") ADD_PLATFORM_FOR_CORE_TYPE("SUPPORTED" "GEN11" "LKF" "LP") ADD_PLATFORM_FOR_CORE_TYPE("SUPPORTED_IMAGES" "GEN11" "LKF" "LP") if(TESTS_LKF) ADD_ITEM_FOR_CORE_TYPE("PLATFORMS" "TESTED" "GEN11" "LKF") ADD_PRODUCT("TESTED" "LKF" "IGFX_LAKEFIELD") endif() endif() if(SUPPORT_EHL) ADD_PRODUCT("SUPPORTED" "EHL" "IGFX_ELKHARTLAKE") ADD_PLATFORM_FOR_CORE_TYPE("SUPPORTED" "GEN11" "EHL" "LP") ADD_PLATFORM_FOR_CORE_TYPE("SUPPORTED_IMAGES" "GEN11" "EHL" "LP") if(TESTS_EHL) ADD_ITEM_FOR_CORE_TYPE("PLATFORMS" "TESTED" "GEN11" "EHL") ADD_PRODUCT("TESTED" "EHL" "IGFX_ELKHARTLAKE") endif() endif() endif() if(SUPPORT_GEN12LP) set(LP_GEN12LP_REVISIONS 0) if(TESTS_GEN12LP) ADD_ITEM_FOR_CORE_TYPE("FAMILY_NAME" "TESTED" "GEN12LP" "TGLLPFamily") endif() if(SUPPORT_TGLLP) ADD_PRODUCT("SUPPORTED" "TGLLP" "IGFX_TIGERLAKE_LP") ADD_PLATFORM_FOR_CORE_TYPE("SUPPORTED" "GEN12LP" "TGLLP" "LP") ADD_PLATFORM_FOR_CORE_TYPE("SUPPORTED_IMAGES" "GEN12LP" "TGLLP" "LP") ADD_PLATFORM_FOR_CORE_TYPE("SUPPORTED_AUX_TRANSLATION" "GEN12LP" "TGLLP" "LP") if(TESTS_TGLLP) ADD_ITEM_FOR_CORE_TYPE("PLATFORMS" "TESTED" "GEN12LP" "TGLLP") ADD_PRODUCT("TESTED" "TGLLP" "IGFX_TIGERLAKE_LP") endif() 
endif() if(SUPPORT_DG1) ADD_PRODUCT("SUPPORTED" "DG1" "IGFX_DG1") ADD_PLATFORM_FOR_CORE_TYPE("SUPPORTED" "GEN12LP" "DG1" "LP") ADD_PLATFORM_FOR_CORE_TYPE("SUPPORTED_IMAGES" "GEN12LP" "DG1" "LP") ADD_PLATFORM_FOR_CORE_TYPE("SUPPORTED_AUX_TRANSLATION" "GEN12LP" "DG1" "LP") if(TESTS_DG1) ADD_ITEM_FOR_CORE_TYPE("PLATFORMS" "TESTED" "GEN12LP" "DG1") ADD_PRODUCT("TESTED" "DG1" "IGFX_DG1") endif() endif() if(SUPPORT_RKL) ADD_PRODUCT("SUPPORTED" "RKL" "IGFX_ROCKETLAKE") ADD_PLATFORM_FOR_CORE_TYPE("SUPPORTED" "GEN12LP" "RKL" "LP") ADD_PLATFORM_FOR_CORE_TYPE("SUPPORTED_IMAGES" "GEN12LP" "RKL" "LP") ADD_PLATFORM_FOR_CORE_TYPE("SUPPORTED_AUX_TRANSLATION" "GEN12LP" "RKL" "LP") if(TESTS_RKL) ADD_ITEM_FOR_CORE_TYPE("PLATFORMS" "TESTED" "GEN12LP" "RKL") ADD_PRODUCT("TESTED" "RKL" "IGFX_ROCKETLAKE") endif() endif() if(SUPPORT_ADLS) ADD_PRODUCT("SUPPORTED" "ADLS" "IGFX_ALDERLAKE_S") ADD_PLATFORM_FOR_CORE_TYPE("SUPPORTED" "GEN12LP" "ADLS" "LP") ADD_PLATFORM_FOR_CORE_TYPE("SUPPORTED_IMAGES" "GEN12LP" "ADLS" "LP") ADD_PLATFORM_FOR_CORE_TYPE("SUPPORTED_AUX_TRANSLATION" "GEN12LP" "ADLS" "LP") if(TESTS_ADLS) ADD_ITEM_FOR_CORE_TYPE("PLATFORMS" "TESTED" "GEN12LP" "ADLS") ADD_PRODUCT("TESTED" "ADLS" "IGFX_ALDERLAKE_S") endif() endif() if(SUPPORT_ADLP) ADD_PRODUCT("SUPPORTED" "ADLP" "IGFX_ALDERLAKE_P") ADD_PLATFORM_FOR_CORE_TYPE("SUPPORTED" "GEN12LP" "ADLP" "LP") ADD_PLATFORM_FOR_CORE_TYPE("SUPPORTED_IMAGES" "GEN12LP" "ADLP" "LP") ADD_PLATFORM_FOR_CORE_TYPE("SUPPORTED_AUX_TRANSLATION" "GEN12LP" "ADLP" "LP") if(TESTS_ADLP) ADD_ITEM_FOR_CORE_TYPE("PLATFORMS" "TESTED" "GEN12LP" "ADLP") ADD_PRODUCT("TESTED" "ADLP" "IGFX_ALDERLAKE_P") endif() endif() endif() if(SUPPORT_XE_HP_CORE) SET_FLAGS_FOR("XE_HP_CORE") set(CORE_XE_HP_CORE_REVISIONS 4) if(TESTS_XE_HP_CORE) if(TESTS_XE_HP_SDV) ADD_ITEM_FOR_CORE_TYPE("FAMILY_NAME" "TESTED" "XE_HP_CORE" "XeHpFamily") else() set(TESTS_XE_HP_CORE FALSE) endif() endif() if(SUPPORT_XE_HP_SDV) ADD_PRODUCT("SUPPORTED" "XE_HP_SDV" "IGFX_XE_HP_SDV") 
ADD_PLATFORM_FOR_CORE_TYPE("SUPPORTED" "XE_HP_CORE" "XE_HP_SDV" "CORE") ADD_PLATFORM_FOR_CORE_TYPE("SUPPORTED_IMAGES" "XE_HP_CORE" "XE_HP_SDV" "CORE") ADD_PLATFORM_FOR_CORE_TYPE("SUPPORTED_AUX_TRANSLATION" "XE_HP_CORE" "XE_HP_SDV" "CORE") set(PREFERRED_PLATFORM "XE_HP_SDV") if(TESTS_XE_HP_SDV) set(PREFERRED_FAMILY_NAME "XeHpFamily") ADD_ITEM_FOR_CORE_TYPE("PLATFORMS" "TESTED" "XE_HP_CORE" "XE_HP_SDV") ADD_PRODUCT("TESTED" "XE_HP_SDV" "IGFX_XE_HP_SDV") endif() endif() endif() if(SUPPORT_XE_HPG_CORE) set(CORE_XE_HPG_CORE_REVISIONS 0) if(TESTS_XE_HPG_CORE) ADD_ITEM_FOR_CORE_TYPE("FAMILY_NAME" "TESTED" "XE_HPG_CORE" "XE_HPG_COREFamily") endif() if(SUPPORT_DG2) set(DG2_XE_HPG_CORE_REVISIONS 0) ADD_PRODUCT("SUPPORTED" "DG2" "IGFX_DG2") ADD_PLATFORM_FOR_CORE_TYPE("SUPPORTED" "XE_HPG_CORE" "DG2" "DG2") ADD_PLATFORM_FOR_CORE_TYPE("SUPPORTED_AUX_TRANSLATION" "XE_HPG_CORE" "DG2" "DG2") ADD_PLATFORM_FOR_CORE_TYPE("SUPPORTED_IMAGES" "XE_HPG_CORE" "DG2" "DG2") if(TESTS_DG2) ADD_ITEM_FOR_CORE_TYPE("PLATFORMS" "TESTED" "XE_HPG_CORE" "DG2") ADD_PRODUCT("TESTED" "DG2" "IGFX_DG2") endif() endif() endif() if(SUPPORT_XE_HPC_CORE) set(XE_HPC_CORE_TEST_KERNELS_BLOCKLIST "CopyBuffer_simd8.cl") set(CORE_XE_HPC_CORE_REVISIONS 0 3 30 157) if(TESTS_XE_HPC_CORE) ADD_ITEM_FOR_CORE_TYPE("FAMILY_NAME" "TESTED" "XE_HPC_CORE" "XE_HPC_COREFamily") endif() if(SUPPORT_PVC) ADD_PRODUCT("SUPPORTED" "PVC" "IGFX_PVC") ADD_PLATFORM_FOR_CORE_TYPE("SUPPORTED" "XE_HPC_CORE" "PVC" "CORE") ADD_PLATFORM_FOR_CORE_TYPE("SUPPORTED_AUX_TRANSLATION" "XE_HPC_CORE" "PVC" "CORE") if(TESTS_PVC) ADD_ITEM_FOR_CORE_TYPE("PLATFORMS" "TESTED" "XE_HPC_CORE" "PVC") ADD_PRODUCT("TESTED" "PVC" "IGFX_PVC") endif() endif() endif() compute-runtime-22.14.22890/cmake/setup_ult_global_flags.cmake000066400000000000000000000021431422164147700241540ustar00rootroot00000000000000# # Copyright (C) 2021 Intel Corporation # # SPDX-License-Identifier: MIT # # disable optimizations for ults if(UNIX) string(REPLACE "-O2" "" CMAKE_CXX_FLAGS 
${CMAKE_CXX_FLAGS}) #disable fortify source as this requires optimization to be on string(REPLACE "-Wp,-D_FORTIFY_SOURCE=2" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}) string(REPLACE "-D_FORTIFY_SOURCE=2" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}) set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O0") set(CMAKE_CXX_FLAGS_RELEASEINTERNAL "${CMAKE_CXX_FLAGS_RELEASEINTERNAL} -O0") set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -O0") set(CMAKE_C_FLAGS_RELEASEINTERNAL "${CMAKE_C_FLAGS_RELEASEINTERNAL} -O0") endif() if(WIN32) string(REPLACE "/O2" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}) string(REPLACE "/O2" "/Od" CMAKE_CXX_FLAGS_RELEASE ${CMAKE_CXX_FLAGS_RELEASE}) string(REPLACE "/O2" "/Od" CMAKE_C_FLAGS_RELEASE ${CMAKE_C_FLAGS_RELEASE}) string(REPLACE "/O2" "/Od" CMAKE_CXX_FLAGS_RELEASEINTERNAL ${CMAKE_CXX_FLAGS_RELEASEINTERNAL}) string(REPLACE "/O2" "/Od" CMAKE_C_FLAGS_RELEASEINTERNAL ${CMAKE_C_FLAGS_RELEASEINTERNAL}) endif() compute-runtime-22.14.22890/config.h.in000066400000000000000000000006041422164147700173750ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #ifndef CONFIG_H #define CONFIG_H #cmakedefine USE_CL_CACHE #if defined(USE_CL_CACHE) static const bool clCacheEnabled = true; #else static const bool clCacheEnabled = false; #endif #cmakedefine CL_CACHE_LOCATION "${CL_CACHE_LOCATION}" #cmakedefine NEO_ARCH "${NEO_ARCH}" #endif /* CONFIG_H */ compute-runtime-22.14.22890/driver_version.h.in000066400000000000000000000004271422164147700211730ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #ifndef DRIVER_VERSION_H #define DRIVER_VERSION_H #cmakedefine NEO_OCL_DRIVER_VERSION ${NEO_OCL_DRIVER_VERSION} #cmakedefine NEO_REVISION "${NEO_REVISION}" #endif /* DRIVER_VERSION_H */ compute-runtime-22.14.22890/igc.opencl.h.in000066400000000000000000000006101422164147700201460ustar00rootroot00000000000000/* * Copyright (C) 2018 Intel Corporation * 
* SPDX-License-Identifier: MIT * */ #ifndef COMPILER_SETUP_H #define COMPILER_SETUP_H #cmakedefine IGC_LIBRARY_NAME "${CMAKE_SHARED_LIBRARY_PREFIX}${IGC_LIBRARY_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}" #cmakedefine FCL_LIBRARY_NAME "${CMAKE_SHARED_LIBRARY_PREFIX}${FCL_LIBRARY_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}" #endif /* COMPILER_SETUP_H */ compute-runtime-22.14.22890/level_zero/000077500000000000000000000000001422164147700175205ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/CMakeLists.txt000066400000000000000000000553401422164147700222670ustar00rootroot00000000000000# # Copyright (C) 2020-2022 Intel Corporation # # SPDX-License-Identifier: MIT # include(cmake/verify_l0_support.cmake) if(BUILD_WITH_L0) set(TARGET_NAME_L0 ze_intel_gpu) # Level Zero third party detection if(DEFINED LEVEL_ZERO_ROOT) get_filename_component(LEVEL_ZERO_ROOT "${LEVEL_ZERO_ROOT}" ABSOLUTE) else() get_filename_component(LEVEL_ZERO_ROOT_tmp "${NEO_SOURCE_DIR}/../level_zero" ABSOLUTE) # Level Zero Headers if read from the git repo are in include/core & include/tools. # To support the installation path of level_zero headers which is include/level_zero/* # the header files are combined into the path include/level_zero/* in the commands below. 
if(IS_DIRECTORY "${LEVEL_ZERO_ROOT_tmp}") set(CUSTOM_L0_INCLUDE_PATH "${LEVEL_ZERO_ROOT_tmp}/include/level_zero/") file(GLOB LEVEL_ZERO_SOURCE_HEADERS "${LEVEL_ZERO_ROOT_tmp}/include/*.h" ) file(MAKE_DIRECTORY ${CUSTOM_L0_INCLUDE_PATH}) file(COPY ${LEVEL_ZERO_SOURCE_HEADERS} DESTINATION ${CUSTOM_L0_INCLUDE_PATH}) set(LEVEL_ZERO_ROOT "${LEVEL_ZERO_ROOT_tmp}") endif() endif() if(NOT DEFINED DONT_USE_PREBUILT_KERNELS_L0) set(DONT_USE_PREBUILT_KERNELS_L0 FALSE) endif() project(level-zero-gpu VERSION ${NEO_L0_VERSION_MAJOR}.${NEO_L0_VERSION_MINOR}.${NEO_VERSION_BUILD}) message(STATUS "Level Zero driver version: ${PROJECT_VERSION}") include(cmake/source_tree.cmake) list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake") find_package(LevelZero) if(NOT LevelZero_FOUND) message(STATUS "Level zero headers not found") message(STATUS "Skipping level zero") set(BUILD_WITH_L0 FALSE PARENT_SCOPE) return() endif() add_definitions(-DL0_PROJECT_VERSION_MAJOR="${PROJECT_VERSION_MAJOR}") add_definitions(-DL0_PROJECT_VERSION_MINOR="${PROJECT_VERSION_MINOR}") add_definitions(-DL0_PROJECT_VERSION_PATCH="${PROJECT_VERSION_PATCH}") add_definitions(-DNEO_VERSION_BUILD="${NEO_VERSION_BUILD}") add_definitions(-DZE_ENABLE_OCL_INTEROP=1) file(WRITE "${CMAKE_BINARY_DIR}/VERSION" "${PROJECT_VERSION}") #Define a path for custom commands to work around MSVC set(CUSTOM_COMMAND_BINARY_DIR ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}) if(MSVC) #MSVC implicitly adds $ to the output path if(NOT "${CMAKE_GENERATOR}" STREQUAL "Ninja") set(CUSTOM_COMMAND_BINARY_DIR ${CUSTOM_COMMAND_BINARY_DIR}/$) endif() endif() if(UNIX) # Netlink and Generic Netlink find_path(LIBGENL_INCLUDE_DIR netlink/genl/genl.h PATH_SUFFIXES libnl3) if(LIBGENL_INCLUDE_DIR) message(STATUS "LibGenl headers directory: ${LIBGENL_INCLUDE_DIR}") include_directories(SYSTEM ${LIBGENL_INCLUDE_DIR}) set(LIBGENL_FOUND TRUE) else() message(STATUS "LibGenl headers not available. 
Building without") endif() endif() if(UNIX) # Firmware Update Library get_filename_component(IGSC_DIR_tmp "${NEO_SOURCE_DIR}/../igsc" ABSOLUTE) if(EXISTS "${IGSC_DIR_tmp}/lib/cmake") find_package(igsc HINTS "${IGSC_DIR_tmp}/lib/cmake") else() find_package(igsc) endif() if(igsc_FOUND) add_definitions(-DIGSC_PRESENT=1) if(EXISTS "${IGSC_DIR_tmp}/lib/cmake") get_filename_component(igsc_INCLUDE_DIR "${NEO_SOURCE_DIR}/../igsc/include" ABSOLUTE) endif() message(STATUS "igsc Library headers directory: ${igsc_INCLUDE_DIR}") message(STATUS "igsc version: ${igsc_VERSION}") include_directories(SYSTEM ${igsc_INCLUDE_DIR}) else() message(STATUS "igsc Library headers not available. Building without") endif() endif() if(UNIX) # Load GNUInstallDirs to determine install targets for Linux packages include(GNUInstallDirs) endif() if(NOT MSVC) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fpermissive -fPIC") endif() set(L0_ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}") if(NOT DEFINED COMPUTE_RUNTIME_DIR) get_filename_component(COMPUTE_RUNTIME_DIR ${CMAKE_CURRENT_SOURCE_DIR}/.. ABSOLUTE) endif() # Option to disable tests option(${PROJECT_NAME}_BUILD_TESTS "Build unit tests." 
ON) # Copy third_party_binaries to output BIN folder add_custom_target(copy_third_party_files) set_target_properties(copy_third_party_files PROPERTIES FOLDER ${TARGET_NAME_L0}) if(DEFINED NEO__IGC_TARGETS) if(WIN32) add_dependencies(copy_third_party_files copy_compiler_files) else() add_dependencies(copy_third_party_files ${NEO__IGC_TARGETS}) foreach(TARGET_tmp ${NEO__IGC_TARGETS}) if(UNIX) add_custom_command( TARGET copy_third_party_files PRE_BUILD COMMAND ${CMAKE_COMMAND} -E make_directory ${CUSTOM_COMMAND_BINARY_DIR} COMMAND ${CMAKE_COMMAND} -E copy_if_different $ "${CUSTOM_COMMAND_BINARY_DIR}/" ) endif() add_custom_command( TARGET copy_third_party_files PRE_BUILD COMMAND ${CMAKE_COMMAND} -E make_directory ${CUSTOM_COMMAND_BINARY_DIR} COMMAND ${CMAKE_COMMAND} -E copy_if_different $ "${CUSTOM_COMMAND_BINARY_DIR}/" ) endforeach() endif() else() if(DEFINED IGC_DIR) # Only copy igc libs if available message(STATUS "L0::Igc Dir: ${IGC_DIR}") add_custom_command( TARGET copy_third_party_files PRE_BUILD COMMAND ${CMAKE_COMMAND} -E make_directory ${CUSTOM_COMMAND_BINARY_DIR} COMMAND ${CMAKE_COMMAND} -E copy_directory "${NEO__IGC_LIBRARY_PATH}" "${CUSTOM_COMMAND_BINARY_DIR}/" DEPENDS "${NEO__IGC_LIBRARY_PATH}" ) endif() endif() if(TARGET ${GMM_TARGET_NAME}) message(STATUS "L0::Gmm Target: ${GMM_TARGET_NAME}") add_dependencies(copy_third_party_files ${GMM_TARGET_NAME}) if(UNIX) add_custom_command( TARGET copy_third_party_files PRE_BUILD COMMAND ${CMAKE_COMMAND} -E make_directory ${CUSTOM_COMMAND_BINARY_DIR} COMMAND ${CMAKE_COMMAND} -E copy_if_different "$" "${CUSTOM_COMMAND_BINARY_DIR}/" ) endif() add_custom_command( TARGET copy_third_party_files PRE_BUILD COMMAND ${CMAKE_COMMAND} -E make_directory ${CUSTOM_COMMAND_BINARY_DIR} COMMAND ${CMAKE_COMMAND} -E copy_if_different "$" "${CUSTOM_COMMAND_BINARY_DIR}/" ) else() if(DEFINED GMM_DIR) # Only copy gmm libs if available message(STATUS "L0::Gmm Dir: ${GMM_DIR}") add_custom_command( TARGET copy_third_party_files PRE_BUILD 
COMMAND ${CMAKE_COMMAND} -E make_directory ${CUSTOM_COMMAND_BINARY_DIR} COMMAND ${CMAKE_COMMAND} -E copy_directory "${NEO__GMM_LIBRARY_PATH}" "${CUSTOM_COMMAND_BINARY_DIR}/" DEPENDS "${NEO__GMM_LIBRARY_PATH}" ) endif() endif() # Get build type string(TOLOWER "${CMAKE_BUILD_TYPE}" BUILD_TYPE) if("${BUILD_TYPE}" STREQUAL "debug") add_definitions(-DZE_DEBUG) endif() include_directories(${COMPUTE_RUNTIME_DIR}/third_party/opencl_headers) if(CUSTOM_L0_INCLUDE_PATH) include_directories(${CUSTOM_L0_INCLUDE_PATH}/../) else() include_directories(${LevelZero_INCLUDE_DIRS}) endif() include_directories(${NEO_SOURCE_DIR}/level_zero/api/experimental${BRANCH_DIR_SUFFIX}) include_directories(${NEO_SOURCE_DIR}/shared/source/compiler_interface/compiler_options${BRANCH_DIR_SUFFIX}) include_directories(${CMAKE_CURRENT_SOURCE_DIR}/core/source/hw_helpers${BRANCH_DIR_SUFFIX}) include_directories(${CMAKE_CURRENT_SOURCE_DIR}/core/source/cmdlist/cmdlist_extended${BRANCH_DIR_SUFFIX}) include_directories(${CMAKE_CURRENT_SOURCE_DIR}/core/source/cmdqueue/cmdqueue_extended${BRANCH_DIR_SUFFIX}) include_directories("${CMAKE_CURRENT_SOURCE_DIR}") include_directories(ddi${BRANCH_DIR_SUFFIX}) include_directories(tools/source) include_directories(experimental${BRANCH_DIR_SUFFIX}source) include_directories(experimental/source/tracing) # Create our shared library/DLL configure_file(ze_intel_gpu_version.h.in ${NEO_BUILD_DIR}/ze_intel_gpu_version.h) add_library(${TARGET_NAME_L0} SHARED ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${NEO_SHARED_DIRECTORY}/dll/create_command_stream.cpp ${NEO_SHARED_DIRECTORY}/dll/direct_submission_controller_enabled.cpp ${NEO_SHARED_DIRECTORY}/dll/options_dll.cpp ${NEO_SHARED_DIRECTORY}/gmm_helper/page_table_mngr.cpp ${NEO_SHARED_DIRECTORY}/gmm_helper/resource_info.cpp ${NEO_SHARED_DIRECTORY}/helpers/abort.cpp ${NEO_SHARED_DIRECTORY}/helpers/debug_helpers.cpp ${NEO_SHARED_DIRECTORY}/utilities/cpuintrinsics.cpp ${NEO_SHARED_DIRECTORY}/utilities/debug_settings_reader_creator.cpp 
${NEO_SHARED_DIRECTORY}/utilities/io_functions.cpp ) target_sources(${TARGET_NAME_L0} PRIVATE ${NEO_SHARED_DIRECTORY}/aub/aub_stream_interface.cpp ${NEO_SHARED_DIRECTORY}/built_ins/sip_init.cpp ${NEO_SHARED_DIRECTORY}/dll/create_deferred_deleter.cpp ${NEO_SHARED_DIRECTORY}/dll/create_memory_manager_${DRIVER_MODEL}.cpp ${NEO_SHARED_DIRECTORY}/dll/create_tbx_sockets.cpp ${NEO_SHARED_DIRECTORY}/dll/get_devices.cpp ${NEO_SHARED_DIRECTORY}/dll/source_level_debugger_dll.cpp ) if(WIN32) target_sources(${TARGET_NAME_L0} PRIVATE ${NEO_SHARED_DIRECTORY}/dll/windows/environment_variables.cpp ${NEO_SHARED_DIRECTORY}/dll/windows/options_windows.cpp ${NEO_SHARED_DIRECTORY}/dll/windows/os_interface.cpp ${NEO_SHARED_DIRECTORY}/gmm_helper/windows/gmm_memory_base.cpp ${NEO_SHARED_DIRECTORY}/gmm_helper/windows/gmm_memory.cpp ${NEO_SHARED_DIRECTORY}/os_interface/windows/sys_calls.cpp ${NEO_SHARED_DIRECTORY}/os_interface/windows/os_interface_win.cpp ${NEO_SHARED_DIRECTORY}/os_interface/windows/os_memory_virtual_alloc.cpp ${NEO_SHARED_DIRECTORY}/os_interface/windows/wddm/wddm_calls.cpp ${NEO_SHARED_DIRECTORY}/os_interface/windows/wddm/wddm_create.cpp ) target_link_libraries(${TARGET_NAME_L0} dxgi ) if(DEFINED L0_DLL_RC_FILE) message(STATUS "Setting L0 Resource Info") target_sources(${TARGET_NAME_L0} PRIVATE ${L0_DLL_RC_FILE}) endif() else() target_sources(${TARGET_NAME_L0} PRIVATE ${NEO_SHARED_DIRECTORY}/os_interface/linux/sys_calls_linux.cpp ${NEO_SHARED_DIRECTORY}/dll/devices${BRANCH_DIR_SUFFIX}devices.inl ${NEO_SHARED_DIRECTORY}/dll/devices${BRANCH_DIR_SUFFIX}devices_additional.inl ${NEO_SHARED_DIRECTORY}/dll/devices/devices_base.inl ${NEO_SHARED_DIRECTORY}/dll/linux/drm_neo_create.cpp ${NEO_SHARED_DIRECTORY}/dll/linux/options_linux.cpp ${NEO_SHARED_DIRECTORY}/dll/linux/os_interface.cpp ${NEO_SHARED_DIRECTORY}/os_interface/linux/gmm_interface_linux.cpp ) if(NOT DISABLE_WDDM_LINUX) target_sources(${TARGET_NAME_L0} PRIVATE ${NEO_SHARED_DIRECTORY}/gmm_helper/windows/gmm_memory.cpp 
${NEO_SHARED_DIRECTORY}/os_interface/windows/wddm/wddm_create.cpp
)
endif()
endif()

# Optional object inputs for the Level Zero shared library.
# NOTE(review): every bare "$" in this section looks like a generator
# expression whose angle-bracket part is missing from this copy of the file -
# confirm against the pristine CMakeLists.txt.
if(DEFINED AUB_STREAM_PROJECT_NAME)
  target_sources(${TARGET_NAME_L0}
                 PRIVATE
                 $
  )
endif()

if(TARGET ${BUILTINS_SPIRV_LIB_NAME})
  target_sources(${TARGET_NAME_L0}
                 PRIVATE
                 $
  )
endif()

if(NOT ${DONT_USE_PREBUILT_KERNELS_L0})
  message(STATUS "Prebuilt kernels are linked to Level Zero.")
  target_sources(${TARGET_NAME_L0}
                 PRIVATE
                 $
                 $
  )
endif()

include_directories(${CMAKE_CURRENT_SOURCE_DIR}/source/inc)

target_compile_definitions(${TARGET_NAME_L0}
                           PUBLIC
                           ZE_MAKEDLL
)

# The shared library is compiled with the same definitions as the shared
# release library target.
get_property(COMPUTE_RUNTIME_DEFINITIONS
             TARGET ${NEO_SHARED_RELEASE_LIB_NAME}
             PROPERTY COMPILE_DEFINITIONS
)

target_compile_definitions(${TARGET_NAME_L0}
                           PRIVATE
                           ${COMPUTE_RUNTIME_DEFINITIONS}
)

# OS-specific include paths, output naming and sources.
if(UNIX)
  target_include_directories(${TARGET_NAME_L0}
                             PUBLIC
                             ${L0_ROOT_DIR}/core/source/os_interface/linux
                             ${I915_INCLUDES_DIR}
  )
  target_include_directories(${TARGET_NAME_L0}
                             PRIVATE
                             ${NEO_SHARED_DIRECTORY}/dll/devices${BRANCH_DIR_SUFFIX}
  )
else()
  target_include_directories(${TARGET_NAME_L0}
                             PUBLIC
                             ${L0_ROOT_DIR}/core/source/os_interface/windows
  )

  # The output name carries the pointer width (32 or 64) as a suffix.
  if(CMAKE_SIZEOF_VOID_P EQUAL 4)
    set(L0_BITNESS_SUFIX 32)
  elseif(CMAKE_SIZEOF_VOID_P EQUAL 8)
    set(L0_BITNESS_SUFIX 64)
  endif()

  set_target_properties(${TARGET_NAME_L0}
                        PROPERTIES
                        DEBUG_OUTPUT_NAME "${TARGET_NAME_L0}${L0_BITNESS_SUFIX}"
                        RELEASE_OUTPUT_NAME "${TARGET_NAME_L0}${L0_BITNESS_SUFIX}"
                        RELEASEINTERNAL_OUTPUT_NAME "${TARGET_NAME_L0}${L0_BITNESS_SUFIX}"
                        OUTPUT_NAME "${TARGET_NAME_L0}${L0_BITNESS_SUFIX}"
  )
  add_dependencies(${TARGET_NAME_L0} ${GMM_TARGET_NAME})
  target_sources(${TARGET_NAME_L0}
                 PRIVATE
                 ${NEO_SHARED_DIRECTORY}/os_interface/windows/gmm_interface_win.cpp
  )
endif()

add_subdirectory_unique(api)
add_subdirectory_unique(source)

# Names of the object libraries produced by generate_l0_lib() below. When
# DONT_CARE_OF_VIRTUALS is set the mockable name aliases the release name.
set(L0_RELEASE_LIB_NAME "${TARGET_NAME_L0}_lib")
if(NOT NEO_SKIP_L0_UNIT_TESTS)
  if(DONT_CARE_OF_VIRTUALS)
    set(L0_MOCKABLE_LIB_NAME "${TARGET_NAME_L0}_lib")
  else()
    set(L0_MOCKABLE_LIB_NAME "${TARGET_NAME_L0}_mockable")
  endif()
endif()

# generate_l0_lib(LIB_NAME MOCKABLE)
#
# Creates the OBJECT library LIB_NAME from the Level Zero runtime sources.
#
#   LIB_NAME - name of the object library target to create. It is also the
#              sub-directory of NEO_BUILD_DIR that receives the binary
#              directories of the source sub-directories added below.
#   MOCKABLE - when true, MOCKABLE_VIRTUAL is defined as `virtual` and the
#              compile definitions of NEO_SHARED_MOCKABLE_LIB_NAME are used;
#              otherwise MOCKABLE_VIRTUAL is defined empty and the compile
#              definitions of NEO_SHARED_RELEASE_LIB_NAME are used.
function(generate_l0_lib LIB_NAME MOCKABLE)
  # NOTE(review): presumably read by the sub-directories added below - confirm.
  set(L0_STATIC_LIB_NAME ${LIB_NAME})
  add_library(${LIB_NAME} OBJECT
              ${L0_RUNTIME_SOURCES}
  )

  # Each call gets its own binary directory under ${NEO_BUILD_DIR}/${LIB_NAME}/
  # because the same source directories are added once per generated library.
  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/core/source "${NEO_BUILD_DIR}/${LIB_NAME}/core/source")
  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/experimental${BRANCH_DIR_SUFFIX}source "${NEO_BUILD_DIR}/${LIB_NAME}/experimental${BRANCH_DIR_SUFFIX}source")
  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/experimental/source/tracing "${NEO_BUILD_DIR}/${LIB_NAME}/experimental/tracing")
  # Keep tools/source under the ${LIB_NAME}/ directory like its siblings above.
  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/source "${NEO_BUILD_DIR}/${LIB_NAME}/tools/source")

  # NOTE(review): append_sources_from_properties is a project helper;
  # presumably it appends the contents of the named global properties to the
  # first argument - confirm.
  append_sources_from_properties(L0_RUNTIME_SOURCES
                                 L0_API
                                 L0_SOURCES_LINUX
                                 L0_SOURCES_WINDOWS
                                 L0_SRCS_CACHE_RESERVATION
                                 L0_SRCS_COMPILER_INTERFACE
                                 L0_SRCS_DEBUGGER
                                 L0_SRCS_DRIVER
                                 L0_SRCS_OCLOC_SHARED
  )
  if(WIN32)
    append_sources_from_properties(L0_RUNTIME_SOURCES
                                   L0_SRCS_CACHE_RESERVATION_WINDOWS
                                   L0_SRCS_DEBUGGER_WINDOWS
    )
  else()
    append_sources_from_properties(L0_RUNTIME_SOURCES
                                   L0_SRCS_CACHE_RESERVATION_LINUX
                                   L0_SRCS_DEBUGGER_LINUX
    )
  endif()

  target_sources(${LIB_NAME} PRIVATE ${L0_RUNTIME_SOURCES})

  # Select the definition set and the meaning of MOCKABLE_VIRTUAL.
  if(${MOCKABLE})
    get_property(COMPUTE_RUNTIME_DEFINITIONS
                 TARGET ${NEO_SHARED_MOCKABLE_LIB_NAME}
                 PROPERTY COMPILE_DEFINITIONS
    )
    target_compile_definitions(${LIB_NAME}
                               PUBLIC MOCKABLE_VIRTUAL=virtual
                               PUBLIC ${COMPUTE_RUNTIME_DEFINITIONS}
    )
  else()
    get_property(COMPUTE_RUNTIME_DEFINITIONS
                 TARGET ${NEO_SHARED_RELEASE_LIB_NAME}
                 PROPERTY COMPILE_DEFINITIONS
    )
    target_compile_definitions(${LIB_NAME}
                               PUBLIC MOCKABLE_VIRTUAL=
                               PUBLIC ${COMPUTE_RUNTIME_DEFINITIONS}
    )
  endif()

  set_property(TARGET ${LIB_NAME} APPEND_STRING PROPERTY COMPILE_FLAGS ${ASAN_FLAGS} ${TSAN_FLAGS})
  set_target_properties(${LIB_NAME} PROPERTIES FOLDER ${TARGET_NAME_L0})

  target_include_directories(${LIB_NAME}
                             PUBLIC
                             ${ENGINE_NODE_DIR}
                             ${NEO__GMM_INCLUDE_DIR}
                             ${CIF_BASE_DIR}
                             ${IGC_OCL_ADAPTOR_DIR}
                             ${NEO__IGC_INCLUDE_DIR}
                             ${KHRONOS_HEADERS_DIR}
  )

  # WDK headers are needed on Windows and on Linux builds that keep WDDM.
  if(WIN32 OR NOT DISABLE_WDDM_LINUX)
    target_include_directories(${LIB_NAME} PUBLIC ${WDK_INCLUDE_PATHS})
  endif()

  if(WIN32)
    target_include_directories(${LIB_NAME}
                               PUBLIC
                               ${CMAKE_CURRENT_SOURCE_DIR}/os_interface/windows
    )
  else()
    target_include_directories(${LIB_NAME}
                               PUBLIC
                               ${I915_INCLUDES_DIR}
                               ${CMAKE_CURRENT_SOURCE_DIR}/os_interface/linux
    )
  endif()

  create_project_source_tree(${LIB_NAME})
endfunction()

# Optional udev rules installation (off by default). UDEV_RULES_DIR may be
# preset by the user; otherwise UdevRulesDir.cmake is included to look for it.
if(UNIX)
  option(L0_INSTALL_UDEV_RULES "Install udev rules. An attempt to automatically determine the proper location will be made if UDEV_RULES_DIR is not set." OFF)
  if(L0_INSTALL_UDEV_RULES)
    if(DEFINED UDEV_RULES_DIR)
      set(UDEV_RULES_DIR_FOUND TRUE)
    else()
      include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/${BRANCH_TYPE}/UdevRulesDir.cmake)
    endif()
    if(NOT UDEV_RULES_DIR_FOUND)
      message(SEND_ERROR "udev rule install requested but no rules directory found")
    endif()
  endif()
endif()

# With DONT_CARE_OF_VIRTUALS a single mockable-flavoured library is built under
# the release name; otherwise the release library is always built and the
# mockable one only when unit tests are enabled.
if(DONT_CARE_OF_VIRTUALS)
  generate_l0_lib(${L0_RELEASE_LIB_NAME} TRUE)
else()
  generate_l0_lib(${L0_RELEASE_LIB_NAME} FALSE)
  if(NOT NEO_SKIP_L0_UNIT_TESTS)
    generate_l0_lib(${L0_MOCKABLE_LIB_NAME} TRUE)
  endif()
endif()

append_sources_from_properties(L0_SHARED_LIB_SRCS L0_SRCS_DLL NEO_CORE_SRCS_LINK NEO_SRCS_ENABLE_CORE)
target_sources(${TARGET_NAME_L0} PRIVATE $ ${L0_SHARED_LIB_SRCS})

target_link_libraries(${TARGET_NAME_L0}
                      ${NEO_SHARED_RELEASE_LIB_NAME}
                      ${NEO_EXTRA_LIBS}
                      ${ASAN_LIBS}
                      ${TSAN_LIBS}
)

if(UNIX)
  target_link_libraries(${TARGET_NAME_L0} ${GMM_LINK_NAME})

  # Exported symbols are restricted by the ze.exports linker version script.
  set_property(TARGET ${TARGET_NAME_L0}
               APPEND_STRING PROPERTY LINK_FLAGS " -Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/core/source/dll/linux/ze.exports"
  )
endif()

create_source_tree(${TARGET_NAME_L0} ${L0_ROOT_DIR}/..)
# Compile flags and IDE folder / versioning properties of the L0 shared library.
set_property(TARGET ${TARGET_NAME_L0} APPEND_STRING PROPERTY COMPILE_FLAGS ${ASAN_FLAGS})
set_target_properties(${TARGET_NAME_L0} PROPERTIES
                      FOLDER ${TARGET_NAME_L0}
                      VERSION "${PROJECT_VERSION}"
                      SOVERSION "${PROJECT_VERSION_MAJOR}"
)

if(UNIX)
  if(NEO_BUILD_DEBUG_SYMBOLS_PACKAGE)
    # NOTE(review): the bare "$" looks like a generator expression whose
    # angle-bracket part is missing from this copy of the file - confirm.
    get_filename_component(lib_file_name $ NAME_WE)
    set(symbols_file_name ${lib_file_name}.debug)
    set(debug_symbols_target_name "${STRIP_SYMBOLS_TARGET}_${TARGET_NAME_L0}")
    # Custom target that splits debug information out of the library:
    # objcopy --only-keep-debug writes the .debug file, strip -g strips the
    # library, and objcopy --add-gnu-debuglink records the link between them.
    add_custom_target(${debug_symbols_target_name}
                      COMMAND sh -c "objcopy --only-keep-debug ${lib_file_name} ${symbols_file_name}"
                      COMMAND sh -c "strip -g ${lib_file_name}"
                      COMMAND sh -c "objcopy --add-gnu-debuglink=${symbols_file_name} ${lib_file_name}"
    )
    # The split runs after the library is built and is pulled in by the
    # STRIP_SYMBOLS_TARGET aggregate target.
    add_dependencies(${debug_symbols_target_name} ${TARGET_NAME_L0})
    add_dependencies(${STRIP_SYMBOLS_TARGET} ${debug_symbols_target_name})
    set_property(GLOBAL APPEND PROPERTY DEBUG_SYMBOL_FILES "${symbols_file_name}")
  endif()

  # Install the shared library into CMAKE_INSTALL_LIBDIR as part of the
  # ${PROJECT_NAME} component.
  install(TARGETS ${TARGET_NAME_L0}
          LIBRARY
          PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE
          DESTINATION ${CMAKE_INSTALL_LIBDIR}
          COMPONENT ${PROJECT_NAME}
          NAMELINK_SKIP
  )
else()
  # Non-UNIX: one install rule per build configuration, each with its own
  # destination directory.
  install(TARGETS ${TARGET_NAME_L0} RUNTIME
          DESTINATION Release/lh64
          CONFIGURATIONS Release
  )
  install(TARGETS ${TARGET_NAME_L0} RUNTIME
          DESTINATION Release-Internal/lh64
          CONFIGURATIONS ReleaseInternal
  )
  install(TARGETS ${TARGET_NAME_L0} RUNTIME
          DESTINATION Debug/lh64
          CONFIGURATIONS Debug
  )
endif()

# Unit / AUB test sub-directories are either added or handed to hide_subdir().
# NOTE(review): hide_subdir and add_subdirectory_unique are project helpers
# defined outside this chunk; presumably hide_subdir keeps a later
# add_subdirectories() call from picking the directory up - confirm.
if(NOT NEO_SKIP_L0_UNIT_TESTS)
  add_subdirectory_unique(core/test/common)
  add_subdirectory_unique(core/test/unit_tests)
  add_subdirectory_unique(core/test/aub_tests)
  add_subdirectory_unique(tools/test/unit_tests)
  add_subdirectory_unique(experimental/test/unit_tests)
else()
  hide_subdir(core/test/common)
  hide_subdir(core/test/unit_tests)
  hide_subdir(core/test/aub_tests)
  hide_subdir(tools/test/unit_tests)
  hide_subdir(experimental/test/unit_tests)
endif()

# Same add-or-hide choice for the black box tests.
if(NOT NEO_SKIP_L0_BLACK_BOX_TESTS)
  add_subdirectory_unique(core/test/black_box_tests)
  add_subdirectory_unique(tools/test/black_box_tests)
else()
  hide_subdir(core/test/black_box_tests)
  hide_subdir(tools/test/black_box_tests)
endif()

add_subdirectories()

# Packaging: register this project as an L0 package component, derive the
# DEB/RPM version strings from the CPack version parts, and include the
# branch-specific cpack.cmake when it exists.
if(UNIX AND NEO_BUILD_L0_PACKAGE)
  message(STATUS "Building LevelZero package")

  set_property(GLOBAL APPEND PROPERTY NEO_L0_COMPONENTS_LIST ${PROJECT_NAME})

  set(L0_PACKAGE_VERSION_DEB "${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}")
  set(L0_PACKAGE_VERSION_RPM "${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}")

  if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/cmake/${BRANCH_TYPE}/cpack.cmake)
    include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/${BRANCH_TYPE}/cpack.cmake)
  endif()
endif()
# Alternative branch of the enclosing condition (opened outside this chunk):
# Level Zero is not built and the parent scope is told so via BUILD_WITH_L0.
else()
message(STATUS "Skipping level zero")
set(BUILD_WITH_L0 FALSE PARENT_SCOPE)
endif()
compute-runtime-22.14.22890/level_zero/api/000077500000000000000000000000001422164147700202715ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/api/CMakeLists.txt000066400000000000000000000014251422164147700230330ustar00rootroot00000000000000#
# Copyright (C) 2020-2021 Intel Corporation
#
# SPDX-License-Identifier: MIT
#

# API sub-directories; the two using BRANCH_DIR_SUFFIX resolve to a
# branch-dependent directory name.
add_subdirectory(core)
add_subdirectory(experimental${BRANCH_DIR_SUFFIX})
add_subdirectory(experimental/tracing)
add_subdirectory(tools)
add_subdirectory(sysman)
add_subdirectory(extensions${BRANCH_DIR_SUFFIX})
add_subdirectory(extensions/public)

# Collect the per-area API source lists into L0_API and publish the result as
# the global property L0_API.
# NOTE(review): append_sources_from_properties is a project helper; presumably
# it appends the contents of the named global properties - confirm.
set(L0_API "")
append_sources_from_properties(L0_API
                               L0_SRCS_API
                               L0_EXPERIMENTAL_API
                               L0_EXPERIMENTAL_TRACING_API
                               L0_SYSMAN_API
                               L0_TOOLS_API
                               L0_EXTENSIONS_SRCS_API
                               L0_PUBLIC_EXTENSIONS_SRCS_API
)
set_property(GLOBAL PROPERTY L0_API ${L0_API})
compute-runtime-22.14.22890/level_zero/api/core/000077500000000000000000000000001422164147700212215ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/api/core/CMakeLists.txt000066400000000000000000000015211422164147700237600ustar00rootroot00000000000000#
# Copyright (C) 2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
set(L0_SRCS_API ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/ze_barrier.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ze_cmdlist.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ze_cmdqueue.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ze_context.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ze_copy.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ze_core_loader.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ze_device.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ze_driver.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ze_event.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ze_fence.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ze_image.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ze_memory.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ze_module.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ze_sampler.cpp ) set_property(GLOBAL PROPERTY L0_SRCS_API ${L0_SRCS_API}) compute-runtime-22.14.22890/level_zero/api/core/ze_barrier.cpp000066400000000000000000000021371422164147700240540ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/cmdlist/cmdlist.h" #include ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendBarrier( ze_command_list_handle_t hCommandList, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { return L0::CommandList::fromHandle(hCommandList)->appendBarrier(hSignalEvent, numWaitEvents, phWaitEvents); } ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendMemoryRangesBarrier( ze_command_list_handle_t hCommandList, uint32_t numRanges, const size_t *pRangeSizes, const void **pRanges, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { return L0::CommandList::fromHandle(hCommandList)->appendMemoryRangesBarrier(numRanges, pRangeSizes, pRanges, hSignalEvent, numWaitEvents, phWaitEvents); } ZE_APIEXPORT ze_result_t ZE_APICALL zeDeviceSystemBarrier( ze_device_handle_t hDevice) { return L0::Device::fromHandle(hDevice)->systemBarrier(); } 
compute-runtime-22.14.22890/level_zero/api/core/ze_cmdlist.cpp000066400000000000000000000043101422164147700240600ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/cmdlist/cmdlist.h" #include "level_zero/core/source/context/context.h" #include ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListCreate( ze_context_handle_t hContext, ze_device_handle_t hDevice, const ze_command_list_desc_t *desc, ze_command_list_handle_t *phCommandList) { return L0::Context::fromHandle(hContext)->createCommandList(hDevice, desc, phCommandList); } ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListCreateImmediate( ze_context_handle_t hContext, ze_device_handle_t hDevice, const ze_command_queue_desc_t *altdesc, ze_command_list_handle_t *phCommandList) { return L0::Context::fromHandle(hContext)->createCommandListImmediate(hDevice, altdesc, phCommandList); } ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListDestroy( ze_command_list_handle_t hCommandList) { return L0::CommandList::fromHandle(hCommandList)->destroy(); } ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListClose( ze_command_list_handle_t hCommandList) { return L0::CommandList::fromHandle(hCommandList)->close(); } ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListReset( ze_command_list_handle_t hCommandList) { return L0::CommandList::fromHandle(hCommandList)->reset(); } ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendWriteGlobalTimestamp( ze_command_list_handle_t hCommandList, uint64_t *dstptr, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { return L0::CommandList::fromHandle(hCommandList)->appendWriteGlobalTimestamp(dstptr, hSignalEvent, numWaitEvents, phWaitEvents); } ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendQueryKernelTimestamps( ze_command_list_handle_t hCommandList, uint32_t numEvents, ze_event_handle_t *phEvents, void *dstptr, const size_t *pOffsets, ze_event_handle_t 
hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { return L0::CommandList::fromHandle(hCommandList)->appendQueryKernelTimestamps(numEvents, phEvents, dstptr, pOffsets, hSignalEvent, numWaitEvents, phWaitEvents); } compute-runtime-22.14.22890/level_zero/api/core/ze_cmdqueue.cpp000066400000000000000000000024021422164147700242310ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/cmdqueue/cmdqueue.h" #include "level_zero/core/source/context/context.h" #include ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandQueueCreate( ze_context_handle_t hContext, ze_device_handle_t hDevice, const ze_command_queue_desc_t *desc, ze_command_queue_handle_t *phCommandQueue) { return L0::Context::fromHandle(hContext)->createCommandQueue(hDevice, desc, phCommandQueue); } ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandQueueDestroy( ze_command_queue_handle_t hCommandQueue) { return L0::CommandQueue::fromHandle(hCommandQueue)->destroy(); } ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandQueueExecuteCommandLists( ze_command_queue_handle_t hCommandQueue, uint32_t numCommandLists, ze_command_list_handle_t *phCommandLists, ze_fence_handle_t hFence) { return L0::CommandQueue::fromHandle(hCommandQueue)->executeCommandLists(numCommandLists, phCommandLists, hFence, true); } ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandQueueSynchronize( ze_command_queue_handle_t hCommandQueue, uint64_t timeout) { return L0::CommandQueue::fromHandle(hCommandQueue)->synchronize(timeout); } compute-runtime-22.14.22890/level_zero/api/core/ze_context.cpp000066400000000000000000000110501422164147700241040ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/context/context.h" #include "level_zero/core/source/driver/driver_handle.h" #include ZE_APIEXPORT ze_result_t ZE_APICALL zeContextCreate( ze_driver_handle_t hDriver, 
const ze_context_desc_t *desc, ze_context_handle_t *phContext) { return L0::DriverHandle::fromHandle(hDriver)->createContext(desc, 0u, nullptr, phContext); } ZE_APIEXPORT ze_result_t ZE_APICALL zeContextCreateEx( ze_driver_handle_t hDriver, const ze_context_desc_t *desc, uint32_t numDevices, ze_device_handle_t *phDevices, ze_context_handle_t *phContext) { return L0::DriverHandle::fromHandle(hDriver)->createContext(desc, numDevices, phDevices, phContext); } ZE_APIEXPORT ze_result_t ZE_APICALL zeContextDestroy(ze_context_handle_t hContext) { return L0::Context::fromHandle(hContext)->destroy(); } ZE_APIEXPORT ze_result_t ZE_APICALL zeContextGetStatus(ze_context_handle_t hContext) { return L0::Context::fromHandle(hContext)->getStatus(); } ZE_APIEXPORT ze_result_t ZE_APICALL zeVirtualMemReserve( ze_context_handle_t hContext, const void *pStart, size_t size, void **pptr) { return L0::Context::fromHandle(hContext)->reserveVirtualMem(pStart, size, pptr); } ZE_APIEXPORT ze_result_t ZE_APICALL zeVirtualMemFree( ze_context_handle_t hContext, const void *ptr, size_t size) { return L0::Context::fromHandle(hContext)->freeVirtualMem(ptr, size); } ZE_APIEXPORT ze_result_t ZE_APICALL zeVirtualMemQueryPageSize( ze_context_handle_t hContext, ze_device_handle_t hDevice, size_t size, size_t *pagesize) { return L0::Context::fromHandle(hContext)->queryVirtualMemPageSize(hDevice, size, pagesize); } ZE_APIEXPORT ze_result_t ZE_APICALL zePhysicalMemCreate( ze_context_handle_t hContext, ze_device_handle_t hDevice, ze_physical_mem_desc_t *desc, ze_physical_mem_handle_t *phPhysicalMemory) { return L0::Context::fromHandle(hContext)->createPhysicalMem(hDevice, desc, phPhysicalMemory); } ZE_APIEXPORT ze_result_t ZE_APICALL zePhysicalMemDestroy( ze_context_handle_t hContext, ze_physical_mem_handle_t hPhysicalMemory) { return L0::Context::fromHandle(hContext)->destroyPhysicalMem(hPhysicalMemory); } ZE_APIEXPORT ze_result_t ZE_APICALL zeVirtualMemMap( ze_context_handle_t hContext, const void *ptr, 
size_t size, ze_physical_mem_handle_t hPhysicalMemory, size_t offset, ze_memory_access_attribute_t access) { return L0::Context::fromHandle(hContext)->mapVirtualMem(ptr, size, hPhysicalMemory, offset, access); } ZE_APIEXPORT ze_result_t ZE_APICALL zeVirtualMemUnmap( ze_context_handle_t hContext, const void *ptr, size_t size) { return L0::Context::fromHandle(hContext)->unMapVirtualMem(ptr, size); } ZE_APIEXPORT ze_result_t ZE_APICALL zeVirtualMemSetAccessAttribute( ze_context_handle_t hContext, const void *ptr, size_t size, ze_memory_access_attribute_t access) { return L0::Context::fromHandle(hContext)->setVirtualMemAccessAttribute(ptr, size, access); } ZE_APIEXPORT ze_result_t ZE_APICALL zeVirtualMemGetAccessAttribute( ze_context_handle_t hContext, const void *ptr, size_t size, ze_memory_access_attribute_t *access, size_t *outSize) { return L0::Context::fromHandle(hContext)->getVirtualMemAccessAttribute(ptr, size, access, outSize); } ZE_APIEXPORT ze_result_t ZE_APICALL zeContextSystemBarrier( ze_context_handle_t hContext, ze_device_handle_t hDevice) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ZE_APIEXPORT ze_result_t ZE_APICALL zeContextMakeMemoryResident( ze_context_handle_t hContext, ze_device_handle_t hDevice, void *ptr, size_t size) { return L0::Context::fromHandle(hContext)->makeMemoryResident(hDevice, ptr, size); } ZE_APIEXPORT ze_result_t ZE_APICALL zeContextEvictMemory( ze_context_handle_t hContext, ze_device_handle_t hDevice, void *ptr, size_t size) { return L0::Context::fromHandle(hContext)->evictMemory(hDevice, ptr, size); } ZE_APIEXPORT ze_result_t ZE_APICALL zeContextMakeImageResident( ze_context_handle_t hContext, ze_device_handle_t hDevice, ze_image_handle_t hImage) { return L0::Context::fromHandle(hContext)->makeImageResident(hDevice, hImage); } ZE_APIEXPORT ze_result_t ZE_APICALL zeContextEvictImage( ze_context_handle_t hContext, ze_device_handle_t hDevice, ze_image_handle_t hImage) { return 
L0::Context::fromHandle(hContext)->evictImage(hDevice, hImage); } compute-runtime-22.14.22890/level_zero/api/core/ze_copy.cpp000066400000000000000000000107701422164147700234020ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/cmdlist/cmdlist.h" #include ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendMemoryCopy( ze_command_list_handle_t hCommandList, void *dstptr, const void *srcptr, size_t size, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { return L0::CommandList::fromHandle(hCommandList)->appendMemoryCopy(dstptr, srcptr, size, hSignalEvent, numWaitEvents, phWaitEvents); } ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendMemoryFill( ze_command_list_handle_t hCommandList, void *ptr, const void *pattern, size_t patternSize, size_t size, ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { return L0::CommandList::fromHandle(hCommandList)->appendMemoryFill(ptr, pattern, patternSize, size, hEvent, numWaitEvents, phWaitEvents); } ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendMemoryCopyRegion( ze_command_list_handle_t hCommandList, void *dstptr, const ze_copy_region_t *dstRegion, uint32_t dstPitch, uint32_t dstSlicePitch, const void *srcptr, const ze_copy_region_t *srcRegion, uint32_t srcPitch, uint32_t srcSlicePitch, ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { return L0::CommandList::fromHandle(hCommandList)->appendMemoryCopyRegion(dstptr, dstRegion, dstPitch, dstSlicePitch, srcptr, srcRegion, srcPitch, srcSlicePitch, hEvent, numWaitEvents, phWaitEvents); } ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendImageCopy( ze_command_list_handle_t hCommandList, ze_image_handle_t hDstImage, ze_image_handle_t hSrcImage, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { return 
L0::CommandList::fromHandle(hCommandList)->appendImageCopy(hDstImage, hSrcImage, hSignalEvent, numWaitEvents, phWaitEvents); } ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendImageCopyRegion( ze_command_list_handle_t hCommandList, ze_image_handle_t hDstImage, ze_image_handle_t hSrcImage, const ze_image_region_t *pDstRegion, const ze_image_region_t *pSrcRegion, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { return L0::CommandList::fromHandle(hCommandList)->appendImageCopyRegion(hDstImage, hSrcImage, pDstRegion, pSrcRegion, hSignalEvent, numWaitEvents, phWaitEvents); } ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendImageCopyToMemory( ze_command_list_handle_t hCommandList, void *dstptr, ze_image_handle_t hSrcImage, const ze_image_region_t *pSrcRegion, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { return L0::CommandList::fromHandle(hCommandList)->appendImageCopyToMemory(dstptr, hSrcImage, pSrcRegion, hSignalEvent, numWaitEvents, phWaitEvents); } ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendImageCopyFromMemory( ze_command_list_handle_t hCommandList, ze_image_handle_t hDstImage, const void *srcptr, const ze_image_region_t *pDstRegion, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { return L0::CommandList::fromHandle(hCommandList)->appendImageCopyFromMemory(hDstImage, srcptr, pDstRegion, hSignalEvent, numWaitEvents, phWaitEvents); } ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendMemoryPrefetch( ze_command_list_handle_t hCommandList, const void *ptr, size_t size) { return L0::CommandList::fromHandle(hCommandList)->appendMemoryPrefetch(ptr, size); } ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendMemAdvise( ze_command_list_handle_t hCommandList, ze_device_handle_t hDevice, const void *ptr, size_t size, ze_memory_advice_t advice) { return L0::CommandList::fromHandle(hCommandList)->appendMemAdvise(hDevice, 
ptr, size, advice); } ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendMemoryCopyFromContext( ze_command_list_handle_t hCommandList, void *dstptr, ze_context_handle_t hContextSrc, const void *srcptr, size_t size, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { return L0::CommandList::fromHandle(hCommandList)->appendMemoryCopyFromContext(dstptr, hContextSrc, srcptr, size, hSignalEvent, numWaitEvents, phWaitEvents); } compute-runtime-22.14.22890/level_zero/api/core/ze_core_loader.cpp000066400000000000000000000730051422164147700247060ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/experimental/source/tracing/tracing_imp.h" #include "level_zero/source/inc/ze_intel_gpu.h" #include #include #include #include #include "ze_ddi_tables.h" ze_gpu_driver_dditable_t driver_ddiTable; ZE_APIEXPORT ze_result_t ZE_APICALL zeGetDriverProcAddrTable( ze_api_version_t version, ze_driver_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (ZE_MAJOR_VERSION(driver_ddiTable.version) != ZE_MAJOR_VERSION(version) || ZE_MINOR_VERSION(driver_ddiTable.version) > ZE_MINOR_VERSION(version)) return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; driver_ddiTable.enableTracing = getenv_tobool("ZET_ENABLE_API_TRACING_EXP"); ze_result_t result = ZE_RESULT_SUCCESS; pDdiTable->pfnGet = zeDriverGet; pDdiTable->pfnGetApiVersion = zeDriverGetApiVersion; pDdiTable->pfnGetProperties = zeDriverGetProperties; pDdiTable->pfnGetIpcProperties = zeDriverGetIpcProperties; pDdiTable->pfnGetExtensionProperties = zeDriverGetExtensionProperties; pDdiTable->pfnGetExtensionFunctionAddress = zeDriverGetExtensionFunctionAddress; driver_ddiTable.core_ddiTable.Driver = *pDdiTable; if (driver_ddiTable.enableTracing) { pDdiTable->pfnGet = zeDriverGet_Tracing; pDdiTable->pfnGetApiVersion = zeDriverGetApiVersion_Tracing; pDdiTable->pfnGetProperties = 
zeDriverGetProperties_Tracing; pDdiTable->pfnGetIpcProperties = zeDriverGetIpcProperties_Tracing; pDdiTable->pfnGetExtensionProperties = zeDriverGetExtensionProperties_Tracing; } return result; } ZE_DLLEXPORT ze_result_t ZE_APICALL zeGetMemProcAddrTable( ze_api_version_t version, ze_mem_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (ZE_MAJOR_VERSION(driver_ddiTable.version) != ZE_MAJOR_VERSION(version) || ZE_MINOR_VERSION(driver_ddiTable.version) > ZE_MINOR_VERSION(version)) return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; driver_ddiTable.enableTracing = getenv_tobool("ZET_ENABLE_API_TRACING_EXP"); ze_result_t result = ZE_RESULT_SUCCESS; pDdiTable->pfnAllocShared = zeMemAllocShared; pDdiTable->pfnAllocDevice = zeMemAllocDevice; pDdiTable->pfnAllocHost = zeMemAllocHost; pDdiTable->pfnFree = zeMemFree; pDdiTable->pfnFreeExt = zeMemFreeExt; pDdiTable->pfnGetAllocProperties = zeMemGetAllocProperties; pDdiTable->pfnGetAddressRange = zeMemGetAddressRange; pDdiTable->pfnGetIpcHandle = zeMemGetIpcHandle; pDdiTable->pfnOpenIpcHandle = zeMemOpenIpcHandle; pDdiTable->pfnCloseIpcHandle = zeMemCloseIpcHandle; driver_ddiTable.core_ddiTable.Mem = *pDdiTable; if (driver_ddiTable.enableTracing) { pDdiTable->pfnAllocShared = zeMemAllocShared_Tracing; pDdiTable->pfnAllocDevice = zeMemAllocDevice_Tracing; pDdiTable->pfnAllocHost = zeMemAllocHost_Tracing; pDdiTable->pfnFree = zeMemFree_Tracing; pDdiTable->pfnGetAllocProperties = zeMemGetAllocProperties_Tracing; pDdiTable->pfnGetAddressRange = zeMemGetAddressRange_Tracing; pDdiTable->pfnGetIpcHandle = zeMemGetIpcHandle_Tracing; pDdiTable->pfnOpenIpcHandle = zeMemOpenIpcHandle_Tracing; pDdiTable->pfnCloseIpcHandle = zeMemCloseIpcHandle_Tracing; } return result; } ZE_DLLEXPORT ze_result_t ZE_APICALL zeGetContextProcAddrTable( ze_api_version_t version, ze_context_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if 
(ZE_MAJOR_VERSION(driver_ddiTable.version) != ZE_MAJOR_VERSION(version) || ZE_MINOR_VERSION(driver_ddiTable.version) > ZE_MINOR_VERSION(version)) return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; driver_ddiTable.enableTracing = getenv_tobool("ZET_ENABLE_API_TRACING_EXP"); ze_result_t result = ZE_RESULT_SUCCESS; pDdiTable->pfnCreate = zeContextCreate; pDdiTable->pfnCreateEx = zeContextCreateEx; pDdiTable->pfnDestroy = zeContextDestroy; pDdiTable->pfnGetStatus = zeContextGetStatus; pDdiTable->pfnSystemBarrier = zeContextSystemBarrier; pDdiTable->pfnMakeMemoryResident = zeContextMakeMemoryResident; pDdiTable->pfnEvictMemory = zeContextEvictMemory; pDdiTable->pfnMakeImageResident = zeContextMakeImageResident; pDdiTable->pfnEvictImage = zeContextEvictImage; driver_ddiTable.core_ddiTable.Context = *pDdiTable; if (driver_ddiTable.enableTracing) { pDdiTable->pfnCreate = zeContextCreate_Tracing; pDdiTable->pfnDestroy = zeContextDestroy_Tracing; pDdiTable->pfnGetStatus = zeContextGetStatus_Tracing; pDdiTable->pfnSystemBarrier = zeContextSystemBarrier_Tracing; pDdiTable->pfnMakeMemoryResident = zeContextMakeMemoryResident_Tracing; pDdiTable->pfnEvictMemory = zeContextEvictMemory_Tracing; pDdiTable->pfnMakeImageResident = zeContextMakeImageResident_Tracing; pDdiTable->pfnEvictImage = zeContextEvictImage_Tracing; } return result; } ZE_DLLEXPORT ze_result_t ZE_APICALL zeGetPhysicalMemProcAddrTable( ze_api_version_t version, ze_physical_mem_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (ZE_MAJOR_VERSION(driver_ddiTable.version) != ZE_MAJOR_VERSION(version) || ZE_MINOR_VERSION(driver_ddiTable.version) > ZE_MINOR_VERSION(version)) return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; driver_ddiTable.enableTracing = getenv_tobool("ZET_ENABLE_API_TRACING_EXP"); ze_result_t result = ZE_RESULT_SUCCESS; pDdiTable->pfnCreate = zePhysicalMemCreate; pDdiTable->pfnDestroy = zePhysicalMemDestroy; driver_ddiTable.core_ddiTable.PhysicalMem = *pDdiTable; if 
(driver_ddiTable.enableTracing) { pDdiTable->pfnCreate = zePhysicalMemCreate_Tracing; pDdiTable->pfnDestroy = zePhysicalMemDestroy_Tracing; } return result; } ZE_DLLEXPORT ze_result_t ZE_APICALL zeGetVirtualMemProcAddrTable( ze_api_version_t version, ze_virtual_mem_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (ZE_MAJOR_VERSION(driver_ddiTable.version) != ZE_MAJOR_VERSION(version) || ZE_MINOR_VERSION(driver_ddiTable.version) > ZE_MINOR_VERSION(version)) return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; driver_ddiTable.enableTracing = getenv_tobool("ZET_ENABLE_API_TRACING_EXP"); ze_result_t result = ZE_RESULT_SUCCESS; pDdiTable->pfnReserve = zeVirtualMemReserve; pDdiTable->pfnFree = zeVirtualMemFree; pDdiTable->pfnQueryPageSize = zeVirtualMemQueryPageSize; pDdiTable->pfnMap = zeVirtualMemMap; pDdiTable->pfnUnmap = zeVirtualMemUnmap; pDdiTable->pfnSetAccessAttribute = zeVirtualMemSetAccessAttribute; pDdiTable->pfnGetAccessAttribute = zeVirtualMemGetAccessAttribute; driver_ddiTable.core_ddiTable.VirtualMem = *pDdiTable; if (driver_ddiTable.enableTracing) { pDdiTable->pfnReserve = zeVirtualMemReserve_Tracing; pDdiTable->pfnFree = zeVirtualMemFree_Tracing; pDdiTable->pfnQueryPageSize = zeVirtualMemQueryPageSize_Tracing; pDdiTable->pfnMap = zeVirtualMemMap_Tracing; pDdiTable->pfnUnmap = zeVirtualMemUnmap_Tracing; pDdiTable->pfnSetAccessAttribute = zeVirtualMemSetAccessAttribute_Tracing; pDdiTable->pfnGetAccessAttribute = zeVirtualMemGetAccessAttribute_Tracing; } return result; } ZE_APIEXPORT ze_result_t ZE_APICALL zeGetGlobalProcAddrTable( ze_api_version_t version, ze_global_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (ZE_MAJOR_VERSION(driver_ddiTable.version) != ZE_MAJOR_VERSION(version) || ZE_MINOR_VERSION(driver_ddiTable.version) > ZE_MINOR_VERSION(version)) return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; driver_ddiTable.enableTracing = 
getenv_tobool("ZET_ENABLE_API_TRACING_EXP"); ze_result_t result = ZE_RESULT_SUCCESS; pDdiTable->pfnInit = zeInit; driver_ddiTable.core_ddiTable.Global = *pDdiTable; if (driver_ddiTable.enableTracing) { pDdiTable->pfnInit = zeInit_Tracing; } return result; } ZE_APIEXPORT ze_result_t ZE_APICALL zeGetDeviceProcAddrTable( ze_api_version_t version, ze_device_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (ZE_MAJOR_VERSION(driver_ddiTable.version) != ZE_MAJOR_VERSION(version) || ZE_MINOR_VERSION(driver_ddiTable.version) > ZE_MINOR_VERSION(version)) return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; driver_ddiTable.enableTracing = getenv_tobool("ZET_ENABLE_API_TRACING_EXP"); ze_result_t result = ZE_RESULT_SUCCESS; pDdiTable->pfnGet = zeDeviceGet; pDdiTable->pfnGetCommandQueueGroupProperties = zeDeviceGetCommandQueueGroupProperties; pDdiTable->pfnGetSubDevices = zeDeviceGetSubDevices; pDdiTable->pfnGetProperties = zeDeviceGetProperties; pDdiTable->pfnGetComputeProperties = zeDeviceGetComputeProperties; pDdiTable->pfnGetModuleProperties = zeDeviceGetModuleProperties; pDdiTable->pfnGetMemoryProperties = zeDeviceGetMemoryProperties; pDdiTable->pfnGetMemoryAccessProperties = zeDeviceGetMemoryAccessProperties; pDdiTable->pfnGetCacheProperties = zeDeviceGetCacheProperties; pDdiTable->pfnGetImageProperties = zeDeviceGetImageProperties; pDdiTable->pfnGetP2PProperties = zeDeviceGetP2PProperties; pDdiTable->pfnCanAccessPeer = zeDeviceCanAccessPeer; pDdiTable->pfnGetStatus = zeDeviceGetStatus; pDdiTable->pfnGetExternalMemoryProperties = zeDeviceGetExternalMemoryProperties; pDdiTable->pfnGetGlobalTimestamps = zeDeviceGetGlobalTimestamps; pDdiTable->pfnReserveCacheExt = zeDeviceReserveCacheExt; pDdiTable->pfnSetCacheAdviceExt = zeDeviceSetCacheAdviceExt; pDdiTable->pfnPciGetPropertiesExt = zeDevicePciGetPropertiesExt; driver_ddiTable.core_ddiTable.Device = *pDdiTable; if (driver_ddiTable.enableTracing) { pDdiTable->pfnGet = zeDeviceGet_Tracing; 
pDdiTable->pfnGetCommandQueueGroupProperties = zeDeviceGetCommandQueueGroupProperties_Tracing; pDdiTable->pfnGetSubDevices = zeDeviceGetSubDevices_Tracing; pDdiTable->pfnGetProperties = zeDeviceGetProperties_Tracing; pDdiTable->pfnGetComputeProperties = zeDeviceGetComputeProperties_Tracing; pDdiTable->pfnGetModuleProperties = zeDeviceGetModuleProperties_Tracing; pDdiTable->pfnGetMemoryProperties = zeDeviceGetMemoryProperties_Tracing; pDdiTable->pfnGetMemoryAccessProperties = zeDeviceGetMemoryAccessProperties_Tracing; pDdiTable->pfnGetCacheProperties = zeDeviceGetCacheProperties_Tracing; pDdiTable->pfnGetImageProperties = zeDeviceGetImageProperties_Tracing; pDdiTable->pfnGetP2PProperties = zeDeviceGetP2PProperties_Tracing; pDdiTable->pfnCanAccessPeer = zeDeviceCanAccessPeer_Tracing; pDdiTable->pfnGetStatus = zeDeviceGetStatus_Tracing; pDdiTable->pfnGetExternalMemoryProperties = zeDeviceGetExternalMemoryProperties_Tracing; } return result; } ZE_APIEXPORT ze_result_t ZE_APICALL zeGetCommandQueueProcAddrTable( ze_api_version_t version, ze_command_queue_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (ZE_MAJOR_VERSION(driver_ddiTable.version) != ZE_MAJOR_VERSION(version) || ZE_MINOR_VERSION(driver_ddiTable.version) > ZE_MINOR_VERSION(version)) return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; driver_ddiTable.enableTracing = getenv_tobool("ZET_ENABLE_API_TRACING_EXP"); ze_result_t result = ZE_RESULT_SUCCESS; pDdiTable->pfnCreate = zeCommandQueueCreate; pDdiTable->pfnDestroy = zeCommandQueueDestroy; pDdiTable->pfnExecuteCommandLists = zeCommandQueueExecuteCommandLists; pDdiTable->pfnSynchronize = zeCommandQueueSynchronize; driver_ddiTable.core_ddiTable.CommandQueue = *pDdiTable; if (driver_ddiTable.enableTracing) { pDdiTable->pfnCreate = zeCommandQueueCreate_Tracing; pDdiTable->pfnDestroy = zeCommandQueueDestroy_Tracing; pDdiTable->pfnExecuteCommandLists = zeCommandQueueExecuteCommandLists_Tracing; pDdiTable->pfnSynchronize = 
zeCommandQueueSynchronize_Tracing; } return result; } ZE_APIEXPORT ze_result_t ZE_APICALL zeGetCommandListProcAddrTable( ze_api_version_t version, ze_command_list_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (ZE_MAJOR_VERSION(driver_ddiTable.version) != ZE_MAJOR_VERSION(version) || ZE_MINOR_VERSION(driver_ddiTable.version) > ZE_MINOR_VERSION(version)) return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; driver_ddiTable.enableTracing = getenv_tobool("ZET_ENABLE_API_TRACING_EXP"); ze_result_t result = ZE_RESULT_SUCCESS; pDdiTable->pfnAppendBarrier = zeCommandListAppendBarrier; pDdiTable->pfnAppendMemoryRangesBarrier = zeCommandListAppendMemoryRangesBarrier; pDdiTable->pfnCreate = zeCommandListCreate; pDdiTable->pfnCreateImmediate = zeCommandListCreateImmediate; pDdiTable->pfnDestroy = zeCommandListDestroy; pDdiTable->pfnClose = zeCommandListClose; pDdiTable->pfnReset = zeCommandListReset; pDdiTable->pfnAppendMemoryCopy = zeCommandListAppendMemoryCopy; pDdiTable->pfnAppendMemoryCopyRegion = zeCommandListAppendMemoryCopyRegion; pDdiTable->pfnAppendMemoryFill = zeCommandListAppendMemoryFill; pDdiTable->pfnAppendImageCopy = zeCommandListAppendImageCopy; pDdiTable->pfnAppendImageCopyRegion = zeCommandListAppendImageCopyRegion; pDdiTable->pfnAppendImageCopyToMemory = zeCommandListAppendImageCopyToMemory; pDdiTable->pfnAppendImageCopyFromMemory = zeCommandListAppendImageCopyFromMemory; pDdiTable->pfnAppendMemoryPrefetch = zeCommandListAppendMemoryPrefetch; pDdiTable->pfnAppendMemAdvise = zeCommandListAppendMemAdvise; pDdiTable->pfnAppendSignalEvent = zeCommandListAppendSignalEvent; pDdiTable->pfnAppendWaitOnEvents = zeCommandListAppendWaitOnEvents; pDdiTable->pfnAppendEventReset = zeCommandListAppendEventReset; pDdiTable->pfnAppendLaunchKernel = zeCommandListAppendLaunchKernel; pDdiTable->pfnAppendLaunchCooperativeKernel = zeCommandListAppendLaunchCooperativeKernel; pDdiTable->pfnAppendLaunchKernelIndirect = 
zeCommandListAppendLaunchKernelIndirect; pDdiTable->pfnAppendLaunchMultipleKernelsIndirect = zeCommandListAppendLaunchMultipleKernelsIndirect; pDdiTable->pfnAppendWriteGlobalTimestamp = zeCommandListAppendWriteGlobalTimestamp; pDdiTable->pfnAppendMemoryCopyFromContext = zeCommandListAppendMemoryCopyFromContext; pDdiTable->pfnAppendQueryKernelTimestamps = zeCommandListAppendQueryKernelTimestamps; driver_ddiTable.core_ddiTable.CommandList = *pDdiTable; if (driver_ddiTable.enableTracing) { pDdiTable->pfnAppendBarrier = zeCommandListAppendBarrier_Tracing; pDdiTable->pfnAppendMemoryRangesBarrier = zeCommandListAppendMemoryRangesBarrier_Tracing; pDdiTable->pfnCreate = zeCommandListCreate_Tracing; pDdiTable->pfnCreateImmediate = zeCommandListCreateImmediate_Tracing; pDdiTable->pfnDestroy = zeCommandListDestroy_Tracing; pDdiTable->pfnClose = zeCommandListClose_Tracing; pDdiTable->pfnReset = zeCommandListReset_Tracing; pDdiTable->pfnAppendMemoryCopy = zeCommandListAppendMemoryCopy_Tracing; pDdiTable->pfnAppendMemoryCopyRegion = zeCommandListAppendMemoryCopyRegion_Tracing; pDdiTable->pfnAppendMemoryFill = zeCommandListAppendMemoryFill_Tracing; pDdiTable->pfnAppendImageCopy = zeCommandListAppendImageCopy_Tracing; pDdiTable->pfnAppendImageCopyRegion = zeCommandListAppendImageCopyRegion_Tracing; pDdiTable->pfnAppendImageCopyToMemory = zeCommandListAppendImageCopyToMemory_Tracing; pDdiTable->pfnAppendImageCopyFromMemory = zeCommandListAppendImageCopyFromMemory_Tracing; pDdiTable->pfnAppendMemoryPrefetch = zeCommandListAppendMemoryPrefetch_Tracing; pDdiTable->pfnAppendMemAdvise = zeCommandListAppendMemAdvise_Tracing; pDdiTable->pfnAppendSignalEvent = zeCommandListAppendSignalEvent_Tracing; pDdiTable->pfnAppendWaitOnEvents = zeCommandListAppendWaitOnEvents_Tracing; pDdiTable->pfnAppendEventReset = zeCommandListAppendEventReset_Tracing; pDdiTable->pfnAppendLaunchKernel = zeCommandListAppendLaunchKernel_Tracing; pDdiTable->pfnAppendLaunchCooperativeKernel = 
zeCommandListAppendLaunchCooperativeKernel_Tracing; pDdiTable->pfnAppendLaunchKernelIndirect = zeCommandListAppendLaunchKernelIndirect_Tracing; pDdiTable->pfnAppendLaunchMultipleKernelsIndirect = zeCommandListAppendLaunchMultipleKernelsIndirect_Tracing; pDdiTable->pfnAppendWriteGlobalTimestamp = zeCommandListAppendWriteGlobalTimestamp_Tracing; pDdiTable->pfnAppendMemoryCopyFromContext = zeCommandListAppendMemoryCopyFromContext_Tracing; pDdiTable->pfnAppendQueryKernelTimestamps = zeCommandListAppendQueryKernelTimestamps_Tracing; } return result; } ZE_APIEXPORT ze_result_t ZE_APICALL zeGetFenceProcAddrTable( ze_api_version_t version, ze_fence_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (ZE_MAJOR_VERSION(driver_ddiTable.version) != ZE_MAJOR_VERSION(version) || ZE_MINOR_VERSION(driver_ddiTable.version) > ZE_MINOR_VERSION(version)) return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; driver_ddiTable.enableTracing = getenv_tobool("ZET_ENABLE_API_TRACING_EXP"); ze_result_t result = ZE_RESULT_SUCCESS; pDdiTable->pfnCreate = zeFenceCreate; pDdiTable->pfnDestroy = zeFenceDestroy; pDdiTable->pfnHostSynchronize = zeFenceHostSynchronize; pDdiTable->pfnQueryStatus = zeFenceQueryStatus; pDdiTable->pfnReset = zeFenceReset; driver_ddiTable.core_ddiTable.Fence = *pDdiTable; if (driver_ddiTable.enableTracing) { pDdiTable->pfnCreate = zeFenceCreate_Tracing; pDdiTable->pfnDestroy = zeFenceDestroy_Tracing; pDdiTable->pfnHostSynchronize = zeFenceHostSynchronize_Tracing; pDdiTable->pfnQueryStatus = zeFenceQueryStatus_Tracing; pDdiTable->pfnReset = zeFenceReset_Tracing; } return result; } ZE_APIEXPORT ze_result_t ZE_APICALL zeGetEventPoolProcAddrTable( ze_api_version_t version, ze_event_pool_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (ZE_MAJOR_VERSION(driver_ddiTable.version) != ZE_MAJOR_VERSION(version) || ZE_MINOR_VERSION(driver_ddiTable.version) > ZE_MINOR_VERSION(version)) return 
ZE_RESULT_ERROR_UNSUPPORTED_VERSION; driver_ddiTable.enableTracing = getenv_tobool("ZET_ENABLE_API_TRACING_EXP"); ze_result_t result = ZE_RESULT_SUCCESS; pDdiTable->pfnCreate = zeEventPoolCreate; pDdiTable->pfnDestroy = zeEventPoolDestroy; pDdiTable->pfnGetIpcHandle = zeEventPoolGetIpcHandle; pDdiTable->pfnOpenIpcHandle = zeEventPoolOpenIpcHandle; pDdiTable->pfnCloseIpcHandle = zeEventPoolCloseIpcHandle; driver_ddiTable.core_ddiTable.EventPool = *pDdiTable; if (driver_ddiTable.enableTracing) { pDdiTable->pfnCreate = zeEventPoolCreate_Tracing; pDdiTable->pfnDestroy = zeEventPoolDestroy_Tracing; pDdiTable->pfnGetIpcHandle = zeEventPoolGetIpcHandle_Tracing; pDdiTable->pfnOpenIpcHandle = zeEventPoolOpenIpcHandle_Tracing; pDdiTable->pfnCloseIpcHandle = zeEventPoolCloseIpcHandle_Tracing; } return result; }

// Fills *pDdiTable with the Event entry points.
// The untraced table is saved to driver_ddiTable.core_ddiTable.Event; when
// getenv_tobool("ZET_ENABLE_API_TRACING_EXP") yields true every entry is then
// replaced by its *_Tracing counterpart.
// Returns ZE_RESULT_ERROR_INVALID_ARGUMENT for a null table and
// ZE_RESULT_ERROR_UNSUPPORTED_VERSION on a version mismatch.
ZE_APIEXPORT ze_result_t ZE_APICALL
zeGetEventProcAddrTable(
    ze_api_version_t version,
    ze_event_dditable_t *pDdiTable) {
    if (pDdiTable == nullptr) {
        return ZE_RESULT_ERROR_INVALID_ARGUMENT;
    }
    if (ZE_MAJOR_VERSION(driver_ddiTable.version) != ZE_MAJOR_VERSION(version) ||
        ZE_MINOR_VERSION(driver_ddiTable.version) > ZE_MINOR_VERSION(version)) {
        return ZE_RESULT_ERROR_UNSUPPORTED_VERSION;
    }
    driver_ddiTable.enableTracing = getenv_tobool("ZET_ENABLE_API_TRACING_EXP");

    auto &table = *pDdiTable;
    table.pfnCreate = zeEventCreate;
    table.pfnDestroy = zeEventDestroy;
    table.pfnHostSignal = zeEventHostSignal;
    table.pfnHostSynchronize = zeEventHostSynchronize;
    table.pfnQueryStatus = zeEventQueryStatus;
    table.pfnHostReset = zeEventHostReset;
    table.pfnQueryKernelTimestamp = zeEventQueryKernelTimestamp;
    driver_ddiTable.core_ddiTable.Event = table;

    if (!driver_ddiTable.enableTracing) {
        return ZE_RESULT_SUCCESS;
    }
    table.pfnCreate = zeEventCreate_Tracing;
    table.pfnDestroy = zeEventDestroy_Tracing;
    table.pfnHostSignal = zeEventHostSignal_Tracing;
    table.pfnHostSynchronize = zeEventHostSynchronize_Tracing;
    table.pfnQueryStatus = zeEventQueryStatus_Tracing;
    table.pfnHostReset = zeEventHostReset_Tracing;
    table.pfnQueryKernelTimestamp = zeEventQueryKernelTimestamp_Tracing;
    return ZE_RESULT_SUCCESS;
}

// Fills *pDdiTable with the experimental Event entry point.
// No tracing flag is read and no tracing override is installed here.
// NOTE(review): unlike the KernelExp/ImageExp getters, this table is not copied
// into driver_ddiTable.core_ddiTable — confirm whether that is intentional.
ZE_APIEXPORT ze_result_t ZE_APICALL
zeGetEventExpProcAddrTable(
    ze_api_version_t version,
    ze_event_exp_dditable_t *pDdiTable) {
    if (pDdiTable == nullptr) {
        return ZE_RESULT_ERROR_INVALID_ARGUMENT;
    }
    if (ZE_MAJOR_VERSION(driver_ddiTable.version) != ZE_MAJOR_VERSION(version) ||
        ZE_MINOR_VERSION(driver_ddiTable.version) > ZE_MINOR_VERSION(version)) {
        return ZE_RESULT_ERROR_UNSUPPORTED_VERSION;
    }
    pDdiTable->pfnQueryTimestampsExp = zeEventQueryTimestampsExp;
    return ZE_RESULT_SUCCESS;
}

// Fills *pDdiTable with the Image entry points.
// The untraced table is saved to driver_ddiTable.core_ddiTable.Image before the
// tracing overrides (applied only when getenv_tobool("ZET_ENABLE_API_TRACING_EXP")
// yields true) are written.
ZE_APIEXPORT ze_result_t ZE_APICALL
zeGetImageProcAddrTable(
    ze_api_version_t version,
    ze_image_dditable_t *pDdiTable) {
    if (pDdiTable == nullptr) {
        return ZE_RESULT_ERROR_INVALID_ARGUMENT;
    }
    if (ZE_MAJOR_VERSION(driver_ddiTable.version) != ZE_MAJOR_VERSION(version) ||
        ZE_MINOR_VERSION(driver_ddiTable.version) > ZE_MINOR_VERSION(version)) {
        return ZE_RESULT_ERROR_UNSUPPORTED_VERSION;
    }
    driver_ddiTable.enableTracing = getenv_tobool("ZET_ENABLE_API_TRACING_EXP");

    auto &table = *pDdiTable;
    table.pfnGetProperties = zeImageGetProperties;
    table.pfnCreate = zeImageCreate;
    table.pfnDestroy = zeImageDestroy;
    table.pfnGetAllocPropertiesExt = zeImageGetAllocPropertiesExt;
    driver_ddiTable.core_ddiTable.Image = table;

    if (!driver_ddiTable.enableTracing) {
        return ZE_RESULT_SUCCESS;
    }
    table.pfnGetProperties = zeImageGetProperties_Tracing;
    table.pfnCreate = zeImageCreate_Tracing;
    table.pfnDestroy = zeImageDestroy_Tracing;
    // Re-assigned to the same untraced entry point that was set above.
    table.pfnGetAllocPropertiesExt = zeImageGetAllocPropertiesExt;
    return ZE_RESULT_SUCCESS;
}

ZE_APIEXPORT ze_result_t ZE_APICALL zeGetModuleProcAddrTable( ze_api_version_t version, ze_module_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (ZE_MAJOR_VERSION(driver_ddiTable.version) != ZE_MAJOR_VERSION(version) ||
ZE_MINOR_VERSION(driver_ddiTable.version) > ZE_MINOR_VERSION(version)) return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; driver_ddiTable.enableTracing = getenv_tobool("ZET_ENABLE_API_TRACING_EXP"); ze_result_t result = ZE_RESULT_SUCCESS; pDdiTable->pfnCreate = zeModuleCreate; pDdiTable->pfnDestroy = zeModuleDestroy; pDdiTable->pfnDynamicLink = zeModuleDynamicLink; pDdiTable->pfnGetNativeBinary = zeModuleGetNativeBinary; pDdiTable->pfnGetGlobalPointer = zeModuleGetGlobalPointer; pDdiTable->pfnGetKernelNames = zeModuleGetKernelNames; pDdiTable->pfnGetFunctionPointer = zeModuleGetFunctionPointer; pDdiTable->pfnGetProperties = zeModuleGetProperties; driver_ddiTable.core_ddiTable.Module = *pDdiTable; if (driver_ddiTable.enableTracing) { pDdiTable->pfnCreate = zeModuleCreate_Tracing; pDdiTable->pfnDestroy = zeModuleDestroy_Tracing; pDdiTable->pfnGetNativeBinary = zeModuleGetNativeBinary_Tracing; pDdiTable->pfnDynamicLink = zeModuleDynamicLink_Tracing; pDdiTable->pfnGetGlobalPointer = zeModuleGetGlobalPointer_Tracing; pDdiTable->pfnGetFunctionPointer = zeModuleGetFunctionPointer_Tracing; pDdiTable->pfnGetKernelNames = zeModuleGetKernelNames_Tracing; pDdiTable->pfnGetProperties = zeModuleGetProperties_Tracing; } return result; }

// Fills *pDdiTable with the ModuleBuildLog entry points.
// The untraced table is saved to driver_ddiTable.core_ddiTable.ModuleBuildLog;
// when getenv_tobool("ZET_ENABLE_API_TRACING_EXP") yields true both entries are
// then replaced by their *_Tracing counterparts.
ZE_APIEXPORT ze_result_t ZE_APICALL
zeGetModuleBuildLogProcAddrTable(
    ze_api_version_t version,
    ze_module_build_log_dditable_t *pDdiTable) {
    if (pDdiTable == nullptr) {
        return ZE_RESULT_ERROR_INVALID_ARGUMENT;
    }
    if (ZE_MAJOR_VERSION(driver_ddiTable.version) != ZE_MAJOR_VERSION(version) ||
        ZE_MINOR_VERSION(driver_ddiTable.version) > ZE_MINOR_VERSION(version)) {
        return ZE_RESULT_ERROR_UNSUPPORTED_VERSION;
    }
    driver_ddiTable.enableTracing = getenv_tobool("ZET_ENABLE_API_TRACING_EXP");

    auto &table = *pDdiTable;
    table.pfnDestroy = zeModuleBuildLogDestroy;
    table.pfnGetString = zeModuleBuildLogGetString;
    driver_ddiTable.core_ddiTable.ModuleBuildLog = table;

    if (!driver_ddiTable.enableTracing) {
        return ZE_RESULT_SUCCESS;
    }
    table.pfnDestroy = zeModuleBuildLogDestroy_Tracing;
    table.pfnGetString = zeModuleBuildLogGetString_Tracing;
    return ZE_RESULT_SUCCESS;
}

// Fills *pDdiTable with the Kernel entry points.
// Same validation and tracing scheme as above; the untraced table is saved to
// driver_ddiTable.core_ddiTable.Kernel.
ZE_APIEXPORT ze_result_t ZE_APICALL
zeGetKernelProcAddrTable(
    ze_api_version_t version,
    ze_kernel_dditable_t *pDdiTable) {
    if (pDdiTable == nullptr) {
        return ZE_RESULT_ERROR_INVALID_ARGUMENT;
    }
    if (ZE_MAJOR_VERSION(driver_ddiTable.version) != ZE_MAJOR_VERSION(version) ||
        ZE_MINOR_VERSION(driver_ddiTable.version) > ZE_MINOR_VERSION(version)) {
        return ZE_RESULT_ERROR_UNSUPPORTED_VERSION;
    }
    driver_ddiTable.enableTracing = getenv_tobool("ZET_ENABLE_API_TRACING_EXP");

    auto &table = *pDdiTable;
    table.pfnCreate = zeKernelCreate;
    table.pfnDestroy = zeKernelDestroy;
    table.pfnSetGroupSize = zeKernelSetGroupSize;
    table.pfnSuggestGroupSize = zeKernelSuggestGroupSize;
    table.pfnSuggestMaxCooperativeGroupCount = zeKernelSuggestMaxCooperativeGroupCount;
    table.pfnSetArgumentValue = zeKernelSetArgumentValue;
    table.pfnSetIndirectAccess = zeKernelSetIndirectAccess;
    table.pfnGetIndirectAccess = zeKernelGetIndirectAccess;
    table.pfnGetSourceAttributes = zeKernelGetSourceAttributes;
    table.pfnGetProperties = zeKernelGetProperties;
    table.pfnSetCacheConfig = zeKernelSetCacheConfig;
    table.pfnGetName = zeKernelGetName;
    driver_ddiTable.core_ddiTable.Kernel = table;

    if (!driver_ddiTable.enableTracing) {
        return ZE_RESULT_SUCCESS;
    }
    table.pfnCreate = zeKernelCreate_Tracing;
    table.pfnDestroy = zeKernelDestroy_Tracing;
    table.pfnSetGroupSize = zeKernelSetGroupSize_Tracing;
    table.pfnSuggestGroupSize = zeKernelSuggestGroupSize_Tracing;
    table.pfnSuggestMaxCooperativeGroupCount = zeKernelSuggestMaxCooperativeGroupCount_Tracing;
    table.pfnSetArgumentValue = zeKernelSetArgumentValue_Tracing;
    table.pfnSetIndirectAccess = zeKernelSetIndirectAccess_Tracing;
    table.pfnGetIndirectAccess = zeKernelGetIndirectAccess_Tracing;
    table.pfnGetSourceAttributes = zeKernelGetSourceAttributes_Tracing;
    table.pfnGetProperties = zeKernelGetProperties_Tracing;
    table.pfnSetCacheConfig = zeKernelSetCacheConfig_Tracing;
    table.pfnGetName = zeKernelGetName_Tracing;
    return ZE_RESULT_SUCCESS;
}

// Fills *pDdiTable with the Sampler entry points.
// Same validation and tracing scheme as above; the untraced table is saved to
// driver_ddiTable.core_ddiTable.Sampler.
ZE_APIEXPORT ze_result_t ZE_APICALL
zeGetSamplerProcAddrTable(
    ze_api_version_t version,
    ze_sampler_dditable_t *pDdiTable) {
    if (pDdiTable == nullptr) {
        return ZE_RESULT_ERROR_INVALID_ARGUMENT;
    }
    if (ZE_MAJOR_VERSION(driver_ddiTable.version) != ZE_MAJOR_VERSION(version) ||
        ZE_MINOR_VERSION(driver_ddiTable.version) > ZE_MINOR_VERSION(version)) {
        return ZE_RESULT_ERROR_UNSUPPORTED_VERSION;
    }
    driver_ddiTable.enableTracing = getenv_tobool("ZET_ENABLE_API_TRACING_EXP");

    auto &table = *pDdiTable;
    table.pfnCreate = zeSamplerCreate;
    table.pfnDestroy = zeSamplerDestroy;
    driver_ddiTable.core_ddiTable.Sampler = table;

    if (!driver_ddiTable.enableTracing) {
        return ZE_RESULT_SUCCESS;
    }
    table.pfnCreate = zeSamplerCreate_Tracing;
    table.pfnDestroy = zeSamplerDestroy_Tracing;
    return ZE_RESULT_SUCCESS;
}

// Fills *pDdiTable with the experimental Kernel entry points and saves a copy
// in driver_ddiTable.core_ddiTable.KernelExp. No tracing flag is read and no
// tracing override is installed here.
ZE_APIEXPORT ze_result_t ZE_APICALL
zeGetKernelExpProcAddrTable(
    ze_api_version_t version,
    ze_kernel_exp_dditable_t *pDdiTable) {
    if (pDdiTable == nullptr) {
        return ZE_RESULT_ERROR_INVALID_ARGUMENT;
    }
    if (ZE_MAJOR_VERSION(driver_ddiTable.version) != ZE_MAJOR_VERSION(version) ||
        ZE_MINOR_VERSION(driver_ddiTable.version) > ZE_MINOR_VERSION(version)) {
        return ZE_RESULT_ERROR_UNSUPPORTED_VERSION;
    }
    auto &table = *pDdiTable;
    table.pfnSetGlobalOffsetExp = zeKernelSetGlobalOffsetExp;
    table.pfnSchedulingHintExp = zeKernelSchedulingHintExp;
    driver_ddiTable.core_ddiTable.KernelExp = table;
    return ZE_RESULT_SUCCESS;
}

ZE_APIEXPORT ze_result_t ZE_APICALL zeGetImageExpProcAddrTable( ze_api_version_t version, ze_image_exp_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (ZE_MAJOR_VERSION(driver_ddiTable.version) != ZE_MAJOR_VERSION(version) || ZE_MINOR_VERSION(driver_ddiTable.version) > ZE_MINOR_VERSION(version)) return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; ze_result_t result =
ZE_RESULT_SUCCESS; pDdiTable->pfnGetMemoryPropertiesExp = zeImageGetMemoryPropertiesExp; pDdiTable->pfnViewCreateExp = zeImageViewCreateExp; driver_ddiTable.core_ddiTable.ImageExp = *pDdiTable; return result; } compute-runtime-22.14.22890/level_zero/api/core/ze_device.cpp000066400000000000000000000112241422164147700236620ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/device/device.h" #include "level_zero/core/source/driver/driver.h" #include "level_zero/core/source/driver/driver_handle.h" #include #include ZE_APIEXPORT ze_result_t ZE_APICALL zeDeviceGet( ze_driver_handle_t hDriver, uint32_t *pCount, ze_device_handle_t *phDevices) { return L0::DriverHandle::fromHandle(hDriver)->getDevice(pCount, phDevices); } ZE_APIEXPORT ze_result_t ZE_APICALL zeDeviceGetSubDevices( ze_device_handle_t hDevice, uint32_t *pCount, ze_device_handle_t *phSubdevices) { return L0::Device::fromHandle(hDevice)->getSubDevices(pCount, phSubdevices); } ZE_APIEXPORT ze_result_t ZE_APICALL zeDeviceGetProperties( ze_device_handle_t hDevice, ze_device_properties_t *pDeviceProperties) { return L0::Device::fromHandle(hDevice)->getProperties(pDeviceProperties); } ZE_APIEXPORT ze_result_t ZE_APICALL zeDeviceGetComputeProperties( ze_device_handle_t hDevice, ze_device_compute_properties_t *pComputeProperties) { return L0::Device::fromHandle(hDevice)->getComputeProperties(pComputeProperties); } ZE_APIEXPORT ze_result_t ZE_APICALL zeDeviceGetModuleProperties( ze_device_handle_t hDevice, ze_device_module_properties_t *pKernelProperties) { return L0::Device::fromHandle(hDevice)->getKernelProperties(pKernelProperties); } ZE_APIEXPORT ze_result_t ZE_APICALL zeDeviceGetMemoryProperties( ze_device_handle_t hDevice, uint32_t *pCount, ze_device_memory_properties_t *pMemProperties) { return L0::Device::fromHandle(hDevice)->getMemoryProperties(pCount, pMemProperties); } ZE_APIEXPORT ze_result_t ZE_APICALL 
zeDeviceGetMemoryAccessProperties( ze_device_handle_t hDevice, ze_device_memory_access_properties_t *pMemAccessProperties) { return L0::Device::fromHandle(hDevice)->getMemoryAccessProperties(pMemAccessProperties); }

// Resolves hDevice to its L0::Device and forwards to getCacheProperties().
ZE_APIEXPORT ze_result_t ZE_APICALL
zeDeviceGetCacheProperties(
    ze_device_handle_t hDevice,
    uint32_t *pCount,
    ze_device_cache_properties_t *pCacheProperties) {
    auto device = L0::Device::fromHandle(hDevice);
    return device->getCacheProperties(pCount, pCacheProperties);
}

// Resolves hDevice to its L0::Device and forwards to getDeviceImageProperties().
ZE_APIEXPORT ze_result_t ZE_APICALL
zeDeviceGetImageProperties(
    ze_device_handle_t hDevice,
    ze_device_image_properties_t *pImageProperties) {
    auto device = L0::Device::fromHandle(hDevice);
    return device->getDeviceImageProperties(pImageProperties);
}

// Resolves hDevice to its L0::Device and forwards to getP2PProperties(),
// passing the peer handle through unchanged.
ZE_APIEXPORT ze_result_t ZE_APICALL
zeDeviceGetP2PProperties(
    ze_device_handle_t hDevice,
    ze_device_handle_t hPeerDevice,
    ze_device_p2p_properties_t *pP2PProperties) {
    auto device = L0::Device::fromHandle(hDevice);
    return device->getP2PProperties(hPeerDevice, pP2PProperties);
}

// Resolves hDevice to its L0::Device and forwards to canAccessPeer().
ZE_APIEXPORT ze_result_t ZE_APICALL
zeDeviceCanAccessPeer(
    ze_device_handle_t hDevice,
    ze_device_handle_t hPeerDevice,
    ze_bool_t *value) {
    auto device = L0::Device::fromHandle(hDevice);
    return device->canAccessPeer(hPeerDevice, value);
}

// Resolves hDevice to its L0::Device and forwards to
// getCommandQueueGroupProperties().
ZE_APIEXPORT ze_result_t ZE_APICALL
zeDeviceGetCommandQueueGroupProperties(
    ze_device_handle_t hDevice,
    uint32_t *pCount,
    ze_command_queue_group_properties_t *pCommandQueueGroupProperties) {
    auto device = L0::Device::fromHandle(hDevice);
    return device->getCommandQueueGroupProperties(pCount, pCommandQueueGroupProperties);
}

// Resolves hDevice to its L0::Device and forwards to
// getExternalMemoryProperties().
ZE_APIEXPORT ze_result_t ZE_APICALL
zeDeviceGetExternalMemoryProperties(
    ze_device_handle_t hDevice,
    ze_device_external_memory_properties_t *pExternalMemoryProperties) {
    auto device = L0::Device::fromHandle(hDevice);
    return device->getExternalMemoryProperties(pExternalMemoryProperties);
}

// Always reports ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; hDevice is not inspected.
ZE_APIEXPORT ze_result_t ZE_APICALL
zeDeviceGetStatus(
    ze_device_handle_t hDevice) {
    return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

ZE_APIEXPORT ze_result_t ZE_APICALL zeDeviceGetGlobalTimestamps( ze_device_handle_t hDevice, uint64_t *hostTimestamp, uint64_t
*deviceTimestamp) { return L0::Device::fromHandle(hDevice)->getGlobalTimestamps(hostTimestamp, deviceTimestamp); } ZE_APIEXPORT ze_result_t ZE_APICALL zeDeviceReserveCacheExt( ze_device_handle_t hDevice, size_t cacheLevel, size_t cacheReservationSize) { return L0::Device::fromHandle(hDevice)->reserveCache(cacheLevel, cacheReservationSize); } ZE_APIEXPORT ze_result_t ZE_APICALL zeDeviceSetCacheAdviceExt( ze_device_handle_t hDevice, void *ptr, size_t regionSize, ze_cache_ext_region_t cacheRegion) { return L0::Device::fromHandle(hDevice)->setCacheAdvice(ptr, regionSize, cacheRegion); } ZE_APIEXPORT ze_result_t ZE_APICALL zeDevicePciGetPropertiesExt( ze_device_handle_t hDevice, ze_pci_ext_properties_t *pPciProperties) { return L0::Device::fromHandle(hDevice)->getPciProperties(pPciProperties); } compute-runtime-22.14.22890/level_zero/api/core/ze_driver.cpp000066400000000000000000000032421422164147700237170ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/driver/driver.h" #include "level_zero/core/source/driver/driver_handle.h" #include ZE_APIEXPORT ze_result_t ZE_APICALL zeInit( ze_init_flags_t flags) { return L0::init(flags); } ZE_APIEXPORT ze_result_t ZE_APICALL zeDriverGet( uint32_t *pCount, ze_driver_handle_t *phDrivers) { return L0::driverHandleGet(pCount, phDrivers); } ZE_APIEXPORT ze_result_t ZE_APICALL zeDriverGetProperties( ze_driver_handle_t hDriver, ze_driver_properties_t *pProperties) { return L0::DriverHandle::fromHandle(hDriver)->getProperties(pProperties); } ZE_APIEXPORT ze_result_t ZE_APICALL zeDriverGetApiVersion( ze_driver_handle_t hDriver, ze_api_version_t *version) { return L0::DriverHandle::fromHandle(hDriver)->getApiVersion(version); } ZE_APIEXPORT ze_result_t ZE_APICALL zeDriverGetIpcProperties( ze_driver_handle_t hDriver, ze_driver_ipc_properties_t *pIPCProperties) { return L0::DriverHandle::fromHandle(hDriver)->getIPCProperties(pIPCProperties); } 
ZE_APIEXPORT ze_result_t ZE_APICALL zeDriverGetExtensionProperties( ze_driver_handle_t hDriver, uint32_t *pCount, ze_driver_extension_properties_t *pExtensionProperties) { return L0::DriverHandle::fromHandle(hDriver)->getExtensionProperties(pCount, pExtensionProperties); } ZE_APIEXPORT ze_result_t ZE_APICALL zeDriverGetExtensionFunctionAddress( ze_driver_handle_t hDriver, const char *name, void **ppFunctionAddress) { return L0::DriverHandle::fromHandle(hDriver)->getExtensionFunctionAddress(name, ppFunctionAddress); }compute-runtime-22.14.22890/level_zero/api/core/ze_event.cpp000066400000000000000000000063101422164147700235440ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/event/event.h" #include ZE_APIEXPORT ze_result_t ZE_APICALL zeEventPoolCreate( ze_context_handle_t hContext, const ze_event_pool_desc_t *desc, uint32_t numDevices, ze_device_handle_t *phDevices, ze_event_pool_handle_t *phEventPool) { return L0::Context::fromHandle(hContext)->createEventPool(desc, numDevices, phDevices, phEventPool); } ZE_APIEXPORT ze_result_t ZE_APICALL zeEventPoolDestroy( ze_event_pool_handle_t hEventPool) { return L0::EventPool::fromHandle(hEventPool)->destroy(); } ZE_APIEXPORT ze_result_t ZE_APICALL zeEventCreate( ze_event_pool_handle_t hEventPool, const ze_event_desc_t *desc, ze_event_handle_t *phEvent) { return L0::EventPool::fromHandle(hEventPool)->createEvent(desc, phEvent); } ZE_APIEXPORT ze_result_t ZE_APICALL zeEventDestroy( ze_event_handle_t hEvent) { return L0::Event::fromHandle(hEvent)->destroy(); } ZE_APIEXPORT ze_result_t ZE_APICALL zeEventPoolGetIpcHandle( ze_event_pool_handle_t hEventPool, ze_ipc_event_pool_handle_t *phIpc) { return L0::EventPool::fromHandle(hEventPool)->getIpcHandle(phIpc); } ZE_APIEXPORT ze_result_t ZE_APICALL zeEventPoolOpenIpcHandle( ze_context_handle_t hContext, ze_ipc_event_pool_handle_t hIpc, ze_event_pool_handle_t *phEventPool) { return 
L0::Context::fromHandle(hContext)->openEventPoolIpcHandle(hIpc, phEventPool); } ZE_APIEXPORT ze_result_t ZE_APICALL zeEventPoolCloseIpcHandle( ze_event_pool_handle_t hEventPool) { return L0::EventPool::fromHandle(hEventPool)->closeIpcHandle(); } ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendSignalEvent( ze_command_list_handle_t hCommandList, ze_event_handle_t hEvent) { return L0::CommandList::fromHandle(hCommandList)->appendSignalEvent(hEvent); } ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendWaitOnEvents( ze_command_list_handle_t hCommandList, uint32_t numEvents, ze_event_handle_t *phEvents) { return L0::CommandList::fromHandle(hCommandList)->appendWaitOnEvents(numEvents, phEvents); } ZE_APIEXPORT ze_result_t ZE_APICALL zeEventHostSignal( ze_event_handle_t hEvent) { return L0::Event::fromHandle(hEvent)->hostSignal(); } ZE_APIEXPORT ze_result_t ZE_APICALL zeEventHostSynchronize( ze_event_handle_t hEvent, uint64_t timeout) { return L0::Event::fromHandle(hEvent)->hostSynchronize(timeout); } ZE_APIEXPORT ze_result_t ZE_APICALL zeEventQueryStatus( ze_event_handle_t hEvent) { return L0::Event::fromHandle(hEvent)->queryStatus(); } ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendEventReset( ze_command_list_handle_t hCommandList, ze_event_handle_t hEvent) { return L0::CommandList::fromHandle(hCommandList)->appendEventReset(hEvent); } ZE_APIEXPORT ze_result_t ZE_APICALL zeEventHostReset( ze_event_handle_t hEvent) { return L0::Event::fromHandle(hEvent)->reset(); } ZE_APIEXPORT ze_result_t ZE_APICALL zeEventQueryKernelTimestamp( ze_event_handle_t hEvent, ze_kernel_timestamp_result_t *timestampType) { return L0::Event::fromHandle(hEvent)->queryKernelTimestamp(timestampType); }compute-runtime-22.14.22890/level_zero/api/core/ze_fence.cpp000066400000000000000000000020121422164147700234760ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/fence/fence.h" #include 
ZE_APIEXPORT ze_result_t ZE_APICALL zeFenceCreate( ze_command_queue_handle_t hCommandQueue, const ze_fence_desc_t *desc, ze_fence_handle_t *phFence) { return L0::CommandQueue::fromHandle(hCommandQueue)->createFence(desc, phFence); } ZE_APIEXPORT ze_result_t ZE_APICALL zeFenceDestroy( ze_fence_handle_t hFence) { return L0::Fence::fromHandle(hFence)->destroy(); } ZE_APIEXPORT ze_result_t ZE_APICALL zeFenceHostSynchronize( ze_fence_handle_t hFence, uint64_t timeout) { return L0::Fence::fromHandle(hFence)->hostSynchronize(timeout); } ZE_APIEXPORT ze_result_t ZE_APICALL zeFenceQueryStatus( ze_fence_handle_t hFence) { return L0::Fence::fromHandle(hFence)->queryStatus(); } ZE_APIEXPORT ze_result_t ZE_APICALL zeFenceReset( ze_fence_handle_t hFence) { return L0::Fence::fromHandle(hFence)->reset(false); } compute-runtime-22.14.22890/level_zero/api/core/ze_image.cpp000066400000000000000000000015111422164147700235030ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/image/image.h" #include ZE_APIEXPORT ze_result_t ZE_APICALL zeImageGetProperties( ze_device_handle_t hDevice, const ze_image_desc_t *desc, ze_image_properties_t *pImageProperties) { return L0::Device::fromHandle(hDevice)->imageGetProperties(desc, pImageProperties); } ZE_APIEXPORT ze_result_t ZE_APICALL zeImageCreate( ze_context_handle_t hContext, ze_device_handle_t hDevice, const ze_image_desc_t *desc, ze_image_handle_t *phImage) { return L0::Context::fromHandle(hContext)->createImage(hDevice, desc, phImage); } ZE_APIEXPORT ze_result_t ZE_APICALL zeImageDestroy( ze_image_handle_t hImage) { return L0::Image::fromHandle(hImage)->destroy(); } compute-runtime-22.14.22890/level_zero/api/core/ze_memory.cpp000066400000000000000000000054661422164147700237460ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/driver/driver_handle.h" 
#include ZE_APIEXPORT ze_result_t ZE_APICALL zeMemAllocShared( ze_context_handle_t hContext, const ze_device_mem_alloc_desc_t *deviceDesc, const ze_host_mem_alloc_desc_t *hostDesc, size_t size, size_t alignment, ze_device_handle_t hDevice, void **pptr) { return L0::Context::fromHandle(hContext)->allocSharedMem(hDevice, deviceDesc, hostDesc, size, alignment, pptr); } ZE_APIEXPORT ze_result_t ZE_APICALL zeMemAllocDevice( ze_context_handle_t hContext, const ze_device_mem_alloc_desc_t *deviceDesc, size_t size, size_t alignment, ze_device_handle_t hDevice, void **pptr) { return L0::Context::fromHandle(hContext)->allocDeviceMem(hDevice, deviceDesc, size, alignment, pptr); } ZE_APIEXPORT ze_result_t ZE_APICALL zeMemAllocHost( ze_context_handle_t hContext, const ze_host_mem_alloc_desc_t *hostDesc, size_t size, size_t alignment, void **pptr) { return L0::Context::fromHandle(hContext)->allocHostMem(hostDesc, size, alignment, pptr); } ZE_APIEXPORT ze_result_t ZE_APICALL zeMemFree( ze_context_handle_t hContext, void *ptr) { return L0::Context::fromHandle(hContext)->freeMem(ptr); } ZE_APIEXPORT ze_result_t ZE_APICALL zeMemFreeExt( ze_context_handle_t hContext, const ze_memory_free_ext_desc_t *pMemFreeDesc, void *ptr) { return L0::Context::fromHandle(hContext)->freeMemExt(pMemFreeDesc, ptr); } ZE_APIEXPORT ze_result_t ZE_APICALL zeMemGetAllocProperties( ze_context_handle_t hContext, const void *ptr, ze_memory_allocation_properties_t *pMemAllocProperties, ze_device_handle_t *phDevice) { return L0::Context::fromHandle(hContext)->getMemAllocProperties(ptr, pMemAllocProperties, phDevice); } ZE_APIEXPORT ze_result_t ZE_APICALL zeMemGetAddressRange( ze_context_handle_t hContext, const void *ptr, void **pBase, size_t *pSize) { return L0::Context::fromHandle(hContext)->getMemAddressRange(ptr, pBase, pSize); } ZE_APIEXPORT ze_result_t ZE_APICALL zeMemGetIpcHandle( ze_context_handle_t hContext, const void *ptr, ze_ipc_mem_handle_t *pIpcHandle) { return 
L0::Context::fromHandle(hContext)->getIpcMemHandle(ptr, pIpcHandle); } ZE_APIEXPORT ze_result_t ZE_APICALL zeMemOpenIpcHandle( ze_context_handle_t hContext, ze_device_handle_t hDevice, ze_ipc_mem_handle_t handle, ze_ipc_memory_flags_t flags, void **pptr) { return L0::Context::fromHandle(hContext)->openIpcMemHandle(hDevice, handle, flags, pptr); } ZE_APIEXPORT ze_result_t ZE_APICALL zeMemCloseIpcHandle( ze_context_handle_t hContext, const void *ptr) { return L0::Context::fromHandle(hContext)->closeIpcMemHandle(ptr); } compute-runtime-22.14.22890/level_zero/api/core/ze_module.cpp000066400000000000000000000166471422164147700237260ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/cmdlist/cmdlist.h" #include "level_zero/core/source/module/module.h" #include ZE_APIEXPORT ze_result_t ZE_APICALL zeModuleCreate( ze_context_handle_t hContext, ze_device_handle_t hDevice, const ze_module_desc_t *desc, ze_module_handle_t *phModule, ze_module_build_log_handle_t *phBuildLog) { return L0::Context::fromHandle(hContext)->createModule(hDevice, desc, phModule, phBuildLog); } ZE_APIEXPORT ze_result_t ZE_APICALL zeModuleDestroy( ze_module_handle_t hModule) { return L0::Module::fromHandle(hModule)->destroy(); } ZE_APIEXPORT ze_result_t ZE_APICALL zeModuleBuildLogDestroy( ze_module_build_log_handle_t hModuleBuildLog) { return L0::ModuleBuildLog::fromHandle(hModuleBuildLog)->destroy(); } ZE_APIEXPORT ze_result_t ZE_APICALL zeModuleBuildLogGetString( ze_module_build_log_handle_t hModuleBuildLog, size_t *pSize, char *pBuildLog) { return L0::ModuleBuildLog::fromHandle(hModuleBuildLog)->getString(pSize, pBuildLog); } ZE_APIEXPORT ze_result_t ZE_APICALL zeModuleGetNativeBinary( ze_module_handle_t hModule, size_t *pSize, uint8_t *pModuleNativeBinary) { return L0::Module::fromHandle(hModule)->getNativeBinary(pSize, pModuleNativeBinary); } ZE_APIEXPORT ze_result_t ZE_APICALL zeModuleGetGlobalPointer( 
ze_module_handle_t hModule, const char *pGlobalName, size_t *pSize, void **pptr) { return L0::Module::fromHandle(hModule)->getGlobalPointer(pGlobalName, pSize, pptr); } ZE_APIEXPORT ze_result_t ZE_APICALL zeModuleGetKernelNames( ze_module_handle_t hModule, uint32_t *pCount, const char **pNames) { return L0::Module::fromHandle(hModule)->getKernelNames(pCount, pNames); } ZE_APIEXPORT ze_result_t ZE_APICALL zeKernelCreate( ze_module_handle_t hModule, const ze_kernel_desc_t *desc, ze_kernel_handle_t *phFunction) { return L0::Module::fromHandle(hModule)->createKernel(desc, phFunction); } ZE_APIEXPORT ze_result_t ZE_APICALL zeKernelDestroy( ze_kernel_handle_t hKernel) { return L0::Kernel::fromHandle(hKernel)->destroy(); } ZE_APIEXPORT ze_result_t ZE_APICALL zeModuleGetFunctionPointer( ze_module_handle_t hModule, const char *pKernelName, void **pfnFunction) { return L0::Module::fromHandle(hModule)->getFunctionPointer(pKernelName, pfnFunction); } ZE_APIEXPORT ze_result_t ZE_APICALL zeKernelSetGroupSize( ze_kernel_handle_t hKernel, uint32_t groupSizeX, uint32_t groupSizeY, uint32_t groupSizeZ) { return L0::Kernel::fromHandle(hKernel)->setGroupSize(groupSizeX, groupSizeY, groupSizeZ); } ZE_APIEXPORT ze_result_t ZE_APICALL zeKernelSuggestGroupSize( ze_kernel_handle_t hKernel, uint32_t globalSizeX, uint32_t globalSizeY, uint32_t globalSizeZ, uint32_t *groupSizeX, uint32_t *groupSizeY, uint32_t *groupSizeZ) { return L0::Kernel::fromHandle(hKernel)->suggestGroupSize(globalSizeX, globalSizeY, globalSizeZ, groupSizeX, groupSizeY, groupSizeZ); } ZE_APIEXPORT ze_result_t ZE_APICALL zeKernelSuggestMaxCooperativeGroupCount( ze_kernel_handle_t hKernel, uint32_t *totalGroupCount) { return L0::Kernel::fromHandle(hKernel)->suggestMaxCooperativeGroupCount(totalGroupCount, NEO::EngineGroupType::Compute, false); } ZE_APIEXPORT ze_result_t ZE_APICALL zeKernelSetArgumentValue( ze_kernel_handle_t hKernel, uint32_t argIndex, size_t argSize, const void *pArgValue) { return 
L0::Kernel::fromHandle(hKernel)->setArgumentValue(argIndex, argSize, pArgValue); } ZE_APIEXPORT ze_result_t ZE_APICALL zeKernelSetIndirectAccess( ze_kernel_handle_t hKernel, ze_kernel_indirect_access_flags_t flags) { return L0::Kernel::fromHandle(hKernel)->setIndirectAccess(flags); } ZE_APIEXPORT ze_result_t ZE_APICALL zeKernelGetIndirectAccess( ze_kernel_handle_t hKernel, ze_kernel_indirect_access_flags_t *pFlags) { return L0::Kernel::fromHandle(hKernel)->getIndirectAccess(pFlags); } ZE_APIEXPORT ze_result_t ZE_APICALL zeKernelGetSourceAttributes( ze_kernel_handle_t hKernel, uint32_t *pSize, char **pString) { return L0::Kernel::fromHandle(hKernel)->getSourceAttributes(pSize, pString); } ZE_APIEXPORT ze_result_t ZE_APICALL zeKernelGetProperties( ze_kernel_handle_t hKernel, ze_kernel_properties_t *pKernelProperties) { return L0::Kernel::fromHandle(hKernel)->getProperties(pKernelProperties); } ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendLaunchKernel( ze_command_list_handle_t hCommandList, ze_kernel_handle_t hKernel, const ze_group_count_t *pLaunchFuncArgs, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { return L0::CommandList::fromHandle(hCommandList)->appendLaunchKernel(hKernel, pLaunchFuncArgs, hSignalEvent, numWaitEvents, phWaitEvents); } ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendLaunchCooperativeKernel( ze_command_list_handle_t hCommandList, ze_kernel_handle_t hKernel, const ze_group_count_t *pLaunchFuncArgs, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { return L0::CommandList::fromHandle(hCommandList)->appendLaunchCooperativeKernel(hKernel, pLaunchFuncArgs, hSignalEvent, numWaitEvents, phWaitEvents); } ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendLaunchKernelIndirect( ze_command_list_handle_t hCommandList, ze_kernel_handle_t hKernel, const ze_group_count_t *pLaunchArgumentsBuffer, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, 
ze_event_handle_t *phWaitEvents) { return L0::CommandList::fromHandle(hCommandList)->appendLaunchKernelIndirect(hKernel, pLaunchArgumentsBuffer, hSignalEvent, numWaitEvents, phWaitEvents); } ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendLaunchMultipleKernelsIndirect( ze_command_list_handle_t hCommandList, uint32_t numKernels, ze_kernel_handle_t *phKernels, const uint32_t *pCountBuffer, const ze_group_count_t *pLaunchArgumentsBuffer, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { return L0::CommandList::fromHandle(hCommandList)->appendLaunchMultipleKernelsIndirect(numKernels, phKernels, pCountBuffer, pLaunchArgumentsBuffer, hSignalEvent, numWaitEvents, phWaitEvents); } ZE_APIEXPORT ze_result_t ZE_APICALL zeKernelGetName( ze_kernel_handle_t hKernel, size_t *pSize, char *pName) { return L0::Kernel::fromHandle(hKernel)->getKernelName(pSize, pName); } ZE_APIEXPORT ze_result_t ZE_APICALL zeModuleDynamicLink( uint32_t numModules, ze_module_handle_t *phModules, ze_module_build_log_handle_t *phLinkLog) { return L0::Module::fromHandle(phModules[0])->performDynamicLink(numModules, phModules, phLinkLog); } ZE_APIEXPORT ze_result_t ZE_APICALL zeModuleGetProperties( ze_module_handle_t hModule, ze_module_properties_t *pModuleProperties) { return L0::Module::fromHandle(hModule)->getProperties(pModuleProperties); } ZE_APIEXPORT ze_result_t ZE_APICALL zeKernelSetCacheConfig( ze_kernel_handle_t hKernel, ze_cache_config_flags_t flags) { return L0::Kernel::fromHandle(hKernel)->setCacheConfig(flags); } ZE_APIEXPORT ze_result_t ZE_APICALL zeKernelSchedulingHintExp( ze_kernel_handle_t hKernel, ze_scheduling_hint_exp_desc_t *pHint) { return L0::Kernel::fromHandle(hKernel)->setSchedulingHintExp(pHint); } compute-runtime-22.14.22890/level_zero/api/core/ze_sampler.cpp000066400000000000000000000012221422164147700240630ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include 
"level_zero/core/source/context/context.h" #include "level_zero/core/source/sampler/sampler.h" #include ZE_APIEXPORT ze_result_t ZE_APICALL zeSamplerCreate( ze_context_handle_t hContext, ze_device_handle_t hDevice, const ze_sampler_desc_t *desc, ze_sampler_handle_t *phSampler) { return L0::Context::fromHandle(hContext)->createSampler(hDevice, desc, phSampler); } ZE_APIEXPORT ze_result_t ZE_APICALL zeSamplerDestroy( ze_sampler_handle_t hSampler) { return L0::Sampler::fromHandle(hSampler)->destroy(); } compute-runtime-22.14.22890/level_zero/api/experimental/000077500000000000000000000000001422164147700227665ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/api/experimental/CMakeLists.txt000066400000000000000000000002611422164147700255250ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_EXPERIMENTAL_API ) set_property(GLOBAL PROPERTY L0_EXPERIMENTAL_API ${L0_EXPERIMENTAL_API}) compute-runtime-22.14.22890/level_zero/api/experimental/tracing/000077500000000000000000000000001422164147700244155ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/api/experimental/tracing/CMakeLists.txt000066400000000000000000000003711422164147700271560ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_EXPERIMENTAL_TRACING_API ${CMAKE_CURRENT_SOURCE_DIR}/zet_tracing.cpp ) set_property(GLOBAL PROPERTY L0_EXPERIMENTAL_TRACING_API ${L0_EXPERIMENTAL_TRACING_API}) compute-runtime-22.14.22890/level_zero/api/experimental/tracing/zet_tracing.cpp000066400000000000000000000022601422164147700274320ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/experimental/source/tracing/tracing.h" #include ZE_APIEXPORT ze_result_t ZE_APICALL zetTracerExpCreate( zet_context_handle_t hContext, const zet_tracer_exp_desc_t *desc, zet_tracer_exp_handle_t *phTracer) { return 
L0::createAPITracer(hContext, desc, phTracer); } ZE_APIEXPORT ze_result_t ZE_APICALL zetTracerExpDestroy( zet_tracer_exp_handle_t hTracer) { return L0::APITracer::fromHandle(hTracer)->destroyTracer(hTracer); } ZE_APIEXPORT ze_result_t ZE_APICALL zetTracerExpSetPrologues( zet_tracer_exp_handle_t hTracer, zet_core_callbacks_t *pCoreCbs) { return L0::APITracer::fromHandle(hTracer)->setPrologues(pCoreCbs); } ZE_APIEXPORT ze_result_t ZE_APICALL zetTracerExpSetEpilogues( zet_tracer_exp_handle_t hTracer, zet_core_callbacks_t *pCoreCbs) { return L0::APITracer::fromHandle(hTracer)->setEpilogues(pCoreCbs); } ZE_APIEXPORT ze_result_t ZE_APICALL zetTracerExpSetEnabled( zet_tracer_exp_handle_t hTracer, ze_bool_t enable) { return L0::APITracer::fromHandle(hTracer)->enableTracer(enable); } compute-runtime-22.14.22890/level_zero/api/extensions/000077500000000000000000000000001422164147700224705ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/api/extensions/CMakeLists.txt000066400000000000000000000002771422164147700252360ustar00rootroot00000000000000# # Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_EXTENSIONS_SRCS_API ) set_property(GLOBAL PROPERTY L0_EXTENSIONS_SRCS_API ${L0_EXTENSIONS_SRCS_API}) compute-runtime-22.14.22890/level_zero/api/extensions/public/000077500000000000000000000000001422164147700237465ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/api/extensions/public/CMakeLists.txt000066400000000000000000000005411422164147700265060ustar00rootroot00000000000000# # Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_PUBLIC_EXTENSIONS_SRCS_API ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/ze_exp_ext.cpp ${CMAKE_CURRENT_SOURCE_DIR}/zet_exp_ext.cpp ) set_property(GLOBAL PROPERTY L0_PUBLIC_EXTENSIONS_SRCS_API ${L0_PUBLIC_EXTENSIONS_SRCS_API}) 
compute-runtime-22.14.22890/level_zero/api/extensions/public/ze_exp_ext.cpp000066400000000000000000000034751422164147700266350ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/api/extensions/public/ze_exp_ext.h" #include "level_zero/core/source/event/event.h" #include "level_zero/core/source/image/image.h" #include "level_zero/core/source/kernel/kernel.h" #if defined(__cplusplus) extern "C" { #endif ZE_APIEXPORT ze_result_t ZE_APICALL zeKernelSetGlobalOffsetExp( ze_kernel_handle_t hKernel, uint32_t offsetX, uint32_t offsetY, uint32_t offsetZ) { return L0::Kernel::fromHandle(hKernel)->setGlobalOffsetExp(offsetX, offsetY, offsetZ); } ZE_APIEXPORT ze_result_t ZE_APICALL zeImageGetMemoryPropertiesExp( ze_image_handle_t hImage, ze_image_memory_properties_exp_t *pMemoryProperties) { return L0::Image::fromHandle(hImage)->getMemoryProperties(pMemoryProperties); } ZE_APIEXPORT ze_result_t ZE_APICALL zeImageGetAllocPropertiesExt( ze_context_handle_t hContext, ze_image_handle_t hImage, ze_image_allocation_ext_properties_t *pAllocProperties) { return L0::Context::fromHandle(hContext)->getImageAllocProperties(L0::Image::fromHandle(hImage), pAllocProperties); } ZE_APIEXPORT ze_result_t ZE_APICALL zeImageViewCreateExp( ze_context_handle_t hContext, ze_device_handle_t hDevice, const ze_image_desc_t *desc, ze_image_handle_t hImage, ze_image_handle_t *phImageView) { return L0::Image::fromHandle(hImage)->createView(L0::Device::fromHandle(hDevice), desc, phImageView); } ZE_APIEXPORT ze_result_t ZE_APICALL zeEventQueryTimestampsExp( ze_event_handle_t hEvent, ze_device_handle_t hDevice, uint32_t *pCount, ze_kernel_timestamp_result_t *pTimestamps) { return L0::Event::fromHandle(hEvent)->queryTimestampsExp(L0::Device::fromHandle(hDevice), pCount, pTimestamps); } #if defined(__cplusplus) } // extern "C" #endif 
compute-runtime-22.14.22890/level_zero/api/extensions/public/ze_exp_ext.h000066400000000000000000000002101422164147700262620ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include compute-runtime-22.14.22890/level_zero/api/extensions/public/zet_exp_ext.cpp000066400000000000000000000014411422164147700270100ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/metrics/metric.h" #include #if defined(__cplusplus) extern "C" { #endif ZE_APIEXPORT ze_result_t ZE_APICALL zetMetricGroupCalculateMultipleMetricValuesExp( zet_metric_group_handle_t hMetricGroup, zet_metric_group_calculation_type_t type, size_t rawDataSize, const uint8_t *pRawData, uint32_t *pSetCount, uint32_t *pTotalMetricValueCount, uint32_t *pMetricCounts, zet_typed_value_t *pMetricValues) { return L0::MetricGroup::fromHandle(hMetricGroup)->calculateMetricValuesExp(type, rawDataSize, pRawData, pSetCount, pTotalMetricValueCount, pMetricCounts, pMetricValues); } #if defined(__cplusplus) } // extern "C" #endif compute-runtime-22.14.22890/level_zero/api/sysman/000077500000000000000000000000001422164147700216035ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/api/sysman/CMakeLists.txt000066400000000000000000000004021422164147700243370ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SYSMAN_API ${CMAKE_CURRENT_SOURCE_DIR}/ze_sysman_loader.cpp ${CMAKE_CURRENT_SOURCE_DIR}/zes_sysman.cpp ) set_property(GLOBAL PROPERTY L0_SYSMAN_API ${L0_SYSMAN_API}) compute-runtime-22.14.22890/level_zero/api/sysman/ze_sysman_loader.cpp000066400000000000000000000440541422164147700256540ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/debug_env_reader.h" #include 
"level_zero/source/inc/ze_intel_gpu.h" #include #include #include #include #include #include #include "ze_ddi_tables.h" extern ze_gpu_driver_dditable_t driver_ddiTable; ZE_DLLEXPORT ze_result_t ZE_APICALL zesGetDeviceProcAddrTable( ze_api_version_t version, zes_device_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (ZE_MAJOR_VERSION(driver_ddiTable.version) != ZE_MAJOR_VERSION(version) || ZE_MINOR_VERSION(driver_ddiTable.version) > ZE_MINOR_VERSION(version)) return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; ze_result_t result = ZE_RESULT_SUCCESS; NEO::EnvironmentVariableReader envReader; bool isSysManEnabled = envReader.getSetting("ZES_ENABLE_SYSMAN", false); if (false == isSysManEnabled) { *pDdiTable = {}; return result; } pDdiTable->pfnGetProperties = zesDeviceGetProperties; pDdiTable->pfnGetState = zesDeviceGetState; pDdiTable->pfnReset = zesDeviceReset; pDdiTable->pfnProcessesGetState = zesDeviceProcessesGetState; pDdiTable->pfnPciGetProperties = zesDevicePciGetProperties; pDdiTable->pfnPciGetState = zesDevicePciGetState; pDdiTable->pfnPciGetBars = zesDevicePciGetBars; pDdiTable->pfnPciGetStats = zesDevicePciGetStats; pDdiTable->pfnEnumDiagnosticTestSuites = zesDeviceEnumDiagnosticTestSuites; pDdiTable->pfnEnumEngineGroups = zesDeviceEnumEngineGroups; pDdiTable->pfnEventRegister = zesDeviceEventRegister; pDdiTable->pfnEnumFabricPorts = zesDeviceEnumFabricPorts; pDdiTable->pfnEnumFans = zesDeviceEnumFans; pDdiTable->pfnEnumFirmwares = zesDeviceEnumFirmwares; pDdiTable->pfnEnumFrequencyDomains = zesDeviceEnumFrequencyDomains; pDdiTable->pfnEnumLeds = zesDeviceEnumLeds; pDdiTable->pfnEnumMemoryModules = zesDeviceEnumMemoryModules; pDdiTable->pfnEnumPerformanceFactorDomains = zesDeviceEnumPerformanceFactorDomains; pDdiTable->pfnEnumPowerDomains = zesDeviceEnumPowerDomains; pDdiTable->pfnEnumPsus = zesDeviceEnumPsus; pDdiTable->pfnEnumRasErrorSets = zesDeviceEnumRasErrorSets; pDdiTable->pfnEnumSchedulers = 
zesDeviceEnumSchedulers; pDdiTable->pfnEnumStandbyDomains = zesDeviceEnumStandbyDomains; pDdiTable->pfnEnumTemperatureSensors = zesDeviceEnumTemperatureSensors; return result; } ZE_DLLEXPORT ze_result_t ZE_APICALL zesGetDriverProcAddrTable( ze_api_version_t version, zes_driver_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (ZE_MAJOR_VERSION(driver_ddiTable.version) != ZE_MAJOR_VERSION(version) || ZE_MINOR_VERSION(driver_ddiTable.version) > ZE_MINOR_VERSION(version)) return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; ze_result_t result = ZE_RESULT_SUCCESS; NEO::EnvironmentVariableReader envReader; bool isSysManEnabled = envReader.getSetting("ZES_ENABLE_SYSMAN", false); if (false == isSysManEnabled) { *pDdiTable = {}; return result; } pDdiTable->pfnEventListen = zesDriverEventListen; pDdiTable->pfnEventListenEx = zesDriverEventListenEx; return result; } ZE_DLLEXPORT ze_result_t ZE_APICALL zesGetDiagnosticsProcAddrTable( ze_api_version_t version, zes_diagnostics_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (ZE_MAJOR_VERSION(driver_ddiTable.version) != ZE_MAJOR_VERSION(version) || ZE_MINOR_VERSION(driver_ddiTable.version) > ZE_MINOR_VERSION(version)) return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; ze_result_t result = ZE_RESULT_SUCCESS; NEO::EnvironmentVariableReader envReader; bool isSysManEnabled = envReader.getSetting("ZES_ENABLE_SYSMAN", false); if (false == isSysManEnabled) { *pDdiTable = {}; return result; } pDdiTable->pfnGetProperties = zesDiagnosticsGetProperties; pDdiTable->pfnGetTests = zesDiagnosticsGetTests; pDdiTable->pfnRunTests = zesDiagnosticsRunTests; return result; } ZE_DLLEXPORT ze_result_t ZE_APICALL zesGetEngineProcAddrTable( ze_api_version_t version, zes_engine_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (ZE_MAJOR_VERSION(driver_ddiTable.version) != ZE_MAJOR_VERSION(version) || 
ZE_MINOR_VERSION(driver_ddiTable.version) > ZE_MINOR_VERSION(version)) return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; ze_result_t result = ZE_RESULT_SUCCESS; NEO::EnvironmentVariableReader envReader; bool isSysManEnabled = envReader.getSetting("ZES_ENABLE_SYSMAN", false); if (false == isSysManEnabled) { *pDdiTable = {}; return result; } pDdiTable->pfnGetProperties = zesEngineGetProperties; pDdiTable->pfnGetActivity = zesEngineGetActivity; return result; } ZE_DLLEXPORT ze_result_t ZE_APICALL zesGetFabricPortProcAddrTable( ze_api_version_t version, zes_fabric_port_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (ZE_MAJOR_VERSION(driver_ddiTable.version) != ZE_MAJOR_VERSION(version) || ZE_MINOR_VERSION(driver_ddiTable.version) > ZE_MINOR_VERSION(version)) return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; ze_result_t result = ZE_RESULT_SUCCESS; NEO::EnvironmentVariableReader envReader; bool isSysManEnabled = envReader.getSetting("ZES_ENABLE_SYSMAN", false); if (false == isSysManEnabled) { *pDdiTable = {}; return result; } pDdiTable->pfnGetProperties = zesFabricPortGetProperties; pDdiTable->pfnGetLinkType = zesFabricPortGetLinkType; pDdiTable->pfnGetConfig = zesFabricPortGetConfig; pDdiTable->pfnSetConfig = zesFabricPortSetConfig; pDdiTable->pfnGetState = zesFabricPortGetState; pDdiTable->pfnGetThroughput = zesFabricPortGetThroughput; return result; } ZE_DLLEXPORT ze_result_t ZE_APICALL zesGetFanProcAddrTable( ze_api_version_t version, zes_fan_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (ZE_MAJOR_VERSION(driver_ddiTable.version) != ZE_MAJOR_VERSION(version) || ZE_MINOR_VERSION(driver_ddiTable.version) > ZE_MINOR_VERSION(version)) return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; ze_result_t result = ZE_RESULT_SUCCESS; NEO::EnvironmentVariableReader envReader; bool isSysManEnabled = envReader.getSetting("ZES_ENABLE_SYSMAN", false); if (false == isSysManEnabled) { *pDdiTable = {}; return 
result; } pDdiTable->pfnGetProperties = zesFanGetProperties; pDdiTable->pfnGetConfig = zesFanGetConfig; pDdiTable->pfnSetDefaultMode = zesFanSetDefaultMode; pDdiTable->pfnSetFixedSpeedMode = zesFanSetFixedSpeedMode; pDdiTable->pfnSetSpeedTableMode = zesFanSetSpeedTableMode; pDdiTable->pfnGetState = zesFanGetState; return result; } ZE_DLLEXPORT ze_result_t ZE_APICALL zesGetFirmwareProcAddrTable( ze_api_version_t version, zes_firmware_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (ZE_MAJOR_VERSION(driver_ddiTable.version) != ZE_MAJOR_VERSION(version) || ZE_MINOR_VERSION(driver_ddiTable.version) > ZE_MINOR_VERSION(version)) return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; ze_result_t result = ZE_RESULT_SUCCESS; NEO::EnvironmentVariableReader envReader; bool isSysManEnabled = envReader.getSetting("ZES_ENABLE_SYSMAN", false); if (false == isSysManEnabled) { *pDdiTable = {}; return result; } pDdiTable->pfnGetProperties = zesFirmwareGetProperties; pDdiTable->pfnFlash = zesFirmwareFlash; return result; } ZE_DLLEXPORT ze_result_t ZE_APICALL zesGetFrequencyProcAddrTable( ze_api_version_t version, zes_frequency_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (ZE_MAJOR_VERSION(driver_ddiTable.version) != ZE_MAJOR_VERSION(version) || ZE_MINOR_VERSION(driver_ddiTable.version) > ZE_MINOR_VERSION(version)) return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; ze_result_t result = ZE_RESULT_SUCCESS; NEO::EnvironmentVariableReader envReader; bool isSysManEnabled = envReader.getSetting("ZES_ENABLE_SYSMAN", false); if (false == isSysManEnabled) { *pDdiTable = {}; return result; } pDdiTable->pfnGetProperties = zesFrequencyGetProperties; pDdiTable->pfnGetAvailableClocks = zesFrequencyGetAvailableClocks; pDdiTable->pfnGetRange = zesFrequencyGetRange; pDdiTable->pfnSetRange = zesFrequencySetRange; pDdiTable->pfnGetState = zesFrequencyGetState; pDdiTable->pfnGetThrottleTime = zesFrequencyGetThrottleTime; 
pDdiTable->pfnOcGetCapabilities = zesFrequencyOcGetCapabilities; pDdiTable->pfnOcGetFrequencyTarget = zesFrequencyOcGetFrequencyTarget; pDdiTable->pfnOcSetFrequencyTarget = zesFrequencyOcSetFrequencyTarget; pDdiTable->pfnOcGetVoltageTarget = zesFrequencyOcGetVoltageTarget; pDdiTable->pfnOcSetVoltageTarget = zesFrequencyOcSetVoltageTarget; pDdiTable->pfnOcSetMode = zesFrequencyOcSetMode; pDdiTable->pfnOcGetMode = zesFrequencyOcGetMode; pDdiTable->pfnOcGetIccMax = zesFrequencyOcGetIccMax; pDdiTable->pfnOcSetIccMax = zesFrequencyOcSetIccMax; pDdiTable->pfnOcGetTjMax = zesFrequencyOcGetTjMax; pDdiTable->pfnOcSetTjMax = zesFrequencyOcSetTjMax; return result; } ZE_DLLEXPORT ze_result_t ZE_APICALL zesGetLedProcAddrTable( ze_api_version_t version, zes_led_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (ZE_MAJOR_VERSION(driver_ddiTable.version) != ZE_MAJOR_VERSION(version) || ZE_MINOR_VERSION(driver_ddiTable.version) > ZE_MINOR_VERSION(version)) return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; ze_result_t result = ZE_RESULT_SUCCESS; NEO::EnvironmentVariableReader envReader; bool isSysManEnabled = envReader.getSetting("ZES_ENABLE_SYSMAN", false); if (false == isSysManEnabled) { *pDdiTable = {}; return result; } pDdiTable->pfnGetProperties = zesLedGetProperties; pDdiTable->pfnGetState = zesLedGetState; pDdiTable->pfnSetState = zesLedSetState; pDdiTable->pfnSetColor = zesLedSetColor; return result; } ZE_DLLEXPORT ze_result_t ZE_APICALL zesGetMemoryProcAddrTable( ze_api_version_t version, zes_memory_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (ZE_MAJOR_VERSION(driver_ddiTable.version) != ZE_MAJOR_VERSION(version) || ZE_MINOR_VERSION(driver_ddiTable.version) > ZE_MINOR_VERSION(version)) return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; ze_result_t result = ZE_RESULT_SUCCESS; NEO::EnvironmentVariableReader envReader; bool isSysManEnabled = envReader.getSetting("ZES_ENABLE_SYSMAN", false); if 
(false == isSysManEnabled) { *pDdiTable = {}; return result; } pDdiTable->pfnGetProperties = zesMemoryGetProperties; pDdiTable->pfnGetState = zesMemoryGetState; pDdiTable->pfnGetBandwidth = zesMemoryGetBandwidth; return result; } ZE_DLLEXPORT ze_result_t ZE_APICALL zesGetPerformanceFactorProcAddrTable( ze_api_version_t version, zes_performance_factor_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (ZE_MAJOR_VERSION(driver_ddiTable.version) != ZE_MAJOR_VERSION(version) || ZE_MINOR_VERSION(driver_ddiTable.version) > ZE_MINOR_VERSION(version)) return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; ze_result_t result = ZE_RESULT_SUCCESS; NEO::EnvironmentVariableReader envReader; bool isSysManEnabled = envReader.getSetting("ZES_ENABLE_SYSMAN", false); if (false == isSysManEnabled) { *pDdiTable = {}; return result; } pDdiTable->pfnGetProperties = zesPerformanceFactorGetProperties; pDdiTable->pfnGetConfig = zesPerformanceFactorGetConfig; pDdiTable->pfnSetConfig = zesPerformanceFactorSetConfig; return result; } ZE_DLLEXPORT ze_result_t ZE_APICALL zesGetPowerProcAddrTable( ze_api_version_t version, zes_power_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (ZE_MAJOR_VERSION(driver_ddiTable.version) != ZE_MAJOR_VERSION(version) || ZE_MINOR_VERSION(driver_ddiTable.version) > ZE_MINOR_VERSION(version)) return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; ze_result_t result = ZE_RESULT_SUCCESS; NEO::EnvironmentVariableReader envReader; bool isSysManEnabled = envReader.getSetting("ZES_ENABLE_SYSMAN", false); if (false == isSysManEnabled) { *pDdiTable = {}; return result; } pDdiTable->pfnGetProperties = zesPowerGetProperties; pDdiTable->pfnGetEnergyCounter = zesPowerGetEnergyCounter; pDdiTable->pfnGetLimits = zesPowerGetLimits; pDdiTable->pfnSetLimits = zesPowerSetLimits; pDdiTable->pfnGetEnergyThreshold = zesPowerGetEnergyThreshold; pDdiTable->pfnSetEnergyThreshold = zesPowerSetEnergyThreshold; return 
result; } ZE_DLLEXPORT ze_result_t ZE_APICALL zesGetPsuProcAddrTable( ze_api_version_t version, zes_psu_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (ZE_MAJOR_VERSION(driver_ddiTable.version) != ZE_MAJOR_VERSION(version) || ZE_MINOR_VERSION(driver_ddiTable.version) > ZE_MINOR_VERSION(version)) return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; ze_result_t result = ZE_RESULT_SUCCESS; NEO::EnvironmentVariableReader envReader; bool isSysManEnabled = envReader.getSetting("ZES_ENABLE_SYSMAN", false); if (false == isSysManEnabled) { *pDdiTable = {}; return result; } pDdiTable->pfnGetProperties = zesPsuGetProperties; pDdiTable->pfnGetState = zesPsuGetState; return result; } ZE_DLLEXPORT ze_result_t ZE_APICALL zesGetRasProcAddrTable( ze_api_version_t version, zes_ras_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (ZE_MAJOR_VERSION(driver_ddiTable.version) != ZE_MAJOR_VERSION(version) || ZE_MINOR_VERSION(driver_ddiTable.version) > ZE_MINOR_VERSION(version)) return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; ze_result_t result = ZE_RESULT_SUCCESS; NEO::EnvironmentVariableReader envReader; bool isSysManEnabled = envReader.getSetting("ZES_ENABLE_SYSMAN", false); if (false == isSysManEnabled) { *pDdiTable = {}; return result; } pDdiTable->pfnGetProperties = zesRasGetProperties; pDdiTable->pfnGetConfig = zesRasGetConfig; pDdiTable->pfnSetConfig = zesRasSetConfig; pDdiTable->pfnGetState = zesRasGetState; return result; } ZE_DLLEXPORT ze_result_t ZE_APICALL zesGetSchedulerProcAddrTable( ze_api_version_t version, zes_scheduler_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (ZE_MAJOR_VERSION(driver_ddiTable.version) != ZE_MAJOR_VERSION(version) || ZE_MINOR_VERSION(driver_ddiTable.version) > ZE_MINOR_VERSION(version)) return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; ze_result_t result = ZE_RESULT_SUCCESS; NEO::EnvironmentVariableReader envReader; bool 
isSysManEnabled = envReader.getSetting("ZES_ENABLE_SYSMAN", false); if (false == isSysManEnabled) { *pDdiTable = {}; return result; } pDdiTable->pfnGetProperties = zesSchedulerGetProperties; pDdiTable->pfnGetCurrentMode = zesSchedulerGetCurrentMode; pDdiTable->pfnGetTimeoutModeProperties = zesSchedulerGetTimeoutModeProperties; pDdiTable->pfnGetTimesliceModeProperties = zesSchedulerGetTimesliceModeProperties; pDdiTable->pfnSetTimeoutMode = zesSchedulerSetTimeoutMode; pDdiTable->pfnSetTimesliceMode = zesSchedulerSetTimesliceMode; pDdiTable->pfnSetExclusiveMode = zesSchedulerSetExclusiveMode; pDdiTable->pfnSetComputeUnitDebugMode = zesSchedulerSetComputeUnitDebugMode; return result; } ZE_DLLEXPORT ze_result_t ZE_APICALL zesGetStandbyProcAddrTable( ze_api_version_t version, zes_standby_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (ZE_MAJOR_VERSION(driver_ddiTable.version) != ZE_MAJOR_VERSION(version) || ZE_MINOR_VERSION(driver_ddiTable.version) > ZE_MINOR_VERSION(version)) return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; ze_result_t result = ZE_RESULT_SUCCESS; NEO::EnvironmentVariableReader envReader; bool isSysManEnabled = envReader.getSetting("ZES_ENABLE_SYSMAN", false); if (false == isSysManEnabled) { *pDdiTable = {}; return result; } pDdiTable->pfnGetProperties = zesStandbyGetProperties; pDdiTable->pfnGetMode = zesStandbyGetMode; pDdiTable->pfnSetMode = zesStandbySetMode; return result; } ZE_DLLEXPORT ze_result_t ZE_APICALL zesGetTemperatureProcAddrTable( ze_api_version_t version, zes_temperature_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (ZE_MAJOR_VERSION(driver_ddiTable.version) != ZE_MAJOR_VERSION(version) || ZE_MINOR_VERSION(driver_ddiTable.version) > ZE_MINOR_VERSION(version)) return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; ze_result_t result = ZE_RESULT_SUCCESS; NEO::EnvironmentVariableReader envReader; bool isSysManEnabled = envReader.getSetting("ZES_ENABLE_SYSMAN", 
false); if (false == isSysManEnabled) { *pDdiTable = {}; return result; } pDdiTable->pfnGetProperties = zesTemperatureGetProperties; pDdiTable->pfnGetConfig = zesTemperatureGetConfig; pDdiTable->pfnSetConfig = zesTemperatureSetConfig; pDdiTable->pfnGetState = zesTemperatureGetState; return result; } compute-runtime-22.14.22890/level_zero/api/sysman/zes_sysman.cpp000066400000000000000000000527461422164147700245200ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "sysman/sysman.h" ZE_APIEXPORT ze_result_t ZE_APICALL zesDeviceGetProperties( zes_device_handle_t hDevice, zes_device_properties_t *pProperties) { return L0::SysmanDevice::deviceGetProperties(hDevice, pProperties); } ZE_APIEXPORT ze_result_t ZE_APICALL zesDeviceGetState( zes_device_handle_t hDevice, zes_device_state_t *pState) { return L0::SysmanDevice::deviceGetState(hDevice, pState); } ZE_APIEXPORT ze_result_t ZE_APICALL zesDeviceEnumSchedulers( zes_device_handle_t hDevice, uint32_t *pCount, zes_sched_handle_t *phScheduler) { return L0::SysmanDevice::schedulerGet(hDevice, pCount, phScheduler); } ZE_APIEXPORT ze_result_t ZE_APICALL zesSchedulerGetProperties( zes_sched_handle_t hScheduler, zes_sched_properties_t *pProperties) { return L0::Scheduler::fromHandle(hScheduler)->schedulerGetProperties(pProperties); } ZE_APIEXPORT ze_result_t ZE_APICALL zesSchedulerGetCurrentMode( zes_sched_handle_t hScheduler, zes_sched_mode_t *pMode) { return L0::Scheduler::fromHandle(hScheduler)->getCurrentMode(pMode); } ZE_APIEXPORT ze_result_t ZE_APICALL zesSchedulerGetTimeoutModeProperties( zes_sched_handle_t hScheduler, ze_bool_t getDefaults, zes_sched_timeout_properties_t *pConfig) { return L0::Scheduler::fromHandle(hScheduler)->getTimeoutModeProperties(getDefaults, pConfig); } ZE_APIEXPORT ze_result_t ZE_APICALL zesSchedulerGetTimesliceModeProperties( zes_sched_handle_t hScheduler, ze_bool_t getDefaults, zes_sched_timeslice_properties_t *pConfig) { 
return L0::Scheduler::fromHandle(hScheduler)->getTimesliceModeProperties(getDefaults, pConfig); } ZE_APIEXPORT ze_result_t ZE_APICALL zesSchedulerSetTimeoutMode( zes_sched_handle_t hScheduler, zes_sched_timeout_properties_t *pProperties, ze_bool_t *pNeedReload) { return L0::Scheduler::fromHandle(hScheduler)->setTimeoutMode(pProperties, pNeedReload); } ZE_APIEXPORT ze_result_t ZE_APICALL zesSchedulerSetTimesliceMode( zes_sched_handle_t hScheduler, zes_sched_timeslice_properties_t *pProperties, ze_bool_t *pNeedReload) { return L0::Scheduler::fromHandle(hScheduler)->setTimesliceMode(pProperties, pNeedReload); } ZE_APIEXPORT ze_result_t ZE_APICALL zesSchedulerSetExclusiveMode( zes_sched_handle_t hScheduler, ze_bool_t *pNeedReload) { return L0::Scheduler::fromHandle(hScheduler)->setExclusiveMode(pNeedReload); } ZE_APIEXPORT ze_result_t ZE_APICALL zesSchedulerSetComputeUnitDebugMode( zes_sched_handle_t hScheduler, ze_bool_t *pNeedReload) { return L0::Scheduler::fromHandle(hScheduler)->setComputeUnitDebugMode(pNeedReload); } ZE_APIEXPORT ze_result_t ZE_APICALL zesDeviceProcessesGetState( zes_device_handle_t hDevice, uint32_t *pCount, zes_process_state_t *pProcesses) { return L0::SysmanDevice::processesGetState(hDevice, pCount, pProcesses); } ZE_APIEXPORT ze_result_t ZE_APICALL zesDeviceReset( zes_device_handle_t hDevice, ze_bool_t force) { return L0::SysmanDevice::deviceReset(hDevice, force); } ZE_APIEXPORT ze_result_t ZE_APICALL zesDevicePciGetProperties( zes_device_handle_t hDevice, zes_pci_properties_t *pProperties) { return L0::SysmanDevice::pciGetProperties(hDevice, pProperties); } ZE_APIEXPORT ze_result_t ZE_APICALL zesDevicePciGetState( zes_device_handle_t hDevice, zes_pci_state_t *pState) { return L0::SysmanDevice::pciGetState(hDevice, pState); } ZE_APIEXPORT ze_result_t ZE_APICALL zesDevicePciGetBars( zes_device_handle_t hDevice, uint32_t *pCount, zes_pci_bar_properties_t *pProperties) { return L0::SysmanDevice::pciGetBars(hDevice, pCount, pProperties); } 
// ---------------------------------------------------------------------------
// Sysman API entry points (PCI stats through diagnostics).
//
// Every function below is a thin forwarder: device-level calls go to the
// static L0::SysmanDevice helpers, and per-object calls first resolve the API
// handle with the matching L0::<Type>::fromHandle() and then invoke the
// corresponding member. Handles and pointers are passed through unchecked.
// ---------------------------------------------------------------------------

// Delegates to L0::SysmanDevice::pciGetStats with the arguments unchanged.
ZE_APIEXPORT ze_result_t ZE_APICALL
zesDevicePciGetStats(
    zes_device_handle_t hDevice,
    zes_pci_stats_t *pStats) {
    return L0::SysmanDevice::pciGetStats(hDevice, pStats);
}

// ------------------------------- Power -------------------------------------

// Delegates to L0::SysmanDevice::powerGet with the arguments unchanged.
ZE_APIEXPORT ze_result_t ZE_APICALL
zesDeviceEnumPowerDomains(
    zes_device_handle_t hDevice,
    uint32_t *pCount,
    zes_pwr_handle_t *phPower) {
    return L0::SysmanDevice::powerGet(hDevice, pCount, phPower);
}

// Resolves hPower and calls powerGetProperties on the resulting object.
ZE_APIEXPORT ze_result_t ZE_APICALL
zesPowerGetProperties(
    zes_pwr_handle_t hPower,
    zes_power_properties_t *pProperties) {
    auto power = L0::Power::fromHandle(hPower);
    return power->powerGetProperties(pProperties);
}

// Resolves hPower and calls powerGetEnergyCounter on the resulting object.
ZE_APIEXPORT ze_result_t ZE_APICALL
zesPowerGetEnergyCounter(
    zes_pwr_handle_t hPower,
    zes_power_energy_counter_t *pEnergy) {
    auto power = L0::Power::fromHandle(hPower);
    return power->powerGetEnergyCounter(pEnergy);
}

// Resolves hPower and calls powerGetLimits with the three limit pointers.
ZE_APIEXPORT ze_result_t ZE_APICALL
zesPowerGetLimits(
    zes_pwr_handle_t hPower,
    zes_power_sustained_limit_t *pSustained,
    zes_power_burst_limit_t *pBurst,
    zes_power_peak_limit_t *pPeak) {
    auto power = L0::Power::fromHandle(hPower);
    return power->powerGetLimits(pSustained, pBurst, pPeak);
}

// Resolves hPower and calls powerSetLimits with the three limit pointers.
ZE_APIEXPORT ze_result_t ZE_APICALL
zesPowerSetLimits(
    zes_pwr_handle_t hPower,
    const zes_power_sustained_limit_t *pSustained,
    const zes_power_burst_limit_t *pBurst,
    const zes_power_peak_limit_t *pPeak) {
    auto power = L0::Power::fromHandle(hPower);
    return power->powerSetLimits(pSustained, pBurst, pPeak);
}

// Resolves hPower and calls powerGetEnergyThreshold on the resulting object.
ZE_APIEXPORT ze_result_t ZE_APICALL
zesPowerGetEnergyThreshold(
    zes_pwr_handle_t hPower,
    zes_energy_threshold_t *pThreshold) {
    auto power = L0::Power::fromHandle(hPower);
    return power->powerGetEnergyThreshold(pThreshold);
}

// Resolves hPower and calls powerSetEnergyThreshold with the given value.
ZE_APIEXPORT ze_result_t ZE_APICALL
zesPowerSetEnergyThreshold(
    zes_pwr_handle_t hPower,
    double threshold) {
    auto power = L0::Power::fromHandle(hPower);
    return power->powerSetEnergyThreshold(threshold);
}

// ----------------------------- Frequency -----------------------------------

// Delegates to L0::SysmanDevice::frequencyGet with the arguments unchanged.
ZE_APIEXPORT ze_result_t ZE_APICALL
zesDeviceEnumFrequencyDomains(
    zes_device_handle_t hDevice,
    uint32_t *pCount,
    zes_freq_handle_t *phFrequency) {
    return L0::SysmanDevice::frequencyGet(hDevice, pCount, phFrequency);
}

// Resolves hFrequency and calls frequencyGetProperties.
ZE_APIEXPORT ze_result_t ZE_APICALL
zesFrequencyGetProperties(
    zes_freq_handle_t hFrequency,
    zes_freq_properties_t *pProperties) {
    auto frequency = L0::Frequency::fromHandle(hFrequency);
    return frequency->frequencyGetProperties(pProperties);
}

// Resolves hFrequency and calls frequencyGetAvailableClocks.
ZE_APIEXPORT ze_result_t ZE_APICALL
zesFrequencyGetAvailableClocks(
    zes_freq_handle_t hFrequency,
    uint32_t *pCount,
    double *phFrequency) {
    auto frequency = L0::Frequency::fromHandle(hFrequency);
    return frequency->frequencyGetAvailableClocks(pCount, phFrequency);
}

// Resolves hFrequency and calls frequencyGetRange.
ZE_APIEXPORT ze_result_t ZE_APICALL
zesFrequencyGetRange(
    zes_freq_handle_t hFrequency,
    zes_freq_range_t *pLimits) {
    auto frequency = L0::Frequency::fromHandle(hFrequency);
    return frequency->frequencyGetRange(pLimits);
}

// Resolves hFrequency and calls frequencySetRange.
ZE_APIEXPORT ze_result_t ZE_APICALL
zesFrequencySetRange(
    zes_freq_handle_t hFrequency,
    const zes_freq_range_t *pLimits) {
    auto frequency = L0::Frequency::fromHandle(hFrequency);
    return frequency->frequencySetRange(pLimits);
}

// Resolves hFrequency and calls frequencyGetState.
ZE_APIEXPORT ze_result_t ZE_APICALL
zesFrequencyGetState(
    zes_freq_handle_t hFrequency,
    zes_freq_state_t *pState) {
    auto frequency = L0::Frequency::fromHandle(hFrequency);
    return frequency->frequencyGetState(pState);
}

// Resolves hFrequency and calls frequencyGetThrottleTime.
ZE_APIEXPORT ze_result_t ZE_APICALL
zesFrequencyGetThrottleTime(
    zes_freq_handle_t hFrequency,
    zes_freq_throttle_time_t *pThrottleTime) {
    auto frequency = L0::Frequency::fromHandle(hFrequency);
    return frequency->frequencyGetThrottleTime(pThrottleTime);
}

// Resolves hFrequency and calls frequencyOcGetFrequencyTarget.
ZE_APIEXPORT ze_result_t ZE_APICALL
zesFrequencyOcGetFrequencyTarget(
    zes_freq_handle_t hFrequency,
    double *pCurrentOcFrequency) {
    auto frequency = L0::Frequency::fromHandle(hFrequency);
    return frequency->frequencyOcGetFrequencyTarget(pCurrentOcFrequency);
}

// Resolves hFrequency and calls frequencyOcSetFrequencyTarget.
ZE_APIEXPORT ze_result_t ZE_APICALL
zesFrequencyOcSetFrequencyTarget(
    zes_freq_handle_t hFrequency,
    double currentOcFrequency) {
    auto frequency = L0::Frequency::fromHandle(hFrequency);
    return frequency->frequencyOcSetFrequencyTarget(currentOcFrequency);
}

// Resolves hFrequency and calls frequencyOcGetVoltageTarget.
ZE_APIEXPORT ze_result_t ZE_APICALL
zesFrequencyOcGetVoltageTarget(
    zes_freq_handle_t hFrequency,
    double *pCurrentVoltageTarget,
    double *pCurrentVoltageOffset) {
    auto frequency = L0::Frequency::fromHandle(hFrequency);
    return frequency->frequencyOcGetVoltageTarget(pCurrentVoltageTarget, pCurrentVoltageOffset);
}

// Resolves hFrequency and calls frequencyOcSetVoltageTarget.
ZE_APIEXPORT ze_result_t ZE_APICALL
zesFrequencyOcSetVoltageTarget(
    zes_freq_handle_t hFrequency,
    double currentVoltageTarget,
    double currentVoltageOffset) {
    auto frequency = L0::Frequency::fromHandle(hFrequency);
    return frequency->frequencyOcSetVoltageTarget(currentVoltageTarget, currentVoltageOffset);
}

// Resolves hFrequency and calls frequencyOcSetMode.
ZE_APIEXPORT ze_result_t ZE_APICALL
zesFrequencyOcSetMode(
    zes_freq_handle_t hFrequency,
    zes_oc_mode_t currentOcMode) {
    auto frequency = L0::Frequency::fromHandle(hFrequency);
    return frequency->frequencyOcSetMode(currentOcMode);
}

// Resolves hFrequency and calls frequencyOcGetMode.
ZE_APIEXPORT ze_result_t ZE_APICALL
zesFrequencyOcGetMode(
    zes_freq_handle_t hFrequency,
    zes_oc_mode_t *pCurrentOcMode) {
    auto frequency = L0::Frequency::fromHandle(hFrequency);
    return frequency->frequencyOcGetMode(pCurrentOcMode);
}

// Resolves hFrequency and calls frequencyOcGetCapabilities.
ZE_APIEXPORT ze_result_t ZE_APICALL
zesFrequencyOcGetCapabilities(
    zes_freq_handle_t hFrequency,
    zes_oc_capabilities_t *pOcCapabilities) {
    auto frequency = L0::Frequency::fromHandle(hFrequency);
    return frequency->frequencyOcGetCapabilities(pOcCapabilities);
}

// Resolves hFrequency and calls frequencyOcGetIccMax.
ZE_APIEXPORT ze_result_t ZE_APICALL
zesFrequencyOcGetIccMax(
    zes_freq_handle_t hFrequency,
    double *pOcIccMax) {
    auto frequency = L0::Frequency::fromHandle(hFrequency);
    return frequency->frequencyOcGetIccMax(pOcIccMax);
}

// Resolves hFrequency and calls frequencyOcSetIccMax.
ZE_APIEXPORT ze_result_t ZE_APICALL
zesFrequencyOcSetIccMax(
    zes_freq_handle_t hFrequency,
    double ocIccMax) {
    auto frequency = L0::Frequency::fromHandle(hFrequency);
    return frequency->frequencyOcSetIccMax(ocIccMax);
}

// Resolves hFrequency and calls the TjMax getter.
// NOTE(review): the member is spelled "frequencyOcGeTjMax" (no 't' in "Get");
// it is declared outside this file, so the spelling is kept as referenced here.
ZE_APIEXPORT ze_result_t ZE_APICALL
zesFrequencyOcGetTjMax(
    zes_freq_handle_t hFrequency,
    double *pOcTjMax) {
    auto frequency = L0::Frequency::fromHandle(hFrequency);
    return frequency->frequencyOcGeTjMax(pOcTjMax);
}

// Resolves hFrequency and calls frequencyOcSetTjMax.
ZE_APIEXPORT ze_result_t ZE_APICALL
zesFrequencyOcSetTjMax(
    zes_freq_handle_t hFrequency,
    double ocTjMax) {
    auto frequency = L0::Frequency::fromHandle(hFrequency);
    return frequency->frequencyOcSetTjMax(ocTjMax);
}

// ------------------------------- Engine ------------------------------------

// Delegates to L0::SysmanDevice::engineGet with the arguments unchanged.
ZE_APIEXPORT ze_result_t ZE_APICALL
zesDeviceEnumEngineGroups(
    zes_device_handle_t hDevice,
    uint32_t *pCount,
    zes_engine_handle_t *phEngine) {
    return L0::SysmanDevice::engineGet(hDevice, pCount, phEngine);
}

// Resolves hEngine and calls engineGetProperties.
ZE_APIEXPORT ze_result_t ZE_APICALL
zesEngineGetProperties(
    zes_engine_handle_t hEngine,
    zes_engine_properties_t *pProperties) {
    auto engine = L0::Engine::fromHandle(hEngine);
    return engine->engineGetProperties(pProperties);
}

// Resolves hEngine and calls engineGetActivity.
ZE_APIEXPORT ze_result_t ZE_APICALL
zesEngineGetActivity(
    zes_engine_handle_t hEngine,
    zes_engine_stats_t *pStats) {
    auto engine = L0::Engine::fromHandle(hEngine);
    return engine->engineGetActivity(pStats);
}

// ------------------------------- Standby -----------------------------------

// Delegates to L0::SysmanDevice::standbyGet with the arguments unchanged.
ZE_APIEXPORT ze_result_t ZE_APICALL
zesDeviceEnumStandbyDomains(
    zes_device_handle_t hDevice,
    uint32_t *pCount,
    zes_standby_handle_t *phStandby) {
    return L0::SysmanDevice::standbyGet(hDevice, pCount, phStandby);
}

// Resolves hStandby and calls standbyGetProperties.
ZE_APIEXPORT ze_result_t ZE_APICALL
zesStandbyGetProperties(
    zes_standby_handle_t hStandby,
    zes_standby_properties_t *pProperties) {
    auto standby = L0::Standby::fromHandle(hStandby);
    return standby->standbyGetProperties(pProperties);
}

// Resolves hStandby and calls standbyGetMode.
ZE_APIEXPORT ze_result_t ZE_APICALL
zesStandbyGetMode(
    zes_standby_handle_t hStandby,
    zes_standby_promo_mode_t *pMode) {
    auto standby = L0::Standby::fromHandle(hStandby);
    return standby->standbyGetMode(pMode);
}

// Resolves hStandby and calls standbySetMode.
ZE_APIEXPORT ze_result_t ZE_APICALL
zesStandbySetMode(
    zes_standby_handle_t hStandby,
    zes_standby_promo_mode_t mode) {
    auto standby = L0::Standby::fromHandle(hStandby);
    return standby->standbySetMode(mode);
}

// ------------------------------ Firmware -----------------------------------

// Delegates to L0::SysmanDevice::firmwareGet with the arguments unchanged.
ZE_APIEXPORT ze_result_t ZE_APICALL
zesDeviceEnumFirmwares(
    zes_device_handle_t hDevice,
    uint32_t *pCount,
    zes_firmware_handle_t *phFirmware) {
    return L0::SysmanDevice::firmwareGet(hDevice, pCount, phFirmware);
}

// Resolves hFirmware and calls firmwareGetProperties.
ZE_APIEXPORT ze_result_t ZE_APICALL
zesFirmwareGetProperties(
    zes_firmware_handle_t hFirmware,
    zes_firmware_properties_t *pProperties) {
    auto firmware = L0::Firmware::fromHandle(hFirmware);
    return firmware->firmwareGetProperties(pProperties);
}

// Resolves hFirmware and calls firmwareFlash with the image pointer and size.
ZE_APIEXPORT ze_result_t ZE_APICALL
zesFirmwareFlash(
    zes_firmware_handle_t hFirmware,
    void *pImage,
    uint32_t size) {
    auto firmware = L0::Firmware::fromHandle(hFirmware);
    return firmware->firmwareFlash(pImage, size);
}

// ------------------------------- Memory ------------------------------------

// Delegates to L0::SysmanDevice::memoryGet with the arguments unchanged.
ZE_APIEXPORT ze_result_t ZE_APICALL
zesDeviceEnumMemoryModules(
    zes_device_handle_t hDevice,
    uint32_t *pCount,
    zes_mem_handle_t *phMemory) {
    return L0::SysmanDevice::memoryGet(hDevice, pCount, phMemory);
}

// Resolves hMemory and calls memoryGetProperties.
ZE_APIEXPORT ze_result_t ZE_APICALL
zesMemoryGetProperties(
    zes_mem_handle_t hMemory,
    zes_mem_properties_t *pProperties) {
    auto memory = L0::Memory::fromHandle(hMemory);
    return memory->memoryGetProperties(pProperties);
}

// Resolves hMemory and calls memoryGetState.
ZE_APIEXPORT ze_result_t ZE_APICALL
zesMemoryGetState(
    zes_mem_handle_t hMemory,
    zes_mem_state_t *pState) {
    auto memory = L0::Memory::fromHandle(hMemory);
    return memory->memoryGetState(pState);
}

// Resolves hMemory and calls memoryGetBandwidth.
ZE_APIEXPORT ze_result_t ZE_APICALL
zesMemoryGetBandwidth(
    zes_mem_handle_t hMemory,
    zes_mem_bandwidth_t *pBandwidth) {
    auto memory = L0::Memory::fromHandle(hMemory);
    return memory->memoryGetBandwidth(pBandwidth);
}

// ----------------------------- Fabric port ---------------------------------

// Delegates to L0::SysmanDevice::fabricPortGet with the arguments unchanged.
ZE_APIEXPORT ze_result_t ZE_APICALL
zesDeviceEnumFabricPorts(
    zes_device_handle_t hDevice,
    uint32_t *pCount,
    zes_fabric_port_handle_t *phPort) {
    return L0::SysmanDevice::fabricPortGet(hDevice, pCount, phPort);
}

// Resolves hPort and calls fabricPortGetProperties.
ZE_APIEXPORT ze_result_t ZE_APICALL
zesFabricPortGetProperties(
    zes_fabric_port_handle_t hPort,
    zes_fabric_port_properties_t *pProperties) {
    auto port = L0::FabricPort::fromHandle(hPort);
    return port->fabricPortGetProperties(pProperties);
}

// Resolves hPort and calls fabricPortGetLinkType.
ZE_APIEXPORT ze_result_t ZE_APICALL
zesFabricPortGetLinkType(
    zes_fabric_port_handle_t hPort,
    zes_fabric_link_type_t *pLinkType) {
    auto port = L0::FabricPort::fromHandle(hPort);
    return port->fabricPortGetLinkType(pLinkType);
}

// Resolves hPort and calls fabricPortGetConfig.
ZE_APIEXPORT ze_result_t ZE_APICALL
zesFabricPortGetConfig(
    zes_fabric_port_handle_t hPort,
    zes_fabric_port_config_t *pConfig) {
    auto port = L0::FabricPort::fromHandle(hPort);
    return port->fabricPortGetConfig(pConfig);
}

// Resolves hPort and calls fabricPortSetConfig.
ZE_APIEXPORT ze_result_t ZE_APICALL
zesFabricPortSetConfig(
    zes_fabric_port_handle_t hPort,
    const zes_fabric_port_config_t *pConfig) {
    auto port = L0::FabricPort::fromHandle(hPort);
    return port->fabricPortSetConfig(pConfig);
}

// Resolves hPort and calls fabricPortGetState.
ZE_APIEXPORT ze_result_t ZE_APICALL
zesFabricPortGetState(
    zes_fabric_port_handle_t hPort,
    zes_fabric_port_state_t *pState) {
    auto port = L0::FabricPort::fromHandle(hPort);
    return port->fabricPortGetState(pState);
}

// Resolves hPort and calls fabricPortGetThroughput.
ZE_APIEXPORT ze_result_t ZE_APICALL
zesFabricPortGetThroughput(
    zes_fabric_port_handle_t hPort,
    zes_fabric_port_throughput_t *pThroughput) {
    auto port = L0::FabricPort::fromHandle(hPort);
    return port->fabricPortGetThroughput(pThroughput);
}

// ----------------------------- Temperature ---------------------------------

// Delegates to L0::SysmanDevice::temperatureGet with the arguments unchanged.
ZE_APIEXPORT ze_result_t ZE_APICALL
zesDeviceEnumTemperatureSensors(
    zes_device_handle_t hDevice,
    uint32_t *pCount,
    zes_temp_handle_t *phTemperature) {
    return L0::SysmanDevice::temperatureGet(hDevice, pCount, phTemperature);
}

// Resolves hTemperature and calls temperatureGetProperties.
ZE_APIEXPORT ze_result_t ZE_APICALL
zesTemperatureGetProperties(
    zes_temp_handle_t hTemperature,
    zes_temp_properties_t *pProperties) {
    auto sensor = L0::Temperature::fromHandle(hTemperature);
    return sensor->temperatureGetProperties(pProperties);
}

// Resolves hTemperature and calls temperatureGetConfig.
ZE_APIEXPORT ze_result_t ZE_APICALL
zesTemperatureGetConfig(
    zes_temp_handle_t hTemperature,
    zes_temp_config_t *pConfig) {
    auto sensor = L0::Temperature::fromHandle(hTemperature);
    return sensor->temperatureGetConfig(pConfig);
}

// Resolves hTemperature and calls temperatureSetConfig.
ZE_APIEXPORT ze_result_t ZE_APICALL
zesTemperatureSetConfig(
    zes_temp_handle_t hTemperature,
    const zes_temp_config_t *pConfig) {
    auto sensor = L0::Temperature::fromHandle(hTemperature);
    return sensor->temperatureSetConfig(pConfig);
}

// Resolves hTemperature and calls temperatureGetState.
ZE_APIEXPORT ze_result_t ZE_APICALL
zesTemperatureGetState(
    zes_temp_handle_t hTemperature,
    double *pTemperature) {
    auto sensor = L0::Temperature::fromHandle(hTemperature);
    return sensor->temperatureGetState(pTemperature);
}

// -------------------------------- PSU --------------------------------------
// The PSU entry points ignore their arguments and always report
// ZE_RESULT_ERROR_UNSUPPORTED_FEATURE.

ZE_APIEXPORT ze_result_t ZE_APICALL
zesDeviceEnumPsus(
    zes_device_handle_t hDevice,
    uint32_t *pCount,
    zes_psu_handle_t *phPsu) {
    return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

ZE_APIEXPORT ze_result_t ZE_APICALL
zesPsuGetProperties(
    zes_psu_handle_t hPsu,
    zes_psu_properties_t *pProperties) {
    return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

ZE_APIEXPORT ze_result_t ZE_APICALL
zesPsuGetState(
    zes_psu_handle_t hPsu,
    zes_psu_state_t *pState) {
    return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

// -------------------------------- Fan --------------------------------------

// Delegates to L0::SysmanDevice::fanGet with the arguments unchanged.
ZE_APIEXPORT ze_result_t ZE_APICALL
zesDeviceEnumFans(
    zes_device_handle_t hDevice,
    uint32_t *pCount,
    zes_fan_handle_t *phFan) {
    return L0::SysmanDevice::fanGet(hDevice, pCount, phFan);
}

// Resolves hFan and calls fanGetProperties.
ZE_APIEXPORT ze_result_t ZE_APICALL
zesFanGetProperties(
    zes_fan_handle_t hFan,
    zes_fan_properties_t *pProperties) {
    auto fan = L0::Fan::fromHandle(hFan);
    return fan->fanGetProperties(pProperties);
}

// Resolves hFan and calls fanGetConfig.
ZE_APIEXPORT ze_result_t ZE_APICALL
zesFanGetConfig(
    zes_fan_handle_t hFan,
    zes_fan_config_t *pConfig) {
    auto fan = L0::Fan::fromHandle(hFan);
    return fan->fanGetConfig(pConfig);
}

// Resolves hFan and calls fanSetDefaultMode (no further arguments).
ZE_APIEXPORT ze_result_t ZE_APICALL
zesFanSetDefaultMode(
    zes_fan_handle_t hFan) {
    auto fan = L0::Fan::fromHandle(hFan);
    return fan->fanSetDefaultMode();
}

// Resolves hFan and calls fanSetFixedSpeedMode.
ZE_APIEXPORT ze_result_t ZE_APICALL
zesFanSetFixedSpeedMode(
    zes_fan_handle_t hFan,
    const zes_fan_speed_t *speed) {
    auto fan = L0::Fan::fromHandle(hFan);
    return fan->fanSetFixedSpeedMode(speed);
}

// Resolves hFan and calls fanSetSpeedTableMode.
ZE_APIEXPORT ze_result_t ZE_APICALL
zesFanSetSpeedTableMode(
    zes_fan_handle_t hFan,
    const zes_fan_speed_table_t *speedTable) {
    auto fan = L0::Fan::fromHandle(hFan);
    return fan->fanSetSpeedTableMode(speedTable);
}

// Resolves hFan and calls fanGetState with the requested units.
ZE_APIEXPORT ze_result_t ZE_APICALL
zesFanGetState(
    zes_fan_handle_t hFan,
    zes_fan_speed_units_t units,
    int32_t *pSpeed) {
    auto fan = L0::Fan::fromHandle(hFan);
    return fan->fanGetState(units, pSpeed);
}

// -------------------------------- LED --------------------------------------
// The LED entry points ignore their arguments and always report
// ZE_RESULT_ERROR_UNSUPPORTED_FEATURE.

ZE_APIEXPORT ze_result_t ZE_APICALL
zesDeviceEnumLeds(
    zes_device_handle_t hDevice,
    uint32_t *pCount,
    zes_led_handle_t *phLed) {
    return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

ZE_APIEXPORT ze_result_t ZE_APICALL
zesLedGetProperties(
    zes_led_handle_t hLed,
    zes_led_properties_t *pProperties) {
    return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

ZE_APIEXPORT ze_result_t ZE_APICALL
zesLedGetState(
    zes_led_handle_t hLed,
    zes_led_state_t *pState) {
    return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

ZE_APIEXPORT ze_result_t ZE_APICALL
zesLedSetState(
    zes_led_handle_t hLed,
    ze_bool_t enable) {
    return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

ZE_APIEXPORT ze_result_t ZE_APICALL
zesLedSetColor(
    zes_led_handle_t hLed,
    const zes_led_color_t *pColor) {
    return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

// -------------------------------- RAS --------------------------------------

// Delegates to L0::SysmanDevice::rasGet with the arguments unchanged.
ZE_APIEXPORT ze_result_t ZE_APICALL
zesDeviceEnumRasErrorSets(
    zes_device_handle_t hDevice,
    uint32_t *pCount,
    zes_ras_handle_t *phRas) {
    return L0::SysmanDevice::rasGet(hDevice, pCount, phRas);
}

// Resolves hRas and calls rasGetProperties.
ZE_APIEXPORT ze_result_t ZE_APICALL
zesRasGetProperties(
    zes_ras_handle_t hRas,
    zes_ras_properties_t *pProperties) {
    auto ras = L0::Ras::fromHandle(hRas);
    return ras->rasGetProperties(pProperties);
}

// Resolves hRas and calls rasGetConfig.
ZE_APIEXPORT ze_result_t ZE_APICALL
zesRasGetConfig(
    zes_ras_handle_t hRas,
    zes_ras_config_t *pConfig) {
    auto ras = L0::Ras::fromHandle(hRas);
    return ras->rasGetConfig(pConfig);
}

// Resolves hRas and calls rasSetConfig.
ZE_APIEXPORT ze_result_t ZE_APICALL
zesRasSetConfig(
    zes_ras_handle_t hRas,
    const zes_ras_config_t *pConfig) {
    auto ras = L0::Ras::fromHandle(hRas);
    return ras->rasSetConfig(pConfig);
}

// Resolves hRas and calls rasGetState.
// The member takes (pState, clear), i.e. the reverse of the API argument order.
ZE_APIEXPORT ze_result_t ZE_APICALL
zesRasGetState(
    zes_ras_handle_t hRas,
    ze_bool_t clear,
    zes_ras_state_t *pState) {
    auto ras = L0::Ras::fromHandle(hRas);
    return ras->rasGetState(pState, clear);
}

// ------------------------------- Events ------------------------------------

// Delegates to L0::SysmanDevice::deviceEventRegister with the arguments unchanged.
ZE_APIEXPORT ze_result_t ZE_APICALL
zesDeviceEventRegister(
    zes_device_handle_t hDevice,
    zes_event_type_flags_t events) {
    return L0::SysmanDevice::deviceEventRegister(hDevice, events);
}

// Resolves hDriver and calls sysmanEventsListen (32-bit timeout variant).
ZE_APIEXPORT ze_result_t ZE_APICALL
zesDriverEventListen(
    ze_driver_handle_t hDriver,
    uint32_t timeout,
    uint32_t count,
    zes_device_handle_t *phDevices,
    uint32_t *pNumDeviceEvents,
    zes_event_type_flags_t *pEvents) {
    auto driver = L0::DriverHandle::fromHandle(hDriver);
    return driver->sysmanEventsListen(timeout, count, phDevices, pNumDeviceEvents, pEvents);
}

// Resolves hDriver and calls sysmanEventsListenEx (64-bit timeout variant).
ZE_APIEXPORT ze_result_t ZE_APICALL
zesDriverEventListenEx(
    ze_driver_handle_t hDriver,
    uint64_t timeout,
    uint32_t count,
    zes_device_handle_t *phDevices,
    uint32_t *pNumDeviceEvents,
    zes_event_type_flags_t *pEvents) {
    auto driver = L0::DriverHandle::fromHandle(hDriver);
    return driver->sysmanEventsListenEx(timeout, count, phDevices, pNumDeviceEvents, pEvents);
}

// ----------------------------- Diagnostics ---------------------------------

// Delegates to L0::SysmanDevice::diagnosticsGet with the arguments unchanged.
ZE_APIEXPORT ze_result_t ZE_APICALL
zesDeviceEnumDiagnosticTestSuites(
    zes_device_handle_t hDevice,
    uint32_t *pCount,
    zes_diag_handle_t *phDiagnostics) {
    return L0::SysmanDevice::diagnosticsGet(hDevice, pCount, phDiagnostics);
}

// Resolves hDiagnostics and calls diagnosticsGetProperties.
ZE_APIEXPORT ze_result_t ZE_APICALL
zesDiagnosticsGetProperties(
    zes_diag_handle_t hDiagnostics,
    zes_diag_properties_t *pProperties) {
    auto diagnostics = L0::Diagnostics::fromHandle(hDiagnostics);
    return diagnostics->diagnosticsGetProperties(pProperties);
}

// Resolves hDiagnostics and calls diagnosticsGetTests.
ZE_APIEXPORT ze_result_t ZE_APICALL
zesDiagnosticsGetTests(
    zes_diag_handle_t hDiagnostics,
    uint32_t *pCount,
    zes_diag_test_t *pTests) {
    auto diagnostics = L0::Diagnostics::fromHandle(hDiagnostics);
    return diagnostics->diagnosticsGetTests(pCount, pTests);
}
ZE_APIEXPORT ze_result_t ZE_APICALL zesDiagnosticsRunTests( zes_diag_handle_t hDiagnostics, uint32_t startIndex, uint32_t endIndex, zes_diag_result_t *pResult) { return L0::Diagnostics::fromHandle(hDiagnostics)->diagnosticsRunTests(startIndex, endIndex, pResult); } ZE_APIEXPORT ze_result_t ZE_APICALL zesDeviceEnumPerformanceFactorDomains( zes_device_handle_t hDevice, uint32_t *pCount, zes_perf_handle_t *phPerf) { return L0::SysmanDevice::performanceGet(hDevice, pCount, phPerf); } ZE_APIEXPORT ze_result_t ZE_APICALL zesPerformanceFactorGetProperties( zes_perf_handle_t hPerf, zes_perf_properties_t *pProperties) { return L0::Performance::fromHandle(hPerf)->performanceGetProperties(pProperties); } ZE_APIEXPORT ze_result_t ZE_APICALL zesPerformanceFactorGetConfig( zes_perf_handle_t hPerf, double *pFactor) { return L0::Performance::fromHandle(hPerf)->performanceGetConfig(pFactor); } ZE_APIEXPORT ze_result_t ZE_APICALL zesPerformanceFactorSetConfig( zes_perf_handle_t hPerf, double factor) { return L0::Performance::fromHandle(hPerf)->performanceSetConfig(factor); } compute-runtime-22.14.22890/level_zero/api/tools/000077500000000000000000000000001422164147700214315ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/api/tools/CMakeLists.txt000066400000000000000000000005331422164147700241720ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_TOOLS_API ${CMAKE_CURRENT_SOURCE_DIR}/ze_tools_loader.cpp ${CMAKE_CURRENT_SOURCE_DIR}/zet_debug.cpp ${CMAKE_CURRENT_SOURCE_DIR}/zet_metric.cpp ${CMAKE_CURRENT_SOURCE_DIR}/zet_module.cpp ) set_property(GLOBAL PROPERTY L0_TOOLS_API ${L0_TOOLS_API}) compute-runtime-22.14.22890/level_zero/api/tools/ze_tools_loader.cpp000066400000000000000000000217411422164147700253260ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/source/inc/ze_intel_gpu.h" #include #include #include #include 
#include #include #include "ze_ddi_tables.h" extern ze_gpu_driver_dditable_t driver_ddiTable; ZE_DLLEXPORT ze_result_t ZE_APICALL zetGetContextProcAddrTable( ze_api_version_t version, zet_context_dditable_t *pDdiTable) { ze_result_t result = ZE_RESULT_SUCCESS; if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (ZE_MAJOR_VERSION(driver_ddiTable.version) != ZE_MAJOR_VERSION(version) || ZE_MINOR_VERSION(driver_ddiTable.version) > ZE_MINOR_VERSION(version)) return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; pDdiTable->pfnActivateMetricGroups = zetContextActivateMetricGroups; return result; } ZE_DLLEXPORT ze_result_t ZE_APICALL zetGetMetricStreamerProcAddrTable( ze_api_version_t version, zet_metric_streamer_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (ZE_MAJOR_VERSION(driver_ddiTable.version) != ZE_MAJOR_VERSION(version) || ZE_MINOR_VERSION(driver_ddiTable.version) > ZE_MINOR_VERSION(version)) return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; ze_result_t result = ZE_RESULT_SUCCESS; pDdiTable->pfnOpen = zetMetricStreamerOpen; pDdiTable->pfnClose = zetMetricStreamerClose; pDdiTable->pfnReadData = zetMetricStreamerReadData; return result; } ZE_DLLEXPORT ze_result_t ZE_APICALL zetGetTracerExpProcAddrTable( ze_api_version_t version, zet_tracer_exp_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (ZE_MAJOR_VERSION(driver_ddiTable.version) != ZE_MAJOR_VERSION(version) || ZE_MINOR_VERSION(driver_ddiTable.version) > ZE_MINOR_VERSION(version)) return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; ze_result_t result = ZE_RESULT_SUCCESS; pDdiTable->pfnCreate = zetTracerExpCreate; pDdiTable->pfnDestroy = zetTracerExpDestroy; pDdiTable->pfnSetPrologues = zetTracerExpSetPrologues; pDdiTable->pfnSetEpilogues = zetTracerExpSetEpilogues; pDdiTable->pfnSetEnabled = zetTracerExpSetEnabled; return result; } ZE_DLLEXPORT ze_result_t ZE_APICALL zetGetCommandListProcAddrTable( ze_api_version_t version, 
zet_command_list_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (ZE_MAJOR_VERSION(driver_ddiTable.version) != ZE_MAJOR_VERSION(version) || ZE_MINOR_VERSION(driver_ddiTable.version) > ZE_MINOR_VERSION(version)) return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; ze_result_t result = ZE_RESULT_SUCCESS; pDdiTable->pfnAppendMetricStreamerMarker = zetCommandListAppendMetricStreamerMarker; pDdiTable->pfnAppendMetricQueryBegin = zetCommandListAppendMetricQueryBegin; pDdiTable->pfnAppendMetricQueryEnd = zetCommandListAppendMetricQueryEnd; pDdiTable->pfnAppendMetricMemoryBarrier = zetCommandListAppendMetricMemoryBarrier; return result; } ZE_DLLEXPORT ze_result_t ZE_APICALL zetGetModuleProcAddrTable( ze_api_version_t version, zet_module_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (ZE_MAJOR_VERSION(driver_ddiTable.version) != ZE_MAJOR_VERSION(version) || ZE_MINOR_VERSION(driver_ddiTable.version) > ZE_MINOR_VERSION(version)) return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; ze_result_t result = ZE_RESULT_SUCCESS; pDdiTable->pfnGetDebugInfo = zetModuleGetDebugInfo; return result; } ZE_DLLEXPORT ze_result_t ZE_APICALL zetGetKernelProcAddrTable( ze_api_version_t version, zet_kernel_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (ZE_MAJOR_VERSION(driver_ddiTable.version) != ZE_MAJOR_VERSION(version) || ZE_MINOR_VERSION(driver_ddiTable.version) > ZE_MINOR_VERSION(version)) return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; ze_result_t result = ZE_RESULT_SUCCESS; pDdiTable->pfnGetProfileInfo = zetKernelGetProfileInfo; return result; } ZE_DLLEXPORT ze_result_t ZE_APICALL zetGetMetricGroupProcAddrTable( ze_api_version_t version, zet_metric_group_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (ZE_MAJOR_VERSION(driver_ddiTable.version) != ZE_MAJOR_VERSION(version) || ZE_MINOR_VERSION(driver_ddiTable.version) > 
ZE_MINOR_VERSION(version)) return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; ze_result_t result = ZE_RESULT_SUCCESS; pDdiTable->pfnGet = zetMetricGroupGet; pDdiTable->pfnGetProperties = zetMetricGroupGetProperties; pDdiTable->pfnCalculateMetricValues = zetMetricGroupCalculateMetricValues; return result; } ZE_DLLEXPORT ze_result_t ZE_APICALL zetGetMetricProcAddrTable( ze_api_version_t version, zet_metric_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (ZE_MAJOR_VERSION(driver_ddiTable.version) != ZE_MAJOR_VERSION(version) || ZE_MINOR_VERSION(driver_ddiTable.version) > ZE_MINOR_VERSION(version)) return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; ze_result_t result = ZE_RESULT_SUCCESS; pDdiTable->pfnGet = zetMetricGet; pDdiTable->pfnGetProperties = zetMetricGetProperties; return result; } ZE_DLLEXPORT ze_result_t ZE_APICALL zetGetMetricQueryPoolProcAddrTable( ze_api_version_t version, zet_metric_query_pool_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (ZE_MAJOR_VERSION(driver_ddiTable.version) != ZE_MAJOR_VERSION(version) || ZE_MINOR_VERSION(driver_ddiTable.version) > ZE_MINOR_VERSION(version)) return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; ze_result_t result = ZE_RESULT_SUCCESS; pDdiTable->pfnCreate = zetMetricQueryPoolCreate; pDdiTable->pfnDestroy = zetMetricQueryPoolDestroy; return result; } ZE_DLLEXPORT ze_result_t ZE_APICALL zetGetMetricQueryProcAddrTable( ze_api_version_t version, zet_metric_query_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (ZE_MAJOR_VERSION(driver_ddiTable.version) != ZE_MAJOR_VERSION(version) || ZE_MINOR_VERSION(driver_ddiTable.version) > ZE_MINOR_VERSION(version)) return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; ze_result_t result = ZE_RESULT_SUCCESS; pDdiTable->pfnCreate = zetMetricQueryCreate; pDdiTable->pfnDestroy = zetMetricQueryDestroy; pDdiTable->pfnReset = zetMetricQueryReset; pDdiTable->pfnGetData = 
zetMetricQueryGetData; return result; } ZE_DLLEXPORT ze_result_t ZE_APICALL zetGetDeviceProcAddrTable( ze_api_version_t version, zet_device_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (ZE_MAJOR_VERSION(driver_ddiTable.version) != ZE_MAJOR_VERSION(version) || ZE_MINOR_VERSION(driver_ddiTable.version) > ZE_MINOR_VERSION(version)) return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; ze_result_t result = ZE_RESULT_SUCCESS; pDdiTable->pfnGetDebugProperties = zetDeviceGetDebugProperties; return result; } ZE_DLLEXPORT ze_result_t ZE_APICALL zetGetDebugProcAddrTable( ze_api_version_t version, zet_debug_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (ZE_MAJOR_VERSION(driver_ddiTable.version) != ZE_MAJOR_VERSION(version) || ZE_MINOR_VERSION(driver_ddiTable.version) > ZE_MINOR_VERSION(version)) return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; ze_result_t result = ZE_RESULT_SUCCESS; pDdiTable->pfnAttach = zetDebugAttach; pDdiTable->pfnDetach = zetDebugDetach; pDdiTable->pfnReadEvent = zetDebugReadEvent; pDdiTable->pfnAcknowledgeEvent = zetDebugAcknowledgeEvent; pDdiTable->pfnInterrupt = zetDebugInterrupt; pDdiTable->pfnResume = zetDebugResume; pDdiTable->pfnReadMemory = zetDebugReadMemory; pDdiTable->pfnWriteMemory = zetDebugWriteMemory; pDdiTable->pfnGetRegisterSetProperties = zetDebugGetRegisterSetProperties; pDdiTable->pfnReadRegisters = zetDebugReadRegisters; pDdiTable->pfnWriteRegisters = zetDebugWriteRegisters; return result; } ZE_DLLEXPORT ze_result_t ZE_APICALL zetGetMetricGroupExpProcAddrTable( ze_api_version_t version, zet_metric_group_exp_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (ZE_MAJOR_VERSION(driver_ddiTable.version) != ZE_MAJOR_VERSION(version) || ZE_MINOR_VERSION(driver_ddiTable.version) > ZE_MINOR_VERSION(version)) return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; ze_result_t result = ZE_RESULT_SUCCESS; 
pDdiTable->pfnCalculateMultipleMetricValuesExp = zetMetricGroupCalculateMultipleMetricValuesExp; return result; } compute-runtime-22.14.22890/level_zero/api/tools/zet_debug.cpp000066400000000000000000000062421422164147700241110ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/device/device.h" #include "level_zero/tools/source/debug/debug_handlers.h" #include ZE_APIEXPORT ze_result_t ZE_APICALL zetDeviceGetDebugProperties( zet_device_handle_t hDevice, zet_device_debug_properties_t *pDebugProperties) { return L0::Device::fromHandle(hDevice)->getDebugProperties(pDebugProperties); } ZE_APIEXPORT ze_result_t ZE_APICALL zetDebugAttach( zet_device_handle_t hDevice, const zet_debug_config_t *config, zet_debug_session_handle_t *phDebug) { return L0::DebugApiHandlers::debugAttach(hDevice, config, phDebug); } ZE_APIEXPORT ze_result_t ZE_APICALL zetDebugDetach( zet_debug_session_handle_t hDebug) { return L0::DebugApiHandlers::debugDetach(hDebug); } ZE_APIEXPORT ze_result_t ZE_APICALL zetDebugReadEvent( zet_debug_session_handle_t hDebug, uint64_t timeout, zet_debug_event_t *event) { return L0::DebugApiHandlers::debugReadEvent(hDebug, timeout, event); } ZE_APIEXPORT ze_result_t ZE_APICALL zetDebugInterrupt( zet_debug_session_handle_t hDebug, ze_device_thread_t thread) { return L0::DebugApiHandlers::debugInterrupt(hDebug, thread); } ZE_APIEXPORT ze_result_t ZE_APICALL zetDebugResume( zet_debug_session_handle_t hDebug, ze_device_thread_t thread) { return L0::DebugApiHandlers::debugResume(hDebug, thread); } ZE_APIEXPORT ze_result_t ZE_APICALL zetDebugReadMemory( zet_debug_session_handle_t hDebug, ze_device_thread_t thread, const zet_debug_memory_space_desc_t *desc, size_t size, void *buffer) { return L0::DebugApiHandlers::debugReadMemory(hDebug, thread, desc, size, buffer); } ZE_APIEXPORT ze_result_t ZE_APICALL zetDebugWriteMemory( zet_debug_session_handle_t hDebug, ze_device_thread_t 
thread, const zet_debug_memory_space_desc_t *desc, size_t size, const void *buffer) { return L0::DebugApiHandlers::debugWriteMemory(hDebug, thread, desc, size, buffer); } ZE_APIEXPORT ze_result_t ZE_APICALL zetDebugAcknowledgeEvent( zet_debug_session_handle_t hDebug, const zet_debug_event_t *event) { return L0::DebugApiHandlers::debugAcknowledgeEvent(hDebug, event); } ZE_APIEXPORT ze_result_t ZE_APICALL zetDebugGetRegisterSetProperties( zet_device_handle_t hDevice, uint32_t *pCount, zet_debug_regset_properties_t *pRegisterSetProperties) { return L0::DebugApiHandlers::debugGetRegisterSetProperties(hDevice, pCount, pRegisterSetProperties); } ZE_APIEXPORT ze_result_t ZE_APICALL zetDebugReadRegisters( zet_debug_session_handle_t hDebug, ze_device_thread_t thread, uint32_t type, uint32_t start, uint32_t count, void *pRegisterValues) { return L0::DebugApiHandlers::debugReadRegisters(hDebug, thread, type, start, count, pRegisterValues); } ZE_APIEXPORT ze_result_t ZE_APICALL zetDebugWriteRegisters( zet_debug_session_handle_t hDebug, ze_device_thread_t thread, uint32_t type, uint32_t start, uint32_t count, void *pRegisterValues) { return L0::DebugApiHandlers::debugWriteRegisters(hDebug, thread, type, start, count, pRegisterValues); }compute-runtime-22.14.22890/level_zero/api/tools/zet_metric.cpp000066400000000000000000000125261422164147700243100ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/cmdlist/cmdlist.h" #include "level_zero/core/source/device/device.h" #include "level_zero/tools/source/metrics/metric.h" #include ZE_DLLEXPORT ze_result_t ZE_APICALL zetMetricGroupGet( zet_device_handle_t hDevice, uint32_t *pCount, zet_metric_group_handle_t *phMetricGroups) { return L0::metricGroupGet(hDevice, pCount, phMetricGroups); } ZE_DLLEXPORT ze_result_t ZE_APICALL zetMetricGroupGetProperties( zet_metric_group_handle_t hMetricGroup, zet_metric_group_properties_t *pProperties) { 
return L0::MetricGroup::fromHandle(hMetricGroup)->getProperties(pProperties); } ZE_DLLEXPORT ze_result_t ZE_APICALL zetMetricGet( zet_metric_group_handle_t hMetricGroup, uint32_t *pCount, zet_metric_handle_t *phMetrics) { return L0::MetricGroup::fromHandle(hMetricGroup)->metricGet(pCount, phMetrics); } ZE_DLLEXPORT ze_result_t ZE_APICALL zetMetricGetProperties( zet_metric_handle_t hMetric, zet_metric_properties_t *pProperties) { return L0::Metric::fromHandle(hMetric)->getProperties(pProperties); } ZE_DLLEXPORT ze_result_t ZE_APICALL zetMetricGroupCalculateMetricValues( zet_metric_group_handle_t hMetricGroup, zet_metric_group_calculation_type_t type, size_t rawDataSize, const uint8_t *pRawData, uint32_t *pMetricValueCount, zet_typed_value_t *pMetricValues) { return L0::MetricGroup::fromHandle(hMetricGroup)->calculateMetricValues(type, rawDataSize, pRawData, pMetricValueCount, pMetricValues); } ZE_DLLEXPORT ze_result_t ZE_APICALL zetContextActivateMetricGroups( zet_context_handle_t hContext, zet_device_handle_t hDevice, uint32_t count, zet_metric_group_handle_t *phMetricGroups) { return L0::Context::fromHandle(hContext)->activateMetricGroups(hDevice, count, phMetricGroups); } ZE_APIEXPORT ze_result_t ZE_APICALL zetMetricStreamerOpen( zet_context_handle_t hContext, zet_device_handle_t hDevice, zet_metric_group_handle_t hMetricGroup, zet_metric_streamer_desc_t *pDesc, ze_event_handle_t hNotificationEvent, zet_metric_streamer_handle_t *phMetricStreamer) { return L0::metricStreamerOpen(hContext, hDevice, hMetricGroup, pDesc, hNotificationEvent, phMetricStreamer); } ZE_APIEXPORT ze_result_t ZE_APICALL zetCommandListAppendMetricStreamerMarker( ze_command_list_handle_t hCommandList, zet_metric_streamer_handle_t hMetricStreamer, uint32_t value) { return L0::CommandList::fromHandle(hCommandList)->appendMetricStreamerMarker(hMetricStreamer, value); } ZE_APIEXPORT ze_result_t ZE_APICALL zetMetricStreamerClose( zet_metric_streamer_handle_t hMetricStreamer) { return 
L0::MetricStreamer::fromHandle(hMetricStreamer)->close(); } ZE_APIEXPORT ze_result_t ZE_APICALL zetMetricStreamerReadData( zet_metric_streamer_handle_t hMetricStreamer, uint32_t maxReportCount, size_t *pRawDataSize, uint8_t *pRawData) { return L0::MetricStreamer::fromHandle(hMetricStreamer)->readData(maxReportCount, pRawDataSize, pRawData); } ZE_DLLEXPORT ze_result_t ZE_APICALL zetMetricQueryPoolCreate( zet_context_handle_t hContext, zet_device_handle_t hDevice, zet_metric_group_handle_t hMetricGroup, const zet_metric_query_pool_desc_t *desc, zet_metric_query_pool_handle_t *phMetricQueryPool) { return L0::metricQueryPoolCreate(hContext, hDevice, hMetricGroup, desc, phMetricQueryPool); } ZE_DLLEXPORT ze_result_t ZE_APICALL zetMetricQueryPoolDestroy( zet_metric_query_pool_handle_t hMetricQueryPool) { return L0::MetricQueryPool::fromHandle(hMetricQueryPool)->destroy(); } ZE_DLLEXPORT ze_result_t ZE_APICALL zetMetricQueryCreate( zet_metric_query_pool_handle_t hMetricQueryPool, uint32_t index, zet_metric_query_handle_t *phMetricQuery) { return L0::MetricQueryPool::fromHandle(hMetricQueryPool)->metricQueryCreate(index, phMetricQuery); } ZE_DLLEXPORT ze_result_t ZE_APICALL zetMetricQueryDestroy( zet_metric_query_handle_t hMetricQuery) { return L0::MetricQuery::fromHandle(hMetricQuery)->destroy(); } ZE_DLLEXPORT ze_result_t ZE_APICALL zetMetricQueryReset( zet_metric_query_handle_t hMetricQuery) { return L0::MetricQuery::fromHandle(hMetricQuery)->reset(); } ZE_DLLEXPORT ze_result_t ZE_APICALL zetCommandListAppendMetricQueryBegin( zet_command_list_handle_t hCommandList, zet_metric_query_handle_t hMetricQuery) { return L0::CommandList::fromHandle(hCommandList)->appendMetricQueryBegin(hMetricQuery); } ZE_DLLEXPORT ze_result_t ZE_APICALL zetCommandListAppendMetricQueryEnd( zet_command_list_handle_t hCommandList, zet_metric_query_handle_t hMetricQuery, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { return 
L0::CommandList::fromHandle(hCommandList)->appendMetricQueryEnd(hMetricQuery, hSignalEvent, numWaitEvents, phWaitEvents); } ZE_DLLEXPORT ze_result_t ZE_APICALL zetCommandListAppendMetricMemoryBarrier( zet_command_list_handle_t hCommandList) { return L0::CommandList::fromHandle(hCommandList)->appendMetricMemoryBarrier(); } ZE_DLLEXPORT ze_result_t ZE_APICALL zetMetricQueryGetData( zet_metric_query_handle_t hMetricQuery, size_t *pRawDataSize, uint8_t *pRawData) { return L0::MetricQuery::fromHandle(hMetricQuery)->getData(pRawDataSize, pRawData); } compute-runtime-22.14.22890/level_zero/api/tools/zet_module.cpp000066400000000000000000000012271422164147700243060ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/module/module.h" #include ZE_DLLEXPORT ze_result_t ZE_APICALL zetModuleGetDebugInfo( zet_module_handle_t hModule, zet_module_debug_info_format_t format, size_t *pSize, uint8_t *pDebugInfo) { return L0::Module::fromHandle(hModule)->getDebugInfo(pSize, pDebugInfo); } ZE_APIEXPORT ze_result_t ZE_APICALL zetKernelGetProfileInfo( zet_kernel_handle_t hKernel, zet_profile_properties_t *pProfileProperties) { return L0::Kernel::fromHandle(hKernel)->getProfileInfo(pProfileProperties); } compute-runtime-22.14.22890/level_zero/cmake/000077500000000000000000000000001422164147700206005ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/cmake/FindLevelZero.cmake000066400000000000000000000015111422164147700243100ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # include(FindPackageHandleStandardArgs) find_path(LevelZero_INCLUDE_DIR NAMES level_zero/ze_api.h PATHS ${LEVEL_ZERO_ROOT} PATH_SUFFIXES "include" ) find_package_handle_standard_args(LevelZero REQUIRED_VARS LevelZero_INCLUDE_DIR ) if(LevelZero_FOUND) list(APPEND LevelZero_INCLUDE_DIRS ${LevelZero_INCLUDE_DIR}) endif() if(LevelZero_FOUND AND NOT TARGET 
LevelZero::LevelZero) add_library(LevelZero::LevelZero INTERFACE IMPORTED) set_target_properties(LevelZero::LevelZero PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${LevelZero_INCLUDE_DIRS}" ) endif() MESSAGE(STATUS "LevelZero_INCLUDE_DIRS: " ${LevelZero_INCLUDE_DIRS}) compute-runtime-22.14.22890/level_zero/cmake/UdevRulesDir.cmake000066400000000000000000000005521422164147700241610ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(UDEV_RULES_DIR_FOUND FALSE) foreach(rules_dir IN ITEMS "/lib/udev/rules.d" "/usr/lib/udev/rules.d") if(IS_DIRECTORY ${rules_dir}) set(UDEV_RULES_DIR ${rules_dir} CACHE PATH "Install path for udev rules") set(UDEV_RULES_DIR_FOUND TRUE) break() endif() endforeach() compute-runtime-22.14.22890/level_zero/cmake/not_supported_platforms.cmake000066400000000000000000000003371422164147700266010ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_NOT_SUPPORTED_PRODUCT_FAMILIES "IGFX_GEMINILAKE" "IGFX_BROXTON" "IGFX_LAKEFIELD" "IGFX_ELKHARTLAKE" "IGFX_BROADWELL" ) compute-runtime-22.14.22890/level_zero/cmake/source_tree.cmake000066400000000000000000000020001422164147700241110ustar00rootroot00000000000000# # Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT # function(create_source_tree target directory) if(WIN32) get_filename_component(directory ${directory} ABSOLUTE) get_target_property(source_list ${target} SOURCES) #source_group fails with file generated in build directory if(DEFINED L0_DLL_RC_FILE) list(FIND source_list ${L0_DLL_RC_FILE} _index) if(${_index} GREATER -1) list(REMOVE_ITEM source_list ${L0_DLL_RC_FILE}) endif() endif() source_group(TREE ${directory} FILES ${source_list}) endif() endfunction() macro(add_subdirectoriesL0 curdir dirmask) file(GLOB children RELATIVE ${curdir} ${curdir}/${dirmask}) set(dirlist "") foreach(child ${children}) if(IS_DIRECTORY ${curdir}/${child}) list(APPEND dirlist 
${child}) endif() endforeach() foreach(subdir ${dirlist}) if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/CMakeLists.txt) add_subdirectory(${subdir}) endif() endforeach() endmacro() compute-runtime-22.14.22890/level_zero/cmake/verify_l0_support.cmake000066400000000000000000000013231422164147700252740ustar00rootroot00000000000000# # Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT # include(${CMAKE_CURRENT_SOURCE_DIR}/cmake${BRANCH_DIR_SUFFIX}not_supported_platforms.cmake) set(L0_SUPPORTED_PRODUCT_FAMILIES ${ALL_SUPPORTED_PRODUCT_FAMILY}) # Remove unsupported products list(LENGTH L0_SUPPORTED_PRODUCT_FAMILIES L0_SUPPORTED_PRODUCT_FAMILIES_COUNT) if(L0_SUPPORTED_PRODUCT_FAMILIES_COUNT GREATER 0) list(REMOVE_ITEM L0_SUPPORTED_PRODUCT_FAMILIES ${L0_NOT_SUPPORTED_PRODUCT_FAMILIES}) list(LENGTH L0_SUPPORTED_PRODUCT_FAMILIES L0_SUPPORTED_PRODUCT_FAMILIES_COUNT) endif() if(L0_SUPPORTED_PRODUCT_FAMILIES_COUNT EQUAL 0) set(BUILD_WITH_L0 FALSE) message(STATUS "L0: Selected platforms are not supported.") endif() compute-runtime-22.14.22890/level_zero/core/000077500000000000000000000000001422164147700204505ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/source/000077500000000000000000000000001422164147700217505ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/source/CMakeLists.txt000066400000000000000000000144321422164147700245140ustar00rootroot00000000000000# # Copyright (C) 2020-2022 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_RUNTIME_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/get_extension_function_lookup_map.h ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}get_extension_function_lookup_map.cpp ${CMAKE_CURRENT_SOURCE_DIR}/builtin/builtin_functions_lib.h ${CMAKE_CURRENT_SOURCE_DIR}/builtin/builtin_functions_lib_impl.cpp ${CMAKE_CURRENT_SOURCE_DIR}/builtin/builtin_functions_lib_impl.h ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist/cmdlist.cpp 
${CMAKE_CURRENT_SOURCE_DIR}/cmdlist/cmdlist.h ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist/cmdlist_hw.h ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist/cmdlist_hw.inl ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist/cmdlist_hw_base.inl ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist/cmdlist_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist/cmdlist_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist/cmdlist_hw_immediate.h ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist/cmdlist_hw_immediate.inl ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist/cmdlist_extended${BRANCH_DIR_SUFFIX}cmdlist_extended.inl ${CMAKE_CURRENT_SOURCE_DIR}/cmdqueue/cmdqueue.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cmdqueue/cmdqueue.h ${CMAKE_CURRENT_SOURCE_DIR}/cmdqueue/cmdqueue_hw.h ${CMAKE_CURRENT_SOURCE_DIR}/cmdqueue/cmdqueue_hw.inl ${CMAKE_CURRENT_SOURCE_DIR}/cmdqueue/cmdqueue_hw_base.inl ${CMAKE_CURRENT_SOURCE_DIR}/cmdqueue/cmdqueue_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/cmdqueue/cmdqueue_extended${BRANCH_DIR_SUFFIX}cmdqueue_extended.inl ${CMAKE_CURRENT_SOURCE_DIR}/context/context_imp_${DRIVER_MODEL}/context_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/context/context_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/context/context_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/context/context.h ${CMAKE_CURRENT_SOURCE_DIR}/debugger/debug_manager.cpp ${CMAKE_CURRENT_SOURCE_DIR}/device${BRANCH_DIR_SUFFIX}device_imp_helper.cpp ${CMAKE_CURRENT_SOURCE_DIR}/device/device.h ${CMAKE_CURRENT_SOURCE_DIR}/device/device_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/device/device_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/driver/driver_handle.h ${CMAKE_CURRENT_SOURCE_DIR}/driver/driver_handle_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/driver/driver_handle_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/driver/driver.cpp ${CMAKE_CURRENT_SOURCE_DIR}/driver/driver.h ${CMAKE_CURRENT_SOURCE_DIR}/driver/driver_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/driver/host_pointer_manager.cpp ${CMAKE_CURRENT_SOURCE_DIR}/driver/host_pointer_manager.h ${CMAKE_CURRENT_SOURCE_DIR}/event/event_impl.inl ${CMAKE_CURRENT_SOURCE_DIR}/event/event.cpp ${CMAKE_CURRENT_SOURCE_DIR}/event/event.h 
${CMAKE_CURRENT_SOURCE_DIR}/fence/fence.cpp ${CMAKE_CURRENT_SOURCE_DIR}/fence/fence.h ${CMAKE_CURRENT_SOURCE_DIR}/helpers/allocation_extensions.h ${CMAKE_CURRENT_SOURCE_DIR}/helpers/allocation_extensions.cpp ${CMAKE_CURRENT_SOURCE_DIR}/helpers/api_specific_config_l0.cpp ${CMAKE_CURRENT_SOURCE_DIR}/helpers/implicit_scaling_l0.cpp ${CMAKE_CURRENT_SOURCE_DIR}/helpers/l0_populate_factory.h ${CMAKE_CURRENT_SOURCE_DIR}/helpers/properties_parser.h ${CMAKE_CURRENT_SOURCE_DIR}/hw_helpers/l0_hw_helper_base.inl ${CMAKE_CURRENT_SOURCE_DIR}/hw_helpers/l0_hw_helper_skl_and_later.inl ${CMAKE_CURRENT_SOURCE_DIR}/hw_helpers/l0_hw_helper.cpp ${CMAKE_CURRENT_SOURCE_DIR}/hw_helpers/l0_hw_helper.h ${CMAKE_CURRENT_SOURCE_DIR}/kernel/kernel.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel/kernel.h ${CMAKE_CURRENT_SOURCE_DIR}/kernel/kernel_hw.h ${CMAKE_CURRENT_SOURCE_DIR}/kernel/kernel_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel/kernel_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/kernel/sampler_patch_values.h ${CMAKE_CURRENT_SOURCE_DIR}/image/image.h ${CMAKE_CURRENT_SOURCE_DIR}/image/image_format_desc_helper.cpp ${CMAKE_CURRENT_SOURCE_DIR}/image/image_format_desc_helper.h ${CMAKE_CURRENT_SOURCE_DIR}/image/image_formats.h ${CMAKE_CURRENT_SOURCE_DIR}/image/image_hw.h ${CMAKE_CURRENT_SOURCE_DIR}/image/image_hw.inl ${CMAKE_CURRENT_SOURCE_DIR}/image/image_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/image/image_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/memory/memory_operations_helper.h ${CMAKE_CURRENT_SOURCE_DIR}/memory/cpu_page_fault_memory_manager.cpp ${CMAKE_CURRENT_SOURCE_DIR}/memory_manager/compression_selector_l0.cpp ${CMAKE_CURRENT_SOURCE_DIR}/module/module.h ${CMAKE_CURRENT_SOURCE_DIR}/module/module_build_log.cpp ${CMAKE_CURRENT_SOURCE_DIR}/module/module_build_log.h ${CMAKE_CURRENT_SOURCE_DIR}/module${BRANCH_DIR_SUFFIX}module_extra_options.cpp ${CMAKE_CURRENT_SOURCE_DIR}/module/module_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/module/module_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/printf_handler/printf_handler.cpp 
${CMAKE_CURRENT_SOURCE_DIR}/printf_handler/printf_handler.h ${CMAKE_CURRENT_SOURCE_DIR}/sampler/sampler.h ${CMAKE_CURRENT_SOURCE_DIR}/sampler/sampler_hw.h ${CMAKE_CURRENT_SOURCE_DIR}/sampler/sampler_hw.inl ${CMAKE_CURRENT_SOURCE_DIR}/sampler/sampler_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sampler/sampler_imp.h ) if(SUPPORT_XEHP_AND_LATER) list(APPEND L0_RUNTIME_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist/cmdlist_hw_xehp_and_later.inl ${CMAKE_CURRENT_SOURCE_DIR}/cmdqueue/cmdqueue_xe_hp_core_and_later.inl ) endif() target_include_directories(${L0_STATIC_LIB_NAME} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/ ) target_include_directories(${TARGET_NAME_L0} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/ ) add_subdirectories() include_directories(${CMAKE_CURRENT_SOURCE_DIR}) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_RUNTIME_SOURCES} ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) target_sources(${TARGET_NAME_L0} PRIVATE ${L0_SRCS_DLL} ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) if(UNIX) set_source_files_properties(${L0_RUNTIME_SOURCES} PROPERTIES COMPILE_FLAGS -Wall) endif() if(NOT DEFINED L0_DRIVER_VERSION) set(L0_DRIVER_VERSION 1) endif() configure_file(driver/driver_version.h.in ${CMAKE_BINARY_DIR}/driver_version_l0.h) # Put Driver version into define # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_RUNTIME_SOURCES ${L0_RUNTIME_SOURCES}) compute-runtime-22.14.22890/level_zero/core/source/builtin/000077500000000000000000000000001422164147700234165ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/source/builtin/builtin_functions_lib.h000066400000000000000000000035441422164147700301610ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include #include namespace NEO { class BuiltIns; } // namespace NEO namespace L0 { struct Device; struct Kernel; enum class Builtin : uint32_t { CopyBufferBytes = 0u, CopyBufferBytesStateless, CopyBufferRectBytes2d, 
CopyBufferRectBytes3d, CopyBufferToBufferMiddle, CopyBufferToBufferMiddleStateless, CopyBufferToBufferSide, CopyBufferToBufferSideStateless, FillBufferImmediate, FillBufferImmediateStateless, FillBufferSSHOffset, FillBufferSSHOffsetStateless, FillBufferMiddle, FillBufferMiddleStateless, FillBufferRightLeftover, FillBufferRightLeftoverStateless, QueryKernelTimestamps, QueryKernelTimestampsWithOffsets, COUNT }; enum class ImageBuiltin : uint32_t { CopyBufferToImage3d16Bytes = 0u, CopyBufferToImage3d2Bytes, CopyBufferToImage3d4Bytes, CopyBufferToImage3d8Bytes, CopyBufferToImage3dBytes, CopyImage3dToBuffer16Bytes, CopyImage3dToBuffer2Bytes, CopyImage3dToBuffer4Bytes, CopyImage3dToBuffer8Bytes, CopyImage3dToBufferBytes, CopyImageRegion, COUNT }; struct BuiltinFunctionsLib { using MutexType = std::mutex; virtual ~BuiltinFunctionsLib() = default; static std::unique_ptr create(Device *device, NEO::BuiltIns *builtins); virtual Kernel *getFunction(Builtin func) = 0; virtual Kernel *getImageFunction(ImageBuiltin func) = 0; virtual void initBuiltinKernel(Builtin builtId) = 0; virtual void initBuiltinImageKernel(ImageBuiltin func) = 0; MOCKABLE_VIRTUAL std::unique_lock obtainUniqueOwnership(); protected: BuiltinFunctionsLib() = default; MutexType ownershipMutex; }; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/builtin/builtin_functions_lib_impl.cpp000066400000000000000000000202061422164147700315270ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/builtin/builtin_functions_lib_impl.h" #include "shared/source/built_ins/built_ins.h" namespace NEO { const char *getAdditionalBuiltinAsString(EBuiltInOps::Type builtin) { return nullptr; } } // namespace NEO namespace L0 { std::unique_lock BuiltinFunctionsLib::obtainUniqueOwnership() { return std::unique_lock(this->ownershipMutex); } void BuiltinFunctionsLibImpl::initBuiltinKernel(Builtin func) { const char 
*builtinName = nullptr; NEO::EBuiltInOps::Type builtin; switch (func) { case Builtin::CopyBufferBytes: builtinName = "copyBufferToBufferBytesSingle"; builtin = NEO::EBuiltInOps::CopyBufferToBuffer; break; case Builtin::CopyBufferBytesStateless: builtinName = "copyBufferToBufferBytesSingle"; builtin = NEO::EBuiltInOps::CopyBufferToBufferStateless; break; case Builtin::CopyBufferRectBytes2d: builtinName = "CopyBufferRectBytes2d"; builtin = NEO::EBuiltInOps::CopyBufferRect; break; case Builtin::CopyBufferRectBytes3d: builtinName = "CopyBufferRectBytes3d"; builtin = NEO::EBuiltInOps::CopyBufferRect; break; case Builtin::CopyBufferToBufferMiddle: builtinName = "CopyBufferToBufferMiddleRegion"; builtin = NEO::EBuiltInOps::CopyBufferToBuffer; break; case Builtin::CopyBufferToBufferMiddleStateless: builtinName = "CopyBufferToBufferMiddleRegion"; builtin = NEO::EBuiltInOps::CopyBufferToBufferStateless; break; case Builtin::CopyBufferToBufferSide: builtinName = "CopyBufferToBufferSideRegion"; builtin = NEO::EBuiltInOps::CopyBufferToBuffer; break; case Builtin::CopyBufferToBufferSideStateless: builtinName = "CopyBufferToBufferSideRegion"; builtin = NEO::EBuiltInOps::CopyBufferToBufferStateless; break; case Builtin::FillBufferImmediate: builtinName = "FillBufferImmediate"; builtin = NEO::EBuiltInOps::FillBuffer; break; case Builtin::FillBufferImmediateStateless: builtinName = "FillBufferImmediate"; builtin = NEO::EBuiltInOps::FillBufferStateless; break; case Builtin::FillBufferSSHOffset: builtinName = "FillBufferSSHOffset"; builtin = NEO::EBuiltInOps::FillBuffer; break; case Builtin::FillBufferSSHOffsetStateless: builtinName = "FillBufferSSHOffset"; builtin = NEO::EBuiltInOps::FillBufferStateless; break; case Builtin::FillBufferMiddle: builtinName = "FillBufferMiddle"; builtin = NEO::EBuiltInOps::FillBuffer; break; case Builtin::FillBufferMiddleStateless: builtinName = "FillBufferMiddle"; builtin = NEO::EBuiltInOps::FillBufferStateless; break; case 
Builtin::FillBufferRightLeftover: builtinName = "FillBufferRightLeftover"; builtin = NEO::EBuiltInOps::FillBuffer; break; case Builtin::FillBufferRightLeftoverStateless: builtinName = "FillBufferRightLeftover"; builtin = NEO::EBuiltInOps::FillBufferStateless; break; case Builtin::QueryKernelTimestamps: builtinName = "QueryKernelTimestamps"; builtin = NEO::EBuiltInOps::QueryKernelTimestamps; break; case Builtin::QueryKernelTimestampsWithOffsets: builtinName = "QueryKernelTimestampsWithOffsets"; builtin = NEO::EBuiltInOps::QueryKernelTimestamps; break; default: UNRECOVERABLE_IF(true); }; auto builtId = static_cast(func); builtins[builtId] = loadBuiltIn(builtin, builtinName); } void BuiltinFunctionsLibImpl::initBuiltinImageKernel(ImageBuiltin func) { const char *builtinName = nullptr; NEO::EBuiltInOps::Type builtin; switch (func) { case ImageBuiltin::CopyBufferToImage3d16Bytes: builtinName = "CopyBufferToImage3d16Bytes"; builtin = NEO::EBuiltInOps::CopyBufferToImage3d; break; case ImageBuiltin::CopyBufferToImage3d2Bytes: builtinName = "CopyBufferToImage3d2Bytes"; builtin = NEO::EBuiltInOps::CopyBufferToImage3d; break; case ImageBuiltin::CopyBufferToImage3d4Bytes: builtinName = "CopyBufferToImage3d4Bytes"; builtin = NEO::EBuiltInOps::CopyBufferToImage3d; break; case ImageBuiltin::CopyBufferToImage3d8Bytes: builtinName = "CopyBufferToImage3d8Bytes"; builtin = NEO::EBuiltInOps::CopyBufferToImage3d; break; case ImageBuiltin::CopyBufferToImage3dBytes: builtinName = "CopyBufferToImage3dBytes"; builtin = NEO::EBuiltInOps::CopyBufferToImage3d; break; case ImageBuiltin::CopyImage3dToBuffer16Bytes: builtinName = "CopyImage3dToBuffer16Bytes"; builtin = NEO::EBuiltInOps::CopyImage3dToBuffer; break; case ImageBuiltin::CopyImage3dToBuffer2Bytes: builtinName = "CopyImage3dToBuffer2Bytes"; builtin = NEO::EBuiltInOps::CopyImage3dToBuffer; break; case ImageBuiltin::CopyImage3dToBuffer4Bytes: builtinName = "CopyImage3dToBuffer4Bytes"; builtin = NEO::EBuiltInOps::CopyImage3dToBuffer; 
break; case ImageBuiltin::CopyImage3dToBuffer8Bytes: builtinName = "CopyImage3dToBuffer8Bytes"; builtin = NEO::EBuiltInOps::CopyImage3dToBuffer; break; case ImageBuiltin::CopyImage3dToBufferBytes: builtinName = "CopyImage3dToBufferBytes"; builtin = NEO::EBuiltInOps::CopyImage3dToBuffer; break; case ImageBuiltin::CopyImageRegion: builtinName = "CopyImageToImage3d"; builtin = NEO::EBuiltInOps::CopyImageToImage3d; break; default: UNRECOVERABLE_IF(true); }; auto builtId = static_cast(func); imageBuiltins[builtId] = loadBuiltIn(builtin, builtinName); } Kernel *BuiltinFunctionsLibImpl::getFunction(Builtin func) { auto builtId = static_cast(func); if (builtins[builtId].get() == nullptr) { initBuiltinKernel(func); } return builtins[builtId]->func.get(); } Kernel *BuiltinFunctionsLibImpl::getImageFunction(ImageBuiltin func) { auto builtId = static_cast(func); if (imageBuiltins[builtId].get() == nullptr) { initBuiltinImageKernel(func); } return imageBuiltins[builtId]->func.get(); } std::unique_ptr BuiltinFunctionsLibImpl::loadBuiltIn(NEO::EBuiltInOps::Type builtin, const char *builtInName) { using BuiltInCodeType = NEO::BuiltinCode::ECodeType; StackVec supportedTypes{}; if (!NEO::DebugManager.flags.RebuildPrecompiledKernels.get()) { supportedTypes.push_back(BuiltInCodeType::Binary); } supportedTypes.push_back(BuiltInCodeType::Intermediate); NEO::BuiltinCode builtinCode{}; for (auto &builtinCodeType : supportedTypes) { builtinCode = builtInsLib->getBuiltinsLib().getBuiltinCode(builtin, builtinCodeType, *device->getNEODevice()); if (!builtinCode.resource.empty()) { break; } } [[maybe_unused]] ze_result_t res; std::unique_ptr module; ze_module_handle_t moduleHandle; ze_module_desc_t moduleDesc = {}; moduleDesc.format = builtinCode.type == BuiltInCodeType::Binary ? 
ZE_MODULE_FORMAT_NATIVE : ZE_MODULE_FORMAT_IL_SPIRV; moduleDesc.pInputModule = reinterpret_cast(&builtinCode.resource[0]); moduleDesc.inputSize = builtinCode.resource.size(); res = device->createModule(&moduleDesc, &moduleHandle, nullptr, ModuleType::Builtin); UNRECOVERABLE_IF(res != ZE_RESULT_SUCCESS); module.reset(Module::fromHandle(moduleHandle)); std::unique_ptr kernel; ze_kernel_handle_t kernelHandle; ze_kernel_desc_t kernelDesc = {}; kernelDesc.pKernelName = builtInName; res = module->createKernel(&kernelDesc, &kernelHandle); DEBUG_BREAK_IF(res != ZE_RESULT_SUCCESS); kernel.reset(Kernel::fromHandle(kernelHandle)); return std::unique_ptr(new BuiltinData{std::move(module), std::move(kernel)}); } } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/builtin/builtin_functions_lib_impl.h000066400000000000000000000033021422164147700311720ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/core/source/builtin/builtin_functions_lib.h" #include "level_zero/core/source/device/device.h" #include "level_zero/core/source/module/module.h" namespace NEO { namespace EBuiltInOps { using Type = uint32_t; } class BuiltIns; } // namespace NEO namespace L0 { struct BuiltinFunctionsLibImpl : BuiltinFunctionsLib { struct BuiltinData; BuiltinFunctionsLibImpl(Device *device, NEO::BuiltIns *builtInsLib) : device(device), builtInsLib(builtInsLib) { } ~BuiltinFunctionsLibImpl() override { builtins->reset(); imageBuiltins->reset(); } Kernel *getFunction(Builtin func) override; Kernel *getImageFunction(ImageBuiltin func) override; void initBuiltinKernel(Builtin builtId) override; void initBuiltinImageKernel(ImageBuiltin func) override; MOCKABLE_VIRTUAL std::unique_ptr loadBuiltIn(NEO::EBuiltInOps::Type builtin, const char *builtInName); protected: std::unique_ptr builtins[static_cast(Builtin::COUNT)]; std::unique_ptr imageBuiltins[static_cast(ImageBuiltin::COUNT)]; Device 
*device; NEO::BuiltIns *builtInsLib; }; struct BuiltinFunctionsLibImpl::BuiltinData { MOCKABLE_VIRTUAL ~BuiltinData() { func.reset(); module.reset(); } BuiltinData() = default; BuiltinData(std::unique_ptr &&mod, std::unique_ptr &&ker) { module = std::move(mod); func = std::move(ker); } std::unique_ptr module; std::unique_ptr func; }; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/cache/000077500000000000000000000000001422164147700230135ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/source/cache/CMakeLists.txt000066400000000000000000000004731422164147700255570ustar00rootroot00000000000000# # Copyright (C) 2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_CACHE_RESERVATION ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/cache_reservation.h ) add_subdirectories() set_property(GLOBAL PROPERTY L0_SRCS_CACHE_RESERVATION ${L0_SRCS_CACHE_RESERVATION}) compute-runtime-22.14.22890/level_zero/core/source/cache/cache_reservation.h000066400000000000000000000011431422164147700266470ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include #include #include namespace L0 { struct Device; class CacheReservation { public: virtual ~CacheReservation() = default; static std::unique_ptr create(Device &device); virtual bool reserveCache(size_t cacheLevel, size_t cacheReservationSize) = 0; virtual bool setCacheAdvice(void *ptr, size_t regionSize, ze_cache_ext_region_t cacheRegion) = 0; virtual size_t getMaxCacheReservationSize() = 0; }; } // namespace L0compute-runtime-22.14.22890/level_zero/core/source/cache/linux/000077500000000000000000000000001422164147700241525ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/source/cache/linux/CMakeLists.txt000066400000000000000000000006611422164147700267150ustar00rootroot00000000000000# # Copyright (C) 2021 Intel Corporation # # SPDX-License-Identifier: MIT # 
set(L0_SRCS_CACHE_RESERVATION_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}cache_reservation_impl.cpp ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}cache_reservation_impl.h ) if(UNIX) set_property(GLOBAL PROPERTY L0_SRCS_CACHE_RESERVATION_LINUX ${L0_SRCS_CACHE_RESERVATION_LINUX}) endif() compute-runtime-22.14.22890/level_zero/core/source/cache/linux/cache_reservation_impl.cpp000066400000000000000000000012111422164147700313560ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/cache/linux/cache_reservation_impl.h" namespace L0 { std::unique_ptr CacheReservation::create(Device &device) { return std::make_unique(device); } bool CacheReservationImpl::reserveCache(size_t cacheLevel, size_t cacheReservationSize) { return false; } bool CacheReservationImpl::setCacheAdvice(void *ptr, size_t regionSize, ze_cache_ext_region_t cacheRegion) { return false; } size_t CacheReservationImpl::getMaxCacheReservationSize() { return 0; } } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/cache/linux/cache_reservation_impl.h000066400000000000000000000011101422164147700310210ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/core/source/cache/cache_reservation.h" namespace L0 { class CacheReservationImpl : public CacheReservation { public: ~CacheReservationImpl() override = default; CacheReservationImpl(Device &device){}; bool reserveCache(size_t cacheLevel, size_t cacheReservationSize) override; bool setCacheAdvice(void *ptr, size_t regionSize, ze_cache_ext_region_t cacheRegion) override; size_t getMaxCacheReservationSize() override; }; } // namespace 
L0compute-runtime-22.14.22890/level_zero/core/source/cache/windows/000077500000000000000000000000001422164147700245055ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/source/cache/windows/CMakeLists.txt000066400000000000000000000006221422164147700272450ustar00rootroot00000000000000# # Copyright (C) 2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_CACHE_RESERVATION_WINDOWS ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/cache_reservation_impl.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cache_reservation_impl.h ) if(WIN32) set_property(GLOBAL PROPERTY L0_SRCS_CACHE_RESERVATION_WINDOWS ${L0_SRCS_CACHE_RESERVATION_WINDOWS}) endif() compute-runtime-22.14.22890/level_zero/core/source/cache/windows/cache_reservation_impl.cpp000066400000000000000000000012131422164147700317130ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/cache/windows/cache_reservation_impl.h" namespace L0 { std::unique_ptr CacheReservation::create(Device &device) { return std::make_unique(device); } bool CacheReservationImpl::reserveCache(size_t cacheLevel, size_t cacheReservationSize) { return false; } bool CacheReservationImpl::setCacheAdvice(void *ptr, size_t regionSize, ze_cache_ext_region_t cacheRegion) { return false; } size_t CacheReservationImpl::getMaxCacheReservationSize() { return 0; } } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/cache/windows/cache_reservation_impl.h000066400000000000000000000011101422164147700313540ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/core/source/cache/cache_reservation.h" namespace L0 { class CacheReservationImpl : public CacheReservation { public: ~CacheReservationImpl() override = default; CacheReservationImpl(Device &device){}; bool reserveCache(size_t cacheLevel, size_t cacheReservationSize) 
override; bool setCacheAdvice(void *ptr, size_t regionSize, ze_cache_ext_region_t cacheRegion) override; size_t getMaxCacheReservationSize() override; }; } // namespace L0compute-runtime-22.14.22890/level_zero/core/source/cmdlist/000077500000000000000000000000001422164147700234075ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/source/cmdlist/CMakeLists.txt000066400000000000000000000001421422164147700261440ustar00rootroot00000000000000# # Copyright (C) 2021 Intel Corporation # # SPDX-License-Identifier: MIT # add_subdirectories() compute-runtime-22.14.22890/level_zero/core/source/cmdlist/cmdlist.cpp000066400000000000000000000204041422164147700255520ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/cmdlist/cmdlist.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/command_stream/preemption.h" #include "shared/source/device/device_info.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/source/memory_manager/memory_manager.h" #include "level_zero/core/source/device/device_imp.h" namespace L0 { CommandList::~CommandList() { if (cmdQImmediate) { cmdQImmediate->destroy(); } removeDeallocationContainerData(); if (this->cmdListType == CommandListType::TYPE_REGULAR || !this->isFlushTaskSubmissionEnabled) { removeHostPtrAllocations(); } printfFunctionContainer.clear(); } void CommandList::storePrintfFunction(Kernel *kernel) { auto it = std::find(this->printfFunctionContainer.begin(), this->printfFunctionContainer.end(), kernel); if (it == this->printfFunctionContainer.end()) { this->printfFunctionContainer.push_back(kernel); } } void CommandList::removeHostPtrAllocations() { auto memoryManager = device ? 
device->getNEODevice()->getMemoryManager() : nullptr; for (auto &allocation : hostPtrMap) { UNRECOVERABLE_IF(memoryManager == nullptr); memoryManager->freeGraphicsMemory(allocation.second); } hostPtrMap.clear(); } NEO::GraphicsAllocation *CommandList::getAllocationFromHostPtrMap(const void *buffer, uint64_t bufferSize) { auto allocation = hostPtrMap.lower_bound(buffer); if (allocation != hostPtrMap.end()) { if (buffer == allocation->first && ptrOffset(allocation->first, allocation->second->getUnderlyingBufferSize()) >= ptrOffset(buffer, bufferSize)) { return allocation->second; } } if (allocation != hostPtrMap.begin()) { allocation--; if (ptrOffset(allocation->first, allocation->second->getUnderlyingBufferSize()) >= ptrOffset(buffer, bufferSize)) { return allocation->second; } } if (this->cmdListType == CommandListType::TYPE_IMMEDIATE && this->isFlushTaskSubmissionEnabled) { auto allocation = this->csr->getInternalAllocationStorage()->obtainTemporaryAllocationWithPtr(bufferSize, buffer, NEO::AllocationType::EXTERNAL_HOST_PTR); if (allocation != nullptr) { auto alloc = allocation.get(); this->csr->getInternalAllocationStorage()->storeAllocation(std::move(allocation), NEO::AllocationUsage::TEMPORARY_ALLOCATION); return alloc; } } return nullptr; } NEO::GraphicsAllocation *CommandList::getHostPtrAlloc(const void *buffer, uint64_t bufferSize, bool hostCopyAllowed) { NEO::GraphicsAllocation *alloc = getAllocationFromHostPtrMap(buffer, bufferSize); if (alloc) { return alloc; } alloc = device->allocateMemoryFromHostPtr(buffer, bufferSize, hostCopyAllowed); UNRECOVERABLE_IF(alloc == nullptr); if (this->cmdListType == CommandListType::TYPE_IMMEDIATE && this->isFlushTaskSubmissionEnabled) { this->csr->getInternalAllocationStorage()->storeAllocation(std::unique_ptr(alloc), NEO::AllocationUsage::TEMPORARY_ALLOCATION); } else if (alloc->getAllocationType() == NEO::AllocationType::EXTERNAL_HOST_PTR) { hostPtrMap.insert(std::make_pair(buffer, alloc)); } else { 
commandContainer.getDeallocationContainer().push_back(alloc); } return alloc; } void CommandList::removeDeallocationContainerData() { auto memoryManager = device ? device->getNEODevice()->getMemoryManager() : nullptr; auto container = commandContainer.getDeallocationContainer(); for (auto deallocation : container) { DEBUG_BREAK_IF(deallocation == nullptr); UNRECOVERABLE_IF(memoryManager == nullptr); NEO::SvmAllocationData *allocData = device->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(reinterpret_cast(deallocation->getGpuAddress())); if (allocData) { device->getDriverHandle()->getSvmAllocsManager()->removeSVMAlloc(*allocData); } if (!((deallocation->getAllocationType() == NEO::AllocationType::INTERNAL_HEAP) || (deallocation->getAllocationType() == NEO::AllocationType::LINEAR_STREAM))) { memoryManager->freeGraphicsMemory(deallocation); eraseDeallocationContainerEntry(deallocation); } } } void CommandList::eraseDeallocationContainerEntry(NEO::GraphicsAllocation *allocation) { std::vector::iterator allocErase; auto container = &commandContainer.getDeallocationContainer(); allocErase = std::find(container->begin(), container->end(), allocation); if (allocErase != container->end()) { container->erase(allocErase); } } void CommandList::eraseResidencyContainerEntry(NEO::GraphicsAllocation *allocation) { std::vector::iterator allocErase; auto container = &commandContainer.getResidencyContainer(); allocErase = std::find(container->begin(), container->end(), allocation); if (allocErase != container->end()) { container->erase(allocErase); } } NEO::PreemptionMode CommandList::obtainFunctionPreemptionMode(Kernel *kernel) { NEO::PreemptionFlags flags = NEO::PreemptionHelper::createPreemptionLevelFlags(*device->getNEODevice(), &kernel->getImmutableData()->getDescriptor()); return NEO::PreemptionHelper::taskPreemptionMode(device->getDevicePreemptionMode(), flags); } void CommandList::makeResidentAndMigrate(bool performMigration) { for (auto alloc : 
commandContainer.getResidencyContainer()) { csr->makeResident(*alloc); if (performMigration && (alloc->getAllocationType() == NEO::AllocationType::SVM_GPU || alloc->getAllocationType() == NEO::AllocationType::SVM_CPU)) { auto pageFaultManager = device->getDriverHandle()->getMemoryManager()->getPageFaultManager(); pageFaultManager->moveAllocationToGpuDomain(reinterpret_cast(alloc->getGpuAddress())); } } } void CommandList::migrateSharedAllocations() { auto deviceImp = static_cast(device); DriverHandleImp *driverHandleImp = static_cast(deviceImp->getDriverHandle()); std::lock_guard lock(driverHandleImp->sharedMakeResidentAllocationsLock); auto pageFaultManager = device->getDriverHandle()->getMemoryManager()->getPageFaultManager(); for (auto alloc : driverHandleImp->sharedMakeResidentAllocations) { pageFaultManager->moveAllocationToGpuDomain(reinterpret_cast(alloc.second->getGpuAddress())); } if (this->unifiedMemoryControls.indirectSharedAllocationsAllowed) { auto pageFaultManager = device->getDriverHandle()->getMemoryManager()->getPageFaultManager(); pageFaultManager->moveAllocationsWithinUMAllocsManagerToGpuDomain(this->device->getDriverHandle()->getSvmAllocsManager()); } } void CommandList::handleIndirectAllocationResidency() { bool indirectAllocationsAllowed = this->hasIndirectAllocationsAllowed(); NEO::Device *neoDevice = this->device->getNEODevice(); if (indirectAllocationsAllowed) { auto svmAllocsManager = this->device->getDriverHandle()->getSvmAllocsManager(); auto submitAsPack = this->device->getDriverHandle()->getMemoryManager()->allowIndirectAllocationsAsPack(neoDevice->getRootDeviceIndex()); if (NEO::DebugManager.flags.MakeIndirectAllocationsResidentAsPack.get() != -1) { submitAsPack = !!NEO::DebugManager.flags.MakeIndirectAllocationsResidentAsPack.get(); } if (submitAsPack) { svmAllocsManager->makeIndirectAllocationsResident(*(this->csr), this->csr->peekTaskCount() + 1u); } else { UnifiedMemoryControls unifiedMemoryControls = 
this->getUnifiedMemoryControls(); svmAllocsManager->addInternalAllocationsToResidencyContainer(neoDevice->getRootDeviceIndex(), this->commandContainer.getResidencyContainer(), unifiedMemoryControls.generateMask()); } } } } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/cmdlist/cmdlist.h000066400000000000000000000354061422164147700252270ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_container/cmdcontainer.h" #include "shared/source/command_stream/preemption_mode.h" #include "shared/source/command_stream/stream_properties.h" #include "shared/source/command_stream/thread_arbitration_policy.h" #include "level_zero/core/source/cmdqueue/cmdqueue.h" #include "level_zero/core/source/device/device.h" #include "level_zero/core/source/kernel/kernel.h" #include #include #include struct _ze_command_list_handle_t {}; namespace L0 { struct EventPool; struct Event; struct Kernel; struct CommandList : _ze_command_list_handle_t { static constexpr uint32_t defaultNumIddsPerBlock = 64u; static constexpr uint32_t commandListimmediateIddsPerBlock = 1u; CommandList() = delete; CommandList(uint32_t numIddsPerBlock) : commandContainer(numIddsPerBlock) {} template struct Allocator { static CommandList *allocate(uint32_t numIddsPerBlock) { return new Type(numIddsPerBlock); } }; struct CommandToPatch { enum CommandType { FrontEndState, Invalid }; void *pDestination = nullptr; void *pCommand = nullptr; CommandType type = Invalid; }; using CommandsToPatch = StackVec; virtual ze_result_t close() = 0; virtual ze_result_t destroy() = 0; virtual ze_result_t appendEventReset(ze_event_handle_t hEvent) = 0; virtual ze_result_t appendBarrier(ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) = 0; virtual ze_result_t appendMemoryRangesBarrier(uint32_t numRanges, const size_t *pRangeSizes, const void **pRanges, ze_event_handle_t 
hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) = 0; virtual ze_result_t appendImageCopyFromMemory(ze_image_handle_t hDstImage, const void *srcptr, const ze_image_region_t *pDstRegion, ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) = 0; virtual ze_result_t appendImageCopyToMemory(void *dstptr, ze_image_handle_t hSrcImage, const ze_image_region_t *pSrcRegion, ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) = 0; virtual ze_result_t appendImageCopyRegion(ze_image_handle_t hDstImage, ze_image_handle_t hSrcImage, const ze_image_region_t *pDstRegion, const ze_image_region_t *pSrcRegion, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) = 0; virtual ze_result_t appendImageCopy(ze_image_handle_t hDstImage, ze_image_handle_t hSrcImage, ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) = 0; virtual ze_result_t appendLaunchKernel(ze_kernel_handle_t hKernel, const ze_group_count_t *pThreadGroupDimensions, ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) = 0; virtual ze_result_t appendLaunchCooperativeKernel(ze_kernel_handle_t hKernel, const ze_group_count_t *pLaunchFuncArgs, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) = 0; virtual ze_result_t appendLaunchKernelIndirect(ze_kernel_handle_t hKernel, const ze_group_count_t *pDispatchArgumentsBuffer, ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) = 0; virtual ze_result_t appendLaunchMultipleKernelsIndirect(uint32_t numKernels, const ze_kernel_handle_t *phKernels, const uint32_t *pNumLaunchArguments, const ze_group_count_t *pLaunchArgumentsBuffer, ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) = 0; virtual ze_result_t appendMemAdvise(ze_device_handle_t hDevice, const void *ptr, size_t size, ze_memory_advice_t advice) = 0; 
virtual ze_result_t appendMemoryCopy(void *dstptr, const void *srcptr, size_t size, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) = 0; virtual ze_result_t appendPageFaultCopy(NEO::GraphicsAllocation *dstptr, NEO::GraphicsAllocation *srcptr, size_t size, bool flushHost) = 0; virtual ze_result_t appendMemoryCopyRegion(void *dstPtr, const ze_copy_region_t *dstRegion, uint32_t dstPitch, uint32_t dstSlicePitch, const void *srcPtr, const ze_copy_region_t *srcRegion, uint32_t srcPitch, uint32_t srcSlicePitch, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) = 0; virtual ze_result_t appendMemoryFill(void *ptr, const void *pattern, size_t patternSize, size_t size, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) = 0; virtual ze_result_t appendMemoryPrefetch(const void *ptr, size_t count) = 0; virtual ze_result_t appendSignalEvent(ze_event_handle_t hEvent) = 0; virtual ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent) = 0; virtual ze_result_t appendWriteGlobalTimestamp(uint64_t *dstptr, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) = 0; virtual ze_result_t appendMemoryCopyFromContext(void *dstptr, ze_context_handle_t hContextSrc, const void *srcptr, size_t size, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) = 0; virtual ze_result_t reserveSpace(size_t size, void **ptr) = 0; virtual ze_result_t reset() = 0; virtual ze_result_t appendMetricMemoryBarrier() = 0; virtual ze_result_t appendMetricStreamerMarker(zet_metric_streamer_handle_t hMetricStreamer, uint32_t value) = 0; virtual ze_result_t appendMetricQueryBegin(zet_metric_query_handle_t hMetricQuery) = 0; virtual ze_result_t appendMetricQueryEnd(zet_metric_query_handle_t hMetricQuery, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) = 0; virtual 
ze_result_t appendQueryKernelTimestamps(uint32_t numEvents, ze_event_handle_t *phEvents, void *dstptr, const size_t *pOffsets, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) = 0; virtual ze_result_t appendMILoadRegImm(uint32_t reg, uint32_t value) = 0; virtual ze_result_t appendMILoadRegReg(uint32_t reg1, uint32_t reg2) = 0; virtual ze_result_t appendMILoadRegMem(uint32_t reg1, uint64_t address) = 0; virtual ze_result_t appendMIStoreRegMem(uint32_t reg1, uint64_t address) = 0; virtual ze_result_t appendMIMath(void *aluArray, size_t aluCount) = 0; virtual ze_result_t appendMIBBStart(uint64_t address, size_t predication, bool secondLevel) = 0; virtual ze_result_t appendMIBBEnd() = 0; virtual ze_result_t appendMINoop() = 0; virtual ze_result_t appendPipeControl(void *dstPtr, uint64_t value) = 0; virtual ze_result_t appendWaitOnMemory(void *desc, void *ptr, uint32_t data, ze_event_handle_t hSignalEvent) = 0; virtual ze_result_t appendWriteToMemory(void *desc, void *ptr, uint64_t data) = 0; static CommandList *create(uint32_t productFamily, Device *device, NEO::EngineGroupType engineGroupType, ze_command_list_flags_t flags, ze_result_t &resultValue); static CommandList *createImmediate(uint32_t productFamily, Device *device, const ze_command_queue_desc_t *desc, bool internalUsage, NEO::EngineGroupType engineGroupType, ze_result_t &resultValue); static CommandList *fromHandle(ze_command_list_handle_t handle) { return static_cast(handle); } inline ze_command_list_handle_t toHandle() { return this; } uint32_t getCommandListPerThreadScratchSize() const { return commandListPerThreadScratchSize; } void setCommandListPerThreadScratchSize(uint32_t size) { commandListPerThreadScratchSize = size; } uint32_t getCommandListPerThreadPrivateScratchSize() const { return commandListPerThreadPrivateScratchSize; } void setCommandListPerThreadPrivateScratchSize(uint32_t size) { commandListPerThreadPrivateScratchSize = size; } uint32_t 
getCommandListSLMEnable() const { return commandListSLMEnabled; } void setCommandListSLMEnable(bool isSLMEnabled) { commandListSLMEnabled = isSLMEnabled; } NEO::PreemptionMode getCommandListPreemptionMode() const { return commandListPreemptionMode; } UnifiedMemoryControls getUnifiedMemoryControls() const { return unifiedMemoryControls; } bool hasIndirectAllocationsAllowed() const { return indirectAllocationsAllowed; } void handleIndirectAllocationResidency(); NEO::PreemptionMode obtainFunctionPreemptionMode(Kernel *kernel); std::vector &getPrintfFunctionContainer() { return this->printfFunctionContainer; } void storePrintfFunction(Kernel *kernel); void removeDeallocationContainerData(); void removeHostPtrAllocations(); void eraseDeallocationContainerEntry(NEO::GraphicsAllocation *allocation); void eraseResidencyContainerEntry(NEO::GraphicsAllocation *allocation); bool isCopyOnly() const { return NEO::EngineHelper::isCopyOnlyEngineType(engineGroupType); } bool isInternal() const { return internalUsage; } bool containsCooperativeKernels() const { return containsCooperativeKernelsFlag; } enum CommandListType : uint32_t { TYPE_REGULAR = 0u, TYPE_IMMEDIATE = 1u }; virtual ze_result_t executeCommandListImmediate(bool performMigration) = 0; virtual ze_result_t initialize(Device *device, NEO::EngineGroupType engineGroupType, ze_command_list_flags_t flags) = 0; virtual ~CommandList(); NEO::CommandContainer commandContainer; bool getContainsStatelessUncachedResource() { return containsStatelessUncachedResource; } std::map &getHostPtrMap() { return hostPtrMap; }; const NEO::StreamProperties &getRequiredStreamState() { return requiredStreamState; } const NEO::StreamProperties &getFinalStreamState() { return finalStreamState; } const CommandsToPatch &getCommandsToPatch() { return commandsToPatch; } void makeResidentAndMigrate(bool); void migrateSharedAllocations(); std::vector printfFunctionContainer; CommandQueue *cmdQImmediate = nullptr; NEO::CommandStreamReceiver *csr = 
nullptr; Device *device = nullptr; NEO::PreemptionMode commandListPreemptionMode = NEO::PreemptionMode::Initial; uint32_t cmdListType = CommandListType::TYPE_REGULAR; uint32_t commandListPerThreadScratchSize = 0u; uint32_t commandListPerThreadPrivateScratchSize = 0u; uint32_t partitionCount = 1; bool isFlushTaskSubmissionEnabled = false; bool isSyncModeQueue = false; bool commandListSLMEnabled = false; bool requiresQueueUncachedMocs = false; protected: NEO::GraphicsAllocation *getAllocationFromHostPtrMap(const void *buffer, uint64_t bufferSize); NEO::GraphicsAllocation *getHostPtrAlloc(const void *buffer, uint64_t bufferSize, bool hostCopyAllowed); std::map hostPtrMap; std::vector ownedPrivateAllocations; std::vector patternAllocations; NEO::StreamProperties requiredStreamState{}; NEO::StreamProperties finalStreamState{}; CommandsToPatch commandsToPatch{}; UnifiedMemoryControls unifiedMemoryControls; ze_command_list_flags_t flags = 0u; NEO::EngineGroupType engineGroupType; bool indirectAllocationsAllowed = false; bool internalUsage = false; bool containsCooperativeKernelsFlag = false; bool containsStatelessUncachedResource = false; }; using CommandListAllocatorFn = CommandList *(*)(uint32_t); extern CommandListAllocatorFn commandListFactory[]; extern CommandListAllocatorFn commandListFactoryImmediate[]; template struct CommandListPopulateFactory { CommandListPopulateFactory() { commandListFactory[productFamily] = CommandList::Allocator::allocate; } }; template struct CommandListImmediatePopulateFactory { CommandListImmediatePopulateFactory() { commandListFactoryImmediate[productFamily] = CommandList::Allocator::allocate; } }; } // namespace L0 
compute-runtime-22.14.22890/level_zero/core/source/cmdlist/cmdlist_extended/000077500000000000000000000000001422164147700267265ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/source/cmdlist/cmdlist_extended/cmdlist_extended.inl000066400000000000000000000054421422164147700327560ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/linear_stream.h" #include "level_zero/core/source/cmdlist/cmdlist_hw.h" namespace L0 { template ze_result_t CommandListCoreFamily::appendMILoadRegImm(uint32_t reg, uint32_t value) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } template ze_result_t CommandListCoreFamily::appendMILoadRegReg(uint32_t reg1, uint32_t reg2) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } template ze_result_t CommandListCoreFamily::appendMILoadRegMem(uint32_t reg1, uint64_t address) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } template ze_result_t CommandListCoreFamily::appendMIStoreRegMem(uint32_t reg1, uint64_t address) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } template ze_result_t CommandListCoreFamily::appendMIMath(void *aluArray, size_t aluCount) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } template ze_result_t CommandListCoreFamily::appendMIBBStart(uint64_t address, size_t predication, bool secondLevel) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } template ze_result_t CommandListCoreFamily::appendMIBBEnd() { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } template ze_result_t CommandListCoreFamily::appendMINoop() { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } template ze_result_t CommandListCoreFamily::appendPipeControl(void *dstPtr, uint64_t value) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } template ze_result_t CommandListCoreFamily::appendWaitOnMemory(void *desc, void *ptr, uint32_t data, ze_event_handle_t hSignalEvent) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } template ze_result_t 
CommandListCoreFamily::appendWriteToMemory(void *desc, void *ptr, uint64_t data) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/cmdlist/cmdlist_hw.h000066400000000000000000000406221422164147700257210ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/stream_properties.h" #include "shared/source/helpers/pipe_control_args.h" #include "level_zero/core/source/builtin/builtin_functions_lib.h" #include "level_zero/core/source/cmdlist/cmdlist_imp.h" #include "igfxfmid.h" namespace NEO { enum class ImageType; } namespace L0 { #pragma pack(1) struct EventData { uint64_t address; uint64_t packetsInUse; uint64_t timestampSizeInDw; }; #pragma pack() static_assert(sizeof(EventData) == (3 * sizeof(uint64_t)), "This structure is consumed by GPU and has to follow specific restrictions for padding and size"); struct AlignedAllocationData { uintptr_t alignedAllocationPtr = 0u; size_t offset = 0u; NEO::GraphicsAllocation *alloc = nullptr; bool needsFlush = false; }; struct EventPool; struct Event; template struct CommandListCoreFamily : CommandListImp { using BaseClass = CommandListImp; using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA; using STATE_BASE_ADDRESS = typename GfxFamily::STATE_BASE_ADDRESS; using CommandListImp::CommandListImp; ze_result_t initialize(Device *device, NEO::EngineGroupType engineGroupType, ze_command_list_flags_t flags) override; virtual void programL3(bool isSLMused); ~CommandListCoreFamily() override; ze_result_t close() override; ze_result_t appendEventReset(ze_event_handle_t hEvent) override; ze_result_t appendBarrier(ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override; ze_result_t appendMemoryRangesBarrier(uint32_t numRanges, const size_t 
*pRangeSizes, const void **pRanges, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override; ze_result_t appendImageCopyFromMemory(ze_image_handle_t hDstImage, const void *srcptr, const ze_image_region_t *pDstRegion, ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override; ze_result_t appendImageCopyToMemory(void *dstptr, ze_image_handle_t hSrcImage, const ze_image_region_t *pSrcRegion, ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override; ze_result_t appendImageCopyRegion(ze_image_handle_t hDstImage, ze_image_handle_t hSrcImage, const ze_image_region_t *pDstRegion, const ze_image_region_t *pSrcRegion, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override; ze_result_t appendImageCopy(ze_image_handle_t hDstImage, ze_image_handle_t hSrcImage, ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override; ze_result_t appendLaunchKernel(ze_kernel_handle_t hKernel, const ze_group_count_t *pThreadGroupDimensions, ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override; ze_result_t appendLaunchCooperativeKernel(ze_kernel_handle_t hKernel, const ze_group_count_t *pLaunchFuncArgs, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override; ze_result_t appendLaunchKernelIndirect(ze_kernel_handle_t hKernel, const ze_group_count_t *pDispatchArgumentsBuffer, ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override; ze_result_t appendLaunchMultipleKernelsIndirect(uint32_t numKernels, const ze_kernel_handle_t *phKernels, const uint32_t *pNumLaunchArguments, const ze_group_count_t *pLaunchArgumentsBuffer, ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override; ze_result_t appendMemAdvise(ze_device_handle_t hDevice, const void *ptr, size_t size, 
ze_memory_advice_t advice) override; ze_result_t appendMemoryCopy(void *dstptr, const void *srcptr, size_t size, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override; ze_result_t appendPageFaultCopy(NEO::GraphicsAllocation *dstAllocation, NEO::GraphicsAllocation *srcAllocation, size_t size, bool flushHost) override; ze_result_t appendMemoryCopyRegion(void *dstPtr, const ze_copy_region_t *dstRegion, uint32_t dstPitch, uint32_t dstSlicePitch, const void *srcPtr, const ze_copy_region_t *srcRegion, uint32_t srcPitch, uint32_t srcSlicePitch, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override; ze_result_t appendMemoryPrefetch(const void *ptr, size_t count) override; ze_result_t appendMemoryFill(void *ptr, const void *pattern, size_t patternSize, size_t size, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override; ze_result_t appendMILoadRegImm(uint32_t reg, uint32_t value) override; ze_result_t appendMILoadRegReg(uint32_t reg1, uint32_t reg2) override; ze_result_t appendMILoadRegMem(uint32_t reg1, uint64_t address) override; ze_result_t appendMIStoreRegMem(uint32_t reg1, uint64_t address) override; ze_result_t appendMIMath(void *aluArray, size_t aluCount) override; ze_result_t appendMIBBStart(uint64_t address, size_t predication, bool secondLevel) override; ze_result_t appendMIBBEnd() override; ze_result_t appendMINoop() override; ze_result_t appendPipeControl(void *dstPtr, uint64_t value) override; ze_result_t appendWaitOnMemory(void *desc, void *ptr, uint32_t data, ze_event_handle_t hSignalEvent) override; ze_result_t appendWriteToMemory(void *desc, void *ptr, uint64_t data) override; ze_result_t appendQueryKernelTimestamps(uint32_t numEvents, ze_event_handle_t *phEvents, void *dstptr, const size_t *pOffsets, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override; ze_result_t 
appendSignalEvent(ze_event_handle_t hEvent) override; ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent) override; ze_result_t appendWriteGlobalTimestamp(uint64_t *dstptr, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override; ze_result_t appendMemoryCopyFromContext(void *dstptr, ze_context_handle_t hContextSrc, const void *srcptr, size_t size, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override; void appendMultiPartitionPrologue(uint32_t partitionDataSize) override; void appendMultiPartitionEpilogue() override; ze_result_t reserveSpace(size_t size, void **ptr) override; ze_result_t reset() override; ze_result_t executeCommandListImmediate(bool performMigration) override; size_t getReserveSshSize(); protected: MOCKABLE_VIRTUAL ze_result_t appendMemoryCopyKernelWithGA(void *dstPtr, NEO::GraphicsAllocation *dstPtrAlloc, uint64_t dstOffset, void *srcPtr, NEO::GraphicsAllocation *srcPtrAlloc, uint64_t srcOffset, uint64_t size, uint64_t elementSize, Builtin builtin, ze_event_handle_t hSignalEvent, bool isStateless); MOCKABLE_VIRTUAL ze_result_t appendMemoryCopyBlit(uintptr_t dstPtr, NEO::GraphicsAllocation *dstPtrAlloc, uint64_t dstOffset, uintptr_t srcPtr, NEO::GraphicsAllocation *srcPtrAlloc, uint64_t srcOffset, uint64_t size); MOCKABLE_VIRTUAL ze_result_t appendMemoryCopyBlitRegion(NEO::GraphicsAllocation *srcAlloc, NEO::GraphicsAllocation *dstAlloc, size_t srcOffset, size_t dstOffset, ze_copy_region_t srcRegion, ze_copy_region_t dstRegion, const Vec3 ©Size, size_t srcRowPitch, size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch, const Vec3 &srcSize, const Vec3 &dstSize, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents); MOCKABLE_VIRTUAL ze_result_t appendMemoryCopyKernel2d(AlignedAllocationData *dstAlignedAllocation, AlignedAllocationData *srcAlignedAllocation, Builtin builtin, const ze_copy_region_t 
*dstRegion, uint32_t dstPitch, size_t dstOffset, const ze_copy_region_t *srcRegion, uint32_t srcPitch, size_t srcOffset, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents); MOCKABLE_VIRTUAL ze_result_t appendMemoryCopyKernel3d(AlignedAllocationData *dstAlignedAllocation, AlignedAllocationData *srcAlignedAllocation, Builtin builtin, const ze_copy_region_t *dstRegion, uint32_t dstPitch, uint32_t dstSlicePitch, size_t dstOffset, const ze_copy_region_t *srcRegion, uint32_t srcPitch, uint32_t srcSlicePitch, size_t srcOffset, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents); MOCKABLE_VIRTUAL ze_result_t appendBlitFill(void *ptr, const void *pattern, size_t patternSize, size_t size, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents); MOCKABLE_VIRTUAL ze_result_t appendCopyImageBlit(NEO::GraphicsAllocation *src, NEO::GraphicsAllocation *dst, const Vec3 &srcOffsets, const Vec3 &dstOffsets, size_t srcRowPitch, size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch, size_t bytesPerPixel, const Vec3 ©Size, const Vec3 &srcSize, const Vec3 &dstSize, ze_event_handle_t hSignalEvent); MOCKABLE_VIRTUAL ze_result_t appendLaunchKernelWithParams(ze_kernel_handle_t hKernel, const ze_group_count_t *pThreadGroupDimensions, ze_event_handle_t hEvent, bool isIndirect, bool isPredicate, bool isCooperative); ze_result_t appendLaunchKernelSplit(ze_kernel_handle_t hKernel, const ze_group_count_t *pThreadGroupDimensions, ze_event_handle_t hEvent); ze_result_t prepareIndirectParams(const ze_group_count_t *pThreadGroupDimensions); void updateStreamProperties(Kernel &kernel, bool isMultiOsContextCapable, bool isCooperative); void clearCommandsToPatch(); void applyMemoryRangesBarrier(uint32_t numRanges, const size_t *pRangeSizes, const void **pRanges); ze_result_t setGlobalWorkSizeIndirect(NEO::CrossThreadDataOffset offsets[3], uint64_t crossThreadAddress, uint32_t lws[3]); 
ze_result_t programSyncBuffer(Kernel &kernel, NEO::Device &device, const ze_group_count_t *pThreadGroupDimensions); void appendWriteKernelTimestamp(ze_event_handle_t hEvent, bool beforeWalker, bool maskLsb); void adjustWriteKernelTimestamp(uint64_t globalAddress, uint64_t contextAddress, bool maskLsb, uint32_t mask); void appendEventForProfiling(ze_event_handle_t hEvent, bool beforeWalker); void appendEventForProfilingAllWalkers(ze_event_handle_t hEvent, bool beforeWalker); void appendEventForProfilingCopyCommand(ze_event_handle_t hEvent, bool beforeWalker); void appendSignalEventPostWalker(ze_event_handle_t hEvent); void programStateBaseAddress(NEO::CommandContainer &container, bool genericMediaStateClearRequired); void appendComputeBarrierCommand(); NEO::PipeControlArgs createBarrierFlags(); void appendMultiTileBarrier(NEO::Device &neoDevice); size_t estimateBufferSizeMultiTileBarrier(const NEO::HardwareInfo &hwInfo); uint64_t getInputBufferSize(NEO::ImageType imageType, uint64_t bytesPerPixel, const ze_image_region_t *region); MOCKABLE_VIRTUAL AlignedAllocationData getAlignedAllocation(Device *device, const void *buffer, uint64_t bufferSize, bool hostCopyAllowed); ze_result_t addEventsToCmdList(uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents); size_t cmdListCurrentStartOffset = 0; bool containsAnyKernel = false; }; template struct CommandListProductFamily; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/cmdlist/cmdlist_hw.inl000066400000000000000000003456141422164147700262650ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/built_ins/built_ins.h" #include "shared/source/command_container/command_encoder.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/command_stream/linear_stream.h" #include "shared/source/command_stream/preemption.h" #include "shared/source/device/device.h" #include 
"shared/source/helpers/api_specific_config.h" #include "shared/source/helpers/blit_commands_helper.h" #include "shared/source/helpers/heap_helper.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/helpers/pipe_control_args.h" #include "shared/source/helpers/preamble.h" #include "shared/source/helpers/register_offsets.h" #include "shared/source/helpers/string.h" #include "shared/source/helpers/surface_format_info.h" #include "shared/source/indirect_heap/indirect_heap.h" #include "shared/source/memory_manager/allocation_properties.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "shared/source/memory_manager/memadvise_flags.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/os_interface/hw_info_config.h" #include "shared/source/page_fault_manager/cpu_page_fault_manager.h" #include "shared/source/program/sync_buffer_handler.h" #include "shared/source/program/sync_buffer_handler.inl" #include "shared/source/utilities/software_tags_manager.h" #include "level_zero/core/source/cmdlist/cmdlist_hw.h" #include "level_zero/core/source/device/device_imp.h" #include "level_zero/core/source/driver/driver_handle_imp.h" #include "level_zero/core/source/event/event.h" #include "level_zero/core/source/image/image.h" #include "level_zero/core/source/module/module.h" #include namespace L0 { template struct EncodeStateBaseAddress; inline ze_result_t parseErrorCode(NEO::ErrorCode returnValue) { switch (returnValue) { case NEO::ErrorCode::OUT_OF_DEVICE_MEMORY: return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY; default: return ZE_RESULT_SUCCESS; } return ZE_RESULT_SUCCESS; } template CommandListCoreFamily::~CommandListCoreFamily() { clearCommandsToPatch(); for (auto alloc : this->ownedPrivateAllocations) { device->getNEODevice()->getMemoryManager()->freeGraphicsMemory(alloc); } this->ownedPrivateAllocations.clear(); for (auto &patternAlloc : this->patternAllocations) { 
device->storeReusableAllocation(*patternAlloc); } this->patternAllocations.clear(); } template ze_result_t CommandListCoreFamily::reset() { printfFunctionContainer.clear(); removeDeallocationContainerData(); removeHostPtrAllocations(); commandContainer.reset(); containsStatelessUncachedResource = false; indirectAllocationsAllowed = false; unifiedMemoryControls.indirectHostAllocationsAllowed = false; unifiedMemoryControls.indirectSharedAllocationsAllowed = false; unifiedMemoryControls.indirectDeviceAllocationsAllowed = false; commandListPreemptionMode = device->getDevicePreemptionMode(); commandListPerThreadScratchSize = 0u; requiredStreamState = {}; finalStreamState = requiredStreamState; containsAnyKernel = false; containsCooperativeKernelsFlag = false; clearCommandsToPatch(); commandListSLMEnabled = false; if (!isCopyOnly()) { if (!NEO::ApiSpecificConfig::getBindlessConfiguration()) { programStateBaseAddress(commandContainer, false); } commandContainer.setDirtyStateForAllHeaps(false); } for (auto alloc : this->ownedPrivateAllocations) { device->getNEODevice()->getMemoryManager()->freeGraphicsMemory(alloc); } this->ownedPrivateAllocations.clear(); cmdListCurrentStartOffset = 0; return ZE_RESULT_SUCCESS; } template ze_result_t CommandListCoreFamily::initialize(Device *device, NEO::EngineGroupType engineGroupType, ze_command_list_flags_t flags) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; this->device = device; this->commandListPreemptionMode = device->getDevicePreemptionMode(); this->engineGroupType = engineGroupType; this->flags = flags; if (device->isImplicitScalingCapable() && !this->internalUsage && !isCopyOnly()) { this->partitionCount = static_cast(this->device->getNEODevice()->getDeviceBitfield().count()); } if (this->cmdListType == CommandListType::TYPE_IMMEDIATE && !isCopyOnly() && !isInternal()) { const auto &hwInfo = device->getHwInfo(); this->isFlushTaskSubmissionEnabled = 
NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily).isPlatformFlushTaskEnabled(hwInfo); if (NEO::DebugManager.flags.EnableFlushTaskSubmission.get() != -1) { this->isFlushTaskSubmissionEnabled = !!NEO::DebugManager.flags.EnableFlushTaskSubmission.get(); } commandContainer.setFlushTaskUsedForImmediate(this->isFlushTaskSubmissionEnabled); } commandContainer.setReservedSshSize(getReserveSshSize()); DeviceImp *deviceImp = static_cast(device); auto returnValue = commandContainer.initialize(deviceImp->getActiveDevice(), deviceImp->allocationsForReuse.get(), !isCopyOnly()); ze_result_t returnType = parseErrorCode(returnValue); if (returnType == ZE_RESULT_SUCCESS) { if (!isCopyOnly()) { if (!NEO::ApiSpecificConfig::getBindlessConfiguration()) { if (!this->isFlushTaskSubmissionEnabled) { programStateBaseAddress(commandContainer, false); } } commandContainer.setDirtyStateForAllHeaps(false); } } return returnType; } template ze_result_t CommandListCoreFamily::executeCommandListImmediate(bool performMigration) { this->close(); ze_command_list_handle_t immediateHandle = this->toHandle(); this->cmdQImmediate->executeCommandLists(1, &immediateHandle, nullptr, performMigration); this->cmdQImmediate->synchronize(std::numeric_limits::max()); this->reset(); return ZE_RESULT_SUCCESS; } template ze_result_t CommandListCoreFamily::close() { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; commandContainer.removeDuplicatesFromResidencyContainer(); NEO::EncodeBatchBufferStartOrEnd::programBatchBufferEnd(commandContainer); return ZE_RESULT_SUCCESS; } template void CommandListCoreFamily::programL3(bool isSLMused) {} template ze_result_t CommandListCoreFamily::appendLaunchKernel(ze_kernel_handle_t hKernel, const ze_group_count_t *pThreadGroupDimensions, ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { NEO::Device *neoDevice = device->getNEODevice(); uint32_t callId = 0; if (NEO::DebugManager.flags.EnableSWTags.get()) { 
neoDevice->getRootDeviceEnvironment().tagsManager->insertTag( *commandContainer.getCommandStream(), *neoDevice, "zeCommandListAppendLaunchKernel", ++neoDevice->getRootDeviceEnvironment().tagsManager->currentCallCount); callId = neoDevice->getRootDeviceEnvironment().tagsManager->currentCallCount; } ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents); if (ret) { return ret; } auto res = appendLaunchKernelWithParams(hKernel, pThreadGroupDimensions, hEvent, false, false, false); if (NEO::DebugManager.flags.EnableSWTags.get()) { neoDevice->getRootDeviceEnvironment().tagsManager->insertTag( *commandContainer.getCommandStream(), *neoDevice, "zeCommandListAppendLaunchKernel", callId); } return res; } template ze_result_t CommandListCoreFamily::appendLaunchCooperativeKernel(ze_kernel_handle_t hKernel, const ze_group_count_t *pLaunchFuncArgs, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents); if (ret) { return ret; } return appendLaunchKernelWithParams(hKernel, pLaunchFuncArgs, hSignalEvent, false, false, true); } template ze_result_t CommandListCoreFamily::appendLaunchKernelIndirect(ze_kernel_handle_t hKernel, const ze_group_count_t *pDispatchArgumentsBuffer, ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents); if (ret) { return ret; } appendEventForProfiling(hEvent, true); ret = appendLaunchKernelWithParams(hKernel, pDispatchArgumentsBuffer, nullptr, true, false, false); appendSignalEventPostWalker(hEvent); return ret; } template ze_result_t CommandListCoreFamily::appendLaunchMultipleKernelsIndirect(uint32_t numKernels, const ze_kernel_handle_t *phKernels, const uint32_t *pNumLaunchArguments, const ze_group_count_t *pLaunchArgumentsBuffer, ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { ze_result_t ret = 
addEventsToCmdList(numWaitEvents, phWaitEvents); if (ret) { return ret; } appendEventForProfiling(hEvent, true); const bool haveLaunchArguments = pLaunchArgumentsBuffer != nullptr; auto allocData = device->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(pNumLaunchArguments); auto alloc = allocData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex()); commandContainer.addToResidencyContainer(alloc); using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; for (uint32_t i = 0; i < numKernels; i++) { NEO::EncodeMathMMIO::encodeGreaterThanPredicate(commandContainer, alloc->getGpuAddress(), i); ret = appendLaunchKernelWithParams(phKernels[i], haveLaunchArguments ? &pLaunchArgumentsBuffer[i] : nullptr, nullptr, true, true, false); if (ret) { return ret; } } appendSignalEventPostWalker(hEvent); return ret; } template ze_result_t CommandListCoreFamily::appendEventReset(ze_event_handle_t hEvent) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using POST_SYNC_OPERATION = typename GfxFamily::PIPE_CONTROL::POST_SYNC_OPERATION; using MI_BATCH_BUFFER_END = typename GfxFamily::MI_BATCH_BUFFER_END; auto event = Event::fromHandle(hEvent); uint64_t baseAddr = event->getGpuAddress(this->device); uint32_t packetsToReset = event->getPacketsInUse(); NEO::Device *neoDevice = device->getNEODevice(); uint32_t callId = 0; if (NEO::DebugManager.flags.EnableSWTags.get()) { neoDevice->getRootDeviceEnvironment().tagsManager->insertTag( *commandContainer.getCommandStream(), *neoDevice, "zeCommandListAppendEventReset", ++neoDevice->getRootDeviceEnvironment().tagsManager->currentCallCount); callId = neoDevice->getRootDeviceEnvironment().tagsManager->currentCallCount; } if (event->useContextEndOffset()) { baseAddr += event->getContextEndOffset(); } if (event->isEventTimestampFlagSet()) { packetsToReset = EventPacketsCount::eventPackets; } event->resetPackets(); commandContainer.addToResidencyContainer(&event->getAllocation(this->device)); const auto &hwInfo = 
this->device->getHwInfo(); if (isCopyOnly()) { NEO::MiFlushArgs args; args.commandWithPostSync = true; NEO::EncodeMiFlushDW::programMiFlushDw(*commandContainer.getCommandStream(), event->getGpuAddress(this->device), Event::STATE_CLEARED, args, hwInfo); } else { NEO::PipeControlArgs args; args.dcFlushEnable = NEO::MemorySynchronizationCommands::getDcFlushEnable(event->signalScope, hwInfo); size_t estimateSize = NEO::MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(hwInfo) * packetsToReset; if (this->partitionCount > 1) { estimateSize += estimateBufferSizeMultiTileBarrier(hwInfo); } for (uint32_t i = 0u; i < packetsToReset; i++) { NEO::MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( *commandContainer.getCommandStream(), POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, baseAddr, Event::STATE_CLEARED, hwInfo, args); baseAddr += event->getSinglePacketSize(); } if (this->partitionCount > 1) { appendMultiTileBarrier(*neoDevice); } } if (NEO::DebugManager.flags.EnableSWTags.get()) { neoDevice->getRootDeviceEnvironment().tagsManager->insertTag( *commandContainer.getCommandStream(), *neoDevice, "zeCommandListAppendEventReset", callId); } return ZE_RESULT_SUCCESS; } template ze_result_t CommandListCoreFamily::appendMemoryRangesBarrier(uint32_t numRanges, const size_t *pRangeSizes, const void **pRanges, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents); if (ret) { return ret; } appendEventForProfiling(hSignalEvent, true); applyMemoryRangesBarrier(numRanges, pRangeSizes, pRanges); appendSignalEventPostWalker(hSignalEvent); if (this->cmdListType == CommandListType::TYPE_IMMEDIATE) { executeCommandListImmediate(true); } return ZE_RESULT_SUCCESS; } template ze_result_t CommandListCoreFamily::appendImageCopyFromMemory(ze_image_handle_t hDstImage, const void *srcPtr, const ze_image_region_t *pDstRegion, 
ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { auto image = Image::fromHandle(hDstImage); auto bytesPerPixel = static_cast(image->getImageInfo().surfaceFormat->ImageElementSizeInBytes); Vec3 imgSize = {image->getImageDesc().width, image->getImageDesc().height, image->getImageDesc().depth}; ze_image_region_t tmpRegion; if (pDstRegion == nullptr) { // If this is a 1D or 2D image, then the height or depth is ignored and must be set to 1. // Internally, all dimensions must be >= 1. if (image->getImageDesc().type == ZE_IMAGE_TYPE_1D || image->getImageDesc().type == ZE_IMAGE_TYPE_1DARRAY) { imgSize.y = 1; } if (image->getImageDesc().type != ZE_IMAGE_TYPE_3D) { imgSize.z = 1; } tmpRegion = {0, 0, 0, static_cast(imgSize.x), static_cast(imgSize.y), static_cast(imgSize.z)}; pDstRegion = &tmpRegion; } uint64_t bufferSize = getInputBufferSize(image->getImageInfo().imgDesc.imageType, bytesPerPixel, pDstRegion); auto allocationStruct = getAlignedAllocation(this->device, srcPtr, bufferSize, true); auto rowPitch = pDstRegion->width * bytesPerPixel; auto slicePitch = image->getImageInfo().imgDesc.imageType == NEO::ImageType::Image1DArray ? 
1 : pDstRegion->height * rowPitch; if (isCopyOnly()) { return appendCopyImageBlit(allocationStruct.alloc, image->getAllocation(), {0, 0, 0}, {pDstRegion->originX, pDstRegion->originY, pDstRegion->originZ}, rowPitch, slicePitch, rowPitch, slicePitch, bytesPerPixel, {pDstRegion->width, pDstRegion->height, pDstRegion->depth}, {pDstRegion->width, pDstRegion->height, pDstRegion->depth}, imgSize, hEvent); } auto lock = device->getBuiltinFunctionsLib()->obtainUniqueOwnership(); Kernel *builtinKernel = nullptr; switch (bytesPerPixel) { default: UNRECOVERABLE_IF(true); case 1u: builtinKernel = device->getBuiltinFunctionsLib()->getImageFunction(ImageBuiltin::CopyBufferToImage3dBytes); break; case 2u: builtinKernel = device->getBuiltinFunctionsLib()->getImageFunction(ImageBuiltin::CopyBufferToImage3d2Bytes); break; case 4u: builtinKernel = device->getBuiltinFunctionsLib()->getImageFunction(ImageBuiltin::CopyBufferToImage3d4Bytes); break; case 8u: builtinKernel = device->getBuiltinFunctionsLib()->getImageFunction(ImageBuiltin::CopyBufferToImage3d8Bytes); break; case 16u: builtinKernel = device->getBuiltinFunctionsLib()->getImageFunction(ImageBuiltin::CopyBufferToImage3d16Bytes); break; } builtinKernel->setArgBufferWithAlloc(0u, allocationStruct.alignedAllocationPtr, allocationStruct.alloc); builtinKernel->setArgRedescribedImage(1u, hDstImage); builtinKernel->setArgumentValue(2u, sizeof(size_t), &allocationStruct.offset); uint32_t origin[] = { static_cast(pDstRegion->originX), static_cast(pDstRegion->originY), static_cast(pDstRegion->originZ), 0}; builtinKernel->setArgumentValue(3u, sizeof(origin), &origin); uint32_t pitch[] = { rowPitch, slicePitch}; builtinKernel->setArgumentValue(4u, sizeof(pitch), &pitch); uint32_t groupSizeX = pDstRegion->width; uint32_t groupSizeY = pDstRegion->height; uint32_t groupSizeZ = pDstRegion->depth; if (builtinKernel->suggestGroupSize(groupSizeX, groupSizeY, groupSizeZ, &groupSizeX, &groupSizeY, &groupSizeZ) != ZE_RESULT_SUCCESS) { 
DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } if (builtinKernel->setGroupSize(groupSizeX, groupSizeY, groupSizeZ) != ZE_RESULT_SUCCESS) { DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } if (pDstRegion->width % groupSizeX || pDstRegion->height % groupSizeY || pDstRegion->depth % groupSizeZ) { DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } ze_group_count_t functionArgs{pDstRegion->width / groupSizeX, pDstRegion->height / groupSizeY, pDstRegion->depth / groupSizeZ}; return CommandListCoreFamily::appendLaunchKernel(builtinKernel->toHandle(), &functionArgs, hEvent, numWaitEvents, phWaitEvents); } template ze_result_t CommandListCoreFamily::appendImageCopyToMemory(void *dstPtr, ze_image_handle_t hSrcImage, const ze_image_region_t *pSrcRegion, ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { auto image = Image::fromHandle(hSrcImage); auto bytesPerPixel = static_cast(image->getImageInfo().surfaceFormat->ImageElementSizeInBytes); Vec3 imgSize = {image->getImageDesc().width, image->getImageDesc().height, image->getImageDesc().depth}; ze_image_region_t tmpRegion; if (pSrcRegion == nullptr) { // If this is a 1D or 2D image, then the height or depth is ignored and must be set to 1. // Internally, all dimensions must be >= 1. if (image->getImageDesc().type == ZE_IMAGE_TYPE_1D || image->getImageDesc().type == ZE_IMAGE_TYPE_1DARRAY) { imgSize.y = 1; } if (image->getImageDesc().type != ZE_IMAGE_TYPE_3D) { imgSize.z = 1; } tmpRegion = {0, 0, 0, static_cast(imgSize.x), static_cast(imgSize.y), static_cast(imgSize.z)}; pSrcRegion = &tmpRegion; } uint64_t bufferSize = getInputBufferSize(image->getImageInfo().imgDesc.imageType, bytesPerPixel, pSrcRegion); auto allocationStruct = getAlignedAllocation(this->device, dstPtr, bufferSize, false); auto rowPitch = pSrcRegion->width * bytesPerPixel; auto slicePitch = (image->getImageInfo().imgDesc.imageType == NEO::ImageType::Image1DArray ? 
1 : pSrcRegion->height) * rowPitch; if (isCopyOnly()) { return appendCopyImageBlit(image->getAllocation(), allocationStruct.alloc, {pSrcRegion->originX, pSrcRegion->originY, pSrcRegion->originZ}, {0, 0, 0}, rowPitch, slicePitch, rowPitch, slicePitch, bytesPerPixel, {pSrcRegion->width, pSrcRegion->height, pSrcRegion->depth}, imgSize, {pSrcRegion->width, pSrcRegion->height, pSrcRegion->depth}, hEvent); } auto lock = device->getBuiltinFunctionsLib()->obtainUniqueOwnership(); Kernel *builtinKernel = nullptr; switch (bytesPerPixel) { default: UNRECOVERABLE_IF(true); case 1u: builtinKernel = device->getBuiltinFunctionsLib()->getImageFunction(ImageBuiltin::CopyImage3dToBufferBytes); break; case 2u: builtinKernel = device->getBuiltinFunctionsLib()->getImageFunction(ImageBuiltin::CopyImage3dToBuffer2Bytes); break; case 4u: builtinKernel = device->getBuiltinFunctionsLib()->getImageFunction(ImageBuiltin::CopyImage3dToBuffer4Bytes); break; case 8u: builtinKernel = device->getBuiltinFunctionsLib()->getImageFunction(ImageBuiltin::CopyImage3dToBuffer8Bytes); break; case 16u: builtinKernel = device->getBuiltinFunctionsLib()->getImageFunction(ImageBuiltin::CopyImage3dToBuffer16Bytes); break; } builtinKernel->setArgRedescribedImage(0u, hSrcImage); builtinKernel->setArgBufferWithAlloc(1u, allocationStruct.alignedAllocationPtr, allocationStruct.alloc); uint32_t origin[] = { static_cast(pSrcRegion->originX), static_cast(pSrcRegion->originY), static_cast(pSrcRegion->originZ), 0}; builtinKernel->setArgumentValue(2u, sizeof(origin), &origin); builtinKernel->setArgumentValue(3u, sizeof(size_t), &allocationStruct.offset); uint32_t pitch[] = { rowPitch, slicePitch}; builtinKernel->setArgumentValue(4u, sizeof(pitch), &pitch); uint32_t groupSizeX = pSrcRegion->width; uint32_t groupSizeY = pSrcRegion->height; uint32_t groupSizeZ = pSrcRegion->depth; if (builtinKernel->suggestGroupSize(groupSizeX, groupSizeY, groupSizeZ, &groupSizeX, &groupSizeY, &groupSizeZ) != ZE_RESULT_SUCCESS) { 
DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } if (builtinKernel->setGroupSize(groupSizeX, groupSizeY, groupSizeZ) != ZE_RESULT_SUCCESS) { DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } if (pSrcRegion->width % groupSizeX || pSrcRegion->height % groupSizeY || pSrcRegion->depth % groupSizeZ) { DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } ze_group_count_t functionArgs{pSrcRegion->width / groupSizeX, pSrcRegion->height / groupSizeY, pSrcRegion->depth / groupSizeZ}; auto ret = CommandListCoreFamily::appendLaunchKernel(builtinKernel->toHandle(), &functionArgs, hEvent, numWaitEvents, phWaitEvents); auto event = Event::fromHandle(hEvent); if (event) { allocationStruct.needsFlush &= !event->signalScope; } if (allocationStruct.needsFlush) { const auto &hwInfo = this->device->getHwInfo(); NEO::PipeControlArgs args; args.dcFlushEnable = NEO::MemorySynchronizationCommands::getDcFlushEnable(true, hwInfo); NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), args); } return ret; } template ze_result_t CommandListCoreFamily::appendImageCopyRegion(ze_image_handle_t hDstImage, ze_image_handle_t hSrcImage, const ze_image_region_t *pDstRegion, const ze_image_region_t *pSrcRegion, ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { auto dstImage = L0::Image::fromHandle(hDstImage); auto srcImage = L0::Image::fromHandle(hSrcImage); cl_int4 srcOffset, dstOffset; ze_image_region_t srcRegion, dstRegion; if (pSrcRegion != nullptr) { srcRegion = *pSrcRegion; } else { ze_image_desc_t srcDesc = srcImage->getImageDesc(); srcRegion = {0, 0, 0, static_cast(srcDesc.width), srcDesc.height, srcDesc.depth}; } srcOffset.x = static_cast(srcRegion.originX); srcOffset.y = static_cast(srcRegion.originY); srcOffset.z = static_cast(srcRegion.originZ); srcOffset.w = 0; if (pDstRegion != nullptr) { dstRegion = *pDstRegion; } else { ze_image_desc_t dstDesc = dstImage->getImageDesc(); dstRegion = {0, 0, 0, 
static_cast(dstDesc.width), dstDesc.height, dstDesc.depth}; } dstOffset.x = static_cast(dstRegion.originX); dstOffset.y = static_cast(dstRegion.originY); dstOffset.z = static_cast(dstRegion.originZ); dstOffset.w = 0; if (srcRegion.width != dstRegion.width || srcRegion.height != dstRegion.height || srcRegion.depth != dstRegion.depth) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } uint32_t groupSizeX = srcRegion.width; uint32_t groupSizeY = srcRegion.height; uint32_t groupSizeZ = srcRegion.depth; if (isCopyOnly()) { auto bytesPerPixel = static_cast(srcImage->getImageInfo().surfaceFormat->ImageElementSizeInBytes); Vec3 srcImgSize = {srcImage->getImageInfo().imgDesc.imageWidth, srcImage->getImageInfo().imgDesc.imageHeight, srcImage->getImageInfo().imgDesc.imageDepth}; Vec3 dstImgSize = {dstImage->getImageInfo().imgDesc.imageWidth, dstImage->getImageInfo().imgDesc.imageHeight, dstImage->getImageInfo().imgDesc.imageDepth}; auto srcRowPitch = srcRegion.width * bytesPerPixel; auto srcSlicePitch = (srcImage->getImageInfo().imgDesc.imageType == NEO::ImageType::Image1DArray ? 1 : srcRegion.height) * srcRowPitch; auto dstRowPitch = dstRegion.width * bytesPerPixel; auto dstSlicePitch = (dstImage->getImageInfo().imgDesc.imageType == NEO::ImageType::Image1DArray ? 
1 : dstRegion.height) * dstRowPitch; return appendCopyImageBlit(srcImage->getAllocation(), dstImage->getAllocation(), {srcRegion.originX, srcRegion.originY, srcRegion.originZ}, {dstRegion.originX, dstRegion.originY, dstRegion.originZ}, srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, bytesPerPixel, {srcRegion.width, srcRegion.height, srcRegion.depth}, srcImgSize, dstImgSize, hEvent); } auto lock = device->getBuiltinFunctionsLib()->obtainUniqueOwnership(); auto kernel = device->getBuiltinFunctionsLib()->getImageFunction(ImageBuiltin::CopyImageRegion); if (kernel->suggestGroupSize(groupSizeX, groupSizeY, groupSizeZ, &groupSizeX, &groupSizeY, &groupSizeZ) != ZE_RESULT_SUCCESS) { DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } if (kernel->setGroupSize(groupSizeX, groupSizeY, groupSizeZ) != ZE_RESULT_SUCCESS) { DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } if (srcRegion.width % groupSizeX || srcRegion.height % groupSizeY || srcRegion.depth % groupSizeZ) { DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } ze_group_count_t functionArgs{srcRegion.width / groupSizeX, srcRegion.height / groupSizeY, srcRegion.depth / groupSizeZ}; kernel->setArgRedescribedImage(0, hSrcImage); kernel->setArgRedescribedImage(1, hDstImage); kernel->setArgumentValue(2, sizeof(srcOffset), &srcOffset); kernel->setArgumentValue(3, sizeof(dstOffset), &dstOffset); return CommandListCoreFamily::appendLaunchKernel(kernel->toHandle(), &functionArgs, hEvent, numWaitEvents, phWaitEvents); } template ze_result_t CommandListCoreFamily::appendImageCopy(ze_image_handle_t hDstImage, ze_image_handle_t hSrcImage, ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { return this->appendImageCopyRegion(hDstImage, hSrcImage, nullptr, nullptr, hEvent, numWaitEvents, phWaitEvents); } template ze_result_t CommandListCoreFamily::appendMemAdvise(ze_device_handle_t hDevice, const void *ptr, size_t size, ze_memory_advice_t advice) { NEO::MemAdviseFlags flags; 
flags.memadvise_flags = 0; auto allocData = device->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(ptr); if (allocData) { DeviceImp *deviceImp = static_cast((L0::Device::fromHandle(hDevice))); if (deviceImp->memAdviseSharedAllocations.find(allocData) != deviceImp->memAdviseSharedAllocations.end()) { flags = deviceImp->memAdviseSharedAllocations[allocData]; } switch (advice) { case ZE_MEMORY_ADVICE_SET_READ_MOSTLY: flags.read_only = 1; break; case ZE_MEMORY_ADVICE_CLEAR_READ_MOSTLY: flags.read_only = 0; break; case ZE_MEMORY_ADVICE_SET_PREFERRED_LOCATION: flags.device_preferred_location = 1; break; case ZE_MEMORY_ADVICE_CLEAR_PREFERRED_LOCATION: flags.device_preferred_location = 0; break; case ZE_MEMORY_ADVICE_BIAS_CACHED: flags.cached_memory = 1; break; case ZE_MEMORY_ADVICE_BIAS_UNCACHED: flags.cached_memory = 0; break; case ZE_MEMORY_ADVICE_SET_NON_ATOMIC_MOSTLY: case ZE_MEMORY_ADVICE_CLEAR_NON_ATOMIC_MOSTLY: default: break; } auto memoryManager = device->getDriverHandle()->getMemoryManager(); auto pageFaultManager = memoryManager->getPageFaultManager(); if (pageFaultManager) { /* If Read Only and Device Preferred Hints have been cleared, then cpu_migration of Shared memory can be re-enabled*/ if (flags.cpu_migration_blocked) { if (flags.read_only == 0 && flags.device_preferred_location == 0) { pageFaultManager->protectCPUMemoryAccess(const_cast(ptr), size); flags.cpu_migration_blocked = 0; } } /* Given MemAdvise hints, use different gpu Domain Handler for the Page Fault Handling */ pageFaultManager->setGpuDomainHandler(L0::handleGpuDomainTransferForHwWithHints); } auto alloc = allocData->gpuAllocations.getGraphicsAllocation(deviceImp->getRootDeviceIndex()); memoryManager->setMemAdvise(alloc, flags, deviceImp->getRootDeviceIndex()); deviceImp->memAdviseSharedAllocations[allocData] = flags; return ZE_RESULT_SUCCESS; } return ZE_RESULT_ERROR_INVALID_ARGUMENT; } template ze_result_t CommandListCoreFamily::appendLaunchKernelSplit(ze_kernel_handle_t hKernel, 
const ze_group_count_t *pThreadGroupDimensions, ze_event_handle_t hEvent) { return appendLaunchKernelWithParams(hKernel, pThreadGroupDimensions, nullptr, false, false, false); } template void CommandListCoreFamily::appendEventForProfilingAllWalkers(ze_event_handle_t hEvent, bool beforeWalker) { if (beforeWalker) { appendEventForProfiling(hEvent, true); } else { appendSignalEventPostWalker(hEvent); } } template ze_result_t CommandListCoreFamily::appendMemoryCopyKernelWithGA(void *dstPtr, NEO::GraphicsAllocation *dstPtrAlloc, uint64_t dstOffset, void *srcPtr, NEO::GraphicsAllocation *srcPtrAlloc, uint64_t srcOffset, uint64_t size, uint64_t elementSize, Builtin builtin, ze_event_handle_t hSignalEvent, bool isStateless) { auto lock = device->getBuiltinFunctionsLib()->obtainUniqueOwnership(); Kernel *builtinFunction = nullptr; builtinFunction = device->getBuiltinFunctionsLib()->getFunction(builtin); uint32_t groupSizeX = builtinFunction->getImmutableData() ->getDescriptor() .kernelAttributes.simdSize; uint32_t groupSizeY = 1u; uint32_t groupSizeZ = 1u; if (builtinFunction->setGroupSize(groupSizeX, groupSizeY, groupSizeZ)) { DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } builtinFunction->setArgBufferWithAlloc(0u, *reinterpret_cast(dstPtr), dstPtrAlloc); builtinFunction->setArgBufferWithAlloc(1u, *reinterpret_cast(srcPtr), srcPtrAlloc); uint64_t elems = size / elementSize; builtinFunction->setArgumentValue(2, sizeof(elems), &elems); builtinFunction->setArgumentValue(3, sizeof(dstOffset), &dstOffset); builtinFunction->setArgumentValue(4, sizeof(srcOffset), &srcOffset); uint32_t groups = static_cast((size + ((static_cast(groupSizeX) * elementSize) - 1)) / (static_cast(groupSizeX) * elementSize)); ze_group_count_t dispatchFuncArgs{groups, 1u, 1u}; return CommandListCoreFamily::appendLaunchKernelSplit(builtinFunction->toHandle(), &dispatchFuncArgs, hSignalEvent); } template ze_result_t CommandListCoreFamily::appendMemoryCopyBlit(uintptr_t dstPtr, 
NEO::GraphicsAllocation *dstPtrAlloc, uint64_t dstOffset, uintptr_t srcPtr, NEO::GraphicsAllocation *srcPtrAlloc, uint64_t srcOffset, uint64_t size) { dstOffset += ptrDiff(dstPtr, dstPtrAlloc->getGpuAddress()); srcOffset += ptrDiff(srcPtr, srcPtrAlloc->getGpuAddress()); auto clearColorAllocation = device->getNEODevice()->getDefaultEngine().commandStreamReceiver->getClearColorAllocation(); using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; auto blitProperties = NEO::BlitProperties::constructPropertiesForCopy(dstPtrAlloc, srcPtrAlloc, {dstOffset, 0, 0}, {srcOffset, 0, 0}, {size, 0, 0}, 0, 0, 0, 0, clearColorAllocation); commandContainer.addToResidencyContainer(dstPtrAlloc); commandContainer.addToResidencyContainer(srcPtrAlloc); commandContainer.addToResidencyContainer(clearColorAllocation); NEO::BlitPropertiesContainer blitPropertiesContainer{blitProperties}; NEO::BlitCommandsHelper::dispatchBlitCommandsForBufferPerRow(blitProperties, *commandContainer.getCommandStream(), *device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]); return ZE_RESULT_SUCCESS; } template ze_result_t CommandListCoreFamily::appendMemoryCopyBlitRegion(NEO::GraphicsAllocation *srcAlloc, NEO::GraphicsAllocation *dstAlloc, size_t srcOffset, size_t dstOffset, ze_copy_region_t srcRegion, ze_copy_region_t dstRegion, const Vec3 ©Size, size_t srcRowPitch, size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch, const Vec3 &srcSize, const Vec3 &dstSize, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; dstRegion.originX += static_cast(dstOffset); srcRegion.originX += static_cast(srcOffset); uint32_t bytesPerPixel = NEO::BlitCommandsHelper::getAvailableBytesPerPixel(copySize.x, srcRegion.originX, dstRegion.originX, srcSize.x, dstSize.x); Vec3 srcPtrOffset = {srcRegion.originX / bytesPerPixel, srcRegion.originY, srcRegion.originZ}; Vec3 
dstPtrOffset = {dstRegion.originX / bytesPerPixel, dstRegion.originY, dstRegion.originZ}; auto clearColorAllocation = device->getNEODevice()->getDefaultEngine().commandStreamReceiver->getClearColorAllocation(); Vec3 copySizeModified = {copySize.x / bytesPerPixel, copySize.y, copySize.z}; auto blitProperties = NEO::BlitProperties::constructPropertiesForCopy(dstAlloc, srcAlloc, dstPtrOffset, srcPtrOffset, copySizeModified, srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, clearColorAllocation); commandContainer.addToResidencyContainer(dstAlloc); commandContainer.addToResidencyContainer(srcAlloc); commandContainer.addToResidencyContainer(clearColorAllocation); blitProperties.bytesPerPixel = bytesPerPixel; blitProperties.srcSize = srcSize; blitProperties.dstSize = dstSize; ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents); if (ret) { return ret; } NEO::BlitPropertiesContainer blitPropertiesContainer{blitProperties}; appendEventForProfiling(hSignalEvent, true); bool copyRegionPreferred = NEO::BlitCommandsHelper::isCopyRegionPreferred(copySizeModified, *device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]); if (copyRegionPreferred) { NEO::BlitCommandsHelper::dispatchBlitCommandsForBufferRegion(blitProperties, *commandContainer.getCommandStream(), *device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]); } else { NEO::BlitCommandsHelper::dispatchBlitCommandsForBufferPerRow(blitProperties, *commandContainer.getCommandStream(), *device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]); } appendSignalEventPostWalker(hSignalEvent); return ZE_RESULT_SUCCESS; } template ze_result_t CommandListCoreFamily::appendCopyImageBlit(NEO::GraphicsAllocation *src, NEO::GraphicsAllocation *dst, const Vec3 &srcOffsets, const Vec3 &dstOffsets, size_t srcRowPitch, size_t srcSlicePitch, size_t dstRowPitch, size_t 
dstSlicePitch, size_t bytesPerPixel, const Vec3 ©Size, const Vec3 &srcSize, const Vec3 &dstSize, ze_event_handle_t hSignalEvent) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; auto clearColorAllocation = device->getNEODevice()->getDefaultEngine().commandStreamReceiver->getClearColorAllocation(); auto blitProperties = NEO::BlitProperties::constructPropertiesForCopy(dst, src, dstOffsets, srcOffsets, copySize, srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, clearColorAllocation); blitProperties.bytesPerPixel = bytesPerPixel; blitProperties.srcSize = srcSize; blitProperties.dstSize = dstSize; commandContainer.addToResidencyContainer(dst); commandContainer.addToResidencyContainer(src); commandContainer.addToResidencyContainer(clearColorAllocation); appendEventForProfiling(hSignalEvent, true); NEO::BlitCommandsHelper::dispatchBlitCommandsForImageRegion(blitProperties, *commandContainer.getCommandStream(), *device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]); appendSignalEventPostWalker(hSignalEvent); return ZE_RESULT_SUCCESS; } template ze_result_t CommandListCoreFamily::appendPageFaultCopy(NEO::GraphicsAllocation *dstAllocation, NEO::GraphicsAllocation *srcAllocation, size_t size, bool flushHost) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; size_t middleElSize = sizeof(uint32_t) * 4; uintptr_t rightSize = size % middleElSize; bool isStateless = false; if (size >= 4ull * MemoryConstants::gigaByte) { isStateless = true; } uintptr_t dstAddress = static_cast(dstAllocation->getGpuAddress()); uintptr_t srcAddress = static_cast(srcAllocation->getGpuAddress()); ze_result_t ret = ZE_RESULT_ERROR_UNKNOWN; if (isCopyOnly()) { return appendMemoryCopyBlit(dstAddress, dstAllocation, 0u, srcAddress, srcAllocation, 0u, size); } else { ret = appendMemoryCopyKernelWithGA(reinterpret_cast(&dstAddress), dstAllocation, 0, reinterpret_cast(&srcAddress), srcAllocation, 0, size - rightSize, 
middleElSize, Builtin::CopyBufferToBufferMiddle, nullptr, isStateless); if (ret == ZE_RESULT_SUCCESS && rightSize) { ret = appendMemoryCopyKernelWithGA(reinterpret_cast(&dstAddress), dstAllocation, size - rightSize, reinterpret_cast(&srcAddress), srcAllocation, size - rightSize, rightSize, 1UL, Builtin::CopyBufferToBufferSide, nullptr, isStateless); } const auto &hwInfo = this->device->getHwInfo(); if (NEO::MemorySynchronizationCommands::getDcFlushEnable(true, hwInfo)) { if (flushHost) { NEO::PipeControlArgs args; args.dcFlushEnable = true; NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), args); } } } return ret; } template ze_result_t CommandListCoreFamily::appendMemoryCopy(void *dstptr, const void *srcptr, size_t size, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; uintptr_t start = reinterpret_cast(dstptr); bool isStateless = false; NEO::Device *neoDevice = device->getNEODevice(); uint32_t callId = 0; if (NEO::DebugManager.flags.EnableSWTags.get()) { neoDevice->getRootDeviceEnvironment().tagsManager->insertTag( *commandContainer.getCommandStream(), *neoDevice, "zeCommandListAppendMemoryCopy", ++neoDevice->getRootDeviceEnvironment().tagsManager->currentCallCount); callId = neoDevice->getRootDeviceEnvironment().tagsManager->currentCallCount; } size_t middleAlignment = MemoryConstants::cacheLineSize; size_t middleElSize = sizeof(uint32_t) * 4; uintptr_t leftSize = start % middleAlignment; leftSize = (leftSize > 0) ? 
(middleAlignment - leftSize) : 0; leftSize = std::min(leftSize, size); uintptr_t rightSize = (start + size) % middleAlignment; rightSize = std::min(rightSize, size - leftSize); uintptr_t middleSizeBytes = size - leftSize - rightSize; if (!isAligned<4>(reinterpret_cast(srcptr) + leftSize)) { leftSize += middleSizeBytes; middleSizeBytes = 0; } DEBUG_BREAK_IF(size != leftSize + middleSizeBytes + rightSize); auto dstAllocationStruct = getAlignedAllocation(this->device, dstptr, size, false); auto srcAllocationStruct = getAlignedAllocation(this->device, srcptr, size, true); if (dstAllocationStruct.alloc == nullptr || srcAllocationStruct.alloc == nullptr) { return ZE_RESULT_ERROR_UNKNOWN; } if (size >= 4ull * MemoryConstants::gigaByte) { isStateless = true; } ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents); if (ret) { return ret; } appendEventForProfilingAllWalkers(hSignalEvent, true); if (ret == ZE_RESULT_SUCCESS && leftSize) { Builtin func = Builtin::CopyBufferToBufferSide; if (isStateless) { func = Builtin::CopyBufferToBufferSideStateless; } ret = isCopyOnly() ? appendMemoryCopyBlit(dstAllocationStruct.alignedAllocationPtr, dstAllocationStruct.alloc, dstAllocationStruct.offset, srcAllocationStruct.alignedAllocationPtr, srcAllocationStruct.alloc, srcAllocationStruct.offset, leftSize) : appendMemoryCopyKernelWithGA(reinterpret_cast(&dstAllocationStruct.alignedAllocationPtr), dstAllocationStruct.alloc, dstAllocationStruct.offset, reinterpret_cast(&srcAllocationStruct.alignedAllocationPtr), srcAllocationStruct.alloc, srcAllocationStruct.offset, leftSize, 1UL, func, hSignalEvent, isStateless); } if (ret == ZE_RESULT_SUCCESS && middleSizeBytes) { Builtin func = Builtin::CopyBufferToBufferMiddle; if (isStateless) { func = Builtin::CopyBufferToBufferMiddleStateless; } ret = isCopyOnly() ? 
appendMemoryCopyBlit(dstAllocationStruct.alignedAllocationPtr, dstAllocationStruct.alloc, leftSize + dstAllocationStruct.offset, srcAllocationStruct.alignedAllocationPtr, srcAllocationStruct.alloc, leftSize + srcAllocationStruct.offset, middleSizeBytes) : appendMemoryCopyKernelWithGA(reinterpret_cast(&dstAllocationStruct.alignedAllocationPtr), dstAllocationStruct.alloc, leftSize + dstAllocationStruct.offset, reinterpret_cast(&srcAllocationStruct.alignedAllocationPtr), srcAllocationStruct.alloc, leftSize + srcAllocationStruct.offset, middleSizeBytes, middleElSize, func, hSignalEvent, isStateless); } if (ret == ZE_RESULT_SUCCESS && rightSize) { Builtin func = Builtin::CopyBufferToBufferSide; if (isStateless) { func = Builtin::CopyBufferToBufferSideStateless; } ret = isCopyOnly() ? appendMemoryCopyBlit(dstAllocationStruct.alignedAllocationPtr, dstAllocationStruct.alloc, leftSize + middleSizeBytes + dstAllocationStruct.offset, srcAllocationStruct.alignedAllocationPtr, srcAllocationStruct.alloc, leftSize + middleSizeBytes + srcAllocationStruct.offset, rightSize) : appendMemoryCopyKernelWithGA(reinterpret_cast(&dstAllocationStruct.alignedAllocationPtr), dstAllocationStruct.alloc, leftSize + middleSizeBytes + dstAllocationStruct.offset, reinterpret_cast(&srcAllocationStruct.alignedAllocationPtr), srcAllocationStruct.alloc, leftSize + middleSizeBytes + srcAllocationStruct.offset, rightSize, 1UL, func, hSignalEvent, isStateless); } appendEventForProfilingAllWalkers(hSignalEvent, false); const auto &hwInfo = this->device->getHwInfo(); if (NEO::MemorySynchronizationCommands::getDcFlushEnable(true, hwInfo)) { auto event = Event::fromHandle(hSignalEvent); if (event) { dstAllocationStruct.needsFlush &= !event->signalScope; } if (dstAllocationStruct.needsFlush && !isCopyOnly()) { NEO::PipeControlArgs args; args.dcFlushEnable = true; NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), args); } } if (NEO::DebugManager.flags.EnableSWTags.get()) { 
neoDevice->getRootDeviceEnvironment().tagsManager->insertTag( *commandContainer.getCommandStream(), *neoDevice, "zeCommandListAppendMemoryCopy", callId); } return ret; } template ze_result_t CommandListCoreFamily::appendMemoryCopyRegion(void *dstPtr, const ze_copy_region_t *dstRegion, uint32_t dstPitch, uint32_t dstSlicePitch, const void *srcPtr, const ze_copy_region_t *srcRegion, uint32_t srcPitch, uint32_t srcSlicePitch, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { NEO::Device *neoDevice = device->getNEODevice(); uint32_t callId = 0; if (NEO::DebugManager.flags.EnableSWTags.get()) { neoDevice->getRootDeviceEnvironment().tagsManager->insertTag( *commandContainer.getCommandStream(), *neoDevice, "zeCommandListAppendMemoryCopyRegion", ++neoDevice->getRootDeviceEnvironment().tagsManager->currentCallCount); callId = neoDevice->getRootDeviceEnvironment().tagsManager->currentCallCount; } size_t dstSize = 0; size_t srcSize = 0; if (srcRegion->depth > 1) { uint32_t hostPtrDstOffset = dstRegion->originX + ((dstRegion->originY) * dstPitch) + ((dstRegion->originZ) * dstSlicePitch); uint32_t hostPtrSrcOffset = srcRegion->originX + ((srcRegion->originY) * srcPitch) + ((srcRegion->originZ) * srcSlicePitch); dstSize = (dstRegion->width * dstRegion->height * dstRegion->depth) + hostPtrDstOffset; srcSize = (srcRegion->width * srcRegion->height * srcRegion->depth) + hostPtrSrcOffset; } else { uint32_t hostPtrDstOffset = dstRegion->originX + ((dstRegion->originY) * dstPitch); uint32_t hostPtrSrcOffset = srcRegion->originX + ((srcRegion->originY) * srcPitch); dstSize = (dstRegion->width * dstRegion->height) + hostPtrDstOffset; srcSize = (srcRegion->width * srcRegion->height) + hostPtrSrcOffset; } auto dstAllocationStruct = getAlignedAllocation(this->device, dstPtr, dstSize, false); auto srcAllocationStruct = getAlignedAllocation(this->device, srcPtr, srcSize, true); dstSize += dstAllocationStruct.offset; srcSize += 
srcAllocationStruct.offset; Vec3 srcSize3 = {srcPitch ? srcPitch : srcRegion->width + srcRegion->originX, srcSlicePitch ? srcSlicePitch / srcPitch : srcRegion->height + srcRegion->originY, srcRegion->depth + srcRegion->originZ}; Vec3 dstSize3 = {dstPitch ? dstPitch : dstRegion->width + dstRegion->originX, dstSlicePitch ? dstSlicePitch / dstPitch : dstRegion->height + dstRegion->originY, dstRegion->depth + dstRegion->originZ}; ze_result_t result = ZE_RESULT_SUCCESS; if (srcRegion->depth > 1) { result = isCopyOnly() ? appendMemoryCopyBlitRegion(srcAllocationStruct.alloc, dstAllocationStruct.alloc, srcAllocationStruct.offset, dstAllocationStruct.offset, *srcRegion, *dstRegion, {srcRegion->width, srcRegion->height, srcRegion->depth}, srcPitch, srcSlicePitch, dstPitch, dstSlicePitch, srcSize3, dstSize3, hSignalEvent, numWaitEvents, phWaitEvents) : this->appendMemoryCopyKernel3d(&dstAllocationStruct, &srcAllocationStruct, Builtin::CopyBufferRectBytes3d, dstRegion, dstPitch, dstSlicePitch, dstAllocationStruct.offset, srcRegion, srcPitch, srcSlicePitch, srcAllocationStruct.offset, hSignalEvent, numWaitEvents, phWaitEvents); } else { result = isCopyOnly() ? 
appendMemoryCopyBlitRegion(srcAllocationStruct.alloc, dstAllocationStruct.alloc, srcAllocationStruct.offset, dstAllocationStruct.offset, *srcRegion, *dstRegion, {srcRegion->width, srcRegion->height, srcRegion->depth}, srcPitch, srcSlicePitch, dstPitch, dstSlicePitch, srcSize3, dstSize3, hSignalEvent, numWaitEvents, phWaitEvents) : this->appendMemoryCopyKernel2d(&dstAllocationStruct, &srcAllocationStruct, Builtin::CopyBufferRectBytes2d, dstRegion, dstPitch, dstAllocationStruct.offset, srcRegion, srcPitch, srcAllocationStruct.offset, hSignalEvent, numWaitEvents, phWaitEvents); } if (result) { return result; } const auto &hwInfo = this->device->getHwInfo(); if (NEO::MemorySynchronizationCommands::getDcFlushEnable(true, hwInfo)) { auto event = Event::fromHandle(hSignalEvent); if (event) { dstAllocationStruct.needsFlush &= !event->signalScope; } if (dstAllocationStruct.needsFlush && !isCopyOnly()) { NEO::PipeControlArgs args; args.dcFlushEnable = true; NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), args); } } if (NEO::DebugManager.flags.EnableSWTags.get()) { neoDevice->getRootDeviceEnvironment().tagsManager->insertTag( *commandContainer.getCommandStream(), *neoDevice, "zeCommandListAppendMemoryCopyRegion", callId); } return ZE_RESULT_SUCCESS; } template ze_result_t CommandListCoreFamily::appendMemoryCopyKernel3d(AlignedAllocationData *dstAlignedAllocation, AlignedAllocationData *srcAlignedAllocation, Builtin builtin, const ze_copy_region_t *dstRegion, uint32_t dstPitch, uint32_t dstSlicePitch, size_t dstOffset, const ze_copy_region_t *srcRegion, uint32_t srcPitch, uint32_t srcSlicePitch, size_t srcOffset, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { auto lock = device->getBuiltinFunctionsLib()->obtainUniqueOwnership(); auto builtinFunction = device->getBuiltinFunctionsLib()->getFunction(builtin); uint32_t groupSizeX = srcRegion->width; uint32_t groupSizeY = srcRegion->height; 
uint32_t groupSizeZ = srcRegion->depth; if (builtinFunction->suggestGroupSize(groupSizeX, groupSizeY, groupSizeZ, &groupSizeX, &groupSizeY, &groupSizeZ) != ZE_RESULT_SUCCESS) { DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } if (builtinFunction->setGroupSize(groupSizeX, groupSizeY, groupSizeZ) != ZE_RESULT_SUCCESS) { DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } if (srcRegion->width % groupSizeX || srcRegion->height % groupSizeY || srcRegion->depth % groupSizeZ) { DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } ze_group_count_t dispatchFuncArgs{srcRegion->width / groupSizeX, srcRegion->height / groupSizeY, srcRegion->depth / groupSizeZ}; uint32_t srcOrigin[3] = {(srcRegion->originX + static_cast(srcOffset)), (srcRegion->originY), (srcRegion->originZ)}; uint32_t dstOrigin[3] = {(dstRegion->originX + static_cast(dstOffset)), (dstRegion->originY), (dstRegion->originZ)}; uint32_t srcPitches[2] = {(srcPitch), (srcSlicePitch)}; uint32_t dstPitches[2] = {(dstPitch), (dstSlicePitch)}; builtinFunction->setArgBufferWithAlloc(0, srcAlignedAllocation->alignedAllocationPtr, srcAlignedAllocation->alloc); builtinFunction->setArgBufferWithAlloc(1, dstAlignedAllocation->alignedAllocationPtr, dstAlignedAllocation->alloc); builtinFunction->setArgumentValue(2, sizeof(srcOrigin), &srcOrigin); builtinFunction->setArgumentValue(3, sizeof(dstOrigin), &dstOrigin); builtinFunction->setArgumentValue(4, sizeof(srcPitches), &srcPitches); builtinFunction->setArgumentValue(5, sizeof(dstPitches), &dstPitches); return CommandListCoreFamily::appendLaunchKernel(builtinFunction->toHandle(), &dispatchFuncArgs, hSignalEvent, numWaitEvents, phWaitEvents); } template ze_result_t CommandListCoreFamily::appendMemoryCopyKernel2d(AlignedAllocationData *dstAlignedAllocation, AlignedAllocationData *srcAlignedAllocation, Builtin builtin, const ze_copy_region_t *dstRegion, uint32_t dstPitch, size_t dstOffset, const ze_copy_region_t *srcRegion, uint32_t srcPitch, size_t srcOffset, 
ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { auto lock = device->getBuiltinFunctionsLib()->obtainUniqueOwnership(); auto builtinFunction = device->getBuiltinFunctionsLib()->getFunction(builtin); uint32_t groupSizeX = srcRegion->width; uint32_t groupSizeY = srcRegion->height; uint32_t groupSizeZ = 1u; if (builtinFunction->suggestGroupSize(groupSizeX, groupSizeY, groupSizeZ, &groupSizeX, &groupSizeY, &groupSizeZ) != ZE_RESULT_SUCCESS) { DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } if (builtinFunction->setGroupSize(groupSizeX, groupSizeY, groupSizeZ) != ZE_RESULT_SUCCESS) { DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } if (srcRegion->width % groupSizeX || srcRegion->height % groupSizeY) { DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } ze_group_count_t dispatchFuncArgs{srcRegion->width / groupSizeX, srcRegion->height / groupSizeY, 1u}; uint32_t srcOrigin[2] = {(srcRegion->originX + static_cast(srcOffset)), (srcRegion->originY)}; uint32_t dstOrigin[2] = {(dstRegion->originX + static_cast(dstOffset)), (dstRegion->originY)}; builtinFunction->setArgBufferWithAlloc(0, srcAlignedAllocation->alignedAllocationPtr, srcAlignedAllocation->alloc); builtinFunction->setArgBufferWithAlloc(1, dstAlignedAllocation->alignedAllocationPtr, dstAlignedAllocation->alloc); builtinFunction->setArgumentValue(2, sizeof(srcOrigin), &srcOrigin); builtinFunction->setArgumentValue(3, sizeof(dstOrigin), &dstOrigin); builtinFunction->setArgumentValue(4, sizeof(srcPitch), &srcPitch); builtinFunction->setArgumentValue(5, sizeof(dstPitch), &dstPitch); return CommandListCoreFamily::appendLaunchKernel(builtinFunction->toHandle(), &dispatchFuncArgs, hSignalEvent, numWaitEvents, phWaitEvents); } template ze_result_t CommandListCoreFamily::appendMemoryPrefetch(const void *ptr, size_t count) { auto allocData = device->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(ptr); if (allocData) { return ZE_RESULT_SUCCESS; } return 
ZE_RESULT_ERROR_INVALID_ARGUMENT; } template ze_result_t CommandListCoreFamily::appendMemoryFill(void *ptr, const void *pattern, size_t patternSize, size_t size, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { bool isStateless = false; NEO::Device *neoDevice = device->getNEODevice(); uint32_t callId = 0; if (NEO::DebugManager.flags.EnableSWTags.get()) { neoDevice->getRootDeviceEnvironment().tagsManager->insertTag( *commandContainer.getCommandStream(), *neoDevice, "zeCommandListAppendMemoryFill", ++neoDevice->getRootDeviceEnvironment().tagsManager->currentCallCount); callId = neoDevice->getRootDeviceEnvironment().tagsManager->currentCallCount; } if (isCopyOnly()) { return appendBlitFill(ptr, pattern, patternSize, size, hSignalEvent, numWaitEvents, phWaitEvents); } ze_result_t res = addEventsToCmdList(numWaitEvents, phWaitEvents); if (res) { return res; } using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; bool hostPointerNeedsFlush = false; NEO::SvmAllocationData *allocData = nullptr; bool dstAllocFound = device->getDriverHandle()->findAllocationDataForRange(ptr, size, &allocData); if (dstAllocFound) { if (allocData->memoryType == InternalMemoryType::HOST_UNIFIED_MEMORY || allocData->memoryType == InternalMemoryType::SHARED_UNIFIED_MEMORY) { hostPointerNeedsFlush = true; } } else { if (device->getDriverHandle()->getHostPointerBaseAddress(ptr, nullptr) != ZE_RESULT_SUCCESS) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } else { hostPointerNeedsFlush = true; } } auto dstAllocation = this->getAlignedAllocation(this->device, ptr, size, false); if (size >= 4ull * MemoryConstants::gigaByte) { isStateless = true; } auto lock = device->getBuiltinFunctionsLib()->obtainUniqueOwnership(); if (patternSize == 1) { Kernel *builtinFunction = nullptr; if (isStateless) { builtinFunction = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferImmediateStateless); } else { builtinFunction = 
device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferImmediate); } uint32_t groupSizeX = builtinFunction->getImmutableData()->getDescriptor().kernelAttributes.simdSize; if (groupSizeX > static_cast(size)) { groupSizeX = static_cast(size); } if (builtinFunction->setGroupSize(groupSizeX, 1u, 1u)) { DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } uint32_t value = *(reinterpret_cast(const_cast(pattern))); builtinFunction->setArgBufferWithAlloc(0, dstAllocation.alignedAllocationPtr, dstAllocation.alloc); builtinFunction->setArgumentValue(1, sizeof(dstAllocation.offset), &dstAllocation.offset); builtinFunction->setArgumentValue(2, sizeof(value), &value); appendEventForProfilingAllWalkers(hSignalEvent, true); uint32_t groups = static_cast(size) / groupSizeX; ze_group_count_t dispatchFuncArgs{groups, 1u, 1u}; res = appendLaunchKernelSplit(builtinFunction->toHandle(), &dispatchFuncArgs, hSignalEvent); if (res) { return res; } uint32_t groupRemainderSizeX = static_cast(size) % groupSizeX; if (groupRemainderSizeX) { builtinFunction->setGroupSize(groupRemainderSizeX, 1u, 1u); ze_group_count_t dispatchFuncRemainderArgs{1u, 1u, 1u}; size_t dstOffset = dstAllocation.offset + (size - groupRemainderSizeX); builtinFunction->setArgBufferWithAlloc(0, dstAllocation.alignedAllocationPtr, dstAllocation.alloc); builtinFunction->setArgumentValue(1, sizeof(dstOffset), &dstOffset); res = appendLaunchKernelSplit(builtinFunction->toHandle(), &dispatchFuncRemainderArgs, hSignalEvent); if (res) { return res; } } } else { Kernel *builtinFunction = nullptr; if (isStateless) { builtinFunction = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferMiddleStateless); } else { builtinFunction = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferMiddle); } size_t middleElSize = sizeof(uint32_t); size_t adjustedSize = size / middleElSize; uint32_t groupSizeX = static_cast(adjustedSize); uint32_t groupSizeY = 1, groupSizeZ = 1; 
builtinFunction->suggestGroupSize(groupSizeX, groupSizeY, groupSizeZ, &groupSizeX, &groupSizeY, &groupSizeZ); builtinFunction->setGroupSize(groupSizeX, groupSizeY, groupSizeZ); uint32_t groups = static_cast(adjustedSize) / groupSizeX; uint32_t groupRemainderSizeX = static_cast(size) % groupSizeX; size_t patternAllocationSize = alignUp(patternSize, MemoryConstants::cacheLineSize); uint32_t patternSizeInEls = static_cast(patternAllocationSize / middleElSize); auto patternGfxAlloc = device->obtainReusableAllocation(patternAllocationSize, NEO::AllocationType::FILL_PATTERN); if (patternGfxAlloc == nullptr) { patternGfxAlloc = device->getDriverHandle()->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getNEODevice()->getRootDeviceIndex(), patternAllocationSize, NEO::AllocationType::FILL_PATTERN, device->getNEODevice()->getDeviceBitfield()}); } void *patternGfxAllocPtr = patternGfxAlloc->getUnderlyingBuffer(); patternAllocations.push_back(patternGfxAlloc); uint64_t patternAllocPtr = reinterpret_cast(patternGfxAllocPtr); uint64_t patternAllocOffset = 0; uint64_t patternSizeToCopy = patternSize; do { memcpy_s(reinterpret_cast(patternAllocPtr + patternAllocOffset), patternSizeToCopy, pattern, patternSizeToCopy); if ((patternAllocOffset + patternSizeToCopy) > patternAllocationSize) { patternSizeToCopy = patternAllocationSize - patternAllocOffset; } patternAllocOffset += patternSizeToCopy; } while (patternAllocOffset < patternAllocationSize); builtinFunction->setArgBufferWithAlloc(0, dstAllocation.alignedAllocationPtr, dstAllocation.alloc); builtinFunction->setArgumentValue(1, sizeof(dstAllocation.offset), &dstAllocation.offset); builtinFunction->setArgBufferWithAlloc(2, reinterpret_cast(patternGfxAllocPtr), patternGfxAlloc); builtinFunction->setArgumentValue(3, sizeof(patternSizeInEls), &patternSizeInEls); appendEventForProfilingAllWalkers(hSignalEvent, true); ze_group_count_t dispatchFuncArgs{groups, 1u, 1u}; res = 
appendLaunchKernelSplit(builtinFunction->toHandle(), &dispatchFuncArgs, hSignalEvent); if (res) { return res; } if (groupRemainderSizeX) { uint32_t dstOffsetRemainder = groups * groupSizeX * static_cast(middleElSize); uint64_t patternOffsetRemainder = (groupSizeX * groups & (patternSizeInEls - 1)) * middleElSize; Kernel *builtinFunctionRemainder; if (isStateless) { builtinFunctionRemainder = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferRightLeftoverStateless); } else { builtinFunctionRemainder = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferRightLeftover); } builtinFunctionRemainder->setGroupSize(groupRemainderSizeX, 1u, 1u); ze_group_count_t dispatchFuncArgs{1u, 1u, 1u}; builtinFunctionRemainder->setArgBufferWithAlloc(0, dstAllocation.alignedAllocationPtr, dstAllocation.alloc); builtinFunctionRemainder->setArgumentValue(1, sizeof(dstOffsetRemainder), &dstOffsetRemainder); builtinFunctionRemainder->setArgBufferWithAlloc(2, reinterpret_cast(patternGfxAllocPtr) + patternOffsetRemainder, patternGfxAlloc); builtinFunctionRemainder->setArgumentValue(3, sizeof(patternSizeInEls), &patternSizeInEls); res = appendLaunchKernelSplit(builtinFunctionRemainder->toHandle(), &dispatchFuncArgs, hSignalEvent); if (res) { return res; } } } appendEventForProfilingAllWalkers(hSignalEvent, false); const auto &hwInfo = this->device->getHwInfo(); if (NEO::MemorySynchronizationCommands::getDcFlushEnable(true, hwInfo)) { auto event = Event::fromHandle(hSignalEvent); if (event) { hostPointerNeedsFlush &= !event->signalScope; } if (hostPointerNeedsFlush) { NEO::PipeControlArgs args; args.dcFlushEnable = true; NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), args); } } if (NEO::DebugManager.flags.EnableSWTags.get()) { neoDevice->getRootDeviceEnvironment().tagsManager->insertTag( *commandContainer.getCommandStream(), *neoDevice, "zeCommandListAppendMemoryFill", callId); } return res; } template ze_result_t 
CommandListCoreFamily::appendBlitFill(void *ptr, const void *pattern, size_t patternSize, size_t size, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { auto neoDevice = device->getNEODevice(); if (NEO::HwHelper::get(device->getHwInfo().platform.eRenderCoreFamily).getMaxFillPaternSizeForCopyEngine() < patternSize) { return ZE_RESULT_ERROR_INVALID_SIZE; } else { ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents); if (ret) { return ret; } appendEventForProfiling(hSignalEvent, true); NEO::GraphicsAllocation *gpuAllocation = device->getDriverHandle()->getDriverSystemMemoryAllocation(ptr, size, neoDevice->getRootDeviceIndex(), nullptr); DriverHandleImp *driverHandle = static_cast(device->getDriverHandle()); auto allocData = driverHandle->getSvmAllocsManager()->getSVMAlloc(ptr); if (driverHandle->isRemoteResourceNeeded(ptr, gpuAllocation, allocData, device)) { if (allocData) { uint64_t pbase = allocData->gpuAllocations.getDefaultGraphicsAllocation()->getGpuAddress(); gpuAllocation = driverHandle->getPeerAllocation(device, allocData, reinterpret_cast(pbase), nullptr); } if (gpuAllocation == nullptr) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } } uint64_t offset = reinterpret_cast(static_cast(ptr)) - static_cast(gpuAllocation->getGpuAddress()); commandContainer.addToResidencyContainer(gpuAllocation); uint32_t patternToCommand[4] = {}; memcpy_s(&patternToCommand, sizeof(patternToCommand), pattern, patternSize); NEO::BlitCommandsHelper::dispatchBlitMemoryColorFill(gpuAllocation, offset, patternToCommand, patternSize, *commandContainer.getCommandStream(), size, *neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]); appendSignalEventPostWalker(hSignalEvent); } return ZE_RESULT_SUCCESS; } template void CommandListCoreFamily::appendSignalEventPostWalker(ze_event_handle_t hEvent) { if (hEvent == nullptr) { return; } auto event = Event::fromHandle(hEvent); if 
(event->isEventTimestampFlagSet()) { appendEventForProfiling(hEvent, false); } else { using POST_SYNC_OPERATION = typename GfxFamily::PIPE_CONTROL::POST_SYNC_OPERATION; using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; commandContainer.addToResidencyContainer(&event->getAllocation(this->device)); uint64_t baseAddr = event->getGpuAddress(this->device); const auto &hwInfo = this->device->getHwInfo(); if (isCopyOnly()) { NEO::MiFlushArgs args; args.commandWithPostSync = true; NEO::EncodeMiFlushDW::programMiFlushDw(*commandContainer.getCommandStream(), baseAddr, Event::STATE_SIGNALED, args, hwInfo); } else { NEO::PipeControlArgs args; args.dcFlushEnable = NEO::MemorySynchronizationCommands::getDcFlushEnable(event->signalScope, hwInfo); if (this->partitionCount > 1) { args.workloadPartitionOffset = true; event->setPacketsInUse(this->partitionCount); event->setPartitionedEvent(true); baseAddr += event->getContextEndOffset(); } NEO::MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( *commandContainer.getCommandStream(), POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, baseAddr, Event::STATE_SIGNALED, hwInfo, args); } } } template void CommandListCoreFamily::appendEventForProfilingCopyCommand(ze_event_handle_t hEvent, bool beforeWalker) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; auto event = Event::fromHandle(hEvent); if (!event->isEventTimestampFlagSet()) { return; } commandContainer.addToResidencyContainer(&event->getAllocation(this->device)); const auto &hwInfo = this->device->getHwInfo(); if (!beforeWalker) { NEO::MiFlushArgs args; NEO::EncodeMiFlushDW::programMiFlushDw(*commandContainer.getCommandStream(), 0, 0, args, hwInfo); } appendWriteKernelTimestamp(hEvent, beforeWalker, false); } template inline uint64_t CommandListCoreFamily::getInputBufferSize(NEO::ImageType imageType, uint64_t bytesPerPixel, const ze_image_region_t *region) { switch (imageType) { default: UNRECOVERABLE_IF(true); case 
NEO::ImageType::Image1D: case NEO::ImageType::Image1DArray: return bytesPerPixel * region->width; case NEO::ImageType::Image2D: case NEO::ImageType::Image2DArray: return bytesPerPixel * region->width * region->height; case NEO::ImageType::Image3D: return bytesPerPixel * region->width * region->height * region->depth; } } template inline AlignedAllocationData CommandListCoreFamily::getAlignedAllocation(Device *device, const void *buffer, uint64_t bufferSize, bool hostCopyAllowed) { NEO::SvmAllocationData *allocData = nullptr; void *ptr = const_cast(buffer); bool srcAllocFound = device->getDriverHandle()->findAllocationDataForRange(ptr, bufferSize, &allocData); NEO::GraphicsAllocation *alloc = nullptr; uintptr_t sourcePtr = reinterpret_cast(ptr); size_t offset = 0; NEO::EncodeSurfaceState::getSshAlignedPointer(sourcePtr, offset); uintptr_t alignedPtr = 0u; bool hostPointerNeedsFlush = false; if (srcAllocFound == false) { alloc = device->getDriverHandle()->findHostPointerAllocation(ptr, static_cast(bufferSize), device->getRootDeviceIndex()); if (alloc != nullptr) { alignedPtr = static_cast(alignDown(alloc->getGpuAddress(), NEO::EncodeSurfaceState::getSurfaceBaseAddressAlignment())); //get offset from GPUVA of allocation to align down GPU address offset = static_cast(alloc->getGpuAddress()) - alignedPtr; //get offset from base of allocation to arg address offset += reinterpret_cast(ptr) - reinterpret_cast(alloc->getUnderlyingBuffer()); } else { alloc = getHostPtrAlloc(buffer, bufferSize, hostCopyAllowed); alignedPtr = static_cast(alignDown(alloc->getGpuAddress(), NEO::EncodeSurfaceState::getSurfaceBaseAddressAlignment())); if (alloc->getAllocationType() == NEO::AllocationType::EXTERNAL_HOST_PTR) { auto hostAllocCpuPtr = reinterpret_cast(alloc->getUnderlyingBuffer()); hostAllocCpuPtr = alignDown(hostAllocCpuPtr, NEO::EncodeSurfaceState::getSurfaceBaseAddressAlignment()); auto allignedPtrOffset = sourcePtr - hostAllocCpuPtr; alignedPtr = ptrOffset(alignedPtr, 
allignedPtrOffset); } } hostPointerNeedsFlush = true; } else { alloc = allocData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex()); DeviceImp *deviceImp = static_cast(device); DriverHandleImp *driverHandle = static_cast(deviceImp->getDriverHandle()); if (driverHandle->isRemoteResourceNeeded(const_cast(buffer), alloc, allocData, device)) { uint64_t pbase = allocData->gpuAllocations.getDefaultGraphicsAllocation()->getGpuAddress(); uint64_t offset = sourcePtr - pbase; alloc = driverHandle->getPeerAllocation(device, allocData, reinterpret_cast(pbase), &alignedPtr); alignedPtr += offset; } else { alignedPtr = sourcePtr; } if (allocData->memoryType == InternalMemoryType::HOST_UNIFIED_MEMORY || allocData->memoryType == InternalMemoryType::SHARED_UNIFIED_MEMORY) { hostPointerNeedsFlush = true; } } return {alignedPtr, offset, alloc, hostPointerNeedsFlush}; } template inline ze_result_t CommandListCoreFamily::addEventsToCmdList(uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { if (numWaitEvents > 0) { if (phWaitEvents) { CommandListCoreFamily::appendWaitOnEvents(numWaitEvents, phWaitEvents); } else { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } } return ZE_RESULT_SUCCESS; } template ze_result_t CommandListCoreFamily::appendSignalEvent(ze_event_handle_t hEvent) { using POST_SYNC_OPERATION = typename GfxFamily::PIPE_CONTROL::POST_SYNC_OPERATION; using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; auto event = Event::fromHandle(hEvent); commandContainer.addToResidencyContainer(&event->getAllocation(this->device)); uint64_t baseAddr = event->getGpuAddress(this->device); NEO::Device *neoDevice = device->getNEODevice(); uint32_t callId = 0; if (NEO::DebugManager.flags.EnableSWTags.get()) { neoDevice->getRootDeviceEnvironment().tagsManager->insertTag( *commandContainer.getCommandStream(), *neoDevice, "zeCommandListAppendSignalEvent", ++neoDevice->getRootDeviceEnvironment().tagsManager->currentCallCount); callId = 
neoDevice->getRootDeviceEnvironment().tagsManager->currentCallCount; } size_t eventSignalOffset = 0; if (this->partitionCount > 1) { event->setPartitionedEvent(true); event->setPacketsInUse(this->partitionCount); } if (event->useContextEndOffset()) { eventSignalOffset = event->getContextEndOffset(); } const auto &hwInfo = this->device->getHwInfo(); if (isCopyOnly()) { NEO::MiFlushArgs args; args.commandWithPostSync = true; NEO::EncodeMiFlushDW::programMiFlushDw(*commandContainer.getCommandStream(), ptrOffset(baseAddr, eventSignalOffset), Event::STATE_SIGNALED, args, hwInfo); } else { NEO::PipeControlArgs args; bool applyScope = event->signalScope; args.dcFlushEnable = NEO::MemorySynchronizationCommands::getDcFlushEnable(applyScope, hwInfo); args.workloadPartitionOffset = event->isPartitionedEvent(); if (applyScope || event->isEventTimestampFlagSet()) { NEO::MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( *commandContainer.getCommandStream(), POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, ptrOffset(baseAddr, eventSignalOffset), Event::STATE_SIGNALED, hwInfo, args); } else { NEO::EncodeStoreMemory::programStoreDataImm( *commandContainer.getCommandStream(), ptrOffset(baseAddr, eventSignalOffset), Event::STATE_SIGNALED, 0u, false, args.workloadPartitionOffset); } } if (NEO::DebugManager.flags.EnableSWTags.get()) { neoDevice->getRootDeviceEnvironment().tagsManager->insertTag( *commandContainer.getCommandStream(), *neoDevice, "zeCommandListAppendSignalEvent", callId); } return ZE_RESULT_SUCCESS; } template ze_result_t CommandListCoreFamily::appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT; using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION; NEO::Device *neoDevice = device->getNEODevice(); uint32_t callId = 0; if (NEO::DebugManager.flags.EnableSWTags.get()) 
{ neoDevice->getRootDeviceEnvironment().tagsManager->insertTag( *commandContainer.getCommandStream(), *neoDevice, "zeCommandListAppendWaitOnEvents", ++neoDevice->getRootDeviceEnvironment().tagsManager->currentCallCount); callId = neoDevice->getRootDeviceEnvironment().tagsManager->currentCallCount; } uint64_t gpuAddr = 0; constexpr uint32_t eventStateClear = Event::State::STATE_CLEARED; bool dcFlushRequired = false; const auto &hwInfo = this->device->getHwInfo(); if (NEO::MemorySynchronizationCommands::getDcFlushEnable(true, hwInfo)) { for (uint32_t i = 0; i < numEvents; i++) { auto event = Event::fromHandle(phEvent[i]); dcFlushRequired |= !!event->waitScope; } } if (dcFlushRequired) { if (isCopyOnly()) { NEO::MiFlushArgs args; NEO::EncodeMiFlushDW::programMiFlushDw(*commandContainer.getCommandStream(), 0, 0, args, hwInfo); } else { NEO::PipeControlArgs args; args.dcFlushEnable = true; NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), args); } } for (uint32_t i = 0; i < numEvents; i++) { auto event = Event::fromHandle(phEvent[i]); commandContainer.addToResidencyContainer(&event->getAllocation(this->device)); gpuAddr = event->getGpuAddress(this->device); uint32_t packetsToWait = event->getPacketsInUse(); if (event->useContextEndOffset()) { gpuAddr += event->getContextEndOffset(); } for (uint32_t i = 0u; i < packetsToWait; i++) { NEO::EncodeSempahore::addMiSemaphoreWaitCommand(*commandContainer.getCommandStream(), gpuAddr, eventStateClear, COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD); gpuAddr += event->getSinglePacketSize(); } } if (NEO::DebugManager.flags.EnableSWTags.get()) { neoDevice->getRootDeviceEnvironment().tagsManager->insertTag( *commandContainer.getCommandStream(), *neoDevice, "zeCommandListAppendWaitOnEvents", callId); } return ZE_RESULT_SUCCESS; } template ze_result_t CommandListCoreFamily::programSyncBuffer(Kernel &kernel, NEO::Device &device, const ze_group_count_t *pThreadGroupDimensions) { uint32_t 
maximalNumberOfWorkgroupsAllowed; auto ret = kernel.suggestMaxCooperativeGroupCount(&maximalNumberOfWorkgroupsAllowed, this->engineGroupType, device.isEngineInstanced()); UNRECOVERABLE_IF(ret != ZE_RESULT_SUCCESS); size_t requestedNumberOfWorkgroups = (pThreadGroupDimensions->groupCountX * pThreadGroupDimensions->groupCountY * pThreadGroupDimensions->groupCountZ); if (requestedNumberOfWorkgroups > maximalNumberOfWorkgroupsAllowed) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } device.allocateSyncBufferHandler(); device.syncBufferHandler->prepareForEnqueue(requestedNumberOfWorkgroups, kernel); return ZE_RESULT_SUCCESS; } template void CommandListCoreFamily::appendWriteKernelTimestamp(ze_event_handle_t hEvent, bool beforeWalker, bool maskLsb) { constexpr uint32_t mask = 0xfffffffe; auto event = Event::fromHandle(hEvent); auto baseAddr = event->getGpuAddress(this->device); auto contextOffset = beforeWalker ? event->getContextStartOffset() : event->getContextEndOffset(); auto globalOffset = beforeWalker ? 
event->getGlobalStartOffset() : event->getGlobalEndOffset(); uint64_t globalAddress = ptrOffset(baseAddr, globalOffset); uint64_t contextAddress = ptrOffset(baseAddr, contextOffset); if (maskLsb) { NEO::EncodeMathMMIO::encodeBitwiseAndVal(commandContainer, REG_GLOBAL_TIMESTAMP_LDW, mask, globalAddress); NEO::EncodeMathMMIO::encodeBitwiseAndVal(commandContainer, GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, mask, contextAddress); } else { NEO::EncodeStoreMMIO::encode(*commandContainer.getCommandStream(), REG_GLOBAL_TIMESTAMP_LDW, globalAddress); NEO::EncodeStoreMMIO::encode(*commandContainer.getCommandStream(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextAddress); } adjustWriteKernelTimestamp(globalAddress, contextAddress, maskLsb, mask); } template void CommandListCoreFamily::appendEventForProfiling(ze_event_handle_t hEvent, bool beforeWalker) { if (!hEvent) { return; } if (isCopyOnly()) { appendEventForProfilingCopyCommand(hEvent, beforeWalker); } else { auto event = Event::fromHandle(hEvent); if (!event->isEventTimestampFlagSet()) { return; } commandContainer.addToResidencyContainer(&event->getAllocation(this->device)); if (beforeWalker) { appendWriteKernelTimestamp(hEvent, beforeWalker, true); } else { const auto &hwInfo = this->device->getHwInfo(); NEO::PipeControlArgs args; args.dcFlushEnable = NEO::MemorySynchronizationCommands::getDcFlushEnable(event->signalScope, hwInfo); NEO::MemorySynchronizationCommands::setPostSyncExtraProperties(args, hwInfo); NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), args); uint64_t baseAddr = event->getGpuAddress(this->device); NEO::MemorySynchronizationCommands::addAdditionalSynchronization(*commandContainer.getCommandStream(), baseAddr, false, hwInfo); appendWriteKernelTimestamp(hEvent, beforeWalker, true); } } } template ze_result_t CommandListCoreFamily::appendWriteGlobalTimestamp( uint64_t *dstptr, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { 
using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using POST_SYNC_OPERATION = typename GfxFamily::PIPE_CONTROL::POST_SYNC_OPERATION; if (numWaitEvents > 0) { if (phWaitEvents) { CommandListCoreFamily::appendWaitOnEvents(numWaitEvents, phWaitEvents); } else { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } } const auto &hwInfo = this->device->getHwInfo(); if (isCopyOnly()) { NEO::MiFlushArgs args; args.timeStampOperation = true; args.commandWithPostSync = true; NEO::EncodeMiFlushDW::programMiFlushDw(*commandContainer.getCommandStream(), reinterpret_cast(dstptr), 0, args, hwInfo); } else { NEO::PipeControlArgs args; NEO::MemorySynchronizationCommands::addPipeControlWithPostSync( *commandContainer.getCommandStream(), POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_TIMESTAMP, reinterpret_cast(dstptr), 0, args); } if (hSignalEvent) { CommandListCoreFamily::appendSignalEventPostWalker(hSignalEvent); } auto allocationStruct = getAlignedAllocation(this->device, dstptr, sizeof(uint64_t), false); commandContainer.addToResidencyContainer(allocationStruct.alloc); return ZE_RESULT_SUCCESS; } template ze_result_t CommandListCoreFamily::appendMemoryCopyFromContext( void *dstptr, ze_context_handle_t hContextSrc, const void *srcptr, size_t size, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { return CommandListCoreFamily::appendMemoryCopy(dstptr, srcptr, size, hSignalEvent, numWaitEvents, phWaitEvents); } template ze_result_t CommandListCoreFamily::appendQueryKernelTimestamps( uint32_t numEvents, ze_event_handle_t *phEvents, void *dstptr, const size_t *pOffsets, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { auto dstptrAllocationStruct = getAlignedAllocation(this->device, dstptr, sizeof(ze_kernel_timestamp_result_t) * numEvents, false); commandContainer.addToResidencyContainer(dstptrAllocationStruct.alloc); std::unique_ptr timestampsData = std::make_unique(numEvents); for (uint32_t i = 0u; i 
< numEvents; ++i) { auto event = Event::fromHandle(phEvents[i]); commandContainer.addToResidencyContainer(&event->getAllocation(this->device)); timestampsData[i].address = event->getGpuAddress(this->device); timestampsData[i].packetsInUse = event->getPacketsInUse(); timestampsData[i].timestampSizeInDw = event->getTimestampSizeInDw(); } size_t alignedSize = alignUp(sizeof(EventData) * numEvents, MemoryConstants::pageSize64k); NEO::AllocationType allocationType = NEO::AllocationType::GPU_TIMESTAMP_DEVICE_BUFFER; auto devices = device->getNEODevice()->getDeviceBitfield(); NEO::AllocationProperties allocationProperties{device->getRootDeviceIndex(), true, alignedSize, allocationType, devices.count() > 1, false, devices}; NEO::GraphicsAllocation *timestampsGPUData = device->getDriverHandle()->getMemoryManager()->allocateGraphicsMemoryWithProperties(allocationProperties); UNRECOVERABLE_IF(timestampsGPUData == nullptr); commandContainer.addToResidencyContainer(timestampsGPUData); commandContainer.getDeallocationContainer().push_back(timestampsGPUData); bool result = device->getDriverHandle()->getMemoryManager()->copyMemoryToAllocation(timestampsGPUData, 0, timestampsData.get(), sizeof(EventData) * numEvents); UNRECOVERABLE_IF(!result); Kernel *builtinFunction = nullptr; auto useOnlyGlobalTimestamps = NEO::HwHelper::get(device->getHwInfo().platform.eRenderCoreFamily).useOnlyGlobalTimestamps() ? 
1u : 0u; auto lock = device->getBuiltinFunctionsLib()->obtainUniqueOwnership(); if (pOffsets == nullptr) { builtinFunction = device->getBuiltinFunctionsLib()->getFunction(Builtin::QueryKernelTimestamps); builtinFunction->setArgumentValue(2u, sizeof(uint32_t), &useOnlyGlobalTimestamps); } else { auto pOffsetAllocationStruct = getAlignedAllocation(this->device, pOffsets, sizeof(size_t) * numEvents, false); auto offsetValPtr = static_cast(pOffsetAllocationStruct.alloc->getGpuAddress()); commandContainer.addToResidencyContainer(pOffsetAllocationStruct.alloc); builtinFunction = device->getBuiltinFunctionsLib()->getFunction(Builtin::QueryKernelTimestampsWithOffsets); builtinFunction->setArgBufferWithAlloc(2, offsetValPtr, pOffsetAllocationStruct.alloc); builtinFunction->setArgumentValue(3u, sizeof(uint32_t), &useOnlyGlobalTimestamps); offsetValPtr += sizeof(size_t); } uint32_t groupSizeX = 1u; uint32_t groupSizeY = 1u; uint32_t groupSizeZ = 1u; if (builtinFunction->suggestGroupSize(numEvents, 1u, 1u, &groupSizeX, &groupSizeY, &groupSizeZ) != ZE_RESULT_SUCCESS) { DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } if (builtinFunction->setGroupSize(groupSizeX, groupSizeY, groupSizeZ) != ZE_RESULT_SUCCESS) { DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } ze_group_count_t dispatchFuncArgs{numEvents / groupSizeX, 1u, 1u}; auto dstValPtr = static_cast(dstptrAllocationStruct.alloc->getGpuAddress()); builtinFunction->setArgBufferWithAlloc(0u, static_cast(timestampsGPUData->getGpuAddress()), timestampsGPUData); builtinFunction->setArgBufferWithAlloc(1, dstValPtr, dstptrAllocationStruct.alloc); auto appendResult = appendLaunchKernel(builtinFunction->toHandle(), &dispatchFuncArgs, hSignalEvent, numWaitEvents, phWaitEvents); if (appendResult != ZE_RESULT_SUCCESS) { return appendResult; } return ZE_RESULT_SUCCESS; } template ze_result_t CommandListCoreFamily::reserveSpace(size_t size, void **ptr) { auto availableSpace = 
commandContainer.getCommandStream()->getAvailableSpace(); if (availableSpace < size) { *ptr = nullptr; } else { *ptr = commandContainer.getCommandStream()->getSpace(size); } return ZE_RESULT_SUCCESS; } template ze_result_t CommandListCoreFamily::prepareIndirectParams(const ze_group_count_t *pThreadGroupDimensions) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; auto allocData = device->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(pThreadGroupDimensions); if (allocData) { auto alloc = allocData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex()); commandContainer.addToResidencyContainer(alloc); size_t groupCountOffset = 0; if (allocData->cpuAllocation != nullptr) { commandContainer.addToResidencyContainer(allocData->cpuAllocation); groupCountOffset = ptrDiff(pThreadGroupDimensions, allocData->cpuAllocation->getUnderlyingBuffer()); } else { groupCountOffset = ptrDiff(pThreadGroupDimensions, alloc->getGpuAddress()); } auto groupCount = ptrOffset(alloc->getGpuAddress(), groupCountOffset); NEO::EncodeSetMMIO::encodeMEM(commandContainer, GPUGPU_DISPATCHDIMX, ptrOffset(groupCount, offsetof(ze_group_count_t, groupCountX))); NEO::EncodeSetMMIO::encodeMEM(commandContainer, GPUGPU_DISPATCHDIMY, ptrOffset(groupCount, offsetof(ze_group_count_t, groupCountY))); NEO::EncodeSetMMIO::encodeMEM(commandContainer, GPUGPU_DISPATCHDIMZ, ptrOffset(groupCount, offsetof(ze_group_count_t, groupCountZ))); } return ZE_RESULT_SUCCESS; } template void CommandListCoreFamily::updateStreamProperties(Kernel &kernel, bool isMultiOsContextCapable, bool isCooperative) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using VFE_STATE_TYPE = typename GfxFamily::VFE_STATE_TYPE; auto &hwInfo = device->getHwInfo(); const auto &hwInfoConfig = *NEO::HwInfoConfig::get(hwInfo.platform.eProductFamily); auto disableOverdispatch = hwInfoConfig.isDisableOverdispatchAvailable(hwInfo); auto &kernelAttributes = kernel.getKernelDescriptor().kernelAttributes; if 
(!containsAnyKernel) { requiredStreamState.frontEndState.setProperties(isCooperative, kernelAttributes.flags.requiresDisabledEUFusion, disableOverdispatch, -1, hwInfo); finalStreamState = requiredStreamState; requiredStreamState.stateComputeMode.setProperties(false, kernelAttributes.numGrfRequired, kernel.getSchedulingHintExp(), hwInfo); containsAnyKernel = true; } finalStreamState.frontEndState.setProperties(isCooperative, kernelAttributes.flags.requiresDisabledEUFusion, disableOverdispatch, -1, hwInfo); bool isPatchingVfeStateAllowed = NEO::DebugManager.flags.AllowPatchingVfeStateInCommandLists.get(); if (finalStreamState.frontEndState.isDirty() && isPatchingVfeStateAllowed) { auto pVfeStateAddress = NEO::PreambleHelper::getSpaceForVfeState(commandContainer.getCommandStream(), hwInfo, engineGroupType); auto pVfeState = new VFE_STATE_TYPE; NEO::PreambleHelper::programVfeState(pVfeState, hwInfo, 0, 0, device->getMaxNumHwThreads(), finalStreamState); commandsToPatch.push_back({pVfeStateAddress, pVfeState, CommandToPatch::FrontEndState}); } finalStreamState.stateComputeMode.setProperties(false, kernelAttributes.numGrfRequired, kernel.getSchedulingHintExp(), hwInfo); if (finalStreamState.stateComputeMode.isDirty()) { bool isRcs = (this->engineGroupType == NEO::EngineGroupType::RenderCompute); NEO::EncodeComputeMode::programComputeModeCommandWithSynchronization( *commandContainer.getCommandStream(), finalStreamState.stateComputeMode, {}, false, hwInfo, isRcs); } } template void CommandListCoreFamily::clearCommandsToPatch() { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using VFE_STATE_TYPE = typename GfxFamily::VFE_STATE_TYPE; for (auto &commandToPatch : commandsToPatch) { switch (commandToPatch.type) { case CommandList::CommandToPatch::FrontEndState: UNRECOVERABLE_IF(commandToPatch.pCommand == nullptr); delete reinterpret_cast(commandToPatch.pCommand); break; default: UNRECOVERABLE_IF(true); } } commandsToPatch.clear(); } template ze_result_t 
CommandListCoreFamily::setGlobalWorkSizeIndirect(NEO::CrossThreadDataOffset offsets[3], uint64_t crossThreadAddress, uint32_t lws[3]) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; NEO::EncodeIndirectParams::setGlobalWorkSizeIndirect(commandContainer, offsets, crossThreadAddress, lws); return ZE_RESULT_SUCCESS; } template void CommandListCoreFamily::programStateBaseAddress(NEO::CommandContainer &container, bool genericMediaStateClearRequired) { const auto &hwInfo = this->device->getHwInfo(); bool isRcs = (this->engineGroupType == NEO::EngineGroupType::RenderCompute); NEO::EncodeWA::addPipeControlBeforeStateBaseAddress(*commandContainer.getCommandStream(), hwInfo, isRcs); STATE_BASE_ADDRESS sba; NEO::EncodeStateBaseAddress::encode(commandContainer, sba, this->partitionCount > 1); if (NEO::Debugger::isDebugEnabled(this->internalUsage) && device->getL0Debugger()) { NEO::Debugger::SbaAddresses sbaAddresses = {}; sbaAddresses.BindlessSurfaceStateBaseAddress = sba.getBindlessSurfaceStateBaseAddress(); sbaAddresses.DynamicStateBaseAddress = sba.getDynamicStateBaseAddress(); sbaAddresses.GeneralStateBaseAddress = sba.getGeneralStateBaseAddress(); NEO::EncodeStateBaseAddress::setIohAddressForDebugger(sbaAddresses, sba); sbaAddresses.InstructionBaseAddress = sba.getInstructionBaseAddress(); sbaAddresses.SurfaceStateBaseAddress = sba.getSurfaceStateBaseAddress(); device->getL0Debugger()->captureStateBaseAddress(commandContainer, sbaAddresses); } } template void CommandListCoreFamily::adjustWriteKernelTimestamp(uint64_t globalAddress, uint64_t contextAddress, bool maskLsb, uint32_t mask) {} template ze_result_t CommandListCoreFamily::appendBarrier(ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents); if (ret) { return ret; } appendEventForProfiling(hSignalEvent, true); const auto &hwInfo = this->device->getHwInfo(); if (!hSignalEvent) { if (isCopyOnly()) { 
NEO::MiFlushArgs args; NEO::EncodeMiFlushDW::programMiFlushDw(*commandContainer.getCommandStream(), 0, 0, args, hwInfo); } else { appendComputeBarrierCommand(); } } else { appendSignalEventPostWalker(hSignalEvent); } return ZE_RESULT_SUCCESS; } } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/cmdlist/cmdlist_hw_base.inl000066400000000000000000000226221422164147700272460ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_container/command_encoder.h" #include "shared/source/command_stream/linear_stream.h" #include "shared/source/command_stream/preemption.h" #include "shared/source/helpers/pipe_control_args.h" #include "shared/source/helpers/register_offsets.h" #include "shared/source/helpers/simd_helper.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/memory_manager/residency_container.h" #include "shared/source/unified_memory/unified_memory.h" #include "shared/source/utilities/software_tags_manager.h" #include "level_zero/core/source/kernel/kernel_imp.h" #include namespace L0 { struct DeviceImp; template size_t CommandListCoreFamily::getReserveSshSize() { auto &helper = NEO::HwHelper::get(device->getHwInfo().platform.eRenderCoreFamily); return helper.getRenderSurfaceStateSize(); } template ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(ze_kernel_handle_t hKernel, const ze_group_count_t *pThreadGroupDimensions, ze_event_handle_t hEvent, bool isIndirect, bool isPredicate, bool isCooperative) { const auto kernel = Kernel::fromHandle(hKernel); const auto &kernelDescriptor = kernel->getKernelDescriptor(); UNRECOVERABLE_IF(kernel == nullptr); appendEventForProfiling(hEvent, true); const auto functionImmutableData = kernel->getImmutableData(); auto perThreadScratchSize = std::max(this->getCommandListPerThreadScratchSize(), 
kernel->getImmutableData()->getDescriptor().kernelAttributes.perThreadScratchSize[0]); this->setCommandListPerThreadScratchSize(perThreadScratchSize); auto slmEnable = (kernel->getImmutableData()->getDescriptor().kernelAttributes.slmInlineSize > 0); this->setCommandListSLMEnable(slmEnable); auto kernelPreemptionMode = obtainFunctionPreemptionMode(kernel); commandListPreemptionMode = std::min(commandListPreemptionMode, kernelPreemptionMode); kernel->patchGlobalOffset(); if (kernelDescriptor.kernelAttributes.perHwThreadPrivateMemorySize != 0U && nullptr == kernel->getPrivateMemoryGraphicsAllocation()) { auto privateMemoryGraphicsAllocation = kernel->allocatePrivateMemoryGraphicsAllocation(); kernel->patchCrossthreadDataWithPrivateAllocation(privateMemoryGraphicsAllocation); this->commandContainer.addToResidencyContainer(privateMemoryGraphicsAllocation); this->ownedPrivateAllocations.push_back(privateMemoryGraphicsAllocation); } if (!isIndirect) { kernel->setGroupCount(pThreadGroupDimensions->groupCountX, pThreadGroupDimensions->groupCountY, pThreadGroupDimensions->groupCountZ); } if (isIndirect && pThreadGroupDimensions) { prepareIndirectParams(pThreadGroupDimensions); } if (kernel->hasIndirectAllocationsAllowed()) { UnifiedMemoryControls unifiedMemoryControls = kernel->getUnifiedMemoryControls(); if (unifiedMemoryControls.indirectDeviceAllocationsAllowed) { this->unifiedMemoryControls.indirectDeviceAllocationsAllowed = true; } if (unifiedMemoryControls.indirectHostAllocationsAllowed) { this->unifiedMemoryControls.indirectHostAllocationsAllowed = true; } if (unifiedMemoryControls.indirectSharedAllocationsAllowed) { this->unifiedMemoryControls.indirectSharedAllocationsAllowed = true; } this->indirectAllocationsAllowed = true; } bool isMixingRegularAndCooperativeKernelsAllowed = NEO::DebugManager.flags.AllowMixingRegularAndCooperativeKernels.get(); if ((!containsAnyKernel) || isMixingRegularAndCooperativeKernelsAllowed) { containsCooperativeKernelsFlag = 
(containsCooperativeKernelsFlag || isCooperative); } else if (containsCooperativeKernelsFlag != isCooperative) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } if (kernel->usesSyncBuffer()) { auto retVal = (isCooperative ? programSyncBuffer(*kernel, *device->getNEODevice(), pThreadGroupDimensions) : ZE_RESULT_ERROR_INVALID_ARGUMENT); if (retVal) { return retVal; } } KernelImp *kernelImp = static_cast(kernel); this->containsStatelessUncachedResource |= kernelImp->getKernelRequiresUncachedMocs(); this->requiresQueueUncachedMocs |= kernelImp->getKernelRequiresQueueUncachedMocs(); NEO::Device *neoDevice = device->getNEODevice(); if (NEO::DebugManager.flags.EnableSWTags.get()) { neoDevice->getRootDeviceEnvironment().tagsManager->insertTag( *commandContainer.getCommandStream(), *neoDevice, kernel->getKernelDescriptor().kernelMetadata.kernelName.c_str(), 0u); } updateStreamProperties(*kernel, false, isCooperative); NEO::EncodeDispatchKernelArgs dispatchKernelArgs{ 0, //eventAddress neoDevice, //device kernel, //dispatchInterface reinterpret_cast(pThreadGroupDimensions), //pThreadGroupDimensions commandListPreemptionMode, //preemptionMode 0, //partitionCount isIndirect, //isIndirect isPredicate, //isPredicate false, //isTimestampEvent false, //L3FlushEnable this->containsStatelessUncachedResource, //requiresUncachedMocs false, //useGlobalAtomics internalUsage, //isInternal isCooperative //isCooperative }; NEO::EncodeDispatchKernel::encode(commandContainer, dispatchKernelArgs); this->containsStatelessUncachedResource = dispatchKernelArgs.requiresUncachedMocs; if (neoDevice->getDebugger()) { auto *ssh = commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE); auto surfaceStateSpace = neoDevice->getDebugger()->getDebugSurfaceReservedSurfaceState(*ssh); auto surfaceState = GfxFamily::cmdInitRenderSurfaceState; NEO::EncodeSurfaceStateArgs args; args.outMemory = &surfaceState; args.graphicsAddress = device->getDebugSurface()->getGpuAddress(); args.size = 
device->getDebugSurface()->getUnderlyingBufferSize(); args.mocs = device->getMOCS(false, false); args.numAvailableDevices = neoDevice->getNumGenericSubDevices(); args.allocation = device->getDebugSurface(); args.gmmHelper = neoDevice->getGmmHelper(); args.useGlobalAtomics = kernelImp->getKernelDescriptor().kernelAttributes.flags.useGlobalAtomics; args.areMultipleSubDevicesInContext = false; NEO::EncodeSurfaceState::encodeBuffer(args); *reinterpret_cast(surfaceStateSpace) = surfaceState; } appendSignalEventPostWalker(hEvent); commandContainer.addToResidencyContainer(functionImmutableData->getIsaGraphicsAllocation()); auto &residencyContainer = kernel->getResidencyContainer(); for (auto resource : residencyContainer) { commandContainer.addToResidencyContainer(resource); } if (functionImmutableData->getDescriptor().kernelAttributes.flags.usesPrintf) { storePrintfFunction(kernel); } return ZE_RESULT_SUCCESS; } template void CommandListCoreFamily::appendMultiPartitionPrologue(uint32_t partitionDataSize) {} template void CommandListCoreFamily::appendMultiPartitionEpilogue() {} template void CommandListCoreFamily::appendComputeBarrierCommand() { NEO::PipeControlArgs args = createBarrierFlags(); NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), args); } template inline NEO::PipeControlArgs CommandListCoreFamily::createBarrierFlags() { NEO::PipeControlArgs args; return args; } template inline void CommandListCoreFamily::appendMultiTileBarrier(NEO::Device &neoDevice) { } template inline size_t CommandListCoreFamily::estimateBufferSizeMultiTileBarrier(const NEO::HardwareInfo &hwInfo) { return 0; } } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h000066400000000000000000000131061422164147700277340ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/core/source/cmdlist/cmdlist_hw.h" namespace L0 { 
struct EventPool; struct Event; constexpr size_t maxImmediateCommandSize = 4 * MemoryConstants::kiloByte; template struct CommandListCoreFamilyImmediate : public CommandListCoreFamily { using BaseClass = CommandListCoreFamily; using BaseClass::executeCommandListImmediate; using BaseClass::BaseClass; ze_result_t appendLaunchKernel(ze_kernel_handle_t hKernel, const ze_group_count_t *pThreadGroupDimensions, ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override; ze_result_t appendLaunchKernelIndirect(ze_kernel_handle_t hKernel, const ze_group_count_t *pDispatchArgumentsBuffer, ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override; ze_result_t appendBarrier(ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override; ze_result_t appendMemoryCopy(void *dstptr, const void *srcptr, size_t size, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override; ze_result_t appendMemoryCopyRegion(void *dstPtr, const ze_copy_region_t *dstRegion, uint32_t dstPitch, uint32_t dstSlicePitch, const void *srcPtr, const ze_copy_region_t *srcRegion, uint32_t srcPitch, uint32_t srcSlicePitch, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override; ze_result_t appendMemoryFill(void *ptr, const void *pattern, size_t patternSize, size_t size, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override; ze_result_t appendSignalEvent(ze_event_handle_t hEvent) override; ze_result_t appendEventReset(ze_event_handle_t hEvent) override; ze_result_t appendPageFaultCopy(NEO::GraphicsAllocation *dstAllocation, NEO::GraphicsAllocation *srcAllocation, size_t size, bool flushHost) override; ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent) override; ze_result_t appendWriteGlobalTimestamp(uint64_t *dstptr, ze_event_handle_t hSignalEvent, uint32_t 
numWaitEvents, ze_event_handle_t *phWaitEvents) override; ze_result_t appendMemoryCopyFromContext(void *dstptr, ze_context_handle_t hContextSrc, const void *srcptr, size_t size, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override; ze_result_t appendImageCopyFromMemory(ze_image_handle_t hDstImage, const void *srcPtr, const ze_image_region_t *pDstRegion, ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override; ze_result_t appendImageCopyToMemory(void *dstPtr, ze_image_handle_t hSrcImage, const ze_image_region_t *pSrcRegion, ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override; ze_result_t appendImageCopy( ze_image_handle_t dst, ze_image_handle_t src, ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override; ze_result_t appendImageCopyRegion(ze_image_handle_t hDstImage, ze_image_handle_t hSrcImage, const ze_image_region_t *pDstRegion, const ze_image_region_t *pSrcRegion, ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override; ze_result_t executeCommandListImmediateWithFlushTask(bool performMigration); void checkAvailableSpace(); void updateDispatchFlagsWithRequiredStreamState(NEO::DispatchFlags &dispatchFlags); ze_result_t flushImmediate(ze_result_t inputRet, bool performMigration); }; template struct CommandListImmediateProductFamily; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl000066400000000000000000000517511422164147700302770ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/wait_status.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/memory_manager/internal_allocation_storage.h" #include 
"level_zero/core/source/cmdlist/cmdlist_hw_immediate.h" namespace L0 { template void CommandListCoreFamilyImmediate::checkAvailableSpace() { if (this->commandContainer.getCommandStream()->getAvailableSpace() < maxImmediateCommandSize) { this->commandContainer.allocateNextCommandBuffer(); this->cmdListCurrentStartOffset = 0; } } template void CommandListCoreFamilyImmediate::updateDispatchFlagsWithRequiredStreamState(NEO::DispatchFlags &dispatchFlags) { const auto &requiredFrontEndState = this->requiredStreamState.frontEndState; dispatchFlags.kernelExecutionType = (requiredFrontEndState.computeDispatchAllWalkerEnable.value == 1) ? NEO::KernelExecutionType::Concurrent : NEO::KernelExecutionType::Default; dispatchFlags.disableEUFusion = (requiredFrontEndState.disableEUFusion.value == 1); dispatchFlags.additionalKernelExecInfo = (requiredFrontEndState.disableOverdispatch.value == 1) ? NEO::AdditionalKernelExecInfo::DisableOverdispatch : NEO::AdditionalKernelExecInfo::NotSet; const auto &requiredStateComputeMode = this->requiredStreamState.stateComputeMode; dispatchFlags.requiresCoherency = (requiredStateComputeMode.isCoherencyRequired.value == 1); dispatchFlags.numGrfRequired = (requiredStateComputeMode.largeGrfMode.value == 1) ? 
GrfConfig::LargeGrfNumber : GrfConfig::DefaultGrfNumber; dispatchFlags.threadArbitrationPolicy = requiredStateComputeMode.threadArbitrationPolicy.value; } template ze_result_t CommandListCoreFamilyImmediate::executeCommandListImmediateWithFlushTask(bool performMigration) { NEO::DispatchFlags dispatchFlags( {}, // csrDependencies nullptr, // barrierTimestampPacketNodes {}, // pipelineSelectArgs nullptr, // flushStampReference NEO::QueueThrottle::MEDIUM, // throttle this->getCommandListPreemptionMode(), // preemptionMode GrfConfig::NotApplicable, // numGrfRequired NEO::L3CachingSettings::l3CacheOn, // l3CacheSettings NEO::ThreadArbitrationPolicy::NotPresent, // threadArbitrationPolicy NEO::AdditionalKernelExecInfo::NotApplicable, // additionalKernelExecInfo NEO::KernelExecutionType::NotApplicable, // kernelExecutionType NEO::MemoryCompressionState::NotApplicable, // memoryCompressionState NEO::QueueSliceCount::defaultSliceCount, // sliceCount this->isSyncModeQueue, // blocking this->isSyncModeQueue, // dcFlush this->getCommandListSLMEnable(), // useSLM this->isSyncModeQueue, // guardCommandBufferWithPipeControl false, // GSBA32BitRequired false, // requiresCoherency false, // lowPriority true, // implicitFlush this->csr->isNTo1SubmissionModelEnabled(), // outOfOrderExecutionAllowed false, // epilogueRequired false, // usePerDssBackedBuffer false, // useSingleSubdevice false, // useGlobalAtomics this->device->getNEODevice()->getNumGenericSubDevices() > 1, // areMultipleSubDevicesInContext false, // memoryMigrationRequired false // textureCacheFlush ); this->updateDispatchFlagsWithRequiredStreamState(dispatchFlags); this->commandContainer.removeDuplicatesFromResidencyContainer(); auto commandStream = this->commandContainer.getCommandStream(); size_t commandStreamStart = this->cmdListCurrentStartOffset; auto lockCSR = this->csr->obtainUniqueOwnership(); this->handleIndirectAllocationResidency(); 
this->csr->setRequiredScratchSizes(this->getCommandListPerThreadScratchSize(), this->getCommandListPerThreadScratchSize()); if (performMigration) { auto deviceImp = static_cast(this->device); auto pageFaultManager = deviceImp->getDriverHandle()->getMemoryManager()->getPageFaultManager(); if (pageFaultManager == nullptr) { performMigration = false; } } this->makeResidentAndMigrate(performMigration); if (performMigration) { this->migrateSharedAllocations(); } auto ioh = (this->commandContainer.getIndirectHeap(NEO::IndirectHeap::Type::INDIRECT_OBJECT)); NEO::IndirectHeap *dsh = nullptr; NEO::IndirectHeap *ssh = nullptr; if (!NEO::ApiSpecificConfig::getBindlessConfiguration()) { dsh = (this->commandContainer.getIndirectHeap(NEO::IndirectHeap::Type::DYNAMIC_STATE)); ssh = (this->commandContainer.getIndirectHeap(NEO::IndirectHeap::Type::SURFACE_STATE)); } else { dsh = this->device->getNEODevice()->getBindlessHeapsHelper()->getHeap(NEO::BindlessHeapsHelper::BindlesHeapType::GLOBAL_DSH); ssh = this->device->getNEODevice()->getBindlessHeapsHelper()->getHeap(NEO::BindlessHeapsHelper::BindlesHeapType::GLOBAL_SSH); } auto completionStamp = this->csr->flushTask( *commandStream, commandStreamStart, dsh, ioh, ssh, this->csr->peekTaskLevel(), dispatchFlags, *(this->device->getNEODevice())); if (this->isSyncModeQueue) { auto timeoutMicroseconds = NEO::TimeoutControls::maxTimeout; const auto waitStatus = this->csr->waitForCompletionWithTimeout(NEO::WaitParams{false, false, timeoutMicroseconds}, completionStamp.taskCount); if (waitStatus == NEO::WaitStatus::GpuHang) { return ZE_RESULT_ERROR_DEVICE_LOST; } this->csr->getInternalAllocationStorage()->cleanAllocationList(completionStamp.taskCount, NEO::AllocationUsage::TEMPORARY_ALLOCATION); } this->cmdListCurrentStartOffset = commandStream->getUsed(); this->containsAnyKernel = false; this->commandContainer.getResidencyContainer().clear(); return ZE_RESULT_SUCCESS; } template ze_result_t 
CommandListCoreFamilyImmediate::appendLaunchKernel( ze_kernel_handle_t hKernel, const ze_group_count_t *pThreadGroupDimensions, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { if (this->isFlushTaskSubmissionEnabled) { checkAvailableSpace(); } auto ret = CommandListCoreFamily::appendLaunchKernel(hKernel, pThreadGroupDimensions, hSignalEvent, numWaitEvents, phWaitEvents); return flushImmediate(ret, true); } template ze_result_t CommandListCoreFamilyImmediate::appendLaunchKernelIndirect( ze_kernel_handle_t hKernel, const ze_group_count_t *pDispatchArgumentsBuffer, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { if (this->isFlushTaskSubmissionEnabled) { checkAvailableSpace(); } auto ret = CommandListCoreFamily::appendLaunchKernelIndirect(hKernel, pDispatchArgumentsBuffer, hSignalEvent, numWaitEvents, phWaitEvents); return flushImmediate(ret, true); } template ze_result_t CommandListCoreFamilyImmediate::appendBarrier( ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { ze_result_t ret = ZE_RESULT_SUCCESS; bool isTimestampEvent = false; for (uint32_t i = 0; i < numWaitEvents; i++) { auto event = Event::fromHandle(phWaitEvents[i]); isTimestampEvent |= (event->isEventTimestampFlagSet()) ? 
true : false; } if (hSignalEvent) { auto signalEvent = Event::fromHandle(hSignalEvent); isTimestampEvent |= signalEvent->isEventTimestampFlagSet(); } if (isTimestampEvent) { if (this->isFlushTaskSubmissionEnabled) { checkAvailableSpace(); } ret = CommandListCoreFamily::appendBarrier(hSignalEvent, numWaitEvents, phWaitEvents); return flushImmediate(ret, true); } else { ret = CommandListCoreFamilyImmediate::appendWaitOnEvents(numWaitEvents, phWaitEvents); if (!hSignalEvent) { NEO::PipeControlArgs args; this->csr->flushNonKernelTask(nullptr, 0, 0, args, false, false, false); if (this->isSyncModeQueue) { auto timeoutMicroseconds = NEO::TimeoutControls::maxTimeout; const auto waitStatus = this->csr->waitForCompletionWithTimeout(NEO::WaitParams{false, false, timeoutMicroseconds}, this->csr->peekTaskCount()); if (waitStatus == NEO::WaitStatus::GpuHang) { return ZE_RESULT_ERROR_DEVICE_LOST; } } } else { ret = CommandListCoreFamilyImmediate::appendSignalEvent(hSignalEvent); } } return ret; } template ze_result_t CommandListCoreFamilyImmediate::appendMemoryCopy( void *dstptr, const void *srcptr, size_t size, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { if (this->isFlushTaskSubmissionEnabled) { checkAvailableSpace(); } auto ret = CommandListCoreFamily::appendMemoryCopy(dstptr, srcptr, size, hSignalEvent, numWaitEvents, phWaitEvents); return flushImmediate(ret, true); } template ze_result_t CommandListCoreFamilyImmediate::appendMemoryCopyRegion( void *dstPtr, const ze_copy_region_t *dstRegion, uint32_t dstPitch, uint32_t dstSlicePitch, const void *srcPtr, const ze_copy_region_t *srcRegion, uint32_t srcPitch, uint32_t srcSlicePitch, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { if (this->isFlushTaskSubmissionEnabled) { checkAvailableSpace(); } auto ret = CommandListCoreFamily::appendMemoryCopyRegion(dstPtr, dstRegion, dstPitch, dstSlicePitch, srcPtr, srcRegion, srcPitch, srcSlicePitch, 
hSignalEvent, numWaitEvents, phWaitEvents); return flushImmediate(ret, true); } template ze_result_t CommandListCoreFamilyImmediate::appendMemoryFill(void *ptr, const void *pattern, size_t patternSize, size_t size, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { if (this->isFlushTaskSubmissionEnabled) { checkAvailableSpace(); } auto ret = CommandListCoreFamily::appendMemoryFill(ptr, pattern, patternSize, size, hSignalEvent, numWaitEvents, phWaitEvents); return flushImmediate(ret, true); } template ze_result_t CommandListCoreFamilyImmediate::appendSignalEvent(ze_event_handle_t hSignalEvent) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; ze_result_t ret = ZE_RESULT_SUCCESS; auto event = Event::fromHandle(hSignalEvent); bool isTimestampEvent = event->isEventTimestampFlagSet(); if (isTimestampEvent) { if (this->isFlushTaskSubmissionEnabled) { checkAvailableSpace(); } ret = CommandListCoreFamily::appendSignalEvent(hSignalEvent); return flushImmediate(ret, true); } else { const auto &hwInfo = this->device->getHwInfo(); NEO::PipeControlArgs args; args.dcFlushEnable = NEO::MemorySynchronizationCommands::getDcFlushEnable(event->signalScope, hwInfo); this->csr->flushNonKernelTask(&event->getAllocation(this->device), event->getGpuAddress(this->device), Event::STATE_SIGNALED, args, false, false, false); if (this->isSyncModeQueue) { auto timeoutMicroseconds = NEO::TimeoutControls::maxTimeout; const auto waitStatus = this->csr->waitForCompletionWithTimeout(NEO::WaitParams{false, false, timeoutMicroseconds}, this->csr->peekTaskCount()); if (waitStatus == NEO::WaitStatus::GpuHang) { return ZE_RESULT_ERROR_DEVICE_LOST; } } } return ret; } template ze_result_t CommandListCoreFamilyImmediate::appendEventReset(ze_event_handle_t hSignalEvent) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; ze_result_t ret = ZE_RESULT_SUCCESS; auto event = Event::fromHandle(hSignalEvent); bool isTimestampEvent = 
event->isEventTimestampFlagSet(); if (isTimestampEvent) { if (this->isFlushTaskSubmissionEnabled) { checkAvailableSpace(); } ret = CommandListCoreFamily::appendEventReset(hSignalEvent); return flushImmediate(ret, true); } else { const auto &hwInfo = this->device->getHwInfo(); NEO::PipeControlArgs args; args.dcFlushEnable = NEO::MemorySynchronizationCommands::getDcFlushEnable(event->signalScope, hwInfo); this->csr->flushNonKernelTask(&event->getAllocation(this->device), event->getGpuAddress(this->device), Event::STATE_CLEARED, args, false, false, false); if (this->isSyncModeQueue) { auto timeoutMicroseconds = NEO::TimeoutControls::maxTimeout; const auto waitStatus = this->csr->waitForCompletionWithTimeout(NEO::WaitParams{false, false, timeoutMicroseconds}, this->csr->peekTaskCount()); if (waitStatus == NEO::WaitStatus::GpuHang) { return ZE_RESULT_ERROR_DEVICE_LOST; } } } return ret; } template ze_result_t CommandListCoreFamilyImmediate::appendPageFaultCopy(NEO::GraphicsAllocation *dstAllocation, NEO::GraphicsAllocation *srcAllocation, size_t size, bool flushHost) { if (this->isFlushTaskSubmissionEnabled) { checkAvailableSpace(); } auto ret = CommandListCoreFamily::appendPageFaultCopy(dstAllocation, srcAllocation, size, flushHost); return flushImmediate(ret, false); } template ze_result_t CommandListCoreFamilyImmediate::appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phWaitEvents) { if (this->isFlushTaskSubmissionEnabled) { checkAvailableSpace(); } auto ret = CommandListCoreFamily::appendWaitOnEvents(numEvents, phWaitEvents); return flushImmediate(ret, true); } template ze_result_t CommandListCoreFamilyImmediate::appendWriteGlobalTimestamp( uint64_t *dstptr, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { if (this->isFlushTaskSubmissionEnabled) { checkAvailableSpace(); } auto ret = CommandListCoreFamily::appendWriteGlobalTimestamp(dstptr, hSignalEvent, numWaitEvents, phWaitEvents); return flushImmediate(ret, true); 
} template ze_result_t CommandListCoreFamilyImmediate::appendMemoryCopyFromContext( void *dstptr, ze_context_handle_t hContextSrc, const void *srcptr, size_t size, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { return CommandListCoreFamilyImmediate::appendMemoryCopy(dstptr, srcptr, size, hSignalEvent, numWaitEvents, phWaitEvents); } template ze_result_t CommandListCoreFamilyImmediate::appendImageCopy( ze_image_handle_t dst, ze_image_handle_t src, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { return CommandListCoreFamilyImmediate::appendImageCopyRegion(dst, src, nullptr, nullptr, hSignalEvent, numWaitEvents, phWaitEvents); } template ze_result_t CommandListCoreFamilyImmediate::appendImageCopyRegion(ze_image_handle_t hDstImage, ze_image_handle_t hSrcImage, const ze_image_region_t *pDstRegion, const ze_image_region_t *pSrcRegion, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { if (this->isFlushTaskSubmissionEnabled) { checkAvailableSpace(); } auto ret = CommandListCoreFamily::appendImageCopyRegion(hDstImage, hSrcImage, pDstRegion, pSrcRegion, hSignalEvent, numWaitEvents, phWaitEvents); return flushImmediate(ret, true); } template ze_result_t CommandListCoreFamilyImmediate::appendImageCopyFromMemory( ze_image_handle_t hDstImage, const void *srcPtr, const ze_image_region_t *pDstRegion, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { if (this->isFlushTaskSubmissionEnabled) { checkAvailableSpace(); } auto ret = CommandListCoreFamily::appendImageCopyFromMemory(hDstImage, srcPtr, pDstRegion, hSignalEvent, numWaitEvents, phWaitEvents); return flushImmediate(ret, true); } template ze_result_t CommandListCoreFamilyImmediate::appendImageCopyToMemory( void *dstPtr, ze_image_handle_t hSrcImage, const ze_image_region_t *pSrcRegion, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t 
*phWaitEvents) { if (this->isFlushTaskSubmissionEnabled) { checkAvailableSpace(); } auto ret = CommandListCoreFamily::appendImageCopyToMemory(dstPtr, hSrcImage, pSrcRegion, hSignalEvent, numWaitEvents, phWaitEvents); return flushImmediate(ret, true); } template ze_result_t CommandListCoreFamilyImmediate::flushImmediate(ze_result_t inputRet, bool performMigration) { if (inputRet == ZE_RESULT_SUCCESS) { if (this->isFlushTaskSubmissionEnabled) { inputRet = executeCommandListImmediateWithFlushTask(performMigration); } else { inputRet = executeCommandListImmediate(performMigration); } } return inputRet; } } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl000066400000000000000000000404751422164147700313170ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_container/implicit_scaling.h" #include "shared/source/command_stream/preemption.h" #include "shared/source/helpers/cache_flush_xehp_and_later.inl" #include "shared/source/helpers/pipeline_select_helper.h" #include "shared/source/helpers/simd_helper.h" #include "shared/source/indirect_heap/indirect_heap.h" #include "shared/source/kernel/grf_config.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/memory_manager/residency_container.h" #include "shared/source/unified_memory/unified_memory.h" #include "shared/source/utilities/software_tags_manager.h" #include "shared/source/xe_hp_core/hw_cmds.h" #include "shared/source/xe_hp_core/hw_info.h" #include "level_zero/core/source/cmdlist/cmdlist_hw.h" #include "level_zero/core/source/kernel/kernel_imp.h" #include "level_zero/core/source/module/module.h" #include "igfxfmid.h" namespace L0 { template struct EncodeStateBaseAddress; template size_t CommandListCoreFamily::getReserveSshSize() { return 4 * MemoryConstants::pageSize; } template void 
CommandListCoreFamily::applyMemoryRangesBarrier(uint32_t numRanges, const size_t *pRangeSizes, const void **pRanges) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; NEO::LinearStream *commandStream = commandContainer.getCommandStream(); NEO::SVMAllocsManager *svmAllocsManager = device->getDriverHandle()->getSvmAllocsManager(); StackVec subranges; uint64_t postSyncAddressToFlush = 0; for (uint32_t i = 0; i < numRanges; i++) { const uint64_t pRange = reinterpret_cast(pRanges[i]); size_t pRangeSize = pRangeSizes[i]; uint64_t pFlushRange; size_t pFlushRangeSize; NEO::SvmAllocationData *allocData = svmAllocsManager->getSVMAllocs()->get(pRanges[i]); if (allocData == nullptr || pRangeSize > allocData->size) { continue; } pFlushRange = pRange; if (NEO::L3Range::meetsMinimumAlignment(pRange) == false) { pFlushRange = alignDown(pRange, MemoryConstants::pageSize); } pRangeSize = (pRange + pRangeSize) - pFlushRange; pFlushRangeSize = pRangeSize; if (NEO::L3Range::meetsMinimumAlignment(pRangeSize) == false) { pFlushRangeSize = alignUp(pRangeSize, MemoryConstants::pageSize); } coverRangeExact(pFlushRange, pFlushRangeSize, subranges, GfxFamily::L3_FLUSH_ADDRESS_RANGE::L3_FLUSH_EVICTION_POLICY_FLUSH_L3_WITH_EVICTION); } for (size_t subrangeNumber = 0; subrangeNumber < subranges.size(); subrangeNumber += NEO::maxFlushSubrangeCount) { size_t rangeCount = subranges.size() <= subrangeNumber + NEO::maxFlushSubrangeCount ? 
subranges.size() - subrangeNumber : NEO::maxFlushSubrangeCount; NEO::Range range = CreateRange(subranges.begin() + subrangeNumber, rangeCount); NEO::flushGpuCache(commandStream, range, postSyncAddressToFlush, device->getHwInfo()); } } template void programEventL3Flush(ze_event_handle_t hEvent, Device *device, uint32_t partitionCount, NEO::CommandContainer &commandContainer) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using POST_SYNC_OPERATION = typename GfxFamily::PIPE_CONTROL::POST_SYNC_OPERATION; auto event = Event::fromHandle(hEvent); auto eventPartitionOffset = (partitionCount > 1) ? (partitionCount * event->getSinglePacketSize()) : event->getSinglePacketSize(); uint64_t eventAddress = event->getPacketAddress(device) + eventPartitionOffset; if (event->useContextEndOffset()) { eventAddress += event->getContextEndOffset(); } if (partitionCount > 1) { event->setPacketsInUse(event->getPacketsInUse() + partitionCount); } else { event->setPacketsInUse(event->getPacketsInUse() + 1); } event->l3FlushWaApplied = true; auto &cmdListStream = *commandContainer.getCommandStream(); NEO::PipeControlArgs args; args.dcFlushEnable = true; args.workloadPartitionOffset = partitionCount > 1; NEO::MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( cmdListStream, POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, eventAddress, Event::STATE_SIGNALED, commandContainer.getDevice()->getHardwareInfo(), args); } template ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(ze_kernel_handle_t hKernel, const ze_group_count_t *pThreadGroupDimensions, ze_event_handle_t hEvent, bool isIndirect, bool isPredicate, bool isCooperative) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using COMPUTE_WALKER = typename GfxFamily::COMPUTE_WALKER; using STATE_BASE_ADDRESS = typename GfxFamily::STATE_BASE_ADDRESS; const auto &hwInfo = this->device->getHwInfo(); if (NEO::DebugManager.flags.ForcePipeControlPriorToWalker.get()) { 
NEO::PipeControlArgs args; NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), args); } NEO::Device *neoDevice = device->getNEODevice(); const auto kernel = Kernel::fromHandle(hKernel); UNRECOVERABLE_IF(kernel == nullptr); const auto functionImmutableData = kernel->getImmutableData(); auto &kernelDescriptor = kernel->getKernelDescriptor(); commandListPerThreadScratchSize = std::max(commandListPerThreadScratchSize, kernelDescriptor.kernelAttributes.perThreadScratchSize[0]); commandListPerThreadPrivateScratchSize = std::max(commandListPerThreadPrivateScratchSize, kernelDescriptor.kernelAttributes.perThreadScratchSize[1]); auto functionPreemptionMode = obtainFunctionPreemptionMode(kernel); commandListPreemptionMode = std::min(commandListPreemptionMode, functionPreemptionMode); kernel->patchGlobalOffset(); if (isIndirect && pThreadGroupDimensions) { prepareIndirectParams(pThreadGroupDimensions); } if (!isIndirect) { kernel->setGroupCount(pThreadGroupDimensions->groupCountX, pThreadGroupDimensions->groupCountY, pThreadGroupDimensions->groupCountZ); } NEO::GraphicsAllocation *eventAlloc = nullptr; uint64_t eventAddress = 0; bool isTimestampEvent = false; bool L3FlushEnable = false; if (hEvent) { auto event = Event::fromHandle(hEvent); eventAlloc = &event->getAllocation(this->device); commandContainer.addToResidencyContainer(eventAlloc); L3FlushEnable = NEO::MemorySynchronizationCommands::getDcFlushEnable(event->signalScope, hwInfo); isTimestampEvent = event->isEventTimestampFlagSet(); eventAddress = event->getPacketAddress(this->device); } if (kernel->hasIndirectAllocationsAllowed()) { UnifiedMemoryControls unifiedMemoryControls = kernel->getUnifiedMemoryControls(); if (unifiedMemoryControls.indirectDeviceAllocationsAllowed) { this->unifiedMemoryControls.indirectDeviceAllocationsAllowed = true; } if (unifiedMemoryControls.indirectHostAllocationsAllowed) { this->unifiedMemoryControls.indirectHostAllocationsAllowed = true; } if 
(unifiedMemoryControls.indirectSharedAllocationsAllowed) { this->unifiedMemoryControls.indirectSharedAllocationsAllowed = true; } this->indirectAllocationsAllowed = true; } if (NEO::DebugManager.flags.EnableSWTags.get()) { neoDevice->getRootDeviceEnvironment().tagsManager->insertTag( *commandContainer.getCommandStream(), *neoDevice, kernelDescriptor.kernelMetadata.kernelName.c_str(), 0u); } bool isMixingRegularAndCooperativeKernelsAllowed = NEO::DebugManager.flags.AllowMixingRegularAndCooperativeKernels.get(); if ((!containsAnyKernel) || isMixingRegularAndCooperativeKernelsAllowed) { containsCooperativeKernelsFlag = (containsCooperativeKernelsFlag || isCooperative); } else if (containsCooperativeKernelsFlag != isCooperative) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } if (kernel->usesSyncBuffer()) { auto retVal = (isCooperative ? programSyncBuffer(*kernel, *neoDevice, pThreadGroupDimensions) : ZE_RESULT_ERROR_INVALID_ARGUMENT); if (retVal) { return retVal; } } auto isMultiOsContextCapable = (this->partitionCount > 1) && !isCooperative; updateStreamProperties(*kernel, isMultiOsContextCapable, isCooperative); KernelImp *kernelImp = static_cast(kernel); this->containsStatelessUncachedResource |= kernelImp->getKernelRequiresUncachedMocs(); this->requiresQueueUncachedMocs |= kernelImp->getKernelRequiresQueueUncachedMocs(); NEO::EncodeDispatchKernelArgs dispatchKernelArgs{ eventAddress, //eventAddress neoDevice, //device kernel, //dispatchInterface reinterpret_cast(pThreadGroupDimensions), //pThreadGroupDimensions commandListPreemptionMode, //preemptionMode this->partitionCount, //partitionCount isIndirect, //isIndirect isPredicate, //isPredicate isTimestampEvent, //isTimestampEvent L3FlushEnable, //L3FlushEnable this->containsStatelessUncachedResource, //requiresUncachedMocs kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, //useGlobalAtomics internalUsage, //isInternal isCooperative //isCooperative }; NEO::EncodeDispatchKernel::encode(commandContainer, 
dispatchKernelArgs); this->containsStatelessUncachedResource = dispatchKernelArgs.requiresUncachedMocs; if (hEvent) { auto event = Event::fromHandle(hEvent); if (partitionCount > 1) { event->setPacketsInUse(partitionCount); event->setPartitionedEvent(true); } if (L3FlushEnable) { programEventL3Flush(hEvent, this->device, partitionCount, commandContainer); } } if (neoDevice->getDebugger()) { auto *ssh = commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE); auto surfaceStateSpace = neoDevice->getDebugger()->getDebugSurfaceReservedSurfaceState(*ssh); auto surfaceState = GfxFamily::cmdInitRenderSurfaceState; NEO::EncodeSurfaceStateArgs args; args.outMemory = &surfaceState; args.graphicsAddress = device->getDebugSurface()->getGpuAddress(); args.size = device->getDebugSurface()->getUnderlyingBufferSize(); args.mocs = device->getMOCS(false, false); args.numAvailableDevices = neoDevice->getNumGenericSubDevices(); args.allocation = device->getDebugSurface(); args.gmmHelper = neoDevice->getGmmHelper(); args.useGlobalAtomics = kernelImp->getKernelDescriptor().kernelAttributes.flags.useGlobalAtomics; args.areMultipleSubDevicesInContext = args.numAvailableDevices > 1; args.implicitScaling = this->partitionCount > 1; NEO::EncodeSurfaceState::encodeBuffer(args); *reinterpret_cast(surfaceStateSpace) = surfaceState; } // Attach Function residency to our CommandList residency { commandContainer.addToResidencyContainer(functionImmutableData->getIsaGraphicsAllocation()); auto &residencyContainer = kernel->getResidencyContainer(); for (auto resource : residencyContainer) { commandContainer.addToResidencyContainer(resource); } } // Store PrintfBuffer from a kernel { if (kernelDescriptor.kernelAttributes.flags.usesPrintf) { storePrintfFunction(kernel); } } if (kernelImp->usesRayTracing()) { NEO::GraphicsAllocation *memoryBackedBuffer = device->getNEODevice()->getRTMemoryBackedBuffer(); if (memoryBackedBuffer == nullptr) { return ZE_RESULT_ERROR_UNINITIALIZED; } else { 
NEO::LinearStream *linearStream = commandContainer.getCommandStream(); NEO::EncodeEnableRayTracing::programEnableRayTracing(*linearStream, *memoryBackedBuffer); } } return ZE_RESULT_SUCCESS; } template void CommandListCoreFamily::appendMultiPartitionPrologue(uint32_t partitionDataSize) { NEO::ImplicitScalingDispatch::dispatchOffsetRegister(*commandContainer.getCommandStream(), partitionDataSize); } template void CommandListCoreFamily::appendMultiPartitionEpilogue() { NEO::ImplicitScalingDispatch::dispatchOffsetRegister(*commandContainer.getCommandStream(), NEO::ImplicitScalingDispatch::getPostSyncOffset()); } template void CommandListCoreFamily::appendComputeBarrierCommand() { if (this->partitionCount > 1) { auto neoDevice = device->getNEODevice(); appendMultiTileBarrier(*neoDevice); } else { NEO::PipeControlArgs args = createBarrierFlags(); NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), args); } } template NEO::PipeControlArgs CommandListCoreFamily::createBarrierFlags() { NEO::PipeControlArgs args; args.hdcPipelineFlush = true; args.unTypedDataPortCacheFlush = true; return args; } template void CommandListCoreFamily::appendMultiTileBarrier(NEO::Device &neoDevice) { NEO::PipeControlArgs args = createBarrierFlags(); auto &hwInfo = neoDevice.getHardwareInfo(); NEO::ImplicitScalingDispatch::dispatchBarrierCommands(*commandContainer.getCommandStream(), neoDevice.getDeviceBitfield(), args, hwInfo, 0, 0, true, true); } template inline size_t CommandListCoreFamily::estimateBufferSizeMultiTileBarrier(const NEO::HardwareInfo &hwInfo) { return NEO::ImplicitScalingDispatch::getBarrierSize(hwInfo, true, false); } } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/cmdlist/cmdlist_imp.cpp000066400000000000000000000126341422164147700264250ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/cmdlist/cmdlist_imp.h" #include 
"shared/source/command_stream/command_stream_receiver.h" #include "shared/source/command_stream/linear_stream.h" #include "shared/source/device/device.h" #include "shared/source/helpers/engine_node_helper.h" #include "shared/source/indirect_heap/indirect_heap.h" #include "level_zero/core/source/device/device.h" #include "level_zero/core/source/device/device_imp.h" #include "level_zero/tools/source/metrics/metric.h" #include "igfxfmid.h" #include namespace L0 { CommandListAllocatorFn commandListFactory[IGFX_MAX_PRODUCT] = {}; CommandListAllocatorFn commandListFactoryImmediate[IGFX_MAX_PRODUCT] = {}; ze_result_t CommandListImp::destroy() { if (this->cmdListType == CommandListType::TYPE_IMMEDIATE && this->isFlushTaskSubmissionEnabled && !this->isSyncModeQueue) { auto timeoutMicroseconds = NEO::TimeoutControls::maxTimeout; this->csr->waitForCompletionWithTimeout(NEO::WaitParams{false, false, timeoutMicroseconds}, this->csr->peekTaskCount()); } delete this; return ZE_RESULT_SUCCESS; } ze_result_t CommandListImp::appendMetricMemoryBarrier() { return device->getMetricDeviceContext().appendMetricMemoryBarrier(*this); } ze_result_t CommandListImp::appendMetricStreamerMarker(zet_metric_streamer_handle_t hMetricStreamer, uint32_t value) { return MetricStreamer::fromHandle(hMetricStreamer)->appendStreamerMarker(*this, value); } ze_result_t CommandListImp::appendMetricQueryBegin(zet_metric_query_handle_t hMetricQuery) { return MetricQuery::fromHandle(hMetricQuery)->appendBegin(*this); } ze_result_t CommandListImp::appendMetricQueryEnd(zet_metric_query_handle_t hMetricQuery, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { return MetricQuery::fromHandle(hMetricQuery)->appendEnd(*this, hSignalEvent, numWaitEvents, phWaitEvents); } CommandList *CommandList::create(uint32_t productFamily, Device *device, NEO::EngineGroupType engineGroupType, ze_command_list_flags_t flags, ze_result_t &returnValue) { CommandListAllocatorFn allocator = 
nullptr; if (productFamily < IGFX_MAX_PRODUCT) { allocator = commandListFactory[productFamily]; } CommandListImp *commandList = nullptr; returnValue = ZE_RESULT_ERROR_UNINITIALIZED; if (allocator) { commandList = static_cast((*allocator)(CommandList::defaultNumIddsPerBlock)); returnValue = commandList->initialize(device, engineGroupType, flags); if (returnValue != ZE_RESULT_SUCCESS) { commandList->destroy(); commandList = nullptr; } } return commandList; } CommandList *CommandList::createImmediate(uint32_t productFamily, Device *device, const ze_command_queue_desc_t *desc, bool internalUsage, NEO::EngineGroupType engineGroupType, ze_result_t &returnValue) { CommandListAllocatorFn allocator = nullptr; if (productFamily < IGFX_MAX_PRODUCT) { allocator = commandListFactoryImmediate[productFamily]; } CommandListImp *commandList = nullptr; returnValue = ZE_RESULT_ERROR_UNINITIALIZED; NEO::EngineGroupType engineType = engineGroupType; if (allocator) { NEO::CommandStreamReceiver *csr = nullptr; auto deviceImp = static_cast(device); if (internalUsage) { if (NEO::EngineGroupType::Copy == engineType && deviceImp->getActiveDevice()->getInternalCopyEngine()) { csr = deviceImp->getActiveDevice()->getInternalCopyEngine()->commandStreamReceiver; } else { csr = deviceImp->getActiveDevice()->getInternalEngine().commandStreamReceiver; engineType = NEO::EngineGroupType::RenderCompute; } } else { returnValue = device->getCsrForOrdinalAndIndex(&csr, desc->ordinal, desc->index); if (returnValue != ZE_RESULT_SUCCESS) { return commandList; } } UNRECOVERABLE_IF(nullptr == csr); commandList = static_cast((*allocator)(CommandList::commandListimmediateIddsPerBlock)); commandList->internalUsage = internalUsage; commandList->cmdListType = CommandListType::TYPE_IMMEDIATE; commandList->isSyncModeQueue = (desc->mode == ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS); returnValue = commandList->initialize(device, engineType, desc->flags); if (returnValue != ZE_RESULT_SUCCESS) { commandList->destroy(); 
commandList = nullptr; return commandList; } auto commandQueue = CommandQueue::create(productFamily, device, csr, desc, NEO::EngineGroupType::Copy == engineType, internalUsage, returnValue); if (!commandQueue) { commandList->destroy(); commandList = nullptr; return commandList; } commandList->cmdQImmediate = commandQueue; commandList->csr = csr; commandList->commandListPreemptionMode = device->getDevicePreemptionMode(); return commandList; } return commandList; } } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/cmdlist/cmdlist_imp.h000066400000000000000000000021361422164147700260660ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/core/source/cmdlist/cmdlist.h" #include "level_zero/core/source/cmdqueue/cmdqueue_imp.h" #include "level_zero/core/source/device/device.h" namespace L0 { struct CommandListImp : CommandList { using CommandList::CommandList; ze_result_t destroy() override; ze_result_t appendMetricMemoryBarrier() override; ze_result_t appendMetricStreamerMarker(zet_metric_streamer_handle_t hMetricStreamer, uint32_t value) override; ze_result_t appendMetricQueryBegin(zet_metric_query_handle_t hMetricQuery) override; ze_result_t appendMetricQueryEnd(zet_metric_query_handle_t hMetricQuery, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override; virtual void appendMultiPartitionPrologue(uint32_t partitionDataSize) = 0; virtual void appendMultiPartitionEpilogue() = 0; protected: ~CommandListImp() override = default; }; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/cmdqueue/000077500000000000000000000000001422164147700235605ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/source/cmdqueue/CMakeLists.txt000066400000000000000000000001421422164147700263150ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # 
add_subdirectories() compute-runtime-22.14.22890/level_zero/core/source/cmdqueue/cmdqueue.cpp000066400000000000000000000244721422164147700261050ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/command_stream/csr_definitions.h" #include "shared/source/command_stream/linear_stream.h" #include "shared/source/command_stream/queue_throttle.h" #include "shared/source/command_stream/wait_status.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/memory_manager/memory_manager.h" #include "level_zero/core/source/cmdlist/cmdlist_hw.h" #include "level_zero/core/source/cmdqueue/cmdqueue_imp.h" #include "level_zero/core/source/device/device.h" #include "level_zero/core/source/device/device_imp.h" #include "igfxfmid.h" namespace L0 { CommandQueueAllocatorFn commandQueueFactory[IGFX_MAX_PRODUCT] = {}; CommandQueueImp::CommandQueueImp(Device *device, NEO::CommandStreamReceiver *csr, const ze_command_queue_desc_t *desc) : desc(*desc), device(device), csr(csr) { int overrideCmdQueueSyncMode = NEO::DebugManager.flags.OverrideCmdQueueSynchronousMode.get(); if (overrideCmdQueueSyncMode != -1) { this->desc.mode = static_cast(overrideCmdQueueSyncMode); } int overrideUseKmdWaitFunction = NEO::DebugManager.flags.OverrideUseKmdWaitFunction.get(); if (overrideUseKmdWaitFunction != -1) { useKmdWaitFunction = !!(overrideUseKmdWaitFunction); } } ze_result_t CommandQueueImp::destroy() { delete this; return ZE_RESULT_SUCCESS; } ze_result_t CommandQueueImp::initialize(bool copyOnly, bool isInternal) { ze_result_t returnValue; internalUsage = isInternal; returnValue = buffers.initialize(device, totalCmdBufferSize); if (returnValue == ZE_RESULT_SUCCESS) { NEO::GraphicsAllocation *bufferAllocation = buffers.getCurrentBufferAllocation(); UNRECOVERABLE_IF(bufferAllocation == nullptr); commandStream = new 
NEO::LinearStream(bufferAllocation->getUnderlyingBuffer(), defaultQueueCmdBufferSize); UNRECOVERABLE_IF(commandStream == nullptr); commandStream->replaceGraphicsAllocation(bufferAllocation); isCopyOnlyCommandQueue = copyOnly; preemptionCmdSyncProgramming = getPreemptionCmdProgramming(); activeSubDevices = static_cast(csr->getOsContext().getDeviceBitfield().count()); if (!isInternal) { partitionCount = csr->getActivePartitions(); } if (NEO::Debugger::isDebugEnabled(internalUsage) && device->getL0Debugger()) { device->getL0Debugger()->notifyCommandQueueCreated(); } } return returnValue; } void CommandQueueImp::reserveLinearStreamSize(size_t size) { UNRECOVERABLE_IF(commandStream == nullptr); if (commandStream->getAvailableSpace() < size) { buffers.switchBuffers(csr); NEO::GraphicsAllocation *nextBufferAllocation = buffers.getCurrentBufferAllocation(); commandStream->replaceBuffer(nextBufferAllocation->getUnderlyingBuffer(), defaultQueueCmdBufferSize); commandStream->replaceGraphicsAllocation(nextBufferAllocation); } } NEO::SubmissionStatus CommandQueueImp::submitBatchBuffer(size_t offset, NEO::ResidencyContainer &residencyContainer, void *endingCmdPtr, bool isCooperative) { UNRECOVERABLE_IF(csr == nullptr); NEO::BatchBuffer batchBuffer(commandStream->getGraphicsAllocation(), offset, 0u, nullptr, false, false, NEO::QueueThrottle::HIGH, NEO::QueueSliceCount::defaultSliceCount, commandStream->getUsed(), commandStream, endingCmdPtr, isCooperative); commandStream->getGraphicsAllocation()->updateTaskCount(csr->peekTaskCount() + 1, csr->getOsContext().getContextId()); commandStream->getGraphicsAllocation()->updateResidencyTaskCount(csr->peekTaskCount() + 1, csr->getOsContext().getContextId()); csr->setActivePartitions(partitionCount); auto ret = csr->submitBatchBuffer(batchBuffer, csr->getResidencyAllocations()); if (ret != NEO::SubmissionStatus::SUCCESS) { return ret; } buffers.setCurrentFlushStamp(csr->peekTaskCount(), csr->obtainCurrentFlushStamp()); return ret; } 
ze_result_t CommandQueueImp::synchronize(uint64_t timeout) { if ((timeout == std::numeric_limits::max()) && useKmdWaitFunction) { auto &waitPair = buffers.getCurrentFlushStamp(); const auto waitStatus = csr->waitForTaskCountWithKmdNotifyFallback(waitPair.first, waitPair.second, false, NEO::QueueThrottle::MEDIUM); if (waitStatus == NEO::WaitStatus::GpuHang) { return ZE_RESULT_ERROR_DEVICE_LOST; } postSyncOperations(); return ZE_RESULT_SUCCESS; } else { return synchronizeByPollingForTaskCount(timeout); } } ze_result_t CommandQueueImp::synchronizeByPollingForTaskCount(uint64_t timeout) { UNRECOVERABLE_IF(csr == nullptr); auto taskCountToWait = getTaskCount(); bool enableTimeout = true; int64_t timeoutMicroseconds = static_cast(timeout); if (timeout == std::numeric_limits::max()) { enableTimeout = false; timeoutMicroseconds = NEO::TimeoutControls::maxTimeout; } const auto waitStatus = csr->waitForCompletionWithTimeout(NEO::WaitParams{false, enableTimeout, timeoutMicroseconds}, taskCountToWait); if (waitStatus == NEO::WaitStatus::NotReady) { return ZE_RESULT_NOT_READY; } if (waitStatus == NEO::WaitStatus::GpuHang) { return ZE_RESULT_ERROR_DEVICE_LOST; } postSyncOperations(); return ZE_RESULT_SUCCESS; } void CommandQueueImp::printFunctionsPrintfOutput() { size_t size = this->printfFunctionContainer.size(); for (size_t i = 0; i < size; i++) { this->printfFunctionContainer[i]->printPrintfOutput(); } this->printfFunctionContainer.clear(); } void CommandQueueImp::postSyncOperations() { printFunctionsPrintfOutput(); if (NEO::Debugger::isDebugEnabled(internalUsage) && device->getL0Debugger() && NEO::DebugManager.flags.DebuggerLogBitmask.get()) { device->getL0Debugger()->printTrackedAddresses(csr->getOsContext().getContextId()); } } CommandQueue *CommandQueue::create(uint32_t productFamily, Device *device, NEO::CommandStreamReceiver *csr, const ze_command_queue_desc_t *desc, bool isCopyOnly, bool isInternal, ze_result_t &returnValue) { CommandQueueAllocatorFn allocator = 
nullptr; if (productFamily < IGFX_MAX_PRODUCT) { allocator = commandQueueFactory[productFamily]; } CommandQueueImp *commandQueue = nullptr; returnValue = ZE_RESULT_ERROR_UNINITIALIZED; if (allocator) { commandQueue = static_cast((*allocator)(device, csr, desc)); returnValue = commandQueue->initialize(isCopyOnly, isInternal); if (returnValue != ZE_RESULT_SUCCESS) { commandQueue->destroy(); commandQueue = nullptr; } } auto &osContext = csr->getOsContext(); DriverHandleImp *driverHandleImp = static_cast(device->getDriverHandle()); if (driverHandleImp->powerHint && driverHandleImp->powerHint != osContext.getUmdPowerHintValue()) { osContext.setUmdPowerHintValue(driverHandleImp->powerHint); osContext.reInitializeContext(); } osContext.ensureContextInitialized(); csr->initDirectSubmission(*device->getNEODevice(), osContext); return commandQueue; } ze_command_queue_mode_t CommandQueueImp::getSynchronousMode() const { return desc.mode; } ze_result_t CommandQueueImp::CommandBufferManager::initialize(Device *device, size_t sizeRequested) { size_t alignedSize = alignUp(sizeRequested, MemoryConstants::pageSize64k); NEO::AllocationProperties properties{device->getRootDeviceIndex(), true, alignedSize, NEO::AllocationType::COMMAND_BUFFER, (device->getNEODevice()->getNumGenericSubDevices() > 1u) /* multiOsContextCapable */, false, device->getNEODevice()->getDeviceBitfield()}; auto firstBuffer = device->obtainReusableAllocation(alignedSize, NEO::AllocationType::COMMAND_BUFFER); if (!firstBuffer) { firstBuffer = device->getNEODevice()->getMemoryManager()->allocateGraphicsMemoryWithProperties(properties); } auto secondBuffer = device->obtainReusableAllocation(alignedSize, NEO::AllocationType::COMMAND_BUFFER); if (!secondBuffer) { secondBuffer = device->getNEODevice()->getMemoryManager()->allocateGraphicsMemoryWithProperties(properties); } buffers[BUFFER_ALLOCATION::FIRST] = firstBuffer; buffers[BUFFER_ALLOCATION::SECOND] = secondBuffer; if (!buffers[BUFFER_ALLOCATION::FIRST] || 
!buffers[BUFFER_ALLOCATION::SECOND]) { return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY; } memset(buffers[BUFFER_ALLOCATION::FIRST]->getUnderlyingBuffer(), 0, buffers[BUFFER_ALLOCATION::FIRST]->getUnderlyingBufferSize()); memset(buffers[BUFFER_ALLOCATION::SECOND]->getUnderlyingBuffer(), 0, buffers[BUFFER_ALLOCATION::SECOND]->getUnderlyingBufferSize()); flushId[BUFFER_ALLOCATION::FIRST] = std::make_pair(0u, 0u); flushId[BUFFER_ALLOCATION::SECOND] = std::make_pair(0u, 0u); return ZE_RESULT_SUCCESS; } void CommandQueueImp::CommandBufferManager::destroy(Device *device) { if (buffers[BUFFER_ALLOCATION::FIRST]) { device->storeReusableAllocation(*buffers[BUFFER_ALLOCATION::FIRST]); buffers[BUFFER_ALLOCATION::FIRST] = nullptr; } if (buffers[BUFFER_ALLOCATION::SECOND]) { device->storeReusableAllocation(*buffers[BUFFER_ALLOCATION::SECOND]); buffers[BUFFER_ALLOCATION::SECOND] = nullptr; } } void CommandQueueImp::CommandBufferManager::switchBuffers(NEO::CommandStreamReceiver *csr) { if (bufferUse == BUFFER_ALLOCATION::FIRST) { bufferUse = BUFFER_ALLOCATION::SECOND; } else { bufferUse = BUFFER_ALLOCATION::FIRST; } auto completionId = flushId[bufferUse]; if (completionId.second != 0u) { UNRECOVERABLE_IF(csr == nullptr); csr->waitForTaskCountWithKmdNotifyFallback(completionId.first, completionId.second, false, NEO::QueueThrottle::MEDIUM); } } } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/cmdqueue/cmdqueue.h000066400000000000000000000053341422164147700255460ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/core/source/device/device.h" #include #include struct _ze_command_queue_handle_t {}; namespace NEO { class CommandStreamReceiver; } namespace L0 { struct CommandQueue : _ze_command_queue_handle_t { template struct Allocator { static CommandQueue *allocate(Device *device, NEO::CommandStreamReceiver *csr, const ze_command_queue_desc_t *desc) { return new Type(device, 
csr, desc); } }; virtual ~CommandQueue() = default; virtual ze_result_t createFence(const ze_fence_desc_t *desc, ze_fence_handle_t *phFence) = 0; virtual ze_result_t destroy() = 0; virtual ze_result_t executeCommandLists(uint32_t numCommandLists, ze_command_list_handle_t *phCommandLists, ze_fence_handle_t hFence, bool performMigration) = 0; virtual ze_result_t executeCommands(uint32_t numCommands, void *phCommands, ze_fence_handle_t hFence) = 0; virtual ze_result_t synchronize(uint64_t timeout) = 0; static CommandQueue *create(uint32_t productFamily, Device *device, NEO::CommandStreamReceiver *csr, const ze_command_queue_desc_t *desc, bool isCopyOnly, bool isInternal, ze_result_t &resultValue); static CommandQueue *fromHandle(ze_command_queue_handle_t handle) { return static_cast(handle); } ze_command_queue_handle_t toHandle() { return this; } void setCommandQueuePreemptionMode(NEO::PreemptionMode newPreemptionMode) { commandQueuePreemptionMode = newPreemptionMode; } bool peekIsCopyOnlyCommandQueue() const { return this->isCopyOnlyCommandQueue; } protected: NEO::PreemptionMode commandQueuePreemptionMode = NEO::PreemptionMode::Initial; uint32_t partitionCount = 1; uint32_t activeSubDevices = 1; bool preemptionCmdSyncProgramming = true; bool commandQueueDebugCmdsProgrammed = false; bool isCopyOnlyCommandQueue = false; bool internalUsage = false; }; using CommandQueueAllocatorFn = CommandQueue *(*)(Device *device, NEO::CommandStreamReceiver *csr, const ze_command_queue_desc_t *desc); extern CommandQueueAllocatorFn commandQueueFactory[]; template struct CommandQueuePopulateFactory { CommandQueuePopulateFactory() { commandQueueFactory[productFamily] = CommandQueue::Allocator::allocate; } }; } // namespace L0 
compute-runtime-22.14.22890/level_zero/core/source/cmdqueue/cmdqueue_extended/000077500000000000000000000000001422164147700272505ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/source/cmdqueue/cmdqueue_extended/cmdqueue_extended.inl000066400000000000000000000010301422164147700334360ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/core/source/cmdqueue/cmdqueue_hw.h" namespace L0 { template ze_result_t CommandQueueHw::executeCommands(uint32_t numCommandGraphs, void *phCommands, ze_fence_handle_t hFence) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/cmdqueue/cmdqueue_hw.h000066400000000000000000000053441422164147700262450ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/command_stream/scratch_space_controller.h" #include "shared/source/command_stream/submissions_aggregator.h" #include "shared/source/helpers/constants.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "shared/source/memory_manager/residency_container.h" #include "level_zero/core/source/cmdqueue/cmdqueue_imp.h" #include "igfxfmid.h" namespace L0 { template struct CommandQueueHw : public CommandQueueImp { using CommandQueueImp::CommandQueueImp; using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; ze_result_t createFence(const ze_fence_desc_t *desc, ze_fence_handle_t *phFence) override; ze_result_t destroy() override; ze_result_t executeCommandLists(uint32_t numCommandLists, ze_command_list_handle_t *phCommandLists, ze_fence_handle_t hFence, bool performMigration) override; ze_result_t executeCommands(uint32_t numCommands, void *phCommands, ze_fence_handle_t hFence) override; void 
dispatchTaskCountPostSync(NEO::LinearStream &commandStream, const NEO::HardwareInfo &hwInfo); bool isDispatchTaskCountPostSyncRequired(ze_fence_handle_t hFence) const; void programStateBaseAddress(uint64_t gsba, bool useLocalMemoryForIndirectHeap, NEO::LinearStream &commandStream, bool cachedMOCSAllowed); size_t estimateStateBaseAddressCmdSize(); MOCKABLE_VIRTUAL void programFrontEnd(uint64_t scratchAddress, uint32_t perThreadScratchSpaceSize, NEO::LinearStream &commandStream); MOCKABLE_VIRTUAL size_t estimateFrontEndCmdSizeForMultipleCommandLists(bool isFrontEndStateDirty, uint32_t numCommandLists, ze_command_list_handle_t *phCommandLists); size_t estimateFrontEndCmdSize(); size_t estimatePipelineSelect(); void programPipelineSelect(NEO::LinearStream &commandStream); MOCKABLE_VIRTUAL void handleScratchSpace(NEO::HeapContainer &heapContainer, NEO::ScratchSpaceController *scratchController, bool &gsbaState, bool &frontEndState, uint32_t perThreadScratchSpaceSize, uint32_t perThreadPrivateScratchSize); bool getPreemptionCmdProgramming() override; void patchCommands(CommandList &commandList, uint64_t scratchAddress); }; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/cmdqueue/cmdqueue_hw.inl000066400000000000000000000644201422164147700266000ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/built_ins/built_ins.h" #include "shared/source/built_ins/sip.h" #include "shared/source/command_container/command_encoder.h" #include "shared/source/command_container/implicit_scaling.h" #include "shared/source/command_stream/command_stream_receiver_hw.h" #include "shared/source/command_stream/linear_stream.h" #include "shared/source/command_stream/preemption.h" #include "shared/source/command_stream/submission_status.h" #include "shared/source/command_stream/thread_arbitration_policy.h" #include "shared/source/device/device.h" #include 
"shared/source/helpers/hw_helper.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/helpers/pipe_control_args.h" #include "shared/source/helpers/preamble.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/memory_manager/residency_container.h" #include "shared/source/os_interface/hw_info_config.h" #include "shared/source/os_interface/os_context.h" #include "shared/source/page_fault_manager/cpu_page_fault_manager.h" #include "shared/source/unified_memory/unified_memory.h" #include "shared/source/utilities/software_tags_manager.h" #include "level_zero/core/source/cmdlist/cmdlist.h" #include "level_zero/core/source/cmdlist/cmdlist_hw.h" #include "level_zero/core/source/cmdqueue/cmdqueue_hw.h" #include "level_zero/core/source/device/device.h" #include "level_zero/core/source/driver/driver_handle_imp.h" #include "level_zero/core/source/fence/fence.h" #include "level_zero/tools/source/metrics/metric.h" #include #include namespace L0 { template ze_result_t CommandQueueHw::createFence(const ze_fence_desc_t *desc, ze_fence_handle_t *phFence) { *phFence = Fence::create(this, desc); return ZE_RESULT_SUCCESS; } template ze_result_t CommandQueueHw::destroy() { if (commandStream) { delete commandStream; commandStream = nullptr; } buffers.destroy(this->getDevice()); if (NEO::Debugger::isDebugEnabled(internalUsage) && device->getL0Debugger()) { device->getL0Debugger()->notifyCommandQueueDestroyed(); } delete this; return ZE_RESULT_SUCCESS; } template ze_result_t CommandQueueHw::executeCommandLists( uint32_t numCommandLists, ze_command_list_handle_t *phCommandLists, ze_fence_handle_t hFence, bool performMigration) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START; using MI_BATCH_BUFFER_END = typename GfxFamily::MI_BATCH_BUFFER_END; using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; using POST_SYNC_OPERATION = typename 
PIPE_CONTROL::POST_SYNC_OPERATION; using MI_LOAD_REGISTER_MEM = typename GfxFamily::MI_LOAD_REGISTER_MEM; using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM; auto lockCSR = csr->obtainUniqueOwnership(); auto anyCommandListWithCooperativeKernels = false; auto anyCommandListWithoutCooperativeKernels = false; bool anyCommandListRequiresDisabledEUFusion = false; bool cachedMOCSAllowed = true; for (auto i = 0u; i < numCommandLists; i++) { auto commandList = CommandList::fromHandle(phCommandLists[i]); if (peekIsCopyOnlyCommandQueue() != commandList->isCopyOnly()) { return ZE_RESULT_ERROR_INVALID_COMMAND_LIST_TYPE; } if (this->activeSubDevices < commandList->partitionCount) { return ZE_RESULT_ERROR_INVALID_COMMAND_LIST_TYPE; } if (commandList->containsCooperativeKernels()) { anyCommandListWithCooperativeKernels = true; } else { anyCommandListWithoutCooperativeKernels = true; } if (commandList->getRequiredStreamState().frontEndState.disableEUFusion.value == 1) { anyCommandListRequiresDisabledEUFusion = true; } // If the Command List has commands that require uncached MOCS, then any changes to the commands in the queue requires the uncached MOCS if (commandList->requiresQueueUncachedMocs && cachedMOCSAllowed == true) { cachedMOCSAllowed = false; } } bool isMixingRegularAndCooperativeKernelsAllowed = NEO::DebugManager.flags.AllowMixingRegularAndCooperativeKernels.get(); if (anyCommandListWithCooperativeKernels && anyCommandListWithoutCooperativeKernels && (!isMixingRegularAndCooperativeKernelsAllowed)) { return ZE_RESULT_ERROR_INVALID_COMMAND_LIST_TYPE; } size_t spaceForResidency = 0; size_t preemptionSize = 0u; size_t debuggerCmdsSize = 0; constexpr size_t residencyContainerSpaceForPreemption = 2; constexpr size_t residencyContainerSpaceForTagWrite = 1; NEO::Device *neoDevice = device->getNEODevice(); auto devicePreemption = device->getDevicePreemptionMode(); const bool initialPreemptionMode = commandQueuePreemptionMode == NEO::PreemptionMode::Initial; 
NEO::PreemptionMode cmdQueuePreemption = commandQueuePreemptionMode; if (initialPreemptionMode) { cmdQueuePreemption = devicePreemption; } NEO::PreemptionMode statePreemption = cmdQueuePreemption; const bool stateSipRequired = (initialPreemptionMode && devicePreemption == NEO::PreemptionMode::MidThread) || (neoDevice->getDebugger() && NEO::Debugger::isDebugEnabled(internalUsage)); if (initialPreemptionMode) { preemptionSize += NEO::PreemptionHelper::getRequiredPreambleSize(*neoDevice); } if (stateSipRequired) { preemptionSize += NEO::PreemptionHelper::getRequiredStateSipCmdSize(*neoDevice, csr->isRcs()); } preemptionSize += NEO::PreemptionHelper::getRequiredCmdStreamSize(devicePreemption, commandQueuePreemptionMode); if (NEO::Debugger::isDebugEnabled(internalUsage) && !commandQueueDebugCmdsProgrammed) { if (neoDevice->getSourceLevelDebugger() != nullptr) { debuggerCmdsSize += NEO::PreambleHelper::getKernelDebuggingCommandsSize(true); } else if (device->getL0Debugger()) { debuggerCmdsSize += device->getL0Debugger()->getSbaAddressLoadCommandsSize(); } } if (devicePreemption == NEO::PreemptionMode::MidThread) { spaceForResidency += residencyContainerSpaceForPreemption; } bool directSubmissionEnabled = isCopyOnlyCommandQueue ? 
csr->isBlitterDirectSubmissionEnabled() : csr->isDirectSubmissionEnabled(); bool programActivePartitionConfig = csr->isProgramActivePartitionConfigRequired(); L0::Fence *fence = nullptr; device->activateMetricGroups(); size_t totalCmdBuffers = 0; uint32_t perThreadScratchSpaceSize = 0; uint32_t perThreadPrivateScratchSize = 0; NEO::PageFaultManager *pageFaultManager = nullptr; if (performMigration) { pageFaultManager = device->getDriverHandle()->getMemoryManager()->getPageFaultManager(); if (pageFaultManager == nullptr) { performMigration = false; } } for (auto i = 0u; i < numCommandLists; i++) { auto commandList = CommandList::fromHandle(phCommandLists[i]); commandList->csr = csr; commandList->handleIndirectAllocationResidency(); totalCmdBuffers += commandList->commandContainer.getCmdBufferAllocations().size(); spaceForResidency += commandList->commandContainer.getResidencyContainer().size(); auto commandListPreemption = commandList->getCommandListPreemptionMode(); if (statePreemption != commandListPreemption) { if (preemptionCmdSyncProgramming) { preemptionSize += NEO::MemorySynchronizationCommands::getSizeForSinglePipeControl(); } preemptionSize += NEO::PreemptionHelper::getRequiredCmdStreamSize(commandListPreemption, statePreemption); statePreemption = commandListPreemption; } perThreadScratchSpaceSize = std::max(perThreadScratchSpaceSize, commandList->getCommandListPerThreadScratchSize()); perThreadPrivateScratchSize = std::max(perThreadPrivateScratchSize, commandList->getCommandListPerThreadPrivateScratchSize()); if (commandList->getCommandListPerThreadScratchSize() != 0 || commandList->getCommandListPerThreadPrivateScratchSize() != 0) { if (commandList->commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE) != nullptr) { heapContainer.push_back(commandList->commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE)->getGraphicsAllocation()); } for (auto element : commandList->commandContainer.sshAllocations) { heapContainer.push_back(element); } 
} partitionCount = std::max(partitionCount, commandList->partitionCount); commandList->makeResidentAndMigrate(performMigration); } size_t linearStreamSizeEstimate = totalCmdBuffers * sizeof(MI_BATCH_BUFFER_START); linearStreamSizeEstimate += csr->getCmdsSizeForHardwareContext(); if (directSubmissionEnabled) { linearStreamSizeEstimate += sizeof(MI_BATCH_BUFFER_START); } else { linearStreamSizeEstimate += sizeof(MI_BATCH_BUFFER_END); } auto csrHw = reinterpret_cast *>(csr); if (programActivePartitionConfig) { linearStreamSizeEstimate += csrHw->getCmdSizeForActivePartitionConfig(); } const auto &hwInfo = this->device->getHwInfo(); spaceForResidency += residencyContainerSpaceForTagWrite; csr->getResidencyAllocations().reserve(spaceForResidency); auto scratchSpaceController = csr->getScratchSpaceController(); bool gsbaStateDirty = false; bool frontEndStateDirty = false; handleScratchSpace(heapContainer, scratchSpaceController, gsbaStateDirty, frontEndStateDirty, perThreadScratchSpaceSize, perThreadPrivateScratchSize); auto &streamProperties = csr->getStreamProperties(); const auto &hwInfoConfig = *NEO::HwInfoConfig::get(hwInfo.platform.eProductFamily); auto disableOverdispatch = hwInfoConfig.isDisableOverdispatchAvailable(hwInfo); auto isEngineInstanced = csr->getOsContext().isEngineInstanced(); bool isPatchingVfeStateAllowed = NEO::DebugManager.flags.AllowPatchingVfeStateInCommandLists.get(); if (!isPatchingVfeStateAllowed) { streamProperties.frontEndState.setProperties(anyCommandListWithCooperativeKernels, anyCommandListRequiresDisabledEUFusion, disableOverdispatch, isEngineInstanced, hwInfo); } else { streamProperties.frontEndState.singleSliceDispatchCcsMode.set(isEngineInstanced); } frontEndStateDirty |= streamProperties.frontEndState.isDirty(); gsbaStateDirty |= csr->getGSBAStateDirty(); frontEndStateDirty |= csr->getMediaVFEStateDirty(); if (!isCopyOnlyCommandQueue) { if (!gpgpuEnabled) { linearStreamSizeEstimate += estimatePipelineSelect(); } 
linearStreamSizeEstimate += estimateFrontEndCmdSizeForMultipleCommandLists(frontEndStateDirty, numCommandLists, phCommandLists); if (gsbaStateDirty) { linearStreamSizeEstimate += estimateStateBaseAddressCmdSize(); } linearStreamSizeEstimate += preemptionSize + debuggerCmdsSize; } if (NEO::DebugManager.flags.EnableSWTags.get()) { linearStreamSizeEstimate += NEO::SWTagsManager::estimateSpaceForSWTags(); } bool dispatchPostSync = isDispatchTaskCountPostSyncRequired(hFence); if (dispatchPostSync) { linearStreamSizeEstimate += isCopyOnlyCommandQueue ? NEO::EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite() : NEO::MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(hwInfo); } size_t alignedSize = alignUp(linearStreamSizeEstimate, minCmdBufferPtrAlign); size_t padding = alignedSize - linearStreamSizeEstimate; reserveLinearStreamSize(alignedSize); NEO::LinearStream child(commandStream->getSpace(alignedSize), alignedSize); child.setGpuBase(ptrOffset(commandStream->getGpuBase(), commandStream->getUsed() - alignedSize)); const auto globalFenceAllocation = csr->getGlobalFenceAllocation(); if (globalFenceAllocation) { csr->makeResident(*globalFenceAllocation); } const auto workPartitionAllocation = csr->getWorkPartitionAllocation(); if (workPartitionAllocation) { csr->makeResident(*workPartitionAllocation); } if (NEO::DebugManager.flags.EnableSWTags.get()) { NEO::SWTagsManager *tagsManager = neoDevice->getRootDeviceEnvironment().tagsManager.get(); UNRECOVERABLE_IF(tagsManager == nullptr); csr->makeResident(*tagsManager->getBXMLHeapAllocation()); csr->makeResident(*tagsManager->getSWTagHeapAllocation()); tagsManager->insertBXMLHeapAddress(child); tagsManager->insertSWTagHeapAddress(child); } csr->programHardwareContext(child); if (NEO::Debugger::isDebugEnabled(internalUsage) && device->getL0Debugger()) { csr->makeResident(*device->getL0Debugger()->getSbaTrackingBuffer(csr->getOsContext().getContextId())); } if (!isCopyOnlyCommandQueue) { if (!gpgpuEnabled) 
{ programPipelineSelect(child); } if (NEO::Debugger::isDebugEnabled(internalUsage) && !commandQueueDebugCmdsProgrammed) { if (neoDevice->getSourceLevelDebugger()) { NEO::PreambleHelper::programKernelDebugging(&child); commandQueueDebugCmdsProgrammed = true; } else if (device->getL0Debugger()) { device->getL0Debugger()->programSbaAddressLoad(child, device->getL0Debugger()->getSbaTrackingBuffer(csr->getOsContext().getContextId())->getGpuAddress()); commandQueueDebugCmdsProgrammed = true; } } if (gsbaStateDirty) { auto indirectHeap = CommandList::fromHandle(phCommandLists[0])->commandContainer.getIndirectHeap(NEO::HeapType::INDIRECT_OBJECT); programStateBaseAddress(scratchSpaceController->calculateNewGSH(), indirectHeap->getGraphicsAllocation()->isAllocatedInLocalMemoryPool(), child, cachedMOCSAllowed); } if (initialPreemptionMode) { NEO::PreemptionHelper::programCsrBaseAddress(child, *neoDevice, csr->getPreemptionAllocation()); } if (stateSipRequired) { NEO::PreemptionHelper::programStateSip(child, *neoDevice); } if (cmdQueuePreemption != commandQueuePreemptionMode) { NEO::PreemptionHelper::programCmdStream(child, cmdQueuePreemption, commandQueuePreemptionMode, csr->getPreemptionAllocation()); } statePreemption = cmdQueuePreemption; const bool sipKernelUsed = devicePreemption == NEO::PreemptionMode::MidThread || (neoDevice->getDebugger() != nullptr && NEO::Debugger::isDebugEnabled(internalUsage)); if (devicePreemption == NEO::PreemptionMode::MidThread) { csr->makeResident(*csr->getPreemptionAllocation()); } if (sipKernelUsed) { auto sipIsa = NEO::SipKernel::getSipKernel(*neoDevice).getSipAllocation(); csr->makeResident(*sipIsa); } if (NEO::Debugger::isDebugEnabled(internalUsage) && neoDevice->getDebugger()) { UNRECOVERABLE_IF(device->getDebugSurface() == nullptr); csr->makeResident(*device->getDebugSurface()); } } if (programActivePartitionConfig) { csrHw->programActivePartitionConfig(child); } for (auto i = 0u; i < numCommandLists; ++i) { auto commandList = 
CommandList::fromHandle(phCommandLists[i]); auto &cmdBufferAllocations = commandList->commandContainer.getCmdBufferAllocations(); auto cmdBufferCount = cmdBufferAllocations.size(); auto commandListPreemption = commandList->getCommandListPreemptionMode(); if (statePreemption != commandListPreemption) { if (NEO::DebugManager.flags.EnableSWTags.get()) { neoDevice->getRootDeviceEnvironment().tagsManager->insertTag( child, *neoDevice, "ComandList Preemption Mode update", 0u); } if (preemptionCmdSyncProgramming) { NEO::PipeControlArgs args; NEO::MemorySynchronizationCommands::addPipeControl(child, args); } NEO::PreemptionHelper::programCmdStream(child, commandListPreemption, statePreemption, csr->getPreemptionAllocation()); statePreemption = commandListPreemption; } if (!isCopyOnlyCommandQueue) { bool programVfe = frontEndStateDirty; if (isPatchingVfeStateAllowed) { auto &requiredStreamState = commandList->getRequiredStreamState(); streamProperties.frontEndState.setProperties(requiredStreamState.frontEndState); programVfe |= streamProperties.frontEndState.isDirty(); } if (programVfe) { programFrontEnd(scratchSpaceController->getScratchPatchAddress(), scratchSpaceController->getPerThreadScratchSpaceSize(), child); frontEndStateDirty = false; } if (isPatchingVfeStateAllowed) { auto &finalStreamState = commandList->getFinalStreamState(); streamProperties.frontEndState.setProperties(finalStreamState.frontEndState); } } patchCommands(*commandList, scratchSpaceController->getScratchPatchAddress()); for (size_t iter = 0; iter < cmdBufferCount; iter++) { auto allocation = cmdBufferAllocations[iter]; NEO::EncodeBatchBufferStartOrEnd::programBatchBufferStart(&child, allocation->getGpuAddress(), true); } printfFunctionContainer.insert(printfFunctionContainer.end(), commandList->getPrintfFunctionContainer().begin(), commandList->getPrintfFunctionContainer().end()); } if (performMigration) { auto commandList = CommandList::fromHandle(phCommandLists[0]); 
commandList->migrateSharedAllocations(); } if (stateSipRequired) { NEO::PreemptionHelper::programStateSipEndWa(child, *neoDevice); } commandQueuePreemptionMode = statePreemption; if (hFence) { fence = Fence::fromHandle(hFence); fence->assignTaskCountFromCsr(); } if (dispatchPostSync) { dispatchTaskCountPostSync(child, hwInfo); } csr->makeResident(*csr->getTagAllocation()); void *endingCmd = nullptr; if (directSubmissionEnabled) { auto offset = ptrDiff(child.getCpuBase(), commandStream->getCpuBase()) + child.getUsed(); uint64_t startAddress = commandStream->getGraphicsAllocation()->getGpuAddress() + offset; if (NEO::DebugManager.flags.BatchBufferStartPrepatchingWaEnabled.get() == 0) { startAddress = 0; } endingCmd = child.getSpace(0); NEO::EncodeBatchBufferStartOrEnd::programBatchBufferStart(&child, startAddress, false); } else { MI_BATCH_BUFFER_END cmd = GfxFamily::cmdInitBatchBufferEnd; auto buffer = child.getSpaceForCmd(); *(MI_BATCH_BUFFER_END *)buffer = cmd; } if (padding) { void *paddingPtr = child.getSpace(padding); memset(paddingPtr, 0, padding); } auto ret = submitBatchBuffer(ptrDiff(child.getCpuBase(), commandStream->getCpuBase()), csr->getResidencyAllocations(), endingCmd, anyCommandListWithCooperativeKernels); this->taskCount = csr->peekTaskCount(); csr->makeSurfacePackNonResident(csr->getResidencyAllocations()); if (getSynchronousMode() == ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS) { const auto synchronizeResult = this->synchronize(std::numeric_limits::max()); if (synchronizeResult == ZE_RESULT_ERROR_DEVICE_LOST) { return ZE_RESULT_ERROR_DEVICE_LOST; } } this->heapContainer.clear(); csr->pollForCompletion(); if (ret != NEO::SubmissionStatus::SUCCESS) { if (ret == NEO::SubmissionStatus::OUT_OF_MEMORY) { return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY; } return ZE_RESULT_ERROR_UNKNOWN; } return ZE_RESULT_SUCCESS; } template void CommandQueueHw::programFrontEnd(uint64_t scratchAddress, uint32_t perThreadScratchSpaceSize, NEO::LinearStream &commandStream) { using 
GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; UNRECOVERABLE_IF(csr == nullptr); auto &hwInfo = device->getHwInfo(); auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily); auto engineGroupType = hwHelper.getEngineGroupType(csr->getOsContext().getEngineType(), csr->getOsContext().getEngineUsage(), hwInfo); auto pVfeState = NEO::PreambleHelper::getSpaceForVfeState(&commandStream, hwInfo, engineGroupType); NEO::PreambleHelper::programVfeState(pVfeState, hwInfo, perThreadScratchSpaceSize, scratchAddress, device->getMaxNumHwThreads(), csr->getStreamProperties()); csr->setMediaVFEStateDirty(false); } template size_t CommandQueueHw::estimateFrontEndCmdSize() { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; return NEO::PreambleHelper::getVFECommandsSize(); } template size_t CommandQueueHw::estimateFrontEndCmdSizeForMultipleCommandLists( bool isFrontEndStateDirty, uint32_t numCommandLists, ze_command_list_handle_t *phCommandLists) { auto singleFrontEndCmdSize = estimateFrontEndCmdSize(); bool isPatchingVfeStateAllowed = NEO::DebugManager.flags.AllowPatchingVfeStateInCommandLists.get(); if (!isPatchingVfeStateAllowed) { return isFrontEndStateDirty * singleFrontEndCmdSize; } auto streamPropertiesCopy = csr->getStreamProperties(); size_t estimatedSize = 0; for (size_t i = 0; i < numCommandLists; i++) { auto commandList = CommandList::fromHandle(phCommandLists[i]); auto &requiredStreamState = commandList->getRequiredStreamState(); streamPropertiesCopy.frontEndState.setProperties(requiredStreamState.frontEndState); if (isFrontEndStateDirty || streamPropertiesCopy.frontEndState.isDirty()) { estimatedSize += singleFrontEndCmdSize; isFrontEndStateDirty = false; } auto &finalStreamState = commandList->getFinalStreamState(); streamPropertiesCopy.frontEndState.setProperties(finalStreamState.frontEndState); } return estimatedSize; } template size_t CommandQueueHw::estimatePipelineSelect() { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; 
return NEO::PreambleHelper::getCmdSizeForPipelineSelect(device->getHwInfo()); } template void CommandQueueHw::programPipelineSelect(NEO::LinearStream &commandStream) { NEO::PipelineSelectArgs args = {0, 0}; using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; NEO::PreambleHelper::programPipelineSelect(&commandStream, args, device->getHwInfo()); gpgpuEnabled = true; } template bool CommandQueueHw::isDispatchTaskCountPostSyncRequired(ze_fence_handle_t hFence) const { return !csr->isUpdateTagFromWaitEnabled() || hFence != nullptr; } template void CommandQueueHw::dispatchTaskCountPostSync(NEO::LinearStream &commandStream, const NEO::HardwareInfo &hwInfo) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION; uint64_t postSyncAddress = csr->getTagAllocation()->getGpuAddress(); uint32_t postSyncData = csr->peekTaskCount() + 1; if (isCopyOnlyCommandQueue) { NEO::MiFlushArgs args; args.commandWithPostSync = true; args.notifyEnable = csr->isUsedNotifyEnableForPostSync(); NEO::EncodeMiFlushDW::programMiFlushDw(commandStream, postSyncAddress, postSyncData, args, hwInfo); } else { NEO::PipeControlArgs args; args.dcFlushEnable = NEO::MemorySynchronizationCommands::getDcFlushEnable(true, hwInfo); args.workloadPartitionOffset = partitionCount > 1; args.notifyEnable = csr->isUsedNotifyEnableForPostSync(); NEO::MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( commandStream, POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, postSyncAddress, postSyncData, hwInfo, args); } } template bool CommandQueueHw::getPreemptionCmdProgramming() { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; return NEO::PreemptionHelper::getRequiredCmdStreamSize(NEO::PreemptionMode::MidThread, NEO::PreemptionMode::Initial) > 0u; } } // namespace L0 
compute-runtime-22.14.22890/level_zero/core/source/cmdqueue/cmdqueue_hw_base.inl000066400000000000000000000150041422164147700275640ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_container/command_encoder.h" #include "shared/source/command_stream/csr_definitions.h" #include "shared/source/command_stream/linear_stream.h" #include "shared/source/device/device.h" #include "shared/source/helpers/api_specific_config.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/helpers/interlocked_max.h" #include "shared/source/helpers/preamble.h" #include "shared/source/helpers/state_base_address.h" #include "shared/source/os_interface/os_context.h" #include "level_zero/core/source/cmdlist/cmdlist.h" #include "level_zero/core/source/cmdqueue/cmdqueue_hw.h" #include "level_zero/core/source/device/device.h" #include "level_zero/core/source/fence/fence.h" #include "level_zero/tools/source/metrics/metric.h" #include namespace L0 { template void CommandQueueHw::programStateBaseAddress(uint64_t gsba, bool useLocalMemoryForIndirectHeap, NEO::LinearStream &commandStream, bool cachedMOCSAllowed) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using STATE_BASE_ADDRESS = typename GfxFamily::STATE_BASE_ADDRESS; const auto &hwInfo = this->device->getHwInfo(); NEO::Device *neoDevice = device->getNEODevice(); bool isRcs = this->getCsr()->isRcs(); NEO::EncodeWA::addPipeControlBeforeStateBaseAddress(commandStream, hwInfo, isRcs); NEO::EncodeWA::encodeAdditionalPipelineSelect(commandStream, {}, true, hwInfo, isRcs); auto pSbaCmd = static_cast(commandStream.getSpace(sizeof(STATE_BASE_ADDRESS))); STATE_BASE_ADDRESS sbaCmd; bool useGlobalSshAndDsh = NEO::ApiSpecificConfig::getBindlessConfiguration(); uint64_t globalHeapsBase = 0; if (useGlobalSshAndDsh) { globalHeapsBase = 
neoDevice->getBindlessHeapsHelper()->getGlobalHeapsBase(); } auto indirectObjectHeapBaseAddress = neoDevice->getMemoryManager()->getInternalHeapBaseAddress(device->getRootDeviceIndex(), useLocalMemoryForIndirectHeap); auto instructionHeapBaseAddress = neoDevice->getMemoryManager()->getInternalHeapBaseAddress(device->getRootDeviceIndex(), neoDevice->getMemoryManager()->isLocalMemoryUsedForIsa(neoDevice->getRootDeviceIndex())); NEO::StateBaseAddressHelper::programStateBaseAddress(&sbaCmd, nullptr, nullptr, nullptr, gsba, true, (device->getMOCS(cachedMOCSAllowed, false) >> 1), indirectObjectHeapBaseAddress, instructionHeapBaseAddress, globalHeapsBase, true, useGlobalSshAndDsh, neoDevice->getGmmHelper(), false, NEO::MemoryCompressionState::NotApplicable, false, 1u); *pSbaCmd = sbaCmd; csr->setGSBAStateDirty(false); if (NEO::Debugger::isDebugEnabled(internalUsage) && device->getL0Debugger()) { NEO::Debugger::SbaAddresses sbaAddresses = {}; sbaAddresses.BindlessSurfaceStateBaseAddress = sbaCmd.getBindlessSurfaceStateBaseAddress(); sbaAddresses.DynamicStateBaseAddress = sbaCmd.getDynamicStateBaseAddress(); sbaAddresses.GeneralStateBaseAddress = sbaCmd.getGeneralStateBaseAddress(); sbaAddresses.IndirectObjectBaseAddress = sbaCmd.getIndirectObjectBaseAddress(); sbaAddresses.InstructionBaseAddress = sbaCmd.getInstructionBaseAddress(); sbaAddresses.SurfaceStateBaseAddress = sbaCmd.getSurfaceStateBaseAddress(); device->getL0Debugger()->programSbaTrackingCommands(commandStream, sbaAddresses); } NEO::EncodeWA::encodeAdditionalPipelineSelect(commandStream, {}, false, hwInfo, isRcs); } template size_t CommandQueueHw::estimateStateBaseAddressCmdSize() { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using STATE_BASE_ADDRESS = typename GfxFamily::STATE_BASE_ADDRESS; using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; size_t size = sizeof(STATE_BASE_ADDRESS) + sizeof(PIPE_CONTROL) + NEO::EncodeWA::getAdditionalPipelineSelectSize(*device->getNEODevice()); if 
(NEO::Debugger::isDebugEnabled(internalUsage) && device->getL0Debugger() != nullptr) { const size_t trackedAddressesCount = 6; size += device->getL0Debugger()->getSbaTrackingCommandsSize(trackedAddressesCount); } return size; } template void CommandQueueHw::handleScratchSpace(NEO::HeapContainer &heapContainer, NEO::ScratchSpaceController *scratchController, bool &gsbaState, bool &frontEndState, uint32_t perThreadScratchSpaceSize, uint32_t perThreadPrivateScratchSize) { if (perThreadScratchSpaceSize > 0) { scratchController->setRequiredScratchSpace(nullptr, 0u, perThreadScratchSpaceSize, 0u, csr->peekTaskCount(), csr->getOsContext(), gsbaState, frontEndState); auto scratchAllocation = scratchController->getScratchSpaceAllocation(); csr->makeResident(*scratchAllocation); } } template void CommandQueueHw::patchCommands(CommandList &commandList, uint64_t scratchAddress) { auto &commandsToPatch = commandList.getCommandsToPatch(); UNRECOVERABLE_IF(!commandsToPatch.empty()); } } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/cmdqueue/cmdqueue_imp.h000066400000000000000000000064221422164147700264120ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/csr_definitions.h" #include "shared/source/command_stream/submission_status.h" #include "shared/source/command_stream/submissions_aggregator.h" #include "shared/source/helpers/constants.h" #include "shared/source/indirect_heap/indirect_heap.h" #include "level_zero/core/source/cmdqueue/cmdqueue.h" #include namespace NEO { class LinearStream; class GraphicsAllocation; class MemoryManager; } // namespace NEO namespace L0 { struct CommandList; struct Kernel; struct CommandQueueImp : public CommandQueue { class CommandBufferManager { public: enum BUFFER_ALLOCATION : uint32_t { FIRST = 0, SECOND, COUNT }; ze_result_t initialize(Device *device, size_t sizeRequested); void destroy(Device *device); void 
switchBuffers(NEO::CommandStreamReceiver *csr); NEO::GraphicsAllocation *getCurrentBufferAllocation() { return buffers[bufferUse]; } void setCurrentFlushStamp(uint32_t taskCount, NEO::FlushStamp flushStamp) { flushId[bufferUse] = std::make_pair(taskCount, flushStamp); } std::pair &getCurrentFlushStamp() { return flushId[bufferUse]; } private: NEO::GraphicsAllocation *buffers[BUFFER_ALLOCATION::COUNT]; std::pair flushId[BUFFER_ALLOCATION::COUNT]; BUFFER_ALLOCATION bufferUse = BUFFER_ALLOCATION::FIRST; }; static constexpr size_t defaultQueueCmdBufferSize = 128 * MemoryConstants::kiloByte; static constexpr size_t minCmdBufferPtrAlign = 8; static constexpr size_t totalCmdBufferSize = defaultQueueCmdBufferSize + MemoryConstants::cacheLineSize + NEO::CSRequirements::csOverfetchSize; CommandQueueImp() = delete; CommandQueueImp(Device *device, NEO::CommandStreamReceiver *csr, const ze_command_queue_desc_t *desc); ze_result_t destroy() override; ze_result_t synchronize(uint64_t timeout) override; ze_result_t initialize(bool copyOnly, bool isInternal); Device *getDevice() { return device; } uint32_t getTaskCount() { return taskCount; } NEO::CommandStreamReceiver *getCsr() { return csr; } void reserveLinearStreamSize(size_t size); ze_command_queue_mode_t getSynchronousMode() const; virtual bool getPreemptionCmdProgramming() = 0; protected: MOCKABLE_VIRTUAL NEO::SubmissionStatus submitBatchBuffer(size_t offset, NEO::ResidencyContainer &residencyContainer, void *endingCmdPtr, bool isCooperative); ze_result_t synchronizeByPollingForTaskCount(uint64_t timeout); void printFunctionsPrintfOutput(); void postSyncOperations(); CommandBufferManager buffers; NEO::HeapContainer heapContainer; ze_command_queue_desc_t desc; std::vector printfFunctionContainer; Device *device = nullptr; NEO::CommandStreamReceiver *csr = nullptr; NEO::LinearStream *commandStream = nullptr; std::atomic taskCount{0}; bool gpgpuEnabled = false; bool useKmdWaitFunction = false; }; } // namespace L0 
compute-runtime-22.14.22890/level_zero/core/source/cmdqueue/cmdqueue_xe_hp_core_and_later.inl000066400000000000000000000211371422164147700323040ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_container/implicit_scaling.h" #include "shared/source/command_stream/csr_definitions.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/api_specific_config.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/pipe_control_args.h" #include "shared/source/helpers/state_base_address.h" #include "shared/source/os_interface/hw_info_config.h" #include "level_zero/core/source/cmdqueue/cmdqueue_hw.h" #include "igfxfmid.h" namespace L0 { template void CommandQueueHw::programStateBaseAddress(uint64_t gsba, bool useLocalMemoryForIndirectHeap, NEO::LinearStream &commandStream, bool cachedMOCSAllowed) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using STATE_BASE_ADDRESS = typename GfxFamily::STATE_BASE_ADDRESS; if (NEO::ApiSpecificConfig::getBindlessConfiguration()) { NEO::Device *neoDevice = device->getNEODevice(); auto globalHeapsBase = neoDevice->getBindlessHeapsHelper()->getGlobalHeapsBase(); auto &hwInfo = neoDevice->getHardwareInfo(); bool isRcs = this->getCsr()->isRcs(); NEO::EncodeWA::addPipeControlBeforeStateBaseAddress(commandStream, hwInfo, isRcs); auto pSbaCmd = static_cast(commandStream.getSpace(sizeof(STATE_BASE_ADDRESS))); STATE_BASE_ADDRESS sbaCmd; bool multiOsContextCapable = device->isImplicitScalingCapable(); NEO::StateBaseAddressHelper::programStateBaseAddress(&sbaCmd, nullptr, nullptr, nullptr, 0, true, (device->getMOCS(cachedMOCSAllowed, false) >> 1), neoDevice->getMemoryManager()->getInternalHeapBaseAddress(neoDevice->getRootDeviceIndex(), useLocalMemoryForIndirectHeap), neoDevice->getMemoryManager()->getInternalHeapBaseAddress(neoDevice->getRootDeviceIndex(), 
neoDevice->getMemoryManager()->isLocalMemoryUsedForIsa(neoDevice->getRootDeviceIndex())), globalHeapsBase, true, true, neoDevice->getGmmHelper(), multiOsContextCapable, NEO::MemoryCompressionState::NotApplicable, false, 1u); *pSbaCmd = sbaCmd; auto &hwInfoConfig = *NEO::HwInfoConfig::get(hwInfo.platform.eProductFamily); if (hwInfoConfig.isAdditionalStateBaseAddressWARequired(hwInfo)) { pSbaCmd = static_cast(commandStream.getSpace(sizeof(STATE_BASE_ADDRESS))); *pSbaCmd = sbaCmd; } if (NEO::Debugger::isDebugEnabled(internalUsage) && device->getL0Debugger()) { NEO::Debugger::SbaAddresses sbaAddresses = {}; sbaAddresses.BindlessSurfaceStateBaseAddress = sbaCmd.getBindlessSurfaceStateBaseAddress(); sbaAddresses.DynamicStateBaseAddress = sbaCmd.getDynamicStateBaseAddress(); sbaAddresses.GeneralStateBaseAddress = sbaCmd.getGeneralStateBaseAddress(); sbaAddresses.InstructionBaseAddress = sbaCmd.getInstructionBaseAddress(); sbaAddresses.SurfaceStateBaseAddress = sbaCmd.getSurfaceStateBaseAddress(); device->getL0Debugger()->programSbaTrackingCommands(commandStream, sbaAddresses); } auto heap = neoDevice->getBindlessHeapsHelper()->getHeap(NEO::BindlessHeapsHelper::GLOBAL_SSH); auto cmd = GfxFamily::cmdInitStateBindingTablePoolAlloc; cmd.setBindingTablePoolBaseAddress(heap->getHeapGpuBase()); cmd.setBindingTablePoolBufferSize(heap->getHeapSizeInPages()); cmd.setSurfaceObjectControlStateIndexToMocsTables(neoDevice->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_STATE_HEAP_BUFFER)); auto buffer = commandStream.getSpace(sizeof(cmd)); *(typename GfxFamily::_3DSTATE_BINDING_TABLE_POOL_ALLOC *)buffer = cmd; } csr->setGSBAStateDirty(false); } template size_t CommandQueueHw::estimateStateBaseAddressCmdSize() { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using STATE_BASE_ADDRESS = typename GfxFamily::STATE_BASE_ADDRESS; using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; using _3DSTATE_BINDING_TABLE_POOL_ALLOC = typename GfxFamily::_3DSTATE_BINDING_TABLE_POOL_ALLOC; 
NEO::Device *neoDevice = device->getNEODevice(); auto &hwInfo = neoDevice->getHardwareInfo(); auto &hwInfoConfig = *NEO::HwInfoConfig::get(hwInfo.platform.eProductFamily); size_t size = 0; if (NEO::ApiSpecificConfig::getBindlessConfiguration()) { size += sizeof(STATE_BASE_ADDRESS) + sizeof(PIPE_CONTROL) + sizeof(_3DSTATE_BINDING_TABLE_POOL_ALLOC); if (hwInfoConfig.isAdditionalStateBaseAddressWARequired(hwInfo)) { size += sizeof(STATE_BASE_ADDRESS); } } return size; } constexpr uint32_t maxPtssIndex = 15u; template void CommandQueueHw::handleScratchSpace(NEO::HeapContainer &sshHeaps, NEO::ScratchSpaceController *scratchController, bool &gsbaState, bool &frontEndState, uint32_t perThreadScratchSpaceSize, uint32_t perThreadPrivateScratchSize) { if (perThreadScratchSpaceSize > 0 || perThreadPrivateScratchSize > 0) { if (sshHeaps.size() > 0) { uint32_t offsetIndex = maxPtssIndex * csr->getOsContext().getEngineType() + 1u; scratchController->programHeaps(sshHeaps, offsetIndex, perThreadScratchSpaceSize, perThreadPrivateScratchSize, csr->peekTaskCount(), csr->getOsContext(), gsbaState, frontEndState); } if (NEO::ApiSpecificConfig::getBindlessConfiguration()) { scratchController->programBindlessSurfaceStateForScratch(device->getNEODevice()->getBindlessHeapsHelper(), perThreadScratchSpaceSize, perThreadPrivateScratchSize, csr->peekTaskCount(), csr->getOsContext(), gsbaState, frontEndState, csr); } auto scratchAllocation = scratchController->getScratchSpaceAllocation(); if (scratchAllocation != nullptr) { csr->makeResident(*scratchAllocation); } auto privateScratchAllocation = scratchController->getPrivateScratchSpaceAllocation(); if (privateScratchAllocation != nullptr) { csr->makeResident(*privateScratchAllocation); } } } template void CommandQueueHw::patchCommands(CommandList &commandList, uint64_t scratchAddress) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using CFE_STATE = typename GfxFamily::CFE_STATE; uint32_t lowScratchAddress = uint32_t(0xFFFFFFFF 
& scratchAddress); CFE_STATE *cfeStateCmd = nullptr; auto &commandsToPatch = commandList.getCommandsToPatch(); for (auto &commandToPatch : commandsToPatch) { switch (commandToPatch.type) { case CommandList::CommandToPatch::FrontEndState: cfeStateCmd = reinterpret_cast(commandToPatch.pCommand); cfeStateCmd->setScratchSpaceBuffer(lowScratchAddress); cfeStateCmd->setSingleSliceDispatchCcsMode(csr->getOsContext().isEngineInstanced()); *reinterpret_cast(commandToPatch.pDestination) = *cfeStateCmd; break; default: UNRECOVERABLE_IF(true); } } } } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/compiler_interface/000077500000000000000000000000001422164147700256025ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/source/compiler_interface/CMakeLists.txt000066400000000000000000000007221422164147700303430ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_COMPILER_INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/default_cache_config.cpp ${CMAKE_CURRENT_SOURCE_DIR}/default_l0_cache_config.cpp ${CMAKE_CURRENT_SOURCE_DIR}/default_l0_cache_config.h ${CMAKE_CURRENT_SOURCE_DIR}/l0_reg_path.h ) set_property(GLOBAL PROPERTY L0_SRCS_COMPILER_INTERFACE ${L0_SRCS_COMPILER_INTERFACE}) compute-runtime-22.14.22890/level_zero/core/source/compiler_interface/default_cache_config.cpp000066400000000000000000000005701422164147700324040ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/compiler_interface/default_cache_config.h" #include "level_zero/core/source/compiler_interface/default_l0_cache_config.h" namespace NEO { CompilerCacheConfig getDefaultCompilerCacheConfig() { return L0::getDefaultL0CompilerCacheConfig(); } } // namespace NEO 
compute-runtime-22.14.22890/level_zero/core/source/compiler_interface/default_l0_cache_config.cpp000066400000000000000000000015441422164147700330010ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #define L0_CACHE_LOCATION "l0_c_cache" #include "level_zero/core/source/compiler_interface/default_l0_cache_config.h" #include "shared/source/utilities/debug_settings_reader.h" #include "level_zero/core/source/compiler_interface/l0_reg_path.h" #include namespace L0 { NEO::CompilerCacheConfig getDefaultL0CompilerCacheConfig() { NEO::CompilerCacheConfig ret; std::string keyName = registryPath; keyName += "l0_c_cache_dir"; std::unique_ptr settingsReader(NEO::SettingsReader::createOsReader(false, keyName)); ret.cacheDir = settingsReader->getSetting(settingsReader->appSpecificLocation(keyName), static_cast(L0_CACHE_LOCATION)); ret.cacheFileExtension = ".l0_c_cache"; return ret; } } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/compiler_interface/default_l0_cache_config.h000066400000000000000000000003601422164147700324410ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/compiler_interface/compiler_cache.h" namespace L0 { NEO::CompilerCacheConfig getDefaultL0CompilerCacheConfig(); } compute-runtime-22.14.22890/level_zero/core/source/compiler_interface/l0_reg_path.h000066400000000000000000000002451422164147700301400ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once namespace L0 { extern const char *registryPath; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/context/000077500000000000000000000000001422164147700234345ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/source/context/context.h000066400000000000000000000177461422164147700253100ustar00rootroot00000000000000/* * Copyright (C) 
2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/core/source/driver/driver_handle.h" #include #include struct _ze_context_handle_t { virtual ~_ze_context_handle_t() = default; }; namespace L0 { struct DriverHandle; struct Image; struct Context : _ze_context_handle_t { inline static ze_memory_type_t parseUSMType(InternalMemoryType memoryType) { switch (memoryType) { case InternalMemoryType::SHARED_UNIFIED_MEMORY: return ZE_MEMORY_TYPE_SHARED; case InternalMemoryType::DEVICE_UNIFIED_MEMORY: return ZE_MEMORY_TYPE_DEVICE; case InternalMemoryType::HOST_UNIFIED_MEMORY: return ZE_MEMORY_TYPE_HOST; default: return ZE_MEMORY_TYPE_UNKNOWN; } return ZE_MEMORY_TYPE_UNKNOWN; } virtual ~Context() = default; virtual ze_result_t destroy() = 0; virtual ze_result_t getStatus() = 0; virtual DriverHandle *getDriverHandle() = 0; virtual ze_result_t allocHostMem(const ze_host_mem_alloc_desc_t *hostDesc, size_t size, size_t alignment, void **ptr) = 0; virtual ze_result_t allocDeviceMem(ze_device_handle_t hDevice, const ze_device_mem_alloc_desc_t *deviceDesc, size_t size, size_t alignment, void **ptr) = 0; virtual ze_result_t allocSharedMem(ze_device_handle_t hDevice, const ze_device_mem_alloc_desc_t *deviceDesc, const ze_host_mem_alloc_desc_t *hostDesc, size_t size, size_t alignment, void **ptr) = 0; virtual ze_result_t freeMem(const void *ptr) = 0; virtual ze_result_t freeMem(const void *ptr, bool blocking) = 0; virtual ze_result_t freeMemExt(const ze_memory_free_ext_desc_t *pMemFreeDesc, void *ptr) = 0; virtual ze_result_t makeMemoryResident(ze_device_handle_t hDevice, void *ptr, size_t size) = 0; virtual ze_result_t evictMemory(ze_device_handle_t hDevice, void *ptr, size_t size) = 0; virtual ze_result_t makeImageResident(ze_device_handle_t hDevice, ze_image_handle_t hImage) = 0; virtual ze_result_t evictImage(ze_device_handle_t hDevice, ze_image_handle_t hImage) = 0; virtual ze_result_t getMemAddressRange(const void *ptr, void 
**pBase, size_t *pSize) = 0; virtual ze_result_t closeIpcMemHandle(const void *ptr) = 0; virtual ze_result_t getIpcMemHandle(const void *ptr, ze_ipc_mem_handle_t *pIpcHandle) = 0; virtual ze_result_t openIpcMemHandle(ze_device_handle_t hDevice, ze_ipc_mem_handle_t handle, ze_ipc_memory_flags_t flags, void **ptr) = 0; virtual ze_result_t getMemAllocProperties(const void *ptr, ze_memory_allocation_properties_t *pMemAllocProperties, ze_device_handle_t *phDevice) = 0; virtual ze_result_t getImageAllocProperties(Image *image, ze_image_allocation_ext_properties_t *pAllocProperties) = 0; virtual ze_result_t createModule(ze_device_handle_t hDevice, const ze_module_desc_t *desc, ze_module_handle_t *phModule, ze_module_build_log_handle_t *phBuildLog) = 0; virtual ze_result_t createSampler(ze_device_handle_t hDevice, const ze_sampler_desc_t *pDesc, ze_sampler_handle_t *phSampler) = 0; virtual ze_result_t createCommandQueue(ze_device_handle_t hDevice, const ze_command_queue_desc_t *desc, ze_command_queue_handle_t *commandQueue) = 0; virtual ze_result_t createCommandList(ze_device_handle_t hDevice, const ze_command_list_desc_t *desc, ze_command_list_handle_t *commandList) = 0; virtual ze_result_t createCommandListImmediate(ze_device_handle_t hDevice, const ze_command_queue_desc_t *desc, ze_command_list_handle_t *commandList) = 0; virtual ze_result_t activateMetricGroups(zet_device_handle_t hDevice, uint32_t count, zet_metric_group_handle_t *phMetricGroups) = 0; virtual ze_result_t reserveVirtualMem(const void *pStart, size_t size, void **pptr) = 0; virtual ze_result_t freeVirtualMem(const void *ptr, size_t size) = 0; virtual ze_result_t queryVirtualMemPageSize(ze_device_handle_t hDevice, size_t size, size_t *pagesize) = 0; virtual ze_result_t createPhysicalMem(ze_device_handle_t hDevice, ze_physical_mem_desc_t *desc, ze_physical_mem_handle_t *phPhysicalMemory) = 0; virtual ze_result_t destroyPhysicalMem(ze_physical_mem_handle_t hPhysicalMemory) = 0; virtual ze_result_t 
mapVirtualMem(const void *ptr, size_t size, ze_physical_mem_handle_t hPhysicalMemory, size_t offset, ze_memory_access_attribute_t access) = 0; virtual ze_result_t unMapVirtualMem(const void *ptr, size_t size) = 0; virtual ze_result_t setVirtualMemAccessAttribute(const void *ptr, size_t size, ze_memory_access_attribute_t access) = 0; virtual ze_result_t getVirtualMemAccessAttribute(const void *ptr, size_t size, ze_memory_access_attribute_t *access, size_t *outSize) = 0; virtual ze_result_t openEventPoolIpcHandle(ze_ipc_event_pool_handle_t hIpc, ze_event_pool_handle_t *phEventPool) = 0; virtual ze_result_t createEventPool(const ze_event_pool_desc_t *desc, uint32_t numDevices, ze_device_handle_t *phDevices, ze_event_pool_handle_t *phEventPool) = 0; virtual ze_result_t createImage(ze_device_handle_t hDevice, const ze_image_desc_t *desc, ze_image_handle_t *phImage) = 0; virtual bool isShareableMemory(const void *exportDesc, bool exportableMemory, NEO::Device *neoDevice) = 0; virtual void *getMemHandlePtr(ze_device_handle_t hDevice, uint64_t handle, ze_ipc_memory_flags_t flags) = 0; static Context *fromHandle(ze_context_handle_t handle) { return static_cast(handle); } inline ze_context_handle_t toHandle() { return this; } }; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/context/context_imp.cpp000066400000000000000000000750171422164147700265030ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/context/context_imp.h" #include "shared/source/command_container/implicit_scaling.h" #include "shared/source/memory_manager/memory_operations_handler.h" #include "shared/source/memory_manager/unified_memory_manager.h" #include "level_zero/core/source/device/device_imp.h" #include "level_zero/core/source/driver/driver_handle_imp.h" #include "level_zero/core/source/helpers/allocation_extensions.h" #include "level_zero/core/source/helpers/properties_parser.h" #include 
"level_zero/core/source/hw_helpers/l0_hw_helper.h" #include "level_zero/core/source/image/image.h" #include "level_zero/core/source/memory/memory_operations_helper.h" namespace L0 { ze_result_t ContextImp::destroy() { delete this; return ZE_RESULT_SUCCESS; } ze_result_t ContextImp::getStatus() { DriverHandleImp *driverHandleImp = static_cast(this->driverHandle); for (auto device : driverHandleImp->devices) { DeviceImp *deviceImp = static_cast(device); if (deviceImp->resourcesReleased) { return ZE_RESULT_ERROR_DEVICE_LOST; } } return ZE_RESULT_SUCCESS; } DriverHandle *ContextImp::getDriverHandle() { return this->driverHandle; } ContextImp::ContextImp(DriverHandle *driverHandle) { this->driverHandle = static_cast(driverHandle); } void ContextImp::addDeviceAndSubDevices(Device *device) { this->devices.insert(std::make_pair(device->toHandle(), device)); DeviceImp *deviceImp = static_cast(device); for (auto subDevice : deviceImp->subDevices) { this->addDeviceAndSubDevices(subDevice); } } ze_result_t ContextImp::allocHostMem(const ze_host_mem_alloc_desc_t *hostDesc, size_t size, size_t alignment, void **ptr) { if (NEO::DebugManager.flags.ForceExtendedUSMBufferSize.get() >= 1) { size += (MemoryConstants::pageSize * NEO::DebugManager.flags.ForceExtendedUSMBufferSize.get()); } bool relaxedSizeAllowed = NEO::DebugManager.flags.AllowUnrestrictedSize.get(); if (hostDesc->pNext) { const ze_base_desc_t *extendedDesc = reinterpret_cast(hostDesc->pNext); if (extendedDesc->stype == ZE_STRUCTURE_TYPE_RELAXED_ALLOCATION_LIMITS_EXP_DESC) { const ze_relaxed_allocation_limits_exp_desc_t *relaxedLimitsDesc = reinterpret_cast(extendedDesc); if (!(relaxedLimitsDesc->flags & ZE_RELAXED_ALLOCATION_LIMITS_EXP_FLAG_MAX_SIZE)) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } relaxedSizeAllowed = true; } } if (relaxedSizeAllowed == false && (size > this->driverHandle->devices[0]->getNEODevice()->getDeviceInfo().maxMemAllocSize)) { *ptr = nullptr; return ZE_RESULT_ERROR_UNSUPPORTED_SIZE; } 
NEO::SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::HOST_UNIFIED_MEMORY, this->rootDeviceIndices, this->deviceBitfields); if (hostDesc->flags & ZE_HOST_MEM_ALLOC_FLAG_BIAS_UNCACHED) { unifiedMemoryProperties.allocationFlags.flags.locallyUncachedResource = 1; } auto usmPtr = this->driverHandle->svmAllocsManager->createHostUnifiedMemoryAllocation(size, unifiedMemoryProperties); if (usmPtr == nullptr) { return ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY; } *ptr = usmPtr; return ZE_RESULT_SUCCESS; } bool ContextImp::isDeviceDefinedForThisContext(Device *inDevice) { return (this->getDevices().find(inDevice->toHandle()) != this->getDevices().end()); } ze_result_t ContextImp::allocDeviceMem(ze_device_handle_t hDevice, const ze_device_mem_alloc_desc_t *deviceDesc, size_t size, size_t alignment, void **ptr) { if (NEO::DebugManager.flags.ForceExtendedUSMBufferSize.get() >= 1) { size += (MemoryConstants::pageSize * NEO::DebugManager.flags.ForceExtendedUSMBufferSize.get()); } auto device = Device::fromHandle(hDevice); if (isDeviceDefinedForThisContext(device) == false) { return ZE_RESULT_ERROR_DEVICE_LOST; } StructuresLookupTable lookupTable = {}; lookupTable.relaxedSizeAllowed = NEO::DebugManager.flags.AllowUnrestrictedSize.get(); auto parseResult = prepareL0StructuresLookupTable(lookupTable, deviceDesc->pNext); if (parseResult != ZE_RESULT_SUCCESS) { return parseResult; } auto neoDevice = device->getNEODevice(); auto rootDeviceIndex = neoDevice->getRootDeviceIndex(); auto deviceBitfields = this->driverHandle->deviceBitfields; deviceBitfields[rootDeviceIndex] = neoDevice->getDeviceBitfield(); if (lookupTable.isSharedHandle) { if (lookupTable.sharedHandleType.isDMABUFHandle) { ze_ipc_memory_flags_t flags = {}; *ptr = getMemHandlePtr(hDevice, lookupTable.sharedHandleType.fd, flags); if (nullptr == *ptr) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } } else { UNRECOVERABLE_IF(!lookupTable.sharedHandleType.isNTHandle); *ptr = 
this->driverHandle->importNTHandle(hDevice, lookupTable.sharedHandleType.ntHnadle); if (*ptr == nullptr) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } } return ZE_RESULT_SUCCESS; } if (lookupTable.relaxedSizeAllowed == false && (size > neoDevice->getDeviceInfo().maxMemAllocSize)) { *ptr = nullptr; return ZE_RESULT_ERROR_UNSUPPORTED_SIZE; } uint64_t globalMemSize = neoDevice->getDeviceInfo().globalMemSize; uint32_t numSubDevices = neoDevice->getNumGenericSubDevices(); if ((!device->isImplicitScalingCapable()) && (numSubDevices > 1)) { globalMemSize = globalMemSize / numSubDevices; } if (lookupTable.relaxedSizeAllowed && (size > globalMemSize)) { *ptr = nullptr; return ZE_RESULT_ERROR_UNSUPPORTED_SIZE; } deviceBitfields[rootDeviceIndex] = neoDevice->getDeviceBitfield(); NEO::SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::DEVICE_UNIFIED_MEMORY, this->driverHandle->rootDeviceIndices, deviceBitfields); unifiedMemoryProperties.allocationFlags.flags.shareable = isShareableMemory(deviceDesc->pNext, static_cast(lookupTable.exportMemory), neoDevice); unifiedMemoryProperties.device = neoDevice; unifiedMemoryProperties.allocationFlags.flags.compressedHint = isAllocationSuitableForCompression(lookupTable, *device, size); if (deviceDesc->flags & ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_UNCACHED) { unifiedMemoryProperties.allocationFlags.flags.locallyUncachedResource = 1; } void *usmPtr = this->driverHandle->svmAllocsManager->createUnifiedMemoryAllocation(size, unifiedMemoryProperties); if (usmPtr == nullptr) { return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY; } *ptr = usmPtr; return ZE_RESULT_SUCCESS; } ze_result_t ContextImp::allocSharedMem(ze_device_handle_t hDevice, const ze_device_mem_alloc_desc_t *deviceDesc, const ze_host_mem_alloc_desc_t *hostDesc, size_t size, size_t alignment, void **ptr) { if (NEO::DebugManager.flags.ForceExtendedUSMBufferSize.get() >= 1) { size += (MemoryConstants::pageSize * 
NEO::DebugManager.flags.ForceExtendedUSMBufferSize.get()); } auto device = this->devices.begin()->second; if (hDevice != nullptr) { device = Device::fromHandle(hDevice); } auto neoDevice = device->getNEODevice(); bool relaxedSizeAllowed = NEO::DebugManager.flags.AllowUnrestrictedSize.get(); if (deviceDesc->pNext) { const ze_base_desc_t *extendedDesc = reinterpret_cast(deviceDesc->pNext); if (extendedDesc->stype == ZE_STRUCTURE_TYPE_RELAXED_ALLOCATION_LIMITS_EXP_DESC) { const ze_relaxed_allocation_limits_exp_desc_t *relaxedLimitsDesc = reinterpret_cast(extendedDesc); if (!(relaxedLimitsDesc->flags & ZE_RELAXED_ALLOCATION_LIMITS_EXP_FLAG_MAX_SIZE)) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } relaxedSizeAllowed = true; } } if (relaxedSizeAllowed == false && (size > neoDevice->getDeviceInfo().maxMemAllocSize)) { *ptr = nullptr; return ZE_RESULT_ERROR_UNSUPPORTED_SIZE; } uint64_t globalMemSize = neoDevice->getDeviceInfo().globalMemSize; uint32_t numSubDevices = neoDevice->getNumGenericSubDevices(); if ((!device->isImplicitScalingCapable()) && (numSubDevices > 1)) { globalMemSize = globalMemSize / numSubDevices; } if (relaxedSizeAllowed && (size > globalMemSize)) { *ptr = nullptr; return ZE_RESULT_ERROR_UNSUPPORTED_SIZE; } auto deviceBitfields = this->deviceBitfields; NEO::Device *unifiedMemoryPropertiesDevice = nullptr; if (hDevice) { device = Device::fromHandle(hDevice); if (isDeviceDefinedForThisContext(device) == false) { return ZE_RESULT_ERROR_DEVICE_LOST; } neoDevice = device->getNEODevice(); auto rootDeviceIndex = neoDevice->getRootDeviceIndex(); unifiedMemoryPropertiesDevice = neoDevice; deviceBitfields[rootDeviceIndex] = neoDevice->getDeviceBitfield(); } NEO::SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::SHARED_UNIFIED_MEMORY, this->rootDeviceIndices, deviceBitfields); unifiedMemoryProperties.device = unifiedMemoryPropertiesDevice; if (deviceDesc->flags & ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_UNCACHED) { 
unifiedMemoryProperties.allocationFlags.flags.locallyUncachedResource = 1; } if (deviceDesc->flags & ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_INITIAL_PLACEMENT) { unifiedMemoryProperties.allocationFlags.allocFlags.usmInitialPlacementGpu = 1; } if (hostDesc->flags & ZE_HOST_MEM_ALLOC_FLAG_BIAS_INITIAL_PLACEMENT) { unifiedMemoryProperties.allocationFlags.allocFlags.usmInitialPlacementCpu = 1; } auto usmPtr = this->driverHandle->svmAllocsManager->createSharedUnifiedMemoryAllocation(size, unifiedMemoryProperties, static_cast(neoDevice->getSpecializedDevice())); if (usmPtr == nullptr) { return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY; } *ptr = usmPtr; return ZE_RESULT_SUCCESS; } ze_result_t ContextImp::freeMem(const void *ptr) { return this->freeMem(ptr, false); } ze_result_t ContextImp::freeMem(const void *ptr, bool blocking) { auto allocation = this->driverHandle->svmAllocsManager->getSVMAlloc(ptr); if (allocation == nullptr) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } for (auto pairDevice : this->devices) { DeviceImp *deviceImp = static_cast(pairDevice.second); std::unique_lock lock(deviceImp->peerAllocationsMutex); auto iter = deviceImp->peerAllocations.allocations.find(ptr); if (iter != deviceImp->peerAllocations.allocations.end()) { auto peerAllocData = &iter->second; auto peerAlloc = peerAllocData->gpuAllocations.getDefaultGraphicsAllocation(); auto peerPtr = reinterpret_cast(peerAlloc->getGpuAddress()); this->driverHandle->svmAllocsManager->freeSVMAlloc(peerPtr, blocking); deviceImp->peerAllocations.allocations.erase(iter); } } this->driverHandle->svmAllocsManager->freeSVMAlloc(const_cast(ptr), blocking); if (this->driverHandle->svmAllocsManager->getSvmMapOperation(ptr)) { this->driverHandle->svmAllocsManager->removeSvmMapOperation(ptr); } return ZE_RESULT_SUCCESS; } ze_result_t ContextImp::freeMemExt(const ze_memory_free_ext_desc_t *pMemFreeDesc, void *ptr) { if (pMemFreeDesc->freePolicy == ZE_DRIVER_MEMORY_FREE_POLICY_EXT_FLAG_BLOCKING_FREE) { return this->freeMem(ptr, 
true); } if (pMemFreeDesc->freePolicy == ZE_DRIVER_MEMORY_FREE_POLICY_EXT_FLAG_DEFER_FREE) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } return this->freeMem(ptr, false); } ze_result_t ContextImp::makeMemoryResident(ze_device_handle_t hDevice, void *ptr, size_t size) { Device *device = L0::Device::fromHandle(hDevice); NEO::Device *neoDevice = device->getNEODevice(); auto allocation = device->getDriverHandle()->getDriverSystemMemoryAllocation( ptr, size, neoDevice->getRootDeviceIndex(), nullptr); if (allocation == nullptr) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } NEO::MemoryOperationsHandler *memoryOperationsIface = neoDevice->getRootDeviceEnvironment().memoryOperationsInterface.get(); auto success = memoryOperationsIface->makeResident(neoDevice, ArrayRef(&allocation, 1)); ze_result_t res = changeMemoryOperationStatusToL0ResultType(success); if (ZE_RESULT_SUCCESS == res) { auto allocData = device->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(ptr); if (allocData && allocData->memoryType == InternalMemoryType::SHARED_UNIFIED_MEMORY) { DriverHandleImp *driverHandleImp = static_cast(device->getDriverHandle()); std::lock_guard lock(driverHandleImp->sharedMakeResidentAllocationsLock); driverHandleImp->sharedMakeResidentAllocations.insert({ptr, allocation}); } } return res; } ze_result_t ContextImp::evictMemory(ze_device_handle_t hDevice, void *ptr, size_t size) { Device *device = L0::Device::fromHandle(hDevice); NEO::Device *neoDevice = device->getNEODevice(); auto allocation = device->getDriverHandle()->getDriverSystemMemoryAllocation( ptr, size, neoDevice->getRootDeviceIndex(), nullptr); if (allocation == nullptr) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } { DriverHandleImp *driverHandleImp = static_cast(device->getDriverHandle()); std::lock_guard lock(driverHandleImp->sharedMakeResidentAllocationsLock); driverHandleImp->sharedMakeResidentAllocations.erase(ptr); } NEO::MemoryOperationsHandler *memoryOperationsIface = 
neoDevice->getRootDeviceEnvironment().memoryOperationsInterface.get(); auto success = memoryOperationsIface->evict(neoDevice, *allocation); return changeMemoryOperationStatusToL0ResultType(success); } ze_result_t ContextImp::makeImageResident(ze_device_handle_t hDevice, ze_image_handle_t hImage) { auto alloc = Image::fromHandle(hImage)->getAllocation(); NEO::Device *neoDevice = L0::Device::fromHandle(hDevice)->getNEODevice(); NEO::MemoryOperationsHandler *memoryOperationsIface = neoDevice->getRootDeviceEnvironment().memoryOperationsInterface.get(); auto success = memoryOperationsIface->makeResident(neoDevice, ArrayRef(&alloc, 1)); return changeMemoryOperationStatusToL0ResultType(success); } ze_result_t ContextImp::evictImage(ze_device_handle_t hDevice, ze_image_handle_t hImage) { auto alloc = Image::fromHandle(hImage)->getAllocation(); NEO::Device *neoDevice = L0::Device::fromHandle(hDevice)->getNEODevice(); NEO::MemoryOperationsHandler *memoryOperationsIface = neoDevice->getRootDeviceEnvironment().memoryOperationsInterface.get(); auto success = memoryOperationsIface->evict(neoDevice, *alloc); return changeMemoryOperationStatusToL0ResultType(success); } ze_result_t ContextImp::getMemAddressRange(const void *ptr, void **pBase, size_t *pSize) { NEO::SvmAllocationData *allocData = this->driverHandle->svmAllocsManager->getSVMAlloc(ptr); if (allocData) { NEO::GraphicsAllocation *alloc; alloc = allocData->gpuAllocations.getDefaultGraphicsAllocation(); if (pBase) { uint64_t *allocBase = reinterpret_cast(pBase); *allocBase = alloc->getGpuAddress(); } if (pSize) { *pSize = allocData->size; } return ZE_RESULT_SUCCESS; } return ZE_RESULT_ERROR_UNKNOWN; } ze_result_t ContextImp::closeIpcMemHandle(const void *ptr) { return this->freeMem(ptr); } ze_result_t ContextImp::getIpcMemHandle(const void *ptr, ze_ipc_mem_handle_t *pIpcHandle) { NEO::SvmAllocationData *allocData = this->driverHandle->svmAllocsManager->getSVMAlloc(ptr); if (allocData) { uint64_t handle = 
allocData->gpuAllocations.getDefaultGraphicsAllocation()->peekInternalHandle(this->driverHandle->getMemoryManager()); memcpy_s(reinterpret_cast(pIpcHandle->data), sizeof(ze_ipc_mem_handle_t), &handle, sizeof(handle)); return ZE_RESULT_SUCCESS; } return ZE_RESULT_ERROR_INVALID_ARGUMENT; } ze_result_t ContextImp::openIpcMemHandle(ze_device_handle_t hDevice, ze_ipc_mem_handle_t pIpcHandle, ze_ipc_memory_flags_t flags, void **ptr) { uint64_t handle = 0u; memcpy_s(&handle, sizeof(handle), reinterpret_cast(pIpcHandle.data), sizeof(handle)); *ptr = getMemHandlePtr(hDevice, handle, flags); if (nullptr == *ptr) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } return ZE_RESULT_SUCCESS; } ze_result_t EventPoolImp::closeIpcHandle() { return this->destroy(); } ze_result_t EventPoolImp::getIpcHandle(ze_ipc_event_pool_handle_t *pIpcHandle) { // L0 uses a vector of ZE_MAX_IPC_HANDLE_SIZE bytes to send the IPC handle, i.e. // char data[ZE_MAX_IPC_HANDLE_SIZE]; // First four bytes (which is of size sizeof(int)) of it contain the file descriptor // associated with the dma-buf, // Rest is payload to communicate extra info to the other processes. // For the event pool, this contains: // - the number of events the pool has. 
// - the id for the device used during pool creation uint64_t handle = this->eventPoolAllocations->getDefaultGraphicsAllocation()->peekInternalHandle(this->context->getDriverHandle()->getMemoryManager()); memcpy_s(pIpcHandle->data, sizeof(int), &handle, sizeof(int)); memcpy_s(pIpcHandle->data + sizeof(int), sizeof(this->numEvents), &this->numEvents, sizeof(this->numEvents)); uint32_t rootDeviceIndex = this->getDevice()->getRootDeviceIndex(); memcpy_s(pIpcHandle->data + sizeof(int) + sizeof(this->numEvents), sizeof(rootDeviceIndex), &rootDeviceIndex, sizeof(rootDeviceIndex)); return ZE_RESULT_SUCCESS; } ze_result_t ContextImp::openEventPoolIpcHandle(ze_ipc_event_pool_handle_t hIpc, ze_event_pool_handle_t *phEventPool) { uint64_t handle = 0u; memcpy_s(&handle, sizeof(int), hIpc.data, sizeof(int)); size_t numEvents = 0; memcpy_s(&numEvents, sizeof(numEvents), hIpc.data + sizeof(int), sizeof(numEvents)); uint32_t rootDeviceIndex = std::numeric_limits::max(); memcpy_s(&rootDeviceIndex, sizeof(rootDeviceIndex), hIpc.data + sizeof(int) + sizeof(numEvents), sizeof(rootDeviceIndex)); Device *device = this->devices.begin()->second; auto neoDevice = device->getNEODevice(); NEO::osHandle osHandle = static_cast(handle); auto &hwHelper = device->getHwHelper(); const uint32_t eventAlignment = static_cast(hwHelper.getTimestampPacketAllocatorAlignment()); uint32_t eventSize = static_cast(alignUp(EventPacketsCount::eventPackets * hwHelper.getSingleTimestampPacketSize(), eventAlignment)); size_t alignedSize = alignUp(numEvents * eventSize, MemoryConstants::pageSize64k); NEO::AllocationProperties unifiedMemoryProperties{rootDeviceIndex, alignedSize, NEO::AllocationType::BUFFER_HOST_MEMORY, systemMemoryBitfield}; unifiedMemoryProperties.subDevicesBitfield = neoDevice->getDeviceBitfield(); auto memoryManager = this->getDriverHandle()->getMemoryManager(); NEO::GraphicsAllocation *alloc = memoryManager->createGraphicsAllocationFromSharedHandle(osHandle, unifiedMemoryProperties, false, 
true); if (alloc == nullptr) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } ze_event_pool_desc_t desc = {}; auto eventPool = new EventPoolImp(&desc); eventPool->context = this; eventPool->eventPoolAllocations = std::make_unique(static_cast(this->rootDeviceIndices.size())); eventPool->eventPoolAllocations->addAllocation(alloc); eventPool->eventPoolPtr = reinterpret_cast(alloc->getUnderlyingBuffer()); eventPool->devices.push_back(device); eventPool->isImportedIpcPool = true; eventPool->setEventSize(eventSize); eventPool->setEventAlignment(eventAlignment); for (auto currDeviceIndex : this->rootDeviceIndices) { if (currDeviceIndex == rootDeviceIndex) { continue; } unifiedMemoryProperties.rootDeviceIndex = currDeviceIndex; unifiedMemoryProperties.flags.isUSMHostAllocation = true; unifiedMemoryProperties.flags.forceSystemMemory = true; unifiedMemoryProperties.flags.allocateMemory = false; auto graphicsAllocation = memoryManager->createGraphicsAllocationFromExistingStorage(unifiedMemoryProperties, eventPool->eventPoolPtr, eventPool->getAllocation()); if (!graphicsAllocation) { for (auto gpuAllocation : eventPool->getAllocation().getGraphicsAllocations()) { memoryManager->freeGraphicsMemory(gpuAllocation); } memoryManager->freeGraphicsMemory(alloc); delete eventPool; return ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY; } eventPool->eventPoolAllocations->addAllocation(graphicsAllocation); } *phEventPool = eventPool; return ZE_RESULT_SUCCESS; } ze_result_t ContextImp::getMemAllocProperties(const void *ptr, ze_memory_allocation_properties_t *pMemAllocProperties, ze_device_handle_t *phDevice) { auto alloc = driverHandle->svmAllocsManager->getSVMAlloc(ptr); if (nullptr == alloc) { pMemAllocProperties->type = ZE_MEMORY_TYPE_UNKNOWN; return ZE_RESULT_SUCCESS; } pMemAllocProperties->type = Context::parseUSMType(alloc->memoryType); pMemAllocProperties->pageSize = alloc->pageSizeForAlignment; pMemAllocProperties->id = alloc->getAllocId(); if (phDevice != nullptr) { if (alloc->device == nullptr) 
{ *phDevice = nullptr; } else { auto device = static_cast(alloc->device)->getSpecializedDevice(); DEBUG_BREAK_IF(device == nullptr); *phDevice = device->toHandle(); } } return handleAllocationExtensions(alloc->gpuAllocations.getDefaultGraphicsAllocation(), pMemAllocProperties->type, pMemAllocProperties->pNext, driverHandle); } ze_result_t ContextImp::getImageAllocProperties(Image *image, ze_image_allocation_ext_properties_t *pAllocProperties) { NEO::GraphicsAllocation *alloc = image->getAllocation(); if (alloc == nullptr) { return ZE_RESULT_ERROR_UNKNOWN; } pAllocProperties->id = 0; return handleAllocationExtensions(alloc, ZE_MEMORY_TYPE_DEVICE, pAllocProperties->pNext, driverHandle); } ze_result_t ContextImp::createModule(ze_device_handle_t hDevice, const ze_module_desc_t *desc, ze_module_handle_t *phModule, ze_module_build_log_handle_t *phBuildLog) { return L0::Device::fromHandle(hDevice)->createModule(desc, phModule, phBuildLog, ModuleType::User); } ze_result_t ContextImp::createSampler(ze_device_handle_t hDevice, const ze_sampler_desc_t *pDesc, ze_sampler_handle_t *phSampler) { return L0::Device::fromHandle(hDevice)->createSampler(pDesc, phSampler); } ze_result_t ContextImp::createCommandQueue(ze_device_handle_t hDevice, const ze_command_queue_desc_t *desc, ze_command_queue_handle_t *commandQueue) { return L0::Device::fromHandle(hDevice)->createCommandQueue(desc, commandQueue); } ze_result_t ContextImp::createCommandList(ze_device_handle_t hDevice, const ze_command_list_desc_t *desc, ze_command_list_handle_t *commandList) { return L0::Device::fromHandle(hDevice)->createCommandList(desc, commandList); } ze_result_t ContextImp::createCommandListImmediate(ze_device_handle_t hDevice, const ze_command_queue_desc_t *desc, ze_command_list_handle_t *commandList) { return L0::Device::fromHandle(hDevice)->createCommandListImmediate(desc, commandList); } ze_result_t ContextImp::activateMetricGroups(zet_device_handle_t hDevice, uint32_t count, zet_metric_group_handle_t 
*phMetricGroups) { return L0::Device::fromHandle(hDevice)->activateMetricGroupsDeferred(count, phMetricGroups); } ze_result_t ContextImp::reserveVirtualMem(const void *pStart, size_t size, void **pptr) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t ContextImp::freeVirtualMem(const void *ptr, size_t size) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t ContextImp::queryVirtualMemPageSize(ze_device_handle_t hDevice, size_t size, size_t *pagesize) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t ContextImp::createPhysicalMem(ze_device_handle_t hDevice, ze_physical_mem_desc_t *desc, ze_physical_mem_handle_t *phPhysicalMemory) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t ContextImp::destroyPhysicalMem(ze_physical_mem_handle_t hPhysicalMemory) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t ContextImp::mapVirtualMem(const void *ptr, size_t size, ze_physical_mem_handle_t hPhysicalMemory, size_t offset, ze_memory_access_attribute_t access) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t ContextImp::unMapVirtualMem(const void *ptr, size_t size) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t ContextImp::setVirtualMemAccessAttribute(const void *ptr, size_t size, ze_memory_access_attribute_t access) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t ContextImp::getVirtualMemAccessAttribute(const void *ptr, size_t size, ze_memory_access_attribute_t *access, size_t *outSize) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t ContextImp::createEventPool(const ze_event_pool_desc_t *desc, uint32_t numDevices, ze_device_handle_t *phDevices, ze_event_pool_handle_t *phEventPool) { ze_result_t result; EventPool *eventPool = EventPool::create(this->driverHandle, this, numDevices, phDevices, desc, result); if (eventPool == nullptr) { return result; } *phEventPool = eventPool->toHandle(); return ZE_RESULT_SUCCESS; } ze_result_t ContextImp::createImage(ze_device_handle_t hDevice, const 
ze_image_desc_t *desc, ze_image_handle_t *phImage) { return L0::Device::fromHandle(hDevice)->createImage(desc, phImage); } bool ContextImp::isAllocationSuitableForCompression(const StructuresLookupTable &structuresLookupTable, Device &device, size_t allocSize) { auto &hwInfo = device.getHwInfo(); auto &hwHelper = device.getHwHelper(); auto &l0HwHelper = L0HwHelper::get(hwInfo.platform.eRenderCoreFamily); if (!l0HwHelper.usmCompressionSupported(hwInfo) || !hwHelper.isBufferSizeSuitableForCompression(allocSize, hwInfo) || structuresLookupTable.uncompressedHint) { return false; } if (l0HwHelper.forceDefaultUsmCompressionSupport()) { return true; } return structuresLookupTable.compressedHint; } } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/context/context_imp.h000066400000000000000000000164071422164147700261460ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/os_interface/os_interface.h" #include "level_zero/core/source/context/context.h" #include "level_zero/core/source/driver/driver_handle_imp.h" namespace L0 { struct StructuresLookupTable; struct ContextImp : Context { ContextImp(DriverHandle *driverHandle); ~ContextImp() override = default; ze_result_t destroy() override; ze_result_t getStatus() override; DriverHandle *getDriverHandle() override; ze_result_t allocHostMem(const ze_host_mem_alloc_desc_t *hostDesc, size_t size, size_t alignment, void **ptr) override; ze_result_t allocDeviceMem(ze_device_handle_t hDevice, const ze_device_mem_alloc_desc_t *deviceDesc, size_t size, size_t alignment, void **ptr) override; ze_result_t allocSharedMem(ze_device_handle_t hDevice, const ze_device_mem_alloc_desc_t *deviceDesc, const ze_host_mem_alloc_desc_t *hostDesc, size_t size, size_t alignment, void **ptr) override; ze_result_t freeMem(const void *ptr) override; ze_result_t freeMem(const void *ptr, bool blocking) override; ze_result_t freeMemExt(const 
ze_memory_free_ext_desc_t *pMemFreeDesc, void *ptr) override; ze_result_t makeMemoryResident(ze_device_handle_t hDevice, void *ptr, size_t size) override; ze_result_t evictMemory(ze_device_handle_t hDevice, void *ptr, size_t size) override; ze_result_t makeImageResident(ze_device_handle_t hDevice, ze_image_handle_t hImage) override; ze_result_t evictImage(ze_device_handle_t hDevice, ze_image_handle_t hImage) override; ze_result_t getMemAddressRange(const void *ptr, void **pBase, size_t *pSize) override; ze_result_t closeIpcMemHandle(const void *ptr) override; ze_result_t getIpcMemHandle(const void *ptr, ze_ipc_mem_handle_t *pIpcHandle) override; ze_result_t openIpcMemHandle(ze_device_handle_t hDevice, ze_ipc_mem_handle_t handle, ze_ipc_memory_flags_t flags, void **ptr) override; ze_result_t getMemAllocProperties(const void *ptr, ze_memory_allocation_properties_t *pMemAllocProperties, ze_device_handle_t *phDevice) override; ze_result_t getImageAllocProperties(Image *image, ze_image_allocation_ext_properties_t *pAllocProperties) override; ze_result_t createModule(ze_device_handle_t hDevice, const ze_module_desc_t *desc, ze_module_handle_t *phModule, ze_module_build_log_handle_t *phBuildLog) override; ze_result_t createSampler(ze_device_handle_t hDevice, const ze_sampler_desc_t *pDesc, ze_sampler_handle_t *phSampler) override; ze_result_t createCommandQueue(ze_device_handle_t hDevice, const ze_command_queue_desc_t *desc, ze_command_queue_handle_t *commandQueue) override; ze_result_t createCommandList(ze_device_handle_t hDevice, const ze_command_list_desc_t *desc, ze_command_list_handle_t *commandList) override; ze_result_t createCommandListImmediate(ze_device_handle_t hDevice, const ze_command_queue_desc_t *desc, ze_command_list_handle_t *commandList) override; ze_result_t activateMetricGroups(zet_device_handle_t hDevice, uint32_t count, zet_metric_group_handle_t *phMetricGroups) override; ze_result_t reserveVirtualMem(const void *pStart, size_t size, void **pptr) 
override; ze_result_t freeVirtualMem(const void *ptr, size_t size) override; ze_result_t queryVirtualMemPageSize(ze_device_handle_t hDevice, size_t size, size_t *pagesize) override; ze_result_t createPhysicalMem(ze_device_handle_t hDevice, ze_physical_mem_desc_t *desc, ze_physical_mem_handle_t *phPhysicalMemory) override; ze_result_t destroyPhysicalMem(ze_physical_mem_handle_t hPhysicalMemory) override; ze_result_t mapVirtualMem(const void *ptr, size_t size, ze_physical_mem_handle_t hPhysicalMemory, size_t offset, ze_memory_access_attribute_t access) override; ze_result_t unMapVirtualMem(const void *ptr, size_t size) override; ze_result_t setVirtualMemAccessAttribute(const void *ptr, size_t size, ze_memory_access_attribute_t access) override; ze_result_t getVirtualMemAccessAttribute(const void *ptr, size_t size, ze_memory_access_attribute_t *access, size_t *outSize) override; ze_result_t openEventPoolIpcHandle(ze_ipc_event_pool_handle_t hIpc, ze_event_pool_handle_t *phEventPool) override; ze_result_t createEventPool(const ze_event_pool_desc_t *desc, uint32_t numDevices, ze_device_handle_t *phDevices, ze_event_pool_handle_t *phEventPool) override; ze_result_t createImage(ze_device_handle_t hDevice, const ze_image_desc_t *desc, ze_image_handle_t *phImage) override; void addDeviceAndSubDevices(Device *device); std::map &getDevices() { return devices; } std::set rootDeviceIndices = {}; std::map deviceBitfields; bool isDeviceDefinedForThisContext(Device *inDevice); bool isShareableMemory(const void *exportDesc, bool exportableMemory, NEO::Device *neoDevice) override; void *getMemHandlePtr(ze_device_handle_t hDevice, uint64_t handle, ze_ipc_memory_flags_t flags) override; protected: bool isAllocationSuitableForCompression(const StructuresLookupTable &structuresLookupTable, Device &device, size_t allocSize); std::map devices; DriverHandleImp *driverHandle = nullptr; }; } // namespace L0 
compute-runtime-22.14.22890/level_zero/core/source/context/context_imp_drm/000077500000000000000000000000001422164147700266275ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/source/context/context_imp_drm/context_imp.cpp000066400000000000000000000010521422164147700316620ustar00rootroot00000000000000/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/context/context_imp.h" namespace L0 { bool ContextImp::isShareableMemory(const void *exportDesc, bool exportableMemory, NEO::Device *neoDevice) { if (exportableMemory) { return true; } return false; } void *ContextImp::getMemHandlePtr(ze_device_handle_t hDevice, uint64_t handle, ze_ipc_memory_flags_t flags) { return this->driverHandle->importFdHandle(hDevice, flags, handle, nullptr); } } // namespace L0compute-runtime-22.14.22890/level_zero/core/source/context/context_imp_drm_or_wddm/000077500000000000000000000000001422164147700303425ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/source/context/context_imp_drm_or_wddm/context_imp.cpp000066400000000000000000000031101422164147700333720ustar00rootroot00000000000000/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/context/context_imp.h" namespace L0 { bool ContextImp::isShareableMemory(const void *exportDesc, bool exportableMemory, NEO::Device *neoDevice) { if (exportableMemory) { return true; } if (neoDevice->getRootDeviceEnvironment().osInterface) { NEO::DriverModelType driverType = neoDevice->getRootDeviceEnvironment().osInterface->getDriverModel()->getDriverModelType(); if (!exportDesc && driverType == NEO::DriverModelType::WDDM) { return true; } } return false; } void *ContextImp::getMemHandlePtr(ze_device_handle_t hDevice, uint64_t handle, ze_ipc_memory_flags_t flags) { L0::Device *device = L0::Device::fromHandle(hDevice); auto neoDevice = device->getNEODevice(); NEO::DriverModelType driverType = 
NEO::DriverModelType::UNKNOWN; if (neoDevice->getRootDeviceEnvironment().osInterface) { driverType = neoDevice->getRootDeviceEnvironment().osInterface->getDriverModel()->getDriverModelType(); } bool isNTHandle = this->getDriverHandle()->getMemoryManager()->isNTHandle(NEO::toOsHandle(reinterpret_cast(handle)), device->getNEODevice()->getRootDeviceIndex()); if (isNTHandle) { return this->driverHandle->importNTHandle(hDevice, reinterpret_cast(handle)); } else if (driverType == NEO::DriverModelType::DRM) { return this->driverHandle->importFdHandle(hDevice, flags, handle, nullptr); } else { return nullptr; } } } // namespace L0compute-runtime-22.14.22890/level_zero/core/source/context/context_imp_wddm/000077500000000000000000000000001422164147700270005ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/source/context/context_imp_wddm/context_imp.cpp000066400000000000000000000010641422164147700320360ustar00rootroot00000000000000/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/context/context_imp.h" namespace L0 { bool ContextImp::isShareableMemory(const void *exportDesc, bool exportableMemory, NEO::Device *neoDevice) { if (exportableMemory) { return true; } return false; } void *ContextImp::getMemHandlePtr(ze_device_handle_t hDevice, uint64_t handle, ze_ipc_memory_flags_t flags) { return this->driverHandle->importNTHandle(hDevice, reinterpret_cast(handle)); } } // namespace L0compute-runtime-22.14.22890/level_zero/core/source/debugger/000077500000000000000000000000001422164147700235345ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/source/debugger/CMakeLists.txt000066400000000000000000000007641422164147700263030ustar00rootroot00000000000000# # Copyright (C) 2020-2022 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_DEBUGGER ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/debugger_l0.cpp 
${CMAKE_CURRENT_SOURCE_DIR}/debugger_l0.h ${CMAKE_CURRENT_SOURCE_DIR}/debugger_l0.inl ${CMAKE_CURRENT_SOURCE_DIR}/debugger_l0_base.inl ${CMAKE_CURRENT_SOURCE_DIR}/debugger_l0_tgllp_and_later.inl ) add_subdirectories() set_property(GLOBAL PROPERTY L0_SRCS_DEBUGGER ${L0_SRCS_DEBUGGER}) compute-runtime-22.14.22890/level_zero/core/source/debugger/debug_manager.cpp000066400000000000000000000005071422164147700270220ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/debug_settings/debug_settings_manager.h" #include "level_zero/core/source/compiler_interface/l0_reg_path.h" namespace NEO { DebugSettingsManager DebugManager(L0::registryPath); } compute-runtime-22.14.22890/level_zero/core/source/debugger/debugger_l0.cpp000066400000000000000000000124771422164147700264320ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/debugger/debugger_l0.h" #include "shared/source/command_container/cmdcontainer.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/device/device.h" #include "shared/source/helpers/constants.h" #include "shared/source/memory_manager/allocation_properties.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/memory_manager/memory_operations_handler.h" #include "shared/source/os_interface/os_context.h" #include namespace L0 { DebugerL0CreateFn debuggerL0Factory[IGFX_MAX_CORE] = {}; DebuggerL0::DebuggerL0(NEO::Device *device) : device(device) { isLegacyMode = false; initialize(); } void DebuggerL0::initialize() { if (NEO::DebugManager.flags.DebuggerForceSbaTrackingMode.get() != -1) { setSingleAddressSpaceSbaTracking(NEO::DebugManager.flags.DebuggerForceSbaTrackingMode.get()); } auto &engines = device->getMemoryManager()->getRegisteredEngines(); NEO::AllocationProperties properties{device->getRootDeviceIndex(), true, 
MemoryConstants::pageSize, NEO::AllocationType::DEBUG_SBA_TRACKING_BUFFER, false, device->getDeviceBitfield()}; if (!singleAddressSpaceSbaTracking) { sbaTrackingGpuVa = device->getMemoryManager()->reserveGpuAddress(MemoryConstants::pageSize, device->getRootDeviceIndex()); properties.gpuAddress = sbaTrackingGpuVa.address; } SbaTrackedAddresses sbaHeader; for (auto &engine : engines) { if (!singleAddressSpaceSbaTracking) { properties.osContext = engine.osContext; } auto sbaAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties(properties); memset(sbaAllocation->getUnderlyingBuffer(), 0, sbaAllocation->getUnderlyingBufferSize()); auto sbaHeaderPtr = reinterpret_cast(sbaAllocation->getUnderlyingBuffer()); *sbaHeaderPtr = sbaHeader; perContextSbaAllocations[engine.osContext->getContextId()] = sbaAllocation; } { auto &hwInfo = device->getHardwareInfo(); auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily); NEO::AllocationProperties properties{device->getRootDeviceIndex(), true, MemoryConstants::pageSize64k, NEO::AllocationType::DEBUG_MODULE_AREA, false, device->getDeviceBitfield()}; moduleDebugArea = device->getMemoryManager()->allocateGraphicsMemoryWithProperties(properties); bool bindlessSip = NEO::DebugManager.flags.UseBindlessDebugSip.get(); DebugAreaHeader debugArea = {}; debugArea.reserved1 = bindlessSip ? 
1 : 0; debugArea.size = sizeof(DebugAreaHeader); debugArea.pgsize = 1; debugArea.isShared = moduleDebugArea->storageInfo.getNumBanks() == 1; debugArea.scratchBegin = sizeof(DebugAreaHeader); debugArea.scratchEnd = MemoryConstants::pageSize64k - sizeof(DebugAreaHeader); NEO::MemoryOperationsHandler *memoryOperationsIface = device->getRootDeviceEnvironment().memoryOperationsInterface.get(); if (memoryOperationsIface) { memoryOperationsIface->makeResident(device, ArrayRef(&moduleDebugArea, 1)); } NEO::MemoryTransferHelper::transferMemoryToAllocation(hwHelper.isBlitCopyRequiredForLocalMemory(hwInfo, *moduleDebugArea), *device, moduleDebugArea, 0, &debugArea, sizeof(DebugAreaHeader)); if (hwHelper.disableL3CacheForDebug(hwInfo)) { device->getGmmHelper()->forceAllResourcesUncached(); } } } void DebuggerL0::printTrackedAddresses(uint32_t contextId) { auto memory = perContextSbaAllocations[contextId]->getUnderlyingBuffer(); auto sba = reinterpret_cast(memory); PRINT_DEBUGGER_INFO_LOG("Debugger: SBA ssh = %" SCNx64 " gsba = %" SCNx64 " dsba = %" SCNx64 " ioba = %" SCNx64 " iba = %" SCNx64 " bsurfsba = %" SCNx64 "\n", sba->SurfaceStateBaseAddress, sba->GeneralStateBaseAddress, sba->DynamicStateBaseAddress, sba->IndirectObjectBaseAddress, sba->InstructionBaseAddress, sba->BindlessSurfaceStateBaseAddress); } DebuggerL0 ::~DebuggerL0() { for (auto &alloc : perContextSbaAllocations) { device->getMemoryManager()->freeGraphicsMemory(alloc.second); } if (sbaTrackingGpuVa.size != 0) { device->getMemoryManager()->freeGpuAddress(sbaTrackingGpuVa, device->getRootDeviceIndex()); } device->getMemoryManager()->freeGraphicsMemory(moduleDebugArea); } void DebuggerL0::captureStateBaseAddress(NEO::CommandContainer &container, SbaAddresses sba) { if (DebuggerL0::isAnyTrackedAddressChanged(sba)) { programSbaTrackingCommands(*container.getCommandStream(), sba); } } } // namespace L0 
compute-runtime-22.14.22890/level_zero/core/source/debugger/debugger_l0.h000066400000000000000000000116151422164147700260700ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/debugger/debugger.h" #include "shared/source/helpers/non_copyable_or_moveable.h" #include "shared/source/memory_manager/memory_manager.h" #include #include #include #include namespace NEO { class Device; class GraphicsAllocation; class LinearStream; class OSInterface; } // namespace NEO namespace L0 { #pragma pack(1) struct SbaTrackedAddresses { char magic[8] = "sbaarea"; uint64_t Reserved1 = 0; uint8_t Version = 0; uint8_t Reserved2[7]; uint64_t GeneralStateBaseAddress = 0; uint64_t SurfaceStateBaseAddress = 0; uint64_t DynamicStateBaseAddress = 0; uint64_t IndirectObjectBaseAddress = 0; uint64_t InstructionBaseAddress = 0; uint64_t BindlessSurfaceStateBaseAddress = 0; uint64_t BindlessSamplerStateBaseAddress = 0; }; struct DebugAreaHeader { char magic[8] = "dbgarea"; uint64_t reserved1 = 0; uint8_t version = 0; uint8_t pgsize = 0; uint8_t size = 0; uint8_t reserved2 = 0; uint16_t scratchBegin = 0; uint16_t scratchEnd = 0; union { uint64_t isSharedBitfield = 0; struct { uint64_t isShared : 1; uint64_t reserved3 : 63; }; }; }; static_assert(sizeof(DebugAreaHeader) == 32u * sizeof(uint8_t)); struct alignas(4) DebuggerVersion { uint8_t major; uint8_t minor; uint16_t patch; }; struct alignas(8) StateSaveAreaHeader { char magic[8] = "tssarea"; uint64_t reserved1; struct DebuggerVersion version; uint8_t size; uint8_t reserved2[3]; }; #pragma pack() class DebuggerL0 : public NEO::Debugger, NEO::NonCopyableOrMovableClass { public: static std::unique_ptr create(NEO::Device *device); DebuggerL0(NEO::Device *device); ~DebuggerL0() override; NEO::GraphicsAllocation *getSbaTrackingBuffer(uint32_t contextId) { return perContextSbaAllocations[contextId]; } NEO::GraphicsAllocation *getModuleDebugArea() { 
return moduleDebugArea; } uint64_t getSbaTrackingGpuVa() { return sbaTrackingGpuVa.address; } void captureStateBaseAddress(NEO::CommandContainer &container, SbaAddresses sba) override; void printTrackedAddresses(uint32_t contextId); MOCKABLE_VIRTUAL void registerElf(NEO::DebugData *debugData, NEO::GraphicsAllocation *isaAllocation); MOCKABLE_VIRTUAL void notifyCommandQueueCreated(); MOCKABLE_VIRTUAL void notifyCommandQueueDestroyed(); virtual size_t getSbaTrackingCommandsSize(size_t trackedAddressCount) = 0; virtual void programSbaTrackingCommands(NEO::LinearStream &cmdStream, const SbaAddresses &sba) = 0; virtual size_t getSbaAddressLoadCommandsSize() = 0; virtual void programSbaAddressLoad(NEO::LinearStream &cmdStream, uint64_t sbaGpuVa) = 0; MOCKABLE_VIRTUAL bool attachZebinModuleToSegmentAllocations(const StackVec &kernelAlloc, uint32_t &moduleHandle); MOCKABLE_VIRTUAL bool removeZebinModule(uint32_t moduleHandle); void setSingleAddressSpaceSbaTracking(bool value) { singleAddressSpaceSbaTracking = value; } protected: static bool isAnyTrackedAddressChanged(SbaAddresses sba) { return sba.GeneralStateBaseAddress != 0 || sba.SurfaceStateBaseAddress != 0 || sba.BindlessSurfaceStateBaseAddress != 0; } static bool initDebuggingInOs(NEO::OSInterface *osInterface); void initialize(); NEO::Device *device = nullptr; NEO::GraphicsAllocation *sbaAllocation = nullptr; std::unordered_map perContextSbaAllocations; NEO::AddressRange sbaTrackingGpuVa{}; NEO::GraphicsAllocation *moduleDebugArea = nullptr; std::atomic commandQueueCount = 0u; uint32_t uuidL0CommandQueueHandle = 0; bool singleAddressSpaceSbaTracking = false; }; using DebugerL0CreateFn = DebuggerL0 *(*)(NEO::Device *device); extern DebugerL0CreateFn debuggerL0Factory[]; template class DebuggerL0Hw : public DebuggerL0 { public: static DebuggerL0 *allocate(NEO::Device *device); size_t getSbaTrackingCommandsSize(size_t trackedAddressCount) override; void programSbaTrackingCommands(NEO::LinearStream &cmdStream, const 
SbaAddresses &sba) override; size_t getSbaAddressLoadCommandsSize() override; void programSbaAddressLoad(NEO::LinearStream &cmdStream, uint64_t sbaGpuVa) override; void programSbaTrackingCommandsSingleAddressSpace(NEO::LinearStream &cmdStream, const SbaAddresses &sba); protected: DebuggerL0Hw(NEO::Device *device) : DebuggerL0(device){}; }; template struct DebuggerL0PopulateFactory { DebuggerL0PopulateFactory() { debuggerL0Factory[coreFamily] = DebuggerL0Hw::allocate; } }; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/debugger/debugger_l0.inl000066400000000000000000000163161422164147700264260ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_container/command_encoder.h" #include "shared/source/command_stream/linear_stream.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "level_zero/core/source/debugger/debugger_l0.h" #include "hw_cmds.h" namespace L0 { template void DebuggerL0Hw::programSbaTrackingCommands(NEO::LinearStream &cmdStream, const SbaAddresses &sba) { using MI_STORE_DATA_IMM = typename GfxFamily::MI_STORE_DATA_IMM; using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM; using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START; using MI_ARB_CHECK = typename GfxFamily::MI_ARB_CHECK; const auto gpuAddress = NEO::GmmHelper::decanonize(sbaTrackingGpuVa.address); PRINT_DEBUGGER_INFO_LOG("Debugger: SBA stored ssh = %" SCNx64 " gsba = %" SCNx64 " dsba = %" SCNx64 " ioba = %" SCNx64 " iba = %" SCNx64 " bsurfsba = %" SCNx64 "\n", sba.SurfaceStateBaseAddress, sba.GeneralStateBaseAddress, sba.DynamicStateBaseAddress, sba.IndirectObjectBaseAddress, sba.InstructionBaseAddress, sba.BindlessSurfaceStateBaseAddress); if (singleAddressSpaceSbaTracking) { programSbaTrackingCommandsSingleAddressSpace(cmdStream, sba); } else { if 
(sba.GeneralStateBaseAddress) { auto generalStateBaseAddress = NEO::GmmHelper::decanonize(sba.GeneralStateBaseAddress); NEO::EncodeStoreMemory::programStoreDataImm(cmdStream, gpuAddress + offsetof(SbaTrackedAddresses, GeneralStateBaseAddress), static_cast(generalStateBaseAddress & 0x0000FFFFFFFFULL), static_cast(generalStateBaseAddress >> 32), true, false); } if (sba.SurfaceStateBaseAddress) { auto surfaceStateBaseAddress = NEO::GmmHelper::decanonize(sba.SurfaceStateBaseAddress); NEO::EncodeStoreMemory::programStoreDataImm(cmdStream, gpuAddress + offsetof(SbaTrackedAddresses, SurfaceStateBaseAddress), static_cast(surfaceStateBaseAddress & 0x0000FFFFFFFFULL), static_cast(surfaceStateBaseAddress >> 32), true, false); } if (sba.DynamicStateBaseAddress) { auto dynamicStateBaseAddress = NEO::GmmHelper::decanonize(sba.DynamicStateBaseAddress); NEO::EncodeStoreMemory::programStoreDataImm(cmdStream, gpuAddress + offsetof(SbaTrackedAddresses, DynamicStateBaseAddress), static_cast(dynamicStateBaseAddress & 0x0000FFFFFFFFULL), static_cast(dynamicStateBaseAddress >> 32), true, false); } if (sba.IndirectObjectBaseAddress) { auto indirectObjectBaseAddress = NEO::GmmHelper::decanonize(sba.IndirectObjectBaseAddress); NEO::EncodeStoreMemory::programStoreDataImm(cmdStream, gpuAddress + offsetof(SbaTrackedAddresses, IndirectObjectBaseAddress), static_cast(indirectObjectBaseAddress & 0x0000FFFFFFFFULL), static_cast(indirectObjectBaseAddress >> 32), true, false); } if (sba.InstructionBaseAddress) { auto instructionBaseAddress = NEO::GmmHelper::decanonize(sba.InstructionBaseAddress); NEO::EncodeStoreMemory::programStoreDataImm(cmdStream, gpuAddress + offsetof(SbaTrackedAddresses, InstructionBaseAddress), static_cast(instructionBaseAddress & 0x0000FFFFFFFFULL), static_cast(instructionBaseAddress >> 32), true, false); } if (sba.BindlessSurfaceStateBaseAddress) { auto bindlessSurfaceStateBaseAddress = NEO::GmmHelper::decanonize(sba.BindlessSurfaceStateBaseAddress); 
NEO::EncodeStoreMemory::programStoreDataImm(cmdStream, gpuAddress + offsetof(SbaTrackedAddresses, BindlessSurfaceStateBaseAddress), static_cast(bindlessSurfaceStateBaseAddress & 0x0000FFFFFFFFULL), static_cast(bindlessSurfaceStateBaseAddress >> 32), true, false); } } } template DebuggerL0 *DebuggerL0Hw::allocate(NEO::Device *device) { return new DebuggerL0Hw(device); } template size_t DebuggerL0Hw::getSbaAddressLoadCommandsSize() { if (!singleAddressSpaceSbaTracking) { return 0; } return 2 * sizeof(typename GfxFamily::MI_LOAD_REGISTER_IMM); } template void DebuggerL0Hw::programSbaAddressLoad(NEO::LinearStream &cmdStream, uint64_t sbaGpuVa) { if (!singleAddressSpaceSbaTracking) { return; } uint32_t low = sbaGpuVa & 0xffffffff; uint32_t high = (sbaGpuVa >> 32) & 0xffffffff; NEO::LriHelper::program(&cmdStream, CS_GPR_R15, low, true); NEO::LriHelper::program(&cmdStream, CS_GPR_R15 + 4, high, true); } } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/debugger/debugger_l0_base.inl000066400000000000000000000011541422164147700274120ustar00rootroot00000000000000/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ namespace L0 { template size_t DebuggerL0Hw::getSbaTrackingCommandsSize(size_t trackedAddressCount) { if (singleAddressSpaceSbaTracking) { UNRECOVERABLE_IF(true); return 0; } return trackedAddressCount * NEO::EncodeStoreMemory::getStoreDataImmSize(); } template void DebuggerL0Hw::programSbaTrackingCommandsSingleAddressSpace(NEO::LinearStream &cmdStream, const SbaAddresses &sba) { UNRECOVERABLE_IF(true); } } // namespace L0compute-runtime-22.14.22890/level_zero/core/source/debugger/debugger_l0_tgllp_and_later.inl000066400000000000000000000211111422164147700316260ustar00rootroot00000000000000/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ namespace L0 { template size_t DebuggerL0Hw::getSbaTrackingCommandsSize(size_t trackedAddressCount) { if (singleAddressSpaceSbaTracking) { constexpr 
uint32_t aluCmdSize = sizeof(typename GfxFamily::MI_MATH) + sizeof(typename GfxFamily::MI_MATH_ALU_INST_INLINE) * NUM_ALU_INST_FOR_READ_MODIFY_WRITE; return 2 * (sizeof(typename GfxFamily::MI_ARB_CHECK) + sizeof(typename GfxFamily::MI_BATCH_BUFFER_START)) + trackedAddressCount * (sizeof(typename GfxFamily::MI_LOAD_REGISTER_IMM) + aluCmdSize + 2 * sizeof(typename GfxFamily::MI_STORE_REGISTER_MEM) + 3 * sizeof(typename GfxFamily::MI_STORE_DATA_IMM) + sizeof(typename GfxFamily::MI_ARB_CHECK) + sizeof(typename GfxFamily::MI_BATCH_BUFFER_START)); } return trackedAddressCount * NEO::EncodeStoreMemory::getStoreDataImmSize(); } template void DebuggerL0Hw::programSbaTrackingCommandsSingleAddressSpace(NEO::LinearStream &cmdStream, const SbaAddresses &sba) { using MI_STORE_DATA_IMM = typename GfxFamily::MI_STORE_DATA_IMM; using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM; using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START; using MI_ARB_CHECK = typename GfxFamily::MI_ARB_CHECK; using MI_MATH_ALU_INST_INLINE = typename GfxFamily::MI_MATH_ALU_INST_INLINE; using MI_NOOP = typename GfxFamily::MI_NOOP; const auto offsetToAddress = offsetof(MI_STORE_DATA_IMM, TheStructure.RawData[1]); const auto offsetToData = offsetof(MI_STORE_DATA_IMM, TheStructure.Common.DataDword0); UNRECOVERABLE_IF(!singleAddressSpaceSbaTracking); std::vector> fieldOffsetAndValue; if (sba.GeneralStateBaseAddress) { fieldOffsetAndValue.push_back({offsetof(SbaTrackedAddresses, GeneralStateBaseAddress), NEO::GmmHelper::decanonize(sba.GeneralStateBaseAddress)}); } if (sba.SurfaceStateBaseAddress) { fieldOffsetAndValue.push_back({offsetof(SbaTrackedAddresses, SurfaceStateBaseAddress), NEO::GmmHelper::decanonize(sba.SurfaceStateBaseAddress)}); } if (sba.DynamicStateBaseAddress) { fieldOffsetAndValue.push_back({offsetof(SbaTrackedAddresses, DynamicStateBaseAddress), NEO::GmmHelper::decanonize(sba.DynamicStateBaseAddress)}); } if (sba.IndirectObjectBaseAddress) { 
fieldOffsetAndValue.push_back({offsetof(SbaTrackedAddresses, IndirectObjectBaseAddress), NEO::GmmHelper::decanonize(sba.IndirectObjectBaseAddress)}); } if (sba.InstructionBaseAddress) { fieldOffsetAndValue.push_back({offsetof(SbaTrackedAddresses, InstructionBaseAddress), NEO::GmmHelper::decanonize(sba.InstructionBaseAddress)}); } if (sba.BindlessSurfaceStateBaseAddress) { fieldOffsetAndValue.push_back({offsetof(SbaTrackedAddresses, BindlessSurfaceStateBaseAddress), NEO::GmmHelper::decanonize(sba.BindlessSurfaceStateBaseAddress)}); } const auto cmdStreamGpuBase = cmdStream.getGpuBase(); const auto cmdStreamCpuBase = reinterpret_cast(cmdStream.getCpuBase()); if (fieldOffsetAndValue.size()) { auto arb = cmdStream.getSpaceForCmd(); auto arbCmd = GfxFamily::cmdInitArbCheck; arbCmd.setPreParserDisable(true); *arb = arbCmd; // Jump to SDI command that is modified auto newBuffer = cmdStream.getSpaceForCmd(); const auto nextCommand = ptrOffset(cmdStreamGpuBase, ptrDiff(reinterpret_cast(cmdStream.getSpace(0)), cmdStreamCpuBase)); MI_BATCH_BUFFER_START bbCmd = GfxFamily::cmdInitBatchBufferStart; bbCmd.setAddressSpaceIndicator(MI_BATCH_BUFFER_START::ADDRESS_SPACE_INDICATOR_PPGTT); bbCmd.setBatchBufferStartAddress(nextCommand); bbCmd.setSecondLevelBatchBuffer(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH); *newBuffer = bbCmd; } for (const auto &pair : fieldOffsetAndValue) { // Store SBA field offset to R0 NEO::EncodeSetMMIO::encodeIMM(cmdStream, CS_GPR_R0, static_cast(pair.first), true); // Add GPR0 to GPR15, store result in GPR1 NEO::EncodeMath::addition(cmdStream, AluRegisters::R_0, AluRegisters::R_15, AluRegisters::R_1); // Cmds to store dest address - from GPR auto miStoreRegMemLow = cmdStream.getSpaceForCmd(); auto miStoreRegMemHigh = cmdStream.getSpaceForCmd(); // Cmd to store value ( SBA address ) auto miStoreDataSettingSbaBufferAddress = cmdStream.getSpaceForCmd(); auto miStoreDataSettingSbaBufferAddress2 = cmdStream.getSpaceForCmd(); auto arb = 
cmdStream.getSpaceForCmd(); auto arbCmd = GfxFamily::cmdInitArbCheck; arbCmd.setPreParserDisable(true); *arb = arbCmd; // Jump to SDI command that is modified auto newBuffer = cmdStream.getSpaceForCmd(); const auto addressOfSDI = ptrOffset(cmdStreamGpuBase, ptrDiff(reinterpret_cast(cmdStream.getSpace(0)), cmdStreamCpuBase)); // Cmd to store value ( SBA address ) auto miStoreSbaField = cmdStream.getSpaceForCmd(); auto gpuVaOfAddress = addressOfSDI + offsetToAddress; auto gpuVaOfData = addressOfSDI + offsetToData; const auto gpuVaOfDataDWORD1 = gpuVaOfData + 4; MI_STORE_REGISTER_MEM srmCmdLow = GfxFamily::cmdInitStoreRegisterMem; srmCmdLow.setRegisterAddress(CS_GPR_R1); srmCmdLow.setMemoryAddress(gpuVaOfAddress); NEO::EncodeStoreMMIO::remapOffset(&srmCmdLow); *miStoreRegMemLow = srmCmdLow; MI_STORE_REGISTER_MEM srmCmdHigh = GfxFamily::cmdInitStoreRegisterMem; srmCmdHigh.setRegisterAddress(CS_GPR_R1 + 4); srmCmdHigh.setMemoryAddress(gpuVaOfAddress + 4); NEO::EncodeStoreMMIO::remapOffset(&srmCmdHigh); *miStoreRegMemHigh = srmCmdHigh; MI_STORE_DATA_IMM setSbaBufferAddress = GfxFamily::cmdInitStoreDataImm; setSbaBufferAddress.setAddress(gpuVaOfData); setSbaBufferAddress.setStoreQword(false); setSbaBufferAddress.setDataDword0(pair.second & 0xffffffff); setSbaBufferAddress.setDataDword1(0); setSbaBufferAddress.setDwordLength(MI_STORE_DATA_IMM::DWORD_LENGTH::DWORD_LENGTH_STORE_DWORD); *miStoreDataSettingSbaBufferAddress = setSbaBufferAddress; setSbaBufferAddress.setAddress(gpuVaOfDataDWORD1); setSbaBufferAddress.setStoreQword(false); setSbaBufferAddress.setDataDword0((pair.second >> 32) & 0xffffffff); setSbaBufferAddress.setDataDword1(0); setSbaBufferAddress.setDwordLength(MI_STORE_DATA_IMM::DWORD_LENGTH::DWORD_LENGTH_STORE_DWORD); *miStoreDataSettingSbaBufferAddress2 = setSbaBufferAddress; MI_BATCH_BUFFER_START bbCmd = GfxFamily::cmdInitBatchBufferStart; bbCmd.setAddressSpaceIndicator(MI_BATCH_BUFFER_START::ADDRESS_SPACE_INDICATOR_PPGTT); 
bbCmd.setBatchBufferStartAddress(addressOfSDI); bbCmd.setSecondLevelBatchBuffer(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH); *newBuffer = bbCmd; auto storeSbaField = GfxFamily::cmdInitStoreDataImm; storeSbaField.setStoreQword(true); storeSbaField.setAddress(0x0); storeSbaField.setDataDword0(0xdeadbeef); storeSbaField.setDataDword1(0xbaadfeed); storeSbaField.setDwordLength(MI_STORE_DATA_IMM::DWORD_LENGTH_STORE_QWORD); *miStoreSbaField = storeSbaField; } if (fieldOffsetAndValue.size()) { auto previousBuffer = cmdStream.getSpaceForCmd(); const auto addressOfPreviousBuffer = ptrOffset(cmdStreamGpuBase, ptrDiff(reinterpret_cast(cmdStream.getSpace(0)), cmdStreamCpuBase)); MI_BATCH_BUFFER_START bbCmd = GfxFamily::cmdInitBatchBufferStart; bbCmd.setAddressSpaceIndicator(MI_BATCH_BUFFER_START::ADDRESS_SPACE_INDICATOR_PPGTT); bbCmd.setBatchBufferStartAddress(addressOfPreviousBuffer); bbCmd.setSecondLevelBatchBuffer(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH); *previousBuffer = bbCmd; auto arbCmd = GfxFamily::cmdInitArbCheck; auto arb = cmdStream.getSpaceForCmd(); arbCmd.setPreParserDisable(false); *arb = arbCmd; } } } // namespace L0compute-runtime-22.14.22890/level_zero/core/source/debugger/linux/000077500000000000000000000000001422164147700246735ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/source/debugger/linux/CMakeLists.txt000066400000000000000000000004621422164147700274350ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_DEBUGGER_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/debugger_l0_linux.cpp ) if(UNIX) set_property(GLOBAL PROPERTY L0_SRCS_DEBUGGER_LINUX ${L0_SRCS_DEBUGGER_LINUX}) endif() compute-runtime-22.14.22890/level_zero/core/source/debugger/linux/debugger_l0_linux.cpp000066400000000000000000000060331422164147700307770ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel 
Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/device/device.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/kernel/debug_data.h" #include "shared/source/os_interface/linux/drm_allocation.h" #include "shared/source/os_interface/linux/drm_neo.h" #include "shared/source/os_interface/os_interface.h" #include "level_zero/core/source/debugger/debugger_l0.h" namespace L0 { bool DebuggerL0::initDebuggingInOs(NEO::OSInterface *osInterface) { if (osInterface != nullptr) { auto drm = osInterface->getDriverModel()->as(); if (drm->isVmBindAvailable() && drm->isPerContextVMRequired()) { drm->registerResourceClasses(); return true; } } return false; } void DebuggerL0::registerElf(NEO::DebugData *debugData, NEO::GraphicsAllocation *isaAllocation) { if (device->getRootDeviceEnvironment().osInterface.get() != nullptr) { auto drm = device->getRootDeviceEnvironment().osInterface->getDriverModel()->as(); auto handle = drm->registerResource(NEO::Drm::ResourceClass::Elf, debugData->vIsa, debugData->vIsaSize); static_cast(isaAllocation)->linkWithRegisteredHandle(handle); } } bool DebuggerL0::attachZebinModuleToSegmentAllocations(const StackVec &allocs, uint32_t &moduleHandle) { if (device->getRootDeviceEnvironment().osInterface == nullptr) { return false; } auto drm = device->getRootDeviceEnvironment().osInterface->getDriverModel()->as(); uint32_t segmentCount = static_cast(allocs.size()); moduleHandle = drm->registerResource(NEO::Drm::ResourceClass::L0ZebinModule, &segmentCount, sizeof(uint32_t)); for (auto &allocation : allocs) { auto drmAllocation = static_cast(allocation); drmAllocation->linkWithRegisteredHandle(moduleHandle); } return true; } bool DebuggerL0::removeZebinModule(uint32_t moduleHandle) { if (device->getRootDeviceEnvironment().osInterface == nullptr || moduleHandle == 0) { return false; } auto drm = device->getRootDeviceEnvironment().osInterface->getDriverModel()->as(); drm->unregisterResource(moduleHandle); return 
true; } void DebuggerL0::notifyCommandQueueCreated() { if (device->getRootDeviceEnvironment().osInterface.get() != nullptr) { if (++commandQueueCount == 1) { auto drm = device->getRootDeviceEnvironment().osInterface->getDriverModel()->as(); uuidL0CommandQueueHandle = drm->notifyFirstCommandQueueCreated(); } } } void DebuggerL0::notifyCommandQueueDestroyed() { if (device->getRootDeviceEnvironment().osInterface.get() != nullptr) { if (--commandQueueCount == 0) { auto drm = device->getRootDeviceEnvironment().osInterface->getDriverModel()->as(); drm->notifyLastCommandQueueDestroyed(uuidL0CommandQueueHandle); } } } } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/debugger/windows/000077500000000000000000000000001422164147700252265ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/source/debugger/windows/CMakeLists.txt000066400000000000000000000004731422164147700277720ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_DEBUGGER_WINDOWS ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/debugger_l0_windows.cpp ) if(WIN32) set_property(GLOBAL PROPERTY L0_SRCS_DEBUGGER_WINDOWS ${L0_SRCS_DEBUGGER_WINDOWS}) endif() compute-runtime-22.14.22890/level_zero/core/source/debugger/windows/debugger_l0_windows.cpp000066400000000000000000000015211422164147700316620ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/device/device.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/kernel/debug_data.h" #include "level_zero/core/source/debugger/debugger_l0.h" namespace L0 { bool DebuggerL0::initDebuggingInOs(NEO::OSInterface *osInterface) { return false; } void DebuggerL0::registerElf(NEO::DebugData *debugData, NEO::GraphicsAllocation *isaAllocation) { } bool DebuggerL0::attachZebinModuleToSegmentAllocations(const StackVec &kernelAlloc, uint32_t &moduleHandle) 
{ return false; } bool DebuggerL0::removeZebinModule(uint32_t moduleHandle) { return false; } void DebuggerL0::notifyCommandQueueCreated() { } void DebuggerL0::notifyCommandQueueDestroyed() { } } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/device/000077500000000000000000000000001422164147700232075ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/source/device/device.h000066400000000000000000000165771422164147700246370ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/preemption_mode.h" #include "shared/source/device/device.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/os_interface/os_interface.h" #include "level_zero/core/source/debugger/debugger_l0.h" #include "level_zero/core/source/driver/driver.h" #include "level_zero/core/source/driver/driver_handle.h" #include "level_zero/core/source/module/module.h" #include #include #include "CL/cl.h" static_assert(NEO::HwInfoConfig::uuidSize == ZE_MAX_DEVICE_UUID_SIZE); struct _ze_device_handle_t {}; namespace NEO { class Device; class MemoryManager; class SourceLevelDebugger; struct DeviceInfo; } // namespace NEO namespace L0 { struct DriverHandle; struct BuiltinFunctionsLib; struct ExecutionEnvironment; class MetricDeviceContext; struct SysmanDevice; struct DebugSession; enum class ModuleType; struct Device : _ze_device_handle_t { uint32_t getRootDeviceIndex() const { return neoDevice->getRootDeviceIndex(); } NEO::Device *getNEODevice() const { return this->neoDevice; } virtual ze_result_t canAccessPeer(ze_device_handle_t hPeerDevice, ze_bool_t *value) = 0; virtual ze_result_t createCommandList(const ze_command_list_desc_t *desc, ze_command_list_handle_t *commandList) = 0; virtual ze_result_t createCommandListImmediate(const ze_command_queue_desc_t *desc, ze_command_list_handle_t *commandList) = 
0; virtual ze_result_t createCommandQueue(const ze_command_queue_desc_t *desc, ze_command_queue_handle_t *commandQueue) = 0; virtual ze_result_t createImage(const ze_image_desc_t *desc, ze_image_handle_t *phImage) = 0; virtual ze_result_t createModule(const ze_module_desc_t *desc, ze_module_handle_t *module, ze_module_build_log_handle_t *buildLog, ModuleType type) = 0; virtual ze_result_t createSampler(const ze_sampler_desc_t *pDesc, ze_sampler_handle_t *phSampler) = 0; virtual ze_result_t getComputeProperties(ze_device_compute_properties_t *pComputeProperties) = 0; virtual ze_result_t getP2PProperties(ze_device_handle_t hPeerDevice, ze_device_p2p_properties_t *pP2PProperties) = 0; virtual ze_result_t getKernelProperties(ze_device_module_properties_t *pKernelProperties) = 0; virtual ze_result_t getPciProperties(ze_pci_ext_properties_t *pPciProperties) = 0; virtual ze_result_t getMemoryProperties(uint32_t *pCount, ze_device_memory_properties_t *pMemProperties) = 0; virtual ze_result_t getMemoryAccessProperties(ze_device_memory_access_properties_t *pMemAccessProperties) = 0; virtual ze_result_t getProperties(ze_device_properties_t *pDeviceProperties) = 0; virtual ze_result_t getSubDevices(uint32_t *pCount, ze_device_handle_t *phSubdevices) = 0; virtual ze_result_t getCacheProperties(uint32_t *pCount, ze_device_cache_properties_t *pCacheProperties) = 0; virtual ze_result_t reserveCache(size_t cacheLevel, size_t cacheReservationSize) = 0; virtual ze_result_t setCacheAdvice(void *ptr, size_t regionSize, ze_cache_ext_region_t cacheRegion) = 0; virtual ze_result_t imageGetProperties(const ze_image_desc_t *desc, ze_image_properties_t *pImageProperties) = 0; virtual ze_result_t getDeviceImageProperties(ze_device_image_properties_t *pDeviceImageProperties) = 0; virtual ze_result_t getExternalMemoryProperties(ze_device_external_memory_properties_t *pExternalMemoryProperties) = 0; virtual ze_result_t getGlobalTimestamps(uint64_t *hostTimestamp, uint64_t *deviceTimestamp) = 0; 
virtual ze_result_t getCommandQueueGroupProperties(uint32_t *pCount, ze_command_queue_group_properties_t *pCommandQueueGroupProperties) = 0; virtual ze_result_t getDebugProperties(zet_device_debug_properties_t *pDebugProperties) = 0; virtual ze_result_t systemBarrier() = 0; virtual ~Device() = default; virtual void *getExecEnvironment() = 0; virtual BuiltinFunctionsLib *getBuiltinFunctionsLib() = 0; virtual uint32_t getMOCS(bool l3enabled, bool l1enabled) = 0; virtual uint32_t getMaxNumHwThreads() const = 0; virtual NEO::HwHelper &getHwHelper() = 0; bool isImplicitScalingCapable() const { return implicitScalingCapable; } virtual const NEO::HardwareInfo &getHwInfo() const = 0; virtual NEO::OSInterface &getOsInterface() = 0; virtual uint32_t getPlatformInfo() const = 0; virtual MetricDeviceContext &getMetricDeviceContext() = 0; virtual DebugSession *getDebugSession(const zet_debug_config_t &config) = 0; virtual DebugSession *createDebugSession(const zet_debug_config_t &config, ze_result_t &result) = 0; virtual void removeDebugSession() = 0; virtual ze_result_t activateMetricGroupsDeferred(uint32_t count, zet_metric_group_handle_t *phMetricGroups) = 0; virtual void activateMetricGroups() = 0; virtual DriverHandle *getDriverHandle() = 0; virtual void setDriverHandle(DriverHandle *driverHandle) = 0; static Device *fromHandle(ze_device_handle_t handle) { return static_cast(handle); } inline ze_device_handle_t toHandle() { return this; } static Device *create(DriverHandle *driverHandle, NEO::Device *neoDevice, bool isSubDevice, ze_result_t *returnValue); static Device *create(DriverHandle *driverHandle, NEO::Device *neoDevice, bool isSubDevice, ze_result_t *returnValue, L0::Device *deviceL0); static Device *deviceReinit(DriverHandle *driverHandle, L0::Device *device, std::unique_ptr &neoDevice, ze_result_t *returnValue); virtual NEO::PreemptionMode getDevicePreemptionMode() const = 0; virtual const NEO::DeviceInfo &getDeviceInfo() const = 0; NEO::SourceLevelDebugger 
*getSourceLevelDebugger() { return getNEODevice()->getSourceLevelDebugger(); } DebuggerL0 *getL0Debugger() { auto debugger = getNEODevice()->getDebugger(); if (debugger) { return !debugger->isLegacy() ? static_cast(debugger) : nullptr; } return nullptr; } virtual NEO::GraphicsAllocation *getDebugSurface() const = 0; virtual NEO::GraphicsAllocation *allocateManagedMemoryFromHostPtr(void *buffer, size_t size, struct CommandList *commandList) = 0; virtual NEO::GraphicsAllocation *allocateMemoryFromHostPtr(const void *buffer, size_t size, bool hostCopyAllowed) = 0; virtual void setSysmanHandle(SysmanDevice *pSysmanDevice) = 0; virtual SysmanDevice *getSysmanHandle() = 0; virtual ze_result_t getCsrForOrdinalAndIndex(NEO::CommandStreamReceiver **csr, uint32_t ordinal, uint32_t index) = 0; virtual ze_result_t getCsrForLowPriority(NEO::CommandStreamReceiver **csr) = 0; virtual NEO::GraphicsAllocation *obtainReusableAllocation(size_t requiredSize, NEO::AllocationType type) = 0; virtual void storeReusableAllocation(NEO::GraphicsAllocation &alloc) = 0; protected: NEO::Device *neoDevice = nullptr; bool implicitScalingCapable = false; }; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/device/device_imp.cpp000066400000000000000000001462531422164147700260320ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/device/device_imp.h" #include "shared/source/built_ins/sip.h" #include "shared/source/command_container/implicit_scaling.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/device/device.h" #include "shared/source/device/device_info.h" #include "shared/source/device/sub_device.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/common_types.h" 
#include "shared/source/helpers/constants.h" #include "shared/source/helpers/engine_node_helper.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/string.h" #include "shared/source/helpers/topology_map.h" #include "shared/source/kernel/grf_config.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/os_interface/hw_info_config.h" #include "shared/source/os_interface/os_interface.h" #include "shared/source/os_interface/os_time.h" #include "shared/source/source_level_debugger/source_level_debugger.h" #include "shared/source/utilities/debug_settings_reader_creator.h" #include "level_zero/core/source/builtin/builtin_functions_lib.h" #include "level_zero/core/source/cache/cache_reservation.h" #include "level_zero/core/source/cmdlist/cmdlist.h" #include "level_zero/core/source/cmdqueue/cmdqueue.h" #include "level_zero/core/source/context/context_imp.h" #include "level_zero/core/source/driver/driver_handle_imp.h" #include "level_zero/core/source/event/event.h" #include "level_zero/core/source/hw_helpers/l0_hw_helper.h" #include "level_zero/core/source/image/image.h" #include "level_zero/core/source/module/module.h" #include "level_zero/core/source/printf_handler/printf_handler.h" #include "level_zero/core/source/sampler/sampler.h" #include "level_zero/tools/source/debug/debug_session.h" #include "level_zero/tools/source/metrics/metric.h" #include "level_zero/tools/source/sysman/sysman.h" namespace NEO { bool releaseFP64Override(); } // namespace NEO namespace L0 { DriverHandle *DeviceImp::getDriverHandle() { return this->driverHandle; } void DeviceImp::setDriverHandle(DriverHandle *driverHandle) { this->driverHandle = driverHandle; } ze_result_t DeviceImp::canAccessPeer(ze_device_handle_t hPeerDevice, ze_bool_t *value) { *value = false; DeviceImp *pPeerDevice = static_cast(Device::fromHandle(hPeerDevice)); uint32_t peerRootDeviceIndex = pPeerDevice->getNEODevice()->getRootDeviceIndex(); if 
(this->crossAccessEnabledDevices.find(peerRootDeviceIndex) != this->crossAccessEnabledDevices.end()) { *value = this->crossAccessEnabledDevices[peerRootDeviceIndex]; } else if (this->getNEODevice()->getRootDeviceIndex() == peerRootDeviceIndex) { *value = true; } else { ze_command_list_handle_t commandList = nullptr; ze_command_list_desc_t listDescriptor = {}; listDescriptor.stype = ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC; listDescriptor.pNext = nullptr; listDescriptor.flags = 0; listDescriptor.commandQueueGroupOrdinal = 0; ze_command_queue_handle_t commandQueue = nullptr; ze_command_queue_desc_t queueDescriptor = {}; queueDescriptor.stype = ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC; queueDescriptor.pNext = nullptr; queueDescriptor.flags = 0; queueDescriptor.mode = ZE_COMMAND_QUEUE_MODE_DEFAULT; queueDescriptor.priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL; queueDescriptor.ordinal = 0; queueDescriptor.index = 0; this->createCommandList(&listDescriptor, &commandList); this->createCommandQueue(&queueDescriptor, &commandQueue); auto driverHandle = this->getDriverHandle(); DriverHandleImp *driverHandleImp = static_cast(driverHandle); ze_context_handle_t context; ze_context_desc_t contextDesc = {}; contextDesc.stype = ZE_STRUCTURE_TYPE_CONTEXT_DESC; driverHandleImp->createContext(&contextDesc, 0u, nullptr, &context); ContextImp *contextImp = static_cast(context); void *memory = nullptr; void *peerMemory = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; deviceDesc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC; deviceDesc.ordinal = 0; deviceDesc.flags = 0; deviceDesc.pNext = nullptr; ze_device_mem_alloc_desc_t peerDeviceDesc = {}; peerDeviceDesc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC; peerDeviceDesc.ordinal = 0; peerDeviceDesc.flags = 0; peerDeviceDesc.pNext = nullptr; contextImp->allocDeviceMem(this->toHandle(), &deviceDesc, 8, 1, &memory); contextImp->allocDeviceMem(hPeerDevice, &peerDeviceDesc, 8, 1, &peerMemory); auto ret = 
L0::CommandList::fromHandle(commandList)->appendMemoryCopy(peerMemory, memory, 8, nullptr, 0, nullptr); L0::CommandList::fromHandle(commandList)->close(); if (ret == ZE_RESULT_SUCCESS) { ret = L0::CommandQueue::fromHandle(commandQueue)->executeCommandLists(1, &commandList, nullptr, true); if (ret == ZE_RESULT_SUCCESS) { this->crossAccessEnabledDevices[peerRootDeviceIndex] = true; pPeerDevice->crossAccessEnabledDevices[this->getNEODevice()->getRootDeviceIndex()] = true; L0::CommandQueue::fromHandle(commandQueue)->synchronize(std::numeric_limits::max()); *value = true; } } contextImp->freeMem(peerMemory); contextImp->freeMem(memory); L0::Context::fromHandle(context)->destroy(); L0::CommandQueue::fromHandle(commandQueue)->destroy(); L0::CommandList::fromHandle(commandList)->destroy(); } return ZE_RESULT_SUCCESS; } ze_result_t DeviceImp::createCommandList(const ze_command_list_desc_t *desc, ze_command_list_handle_t *commandList) { auto &engineGroups = getActiveDevice()->getRegularEngineGroups(); if (desc->commandQueueGroupOrdinal >= engineGroups.size()) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } auto productFamily = neoDevice->getHardwareInfo().platform.eProductFamily; ze_result_t returnValue = ZE_RESULT_SUCCESS; auto engineGroupType = engineGroups[desc->commandQueueGroupOrdinal].engineGroupType; *commandList = CommandList::create(productFamily, this, engineGroupType, desc->flags, returnValue); return returnValue; } ze_result_t DeviceImp::createCommandListImmediate(const ze_command_queue_desc_t *desc, ze_command_list_handle_t *phCommandList) { auto &engineGroups = getActiveDevice()->getRegularEngineGroups(); if (desc->ordinal >= engineGroups.size()) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } auto productFamily = neoDevice->getHardwareInfo().platform.eProductFamily; auto engineGroupType = engineGroups[desc->ordinal].engineGroupType; ze_result_t returnValue = ZE_RESULT_SUCCESS; *phCommandList = CommandList::createImmediate(productFamily, this, desc, false, 
engineGroupType, returnValue); return returnValue; } ze_result_t DeviceImp::createCommandQueue(const ze_command_queue_desc_t *desc, ze_command_queue_handle_t *commandQueue) { auto &platform = neoDevice->getHardwareInfo().platform; NEO::CommandStreamReceiver *csr = nullptr; auto &engineGroups = getActiveDevice()->getRegularEngineGroups(); if (desc->ordinal >= engineGroups.size()) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } bool isCopyOnly = NEO::EngineHelper::isCopyOnlyEngineType(engineGroups[desc->ordinal].engineGroupType); if (desc->priority == ZE_COMMAND_QUEUE_PRIORITY_PRIORITY_LOW && !isCopyOnly) { getCsrForLowPriority(&csr); } else { auto ret = getCsrForOrdinalAndIndex(&csr, desc->ordinal, desc->index); if (ret != ZE_RESULT_SUCCESS) { return ret; } } UNRECOVERABLE_IF(csr == nullptr); ze_result_t returnValue = ZE_RESULT_SUCCESS; *commandQueue = CommandQueue::create(platform.eProductFamily, this, csr, desc, isCopyOnly, false, returnValue); return returnValue; } ze_result_t DeviceImp::getCommandQueueGroupProperties(uint32_t *pCount, ze_command_queue_group_properties_t *pCommandQueueGroupProperties) { NEO::Device *activeDevice = getActiveDevice(); auto &engineGroups = activeDevice->getRegularEngineGroups(); uint32_t numEngineGroups = static_cast(engineGroups.size()); if (*pCount == 0) { *pCount = numEngineGroups; return ZE_RESULT_SUCCESS; } const auto &hardwareInfo = this->neoDevice->getHardwareInfo(); auto &hwHelper = NEO::HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); auto &l0HwHelper = L0HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); *pCount = std::min(numEngineGroups, *pCount); for (uint32_t i = 0; i < *pCount; i++) { if (engineGroups[i].engineGroupType == NEO::EngineGroupType::RenderCompute) { pCommandQueueGroupProperties[i].flags = ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE | ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY | ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_METRICS | ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COOPERATIVE_KERNELS; 
// Remainder of the per-group loop of DeviceImp::getCommandQueueGroupProperties (the function starts before this span).
            pCommandQueueGroupProperties[i].maxMemoryFillPatternSize = std::numeric_limits::max();
        }
        if (engineGroups[i].engineGroupType == NEO::EngineGroupType::Compute) {
            pCommandQueueGroupProperties[i].flags = ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE |
                                                    ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY |
                                                    ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COOPERATIVE_KERNELS;
            pCommandQueueGroupProperties[i].maxMemoryFillPatternSize = std::numeric_limits::max();
        }
        if (engineGroups[i].engineGroupType == NEO::EngineGroupType::Copy) {
            pCommandQueueGroupProperties[i].flags = ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY;
            // Copy groups take their fill-pattern limit from the hardware helper instead of the numeric maximum.
            pCommandQueueGroupProperties[i].maxMemoryFillPatternSize = hwHelper.getMaxFillPaternSizeForCopyEngine();
        }
        // Applied to every group, whatever its type, after the type-specific fields above.
        l0HwHelper.setAdditionalGroupProperty(pCommandQueueGroupProperties[i], engineGroups[i].engineGroupType);
        pCommandQueueGroupProperties[i].numQueues = static_cast(engineGroups[i].engines.size());
    }

    return ZE_RESULT_SUCCESS;
}

// Creates an image for this device through Image::create.
// *phImage is written only when Image::create reports ZE_RESULT_SUCCESS; the status is returned as is.
ze_result_t DeviceImp::createImage(const ze_image_desc_t *desc, ze_image_handle_t *phImage) {
    auto productFamily = neoDevice->getHardwareInfo().platform.eProductFamily;
    Image *pImage = nullptr;
    auto result = Image::create(productFamily, this, desc, &pImage);
    if (result == ZE_RESULT_SUCCESS) {
        *phImage = pImage->toHandle();
    }
    return result;
}

// Creates a sampler for this device and stores the result of Sampler::create in *sampler.
// NOTE(review): ZE_RESULT_SUCCESS is returned without inspecting what Sampler::create produced -
// confirm whether that call can yield a null handle.
ze_result_t DeviceImp::createSampler(const ze_sampler_desc_t *desc, ze_sampler_handle_t *sampler) {
    auto productFamily = neoDevice->getHardwareInfo().platform.eProductFamily;
    *sampler = Sampler::create(productFamily, this, desc);

    return ZE_RESULT_SUCCESS;
}

// Builds a module from desc.
// When buildLog is non-null a ModuleBuildLog is created and its handle is published to the caller
// before the build starts, so the handle has been written even when the build fails.
// Returns ZE_RESULT_ERROR_MODULE_BUILD_FAILURE when Module::create returns nullptr.
ze_result_t DeviceImp::createModule(const ze_module_desc_t *desc, ze_module_handle_t *module,
                                    ze_module_build_log_handle_t *buildLog, ModuleType type) {
    ModuleBuildLog *moduleBuildLog = nullptr;

    if (buildLog) {
        moduleBuildLog = ModuleBuildLog::create();
        *buildLog = moduleBuildLog->toHandle();
    }

    auto modulePtr = Module::create(this, desc, moduleBuildLog, type);
    if (modulePtr == nullptr) {
        return ZE_RESULT_ERROR_MODULE_BUILD_FAILURE;
    }

    *module = modulePtr;

    return ZE_RESULT_SUCCESS;
}

// Fills pComputeProperties from the NEO device info: work-group size limits, shared local memory
// size and the list of sub-group sizes. The three group-count limits are set to a numeric maximum.
// Always returns ZE_RESULT_SUCCESS.
ze_result_t DeviceImp::getComputeProperties(ze_device_compute_properties_t *pComputeProperties) {
    const auto &deviceInfo = this->neoDevice->getDeviceInfo();

    pComputeProperties->maxTotalGroupSize = static_cast(deviceInfo.maxWorkGroupSize);

    pComputeProperties->maxGroupSizeX = static_cast(deviceInfo.maxWorkItemSizes[0]);
    pComputeProperties->maxGroupSizeY = static_cast(deviceInfo.maxWorkItemSizes[1]);
    pComputeProperties->maxGroupSizeZ = static_cast(deviceInfo.maxWorkItemSizes[2]);

    pComputeProperties->maxGroupCountX = std::numeric_limits::max();
    pComputeProperties->maxGroupCountY = std::numeric_limits::max();
    pComputeProperties->maxGroupCountZ = std::numeric_limits::max();

    pComputeProperties->maxSharedLocalMemory = static_cast(deviceInfo.localMemSize);

    pComputeProperties->numSubGroupSizes = static_cast(deviceInfo.maxSubGroups.size());

    // NOTE(review): no bound against the capacity of subGroupSizes[] is visible here - verify that
    // maxSubGroups can never hold more entries than the output array.
    for (uint32_t i = 0; i < pComputeProperties->numSubGroupSizes; ++i) {
        pComputeProperties->subGroupSizes[i] = static_cast(deviceInfo.maxSubGroups[i]);
    }

    return ZE_RESULT_SUCCESS;
}

// Reports peer-to-peer capabilities between this device and hPeerDevice.
// ACCESS is reported only when both devices set p2pAccessSupported; ATOMICS is added only when
// both also set p2pAtomicAccessSupported.
// NOTE(review): pP2PProperties->flags is not written at all when access is unsupported, so the
// caller's previous value is left in place - confirm this is intended.
ze_result_t DeviceImp::getP2PProperties(ze_device_handle_t hPeerDevice,
                                        ze_device_p2p_properties_t *pP2PProperties) {

    DeviceImp *peerDevice = static_cast(Device::fromHandle(hPeerDevice));
    if (this->getNEODevice()->getHardwareInfo().capabilityTable.p2pAccessSupported &&
        peerDevice->getNEODevice()->getHardwareInfo().capabilityTable.p2pAccessSupported) {
        pP2PProperties->flags = ZE_DEVICE_P2P_PROPERTY_FLAG_ACCESS;
        if (this->getNEODevice()->getHardwareInfo().capabilityTable.p2pAtomicAccessSupported &&
            peerDevice->getNEODevice()->getHardwareInfo().capabilityTable.p2pAtomicAccessSupported) {
            pP2PProperties->flags |= ZE_DEVICE_P2P_PROPERTY_FLAG_ATOMICS;
        }
    }

    return ZE_RESULT_SUCCESS;
}

// Reports the PCI address and the cached maximum PCI speed of the device.
// Returns ZE_RESULT_ERROR_UNINITIALIZED when no driverInfo is available or when any component of
// the PCI bus info equals PhysicalDevicePciBusInfo::InvalidValue.
ze_result_t DeviceImp::getPciProperties(ze_pci_ext_properties_t *pPciProperties) {
    if (!driverInfo) {
        return ZE_RESULT_ERROR_UNINITIALIZED;
    }
    auto pciBusInfo = driverInfo->getPciBusInfo();
    // All four components (domain, bus, device, function) must be valid.
    auto isPciValid = [&](auto pci) -> bool {
        return (pci.pciDomain != NEO::PhysicalDevicePciBusInfo::InvalidValue &&
                pci.pciBus != NEO::PhysicalDevicePciBusInfo::InvalidValue &&
                pci.pciDevice != NEO::PhysicalDevicePciBusInfo::InvalidValue &&
                pci.pciFunction != NEO::PhysicalDevicePciBusInfo::InvalidValue);
    };
    if (!isPciValid(pciBusInfo)) {
        return ZE_RESULT_ERROR_UNINITIALIZED;
    }
    pPciProperties->address = {pciBusInfo.pciDomain, pciBusInfo.pciBus,
                               pciBusInfo.pciDevice, pciBusInfo.pciFunction};
    pPciProperties->maxSpeed = pciMaxSpeed;
    return ZE_RESULT_SUCCESS;
}

// Reports device memory properties; exactly one memory entry is exposed.
// With *pCount == 0 only the count (1) is returned. A larger count is clamped to 1.
// Returns ZE_RESULT_ERROR_INVALID_ARGUMENT when a non-zero count comes with a null output pointer.
ze_result_t DeviceImp::getMemoryProperties(uint32_t *pCount, ze_device_memory_properties_t *pMemProperties) {
    if (*pCount == 0) {
        *pCount = 1;
        return ZE_RESULT_SUCCESS;
    }

    if (*pCount > 1) {
        *pCount = 1;
    }

    if (nullptr == pMemProperties) {
        return ZE_RESULT_ERROR_INVALID_ARGUMENT;
    }

    const auto &deviceInfo = this->neoDevice->getDeviceInfo();
    auto &hwInfo = this->getHwInfo();
    auto &hwInfoConfig = *NEO::HwInfoConfig::get(hwInfo.platform.eProductFamily);
    strcpy_s(pMemProperties->name, ZE_MAX_DEVICE_NAME, hwInfoConfig.getDeviceMemoryName().c_str());
    pMemProperties->maxClockRate = hwInfoConfig.getDeviceMemoryMaxClkRate(&hwInfo);
    pMemProperties->maxBusWidth = deviceInfo.addressBits;
    // The full global memory size is reported when implicit scaling is possible or the NEO device
    // has no generic sub-devices; otherwise it is divided evenly by this->numSubDevices.
    if (this->isImplicitScalingCapable() ||
        this->getNEODevice()->getNumGenericSubDevices() == 0) {
        pMemProperties->totalSize = deviceInfo.globalMemSize;
    } else {
        pMemProperties->totalSize = deviceInfo.globalMemSize / this->numSubDevices;
    }

    pMemProperties->flags = 0;

    return ZE_RESULT_SUCCESS;
}

// Reports the access capabilities of host, device and shared allocations as provided by the
// product-specific HwInfoConfig. The function body continues past this span.
ze_result_t DeviceImp::getMemoryAccessProperties(ze_device_memory_access_properties_t *pMemAccessProperties) {
    auto &hwInfo = this->getHwInfo();
    auto &hwInfoConfig = *NEO::HwInfoConfig::get(hwInfo.platform.eProductFamily);
    pMemAccessProperties->hostAllocCapabilities =
        static_cast(hwInfoConfig.getHostMemCapabilities(&hwInfo));

    pMemAccessProperties->deviceAllocCapabilities =
        static_cast(hwInfoConfig.getDeviceMemCapabilities());

    pMemAccessProperties->sharedSingleDeviceAllocCapabilities =
static_cast(hwInfoConfig.getSingleDeviceSharedMemCapabilities()); pMemAccessProperties->sharedCrossDeviceAllocCapabilities = {}; if (this->getNEODevice()->getHardwareInfo().capabilityTable.p2pAccessSupported) { pMemAccessProperties->sharedCrossDeviceAllocCapabilities |= ZE_MEMORY_ACCESS_CAP_FLAG_RW | ZE_MEMORY_ACCESS_CAP_FLAG_CONCURRENT; if (this->getNEODevice()->getHardwareInfo().capabilityTable.p2pAtomicAccessSupported) { pMemAccessProperties->sharedCrossDeviceAllocCapabilities |= ZE_MEMORY_ACCESS_CAP_FLAG_ATOMIC | ZE_MEMORY_ACCESS_CAP_FLAG_CONCURRENT_ATOMIC; } } pMemAccessProperties->sharedSystemAllocCapabilities = static_cast(hwInfoConfig.getSharedSystemMemCapabilities(&hwInfo)); return ZE_RESULT_SUCCESS; } static constexpr ze_device_fp_flags_t defaultFpFlags = static_cast(ZE_DEVICE_FP_FLAG_ROUND_TO_NEAREST | ZE_DEVICE_FP_FLAG_ROUND_TO_ZERO | ZE_DEVICE_FP_FLAG_ROUND_TO_INF | ZE_DEVICE_FP_FLAG_INF_NAN | ZE_DEVICE_FP_FLAG_DENORM | ZE_DEVICE_FP_FLAG_FMA); ze_result_t DeviceImp::getKernelProperties(ze_device_module_properties_t *pKernelProperties) { const auto &hardwareInfo = this->neoDevice->getHardwareInfo(); const auto &deviceInfo = this->neoDevice->getDeviceInfo(); auto &hwHelper = NEO::HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); std::string ilVersion = deviceInfo.ilVersion; size_t majorVersionPos = ilVersion.find('_'); size_t minorVersionPos = ilVersion.find('.'); if (majorVersionPos != std::string::npos && minorVersionPos != std::string::npos) { uint32_t majorSpirvVersion = static_cast(std::stoul(ilVersion.substr(majorVersionPos + 1, minorVersionPos))); uint32_t minorSpirvVersion = static_cast(std::stoul(ilVersion.substr(minorVersionPos + 1))); pKernelProperties->spirvVersionSupported = ZE_MAKE_VERSION(majorSpirvVersion, minorSpirvVersion); } else { DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } pKernelProperties->flags = ZE_DEVICE_MODULE_FLAG_FP16; if (hardwareInfo.capabilityTable.ftrSupportsInteger64BitAtomics) { 
pKernelProperties->flags |= ZE_DEVICE_MODULE_FLAG_INT64_ATOMICS; } pKernelProperties->fp16flags = defaultFpFlags; pKernelProperties->fp32flags = defaultFpFlags; if (NEO::DebugManager.flags.OverrideDefaultFP64Settings.get() == 1) { pKernelProperties->flags |= ZE_DEVICE_MODULE_FLAG_FP64; pKernelProperties->fp64flags = defaultFpFlags | ZE_DEVICE_FP_FLAG_ROUNDED_DIVIDE_SQRT; pKernelProperties->fp32flags |= ZE_DEVICE_FP_FLAG_ROUNDED_DIVIDE_SQRT; } else { pKernelProperties->fp64flags = 0; if (hardwareInfo.capabilityTable.ftrSupportsFP64) { pKernelProperties->flags |= ZE_DEVICE_MODULE_FLAG_FP64; pKernelProperties->fp64flags |= defaultFpFlags; if (hardwareInfo.capabilityTable.ftrSupports64BitMath) { pKernelProperties->fp64flags |= ZE_DEVICE_FP_FLAG_ROUNDED_DIVIDE_SQRT; pKernelProperties->fp32flags |= ZE_DEVICE_FP_FLAG_ROUNDED_DIVIDE_SQRT; } } } pKernelProperties->nativeKernelSupported.id[0] = 0; processAdditionalKernelProperties(hwHelper, pKernelProperties); pKernelProperties->maxArgumentsSize = static_cast(this->neoDevice->getDeviceInfo().maxParameterSize); pKernelProperties->printfBufferSize = static_cast(this->neoDevice->getDeviceInfo().printfBufferSize); auto &hwInfo = this->getHwInfo(); auto &hwInfoConfig = *NEO::HwInfoConfig::get(hwInfo.platform.eProductFamily); void *pNext = pKernelProperties->pNext; while (pNext) { ze_base_desc_t *extendedProperties = reinterpret_cast(pKernelProperties->pNext); if (extendedProperties->stype == ZE_STRUCTURE_TYPE_FLOAT_ATOMIC_EXT_PROPERTIES) { ze_float_atomic_ext_properties_t *floatProperties = reinterpret_cast(extendedProperties); hwInfoConfig.getKernelExtendedProperties(&floatProperties->fp16Flags, &floatProperties->fp32Flags, &floatProperties->fp64Flags); } else if (extendedProperties->stype == ZE_STRUCTURE_TYPE_SCHEDULING_HINT_EXP_PROPERTIES) { ze_scheduling_hint_exp_properties_t *hintProperties = reinterpret_cast(extendedProperties); auto supportedThreadArbitrationPolicies = 
hwInfoConfig.getKernelSupportedThreadArbitrationPolicies(); hintProperties->schedulingHintFlags = 0; for (auto &p : supportedThreadArbitrationPolicies) { switch (p) { case NEO::ThreadArbitrationPolicy::AgeBased: hintProperties->schedulingHintFlags |= ZE_SCHEDULING_HINT_EXP_FLAG_OLDEST_FIRST; break; case NEO::ThreadArbitrationPolicy::RoundRobin: hintProperties->schedulingHintFlags |= ZE_SCHEDULING_HINT_EXP_FLAG_ROUND_ROBIN; break; case NEO::ThreadArbitrationPolicy::RoundRobinAfterDependency: hintProperties->schedulingHintFlags |= ZE_SCHEDULING_HINT_EXP_FLAG_STALL_BASED_ROUND_ROBIN; break; } } } pNext = const_cast(extendedProperties->pNext); } return ZE_RESULT_SUCCESS; } ze_result_t DeviceImp::getProperties(ze_device_properties_t *pDeviceProperties) { const auto &deviceInfo = this->neoDevice->getDeviceInfo(); const auto &hardwareInfo = this->neoDevice->getHardwareInfo(); auto &hwHelper = NEO::HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); pDeviceProperties->type = ZE_DEVICE_TYPE_GPU; pDeviceProperties->vendorId = deviceInfo.vendorId; pDeviceProperties->deviceId = hardwareInfo.platform.usDeviceID; pDeviceProperties->flags = 0u; std::array deviceUuid; if (this->neoDevice->getUuid(deviceUuid) == false) { this->neoDevice->generateUuid(deviceUuid); } std::copy_n(std::begin(deviceUuid), ZE_MAX_DEVICE_UUID_SIZE, std::begin(pDeviceProperties->uuid.id)); pDeviceProperties->subdeviceId = isSubdevice ? 
static_cast(neoDevice)->getSubDeviceIndex() : 0; pDeviceProperties->coreClockRate = deviceInfo.maxClockFrequency; pDeviceProperties->maxMemAllocSize = this->neoDevice->getDeviceInfo().maxMemAllocSize; pDeviceProperties->maxCommandQueuePriority = 0; pDeviceProperties->maxHardwareContexts = 1024 * 64; pDeviceProperties->numThreadsPerEU = deviceInfo.numThreadsPerEU; pDeviceProperties->physicalEUSimdWidth = hwHelper.getMinimalSIMDSize(); pDeviceProperties->numEUsPerSubslice = hardwareInfo.gtSystemInfo.MaxEuPerSubSlice; if (NEO::DebugManager.flags.DebugApiUsed.get() == 1) { pDeviceProperties->numSubslicesPerSlice = hardwareInfo.gtSystemInfo.MaxSubSlicesSupported / hardwareInfo.gtSystemInfo.MaxSlicesSupported; } else { pDeviceProperties->numSubslicesPerSlice = hardwareInfo.gtSystemInfo.SubSliceCount / hardwareInfo.gtSystemInfo.SliceCount; } pDeviceProperties->numSlices = hardwareInfo.gtSystemInfo.SliceCount; if (isImplicitScalingCapable()) { pDeviceProperties->numSlices *= neoDevice->getNumGenericSubDevices(); } if ((NEO::DebugManager.flags.UseCyclesPerSecondTimer.get() == 1) || (pDeviceProperties->stype == ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES_1_2)) { pDeviceProperties->timerResolution = this->neoDevice->getDeviceInfo().outProfilingTimerClock; } else { pDeviceProperties->timerResolution = this->neoDevice->getDeviceInfo().outProfilingTimerResolution; } pDeviceProperties->timestampValidBits = hardwareInfo.capabilityTable.timestampValidBits; pDeviceProperties->kernelTimestampValidBits = hardwareInfo.capabilityTable.kernelTimestampValidBits; if (hardwareInfo.capabilityTable.isIntegratedDevice) { pDeviceProperties->flags |= ZE_DEVICE_PROPERTY_FLAG_INTEGRATED; } if (isSubdevice) { pDeviceProperties->flags |= ZE_DEVICE_PROPERTY_FLAG_SUBDEVICE; } if (this->neoDevice->getDeviceInfo().errorCorrectionSupport) { pDeviceProperties->flags |= ZE_DEVICE_PROPERTY_FLAG_ECC; } if (hardwareInfo.capabilityTable.supportsOnDemandPageFaults) { pDeviceProperties->flags |= 
ZE_DEVICE_PROPERTY_FLAG_ONDEMANDPAGING; } memset(pDeviceProperties->name, 0, ZE_MAX_DEVICE_NAME); std::string name = getNEODevice()->getDeviceInfo().name; memcpy_s(pDeviceProperties->name, name.length(), name.c_str(), name.length()); return ZE_RESULT_SUCCESS; } ze_result_t DeviceImp::getExternalMemoryProperties(ze_device_external_memory_properties_t *pExternalMemoryProperties) { pExternalMemoryProperties->imageExportTypes = 0u; pExternalMemoryProperties->imageImportTypes = 0u; pExternalMemoryProperties->memoryAllocationExportTypes = ZE_EXTERNAL_MEMORY_TYPE_FLAG_DMA_BUF; pExternalMemoryProperties->memoryAllocationImportTypes = ZE_EXTERNAL_MEMORY_TYPE_FLAG_DMA_BUF; return ZE_RESULT_SUCCESS; } ze_result_t DeviceImp::getGlobalTimestamps(uint64_t *hostTimestamp, uint64_t *deviceTimestamp) { NEO::TimeStampData queueTimeStamp; bool retVal = this->neoDevice->getOSTime()->getCpuGpuTime(&queueTimeStamp); if (!retVal) return ZE_RESULT_ERROR_DEVICE_LOST; *deviceTimestamp = queueTimeStamp.GPUTimeStamp; retVal = this->neoDevice->getOSTime()->getCpuTime(hostTimestamp); if (!retVal) return ZE_RESULT_ERROR_DEVICE_LOST; return ZE_RESULT_SUCCESS; } ze_result_t DeviceImp::getSubDevices(uint32_t *pCount, ze_device_handle_t *phSubdevices) { if (*pCount == 0) { *pCount = this->numSubDevices; return ZE_RESULT_SUCCESS; } if (phSubdevices == nullptr) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } if (*pCount > this->numSubDevices) { *pCount = this->numSubDevices; } for (uint32_t i = 0; i < *pCount; i++) { phSubdevices[i] = this->subDevices[i]; } return ZE_RESULT_SUCCESS; } ze_result_t DeviceImp::getCacheProperties(uint32_t *pCount, ze_device_cache_properties_t *pCacheProperties) { if (*pCount == 0) { *pCount = 1; return ZE_RESULT_SUCCESS; } if (*pCount > 1) { *pCount = 1; } const auto &hardwareInfo = this->getHwInfo(); pCacheProperties[0].cacheSize = hardwareInfo.gtSystemInfo.L3BankCount * 128 * KB; pCacheProperties[0].flags = 0; if (pCacheProperties->pNext) { auto extendedProperties = 
reinterpret_cast(pCacheProperties->pNext); if (extendedProperties->stype == ZE_STRUCTURE_TYPE_CACHE_RESERVATION_EXT_DESC) { auto cacheReservationProperties = reinterpret_cast(extendedProperties); cacheReservationProperties->maxCacheReservationSize = cacheReservation->getMaxCacheReservationSize(); } else { return ZE_RESULT_ERROR_UNSUPPORTED_ENUMERATION; } } return ZE_RESULT_SUCCESS; } ze_result_t DeviceImp::reserveCache(size_t cacheLevel, size_t cacheReservationSize) { if (cacheReservation->getMaxCacheReservationSize() == 0) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } if (cacheLevel == 0) { cacheLevel = 3; } auto result = cacheReservation->reserveCache(cacheLevel, cacheReservationSize); if (result == false) { return ZE_RESULT_ERROR_UNINITIALIZED; } return ZE_RESULT_SUCCESS; } ze_result_t DeviceImp::setCacheAdvice(void *ptr, size_t regionSize, ze_cache_ext_region_t cacheRegion) { if (cacheReservation->getMaxCacheReservationSize() == 0) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } if (cacheRegion == ze_cache_ext_region_t::ZE_CACHE_EXT_REGION_ZE_CACHE_REGION_DEFAULT) { cacheRegion = ze_cache_ext_region_t::ZE_CACHE_EXT_REGION_ZE_CACHE_NON_RESERVED_REGION; } auto result = cacheReservation->setCacheAdvice(ptr, regionSize, cacheRegion); if (result == false) { return ZE_RESULT_ERROR_UNINITIALIZED; } return ZE_RESULT_SUCCESS; } ze_result_t DeviceImp::imageGetProperties(const ze_image_desc_t *desc, ze_image_properties_t *pImageProperties) { const auto &deviceInfo = this->neoDevice->getDeviceInfo(); if (deviceInfo.imageSupport) { pImageProperties->samplerFilterFlags = ZE_IMAGE_SAMPLER_FILTER_FLAG_LINEAR; } else { pImageProperties->samplerFilterFlags = 0; } return ZE_RESULT_SUCCESS; } ze_result_t DeviceImp::getDeviceImageProperties(ze_device_image_properties_t *pDeviceImageProperties) { const auto &deviceInfo = this->neoDevice->getDeviceInfo(); if (deviceInfo.imageSupport) { pDeviceImageProperties->maxImageDims1D = static_cast(deviceInfo.image2DMaxWidth); 
pDeviceImageProperties->maxImageDims2D = static_cast(deviceInfo.image2DMaxHeight); pDeviceImageProperties->maxImageDims3D = static_cast(deviceInfo.image3DMaxDepth); pDeviceImageProperties->maxImageBufferSize = deviceInfo.imageMaxBufferSize; pDeviceImageProperties->maxImageArraySlices = static_cast(deviceInfo.imageMaxArraySize); pDeviceImageProperties->maxSamplers = deviceInfo.maxSamplers; pDeviceImageProperties->maxReadImageArgs = deviceInfo.maxReadImageArgs; pDeviceImageProperties->maxWriteImageArgs = deviceInfo.maxWriteImageArgs; } else { pDeviceImageProperties->maxImageDims1D = 0u; pDeviceImageProperties->maxImageDims2D = 0u; pDeviceImageProperties->maxImageDims3D = 0u; pDeviceImageProperties->maxImageBufferSize = 0u; pDeviceImageProperties->maxImageArraySlices = 0u; pDeviceImageProperties->maxSamplers = 0u; pDeviceImageProperties->maxReadImageArgs = 0u; pDeviceImageProperties->maxWriteImageArgs = 0u; } return ZE_RESULT_SUCCESS; } ze_result_t DeviceImp::getDebugProperties(zet_device_debug_properties_t *pDebugProperties) { bool isDebugAttachAvailable = getOsInterface().isDebugAttachAvailable(); auto &stateSaveAreaHeader = NEO::SipKernel::getBindlessDebugSipKernel(*this->getNEODevice()).getStateSaveAreaHeader(); if (stateSaveAreaHeader.size() == 0) { PRINT_DEBUGGER_INFO_LOG("Context state save area header missing", ""); isDebugAttachAvailable = false; } if (isDebugAttachAvailable && !isSubdevice) { pDebugProperties->flags = zet_device_debug_property_flag_t::ZET_DEVICE_DEBUG_PROPERTY_FLAG_ATTACH; } else { pDebugProperties->flags = 0; } return ZE_RESULT_SUCCESS; } ze_result_t DeviceImp::systemBarrier() { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t DeviceImp::activateMetricGroupsDeferred(uint32_t count, zet_metric_group_handle_t *phMetricGroups) { ze_result_t result = ZE_RESULT_ERROR_UNKNOWN; if (!this->isSubdevice && this->isImplicitScalingCapable()) { for (auto subDevice : this->subDevices) { result = 
subDevice->getMetricDeviceContext().activateMetricGroupsDeferred(count, phMetricGroups); if (result != ZE_RESULT_SUCCESS) break; } } else { result = metricContext->activateMetricGroupsDeferred(count, phMetricGroups); } return result; } void *DeviceImp::getExecEnvironment() { return execEnvironment; } BuiltinFunctionsLib *DeviceImp::getBuiltinFunctionsLib() { return builtins.get(); } uint32_t DeviceImp::getMOCS(bool l3enabled, bool l1enabled) { return getHwHelper().getMocsIndex(*getNEODevice()->getGmmHelper(), l3enabled, l1enabled) << 1; } NEO::HwHelper &DeviceImp::getHwHelper() { const auto &hardwareInfo = neoDevice->getHardwareInfo(); return NEO::HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); } NEO::OSInterface &DeviceImp::getOsInterface() { return *neoDevice->getRootDeviceEnvironment().osInterface; } uint32_t DeviceImp::getPlatformInfo() const { const auto &hardwareInfo = neoDevice->getHardwareInfo(); return hardwareInfo.platform.eRenderCoreFamily; } MetricDeviceContext &DeviceImp::getMetricDeviceContext() { return *metricContext; } void DeviceImp::activateMetricGroups() { if (metricContext != nullptr) { if (metricContext->isImplicitScalingCapable()) { for (uint32_t i = 0; i < numSubDevices; i++) { subDevices[i]->getMetricDeviceContext().activateMetricGroups(); } } else { metricContext->activateMetricGroups(); } } } uint32_t DeviceImp::getMaxNumHwThreads() const { return maxNumHwThreads; } const NEO::HardwareInfo &DeviceImp::getHwInfo() const { return neoDevice->getHardwareInfo(); } // Use this method to reinitialize L0::Device *device, that was created during zeInit, with the help of Device::create Device *Device::deviceReinit(DriverHandle *driverHandle, L0::Device *device, std::unique_ptr &neoDevice, ze_result_t *returnValue) { auto pNeoDevice = neoDevice.release(); return Device::create(driverHandle, pNeoDevice, false, returnValue, device); } Device *Device::create(DriverHandle *driverHandle, NEO::Device *neoDevice, bool isSubDevice, ze_result_t 
// Remainder of the four-argument Device::create overload: it forwards to the five-argument
// overload with no pre-existing L0 device.
                                                                                              *returnValue) {
    return Device::create(driverHandle, neoDevice, isSubDevice, returnValue, nullptr);
}

// Creates (deviceL0 == nullptr) or re-initializes (deviceL0 != nullptr) the L0 device object that
// wraps neoDevice, then recursively creates an L0 device for every NEO sub-device.
//
// driverHandle - driver handle stored in the device.
// neoDevice    - NEO device to wrap; its internal reference count is incremented.
// isSubDevice  - true when creating a sub-device; disables debug surface allocation.
// returnValue  - set to ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE when no compiler interface is
//                available; not written on the success path of this function.
// deviceL0     - existing object to reuse, or nullptr to allocate a new DeviceImp.
//
// Returns the device, or nullptr when creating one of the sub-devices failed.
Device *Device::create(DriverHandle *driverHandle, NEO::Device *neoDevice, bool isSubDevice, ze_result_t *returnValue, L0::Device *deviceL0) {
    L0::DeviceImp *device = nullptr;
    if (deviceL0 == nullptr) {
        device = new DeviceImp;
    } else {
        device = static_cast(deviceL0);
    }

    UNRECOVERABLE_IF(device == nullptr);

    device->setDriverHandle(driverHandle);
    neoDevice->setSpecializedDevice(device);

    device->neoDevice = neoDevice;
    // Reference taken here is dropped in DeviceImp::releaseResources.
    neoDevice->incRefInternal();

    auto &hwInfo = neoDevice->getHardwareInfo();
    auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily);

    device->execEnvironment = (void *)neoDevice->getExecutionEnvironment();
    device->allocationsForReuse = std::make_unique();
    bool platformImplicitScaling = hwHelper.platformSupportsImplicitScaling(hwInfo);
    device->implicitScalingCapable = NEO::ImplicitScalingHelper::isImplicitScalingEnabled(neoDevice->getDeviceBitfield(), platformImplicitScaling);
    device->metricContext = MetricDeviceContext::create(*device);
    device->builtins = BuiltinFunctionsLib::create(
        device, neoDevice->getBuiltIns());
    device->cacheReservation = CacheReservation::create(*device);
    device->maxNumHwThreads = NEO::HwHelper::getMaxThreadsForVfe(hwInfo);

    auto osInterface = neoDevice->getRootDeviceEnvironment().osInterface.get();
    device->driverInfo.reset(NEO::DriverInfo::create(&hwInfo, osInterface));

    // Default debug surface size; replaced by the SIP kernel's state save area size when a SIP
    // kernel is initialized below.
    auto debugSurfaceSize = hwHelper.getSipKernelMaxDbgSurfaceSize(hwInfo);
    std::vector stateSaveAreaHeader;

    if (neoDevice->getCompilerInterface()) {
        // A SIP kernel is only needed for mid-thread preemption or when a debugger is present.
        if (neoDevice->getPreemptionMode() == NEO::PreemptionMode::MidThread || neoDevice->getDebugger()) {
            bool ret = NEO::SipKernel::initSipKernel(NEO::SipKernel::getSipKernelType(*neoDevice), *neoDevice);
            UNRECOVERABLE_IF(!ret);

            stateSaveAreaHeader = NEO::SipKernel::getSipKernel(*neoDevice).getStateSaveAreaHeader();
            debugSurfaceSize = NEO::SipKernel::getSipKernel(*neoDevice).getStateSaveAreaSize(neoDevice);
        }
    } else {
        // NOTE(review): the error is recorded but construction continues and a device is still
        // returned - confirm callers check *returnValue.
        *returnValue = ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE;
    }

    // Only root devices own a debug surface; sub-devices receive the root's surface below.
    const bool allocateDebugSurface = (device->getL0Debugger() || neoDevice->getDeviceInfo().debuggerActive) && !isSubDevice;
    NEO::GraphicsAllocation *debugSurface = nullptr;

    if (allocateDebugSurface) {
        debugSurface = neoDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties(
            {device->getRootDeviceIndex(), true,
             debugSurfaceSize,
             NEO::AllocationType::DEBUG_CONTEXT_SAVE_AREA,
             false,
             false,
             device->getNEODevice()->getDeviceBitfield()});
        device->setDebugSurface(debugSurface);
    }

    // Seed the debug surface with the state save area header when both exist.
    if (debugSurface && stateSaveAreaHeader.size() > 0) {
        // This inner hwInfo shadows the outer reference of the same name; both come from neoDevice->getHardwareInfo().
        auto &hwInfo = neoDevice->getHardwareInfo();
        NEO::MemoryTransferHelper::transferMemoryToAllocation(hwHelper.isBlitCopyRequiredForLocalMemory(hwInfo, *debugSurface),
                                                              *neoDevice, debugSurface, 0, stateSaveAreaHeader.data(),
                                                              stateSaveAreaHeader.size());
    }

    for (auto &neoSubDevice : neoDevice->getSubDevices()) {
        if (!neoSubDevice) {
            continue;
        }

        ze_device_handle_t subDevice = Device::create(driverHandle,
                                                      neoSubDevice,
                                                      true, returnValue, nullptr);
        // NOTE(review): this early return does not release `device` or undo incRefInternal() -
        // confirm whether the caller is expected to clean up.
        if (subDevice == nullptr) {
            return nullptr;
        }
        static_cast(subDevice)->isSubdevice = true;
        static_cast(subDevice)->setDebugSurface(debugSurface);
        device->subDevices.push_back(static_cast(subDevice));
    }
    device->numSubDevices = static_cast(device->subDevices.size());

    // Dual-storage shared memory follows local memory support unless the
    // AllocateSharedAllocationsWithCpuAndGpuStorage debug flag overrides it (-1 means "not set").
    auto supportDualStorageSharedMemory = neoDevice->getMemoryManager()->isLocalMemorySupported(device->neoDevice->getRootDeviceIndex());
    if (NEO::DebugManager.flags.AllocateSharedAllocationsWithCpuAndGpuStorage.get() != -1) {
        supportDualStorageSharedMemory = NEO::DebugManager.flags.AllocateSharedAllocationsWithCpuAndGpuStorage.get();
    }

    if (supportDualStorageSharedMemory) {
        // Synchronous immediate command list on the copy engine group, stored as the page-fault command list.
        ze_command_queue_desc_t cmdQueueDesc = {};
        cmdQueueDesc.ordinal = 0;
        cmdQueueDesc.index = 0;
        cmdQueueDesc.flags = 0;
        cmdQueueDesc.stype = ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC;
        cmdQueueDesc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS;
        // NOTE(review): resultValue is never inspected after the call.
        ze_result_t resultValue = ZE_RESULT_SUCCESS;
        device->pageFaultCommandList =
            CommandList::createImmediate(
                device->neoDevice->getHardwareInfo().platform.eProductFamily, device, &cmdQueueDesc, true, NEO::EngineGroupType::Copy, resultValue);
    }

    // Cache the PCI speed information reported by the driver model, when an OS interface exists.
    if (osInterface) {
        auto pciSpeedInfo = osInterface->getDriverModel()->getPciSpeedInfo();
        device->pciMaxSpeed.genVersion = pciSpeedInfo.genVersion;
        device->pciMaxSpeed.maxBandwidth = pciSpeedInfo.maxBandwidth;
        device->pciMaxSpeed.width = pciSpeedInfo.width;
    }

    if (device->getSourceLevelDebugger()) {
        // This inner osInterface shadows the outer variable of the same name; both come from the same expression.
        auto osInterface = neoDevice->getRootDeviceEnvironment().osInterface.get();
        device->getSourceLevelDebugger()
            ->notifyNewDevice(osInterface ? osInterface->getDriverModel()->getDeviceHandle() : 0);
    }
    device->createSysmanHandle(isSubDevice);
    device->resourcesReleased = false;
    return device;
}

// Releases everything the device owns: a non-legacy debugger of the root device environment,
// sub-devices, the page-fault command list, metric/builtin/cache helpers, reusable allocations,
// the debug surface (root devices only) and the reference held on the NEO device.
// Repeated calls are no-ops thanks to the resourcesReleased flag.
void DeviceImp::releaseResources() {
    if (resourcesReleased) {
        return;
    }
    // NOTE(review): neoDevice is dereferenced here, before the null check near the end of the function.
    if (neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()]->debugger.get() &&
        !neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()]->debugger->isLegacy()) {
        neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()]->debugger.reset(nullptr);
    }

    for (uint32_t i = 0; i < this->numSubDevices; i++) {
        delete this->subDevices[i];
    }
    this->subDevices.clear();
    this->numSubDevices = 0;

    if (this->pageFaultCommandList) {
        this->pageFaultCommandList->destroy();
        this->pageFaultCommandList = nullptr;
    }
    metricContext.reset();
    builtins.reset();
    cacheReservation.reset();

    if (allocationsForReuse.get()) {
        allocationsForReuse->freeAllGraphicsAllocations(neoDevice);
        allocationsForReuse.reset();
    }

    if (getSourceLevelDebugger()) {
        getSourceLevelDebugger()->notifyDeviceDestruction();
    }

    // The debug surface is shared with sub-devices but freed only by the root device.
    if (!isSubdevice) {
        if (this->debugSurface) {
            this->neoDevice->getMemoryManager()->freeGraphicsMemory(this->debugSurface);
            this->debugSurface = nullptr;
        }
    }

    if (neoDevice) {
        neoDevice->decRefInternal();
        neoDevice = nullptr;
    }

    resourcesReleased = true;
}

// Destructor; its body continues on the next line.
DeviceImp::~DeviceImp() {
// Body of DeviceImp::~DeviceImp (the opening line precedes this span): releases device resources
// and, for root devices only, deletes the Sysman device.
    releaseResources();

    if (!isSubdevice) {
        if (pSysmanDevice != nullptr) {
            delete pSysmanDevice;
            pSysmanDevice = nullptr;
        }
    }
}

// Returns the preemption mode of the underlying NEO device.
NEO::PreemptionMode DeviceImp::getDevicePreemptionMode() const {
    return neoDevice->getPreemptionMode();
}

// Returns the device info of the underlying NEO device.
const NEO::DeviceInfo &DeviceImp::getDeviceInfo() const {
    return neoDevice->getDeviceInfo();
}

// Returns a graphics allocation that covers the host range [buffer, buffer + size).
//
// If findAllocationsWithinRange reports a full match (allocFound), the first returned allocation
// is used. Otherwise every allocation it returned is removed from the SVM manager, freed and
// erased from commandList's containers, while the requested range is widened to also cover each
// removed allocation. A new BUFFER_HOST_MEMORY allocation is then created over the resulting
// range and registered with the SVM manager.
//
// commandList must be non-null whenever existing allocations have to be replaced.
// Returns nullptr when the new allocation cannot be created.
NEO::GraphicsAllocation *DeviceImp::allocateManagedMemoryFromHostPtr(void *buffer, size_t size, struct CommandList *commandList) {
    char *baseAddress = reinterpret_cast(buffer);
    NEO::GraphicsAllocation *allocation = nullptr;
    bool allocFound = false;
    std::vector allocDataArray = driverHandle->findAllocationsWithinRange(buffer, size, &allocFound);
    if (allocFound) {
        return allocDataArray[0]->gpuAllocations.getGraphicsAllocation(getRootDeviceIndex());
    }

    if (!allocDataArray.empty()) {
        UNRECOVERABLE_IF(commandList == nullptr);
        for (auto allocData : allocDataArray) {
            allocation = allocData->gpuAllocations.getGraphicsAllocation(getRootDeviceIndex());
            // Capture address and size before the allocation record is removed and freed.
            char *allocAddress = reinterpret_cast(allocation->getGpuAddress());
            size_t allocSize = allocData->size;

            driverHandle->getSvmAllocsManager()->removeSVMAlloc(*allocData);
            neoDevice->getMemoryManager()->freeGraphicsMemory(allocation);
            // After the free, `allocation` is passed only as an argument to the two erase calls below.
            commandList->eraseDeallocationContainerEntry(allocation);
            commandList->eraseResidencyContainerEntry(allocation);

            if (allocAddress < baseAddress) {
                // Removed allocation starts below the requested range: move the start down to it
                // and keep the current end (baseAddress + size).
                buffer = reinterpret_cast(allocAddress);
                baseAddress += size;
                size = ptrDiff(baseAddress, allocAddress);
                baseAddress = reinterpret_cast(buffer);
            } else {
                // Removed allocation starts inside the range: extend the end only when the
                // allocation reaches past the current end.
                allocAddress += allocSize;
                baseAddress += size;
                if (allocAddress > baseAddress) {
                    baseAddress = reinterpret_cast(buffer);
                    size = ptrDiff(allocAddress, baseAddress);
                } else {
                    baseAddress = reinterpret_cast(buffer);
                }
            }
        }
    }

    allocation = neoDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties(
        {getRootDeviceIndex(), false, size, NEO::AllocationType::BUFFER_HOST_MEMORY, false, neoDevice->getDeviceBitfield()},
        buffer);

    if (allocation == nullptr) {
        return allocation;
    }

    // Register the new allocation with the SVM manager.
    NEO::SvmAllocationData allocData(getRootDeviceIndex());
    allocData.gpuAllocations.addAllocation(allocation);
    allocData.cpuAllocation = nullptr;
    allocData.size = size;
    allocData.memoryType = InternalMemoryType::NOT_SPECIFIED;
    allocData.device = nullptr;

    driverHandle->getSvmAllocsManager()->insertSVMAlloc(allocData);

    return allocation;
}

// Creates an EXTERNAL_HOST_PTR allocation over the caller's buffer, with L3 flushes required for
// both reads and writes. When that fails and hostCopyAllowed is set, falls back to an internal
// allocation made with a copy of the host data. May return nullptr.
NEO::GraphicsAllocation *DeviceImp::allocateMemoryFromHostPtr(const void *buffer, size_t size, bool hostCopyAllowed) {
    NEO::AllocationProperties properties = {getRootDeviceIndex(), false, size,
                                            NEO::AllocationType::EXTERNAL_HOST_PTR,
                                            false, neoDevice->getDeviceBitfield()};
    properties.flags.flushL3RequiredForRead = properties.flags.flushL3RequiredForWrite = true;
    auto allocation = neoDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties(properties,
                                                                                          buffer);
    if (allocation == nullptr && hostCopyAllowed) {
        allocation = neoDevice->getMemoryManager()->allocateInternalGraphicsMemoryWithHostCopy(neoDevice->getRootDeviceIndex(),
                                                                                               neoDevice->getDeviceBitfield(),
                                                                                               buffer,
                                                                                               size);
    }

    return allocation;
}

// Detaches an allocation of the given type with at least requiredSize from the reuse list and
// hands ownership to the caller; returns nullptr when none is available.
NEO::GraphicsAllocation *DeviceImp::obtainReusableAllocation(size_t requiredSize, NEO::AllocationType type) {
    auto alloc = allocationsForReuse->detachAllocation(requiredSize, nullptr, nullptr, type);
    if (alloc == nullptr)
        return nullptr;
    else
        return alloc.release();
}

// Puts an allocation at the front of the reuse list.
void DeviceImp::storeReusableAllocation(NEO::GraphicsAllocation &alloc) {
    allocationsForReuse->pushFrontOne(alloc);
}

// Looks up the command stream receiver of engine `index` in regular engine group `ordinal` of the
// active device. Returns ZE_RESULT_ERROR_INVALID_ARGUMENT when either index is out of range, in
// which case *csr is left untouched.
ze_result_t DeviceImp::getCsrForOrdinalAndIndex(NEO::CommandStreamReceiver **csr, uint32_t ordinal, uint32_t index) {
    auto &engineGroups = getActiveDevice()->getRegularEngineGroups();
    if ((ordinal >= engineGroups.size()) ||
        (index >= engineGroups[ordinal].engines.size())) {
        return ZE_RESULT_ERROR_INVALID_ARGUMENT;
    }
    *csr = engineGroups[ordinal].engines[index].commandStreamReceiver;
    return ZE_RESULT_SUCCESS;
}

// Selects the command stream receiver used for low-priority queues; the function continues on the next line.
ze_result_t DeviceImp::getCsrForLowPriority(NEO::CommandStreamReceiver **csr) {
    NEO::Device *activeDevice = getActiveDevice();
    if
(this->implicitScalingCapable) { *csr = activeDevice->getDefaultEngine().commandStreamReceiver; return ZE_RESULT_SUCCESS; } else { for (auto &it : activeDevice->getAllEngines()) { if (it.osContext->isLowPriority()) { *csr = it.commandStreamReceiver; return ZE_RESULT_SUCCESS; } } // if the code falls through, we have no low priority context created by neoDevice. } UNRECOVERABLE_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } DebugSession *DeviceImp::getDebugSession(const zet_debug_config_t &config) { return debugSession.get(); } DebugSession *DeviceImp::createDebugSession(const zet_debug_config_t &config, ze_result_t &result) { if (!this->isSubdevice) { auto session = DebugSession::create(config, this, result); debugSession.reset(session); } else { result = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } return debugSession.get(); } bool DeviceImp::toPhysicalSliceId(const NEO::TopologyMap &topologyMap, uint32_t &slice, uint32_t &subslice, uint32_t &deviceIndex) { auto hwInfo = neoDevice->getRootDeviceEnvironment().getHardwareInfo(); uint32_t subDeviceCount = NEO::HwHelper::getSubDevicesCount(hwInfo); auto deviceBitfield = neoDevice->getDeviceBitfield(); if (topologyMap.size() == subDeviceCount && !isSubdevice) { uint32_t sliceId = slice; for (uint32_t i = 0; i < topologyMap.size(); i++) { if (sliceId < topologyMap.at(i).sliceIndices.size()) { slice = topologyMap.at(i).sliceIndices[sliceId]; if (topologyMap.at(i).sliceIndices.size() == 1) { uint32_t subsliceId = subslice; subslice = topologyMap.at(i).subsliceIndices[subsliceId]; } deviceIndex = i; return true; } sliceId = sliceId - static_cast(topologyMap.at(i).sliceIndices.size()); } } else if (isSubdevice) { UNRECOVERABLE_IF(!deviceBitfield.any()); uint32_t subDeviceIndex = Math::log2(static_cast(deviceBitfield.to_ulong())); if (topologyMap.find(subDeviceIndex) != topologyMap.end()) { if (slice < topologyMap.at(subDeviceIndex).sliceIndices.size()) { deviceIndex = subDeviceIndex; slice = 
topologyMap.at(subDeviceIndex).sliceIndices[slice]; if (topologyMap.at(subDeviceIndex).sliceIndices.size() == 1) { uint32_t subsliceId = subslice; subslice = topologyMap.at(subDeviceIndex).subsliceIndices[subsliceId]; } return true; } } } return false; } bool DeviceImp::toApiSliceId(const NEO::TopologyMap &topologyMap, uint32_t &slice, uint32_t &subslice, uint32_t deviceIndex) { auto deviceBitfield = neoDevice->getDeviceBitfield(); if (isSubdevice) { UNRECOVERABLE_IF(!deviceBitfield.any()); deviceIndex = Math::log2(static_cast(deviceBitfield.to_ulong())); } if (topologyMap.find(deviceIndex) != topologyMap.end()) { uint32_t apiSliceId = 0; if (!isSubdevice) { for (uint32_t devId = 0; devId < deviceIndex; devId++) { apiSliceId += static_cast(topologyMap.at(devId).sliceIndices.size()); } } for (uint32_t i = 0; i < topologyMap.at(deviceIndex).sliceIndices.size(); i++) { if (static_cast(topologyMap.at(deviceIndex).sliceIndices[i]) == slice) { apiSliceId += i; slice = apiSliceId; if (topologyMap.at(deviceIndex).sliceIndices.size() == 1) { for (uint32_t subsliceApiId = 0; subsliceApiId < topologyMap.at(deviceIndex).subsliceIndices.size(); subsliceApiId++) { if (static_cast(topologyMap.at(deviceIndex).subsliceIndices[subsliceApiId]) == subslice) { subslice = subsliceApiId; } } } return true; } } } return false; } NEO::Device *DeviceImp::getActiveDevice() const { if (neoDevice->getNumGenericSubDevices() > 1u) { if (isImplicitScalingCapable()) { return this->neoDevice; } return this->neoDevice->getSubDevice(0); } return this->neoDevice; } uint32_t DeviceImp::getPhysicalSubDeviceId() { if (!neoDevice->isSubDevice()) { uint32_t deviceBitField = static_cast(neoDevice->getDeviceBitfield().to_ulong()); if (neoDevice->getDeviceBitfield().count() > 1) { // Clear all set bits other than the right most bit deviceBitField &= ~deviceBitField + 1; } return Math::log2(deviceBitField); } return static_cast(neoDevice)->getSubDeviceIndex(); } } // namespace L0 
compute-runtime-22.14.22890/level_zero/core/source/device/device_imp.h000066400000000000000000000170531422164147700254720ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/topology_map.h" #include "shared/source/memory_manager/allocations_list.h" #include "shared/source/memory_manager/memadvise_flags.h" #include "shared/source/page_fault_manager/cpu_page_fault_manager.h" #include "shared/source/utilities/spinlock.h" #include "level_zero/core/source/builtin/builtin_functions_lib.h" #include "level_zero/core/source/cache/cache_reservation.h" #include "level_zero/core/source/cmdlist/cmdlist.h" #include "level_zero/core/source/device/device.h" #include "level_zero/core/source/driver/driver_handle.h" #include "level_zero/core/source/module/module.h" #include "level_zero/tools/source/debug/debug_session.h" #include "level_zero/tools/source/metrics/metric.h" #include #include namespace L0 { struct SysmanDevice; struct DeviceImp : public Device { ze_result_t canAccessPeer(ze_device_handle_t hPeerDevice, ze_bool_t *value) override; ze_result_t createCommandList(const ze_command_list_desc_t *desc, ze_command_list_handle_t *commandList) override; ze_result_t createCommandListImmediate(const ze_command_queue_desc_t *desc, ze_command_list_handle_t *phCommandList) override; ze_result_t createCommandQueue(const ze_command_queue_desc_t *desc, ze_command_queue_handle_t *commandQueue) override; ze_result_t createImage(const ze_image_desc_t *desc, ze_image_handle_t *phImage) override; ze_result_t createModule(const ze_module_desc_t *desc, ze_module_handle_t *module, ze_module_build_log_handle_t *buildLog, ModuleType type) override; ze_result_t createSampler(const ze_sampler_desc_t *pDesc, ze_sampler_handle_t *phSampler) override; ze_result_t getComputeProperties(ze_device_compute_properties_t *pComputeProperties) override; ze_result_t getP2PProperties(ze_device_handle_t 
hPeerDevice, ze_device_p2p_properties_t *pP2PProperties) override; ze_result_t getKernelProperties(ze_device_module_properties_t *pKernelProperties) override; ze_result_t getPciProperties(ze_pci_ext_properties_t *pPciProperties) override; ze_result_t getMemoryProperties(uint32_t *pCount, ze_device_memory_properties_t *pMemProperties) override; ze_result_t getMemoryAccessProperties(ze_device_memory_access_properties_t *pMemAccessProperties) override; ze_result_t getProperties(ze_device_properties_t *pDeviceProperties) override; ze_result_t getSubDevices(uint32_t *pCount, ze_device_handle_t *phSubdevices) override; ze_result_t getCacheProperties(uint32_t *pCount, ze_device_cache_properties_t *pCacheProperties) override; ze_result_t reserveCache(size_t cacheLevel, size_t cacheReservationSize) override; ze_result_t setCacheAdvice(void *ptr, size_t regionSize, ze_cache_ext_region_t cacheRegion) override; ze_result_t imageGetProperties(const ze_image_desc_t *desc, ze_image_properties_t *pImageProperties) override; ze_result_t getDeviceImageProperties(ze_device_image_properties_t *pDeviceImageProperties) override; ze_result_t getCommandQueueGroupProperties(uint32_t *pCount, ze_command_queue_group_properties_t *pCommandQueueGroupProperties) override; ze_result_t getExternalMemoryProperties(ze_device_external_memory_properties_t *pExternalMemoryProperties) override; ze_result_t getGlobalTimestamps(uint64_t *hostTimestamp, uint64_t *deviceTimestamp) override; ze_result_t getDebugProperties(zet_device_debug_properties_t *pDebugProperties) override; ze_result_t systemBarrier() override; void *getExecEnvironment() override; BuiltinFunctionsLib *getBuiltinFunctionsLib() override; uint32_t getMOCS(bool l3enabled, bool l1enabled) override; NEO::HwHelper &getHwHelper() override; const NEO::HardwareInfo &getHwInfo() const override; NEO::OSInterface &getOsInterface() override; uint32_t getPlatformInfo() const override; MetricDeviceContext &getMetricDeviceContext() override; 
DebugSession *getDebugSession(const zet_debug_config_t &config) override; DebugSession *createDebugSession(const zet_debug_config_t &config, ze_result_t &result) override; void removeDebugSession() override { debugSession.release(); } uint32_t getMaxNumHwThreads() const override; ze_result_t activateMetricGroupsDeferred(uint32_t count, zet_metric_group_handle_t *phMetricGroups) override; DriverHandle *getDriverHandle() override; void setDriverHandle(DriverHandle *driverHandle) override; NEO::PreemptionMode getDevicePreemptionMode() const override; const NEO::DeviceInfo &getDeviceInfo() const override; void activateMetricGroups() override; void processAdditionalKernelProperties(NEO::HwHelper &hwHelper, ze_device_module_properties_t *pKernelProperties); NEO::GraphicsAllocation *getDebugSurface() const override { return debugSurface; } void setDebugSurface(NEO::GraphicsAllocation *debugSurface) { this->debugSurface = debugSurface; }; ~DeviceImp() override; NEO::GraphicsAllocation *allocateManagedMemoryFromHostPtr(void *buffer, size_t size, struct CommandList *commandList) override; NEO::GraphicsAllocation *allocateMemoryFromHostPtr(const void *buffer, size_t size, bool hostCopyAllowed) override; void setSysmanHandle(SysmanDevice *pSysman) override; SysmanDevice *getSysmanHandle() override; ze_result_t getCsrForOrdinalAndIndex(NEO::CommandStreamReceiver **csr, uint32_t ordinal, uint32_t index) override; ze_result_t getCsrForLowPriority(NEO::CommandStreamReceiver **csr) override; NEO::GraphicsAllocation *obtainReusableAllocation(size_t requiredSize, NEO::AllocationType type) override; void storeReusableAllocation(NEO::GraphicsAllocation &alloc) override; NEO::Device *getActiveDevice() const; bool toPhysicalSliceId(const NEO::TopologyMap &topologyMap, uint32_t &slice, uint32_t &subslice, uint32_t &deviceIndex); bool toApiSliceId(const NEO::TopologyMap &topologyMap, uint32_t &slice, uint32_t &subslice, uint32_t deviceIndex); uint32_t getPhysicalSubDeviceId(); bool 
isSubdevice = false; void *execEnvironment = nullptr; std::unique_ptr builtins = nullptr; std::unique_ptr metricContext = nullptr; std::unique_ptr cacheReservation = nullptr; uint32_t maxNumHwThreads = 0; uint32_t numSubDevices = 0; std::vector subDevices; std::unordered_map crossAccessEnabledDevices; DriverHandle *driverHandle = nullptr; CommandList *pageFaultCommandList = nullptr; ze_pci_speed_ext_t pciMaxSpeed = {-1, -1, -1}; bool resourcesReleased = false; void releaseResources(); NEO::SVMAllocsManager::MapBasedAllocationTracker peerAllocations; NEO::SpinLock peerAllocationsMutex; std::map memAdviseSharedAllocations; std::unique_ptr allocationsForReuse; std::unique_ptr driverInfo; void createSysmanHandle(bool isSubDevice); protected: NEO::GraphicsAllocation *debugSurface = nullptr; SysmanDevice *pSysmanDevice = nullptr; std::unique_ptr debugSession = nullptr; }; void handleGpuDomainTransferForHwWithHints(NEO::PageFaultManager *pageFaultHandler, void *allocPtr, NEO::PageFaultManager::PageFaultData &pageFaultData); } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/device/device_imp_helper.cpp000066400000000000000000000005321422164147700273560ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/device/device.h" #include "level_zero/core/source/device/device_imp.h" namespace L0 { void DeviceImp::processAdditionalKernelProperties(NEO::HwHelper &hwHelper, ze_device_module_properties_t *pKernelProperties) { } } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/dll/000077500000000000000000000000001422164147700225235ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/source/dll/CMakeLists.txt000066400000000000000000000007661422164147700252740ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_DLL ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt 
${CMAKE_CURRENT_SOURCE_DIR}/disallow_deferred_deleter.cpp ${CMAKE_CURRENT_SOURCE_DIR}/create_builtin_functions_lib.cpp ) add_subdirectories() if(WIN32) append_sources_from_properties(L0_SRCS_DLL L0_SRCS_DLL_WINDOWS) else() append_sources_from_properties(L0_SRCS_DLL L0_SRCS_DLL_LINUX) endif() set_property(GLOBAL PROPERTY L0_SRCS_DLL ${L0_SRCS_DLL}) compute-runtime-22.14.22890/level_zero/core/source/dll/create_builtin_functions_lib.cpp000066400000000000000000000010051422164147700311320ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/built_ins/built_ins.h" #include "level_zero/core/source/builtin/builtin_functions_lib_impl.h" namespace L0 { std::unique_ptr BuiltinFunctionsLib::create(Device *device, NEO::BuiltIns *builtins) { return std::unique_ptr(new BuiltinFunctionsLibImpl(device, builtins)); } } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/dll/disallow_deferred_deleter.cpp000066400000000000000000000003431422164147700304110ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/deferred_deleter_helper.h" bool NEO::isDeferredDeleterEnabled() { return false; } // namespace NEO compute-runtime-22.14.22890/level_zero/core/source/dll/linux/000077500000000000000000000000001422164147700236625ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/source/dll/linux/CMakeLists.txt000066400000000000000000000006011422164147700264170ustar00rootroot00000000000000# # Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_DLL_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/debugger_l0_linux.cpp ${NEO_SOURCE_DIR}/level_zero/tools/source/debug${BRANCH_DIR_SUFFIX}linux/debug_session_linux_helper.cpp ) set_property(GLOBAL PROPERTY L0_SRCS_DLL_LINUX ${L0_SRCS_DLL_LINUX}) 
compute-runtime-22.14.22890/level_zero/core/source/dll/linux/debugger_l0_linux.cpp000066400000000000000000000013331422164147700277640ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/device/device.h" #include "shared/source/os_interface/linux/drm_neo.h" #include "shared/source/os_interface/os_interface.h" #include "level_zero/core/source/debugger/debugger_l0.h" namespace L0 { std::unique_ptr DebuggerL0::create(NEO::Device *device) { auto success = initDebuggingInOs(device->getRootDeviceEnvironment().osInterface.get()); if (success) { auto debugger = debuggerL0Factory[device->getHardwareInfo().platform.eRenderCoreFamily](device); return std::unique_ptr(debugger); } return std::unique_ptr(nullptr); } } // namespace L0compute-runtime-22.14.22890/level_zero/core/source/dll/linux/ze.exports000066400000000000000000000002071422164147700257250ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ { global: ze*; local: *; }; compute-runtime-22.14.22890/level_zero/core/source/dll/windows/000077500000000000000000000000001422164147700242155ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/source/dll/windows/CMakeLists.txt000066400000000000000000000004301422164147700267520ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_DLL_WINDOWS ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/debugger_l0_windows.cpp ) set_property(GLOBAL PROPERTY L0_SRCS_DLL_WINDOWS ${L0_SRCS_DLL_WINDOWS}) compute-runtime-22.14.22890/level_zero/core/source/dll/windows/debugger_l0_windows.cpp000066400000000000000000000004571422164147700306600ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/debugger/debugger_l0.h" namespace L0 { std::unique_ptr 
DebuggerL0::create(NEO::Device *device) { return std::unique_ptr(nullptr); } } // namespace L0compute-runtime-22.14.22890/level_zero/core/source/driver/000077500000000000000000000000001422164147700232435ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/source/driver/CMakeLists.txt000066400000000000000000000003461422164147700260060ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_DRIVER ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) add_subdirectories() set_property(GLOBAL PROPERTY L0_SRCS_DRIVER ${L0_SRCS_DRIVER}) compute-runtime-22.14.22890/level_zero/core/source/driver/driver.cpp000066400000000000000000000074231422164147700252500ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/driver/driver.h" #include "shared/source/device/device.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/os_interface/debug_env_reader.h" #include "shared/source/os_interface/device_factory.h" #include "level_zero/core/source/device/device.h" #include "level_zero/core/source/driver/driver_handle_imp.h" #include "level_zero/core/source/driver/driver_imp.h" #include "level_zero/tools/source/metrics/metric.h" #include "level_zero/tools/source/pin/pin.h" #include #include namespace L0 { _ze_driver_handle_t *GlobalDriverHandle; uint32_t driverCount = 1; void DriverImp::initialize(ze_result_t *result) { *result = ZE_RESULT_ERROR_UNINITIALIZED; NEO::EnvironmentVariableReader envReader; L0EnvVariables envVariables = {}; envVariables.affinityMask = envReader.getSetting("ZE_AFFINITY_MASK", std::string("")); envVariables.programDebugging = envReader.getSetting("ZET_ENABLE_PROGRAM_DEBUGGING", false); envVariables.metrics = envReader.getSetting("ZET_ENABLE_METRICS", false); envVariables.pin = envReader.getSetting("ZET_ENABLE_PROGRAM_INSTRUMENTATION", false); 
envVariables.sysman = envReader.getSetting("ZES_ENABLE_SYSMAN", false); envVariables.pciIdDeviceOrder = envReader.getSetting("ZE_ENABLE_PCI_ID_DEVICE_ORDER", false); auto executionEnvironment = new NEO::ExecutionEnvironment(); UNRECOVERABLE_IF(nullptr == executionEnvironment); if (envVariables.programDebugging) { executionEnvironment->setDebuggingEnabled(); } executionEnvironment->incRefInternal(); auto neoDevices = NEO::DeviceFactory::createDevices(*executionEnvironment); executionEnvironment->decRefInternal(); if (!neoDevices.empty()) { GlobalDriverHandle = DriverHandle::create(std::move(neoDevices), envVariables, result); if (GlobalDriverHandle != nullptr) { *result = ZE_RESULT_SUCCESS; if (envVariables.metrics) { *result = MetricDeviceContext::enableMetricApi(); if (*result != ZE_RESULT_SUCCESS) { delete GlobalDriver; GlobalDriverHandle = nullptr; GlobalDriver = nullptr; } } if ((*result == ZE_RESULT_SUCCESS) && envVariables.pin) { *result = PinContext::init(); if (*result != ZE_RESULT_SUCCESS) { delete GlobalDriver; GlobalDriverHandle = nullptr; GlobalDriver = nullptr; } } } } } ze_result_t DriverImp::initStatus(ZE_RESULT_ERROR_UNINITIALIZED); ze_result_t DriverImp::driverInit(ze_init_flags_t flags) { std::call_once(initDriverOnce, [this]() { ze_result_t result; this->initialize(&result); initStatus = result; }); return initStatus; } ze_result_t driverHandleGet(uint32_t *pCount, ze_driver_handle_t *phDriverHandles) { if (*pCount == 0) { *pCount = driverCount; return ZE_RESULT_SUCCESS; } if (*pCount > driverCount) { *pCount = driverCount; } if (phDriverHandles == nullptr) { return ZE_RESULT_ERROR_INVALID_NULL_POINTER; } for (uint32_t i = 0; i < *pCount; i++) { phDriverHandles[i] = GlobalDriverHandle; } return ZE_RESULT_SUCCESS; } static DriverImp driverImp; Driver *Driver::driver = &driverImp; ze_result_t init(ze_init_flags_t flags) { if (flags && !(flags & ZE_INIT_FLAG_GPU_ONLY)) return ZE_RESULT_ERROR_UNINITIALIZED; else return 
Driver::get()->driverInit(flags); } } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/driver/driver.h000066400000000000000000000011731422164147700247110ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include #include namespace L0 { struct Driver { virtual ze_result_t driverInit(ze_init_flags_t flags) = 0; virtual void initialize(ze_result_t *result) = 0; static Driver *get() { return driver; } virtual ~Driver() = default; protected: static Driver *driver; }; ze_result_t init(ze_init_flags_t); ze_result_t driverHandleGet(uint32_t *pCount, ze_driver_handle_t *phDrivers); extern uint32_t driverCount; extern _ze_driver_handle_t *GlobalDriverHandle; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/driver/driver_handle.h000066400000000000000000000072611422164147700262300ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/memory_manager/unified_memory_manager.h" #include "level_zero/core/source/context/context.h" #include "level_zero/core/source/device/device.h" #include #include struct _ze_driver_handle_t { virtual ~_ze_driver_handle_t() = default; }; namespace L0 { struct Device; struct L0EnvVariables; struct DriverHandle : _ze_driver_handle_t { virtual ze_result_t createContext(const ze_context_desc_t *desc, uint32_t numDevices, ze_device_handle_t *phDevices, ze_context_handle_t *phContext) = 0; virtual ze_result_t getDevice(uint32_t *pCount, ze_device_handle_t *phDevices) = 0; virtual ze_result_t getProperties(ze_driver_properties_t *properties) = 0; virtual ze_result_t getApiVersion(ze_api_version_t *version) = 0; virtual ze_result_t getIPCProperties(ze_driver_ipc_properties_t *pIPCProperties) = 0; virtual ze_result_t getExtensionFunctionAddress(const char *pFuncName, void **pfunc) = 0; virtual ze_result_t getExtensionProperties(uint32_t *pCount, 
ze_driver_extension_properties_t *pExtensionProperties) = 0; virtual NEO::MemoryManager *getMemoryManager() = 0; virtual void setMemoryManager(NEO::MemoryManager *memoryManager) = 0; virtual ze_result_t checkMemoryAccessFromDevice(Device *device, const void *ptr) = 0; virtual bool findAllocationDataForRange(const void *buffer, size_t size, NEO::SvmAllocationData **allocData) = 0; virtual std::vector findAllocationsWithinRange(const void *buffer, size_t size, bool *allocationRangeCovered) = 0; virtual NEO::SVMAllocsManager *getSvmAllocsManager() = 0; virtual ze_result_t sysmanEventsListen(uint32_t timeout, uint32_t count, zes_device_handle_t *phDevices, uint32_t *pNumDeviceEvents, zes_event_type_flags_t *pEvents) = 0; virtual ze_result_t sysmanEventsListenEx(uint64_t timeout, uint32_t count, zes_device_handle_t *phDevices, uint32_t *pNumDeviceEvents, zes_event_type_flags_t *pEvents) = 0; virtual ze_result_t importExternalPointer(void *ptr, size_t size) = 0; virtual ze_result_t releaseImportedPointer(void *ptr) = 0; virtual ze_result_t getHostPointerBaseAddress(void *ptr, void **baseAddress) = 0; virtual NEO::GraphicsAllocation *findHostPointerAllocation(void *ptr, size_t size, uint32_t rootDeviceIndex) = 0; virtual NEO::GraphicsAllocation *getDriverSystemMemoryAllocation(void *ptr, size_t size, uint32_t rootDeviceIndex, uintptr_t *gpuAddress) = 0; static DriverHandle *fromHandle(ze_driver_handle_t handle) { return static_cast(handle); } inline ze_driver_handle_t toHandle() { return this; } DriverHandle &operator=(const DriverHandle &) = delete; DriverHandle &operator=(DriverHandle &&) = delete; static DriverHandle *create(std::vector> devices, const L0EnvVariables &envVariables, ze_result_t *returnValue); }; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/driver/driver_handle_imp.cpp000066400000000000000000000516431422164147700274330ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ 
#include "level_zero/core/source/driver/driver_handle_imp.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/device/device.h" #include "shared/source/helpers/string.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/os_interface/os_library.h" #include "level_zero/core/source/context/context_imp.h" #include "level_zero/core/source/debugger/debugger_l0.h" #include "level_zero/core/source/device/device_imp.h" #include "level_zero/core/source/driver/driver_imp.h" #include "level_zero/core/source/driver/host_pointer_manager.h" #include "driver_version_l0.h" #include #include #include #include namespace L0 { struct DriverHandleImp *GlobalDriver; DriverHandleImp::DriverHandleImp() = default; ze_result_t DriverHandleImp::createContext(const ze_context_desc_t *desc, uint32_t numDevices, ze_device_handle_t *phDevices, ze_context_handle_t *phContext) { ContextImp *context = new ContextImp(this); if (nullptr == context) { return ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY; } if (desc->pNext) { const ze_base_desc_t *expDesc = reinterpret_cast(desc->pNext); if (expDesc->stype == ZE_STRUCTURE_TYPE_POWER_SAVING_HINT_EXP_DESC) { const ze_context_power_saving_hint_exp_desc_t *powerHintExpDesc = reinterpret_cast(expDesc); if (powerHintExpDesc->hint == ZE_POWER_SAVING_HINT_TYPE_MIN || powerHintExpDesc->hint <= ZE_POWER_SAVING_HINT_TYPE_MAX) { powerHint = static_cast(powerHintExpDesc->hint); } else { delete context; return ZE_RESULT_ERROR_INVALID_ENUMERATION; } } } *phContext = context->toHandle(); if (numDevices == 0) { for (auto device : this->devices) { context->addDeviceAndSubDevices(device); } } else { for (uint32_t i = 0; i < numDevices; i++) { context->addDeviceAndSubDevices(Device::fromHandle(phDevices[i])); } } for (auto devicePair : context->getDevices()) { auto neoDevice = devicePair.second->getNEODevice(); context->rootDeviceIndices.insert(neoDevice->getRootDeviceIndex()); 
context->deviceBitfields.insert({neoDevice->getRootDeviceIndex(), neoDevice->getDeviceBitfield()}); } return ZE_RESULT_SUCCESS; } NEO::MemoryManager *DriverHandleImp::getMemoryManager() { return this->memoryManager; } void DriverHandleImp::setMemoryManager(NEO::MemoryManager *memoryManager) { this->memoryManager = memoryManager; } NEO::SVMAllocsManager *DriverHandleImp::getSvmAllocsManager() { return this->svmAllocsManager; } ze_result_t DriverHandleImp::getApiVersion(ze_api_version_t *version) { *version = ZE_API_VERSION_1_3; return ZE_RESULT_SUCCESS; } ze_result_t DriverHandleImp::getProperties(ze_driver_properties_t *properties) { uint32_t versionMajor = static_cast(strtoul(L0_PROJECT_VERSION_MAJOR, NULL, 10)); uint32_t versionMinor = static_cast(strtoul(L0_PROJECT_VERSION_MINOR, NULL, 10)); uint32_t versionBuild = static_cast(strtoul(NEO_VERSION_BUILD, NULL, 10)); properties->driverVersion = ((versionMajor << 24) & 0xFF000000) | ((versionMinor << 16) & 0x00FF0000) | (versionBuild & 0x0000FFFF); uint64_t uniqueId = (properties->driverVersion) | (uuidTimestamp & 0xFFFFFFFF00000000); memcpy_s(properties->uuid.id, sizeof(uniqueId), &uniqueId, sizeof(uniqueId)); return ZE_RESULT_SUCCESS; } ze_result_t DriverHandleImp::getIPCProperties(ze_driver_ipc_properties_t *pIPCProperties) { pIPCProperties->flags = ZE_IPC_PROPERTY_FLAG_MEMORY; return ZE_RESULT_SUCCESS; } ze_result_t DriverHandleImp::getExtensionFunctionAddress(const char *pFuncName, void **pfunc) { auto funcAddr = extensionFunctionsLookupMap.find(std::string(pFuncName)); if (funcAddr != extensionFunctionsLookupMap.end()) { *pfunc = funcAddr->second; return ZE_RESULT_SUCCESS; } return ZE_RESULT_ERROR_INVALID_ARGUMENT; } ze_result_t DriverHandleImp::getExtensionProperties(uint32_t *pCount, ze_driver_extension_properties_t *pExtensionProperties) { if (nullptr == pExtensionProperties) { *pCount = static_cast(this->extensionsSupported.size()); return ZE_RESULT_SUCCESS; } *pCount = 
std::min(static_cast(this->extensionsSupported.size()), *pCount); for (uint32_t i = 0; i < *pCount; i++) { auto extension = this->extensionsSupported[i]; strncpy_s(pExtensionProperties[i].name, ZE_MAX_EXTENSION_NAME, extension.first.c_str(), extension.first.length() + 1); pExtensionProperties[i].version = extension.second; } return ZE_RESULT_SUCCESS; } DriverHandleImp::~DriverHandleImp() { for (auto &device : this->devices) { delete device; } if (this->svmAllocsManager) { delete this->svmAllocsManager; this->svmAllocsManager = nullptr; } } void DriverHandleImp::updateRootDeviceBitFields(std::unique_ptr &neoDevice) { const auto rootDeviceIndex = neoDevice->getRootDeviceIndex(); auto entry = this->deviceBitfields.find(rootDeviceIndex); entry->second = neoDevice->getDeviceBitfield(); } void DriverHandleImp::enableRootDeviceDebugger(std::unique_ptr &neoDevice) { const auto rootDeviceIndex = neoDevice->getRootDeviceIndex(); auto rootDeviceEnvironment = neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[rootDeviceIndex].get(); if (enableProgramDebugging) { if (neoDevice->getDebugger() != nullptr) { NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "%s", "Source Level Debugger cannot be used with Environment Variable enabling program debugging.\n"); UNRECOVERABLE_IF(neoDevice->getDebugger() != nullptr && enableProgramDebugging); } rootDeviceEnvironment->getMutableHardwareInfo()->capabilityTable.fusedEuEnabled = false; rootDeviceEnvironment->debugger = DebuggerL0::create(neoDevice.get()); } } ze_result_t DriverHandleImp::initialize(std::vector> neoDevices) { bool multiOsContextDriver = false; for (auto &neoDevice : neoDevices) { ze_result_t returnValue = ZE_RESULT_SUCCESS; if (!neoDevice->getHardwareInfo().capabilityTable.levelZeroSupported) { continue; } if (this->memoryManager == nullptr) { this->memoryManager = neoDevice->getMemoryManager(); if (this->memoryManager == nullptr) { return ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY; } } const 
auto rootDeviceIndex = neoDevice->getRootDeviceIndex(); enableRootDeviceDebugger(neoDevice); this->rootDeviceIndices.insert(rootDeviceIndex); this->deviceBitfields.insert({rootDeviceIndex, neoDevice->getDeviceBitfield()}); auto pNeoDevice = neoDevice.release(); auto device = Device::create(this, pNeoDevice, false, &returnValue); this->devices.push_back(device); multiOsContextDriver |= device->isImplicitScalingCapable(); if (returnValue != ZE_RESULT_SUCCESS) { return returnValue; } } if (this->devices.size() == 0) { return ZE_RESULT_ERROR_UNINITIALIZED; } this->svmAllocsManager = new NEO::SVMAllocsManager(memoryManager, multiOsContextDriver); if (this->svmAllocsManager == nullptr) { return ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY; } this->numDevices = static_cast(this->devices.size()); extensionFunctionsLookupMap = getExtensionFunctionsLookupMap(); uuidTimestamp = static_cast(std::chrono::system_clock::now().time_since_epoch().count()); if (NEO::DebugManager.flags.EnableHostPointerImport.get() != 0) { createHostPointerManager(); } return ZE_RESULT_SUCCESS; } DriverHandle *DriverHandle::create(std::vector> devices, const L0EnvVariables &envVariables, ze_result_t *returnValue) { DriverHandleImp *driverHandle = new DriverHandleImp; UNRECOVERABLE_IF(nullptr == driverHandle); driverHandle->enableProgramDebugging = envVariables.programDebugging; driverHandle->enableSysman = envVariables.sysman; driverHandle->enablePciIdDeviceOrder = envVariables.pciIdDeviceOrder; ze_result_t res = driverHandle->initialize(std::move(devices)); if (res != ZE_RESULT_SUCCESS) { delete driverHandle; *returnValue = res; return nullptr; } GlobalDriver = driverHandle; driverHandle->getMemoryManager()->setForceNonSvmForExternalHostPtr(true); return driverHandle; } ze_result_t DriverHandleImp::getDevice(uint32_t *pCount, ze_device_handle_t *phDevices) { if (*pCount == 0) { *pCount = this->numDevices; return ZE_RESULT_SUCCESS; } if (phDevices == nullptr) { return ZE_RESULT_ERROR_INVALID_NULL_HANDLE; } for 
(uint32_t i = 0; i < *pCount; i++) { phDevices[i] = this->devices[i]; } return ZE_RESULT_SUCCESS; } bool DriverHandleImp::findAllocationDataForRange(const void *buffer, size_t size, NEO::SvmAllocationData **allocData) { size_t offset = 0; if (size > 0) { offset = size - 1; } // Make sure the host buffer does not overlap any existing allocation const char *baseAddress = reinterpret_cast(buffer); NEO::SvmAllocationData *beginAllocData = svmAllocsManager->getSVMAlloc(baseAddress); NEO::SvmAllocationData *endAllocData = svmAllocsManager->getSVMAlloc(baseAddress + offset); if (allocData) { if (beginAllocData) { *allocData = beginAllocData; } else { *allocData = endAllocData; } } // Return true if the whole range requested is covered by the same allocation if (beginAllocData && endAllocData && (beginAllocData->gpuAllocations.getDefaultGraphicsAllocation() == endAllocData->gpuAllocations.getDefaultGraphicsAllocation())) { return true; } return false; } std::vector DriverHandleImp::findAllocationsWithinRange(const void *buffer, size_t size, bool *allocationRangeCovered) { std::vector allocDataArray; const char *baseAddress = reinterpret_cast(buffer); // Check if the host buffer overlaps any existing allocation NEO::SvmAllocationData *beginAllocData = svmAllocsManager->getSVMAlloc(baseAddress); NEO::SvmAllocationData *endAllocData = svmAllocsManager->getSVMAlloc(baseAddress + size - 1); // Add the allocation that matches the beginning address if (beginAllocData) { allocDataArray.push_back(beginAllocData); } // Add the allocation that matches the end address range if there was no beginning allocation // or the beginning allocation does not match the ending allocation if (endAllocData) { if ((beginAllocData && (beginAllocData->gpuAllocations.getDefaultGraphicsAllocation() != endAllocData->gpuAllocations.getDefaultGraphicsAllocation())) || !beginAllocData) { allocDataArray.push_back(endAllocData); } } // Return true if the whole range requested is covered by the same 
allocation if (beginAllocData && endAllocData && (beginAllocData->gpuAllocations.getDefaultGraphicsAllocation() == endAllocData->gpuAllocations.getDefaultGraphicsAllocation())) { *allocationRangeCovered = true; } else { *allocationRangeCovered = false; } return allocDataArray; } void DriverHandleImp::createHostPointerManager() { hostPointerManager = std::make_unique(getMemoryManager()); } ze_result_t DriverHandleImp::importExternalPointer(void *ptr, size_t size) { if (hostPointerManager.get() != nullptr) { auto ret = hostPointerManager->createHostPointerMultiAllocation(this->devices, ptr, size); return ret; } return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t DriverHandleImp::releaseImportedPointer(void *ptr) { if (hostPointerManager.get() != nullptr) { bool ret = hostPointerManager->freeHostPointerAllocation(ptr); return ret ? ZE_RESULT_SUCCESS : ZE_RESULT_ERROR_INVALID_ARGUMENT; } return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t DriverHandleImp::getHostPointerBaseAddress(void *ptr, void **baseAddress) { if (hostPointerManager.get() != nullptr) { auto hostPointerData = hostPointerManager->getHostPointerAllocation(ptr); if (hostPointerData != nullptr) { if (baseAddress != nullptr) { *baseAddress = hostPointerData->basePtr; } return ZE_RESULT_SUCCESS; } return ZE_RESULT_ERROR_INVALID_ARGUMENT; } return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } NEO::GraphicsAllocation *DriverHandleImp::findHostPointerAllocation(void *ptr, size_t size, uint32_t rootDeviceIndex) { if (hostPointerManager.get() != nullptr) { HostPointerData *hostData = hostPointerManager->getHostPointerAllocation(ptr); if (hostData != nullptr) { size_t foundEndSize = reinterpret_cast(hostData->basePtr) + hostData->size; size_t inputEndSize = reinterpret_cast(ptr) + size; if (foundEndSize >= inputEndSize) { return hostData->hostPtrAllocations.getGraphicsAllocation(rootDeviceIndex); } return nullptr; } if (NEO::DebugManager.flags.ForceHostPointerImport.get() == 1) { importExternalPointer(ptr, 
size); return hostPointerManager->getHostPointerAllocation(ptr)->hostPtrAllocations.getGraphicsAllocation(rootDeviceIndex); } return nullptr; } return nullptr; } NEO::GraphicsAllocation *DriverHandleImp::getDriverSystemMemoryAllocation(void *ptr, size_t size, uint32_t rootDeviceIndex, uintptr_t *gpuAddress) { NEO::SvmAllocationData *allocData = nullptr; bool allocFound = findAllocationDataForRange(ptr, size, &allocData); if (allocFound) { if (gpuAddress != nullptr) { *gpuAddress = reinterpret_cast(ptr); } return allocData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex); } auto allocation = findHostPointerAllocation(ptr, size, rootDeviceIndex); if (allocation != nullptr) { if (gpuAddress != nullptr) { uintptr_t offset = reinterpret_cast(ptr) - reinterpret_cast(allocation->getUnderlyingBuffer()); *gpuAddress = static_cast(allocation->getGpuAddress()) + offset; } } return allocation; } bool DriverHandleImp::isRemoteResourceNeeded(void *ptr, NEO::GraphicsAllocation *alloc, NEO::SvmAllocationData *allocData, Device *device) { return (alloc == nullptr || (allocData && ((allocData->gpuAllocations.getGraphicsAllocations().size() - 1) < device->getRootDeviceIndex()))); } void *DriverHandleImp::importFdHandle(ze_device_handle_t hDevice, ze_ipc_memory_flags_t flags, uint64_t handle, NEO::GraphicsAllocation **pAlloc) { auto neoDevice = Device::fromHandle(hDevice)->getNEODevice(); NEO::osHandle osHandle = static_cast(handle); NEO::AllocationProperties unifiedMemoryProperties{neoDevice->getRootDeviceIndex(), MemoryConstants::pageSize, NEO::AllocationType::BUFFER, neoDevice->getDeviceBitfield()}; unifiedMemoryProperties.subDevicesBitfield = neoDevice->getDeviceBitfield(); NEO::GraphicsAllocation *alloc = this->getMemoryManager()->createGraphicsAllocationFromSharedHandle(osHandle, unifiedMemoryProperties, false, false); if (alloc == nullptr) { return nullptr; } NEO::SvmAllocationData allocData(neoDevice->getRootDeviceIndex()); allocData.gpuAllocations.addAllocation(alloc); 
allocData.cpuAllocation = nullptr; allocData.size = alloc->getUnderlyingBufferSize(); allocData.memoryType = InternalMemoryType::DEVICE_UNIFIED_MEMORY; allocData.device = neoDevice; allocData.isImportedAllocation = true; if (flags & ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_UNCACHED) { allocData.allocationFlagsProperty.flags.locallyUncachedResource = 1; } if (flags & ZE_IPC_MEMORY_FLAG_BIAS_UNCACHED) { allocData.allocationFlagsProperty.flags.locallyUncachedResource = 1; } this->getSvmAllocsManager()->insertSVMAlloc(allocData); if (pAlloc) { *pAlloc = alloc; } return reinterpret_cast(alloc->getGpuAddress()); } NEO::GraphicsAllocation *DriverHandleImp::getPeerAllocation(Device *device, NEO::SvmAllocationData *allocData, void *basePtr, uintptr_t *peerGpuAddress) { DeviceImp *deviceImp = static_cast(device); NEO::GraphicsAllocation *alloc = nullptr; NEO::SvmAllocationData *peerAllocData = nullptr; void *peerPtr = nullptr; std::unique_lock lock(deviceImp->peerAllocationsMutex); auto iter = deviceImp->peerAllocations.allocations.find(basePtr); if (iter != deviceImp->peerAllocations.allocations.end()) { peerAllocData = &iter->second; alloc = peerAllocData->gpuAllocations.getDefaultGraphicsAllocation(); UNRECOVERABLE_IF(alloc == nullptr); peerPtr = reinterpret_cast(alloc->getGpuAddress()); } else { alloc = allocData->gpuAllocations.getDefaultGraphicsAllocation(); UNRECOVERABLE_IF(alloc == nullptr); uint64_t handle = alloc->peekInternalHandle(this->getMemoryManager()); ze_ipc_memory_flags_t flags = {}; peerPtr = this->importFdHandle(device, flags, handle, &alloc); if (peerPtr == nullptr) { return nullptr; } peerAllocData = this->getSvmAllocsManager()->getSVMAlloc(peerPtr); deviceImp->peerAllocations.allocations.insert(std::make_pair(basePtr, *peerAllocData)); } if (peerGpuAddress) { *peerGpuAddress = reinterpret_cast(peerPtr); } return alloc; } void *DriverHandleImp::importNTHandle(ze_device_handle_t hDevice, void *handle) { auto neoDevice = Device::fromHandle(hDevice)->getNEODevice(); 
auto alloc = this->getMemoryManager()->createGraphicsAllocationFromNTHandle(handle, neoDevice->getRootDeviceIndex(), NEO::AllocationType::SHARED_BUFFER); if (alloc == nullptr) { return nullptr; } NEO::SvmAllocationData allocData(neoDevice->getRootDeviceIndex()); allocData.gpuAllocations.addAllocation(alloc); allocData.cpuAllocation = nullptr; allocData.size = alloc->getUnderlyingBufferSize(); allocData.memoryType = InternalMemoryType::DEVICE_UNIFIED_MEMORY; allocData.device = neoDevice; this->getSvmAllocsManager()->insertSVMAlloc(allocData); return reinterpret_cast(alloc->getGpuAddress()); } ze_result_t DriverHandleImp::checkMemoryAccessFromDevice(Device *device, const void *ptr) { auto allocation = svmAllocsManager->getSVMAlloc(ptr); if (allocation == nullptr) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } if (allocation->memoryType == InternalMemoryType::HOST_UNIFIED_MEMORY || allocation->memoryType == InternalMemoryType::SHARED_UNIFIED_MEMORY) return ZE_RESULT_SUCCESS; if (allocation->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex()) != nullptr) { return ZE_RESULT_SUCCESS; } return ZE_RESULT_ERROR_INVALID_ARGUMENT; } } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/driver/driver_handle_imp.h000066400000000000000000000136331422164147700270750ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/os_interface/os_library.h" #include "level_zero/api/extensions/public/ze_exp_ext.h" #include "level_zero/core/source/driver/driver_handle.h" #include "level_zero/core/source/get_extension_function_lookup_map.h" namespace L0 { class HostPointerManager; struct DriverHandleImp : public DriverHandle { ~DriverHandleImp() override; DriverHandleImp(); ze_result_t createContext(const ze_context_desc_t *desc, uint32_t numDevices, ze_device_handle_t *phDevices, ze_context_handle_t *phContext) override; ze_result_t getDevice(uint32_t *pCount, 
ze_device_handle_t *phDevices) override; ze_result_t getProperties(ze_driver_properties_t *properties) override; ze_result_t getApiVersion(ze_api_version_t *version) override; ze_result_t getIPCProperties(ze_driver_ipc_properties_t *pIPCProperties) override; ze_result_t getExtensionFunctionAddress(const char *pFuncName, void **pfunc) override; ze_result_t getExtensionProperties(uint32_t *pCount, ze_driver_extension_properties_t *pExtensionProperties) override; NEO::MemoryManager *getMemoryManager() override; void setMemoryManager(NEO::MemoryManager *memoryManager) override; MOCKABLE_VIRTUAL void *importFdHandle(ze_device_handle_t hDevice, ze_ipc_memory_flags_t flags, uint64_t handle, NEO::GraphicsAllocation **pAlloc); MOCKABLE_VIRTUAL void *importNTHandle(ze_device_handle_t hDevice, void *handle); ze_result_t checkMemoryAccessFromDevice(Device *device, const void *ptr) override; NEO::SVMAllocsManager *getSvmAllocsManager() override; ze_result_t initialize(std::vector> neoDevices); bool findAllocationDataForRange(const void *buffer, size_t size, NEO::SvmAllocationData **allocData) override; std::vector findAllocationsWithinRange(const void *buffer, size_t size, bool *allocationRangeCovered) override; ze_result_t sysmanEventsListen(uint32_t timeout, uint32_t count, zes_device_handle_t *phDevices, uint32_t *pNumDeviceEvents, zes_event_type_flags_t *pEvents) override; ze_result_t sysmanEventsListenEx(uint64_t timeout, uint32_t count, zes_device_handle_t *phDevices, uint32_t *pNumDeviceEvents, zes_event_type_flags_t *pEvents) override; ze_result_t importExternalPointer(void *ptr, size_t size) override; ze_result_t releaseImportedPointer(void *ptr) override; ze_result_t getHostPointerBaseAddress(void *ptr, void **baseAddress) override; virtual NEO::GraphicsAllocation *findHostPointerAllocation(void *ptr, size_t size, uint32_t rootDeviceIndex) override; virtual NEO::GraphicsAllocation *getDriverSystemMemoryAllocation(void *ptr, size_t size, uint32_t rootDeviceIndex, 
uintptr_t *gpuAddress) override; NEO::GraphicsAllocation *getPeerAllocation(Device *device, NEO::SvmAllocationData *allocData, void *basePtr, uintptr_t *peerGpuAddress); void createHostPointerManager(); void sortNeoDevices(std::vector> &neoDevices); bool isRemoteResourceNeeded(void *ptr, NEO::GraphicsAllocation *alloc, NEO::SvmAllocationData *allocData, Device *device); std::unique_ptr hostPointerManager; // Experimental functions std::unordered_map extensionFunctionsLookupMap; std::mutex sharedMakeResidentAllocationsLock; std::map sharedMakeResidentAllocations; std::vector devices; // Spec extensions const std::vector> extensionsSupported = { {ZE_FLOAT_ATOMICS_EXT_NAME, ZE_FLOAT_ATOMICS_EXT_VERSION_CURRENT}, {ZE_RELAXED_ALLOCATION_LIMITS_EXP_NAME, ZE_RELAXED_ALLOCATION_LIMITS_EXP_VERSION_CURRENT}, {ZE_MODULE_PROGRAM_EXP_NAME, ZE_MODULE_PROGRAM_EXP_VERSION_CURRENT}, {ZE_KERNEL_SCHEDULING_HINTS_EXP_NAME, ZE_SCHEDULING_HINTS_EXP_VERSION_CURRENT}, {ZE_GLOBAL_OFFSET_EXP_NAME, ZE_GLOBAL_OFFSET_EXP_VERSION_CURRENT}, {ZE_PCI_PROPERTIES_EXT_NAME, ZE_PCI_PROPERTIES_EXT_VERSION_CURRENT}, {ZE_MEMORY_COMPRESSION_HINTS_EXT_NAME, ZE_MEMORY_COMPRESSION_HINTS_EXT_VERSION_CURRENT}, {ZE_IMAGE_VIEW_EXP_NAME, ZE_IMAGE_VIEW_EXP_VERSION_CURRENT}, {ZE_IMAGE_MEMORY_PROPERTIES_EXP_NAME, ZE_IMAGE_MEMORY_PROPERTIES_EXP_VERSION_CURRENT}, {ZE_MEMORY_FREE_POLICIES_EXT_NAME, ZE_MEMORY_FREE_POLICIES_EXT_VERSION_CURRENT}}; uint64_t uuidTimestamp = 0u; NEO::MemoryManager *memoryManager = nullptr; NEO::SVMAllocsManager *svmAllocsManager = nullptr; uint32_t numDevices = 0; std::set rootDeviceIndices = {}; std::map deviceBitfields; void updateRootDeviceBitFields(std::unique_ptr &neoDevice); void enableRootDeviceDebugger(std::unique_ptr &neoDevice); // Environment Variables bool enableProgramDebugging = false; bool enableSysman = false; bool enablePciIdDeviceOrder = false; uint8_t powerHint = 0; }; extern struct DriverHandleImp *GlobalDriver; } // namespace L0 
compute-runtime-22.14.22890/level_zero/core/source/driver/driver_imp.h000066400000000000000000000011671422164147700255610ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/core/source/driver/driver.h" #include #include namespace L0 { class DriverImp : public Driver { public: ze_result_t driverInit(ze_init_flags_t flags) override; void initialize(ze_result_t *result) override; protected: std::once_flag initDriverOnce; static ze_result_t initStatus; }; struct L0EnvVariables { std::string affinityMask; bool programDebugging; bool metrics; bool pin; bool sysman; bool pciIdDeviceOrder; }; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/driver/driver_version.h.in000066400000000000000000000003441422164147700270620ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #ifndef L0_DRIVER_VERSION_H #define L0_DRIVER_VERSION_H #define L0_DRIVER_VERSION @L0_DRIVER_VERSION@ #endif /* L0_DRIVER_VERSION_H */ compute-runtime-22.14.22890/level_zero/core/source/driver/host_pointer_manager.cpp000066400000000000000000000125401422164147700301600ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/driver/host_pointer_manager.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/constants.h" #include "shared/source/memory_manager/allocation_properties.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/memory_manager/multi_graphics_allocation.h" #include "level_zero/core/source/device/device_imp.h" namespace L0 { void HostPointerManager::MapBasedAllocationTracker::insert(HostPointerData allocationsData) { allocations.insert(std::make_pair(reinterpret_cast(allocationsData.basePtr), allocationsData)); 
} void HostPointerManager::MapBasedAllocationTracker::remove(const void *ptr) { HostPointerContainer::iterator iter; iter = allocations.find(ptr); allocations.erase(iter); } HostPointerData *HostPointerManager::MapBasedAllocationTracker::get(const void *ptr) { HostPointerContainer::iterator iter, end; HostPointerData *hostPtrData; if ((ptr == nullptr) || (allocations.size() == 0)) { return nullptr; } end = allocations.end(); iter = allocations.lower_bound(ptr); if (((iter != end) && (iter->first != ptr)) || (iter == end)) { if (iter == allocations.begin()) { iter = end; } else { iter--; } } if (iter != end) { hostPtrData = &iter->second; char *charPtr = reinterpret_cast(hostPtrData->basePtr); if (ptr < (charPtr + hostPtrData->size)) { return hostPtrData; } } return nullptr; } HostPointerManager::HostPointerManager(NEO::MemoryManager *memoryManager) : memoryManager(memoryManager) { } HostPointerManager::~HostPointerManager() { } ze_result_t HostPointerManager::createHostPointerMultiAllocation(std::vector &devices, void *ptr, size_t size) { if (size == 0 || ptr == nullptr) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } size_t endAddress = reinterpret_cast(ptr) + size; std::unique_lock lock(this->mtx); auto beginAllocation = hostPointerAllocations.get(ptr); auto endingAllocation = hostPointerAllocations.get(reinterpret_cast(endAddress - 1)); if (beginAllocation != nullptr && beginAllocation == endingAllocation) { return ZE_RESULT_SUCCESS; } if (beginAllocation != nullptr) { if (endingAllocation != nullptr) { return ZE_RESULT_ERROR_OVERLAPPING_REGIONS; } return ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE; } if (endingAllocation != nullptr) { UNRECOVERABLE_IF(endingAllocation->basePtr == ptr); return ZE_RESULT_ERROR_INVALID_SIZE; } HostPointerData hostData(static_cast(devices.size() - 1)); hostData.basePtr = ptr; hostData.size = size; for (auto device : devices) { NEO::GraphicsAllocation *gfxAlloc = createHostPointerAllocation(device->getRootDeviceIndex(), ptr, size, 
device->getNEODevice()->getDeviceBitfield()); if (gfxAlloc == nullptr) { auto allocations = hostData.hostPtrAllocations.getGraphicsAllocations(); for (auto &allocation : allocations) { memoryManager->freeGraphicsMemory(allocation); } return ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY; } hostData.hostPtrAllocations.addAllocation(gfxAlloc); } hostPointerAllocations.insert(hostData); return ZE_RESULT_SUCCESS; } NEO::GraphicsAllocation *HostPointerManager::createHostPointerAllocation(uint32_t rootDeviceIndex, void *ptr, size_t size, const NEO::DeviceBitfield &deviceBitfield) { NEO::AllocationProperties properties = {rootDeviceIndex, false, size, NEO::AllocationType::EXTERNAL_HOST_PTR, false, deviceBitfield}; properties.flags.flushL3RequiredForRead = properties.flags.flushL3RequiredForWrite = true; auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(properties, ptr); return allocation; } HostPointerData *HostPointerManager::getHostPointerAllocation(const void *ptr) { std::unique_lock lock(mtx); return hostPointerAllocations.get(ptr); } bool HostPointerManager::freeHostPointerAllocation(void *ptr) { std::unique_lock lock(mtx); HostPointerData *hostPtrData = hostPointerAllocations.get(ptr); if (hostPtrData == nullptr) { return false; } auto graphicsAllocations = hostPtrData->hostPtrAllocations.getGraphicsAllocations(); for (auto gpuAllocation : graphicsAllocations) { memoryManager->freeGraphicsMemory(gpuAllocation); } hostPointerAllocations.remove(hostPtrData->basePtr); return true; } } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/driver/host_pointer_manager.h000066400000000000000000000046771422164147700276410ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/common_types.h" #include "shared/source/memory_manager/multi_graphics_allocation.h" #include "shared/source/utilities/spinlock.h" #include #include #include #include #include namespace 
NEO { class GraphicsAllocation; class MemoryManager; } // namespace NEO namespace L0 { struct Device; struct HostPointerData { HostPointerData(uint32_t maxRootDeviceIndex) : hostPtrAllocations(maxRootDeviceIndex), maxRootDeviceIndex(maxRootDeviceIndex) { } HostPointerData(const HostPointerData &hostPtrData) : HostPointerData(hostPtrData.maxRootDeviceIndex) { basePtr = hostPtrData.basePtr; size = hostPtrData.size; for (auto allocation : hostPtrData.hostPtrAllocations.getGraphicsAllocations()) { if (allocation) { this->hostPtrAllocations.addAllocation(allocation); } } } NEO::MultiGraphicsAllocation hostPtrAllocations; void *basePtr = nullptr; size_t size = 0u; protected: const uint32_t maxRootDeviceIndex; }; class HostPointerManager { public: class MapBasedAllocationTracker { friend class HostPointerManager; public: using HostPointerContainer = std::map; void insert(HostPointerData allocationsData); void remove(const void *ptr); HostPointerData *get(const void *ptr); size_t getNumAllocs() const { return allocations.size(); }; protected: HostPointerContainer allocations; }; HostPointerManager(NEO::MemoryManager *memoryManager); virtual ~HostPointerManager(); ze_result_t createHostPointerMultiAllocation(std::vector &devices, void *ptr, size_t size); HostPointerData *getHostPointerAllocation(const void *ptr); bool freeHostPointerAllocation(void *ptr); protected: NEO::GraphicsAllocation *createHostPointerAllocation(uint32_t rootDeviceIndex, void *ptr, size_t size, const NEO::DeviceBitfield &deviceBitfield); MapBasedAllocationTracker hostPointerAllocations; NEO::MemoryManager *memoryManager; NEO::SpinLock mtx; }; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/event/000077500000000000000000000000001422164147700230715ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/source/event/event.cpp000066400000000000000000000146721422164147700247300ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * 
SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/event/event.h" #include "shared/source/command_stream/command_stream_receiver_hw.h" #include "shared/source/command_stream/csr_definitions.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/device/device.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/helpers/constants.h" #include "shared/source/helpers/string.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/memory_manager/memory_operations_handler.h" #include "shared/source/utilities/cpuintrinsics.h" #include "shared/source/utilities/wait_util.h" #include "level_zero/core/source/cmdlist/cmdlist.h" #include "level_zero/core/source/cmdqueue/cmdqueue.h" #include "level_zero/core/source/context/context_imp.h" #include "level_zero/core/source/device/device.h" #include "level_zero/core/source/device/device_imp.h" #include "level_zero/core/source/driver/driver_handle_imp.h" #include "level_zero/core/source/hw_helpers/l0_hw_helper.h" #include "level_zero/tools/source/metrics/metric.h" #include // #include "level_zero/core/source/event/event_impl.inl" namespace L0 { template Event *Event::create(EventPool *, const ze_event_desc_t *, Device *); template Event *Event::create(EventPool *, const ze_event_desc_t *, Device *); ze_result_t EventPoolImp::initialize(DriverHandle *driver, Context *context, uint32_t numDevices, ze_device_handle_t *phDevices) { this->context = static_cast(context); std::set rootDeviceIndices; uint32_t maxRootDeviceIndex = 0u; DriverHandleImp *driverHandleImp = static_cast(driver); bool useDevicesFromApi = true; bool useDeviceAlloc = isEventPoolDeviceAllocationFlagSet(); if (numDevices == 0) { numDevices = static_cast(driverHandleImp->devices.size()); useDevicesFromApi = false; } for (uint32_t i = 0u; i < numDevices; i++) { Device 
*eventDevice = nullptr; if (useDevicesFromApi) { eventDevice = Device::fromHandle(phDevices[i]); } else { eventDevice = driverHandleImp->devices[i]; } if (!eventDevice) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } devices.push_back(eventDevice); rootDeviceIndices.insert(eventDevice->getNEODevice()->getRootDeviceIndex()); if (maxRootDeviceIndex < eventDevice->getNEODevice()->getRootDeviceIndex()) { maxRootDeviceIndex = eventDevice->getNEODevice()->getRootDeviceIndex(); } } auto &hwHelper = devices[0]->getHwHelper(); eventAlignment = static_cast(hwHelper.getTimestampPacketAllocatorAlignment()); eventSize = static_cast(alignUp(EventPacketsCount::eventPackets * hwHelper.getSingleTimestampPacketSize(), eventAlignment)); size_t alignedSize = alignUp(numEvents * eventSize, MemoryConstants::pageSize64k); NEO::AllocationType allocationType = isEventPoolTimestampFlagSet() ? NEO::AllocationType::TIMESTAMP_PACKET_TAG_BUFFER : NEO::AllocationType::BUFFER_HOST_MEMORY; if (this->devices.size() > 1) { useDeviceAlloc = false; } if (useDeviceAlloc) { allocationType = NEO::AllocationType::GPU_TIMESTAMP_DEVICE_BUFFER; } eventPoolAllocations = std::make_unique(maxRootDeviceIndex); bool allocatedMemory = false; if (useDeviceAlloc) { NEO::AllocationProperties allocationProperties{*rootDeviceIndices.begin(), alignedSize, allocationType, devices[0]->getNEODevice()->getDeviceBitfield()}; allocationProperties.alignment = eventAlignment; auto graphicsAllocation = driver->getMemoryManager()->allocateGraphicsMemoryWithProperties(allocationProperties); if (graphicsAllocation) { eventPoolAllocations->addAllocation(graphicsAllocation); allocatedMemory = true; } } else { NEO::AllocationProperties allocationProperties{*rootDeviceIndices.begin(), alignedSize, allocationType, systemMemoryBitfield}; allocationProperties.alignment = eventAlignment; std::vector rootDeviceIndicesVector = {rootDeviceIndices.begin(), rootDeviceIndices.end()}; eventPoolPtr = 
driver->getMemoryManager()->createMultiGraphicsAllocationInSystemMemoryPool(rootDeviceIndicesVector, allocationProperties, *eventPoolAllocations); allocatedMemory = (nullptr != eventPoolPtr); } if (!allocatedMemory) { return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY; } return ZE_RESULT_SUCCESS; } EventPoolImp::~EventPoolImp() { if (eventPoolAllocations) { auto graphicsAllocations = eventPoolAllocations->getGraphicsAllocations(); auto memoryManager = devices[0]->getDriverHandle()->getMemoryManager(); for (auto gpuAllocation : graphicsAllocations) { memoryManager->freeGraphicsMemory(gpuAllocation); } } } ze_result_t EventPoolImp::destroy() { delete this; return ZE_RESULT_SUCCESS; } ze_result_t EventPoolImp::createEvent(const ze_event_desc_t *desc, ze_event_handle_t *phEvent) { if (desc->index > (getNumEvents() - 1)) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } auto &l0HwHelper = L0HwHelper::get(getDevice()->getHwInfo().platform.eRenderCoreFamily); *phEvent = l0HwHelper.createEvent(this, desc, getDevice()); return ZE_RESULT_SUCCESS; } ze_result_t Event::destroy() { delete this; return ZE_RESULT_SUCCESS; } EventPool *EventPool::create(DriverHandle *driver, Context *context, uint32_t numDevices, ze_device_handle_t *phDevices, const ze_event_pool_desc_t *desc, ze_result_t &result) { auto eventPool = std::make_unique(desc); if (!eventPool) { result = ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY; DEBUG_BREAK_IF(true); return nullptr; } result = eventPool->initialize(driver, context, numDevices, phDevices); if (result) { return nullptr; } return eventPool.release(); } } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/event/event.h000066400000000000000000000213261422164147700243670ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/timestamp_packet.h" #include "level_zero/core/source/cmdlist/cmdlist.h" #include "level_zero/core/source/context/context_imp.h" #include 
"level_zero/core/source/device/device.h" #include "level_zero/core/source/driver/driver_handle.h" #include #include #include struct _ze_event_handle_t {}; struct _ze_event_pool_handle_t {}; namespace L0 { typedef uint64_t FlushStamp; struct EventPool; struct MetricStreamer; namespace EventPacketsCount { constexpr uint32_t maxKernelSplit = 3; constexpr uint32_t eventPackets = maxKernelSplit * NEO ::TimestampPacketSizeControl::preferredPacketCount; } // namespace EventPacketsCount struct Event : _ze_event_handle_t { virtual ~Event() = default; virtual ze_result_t destroy(); virtual ze_result_t hostSignal() = 0; virtual ze_result_t hostSynchronize(uint64_t timeout) = 0; virtual ze_result_t queryStatus() = 0; virtual ze_result_t reset() = 0; virtual ze_result_t queryKernelTimestamp(ze_kernel_timestamp_result_t *dstptr) = 0; virtual ze_result_t queryTimestampsExp(Device *device, uint32_t *pCount, ze_kernel_timestamp_result_t *pTimestamps) = 0; enum State : uint32_t { STATE_SIGNALED = 0u, STATE_CLEARED = std::numeric_limits::max(), STATE_INITIAL = STATE_CLEARED }; template static Event *create(EventPool *eventPool, const ze_event_desc_t *desc, Device *device); static Event *fromHandle(ze_event_handle_t handle) { return static_cast(handle); } inline ze_event_handle_t toHandle() { return this; } virtual NEO::GraphicsAllocation &getAllocation(Device *device) = 0; virtual uint64_t getGpuAddress(Device *device) = 0; virtual uint32_t getPacketsInUse() = 0; virtual uint64_t getPacketAddress(Device *device) = 0; virtual void resetPackets() = 0; void *getHostAddress() { return hostAddress; } virtual void setPacketsInUse(uint32_t value) = 0; uint32_t getCurrKernelDataIndex() const { return kernelCount - 1; } size_t getContextStartOffset() const { return contextStartOffset; } size_t getContextEndOffset() const { return contextEndOffset; } size_t getGlobalStartOffset() const { return globalStartOffset; } size_t getGlobalEndOffset() const { return globalEndOffset; } size_t 
getSinglePacketSize() const { return singlePacketSize; } size_t getTimestampSizeInDw() const { return timestampSizeInDw; } void setEventTimestampFlag(bool timestampFlag) { isTimestampEvent = timestampFlag; } bool isEventTimestampFlagSet() const { return isTimestampEvent; } void setPartitionedEvent(bool partitionedEvent) { this->partitionedEvent = partitionedEvent; } bool isPartitionedEvent() const { return partitionedEvent; } bool useContextEndOffset() const { return isTimestampEvent || partitionedEvent; } uint64_t globalStartTS; uint64_t globalEndTS; uint64_t contextStartTS; uint64_t contextEndTS; std::chrono::microseconds gpuHangCheckPeriod{500'000}; // Metric streamer instance associated with the event. MetricStreamer *metricStreamer = nullptr; NEO::CommandStreamReceiver *csr = nullptr; void *hostAddress = nullptr; ze_event_scope_flags_t signalScope = 0u; ze_event_scope_flags_t waitScope = 0u; uint32_t kernelCount = 1u; bool l3FlushWaApplied = false; protected: size_t contextStartOffset = 0u; size_t contextEndOffset = 0u; size_t globalStartOffset = 0u; size_t globalEndOffset = 0u; size_t timestampSizeInDw = 0u; size_t singlePacketSize = 0u; bool isTimestampEvent = false; bool partitionedEvent = false; }; template class KernelEventCompletionData : public NEO::TimestampPackets { public: uint32_t getPacketsUsed() const { return packetsUsed; } void setPacketsUsed(uint32_t value) { packetsUsed = value; } protected: uint32_t packetsUsed = 1; }; template struct EventImp : public Event { EventImp(EventPool *eventPool, int index, Device *device) : device(device), index(index), eventPool(eventPool) { contextStartOffset = NEO::TimestampPackets::getContextStartOffset(); contextEndOffset = NEO::TimestampPackets::getContextEndOffset(); globalStartOffset = NEO::TimestampPackets::getGlobalStartOffset(); globalEndOffset = NEO::TimestampPackets::getGlobalEndOffset(); timestampSizeInDw = (sizeof(TagSizeT) / 4); singlePacketSize = NEO::TimestampPackets::getSinglePacketSize(); } 
~EventImp() override {} ze_result_t hostSignal() override; ze_result_t hostSynchronize(uint64_t timeout) override; ze_result_t queryStatus() override; ze_result_t reset() override; ze_result_t queryKernelTimestamp(ze_kernel_timestamp_result_t *dstptr) override; ze_result_t queryTimestampsExp(Device *device, uint32_t *pCount, ze_kernel_timestamp_result_t *pTimestamps) override; NEO::GraphicsAllocation &getAllocation(Device *device) override; uint64_t getGpuAddress(Device *device) override; void resetPackets() override; uint64_t getPacketAddress(Device *device) override; uint32_t getPacketsInUse() override; void setPacketsInUse(uint32_t value) override; std::unique_ptr[]> kernelEventCompletionData; Device *device; int index; EventPool *eventPool; protected: ze_result_t calculateProfilingData(); ze_result_t queryStatusKernelTimestamp(); ze_result_t queryStatusNonTimestamp(); ze_result_t hostEventSetValue(TagSizeT eventValue); ze_result_t hostEventSetValueTimestamps(TagSizeT eventVal); void assignKernelEventCompletionData(void *address); }; struct EventPool : _ze_event_pool_handle_t { static EventPool *create(DriverHandle *driver, Context *context, uint32_t numDevices, ze_device_handle_t *phDevices, const ze_event_pool_desc_t *desc, ze_result_t &result); virtual ~EventPool() = default; virtual ze_result_t destroy() = 0; virtual ze_result_t getIpcHandle(ze_ipc_event_pool_handle_t *pIpcHandle) = 0; virtual ze_result_t closeIpcHandle() = 0; virtual ze_result_t createEvent(const ze_event_desc_t *desc, ze_event_handle_t *phEvent) = 0; virtual Device *getDevice() = 0; static EventPool *fromHandle(ze_event_pool_handle_t handle) { return static_cast(handle); } inline ze_event_pool_handle_t toHandle() { return this; } virtual NEO::MultiGraphicsAllocation &getAllocation() { return *eventPoolAllocations; } virtual uint32_t getEventSize() = 0; virtual void setEventSize(uint32_t) = 0; virtual void setEventAlignment(uint32_t) = 0; bool isEventPoolTimestampFlagSet() { if 
(NEO::DebugManager.flags.OverrideTimestampEvents.get() != -1) { auto timestampOverride = !!NEO::DebugManager.flags.OverrideTimestampEvents.get(); return timestampOverride; } if (eventPoolFlags & ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP) { return true; } return false; } bool isEventPoolDeviceAllocationFlagSet() { if (!(eventPoolFlags & ZE_EVENT_POOL_FLAG_HOST_VISIBLE)) { return true; } return false; } std::unique_ptr eventPoolAllocations; ze_event_pool_flags_t eventPoolFlags; }; struct EventPoolImp : public EventPool { EventPoolImp(const ze_event_pool_desc_t *desc) : numEvents(desc->count) { eventPoolFlags = desc->flags; } ze_result_t initialize(DriverHandle *driver, Context *context, uint32_t numDevices, ze_device_handle_t *phDevices); ~EventPoolImp(); ze_result_t destroy() override; ze_result_t getIpcHandle(ze_ipc_event_pool_handle_t *pIpcHandle) override; ze_result_t closeIpcHandle() override; ze_result_t createEvent(const ze_event_desc_t *desc, ze_event_handle_t *phEvent) override; uint32_t getEventSize() override { return eventSize; } void setEventSize(uint32_t size) override { eventSize = size; } void setEventAlignment(uint32_t alignment) override { eventAlignment = alignment; } size_t getNumEvents() { return numEvents; } Device *getDevice() override { return devices[0]; } void *eventPoolPtr = nullptr; std::vector devices; ContextImp *context = nullptr; size_t numEvents; bool isImportedIpcPool = false; protected: uint32_t eventAlignment = 0; uint32_t eventSize = 0; }; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/event/event_impl.inl000066400000000000000000000355751422164147700257560ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/memory_manager/internal_allocation_storage.h" #include "level_zero/core/source/event/event.h" namespace L0 { template Event *Event::create(EventPool *eventPool, const ze_event_desc_t *desc, Device *device) { auto event = new 
EventImp(eventPool, desc->index, device); UNRECOVERABLE_IF(event == nullptr); if (eventPool->isEventPoolTimestampFlagSet()) { event->setEventTimestampFlag(true); } event->kernelEventCompletionData = std::make_unique[]>(EventPacketsCount::maxKernelSplit); auto alloc = eventPool->getAllocation().getGraphicsAllocation(device->getNEODevice()->getRootDeviceIndex()); uint64_t baseHostAddr = reinterpret_cast(alloc->getUnderlyingBuffer()); event->hostAddress = reinterpret_cast(baseHostAddr + (desc->index * eventPool->getEventSize())); event->signalScope = desc->signal; event->waitScope = desc->wait; event->csr = device->getNEODevice()->getDefaultEngine().commandStreamReceiver; EventPoolImp *EventPoolImp = static_cast(eventPool); // do not reset even if it has been imported, since event pool // might have been imported after events being already signaled if (EventPoolImp->isImportedIpcPool == false) { event->reset(); } return event; } template uint64_t EventImp::getGpuAddress(Device *device) { auto alloc = eventPool->getAllocation().getGraphicsAllocation(device->getNEODevice()->getRootDeviceIndex()); return (alloc->getGpuAddress() + (index * eventPool->getEventSize())); } template NEO::GraphicsAllocation &EventImp::getAllocation(Device *device) { return *this->eventPool->getAllocation().getGraphicsAllocation(device->getNEODevice()->getRootDeviceIndex()); } template ze_result_t EventImp::calculateProfilingData() { constexpr uint32_t skipL3EventPacketIndex = 2u; globalStartTS = kernelEventCompletionData[0].getGlobalStartValue(0); globalEndTS = kernelEventCompletionData[0].getGlobalEndValue(0); contextStartTS = kernelEventCompletionData[0].getContextStartValue(0); contextEndTS = kernelEventCompletionData[0].getContextEndValue(0); for (uint32_t i = 0; i < kernelCount; i++) { for (auto packetId = 0u; packetId < kernelEventCompletionData[i].getPacketsUsed(); packetId++) { if (this->l3FlushWaApplied && ((packetId % skipL3EventPacketIndex) != 0)) { continue; } if (globalStartTS > 
kernelEventCompletionData[i].getGlobalStartValue(packetId)) { globalStartTS = kernelEventCompletionData[i].getGlobalStartValue(packetId); } if (contextStartTS > kernelEventCompletionData[i].getContextStartValue(packetId)) { contextStartTS = kernelEventCompletionData[i].getContextStartValue(packetId); } if (contextEndTS < kernelEventCompletionData[i].getContextEndValue(packetId)) { contextEndTS = kernelEventCompletionData[i].getContextEndValue(packetId); } if (globalEndTS < kernelEventCompletionData[i].getGlobalEndValue(packetId)) { globalEndTS = kernelEventCompletionData[i].getGlobalEndValue(packetId); } } } return ZE_RESULT_SUCCESS; } template void EventImp::assignKernelEventCompletionData(void *address) { for (uint32_t i = 0; i < kernelCount; i++) { uint32_t packetsToCopy = 0; packetsToCopy = kernelEventCompletionData[i].getPacketsUsed(); for (uint32_t packetId = 0; packetId < packetsToCopy; packetId++) { kernelEventCompletionData[i].assignDataToAllTimestamps(packetId, address); address = ptrOffset(address, singlePacketSize); } } } template ze_result_t EventImp::queryStatusKernelTimestamp() { assignKernelEventCompletionData(hostAddress); uint32_t queryVal = Event::STATE_CLEARED; for (uint32_t i = 0; i < kernelCount; i++) { uint32_t packetsToCheck = kernelEventCompletionData[i].getPacketsUsed(); for (uint32_t packetId = 0; packetId < packetsToCheck; packetId++) { bool ready = NEO::WaitUtils::waitFunctionWithPredicate( static_cast(kernelEventCompletionData[i].getContextEndAddress(packetId)), queryVal, std::not_equal_to()); if (!ready) { return ZE_RESULT_NOT_READY; } } } this->csr->getInternalAllocationStorage()->cleanAllocationList(this->csr->peekTaskCount(), NEO::AllocationUsage::TEMPORARY_ALLOCATION); return ZE_RESULT_SUCCESS; } template ze_result_t EventImp::queryStatusNonTimestamp() { assignKernelEventCompletionData(hostAddress); uint32_t queryVal = Event::STATE_CLEARED; for (uint32_t i = 0; i < kernelCount; i++) { uint32_t packetsToCheck = 
kernelEventCompletionData[i].getPacketsUsed(); for (uint32_t packetId = 0; packetId < packetsToCheck; packetId++) { void const *queryAddress = partitionedEvent ? kernelEventCompletionData[i].getContextEndAddress(packetId) : kernelEventCompletionData[i].getContextStartAddress(packetId); bool ready = NEO::WaitUtils::waitFunctionWithPredicate( static_cast(queryAddress), queryVal, std::not_equal_to()); if (!ready) { return ZE_RESULT_NOT_READY; } } } this->csr->getInternalAllocationStorage()->cleanAllocationList(this->csr->peekTaskCount(), NEO::AllocationUsage::TEMPORARY_ALLOCATION); return ZE_RESULT_SUCCESS; } template ze_result_t EventImp::queryStatus() { TagSizeT *hostAddr = static_cast(hostAddress); if (metricStreamer != nullptr) { *hostAddr = metricStreamer->getNotificationState(); } this->csr->downloadAllocations(); if (isEventTimestampFlagSet()) { return queryStatusKernelTimestamp(); } else { return queryStatusNonTimestamp(); } } template ze_result_t EventImp::hostEventSetValueTimestamps(TagSizeT eventVal) { auto baseAddr = castToUint64(hostAddress); auto eventTsSetFunc = [&eventVal](auto tsAddr) { auto tsptr = reinterpret_cast(tsAddr); memcpy_s(tsptr, sizeof(TagSizeT), static_cast(&eventVal), sizeof(TagSizeT)); }; for (uint32_t i = 0; i < kernelCount; i++) { uint32_t packetsToSet = kernelEventCompletionData[i].getPacketsUsed(); for (uint32_t j = 0; j < packetsToSet; j++) { eventTsSetFunc(baseAddr + contextStartOffset); eventTsSetFunc(baseAddr + globalStartOffset); eventTsSetFunc(baseAddr + contextEndOffset); eventTsSetFunc(baseAddr + globalEndOffset); baseAddr += singlePacketSize; } } const auto dataSize = 4u * EventPacketsCount::maxKernelSplit * NEO::TimestampPacketSizeControl::preferredPacketCount; TagSizeT tagValues[dataSize]; for (uint32_t index = 0u; index < dataSize; index++) { tagValues[index] = eventVal; } assignKernelEventCompletionData(tagValues); return ZE_RESULT_SUCCESS; } template ze_result_t EventImp::hostEventSetValue(TagSizeT eventVal) { if 
(isEventTimestampFlagSet()) { return hostEventSetValueTimestamps(eventVal); } auto packetHostAddr = hostAddress; UNRECOVERABLE_IF(packetHostAddr == nullptr); for (uint32_t i = 0; i < kernelCount; i++) { uint32_t packetsToSet = kernelEventCompletionData[i].getPacketsUsed(); for (uint32_t j = 0; j < packetsToSet; j++) { memcpy_s(packetHostAddr, sizeof(TagSizeT), static_cast(&eventVal), sizeof(TagSizeT)); if (isPartitionedEvent()) { void *packetContextEndAddr = ptrOffset(packetHostAddr, contextEndOffset); memcpy_s(packetContextEndAddr, sizeof(TagSizeT), static_cast(&eventVal), sizeof(TagSizeT)); } packetHostAddr = ptrOffset(packetHostAddr, singlePacketSize); } } return ZE_RESULT_SUCCESS; } template ze_result_t EventImp::hostSignal() { return hostEventSetValue(Event::STATE_SIGNALED); } template ze_result_t EventImp::hostSynchronize(uint64_t timeout) { std::chrono::microseconds elapsedTimeSinceGpuHangCheck{0}; std::chrono::high_resolution_clock::time_point waitStartTime, lastHangCheckTime, currentTime; uint64_t timeDiff = 0; ze_result_t ret = ZE_RESULT_NOT_READY; if (this->csr->getType() == NEO::CommandStreamReceiverType::CSR_AUB) { return ZE_RESULT_SUCCESS; } if (timeout == 0) { return queryStatus(); } waitStartTime = std::chrono::high_resolution_clock::now(); lastHangCheckTime = waitStartTime; while (true) { ret = queryStatus(); if (ret == ZE_RESULT_SUCCESS) { return ret; } currentTime = std::chrono::high_resolution_clock::now(); elapsedTimeSinceGpuHangCheck = std::chrono::duration_cast(currentTime - lastHangCheckTime); if (elapsedTimeSinceGpuHangCheck.count() >= this->gpuHangCheckPeriod.count()) { lastHangCheckTime = currentTime; if (this->csr->isGpuHangDetected()) { return ZE_RESULT_ERROR_DEVICE_LOST; } } if (timeout == std::numeric_limits::max()) { continue; } timeDiff = std::chrono::duration_cast(currentTime - waitStartTime).count(); if (timeDiff >= timeout) { break; } } return ret; } template ze_result_t EventImp::reset() { if (isEventTimestampFlagSet()) { 
kernelCount = EventPacketsCount::maxKernelSplit; for (uint32_t i = 0; i < kernelCount; i++) { kernelEventCompletionData[i].setPacketsUsed(NEO::TimestampPacketSizeControl::preferredPacketCount); } } partitionedEvent = true; hostEventSetValue(Event::STATE_INITIAL); resetPackets(); partitionedEvent = false; return ZE_RESULT_SUCCESS; } template ze_result_t EventImp::queryKernelTimestamp(ze_kernel_timestamp_result_t *dstptr) { ze_kernel_timestamp_result_t &result = *dstptr; if (queryStatus() != ZE_RESULT_SUCCESS) { return ZE_RESULT_NOT_READY; } assignKernelEventCompletionData(hostAddress); calculateProfilingData(); auto eventTsSetFunc = [&](uint64_t ×tampFieldToCopy, uint64_t ×tampFieldForWriting) { memcpy_s(&(timestampFieldForWriting), sizeof(uint64_t), static_cast(×tampFieldToCopy), sizeof(uint64_t)); }; if (!NEO::HwHelper::get(device->getHwInfo().platform.eRenderCoreFamily).useOnlyGlobalTimestamps()) { eventTsSetFunc(contextStartTS, result.context.kernelStart); eventTsSetFunc(globalStartTS, result.global.kernelStart); eventTsSetFunc(contextEndTS, result.context.kernelEnd); eventTsSetFunc(globalEndTS, result.global.kernelEnd); } else { eventTsSetFunc(globalStartTS, result.context.kernelStart); eventTsSetFunc(globalStartTS, result.global.kernelStart); eventTsSetFunc(globalEndTS, result.context.kernelEnd); eventTsSetFunc(globalEndTS, result.global.kernelEnd); } return ZE_RESULT_SUCCESS; } template ze_result_t EventImp::queryTimestampsExp(Device *device, uint32_t *pCount, ze_kernel_timestamp_result_t *pTimestamps) { uint32_t timestampPacket = 0; uint64_t globalStartTs, globalEndTs, contextStartTs, contextEndTs; globalStartTs = globalEndTs = contextStartTs = contextEndTs = Event::STATE_INITIAL; auto deviceImp = static_cast(device); bool isStaticPartitioning = true; if (NEO::DebugManager.flags.EnableStaticPartitioning.get() == 0) { isStaticPartitioning = false; } if (!isStaticPartitioning) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } uint32_t numPacketsUsed = 1u; if 
(!deviceImp->isSubdevice) { numPacketsUsed = this->getPacketsInUse(); } if ((*pCount == 0) || (*pCount > numPacketsUsed)) { *pCount = numPacketsUsed; return ZE_RESULT_SUCCESS; } for (auto i = 0u; i < *pCount; i++) { ze_kernel_timestamp_result_t &result = *(pTimestamps + i); auto queryTsEventAssignFunc = [&](uint64_t ×tampFieldForWriting, uint64_t ×tampFieldToCopy) { memcpy_s(×tampFieldForWriting, sizeof(uint64_t), static_cast(×tampFieldToCopy), sizeof(uint64_t)); }; auto packetId = i; if (deviceImp->isSubdevice) { packetId = static_cast(deviceImp->getNEODevice())->getSubDeviceIndex(); } globalStartTs = kernelEventCompletionData[timestampPacket].getGlobalStartValue(packetId); contextStartTs = kernelEventCompletionData[timestampPacket].getContextStartValue(packetId); contextEndTs = kernelEventCompletionData[timestampPacket].getContextEndValue(packetId); globalEndTs = kernelEventCompletionData[timestampPacket].getGlobalEndValue(packetId); queryTsEventAssignFunc(result.global.kernelStart, globalStartTs); queryTsEventAssignFunc(result.context.kernelStart, contextStartTs); queryTsEventAssignFunc(result.global.kernelEnd, globalEndTs); queryTsEventAssignFunc(result.context.kernelEnd, contextEndTs); } return ZE_RESULT_SUCCESS; } template void EventImp::resetPackets() { for (uint32_t i = 0; i < kernelCount; i++) { kernelEventCompletionData[i].setPacketsUsed(1); } kernelCount = 1; } template uint32_t EventImp::getPacketsInUse() { uint32_t packetsInUse = 0; for (uint32_t i = 0; i < kernelCount; i++) { packetsInUse += kernelEventCompletionData[i].getPacketsUsed(); } return packetsInUse; } template void EventImp::setPacketsInUse(uint32_t value) { kernelEventCompletionData[getCurrKernelDataIndex()].setPacketsUsed(value); } template uint64_t EventImp::getPacketAddress(Device *device) { uint64_t address = getGpuAddress(device); for (uint32_t i = 0; i < kernelCount - 1; i++) { address += kernelEventCompletionData[i].getPacketsUsed() * singlePacketSize; } return address; } } // 
namespace L0 compute-runtime-22.14.22890/level_zero/core/source/fence/000077500000000000000000000000001422164147700230305ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/source/fence/fence.cpp000066400000000000000000000053001422164147700246120ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/fence/fence.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/helpers/constants.h" #include "shared/source/helpers/string.h" #include "shared/source/memory_manager/memory_manager.h" namespace L0 { Fence *Fence::create(CommandQueueImp *cmdQueue, const ze_fence_desc_t *desc) { auto fence = new Fence(cmdQueue); UNRECOVERABLE_IF(fence == nullptr); fence->reset(!!(desc->flags & ZE_FENCE_FLAG_SIGNALED)); return fence; } ze_result_t Fence::queryStatus() { auto csr = cmdQueue->getCsr(); csr->downloadAllocations(); auto *hostAddr = csr->getTagAddress(); return csr->testTaskCountReady(hostAddr, taskCount) ? 
ZE_RESULT_SUCCESS : ZE_RESULT_NOT_READY; } ze_result_t Fence::assignTaskCountFromCsr() { auto csr = cmdQueue->getCsr(); taskCount = csr->peekTaskCount() + 1; return ZE_RESULT_SUCCESS; } ze_result_t Fence::reset(bool signaled) { if (signaled) { taskCount = 0; } else { taskCount = std::numeric_limits::max(); } return ZE_RESULT_SUCCESS; } ze_result_t Fence::hostSynchronize(uint64_t timeout) { std::chrono::microseconds elapsedTimeSinceGpuHangCheck{0}; std::chrono::high_resolution_clock::time_point waitStartTime, lastHangCheckTime, currentTime; uint64_t timeDiff = 0; ze_result_t ret = ZE_RESULT_NOT_READY; const auto csr = cmdQueue->getCsr(); if (csr->getType() == NEO::CommandStreamReceiverType::CSR_AUB) { return ZE_RESULT_SUCCESS; } if (std::numeric_limits::max() == taskCount) { return ZE_RESULT_NOT_READY; } if (timeout == 0) { return queryStatus(); } waitStartTime = std::chrono::high_resolution_clock::now(); lastHangCheckTime = waitStartTime; while (timeDiff < timeout) { ret = queryStatus(); if (ret == ZE_RESULT_SUCCESS) { return ZE_RESULT_SUCCESS; } currentTime = std::chrono::high_resolution_clock::now(); elapsedTimeSinceGpuHangCheck = std::chrono::duration_cast(currentTime - lastHangCheckTime); if (elapsedTimeSinceGpuHangCheck.count() >= gpuHangCheckPeriod.count()) { lastHangCheckTime = currentTime; if (csr->isGpuHangDetected()) { return ZE_RESULT_ERROR_DEVICE_LOST; } } if (timeout == std::numeric_limits::max()) { continue; } timeDiff = std::chrono::duration_cast(currentTime - waitStartTime).count(); } return ret; } } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/fence/fence.h000066400000000000000000000023471422164147700242670ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/csr_definitions.h" #include "level_zero/core/source/cmdqueue/cmdqueue.h" #include "level_zero/core/source/cmdqueue/cmdqueue_imp.h" #include #include #include 
struct _ze_fence_handle_t {}; namespace L0 { struct Fence : _ze_fence_handle_t { static Fence *create(CommandQueueImp *cmdQueue, const ze_fence_desc_t *desc); virtual ~Fence() = default; MOCKABLE_VIRTUAL ze_result_t destroy() { delete this; return ZE_RESULT_SUCCESS; } MOCKABLE_VIRTUAL ze_result_t hostSynchronize(uint64_t timeout); MOCKABLE_VIRTUAL ze_result_t queryStatus(); MOCKABLE_VIRTUAL ze_result_t assignTaskCountFromCsr(); MOCKABLE_VIRTUAL ze_result_t reset(bool signaled); static Fence *fromHandle(ze_fence_handle_t handle) { return static_cast(handle); } inline ze_fence_handle_t toHandle() { return this; } protected: Fence(CommandQueueImp *cmdQueueImp) : cmdQueue(cmdQueueImp) {} std::chrono::microseconds gpuHangCheckPeriod{500'000}; CommandQueueImp *cmdQueue; uint32_t taskCount = 0; }; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/gen11/000077500000000000000000000000001422164147700226635ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/source/gen11/CMakeLists.txt000066400000000000000000000012461422164147700254260ustar00rootroot00000000000000# # Copyright (C) 2020-2022 Intel Corporation # # SPDX-License-Identifier: MIT # if(SUPPORT_GEN11) set(HW_SOURCES_GEN11 ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_gen11.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_gen11.h ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_gen11.inl ${CMAKE_CURRENT_SOURCE_DIR}/debugger_gen11.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enable_family_full_l0_gen11.cpp ${CMAKE_CURRENT_SOURCE_DIR}/l0_hw_helper_gen11.cpp ) add_subdirectories() target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${HW_SOURCES_GEN11}) set_property(GLOBAL PROPERTY L0_HW_SOURCES_GEN11 ${HW_SOURCES_GEN11}) endif() compute-runtime-22.14.22890/level_zero/core/source/gen11/cmdlist_gen11.cpp000066400000000000000000000012011422164147700260130ustar00rootroot00000000000000/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include 
"level_zero/core/source/gen11/cmdlist_gen11.inl" #include "level_zero/core/source/cmdlist/cmdlist_hw.h" #include "level_zero/core/source/cmdlist/cmdlist_hw.inl" #include "level_zero/core/source/cmdlist/cmdlist_hw_base.inl" #include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.h" #include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl" #include "cmdlist_extended.inl" namespace L0 { template struct CommandListCoreFamily; template struct CommandListCoreFamilyImmediate; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/gen11/cmdlist_gen11.h000066400000000000000000000012171422164147700254670ustar00rootroot00000000000000/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/core/source/cmdlist/cmdlist_hw.h" #include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.h" namespace L0 { template struct CommandListProductFamily : public CommandListCoreFamily { using CommandListCoreFamily::CommandListCoreFamily; }; template struct CommandListImmediateProductFamily : public CommandListCoreFamilyImmediate { using CommandListCoreFamilyImmediate::CommandListCoreFamilyImmediate; }; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/gen11/cmdlist_gen11.inl000066400000000000000000000016251422164147700260250ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/pipe_control_args.h" #include "level_zero/core/source/cmdlist/cmdlist_hw.h" namespace L0 { template void CommandListCoreFamily::applyMemoryRangesBarrier(uint32_t numRanges, const size_t *pRangeSizes, const void **pRanges) { const auto &hwInfo = this->device->getHwInfo(); NEO::PipeControlArgs args; args.dcFlushEnable = NEO::MemorySynchronizationCommands::getDcFlushEnable(true, hwInfo); NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), args); } } // namespace L0 
compute-runtime-22.14.22890/level_zero/core/source/gen11/debugger_gen11.cpp000066400000000000000000000007101422164147700261440ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/debugger/debugger_l0.inl" #include "level_zero/core/source/debugger/debugger_l0_base.inl" namespace NEO { struct ICLFamily; using GfxFamily = ICLFamily; } // namespace NEO namespace L0 { template class DebuggerL0Hw; static DebuggerL0PopulateFactory debuggerGen11; } // namespace L0compute-runtime-22.14.22890/level_zero/core/source/gen11/enable_family_full_l0_gen11.cpp000066400000000000000000000007231422164147700305700ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen11/hw_cmds.h" #include "level_zero/core/source/helpers/l0_populate_factory.h" #include "level_zero/core/source/hw_helpers/l0_hw_helper.h" namespace NEO { using Family = ICLFamily; struct EnableL0Gen11 { EnableL0Gen11() { L0::populateFactoryTable>(); } }; static EnableL0Gen11 enable; } // namespace NEO compute-runtime-22.14.22890/level_zero/core/source/gen11/icllp/000077500000000000000000000000001422164147700237665ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/source/gen11/icllp/CMakeLists.txt000066400000000000000000000007471422164147700265360ustar00rootroot00000000000000# # Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT # if(SUPPORT_ICLLP) set(HW_SOURCES_GEN11 ${HW_SOURCES_GEN11} ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_icllp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cmdqueue_icllp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_icllp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/image_icllp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sampler_icllp.cpp PARENT_SCOPE ) endif() 
compute-runtime-22.14.22890/level_zero/core/source/gen11/icllp/cmdlist_icllp.cpp000066400000000000000000000006641422164147700273220ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/gen11/cmdlist_gen11.h" namespace L0 { static CommandListPopulateFactory> populateICLLP; static CommandListImmediatePopulateFactory> populateICLLPImmediate; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/gen11/icllp/cmdqueue_icllp.cpp000066400000000000000000000010311422164147700274600ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen11/hw_cmds.h" #include "shared/source/gen11/hw_info.h" #include "level_zero/core/source/cmdqueue/cmdqueue_hw.inl" #include "level_zero/core/source/cmdqueue/cmdqueue_hw_base.inl" #include "cmdqueue_extended.inl" #include "igfxfmid.h" namespace L0 { template struct CommandQueueHw; static CommandQueuePopulateFactory> populateICLLP; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/gen11/icllp/image_icllp.cpp000066400000000000000000000012411422164147700267350ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen11/hw_cmds.h" #include "shared/source/gen11/hw_info.h" #include "level_zero/core/source/image/image_hw.inl" namespace L0 { template <> struct ImageProductFamily : public ImageCoreFamily { using ImageCoreFamily::ImageCoreFamily; ze_result_t initialize(Device *device, const ze_image_desc_t *desc) override { return ImageCoreFamily::initialize(device, desc); }; }; static ImagePopulateFactory> populateICLLP; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/gen11/icllp/kernel_icllp.cpp000066400000000000000000000004141422164147700271340ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * 
SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/kernel/kernel_hw.h" namespace L0 { static KernelPopulateFactory> populateICLLP; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/gen11/icllp/sampler_icllp.cpp000066400000000000000000000010131422164147700273130ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen11/hw_cmds.h" #include "shared/source/gen11/hw_info.h" #include "level_zero/core/source/sampler/sampler_hw.inl" namespace L0 { template <> struct SamplerProductFamily : public SamplerCoreFamily { using SamplerCoreFamily::SamplerCoreFamily; }; static SamplerPopulateFactory> populateICLLP; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/gen11/l0_hw_helper_gen11.cpp000066400000000000000000000012131422164147700267270ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/helpers/l0_populate_factory.h" #include "level_zero/core/source/hw_helpers/l0_hw_helper_base.inl" #include "level_zero/core/source/hw_helpers/l0_hw_helper_skl_and_later.inl" #include "hw_cmds.h" namespace L0 { using Family = NEO::ICLFamily; static auto gfxCore = IGFX_GEN11_CORE; template <> void populateFactoryTable>() { extern L0HwHelper *l0HwHelperFactory[IGFX_MAX_CORE]; l0HwHelperFactory[gfxCore] = &L0HwHelperHw::get(); } template class L0HwHelperHw; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/gen12lp/000077500000000000000000000000001422164147700232205ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/source/gen12lp/CMakeLists.txt000066400000000000000000000014701422164147700257620ustar00rootroot00000000000000# # Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT # if(SUPPORT_GEN12LP) set(HW_SOURCES_GEN12LP ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt 
${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_gen12lp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_gen12lp.h ${CMAKE_CURRENT_SOURCE_DIR}/debugger_gen12lp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/definitions/cache_flush_gen12lp.inl ${CMAKE_CURRENT_SOURCE_DIR}/enable_family_full_l0_gen12lp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/l0_hw_helper_gen12lp.cpp ) add_subdirectories() target_include_directories(${L0_STATIC_LIB_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/definitions/) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${HW_SOURCES_GEN12LP}) set_property(GLOBAL APPEND PROPERTY L0_HW_SOURCES_GEN12LP ${HW_SOURCES_GEN12LP}) endif() compute-runtime-22.14.22890/level_zero/core/source/gen12lp/adlp/000077500000000000000000000000001422164147700241405ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/source/gen12lp/adlp/CMakeLists.txt000066400000000000000000000007401422164147700267010ustar00rootroot00000000000000# # Copyright (C) 2021 Intel Corporation # # SPDX-License-Identifier: MIT # if(SUPPORT_ADLP) set(HW_SOURCES_GEN12LP ${HW_SOURCES_GEN12LP} ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_adlp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cmdqueue_adlp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_adlp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/image_adlp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sampler_adlp.cpp PARENT_SCOPE ) endif() compute-runtime-22.14.22890/level_zero/core/source/gen12lp/adlp/cmdlist_adlp.cpp000066400000000000000000000006641422164147700273110ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/gen12lp/cmdlist_gen12lp.h" namespace L0 { static CommandListPopulateFactory> populateADLP; static CommandListImmediatePopulateFactory> populateADLPImmediate; } // namespace L0compute-runtime-22.14.22890/level_zero/core/source/gen12lp/adlp/cmdqueue_adlp.cpp000066400000000000000000000006621422164147700274600ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * 
SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/cmdqueue/cmdqueue_hw.inl" #include "level_zero/core/source/cmdqueue/cmdqueue_hw_base.inl" #include "cmdqueue_extended.inl" namespace L0 { template struct CommandQueueHw; static CommandQueuePopulateFactory> populateADLP; } // namespace L0compute-runtime-22.14.22890/level_zero/core/source/gen12lp/adlp/image_adlp.cpp000066400000000000000000000011171422164147700267260ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/image/image_hw.inl" namespace L0 { template <> struct ImageProductFamily : public ImageCoreFamily { using ImageCoreFamily::ImageCoreFamily; ze_result_t initialize(Device *device, const ze_image_desc_t *desc) override { return ImageCoreFamily::initialize(device, desc); }; }; static ImagePopulateFactory> populateADLP; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/gen12lp/adlp/kernel_adlp.cpp000066400000000000000000000004101422164147700271170ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/kernel/kernel_hw.h" namespace L0 { static KernelPopulateFactory> populateADLP; } // namespace L0compute-runtime-22.14.22890/level_zero/core/source/gen12lp/adlp/sampler_adlp.cpp000066400000000000000000000006671422164147700273200ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/sampler/sampler_hw.inl" namespace L0 { template <> struct SamplerProductFamily : public SamplerCoreFamily { using SamplerCoreFamily::SamplerCoreFamily; }; static SamplerPopulateFactory> populateADLP; } // namespace L0 
compute-runtime-22.14.22890/level_zero/core/source/gen12lp/adls/000077500000000000000000000000001422164147700241435ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/source/gen12lp/adls/CMakeLists.txt000066400000000000000000000007401422164147700267040ustar00rootroot00000000000000# # Copyright (C) 2021 Intel Corporation # # SPDX-License-Identifier: MIT # if(SUPPORT_ADLS) set(HW_SOURCES_GEN12LP ${HW_SOURCES_GEN12LP} ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_adls.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cmdqueue_adls.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_adls.cpp ${CMAKE_CURRENT_SOURCE_DIR}/image_adls.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sampler_adls.cpp PARENT_SCOPE ) endif() compute-runtime-22.14.22890/level_zero/core/source/gen12lp/adls/cmdlist_adls.cpp000066400000000000000000000006651422164147700273200ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/gen12lp/cmdlist_gen12lp.h" namespace L0 { static CommandListPopulateFactory> populateADLS; static CommandListImmediatePopulateFactory> populateADLSImmediate; } // namespace L0compute-runtime-22.14.22890/level_zero/core/source/gen12lp/adls/cmdqueue_adls.cpp000066400000000000000000000006621422164147700274660ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/cmdqueue/cmdqueue_hw.inl" #include "level_zero/core/source/cmdqueue/cmdqueue_hw_base.inl" #include "cmdqueue_extended.inl" namespace L0 { template struct CommandQueueHw; static CommandQueuePopulateFactory> populateADLS; } // namespace L0compute-runtime-22.14.22890/level_zero/core/source/gen12lp/adls/image_adls.cpp000066400000000000000000000011171422164147700267340ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/image/image_hw.inl" namespace L0 { 
template <> struct ImageProductFamily : public ImageCoreFamily { using ImageCoreFamily::ImageCoreFamily; ze_result_t initialize(Device *device, const ze_image_desc_t *desc) override { return ImageCoreFamily::initialize(device, desc); }; }; static ImagePopulateFactory> populateADLS; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/gen12lp/adls/kernel_adls.cpp000066400000000000000000000004101422164147700271250ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/kernel/kernel_hw.h" namespace L0 { static KernelPopulateFactory> populateADLS; } // namespace L0compute-runtime-22.14.22890/level_zero/core/source/gen12lp/adls/sampler_adls.cpp000066400000000000000000000006671422164147700273260ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/sampler/sampler_hw.inl" namespace L0 { template <> struct SamplerProductFamily : public SamplerCoreFamily { using SamplerCoreFamily::SamplerCoreFamily; }; static SamplerPopulateFactory> populateADLS; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/gen12lp/cmdlist_gen12lp.cpp000066400000000000000000000011611422164147700267120ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/cmdlist/cmdlist_hw.h" #include "level_zero/core/source/cmdlist/cmdlist_hw.inl" #include "level_zero/core/source/cmdlist/cmdlist_hw_base.inl" #include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.h" #include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl" #include "cache_flush_gen12lp.inl" #include "cmdlist_extended.inl" namespace L0 { template struct CommandListCoreFamily; template struct CommandListCoreFamilyImmediate; } // namespace 
L0compute-runtime-22.14.22890/level_zero/core/source/gen12lp/cmdlist_gen12lp.h000066400000000000000000000012271422164147700263620ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/core/source/cmdlist/cmdlist_hw.h" #include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.h" namespace L0 { template struct CommandListProductFamily : public CommandListCoreFamily { using CommandListCoreFamily::CommandListCoreFamily; }; template struct CommandListImmediateProductFamily : public CommandListCoreFamilyImmediate { using CommandListCoreFamilyImmediate::CommandListCoreFamilyImmediate; }; } // namespace L0compute-runtime-22.14.22890/level_zero/core/source/gen12lp/debugger_gen12lp.cpp000066400000000000000000000006271422164147700270450ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/debugger/debugger_l0.inl" #include "level_zero/core/source/debugger/debugger_l0_tgllp_and_later.inl" namespace L0 { using Family = NEO::TGLLPFamily; template class DebuggerL0Hw; static DebuggerL0PopulateFactory debuggerGen12lp; } // namespace L0compute-runtime-22.14.22890/level_zero/core/source/gen12lp/definitions/000077500000000000000000000000001422164147700255335ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/source/gen12lp/definitions/cache_flush_gen12lp.inl000066400000000000000000000061351422164147700320400ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/cache_flush.inl" #include "shared/source/helpers/pipe_control_args.h" #include "level_zero/core/source/cmdlist/cmdlist_hw.h" namespace L0 { template void CommandListCoreFamily::applyMemoryRangesBarrier(uint32_t numRanges, const size_t *pRangeSizes, const void **pRanges) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; 
const auto &hwInfo = this->device->getHwInfo(); bool supportL3Control = hwInfo.capabilityTable.supportCacheFlushAfterWalker; if (!supportL3Control) { NEO::PipeControlArgs args; args.dcFlushEnable = NEO::MemorySynchronizationCommands::getDcFlushEnable(true, hwInfo); NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), args); } else { NEO::LinearStream *commandStream = commandContainer.getCommandStream(); NEO::SVMAllocsManager *svmAllocsManager = device->getDriverHandle()->getSvmAllocsManager(); for (uint32_t i = 0; i < numRanges; i++) { StackVec subranges; const uint64_t pRange = reinterpret_cast(pRanges[i]); const size_t pRangeSize = pRangeSizes[i]; const uint64_t pEndRange = pRange + pRangeSize; uint64_t pFlushRange; size_t pFlushRangeSize; uint64_t postSyncAddressToFlush = 0; NEO::SvmAllocationData *allocData = svmAllocsManager->getSVMAllocs()->get(pRanges[i]); if (allocData == nullptr || pRangeSize > allocData->size) { continue; } pFlushRange = pRange; pFlushRangeSize = pRangeSize; if (NEO::L3Range::meetsMinimumAlignment(pRange) == false) { pFlushRange = alignDown(pRange, MemoryConstants::pageSize); } if (NEO::L3Range::meetsMinimumAlignment(pRangeSize) == false) { pFlushRangeSize = alignUp(pRangeSize, MemoryConstants::pageSize); } bool isRangeSharedBetweenTwoPages = (alignDown(pEndRange, MemoryConstants::pageSize) != pFlushRange); if (isRangeSharedBetweenTwoPages) { pFlushRangeSize += MemoryConstants::pageSize; } coverRangeExact(pFlushRange, pFlushRangeSize, subranges, GfxFamily::L3_FLUSH_ADDRESS_RANGE::L3_FLUSH_EVICTION_POLICY_FLUSH_L3_WITH_EVICTION); NEO::flushGpuCache(commandStream, subranges, postSyncAddressToFlush, hwInfo); } } } } // namespace L0 
compute-runtime-22.14.22890/level_zero/core/source/gen12lp/dg1/000077500000000000000000000000001422164147700236735ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/source/gen12lp/dg1/CMakeLists.txt000066400000000000000000000007371422164147700264420ustar00rootroot00000000000000# # Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT # if(SUPPORT_DG1) set(HW_SOURCES_GEN12LP ${HW_SOURCES_GEN12LP} ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_dg1.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cmdqueue_dg1.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_dg1.cpp ${CMAKE_CURRENT_SOURCE_DIR}/image_dg1.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sampler_dg1.cpp PARENT_SCOPE ) endif() compute-runtime-22.14.22890/level_zero/core/source/gen12lp/dg1/cmdlist_dg1.cpp000066400000000000000000000006301422164147700265700ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/gen12lp/cmdlist_gen12lp.h" namespace L0 { static CommandListPopulateFactory> populateDG1; static CommandListImmediatePopulateFactory> populateDG1Immediate; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/gen12lp/dg1/cmdqueue_dg1.cpp000066400000000000000000000010331422164147700267370ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen12lp/hw_cmds.h" #include "shared/source/gen12lp/hw_info.h" #include "level_zero/core/source/cmdqueue/cmdqueue_hw.inl" #include "level_zero/core/source/cmdqueue/cmdqueue_hw_base.inl" #include "cmdqueue_extended.inl" #include "igfxfmid.h" namespace L0 { template struct CommandQueueHw; static CommandQueuePopulateFactory> populateDG1; } // namespace L0compute-runtime-22.14.22890/level_zero/core/source/gen12lp/dg1/image_dg1.cpp000066400000000000000000000012221422164147700262110ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation 
* * SPDX-License-Identifier: MIT * */ #include "shared/source/gen12lp/hw_cmds.h" #include "shared/source/gen12lp/hw_info.h" #include "level_zero/core/source/image/image_hw.inl" namespace L0 { template <> struct ImageProductFamily : public ImageCoreFamily { using ImageCoreFamily::ImageCoreFamily; ze_result_t initialize(Device *device, const ze_image_desc_t *desc) override { return ImageCoreFamily::initialize(device, desc); }; }; static ImagePopulateFactory> populateDG1; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/gen12lp/dg1/kernel_dg1.cpp000066400000000000000000000004041422164147700264100ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/kernel/kernel_hw.h" namespace L0 { static KernelPopulateFactory> populateDG1; } // namespace L0compute-runtime-22.14.22890/level_zero/core/source/gen12lp/dg1/sampler_dg1.cpp000066400000000000000000000007721422164147700266030ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen12lp/hw_cmds.h" #include "shared/source/gen12lp/hw_info.h" #include "level_zero/core/source/sampler/sampler_hw.inl" namespace L0 { template <> struct SamplerProductFamily : public SamplerCoreFamily { using SamplerCoreFamily::SamplerCoreFamily; }; static SamplerPopulateFactory> populateDG1; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/gen12lp/enable_family_full_l0_gen12lp.cpp000066400000000000000000000007351422164147700314650ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen12lp/hw_cmds.h" #include "level_zero/core/source/helpers/l0_populate_factory.h" #include "level_zero/core/source/hw_helpers/l0_hw_helper.h" namespace NEO { using Family = TGLLPFamily; struct EnableL0Gen12LP { EnableL0Gen12LP() { L0::populateFactoryTable>(); } }; static 
EnableL0Gen12LP enable; } // namespace NEO compute-runtime-22.14.22890/level_zero/core/source/gen12lp/l0_hw_helper_gen12lp.cpp000066400000000000000000000015221422164147700276240ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/helpers/l0_populate_factory.h" #include "level_zero/core/source/hw_helpers/l0_hw_helper_base.inl" #include "level_zero/core/source/hw_helpers/l0_hw_helper_skl_and_later.inl" #include "hw_cmds.h" namespace L0 { using Family = NEO::TGLLPFamily; static auto gfxCore = IGFX_GEN12LP_CORE; template <> void populateFactoryTable>() { extern L0HwHelper *l0HwHelperFactory[IGFX_MAX_CORE]; l0HwHelperFactory[gfxCore] = &L0HwHelperHw::get(); } template <> bool L0HwHelperHw::isResumeWARequired() { return true; } // clang-format off #include "level_zero/core/source/hw_helpers/l0_hw_helper_tgllp_plus.inl" // clang-format on template class L0HwHelperHw; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/gen12lp/rkl/000077500000000000000000000000001422164147700240105ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/source/gen12lp/rkl/CMakeLists.txt000066400000000000000000000007321422164147700265520ustar00rootroot00000000000000# # Copyright (C) 2021 Intel Corporation # # SPDX-License-Identifier: MIT # if(SUPPORT_RKL) set(HW_SOURCES_GEN12LP ${HW_SOURCES_GEN12LP} ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_rkl.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cmdqueue_rkl.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_rkl.cpp ${CMAKE_CURRENT_SOURCE_DIR}/image_rkl.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sampler_rkl.cpp PARENT_SCOPE ) endif() compute-runtime-22.14.22890/level_zero/core/source/gen12lp/rkl/cmdlist_rkl.cpp000066400000000000000000000006561422164147700270320ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include 
"level_zero/core/source/gen12lp/cmdlist_gen12lp.h" namespace L0 { static CommandListPopulateFactory> populateRKL; static CommandListImmediatePopulateFactory> populateRKLImmediate; } // namespace L0compute-runtime-22.14.22890/level_zero/core/source/gen12lp/rkl/cmdqueue_rkl.cpp000066400000000000000000000007101422164147700271720ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/cmdqueue/cmdqueue_hw.inl" #include "level_zero/core/source/cmdqueue/cmdqueue_hw_base.inl" #include "cmdqueue_extended.inl" //#include "igfxfmid.h" namespace L0 { template struct CommandQueueHw; static CommandQueuePopulateFactory> populateRKL; } // namespace L0compute-runtime-22.14.22890/level_zero/core/source/gen12lp/rkl/image_rkl.cpp000066400000000000000000000011131422164147700264420ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/image/image_hw.inl" namespace L0 { template <> struct ImageProductFamily : public ImageCoreFamily { using ImageCoreFamily::ImageCoreFamily; ze_result_t initialize(Device *device, const ze_image_desc_t *desc) override { return ImageCoreFamily::initialize(device, desc); }; }; static ImagePopulateFactory> populateRKL; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/gen12lp/rkl/kernel_rkl.cpp000066400000000000000000000004061422164147700266440ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/kernel/kernel_hw.h" namespace L0 { static KernelPopulateFactory> populateRKL; } // namespace L0compute-runtime-22.14.22890/level_zero/core/source/gen12lp/rkl/sampler_rkl.cpp000066400000000000000000000006631422164147700270340ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/sampler/sampler_hw.inl" 
namespace L0 { template <> struct SamplerProductFamily : public SamplerCoreFamily { using SamplerCoreFamily::SamplerCoreFamily; }; static SamplerPopulateFactory> populateRKL; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/gen12lp/tgllp/000077500000000000000000000000001422164147700243425ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/source/gen12lp/tgllp/CMakeLists.txt000066400000000000000000000007531422164147700271070ustar00rootroot00000000000000# # Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT # if(SUPPORT_TGLLP) set(HW_SOURCES_GEN12LP ${HW_SOURCES_GEN12LP} ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_tgllp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cmdqueue_tgllp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_tgllp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/image_tgllp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sampler_tgllp.cpp PARENT_SCOPE ) endif() compute-runtime-22.14.22890/level_zero/core/source/gen12lp/tgllp/cmdlist_tgllp.cpp000066400000000000000000000006771422164147700277210ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/gen12lp/cmdlist_gen12lp.h" namespace L0 { static CommandListPopulateFactory> populateTGLLP; static CommandListImmediatePopulateFactory> populateTGLLPImmediate; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/gen12lp/tgllp/cmdqueue_tgllp.cpp000066400000000000000000000010471422164147700300620ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen12lp/hw_cmds.h" #include "shared/source/gen12lp/hw_info.h" #include "level_zero/core/source/cmdqueue/cmdqueue_hw.inl" #include "level_zero/core/source/cmdqueue/cmdqueue_hw_base.inl" #include "cmdqueue_extended.inl" #include "igfxfmid.h" namespace L0 { template struct CommandQueueHw; static CommandQueuePopulateFactory> 
populateTGLLP; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/gen12lp/tgllp/image_tgllp.cpp000066400000000000000000000012571422164147700273370ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen12lp/hw_cmds.h" #include "shared/source/gen12lp/hw_info.h" #include "level_zero/core/source/image/image_hw.inl" namespace L0 { template <> struct ImageProductFamily : public ImageCoreFamily { using ImageCoreFamily::ImageCoreFamily; ze_result_t initialize(Device *device, const ze_image_desc_t *desc) override { return ImageCoreFamily::initialize(device, desc); }; }; static ImagePopulateFactory> populateTGLLP; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/gen12lp/tgllp/kernel_tgllp.cpp000066400000000000000000000004201422164147700275240ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/kernel/kernel_hw.h" namespace L0 { static KernelPopulateFactory> populateTGLLP; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/gen12lp/tgllp/sampler_tgllp.cpp000066400000000000000000000010271422164147700277130ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen12lp/hw_cmds.h" #include "shared/source/gen12lp/hw_info.h" #include "level_zero/core/source/sampler/sampler_hw.inl" namespace L0 { template <> struct SamplerProductFamily : public SamplerCoreFamily { using SamplerCoreFamily::SamplerCoreFamily; }; static SamplerPopulateFactory> populateTGLLP; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/gen8/000077500000000000000000000000001422164147700226115ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/source/gen8/CMakeLists.txt000066400000000000000000000006651422164147700253600ustar00rootroot00000000000000# # Copyright (C) 
2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT # if(SUPPORT_GEN8) set(HW_SOURCES_GEN8 ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/debugger_gen8.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enable_family_full_l0_gen8.cpp ) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${HW_SOURCES_GEN8}) set_property(GLOBAL PROPERTY L0_HW_SOURCES_GEN8 ${HW_SOURCES_GEN8}) endif() compute-runtime-22.14.22890/level_zero/core/source/gen8/debugger_gen8.cpp000066400000000000000000000005711422164147700260250ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/debugger/debugger_l0.inl" #include "level_zero/core/source/debugger/debugger_l0_base.inl" namespace NEO { struct BDWFamily; using GfxFamily = BDWFamily; } // namespace NEO namespace L0 { template class DebuggerL0Hw; } // namespace L0compute-runtime-22.14.22890/level_zero/core/source/gen8/enable_family_full_l0_gen8.cpp000066400000000000000000000004241422164147700304420ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen8/hw_cmds.h" namespace NEO { using Family = BDWFamily; struct EnableL0Gen8 { EnableL0Gen8() { } }; static EnableL0Gen8 enable; } // namespace NEO compute-runtime-22.14.22890/level_zero/core/source/gen9/000077500000000000000000000000001422164147700226125ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/source/gen9/CMakeLists.txt000066400000000000000000000012341422164147700253520ustar00rootroot00000000000000# # Copyright (C) 2020-2022 Intel Corporation # # SPDX-License-Identifier: MIT # if(SUPPORT_GEN9) set(HW_SOURCES_GEN9 ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_gen9.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_gen9.h ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_gen9.inl ${CMAKE_CURRENT_SOURCE_DIR}/debugger_gen9.cpp 
${CMAKE_CURRENT_SOURCE_DIR}/enable_family_full_l0_gen9.cpp ${CMAKE_CURRENT_SOURCE_DIR}/l0_hw_helper_gen9.cpp ) add_subdirectories() target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${HW_SOURCES_GEN9}) set_property(GLOBAL PROPERTY L0_HW_SOURCES_GEN9 ${HW_SOURCES_GEN9}) endif() compute-runtime-22.14.22890/level_zero/core/source/gen9/cfl/000077500000000000000000000000001422164147700233565ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/source/gen9/cfl/CMakeLists.txt000066400000000000000000000007311422164147700261170ustar00rootroot00000000000000# # Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT # if(SUPPORT_CFL) set(HW_SOURCES_GEN9 ${HW_SOURCES_GEN9} ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_cfl.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cmdqueue_cfl.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_cfl.cpp ${CMAKE_CURRENT_SOURCE_DIR}/image_cfl.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sampler_cfl.cpp PARENT_SCOPE ) endif() compute-runtime-22.14.22890/level_zero/core/source/gen9/cfl/cmdlist_cfl.cpp000066400000000000000000000006561422164147700263540ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/gen9/cmdlist_gen9.h" namespace L0 { static CommandListPopulateFactory> populateCFL; static CommandListImmediatePopulateFactory> populateCFLImmediate; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/gen9/cfl/cmdqueue_cfl.cpp000066400000000000000000000010231422164147700265120ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen9/hw_cmds.h" #include "shared/source/gen9/hw_info.h" #include "level_zero/core/source/cmdqueue/cmdqueue_hw.inl" #include "level_zero/core/source/cmdqueue/cmdqueue_hw_base.inl" #include "cmdqueue_extended.inl" #include "igfxfmid.h" namespace L0 { template struct CommandQueueHw; static 
CommandQueuePopulateFactory> populateCFL; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/gen9/cfl/image_cfl.cpp000066400000000000000000000007661422164147700260010ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen9/hw_cmds.h" #include "shared/source/gen9/hw_info.h" #include "level_zero/core/source/image/image_hw.inl" namespace L0 { template <> struct ImageProductFamily : public ImageCoreFamily { using ImageCoreFamily::ImageCoreFamily; }; static ImagePopulateFactory> populateCFL; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/gen9/cfl/kernel_cfl.cpp000066400000000000000000000004111422164147700261620ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/kernel/kernel_hw.h" namespace L0 { static KernelPopulateFactory> populateCFL; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/gen9/cfl/sampler_cfl.cpp000066400000000000000000000010061422164147700263460ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen9/hw_cmds.h" #include "shared/source/gen9/hw_info.h" #include "level_zero/core/source/sampler/sampler_hw.inl" namespace L0 { template <> struct SamplerProductFamily : public SamplerCoreFamily { using SamplerCoreFamily::SamplerCoreFamily; }; static SamplerPopulateFactory> populateCFL; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/gen9/cmdlist_gen9.cpp000066400000000000000000000011731422164147700257010ustar00rootroot00000000000000/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/gen9/cmdlist_gen9.inl" #include "level_zero/core/source/cmdlist/cmdlist_hw.h" #include "level_zero/core/source/cmdlist/cmdlist_hw.inl" #include 
"level_zero/core/source/cmdlist/cmdlist_hw_base.inl" #include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.h" #include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl" #include "cmdlist_extended.inl" namespace L0 { template struct CommandListCoreFamily; template struct CommandListCoreFamilyImmediate; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/gen9/cmdlist_gen9.h000066400000000000000000000020221422164147700253400ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_container/command_encoder.h" #include "shared/source/gen9/hw_cmds.h" #include "shared/source/gen9/hw_info.h" #include "level_zero/core/source/cmdlist/cmdlist_hw.h" #include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.h" #include "igfxfmid.h" namespace L0 { template struct CommandListProductFamily : public CommandListCoreFamily { using CommandListCoreFamily::CommandListCoreFamily; void programL3(bool isSLMused) override { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; NEO::EncodeL3State::encode(commandContainer, isSLMused); } }; template struct CommandListImmediateProductFamily : public CommandListCoreFamilyImmediate { using CommandListCoreFamilyImmediate::CommandListCoreFamilyImmediate; }; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/gen9/cmdlist_gen9.inl000066400000000000000000000017611422164147700257040ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/gen9/hw_cmds.h" #include "shared/source/gen9/hw_info.h" #include "shared/source/helpers/pipe_control_args.h" #include "level_zero/core/source/cmdlist/cmdlist_hw.h" namespace L0 { template void CommandListCoreFamily::applyMemoryRangesBarrier(uint32_t numRanges, const size_t *pRangeSizes, const void **pRanges) { const auto &hwInfo = this->device->getHwInfo(); 
NEO::PipeControlArgs args; args.dcFlushEnable = NEO::MemorySynchronizationCommands::getDcFlushEnable(true, hwInfo); NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), args); } } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/gen9/debugger_gen9.cpp000066400000000000000000000007071422164147700260300ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/debugger/debugger_l0.inl" #include "level_zero/core/source/debugger/debugger_l0_base.inl" namespace NEO { struct SKLFamily; using GfxFamily = SKLFamily; } // namespace NEO namespace L0 { template class DebuggerL0Hw; static DebuggerL0PopulateFactory debuggerGen9; } // namespace L0compute-runtime-22.14.22890/level_zero/core/source/gen9/enable_family_full_l0_gen9.cpp000066400000000000000000000007171422164147700304510ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen9/hw_cmds.h" #include "level_zero/core/source/helpers/l0_populate_factory.h" #include "level_zero/core/source/hw_helpers/l0_hw_helper.h" namespace NEO { using Family = SKLFamily; struct EnableL0Gen9 { EnableL0Gen9() { L0::populateFactoryTable>(); } }; static EnableL0Gen9 enable; } // namespace NEO compute-runtime-22.14.22890/level_zero/core/source/gen9/kbl/000077500000000000000000000000001422164147700233625ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/source/gen9/kbl/CMakeLists.txt000066400000000000000000000007311422164147700261230ustar00rootroot00000000000000# # Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT # if(SUPPORT_KBL) set(HW_SOURCES_GEN9 ${HW_SOURCES_GEN9} ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_kbl.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cmdqueue_kbl.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_kbl.cpp 
${CMAKE_CURRENT_SOURCE_DIR}/image_kbl.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sampler_kbl.cpp PARENT_SCOPE ) endif() compute-runtime-22.14.22890/level_zero/core/source/gen9/kbl/cmdlist_kbl.cpp000066400000000000000000000006451422164147700263620ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/gen9/cmdlist_gen9.h" namespace L0 { static CommandListPopulateFactory> populateKBL; static CommandListImmediatePopulateFactory> populateKBLImmediate; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/gen9/kbl/cmdqueue_kbl.cpp000066400000000000000000000010211422164147700265200ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen9/hw_cmds.h" #include "shared/source/gen9/hw_info.h" #include "level_zero/core/source/cmdqueue/cmdqueue_hw.inl" #include "level_zero/core/source/cmdqueue/cmdqueue_hw_base.inl" #include "cmdqueue_extended.inl" #include "igfxfmid.h" namespace L0 { template struct CommandQueueHw; static CommandQueuePopulateFactory> populateKBL; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/gen9/kbl/image_kbl.cpp000066400000000000000000000007601422164147700260030ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen9/hw_cmds.h" #include "shared/source/gen9/hw_info.h" #include "level_zero/core/source/image/image_hw.inl" namespace L0 { template <> struct ImageProductFamily : public ImageCoreFamily { using ImageCoreFamily::ImageCoreFamily; }; static ImagePopulateFactory> populateKBL; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/gen9/kbl/kernel_kbl.cpp000066400000000000000000000004071422164147700261770ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include 
"level_zero/core/source/kernel/kernel_hw.h" namespace L0 { static KernelPopulateFactory> populateKBL; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/gen9/kbl/sampler_kbl.cpp000066400000000000000000000010001422164147700263500ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen9/hw_cmds.h" #include "shared/source/gen9/hw_info.h" #include "level_zero/core/source/sampler/sampler_hw.inl" namespace L0 { template <> struct SamplerProductFamily : public SamplerCoreFamily { using SamplerCoreFamily::SamplerCoreFamily; }; static SamplerPopulateFactory> populateKBL; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/gen9/l0_hw_helper_gen9.cpp000066400000000000000000000012121422164147700266040ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/helpers/l0_populate_factory.h" #include "level_zero/core/source/hw_helpers/l0_hw_helper_base.inl" #include "level_zero/core/source/hw_helpers/l0_hw_helper_skl_and_later.inl" #include "hw_cmds.h" namespace L0 { using Family = NEO::SKLFamily; static auto gfxCore = IGFX_GEN9_CORE; template <> void populateFactoryTable>() { extern L0HwHelper *l0HwHelperFactory[IGFX_MAX_CORE]; l0HwHelperFactory[gfxCore] = &L0HwHelperHw::get(); } template class L0HwHelperHw; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/gen9/skl/000077500000000000000000000000001422164147700234035ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/source/gen9/skl/CMakeLists.txt000066400000000000000000000007311422164147700261440ustar00rootroot00000000000000# # Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT # if(SUPPORT_SKL) set(HW_SOURCES_GEN9 ${HW_SOURCES_GEN9} ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_skl.cpp 
${CMAKE_CURRENT_SOURCE_DIR}/cmdqueue_skl.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_skl.cpp ${CMAKE_CURRENT_SOURCE_DIR}/image_skl.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sampler_skl.cpp PARENT_SCOPE ) endif() compute-runtime-22.14.22890/level_zero/core/source/gen9/skl/cmdlist_skl.cpp000066400000000000000000000006431422164147700264220ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/gen9/cmdlist_gen9.h" namespace L0 { static CommandListPopulateFactory> populateSKL; static CommandListImmediatePopulateFactory> populateSKLImmediate; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/gen9/skl/cmdqueue_skl.cpp000066400000000000000000000010201422164147700265610ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen9/hw_cmds.h" #include "shared/source/gen9/hw_info.h" #include "level_zero/core/source/cmdqueue/cmdqueue_hw.inl" #include "level_zero/core/source/cmdqueue/cmdqueue_hw_base.inl" #include "cmdqueue_extended.inl" #include "igfxfmid.h" namespace L0 { template struct CommandQueueHw; static CommandQueuePopulateFactory> populateSKL; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/gen9/skl/image_skl.cpp000066400000000000000000000007551422164147700260510ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen9/hw_cmds.h" #include "shared/source/gen9/hw_info.h" #include "level_zero/core/source/image/image_hw.inl" namespace L0 { template <> struct ImageProductFamily : public ImageCoreFamily { using ImageCoreFamily::ImageCoreFamily; }; static ImagePopulateFactory> populateSKL; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/gen9/skl/kernel_skl.cpp000066400000000000000000000004061422164147700262400ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel 
Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/kernel/kernel_hw.h" namespace L0 { static KernelPopulateFactory> populateSKL; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/gen9/skl/sampler_skl.cpp000066400000000000000000000007751422164147700264340ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen9/hw_cmds.h" #include "shared/source/gen9/hw_info.h" #include "level_zero/core/source/sampler/sampler_hw.inl" namespace L0 { template <> struct SamplerProductFamily : public SamplerCoreFamily { using SamplerCoreFamily::SamplerCoreFamily; }; static SamplerPopulateFactory> populateSKL; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/get_extension_function_lookup_map.cpp000066400000000000000000000005061422164147700314630ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/get_extension_function_lookup_map.h" namespace L0 { std::unordered_map getExtensionFunctionsLookupMap() { return std::unordered_map(); } } // namespace L0compute-runtime-22.14.22890/level_zero/core/source/get_extension_function_lookup_map.h000066400000000000000000000003541422164147700311310ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include #include namespace L0 { std::unordered_map getExtensionFunctionsLookupMap(); } // namespace L0compute-runtime-22.14.22890/level_zero/core/source/helpers/000077500000000000000000000000001422164147700234125ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/source/helpers/allocation_extensions.cpp000066400000000000000000000041341422164147700305240ustar00rootroot00000000000000/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/helpers/allocation_extensions.h" #include 
"shared/source/helpers/memory_properties_helpers.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/memory_manager/unified_memory_manager.h" #include "level_zero/core/source/driver/driver_handle_imp.h" namespace L0 { ze_result_t handleAllocationExtensions(NEO::GraphicsAllocation *alloc, ze_memory_type_t type, void *pNext, struct DriverHandleImp *driverHandle) { if (pNext != nullptr) { ze_base_properties_t *extendedProperties = reinterpret_cast(pNext); if (extendedProperties->stype == ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_EXPORT_FD) { ze_external_memory_export_fd_t *extendedMemoryExportProperties = reinterpret_cast(extendedProperties); if (extendedMemoryExportProperties->flags & ZE_EXTERNAL_MEMORY_TYPE_FLAG_OPAQUE_FD) { return ZE_RESULT_ERROR_UNSUPPORTED_ENUMERATION; } if (type != ZE_MEMORY_TYPE_DEVICE) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } uint64_t handle = alloc->peekInternalHandle(driverHandle->getMemoryManager()); extendedMemoryExportProperties->fd = static_cast(handle); } else if (extendedProperties->stype == ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_EXPORT_WIN32) { ze_external_memory_export_win32_handle_t *exportStructure = reinterpret_cast(extendedProperties); if (exportStructure->flags != ZE_EXTERNAL_MEMORY_TYPE_FLAG_OPAQUE_WIN32) { return ZE_RESULT_ERROR_UNSUPPORTED_ENUMERATION; } uint64_t handle = alloc->peekInternalHandle(driverHandle->getMemoryManager()); exportStructure->handle = reinterpret_cast(handle); } else { return ZE_RESULT_ERROR_INVALID_ENUMERATION; } } return ZE_RESULT_SUCCESS; } } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/helpers/allocation_extensions.h000066400000000000000000000006511422164147700301710ustar00rootroot00000000000000/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/memory_manager/memory_manager.h" #include "level_zero/core/source/driver/driver_handle_imp.h" namespace L0 { ze_result_t 
handleAllocationExtensions(NEO::GraphicsAllocation *alloc, ze_memory_type_t type, void *pNext, struct DriverHandleImp *driverHandle); } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/helpers/api_specific_config_l0.cpp000066400000000000000000000020661422164147700304600ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/helpers/api_specific_config.h" #include "level_zero/core/source/compiler_interface/l0_reg_path.h" namespace NEO { bool ApiSpecificConfig::isStatelessCompressionSupported() { return false; } bool ApiSpecificConfig::getHeapConfiguration() { return DebugManager.flags.UseExternalAllocatorForSshAndDsh.get(); } bool ApiSpecificConfig::getBindlessConfiguration() { if (DebugManager.flags.UseBindlessMode.get() != -1) { return DebugManager.flags.UseBindlessMode.get(); } else { return false; } } ApiSpecificConfig::ApiType ApiSpecificConfig::getApiType() { return ApiSpecificConfig::L0; } std::string ApiSpecificConfig::getName() { return "l0"; } uint64_t ApiSpecificConfig::getReducedMaxAllocSize(uint64_t maxAllocSize) { return maxAllocSize; } const char *ApiSpecificConfig::getRegistryPath() { return L0::registryPath; } } // namespace NEO compute-runtime-22.14.22890/level_zero/core/source/helpers/implicit_scaling_l0.cpp000066400000000000000000000004201422164147700300170ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_container/implicit_scaling.h" namespace NEO { namespace ImplicitScaling { bool apiSupport = true; } // namespace ImplicitScaling } // namespace NEO compute-runtime-22.14.22890/level_zero/core/source/helpers/l0_populate_factory.h000066400000000000000000000003501422164147700275340ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ 
#pragma once namespace L0 { // Method called by global factory enabler template void populateFactoryTable(); } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/helpers/properties_parser.h000066400000000000000000000146261422164147700273440ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/debug_helpers.h" #include "shared/source/helpers/surface_format_info.h" #include #include namespace L0 { inline NEO::ImageType convertType(const ze_image_type_t type) { switch (type) { case ZE_IMAGE_TYPE_2D: return NEO::ImageType::Image2D; case ZE_IMAGE_TYPE_3D: return NEO::ImageType::Image3D; case ZE_IMAGE_TYPE_2DARRAY: return NEO::ImageType::Image2DArray; case ZE_IMAGE_TYPE_1D: return NEO::ImageType::Image1D; case ZE_IMAGE_TYPE_1DARRAY: return NEO::ImageType::Image1DArray; case ZE_IMAGE_TYPE_BUFFER: return NEO::ImageType::Image1DBuffer; default: break; } return NEO::ImageType::Invalid; } inline NEO::ImageDescriptor convertDescriptor(const ze_image_desc_t &imageDesc) { NEO::ImageDescriptor desc = {}; desc.fromParent = false; desc.imageArraySize = imageDesc.arraylevels; desc.imageDepth = imageDesc.depth; desc.imageHeight = imageDesc.height; desc.imageRowPitch = 0u; desc.imageSlicePitch = 0u; desc.imageType = convertType(imageDesc.type); desc.imageWidth = imageDesc.width; desc.numMipLevels = imageDesc.miplevels; desc.numSamples = 0u; return desc; } struct StructuresLookupTable { struct ImageProperties { NEO::ImageDescriptor imageDescriptor; uint32_t planeIndex; bool isPlanarExtension; } imageProperties; struct SharedHandleType { void *ntHnadle; int fd; bool isSupportedHandle; bool isDMABUFHandle; bool isNTHandle; } sharedHandleType; bool areImageProperties; bool exportMemory; bool isSharedHandle; bool relaxedSizeAllowed; bool compressedHint; bool uncompressedHint; }; inline ze_result_t prepareL0StructuresLookupTable(StructuresLookupTable &lookupTable, const void 
*desc) { const ze_base_desc_t *extendedDesc = reinterpret_cast(desc); while (extendedDesc) { if (extendedDesc->stype == ZE_STRUCTURE_TYPE_IMAGE_DESC) { const ze_image_desc_t *imageDesc = reinterpret_cast(extendedDesc); lookupTable.areImageProperties = true; lookupTable.imageProperties.imageDescriptor = convertDescriptor(*imageDesc); } else if (extendedDesc->stype == ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMPORT_FD) { lookupTable.isSharedHandle = true; const ze_external_memory_import_fd_t *linuxExternalMemoryImportDesc = reinterpret_cast(extendedDesc); if (linuxExternalMemoryImportDesc->flags == ZE_EXTERNAL_MEMORY_TYPE_FLAG_DMA_BUF) { lookupTable.sharedHandleType.isSupportedHandle = true; lookupTable.sharedHandleType.isDMABUFHandle = true; lookupTable.sharedHandleType.fd = linuxExternalMemoryImportDesc->fd; } else { lookupTable.sharedHandleType.isSupportedHandle = false; return ZE_RESULT_ERROR_UNSUPPORTED_ENUMERATION; } } else if (extendedDesc->stype == ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMPORT_WIN32) { lookupTable.isSharedHandle = true; const ze_external_memory_import_win32_handle_t *windowsExternalMemoryImportDesc = reinterpret_cast(extendedDesc); if (windowsExternalMemoryImportDesc->flags == ZE_EXTERNAL_MEMORY_TYPE_FLAG_OPAQUE_WIN32) { lookupTable.sharedHandleType.isSupportedHandle = true; lookupTable.sharedHandleType.isNTHandle = true; lookupTable.sharedHandleType.ntHnadle = windowsExternalMemoryImportDesc->handle; } else { lookupTable.sharedHandleType.isSupportedHandle = false; return ZE_RESULT_ERROR_UNSUPPORTED_ENUMERATION; } } else if (extendedDesc->stype == ZE_STRUCTURE_TYPE_IMAGE_VIEW_PLANAR_EXP_DESC) { const ze_image_view_planar_exp_desc_t *imageViewDesc = reinterpret_cast(extendedDesc); lookupTable.areImageProperties = true; lookupTable.imageProperties.isPlanarExtension = true; lookupTable.imageProperties.planeIndex = imageViewDesc->planeIndex; } else if (extendedDesc->stype == ZE_STRUCTURE_TYPE_RELAXED_ALLOCATION_LIMITS_EXP_DESC) { const 
ze_relaxed_allocation_limits_exp_desc_t *relaxedLimitsDesc = reinterpret_cast(extendedDesc); if (!(relaxedLimitsDesc->flags & ZE_RELAXED_ALLOCATION_LIMITS_EXP_FLAG_MAX_SIZE)) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } lookupTable.relaxedSizeAllowed = true; } else if (extendedDesc->stype == ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_EXPORT_DESC) { const ze_external_memory_export_desc_t *externalMemoryExportDesc = reinterpret_cast(extendedDesc); if (externalMemoryExportDesc->flags & ZE_EXTERNAL_MEMORY_TYPE_FLAG_DMA_BUF || externalMemoryExportDesc->flags & ZE_EXTERNAL_MEMORY_TYPE_FLAG_OPAQUE_WIN32) { lookupTable.exportMemory = true; } else { return ZE_RESULT_ERROR_UNSUPPORTED_ENUMERATION; } } else if (extendedDesc->stype == ZE_STRUCTURE_TYPE_MEMORY_COMPRESSION_HINTS_EXT_DESC) { auto memoryCompressionHintsDesc = reinterpret_cast(extendedDesc); if (memoryCompressionHintsDesc->flags == ZE_MEMORY_COMPRESSION_HINTS_EXT_FLAG_COMPRESSED) { lookupTable.compressedHint = true; } else if (memoryCompressionHintsDesc->flags == ZE_MEMORY_COMPRESSION_HINTS_EXT_FLAG_UNCOMPRESSED) { lookupTable.uncompressedHint = true; } else { return ZE_RESULT_ERROR_UNSUPPORTED_ENUMERATION; } } else { return ZE_RESULT_ERROR_UNSUPPORTED_ENUMERATION; } extendedDesc = reinterpret_cast(extendedDesc->pNext); } if (lookupTable.areImageProperties && lookupTable.exportMemory) { return ZE_RESULT_ERROR_UNSUPPORTED_ENUMERATION; } return ZE_RESULT_SUCCESS; } } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/hw_helpers/000077500000000000000000000000001422164147700241105ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/source/hw_helpers/l0_hw_helper.cpp000066400000000000000000000005121422164147700271620ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/hw_helpers/l0_hw_helper.h" namespace L0 { L0HwHelper *l0HwHelperFactory[IGFX_MAX_CORE] = {}; L0HwHelper &L0HwHelper::get(GFXCORE_FAMILY 
gfxCore) { return *l0HwHelperFactory[gfxCore]; } } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/hw_helpers/l0_hw_helper.h000066400000000000000000000051741422164147700266400ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include #include "igfxfmid.h" #include #include namespace NEO { enum class EngineGroupType : uint32_t; struct HardwareInfo; } // namespace NEO namespace L0 { struct Event; struct Device; struct EventPool; class L0HwHelper { public: static L0HwHelper &get(GFXCORE_FAMILY gfxCore); virtual void setAdditionalGroupProperty(ze_command_queue_group_properties_t &groupProperty, NEO::EngineGroupType groupType) const = 0; virtual L0::Event *createEvent(L0::EventPool *eventPool, const ze_event_desc_t *desc, L0::Device *device) const = 0; virtual bool isResumeWARequired() = 0; virtual bool imageCompressionSupported(const NEO::HardwareInfo &hwInfo) const = 0; virtual bool usmCompressionSupported(const NEO::HardwareInfo &hwInfo) const = 0; virtual bool forceDefaultUsmCompressionSupport() const = 0; virtual bool isIpSamplingSupported(const NEO::HardwareInfo &hwInfo) const = 0; virtual void getAttentionBitmaskForSingleThreads(std::vector &threads, const NEO::HardwareInfo &hwInfo, std::unique_ptr &bitmask, size_t &bitmaskSize) const = 0; virtual std::vector getThreadsFromAttentionBitmask(const NEO::HardwareInfo &hwInfo, const uint8_t *bitmask, const size_t bitmaskSize) const = 0; protected: L0HwHelper() = default; }; template class L0HwHelperHw : public L0HwHelper { public: static L0HwHelper &get() { static L0HwHelperHw l0HwHelper; return l0HwHelper; } void setAdditionalGroupProperty(ze_command_queue_group_properties_t &groupProperty, NEO::EngineGroupType groupType) const override; L0::Event *createEvent(L0::EventPool *eventPool, const ze_event_desc_t *desc, L0::Device *device) const override; L0HwHelperHw() = default; bool isResumeWARequired() override; bool 
imageCompressionSupported(const NEO::HardwareInfo &hwInfo) const override; bool usmCompressionSupported(const NEO::HardwareInfo &hwInfo) const override; bool forceDefaultUsmCompressionSupport() const override; bool isIpSamplingSupported(const NEO::HardwareInfo &hwInfo) const override; void getAttentionBitmaskForSingleThreads(std::vector &threads, const NEO::HardwareInfo &hwInfo, std::unique_ptr &bitmask, size_t &bitmaskSize) const override; std::vector getThreadsFromAttentionBitmask(const NEO::HardwareInfo &hwInfo, const uint8_t *bitmask, const size_t bitmaskSize) const override; }; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/hw_helpers/l0_hw_helper_base.inl000066400000000000000000000116031422164147700301570ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/debug_settings/debug_settings_manager.h" #include "level_zero/core/source/device/device.h" #include "level_zero/core/source/event/event.h" #include "level_zero/core/source/hw_helpers/l0_hw_helper.h" namespace L0 { template L0::Event *L0HwHelperHw::createEvent(L0::EventPool *eventPool, const ze_event_desc_t *desc, L0::Device *device) const { if (NEO::DebugManager.flags.OverrideTimestampPacketSize.get() != -1) { if (NEO::DebugManager.flags.OverrideTimestampPacketSize.get() == 4) { return Event::create(eventPool, desc, device); } else if (NEO::DebugManager.flags.OverrideTimestampPacketSize.get() == 8) { return Event::create(eventPool, desc, device); } else { UNRECOVERABLE_IF(true); } } return Event::create(eventPool, desc, device); } template bool L0HwHelperHw::isResumeWARequired() { return false; } template void L0HwHelperHw::getAttentionBitmaskForSingleThreads(std::vector &threads, const NEO::HardwareInfo &hwInfo, std::unique_ptr &bitmask, size_t &bitmaskSize) const { const uint32_t numSubslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported; const uint32_t 
numEuPerSubslice = hwInfo.gtSystemInfo.MaxEuPerSubSlice; const uint32_t numThreadsPerEu = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount); const uint32_t bytesPerEu = alignUp(numThreadsPerEu, 8) / 8; const uint32_t threadsSizePerSlice = numSubslicesPerSlice * numEuPerSubslice * bytesPerEu; bitmaskSize = hwInfo.gtSystemInfo.MaxSubSlicesSupported * hwInfo.gtSystemInfo.MaxEuPerSubSlice * bytesPerEu; bitmask = std::make_unique(bitmaskSize); memset(bitmask.get(), 0, bitmaskSize); for (auto &thread : threads) { uint8_t *sliceData = ptrOffset(bitmask.get(), threadsSizePerSlice * thread.slice); uint8_t *subsliceData = ptrOffset(sliceData, numEuPerSubslice * bytesPerEu * thread.subslice); uint8_t *euData = ptrOffset(subsliceData, bytesPerEu * thread.eu); UNRECOVERABLE_IF(thread.thread > 7); *euData |= (1 << thread.thread); } } template std::vector L0HwHelperHw::getThreadsFromAttentionBitmask(const NEO::HardwareInfo &hwInfo, const uint8_t *bitmask, const size_t bitmaskSize) const { const uint32_t numSubslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported; const uint32_t numEuPerSubslice = hwInfo.gtSystemInfo.MaxEuPerSubSlice; const uint32_t numThreadsPerEu = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount); const uint32_t bytesPerEu = alignUp(numThreadsPerEu, 8) / 8; const uint32_t threadsSizePerSlice = numSubslicesPerSlice * numEuPerSubslice * bytesPerEu; const uint32_t threadsSizePerSubSlice = numEuPerSubslice * bytesPerEu; UNRECOVERABLE_IF(bytesPerEu != 1); std::vector threads; for (uint32_t slice = 0; slice < hwInfo.gtSystemInfo.MaxSlicesSupported; slice++) { for (uint32_t subslice = 0; subslice < numSubslicesPerSlice; subslice++) { for (uint32_t eu = 0; eu < hwInfo.gtSystemInfo.MaxEuPerSubSlice; eu++) { size_t offset = slice * threadsSizePerSlice + subslice * threadsSizePerSubSlice + eu * bytesPerEu; if (offset >= bitmaskSize) { return threads; } std::bitset<8> bits(bitmask[offset]); for 
(uint32_t i = 0; i < 8; i++) { if (bits.test(i)) { threads.emplace_back(ze_device_thread_t{slice, subslice, eu, i}); } } } } } return threads; } template bool L0HwHelperHw::imageCompressionSupported(const NEO::HardwareInfo &hwInfo) const { if (NEO::DebugManager.flags.RenderCompressedImagesEnabled.get() != -1) { return !!NEO::DebugManager.flags.RenderCompressedImagesEnabled.get(); } return false; } template bool L0HwHelperHw::usmCompressionSupported(const NEO::HardwareInfo &hwInfo) const { if (NEO::DebugManager.flags.RenderCompressedBuffersEnabled.get() != -1) { return !!NEO::DebugManager.flags.RenderCompressedBuffersEnabled.get(); } return false; } template bool L0HwHelperHw::forceDefaultUsmCompressionSupport() const { return false; } template bool L0HwHelperHw::isIpSamplingSupported(const NEO::HardwareInfo &hwInfo) const { return false; } } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/hw_helpers/l0_hw_helper_pvc_and_later.inl000066400000000000000000000017641422164147700320550ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/helpers/definitions/engine_group_types.h" #include "shared/source/helpers/engine_node_helper.h" #include "level_zero/core/source/hw_helpers/l0_hw_helper.h" #include namespace L0 { template void L0HwHelperHw::setAdditionalGroupProperty(ze_command_queue_group_properties_t &groupProperty, NEO::EngineGroupType groupType) const { if (groupType == NEO::EngineGroupType::LinkedCopy) { groupProperty.flags = ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY; groupProperty.maxMemoryFillPatternSize = sizeof(uint8_t); } if (groupType == NEO::EngineGroupType::Copy && NEO::EngineHelpers::isBcsVirtualEngineEnabled()) { groupProperty.flags = ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY; groupProperty.maxMemoryFillPatternSize = sizeof(uint8_t); } } } // namespace L0 
compute-runtime-22.14.22890/level_zero/core/source/hw_helpers/l0_hw_helper_skl_and_later.inl000066400000000000000000000005601422164147700320470ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/hw_helpers/l0_hw_helper.h" namespace L0 { template void L0HwHelperHw::setAdditionalGroupProperty(ze_command_queue_group_properties_t &groupProperty, NEO::EngineGroupType groupType) const { } } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/hw_helpers/l0_hw_helper_tgllp_plus.inl000066400000000000000000000070511422164147700314340ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ template <> void L0HwHelperHw::getAttentionBitmaskForSingleThreads(std::vector &threads, const NEO::HardwareInfo &hwInfo, std::unique_ptr &bitmask, size_t &bitmaskSize) const { const uint32_t numSubslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported; const uint32_t numThreadsPerEu = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount); const uint32_t bytesPerEu = alignUp(numThreadsPerEu, 8) / 8; const uint32_t numEuPerSubslice = std::min(hwInfo.gtSystemInfo.MaxEuPerSubSlice, 8u); const uint32_t threadsSizePerSlice = numSubslicesPerSlice * numEuPerSubslice * bytesPerEu; const uint32_t eusPerRow = 4; const uint32_t numberOfRows = 2; bitmaskSize = hwInfo.gtSystemInfo.MaxSubSlicesSupported * numEuPerSubslice * bytesPerEu; bitmask = std::make_unique(bitmaskSize); memset(bitmask.get(), 0, bitmaskSize); for (auto &thread : threads) { uint8_t *sliceData = ptrOffset(bitmask.get(), threadsSizePerSlice * thread.slice); uint8_t *subsliceData = ptrOffset(sliceData, numEuPerSubslice * bytesPerEu * thread.subslice); auto eu = thread.eu % eusPerRow; auto dualEu = thread.eu / (numberOfRows * eusPerRow); uint8_t *euData = ptrOffset(subsliceData, bytesPerEu * (eu + dualEu * eusPerRow)); 
UNRECOVERABLE_IF(thread.thread > 7); *euData |= (1 << thread.thread); } } template <> std::vector L0HwHelperHw::getThreadsFromAttentionBitmask(const NEO::HardwareInfo &hwInfo, const uint8_t *bitmask, const size_t bitmaskSize) const { const uint32_t numSubslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported; const uint32_t numEuPerSubslice = std::min(hwInfo.gtSystemInfo.MaxEuPerSubSlice, 8u); const uint32_t numThreadsPerEu = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount); const uint32_t bytesPerEu = alignUp(numThreadsPerEu, 8) / 8; const uint32_t threadsSizePerSlice = numSubslicesPerSlice * numEuPerSubslice * bytesPerEu; const uint32_t threadsSizePerSubSlice = numEuPerSubslice * bytesPerEu; const uint32_t eusPerRow = 4; const uint32_t numberOfRows = 2; UNRECOVERABLE_IF(bytesPerEu != 1); std::vector threads; for (uint32_t slice = 0; slice < hwInfo.gtSystemInfo.MaxSlicesSupported; slice++) { for (uint32_t subslice = 0; subslice < numSubslicesPerSlice; subslice++) { size_t subSliceOffset = slice * threadsSizePerSlice + subslice * threadsSizePerSubSlice; for (uint32_t dualEu = 0; dualEu < numberOfRows; dualEu++) { for (uint32_t euIndex = 0; euIndex < eusPerRow; euIndex++) { auto offset = subSliceOffset + euIndex + dualEu * eusPerRow; if (offset >= bitmaskSize) { return threads; } std::bitset<8> bits(bitmask[offset]); for (uint32_t i = 0; i < 8; i++) { if (bits.test(i)) { threads.emplace_back(ze_device_thread_t{slice, subslice, euIndex + numEuPerSubslice * dualEu, i}); threads.emplace_back(ze_device_thread_t{slice, subslice, euIndex + eusPerRow + numEuPerSubslice * dualEu, i}); } } } } } } return threads; } compute-runtime-22.14.22890/level_zero/core/source/image/000077500000000000000000000000001422164147700230325ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/source/image/image.h000066400000000000000000000034611422164147700242710ustar00rootroot00000000000000/* * Copyright (C) 
2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_container/cmdcontainer.h" #include "level_zero/core/source/device/device.h" #include struct _ze_image_handle_t {}; namespace NEO { struct ImageInfo; struct ImageDescriptor; } // namespace NEO namespace L0 { struct Image : _ze_image_handle_t { template struct Allocator { static Image *allocate() { return new Type(); } }; virtual ~Image() = default; virtual ze_result_t destroy() = 0; static ze_result_t create(uint32_t productFamily, Device *device, const ze_image_desc_t *desc, Image **pImage); virtual ze_result_t createView(Device *device, const ze_image_desc_t *desc, ze_image_handle_t *pImage) = 0; virtual NEO::GraphicsAllocation *getAllocation() = 0; virtual void copySurfaceStateToSSH(void *surfaceStateHeap, const uint32_t surfaceStateOffset, bool isMediaBlockArg) = 0; virtual void copyRedescribedSurfaceStateToSSH(void *surfaceStateHeap, const uint32_t surfaceStateOffset) = 0; virtual NEO::ImageInfo getImageInfo() = 0; virtual ze_image_desc_t getImageDesc() = 0; virtual ze_result_t getMemoryProperties(ze_image_memory_properties_exp_t *pMemoryProperties) = 0; static Image *fromHandle(ze_image_handle_t handle) { return static_cast(handle); } inline ze_image_handle_t toHandle() { return this; } }; using ImageAllocatorFn = Image *(*)(); extern ImageAllocatorFn imageFactory[]; template struct ImagePopulateFactory { ImagePopulateFactory() { imageFactory[productFamily] = Image::Allocator::allocate; } }; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/image/image_format_desc_helper.cpp000066400000000000000000000113731422164147700305320ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/image/image_format_desc_helper.h" #include "third_party/opencl_headers/CL/cl_ext_intel.h" namespace L0 { cl_channel_type getClChannelDataType(const ze_image_format_t 
&imgDescription) { switch (imgDescription.layout) { case ZE_IMAGE_FORMAT_LAYOUT_8: case ZE_IMAGE_FORMAT_LAYOUT_8_8: case ZE_IMAGE_FORMAT_LAYOUT_8_8_8_8: if (imgDescription.type == ZE_IMAGE_FORMAT_TYPE_UINT) return CL_UNSIGNED_INT8; if (imgDescription.type == ZE_IMAGE_FORMAT_TYPE_SINT) return CL_SIGNED_INT8; if (imgDescription.type == ZE_IMAGE_FORMAT_TYPE_UNORM) return CL_UNORM_INT8; if (imgDescription.type == ZE_IMAGE_FORMAT_TYPE_SNORM) return CL_SNORM_INT8; break; case ZE_IMAGE_FORMAT_LAYOUT_16: case ZE_IMAGE_FORMAT_LAYOUT_16_16: case ZE_IMAGE_FORMAT_LAYOUT_16_16_16_16: if (imgDescription.type == ZE_IMAGE_FORMAT_TYPE_UINT) return CL_UNSIGNED_INT16; if (imgDescription.type == ZE_IMAGE_FORMAT_TYPE_SINT) return CL_SIGNED_INT16; if (imgDescription.type == ZE_IMAGE_FORMAT_TYPE_UNORM) return CL_UNORM_INT16; if (imgDescription.type == ZE_IMAGE_FORMAT_TYPE_SNORM) return CL_SNORM_INT16; return CL_HALF_FLOAT; [[fallthrough]]; case ZE_IMAGE_FORMAT_LAYOUT_32: case ZE_IMAGE_FORMAT_LAYOUT_32_32: case ZE_IMAGE_FORMAT_LAYOUT_32_32_32_32: if (imgDescription.type == ZE_IMAGE_FORMAT_TYPE_UINT) return CL_UNSIGNED_INT32; if (imgDescription.type == ZE_IMAGE_FORMAT_TYPE_SINT) return CL_SIGNED_INT32; if (imgDescription.type == ZE_IMAGE_FORMAT_TYPE_FLOAT) return CL_FLOAT; break; case ZE_IMAGE_FORMAT_LAYOUT_10_10_10_2: if (imgDescription.type == ZE_IMAGE_FORMAT_TYPE_UNORM) return CL_UNORM_INT_101010_2; break; case ZE_IMAGE_FORMAT_LAYOUT_5_6_5: if (imgDescription.type == ZE_IMAGE_FORMAT_TYPE_UNORM) return CL_UNORM_SHORT_565; break; case ZE_IMAGE_FORMAT_LAYOUT_5_5_5_1: if (imgDescription.type == ZE_IMAGE_FORMAT_TYPE_UNORM) return CL_UNORM_SHORT_555; break; case ZE_IMAGE_FORMAT_LAYOUT_NV12: if (imgDescription.type == ZE_IMAGE_FORMAT_TYPE_UNORM) return CL_NV12_INTEL; break; case ZE_IMAGE_FORMAT_LAYOUT_YUYV: if (imgDescription.type == ZE_IMAGE_FORMAT_TYPE_UNORM) return CL_YUYV_INTEL; break; case ZE_IMAGE_FORMAT_LAYOUT_VYUY: if (imgDescription.type == ZE_IMAGE_FORMAT_TYPE_UNORM) return 
CL_VYUY_INTEL; break; case ZE_IMAGE_FORMAT_LAYOUT_YVYU: if (imgDescription.type == ZE_IMAGE_FORMAT_TYPE_UNORM) return CL_YVYU_INTEL; break; case ZE_IMAGE_FORMAT_LAYOUT_UYVY: if (imgDescription.type == ZE_IMAGE_FORMAT_TYPE_UNORM) return CL_UYVY_INTEL; break; default: break; } return CL_INVALID_VALUE; } cl_channel_order getClChannelOrder(const ze_image_format_t &imgDescription) { swizzles imgSwizzles{imgDescription.x, imgDescription.y, imgDescription.z, imgDescription.w}; if (imgSwizzles == swizzles{ZE_IMAGE_FORMAT_SWIZZLE_R, ZE_IMAGE_FORMAT_SWIZZLE_0, ZE_IMAGE_FORMAT_SWIZZLE_0, ZE_IMAGE_FORMAT_SWIZZLE_1}) return CL_R; if (imgSwizzles == swizzles{ZE_IMAGE_FORMAT_SWIZZLE_0, ZE_IMAGE_FORMAT_SWIZZLE_0, ZE_IMAGE_FORMAT_SWIZZLE_0, ZE_IMAGE_FORMAT_SWIZZLE_A}) return CL_A; if (imgSwizzles == swizzles{ZE_IMAGE_FORMAT_SWIZZLE_R, ZE_IMAGE_FORMAT_SWIZZLE_G, ZE_IMAGE_FORMAT_SWIZZLE_0, ZE_IMAGE_FORMAT_SWIZZLE_1}) return CL_RG; if (imgSwizzles == swizzles{ZE_IMAGE_FORMAT_SWIZZLE_R, ZE_IMAGE_FORMAT_SWIZZLE_0, ZE_IMAGE_FORMAT_SWIZZLE_0, ZE_IMAGE_FORMAT_SWIZZLE_A}) return CL_RA; if (imgSwizzles == swizzles{ZE_IMAGE_FORMAT_SWIZZLE_R, ZE_IMAGE_FORMAT_SWIZZLE_G, ZE_IMAGE_FORMAT_SWIZZLE_B, ZE_IMAGE_FORMAT_SWIZZLE_1}) return CL_RGB; if (imgSwizzles == swizzles{ZE_IMAGE_FORMAT_SWIZZLE_R, ZE_IMAGE_FORMAT_SWIZZLE_G, ZE_IMAGE_FORMAT_SWIZZLE_B, ZE_IMAGE_FORMAT_SWIZZLE_A}) return CL_RGBA; if (imgSwizzles == swizzles{ZE_IMAGE_FORMAT_SWIZZLE_B, ZE_IMAGE_FORMAT_SWIZZLE_G, ZE_IMAGE_FORMAT_SWIZZLE_R, ZE_IMAGE_FORMAT_SWIZZLE_A}) return CL_BGRA; if (imgSwizzles == swizzles{ZE_IMAGE_FORMAT_SWIZZLE_A, ZE_IMAGE_FORMAT_SWIZZLE_R, ZE_IMAGE_FORMAT_SWIZZLE_G, ZE_IMAGE_FORMAT_SWIZZLE_B}) return CL_ARGB; if (imgSwizzles == swizzles{ZE_IMAGE_FORMAT_SWIZZLE_A, ZE_IMAGE_FORMAT_SWIZZLE_B, ZE_IMAGE_FORMAT_SWIZZLE_G, ZE_IMAGE_FORMAT_SWIZZLE_R}) return CL_ABGR; return CL_INVALID_VALUE; } } // namespace L0 
compute-runtime-22.14.22890/level_zero/core/source/image/image_format_desc_helper.h000066400000000000000000000013761422164147700302010ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/core/source/image/image.h" namespace L0 { struct swizzles { ze_image_format_swizzle_t x; ze_image_format_swizzle_t y; ze_image_format_swizzle_t z; ze_image_format_swizzle_t w; bool operator==(const swizzles &rhs) { if (x != rhs.x) return false; if (y != rhs.y) return false; if (z != rhs.z) return false; if (w != rhs.w) return false; return true; } }; cl_channel_type getClChannelDataType(const ze_image_format_t &imgDescription); cl_channel_order getClChannelOrder(const ze_image_format_t &imgDescription); } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/image/image_formats.h000066400000000000000000000644721422164147700260350ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/surface_format_info.h" #include namespace L0 { namespace ImageFormats { constexpr uint32_t ZE_IMAGE_FORMAT_RENDER_LAYOUT_MAX = 43u; using FormatTypes = std::array; constexpr std::array surfaceFormatsForRedescribe = { {{GMM_FORMAT_R8_UINT_TYPE, NEO::GFX3DSTATE_SURFACEFORMAT_R8_UINT, 0, 1, 1, 1}, {GMM_FORMAT_R16_UINT_TYPE, NEO::GFX3DSTATE_SURFACEFORMAT_R16_UINT, 0, 1, 2, 2}, {GMM_FORMAT_R32_UINT_TYPE, NEO::GFX3DSTATE_SURFACEFORMAT_R32_UINT, 0, 1, 4, 4}, {GMM_FORMAT_R32G32_UINT_TYPE, NEO::GFX3DSTATE_SURFACEFORMAT_R32G32_UINT, 0, 2, 4, 8}, {GMM_FORMAT_R32G32B32A32_UINT_TYPE, NEO::GFX3DSTATE_SURFACEFORMAT_R32G32B32A32_UINT, 0, 4, 4, 16}}}; constexpr FormatTypes layout8 = {{{GMM_FORMAT_R8_UINT_TYPE, NEO::GFX3DSTATE_SURFACEFORMAT_R8_UINT, 0, 1, 1, 1}, {GMM_FORMAT_R8_SINT_TYPE, NEO::GFX3DSTATE_SURFACEFORMAT_R8_SINT, 0, 1, 1, 1}, {GMM_FORMAT_R8_UNORM_TYPE, NEO::GFX3DSTATE_SURFACEFORMAT_R8_UNORM, 0, 1, 1, 1}, 
{GMM_FORMAT_R8_SNORM_TYPE, NEO::GFX3DSTATE_SURFACEFORMAT_R8_SNORM, 0, 1, 1, 1}, {GMM_FORMAT_R8_UINT_TYPE, NEO::GFX3DSTATE_SURFACEFORMAT_R8_UINT, 0, 1, 1, 1}}}; constexpr FormatTypes layout88 = {{{GMM_FORMAT_R8G8_UINT_TYPE, NEO::GFX3DSTATE_SURFACEFORMAT_R8G8_UINT, 0, 2, 1, 2}, {GMM_FORMAT_R8G8_SINT_TYPE, NEO::GFX3DSTATE_SURFACEFORMAT_R8G8_SINT, 0, 2, 1, 2}, {GMM_FORMAT_R8G8_UNORM_TYPE, NEO::GFX3DSTATE_SURFACEFORMAT_R8G8_UNORM, 0, 2, 1, 2}, {GMM_FORMAT_R8G8_SNORM_TYPE, NEO::GFX3DSTATE_SURFACEFORMAT_R8G8_SNORM, 0, 2, 1, 2}, {GMM_FORMAT_R8G8_UINT_TYPE, NEO::GFX3DSTATE_SURFACEFORMAT_R8G8_UINT, 0, 2, 1, 2}}}; constexpr FormatTypes layout8888 = {{{GMM_FORMAT_R8G8B8A8_UINT_TYPE, NEO::GFX3DSTATE_SURFACEFORMAT_R8G8B8A8_UINT, 0, 4, 1, 4}, {GMM_FORMAT_R8G8B8A8_SINT_TYPE, NEO::GFX3DSTATE_SURFACEFORMAT_R8G8B8A8_SINT, 0, 4, 1, 4}, {GMM_FORMAT_R8G8B8A8_UNORM_TYPE, NEO::GFX3DSTATE_SURFACEFORMAT_R8G8B8A8_UNORM, 0, 4, 1, 4}, {GMM_FORMAT_R8G8B8A8_SNORM_TYPE, NEO::GFX3DSTATE_SURFACEFORMAT_R8G8B8A8_SNORM, 0, 4, 1, 4}, {GMM_FORMAT_R8G8B8A8_UINT_TYPE, NEO::GFX3DSTATE_SURFACEFORMAT_R8G8B8A8_UINT, 0, 4, 1, 4}}}; constexpr FormatTypes layout16 = {{{GMM_FORMAT_R16_UINT_TYPE, NEO::GFX3DSTATE_SURFACEFORMAT_R16_UINT, 0, 1, 2, 2}, {GMM_FORMAT_R16_SINT_TYPE, NEO::GFX3DSTATE_SURFACEFORMAT_R16_SINT, 0, 1, 2, 2}, {GMM_FORMAT_R16_UNORM_TYPE, NEO::GFX3DSTATE_SURFACEFORMAT_R16_UNORM, 0, 1, 2, 2}, {GMM_FORMAT_R16_SNORM_TYPE, NEO::GFX3DSTATE_SURFACEFORMAT_R16_SNORM, 0, 1, 2, 2}, {GMM_FORMAT_R16_FLOAT_TYPE, NEO::GFX3DSTATE_SURFACEFORMAT_R16_FLOAT, 0, 1, 2, 2}}}; constexpr FormatTypes layout1616 = {{{GMM_FORMAT_R16G16_UINT_TYPE, NEO::GFX3DSTATE_SURFACEFORMAT_R16G16_UINT, 0, 2, 2, 4}, {GMM_FORMAT_R16G16_SINT_TYPE, NEO::GFX3DSTATE_SURFACEFORMAT_R16G16_SINT, 0, 2, 2, 4}, {GMM_FORMAT_R16G16_UNORM_TYPE, NEO::GFX3DSTATE_SURFACEFORMAT_R16G16_UNORM, 0, 2, 2, 4}, {GMM_FORMAT_R16G16_SNORM_TYPE, NEO::GFX3DSTATE_SURFACEFORMAT_R16G16_SNORM, 0, 2, 2, 4}, {GMM_FORMAT_R16G16_FLOAT_TYPE, 
NEO::GFX3DSTATE_SURFACEFORMAT_R16G16_FLOAT, 0, 2, 2, 4}}}; constexpr FormatTypes layout16161616 = {{{GMM_FORMAT_R16G16B16A16_UINT_TYPE, NEO::GFX3DSTATE_SURFACEFORMAT_R16G16B16A16_UINT, 0, 4, 2, 8}, {GMM_FORMAT_R16G16B16A16_SINT_TYPE, NEO::GFX3DSTATE_SURFACEFORMAT_R16G16B16A16_SINT, 0, 4, 2, 8}, {GMM_FORMAT_R16G16B16A16_UNORM_TYPE, NEO::GFX3DSTATE_SURFACEFORMAT_R16G16B16A16_UNORM, 0, 4, 2, 8}, {GMM_FORMAT_R16G16B16A16_SNORM_TYPE, NEO::GFX3DSTATE_SURFACEFORMAT_R16G16B16A16_SNORM, 0, 4, 2, 8}, {GMM_FORMAT_R16G16B16A16_FLOAT_TYPE, NEO::GFX3DSTATE_SURFACEFORMAT_R16G16B16A16_FLOAT, 0, 4, 2, 8}}}; constexpr FormatTypes layout32 = {{{GMM_FORMAT_R32_UINT_TYPE, NEO::GFX3DSTATE_SURFACEFORMAT_R32_UINT, 0, 1, 4, 4}, {GMM_FORMAT_R32_SINT_TYPE, NEO::GFX3DSTATE_SURFACEFORMAT_R32_SINT, 0, 1, 4, 4}, {GMM_FORMAT_R32_UNORM_TYPE, NEO::GFX3DSTATE_SURFACEFORMAT_R32_UNORM, 0, 1, 4, 4}, {GMM_FORMAT_R32_SNORM_TYPE, NEO::GFX3DSTATE_SURFACEFORMAT_R32_SNORM, 0, 1, 4, 4}, {GMM_FORMAT_R32_FLOAT_TYPE, NEO::GFX3DSTATE_SURFACEFORMAT_R32_FLOAT, 0, 1, 4, 4}}}; constexpr FormatTypes layout3232 = {{{GMM_FORMAT_R32G32_UINT_TYPE, NEO::GFX3DSTATE_SURFACEFORMAT_R32G32_UINT, 0, 2, 4, 8}, {GMM_FORMAT_R32G32_SINT_TYPE, NEO::GFX3DSTATE_SURFACEFORMAT_R32G32_SINT, 0, 2, 4, 8}, {GMM_FORMAT_R32G32_UNORM_TYPE, NEO::GFX3DSTATE_SURFACEFORMAT_R32G32_UNORM, 0, 2, 4, 8}, {GMM_FORMAT_R32G32_SNORM_TYPE, NEO::GFX3DSTATE_SURFACEFORMAT_R32G32_SNORM, 0, 2, 4, 8}, {GMM_FORMAT_R32G32_FLOAT_TYPE, NEO::GFX3DSTATE_SURFACEFORMAT_R32G32_FLOAT, 0, 2, 4, 8}}}; constexpr FormatTypes layout32323232 = {{{GMM_FORMAT_R32G32B32A32_UINT_TYPE, NEO::GFX3DSTATE_SURFACEFORMAT_R32G32B32A32_UINT, 0, 4, 4, 16}, {GMM_FORMAT_R32G32B32A32_SINT_TYPE, NEO::GFX3DSTATE_SURFACEFORMAT_R32G32B32A32_SINT, 0, 4, 4, 16}, {GMM_FORMAT_R32G32B32A32_UNORM_TYPE, NEO::GFX3DSTATE_SURFACEFORMAT_R32G32B32A32_UNORM, 0, 4, 4, 16}, {GMM_FORMAT_R32G32B32A32_SNORM_TYPE, NEO::GFX3DSTATE_SURFACEFORMAT_R32G32B32A32_SNORM, 0, 4, 4, 16}, {GMM_FORMAT_R32G32B32A32_FLOAT_TYPE, 
NEO::GFX3DSTATE_SURFACEFORMAT_R32G32B32A32_FLOAT, 0, 4, 4, 16}}}; constexpr FormatTypes layout1010102 = {{{GMM_FORMAT_R10G10B10A2_UINT_TYPE, NEO::GFX3DSTATE_SURFACEFORMAT_R10G10B10A2_UINT, 0, 4, 1, 4}, {GMM_FORMAT_R10G10B10A2_SINT_TYPE, NEO::GFX3DSTATE_SURFACEFORMAT_R10G10B10A2_SINT, 0, 4, 1, 4}, {GMM_FORMAT_R10G10B10A2_UNORM_TYPE, NEO::GFX3DSTATE_SURFACEFORMAT_R10G10B10A2_UNORM, 0, 4, 1, 4}, {GMM_FORMAT_R10G10B10A2_SNORM_TYPE, NEO::GFX3DSTATE_SURFACEFORMAT_R10G10B10A2_SNORM, 0, 4, 1, 4}, {GMM_FORMAT_R10G10B10A2_UINT_TYPE, NEO::GFX3DSTATE_SURFACEFORMAT_R10G10B10A2_UINT, 0, 4, 1, 4}}}; constexpr FormatTypes layout111110 = {{{GMM_FORMAT_R11G11B10_FLOAT, NEO::GFX3DSTATE_SURFACEFORMAT_R11G11B10_FLOAT, 0, 3, 0, 4}, {GMM_FORMAT_R11G11B10_FLOAT, NEO::GFX3DSTATE_SURFACEFORMAT_R11G11B10_FLOAT, 0, 3, 0, 4}, {GMM_FORMAT_R11G11B10_FLOAT, NEO::GFX3DSTATE_SURFACEFORMAT_R11G11B10_FLOAT, 0, 3, 0, 4}, {GMM_FORMAT_R11G11B10_FLOAT, NEO::GFX3DSTATE_SURFACEFORMAT_R11G11B10_FLOAT, 0, 3, 0, 4}, {GMM_FORMAT_R11G11B10_FLOAT, NEO::GFX3DSTATE_SURFACEFORMAT_R11G11B10_FLOAT, 0, 3, 0, 4}}}; constexpr FormatTypes layout565 = {{{GMM_FORMAT_B5G6R5_UNORM, NEO::GFX3DSTATE_SURFACEFORMAT_B5G6R5_UNORM, 0, 3, 0, 2}, {GMM_FORMAT_B5G6R5_UNORM, NEO::GFX3DSTATE_SURFACEFORMAT_B5G6R5_UNORM, 0, 3, 0, 2}, {GMM_FORMAT_B5G6R5_UNORM, NEO::GFX3DSTATE_SURFACEFORMAT_B5G6R5_UNORM, 0, 3, 0, 2}, {GMM_FORMAT_B5G6R5_UNORM, NEO::GFX3DSTATE_SURFACEFORMAT_B5G6R5_UNORM, 0, 3, 0, 2}, {GMM_FORMAT_B5G6R5_UNORM, NEO::GFX3DSTATE_SURFACEFORMAT_B5G6R5_UNORM, 0, 3, 0, 2}}}; constexpr FormatTypes layout5551 = {{{GMM_FORMAT_B5G5R5A1_UNORM, NEO::GFX3DSTATE_SURFACEFORMAT_B5G5R5A1_UNORM, 0, 4, 0, 2}, {GMM_FORMAT_B5G5R5A1_UNORM, NEO::GFX3DSTATE_SURFACEFORMAT_B5G5R5A1_UNORM, 0, 4, 0, 2}, {GMM_FORMAT_B5G5R5A1_UNORM, NEO::GFX3DSTATE_SURFACEFORMAT_B5G5R5A1_UNORM, 0, 4, 0, 2}, {GMM_FORMAT_B5G5R5A1_UNORM, NEO::GFX3DSTATE_SURFACEFORMAT_B5G5R5A1_UNORM, 0, 4, 0, 2}, {GMM_FORMAT_B5G5R5A1_UNORM, NEO::GFX3DSTATE_SURFACEFORMAT_B5G5R5A1_UNORM, 0, 4, 0, 
2}}}; constexpr FormatTypes layout4444 = {{{GMM_FORMAT_B4G4R4A4_UNORM, NEO::GFX3DSTATE_SURFACEFORMAT_B4G4R4A4_UNORM, 0, 4, 1, 2}, {GMM_FORMAT_B4G4R4A4_UNORM, NEO::GFX3DSTATE_SURFACEFORMAT_B4G4R4A4_UNORM, 0, 4, 1, 2}, {GMM_FORMAT_B4G4R4A4_UNORM, NEO::GFX3DSTATE_SURFACEFORMAT_B4G4R4A4_UNORM, 0, 4, 1, 2}, {GMM_FORMAT_B4G4R4A4_UNORM, NEO::GFX3DSTATE_SURFACEFORMAT_B4G4R4A4_UNORM, 0, 4, 1, 2}, {GMM_FORMAT_B4G4R4A4_UNORM, NEO::GFX3DSTATE_SURFACEFORMAT_B4G4R4A4_UNORM, 0, 4, 1, 2}}}; constexpr FormatTypes layoutY8 = {{{GMM_FORMAT_Y8_UNORM, NEO::GFX3DSTATE_SURFACEFORMAT_Y8_UNORM, 0, 1, 1, 1}, {GMM_FORMAT_Y8_UNORM, NEO::GFX3DSTATE_SURFACEFORMAT_Y8_UNORM, 0, 1, 1, 1}, {GMM_FORMAT_Y8_UNORM, NEO::GFX3DSTATE_SURFACEFORMAT_Y8_UNORM, 0, 1, 1, 1}, {GMM_FORMAT_Y8_UNORM, NEO::GFX3DSTATE_SURFACEFORMAT_Y8_UNORM, 0, 1, 1, 1}, {GMM_FORMAT_Y8_UNORM, NEO::GFX3DSTATE_SURFACEFORMAT_Y8_UNORM, 0, 1, 1, 1}}}; constexpr FormatTypes layoutNV12 = {{{GMM_FORMAT_NV12, NEO::GFX3DSTATE_SURFACEFORMAT_PLANAR_420_8, 0, 1, 1, 1}, {GMM_FORMAT_NV12, NEO::GFX3DSTATE_SURFACEFORMAT_PLANAR_420_8, 0, 1, 1, 1}, {GMM_FORMAT_NV12, NEO::GFX3DSTATE_SURFACEFORMAT_PLANAR_420_8, 0, 1, 1, 1}, {GMM_FORMAT_NV12, NEO::GFX3DSTATE_SURFACEFORMAT_PLANAR_420_8, 0, 1, 1, 1}, {GMM_FORMAT_NV12, NEO::GFX3DSTATE_SURFACEFORMAT_PLANAR_420_8, 0, 1, 1, 1}}}; constexpr FormatTypes layoutYUYV = {{{GMM_FORMAT_YCRCB_NORMAL, NEO::GFX3DSTATE_SURFACEFORMAT_YCRCB_NORMAL, 0, 2, 1, 2}, {GMM_FORMAT_YCRCB_NORMAL, NEO::GFX3DSTATE_SURFACEFORMAT_YCRCB_NORMAL, 0, 2, 1, 2}, {GMM_FORMAT_YCRCB_NORMAL, NEO::GFX3DSTATE_SURFACEFORMAT_YCRCB_NORMAL, 0, 2, 1, 2}, {GMM_FORMAT_YCRCB_NORMAL, NEO::GFX3DSTATE_SURFACEFORMAT_YCRCB_NORMAL, 0, 2, 1, 2}, {GMM_FORMAT_YCRCB_NORMAL, NEO::GFX3DSTATE_SURFACEFORMAT_YCRCB_NORMAL, 0, 2, 1, 2}}}; constexpr FormatTypes layoutVYUY = {{{GMM_FORMAT_YCRCB_SWAPUVY, NEO::GFX3DSTATE_SURFACEFORMAT_YCRCB_SWAPUVY, 0, 2, 1, 2}, {GMM_FORMAT_YCRCB_SWAPUVY, NEO::GFX3DSTATE_SURFACEFORMAT_YCRCB_SWAPUVY, 0, 2, 1, 2}, {GMM_FORMAT_YCRCB_SWAPUVY, 
NEO::GFX3DSTATE_SURFACEFORMAT_YCRCB_SWAPUVY, 0, 2, 1, 2}, {GMM_FORMAT_YCRCB_SWAPUVY, NEO::GFX3DSTATE_SURFACEFORMAT_YCRCB_SWAPUVY, 0, 2, 1, 2}, {GMM_FORMAT_YCRCB_SWAPUVY, NEO::GFX3DSTATE_SURFACEFORMAT_YCRCB_SWAPUVY, 0, 2, 1, 2}}}; constexpr FormatTypes layoutYVYU = {{{GMM_FORMAT_YCRCB_SWAPUV, NEO::GFX3DSTATE_SURFACEFORMAT_YCRCB_SWAPUV, 0, 2, 1, 2}, {GMM_FORMAT_YCRCB_SWAPUV, NEO::GFX3DSTATE_SURFACEFORMAT_YCRCB_SWAPUV, 0, 2, 1, 2}, {GMM_FORMAT_YCRCB_SWAPUV, NEO::GFX3DSTATE_SURFACEFORMAT_YCRCB_SWAPUV, 0, 2, 1, 2}, {GMM_FORMAT_YCRCB_SWAPUV, NEO::GFX3DSTATE_SURFACEFORMAT_YCRCB_SWAPUV, 0, 2, 1, 2}, {GMM_FORMAT_YCRCB_SWAPUV, NEO::GFX3DSTATE_SURFACEFORMAT_YCRCB_SWAPUV, 0, 2, 1, 2}}}; constexpr FormatTypes layoutUYVY = {{{GMM_FORMAT_YCRCB_SWAPY, NEO::GFX3DSTATE_SURFACEFORMAT_YCRCB_SWAPY, 0, 2, 1, 2}, {GMM_FORMAT_YCRCB_SWAPY, NEO::GFX3DSTATE_SURFACEFORMAT_YCRCB_SWAPY, 0, 2, 1, 2}, {GMM_FORMAT_YCRCB_SWAPY, NEO::GFX3DSTATE_SURFACEFORMAT_YCRCB_SWAPY, 0, 2, 1, 2}, {GMM_FORMAT_YCRCB_SWAPY, NEO::GFX3DSTATE_SURFACEFORMAT_YCRCB_SWAPY, 0, 2, 1, 2}, {GMM_FORMAT_YCRCB_SWAPY, NEO::GFX3DSTATE_SURFACEFORMAT_YCRCB_SWAPY, 0, 2, 1, 2}}}; constexpr FormatTypes layoutAYUV = {{{GMM_FORMAT_AYUV, NEO::GFX3DSTATE_SURFACEFORMAT_R8G8B8A8_UNORM, 0, 4, 1, 4}, {GMM_FORMAT_AYUV, NEO::GFX3DSTATE_SURFACEFORMAT_R8G8B8A8_UNORM, 0, 4, 1, 4}, {GMM_FORMAT_AYUV, NEO::GFX3DSTATE_SURFACEFORMAT_R8G8B8A8_UNORM, 0, 4, 1, 4}, {GMM_FORMAT_AYUV, NEO::GFX3DSTATE_SURFACEFORMAT_R8G8B8A8_UNORM, 0, 4, 1, 4}, {GMM_FORMAT_AYUV, NEO::GFX3DSTATE_SURFACEFORMAT_R8G8B8A8_UNORM, 0, 4, 1, 4}}}; constexpr FormatTypes layoutY410 = {{{GMM_FORMAT_Y410, NEO::GFX3DSTATE_SURFACEFORMAT_R10G10B10A2_UNORM, 0, 4, 1, 4}, {GMM_FORMAT_Y410, NEO::GFX3DSTATE_SURFACEFORMAT_R10G10B10A2_UNORM, 0, 4, 1, 4}, {GMM_FORMAT_Y410, NEO::GFX3DSTATE_SURFACEFORMAT_R10G10B10A2_UNORM, 0, 4, 1, 4}, {GMM_FORMAT_Y410, NEO::GFX3DSTATE_SURFACEFORMAT_R10G10B10A2_UNORM, 0, 4, 1, 4}, {GMM_FORMAT_Y410, NEO::GFX3DSTATE_SURFACEFORMAT_R10G10B10A2_UNORM, 0, 4, 1, 4}}}; 
constexpr FormatTypes layoutY16 = {{{GMM_FORMAT_INVALID, NEO::NUM_GFX3DSTATE_SURFACEFORMATS, 0, 1, 1, 1}, {GMM_FORMAT_INVALID, NEO::NUM_GFX3DSTATE_SURFACEFORMATS, 0, 1, 1, 1}, {GMM_FORMAT_INVALID, NEO::NUM_GFX3DSTATE_SURFACEFORMATS, 0, 1, 1, 1}, {GMM_FORMAT_INVALID, NEO::NUM_GFX3DSTATE_SURFACEFORMATS, 0, 1, 1, 1}, {GMM_FORMAT_INVALID, NEO::NUM_GFX3DSTATE_SURFACEFORMATS, 0, 1, 1, 1}}}; constexpr FormatTypes layoutP010 = {{{GMM_FORMAT_P010, NEO::GFX3DSTATE_SURFACEFORMAT_PLANAR_420_16, 0, 2, 1, 1}, {GMM_FORMAT_P010, NEO::GFX3DSTATE_SURFACEFORMAT_PLANAR_420_16, 0, 2, 1, 1}, {GMM_FORMAT_P010, NEO::GFX3DSTATE_SURFACEFORMAT_PLANAR_420_16, 0, 2, 1, 1}, {GMM_FORMAT_P010, NEO::GFX3DSTATE_SURFACEFORMAT_PLANAR_420_16, 0, 2, 1, 1}, {GMM_FORMAT_P010, NEO::GFX3DSTATE_SURFACEFORMAT_PLANAR_420_16, 0, 2, 1, 1}}}; constexpr FormatTypes layoutP012 = {{{GMM_FORMAT_P012, NEO::GFX3DSTATE_SURFACEFORMAT_PLANAR_420_16, 0, 2, 1, 1}, {GMM_FORMAT_P012, NEO::GFX3DSTATE_SURFACEFORMAT_PLANAR_420_16, 0, 2, 1, 1}, {GMM_FORMAT_P012, NEO::GFX3DSTATE_SURFACEFORMAT_PLANAR_420_16, 0, 2, 1, 1}, {GMM_FORMAT_P012, NEO::GFX3DSTATE_SURFACEFORMAT_PLANAR_420_16, 0, 2, 1, 1}, {GMM_FORMAT_P012, NEO::GFX3DSTATE_SURFACEFORMAT_PLANAR_420_16, 0, 2, 1, 1}}}; constexpr FormatTypes layoutP016 = {{{GMM_FORMAT_P016, NEO::GFX3DSTATE_SURFACEFORMAT_PLANAR_420_16, 0, 2, 1, 1}, {GMM_FORMAT_P016, NEO::GFX3DSTATE_SURFACEFORMAT_PLANAR_420_16, 0, 2, 1, 1}, {GMM_FORMAT_P016, NEO::GFX3DSTATE_SURFACEFORMAT_PLANAR_420_16, 0, 2, 1, 1}, {GMM_FORMAT_P016, NEO::GFX3DSTATE_SURFACEFORMAT_PLANAR_420_16, 0, 2, 1, 1}, {GMM_FORMAT_P016, NEO::GFX3DSTATE_SURFACEFORMAT_PLANAR_420_16, 0, 2, 1, 1}}}; constexpr FormatTypes layoutY216 = {{{GMM_FORMAT_Y216, NEO::GFX3DSTATE_SURFACEFORMAT_R16G16B16A16_UNORM, 0, 4, 2, 8}, {GMM_FORMAT_Y216, NEO::GFX3DSTATE_SURFACEFORMAT_R16G16B16A16_UNORM, 0, 4, 2, 8}, {GMM_FORMAT_Y216, NEO::GFX3DSTATE_SURFACEFORMAT_R16G16B16A16_UNORM, 0, 4, 2, 8}, {GMM_FORMAT_Y216, NEO::GFX3DSTATE_SURFACEFORMAT_R16G16B16A16_UNORM, 0, 4, 
2, 8}, {GMM_FORMAT_Y216, NEO::GFX3DSTATE_SURFACEFORMAT_R16G16B16A16_UNORM, 0, 4, 2, 8}}}; constexpr FormatTypes layoutP216 = {{{GMM_FORMAT_INVALID, NEO::NUM_GFX3DSTATE_SURFACEFORMATS, 0, 1, 1, 1}, {GMM_FORMAT_INVALID, NEO::NUM_GFX3DSTATE_SURFACEFORMATS, 0, 1, 1, 1}, {GMM_FORMAT_INVALID, NEO::NUM_GFX3DSTATE_SURFACEFORMATS, 0, 1, 1, 1}, {GMM_FORMAT_INVALID, NEO::NUM_GFX3DSTATE_SURFACEFORMATS, 0, 1, 1, 1}, {GMM_FORMAT_INVALID, NEO::NUM_GFX3DSTATE_SURFACEFORMATS, 0, 1, 1, 1}}}; constexpr FormatTypes layoutP8 = {{{GMM_FORMAT_INVALID, NEO::NUM_GFX3DSTATE_SURFACEFORMATS, 0, 1, 1, 1}, {GMM_FORMAT_INVALID, NEO::NUM_GFX3DSTATE_SURFACEFORMATS, 0, 1, 1, 1}, {GMM_FORMAT_INVALID, NEO::NUM_GFX3DSTATE_SURFACEFORMATS, 0, 1, 1, 1}, {GMM_FORMAT_INVALID, NEO::NUM_GFX3DSTATE_SURFACEFORMATS, 0, 1, 1, 1}, {GMM_FORMAT_INVALID, NEO::NUM_GFX3DSTATE_SURFACEFORMATS, 0, 1, 1, 1}}}; constexpr FormatTypes layoutYUY2 = {{{GMM_FORMAT_YCRCB_NORMAL, NEO::GFX3DSTATE_SURFACEFORMAT_YCRCB_NORMAL, 0, 2, 1, 2}, {GMM_FORMAT_YCRCB_NORMAL, NEO::GFX3DSTATE_SURFACEFORMAT_YCRCB_NORMAL, 0, 2, 1, 2}, {GMM_FORMAT_YCRCB_NORMAL, NEO::GFX3DSTATE_SURFACEFORMAT_YCRCB_NORMAL, 0, 2, 1, 2}, {GMM_FORMAT_YCRCB_NORMAL, NEO::GFX3DSTATE_SURFACEFORMAT_YCRCB_NORMAL, 0, 2, 1, 2}, {GMM_FORMAT_YCRCB_NORMAL, NEO::GFX3DSTATE_SURFACEFORMAT_YCRCB_NORMAL, 0, 2, 1, 2}}}; constexpr FormatTypes layoutA8P8 = {{{GMM_FORMAT_INVALID, NEO::NUM_GFX3DSTATE_SURFACEFORMATS, 0, 1, 1, 1}, {GMM_FORMAT_INVALID, NEO::NUM_GFX3DSTATE_SURFACEFORMATS, 0, 1, 1, 1}, {GMM_FORMAT_INVALID, NEO::NUM_GFX3DSTATE_SURFACEFORMATS, 0, 1, 1, 1}, {GMM_FORMAT_INVALID, NEO::NUM_GFX3DSTATE_SURFACEFORMATS, 0, 1, 1, 1}, {GMM_FORMAT_INVALID, NEO::NUM_GFX3DSTATE_SURFACEFORMATS, 0, 1, 1, 1}}}; constexpr FormatTypes layoutIA44 = {{{GMM_FORMAT_INVALID, NEO::NUM_GFX3DSTATE_SURFACEFORMATS, 0, 1, 1, 1}, {GMM_FORMAT_INVALID, NEO::NUM_GFX3DSTATE_SURFACEFORMATS, 0, 1, 1, 1}, {GMM_FORMAT_INVALID, NEO::NUM_GFX3DSTATE_SURFACEFORMATS, 0, 1, 1, 1}, {GMM_FORMAT_INVALID, 
NEO::NUM_GFX3DSTATE_SURFACEFORMATS, 0, 1, 1, 1}, {GMM_FORMAT_INVALID, NEO::NUM_GFX3DSTATE_SURFACEFORMATS, 0, 1, 1, 1}}}; constexpr FormatTypes layoutAI44 = {{{GMM_FORMAT_INVALID, NEO::NUM_GFX3DSTATE_SURFACEFORMATS, 0, 1, 1, 1}, {GMM_FORMAT_INVALID, NEO::NUM_GFX3DSTATE_SURFACEFORMATS, 0, 1, 1, 1}, {GMM_FORMAT_INVALID, NEO::NUM_GFX3DSTATE_SURFACEFORMATS, 0, 1, 1, 1}, {GMM_FORMAT_INVALID, NEO::NUM_GFX3DSTATE_SURFACEFORMATS, 0, 1, 1, 1}, {GMM_FORMAT_INVALID, NEO::NUM_GFX3DSTATE_SURFACEFORMATS, 0, 1, 1, 1}}}; constexpr FormatTypes layoutY416 = {{{GMM_FORMAT_Y416, NEO::GFX3DSTATE_SURFACEFORMAT_R16G16B16A16_UNORM, 0, 4, 2, 8}, {GMM_FORMAT_Y416, NEO::GFX3DSTATE_SURFACEFORMAT_R16G16B16A16_UNORM, 0, 4, 2, 8}, {GMM_FORMAT_Y416, NEO::GFX3DSTATE_SURFACEFORMAT_R16G16B16A16_UNORM, 0, 4, 2, 8}, {GMM_FORMAT_Y416, NEO::GFX3DSTATE_SURFACEFORMAT_R16G16B16A16_UNORM, 0, 4, 2, 8}, {GMM_FORMAT_Y416, NEO::GFX3DSTATE_SURFACEFORMAT_R16G16B16A16_UNORM, 0, 4, 2, 8}}}; constexpr FormatTypes layoutY210 = {{{GMM_FORMAT_Y210, NEO::GFX3DSTATE_SURFACEFORMAT_R16G16B16A16_UNORM, 0, 4, 2, 8}, {GMM_FORMAT_Y210, NEO::GFX3DSTATE_SURFACEFORMAT_R16G16B16A16_UNORM, 0, 4, 2, 8}, {GMM_FORMAT_Y210, NEO::GFX3DSTATE_SURFACEFORMAT_R16G16B16A16_UNORM, 0, 4, 2, 8}, {GMM_FORMAT_Y210, NEO::GFX3DSTATE_SURFACEFORMAT_R16G16B16A16_UNORM, 0, 4, 2, 8}, {GMM_FORMAT_Y210, NEO::GFX3DSTATE_SURFACEFORMAT_R16G16B16A16_UNORM, 0, 4, 2, 8}}}; constexpr FormatTypes layoutI420 = {{{GMM_FORMAT_I420, NEO::GFX3DSTATE_SURFACEFORMAT_PLANAR_420_8, 0, 1, 1, 1}, {GMM_FORMAT_I420, NEO::GFX3DSTATE_SURFACEFORMAT_PLANAR_420_8, 0, 1, 1, 1}, {GMM_FORMAT_I420, NEO::GFX3DSTATE_SURFACEFORMAT_PLANAR_420_8, 0, 1, 1, 1}, {GMM_FORMAT_I420, NEO::GFX3DSTATE_SURFACEFORMAT_PLANAR_420_8, 0, 1, 1, 1}, {GMM_FORMAT_I420, NEO::GFX3DSTATE_SURFACEFORMAT_PLANAR_420_8, 0, 1, 1, 1}}}; constexpr FormatTypes layoutYV12 = {{{GMM_FORMAT_YV12, NEO::GFX3DSTATE_SURFACEFORMAT_PLANAR_420_8, 0, 1, 1, 1}, {GMM_FORMAT_YV12, NEO::GFX3DSTATE_SURFACEFORMAT_PLANAR_420_8, 0, 1, 1, 1}, 
{GMM_FORMAT_YV12, NEO::GFX3DSTATE_SURFACEFORMAT_PLANAR_420_8, 0, 1, 1, 1}, {GMM_FORMAT_YV12, NEO::GFX3DSTATE_SURFACEFORMAT_PLANAR_420_8, 0, 1, 1, 1}, {GMM_FORMAT_YV12, NEO::GFX3DSTATE_SURFACEFORMAT_PLANAR_420_8, 0, 1, 1, 1}}}; constexpr FormatTypes layout400P = {{{GMM_FORMAT_INVALID, NEO::NUM_GFX3DSTATE_SURFACEFORMATS, 0, 1, 1, 1}, {GMM_FORMAT_INVALID, NEO::NUM_GFX3DSTATE_SURFACEFORMATS, 0, 1, 1, 1}, {GMM_FORMAT_INVALID, NEO::NUM_GFX3DSTATE_SURFACEFORMATS, 0, 1, 1, 1}, {GMM_FORMAT_INVALID, NEO::NUM_GFX3DSTATE_SURFACEFORMATS, 0, 1, 1, 1}, {GMM_FORMAT_INVALID, NEO::NUM_GFX3DSTATE_SURFACEFORMATS, 0, 1, 1, 1}}}; constexpr FormatTypes layout422H = {{{GMM_FORMAT_INVALID, NEO::NUM_GFX3DSTATE_SURFACEFORMATS, 0, 1, 1, 1}, {GMM_FORMAT_INVALID, NEO::NUM_GFX3DSTATE_SURFACEFORMATS, 0, 1, 1, 1}, {GMM_FORMAT_INVALID, NEO::NUM_GFX3DSTATE_SURFACEFORMATS, 0, 1, 1, 1}, {GMM_FORMAT_INVALID, NEO::NUM_GFX3DSTATE_SURFACEFORMATS, 0, 1, 1, 1}, {GMM_FORMAT_INVALID, NEO::NUM_GFX3DSTATE_SURFACEFORMATS, 0, 1, 1, 1}}}; constexpr FormatTypes layout422V = {{{GMM_FORMAT_INVALID, NEO::NUM_GFX3DSTATE_SURFACEFORMATS, 0, 1, 1, 1}, {GMM_FORMAT_INVALID, NEO::NUM_GFX3DSTATE_SURFACEFORMATS, 0, 1, 1, 1}, {GMM_FORMAT_INVALID, NEO::NUM_GFX3DSTATE_SURFACEFORMATS, 0, 1, 1, 1}, {GMM_FORMAT_INVALID, NEO::NUM_GFX3DSTATE_SURFACEFORMATS, 0, 1, 1, 1}, {GMM_FORMAT_INVALID, NEO::NUM_GFX3DSTATE_SURFACEFORMATS, 0, 1, 1, 1}}}; constexpr FormatTypes layout444P = {{{GMM_FORMAT_INVALID, NEO::NUM_GFX3DSTATE_SURFACEFORMATS, 0, 1, 1, 1}, {GMM_FORMAT_INVALID, NEO::NUM_GFX3DSTATE_SURFACEFORMATS, 0, 1, 1, 1}, {GMM_FORMAT_INVALID, NEO::NUM_GFX3DSTATE_SURFACEFORMATS, 0, 1, 1, 1}, {GMM_FORMAT_INVALID, NEO::NUM_GFX3DSTATE_SURFACEFORMATS, 0, 1, 1, 1}, {GMM_FORMAT_INVALID, NEO::NUM_GFX3DSTATE_SURFACEFORMATS, 0, 1, 1, 1}}}; constexpr FormatTypes layoutRGBP = {{{GMM_FORMAT_RGBP, NEO::GFX3DSTATE_SURFACEFORMAT_R8_UINT, 0, 3, 1, 3}, {GMM_FORMAT_RGBP, NEO::GFX3DSTATE_SURFACEFORMAT_R8_UINT, 0, 3, 1, 3}, {GMM_FORMAT_RGBP, 
NEO::GFX3DSTATE_SURFACEFORMAT_R8_UINT, 0, 3, 1, 3}, {GMM_FORMAT_RGBP, NEO::GFX3DSTATE_SURFACEFORMAT_R8_UINT, 0, 3, 1, 3}, {GMM_FORMAT_RGBP, NEO::GFX3DSTATE_SURFACEFORMAT_R8_UINT, 0, 3, 1, 3}}}; constexpr FormatTypes layoutBGRP = {{{GMM_FORMAT_BGRP, NEO::GFX3DSTATE_SURFACEFORMAT_R8_UINT, 0, 3, 1, 3}, {GMM_FORMAT_BGRP, NEO::GFX3DSTATE_SURFACEFORMAT_R8_UINT, 0, 3, 1, 3}, {GMM_FORMAT_BGRP, NEO::GFX3DSTATE_SURFACEFORMAT_R8_UINT, 0, 3, 1, 3}, {GMM_FORMAT_BGRP, NEO::GFX3DSTATE_SURFACEFORMAT_R8_UINT, 0, 3, 1, 3}, {GMM_FORMAT_BGRP, NEO::GFX3DSTATE_SURFACEFORMAT_R8_UINT, 0, 3, 1, 3}}}; constexpr std::array formats = {layout8, layout16, layout32, layout88, layout8888, layout1616, layout16161616, layout3232, layout32323232, layout1010102, layout111110, layout565, layout5551, layout4444, layoutY8, layoutNV12, layoutYUYV, layoutVYUY, layoutYVYU, layoutUYVY, layoutAYUV, layoutP010, layoutY410, layoutP012, layoutY16, layoutP016, layoutY216, layoutP216, layoutP8, layoutYUY2, layoutA8P8, layoutIA44, layoutAI44, layoutY416, layoutY210, layoutI420, layoutYV12, layout400P, layout422H, layout422V, layout444P, layoutRGBP, layoutBGRP}; } // namespace ImageFormats } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/image/image_hw.h000066400000000000000000000047021422164147700247660ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/linear_stream.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/helpers/surface_format_info.h" #include "shared/source/indirect_heap/indirect_heap.h" #include "level_zero/core/source/image/image_imp.h" namespace L0 { struct StructuresLookupTable; template struct ImageCoreFamily : public ImageImp { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using RENDER_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE; ze_result_t initialize(Device *device, const ze_image_desc_t *desc) override; void 
copySurfaceStateToSSH(void *surfaceStateHeap, const uint32_t surfaceStateOffset, bool isMediaBlockArg) override; void copyRedescribedSurfaceStateToSSH(void *surfaceStateHeap, const uint32_t surfaceStateOffset) override; bool isMediaFormat(const ze_image_format_layout_t layout) { if (layout == ze_image_format_layout_t::ZE_IMAGE_FORMAT_LAYOUT_NV12 || layout == ze_image_format_layout_t::ZE_IMAGE_FORMAT_LAYOUT_P010 || layout == ze_image_format_layout_t::ZE_IMAGE_FORMAT_LAYOUT_P012 || layout == ze_image_format_layout_t::ZE_IMAGE_FORMAT_LAYOUT_P016 || layout == ze_image_format_layout_t::ZE_IMAGE_FORMAT_LAYOUT_RGBP || layout == ze_image_format_layout_t::ZE_IMAGE_FORMAT_LAYOUT_BRGP) { return true; } return false; } static constexpr uint32_t zeImageFormatSwizzleMax = ZE_IMAGE_FORMAT_SWIZZLE_X + 1u; const std::array shaderChannelSelect = { RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_RED, RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_GREEN, RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_BLUE, RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_ALPHA, RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_ZERO, RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_ONE, RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_ZERO}; protected: bool isSuitableForCompression(const StructuresLookupTable &structuresLookupTable, const NEO::ImageInfo &imgInfo); RENDER_SURFACE_STATE surfaceState; RENDER_SURFACE_STATE redescribedSurfaceState; }; template struct ImageProductFamily; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/image/image_hw.inl000066400000000000000000000275271422164147700253330ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_container/command_encoder.h" #include "shared/source/gmm_helper/gmm.h" #include "shared/source/gmm_helper/resource_info.h" #include "shared/source/helpers/string.h" #include "shared/source/helpers/surface_format_info.h" #include 
"shared/source/image/image_surface_state.h" #include "shared/source/memory_manager/allocation_properties.h" #include "shared/source/memory_manager/memory_manager.h" #include "level_zero/core/source/helpers/properties_parser.h" #include "level_zero/core/source/hw_helpers/l0_hw_helper.h" #include "level_zero/core/source/image/image_formats.h" #include "level_zero/core/source/image/image_hw.h" namespace L0 { template ze_result_t ImageCoreFamily::initialize(Device *device, const ze_image_desc_t *desc) { using RENDER_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE; StructuresLookupTable lookupTable = {}; lookupTable.areImageProperties = true; lookupTable.imageProperties.imageDescriptor = convertDescriptor(*desc); auto parseResult = prepareL0StructuresLookupTable(lookupTable, desc->pNext); if (parseResult != ZE_RESULT_SUCCESS) { return parseResult; } bool isMediaFormatLayout = isMediaFormat(desc->format.layout); imgInfo.imgDesc = lookupTable.imageProperties.imageDescriptor; imgInfo.surfaceFormat = &ImageFormats::formats[desc->format.layout][desc->format.type]; imageFormatDesc = *const_cast(desc); UNRECOVERABLE_IF(device == nullptr); this->device = device; if (imgInfo.surfaceFormat->GMMSurfaceFormat == GMM_FORMAT_INVALID) { return ZE_RESULT_ERROR_UNSUPPORTED_IMAGE_FORMAT; } typename RENDER_SURFACE_STATE::SURFACE_TYPE surfaceType; switch (desc->type) { case ZE_IMAGE_TYPE_1D: case ZE_IMAGE_TYPE_1DARRAY: surfaceType = RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_1D; break; case ZE_IMAGE_TYPE_2D: case ZE_IMAGE_TYPE_2DARRAY: surfaceType = RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_2D; break; case ZE_IMAGE_TYPE_3D: surfaceType = RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_3D; break; default: return ZE_RESULT_ERROR_INVALID_ARGUMENT; } imgInfo.linearStorage = surfaceType == RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_1D; imgInfo.plane = lookupTable.imageProperties.isPlanarExtension ? 
static_cast(lookupTable.imageProperties.planeIndex + 1u) : GMM_NO_PLANE; imgInfo.useLocalMemory = false; if (!isImageView) { if (lookupTable.isSharedHandle) { if (!lookupTable.sharedHandleType.isSupportedHandle) { return ZE_RESULT_ERROR_UNSUPPORTED_ENUMERATION; } if (lookupTable.sharedHandleType.isDMABUFHandle) { NEO::AllocationProperties properties(device->getRootDeviceIndex(), true, imgInfo, NEO::AllocationType::SHARED_IMAGE, device->getNEODevice()->getDeviceBitfield()); allocation = device->getNEODevice()->getMemoryManager()->createGraphicsAllocationFromSharedHandle(lookupTable.sharedHandleType.fd, properties, false, false); device->getNEODevice()->getMemoryManager()->closeSharedHandle(allocation); } else if (lookupTable.sharedHandleType.isNTHandle) { auto verifyResult = device->getNEODevice()->getMemoryManager()->verifyHandle(NEO::toOsHandle(lookupTable.sharedHandleType.ntHnadle), device->getNEODevice()->getRootDeviceIndex(), true); if (!verifyResult) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } allocation = device->getNEODevice()->getMemoryManager()->createGraphicsAllocationFromNTHandle(lookupTable.sharedHandleType.ntHnadle, device->getNEODevice()->getRootDeviceIndex(), NEO::AllocationType::SHARED_IMAGE); } } else { NEO::AllocationProperties properties(device->getRootDeviceIndex(), true, imgInfo, NEO::AllocationType::IMAGE, device->getNEODevice()->getDeviceBitfield()); properties.flags.preferCompressed = isSuitableForCompression(lookupTable, imgInfo); allocation = device->getNEODevice()->getMemoryManager()->allocateGraphicsMemoryWithProperties(properties); } if (allocation == nullptr) { return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY; } } auto gmm = this->allocation->getDefaultGmm(); auto gmmHelper = static_cast(device->getNEODevice()->getRootDeviceEnvironment()).getGmmHelper(); if (gmm != nullptr) { gmm->updateImgInfoAndDesc(imgInfo, 0u); } NEO::SurfaceOffsets surfaceOffsets = {imgInfo.offset, imgInfo.xOffset, imgInfo.yOffset, imgInfo.yOffsetForUVPlane}; { 
surfaceState = GfxFamily::cmdInitRenderSurfaceState; NEO::setImageSurfaceState(&surfaceState, imgInfo, gmm, *gmmHelper, __GMM_NO_CUBE_MAP, this->allocation->getGpuAddress(), surfaceOffsets, isMediaFormatLayout); NEO::setImageSurfaceStateDimensions(&surfaceState, imgInfo, __GMM_NO_CUBE_MAP, surfaceType); surfaceState.setSurfaceMinLod(0u); surfaceState.setMipCountLod(0u); NEO::setMipTailStartLod(&surfaceState, gmm); if (!isMediaFormatLayout) { surfaceState.setShaderChannelSelectRed( static_cast( shaderChannelSelect[desc->format.x])); surfaceState.setShaderChannelSelectGreen( static_cast( shaderChannelSelect[desc->format.y])); surfaceState.setShaderChannelSelectBlue( static_cast( shaderChannelSelect[desc->format.z])); surfaceState.setShaderChannelSelectAlpha( static_cast( shaderChannelSelect[desc->format.w])); } else { surfaceState.setShaderChannelSelectRed(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_RED); surfaceState.setShaderChannelSelectGreen(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_GREEN); surfaceState.setShaderChannelSelectBlue(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_BLUE); surfaceState.setShaderChannelSelectAlpha(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_ZERO); } surfaceState.setNumberOfMultisamples(RENDER_SURFACE_STATE::NUMBER_OF_MULTISAMPLES::NUMBER_OF_MULTISAMPLES_MULTISAMPLECOUNT_1); if (allocation->isCompressionEnabled()) { NEO::EncodeSurfaceState::setImageAuxParamsForCCS(&surfaceState, gmm); } } { const uint32_t exponent = Math::log2(imgInfo.surfaceFormat->ImageElementSizeInBytes); DEBUG_BREAK_IF(exponent >= 5u); NEO::ImageInfo imgInfoRedescirebed; imgInfoRedescirebed.surfaceFormat = &ImageFormats::surfaceFormatsForRedescribe[exponent % 5]; imgInfoRedescirebed.imgDesc = imgInfo.imgDesc; imgInfoRedescirebed.qPitch = imgInfo.qPitch; redescribedSurfaceState = GfxFamily::cmdInitRenderSurfaceState; NEO::setImageSurfaceState(&redescribedSurfaceState, imgInfoRedescirebed, gmm, *gmmHelper, __GMM_NO_CUBE_MAP, this->allocation->getGpuAddress(), 
surfaceOffsets, desc->format.layout == ZE_IMAGE_FORMAT_LAYOUT_NV12); NEO::setImageSurfaceStateDimensions(&redescribedSurfaceState, imgInfoRedescirebed, __GMM_NO_CUBE_MAP, surfaceType); redescribedSurfaceState.setSurfaceMinLod(0u); redescribedSurfaceState.setMipCountLod(0u); NEO::setMipTailStartLod(&redescribedSurfaceState, gmm); if (imgInfoRedescirebed.surfaceFormat->GMMSurfaceFormat == GMM_FORMAT_R8_UINT_TYPE || imgInfoRedescirebed.surfaceFormat->GMMSurfaceFormat == GMM_FORMAT_R16_UINT_TYPE || imgInfoRedescirebed.surfaceFormat->GMMSurfaceFormat == GMM_FORMAT_R32_UINT_TYPE) { redescribedSurfaceState.setShaderChannelSelectRed(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_RED); redescribedSurfaceState.setShaderChannelSelectGreen(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_ZERO); redescribedSurfaceState.setShaderChannelSelectBlue(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_ZERO); } else if (imgInfoRedescirebed.surfaceFormat->GMMSurfaceFormat == GMM_FORMAT_R32G32_UINT_TYPE) { redescribedSurfaceState.setShaderChannelSelectRed(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_RED); redescribedSurfaceState.setShaderChannelSelectGreen(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_GREEN); redescribedSurfaceState.setShaderChannelSelectBlue(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_ZERO); } else { redescribedSurfaceState.setShaderChannelSelectRed(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_RED); redescribedSurfaceState.setShaderChannelSelectGreen(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_GREEN); redescribedSurfaceState.setShaderChannelSelectBlue(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_BLUE); } redescribedSurfaceState.setNumberOfMultisamples(RENDER_SURFACE_STATE::NUMBER_OF_MULTISAMPLES::NUMBER_OF_MULTISAMPLES_MULTISAMPLECOUNT_1); if (allocation->isCompressionEnabled()) { NEO::EncodeSurfaceState::setImageAuxParamsForCCS(&redescribedSurfaceState, gmm); } } return ZE_RESULT_SUCCESS; } template void ImageCoreFamily::copySurfaceStateToSSH(void *surfaceStateHeap, const uint32_t 
surfaceStateOffset, bool isMediaBlockArg) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using RENDER_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE; // Copy the image's surface state into position in the provided surface state heap auto destSurfaceState = ptrOffset(surfaceStateHeap, surfaceStateOffset); memcpy_s(destSurfaceState, sizeof(RENDER_SURFACE_STATE), &surfaceState, sizeof(RENDER_SURFACE_STATE)); if (isMediaBlockArg) { RENDER_SURFACE_STATE *dstRss = static_cast(destSurfaceState); NEO::setWidthForMediaBlockSurfaceState(dstRss, imgInfo); } } template void ImageCoreFamily::copyRedescribedSurfaceStateToSSH(void *surfaceStateHeap, const uint32_t surfaceStateOffset) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using RENDER_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE; // Copy the image's surface state into position in the provided surface state heap auto destSurfaceState = ptrOffset(surfaceStateHeap, surfaceStateOffset); memcpy_s(destSurfaceState, sizeof(RENDER_SURFACE_STATE), &redescribedSurfaceState, sizeof(RENDER_SURFACE_STATE)); } template bool ImageCoreFamily::isSuitableForCompression(const StructuresLookupTable &structuresLookupTable, const NEO::ImageInfo &imgInfo) { auto &hwInfo = device->getHwInfo(); auto &l0HwHelper = L0HwHelper::get(hwInfo.platform.eRenderCoreFamily); if (structuresLookupTable.uncompressedHint) { return false; } return (l0HwHelper.imageCompressionSupported(hwInfo) && !imgInfo.linearStorage); } } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/image/image_imp.cpp000066400000000000000000000035351422164147700254730ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/image/image_imp.h" #include "shared/source/memory_manager/memory_manager.h" #include "igfxfmid.h" namespace L0 { ImageAllocatorFn imageFactory[IGFX_MAX_PRODUCT] = {}; ImageImp::~ImageImp() { if (!isImageView && 
this->device != nullptr) { this->device->getNEODevice()->getMemoryManager()->freeGraphicsMemory(this->allocation); } } ze_result_t ImageImp::destroy() { delete this; return ZE_RESULT_SUCCESS; } ze_result_t ImageImp::createView(Device *device, const ze_image_desc_t *desc, ze_image_handle_t *pImage) { auto productFamily = device->getNEODevice()->getHardwareInfo().platform.eProductFamily; ImageAllocatorFn allocator = nullptr; allocator = imageFactory[productFamily]; ImageImp *image = nullptr; image = static_cast((*allocator)()); image->isImageView = true; image->allocation = allocation; auto result = image->initialize(device, desc); if (result != ZE_RESULT_SUCCESS) { image->destroy(); image = nullptr; } *pImage = image; return result; } ze_result_t Image::create(uint32_t productFamily, Device *device, const ze_image_desc_t *desc, Image **pImage) { ze_result_t result = ZE_RESULT_SUCCESS; ImageAllocatorFn allocator = nullptr; if (productFamily < IGFX_MAX_PRODUCT) { allocator = imageFactory[productFamily]; } ImageImp *image = nullptr; if (allocator) { image = static_cast((*allocator)()); result = image->initialize(device, desc); if (result != ZE_RESULT_SUCCESS) { image->destroy(); image = nullptr; } } else { result = ZE_RESULT_ERROR_UNKNOWN; } *pImage = image; return result; } } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/image/image_imp.h000066400000000000000000000024301422164147700251310ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/surface_format_info.h" #include "level_zero/core/source/image/image.h" namespace L0 { struct ImageImp : public Image { ze_result_t destroy() override; virtual ze_result_t initialize(Device *device, const ze_image_desc_t *desc) = 0; ~ImageImp() override; NEO::GraphicsAllocation *getAllocation() override { return allocation; } NEO::ImageInfo getImageInfo() override { return imgInfo; } ze_image_desc_t 
getImageDesc() override { return imageFormatDesc; } ze_result_t createView(Device *device, const ze_image_desc_t *desc, ze_image_handle_t *pImage) override; ze_result_t getMemoryProperties(ze_image_memory_properties_exp_t *pMemoryProperties) override { pMemoryProperties->rowPitch = imgInfo.rowPitch; pMemoryProperties->slicePitch = imgInfo.slicePitch; pMemoryProperties->size = imgInfo.surfaceFormat->ImageElementSizeInBytes; return ZE_RESULT_SUCCESS; } protected: bool isImageView = false; Device *device = nullptr; NEO::ImageInfo imgInfo = {}; NEO::GraphicsAllocation *allocation = nullptr; ze_image_desc_t imageFormatDesc = {}; }; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/kernel/000077500000000000000000000000001422164147700232305ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/source/kernel/kernel.cpp000066400000000000000000000004001422164147700252060ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/kernel/kernel.h" #include "igfxfmid.h" namespace L0 { KernelAllocatorFn kernelFactory[IGFX_MAX_PRODUCT] = {}; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/kernel/kernel.h000066400000000000000000000155031422164147700246650ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/kernel/dispatch_kernel_encoder_interface.h" #include "shared/source/kernel/kernel_descriptor.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "shared/source/unified_memory/unified_memory.h" #include #include #include #include struct _ze_kernel_handle_t {}; namespace NEO { class Device; struct KernelInfo; class MemoryManager; } // namespace NEO namespace L0 { struct Device; struct Module; struct KernelImmutableData { KernelImmutableData(L0::Device *l0device = nullptr); virtual ~KernelImmutableData(); void 
initialize(NEO::KernelInfo *kernelInfo, Device *device, uint32_t computeUnitsUsedForSratch, NEO::GraphicsAllocation *globalConstBuffer, NEO::GraphicsAllocation *globalVarBuffer, bool internalKernel); const std::vector &getResidencyContainer() const { return residencyContainer; } std::vector &getResidencyContainer() { return residencyContainer; } uint32_t getIsaSize() const; NEO::GraphicsAllocation *getIsaGraphicsAllocation() const { return isaGraphicsAllocation.get(); } const uint8_t *getCrossThreadDataTemplate() const { return crossThreadDataTemplate.get(); } uint32_t getSurfaceStateHeapSize() const { return surfaceStateHeapSize; } const uint8_t *getSurfaceStateHeapTemplate() const { return surfaceStateHeapTemplate.get(); } uint32_t getDynamicStateHeapDataSize() const { return dynamicStateHeapSize; } const uint8_t *getDynamicStateHeapTemplate() const { return dynamicStateHeapTemplate.get(); } const NEO::KernelDescriptor &getDescriptor() const { return *kernelDescriptor; } Device *getDevice() { return this->device; } const NEO::KernelInfo *getKernelInfo() const { return kernelInfo; } void setIsaCopiedToAllocation() { isaCopiedToAllocation = true; } bool isIsaCopiedToAllocation() const { return isaCopiedToAllocation; } MOCKABLE_VIRTUAL void createRelocatedDebugData(NEO::GraphicsAllocation *globalConstBuffer, NEO::GraphicsAllocation *globalVarBuffer); protected: Device *device = nullptr; NEO::KernelInfo *kernelInfo = nullptr; NEO::KernelDescriptor *kernelDescriptor = nullptr; std::unique_ptr isaGraphicsAllocation = nullptr; uint32_t crossThreadDataSize = 0; std::unique_ptr crossThreadDataTemplate = nullptr; uint32_t surfaceStateHeapSize = 0; std::unique_ptr surfaceStateHeapTemplate = nullptr; uint32_t dynamicStateHeapSize = 0; std::unique_ptr dynamicStateHeapTemplate = nullptr; std::vector residencyContainer; bool isaCopiedToAllocation = false; }; struct Kernel : _ze_kernel_handle_t, virtual NEO::DispatchKernelEncoderI { template struct Allocator { static Kernel 
*allocate(Module *module) { return new Type(module); } }; static Kernel *create(uint32_t productFamily, Module *module, const ze_kernel_desc_t *desc, ze_result_t *ret); ~Kernel() override = default; virtual ze_result_t destroy() = 0; virtual ze_result_t getBaseAddress(uint64_t *baseAddress) = 0; virtual ze_result_t setIndirectAccess(ze_kernel_indirect_access_flags_t flags) = 0; virtual ze_result_t getIndirectAccess(ze_kernel_indirect_access_flags_t *flags) = 0; virtual ze_result_t getSourceAttributes(uint32_t *pSize, char **pString) = 0; virtual ze_result_t getProperties(ze_kernel_properties_t *pKernelProperties) = 0; virtual ze_result_t setArgumentValue(uint32_t argIndex, size_t argSize, const void *pArgValue) = 0; virtual void setGroupCount(uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ) = 0; virtual ze_result_t setArgBufferWithAlloc(uint32_t argIndex, uintptr_t argVal, NEO::GraphicsAllocation *allocation) = 0; virtual ze_result_t setArgRedescribedImage(uint32_t argIndex, ze_image_handle_t argVal) = 0; virtual ze_result_t setGroupSize(uint32_t groupSizeX, uint32_t groupSizeY, uint32_t groupSizeZ) = 0; virtual ze_result_t suggestGroupSize(uint32_t globalSizeX, uint32_t globalSizeY, uint32_t globalSizeZ, uint32_t *groupSizeX, uint32_t *groupSizeY, uint32_t *groupSizeZ) = 0; virtual ze_result_t getKernelName(size_t *pSize, char *pName) = 0; virtual uint32_t *getGlobalOffsets() = 0; virtual ze_result_t setGlobalOffsetExp(uint32_t offsetX, uint32_t offsetY, uint32_t offsetZ) = 0; virtual void patchGlobalOffset() = 0; virtual ze_result_t suggestMaxCooperativeGroupCount(uint32_t *totalGroupCount, NEO::EngineGroupType engineGroupType, bool isEngineInstanced) = 0; virtual ze_result_t setCacheConfig(ze_cache_config_flags_t flags) = 0; virtual ze_result_t getProfileInfo(zet_profile_properties_t *pProfileProperties) = 0; virtual const KernelImmutableData *getImmutableData() const = 0; virtual const std::vector &getResidencyContainer() const = 0; virtual 
UnifiedMemoryControls getUnifiedMemoryControls() const = 0; virtual bool hasIndirectAllocationsAllowed() const = 0; virtual NEO::GraphicsAllocation *getPrintfBufferAllocation() = 0; virtual void printPrintfOutput() = 0; virtual bool usesSyncBuffer() = 0; virtual void patchSyncBuffer(NEO::GraphicsAllocation *gfxAllocation, size_t bufferOffset) = 0; virtual NEO::GraphicsAllocation *allocatePrivateMemoryGraphicsAllocation() = 0; virtual void patchCrossthreadDataWithPrivateAllocation(NEO::GraphicsAllocation *privateAllocation) = 0; virtual NEO::GraphicsAllocation *getPrivateMemoryGraphicsAllocation() = 0; virtual ze_result_t setSchedulingHintExp(ze_scheduling_hint_exp_desc_t *pHint) = 0; virtual int32_t getSchedulingHintExp() = 0; Kernel() = default; Kernel(const Kernel &) = delete; Kernel(Kernel &&) = delete; Kernel &operator=(const Kernel &) = delete; Kernel &operator=(Kernel &&) = delete; static Kernel *fromHandle(ze_kernel_handle_t handle) { return static_cast(handle); } inline ze_kernel_handle_t toHandle() { return this; } }; using KernelAllocatorFn = Kernel *(*)(Module *module); extern KernelAllocatorFn kernelFactory[]; template struct KernelPopulateFactory { KernelPopulateFactory() { kernelFactory[productFamily] = KernelType::template Allocator::allocate; } }; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/kernel/kernel_hw.h000066400000000000000000000126031422164147700253610ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_container/command_encoder.h" #include "shared/source/gmm_helper/gmm.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/bindless_heaps_helper.h" #include "shared/source/helpers/cache_policy.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/string.h" #include "shared/source/kernel/implicit_args.h" #include "level_zero/core/source/kernel/kernel_imp.h" 
#include "level_zero/core/source/module/module.h"

#include "igfxfmid.h"

#include

namespace L0 {

// KernelImp subclass that encodes buffer surface states through the GfxFamily
// encoders and decides whether local ids must be generated by the runtime.
template struct KernelHw : public KernelImp {
    using KernelImp::KernelImp;
    using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily;

    // Encodes the buffer surface state for explicit argument argIndex.
    //   argIndex - index into payloadMappings.explicitArgs
    //   address  - address the argument was set to
    //   alloc    - graphics allocation backing that address
    // The surface base is the allocation's GPU address rounded down with the
    // surface-state alignment mask; the distance from that base to `address`
    // is patched into cross-thread data at argInfo.bufferOffset. The encoded
    // state is written to the bindless slot when argInfo.bindless is valid,
    // otherwise to the surface state heap at argInfo.bindful.
    void setBufferSurfaceState(uint32_t argIndex, void *address, NEO::GraphicsAllocation *alloc) override {
        uint64_t baseAddress = alloc->getGpuAddressToPatch();
        auto sshAlignmentMask = NEO::EncodeSurfaceState::getSurfaceBaseAddressAlignmentMask();

        // Remove misaligned bytes, accounted for in bufferOffset patch token
        baseAddress &= sshAlignmentMask;
        auto misalignedSize = ptrDiff(alloc->getGpuAddressToPatch(), baseAddress);
        auto offset = ptrDiff(address, reinterpret_cast(baseAddress));
        size_t bufferSizeForSsh = alloc->getUnderlyingBufferSize();
        auto argInfo = kernelImmData->getDescriptor().payloadMappings.explicitArgs[argIndex].as();
        bool offsetWasPatched = NEO::patchNonPointer(ArrayRef(this->crossThreadData.get(), this->crossThreadDataSize), argInfo.bufferOffset, static_cast(offset));
        if (false == offsetWasPatched) {
            // fallback to handling offset in surface state
            baseAddress = reinterpret_cast(address);
            bufferSizeForSsh -= offset;
            DEBUG_BREAK_IF(baseAddress != (baseAddress & sshAlignmentMask));
            offset = 0;
        }

        void *surfaceStateAddress = nullptr;
        // Bindless slots start from the family's initial surface state; bindful
        // slots start from whatever is currently stored in the heap.
        auto surfaceState = GfxFamily::cmdInitRenderSurfaceState;
        if (NEO::isValidOffset(argInfo.bindless)) {
            surfaceStateAddress = patchBindlessSurfaceState(alloc, argInfo.bindless);
        } else {
            surfaceStateAddress = ptrOffset(surfaceStateHeapData.get(), argInfo.bindful);
            surfaceState = *reinterpret_cast(surfaceStateAddress);
        }

        uint64_t bufferAddressForSsh = baseAddress;
        auto alignment = NEO::EncodeSurfaceState::getSurfaceBaseAddressAlignment();
        // Grow the size by the bytes dropped when the base was rounded down,
        // then round it up to the surface base address alignment.
        bufferSizeForSsh += misalignedSize;
        bufferSizeForSsh = alignUp(bufferSizeForSsh, alignment);

        bool l3Enabled = true;
        // Allocation MUST be cacheline (64 byte) aligned in order to enable L3 caching otherwise Heap corruption will occur coming from the KMD.
        // Most commonly this issue will occur with Host Point Allocations from customers.
        l3Enabled = isL3Capable(*alloc);

        Device *device = module->getDevice();
        NEO::Device *neoDevice = device->getNEODevice();

        // An SVM allocation flagged locallyUncachedResource disables L3 as well.
        auto allocData = device->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(reinterpret_cast(alloc->getGpuAddress()));
        if (allocData && allocData->allocationFlagsProperty.flags.locallyUncachedResource) {
            l3Enabled = false;
        }

        // NOTE(review): this counter is incremented on every call that ends up
        // with L3 disabled and is never decremented here - confirm callers
        // account for an argument being set more than once.
        if (l3Enabled == false) {
            this->kernelRequiresQueueUncachedMocsCount++;
        }

        NEO::EncodeSurfaceStateArgs args;
        args.outMemory = &surfaceState;
        args.graphicsAddress = bufferAddressForSsh;
        args.size = bufferSizeForSsh;
        args.mocs = device->getMOCS(l3Enabled, false);
        args.numAvailableDevices = neoDevice->getNumGenericSubDevices();
        args.allocation = alloc;
        args.gmmHelper = neoDevice->getGmmHelper();
        args.useGlobalAtomics = kernelImmData->getDescriptor().kernelAttributes.flags.useGlobalAtomics;
        args.areMultipleSubDevicesInContext = args.numAvailableDevices > 1;
        args.implicitScaling = device->isImplicitScalingCapable();

        NEO::EncodeSurfaceState::encodeBuffer(args);
        // Copy the encoded state from the local back into its heap/bindless slot.
        *reinterpret_cast(surfaceStateAddress) = surfaceState;
    }

    // Stores in kernelRequiresGenerationOfLocalIdsByRuntime the answer of
    // EncodeDispatchKernel::isRuntimeLocalIdsGenerationRequired for the current
    // group size and the walk-order / SIMD attributes of kernelDescriptor.
    void evaluateIfRequiresGenerationOfLocalIdsByRuntime(const NEO::KernelDescriptor &kernelDescriptor) override {
        // The encoder takes size_t work sizes, so copy the stored group size over.
        size_t localWorkSizes[3];
        localWorkSizes[0] = this->groupSize[0];
        localWorkSizes[1] = this->groupSize[1];
        localWorkSizes[2] = this->groupSize[2];

        kernelRequiresGenerationOfLocalIdsByRuntime = NEO::EncodeDispatchKernel::isRuntimeLocalIdsGenerationRequired(
            kernelDescriptor.kernelAttributes.numLocalIdChannels,
            localWorkSizes,
            std::array{
                {kernelDescriptor.kernelAttributes.workgroupWalkOrder[0],
                 kernelDescriptor.kernelAttributes.workgroupWalkOrder[1],
                 kernelDescriptor.kernelAttributes.workgroupWalkOrder[2]}},
            kernelDescriptor.kernelAttributes.flags.requiresWorkgroupWalkOrder,
            requiredWorkgroupOrder,
            kernelDescriptor.kernelAttributes.simdSize);
    }
};

} // namespace L0
compute-runtime-22.14.22890/level_zero/core/source/kernel/kernel_imp.cpp000066400000000000001531331422164147700260670ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/kernel/kernel_imp.h" #include "shared/source/helpers/basic_math.h" #include "shared/source/helpers/blit_commands_helper.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/helpers/kernel_helpers.h" #include "shared/source/helpers/local_work_size.h" #include "shared/source/helpers/per_thread_data.h" #include "shared/source/helpers/ray_tracing_helper.h" #include "shared/source/helpers/register_offsets.h" #include "shared/source/helpers/string.h" #include "shared/source/helpers/surface_format_info.h" #include "shared/source/kernel/implicit_args.h" #include "shared/source/kernel/kernel_arg_descriptor.h" #include "shared/source/kernel/kernel_descriptor.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/memory_manager/memory_operations_handler.h" #include "shared/source/memory_manager/unified_memory_manager.h" #include "shared/source/program/kernel_info.h" #include "shared/source/utilities/arrayref.h" #include "level_zero/core/source/debugger/debugger_l0.h" #include "level_zero/core/source/device/device.h" #include "level_zero/core/source/device/device_imp.h" #include "level_zero/core/source/driver/driver_handle_imp.h" #include "level_zero/core/source/image/image.h" #include "level_zero/core/source/image/image_format_desc_helper.h" #include "level_zero/core/source/kernel/sampler_patch_values.h" #include "level_zero/core/source/module/module.h" #include "level_zero/core/source/module/module_imp.h" #include "level_zero/core/source/printf_handler/printf_handler.h" #include "level_zero/core/source/sampler/sampler.h" #include namespace L0 {

// Stores the given Level Zero device in the `device` member; nothing else is
// set up here.
KernelImmutableData::KernelImmutableData(L0::Device *l0device) : device(l0device) {}
// Frees the ISA allocation through the NEO device's memory manager (when one
// exists) and drops the cross-thread / SSH / DSH templates.
KernelImmutableData::~KernelImmutableData() {
    if (nullptr != isaGraphicsAllocation) {
        this->getDevice()->getNEODevice()->getMemoryManager()->freeGraphicsMemory(&*isaGraphicsAllocation);
        // The memory manager freed the allocation above; release() gives up the
        // pointer (presumably a std::unique_ptr - confirm in the header) so it
        // is not destroyed a second time.
        isaGraphicsAllocation.release();
    }
    crossThreadDataTemplate.reset();
    surfaceStateHeapTemplate.reset();
    dynamicStateHeapTemplate.reset();
}

// Patches an implicit surface (e.g. a global constants/variables buffer) into
// the kernel heaps.
//   crossThreadData             - cross-thread data to patch; skipped when empty
//   surfaceStateHeap            - surface state heap; skipped when empty or when
//                                 ptr.bindful is not a valid offset
//   ptrToPatchInCrossThreadData - pointer value written through ptr
//   allocation                  - allocation described by the surface state
//   ptr                         - patch locations for this implicit argument
//   device                      - source of hardware info and the GMM helper
//   useGlobalAtomics / implicitScaling - forwarded to the surface state encoder
inline void patchWithImplicitSurface(ArrayRef crossThreadData, ArrayRef surfaceStateHeap,
                                     uintptr_t ptrToPatchInCrossThreadData, NEO::GraphicsAllocation &allocation,
                                     const NEO::ArgDescPointer &ptr, const NEO::Device &device, bool useGlobalAtomics,
                                     bool implicitScaling) {
    if (false == crossThreadData.empty()) {
        NEO::patchPointer(crossThreadData, ptr, ptrToPatchInCrossThreadData);
    }

    if ((false == surfaceStateHeap.empty()) && (NEO::isValidOffset(ptr.bindful))) {
        // The surface state covers the whole allocation: its GPU address and
        // its underlying buffer size.
        auto surfaceState = surfaceStateHeap.begin() + ptr.bindful;
        auto addressToPatch = allocation.getGpuAddress();
        size_t sizeToPatch = allocation.getUnderlyingBufferSize();

        auto &hwInfo = device.getHardwareInfo();
        auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily);

        NEO::EncodeSurfaceStateArgs args;
        args.outMemory = surfaceState;
        args.size = sizeToPatch;
        args.graphicsAddress = addressToPatch;
        args.gmmHelper = device.getGmmHelper();
        args.allocation = &allocation;
        args.useGlobalAtomics = useGlobalAtomics;
        args.numAvailableDevices = device.getNumGenericSubDevices();
        args.areMultipleSubDevicesInContext = args.numAvailableDevices > 1;
        args.mocs = hwHelper.getMocsIndex(*args.gmmHelper, true, false) << 1;
        args.implicitScaling = implicitScaling;

        hwHelper.encodeBufferSurfaceState(args);
    }
}

// Builds the immutable per-kernel data from kernelInfo.
//   kernelInfo                - must be non-null with a non-empty kernel heap
//   device                    - Level Zero device; cast to DeviceImp to reach
//                               the active NEO device
//   computeUnitsUsedForSratch - not referenced anywhere in this function
//   globalConstBuffer / globalVarBuffer - optional global buffers; each is
//                               required when the descriptor has a valid
//                               stateless offset for it
//   internalKernel            - selects KERNEL_ISA_INTERNAL over KERNEL_ISA
// Steps: allocate the ISA graphics memory, optionally create relocated debug
// data, build the cross-thread / SSH / DSH templates, then patch the global
// surfaces and add the global buffers to residencyContainer.
void KernelImmutableData::initialize(NEO::KernelInfo *kernelInfo, Device *device,
                                     uint32_t computeUnitsUsedForSratch,
                                     NEO::GraphicsAllocation *globalConstBuffer,
                                     NEO::GraphicsAllocation *globalVarBuffer, bool internalKernel) {

    UNRECOVERABLE_IF(kernelInfo == nullptr);
    this->kernelInfo = kernelInfo;
    this->kernelDescriptor = &kernelInfo->kernelDescriptor;

    DeviceImp *deviceImp = static_cast(device);
    auto neoDevice = deviceImp->getActiveDevice();
    auto memoryManager = neoDevice->getMemoryManager();

    auto kernelIsaSize = kernelInfo->heapInfo.KernelHeapSize;
    UNRECOVERABLE_IF(kernelIsaSize == 0);
    UNRECOVERABLE_IF(!kernelInfo->heapInfo.pKernelHeap);
    const auto allocType = internalKernel ? NEO::AllocationType::KERNEL_ISA_INTERNAL : NEO::AllocationType::KERNEL_ISA;

    // Only the allocation is created here; this function does not copy the
    // kernel heap into it.
    auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(
        {neoDevice->getRootDeviceIndex(), kernelIsaSize, allocType, neoDevice->getDeviceBitfield()});
    UNRECOVERABLE_IF(allocation == nullptr);

    isaGraphicsAllocation.reset(allocation);

    // Needs isaGraphicsAllocation to be set: the relocation step reads its
    // GPU address and size for the text segment.
    if (neoDevice->getDebugger() && kernelInfo->kernelDescriptor.external.debugData.get()) {
        createRelocatedDebugData(globalConstBuffer, globalVarBuffer);
    }

    this->crossThreadDataSize = this->kernelDescriptor->kernelAttributes.crossThreadDataSize;

    // Stays default-constructed when the kernel has no cross-thread data.
    ArrayRef crossThredDataArrayRef;
    if (crossThreadDataSize != 0) {
        crossThreadDataTemplate.reset(new uint8_t[crossThreadDataSize]);

        // Start from the compiler-provided cross-thread data when there is
        // some, otherwise from zeroes.
        if (kernelInfo->crossThreadData) {
            memcpy_s(crossThreadDataTemplate.get(), crossThreadDataSize,
                     kernelInfo->crossThreadData, crossThreadDataSize);
        } else {
            memset(crossThreadDataTemplate.get(), 0x00, crossThreadDataSize);
        }

        crossThredDataArrayRef = ArrayRef(this->crossThreadDataTemplate.get(),
                                          this->crossThreadDataSize);

        NEO::patchNonPointer(crossThredDataArrayRef,
                             kernelDescriptor->payloadMappings.implicitArgs.simdSize, kernelDescriptor->kernelAttributes.simdSize);
    }

    if (kernelInfo->heapInfo.SurfaceStateHeapSize != 0) {
        this->surfaceStateHeapSize = kernelInfo->heapInfo.SurfaceStateHeapSize;
        surfaceStateHeapTemplate.reset(new uint8_t[surfaceStateHeapSize]);

        memcpy_s(surfaceStateHeapTemplate.get(), surfaceStateHeapSize,
                 kernelInfo->heapInfo.pSsh, surfaceStateHeapSize);
    }

    if (kernelInfo->heapInfo.DynamicStateHeapSize != 0) {
        this->dynamicStateHeapSize = kernelInfo->heapInfo.DynamicStateHeapSize;
        dynamicStateHeapTemplate.reset(new uint8_t[dynamicStateHeapSize]);

        memcpy_s(dynamicStateHeapTemplate.get(), dynamicStateHeapSize,
                 kernelInfo->heapInfo.pDsh, dynamicStateHeapSize);
    }

    ArrayRef surfaceStateHeapArrayRef = ArrayRef(surfaceStateHeapTemplate.get(), getSurfaceStateHeapSize());

    // Global constants: patch when the kernel references them; the buffer is
    // added to the residency container in either case when it exists.
    if (NEO::isValidOffset(kernelDescriptor->payloadMappings.implicitArgs.globalConstantsSurfaceAddress.stateless)) {
        UNRECOVERABLE_IF(nullptr == globalConstBuffer);

        patchWithImplicitSurface(crossThredDataArrayRef, surfaceStateHeapArrayRef,
                                 static_cast(globalConstBuffer->getGpuAddressToPatch()),
                                 *globalConstBuffer, kernelDescriptor->payloadMappings.implicitArgs.globalConstantsSurfaceAddress,
                                 *neoDevice, kernelDescriptor->kernelAttributes.flags.useGlobalAtomics, deviceImp->isImplicitScalingCapable());
        this->residencyContainer.push_back(globalConstBuffer);
    } else if (nullptr != globalConstBuffer) {
        this->residencyContainer.push_back(globalConstBuffer);
    }

    // Global variables: same handling as the constants buffer above.
    if (NEO::isValidOffset(kernelDescriptor->payloadMappings.implicitArgs.globalVariablesSurfaceAddress.stateless)) {
        UNRECOVERABLE_IF(globalVarBuffer == nullptr);

        patchWithImplicitSurface(crossThredDataArrayRef, surfaceStateHeapArrayRef,
                                 static_cast(globalVarBuffer->getGpuAddressToPatch()),
                                 *globalVarBuffer, kernelDescriptor->payloadMappings.implicitArgs.globalVariablesSurfaceAddress,
                                 *neoDevice, kernelDescriptor->kernelAttributes.flags.useGlobalAtomics, deviceImp->isImplicitScalingCapable());
        this->residencyContainer.push_back(globalVarBuffer);
    } else if (nullptr != globalVarBuffer) {
        this->residencyContainer.push_back(globalVarBuffer);
    }
}

// Produces a relocated copy of the kernel's debug ELF (vIsa) in
// kernelDescriptor.external.relocatedDebugData, using the ISA allocation as the
// text segment and the optional global variable / constant buffers as data
// segments. Nothing is produced when there is no debug data or when the decoded
// ELF reports at most one debug-info relocation.
void KernelImmutableData::createRelocatedDebugData(NEO::GraphicsAllocation *globalConstBuffer,
                                                   NEO::GraphicsAllocation *globalVarBuffer) {
    NEO::Linker::SegmentInfo globalData;
    NEO::Linker::SegmentInfo constData;
    if (globalVarBuffer) {
        globalData.gpuAddress = globalVarBuffer->getGpuAddress();
        globalData.segmentSize = globalVarBuffer->getUnderlyingBufferSize();
    }
    if (globalConstBuffer) {
        constData.gpuAddress = globalConstBuffer->getGpuAddress();
        constData.segmentSize = globalConstBuffer->getUnderlyingBufferSize();
    }

    if (kernelInfo->kernelDescriptor.external.debugData.get()) {
        // NOTE(review): outErrReason / outWarning are filled by decodeElf but
        // never inspected afterwards.
        std::string outErrReason;
        std::string outWarning;
        auto decodedElf = NEO::Elf::decodeElf(ArrayRef(reinterpret_cast(kernelInfo->kernelDescriptor.external.debugData->vIsa),
                                                       kernelInfo->kernelDescriptor.external.debugData->vIsaSize),
                                              outErrReason, outWarning);

        if (decodedElf.getDebugInfoRelocations().size() > 1) {
            // Relocated data must not have been created already.
            UNRECOVERABLE_IF(kernelInfo->kernelDescriptor.external.relocatedDebugData.get() != nullptr);

            // Relocations are applied to a private copy; the original vIsa
            // bytes are left untouched.
            auto size = kernelInfo->kernelDescriptor.external.debugData->vIsaSize;
            kernelInfo->kernelDescriptor.external.relocatedDebugData = std::make_unique(size);

            memcpy_s(kernelInfo->kernelDescriptor.external.relocatedDebugData.get(), size, kernelInfo->kernelDescriptor.external.debugData->vIsa, kernelInfo->kernelDescriptor.external.debugData->vIsaSize);

            NEO::Linker::SegmentInfo textSegment = {getIsaGraphicsAllocation()->getGpuAddress(),
                                                    getIsaGraphicsAllocation()->getUnderlyingBufferSize()};

            NEO::Linker::applyDebugDataRelocations(decodedElf, ArrayRef(kernelInfo->kernelDescriptor.external.relocatedDebugData.get(), size),
                                                   textSegment, globalData, constData);
        }
    }
}

// Writes the decanonized GPU address of the kernel allocation to *baseAddress
// when baseAddress is non-null. Always returns ZE_RESULT_SUCCESS, including
// when baseAddress is null.
ze_result_t KernelImp::getBaseAddress(uint64_t *baseAddress) {
    if (baseAddress) {
        *baseAddress = NEO::GmmHelper::decanonize(this->kernelImmData->getKernelInfo()->kernelAllocation->getGpuAddress());
    }
    return ZE_RESULT_SUCCESS;
}

// Returns the underlying buffer size of the ISA allocation, cast down from the
// allocation's size type.
uint32_t KernelImmutableData::getIsaSize() const {
    return static_cast(isaGraphicsAllocation->getUnderlyingBufferSize());
}

// Stores the owning module; no other state is initialised here.
KernelImp::KernelImp(Module *module) : module(module) {}

// Destructor: frees the private memory allocation, the per-thread data buffer
// and (after printing its contents) the printf buffer.
KernelImp::~KernelImp() { if (nullptr != privateMemoryGraphicsAllocation) { module->getDevice()->getNEODevice()->getMemoryManager()->freeGraphicsMemory(privateMemoryGraphicsAllocation); } if (perThreadDataForWholeThreadGroup != nullptr) { alignedFree(perThreadDataForWholeThreadGroup); } if (printfBuffer != nullptr) { //not allowed to call virtual function on destructor, so calling printOutput directly
PrintfHandler::printOutput(kernelImmData, this->printfBuffer, module->getDevice()); module->getDevice()->getNEODevice()->getMemoryManager()->freeGraphicsMemory(printfBuffer); } slmArgSizes.clear(); crossThreadData.reset(); surfaceStateHeapData.reset(); dynamicStateHeapData.reset(); }

// Dispatches to the per-argument handler registered in kernelArgHandlers.
// Returns ZE_RESULT_ERROR_INVALID_ARGUMENT when argIndex has no handler,
// otherwise whatever the handler returns.
ze_result_t KernelImp::setArgumentValue(uint32_t argIndex, size_t argSize, const void *pArgValue) {
    if (argIndex >= kernelArgHandlers.size()) {
        return ZE_RESULT_ERROR_INVALID_ARGUMENT;
    }
    return (this->*kernelArgHandlers[argIndex])(argIndex, argSize, pArgValue);
}

// Patches the dispatch-size dependent values (global work size, number of work
// groups, work dimension) into cross-thread data and mirrors them into the
// implicit args structure when the kernel has one. Uses the group size stored
// by the last setGroupSize call.
void KernelImp::setGroupCount(uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ) {
    const NEO::KernelDescriptor &desc = kernelImmData->getDescriptor();
    // Products are computed in uint32_t. NOTE(review): a large count times a
    // large group size wraps silently.
    uint32_t globalWorkSize[3] = {groupCountX * groupSize[0], groupCountY * groupSize[1],
                                  groupCountZ * groupSize[2]};
    auto dst = ArrayRef(crossThreadData.get(), crossThreadDataSize);
    NEO::patchVecNonPointer(dst, desc.payloadMappings.dispatchTraits.globalWorkSize, globalWorkSize);

    uint32_t groupCount[3] = {groupCountX, groupCountY, groupCountZ};
    NEO::patchVecNonPointer(dst, desc.payloadMappings.dispatchTraits.numWorkGroups, groupCount);

    // The work dimension is the highest axis whose global size exceeds 1.
    uint32_t workDim = 1;
    if (groupCountZ * groupSize[2] > 1) {
        workDim = 3;
    } else if (groupCountY * groupSize[1] > 1) {
        workDim = 2;
    }
    auto workDimOffset = desc.payloadMappings.dispatchTraits.workDim;
    if (NEO::isValidOffset(workDimOffset)) {
        auto destinationBuffer = ArrayRef(crossThreadData.get(), crossThreadDataSize);
        NEO::patchNonPointer(destinationBuffer, desc.payloadMappings.dispatchTraits.workDim, workDim);
    }

    if (pImplicitArgs) {
        pImplicitArgs->numWorkDim = workDim;

        pImplicitArgs->globalSizeX = globalWorkSize[0];
        pImplicitArgs->globalSizeY = globalWorkSize[1];
        pImplicitArgs->globalSizeZ = globalWorkSize[2];

        pImplicitArgs->groupCountX = groupCount[0];
        pImplicitArgs->groupCountY = groupCount[1];
        pImplicitArgs->groupCountZ = groupCount[2];
    }
}

// Sets the work-group size and refreshes everything derived from it.
// Returns
//   ZE_RESULT_ERROR_INVALID_ARGUMENT             - any dimension is 0
//   ZE_RESULT_ERROR_INVALID_GROUP_SIZE_DIMENSION - total size exceeds
//       module->getMaxGroupSize(), or a non-zero required work-group size of
//       the kernel is not matched
//   ZE_RESULT_SUCCESS                            - otherwise
// On success it updates numThreadsPerThreadGroup, threadExecutionMask, the
// work-group size in cross-thread data and, when the runtime has to generate
// local ids, the per-thread data buffer.
ze_result_t KernelImp::setGroupSize(uint32_t groupSizeX, uint32_t groupSizeY,
                                    uint32_t groupSizeZ) {
    if ((0 == groupSizeX) || (0 == groupSizeY) || (0 == groupSizeZ)) {
        return ZE_RESULT_ERROR_INVALID_ARGUMENT;
    }

    auto numChannels = kernelImmData->getDescriptor().kernelAttributes.numLocalIdChannels;
    Vec3 groupSize{groupSizeX, groupSizeY, groupSizeZ};
    auto itemsInGroup = Math::computeTotalElementsCount(groupSize);

    if (itemsInGroup > module->getMaxGroupSize()) {
        DEBUG_BREAK_IF(true);
        return ZE_RESULT_ERROR_INVALID_GROUP_SIZE_DIMENSION;
    }

    // NOTE(review): the members are overwritten before the required work-group
    // size check below, so a rejected size is still stored in this->groupSize.
    this->groupSize[0] = groupSizeX;
    this->groupSize[1] = groupSizeY;
    this->groupSize[2] = groupSizeZ;
    const NEO::KernelDescriptor &kernelDescriptor = kernelImmData->getDescriptor();
    for (uint32_t i = 0u; i < 3u; i++) {
        // A required size of 0 means "no requirement" for that dimension.
        if (kernelDescriptor.kernelAttributes.requiredWorkgroupSize[i] != 0 &&
            kernelDescriptor.kernelAttributes.requiredWorkgroupSize[i] != this->groupSize[i]) {
            NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr,
                                  "Invalid group size {%d, %d, %d} specified, requiredWorkGroupSize = {%d, %d, %d}\n",
                                  this->groupSize[0], this->groupSize[1], this->groupSize[2],
                                  kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0],
                                  kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1],
                                  kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2]);
            return ZE_RESULT_ERROR_INVALID_GROUP_SIZE_DIMENSION;
        }
    }

    auto simdSize = kernelDescriptor.kernelAttributes.simdSize;
    // Ceiling division: one hardware thread per started group of simdSize items.
    this->numThreadsPerThreadGroup = static_cast((itemsInGroup + simdSize - 1u) / simdSize);
    patchWorkgroupSizeInCrossThreadData(groupSizeX, groupSizeY, groupSizeZ);

    // Lanes left over for the last thread; the mask-based remainder assumes
    // simdSize is a power of two.
    auto remainderSimdLanes = itemsInGroup & (simdSize - 1u);
    threadExecutionMask = static_cast(maxNBitValue(remainderSimdLanes));
    if (!threadExecutionMask) {
        // No remainder: enable every lane. SIMD1 gets a 32-bit mask.
        threadExecutionMask = static_cast(maxNBitValue((simdSize == 1) ? 32 : simdSize));
    }
    evaluateIfRequiresGenerationOfLocalIdsByRuntime(kernelDescriptor);

    if (kernelRequiresGenerationOfLocalIdsByRuntime) {
        auto grfSize = this->module->getDevice()->getHwInfo().capabilityTable.grfSize;
        uint32_t perThreadDataSizeForWholeThreadGroupNeeded =
            static_cast(NEO::PerThreadDataHelper::getPerThreadDataSizeTotal(
                simdSize, grfSize, numChannels, itemsInGroup));
        // The buffer only ever grows; a smaller request reuses the existing one.
        if (perThreadDataSizeForWholeThreadGroupNeeded >
            perThreadDataSizeForWholeThreadGroupAllocated) {
            alignedFree(perThreadDataForWholeThreadGroup);
            perThreadDataForWholeThreadGroup = static_cast(alignedMalloc(perThreadDataSizeForWholeThreadGroupNeeded, 32));
            perThreadDataSizeForWholeThreadGroupAllocated = perThreadDataSizeForWholeThreadGroupNeeded;
        }
        perThreadDataSizeForWholeThreadGroup = perThreadDataSizeForWholeThreadGroupNeeded;

        if (numChannels > 0) {
            // Only kernels using all three local id channels are supported here.
            UNRECOVERABLE_IF(3 != numChannels);
            NEO::generateLocalIDs(
                perThreadDataForWholeThreadGroup,
                static_cast(simdSize),
                std::array{{static_cast(groupSizeX),
                            static_cast(groupSizeY),
                            static_cast(groupSizeZ)}},
                std::array{{0, 1, 2}},
                false, grfSize);
        }

        this->perThreadDataSize = perThreadDataSizeForWholeThreadGroup / numThreadsPerThreadGroup;
    }
    return ZE_RESULT_SUCCESS;
}

// Suggests a work-group size for the given global size and writes it to
// groupSizeX/Y/Z. The dimensionality is derived from which of globalSizeY /
// globalSizeZ exceed 1. Always returns ZE_RESULT_SUCCESS; the output pointers
// are dereferenced without a null check.
ze_result_t KernelImp::suggestGroupSize(uint32_t globalSizeX, uint32_t globalSizeY,
                                        uint32_t globalSizeZ, uint32_t *groupSizeX,
                                        uint32_t *groupSizeY, uint32_t *groupSizeZ) {
    size_t retGroupSize[3] = {};
    auto maxWorkGroupSize = module->getMaxGroupSize();
    auto simd = kernelImmData->getDescriptor().kernelAttributes.simdSize;
    size_t workItems[3] = {globalSizeX, globalSizeY, globalSizeZ};
    uint32_t dim = (globalSizeY > 1U) ? 2 : 1U;
    dim = (globalSizeZ > 1U) ? 3 : dim;

    if (NEO::DebugManager.flags.EnableComputeWorkSizeND.get()) {
        auto usesImages = getImmutableData()->getDescriptor().kernelAttributes.flags.usesImages;
        auto neoDevice = module->getDevice()->getNEODevice();
        const auto hwInfo = &neoDevice->getHardwareInfo();
        const auto &deviceInfo = neoDevice->getDeviceInfo();
        uint32_t numThreadsPerSubSlice = (uint32_t)deviceInfo.maxNumEUsPerSubSlice * deviceInfo.numThreadsPerEU;
        uint32_t localMemSize = (uint32_t)deviceInfo.localMemSize;

        NEO::WorkSizeInfo wsInfo(maxWorkGroupSize, kernelImmData->getDescriptor().kernelAttributes.usesBarriers(), simd, this->getSlmTotalSize(),
                                 hwInfo, numThreadsPerSubSlice, localMemSize,
                                 usesImages, false, kernelImmData->getDescriptor().kernelAttributes.flags.requiresDisabledEUFusion);
        NEO::computeWorkgroupSizeND(wsInfo, retGroupSize, workItems, dim);
    } else {
        // Legacy heuristics. Note that dim == 3 also ends up in the 2D helper.
        if (1U == dim) {
            NEO::computeWorkgroupSize1D(maxWorkGroupSize, retGroupSize, workItems, simd);
        } else if (NEO::DebugManager.flags.EnableComputeWorkSizeSquared.get() && (2U == dim)) {
            NEO::computeWorkgroupSizeSquared(maxWorkGroupSize, retGroupSize, workItems, simd, dim);
        } else {
            NEO::computeWorkgroupSize2D(maxWorkGroupSize, retGroupSize, workItems, simd);
        }
    }

    *groupSizeX = static_cast(retGroupSize[0]);
    *groupSizeY = static_cast(retGroupSize[1]);
    *groupSizeZ = static_cast(retGroupSize[2]);

    return ZE_RESULT_SUCCESS;
}

// Computes the maximum number of work groups for a cooperative launch with the
// currently stored group size and writes it to *totalGroupCount. A group size
// must have been set first: a zero dimension is treated as unrecoverable.
ze_result_t KernelImp::suggestMaxCooperativeGroupCount(uint32_t *totalGroupCount, NEO::EngineGroupType engineGroupType,
                                                       bool isEngineInstanced) {
    UNRECOVERABLE_IF(0 == groupSize[0]);
    UNRECOVERABLE_IF(0 == groupSize[1]);
    UNRECOVERABLE_IF(0 == groupSize[2]);

    auto &hardwareInfo = module->getDevice()->getHwInfo();

    // Fall back to the sub-slice count when no dual sub-slices are reported.
    auto dssCount = hardwareInfo.gtSystemInfo.DualSubSliceCount;
    if (dssCount == 0) {
        dssCount = hardwareInfo.gtSystemInfo.SubSliceCount;
    }
    auto &hwHelper = NEO::HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);
    auto &descriptor = kernelImmData->getDescriptor();
    // NOTE(review): ThreadCount / EUCount divides by zero if EUCount is 0 -
    // presumably never the case for a real device; confirm.
    auto availableThreadCount = hwHelper.calculateAvailableThreadCount(
        hardwareInfo.platform.eProductFamily,
        descriptor.kernelAttributes.numGrfRequired,
        hardwareInfo.gtSystemInfo.EUCount, hardwareInfo.gtSystemInfo.ThreadCount / hardwareInfo.gtSystemInfo.EUCount);

    auto barrierCount = descriptor.kernelAttributes.barrierCount;
    // Always evaluated as a 3-dimensional launch.
    const uint32_t workDim = 3;
    const size_t localWorkSize[] = {groupSize[0], groupSize[1], groupSize[2]};
    *totalGroupCount = NEO::KernelHelper::getMaxWorkGroupCount(descriptor.kernelAttributes.simdSize,
                                                               availableThreadCount,
                                                               dssCount,
                                                               dssCount * KB * hardwareInfo.capabilityTable.slmSize,
                                                               hwHelper.alignSlmSize(slmArgsTotalSize + descriptor.kernelAttributes.slmInlineSize),
                                                               static_cast(hwHelper.getMaxBarrierRegisterPerSlice()),
                                                               hwHelper.getBarriersCountFromHasBarriers(barrierCount),
                                                               workDim,
                                                               localWorkSize);
    *totalGroupCount = hwHelper.adjustMaxWorkGroupCount(*totalGroupCount, engineGroupType, hardwareInfo, isEngineInstanced);
    return ZE_RESULT_SUCCESS;
}

// Enables indirect access for the allocation kinds named in flags. Does nothing
// (and still returns ZE_RESULT_SUCCESS) when the DisableIndirectAccess debug
// flag is 1 or the kernel has no indirect access. Flags are only ever switched
// on here; bits missing from `flags` do not clear a previously enabled kind.
ze_result_t KernelImp::setIndirectAccess(ze_kernel_indirect_access_flags_t flags) {
    if (NEO::DebugManager.flags.DisableIndirectAccess.get() == 1 || this->kernelHasIndirectAccess == false) {
        return ZE_RESULT_SUCCESS;
    }

    if (flags & ZE_KERNEL_INDIRECT_ACCESS_FLAG_DEVICE) {
        this->unifiedMemoryControls.indirectDeviceAllocationsAllowed = true;
    }
    if (flags & ZE_KERNEL_INDIRECT_ACCESS_FLAG_HOST) {
        this->unifiedMemoryControls.indirectHostAllocationsAllowed = true;
    }
    if (flags & ZE_KERNEL_INDIRECT_ACCESS_FLAG_SHARED) {
        this->unifiedMemoryControls.indirectSharedAllocationsAllowed = true;
    }

    return ZE_RESULT_SUCCESS;
}

// Reports the currently enabled indirect access kinds as a flag mask in *flags.
ze_result_t KernelImp::getIndirectAccess(ze_kernel_indirect_access_flags_t *flags) { *flags = 0; if (this->unifiedMemoryControls.indirectDeviceAllocationsAllowed) { *flags |= ZE_KERNEL_INDIRECT_ACCESS_FLAG_DEVICE; } if (this->unifiedMemoryControls.indirectHostAllocationsAllowed) { *flags |= ZE_KERNEL_INDIRECT_ACCESS_FLAG_HOST; } if (this->unifiedMemoryControls.indirectSharedAllocationsAllowed) { *flags |=
ZE_KERNEL_INDIRECT_ACCESS_FLAG_SHARED; } return ZE_RESULT_SUCCESS; } ze_result_t KernelImp::getSourceAttributes(uint32_t *pSize, char **pString) { auto &desc = kernelImmData->getDescriptor(); if (pString == nullptr) { *pSize = (uint32_t)desc.kernelMetadata.kernelLanguageAttributes.length() + 1; } else { strncpy_s(*pString, desc.kernelMetadata.kernelLanguageAttributes.length() + 1, desc.kernelMetadata.kernelLanguageAttributes.c_str(), desc.kernelMetadata.kernelLanguageAttributes.length() + 1); } return ZE_RESULT_SUCCESS; } ze_result_t KernelImp::setArgImmediate(uint32_t argIndex, size_t argSize, const void *argVal) { if (kernelImmData->getDescriptor().payloadMappings.explicitArgs.size() <= argIndex) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } const auto &arg = kernelImmData->getDescriptor().payloadMappings.explicitArgs[argIndex]; for (const auto &element : arg.as().elements) { if (element.sourceOffset < argSize) { size_t maxBytesToCopy = argSize - element.sourceOffset; size_t bytesToCopy = std::min(static_cast(element.size), maxBytesToCopy); auto pDst = ptrOffset(crossThreadData.get(), element.offset); if (argVal) { auto pSrc = ptrOffset(argVal, element.sourceOffset); memcpy_s(pDst, element.size, pSrc, bytesToCopy); } else { uint64_t val = 0; memcpy_s(pDst, element.size, reinterpret_cast(&val), bytesToCopy); } } else { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } } return ZE_RESULT_SUCCESS; } ze_result_t KernelImp::setArgRedescribedImage(uint32_t argIndex, ze_image_handle_t argVal) { const auto &arg = kernelImmData->getDescriptor().payloadMappings.explicitArgs[argIndex].as(); if (argVal == nullptr) { residencyContainer[argIndex] = nullptr; return ZE_RESULT_SUCCESS; } const auto image = Image::fromHandle(argVal); image->copyRedescribedSurfaceStateToSSH(surfaceStateHeapData.get(), arg.bindful); residencyContainer[argIndex] = image->getAllocation(); return ZE_RESULT_SUCCESS; } ze_result_t KernelImp::setArgBufferWithAlloc(uint32_t argIndex, uintptr_t argVal, 
NEO::GraphicsAllocation *allocation) { const auto &arg = kernelImmData->getDescriptor().payloadMappings.explicitArgs[argIndex].as(); const auto val = argVal; NEO::patchPointer(ArrayRef(crossThreadData.get(), crossThreadDataSize), arg, val); if (NEO::isValidOffset(arg.bindful) || NEO::isValidOffset(arg.bindless)) { setBufferSurfaceState(argIndex, reinterpret_cast(val), allocation); } auto allocData = this->module->getDevice()->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(reinterpret_cast(allocation->getGpuAddress())); if (allocData) { bool argWasUncacheable = isArgUncached[argIndex]; bool argIsUncacheable = allocData->allocationFlagsProperty.flags.locallyUncachedResource; if (argWasUncacheable == false && argIsUncacheable) { kernelRequiresUncachedMocsCount++; } else if (argWasUncacheable && argIsUncacheable == false) { kernelRequiresUncachedMocsCount--; } this->setKernelArgUncached(argIndex, argIsUncacheable); } residencyContainer[argIndex] = allocation; return ZE_RESULT_SUCCESS; }

// Handler for argument kinds that need no processing: ignores its inputs and
// reports success.
ze_result_t KernelImp::setArgUnknown(uint32_t argIndex, size_t argSize, const void *argVal) {
    return ZE_RESULT_SUCCESS;
}

// Sets a buffer (pointer) or local-memory argument.
//   argIndex - index into payloadMappings.explicitArgs (not range-checked here)
//   argSize  - for local (SLM) arguments: the requested SLM size in bytes
//   argVal   - pointer to the pointer value, or nullptr
// Flow, in this order:
//   1. Cache check: if the same address was set before and its SVM allocation
//      is still the same one, return early.
//   2. Local-memory arguments: record the size and re-layout the SLM offsets
//      of all following local arguments.
//   3. nullptr value: clear residency and patch a null pointer.
//   4. Otherwise resolve the allocation (a peer allocation if a remote
//      resource is needed), remember it in kernelArgInfos and delegate to
//      setArgBufferWithAlloc.
// Returns ZE_RESULT_ERROR_INVALID_ARGUMENT when a required peer allocation
// cannot be resolved.
ze_result_t KernelImp::setArgBuffer(uint32_t argIndex, size_t argSize, const void *argVal) {
    const auto device = static_cast(this->module->getDevice());
    const auto driverHandle = static_cast(device->getDriverHandle());
    const auto svmAllocsManager = driverHandle->getSvmAllocsManager();
    // Snapshot of the SVM manager's allocation counter, used to validate the
    // cached entry below and stored with the new entry at the end.
    const auto allocationsCounter = svmAllocsManager->allocationsCounter.load();
    NEO::SvmAllocationData *allocData = nullptr;
    if (argVal != nullptr) {
        const auto &argInfo = this->kernelArgInfos[argIndex];
        const auto requestedAddress = *reinterpret_cast(argVal);
        if (argInfo.allocId > 0 && requestedAddress == argInfo.value) {
            bool reuseFromCache = false;
            if (allocationsCounter > 0) {
                if (allocationsCounter == argInfo.allocIdMemoryManagerCounter) {
                    // Counter unchanged since the entry was cached.
                    reuseFromCache = true;
                } else {
                    // Counter moved: the entry is still valid only if the
                    // address resolves to the allocation id that was cached.
                    allocData = svmAllocsManager->getSVMAlloc(requestedAddress);
                    if (allocData && allocData->getAllocId() == argInfo.allocId) {
                        reuseFromCache = true;
                        this->kernelArgInfos[argIndex].allocIdMemoryManagerCounter = allocationsCounter;
                    }
                }
                if (reuseFromCache) {
                    return ZE_RESULT_SUCCESS;
                }
            }
        }
    }

    const auto &allArgs = kernelImmData->getDescriptor().payloadMappings.explicitArgs;
    const auto &currArg = allArgs[argIndex];
    if (currArg.getTraits().getAddressQualifier() == NEO::KernelArgMetadata::AddrLocal) {
        slmArgSizes[argIndex] = static_cast(argSize);
        UNRECOVERABLE_IF(NEO::isUndefinedOffset(currArg.as().slmOffset));
        // Start from this argument's currently patched offset plus its new size.
        auto slmOffset = *reinterpret_cast(crossThreadData.get() + currArg.as().slmOffset);
        slmOffset += static_cast(argSize);
        // argIndex is reused as the cursor over the following arguments.
        ++argIndex;
        while (argIndex < kernelImmData->getDescriptor().payloadMappings.explicitArgs.size()) {
            if (allArgs[argIndex].getTraits().getAddressQualifier() != NEO::KernelArgMetadata::AddrLocal) {
                ++argIndex;
                continue;
            }
            // Each later local argument is placed at the next offset that
            // satisfies its required alignment, using its recorded size.
            const auto &nextArg = allArgs[argIndex].as();
            UNRECOVERABLE_IF(0 == nextArg.requiredSlmAlignment);
            slmOffset = alignUp(slmOffset, nextArg.requiredSlmAlignment);
            NEO::patchNonPointer(ArrayRef(crossThreadData.get(), crossThreadDataSize), nextArg.slmOffset, slmOffset);

            slmOffset += static_cast(slmArgSizes[argIndex]);
            ++argIndex;
        }
        slmArgsTotalSize = static_cast(alignUp(slmOffset, KB));
        return ZE_RESULT_SUCCESS;
    }

    if (nullptr == argVal) {
        residencyContainer[argIndex] = nullptr;
        const auto &arg = kernelImmData->getDescriptor().payloadMappings.explicitArgs[argIndex].as();
        uintptr_t nullBufferValue = 0;
        NEO::patchPointer(ArrayRef(crossThreadData.get(), crossThreadDataSize), arg, nullBufferValue);
        return ZE_RESULT_SUCCESS;
    }

    const auto requestedAddress = *reinterpret_cast(argVal);
    uintptr_t gpuAddress = 0u;
    NEO::GraphicsAllocation *alloc = driverHandle->getDriverSystemMemoryAllocation(requestedAddress,
                                                                                   1u,
                                                                                   module->getDevice()->getRootDeviceIndex(),
                                                                                   &gpuAddress);
    // Reuse the lookup done during the cache check when there was one.
    if (allocData == nullptr) {
        allocData = svmAllocsManager->getSVMAlloc(requestedAddress);
    }
    if (driverHandle->isRemoteResourceNeeded(requestedAddress, alloc, allocData, device)) {
        if (allocData == nullptr) {
            return ZE_RESULT_ERROR_INVALID_ARGUMENT;
        }

        // Keep the caller's offset into the allocation when switching to the
        // peer allocation's GPU address.
        uint64_t pbase = allocData->gpuAllocations.getDefaultGraphicsAllocation()->getGpuAddress();
        uint64_t offset = (uint64_t)requestedAddress - pbase;

        alloc = driverHandle->getPeerAllocation(device, allocData, reinterpret_cast(pbase), &gpuAddress);
        if (alloc == nullptr) {
            return ZE_RESULT_ERROR_INVALID_ARGUMENT;
        }
        gpuAddress += offset;
    }

    // An allocId of 0 is what the cache check above treats as "not cacheable".
    const uint32_t allocId = allocData ? allocData->getAllocId() : 0u;
    kernelArgInfos[argIndex] = KernelArgInfo{requestedAddress, allocId, allocationsCounter};

    return setArgBufferWithAlloc(argIndex, gpuAddress, alloc);
}

// Sets an image argument.
//   argVal - pointer to an image handle; nullptr only clears the residency
//            entry for argIndex
// Copies the image's surface state to the bindless slot or to the surface state
// heap (depending on the kernel's image addressing mode), records the image
// allocation for residency and patches the image metadata the kernel asked for
// into cross-thread data. argIndex is not range-checked here.
ze_result_t KernelImp::setArgImage(uint32_t argIndex, size_t argSize, const void *argVal) {
    if (argVal == nullptr) {
        residencyContainer[argIndex] = nullptr;
        return ZE_RESULT_SUCCESS;
    }

    const auto &hwInfo = module->getDevice()->getNEODevice()->getHardwareInfo();
    // Media-block handling needs both hardware support and an argument that is
    // declared as a media block image.
    auto isMediaBlockImage = (hwInfo.capabilityTable.supportsMediaBlock &&
                              kernelImmData->getDescriptor().payloadMappings.explicitArgs[argIndex].getExtendedTypeInfo().isMediaBlockImage);
    const auto &arg = kernelImmData->getDescriptor().payloadMappings.explicitArgs[argIndex].as();
    const auto image = Image::fromHandle(*static_cast(argVal));

    if (kernelImmData->getDescriptor().kernelAttributes.imageAddressingMode == NEO::KernelDescriptor::Bindless) {
        image->copySurfaceStateToSSH(patchBindlessSurfaceState(image->getAllocation(), arg.bindless), 0u, isMediaBlockImage);
    } else {
        image->copySurfaceStateToSSH(surfaceStateHeapData.get(), arg.bindful, isMediaBlockImage);
    }

    residencyContainer[argIndex] = image->getAllocation();

    auto imageInfo = image->getImageInfo();
    auto clChannelType = getClChannelDataType(image->getImageDesc().format);
    auto clChannelOrder = getClChannelOrder(image->getImageDesc().format);

    // Image metadata, one patch per value.
    NEO::patchNonPointer(ArrayRef(crossThreadData.get(), crossThreadDataSize), arg.metadataPayload.imgWidth, imageInfo.imgDesc.imageWidth);
    NEO::patchNonPointer(ArrayRef(crossThreadData.get(), crossThreadDataSize), arg.metadataPayload.imgHeight, imageInfo.imgDesc.imageHeight);
    NEO::patchNonPointer(ArrayRef(crossThreadData.get(), crossThreadDataSize), arg.metadataPayload.imgDepth, imageInfo.imgDesc.imageDepth);
    NEO::patchNonPointer(ArrayRef(crossThreadData.get(), crossThreadDataSize), arg.metadataPayload.numSamples, imageInfo.imgDesc.numSamples);
    NEO::patchNonPointer(ArrayRef(crossThreadData.get(), crossThreadDataSize), arg.metadataPayload.arraySize, imageInfo.imgDesc.imageArraySize);
    NEO::patchNonPointer(ArrayRef(crossThreadData.get(), crossThreadDataSize), arg.metadataPayload.channelDataType, clChannelType);
    NEO::patchNonPointer(ArrayRef(crossThreadData.get(), crossThreadDataSize), arg.metadataPayload.channelOrder, clChannelOrder);
    NEO::patchNonPointer(ArrayRef(crossThreadData.get(), crossThreadDataSize), arg.metadataPayload.numMipLevels, imageInfo.imgDesc.numMipLevels);

    // "Flat" view of the image: base address plus width / height / pitch, each
    // written as (value - 1).
    auto pixelSize = imageInfo.surfaceFormat->ImageElementSizeInBytes;
    NEO::patchNonPointer(ArrayRef(crossThreadData.get(), crossThreadDataSize), arg.metadataPayload.flatBaseOffset, image->getAllocation()->getGpuAddress());
    NEO::patchNonPointer(ArrayRef(crossThreadData.get(), crossThreadDataSize), arg.metadataPayload.flatWidth, (imageInfo.imgDesc.imageWidth * pixelSize) - 1u);
    NEO::patchNonPointer(ArrayRef(crossThreadData.get(), crossThreadDataSize), arg.metadataPayload.flatHeight, (imageInfo.imgDesc.imageHeight * pixelSize) - 1u);
    NEO::patchNonPointer(ArrayRef(crossThreadData.get(), crossThreadDataSize), arg.metadataPayload.flatPitch, imageInfo.imgDesc.imageRowPitch - 1u);

    return ZE_RESULT_SUCCESS;
}

// Sets a sampler argument: copies the sampler state into the dynamic state heap
// and patches the sampler metadata into cross-thread data.
// NOTE(review): unlike setArgImage, argVal is dereferenced without a null check.
ze_result_t KernelImp::setArgSampler(uint32_t argIndex, size_t argSize, const void *argVal) { const auto &arg = kernelImmData->getDescriptor().payloadMappings.explicitArgs[argIndex].as(); const auto sampler = Sampler::fromHandle(*static_cast(argVal)); sampler->copySamplerStateToDSH(dynamicStateHeapData.get(), dynamicStateHeapDataSize, arg.bindful); auto samplerDesc = sampler->getSamplerDesc();
NEO::patchNonPointer(ArrayRef(crossThreadData.get(), crossThreadDataSize), arg.metadataPayload.samplerSnapWa, (samplerDesc.addressMode == ZE_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER && samplerDesc.filterMode == ZE_SAMPLER_FILTER_MODE_NEAREST) ? std::numeric_limits::max() : 0u); NEO::patchNonPointer(ArrayRef(crossThreadData.get(), crossThreadDataSize), arg.metadataPayload.samplerAddressingMode, static_cast(getAddrMode(samplerDesc.addressMode))); NEO::patchNonPointer(ArrayRef(crossThreadData.get(), crossThreadDataSize), arg.metadataPayload.samplerNormalizedCoords, samplerDesc.isNormalized ? static_cast(SamplerPatchValues::NormalizedCoordsTrue) : static_cast(SamplerPatchValues::NormalizedCoordsFalse)); return ZE_RESULT_SUCCESS; } ze_result_t KernelImp::getKernelName(size_t *pSize, char *pName) { size_t kernelNameSize = this->kernelImmData->getDescriptor().kernelMetadata.kernelName.size() + 1; if (0 == *pSize || nullptr == pName) { *pSize = kernelNameSize; return ZE_RESULT_SUCCESS; } *pSize = std::min(*pSize, kernelNameSize); strncpy_s(pName, *pSize, this->kernelImmData->getDescriptor().kernelMetadata.kernelName.c_str(), kernelNameSize); return ZE_RESULT_SUCCESS; } ze_result_t KernelImp::getProperties(ze_kernel_properties_t *pKernelProperties) { const auto &kernelDescriptor = this->kernelImmData->getDescriptor(); pKernelProperties->numKernelArgs = static_cast(kernelDescriptor.payloadMappings.explicitArgs.size()); pKernelProperties->requiredGroupSizeX = kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0]; pKernelProperties->requiredGroupSizeY = kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1]; pKernelProperties->requiredGroupSizeZ = kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2]; pKernelProperties->requiredNumSubGroups = kernelDescriptor.kernelMetadata.compiledSubGroupsNumber; pKernelProperties->requiredSubgroupSize = kernelDescriptor.kernelMetadata.requiredSubGroupSize; pKernelProperties->maxSubgroupSize = 
kernelDescriptor.kernelAttributes.simdSize; pKernelProperties->localMemSize = kernelDescriptor.kernelAttributes.slmInlineSize; pKernelProperties->privateMemSize = kernelDescriptor.kernelAttributes.perHwThreadPrivateMemorySize; pKernelProperties->spillMemSize = kernelDescriptor.kernelAttributes.perThreadScratchSize[0]; memset(pKernelProperties->uuid.kid, 0, ZE_MAX_KERNEL_UUID_SIZE); memset(pKernelProperties->uuid.mid, 0, ZE_MAX_MODULE_UUID_SIZE); uint32_t maxKernelWorkGroupSize = static_cast(this->module->getDevice()->getNEODevice()->getDeviceInfo().maxWorkGroupSize); pKernelProperties->maxNumSubgroups = maxKernelWorkGroupSize / kernelDescriptor.kernelAttributes.simdSize; void *pNext = pKernelProperties->pNext; while (pNext) { ze_base_desc_t *extendedProperties = reinterpret_cast(pKernelProperties->pNext); if (extendedProperties->stype == ZE_STRUCTURE_TYPE_KERNEL_PREFERRED_GROUP_SIZE_PROPERTIES) { ze_kernel_preferred_group_size_properties_t *preferredGroupSizeProperties = reinterpret_cast(extendedProperties); preferredGroupSizeProperties->preferredMultiple = this->kernelImmData->getKernelInfo()->getMaxSimdSize(); auto &hwHelper = NEO::HwHelper::get(this->module->getDevice()->getHwInfo().platform.eRenderCoreFamily); if (hwHelper.isFusedEuDispatchEnabled(this->module->getDevice()->getHwInfo(), kernelDescriptor.kernelAttributes.flags.requiresDisabledEUFusion)) { preferredGroupSizeProperties->preferredMultiple *= 2; } } pNext = const_cast(extendedProperties->pNext); } return ZE_RESULT_SUCCESS; } NEO::GraphicsAllocation *KernelImp::allocatePrivateMemoryGraphicsAllocation() { auto &kernelAttributes = kernelImmData->getDescriptor().kernelAttributes; auto neoDevice = module->getDevice()->getNEODevice(); auto privateSurfaceSize = NEO::KernelHelper::getPrivateSurfaceSize(kernelAttributes.perHwThreadPrivateMemorySize, neoDevice->getDeviceInfo().computeUnitsUsedForScratch); UNRECOVERABLE_IF(privateSurfaceSize == 0); auto privateMemoryGraphicsAllocation = 
neoDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties( {neoDevice->getRootDeviceIndex(), privateSurfaceSize, NEO::AllocationType::PRIVATE_SURFACE, neoDevice->getDeviceBitfield()}); UNRECOVERABLE_IF(privateMemoryGraphicsAllocation == nullptr); return privateMemoryGraphicsAllocation; } void KernelImp::patchCrossthreadDataWithPrivateAllocation(NEO::GraphicsAllocation *privateAllocation) { auto &kernelAttributes = kernelImmData->getDescriptor().kernelAttributes; auto device = module->getDevice(); ArrayRef crossThredDataArrayRef = ArrayRef(this->crossThreadData.get(), this->crossThreadDataSize); ArrayRef surfaceStateHeapArrayRef = ArrayRef(this->surfaceStateHeapData.get(), this->surfaceStateHeapDataSize); patchWithImplicitSurface(crossThredDataArrayRef, surfaceStateHeapArrayRef, static_cast(privateAllocation->getGpuAddressToPatch()), *privateAllocation, kernelImmData->getDescriptor().payloadMappings.implicitArgs.privateMemoryAddress, *device->getNEODevice(), kernelAttributes.flags.useGlobalAtomics, device->isImplicitScalingCapable()); } ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) { this->kernelImmData = module->getKernelImmutableData(desc->pKernelName); if (this->kernelImmData == nullptr) { return ZE_RESULT_ERROR_INVALID_KERNEL_NAME; } auto isaAllocation = this->kernelImmData->getIsaGraphicsAllocation(); auto neoDevice = module->getDevice()->getNEODevice(); auto &hwInfo = neoDevice->getHardwareInfo(); auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily); auto &kernelDescriptor = kernelImmData->getDescriptor(); UNRECOVERABLE_IF(!this->kernelImmData->getKernelInfo()->heapInfo.pKernelHeap); if (isaAllocation->getAllocationType() == NEO::AllocationType::KERNEL_ISA_INTERNAL) { NEO::MemoryTransferHelper::transferMemoryToAllocation(hwHelper.isBlitCopyRequiredForLocalMemory(hwInfo, *isaAllocation), *neoDevice, isaAllocation, 0, this->kernelImmData->getKernelInfo()->heapInfo.pKernelHeap, 
static_cast(this->kernelImmData->getKernelInfo()->heapInfo.KernelHeapSize)); } for (const auto &argT : kernelDescriptor.payloadMappings.explicitArgs) { switch (argT.type) { default: this->kernelArgHandlers.push_back(&KernelImp::setArgUnknown); break; case NEO::ArgDescriptor::ArgTPointer: this->kernelArgHandlers.push_back(&KernelImp::setArgBuffer); break; case NEO::ArgDescriptor::ArgTImage: this->kernelArgHandlers.push_back(&KernelImp::setArgImage); break; case NEO::ArgDescriptor::ArgTSampler: this->kernelArgHandlers.push_back(&KernelImp::setArgSampler); break; case NEO::ArgDescriptor::ArgTValue: this->kernelArgHandlers.push_back(&KernelImp::setArgImmediate); break; } } slmArgSizes.resize(this->kernelArgHandlers.size(), 0); kernelArgInfos.resize(this->kernelArgHandlers.size(), {}); isArgUncached.resize(this->kernelArgHandlers.size(), 0); if (kernelImmData->getSurfaceStateHeapSize() > 0) { this->surfaceStateHeapData.reset(new uint8_t[kernelImmData->getSurfaceStateHeapSize()]); memcpy_s(this->surfaceStateHeapData.get(), kernelImmData->getSurfaceStateHeapSize(), kernelImmData->getSurfaceStateHeapTemplate(), kernelImmData->getSurfaceStateHeapSize()); this->surfaceStateHeapDataSize = kernelImmData->getSurfaceStateHeapSize(); } if (kernelDescriptor.kernelAttributes.crossThreadDataSize != 0) { this->crossThreadData.reset(new uint8_t[kernelDescriptor.kernelAttributes.crossThreadDataSize]); memcpy_s(this->crossThreadData.get(), kernelDescriptor.kernelAttributes.crossThreadDataSize, kernelImmData->getCrossThreadDataTemplate(), kernelDescriptor.kernelAttributes.crossThreadDataSize); this->crossThreadDataSize = kernelDescriptor.kernelAttributes.crossThreadDataSize; } if (kernelImmData->getDynamicStateHeapDataSize() != 0) { this->dynamicStateHeapData.reset(new uint8_t[kernelImmData->getDynamicStateHeapDataSize()]); memcpy_s(this->dynamicStateHeapData.get(), kernelImmData->getDynamicStateHeapDataSize(), kernelImmData->getDynamicStateHeapTemplate(), 
kernelImmData->getDynamicStateHeapDataSize()); this->dynamicStateHeapDataSize = kernelImmData->getDynamicStateHeapDataSize(); } if (kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0] > 0) { auto *reqdSize = kernelDescriptor.kernelAttributes.requiredWorkgroupSize; UNRECOVERABLE_IF(reqdSize[1] == 0); UNRECOVERABLE_IF(reqdSize[2] == 0); auto result = setGroupSize(reqdSize[0], reqdSize[1], reqdSize[2]); if (result != ZE_RESULT_SUCCESS) { return result; } } else { auto result = setGroupSize(kernelDescriptor.kernelAttributes.simdSize, 1, 1); if (result != ZE_RESULT_SUCCESS) { return result; } } residencyContainer.resize(this->kernelArgHandlers.size(), nullptr); auto &kernelAttributes = kernelDescriptor.kernelAttributes; if ((kernelAttributes.perHwThreadPrivateMemorySize != 0U) && (false == module->shouldAllocatePrivateMemoryPerDispatch())) { this->privateMemoryGraphicsAllocation = allocatePrivateMemoryGraphicsAllocation(); this->patchCrossthreadDataWithPrivateAllocation(this->privateMemoryGraphicsAllocation); this->residencyContainer.push_back(this->privateMemoryGraphicsAllocation); } if (kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs) { pImplicitArgs = std::make_unique(); *pImplicitArgs = {}; pImplicitArgs->structSize = sizeof(NEO::ImplicitArgs); pImplicitArgs->structVersion = 0; pImplicitArgs->simdWidth = kernelDescriptor.kernelAttributes.simdSize; } this->createPrintfBuffer(); this->setDebugSurface(); residencyContainer.insert(residencyContainer.end(), kernelImmData->getResidencyContainer().begin(), kernelImmData->getResidencyContainer().end()); kernelHasIndirectAccess = kernelDescriptor.kernelAttributes.hasNonKernelArgLoad || kernelDescriptor.kernelAttributes.hasNonKernelArgStore || kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic; if (this->usesRayTracing()) { if (this->getImmutableData()->getDescriptor().payloadMappings.implicitArgs.rtDispatchGlobals.pointerSize > 0) { uint32_t bvhLevels = NEO::RayTracingHelper::maxBvhLevels; 
neoDevice->initializeRayTracing(bvhLevels); auto rtDispatchGlobals = neoDevice->getRTDispatchGlobals(bvhLevels); if (rtDispatchGlobals == nullptr) { return ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY; } this->residencyContainer.push_back(neoDevice->getRTMemoryBackedBuffer()); this->residencyContainer.push_back(rtDispatchGlobals); NEO::patchPointer(ArrayRef(crossThreadData.get(), crossThreadDataSize), this->getImmutableData()->getDescriptor().payloadMappings.implicitArgs.rtDispatchGlobals, static_cast(rtDispatchGlobals->getGpuAddressToPatch())); } else { neoDevice->initializeRayTracing(0); this->residencyContainer.push_back(neoDevice->getRTMemoryBackedBuffer()); } } return ZE_RESULT_SUCCESS; } void KernelImp::createPrintfBuffer() { if (this->kernelImmData->getDescriptor().kernelAttributes.flags.usesPrintf || pImplicitArgs) { this->printfBuffer = PrintfHandler::createPrintfBuffer(this->module->getDevice()); this->residencyContainer.push_back(printfBuffer); if (this->kernelImmData->getDescriptor().kernelAttributes.flags.usesPrintf) { NEO::patchPointer(ArrayRef(crossThreadData.get(), crossThreadDataSize), this->getImmutableData()->getDescriptor().payloadMappings.implicitArgs.printfSurfaceAddress, static_cast(this->printfBuffer->getGpuAddressToPatch())); } if (pImplicitArgs) { pImplicitArgs->printfBufferPtr = printfBuffer->getGpuAddress(); } } } void KernelImp::printPrintfOutput() { PrintfHandler::printOutput(kernelImmData, this->printfBuffer, module->getDevice()); } bool KernelImp::usesSyncBuffer() { return this->kernelImmData->getDescriptor().kernelAttributes.flags.usesSyncBuffer; } void KernelImp::patchSyncBuffer(NEO::GraphicsAllocation *gfxAllocation, size_t bufferOffset) { this->residencyContainer.push_back(gfxAllocation); NEO::patchPointer(ArrayRef(crossThreadData.get(), crossThreadDataSize), this->getImmutableData()->getDescriptor().payloadMappings.implicitArgs.syncBufferAddress, static_cast(ptrOffset(gfxAllocation->getGpuAddressToPatch(), bufferOffset))); } void 
KernelImp::setDebugSurface() { auto device = module->getDevice(); if (module->isDebugEnabled() && device->getNEODevice()->getDebugger()) { auto surfaceStateHeapRef = ArrayRef(surfaceStateHeapData.get(), surfaceStateHeapDataSize); patchWithImplicitSurface(ArrayRef(), surfaceStateHeapRef, 0, *device->getDebugSurface(), this->getImmutableData()->getDescriptor().payloadMappings.implicitArgs.systemThreadSurfaceAddress, *device->getNEODevice(), getKernelDescriptor().kernelAttributes.flags.useGlobalAtomics, device->isImplicitScalingCapable()); } } void *KernelImp::patchBindlessSurfaceState(NEO::GraphicsAllocation *alloc, uint32_t bindless) { auto &hwHelper = NEO::HwHelper::get(this->module->getDevice()->getHwInfo().platform.eRenderCoreFamily); auto surfaceStateSize = hwHelper.getRenderSurfaceStateSize(); NEO::BindlessHeapsHelper *bindlessHeapsHelper = this->module->getDevice()->getNEODevice()->getBindlessHeapsHelper(); auto ssInHeap = bindlessHeapsHelper->allocateSSInHeap(surfaceStateSize, alloc, NEO::BindlessHeapsHelper::GLOBAL_SSH); this->residencyContainer.push_back(ssInHeap.heapAllocation); auto patchLocation = ptrOffset(getCrossThreadData(), bindless); auto patchValue = hwHelper.getBindlessSurfaceExtendedMessageDescriptorValue(static_cast(ssInHeap.surfaceStateOffset)); patchWithRequiredSize(const_cast(patchLocation), sizeof(patchValue), patchValue); return ssInHeap.ssPtr; } void KernelImp::patchWorkgroupSizeInCrossThreadData(uint32_t x, uint32_t y, uint32_t z) { const NEO::KernelDescriptor &desc = kernelImmData->getDescriptor(); auto dst = ArrayRef(crossThreadData.get(), crossThreadDataSize); uint32_t workgroupSize[3] = {x, y, z}; NEO::patchVecNonPointer(dst, desc.payloadMappings.dispatchTraits.localWorkSize, workgroupSize); NEO::patchVecNonPointer(dst, desc.payloadMappings.dispatchTraits.localWorkSize2, workgroupSize); NEO::patchVecNonPointer(dst, desc.payloadMappings.dispatchTraits.enqueuedLocalWorkSize, workgroupSize); if (pImplicitArgs) { 
pImplicitArgs->localSizeX = x; pImplicitArgs->localSizeY = y; pImplicitArgs->localSizeZ = z; } } ze_result_t KernelImp::setGlobalOffsetExp(uint32_t offsetX, uint32_t offsetY, uint32_t offsetZ) { this->globalOffsets[0] = offsetX; this->globalOffsets[1] = offsetY; this->globalOffsets[2] = offsetZ; return ZE_RESULT_SUCCESS; } void KernelImp::patchGlobalOffset() { const NEO::KernelDescriptor &desc = kernelImmData->getDescriptor(); auto dst = ArrayRef(crossThreadData.get(), crossThreadDataSize); NEO::patchVecNonPointer(dst, desc.payloadMappings.dispatchTraits.globalWorkOffset, this->globalOffsets); if (pImplicitArgs) { pImplicitArgs->globalOffsetX = globalOffsets[0]; pImplicitArgs->globalOffsetY = globalOffsets[1]; pImplicitArgs->globalOffsetZ = globalOffsets[2]; } } Kernel *Kernel::create(uint32_t productFamily, Module *module, const ze_kernel_desc_t *desc, ze_result_t *res) { UNRECOVERABLE_IF(productFamily >= IGFX_MAX_PRODUCT); KernelAllocatorFn allocator = kernelFactory[productFamily]; auto kernel = static_cast(allocator(module)); *res = kernel->initialize(desc); if (*res) { kernel->destroy(); return nullptr; } return kernel; } bool KernelImp::hasIndirectAllocationsAllowed() const { return (unifiedMemoryControls.indirectDeviceAllocationsAllowed || unifiedMemoryControls.indirectHostAllocationsAllowed || unifiedMemoryControls.indirectSharedAllocationsAllowed); } uint32_t KernelImp::getSlmTotalSize() const { return slmArgsTotalSize + getImmutableData()->getDescriptor().kernelAttributes.slmInlineSize; } ze_result_t KernelImp::setCacheConfig(ze_cache_config_flags_t flags) { cacheConfigFlags = flags; return ZE_RESULT_SUCCESS; } NEO::GraphicsAllocation *KernelImp::getIsaAllocation() const { return getImmutableData()->getIsaGraphicsAllocation(); } ze_result_t KernelImp::setSchedulingHintExp(ze_scheduling_hint_exp_desc_t *pHint) { if (pHint->flags == ZE_SCHEDULING_HINT_EXP_FLAG_OLDEST_FIRST) { this->schedulingHintExpFlag = NEO::ThreadArbitrationPolicy::AgeBased; } else if 
(pHint->flags == ZE_SCHEDULING_HINT_EXP_FLAG_ROUND_ROBIN) { this->schedulingHintExpFlag = NEO::ThreadArbitrationPolicy::RoundRobin; } else { this->schedulingHintExpFlag = NEO::ThreadArbitrationPolicy::RoundRobinAfterDependency; } return ZE_RESULT_SUCCESS; } int32_t KernelImp::getSchedulingHintExp() { return this->schedulingHintExpFlag; } } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/kernel/kernel_imp.h000066400000000000000000000214161422164147700255320ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/thread_arbitration_policy.h" #include "shared/source/kernel/dispatch_kernel_encoder_interface.h" #include "shared/source/unified_memory/unified_memory.h" #include "level_zero/core/source/kernel/kernel.h" #include namespace L0 { struct KernelArgInfo { const void *value; uint32_t allocId; uint32_t allocIdMemoryManagerCounter; }; struct KernelImp : Kernel { KernelImp(Module *module); ~KernelImp() override; ze_result_t destroy() override { delete this; return ZE_RESULT_SUCCESS; } ze_result_t getBaseAddress(uint64_t *baseAddress) override; ze_result_t setIndirectAccess(ze_kernel_indirect_access_flags_t flags) override; ze_result_t getIndirectAccess(ze_kernel_indirect_access_flags_t *flags) override; ze_result_t getSourceAttributes(uint32_t *pSize, char **pString) override; ze_result_t getProperties(ze_kernel_properties_t *pKernelProperties) override; ze_result_t setArgumentValue(uint32_t argIndex, size_t argSize, const void *pArgValue) override; void setGroupCount(uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ) override; ze_result_t setGroupSize(uint32_t groupSizeX, uint32_t groupSizeY, uint32_t groupSizeZ) override; ze_result_t suggestGroupSize(uint32_t globalSizeX, uint32_t globalSizeY, uint32_t globalSizeZ, uint32_t *groupSizeX, uint32_t *groupSizeY, uint32_t *groupSizeZ) override; ze_result_t getKernelName(size_t 
*pSize, char *pName) override; ze_result_t suggestMaxCooperativeGroupCount(uint32_t *totalGroupCount, NEO::EngineGroupType engineGroupType, bool isEngineInstanced) override; const uint8_t *getCrossThreadData() const override { return crossThreadData.get(); } uint32_t getCrossThreadDataSize() const override { return crossThreadDataSize; } const std::vector &getResidencyContainer() const override { return residencyContainer; } ze_result_t setArgImmediate(uint32_t argIndex, size_t argSize, const void *argVal); ze_result_t setArgBuffer(uint32_t argIndex, size_t argSize, const void *argVal); ze_result_t setArgUnknown(uint32_t argIndex, size_t argSize, const void *argVal); ze_result_t setArgRedescribedImage(uint32_t argIndex, ze_image_handle_t argVal) override; ze_result_t setArgBufferWithAlloc(uint32_t argIndex, uintptr_t argVal, NEO::GraphicsAllocation *allocation) override; ze_result_t setArgImage(uint32_t argIndex, size_t argSize, const void *argVal); ze_result_t setArgSampler(uint32_t argIndex, size_t argSize, const void *argVal); virtual void setBufferSurfaceState(uint32_t argIndex, void *address, NEO::GraphicsAllocation *alloc) = 0; ze_result_t initialize(const ze_kernel_desc_t *desc); const uint8_t *getPerThreadData() const override { return perThreadDataForWholeThreadGroup; } uint32_t getPerThreadDataSizeForWholeThreadGroup() const override { return perThreadDataSizeForWholeThreadGroup; } uint32_t getPerThreadDataSize() const override { return perThreadDataSize; } uint32_t getNumThreadsPerThreadGroup() const override { return numThreadsPerThreadGroup; } uint32_t getThreadExecutionMask() const override { return threadExecutionMask; } NEO::GraphicsAllocation *getPrintfBufferAllocation() override { return this->printfBuffer; } void printPrintfOutput() override; bool usesSyncBuffer() override; void patchSyncBuffer(NEO::GraphicsAllocation *gfxAllocation, size_t bufferOffset) override; const uint8_t *getSurfaceStateHeapData() const override { return 
surfaceStateHeapData.get(); } uint32_t getSurfaceStateHeapDataSize() const override { return surfaceStateHeapDataSize; } const uint8_t *getDynamicStateHeapData() const override { return dynamicStateHeapData.get(); } const KernelImmutableData *getImmutableData() const override { return kernelImmData; } UnifiedMemoryControls getUnifiedMemoryControls() const override { return unifiedMemoryControls; } bool hasIndirectAllocationsAllowed() const override; const NEO::KernelDescriptor &getKernelDescriptor() const override { return kernelImmData->getDescriptor(); } const uint32_t *getGroupSize() const override { return groupSize; } uint32_t getSlmTotalSize() const override; NEO::SlmPolicy getSlmPolicy() const override { if (cacheConfigFlags & ZE_CACHE_CONFIG_FLAG_LARGE_SLM) { return NEO::SlmPolicy::SlmPolicyLargeSlm; } else if (cacheConfigFlags & ZE_CACHE_CONFIG_FLAG_LARGE_DATA) { return NEO::SlmPolicy::SlmPolicyLargeData; } else { return NEO::SlmPolicy::SlmPolicyNone; } } NEO::GraphicsAllocation *getIsaAllocation() const override; uint32_t getRequiredWorkgroupOrder() const override { return requiredWorkgroupOrder; } bool requiresGenerationOfLocalIdsByRuntime() const override { return kernelRequiresGenerationOfLocalIdsByRuntime; } bool getKernelRequiresUncachedMocs() { return (kernelRequiresUncachedMocsCount > 0); } bool getKernelRequiresQueueUncachedMocs() { return (kernelRequiresQueueUncachedMocsCount > 0); } void setKernelArgUncached(uint32_t index, bool val) { isArgUncached[index] = val; } uint32_t *getGlobalOffsets() override { return this->globalOffsets; } ze_result_t setGlobalOffsetExp(uint32_t offsetX, uint32_t offsetY, uint32_t offsetZ) override; void patchGlobalOffset() override; ze_result_t setCacheConfig(ze_cache_config_flags_t flags) override; bool usesRayTracing() { return kernelImmData->getDescriptor().hasRTCalls(); } ze_result_t getProfileInfo(zet_profile_properties_t *pProfileProperties) override { pProfileProperties->flags = 0; 
pProfileProperties->numTokens = 0; return ZE_RESULT_SUCCESS; } bool hasIndirectAccess() { return kernelHasIndirectAccess; } NEO::GraphicsAllocation *allocatePrivateMemoryGraphicsAllocation() override; void patchCrossthreadDataWithPrivateAllocation(NEO::GraphicsAllocation *privateAllocation) override; NEO::GraphicsAllocation *getPrivateMemoryGraphicsAllocation() override { return privateMemoryGraphicsAllocation; } ze_result_t setSchedulingHintExp(ze_scheduling_hint_exp_desc_t *pHint) override; int32_t getSchedulingHintExp() override; NEO::ImplicitArgs *getImplicitArgs() const override { return pImplicitArgs.get(); } protected: KernelImp() = default; void patchWorkgroupSizeInCrossThreadData(uint32_t x, uint32_t y, uint32_t z); NEO::GraphicsAllocation *privateMemoryGraphicsAllocation = nullptr; void createPrintfBuffer(); void setDebugSurface(); virtual void evaluateIfRequiresGenerationOfLocalIdsByRuntime(const NEO::KernelDescriptor &kernelDescriptor) = 0; void *patchBindlessSurfaceState(NEO::GraphicsAllocation *alloc, uint32_t bindless); const KernelImmutableData *kernelImmData = nullptr; Module *module = nullptr; typedef ze_result_t (KernelImp::*KernelArgHandler)(uint32_t argIndex, size_t argSize, const void *argVal); std::vector kernelArgInfos; std::vector kernelArgHandlers; std::vector residencyContainer; NEO::GraphicsAllocation *printfBuffer = nullptr; uint32_t groupSize[3] = {0u, 0u, 0u}; uint32_t numThreadsPerThreadGroup = 1u; uint32_t threadExecutionMask = 0u; std::unique_ptr crossThreadData = nullptr; uint32_t crossThreadDataSize = 0; std::unique_ptr surfaceStateHeapData = nullptr; uint32_t surfaceStateHeapDataSize = 0; std::unique_ptr dynamicStateHeapData = nullptr; uint32_t dynamicStateHeapDataSize = 0; uint8_t *perThreadDataForWholeThreadGroup = nullptr; uint32_t perThreadDataSizeForWholeThreadGroupAllocated = 0; uint32_t perThreadDataSizeForWholeThreadGroup = 0u; uint32_t perThreadDataSize = 0u; UnifiedMemoryControls unifiedMemoryControls; std::vector 
slmArgSizes; uint32_t slmArgsTotalSize = 0U; uint32_t requiredWorkgroupOrder = 0u; bool kernelRequiresGenerationOfLocalIdsByRuntime = true; uint32_t kernelRequiresUncachedMocsCount = false; uint32_t kernelRequiresQueueUncachedMocsCount = false; std::vector isArgUncached; uint32_t globalOffsets[3] = {}; ze_cache_config_flags_t cacheConfigFlags = 0u; bool kernelHasIndirectAccess = true; int32_t schedulingHintExpFlag = NEO::ThreadArbitrationPolicy::NotPresent; std::unique_ptr pImplicitArgs; }; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/kernel/sampler_patch_values.h000066400000000000000000000023161422164147700276040ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/debug_helpers.h" #include namespace L0 { enum class SamplerPatchValues : uint32_t { DefaultSampler = 0x00, AddressNone = 0x00, AddressClampToBorder = 0x01, AddressClampToEdge = 0x02, AddressRepeat = 0x03, AddressMirroredRepeat = 0x04, AddressMirroredRepeat101 = 0x05, NormalizedCoordsFalse = 0x00, NormalizedCoordsTrue = 0x08 }; inline SamplerPatchValues getAddrMode(ze_sampler_address_mode_t addressingMode) { switch (addressingMode) { case ZE_SAMPLER_ADDRESS_MODE_REPEAT: return SamplerPatchValues::AddressRepeat; case ZE_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER: return SamplerPatchValues::AddressClampToBorder; case ZE_SAMPLER_ADDRESS_MODE_CLAMP: return SamplerPatchValues::AddressClampToEdge; case ZE_SAMPLER_ADDRESS_MODE_NONE: return SamplerPatchValues::AddressNone; case ZE_SAMPLER_ADDRESS_MODE_MIRROR: return SamplerPatchValues::AddressMirroredRepeat; default: DEBUG_BREAK_IF(true); } return SamplerPatchValues::AddressNone; } } // namespace 
L0compute-runtime-22.14.22890/level_zero/core/source/linux/000077500000000000000000000000001422164147700231075ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/source/linux/CMakeLists.txt000066400000000000000000000005361422164147700256530ustar00rootroot00000000000000# # Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT # if(UNIX) set(L0_SOURCES_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/registry_path.cpp ${CMAKE_CURRENT_SOURCE_DIR}/driver_teardown.cpp ) set_property(GLOBAL PROPERTY L0_SOURCES_LINUX ${L0_SOURCES_LINUX}) endif() compute-runtime-22.14.22890/level_zero/core/source/linux/driver_teardown.cpp000066400000000000000000000005141422164147700270110ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/driver/driver_handle_imp.h" using namespace L0; void __attribute__((destructor)) driverHandleDestructor() { if (GlobalDriver != nullptr) { delete GlobalDriver; GlobalDriver = nullptr; } }compute-runtime-22.14.22890/level_zero/core/source/linux/registry_path.cpp000066400000000000000000000002131422164147700264730ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ namespace L0 { const char *registryPath = ""; } compute-runtime-22.14.22890/level_zero/core/source/memory/000077500000000000000000000000001422164147700232605ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/source/memory/cpu_page_fault_memory_manager.cpp000066400000000000000000000063221422164147700320270ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/page_fault_manager/cpu_page_fault_manager.h" #include "level_zero/core/source/cmdlist/cmdlist.h" #include "level_zero/core/source/device/device.h" #include "level_zero/core/source/device/device_imp.h" #include 
"level_zero/core/source/driver/driver_handle_imp.h" namespace NEO { void PageFaultManager::transferToCpu(void *ptr, size_t size, void *device) { L0::DeviceImp *deviceImp = static_cast(device); NEO::SvmAllocationData *allocData = deviceImp->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(ptr); UNRECOVERABLE_IF(allocData == nullptr); auto ret = deviceImp->pageFaultCommandList->appendPageFaultCopy(allocData->cpuAllocation, allocData->gpuAllocations.getGraphicsAllocation(deviceImp->getRootDeviceIndex()), allocData->size, true); UNRECOVERABLE_IF(ret); } void PageFaultManager::transferToGpu(void *ptr, void *device) { L0::DeviceImp *deviceImp = static_cast(device); NEO::SvmAllocationData *allocData = deviceImp->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(ptr); UNRECOVERABLE_IF(allocData == nullptr); auto ret = deviceImp->pageFaultCommandList->appendPageFaultCopy(allocData->gpuAllocations.getGraphicsAllocation(deviceImp->getRootDeviceIndex()), allocData->cpuAllocation, allocData->size, false); UNRECOVERABLE_IF(ret); this->evictMemoryAfterImplCopy(allocData->cpuAllocation, deviceImp->getNEODevice()); } } // namespace NEO namespace L0 { void handleGpuDomainTransferForHwWithHints(NEO::PageFaultManager *pageFaultHandler, void *allocPtr, NEO::PageFaultManager::PageFaultData &pageFaultData) { bool migration = true; if (pageFaultData.domain == NEO::PageFaultManager::AllocationDomain::Gpu) { L0::DeviceImp *deviceImp = static_cast(pageFaultData.cmdQ); NEO::SvmAllocationData *allocData = deviceImp->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(allocPtr); if (deviceImp->memAdviseSharedAllocations.find(allocData) != deviceImp->memAdviseSharedAllocations.end()) { if (deviceImp->memAdviseSharedAllocations[allocData].read_only && deviceImp->memAdviseSharedAllocations[allocData].device_preferred_location) { migration = false; deviceImp->memAdviseSharedAllocations[allocData].cpu_migration_blocked = 1; } } if (migration) { if 
(NEO::DebugManager.flags.PrintUmdSharedMigration.get()) { printf("UMD transferring shared allocation %llx from GPU to CPU\n", reinterpret_cast(allocPtr)); } pageFaultHandler->transferToCpu(allocPtr, pageFaultData.size, pageFaultData.cmdQ); } } if (migration) { pageFaultData.domain = NEO::PageFaultManager::AllocationDomain::Cpu; } pageFaultHandler->allowCPUMemoryAccess(allocPtr, pageFaultData.size); } } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/memory/memory_operations_helper.h000066400000000000000000000017641422164147700305530ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/memory_manager/memory_operations_status.h" #include static ze_result_t changeMemoryOperationStatusToL0ResultType(NEO::MemoryOperationsStatus status) { switch (status) { case NEO::MemoryOperationsStatus::SUCCESS: return ZE_RESULT_SUCCESS; case NEO::MemoryOperationsStatus::MEMORY_NOT_FOUND: return ZE_RESULT_ERROR_INVALID_ARGUMENT; case NEO::MemoryOperationsStatus::OUT_OF_MEMORY: return ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY; case NEO::MemoryOperationsStatus::FAILED: return ZE_RESULT_ERROR_DEVICE_LOST; case NEO::MemoryOperationsStatus::DEVICE_UNINITIALIZED: return ZE_RESULT_ERROR_UNINITIALIZED; case NEO::MemoryOperationsStatus::UNSUPPORTED: return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; default: DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } } compute-runtime-22.14.22890/level_zero/core/source/memory_manager/000077500000000000000000000000001422164147700247525ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/source/memory_manager/compression_selector_l0.cpp000066400000000000000000000013561422164147700323170ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/memory_manager/compression_selector.h" 
namespace NEO { bool CompressionSelector::preferCompressedAllocation(const AllocationProperties &properties, const HardwareInfo &hwInfo) { bool preferredCompression = false; int32_t compressionEnabled = DebugManager.flags.EnableUsmCompression.get(); if (compressionEnabled == 1) { if ((properties.allocationType == AllocationType::SVM_GPU) || (properties.flags.isUSMDeviceAllocation)) { preferredCompression = true; } } return preferredCompression; } } // namespace NEO compute-runtime-22.14.22890/level_zero/core/source/module/000077500000000000000000000000001422164147700232355ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/source/module/module.h000066400000000000000000000046041422164147700246770ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/core/source/context/context.h" #include "level_zero/core/source/kernel/kernel.h" #include "level_zero/core/source/module/module_build_log.h" #include #include #include struct _ze_module_handle_t {}; namespace L0 { struct Device; enum class ModuleType { Builtin, User }; struct Module : _ze_module_handle_t { static Module *create(Device *device, const ze_module_desc_t *desc, ModuleBuildLog *moduleBuildLog, ModuleType type); virtual ~Module() = default; virtual Device *getDevice() const = 0; virtual ze_result_t createKernel(const ze_kernel_desc_t *desc, ze_kernel_handle_t *phFunction) = 0; virtual ze_result_t destroy() = 0; virtual ze_result_t getNativeBinary(size_t *pSize, uint8_t *pModuleNativeBinary) = 0; virtual ze_result_t getFunctionPointer(const char *pKernelName, void **pfnFunction) = 0; virtual ze_result_t getGlobalPointer(const char *pGlobalName, size_t *pSize, void **pPtr) = 0; virtual ze_result_t getDebugInfo(size_t *pDebugDataSize, uint8_t *pDebugData) = 0; virtual ze_result_t getKernelNames(uint32_t *pCount, const char **pNames) = 0; virtual ze_result_t getProperties(ze_module_properties_t 
*pModuleProperties) = 0; virtual ze_result_t performDynamicLink(uint32_t numModules, ze_module_handle_t *phModules, ze_module_build_log_handle_t *phLinkLog) = 0; virtual const KernelImmutableData *getKernelImmutableData(const char *functionName) const = 0; virtual const std::vector> &getKernelImmutableDataVector() const = 0; virtual uint32_t getMaxGroupSize() const = 0; virtual bool isDebugEnabled() const = 0; virtual bool shouldAllocatePrivateMemoryPerDispatch() const = 0; virtual void checkIfPrivateMemoryPerDispatchIsNeeded() = 0; Module() = default; Module(const Module &) = delete; Module(Module &&) = delete; Module &operator=(const Module &) = delete; Module &operator=(Module &&) = delete; static Module *fromHandle(ze_module_handle_t handle) { return static_cast(handle); } inline ze_module_handle_t toHandle() { return this; } }; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/module/module_build_log.cpp000066400000000000000000000031221422164147700272440ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/debug_helpers.h" #include "shared/source/helpers/string.h" #include "level_zero/core/source/module/module.h" #include namespace L0 { struct ModuleBuildLogImp : public ModuleBuildLog { ModuleBuildLogImp() {} ~ModuleBuildLogImp() override {} ze_result_t destroy() override { delete this; return ZE_RESULT_SUCCESS; } ze_result_t getString(size_t *pSize, char *pBuildLog) override { const size_t requiredSize{buildLog.size() + 1}; if (pBuildLog == nullptr) { *pSize = requiredSize; return ZE_RESULT_SUCCESS; } if (*pSize < requiredSize) { return ZE_RESULT_ERROR_INVALID_SIZE; } memcpy_s(pBuildLog, *pSize, buildLog.c_str(), buildLog.size()); pBuildLog[buildLog.size()] = '\0'; *pSize = requiredSize; return ZE_RESULT_SUCCESS; } void appendString(const char *pBuildLog, size_t size) override { if ((pBuildLog == nullptr) || (size == 0) || (pBuildLog[0] == '\0')) 
return; if (pBuildLog[size - 1] == '\0') --size; if (this->buildLog.length() != 0) this->buildLog.append("\n"); this->buildLog.append(pBuildLog, size); } protected: std::string buildLog; }; ModuleBuildLog *ModuleBuildLog::create() { auto moduleBuildLog = new ModuleBuildLogImp(); UNRECOVERABLE_IF(moduleBuildLog == nullptr); return moduleBuildLog; } } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/module/module_build_log.h000066400000000000000000000017641422164147700267230ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include struct _ze_module_build_log_handle_t {}; namespace L0 { struct Module; struct ModuleBuildLog : _ze_module_build_log_handle_t { static ModuleBuildLog *create(); virtual ~ModuleBuildLog() = default; virtual ze_result_t destroy() = 0; virtual ze_result_t getString(size_t *pSize, char *pBuildLog) = 0; virtual void appendString(const char *pBuildLog, size_t size) = 0; ModuleBuildLog() = default; ModuleBuildLog(const ModuleBuildLog &) = delete; ModuleBuildLog(ModuleBuildLog &&) = delete; ModuleBuildLog &operator=(const ModuleBuildLog &) = delete; ModuleBuildLog &operator=(ModuleBuildLog &&) = delete; static ModuleBuildLog *fromHandle(ze_module_build_log_handle_t handle) { return static_cast(handle); } inline ze_module_build_log_handle_t toHandle() { return this; } }; } // namespace L0compute-runtime-22.14.22890/level_zero/core/source/module/module_extra_options.cpp000066400000000000000000000004331422164147700302040ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/module/module_imp.h" namespace L0 { void ModuleImp::createBuildExtraOptions(std::string &apiOptions, std::string &nternalBuildOptions) { } } // namespace 
L0compute-runtime-22.14.22890/level_zero/core/source/module/module_imp.cpp000066400000000000000000001642231422164147700261030ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/module/module_imp.h" #include "shared/source/compiler_interface/compiler_warnings/compiler_warnings.h" #include "shared/source/compiler_interface/intermediate_representations.h" #include "shared/source/compiler_interface/linker.h" #include "shared/source/device/device.h" #include "shared/source/device_binary_format/debug_zebin.h" #include "shared/source/device_binary_format/device_binary_formats.h" #include "shared/source/device_binary_format/elf/elf.h" #include "shared/source/device_binary_format/elf/elf_encoder.h" #include "shared/source/device_binary_format/elf/ocl_elf.h" #include "shared/source/helpers/api_specific_config.h" #include "shared/source/helpers/compiler_hw_info_config.h" #include "shared/source/helpers/constants.h" #include "shared/source/helpers/kernel_helpers.h" #include "shared/source/helpers/string.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/memory_manager/memory_operations_handler.h" #include "shared/source/memory_manager/unified_memory_manager.h" #include "shared/source/program/kernel_info.h" #include "shared/source/program/program_initialization.h" #include "shared/source/source_level_debugger/source_level_debugger.h" #include "level_zero/core/source/device/device.h" #include "level_zero/core/source/kernel/kernel.h" #include "level_zero/core/source/module/module_build_log.h" #include "compiler_options.h" #include "program_debug_data.h" #include #include #include namespace L0 { namespace BuildOptions { NEO::ConstStringRef optDisable = "-ze-opt-disable"; NEO::ConstStringRef optLevel = "-ze-opt-level"; NEO::ConstStringRef greaterThan4GbRequired = "-ze-opt-greater-than-4GB-buffer-required"; NEO::ConstStringRef hasBufferOffsetArg = 
"-ze-intel-has-buffer-offset-arg"; NEO::ConstStringRef debugKernelEnable = "-ze-kernel-debug-enable"; } // namespace BuildOptions ModuleTranslationUnit::ModuleTranslationUnit(L0::Device *device) : device(device) { } ModuleTranslationUnit::~ModuleTranslationUnit() { if (globalConstBuffer) { auto svmAllocsManager = device->getDriverHandle()->getSvmAllocsManager(); if (svmAllocsManager->getSVMAlloc(reinterpret_cast(globalConstBuffer->getGpuAddress()))) { svmAllocsManager->freeSVMAlloc(reinterpret_cast(globalConstBuffer->getGpuAddress())); } else { this->device->getNEODevice()->getExecutionEnvironment()->memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(globalConstBuffer); } } if (globalVarBuffer) { auto svmAllocsManager = device->getDriverHandle()->getSvmAllocsManager(); if (svmAllocsManager->getSVMAlloc(reinterpret_cast(globalVarBuffer->getGpuAddress()))) { svmAllocsManager->freeSVMAlloc(reinterpret_cast(globalVarBuffer->getGpuAddress())); } else { this->device->getNEODevice()->getExecutionEnvironment()->memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(globalVarBuffer); } } if (this->debugData != nullptr) { for (std::vector::iterator iter = alignedvIsas.begin(); iter != alignedvIsas.end(); ++iter) { alignedFree(static_cast(*iter)); } } } std::vector ModuleTranslationUnit::generateElfFromSpirV(std::vector inputSpirVs, std::vector inputModuleSizes) { NEO::Elf::ElfEncoder<> elfEncoder(true, false, 1U); elfEncoder.getElfFileHeader().type = NEO::Elf::ET_OPENCL_OBJECTS; StackVec specConstIds; StackVec specConstValues; for (uint32_t i = 0; i < static_cast(inputSpirVs.size()); i++) { if (specConstantsValues.size() > 0) { specConstIds.clear(); specConstValues.clear(); specConstIds.reserve(specConstantsValues.size()); specConstValues.reserve(specConstantsValues.size()); for (const auto &specConst : specConstantsValues) { specConstIds.push_back(specConst.first); specConstValues.push_back(specConst.second); } 
elfEncoder.appendSection(NEO::Elf::SHT_OPENCL_SPIRV_SC_IDS, NEO::Elf::SectionNamesOpenCl::spirvSpecConstIds, ArrayRef::fromAny(specConstIds.begin(), specConstIds.size())); elfEncoder.appendSection(NEO::Elf::SHT_OPENCL_SPIRV_SC_VALUES, NEO::Elf::SectionNamesOpenCl::spirvSpecConstValues, ArrayRef::fromAny(specConstValues.begin(), specConstValues.size())); } auto sectionType = NEO::Elf::SHT_OPENCL_SPIRV; NEO::ConstStringRef sectionName = NEO::Elf::SectionNamesOpenCl::spirvObject; elfEncoder.appendSection(sectionType, sectionName, ArrayRef(reinterpret_cast(inputSpirVs[i]), inputModuleSizes[i])); } return elfEncoder.encode(); } std::string ModuleTranslationUnit::generateCompilerOptions(const char *buildOptions, const char *internalBuildOptions) { if (nullptr != buildOptions) { options = buildOptions; } std::string internalOptions = NEO::CompilerOptions::concatenate(internalBuildOptions, BuildOptions::hasBufferOffsetArg); if (device->getNEODevice()->getDeviceInfo().debuggerActive) { if (NEO::SourceLevelDebugger::shouldAppendOptDisable(*device->getSourceLevelDebugger())) { NEO::CompilerOptions::concatenateAppend(options, BuildOptions::optDisable); } options = NEO::CompilerOptions::concatenate(options, NEO::CompilerOptions::generateDebugInfo); internalOptions = NEO::CompilerOptions::concatenate(internalOptions, BuildOptions::debugKernelEnable); } if (NEO::DebugManager.flags.DisableStatelessToStatefulOptimization.get() || device->getNEODevice()->areSharedSystemAllocationsAllowed()) { internalOptions = NEO::CompilerOptions::concatenate(internalOptions, NEO::CompilerOptions::greaterThan4gbBuffersRequired); } return internalOptions; } bool ModuleTranslationUnit::processSpecConstantInfo(NEO::CompilerInterface *compilerInterface, const ze_module_constants_t *pConstants, const char *input, uint32_t inputSize) { if (pConstants) { NEO::SpecConstantInfo specConstInfo; auto retVal = compilerInterface->getSpecConstantsInfo(*device->getNEODevice(), ArrayRef(input, inputSize), 
specConstInfo); if (retVal != NEO::TranslationOutput::ErrorCode::Success) { return false; } for (uint32_t i = 0; i < pConstants->numConstants; i++) { uint64_t specConstantValue = 0; uint32_t specConstantId = pConstants->pConstantIds[i]; auto atributeSize = 0u; uint32_t j; for (j = 0; j < specConstInfo.sizesBuffer->GetSize(); j++) { if (specConstantId == specConstInfo.idsBuffer->GetMemory()[j]) { atributeSize = specConstInfo.sizesBuffer->GetMemory()[j]; break; } } if (j == specConstInfo.sizesBuffer->GetSize()) { return false; } memcpy_s(&specConstantValue, sizeof(uint64_t), const_cast(pConstants->pConstantValues[i]), atributeSize); specConstantsValues[specConstantId] = specConstantValue; } } return true; } bool ModuleTranslationUnit::compileGenBinary(NEO::TranslationInput inputArgs, bool staticLink) { auto compilerInterface = device->getNEODevice()->getCompilerInterface(); UNRECOVERABLE_IF(nullptr == compilerInterface); inputArgs.specializedValues = this->specConstantsValues; NEO::TranslationOutput compilerOuput = {}; NEO::TranslationOutput::ErrorCode compilerErr; if (staticLink) { compilerErr = compilerInterface->link(*device->getNEODevice(), inputArgs, compilerOuput); } else { compilerErr = compilerInterface->build(*device->getNEODevice(), inputArgs, compilerOuput); } this->updateBuildLog(compilerOuput.frontendCompilerLog); this->updateBuildLog(compilerOuput.backendCompilerLog); if (NEO::TranslationOutput::ErrorCode::Success != compilerErr) { return false; } this->irBinary = std::move(compilerOuput.intermediateRepresentation.mem); this->irBinarySize = compilerOuput.intermediateRepresentation.size; this->unpackedDeviceBinary = std::move(compilerOuput.deviceBinary.mem); this->unpackedDeviceBinarySize = compilerOuput.deviceBinary.size; this->debugData = std::move(compilerOuput.debugData.mem); this->debugDataSize = compilerOuput.debugData.size; return processUnpackedBinary(); } bool ModuleTranslationUnit::staticLinkSpirV(std::vector inputSpirVs, std::vector 
inputModuleSizes, const char *buildOptions, const char *internalBuildOptions, std::vector specConstants) { auto compilerInterface = device->getNEODevice()->getCompilerInterface(); UNRECOVERABLE_IF(nullptr == compilerInterface); std::string internalOptions = this->generateCompilerOptions(buildOptions, internalBuildOptions); for (uint32_t i = 0; i < static_cast(specConstants.size()); i++) { auto specConstantResult = this->processSpecConstantInfo(compilerInterface, specConstants[i], inputSpirVs[i], inputModuleSizes[i]); if (!specConstantResult) { return false; } } NEO::TranslationInput linkInputArgs = {IGC::CodeType::elf, IGC::CodeType::oclGenBin}; auto spirvElfSource = generateElfFromSpirV(inputSpirVs, inputModuleSizes); linkInputArgs.src = ArrayRef(reinterpret_cast(spirvElfSource.data()), spirvElfSource.size()); linkInputArgs.apiOptions = ArrayRef(options.c_str(), options.length()); linkInputArgs.internalOptions = ArrayRef(internalOptions.c_str(), internalOptions.length()); return this->compileGenBinary(linkInputArgs, true); } bool ModuleTranslationUnit::buildFromSpirV(const char *input, uint32_t inputSize, const char *buildOptions, const char *internalBuildOptions, const ze_module_constants_t *pConstants) { auto compilerInterface = device->getNEODevice()->getCompilerInterface(); UNRECOVERABLE_IF(nullptr == compilerInterface); std::string internalOptions = this->generateCompilerOptions(buildOptions, internalBuildOptions); auto specConstantResult = this->processSpecConstantInfo(compilerInterface, pConstants, input, inputSize); if (!specConstantResult) return false; NEO::TranslationInput inputArgs = {IGC::CodeType::spirV, IGC::CodeType::oclGenBin}; inputArgs.src = ArrayRef(input, inputSize); inputArgs.apiOptions = ArrayRef(options.c_str(), options.length()); inputArgs.internalOptions = ArrayRef(internalOptions.c_str(), internalOptions.length()); return this->compileGenBinary(inputArgs, false); } bool ModuleTranslationUnit::createFromNativeBinary(const char *input, 
size_t inputSize) { UNRECOVERABLE_IF((nullptr == device) || (nullptr == device->getNEODevice())); auto productAbbreviation = NEO::hardwarePrefix[device->getNEODevice()->getHardwareInfo().platform.eProductFamily]; auto copyHwInfo = device->getNEODevice()->getHardwareInfo(); const auto &compilerHwInfoConfig = *NEO::CompilerHwInfoConfig::get(copyHwInfo.platform.eProductFamily); compilerHwInfoConfig.adjustHwInfoForIgc(copyHwInfo); NEO::TargetDevice targetDevice = NEO::targetDeviceFromHwInfo(copyHwInfo); std::string decodeErrors; std::string decodeWarnings; ArrayRef archive(reinterpret_cast(input), inputSize); auto singleDeviceBinary = unpackSingleDeviceBinary(archive, NEO::ConstStringRef(productAbbreviation, strlen(productAbbreviation)), targetDevice, decodeErrors, decodeWarnings); if (decodeWarnings.empty() == false) { PRINT_DEBUG_STRING(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "%s\n", decodeWarnings.c_str()); } if (singleDeviceBinary.intermediateRepresentation.empty() && singleDeviceBinary.deviceBinary.empty()) { PRINT_DEBUG_STRING(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "%s\n", decodeErrors.c_str()); return false; } else { this->irBinary = makeCopy(reinterpret_cast(singleDeviceBinary.intermediateRepresentation.begin()), singleDeviceBinary.intermediateRepresentation.size()); this->irBinarySize = singleDeviceBinary.intermediateRepresentation.size(); this->options = singleDeviceBinary.buildOptions.str(); if (false == singleDeviceBinary.debugData.empty()) { this->debugData = makeCopy(reinterpret_cast(singleDeviceBinary.debugData.begin()), singleDeviceBinary.debugData.size()); this->debugDataSize = singleDeviceBinary.debugData.size(); } bool rebuild = NEO::DebugManager.flags.RebuildPrecompiledKernels.get() && irBinarySize != 0; if ((false == singleDeviceBinary.deviceBinary.empty()) && (false == rebuild)) { this->unpackedDeviceBinary = makeCopy(reinterpret_cast(singleDeviceBinary.deviceBinary.begin()), 
singleDeviceBinary.deviceBinary.size()); this->unpackedDeviceBinarySize = singleDeviceBinary.deviceBinary.size(); this->packedDeviceBinary = makeCopy(reinterpret_cast(archive.begin()), archive.size()); this->packedDeviceBinarySize = archive.size(); } } if (nullptr == this->unpackedDeviceBinary) { if (!shouldSuppressRebuildWarning) { updateBuildLog(NEO::CompilerWarnings::recompiledFromIr.str()); } return buildFromSpirV(this->irBinary.get(), static_cast(this->irBinarySize), this->options.c_str(), "", nullptr); } else { return processUnpackedBinary(); } } bool ModuleTranslationUnit::processUnpackedBinary() { if (0 == unpackedDeviceBinarySize) { return false; } auto blob = ArrayRef(reinterpret_cast(this->unpackedDeviceBinary.get()), this->unpackedDeviceBinarySize); NEO::SingleDeviceBinary binary = {}; binary.deviceBinary = blob; binary.targetDevice = NEO::targetDeviceFromHwInfo(device->getHwInfo()); std::string decodeErrors; std::string decodeWarnings; NEO::DecodeError decodeError; NEO::DeviceBinaryFormat singleDeviceBinaryFormat; std::tie(decodeError, singleDeviceBinaryFormat) = NEO::decodeSingleDeviceBinary(programInfo, binary, decodeErrors, decodeWarnings); if (decodeWarnings.empty() == false) { PRINT_DEBUG_STRING(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "%s\n", decodeWarnings.c_str()); } if (NEO::DecodeError::Success != decodeError) { PRINT_DEBUG_STRING(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "%s\n", decodeErrors.c_str()); return false; } if (programInfo.decodedElf.elfFileHeader) { NEO::LinkerInput::SectionNameToSegmentIdMap nameToKernelId; uint32_t id = 0; for (auto &kernelInfo : this->programInfo.kernelInfos) { nameToKernelId[kernelInfo->kernelDescriptor.kernelMetadata.kernelName] = id; id++; } programInfo.prepareLinkerInputStorage(); programInfo.linkerInput->decodeElfSymbolTableAndRelocations(programInfo.decodedElf, nameToKernelId); } processDebugData(); size_t slmNeeded = NEO::getMaxInlineSlmNeeded(programInfo); size_t 
slmAvailable = 0U; NEO::DeviceInfoKernelPayloadConstants deviceInfoConstants; slmAvailable = static_cast(device->getDeviceInfo().localMemSize); deviceInfoConstants.maxWorkGroupSize = static_cast(device->getDeviceInfo().maxWorkGroupSize); deviceInfoConstants.computeUnitsUsedForScratch = static_cast(device->getDeviceInfo().computeUnitsUsedForScratch); deviceInfoConstants.slmWindowSize = static_cast(device->getDeviceInfo().localMemSize); if (NEO::requiresLocalMemoryWindowVA(programInfo)) { deviceInfoConstants.slmWindow = device->getNEODevice()->getExecutionEnvironment()->memoryManager->getReservedMemory(MemoryConstants::slmWindowSize, MemoryConstants::slmWindowAlignment); } if (slmNeeded > slmAvailable) { return false; } auto svmAllocsManager = device->getDriverHandle()->getSvmAllocsManager(); if (programInfo.globalConstants.size != 0) { this->globalConstBuffer = NEO::allocateGlobalsSurface(svmAllocsManager, *device->getNEODevice(), programInfo.globalConstants.size, true, programInfo.linkerInput.get(), programInfo.globalConstants.initData); } if (programInfo.globalVariables.size != 0) { this->globalVarBuffer = NEO::allocateGlobalsSurface(svmAllocsManager, *device->getNEODevice(), programInfo.globalVariables.size, false, programInfo.linkerInput.get(), programInfo.globalVariables.initData); } for (auto &kernelInfo : this->programInfo.kernelInfos) { kernelInfo->apply(deviceInfoConstants); } if (this->packedDeviceBinary != nullptr) { return true; } NEO::SingleDeviceBinary singleDeviceBinary = {}; singleDeviceBinary.targetDevice = NEO::targetDeviceFromHwInfo(device->getNEODevice()->getHardwareInfo()); singleDeviceBinary.buildOptions = this->options; singleDeviceBinary.deviceBinary = ArrayRef(reinterpret_cast(this->unpackedDeviceBinary.get()), this->unpackedDeviceBinarySize); singleDeviceBinary.intermediateRepresentation = ArrayRef(reinterpret_cast(this->irBinary.get()), this->irBinarySize); singleDeviceBinary.debugData = ArrayRef(reinterpret_cast(this->debugData.get()), 
this->debugDataSize); std::string packWarnings; std::string packErrors; auto packedDeviceBinary = NEO::packDeviceBinary(singleDeviceBinary, packErrors, packWarnings); if (packedDeviceBinary.empty()) { DEBUG_BREAK_IF(true); return false; } this->packedDeviceBinary = makeCopy(packedDeviceBinary.data(), packedDeviceBinary.size()); this->packedDeviceBinarySize = packedDeviceBinary.size(); return true; } void ModuleTranslationUnit::updateBuildLog(const std::string &newLogEntry) { if (newLogEntry.empty() || ('\0' == newLogEntry[0])) { return; } buildLog += newLogEntry.c_str(); if ('\n' != *buildLog.rbegin()) { buildLog.append("\n"); } } void ModuleTranslationUnit::processDebugData() { if (this->debugData != nullptr) { iOpenCL::SProgramDebugDataHeaderIGC *programDebugHeader = reinterpret_cast(debugData.get()); DEBUG_BREAK_IF(programDebugHeader->NumberOfKernels != programInfo.kernelInfos.size()); const iOpenCL::SKernelDebugDataHeaderIGC *kernelDebugHeader = reinterpret_cast( ptrOffset(programDebugHeader, sizeof(iOpenCL::SProgramDebugDataHeaderIGC))); const char *kernelName = nullptr; const char *kernelDebugData = nullptr; for (uint32_t i = 0; i < programDebugHeader->NumberOfKernels; i++) { kernelName = reinterpret_cast(ptrOffset(kernelDebugHeader, sizeof(iOpenCL::SKernelDebugDataHeaderIGC))); auto kernelInfo = programInfo.kernelInfos[i]; UNRECOVERABLE_IF(kernelInfo->kernelDescriptor.kernelMetadata.kernelName.compare(0, kernelInfo->kernelDescriptor.kernelMetadata.kernelName.size(), kernelName) != 0); kernelDebugData = ptrOffset(kernelName, kernelDebugHeader->KernelNameSize); kernelInfo->kernelDescriptor.external.debugData = std::make_unique(); char *alignedAlloc = static_cast(alignedMalloc(kernelDebugHeader->SizeVisaDbgInBytes, MemoryConstants::pageSize)); memcpy_s(static_cast(alignedAlloc), kernelDebugHeader->SizeVisaDbgInBytes, kernelDebugData, kernelDebugHeader->SizeVisaDbgInBytes); kernelInfo->kernelDescriptor.external.debugData->vIsa = alignedAlloc; 
kernelInfo->kernelDescriptor.external.debugData->genIsa = ptrOffset(kernelDebugData, kernelDebugHeader->SizeVisaDbgInBytes); kernelInfo->kernelDescriptor.external.debugData->vIsaSize = kernelDebugHeader->SizeVisaDbgInBytes; kernelInfo->kernelDescriptor.external.debugData->genIsaSize = kernelDebugHeader->SizeGenIsaDbgInBytes; kernelDebugData = ptrOffset(kernelDebugData, static_cast(kernelDebugHeader->SizeVisaDbgInBytes) + kernelDebugHeader->SizeGenIsaDbgInBytes); kernelDebugHeader = reinterpret_cast(kernelDebugData); alignedvIsas.push_back(alignedAlloc); } } } ModuleImp::ModuleImp(Device *device, ModuleBuildLog *moduleBuildLog, ModuleType type) : device(device), translationUnit(std::make_unique(device)), moduleBuildLog(moduleBuildLog), type(type) { productFamily = device->getHwInfo().platform.eProductFamily; } ModuleImp::~ModuleImp() { kernelImmDatas.clear(); } NEO::Debug::Segments ModuleImp::getZebinSegments() { std::vector> kernels; for (const auto &kernelImmData : kernelImmDatas) kernels.push_back({kernelImmData->getDescriptor().kernelMetadata.kernelName, kernelImmData->getIsaGraphicsAllocation()}); ArrayRef strings = {reinterpret_cast(translationUnit->programInfo.globalStrings.initData), translationUnit->programInfo.globalStrings.size}; return NEO::Debug::Segments(translationUnit->globalVarBuffer, translationUnit->globalConstBuffer, strings, kernels); } bool ModuleImp::initialize(const ze_module_desc_t *desc, NEO::Device *neoDevice) { bool success = true; std::string buildOptions; std::string internalBuildOptions; if (desc->pNext) { const ze_base_desc_t *expDesc = reinterpret_cast(desc->pNext); if (expDesc->stype == ZE_STRUCTURE_TYPE_MODULE_PROGRAM_EXP_DESC) { if (desc->format != ZE_MODULE_FORMAT_IL_SPIRV) { return false; } const ze_module_program_exp_desc_t *programExpDesc = reinterpret_cast(expDesc); std::vector inputSpirVs; std::vector inputModuleSizes; std::vector specConstants; const ze_module_constants_t *firstSpecConstants = nullptr; 
this->createBuildOptions(nullptr, buildOptions, internalBuildOptions); for (uint32_t i = 0; i < static_cast(programExpDesc->count); i++) { std::string tmpBuildOptions; std::string tmpInternalBuildOptions; inputSpirVs.push_back(reinterpret_cast(programExpDesc->pInputModules[i])); auto inputSizesInfo = const_cast(programExpDesc->inputSizes); uint32_t inputSize = static_cast(inputSizesInfo[i]); inputModuleSizes.push_back(inputSize); if (programExpDesc->pConstants) { specConstants.push_back(programExpDesc->pConstants[i]); if (i == 0) { firstSpecConstants = specConstants[0]; } } if (programExpDesc->pBuildFlags) { this->createBuildOptions(programExpDesc->pBuildFlags[i], tmpBuildOptions, tmpInternalBuildOptions); buildOptions = buildOptions + tmpBuildOptions; internalBuildOptions = internalBuildOptions + tmpInternalBuildOptions; } } // If the user passed in only 1 SPIRV, then fallback to standard build if (inputSpirVs.size() > 1) { success = this->translationUnit->staticLinkSpirV(inputSpirVs, inputModuleSizes, buildOptions.c_str(), internalBuildOptions.c_str(), specConstants); } else { success = this->translationUnit->buildFromSpirV(reinterpret_cast(programExpDesc->pInputModules[0]), inputModuleSizes[0], buildOptions.c_str(), internalBuildOptions.c_str(), firstSpecConstants); } } else { return false; } } else { std::string buildFlagsInput{desc->pBuildFlags != nullptr ? 
desc->pBuildFlags : ""}; this->translationUnit->shouldSuppressRebuildWarning = NEO::CompilerOptions::extract(NEO::CompilerOptions::noRecompiledFromIr, buildFlagsInput); this->createBuildOptions(buildFlagsInput.c_str(), buildOptions, internalBuildOptions); if (type == ModuleType::User && NEO::DebugManager.flags.InjectInternalBuildOptions.get() != "unk") { NEO::CompilerOptions::concatenateAppend(internalBuildOptions, NEO::DebugManager.flags.InjectInternalBuildOptions.get()); } if (desc->format == ZE_MODULE_FORMAT_NATIVE) { success = this->translationUnit->createFromNativeBinary( reinterpret_cast(desc->pInputModule), desc->inputSize); } else if (desc->format == ZE_MODULE_FORMAT_IL_SPIRV) { success = this->translationUnit->buildFromSpirV(reinterpret_cast(desc->pInputModule), static_cast(desc->inputSize), buildOptions.c_str(), internalBuildOptions.c_str(), desc->pConstants); } else { return false; } } this->updateBuildLog(neoDevice); verifyDebugCapabilities(); if (false == success) { return false; } kernelImmDatas.reserve(this->translationUnit->programInfo.kernelInfos.size()); for (auto &ki : this->translationUnit->programInfo.kernelInfos) { std::unique_ptr kernelImmData{new KernelImmutableData(this->device)}; kernelImmData->initialize(ki, device, device->getNEODevice()->getDeviceInfo().computeUnitsUsedForScratch, this->translationUnit->globalConstBuffer, this->translationUnit->globalVarBuffer, this->type == ModuleType::Builtin); kernelImmDatas.push_back(std::move(kernelImmData)); } registerElfInDebuggerL0(); this->maxGroupSize = static_cast(this->translationUnit->device->getNEODevice()->getDeviceInfo().maxWorkGroupSize); checkIfPrivateMemoryPerDispatchIsNeeded(); success = this->linkBinary(); success &= populateHostGlobalSymbolsMap(this->translationUnit->programInfo.globalsDeviceToHostNameMap); this->updateBuildLog(neoDevice); if (debugEnabled) { passDebugData(); } auto &hwInfo = neoDevice->getHardwareInfo(); auto &hwHelper = 
NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily); if (this->isFullyLinked) { for (auto &ki : kernelImmDatas) { if (this->type == ModuleType::User && !ki->isIsaCopiedToAllocation()) { NEO::MemoryTransferHelper::transferMemoryToAllocation(hwHelper.isBlitCopyRequiredForLocalMemory(hwInfo, *ki->getIsaGraphicsAllocation()), *neoDevice, ki->getIsaGraphicsAllocation(), 0, ki->getKernelInfo()->heapInfo.pKernelHeap, static_cast(ki->getKernelInfo()->heapInfo.KernelHeapSize)); ki->setIsaCopiedToAllocation(); } if (device->getL0Debugger()) { NEO::MemoryOperationsHandler *memoryOperationsIface = neoDevice->getRootDeviceEnvironment().memoryOperationsInterface.get(); if (memoryOperationsIface) { auto allocation = ki->getIsaGraphicsAllocation(); memoryOperationsIface->makeResident(neoDevice, ArrayRef(&allocation, 1)); } } } } return success; } void ModuleImp::createDebugZebin() { auto refBin = ArrayRef(reinterpret_cast(translationUnit->unpackedDeviceBinary.get()), translationUnit->unpackedDeviceBinarySize); auto segments = getZebinSegments(); auto debugZebin = NEO::Debug::createDebugZebin(refBin, segments); translationUnit->debugDataSize = debugZebin.size(); translationUnit->debugData.reset(new char[translationUnit->debugDataSize]); memcpy_s(translationUnit->debugData.get(), translationUnit->debugDataSize, debugZebin.data(), debugZebin.size()); } void ModuleImp::passDebugData() { auto refBin = ArrayRef(reinterpret_cast(translationUnit->unpackedDeviceBinary.get()), translationUnit->unpackedDeviceBinarySize); if (NEO::isDeviceBinaryFormat(refBin)) { createDebugZebin(); if (device->getSourceLevelDebugger()) { NEO::DebugData debugData; // pass debug zebin in vIsa field debugData.vIsa = reinterpret_cast(translationUnit->debugData.get()); debugData.vIsaSize = static_cast(translationUnit->debugDataSize); device->getSourceLevelDebugger()->notifyKernelDebugData(&debugData, "debug_zebin", nullptr, 0); } } else { if (device->getSourceLevelDebugger()) { for (auto kernelInfo : 
this->translationUnit->programInfo.kernelInfos) { NEO::DebugData *notifyDebugData = kernelInfo->kernelDescriptor.external.debugData.get(); NEO::DebugData relocatedDebugData; if (kernelInfo->kernelDescriptor.external.relocatedDebugData.get()) { relocatedDebugData.genIsa = kernelInfo->kernelDescriptor.external.debugData->genIsa; relocatedDebugData.genIsaSize = kernelInfo->kernelDescriptor.external.debugData->genIsaSize; relocatedDebugData.vIsa = reinterpret_cast(kernelInfo->kernelDescriptor.external.relocatedDebugData.get()); relocatedDebugData.vIsaSize = kernelInfo->kernelDescriptor.external.debugData->vIsaSize; notifyDebugData = &relocatedDebugData; } device->getSourceLevelDebugger()->notifyKernelDebugData(notifyDebugData, kernelInfo->kernelDescriptor.kernelMetadata.kernelName, kernelInfo->heapInfo.pKernelHeap, kernelInfo->heapInfo.KernelHeapSize); } } } } const KernelImmutableData *ModuleImp::getKernelImmutableData(const char *functionName) const { for (auto &kernelImmData : kernelImmDatas) { if (kernelImmData->getDescriptor().kernelMetadata.kernelName.compare(functionName) == 0) { return kernelImmData.get(); } } return nullptr; } void ModuleImp::createBuildOptions(const char *pBuildFlags, std::string &apiOptions, std::string &internalBuildOptions) { if (pBuildFlags != nullptr) { std::string buildFlags(pBuildFlags); apiOptions = pBuildFlags; moveBuildOption(apiOptions, apiOptions, NEO::CompilerOptions::optDisable, BuildOptions::optDisable); moveBuildOption(apiOptions, apiOptions, NEO::CompilerOptions::optLevel, BuildOptions::optLevel); moveBuildOption(internalBuildOptions, apiOptions, NEO::CompilerOptions::greaterThan4gbBuffersRequired, BuildOptions::greaterThan4GbRequired); moveBuildOption(internalBuildOptions, apiOptions, NEO::CompilerOptions::allowZebin, NEO::CompilerOptions::allowZebin); createBuildExtraOptions(apiOptions, internalBuildOptions); } if (NEO::ApiSpecificConfig::getBindlessConfiguration()) { 
NEO::CompilerOptions::concatenateAppend(internalBuildOptions, NEO::CompilerOptions::bindlessMode.str()); } } void ModuleImp::updateBuildLog(NEO::Device *neoDevice) { if (this->moduleBuildLog) { moduleBuildLog->appendString(this->translationUnit->buildLog.c_str(), this->translationUnit->buildLog.size()); } } ze_result_t ModuleImp::createKernel(const ze_kernel_desc_t *desc, ze_kernel_handle_t *phFunction) { ze_result_t res; if (!isFullyLinked) { return ZE_RESULT_ERROR_INVALID_MODULE_UNLINKED; } auto kernel = Kernel::create(productFamily, this, desc, &res); if (res == ZE_RESULT_SUCCESS) { *phFunction = kernel->toHandle(); } return res; } ze_result_t ModuleImp::getNativeBinary(size_t *pSize, uint8_t *pModuleNativeBinary) { auto genBinary = this->translationUnit->packedDeviceBinary.get(); *pSize = this->translationUnit->packedDeviceBinarySize; if (pModuleNativeBinary != nullptr) { memcpy_s(pModuleNativeBinary, this->translationUnit->packedDeviceBinarySize, genBinary, this->translationUnit->packedDeviceBinarySize); } return ZE_RESULT_SUCCESS; } ze_result_t ModuleImp::getDebugInfo(size_t *pDebugDataSize, uint8_t *pDebugData) { if (translationUnit == nullptr) { return ZE_RESULT_ERROR_UNINITIALIZED; } auto refBin = ArrayRef(reinterpret_cast(translationUnit->unpackedDeviceBinary.get()), translationUnit->unpackedDeviceBinarySize); if (nullptr == translationUnit->debugData.get() && NEO::isDeviceBinaryFormat(refBin)) { createDebugZebin(); } if (pDebugData != nullptr) { if (*pDebugDataSize < translationUnit->debugDataSize) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } memcpy_s(pDebugData, *pDebugDataSize, translationUnit->debugData.get(), translationUnit->debugDataSize); } *pDebugDataSize = translationUnit->debugDataSize; return ZE_RESULT_SUCCESS; } void ModuleImp::copyPatchedSegments(const NEO::Linker::PatchableSegments &isaSegmentsForPatching) { if (this->translationUnit->programInfo.linkerInput && 
this->translationUnit->programInfo.linkerInput->getTraits().requiresPatchingOfInstructionSegments) { for (auto &kernelImmData : this->kernelImmDatas) { if (nullptr == kernelImmData->getIsaGraphicsAllocation()) { continue; } UNRECOVERABLE_IF(kernelImmData->isIsaCopiedToAllocation()); kernelImmData->getIsaGraphicsAllocation()->setTbxWritable(true, std::numeric_limits::max()); kernelImmData->getIsaGraphicsAllocation()->setAubWritable(true, std::numeric_limits::max()); auto segmentId = &kernelImmData - &this->kernelImmDatas[0]; this->device->getDriverHandle()->getMemoryManager()->copyMemoryToAllocation(kernelImmData->getIsaGraphicsAllocation(), 0, isaSegmentsForPatching[segmentId].hostPointer, isaSegmentsForPatching[segmentId].segmentSize); kernelImmData->setIsaCopiedToAllocation(); if (device->getL0Debugger()) { NEO::MemoryOperationsHandler *memoryOperationsIface = device->getNEODevice()->getRootDeviceEnvironment().memoryOperationsInterface.get(); auto allocation = kernelImmData->getIsaGraphicsAllocation(); memoryOperationsIface->makeResident(device->getNEODevice(), ArrayRef(&allocation, 1)); } } } } bool ModuleImp::linkBinary() { using namespace NEO; auto linkerInput = this->translationUnit->programInfo.linkerInput.get(); if (linkerInput == nullptr) { isFullyLinked = true; return true; } Linker linker(*linkerInput); Linker::SegmentInfo globals; Linker::SegmentInfo constants; Linker::SegmentInfo exportedFunctions; Linker::SegmentInfo strings; GraphicsAllocation *globalsForPatching = translationUnit->globalVarBuffer; GraphicsAllocation *constantsForPatching = translationUnit->globalConstBuffer; if (globalsForPatching != nullptr) { globals.gpuAddress = static_cast(globalsForPatching->getGpuAddress()); globals.segmentSize = globalsForPatching->getUnderlyingBufferSize(); } if (constantsForPatching != nullptr) { constants.gpuAddress = static_cast(constantsForPatching->getGpuAddress()); constants.segmentSize = constantsForPatching->getUnderlyingBufferSize(); } if 
(translationUnit->programInfo.globalStrings.initData != nullptr) { strings.gpuAddress = reinterpret_cast(translationUnit->programInfo.globalStrings.initData); strings.segmentSize = translationUnit->programInfo.globalStrings.size; } if (linkerInput->getExportedFunctionsSegmentId() >= 0) { auto exportedFunctionHeapId = linkerInput->getExportedFunctionsSegmentId(); this->exportedFunctionsSurface = this->kernelImmDatas[exportedFunctionHeapId]->getIsaGraphicsAllocation(); exportedFunctions.gpuAddress = static_cast(exportedFunctionsSurface->getGpuAddressToPatch()); exportedFunctions.segmentSize = exportedFunctionsSurface->getUnderlyingBufferSize(); } Linker::PatchableSegments isaSegmentsForPatching; std::vector> patchedIsaTempStorage; Linker::KernelDescriptorsT kernelDescriptors; if (linkerInput->getTraits().requiresPatchingOfInstructionSegments) { patchedIsaTempStorage.reserve(this->kernelImmDatas.size()); kernelDescriptors.reserve(this->kernelImmDatas.size()); for (const auto &kernelInfo : this->translationUnit->programInfo.kernelInfos) { auto &kernHeapInfo = kernelInfo->heapInfo; const char *originalIsa = reinterpret_cast(kernHeapInfo.pKernelHeap); patchedIsaTempStorage.push_back(std::vector(originalIsa, originalIsa + kernHeapInfo.KernelHeapSize)); isaSegmentsForPatching.push_back(Linker::PatchableSegment{patchedIsaTempStorage.rbegin()->data(), kernHeapInfo.KernelHeapSize}); kernelDescriptors.push_back(&kernelInfo->kernelDescriptor); } } auto linkStatus = linker.link(globals, constants, exportedFunctions, strings, globalsForPatching, constantsForPatching, isaSegmentsForPatching, unresolvedExternalsInfo, this->device->getNEODevice(), translationUnit->programInfo.globalConstants.initData, translationUnit->programInfo.globalVariables.initData, kernelDescriptors, translationUnit->programInfo.externalFunctions); this->symbols = linker.extractRelocatedSymbols(); if (LinkingStatus::LinkedFully != linkStatus) { if (moduleBuildLog) { std::vector kernelNames; for (const auto 
&kernelInfo : this->translationUnit->programInfo.kernelInfos) { kernelNames.push_back("kernel : " + kernelInfo->kernelDescriptor.kernelMetadata.kernelName); } auto error = constructLinkerErrorMessage(unresolvedExternalsInfo, kernelNames); moduleBuildLog->appendString(error.c_str(), error.size()); } isFullyLinked = false; return LinkingStatus::LinkedPartially == linkStatus; } else if (type != ModuleType::Builtin) { copyPatchedSegments(isaSegmentsForPatching); } DBG_LOG(PrintRelocations, NEO::constructRelocationsDebugMessage(this->symbols)); isFullyLinked = true; for (auto kernelId = 0u; kernelId < kernelImmDatas.size(); kernelId++) { auto &kernImmData = kernelImmDatas[kernelId]; kernImmData->getResidencyContainer().reserve(kernImmData->getResidencyContainer().size() + ((this->exportedFunctionsSurface != nullptr) ? 1 : 0) + this->importedSymbolAllocations.size()); if (nullptr != this->exportedFunctionsSurface) { kernImmData->getResidencyContainer().push_back(this->exportedFunctionsSurface); } kernImmData->getResidencyContainer().insert(kernImmData->getResidencyContainer().end(), this->importedSymbolAllocations.begin(), this->importedSymbolAllocations.end()); } return true; } ze_result_t ModuleImp::getFunctionPointer(const char *pFunctionName, void **pfnFunction) { auto symbolIt = symbols.find(pFunctionName); if ((symbolIt == symbols.end()) || (symbolIt->second.symbol.segment != NEO::SegmentType::Instructions)) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } *pfnFunction = reinterpret_cast(symbolIt->second.gpuAddress); return ZE_RESULT_SUCCESS; } ze_result_t ModuleImp::getGlobalPointer(const char *pGlobalName, size_t *pSize, void **pPtr) { uint64_t address; size_t size; auto hostSymbolIt = hostGlobalSymbolsMap.find(pGlobalName); if (hostSymbolIt != hostGlobalSymbolsMap.end()) { address = hostSymbolIt->second.address; size = hostSymbolIt->second.size; } else { auto deviceSymbolIt = symbols.find(pGlobalName); if (deviceSymbolIt != symbols.end()) { if 
(deviceSymbolIt->second.symbol.segment == NEO::SegmentType::Instructions) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } } else { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } address = deviceSymbolIt->second.gpuAddress; size = deviceSymbolIt->second.symbol.size; } if (pPtr) { *pPtr = reinterpret_cast(address); } if (pSize) { *pSize = size; } return ZE_RESULT_SUCCESS; } Module *Module::create(Device *device, const ze_module_desc_t *desc, ModuleBuildLog *moduleBuildLog, ModuleType type) { auto module = new ModuleImp(device, moduleBuildLog, type); bool success = module->initialize(desc, device->getNEODevice()); if (success == false) { module->destroy(); return nullptr; } return module; } ze_result_t ModuleImp::getKernelNames(uint32_t *pCount, const char **pNames) { auto &kernelImmDatas = this->getKernelImmutableDataVector(); if (*pCount == 0) { *pCount = static_cast(kernelImmDatas.size()); return ZE_RESULT_SUCCESS; } if (*pCount > static_cast(kernelImmDatas.size())) { *pCount = static_cast(kernelImmDatas.size()); } uint32_t outCount = 0; for (auto &kernelImmData : kernelImmDatas) { *(pNames + outCount) = kernelImmData->getDescriptor().kernelMetadata.kernelName.c_str(); outCount++; if (outCount == *pCount) { break; } } return ZE_RESULT_SUCCESS; } bool ModuleImp::isDebugEnabled() const { return debugEnabled; } void ModuleImp::verifyDebugCapabilities() { bool debugCapabilities = device->getNEODevice()->getDebugger() != nullptr; if (debugCapabilities) { // verify all kernels are debuggable for (auto kernelInfo : this->translationUnit->programInfo.kernelInfos) { bool systemThreadSurfaceAvailable = NEO::isValidOffset(kernelInfo->kernelDescriptor.payloadMappings.implicitArgs.systemThreadSurfaceAddress.bindful) || NEO::isValidOffset(kernelInfo->kernelDescriptor.payloadMappings.implicitArgs.systemThreadSurfaceAddress.bindless); debugCapabilities &= systemThreadSurfaceAvailable; } } debugEnabled = debugCapabilities; } void ModuleImp::checkIfPrivateMemoryPerDispatchIsNeeded() { 
size_t modulePrivateMemorySize = 0; for (auto &kernelImmData : this->kernelImmDatas) { if (0 == kernelImmData->getDescriptor().kernelAttributes.perHwThreadPrivateMemorySize) { continue; } auto kernelPrivateMemorySize = NEO::KernelHelper::getPrivateSurfaceSize(kernelImmData->getDescriptor().kernelAttributes.perHwThreadPrivateMemorySize, this->device->getNEODevice()->getDeviceInfo().computeUnitsUsedForScratch); modulePrivateMemorySize += kernelPrivateMemorySize; } this->allocatePrivateMemoryPerDispatch = false; if (modulePrivateMemorySize > 0U) { auto globalMemorySize = device->getNEODevice()->getRootDevice()->getGlobalMemorySize(static_cast(device->getNEODevice()->getDeviceBitfield().to_ulong())); this->allocatePrivateMemoryPerDispatch = modulePrivateMemorySize > globalMemorySize; } } ze_result_t ModuleImp::getProperties(ze_module_properties_t *pModuleProperties) { pModuleProperties->flags = 0; if (!unresolvedExternalsInfo.empty()) { pModuleProperties->flags |= ZE_MODULE_PROPERTY_FLAG_IMPORTS; } return ZE_RESULT_SUCCESS; } void ModuleImp::moduleDependencyWalker(std::map> inDeps, void *moduleHandle, std::list *outDeps) { std::map>::iterator it; it = inDeps.find(moduleHandle); if (it != inDeps.end()) { std::map dependencies = it->second; inDeps.erase(it); for (auto const &dependency : dependencies) { moduleDependencyWalker(inDeps, dependency.first, outDeps); outDeps->push_back(static_cast(dependency.first)); } } } ze_result_t ModuleImp::performDynamicLink(uint32_t numModules, ze_module_handle_t *phModules, ze_module_build_log_handle_t *phLinkLog) { std::map> dependencies; ModuleBuildLog *moduleLinkLog = nullptr; if (phLinkLog) { moduleLinkLog = ModuleBuildLog::create(); *phLinkLog = moduleLinkLog->toHandle(); } for (auto i = 0u; i < numModules; i++) { auto moduleId = static_cast(Module::fromHandle(phModules[i])); if (moduleId->isFullyLinked) { continue; } std::map moduleDeps; NEO::Linker::PatchableSegments isaSegmentsForPatching; std::vector> patchedIsaTempStorage; 
uint32_t numPatchedSymbols = 0u; std::vector unresolvedSymbolLogMessages; if (moduleId->translationUnit->programInfo.linkerInput && moduleId->translationUnit->programInfo.linkerInput->getTraits().requiresPatchingOfInstructionSegments) { patchedIsaTempStorage.reserve(moduleId->kernelImmDatas.size()); for (const auto &kernelInfo : moduleId->translationUnit->programInfo.kernelInfos) { auto &kernHeapInfo = kernelInfo->heapInfo; const char *originalIsa = reinterpret_cast(kernHeapInfo.pKernelHeap); patchedIsaTempStorage.push_back(std::vector(originalIsa, originalIsa + kernHeapInfo.KernelHeapSize)); isaSegmentsForPatching.push_back(NEO::Linker::PatchableSegment{patchedIsaTempStorage.rbegin()->data(), kernHeapInfo.KernelHeapSize}); } for (const auto &unresolvedExternal : moduleId->unresolvedExternalsInfo) { if (moduleLinkLog) { std::stringstream logMessage; logMessage << "Module <" << moduleId << ">: " << " Unresolved Symbol <" << unresolvedExternal.unresolvedRelocation.symbolName << ">"; unresolvedSymbolLogMessages.push_back(logMessage.str()); } for (auto i = 0u; i < numModules; i++) { auto moduleHandle = static_cast(Module::fromHandle(phModules[i])); auto symbolIt = moduleHandle->symbols.find(unresolvedExternal.unresolvedRelocation.symbolName); if (symbolIt != moduleHandle->symbols.end()) { auto relocAddress = ptrOffset(isaSegmentsForPatching[unresolvedExternal.instructionsSegmentId].hostPointer, static_cast(unresolvedExternal.unresolvedRelocation.offset)); NEO::Linker::patchAddress(relocAddress, symbolIt->second, unresolvedExternal.unresolvedRelocation); numPatchedSymbols++; moduleId->importedSymbolAllocations.insert(moduleHandle->exportedFunctionsSurface); std::map::iterator it; it = moduleDeps.find(moduleHandle); if ((it == moduleDeps.end()) && (nullptr != moduleHandle->exportedFunctionsSurface)) { moduleDeps.insert(std::pair(moduleHandle, moduleHandle)); } if (moduleLinkLog) { std::stringstream logMessage; logMessage << " Successfully Resolved Thru Dynamic Link to 
Module <" << moduleHandle << ">"; unresolvedSymbolLogMessages.back().append(logMessage.str()); } break; } } } } if (moduleLinkLog) { for (int i = 0; i < (int)unresolvedSymbolLogMessages.size(); i++) { moduleLinkLog->appendString(unresolvedSymbolLogMessages[i].c_str(), unresolvedSymbolLogMessages[i].size()); } } if (numPatchedSymbols != moduleId->unresolvedExternalsInfo.size()) { return ZE_RESULT_ERROR_MODULE_LINK_FAILURE; } dependencies.insert(std::pair>(moduleId, moduleDeps)); moduleId->copyPatchedSegments(isaSegmentsForPatching); moduleId->isFullyLinked = true; } for (auto i = 0u; i < numModules; i++) { static std::mutex depWalkMutex; std::lock_guard autolock(depWalkMutex); auto moduleId = static_cast(Module::fromHandle(phModules[i])); std::map>::iterator it; std::list dependentModules; // Walk the dependencies for each Module and dependent Module to determine // the dependency exportedFunctionsSurfaces that must be resident for a given Module's kernels // to execute on the device using Dynamic Module Linking. it = dependencies.find(moduleId); if (it != dependencies.end()) { moduleDependencyWalker(dependencies, moduleId, &dependentModules); // Apply the exported functions surface state from the export module(s) to the import module if it exists. // Enables import modules to access the exported function(s) during kernel execution. 
for (auto &kernImmData : moduleId->kernelImmDatas) { for (auto const &dependency : dependentModules) { kernImmData->getResidencyContainer().reserve(kernImmData->getResidencyContainer().size() + 1 + moduleId->importedSymbolAllocations.size()); kernImmData->getResidencyContainer().push_back(dependency->exportedFunctionsSurface); } kernImmData->getResidencyContainer().insert(kernImmData->getResidencyContainer().end(), moduleId->importedSymbolAllocations.begin(), moduleId->importedSymbolAllocations.end()); } } } { NEO::ExternalFunctionInfosT externalFunctionInfos; NEO::FunctionDependenciesT extFuncDependencies; NEO::KernelDependenciesT kernelDependencies; NEO::KernelDescriptorMapT nameToKernelDescriptor; for (auto i = 0u; i < numModules; i++) { auto moduleId = static_cast(Module::fromHandle(phModules[i])); auto &programInfo = moduleId->translationUnit->programInfo; auto toPtrVec = [](auto &inVec, auto &outPtrVec) { auto pos = outPtrVec.size(); outPtrVec.resize(pos + inVec.size()); for (size_t i = 0; i < inVec.size(); i++) { outPtrVec[pos + i] = &inVec[i]; } }; toPtrVec(programInfo.externalFunctions, externalFunctionInfos); if (programInfo.linkerInput) { toPtrVec(programInfo.linkerInput->getFunctionDependencies(), extFuncDependencies); toPtrVec(programInfo.linkerInput->getKernelDependencies(), kernelDependencies); } for (auto &kernelInfo : programInfo.kernelInfos) { auto &kd = kernelInfo->kernelDescriptor; nameToKernelDescriptor[kd.kernelMetadata.kernelName] = &kd; } } auto error = NEO::resolveBarrierCount(externalFunctionInfos, kernelDependencies, extFuncDependencies, nameToKernelDescriptor); if (error != NEO::RESOLVE_SUCCESS) { return ZE_RESULT_ERROR_MODULE_LINK_FAILURE; } } return ZE_RESULT_SUCCESS; } bool ModuleImp::populateHostGlobalSymbolsMap(std::unordered_map &devToHostNameMapping) { bool retVal = true; hostGlobalSymbolsMap.reserve(devToHostNameMapping.size()); for (auto &[devName, hostName] : devToHostNameMapping) { auto findSymbolRes = symbols.find(devName); 
if (findSymbolRes != symbols.end()) { auto symbol = findSymbolRes->second; if (isDataSegment(symbol.symbol.segment)) { HostGlobalSymbol hostGlobalSymbol; hostGlobalSymbol.address = symbol.gpuAddress; hostGlobalSymbol.size = symbol.symbol.size; hostGlobalSymbolsMap[hostName] = hostGlobalSymbol; } else { translationUnit->buildLog.append("Error: Symbol with given device name: " + devName + " is not in .data segment.\n"); retVal = false; } } else { translationUnit->buildLog.append("Error: No symbol found with given device name: " + devName + ".\n"); retVal = false; } } return retVal; } void ModuleImp::registerElfInDebuggerL0() { if (device->getL0Debugger() == nullptr) { return; } auto refBin = ArrayRef(reinterpret_cast(translationUnit->unpackedDeviceBinary.get()), translationUnit->unpackedDeviceBinarySize); if (NEO::isDeviceBinaryFormat(refBin)) { size_t debugDataSize = 0; getDebugInfo(&debugDataSize, nullptr); NEO::DebugData debugData; // pass debug zebin in vIsa field debugData.vIsa = reinterpret_cast(translationUnit->debugData.get()); debugData.vIsaSize = static_cast(translationUnit->debugDataSize); StackVec segmentAllocs; for (auto &kernImmData : kernelImmDatas) { device->getL0Debugger()->registerElf(&debugData, kernImmData->getIsaGraphicsAllocation()); segmentAllocs.push_back(kernImmData->getIsaGraphicsAllocation()); } if (translationUnit->globalVarBuffer) { segmentAllocs.push_back(translationUnit->globalVarBuffer); } if (translationUnit->globalConstBuffer) { segmentAllocs.push_back(translationUnit->globalConstBuffer); } device->getL0Debugger()->attachZebinModuleToSegmentAllocations(segmentAllocs, debugModuleHandle); } else { for (auto &kernImmData : kernelImmDatas) { if (kernImmData->getKernelInfo()->kernelDescriptor.external.debugData.get()) { NEO::DebugData *notifyDebugData = kernImmData->getKernelInfo()->kernelDescriptor.external.debugData.get(); NEO::DebugData relocatedDebugData; if 
(kernImmData->getKernelInfo()->kernelDescriptor.external.relocatedDebugData.get()) { relocatedDebugData.genIsa = kernImmData->getKernelInfo()->kernelDescriptor.external.debugData->genIsa; relocatedDebugData.genIsaSize = kernImmData->getKernelInfo()->kernelDescriptor.external.debugData->genIsaSize; relocatedDebugData.vIsa = reinterpret_cast(kernImmData->getKernelInfo()->kernelDescriptor.external.relocatedDebugData.get()); relocatedDebugData.vIsaSize = kernImmData->getKernelInfo()->kernelDescriptor.external.debugData->vIsaSize; notifyDebugData = &relocatedDebugData; } device->getL0Debugger()->registerElf(notifyDebugData, kernImmData->getIsaGraphicsAllocation()); } } } } bool moveBuildOption(std::string &dstOptionsSet, std::string &srcOptionSet, NEO::ConstStringRef dstOptionName, NEO::ConstStringRef srcOptionName) { const char optDelim = ' '; const char valDelim = '='; auto optInSrcPos = srcOptionSet.find(srcOptionName.begin()); if (std::string::npos == optInSrcPos) { return false; } std::string dstOptionStr(dstOptionName); auto optInSrcEndPos = srcOptionSet.find(optDelim, optInSrcPos); if (srcOptionName == BuildOptions::optLevel) { auto valInSrcPos = srcOptionSet.find(valDelim, optInSrcPos); if (std::string::npos == valInSrcPos) { return false; } dstOptionStr += srcOptionSet.substr(valInSrcPos + 1, optInSrcEndPos); } srcOptionSet.erase(optInSrcPos, (optInSrcEndPos - optInSrcPos)); NEO::CompilerOptions::concatenateAppend(dstOptionsSet, dstOptionStr); return true; } } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/module/module_imp.h000066400000000000000000000153471422164147700255520ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/compiler_interface/compiler_interface.h" #include "shared/source/compiler_interface/linker.h" #include "shared/source/program/program_info.h" #include "shared/source/utilities/const_stringref.h" #include 
"level_zero/core/source/device/device.h" #include "level_zero/core/source/module/module.h" #include "igfxfmid.h" #include #include #include namespace NEO { namespace Debug { struct Segments; } } // namespace NEO namespace L0 { namespace BuildOptions { extern NEO::ConstStringRef optDisable; extern NEO::ConstStringRef optLevel; extern NEO::ConstStringRef greaterThan4GbRequired; extern NEO::ConstStringRef hasBufferOffsetArg; extern NEO::ConstStringRef debugKernelEnable; } // namespace BuildOptions struct ModuleTranslationUnit { ModuleTranslationUnit(L0::Device *device); virtual ~ModuleTranslationUnit(); MOCKABLE_VIRTUAL bool buildFromSpirV(const char *input, uint32_t inputSize, const char *buildOptions, const char *internalBuildOptions, const ze_module_constants_t *pConstants); MOCKABLE_VIRTUAL bool staticLinkSpirV(std::vector inputSpirVs, std::vector inputModuleSizes, const char *buildOptions, const char *internalBuildOptions, std::vector specConstants); MOCKABLE_VIRTUAL bool createFromNativeBinary(const char *input, size_t inputSize); MOCKABLE_VIRTUAL bool processUnpackedBinary(); std::vector generateElfFromSpirV(std::vector inputSpirVs, std::vector inputModuleSizes); bool processSpecConstantInfo(NEO::CompilerInterface *compilerInterface, const ze_module_constants_t *pConstants, const char *input, uint32_t inputSize); std::string generateCompilerOptions(const char *buildOptions, const char *internalBuildOptions); MOCKABLE_VIRTUAL bool compileGenBinary(NEO::TranslationInput inputArgs, bool staticLink); void updateBuildLog(const std::string &newLogEntry); void processDebugData(); L0::Device *device = nullptr; NEO::GraphicsAllocation *globalConstBuffer = nullptr; NEO::GraphicsAllocation *globalVarBuffer = nullptr; NEO::ProgramInfo programInfo; std::string options; bool shouldSuppressRebuildWarning{false}; std::string buildLog; std::unique_ptr irBinary; size_t irBinarySize = 0U; std::unique_ptr unpackedDeviceBinary; size_t unpackedDeviceBinarySize = 0U; std::unique_ptr 
packedDeviceBinary; size_t packedDeviceBinarySize = 0U; std::unique_ptr debugData; size_t debugDataSize = 0U; std::vector alignedvIsas; NEO::specConstValuesMap specConstantsValues; }; struct ModuleImp : public Module { ModuleImp() = delete; ModuleImp(Device *device, ModuleBuildLog *moduleBuildLog, ModuleType type); ~ModuleImp() override; ze_result_t destroy() override { auto tempHandle = debugModuleHandle; auto tempDevice = device; delete this; if (tempDevice->getL0Debugger() && tempHandle != 0) { tempDevice->getL0Debugger()->removeZebinModule(tempHandle); } return ZE_RESULT_SUCCESS; } ze_result_t createKernel(const ze_kernel_desc_t *desc, ze_kernel_handle_t *phFunction) override; ze_result_t getNativeBinary(size_t *pSize, uint8_t *pModuleNativeBinary) override; ze_result_t getFunctionPointer(const char *pFunctionName, void **pfnFunction) override; ze_result_t getGlobalPointer(const char *pGlobalName, size_t *pSize, void **pPtr) override; ze_result_t getKernelNames(uint32_t *pCount, const char **pNames) override; ze_result_t getProperties(ze_module_properties_t *pModuleProperties) override; ze_result_t performDynamicLink(uint32_t numModules, ze_module_handle_t *phModules, ze_module_build_log_handle_t *phLinkLog) override; ze_result_t getDebugInfo(size_t *pDebugDataSize, uint8_t *pDebugData) override; const KernelImmutableData *getKernelImmutableData(const char *functionName) const override; const std::vector> &getKernelImmutableDataVector() const override { return kernelImmDatas; } uint32_t getMaxGroupSize() const override { return maxGroupSize; } void createBuildOptions(const char *pBuildFlags, std::string &buildOptions, std::string &internalBuildOptions); void createBuildExtraOptions(std::string &buildOptions, std::string &internalBuildOptions); void updateBuildLog(NEO::Device *neoDevice); Device *getDevice() const override { return device; } bool linkBinary(); bool initialize(const ze_module_desc_t *desc, NEO::Device *neoDevice); bool isDebugEnabled() const 
override; bool shouldAllocatePrivateMemoryPerDispatch() const override { return allocatePrivateMemoryPerDispatch; } ModuleTranslationUnit *getTranslationUnit() { return this->translationUnit.get(); } void moduleDependencyWalker(std::map> inDeps, void *moduleHandle, std::list *outDeps); protected: void copyPatchedSegments(const NEO::Linker::PatchableSegments &isaSegmentsForPatching); void verifyDebugCapabilities(); void checkIfPrivateMemoryPerDispatchIsNeeded() override; NEO::Debug::Segments getZebinSegments(); void passDebugData(); void createDebugZebin(); void registerElfInDebuggerL0(); bool populateHostGlobalSymbolsMap(std::unordered_map &devToHostNameMapping); Device *device = nullptr; PRODUCT_FAMILY productFamily{}; std::unique_ptr translationUnit; ModuleBuildLog *moduleBuildLog = nullptr; NEO::GraphicsAllocation *exportedFunctionsSurface = nullptr; uint32_t maxGroupSize = 0U; std::vector> kernelImmDatas; NEO::Linker::RelocatedSymbolsMap symbols; struct HostGlobalSymbol { uintptr_t address = std::numeric_limits::max(); size_t size = 0U; }; std::unordered_map hostGlobalSymbolsMap; bool debugEnabled = false; bool isFullyLinked = false; bool allocatePrivateMemoryPerDispatch = true; ModuleType type; NEO::Linker::UnresolvedExternals unresolvedExternalsInfo{}; std::set importedSymbolAllocations{}; uint32_t debugModuleHandle = 0; }; bool moveBuildOption(std::string &dstOptionsSet, std::string &srcOptionSet, NEO::ConstStringRef dstOptionName, NEO::ConstStringRef srcOptionName); } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/printf_handler/000077500000000000000000000000001422164147700247475ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/source/printf_handler/printf_handler.cpp000066400000000000000000000036311422164147700304550ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/printf_handler/printf_handler.h" #include 
"shared/source/execution_environment/execution_environment.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/program/print_formatter.h" #include "level_zero/core/source/device/device_imp.h" namespace L0 { NEO::GraphicsAllocation *PrintfHandler::createPrintfBuffer(Device *device) { NEO::AllocationProperties properties( device->getRootDeviceIndex(), PrintfHandler::printfBufferSize, NEO::AllocationType::PRINTF_SURFACE, device->getNEODevice()->getDeviceBitfield()); properties.alignment = MemoryConstants::pageSize64k; auto allocation = device->getNEODevice()->getMemoryManager()->allocateGraphicsMemoryWithProperties(properties); *reinterpret_cast(allocation->getUnderlyingBuffer()) = PrintfHandler::printfSurfaceInitialDataSize; return allocation; } void PrintfHandler::printOutput(const KernelImmutableData *kernelData, NEO::GraphicsAllocation *printfBuffer, Device *device) { bool using32BitGpuPointers = kernelData->getDescriptor().kernelAttributes.gpuPointerSize == 4u; auto usesStringMap = kernelData->getDescriptor().kernelAttributes.usesStringMap(); NEO::PrintFormatter printfFormatter{ static_cast(printfBuffer->getUnderlyingBuffer()), static_cast(printfBuffer->getUnderlyingBufferSize()), using32BitGpuPointers, usesStringMap ? 
&kernelData->getDescriptor().kernelMetadata.printfStringsMap : nullptr}; printfFormatter.printKernelOutput(); *reinterpret_cast(printfBuffer->getUnderlyingBuffer()) = PrintfHandler::printfSurfaceInitialDataSize; } size_t PrintfHandler::getPrintBufferSize() { return PrintfHandler::printfBufferSize; } } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/printf_handler/printf_handler.h000066400000000000000000000016701422164147700301230ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/constants.h" #include "level_zero/core/source/kernel/kernel.h" #include namespace NEO { class Kernel; class GraphicsAllocation; } // namespace NEO namespace L0 { struct Device; struct PrintfHandler { static NEO::GraphicsAllocation *createPrintfBuffer(Device *device); static void printOutput(const KernelImmutableData *kernelData, NEO::GraphicsAllocation *printfBuffer, Device *device); static size_t getPrintBufferSize(); protected: PrintfHandler(const PrintfHandler &) = delete; PrintfHandler &operator=(PrintfHandler const &) = delete; PrintfHandler() = delete; constexpr static size_t printfBufferSize = 4 * MemoryConstants::megaByte; constexpr static uint32_t printfSurfaceInitialDataSize = sizeof(uint32_t); }; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/sampler/000077500000000000000000000000001422164147700234135ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/source/sampler/sampler.h000066400000000000000000000026571422164147700252410ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/core/source/device/device.h" #include struct _ze_sampler_handle_t {}; namespace L0 { struct Sampler : _ze_sampler_handle_t { template struct Allocator { static Sampler *allocate() { return new Type(); } }; virtual ~Sampler() = default; virtual 
ze_result_t destroy() = 0; static Sampler *create(uint32_t productFamily, Device *device, const ze_sampler_desc_t *desc); virtual void copySamplerStateToDSH(void *dynamicStateHeap, const uint32_t dynamicStateHeapSize, const uint32_t heapOffset) = 0; static Sampler *fromHandle(ze_sampler_handle_t handle) { return static_cast(handle); } inline ze_sampler_handle_t toHandle() { return this; } const ze_sampler_desc_t getSamplerDesc() const { return samplerDesc; } protected: ze_sampler_desc_t samplerDesc = {}; }; using SamplerAllocatorFn = Sampler *(*)(); extern SamplerAllocatorFn samplerFactory[]; template struct SamplerPopulateFactory { SamplerPopulateFactory() { samplerFactory[productFamily] = Sampler::Allocator::allocate; } }; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/sampler/sampler_hw.h000066400000000000000000000017761422164147700257400ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/core/source/sampler/sampler_imp.h" namespace L0 { template struct SamplerCoreFamily : public SamplerImp { public: using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using SAMPLER_STATE = typename GfxFamily::SAMPLER_STATE; using BaseClass = SamplerImp; ze_result_t initialize(Device *device, const ze_sampler_desc_t *desc) override; virtual void appendSamplerStateParams(SAMPLER_STATE *state){}; void copySamplerStateToDSH(void *dynamicStateHeap, const uint32_t dynamicStateHeapSize, const uint32_t offset) override; static constexpr float getGenSamplerMaxLod() { return 14.0f; } protected: SAMPLER_STATE samplerState; float lodMin = 1.0f; float lodMax = 1.0f; }; template struct SamplerProductFamily; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/sampler/sampler_hw.inl000066400000000000000000000121061422164147700262600ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma 
once #include "shared/source/helpers/string.h" #include "shared/source/utilities/numeric.h" #include "level_zero/core/source/sampler/sampler_hw.h" namespace L0 { template ze_result_t SamplerCoreFamily::initialize(Device *device, const ze_sampler_desc_t *desc) { using SAMPLER_STATE = typename GfxFamily::SAMPLER_STATE; BaseClass::initialize(device, desc); samplerState.setNonNormalizedCoordinateEnable(!desc->isNormalized); samplerState.setLodPreclampMode(SAMPLER_STATE::LOD_PRECLAMP_MODE::LOD_PRECLAMP_MODE_OGL); auto addressControlModeX = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_CLAMP_BORDER; auto addressControlModeY = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_CLAMP_BORDER; auto addressControlModeZ = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_CLAMP_BORDER; switch (desc->addressMode) { case ZE_SAMPLER_ADDRESS_MODE_NONE: case ZE_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER: break; case ZE_SAMPLER_ADDRESS_MODE_CLAMP: addressControlModeX = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_CLAMP; addressControlModeY = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_CLAMP; addressControlModeZ = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_CLAMP; break; case ZE_SAMPLER_ADDRESS_MODE_MIRROR: addressControlModeX = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_MIRROR; addressControlModeY = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_MIRROR; addressControlModeZ = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_MIRROR; break; case ZE_SAMPLER_ADDRESS_MODE_REPEAT: addressControlModeX = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_WRAP; addressControlModeY = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_WRAP; addressControlModeZ = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_WRAP; break; default: return ZE_RESULT_ERROR_INVALID_ARGUMENT; } auto minMode = SAMPLER_STATE::MIN_MODE_FILTER_NEAREST; auto magMode = SAMPLER_STATE::MAG_MODE_FILTER_NEAREST; auto mipMode = SAMPLER_STATE::MIP_MODE_FILTER_NEAREST; auto rAddrMinFilterRounding = false; auto rAddrMagFilterRounding = false; auto vAddrMinFilterRounding = false; auto vAddrMagFilterRounding = false; auto 
uAddrMinFilterRounding = false; auto uAddrMagFilterRounding = false; switch (desc->filterMode) { case ZE_SAMPLER_FILTER_MODE_NEAREST: minMode = SAMPLER_STATE::MIN_MODE_FILTER_NEAREST; magMode = SAMPLER_STATE::MAG_MODE_FILTER_NEAREST; mipMode = SAMPLER_STATE::MIP_MODE_FILTER_NEAREST; break; case ZE_SAMPLER_FILTER_MODE_LINEAR: minMode = SAMPLER_STATE::MIN_MODE_FILTER_LINEAR; magMode = SAMPLER_STATE::MAG_MODE_FILTER_LINEAR; mipMode = SAMPLER_STATE::MIP_MODE_FILTER_NEAREST; rAddrMinFilterRounding = true; rAddrMagFilterRounding = true; vAddrMinFilterRounding = true; vAddrMagFilterRounding = true; uAddrMinFilterRounding = true; uAddrMagFilterRounding = true; break; default: return ZE_RESULT_ERROR_INVALID_ARGUMENT; } samplerState.setMinModeFilter(minMode); samplerState.setMagModeFilter(magMode); samplerState.setMipModeFilter(mipMode); samplerState.setRAddressMinFilterRoundingEnable(rAddrMinFilterRounding); samplerState.setRAddressMagFilterRoundingEnable(rAddrMagFilterRounding); samplerState.setVAddressMinFilterRoundingEnable(vAddrMinFilterRounding); samplerState.setVAddressMagFilterRoundingEnable(vAddrMagFilterRounding); samplerState.setUAddressMinFilterRoundingEnable(uAddrMinFilterRounding); samplerState.setUAddressMagFilterRoundingEnable(uAddrMagFilterRounding); samplerState.setTcxAddressControlMode(addressControlModeX); samplerState.setTcyAddressControlMode(addressControlModeY); samplerState.setTczAddressControlMode(addressControlModeZ); NEO::FixedU4D8 minLodValue = NEO::FixedU4D8(std::min(getGenSamplerMaxLod(), this->lodMin)); NEO::FixedU4D8 maxLodValue = NEO::FixedU4D8(std::min(getGenSamplerMaxLod(), this->lodMax)); samplerState.setMinLod(minLodValue.getRawAccess()); samplerState.setMaxLod(maxLodValue.getRawAccess()); appendSamplerStateParams(&samplerState); return ZE_RESULT_SUCCESS; } template void SamplerCoreFamily::copySamplerStateToDSH(void *dynamicStateHeap, const uint32_t dynamicStateHeapSize, const uint32_t samplerOffset) { using GfxFamily = typename 
NEO::GfxFamilyMapper::GfxFamily; using SAMPLER_STATE = typename GfxFamily::SAMPLER_STATE; using BINDING_TABLE_STATE = typename GfxFamily::BINDING_TABLE_STATE; auto destSamplerState = ptrOffset(dynamicStateHeap, samplerOffset); auto freeSpace = dynamicStateHeapSize - (samplerOffset + sizeof(SAMPLER_STATE)); memcpy_s(destSamplerState, freeSpace, &samplerState, sizeof(SAMPLER_STATE)); } } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/sampler/sampler_imp.cpp000066400000000000000000000020361422164147700264300ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/sampler/sampler_imp.h" #include "level_zero/core/source/device/device.h" namespace L0 { SamplerAllocatorFn samplerFactory[IGFX_MAX_PRODUCT] = {}; ze_result_t SamplerImp::destroy() { delete this; return ZE_RESULT_SUCCESS; } ze_result_t SamplerImp::initialize(Device *device, const ze_sampler_desc_t *desc) { samplerDesc = *desc; return ZE_RESULT_SUCCESS; } Sampler *Sampler::create(uint32_t productFamily, Device *device, const ze_sampler_desc_t *desc) { SamplerAllocatorFn allocator = nullptr; if (productFamily < IGFX_MAX_PRODUCT) { allocator = samplerFactory[productFamily]; } SamplerImp *sampler = nullptr; if (allocator) { sampler = static_cast((*allocator)()); if (sampler->initialize(device, desc) != ZE_RESULT_SUCCESS) { sampler->destroy(); sampler = nullptr; } } return sampler; } } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/sampler/sampler_imp.h000066400000000000000000000005511422164147700260750ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/core/source/sampler/sampler.h" namespace L0 { class SamplerImp : public Sampler { public: ze_result_t destroy() override; virtual ze_result_t initialize(Device *device, const ze_sampler_desc_t *desc); }; } // namespace L0 
compute-runtime-22.14.22890/level_zero/core/source/windows/000077500000000000000000000000001422164147700234425ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/source/windows/CMakeLists.txt000066400000000000000000000005451422164147700262060ustar00rootroot00000000000000# # Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT # if(WIN32) set(L0_SOURCES_WINDOWS ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/registry_path.cpp ${CMAKE_CURRENT_SOURCE_DIR}/driver_teardown.cpp ) set_property(GLOBAL PROPERTY L0_SOURCES_WINDOWS ${L0_SOURCES_WINDOWS}) endif() compute-runtime-22.14.22890/level_zero/core/source/windows/driver_teardown.cpp000066400000000000000000000007121422164147700273440ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/driver/driver_handle_imp.h" #include using namespace L0; BOOL APIENTRY DllMain(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) { if (fdwReason == DLL_PROCESS_DETACH) { if (GlobalDriver != nullptr) { delete GlobalDriver; GlobalDriver = nullptr; } } return TRUE; } compute-runtime-22.14.22890/level_zero/core/source/windows/registry_path.cpp000066400000000000000000000003521422164147700270320ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/compiler_interface/l0_reg_path.h" namespace L0 { const char *registryPath = "Software\\Intel\\IGFX\\L0\\"; } compute-runtime-22.14.22890/level_zero/core/source/xe_hp_core/000077500000000000000000000000001422164147700240635ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/source/xe_hp_core/CMakeLists.txt000066400000000000000000000014251422164147700266250ustar00rootroot00000000000000# # Copyright (C) 2021-2022 Intel Corporation # # SPDX-License-Identifier: MIT # if(SUPPORT_XE_HP_CORE) set(HW_SOURCES_XE_HP_CORE 
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_xe_hp_core.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_xe_hp_core.h ${CMAKE_CURRENT_SOURCE_DIR}/debugger_xe_hp_core.cpp ${CMAKE_CURRENT_SOURCE_DIR}/image_xe_hp_core.inl ${CMAKE_CURRENT_SOURCE_DIR}/sampler_xe_hp_core.inl ${CMAKE_CURRENT_SOURCE_DIR}/enable_family_full_l0_xe_hp_core.cpp ${CMAKE_CURRENT_SOURCE_DIR}/l0_hw_helper_xe_hp_core.cpp ) add_subdirectories() target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${HW_SOURCES_XE_HP_CORE}) set_property(GLOBAL PROPERTY L0_HW_SOURCES_XE_HP_CORE ${HW_SOURCES_XE_HP_CORE}) endif() compute-runtime-22.14.22890/level_zero/core/source/xe_hp_core/cmdlist_xe_hp_core.cpp000066400000000000000000000011201422164147700304130ustar00rootroot00000000000000/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/cmdlist/cmdlist_hw.h" #include "level_zero/core/source/cmdlist/cmdlist_hw.inl" #include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.h" #include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl" #include "level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl" #include "cmdlist_extended.inl" namespace L0 { template struct CommandListCoreFamily; template struct CommandListCoreFamilyImmediate; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/xe_hp_core/cmdlist_xe_hp_core.h000066400000000000000000000012171422164147700300670ustar00rootroot00000000000000/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/core/source/cmdlist/cmdlist_hw.h" #include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.h" namespace L0 { template struct CommandListProductFamily : public CommandListCoreFamily { using CommandListCoreFamily::CommandListCoreFamily; }; template struct CommandListImmediateProductFamily : public CommandListCoreFamilyImmediate { using CommandListCoreFamilyImmediate::CommandListCoreFamilyImmediate; }; } // 
namespace L0 compute-runtime-22.14.22890/level_zero/core/source/xe_hp_core/debugger_xe_hp_core.cpp000066400000000000000000000006221422164147700305460ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/debugger/debugger_l0.inl" #include "level_zero/core/source/debugger/debugger_l0_tgllp_and_later.inl" namespace L0 { using Family = NEO::XeHpFamily; DebuggerL0PopulateFactory debuggerXE_HP_CORE; template class DebuggerL0Hw; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/xe_hp_core/enable_family_full_l0_xe_hp_core.cpp000066400000000000000000000007351422164147700331730ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/xe_hp_core/hw_cmds.h" #include "level_zero/core/source/helpers/l0_populate_factory.h" #include "level_zero/core/source/hw_helpers/l0_hw_helper.h" namespace NEO { using Family = XeHpFamily; struct EnableL0XeHpCore { EnableL0XeHpCore() { L0::populateFactoryTable>(); } }; static EnableL0XeHpCore enable; } // namespace NEO compute-runtime-22.14.22890/level_zero/core/source/xe_hp_core/image_xe_hp_core.inl000066400000000000000000000003501422164147700300420ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/xe_hp_core/hw_cmds.h" #include "shared/source/xe_hp_core/hw_info.h" #include "level_zero/core/source/image/image_hw.inl" compute-runtime-22.14.22890/level_zero/core/source/xe_hp_core/l0_hw_helper_xe_hp_core.cpp000066400000000000000000000015121422164147700313310ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/helpers/l0_populate_factory.h" #include "level_zero/core/source/hw_helpers/l0_hw_helper_base.inl" #include "level_zero/core/source/hw_helpers/l0_hw_helper_skl_and_later.inl" #include 
"hw_cmds.h" namespace L0 { using Family = NEO::XeHpFamily; static auto gfxCore = IGFX_XE_HP_CORE; template <> void populateFactoryTable>() { extern L0HwHelper *l0HwHelperFactory[IGFX_MAX_CORE]; l0HwHelperFactory[gfxCore] = &L0HwHelperHw::get(); } template <> bool L0HwHelperHw::isResumeWARequired() { return true; } // clang-format off #include "level_zero/core/source/hw_helpers/l0_hw_helper_tgllp_plus.inl" // clang-format on template class L0HwHelperHw; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/xe_hp_core/sampler_xe_hp_core.inl000066400000000000000000000003541422164147700304270ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/xe_hp_core/hw_cmds.h" #include "shared/source/xe_hp_core/hw_info.h" #include "level_zero/core/source/sampler/sampler_hw.inl" compute-runtime-22.14.22890/level_zero/core/source/xe_hp_core/xehp/000077500000000000000000000000001422164147700250275ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/source/xe_hp_core/xehp/CMakeLists.txt000066400000000000000000000007531422164147700275740ustar00rootroot00000000000000# # Copyright (C) 2021 Intel Corporation # # SPDX-License-Identifier: MIT # if(SUPPORT_XE_HP_SDV) set(HW_SOURCES_XE_HP_CORE ${HW_SOURCES_XE_HP_CORE} ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_xehp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cmdqueue_xehp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_xehp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/image_xehp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sampler_xehp.cpp PARENT_SCOPE ) endif() compute-runtime-22.14.22890/level_zero/core/source/xe_hp_core/xehp/cmdlist_xehp.cpp000066400000000000000000000006701422164147700302210ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/xe_hp_core/cmdlist_xe_hp_core.h" namespace L0 { static CommandListPopulateFactory> populateXEHP; static 
CommandListImmediatePopulateFactory> populateXEHPImmediate; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/xe_hp_core/xehp/cmdqueue_xehp.cpp000066400000000000000000000010261422164147700303660ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/xe_hp_core/hw_cmds.h" #include "shared/source/xe_hp_core/hw_info.h" #include "level_zero/core/source/cmdqueue/cmdqueue_hw.inl" #include "level_zero/core/source/cmdqueue/cmdqueue_xe_hp_core_and_later.inl" #include "cmdqueue_extended.inl" namespace L0 { template struct CommandQueueHw; static CommandQueuePopulateFactory> populateXEHP; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/xe_hp_core/xehp/image_xehp.cpp000066400000000000000000000011221422164147700276350ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/xe_hp_core/image_xe_hp_core.inl" namespace L0 { template <> struct ImageProductFamily : public ImageCoreFamily { using ImageCoreFamily::ImageCoreFamily; ze_result_t initialize(Device *device, const ze_image_desc_t *desc) override { return ImageCoreFamily::initialize(device, desc); }; }; static ImagePopulateFactory> populateXEHP; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/xe_hp_core/xehp/kernel_xehp.cpp000066400000000000000000000004051422164147700300360ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/kernel/kernel_hw.h" namespace L0 { static KernelPopulateFactory> populateXEHP; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/xe_hp_core/xehp/sampler_xehp.cpp000066400000000000000000000006721422164147700302270ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include 
"level_zero/core/source/xe_hp_core/sampler_xe_hp_core.inl" namespace L0 { template <> struct SamplerProductFamily : public SamplerCoreFamily { using SamplerCoreFamily::SamplerCoreFamily; }; static SamplerPopulateFactory> populateXEHP; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/xe_hpc_core/000077500000000000000000000000001422164147700242265ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/source/xe_hpc_core/CMakeLists.txt000066400000000000000000000011331422164147700267640ustar00rootroot00000000000000# # Copyright (C) 2021-2022 Intel Corporation # # SPDX-License-Identifier: MIT # if(SUPPORT_XE_HPC_CORE) set(HW_SOURCES_XE_HPC_CORE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_xe_hpc_core.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_xe_hpc_core.h ${CMAKE_CURRENT_SOURCE_DIR}/debugger_xe_hpc_core.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enable_family_full_l0_xe_hpc_core.cpp ${CMAKE_CURRENT_SOURCE_DIR}/l0_hw_helper_xe_hpc_core.cpp ) add_subdirectories() target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${HW_SOURCES_XE_HPC_CORE}) endif() compute-runtime-22.14.22890/level_zero/core/source/xe_hpc_core/cmdlist_xe_hpc_core.cpp000066400000000000000000000062331422164147700307330ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/cmdlist/cmdlist_hw.h" #include "level_zero/core/source/cmdlist/cmdlist_hw.inl" #include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.h" #include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl" #include "level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl" #include "cmdlist_extended.inl" namespace L0 { template <> NEO::PipeControlArgs CommandListCoreFamily::createBarrierFlags() { NEO::PipeControlArgs args; args.hdcPipelineFlush = true; args.unTypedDataPortCacheFlush = true; return args; } template <> ze_result_t CommandListCoreFamily::appendMemoryPrefetch(const void *ptr, 
size_t size) { using MI_BATCH_BUFFER_END = GfxFamily::MI_BATCH_BUFFER_END; auto allocData = device->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(ptr); if (!allocData) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } auto allowPrefetchingKmdMigratedSharedAllocation = false; if (NEO::DebugManager.flags.AppendMemoryPrefetchForKmdMigratedSharedAllocations.get() != -1) { allowPrefetchingKmdMigratedSharedAllocation = !!NEO::DebugManager.flags.AppendMemoryPrefetchForKmdMigratedSharedAllocations.get(); } if (allowPrefetchingKmdMigratedSharedAllocation) { auto memoryManager = device->getDriverHandle()->getMemoryManager(); if (memoryManager->isKmdMigrationAvailable(device->getRootDeviceIndex()) && (allocData->memoryType == InternalMemoryType::SHARED_UNIFIED_MEMORY)) { auto alloc = allocData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex()); auto subDeviceId = static_cast(device)->getPhysicalSubDeviceId(); memoryManager->setMemPrefetch(alloc, subDeviceId, device->getRootDeviceIndex()); } } if (NEO::DebugManager.flags.AddStatePrefetchCmdToMemoryPrefetchAPI.get() != 1) { return ZE_RESULT_SUCCESS; } auto gpuAlloc = allocData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex()); auto &hwInfo = device->getHwInfo(); commandContainer.addToResidencyContainer(gpuAlloc); size_t offset = ptrDiff(ptr, gpuAlloc->getGpuAddress()); NEO::LinearStream &cmdStream = *commandContainer.getCommandStream(); NEO::EncodeMemoryPrefetch::programMemoryPrefetch(cmdStream, *gpuAlloc, static_cast(size), offset, hwInfo); return ZE_RESULT_SUCCESS; } template <> void CommandListCoreFamily::applyMemoryRangesBarrier(uint32_t numRanges, const size_t *pRangeSizes, const void **pRanges) { NEO::PipeControlArgs args; args.hdcPipelineFlush = true; args.unTypedDataPortCacheFlush = true; NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), args); } template struct CommandListCoreFamily; template struct CommandListCoreFamilyImmediate; } // 
namespace L0 compute-runtime-22.14.22890/level_zero/core/source/xe_hpc_core/cmdlist_xe_hpc_core.h000066400000000000000000000012211422164147700303700ustar00rootroot00000000000000/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/core/source/cmdlist/cmdlist_hw.h" #include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.h" namespace L0 { template struct CommandListProductFamily : public CommandListCoreFamily { using CommandListCoreFamily::CommandListCoreFamily; }; template struct CommandListImmediateProductFamily : public CommandListCoreFamilyImmediate { using CommandListCoreFamilyImmediate::CommandListCoreFamilyImmediate; }; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/xe_hpc_core/debugger_xe_hpc_core.cpp000066400000000000000000000006351422164147700310600ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/debugger/debugger_l0.inl" #include "level_zero/core/source/debugger/debugger_l0_tgllp_and_later.inl" namespace L0 { using Family = NEO::XE_HPC_COREFamily; template class DebuggerL0Hw; static DebuggerL0PopulateFactory debuggerXeHpcCore; } // namespace L0compute-runtime-22.14.22890/level_zero/core/source/xe_hpc_core/enable_family_full_l0_xe_hpc_core.cpp000066400000000000000000000007501422164147700334760ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/xe_hpc_core/hw_cmds.h" #include "level_zero/core/source/helpers/l0_populate_factory.h" #include "level_zero/core/source/hw_helpers/l0_hw_helper.h" namespace NEO { using Family = XE_HPC_COREFamily; struct EnableL0XeHpcCore { EnableL0XeHpcCore() { L0::populateFactoryTable>(); } }; static EnableL0XeHpcCore enable; } // namespace NEO 
compute-runtime-22.14.22890/level_zero/core/source/xe_hpc_core/image_xe_hpc_core.inl000066400000000000000000000003521422164147700303520ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/xe_hpc_core/hw_cmds.h" #include "shared/source/xe_hpc_core/hw_info.h" #include "level_zero/core/source/image/image_hw.inl" compute-runtime-22.14.22890/level_zero/core/source/xe_hpc_core/l0_hw_helper_xe_hpc_core.cpp000066400000000000000000000015241422164147700316420ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/xe_hpc_core/hw_cmds_pvc.h" #include "level_zero/core/source/helpers/l0_populate_factory.h" #include "level_zero/core/source/hw_helpers/l0_hw_helper_base.inl" #include "level_zero/core/source/hw_helpers/l0_hw_helper_pvc_and_later.inl" #include "hw_cmds.h" namespace L0 { using Family = NEO::XE_HPC_COREFamily; static auto gfxCore = IGFX_XE_HPC_CORE; template <> void populateFactoryTable>() { extern L0HwHelper *l0HwHelperFactory[IGFX_MAX_CORE]; l0HwHelperFactory[gfxCore] = &L0HwHelperHw::get(); } template <> bool L0HwHelperHw::isIpSamplingSupported(const NEO::HardwareInfo &hwInfo) const { return NEO::PVC::isXt(hwInfo); } template class L0HwHelperHw; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/xe_hpc_core/pvc/000077500000000000000000000000001422164147700250165ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/source/xe_hpc_core/pvc/CMakeLists.txt000066400000000000000000000006601422164147700275600ustar00rootroot00000000000000# # Copyright (C) 2021 Intel Corporation # # SPDX-License-Identifier: MIT # if(SUPPORT_PVC) set(HW_SOURCES_XE_HPC_CORE ${HW_SOURCES_XE_HPC_CORE} ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/image_pvc.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_pvc.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cmdqueue_pvc.cpp 
${CMAKE_CURRENT_SOURCE_DIR}/kernel_pvc.cpp PARENT_SCOPE ) endif() compute-runtime-22.14.22890/level_zero/core/source/xe_hpc_core/pvc/cmdlist_pvc.cpp000066400000000000000000000006321422164147700300320ustar00rootroot00000000000000/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/xe_hpc_core/cmdlist_xe_hpc_core.h" namespace L0 { static CommandListPopulateFactory> populatePVC; static CommandListImmediatePopulateFactory> populatePVCImmediate; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/xe_hpc_core/pvc/cmdqueue_pvc.cpp000066400000000000000000000010231422164147700301760ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/xe_hpc_core/hw_cmds.h" #include "shared/source/xe_hpc_core/hw_info.h" #include "level_zero/core/source/cmdqueue/cmdqueue_hw.inl" #include "level_zero/core/source/cmdqueue/cmdqueue_xe_hp_core_and_later.inl" #include "cmdqueue_extended.inl" namespace L0 { template struct CommandQueueHw; static CommandQueuePopulateFactory> populatePVC; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/xe_hpc_core/pvc/image_pvc.cpp000066400000000000000000000011031422164147700274470ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/xe_hpc_core/image_xe_hpc_core.inl" namespace L0 { template <> struct ImageProductFamily : public ImageCoreFamily { using ImageCoreFamily::ImageCoreFamily; ze_result_t initialize(Device *device, const ze_image_desc_t *desc) override { return ImageCoreFamily::initialize(device, desc); }; }; static ImagePopulateFactory> populatePVC; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/xe_hpc_core/pvc/kernel_pvc.cpp000066400000000000000000000003771422164147700276610ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: 
MIT * */ #include "level_zero/core/source/kernel/kernel_hw.h" namespace L0 { static KernelPopulateFactory> populatePVC; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/xe_hpg_core/000077500000000000000000000000001422164147700242325ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/source/xe_hpg_core/CMakeLists.txt000066400000000000000000000014411422164147700267720ustar00rootroot00000000000000# # Copyright (C) 2021-2022 Intel Corporation # # SPDX-License-Identifier: MIT # if(SUPPORT_XE_HPG_CORE) set(HW_SOURCES_XE_HPG_CORE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_xe_hpg_core.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_xe_hpg_core.h ${CMAKE_CURRENT_SOURCE_DIR}/debugger_xe_hpg_core.cpp ${CMAKE_CURRENT_SOURCE_DIR}/image_xe_hpg_core.inl ${CMAKE_CURRENT_SOURCE_DIR}/sampler_xe_hpg_core.inl ${CMAKE_CURRENT_SOURCE_DIR}/enable_family_full_l0_xe_hpg_core.cpp ${CMAKE_CURRENT_SOURCE_DIR}/l0_hw_helper_xe_hpg_core.cpp ) add_subdirectories() target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${HW_SOURCES_XE_HPG_CORE}) set_property(GLOBAL PROPERTY L0_HW_SOURCES_XE_HPG_CORE ${HW_SOURCES_XE_HPG_CORE}) endif() compute-runtime-22.14.22890/level_zero/core/source/xe_hpg_core/cmdlist_xe_hpg_core.cpp000066400000000000000000000011261422164147700307370ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/cmdlist/cmdlist_hw.h" #include "level_zero/core/source/cmdlist/cmdlist_hw.inl" #include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.h" #include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl" #include "level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl" #include "cmdlist_extended.inl" namespace L0 { template struct CommandListCoreFamily; template struct CommandListCoreFamilyImmediate; } // namespace 
L0compute-runtime-22.14.22890/level_zero/core/source/xe_hpg_core/cmdlist_xe_hpg_core.h000066400000000000000000000012271422164147700304060ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/core/source/cmdlist/cmdlist_hw.h" #include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.h" namespace L0 { template struct CommandListProductFamily : public CommandListCoreFamily { using CommandListCoreFamily::CommandListCoreFamily; }; template struct CommandListImmediateProductFamily : public CommandListCoreFamilyImmediate { using CommandListCoreFamilyImmediate::CommandListCoreFamilyImmediate; }; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/xe_hpg_core/debugger_xe_hpg_core.cpp000066400000000000000000000006351422164147700310700ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/debugger/debugger_l0.inl" #include "level_zero/core/source/debugger/debugger_l0_tgllp_and_later.inl" namespace L0 { using Family = NEO::XE_HPG_COREFamily; template class DebuggerL0Hw; static DebuggerL0PopulateFactory debuggerXeHpgCore; } // namespace L0compute-runtime-22.14.22890/level_zero/core/source/xe_hpg_core/dg2/000077500000000000000000000000001422164147700247065ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/source/xe_hpg_core/dg2/CMakeLists.txt000066400000000000000000000007421422164147700274510ustar00rootroot00000000000000# # Copyright (C) 2021 Intel Corporation # # SPDX-License-Identifier: MIT # if(SUPPORT_DG2) set(HW_SOURCES_XE_HPG_CORE ${HW_SOURCES_XE_HPG_CORE} ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_dg2.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cmdqueue_dg2.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_dg2.cpp ${CMAKE_CURRENT_SOURCE_DIR}/image_dg2.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sampler_dg2.cpp PARENT_SCOPE ) endif() 
compute-runtime-22.14.22890/level_zero/core/source/xe_hpg_core/dg2/cmdlist_dg2.cpp000066400000000000000000000006401422164147700276050ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/xe_hpg_core/cmdlist_xe_hpg_core.h" namespace L0 { static CommandListPopulateFactory> populateDG2; static CommandListImmediatePopulateFactory> populateDG2Immediate; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/xe_hpg_core/dg2/cmdqueue_dg2.cpp000066400000000000000000000010241422164147700277530ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/xe_hpg_core/hw_cmds.h" #include "shared/source/xe_hpg_core/hw_info.h" #include "level_zero/core/source/cmdqueue/cmdqueue_hw.inl" #include "level_zero/core/source/cmdqueue/cmdqueue_xe_hp_core_and_later.inl" #include "cmdqueue_extended.inl" namespace L0 { template struct CommandQueueHw; static CommandQueuePopulateFactory> populateDG2; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/xe_hpg_core/dg2/image_dg2.cpp000066400000000000000000000011031422164147700272230ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/xe_hpg_core/image_xe_hpg_core.inl" namespace L0 { template <> struct ImageProductFamily : public ImageCoreFamily { using ImageCoreFamily::ImageCoreFamily; ze_result_t initialize(Device *device, const ze_image_desc_t *desc) override { return ImageCoreFamily::initialize(device, desc); }; }; static ImagePopulateFactory> populateDG2; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/xe_hpg_core/dg2/kernel_dg2.cpp000066400000000000000000000003771422164147700274350ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/kernel/kernel_hw.h" 
namespace L0 { static KernelPopulateFactory> populateDG2; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/xe_hpg_core/dg2/sampler_dg2.cpp000066400000000000000000000016761422164147700276230ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/debug_settings/debug_settings_manager.h" #include "level_zero/core/source/xe_hpg_core/sampler_xe_hpg_core.inl" namespace L0 { template struct SamplerCoreFamily; template <> struct SamplerProductFamily : public SamplerCoreFamily { using SamplerCoreFamily::SamplerCoreFamily; void appendSamplerStateParams(SAMPLER_STATE *state) override { if (NEO::DebugManager.flags.ForceSamplerLowFilteringPrecision.get()) { state->setLowQualityFilter(SAMPLER_STATE::LOW_QUALITY_FILTER_ENABLE); } } ze_result_t initialize(Device *device, const ze_sampler_desc_t *desc) override { return SamplerCoreFamily::initialize(device, desc); }; }; static SamplerPopulateFactory> populateDG2; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/xe_hpg_core/enable_family_full_l0_xe_hpg_core.cpp000066400000000000000000000007501422164147700335060ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/xe_hpg_core/hw_cmds.h" #include "level_zero/core/source/helpers/l0_populate_factory.h" #include "level_zero/core/source/hw_helpers/l0_hw_helper.h" namespace NEO { using Family = XE_HPG_COREFamily; struct EnableL0XeHpgCore { EnableL0XeHpgCore() { L0::populateFactoryTable>(); } }; static EnableL0XeHpgCore enable; } // namespace NEO compute-runtime-22.14.22890/level_zero/core/source/xe_hpg_core/image_xe_hpg_core.inl000066400000000000000000000003521422164147700303620ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/xe_hpg_core/hw_cmds.h" #include "shared/source/xe_hpg_core/hw_info.h" #include 
"level_zero/core/source/image/image_hw.inl" compute-runtime-22.14.22890/level_zero/core/source/xe_hpg_core/l0_hw_helper_xe_hpg_core.cpp000066400000000000000000000015221422164147700316500ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/helpers/l0_populate_factory.h" #include "level_zero/core/source/hw_helpers/l0_hw_helper_base.inl" #include "level_zero/core/source/hw_helpers/l0_hw_helper_skl_and_later.inl" #include "hw_cmds.h" namespace L0 { using Family = NEO::XE_HPG_COREFamily; static auto gfxCore = IGFX_XE_HPG_CORE; template <> void populateFactoryTable>() { extern L0HwHelper *l0HwHelperFactory[IGFX_MAX_CORE]; l0HwHelperFactory[gfxCore] = &L0HwHelperHw::get(); } template <> bool L0HwHelperHw::isResumeWARequired() { return true; } // clang-format off #include "level_zero/core/source/hw_helpers/l0_hw_helper_tgllp_plus.inl" // clang-format on template class L0HwHelperHw; } // namespace L0 compute-runtime-22.14.22890/level_zero/core/source/xe_hpg_core/sampler_xe_hpg_core.inl000066400000000000000000000003561422164147700307470ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/xe_hpg_core/hw_cmds.h" #include "shared/source/xe_hpg_core/hw_info.h" #include "level_zero/core/source/sampler/sampler_hw.inl" compute-runtime-22.14.22890/level_zero/core/test/000077500000000000000000000000001422164147700214275ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/.clang-tidy000066400000000000000000000035161422164147700234700ustar00rootroot00000000000000--- Checks: 
'clang-diagnostic-*,clang-analyzer-*,google-default-arguments,modernize-use-override,modernize-use-default-member-init,-clang-analyzer-alpha*,readability-identifier-naming,-clang-analyzer-optin.performance.Padding,-clang-analyzer-security.insecureAPI.strcpy,-clang-analyzer-cplusplus.NewDeleteLeaks,-clang-analyzer-core.CallAndMessage,-clang-analyzer-unix.MismatchedDeallocator,-clang-analyzer-core.NullDereference,-clang-analyzer-cplusplus.NewDelete,-clang-analyzer-optin.cplusplus.VirtualCall' # WarningsAsErrors: '.*' HeaderFilterRegex: '^((?!^third_party\/).+)\.(h|hpp|inl)$' AnalyzeTemporaryDtors: false CheckOptions: - key: google-readability-braces-around-statements.ShortStatementLines value: '1' - key: google-readability-function-size.StatementThreshold value: '800' - key: google-readability-namespace-comments.ShortNamespaceLines value: '10' - key: google-readability-namespace-comments.SpacesBeforeComments value: '2' - key: readability-identifier-naming.ParameterCase value: camelBack - key: readability-identifier-naming.StructMemberCase value: camelBack - key: modernize-loop-convert.MaxCopySize value: '16' - key: modernize-loop-convert.MinConfidence value: reasonable - key: modernize-loop-convert.NamingStyle value: CamelCase - key: modernize-pass-by-value.IncludeStyle value: llvm - key: modernize-replace-auto-ptr.IncludeStyle value: llvm - key: modernize-use-nullptr.NullMacros value: 'NULL' - key: modernize-use-default-member-init.UseAssignment value: '1' ... 
compute-runtime-22.14.22890/level_zero/core/test/aub_tests/000077500000000000000000000000001422164147700234205ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/aub_tests/CMakeLists.txt000066400000000000000000000072451422164147700261700ustar00rootroot00000000000000# # Copyright (C) 2021-2022 Intel Corporation # # SPDX-License-Identifier: MIT # link_libraries(${ASAN_LIBS} ${TSAN_LIBS}) set(TARGET_NAME ${TARGET_NAME_L0}_aub_tests) set(L0_AUB_TEST_DIR ${CMAKE_CURRENT_SOURCE_DIR}) list(APPEND L0_AUB_TESTS__TARGET_OBJECTS ${CMAKE_CURRENT_SOURCE_DIR}/aub_hello_world_test.cpp ${NEO_SOURCE_DIR}/level_zero/core/test/unit_tests/main.cpp ${NEO_SHARED_TEST_DIRECTORY}/unit_test/tests_configuration.h ) function(ADD_SUPPORTED_TEST_PRODUCT_FAMILIES_DEFINITION) set(L0_TESTED_PRODUCT_FAMILIES ${ALL_TESTED_PRODUCT_FAMILY}) string(REPLACE ";" "," L0_TESTED_PRODUCT_FAMILIES "${L0_TESTED_PRODUCT_FAMILIES}") add_definitions(-DSUPPORTED_TEST_PRODUCT_FAMILIES=${L0_TESTED_PRODUCT_FAMILIES}) endfunction() ADD_SUPPORTED_TEST_PRODUCT_FAMILIES_DEFINITION() if(DEFINED AUB_STREAM_PROJECT_NAME) list(APPEND L0_AUB_TESTS__TARGET_OBJECTS $) endif() add_executable(${TARGET_NAME} ${L0_AUB_TESTS__TARGET_OBJECTS} ) set_target_properties(${TARGET_NAME} PROPERTIES FOLDER ${TARGET_NAME_L0}) target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${L0_CORE_ENABLERS} ${NEO_SOURCE_DIR}/level_zero/core/test/unit_tests/main.cpp ${NEO_SOURCE_DIR}/level_zero/core/test/unit_tests/mock.h ${NEO_SOURCE_DIR}/level_zero/core/test/unit_tests/white_box.h ${NEO_SOURCE_DIR}/level_zero/tools/test/unit_tests/sources/debug/debug_session_helper.cpp ${NEO_SOURCE_DIR}/level_zero/core/source/dll/create_builtin_functions_lib.cpp ${NEO_SOURCE_DIR}/level_zero/core/source/dll/disallow_deferred_deleter.cpp ) target_sources(${TARGET_NAME} PRIVATE $ $ $ $ $ $ $ $ $ $ $ ) if(TARGET ${BUILTINS_SPIRV_LIB_NAME}) target_sources(${TARGET_NAME} PRIVATE $ ) endif() 
copy_gmm_dll_for(${TARGET_NAME}) add_subdirectories() target_link_libraries(${TARGET_NAME} ${NEO_SHARED_MOCKABLE_LIB_NAME}) target_link_libraries(${TARGET_NAME} ${HW_LIBS_ULT}) target_link_libraries(${TARGET_NAME} gmock-gtest) target_link_libraries(${TARGET_NAME} ${NEO_EXTRA_LIBS}) if(UNIX) target_link_libraries(${TARGET_NAME} rt) target_link_libraries(${TARGET_NAME} ${GMM_LINK_NAME}) else() target_link_libraries(${TARGET_NAME} dbghelp) add_dependencies(${TARGET_NAME} ${GMM_TARGET_NAME}) endif() target_include_directories(${TARGET_NAME} PRIVATE ${NEO_SHARED_TEST_DIRECTORY}/common/test_configuration/aub_tests ${NEO_SOURCE_DIR}/shared/test/common/helpers/includes${BRANCH_DIR_SUFFIX} ${NEO_SHARED_TEST_DIRECTORY}/common/test_macros/header${BRANCH_DIR_SUFFIX} ${NEO_SOURCE_DIR}/level_zero/core/test/unit_tests ${ENGINE_NODE_DIR} ${CIF_BASE_DIR} ${NEO__GMM_INCLUDE_DIR} ${THIRD_PARTY_DIR} ${WDK_DIR} ) create_project_source_tree(${TARGET_NAME}) compute-runtime-22.14.22890/level_zero/core/test/aub_tests/aub_hello_world_test.cpp000066400000000000000000000033101422164147700303210ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "level_zero/core/test/aub_tests/fixtures/aub_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_driver_handle.h" #include "test_mode.h" namespace L0 { namespace ult { using AUBHelloWorldL0 = Test; TEST_F(AUBHelloWorldL0, whenAppendMemoryCopyIsCalledThenMemoryIsProperlyCopied) { uint8_t size = 8; uint8_t val = 255; NEO::SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::HOST_UNIFIED_MEMORY, context->rootDeviceIndices, context->deviceBitfields); auto srcMemory = driverHandle->svmAllocsManager->createHostUnifiedMemoryAllocation(size, unifiedMemoryProperties); auto dstMemory = driverHandle->svmAllocsManager->createHostUnifiedMemoryAllocation(size, unifiedMemoryProperties); memset(srcMemory, 
val, size); commandList->appendMemoryCopy(dstMemory, srcMemory, size, 0, 0, nullptr); commandList->close(); auto pHCmdList = std::make_unique(commandList->toHandle()); pCmdq->executeCommandLists(1, pHCmdList.get(), nullptr, false); pCmdq->synchronize(std::numeric_limits::max()); EXPECT_TRUE(csr->expectMemory(dstMemory, srcMemory, size, AubMemDump::CmdServicesMemTraceMemoryCompare::CompareOperationValues::CompareEqual)); driverHandle->svmAllocsManager->freeSVMAlloc(srcMemory); driverHandle->svmAllocsManager->freeSVMAlloc(dstMemory); } } // namespace ult } // namespace L0compute-runtime-22.14.22890/level_zero/core/test/aub_tests/bindless/000077500000000000000000000000001422164147700252235ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/aub_tests/bindless/CMakeLists.txt000066400000000000000000000004001422164147700277550ustar00rootroot00000000000000# # Copyright (C) 2021 Intel Corporation # # SPDX-License-Identifier: MIT # target_sources(ze_intel_gpu_aub_tests PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/bindless_kernel_aub_tests.cpp ) compute-runtime-22.14.22890/level_zero/core/test/aub_tests/bindless/bindless_kernel_aub_tests.cpp000066400000000000000000000113461422164147700331500ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/array_count.h" #include "shared/source/helpers/file_io.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/test_files.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/source/module/module_imp.h" #include "level_zero/core/test/aub_tests/fixtures/aub_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_driver_handle.h" namespace L0 { namespace ult { struct L0BindlessAub : Test { void SetUp() override { DebugManager.flags.UseBindlessMode.set(1); 
DebugManager.flags.UseExternalAllocatorForSshAndDsh.set(1); AUBFixtureL0::SetUp(); } void TearDown() override { module->destroy(); AUBFixtureL0::TearDown(); } void createModuleFromFile(const std::string &fileName, ze_context_handle_t context, L0::Device *device) { std::string testFile; retrieveBinaryKernelFilenameApiSpecific(testFile, fileName + "_", ".bin"); size_t size = 0; auto src = loadDataFromFile( testFile.c_str(), size); ASSERT_NE(0u, size); ASSERT_NE(nullptr, src); ze_module_desc_t moduleDesc = {ZE_STRUCTURE_TYPE_MODULE_DESC}; moduleDesc.format = ZE_MODULE_FORMAT_NATIVE; moduleDesc.pInputModule = reinterpret_cast(src.get()); moduleDesc.inputSize = size; moduleDesc.pBuildFlags = ""; module = new ModuleImp(device, nullptr, ModuleType::User); bool success = module->initialize(&moduleDesc, device->getNEODevice()); ASSERT_TRUE(success); } DebugManagerStateRestore restorer; ModuleImp *module = nullptr; }; HWTEST_F(L0BindlessAub, GivenBindlessKernelWhenExecutedThenOutputIsCorrect) { constexpr size_t bufferSize = MemoryConstants::pageSize; const uint32_t groupSize[] = {32, 1, 1}; const uint32_t groupCount[] = {bufferSize / 32, 1, 1}; NEO::DebugManager.flags.UpdateCrossThreadDataSize.set(true); NEO::SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::HOST_UNIFIED_MEMORY, context->rootDeviceIndices, context->deviceBitfields); auto bufferSrc = driverHandle->svmAllocsManager->createHostUnifiedMemoryAllocation(bufferSize, unifiedMemoryProperties); memset(bufferSrc, 55, bufferSize); auto bufferDst = driverHandle->svmAllocsManager->createHostUnifiedMemoryAllocation(bufferSize, unifiedMemoryProperties); memset(bufferDst, 0, bufferSize); auto simulatedCsr = AUBFixtureL0::getSimulatedCsr(); simulatedCsr->initializeEngine(); simulatedCsr->writeMemory(*driverHandle->svmAllocsManager->getSVMAlloc(bufferSrc)->gpuAllocations.getDefaultGraphicsAllocation()); 
simulatedCsr->writeMemory(*driverHandle->svmAllocsManager->getSVMAlloc(bufferDst)->gpuAllocations.getDefaultGraphicsAllocation()); ze_group_count_t dispatchTraits; dispatchTraits.groupCountX = groupCount[0]; dispatchTraits.groupCountY = groupCount[1]; dispatchTraits.groupCountZ = groupCount[2]; createModuleFromFile("bindless_stateful_copy_buffer", context, device); ze_kernel_handle_t kernel; ze_kernel_desc_t kernelDesc = {ZE_STRUCTURE_TYPE_KERNEL_DESC}; kernelDesc.pKernelName = "StatefulCopyBuffer"; EXPECT_EQ(ZE_RESULT_SUCCESS, zeKernelCreate(module->toHandle(), &kernelDesc, &kernel)); EXPECT_EQ(ZE_RESULT_SUCCESS, zeKernelSetArgumentValue(kernel, 0, sizeof(void *), &bufferSrc)); EXPECT_EQ(ZE_RESULT_SUCCESS, zeKernelSetArgumentValue(kernel, 1, sizeof(void *), &bufferDst)); EXPECT_EQ(ZE_RESULT_SUCCESS, zeKernelSetGroupSize(kernel, groupSize[0], groupSize[1], groupSize[2])); ze_command_list_handle_t cmdListHandle = commandList->toHandle(); EXPECT_EQ(ZE_RESULT_SUCCESS, zeCommandListAppendLaunchKernel(cmdListHandle, kernel, &dispatchTraits, nullptr, 0, nullptr)); commandList->close(); pCmdq->executeCommandLists(1, &cmdListHandle, nullptr, false); pCmdq->synchronize(std::numeric_limits::max()); expectMemory(reinterpret_cast(driverHandle->svmAllocsManager->getSVMAlloc(bufferDst)->gpuAllocations.getDefaultGraphicsAllocation()->getGpuAddress()), bufferSrc, bufferSize); EXPECT_EQ(ZE_RESULT_SUCCESS, zeKernelDestroy(kernel)); driverHandle->svmAllocsManager->freeSVMAlloc(bufferSrc); driverHandle->svmAllocsManager->freeSVMAlloc(bufferDst); } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/aub_tests/cmdlist/000077500000000000000000000000001422164147700250575ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/aub_tests/cmdlist/CMakeLists.txt000066400000000000000000000004071422164147700276200ustar00rootroot00000000000000# # Copyright (C) 2021 Intel Corporation # # SPDX-License-Identifier: MIT # 
target_sources(ze_intel_gpu_aub_tests PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/append_kernel_indirect_aub_tests.cpp ) append_kernel_indirect_aub_tests.cpp000066400000000000000000000234301422164147700342470ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/aub_tests/cmdlist/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/array_count.h" #include "shared/source/helpers/file_io.h" #include "shared/test/common/helpers/test_files.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/test/aub_tests/fixtures/aub_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_driver_handle.h" namespace L0 { namespace ult { struct AUBAppendKernelIndirectL0 : Test { static ze_module_handle_t createModuleFromFile(const std::string &fileName, ze_context_handle_t context, ze_device_handle_t device) { ze_module_handle_t moduleHandle; std::string testFile; retrieveBinaryKernelFilenameApiSpecific(testFile, fileName + "_", ".bin"); size_t size = 0; auto src = loadDataFromFile( testFile.c_str(), size); EXPECT_NE(0u, size); EXPECT_NE(nullptr, src); if (!src || size == 0) { return nullptr; } ze_module_desc_t moduleDesc = {ZE_STRUCTURE_TYPE_MODULE_DESC}; moduleDesc.format = ZE_MODULE_FORMAT_NATIVE; moduleDesc.pInputModule = reinterpret_cast(src.get()); moduleDesc.inputSize = size; moduleDesc.pBuildFlags = ""; EXPECT_EQ(ZE_RESULT_SUCCESS, zeModuleCreate(context, device, &moduleDesc, &moduleHandle, nullptr)); return moduleHandle; } }; TEST_F(AUBAppendKernelIndirectL0, whenAppendKernelIndirectThenGlobalWorkSizeIsProperlyProgrammed) { const uint32_t groupSize[] = {1, 2, 3}; const uint32_t groupCount[] = {4, 3, 1}; const uint32_t expectedGlobalWorkSize[] = {groupSize[0] * groupCount[0], groupSize[1] * groupCount[1], groupSize[2] * groupCount[2]}; uint8_t size = 3 * sizeof(uint32_t); NEO::SVMAllocsManager::UnifiedMemoryProperties 
unifiedMemoryProperties(InternalMemoryType::HOST_UNIFIED_MEMORY, context->rootDeviceIndices, context->deviceBitfields); auto pDispatchTraits = driverHandle->svmAllocsManager->createHostUnifiedMemoryAllocation(sizeof(ze_group_count_t), unifiedMemoryProperties); auto outBuffer = driverHandle->svmAllocsManager->createHostUnifiedMemoryAllocation(size, unifiedMemoryProperties); memset(outBuffer, 0, size); ze_group_count_t &dispatchTraits = *reinterpret_cast(pDispatchTraits); dispatchTraits.groupCountX = groupCount[0]; dispatchTraits.groupCountY = groupCount[1]; dispatchTraits.groupCountZ = groupCount[2]; ze_module_handle_t moduleHandle = createModuleFromFile("test_kernel", context, device); ASSERT_NE(nullptr, moduleHandle); ze_kernel_handle_t kernel; ze_kernel_desc_t kernelDesc = {ZE_STRUCTURE_TYPE_KERNEL_DESC}; kernelDesc.pKernelName = "test_get_global_sizes"; EXPECT_EQ(ZE_RESULT_SUCCESS, zeKernelCreate(moduleHandle, &kernelDesc, &kernel)); EXPECT_EQ(ZE_RESULT_SUCCESS, zeKernelSetArgumentValue(kernel, 0, sizeof(void *), &outBuffer)); EXPECT_EQ(ZE_RESULT_SUCCESS, zeKernelSetGroupSize(kernel, groupSize[0], groupSize[1], groupSize[2])); ze_command_list_handle_t cmdListHandle = commandList->toHandle(); EXPECT_EQ(ZE_RESULT_SUCCESS, zeCommandListAppendLaunchKernelIndirect(cmdListHandle, kernel, &dispatchTraits, nullptr, 0, nullptr)); commandList->close(); pCmdq->executeCommandLists(1, &cmdListHandle, nullptr, false); pCmdq->synchronize(std::numeric_limits::max()); EXPECT_TRUE(csr->expectMemory(outBuffer, expectedGlobalWorkSize, size, AubMemDump::CmdServicesMemTraceMemoryCompare::CompareOperationValues::CompareEqual)); EXPECT_EQ(ZE_RESULT_SUCCESS, zeKernelDestroy(kernel)); EXPECT_EQ(ZE_RESULT_SUCCESS, zeModuleDestroy(moduleHandle)); driverHandle->svmAllocsManager->freeSVMAlloc(outBuffer); driverHandle->svmAllocsManager->freeSVMAlloc(pDispatchTraits); } TEST_F(AUBAppendKernelIndirectL0, whenAppendKernelIndirectThenGroupCountIsProperlyProgrammed) { const uint32_t groupSize[] = 
{1, 2, 3}; const uint32_t groupCount[] = {4, 3, 1}; uint8_t size = 3 * sizeof(uint32_t); NEO::SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::HOST_UNIFIED_MEMORY, context->rootDeviceIndices, context->deviceBitfields); auto pDispatchTraits = driverHandle->svmAllocsManager->createHostUnifiedMemoryAllocation(sizeof(ze_group_count_t), unifiedMemoryProperties); auto outBuffer = driverHandle->svmAllocsManager->createHostUnifiedMemoryAllocation(size, unifiedMemoryProperties); memset(outBuffer, 0, size); ze_group_count_t &dispatchTraits = *reinterpret_cast(pDispatchTraits); dispatchTraits.groupCountX = groupCount[0]; dispatchTraits.groupCountY = groupCount[1]; dispatchTraits.groupCountZ = groupCount[2]; ze_module_handle_t moduleHandle = createModuleFromFile("test_kernel", context, device); ASSERT_NE(nullptr, moduleHandle); ze_kernel_handle_t kernel; ze_kernel_desc_t kernelDesc = {ZE_STRUCTURE_TYPE_KERNEL_DESC}; kernelDesc.pKernelName = "test_get_group_count"; EXPECT_EQ(ZE_RESULT_SUCCESS, zeKernelCreate(moduleHandle, &kernelDesc, &kernel)); EXPECT_EQ(ZE_RESULT_SUCCESS, zeKernelSetArgumentValue(kernel, 0, sizeof(void *), &outBuffer)); EXPECT_EQ(ZE_RESULT_SUCCESS, zeKernelSetGroupSize(kernel, groupSize[0], groupSize[1], groupSize[2])); ze_command_list_handle_t cmdListHandle = commandList->toHandle(); EXPECT_EQ(ZE_RESULT_SUCCESS, zeCommandListAppendLaunchKernelIndirect(cmdListHandle, kernel, &dispatchTraits, nullptr, 0, nullptr)); commandList->close(); pCmdq->executeCommandLists(1, &cmdListHandle, nullptr, false); pCmdq->synchronize(std::numeric_limits::max()); EXPECT_TRUE(csr->expectMemory(outBuffer, groupCount, size, AubMemDump::CmdServicesMemTraceMemoryCompare::CompareOperationValues::CompareEqual)); EXPECT_EQ(ZE_RESULT_SUCCESS, zeKernelDestroy(kernel)); EXPECT_EQ(ZE_RESULT_SUCCESS, zeModuleDestroy(moduleHandle)); driverHandle->svmAllocsManager->freeSVMAlloc(outBuffer); driverHandle->svmAllocsManager->freeSVMAlloc(pDispatchTraits); } 
TEST_F(AUBAppendKernelIndirectL0, whenAppendKernelIndirectThenWorkDimIsProperlyProgrammed) { NEO::SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::HOST_UNIFIED_MEMORY, context->rootDeviceIndices, context->deviceBitfields); std::tuple /*groupSize*/, std::array /*groupCount*/, uint32_t /*expected workdim*/> testData[]{ {{1, 1, 1}, {1, 1, 1}, 1}, {{1, 1, 1}, {2, 1, 1}, 1}, {{1, 1, 1}, {1, 2, 1}, 2}, {{1, 1, 1}, {1, 1, 2}, 3}, {{2, 1, 1}, {1, 1, 1}, 1}, {{2, 1, 1}, {2, 1, 1}, 1}, {{2, 1, 1}, {1, 2, 1}, 2}, {{2, 1, 1}, {1, 1, 2}, 3}, {{1, 2, 1}, {1, 1, 1}, 2}, {{1, 2, 1}, {2, 1, 1}, 2}, {{1, 2, 1}, {1, 2, 1}, 2}, {{1, 2, 1}, {1, 1, 2}, 3}, {{1, 1, 2}, {1, 1, 1}, 3}, {{1, 1, 2}, {2, 1, 1}, 3}, {{1, 1, 2}, {1, 2, 1}, 3}, {{1, 1, 2}, {1, 1, 2}, 3}}; ze_command_list_handle_t cmdListHandle = commandList->toHandle(); ze_module_handle_t moduleHandle = createModuleFromFile("test_kernel", context, device); ASSERT_NE(nullptr, moduleHandle); ze_kernel_handle_t kernel; ze_kernel_desc_t kernelDesc = {ZE_STRUCTURE_TYPE_KERNEL_DESC}; kernelDesc.pKernelName = "test_get_work_dim"; EXPECT_EQ(ZE_RESULT_SUCCESS, zeKernelCreate(moduleHandle, &kernelDesc, &kernel)); for (auto i = 0u; i < arrayCount<>(testData); i++) { std::array groupSize; std::array groupCount; uint32_t expectedWorkDim; std::tie(groupSize, groupCount, expectedWorkDim) = testData[i]; uint8_t size = sizeof(uint32_t); auto pDispatchTraits = driverHandle->svmAllocsManager->createHostUnifiedMemoryAllocation(sizeof(ze_group_count_t), unifiedMemoryProperties); auto outBuffer = driverHandle->svmAllocsManager->createHostUnifiedMemoryAllocation(size, unifiedMemoryProperties); memset(outBuffer, 0, size); ze_group_count_t &dispatchTraits = *reinterpret_cast(pDispatchTraits); dispatchTraits.groupCountX = groupCount[0]; dispatchTraits.groupCountY = groupCount[1]; dispatchTraits.groupCountZ = groupCount[2]; EXPECT_EQ(ZE_RESULT_SUCCESS, zeKernelSetArgumentValue(kernel, 0, sizeof(void *), &outBuffer)); 
EXPECT_EQ(ZE_RESULT_SUCCESS, zeKernelSetGroupSize(kernel, groupSize[0], groupSize[1], groupSize[2])); EXPECT_EQ(ZE_RESULT_SUCCESS, zeCommandListAppendLaunchKernelIndirect(cmdListHandle, kernel, &dispatchTraits, nullptr, 0, nullptr)); commandList->close(); pCmdq->executeCommandLists(1, &cmdListHandle, nullptr, false); pCmdq->synchronize(std::numeric_limits::max()); EXPECT_TRUE(csr->expectMemory(outBuffer, &expectedWorkDim, size, AubMemDump::CmdServicesMemTraceMemoryCompare::CompareOperationValues::CompareEqual)); driverHandle->svmAllocsManager->freeSVMAlloc(outBuffer); driverHandle->svmAllocsManager->freeSVMAlloc(pDispatchTraits); commandList->reset(); } EXPECT_EQ(ZE_RESULT_SUCCESS, zeKernelDestroy(kernel)); EXPECT_EQ(ZE_RESULT_SUCCESS, zeModuleDestroy(moduleHandle)); } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/aub_tests/debugger/000077500000000000000000000000001422164147700252045ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/aub_tests/debugger/CMakeLists.txt000066400000000000000000000003761422164147700277520ustar00rootroot00000000000000# # Copyright (C) 2021-2022 Intel Corporation # # SPDX-License-Identifier: MIT # target_sources(ze_intel_gpu_aub_tests PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/debugger_aub_tests.cpp ) compute-runtime-22.14.22890/level_zero/core/test/aub_tests/debugger/debugger_aub_tests.cpp000066400000000000000000000151231422164147700315470ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/array_count.h" #include "shared/source/helpers/file_io.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/default_hw_info.h" #include "shared/test/common/helpers/test_files.h" #include "shared/test/common/helpers/variable_backup.h" #include 
"shared/test/common/mocks/mock_device.h" #include "shared/test/common/mocks/mock_io_functions.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/source/module/module_imp.h" #include "level_zero/core/test/aub_tests/fixtures/aub_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_driver_handle.h" namespace L0 { namespace ult { struct DebuggerAub : Test { void SetUp() override { AUBFixtureL0::SetUp(NEO::defaultHwInfo.get(), true); } void TearDown() override { module->destroy(); AUBFixtureL0::TearDown(); } void createModuleFromFile(const std::string &fileName, ze_context_handle_t context, L0::Device *device) { std::string testFile; retrieveBinaryKernelFilenameApiSpecific(testFile, fileName + "_", ".bin"); size_t size = 0; auto src = loadDataFromFile( testFile.c_str(), size); ASSERT_NE(0u, size); ASSERT_NE(nullptr, src); ze_module_desc_t moduleDesc = {ZE_STRUCTURE_TYPE_MODULE_DESC}; moduleDesc.format = ZE_MODULE_FORMAT_NATIVE; moduleDesc.pInputModule = reinterpret_cast(src.get()); moduleDesc.inputSize = size; moduleDesc.pBuildFlags = ""; module = new ModuleImp(device, nullptr, ModuleType::User); bool success = module->initialize(&moduleDesc, device->getNEODevice()); ASSERT_TRUE(success); } DebugManagerStateRestore restorer; ModuleImp *module = nullptr; }; struct DebuggerSingleAddressSpaceAub : public DebuggerAub { void SetUp() override { NEO::DebugManager.flags.DebuggerForceSbaTrackingMode.set(1); DebuggerAub::SetUp(); } void TearDown() override { DebuggerAub::TearDown(); } }; using IsBetweenGen12LpAndXeHp = IsWithinGfxCore; HWTEST2_F(DebuggerSingleAddressSpaceAub, GivenSingleAddressSpaceWhenCmdListIsExecutedThenSbaAddressesAreTracked, IsBetweenGen12LpAndXeHp) { constexpr size_t bufferSize = MemoryConstants::pageSize; const uint32_t groupSize[] = {32, 1, 1}; const uint32_t groupCount[] = {bufferSize / 32, 1, 1}; const uint32_t expectedSizes[] = {bufferSize, 1, 1}; NEO::DebugManager.flags.UpdateCrossThreadDataSize.set(true); 
NEO::SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::HOST_UNIFIED_MEMORY, context->rootDeviceIndices, context->deviceBitfields); auto bufferDst = driverHandle->svmAllocsManager->createHostUnifiedMemoryAllocation(bufferSize, unifiedMemoryProperties); memset(bufferDst, 0, bufferSize); auto simulatedCsr = AUBFixtureL0::getSimulatedCsr(); simulatedCsr->initializeEngine(); simulatedCsr->writeMemory(*driverHandle->svmAllocsManager->getSVMAlloc(bufferDst)->gpuAllocations.getDefaultGraphicsAllocation()); ze_group_count_t dispatchTraits; dispatchTraits.groupCountX = groupCount[0]; dispatchTraits.groupCountY = groupCount[1]; dispatchTraits.groupCountZ = groupCount[2]; createModuleFromFile("test_kernel", context, device); ze_kernel_handle_t kernel; ze_kernel_desc_t kernelDesc = {ZE_STRUCTURE_TYPE_KERNEL_DESC}; kernelDesc.pKernelName = "test_get_global_sizes"; EXPECT_EQ(ZE_RESULT_SUCCESS, zeKernelCreate(module->toHandle(), &kernelDesc, &kernel)); EXPECT_EQ(ZE_RESULT_SUCCESS, zeKernelSetArgumentValue(kernel, 0, sizeof(void *), &bufferDst)); EXPECT_EQ(ZE_RESULT_SUCCESS, zeKernelSetGroupSize(kernel, groupSize[0], groupSize[1], groupSize[2])); ze_command_list_handle_t cmdListHandle = commandList->toHandle(); EXPECT_EQ(ZE_RESULT_SUCCESS, zeCommandListAppendLaunchKernel(cmdListHandle, kernel, &dispatchTraits, nullptr, 0, nullptr)); commandList->close(); pCmdq->executeCommandLists(1, &cmdListHandle, nullptr, false); pCmdq->synchronize(std::numeric_limits::max()); expectMemory(reinterpret_cast(driverHandle->svmAllocsManager->getSVMAlloc(bufferDst)->gpuAllocations.getDefaultGraphicsAllocation()->getGpuAddress()), expectedSizes, sizeof(expectedSizes)); const auto sbaAddress = device->getL0Debugger()->getSbaTrackingBuffer(csr->getOsContext().getContextId())->getGpuAddress(); uint32_t low = sbaAddress & 0xffffffff; uint32_t high = (sbaAddress >> 32) & 0xffffffff; expectMMIO(CS_GPR_R15, low); expectMMIO(CS_GPR_R15 + 4, high); auto 
instructionHeapBaseAddress = neoDevice->getMemoryManager()->getInternalHeapBaseAddress(device->getRootDeviceIndex(), neoDevice->getMemoryManager()->isLocalMemoryUsedForIsa(neoDevice->getRootDeviceIndex())); auto dynamicStateBaseAddress = NEO::GmmHelper::decanonize(commandList->commandContainer.getIndirectHeap(HeapType::DYNAMIC_STATE)->getGraphicsAllocation()->getGpuAddress()); auto surfaceStateBaseAddress = NEO::GmmHelper::decanonize(commandList->commandContainer.getIndirectHeap(HeapType::SURFACE_STATE)->getGraphicsAllocation()->getGpuAddress()); expectMemory(reinterpret_cast(sbaAddress + offsetof(SbaTrackedAddresses, SurfaceStateBaseAddress)), &surfaceStateBaseAddress, sizeof(surfaceStateBaseAddress)); expectMemory(reinterpret_cast(sbaAddress + offsetof(SbaTrackedAddresses, DynamicStateBaseAddress)), &dynamicStateBaseAddress, sizeof(dynamicStateBaseAddress)); expectMemory(reinterpret_cast(sbaAddress + offsetof(SbaTrackedAddresses, InstructionBaseAddress)), &instructionHeapBaseAddress, sizeof(instructionHeapBaseAddress)); expectMemory(reinterpret_cast(sbaAddress + offsetof(SbaTrackedAddresses, BindlessSurfaceStateBaseAddress)), &surfaceStateBaseAddress, sizeof(surfaceStateBaseAddress)); EXPECT_EQ(ZE_RESULT_SUCCESS, zeKernelDestroy(kernel)); driverHandle->svmAllocsManager->freeSVMAlloc(bufferDst); } } // namespace ult } // namespace L0compute-runtime-22.14.22890/level_zero/core/test/aub_tests/fixtures/000077500000000000000000000000001422164147700252715ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/aub_tests/fixtures/CMakeLists.txt000066400000000000000000000005011422164147700300250ustar00rootroot00000000000000# # Copyright (C) 2021 Intel Corporation # # SPDX-License-Identifier: MIT # target_sources(ze_intel_gpu_aub_tests PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/aub_fixture.cpp ${CMAKE_CURRENT_SOURCE_DIR}/aub_fixture.h ) add_subdirectories() 
compute-runtime-22.14.22890/level_zero/core/test/aub_tests/fixtures/aub_fixture.cpp000066400000000000000000000076351422164147700303250ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/test/aub_tests/fixtures/aub_fixture.h" #include "shared/source/command_stream/tbx_command_stream_receiver_hw.h" #include "shared/source/helpers/api_specific_config.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/unit_test/tests_configuration.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h" #include "level_zero/core/test/unit_tests/mocks/mock_driver_handle.h" #include "gtest/gtest.h" #include "test_mode.h" namespace L0 { AUBFixtureL0::AUBFixtureL0() = default; AUBFixtureL0::~AUBFixtureL0() = default; void AUBFixtureL0::prepareCopyEngines(NEO::MockDevice &device, const std::string &filename) { for (auto i = 0u; i < device.allEngines.size(); i++) { if (NEO::EngineHelpers::isBcs(device.allEngines[i].getEngineType())) { NEO::CommandStreamReceiver *pBcsCommandStreamReceiver = nullptr; pBcsCommandStreamReceiver = NEO::AUBCommandStreamReceiver::create(filename, true, *device.executionEnvironment, device.getRootDeviceIndex(), device.getDeviceBitfield()); device.resetCommandStreamReceiver(pBcsCommandStreamReceiver, i); } } } void AUBFixtureL0::SetUp() { SetUp(NEO::defaultHwInfo.get(), false); } void AUBFixtureL0::SetUp(const NEO::HardwareInfo *hardwareInfo, bool debuggingEnabled) { ASSERT_NE(nullptr, hardwareInfo); const auto &hwInfo = *hardwareInfo; auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily); auto engineType = getChosenEngineType(hwInfo); const ::testing::TestInfo *const testInfo = ::testing::UnitTest::GetInstance()->current_test_info(); std::stringstream strfilename; strfilename << NEO::ApiSpecificConfig::getAubPrefixForSpecificApi(); strfilename << testInfo->test_case_name() << "_" << testInfo->name() << "_" << 
hwHelper.getCsTraits(engineType).name; executionEnvironment = new NEO::ExecutionEnvironment(); executionEnvironment->prepareRootDeviceEnvironments(1u); executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(&hwInfo); if (debuggingEnabled) { executionEnvironment->setDebuggingEnabled(); } neoDevice = NEO::MockDevice::createWithExecutionEnvironment(&hwInfo, executionEnvironment, 0u); if (NEO::testMode == NEO::TestMode::AubTestsWithTbx) { this->csr = NEO::TbxCommandStreamReceiver::create(strfilename.str(), true, *executionEnvironment, 0, neoDevice->getDeviceBitfield()); } else { this->csr = NEO::AUBCommandStreamReceiver::create(strfilename.str(), true, *executionEnvironment, 0, neoDevice->getDeviceBitfield()); } neoDevice->resetCommandStreamReceiver(this->csr); prepareCopyEngines(*neoDevice, strfilename.str()); NEO::DeviceVector devices; devices.push_back(std::unique_ptr(neoDevice)); driverHandle = std::make_unique>(); driverHandle->enableProgramDebugging = debuggingEnabled; driverHandle->initialize(std::move(devices)); device = driverHandle->devices[0]; ze_context_handle_t hContext; ze_context_desc_t desc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC, nullptr, 0}; ze_result_t res = driverHandle->createContext(&desc, 0u, nullptr, &hContext); EXPECT_EQ(ZE_RESULT_SUCCESS, res); context = static_cast(Context::fromHandle(hContext)); ze_result_t returnValue; commandList.reset(ult::whitebox_cast(CommandList::create(hwInfo.platform.eProductFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue))); returnValue = ZE_RESULT_ERROR_UNINITIALIZED; ze_command_queue_desc_t queueDesc = {}; pCmdq = CommandQueue::create(hwInfo.platform.eProductFamily, device, csr, &queueDesc, false, false, returnValue); EXPECT_EQ(ZE_RESULT_SUCCESS, res); } void AUBFixtureL0::TearDown() { context->destroy(); pCmdq->destroy(); } } // namespace L0 
compute-runtime-22.14.22890/level_zero/core/test/aub_tests/fixtures/aub_fixture.h000066400000000000000000000077101422164147700277640ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/aub_command_stream_receiver_hw.h" #include "shared/source/command_stream/command_stream_receiver_simulated_common_hw.h" #include "shared/source/command_stream/command_stream_receiver_with_aub_dump.h" #include "shared/source/command_stream/tbx_command_stream_receiver_hw.h" #include "test_mode.h" #include #include #include namespace NEO { class CommandStreamReceiver; class MockDevice; class ExecutionEnvironment; class MemoryManager; struct HardwareInfo; } // namespace NEO namespace L0 { namespace ult { template struct Mock; template struct WhiteBox; } // namespace ult struct ContextImp; struct DriverHandleImp; struct CommandQueue; struct CommandList; struct Device; class AUBFixtureL0 { public: AUBFixtureL0(); virtual ~AUBFixtureL0(); void SetUp(); void SetUp(const NEO::HardwareInfo *hardwareInfo, bool debuggingEnabled); void TearDown(); static void prepareCopyEngines(NEO::MockDevice &device, const std::string &filename); template NEO::CommandStreamReceiverSimulatedCommonHw *getSimulatedCsr() const { return static_cast *>(csr); } template void expectMemory(void *gfxAddress, const void *srcAddress, size_t length) { NEO::CommandStreamReceiverSimulatedCommonHw *csrSimulated = getSimulatedCsr(); if (NEO::testMode == NEO::TestMode::AubTestsWithTbx) { auto tbxCsr = csrSimulated; EXPECT_TRUE(tbxCsr->expectMemoryEqual(gfxAddress, srcAddress, length)); csrSimulated = static_cast *>( static_cast> *>(csr)->aubCSR.get()); } if (csrSimulated) { csrSimulated->expectMemoryEqual(gfxAddress, srcAddress, length); } } template void expectNotEqualMemory(void *gfxAddress, const void *srcAddress, size_t length) { NEO::CommandStreamReceiverSimulatedCommonHw *csrSimulated = getSimulatedCsr(); if (NEO::testMode 
== NEO::TestMode::AubTestsWithTbx) { auto tbxCsr = csrSimulated; EXPECT_TRUE(tbxCsr->expectMemoryNotEqual(gfxAddress, srcAddress, length)); csrSimulated = static_cast *>( static_cast> *>(csr)->aubCSR.get()); } if (csrSimulated) { csrSimulated->expectMemoryNotEqual(gfxAddress, srcAddress, length); } } template void expectMMIO(uint32_t mmioRegister, uint32_t expectedValue) { NEO::AUBCommandStreamReceiverHw *aubCsr = static_cast *>(csr); if (NEO::testMode == NEO::TestMode::AubTestsWithTbx) { aubCsr = static_cast *>(static_cast> *>(csr)->aubCSR.get()); } if (aubCsr) { aubCsr->expectMMIO(mmioRegister, expectedValue); } } const uint32_t rootDeviceIndex = 0; NEO::ExecutionEnvironment *executionEnvironment; NEO::MemoryManager *memoryManager = nullptr; NEO::MockDevice *neoDevice = nullptr; std::unique_ptr> driverHandle; std::unique_ptr> commandList; Device *device = nullptr; ContextImp *context = nullptr; CommandQueue *pCmdq = nullptr; NEO::CommandStreamReceiver *csr = nullptr; }; } // namespace L0compute-runtime-22.14.22890/level_zero/core/test/black_box_tests/000077500000000000000000000000001422164147700245755ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/black_box_tests/CMakeLists.txt000066400000000000000000000035651422164147700273460ustar00rootroot00000000000000# # Copyright (C) 2020-2022 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_BLACK_BOX_TEST_PROJECT_FOLDER "ze_intel_gpu/black_box_tests") set(TEST_TARGETS zello_timestamp zello_world_gpu zello_world_jitc_ocloc zello_ipc_copy_dma_buf zello_ipc_copy_dma_buf_p2p zello_copy zello_copy_only zello_copy_fence zello_copy_image zello_world_usm zello_world_global_work_offset zello_scratch zello_fence zello_printf zello_image zello_image_view zello_dynamic_link zello_events zello_immediate ) include_directories(common) foreach(TEST_NAME ${TEST_TARGETS}) if(MSVC) if(${TEST_NAME} STREQUAL "zello_ipc_copy_dma_buf") continue() endif() if(${TEST_NAME} STREQUAL 
"zello_ipc_copy_dma_buf_p2p") continue() endif() if(${TEST_NAME} STREQUAL "zello_world_global_work_offset") continue() endif() endif() add_executable(${TEST_NAME} ${TEST_NAME}.cpp) set_target_properties(${TEST_NAME} PROPERTIES VS_DEBUGGER_COMMAND "$(TargetPath)" VS_DEBUGGER_COMMAND_ARGUMENTS "" VS_DEBUGGER_WORKING_DIRECTORY "${TargetDir}" ) add_dependencies(${TEST_NAME} ${TARGET_NAME_L0}) target_link_libraries(${TEST_NAME} PUBLIC ${TARGET_NAME_L0}) set_target_properties(${TEST_NAME} PROPERTIES FOLDER ${L0_BLACK_BOX_TEST_PROJECT_FOLDER}) endforeach() target_link_libraries(zello_world_jitc_ocloc PUBLIC ocloc_lib) target_link_libraries(zello_scratch PUBLIC ocloc_lib) target_link_libraries(zello_fence PUBLIC ocloc_lib) target_link_libraries(zello_printf PUBLIC ocloc_lib) target_link_libraries(zello_image_view PUBLIC ocloc_lib) target_link_libraries(zello_dynamic_link PUBLIC ocloc_lib) if(UNIX) target_link_libraries(zello_world_global_work_offset PUBLIC ocloc_lib) endif() add_subdirectories() compute-runtime-22.14.22890/level_zero/core/test/black_box_tests/common/000077500000000000000000000000001422164147700260655ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/black_box_tests/common/zello_common.h000066400000000000000000000213631422164147700307400ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include #include #include #include #include #include #include #include extern bool verbose; template inline void validate(ResulT result, const char *message) { if (result == ZE_RESULT_SUCCESS) { if (verbose) { std::cerr << "SUCCESS : " << message << std::endl; } return; } if (verbose) { std::cerr << (TerminateOnFailure ? 
"ERROR : " : "WARNING : ") << message << " : " << result << std::endl; } if (TerminateOnFailure) { std::terminate(); } } #define SUCCESS_OR_TERMINATE(CALL) validate(CALL, #CALL) #define SUCCESS_OR_TERMINATE_BOOL(FLAG) validate(!(FLAG), #FLAG) #define SUCCESS_OR_WARNING(CALL) validate(CALL, #CALL) #define SUCCESS_OR_WARNING_BOOL(FLAG) validate(!(FLAG), #FLAG) inline bool isParamEnabled(int argc, char *argv[], const char *shortName, const char *longName) { char **arg = &argv[1]; char **argE = &argv[argc]; for (; arg != argE; ++arg) { if ((0 == strcmp(*arg, shortName)) || (0 == strcmp(*arg, longName))) { return true; } } return false; } inline int getParamValue(int argc, char *argv[], const char *shortName, const char *longName, int defaultValue) { char **arg = &argv[1]; char **argE = &argv[argc]; for (; arg != argE; ++arg) { if ((0 == strcmp(*arg, shortName)) || (0 == strcmp(*arg, longName))) { arg++; return atoi(*arg); } } return defaultValue; } inline bool isCircularDepTest(int argc, char *argv[]) { bool enabled = isParamEnabled(argc, argv, "-c", "--circular"); if (enabled == false) { return false; } std::cerr << "Circular Dependency Test mode detected" << std::endl; return true; } inline bool isVerbose(int argc, char *argv[]) { bool enabled = isParamEnabled(argc, argv, "-v", "--verbose"); if (enabled == false) { return false; } std::cerr << "Verbose mode detected" << std::endl; return true; } inline bool isSyncQueueEnabled(int argc, char *argv[]) { bool enabled = isParamEnabled(argc, argv, "-s", "--sync"); if (enabled == false) { std::cerr << "Async Queue detected" << std::endl; return false; } std::cerr << "Sync Queue detected" << std::endl; return true; } uint32_t getCommandQueueOrdinal(ze_device_handle_t &device) { uint32_t numQueueGroups = 0; SUCCESS_OR_TERMINATE(zeDeviceGetCommandQueueGroupProperties(device, &numQueueGroups, nullptr)); if (numQueueGroups == 0) { std::cout << "No queue groups found!\n"; std::terminate(); } std::vector 
queueProperties(numQueueGroups); SUCCESS_OR_TERMINATE(zeDeviceGetCommandQueueGroupProperties(device, &numQueueGroups, queueProperties.data())); uint32_t computeQueueGroupOrdinal = numQueueGroups; for (uint32_t i = 0; i < numQueueGroups; i++) { if (queueProperties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE) { computeQueueGroupOrdinal = i; break; } } return computeQueueGroupOrdinal; } int32_t getCopyOnlyCommandQueueOrdinal(ze_device_handle_t &device) { uint32_t numQueueGroups = 0; SUCCESS_OR_TERMINATE(zeDeviceGetCommandQueueGroupProperties(device, &numQueueGroups, nullptr)); if (numQueueGroups == 0) { std::cout << "No queue groups found!\n"; std::terminate(); } std::vector queueProperties(numQueueGroups); SUCCESS_OR_TERMINATE(zeDeviceGetCommandQueueGroupProperties(device, &numQueueGroups, queueProperties.data())); int32_t copyOnlyQueueGroupOrdinal = -1; for (uint32_t i = 0; i < numQueueGroups; i++) { if (!(queueProperties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE) && (queueProperties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY)) { copyOnlyQueueGroupOrdinal = i; break; } } return copyOnlyQueueGroupOrdinal; } ze_command_queue_handle_t createCommandQueue(ze_context_handle_t &context, ze_device_handle_t &device, uint32_t *ordinal) { ze_command_queue_handle_t cmdQueue; ze_command_queue_desc_t descriptor = {}; descriptor.stype = ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC; descriptor.pNext = nullptr; descriptor.flags = 0; descriptor.mode = ZE_COMMAND_QUEUE_MODE_DEFAULT; descriptor.priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL; descriptor.ordinal = getCommandQueueOrdinal(device); descriptor.index = 0; SUCCESS_OR_TERMINATE(zeCommandQueueCreate(context, device, &descriptor, &cmdQueue)); if (ordinal != nullptr) { *ordinal = descriptor.ordinal; } return cmdQueue; } ze_result_t createCommandList(ze_context_handle_t &context, ze_device_handle_t &device, ze_command_list_handle_t &cmdList) { ze_command_list_desc_t descriptor = {}; descriptor.stype = 
ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC; descriptor.pNext = nullptr; descriptor.flags = 0; descriptor.commandQueueGroupOrdinal = getCommandQueueOrdinal(device); return zeCommandListCreate(context, device, &descriptor, &cmdList); } void createEventPoolAndEvents(ze_context_handle_t &context, ze_device_handle_t &device, ze_event_pool_handle_t &eventPool, ze_event_pool_flag_t poolFlag, uint32_t poolSize, ze_event_handle_t *events, ze_event_scope_flag_t signalScope, ze_event_scope_flag_t waitScope) { ze_event_pool_desc_t eventPoolDesc{ZE_STRUCTURE_TYPE_EVENT_POOL_DESC}; ze_event_desc_t eventDesc = {ZE_STRUCTURE_TYPE_EVENT_DESC}; eventPoolDesc.count = poolSize; eventPoolDesc.flags = poolFlag; SUCCESS_OR_TERMINATE(zeEventPoolCreate(context, &eventPoolDesc, 1, &device, &eventPool)); for (uint32_t i = 0; i < poolSize; i++) { eventDesc.index = i; eventDesc.signal = signalScope; eventDesc.wait = waitScope; SUCCESS_OR_TERMINATE(zeEventCreate(eventPool, &eventDesc, events + i)); } } std::vector zelloGetSubDevices(ze_device_handle_t &device, int &subDevCount) { uint32_t deviceCount = 0; std::vector subdevs(deviceCount, nullptr); SUCCESS_OR_TERMINATE(zeDeviceGetSubDevices(device, &deviceCount, nullptr)); if (deviceCount == 0) { std::cout << "No sub device found!\n"; subDevCount = 0; return subdevs; } subDevCount = deviceCount; subdevs.resize(deviceCount); SUCCESS_OR_TERMINATE(zeDeviceGetSubDevices(device, &deviceCount, subdevs.data())); return subdevs; } std::vector zelloInitContextAndGetDevices(ze_context_handle_t &context, ze_driver_handle_t &driverHandle) { SUCCESS_OR_TERMINATE(zeInit(ZE_INIT_FLAG_GPU_ONLY)); uint32_t driverCount = 0; SUCCESS_OR_TERMINATE(zeDriverGet(&driverCount, nullptr)); if (driverCount == 0) { std::cout << "No driver handle found!\n"; std::terminate(); } SUCCESS_OR_TERMINATE(zeDriverGet(&driverCount, &driverHandle)); ze_context_desc_t context_desc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC, nullptr, 0}; SUCCESS_OR_TERMINATE(zeContextCreate(driverHandle, &context_desc, 
&context)); uint32_t deviceCount = 0; SUCCESS_OR_TERMINATE(zeDeviceGet(driverHandle, &deviceCount, nullptr)); if (deviceCount == 0) { std::cout << "No device found!\n"; std::terminate(); } std::vector devices(deviceCount, nullptr); SUCCESS_OR_TERMINATE(zeDeviceGet(driverHandle, &deviceCount, devices.data())); return devices; } std::vector zelloInitContextAndGetDevices(ze_context_handle_t &context) { ze_driver_handle_t driverHandle; return zelloInitContextAndGetDevices(context, driverHandle); } void initialize(ze_driver_handle_t &driver, ze_context_handle_t &context, ze_device_handle_t &device, ze_command_queue_handle_t &cmdQueue, uint32_t &ordinal) { std::vector devices; devices = zelloInitContextAndGetDevices(context, driver); device = devices[0]; cmdQueue = createCommandQueue(context, device, &ordinal); } static inline void teardown(ze_context_handle_t context, ze_command_queue_handle_t cmdQueue) { SUCCESS_OR_TERMINATE(zeCommandQueueDestroy(cmdQueue)); SUCCESS_OR_TERMINATE(zeContextDestroy(context)); } compute-runtime-22.14.22890/level_zero/core/test/black_box_tests/common/zello_compile.h000066400000000000000000000045121422164147700310750ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/offline_compiler/source/ocloc_api.h" #include std::vector compileToSpirV(const std::string &src, const std::string &options, std::string &outCompilerLog) { std::vector ret; const char *mainFileName = "main.cl"; const char *argv[] = {"ocloc", "-q", "-device", "skl", "-file", mainFileName, "", ""}; uint32_t numArgs = sizeof(argv) / sizeof(argv[0]) - 2; if (options.size() > 0) { argv[6] = "-options"; argv[7] = options.c_str(); numArgs += 2; } const unsigned char *sources[] = {reinterpret_cast(src.c_str())}; size_t sourcesLengths[] = {src.size() + 1}; const char *sourcesNames[] = {mainFileName}; unsigned int numOutputs = 0U; unsigned char **outputs = nullptr; size_t *ouputLengths = nullptr; char **outputNames 
= nullptr; int result = oclocInvoke(numArgs, argv, 1, sources, sourcesLengths, sourcesNames, 0, nullptr, nullptr, nullptr, &numOutputs, &outputs, &ouputLengths, &outputNames); unsigned char *spirV = nullptr; size_t spirVlen = 0; const char *log = nullptr; size_t logLen = 0; for (unsigned int i = 0; i < numOutputs; ++i) { std::string spvExtension = ".spv"; std::string logFileName = "stdout.log"; auto nameLen = strlen(outputNames[i]); if ((nameLen > spvExtension.size()) && (strstr(&outputNames[i][nameLen - spvExtension.size()], spvExtension.c_str()) != nullptr)) { spirV = outputs[i]; spirVlen = ouputLengths[i]; } else if ((nameLen >= logFileName.size()) && (strstr(outputNames[i], logFileName.c_str()) != nullptr)) { log = reinterpret_cast(outputs[i]); logLen = ouputLengths[i]; break; } } if ((result != 0) && (logLen == 0)) { outCompilerLog = "Unknown error, ocloc returned : " + std::to_string(result) + "\n"; return ret; } if (logLen != 0) { outCompilerLog = std::string(log, logLen).c_str(); } ret.assign(spirV, spirV + spirVlen); oclocFreeOutput(&numOutputs, &outputs, &ouputLengths, &outputNames); return ret; }compute-runtime-22.14.22890/level_zero/core/test/black_box_tests/zello_copy.cpp000066400000000000000000000504031422164147700274620ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "zello_common.h" #include extern bool verbose; bool verbose = false; void testAppendMemoryCopyFromHeapToDeviceToStack(ze_context_handle_t context, ze_device_handle_t &device, bool &validRet) { const size_t allocSize = 4096 + 7; // +7 to break alignment and make it harder char *heapBuffer = new char[allocSize]; void *zeBuffer = nullptr; char stackBuffer[allocSize]; ze_command_queue_handle_t cmdQueue; ze_command_list_handle_t cmdList; cmdQueue = createCommandQueue(context, device, nullptr); SUCCESS_OR_TERMINATE(createCommandList(context, device, cmdList)); ze_device_mem_alloc_desc_t deviceDesc = {}; deviceDesc.stype 
= ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC; deviceDesc.ordinal = 0; deviceDesc.flags = 0; deviceDesc.pNext = nullptr; SUCCESS_OR_TERMINATE(zeMemAllocDevice(context, &deviceDesc, allocSize, allocSize, device, &zeBuffer)); for (size_t i = 0; i < allocSize; ++i) { heapBuffer[i] = static_cast(i + 1); } memset(stackBuffer, 0, allocSize); // Copy from heap to device-allocated memory SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopy(cmdList, zeBuffer, heapBuffer, allocSize, nullptr, 0, nullptr)); SUCCESS_OR_TERMINATE(zeCommandListAppendBarrier(cmdList, nullptr, 0, nullptr)); // Copy from device-allocated memory to stack SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopy(cmdList, stackBuffer, zeBuffer, allocSize, nullptr, 0, nullptr)); SUCCESS_OR_TERMINATE(zeCommandListClose(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr)); SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits::max())); // Validate stack and ze buffers have the original data from heapBuffer validRet = (0 == memcmp(heapBuffer, stackBuffer, allocSize)); delete[] heapBuffer; SUCCESS_OR_TERMINATE(zeMemFree(context, zeBuffer)); SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueDestroy(cmdQueue)); } void testAppendMemoryCopyFromHostToDeviceToStack(ze_context_handle_t context, ze_device_handle_t &device, bool &validRet) { const size_t allocSize = 4096 + 7; // +7 to break alignment and make it harder char *hostBuffer; void *zeBuffer = nullptr; char stackBuffer[allocSize]; ze_command_queue_handle_t cmdQueue; ze_command_list_handle_t cmdList; cmdQueue = createCommandQueue(context, device, nullptr); SUCCESS_OR_TERMINATE(createCommandList(context, device, cmdList)); ze_host_mem_alloc_desc_t hostDesc = {}; hostDesc.stype = ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC; hostDesc.pNext = nullptr; hostDesc.flags = 0; SUCCESS_OR_TERMINATE(zeMemAllocHost(context, &hostDesc, allocSize, 1, (void **)(&hostBuffer))); 
ze_device_mem_alloc_desc_t deviceDesc = {}; deviceDesc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC; deviceDesc.ordinal = 0; deviceDesc.flags = 0; deviceDesc.pNext = nullptr; SUCCESS_OR_TERMINATE(zeMemAllocDevice(context, &deviceDesc, allocSize, allocSize, device, &zeBuffer)); for (size_t i = 0; i < allocSize; ++i) { hostBuffer[i] = static_cast(i + 1); } memset(stackBuffer, 0, allocSize); // Copy from host-allocated to device-allocated memory SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopy(cmdList, zeBuffer, hostBuffer, allocSize, nullptr, 0, nullptr)); SUCCESS_OR_TERMINATE(zeCommandListAppendBarrier(cmdList, nullptr, 0, nullptr)); // Copy from device-allocated memory to stack SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopy(cmdList, stackBuffer, zeBuffer, allocSize, nullptr, 0, nullptr)); SUCCESS_OR_TERMINATE(zeCommandListClose(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr)); SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits::max())); // Validate stack and ze buffers have the original data from hostBuffer validRet = (0 == memcmp(hostBuffer, stackBuffer, allocSize)); SUCCESS_OR_TERMINATE(zeMemFree(context, hostBuffer)); SUCCESS_OR_TERMINATE(zeMemFree(context, zeBuffer)); SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueDestroy(cmdQueue)); } void testAppendMemoryCopy2DRegion(ze_context_handle_t context, ze_device_handle_t &device, bool &validRet) { validRet = true; ze_command_queue_handle_t cmdQueue; ze_command_list_handle_t cmdList; cmdQueue = createCommandQueue(context, device, nullptr); SUCCESS_OR_TERMINATE(createCommandList(context, device, cmdList)); void *dstBuffer = nullptr; uint32_t dstWidth = verbose ? 16 : 256; // width of the dst 2D buffer in bytes uint32_t dstHeight = verbose ? 32 : 128; // height of the dst 2D buffer in bytes uint32_t dstOriginX = verbose ? 8 : 32; // Offset in bytes uint32_t dstOriginY = verbose ? 
8 : 64; // Offset in rows uint32_t dstSize = dstHeight * dstWidth; // Size of the dst buffer void *srcBuffer = nullptr; uint32_t srcWidth = verbose ? 24 : 128; // width of the src 2D buffer in bytes uint32_t srcHeight = verbose ? 16 : 96; // height of the src 2D buffer in bytes uint32_t srcOriginX = verbose ? 4 : 16; // Offset in bytes uint32_t srcOriginY = verbose ? 4 : 32; // Offset in rows uint32_t srcSize = srcHeight * srcWidth; // Size of the src buffer uint32_t width = verbose ? 8 : 64; // width of the region to copy uint32_t height = verbose ? 12 : 32; // height of the region to copy const ze_copy_region_t dstRegion = {dstOriginX, dstOriginY, 0, width, height, 0}; const ze_copy_region_t srcRegion = {srcOriginX, srcOriginY, 0, width, height, 0}; ze_device_mem_alloc_desc_t deviceDesc = {}; deviceDesc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC; deviceDesc.pNext = nullptr; deviceDesc.ordinal = 0; deviceDesc.flags = 0; ze_host_mem_alloc_desc_t hostDesc = {}; hostDesc.stype = ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC; hostDesc.pNext = nullptr; hostDesc.flags = 0; SUCCESS_OR_TERMINATE( zeMemAllocShared(context, &deviceDesc, &hostDesc, srcSize, 1, device, &srcBuffer)); SUCCESS_OR_TERMINATE( zeMemAllocShared(context, &deviceDesc, &hostDesc, dstSize, 1, device, &dstBuffer)); // Initialize buffers // dstBuffer and srcBuffer are shared allocations, so they have UVA between host and device // and there's no need to perform explicit copies uint8_t *srcBufferChar = reinterpret_cast(srcBuffer); for (uint32_t i = 0; i < srcHeight; i++) { for (uint32_t j = 0; j < srcWidth; j++) { srcBufferChar[i * srcWidth + j] = static_cast(i * srcWidth + j); } } int value = 0; SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryFill(cmdList, dstBuffer, reinterpret_cast(&value), sizeof(value), dstSize, nullptr, 0, nullptr)); SUCCESS_OR_TERMINATE(zeCommandListAppendBarrier(cmdList, nullptr, 0, nullptr)); // Perform the copy SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopyRegion(cmdList, 
dstBuffer, &dstRegion, dstWidth, 0, const_cast(srcBuffer), &srcRegion, srcWidth, 0, nullptr, 0, nullptr)); SUCCESS_OR_TERMINATE(zeCommandListClose(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr)); SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits::max())); uint8_t *dstBufferChar = reinterpret_cast(dstBuffer); if (verbose) { std::cout << "srcBufferChar\n"; for (uint32_t i = 0; i < srcHeight; i++) { for (uint32_t j = 0; j < srcWidth; j++) { std::cout << std::setw(3) << std::dec << static_cast(srcBufferChar[i * srcWidth + j]) << " "; } std::cout << "\n"; } std::cout << "dstBuffer\n"; for (uint32_t i = 0; i < dstHeight; i++) { for (uint32_t j = 0; j < dstWidth; j++) { std::cout << std::setw(3) << std::dec << static_cast(dstBufferChar[i * dstWidth + j]) << " "; } std::cout << "\n"; } } uint32_t dstOffset = dstOriginX + dstOriginY * dstWidth; uint32_t srcOffset = srcOriginX + srcOriginY * srcWidth; for (uint32_t i = 0; i < height; i++) { for (uint32_t j = 0; j < width; j++) { uint8_t dstVal = dstBufferChar[dstOffset + (i * dstWidth) + j]; uint8_t srcVal = srcBufferChar[srcOffset + (i * srcWidth) + j]; if (dstVal != srcVal) { validRet = false; } } } SUCCESS_OR_TERMINATE(zeMemFree(context, srcBuffer)); SUCCESS_OR_TERMINATE(zeMemFree(context, dstBuffer)); SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueDestroy(cmdQueue)); } void testAppendMemoryFillWithSomePattern(ze_context_handle_t context, ze_device_handle_t &device, bool &validRet) { const size_t allocSize = 4096 + 7; char pattern0 = 5; const size_t pattern1Size = 8; char *pattern1 = new char[pattern1Size]; void *zeBuffer0 = nullptr; void *zeBuffer1 = nullptr; ze_command_queue_handle_t cmdQueue; ze_command_list_handle_t cmdList; cmdQueue = createCommandQueue(context, device, nullptr); SUCCESS_OR_TERMINATE(createCommandList(context, device, cmdList)); // Initialize buffers // zeBuffer0 and zeBuffer1 are 
shared allocations, so they have UVA between host and device // and there's no need to perform explicit copies ze_device_mem_alloc_desc_t deviceDesc = {}; deviceDesc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC; deviceDesc.pNext = nullptr; deviceDesc.ordinal = 0; deviceDesc.flags = 0; ze_host_mem_alloc_desc_t hostDesc = {}; hostDesc.stype = ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC; hostDesc.pNext = nullptr; hostDesc.flags = 0; SUCCESS_OR_TERMINATE( zeMemAllocShared(context, &deviceDesc, &hostDesc, allocSize, 1, device, &zeBuffer0)); SUCCESS_OR_TERMINATE( zeMemAllocShared(context, &deviceDesc, &hostDesc, allocSize, 1, device, &zeBuffer1)); // Fibonacci pattern1[0] = 1; pattern1[1] = 2; for (size_t i = 2; i < pattern1Size; i++) { pattern1[i] = pattern1[i - 1] + pattern1[i - 2]; } SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryFill(cmdList, zeBuffer0, &pattern0, sizeof(pattern0), allocSize, nullptr, 0, nullptr)); SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryFill(cmdList, zeBuffer1, pattern1, pattern1Size, allocSize, nullptr, 0, nullptr)); SUCCESS_OR_TERMINATE(zeCommandListClose(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr)); SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits::max())); validRet = true; uint8_t *zeBufferChar0 = reinterpret_cast(zeBuffer0); for (size_t i = 0; i < allocSize; ++i) { if (zeBufferChar0[i] != pattern0) { validRet = false; if (verbose) { std::cout << "dstBufferChar0[" << i << " ] " << static_cast(zeBufferChar0[i]) << "!= pattern0 " << pattern0 << "\n"; } break; } } if (validRet == true) { uint8_t *zeBufferChar1 = reinterpret_cast(zeBuffer1); for (size_t i = 0; i < allocSize; i++) { if (zeBufferChar1[i] != pattern1[i % pattern1Size]) { validRet = false; if (verbose) { std::cout << "dstBufferChar1[" << i << " ] " << static_cast(zeBufferChar1[i]) << "!= pattern1[" << i % pattern1Size << " ] " << static_cast(pattern1[i % pattern1Size]) << "\n"; } break; } } } delete[] 
pattern1; SUCCESS_OR_TERMINATE(zeMemFree(context, zeBuffer0)); SUCCESS_OR_TERMINATE(zeMemFree(context, zeBuffer1)); SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueDestroy(cmdQueue)); } void testAppendMemoryCopy3DRegion(ze_context_handle_t context, ze_device_handle_t &device, bool &validRet) { validRet = true; ze_command_queue_handle_t cmdQueue; ze_command_list_handle_t cmdList; cmdQueue = createCommandQueue(context, device, nullptr); SUCCESS_OR_TERMINATE(createCommandList(context, device, cmdList)); void *dstBuffer = nullptr; uint32_t dstWidth = verbose ? 8 : 64; // width of the dst 3D buffer in bytes uint32_t dstHeight = verbose ? 8 : 64; // height of the dst 3D buffer in bytes uint32_t dstDepth = verbose ? 2 : 4; // depth of the dst 3D buffer in bytes uint32_t dstOriginX = 0; // Offset in bytes uint32_t dstOriginY = 0; // Offset in rows uint32_t dstOriginZ = 0; // Offset in rows uint32_t dstSize = dstHeight * dstWidth * dstDepth; // Size of the dst buffer void *srcBuffer = nullptr; uint32_t srcWidth = verbose ? 8 : 64; // width of the src 3D buffer in bytes uint32_t srcHeight = verbose ? 8 : 64; // height of the src 3D buffer in bytes uint32_t srcDepth = verbose ? 2 : 4; // depth of the src 3D buffer in bytes uint32_t srcOriginX = 0; // Offset in bytes uint32_t srcOriginY = 0; // Offset in rows uint32_t srcOriginZ = 0; // Offset in rows uint32_t srcSize = srcHeight * srcWidth * srcDepth; // Size of the src buffer uint32_t width = verbose ? 8 : 64; // width of the region to copy uint32_t height = verbose ? 8 : 64; // height of the region to copy uint32_t depth = verbose ? 
2 : 4; // height of the region to copy const ze_copy_region_t dstRegion = {dstOriginX, dstOriginY, dstOriginZ, width, height, depth}; const ze_copy_region_t srcRegion = {srcOriginX, srcOriginY, dstOriginZ, width, height, depth}; ze_device_mem_alloc_desc_t deviceDesc = {}; deviceDesc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC; deviceDesc.pNext = nullptr; deviceDesc.ordinal = 0; deviceDesc.flags = 0; ze_host_mem_alloc_desc_t hostDesc = {}; hostDesc.stype = ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC; hostDesc.pNext = nullptr; hostDesc.flags = 0; SUCCESS_OR_TERMINATE( zeMemAllocShared(context, &deviceDesc, &hostDesc, srcSize, 1, device, &srcBuffer)); SUCCESS_OR_TERMINATE( zeMemAllocShared(context, &deviceDesc, &hostDesc, dstSize, 1, device, &dstBuffer)); // Initialize buffers // dstBuffer and srcBuffer are shared allocations, so they have UVA between host and device // and there's no need to perform explicit copies uint8_t *srcBufferChar = reinterpret_cast(srcBuffer); for (uint32_t i = 0; i < srcDepth; i++) { for (uint32_t j = 0; j < srcHeight; j++) { for (uint32_t k = 0; k < srcWidth; k++) { size_t index = (i * srcWidth * srcHeight) + (j * srcWidth) + k; srcBufferChar[index] = static_cast(index); } } } int value = 0; SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryFill(cmdList, dstBuffer, reinterpret_cast(&value), sizeof(value), dstSize, nullptr, 0, nullptr)); SUCCESS_OR_TERMINATE(zeCommandListAppendBarrier(cmdList, nullptr, 0, nullptr)); // Perform the copy SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopyRegion(cmdList, dstBuffer, &dstRegion, dstWidth, (dstWidth * dstHeight), const_cast(srcBuffer), &srcRegion, srcWidth, (srcWidth * srcHeight), nullptr, 0, nullptr)); SUCCESS_OR_TERMINATE(zeCommandListClose(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr)); SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits::max())); uint8_t *dstBufferChar = reinterpret_cast(dstBuffer); if (verbose) { std::cout 
<< "srcBufferChar\n"; for (uint32_t i = 0; i < srcDepth; i++) { for (uint32_t j = 0; j < srcHeight; j++) { for (uint32_t k = 0; k < srcWidth; k++) { size_t index = (i * srcWidth * srcHeight) + (j * srcWidth) + k; std::cout << std::setw(3) << std::dec << static_cast(srcBufferChar[index]) << " "; } std::cout << "\n"; } std::cout << "\n"; } std::cout << "dstBuffer\n"; for (uint32_t i = 0; i < dstDepth; i++) { for (uint32_t j = 0; j < dstHeight; j++) { for (uint32_t k = 0; k < dstWidth; k++) { size_t index = (i * dstWidth * dstHeight) + (j * dstWidth) + k; std::cout << std::setw(3) << std::dec << static_cast(dstBufferChar[index]) << " "; } std::cout << "\n"; } std::cout << "\n"; } } uint32_t dstOffset = dstOriginX + dstOriginY * dstWidth + dstOriginZ * dstDepth * dstWidth; uint32_t srcOffset = srcOriginX + srcOriginY * srcWidth + srcOriginZ * srcDepth * srcWidth; for (uint32_t i = 0; i < depth; i++) { for (uint32_t j = 0; j < height; j++) { for (uint32_t k = 0; k < width; k++) { uint8_t dstVal = dstBufferChar[dstOffset + (i * dstWidth * dstHeight) + (j * dstWidth) + k]; uint8_t srcVal = srcBufferChar[srcOffset + (i * srcWidth * srcHeight) + (j * srcWidth) + k]; if (dstVal != srcVal) { validRet = false; } } } } SUCCESS_OR_TERMINATE(zeMemFree(context, srcBuffer)); SUCCESS_OR_TERMINATE(zeMemFree(context, dstBuffer)); SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueDestroy(cmdQueue)); } int main(int argc, char *argv[]) { verbose = isVerbose(argc, argv); ze_context_handle_t context = nullptr; auto devices = zelloInitContextAndGetDevices(context); auto device = devices[0]; bool outputValidationSuccessful = false; ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES}; SUCCESS_OR_TERMINATE(zeDeviceGetProperties(device, &deviceProperties)); std::cout << "Device : \n" << " * name : " << deviceProperties.name << "\n" << " * vendorId : " << std::hex << deviceProperties.vendorId << "\n"; 
testAppendMemoryCopyFromHeapToDeviceToStack(context, device, outputValidationSuccessful); if (outputValidationSuccessful) testAppendMemoryCopyFromHostToDeviceToStack(context, device, outputValidationSuccessful); if (outputValidationSuccessful) testAppendMemoryCopy2DRegion(context, device, outputValidationSuccessful); if (outputValidationSuccessful) testAppendMemoryFillWithSomePattern(context, device, outputValidationSuccessful); if (outputValidationSuccessful) testAppendMemoryCopy3DRegion(context, device, outputValidationSuccessful); SUCCESS_OR_TERMINATE(zeContextDestroy(context)); std::cout << "\nZello Copy Results validation " << (outputValidationSuccessful ? "PASSED" : "FAILED") << "\n"; return (outputValidationSuccessful ? 0 : 1); } compute-runtime-22.14.22890/level_zero/core/test/black_box_tests/zello_copy_fence.cpp000066400000000000000000000105621422164147700306240ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include #include "zello_common.h" #include #include #include #include bool verbose = false; void testAppendMemoryCopy(ze_context_handle_t &context, ze_device_handle_t &device, bool &validRet) { const size_t allocSize = 4096; char *heapBuffer = new char[allocSize]; void *xeBuffer = nullptr; char stackBuffer[allocSize]; ze_command_queue_handle_t cmdQueue = nullptr; ze_command_list_handle_t cmdList = nullptr; ze_command_queue_desc_t cmdQueueDesc = {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC}; cmdQueueDesc.pNext = nullptr; cmdQueueDesc.flags = 0; cmdQueueDesc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; cmdQueueDesc.priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL; cmdQueueDesc.ordinal = 0; cmdQueueDesc.index = 0; SUCCESS_OR_TERMINATE(zeCommandQueueCreate(context, device, &cmdQueueDesc, &cmdQueue)); ze_command_list_desc_t cmdListDesc = {}; cmdListDesc.stype = ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC; cmdListDesc.pNext = nullptr; cmdListDesc.flags = 0; SUCCESS_OR_TERMINATE(zeCommandListCreate(context, device, 
&cmdListDesc, &cmdList)); ze_device_mem_alloc_desc_t deviceDesc = {}; deviceDesc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC; deviceDesc.ordinal = 0; deviceDesc.flags = 0; deviceDesc.pNext = nullptr; deviceDesc.ordinal = 0; SUCCESS_OR_TERMINATE(zeMemAllocDevice(context, &deviceDesc, allocSize, allocSize, device, &xeBuffer)); for (size_t i = 0; i < allocSize; ++i) { heapBuffer[i] = static_cast(i + 1); } memset(stackBuffer, 0, allocSize); ze_fence_handle_t hFence = {}; ze_fence_desc_t fenceDesc = {}; fenceDesc.stype = ZE_STRUCTURE_TYPE_FENCE_DESC; fenceDesc.pNext = nullptr; fenceDesc.flags = 0; SUCCESS_OR_TERMINATE(zeFenceCreate(cmdQueue, &fenceDesc, &hFence)); for (int i = 0; i < 2; i++) { if (verbose) std::cout << "zeFenceHostSynchronize start iter:" << i << std::endl; // Copy from heap to device-allocated memory SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopy(cmdList, xeBuffer, heapBuffer, allocSize, nullptr, 0, nullptr)); // Copy from device-allocated memory to stack SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopy(cmdList, stackBuffer, xeBuffer, allocSize, nullptr, 0, nullptr)); SUCCESS_OR_TERMINATE(zeCommandListClose(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, hFence)); SUCCESS_OR_TERMINATE(zeFenceHostSynchronize(hFence, std::numeric_limits::max())); if (verbose) std::cout << "zeFenceHostSynchronize success iter:" << i << std::endl; SUCCESS_OR_TERMINATE(zeFenceReset(hFence)); SUCCESS_OR_TERMINATE(zeCommandListReset(cmdList)); } // Validate stack and xe buffers have the original data from heapBuffer validRet = (0 == memcmp(heapBuffer, stackBuffer, allocSize)); delete[] heapBuffer; SUCCESS_OR_TERMINATE(zeMemFree(context, xeBuffer)); SUCCESS_OR_TERMINATE(zeFenceDestroy(hFence)); SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueDestroy(cmdQueue)); } int main(int argc, char *argv[]) { verbose = isVerbose(argc, argv); ze_context_handle_t context = nullptr; ze_driver_handle_t 
driverHandle = nullptr; auto devices = zelloInitContextAndGetDevices(context, driverHandle); auto device = devices[0]; ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES}; SUCCESS_OR_TERMINATE(zeDeviceGetProperties(device, &deviceProperties)); std::cout << "Device : \n" << " * name : " << deviceProperties.name << "\n" << " * vendorId : " << std::hex << deviceProperties.vendorId << "\n"; bool outputValidationSuccessful; testAppendMemoryCopy(context, device, outputValidationSuccessful); SUCCESS_OR_WARNING_BOOL(outputValidationSuccessful); SUCCESS_OR_TERMINATE(zeContextDestroy(context)); std::cout << "\nZello Copy Fence Results validation " << (outputValidationSuccessful ? "PASSED" : "FAILED") << "\n"; return (outputValidationSuccessful ? 0 : 1); } compute-runtime-22.14.22890/level_zero/core/test/black_box_tests/zello_copy_image.cpp000066400000000000000000000125641422164147700306320ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include #include "zello_common.h" #include #include #include bool verbose = false; void testAppendImageCopy(ze_context_handle_t &context, ze_device_handle_t &device, bool &validRet) { const size_t width = 32; const size_t height = 24; const size_t depth = 1; const size_t size = 4 * width * height * depth; /* 4 channels per pixel */ ze_command_queue_handle_t cmdQueue = nullptr; ze_command_list_handle_t cmdList = nullptr; ze_command_queue_desc_t cmdQueueDesc = {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC}; cmdQueueDesc.pNext = nullptr; cmdQueueDesc.flags = 0; cmdQueueDesc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; cmdQueueDesc.priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL; cmdQueueDesc.ordinal = 0; cmdQueueDesc.index = 0; SUCCESS_OR_TERMINATE(zeCommandQueueCreate(context, device, &cmdQueueDesc, &cmdQueue)); ze_command_list_desc_t cmdListDesc = {}; cmdListDesc.stype = ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC; cmdListDesc.pNext = nullptr; cmdListDesc.flags = 0; 
SUCCESS_OR_TERMINATE(zeCommandListCreate(context, device, &cmdListDesc, &cmdList)); ze_image_desc_t srcImgDesc = {ZE_STRUCTURE_TYPE_IMAGE_DESC, nullptr, 0, ZE_IMAGE_TYPE_2D, {ZE_IMAGE_FORMAT_LAYOUT_8_8_8_8, ZE_IMAGE_FORMAT_TYPE_UINT, ZE_IMAGE_FORMAT_SWIZZLE_R, ZE_IMAGE_FORMAT_SWIZZLE_G, ZE_IMAGE_FORMAT_SWIZZLE_B, ZE_IMAGE_FORMAT_SWIZZLE_A}, width, height, depth, 0, 0}; ze_image_handle_t srcImg; ze_image_region_t srcRegion = {0, 0, 0, width, height, depth}; SUCCESS_OR_TERMINATE( zeImageCreate(context, device, const_cast(&srcImgDesc), &srcImg)); ze_image_desc_t dstImgDesc = {ZE_STRUCTURE_TYPE_IMAGE_DESC, nullptr, ZE_IMAGE_FLAG_KERNEL_WRITE, ZE_IMAGE_TYPE_2D, {ZE_IMAGE_FORMAT_LAYOUT_8_8_8_8, ZE_IMAGE_FORMAT_TYPE_UINT, ZE_IMAGE_FORMAT_SWIZZLE_R, ZE_IMAGE_FORMAT_SWIZZLE_G, ZE_IMAGE_FORMAT_SWIZZLE_B, ZE_IMAGE_FORMAT_SWIZZLE_A}, width, height, depth, 0, 0}; ze_image_handle_t dstImg; ze_image_region_t dstRegion = {0, 0, 0, width, height, depth}; SUCCESS_OR_TERMINATE( zeImageCreate(context, device, const_cast(&dstImgDesc), &dstImg)); uint8_t *srcBuffer = new uint8_t[size]; uint8_t *dstBuffer = new uint8_t[size]; for (size_t i = 0; i < size; ++i) { srcBuffer[i] = static_cast(i); dstBuffer[i] = 0xff; } // Copy from srcBuffer->srcImg->dstImg->dstBuffer, so at the end dstBuffer = srcBuffer SUCCESS_OR_TERMINATE(zeCommandListAppendImageCopyFromMemory(cmdList, srcImg, srcBuffer, &srcRegion, nullptr, 0, nullptr)); SUCCESS_OR_TERMINATE(zeCommandListAppendBarrier(cmdList, nullptr, 0, nullptr)); SUCCESS_OR_TERMINATE( zeCommandListAppendImageCopy(cmdList, dstImg, srcImg, nullptr, 0, nullptr)); SUCCESS_OR_TERMINATE(zeCommandListAppendBarrier(cmdList, nullptr, 0, nullptr)); SUCCESS_OR_TERMINATE(zeCommandListAppendImageCopyToMemory(cmdList, dstBuffer, dstImg, &dstRegion, nullptr, 0, nullptr)); SUCCESS_OR_TERMINATE(zeCommandListClose(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr)); SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueue, 
std::numeric_limits::max())); validRet = (0 == memcmp(srcBuffer, dstBuffer, size)); delete[] srcBuffer; delete[] dstBuffer; SUCCESS_OR_TERMINATE(zeImageDestroy(dstImg)); SUCCESS_OR_TERMINATE(zeImageDestroy(srcImg)); SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueDestroy(cmdQueue)); } int main(int argc, char *argv[]) { verbose = isVerbose(argc, argv); ze_context_handle_t context = nullptr; auto devices = zelloInitContextAndGetDevices(context); auto device = devices[0]; bool outputValidationSuccessful; ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES}; SUCCESS_OR_TERMINATE(zeDeviceGetProperties(device, &deviceProperties)); std::cout << "Device : \n" << " * name : " << deviceProperties.name << "\n" << " * vendorId : " << std::hex << deviceProperties.vendorId << "\n"; testAppendImageCopy(context, device, outputValidationSuccessful); SUCCESS_OR_TERMINATE(zeContextDestroy(context)); std::cout << "\nZello Copy Image Results validation " << (outputValidationSuccessful ? "PASSED" : "FAILED") << "\n"; return (outputValidationSuccessful ? 
0 : 1); } compute-runtime-22.14.22890/level_zero/core/test/black_box_tests/zello_copy_only.cpp000066400000000000000000000620471422164147700305320ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include #include "zello_common.h" #include #include #include #include #include bool verbose = false; void testCopyBetweenHeapDeviceAndStack(ze_context_handle_t &context, ze_device_handle_t &device, bool &validRet) { const size_t allocSize = 4096 + 7; // +7 to brake alignment and make it harder char *heapBuffer = new char[allocSize]; void *buffer1 = nullptr; void *buffer2 = nullptr; char *stackBuffer = new char[allocSize]; ze_command_queue_handle_t cmdQueue; ze_command_list_handle_t cmdList; ze_command_queue_desc_t cmdQueueDesc = {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC}; int32_t copyQueueGroup = getCopyOnlyCommandQueueOrdinal(device); if (copyQueueGroup < 0) { std::cout << "No Copy queue group found. Skipping test run\n"; validRet = true; return; } cmdQueueDesc.pNext = nullptr; cmdQueueDesc.flags = 0; cmdQueueDesc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; cmdQueueDesc.priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL; cmdQueueDesc.ordinal = copyQueueGroup; cmdQueueDesc.index = 0; SUCCESS_OR_TERMINATE(zeCommandQueueCreate(context, device, &cmdQueueDesc, &cmdQueue)); ze_command_list_desc_t cmdListDesc = {}; cmdListDesc.stype = ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC; cmdListDesc.pNext = nullptr; cmdListDesc.flags = 0; cmdListDesc.commandQueueGroupOrdinal = copyQueueGroup; SUCCESS_OR_TERMINATE(zeCommandListCreate(context, device, &cmdListDesc, &cmdList)); ze_device_mem_alloc_desc_t deviceDesc = {}; deviceDesc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC; deviceDesc.ordinal = 0; deviceDesc.flags = 0; deviceDesc.pNext = nullptr; SUCCESS_OR_TERMINATE(zeMemAllocDevice(context, &deviceDesc, allocSize, allocSize, device, &buffer1)); SUCCESS_OR_TERMINATE(zeMemAllocDevice(context, &deviceDesc, allocSize, allocSize, device, 
&buffer2)); for (size_t i = 0; i < allocSize; ++i) { heapBuffer[i] = static_cast(i + 1); } memset(stackBuffer, 0, allocSize); // Copy from heap to device-allocated memory SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopy(cmdList, buffer1, heapBuffer, allocSize, nullptr, 0, nullptr)); SUCCESS_OR_TERMINATE(zeCommandListAppendBarrier(cmdList, nullptr, 0, nullptr)); SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopy(cmdList, buffer2, buffer1, allocSize, nullptr, 0, nullptr)); SUCCESS_OR_TERMINATE(zeCommandListAppendBarrier(cmdList, nullptr, 0, nullptr)); // Copy from device-allocated memory to stack SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopy(cmdList, stackBuffer, buffer2, allocSize, nullptr, 0, nullptr)); SUCCESS_OR_TERMINATE(zeCommandListClose(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr)); SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits::max())); // Validate stack and xe buffers have the original data from heapBuffer validRet = (0 == memcmp(heapBuffer, stackBuffer, allocSize)); delete[] heapBuffer; delete[] stackBuffer; SUCCESS_OR_TERMINATE(zeMemFree(context, buffer1)); SUCCESS_OR_TERMINATE(zeMemFree(context, buffer2)); SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueDestroy(cmdQueue)); } void testCopyBetweenHostMemAndDeviceMem(ze_context_handle_t &context, ze_device_handle_t &device, bool &validRet) { const size_t allocSize = 4096 + 7; // +7 to brake alignment and make it harder char *hostBuffer = nullptr; void *deviceBuffer = nullptr; char *stackBuffer = new char[allocSize]; ze_command_queue_handle_t cmdQueue; ze_command_list_handle_t cmdList; ze_command_queue_desc_t cmdQueueDesc = {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC}; int32_t copyQueueGroup = getCopyOnlyCommandQueueOrdinal(device); if (copyQueueGroup < 0) { std::cout << "No Copy queue group found. 
Skipping test run\n"; validRet = true; return; } cmdQueueDesc.pNext = nullptr; cmdQueueDesc.flags = 0; cmdQueueDesc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; cmdQueueDesc.priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL; cmdQueueDesc.ordinal = copyQueueGroup; cmdQueueDesc.index = 0; SUCCESS_OR_TERMINATE(zeCommandQueueCreate(context, device, &cmdQueueDesc, &cmdQueue)); ze_command_list_desc_t cmdListDesc = {}; cmdListDesc.stype = ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC; cmdListDesc.pNext = nullptr; cmdListDesc.flags = 0; cmdListDesc.commandQueueGroupOrdinal = copyQueueGroup; SUCCESS_OR_TERMINATE(zeCommandListCreate(context, device, &cmdListDesc, &cmdList)); ze_host_mem_alloc_desc_t hostDesc = {}; hostDesc.stype = ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC; hostDesc.pNext = nullptr; hostDesc.flags = 0; SUCCESS_OR_TERMINATE(zeMemAllocHost(context, &hostDesc, allocSize, 1, (void **)(&hostBuffer))); ze_device_mem_alloc_desc_t deviceDesc = {}; deviceDesc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC; deviceDesc.ordinal = 0; deviceDesc.flags = 0; deviceDesc.pNext = nullptr; SUCCESS_OR_TERMINATE(zeMemAllocDevice(context, &deviceDesc, allocSize, allocSize, device, &deviceBuffer)); for (size_t i = 0; i < allocSize; ++i) { hostBuffer[i] = static_cast(i + 1); } memset(stackBuffer, 0, allocSize); // Copy from host-allocated to device-allocated memory SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopy(cmdList, deviceBuffer, hostBuffer, allocSize, nullptr, 0, nullptr)); SUCCESS_OR_TERMINATE(zeCommandListAppendBarrier(cmdList, nullptr, 0, nullptr)); // Copy from device-allocated memory to stack SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopy(cmdList, stackBuffer, deviceBuffer, allocSize, nullptr, 0, nullptr)); SUCCESS_OR_TERMINATE(zeCommandListClose(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr)); SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits::max())); // Validate stack and xe deviceBuffers have the original 
data from hostBuffer validRet = (0 == memcmp(hostBuffer, stackBuffer, allocSize)); //delete[] heapBuffer; delete[] stackBuffer; SUCCESS_OR_TERMINATE(zeMemFree(context, hostBuffer)); SUCCESS_OR_TERMINATE(zeMemFree(context, deviceBuffer)); SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueDestroy(cmdQueue)); } void testRegionCopyOf2DSharedMem(ze_context_handle_t &context, ze_device_handle_t &device, bool &validRet) { validRet = true; ze_command_queue_handle_t cmdQueue; ze_command_list_handle_t cmdList; ze_command_queue_desc_t cmdQueueDesc = {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC}; int32_t copyQueueGroup = getCopyOnlyCommandQueueOrdinal(device); if (copyQueueGroup < 0) { std::cout << "No Copy queue group found. Skipping test run\n"; validRet = true; return; } cmdQueueDesc.pNext = nullptr; cmdQueueDesc.flags = 0; cmdQueueDesc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; cmdQueueDesc.priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL; cmdQueueDesc.ordinal = copyQueueGroup; cmdQueueDesc.index = 0; SUCCESS_OR_TERMINATE(zeCommandQueueCreate(context, device, &cmdQueueDesc, &cmdQueue)); ze_command_list_desc_t cmdListDesc = {}; cmdListDesc.stype = ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC; cmdListDesc.pNext = nullptr; cmdListDesc.flags = 0; cmdListDesc.commandQueueGroupOrdinal = copyQueueGroup; SUCCESS_OR_TERMINATE(zeCommandListCreate(context, device, &cmdListDesc, &cmdList)); void *dstBuffer = nullptr; uint32_t dstWidth = verbose ? 16 : 256; // width of the dst 2D buffer in bytes uint32_t dstHeight = verbose ? 32 : 128; // height of the dst 2D buffer in bytes uint32_t dstOriginX = verbose ? 8 : 32; // Offset in bytes uint32_t dstOriginY = verbose ? 8 : 64; // Offset in rows uint32_t dstSize = dstHeight * dstWidth; // Size of the dst buffer void *srcBuffer = nullptr; uint32_t srcWidth = verbose ? 16 : 256; // width of the dst 2D buffer in bytes uint32_t srcHeight = verbose ? 
32 : 128; // height of the dst 2D buffer in bytes uint32_t srcOriginX = verbose ? 8 : 32; // Offset in bytes uint32_t srcOriginY = verbose ? 8 : 64; // Offset in rows uint32_t srcSize = dstHeight * dstWidth; // Size of the dst buffer uint32_t width = verbose ? 8 : 64; // width of the region to copy uint32_t height = verbose ? 12 : 32; // height of the region to copy const ze_copy_region_t dstRegion = {dstOriginX, dstOriginY, 0, width, height, 0}; const ze_copy_region_t srcRegion = {srcOriginX, srcOriginY, 0, width, height, 0}; ze_device_mem_alloc_desc_t deviceDesc = {}; deviceDesc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC; deviceDesc.ordinal = 0; deviceDesc.flags = 0; deviceDesc.pNext = nullptr; ze_host_mem_alloc_desc_t hostDesc = {}; hostDesc.stype = ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC; hostDesc.pNext = nullptr; hostDesc.flags = 0; SUCCESS_OR_TERMINATE( zeMemAllocShared(context, &deviceDesc, &hostDesc, srcSize, 1, device, &srcBuffer)); SUCCESS_OR_TERMINATE( zeMemAllocShared(context, &deviceDesc, &hostDesc, dstSize, 1, device, &dstBuffer)); // Initialize buffers // dstBuffer and srcBuffer are shared allocations, so they have UVA between host and device // and there's no need to perform explicit copies uint8_t *srcBufferChar = reinterpret_cast(srcBuffer); for (uint32_t i = 0; i < srcHeight; i++) { for (uint32_t j = 0; j < srcWidth; j++) { srcBufferChar[i * srcWidth + j] = static_cast(i * srcWidth + j); } } int value = 0; SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryFill(cmdList, dstBuffer, reinterpret_cast(&value), sizeof(value), dstSize, nullptr, 0, nullptr)); SUCCESS_OR_TERMINATE(zeCommandListAppendBarrier(cmdList, nullptr, 0, nullptr)); // Perform the copy SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopyRegion(cmdList, dstBuffer, &dstRegion, dstWidth, 0, const_cast(srcBuffer), &srcRegion, srcWidth, 0, nullptr, 0, nullptr)); SUCCESS_OR_TERMINATE(zeCommandListClose(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, 
nullptr)); SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits::max())); uint8_t *dstBufferChar = reinterpret_cast(dstBuffer); if (verbose) { std::cout << "srcBufferChar\n"; for (uint32_t i = 0; i < srcHeight; i++) { for (uint32_t j = 0; j < srcWidth; j++) { std::cout << std::setw(3) << std::dec << static_cast(srcBufferChar[i * srcWidth + j]) << " "; } std::cout << "\n"; } std::cout << "dstBuffer\n"; for (uint32_t i = 0; i < dstHeight; i++) { for (uint32_t j = 0; j < dstWidth; j++) { std::cout << std::setw(3) << std::dec << static_cast(dstBufferChar[i * dstWidth + j]) << " "; } std::cout << "\n"; } } uint32_t dstOffset = dstOriginX + dstOriginY * dstWidth; uint32_t srcOffset = srcOriginX + srcOriginY * srcWidth; for (uint32_t i = 0; i < height; i++) { for (uint32_t j = 0; j < width; j++) { uint8_t dstVal = dstBufferChar[dstOffset + (i * dstWidth) + j]; uint8_t srcVal = srcBufferChar[srcOffset + (i * srcWidth) + j]; if (dstVal != srcVal) { validRet = false; } } } SUCCESS_OR_TERMINATE(zeMemFree(context, srcBuffer)); SUCCESS_OR_TERMINATE(zeMemFree(context, dstBuffer)); SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueDestroy(cmdQueue)); } void testSharedMemDataAccessWithoutCopy(ze_context_handle_t &context, ze_device_handle_t &device, bool &validRet) { const size_t allocSize = 4096; char pattern0 = 5; const size_t pattern1Size = 8; char *pattern1 = new char[pattern1Size]; void *buffer0 = nullptr; void *buffer1 = nullptr; ze_command_queue_handle_t cmdQueue; ze_command_list_handle_t cmdList; ze_command_queue_desc_t cmdQueueDesc = {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC}; int32_t copyQueueGroup = getCopyOnlyCommandQueueOrdinal(device); if (copyQueueGroup < 0) { std::cout << "No Copy queue group found. 
Skipping test run\n"; validRet = true; return; } cmdQueueDesc.pNext = nullptr; cmdQueueDesc.flags = 0; cmdQueueDesc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; cmdQueueDesc.priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL; cmdQueueDesc.ordinal = copyQueueGroup; cmdQueueDesc.index = 0; SUCCESS_OR_TERMINATE(zeCommandQueueCreate(context, device, &cmdQueueDesc, &cmdQueue)); ze_command_list_desc_t cmdListDesc = {}; cmdListDesc.stype = ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC; cmdListDesc.pNext = nullptr; cmdListDesc.flags = 0; cmdListDesc.commandQueueGroupOrdinal = copyQueueGroup; SUCCESS_OR_TERMINATE(zeCommandListCreate(context, device, &cmdListDesc, &cmdList)); // Initialize buffers // buffer0 and buffer1 are shared allocations, so they have UVA between host and device // and there's no need to perform explicit copies ze_device_mem_alloc_desc_t deviceDesc = {}; deviceDesc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC; deviceDesc.ordinal = 0; deviceDesc.flags = 0; deviceDesc.pNext = nullptr; ze_host_mem_alloc_desc_t hostDesc = {}; hostDesc.stype = ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC; hostDesc.pNext = nullptr; hostDesc.flags = 0; SUCCESS_OR_TERMINATE( zeMemAllocShared(context, &deviceDesc, &hostDesc, allocSize, 1, device, &buffer0)); SUCCESS_OR_TERMINATE( zeMemAllocShared(context, &deviceDesc, &hostDesc, allocSize, 1, device, &buffer1)); // Fibonacci pattern1[0] = 1; pattern1[1] = 2; for (size_t i = 2; i < pattern1Size; i++) { pattern1[i] = pattern1[i - 1] + pattern1[i - 2]; } SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryFill(cmdList, buffer0, &pattern0, sizeof(pattern0), allocSize, nullptr, 0, nullptr)); SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryFill(cmdList, buffer1, pattern1, pattern1Size, allocSize, nullptr, 0, nullptr)); SUCCESS_OR_TERMINATE(zeCommandListClose(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr)); SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits::max())); validRet = true; 
uint8_t *bufferChar0 = reinterpret_cast(buffer0); for (size_t i = 0; i < allocSize; ++i) { if (bufferChar0[i] != pattern0) { validRet = false; if (verbose) { std::cout << "dstBufferChar0[" << i << " ] " << static_cast(bufferChar0[i]) << "!= pattern0 " << pattern0 << "\n"; } break; } } if (validRet == true) { uint8_t *bufferChar1 = reinterpret_cast(buffer1); size_t j = 0; for (size_t i = 0; i < allocSize; i++) { if (bufferChar1[i] != pattern1[j]) { validRet = false; if (verbose) { std::cout << "dstBufferChar1[" << i << " ] " << static_cast(bufferChar1[i]) << "!= pattern1[" << j << " ] " << static_cast(pattern1[j]) << "\n"; } break; } j++; if (j >= pattern1Size) { j = 0; } } } delete[] pattern1; SUCCESS_OR_TERMINATE(zeMemFree(context, buffer0)); SUCCESS_OR_TERMINATE(zeMemFree(context, buffer1)); SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueDestroy(cmdQueue)); } void testRegionCopyOf3DSharedMem(ze_context_handle_t &context, ze_device_handle_t &device, bool &validRet) { validRet = true; ze_command_queue_handle_t cmdQueue; ze_command_list_handle_t cmdList; ze_command_queue_desc_t cmdQueueDesc = {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC}; int32_t copyQueueGroup = getCopyOnlyCommandQueueOrdinal(device); if (copyQueueGroup < 0) { std::cout << "No Copy queue group found. 
Skipping test run\n"; validRet = true; return; } cmdQueueDesc.pNext = nullptr; cmdQueueDesc.flags = 0; cmdQueueDesc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; cmdQueueDesc.priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL; cmdQueueDesc.ordinal = copyQueueGroup; cmdQueueDesc.index = 0; SUCCESS_OR_TERMINATE(zeCommandQueueCreate(context, device, &cmdQueueDesc, &cmdQueue)); ze_command_list_desc_t cmdListDesc = {}; cmdListDesc.stype = ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC; cmdListDesc.pNext = nullptr; cmdListDesc.flags = 0; cmdListDesc.commandQueueGroupOrdinal = copyQueueGroup; SUCCESS_OR_TERMINATE(zeCommandListCreate(context, device, &cmdListDesc, &cmdList)); void *dstBuffer = nullptr; uint32_t dstWidth = verbose ? 8 : 64; // width of the dst 3D buffer in bytes uint32_t dstHeight = verbose ? 8 : 64; // height of the dst 3D buffer in bytes uint32_t dstDepth = verbose ? 2 : 4; // depth of the dst 3D buffer in bytes uint32_t dstOriginX = 0; // Offset in bytes uint32_t dstOriginY = 0; // Offset in rows uint32_t dstOriginZ = 0; // Offset in rows uint32_t dstSize = dstHeight * dstWidth * dstDepth; // Size of the dst buffer void *srcBuffer = nullptr; uint32_t srcWidth = verbose ? 8 : 64; // width of the src 3D buffer in bytes uint32_t srcHeight = verbose ? 8 : 64; // height of the src 3D buffer in bytes uint32_t srcDepth = verbose ? 2 : 4; // depth of the src 3D buffer in bytes uint32_t srcOriginX = 0; // Offset in bytes uint32_t srcOriginY = 0; // Offset in rows uint32_t srcOriginZ = 0; // Offset in rows uint32_t srcSize = srcHeight * srcWidth * srcDepth; // Size of the src buffer uint32_t width = verbose ? 8 : 64; // width of the region to copy uint32_t height = verbose ? 8 : 64; // height of the region to copy uint32_t depth = verbose ? 
2 : 4; // height of the region to copy const ze_copy_region_t dstRegion = {dstOriginX, dstOriginY, dstOriginZ, width, height, depth}; const ze_copy_region_t srcRegion = {srcOriginX, srcOriginY, dstOriginZ, width, height, depth}; ze_device_mem_alloc_desc_t deviceDesc = {}; deviceDesc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC; deviceDesc.ordinal = 0; deviceDesc.flags = 0; deviceDesc.pNext = nullptr; ze_host_mem_alloc_desc_t hostDesc = {}; hostDesc.stype = ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC; hostDesc.pNext = nullptr; hostDesc.flags = 0; SUCCESS_OR_TERMINATE( zeMemAllocShared(context, &deviceDesc, &hostDesc, srcSize, 1, device, &srcBuffer)); SUCCESS_OR_TERMINATE( zeMemAllocShared(context, &deviceDesc, &hostDesc, dstSize, 1, device, &dstBuffer)); // Initialize buffers // dstBuffer and srcBuffer are shared allocations, so they have UVA between host and device // and there's no need to perform explicit copies uint8_t *srcBufferChar = reinterpret_cast(srcBuffer); for (uint32_t i = 0; i < srcDepth; i++) { for (uint32_t j = 0; j < srcHeight; j++) { for (uint32_t k = 0; k < srcWidth; k++) { size_t index = (i * srcWidth * srcHeight) + (j * srcWidth) + k; srcBufferChar[index] = static_cast(index); } } } int value = 0; SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryFill(cmdList, dstBuffer, reinterpret_cast(&value), sizeof(value), dstSize, nullptr, 0, nullptr)); SUCCESS_OR_TERMINATE(zeCommandListAppendBarrier(cmdList, nullptr, 0, nullptr)); // Perform the copy SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopyRegion(cmdList, dstBuffer, &dstRegion, dstWidth, (dstWidth * dstHeight), const_cast(srcBuffer), &srcRegion, srcWidth, (srcWidth * srcHeight), nullptr, 0, nullptr)); SUCCESS_OR_TERMINATE(zeCommandListClose(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr)); SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits::max())); uint8_t *dstBufferChar = reinterpret_cast(dstBuffer); if (verbose) { std::cout 
<< "srcBufferChar\n"; for (uint32_t i = 0; i < srcDepth; i++) { for (uint32_t j = 0; j < srcHeight; j++) { for (uint32_t k = 0; k < srcWidth; k++) { size_t index = (i * srcWidth * srcHeight) + (j * srcWidth) + k; std::cout << std::setw(3) << std::dec << static_cast(srcBufferChar[index]) << " "; } std::cout << "\n"; } std::cout << "\n"; } std::cout << "dstBuffer\n"; for (uint32_t i = 0; i < dstDepth; i++) { for (uint32_t j = 0; j < dstHeight; j++) { for (uint32_t k = 0; k < dstWidth; k++) { size_t index = (i * dstWidth * dstHeight) + (j * dstWidth) + k; std::cout << std::setw(3) << std::dec << static_cast(dstBufferChar[index]) << " "; } std::cout << "\n"; } std::cout << "\n"; } } uint32_t dstOffset = dstOriginX + dstOriginY * dstWidth + dstOriginZ * dstDepth * dstWidth; uint32_t srcOffset = srcOriginX + srcOriginY * srcWidth + srcOriginZ * srcDepth * srcWidth; for (uint32_t i = 0; i < depth; i++) { for (uint32_t j = 0; j < height; j++) { for (uint32_t k = 0; k < width; k++) { uint8_t dstVal = dstBufferChar[dstOffset + (i * dstWidth * dstHeight) + (j * dstWidth) + k]; uint8_t srcVal = srcBufferChar[srcOffset + (i * srcWidth * srcHeight) + (j * srcWidth) + k]; if (dstVal != srcVal) { validRet = false; } } } } SUCCESS_OR_TERMINATE(zeMemFree(context, srcBuffer)); SUCCESS_OR_TERMINATE(zeMemFree(context, dstBuffer)); SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueDestroy(cmdQueue)); } int main(int argc, char *argv[]) { verbose = isVerbose(argc, argv); ze_context_handle_t context = nullptr; ze_driver_handle_t driverHandle = nullptr; auto devices = zelloInitContextAndGetDevices(context, driverHandle); auto device = devices[0]; ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES}; SUCCESS_OR_TERMINATE(zeDeviceGetProperties(device, &deviceProperties)); std::cout << "Device : \n" << " * name : " << deviceProperties.name << "\n" << " * vendorId : " << std::hex << deviceProperties.vendorId << "\n"; bool 
outputValidationSuccessful = true; if (outputValidationSuccessful) testCopyBetweenHeapDeviceAndStack(context, device, outputValidationSuccessful); if (outputValidationSuccessful) testCopyBetweenHostMemAndDeviceMem(context, device, outputValidationSuccessful); if (outputValidationSuccessful) testRegionCopyOf2DSharedMem(context, device, outputValidationSuccessful); if (outputValidationSuccessful) testSharedMemDataAccessWithoutCopy(context, device, outputValidationSuccessful); if (outputValidationSuccessful) testRegionCopyOf3DSharedMem(context, device, outputValidationSuccessful); SUCCESS_OR_TERMINATE(zeContextDestroy(context)); std::cout << "\nZello Copy Only Results validation " << (outputValidationSuccessful ? "PASSED" : "FAILED") << "\n"; return (outputValidationSuccessful ? 0 : 1); } compute-runtime-22.14.22890/level_zero/core/test/black_box_tests/zello_dynamic_link.cpp000066400000000000000000000246551422164147700311630ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include #include "zello_common.h" #include "zello_compile.h" #include #include #include const char *importModuleSrc = R"===( int lib_func_add(int x, int y); int lib_func_mult(int x, int y); int lib_func_sub(int x, int y); kernel void call_library_funcs(__global int* result) { int add_result = lib_func_add(1,2); int mult_result = lib_func_mult(add_result,2); result[0] = lib_func_sub(mult_result, 1); } )==="; const char *exportModuleSrc = R"===( int lib_func_add(int x, int y) { return x+y; } int lib_func_mult(int x, int y) { return x*y; } int lib_func_sub(int x, int y) { return x-y; } )==="; const char *importModuleSrcCircDep = R"===( int lib_func_add(int x, int y); int lib_func_mult(int x, int y); int lib_func_sub(int x, int y); kernel void call_library_funcs(__global int* result) { int add_result = lib_func_add(1,2); int mult_result = lib_func_mult(add_result,2); result[0] = lib_func_sub(mult_result, 1); } int lib_func_add2(int x) { 
return x+2; } )==="; const char *exportModuleSrcCircDep = R"===( int lib_func_add2(int x); int lib_func_add5(int x); int lib_func_add(int x, int y) { return lib_func_add5(lib_func_add2(x + y)); } int lib_func_mult(int x, int y) { return x*y; } int lib_func_sub(int x, int y) { return x-y; } )==="; const char *exportModuleSrc2CircDep = R"===( int lib_func_add5(int x) { return x+5; } )==="; extern bool verbose; bool verbose = false; int main(int argc, char *argv[]) { bool outputValidationSuccessful = true; verbose = isVerbose(argc, argv); bool circularDep = isCircularDepTest(argc, argv); int numModules = 2; char *exportModuleSrcValue = const_cast(exportModuleSrc); char *importModuleSrcValue = const_cast(importModuleSrc); ze_module_handle_t exportModule2 = {}; if (circularDep) { exportModuleSrcValue = const_cast(exportModuleSrcCircDep); importModuleSrcValue = const_cast(importModuleSrcCircDep); numModules = 3; } // Setup SUCCESS_OR_TERMINATE(zeInit(ZE_INIT_FLAG_GPU_ONLY)); uint32_t driverCount = 0; SUCCESS_OR_TERMINATE(zeDriverGet(&driverCount, nullptr)); if (driverCount == 0) std::terminate(); ze_driver_handle_t driverHandle; driverCount = 1; SUCCESS_OR_TERMINATE(zeDriverGet(&driverCount, &driverHandle)); uint32_t deviceCount = 0; SUCCESS_OR_TERMINATE(zeDeviceGet(driverHandle, &deviceCount, nullptr)); if (deviceCount == 0) std::terminate(); ze_device_handle_t device; deviceCount = 1; SUCCESS_OR_TERMINATE(zeDeviceGet(driverHandle, &deviceCount, &device)); ze_context_desc_t contextDesc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC}; ze_context_handle_t context; SUCCESS_OR_TERMINATE(zeContextCreate(driverHandle, &contextDesc, &context)); ze_device_mem_alloc_desc_t deviceDesc = {}; deviceDesc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC; deviceDesc.pNext = nullptr; deviceDesc.ordinal = 0; deviceDesc.flags = 0; ze_host_mem_alloc_desc_t hostDesc = {}; hostDesc.stype = ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC; hostDesc.pNext = nullptr; hostDesc.flags = 0; void *resultBuffer; 
SUCCESS_OR_TERMINATE( zeMemAllocShared(context, &deviceDesc, &hostDesc, sizeof(int), 1, device, &resultBuffer)); // Build Import/Export SPIRVs & Modules if (verbose) { std::cout << "reading export module for spirv\n"; } std::string buildLog; auto exportBinaryModule = compileToSpirV(const_cast(exportModuleSrcValue), "", buildLog); if (buildLog.size() > 0) { std::cout << "Build log " << buildLog; } SUCCESS_OR_TERMINATE((0 == exportBinaryModule.size())); ze_module_handle_t exportModule; ze_module_desc_t exportModuleDesc = {ZE_STRUCTURE_TYPE_MODULE_DESC}; exportModuleDesc.format = ZE_MODULE_FORMAT_IL_SPIRV; exportModuleDesc.pInputModule = reinterpret_cast(exportBinaryModule.data()); exportModuleDesc.inputSize = exportBinaryModule.size(); // -library-compliation is required for the non-kernel functions to be listed as exported by the Intel Graphics Compiler exportModuleDesc.pBuildFlags = "-library-compilation"; if (verbose) { std::cout << "building export module\n"; } SUCCESS_OR_TERMINATE(zeModuleCreate(context, device, &exportModuleDesc, &exportModule, nullptr)); if (circularDep) { if (verbose) { std::cout << "reading export module2 for spirv\n"; } auto exportBinaryModule2 = compileToSpirV(exportModuleSrc2CircDep, "", buildLog); if (buildLog.size() > 0) { std::cout << "Build log " << buildLog; } SUCCESS_OR_TERMINATE((0 == exportBinaryModule2.size())); ze_module_desc_t exportModuleDesc2 = {ZE_STRUCTURE_TYPE_MODULE_DESC}; exportModuleDesc2.format = ZE_MODULE_FORMAT_IL_SPIRV; exportModuleDesc2.pInputModule = reinterpret_cast(exportBinaryModule2.data()); exportModuleDesc2.inputSize = exportBinaryModule2.size(); // -library-compliation is required for the non-kernel functions to be listed as exported by the Intel Graphics Compiler exportModuleDesc2.pBuildFlags = "-library-compilation"; if (verbose) { std::cout << "building export module\n"; } SUCCESS_OR_TERMINATE(zeModuleCreate(context, device, &exportModuleDesc2, &exportModule2, nullptr)); } if (verbose) { std::cout << 
"reading import module for spirv\n"; } auto importBinaryModule = compileToSpirV(const_cast(importModuleSrcValue), "", buildLog); if (buildLog.size() > 0) { std::cout << "Build log " << buildLog; } SUCCESS_OR_TERMINATE((0 == importBinaryModule.size())); ze_module_handle_t importModule; ze_module_desc_t importModuleDesc = {ZE_STRUCTURE_TYPE_MODULE_DESC}; importModuleDesc.format = ZE_MODULE_FORMAT_IL_SPIRV; importModuleDesc.pInputModule = reinterpret_cast(importBinaryModule.data()); importModuleDesc.inputSize = importBinaryModule.size(); if (circularDep) { importModuleDesc.pBuildFlags = "-library-compilation"; } if (verbose) { std::cout << "building import module\n"; } SUCCESS_OR_TERMINATE(zeModuleCreate(context, device, &importModuleDesc, &importModule, nullptr)); // Dynamically linking the two Modules to resolve the symbols if (verbose) { std::cout << "Dynamically linking modules\n"; } ze_module_build_log_handle_t dynLinkLog; if (circularDep) { ze_module_handle_t modulesToLink[] = {importModule, exportModule, exportModule2}; SUCCESS_OR_TERMINATE(zeModuleDynamicLink(numModules, modulesToLink, &dynLinkLog)); } else { ze_module_handle_t modulesToLink[] = {importModule, exportModule}; SUCCESS_OR_TERMINATE(zeModuleDynamicLink(numModules, modulesToLink, &dynLinkLog)); } size_t buildLogSize; SUCCESS_OR_TERMINATE(zeModuleBuildLogGetString(dynLinkLog, &buildLogSize, nullptr)); char *logBuffer = new char[buildLogSize](); SUCCESS_OR_TERMINATE(zeModuleBuildLogGetString(dynLinkLog, &buildLogSize, logBuffer)); if (verbose) { std::cout << "Dynamically linked modules\n"; std::cout << logBuffer << "\n"; } // Create Kernel to call ze_kernel_handle_t importKernel; ze_kernel_desc_t importKernelDesc = {ZE_STRUCTURE_TYPE_KERNEL_DESC}; importKernelDesc.pKernelName = "call_library_funcs"; SUCCESS_OR_TERMINATE(zeKernelCreate(importModule, &importKernelDesc, &importKernel)); SUCCESS_OR_TERMINATE(zeKernelSetGroupSize(importKernel, 1, 1, 1)); 
SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(importKernel, 0, sizeof(resultBuffer), &resultBuffer)); // Create Command Queue and List ze_command_queue_handle_t cmdQueue; ze_command_queue_desc_t cmdQueueDesc = {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC}; cmdQueueDesc.ordinal = 0; cmdQueueDesc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS; SUCCESS_OR_TERMINATE(zeCommandQueueCreate(context, device, &cmdQueueDesc, &cmdQueue)); ze_command_list_handle_t cmdList; ze_command_list_desc_t cmdListDesc = {ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC}; SUCCESS_OR_TERMINATE(zeCommandListCreate(context, device, &cmdListDesc, &cmdList)); // Append call to Kernel ze_group_count_t dispatchTraits; dispatchTraits.groupCountX = 1; dispatchTraits.groupCountY = 1u; dispatchTraits.groupCountZ = 1u; SUCCESS_OR_TERMINATE(zeCommandListAppendLaunchKernel(cmdList, importKernel, &dispatchTraits, nullptr, 0, nullptr)); // Execute the Kernel in the Import module which calls the Export Module's functions SUCCESS_OR_TERMINATE(zeCommandListClose(cmdList)); if (verbose) { std::cout << "execute kernel in import module\n"; } SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr)); if (verbose) { std::cout << "sync results from kernel\n"; } SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits::max())); // Validate results int expectedResult = (((1 + 2) * 2) - 1); if (circularDep) { expectedResult = (((((1 + 2) + 2) + 5) * 2) - 1); } if (expectedResult != *(int *)resultBuffer) { std::cout << "Result:" << *(int *)resultBuffer << " invalid\n"; outputValidationSuccessful = false; } else { if (verbose) { std::cout << "Result Buffer is correct with a value of:" << *(int *)resultBuffer << "\n"; } } // Cleanup delete[] logBuffer; SUCCESS_OR_TERMINATE(zeModuleBuildLogDestroy(dynLinkLog)); SUCCESS_OR_TERMINATE(zeMemFree(context, resultBuffer)); SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueDestroy(cmdQueue)); 
SUCCESS_OR_TERMINATE(zeKernelDestroy(importKernel)); SUCCESS_OR_TERMINATE(zeModuleDestroy(importModule)); SUCCESS_OR_TERMINATE(zeModuleDestroy(exportModule)); if (circularDep) { SUCCESS_OR_TERMINATE(zeModuleDestroy(exportModule2)); } SUCCESS_OR_TERMINATE(zeContextDestroy(context)); std::cout << "\nZello Dynamic Link Results validation " << (outputValidationSuccessful ? "PASSED" : "FAILED") << "\n"; return 0; } compute-runtime-22.14.22890/level_zero/core/test/black_box_tests/zello_events.cpp000066400000000000000000000226051422164147700300170ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include #include "zello_common.h" #include #include #include #include extern bool verbose; bool verbose = false; void createCmdQueueAndCmdList(ze_device_handle_t &device, ze_context_handle_t &context, ze_command_queue_handle_t &cmdqueue, ze_command_list_handle_t &cmdList) { // Create commandQueue and cmdList ze_command_queue_desc_t cmdQueueDesc = {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC}; cmdQueueDesc.ordinal = getCommandQueueOrdinal(device); cmdQueueDesc.index = 0; cmdQueueDesc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; SUCCESS_OR_TERMINATE(zeCommandQueueCreate(context, device, &cmdQueueDesc, &cmdqueue)); SUCCESS_OR_TERMINATE(createCommandList(context, device, cmdList)); } // Test Device Signal and Device wait followed by Host Wait bool testEventsDeviceSignalDeviceWait(ze_context_handle_t &context, ze_device_handle_t &device) { ze_command_queue_handle_t cmdQueue; ze_command_list_handle_t cmdList; // Create commandQueue and cmdList createCmdQueueAndCmdList(device, context, cmdQueue, cmdList); // Create two shared buffers constexpr size_t allocSize = 4096; ze_device_mem_alloc_desc_t deviceDesc = {ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC}; deviceDesc.flags = ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_UNCACHED; deviceDesc.ordinal = 0; ze_host_mem_alloc_desc_t hostDesc = {ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC}; hostDesc.flags = 
ZE_HOST_MEM_ALLOC_FLAG_BIAS_UNCACHED; void *srcBuffer = nullptr; SUCCESS_OR_TERMINATE(zeMemAllocShared(context, &deviceDesc, &hostDesc, allocSize, 1, device, &srcBuffer)); void *dstBuffer = nullptr; SUCCESS_OR_TERMINATE(zeMemAllocShared(context, &deviceDesc, &hostDesc, allocSize, 1, device, &dstBuffer)); // Create Event Pool and kernel launch event ze_event_pool_handle_t eventPoolDevice, eventPoolHost; uint32_t numEvents = 2; std::vector deviceEvents(numEvents), hostEvents(numEvents); createEventPoolAndEvents(context, device, eventPoolDevice, (ze_event_pool_flag_t)0, numEvents, deviceEvents.data(), ZE_EVENT_SCOPE_FLAG_SUBDEVICE, (ze_event_scope_flag_t)0); createEventPoolAndEvents(context, device, eventPoolHost, (ze_event_pool_flag_t)(ZE_EVENT_POOL_FLAG_HOST_VISIBLE), numEvents, hostEvents.data(), ZE_EVENT_SCOPE_FLAG_HOST, (ze_event_scope_flag_t)0); //Initialize memory uint8_t dstValue = 0; uint8_t srcValue = 55; SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryFill(cmdList, dstBuffer, reinterpret_cast(&dstValue), sizeof(dstValue), allocSize, deviceEvents[0], 0, nullptr)); SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryFill(cmdList, srcBuffer, reinterpret_cast(&srcValue), sizeof(srcValue), allocSize, deviceEvents[1], 1, &deviceEvents[0])); SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopy(cmdList, dstBuffer, srcBuffer, allocSize, hostEvents[0], 1, &deviceEvents[1])); SUCCESS_OR_TERMINATE(zeCommandListClose(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr)); SUCCESS_OR_TERMINATE(zeEventHostSynchronize(hostEvents[0], std::numeric_limits::max())); // Validate bool outputValidationSuccessful = true; if (memcmp(dstBuffer, srcBuffer, allocSize)) { outputValidationSuccessful = false; uint8_t *srcCharBuffer = static_cast(srcBuffer); uint8_t *dstCharBuffer = static_cast(dstBuffer); for (size_t i = 0; i < allocSize; i++) { if (srcCharBuffer[i] != dstCharBuffer[i]) { std::cout << "srcBuffer[" << i << "] = " << 
static_cast(srcCharBuffer[i]) << " not equal to " << "dstBuffer[" << i << "] = " << static_cast(dstCharBuffer[i]) << "\n"; break; } } } // Cleanup for (auto event : hostEvents) { SUCCESS_OR_TERMINATE(zeEventDestroy(event)); } for (auto event : deviceEvents) { SUCCESS_OR_TERMINATE(zeEventDestroy(event)); } SUCCESS_OR_TERMINATE(zeEventPoolDestroy(eventPoolHost)); SUCCESS_OR_TERMINATE(zeEventPoolDestroy(eventPoolDevice)); SUCCESS_OR_TERMINATE(zeMemFree(context, dstBuffer)); SUCCESS_OR_TERMINATE(zeMemFree(context, srcBuffer)); SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueDestroy(cmdQueue)); return outputValidationSuccessful; } // Test Device Signal and Host wait bool testEventsDeviceSignalHostWait(ze_context_handle_t &context, ze_device_handle_t &device) { ze_command_queue_handle_t cmdQueue; ze_command_list_handle_t cmdList; // Create commandQueue and cmdList createCmdQueueAndCmdList(device, context, cmdQueue, cmdList); // Create two shared buffers constexpr size_t allocSize = 4096; ze_device_mem_alloc_desc_t deviceDesc = {ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC}; deviceDesc.flags = ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_UNCACHED; deviceDesc.ordinal = 0; ze_host_mem_alloc_desc_t hostDesc = {ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC}; hostDesc.flags = ZE_HOST_MEM_ALLOC_FLAG_BIAS_UNCACHED; void *srcBuffer = nullptr; SUCCESS_OR_TERMINATE(zeMemAllocShared(context, &deviceDesc, &hostDesc, allocSize, 1, device, &srcBuffer)); void *dstBuffer = nullptr; SUCCESS_OR_TERMINATE(zeMemAllocShared(context, &deviceDesc, &hostDesc, allocSize, 1, device, &dstBuffer)); // Initialize memory constexpr uint8_t val = 55; memset(srcBuffer, val, allocSize); memset(dstBuffer, 0, allocSize); // Create Event Pool and kernel launch event ze_event_pool_handle_t eventPool; uint32_t numEvents = 2; std::vector events(numEvents); createEventPoolAndEvents(context, device, eventPool, (ze_event_pool_flag_t)(ZE_EVENT_POOL_FLAG_HOST_VISIBLE), numEvents, events.data(), 
ZE_EVENT_SCOPE_FLAG_HOST, (ze_event_scope_flag_t)0); SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopy(cmdList, dstBuffer, srcBuffer, allocSize, events[0], 0, nullptr)); SUCCESS_OR_TERMINATE(zeCommandListClose(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr)); SUCCESS_OR_TERMINATE(zeEventHostSynchronize(events[0], std::numeric_limits::max())); // Validate bool outputValidationSuccessful = true; if (memcmp(dstBuffer, srcBuffer, allocSize)) { outputValidationSuccessful = false; uint8_t *srcCharBuffer = static_cast(srcBuffer); uint8_t *dstCharBuffer = static_cast(dstBuffer); for (size_t i = 0; i < allocSize; i++) { if (srcCharBuffer[i] != dstCharBuffer[i]) { std::cout << "srcBuffer[" << i << "] = " << static_cast(srcCharBuffer[i]) << " not equal to " << "dstBuffer[" << i << "] = " << static_cast(dstCharBuffer[i]) << "\n"; break; } } } // Cleanup for (auto event : events) { SUCCESS_OR_TERMINATE(zeEventDestroy(event)); } SUCCESS_OR_TERMINATE(zeEventPoolDestroy(eventPool)); SUCCESS_OR_TERMINATE(zeMemFree(context, dstBuffer)); SUCCESS_OR_TERMINATE(zeMemFree(context, srcBuffer)); SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueDestroy(cmdQueue)); return outputValidationSuccessful; } void printResult(bool outputValidationSuccessful, std::string ¤tTest) { std::cout << "\nZello Events: " << currentTest.c_str() << " Results validation " << (outputValidationSuccessful ? 
"PASSED" : "FAILED") << std::endl << std::endl; } int main(int argc, char *argv[]) { bool outputValidationSuccessful; verbose = isVerbose(argc, argv); ze_context_handle_t context = nullptr; ze_driver_handle_t driverHandle = nullptr; auto devices = zelloInitContextAndGetDevices(context, driverHandle); auto device = devices[0]; ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES}; SUCCESS_OR_TERMINATE(zeDeviceGetProperties(device, &deviceProperties)); std::cout << "Device : \n" << " * name : " << deviceProperties.name << "\n" << " * vendorId : " << std::hex << deviceProperties.vendorId << "\n"; std::string currentTest; currentTest = "Device signal and host wait test"; outputValidationSuccessful = testEventsDeviceSignalHostWait(context, device); printResult(outputValidationSuccessful, currentTest); currentTest = "Device signal and device wait test"; outputValidationSuccessful = testEventsDeviceSignalDeviceWait(context, device); printResult(outputValidationSuccessful, currentTest); SUCCESS_OR_TERMINATE(zeContextDestroy(context)); return outputValidationSuccessful ? 
0 : 1; } compute-runtime-22.14.22890/level_zero/core/test/black_box_tests/zello_fence.cpp000066400000000000000000000211371422164147700275720ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include #include "zello_common.h" #include "zello_compile.h" #include #include #include bool verbose = false; const char *clProgram = R"==( __kernel void increment_by_one(__global uchar *dst, __global uchar *src) { unsigned int gid = get_global_id(0); dst[gid] = (uchar)(src[gid] + 1); } )=="; void createModule(ze_context_handle_t &context, ze_module_handle_t &module, ze_device_handle_t &device) { // Prepare spirV std::string buildLog; auto spirV = compileToSpirV(clProgram, "", buildLog); if (buildLog.size() > 0) { std::cout << "Build log " << buildLog; } SUCCESS_OR_TERMINATE((0 == spirV.size())); ze_module_desc_t moduleDesc = {ZE_STRUCTURE_TYPE_MODULE_DESC}; ze_module_build_log_handle_t buildlog; moduleDesc.format = ZE_MODULE_FORMAT_IL_SPIRV; moduleDesc.pInputModule = spirV.data(); moduleDesc.inputSize = spirV.size(); moduleDesc.pBuildFlags = ""; if (zeModuleCreate(context, device, &moduleDesc, &module, &buildlog) != ZE_RESULT_SUCCESS) { size_t szLog = 0; zeModuleBuildLogGetString(buildlog, &szLog, nullptr); char *strLog = (char *)malloc(szLog); zeModuleBuildLogGetString(buildlog, &szLog, strLog); std::cout << "Build log:" << strLog << std::endl; free(strLog); } SUCCESS_OR_TERMINATE(zeModuleBuildLogDestroy(buildlog)); } void createKernel(ze_module_handle_t &module, ze_kernel_handle_t &kernel, uint32_t numThreads, uint32_t sizex, uint32_t sizey, uint32_t sizez) { ze_kernel_desc_t kernelDesc = {ZE_STRUCTURE_TYPE_KERNEL_DESC}; kernelDesc.pKernelName = "increment_by_one"; SUCCESS_OR_TERMINATE(zeKernelCreate(module, &kernelDesc, &kernel)); ze_kernel_properties_t kernProps{ZE_STRUCTURE_TYPE_KERNEL_PROPERTIES}; SUCCESS_OR_TERMINATE(zeKernelGetProperties(kernel, &kernProps)); std::cout << "Kernel : \n" << " * name : " << 
kernelDesc.pKernelName << "\n" << " * uuid.mid : " << kernProps.uuid.mid << "\n" << " * uuid.kid : " << kernProps.uuid.kid << "\n" << " * maxSubgroupSize : " << kernProps.maxSubgroupSize << "\n" << " * localMemSize : " << kernProps.localMemSize << "\n" << " * spillMemSize : " << kernProps.spillMemSize << "\n" << " * privateMemSize : " << kernProps.privateMemSize << "\n" << " * maxNumSubgroups : " << kernProps.maxNumSubgroups << "\n" << " * numKernelArgs : " << kernProps.numKernelArgs << "\n" << " * requiredSubgroupSize : " << kernProps.requiredSubgroupSize << "\n" << " * requiredNumSubGroups : " << kernProps.requiredNumSubGroups << "\n" << " * requiredGroupSizeX : " << kernProps.requiredGroupSizeX << "\n" << " * requiredGroupSizeY : " << kernProps.requiredGroupSizeY << "\n" << " * requiredGroupSizeZ : " << kernProps.requiredGroupSizeZ << "\n"; uint32_t groupSizeX = sizex; uint32_t groupSizeY = sizey; uint32_t groupSizeZ = sizey; SUCCESS_OR_TERMINATE(zeKernelSuggestGroupSize(kernel, numThreads, 1U, 1U, &groupSizeX, &groupSizeY, &groupSizeZ)); SUCCESS_OR_TERMINATE_BOOL(numThreads % groupSizeX == 0); if (verbose) { std::cout << "Group size : (" << groupSizeX << ", " << groupSizeY << ", " << groupSizeZ << ")" << std::endl; } SUCCESS_OR_TERMINATE(zeKernelSetGroupSize(kernel, groupSizeX, groupSizeY, groupSizeZ)); } bool testFence(ze_context_handle_t &context, ze_device_handle_t &device) { constexpr size_t allocSize = 4096; constexpr size_t bytesPerThread = sizeof(char); constexpr size_t numThreads = allocSize / bytesPerThread; ze_module_handle_t module; ze_kernel_handle_t kernel; ze_command_queue_handle_t cmdQueue; ze_command_list_handle_t cmdList; void *srcBuffer; void *dstBuffer; uint32_t groupSizeX = 32u; uint32_t groupSizeY = 1u; uint32_t groupSizeZ = 1u; // Create commandQueue and cmdList ze_command_queue_desc_t cmdQueueDesc = {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC}; cmdQueueDesc.ordinal = getCommandQueueOrdinal(device); cmdQueueDesc.index = 0; cmdQueueDesc.mode = 
ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; SUCCESS_OR_TERMINATE(zeCommandQueueCreate(context, device, &cmdQueueDesc, &cmdQueue)); SUCCESS_OR_TERMINATE(createCommandList(context, device, cmdList)); // Create module and kernel createModule(context, module, device); createKernel(module, kernel, numThreads, groupSizeX, groupSizeY, groupSizeZ); // Alloc buffers srcBuffer = nullptr; dstBuffer = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; deviceDesc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC; deviceDesc.ordinal = 0; deviceDesc.flags = 0; deviceDesc.pNext = nullptr; SUCCESS_OR_TERMINATE(zeMemAllocDevice(context, &deviceDesc, allocSize, 1, device, &srcBuffer)); SUCCESS_OR_TERMINATE(zeMemAllocDevice(context, &deviceDesc, allocSize, 1, device, &dstBuffer)); // Init data and copy to device uint8_t initDataSrc[allocSize]; memset(initDataSrc, 7, sizeof(initDataSrc)); uint8_t initDataDst[allocSize]; memset(initDataDst, 3, sizeof(initDataDst)); SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopy( cmdList, srcBuffer, initDataSrc, sizeof(initDataSrc), nullptr, 0, nullptr)); SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopy( cmdList, dstBuffer, initDataDst, sizeof(initDataDst), nullptr, 0, nullptr)); // copying of data must finish before running the user kernel SUCCESS_OR_TERMINATE(zeCommandListAppendBarrier(cmdList, nullptr, 0, nullptr)); // Set kernel args and get ready to dispatch SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 0, sizeof(dstBuffer), &dstBuffer)); SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 1, sizeof(srcBuffer), &srcBuffer)); ze_group_count_t dispatchTraits; dispatchTraits.groupCountX = numThreads / groupSizeX; dispatchTraits.groupCountY = 1u; dispatchTraits.groupCountZ = 1u; if (verbose) { std::cerr << "Number of groups : (" << dispatchTraits.groupCountX << ", " << dispatchTraits.groupCountY << ", " << dispatchTraits.groupCountZ << ")" << std::endl; } SUCCESS_OR_TERMINATE_BOOL(dispatchTraits.groupCountX * groupSizeX == allocSize); 
SUCCESS_OR_TERMINATE(zeCommandListAppendLaunchKernel( cmdList, kernel, &dispatchTraits, nullptr, 0, nullptr)); // Create Fence ze_fence_handle_t fence; ze_fence_desc_t fenceDesc = {}; fenceDesc.stype = ZE_STRUCTURE_TYPE_FENCE_DESC; fenceDesc.pNext = nullptr; fenceDesc.flags = 0; SUCCESS_OR_TERMINATE(zeFenceCreate(cmdQueue, &fenceDesc, &fence)); // Execute CommandList SUCCESS_OR_TERMINATE(zeCommandListClose(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, fence)); // Wait for fence to be signaled SUCCESS_OR_TERMINATE(zeFenceHostSynchronize(fence, std::numeric_limits::max())); if (verbose) std::cout << "zeFenceHostSynchronize success" << std::endl; //Tear down SUCCESS_OR_TERMINATE(zeFenceReset(fence)); SUCCESS_OR_TERMINATE(zeCommandListReset(cmdList)); SUCCESS_OR_TERMINATE(zeMemFree(context, dstBuffer)); SUCCESS_OR_TERMINATE(zeMemFree(context, srcBuffer)); SUCCESS_OR_TERMINATE(zeFenceDestroy(fence)); SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueDestroy(cmdQueue)); SUCCESS_OR_TERMINATE(zeKernelDestroy(kernel)); SUCCESS_OR_TERMINATE(zeModuleDestroy(module)); return true; } int main(int argc, char *argv[]) { bool outputValidationSuccessful; verbose = isVerbose(argc, argv); ze_context_handle_t context = nullptr; ze_driver_handle_t driverHandle = nullptr; auto devices = zelloInitContextAndGetDevices(context, driverHandle); auto device = devices[0]; ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES}; SUCCESS_OR_TERMINATE(zeDeviceGetProperties(device, &deviceProperties)); std::cout << "Device : \n" << " * name : " << deviceProperties.name << "\n" << " * vendorId : " << std::hex << deviceProperties.vendorId << "\n"; outputValidationSuccessful = testFence(context, device); SUCCESS_OR_TERMINATE(zeContextDestroy(context)); std::cout << "\nZello Fence Results validation " << (outputValidationSuccessful ? 
"PASSED" : "FAILED") << "\n"; return 0; } compute-runtime-22.14.22890/level_zero/core/test/black_box_tests/zello_image.cpp000066400000000000000000000236641422164147700276030ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include #include "zello_common.h" #include #include #include #include #include extern bool verbose; bool verbose = false; #define imageIndex(buf, x, y, z, chan) \ (buf)[(chan) + \ (x)*channels + \ (y)*channels * hostWidth + \ (z)*channels * hostWidth * hostHeight] void testAppendImageFunction(ze_driver_handle_t driver, ze_context_handle_t &context, ze_device_handle_t &device, ze_command_queue_handle_t &cmdQueue, uint32_t cmdQueueOrdinal, bool &validRet, ze_image_type_t imageType) { const size_t channels = 4; uint32_t hostWidth = verbose ? 5 : 131; uint32_t hostHeight = verbose ? 4 : 89; uint32_t hostDepth = verbose ? 3 : 10; // Apply a few pixels of offset to copy-in and copy-out uint32_t inOffsetX = 1; uint32_t inOffsetY = 2; uint32_t inOffsetZ = 3; uint32_t outOffsetX = 2; uint32_t outOffsetY = 3; uint32_t outOffsetZ = 1; if (imageType != ZE_IMAGE_TYPE_3D) { hostDepth = 1; inOffsetZ = 0; outOffsetZ = 0; if (imageType != ZE_IMAGE_TYPE_2D) { hostHeight = 1; inOffsetY = 0; outOffsetY = 0; } } const size_t hostSize = hostWidth * hostHeight * hostDepth * channels; // GPU image should be big enough to accomodate host image plus offsets const uint32_t gpuWidth = hostWidth + std::max(inOffsetX, outOffsetX); const uint32_t gpuHeight = hostHeight + std::max(inOffsetY, outOffsetY); const uint32_t gpuDepth = hostDepth + std::max(inOffsetZ, outOffsetZ); ze_command_list_handle_t cmdList; ze_command_list_desc_t cmdListDesc = {ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC}; cmdListDesc.commandQueueGroupOrdinal = cmdQueueOrdinal; SUCCESS_OR_TERMINATE(zeCommandListCreate(context, device, &cmdListDesc, &cmdList)); ze_image_desc_t srcImgDesc = {ZE_STRUCTURE_TYPE_IMAGE_DESC, nullptr, 0, imageType, 
{ZE_IMAGE_FORMAT_LAYOUT_32_32_32_32, ZE_IMAGE_FORMAT_TYPE_UINT, ZE_IMAGE_FORMAT_SWIZZLE_R, ZE_IMAGE_FORMAT_SWIZZLE_G, ZE_IMAGE_FORMAT_SWIZZLE_B, ZE_IMAGE_FORMAT_SWIZZLE_A}, gpuWidth, gpuHeight, gpuDepth, 0, 0}; ze_image_handle_t srcImg; ze_image_region_t srcRegion = {inOffsetX, inOffsetY, inOffsetZ, hostWidth, hostHeight, hostDepth}; SUCCESS_OR_TERMINATE( zeImageCreate(context, device, const_cast(&srcImgDesc), &srcImg)); ze_image_desc_t dstImgDesc = {ZE_STRUCTURE_TYPE_IMAGE_DESC, nullptr, ZE_IMAGE_FLAG_KERNEL_WRITE, imageType, {ZE_IMAGE_FORMAT_LAYOUT_32_32_32_32, ZE_IMAGE_FORMAT_TYPE_UINT, ZE_IMAGE_FORMAT_SWIZZLE_R, ZE_IMAGE_FORMAT_SWIZZLE_G, ZE_IMAGE_FORMAT_SWIZZLE_B, ZE_IMAGE_FORMAT_SWIZZLE_A}, gpuWidth, gpuHeight, gpuDepth, 0, 0}; ze_image_handle_t dstImg; ze_image_region_t dstRegion = {outOffsetX, outOffsetY, outOffsetZ, hostWidth, hostHeight, hostDepth}; SUCCESS_OR_TERMINATE( zeImageCreate(context, device, const_cast(&dstImgDesc), &dstImg)); uint32_t *srcBuffer = new uint32_t[hostSize]; uint32_t *dstBuffer = new uint32_t[hostSize]; for (size_t i = 0; i < hostSize; ++i) { srcBuffer[i] = static_cast((i + 1) & 0xffffffff); dstBuffer[i] = 0xff; } // Copy from srcBuffer to srcImg to dstImg to dstBuffer // At the end dstBuffer should be the same srcBuffer with some offset SUCCESS_OR_TERMINATE(zeCommandListAppendImageCopyFromMemory(cmdList, srcImg, srcBuffer, &srcRegion, nullptr, 0, nullptr)); SUCCESS_OR_TERMINATE(zeCommandListAppendBarrier(cmdList, nullptr, 0, nullptr)); SUCCESS_OR_TERMINATE(zeCommandListAppendImageCopy(cmdList, dstImg, srcImg, nullptr, 0, nullptr)); SUCCESS_OR_TERMINATE(zeCommandListAppendBarrier(cmdList, nullptr, 0, nullptr)); SUCCESS_OR_TERMINATE(zeCommandListAppendImageCopyToMemory(cmdList, dstBuffer, dstImg, &dstRegion, nullptr, 0, nullptr)); SUCCESS_OR_TERMINATE(zeCommandListClose(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr)); SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueue, 
std::numeric_limits::max())); if (verbose) { size_t pixelWidth = channels; size_t rowWidth = pixelWidth * hostWidth; size_t sliceWidth = rowWidth * hostHeight; size_t totalWidth = sliceWidth * hostDepth; std::cout << "source" << std::endl; for (size_t i = 0; i < hostSize; ++i) { std::cout << static_cast(srcBuffer[i]) << " "; if (i % sliceWidth == sliceWidth - 1 && i != totalWidth - 1) std::cout << std::endl << "---" << std::endl; else if (i % rowWidth == rowWidth - 1) std::cout << std::endl; else if (i % pixelWidth == pixelWidth - 1) std::cout << "| "; } std::cout << std::endl; std::cout << "destination" << std::endl; for (size_t i = 0; i < hostSize; ++i) { std::cout << static_cast(dstBuffer[i]) << " "; if (i % sliceWidth == sliceWidth - 1 && i != totalWidth - 1) std::cout << std::endl << "---" << std::endl; else if (i % rowWidth == rowWidth - 1) std::cout << std::endl; else if (i % pixelWidth == pixelWidth - 1) std::cout << "| "; } std::cout << std::endl; } validRet = 1; int errorPrintLimit = 30; for (uint32_t xi = 0; xi < hostWidth; xi++) { for (uint32_t yi = 0; yi < hostHeight; yi++) { for (uint32_t zi = 0; zi < hostDepth; zi++) { int xo = xi + inOffsetX - outOffsetX; int yo = yi + inOffsetY - outOffsetY; int zo = zi + inOffsetZ - outOffsetZ; // pixels in the margins will have junk if offsets don't match; ignore those if (xo >= 0 && yo >= 0 && zo >= 0 && xo < static_cast(hostWidth) && yo < static_cast(hostHeight) && zo < static_cast(hostDepth)) { for (size_t chan = 0; chan < channels; chan++) { auto input = imageIndex(srcBuffer, xi, yi, zi, chan); auto output = imageIndex(dstBuffer, xo, yo, zo, chan); if (input != output) { validRet = 0; if (errorPrintLimit > 0) { std::cout << "error: " << xi << "," << yi << "," << zi << " (" << input << ") does not match " << xo << "," << yo << "," << zo << " (" << output << ")" << std::endl; errorPrintLimit--; } } } } } } } delete[] srcBuffer; delete[] dstBuffer; SUCCESS_OR_TERMINATE(zeImageDestroy(srcImg)); 
SUCCESS_OR_TERMINATE(zeImageDestroy(dstImg)); SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdList)); } int main(int argc, char *argv[]) { verbose = isVerbose(argc, argv); bool do1D = isParamEnabled(argc, argv, "-1", "--1D"); bool do2D = isParamEnabled(argc, argv, "-2", "--2D"); bool do3D = isParamEnabled(argc, argv, "-3", "--3D"); // by default, do all tests if (!do1D && !do2D && !do3D) { do1D = do2D = do3D = true; } ze_driver_handle_t driver; ze_context_handle_t context; ze_device_handle_t device; ze_command_queue_handle_t cmdQueue; uint32_t cmdQueueOrdinal; initialize(driver, context, device, cmdQueue, cmdQueueOrdinal); bool success1D = false; bool success2D = false; bool success3D = false; if (do1D) testAppendImageFunction(driver, context, device, cmdQueue, cmdQueueOrdinal, success1D, ZE_IMAGE_TYPE_1D); if (do2D) testAppendImageFunction(driver, context, device, cmdQueue, cmdQueueOrdinal, success2D, ZE_IMAGE_TYPE_2D); if (do3D) testAppendImageFunction(driver, context, device, cmdQueue, cmdQueueOrdinal, success3D, ZE_IMAGE_TYPE_3D); if (do1D) std::cout << "\nZello Image 1D Results validation " << (success1D ? "PASSED" : "FAILED") << std::endl; if (do2D) std::cout << "\nZello Image 2D Results validation " << (success2D ? "PASSED" : "FAILED") << std::endl; if (do3D) std::cout << "\nZello Image 3D Results validation " << (success3D ? "PASSED" : "FAILED") << std::endl; teardown(context, cmdQueue); return ((do1D && !success1D) || (do2D && !success2D) || (do3D && !success3D)) ? 
1 : 0; } compute-runtime-22.14.22890/level_zero/core/test/black_box_tests/zello_image_view.cpp000066400000000000000000000544241422164147700306330ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "zello_common.h" #include "zello_compile.h" extern bool verbose; bool verbose = false; const char *readNV12Module = R"===( __kernel void ReadNV12Kernel( read_only image2d_t nv12Img, uint width, uint height, __global uchar *pDest) { int tid_x = get_global_id(0); int tid_y = get_global_id(1); float4 colorY; int2 coord; const sampler_t samplerA = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; if (tid_x < width && tid_y < height) { coord = (int2)(tid_x, tid_y); if (((tid_y * width) + tid_x) < (width * height)) { colorY = read_imagef(nv12Img, samplerA, coord); pDest[(tid_y * width) + tid_x] = (uchar)(255.0f * colorY.y); if ((tid_x % 2 == 0) && (tid_y % 2 == 0)) { pDest[(width * height) + (tid_y / 2 * width) + (tid_x)] = (uchar)(255.0f * colorY.z); pDest[(width * height) + (tid_y / 2 * width) + (tid_x) + 1] = (uchar)(255.0f * colorY.x); } } } } )==="; void testAppendImageViewNV12Copy(ze_context_handle_t &context, ze_device_handle_t &device, bool &validRet) { std::string buildLog; auto spirV = compileToSpirV(readNV12Module, "", buildLog); if (buildLog.size() > 0) { std::cout << "Build log " << buildLog; } SUCCESS_OR_TERMINATE((0 == spirV.size())); const size_t width = 32; const size_t height = 32; const size_t depth = 1; const size_t size = 4 * width * height * depth; /* 4 channels per pixel */ ze_command_queue_handle_t cmdQueue = nullptr; ze_command_list_handle_t cmdList = nullptr; ze_command_queue_desc_t cmdQueueDesc = {}; cmdQueueDesc.pNext = nullptr; cmdQueueDesc.flags = 0; cmdQueueDesc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; cmdQueueDesc.priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL; cmdQueueDesc.ordinal = 0; cmdQueueDesc.index = 0; SUCCESS_OR_TERMINATE(zeCommandQueueCreate(context, 
device, &cmdQueueDesc, &cmdQueue)); ze_command_list_desc_t cmdListDesc = {}; cmdListDesc.stype = ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC; cmdListDesc.pNext = nullptr; cmdListDesc.flags = 0; SUCCESS_OR_TERMINATE(zeCommandListCreate(context, device, &cmdListDesc, &cmdList)); // create NV12 image ze_image_desc_t srcImgDesc = {ZE_STRUCTURE_TYPE_IMAGE_DESC, nullptr, (ZE_IMAGE_FLAG_BIAS_UNCACHED), ZE_IMAGE_TYPE_2D, {ZE_IMAGE_FORMAT_LAYOUT_NV12, ZE_IMAGE_FORMAT_TYPE_UINT, ZE_IMAGE_FORMAT_SWIZZLE_R, ZE_IMAGE_FORMAT_SWIZZLE_G, ZE_IMAGE_FORMAT_SWIZZLE_B, ZE_IMAGE_FORMAT_SWIZZLE_A}, width, height, depth, 0, 0}; ze_image_handle_t srcImg; SUCCESS_OR_TERMINATE( zeImageCreate(context, device, const_cast(&srcImgDesc), &srcImg)); // create image_veiw for Y plane ze_image_view_planar_exp_desc_t planeYdesc = {}; planeYdesc.stype = ZE_STRUCTURE_TYPE_IMAGE_VIEW_PLANAR_EXP_DESC; planeYdesc.planeIndex = 0u; // Y plane ze_image_desc_t imageViewDescPlaneY = {ZE_STRUCTURE_TYPE_IMAGE_DESC, &planeYdesc, (ZE_IMAGE_FLAG_KERNEL_WRITE | ZE_IMAGE_FLAG_BIAS_UNCACHED), ZE_IMAGE_TYPE_2D, {ZE_IMAGE_FORMAT_LAYOUT_8, ZE_IMAGE_FORMAT_TYPE_UINT, ZE_IMAGE_FORMAT_SWIZZLE_A, ZE_IMAGE_FORMAT_SWIZZLE_B, ZE_IMAGE_FORMAT_SWIZZLE_G, ZE_IMAGE_FORMAT_SWIZZLE_R}, width, height, depth, 0, 0}; ze_image_handle_t planeYImageView; SUCCESS_OR_TERMINATE( zeImageViewCreateExp(context, device, &imageViewDescPlaneY, srcImg, &planeYImageView)); // create image_view for UV plane ze_image_view_planar_exp_desc_t planeUVdesc = {}; planeUVdesc.stype = ZE_STRUCTURE_TYPE_IMAGE_VIEW_PLANAR_EXP_DESC; planeUVdesc.planeIndex = 1u; // UV plane ze_image_desc_t imageViewDescPlaneUV = {ZE_STRUCTURE_TYPE_IMAGE_DESC, &planeUVdesc, (ZE_IMAGE_FLAG_KERNEL_WRITE | ZE_IMAGE_FLAG_BIAS_UNCACHED), ZE_IMAGE_TYPE_2D, {ZE_IMAGE_FORMAT_LAYOUT_8_8, ZE_IMAGE_FORMAT_TYPE_UINT, ZE_IMAGE_FORMAT_SWIZZLE_A, ZE_IMAGE_FORMAT_SWIZZLE_B, ZE_IMAGE_FORMAT_SWIZZLE_G, ZE_IMAGE_FORMAT_SWIZZLE_R}, width / 2, height / 2, depth, 0, 0}; ze_image_handle_t planeUVImageView; 
SUCCESS_OR_TERMINATE( zeImageViewCreateExp(context, device, &imageViewDescPlaneUV, srcImg, &planeUVImageView)); // prepare input data std::vector srcVecY; srcVecY.resize(width * height); for (size_t i = 0; i < width * height; ++i) { srcVecY[i] = static_cast(i); } std::vector srcVecUV; srcVecUV.resize((width / 2) * (height)); for (size_t i = 0; i < (width / 2) * (height); ++i) { if (i % 2 == 0) { srcVecUV[i] = static_cast(0x33); } else { srcVecUV[i] = static_cast(0x55); } } // prepare destination buffer uint8_t *dstMem; ze_host_mem_alloc_desc_t hostDesc = {}; hostDesc.stype = ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC; hostDesc.pNext = nullptr; hostDesc.flags = 0; SUCCESS_OR_TERMINATE(zeMemAllocHost(context, &hostDesc, size, 1, (void **)(&dstMem))); for (size_t i = 0; i < size; ++i) { dstMem[i] = 0xff; } ze_image_region_t srcYRegion = {0, 0, 0, width, height, depth}; ze_image_region_t srcUVRegion = {0, 0, 0, width / 2, height / 2, depth}; SUCCESS_OR_TERMINATE(zeCommandListAppendImageCopyFromMemory(cmdList, planeYImageView, srcVecY.data(), &srcYRegion, nullptr, 0, nullptr)); SUCCESS_OR_TERMINATE(zeCommandListAppendImageCopyFromMemory(cmdList, planeUVImageView, srcVecUV.data(), &srcUVRegion, nullptr, 0, nullptr)); SUCCESS_OR_TERMINATE(zeCommandListAppendBarrier(cmdList, nullptr, 0, nullptr)); { // create kernel which reads NV12 surface ze_module_handle_t module = nullptr; ze_kernel_handle_t kernel = nullptr; ze_module_desc_t moduleDesc = {}; ze_module_build_log_handle_t buildlog; moduleDesc.format = ZE_MODULE_FORMAT_IL_SPIRV; moduleDesc.pInputModule = spirV.data(); moduleDesc.inputSize = spirV.size(); moduleDesc.pBuildFlags = ""; if (zeModuleCreate(context, device, &moduleDesc, &module, &buildlog) != ZE_RESULT_SUCCESS) { size_t szLog = 0; zeModuleBuildLogGetString(buildlog, &szLog, nullptr); char *strLog = (char *)malloc(szLog); zeModuleBuildLogGetString(buildlog, &szLog, strLog); std::cout << "Build log:" << strLog << std::endl; free(strLog); } 
SUCCESS_OR_TERMINATE(zeModuleBuildLogDestroy(buildlog)); ze_kernel_desc_t kernelDesc = {}; kernelDesc.pKernelName = "ReadNV12Kernel"; SUCCESS_OR_TERMINATE(zeKernelCreate(module, &kernelDesc, &kernel)); uint32_t groupSizeX = width; uint32_t groupSizeY = height; uint32_t groupSizeZ = 1u; SUCCESS_OR_TERMINATE(zeKernelSuggestGroupSize(kernel, groupSizeX, 1U, 1U, &groupSizeX, &groupSizeY, &groupSizeZ)); SUCCESS_OR_TERMINATE(zeKernelSetGroupSize(kernel, groupSizeX, groupSizeY, groupSizeZ)); SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 0, sizeof(srcImg), &srcImg)); SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 1, sizeof(int), &width)); SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 2, sizeof(int), &height)); SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 3, sizeof(void *), &dstMem)); ze_group_count_t dispatchTraits; dispatchTraits.groupCountX = width / groupSizeX; dispatchTraits.groupCountY = height / groupSizeY; dispatchTraits.groupCountZ = 1u; SUCCESS_OR_TERMINATE(zeCommandListAppendLaunchKernel(cmdList, kernel, &dispatchTraits, nullptr, 0, nullptr)); SUCCESS_OR_TERMINATE(zeCommandListAppendBarrier(cmdList, nullptr, 0, nullptr)); } // destination buffer for Y plane std::vector dstVecY; dstVecY.resize(width * height); // destination buffer for UV plane std::vector dstVecUV; dstVecUV.resize((width / 2) * (height)); // read Y plane data SUCCESS_OR_TERMINATE(zeCommandListAppendImageCopyToMemory(cmdList, dstVecY.data(), planeYImageView, &srcYRegion, nullptr, 0, nullptr)); // read UV plane data SUCCESS_OR_TERMINATE(zeCommandListAppendImageCopyToMemory(cmdList, dstVecUV.data(), planeUVImageView, &srcUVRegion, nullptr, 0, nullptr)); SUCCESS_OR_TERMINATE(zeCommandListAppendBarrier(cmdList, nullptr, 0, nullptr)); SUCCESS_OR_TERMINATE(zeCommandListClose(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr)); SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits::max())); // validate Y 
plane data auto result = memcmp(srcVecY.data(), dstVecY.data(), width * height); validRet = true; if (result != 0) { std::cout << "Failed to validate data read for plane Y from Y-plane view" << std::endl; validRet = false; } result = memcmp(dstVecY.data(), dstMem, width * height); if (result != 0 && validRet) { std::cout << "Failed to validate data read for plane Y from nv12 surface" << std::endl; validRet = false; } // validate UV plane data result = memcmp(srcVecUV.data(), dstVecUV.data(), (width / 2) * (height)); if (result != 0 && validRet) { std::cout << "Failed to validate data read for plane Y from Y-plane view" << std::endl; validRet = false; } result = memcmp(dstVecUV.data(), (dstMem + (width * height)), (width / 2) * (height)); if (result != 0 && validRet) { std::cout << "Failed to validate data read for plane UV from nv12 surface" << std::endl; validRet = false; } // cleanup SUCCESS_OR_TERMINATE(zeImageDestroy(srcImg)); SUCCESS_OR_TERMINATE(zeImageDestroy(planeYImageView)); SUCCESS_OR_TERMINATE(zeImageDestroy(planeUVImageView)); SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueDestroy(cmdQueue)); } void testAppendImageViewRGBPCopy(ze_context_handle_t &context, ze_device_handle_t &device, bool &validRet) { const size_t width = 32; const size_t height = 32; const size_t depth = 1; const size_t size = 3 * width * height * depth; /* 3 channels per pixel */ ze_command_queue_handle_t cmdQueue = nullptr; ze_command_list_handle_t cmdList = nullptr; ze_command_queue_desc_t cmdQueueDesc = {}; cmdQueueDesc.pNext = nullptr; cmdQueueDesc.flags = 0; cmdQueueDesc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; cmdQueueDesc.priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL; cmdQueueDesc.ordinal = 0; cmdQueueDesc.index = 0; SUCCESS_OR_TERMINATE(zeCommandQueueCreate(context, device, &cmdQueueDesc, &cmdQueue)); ze_command_list_desc_t cmdListDesc = {}; cmdListDesc.stype = ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC; cmdListDesc.pNext = nullptr; 
cmdListDesc.flags = 0; SUCCESS_OR_TERMINATE(zeCommandListCreate(context, device, &cmdListDesc, &cmdList)); // create RGBP image ze_image_desc_t srcImgDesc = {ZE_STRUCTURE_TYPE_IMAGE_DESC, nullptr, (ZE_IMAGE_FLAG_BIAS_UNCACHED), ZE_IMAGE_TYPE_2D, {ZE_IMAGE_FORMAT_LAYOUT_RGBP, ZE_IMAGE_FORMAT_TYPE_UINT, ZE_IMAGE_FORMAT_SWIZZLE_R, ZE_IMAGE_FORMAT_SWIZZLE_G, ZE_IMAGE_FORMAT_SWIZZLE_B, ZE_IMAGE_FORMAT_SWIZZLE_A}, width, height, depth, 0, 0}; ze_image_handle_t srcImg; SUCCESS_OR_TERMINATE( zeImageCreate(context, device, const_cast(&srcImgDesc), &srcImg)); // create image_veiw for Y plane ze_image_view_planar_exp_desc_t planeYdesc = {}; planeYdesc.stype = ZE_STRUCTURE_TYPE_IMAGE_VIEW_PLANAR_EXP_DESC; planeYdesc.planeIndex = 0u; // Y plane ze_image_desc_t imageViewDescPlaneY = {ZE_STRUCTURE_TYPE_IMAGE_DESC, &planeYdesc, (ZE_IMAGE_FLAG_KERNEL_WRITE | ZE_IMAGE_FLAG_BIAS_UNCACHED), ZE_IMAGE_TYPE_2D, {ZE_IMAGE_FORMAT_LAYOUT_8, ZE_IMAGE_FORMAT_TYPE_UINT, ZE_IMAGE_FORMAT_SWIZZLE_A, ZE_IMAGE_FORMAT_SWIZZLE_B, ZE_IMAGE_FORMAT_SWIZZLE_G, ZE_IMAGE_FORMAT_SWIZZLE_R}, width, height, depth, 0, 0}; ze_image_handle_t planeYImageView; SUCCESS_OR_TERMINATE( zeImageViewCreateExp(context, device, &imageViewDescPlaneY, srcImg, &planeYImageView)); // create image_view for U plane ze_image_view_planar_exp_desc_t planeUdesc = {}; planeUdesc.stype = ZE_STRUCTURE_TYPE_IMAGE_VIEW_PLANAR_EXP_DESC; planeUdesc.planeIndex = 1u; // U plane ze_image_desc_t imageViewDescPlaneU = {ZE_STRUCTURE_TYPE_IMAGE_DESC, &planeUdesc, (ZE_IMAGE_FLAG_KERNEL_WRITE | ZE_IMAGE_FLAG_BIAS_UNCACHED), ZE_IMAGE_TYPE_2D, {ZE_IMAGE_FORMAT_LAYOUT_8, ZE_IMAGE_FORMAT_TYPE_UINT, ZE_IMAGE_FORMAT_SWIZZLE_A, ZE_IMAGE_FORMAT_SWIZZLE_B, ZE_IMAGE_FORMAT_SWIZZLE_G, ZE_IMAGE_FORMAT_SWIZZLE_R}, width, height, depth, 0, 0}; ze_image_handle_t planeUImageView; SUCCESS_OR_TERMINATE( zeImageViewCreateExp(context, device, &imageViewDescPlaneU, srcImg, &planeUImageView)); // create image_view for V plane ze_image_view_planar_exp_desc_t planeVdesc = 
{}; planeVdesc.stype = ZE_STRUCTURE_TYPE_IMAGE_VIEW_PLANAR_EXP_DESC; planeVdesc.planeIndex = 2u; // V plane ze_image_desc_t imageViewDescPlaneV = {ZE_STRUCTURE_TYPE_IMAGE_DESC, &planeVdesc, (ZE_IMAGE_FLAG_KERNEL_WRITE | ZE_IMAGE_FLAG_BIAS_UNCACHED), ZE_IMAGE_TYPE_2D, {ZE_IMAGE_FORMAT_LAYOUT_8, ZE_IMAGE_FORMAT_TYPE_UINT, ZE_IMAGE_FORMAT_SWIZZLE_A, ZE_IMAGE_FORMAT_SWIZZLE_B, ZE_IMAGE_FORMAT_SWIZZLE_G, ZE_IMAGE_FORMAT_SWIZZLE_R}, width, height, depth, 0, 0}; ze_image_handle_t planeVImageView; SUCCESS_OR_TERMINATE( zeImageViewCreateExp(context, device, &imageViewDescPlaneV, srcImg, &planeVImageView)); // prepare input data std::vector srcVecY; srcVecY.resize(width * height); for (size_t i = 0; i < width * height; ++i) { srcVecY[i] = static_cast(i); } std::vector srcVecU; srcVecU.resize(width * height); for (size_t i = 0; i < width * height; ++i) { if (i % 2 == 0) { srcVecU[i] = static_cast(0x33); } else { srcVecU[i] = static_cast(0x55); } } std::vector srcVecV; srcVecV.resize(width * height); for (size_t i = 0; i < width * height; ++i) { if (i % 2 == 0) { srcVecV[i] = static_cast(0x44); } else { srcVecV[i] = static_cast(0x66); } } // prepare destination buffer uint8_t *dstMem; ze_host_mem_alloc_desc_t hostDesc = {}; hostDesc.stype = ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC; hostDesc.pNext = nullptr; hostDesc.flags = 0; SUCCESS_OR_TERMINATE(zeMemAllocHost(context, &hostDesc, size, 1, (void **)(&dstMem))); for (size_t i = 0; i < size; ++i) { dstMem[i] = 0xff; } ze_image_region_t srcYRegion = {0, 0, 0, width, height, depth}; ze_image_region_t srcURegion = {0, 0, 0, width, height, depth}; ze_image_region_t srcVRegion = {0, 0, 0, width, height, depth}; SUCCESS_OR_TERMINATE(zeCommandListAppendImageCopyFromMemory(cmdList, planeYImageView, srcVecY.data(), &srcYRegion, nullptr, 0, nullptr)); SUCCESS_OR_TERMINATE(zeCommandListAppendImageCopyFromMemory(cmdList, planeUImageView, srcVecU.data(), &srcURegion, nullptr, 0, nullptr)); 
SUCCESS_OR_TERMINATE(zeCommandListAppendImageCopyFromMemory(cmdList, planeVImageView, srcVecV.data(), &srcVRegion, nullptr, 0, nullptr)); SUCCESS_OR_TERMINATE(zeCommandListAppendBarrier(cmdList, nullptr, 0, nullptr)); // destination buffer for Y plane std::vector dstVecY; dstVecY.resize(width * height); // destination buffer for U plane std::vector dstVecU; dstVecU.resize(width * height); // destination buffer for V plane std::vector dstVecV; dstVecV.resize(width * height); // read Y plane data SUCCESS_OR_TERMINATE(zeCommandListAppendImageCopyToMemory(cmdList, dstVecY.data(), planeYImageView, &srcYRegion, nullptr, 0, nullptr)); // read U plane data SUCCESS_OR_TERMINATE(zeCommandListAppendImageCopyToMemory(cmdList, dstVecU.data(), planeUImageView, &srcURegion, nullptr, 0, nullptr)); // read V plane data SUCCESS_OR_TERMINATE(zeCommandListAppendImageCopyToMemory(cmdList, dstVecV.data(), planeVImageView, &srcVRegion, nullptr, 0, nullptr)); SUCCESS_OR_TERMINATE(zeCommandListAppendBarrier(cmdList, nullptr, 0, nullptr)); SUCCESS_OR_TERMINATE(zeCommandListClose(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr)); SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits::max())); // validate Y plane data auto result = memcmp(srcVecY.data(), dstVecY.data(), width * height); if (result != 0) { std::cout << "Failed to validate data read for plane Y from Y-plane view" << std::endl; validRet = false; } // validate U plane data result = memcmp(srcVecU.data(), dstVecU.data(), width * height); if (result != 0 && validRet) { std::cout << "Failed to validate data read for plane U from U-plane view" << std::endl; validRet = false; } // validate V plane data result = memcmp(srcVecV.data(), dstVecV.data(), width * height); if (result != 0 && validRet) { std::cout << "Failed to validate data read for plane V from V-plane view" << std::endl; validRet = false; } // cleanup SUCCESS_OR_TERMINATE(zeImageDestroy(srcImg)); 
SUCCESS_OR_TERMINATE(zeImageDestroy(planeYImageView)); SUCCESS_OR_TERMINATE(zeImageDestroy(planeUImageView)); SUCCESS_OR_TERMINATE(zeImageDestroy(planeVImageView)); SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueDestroy(cmdQueue)); } int main(int argc, char *argv[]) { ze_context_handle_t context = nullptr; auto devices = zelloInitContextAndGetDevices(context); auto device = devices[0]; bool outputValidationSuccessful = false; ze_device_properties_t deviceProperties = {}; SUCCESS_OR_TERMINATE(zeDeviceGetProperties(device, &deviceProperties)); std::cout << "Device : \n" << " * name : " << deviceProperties.name << "\n" << " * vendorId : " << std::hex << deviceProperties.vendorId << "\n"; testAppendImageViewNV12Copy(context, device, outputValidationSuccessful); testAppendImageViewRGBPCopy(context, device, outputValidationSuccessful); SUCCESS_OR_TERMINATE(zeContextDestroy(context)); std::cout << "\nZello Image View Results validation " << (outputValidationSuccessful ? "PASSED" : "FAILED") << "\n"; return (outputValidationSuccessful ? 
0 : 1); } compute-runtime-22.14.22890/level_zero/core/test/black_box_tests/zello_immediate.cpp000066400000000000000000000335061422164147700304530ustar00rootroot00000000000000/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include #include "zello_common.h" #include #include #include #include #include bool verbose = false; void createImmediateCommandList(ze_device_handle_t &device, ze_context_handle_t &context, uint32_t queueGroupOrdinal, bool syncMode, ze_command_list_handle_t &cmdList) { ze_command_queue_desc_t cmdQueueDesc = {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC}; cmdQueueDesc.pNext = nullptr; cmdQueueDesc.flags = 0; cmdQueueDesc.priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL; cmdQueueDesc.ordinal = queueGroupOrdinal; cmdQueueDesc.index = 0; if (syncMode) { if (verbose) std::cout << "Choosing Command Queue mode synchronous" << std::endl; cmdQueueDesc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS; } else { if (verbose) std::cout << "Choosing Command Queue mode asynchronous" << std::endl; cmdQueueDesc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; } SUCCESS_OR_TERMINATE(zeCommandListCreateImmediate(context, device, &cmdQueueDesc, &cmdList)); } void testCopyBetweenHostMemAndDeviceMem(ze_context_handle_t &context, ze_device_handle_t &device, bool syncMode, int32_t copyQueueGroup, bool &validRet) { const size_t allocSize = 4096 + 7; // +7 to brake alignment and make it harder char *hostBuffer = nullptr; void *deviceBuffer = nullptr; char *stackBuffer = new char[allocSize]; ze_command_list_handle_t cmdList; createImmediateCommandList(device, context, copyQueueGroup, syncMode, cmdList); ze_host_mem_alloc_desc_t hostDesc = {}; hostDesc.stype = ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC; hostDesc.pNext = nullptr; hostDesc.flags = 0; SUCCESS_OR_TERMINATE(zeMemAllocHost(context, &hostDesc, allocSize, 1, (void **)(&hostBuffer))); ze_device_mem_alloc_desc_t deviceDesc = {}; deviceDesc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC; deviceDesc.ordinal = 0; 
deviceDesc.flags = 0; deviceDesc.pNext = nullptr; SUCCESS_OR_TERMINATE(zeMemAllocDevice(context, &deviceDesc, allocSize, allocSize, device, &deviceBuffer)); for (size_t i = 0; i < allocSize; ++i) { hostBuffer[i] = static_cast(i + 1); } memset(stackBuffer, 0, allocSize); // Create Events for synchronization ze_event_pool_handle_t eventPoolDevice, eventPoolHost; uint32_t numEvents = 2; std::vector deviceEvents(numEvents), hostEvents(numEvents); createEventPoolAndEvents(context, device, eventPoolDevice, (ze_event_pool_flag_t)(0), numEvents, deviceEvents.data(), ZE_EVENT_SCOPE_FLAG_SUBDEVICE, (ze_event_scope_flag_t)0); createEventPoolAndEvents(context, device, eventPoolHost, (ze_event_pool_flag_t)(ZE_EVENT_POOL_FLAG_HOST_VISIBLE), numEvents, hostEvents.data(), ZE_EVENT_SCOPE_FLAG_HOST, (ze_event_scope_flag_t)0); // Copy from host-allocated to device-allocated memory SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopy(cmdList, deviceBuffer, hostBuffer, allocSize, syncMode ? nullptr : deviceEvents[0], 0, nullptr)); // Copy from device-allocated memory to stack SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopy(cmdList, stackBuffer, deviceBuffer, allocSize, syncMode ? nullptr : hostEvents[0], syncMode ? 0 : 1, syncMode ? 
nullptr : &deviceEvents[0])); if (!syncMode) { // If Async mode, use event for sync SUCCESS_OR_TERMINATE(zeEventHostSynchronize(hostEvents[0], std::numeric_limits::max() - 1)); } // Validate stack and xe deviceBuffers have the original data from hostBuffer validRet = (0 == memcmp(hostBuffer, stackBuffer, allocSize)); delete[] stackBuffer; for (auto event : hostEvents) { SUCCESS_OR_TERMINATE(zeEventDestroy(event)); } for (auto event : deviceEvents) { SUCCESS_OR_TERMINATE(zeEventDestroy(event)); } SUCCESS_OR_TERMINATE(zeEventPoolDestroy(eventPoolHost)); SUCCESS_OR_TERMINATE(zeEventPoolDestroy(eventPoolDevice)); SUCCESS_OR_TERMINATE(zeMemFree(context, hostBuffer)); SUCCESS_OR_TERMINATE(zeMemFree(context, deviceBuffer)); SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdList)); } void executeGpuKernelAndValidate(ze_context_handle_t context, ze_device_handle_t &device, bool syncMode, bool &outputValidationSuccessful) { ze_command_list_handle_t cmdList; uint32_t computeOrdinal = getCommandQueueOrdinal(device); createImmediateCommandList(device, context, computeOrdinal, syncMode, cmdList); // Create two shared buffers constexpr size_t allocSize = 4096; ze_device_mem_alloc_desc_t deviceDesc = {ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC}; deviceDesc.flags = ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_UNCACHED; deviceDesc.ordinal = 0; ze_host_mem_alloc_desc_t hostDesc = {ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC}; hostDesc.flags = ZE_HOST_MEM_ALLOC_FLAG_BIAS_UNCACHED; void *srcBuffer = nullptr; SUCCESS_OR_TERMINATE(zeMemAllocShared(context, &deviceDesc, &hostDesc, allocSize, 1, device, &srcBuffer)); void *dstBuffer = nullptr; SUCCESS_OR_TERMINATE(zeMemAllocShared(context, &deviceDesc, &hostDesc, allocSize, 1, device, &dstBuffer)); // Initialize memory constexpr uint8_t val = 55; memset(srcBuffer, val, allocSize); memset(dstBuffer, 0, allocSize); ze_module_handle_t module = nullptr; ze_kernel_handle_t kernel = nullptr; std::ifstream file("copy_buffer_to_buffer.spv", std::ios::binary); 
ze_event_pool_handle_t eventPoolHost; uint32_t numEvents = 2; std::vector hostEvents(numEvents); createEventPoolAndEvents(context, device, eventPoolHost, (ze_event_pool_flag_t)(ZE_EVENT_POOL_FLAG_HOST_VISIBLE), numEvents, hostEvents.data(), ZE_EVENT_SCOPE_FLAG_HOST, (ze_event_scope_flag_t)0); if (file.is_open()) { file.seekg(0, file.end); auto length = file.tellg(); file.seekg(0, file.beg); std::unique_ptr spirvInput(new char[length]); file.read(spirvInput.get(), length); ze_module_desc_t moduleDesc = {ZE_STRUCTURE_TYPE_MODULE_DESC}; ze_module_build_log_handle_t buildlog; moduleDesc.format = ZE_MODULE_FORMAT_IL_SPIRV; moduleDesc.pInputModule = reinterpret_cast(spirvInput.get()); moduleDesc.inputSize = length; moduleDesc.pBuildFlags = ""; if (zeModuleCreate(context, device, &moduleDesc, &module, &buildlog) != ZE_RESULT_SUCCESS) { size_t szLog = 0; zeModuleBuildLogGetString(buildlog, &szLog, nullptr); char *strLog = (char *)malloc(szLog); zeModuleBuildLogGetString(buildlog, &szLog, strLog); std::cout << "Build log:" << strLog << std::endl; free(strLog); } SUCCESS_OR_TERMINATE(zeModuleBuildLogDestroy(buildlog)); ze_kernel_desc_t kernelDesc = {ZE_STRUCTURE_TYPE_KERNEL_DESC}; kernelDesc.pKernelName = "CopyBufferToBufferBytes"; SUCCESS_OR_TERMINATE(zeKernelCreate(module, &kernelDesc, &kernel)); uint32_t groupSizeX = 32u; uint32_t groupSizeY = 1u; uint32_t groupSizeZ = 1u; SUCCESS_OR_TERMINATE(zeKernelSuggestGroupSize(kernel, allocSize, 1U, 1U, &groupSizeX, &groupSizeY, &groupSizeZ)); SUCCESS_OR_TERMINATE(zeKernelSetGroupSize(kernel, groupSizeX, groupSizeY, groupSizeZ)); uint32_t offset = 0; SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 1, sizeof(dstBuffer), &dstBuffer)); SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 0, sizeof(srcBuffer), &srcBuffer)); SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 2, sizeof(uint32_t), &offset)); SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 3, sizeof(uint32_t), &offset)); 
SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 4, sizeof(uint32_t), &offset)); ze_group_count_t dispatchTraits; dispatchTraits.groupCountX = allocSize / groupSizeX; dispatchTraits.groupCountY = 1u; dispatchTraits.groupCountZ = 1u; SUCCESS_OR_TERMINATE(zeCommandListAppendLaunchKernel(cmdList, kernel, &dispatchTraits, syncMode ? nullptr : hostEvents[0], 0, nullptr)); file.close(); } else { // Perform a GPU copy SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopy(cmdList, dstBuffer, srcBuffer, allocSize, syncMode ? nullptr : hostEvents[0], 0, nullptr)); } if (!syncMode) { // If Async mode, use event for sync SUCCESS_OR_TERMINATE(zeEventHostSynchronize(hostEvents[0], std::numeric_limits::max() - 1)); } // Validate outputValidationSuccessful = true; if (memcmp(dstBuffer, srcBuffer, allocSize)) { outputValidationSuccessful = false; uint8_t *srcCharBuffer = static_cast(srcBuffer); uint8_t *dstCharBuffer = static_cast(dstBuffer); for (size_t i = 0; i < allocSize; i++) { if (srcCharBuffer[i] != dstCharBuffer[i]) { std::cout << "srcBuffer[" << i << "] = " << static_cast(srcCharBuffer[i]) << " not equal to " << "dstBuffer[" << i << "] = " << static_cast(dstCharBuffer[i]) << "\n"; break; } } } // Cleanup for (auto event : hostEvents) { SUCCESS_OR_TERMINATE(zeEventDestroy(event)); } SUCCESS_OR_TERMINATE(zeMemFree(context, dstBuffer)); SUCCESS_OR_TERMINATE(zeMemFree(context, srcBuffer)); SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdList)); } int main(int argc, char *argv[]) { verbose = isVerbose(argc, argv); ze_context_handle_t context = nullptr; ze_driver_handle_t driverHandle = nullptr; auto devices = zelloInitContextAndGetDevices(context, driverHandle); auto device = devices[0]; ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES}; SUCCESS_OR_TERMINATE(zeDeviceGetProperties(device, &deviceProperties)); std::cout << "Device : \n" << " * name : " << deviceProperties.name << "\n" << " * vendorId : " << std::hex << deviceProperties.vendorId << 
"\n"; bool outputValidationSuccessful = true; if (outputValidationSuccessful) { //Sync mode with Compute queue std::cout << "Test case: Sync mode compute queue with Kernel launch \n"; executeGpuKernelAndValidate(context, device, true, outputValidationSuccessful); } if (outputValidationSuccessful) { //Async mode with Compute queue std::cout << "\nTest case: Async mode compute queue with Kernel launch \n"; executeGpuKernelAndValidate(context, device, false, outputValidationSuccessful); } // Find copy queue in root device, if not found, try subdevices int32_t copyQueueGroup = 0; bool copyQueueFound = false; auto copyQueueDev = devices[0]; for (auto &rd : devices) { copyQueueGroup = getCopyOnlyCommandQueueOrdinal(rd); if (copyQueueGroup >= 0) { copyQueueFound = true; copyQueueDev = rd; if (verbose) { std::cout << "\nCopy queue group found in root device\n"; } break; } } if (!copyQueueFound) { if (verbose) { std::cout << "\nNo Copy queue group found in root device. Checking subdevices now...\n"; } copyQueueGroup = 0; for (auto &rd : devices) { int subDevCount = 0; auto subdevs = zelloGetSubDevices(rd, subDevCount); if (!subDevCount) { continue; } // Find subdev that has a copy engine. If not skip tests for (auto &sd : subdevs) { copyQueueGroup = getCopyOnlyCommandQueueOrdinal(sd); if (copyQueueGroup >= 0) { copyQueueFound = true; copyQueueDev = sd; break; } } if (copyQueueFound) { if (verbose) { std::cout << "\nCopy queue group found in sub device\n"; } break; } } } if (!copyQueueFound) { std::cout << "No Copy queue group found. 
Skipping further test runs\n"; } else { if (outputValidationSuccessful) { //Sync mode with Copy queue std::cout << "\nTest case: Sync mode copy queue for memory copy\n"; testCopyBetweenHostMemAndDeviceMem(context, copyQueueDev, true, copyQueueGroup, outputValidationSuccessful); } if (outputValidationSuccessful) { //Async mode with Copy queue std::cout << "\nTest case: Async mode copy queue for memory copy\n"; testCopyBetweenHostMemAndDeviceMem(context, copyQueueDev, false, copyQueueGroup, outputValidationSuccessful); } } SUCCESS_OR_TERMINATE(zeContextDestroy(context)); std::cout << "\nZello Immediate Results validation " << (outputValidationSuccessful ? "PASSED" : "FAILED") << "\n"; return (outputValidationSuccessful ? 0 : 1); } compute-runtime-22.14.22890/level_zero/core/test/black_box_tests/zello_ipc_copy_dma_buf.cpp000066400000000000000000000254361422164147700320020ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "zello_common.h" #include #include #include #include #define CHILDPROCESSES 4 int sv[CHILDPROCESSES][2]; extern bool verbose; bool verbose = false; size_t allocSize = 4096 + 7; // +7 to break alignment and make it harder static int sendmsg_fd(int socket, int fd) { char sendBuf[sizeof(ze_ipc_mem_handle_t)] = {}; char cmsgBuf[CMSG_SPACE(sizeof(ze_ipc_mem_handle_t))]; struct iovec msgBuffer; msgBuffer.iov_base = sendBuf; msgBuffer.iov_len = sizeof(*sendBuf); struct msghdr msgHeader = {}; msgHeader.msg_iov = &msgBuffer; msgHeader.msg_iovlen = 1; msgHeader.msg_control = cmsgBuf; msgHeader.msg_controllen = CMSG_LEN(sizeof(fd)); struct cmsghdr *controlHeader = CMSG_FIRSTHDR(&msgHeader); controlHeader->cmsg_type = SCM_RIGHTS; controlHeader->cmsg_level = SOL_SOCKET; controlHeader->cmsg_len = CMSG_LEN(sizeof(fd)); *(int *)CMSG_DATA(controlHeader) = fd; ssize_t bytesSent = sendmsg(socket, &msgHeader, 0); if (bytesSent < 0) { return -1; } return 0; } static int recvmsg_fd(int socket) { int fd 
= -1; char recvBuf[sizeof(ze_ipc_mem_handle_t)] = {}; char cmsgBuf[CMSG_SPACE(sizeof(ze_ipc_mem_handle_t))]; struct iovec msgBuffer; msgBuffer.iov_base = recvBuf; msgBuffer.iov_len = sizeof(recvBuf); struct msghdr msgHeader = {}; msgHeader.msg_iov = &msgBuffer; msgHeader.msg_iovlen = 1; msgHeader.msg_control = cmsgBuf; msgHeader.msg_controllen = CMSG_LEN(sizeof(fd)); ssize_t bytesSent = recvmsg(socket, &msgHeader, 0); if (bytesSent < 0) { return -1; } struct cmsghdr *controlHeader = CMSG_FIRSTHDR(&msgHeader); if (CMSG_DATA(controlHeader) == nullptr) { return -1; } memmove(&fd, CMSG_DATA(controlHeader), sizeof(int)); return fd; } inline void initializeProcess(ze_context_handle_t &context, ze_device_handle_t &device, ze_command_queue_handle_t &cmdQueue, ze_command_list_handle_t &cmdList) { SUCCESS_OR_TERMINATE(zeInit(ZE_INIT_FLAG_GPU_ONLY)); // Retrieve driver uint32_t driverCount = 0; SUCCESS_OR_TERMINATE(zeDriverGet(&driverCount, nullptr)); ze_driver_handle_t driverHandle; SUCCESS_OR_TERMINATE(zeDriverGet(&driverCount, &driverHandle)); ze_context_desc_t contextDesc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC}; SUCCESS_OR_TERMINATE(zeContextCreate(driverHandle, &contextDesc, &context)); // Retrieve device uint32_t deviceCount = 0; SUCCESS_OR_TERMINATE(zeDeviceGet(driverHandle, &deviceCount, nullptr)); deviceCount = 1; SUCCESS_OR_TERMINATE(zeDeviceGet(driverHandle, &deviceCount, &device)); // Print some properties ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES}; SUCCESS_OR_TERMINATE(zeDeviceGetProperties(device, &deviceProperties)); std::cout << "Device : \n" << " * name : " << deviceProperties.name << "\n" << " * vendorId : " << std::hex << deviceProperties.vendorId << "\n"; // Create command queue uint32_t numQueueGroups = 0; SUCCESS_OR_TERMINATE(zeDeviceGetCommandQueueGroupProperties(device, &numQueueGroups, nullptr)); if (numQueueGroups == 0) { std::cerr << "No queue groups found!\n"; std::terminate(); } std::vector 
queueProperties(numQueueGroups); for (auto &queueProperty : queueProperties) { queueProperty.stype = ZE_STRUCTURE_TYPE_COMMAND_QUEUE_GROUP_PROPERTIES; } SUCCESS_OR_TERMINATE(zeDeviceGetCommandQueueGroupProperties(device, &numQueueGroups, queueProperties.data())); ze_command_queue_desc_t cmdQueueDesc = {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC}; for (uint32_t i = 0; i < numQueueGroups; i++) { if (queueProperties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE) { cmdQueueDesc.ordinal = i; } } cmdQueueDesc.index = 0; cmdQueueDesc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; SUCCESS_OR_TERMINATE(zeCommandQueueCreate(context, device, &cmdQueueDesc, &cmdQueue)); // Create command list ze_command_list_desc_t cmdListDesc = {ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC}; cmdListDesc.commandQueueGroupOrdinal = cmdQueueDesc.ordinal; SUCCESS_OR_TERMINATE(zeCommandListCreate(context, device, &cmdListDesc, &cmdList)); } void run_client(int commSocket, uint32_t clientId) { std::cout << "Client " << clientId << ", process ID: " << std::dec << getpid() << "\n"; ze_context_handle_t context; ze_device_handle_t device; ze_command_queue_handle_t cmdQueue; ze_command_list_handle_t cmdList; initializeProcess(context, device, cmdQueue, cmdList); char *heapBuffer = new char[allocSize]; for (size_t i = 0; i < allocSize; ++i) { heapBuffer[i] = static_cast(i + 1); } // get the dma_buf from the other process int dma_buf_fd = recvmsg_fd(commSocket); if (dma_buf_fd < 0) { std::cerr << "Failing to get dma_buf fd from server\n"; std::terminate(); } ze_ipc_mem_handle_t pIpcHandle; memcpy(&pIpcHandle, static_cast(&dma_buf_fd), sizeof(dma_buf_fd)); // get a memory pointer to the BO associated with the dma_buf void *zeIpcBuffer; SUCCESS_OR_TERMINATE(zeMemOpenIpcHandle(context, device, pIpcHandle, 0u, &zeIpcBuffer)); // Copy from heap to IPC buffer memory SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopy(cmdList, zeIpcBuffer, heapBuffer, allocSize, nullptr, 0, nullptr)); 
SUCCESS_OR_TERMINATE(zeCommandListClose(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr)); SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits::max())); SUCCESS_OR_TERMINATE(zeMemCloseIpcHandle(context, zeIpcBuffer)); SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueDestroy(cmdQueue)); SUCCESS_OR_TERMINATE(zeContextDestroy(context)); delete[] heapBuffer; } void run_server(bool &validRet) { std::cout << "Server process ID " << std::dec << getpid() << "\n"; ze_context_handle_t context; ze_device_handle_t device; ze_command_queue_handle_t cmdQueue; ze_command_list_handle_t cmdList; initializeProcess(context, device, cmdQueue, cmdList); void *zeBuffer = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC}; SUCCESS_OR_TERMINATE(zeMemAllocDevice(context, &deviceDesc, allocSize, allocSize, device, &zeBuffer)); for (uint32_t i = 0; i < CHILDPROCESSES; i++) { // Initialize the IPC buffer int value = 3; SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryFill(cmdList, zeBuffer, reinterpret_cast(&value), sizeof(value), allocSize, nullptr, 0, nullptr)); SUCCESS_OR_TERMINATE(zeCommandListClose(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr)); SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits::max())); SUCCESS_OR_TERMINATE(zeCommandListReset(cmdList)); // Get a dma_buf for the previously allocated pointer ze_ipc_mem_handle_t pIpcHandle; SUCCESS_OR_TERMINATE(zeMemGetIpcHandle(context, zeBuffer, &pIpcHandle)); // Pass the dma_buf to the other process int dma_buf_fd; memcpy(static_cast(&dma_buf_fd), &pIpcHandle, sizeof(dma_buf_fd)); int commSocket = sv[i][0]; if (sendmsg_fd(commSocket, static_cast(dma_buf_fd)) < 0) { std::cerr << "Failing to send dma_buf fd to client\n"; std::terminate(); } char *heapBuffer = new char[allocSize]; for (size_t i = 0; i < allocSize; 
++i) { heapBuffer[i] = static_cast(i + 1); } // Wait for child to exit int child_status; pid_t clientPId = wait(&child_status); if (clientPId <= 0) { std::cerr << "Client terminated abruptly with error code " << strerror(errno) << "\n"; std::terminate(); } void *validateBuffer = nullptr; ze_host_mem_alloc_desc_t hostDesc = {ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC}; SUCCESS_OR_TERMINATE(zeMemAllocShared(context, &deviceDesc, &hostDesc, allocSize, 1, device, &validateBuffer)); value = 5; SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryFill(cmdList, validateBuffer, reinterpret_cast(&value), sizeof(value), allocSize, nullptr, 0, nullptr)); SUCCESS_OR_TERMINATE(zeCommandListAppendBarrier(cmdList, nullptr, 0, nullptr)); // Copy from device-allocated memory SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopy(cmdList, validateBuffer, zeBuffer, allocSize, nullptr, 0, nullptr)); SUCCESS_OR_TERMINATE(zeCommandListClose(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr)); SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits::max())); // Validate stack and buffers have the original data from heapBuffer validRet = (0 == memcmp(heapBuffer, validateBuffer, allocSize)); delete[] heapBuffer; } SUCCESS_OR_TERMINATE(zeMemFree(context, zeBuffer)); SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueDestroy(cmdQueue)); SUCCESS_OR_TERMINATE(zeContextDestroy(context)); } int main(int argc, char *argv[]) { verbose = isVerbose(argc, argv); bool outputValidationSuccessful; for (uint32_t i = 0; i < CHILDPROCESSES; i++) { if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv[i]) < 0) { perror("socketpair"); exit(1); } } pid_t childPids[CHILDPROCESSES]; for (uint32_t i = 0; i < CHILDPROCESSES; i++) { childPids[i] = fork(); if (childPids[i] < 0) { perror("fork"); exit(1); } else if (childPids[i] == 0) { close(sv[i][0]); run_client(sv[i][1], i); close(sv[i][1]); exit(0); } } 
run_server(outputValidationSuccessful); std::cout << "\nZello IPC Results validation " << (outputValidationSuccessful ? "PASSED" : "FAILED") << std::endl; return 0; } compute-runtime-22.14.22890/level_zero/core/test/black_box_tests/zello_ipc_copy_dma_buf_p2p.cpp000066400000000000000000000336011422164147700325540ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "zello_common.h" #include #include #include #include extern bool verbose; bool verbose = false; bool useCopyEngine = false; uint8_t uinitializedPattern = 1; uint8_t expectedPattern = 7; size_t allocSize = 4096 + 7; // +7 to break alignment and make it harder static int sendmsg_fd(int socket, int fd) { char sendBuf[sizeof(ze_ipc_mem_handle_t)] = {}; char cmsgBuf[CMSG_SPACE(sizeof(ze_ipc_mem_handle_t))]; struct iovec msgBuffer; msgBuffer.iov_base = sendBuf; msgBuffer.iov_len = sizeof(*sendBuf); struct msghdr msgHeader = {}; msgHeader.msg_iov = &msgBuffer; msgHeader.msg_iovlen = 1; msgHeader.msg_control = cmsgBuf; msgHeader.msg_controllen = CMSG_LEN(sizeof(fd)); struct cmsghdr *controlHeader = CMSG_FIRSTHDR(&msgHeader); controlHeader->cmsg_type = SCM_RIGHTS; controlHeader->cmsg_level = SOL_SOCKET; controlHeader->cmsg_len = CMSG_LEN(sizeof(fd)); *(int *)CMSG_DATA(controlHeader) = fd; ssize_t bytesSent = sendmsg(socket, &msgHeader, 0); if (bytesSent < 0) { return -1; } return 0; } static int recvmsg_fd(int socket) { int fd = -1; char recvBuf[sizeof(ze_ipc_mem_handle_t)] = {}; char cmsgBuf[CMSG_SPACE(sizeof(ze_ipc_mem_handle_t))]; struct iovec msgBuffer; msgBuffer.iov_base = recvBuf; msgBuffer.iov_len = sizeof(recvBuf); struct msghdr msgHeader = {}; msgHeader.msg_iov = &msgBuffer; msgHeader.msg_iovlen = 1; msgHeader.msg_control = cmsgBuf; msgHeader.msg_controllen = CMSG_LEN(sizeof(fd)); ssize_t bytesSent = recvmsg(socket, &msgHeader, 0); if (bytesSent < 0) { return -1; } struct cmsghdr *controlHeader = CMSG_FIRSTHDR(&msgHeader); if 
(CMSG_DATA(controlHeader) == nullptr) { return -1; } memmove(&fd, CMSG_DATA(controlHeader), sizeof(int)); return fd; } inline void initializeProcess(ze_context_handle_t &context, ze_device_handle_t &device, ze_command_queue_handle_t &cmdQueue, ze_command_list_handle_t &cmdList, ze_command_queue_handle_t &cmdQueueCopy, ze_command_list_handle_t &cmdListCopy, bool isServer) { SUCCESS_OR_TERMINATE(zeInit(ZE_INIT_FLAG_GPU_ONLY)); // Retrieve driver uint32_t driverCount = 0; SUCCESS_OR_TERMINATE(zeDriverGet(&driverCount, nullptr)); ze_driver_handle_t driverHandle; SUCCESS_OR_TERMINATE(zeDriverGet(&driverCount, &driverHandle)); ze_context_desc_t contextDesc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC}; SUCCESS_OR_TERMINATE(zeContextCreate(driverHandle, &contextDesc, &context)); // Retrieve device uint32_t deviceCount = 0; SUCCESS_OR_TERMINATE(zeDeviceGet(driverHandle, &deviceCount, nullptr)); std::cout << "Number of devices found: " << deviceCount << "\n"; std::vector devices(deviceCount); SUCCESS_OR_TERMINATE(zeDeviceGet(driverHandle, &deviceCount, devices.data())); // Make the server use device0 and the client device1 if available if (deviceCount > 1) { ze_bool_t canAccessPeer = false; SUCCESS_OR_TERMINATE(zeDeviceCanAccessPeer(devices[0], devices[1], &canAccessPeer)); if (canAccessPeer == false) { std::cerr << "Two devices found but no P2P capabilities detected\n"; std::terminate(); } else { std::cerr << "Two devices found and P2P capabilities detected\n"; } } if (isServer) { device = devices[0]; std::cout << "Server using device 0\n"; } else { if (deviceCount > 1) { device = devices[1]; std::cout << "Client using device 1\n"; } else { device = devices[0]; std::cout << "Client using device 0\n"; } } // Print some properties ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES}; SUCCESS_OR_TERMINATE(zeDeviceGetProperties(device, &deviceProperties)); std::cout << "Device : \n" << " * name : " << deviceProperties.name << "\n" << " * vendorId : " << 
std::hex << deviceProperties.vendorId << "\n"; // Create command queue uint32_t numQueueGroups = 0; SUCCESS_OR_TERMINATE(zeDeviceGetCommandQueueGroupProperties(device, &numQueueGroups, nullptr)); if (numQueueGroups == 0) { std::cerr << "No queue groups found!\n"; std::terminate(); } std::vector queueProperties(numQueueGroups); for (auto &queueProperty : queueProperties) { queueProperty.stype = ZE_STRUCTURE_TYPE_COMMAND_QUEUE_GROUP_PROPERTIES; } SUCCESS_OR_TERMINATE(zeDeviceGetCommandQueueGroupProperties(device, &numQueueGroups, queueProperties.data())); ze_command_queue_desc_t cmdQueueDesc = {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC}; ze_command_queue_desc_t cmdQueueDescCopy = {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC}; for (uint32_t i = 0; i < numQueueGroups; i++) { if (queueProperties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE) { cmdQueueDesc.ordinal = i; break; } } uint32_t copyOrdinal = std::numeric_limits::max(); if (useCopyEngine) { for (uint32_t i = 0; i < numQueueGroups; i++) { if ((queueProperties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE) == 0 && (queueProperties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY)) { copyOrdinal = i; break; } } } if (copyOrdinal == std::numeric_limits::max()) { std::cout << "Using EUs for copies\n"; cmdQueueDescCopy.ordinal = cmdQueueDesc.ordinal; } else { std::cout << "Using copy engines for copies\n"; cmdQueueDescCopy.ordinal = copyOrdinal; } std::cout << (isServer ? 
"Server " : "Client ") << " using queues " << cmdQueueDescCopy.ordinal << " and " << cmdQueueDesc.ordinal << "\n"; cmdQueueDesc.index = 0; cmdQueueDesc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; SUCCESS_OR_TERMINATE(zeCommandQueueCreate(context, device, &cmdQueueDesc, &cmdQueue)); cmdQueueDescCopy.index = 0; cmdQueueDescCopy.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; SUCCESS_OR_TERMINATE(zeCommandQueueCreate(context, device, &cmdQueueDescCopy, &cmdQueueCopy)); // Create command list ze_command_list_desc_t cmdListDesc = {ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC}; cmdListDesc.commandQueueGroupOrdinal = cmdQueueDesc.ordinal; SUCCESS_OR_TERMINATE(zeCommandListCreate(context, device, &cmdListDesc, &cmdList)); ze_command_list_desc_t cmdListDescCopy = {ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC}; cmdListDescCopy.commandQueueGroupOrdinal = cmdQueueDescCopy.ordinal; SUCCESS_OR_TERMINATE(zeCommandListCreate(context, device, &cmdListDescCopy, &cmdListCopy)); } void run_client(int commSocket) { std::cout << "Client process " << std::dec << getpid() << "\n"; ze_context_handle_t context; ze_device_handle_t device; ze_command_queue_handle_t cmdQueue; ze_command_list_handle_t cmdList; ze_command_queue_handle_t cmdQueueCopy; ze_command_list_handle_t cmdListCopy; initializeProcess(context, device, cmdQueue, cmdList, cmdQueueCopy, cmdListCopy, false); void *zeBuffer = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC}; SUCCESS_OR_TERMINATE(zeMemAllocDevice(context, &deviceDesc, allocSize, allocSize, device, &zeBuffer)); SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryFill(cmdList, zeBuffer, reinterpret_cast(&expectedPattern), sizeof(expectedPattern), allocSize, nullptr, 0, nullptr)); SUCCESS_OR_TERMINATE(zeCommandListClose(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr)); SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits::max())); // get the dma_buf from the other process int 
dma_buf_fd = recvmsg_fd(commSocket); if (dma_buf_fd < 0) { std::cerr << "Failing to get dma_buf fd from server\n"; std::terminate(); } ze_ipc_mem_handle_t pIpcHandle; memcpy(&pIpcHandle, static_cast(&dma_buf_fd), sizeof(dma_buf_fd)); // get a memory pointer to the BO associated with the dma_buf void *zeIpcBuffer; SUCCESS_OR_TERMINATE(zeMemOpenIpcHandle(context, device, pIpcHandle, 0u, &zeIpcBuffer)); // Copy from client to server SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopy(cmdListCopy, zeIpcBuffer, zeBuffer, allocSize, nullptr, 0, nullptr)); SUCCESS_OR_TERMINATE(zeCommandListClose(cmdListCopy)); SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueueCopy, 1, &cmdListCopy, nullptr)); SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueueCopy, std::numeric_limits::max())); SUCCESS_OR_TERMINATE(zeMemCloseIpcHandle(context, zeIpcBuffer)); SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueDestroy(cmdQueue)); SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdListCopy)); SUCCESS_OR_TERMINATE(zeCommandQueueDestroy(cmdQueueCopy)); SUCCESS_OR_TERMINATE(zeMemFree(context, zeBuffer)); SUCCESS_OR_TERMINATE(zeContextDestroy(context)); } void run_server(int commSocket, bool &validRet) { std::cout << "Server process " << std::dec << getpid() << "\n"; ze_context_handle_t context; ze_device_handle_t device; ze_command_queue_handle_t cmdQueue; ze_command_list_handle_t cmdList; ze_command_queue_handle_t cmdQueueCopy; ze_command_list_handle_t cmdListCopy; initializeProcess(context, device, cmdQueue, cmdList, cmdQueueCopy, cmdListCopy, true); void *zeBuffer = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC}; SUCCESS_OR_TERMINATE(zeMemAllocDevice(context, &deviceDesc, allocSize, allocSize, device, &zeBuffer)); // Initialize the IPC buffer SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryFill(cmdList, zeBuffer, reinterpret_cast(&uinitializedPattern), sizeof(uinitializedPattern), allocSize, nullptr, 0, 
nullptr)); SUCCESS_OR_TERMINATE(zeCommandListClose(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr)); SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits::max())); SUCCESS_OR_TERMINATE(zeCommandListReset(cmdList)); // Get a dma_buf for the previously allocated pointer ze_ipc_mem_handle_t pIpcHandle; SUCCESS_OR_TERMINATE(zeMemGetIpcHandle(context, zeBuffer, &pIpcHandle)); // Pass the dma_buf to the other process int dma_buf_fd; memcpy(static_cast(&dma_buf_fd), &pIpcHandle, sizeof(dma_buf_fd)); if (sendmsg_fd(commSocket, static_cast(dma_buf_fd)) < 0) { std::cerr << "Failing to send dma_buf fd to client\n"; std::terminate(); } char *heapBuffer = new char[allocSize]; for (size_t i = 0; i < allocSize; ++i) { heapBuffer[i] = expectedPattern; } // Wait for child to exit int child_status; pid_t clientPId = wait(&child_status); if (clientPId <= 0) { std::cerr << "Client terminated abruptly with error code " << strerror(errno) << "\n"; std::terminate(); } void *validateBuffer = nullptr; ze_host_mem_alloc_desc_t hostDesc = {ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC}; SUCCESS_OR_TERMINATE(zeMemAllocShared(context, &deviceDesc, &hostDesc, allocSize, 1, device, &validateBuffer)); SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryFill(cmdList, validateBuffer, reinterpret_cast(&uinitializedPattern), sizeof(uinitializedPattern), allocSize, nullptr, 0, nullptr)); SUCCESS_OR_TERMINATE(zeCommandListAppendBarrier(cmdList, nullptr, 0, nullptr)); // Copy from device-allocated memory SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopy(cmdList, validateBuffer, zeBuffer, allocSize, nullptr, 0, nullptr)); SUCCESS_OR_TERMINATE(zeCommandListClose(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr)); SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits::max())); // Validate stack and buffers have the original data from heapBuffer validRet = (0 == 
memcmp(heapBuffer, validateBuffer, allocSize)); delete[] heapBuffer; SUCCESS_OR_TERMINATE(zeMemFree(context, zeBuffer)); SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueDestroy(cmdQueue)); SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdListCopy)); SUCCESS_OR_TERMINATE(zeCommandQueueDestroy(cmdQueueCopy)); SUCCESS_OR_TERMINATE(zeContextDestroy(context)); } int main(int argc, char *argv[]) { verbose = isVerbose(argc, argv); bool outputValidationSuccessful; useCopyEngine = isParamEnabled(argc, argv, "-c", "--copyengine"); int sv[2]; if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv) < 0) { perror("socketpair"); exit(1); } int child = fork(); if (child < 0) { perror("fork"); exit(1); } else if (0 == child) { close(sv[0]); run_client(sv[1]); close(sv[1]); exit(0); } else { close(sv[1]); run_server(sv[0], outputValidationSuccessful); close(sv[0]); } std::cout << "\nZello IPC P2P Results validation " << (outputValidationSuccessful ? "PASSED" : "FAILED") << std::endl; return 0; } compute-runtime-22.14.22890/level_zero/core/test/black_box_tests/zello_ipc_event.cpp000066400000000000000000000360511422164147700304670ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "zello_common.h" #include #include #include #include #define CHILDPROCESSES 1 int sv[CHILDPROCESSES][2]; extern bool verbose; bool verbose = false; size_t allocSize = 4096 + 7; // +7 to break alignment and make it harder // Helpers to send and receive the IPC handles. // // L0 uses a vector of ZE_MAX_IPC_HANDLE_SIZE bytes to send the IPC handle // char data[ZE_MAX_IPC_HANDLE_SIZE]; // First four bytes (which is the sizeof(int)) of it contain the file descriptor // associated with the dma-buf, // Rest is payload to communicate extra info to the other processes. // For instance, the payload in the event pool's IPC handle contains the // number of events the pool has. 
static int sendmsgForIpcHandle(int socket, char *payload) { int fd = 0; memcpy(&fd, payload, sizeof(fd)); char sendBuf[ZE_MAX_IPC_HANDLE_SIZE] = {}; memcpy(sendBuf, payload + sizeof(int), sizeof(sendBuf) - sizeof(int)); char cmsgBuf[CMSG_SPACE(ZE_MAX_IPC_HANDLE_SIZE)]; struct iovec msgBuffer = {}; msgBuffer.iov_base = sendBuf; msgBuffer.iov_len = sizeof(*sendBuf); struct msghdr msgHeader = {}; msgHeader.msg_iov = &msgBuffer; msgHeader.msg_iovlen = 1; msgHeader.msg_control = cmsgBuf; msgHeader.msg_controllen = CMSG_LEN(sizeof(fd)); struct cmsghdr *controlHeader = CMSG_FIRSTHDR(&msgHeader); controlHeader->cmsg_type = SCM_RIGHTS; controlHeader->cmsg_level = SOL_SOCKET; controlHeader->cmsg_len = CMSG_LEN(sizeof(fd)); *(int *)CMSG_DATA(controlHeader) = fd; ssize_t bytesSent = sendmsg(socket, &msgHeader, 0); if (bytesSent < 0) { std::cerr << "Error on sendmsgForIpcHandle " << strerror(errno) << "\n"; return -1; } return 0; } static int recvmsgForIpcHandle(int socket, char *payload) { int fd = -1; char recvBuf[ZE_MAX_IPC_HANDLE_SIZE] = {}; char cmsgBuf[CMSG_SPACE(ZE_MAX_IPC_HANDLE_SIZE)]; struct iovec msgBuffer; msgBuffer.iov_base = recvBuf; msgBuffer.iov_len = sizeof(recvBuf); struct msghdr msgHeader = {}; msgHeader.msg_iov = &msgBuffer; msgHeader.msg_iovlen = 1; msgHeader.msg_control = cmsgBuf; msgHeader.msg_controllen = CMSG_LEN(sizeof(fd)); ssize_t bytesSent = recvmsg(socket, &msgHeader, 0); if (bytesSent < 0) { std::cerr << "Error on recvmsgForIpcHandle " << strerror(errno) << "\n"; return -1; } struct cmsghdr *controlHeader = CMSG_FIRSTHDR(&msgHeader); memcpy(&fd, CMSG_DATA(controlHeader), sizeof(int)); memcpy(payload, &fd, sizeof(fd)); memcpy(payload + sizeof(int), recvBuf, sizeof(recvBuf) - sizeof(int)); return 0; } inline void initializeProcess(ze_context_handle_t &context, ze_device_handle_t &device, ze_command_queue_handle_t &cmdQueue, ze_command_list_handle_t &cmdList) { SUCCESS_OR_TERMINATE(zeInit(ZE_INIT_FLAG_GPU_ONLY)); // Retrieve driver uint32_t 
driverCount = 0; SUCCESS_OR_TERMINATE(zeDriverGet(&driverCount, nullptr)); ze_driver_handle_t driverHandle; SUCCESS_OR_TERMINATE(zeDriverGet(&driverCount, &driverHandle)); ze_context_desc_t contextDesc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC}; SUCCESS_OR_TERMINATE(zeContextCreate(driverHandle, &contextDesc, &context)); // Retrieve device uint32_t deviceCount = 0; SUCCESS_OR_TERMINATE(zeDeviceGet(driverHandle, &deviceCount, nullptr)); deviceCount = 1; SUCCESS_OR_TERMINATE(zeDeviceGet(driverHandle, &deviceCount, &device)); // Print some properties ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES}; SUCCESS_OR_TERMINATE(zeDeviceGetProperties(device, &deviceProperties)); std::cout << "Device : \n" << " * name : " << deviceProperties.name << "\n" << " * vendorId : " << std::hex << deviceProperties.vendorId << "\n"; // Create command queue uint32_t numQueueGroups = 0; SUCCESS_OR_TERMINATE(zeDeviceGetCommandQueueGroupProperties(device, &numQueueGroups, nullptr)); if (numQueueGroups == 0) { std::cerr << "No queue groups found!\n"; std::terminate(); } std::vector queueProperties(numQueueGroups); for (auto &queueProperty : queueProperties) { queueProperty.stype = ZE_STRUCTURE_TYPE_COMMAND_QUEUE_GROUP_PROPERTIES; } SUCCESS_OR_TERMINATE(zeDeviceGetCommandQueueGroupProperties(device, &numQueueGroups, queueProperties.data())); ze_command_queue_desc_t cmdQueueDesc = {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC}; for (uint32_t i = 0; i < numQueueGroups; i++) { if (queueProperties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE) { cmdQueueDesc.ordinal = i; } } cmdQueueDesc.index = 0; cmdQueueDesc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; SUCCESS_OR_TERMINATE(zeCommandQueueCreate(context, device, &cmdQueueDesc, &cmdQueue)); // Create command list ze_command_list_desc_t cmdListDesc = {ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC}; cmdListDesc.commandQueueGroupOrdinal = cmdQueueDesc.ordinal; SUCCESS_OR_TERMINATE(zeCommandListCreate(context, device, &cmdListDesc, 
&cmdList)); } void run_client(int commSocket, uint32_t clientId) { std::cout << "Client " << clientId << ", process ID: " << std::dec << getpid() << "\n"; ze_context_handle_t context; ze_device_handle_t device; ze_command_queue_handle_t cmdQueue; ze_command_list_handle_t cmdList; initializeProcess(context, device, cmdQueue, cmdList); char *heapBuffer = new char[allocSize]; for (size_t i = 0; i < allocSize; ++i) { heapBuffer[i] = static_cast(i + 1); } // receieve the IPC handle for the memory from the other process ze_ipc_mem_handle_t pIpcHandle = {}; int ret = recvmsgForIpcHandle(commSocket, pIpcHandle.data); if (ret < 0) { std::cerr << "Failing to get IPC memory handle from server\n"; std::terminate(); } // get the allocation associated with the IPC handle void *zeIpcBuffer; SUCCESS_OR_TERMINATE(zeMemOpenIpcHandle(context, device, pIpcHandle, 0u, &zeIpcBuffer)); // receieve the IPC handle for the event pool from the other process ze_ipc_event_pool_handle_t pIpcEventPoolHandle = {}; ret = recvmsgForIpcHandle(commSocket, pIpcEventPoolHandle.data); if (ret < 0) { std::cerr << "Failing to get IPC event pool handle from server\n"; std::terminate(); } // get the event pool associated with the IPC handle ze_event_pool_handle_t eventPool = {}; SUCCESS_OR_TERMINATE(zeEventPoolOpenIpcHandle(context, pIpcEventPoolHandle, &eventPool)); // get the number of events from the payload uint32_t numEvents = 0; memcpy(&numEvents, pIpcEventPoolHandle.data + sizeof(int), sizeof(int)); std::vector events(numEvents); uint32_t i = 0; ze_event_desc_t eventDesc = {ZE_STRUCTURE_TYPE_EVENT_DESC}; eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST; eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; for (auto &event : events) { eventDesc.index = i++; SUCCESS_OR_TERMINATE(zeEventCreate(eventPool, &eventDesc, &event)); SUCCESS_OR_TERMINATE(zeEventHostReset(event)); ze_result_t eventStatus = zeEventQueryStatus(event); if (eventStatus != ZE_RESULT_NOT_READY) { std::cerr << "Event reset in clinent failed\n"; 
std::terminate(); } } // Copy from heap to IPC buffer memory SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopy(cmdList, zeIpcBuffer, heapBuffer, allocSize, events[0], 0, nullptr)); SUCCESS_OR_TERMINATE(zeCommandListAppendWaitOnEvents(cmdList, 1, &events[1])); SUCCESS_OR_TERMINATE(zeCommandListClose(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr)); SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits::max())); for (auto &event : events) { ze_result_t eventStatus = zeEventQueryStatus(event); if (eventStatus != ZE_RESULT_SUCCESS) { std::cerr << "Event status in client not correct\n"; std::terminate(); } SUCCESS_OR_TERMINATE(zeEventDestroy(event)); } SUCCESS_OR_TERMINATE(zeEventPoolCloseIpcHandle(eventPool)); SUCCESS_OR_TERMINATE(zeMemCloseIpcHandle(context, zeIpcBuffer)); SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueDestroy(cmdQueue)); SUCCESS_OR_TERMINATE(zeContextDestroy(context)); delete[] heapBuffer; } void run_server(bool &validRet) { std::cout << "Server process ID " << std::dec << getpid() << "\n"; ze_context_handle_t context; ze_device_handle_t device; ze_command_queue_handle_t cmdQueue; ze_command_list_handle_t cmdList; initializeProcess(context, device, cmdQueue, cmdList); void *zeBuffer = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC}; SUCCESS_OR_TERMINATE(zeMemAllocDevice(context, &deviceDesc, allocSize, allocSize, device, &zeBuffer)); uint32_t numEvents = 2; ze_event_pool_handle_t eventPool = {}; ze_event_pool_desc_t eventPoolDesc = {ZE_STRUCTURE_TYPE_EVENT_POOL_DESC}; eventPoolDesc.count = numEvents; eventPoolDesc.flags = {}; SUCCESS_OR_TERMINATE(zeEventPoolCreate(context, &eventPoolDesc, 1, &device, &eventPool)); std::vector events(numEvents); uint32_t i = 0; ze_event_desc_t eventDesc = {ZE_STRUCTURE_TYPE_EVENT_DESC}; eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST; eventDesc.wait = 
ZE_EVENT_SCOPE_FLAG_HOST; for (auto &event : events) { eventDesc.index = i++; SUCCESS_OR_TERMINATE(zeEventCreate(eventPool, &eventDesc, &event)); SUCCESS_OR_TERMINATE(zeEventHostReset(event)); ze_result_t eventStatus = zeEventQueryStatus(event); if (eventStatus != ZE_RESULT_NOT_READY) { std::cerr << "Event status in server before starting not correct\n"; std::terminate(); } } for (uint32_t i = 0; i < CHILDPROCESSES; i++) { // Initialize the IPC buffer char value = 3; SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryFill(cmdList, zeBuffer, reinterpret_cast(&value), sizeof(value), allocSize, nullptr, 0, nullptr)); SUCCESS_OR_TERMINATE(zeCommandListClose(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr)); SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits::max())); SUCCESS_OR_TERMINATE(zeCommandListReset(cmdList)); // Get the IPC handle for the previously allocated pointer ze_ipc_mem_handle_t pIpcHandle; SUCCESS_OR_TERMINATE(zeMemGetIpcHandle(context, zeBuffer, &pIpcHandle)); // Pass the IPC handle to the other process int commSocket = sv[i][0]; int ret = sendmsgForIpcHandle(commSocket, pIpcHandle.data); if (ret < 0) { std::cerr << "Failing to send IPC memory handle to client\n"; std::terminate(); } // Get the IPC handle for the event pool ze_ipc_event_pool_handle_t pIpcEventPoolHandle; SUCCESS_OR_TERMINATE(zeEventPoolGetIpcHandle(eventPool, &pIpcEventPoolHandle)); // Pass the IPC handle to the other process ret = sendmsgForIpcHandle(commSocket, pIpcEventPoolHandle.data); if (ret) { std::cerr << "Failing to send IPC event pool handle to client\n"; std::terminate(); } // Initialize buffer char *heapBuffer = new char[allocSize]; for (size_t i = 0; i < allocSize; ++i) { heapBuffer[i] = static_cast(i + 1); } void *validateBuffer = nullptr; ze_host_mem_alloc_desc_t hostDesc = {ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC}; SUCCESS_OR_TERMINATE(zeMemAllocShared(context, &deviceDesc, &hostDesc, allocSize, 1, 
device, &validateBuffer)); value = 5; SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryFill(cmdList, validateBuffer, reinterpret_cast(&value), sizeof(value), allocSize, nullptr, 0, nullptr)); SUCCESS_OR_TERMINATE(zeCommandListAppendBarrier(cmdList, nullptr, 0, nullptr)); // Copy from device-allocated memory SUCCESS_OR_TERMINATE(zeCommandListAppendWaitOnEvents(cmdList, 1, &events[0])); SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopy(cmdList, validateBuffer, zeBuffer, allocSize, events[1], 0, nullptr)); SUCCESS_OR_TERMINATE(zeCommandListClose(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr)); SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits::max())); // Validate stack and buffers have the original data from heapBuffer validRet = (0 == memcmp(heapBuffer, validateBuffer, allocSize)); char *valBuffer = (char *)validateBuffer; if (verbose) { for (uint32_t i = 0; i < allocSize; i++) { printf("valBuffer[%d] = %d heapBuffer[%d] = %d\n", i, valBuffer[i], i, heapBuffer[i]); } } delete[] heapBuffer; } for (auto &event : events) { ze_result_t eventStatus = zeEventQueryStatus(event); if (eventStatus != ZE_RESULT_SUCCESS) { std::cerr << "Event status in server after finishing not correct\n"; std::terminate(); } SUCCESS_OR_TERMINATE(zeEventDestroy(event)); } SUCCESS_OR_TERMINATE(zeEventPoolDestroy(eventPool)); SUCCESS_OR_TERMINATE(zeMemFree(context, zeBuffer)); SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueDestroy(cmdQueue)); SUCCESS_OR_TERMINATE(zeContextDestroy(context)); } int main(int argc, char *argv[]) { verbose = isVerbose(argc, argv); bool outputValidationSuccessful; if (verbose) { allocSize = 16; } for (uint32_t i = 0; i < CHILDPROCESSES; i++) { if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv[i]) < 0) { perror("socketpair"); exit(1); } } pid_t childPids[CHILDPROCESSES]; for (uint32_t i = 0; i < CHILDPROCESSES; i++) { childPids[i] = fork(); if (childPids[i] < 
0) { perror("fork"); exit(1); } else if (childPids[i] == 0) { close(sv[i][0]); run_client(sv[i][1], i); close(sv[i][1]); exit(0); } } run_server(outputValidationSuccessful); std::cout << "\nZello IPC Results validation " << (outputValidationSuccessful ? "PASSED" : "FAILED") << std::endl; return (outputValidationSuccessful ? 0 : 1); } compute-runtime-22.14.22890/level_zero/core/test/black_box_tests/zello_ipc_p2p_with_ipc_event.cpp000066400000000000000000000403041422164147700331320ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "zello_common.h" #include #include #include #include extern bool verbose; bool verbose = false; uint8_t uinitializedPattern = 1; uint8_t expectedPattern = 7; size_t allocSize = 4096 + 7; // +7 to break alignment and make it harder uint32_t serverDevice = 0; uint32_t clientDevice = 1; static int sendmsg_fd(int socket, int fd, char *payload, size_t payloadLen) { char sendBuf[sizeof(ze_ipc_mem_handle_t)] = {}; char cmsgBuf[CMSG_SPACE(sizeof(ze_ipc_mem_handle_t))]; struct iovec msgBuffer; msgBuffer.iov_base = payload; msgBuffer.iov_len = payloadLen; struct msghdr msgHeader = {}; msgHeader.msg_iov = &msgBuffer; msgHeader.msg_iovlen = 1; msgHeader.msg_control = cmsgBuf; msgHeader.msg_controllen = CMSG_LEN(sizeof(fd)); struct cmsghdr *controlHeader = CMSG_FIRSTHDR(&msgHeader); controlHeader->cmsg_type = SCM_RIGHTS; controlHeader->cmsg_level = SOL_SOCKET; controlHeader->cmsg_len = CMSG_LEN(sizeof(fd)); *(int *)CMSG_DATA(controlHeader) = fd; ssize_t bytesSent = sendmsg(socket, &msgHeader, 0); if (bytesSent < 0) { return -1; } return 0; } static int recvmsg_fd(int socket, char *payload, size_t payloadLen) { int fd = -1; char recvBuf[sizeof(ze_ipc_mem_handle_t)] = {}; char cmsgBuf[CMSG_SPACE(sizeof(ze_ipc_mem_handle_t))]; struct iovec msgBuffer; msgBuffer.iov_base = payload; msgBuffer.iov_len = payloadLen; struct msghdr msgHeader = {}; msgHeader.msg_iov = &msgBuffer; 
msgHeader.msg_iovlen = 1; msgHeader.msg_control = cmsgBuf; msgHeader.msg_controllen = CMSG_LEN(sizeof(fd)); ssize_t bytesSent = recvmsg(socket, &msgHeader, 0); if (bytesSent < 0) { return -1; } struct cmsghdr *controlHeader = CMSG_FIRSTHDR(&msgHeader); memmove(&fd, CMSG_DATA(controlHeader), sizeof(int)); return fd; } inline void initializeProcess(ze_context_handle_t &context, ze_device_handle_t &device, ze_command_queue_handle_t &cmdQueue, ze_command_list_handle_t &cmdList, ze_command_queue_handle_t &cmdQueueCopy, ze_command_list_handle_t &cmdListCopy, bool isServer) { SUCCESS_OR_TERMINATE(zeInit(ZE_INIT_FLAG_GPU_ONLY)); // Retrieve driver uint32_t driverCount = 0; SUCCESS_OR_TERMINATE(zeDriverGet(&driverCount, nullptr)); ze_driver_handle_t driverHandle; SUCCESS_OR_TERMINATE(zeDriverGet(&driverCount, &driverHandle)); ze_context_desc_t contextDesc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC}; SUCCESS_OR_TERMINATE(zeContextCreate(driverHandle, &contextDesc, &context)); // Retrieve device uint32_t deviceCount = 0; SUCCESS_OR_TERMINATE(zeDeviceGet(driverHandle, &deviceCount, nullptr)); std::cout << "Number of devices found: " << deviceCount << "\n"; std::vector devices(deviceCount); SUCCESS_OR_TERMINATE(zeDeviceGet(driverHandle, &deviceCount, devices.data())); // Make the server use device0 and the client device1 if available if (deviceCount > 1) { ze_bool_t canAccessPeer = false; SUCCESS_OR_TERMINATE(zeDeviceCanAccessPeer(devices[0], devices[1], &canAccessPeer)); if (canAccessPeer == false) { std::cerr << "Two devices found but no P2P capabilities detected\n"; std::terminate(); } else { std::cerr << "Two devices found and P2P capabilities detected\n"; } } if (deviceCount == 1) { serverDevice = clientDevice = 0; } if (isServer == false) { device = devices[clientDevice]; std::cout << "Client using device " << clientDevice << "\n"; } else { device = devices[serverDevice]; std::cout << "Server using device " << serverDevice << "\n"; } // Print some properties ze_device_properties_t 
deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES}; SUCCESS_OR_TERMINATE(zeDeviceGetProperties(device, &deviceProperties)); std::cout << "Device : \n" << " * name : " << deviceProperties.name << "\n" << " * vendorId : " << std::hex << deviceProperties.vendorId << "\n"; // Create command queue uint32_t numQueueGroups = 0; SUCCESS_OR_TERMINATE(zeDeviceGetCommandQueueGroupProperties(device, &numQueueGroups, nullptr)); if (numQueueGroups == 0) { std::cerr << "No queue groups found!\n"; std::terminate(); } std::vector queueProperties(numQueueGroups); for (auto &queueProperty : queueProperties) { queueProperty.stype = ZE_STRUCTURE_TYPE_COMMAND_QUEUE_GROUP_PROPERTIES; } SUCCESS_OR_TERMINATE(zeDeviceGetCommandQueueGroupProperties(device, &numQueueGroups, queueProperties.data())); ze_command_queue_desc_t cmdQueueDesc = {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC}; ze_command_queue_desc_t cmdQueueDescCopy = {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC}; for (uint32_t i = 0; i < numQueueGroups; i++) { if (queueProperties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE) { cmdQueueDesc.ordinal = i; break; } } std::cout << (isServer ? 
"Server " : "Client ") << " using queues " << cmdQueueDescCopy.ordinal << " and " << cmdQueueDesc.ordinal << "\n"; cmdQueueDesc.index = 0; cmdQueueDesc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; SUCCESS_OR_TERMINATE(zeCommandQueueCreate(context, device, &cmdQueueDesc, &cmdQueue)); cmdQueueDescCopy.index = 0; cmdQueueDescCopy.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; SUCCESS_OR_TERMINATE(zeCommandQueueCreate(context, device, &cmdQueueDescCopy, &cmdQueueCopy)); // Create command list ze_command_list_desc_t cmdListDesc = {ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC}; cmdListDesc.commandQueueGroupOrdinal = cmdQueueDesc.ordinal; SUCCESS_OR_TERMINATE(zeCommandListCreate(context, device, &cmdListDesc, &cmdList)); ze_command_list_desc_t cmdListDescCopy = {ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC}; cmdListDescCopy.commandQueueGroupOrdinal = cmdQueueDescCopy.ordinal; SUCCESS_OR_TERMINATE(zeCommandListCreate(context, device, &cmdListDescCopy, &cmdListCopy)); } void run_client(int commSocket) { std::cout << "Client process " << std::dec << getpid() << "\n"; ze_context_handle_t context; ze_device_handle_t device; ze_command_queue_handle_t cmdQueue; ze_command_list_handle_t cmdList; ze_command_queue_handle_t cmdQueueCopy; ze_command_list_handle_t cmdListCopy; initializeProcess(context, device, cmdQueue, cmdList, cmdQueueCopy, cmdListCopy, false); // receieve the IPC handle for the event pool from the other process ze_ipc_event_pool_handle_t pIpcEventPoolHandle = {}; int dma_buf_fd = recvmsg_fd(commSocket, pIpcEventPoolHandle.data, ZE_MAX_IPC_HANDLE_SIZE); if (dma_buf_fd < 0) { std::cerr << "Failing to get IPC event pool handle from server\n"; std::terminate(); } // get the event pool associated with the IPC handle ze_event_pool_handle_t eventPool = {}; SUCCESS_OR_TERMINATE(zeEventPoolOpenIpcHandle(context, pIpcEventPoolHandle, &eventPool)); // get the number of events from the payload uint32_t numEvents = 0; memcpy(&numEvents, pIpcEventPoolHandle.data + sizeof(int), sizeof(int)); 
std::vector events(numEvents); uint32_t i = 0; ze_event_desc_t eventDesc = {ZE_STRUCTURE_TYPE_EVENT_DESC}; eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST; eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; for (auto &event : events) { eventDesc.index = i++; SUCCESS_OR_TERMINATE(zeEventCreate(eventPool, &eventDesc, &event)); SUCCESS_OR_TERMINATE(zeEventHostReset(event)); ze_result_t eventStatus = zeEventQueryStatus(event); if (eventStatus != ZE_RESULT_NOT_READY) { std::cerr << "Event reset in clinent failed\n"; std::terminate(); } } void *zeBuffer = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC}; SUCCESS_OR_TERMINATE(zeMemAllocDevice(context, &deviceDesc, allocSize, allocSize, device, &zeBuffer)); SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryFill(cmdList, zeBuffer, reinterpret_cast(&expectedPattern), sizeof(expectedPattern), allocSize, nullptr, 0, nullptr)); SUCCESS_OR_TERMINATE(zeCommandListClose(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr)); SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits::max())); // get the dma_buf from the other process ze_ipc_mem_handle_t pIpcHandle; dma_buf_fd = recvmsg_fd(commSocket, pIpcHandle.data, ZE_MAX_IPC_HANDLE_SIZE); if (dma_buf_fd < 0) { std::cerr << "Failing to get dma_buf fd from server\n"; std::terminate(); } memcpy(&pIpcHandle, static_cast(&dma_buf_fd), sizeof(dma_buf_fd)); // get a memory pointer to the BO associated with the dma_buf void *zeIpcBuffer; SUCCESS_OR_TERMINATE(zeMemOpenIpcHandle(context, device, pIpcHandle, 0u, &zeIpcBuffer)); // Copy from client to server SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopy(cmdListCopy, zeIpcBuffer, zeBuffer, allocSize, events[0], 0, nullptr)); SUCCESS_OR_TERMINATE(zeCommandListClose(cmdListCopy)); SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueueCopy, 1, &cmdListCopy, nullptr)); SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueueCopy, 
std::numeric_limits::max())); SUCCESS_OR_TERMINATE(zeMemCloseIpcHandle(context, zeIpcBuffer)); SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueDestroy(cmdQueue)); SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdListCopy)); SUCCESS_OR_TERMINATE(zeCommandQueueDestroy(cmdQueueCopy)); SUCCESS_OR_TERMINATE(zeMemFree(context, zeBuffer)); SUCCESS_OR_TERMINATE(zeContextDestroy(context)); } void run_server(int commSocket, bool &validRet) { std::cout << "Server process " << std::dec << getpid() << "\n"; ze_context_handle_t context; ze_device_handle_t device; ze_command_queue_handle_t cmdQueue; ze_command_list_handle_t cmdList; ze_command_queue_handle_t cmdQueueCopy; ze_command_list_handle_t cmdListCopy; initializeProcess(context, device, cmdQueue, cmdList, cmdQueueCopy, cmdListCopy, true); uint32_t numEvents = 2; ze_event_pool_handle_t eventPool = {}; ze_event_pool_desc_t eventPoolDesc = {ZE_STRUCTURE_TYPE_EVENT_POOL_DESC}; eventPoolDesc.count = numEvents; eventPoolDesc.flags = {}; SUCCESS_OR_TERMINATE(zeEventPoolCreate(context, &eventPoolDesc, 1, &device, &eventPool)); std::vector events(numEvents); uint32_t i = 0; ze_event_desc_t eventDesc = {ZE_STRUCTURE_TYPE_EVENT_DESC}; eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST; eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; for (auto &event : events) { eventDesc.index = i++; SUCCESS_OR_TERMINATE(zeEventCreate(eventPool, &eventDesc, &event)); SUCCESS_OR_TERMINATE(zeEventHostReset(event)); ze_result_t eventStatus = zeEventQueryStatus(event); if (eventStatus != ZE_RESULT_NOT_READY) { std::cerr << "Event status in server before starting not correct\n"; std::terminate(); } } // Get the IPC handle for the event pool ze_ipc_event_pool_handle_t pIpcEventPoolHandle; SUCCESS_OR_TERMINATE(zeEventPoolGetIpcHandle(eventPool, &pIpcEventPoolHandle)); // Pass the IPC handle to the other process int dma_buf_fd; memcpy(static_cast(&dma_buf_fd), &pIpcEventPoolHandle, sizeof(dma_buf_fd)); if (sendmsg_fd(commSocket, 
static_cast(dma_buf_fd), pIpcEventPoolHandle.data, ZE_MAX_IPC_HANDLE_SIZE) < 0) { std::cerr << "Failing to send IPC event pool handle to client\n"; std::terminate(); } void *zeBuffer = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC}; SUCCESS_OR_TERMINATE(zeMemAllocDevice(context, &deviceDesc, allocSize, allocSize, device, &zeBuffer)); // Initialize the IPC buffer SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryFill(cmdList, zeBuffer, reinterpret_cast(&uinitializedPattern), sizeof(uinitializedPattern), allocSize, nullptr, 0, nullptr)); SUCCESS_OR_TERMINATE(zeCommandListClose(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr)); SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits::max())); SUCCESS_OR_TERMINATE(zeCommandListReset(cmdList)); // Get a dma_buf for the previously allocated pointer ze_ipc_mem_handle_t pIpcHandle; SUCCESS_OR_TERMINATE(zeMemGetIpcHandle(context, zeBuffer, &pIpcHandle)); // Pass the dma_buf to the other process memcpy(static_cast(&dma_buf_fd), &pIpcHandle, sizeof(dma_buf_fd)); if (sendmsg_fd(commSocket, static_cast(dma_buf_fd), pIpcHandle.data, ZE_MAX_IPC_HANDLE_SIZE) < 0) { std::cerr << "Failing to send dma_buf fd to client\n"; std::terminate(); } char *heapBuffer = new char[allocSize]; for (size_t i = 0; i < allocSize; ++i) { heapBuffer[i] = expectedPattern; } // Wait for child to exit int child_status; pid_t clientPId = wait(&child_status); if (clientPId <= 0) { std::cerr << "Client terminated abruptly with error code " << strerror(errno) << "\n"; std::terminate(); } void *validateBuffer = nullptr; ze_host_mem_alloc_desc_t hostDesc = {ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC}; SUCCESS_OR_TERMINATE(zeMemAllocShared(context, &deviceDesc, &hostDesc, allocSize, 1, device, &validateBuffer)); SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryFill(cmdList, validateBuffer, reinterpret_cast(&uinitializedPattern), sizeof(uinitializedPattern), 
allocSize, nullptr, 0, nullptr)); SUCCESS_OR_TERMINATE(zeCommandListAppendBarrier(cmdList, nullptr, 0, nullptr)); // Copy from device-allocated memory SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopy(cmdList, validateBuffer, zeBuffer, allocSize, nullptr, 1, &events[0])); //nullptr, 0, nullptr)); SUCCESS_OR_TERMINATE(zeCommandListClose(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr)); SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits::max())); // Validate stack and buffers have the original data from heapBuffer validRet = (0 == memcmp(heapBuffer, validateBuffer, allocSize)); delete[] heapBuffer; SUCCESS_OR_TERMINATE(zeMemFree(context, zeBuffer)); SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueDestroy(cmdQueue)); SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdListCopy)); SUCCESS_OR_TERMINATE(zeCommandQueueDestroy(cmdQueueCopy)); SUCCESS_OR_TERMINATE(zeContextDestroy(context)); } int main(int argc, char *argv[]) { verbose = isVerbose(argc, argv); bool outputValidationSuccessful; serverDevice = getParamValue(argc, argv, "-s", "--serverdevice", 1); clientDevice = getParamValue(argc, argv, "-c", "--clientdevice", 0); int sv[2]; if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv) < 0) { perror("socketpair"); exit(1); } int child = fork(); if (child < 0) { perror("fork"); exit(1); } else if (0 == child) { close(sv[0]); run_client(sv[1]); close(sv[1]); exit(0); } else { close(sv[1]); run_server(sv[0], outputValidationSuccessful); close(sv[0]); } std::cout << "\nZello IPC P2P With Event Results validation " << (outputValidationSuccessful ? 
"PASSED" : "FAILED") << std::endl; return 0; } compute-runtime-22.14.22890/level_zero/core/test/black_box_tests/zello_printf.cpp000066400000000000000000000076551422164147700300250ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include #include "zello_common.h" #include "zello_compile.h" #include #include #include extern bool verbose; bool verbose = false; const char *source = R"===( __kernel void test_printf(__global char *dst, __global char *src){ uint gid = get_global_id(0); printf("global_id = %d\n", gid); } )==="; void testPrintfKernel(ze_context_handle_t context, ze_device_handle_t &device) { ze_module_handle_t module; ze_kernel_handle_t kernel; ze_command_queue_handle_t cmdQueue; ze_command_list_handle_t cmdList; ze_group_count_t dispatchTraits; ze_command_queue_desc_t cmdQueueDesc = {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC}; cmdQueueDesc.ordinal = 0; cmdQueueDesc.index = 0; cmdQueueDesc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; SUCCESS_OR_TERMINATE(zeCommandQueueCreate(context, device, &cmdQueueDesc, &cmdQueue)); SUCCESS_OR_TERMINATE(createCommandList(context, device, cmdList)); std::string buildLog; auto spirV = compileToSpirV(source, "", buildLog); if (buildLog.size() > 0) { std::cout << "Build log " << buildLog; } SUCCESS_OR_TERMINATE((0 == spirV.size())); ze_module_desc_t moduleDesc = {ZE_STRUCTURE_TYPE_MODULE_DESC}; moduleDesc.format = ZE_MODULE_FORMAT_IL_SPIRV; moduleDesc.pInputModule = spirV.data(); moduleDesc.inputSize = spirV.size(); moduleDesc.pBuildFlags = ""; SUCCESS_OR_TERMINATE(zeModuleCreate(context, device, &moduleDesc, &module, nullptr)); ze_kernel_desc_t kernelDesc = {ZE_STRUCTURE_TYPE_KERNEL_DESC}; kernelDesc.pKernelName = "test_printf"; SUCCESS_OR_TERMINATE(zeKernelCreate(module, &kernelDesc, &kernel)); uint32_t groupSizeX = 1; uint32_t groupSizeY = 1; uint32_t groupSizeZ = 1; uint32_t globalSizeX = 64; SUCCESS_OR_TERMINATE(zeKernelSuggestGroupSize(kernel, globalSizeX, 1, 1, 
&groupSizeX, &groupSizeY, &groupSizeZ)); SUCCESS_OR_TERMINATE(zeKernelSetGroupSize(kernel, groupSizeX, groupSizeY, groupSizeZ)); dispatchTraits.groupCountX = globalSizeX / groupSizeX; dispatchTraits.groupCountY = 1; dispatchTraits.groupCountZ = 1; if (verbose) { std::cout << "Number of groups : (" << dispatchTraits.groupCountX << ", " << dispatchTraits.groupCountY << ", " << dispatchTraits.groupCountZ << ")" << std::endl; } SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 0, sizeof(size_t), nullptr)); SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 1, sizeof(size_t), nullptr)); SUCCESS_OR_TERMINATE(zeCommandListAppendLaunchKernel(cmdList, kernel, &dispatchTraits, nullptr, 0, nullptr)); SUCCESS_OR_TERMINATE(zeCommandListClose(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr)); SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits::max())); SUCCESS_OR_TERMINATE(zeKernelDestroy(kernel)); SUCCESS_OR_TERMINATE(zeModuleDestroy(module)); SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueDestroy(cmdQueue)); } int main(int argc, char *argv[]) { verbose = isVerbose(argc, argv); ze_context_handle_t context = nullptr; auto devices = zelloInitContextAndGetDevices(context); auto device = devices[0]; ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES}; SUCCESS_OR_TERMINATE(zeDeviceGetProperties(device, &deviceProperties)); std::cout << "Device : \n" << " * name : " << deviceProperties.name << "\n" << " * vendorId : " << std::hex << deviceProperties.vendorId << "\n"; testPrintfKernel(context, device); // always pass - no printf capturing std::cout << "\nZello Printf Always PASS " << std::endl; return 0; } compute-runtime-22.14.22890/level_zero/core/test/black_box_tests/zello_scratch.cpp000066400000000000000000000175601422164147700301460ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * 
SPDX-License-Identifier: MIT * */ #include "zello_common.h" #include "zello_compile.h" extern bool verbose; bool verbose = false; const char *module = R"===( typedef long16 TYPE; __attribute__((reqd_work_group_size(32, 1, 1))) // force LWS to 32 __attribute__((intel_reqd_sub_group_size(16))) // force SIMD to 16 __kernel void scratch_kernel(__global int *resIdx, global TYPE *src, global TYPE *dst) { size_t lid = get_local_id(0); size_t gid = get_global_id(0); TYPE res1 = src[gid * 3]; TYPE res2 = src[gid * 3 + 1]; TYPE res3 = src[gid * 3 + 2]; __local TYPE locMem[32]; locMem[lid] = res1; barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_GLOBAL_MEM_FENCE); TYPE res = (locMem[resIdx[gid]] * res3) * res2 + res1; dst[gid] = res; } )==="; void executeGpuKernelAndValidate(ze_context_handle_t context, ze_device_handle_t &device, bool &outputValidationSuccessful) { ze_command_queue_handle_t cmdQueue; ze_command_queue_desc_t cmdQueueDesc = {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC}; ze_command_list_handle_t cmdList; cmdQueueDesc.ordinal = getCommandQueueOrdinal(device); cmdQueueDesc.index = 0; cmdQueueDesc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; SUCCESS_OR_TERMINATE(zeCommandQueueCreate(context, device, &cmdQueueDesc, &cmdQueue)); SUCCESS_OR_TERMINATE(createCommandList(context, device, cmdList)); // Create two shared buffers uint32_t arraySize = 32; uint32_t vectorSize = 16; uint32_t typeSize = sizeof(uint32_t); uint32_t srcAdditionalMul = 3u; uint32_t expectedMemorySize = arraySize * vectorSize * typeSize * 2; uint32_t srcMemorySize = expectedMemorySize * srcAdditionalMul; uint32_t idxMemorySize = arraySize * sizeof(uint32_t); ze_device_mem_alloc_desc_t deviceDesc = {ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC}; deviceDesc.flags = ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_UNCACHED; deviceDesc.ordinal = 0; ze_host_mem_alloc_desc_t hostDesc = {ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC}; hostDesc.flags = ZE_HOST_MEM_ALLOC_FLAG_BIAS_UNCACHED; void *srcBuffer = nullptr; 
SUCCESS_OR_TERMINATE(zeMemAllocHost(context, &hostDesc, srcMemorySize, 1, &srcBuffer)); void *dstBuffer = nullptr; SUCCESS_OR_TERMINATE(zeMemAllocHost(context, &hostDesc, expectedMemorySize, 1, &dstBuffer)); void *idxBuffer = nullptr; SUCCESS_OR_TERMINATE(zeMemAllocHost(context, &hostDesc, idxMemorySize, 1, &idxBuffer)); void *expectedMemory = nullptr; SUCCESS_OR_TERMINATE(zeMemAllocHost(context, &hostDesc, expectedMemorySize, 1, &expectedMemory)); // Initialize memory constexpr uint8_t val = 0; memset(srcBuffer, val, srcMemorySize); memset(idxBuffer, 0, idxMemorySize); memset(dstBuffer, 0, expectedMemorySize); memset(expectedMemory, 0, expectedMemorySize); auto srcBufferLong = static_cast(srcBuffer); auto expectedMemoryLong = static_cast(expectedMemory); for (uint32_t i = 0; i < arraySize; ++i) { static_cast(idxBuffer)[i] = 2; for (uint32_t vecIdx = 0; vecIdx < vectorSize; ++vecIdx) { for (uint32_t srcMulIdx = 0; srcMulIdx < srcAdditionalMul; ++srcMulIdx) { srcBufferLong[(i * vectorSize * srcAdditionalMul) + srcMulIdx * vectorSize + vecIdx] = 1l; } expectedMemoryLong[i * vectorSize + vecIdx] = 2l; } } std::string buildLog; auto spirV = compileToSpirV(module, "", buildLog); if (buildLog.size() > 0) { std::cout << "Build log " << buildLog; } SUCCESS_OR_TERMINATE((0 == spirV.size())); ze_module_handle_t module = nullptr; ze_kernel_handle_t kernel = nullptr; ze_module_desc_t moduleDesc = {ZE_STRUCTURE_TYPE_MODULE_DESC}; ze_module_build_log_handle_t buildlog; moduleDesc.format = ZE_MODULE_FORMAT_IL_SPIRV; moduleDesc.pInputModule = spirV.data(); moduleDesc.inputSize = spirV.size(); moduleDesc.pBuildFlags = ""; if (zeModuleCreate(context, device, &moduleDesc, &module, &buildlog) != ZE_RESULT_SUCCESS) { size_t szLog = 0; zeModuleBuildLogGetString(buildlog, &szLog, nullptr); char *strLog = (char *)malloc(szLog); zeModuleBuildLogGetString(buildlog, &szLog, strLog); std::cout << "Build log:" << strLog << std::endl; free(strLog); } 
SUCCESS_OR_TERMINATE(zeModuleBuildLogDestroy(buildlog)); ze_kernel_desc_t kernelDesc = {ZE_STRUCTURE_TYPE_KERNEL_DESC}; kernelDesc.pKernelName = "scratch_kernel"; SUCCESS_OR_TERMINATE(zeKernelCreate(module, &kernelDesc, &kernel)); ze_kernel_properties_t kernelProperties{ZE_STRUCTURE_TYPE_KERNEL_PROPERTIES}; SUCCESS_OR_TERMINATE(zeKernelGetProperties(kernel, &kernelProperties)); std::cout << "Scratch size = " << kernelProperties.spillMemSize << "\n"; uint32_t groupSizeX = arraySize; uint32_t groupSizeY = 1u; uint32_t groupSizeZ = 1u; SUCCESS_OR_TERMINATE(zeKernelSuggestGroupSize(kernel, groupSizeX, 1U, 1U, &groupSizeX, &groupSizeY, &groupSizeZ)); SUCCESS_OR_TERMINATE(zeKernelSetGroupSize(kernel, groupSizeX, groupSizeY, groupSizeZ)); SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 2, sizeof(dstBuffer), &dstBuffer)); SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 1, sizeof(srcBuffer), &srcBuffer)); SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 0, sizeof(idxBuffer), &idxBuffer)); ze_group_count_t dispatchTraits; dispatchTraits.groupCountX = 1u; dispatchTraits.groupCountY = 1u; dispatchTraits.groupCountZ = 1u; SUCCESS_OR_TERMINATE(zeCommandListAppendLaunchKernel(cmdList, kernel, &dispatchTraits, nullptr, 0, nullptr)); // Close list and submit for execution SUCCESS_OR_TERMINATE(zeCommandListClose(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr)); SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits::max())); // Validate outputValidationSuccessful = true; if (memcmp(dstBuffer, expectedMemory, expectedMemorySize)) { outputValidationSuccessful = false; uint8_t *srcCharBuffer = static_cast(expectedMemory); uint8_t *dstCharBuffer = static_cast(dstBuffer); for (size_t i = 0; i < expectedMemorySize; i++) { if (srcCharBuffer[i] != dstCharBuffer[i]) { std::cout << "srcBuffer[" << i << "] = " << static_cast(srcCharBuffer[i]) << " not equal to " << "dstBuffer[" << i << "] = " << 
static_cast(dstCharBuffer[i]) << "\n"; break; } } } // Cleanup SUCCESS_OR_TERMINATE(zeMemFree(context, dstBuffer)); SUCCESS_OR_TERMINATE(zeMemFree(context, srcBuffer)); SUCCESS_OR_TERMINATE(zeMemFree(context, idxBuffer)); SUCCESS_OR_TERMINATE(zeMemFree(context, expectedMemory)); SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueDestroy(cmdQueue)); } int main(int argc, char *argv[]) { verbose = isVerbose(argc, argv); ze_context_handle_t context = nullptr; auto devices = zelloInitContextAndGetDevices(context); auto device = devices[0]; bool outputValidationSuccessful; ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES}; SUCCESS_OR_TERMINATE(zeDeviceGetProperties(device, &deviceProperties)); std::cout << "Device : \n" << " * name : " << deviceProperties.name << "\n" << " * vendorId : " << std::hex << deviceProperties.vendorId << "\n\n"; executeGpuKernelAndValidate(context, device, outputValidationSuccessful); SUCCESS_OR_TERMINATE(zeContextDestroy(context)); std::cout << "\nZello Scratch Results validation " << (outputValidationSuccessful ? 
"PASSED" : "FAILED") << "\n"; return 0; } compute-runtime-22.14.22890/level_zero/core/test/black_box_tests/zello_timestamp.cpp000066400000000000000000000460501422164147700305160ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "zello_common.h" extern bool verbose; bool verbose = false; inline std::vector loadBinaryFile(const std::string &filePath) { std::ifstream stream(filePath, std::ios::binary); if (!stream.good()) { std::cerr << "Failed to load binary file: " << filePath << " " << strerror(errno) << "\n"; return {}; } stream.seekg(0, stream.end); const size_t length = static_cast(stream.tellg()); stream.seekg(0, stream.beg); std::vector binary_file(length); stream.read(reinterpret_cast(binary_file.data()), length); return binary_file; } void createCmdQueueAndCmdList(ze_context_handle_t &context, ze_device_handle_t &device, ze_command_queue_handle_t &cmdQueue, ze_command_list_handle_t &cmdList) { // Create command queue uint32_t numQueueGroups = 0; SUCCESS_OR_TERMINATE(zeDeviceGetCommandQueueGroupProperties(device, &numQueueGroups, nullptr)); if (numQueueGroups == 0) { std::cout << "No queue groups found!\n"; std::terminate(); } std::vector queueProperties(numQueueGroups); SUCCESS_OR_TERMINATE(zeDeviceGetCommandQueueGroupProperties(device, &numQueueGroups, queueProperties.data())); ze_command_queue_desc_t cmdQueueDesc = {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC}; for (uint32_t i = 0; i < numQueueGroups; i++) { if (queueProperties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE) { cmdQueueDesc.ordinal = i; break; } } cmdQueueDesc.index = 0; cmdQueueDesc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; SUCCESS_OR_TERMINATE(zeCommandQueueCreate(context, device, &cmdQueueDesc, &cmdQueue)); // Create command list ze_command_list_desc_t cmdListDesc = {ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC}; cmdListDesc.commandQueueGroupOrdinal = cmdQueueDesc.ordinal; SUCCESS_OR_TERMINATE(zeCommandListCreate(context, 
device, &cmdListDesc, &cmdList)); } void createEventPoolAndEvents(ze_context_handle_t &context, ze_device_handle_t &device, ze_event_pool_handle_t &eventPool, ze_event_pool_flag_t poolFlag, uint32_t poolSize, ze_event_handle_t *events) { ze_event_pool_desc_t eventPoolDesc = {ZE_STRUCTURE_TYPE_EVENT_POOL_DESC}; ze_event_desc_t eventDesc = {ZE_STRUCTURE_TYPE_EVENT_DESC}; eventPoolDesc.count = poolSize; eventPoolDesc.flags = poolFlag; SUCCESS_OR_TERMINATE(zeEventPoolCreate(context, &eventPoolDesc, 1, &device, &eventPool)); for (uint32_t i = 0; i < poolSize; i++) { eventDesc.index = i; eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST; eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; SUCCESS_OR_TERMINATE(zeEventCreate(eventPool, &eventDesc, events + i)); } } bool testWriteGlobalTimestamp(ze_context_handle_t &context, ze_driver_handle_t &driver, ze_device_handle_t &device) { constexpr size_t allocSize = 4096; constexpr size_t tsAllocSize = 64; ze_command_queue_handle_t cmdQueue; ze_command_list_handle_t cmdList; uint64_t tsStartResult = 0, tsEndResult = 0; void *dstBuffer; void *globalTsStart, *globalTsEnd; // Create commandQueue and cmdList createCmdQueueAndCmdList(context, device, cmdQueue, cmdList); // Alloc buffers dstBuffer = nullptr; globalTsStart = nullptr; globalTsEnd = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC}; deviceDesc.flags = ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_UNCACHED; deviceDesc.ordinal = 0; SUCCESS_OR_TERMINATE(zeMemAllocDevice(context, &deviceDesc, allocSize, 1, device, &dstBuffer)); SUCCESS_OR_TERMINATE(zeMemAllocDevice(context, &deviceDesc, tsAllocSize, 1, device, &globalTsStart)); SUCCESS_OR_TERMINATE(zeMemAllocDevice(context, &deviceDesc, tsAllocSize, 1, device, &globalTsEnd)); // Init data and copy to device uint8_t initDataDst[allocSize]; memset(initDataDst, 3, sizeof(initDataDst)); SUCCESS_OR_TERMINATE(zeCommandListAppendWriteGlobalTimestamp(cmdList, (uint64_t *)globalTsStart, nullptr, 0, nullptr)); 
SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopy(cmdList, dstBuffer, initDataDst, sizeof(initDataDst), nullptr, 0, nullptr)); SUCCESS_OR_TERMINATE(zeCommandListAppendBarrier(cmdList, nullptr, 0, nullptr)); SUCCESS_OR_TERMINATE(zeCommandListAppendWriteGlobalTimestamp(cmdList, (uint64_t *)globalTsEnd, nullptr, 0, nullptr)); // Copy back timestamp data SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopy(cmdList, &tsStartResult, globalTsStart, sizeof(tsStartResult), nullptr, 0, nullptr)); SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopy(cmdList, &tsEndResult, globalTsEnd, sizeof(tsEndResult), nullptr, 0, nullptr)); SUCCESS_OR_TERMINATE(zeCommandListClose(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr)); SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits::max())); ze_device_properties_t devProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES}; SUCCESS_OR_TERMINATE(zeDeviceGetProperties(device, &devProperties)); uint64_t commandDuration = tsEndResult - tsStartResult; uint64_t timerResolution = devProperties.timerResolution; std::cout << "Global timestamp statistics: \n" << std::fixed << " Command start : " << std::dec << tsStartResult << " cycles\n" << " Command end : " << std::dec << tsEndResult << " cycles\n" << " Command duration : " << std::dec << commandDuration << " cycles, " << commandDuration * timerResolution << " ns\n"; // Tear down SUCCESS_OR_TERMINATE(zeMemFree(context, dstBuffer)); SUCCESS_OR_TERMINATE(zeMemFree(context, globalTsStart)); SUCCESS_OR_TERMINATE(zeMemFree(context, globalTsEnd)); SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueDestroy(cmdQueue)); return true; } bool testKernelTimestampHostQuery(ze_context_handle_t &context, ze_driver_handle_t &driver, ze_device_handle_t &device) { ze_command_queue_handle_t cmdQueue; ze_command_list_handle_t cmdList; // Create commandQueue and cmdList createCmdQueueAndCmdList(context, device, 
cmdQueue, cmdList); // Create two shared buffers constexpr size_t allocSize = 4096; ze_device_mem_alloc_desc_t deviceDesc = {ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC}; deviceDesc.flags = ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_UNCACHED; deviceDesc.ordinal = 0; ze_host_mem_alloc_desc_t hostDesc = {ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC}; hostDesc.flags = ZE_HOST_MEM_ALLOC_FLAG_BIAS_UNCACHED; void *srcBuffer = nullptr; SUCCESS_OR_TERMINATE(zeMemAllocShared(context, &deviceDesc, &hostDesc, allocSize, 1, device, &srcBuffer)); void *dstBuffer = nullptr; SUCCESS_OR_TERMINATE(zeMemAllocShared(context, &deviceDesc, &hostDesc, allocSize, 1, device, &dstBuffer)); // Initialize memory constexpr uint8_t val = 55; memset(srcBuffer, val, allocSize); memset(dstBuffer, 0, allocSize); // Create kernel auto spirvModule = loadBinaryFile("copy_buffer_to_buffer.spv"); if (spirvModule.size() == 0) { return false; } ze_module_handle_t module; ze_kernel_handle_t kernel; ze_module_desc_t moduleDesc = {ZE_STRUCTURE_TYPE_MODULE_DESC}; moduleDesc.format = ZE_MODULE_FORMAT_IL_SPIRV; moduleDesc.pInputModule = reinterpret_cast(spirvModule.data()); moduleDesc.inputSize = spirvModule.size(); SUCCESS_OR_TERMINATE(zeModuleCreate(context, device, &moduleDesc, &module, nullptr)); ze_kernel_desc_t kernelDesc = {ZE_STRUCTURE_TYPE_KERNEL_DESC}; kernelDesc.pKernelName = "CopyBufferToBufferBytes"; SUCCESS_OR_TERMINATE(zeKernelCreate(module, &kernelDesc, &kernel)); uint32_t groupSizeX = 32u; uint32_t groupSizeY = 1u; uint32_t groupSizeZ = 1u; SUCCESS_OR_TERMINATE(zeKernelSuggestGroupSize(kernel, allocSize, 1U, 1U, &groupSizeX, &groupSizeY, &groupSizeZ)); SUCCESS_OR_TERMINATE(zeKernelSetGroupSize(kernel, groupSizeX, groupSizeY, groupSizeZ)); uint32_t offset = 0; SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 1, sizeof(dstBuffer), &dstBuffer)); SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 0, sizeof(srcBuffer), &srcBuffer)); SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 2, sizeof(uint32_t), 
&offset)); SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 3, sizeof(uint32_t), &offset)); SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 4, sizeof(uint32_t), &offset)); ze_group_count_t dispatchTraits; dispatchTraits.groupCountX = allocSize / groupSizeX; dispatchTraits.groupCountY = 1u; dispatchTraits.groupCountZ = 1u; ze_event_pool_handle_t eventPool; ze_event_handle_t kernelTsEvent; createEventPoolAndEvents(context, device, eventPool, ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP, 1, &kernelTsEvent); SUCCESS_OR_TERMINATE(zeCommandListAppendLaunchKernel(cmdList, kernel, &dispatchTraits, kernelTsEvent, 0, nullptr)); SUCCESS_OR_TERMINATE(zeCommandListClose(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr)); SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits::max())); ze_kernel_timestamp_result_t kernelTsResults; SUCCESS_OR_TERMINATE(zeEventQueryKernelTimestamp(kernelTsEvent, &kernelTsResults)); ze_device_properties_t devProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES}; SUCCESS_OR_TERMINATE(zeDeviceGetProperties(device, &devProperties)); uint64_t timerResolution = devProperties.timerResolution; uint64_t kernelDuration = kernelTsResults.context.kernelEnd - kernelTsResults.context.kernelStart; std::cout << "Kernel timestamp statistics: \n" << std::fixed << " Global start : " << std::dec << kernelTsResults.global.kernelStart << " cycles\n" << " Kernel start: " << std::dec << kernelTsResults.context.kernelStart << " cycles\n" << " Kernel end: " << std::dec << kernelTsResults.context.kernelEnd << " cycles\n" << " Global end: " << std::dec << kernelTsResults.global.kernelEnd << " cycles\n" << " timerResolution clock: " << std::dec << timerResolution << " cycles/s\n" << " Kernel duration : " << std::dec << kernelDuration << " cycles, " << kernelDuration * (1000000000.0 / static_cast(timerResolution)) << " ns\n"; // Cleanup SUCCESS_OR_TERMINATE(zeMemFree(context, dstBuffer)); 
SUCCESS_OR_TERMINATE(zeMemFree(context, srcBuffer)); SUCCESS_OR_TERMINATE(zeEventDestroy(kernelTsEvent)); SUCCESS_OR_TERMINATE(zeEventPoolDestroy(eventPool)); SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueDestroy(cmdQueue)); return true; } bool testKernelTimestampApendQuery(ze_context_handle_t &context, ze_device_handle_t &device, ze_device_properties_t devProperties) { ze_command_queue_handle_t cmdQueue; ze_command_list_handle_t cmdList; // Create commandQueue and cmdList createCmdQueueAndCmdList(context, device, cmdQueue, cmdList); // Create two shared buffers constexpr size_t allocSize = 4096; ze_device_mem_alloc_desc_t deviceDesc = {ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC}; deviceDesc.flags = ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_UNCACHED; deviceDesc.ordinal = 0; ze_host_mem_alloc_desc_t hostDesc = {ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC}; hostDesc.flags = ZE_HOST_MEM_ALLOC_FLAG_BIAS_UNCACHED; void *srcBuffer = nullptr; SUCCESS_OR_TERMINATE(zeMemAllocShared(context, &deviceDesc, &hostDesc, allocSize, 1, device, &srcBuffer)); void *dstBuffer = nullptr; SUCCESS_OR_TERMINATE(zeMemAllocShared(context, &deviceDesc, &hostDesc, allocSize, 1, device, &dstBuffer)); void *timestampBuffer = nullptr; SUCCESS_OR_TERMINATE(zeMemAllocHost(context, &hostDesc, sizeof(ze_kernel_timestamp_result_t), 1, ×tampBuffer)); // Initialize memory constexpr uint8_t val = 55; memset(srcBuffer, val, allocSize); memset(dstBuffer, 0, allocSize); memset(timestampBuffer, 0, sizeof(ze_kernel_timestamp_result_t)); // Create kernel auto spirvModule = loadBinaryFile("copy_buffer_to_buffer.spv"); if (spirvModule.size() == 0) { return false; } ze_module_handle_t module; ze_kernel_handle_t kernel; ze_module_desc_t moduleDesc = {ZE_STRUCTURE_TYPE_MODULE_DESC}; moduleDesc.format = ZE_MODULE_FORMAT_IL_SPIRV; moduleDesc.pInputModule = reinterpret_cast(spirvModule.data()); moduleDesc.inputSize = spirvModule.size(); SUCCESS_OR_TERMINATE(zeModuleCreate(context, device, 
&moduleDesc, &module, nullptr)); ze_kernel_desc_t kernelDesc = {ZE_STRUCTURE_TYPE_KERNEL_DESC}; kernelDesc.pKernelName = "CopyBufferToBufferBytes"; SUCCESS_OR_TERMINATE(zeKernelCreate(module, &kernelDesc, &kernel)); uint32_t groupSizeX = 32u; uint32_t groupSizeY = 1u; uint32_t groupSizeZ = 1u; SUCCESS_OR_TERMINATE(zeKernelSuggestGroupSize(kernel, allocSize, 1U, 1U, &groupSizeX, &groupSizeY, &groupSizeZ)); SUCCESS_OR_TERMINATE(zeKernelSetGroupSize(kernel, groupSizeX, groupSizeY, groupSizeZ)); uint32_t offset = 0; SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 1, sizeof(dstBuffer), &dstBuffer)); SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 0, sizeof(srcBuffer), &srcBuffer)); SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 2, sizeof(uint32_t), &offset)); SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 3, sizeof(uint32_t), &offset)); SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 4, sizeof(uint32_t), &offset)); ze_group_count_t dispatchTraits; dispatchTraits.groupCountX = allocSize / groupSizeX; dispatchTraits.groupCountY = 1u; dispatchTraits.groupCountZ = 1u; ze_event_pool_handle_t eventPool; ze_event_handle_t kernelTsEvent; createEventPoolAndEvents(context, device, eventPool, ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP, 1, &kernelTsEvent); SUCCESS_OR_TERMINATE(zeCommandListAppendLaunchKernel(cmdList, kernel, &dispatchTraits, kernelTsEvent, 0, nullptr)); SUCCESS_OR_TERMINATE(zeCommandListAppendBarrier(cmdList, nullptr, 0u, nullptr)); SUCCESS_OR_TERMINATE(zeCommandListAppendQueryKernelTimestamps(cmdList, 1u, &kernelTsEvent, timestampBuffer, nullptr, nullptr, 0u, nullptr)); SUCCESS_OR_TERMINATE(zeCommandListClose(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr)); SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits::max())); ze_kernel_timestamp_result_t *kernelTsResults = reinterpret_cast(timestampBuffer); SUCCESS_OR_TERMINATE(zeDeviceGetProperties(device, 
&devProperties)); uint64_t timerResolution = devProperties.timerResolution; uint64_t kernelDuration = kernelTsResults->context.kernelEnd - kernelTsResults->context.kernelStart; if (devProperties.stype == ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES_1_2) { std::cout << "Kernel timestamp statistics (V1.2 and later): \n" << std::fixed << " Global start : " << std::dec << kernelTsResults->global.kernelStart << " cycles\n" << " Kernel start: " << std::dec << kernelTsResults->context.kernelStart << " cycles\n" << " Kernel end: " << std::dec << kernelTsResults->context.kernelEnd << " cycles\n" << " Global end: " << std::dec << kernelTsResults->global.kernelEnd << " cycles\n" << " timerResolution clock: " << std::dec << timerResolution << " cycles/s\n" << " Kernel duration : " << std::dec << kernelDuration << " cycles, " << kernelDuration * (1000000000.0 / static_cast(timerResolution)) << " ns\n"; } else { std::cout << "Kernel timestamp statistics (prior to V1.2): \n" << std::fixed << " Global start : " << std::dec << kernelTsResults->global.kernelStart << " cycles\n" << " Kernel start: " << std::dec << kernelTsResults->context.kernelStart << " cycles\n" << " Kernel end: " << std::dec << kernelTsResults->context.kernelEnd << " cycles\n" << " Global end: " << std::dec << kernelTsResults->global.kernelEnd << " cycles\n" << " timerResolution: " << std::dec << timerResolution << " ns\n" << " Kernel duration : " << std::dec << kernelDuration << " cycles, " << kernelDuration * timerResolution << " ns\n"; } // Cleanup SUCCESS_OR_TERMINATE(zeMemFree(context, dstBuffer)); SUCCESS_OR_TERMINATE(zeMemFree(context, srcBuffer)); SUCCESS_OR_TERMINATE(zeMemFree(context, timestampBuffer)); SUCCESS_OR_TERMINATE(zeEventDestroy(kernelTsEvent)); SUCCESS_OR_TERMINATE(zeEventPoolDestroy(eventPool)); SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueDestroy(cmdQueue)); return true; } void printResult(bool result, std::string ¤tTest) { std::cout << "\nZello Timestamp: 
" << currentTest.c_str() << " Results validation " << (result ? "PASSED" : "FAILED") << std::endl << std::endl; } int main(int argc, char *argv[]) { verbose = isVerbose(argc, argv); ze_context_handle_t context = nullptr; auto devices = zelloInitContextAndGetDevices(context); auto device = devices[0]; ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES}; SUCCESS_OR_TERMINATE(zeDeviceGetProperties(device, &deviceProperties)); std::cout << "Device : \n" << " * name : " << deviceProperties.name << "\n" << " * vendorId : " << std::hex << deviceProperties.vendorId << "\n"; bool result; std::string currentTest; currentTest = "Test Append Write of Global Timestamp: Default Device Properties Structure"; deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES}; result = testKernelTimestampApendQuery(context, device, deviceProperties); printResult(result, currentTest); currentTest = "Test Append Write of Global Timestamp: V1.2 (and later) Device Properties Structure"; deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES_1_2}; result = testKernelTimestampApendQuery(context, device, deviceProperties); printResult(result, currentTest); SUCCESS_OR_TERMINATE(zeContextDestroy(context)); return result ? 
0 : 1; } compute-runtime-22.14.22890/level_zero/core/test/black_box_tests/zello_world_global_work_offset.cpp000066400000000000000000000306501422164147700335710ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/offline_compiler/source/ocloc_api.h" #include "shared/source/helpers/string.h" #include "level_zero/api/extensions/public/ze_exp_ext.h" #include "zello_common.h" #include #include #include extern bool verbose; bool verbose = false; const char *module = R"===( __kernel void kernel_copy(__global char *dst, __global char *src){ uint gid = get_global_id(0); dst[gid] = src[gid]; } )==="; std::vector compileToSpirV(const std::string &src, const std::string &options, std::string &outCompilerLog) { std::vector ret; const char *mainFileName = "main.cl"; const char *argv[] = {"ocloc", "-q", "-device", "skl", "-file", mainFileName}; const unsigned char *sources[] = {reinterpret_cast(src.c_str())}; size_t sourcesLengths[] = {src.size() + 1}; const char *sourcesNames[] = {mainFileName}; unsigned int numOutputs = 0U; unsigned char **outputs = nullptr; size_t *ouputLengths = nullptr; char **outputNames = nullptr; int result = oclocInvoke(sizeof(argv) / sizeof(argv[0]), argv, 1, sources, sourcesLengths, sourcesNames, 0, nullptr, nullptr, nullptr, &numOutputs, &outputs, &ouputLengths, &outputNames); unsigned char *spirV = nullptr; size_t spirVlen = 0; const char *log = nullptr; size_t logLen = 0; for (unsigned int i = 0; i < numOutputs; ++i) { std::string spvExtension = ".spv"; std::string logFileName = "stdout.log"; auto nameLen = strlen(outputNames[i]); if ((nameLen > spvExtension.size()) && (strstr(&outputNames[i][nameLen - spvExtension.size()], spvExtension.c_str()) != nullptr)) { spirV = outputs[i]; spirVlen = ouputLengths[i]; } else if ((nameLen >= logFileName.size()) && (strstr(outputNames[i], logFileName.c_str()) != nullptr)) { log = reinterpret_cast(outputs[i]); logLen = ouputLengths[i]; 
break; } } if ((result != 0) && (logLen == 0)) { outCompilerLog = "Unknown error, ocloc returned : " + std::to_string(result) + "\n"; return ret; } if (logLen != 0) { outCompilerLog = std::string(log, logLen).c_str(); } ret.assign(spirV, spirV + spirVlen); oclocFreeOutput(&numOutputs, &outputs, &ouputLengths, &outputNames); return ret; } typedef ze_result_t (*setGlobalWorkOffsetFunctionType)(ze_kernel_handle_t, uint32_t, uint32_t, uint32_t); setGlobalWorkOffsetFunctionType findSymbolForSetGlobalWorkOffsetFunction(char *userPath) { char libPath[256]; sprintf(libPath, "%s/libze_intel_gpu.so.1", userPath); void *libHandle = dlopen(libPath, RTLD_LAZY | RTLD_LOCAL); if (!libHandle) { std::cout << "libze_intel_gpu.so not found\n"; std::terminate(); } ze_result_t (*pfnSetGlobalWorkOffset)(ze_kernel_handle_t, uint32_t, uint32_t, uint32_t); *(void **)(&pfnSetGlobalWorkOffset) = dlsym(libHandle, "zeKernelSetGlobalOffsetExp"); char *error; if ((error = dlerror()) != NULL) { std::cout << "Error while opening symbol: " << error << "\n"; std::terminate(); } return pfnSetGlobalWorkOffset; } void executeKernelAndValidate(ze_context_handle_t context, ze_device_handle_t &device, setGlobalWorkOffsetFunctionType pfnSetGlobalWorkOffset, bool &outputValidationSuccessful) { ze_command_queue_handle_t cmdQueue; ze_command_queue_desc_t cmdQueueDesc = {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC}; ze_command_list_handle_t cmdList; cmdQueueDesc.ordinal = getCommandQueueOrdinal(device); cmdQueueDesc.index = 0; cmdQueueDesc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; SUCCESS_OR_TERMINATE(zeCommandQueueCreate(context, device, &cmdQueueDesc, &cmdQueue)); SUCCESS_OR_TERMINATE(createCommandList(context, device, cmdList)); // Create two shared buffers constexpr size_t allocSize = 4096; ze_device_mem_alloc_desc_t deviceDesc = {ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC}; deviceDesc.flags = ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_UNCACHED; deviceDesc.ordinal = 0; ze_host_mem_alloc_desc_t hostDesc = 
{ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC}; hostDesc.flags = ZE_HOST_MEM_ALLOC_FLAG_BIAS_UNCACHED; void *srcBuffer = nullptr; SUCCESS_OR_TERMINATE(zeMemAllocShared(context, &deviceDesc, &hostDesc, allocSize, 1, device, &srcBuffer)); void *dstBuffer = nullptr; SUCCESS_OR_TERMINATE(zeMemAllocShared(context, &deviceDesc, &hostDesc, allocSize, 1, device, &dstBuffer)); // Initialize memory constexpr uint32_t bufferOffset = 8; constexpr uint8_t srcVal = 55; constexpr uint8_t dstVal = 77; memset(srcBuffer, srcVal, allocSize); memset(dstBuffer, 0, allocSize); uint8_t *dstCharBuffer = static_cast(dstBuffer); for (uint32_t i = 0; i < bufferOffset; i++) { dstCharBuffer[i] = dstVal; } std::string buildLog; auto spirV = compileToSpirV(module, "", buildLog); if (buildLog.size() > 0) { std::cout << "Build log " << buildLog; } SUCCESS_OR_TERMINATE((0 == spirV.size())); ze_module_handle_t module = nullptr; ze_kernel_handle_t kernel = nullptr; ze_module_desc_t moduleDesc = {ZE_STRUCTURE_TYPE_MODULE_DESC}; ze_module_build_log_handle_t buildlog; moduleDesc.format = ZE_MODULE_FORMAT_IL_SPIRV; moduleDesc.pInputModule = spirV.data(); moduleDesc.inputSize = spirV.size(); moduleDesc.pBuildFlags = ""; if (zeModuleCreate(context, device, &moduleDesc, &module, &buildlog) != ZE_RESULT_SUCCESS) { size_t szLog = 0; zeModuleBuildLogGetString(buildlog, &szLog, nullptr); char *strLog = (char *)malloc(szLog); zeModuleBuildLogGetString(buildlog, &szLog, strLog); std::cout << "Build log:" << strLog << std::endl; free(strLog); } SUCCESS_OR_TERMINATE(zeModuleBuildLogDestroy(buildlog)); ze_kernel_desc_t kernelDesc = {ZE_STRUCTURE_TYPE_KERNEL_DESC}; kernelDesc.pKernelName = "kernel_copy"; SUCCESS_OR_TERMINATE(zeKernelCreate(module, &kernelDesc, &kernel)); ze_kernel_properties_t kernProps = {ZE_STRUCTURE_TYPE_KERNEL_PROPERTIES}; SUCCESS_OR_TERMINATE(zeKernelGetProperties(kernel, &kernProps)); std::cout << "Kernel : \n" << " * name : " << kernelDesc.pKernelName << "\n" << " * uuid.mid : " << kernProps.uuid.mid 
<< "\n" << " * uuid.kid : " << kernProps.uuid.kid << "\n" << " * maxSubgroupSize : " << kernProps.maxSubgroupSize << "\n" << " * localMemSize : " << kernProps.localMemSize << "\n" << " * spillMemSize : " << kernProps.spillMemSize << "\n" << " * privateMemSize : " << kernProps.privateMemSize << "\n" << " * maxNumSubgroups : " << kernProps.maxNumSubgroups << "\n" << " * numKernelArgs : " << kernProps.numKernelArgs << "\n" << " * requiredSubgroupSize : " << kernProps.requiredSubgroupSize << "\n" << " * requiredNumSubGroups : " << kernProps.requiredNumSubGroups << "\n" << " * requiredGroupSizeX : " << kernProps.requiredGroupSizeX << "\n" << " * requiredGroupSizeY : " << kernProps.requiredGroupSizeY << "\n" << " * requiredGroupSizeZ : " << kernProps.requiredGroupSizeZ << "\n"; uint32_t groupSizeX = 32u; uint32_t groupSizeY = 1u; uint32_t groupSizeZ = 1u; SUCCESS_OR_TERMINATE(zeKernelSuggestGroupSize(kernel, allocSize, 1U, 1U, &groupSizeX, &groupSizeY, &groupSizeZ)); SUCCESS_OR_TERMINATE(zeKernelSetGroupSize(kernel, groupSizeX, groupSizeY, groupSizeZ)); SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 0, sizeof(dstBuffer), &dstBuffer)); SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 1, sizeof(srcBuffer), &srcBuffer)); uint32_t offsetx = bufferOffset; uint32_t offsety = 0; uint32_t offsetz = 0; SUCCESS_OR_TERMINATE(pfnSetGlobalWorkOffset(kernel, offsetx, offsety, offsetz)); ze_group_count_t dispatchTraits; dispatchTraits.groupCountX = allocSize / groupSizeX; dispatchTraits.groupCountY = 1u; dispatchTraits.groupCountZ = 1u; SUCCESS_OR_TERMINATE(zeCommandListAppendLaunchKernel(cmdList, kernel, &dispatchTraits, nullptr, 0, nullptr)); // Close list and submit for execution SUCCESS_OR_TERMINATE(zeCommandListClose(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr)); SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits::max())); // Validate outputValidationSuccessful = true; uint8_t *srcCharBuffer 
= static_cast(srcBuffer); for (size_t i = 0; i < allocSize; i++) { if (i < bufferOffset) { if (dstCharBuffer[i] != dstVal) { std::cout << "dstBuffer[" << i << "] = " << std::dec << static_cast(dstCharBuffer[i]) << " not equal to " << dstVal << "\n"; outputValidationSuccessful = false; break; } } else { if (dstCharBuffer[i] != srcCharBuffer[i]) { std::cout << "dstBuffer[" << i << "] = " << std::dec << static_cast(dstCharBuffer[i]) << " not equal to " << "srcBuffer[" << i << "] = " << std::dec << static_cast(srcCharBuffer[i]) << "\n"; outputValidationSuccessful = false; break; } } } // Cleanup SUCCESS_OR_TERMINATE(zeMemFree(context, dstBuffer)); SUCCESS_OR_TERMINATE(zeMemFree(context, srcBuffer)); SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueDestroy(cmdQueue)); } int main(int argc, char *argv[]) { verbose = isVerbose(argc, argv); ze_driver_handle_t driverHandle; ze_context_handle_t context = nullptr; auto devices = zelloInitContextAndGetDevices(context, driverHandle); auto device = devices[0]; bool outputValidationSuccessful; const char *defaultPath = "/usr/local/lib/"; char userPath[256]{}; if (argc == 2) { strncpy_s(userPath, sizeof(userPath), argv[1], 256); } else { strncpy_s(userPath, sizeof(userPath), defaultPath, strlen(defaultPath)); } uint32_t extensionsCount = 0; SUCCESS_OR_TERMINATE(zeDriverGetExtensionProperties(driverHandle, &extensionsCount, nullptr)); if (extensionsCount == 0) { std::cout << "No extensions supported on this driver\n"; std::terminate(); } std::vector extensionsSupported(extensionsCount); SUCCESS_OR_TERMINATE(zeDriverGetExtensionProperties(driverHandle, &extensionsCount, extensionsSupported.data())); bool globalOffsetExtensionFound = false; std::string globalOffsetName = "ZE_experimental_global_offset"; for (uint32_t i = 0; i < extensionsSupported.size(); i++) { if (strncmp(extensionsSupported[i].name, globalOffsetName.c_str(), globalOffsetName.size()) == 0) { if (extensionsSupported[i].version 
== ZE_GLOBAL_OFFSET_EXP_VERSION_1_0) { globalOffsetExtensionFound = true; break; } } } if (globalOffsetExtensionFound == false) { std::cout << "No global offset extension found on this driver\n"; std::terminate(); } setGlobalWorkOffsetFunctionType pfnSetGlobalWorkOffset = findSymbolForSetGlobalWorkOffsetFunction(userPath); ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES}; SUCCESS_OR_TERMINATE(zeDeviceGetProperties(device, &deviceProperties)); std::cout << "Device : \n" << " * name : " << deviceProperties.name << "\n" << " * vendorId : " << std::hex << deviceProperties.vendorId << "\n"; executeKernelAndValidate(context, device, pfnSetGlobalWorkOffset, outputValidationSuccessful); SUCCESS_OR_TERMINATE(zeContextDestroy(context)); std::cout << "\nZello World Global Work Offset Results validation " << (outputValidationSuccessful ? "PASSED" : "FAILED") << "\n"; return 0; } compute-runtime-22.14.22890/level_zero/core/test/black_box_tests/zello_world_gpu.cpp000066400000000000000000000144621422164147700305170ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "zello_common.h" extern bool verbose; bool verbose = false; void executeGpuKernelAndValidate(ze_context_handle_t context, ze_device_handle_t &device, bool &outputValidationSuccessful) { ze_command_queue_handle_t cmdQueue; ze_command_queue_desc_t cmdQueueDesc = {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC}; ze_command_list_handle_t cmdList; cmdQueueDesc.ordinal = getCommandQueueOrdinal(device); cmdQueueDesc.index = 0; cmdQueueDesc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; SUCCESS_OR_TERMINATE(zeCommandQueueCreate(context, device, &cmdQueueDesc, &cmdQueue)); SUCCESS_OR_TERMINATE(createCommandList(context, device, cmdList)); // Create two shared buffers constexpr size_t allocSize = 4096; ze_device_mem_alloc_desc_t deviceDesc = {ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC}; deviceDesc.flags = ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_UNCACHED; 
deviceDesc.ordinal = 0; ze_host_mem_alloc_desc_t hostDesc = {ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC}; hostDesc.flags = ZE_HOST_MEM_ALLOC_FLAG_BIAS_UNCACHED; void *srcBuffer = nullptr; SUCCESS_OR_TERMINATE(zeMemAllocShared(context, &deviceDesc, &hostDesc, allocSize, 1, device, &srcBuffer)); void *dstBuffer = nullptr; SUCCESS_OR_TERMINATE(zeMemAllocShared(context, &deviceDesc, &hostDesc, allocSize, 1, device, &dstBuffer)); // Initialize memory constexpr uint8_t val = 55; memset(srcBuffer, val, allocSize); memset(dstBuffer, 0, allocSize); ze_module_handle_t module = nullptr; ze_kernel_handle_t kernel = nullptr; std::ifstream file("copy_buffer_to_buffer.spv", std::ios::binary); if (file.is_open()) { file.seekg(0, file.end); auto length = file.tellg(); file.seekg(0, file.beg); std::unique_ptr spirvInput(new char[length]); file.read(spirvInput.get(), length); ze_module_desc_t moduleDesc = {ZE_STRUCTURE_TYPE_MODULE_DESC}; ze_module_build_log_handle_t buildlog; moduleDesc.format = ZE_MODULE_FORMAT_IL_SPIRV; moduleDesc.pInputModule = reinterpret_cast(spirvInput.get()); moduleDesc.inputSize = length; moduleDesc.pBuildFlags = ""; if (zeModuleCreate(context, device, &moduleDesc, &module, &buildlog) != ZE_RESULT_SUCCESS) { size_t szLog = 0; zeModuleBuildLogGetString(buildlog, &szLog, nullptr); char *strLog = (char *)malloc(szLog); zeModuleBuildLogGetString(buildlog, &szLog, strLog); std::cout << "Build log:" << strLog << std::endl; free(strLog); } SUCCESS_OR_TERMINATE(zeModuleBuildLogDestroy(buildlog)); ze_kernel_desc_t kernelDesc = {ZE_STRUCTURE_TYPE_KERNEL_DESC}; kernelDesc.pKernelName = "CopyBufferToBufferBytes"; SUCCESS_OR_TERMINATE(zeKernelCreate(module, &kernelDesc, &kernel)); uint32_t groupSizeX = 32u; uint32_t groupSizeY = 1u; uint32_t groupSizeZ = 1u; SUCCESS_OR_TERMINATE(zeKernelSuggestGroupSize(kernel, allocSize, 1U, 1U, &groupSizeX, &groupSizeY, &groupSizeZ)); SUCCESS_OR_TERMINATE(zeKernelSetGroupSize(kernel, groupSizeX, groupSizeY, groupSizeZ)); uint32_t offset = 0; 
SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 1, sizeof(dstBuffer), &dstBuffer)); SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 0, sizeof(srcBuffer), &srcBuffer)); SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 2, sizeof(uint32_t), &offset)); SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 3, sizeof(uint32_t), &offset)); SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 4, sizeof(uint32_t), &offset)); ze_group_count_t dispatchTraits; dispatchTraits.groupCountX = allocSize / groupSizeX; dispatchTraits.groupCountY = 1u; dispatchTraits.groupCountZ = 1u; SUCCESS_OR_TERMINATE(zeCommandListAppendLaunchKernel(cmdList, kernel, &dispatchTraits, nullptr, 0, nullptr)); file.close(); } else { // Perform a GPU copy SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopy(cmdList, dstBuffer, srcBuffer, allocSize, nullptr, 0, nullptr)); } // Close list and submit for execution SUCCESS_OR_TERMINATE(zeCommandListClose(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr)); SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits::max())); // Validate outputValidationSuccessful = true; if (memcmp(dstBuffer, srcBuffer, allocSize)) { outputValidationSuccessful = false; uint8_t *srcCharBuffer = static_cast(srcBuffer); uint8_t *dstCharBuffer = static_cast(dstBuffer); for (size_t i = 0; i < allocSize; i++) { if (srcCharBuffer[i] != dstCharBuffer[i]) { std::cout << "srcBuffer[" << i << "] = " << static_cast(srcCharBuffer[i]) << " not equal to " << "dstBuffer[" << i << "] = " << static_cast(dstCharBuffer[i]) << "\n"; break; } } } // Cleanup SUCCESS_OR_TERMINATE(zeMemFree(context, dstBuffer)); SUCCESS_OR_TERMINATE(zeMemFree(context, srcBuffer)); SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueDestroy(cmdQueue)); } int main(int argc, char *argv[]) { verbose = isVerbose(argc, argv); ze_context_handle_t context = nullptr; auto devices = 
zelloInitContextAndGetDevices(context); auto device = devices[0]; bool outputValidationSuccessful; ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES}; SUCCESS_OR_TERMINATE(zeDeviceGetProperties(device, &deviceProperties)); std::cout << "Device : \n" << " * name : " << deviceProperties.name << "\n" << " * vendorId : " << std::hex << deviceProperties.vendorId << "\n"; executeGpuKernelAndValidate(context, device, outputValidationSuccessful); SUCCESS_OR_TERMINATE(zeContextDestroy(context)); std::cout << "\nZello World Results validation " << (outputValidationSuccessful ? "PASSED" : "FAILED") << "\n"; return 0; } compute-runtime-22.14.22890/level_zero/core/test/black_box_tests/zello_world_jitc_ocloc.cpp000066400000000000000000000161241422164147700320310ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "zello_common.h" #include "zello_compile.h" extern bool verbose; bool verbose = false; const char *module = R"===( __kernel void kernel_copy(__global char *dst, __global char *src){ uint gid = get_global_id(0); dst[gid] = src[gid]; } )==="; void executeKernelAndValidate(ze_context_handle_t context, ze_device_handle_t &device, bool &outputValidationSuccessful) { ze_command_queue_handle_t cmdQueue; ze_command_queue_desc_t cmdQueueDesc = {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC}; ze_command_list_handle_t cmdList; cmdQueueDesc.ordinal = getCommandQueueOrdinal(device); cmdQueueDesc.index = 0; cmdQueueDesc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; SUCCESS_OR_TERMINATE(zeCommandQueueCreate(context, device, &cmdQueueDesc, &cmdQueue)); SUCCESS_OR_TERMINATE(createCommandList(context, device, cmdList)); // Create two shared buffers constexpr size_t allocSize = 4096; ze_device_mem_alloc_desc_t deviceDesc = {ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC}; deviceDesc.flags = ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_UNCACHED; deviceDesc.ordinal = 0; ze_host_mem_alloc_desc_t hostDesc = 
{ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC}; hostDesc.flags = ZE_HOST_MEM_ALLOC_FLAG_BIAS_UNCACHED; void *srcBuffer = nullptr; SUCCESS_OR_TERMINATE(zeMemAllocShared(context, &deviceDesc, &hostDesc, allocSize, 1, device, &srcBuffer)); void *dstBuffer = nullptr; SUCCESS_OR_TERMINATE(zeMemAllocShared(context, &deviceDesc, &hostDesc, allocSize, 1, device, &dstBuffer)); // Initialize memory constexpr uint8_t val = 55; memset(srcBuffer, val, allocSize); memset(dstBuffer, 0, allocSize); std::string buildLog; auto spirV = compileToSpirV(module, "", buildLog); if (buildLog.size() > 0) { std::cout << "Build log " << buildLog; } SUCCESS_OR_TERMINATE((0 == spirV.size())); ze_module_handle_t module = nullptr; ze_kernel_handle_t kernel = nullptr; ze_module_desc_t moduleDesc = {ZE_STRUCTURE_TYPE_MODULE_DESC}; ze_module_build_log_handle_t buildlog; moduleDesc.format = ZE_MODULE_FORMAT_IL_SPIRV; moduleDesc.pInputModule = spirV.data(); moduleDesc.inputSize = spirV.size(); moduleDesc.pBuildFlags = ""; if (zeModuleCreate(context, device, &moduleDesc, &module, &buildlog) != ZE_RESULT_SUCCESS) { size_t szLog = 0; zeModuleBuildLogGetString(buildlog, &szLog, nullptr); char *strLog = (char *)malloc(szLog); zeModuleBuildLogGetString(buildlog, &szLog, strLog); std::cout << "Build log:" << strLog << std::endl; free(strLog); } SUCCESS_OR_TERMINATE(zeModuleBuildLogDestroy(buildlog)); ze_kernel_desc_t kernelDesc = {ZE_STRUCTURE_TYPE_KERNEL_DESC}; kernelDesc.pKernelName = "kernel_copy"; SUCCESS_OR_TERMINATE(zeKernelCreate(module, &kernelDesc, &kernel)); ze_kernel_properties_t kernProps{ZE_STRUCTURE_TYPE_KERNEL_PROPERTIES}; SUCCESS_OR_TERMINATE(zeKernelGetProperties(kernel, &kernProps)); std::cout << "Kernel : \n" << " * name : " << kernelDesc.pKernelName << "\n" << " * uuid.mid : " << kernProps.uuid.mid << "\n" << " * uuid.kid : " << kernProps.uuid.kid << "\n" << " * maxSubgroupSize : " << kernProps.maxSubgroupSize << "\n" << " * localMemSize : " << kernProps.localMemSize << "\n" << " * spillMemSize 
: " << kernProps.spillMemSize << "\n" << " * privateMemSize : " << kernProps.privateMemSize << "\n" << " * maxNumSubgroups : " << kernProps.maxNumSubgroups << "\n" << " * numKernelArgs : " << kernProps.numKernelArgs << "\n" << " * requiredSubgroupSize : " << kernProps.requiredSubgroupSize << "\n" << " * requiredNumSubGroups : " << kernProps.requiredNumSubGroups << "\n" << " * requiredGroupSizeX : " << kernProps.requiredGroupSizeX << "\n" << " * requiredGroupSizeY : " << kernProps.requiredGroupSizeY << "\n" << " * requiredGroupSizeZ : " << kernProps.requiredGroupSizeZ << "\n"; uint32_t groupSizeX = 32u; uint32_t groupSizeY = 1u; uint32_t groupSizeZ = 1u; SUCCESS_OR_TERMINATE(zeKernelSuggestGroupSize(kernel, allocSize, 1U, 1U, &groupSizeX, &groupSizeY, &groupSizeZ)); SUCCESS_OR_TERMINATE(zeKernelSetGroupSize(kernel, groupSizeX, groupSizeY, groupSizeZ)); SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 0, sizeof(dstBuffer), &dstBuffer)); SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 1, sizeof(srcBuffer), &srcBuffer)); ze_group_count_t dispatchTraits; dispatchTraits.groupCountX = allocSize / groupSizeX; dispatchTraits.groupCountY = 1u; dispatchTraits.groupCountZ = 1u; SUCCESS_OR_TERMINATE(zeCommandListAppendLaunchKernel(cmdList, kernel, &dispatchTraits, nullptr, 0, nullptr)); // Close list and submit for execution SUCCESS_OR_TERMINATE(zeCommandListClose(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr)); SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits::max())); // Validate outputValidationSuccessful = true; if (memcmp(dstBuffer, srcBuffer, allocSize)) { outputValidationSuccessful = false; uint8_t *srcCharBuffer = static_cast(srcBuffer); uint8_t *dstCharBuffer = static_cast(dstBuffer); for (size_t i = 0; i < allocSize; i++) { if (srcCharBuffer[i] != dstCharBuffer[i]) { std::cout << "srcBuffer[" << i << "] = " << std::dec << static_cast(srcCharBuffer[i]) << " not equal to " << 
"dstBuffer[" << i << "] = " << std::dec << static_cast(dstCharBuffer[i]) << "\n"; break; } } } // Cleanup SUCCESS_OR_TERMINATE(zeKernelDestroy(kernel)); SUCCESS_OR_TERMINATE(zeModuleDestroy(module)); SUCCESS_OR_TERMINATE(zeMemFree(context, dstBuffer)); SUCCESS_OR_TERMINATE(zeMemFree(context, srcBuffer)); SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueDestroy(cmdQueue)); } int main(int argc, char *argv[]) { verbose = isVerbose(argc, argv); ze_context_handle_t context = nullptr; auto devices = zelloInitContextAndGetDevices(context); auto device = devices[0]; bool outputValidationSuccessful; ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES}; SUCCESS_OR_TERMINATE(zeDeviceGetProperties(device, &deviceProperties)); std::cout << "Device : \n" << " * name : " << deviceProperties.name << "\n" << " * vendorId : " << std::hex << deviceProperties.vendorId << "\n"; executeKernelAndValidate(context, device, outputValidationSuccessful); SUCCESS_OR_TERMINATE(zeContextDestroy(context)); std::cout << "\nZello World JIT Results validation " << (outputValidationSuccessful ? "PASSED" : "FAILED") << "\n"; return 0; } compute-runtime-22.14.22890/level_zero/core/test/black_box_tests/zello_world_usm.cpp000066400000000000000000000160721422164147700305270ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include #include "zello_common.h" #include #include #include bool verbose = false; bool useSyncQueue = false; int main(int argc, char *argv[]) { verbose = isVerbose(argc, argv); useSyncQueue = isSyncQueueEnabled(argc, argv); bool outputValidationSuccessful = false; // 1. 
Set-up constexpr size_t allocSize = 4096 + 7; constexpr size_t bytesPerThread = sizeof(char); constexpr size_t numThreads = allocSize / bytesPerThread; ze_module_handle_t module; ze_kernel_handle_t kernel; ze_command_queue_handle_t cmdQueue; void *srcBuffer = nullptr; void *dstBuffer = nullptr; std::ifstream file("copy_buffer_to_buffer.spv", std::ios::binary); if (!file.is_open()) { std::cout << "Unable to open copy_buffer_to_buffer.spv file" << std::endl; std::cout << "\nZello World USM Results validation " << (outputValidationSuccessful ? "PASSED" : "FAILED") << "\n"; return -1; } ze_context_handle_t context = nullptr; ze_driver_handle_t driverHandle = nullptr; auto devices = zelloInitContextAndGetDevices(context, driverHandle); auto device = devices[0]; ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES}; SUCCESS_OR_TERMINATE(zeDeviceGetProperties(device, &deviceProperties)); std::cout << "Device : \n" << " * name : " << deviceProperties.name << "\n" << " * vendorId : " << std::hex << deviceProperties.vendorId << "\n"; file.seekg(0, file.end); auto length = file.tellg(); file.seekg(0, file.beg); std::unique_ptr spirvInput(new char[length]); file.read(spirvInput.get(), length); ze_module_desc_t moduleDesc = {ZE_STRUCTURE_TYPE_MODULE_DESC}; ze_module_build_log_handle_t buildlog; moduleDesc.format = ZE_MODULE_FORMAT_IL_SPIRV; moduleDesc.pInputModule = reinterpret_cast(spirvInput.get()); moduleDesc.inputSize = length; moduleDesc.pBuildFlags = ""; if (zeModuleCreate(context, device, &moduleDesc, &module, &buildlog) != ZE_RESULT_SUCCESS) { size_t szLog = 0; zeModuleBuildLogGetString(buildlog, &szLog, nullptr); char *strLog = (char *)malloc(szLog); zeModuleBuildLogGetString(buildlog, &szLog, strLog); std::cout << "Build log:" << strLog << std::endl; free(strLog); } SUCCESS_OR_TERMINATE(zeModuleBuildLogDestroy(buildlog)); ze_kernel_desc_t kernelDesc = {ZE_STRUCTURE_TYPE_KERNEL_DESC}; kernelDesc.pKernelName = "CopyBufferToBufferBytes"; 
SUCCESS_OR_TERMINATE(zeKernelCreate(module, &kernelDesc, &kernel)); uint32_t groupSizeX = 32u; uint32_t groupSizeY = 1u; uint32_t groupSizeZ = 1u; SUCCESS_OR_TERMINATE(zeKernelSuggestGroupSize(kernel, numThreads, 1U, 1U, &groupSizeX, &groupSizeY, &groupSizeZ)); SUCCESS_OR_TERMINATE_BOOL(numThreads % groupSizeX == 0); if (verbose) { std::cout << "Group size : (" << groupSizeX << ", " << groupSizeY << ", " << groupSizeZ << ")" << std::endl; } SUCCESS_OR_TERMINATE(zeKernelSetGroupSize(kernel, groupSizeX, groupSizeY, groupSizeZ)); ze_command_queue_desc_t cmdQueueDesc = {}; cmdQueueDesc.stype = ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC; cmdQueueDesc.pNext = nullptr; cmdQueueDesc.flags = 0; if (useSyncQueue) cmdQueueDesc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS; else cmdQueueDesc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; cmdQueueDesc.priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL; cmdQueueDesc.ordinal = getCommandQueueOrdinal(device); cmdQueueDesc.index = 0; SUCCESS_OR_TERMINATE(zeCommandQueueCreate(context, device, &cmdQueueDesc, &cmdQueue)); ze_command_list_handle_t cmdList; SUCCESS_OR_TERMINATE(createCommandList(context, device, cmdList)); ze_device_mem_alloc_desc_t deviceDesc = {}; deviceDesc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC; deviceDesc.pNext = nullptr; deviceDesc.ordinal = 0; deviceDesc.flags = 0; ze_host_mem_alloc_desc_t hostDesc = {}; hostDesc.stype = ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC; hostDesc.pNext = nullptr; hostDesc.flags = 0; SUCCESS_OR_TERMINATE( zeMemAllocShared(context, &deviceDesc, &hostDesc, allocSize, 1, device, &srcBuffer)); SUCCESS_OR_TERMINATE( zeMemAllocShared(context, &deviceDesc, &hostDesc, allocSize, 1, device, &dstBuffer)); ze_memory_allocation_properties_t memProperties = {ZE_STRUCTURE_TYPE_MEMORY_ALLOCATION_PROPERTIES}; SUCCESS_OR_TERMINATE(zeMemGetAllocProperties(context, srcBuffer, &memProperties, &device)); SUCCESS_OR_TERMINATE_BOOL(memProperties.type == ZE_MEMORY_TYPE_SHARED); // initialize the src buffer memset(srcBuffer, 7, 
allocSize); // Encode run user kernel SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 0, sizeof(dstBuffer), &dstBuffer)); SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 1, sizeof(srcBuffer), &srcBuffer)); ze_group_count_t dispatchTraits; dispatchTraits.groupCountX = numThreads / groupSizeX; dispatchTraits.groupCountY = 1u; dispatchTraits.groupCountZ = 1u; if (verbose) { std::cerr << "Number of groups : (" << dispatchTraits.groupCountX << ", " << dispatchTraits.groupCountY << ", " << dispatchTraits.groupCountZ << ")" << std::endl; } SUCCESS_OR_TERMINATE_BOOL(dispatchTraits.groupCountX * groupSizeX == allocSize); SUCCESS_OR_TERMINATE(zeCommandListAppendLaunchKernel(cmdList, kernel, &dispatchTraits, nullptr, 0, nullptr)); // initialize the dst buffer after appending the kernel but before executing the lists, to // ensure page-fault manager is correctly making resident the buffers in the GPU at // execution time memset(dstBuffer, 3, allocSize); // Dispatch and wait SUCCESS_OR_TERMINATE(zeCommandListClose(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr)); // if using async command queue, explicit sync must be used for correctness if (useSyncQueue == false) SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits::max())); // Validate outputValidationSuccessful = (0 == memcmp(dstBuffer, srcBuffer, allocSize)); SUCCESS_OR_WARNING_BOOL(outputValidationSuccessful); // Cleanup SUCCESS_OR_TERMINATE(zeMemFree(context, dstBuffer)); SUCCESS_OR_TERMINATE(zeMemFree(context, srcBuffer)); SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdList)); SUCCESS_OR_TERMINATE(zeCommandQueueDestroy(cmdQueue)); SUCCESS_OR_TERMINATE(zeKernelDestroy(kernel)); SUCCESS_OR_TERMINATE(zeModuleDestroy(module)); SUCCESS_OR_TERMINATE(zeContextDestroy(context)); std::cout << "\nZello World USM Results validation " << (outputValidationSuccessful ? 
"PASSED" : "FAILED") << "\n"; return 0; } compute-runtime-22.14.22890/level_zero/core/test/common/000077500000000000000000000000001422164147700227175ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/common/CMakeLists.txt000066400000000000000000000055071422164147700254660ustar00rootroot00000000000000# # Copyright (C) 2021-2022 Intel Corporation # # SPDX-License-Identifier: MIT # include(gen_kernel.cmake) add_custom_target(prepare_test_kernels_for_l0) set_target_properties(prepare_test_kernels_for_l0 PROPERTIES FOLDER ${TARGET_NAME_L0}) set(TEST_MODULES ${CMAKE_CURRENT_SOURCE_DIR}/test_modules/test_kernel.cl ) set(TEST_KERNEL_BINDLESS_internal_options "-cl-intel-use-bindless-mode -cl-intel-use-bindless-advanced-mode" ) set(TEST_KERNEL_BINDLESS ${CMAKE_CURRENT_SOURCE_DIR}/test_modules/stateful_copy_buffer.cl ) set(l0_test_kernels_outputs) macro(macro_for_each_core_type) foreach(PLATFORM_TYPE ${PLATFORM_TYPES}) foreach(REVISION_ID ${${PLATFORM_TYPE}_${CORE_TYPE}_REVISIONS}) if(${CORE_TYPE}_HAS_${PLATFORM_TYPE}) get_family_name_with_type(${CORE_TYPE} ${PLATFORM_TYPE}) string(TOLOWER ${PLATFORM_TYPE} PLATFORM_TYPE_LOWER) set(PLATFORM_LOWER ${DEFAULT_SUPPORTED_${CORE_TYPE}_${PLATFORM_TYPE}_PLATFORM}) string(TOLOWER ${CORE_TYPE} CORE_TYPE_LOWER) level_zero_generate_kernels(l0_test_kernel_outputs ${PLATFORM_LOWER} ${family_name_with_type} ${REVISION_ID} "-g" ${TEST_MODULES}) #skip Gen8 bindless kernel generation if(NOT ("${CORE_TYPE_LOWER}" STREQUAL "gen8")) level_zero_generate_kernels_with_internal_options(l0_bindless_test_kernel_outputs ${PLATFORM_LOWER} ${family_name_with_type} "bindless" ${REVISION_ID} "-g" ${TEST_KERNEL_BINDLESS_internal_options} ${TEST_KERNEL_BINDLESS}) endif() endif() endforeach() endforeach() endmacro() apply_macro_for_each_core_type("TESTED") add_custom_target(l0_common_test_kernels DEPENDS ${l0_test_kernel_outputs} ${l0_bindless_test_kernel_outputs} copy_compiler_files) set_target_properties(l0_common_test_kernels 
PROPERTIES FOLDER ${TARGET_NAME_L0}) add_dependencies(prepare_test_kernels_for_l0 l0_common_test_kernels) macro(macro_for_each_core_type) foreach(BRANCH_DIR ${BRANCH_DIR_LIST}) set(ENABLE_L0_MOCKS_CPP ${NEO_SOURCE_DIR}/level_zero/core/test/unit_tests${BRANCH_DIR}${CORE_TYPE_LOWER}/enable_l0_mocks_${CORE_TYPE_LOWER}.cpp) if(EXISTS ${ENABLE_L0_MOCKS_CPP}) list(APPEND LIBULT_L0_SOURCES ${ENABLE_L0_MOCKS_CPP}) endif() endforeach() endmacro() apply_macro_for_each_core_type("TESTED") add_library(l0_libult OBJECT EXCLUDE_FROM_ALL ${LIBULT_L0_SOURCES} ) set_target_properties(l0_libult PROPERTIES POSITION_INDEPENDENT_CODE ON) set_target_properties(l0_libult PROPERTIES FOLDER ${TARGET_NAME_L0}) set_property(TARGET l0_libult APPEND_STRING PROPERTY COMPILE_FLAGS ${ASAN_FLAGS} ${TSAN_FLAGS}) target_include_directories(l0_libult PRIVATE $) target_compile_definitions(l0_libult PRIVATE $) create_project_source_tree(l0_libult) compute-runtime-22.14.22890/level_zero/core/test/common/gen_kernel.cmake000066400000000000000000000107301422164147700260330ustar00rootroot00000000000000# # Copyright (C) 2020-2022 Intel Corporation # # SPDX-License-Identifier: MIT # function(level_zero_generate_kernels target_list platform_name suffix revision_id options) list(APPEND results copy_compiler_files) set(relativeDir "level_zero/${suffix}/${revision_id}/test_files/${NEO_ARCH}") set(outputdir "${TargetDir}/${relativeDir}/") foreach(filepath ${ARGN}) get_filename_component(filename ${filepath} NAME) get_filename_component(basename ${filepath} NAME_WE) get_filename_component(workdir ${filepath} DIRECTORY) get_filename_component(absolute_filepath ${filepath} ABSOLUTE) set(outputpath_base "${outputdir}${basename}_${suffix}") if(NOT NEO_DISABLE_BUILTINS_COMPILATION) set(output_files ${outputpath_base}.bin ${outputpath_base}.gen ${outputpath_base}.spv ${outputpath_base}.dbg ) add_custom_command( COMMAND echo generate ${ocloc_cmd_prefix} -q -file ${absolute_filepath} -device ${platform_name} -out_dir 
${outputdir} -revision_id ${revision_id} -options "${options}" OUTPUT ${output_files} COMMAND ${ocloc_cmd_prefix} -q -file ${absolute_filepath} -device ${platform_name} -out_dir ${outputdir} -revision_id ${revision_id} -options "${options}" WORKING_DIRECTORY ${workdir} DEPENDS ${filepath} ocloc ) list(APPEND ${target_list} ${output_files}) else() foreach(_file_name "bin" "gen" "spv" "dbg") set(_file_prebuilt "${NEO_KERNELS_BIN_DIR}/${relativeDir}/${basename}_${suffix}.${_file_name}") add_custom_command( OUTPUT ${outputpath_base}.${_file_name} COMMAND ${CMAKE_COMMAND} -E make_directory ${outputdir} COMMAND ${CMAKE_COMMAND} -E copy_if_different ${_file_prebuilt} ${outputdir} ) list(APPEND ${target_list} ${outputpath_base}.${_file_name}) endforeach() endif() endforeach() set(${target_list} ${${target_list}} PARENT_SCOPE) endfunction() function(level_zero_generate_kernels_with_internal_options target_list platform_name suffix prefix revision_id options internal_options) list(APPEND results copy_compiler_files) set(relativeDir "level_zero/${suffix}/${revision_id}/test_files/${NEO_ARCH}") set(outputdir "${TargetDir}/${relativeDir}/") foreach(filepath ${ARGN}) get_filename_component(filename ${filepath} NAME) get_filename_component(basename ${filepath} NAME_WE) get_filename_component(workdir ${filepath} DIRECTORY) get_filename_component(absolute_filepath ${filepath} ABSOLUTE) set(outputpath_base "${outputdir}${prefix}_${basename}_${suffix}") if(NOT NEO_DISABLE_BUILTINS_COMPILATION) set(output_files ${outputpath_base}.bin ${outputpath_base}.gen ${outputpath_base}.spv ${outputpath_base}.dbg ) set(output_name "-output" "${prefix}_${basename}") string(CONCAT options \" ${options} \" ) string(CONCAT internal_options \" ${internal_options} \" ) add_custom_command( COMMAND echo generate ${ocloc_cmd_prefix} -q -file ${absolute_filepath} -device ${platform_name} -out_dir ${outputdir} ${output_name} -revision_id ${revision_id} -options ${options} -internal_options 
${internal_options} , workdir is ${workdir} OUTPUT ${output_files} COMMAND ${ocloc_cmd_prefix} -q -file ${absolute_filepath} -device ${platform_name} -out_dir ${outputdir} ${output_name} -revision_id ${revision_id} -options ${options} -internal_options ${internal_options} WORKING_DIRECTORY ${workdir} DEPENDS ${filepath} ocloc ) list(APPEND ${target_list} ${output_files}) else() foreach(_file_name "bin" "gen" "spv" "dbg") set(_file_prebuilt "${NEO_KERNELS_BIN_DIR}/${relativeDir}/${prefix}_${basename}_${suffix}.${_file_name}") add_custom_command( OUTPUT ${outputpath_base}.${_file_name} COMMAND ${CMAKE_COMMAND} -E make_directory ${outputdir} COMMAND ${CMAKE_COMMAND} -E copy_if_different ${_file_prebuilt} ${outputdir} ) list(APPEND ${target_list} ${outputpath_base}.${_file_name}) endforeach() endif() endforeach() set(${target_list} ${${target_list}} PARENT_SCOPE) endfunction() compute-runtime-22.14.22890/level_zero/core/test/common/test_modules/000077500000000000000000000000001422164147700254265ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/common/test_modules/stateful_copy_buffer.cl000066400000000000000000000003531422164147700321610ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ __kernel void StatefulCopyBuffer( const __global uchar* src, __global uchar* dst) { uint id = get_global_id(0); dst[id] = src[id]; } compute-runtime-22.14.22890/level_zero/core/test/common/test_modules/test_kernel.cl000066400000000000000000000025771422164147700303000ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST; kernel void test(const global float *a, const global float *b, global float *c, read_only image2d_t input, write_only image2d_t output, sampler_t sampler) { const int global_id = get_global_id(0); const int local_id = get_local_id(0); 
local float a_local[16]; float sum = 0.0f; a_local[local_id] = a[local_id] + b[local_id]; barrier(CLK_LOCAL_MEM_FENCE); for (int i = 0; i < get_local_size(0); ++i) { sum += a_local[i]; } barrier(CLK_LOCAL_MEM_FENCE); c[global_id] = sum; int2 coord = {get_global_id(0), get_global_id(1)}; printf("local_id = %d, global_id = %d \n", local_id, global_id); } __kernel void test_get_global_sizes(__global uint *outGlobalSize) { outGlobalSize[0] = get_global_size(0); outGlobalSize[1] = get_global_size(1); outGlobalSize[2] = get_global_size(2); } __kernel void test_get_work_dim(__global uint *outWorkDim) { outWorkDim[0] = get_work_dim(); } __kernel void test_get_group_count(__global uint *outGroupCount) { outGroupCount[0] = get_num_groups(0); outGroupCount[1] = get_num_groups(1); outGroupCount[2] = get_num_groups(2); }compute-runtime-22.14.22890/level_zero/core/test/unit_tests/000077500000000000000000000000001422164147700236305ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/CMakeLists.txt000066400000000000000000000116271422164147700263770ustar00rootroot00000000000000# # Copyright (C) 2020-2022 Intel Corporation # # SPDX-License-Identifier: MIT # link_libraries(${ASAN_LIBS} ${TSAN_LIBS}) set(TARGET_NAME ${TARGET_NAME_L0}_core_tests) include(${NEO_SOURCE_DIR}/cmake/setup_ult_global_flags.cmake) function(ADD_SUPPORTED_TEST_PRODUCT_FAMILIES_DEFINITION) set(L0_TESTED_PRODUCT_FAMILIES ${ALL_TESTED_PRODUCT_FAMILY}) string(REPLACE ";" "," L0_TESTED_PRODUCT_FAMILIES "${L0_TESTED_PRODUCT_FAMILIES}") add_definitions(-DSUPPORTED_TEST_PRODUCT_FAMILIES=${L0_TESTED_PRODUCT_FAMILIES}) endfunction() ADD_SUPPORTED_TEST_PRODUCT_FAMILIES_DEFINITION() add_executable(${TARGET_NAME} ${NEO_SOURCE_DIR}/level_zero/core/source/dll/disallow_deferred_deleter.cpp ${NEO_SOURCE_DIR}/level_zero/tools/test/unit_tests/sources/debug/debug_session_helper.cpp ) target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt 
${CMAKE_CURRENT_SOURCE_DIR}/main.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock.h ${CMAKE_CURRENT_SOURCE_DIR}/white_box.h ${NEO_SHARED_TEST_DIRECTORY}/unit_test/tests_configuration.h ${NEO_SOURCE_DIR}/level_zero/core/test/common/gen_kernel.cmake ) target_sources(${TARGET_NAME} PRIVATE $ $ $ $ $ $ $ ) set_property(TARGET ${TARGET_NAME} APPEND_STRING PROPERTY COMPILE_FLAGS ${ASAN_FLAGS}) set_target_properties(${TARGET_NAME} PROPERTIES FOLDER ${TARGET_NAME_L0}) add_subdirectoriesL0(${CMAKE_CURRENT_SOURCE_DIR} "*") target_compile_definitions(${TARGET_NAME} PRIVATE $) target_include_directories(${TARGET_NAME} BEFORE PRIVATE $/core $/tools $/../../../../instrumentation/inc/common/instrumentation/api/ $/core/os_interface ${NEO_SHARED_TEST_DIRECTORY}/common/test_macros/header${BRANCH_DIR_SUFFIX} ${NEO_SHARED_TEST_DIRECTORY}/common/helpers/includes${BRANCH_DIR_SUFFIX} ${NEO_SHARED_TEST_DIRECTORY}/common/test_configuration/unit_tests ) if(UNIX) target_include_directories(${TARGET_NAME} BEFORE PRIVATE $/core/os_interface/linux $/tools/linux ) else() target_include_directories(${TARGET_NAME} BEFORE PRIVATE $/core/os_interface/windows $/tools/windows ) endif() if(WIN32) target_link_libraries(${TARGET_NAME} dbghelp) add_dependencies(${TARGET_NAME} mock_gdi) endif() target_link_libraries(${TARGET_NAME} ${NEO_SHARED_MOCKABLE_LIB_NAME} ${HW_LIBS_ULT} gmock-gtest ${NEO_EXTRA_LIBS} ) target_sources(${TARGET_NAME} PRIVATE $ $ $ $ $ ) if(TARGET ${BUILTINS_SPIRV_LIB_NAME}) target_sources(${TARGET_NAME} PRIVATE $ ) endif() option(L0_ULT_VERBOSE "Use the default/verbose test output" OFF) if(NOT L0_ULT_VERBOSE) set(L0_TESTS_LISTENER_OPTION "--disable_default_listener") else() set(L0_TESTS_LISTENER_OPTION "--enable_default_listener") endif() if(L0_ULT_FILTER) set(L0_TESTS_FILTER_OPTION "--gtest_filter=*${L0_ULT_FILTER}*") else() set(L0_TESTS_FILTER_OPTION "--gtest_filter=*") endif() if(MSVC) set_target_properties(${TARGET_NAME} PROPERTIES VS_DEBUGGER_COMMAND_ARGUMENTS "${L0_TESTS_FILTER_OPTION} 
--gtest_catch_exceptions=0 ${L0_TESTS_LISTENER_OPTION}" VS_DEBUGGER_WORKING_DIRECTORY "$(OutDir)" ) endif() add_dependencies(unit_tests ${TARGET_NAME} prepare_test_kernels_for_l0) create_source_tree(${TARGET_NAME} ${L0_ROOT_DIR}/..) compute-runtime-22.14.22890/level_zero/core/test/unit_tests/fixtures/000077500000000000000000000000001422164147700255015ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/fixtures/CMakeLists.txt000066400000000000000000000024641422164147700302470ustar00rootroot00000000000000# # Copyright (C) 2020-2022 Intel Corporation # # SPDX-License-Identifier: MIT # set(TARGET_NAME ${TARGET_NAME_L0}_fixtures) set(L0_FIXTURES_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/aub_csr_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/device_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/device_fixture.cpp ${CMAKE_CURRENT_SOURCE_DIR}/host_pointer_manager_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/module_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/memory_ipc_fixture.h ) add_library(${TARGET_NAME} OBJECT ${L0_FIXTURES_SOURCES} ${NEO_CORE_tests_compiler_mocks}) target_include_directories(${TARGET_NAME} PRIVATE $ $ $ ${NEO_SOURCE_DIR}/level_zero/core/test/unit_test ) target_compile_definitions(${TARGET_NAME} PRIVATE $) set_target_properties(${TARGET_NAME} PROPERTIES FOLDER ${TARGET_NAME_L0}) create_source_tree(${TARGET_NAME} ${L0_ROOT_DIR}) compute-runtime-22.14.22890/level_zero/core/test/unit_tests/fixtures/aub_csr_fixture.h000066400000000000000000000021631422164147700310400ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/test/common/helpers/default_hw_info.h" #include "shared/test/common/libult/ult_aub_command_stream_receiver.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" namespace NEO { extern CommandStreamReceiverCreateFunc commandStreamReceiverFactory[2 * 
IGFX_MAX_CORE]; } // namespace NEO namespace L0 { namespace ult { struct AubCsrFixture : public ContextFixture { template void SetUpT() { auto csrCreateFcn = &commandStreamReceiverFactory[IGFX_MAX_CORE + NEO::defaultHwInfo->platform.eRenderCoreFamily]; variableBackup = std::make_unique>(csrCreateFcn); *csrCreateFcn = UltAubCommandStreamReceiver::create; ContextFixture::SetUp(); } template void TearDownT() { ContextFixture::TearDown(); } void SetUp() {} void TearDown() {} std::unique_ptr> variableBackup; }; } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h000066400000000000000000000065661422164147700310740ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_container/implicit_scaling.h" #include "shared/test/common/helpers/variable_backup.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h" namespace L0 { namespace ult { class CommandListFixture : public DeviceFixture { public: void SetUp() { DeviceFixture::SetUp(); ze_result_t returnValue; commandList.reset(whitebox_cast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue))); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE; eventPoolDesc.count = 2; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.wait = 0; eventDesc.signal = 0; eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); } void TearDown() { DeviceFixture::TearDown(); } std::unique_ptr commandList; std::unique_ptr eventPool; std::unique_ptr event; }; template struct MultiTileCommandListFixture : public 
SingleRootMultiSubDeviceFixture { void SetUp() { DebugManager.flags.EnableImplicitScaling.set(1); osLocalMemoryBackup = std::make_unique>(&NEO::OSInterface::osEnableLocalMemory, true); apiSupportBackup = std::make_unique>(&NEO::ImplicitScaling::apiSupport, true); SingleRootMultiSubDeviceFixture::SetUp(); ze_result_t returnValue; NEO::EngineGroupType cmdListEngineType = createCopy ? NEO::EngineGroupType::Copy : NEO::EngineGroupType::RenderCompute; if (!createImmediate) { commandList.reset(whitebox_cast(CommandList::create(productFamily, device, cmdListEngineType, 0u, returnValue))); } else { const ze_command_queue_desc_t desc = {}; commandList.reset(whitebox_cast(CommandList::createImmediate(productFamily, device, &desc, createInternal, cmdListEngineType, returnValue))); } ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE; eventPoolDesc.count = 2; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.wait = 0; eventDesc.signal = 0; eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); } void TearDown() { SingleRootMultiSubDeviceFixture::TearDown(); } std::unique_ptr commandList; std::unique_ptr eventPool; std::unique_ptr event; std::unique_ptr> apiSupportBackup; std::unique_ptr> osLocalMemoryBackup; }; } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/fixtures/device_fixture.cpp000066400000000000000000000172221422164147700312160ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "shared/source/os_interface/device_factory.h" #include "shared/test/common/mocks/ult_device_factory.h" #include 
"shared/test/unit_test/page_fault_manager/mock_cpu_page_fault_manager.h" #include "level_zero/core/test/unit_tests/mocks/mock_built_ins.h" #include "level_zero/core/test/unit_tests/mocks/mock_context.h" #include "level_zero/core/test/unit_tests/mocks/mock_device.h" namespace L0 { namespace ult { void DeviceFixture::SetUp() { // NOLINT(readability-identifier-naming) auto executionEnvironment = MockDevice::prepareExecutionEnvironment(NEO::defaultHwInfo.get(), 0u); setupWithExecutionEnvironment(*executionEnvironment); } void DeviceFixture::setupWithExecutionEnvironment(NEO::ExecutionEnvironment &executionEnvironment) { neoDevice = NEO::MockDevice::createWithExecutionEnvironment(NEO::defaultHwInfo.get(), &executionEnvironment, 0u); mockBuiltIns = new MockBuiltins(); neoDevice->executionEnvironment->rootDeviceEnvironments[0]->builtins.reset(mockBuiltIns); NEO::DeviceVector devices; devices.push_back(std::unique_ptr(neoDevice)); driverHandle = std::make_unique>(); driverHandle->initialize(std::move(devices)); device = driverHandle->devices[0]; ze_context_handle_t hContext; ze_context_desc_t desc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC, nullptr, 0}; ze_result_t res = driverHandle->createContext(&desc, 0u, nullptr, &hContext); EXPECT_EQ(ZE_RESULT_SUCCESS, res); context = static_cast(Context::fromHandle(hContext)); } void DeviceFixture::TearDown() { // NOLINT(readability-identifier-naming) context->destroy(); } void PageFaultDeviceFixture::SetUp() { // NOLINT(readability-identifier-naming) neoDevice = NEO::MockDevice::createWithNewExecutionEnvironment(NEO::defaultHwInfo.get()); auto mockBuiltIns = new MockBuiltins(); neoDevice->executionEnvironment->rootDeviceEnvironments[0]->builtins.reset(mockBuiltIns); NEO::DeviceVector devices; devices.push_back(std::unique_ptr(neoDevice)); driverHandle = std::make_unique>(); driverHandle->initialize(std::move(devices)); device = driverHandle->devices[0]; ze_context_handle_t hContext; ze_context_desc_t desc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC, 
nullptr, 0}; ze_result_t res = driverHandle->createContext(&desc, 0u, nullptr, &hContext); EXPECT_EQ(ZE_RESULT_SUCCESS, res); context = static_cast(Context::fromHandle(hContext)); mockPageFaultManager = new MockPageFaultManager; mockMemoryManager = std::make_unique(); memoryManager = device->getDriverHandle()->getMemoryManager(); mockMemoryManager->pageFaultManager.reset(mockPageFaultManager); device->getDriverHandle()->setMemoryManager(mockMemoryManager.get()); } void PageFaultDeviceFixture::TearDown() { // NOLINT(readability-identifier-naming) device->getDriverHandle()->setMemoryManager(memoryManager); context->destroy(); } void MultiDeviceFixture::SetUp() { // NOLINT(readability-identifier-naming) DebugManager.flags.CreateMultipleRootDevices.set(numRootDevices); DebugManager.flags.CreateMultipleSubDevices.set(numSubDevices); auto executionEnvironment = new NEO::ExecutionEnvironment; auto devices = NEO::DeviceFactory::createDevices(*executionEnvironment); driverHandle = std::make_unique>(); ze_result_t res = driverHandle->initialize(std::move(devices)); EXPECT_EQ(ZE_RESULT_SUCCESS, res); ze_context_handle_t hContext; ze_context_desc_t desc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC, nullptr, 0}; res = driverHandle->createContext(&desc, 0u, nullptr, &hContext); EXPECT_EQ(ZE_RESULT_SUCCESS, res); context = static_cast(Context::fromHandle(hContext)); } void MultiDeviceFixture::TearDown() { // NOLINT(readability-identifier-naming) context->destroy(); } void ContextFixture::SetUp() { DeviceFixture::SetUp(); } void ContextFixture::TearDown() { DeviceFixture::TearDown(); } void MultipleDevicesWithCustomHwInfo::SetUp() { NEO::MockCompilerEnableGuard mock(true); VariableBackup mockDeviceFlagBackup(&MockDevice::createSingleDevice, false); std::vector> devices; NEO::ExecutionEnvironment *executionEnvironment = new NEO::ExecutionEnvironment(); executionEnvironment->prepareRootDeviceEnvironments(numRootDevices); hwInfo = *NEO::defaultHwInfo.get(); hwInfo.gtSystemInfo.SliceCount = 
sliceCount; hwInfo.gtSystemInfo.SubSliceCount = subsliceCount; hwInfo.gtSystemInfo.EUCount = subsliceCount * numEuPerSubslice; hwInfo.gtSystemInfo.ThreadCount = subsliceCount * numEuPerSubslice * numThreadsPerEu; hwInfo.gtSystemInfo.MaxEuPerSubSlice = numEuPerSubslice; hwInfo.gtSystemInfo.NumThreadsPerEu = numThreadsPerEu; hwInfo.gtSystemInfo.MaxSlicesSupported = sliceCount; hwInfo.gtSystemInfo.MaxSubSlicesSupported = sliceCount * subsliceCount; hwInfo.gtSystemInfo.MaxDualSubSlicesSupported = sliceCount * subsliceCount; hwInfo.gtSystemInfo.MultiTileArchInfo.IsValid = 1; hwInfo.gtSystemInfo.MultiTileArchInfo.TileCount = numSubDevices; hwInfo.gtSystemInfo.MultiTileArchInfo.Tile0 = 1; hwInfo.gtSystemInfo.MultiTileArchInfo.Tile1 = 1; for (auto i = 0u; i < executionEnvironment->rootDeviceEnvironments.size(); i++) { executionEnvironment->rootDeviceEnvironments[i]->setHwInfo(&hwInfo); } memoryManager = new NEO::OsAgnosticMemoryManager(*executionEnvironment); executionEnvironment->memoryManager.reset(memoryManager); deviceFactory = std::make_unique(numRootDevices, numSubDevices, *executionEnvironment); for (auto i = 0u; i < executionEnvironment->rootDeviceEnvironments.size(); i++) { devices.push_back(std::unique_ptr(deviceFactory->rootDevices[i])); } driverHandle = std::make_unique>(); driverHandle->initialize(std::move(devices)); } void SingleRootMultiSubDeviceFixture::SetUp() { MultiDeviceFixture::numRootDevices = 1u; MultiDeviceFixture::SetUp(); device = driverHandle->devices[0]; neoDevice = device->getNEODevice(); } void GetMemHandlePtrTestFixture::SetUp() { NEO::MockCompilerEnableGuard mock(true); neoDevice = NEO::MockDevice::createWithNewExecutionEnvironment(NEO::defaultHwInfo.get()); auto mockBuiltIns = new MockBuiltins(); neoDevice->executionEnvironment->rootDeviceEnvironments[0]->builtins.reset(mockBuiltIns); NEO::DeviceVector devices; devices.push_back(std::unique_ptr(neoDevice)); driverHandle = std::make_unique(); driverHandle->initialize(std::move(devices)); 
prevMemoryManager = driverHandle->getMemoryManager(); currMemoryManager = new MemoryManagerMemHandleMock(); driverHandle->setMemoryManager(currMemoryManager); device = driverHandle->devices[0]; context = std::make_unique(driverHandle.get()); EXPECT_NE(context, nullptr); context->getDevices().insert(std::make_pair(device->toHandle(), device)); auto neoDevice = device->getNEODevice(); context->rootDeviceIndices.insert(neoDevice->getRootDeviceIndex()); context->deviceBitfields.insert({neoDevice->getRootDeviceIndex(), neoDevice->getDeviceBitfield()}); } void GetMemHandlePtrTestFixture::TearDown() { driverHandle->setMemoryManager(prevMemoryManager); delete currMemoryManager; } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/fixtures/device_fixture.h000066400000000000000000000150111422164147700306550ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/mocks/mock_compilers.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "level_zero/core/test/unit_tests/mocks/mock_built_ins.h" #include "level_zero/core/test/unit_tests/mocks/mock_driver_handle.h" class MockPageFaultManager; namespace NEO { struct UltDeviceFactory; } // namespace NEO namespace L0 { struct Context; struct Device; struct ContextImp; namespace ult { struct MockDriverModel : NEO::DriverModel { size_t maxAllocSize; MockDriverModel(size_t maxAllocSize) : NEO::DriverModel(NEO::DriverModelType::UNKNOWN), maxAllocSize(maxAllocSize) {} void setGmmInputArgs(void *args) override {} uint32_t getDeviceHandle() const override { return {}; } PhysicalDevicePciBusInfo getPciBusInfo() const override { return {}; } size_t getMaxMemAllocSize() const override { return maxAllocSize; } bool isGpuHangDetected(NEO::OsContext &osContext) override { 
return false; } PhyicalDevicePciSpeedInfo getPciSpeedInfo() const override { return {}; } }; struct MockDriverModelWDDM : NEO::DriverModel { size_t maxAllocSize; MockDriverModelWDDM(size_t maxAllocSize) : NEO::DriverModel(NEO::DriverModelType::WDDM), maxAllocSize(maxAllocSize) {} void setGmmInputArgs(void *args) override {} uint32_t getDeviceHandle() const override { return {}; } PhysicalDevicePciBusInfo getPciBusInfo() const override { return {}; } size_t getMaxMemAllocSize() const override { return maxAllocSize; } PhyicalDevicePciSpeedInfo getPciSpeedInfo() const override { return {}; } bool isGpuHangDetected(NEO::OsContext &osContext) override { return false; } }; struct MockDriverModelDRM : NEO::DriverModel { size_t maxAllocSize; MockDriverModelDRM(size_t maxAllocSize) : NEO::DriverModel(NEO::DriverModelType::DRM), maxAllocSize(maxAllocSize) {} void setGmmInputArgs(void *args) override {} uint32_t getDeviceHandle() const override { return {}; } PhysicalDevicePciBusInfo getPciBusInfo() const override { return {}; } size_t getMaxMemAllocSize() const override { return maxAllocSize; } PhyicalDevicePciSpeedInfo getPciSpeedInfo() const override { return {}; } bool isGpuHangDetected(NEO::OsContext &osContext) override { return false; } }; struct ContextShareableMock : public L0::ContextImp { ContextShareableMock(L0::DriverHandleImp *driverHandle) : L0::ContextImp(driverHandle) {} bool isShareableMemory(const void *pNext, bool exportableMemory, NEO::Device *neoDevice) override { return true; } }; struct DeviceFixture { NEO::MockCompilerEnableGuard compilerMock = NEO::MockCompilerEnableGuard(true); void SetUp(); // NOLINT(readability-identifier-naming) void TearDown(); // NOLINT(readability-identifier-naming) void setupWithExecutionEnvironment(NEO::ExecutionEnvironment &executionEnvironment); std::unique_ptr> driverHandle; NEO::MockDevice *neoDevice = nullptr; L0::Device *device = nullptr; L0::ContextImp *context = nullptr; MockBuiltins *mockBuiltIns = nullptr; }; 
struct DriverHandleGetMemHandlePtrMock : public L0::DriverHandleImp { void *importFdHandle(ze_device_handle_t hDevice, ze_ipc_memory_flags_t flags, uint64_t handle, NEO::GraphicsAllocation **pAloc) override { if (failHandleLookup) { return nullptr; } return &mockFd; } void *importNTHandle(ze_device_handle_t hDevice, void *handle) override { if (failHandleLookup) { return nullptr; } return &mockHandle; } uint64_t mockHandle = 57; int mockFd = 57; bool failHandleLookup = false; }; class MemoryManagerMemHandleMock : public MockMemoryManager { public: bool isNTHandle(osHandle handle, uint32_t rootDeviceIndex) override { return NTHandle; }; bool NTHandle = false; }; struct GetMemHandlePtrTestFixture { NEO::MockCompilerEnableGuard compilerMock = NEO::MockCompilerEnableGuard(true); void SetUp(); // NOLINT(readability-identifier-naming) void TearDown(); // NOLINT(readability-identifier-naming) NEO::MemoryManager *prevMemoryManager = nullptr; MemoryManagerMemHandleMock *currMemoryManager = nullptr; std::unique_ptr driverHandle; NEO::MockDevice *neoDevice = nullptr; L0::Device *device = nullptr; std::unique_ptr context; }; struct PageFaultDeviceFixture { NEO::MockCompilerEnableGuard compilerMock = NEO::MockCompilerEnableGuard(true); void SetUp(); // NOLINT(readability-identifier-naming) void TearDown(); // NOLINT(readability-identifier-naming) std::unique_ptr> driverHandle; std::unique_ptr mockMemoryManager; NEO::MockDevice *neoDevice = nullptr; L0::Device *device = nullptr; L0::ContextImp *context = nullptr; MockPageFaultManager *mockPageFaultManager = nullptr; NEO::MemoryManager *memoryManager = nullptr; }; struct MultiDeviceFixture { NEO::MockCompilerEnableGuard compilerMock = NEO::MockCompilerEnableGuard(true); void SetUp(); // NOLINT(readability-identifier-naming) void TearDown(); // NOLINT(readability-identifier-naming) DebugManagerStateRestore restorer; std::unique_ptr> driverHandle; std::vector devices; uint32_t numRootDevices = 4u; uint32_t numSubDevices = 2u; 
L0::ContextImp *context = nullptr; }; struct SingleRootMultiSubDeviceFixture : public MultiDeviceFixture { void SetUp(); L0::Device *device = nullptr; NEO::Device *neoDevice = nullptr; }; struct ContextFixture : DeviceFixture { void SetUp(); void TearDown(); }; struct MultipleDevicesWithCustomHwInfo { void SetUp(); void TearDown() {} NEO::HardwareInfo hwInfo; const uint32_t numSubslicesPerSlice = 4; const uint32_t numEuPerSubslice = 8; const uint32_t numThreadsPerEu = 7; const uint32_t sliceCount = 2; const uint32_t subsliceCount = 8; std::unique_ptr> driverHandle; NEO::OsAgnosticMemoryManager *memoryManager = nullptr; std::unique_ptr deviceFactory; const uint32_t numRootDevices = 1u; const uint32_t numSubDevices = 2u; }; } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/fixtures/host_pointer_manager_fixture.h000066400000000000000000000071051422164147700336320ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/constants.h" #include "shared/source/os_interface/device_factory.h" #include "shared/source/os_interface/os_inc_base.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/default_hw_info.h" #include "shared/test/common/mocks/mock_compilers.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/mocks/mock_memory_operations_handler.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_built_ins.h" #include "level_zero/core/test/unit_tests/mocks/mock_driver_handle.h" #include "level_zero/core/test/unit_tests/mocks/mock_host_pointer_manager.h" namespace L0 { namespace ult { struct HostPointerManagerFixure { void SetUp() { NEO::MockCompilerEnableGuard mock(true); NEO::DeviceVector devices; neoDevice = 
NEO::MockDevice::createWithNewExecutionEnvironment(NEO::defaultHwInfo.get()); auto mockBuiltIns = new MockBuiltins(); neoDevice->executionEnvironment->rootDeviceEnvironments[0]->builtins.reset(mockBuiltIns); neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[0]->memoryOperationsInterface = std::make_unique(); mockMemoryInterface = static_cast( neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[0]->memoryOperationsInterface.get()); devices.push_back(std::unique_ptr(neoDevice)); hostDriverHandle = std::make_unique(); hostDriverHandle->initialize(std::move(devices)); device = hostDriverHandle->devices[0]; openHostPointerManager = static_cast(hostDriverHandle->hostPointerManager.get()); heapPointer = hostDriverHandle->getMemoryManager()->allocateSystemMemory(heapSize, MemoryConstants::pageSize); ASSERT_NE(nullptr, heapPointer); ze_context_desc_t desc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC, nullptr, 0}; ze_result_t ret = hostDriverHandle->createContext(&desc, 0u, nullptr, &hContext); EXPECT_EQ(ZE_RESULT_SUCCESS, ret); context = L0::Context::fromHandle(hContext); } void TearDown() { context->destroy(); hostDriverHandle->getMemoryManager()->freeSystemMemory(heapPointer); } DebugManagerStateRestore debugRestore; std::unique_ptr hostDriverHandle; L0::ult::HostPointerManager *openHostPointerManager = nullptr; NEO::MockDevice *neoDevice = nullptr; L0::Device *device = nullptr; NEO::MockMemoryOperationsHandlerTests *mockMemoryInterface = nullptr; ze_context_handle_t hContext; L0::Context *context; void *heapPointer = nullptr; size_t heapSize = 4 * MemoryConstants::pageSize; }; struct ForceDisabledHostPointerManagerFixure : public HostPointerManagerFixure { void SetUp() { DebugManager.flags.EnableHostPointerImport.set(0); HostPointerManagerFixure::SetUp(); } void TearDown() { HostPointerManagerFixure::TearDown(); } }; struct ForceEnabledHostPointerManagerFixure : public HostPointerManagerFixure { void SetUp() { DebugManager.flags.EnableHostPointerImport.set(1); 
HostPointerManagerFixure::SetUp(); } void TearDown() { HostPointerManagerFixure::TearDown(); } }; } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/fixtures/memory_ipc_fixture.h000066400000000000000000000576711422164147700316030ustar00rootroot00000000000000/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/file_io.h" #include "shared/source/helpers/string.h" #include "shared/source/memory_manager/memory_operations_status.h" #include "shared/source/os_interface/device_factory.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/test_files.h" #include "shared/test/common/mocks/mock_compilers.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "shared/test/common/mocks/ult_device_factory.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/source/cmdlist/cmdlist_hw.h" #include "level_zero/core/source/context/context_imp.h" #include "level_zero/core/source/device/device_imp.h" #include "level_zero/core/source/driver/host_pointer_manager.h" #include "level_zero/core/source/hw_helpers/l0_hw_helper.h" #include "level_zero/core/source/module/module.h" #include "level_zero/core/source/module/module_imp.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_built_ins.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h" #include "level_zero/core/test/unit_tests/mocks/mock_kernel.h" namespace L0 { namespace ult { struct DriverHandleGetFdMock : public L0::DriverHandleImp { void *importFdHandle(ze_device_handle_t hDevice, ze_ipc_memory_flags_t flags, uint64_t handle, NEO::GraphicsAllocation **pAloc) override { if (mockFd == allocationMap.second) { return allocationMap.first; } return nullptr; } const int mockFd = 57; std::pair allocationMap; }; struct ContextFdMock : public L0::ContextImp { 
ContextFdMock(DriverHandleGetFdMock *inDriverHandle) : L0::ContextImp(static_cast(inDriverHandle)) { driverHandle = inDriverHandle; } ze_result_t allocDeviceMem(ze_device_handle_t hDevice, const ze_device_mem_alloc_desc_t *deviceDesc, size_t size, size_t alignment, void **ptr) override { ze_result_t res = L0::ContextImp::allocDeviceMem(hDevice, deviceDesc, size, alignment, ptr); if (ZE_RESULT_SUCCESS == res) { driverHandle->allocationMap.first = *ptr; driverHandle->allocationMap.second = driverHandle->mockFd; } return res; } ze_result_t getMemAllocProperties(const void *ptr, ze_memory_allocation_properties_t *pMemAllocProperties, ze_device_handle_t *phDevice) override { ze_result_t res = ContextImp::getMemAllocProperties(ptr, pMemAllocProperties, phDevice); if (ZE_RESULT_SUCCESS == res && pMemAllocProperties->pNext) { ze_base_properties_t *baseProperties = reinterpret_cast(pMemAllocProperties->pNext); if (baseProperties->stype == ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_EXPORT_FD) { ze_external_memory_export_fd_t *extendedMemoryExportProperties = reinterpret_cast(pMemAllocProperties->pNext); extendedMemoryExportProperties->fd = driverHandle->mockFd; } } return res; } ze_result_t getImageAllocProperties(Image *image, ze_image_allocation_ext_properties_t *pAllocProperties) override { ze_result_t res = ContextImp::getImageAllocProperties(image, pAllocProperties); if (ZE_RESULT_SUCCESS == res && pAllocProperties->pNext) { ze_base_properties_t *baseProperties = reinterpret_cast(pAllocProperties->pNext); if (baseProperties->stype == ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_EXPORT_FD) { ze_external_memory_export_fd_t *extendedMemoryExportProperties = reinterpret_cast(pAllocProperties->pNext); extendedMemoryExportProperties->fd = driverHandle->mockFd; } } return res; } ze_result_t closeIpcMemHandle(const void *ptr) override { return ZE_RESULT_SUCCESS; } DriverHandleGetFdMock *driverHandle = nullptr; }; struct MemoryExportImportTest : public ::testing::Test { void SetUp() override { 
NEO::MockCompilerEnableGuard mock(true); neoDevice = NEO::MockDevice::createWithNewExecutionEnvironment(NEO::defaultHwInfo.get()); auto mockBuiltIns = new MockBuiltins(); neoDevice->executionEnvironment->rootDeviceEnvironments[0]->builtins.reset(mockBuiltIns); NEO::DeviceVector devices; devices.push_back(std::unique_ptr(neoDevice)); driverHandle = std::make_unique(); driverHandle->initialize(std::move(devices)); device = driverHandle->devices[0]; context = std::make_unique(driverHandle.get()); EXPECT_NE(context, nullptr); context->getDevices().insert(std::make_pair(device->toHandle(), device)); auto neoDevice = device->getNEODevice(); context->rootDeviceIndices.insert(neoDevice->getRootDeviceIndex()); context->deviceBitfields.insert({neoDevice->getRootDeviceIndex(), neoDevice->getDeviceBitfield()}); } void TearDown() override { } std::unique_ptr driverHandle; NEO::MockDevice *neoDevice = nullptr; L0::Device *device = nullptr; ze_context_handle_t hContext; std::unique_ptr context; }; struct DriverHandleGetMemHandleMock : public L0::DriverHandleImp { void *importNTHandle(ze_device_handle_t hDevice, void *handle) override { if (mockHandle == allocationHandleMap.second) { return allocationHandleMap.first; } return nullptr; } void *importFdHandle(ze_device_handle_t hDevice, ze_ipc_memory_flags_t flags, uint64_t handle, NEO::GraphicsAllocation **pAloc) override { if (mockFd == allocationFdMap.second) { return allocationFdMap.first; } return nullptr; } const int mockFd = 57; std::pair allocationFdMap; uint64_t mockHandle = 57; std::pair allocationHandleMap; }; struct ContextMemHandleMock : public L0::ContextImp { ContextMemHandleMock(DriverHandleGetMemHandleMock *inDriverHandle) : L0::ContextImp(static_cast(inDriverHandle)) { driverHandle = inDriverHandle; } ze_result_t allocDeviceMem(ze_device_handle_t hDevice, const ze_device_mem_alloc_desc_t *deviceDesc, size_t size, size_t alignment, void **ptr) override { ze_result_t res = L0::ContextImp::allocDeviceMem(hDevice, 
deviceDesc, size, alignment, ptr); if (ZE_RESULT_SUCCESS == res) { driverHandle->allocationFdMap.first = *ptr; driverHandle->allocationFdMap.second = driverHandle->mockFd; driverHandle->allocationHandleMap.first = *ptr; driverHandle->allocationHandleMap.second = driverHandle->mockHandle; } return res; } ze_result_t getMemAllocProperties(const void *ptr, ze_memory_allocation_properties_t *pMemAllocProperties, ze_device_handle_t *phDevice) override { ze_result_t res = ContextImp::getMemAllocProperties(ptr, pMemAllocProperties, phDevice); if (ZE_RESULT_SUCCESS == res && pMemAllocProperties->pNext) { ze_base_properties_t *baseProperties = reinterpret_cast(pMemAllocProperties->pNext); if (baseProperties->stype == ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_EXPORT_FD) { ze_external_memory_export_fd_t *extendedMemoryExportProperties = reinterpret_cast(pMemAllocProperties->pNext); extendedMemoryExportProperties->fd = driverHandle->mockFd; } } return res; } ze_result_t getImageAllocProperties(Image *image, ze_image_allocation_ext_properties_t *pAllocProperties) override { ze_result_t res = ContextImp::getImageAllocProperties(image, pAllocProperties); if (ZE_RESULT_SUCCESS == res && pAllocProperties->pNext) { ze_base_properties_t *baseProperties = reinterpret_cast(pAllocProperties->pNext); if (baseProperties->stype == ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_EXPORT_FD) { ze_external_memory_export_fd_t *extendedMemoryExportProperties = reinterpret_cast(pAllocProperties->pNext); extendedMemoryExportProperties->fd = driverHandle->mockFd; } } return res; } ze_result_t closeIpcMemHandle(const void *ptr) override { return ZE_RESULT_SUCCESS; } DriverHandleGetMemHandleMock *driverHandle = nullptr; }; struct MemoryExportImportWSLTest : public ::testing::Test { void SetUp() override { NEO::MockCompilerEnableGuard mock(true); neoDevice = NEO::MockDevice::createWithNewExecutionEnvironment(NEO::defaultHwInfo.get()); auto mockBuiltIns = new MockBuiltins(); 
neoDevice->executionEnvironment->rootDeviceEnvironments[0]->builtins.reset(mockBuiltIns); NEO::DeviceVector devices; devices.push_back(std::unique_ptr(neoDevice)); driverHandle = std::make_unique(); prevMemoryManager = driverHandle->getMemoryManager(); currMemoryManager = new MemoryManagerMemHandleMock(); driverHandle->setMemoryManager(currMemoryManager); driverHandle->initialize(std::move(devices)); device = driverHandle->devices[0]; context = std::make_unique(driverHandle.get()); EXPECT_NE(context, nullptr); context->getDevices().insert(std::make_pair(device->toHandle(), device)); auto neoDevice = device->getNEODevice(); context->rootDeviceIndices.insert(neoDevice->getRootDeviceIndex()); context->deviceBitfields.insert({neoDevice->getRootDeviceIndex(), neoDevice->getDeviceBitfield()}); } void TearDown() override { driverHandle->setMemoryManager(prevMemoryManager); delete currMemoryManager; } std::unique_ptr driverHandle; NEO::MockDevice *neoDevice = nullptr; L0::Device *device = nullptr; ze_context_handle_t hContext; std::unique_ptr context; NEO::MemoryManager *prevMemoryManager = nullptr; MemoryManagerMemHandleMock *currMemoryManager = nullptr; }; struct DriverHandleGetWinHandleMock : public L0::DriverHandleImp { void *importNTHandle(ze_device_handle_t hDevice, void *handle) override { if (mockHandle == allocationMap.second) { return allocationMap.first; } return nullptr; } uint64_t mockHandle = 57; std::pair allocationMap; }; struct ContextHandleMock : public L0::ContextImp { ContextHandleMock(DriverHandleGetWinHandleMock *inDriverHandle) : L0::ContextImp(static_cast(inDriverHandle)) { driverHandle = inDriverHandle; } ze_result_t allocDeviceMem(ze_device_handle_t hDevice, const ze_device_mem_alloc_desc_t *deviceDesc, size_t size, size_t alignment, void **ptr) override { ze_result_t res = L0::ContextImp::allocDeviceMem(hDevice, deviceDesc, size, alignment, ptr); if (ZE_RESULT_SUCCESS == res) { driverHandle->allocationMap.first = *ptr; 
driverHandle->allocationMap.second = driverHandle->mockHandle; } return res; } ze_result_t getMemAllocProperties(const void *ptr, ze_memory_allocation_properties_t *pMemAllocProperties, ze_device_handle_t *phDevice) override { ze_result_t res = ContextImp::getMemAllocProperties(ptr, pMemAllocProperties, phDevice); if (ZE_RESULT_SUCCESS == res && pMemAllocProperties->pNext) { ze_external_memory_export_win32_handle_t *extendedMemoryExportProperties = reinterpret_cast(pMemAllocProperties->pNext); extendedMemoryExportProperties->handle = reinterpret_cast(reinterpret_cast(driverHandle->mockHandle)); } return res; } ze_result_t getImageAllocProperties(Image *image, ze_image_allocation_ext_properties_t *pAllocProperties) override { ze_result_t res = ContextImp::getImageAllocProperties(image, pAllocProperties); if (ZE_RESULT_SUCCESS == res && pAllocProperties->pNext) { ze_external_memory_export_win32_handle_t *extendedMemoryExportProperties = reinterpret_cast(pAllocProperties->pNext); extendedMemoryExportProperties->handle = reinterpret_cast(reinterpret_cast(driverHandle->mockHandle)); } return res; } ze_result_t freeMem(const void *ptr) override { L0::ContextImp::freeMem(ptr); return ZE_RESULT_SUCCESS; } DriverHandleGetWinHandleMock *driverHandle = nullptr; }; struct MemoryExportImportWinHandleTest : public ::testing::Test { void SetUp() override { NEO::MockCompilerEnableGuard mock(true); neoDevice = NEO::MockDevice::createWithNewExecutionEnvironment(NEO::defaultHwInfo.get()); auto mockBuiltIns = new MockBuiltins(); neoDevice->executionEnvironment->rootDeviceEnvironments[0]->builtins.reset(mockBuiltIns); NEO::DeviceVector devices; devices.push_back(std::unique_ptr(neoDevice)); driverHandle = std::make_unique(); driverHandle->initialize(std::move(devices)); device = driverHandle->devices[0]; context = std::make_unique(driverHandle.get()); EXPECT_NE(context, nullptr); context->getDevices().insert(std::make_pair(device->toHandle(), device)); auto neoDevice = 
device->getNEODevice(); context->rootDeviceIndices.insert(neoDevice->getRootDeviceIndex()); context->deviceBitfields.insert({neoDevice->getRootDeviceIndex(), neoDevice->getDeviceBitfield()}); } void TearDown() override { } std::unique_ptr driverHandle; NEO::MockDevice *neoDevice = nullptr; L0::Device *device = nullptr; ze_context_handle_t hContext; std::unique_ptr context; }; struct DriverHandleGetIpcHandleMock : public DriverHandleImp { void *importFdHandle(ze_device_handle_t hDevice, ze_ipc_memory_flags_t flags, uint64_t handle, NEO::GraphicsAllocation **pAlloc) override { EXPECT_EQ(handle, static_cast(mockFd)); if (mockFd == allocationMap.second) { return allocationMap.first; } return nullptr; } const int mockFd = 999; std::pair allocationMap; }; struct ContextGetIpcHandleMock : public L0::ContextImp { ContextGetIpcHandleMock(DriverHandleGetIpcHandleMock *inDriverHandle) : L0::ContextImp(static_cast(inDriverHandle)) { driverHandle = inDriverHandle; } ze_result_t allocDeviceMem(ze_device_handle_t hDevice, const ze_device_mem_alloc_desc_t *deviceDesc, size_t size, size_t alignment, void **ptr) override { ze_result_t res = L0::ContextImp::allocDeviceMem(hDevice, deviceDesc, size, alignment, ptr); if (ZE_RESULT_SUCCESS == res) { driverHandle->allocationMap.first = *ptr; driverHandle->allocationMap.second = driverHandle->mockFd; } return res; } ze_result_t getIpcMemHandle(const void *ptr, ze_ipc_mem_handle_t *pIpcHandle) override { uint64_t handle = driverHandle->mockFd; memcpy_s(reinterpret_cast(pIpcHandle->data), sizeof(ze_ipc_mem_handle_t), &handle, sizeof(handle)); return ZE_RESULT_SUCCESS; } DriverHandleGetIpcHandleMock *driverHandle = nullptr; }; class MemoryManagerIpcMock : public NEO::MemoryManager { public: MemoryManagerIpcMock(NEO::ExecutionEnvironment &executionEnvironment) : NEO::MemoryManager(executionEnvironment) {} NEO::GraphicsAllocation *createGraphicsAllocationFromSharedHandle(osHandle handle, const AllocationProperties &properties, bool 
requireSpecificBitness, bool isHostIpcAllocation) override { return nullptr; } void addAllocationToHostPtrManager(NEO::GraphicsAllocation *memory) override{}; void removeAllocationFromHostPtrManager(NEO::GraphicsAllocation *memory) override{}; NEO::GraphicsAllocation *createGraphicsAllocationFromNTHandle(void *handle, uint32_t rootDeviceIndex, AllocationType allocType) override { return nullptr; }; AllocationStatus populateOsHandles(NEO::OsHandleStorage &handleStorage, uint32_t rootDeviceIndex) override { return AllocationStatus::Success; }; void cleanOsHandles(NEO::OsHandleStorage &handleStorage, uint32_t rootDeviceIndex) override{}; void freeGraphicsMemoryImpl(NEO::GraphicsAllocation *gfxAllocation) override{}; void freeGraphicsMemoryImpl(GraphicsAllocation *gfxAllocation, bool isImportedAllocation) override{}; uint64_t getSystemSharedMemory(uint32_t rootDeviceIndex) override { return 0; }; uint64_t getLocalMemorySize(uint32_t rootDeviceIndex, uint32_t deviceBitfield) override { return 0; }; double getPercentOfGlobalMemoryAvailable(uint32_t rootDeviceIndex) override { return 0; } AddressRange reserveGpuAddress(size_t size, uint32_t rootDeviceIndex) override { return {}; } void freeGpuAddress(AddressRange addressRange, uint32_t rootDeviceIndex) override{}; NEO::GraphicsAllocation *createGraphicsAllocation(OsHandleStorage &handleStorage, const NEO::AllocationData &allocationData) override { return nullptr; }; NEO::GraphicsAllocation *allocateGraphicsMemoryForNonSvmHostPtr(const NEO::AllocationData &allocationData) override { return nullptr; }; NEO::GraphicsAllocation *allocateGraphicsMemoryWithAlignment(const NEO::AllocationData &allocationData) override { return nullptr; }; NEO::GraphicsAllocation *allocateUSMHostGraphicsMemory(const NEO::AllocationData &allocationData) override { return nullptr; }; NEO::GraphicsAllocation *allocateGraphicsMemory64kb(const NEO::AllocationData &allocationData) override { return nullptr; }; NEO::GraphicsAllocation 
*allocate32BitGraphicsMemoryImpl(const NEO::AllocationData &allocationData, bool useLocalMemory) override { return nullptr; }; NEO::GraphicsAllocation *allocateGraphicsMemoryInDevicePool(const NEO::AllocationData &allocationData, AllocationStatus &status) override { return nullptr; }; NEO::GraphicsAllocation *allocateGraphicsMemoryWithGpuVa(const NEO::AllocationData &allocationData) override { return nullptr; }; NEO::GraphicsAllocation *allocateGraphicsMemoryForImageImpl(const NEO::AllocationData &allocationData, std::unique_ptr gmm) override { return nullptr; }; NEO::GraphicsAllocation *allocateMemoryByKMD(const NEO::AllocationData &allocationData) override { return nullptr; }; void *lockResourceImpl(NEO::GraphicsAllocation &graphicsAllocation) override { return nullptr; }; void unlockResourceImpl(NEO::GraphicsAllocation &graphicsAllocation) override{}; }; class MemoryManagerOpenIpcMock : public MemoryManagerIpcMock { public: MemoryManagerOpenIpcMock(NEO::ExecutionEnvironment &executionEnvironment) : MemoryManagerIpcMock(executionEnvironment) {} NEO::GraphicsAllocation *createGraphicsAllocationFromSharedHandle(osHandle handle, const AllocationProperties &properties, bool requireSpecificBitness, bool isHostIpcAllocation) override { if (failOnCreateGraphicsAllocationFromSharedHandle) { return nullptr; } auto alloc = new NEO::MockGraphicsAllocation(0, NEO::AllocationType::BUFFER, reinterpret_cast(sharedHandleAddress++), 0x1000, 0, sizeof(uint32_t), MemoryPool::System4KBPages); alloc->setGpuBaseAddress(0xabcd); return alloc; } NEO::GraphicsAllocation *createGraphicsAllocationFromNTHandle(void *handle, uint32_t rootDeviceIndex, AllocationType allocType) override { auto alloc = new NEO::MockGraphicsAllocation(0, NEO::AllocationType::BUFFER, reinterpret_cast(sharedHandleAddress++), 0x1000, 0, sizeof(uint32_t), MemoryPool::System4KBPages); alloc->setGpuBaseAddress(0xabcd); return alloc; }; uint64_t sharedHandleAddress = 0x1234; bool 
failOnCreateGraphicsAllocationFromSharedHandle = false; }; struct ContextIpcMock : public L0::ContextImp { ContextIpcMock(DriverHandleImp *inDriverHandle) : L0::ContextImp(static_cast(inDriverHandle)) { driverHandle = inDriverHandle; } ze_result_t getIpcMemHandle(const void *ptr, ze_ipc_mem_handle_t *pIpcHandle) override { uint64_t handle = mockFd; memcpy_s(reinterpret_cast(pIpcHandle->data), sizeof(ze_ipc_mem_handle_t), &handle, sizeof(handle)); return ZE_RESULT_SUCCESS; } const int mockFd = 999; }; struct MemoryOpenIpcHandleTest : public ::testing::Test { void SetUp() override { NEO::MockCompilerEnableGuard mock(true); neoDevice = NEO::MockDevice::createWithNewExecutionEnvironment(NEO::defaultHwInfo.get()); auto mockBuiltIns = new MockBuiltins(); neoDevice->executionEnvironment->rootDeviceEnvironments[0]->builtins.reset(mockBuiltIns); NEO::DeviceVector devices; devices.push_back(std::unique_ptr(neoDevice)); driverHandle = std::make_unique(); driverHandle->initialize(std::move(devices)); prevMemoryManager = driverHandle->getMemoryManager(); currMemoryManager = new MemoryManagerOpenIpcMock(*neoDevice->executionEnvironment); driverHandle->setMemoryManager(currMemoryManager); device = driverHandle->devices[0]; context = std::make_unique(driverHandle.get()); EXPECT_NE(context, nullptr); context->getDevices().insert(std::make_pair(device->toHandle(), device)); auto neoDevice = device->getNEODevice(); context->rootDeviceIndices.insert(neoDevice->getRootDeviceIndex()); context->deviceBitfields.insert({neoDevice->getRootDeviceIndex(), neoDevice->getDeviceBitfield()}); } void TearDown() override { driverHandle->setMemoryManager(prevMemoryManager); delete currMemoryManager; } NEO::MemoryManager *prevMemoryManager = nullptr; NEO::MemoryManager *currMemoryManager = nullptr; std::unique_ptr driverHandle; NEO::MockDevice *neoDevice = nullptr; L0::Device *device = nullptr; std::unique_ptr context; }; } // namespace ult } // namespace L0 
compute-runtime-22.14.22890/level_zero/core/test/unit_tests/fixtures/module_fixture.h000066400000000000000000000411061422164147700307070ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_container/implicit_scaling.h" #include "shared/source/helpers/file_io.h" #include "shared/source/memory_manager/allocation_properties.h" #include "shared/source/program/kernel_info.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/test_files.h" #include "shared/test/common/mocks/mock_compilers.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "shared/test/unit_test/device_binary_format/zebin_tests.h" #include "level_zero/core/source/module/module.h" #include "level_zero/core/source/module/module_imp.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_built_ins.h" #include "level_zero/core/test/unit_tests/mocks/mock_context.h" #include "level_zero/core/test/unit_tests/mocks/mock_device.h" #include "level_zero/core/test/unit_tests/mocks/mock_kernel.h" namespace L0 { namespace ult { struct ModuleImmutableDataFixture : public DeviceFixture { struct MockImmutableMemoryManager : public NEO::MockMemoryManager { MockImmutableMemoryManager(NEO::ExecutionEnvironment &executionEnvironment) : NEO::MockMemoryManager(const_cast(executionEnvironment)) {} bool copyMemoryToAllocation(NEO::GraphicsAllocation *graphicsAllocation, size_t destinationOffset, const void *memoryToCopy, size_t sizeToCopy) override { copyMemoryToAllocationCalledTimes++; return true; } uint32_t copyMemoryToAllocationCalledTimes = 0; }; struct MockImmutableData : KernelImmutableData { using KernelImmutableData::crossThreadDataSize; using KernelImmutableData::crossThreadDataTemplate; using 
KernelImmutableData::kernelDescriptor; using KernelImmutableData::kernelInfo; MockImmutableData(uint32_t perHwThreadPrivateMemorySize) { mockKernelDescriptor = new NEO::KernelDescriptor; mockKernelDescriptor->kernelAttributes.perHwThreadPrivateMemorySize = perHwThreadPrivateMemorySize; kernelDescriptor = mockKernelDescriptor; mockKernelInfo = new NEO::KernelInfo; mockKernelInfo->heapInfo.pKernelHeap = kernelHeap; mockKernelInfo->heapInfo.KernelHeapSize = MemoryConstants::pageSize; kernelInfo = mockKernelInfo; if (getIsaGraphicsAllocation() != nullptr) { device->getNEODevice()->getMemoryManager()->freeGraphicsMemory(&*isaGraphicsAllocation); isaGraphicsAllocation.release(); } isaGraphicsAllocation.reset(new NEO::MockGraphicsAllocation(0, NEO::AllocationType::KERNEL_ISA, reinterpret_cast(0x1234), 0x1000, 0, sizeof(uint32_t), MemoryPool::System4KBPages)); kernelInfo->kernelAllocation = isaGraphicsAllocation.get(); } void setDevice(L0::Device *inDevice) { device = inDevice; } ~MockImmutableData() override { delete mockKernelInfo; delete mockKernelDescriptor; } void resizeExplicitArgs(size_t size) { kernelDescriptor->payloadMappings.explicitArgs.resize(size); } NEO::KernelDescriptor *mockKernelDescriptor = nullptr; char kernelHeap[MemoryConstants::pageSize] = {}; NEO::KernelInfo *mockKernelInfo = nullptr; }; struct MockModule : public L0::ModuleImp { using ModuleImp::getKernelImmutableDataVector; using ModuleImp::kernelImmDatas; using ModuleImp::maxGroupSize; using ModuleImp::translationUnit; using ModuleImp::type; MockModule(L0::Device *device, L0::ModuleBuildLog *moduleBuildLog, L0::ModuleType type, uint32_t perHwThreadPrivateMemorySize, MockImmutableData *inMockKernelImmData) : ModuleImp(device, moduleBuildLog, type), mockKernelImmData(inMockKernelImmData) { mockKernelImmData->setDevice(device); } ~MockModule() { } const KernelImmutableData *getKernelImmutableData(const char *functionName) const override { return mockKernelImmData; } void 
checkIfPrivateMemoryPerDispatchIsNeeded() override { const_cast(kernelImmDatas[0]->getDescriptor()).kernelAttributes.perHwThreadPrivateMemorySize = mockKernelImmData->getDescriptor().kernelAttributes.perHwThreadPrivateMemorySize; ModuleImp::checkIfPrivateMemoryPerDispatchIsNeeded(); } MockImmutableData *mockKernelImmData = nullptr; }; class MockKernel : public WhiteBox { public: using KernelImp::crossThreadData; using KernelImp::crossThreadDataSize; using KernelImp::kernelArgHandlers; using KernelImp::kernelHasIndirectAccess; using KernelImp::kernelRequiresGenerationOfLocalIdsByRuntime; using KernelImp::privateMemoryGraphicsAllocation; using KernelImp::requiredWorkgroupOrder; MockKernel(MockModule *mockModule) : WhiteBox(mockModule) { } void setBufferSurfaceState(uint32_t argIndex, void *address, NEO::GraphicsAllocation *alloc) override { return; } void evaluateIfRequiresGenerationOfLocalIdsByRuntime(const NEO::KernelDescriptor &kernelDescriptor) override { return; } void setCrossThreadData(uint32_t dataSize) { crossThreadData.reset(new uint8_t[dataSize]); crossThreadDataSize = dataSize; memset(crossThreadData.get(), 0x00, crossThreadDataSize); } ~MockKernel() override { } }; void SetUp() { auto executionEnvironment = MockDevice::prepareExecutionEnvironment(NEO::defaultHwInfo.get(), 0u); memoryManager = new MockImmutableMemoryManager(*executionEnvironment); executionEnvironment->memoryManager.reset(memoryManager); DeviceFixture::setupWithExecutionEnvironment(*executionEnvironment); } void createModuleFromBinary(uint32_t perHwThreadPrivateMemorySize, bool isInternal, MockImmutableData *mockKernelImmData) { std::string testFile; retrieveBinaryKernelFilenameApiSpecific(testFile, binaryFilename + "_", ".bin"); size_t size = 0; auto src = loadDataFromFile( testFile.c_str(), size); ASSERT_NE(0u, size); ASSERT_NE(nullptr, src); ze_module_desc_t moduleDesc = {}; moduleDesc.format = ZE_MODULE_FORMAT_NATIVE; moduleDesc.pInputModule = reinterpret_cast(src.get()); 
moduleDesc.inputSize = size; ModuleBuildLog *moduleBuildLog = nullptr; module = std::make_unique(device, moduleBuildLog, ModuleType::User, perHwThreadPrivateMemorySize, mockKernelImmData); module->type = isInternal ? ModuleType::Builtin : ModuleType::User; bool result = module->initialize(&moduleDesc, device->getNEODevice()); EXPECT_TRUE(result); } void createKernel(MockKernel *kernel) { ze_kernel_desc_t desc = {}; desc.pKernelName = kernelName.c_str(); kernel->initialize(&desc); } void TearDown() { DeviceFixture::TearDown(); } const std::string binaryFilename = "test_kernel"; const std::string kernelName = "test"; const uint32_t numKernelArguments = 6; std::unique_ptr module; MockImmutableMemoryManager *memoryManager; }; struct ModuleFixture : public DeviceFixture { void SetUp() { NEO::MockCompilerEnableGuard mock(true); DeviceFixture::SetUp(); createModuleFromBinary(); } void createModuleFromBinary(ModuleType type = ModuleType::User) { std::string testFile; retrieveBinaryKernelFilenameApiSpecific(testFile, binaryFilename + "_", ".bin"); size_t size = 0; auto src = loadDataFromFile( testFile.c_str(), size); ASSERT_NE(0u, size); ASSERT_NE(nullptr, src); ze_module_desc_t moduleDesc = {}; moduleDesc.format = ZE_MODULE_FORMAT_NATIVE; moduleDesc.pInputModule = reinterpret_cast(src.get()); moduleDesc.inputSize = size; ModuleBuildLog *moduleBuildLog = nullptr; module.reset(Module::create(device, &moduleDesc, moduleBuildLog, type)); } void createKernel() { ze_kernel_desc_t desc = {}; desc.pKernelName = kernelName.c_str(); kernel = std::make_unique>(); kernel->module = module.get(); kernel->initialize(&desc); } void TearDown() { DeviceFixture::TearDown(); } const std::string binaryFilename = "test_kernel"; const std::string kernelName = "test"; const uint32_t numKernelArguments = 6; std::unique_ptr module; std::unique_ptr> kernel; }; struct MultiDeviceModuleFixture : public MultiDeviceFixture { void SetUp() { MultiDeviceFixture::SetUp(); modules.resize(numRootDevices); } 
void createModuleFromBinary(uint32_t rootDeviceIndex) { std::string testFile; retrieveBinaryKernelFilenameApiSpecific(testFile, binaryFilename + "_", ".bin"); size_t size = 0; auto src = loadDataFromFile(testFile.c_str(), size); ASSERT_NE(0u, size); ASSERT_NE(nullptr, src); ze_module_desc_t moduleDesc = {}; moduleDesc.format = ZE_MODULE_FORMAT_NATIVE; moduleDesc.pInputModule = reinterpret_cast(src.get()); moduleDesc.inputSize = size; ModuleBuildLog *moduleBuildLog = nullptr; auto device = driverHandle->devices[rootDeviceIndex]; modules[rootDeviceIndex].reset(Module::create(device, &moduleDesc, moduleBuildLog, ModuleType::User)); } void createKernel(uint32_t rootDeviceIndex) { ze_kernel_desc_t desc = {}; desc.pKernelName = kernelName.c_str(); kernel = std::make_unique>(); kernel->module = modules[rootDeviceIndex].get(); kernel->initialize(&desc); } void TearDown() { MultiDeviceFixture::TearDown(); } const std::string binaryFilename = "test_kernel"; const std::string kernelName = "test"; const uint32_t numKernelArguments = 6; std::vector> modules; std::unique_ptr> kernel; }; struct ModuleWithZebinFixture : public DeviceFixture { struct MockImmutableData : public KernelImmutableData { using KernelImmutableData::device; using KernelImmutableData::isaGraphicsAllocation; using KernelImmutableData::kernelDescriptor; MockImmutableData(L0::Device *device) { auto mockKernelDescriptor = new NEO::KernelDescriptor; mockKernelDescriptor->kernelMetadata.kernelName = "kernel"; kernelDescriptor = mockKernelDescriptor; this->device = device; isaGraphicsAllocation.reset(new NEO::MockGraphicsAllocation(0, NEO::AllocationType::KERNEL_ISA, reinterpret_cast(0x1234), 0x1000, 0, sizeof(uint32_t), MemoryPool::System4KBPages)); } ~MockImmutableData() { delete kernelDescriptor; } }; struct MockModuleWithZebin : public L0::ModuleImp { using ModuleImp::getDebugInfo; using ModuleImp::getZebinSegments; using ModuleImp::kernelImmDatas; using ModuleImp::passDebugData; using 
ModuleImp::translationUnit; MockModuleWithZebin(L0::Device *device) : ModuleImp(device, nullptr, ModuleType::User) {} void addSegments() { kernelImmDatas.push_back(std::make_unique(device)); translationUnit->globalVarBuffer = new NEO::MockGraphicsAllocation(0, NEO::AllocationType::GLOBAL_SURFACE, reinterpret_cast(0x1234), 0x1000, 0, sizeof(uint32_t), MemoryPool::System4KBPages); translationUnit->globalConstBuffer = new NEO::MockGraphicsAllocation(0, NEO::AllocationType::GLOBAL_SURFACE, reinterpret_cast(0x1234), 0x1000, 0, sizeof(uint32_t), MemoryPool::System4KBPages); translationUnit->programInfo.globalStrings.initData = &strings; translationUnit->programInfo.globalStrings.size = sizeof(strings); } void addKernelSegment() { } void addEmptyZebin() { auto zebin = ZebinTestData::ValidEmptyProgram(); translationUnit->unpackedDeviceBinarySize = zebin.storage.size(); translationUnit->unpackedDeviceBinary.reset(new char[zebin.storage.size()]); memcpy_s(translationUnit->unpackedDeviceBinary.get(), translationUnit->unpackedDeviceBinarySize, zebin.storage.data(), zebin.storage.size()); } ~MockModuleWithZebin() { } const char strings[12] = "Hello olleH"; }; void SetUp() { NEO::MockCompilerEnableGuard mock(true); DeviceFixture::SetUp(); module = std::make_unique(device); } void TearDown() { DeviceFixture::TearDown(); } std::unique_ptr module; }; struct ImportHostPointerModuleFixture : public ModuleFixture { void SetUp() { DebugManager.flags.EnableHostPointerImport.set(1); ModuleFixture::SetUp(); hostPointer = driverHandle->getMemoryManager()->allocateSystemMemory(MemoryConstants::pageSize, MemoryConstants::pageSize); } void TearDown() { driverHandle->getMemoryManager()->freeSystemMemory(hostPointer); ModuleFixture::TearDown(); } DebugManagerStateRestore debugRestore; void *hostPointer = nullptr; }; struct MultiTileModuleFixture : public MultiDeviceModuleFixture { void SetUp() { DebugManager.flags.EnableImplicitScaling.set(1); MultiDeviceFixture::numRootDevices = 1u; 
MultiDeviceFixture::numSubDevices = 2u; MultiDeviceModuleFixture::SetUp(); createModuleFromBinary(0); device = driverHandle->devices[0]; } void TearDown() { MultiDeviceModuleFixture::TearDown(); } DebugManagerStateRestore debugRestore; VariableBackup backup{&NEO::ImplicitScaling::apiSupport, true}; L0::Device *device = nullptr; }; } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/gen11/000077500000000000000000000000001422164147700245435ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/gen11/CMakeLists.txt000066400000000000000000000005641422164147700273100ustar00rootroot00000000000000# # Copyright (C) 2020-2022 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_GEN11) target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/test_cmdqueue_thread_arbitration_policy_gen11.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_module_gen11.cpp ) endif() compute-runtime-22.14.22890/level_zero/core/test/unit_tests/gen11/enable_l0_mocks_gen11.cpp000066400000000000000000000005741422164147700312650ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/test/unit_tests/mocks/mock_l0_debugger.h" namespace NEO { struct ICLFamily; using GfxFamily = ICLFamily; } // namespace NEO namespace L0 { namespace ult { static MockDebuggerL0HwPopulateFactory mockDebuggerGen11; } } // namespace L0test_cmdqueue_thread_arbitration_policy_gen11.cpp000066400000000000000000000156301422164147700363430ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/gen11/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen9/reg_configs.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include 
"shared/test/common/mocks/mock_compilers.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/source/driver/driver_imp.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_built_ins.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h" namespace L0 { namespace ult { struct CommandQueueThreadArbitrationPolicyTests : public ::testing::Test { void SetUp() override { NEO::MockCompilerEnableGuard mock(true); ze_result_t returnValue = ZE_RESULT_SUCCESS; auto executionEnvironment = new NEO::ExecutionEnvironment(); auto mockBuiltIns = new MockBuiltins(); executionEnvironment->prepareRootDeviceEnvironments(1); executionEnvironment->rootDeviceEnvironments[0]->builtins.reset(mockBuiltIns); executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(NEO::defaultHwInfo.get()); neoDevice = NEO::MockDevice::create(executionEnvironment, 0u); std::vector> devices; devices.push_back(std::unique_ptr(neoDevice)); auto driverHandleUlt = whitebox_cast(DriverHandle::create(std::move(devices), L0EnvVariables{}, &returnValue)); driverHandle.reset(driverHandleUlt); ASSERT_NE(nullptr, driverHandle); ze_device_handle_t hDevice; uint32_t count = 1; ze_result_t result = driverHandle->getDevice(&count, &hDevice); EXPECT_EQ(ZE_RESULT_SUCCESS, result); device = L0::Device::fromHandle(hDevice); ASSERT_NE(nullptr, device); ze_command_queue_desc_t queueDesc = {}; commandQueue = whitebox_cast(CommandQueue::create(productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &queueDesc, false, false, returnValue)); ASSERT_NE(nullptr, commandQueue->commandStream); commandList = CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue); ASSERT_NE(nullptr, commandList); } void TearDown() override { commandList->destroy(); commandQueue->destroy(); L0::GlobalDriver = nullptr; } DebugManagerStateRestore restorer; WhiteBox *commandQueue = nullptr; 
L0::CommandList *commandList = nullptr; std::unique_ptr> driverHandle; NEO::MockDevice *neoDevice = nullptr; L0::Device *device; }; HWTEST2_F(CommandQueueThreadArbitrationPolicyTests, whenCommandListIsExecutedThenDefaultRoundRobinThreadArbitrationPolicyIsUsed, IsGen11HP) { size_t usedSpaceBefore = commandQueue->commandStream->getUsed(); ze_command_list_handle_t hCommandList = commandList->toHandle(); auto result = commandQueue->executeCommandLists(1, &hCommandList, nullptr, true); ASSERT_EQ(ZE_RESULT_SUCCESS, result); size_t usedSpaceAfter = commandQueue->commandStream->getUsed(); ASSERT_GT(usedSpaceAfter, usedSpaceBefore); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandQueue->commandStream->getCpuBase(), 0), usedSpaceAfter)); using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; auto miLoadImm = findAll(cmdList.begin(), cmdList.end()); EXPECT_GE(2u, miLoadImm.size()); for (auto it : miLoadImm) { auto cmd = genCmdCast(*it); if (cmd->getRegisterOffset() == NEO::DebugControlReg2::address) { EXPECT_EQ(NEO::DebugControlReg2::getRegData(NEO::ThreadArbitrationPolicy::RoundRobin), cmd->getDataDword()); } } } HWTEST2_F(CommandQueueThreadArbitrationPolicyTests, whenCommandListIsExecutedAndOverrideThreadArbitrationPolicyDebugFlagIsSetToZeroThenAgeBasedThreadArbitrationPolicyIsUsed, IsGen11HP) { DebugManager.flags.OverrideThreadArbitrationPolicy.set(0); size_t usedSpaceBefore = commandQueue->commandStream->getUsed(); ze_command_list_handle_t hCommandList = commandList->toHandle(); auto result = commandQueue->executeCommandLists(1, &hCommandList, nullptr, true); ASSERT_EQ(ZE_RESULT_SUCCESS, result); size_t usedSpaceAfter = commandQueue->commandStream->getUsed(); ASSERT_GT(usedSpaceAfter, usedSpaceBefore); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandQueue->commandStream->getCpuBase(), 0), usedSpaceAfter)); using MI_LOAD_REGISTER_IMM = typename 
FamilyType::MI_LOAD_REGISTER_IMM; auto miLoadImm = findAll(cmdList.begin(), cmdList.end()); EXPECT_GE(2u, miLoadImm.size()); for (auto it : miLoadImm) { auto cmd = genCmdCast(*it); if (cmd->getRegisterOffset() == NEO::DebugControlReg2::address) { EXPECT_EQ(NEO::DebugControlReg2::getRegData(NEO::ThreadArbitrationPolicy::AgeBased), cmd->getDataDword()); } } } HWTEST2_F(CommandQueueThreadArbitrationPolicyTests, whenCommandListIsExecutedAndOverrideThreadArbitrationPolicyDebugFlagIsSetToOneThenRoundRobinThreadArbitrationPolicyIsUsed, IsGen11HP) { DebugManager.flags.OverrideThreadArbitrationPolicy.set(1); size_t usedSpaceBefore = commandQueue->commandStream->getUsed(); ze_command_list_handle_t hCommandList = commandList->toHandle(); auto result = commandQueue->executeCommandLists(1, &hCommandList, nullptr, true); ASSERT_EQ(ZE_RESULT_SUCCESS, result); size_t usedSpaceAfter = commandQueue->commandStream->getUsed(); ASSERT_GT(usedSpaceAfter, usedSpaceBefore); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandQueue->commandStream->getCpuBase(), 0), usedSpaceAfter)); using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; auto miLoadImm = findAll(cmdList.begin(), cmdList.end()); EXPECT_GE(2u, miLoadImm.size()); for (auto it : miLoadImm) { auto cmd = genCmdCast(*it); if (cmd->getRegisterOffset() == NEO::DebugControlReg2::address) { EXPECT_EQ(NEO::DebugControlReg2::getRegData(NEO::ThreadArbitrationPolicy::RoundRobin), cmd->getDataDword()); } } } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/gen11/test_module_gen11.cpp000066400000000000000000000017551422164147700305760ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" namespace L0 { namespace ult { using KernelPropertyTest = Test; 
HWTEST2_F(KernelPropertyTest, givenKernelExtendedPropertiesStructureWhenKernelPropertiesCalledThenPropertiesAreCorrectlySet, IsGen11HP) { ze_device_module_properties_t kernelProperties = {}; ze_float_atomic_ext_properties_t kernelExtendedProperties = {}; kernelExtendedProperties.stype = ZE_STRUCTURE_TYPE_FLOAT_ATOMIC_EXT_PROPERTIES; kernelProperties.pNext = &kernelExtendedProperties; ze_result_t res = device->getKernelProperties(&kernelProperties); EXPECT_EQ(res, ZE_RESULT_SUCCESS); EXPECT_EQ(0u, kernelExtendedProperties.fp16Flags); EXPECT_EQ(0u, kernelExtendedProperties.fp32Flags); EXPECT_EQ(0u, kernelExtendedProperties.fp64Flags); } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/gen12lp/000077500000000000000000000000001422164147700251005ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/gen12lp/CMakeLists.txt000066400000000000000000000013571422164147700276460ustar00rootroot00000000000000# # Copyright (C) 2020-2022 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_GEN12LP) target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/test_cache_flush.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_cmdlist_gen12lp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_device_gen12lp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_events_gen12lp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_l0_hw_helper_gen12lp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_module_gen12lp.cpp ) target_include_directories(${TARGET_NAME} PRIVATE ${COMPUTE_RUNTIME_DIR}/level_zero/core/source/gen12lp/definitions/) endif() compute-runtime-22.14.22890/level_zero/core/test/unit_tests/gen12lp/enable_l0_mocks_gen12lp.cpp000066400000000000000000000006041422164147700321510ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/test/unit_tests/mocks/mock_l0_debugger.h" namespace NEO { struct TGLLPFamily; using GfxFamily = 
TGLLPFamily; } // namespace NEO namespace L0 { namespace ult { static MockDebuggerL0HwPopulateFactory mockDebuggerGen12lp; } } // namespace L0compute-runtime-22.14.22890/level_zero/core/test/unit_tests/gen12lp/test_cache_flush.cpp000066400000000000000000000074171422164147700311200ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/cache_flush.inl" #include "shared/source/helpers/l3_range.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/source/gen12lp/cmdlist_gen12lp.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h" namespace L0 { namespace ult { using CacheFlushTests = Test; HWTEST2_F(CacheFlushTests, GivenCommandStreamWithSingleL3RangeAndNonZeroPostSyncAddressWhenFlushGpuCacheIsCalledThenPostSyncOperationIsSetForL3Control, IsDG1) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using L3_CONTROL = typename GfxFamily::L3_CONTROL; auto &hardwareInfo = this->neoDevice->getHardwareInfo(); auto commandList = std::make_unique>>(); commandList->initialize(device, NEO::EngineGroupType::Copy, 0u); LinearStream *cmdStream = commandList->commandContainer.getCommandStream(); uint64_t gpuAddress = 0x1200; void *buffer = reinterpret_cast(gpuAddress); size_t size = 0x1000; uint64_t postSyncAddress = 0x1200; NEO::MockGraphicsAllocation mockAllocation(buffer, gpuAddress, size); NEO::SvmAllocationData allocData(0); allocData.size = size; allocData.gpuAllocations.addAllocation(&mockAllocation); device->getDriverHandle()->getSvmAllocsManager()->insertSVMAlloc(allocData); L3RangesVec ranges; ranges.push_back(L3Range::fromAddressSizeWithPolicy( gpuAddress, size, GfxFamily::L3_FLUSH_ADDRESS_RANGE:: L3_FLUSH_EVICTION_POLICY_FLUSH_L3_WITH_EVICTION)); 
NEO::flushGpuCache(cmdStream, ranges, postSyncAddress, hardwareInfo); GenCmdList cmdList; EXPECT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(cmdStream->getCpuBase(), 0), cmdStream->getUsed())); auto itor = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), itor); } HWTEST2_F(CacheFlushTests, GivenCommandStreamWithMultipleL3RangeAndUsePostSyncIsSetToTrueWhenGetSizeNeededToFlushGpuCacheIsCalledThenCorrectSizeIsReturned, IsDG1) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using L3_CONTROL = typename GfxFamily::L3_CONTROL; uint64_t gpuAddress = 0x1200; size_t size = 0x1000; L3RangesVec ranges; ranges.push_back(L3Range::fromAddressSizeWithPolicy( gpuAddress, size, GfxFamily::L3_FLUSH_ADDRESS_RANGE:: L3_FLUSH_EVICTION_POLICY_FLUSH_L3_WITH_EVICTION)); EXPECT_NE(0u, ranges.size()); size_t ret = NEO::getSizeNeededToFlushGpuCache(ranges, true); size_t expected = ranges.size() * sizeof(L3_CONTROL); EXPECT_EQ(ret, expected); } HWTEST2_F(CacheFlushTests, GivenCommandStreamWithMultipleL3RangeAndUsePostSyncIsSetToFalseWhenGetSizeNeededToFlushGpuCacheIsCalledThenCorrectSizeIsReturned, IsDG1) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using L3_CONTROL = typename GfxFamily::L3_CONTROL; uint64_t gpuAddress = 0x1200; size_t size = 0x1000; L3RangesVec ranges; ranges.push_back(L3Range::fromAddressSizeWithPolicy( gpuAddress, size, GfxFamily::L3_FLUSH_ADDRESS_RANGE:: L3_FLUSH_EVICTION_POLICY_FLUSH_L3_WITH_EVICTION)); EXPECT_NE(0u, ranges.size()); size_t ret = NEO::getSizeNeededToFlushGpuCache(ranges, false); size_t expected = ranges.size() * sizeof(L3_CONTROL); EXPECT_EQ(ret, expected); } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/gen12lp/test_cmdlist_gen12lp.cpp000066400000000000000000000274131422164147700316410ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gmm_helper/gmm_helper.h" 
#include "shared/source/helpers/register_offsets.h" #include "shared/source/helpers/state_base_address.h" #include "shared/source/os_interface/hw_info_config.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/source/gen12lp/cmdlist_gen12lp.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h" #include "level_zero/core/test/unit_tests/mocks/mock_kernel.h" namespace L0 { namespace ult { using CommandListCreate = Test; template struct CommandListAdjustStateComputeMode : public WhiteBox<::L0::CommandListProductFamily> { CommandListAdjustStateComputeMode() : WhiteBox<::L0::CommandListProductFamily>(1) {} using ::L0::CommandListProductFamily::applyMemoryRangesBarrier; }; HWTEST2_F(CommandListCreate, givenAllocationsWhenApplyRangesBarrierThenCheckWhetherL3ControlIsProgrammed, IsGen12LP) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using L3_CONTROL = typename GfxFamily::L3_CONTROL; auto &hardwareInfo = this->neoDevice->getHardwareInfo(); auto commandList = std::make_unique>>(); commandList->initialize(device, NEO::EngineGroupType::Copy, 0u); uint64_t gpuAddress = 0x1200; void *buffer = reinterpret_cast(gpuAddress); size_t size = 0x1100; NEO::MockGraphicsAllocation mockAllocation(buffer, gpuAddress, size); NEO::SvmAllocationData allocData(0); allocData.size = size; allocData.gpuAllocations.addAllocation(&mockAllocation); device->getDriverHandle()->getSvmAllocsManager()->insertSVMAlloc(allocData); const void *ranges[] = {buffer}; const size_t sizes[] = {size}; commandList->applyMemoryRangesBarrier(1, sizes, ranges); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed())); 
auto itor = find(cmdList.begin(), cmdList.end()); if (hardwareInfo.capabilityTable.supportCacheFlushAfterWalker) { EXPECT_NE(cmdList.end(), itor); } else { EXPECT_EQ(cmdList.end(), itor); } } HWTEST2_F(CommandListCreate, GivenHostMemoryNotInSvmManagerWhenAppendingMemoryBarrierThenAdditionalCommandsNotAdded, IsDG1) { ze_result_t result; uint32_t numRanges = 1; const size_t pRangeSizes = 1; const char *_pRanges[pRangeSizes]; const void **pRanges = reinterpret_cast(&_pRanges[0]); auto commandList = new CommandListAdjustStateComputeMode(); bool ret = commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u); ASSERT_FALSE(ret); auto usedSpaceBefore = commandList->commandContainer.getCommandStream()->getUsed(); result = commandList->appendMemoryRangesBarrier(numRanges, &pRangeSizes, pRanges, nullptr, 0, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed(); ASSERT_EQ(usedSpaceAfter, usedSpaceBefore); commandList->destroy(); } HWTEST2_F(CommandListCreate, GivenHostMemoryInSvmManagerWhenAppendingMemoryBarrierThenL3CommandsAdded, IsDG1) { ze_result_t result; uint32_t numRanges = 1; const size_t pRangeSizes = 1; void *_pRanges; ze_device_mem_alloc_desc_t deviceDesc = {}; result = context->allocDeviceMem(device->toHandle(), &deviceDesc, pRangeSizes, 4096u, &_pRanges); EXPECT_EQ(ZE_RESULT_SUCCESS, result); const void **pRanges = const_cast(&_pRanges); auto commandList = new CommandListAdjustStateComputeMode(); bool ret = commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u); ASSERT_FALSE(ret); auto usedSpaceBefore = commandList->commandContainer.getCommandStream()->getUsed(); result = commandList->appendMemoryRangesBarrier(numRanges, &pRangeSizes, pRanges, nullptr, 0, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed(); ASSERT_NE(usedSpaceAfter, usedSpaceBefore); GenCmdList cmdList; 
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset( commandList->commandContainer.getCommandStream()->getCpuBase(), 0), usedSpaceAfter)); using L3_CONTROL = typename FamilyType::L3_CONTROL; auto itorPC = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), itorPC); { using L3_FLUSH_EVICTION_POLICY = typename FamilyType::L3_FLUSH_ADDRESS_RANGE::L3_FLUSH_EVICTION_POLICY; auto cmd = genCmdCast(*itorPC); const auto &hwInfoConfig = *NEO::HwInfoConfig::get(device->getHwInfo().platform.eProductFamily); auto isA0Stepping = (hwInfoConfig.getSteppingFromHwRevId(device->getHwInfo()) == REVISION_A0); auto maskedAddress = cmd->getL3FlushAddressRange().getAddress(isA0Stepping); EXPECT_NE(maskedAddress, 0u); EXPECT_EQ(reinterpret_cast(*pRanges), static_cast(maskedAddress)); EXPECT_EQ( cmd->getL3FlushAddressRange().getL3FlushEvictionPolicy(isA0Stepping), L3_FLUSH_EVICTION_POLICY::L3_FLUSH_EVICTION_POLICY_FLUSH_L3_WITH_EVICTION); } commandList->destroy(); context->freeMem(_pRanges); } HWTEST2_F(CommandListCreate, GivenHostMemoryWhenAppendingMemoryBarrierThenAddressMisalignmentCorrected, IsDG1) { ze_result_t result; uint32_t numRanges = 1; const size_t misalignment_factor = 761; const size_t pRangeSizes = 4096; void *_pRanges; ze_device_mem_alloc_desc_t deviceDesc = {}; result = context->allocDeviceMem(device->toHandle(), &deviceDesc, pRangeSizes, 4096u, &_pRanges); EXPECT_EQ(ZE_RESULT_SUCCESS, result); unsigned char *c_pRanges = reinterpret_cast(_pRanges); c_pRanges += misalignment_factor; _pRanges = static_cast(c_pRanges); const void **pRanges = const_cast(&_pRanges); auto commandList = new CommandListAdjustStateComputeMode(); bool ret = commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u); ASSERT_FALSE(ret); auto usedSpaceBefore = commandList->commandContainer.getCommandStream()->getUsed(); result = commandList->appendMemoryRangesBarrier(numRanges, &pRangeSizes, pRanges, nullptr, 0, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); 
auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed(); ASSERT_NE(usedSpaceAfter, usedSpaceBefore); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset( commandList->commandContainer.getCommandStream()->getCpuBase(), 0), usedSpaceAfter)); using L3_CONTROL = typename FamilyType::L3_CONTROL; auto itorPC = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), itorPC); { using L3_FLUSH_EVICTION_POLICY = typename FamilyType::L3_FLUSH_ADDRESS_RANGE::L3_FLUSH_EVICTION_POLICY; auto cmd = genCmdCast(*itorPC); const auto &hwInfoConfig = *NEO::HwInfoConfig::get(device->getHwInfo().platform.eProductFamily); auto isA0Stepping = (hwInfoConfig.getSteppingFromHwRevId(device->getHwInfo()) == REVISION_A0); auto maskedAddress = cmd->getL3FlushAddressRange().getAddress(isA0Stepping); EXPECT_NE(maskedAddress, 0u); EXPECT_EQ(reinterpret_cast(*pRanges) - misalignment_factor, static_cast(maskedAddress)); EXPECT_EQ( cmd->getL3FlushAddressRange().getL3FlushEvictionPolicy(isA0Stepping), L3_FLUSH_EVICTION_POLICY::L3_FLUSH_EVICTION_POLICY_FLUSH_L3_WITH_EVICTION); } commandList->destroy(); context->freeMem(_pRanges); } HWTEST2_F(CommandListCreate, givenAllocationsWhenApplyRangesBarrierWithInvalidAddressSizeThenL3ControlIsNotProgrammed, IsDG1) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using L3_CONTROL = typename GfxFamily::L3_CONTROL; ze_result_t result; const size_t pRangeSizes = 4096; void *_pRanges; ze_device_mem_alloc_desc_t deviceDesc = {}; result = context->allocDeviceMem(device->toHandle(), &deviceDesc, pRangeSizes, 4096u, &_pRanges); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto commandList = new CommandListAdjustStateComputeMode(); ASSERT_NE(nullptr, commandList); bool ret = commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u); ASSERT_FALSE(ret); const void *ranges[] = {_pRanges}; const size_t sizes[] = {2 * pRangeSizes}; commandList->applyMemoryRangesBarrier(1, sizes, ranges); 
GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed())); auto itor = find(cmdList.begin(), cmdList.end()); EXPECT_EQ(cmdList.end(), itor); commandList->destroy(); context->freeMem(_pRanges); } HWTEST2_F(CommandListCreate, givenAllocationsWhenApplyRangesBarrierWithInvalidAddressThenL3ControlIsNotProgrammed, IsDG1) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using L3_CONTROL = typename GfxFamily::L3_CONTROL; ze_result_t result; const size_t pRangeSizes = 4096; void *_pRanges; ze_device_mem_alloc_desc_t deviceDesc = {}; result = context->allocDeviceMem(device->toHandle(), &deviceDesc, pRangeSizes, 4096u, &_pRanges); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto commandList = new CommandListAdjustStateComputeMode(); ASSERT_NE(nullptr, commandList); bool ret = commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u); ASSERT_FALSE(ret); const void *ranges[] = {nullptr}; const size_t sizes[] = {pRangeSizes}; commandList->applyMemoryRangesBarrier(1, sizes, ranges); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed())); auto itor = find(cmdList.begin(), cmdList.end()); EXPECT_EQ(cmdList.end(), itor); commandList->destroy(); context->freeMem(_pRanges); } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/gen12lp/test_device_gen12lp.cpp000066400000000000000000000341631422164147700314410ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/hw_info_config.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" namespace 
L0 { namespace ult { using DeviceFixtureGen12LP = Test; HWTEST2_F(DeviceFixtureGen12LP, GivenTargetGen12LPaWhenGettingMemoryPropertiesThenMemoryNameComesAsDDR, IsGen12LP) { ze_device_memory_properties_t memProperties = {}; uint32_t pCount = 1u; EXPECT_EQ(ZE_RESULT_SUCCESS, device->getMemoryProperties(&pCount, &memProperties)); EXPECT_EQ(0, strcmp(memProperties.name, "DDR")); EXPECT_EQ(0u, memProperties.maxClockRate); } using CommandQueueGroupTest = Test; HWTEST2_F(CommandQueueGroupTest, givenNoBlitterSupportAndNoCCSThenOneQueueGroupIsReturned, IsGen12LP) { const uint32_t rootDeviceIndex = 0u; NEO::HardwareInfo hwInfo = *NEO::defaultHwInfo.get(); hwInfo.featureTable.flags.ftrCCSNode = false; hwInfo.capabilityTable.blitterOperationsSupported = false; hwInfo.featureTable.ftrBcsInfo.set(0, false); auto *neoMockDevice = NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo, rootDeviceIndex); Mock deviceImp(neoMockDevice, neoMockDevice->getExecutionEnvironment()); uint32_t count = 0; ze_result_t res = deviceImp.getCommandQueueGroupProperties(&count, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, res); EXPECT_EQ(count, 1u); } HWTEST2_F(CommandQueueGroupTest, givenBlitterSupportAndNoCCSThenTwoQueueGroupsAreReturned, IsGen12LP) { const uint32_t rootDeviceIndex = 0u; NEO::HardwareInfo hwInfo = *NEO::defaultHwInfo.get(); hwInfo.featureTable.flags.ftrCCSNode = false; hwInfo.capabilityTable.blitterOperationsSupported = true; hwInfo.featureTable.ftrBcsInfo.set(0); auto *neoMockDevice = NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo, rootDeviceIndex); Mock deviceImp(neoMockDevice, neoMockDevice->getExecutionEnvironment()); uint32_t count = 0; ze_result_t res = deviceImp.getCommandQueueGroupProperties(&count, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, res); EXPECT_EQ(count, 2u); } class DeviceCopyQueueGroupFixture : public DeviceFixture { public: void SetUp() { DebugManager.flags.EnableBlitterOperationsSupport.set(0); DeviceFixture::SetUp(); } void TearDown() { 
DeviceFixture::TearDown(); } DebugManagerStateRestore restorer; }; using DeviceCopyQueueGroupTest = Test; HWTEST2_F(DeviceCopyQueueGroupTest, givenBlitterSupportAndEnableBlitterOperationsSupportSetToZeroThenNoCopyEngineIsReturned, IsGen12LP) { const uint32_t rootDeviceIndex = 0u; NEO::HardwareInfo hwInfo = *NEO::defaultHwInfo.get(); hwInfo.featureTable.flags.ftrCCSNode = false; hwInfo.capabilityTable.blitterOperationsSupported = true; hwInfo.featureTable.ftrBcsInfo.set(0); auto *neoMockDevice = NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo, rootDeviceIndex); Mock deviceImp(neoMockDevice, neoMockDevice->getExecutionEnvironment()); uint32_t count = 0; ze_result_t res = deviceImp.getCommandQueueGroupProperties(&count, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, res); std::vector properties(count); res = deviceImp.getCommandQueueGroupProperties(&count, properties.data()); EXPECT_EQ(ZE_RESULT_SUCCESS, res); for (auto &engineGroup : neoMockDevice->getRegularEngineGroups()) { EXPECT_NE(NEO::EngineGroupType::Copy, engineGroup.engineGroupType); } } HWTEST2_F(CommandQueueGroupTest, givenBlitterSupportAndCCSDefaultEngineThenThreeQueueGroupsAreReturned, IsGen12LP) { const uint32_t rootDeviceIndex = 0u; NEO::HardwareInfo hwInfo = *NEO::defaultHwInfo.get(); hwInfo.featureTable.flags.ftrCCSNode = true; hwInfo.capabilityTable.defaultEngineType = aub_stream::ENGINE_CCS; hwInfo.capabilityTable.blitterOperationsSupported = true; hwInfo.featureTable.ftrBcsInfo.set(0); auto *neoMockDevice = NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo, rootDeviceIndex); Mock deviceImp(neoMockDevice, neoMockDevice->getExecutionEnvironment()); uint32_t count = 0; ze_result_t res = deviceImp.getCommandQueueGroupProperties(&count, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, res); EXPECT_EQ(count, 3u); std::vector properties(count); res = deviceImp.getCommandQueueGroupProperties(&count, properties.data()); EXPECT_EQ(ZE_RESULT_SUCCESS, res); auto &engineGroups = 
neoMockDevice->getRegularEngineGroups(); for (uint32_t i = 0; i < count; i++) { if (engineGroups[i].engineGroupType == NEO::EngineGroupType::RenderCompute) { EXPECT_TRUE(properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE); EXPECT_TRUE(properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY); EXPECT_TRUE(properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COOPERATIVE_KERNELS); EXPECT_TRUE(properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_METRICS); EXPECT_EQ(properties[i].numQueues, 1u); EXPECT_EQ(properties[i].maxMemoryFillPatternSize, std::numeric_limits::max()); } else if (engineGroups[i].engineGroupType == NEO::EngineGroupType::Compute) { EXPECT_TRUE(properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE); EXPECT_TRUE(properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY); EXPECT_TRUE(properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COOPERATIVE_KERNELS); EXPECT_EQ(properties[i].numQueues, 1u); EXPECT_EQ(properties[i].maxMemoryFillPatternSize, std::numeric_limits::max()); } else if (engineGroups[i].engineGroupType == NEO::EngineGroupType::Copy) { EXPECT_TRUE(properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY); EXPECT_EQ(properties[i].numQueues, 1u); EXPECT_EQ(properties[i].maxMemoryFillPatternSize, 4 * sizeof(uint32_t)); } } } HWTEST2_F(CommandQueueGroupTest, givenBlitterSupportAndCCSDefaultEngineAndOnlyTwoQueueGroupsRequestedThenTwoQueueGroupsAreReturned, IsGen12LP) { const uint32_t rootDeviceIndex = 0u; NEO::HardwareInfo hwInfo = *NEO::defaultHwInfo.get(); hwInfo.featureTable.flags.ftrCCSNode = true; hwInfo.capabilityTable.defaultEngineType = aub_stream::ENGINE_CCS; hwInfo.capabilityTable.blitterOperationsSupported = true; hwInfo.featureTable.ftrBcsInfo.set(0); auto *neoMockDevice = NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo, rootDeviceIndex); Mock deviceImp(neoMockDevice, neoMockDevice->getExecutionEnvironment()); uint32_t count = 0; ze_result_t res = 
deviceImp.getCommandQueueGroupProperties(&count, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, res); EXPECT_EQ(count, 3u); count = 2; std::vector properties(count); res = deviceImp.getCommandQueueGroupProperties(&count, properties.data()); EXPECT_EQ(ZE_RESULT_SUCCESS, res); EXPECT_EQ(2u, count); auto &engineGroups = neoMockDevice->getRegularEngineGroups(); for (uint32_t i = 0; i < count; i++) { if (engineGroups[i].engineGroupType == NEO::EngineGroupType::RenderCompute) { EXPECT_TRUE(properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE); EXPECT_TRUE(properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY); EXPECT_TRUE(properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COOPERATIVE_KERNELS); EXPECT_TRUE(properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_METRICS); EXPECT_EQ(properties[i].numQueues, 1u); EXPECT_EQ(properties[i].maxMemoryFillPatternSize, std::numeric_limits::max()); } else if (engineGroups[i].engineGroupType == NEO::EngineGroupType::Compute) { EXPECT_TRUE(properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE); EXPECT_TRUE(properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY); EXPECT_TRUE(properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COOPERATIVE_KERNELS); EXPECT_EQ(properties[i].numQueues, 1u); EXPECT_EQ(properties[i].maxMemoryFillPatternSize, std::numeric_limits::max()); } else if (engineGroups[i].engineGroupType == NEO::EngineGroupType::Copy) { EXPECT_TRUE(properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY); EXPECT_EQ(properties[i].numQueues, 1u); EXPECT_EQ(properties[i].maxMemoryFillPatternSize, 4 * sizeof(uint32_t)); } } } HWTEST2_F(CommandQueueGroupTest, givenBlitterSupportAndNoCCSThenTwoQueueGroupsPropertiesAreReturned, IsGen12LP) { const uint32_t rootDeviceIndex = 0u; NEO::HardwareInfo hwInfo = *NEO::defaultHwInfo.get(); hwInfo.featureTable.flags.ftrCCSNode = false; hwInfo.capabilityTable.blitterOperationsSupported = true; hwInfo.featureTable.ftrBcsInfo.set(0); auto 
*neoMockDevice = NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo, rootDeviceIndex); Mock deviceImp(neoMockDevice, neoMockDevice->getExecutionEnvironment()); uint32_t count = 0; ze_result_t res = deviceImp.getCommandQueueGroupProperties(&count, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, res); EXPECT_EQ(count, 2u); std::vector properties(count); res = deviceImp.getCommandQueueGroupProperties(&count, properties.data()); EXPECT_EQ(ZE_RESULT_SUCCESS, res); auto &engineGroups = neoMockDevice->getRegularEngineGroups(); for (uint32_t i = 0; i < count; i++) { if (engineGroups[i].engineGroupType == NEO::EngineGroupType::RenderCompute) { EXPECT_TRUE(properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE); EXPECT_TRUE(properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY); EXPECT_TRUE(properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COOPERATIVE_KERNELS); EXPECT_TRUE(properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_METRICS); EXPECT_EQ(properties[i].maxMemoryFillPatternSize, std::numeric_limits::max()); EXPECT_EQ(properties[i].numQueues, 1u); } else if (engineGroups[i].engineGroupType == NEO::EngineGroupType::Copy) { EXPECT_TRUE(properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY); EXPECT_EQ(properties[i].maxMemoryFillPatternSize, 4 * sizeof(uint32_t)); EXPECT_EQ(properties[i].numQueues, 1u); } } } HWTEST2_F(CommandQueueGroupTest, givenQueueGroupsReturnedThenCommandListsAreCreatedCorrectly, IsGen12LP) { const uint32_t rootDeviceIndex = 0u; NEO::HardwareInfo hwInfo = *NEO::defaultHwInfo.get(); hwInfo.featureTable.flags.ftrCCSNode = false; hwInfo.capabilityTable.blitterOperationsSupported = true; hwInfo.featureTable.ftrBcsInfo.set(0); auto *neoMockDevice = NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo, rootDeviceIndex); Mock deviceImp(neoMockDevice, neoMockDevice->getExecutionEnvironment()); uint32_t count = 0; ze_result_t res = deviceImp.getCommandQueueGroupProperties(&count, nullptr); 
EXPECT_EQ(ZE_RESULT_SUCCESS, res); EXPECT_EQ(count, 2u); std::vector properties(count); res = deviceImp.getCommandQueueGroupProperties(&count, properties.data()); EXPECT_EQ(ZE_RESULT_SUCCESS, res); ze_context_handle_t hContext; ze_context_desc_t desc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC, nullptr, 0}; res = driverHandle->createContext(&desc, 0u, nullptr, &hContext); EXPECT_EQ(ZE_RESULT_SUCCESS, res); L0::Context *context = Context::fromHandle(hContext); auto &engineGroups = neoMockDevice->getRegularEngineGroups(); for (uint32_t i = 0; i < count; i++) { if (engineGroups[i].engineGroupType == NEO::EngineGroupType::RenderCompute) { EXPECT_TRUE(properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE); EXPECT_TRUE(properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY); EXPECT_TRUE(properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COOPERATIVE_KERNELS); EXPECT_TRUE(properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_METRICS); EXPECT_EQ(properties[i].maxMemoryFillPatternSize, std::numeric_limits::max()); EXPECT_EQ(properties[i].numQueues, 1u); } else if (engineGroups[i].engineGroupType == NEO::EngineGroupType::Copy) { EXPECT_TRUE(properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY); EXPECT_EQ(properties[i].maxMemoryFillPatternSize, 4 * sizeof(uint32_t)); EXPECT_EQ(properties[i].numQueues, 1u); } ze_command_list_desc_t desc = {}; desc.commandQueueGroupOrdinal = i; ze_command_list_handle_t hCommandList = {}; res = context->createCommandList(&deviceImp, &desc, &hCommandList); EXPECT_EQ(ZE_RESULT_SUCCESS, res); CommandList *commandList = CommandList::fromHandle(hCommandList); commandList->destroy(); } context->destroy(); } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/gen12lp/test_events_gen12lp.cpp000066400000000000000000000061541422164147700315050ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include 
"shared/test/common/test_macros/test.h" #include "level_zero/core/source/driver/driver_handle_imp.h" #include "level_zero/core/source/event/event.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_event.h" namespace L0 { namespace ult { struct TimestampEvent : public Test { public: class MockTimestampPackets32 : public TimestampPackets { public: using typename TimestampPackets::Packet; }; void SetUp() override { DeviceFixture::SetUp(); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.signal = 0; eventDesc.wait = 0; ze_result_t result = ZE_RESULT_SUCCESS; eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); ASSERT_NE(nullptr, eventPool); event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); ASSERT_NE(nullptr, event); } void TearDown() override { DeviceFixture::TearDown(); } std::unique_ptr eventPool; std::unique_ptr event; }; GEN12LPTEST_F(TimestampEvent, givenEventTimestampsWhenQueryKernelTimestampThenCorrectDataAreSet) { typename MockTimestampPackets32::Packet data = {}; data.contextStart = 1u; data.contextEnd = 2u; data.globalStart = 3u; data.globalEnd = 4u; event->hostAddress = &data; ze_kernel_timestamp_result_t result = {}; event->queryKernelTimestamp(&result); EXPECT_EQ(data.globalStart, result.context.kernelStart); EXPECT_EQ(data.globalEnd, result.context.kernelEnd); EXPECT_EQ(data.globalStart, result.global.kernelStart); EXPECT_EQ(data.globalEnd, result.global.kernelEnd); } GEN12LPTEST_F(TimestampEvent, givenEventMoreThanOneTimestampsPacketWhenQueryKernelTimestampThenCorrectCalculationAreMade) { typename MockTimestampPackets32::Packet data[3] = {}; data[0].contextStart = 3u; 
data[0].contextEnd = 4u; data[0].globalStart = 5u; data[0].globalEnd = 6u; data[1].contextStart = 2u; data[1].contextEnd = 6u; data[1].globalStart = 4u; data[1].globalEnd = 8u; data[2].contextStart = 4u; data[2].contextEnd = 5u; data[2].globalStart = 6u; data[2].globalEnd = 7u; event->hostAddress = &data; event->setPacketsInUse(3u); ze_kernel_timestamp_result_t result = {}; event->queryKernelTimestamp(&result); EXPECT_EQ(data[1].globalStart, result.context.kernelStart); EXPECT_EQ(data[1].globalEnd, result.context.kernelEnd); EXPECT_EQ(data[1].globalStart, result.global.kernelStart); EXPECT_EQ(data[1].globalEnd, result.global.kernelEnd); } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/gen12lp/test_l0_hw_helper_gen12lp.cpp000066400000000000000000000017721422164147700325520ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" namespace L0 { namespace ult { HWTEST_EXCLUDE_PRODUCT(L0HwHelperTest, givenBitmaskWithAttentionBitsForSingleThreadWhenGettingThreadsThenSingleCorrectThreadReturned, IGFX_GEN12LP_CORE); HWTEST_EXCLUDE_PRODUCT(L0HwHelperTest, givenBitmaskWithAttentionBitsForAllSubslicesWhenGettingThreadsThenCorrectThreadsAreReturned, IGFX_GEN12LP_CORE); HWTEST_EXCLUDE_PRODUCT(L0HwHelperTest, givenBitmaskWithAttentionBitsForAllEUsWhenGettingThreadsThenCorrectThreadsAreReturned, IGFX_GEN12LP_CORE); HWTEST_EXCLUDE_PRODUCT(L0HwHelperTest, givenEu0To1Threads0To3BitmaskWhenGettingThreadsThenCorrectThreadsAreReturned, IGFX_GEN12LP_CORE); HWTEST_EXCLUDE_PRODUCT(L0HwHelperTest, givenBitmaskWithAttentionBitsForHalfOfThreadsWhenGettingThreadsThenCorrectThreadsAreReturned, IGFX_GEN12LP_CORE); } // namespace ult } // namespace L0 
compute-runtime-22.14.22890/level_zero/core/test/unit_tests/gen12lp/test_module_gen12lp.cpp000066400000000000000000000017551422164147700314700ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" namespace L0 { namespace ult { using KernelPropertyTest = Test; HWTEST2_F(KernelPropertyTest, givenKernelExtendedPropertiesStructureWhenKernelPropertiesCalledThenPropertiesAreCorrectlySet, IsGen12LP) { ze_device_module_properties_t kernelProperties = {}; ze_float_atomic_ext_properties_t kernelExtendedProperties = {}; kernelExtendedProperties.stype = ZE_STRUCTURE_TYPE_FLOAT_ATOMIC_EXT_PROPERTIES; kernelProperties.pNext = &kernelExtendedProperties; ze_result_t res = device->getKernelProperties(&kernelProperties); EXPECT_EQ(res, ZE_RESULT_SUCCESS); EXPECT_EQ(0u, kernelExtendedProperties.fp16Flags); EXPECT_EQ(0u, kernelExtendedProperties.fp32Flags); EXPECT_EQ(0u, kernelExtendedProperties.fp64Flags); } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/gen9/000077500000000000000000000000001422164147700244725ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/gen9/CMakeLists.txt000066400000000000000000000012131422164147700272270ustar00rootroot00000000000000# # Copyright (C) 2020-2022 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_GEN9) target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/test_cmdlist_gen9.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_cmdlist_append_launch_kernel_gen9.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_cmdqueue_enqueuecommandlist_gen9.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_cmdqueue_gen9.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_device_gen9.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_module_gen9.cpp ) endif() 
compute-runtime-22.14.22890/level_zero/core/test/unit_tests/gen9/enable_l0_mocks_gen9.cpp000066400000000000000000000005721422164147700311410ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/test/unit_tests/mocks/mock_l0_debugger.h" namespace NEO { struct SKLFamily; using GfxFamily = SKLFamily; } // namespace NEO namespace L0 { namespace ult { static MockDebuggerL0HwPopulateFactory mockDebuggerGen9; } } // namespace L0test_cmdlist_append_launch_kernel_gen9.cpp000066400000000000000000000040271422164147700347630ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/gen9/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen9/reg_configs.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/test/unit_tests/fixtures/module_fixture.h" namespace L0 { namespace ult { using CommandListAppendLaunchKernel = Test; using IsSKLOrKBL = IsWithinProducts; HWTEST2_F(CommandListAppendLaunchKernel, givenKernelWithSLMThenL3IsProgrammedWithSLMValue, IsSKLOrKBL) { using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; createKernel(); ze_result_t returnValue; std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)); ze_group_count_t groupCount{1, 1, 1}; EXPECT_LE(0u, kernel->kernelImmData->getDescriptor().kernelAttributes.slmInlineSize); auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed(); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), usedSpaceAfter)); bool foundL3 = false; for 
(auto it = cmdList.begin(); it != cmdList.end(); it++) { auto lri = genCmdCast(*it); if (lri) { if (lri->getRegisterOffset() == NEO::L3CNTLRegisterOffset::registerOffset) { auto value = lri->getDataDword(); auto dataSlm = NEO::PreambleHelper::getL3Config(commandList->commandContainer.getDevice()->getHardwareInfo(), true); EXPECT_EQ(dataSlm, value); foundL3 = true; break; } } } EXPECT_TRUE(foundL3); } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/gen9/test_cmdlist_gen9.cpp000066400000000000000000000114161422164147700306210ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen9/reg_configs.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/source/gen9/cmdlist_gen9.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h" namespace L0 { namespace ult { using CommandListProgramL3 = Test; template struct CommandListAdjustStateComputeMode : public WhiteBox<::L0::CommandListProductFamily> { CommandListAdjustStateComputeMode() : WhiteBox<::L0::CommandListProductFamily>(1) {} }; HWTEST2_F(CommandListProgramL3, givenAllocationsWhenProgramL3ThenMmioIsAppended, IsGen9) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM; const uint32_t registerOffset = NEO::L3CNTLRegisterOffset::registerOffset; auto commandList = new CommandListAdjustStateComputeMode(); bool ret = commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u); ASSERT_FALSE(ret); commandList->programL3(false); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed())); bool 
found = false; auto miLoadImm = findAll(cmdList.begin(), cmdList.end()); for (auto itor : miLoadImm) { auto cmd = genCmdCast(*itor); if (registerOffset == cmd->getRegisterOffset()) { found = true; break; } } EXPECT_TRUE(found); commandList->destroy(); } HWTEST2_F(CommandListProgramL3, givenAllocationsWhenProgramL3WithSlmThenMmioIsAppendedWithSlm, IsGen9) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM; const uint32_t registerOffset = NEO::L3CNTLRegisterOffset::registerOffset; auto hwInfo = device->getNEODevice()->getHardwareInfo(); const uint32_t valueForSLM = NEO::PreambleHelper::getL3Config(hwInfo, true); auto commandList = new CommandListAdjustStateComputeMode(); bool ret = commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u); ASSERT_FALSE(ret); commandList->programL3(true); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed())); bool found = false; auto miLoadImm = findAll(cmdList.begin(), cmdList.end()); for (auto itor : miLoadImm) { auto cmd = genCmdCast(*itor); if (registerOffset == cmd->getRegisterOffset()) { EXPECT_EQ(cmd->getRegisterOffset(), registerOffset); EXPECT_EQ(cmd->getDataDword(), valueForSLM); found = true; break; } } EXPECT_TRUE(found); commandList->destroy(); } HWTEST2_F(CommandListProgramL3, givenAllocationsWhenProgramL3WithoutSlmThenMmioIsAppendedWithoutSlm, IsGen9) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM; const uint32_t registerOffset = NEO::L3CNTLRegisterOffset::registerOffset; const uint32_t valueForNoSLM = NEO::PreambleHelper::getL3Config(*defaultHwInfo, false); auto commandList = new CommandListAdjustStateComputeMode(); bool ret = commandList->initialize(device, 
NEO::EngineGroupType::RenderCompute, 0u); ASSERT_FALSE(ret); commandList->programL3(false); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed())); bool found = false; auto miLoadImm = findAll(cmdList.begin(), cmdList.end()); for (auto itor : miLoadImm) { auto cmd = genCmdCast(*itor); if (registerOffset == cmd->getRegisterOffset()) { EXPECT_EQ(cmd->getRegisterOffset(), registerOffset); EXPECT_EQ(cmd->getDataDword(), valueForNoSLM); found = true; break; } } EXPECT_TRUE(found); commandList->destroy(); } } // namespace ult } // namespace L0 test_cmdqueue_enqueuecommandlist_gen9.cpp000066400000000000000000000270271422164147700347020ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/gen9/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/linear_stream.h" #include "shared/source/command_stream/preemption.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h" #include "level_zero/core/test/unit_tests/mocks/mock_device.h" #include "level_zero/core/test/unit_tests/mocks/mock_memory_manager.h" #include #include "gtest/gtest.h" #include namespace L0 { namespace ult { using CommandQueueExecuteCommandListsGen9 = Test; GEN9TEST_F(CommandQueueExecuteCommandListsGen9, WhenExecutingCmdListsThenPipelineSelectAndVfeStateAreAddedToCmdBuffer) { const ze_command_queue_desc_t desc = {}; ze_result_t returnValue; auto commandQueue = 
whitebox_cast(CommandQueue::create( productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &desc, false, false, returnValue)); ASSERT_NE(nullptr, commandQueue->commandStream); auto usedSpaceBefore = commandQueue->commandStream->getUsed(); ze_command_list_handle_t commandLists[] = { CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)->toHandle()}; uint32_t numCommandLists = sizeof(commandLists) / sizeof(commandLists[0]); auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto usedSpaceAfter = commandQueue->commandStream->getUsed(); ASSERT_GT(usedSpaceAfter, usedSpaceBefore); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandQueue->commandStream->getCpuBase(), 0), usedSpaceAfter)); using MEDIA_VFE_STATE = typename FamilyType::MEDIA_VFE_STATE; auto itorVFE = find(cmdList.begin(), cmdList.end()); ASSERT_NE(itorVFE, cmdList.end()); // Should have a PS before a VFE using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT; auto itorPS = find(cmdList.begin(), itorVFE); ASSERT_NE(itorPS, itorVFE); { auto cmd = genCmdCast(*itorPS); EXPECT_EQ(cmd->getMaskBits() & 3u, 3u); EXPECT_EQ(cmd->getPipelineSelection(), PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU); } CommandList::fromHandle(commandLists[0])->destroy(); commandQueue->destroy(); } GEN9TEST_F(CommandQueueExecuteCommandListsGen9, WhenExecutingCmdListsThenStateBaseAddressForGeneralStateBaseAddressIsAdded) { const ze_command_queue_desc_t desc = {}; ze_result_t returnValue; auto commandQueue = whitebox_cast(CommandQueue::create( productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &desc, false, false, returnValue)); ASSERT_NE(nullptr, commandQueue->commandStream); auto usedSpaceBefore = commandQueue->commandStream->getUsed(); ze_command_list_handle_t commandLists[] = { CommandList::create(productFamily, 
device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)->toHandle()}; uint32_t numCommandLists = sizeof(commandLists) / sizeof(commandLists[0]); auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto usedSpaceAfter = commandQueue->commandStream->getUsed(); ASSERT_GT(usedSpaceAfter, usedSpaceBefore); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandQueue->commandStream->getCpuBase(), 0), usedSpaceAfter)); using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; auto itorSba = find(cmdList.begin(), cmdList.end()); ASSERT_NE(itorSba, cmdList.end()); { auto cmd = genCmdCast(*itorSba); EXPECT_TRUE(cmd->getGeneralStateBaseAddressModifyEnable()); EXPECT_EQ(0u, cmd->getGeneralStateBaseAddress()); EXPECT_TRUE(cmd->getGeneralStateBufferSizeModifyEnable()); uint32_t expectedGsbaSize = std::numeric_limits::max(); expectedGsbaSize >>= 12; EXPECT_EQ(expectedGsbaSize, cmd->getGeneralStateBufferSize()); EXPECT_TRUE(cmd->getInstructionBaseAddressModifyEnable()); EXPECT_TRUE(cmd->getInstructionBufferSizeModifyEnable()); EXPECT_EQ(MemoryConstants::sizeOf4GBinPageEntities, cmd->getInstructionBufferSize()); EXPECT_EQ(device->getDriverHandle()->getMemoryManager()->getInternalHeapBaseAddress(0, false), cmd->getInstructionBaseAddress()); EXPECT_EQ(commandQueue->getDevice()->getNEODevice()->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_STATE_HEAP_BUFFER), cmd->getInstructionMemoryObjectControlState()); } CommandList::fromHandle(commandLists[0])->destroy(); commandQueue->destroy(); } GEN9TEST_F(CommandQueueExecuteCommandListsGen9, WhenExecutingCmdListsThenMidThreadPreemptionForFirstExecuteIsConfigured) { const ze_command_queue_desc_t desc = {}; ze_result_t returnValue; auto commandQueue = whitebox_cast(CommandQueue::create( productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &desc, false, false, returnValue)); 
ASSERT_NE(nullptr, commandQueue->commandStream); auto usedSpaceBefore = commandQueue->commandStream->getUsed(); auto commandList = whitebox_cast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)); commandList->commandListPreemptionMode = NEO::PreemptionMode::MidThread; ze_command_list_handle_t commandLists[] = {commandList->toHandle()}; uint32_t numCommandLists = sizeof(commandLists) / sizeof(commandLists[0]); auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto usedSpaceAfter = commandQueue->commandStream->getUsed(); ASSERT_GT(usedSpaceAfter, usedSpaceBefore); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandQueue->commandStream->getCpuBase(), 0), usedSpaceAfter)); using STATE_SIP = typename FamilyType::STATE_SIP; using GPGPU_CSR_BASE_ADDRESS = typename FamilyType::GPGPU_CSR_BASE_ADDRESS; using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; auto itorCsr = find(cmdList.begin(), cmdList.end()); EXPECT_NE(itorCsr, cmdList.end()); auto itorStateSip = find(itorCsr, cmdList.end()); EXPECT_NE(itorStateSip, cmdList.end()); auto itorLri = find(itorStateSip, cmdList.end()); EXPECT_NE(itorLri, cmdList.end()); MI_LOAD_REGISTER_IMM *lriCmd = static_cast(*itorLri); EXPECT_EQ(0x2580u, lriCmd->getRegisterOffset()); uint32_t data = ((1 << 1) | (1 << 2)) << 16; EXPECT_EQ(data, lriCmd->getDataDword()); commandList->destroy(); commandQueue->destroy(); } GEN9TEST_F(CommandQueueExecuteCommandListsGen9, GivenCmdListsWithDifferentPreemptionModesWhenExecutingThenQueuePreemptionIsSwitchedFromMidThreadToThreadGroupAndMidThread) { const ze_command_queue_desc_t desc = {}; ze_result_t returnValue; auto commandQueue = whitebox_cast(CommandQueue::create( productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &desc, false, false, returnValue)); ASSERT_NE(nullptr, 
commandQueue->commandStream); auto usedSpaceBefore = commandQueue->commandStream->getUsed(); auto commandListMidThread = whitebox_cast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)); commandListMidThread->commandListPreemptionMode = NEO::PreemptionMode::MidThread; auto commandListThreadGroup = whitebox_cast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)); commandListThreadGroup->commandListPreemptionMode = NEO::PreemptionMode::ThreadGroup; ze_command_list_handle_t commandLists[] = {commandListMidThread->toHandle(), commandListThreadGroup->toHandle(), commandListMidThread->toHandle()}; uint32_t numCommandLists = sizeof(commandLists) / sizeof(commandLists[0]); auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto usedSpaceAfter = commandQueue->commandStream->getUsed(); ASSERT_GT(usedSpaceAfter, usedSpaceBefore); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, commandQueue->commandStream->getCpuBase(), usedSpaceAfter)); using STATE_SIP = typename FamilyType::STATE_SIP; using GPGPU_CSR_BASE_ADDRESS = typename FamilyType::GPGPU_CSR_BASE_ADDRESS; using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; auto itorCsr = find(cmdList.begin(), cmdList.end()); EXPECT_NE(itorCsr, cmdList.end()); auto itorStateSip = find(itorCsr, cmdList.end()); EXPECT_NE(itorStateSip, cmdList.end()); auto itorLri = find(itorStateSip, cmdList.end()); EXPECT_NE(itorLri, cmdList.end()); MI_LOAD_REGISTER_IMM *lriCmd = static_cast(*itorLri); EXPECT_EQ(0x2580u, lriCmd->getRegisterOffset()); uint32_t data = ((1 << 1) | (1 << 2)) << 16; EXPECT_EQ(data, lriCmd->getDataDword()); //next should be BB_START to 1st Mid-Thread Cmd List auto itorBBStart = 
find(itorLri, cmdList.end()); EXPECT_NE(itorBBStart, cmdList.end()); //next should be PIPE_CONTROL and LRI switching to thread-group auto itorPipeControl = find(itorBBStart, cmdList.end()); EXPECT_NE(itorPipeControl, cmdList.end()); itorLri = find(itorPipeControl, cmdList.end()); EXPECT_NE(itorLri, cmdList.end()); lriCmd = static_cast(*itorLri); EXPECT_EQ(0x2580u, lriCmd->getRegisterOffset()); data = (1 << 1) | (((1 << 1) | (1 << 2)) << 16); EXPECT_EQ(data, lriCmd->getDataDword()); //start of thread-group command list itorBBStart = find(itorLri, cmdList.end()); EXPECT_NE(itorBBStart, cmdList.end()); //next should be PIPE_CONTROL and LRI switching to mid-thread again itorPipeControl = find(itorBBStart, cmdList.end()); EXPECT_NE(itorPipeControl, cmdList.end()); itorLri = find(itorPipeControl, cmdList.end()); EXPECT_NE(itorLri, cmdList.end()); lriCmd = static_cast(*itorLri); EXPECT_EQ(0x2580u, lriCmd->getRegisterOffset()); data = ((1 << 1) | (1 << 2)) << 16; EXPECT_EQ(data, lriCmd->getDataDword()); //start of thread-group command list itorBBStart = find(itorLri, cmdList.end()); EXPECT_NE(itorBBStart, cmdList.end()); commandListMidThread->destroy(); commandListThreadGroup->destroy(); commandQueue->destroy(); } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/gen9/test_cmdqueue_gen9.cpp000066400000000000000000000225461422164147700310000ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen9/reg_configs.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/mocks/mock_compilers.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/source/cmdqueue/cmdqueue_imp.h" #include "level_zero/core/source/driver/driver_handle_imp.h" #include "level_zero/core/source/driver/driver_imp.h" #include 
"level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_built_ins.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h" namespace L0 { namespace ult { struct CommandQueueThreadArbitrationPolicyTests : public ::testing::Test { void SetUp() override { NEO::MockCompilerEnableGuard mock(true); ze_result_t returnValue = ZE_RESULT_SUCCESS; auto executionEnvironment = new NEO::ExecutionEnvironment(); auto mockBuiltIns = new MockBuiltins(); executionEnvironment->prepareRootDeviceEnvironments(1); executionEnvironment->rootDeviceEnvironments[0]->builtins.reset(mockBuiltIns); executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(NEO::defaultHwInfo.get()); neoDevice = NEO::MockDevice::create(executionEnvironment, 0u); std::vector> devices; devices.push_back(std::unique_ptr(neoDevice)); auto driverHandleUlt = whitebox_cast(DriverHandle::create(std::move(devices), L0EnvVariables{}, &returnValue)); driverHandle.reset(driverHandleUlt); ASSERT_NE(nullptr, driverHandle); ze_device_handle_t hDevice; uint32_t count = 1; ze_result_t result = driverHandle->getDevice(&count, &hDevice); EXPECT_EQ(ZE_RESULT_SUCCESS, result); device = L0::Device::fromHandle(hDevice); ASSERT_NE(nullptr, device); ze_command_queue_desc_t queueDesc = {}; commandQueue = whitebox_cast(CommandQueue::create(productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &queueDesc, false, false, returnValue)); ASSERT_NE(nullptr, commandQueue->commandStream); commandList = CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue); ASSERT_NE(nullptr, commandList); } void TearDown() override { commandList->destroy(); commandQueue->destroy(); L0::GlobalDriver = nullptr; } DebugManagerStateRestore restorer; WhiteBox *commandQueue = nullptr; L0::CommandList *commandList = nullptr; std::unique_ptr> driverHandle; NEO::MockDevice *neoDevice = nullptr; L0::Device *device; }; 
HWTEST2_F(CommandQueueThreadArbitrationPolicyTests, whenCommandListIsExecutedThenDefaultRoundRobinThreadArbitrationPolicyIsUsed, IsGen9) { size_t usedSpaceBefore = commandQueue->commandStream->getUsed(); ze_command_list_handle_t hCommandList = commandList->toHandle(); auto result = commandQueue->executeCommandLists(1, &hCommandList, nullptr, true); ASSERT_EQ(ZE_RESULT_SUCCESS, result); size_t usedSpaceAfter = commandQueue->commandStream->getUsed(); ASSERT_GT(usedSpaceAfter, usedSpaceBefore); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandQueue->commandStream->getCpuBase(), 0), usedSpaceAfter)); using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; auto miLoadImm = findAll(cmdList.begin(), cmdList.end()); EXPECT_GE(2u, miLoadImm.size()); for (auto it : miLoadImm) { auto cmd = genCmdCast(*it); if (cmd->getRegisterOffset() == NEO::DebugControlReg2::address) { EXPECT_EQ(NEO::DebugControlReg2::getRegData(NEO::ThreadArbitrationPolicy::RoundRobin), cmd->getDataDword()); } } } HWTEST2_F(CommandQueueThreadArbitrationPolicyTests, whenCommandListIsExecutedAndOverrideThreadArbitrationPolicyDebugFlagIsSetToZeroThenAgeBasedThreadArbitrationPolicyIsUsed, IsGen9) { DebugManager.flags.OverrideThreadArbitrationPolicy.set(0); size_t usedSpaceBefore = commandQueue->commandStream->getUsed(); ze_command_list_handle_t hCommandList = commandList->toHandle(); auto result = commandQueue->executeCommandLists(1, &hCommandList, nullptr, true); ASSERT_EQ(ZE_RESULT_SUCCESS, result); size_t usedSpaceAfter = commandQueue->commandStream->getUsed(); ASSERT_GT(usedSpaceAfter, usedSpaceBefore); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandQueue->commandStream->getCpuBase(), 0), usedSpaceAfter)); using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; auto miLoadImm = findAll(cmdList.begin(), cmdList.end()); EXPECT_GE(2u, miLoadImm.size()); for (auto it : miLoadImm) { 
auto cmd = genCmdCast(*it); if (cmd->getRegisterOffset() == NEO::DebugControlReg2::address) { EXPECT_EQ(NEO::DebugControlReg2::getRegData(NEO::ThreadArbitrationPolicy::AgeBased), cmd->getDataDword()); } } } HWTEST2_F(CommandQueueThreadArbitrationPolicyTests, whenCommandListIsExecutedAndOverrideThreadArbitrationPolicyDebugFlagIsSetToOneThenRoundRobinThreadArbitrationPolicyIsUsed, IsGen9) { DebugManager.flags.OverrideThreadArbitrationPolicy.set(1); size_t usedSpaceBefore = commandQueue->commandStream->getUsed(); ze_command_list_handle_t hCommandList = commandList->toHandle(); auto result = commandQueue->executeCommandLists(1, &hCommandList, nullptr, true); ASSERT_EQ(ZE_RESULT_SUCCESS, result); size_t usedSpaceAfter = commandQueue->commandStream->getUsed(); ASSERT_GT(usedSpaceAfter, usedSpaceBefore); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandQueue->commandStream->getCpuBase(), 0), usedSpaceAfter)); using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; auto miLoadImm = findAll(cmdList.begin(), cmdList.end()); EXPECT_GE(2u, miLoadImm.size()); for (auto it : miLoadImm) { auto cmd = genCmdCast(*it); if (cmd->getRegisterOffset() == NEO::DebugControlReg2::address) { EXPECT_EQ(NEO::DebugControlReg2::getRegData(NEO::ThreadArbitrationPolicy::RoundRobin), cmd->getDataDword()); } } } struct CommandQueueGroupMultiDeviceFixture : public MultiDeviceFixture { void SetUp() { NEO::MockCompilerEnableGuard mock(true); MultiDeviceFixture::SetUp(); uint32_t count = 1; ze_device_handle_t hDevice; ze_result_t res = driverHandle->getDevice(&count, &hDevice); ASSERT_EQ(ZE_RESULT_SUCCESS, res); device = L0::Device::fromHandle(hDevice); ASSERT_NE(nullptr, device); } void TearDown() { MultiDeviceFixture::TearDown(); } L0::Device *device = nullptr; }; using CommandQueueGroupMultiDevice = Test; HWTEST2_F(CommandQueueGroupMultiDevice, givenCommandQueuePropertiesCallThenCallSucceedsAndCommandListImmediateIsCreated, IsGen9) { 
uint32_t count = 0; ze_result_t res = device->getCommandQueueGroupProperties(&count, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, res); EXPECT_GE(count, 1u); std::vector queueProperties(count); res = device->getCommandQueueGroupProperties(&count, queueProperties.data()); EXPECT_EQ(ZE_RESULT_SUCCESS, res); uint32_t queueGroupOrdinal = 0u; uint32_t queueGroupIndex = 0u; ze_command_queue_desc_t desc = {}; desc.ordinal = queueGroupOrdinal; desc.index = queueGroupIndex; ze_result_t returnValue; std::unique_ptr commandList0(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue)); L0::CommandQueueImp *cmdQueue = reinterpret_cast(commandList0->cmdQImmediate); auto &nearestSubDevice = *device->getNEODevice()->getNearestGenericSubDevice(0); const auto rcsIndex = nearestSubDevice.getEngineGroupIndexFromEngineGroupType(NEO::EngineGroupType::RenderCompute); auto expectedCSR = nearestSubDevice.getRegularEngineGroups()[rcsIndex].engines[queueGroupIndex].commandStreamReceiver; EXPECT_EQ(cmdQueue->getCsr(), expectedCSR); } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/gen9/test_device_gen9.cpp000066400000000000000000000065231422164147700304240ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" namespace L0 { namespace ult { using DevicePropertyTest = Test; HWTEST2_F(DevicePropertyTest, givenReturnedDevicePropertiesThenExpectedPropertiesFlagsSet, IsGen9) { ze_device_properties_t deviceProps = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES}; device->getProperties(&deviceProps); EXPECT_EQ(0u, deviceProps.flags & ZE_DEVICE_PROPERTY_FLAG_ONDEMANDPAGING); EXPECT_EQ(0u, deviceProps.flags & ZE_DEVICE_PROPERTY_FLAG_ECC); EXPECT_EQ(0u, deviceProps.flags & ZE_DEVICE_PROPERTY_FLAG_SUBDEVICE); 
EXPECT_EQ(ZE_DEVICE_PROPERTY_FLAG_INTEGRATED, deviceProps.flags & ZE_DEVICE_PROPERTY_FLAG_INTEGRATED); } using CommandQueueGroupTest = Test; HWTEST2_F(CommandQueueGroupTest, givenCommandQueuePropertiesCallThenCorrectNumberOfGroupsIsReturned, IsGen9) { uint32_t count = 0; ze_result_t res = device->getCommandQueueGroupProperties(&count, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, res); EXPECT_EQ(1u, count); ze_command_queue_group_properties_t properties; res = device->getCommandQueueGroupProperties(&count, &properties); EXPECT_EQ(ZE_RESULT_SUCCESS, res); EXPECT_TRUE(properties.flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE); EXPECT_TRUE(properties.flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY); EXPECT_TRUE(properties.flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COOPERATIVE_KERNELS); EXPECT_TRUE(properties.flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_METRICS); EXPECT_EQ(properties.numQueues, 1u); EXPECT_EQ(properties.maxMemoryFillPatternSize, std::numeric_limits::max()); } HWTEST2_F(CommandQueueGroupTest, givenQueueGroupsReturnedThenCommandListIsCreatedCorrectly, IsGen9) { uint32_t count = 0; ze_result_t res = device->getCommandQueueGroupProperties(&count, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, res); EXPECT_EQ(1u, count); ze_command_queue_group_properties_t properties; res = device->getCommandQueueGroupProperties(&count, &properties); EXPECT_EQ(ZE_RESULT_SUCCESS, res); EXPECT_TRUE(properties.flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE); EXPECT_TRUE(properties.flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY); EXPECT_TRUE(properties.flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COOPERATIVE_KERNELS); EXPECT_TRUE(properties.flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_METRICS); EXPECT_EQ(properties.numQueues, 1u); EXPECT_EQ(properties.maxMemoryFillPatternSize, std::numeric_limits::max()); ze_context_handle_t hContext; ze_context_desc_t contextDesc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC, nullptr, 0}; res = driverHandle->createContext(&contextDesc, 0u, nullptr, 
&hContext); EXPECT_EQ(ZE_RESULT_SUCCESS, res); L0::Context *context = Context::fromHandle(hContext); ze_command_list_desc_t listDesc = {}; listDesc.commandQueueGroupOrdinal = 0; ze_command_list_handle_t hCommandList = {}; res = context->createCommandList(device, &listDesc, &hCommandList); EXPECT_EQ(ZE_RESULT_SUCCESS, res); CommandList *commandList = CommandList::fromHandle(hCommandList); commandList->destroy(); context->destroy(); } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/gen9/test_module_gen9.cpp000066400000000000000000000023721422164147700304500ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" namespace L0 { namespace ult { using KernelPropertyTest = Test; HWTEST2_F(KernelPropertyTest, givenReturnedKernelPropertiesThenExpectedDp4aSupportReturned, IsGen9) { ze_device_module_properties_t kernelProps = {}; device->getKernelProperties(&kernelProps); EXPECT_EQ(0u, kernelProps.flags & ZE_DEVICE_MODULE_FLAG_DP4A); } HWTEST2_F(KernelPropertyTest, givenKernelExtendedPropertiesStructureWhenKernelPropertiesCalledThenPropertiesAreCorrectlySet, IsGen9) { ze_device_module_properties_t kernelProperties = {}; ze_float_atomic_ext_properties_t kernelExtendedProperties = {}; kernelExtendedProperties.stype = ZE_STRUCTURE_TYPE_FLOAT_ATOMIC_EXT_PROPERTIES; kernelProperties.pNext = &kernelExtendedProperties; ze_result_t res = device->getKernelProperties(&kernelProperties); EXPECT_EQ(res, ZE_RESULT_SUCCESS); EXPECT_EQ(0u, kernelExtendedProperties.fp16Flags); EXPECT_EQ(0u, kernelExtendedProperties.fp32Flags); EXPECT_EQ(0u, kernelExtendedProperties.fp64Flags); } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/main.cpp000066400000000000000000000327621422164147700252720ustar00rootroot00000000000000/* * Copyright 
(C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/gmm_helper/gmm_interface.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/os_interface/hw_info_config.h" #include "shared/source/program/kernel_info.h" #include "shared/source/utilities/debug_settings_reader.h" #include "shared/source/utilities/logger.h" #include "shared/test/common/helpers/custom_event_listener.h" #include "shared/test/common/helpers/default_hw_info.inl" #include "shared/test/common/helpers/memory_leak_listener.h" #include "shared/test/common/helpers/test_files.h" #include "shared/test/common/helpers/ult_hw_config.inl" #include "shared/test/common/libult/global_environment.h" #include "shared/test/common/libult/signal_utils.h" #include "shared/test/common/mocks/mock_gmm_client_context.h" #include "shared/test/common/mocks/mock_sip.h" #include "shared/test/unit_test/base_ult_config_listener.h" #include "shared/test/unit_test/test_stats.h" #include "level_zero/core/source/cmdlist/cmdlist.h" #include "level_zero/core/source/compiler_interface/l0_reg_path.h" #include "gmock/gmock.h" #include "igfxfmid.h" #include #include #include #include #ifdef WIN32 const char *fSeparator = "\\"; #else const char *fSeparator = "/"; #endif TEST(Should, pass) { EXPECT_TRUE(true); } namespace L0 { namespace ult { TestEnvironment *environment = nullptr; } } // namespace L0 using namespace L0::ult; extern PRODUCT_FAMILY productFamily; extern GFXCORE_FAMILY renderCoreFamily; int32_t revId = -1; uint32_t euPerSubSlice = 0; uint32_t sliceCount = 0; uint32_t subSlicePerSliceCount = 0; int dieRecovery = 0; namespace NEO { extern const HardwareInfo *hardwareInfoTable[IGFX_MAX_PRODUCT]; extern bool useMockGmm; extern TestMode testMode; extern const char *executionDirectorySuffix; namespace MockSipData { extern std::unique_ptr mockSipKernel; } // namespace MockSipData } // namespace NEO std::string getRunPath(char 
*argv0) { std::string res(argv0); auto pos = res.rfind(fSeparator); if (pos != std::string::npos) res = res.substr(0, pos); if (res == "." || pos == std::string::npos) { char *cwd; #if defined(__linux__) cwd = getcwd(nullptr, 0); #else cwd = _getcwd(nullptr, 0); #endif res = cwd; free(cwd); } return res; } std::thread::id tempThreadID; bool sysmanUltsEnable = false; void applyWorkarounds() { { std::ofstream f; const std::string fileName("_tmp_"); f.open(fileName, std::ofstream::binary); f.close(); } { std::mutex mtx; std::unique_lock stateLock(mtx); } { std::stringstream ss("1"); int val; ss >> val; } { class BaseClass { public: int method(int param) { return 1; } }; class MockClass : public BaseClass { public: MOCK_METHOD1(method, int(int param)); }; ::testing::NiceMock mockObj; EXPECT_CALL(mockObj, method(::testing::_)) .Times(1); mockObj.method(2); } //intialize rand srand(static_cast(time(nullptr))); //Create at least on thread to prevent false memory leaks in tests using threads std::thread t([&]() { }); tempThreadID = t.get_id(); t.join(); //Create FileLogger to prevent false memory leaks { NEO::FileLoggerInstance(); } } bool checkAubTestsExecutionPathValidity() { bool valid = true; if ((testMode == TestMode::AubTests || testMode == TestMode::AubTestsWithTbx)) { std::ofstream testFile; std::string aubPath = folderAUB; aubPath += fSeparator; aubPath += "testAubFolder"; testFile.open(aubPath, std::ofstream::app); if (testFile.is_open()) { testFile.close(); } else { valid = false; std::cout << "ERROR: Aub tests must be run in directory containing \" " << folderAUB << "\" folder!\n"; } } return valid; } int main(int argc, char **argv) { bool useDefaultListener = false; bool enableAlarm = true; bool setupFeatureTableAndWorkaroundTable = testMode == TestMode::AubTests ? 
true : false; bool showTestStats = false; auto sysmanUltsEnableEnv = getenv("NEO_L0_SYSMAN_ULTS_ENABLE"); if (sysmanUltsEnableEnv != nullptr) { sysmanUltsEnable = (strcmp(sysmanUltsEnableEnv, "1") == 0); } applyWorkarounds(); { std::string envVar = std::string("NEO_") + executionName + "_DISABLE_TEST_ALARM"; char *envValue = getenv(envVar.c_str()); if (envValue != nullptr) { enableAlarm = false; } } testing::InitGoogleMock(&argc, argv); NEO::HardwareInfo hwInfoForTests = NEO::DEFAULT_TEST_PLATFORM::hwInfo; for (int i = 1; i < argc; ++i) { if (!strcmp("--product", argv[i])) { ++i; if (i < argc) { if (::isdigit(argv[i][0])) { int productValue = atoi(argv[i]); if (productValue > 0 && productValue < IGFX_MAX_PRODUCT && NEO::hardwarePrefix[productValue] != nullptr) { productFamily = static_cast(productValue); } else { productFamily = IGFX_UNKNOWN; } } else { productFamily = IGFX_UNKNOWN; for (int j = 0; j < IGFX_MAX_PRODUCT; j++) { if (NEO::hardwarePrefix[j] == nullptr) continue; if (strcmp(NEO::hardwarePrefix[j], argv[i]) == 0) { productFamily = static_cast(j); break; } } } if (productFamily == IGFX_UNKNOWN) { std::cout << "unknown product family has been set: " << argv[i] << std::endl; return -1; } hwInfoForTests = *NEO::hardwareInfoTable[productFamily]; if (!hwInfoForTests.capabilityTable.levelZeroSupported) { std::cout << "unsupported product family has been set: " << argv[i] << std::endl; return 0; } std::cout << "product family: " << NEO::hardwarePrefix[productFamily] << " (" << productFamily << ")" << std::endl; } } else if (!strcmp("--rev_id", argv[i])) { ++i; if (i < argc) { revId = atoi(argv[i]); } } else if (!strcmp("--slices", argv[i])) { ++i; if (i < argc) { sliceCount = atoi(argv[i]); } } else if (!strcmp("--subslices", argv[i])) { ++i; if (i < argc) { subSlicePerSliceCount = atoi(argv[i]); } } else if (!strcmp("--eu_per_ss", argv[i])) { ++i; if (i < argc) { euPerSubSlice = atoi(argv[i]); } } else if (!strcmp("--die_recovery", argv[i])) { ++i; if (i < 
argc) { dieRecovery = atoi(argv[i]) ? 1 : 0; } } else if (!strcmp("--disable_default_listener", argv[i])) { useDefaultListener = false; } else if (!strcmp("--enable_default_listener", argv[i])) { useDefaultListener = true; } else if (!strcmp("--disable_alarm", argv[i])) { enableAlarm = false; } else if (!strcmp("--tbx", argv[i])) { if (testMode == TestMode::AubTests) { testMode = TestMode::AubTestsWithTbx; } initialHardwareTag = 0; } else if (!strcmp("--read-config", argv[i]) && (testMode == TestMode::AubTests || testMode == TestMode::AubTestsWithTbx)) { if (DebugManager.registryReadAvailable()) { DebugManager.setReaderImpl(NEO::SettingsReader::create(L0::registryPath)); DebugManager.injectSettingsFromReader(); } } else if (!strcmp("--show_test_stats", argv[i])) { showTestStats = true; } } if (showTestStats) { std::cout << getTestStats() << std::endl; return 0; } productFamily = hwInfoForTests.platform.eProductFamily; renderCoreFamily = hwInfoForTests.platform.eRenderCoreFamily; uint32_t threadsPerEu = hwInfoConfigFactory[productFamily]->threadsPerEu; PLATFORM &platform = hwInfoForTests.platform; if (revId != -1) { platform.usRevId = revId; } else { revId = platform.usRevId; } uint64_t hwInfoConfig = defaultHardwareInfoConfigTable[productFamily]; setHwInfoValuesFromConfig(hwInfoConfig, hwInfoForTests); // set Gt and FeatureTable to initial state hardwareInfoSetup[productFamily](&hwInfoForTests, setupFeatureTableAndWorkaroundTable, hwInfoConfig); GT_SYSTEM_INFO >SystemInfo = hwInfoForTests.gtSystemInfo; // and adjust dynamic values if not secified sliceCount = sliceCount > 0 ? sliceCount : gtSystemInfo.SliceCount; subSlicePerSliceCount = subSlicePerSliceCount > 0 ? subSlicePerSliceCount : (gtSystemInfo.SubSliceCount / sliceCount); euPerSubSlice = euPerSubSlice > 0 ? 
euPerSubSlice : gtSystemInfo.MaxEuPerSubSlice; // clang-format off gtSystemInfo.SliceCount = sliceCount; gtSystemInfo.SubSliceCount = gtSystemInfo.SliceCount * subSlicePerSliceCount; gtSystemInfo.EUCount = gtSystemInfo.SubSliceCount * euPerSubSlice - dieRecovery; gtSystemInfo.ThreadCount = gtSystemInfo.EUCount * threadsPerEu; gtSystemInfo.MaxEuPerSubSlice = std::max(gtSystemInfo.MaxEuPerSubSlice, euPerSubSlice); gtSystemInfo.MaxSlicesSupported = std::max(gtSystemInfo.MaxSlicesSupported, gtSystemInfo.SliceCount); gtSystemInfo.MaxSubSlicesSupported = std::max(gtSystemInfo.MaxSubSlicesSupported, gtSystemInfo.SubSliceCount); gtSystemInfo.IsDynamicallyPopulated = false; // clang-format on // Platforms with uninitialized factory are not supported if (L0::commandListFactory[productFamily] == nullptr) { std::cout << "unsupported product family has been set: " << NEO::hardwarePrefix[::productFamily] << std::endl; std::cout << "skipping tests" << std::endl; return 0; } auto &listeners = ::testing::UnitTest::GetInstance()->listeners(); if (useDefaultListener == false) { auto defaultListener = listeners.default_result_printer(); auto customEventListener = new CCustomEventListener(defaultListener, NEO::hardwarePrefix[productFamily]); listeners.Release(defaultListener); listeners.Append(customEventListener); } listeners.Append(new NEO::MemoryLeakListener); listeners.Append(new NEO::BaseUltConfigListener); binaryNameSuffix.append(NEO::familyName[hwInfoForTests.platform.eRenderCoreFamily]); binaryNameSuffix.append(hwInfoForTests.capabilityTable.platformType); std::string testBinaryFiles = getRunPath(argv[0]); std::string testBinaryFilesApiSpecific = testBinaryFiles; testBinaryFilesApiSpecific.append("/level_zero/"); testBinaryFiles.append("/" + binaryNameSuffix + "/"); testBinaryFilesApiSpecific.append(binaryNameSuffix + "/"); testBinaryFiles.append(std::to_string(revId)); testBinaryFiles.append("/"); testBinaryFiles.append(testFiles); 
testBinaryFilesApiSpecific.append(std::to_string(revId)); testBinaryFilesApiSpecific.append("/"); testBinaryFilesApiSpecific.append(testFilesApiSpecific); testFiles = testBinaryFiles; testFilesApiSpecific = testBinaryFilesApiSpecific; std::string executionDirectory(hardwarePrefix[productFamily]); executionDirectory += NEO::executionDirectorySuffix; //_aub for aub_tests, empty otherwise executionDirectory += "/"; executionDirectory += std::to_string(revId); #ifdef WIN32 #include if (_chdir(executionDirectory.c_str())) { std::cout << "chdir into " << executionDirectory << " directory failed.\nThis might cause test failures." << std::endl; } #elif defined(__linux__) #include if (chdir(executionDirectory.c_str()) != 0) { std::cout << "chdir into " << executionDirectory << " directory failed.\nThis might cause test failures." << std::endl; } #endif if (!checkAubTestsExecutionPathValidity()) { return -1; } if (useMockGmm) { NEO::GmmHelper::createGmmContextWrapperFunc = NEO::GmmClientContext::create; } else { NEO::GmmInterface::initialize(nullptr, nullptr); } NEO::defaultHwInfo = std::make_unique(); *NEO::defaultHwInfo = hwInfoForTests; NEO::MockSipData::mockSipKernel.reset(new NEO::MockSipKernel()); if (testMode == TestMode::AubTests || testMode == TestMode::AubTestsWithTbx) { MockSipData::useMockSip = false; } environment = reinterpret_cast(::testing::AddGlobalTestEnvironment(new TestEnvironment)); MockCompilerDebugVars fclDebugVars; MockCompilerDebugVars igcDebugVars; environment->setDefaultDebugVars(fclDebugVars, igcDebugVars, hwInfoForTests); int sigOut = setAlarm(enableAlarm); if (sigOut != 0) return sigOut; auto retVal = RUN_ALL_TESTS(); return retVal; } compute-runtime-22.14.22890/level_zero/core/test/unit_tests/mock.h000066400000000000000000000003451422164147700247340ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once namespace L0 { namespace ult { template struct Mock : public Type {}; } 
// namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/mocks/000077500000000000000000000000001422164147700247445ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/mocks/CMakeLists.txt000066400000000000000000000045321422164147700275100ustar00rootroot00000000000000# # Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(TARGET_NAME ${TARGET_NAME_L0}_mocks) set(L0_MOCKS_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/debugger_l0_create.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_built_ins.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_built_ins.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_builtin_functions_lib_impl.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_builtin_functions_lib_impl_timestamps.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_cmdlist.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_cmdlist.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_cmdqueue.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_cmdqueue.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_context.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_device.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_device_for_spirv.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_device_for_spirv.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_device_recompile_built_ins.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_driver.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_driver.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_driver_handle.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_driver_handle.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_event.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_event.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_fence.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_gmm_resource_info_l0.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_host_pointer_manager.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_kernel.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_l0_debugger.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_memory_manager.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_module.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_sampler.h ) add_library(${TARGET_NAME} OBJECT ${L0_MOCKS_SOURCES}) target_include_directories(${TARGET_NAME} PRIVATE $ $ $ 
${NEO_SOURCE_DIR}/level_zero/core/test/unit_test ) target_compile_definitions(${TARGET_NAME} PUBLIC $) set_target_properties(${TARGET_NAME} PROPERTIES FOLDER ${TARGET_NAME_L0}) create_source_tree(${TARGET_NAME} ${L0_ROOT_DIR}) compute-runtime-22.14.22890/level_zero/core/test/unit_tests/mocks/debugger_l0_create.cpp000066400000000000000000000012161422164147700311520ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/device/device.h" #include "level_zero/core/test/unit_tests/mocks/mock_l0_debugger.h" namespace L0 { namespace ult { DebugerL0CreateFn mockDebuggerL0HwFactory[IGFX_MAX_CORE]; } } // namespace L0 namespace L0 { std::unique_ptr DebuggerL0::create(NEO::Device *device) { initDebuggingInOs(device->getRootDeviceEnvironment().osInterface.get()); auto debugger = ult::mockDebuggerL0HwFactory[device->getHardwareInfo().platform.eRenderCoreFamily](device); return std::unique_ptr(debugger); } } // namespace L0compute-runtime-22.14.22890/level_zero/core/test/unit_tests/mocks/mock_built_ins.cpp000066400000000000000000000010411422164147700304450ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/test/unit_tests/mocks/mock_built_ins.h" #include "shared/source/built_ins/sip.h" namespace L0 { namespace ult { const NEO::SipKernel &MockBuiltins::getSipKernel(NEO::SipKernelType type, NEO::Device &device) { if (!(sipKernel && sipKernel->getType() == type)) { sipKernel.reset(new NEO::SipKernel(type, allocation.get(), stateSaveAreaHeader)); } return *sipKernel; } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/mocks/mock_built_ins.h000066400000000000000000000013041422164147700301140ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/built_ins/built_ins.h" #include 
"shared/test/common/mocks/mock_graphics_allocation.h" namespace L0 { namespace ult { class MockBuiltins : public NEO::BuiltIns { public: MockBuiltins() : BuiltIns() { allocation.reset(new NEO::MockGraphicsAllocation()); } const NEO::SipKernel &getSipKernel(NEO::SipKernelType type, NEO::Device &device) override; std::unique_ptr sipKernel; std::unique_ptr allocation; std::vector stateSaveAreaHeader{'s', 's', 'a', 'h'}; }; } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/mocks/mock_builtin_functions_lib_impl.h000066400000000000000000000041061422164147700335340ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/test/unit_tests/mocks/mock_kernel.h" #include "level_zero/core/test/unit_tests/mocks/mock_module.h" namespace L0 { namespace ult { struct MockBuiltinFunctionsLibImpl : BuiltinFunctionsLibImpl { using BuiltinFunctionsLibImpl::builtins; using BuiltinFunctionsLibImpl::getFunction; using BuiltinFunctionsLibImpl::imageBuiltins; MockBuiltinFunctionsLibImpl(L0::Device *device, NEO::BuiltIns *builtInsLib) : BuiltinFunctionsLibImpl(device, builtInsLib) { dummyKernel = std::unique_ptr>(new Mock<::L0::Kernel>()); dummyModule = std::unique_ptr(new Mock(device, nullptr)); dummyKernel->module = dummyModule.get(); } void initBuiltinKernel(L0::Builtin func) override { auto builtId = static_cast(func); if (builtins[builtId].get() == nullptr) { builtins[builtId] = loadBuiltIn(NEO::EBuiltInOps::CopyBufferToBuffer, "copyBufferToBufferBytesSingle"); } } void initBuiltinImageKernel(L0::ImageBuiltin func) override { auto builtId = static_cast(func); if (imageBuiltins[builtId].get() == nullptr) { imageBuiltins[builtId] = loadBuiltIn(NEO::EBuiltInOps::CopyImage3dToBuffer, "CopyImage3dToBuffer16Bytes"); } } std::unique_ptr> dummyKernel; std::unique_ptr dummyModule; Kernel *getFunction(Builtin func) override { return dummyKernel.get(); } Kernel 
*getImageFunction(ImageBuiltin func) override { return dummyKernel.get(); } std::unique_ptr loadBuiltIn(NEO::EBuiltInOps::Type builtin, const char *builtInName) override { std::unique_ptr mockKernel(new Mock<::L0::Kernel>()); std::unique_ptr mockModule(new Mock(device, nullptr)); return std::unique_ptr(new BuiltinData{std::move(mockModule), std::move(mockKernel)}); } }; } // namespace ult } // namespace L0mock_builtin_functions_lib_impl_timestamps.h000066400000000000000000000057741422164147700357370ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/mocks/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/built_ins/built_ins.h" #include "level_zero/core/source/builtin/builtin_functions_lib_impl.h" namespace L0 { namespace ult { struct MockBuiltinDataTimestamp : BuiltinFunctionsLibImpl::BuiltinData { using BuiltinFunctionsLibImpl::BuiltinData::BuiltinData; ~MockBuiltinDataTimestamp() { module.release(); } }; struct MockBuiltinFunctionsLibImplTimestamps : BuiltinFunctionsLibImpl { using BuiltinFunctionsLibImpl::BuiltinFunctionsLibImpl; void initBuiltinKernel(Builtin func) override { switch (static_cast(func)) { case Builtin::QueryKernelTimestamps: if (builtins[0].get() == nullptr) { builtins[0] = loadBuiltIn(NEO::EBuiltInOps::QueryKernelTimestamps, "QueryKernelTimestamps"); } break; case Builtin::QueryKernelTimestampsWithOffsets: if (builtins[1].get() == nullptr) { builtins[1] = loadBuiltIn(NEO::EBuiltInOps::QueryKernelTimestamps, "QueryKernelTimestampsWithOffsets"); } break; default: break; }; } void initBuiltinImageKernel(ImageBuiltin func) override { } Kernel *getFunction(Builtin func) override { return func == Builtin::QueryKernelTimestampsWithOffsets ? 
builtins[1]->func.get() : builtins[0]->func.get(); } std::unique_ptr loadBuiltIn(NEO::EBuiltInOps::Type builtin, const char *builtInName) override { using BuiltInCodeType = NEO::BuiltinCode::ECodeType; auto builtInCodeType = NEO::DebugManager.flags.RebuildPrecompiledKernels.get() ? BuiltInCodeType::Intermediate : BuiltInCodeType::Binary; auto builtInCode = builtInsLib->getBuiltinsLib().getBuiltinCode(builtin, builtInCodeType, *device->getNEODevice()); ze_result_t res; std::unique_ptr module; ze_module_handle_t moduleHandle; ze_module_desc_t moduleDesc = {}; moduleDesc.format = builtInCode.type == BuiltInCodeType::Binary ? ZE_MODULE_FORMAT_NATIVE : ZE_MODULE_FORMAT_IL_SPIRV; moduleDesc.pInputModule = reinterpret_cast(&builtInCode.resource[0]); moduleDesc.inputSize = builtInCode.resource.size(); res = device->createModule(&moduleDesc, &moduleHandle, nullptr, ModuleType::Builtin); UNRECOVERABLE_IF(res != ZE_RESULT_SUCCESS); module.reset(Module::fromHandle(moduleHandle)); std::unique_ptr kernel; ze_kernel_handle_t kernelHandle; ze_kernel_desc_t kernelDesc = {}; kernelDesc.pKernelName = builtInName; res = module->createKernel(&kernelDesc, &kernelHandle); DEBUG_BREAK_IF(res != ZE_RESULT_SUCCESS); kernel.reset(Kernel::fromHandle(kernelHandle)); return std::unique_ptr(new MockBuiltinDataTimestamp{std::move(module), std::move(kernel)}); } }; } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/mocks/mock_cmdlist.cpp000066400000000000000000000017231422164147700301230ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h" namespace L0 { namespace ult { WhiteBox<::L0::CommandList>::WhiteBox(Device *device) : BaseClass(BaseClass::defaultNumIddsPerBlock) {} WhiteBox<::L0::CommandList>::~WhiteBox() {} MockCommandList::MockCommandList(Device *device) : WhiteBox<::L0::CommandList>(device) { this->device = device; 
size_t batchBufferSize = 65536u; batchBuffer = new uint8_t[batchBufferSize]; mockAllocation = new NEO::GraphicsAllocation(0, NEO::AllocationType::INTERNAL_HOST_MEMORY, &batchBuffer, reinterpret_cast(&batchBuffer), 0, sizeof(batchBufferSize), MemoryPool::System4KBPages); } MockCommandList::~MockCommandList() { delete mockAllocation; delete[] batchBuffer; } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h000066400000000000000000000344261422164147700275760ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/test/common/test_macros/mock_method_macros.h" #include "level_zero/core/source/cmdlist/cmdlist_hw.h" #include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.h" #include "level_zero/core/test/unit_tests/mock.h" #include "level_zero/core/test/unit_tests/mocks/mock_device.h" #include "level_zero/core/test/unit_tests/white_box.h" namespace NEO { class GraphicsAllocation; } namespace L0 { struct Device; namespace ult { template struct WhiteBox<::L0::CommandListCoreFamily> : public ::L0::CommandListCoreFamily { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using BaseClass = ::L0::CommandListCoreFamily; using BaseClass::appendBlitFill; using BaseClass::appendCopyImageBlit; using BaseClass::appendEventForProfiling; using BaseClass::appendEventForProfilingCopyCommand; using BaseClass::appendLaunchKernelWithParams; using BaseClass::appendMemoryCopyBlit; using BaseClass::appendMemoryCopyBlitRegion; using BaseClass::appendMultiTileBarrier; using BaseClass::appendSignalEventPostWalker; using BaseClass::appendWriteKernelTimestamp; using BaseClass::applyMemoryRangesBarrier; using BaseClass::clearCommandsToPatch; using BaseClass::cmdQImmediate; using BaseClass::commandContainer; using BaseClass::commandListPerThreadScratchSize; using BaseClass::commandListPreemptionMode; using BaseClass::commandsToPatch; 
using BaseClass::containsAnyKernel; using BaseClass::containsCooperativeKernelsFlag; using BaseClass::csr; using BaseClass::engineGroupType; using BaseClass::estimateBufferSizeMultiTileBarrier; using BaseClass::finalStreamState; using BaseClass::flags; using BaseClass::getAlignedAllocation; using BaseClass::getAllocationFromHostPtrMap; using BaseClass::getHostPtrAlloc; using BaseClass::hostPtrMap; using BaseClass::indirectAllocationsAllowed; using BaseClass::initialize; using BaseClass::partitionCount; using BaseClass::patternAllocations; using BaseClass::requiredStreamState; using BaseClass::unifiedMemoryControls; using BaseClass::updateStreamProperties; WhiteBox() : ::L0::CommandListCoreFamily(BaseClass::defaultNumIddsPerBlock) {} }; template using CommandListCoreFamily = WhiteBox<::L0::CommandListCoreFamily>; template struct WhiteBox> : public L0::CommandListCoreFamilyImmediate { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using BaseClass = L0::CommandListCoreFamilyImmediate; using BaseClass::clearCommandsToPatch; using BaseClass::cmdQImmediate; using BaseClass::commandsToPatch; using BaseClass::csr; using BaseClass::finalStreamState; using BaseClass::partitionCount; using BaseClass::requiredStreamState; WhiteBox() : BaseClass(BaseClass::defaultNumIddsPerBlock) {} }; template struct MockCommandListImmediate : public CommandListCoreFamilyImmediate { using CommandListCoreFamilyImmediate::requiredStreamState; using CommandListCoreFamilyImmediate::containsAnyKernel; }; template <> struct WhiteBox<::L0::CommandList> : public ::L0::CommandListImp { using BaseClass = ::L0::CommandListImp; using BaseClass::BaseClass; using BaseClass::cmdQImmediate; using BaseClass::commandContainer; using BaseClass::commandListPreemptionMode; using BaseClass::csr; using BaseClass::initialize; using BaseClass::partitionCount; WhiteBox(Device *device); ~WhiteBox() override; }; using CommandList = WhiteBox<::L0::CommandList>; struct MockCommandList : public CommandList { 
using BaseClass = CommandList; MockCommandList(Device *device = nullptr); ~MockCommandList() override; ADDMETHOD_NOBASE(close, ze_result_t, ZE_RESULT_SUCCESS, ()); ADDMETHOD_NOBASE(destroy, ze_result_t, ZE_RESULT_SUCCESS, ()); ADDMETHOD_NOBASE(appendLaunchKernel, ze_result_t, ZE_RESULT_SUCCESS, (ze_kernel_handle_t hFunction, const ze_group_count_t *pThreadGroupDimensions, ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents)); ADDMETHOD_NOBASE(appendLaunchCooperativeKernel, ze_result_t, ZE_RESULT_SUCCESS, (ze_kernel_handle_t hKernel, const ze_group_count_t *pLaunchFuncArgs, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents)); ADDMETHOD_NOBASE(appendLaunchKernelIndirect, ze_result_t, ZE_RESULT_SUCCESS, (ze_kernel_handle_t hFunction, const ze_group_count_t *pDispatchArgumentsBuffer, ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents)); ADDMETHOD_NOBASE(appendLaunchMultipleKernelsIndirect, ze_result_t, ZE_RESULT_SUCCESS, (uint32_t numFunctions, const ze_kernel_handle_t *phFunctions, const uint32_t *pNumLaunchArguments, const ze_group_count_t *pLaunchArgumentsBuffer, ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents)); ADDMETHOD_NOBASE(appendEventReset, ze_result_t, ZE_RESULT_SUCCESS, (ze_event_handle_t hEvent)); ADDMETHOD_NOBASE(appendBarrier, ze_result_t, ZE_RESULT_SUCCESS, (ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents)); ADDMETHOD_NOBASE(appendMemoryRangesBarrier, ze_result_t, ZE_RESULT_SUCCESS, (uint32_t numRanges, const size_t *pRangeSizes, const void **pRanges, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents)); ADDMETHOD_NOBASE(appendImageCopyFromMemory, ze_result_t, ZE_RESULT_SUCCESS, (ze_image_handle_t hDstImage, const void *srcptr, const ze_image_region_t *pDstRegion, ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents)); 
ADDMETHOD_NOBASE(appendImageCopyToMemory, ze_result_t, ZE_RESULT_SUCCESS, (void *dstptr, ze_image_handle_t hSrcImage, const ze_image_region_t *pSrcRegion, ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents)); ADDMETHOD_NOBASE(appendImageCopyRegion, ze_result_t, ZE_RESULT_SUCCESS, (ze_image_handle_t hDstImage, ze_image_handle_t hSrcImage, const ze_image_region_t *pDstRegion, const ze_image_region_t *pSrcRegion, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents)); ADDMETHOD_NOBASE(appendImageCopy, ze_result_t, ZE_RESULT_SUCCESS, (ze_image_handle_t hDstImage, ze_image_handle_t hSrcImage, ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents)); ADDMETHOD_NOBASE(appendMemAdvise, ze_result_t, ZE_RESULT_SUCCESS, (ze_device_handle_t hDevice, const void *ptr, size_t size, ze_memory_advice_t advice)); ADDMETHOD_NOBASE(appendMemoryCopy, ze_result_t, ZE_RESULT_SUCCESS, (void *dstptr, const void *srcptr, size_t size, ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents)); ADDMETHOD_NOBASE(appendPageFaultCopy, ze_result_t, ZE_RESULT_SUCCESS, (NEO::GraphicsAllocation * dstptr, NEO::GraphicsAllocation *srcptr, size_t size, bool flushHost)); ADDMETHOD_NOBASE(appendMemoryCopyRegion, ze_result_t, ZE_RESULT_SUCCESS, (void *dstptr, const ze_copy_region_t *dstRegion, uint32_t dstPitch, uint32_t dstSlicePitch, const void *srcptr, const ze_copy_region_t *srcRegion, uint32_t srcPitch, uint32_t srcSlicePitch, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents)); ADDMETHOD_NOBASE(appendMemoryPrefetch, ze_result_t, ZE_RESULT_SUCCESS, (const void *ptr, size_t count)); ADDMETHOD_NOBASE(appendMemoryFill, ze_result_t, ZE_RESULT_SUCCESS, (void *ptr, const void *pattern, size_t pattern_size, size_t size, ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents)); ADDMETHOD_NOBASE(appendSignalEvent, ze_result_t, 
ZE_RESULT_SUCCESS, (ze_event_handle_t hEvent)); ADDMETHOD_NOBASE(appendWaitOnEvents, ze_result_t, ZE_RESULT_SUCCESS, (uint32_t numEvents, ze_event_handle_t *phEvent)); ADDMETHOD_NOBASE(appendWriteGlobalTimestamp, ze_result_t, ZE_RESULT_SUCCESS, (uint64_t * dstptr, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents)); ADDMETHOD_NOBASE(appendQueryKernelTimestamps, ze_result_t, ZE_RESULT_SUCCESS, (uint32_t numEvents, ze_event_handle_t *phEvents, void *dstptr, const size_t *pOffsets, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents)) ADDMETHOD_NOBASE(appendMemoryCopyFromContext, ze_result_t, ZE_RESULT_SUCCESS, (void *dstptr, ze_context_handle_t hContextSrc, const void *srcptr, size_t size, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents)); ADDMETHOD_NOBASE(reserveSpace, ze_result_t, ZE_RESULT_SUCCESS, (size_t size, void **ptr)); ADDMETHOD_NOBASE(reset, ze_result_t, ZE_RESULT_SUCCESS, ()); ADDMETHOD_NOBASE(appendMetricMemoryBarrier, ze_result_t, ZE_RESULT_SUCCESS, ()); ADDMETHOD_NOBASE(appendMetricStreamerMarker, ze_result_t, ZE_RESULT_SUCCESS, (zet_metric_streamer_handle_t hMetricStreamer, uint32_t value)); ADDMETHOD_NOBASE(appendMetricQueryBegin, ze_result_t, ZE_RESULT_SUCCESS, (zet_metric_query_handle_t hMetricQuery)); ADDMETHOD_NOBASE(appendMetricQueryEnd, ze_result_t, ZE_RESULT_SUCCESS, (zet_metric_query_handle_t hMetricQuery, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents)); ADDMETHOD_NOBASE(appendMILoadRegImm, ze_result_t, ZE_RESULT_SUCCESS, (uint32_t reg, uint32_t value)); ADDMETHOD_NOBASE(appendMILoadRegReg, ze_result_t, ZE_RESULT_SUCCESS, (uint32_t reg1, uint32_t reg2)); ADDMETHOD_NOBASE(appendMILoadRegMem, ze_result_t, ZE_RESULT_SUCCESS, (uint32_t reg1, uint64_t address)); ADDMETHOD_NOBASE(appendMIStoreRegMem, ze_result_t, ZE_RESULT_SUCCESS, (uint32_t reg1, uint64_t address)); 
ADDMETHOD_NOBASE(appendMIMath, ze_result_t, ZE_RESULT_SUCCESS, (void *aluArray, size_t aluCount)); ADDMETHOD_NOBASE(appendMIBBStart, ze_result_t, ZE_RESULT_SUCCESS, (uint64_t address, size_t predication, bool secondLevel)); ADDMETHOD_NOBASE(appendMIBBEnd, ze_result_t, ZE_RESULT_SUCCESS, ()); ADDMETHOD_NOBASE(appendMINoop, ze_result_t, ZE_RESULT_SUCCESS, ()); ADDMETHOD_NOBASE(appendPipeControl, ze_result_t, ZE_RESULT_SUCCESS, (void *dstPtr, uint64_t value)); ADDMETHOD_NOBASE(appendWaitOnMemory, ze_result_t, ZE_RESULT_SUCCESS, (void *desc, void *ptr, uint32_t data, ze_event_handle_t hSignalEvent)); ADDMETHOD_NOBASE(appendWriteToMemory, ze_result_t, ZE_RESULT_SUCCESS, (void *desc, void *ptr, uint64_t data)); ADDMETHOD_NOBASE(executeCommandListImmediate, ze_result_t, ZE_RESULT_SUCCESS, (bool perforMigration)); ADDMETHOD_NOBASE(initialize, ze_result_t, ZE_RESULT_SUCCESS, (L0::Device * device, NEO::EngineGroupType engineGroupType, ze_command_list_flags_t flags)); ADDMETHOD_NOBASE_VOIDRETURN(appendMultiPartitionPrologue, (uint32_t partitionDataSize)); ADDMETHOD_NOBASE_VOIDRETURN(appendMultiPartitionEpilogue, (void)); uint8_t *batchBuffer = nullptr; NEO::GraphicsAllocation *mockAllocation = nullptr; }; } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/mocks/mock_cmdqueue.cpp000066400000000000000000000013301422164147700302660ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h" #include "shared/source/device/device.h" namespace L0 { namespace ult { WhiteBox<::L0::CommandQueue>::WhiteBox(L0::Device *device, NEO::CommandStreamReceiver *csr, const ze_command_queue_desc_t *desc) : ::L0::CommandQueueImp(device, csr, desc) {} WhiteBox<::L0::CommandQueue>::~WhiteBox() {} Mock::Mock(L0::Device *device, NEO::CommandStreamReceiver *csr, const ze_command_queue_desc_t *desc) : WhiteBox<::L0::CommandQueue>(device, 
csr, desc) { this->device = device; } Mock::~Mock() { } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h000066400000000000000000000070631422164147700277440ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/test/common/test_macros/mock_method_macros.h" #include "level_zero/core/source/cmdqueue/cmdqueue_hw.h" #include "level_zero/core/source/cmdqueue/cmdqueue_imp.h" #include "level_zero/core/test/unit_tests/mock.h" #include "level_zero/core/test/unit_tests/white_box.h" namespace L0 { namespace ult { template <> struct WhiteBox<::L0::CommandQueue> : public ::L0::CommandQueueImp { using BaseClass = ::L0::CommandQueueImp; using BaseClass::buffers; using BaseClass::commandStream; using BaseClass::csr; using BaseClass::device; using BaseClass::preemptionCmdSyncProgramming; using BaseClass::printfFunctionContainer; using BaseClass::submitBatchBuffer; using BaseClass::synchronizeByPollingForTaskCount; using BaseClass::taskCount; using CommandQueue::activeSubDevices; using CommandQueue::commandQueuePreemptionMode; using CommandQueue::internalUsage; using CommandQueue::partitionCount; WhiteBox(Device *device, NEO::CommandStreamReceiver *csr, const ze_command_queue_desc_t *desc); ~WhiteBox() override; }; using CommandQueue = WhiteBox<::L0::CommandQueue>; static ze_command_queue_desc_t default_cmd_queue_desc = {}; template <> struct Mock : public CommandQueue { Mock(L0::Device *device = nullptr, NEO::CommandStreamReceiver *csr = nullptr, const ze_command_queue_desc_t *desc = &default_cmd_queue_desc); ~Mock() override; ADDMETHOD_NOBASE(createFence, ze_result_t, ZE_RESULT_SUCCESS, (const ze_fence_desc_t *desc, ze_fence_handle_t *phFence)); ADDMETHOD_NOBASE(destroy, ze_result_t, ZE_RESULT_SUCCESS, ()); ADDMETHOD_NOBASE(executeCommandLists, ze_result_t, ZE_RESULT_SUCCESS, (uint32_t numCommandLists, 
ze_command_list_handle_t *phCommandLists, ze_fence_handle_t hFence, bool performMigration)); ADDMETHOD_NOBASE(executeCommands, ze_result_t, ZE_RESULT_SUCCESS, (uint32_t numCommands, void *phCommands, ze_fence_handle_t hFence)); ADDMETHOD_NOBASE(synchronize, ze_result_t, ZE_RESULT_SUCCESS, (uint64_t timeout)); ADDMETHOD_NOBASE(getPreemptionCmdProgramming, bool, false, ()); }; template struct MockCommandQueueHw : public L0::CommandQueueHw { using BaseClass = ::L0::CommandQueueHw; using BaseClass::commandStream; using BaseClass::printfFunctionContainer; using L0::CommandQueue::activeSubDevices; using L0::CommandQueue::internalUsage; using L0::CommandQueue::partitionCount; using L0::CommandQueue::preemptionCmdSyncProgramming; using L0::CommandQueueImp::csr; MockCommandQueueHw(L0::Device *device, NEO::CommandStreamReceiver *csr, const ze_command_queue_desc_t *desc) : L0::CommandQueueHw(device, csr, desc) { } ze_result_t synchronize(uint64_t timeout) override { synchronizedCalled++; return synchronizeReturnValue; } NEO::SubmissionStatus submitBatchBuffer(size_t offset, NEO::ResidencyContainer &residencyContainer, void *endingCmdPtr, bool isCooperative) override { residencyContainerSnapshot = residencyContainer; return BaseClass::submitBatchBuffer(offset, residencyContainer, endingCmdPtr, isCooperative); } uint32_t synchronizedCalled = 0; NEO::ResidencyContainer residencyContainerSnapshot; ze_result_t synchronizeReturnValue{ZE_RESULT_SUCCESS}; }; struct Deleter { void operator()(CommandQueueImp *cmdQ) { cmdQ->destroy(); } }; } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/mocks/mock_context.h000066400000000000000000000126301422164147700276140ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/test/common/test_macros/mock_method_macros.h" #include "level_zero/core/source/context/context.h" #include 
"level_zero/core/test/unit_tests/mock.h" #include "level_zero/core/test/unit_tests/white_box.h" namespace L0 { namespace ult { template <> struct WhiteBox<::L0::Context> : public ::L0::Context {}; using Context = WhiteBox<::L0::Context>; template <> struct Mock : public Context { Mock() = default; ~Mock() override = default; ADDMETHOD_NOBASE(destroy, ze_result_t, ZE_RESULT_SUCCESS, ()); ADDMETHOD_NOBASE(getStatus, ze_result_t, ZE_RESULT_SUCCESS, ()); ADDMETHOD_NOBASE(getDriverHandle, DriverHandle *, nullptr, ()); ADDMETHOD_NOBASE(allocHostMem, ze_result_t, ZE_RESULT_SUCCESS, (const ze_host_mem_alloc_desc_t *hostDesc, size_t size, size_t alignment, void **ptr)); ADDMETHOD_NOBASE(allocDeviceMem, ze_result_t, ZE_RESULT_SUCCESS, (ze_device_handle_t hDevice, const ze_device_mem_alloc_desc_t *deviceDesc, size_t size, size_t alignment, void **ptr)); ADDMETHOD_NOBASE(allocSharedMem, ze_result_t, ZE_RESULT_SUCCESS, (ze_device_handle_t hDevice, const ze_device_mem_alloc_desc_t *deviceDesc, const ze_host_mem_alloc_desc_t *hostDesc, size_t size, size_t alignment, void **ptr)); ADDMETHOD_NOBASE(makeMemoryResident, ze_result_t, ZE_RESULT_SUCCESS, (ze_device_handle_t hDevice, void *ptr, size_t size)); ADDMETHOD_NOBASE(evictMemory, ze_result_t, ZE_RESULT_SUCCESS, (ze_device_handle_t hDevice, void *ptr, size_t size)); ADDMETHOD_NOBASE(makeImageResident, ze_result_t, ZE_RESULT_SUCCESS, (ze_device_handle_t hDevice, ze_image_handle_t hImage)); ADDMETHOD_NOBASE(evictImage, ze_result_t, ZE_RESULT_SUCCESS, (ze_device_handle_t hDevice, ze_image_handle_t hImage)); ADDMETHOD_NOBASE(freeMem, ze_result_t, ZE_RESULT_SUCCESS, (const void *ptr)); ADDMETHOD_NOBASE(getMemAllocProperties, ze_result_t, ZE_RESULT_SUCCESS, (const void *ptr, ze_memory_allocation_properties_t *pMemAllocProperties, ze_device_handle_t *phDevice)); ADDMETHOD_NOBASE(getMemAddressRange, ze_result_t, ZE_RESULT_SUCCESS, (const void *ptr, void **pBase, size_t *pSize)); ADDMETHOD_NOBASE(getIpcMemHandle, ze_result_t, 
ZE_RESULT_SUCCESS, (const void *ptr, ze_ipc_mem_handle_t *pIpcHandle)); ADDMETHOD_NOBASE(closeIpcMemHandle, ze_result_t, ZE_RESULT_SUCCESS, (const void *ptr)); ADDMETHOD_NOBASE(openIpcMemHandle, ze_result_t, ZE_RESULT_SUCCESS, (ze_device_handle_t hDevice, ze_ipc_mem_handle_t handle, ze_ipc_memory_flags_t flags, void **ptr)); ADDMETHOD_NOBASE(createModule, ze_result_t, ZE_RESULT_SUCCESS, (ze_device_handle_t hDevice, const ze_module_desc_t *desc, ze_module_handle_t *phModule, ze_module_build_log_handle_t *phBuildLog)); ADDMETHOD_NOBASE(createSampler, ze_result_t, ZE_RESULT_SUCCESS, (ze_device_handle_t hDevice, const ze_sampler_desc_t *pDesc, ze_sampler_handle_t *phSampler)); ADDMETHOD_NOBASE(createCommandQueue, ze_result_t, ZE_RESULT_SUCCESS, (ze_device_handle_t hDevice, const ze_command_queue_desc_t *desc, ze_command_queue_handle_t *commandQueue)); ADDMETHOD_NOBASE(createCommandList, ze_result_t, ZE_RESULT_SUCCESS, (ze_device_handle_t hDevice, const ze_command_list_desc_t *desc, ze_command_list_handle_t *commandList)); ADDMETHOD_NOBASE(createCommandListImmediate, ze_result_t, ZE_RESULT_SUCCESS, (ze_device_handle_t hDevice, const ze_command_queue_desc_t *desc, ze_command_list_handle_t *commandList)); ADDMETHOD_NOBASE(activateMetricGroups, ze_result_t, ZE_RESULT_SUCCESS, (zet_device_handle_t hDevice, uint32_t count, zet_metric_group_handle_t *phMetricGroups)); ADDMETHOD_NOBASE(reserveVirtualMem, ze_result_t, ZE_RESULT_SUCCESS, (const void *pStart, size_t size, void **pptr)); ADDMETHOD_NOBASE(freeVirtualMem, ze_result_t, ZE_RESULT_SUCCESS, (const void *ptr, size_t size)); ADDMETHOD_NOBASE(queryVirtualMemPageSize, ze_result_t, ZE_RESULT_SUCCESS, (ze_device_handle_t hDevice, size_t size, size_t *pagesize)); ADDMETHOD_NOBASE(createPhysicalMem, ze_result_t, ZE_RESULT_SUCCESS, (ze_device_handle_t hDevice, ze_physical_mem_desc_t *desc, ze_physical_mem_handle_t *phPhysicalMemory)); ADDMETHOD_NOBASE(destroyPhysicalMem, ze_result_t, ZE_RESULT_SUCCESS, (ze_physical_mem_handle_t 
hPhysicalMemory)); ADDMETHOD_NOBASE(mapVirtualMem, ze_result_t, ZE_RESULT_SUCCESS, (const void *ptr, size_t size, ze_physical_mem_handle_t hPhysicalMemory, size_t offset, ze_memory_access_attribute_t access)); ADDMETHOD_NOBASE(unMapVirtualMem, ze_result_t, ZE_RESULT_SUCCESS, (const void *ptr, size_t size)); ADDMETHOD_NOBASE(setVirtualMemAccessAttribute, ze_result_t, ZE_RESULT_SUCCESS, (const void *ptr, size_t size, ze_memory_access_attribute_t access)); ADDMETHOD_NOBASE(getVirtualMemAccessAttribute, ze_result_t, ZE_RESULT_SUCCESS, (const void *ptr, size_t size, ze_memory_access_attribute_t *access, size_t *outSize)); ADDMETHOD_NOBASE(openEventPoolIpcHandle, ze_result_t, ZE_RESULT_SUCCESS, (ze_ipc_event_pool_handle_t hIpc, ze_event_pool_handle_t *phEventPool)); ADDMETHOD_NOBASE(createEventPool, ze_result_t, ZE_RESULT_SUCCESS, (const ze_event_pool_desc_t *desc, uint32_t numDevices, ze_device_handle_t *phDevices, ze_event_pool_handle_t *phEventPool)); ADDMETHOD_NOBASE(createImage, ze_result_t, ZE_RESULT_SUCCESS, (ze_device_handle_t hDevice, const ze_image_desc_t *desc, ze_image_handle_t *phImage)); }; } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/mocks/mock_device.h000066400000000000000000000161561422164147700273760ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/device/device.h" #include "shared/test/common/test_macros/mock_method_macros.h" #include "level_zero/core/source/device/device_imp.h" #include "level_zero/core/test/unit_tests/mock.h" #include "level_zero/core/test/unit_tests/white_box.h" namespace L0 { namespace ult { template <> struct WhiteBox<::L0::Device> : public ::L0::Device { using Base = L0::Device; using Base::implicitScalingCapable; using L0::Device::getNEODevice; using L0::Device::neoDevice; }; using Device = WhiteBox<::L0::Device>; template <> struct Mock : public Device { Mock() = default; 
ADDMETHOD_NOBASE(canAccessPeer, ze_result_t, ZE_RESULT_SUCCESS, (ze_device_handle_t hPeerDevice, ze_bool_t *value)); ADDMETHOD_NOBASE(createCommandList, ze_result_t, ZE_RESULT_SUCCESS, (const ze_command_list_desc_t *desc, ze_command_list_handle_t *commandList)); ADDMETHOD_NOBASE(createCommandListImmediate, ze_result_t, ZE_RESULT_SUCCESS, (const ze_command_queue_desc_t *desc, ze_command_list_handle_t *commandList)); ADDMETHOD_NOBASE(createCommandQueue, ze_result_t, ZE_RESULT_SUCCESS, (const ze_command_queue_desc_t *desc, ze_command_queue_handle_t *commandQueue)); ADDMETHOD_NOBASE(createImage, ze_result_t, ZE_RESULT_SUCCESS, (const ze_image_desc_t *desc, ze_image_handle_t *phImage)); ADDMETHOD_NOBASE(createModule, ze_result_t, ZE_RESULT_SUCCESS, (const ze_module_desc_t *desc, ze_module_handle_t *module, ze_module_build_log_handle_t *buildLog, ModuleType type)); ADDMETHOD_NOBASE(createSampler, ze_result_t, ZE_RESULT_SUCCESS, (const ze_sampler_desc_t *pDesc, ze_sampler_handle_t *phSampler)); ADDMETHOD_NOBASE(getComputeProperties, ze_result_t, ZE_RESULT_SUCCESS, (ze_device_compute_properties_t * pComputeProperties)); ADDMETHOD_NOBASE(getP2PProperties, ze_result_t, ZE_RESULT_SUCCESS, (ze_device_handle_t hPeerDevice, ze_device_p2p_properties_t *pP2PProperties)); ADDMETHOD_NOBASE(getKernelProperties, ze_result_t, ZE_RESULT_SUCCESS, (ze_device_module_properties_t * pKernelProperties)); ADDMETHOD_NOBASE(getPciProperties, ze_result_t, ZE_RESULT_SUCCESS, (ze_pci_ext_properties_t * pPciProperties)); ADDMETHOD_NOBASE(getMemoryProperties, ze_result_t, ZE_RESULT_SUCCESS, (uint32_t * pCount, ze_device_memory_properties_t *pMemProperties)); ADDMETHOD_NOBASE(getMemoryAccessProperties, ze_result_t, ZE_RESULT_SUCCESS, (ze_device_memory_access_properties_t * pMemAccessProperties)); ADDMETHOD_NOBASE(getProperties, ze_result_t, ZE_RESULT_SUCCESS, (ze_device_properties_t * pDeviceProperties)); ADDMETHOD_NOBASE(getSubDevices, ze_result_t, ZE_RESULT_SUCCESS, (uint32_t * pCount, 
ze_device_handle_t *phSubdevices)); ADDMETHOD_NOBASE(getCacheProperties, ze_result_t, ZE_RESULT_SUCCESS, (uint32_t * pCount, ze_device_cache_properties_t *pCacheProperties)); ADDMETHOD_NOBASE(reserveCache, ze_result_t, ZE_RESULT_SUCCESS, (size_t cacheLevel, size_t cacheReservationSize)); ADDMETHOD_NOBASE(setCacheAdvice, ze_result_t, ZE_RESULT_SUCCESS, (void *ptr, size_t regionSize, ze_cache_ext_region_t cacheRegion)); ADDMETHOD_NOBASE(imageGetProperties, ze_result_t, ZE_RESULT_SUCCESS, (const ze_image_desc_t *desc, ze_image_properties_t *pImageProperties)); ADDMETHOD_NOBASE(getCommandQueueGroupProperties, ze_result_t, ZE_RESULT_SUCCESS, (uint32_t * pCount, ze_command_queue_group_properties_t *pCommandQueueGroupProperties)); ADDMETHOD_NOBASE(getDeviceImageProperties, ze_result_t, ZE_RESULT_SUCCESS, (ze_device_image_properties_t * pDeviceImageProperties)); ADDMETHOD_NOBASE(getExternalMemoryProperties, ze_result_t, ZE_RESULT_SUCCESS, (ze_device_external_memory_properties_t * pExternalMemoryProperties)); ADDMETHOD_NOBASE(getGlobalTimestamps, ze_result_t, ZE_RESULT_SUCCESS, (uint64_t * hostTimestamp, uint64_t *deviceTimestamp)); ADDMETHOD_NOBASE(systemBarrier, ze_result_t, ZE_RESULT_SUCCESS, ()); // Runtime internal methods ADDMETHOD_NOBASE(getExecEnvironment, void *, nullptr, ()); ADDMETHOD_NOBASE_REFRETURN(getHwHelper, NEO::HwHelper &, ()); ADDMETHOD_NOBASE(getBuiltinFunctionsLib, BuiltinFunctionsLib *, nullptr, ()); ADDMETHOD_CONST_NOBASE(getMaxNumHwThreads, uint32_t, 16u, ()); ADDMETHOD_NOBASE(activateMetricGroupsDeferred, ze_result_t, ZE_RESULT_SUCCESS, (uint32_t count, zet_metric_group_handle_t *phMetricGroups)); ADDMETHOD_NOBASE_REFRETURN(getOsInterface, NEO::OSInterface &, ()); ADDMETHOD_CONST_NOBASE(getPlatformInfo, uint32_t, 0u, ()); ADDMETHOD_NOBASE_REFRETURN(getMetricDeviceContext, MetricDeviceContext &, ()); ADDMETHOD_CONST_NOBASE_REFRETURN(getHwInfo, const NEO::HardwareInfo &, ()); ADDMETHOD_NOBASE(getDriverHandle, L0::DriverHandle *, nullptr, ()); 
ADDMETHOD_NOBASE_VOIDRETURN(setDriverHandle, (L0::DriverHandle *)); ADDMETHOD_CONST_NOBASE(getDevicePreemptionMode, NEO::PreemptionMode, NEO::PreemptionMode::Initial, ()); ADDMETHOD_CONST_NOBASE_REFRETURN(getDeviceInfo, const NEO::DeviceInfo &, ()); ADDMETHOD_NOBASE_VOIDRETURN(activateMetricGroups, ()); ADDMETHOD_CONST_NOBASE(getDebugSurface, NEO::GraphicsAllocation *, nullptr, ()); ADDMETHOD_NOBASE(allocateManagedMemoryFromHostPtr, NEO::GraphicsAllocation *, nullptr, (void *buffer, size_t size, struct L0::CommandList *commandList)); ADDMETHOD_NOBASE(allocateMemoryFromHostPtr, NEO::GraphicsAllocation *, nullptr, (const void *buffer, size_t size, bool hostCopyAllowed)); ADDMETHOD_NOBASE_VOIDRETURN(setSysmanHandle, (SysmanDevice *)); ADDMETHOD_NOBASE(getSysmanHandle, SysmanDevice *, nullptr, ()); ADDMETHOD_NOBASE(getCsrForOrdinalAndIndex, ze_result_t, ZE_RESULT_SUCCESS, (NEO::CommandStreamReceiver * *csr, uint32_t ordinal, uint32_t index)); ADDMETHOD_NOBASE(getCsrForLowPriority, ze_result_t, ZE_RESULT_SUCCESS, (NEO::CommandStreamReceiver * *csr)); ADDMETHOD_NOBASE(getDebugProperties, ze_result_t, ZE_RESULT_SUCCESS, (zet_device_debug_properties_t * properties)); ADDMETHOD_NOBASE(getDebugSession, DebugSession *, nullptr, (const zet_debug_config_t &config)); ADDMETHOD_NOBASE_VOIDRETURN(removeDebugSession, ()); ADDMETHOD_NOBASE(obtainReusableAllocation, NEO::GraphicsAllocation *, nullptr, (size_t requiredSize, NEO::AllocationType type)) ADDMETHOD_NOBASE_VOIDRETURN(storeReusableAllocation, (NEO::GraphicsAllocation & alloc)); DebugSession *createDebugSession(const zet_debug_config_t &config, ze_result_t &result) override { result = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; return nullptr; } uint32_t getMOCS(bool l3enabled, bool l1enabled) override { if (l3enabled && !l1enabled) { return 2; } return 0; } }; template <> struct Mock : public L0::DeviceImp { using Base = L0::DeviceImp; using Base::debugSession; using Base::implicitScalingCapable; explicit Mock(NEO::Device *device, 
NEO::ExecutionEnvironment *execEnv) { device->incRefInternal(); Base::execEnvironment = execEnv; Base::neoDevice = device; Base::allocationsForReuse = std::make_unique(); } }; } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/mocks/mock_device_for_spirv.cpp000066400000000000000000000041141422164147700320110ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/test/unit_tests/mocks/mock_device_for_spirv.h" #include "shared/source/helpers/file_io.h" #include "shared/test/common/helpers/kernel_binary_helper.h" #include "shared/test/common/helpers/test_files.h" #include "shared/test/common/libult/global_environment.h" #include "level_zero/tools/source/debug/debug_session.h" namespace L0 { namespace ult { template ze_result_t MockDeviceForSpv::createModule(const ze_module_desc_t *desc, ze_module_handle_t *module, ze_module_build_log_handle_t *buildLog, ModuleType type) { const std::string BUILTIN_COPYFILL("builtin_copyfill"); const std::string BUILTIN_IMAGES("builtin_images"); if ((wasModuleCreated) && ((useImagesBuiltins != useImagesBuiltins_prev) || (isStateless != isStateless_prev))) wasModuleCreated = false; if (!wasModuleCreated) { std::string kernelName; retrieveBinaryKernelFilename(kernelName, (useImagesBuiltins ? BUILTIN_IMAGES : BUILTIN_COPYFILL) + (isStateless ? 
"_stateless_" : "_"), ".gen"); size_t size = 0; auto src = loadDataFromFile( kernelName.c_str(), size); ze_module_desc_t moduleDesc = {}; moduleDesc.format = ZE_MODULE_FORMAT_NATIVE; moduleDesc.pInputModule = reinterpret_cast(src.get()); moduleDesc.inputSize = size; ModuleBuildLog *moduleBuildLog = nullptr; mockModulePtr.reset(Module::create(this, &moduleDesc, moduleBuildLog, ModuleType::Builtin)); wasModuleCreated = true; useImagesBuiltins_prev = useImagesBuiltins; isStateless_prev = isStateless; } *module = mockModulePtr.get(); return ZE_RESULT_SUCCESS; } template class MockDeviceForSpv; template class MockDeviceForSpv; template class MockDeviceForSpv; }; // namespace ult } // namespace L0compute-runtime-22.14.22890/level_zero/core/test/unit_tests/mocks/mock_device_for_spirv.h000066400000000000000000000020441422164147700314560ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/driver/driver_handle_imp.h" #include "level_zero/core/source/module/module.h" #include "level_zero/core/test/unit_tests/mocks/mock_device.h" namespace L0 { namespace ult { template class MockDeviceForSpv : public Mock { protected: bool wasModuleCreated = false; bool useImagesBuiltins_prev = false; bool isStateless_prev = false; std::unique_ptr mockModulePtr; public: MockDeviceForSpv(NEO::Device *device, NEO::ExecutionEnvironment *ex, L0::DriverHandleImp *driverHandle) : Mock(device, ex) { this->driverHandle = driverHandle; wasModuleCreated = false; } ze_result_t createModule(const ze_module_desc_t *desc, ze_module_handle_t *module, ze_module_build_log_handle_t *buildLog, ModuleType type) override; ~MockDeviceForSpv() { } }; } // namespace ult } // namespace L0compute-runtime-22.14.22890/level_zero/core/test/unit_tests/mocks/mock_device_recompile_built_ins.h000066400000000000000000000025261422164147700335010ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * 
SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/module/module_imp.h" #include "level_zero/core/test/unit_tests/mocks/mock_device.h" #include "level_zero/core/test/unit_tests/mocks/mock_kernel.h" namespace L0 { namespace ult { struct MockDeviceForRebuildBuilins : public Mock { struct MockModuleForRebuildBuiltins : public ModuleImp { MockModuleForRebuildBuiltins(Device *device, ModuleType type) : ModuleImp(device, nullptr, type) {} ze_result_t createKernel(const ze_kernel_desc_t *desc, ze_kernel_handle_t *phFunction) override { *phFunction = new Mock(); return ZE_RESULT_SUCCESS; } }; MockDeviceForRebuildBuilins(NEO::Device *device) : Mock(device, device->getExecutionEnvironment()) { } ze_result_t createModule(const ze_module_desc_t *desc, ze_module_handle_t *module, ze_module_build_log_handle_t *buildLog, ModuleType type) override { if (desc) { formatForModule = desc->format; } *module = new MockModuleForRebuildBuiltins(this, type); return ZE_RESULT_SUCCESS; } ze_module_format_t formatForModule{}; }; } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/mocks/mock_driver.cpp000066400000000000000000000006151422164147700277560ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/test/unit_tests/mocks/mock_driver.h" namespace L0 { namespace ult { using MockDriver = Mock; Mock::Mock() { previousDriver = driver; driver = this; } Mock::~Mock() { driver = previousDriver; } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/mocks/mock_driver.h000066400000000000000000000013701422164147700274220ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/core/source/driver/driver_imp.h" #include "level_zero/core/test/unit_tests/mock.h" #include "level_zero/core/test/unit_tests/white_box.h" #include 
namespace L0 { namespace ult { template <> struct WhiteBox<::L0::DriverImp> : public ::L0::DriverImp { }; using Driver = WhiteBox<::L0::DriverImp>; template <> struct Mock : public Driver { Mock(); ~Mock() override; ze_result_t driverInit(ze_init_flags_t flag) override { initCalledCount++; return ZE_RESULT_SUCCESS; } Driver *previousDriver = nullptr; uint32_t initCalledCount = 0; }; } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/mocks/mock_driver_handle.cpp000066400000000000000000000052101422164147700312650ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/test/unit_tests/mocks/mock_driver_handle.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "level_zero/core/source/driver/host_pointer_manager.h" namespace L0 { namespace ult { using MockDriverHandle = Mock; Mock::Mock() = default; NEO::MemoryManager *Mock::getMemoryManager() { return memoryManager; } NEO::SVMAllocsManager *Mock::getSvmAllocManager() { return svmAllocsManager; } ze_result_t Mock::getDevice(uint32_t *pCount, ze_device_handle_t *phDevices) { if (*pCount == 0) { // User wants to know number of devices *pCount = this->num_devices; return ZE_RESULT_SUCCESS; } if (phDevices == nullptr) // User is expected to allocate space return ZE_RESULT_ERROR_INVALID_ARGUMENT; phDevices[0] = &this->device; return ZE_RESULT_SUCCESS; } ze_result_t Mock::allocDeviceMem(ze_device_handle_t hDevice, const ze_device_mem_alloc_desc_t *deviceDesc, size_t size, size_t alignment, void **ptr) { NEO::SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::DEVICE_UNIFIED_MEMORY, rootDeviceIndices, deviceBitfields); auto allocation = svmAllocsManager->createUnifiedMemoryAllocation(size, unifiedMemoryProperties); if (allocation == nullptr) { return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY; } *ptr = allocation; return ZE_RESULT_SUCCESS; } 
ze_result_t Mock::freeMem(const void *ptr) { auto allocation = svmAllocsManager->getSVMAlloc(ptr); if (allocation == nullptr) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } svmAllocsManager->freeSVMAlloc(const_cast(ptr)); if (svmAllocsManager->getSvmMapOperation(ptr)) { svmAllocsManager->removeSvmMapOperation(ptr); } return ZE_RESULT_SUCCESS; } void Mock::setupDevices(std::vector> neoDevices) { this->numDevices = static_cast(neoDevices.size()); for (auto &neoDevice : neoDevices) { ze_result_t returnValue = ZE_RESULT_SUCCESS; this->rootDeviceIndices.insert(neoDevice->getRootDeviceIndex()); this->deviceBitfields.insert({neoDevice->getRootDeviceIndex(), neoDevice->getDeviceBitfield()}); auto device = Device::create(this, neoDevice.release(), false, &returnValue); this->devices.push_back(device); } } Mock::~Mock(){}; } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/mocks/mock_driver_handle.h000066400000000000000000000055311422164147700307400ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/test/common/test_macros/mock_method_macros.h" #include "level_zero/core/source/driver/driver_handle_imp.h" #include "level_zero/core/test/unit_tests/mock.h" #include "level_zero/core/test/unit_tests/mocks/mock_device.h" #include "level_zero/core/test/unit_tests/white_box.h" namespace L0 { namespace ult { template <> struct WhiteBox<::L0::DriverHandle> : public ::L0::DriverHandleImp { using ::L0::DriverHandleImp::enableProgramDebugging; }; using DriverHandle = WhiteBox<::L0::DriverHandle>; template <> struct Mock : public DriverHandleImp { Mock(); ~Mock() override; ADDMETHOD_NOBASE(getProperties, ze_result_t, ZE_RESULT_SUCCESS, (ze_driver_properties_t * properties)) ADDMETHOD_NOBASE(getApiVersion, ze_result_t, ZE_RESULT_SUCCESS, (ze_api_version_t * version)) ADDMETHOD_NOBASE(getIPCProperties, ze_result_t, ZE_RESULT_SUCCESS, 
(ze_driver_ipc_properties_t * pIPCProperties)) ADDMETHOD_NOBASE(importExternalPointer, ze_result_t, ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, (void *ptr, size_t size)) ADDMETHOD_NOBASE(releaseImportedPointer, ze_result_t, ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, (void *ptr)) ADDMETHOD_NOBASE(getHostPointerBaseAddress, ze_result_t, ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, (void *ptr, void **baseAddress)) ADDMETHOD_NOBASE(findHostPointerAllocation, NEO::GraphicsAllocation *, nullptr, (void *ptr, size_t size, uint32_t rootDeviceIndex)) uint32_t num_devices = 1; Mock device; void setupDevices(std::vector> devices); ze_result_t freeMem(const void *ptr); ze_result_t getDevice(uint32_t *pCount, ze_device_handle_t *phDevices) override; NEO::MemoryManager *getMemoryManager() override; NEO::SVMAllocsManager *getSvmAllocManager(); ze_result_t allocDeviceMem(ze_device_handle_t hDevice, const ze_device_mem_alloc_desc_t *deviceDesc, size_t size, size_t alignment, void **ptr); NEO::GraphicsAllocation *getDriverSystemMemoryAllocation(void *ptr, size_t size, uint32_t rootDeviceIndex, uintptr_t *gpuAddress) override { auto svmData = svmAllocsManager->getSVMAlloc(ptr); if (svmData != nullptr) { if (gpuAddress != nullptr) { *gpuAddress = reinterpret_cast(ptr); } return svmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex); } return nullptr; } }; } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/mocks/mock_event.cpp000066400000000000000000000007661422164147700276130ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/test/unit_tests/mocks/mock_event.h" namespace L0 { namespace ult { Mock::Mock() : mockAllocation(0, NEO::AllocationType::INTERNAL_HOST_MEMORY, &memory, reinterpret_cast(&memory), 0, sizeof(memory), MemoryPool::System4KBPages) {} Mock::~Mock() {} } // namespace ult } // namespace L0 
compute-runtime-22.14.22890/level_zero/core/test/unit_tests/mocks/mock_event.h000066400000000000000000000077361422164147700272640ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "shared/test/common/test_macros/mock_method_macros.h" #include "level_zero/core/source/event/event.h" #include "level_zero/core/test/unit_tests/mock.h" #include "level_zero/core/test/unit_tests/white_box.h" namespace L0 { namespace ult { template <> struct WhiteBox<::L0::Event> : public ::L0::Event { using BaseClass = ::L0::Event; }; using Event = WhiteBox<::L0::Event>; template <> struct WhiteBox<::L0::EventPool> : public ::L0::EventPool { using BaseClass = ::L0::EventPool; }; using EventPool = WhiteBox<::L0::EventPool>; template <> struct Mock : public Event { Mock(); ~Mock() override; ADDMETHOD_NOBASE(destroy, ze_result_t, ZE_RESULT_SUCCESS, ()); ADDMETHOD_NOBASE(hostSignal, ze_result_t, ZE_RESULT_SUCCESS, ()); ADDMETHOD_NOBASE(hostSynchronize, ze_result_t, ZE_RESULT_SUCCESS, (uint64_t timeout)); ADDMETHOD_NOBASE(queryStatus, ze_result_t, ZE_RESULT_SUCCESS, ()); ADDMETHOD_NOBASE(reset, ze_result_t, ZE_RESULT_SUCCESS, ()); ADDMETHOD_NOBASE(queryKernelTimestamp, ze_result_t, ZE_RESULT_SUCCESS, (ze_kernel_timestamp_result_t * dstptr)); ADDMETHOD_NOBASE(queryTimestampsExp, ze_result_t, ZE_RESULT_SUCCESS, (::L0::Device * device, uint32_t *pCount, ze_kernel_timestamp_result_t *pTimestamps)); // Fake an allocation for event memory alignas(16) uint32_t memory = -1; NEO::GraphicsAllocation mockAllocation; }; template <> struct Mock : public EventPool { Mock() = default; ADDMETHOD_NOBASE(destroy, ze_result_t, ZE_RESULT_SUCCESS, ()); ADDMETHOD_NOBASE(getIpcHandle, ze_result_t, ZE_RESULT_SUCCESS, (ze_ipc_event_pool_handle_t * pIpcHandle)); ADDMETHOD_NOBASE(closeIpcHandle, ze_result_t, ZE_RESULT_SUCCESS, ()); ADDMETHOD_NOBASE(createEvent, ze_result_t, 
ZE_RESULT_SUCCESS, (const ze_event_desc_t *desc, ze_event_handle_t *phEvent)); ADDMETHOD_NOBASE(getDevice, Device *, nullptr, ()); ADDMETHOD_NOBASE(getEventSize, uint32_t, 0u, ()); using EventPool::eventPoolAllocations; }; class MockEvent : public ::L0::Event { public: MockEvent() { mockAllocation.reset(new NEO::MockGraphicsAllocation(0, NEO::AllocationType::INTERNAL_HOST_MEMORY, reinterpret_cast(0x1234), 0x1000, 0, sizeof(uint32_t), MemoryPool::System4KBPages)); this->timestampSizeInDw = 1; this->contextStartOffset = 0; this->contextEndOffset = 4; this->globalStartOffset = 8; this->globalEndOffset = 12; this->singlePacketSize = 16; } NEO::GraphicsAllocation &getAllocation(L0::Device *device) override { return *mockAllocation.get(); } uint64_t getGpuAddress(L0::Device *device) override { return mockAllocation.get()->getGpuAddress(); } ze_result_t destroy() override { return ZE_RESULT_SUCCESS; } ze_result_t hostSignal() override { return ZE_RESULT_SUCCESS; } ze_result_t hostSynchronize(uint64_t timeout) override { return ZE_RESULT_SUCCESS; } ze_result_t queryStatus() override { return ZE_RESULT_SUCCESS; } ze_result_t reset() override { return ZE_RESULT_SUCCESS; } ze_result_t queryKernelTimestamp(ze_kernel_timestamp_result_t *dstptr) override { return ZE_RESULT_SUCCESS; } ze_result_t queryTimestampsExp(L0::Device *device, uint32_t *pCount, ze_kernel_timestamp_result_t *pTimestamps) override { return ZE_RESULT_SUCCESS; } uint32_t getPacketsInUse() override { return 1; } void resetPackets() override {} void setPacketsInUse(uint32_t value) override {} uint64_t getPacketAddress(L0::Device *) override { return 0; } std::unique_ptr mockAllocation; }; } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/mocks/mock_fence.h000066400000000000000000000021721422164147700272100ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include 
"shared/test/common/test_macros/mock_method_macros.h" #include "level_zero/core/source/fence/fence.h" #include "level_zero/core/test/unit_tests/mock.h" #include "level_zero/core/test/unit_tests/white_box.h" #include "gtest/gtest.h" namespace L0 { namespace ult { template <> struct WhiteBox<::L0::Fence> : public ::L0::Fence { WhiteBox() : Fence(nullptr) {} ~WhiteBox() override = default; using ::L0::Fence::gpuHangCheckPeriod; using ::L0::Fence::taskCount; }; using Fence = WhiteBox<::L0::Fence>; template <> struct Mock : public Fence { ~Mock() override = default; ADDMETHOD_NOBASE(destroy, ze_result_t, ZE_RESULT_SUCCESS, ()); ADDMETHOD_NOBASE(hostSynchronize, ze_result_t, ZE_RESULT_SUCCESS, (uint64_t timeout)); ADDMETHOD_NOBASE(queryStatus, ze_result_t, ZE_RESULT_SUCCESS, ()); ADDMETHOD_NOBASE(assignTaskCountFromCsr, ze_result_t, ZE_RESULT_SUCCESS, ()); ADDMETHOD_NOBASE(reset, ze_result_t, ZE_RESULT_SUCCESS, (bool signaled)); }; } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/mocks/mock_gmm_resource_info_l0.cpp000066400000000000000000000024171422164147700325620ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/mocks/mock_gmm_resource_info.h" #include "level_zero/core/source/image/image_formats.h" #include "gtest/gtest.h" using namespace ::testing; namespace NEO { void MockGmmResourceInfo::setSurfaceFormat() { auto iterateL0Formats = [&](const std::array &formats) { if (!surfaceFormatInfo) { for (auto &formatArray : formats) { for (auto &format : formatArray) { if (mockResourceCreateParams.Format == format.GMMSurfaceFormat) { surfaceFormatInfo = &format; ASSERT_NE(nullptr, surfaceFormatInfo); return; } } } } }; iterateL0Formats(L0::ImageFormats::formats); if (mockResourceCreateParams.Format == GMM_FORMAT_GENERIC_8BIT) { static const NEO::SurfaceFormatInfo surfaceFormatGMM8BIT = {GMM_FORMAT_GENERIC_8BIT, 
GFX3DSTATE_SURFACEFORMAT_R8_UNORM, 0, 1, 1, 1}; surfaceFormatInfo = &surfaceFormatGMM8BIT; } ASSERT_NE(nullptr, surfaceFormatInfo); } } // namespace NEO compute-runtime-22.14.22890/level_zero/core/test/unit_tests/mocks/mock_host_pointer_manager.h000066400000000000000000000011561422164147700323400ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/core/source/driver/host_pointer_manager.h" #include "level_zero/core/test/unit_tests/white_box.h" namespace L0 { namespace ult { template <> struct WhiteBox<::L0::HostPointerManager> : public ::L0::HostPointerManager { using ::L0::HostPointerManager::createHostPointerAllocation; using ::L0::HostPointerManager::hostPointerAllocations; using ::L0::HostPointerManager::memoryManager; }; using HostPointerManager = WhiteBox<::L0::HostPointerManager>; } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/mocks/mock_kernel.h000066400000000000000000000134241422164147700274120ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/device_binary_format/patchtokens_decoder.h" #include "shared/source/kernel/kernel_descriptor.h" #include "shared/source/kernel/kernel_descriptor_from_patchtokens.h" #include "shared/test/common/test_macros/mock_method_macros.h" #include "level_zero/core/source/kernel/kernel_hw.h" #include "level_zero/core/source/kernel/kernel_imp.h" #include "level_zero/core/test/unit_tests/mock.h" #include "level_zero/core/test/unit_tests/white_box.h" namespace L0 { namespace ult { template <> struct WhiteBox<::L0::KernelImmutableData> : public ::L0::KernelImmutableData { using BaseClass = ::L0::KernelImmutableData; using ::L0::KernelImmutableData::createRelocatedDebugData; using ::L0::KernelImmutableData::crossThreadDataSize; using ::L0::KernelImmutableData::crossThreadDataTemplate; using 
::L0::KernelImmutableData::device; using ::L0::KernelImmutableData::isaGraphicsAllocation; using ::L0::KernelImmutableData::kernelDescriptor; using ::L0::KernelImmutableData::KernelImmutableData; using ::L0::KernelImmutableData::kernelInfo; using ::L0::KernelImmutableData::residencyContainer; using ::L0::KernelImmutableData::surfaceStateHeapSize; using ::L0::KernelImmutableData::surfaceStateHeapTemplate; WhiteBox() : ::L0::KernelImmutableData() {} }; template <> struct WhiteBox<::L0::Kernel> : public ::L0::KernelImp { using BaseClass = ::L0::KernelImp; using BaseClass::BaseClass; using ::L0::KernelImp::createPrintfBuffer; using ::L0::KernelImp::crossThreadData; using ::L0::KernelImp::crossThreadDataSize; using ::L0::KernelImp::groupSize; using ::L0::KernelImp::kernelImmData; using ::L0::KernelImp::kernelRequiresGenerationOfLocalIdsByRuntime; using ::L0::KernelImp::module; using ::L0::KernelImp::numThreadsPerThreadGroup; using ::L0::KernelImp::patchBindlessSurfaceState; using ::L0::KernelImp::perThreadDataForWholeThreadGroup; using ::L0::KernelImp::perThreadDataSize; using ::L0::KernelImp::perThreadDataSizeForWholeThreadGroup; using ::L0::KernelImp::pImplicitArgs; using ::L0::KernelImp::printfBuffer; using ::L0::KernelImp::requiredWorkgroupOrder; using ::L0::KernelImp::residencyContainer; using ::L0::KernelImp::surfaceStateHeapData; using ::L0::KernelImp::surfaceStateHeapDataSize; using ::L0::KernelImp::unifiedMemoryControls; void setBufferSurfaceState(uint32_t argIndex, void *address, NEO::GraphicsAllocation *alloc) override {} void evaluateIfRequiresGenerationOfLocalIdsByRuntime(const NEO::KernelDescriptor &kernelDescriptor) override {} WhiteBox() : ::L0::KernelImp(nullptr) {} }; template struct WhiteBoxKernelHw : public KernelHw { using BaseClass = KernelHw; using BaseClass::BaseClass; using ::L0::KernelImp::createPrintfBuffer; using ::L0::KernelImp::crossThreadData; using ::L0::KernelImp::crossThreadDataSize; using ::L0::KernelImp::groupSize; using 
::L0::KernelImp::kernelImmData; using ::L0::KernelImp::kernelRequiresGenerationOfLocalIdsByRuntime; using ::L0::KernelImp::module; using ::L0::KernelImp::numThreadsPerThreadGroup; using ::L0::KernelImp::patchBindlessSurfaceState; using ::L0::KernelImp::perThreadDataForWholeThreadGroup; using ::L0::KernelImp::perThreadDataSize; using ::L0::KernelImp::perThreadDataSizeForWholeThreadGroup; using ::L0::KernelImp::printfBuffer; using ::L0::KernelImp::requiredWorkgroupOrder; using ::L0::KernelImp::residencyContainer; using ::L0::KernelImp::surfaceStateHeapData; using ::L0::KernelImp::unifiedMemoryControls; void evaluateIfRequiresGenerationOfLocalIdsByRuntime(const NEO::KernelDescriptor &kernelDescriptor) override {} WhiteBoxKernelHw() : ::L0::KernelHw(nullptr) {} }; template <> struct Mock<::L0::Kernel> : public WhiteBox<::L0::Kernel> { using BaseClass = WhiteBox<::L0::Kernel>; ADDMETHOD_NOBASE(getProperties, ze_result_t, ZE_RESULT_SUCCESS, (ze_kernel_properties_t * pKernelProperties)) Mock() : BaseClass(nullptr) { NEO::PatchTokenBinary::KernelFromPatchtokens kernelTokens; iOpenCL::SKernelBinaryHeaderCommon kernelHeader; kernelTokens.header = &kernelHeader; iOpenCL::SPatchExecutionEnvironment execEnv = {}; execEnv.LargestCompiledSIMDSize = 8; kernelTokens.tokens.executionEnvironment = &execEnv; this->kernelImmData = &immutableData; auto allocation = new NEO::GraphicsAllocation(0, NEO::AllocationType::KERNEL_ISA, nullptr, 0, 0, 4096, MemoryPool::System4KBPages); immutableData.isaGraphicsAllocation.reset(allocation); NEO::populateKernelDescriptor(descriptor, kernelTokens, 8); immutableData.kernelDescriptor = &descriptor; crossThreadData.reset(new uint8_t[100]); } ~Mock() override { delete immutableData.isaGraphicsAllocation.release(); } void setBufferSurfaceState(uint32_t argIndex, void *address, NEO::GraphicsAllocation *alloc) override {} void evaluateIfRequiresGenerationOfLocalIdsByRuntime(const NEO::KernelDescriptor &kernelDescriptor) override {} ze_result_t 
setArgBufferWithAlloc(uint32_t argIndex, uintptr_t argVal, NEO::GraphicsAllocation *allocation) override { return ZE_RESULT_SUCCESS; } void printPrintfOutput() override { printPrintfOutputCalledTimes++; } WhiteBox<::L0::KernelImmutableData> immutableData; NEO::KernelDescriptor descriptor; uint32_t printPrintfOutputCalledTimes = 0; }; } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/mocks/mock_l0_debugger.h000066400000000000000000000072241422164147700303120ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/kernel/debug_data.h" #include "level_zero/core/source/debugger/debugger_l0.h" #include "level_zero/core/test/unit_tests/white_box.h" namespace L0 { namespace ult { extern DebugerL0CreateFn mockDebuggerL0HwFactory[]; template class MockDebuggerL0Hw : public L0::DebuggerL0Hw { public: using L0::DebuggerL0::perContextSbaAllocations; using L0::DebuggerL0::sbaTrackingGpuVa; using L0::DebuggerL0::singleAddressSpaceSbaTracking; MockDebuggerL0Hw(NEO::Device *device) : L0::DebuggerL0Hw(device) {} ~MockDebuggerL0Hw() override = default; static DebuggerL0 *allocate(NEO::Device *device) { return new MockDebuggerL0Hw(device); } void captureStateBaseAddress(NEO::CommandContainer &container, NEO::Debugger::SbaAddresses sba) override { captureStateBaseAddressCount++; L0::DebuggerL0Hw::captureStateBaseAddress(container, sba); } size_t getSbaTrackingCommandsSize(size_t trackedAddressCount) override { getSbaTrackingCommandsSizeCount++; return L0::DebuggerL0Hw::getSbaTrackingCommandsSize(trackedAddressCount); } void programSbaTrackingCommands(NEO::LinearStream &cmdStream, const NEO::Debugger::SbaAddresses &sba) override { programSbaTrackingCommandsCount++; L0::DebuggerL0Hw::programSbaTrackingCommands(cmdStream, sba); } void registerElf(NEO::DebugData *debugData, NEO::GraphicsAllocation *isaAllocation) override { registerElfCount++; 
lastReceivedElf = debugData->vIsa; L0::DebuggerL0Hw::registerElf(debugData, isaAllocation); } bool attachZebinModuleToSegmentAllocations(const StackVec &allocs, uint32_t &moduleHandle) override { segmentCountWithAttachedModuleHandle = static_cast(allocs.size()); if (std::numeric_limits::max() != moduleHandleToReturn) { moduleHandle = moduleHandleToReturn; return true; } return L0::DebuggerL0Hw::attachZebinModuleToSegmentAllocations(allocs, moduleHandle); } bool removeZebinModule(uint32_t moduleHandle) override { removedZebinModuleHandle = moduleHandle; return L0::DebuggerL0Hw::removeZebinModule(moduleHandle); } void notifyCommandQueueCreated() override { commandQueueCreatedCount++; L0::DebuggerL0Hw::notifyCommandQueueCreated(); } void notifyCommandQueueDestroyed() override { commandQueueDestroyedCount++; L0::DebuggerL0Hw::notifyCommandQueueDestroyed(); } uint32_t captureStateBaseAddressCount = 0; uint32_t programSbaTrackingCommandsCount = 0; uint32_t getSbaTrackingCommandsSizeCount = 0; uint32_t registerElfCount = 0; uint32_t commandQueueCreatedCount = 0; uint32_t commandQueueDestroyedCount = 0; const char *lastReceivedElf = nullptr; uint32_t segmentCountWithAttachedModuleHandle = 0; uint32_t removedZebinModuleHandle = 0; uint32_t moduleHandleToReturn = std::numeric_limits::max(); }; template struct MockDebuggerL0HwPopulateFactory { MockDebuggerL0HwPopulateFactory() { mockDebuggerL0HwFactory[productFamily] = MockDebuggerL0Hw::allocate; } }; template <> struct WhiteBox<::L0::DebuggerL0> : public ::L0::DebuggerL0 { using BaseClass = ::L0::DebuggerL0; using BaseClass::initDebuggingInOs; }; } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/mocks/mock_memory_manager.h000066400000000000000000000017721422164147700311370ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/memory_manager/os_agnostic_memory_manager.h" #include 
"shared/source/memory_manager/unified_memory_manager.h" #include "level_zero/core/test/unit_tests/mock.h" #include "level_zero/core/test/unit_tests/white_box.h" #include #if defined(__clang__) #pragma clang diagnostic push #pragma clang diagnostic ignored "-Winconsistent-missing-override" #endif namespace L0 { namespace ult { template <> struct WhiteBox<::NEO::OsAgnosticMemoryManager> : public ::NEO::OsAgnosticMemoryManager { using BaseClass = ::NEO::OsAgnosticMemoryManager; using BaseClass::localMemorySupported; WhiteBox(NEO::ExecutionEnvironment &executionEnvironment) : NEO::OsAgnosticMemoryManager(executionEnvironment) {} }; using MemoryManagerMock = WhiteBox<::NEO::OsAgnosticMemoryManager>; } // namespace ult } // namespace L0 #if defined(__clang__) #pragma clang diagnostic pop #endif compute-runtime-22.14.22890/level_zero/core/test/unit_tests/mocks/mock_module.h000066400000000000000000000165271422164147700274260ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/test/common/mocks/mock_cif.h" #include "shared/test/common/mocks/mock_compiler_interface.h" #include "shared/test/common/test_macros/mock_method_macros.h" #include "level_zero/core/source/module/module_imp.h" #include "level_zero/core/test/unit_tests/mock.h" #include "level_zero/core/test/unit_tests/white_box.h" #include "gtest/gtest.h" namespace L0 { namespace ult { template <> struct WhiteBox<::L0::Module> : public ::L0::ModuleImp { using BaseClass = ::L0::ModuleImp; using BaseClass::BaseClass; using BaseClass::copyPatchedSegments; using BaseClass::device; using BaseClass::exportedFunctionsSurface; using BaseClass::isFullyLinked; using BaseClass::kernelImmDatas; using BaseClass::symbols; using BaseClass::translationUnit; using BaseClass::type; using BaseClass::unresolvedExternalsInfo; }; using Module = WhiteBox<::L0::Module>; template <> struct Mock : public Module { Mock(::L0::Device *device, ModuleBuildLog 
*moduleBuildLog, ModuleType type) : WhiteBox(device, moduleBuildLog, type) {} Mock(::L0::Device *device, ModuleBuildLog *moduleBuildLog) : Mock(device, moduleBuildLog, ModuleType::User){}; ADDMETHOD_NOBASE(createKernel, ze_result_t, ZE_RESULT_SUCCESS, (const ze_kernel_desc_t *desc, ze_kernel_handle_t *phFunction)); ADDMETHOD_NOBASE(destroy, ze_result_t, ZE_RESULT_SUCCESS, ()); ADDMETHOD_NOBASE(getFunctionPointer, ze_result_t, ZE_RESULT_SUCCESS, (const char *pKernelName, void **pfnFunction)); ADDMETHOD_NOBASE(getNativeBinary, ze_result_t, ZE_RESULT_SUCCESS, (size_t * pSize, uint8_t *pModuleNativeBinary)); ADDMETHOD_CONST_NOBASE(getKernelImmutableData, const L0::KernelImmutableData *, nullptr, (const char *functionName)); ADDMETHOD_CONST_NOBASE(getMaxGroupSize, uint32_t, 256, ()); ADDMETHOD_NOBASE(getKernelNames, ze_result_t, ZE_RESULT_SUCCESS, (uint32_t * pCount, const char **pNames)); ADDMETHOD_NOBASE(performDynamicLink, ze_result_t, ZE_RESULT_SUCCESS, (uint32_t numModules, ze_module_handle_t *phModules, ze_module_build_log_handle_t *phLinkLog)); ADDMETHOD_NOBASE(getProperties, ze_result_t, ZE_RESULT_SUCCESS, (ze_module_properties_t * pModuleProperties)); ADDMETHOD_NOBASE(getGlobalPointer, ze_result_t, ZE_RESULT_SUCCESS, (const char *pGlobalName, size_t *pSize, void **pPtr)); ADDMETHOD_CONST_NOBASE(isDebugEnabled, bool, false, ()); }; struct MockModuleTranslationUnit : public L0::ModuleTranslationUnit { MockModuleTranslationUnit(L0::Device *device) : L0::ModuleTranslationUnit(device) { } bool processUnpackedBinary() override { return true; } bool compileGenBinary(NEO::TranslationInput inputArgs, bool staticLink) override { if (unpackedDeviceBinarySize && unpackedDeviceBinary) { return true; } else { return ModuleTranslationUnit::compileGenBinary(inputArgs, staticLink); } } }; struct MockModule : public L0::ModuleImp { using ModuleImp::debugEnabled; using ModuleImp::debugModuleHandle; using ModuleImp::kernelImmDatas; using ModuleImp::populateHostGlobalSymbolsMap; 
using ModuleImp::symbols; using ModuleImp::translationUnit; MockModule(L0::Device *device, L0::ModuleBuildLog *moduleBuildLog, L0::ModuleType type) : ModuleImp(device, moduleBuildLog, type) { maxGroupSize = 32; }; ~MockModule() = default; const KernelImmutableData *getKernelImmutableData(const char *functionName) const override { return kernelImmData; } KernelImmutableData *kernelImmData = nullptr; }; struct MockCompilerInterface : public NEO::CompilerInterface { NEO::TranslationOutput::ErrorCode build(const NEO::Device &device, const NEO::TranslationInput &input, NEO::TranslationOutput &output) override { receivedApiOptions = input.apiOptions.begin(); inputInternalOptions = input.internalOptions.begin(); if (failBuild) { return NEO::TranslationOutput::ErrorCode::BuildFailure; } return NEO::TranslationOutput::ErrorCode::Success; } NEO::TranslationOutput::ErrorCode link(const NEO::Device &device, const NEO::TranslationInput &input, NEO::TranslationOutput &output) override { receivedApiOptions = input.apiOptions.begin(); inputInternalOptions = input.internalOptions.begin(); return NEO::TranslationOutput::ErrorCode::Success; } std::string receivedApiOptions; std::string inputInternalOptions; bool failBuild = false; }; template struct MockCompilerInterfaceWithSpecConstants : public NEO::CompilerInterface { MockCompilerInterfaceWithSpecConstants(uint32_t moduleNumSpecConstants) : moduleNumSpecConstants(moduleNumSpecConstants) { } NEO::TranslationOutput::ErrorCode build(const NEO::Device &device, const NEO::TranslationInput &input, NEO::TranslationOutput &output) override { EXPECT_EQ(moduleNumSpecConstants, input.specializedValues.size()); return NEO::TranslationOutput::ErrorCode::Success; } NEO::TranslationOutput::ErrorCode link(const NEO::Device &device, const NEO::TranslationInput &input, NEO::TranslationOutput &output) override { EXPECT_EQ(moduleNumSpecConstants, input.specializedValues.size()); return NEO::TranslationOutput::ErrorCode::Success; } 
NEO::TranslationOutput::ErrorCode getSpecConstantsInfo(const NEO::Device &device, ArrayRef srcSpirV, NEO::SpecConstantInfo &output) override { output.idsBuffer.reset(new NEO::MockCIFBuffer()); output.sizesBuffer.reset(new NEO::MockCIFBuffer()); for (uint32_t i = 0; i < moduleNumSpecConstants; i++) { output.idsBuffer->PushBackRawCopy(moduleSpecConstantsIds[i]); output.sizesBuffer->PushBackRawCopy(moduleSpecConstantsSizes[i]); } return NEO::TranslationOutput::ErrorCode::Success; } uint32_t moduleNumSpecConstants = 0u; const std::vector moduleSpecConstantsIds{2, 0, 1, 3, 5, 4}; const std::vector moduleSpecConstantsValuesT1{10, 20, 30}; const std::vector moduleSpecConstantsValuesT2{static_cast(std::numeric_limits::max()) + 60u, static_cast(std::numeric_limits::max()) + 50u, static_cast(std::numeric_limits::max()) + 40u}; const std::vector moduleSpecConstantsSizes{sizeof(T2), sizeof(T1), sizeof(T2), sizeof(T1), sizeof(T2), sizeof(T1)}; static_assert(sizeof(T1) < sizeof(T2)); }; struct MockCompilerInterfaceLinkFailure : public NEO::CompilerInterface { NEO::TranslationOutput::ErrorCode link(const NEO::Device &device, const NEO::TranslationInput &input, NEO::TranslationOutput &output) override { return NEO::TranslationOutput::ErrorCode::BuildFailure; } }; } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/mocks/mock_sampler.h000066400000000000000000000007611422164147700275750ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/core/source/sampler/sampler_hw.h" namespace L0 { namespace ult { template struct MockSamplerHw : public L0::SamplerCoreFamily { using BaseClass = ::L0::SamplerCoreFamily; using BaseClass::lodMax; using BaseClass::lodMin; using BaseClass::samplerState; }; } // namespace ult } // namespace L0 
compute-runtime-22.14.22890/level_zero/core/test/unit_tests/os_interface/000077500000000000000000000000001422164147700262715ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/os_interface/CMakeLists.txt000066400000000000000000000004771422164147700310410ustar00rootroot00000000000000# # Copyright (C) 2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(NEO_L0_OS_INTERFACE_TESTS ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/os_context_l0_tests.cpp ) add_subdirectories() target_sources(${TARGET_NAME} PRIVATE ${NEO_L0_OS_INTERFACE_TESTS} ) compute-runtime-22.14.22890/level_zero/core/test/unit_tests/os_interface/os_context_l0_tests.cpp000066400000000000000000000051471422164147700330060ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/device_factory.h" #include "shared/source/os_interface/os_context.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/default_hw_info.h" #include "shared/test/common/helpers/engine_descriptor_helper.h" #include "shared/test/common/mocks/mock_device.h" #include "gtest/gtest.h" using namespace NEO; struct DeferredOsContextCreationL0Tests : ::testing::Test { void SetUp() override { device = std::unique_ptr{MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())}; DeviceFactory::prepareDeviceEnvironments(*device->getExecutionEnvironment()); } std::unique_ptr createOsContext(EngineTypeUsage engineTypeUsage, bool defaultEngine) { OSInterface *osInterface = device->getRootDeviceEnvironment().osInterface.get(); std::unique_ptr osContext{OsContext::create(osInterface, 0, EngineDescriptorHelper::getDefaultDescriptor(engineTypeUsage))}; EXPECT_FALSE(osContext->isInitialized()); return osContext; } void expectContextCreation(EngineTypeUsage engineTypeUsage, bool defaultEngine, bool expectedImmediate) { auto osContext = 
createOsContext(engineTypeUsage, defaultEngine); const bool immediate = osContext->isImmediateContextInitializationEnabled(defaultEngine); EXPECT_EQ(expectedImmediate, immediate); if (immediate) { osContext->ensureContextInitialized(); EXPECT_TRUE(osContext->isInitialized()); } } void expectDeferredContextCreation(EngineTypeUsage engineTypeUsage, bool defaultEngine) { expectContextCreation(engineTypeUsage, defaultEngine, false); } void expectImmediateContextCreation(EngineTypeUsage engineTypeUsage, bool defaultEngine) { expectContextCreation(engineTypeUsage, defaultEngine, true); } std::unique_ptr device; static inline const EngineTypeUsage engineTypeUsageBlitter{aub_stream::ENGINE_BCS, EngineUsage::Regular}; }; TEST_F(DeferredOsContextCreationL0Tests, givenBlitterEngineWhenCreatingOsContextThenOsContextInitializationIsDeferred) { DebugManagerStateRestore restore{}; expectDeferredContextCreation(engineTypeUsageBlitter, false); DebugManager.flags.DeferOsContextInitialization.set(1); expectDeferredContextCreation(engineTypeUsageBlitter, false); DebugManager.flags.DeferOsContextInitialization.set(0); expectImmediateContextCreation(engineTypeUsageBlitter, false); } compute-runtime-22.14.22890/level_zero/core/test/unit_tests/os_interface/windows/000077500000000000000000000000001422164147700277635ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/os_interface/windows/CMakeLists.txt000066400000000000000000000006611422164147700325260ustar00rootroot00000000000000# # Copyright (C) 2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(NEO_L0_OS_INTERFACE_TESTS_WINDOWS ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/wddm_l0_tests.cpp ) set_property(GLOBAL PROPERTY NEO_L0_OS_INTERFACE_TESTS_WINDOWS ${NEO_L0_OS_INTERFACE_TESTS_WINDOWS}) if(WIN32) target_sources(${TARGET_NAME} PRIVATE ${NEO_L0_OS_INTERFACE_TESTS_WINDOWS} ) endif() 
compute-runtime-22.14.22890/level_zero/core/test/unit_tests/os_interface/windows/wddm_l0_tests.cpp000066400000000000000000000010401422164147700332320ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/os_interface/windows/wddm_fixture.h" #include "shared/test/common/test_macros/test.h" namespace NEO { TEST_F(WddmTestWithMockGdiDll, givenWddmWhenContextCreatedThenHintPassedIsOneApiL0) { init(); auto createContextParams = getCreateContextDataFcn(); EXPECT_EQ(D3DKMT_CLIENTHINT_ONEAPI_LEVEL0, createContextParams->ClientHint); } } // namespace NEOcompute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/000077500000000000000000000000001422164147700253135ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/CMakeLists.txt000066400000000000000000000004121422164147700300500ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/deferred_deleter_test.cpp ) add_subdirectories() compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/builtin/000077500000000000000000000000001422164147700267615ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/builtin/CMakeLists.txt000066400000000000000000000010551422164147700315220ustar00rootroot00000000000000# # Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT # target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/create_ult_builtin_functions_lib.cpp ${CMAKE_CURRENT_SOURCE_DIR}/built_in_tests_l0.cpp ${CMAKE_CURRENT_SOURCE_DIR}/builtin_functions_tests.cpp ) if(TARGET ${BUILTINS_SPIRV_LIB_NAME}) target_sources(${TARGET_NAME} PRIVATE 
${CMAKE_CURRENT_SOURCE_DIR}/built_in_spv_tests_l0.cpp ) endif() built_in_spv_tests_l0.cpp000066400000000000000000000062361422164147700337270ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/builtin/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/built_ins/built_ins.h" #include "shared/test/common/fixtures/device_fixture.h" #include "shared/test/common/mocks/mock_builtinslib.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/source/builtin/builtin_functions_lib_impl.h" #include "level_zero/core/test/unit_tests/mocks/mock_device_recompile_built_ins.h" using namespace NEO; using BuiltInTestL0 = Test; TEST_F(BuiltInTestL0, GivenBuiltinTypeIntermediateWhenGettingBuiltinResourceForNotRegisteredRevisionThenResourceSizeIsNonZero) { pDevice->getRootDeviceEnvironment().getMutableHardwareInfo()->platform.usRevId += 0xdead; auto mockBuiltinsLib = std::unique_ptr(new MockBuiltinsLib()); EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::CopyBufferToBuffer, BuiltinCode::ECodeType::Intermediate, *pDevice).size()); EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::CopyBufferRect, BuiltinCode::ECodeType::Intermediate, *pDevice).size()); EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::FillBuffer, BuiltinCode::ECodeType::Intermediate, *pDevice).size()); EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::CopyBufferToImage3d, BuiltinCode::ECodeType::Intermediate, *pDevice).size()); EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::CopyImage3dToBuffer, BuiltinCode::ECodeType::Intermediate, *pDevice).size()); EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::CopyImageToImage1d, BuiltinCode::ECodeType::Intermediate, *pDevice).size()); EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::CopyImageToImage2d, BuiltinCode::ECodeType::Intermediate, *pDevice).size()); EXPECT_NE(0u, 
mockBuiltinsLib->getBuiltinResource(EBuiltInOps::CopyImageToImage3d, BuiltinCode::ECodeType::Intermediate, *pDevice).size()); EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::FillImage1d, BuiltinCode::ECodeType::Intermediate, *pDevice).size()); EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::FillImage2d, BuiltinCode::ECodeType::Intermediate, *pDevice).size()); EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::FillImage3d, BuiltinCode::ECodeType::Intermediate, *pDevice).size()); EXPECT_EQ(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::COUNT, BuiltinCode::ECodeType::Intermediate, *pDevice).size()); } HWTEST_F(BuiltInTestL0, givenDeviceWithUnregisteredBinaryBuiltinWhenGettingBuiltinKernelThenTakeBinaryBuiltinFromDefaultRevision) { pDevice->incRefInternal(); L0::ult::MockDeviceForRebuildBuilins deviceL0(pDevice); pDevice->getRootDeviceEnvironment().getMutableHardwareInfo()->platform.usRevId += 0xdead; L0::BuiltinFunctionsLibImpl builtinFunctionsLib{&deviceL0, pDevice->getBuiltIns()}; for (uint32_t builtId = 0; builtId < static_cast(L0::Builtin::COUNT); builtId++) { deviceL0.formatForModule = {}; ASSERT_NE(nullptr, builtinFunctionsLib.getFunction(static_cast(builtId))); EXPECT_EQ(ZE_MODULE_FORMAT_NATIVE, deviceL0.formatForModule); } } compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/builtin/built_in_tests_l0.cpp000066400000000000000000000116001422164147700331050ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/built_ins/built_ins.h" #include "shared/source/helpers/api_specific_config.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/built_ins/built_in_tests_shared.inl" #include "gtest/gtest.h" #include using namespace NEO; TEST(BuiltInTestsL0, 
givenUseBindlessBuiltinInApiDependentModeWhenBinExtensionPassedThenNameHasCorrectPrefix) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.UseBindlessMode.set(-1); EBuiltInOps::Type builtin = EBuiltInOps::CopyBufferToBuffer; const std::string extension = ".bin"; const std::string platformName = "platformName"; const uint32_t deviceRevId = 123; std::string prefix = ApiSpecificConfig::getBindlessConfiguration() ? "bindless" : "bindful"; std::string resourceNameGeneric = createBuiltinResourceName(builtin, extension); std::string resourceNameForPlatform = createBuiltinResourceName(builtin, extension, platformName); std::string resourceNameForPlatformAndStepping = createBuiltinResourceName(builtin, extension, platformName, deviceRevId); std::string expectedResourceNameGeneric = prefix + "_copy_buffer_to_buffer.builtin_kernel.bin"; std::string expectedResourceNameForPlatform = platformName.c_str(); expectedResourceNameForPlatform += "_0_" + prefix + "_copy_buffer_to_buffer.builtin_kernel.bin"; std::string expectedResourceNameForPlatformAndStepping = platformName.c_str(); expectedResourceNameForPlatformAndStepping += "_"; expectedResourceNameForPlatformAndStepping += std::to_string(deviceRevId).c_str(); expectedResourceNameForPlatformAndStepping += "_" + prefix + "_copy_buffer_to_buffer.builtin_kernel.bin"; EXPECT_EQ(0, strcmp(expectedResourceNameGeneric.c_str(), resourceNameGeneric.c_str())); EXPECT_EQ(0, strcmp(expectedResourceNameForPlatform.c_str(), resourceNameForPlatform.c_str())); EXPECT_EQ(0, strcmp(expectedResourceNameForPlatformAndStepping.c_str(), resourceNameForPlatformAndStepping.c_str())); } TEST(BuiltInTestsL0, givenUseBindlessBuiltinDisabledInL0ApiWhenBinExtensionPassedThenNameHasBindfulPrefix) { givenUseBindlessBuiltinDisabledWhenBinExtensionPassedThenNameHasBindfulPrefix(); } TEST(BuiltInTestsL0, givenUseBindlessBuiltinEnabledInL0ApiWhenBinExtensionPassedThenNameHasBindlessPrefix) { 
givenUseBindlessBuiltinEnabledWhenBinExtensionPassedThenNameHasBindlessPrefix(); } TEST(BuiltInTestsL0, GivenBindlessConfigWhenGettingBuiltinResourceThenResourceSizeIsNonZero) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.UseBindlessMode.set(1); auto hwInfo = *defaultHwInfo; auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); auto mockBuiltinsLib = std::unique_ptr(new MockBuiltinsLib()); ASSERT_LE(2u, mockBuiltinsLib->allStorages.size()); bool bindlessFound = false; auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); std::string resourceNameGeneric = createBuiltinResourceName(EBuiltInOps::CopyBufferToBuffer, BuiltinCode::getExtension(BuiltinCode::ECodeType::Binary)); std::string resourceNameForPlatformType = createBuiltinResourceName(EBuiltInOps::CopyBufferToBuffer, BuiltinCode::getExtension(BuiltinCode::ECodeType::Binary), getFamilyNameWithType(hwInfo), hwHelper.getDefaultRevisionId(hwInfo)); std::string resourceNameForPlatformTypeAndStepping = createBuiltinResourceName(EBuiltInOps::CopyBufferToBuffer, BuiltinCode::getExtension(BuiltinCode::ECodeType::Binary), getFamilyNameWithType(hwInfo), hwInfo.platform.usRevId); for (const auto &storage : mockBuiltinsLib->allStorages) { if (storage->load(resourceNameForPlatformTypeAndStepping).size() != 0) { bindlessFound = true; } } EXPECT_TRUE(bindlessFound); EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::CopyBufferToBuffer, BuiltinCode::ECodeType::Binary, *device).size()); EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::CopyBufferRect, BuiltinCode::ECodeType::Binary, *device).size()); EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::FillBuffer, BuiltinCode::ECodeType::Binary, *device).size()); EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::QueryKernelTimestamps, BuiltinCode::ECodeType::Binary, *device).size()); EXPECT_EQ(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::COUNT, 
BuiltinCode::ECodeType::Binary, *device).size()); } builtin_functions_tests.cpp000066400000000000000000000356551422164147700344040ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/builtin/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/built_ins/built_ins.h" #include "shared/source/execution_environment/root_device_environment.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/mocks/mock_compiler_interface_spirv.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/source/builtin/builtin_functions_lib_impl.h" #include "level_zero/core/source/device/device_imp.h" #include "level_zero/core/source/module/module.h" #include "level_zero/core/source/module/module_imp.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_device_for_spirv.h" #include "gtest/gtest.h" namespace L0 { namespace ult { template class BuiltinFunctionsLibFixture : public DeviceFixture { public: struct MockBuiltinFunctionsLibImpl : BuiltinFunctionsLibImpl { using BuiltinFunctionsLibImpl::builtins; using BuiltinFunctionsLibImpl::getFunction; using BuiltinFunctionsLibImpl::imageBuiltins; MockBuiltinFunctionsLibImpl(L0::Device *device, NEO::BuiltIns *builtInsLib) : BuiltinFunctionsLibImpl(device, builtInsLib) {} std::unique_ptr loadBuiltIn(NEO::EBuiltInOps::Type builtin, const char *builtInName) override { ze_result_t res; std::unique_ptr module; ze_module_handle_t moduleHandle; ze_module_desc_t moduleDesc = {}; moduleDesc.format = ZE_MODULE_FORMAT_NATIVE; moduleDesc.pInputModule = nullptr; moduleDesc.inputSize = 0u; res = device->createModule(&moduleDesc, &moduleHandle, nullptr, ModuleType::Builtin); UNRECOVERABLE_IF(res != ZE_RESULT_SUCCESS); module.reset(Module::fromHandle(moduleHandle)); std::unique_ptr kernel; ze_kernel_handle_t kernelHandle; 
ze_kernel_desc_t kernelDesc = {}; kernelDesc.pKernelName = builtInName; res = module->createKernel(&kernelDesc, &kernelHandle); DEBUG_BREAK_IF(res != ZE_RESULT_SUCCESS); kernel.reset(Kernel::fromHandle(kernelHandle)); return std::unique_ptr(new MockBuiltinData{std::move(module), std::move(kernel)}); } }; struct MockBuiltinData : BuiltinFunctionsLibImpl::BuiltinData { using BuiltinFunctionsLibImpl::BuiltinData::func; using BuiltinFunctionsLibImpl::BuiltinData::module; MockBuiltinData(std::unique_ptr &&mod, std::unique_ptr &&ker) { module = std::move(mod); func = std::move(ker); } ~MockBuiltinData() override { module.release(); } }; void SetUp() { DeviceFixture::SetUp(); mockDevicePtr = std::unique_ptr>(new MockDeviceForSpv(device->getNEODevice(), device->getNEODevice()->getExecutionEnvironment(), driverHandle.get())); mockBuiltinFunctionsLibImpl.reset(new MockBuiltinFunctionsLibImpl(mockDevicePtr.get(), neoDevice->getBuiltIns())); } void TearDown() { mockBuiltinFunctionsLibImpl.reset(); DeviceFixture::TearDown(); } std::unique_ptr mockBuiltinFunctionsLibImpl; std::unique_ptr> mockDevicePtr; }; template using TestBuiltinFunctionsLibImpl = Test>; class TestBuiltinFunctionsLibImplDefault : public TestBuiltinFunctionsLibImpl {}; class TestBuiltinFunctionsLibImplStateless : public TestBuiltinFunctionsLibImpl {}; class TestBuiltinFunctionsLibImplImages : public TestBuiltinFunctionsLibImpl {}; HWTEST_F(TestBuiltinFunctionsLibImplImages, givenImageSupportThenEachBuiltinImageFunctionsIsLoadedOnlyOnce) { L0::Kernel *initializedImageBuiltins[static_cast(ImageBuiltin::COUNT)]; for (uint32_t builtId = 0; builtId < static_cast(ImageBuiltin::COUNT); builtId++) { EXPECT_EQ(nullptr, mockBuiltinFunctionsLibImpl->imageBuiltins[builtId]); } if (mockDevicePtr.get()->getHwInfo().capabilityTable.supportsImages) { for (uint32_t builtId = 0; builtId < static_cast(ImageBuiltin::COUNT); builtId++) { EXPECT_NE(nullptr, mockBuiltinFunctionsLibImpl->getImageFunction(static_cast(builtId))); 
EXPECT_NE(nullptr, mockBuiltinFunctionsLibImpl->imageBuiltins[builtId]); initializedImageBuiltins[builtId] = mockBuiltinFunctionsLibImpl->imageBuiltins[builtId]->func.get(); } for (uint32_t builtId = 0; builtId < static_cast(ImageBuiltin::COUNT); builtId++) { EXPECT_EQ(initializedImageBuiltins[builtId], mockBuiltinFunctionsLibImpl->getImageFunction(static_cast(builtId))); } } } HWTEST_F(TestBuiltinFunctionsLibImplImages, givenImageSupportAndWrongIdWhenCallingBuiltinImageFunctionThenExceptionIsThrown) { for (uint32_t builtId = 0; builtId < static_cast(ImageBuiltin::COUNT); builtId++) { EXPECT_EQ(nullptr, mockBuiltinFunctionsLibImpl->imageBuiltins[builtId]); } if (mockDevicePtr.get()->getHwInfo().capabilityTable.supportsImages) { uint32_t builtId = static_cast(ImageBuiltin::COUNT) + 1; EXPECT_THROW(mockBuiltinFunctionsLibImpl->initBuiltinImageKernel(static_cast(builtId)), std::exception); } } HWTEST_F(TestBuiltinFunctionsLibImplDefault, givenCallsToGetFunctionThenEachBuiltinFunctionsIsLoadedOnlyOnce) { L0::Kernel *initializedBuiltins[static_cast(Builtin::COUNT)]; for (uint32_t builtId = 0; builtId < static_cast(Builtin::COUNT); builtId++) { EXPECT_EQ(nullptr, mockBuiltinFunctionsLibImpl->builtins[builtId]); } for (uint32_t builtId = 0; builtId < static_cast(Builtin::COUNT); builtId++) { EXPECT_NE(nullptr, mockBuiltinFunctionsLibImpl->getFunction(static_cast(builtId))); EXPECT_NE(nullptr, mockBuiltinFunctionsLibImpl->builtins[builtId]); initializedBuiltins[builtId] = mockBuiltinFunctionsLibImpl->builtins[builtId]->func.get(); } for (uint32_t builtId = 0; builtId < static_cast(Builtin::COUNT); builtId++) { EXPECT_EQ(initializedBuiltins[builtId], mockBuiltinFunctionsLibImpl->getFunction(static_cast(builtId))); } } HWTEST_F(TestBuiltinFunctionsLibImplStateless, givenCallsToGetFunctionThenEachBuiltinFunctionsIsLoadedOnlyOnce) { L0::Kernel *initializedBuiltins[static_cast(Builtin::COUNT)]; for (uint32_t builtId = 0; builtId < static_cast(Builtin::COUNT); builtId++) { 
EXPECT_EQ(nullptr, mockBuiltinFunctionsLibImpl->builtins[builtId]); } for (uint32_t builtId = 0; builtId < static_cast(Builtin::COUNT); builtId++) { EXPECT_NE(nullptr, mockBuiltinFunctionsLibImpl->getFunction(static_cast(builtId))); EXPECT_NE(nullptr, mockBuiltinFunctionsLibImpl->builtins[builtId]); initializedBuiltins[builtId] = mockBuiltinFunctionsLibImpl->builtins[builtId]->func.get(); } for (uint32_t builtId = 0; builtId < static_cast(Builtin::COUNT); builtId++) { EXPECT_EQ(initializedBuiltins[builtId], mockBuiltinFunctionsLibImpl->getFunction(static_cast(builtId))); } } HWTEST_F(TestBuiltinFunctionsLibImplDefault, givenCallToBuiltinFunctionWithWrongIdThenExceptionIsThrown) { for (uint32_t builtId = 0; builtId < static_cast(Builtin::COUNT); builtId++) { EXPECT_EQ(nullptr, mockBuiltinFunctionsLibImpl->builtins[builtId]); } uint32_t builtId = static_cast(Builtin::COUNT) + 1; EXPECT_THROW(mockBuiltinFunctionsLibImpl->initBuiltinKernel(static_cast(builtId)), std::exception); } HWTEST_F(TestBuiltinFunctionsLibImplStateless, givenCallToStatelessBuiltinFunctionWithWrongIdThenExceptionIsThrown) { for (uint32_t builtId = 0; builtId < static_cast(Builtin::COUNT); builtId++) { EXPECT_EQ(nullptr, mockBuiltinFunctionsLibImpl->builtins[builtId]); } uint32_t builtId = static_cast(Builtin::COUNT) + 1; EXPECT_THROW(mockBuiltinFunctionsLibImpl->initBuiltinKernel(static_cast(builtId)), std::exception); } HWTEST_F(TestBuiltinFunctionsLibImplDefault, givenCompilerInterfaceWhenCreateDeviceAndImageSupportedThenBuiltinsImageFunctionsAreLoaded) { ze_result_t returnValue = ZE_RESULT_SUCCESS; neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()]->compilerInterface.reset(new NEO::MockCompilerInterfaceSpirv()); std::unique_ptr testDevice(Device::create(device->getDriverHandle(), neoDevice, false, &returnValue)); if (device->getHwInfo().capabilityTable.supportsImages) { for (uint32_t builtId = 0; builtId < static_cast(ImageBuiltin::COUNT); builtId++) 
{ EXPECT_NE(nullptr, testDevice->getBuiltinFunctionsLib()->getImageFunction(static_cast(builtId))); } } } HWTEST_F(TestBuiltinFunctionsLibImplDefault, givenCompilerInterfaceWhenCreateDeviceThenBuiltinsFunctionsAreLoaded) { ze_result_t returnValue = ZE_RESULT_SUCCESS; neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()]->compilerInterface.reset(new NEO::MockCompilerInterfaceSpirv()); std::unique_ptr testDevice(Device::create(device->getDriverHandle(), neoDevice, false, &returnValue)); for (uint32_t builtId = 0; builtId < static_cast(Builtin::COUNT); builtId++) { EXPECT_NE(nullptr, testDevice->getBuiltinFunctionsLib()->getFunction(static_cast(builtId))); } } HWTEST_F(TestBuiltinFunctionsLibImplDefault, givenRebuildPrecompiledKernelsDebugFlagWhenInitFuctionsThenIntermediateCodeForBuiltinsIsRequested) { struct MockDeviceForRebuildBuilins : public Mock { struct MockModuleForRebuildBuiltins : public ModuleImp { MockModuleForRebuildBuiltins(Device *device) : ModuleImp(device, nullptr, ModuleType::Builtin) {} ze_result_t createKernel(const ze_kernel_desc_t *desc, ze_kernel_handle_t *phFunction) override { *phFunction = nullptr; return ZE_RESULT_SUCCESS; } }; MockDeviceForRebuildBuilins(L0::Device *device) : Mock(device->getNEODevice(), static_cast(device->getExecEnvironment())) { driverHandle = device->getDriverHandle(); builtins = BuiltinFunctionsLib::create(this, neoDevice->getBuiltIns()); } ze_result_t createModule(const ze_module_desc_t *desc, ze_module_handle_t *module, ze_module_build_log_handle_t *buildLog, ModuleType type) override { EXPECT_EQ(desc->format, ZE_MODULE_FORMAT_IL_SPIRV); EXPECT_GT(desc->inputSize, 0u); EXPECT_NE(desc->pInputModule, nullptr); createModuleCalled = true; *module = new MockModuleForRebuildBuiltins(this); return ZE_RESULT_SUCCESS; } bool createModuleCalled = false; }; DebugManagerStateRestore dgbRestorer; NEO::DebugManager.flags.RebuildPrecompiledKernels.set(true); MockDeviceForRebuildBuilins 
testDevice(device); testDevice.builtins.reset(new BuiltinFunctionsLibImpl(&testDevice, neoDevice->getBuiltIns())); for (uint32_t builtId = 0; builtId < static_cast(Builtin::COUNT); builtId++) { testDevice.getBuiltinFunctionsLib()->initBuiltinKernel(static_cast(builtId)); } EXPECT_TRUE(testDevice.createModuleCalled); } HWTEST_F(TestBuiltinFunctionsLibImplDefault, givenNotToRebuildPrecompiledKernelsDebugFlagWhenInitFuctionsThenNativeCodeForBuiltinsIsRequested) { struct MockDeviceForRebuildBuilins : public Mock { MockDeviceForRebuildBuilins(L0::Device *device) : Mock(device->getNEODevice(), static_cast(device->getExecEnvironment())) { driverHandle = device->getDriverHandle(); builtins = BuiltinFunctionsLib::create(this, neoDevice->getBuiltIns()); } ze_result_t createModule(const ze_module_desc_t *desc, ze_module_handle_t *module, ze_module_build_log_handle_t *buildLog, ModuleType type) override { EXPECT_EQ(desc->format, ZE_MODULE_FORMAT_NATIVE); EXPECT_GT(desc->inputSize, 0u); EXPECT_NE(desc->pInputModule, nullptr); createModuleCalled = true; return DeviceImp::createModule(desc, module, buildLog, type); } bool createModuleCalled = false; }; DebugManagerStateRestore dgbRestorer; NEO::DebugManager.flags.RebuildPrecompiledKernels.set(false); MockDeviceForRebuildBuilins testDevice(device); L0::Device *testDevicePtr = &testDevice; testDevice.builtins.reset(new BuiltinFunctionsLibImpl(testDevicePtr, neoDevice->getBuiltIns())); for (uint32_t builtId = 0; builtId < static_cast(Builtin::COUNT); builtId++) { testDevice.getBuiltinFunctionsLib()->initBuiltinKernel(static_cast(builtId)); } EXPECT_TRUE(testDevice.createModuleCalled); } HWTEST_F(TestBuiltinFunctionsLibImplDefault, GivenBuiltinsWhenInitializingFunctionsThenModulesWithProperTypeAreCreated) { struct MockDeviceWithBuilins : public Mock { MockDeviceWithBuilins(L0::Device *device) : Mock(device->getNEODevice(), static_cast(device->getExecEnvironment())) { driverHandle = device->getDriverHandle(); builtins = 
BuiltinFunctionsLib::create(this, neoDevice->getBuiltIns()); } ze_result_t createModule(const ze_module_desc_t *desc, ze_module_handle_t *module, ze_module_build_log_handle_t *buildLog, ModuleType type) override { typeCreated = type; EXPECT_EQ(ModuleType::Builtin, type); return DeviceImp::createModule(desc, module, buildLog, type); } ModuleType typeCreated = ModuleType::User; }; MockDeviceWithBuilins testDevice(device); L0::Device *testDevicePtr = &testDevice; testDevice.builtins.reset(new BuiltinFunctionsLibImpl(testDevicePtr, neoDevice->getBuiltIns())); for (uint32_t builtId = 0; builtId < static_cast(Builtin::COUNT); builtId++) { testDevice.getBuiltinFunctionsLib()->initBuiltinKernel(static_cast(builtId)); } EXPECT_EQ(ModuleType::Builtin, testDevice.typeCreated); } } // namespace ult } // namespace L0 create_ult_builtin_functions_lib.cpp000066400000000000000000000011341422164147700362000ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/builtin/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/built_ins/built_ins.h" #include "level_zero/core/source/builtin/builtin_functions_lib_impl.h" #include "level_zero/core/test/unit_tests/mocks/mock_builtin_functions_lib_impl.h" namespace L0 { std::unique_ptr BuiltinFunctionsLib::create(Device *device, NEO::BuiltIns *builtins) { return std::unique_ptr(new ult::MockBuiltinFunctionsLibImpl(device, builtins)); } } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/cache/000077500000000000000000000000001422164147700263565ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/cache/CMakeLists.txt000066400000000000000000000003051422164147700311140ustar00rootroot00000000000000# # Copyright (C) 2021 Intel Corporation # # SPDX-License-Identifier: MIT # target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) add_subdirectories() 
compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/cache/linux/000077500000000000000000000000001422164147700275155ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/cache/linux/CMakeLists.txt000066400000000000000000000004461422164147700322610ustar00rootroot00000000000000# # Copyright (C) 2021 Intel Corporation # # SPDX-License-Identifier: MIT # if(UNIX) target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}test_cache_reservation_impl.cpp ) endif() test_cache_reservation_impl.cpp000066400000000000000000000032001422164147700357010ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/cache/linux/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "level_zero/core/source/cache/linux/cache_reservation_impl.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" using namespace NEO; namespace L0 { namespace ult { class CacheReservationFixture : public DeviceFixture { public: void SetUp() { DeviceFixture::SetUp(); auto deviceImp = static_cast(device); ASSERT_NE(nullptr, deviceImp->cacheReservation.get()); cache = deviceImp->cacheReservation.get(); } void TearDown() { DeviceFixture::TearDown(); } CacheReservation *cache = nullptr; }; using CacheReservationTest = Test; TEST_F(CacheReservationTest, GivenCacheReservationCreatedWhenCallingReserveCacheThenReturnFalse) { size_t cacheLevel = 3; size_t cacheReservationSize = 1024; auto result = cache->reserveCache(cacheLevel, cacheReservationSize); EXPECT_FALSE(result); } TEST_F(CacheReservationTest, GivenCacheReservationCreatedWhenCallingSetCacheAdviceThenReturnFalse) { void *ptr = reinterpret_cast(0x123456789); size_t regionSize = 512; ze_cache_ext_region_t cacheRegion = ze_cache_ext_region_t::ZE_CACHE_EXT_REGION_ZE_CACHE_REGION_DEFAULT; auto result = 
cache->setCacheAdvice(ptr, regionSize, cacheRegion); EXPECT_FALSE(result); } TEST_F(CacheReservationTest, GivenCacheReservationCreatedWhenCallingGetMaxCacheReservationSizeThenReturnZero) { EXPECT_EQ(0u, cache->getMaxCacheReservationSize()); } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/cache/windows/000077500000000000000000000000001422164147700300505ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/cache/windows/CMakeLists.txt000066400000000000000000000004241422164147700326100ustar00rootroot00000000000000# # Copyright (C) 2021 Intel Corporation # # SPDX-License-Identifier: MIT # if(WIN32) target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/test_cache_reservation_impl.cpp ) endif() test_cache_reservation_impl.cpp000066400000000000000000000032021422164147700362360ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/cache/windows/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "level_zero/core/source/cache/windows/cache_reservation_impl.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" using namespace NEO; namespace L0 { namespace ult { class CacheReservationFixture : public DeviceFixture { public: void SetUp() { DeviceFixture::SetUp(); auto deviceImp = static_cast(device); ASSERT_NE(nullptr, deviceImp->cacheReservation.get()); cache = deviceImp->cacheReservation.get(); } void TearDown() { DeviceFixture::TearDown(); } CacheReservation *cache = nullptr; }; using CacheReservationTest = Test; TEST_F(CacheReservationTest, GivenCacheReservationCreatedWhenCallingReserveCacheThenReturnFalse) { size_t cacheLevel = 3; size_t cacheReservationSize = 1024; auto result = cache->reserveCache(cacheLevel, cacheReservationSize); EXPECT_FALSE(result); } 
TEST_F(CacheReservationTest, GivenCacheReservationCreatedWhenCallingSetCacheAdviceThenReturnFalse) { void *ptr = reinterpret_cast(0x123456789); size_t regionSize = 512; ze_cache_ext_region_t cacheRegion = ze_cache_ext_region_t::ZE_CACHE_EXT_REGION_ZE_CACHE_REGION_DEFAULT; auto result = cache->setCacheAdvice(ptr, regionSize, cacheRegion); EXPECT_FALSE(result); } TEST_F(CacheReservationTest, GivenCacheReservationCreatedWhenCallingGetMaxCacheReservationSizeThenReturnZero) { EXPECT_EQ(0u, cache->getMaxCacheReservationSize()); } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/cmdlist/000077500000000000000000000000001422164147700267525ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/cmdlist/CMakeLists.txt000066400000000000000000000030421422164147700315110ustar00rootroot00000000000000# # Copyright (C) 2020-2022 Intel Corporation # # SPDX-License-Identifier: MIT # target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/test_cmdlist_1.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_cmdlist_2.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_cmdlist_3.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_cmdlist_4.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_cmdlist_5.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_cmdlist_6.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_cmdlist_api.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_cmdlist_append_barrier.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_cmdlist_append_event_reset.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_cmdlist_append_launch_kernel_1.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_cmdlist_append_launch_kernel_2.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_cmdlist_append_memory.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_cmdlist_append_signal_event.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_cmdlist_append_wait_on_events.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_cmdlist_blit.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_cmdlist_fill.cpp 
${CMAKE_CURRENT_SOURCE_DIR}/test_cmdlist_append_multipartition_prologue.cpp ) if(TESTS_XEHP_AND_LATER) target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/test_cmdlist_xehp_and_later.cpp ) endif() add_subdirectories() compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp000066400000000000000000002450661422164147700324110ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/wait_status.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/mocks/mock_command_stream_receiver.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/page_fault_manager/mock_cpu_page_fault_manager.h" #include "level_zero/core/source/cmdqueue/cmdqueue_imp.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h" namespace L0 { namespace ult { using ContextCommandListCreate = Test; TEST_F(ContextCommandListCreate, whenCreatingCommandListFromContextThenSuccessIsReturned) { ze_command_list_desc_t desc = {}; ze_command_list_handle_t hCommandList = {}; ze_result_t result = context->createCommandList(device, &desc, &hCommandList); EXPECT_EQ(ZE_RESULT_SUCCESS, result); L0::CommandList *commandList = L0::CommandList::fromHandle(hCommandList); commandList->destroy(); } TEST_F(ContextCommandListCreate, whenCreatingCommandListImmediateFromContextThenSuccessIsReturned) { ze_command_queue_desc_t desc = {}; ze_command_list_handle_t hCommandList = {}; ze_result_t result = context->createCommandListImmediate(device, &desc, &hCommandList); EXPECT_EQ(ZE_RESULT_SUCCESS, result); L0::CommandList *commandList = L0::CommandList::fromHandle(hCommandList); 
commandList->destroy(); } using CommandListCreate = Test; TEST_F(CommandListCreate, whenCommandListIsCreatedWithInvalidProductFamilyThenFailureIsReturned) { ze_result_t returnValue; std::unique_ptr commandList(CommandList::create(PRODUCT_FAMILY::IGFX_MAX_PRODUCT, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)); EXPECT_EQ(ZE_RESULT_ERROR_UNINITIALIZED, returnValue); ASSERT_EQ(nullptr, commandList); } TEST_F(CommandListCreate, whenCommandListImmediateIsCreatedWithInvalidProductFamilyThenFailureIsReturned) { ze_result_t returnValue; const ze_command_queue_desc_t desc = {}; bool internalEngine = true; std::unique_ptr commandList(CommandList::createImmediate(PRODUCT_FAMILY::IGFX_MAX_PRODUCT, device, &desc, internalEngine, NEO::EngineGroupType::RenderCompute, returnValue)); EXPECT_EQ(ZE_RESULT_ERROR_UNINITIALIZED, returnValue); ASSERT_EQ(nullptr, commandList); } TEST_F(CommandListCreate, whenCommandListIsCreatedThenItIsInitialized) { ze_result_t returnValue; std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)); ASSERT_NE(nullptr, commandList); EXPECT_EQ(device, commandList->device); ASSERT_GT(commandList->commandContainer.getCmdBufferAllocations().size(), 0u); auto numAllocations = 0u; auto allocation = whitebox_cast(commandList->commandContainer.getCmdBufferAllocations()[0]); ASSERT_NE(allocation, nullptr); ++numAllocations; ASSERT_NE(nullptr, commandList->commandContainer.getCommandStream()); for (uint32_t i = 0; i < NEO::HeapType::NUM_TYPES; i++) { auto heapType = static_cast(i); if (NEO::HeapType::DYNAMIC_STATE == heapType && !device->getHwInfo().capabilityTable.supportsImages) { ASSERT_EQ(commandList->commandContainer.getIndirectHeap(heapType), nullptr); } else { ASSERT_NE(commandList->commandContainer.getIndirectHeap(heapType), nullptr); ++numAllocations; ASSERT_NE(commandList->commandContainer.getIndirectHeapAllocation(heapType), nullptr); } } EXPECT_LT(0u, 
commandList->commandContainer.getCommandStream()->getAvailableSpace());
    // (Tail of the test begun on the previous line.) The residency container must hold exactly
    // numAllocations entries and its first entry must be `allocation`.
    ASSERT_EQ(commandList->commandContainer.getResidencyContainer().size(), numAllocations);
    EXPECT_EQ(commandList->commandContainer.getResidencyContainer().front(), allocation);
}

// A regular command list reports CommandList::defaultNumIddsPerBlock from its command container.
// NOTE(review): `std::unique_ptr` and `static_cast` appear without template arguments in this text.
TEST_F(CommandListCreate, givenRegularCommandListThenDefaultNumIddPerBlockIsUsed) {
    ze_result_t returnValue;
    std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));
    ASSERT_NE(nullptr, commandList);

    const uint32_t defaultNumIdds = CommandList::defaultNumIddsPerBlock;
    EXPECT_EQ(defaultNumIdds, commandList->commandContainer.getNumIddPerBlock());
}

// appendMemAdvise with a null pointer and zero size must return ZE_RESULT_ERROR_INVALID_ARGUMENT.
TEST_F(CommandListCreate, givenNonExistingPtrThenAppendMemAdviseReturnsError) {
    ze_result_t returnValue;
    std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));
    ASSERT_NE(nullptr, commandList);

    auto res = commandList->appendMemAdvise(device, nullptr, 0, ZE_MEMORY_ADVICE_SET_READ_MOSTLY);
    EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, res);
}

// appendMemoryPrefetch with a null pointer and zero size must return ZE_RESULT_ERROR_INVALID_ARGUMENT.
TEST_F(CommandListCreate, givenNonExistingPtrThenAppendMemoryPrefetchReturnsError) {
    ze_result_t returnValue;
    std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));
    ASSERT_NE(nullptr, commandList);

    auto res = commandList->appendMemoryPrefetch(nullptr, 0);
    EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, res);
}

// Allocates 10 bytes of device memory, then (continued on the next line) checks that
// appendMemAdvise still reports success when the memory manager is told to fail setMemAdvise.
TEST_F(CommandListCreate, givenValidPtrWhenAppendMemAdviseFailsThenReturnSuccess) {
    size_t size = 10;
    size_t alignment = 1u;
    void *ptr = nullptr;

    ze_device_mem_alloc_desc_t deviceDesc = {};
    auto res = context->allocDeviceMem(device->toHandle(), &deviceDesc, size, alignment, &ptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    EXPECT_NE(nullptr, ptr);

    ze_result_t returnValue;
    std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));
    ASSERT_NE(nullptr, commandList);

    auto
memoryManager = static_cast(device->getDriverHandle()->getMemoryManager());
    // (Tail of the test begun on the previous line.) failSetMemAdvise is switched on, yet the
    // append call is still expected to return ZE_RESULT_SUCCESS.
    memoryManager->failSetMemAdvise = true;

    res = commandList->appendMemAdvise(device, ptr, size, ZE_MEMORY_ADVICE_SET_PREFERRED_LOCATION);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);

    res = context->freeMem(ptr);
    ASSERT_EQ(res, ZE_RESULT_SUCCESS);
}

// appendMemAdvise(SET_READ_MOSTLY) on a valid 10-byte device allocation returns success.
// NOTE(review): `std::unique_ptr` and `static_cast` appear without template arguments in this text.
TEST_F(CommandListCreate, givenValidPtrWhenAppendMemAdviseSucceedsThenReturnSuccess) {
    size_t size = 10;
    size_t alignment = 1u;
    void *ptr = nullptr;

    ze_device_mem_alloc_desc_t deviceDesc = {};
    auto res = context->allocDeviceMem(device->toHandle(), &deviceDesc, size, alignment, &ptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    EXPECT_NE(nullptr, ptr);

    ze_result_t returnValue;
    std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));
    ASSERT_NE(nullptr, commandList);

    res = commandList->appendMemAdvise(device, ptr, size, ZE_MEMORY_ADVICE_SET_READ_MOSTLY);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);

    res = context->freeMem(ptr);
    ASSERT_EQ(res, ZE_RESULT_SUCCESS);
}

// Passing the enum's FORCE_UINT32 sentinel as the advice value is also expected to return success.
TEST_F(CommandListCreate, givenValidPtrThenAppendMemAdviseSetWithMaxHintThenSuccessReturned) {
    size_t size = 10;
    size_t alignment = 1u;
    void *ptr = nullptr;

    ze_device_mem_alloc_desc_t deviceDesc = {};
    auto res = context->allocDeviceMem(device->toHandle(), &deviceDesc, size, alignment, &ptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    EXPECT_NE(nullptr, ptr);

    ze_result_t returnValue;
    std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));
    ASSERT_NE(nullptr, commandList);

    res = commandList->appendMemAdvise(device, ptr, size, ZE_MEMORY_ADVICE_FORCE_UINT32);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);

    res = context->freeMem(ptr);
    ASSERT_EQ(res, ZE_RESULT_SUCCESS);
}

// SET_READ_MOSTLY followed by CLEAR_READ_MOSTLY must toggle flags.read_only 1 -> 0
// (the body continues on the next line).
TEST_F(CommandListCreate, givenValidPtrThenAppendMemAdviseSetAndClearReadMostlyThenMemAdviseReadOnlySet) {
    size_t size = 10;
    size_t alignment = 1u;
    void *ptr = nullptr;

    ze_device_mem_alloc_desc_t deviceDesc = {};
    auto res =
context->allocDeviceMem(device->toHandle(), &deviceDesc, size, alignment, &ptr);
    // (Tail of the test begun on the previous line.)
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    EXPECT_NE(nullptr, ptr);

    ze_result_t returnValue;
    NEO::MemAdviseFlags flags;
    std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));
    ASSERT_NE(nullptr, commandList);

    res = commandList->appendMemAdvise(device, ptr, size, ZE_MEMORY_ADVICE_SET_READ_MOSTLY);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    // The per-allocation advice flags are read back from the device's memAdviseSharedAllocations
    // map, keyed by the SVM allocation data looked up for ptr.
    auto allocData = device->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(ptr);
    L0::DeviceImp *deviceImp = static_cast((L0::Device::fromHandle(device)));
    flags = deviceImp->memAdviseSharedAllocations[allocData];
    EXPECT_EQ(1, flags.read_only);
    res = commandList->appendMemAdvise(device, ptr, size, ZE_MEMORY_ADVICE_CLEAR_READ_MOSTLY);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    flags = deviceImp->memAdviseSharedAllocations[allocData];
    EXPECT_EQ(0, flags.read_only);

    res = context->freeMem(ptr);
    ASSERT_EQ(res, ZE_RESULT_SUCCESS);
}

// SET_PREFERRED_LOCATION must set flags.device_preferred_location; the matching CLEAR call and
// its check continue on the next line.
// NOTE(review): `std::unique_ptr` and `static_cast` appear without template arguments in this text.
TEST_F(CommandListCreate, givenValidPtrThenAppendMemAdviseSetAndClearPreferredLocationThenMemAdvisePreferredDeviceSet) {
    size_t size = 10;
    size_t alignment = 1u;
    void *ptr = nullptr;

    ze_device_mem_alloc_desc_t deviceDesc = {};
    auto res = context->allocDeviceMem(device->toHandle(), &deviceDesc, size, alignment, &ptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    EXPECT_NE(nullptr, ptr);

    ze_result_t returnValue;
    NEO::MemAdviseFlags flags;
    std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));
    ASSERT_NE(nullptr, commandList);

    res = commandList->appendMemAdvise(device, ptr, size, ZE_MEMORY_ADVICE_SET_PREFERRED_LOCATION);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    auto allocData = device->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(ptr);
    L0::DeviceImp *deviceImp = static_cast((L0::Device::fromHandle(device)));
    flags = deviceImp->memAdviseSharedAllocations[allocData];
    EXPECT_EQ(1, flags.device_preferred_location);
    res =
commandList->appendMemAdvise(device, ptr, size, ZE_MEMORY_ADVICE_CLEAR_PREFERRED_LOCATION);
    // (Tail of the test begun on the previous line.) Clearing must reset device_preferred_location.
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    flags = deviceImp->memAdviseSharedAllocations[allocData];
    EXPECT_EQ(0, flags.device_preferred_location);

    res = context->freeMem(ptr);
    ASSERT_EQ(res, ZE_RESULT_SUCCESS);
}

// Both SET_NON_ATOMIC_MOSTLY and CLEAR_NON_ATOMIC_MOSTLY return success, but flags.non_atomic
// is expected to stay 0 after each call (the hint is accepted and not recorded).
// NOTE(review): `std::unique_ptr` and `static_cast` appear without template arguments in this text.
TEST_F(CommandListCreate, givenValidPtrWhenAppendMemAdviseSetAndClearNonAtomicMostlyThenMemAdviseNonAtomicIgnored) {
    size_t size = 10;
    size_t alignment = 1u;
    void *ptr = nullptr;

    ze_device_mem_alloc_desc_t deviceDesc = {};
    auto res = context->allocDeviceMem(device->toHandle(), &deviceDesc, size, alignment, &ptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    EXPECT_NE(nullptr, ptr);

    ze_result_t returnValue;
    NEO::MemAdviseFlags flags;
    std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));
    ASSERT_NE(nullptr, commandList);

    res = commandList->appendMemAdvise(device, ptr, size, ZE_MEMORY_ADVICE_SET_NON_ATOMIC_MOSTLY);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    auto allocData = device->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(ptr);
    L0::DeviceImp *deviceImp = static_cast((L0::Device::fromHandle(device)));
    flags = deviceImp->memAdviseSharedAllocations[allocData];
    EXPECT_EQ(0, flags.non_atomic);
    res = commandList->appendMemAdvise(device, ptr, size, ZE_MEMORY_ADVICE_CLEAR_NON_ATOMIC_MOSTLY);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    flags = deviceImp->memAdviseSharedAllocations[allocData];
    EXPECT_EQ(0, flags.non_atomic);

    res = context->freeMem(ptr);
    ASSERT_EQ(res, ZE_RESULT_SUCCESS);
}

// BIAS_CACHED / BIAS_UNCACHED must toggle cached_memory; the body continues on the next line.
TEST_F(CommandListCreate, givenValidPtrThenAppendMemAdviseSetAndClearCachingThenMemAdviseCachingSet) {
    size_t size = 10;
    size_t alignment = 1u;
    void *ptr = nullptr;

    ze_device_mem_alloc_desc_t deviceDesc = {};
    auto res = context->allocDeviceMem(device->toHandle(), &deviceDesc, size, alignment, &ptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    EXPECT_NE(nullptr, ptr);

    ze_result_t returnValue;
    NEO::MemAdviseFlags flags;
    std::unique_ptr
commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));
    // (Tail of the test begun on the previous line.)
    ASSERT_NE(nullptr, commandList);

    res = commandList->appendMemAdvise(device, ptr, size, ZE_MEMORY_ADVICE_BIAS_CACHED);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    auto allocData = device->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(ptr);
    L0::DeviceImp *deviceImp = static_cast((L0::Device::fromHandle(device)));
    flags = deviceImp->memAdviseSharedAllocations[allocData];
    EXPECT_EQ(1, flags.cached_memory);
    // cached_memory is checked in two places: the device's per-allocation map and the
    // memAdviseFlags member exposed by the memory manager.
    auto memoryManager = static_cast(device->getDriverHandle()->getMemoryManager());
    EXPECT_EQ(1, memoryManager->memAdviseFlags.cached_memory);
    res = commandList->appendMemAdvise(device, ptr, size, ZE_MEMORY_ADVICE_BIAS_UNCACHED);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    flags = deviceImp->memAdviseSharedAllocations[allocData];
    EXPECT_EQ(0, flags.cached_memory);
    EXPECT_EQ(0, memoryManager->memAdviseFlags.cached_memory);

    res = context->freeMem(ptr);
    ASSERT_EQ(res, ZE_RESULT_SUCCESS);
}

// Fixture alias for the mem-advise tests that also use `mockPageFaultManager`.
// NOTE(review): the template argument of `Test` (and of `std::unique_ptr` / `static_cast` below)
// is missing in this text; confirm the fixture type against the upstream file.
using CommandListMemAdvisePageFault = Test;

// Pre-sets cpu_migration_blocked = 1 on the allocation's flags, then (continued on the next
// line) sets and clears READ_MOSTLY and PREFERRED_LOCATION and expects the bit to end up cleared.
TEST_F(CommandListMemAdvisePageFault, givenValidPtrAndPageFaultHandlerThenAppendMemAdviseWithReadOnlyAndDevicePreferredClearsMigrationBlocked) {
    size_t size = 10;
    size_t alignment = 1u;
    void *ptr = nullptr;

    ze_device_mem_alloc_desc_t deviceDesc = {};
    auto res = context->allocDeviceMem(device->toHandle(), &deviceDesc, size, alignment, &ptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    EXPECT_NE(nullptr, ptr);

    ze_result_t returnValue;
    NEO::MemAdviseFlags flags;
    std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));
    ASSERT_NE(nullptr, commandList);

    L0::DeviceImp *deviceImp = static_cast((L0::Device::fromHandle(device)));

    auto allocData = device->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(ptr);
    flags = deviceImp->memAdviseSharedAllocations[allocData];
    flags.cpu_migration_blocked = 1;
    deviceImp->memAdviseSharedAllocations[allocData] = flags;

    res =
commandList->appendMemAdvise(device, ptr, size, ZE_MEMORY_ADVICE_SET_READ_MOSTLY);
    // (Tail of the test begun on the previous line.) Set both hints, then clear both, and
    // finally expect read_only, device_preferred_location and cpu_migration_blocked to be 0.
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    flags = deviceImp->memAdviseSharedAllocations[allocData];
    EXPECT_EQ(1, flags.read_only);
    res = commandList->appendMemAdvise(device, ptr, size, ZE_MEMORY_ADVICE_SET_PREFERRED_LOCATION);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    flags = deviceImp->memAdviseSharedAllocations[allocData];
    EXPECT_EQ(1, flags.device_preferred_location);
    res = commandList->appendMemAdvise(device, ptr, size, ZE_MEMORY_ADVICE_CLEAR_READ_MOSTLY);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    res = commandList->appendMemAdvise(device, ptr, size, ZE_MEMORY_ADVICE_CLEAR_PREFERRED_LOCATION);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    flags = deviceImp->memAdviseSharedAllocations[allocData];
    EXPECT_EQ(0, flags.read_only);
    EXPECT_EQ(0, flags.device_preferred_location);
    EXPECT_EQ(0, flags.cpu_migration_blocked);

    res = context->freeMem(ptr);
    ASSERT_EQ(res, ZE_RESULT_SUCCESS);
}

// After READ_MOSTLY and PREFERRED_LOCATION are set, the page fault manager's gpuDomainHandler
// is expected to be L0::handleGpuDomainTransferForHwWithHints (checked on the next line).
// NOTE(review): "Hanlder" in the test name is a typo for "Handler"; it is left untouched here
// because the name is the registered test identifier.
// NOTE(review): `std::unique_ptr` and `static_cast` appear without template arguments in this text.
TEST_F(CommandListMemAdvisePageFault, givenValidPtrAndPageFaultHandlerThenGpuDomainHanlderWithHintsIsSet) {
    size_t size = 10;
    size_t alignment = 1u;
    void *ptr = nullptr;

    ze_device_mem_alloc_desc_t deviceDesc = {};
    auto res = context->allocDeviceMem(device->toHandle(), &deviceDesc, size, alignment, &ptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    EXPECT_NE(nullptr, ptr);

    ze_result_t returnValue;
    NEO::MemAdviseFlags flags;
    std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));
    ASSERT_NE(nullptr, commandList);

    L0::DeviceImp *deviceImp = static_cast((L0::Device::fromHandle(device)));

    auto allocData = device->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(ptr);
    // Reads the entry and writes the same value back; no flag is modified between the two lines.
    flags = deviceImp->memAdviseSharedAllocations[allocData];
    deviceImp->memAdviseSharedAllocations[allocData] = flags;

    res = commandList->appendMemAdvise(device, ptr, size, ZE_MEMORY_ADVICE_SET_READ_MOSTLY);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    flags = deviceImp->memAdviseSharedAllocations[allocData];
    EXPECT_EQ(1,
flags.read_only);
    // (Tail of the test begun on the previous line.)
    res = commandList->appendMemAdvise(device, ptr, size, ZE_MEMORY_ADVICE_SET_PREFERRED_LOCATION);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    flags = deviceImp->memAdviseSharedAllocations[allocData];
    EXPECT_EQ(1, flags.device_preferred_location);

    // The installed GPU-domain handler must be the "with hints" variant.
    auto handlerWithHints = L0::handleGpuDomainTransferForHwWithHints;

    EXPECT_EQ(handlerWithHints, reinterpret_cast(mockPageFaultManager->gpuDomainHandler));

    res = context->freeMem(ptr);
    ASSERT_EQ(res, ZE_RESULT_SUCCESS);
}

// With both READ_MOSTLY and PREFERRED_LOCATION set, invoking the GPU-domain handler for a
// Gpu-domain page fault is expected to set cpu_migration_blocked (checked on the next line).
// NOTE(review): `std::unique_ptr`, `static_cast` and `reinterpret_cast` appear without template
// arguments in this text.
TEST_F(CommandListMemAdvisePageFault, givenValidPtrAndPageFaultHandlerAndGpuDomainHandlerWithHintsSetThenHandlerBlocksCpuMigration) {
    size_t size = 10;
    size_t alignment = 1u;
    void *ptr = nullptr;

    ze_device_mem_alloc_desc_t deviceDesc = {};
    auto res = context->allocDeviceMem(device->toHandle(), &deviceDesc, size, alignment, &ptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    EXPECT_NE(nullptr, ptr);

    ze_result_t returnValue;
    NEO::MemAdviseFlags flags;
    std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));
    ASSERT_NE(nullptr, commandList);

    L0::DeviceImp *deviceImp = static_cast((L0::Device::fromHandle(device)));

    auto allocData = device->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(ptr);

    res = commandList->appendMemAdvise(device, ptr, size, ZE_MEMORY_ADVICE_SET_READ_MOSTLY);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    flags = deviceImp->memAdviseSharedAllocations[allocData];
    EXPECT_EQ(1, flags.read_only);
    res = commandList->appendMemAdvise(device, ptr, size, ZE_MEMORY_ADVICE_SET_PREFERRED_LOCATION);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    flags = deviceImp->memAdviseSharedAllocations[allocData];
    EXPECT_EQ(1, flags.device_preferred_location);

    auto handlerWithHints = L0::handleGpuDomainTransferForHwWithHints;

    EXPECT_EQ(handlerWithHints, reinterpret_cast(mockPageFaultManager->gpuDomainHandler));

    // Fault description handed to the handler: the device is stored in cmdQ, domain is Gpu.
    NEO::PageFaultManager::PageFaultData pageData;
    pageData.cmdQ = deviceImp;
    pageData.domain = NEO::PageFaultManager::AllocationDomain::Gpu;
// Tail of the test begun on the previous line: run the handler and expect migration to be blocked.
mockPageFaultManager->gpuDomainHandler(mockPageFaultManager, ptr, pageData);
    flags = deviceImp->memAdviseSharedAllocations[allocData];
    EXPECT_EQ(1, flags.cpu_migration_blocked);

    res = context->freeMem(ptr);
    ASSERT_EQ(res, ZE_RESULT_SUCCESS);
}

// With only one of the two hints set at a time (READ_MOSTLY alone, then PREFERRED_LOCATION
// alone, then neither), the handler must leave cpu_migration_blocked at 0.
// The body continues on the next line.
// NOTE(review): `std::unique_ptr`, `static_cast` and `reinterpret_cast` appear without template
// arguments in this text.
TEST_F(CommandListMemAdvisePageFault, givenValidPtrAndPageFaultHandlerAndGpuDomainHandlerWithHintsSetAndOnlyReadOnlyOrDevicePreferredHintThenHandlerAllowsCpuMigration) {
    size_t size = 10;
    size_t alignment = 1u;
    void *ptr = nullptr;

    ze_device_mem_alloc_desc_t deviceDesc = {};
    auto res = context->allocDeviceMem(device->toHandle(), &deviceDesc, size, alignment, &ptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    EXPECT_NE(nullptr, ptr);

    ze_result_t returnValue;
    NEO::MemAdviseFlags flags;
    std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));
    ASSERT_NE(nullptr, commandList);

    L0::DeviceImp *deviceImp = static_cast((L0::Device::fromHandle(device)));

    auto allocData = device->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(ptr);

    // Step 1: READ_MOSTLY only.
    res = commandList->appendMemAdvise(device, ptr, size, ZE_MEMORY_ADVICE_SET_READ_MOSTLY);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    flags = deviceImp->memAdviseSharedAllocations[allocData];
    EXPECT_EQ(1, flags.read_only);

    auto handlerWithHints = L0::handleGpuDomainTransferForHwWithHints;

    EXPECT_EQ(handlerWithHints, reinterpret_cast(mockPageFaultManager->gpuDomainHandler));

    NEO::PageFaultManager::PageFaultData pageData;
    pageData.cmdQ = deviceImp;
    pageData.domain = NEO::PageFaultManager::AllocationDomain::Gpu;
    mockPageFaultManager->gpuDomainHandler(mockPageFaultManager, ptr, pageData);
    flags = deviceImp->memAdviseSharedAllocations[allocData];
    EXPECT_EQ(0, flags.cpu_migration_blocked);

    // Step 2: drop READ_MOSTLY, then set PREFERRED_LOCATION only.
    res = commandList->appendMemAdvise(device, ptr, size, ZE_MEMORY_ADVICE_CLEAR_READ_MOSTLY);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    flags = deviceImp->memAdviseSharedAllocations[allocData];
    EXPECT_EQ(0, flags.read_only);
    res = commandList->appendMemAdvise(device, ptr, size,
ZE_MEMORY_ADVICE_SET_PREFERRED_LOCATION);
    // (Tail of the test begun on the previous line.) PREFERRED_LOCATION alone: handler must not block.
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    flags = deviceImp->memAdviseSharedAllocations[allocData];
    EXPECT_EQ(1, flags.device_preferred_location);

    mockPageFaultManager->gpuDomainHandler(mockPageFaultManager, ptr, pageData);
    flags = deviceImp->memAdviseSharedAllocations[allocData];
    EXPECT_EQ(0, flags.cpu_migration_blocked);

    // Neither hint set: handler must still not block.
    res = commandList->appendMemAdvise(device, ptr, size, ZE_MEMORY_ADVICE_CLEAR_PREFERRED_LOCATION);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    flags = deviceImp->memAdviseSharedAllocations[allocData];
    EXPECT_EQ(0, flags.device_preferred_location);

    mockPageFaultManager->gpuDomainHandler(mockPageFaultManager, ptr, pageData);
    flags = deviceImp->memAdviseSharedAllocations[allocData];
    EXPECT_EQ(0, flags.cpu_migration_blocked);

    res = context->freeMem(ptr);
    ASSERT_EQ(res, ZE_RESULT_SUCCESS);
}

// With the PrintUmdSharedMigration debug flag set to 1 and a shared allocation, running the
// handler is expected to print a migration message (captured and counted on the next line).
// NOTE(review): the test name says "PrintUsm..." while the flag used is PrintUmdSharedMigration.
// NOTE(review): `std::unique_ptr` and `static_cast` appear without template arguments in this text.
TEST_F(CommandListMemAdvisePageFault, givenValidPtrAndPageFaultHandlerAndGpuDomainHandlerWithHintsSetAndWithPrintUsmSharedMigrationDebugKeyThenMessageIsPrinted) {
    // restorer is declared before the flag is changed — presumably it restores debug flags when
    // it goes out of scope; confirm against DebugManagerStateRestore.
    DebugManagerStateRestore restorer;
    DebugManager.flags.PrintUmdSharedMigration.set(1);

    size_t size = 10;
    size_t alignment = 1u;
    void *ptr = nullptr;

    // Unlike the neighbouring tests, this one allocates shared (host + device) memory.
    ze_device_mem_alloc_desc_t deviceDesc = {};
    ze_host_mem_alloc_desc_t hostDesc = {};
    auto res = context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, size, alignment, &ptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    EXPECT_NE(nullptr, ptr);

    ze_result_t returnValue;
    NEO::MemAdviseFlags flags;
    std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));
    ASSERT_NE(nullptr, commandList);

    L0::DeviceImp *deviceImp = static_cast((L0::Device::fromHandle(device)));

    auto allocData = device->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(ptr);

    res = commandList->appendMemAdvise(device, ptr, size, ZE_MEMORY_ADVICE_SET_READ_MOSTLY);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    flags = deviceImp->memAdviseSharedAllocations[allocData];
    EXPECT_EQ(1, flags.read_only);

    auto
handlerWithHints = L0::handleGpuDomainTransferForHwWithHints; EXPECT_EQ(handlerWithHints, reinterpret_cast(mockPageFaultManager->gpuDomainHandler)); testing::internal::CaptureStdout(); // start capturing NEO::PageFaultManager::PageFaultData pageData; pageData.cmdQ = deviceImp; pageData.domain = NEO::PageFaultManager::AllocationDomain::Gpu; mockPageFaultManager->gpuDomainHandler(mockPageFaultManager, ptr, pageData); flags = deviceImp->memAdviseSharedAllocations[allocData]; EXPECT_EQ(0, flags.cpu_migration_blocked); std::string output = testing::internal::GetCapturedStdout(); // stop capturing std::string expectedString = "UMD transferring shared allocation"; uint32_t occurrences = 0u; uint32_t expectedOccurrences = 1u; size_t idx = output.find(expectedString); while (idx != std::string::npos) { occurrences++; idx = output.find(expectedString, idx + 1); } EXPECT_EQ(expectedOccurrences, occurrences); res = context->freeMem(ptr); ASSERT_EQ(res, ZE_RESULT_SUCCESS); } TEST_F(CommandListMemAdvisePageFault, givenValidPtrAndPageFaultHandlerAndGpuDomainHandlerWithHintsSetAndInvalidHintsThenHandlerAllowsCpuMigration) { size_t size = 10; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; auto res = context->allocDeviceMem(device->toHandle(), &deviceDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, res); EXPECT_NE(nullptr, ptr); ze_result_t returnValue; NEO::MemAdviseFlags flags; std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)); ASSERT_NE(nullptr, commandList); L0::DeviceImp *deviceImp = static_cast((L0::Device::fromHandle(device))); auto allocData = device->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(ptr); res = commandList->appendMemAdvise(device, ptr, size, ZE_MEMORY_ADVICE_BIAS_CACHED); EXPECT_EQ(ZE_RESULT_SUCCESS, res); flags = deviceImp->memAdviseSharedAllocations[allocData]; EXPECT_EQ(1, flags.cached_memory); auto handlerWithHints = 
L0::handleGpuDomainTransferForHwWithHints;
    // (Tail of the test begun on the previous line.)

    EXPECT_EQ(handlerWithHints, reinterpret_cast(mockPageFaultManager->gpuDomainHandler));

    NEO::PageFaultManager::PageFaultData pageData;
    pageData.cmdQ = deviceImp;
    pageData.domain = NEO::PageFaultManager::AllocationDomain::Gpu;
    mockPageFaultManager->gpuDomainHandler(mockPageFaultManager, ptr, pageData);
    flags = deviceImp->memAdviseSharedAllocations[allocData];
    EXPECT_EQ(0, flags.cpu_migration_blocked);

    res = context->freeMem(ptr);
    ASSERT_EQ(res, ZE_RESULT_SUCCESS);
}

// Both hints are set, but the fault is described with the Cpu domain (assigned on the next
// line); the handler is then expected to leave cpu_migration_blocked at 0.
// NOTE(review): `std::unique_ptr`, `static_cast` and `reinterpret_cast` appear without template
// arguments in this text.
TEST_F(CommandListMemAdvisePageFault, givenValidPtrAndPageFaultHandlerAndGpuDomainHandlerWithHintsSetAndCpuDomainThenHandlerAllowsCpuMigration) {
    size_t size = 10;
    size_t alignment = 1u;
    void *ptr = nullptr;

    ze_device_mem_alloc_desc_t deviceDesc = {};
    auto res = context->allocDeviceMem(device->toHandle(), &deviceDesc, size, alignment, &ptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    EXPECT_NE(nullptr, ptr);

    ze_result_t returnValue;
    NEO::MemAdviseFlags flags;
    std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));
    ASSERT_NE(nullptr, commandList);

    L0::DeviceImp *deviceImp = static_cast((L0::Device::fromHandle(device)));

    auto allocData = device->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(ptr);

    res = commandList->appendMemAdvise(device, ptr, size, ZE_MEMORY_ADVICE_SET_READ_MOSTLY);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    flags = deviceImp->memAdviseSharedAllocations[allocData];
    EXPECT_EQ(1, flags.read_only);
    res = commandList->appendMemAdvise(device, ptr, size, ZE_MEMORY_ADVICE_SET_PREFERRED_LOCATION);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    flags = deviceImp->memAdviseSharedAllocations[allocData];
    EXPECT_EQ(1, flags.device_preferred_location);

    auto handlerWithHints = L0::handleGpuDomainTransferForHwWithHints;

    EXPECT_EQ(handlerWithHints, reinterpret_cast(mockPageFaultManager->gpuDomainHandler));

    NEO::PageFaultManager::PageFaultData pageData;
    pageData.cmdQ = deviceImp;
    pageData.domain =
NEO::PageFaultManager::AllocationDomain::Cpu;
    // (Tail of the test begun on the previous line.) Cpu-domain fault: migration stays allowed.
    mockPageFaultManager->gpuDomainHandler(mockPageFaultManager, ptr, pageData);
    flags = deviceImp->memAdviseSharedAllocations[allocData];
    EXPECT_EQ(0, flags.cpu_migration_blocked);

    res = context->freeMem(ptr);
    ASSERT_EQ(res, ZE_RESULT_SUCCESS);
}

// Both hints are set on `ptr`, but the handler is invoked with an unrelated address (0x1);
// the flags stored for `ptr` must then still show cpu_migration_blocked == 0 (checked on the
// next line).
// NOTE(review): `std::unique_ptr`, `static_cast` and `reinterpret_cast` appear without template
// arguments in this text.
TEST_F(CommandListMemAdvisePageFault, givenInvalidPtrAndPageFaultHandlerAndGpuDomainHandlerWithHintsSetThenHandlerAllowsCpuMigration) {
    size_t size = 10;
    size_t alignment = 1u;
    void *ptr = nullptr;

    ze_device_mem_alloc_desc_t deviceDesc = {};
    auto res = context->allocDeviceMem(device->toHandle(), &deviceDesc, size, alignment, &ptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    EXPECT_NE(nullptr, ptr);

    ze_result_t returnValue;
    NEO::MemAdviseFlags flags;
    std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));
    ASSERT_NE(nullptr, commandList);

    L0::DeviceImp *deviceImp = static_cast((L0::Device::fromHandle(device)));

    auto allocData = device->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(ptr);

    res = commandList->appendMemAdvise(device, ptr, size, ZE_MEMORY_ADVICE_SET_READ_MOSTLY);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    flags = deviceImp->memAdviseSharedAllocations[allocData];
    EXPECT_EQ(1, flags.read_only);
    res = commandList->appendMemAdvise(device, ptr, size, ZE_MEMORY_ADVICE_SET_PREFERRED_LOCATION);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    flags = deviceImp->memAdviseSharedAllocations[allocData];
    EXPECT_EQ(1, flags.device_preferred_location);

    auto handlerWithHints = L0::handleGpuDomainTransferForHwWithHints;

    EXPECT_EQ(handlerWithHints, reinterpret_cast(mockPageFaultManager->gpuDomainHandler));

    NEO::PageFaultManager::PageFaultData pageData;
    pageData.cmdQ = deviceImp;
    pageData.domain = NEO::PageFaultManager::AllocationDomain::Gpu;
    // Address that was never returned by the allocator in this test.
    void *alloc = reinterpret_cast(0x1);
    mockPageFaultManager->gpuDomainHandler(mockPageFaultManager, alloc, pageData);
    flags = deviceImp->memAdviseSharedAllocations[allocData];
    EXPECT_EQ(0,
flags.cpu_migration_blocked);
    // (Tail of the test begun on the previous line.)

    res = context->freeMem(ptr);
    ASSERT_EQ(res, ZE_RESULT_SUCCESS);
}

// appendMemoryPrefetch on a valid 10-byte device allocation returns success.
// NOTE(review): `std::unique_ptr` and `reinterpret_cast` appear without template arguments in this text.
TEST_F(CommandListCreate, givenValidPtrThenAppendMemoryPrefetchReturnsSuccess) {
    size_t size = 10;
    size_t alignment = 1u;
    void *ptr = nullptr;

    ze_device_mem_alloc_desc_t deviceDesc = {};
    auto res = context->allocDeviceMem(device->toHandle(), &deviceDesc, size, alignment, &ptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    EXPECT_NE(nullptr, ptr);

    ze_result_t returnValue;
    std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));
    ASSERT_NE(nullptr, commandList);

    res = commandList->appendMemoryPrefetch(ptr, size);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);

    res = context->freeMem(ptr);
    ASSERT_EQ(res, ZE_RESULT_SUCCESS);
}

// The internalEngine argument of createImmediate selects the CSR: with true the immediate
// queue's CSR equals the device's internal engine CSR, with false it differs.
TEST_F(CommandListCreate, givenImmediateCommandListThenInternalEngineIsUsedIfRequested) {
    const ze_command_queue_desc_t desc = {};
    bool internalEngine = true;

    ze_result_t returnValue;
    std::unique_ptr commandList0(CommandList::createImmediate(productFamily, device, &desc, internalEngine, NEO::EngineGroupType::RenderCompute, returnValue));
    ASSERT_NE(nullptr, commandList0);

    CommandQueueImp *cmdQueue = reinterpret_cast(commandList0->cmdQImmediate);
    EXPECT_EQ(cmdQueue->getCsr(), neoDevice->getInternalEngine().commandStreamReceiver);

    internalEngine = false;

    std::unique_ptr commandList1(CommandList::createImmediate(productFamily, device, &desc, internalEngine, NEO::EngineGroupType::RenderCompute, returnValue));
    ASSERT_NE(nullptr, commandList1);

    cmdQueue = reinterpret_cast(commandList1->cmdQImmediate);
    EXPECT_NE(cmdQueue->getCsr(), neoDevice->getInternalEngine().commandStreamReceiver);
}

// An immediate list created with internal usage = true must report isInternal()
// (the expectation is on the next line).
TEST_F(CommandListCreate, givenInternalUsageCommandListThenIsInternalReturnsTrue) {
    const ze_command_queue_desc_t desc = {};

    ze_result_t returnValue;
    std::unique_ptr commandList0(CommandList::createImmediate(productFamily, device, &desc, true, NEO::EngineGroupType::RenderCompute, returnValue));
// Tail of the test begun on the previous line.
EXPECT_TRUE(commandList0->isInternal());
}

// An immediate list created with internal usage = false must not report isInternal().
// NOTE(review): `std::unique_ptr` appears without template arguments in this text.
TEST_F(CommandListCreate, givenNonInternalUsageCommandListThenIsInternalReturnsFalse) {
    const ze_command_queue_desc_t desc = {};

    ze_result_t returnValue;
    std::unique_ptr commandList0(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue));

    EXPECT_FALSE(commandList0->isInternal());
}

// An immediate list reports CommandList::commandListimmediateIddsPerBlock from its container.
TEST_F(CommandListCreate, givenImmediateCommandListThenCustomNumIddPerBlockUsed) {
    const ze_command_queue_desc_t desc = {};

    ze_result_t returnValue;
    std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue));
    ASSERT_NE(nullptr, commandList);

    const uint32_t cmdListImmediateIdds = CommandList::commandListimmediateIddsPerBlock;
    EXPECT_EQ(cmdListImmediateIdds, commandList->commandContainer.getNumIddPerBlock());
}

// Default queue descriptor: the immediate list keeps the device, has cmdListType == 1u and
// owns a non-null immediate command queue.
// NOTE(review): 1u presumably corresponds to CommandList::CommandListType::TYPE_IMMEDIATE,
// which other tests in this file compare against by name — confirm.
TEST_F(CommandListCreate, whenCreatingImmediateCommandListThenItHasImmediateCommandQueueCreated) {
    const ze_command_queue_desc_t desc = {};

    ze_result_t returnValue;
    std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue));
    ASSERT_NE(nullptr, commandList);

    EXPECT_EQ(device, commandList->device);
    EXPECT_EQ(1u, commandList->cmdListType);
    EXPECT_NE(nullptr, commandList->cmdQImmediate);
}

// Same checks with desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS.
TEST_F(CommandListCreate, whenCreatingImmediateCommandListWithSyncModeThenItHasImmediateCommandQueueCreated) {
    ze_command_queue_desc_t desc = {};
    desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS;

    ze_result_t returnValue;
    std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue));
    ASSERT_NE(nullptr, commandList);

    EXPECT_EQ(device, commandList->device);
    EXPECT_EQ(1u, commandList->cmdListType);
    EXPECT_NE(nullptr, commandList->cmdQImmediate);
}

// Same checks with desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS (body on the next line).
TEST_F(CommandListCreate, whenCreatingImmediateCommandListWithASyncModeThenItHasImmediateCommandQueueCreated)
{
    // Body of the test whose TEST_F header ends the previous line.
    ze_command_queue_desc_t desc = {};
    desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;

    ze_result_t returnValue;
    std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue));
    ASSERT_NE(nullptr, commandList);

    EXPECT_EQ(device, commandList->device);
    EXPECT_EQ(1u, commandList->cmdListType);
    EXPECT_NE(nullptr, commandList->cmdQImmediate);
}

// Synchronous immediate list: appends a signal for a host-visible timestamp event, then
// host-signals the event and expects queryStatus() to report success.
// NOTE(review): the test name mentions an "UpdateTaskCountNeeded" flag, but no such flag is
// read in the visible body; only the event status is checked.
// NOTE(review): `std::unique_ptr` appears without template arguments in this text.
TEST_F(CommandListCreate, whenCreatingImmCmdListWithSyncModeAndAppendSignalEventThenUpdateTaskCountNeededFlagIsDisabled) {
    ze_command_queue_desc_t desc = {};
    desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS;

    ze_result_t returnValue;
    std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue));
    ASSERT_NE(nullptr, commandList);

    EXPECT_EQ(device, commandList->device);
    EXPECT_EQ(1u, commandList->cmdListType);
    EXPECT_NE(nullptr, commandList->cmdQImmediate);

    // Single-slot event pool, host visible, with kernel timestamps.
    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.count = 1;
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;

    ze_event_desc_t eventDesc = {};
    eventDesc.index = 0;
    eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;
    eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST;

    ze_event_handle_t event = nullptr;

    std::unique_ptr eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue));
    EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
    ASSERT_NE(nullptr, eventPool);

    // The return value of createEvent is not checked; the event object is wrapped in a
    // unique_ptr right after.
    eventPool->createEvent(&eventDesc, &event);

    std::unique_ptr event_object(L0::Event::fromHandle(event));
    ASSERT_NE(nullptr, event_object->csr);
    ASSERT_EQ(device->getNEODevice()->getDefaultEngine().commandStreamReceiver, event_object->csr);

    commandList->appendSignalEvent(event);

    auto result = event_object->hostSignal();
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    EXPECT_EQ(event_object->queryStatus(), ZE_RESULT_SUCCESS);
}

TEST_F(CommandListCreate,
whenCreatingImmCmdListWithSyncModeAndAppendBarrierThenUpdateTaskCountNeededFlagIsDisabled) {
    // (The TEST_F macro for this test opens on the previous line.)
    // Synchronous immediate list: appends a barrier that waits on one event, host-signals the
    // event, checks its status, then appends a second barrier with no events.
    // NOTE(review): `std::unique_ptr` appears without template arguments in this text.
    ze_command_queue_desc_t desc = {};
    desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS;

    ze_result_t returnValue;
    std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue));
    ASSERT_NE(nullptr, commandList);

    EXPECT_EQ(device, commandList->device);
    EXPECT_EQ(1u, commandList->cmdListType);
    EXPECT_NE(nullptr, commandList->cmdQImmediate);

    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.count = 1;
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;

    ze_event_desc_t eventDesc = {};
    eventDesc.index = 0;
    eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;
    eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST;

    ze_event_handle_t event = nullptr;

    std::unique_ptr eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue));
    EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
    ASSERT_NE(nullptr, eventPool);

    eventPool->createEvent(&eventDesc, &event);

    std::unique_ptr event_object(L0::Event::fromHandle(event));
    ASSERT_NE(nullptr, event_object->csr);
    ASSERT_EQ(device->getNEODevice()->getDefaultEngine().commandStreamReceiver, event_object->csr);

    // Barrier with no signal event and a one-element wait list.
    commandList->appendBarrier(nullptr, 1, &event);

    auto result = event_object->hostSignal();
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    EXPECT_EQ(event_object->queryStatus(), ZE_RESULT_SUCCESS);

    // Barrier with neither a signal event nor wait events; its result is not checked.
    commandList->appendBarrier(nullptr, 0, nullptr);
}

// With EnableFlushTaskSubmission forced on and the list's CSR replaced by a mock whose wait
// reports WaitStatus::GpuHang, appendBarrier must return ZE_RESULT_ERROR_DEVICE_LOST
// (the body continues on the next line).
TEST_F(CommandListCreate, GivenGpuHangWhenCreatingImmCmdListWithSyncModeAndAppendBarrierThenAppendBarrierReturnsDeviceLost) {
    DebugManagerStateRestore restorer;
    NEO::DebugManager.flags.EnableFlushTaskSubmission.set(true);

    ze_command_queue_desc_t desc = {};
    desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS;

    ze_result_t returnValue;
    std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue));
// Tail of the test begun on the previous line.
ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue);
    ASSERT_NE(nullptr, commandList);

    EXPECT_EQ(device, commandList->device);
    EXPECT_EQ(CommandList::CommandListType::TYPE_IMMEDIATE, commandList->cmdListType);
    EXPECT_NE(nullptr, commandList->cmdQImmediate);

    // Mock CSR configured so that waitForCompletionWithTimeout reports a GPU hang.
    MockCommandStreamReceiver mockCommandStreamReceiver(*neoDevice->executionEnvironment, neoDevice->getRootDeviceIndex(), neoDevice->getDeviceBitfield());
    mockCommandStreamReceiver.waitForCompletionWithTimeoutReturnValue = WaitStatus::GpuHang;

    // Swap the mock in for the duration of the call and restore the original CSR afterwards,
    // since the mock is a local that is destroyed at the end of the test.
    const auto oldCsr = commandList->csr;
    commandList->csr = &mockCommandStreamReceiver;

    const auto appendBarrierResult = commandList->appendBarrier(nullptr, 0, nullptr);
    EXPECT_EQ(ZE_RESULT_ERROR_DEVICE_LOST, appendBarrierResult);

    commandList->csr = oldCsr;
}

// Synchronous immediate list with a host-visible event: appendWaitOnEvents and appendBarrier
// succeed with the real CSR; once the CSR is replaced by a mock reporting GpuHang,
// appendSignalEvent must return ZE_RESULT_ERROR_DEVICE_LOST (the body continues on the next line).
// NOTE(review): `std::unique_ptr` appears without template arguments in this text.
HWTEST_F(CommandListCreate, GivenGpuHangWhenCreatingImmediateCommandListAndAppendingSignalEventsThenDeviceLostIsReturned) {
    ze_command_queue_desc_t desc = {};
    desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS;

    ze_result_t returnValue;
    std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue));
    ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue);
    ASSERT_NE(nullptr, commandList);

    EXPECT_EQ(device, commandList->device);
    EXPECT_EQ(CommandList::CommandListType::TYPE_IMMEDIATE, commandList->cmdListType);
    EXPECT_NE(nullptr, commandList->cmdQImmediate);

    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.count = 1;
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE;

    ze_event_desc_t eventDesc = {};
    eventDesc.index = 0;
    eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;
    eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST;

    ze_event_handle_t event = nullptr;

    std::unique_ptr eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue));
    EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
    ASSERT_NE(nullptr, eventPool);

    eventPool->createEvent(&eventDesc, &event);

    std::unique_ptr event_object(L0::Event::fromHandle(event));
    ASSERT_NE(nullptr, event_object->csr);
ASSERT_EQ(static_cast(device)->getNEODevice()->getDefaultEngine().commandStreamReceiver, event_object->csr); returnValue = commandList->appendWaitOnEvents(1, &event); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); returnValue = commandList->appendBarrier(nullptr, 1, &event); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); MockCommandStreamReceiver mockCommandStreamReceiver(*neoDevice->executionEnvironment, neoDevice->getRootDeviceIndex(), neoDevice->getDeviceBitfield()); mockCommandStreamReceiver.waitForCompletionWithTimeoutReturnValue = WaitStatus::GpuHang; const auto oldCsr = commandList->csr; commandList->csr = &mockCommandStreamReceiver; returnValue = commandList->appendSignalEvent(event); EXPECT_EQ(ZE_RESULT_ERROR_DEVICE_LOST, returnValue); commandList->csr = oldCsr; } HWTEST_F(CommandListCreate, GivenGpuHangWhenCreatingImmediateCommandListAndAppendingEventResetThenDeviceLostIsReturned) { ze_command_queue_desc_t desc = {}; desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS; ze_result_t returnValue; std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue)); ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue); ASSERT_NE(nullptr, commandList); EXPECT_EQ(device, commandList->device); EXPECT_EQ(CommandList::CommandListType::TYPE_IMMEDIATE, commandList->cmdListType); EXPECT_NE(nullptr, commandList->cmdQImmediate); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST; eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; ze_event_handle_t event = nullptr; std::unique_ptr eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); ASSERT_NE(nullptr, eventPool); eventPool->createEvent(&eventDesc, &event); std::unique_ptr event_object(L0::Event::fromHandle(event)); 
ASSERT_NE(nullptr, event_object->csr); ASSERT_EQ(static_cast(device)->getNEODevice()->getDefaultEngine().commandStreamReceiver, event_object->csr); returnValue = commandList->appendWaitOnEvents(1, &event); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); returnValue = commandList->appendBarrier(nullptr, 1, &event); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); returnValue = commandList->appendSignalEvent(event); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); returnValue = event_object->hostSignal(); ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue); EXPECT_EQ(ZE_RESULT_SUCCESS, event_object->queryStatus()); MockCommandStreamReceiver mockCommandStreamReceiver(*neoDevice->executionEnvironment, neoDevice->getRootDeviceIndex(), neoDevice->getDeviceBitfield()); mockCommandStreamReceiver.waitForCompletionWithTimeoutReturnValue = WaitStatus::GpuHang; const auto oldCsr = commandList->csr; commandList->csr = &mockCommandStreamReceiver; returnValue = commandList->appendEventReset(event); EXPECT_EQ(ZE_RESULT_ERROR_DEVICE_LOST, returnValue); commandList->csr = oldCsr; } HWTEST_F(CommandListCreate, GivenGpuHangAndEnabledFlushTaskSubmissionFlagWhenCreatingImmediateCommandListAndAppendingWaitOnEventsThenDeviceLostIsReturned) { DebugManagerStateRestore restorer; NEO::DebugManager.flags.EnableFlushTaskSubmission.set(true); ze_command_queue_desc_t desc = {}; desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS; ze_result_t returnValue; std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue)); ASSERT_NE(nullptr, commandList); ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue); EXPECT_EQ(device, commandList->device); EXPECT_EQ(1u, commandList->cmdListType); EXPECT_NE(nullptr, commandList->cmdQImmediate); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.signal = 
ZE_EVENT_SCOPE_FLAG_HOST; eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; ze_event_handle_t event = nullptr; std::unique_ptr eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); ASSERT_NE(nullptr, eventPool); eventPool->createEvent(&eventDesc, &event); std::unique_ptr event_object(L0::Event::fromHandle(event)); ASSERT_NE(nullptr, event_object->csr); ASSERT_EQ(static_cast(device)->getNEODevice()->getDefaultEngine().commandStreamReceiver, event_object->csr); MockCommandStreamReceiver mockCommandStreamReceiver(*neoDevice->executionEnvironment, neoDevice->getRootDeviceIndex(), neoDevice->getDeviceBitfield()); mockCommandStreamReceiver.waitForCompletionWithTimeoutReturnValue = WaitStatus::GpuHang; const auto oldCsr = commandList->csr; commandList->csr = &mockCommandStreamReceiver; returnValue = commandList->appendWaitOnEvents(1, &event); EXPECT_EQ(ZE_RESULT_ERROR_DEVICE_LOST, returnValue); commandList->csr = oldCsr; } TEST_F(CommandListCreate, whenCreatingImmCmdListWithSyncModeAndAppendResetEventThenUpdateTaskCountNeededFlagIsDisabled) { ze_command_queue_desc_t desc = {}; desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS; ze_result_t returnValue; std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue)); ASSERT_NE(nullptr, commandList); EXPECT_EQ(device, commandList->device); EXPECT_EQ(1u, commandList->cmdListType); EXPECT_NE(nullptr, commandList->cmdQImmediate); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST; eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; ze_event_handle_t event = nullptr; std::unique_ptr eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); 
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); ASSERT_NE(nullptr, eventPool); eventPool->createEvent(&eventDesc, &event); std::unique_ptr event_object(L0::Event::fromHandle(event)); ASSERT_NE(nullptr, event_object->csr); ASSERT_EQ(device->getNEODevice()->getDefaultEngine().commandStreamReceiver, event_object->csr); commandList->appendEventReset(event); auto result = event_object->hostSignal(); ASSERT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(event_object->queryStatus(), ZE_RESULT_SUCCESS); } TEST_F(CommandListCreate, whenCreatingImmCmdListWithASyncModeAndAppendSignalEventThenUpdateTaskCountNeededFlagIsEnabled) { ze_command_queue_desc_t desc = {}; desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; ze_result_t returnValue; std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue)); ASSERT_NE(nullptr, commandList); EXPECT_EQ(device, commandList->device); EXPECT_EQ(1u, commandList->cmdListType); EXPECT_NE(nullptr, commandList->cmdQImmediate); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.signal = 0; eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; ze_event_handle_t event = nullptr; std::unique_ptr eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); ASSERT_NE(nullptr, eventPool); eventPool->createEvent(&eventDesc, &event); std::unique_ptr event_object(L0::Event::fromHandle(event)); ASSERT_NE(nullptr, event_object->csr); ASSERT_EQ(device->getNEODevice()->getDefaultEngine().commandStreamReceiver, event_object->csr); commandList->appendSignalEvent(event); auto result = event_object->hostSignal(); ASSERT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(event_object->queryStatus(), ZE_RESULT_SUCCESS); } TEST_F(CommandListCreate, 
whenCreatingImmCmdListWithASyncModeAndAppendBarrierThenUpdateTaskCountNeededFlagIsEnabled) {
    // Asynchronous immediate command list on the RenderCompute group: appends a barrier
    // that signals a host-visible event, host-signals the event and checks its status,
    // then appends a second barrier with no events.
    ze_command_queue_desc_t desc = {};
    desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;

    ze_result_t returnValue;
    std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue));
    ASSERT_NE(nullptr, commandList);

    EXPECT_EQ(device, commandList->device);
    // Named enumerator instead of the literal 1u, matching the other immediate-list tests.
    EXPECT_EQ(CommandList::CommandListType::TYPE_IMMEDIATE, commandList->cmdListType);
    EXPECT_NE(nullptr, commandList->cmdQImmediate);

    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.count = 1;
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE;

    ze_event_desc_t eventDesc = {};
    eventDesc.index = 0;
    eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;
    eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST;

    ze_event_handle_t event = nullptr;

    std::unique_ptr eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue));
    EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
    ASSERT_NE(nullptr, eventPool);

    eventPool->createEvent(&eventDesc, &event);

    std::unique_ptr event_object(L0::Event::fromHandle(event));
    // createEvent's result is not checked, so guard the dereferences below.
    ASSERT_NE(nullptr, event_object);
    ASSERT_NE(nullptr, event_object->csr);
    ASSERT_EQ(device->getNEODevice()->getDefaultEngine().commandStreamReceiver, event_object->csr);

    commandList->appendBarrier(event, 0, nullptr);

    auto result = event_object->hostSignal();
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    EXPECT_EQ(event_object->queryStatus(), ZE_RESULT_SUCCESS);

    commandList->appendBarrier(nullptr, 0, nullptr);
}

// Same barrier sequence as the test above, but the immediate command list is created
// on the Copy engine group.
TEST_F(CommandListCreate, whenCreatingImmCmdListWithASyncModeAndCopyEngineAndAppendBarrierThenUpdateTaskCountNeededFlagIsEnabled) {
    ze_command_queue_desc_t desc = {};
    desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;

    ze_result_t returnValue;
    std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::Copy, returnValue));
    ASSERT_NE(nullptr, commandList);

    EXPECT_EQ(device, commandList->device);
    EXPECT_EQ(CommandList::CommandListType::TYPE_IMMEDIATE, commandList->cmdListType);
    EXPECT_NE(nullptr, commandList->cmdQImmediate);

    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.count = 1;
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE;

    ze_event_desc_t eventDesc = {};
    eventDesc.index = 0;
    eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;
    eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST;

    ze_event_handle_t event = nullptr;

    std::unique_ptr eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue));
    EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
    ASSERT_NE(nullptr, eventPool);

    eventPool->createEvent(&eventDesc, &event);

    std::unique_ptr event_object(L0::Event::fromHandle(event));
    ASSERT_NE(nullptr, event_object);
    ASSERT_NE(nullptr, event_object->csr);
    ASSERT_EQ(device->getNEODevice()->getDefaultEngine().commandStreamReceiver, event_object->csr);

    commandList->appendBarrier(event, 0, nullptr);

    auto result = event_object->hostSignal();
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    EXPECT_EQ(event_object->queryStatus(), ZE_RESULT_SUCCESS);

    commandList->appendBarrier(nullptr, 0, nullptr);
}

// Asynchronous immediate command list + appendEventReset; the body continues on the
// following line.
TEST_F(CommandListCreate, whenCreatingImmCmdListWithASyncModeAndAppendEventResetThenUpdateTaskCountNeededFlagIsEnabled) {
    ze_command_queue_desc_t desc = {};
    desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;

    ze_result_t returnValue;
    std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue));
    ASSERT_NE(nullptr, commandList);

    EXPECT_EQ(device, commandList->device);
    EXPECT_EQ(CommandList::CommandListType::TYPE_IMMEDIATE, commandList->cmdListType);
    EXPECT_NE(nullptr, commandList->cmdQImmediate);

    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.count = 1;
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE;

    ze_event_desc_t eventDesc = {};
    eventDesc.index = 0;
    eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;
    eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST;

    ze_event_handle_t event = nullptr;

    std::unique_ptr eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue));
    EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
ASSERT_NE(nullptr, eventPool); eventPool->createEvent(&eventDesc, &event); std::unique_ptr event_object(L0::Event::fromHandle(event)); ASSERT_NE(nullptr, event_object->csr); ASSERT_EQ(device->getNEODevice()->getDefaultEngine().commandStreamReceiver, event_object->csr); commandList->appendEventReset(event); auto result = event_object->hostSignal(); ASSERT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(event_object->queryStatus(), ZE_RESULT_SUCCESS); } TEST_F(CommandListCreate, whenInvokingAppendMemoryCopyFromContextForImmediateCommandListWithSyncModeThenSuccessIsReturned) { ze_command_queue_desc_t desc = {}; desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS; ze_result_t returnValue; std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::Copy, returnValue)); ASSERT_NE(nullptr, commandList); EXPECT_EQ(device, commandList->device); EXPECT_EQ(CommandList::CommandListType::TYPE_IMMEDIATE, commandList->cmdListType); EXPECT_NE(nullptr, commandList->cmdQImmediate); void *srcPtr = reinterpret_cast(0x1234); void *dstPtr = reinterpret_cast(0x2345); auto result = commandList->appendMemoryCopyFromContext(dstPtr, nullptr, srcPtr, 8, nullptr, 0, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } struct CommandListCreateWithDeferredOsContextInitialization : ContextCommandListCreate { void SetUp() override { DebugManager.flags.DeferOsContextInitialization.set(1); ContextCommandListCreate::SetUp(); } void TearDown() override { ContextCommandListCreate::TearDown(); } DebugManagerStateRestore restore; }; TEST_F(ContextCommandListCreate, givenDeferredEngineCreationWhenImmediateCommandListIsCreatedThenEngineIsInitialized) { uint32_t groupsCount{}; EXPECT_EQ(ZE_RESULT_SUCCESS, device->getCommandQueueGroupProperties(&groupsCount, nullptr)); auto groups = std::vector(groupsCount); EXPECT_EQ(ZE_RESULT_SUCCESS, device->getCommandQueueGroupProperties(&groupsCount, groups.data())); for (uint32_t groupIndex = 0u; groupIndex < groupsCount; 
groupIndex++) { const auto &group = groups[groupIndex]; for (uint32_t queueIndex = 0; queueIndex < group.numQueues; queueIndex++) { CommandStreamReceiver *expectedCsr{}; EXPECT_EQ(ZE_RESULT_SUCCESS, device->getCsrForOrdinalAndIndex(&expectedCsr, groupIndex, queueIndex)); ze_command_queue_desc_t desc = {}; desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS; desc.ordinal = groupIndex; desc.index = queueIndex; ze_command_list_handle_t cmdListHandle; ze_result_t result = context->createCommandListImmediate(device, &desc, &cmdListHandle); L0::CommandList *cmdList = L0::CommandList::fromHandle(cmdListHandle); EXPECT_EQ(device, cmdList->device); EXPECT_EQ(CommandList::CommandListType::TYPE_IMMEDIATE, cmdList->cmdListType); EXPECT_NE(nullptr, cmdList); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_TRUE(expectedCsr->getOsContext().isInitialized()); EXPECT_EQ(ZE_RESULT_SUCCESS, cmdList->destroy()); } } } TEST_F(CommandListCreate, whenInvokingAppendMemoryCopyFromContextForImmediateCommandListWithASyncModeThenSuccessIsReturned) { ze_command_queue_desc_t desc = {}; desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; ze_result_t returnValue; std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::Copy, returnValue)); ASSERT_NE(nullptr, commandList); EXPECT_EQ(device, commandList->device); EXPECT_EQ(CommandList::CommandListType::TYPE_IMMEDIATE, commandList->cmdListType); EXPECT_NE(nullptr, commandList->cmdQImmediate); void *srcPtr = reinterpret_cast(0x1234); void *dstPtr = reinterpret_cast(0x2345); auto result = commandList->appendMemoryCopyFromContext(dstPtr, nullptr, srcPtr, 8, nullptr, 0, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST_F(CommandListCreate, whenInvokingAppendMemoryCopyFromContextForImmediateCommandListThenSuccessIsReturned) { const ze_command_queue_desc_t desc = {}; ze_result_t returnValue; std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, 
NEO::EngineGroupType::Copy, returnValue)); ASSERT_NE(nullptr, commandList); EXPECT_EQ(device, commandList->device); EXPECT_EQ(CommandList::CommandListType::TYPE_IMMEDIATE, commandList->cmdListType); EXPECT_NE(nullptr, commandList->cmdQImmediate); void *srcPtr = reinterpret_cast(0x1234); void *dstPtr = reinterpret_cast(0x2345); auto result = commandList->appendMemoryCopyFromContext(dstPtr, nullptr, srcPtr, 8, nullptr, 0, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST_F(CommandListCreate, givenQueueDescriptionwhenCreatingImmediateCommandListForEveryEnigneThenItHasImmediateCommandQueueCreated) { auto &engineGroups = neoDevice->getRegularEngineGroups(); for (uint32_t ordinal = 0; ordinal < engineGroups.size(); ordinal++) { for (uint32_t index = 0; index < engineGroups[ordinal].engines.size(); index++) { ze_command_queue_desc_t desc = {}; desc.ordinal = ordinal; desc.index = index; ze_result_t returnValue; std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue)); ASSERT_NE(nullptr, commandList); EXPECT_EQ(device, commandList->device); EXPECT_EQ(CommandList::CommandListType::TYPE_IMMEDIATE, commandList->cmdListType); EXPECT_NE(nullptr, commandList->cmdQImmediate); } } } TEST_F(CommandListCreate, givenInvalidProductFamilyThenReturnsNullPointer) { ze_result_t returnValue; std::unique_ptr commandList(CommandList::create(IGFX_UNKNOWN, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)); EXPECT_EQ(nullptr, commandList); } HWCMDTEST_F(IGFX_GEN8_CORE, CommandListCreate, whenCommandListIsCreatedThenPCAndStateBaseAddressCmdsAreAddedAndCorrectlyProgrammed) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.UseBindlessMode.set(0); using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; ze_result_t returnValue; std::unique_ptr commandList(CommandList::create(productFamily, device, 
NEO::EngineGroupType::RenderCompute, 0u, returnValue)); auto &commandContainer = commandList->commandContainer; auto gmmHelper = commandContainer.getDevice()->getGmmHelper(); ASSERT_NE(nullptr, commandContainer.getCommandStream()); auto usedSpaceBefore = commandContainer.getCommandStream()->getUsed(); auto result = commandList->close(); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto usedSpaceAfter = commandContainer.getCommandStream()->getUsed(); ASSERT_GT(usedSpaceAfter, usedSpaceBefore); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), usedSpaceAfter)); auto itorPc = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), itorPc); auto cmdPc = genCmdCast(*itorPc); EXPECT_TRUE(cmdPc->getDcFlushEnable()); EXPECT_TRUE(cmdPc->getCommandStreamerStallEnable()); EXPECT_TRUE(cmdPc->getTextureCacheInvalidationEnable()); auto itor = find(itorPc, cmdList.end()); ASSERT_NE(cmdList.end(), itor); auto cmdSba = genCmdCast(*itor); auto dsh = commandContainer.getIndirectHeap(NEO::HeapType::DYNAMIC_STATE); auto ioh = commandContainer.getIndirectHeap(NEO::HeapType::INDIRECT_OBJECT); auto ssh = commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE); EXPECT_TRUE(cmdSba->getDynamicStateBaseAddressModifyEnable()); EXPECT_TRUE(cmdSba->getDynamicStateBufferSizeModifyEnable()); EXPECT_EQ(dsh->getHeapGpuBase(), cmdSba->getDynamicStateBaseAddress()); EXPECT_EQ(dsh->getHeapSizeInPages(), cmdSba->getDynamicStateBufferSize()); EXPECT_TRUE(cmdSba->getIndirectObjectBaseAddressModifyEnable()); EXPECT_TRUE(cmdSba->getIndirectObjectBufferSizeModifyEnable()); EXPECT_EQ(ioh->getHeapGpuBase(), cmdSba->getIndirectObjectBaseAddress()); EXPECT_EQ(ioh->getHeapSizeInPages(), cmdSba->getIndirectObjectBufferSize()); EXPECT_TRUE(cmdSba->getSurfaceStateBaseAddressModifyEnable()); EXPECT_EQ(ssh->getHeapGpuBase(), cmdSba->getSurfaceStateBaseAddress()); 
EXPECT_EQ(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER), cmdSba->getStatelessDataPortAccessMemoryObjectControlState()); } HWCMDTEST_F(IGFX_GEN8_CORE, CommandListCreate, whenBindlessModeEnabledWhenCommandListIsCreatedThenStateBaseAddressCmdsIsNotAdded) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.UseBindlessMode.set(1); using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; ze_result_t returnValue; std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)); auto &commandContainer = commandList->commandContainer; ASSERT_NE(nullptr, commandContainer.getCommandStream()); auto usedSpaceBefore = commandContainer.getCommandStream()->getUsed(); auto result = commandList->close(); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto usedSpaceAfter = commandContainer.getCommandStream()->getUsed(); ASSERT_GT(usedSpaceAfter, usedSpaceBefore); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), usedSpaceAfter)); auto itor = find(cmdList.begin(), cmdList.end()); ASSERT_EQ(cmdList.end(), itor); } HWCMDTEST_F(IGFX_GEN8_CORE, CommandListCreate, whenBindlessModeEnabledWhenCommandListImmediateIsCreatedThenStateBaseAddressCmdsIsNotAdded) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.UseBindlessMode.set(1); using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; ze_command_queue_desc_t desc = {}; desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; ze_result_t returnValue; std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue)); auto &commandContainer = commandList->commandContainer; ASSERT_NE(nullptr, commandContainer.getCommandStream()); auto usedSpaceBefore = 
commandContainer.getCommandStream()->getUsed(); auto result = commandList->close(); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto usedSpaceAfter = commandContainer.getCommandStream()->getUsed(); ASSERT_GT(usedSpaceAfter, usedSpaceBefore); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), usedSpaceAfter)); auto itor = find(cmdList.begin(), cmdList.end()); ASSERT_EQ(cmdList.end(), itor); } HWTEST_F(CommandListCreate, givenCommandListWithCopyOnlyWhenCreatedThenStateBaseAddressCmdIsNotProgrammedAndHeapIsNotAllocated) { using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; ze_result_t returnValue; std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::Copy, 0u, returnValue)); auto &commandContainer = commandList->commandContainer; GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); auto itor = find(cmdList.begin(), cmdList.end()); EXPECT_EQ(cmdList.end(), itor); for (uint32_t i = 0; i < NEO::HeapType::NUM_TYPES; i++) { ASSERT_EQ(commandList->commandContainer.getIndirectHeap(static_cast(i)), nullptr); ASSERT_EQ(commandList->commandContainer.getIndirectHeapAllocation(static_cast(i)), nullptr); } } HWTEST_F(CommandListCreate, givenCommandListWithCopyOnlyWhenSetBarrierThenMiFlushDWIsProgrammed) { using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW; ze_result_t returnValue; std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::Copy, 0u, returnValue)); auto &commandContainer = commandList->commandContainer; commandList->appendBarrier(nullptr, 0, nullptr); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); auto 
// Remainder of the copy-only barrier test (this completes the `auto` declaration that
// opens on the preceding line): the searched command must be present.
        itor = find(cmdList.begin(), cmdList.end());
    EXPECT_NE(cmdList.end(), itor);
}

// Asynchronous immediate command list on the Copy group: after appendBarrier, the
// command container's stream must contain no match for the searched command
// (MI_FLUSH_DW per the test name).
HWTEST_F(CommandListCreate, givenImmediateCommandListWithCopyOnlyWhenSetBarrierThenMiFlushCmdIsNotInsertedInTheCmdContainer) {
    using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW;

    ze_command_queue_desc_t desc = {};
    desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;

    ze_result_t returnValue;
    std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::Copy, returnValue));
    ASSERT_NE(nullptr, commandList);

    EXPECT_EQ(device, commandList->device);
    // Named enumerator instead of the literal 1u, matching the other immediate-list tests.
    EXPECT_EQ(CommandList::CommandListType::TYPE_IMMEDIATE, commandList->cmdListType);
    EXPECT_NE(nullptr, commandList->cmdQImmediate);

    auto &commandContainer = commandList->commandContainer;

    commandList->appendBarrier(nullptr, 0, nullptr);

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));

    auto itor = find(cmdList.begin(), cmdList.end());
    EXPECT_EQ(cmdList.end(), itor);
}

// After reset(), a Compute command list must report that it does not contain a
// stateless uncached resource.
HWTEST_F(CommandListCreate, whenCommandListIsResetThenContainsStatelessUncachedResourceIsSetToFalse) {
    ze_result_t returnValue;
    std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::Compute, 0u, returnValue));
    EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
    // The status check above is non-fatal; stop before dereferencing a null list.
    ASSERT_NE(nullptr, commandList);

    returnValue = commandList->reset();
    EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);

    EXPECT_FALSE(commandList->getContainsStatelessUncachedResource());
}

// Bindless mode enabled + reset(); the stream inspection continues on the following line.
HWTEST_F(CommandListCreate, givenBindlessModeEnabledWhenCommandListsResetThenSbaNotReloaded) {
    DebugManagerStateRestore dbgRestorer;
    DebugManager.flags.UseBindlessMode.set(1);

    using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;

    ze_result_t returnValue;
    std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::Compute, 0u, returnValue));
    EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
    ASSERT_NE(nullptr, commandList);

    returnValue = commandList->reset();
    auto usedAfter =
commandList->commandContainer.getCommandStream()->getUsed(); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), usedAfter)); auto itor = find(cmdList.begin(), cmdList.end()); ASSERT_EQ(cmdList.end(), itor); } HWTEST_F(CommandListCreate, givenBindlessModeDisabledWhenCommandListsResetThenSbaReloaded) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.UseBindlessMode.set(0); using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; ze_result_t returnValue; std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::Compute, 0u, returnValue)); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); returnValue = commandList->reset(); auto usedAfter = commandList->commandContainer.getCommandStream()->getUsed(); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), usedAfter)); auto itor = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), itor); } HWTEST_F(CommandListCreate, givenCommandListWithCopyOnlyWhenResetThenStateBaseAddressNotProgrammed) { using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; ze_result_t returnValue; std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::Copy, 0u, returnValue)); auto &commandContainer = commandList->commandContainer; commandList->reset(); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); auto itor = find(cmdList.begin(), cmdList.end()); EXPECT_EQ(cmdList.end(), itor); } HWTEST_F(CommandListCreate, givenCommandListWhenSetBarrierThenPipeControlIsProgrammed) { using PIPE_CONTROL = typename 
FamilyType::PIPE_CONTROL; ze_result_t returnValue; std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)); auto &commandContainer = commandList->commandContainer; commandList->appendBarrier(nullptr, 0, nullptr); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); auto itor = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), itor); } HWTEST2_F(CommandListCreate, givenCommandListWhenAppendingBarrierThenPipeControlIsProgrammedAndHdcFlushIsSet, IsAtLeastXeHpCore) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; ze_result_t returnValue; std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)); auto &commandContainer = commandList->commandContainer; size_t usedBefore = commandContainer.getCommandStream()->getUsed(); returnValue = commandList->appendBarrier(nullptr, 0, nullptr); EXPECT_EQ(returnValue, ZE_RESULT_SUCCESS); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), usedBefore), commandContainer.getCommandStream()->getUsed() - usedBefore)); auto itor = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), itor); auto pipeControlCmd = reinterpret_cast(*itor); EXPECT_TRUE(UnitTestHelper::getPipeControlHdcPipelineFlush(*pipeControlCmd)); } HWTEST2_F(CommandListCreate, givenCommandListWhenAppendingBarrierThenPipeControlIsProgrammedWithHdcAndUntypedFlushSet, IsAtLeastXeHpgCore) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; ze_result_t returnValue; std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)); auto &commandContainer = commandList->commandContainer; size_t usedBefore = 
commandContainer.getCommandStream()->getUsed(); returnValue = commandList->appendBarrier(nullptr, 0, nullptr); EXPECT_EQ(returnValue, ZE_RESULT_SUCCESS); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), usedBefore), commandContainer.getCommandStream()->getUsed() - usedBefore)); auto itor = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), itor); auto pipeControlCmd = reinterpret_cast(*itor); EXPECT_TRUE(UnitTestHelper::getPipeControlHdcPipelineFlush(*pipeControlCmd)); EXPECT_TRUE(pipeControlCmd->getUnTypedDataPortCacheFlush()); } HWTEST_F(CommandListCreate, givenCommandListWhenAppendingBarrierWithIncorrectWaitEventsThenInvalidArgumentIsReturned) { ze_result_t returnValue; std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)); returnValue = commandList->appendBarrier(nullptr, 4u, nullptr); EXPECT_EQ(returnValue, ZE_RESULT_ERROR_INVALID_ARGUMENT); } HWTEST2_F(CommandListCreate, givenCopyCommandListWhenProfilingBeforeCommandForCopyOnlyThenCommandsHaveCorrectEventOffsets, IsAtLeastSkl) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM; auto commandList = std::make_unique>>(); commandList->initialize(device, NEO::EngineGroupType::Copy, 0u); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; ze_result_t result = ZE_RESULT_SUCCESS; auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); auto baseAddr = event->getGpuAddress(device); auto contextOffset = event->getContextStartOffset(); auto globalOffset = 
event->getGlobalStartOffset(); EXPECT_EQ(baseAddr, event->getPacketAddress(device)); commandList->appendEventForProfilingCopyCommand(event->toHandle(), true); EXPECT_EQ(1u, event->getPacketsInUse()); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed())); auto itor = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), itor); auto cmd = genCmdCast(*itor); EXPECT_EQ(cmd->getRegisterAddress(), REG_GLOBAL_TIMESTAMP_LDW); EXPECT_EQ(cmd->getMemoryAddress(), ptrOffset(baseAddr, globalOffset)); EXPECT_NE(cmdList.end(), ++itor); cmd = genCmdCast(*itor); EXPECT_EQ(cmd->getRegisterAddress(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW); EXPECT_EQ(cmd->getMemoryAddress(), ptrOffset(baseAddr, contextOffset)); } HWTEST2_F(CommandListCreate, givenCopyCommandListWhenProfilingAfterCommandForCopyOnlyThenCommandsHaveCorrectEventOffsets, IsAtLeastSkl) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM; auto commandList = std::make_unique>>(); commandList->initialize(device, NEO::EngineGroupType::Copy, 0u); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; ze_result_t result = ZE_RESULT_SUCCESS; auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); commandList->appendEventForProfilingCopyCommand(event->toHandle(), false); auto contextOffset = event->getContextEndOffset(); auto globalOffset = event->getGlobalEndOffset(); auto baseAddr = event->getGpuAddress(device); GenCmdList cmdList; 
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed())); auto itor = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), itor); auto cmd = genCmdCast(*itor); EXPECT_EQ(cmd->getRegisterAddress(), REG_GLOBAL_TIMESTAMP_LDW); EXPECT_EQ(cmd->getMemoryAddress(), ptrOffset(baseAddr, globalOffset)); EXPECT_NE(cmdList.end(), ++itor); cmd = genCmdCast(*itor); EXPECT_EQ(cmd->getRegisterAddress(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW); EXPECT_EQ(cmd->getMemoryAddress(), ptrOffset(baseAddr, contextOffset)); } HWTEST2_F(CommandListCreate, givenNullEventWhenAppendEventAfterWalkerThenNothingAddedToStream, IsAtLeastSkl) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM; auto commandList = std::make_unique>>(); commandList->initialize(device, NEO::EngineGroupType::Copy, 0u); auto usedBefore = commandList->commandContainer.getCommandStream()->getUsed(); commandList->appendSignalEventPostWalker(nullptr); EXPECT_EQ(commandList->commandContainer.getCommandStream()->getUsed(), usedBefore); } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_2.cpp000066400000000000000000001713741422164147700324120ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/source/image/image_hw.h" #include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h" namespace L0 { namespace ult { using CommandListCreate = 
Test; template class MockCommandListHw : public WhiteBox<::L0::CommandListCoreFamily> { public: MockCommandListHw() : WhiteBox<::L0::CommandListCoreFamily>() {} MockCommandListHw(bool failOnFirst) : WhiteBox<::L0::CommandListCoreFamily>(), failOnFirstCopy(failOnFirst) {} AlignedAllocationData getAlignedAllocation(L0::Device *device, const void *buffer, uint64_t bufferSize, bool allowHostCopy) override { getAlignedAllocationCalledTimes++; if (buffer) { return {0, 0, &alignedAlloc, true}; } return {0, 0, nullptr, false}; } ze_result_t appendMemoryCopyKernelWithGA(void *dstPtr, NEO::GraphicsAllocation *dstPtrAlloc, uint64_t dstOffset, void *srcPtr, NEO::GraphicsAllocation *srcPtrAlloc, uint64_t srcOffset, uint64_t size, uint64_t elementSize, Builtin builtin, ze_event_handle_t hSignalEvent, bool isStateless) override { appendMemoryCopyKernelWithGACalledTimes++; if (isStateless) appendMemoryCopyKernelWithGAStatelessCalledTimes++; if (failOnFirstCopy && (appendMemoryCopyKernelWithGACalledTimes == 1 || appendMemoryCopyKernelWithGAStatelessCalledTimes == 1)) { return ZE_RESULT_ERROR_UNKNOWN; } return ZE_RESULT_SUCCESS; } ze_result_t appendMemoryCopyBlit(uintptr_t dstPtr, NEO::GraphicsAllocation *dstPtrAlloc, uint64_t dstOffset, uintptr_t srcPtr, NEO::GraphicsAllocation *srcPtrAlloc, uint64_t srcOffset, uint64_t size) override { appendMemoryCopyBlitCalledTimes++; if (failOnFirstCopy && appendMemoryCopyBlitCalledTimes == 1) { return ZE_RESULT_ERROR_UNKNOWN; } return ZE_RESULT_SUCCESS; } ze_result_t appendMemoryCopyBlitRegion(NEO::GraphicsAllocation *srcAllocation, NEO::GraphicsAllocation *dstAllocation, size_t srcOffset, size_t dstOffset, ze_copy_region_t srcRegion, ze_copy_region_t dstRegion, const Vec3 ©Size, size_t srcRowPitch, size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch, const Vec3 &srcSize, const Vec3 &dstSize, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override { appendMemoryCopyBlitRegionCalledTimes++; 
return ZE_RESULT_SUCCESS; } ze_result_t appendMemoryCopyKernel2d(AlignedAllocationData *dstAlignedAllocation, AlignedAllocationData *srcAlignedAllocation, Builtin builtin, const ze_copy_region_t *dstRegion, uint32_t dstPitch, size_t dstOffset, const ze_copy_region_t *srcRegion, uint32_t srcPitch, size_t srcOffset, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override { appendMemoryCopyKernel2dCalledTimes++; return ZE_RESULT_SUCCESS; } ze_result_t appendMemoryCopyKernel3d(AlignedAllocationData *dstAlignedAllocation, AlignedAllocationData *srcAlignedAllocation, Builtin builtin, const ze_copy_region_t *dstRegion, uint32_t dstPitch, uint32_t dstSlicePitch, size_t dstOffset, const ze_copy_region_t *srcRegion, uint32_t srcPitch, uint32_t srcSlicePitch, size_t srcOffset, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override { appendMemoryCopyKernel3dCalledTimes++; return ZE_RESULT_SUCCESS; } ze_result_t appendBlitFill(void *ptr, const void *pattern, size_t patternSize, size_t size, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override { appendBlitFillCalledTimes++; return ZE_RESULT_SUCCESS; } ze_result_t appendCopyImageBlit(NEO::GraphicsAllocation *src, NEO::GraphicsAllocation *dst, const Vec3 &srcOffsets, const Vec3 &dstOffsets, size_t srcRowPitch, size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch, size_t bytesPerPixel, const Vec3 ©Size, const Vec3 &srcSize, const Vec3 &dstSize, ze_event_handle_t hSignalEvent) override { appendCopyImageBlitCalledTimes++; appendImageRegionCopySize = copySize; appendImageRegionSrcOrigin = srcOffsets; appendImageRegionDstOrigin = dstOffsets; return ZE_RESULT_SUCCESS; } uint32_t appendMemoryCopyKernelWithGACalledTimes = 0; uint32_t appendMemoryCopyKernelWithGAStatelessCalledTimes = 0; uint32_t appendMemoryCopyBlitCalledTimes = 0; uint32_t appendMemoryCopyBlitRegionCalledTimes = 0; uint32_t 
appendMemoryCopyKernel2dCalledTimes = 0; uint32_t appendMemoryCopyKernel3dCalledTimes = 0; uint32_t appendBlitFillCalledTimes = 0; uint32_t appendCopyImageBlitCalledTimes = 0; uint32_t getAlignedAllocationCalledTimes = 0; Vec3 appendImageRegionCopySize = {0, 0, 0}; Vec3 appendImageRegionSrcOrigin = {9, 9, 9}; Vec3 appendImageRegionDstOrigin = {9, 9, 9}; bool failOnFirstCopy = false; uint8_t mockAlignedAllocData[2 * MemoryConstants::pageSize]{}; void *alignedDataPtr = alignUp(mockAlignedAllocData, MemoryConstants::pageSize); NEO::MockGraphicsAllocation alignedAlloc{alignedDataPtr, reinterpret_cast(alignedDataPtr), MemoryConstants::pageSize}; }; HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryCopyCalledWithNullDstPtrThenAppendMemoryCopyWithappendMemoryCopyReturnsError, IsAtLeastSkl) { MockCommandListHw cmdList; cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); void *srcPtr = reinterpret_cast(0x1234); void *dstPtr = nullptr; ze_result_t ret = cmdList.appendMemoryCopy(dstPtr, srcPtr, 0x1001, nullptr, 0, nullptr); EXPECT_GT(cmdList.getAlignedAllocationCalledTimes, 0u); EXPECT_EQ(ret, ZE_RESULT_ERROR_UNKNOWN); } HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryCopyCalledWithNullSrcPtrThenAppendMemoryCopyWithappendMemoryCopyReturnsError, IsAtLeastSkl) { MockCommandListHw cmdList; cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); void *srcPtr = nullptr; void *dstPtr = reinterpret_cast(0x2345); ze_result_t ret = cmdList.appendMemoryCopy(dstPtr, srcPtr, 0x1001, nullptr, 0, nullptr); EXPECT_GT(cmdList.getAlignedAllocationCalledTimes, 0u); EXPECT_EQ(ret, ZE_RESULT_ERROR_UNKNOWN); } HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryCopyCalledWithNullSrcPtrAndDstPtrThenAppendMemoryCopyWithappendMemoryCopyReturnsError, IsAtLeastSkl) { MockCommandListHw cmdList; cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); void *srcPtr = nullptr; void *dstPtr = nullptr; ze_result_t ret = 
cmdList.appendMemoryCopy(dstPtr, srcPtr, 0x1001, nullptr, 0, nullptr); EXPECT_GT(cmdList.getAlignedAllocationCalledTimes, 0u); EXPECT_EQ(ret, ZE_RESULT_ERROR_UNKNOWN); }

// RenderCompute list: a host-pointer copy must reach the kernel-copy hook and
// never the blit-copy hook.
HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryCopyCalledThenAppendMemoryCopyWithappendMemoryCopyKernelWithGACalled, IsAtLeastSkl) {
    void *destination = reinterpret_cast(0x2345);
    void *source = reinterpret_cast(0x1234);

    MockCommandListHw mockList;
    mockList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
    mockList.appendMemoryCopy(destination, source, 0x1001, nullptr, 0, nullptr);

    EXPECT_GT(mockList.appendMemoryCopyKernelWithGACalledTimes, 0u);
    EXPECT_EQ(mockList.appendMemoryCopyBlitCalledTimes, 0u);
}

// 0x100000000-byte (4 GiB) copy on a RenderCompute list: the kernel-copy hook
// must be hit, at least once with the stateless flag, and the blit hook not at all.
HWTEST2_F(CommandListCreate, givenCommandListWhen4GByteMemoryCopyCalledThenAppendMemoryCopyWithappendMemoryCopyKernelWithGAStatelessCalled, IsAtLeastSkl) {
    void *destination = reinterpret_cast(0x100001234);
    void *source = reinterpret_cast(0x1234);

    MockCommandListHw mockList;
    mockList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
    mockList.appendMemoryCopy(destination, source, 0x100000000, nullptr, 0, nullptr);

    EXPECT_GT(mockList.appendMemoryCopyKernelWithGACalledTimes, 0u);
    EXPECT_GT(mockList.appendMemoryCopyKernelWithGAStatelessCalledTimes, 0u);
    EXPECT_EQ(mockList.appendMemoryCopyBlitCalledTimes, 0u);
}

// Copy list: the same host-pointer copy must go to the blit-copy hook and
// never to the kernel-copy hook.
HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryCopyCalledThenAppendMemoryCopyWithappendMemoryCopyWithBliterCalled, IsAtLeastSkl) {
    void *destination = reinterpret_cast(0x2345);
    void *source = reinterpret_cast(0x1234);

    MockCommandListHw mockList;
    mockList.initialize(device, NEO::EngineGroupType::Copy, 0u);
    mockList.appendMemoryCopy(destination, source, 0x1001, nullptr, 0, nullptr);

    EXPECT_EQ(mockList.appendMemoryCopyKernelWithGACalledTimes, 0u);
    EXPECT_GT(mockList.appendMemoryCopyBlitCalledTimes, 0u);
}

// Copy list: a region copy with zero-initialised regions must reach the
// blit-region hook.
HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryCopyRegionCalledThenAppendMemoryCopyWithappendMemoryCopyWithBliterCalled, IsAtLeastSkl) {
    MockCommandListHw mockList;
    mockList.initialize(device, NEO::EngineGroupType::Copy, 0u);

    ze_copy_region_t sourceRegion = {};
    ze_copy_region_t destinationRegion = {};
    void *source = reinterpret_cast(0x1234);
    void *destination = reinterpret_cast(0x2345);

    mockList.appendMemoryCopyRegion(destination, &destinationRegion, 0, 0, source, &sourceRegion, 0, 0, nullptr, 0, nullptr);
    EXPECT_GT(mockList.appendMemoryCopyBlitRegionCalledTimes, 0u);
}

// 16-byte (4 * sizeof(uint32_t)) page-fault copy on a RenderCompute list:
// exactly one kernel-copy hook call, none of them stateless.
HWTEST2_F(CommandListCreate, givenCommandListWhenPageFaultCopyCalledThenappendPageFaultCopyWithappendMemoryCopyKernelWithGACalled, IsAtLeastSkl) {
    const size_t copyBytes = 4 * sizeof(uint32_t);
    MockCommandListHw mockList;
    mockList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);

    NEO::MockGraphicsAllocation sourceAllocation(0, NEO::AllocationType::INTERNAL_HOST_MEMORY,
                                                 reinterpret_cast(0x1234), copyBytes, 0, sizeof(uint32_t),
                                                 MemoryPool::System4KBPages);
    NEO::MockGraphicsAllocation destinationAllocation(0, NEO::AllocationType::INTERNAL_HOST_MEMORY,
                                                      reinterpret_cast(0x2345), copyBytes, 0, sizeof(uint32_t),
                                                      MemoryPool::System4KBPages);

    mockList.appendPageFaultCopy(&destinationAllocation, &sourceAllocation, copyBytes, false);

    EXPECT_EQ(mockList.appendMemoryCopyKernelWithGACalledTimes, 1u);
    EXPECT_EQ(mockList.appendMemoryCopyKernelWithGAStatelessCalledTimes, 0u);
}

// Same 16-byte page-fault copy on a Copy list: exactly one blit-copy hook call.
HWTEST2_F(CommandListCreate, givenCommandListWhenPageFaultCopyCalledWithCopyEngineThenappendPageFaultCopyWithappendMemoryCopyKernelWithGACalled, IsAtLeastSkl) {
    const size_t copyBytes = 4 * sizeof(uint32_t);
    MockCommandListHw mockList;
    mockList.initialize(device, NEO::EngineGroupType::Copy, 0u);

    NEO::MockGraphicsAllocation sourceAllocation(0, NEO::AllocationType::INTERNAL_HOST_MEMORY,
                                                 reinterpret_cast(0x1234), copyBytes, 0, sizeof(uint32_t),
                                                 MemoryPool::System4KBPages);
    NEO::MockGraphicsAllocation destinationAllocation(0, NEO::AllocationType::INTERNAL_HOST_MEMORY,
                                                      reinterpret_cast(0x2345), copyBytes, 0, sizeof(uint32_t),
                                                      MemoryPool::System4KBPages);

    mockList.appendPageFaultCopy(&destinationAllocation, &sourceAllocation, copyBytes, false);

    EXPECT_EQ(mockList.appendMemoryCopyBlitCalledTimes, 1u);
}
// 17-byte (4 * sizeof(uint32_t) + 1) page-fault copy on a RenderCompute list:
// two kernel-copy hook calls are expected, none of them stateless.
HWTEST2_F(CommandListCreate, givenCommandListWhenPageFaultCopyCalledThenappendPageFaultCopyWithappendMemoryCopyKernelWithGACalledForMiddleAndRightSizesAreCalled, IsAtLeastSkl) {
    const size_t copyBytes = 4 * sizeof(uint32_t) + 1;
    MockCommandListHw mockList;
    mockList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);

    NEO::MockGraphicsAllocation sourceAllocation(0, NEO::AllocationType::INTERNAL_HOST_MEMORY,
                                                 reinterpret_cast(0x1234), copyBytes, 0, sizeof(uint32_t),
                                                 MemoryPool::System4KBPages);
    NEO::MockGraphicsAllocation destinationAllocation(0, NEO::AllocationType::INTERNAL_HOST_MEMORY,
                                                      reinterpret_cast(0x2345), copyBytes, 0, sizeof(uint32_t),
                                                      MemoryPool::System4KBPages);

    mockList.appendPageFaultCopy(&destinationAllocation, &sourceAllocation, copyBytes, false);

    EXPECT_EQ(mockList.appendMemoryCopyKernelWithGACalledTimes, 2u);
    EXPECT_EQ(mockList.appendMemoryCopyKernelWithGAStatelessCalledTimes, 0u);
}

// Same 17-byte copy, but the mock fails its first kernel copy: only that one
// hook call is expected.
HWTEST2_F(CommandListCreate, givenCommandListWhenPageFaultCopyCalledAndErrorOnMidCopyThenappendPageFaultCopyWithappendMemoryCopyKernelWithGACalledForMiddleIsCalled, IsAtLeastSkl) {
    const size_t copyBytes = 4 * sizeof(uint32_t) + 1;
    MockCommandListHw failingList(true);
    failingList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);

    NEO::MockGraphicsAllocation sourceAllocation(0, NEO::AllocationType::INTERNAL_HOST_MEMORY,
                                                 reinterpret_cast(0x1234), copyBytes, 0, sizeof(uint32_t),
                                                 MemoryPool::System4KBPages);
    NEO::MockGraphicsAllocation destinationAllocation(0, NEO::AllocationType::INTERNAL_HOST_MEMORY,
                                                      reinterpret_cast(0x2345), copyBytes, 0, sizeof(uint32_t),
                                                      MemoryPool::System4KBPages);

    failingList.appendPageFaultCopy(&destinationAllocation, &sourceAllocation, copyBytes, false);

    EXPECT_EQ(failingList.appendMemoryCopyKernelWithGACalledTimes, 1u);
    EXPECT_EQ(failingList.appendMemoryCopyKernelWithGAStatelessCalledTimes, 0u);
}

// 17-byte page-fault copy on a Copy list: a single blit-copy hook call.
HWTEST2_F(CommandListCreate, givenCommandListWhenPageFaultCopyCalledWithCopyEngineThenappendPageFaultCopyWithappendMemoryCopyCalledOnlyOnce, IsAtLeastSkl) {
    const size_t copyBytes = 4 * sizeof(uint32_t) + 1;
    MockCommandListHw mockList;
    mockList.initialize(device, NEO::EngineGroupType::Copy, 0u);

    NEO::MockGraphicsAllocation sourceAllocation(0, NEO::AllocationType::INTERNAL_HOST_MEMORY,
                                                 reinterpret_cast(0x1234), copyBytes, 0, sizeof(uint32_t),
                                                 MemoryPool::System4KBPages);
    NEO::MockGraphicsAllocation destinationAllocation(0, NEO::AllocationType::INTERNAL_HOST_MEMORY,
                                                      reinterpret_cast(0x2345), copyBytes, 0, sizeof(uint32_t),
                                                      MemoryPool::System4KBPages);

    mockList.appendPageFaultCopy(&destinationAllocation, &sourceAllocation, copyBytes, false);

    EXPECT_EQ(mockList.appendMemoryCopyBlitCalledTimes, 1u);
}

// Copy list whose mock fails the first blit copy: still exactly one blit-copy
// hook call.
HWTEST2_F(CommandListCreate, givenCommandListWhenPageFaultCopyCalledWithCopyEngineAndErrorOnMidOperationThenappendPageFaultCopyWithappendMemoryCopyKernelWithGACalledForMiddleIsCalled, IsAtLeastSkl) {
    const size_t copyBytes = 4 * sizeof(uint32_t) + 1;
    MockCommandListHw failingList(true);
    failingList.initialize(device, NEO::EngineGroupType::Copy, 0u);

    NEO::MockGraphicsAllocation sourceAllocation(0, NEO::AllocationType::INTERNAL_HOST_MEMORY,
                                                 reinterpret_cast(0x1234), copyBytes, 0, sizeof(uint32_t),
                                                 MemoryPool::System4KBPages);
    NEO::MockGraphicsAllocation destinationAllocation(0, NEO::AllocationType::INTERNAL_HOST_MEMORY,
                                                      reinterpret_cast(0x2345), copyBytes, 0, sizeof(uint32_t),
                                                      MemoryPool::System4KBPages);

    failingList.appendPageFaultCopy(&destinationAllocation, &sourceAllocation, copyBytes, false);

    EXPECT_EQ(failingList.appendMemoryCopyBlitCalledTimes, 1u);
}

// 0x100000000-byte (4 GiB) page-fault copy on a RenderCompute list: one
// kernel-copy hook call, and it carries the stateless flag.
HWTEST2_F(CommandListCreate, givenCommandListWhen4GBytePageFaultCopyCalledThenPageFaultCopyWithappendMemoryCopyKernelWithGAStatelessCalled, IsAtLeastSkl) {
    const size_t copyBytes = 0x100000000;
    MockCommandListHw mockList;
    mockList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);

    NEO::MockGraphicsAllocation sourceAllocation(0, NEO::AllocationType::INTERNAL_HOST_MEMORY,
                                                 reinterpret_cast(0x1234), copyBytes, 0, sizeof(uint32_t),
                                                 MemoryPool::System4KBPages);
    NEO::MockGraphicsAllocation destinationAllocation(0, NEO::AllocationType::INTERNAL_HOST_MEMORY,
                                                      reinterpret_cast(0x100003456), copyBytes, 0, sizeof(uint32_t),
                                                      MemoryPool::System4KBPages);

    mockList.appendPageFaultCopy(&destinationAllocation, &sourceAllocation, copyBytes, false);

    EXPECT_EQ(mockList.appendMemoryCopyKernelWithGACalledTimes, 1u);
    EXPECT_EQ(mockList.appendMemoryCopyKernelWithGAStatelessCalledTimes, 1u);
}

// 0x100000001-byte page-fault copy on a RenderCompute list: two kernel-copy
// hook calls, both stateless.
HWTEST2_F(CommandListCreate, givenCommandListWhen4GBytePageFaultCopyCalledThenPageFaultCopyWithappendMemoryCopyKernelWithGAStatelessCalledForMiddleAndRightSizesAreCalled, IsAtLeastSkl) {
    const size_t copyBytes = 0x100000001;
    MockCommandListHw mockList;
    mockList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);

    NEO::MockGraphicsAllocation sourceAllocation(0, NEO::AllocationType::INTERNAL_HOST_MEMORY,
                                                 reinterpret_cast(0x1234), copyBytes, 0, sizeof(uint32_t),
                                                 MemoryPool::System4KBPages);
    NEO::MockGraphicsAllocation destinationAllocation(0, NEO::AllocationType::INTERNAL_HOST_MEMORY,
                                                      reinterpret_cast(0x100003456), copyBytes, 0, sizeof(uint32_t),
                                                      MemoryPool::System4KBPages);

    mockList.appendPageFaultCopy(&destinationAllocation, &sourceAllocation, copyBytes, false);

    EXPECT_EQ(mockList.appendMemoryCopyKernelWithGACalledTimes, 2u);
    EXPECT_EQ(mockList.appendMemoryCopyKernelWithGAStatelessCalledTimes, 2u);
}

// RenderCompute list with regions {4, 4, 4, 2, 2, 2}: the 3D kernel hook must
// be used and the blit-region hook must stay untouched.
HWTEST2_F(CommandListCreate, givenCommandListAnd3DWhbufferenMemoryCopyRegionCalledThenCopyKernel3DCalled, IsAtLeastSkl) {
    MockCommandListHw mockList;
    mockList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);

    ze_copy_region_t sourceRegion = {4, 4, 4, 2, 2, 2};
    ze_copy_region_t destinationRegion = {4, 4, 4, 2, 2, 2};
    void *source = reinterpret_cast(0x1234);
    void *destination = reinterpret_cast(0x2345);

    mockList.appendMemoryCopyRegion(destination, &destinationRegion, 0, 0, source, &sourceRegion, 0, 0, nullptr, 0, nullptr);

    EXPECT_EQ(mockList.appendMemoryCopyBlitRegionCalledTimes, 0u);
    EXPECT_GT(mockList.appendMemoryCopyKernel3dCalledTimes, 0u);
}

HWTEST2_F(CommandListCreate, givenImmediateCommandListWhenAppendingMemoryCopyThenSuccessIsReturned, IsAtLeastSkl) { const ze_command_queue_desc_t desc = {}; bool internalEngine = true; ze_result_t returnValue; std::unique_ptr 
commandList0(CommandList::createImmediate(productFamily, device, &desc, internalEngine, NEO::EngineGroupType::RenderCompute, returnValue)); ASSERT_NE(nullptr, commandList0); CommandQueueImp *cmdQueue = reinterpret_cast(commandList0->cmdQImmediate); EXPECT_EQ(cmdQueue->getCsr(), neoDevice->getInternalEngine().commandStreamReceiver); void *srcPtr = reinterpret_cast(0x1234); void *dstPtr = reinterpret_cast(0x2345); auto result = commandList0->appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 0, nullptr); ASSERT_EQ(ZE_RESULT_SUCCESS, result); } HWTEST2_F(CommandListCreate, givenImmediateCommandListWhenAppendingMemoryCopyWithInvalidEventThenInvalidArgumentErrorIsReturned, IsAtLeastSkl) { const ze_command_queue_desc_t desc = {}; bool internalEngine = true; ze_result_t returnValue; std::unique_ptr commandList0(CommandList::createImmediate(productFamily, device, &desc, internalEngine, NEO::EngineGroupType::RenderCompute, returnValue)); ASSERT_NE(nullptr, commandList0); CommandQueueImp *cmdQueue = reinterpret_cast(commandList0->cmdQImmediate); EXPECT_EQ(cmdQueue->getCsr(), neoDevice->getInternalEngine().commandStreamReceiver); void *srcPtr = reinterpret_cast(0x1234); void *dstPtr = reinterpret_cast(0x2345); auto result = commandList0->appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 1, nullptr); ASSERT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, result); } HWTEST2_F(CommandListCreate, givenCommandListAndHostPointersWhenMemoryCopyCalledThenPipeControlWithDcFlushAdded, IsAtLeastSkl) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; ze_result_t result; std::unique_ptr commandList0(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, result)); ASSERT_NE(nullptr, commandList0); void *srcPtr = reinterpret_cast(0x1234); void *dstPtr = reinterpret_cast(0x2345); result = commandList0->appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 0, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto &commandContainer = commandList0->commandContainer; GenCmdList genCmdList; 
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( genCmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); auto itor = find(genCmdList.begin(), genCmdList.end()); ASSERT_NE(genCmdList.end(), itor); PIPE_CONTROL *cmd = nullptr; while (itor != genCmdList.end()) { cmd = genCmdCast(*itor); itor = find(++itor, genCmdList.end()); } EXPECT_EQ(MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo), cmd->getDcFlushEnable()); } HWTEST2_F(CommandListCreate, givenCommandListAnd2DWhbufferenMemoryCopyRegionCalledThenCopyKernel2DCalled, IsAtLeastSkl) { MockCommandListHw cmdList; cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); void *srcPtr = reinterpret_cast(0x1234); void *dstPtr = reinterpret_cast(0x2345); ze_copy_region_t dstRegion = {4, 4, 0, 2, 2, 1}; ze_copy_region_t srcRegion = {4, 4, 0, 2, 2, 1}; cmdList.appendMemoryCopyRegion(dstPtr, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr, 0, nullptr); EXPECT_EQ(cmdList.appendMemoryCopyBlitRegionCalledTimes, 0u); EXPECT_GT(cmdList.appendMemoryCopyKernel2dCalledTimes, 0u); } HWTEST2_F(CommandListCreate, givenImmediateCommandListWithFlushTaskEnabledWhenAppendingMemoryCopyRegionThenSuccessIsReturned, IsAtLeastXeHpCore) { DebugManagerStateRestore restorer; NEO::DebugManager.flags.EnableFlushTaskSubmission.set(1); MockCommandListHw cmdList; cmdList.cmdListType = CommandList::CommandListType::TYPE_IMMEDIATE; cmdList.initialize(device, NEO::EngineGroupType::Compute, 0u); void *srcPtr = reinterpret_cast(0x1234); void *dstPtr = reinterpret_cast(0x2345); ze_copy_region_t dstRegion = {4, 4, 4, 2, 2, 2}; ze_copy_region_t srcRegion = {4, 4, 4, 2, 2, 2}; auto result = cmdList.appendMemoryCopyRegion(dstPtr, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr, 0, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } HWTEST2_F(CommandListCreate, givenCopyOnlyCommandListWhenAppendMemoryFillCalledThenAppendBlitFillCalled, IsAtLeastSkl) { 
MockCommandListHw cmdList; cmdList.initialize(device, NEO::EngineGroupType::Copy, 0u); void *dstPtr = reinterpret_cast(0x1234); int pattern = 1; cmdList.appendMemoryFill(dstPtr, reinterpret_cast(&pattern), sizeof(pattern), 0, nullptr, 0, nullptr); EXPECT_GT(cmdList.appendBlitFillCalledTimes, 0u); } HWTEST2_F(CommandListCreate, givenCommandListWhenAppendMemoryFillCalledThenAppendBlitFillNotCalled, IsAtLeastSkl) { MockCommandListHw cmdList; cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); void *dstPtr = reinterpret_cast(0x1234); int pattern = 1; cmdList.appendMemoryFill(dstPtr, reinterpret_cast(&pattern), sizeof(pattern), 0, nullptr, 0, nullptr); EXPECT_EQ(cmdList.appendBlitFillCalledTimes, 0u); } HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryCopyWithSignalEventsThenSemaphoreWaitAndPipeControlAreFound, IsAtLeastSkl) { using SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; ze_result_t result = ZE_RESULT_SUCCESS; std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, result)); auto &commandContainer = commandList->commandContainer; void *srcPtr = reinterpret_cast(0x1234); void *dstPtr = reinterpret_cast(0x2345); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 2; auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); std::vector events; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); events.push_back(event.get()); eventDesc.index = 1; auto event1 = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); events.push_back(event1.get()); result = commandList->appendMemoryCopy(dstPtr, srcPtr, 0x1001, nullptr, 2u, events.data()); EXPECT_EQ(ZE_RESULT_SUCCESS, 
result); result = commandList->appendMemoryCopy(dstPtr, srcPtr, 0x1001, nullptr, 2u, events.data()); EXPECT_EQ(ZE_RESULT_SUCCESS, result); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); auto itor = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), itor); itor++; itor = find(itor, cmdList.end()); EXPECT_NE(cmdList.end(), itor); itor++; itor = find(itor, cmdList.end()); EXPECT_NE(cmdList.end(), itor); itor++; itor = find(itor, cmdList.end()); EXPECT_NE(cmdList.end(), itor); itor++; itor = find(itor, cmdList.end()); if (MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo)) { EXPECT_NE(cmdList.end(), itor); } else { EXPECT_EQ(cmdList.end(), itor); } } using platformSupport = IsWithinProducts; HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryCopyWithSignalEventScopeSetToDeviceThenSinglePipeControlIsAddedWithDcFlush, platformSupport) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION; ze_result_t result = ZE_RESULT_SUCCESS; std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, result)); auto &commandContainer = commandList->commandContainer; void *srcPtr = reinterpret_cast(0x1234); void *dstPtr = reinterpret_cast(0x2345); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.signal = ZE_EVENT_SCOPE_FLAG_DEVICE; auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); result = commandList->appendMemoryCopy(dstPtr, srcPtr, 0x1001, event.get(), 0u, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); 
GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); auto iterator = findAll(cmdList.begin(), cmdList.end()); bool postSyncFound = false; ASSERT_NE(0u, iterator.size()); uint32_t numPCs = 0; for (auto it : iterator) { auto cmd = genCmdCast(*it); numPCs++; if ((cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) && (cmd->getImmediateData() == Event::STATE_SIGNALED) && (cmd->getDcFlushEnable())) { postSyncFound = true; break; } } ASSERT_TRUE(postSyncFound); EXPECT_EQ(numPCs, iterator.size()); } HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryCopyWithSignalEventScopeSetToSubDeviceThenB2BPipeControlIsAddedWithDcFlushForLastPC, platformSupport) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION; ze_result_t result = ZE_RESULT_SUCCESS; std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, result)); auto &commandContainer = commandList->commandContainer; void *srcPtr = reinterpret_cast(0x1234); void *dstPtr = reinterpret_cast(0x2345); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); ze_event_desc_t eventDesc = {}; eventDesc.index = 0; auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); result = commandList->appendMemoryCopy(dstPtr, srcPtr, 0x1001, event.get(), 0u, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); auto iterator = findAll(cmdList.begin(), 
cmdList.end()); bool postSyncFound = false; ASSERT_NE(0u, iterator.size()); uint32_t numPCs = 0; for (auto it : iterator) { auto cmd = genCmdCast(*it); numPCs++; if ((cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) && (cmd->getImmediateData() == Event::STATE_SIGNALED) && (!cmd->getDcFlushEnable())) { postSyncFound = true; break; } } ASSERT_TRUE(postSyncFound); EXPECT_EQ(numPCs, iterator.size() - 1); auto it = *(iterator.end() - 1); auto cmd1 = genCmdCast(*it); EXPECT_EQ(MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo), cmd1->getDcFlushEnable()); } using ImageSupport = IsWithinProducts; HWTEST2_F(CommandListCreate, givenCopyCommandListWhenCopyFromMemoryToImageThenBlitImageCopyCalled, ImageSupport) { MockCommandListHw cmdList; cmdList.initialize(device, NEO::EngineGroupType::Copy, 0u); void *srcPtr = reinterpret_cast(0x1234); ze_image_desc_t zeDesc = {}; zeDesc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC; auto imageHW = std::make_unique>>(); imageHW->initialize(device, &zeDesc); ze_image_region_t dstRegion = {4, 4, 4, 2, 2, 2}; cmdList.appendImageCopyFromMemory(imageHW->toHandle(), srcPtr, &dstRegion, nullptr, 0, nullptr); EXPECT_GT(cmdList.appendCopyImageBlitCalledTimes, 0u); } HWTEST2_F(CommandListCreate, givenCopyCommandListAndNullDestinationRegionWhenImageCopyFromMemoryThenBlitImageCopyCalledWithCorrectImageSize, ImageSupport) { MockCommandListHw cmdList; cmdList.initialize(device, NEO::EngineGroupType::Copy, 0u); void *srcPtr = reinterpret_cast(0x1234); ze_image_desc_t zeDesc = {}; zeDesc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC; auto imageHW = std::make_unique>>(); imageHW->initialize(device, &zeDesc); zeDesc.height = 1; zeDesc.depth = 1; Vec3 expectedRegionCopySize = {zeDesc.width, zeDesc.height, zeDesc.depth}; Vec3 expectedRegionOrigin = {0, 0, 0}; cmdList.appendImageCopyFromMemory(imageHW->toHandle(), srcPtr, nullptr, nullptr, 0, nullptr); EXPECT_EQ(cmdList.appendImageRegionCopySize, 
expectedRegionCopySize); EXPECT_EQ(cmdList.appendImageRegionDstOrigin, expectedRegionOrigin); }

// Image-to-memory copy with a null region on a Copy list: the size handed to
// the image-blit hook must equal {width, 1, 1} of the descriptor and the
// source origin must be {0, 0, 0}.
HWTEST2_F(CommandListCreate, givenCopyCommandListAndNullDestinationRegionWhenImageCopyToMemoryThenBlitImageCopyCalledWithCorrectImageSize, ImageSupport) {
    MockCommandListHw copyList;
    copyList.initialize(device, NEO::EngineGroupType::Copy, 0u);
    void *destination = reinterpret_cast(0x1234);

    ze_image_desc_t imageDesc = {};
    imageDesc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC;
    auto image = std::make_unique>>();
    image->initialize(device, &imageDesc);

    // Height and depth are set to 1 only after the image was initialised, to
    // build the expected copy size.
    imageDesc.height = 1;
    imageDesc.depth = 1;
    Vec3 wantedOrigin = {0, 0, 0};
    Vec3 wantedCopySize = {imageDesc.width, imageDesc.height, imageDesc.depth};

    copyList.appendImageCopyToMemory(destination, image->toHandle(), nullptr, nullptr, 0, nullptr);

    EXPECT_EQ(copyList.appendImageRegionCopySize, wantedCopySize);
    EXPECT_EQ(copyList.appendImageRegionSrcOrigin, wantedOrigin);
}

// 1D image created with height = depth = 9, memory-to-image copy with a null
// region: the blit size must still be {width, 1, 1}, destination origin {0, 0, 0}.
HWTEST2_F(CommandListCreate, givenCopyCommandListAndNullDestinationRegionWhen1DImageCopyFromMemoryWithInvalidHeightAndDepthThenBlitImageCopyCalledWithCorrectImageSize, ImageSupport) {
    MockCommandListHw copyList;
    copyList.initialize(device, NEO::EngineGroupType::Copy, 0u);
    void *source = reinterpret_cast(0x1234);

    ze_image_desc_t imageDesc = {};
    imageDesc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC;
    imageDesc.type = ZE_IMAGE_TYPE_1D;
    imageDesc.depth = 9;
    imageDesc.height = 9;
    auto image = std::make_unique>>();
    image->initialize(device, &imageDesc);

    imageDesc.height = 1;
    imageDesc.depth = 1;
    Vec3 wantedOrigin = {0, 0, 0};
    Vec3 wantedCopySize = {imageDesc.width, imageDesc.height, imageDesc.depth};

    copyList.appendImageCopyFromMemory(image->toHandle(), source, nullptr, nullptr, 0, nullptr);

    EXPECT_EQ(copyList.appendImageRegionCopySize, wantedCopySize);
    EXPECT_EQ(copyList.appendImageRegionDstOrigin, wantedOrigin);
}

// 1D image created with height = depth = 9, image-to-memory copy with a null
// region: blit size {width, 1, 1}, source origin {0, 0, 0}.
HWTEST2_F(CommandListCreate, givenCopyCommandListAndNullDestinationRegionWhen1DImageCopyToMemoryWithInvalidHeightAndDepthThenBlitImageCopyCalledWithCorrectImageSize, ImageSupport) {
    MockCommandListHw copyList;
    copyList.initialize(device, NEO::EngineGroupType::Copy, 0u);
    void *destination = reinterpret_cast(0x1234);

    ze_image_desc_t imageDesc = {};
    imageDesc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC;
    imageDesc.type = ZE_IMAGE_TYPE_1D;
    imageDesc.depth = 9;
    imageDesc.height = 9;
    auto image = std::make_unique>>();
    image->initialize(device, &imageDesc);

    imageDesc.height = 1;
    imageDesc.depth = 1;
    Vec3 wantedOrigin = {0, 0, 0};
    Vec3 wantedCopySize = {imageDesc.width, imageDesc.height, imageDesc.depth};

    copyList.appendImageCopyToMemory(destination, image->toHandle(), nullptr, nullptr, 0, nullptr);

    EXPECT_EQ(copyList.appendImageRegionCopySize, wantedCopySize);
    EXPECT_EQ(copyList.appendImageRegionSrcOrigin, wantedOrigin);
}

// 1D-array image created with height = depth = 9, memory-to-image copy with a
// null region: blit size {width, 1, 1}, destination origin {0, 0, 0}.
HWTEST2_F(CommandListCreate, givenCopyCommandListAndNullDestinationRegionWhen1DArrayImageCopyFromMemoryWithInvalidHeightAndDepthThenBlitImageCopyCalledWithCorrectImageSize, ImageSupport) {
    MockCommandListHw copyList;
    copyList.initialize(device, NEO::EngineGroupType::Copy, 0u);
    void *source = reinterpret_cast(0x1234);

    ze_image_desc_t imageDesc = {};
    imageDesc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC;
    imageDesc.type = ZE_IMAGE_TYPE_1DARRAY;
    imageDesc.depth = 9;
    imageDesc.height = 9;
    auto image = std::make_unique>>();
    image->initialize(device, &imageDesc);

    imageDesc.height = 1;
    imageDesc.depth = 1;
    Vec3 wantedOrigin = {0, 0, 0};
    Vec3 wantedCopySize = {imageDesc.width, imageDesc.height, imageDesc.depth};

    copyList.appendImageCopyFromMemory(image->toHandle(), source, nullptr, nullptr, 0, nullptr);

    EXPECT_EQ(copyList.appendImageRegionCopySize, wantedCopySize);
    EXPECT_EQ(copyList.appendImageRegionDstOrigin, wantedOrigin);
}

HWTEST2_F(CommandListCreate, givenCopyCommandListAndNullDestinationRegionWhen1DArrayImageCopyToMemoryWithInvalidHeightAndDepthThenBlitImageCopyCalledWithCorrectImageSize, ImageSupport) { MockCommandListHw cmdList; cmdList.initialize(device, NEO::EngineGroupType::Copy, 0u); void *dstPtr = reinterpret_cast(0x1234); 
ze_image_desc_t zeDesc = {}; zeDesc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC; zeDesc.type = ZE_IMAGE_TYPE_1DARRAY; zeDesc.height = 9; zeDesc.depth = 9; auto imageHW = std::make_unique>>(); imageHW->initialize(device, &zeDesc); zeDesc.height = 1; zeDesc.depth = 1; Vec3 expectedRegionCopySize = {zeDesc.width, zeDesc.height, zeDesc.depth}; Vec3 expectedRegionOrigin = {0, 0, 0}; cmdList.appendImageCopyToMemory(dstPtr, imageHW->toHandle(), nullptr, nullptr, 0, nullptr); EXPECT_EQ(cmdList.appendImageRegionCopySize, expectedRegionCopySize); EXPECT_EQ(cmdList.appendImageRegionSrcOrigin, expectedRegionOrigin); } HWTEST2_F(CommandListCreate, givenCopyCommandListAndNullDestinationRegionWhen2DImageCopyToMemoryThenBlitImageCopyCalledWithCorrectImageSize, ImageSupport) { MockCommandListHw cmdList; cmdList.initialize(device, NEO::EngineGroupType::Copy, 0u); void *dstPtr = reinterpret_cast(0x1234); ze_image_desc_t zeDesc = {}; zeDesc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC; zeDesc.type = ZE_IMAGE_TYPE_2D; zeDesc.height = 2; auto imageHW = std::make_unique>>(); imageHW->initialize(device, &zeDesc); zeDesc.depth = 1; Vec3 expectedRegionCopySize = {zeDesc.width, zeDesc.height, zeDesc.depth}; Vec3 expectedRegionOrigin = {0, 0, 0}; cmdList.appendImageCopyToMemory(dstPtr, imageHW->toHandle(), nullptr, nullptr, 0, nullptr); EXPECT_EQ(cmdList.appendImageRegionCopySize, expectedRegionCopySize); EXPECT_EQ(cmdList.appendImageRegionSrcOrigin, expectedRegionOrigin); } HWTEST2_F(CommandListCreate, givenCopyCommandListAndNullDestinationRegionWhen2DImageCopyFromMemoryThenBlitImageCopyCalledWithCorrectImageSize, ImageSupport) { MockCommandListHw cmdList; cmdList.initialize(device, NEO::EngineGroupType::Copy, 0u); void *srcPtr = reinterpret_cast(0x1234); ze_image_desc_t zeDesc = {}; zeDesc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC; zeDesc.type = ZE_IMAGE_TYPE_2D; zeDesc.height = 2; auto imageHW = std::make_unique>>(); imageHW->initialize(device, &zeDesc); zeDesc.depth = 1; Vec3 expectedRegionCopySize = 
{zeDesc.width, zeDesc.height, zeDesc.depth}; Vec3 expectedRegionOrigin = {0, 0, 0}; cmdList.appendImageCopyFromMemory(imageHW->toHandle(), srcPtr, nullptr, nullptr, 0, nullptr); EXPECT_EQ(cmdList.appendImageRegionCopySize, expectedRegionCopySize); EXPECT_EQ(cmdList.appendImageRegionDstOrigin, expectedRegionOrigin); } HWTEST2_F(CommandListCreate, givenCopyCommandListAndNullDestinationRegionWhen2DImageCopyToMemoryWithInvalidDepthThenBlitImageCopyCalledWithCorrectImageSize, ImageSupport) { MockCommandListHw cmdList; cmdList.initialize(device, NEO::EngineGroupType::Copy, 0u); void *dstPtr = reinterpret_cast(0x1234); ze_image_desc_t zeDesc = {}; zeDesc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC; zeDesc.type = ZE_IMAGE_TYPE_2D; zeDesc.height = 2; zeDesc.depth = 9; auto imageHW = std::make_unique>>(); imageHW->initialize(device, &zeDesc); zeDesc.depth = 1; Vec3 expectedRegionCopySize = {zeDesc.width, zeDesc.height, zeDesc.depth}; Vec3 expectedRegionOrigin = {0, 0, 0}; cmdList.appendImageCopyToMemory(dstPtr, imageHW->toHandle(), nullptr, nullptr, 0, nullptr); EXPECT_EQ(cmdList.appendImageRegionCopySize, expectedRegionCopySize); EXPECT_EQ(cmdList.appendImageRegionSrcOrigin, expectedRegionOrigin); } HWTEST2_F(CommandListCreate, givenCopyCommandListAndNullDestinationRegionWhen2DImageCopyFromMemoryWithInvalidDepthThenBlitImageCopyCalledWithCorrectImageSize, ImageSupport) { MockCommandListHw cmdList; cmdList.initialize(device, NEO::EngineGroupType::Copy, 0u); void *srcPtr = reinterpret_cast(0x1234); ze_image_desc_t zeDesc = {}; zeDesc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC; zeDesc.type = ZE_IMAGE_TYPE_2D; zeDesc.height = 2; zeDesc.depth = 9; auto imageHW = std::make_unique>>(); imageHW->initialize(device, &zeDesc); zeDesc.depth = 1; Vec3 expectedRegionCopySize = {zeDesc.width, zeDesc.height, zeDesc.depth}; Vec3 expectedRegionOrigin = {0, 0, 0}; cmdList.appendImageCopyFromMemory(imageHW->toHandle(), srcPtr, nullptr, nullptr, 0, nullptr); EXPECT_EQ(cmdList.appendImageRegionCopySize, 
expectedRegionCopySize); EXPECT_EQ(cmdList.appendImageRegionDstOrigin, expectedRegionOrigin); } HWTEST2_F(CommandListCreate, givenCopyCommandListAndNullDestinationRegionWhen3DImageCopyToMemoryThenBlitImageCopyCalledWithCorrectImageSize, ImageSupport) { MockCommandListHw cmdList; cmdList.initialize(device, NEO::EngineGroupType::Copy, 0u); void *dstPtr = reinterpret_cast(0x1234); ze_image_desc_t zeDesc = {}; zeDesc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC; zeDesc.type = ZE_IMAGE_TYPE_3D; zeDesc.height = 2; zeDesc.depth = 2; auto imageHW = std::make_unique>>(); imageHW->initialize(device, &zeDesc); Vec3 expectedRegionCopySize = {zeDesc.width, zeDesc.height, zeDesc.depth}; Vec3 expectedRegionOrigin = {0, 0, 0}; cmdList.appendImageCopyToMemory(dstPtr, imageHW->toHandle(), nullptr, nullptr, 0, nullptr); EXPECT_EQ(cmdList.appendImageRegionCopySize, expectedRegionCopySize); EXPECT_EQ(cmdList.appendImageRegionSrcOrigin, expectedRegionOrigin); } HWTEST2_F(CommandListCreate, givenCopyCommandListAndNullDestinationRegionWhen3DImageCopyFromMemoryThenBlitImageCopyCalledWithCorrectImageSize, ImageSupport) { MockCommandListHw cmdList; cmdList.initialize(device, NEO::EngineGroupType::Copy, 0u); void *srcPtr = reinterpret_cast(0x1234); ze_image_desc_t zeDesc = {}; zeDesc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC; zeDesc.type = ZE_IMAGE_TYPE_3D; zeDesc.height = 2; zeDesc.depth = 2; auto imageHW = std::make_unique>>(); imageHW->initialize(device, &zeDesc); Vec3 expectedRegionCopySize = {zeDesc.width, zeDesc.height, zeDesc.depth}; Vec3 expectedRegionOrigin = {0, 0, 0}; cmdList.appendImageCopyFromMemory(imageHW->toHandle(), srcPtr, nullptr, nullptr, 0, nullptr); EXPECT_EQ(cmdList.appendImageRegionCopySize, expectedRegionCopySize); EXPECT_EQ(cmdList.appendImageRegionDstOrigin, expectedRegionOrigin); } HWTEST2_F(CommandListCreate, givenCopyCommandListWhenCopyFromImageToMemoryThenBlitImageCopyCalled, ImageSupport) { MockCommandListHw cmdList; cmdList.initialize(device, NEO::EngineGroupType::Copy, 
0u); void *dstPtr = reinterpret_cast(0x1234); ze_image_desc_t zeDesc = {}; zeDesc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC; auto imageHW = std::make_unique>>(); imageHW->initialize(device, &zeDesc); ze_image_region_t srcRegion = {4, 4, 4, 2, 2, 2}; cmdList.appendImageCopyToMemory(dstPtr, imageHW->toHandle(), &srcRegion, nullptr, 0, nullptr); EXPECT_GT(cmdList.appendCopyImageBlitCalledTimes, 0u); } HWTEST2_F(CommandListCreate, givenCopyCommandListWhenCopyFromImageToImageThenBlitImageCopyCalled, ImageSupport) { MockCommandListHw cmdList; cmdList.initialize(device, NEO::EngineGroupType::Copy, 0u); ze_image_desc_t zeDesc = {}; zeDesc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC; auto imageHWSrc = std::make_unique>>(); auto imageHWDst = std::make_unique>>(); imageHWSrc->initialize(device, &zeDesc); imageHWDst->initialize(device, &zeDesc); ze_image_region_t srcRegion = {4, 4, 4, 2, 2, 2}; ze_image_region_t dstRegion = {4, 4, 4, 2, 2, 2}; cmdList.appendImageCopyRegion(imageHWDst->toHandle(), imageHWSrc->toHandle(), &dstRegion, &srcRegion, nullptr, 0, nullptr); EXPECT_GT(cmdList.appendCopyImageBlitCalledTimes, 0u); } using BlitBlockCopyPlatforms = IsWithinProducts; HWTEST2_F(CommandListCreate, givenCopyCommandListWhenCopyRegionWithinMaxBlitSizeThenOneBlitCommandHasBeenSpown, BlitBlockCopyPlatforms) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using XY_COPY_BLT = typename GfxFamily::XY_COPY_BLT; auto commandList = std::make_unique>>(); commandList->initialize(device, NEO::EngineGroupType::Copy, 0u); uint32_t offsetX = 0x10; uint32_t offsetY = 0x10; Vec3 copySize = {0x100, 0x10, 1}; ze_copy_region_t srcRegion = {offsetX, offsetY, 0, static_cast(copySize.x), static_cast(copySize.y), static_cast(copySize.z)}; ze_copy_region_t dstRegion = srcRegion; Vec3 srcSize = {0x1000, 0x100, 1}; Vec3 dstSize = {0x100, 0x100, 1}; NEO::MockGraphicsAllocation mockAllocationSrc(0, NEO::AllocationType::INTERNAL_HOST_MEMORY, reinterpret_cast(0x1234), 0x1000, 0, sizeof(uint32_t), 
MemoryPool::System4KBPages); NEO::MockGraphicsAllocation mockAllocationDst(0, NEO::AllocationType::INTERNAL_HOST_MEMORY, reinterpret_cast(0x1234), 0x1000, 0, sizeof(uint32_t), MemoryPool::System4KBPages); size_t rowPitch = copySize.x; size_t slicePitch = copySize.x * copySize.y; commandList->appendMemoryCopyBlitRegion(&mockAllocationDst, &mockAllocationSrc, 0, 0, srcRegion, dstRegion, copySize, rowPitch, slicePitch, rowPitch, slicePitch, srcSize, dstSize, nullptr, 0, nullptr); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed())); auto itor = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), itor); itor++; itor = find(itor, cmdList.end()); EXPECT_EQ(cmdList.end(), itor); } HWTEST2_F(CommandListCreate, givenCopyCommandListWhenCopyRegionWithinMaxBlitSizeThenDestinationCoordinatesAreCorrectlySet, BlitBlockCopyPlatforms) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using XY_COPY_BLT = typename GfxFamily::XY_COPY_BLT; auto commandList = std::make_unique>>(); commandList->initialize(device, NEO::EngineGroupType::Copy, 0u); uint32_t offsetX = 0x10; uint32_t offsetY = 0x10; Vec3 copySize = {0x100, 0x10, 1}; ze_copy_region_t srcRegion = {offsetX, offsetY, 0, static_cast(copySize.x), static_cast(copySize.y), static_cast(copySize.z)}; ze_copy_region_t dstRegion = srcRegion; Vec3 srcSize = {0x1000, 0x100, 1}; Vec3 dstSize = {0x100, 0x100, 1}; NEO::MockGraphicsAllocation mockAllocationSrc(0, NEO::AllocationType::INTERNAL_HOST_MEMORY, reinterpret_cast(0x1234), 0x1000, 0, sizeof(uint32_t), MemoryPool::System4KBPages); NEO::MockGraphicsAllocation mockAllocationDst(0, NEO::AllocationType::INTERNAL_HOST_MEMORY, reinterpret_cast(0x1234), 0x1000, 0, sizeof(uint32_t), MemoryPool::System4KBPages); size_t rowPitch = copySize.x; size_t slicePitch = copySize.x * copySize.y; 
commandList->appendMemoryCopyBlitRegion(&mockAllocationDst, &mockAllocationSrc, 0, 0, srcRegion, dstRegion, copySize, rowPitch, slicePitch, rowPitch, slicePitch, srcSize, dstSize, nullptr, 0, nullptr); uint32_t bytesPerPixel = NEO::BlitCommandsHelper::getAvailableBytesPerPixel(copySize.x, srcRegion.originX, dstRegion.originY, srcSize.x, dstSize.x); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed())); auto itor = find(cmdList.begin(), cmdList.end()); auto cmd = genCmdCast(*itor); EXPECT_EQ(cmd->getDestinationX2CoordinateRight(), static_cast(copySize.x) / bytesPerPixel); EXPECT_EQ(cmd->getDestinationY2CoordinateBottom(), static_cast(copySize.y)); } HWTEST2_F(CommandListCreate, givenCopyCommandListWhenCopyRegionGreaterThanMaxBlitSizeThenMoreThanOneBlitCommandHasBeenSpown, BlitBlockCopyPlatforms) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using XY_COPY_BLT = typename GfxFamily::XY_COPY_BLT; auto commandList = std::make_unique>>(); commandList->initialize(device, NEO::EngineGroupType::Copy, 0u); uint32_t offsetX = 0x1; uint32_t offsetY = 0x1; Vec3 copySize = {BlitterConstants::maxBlitWidth + 0x100, 0x10, 1}; ze_copy_region_t srcRegion = {offsetX, offsetY, 0, static_cast(copySize.x), static_cast(copySize.y), static_cast(copySize.z)}; ze_copy_region_t dstRegion = srcRegion; Vec3 srcSize = {2 * BlitterConstants::maxBlitWidth, 2 * BlitterConstants::maxBlitHeight, 1}; Vec3 dstSize = srcSize; NEO::MockGraphicsAllocation mockAllocationSrc(0, NEO::AllocationType::INTERNAL_HOST_MEMORY, reinterpret_cast(0x1234), 0x1000, 0, sizeof(uint32_t), MemoryPool::System4KBPages); NEO::MockGraphicsAllocation mockAllocationDst(0, NEO::AllocationType::INTERNAL_HOST_MEMORY, reinterpret_cast(0x1234), 0x1000, 0, sizeof(uint32_t), MemoryPool::System4KBPages); size_t rowPitch = copySize.x; size_t slicePitch = 
copySize.x * copySize.y; commandList->appendMemoryCopyBlitRegion(&mockAllocationDst, &mockAllocationSrc, 0, 0, srcRegion, dstRegion, copySize, rowPitch, slicePitch, rowPitch, slicePitch, srcSize, dstSize, nullptr, 0, nullptr); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed())); auto itor = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), itor); itor++; EXPECT_NE(cmdList.end(), itor); } template class MockCommandListForRegionSize : public WhiteBox<::L0::CommandListCoreFamily> { public: MockCommandListForRegionSize() : WhiteBox<::L0::CommandListCoreFamily>() {} AlignedAllocationData getAlignedAllocation(L0::Device *device, const void *buffer, uint64_t bufferSize, bool allowHostCopy) override { return {0, 0, nullptr, true}; } ze_result_t appendMemoryCopyBlitRegion(NEO::GraphicsAllocation *srcAllocation, NEO::GraphicsAllocation *dstAllocation, size_t srcOffset, size_t dstOffset, ze_copy_region_t srcRegion, ze_copy_region_t dstRegion, const Vec3 ©Size, size_t srcRowPitch, size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch, const Vec3 &srcSize, const Vec3 &dstSize, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override { this->srcSize = srcSize; this->dstSize = dstSize; return ZE_RESULT_SUCCESS; } Vec3 srcSize = {0, 0, 0}; Vec3 dstSize = {0, 0, 0}; }; HWTEST2_F(CommandListCreate, givenZeroAsPitchAndSlicePitchWhenMemoryCopyRegionCalledThenSizesEqualOffsetPlusCopySize, IsAtLeastSkl) { MockCommandListForRegionSize cmdList; cmdList.initialize(device, NEO::EngineGroupType::Copy, 0u); void *srcPtr = reinterpret_cast(0x1234); void *dstPtr = reinterpret_cast(0x2345); ze_copy_region_t dstRegion = {0x10, 0x10, 0, 0x100, 0x100, 1}; ze_copy_region_t srcRegion = dstRegion; uint32_t pitch = 0; uint32_t slicePitch = 0; 
cmdList.appendMemoryCopyRegion(dstPtr, &dstRegion, pitch, slicePitch, srcPtr, &srcRegion, pitch, slicePitch, nullptr, 0, nullptr); EXPECT_EQ(cmdList.dstSize.x, dstRegion.width + dstRegion.originX); EXPECT_EQ(cmdList.dstSize.y, dstRegion.height + dstRegion.originY); EXPECT_EQ(cmdList.dstSize.z, dstRegion.depth + dstRegion.originZ); EXPECT_EQ(cmdList.srcSize.x, srcRegion.width + srcRegion.originX); EXPECT_EQ(cmdList.srcSize.y, srcRegion.height + srcRegion.originY); EXPECT_EQ(cmdList.srcSize.z, srcRegion.depth + srcRegion.originZ); } HWTEST2_F(CommandListCreate, givenPitchAndSlicePitchWhenMemoryCopyRegionCalledThenSizesAreBasedOnPitch, IsAtLeastSkl) { MockCommandListForRegionSize cmdList; cmdList.initialize(device, NEO::EngineGroupType::Copy, 0u); void *srcPtr = reinterpret_cast(0x1234); void *dstPtr = reinterpret_cast(0x2345); ze_copy_region_t dstRegion = {0x10, 0x10, 0, 0x100, 0x100, 1}; ze_copy_region_t srcRegion = dstRegion; uint32_t pitch = 0x1000; uint32_t slicePitch = 0x100000; cmdList.appendMemoryCopyRegion(dstPtr, &dstRegion, pitch, slicePitch, srcPtr, &srcRegion, pitch, slicePitch, nullptr, 0, nullptr); EXPECT_EQ(cmdList.dstSize.x, pitch); EXPECT_EQ(cmdList.dstSize.y, slicePitch / pitch); EXPECT_EQ(cmdList.srcSize.x, pitch); EXPECT_EQ(cmdList.srcSize.y, slicePitch / pitch); } using SupportedPlatforms = IsWithinProducts; HWTEST2_F(CommandListCreate, givenCommandListThenSshCorrectlyReserved, SupportedPlatforms) { MockCommandListHw commandList; commandList.initialize(device, NEO::EngineGroupType::Compute, 0u); auto &helper = NEO::HwHelper::get(commandList.device->getHwInfo().platform.eRenderCoreFamily); auto size = helper.getRenderSurfaceStateSize(); EXPECT_EQ(commandList.getReserveSshSize(), size); } using CommandListAppendMemoryCopyBlit = Test; HWTEST2_F(CommandListAppendMemoryCopyBlit, whenAppendMemoryCopyBlitIsAppendedAndNoSpaceIsAvailableThenNextCommandBufferIsCreated, IsAtLeastSkl) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using 
POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION; using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END; uint64_t size = 1024; ze_result_t res = ZE_RESULT_SUCCESS; std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::Copy, 0u, res)); auto firstBatchBufferAllocation = commandList->commandContainer.getCommandStream()->getGraphicsAllocation(); auto useSize = commandList->commandContainer.getCommandStream()->getAvailableSpace(); useSize -= sizeof(MI_BATCH_BUFFER_END); commandList->commandContainer.getCommandStream()->getSpace(useSize); NEO::MockGraphicsAllocation mockAllocationSrc(0, NEO::AllocationType::INTERNAL_HOST_MEMORY, reinterpret_cast(0x1234), size, 0, sizeof(uint32_t), MemoryPool::System4KBPages); void *srcPtr = reinterpret_cast(mockAllocationSrc.getGpuAddress()); NEO::MockGraphicsAllocation mockAllocationDst(0, NEO::AllocationType::INTERNAL_HOST_MEMORY, reinterpret_cast(0x2345), size, 0, sizeof(uint32_t), MemoryPool::System4KBPages); void *dstPtr = reinterpret_cast(mockAllocationDst.getGpuAddress()); auto result = commandList->appendMemoryCopy(dstPtr, srcPtr, size, nullptr, 0, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto secondBatchBufferAllocation = commandList->commandContainer.getCommandStream()->getGraphicsAllocation(); EXPECT_NE(firstBatchBufferAllocation, secondBatchBufferAllocation); } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_3.cpp000066400000000000000000001727661422164147700324210ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/source/image/image_hw.h" #include 
"level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h" namespace L0 { namespace ult { struct MemoryManagerCommandListCreateNegativeTest : public NEO::MockMemoryManager { MemoryManagerCommandListCreateNegativeTest(NEO::ExecutionEnvironment &executionEnvironment) : NEO::MockMemoryManager(const_cast(executionEnvironment)) {} NEO::GraphicsAllocation *allocateGraphicsMemoryWithProperties(const NEO::AllocationProperties &properties) override { if (forceFailureInPrimaryAllocation) { return nullptr; } return NEO::MemoryManager::allocateGraphicsMemoryWithProperties(properties); } bool forceFailureInPrimaryAllocation = false; }; struct CommandListCreateNegativeTest : public ::testing::Test { void SetUp() override { executionEnvironment = new NEO::ExecutionEnvironment(); executionEnvironment->prepareRootDeviceEnvironments(numRootDevices); for (uint32_t i = 0; i < numRootDevices; i++) { executionEnvironment->rootDeviceEnvironments[i]->setHwInfo(NEO::defaultHwInfo.get()); } memoryManager = new MemoryManagerCommandListCreateNegativeTest(*executionEnvironment); executionEnvironment->memoryManager.reset(memoryManager); std::vector> devices; for (uint32_t i = 0; i < numRootDevices; i++) { neoDevice = NEO::MockDevice::create(executionEnvironment, i); devices.push_back(std::unique_ptr(neoDevice)); } driverHandle = std::make_unique>(); driverHandle->initialize(std::move(devices)); device = driverHandle->devices[0]; } void TearDown() override { } NEO::ExecutionEnvironment *executionEnvironment = nullptr; std::unique_ptr> driverHandle; NEO::MockDevice *neoDevice = nullptr; L0::Device *device = nullptr; MemoryManagerCommandListCreateNegativeTest *memoryManager = nullptr; const uint32_t numRootDevices = 1u; }; TEST_F(CommandListCreateNegativeTest, whenDeviceAllocationFailsDuringCommandListCreateThenAppropriateValueIsReturned) { ze_result_t returnValue; 
memoryManager->forceFailureInPrimaryAllocation = true; std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)); EXPECT_EQ(ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY, returnValue); ASSERT_EQ(nullptr, commandList); } TEST_F(CommandListCreateNegativeTest, whenDeviceAllocationFailsDuringCommandListImmediateCreateThenAppropriateValueIsReturned) { ze_result_t returnValue; const ze_command_queue_desc_t desc = {}; bool internalEngine = true; memoryManager->forceFailureInPrimaryAllocation = true; std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, internalEngine, NEO::EngineGroupType::RenderCompute, returnValue)); EXPECT_EQ(ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY, returnValue); ASSERT_EQ(nullptr, commandList); } using CommandListCreate = Test; HWTEST2_F(CommandListCreate, givenHostAllocInMapWhenGettingAllocInRangeThenAllocFromMapReturned, IsAtLeastSkl) { auto commandList = std::make_unique>>(); commandList->initialize(device, NEO::EngineGroupType::Copy, 0u); uint64_t gpuAddress = 0x1200; const void *cpuPtr = reinterpret_cast(gpuAddress); size_t allocSize = 0x1000; NEO::MockGraphicsAllocation alloc(const_cast(cpuPtr), gpuAddress, allocSize); commandList->hostPtrMap.insert(std::make_pair(cpuPtr, &alloc)); EXPECT_EQ(commandList->getHostPtrMap().size(), 1u); auto newBufferPtr = ptrOffset(cpuPtr, 0x10); auto newBufferSize = allocSize - 0x20; auto newAlloc = commandList->getAllocationFromHostPtrMap(newBufferPtr, newBufferSize); EXPECT_NE(newAlloc, nullptr); commandList->hostPtrMap.clear(); } HWTEST2_F(CommandListCreate, givenHostAllocInMapWhenSizeIsOutOfRangeThenNullPtrReturned, IsAtLeastSkl) { auto commandList = std::make_unique>>(); commandList->initialize(device, NEO::EngineGroupType::Copy, 0u); uint64_t gpuAddress = 0x1200; const void *cpuPtr = reinterpret_cast(gpuAddress); size_t allocSize = 0x1000; NEO::MockGraphicsAllocation alloc(const_cast(cpuPtr), gpuAddress, allocSize); 
commandList->hostPtrMap.insert(std::make_pair(cpuPtr, &alloc)); EXPECT_EQ(commandList->getHostPtrMap().size(), 1u); auto newBufferPtr = ptrOffset(cpuPtr, 0x10); auto newBufferSize = allocSize + 0x20; auto newAlloc = commandList->getAllocationFromHostPtrMap(newBufferPtr, newBufferSize); EXPECT_EQ(newAlloc, nullptr); commandList->hostPtrMap.clear(); } HWTEST2_F(CommandListCreate, givenHostAllocInMapWhenPtrIsOutOfRangeThenNullPtrReturned, IsAtLeastSkl) { auto commandList = std::make_unique>>(); commandList->initialize(device, NEO::EngineGroupType::Copy, 0u); uint64_t gpuAddress = 0x1200; const void *cpuPtr = reinterpret_cast(gpuAddress); size_t allocSize = 0x1000; NEO::MockGraphicsAllocation alloc(const_cast(cpuPtr), gpuAddress, allocSize); commandList->hostPtrMap.insert(std::make_pair(cpuPtr, &alloc)); EXPECT_EQ(commandList->getHostPtrMap().size(), 1u); auto newBufferPtr = reinterpret_cast(gpuAddress - 0x100); auto newBufferSize = allocSize - 0x200; auto newAlloc = commandList->getAllocationFromHostPtrMap(newBufferPtr, newBufferSize); EXPECT_EQ(newAlloc, nullptr); commandList->hostPtrMap.clear(); } HWTEST2_F(CommandListCreate, givenHostAllocInMapWhenGetHostPtrAllocCalledThenCorrectOffsetIsSet, IsAtLeastSkl) { auto commandList = std::make_unique>>(); commandList->initialize(device, NEO::EngineGroupType::Copy, 0u); uint64_t gpuAddress = 0x1200; const void *cpuPtr = reinterpret_cast(gpuAddress); size_t allocSize = 0x1000; NEO::MockGraphicsAllocation alloc(const_cast(cpuPtr), gpuAddress, allocSize); commandList->hostPtrMap.insert(std::make_pair(cpuPtr, &alloc)); EXPECT_EQ(commandList->getHostPtrMap().size(), 1u); size_t expectedOffset = 0x10; auto newBufferPtr = ptrOffset(cpuPtr, expectedOffset); auto newBufferSize = allocSize - 0x20; auto newAlloc = commandList->getHostPtrAlloc(newBufferPtr, newBufferSize, false); EXPECT_NE(nullptr, newAlloc); commandList->hostPtrMap.clear(); } HWTEST2_F(CommandListCreate, givenHostAllocInMapWhenPtrIsInMapThenAllocationReturned, 
IsAtLeastSkl) { auto commandList = std::make_unique>>(); commandList->initialize(device, NEO::EngineGroupType::Copy, 0u); uint64_t gpuAddress = 0x1200; const void *cpuPtr = reinterpret_cast(gpuAddress); size_t allocSize = 0x1000; NEO::MockGraphicsAllocation alloc(const_cast(cpuPtr), gpuAddress, allocSize); commandList->hostPtrMap.insert(std::make_pair(cpuPtr, &alloc)); EXPECT_EQ(commandList->getHostPtrMap().size(), 1u); auto newBufferPtr = cpuPtr; auto newBufferSize = allocSize - 0x20; auto newAlloc = commandList->getAllocationFromHostPtrMap(newBufferPtr, newBufferSize); EXPECT_EQ(newAlloc, &alloc); commandList->hostPtrMap.clear(); } HWTEST2_F(CommandListCreate, givenHostAllocInMapWhenPtrIsInMapButWithBiggerSizeThenNullPtrReturned, IsAtLeastSkl) { auto commandList = std::make_unique>>(); commandList->initialize(device, NEO::EngineGroupType::Copy, 0u); uint64_t gpuAddress = 0x1200; const void *cpuPtr = reinterpret_cast(gpuAddress); size_t allocSize = 0x1000; NEO::MockGraphicsAllocation alloc(const_cast(cpuPtr), gpuAddress, allocSize); commandList->hostPtrMap.insert(std::make_pair(cpuPtr, &alloc)); EXPECT_EQ(commandList->getHostPtrMap().size(), 1u); auto newBufferPtr = cpuPtr; auto newBufferSize = allocSize + 0x20; auto newAlloc = commandList->getAllocationFromHostPtrMap(newBufferPtr, newBufferSize); EXPECT_EQ(newAlloc, nullptr); commandList->hostPtrMap.clear(); } HWTEST2_F(CommandListCreate, givenHostAllocInMapWhenPtrLowerThanAnyInMapThenNullPtrReturned, IsAtLeastSkl) { auto commandList = std::make_unique>>(); commandList->initialize(device, NEO::EngineGroupType::Copy, 0u); uint64_t gpuAddress = 0x1200; const void *cpuPtr = reinterpret_cast(gpuAddress); size_t allocSize = 0x1000; NEO::MockGraphicsAllocation alloc(const_cast(cpuPtr), gpuAddress, allocSize); commandList->hostPtrMap.insert(std::make_pair(cpuPtr, &alloc)); EXPECT_EQ(commandList->getHostPtrMap().size(), 1u); auto newBufferPtr = reinterpret_cast(gpuAddress - 0x10); auto newBufferSize = allocSize - 0x20; 
auto newAlloc = commandList->getAllocationFromHostPtrMap(newBufferPtr, newBufferSize); EXPECT_EQ(newAlloc, nullptr); commandList->hostPtrMap.clear(); } HWTEST2_F(CommandListCreate, givenCmdListHostPointerUsedWhenGettingAlignedAllocationThenRetrieveProperOffsetAndAddress, IsAtLeastSkl) { auto commandList = std::make_unique<::L0::ult::CommandListCoreFamily>(); commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u); size_t cmdListHostPtrSize = MemoryConstants::pageSize; void *cmdListHostBuffer = device->getNEODevice()->getMemoryManager()->allocateSystemMemory(cmdListHostPtrSize, cmdListHostPtrSize); void *startMemory = cmdListHostBuffer; void *baseAddress = alignDown(startMemory, MemoryConstants::pageSize); size_t expectedOffset = ptrDiff(startMemory, baseAddress); AlignedAllocationData outData = commandList->getAlignedAllocation(device, startMemory, cmdListHostPtrSize, false); ASSERT_NE(nullptr, outData.alloc); auto firstAlloc = outData.alloc; auto expectedGpuAddress = static_cast(alignDown(outData.alloc->getGpuAddress(), MemoryConstants::pageSize)); EXPECT_EQ(startMemory, outData.alloc->getUnderlyingBuffer()); EXPECT_EQ(expectedGpuAddress, outData.alignedAllocationPtr); EXPECT_EQ(expectedOffset, outData.offset); size_t offset = 0x21u; void *offsetMemory = ptrOffset(startMemory, offset); expectedOffset = ptrDiff(offsetMemory, baseAddress); size_t alignedOffset = offset & EncodeSurfaceState::getSurfaceBaseAddressAlignmentMask(); expectedGpuAddress = ptrOffset(expectedGpuAddress, alignedOffset); EXPECT_EQ(outData.offset + offset, expectedOffset); outData = commandList->getAlignedAllocation(device, offsetMemory, 4u, false); ASSERT_NE(nullptr, outData.alloc); EXPECT_EQ(firstAlloc, outData.alloc); EXPECT_EQ(startMemory, outData.alloc->getUnderlyingBuffer()); EXPECT_EQ(expectedGpuAddress, outData.alignedAllocationPtr); EXPECT_EQ((expectedOffset & (EncodeSurfaceState::getSurfaceBaseAddressAlignment() - 1)), outData.offset); 
commandList->removeHostPtrAllocations(); device->getNEODevice()->getMemoryManager()->freeSystemMemory(cmdListHostBuffer); } using PlatformSupport = IsWithinProducts; HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryCopyRegionHavingHostMemoryWithSignalAndWaitEventsUsingRenderEngineThenPipeControlIsFound, PlatformSupport) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; ze_result_t result = ZE_RESULT_SUCCESS; std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, result)); auto &commandContainer = commandList->commandContainer; void *srcBuffer = reinterpret_cast(0x1234); void *dstBuffer = reinterpret_cast(0x2345); uint32_t width = 16; uint32_t height = 16; ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 2; auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); std::vector events; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); events.push_back(event.get()); eventDesc.index = 1; auto event1 = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); events.push_back(event1.get()); ze_copy_region_t sr = {0U, 0U, 0U, width, height, 0U}; ze_copy_region_t dr = {0U, 0U, 0U, width, height, 0U}; result = commandList->appendMemoryCopyRegion(dstBuffer, &dr, width, 0, srcBuffer, &sr, width, 0, events[0], 1u, &events[1]); EXPECT_EQ(ZE_RESULT_SUCCESS, result); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); auto allPcCommands = findAll(cmdList.begin(), cmdList.end()); EXPECT_EQ(7u, allPcCommands.size()); } HWTEST2_F(CommandListCreate, 
givenCommandListWhenMemoryCopyRegionHavingDeviceMemoryWithSignalAndWaitEventsUsingRenderEngineThenPipeControlIsNotFound, PlatformSupport) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; ze_result_t result = ZE_RESULT_SUCCESS; std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, result)); auto &commandContainer = commandList->commandContainer; void *srcBuffer = nullptr; void *dstBuffer = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; result = context->allocDeviceMem(device->toHandle(), &deviceDesc, 16384u, 4096u, &srcBuffer); ASSERT_EQ(ZE_RESULT_SUCCESS, result); result = context->allocDeviceMem(device->toHandle(), &deviceDesc, 16384u, 4096u, &dstBuffer); ASSERT_EQ(ZE_RESULT_SUCCESS, result); uint32_t width = 16; uint32_t height = 16; ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 2; auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); std::vector events; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); events.push_back(event.get()); eventDesc.index = 1; auto event1 = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); events.push_back(event1.get()); ze_copy_region_t sr = {0U, 0U, 0U, width, height, 0U}; ze_copy_region_t dr = {0U, 0U, 0U, width, height, 0U}; result = commandList->appendMemoryCopyRegion(dstBuffer, &dr, width, 0, srcBuffer, &sr, width, 0, events[0], 1u, &events[1]); EXPECT_EQ(ZE_RESULT_SUCCESS, result); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); auto allPcCommands = findAll(cmdList.begin(), cmdList.end()); EXPECT_EQ(6u, allPcCommands.size()); 
context->freeMem(srcBuffer); context->freeMem(dstBuffer); } HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryFillHavingDeviceMemoryWithSignalAndWaitEventsUsingRenderEngineThenPipeControlIsNotFound, PlatformSupport) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; ze_result_t result = ZE_RESULT_SUCCESS; std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, result)); auto &commandContainer = commandList->commandContainer; void *dstBuffer = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; result = context->allocDeviceMem(device->toHandle(), &deviceDesc, 16384u, 4096u, &dstBuffer); ASSERT_EQ(ZE_RESULT_SUCCESS, result); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 2; auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); std::vector events; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); events.push_back(event.get()); eventDesc.index = 1; auto event1 = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); events.push_back(event1.get()); int one = 1; result = commandList->appendMemoryFill(dstBuffer, reinterpret_cast(&one), sizeof(one), 4096u, events[0], 1u, &events[1]); EXPECT_EQ(ZE_RESULT_SUCCESS, result); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); auto allPcCommands = findAll(cmdList.begin(), cmdList.end()); EXPECT_EQ(6u, allPcCommands.size()); context->freeMem(dstBuffer); } HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryFillHavingSharedMemoryWithSignalAndWaitEventsUsingRenderEngineThenPipeControlIsFound, PlatformSupport) { using PIPE_CONTROL = typename 
FamilyType::PIPE_CONTROL; ze_result_t result = ZE_RESULT_SUCCESS; std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, result)); auto &commandContainer = commandList->commandContainer; void *dstBuffer = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_host_mem_alloc_desc_t hostDesc = {}; result = context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, 16384u, 4096u, &dstBuffer); ASSERT_EQ(ZE_RESULT_SUCCESS, result); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 2; auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); std::vector events; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); events.push_back(event.get()); eventDesc.index = 1; auto event1 = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); events.push_back(event1.get()); int one = 1; result = commandList->appendMemoryFill(dstBuffer, reinterpret_cast(&one), sizeof(one), 4096u, events[0], 1u, &events[1]); EXPECT_EQ(ZE_RESULT_SUCCESS, result); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); auto allPcCommands = findAll(cmdList.begin(), cmdList.end()); EXPECT_EQ(7u, allPcCommands.size()); context->freeMem(dstBuffer); } HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryFillHavingHostMemoryWithSignalAndWaitEventsUsingRenderEngineThenPipeControlIsFound, PlatformSupport) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; ze_result_t result = ZE_RESULT_SUCCESS; std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, result)); auto &commandContainer = 
commandList->commandContainer; void *dstBuffer = nullptr; ze_host_mem_alloc_desc_t hostDesc = {}; result = context->allocHostMem(&hostDesc, 16384u, 4090u, &dstBuffer); ASSERT_EQ(ZE_RESULT_SUCCESS, result); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 2; auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); std::vector events; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST; auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); events.push_back(event.get()); eventDesc.index = 1; auto event1 = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); events.push_back(event1.get()); int one = 1; result = commandList->appendMemoryFill(dstBuffer, reinterpret_cast(&one), sizeof(one), 4090u, events[0], 1u, &events[1]); EXPECT_EQ(ZE_RESULT_SUCCESS, result); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); auto allPcCommands = findAll(cmdList.begin(), cmdList.end()); EXPECT_EQ(6u, allPcCommands.size()); context->freeMem(dstBuffer); } HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryFillHavingEventsWithDeviceScopeThenPCDueToWaitEventIsAddedAndPCDueToSignalEventIsAddedWithDCFlush, PlatformSupport) { using SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; ze_result_t result = ZE_RESULT_SUCCESS; std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, result)); auto &commandContainer = commandList->commandContainer; void *dstBuffer = nullptr; ze_host_mem_alloc_desc_t hostDesc = {}; result = context->allocHostMem(&hostDesc, 16384u, 4090u, &dstBuffer); 
ASSERT_EQ(ZE_RESULT_SUCCESS, result); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 2; auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); std::vector events; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.wait = ZE_EVENT_SCOPE_FLAG_DEVICE; eventDesc.signal = ZE_EVENT_SCOPE_FLAG_DEVICE; auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); events.push_back(event.get()); eventDesc.index = 1; auto event1 = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); events.push_back(event1.get()); int one = 1; result = commandList->appendMemoryFill(dstBuffer, reinterpret_cast(&one), sizeof(one), 4090u, events[0], 1u, &events[1]); EXPECT_EQ(ZE_RESULT_SUCCESS, result); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); auto itor = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), itor); auto allPcCommands = findAll(cmdList.begin(), cmdList.end()); EXPECT_EQ(6u, allPcCommands.size()); auto cmd = genCmdCast(*allPcCommands.back()); EXPECT_TRUE(cmd->getDcFlushEnable()); context->freeMem(dstBuffer); } HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryFillHavingEventsWithDeviceScopeThenPCDueToWaitEventIsNotAddedAndPCDueToSignalEventIsAddedWithOutDCFlush, PlatformSupport) { using SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; ze_result_t result = ZE_RESULT_SUCCESS; std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, result)); auto &commandContainer = commandList->commandContainer; void *dstBuffer = nullptr; ze_host_mem_alloc_desc_t hostDesc = {}; result = context->allocHostMem(&hostDesc, 16384u, 4090u, 
&dstBuffer); ASSERT_EQ(ZE_RESULT_SUCCESS, result); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 2; auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); std::vector events; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.wait = 0; eventDesc.signal = 0; auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); events.push_back(event.get()); eventDesc.index = 1; auto event1 = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); events.push_back(event1.get()); int one = 1; result = commandList->appendMemoryFill(dstBuffer, reinterpret_cast(&one), sizeof(one), 4090u, events[0], 1u, &events[1]); EXPECT_EQ(ZE_RESULT_SUCCESS, result); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); auto itor = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), itor); itor++; itor = find(itor, cmdList.end()); EXPECT_NE(cmdList.end(), itor); itor++; itor = find(itor, cmdList.end()); EXPECT_NE(cmdList.end(), itor); itor++; itor = find(itor, cmdList.end()); EXPECT_NE(cmdList.end(), itor); itor++; itor = find(itor, cmdList.end()); EXPECT_NE(cmdList.end(), itor); auto cmd = genCmdCast(*itor); EXPECT_FALSE(cmd->getDcFlushEnable()); context->freeMem(dstBuffer); } HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryCopyRegionWithSignalAndWaitEventsUsingCopyEngineThenSuccessIsReturned, IsAtLeastSkl) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; ze_result_t result = ZE_RESULT_SUCCESS; std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::Copy, 0u, result)); void *srcBuffer = reinterpret_cast(0x1234); void *dstBuffer = reinterpret_cast(0x2345); uint32_t width = 16; uint32_t height = 16; ze_event_pool_desc_t 
eventPoolDesc = {}; eventPoolDesc.count = 2; auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); std::vector events; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); events.push_back(event.get()); eventDesc.index = 1; auto event1 = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); events.push_back(event1.get()); ze_copy_region_t sr = {0U, 0U, 0U, width, height, 0U}; ze_copy_region_t dr = {0U, 0U, 0U, width, height, 0U}; result = commandList->appendMemoryCopyRegion(dstBuffer, &dr, width, 0, srcBuffer, &sr, width, 0, events[0], 1u, &events[1]); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryCopyRegionWithSignalAndInvalidWaitHandleUsingCopyEngineThenErrorIsReturned, IsAtLeastSkl) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; ze_result_t result = ZE_RESULT_SUCCESS; std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::Copy, 0u, result)); void *srcBuffer = reinterpret_cast(0x1234); void *dstBuffer = reinterpret_cast(0x2345); uint32_t width = 16; uint32_t height = 16; ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 2; auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); std::vector events; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); events.push_back(event.get()); eventDesc.index = 1; auto event1 = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); events.push_back(event1.get()); ze_copy_region_t sr = {0U, 0U, 0U, width, height, 0U}; 
ze_copy_region_t dr = {0U, 0U, 0U, width, height, 0U}; result = commandList->appendMemoryCopyRegion(dstBuffer, &dr, width, 0, srcBuffer, &sr, width, 0, events[0], 1u, nullptr); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, result); } HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryCopyRegionHasEmptyRegionWithSignalAndWaitEventsUsingCopyEngineThenSuccessIsReturned, IsAtLeastSkl) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; ze_result_t result = ZE_RESULT_SUCCESS; std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::Copy, 0u, result)); void *srcBuffer = reinterpret_cast(0x1234); void *dstBuffer = reinterpret_cast(0x2345); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 2; auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); std::vector events; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); events.push_back(event.get()); eventDesc.index = 1; auto event1 = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); events.push_back(event1.get()); // set regions to 0 ze_copy_region_t sr = {0U, 0U, 0U, 0U, 0U, 0U}; ze_copy_region_t dr = {0U, 0U, 0U, 0U, 0U, 0U}; result = commandList->appendMemoryCopyRegion(dstBuffer, &dr, 0, 0, srcBuffer, &sr, 0, 0, events[0], 1u, &events[1]); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } HWTEST2_F(CommandListCreate, givenImmediateCommandListWhenMemoryCopyRegionWithSignalAndWaitEventsUsingRenderEngineThenSuccessIsReturned, IsAtLeastSkl) { const ze_command_queue_desc_t desc = {}; bool internalEngine = true; ze_result_t result = ZE_RESULT_SUCCESS; std::unique_ptr commandList0(CommandList::createImmediate(productFamily, device, &desc, internalEngine, NEO::EngineGroupType::RenderCompute, result)); ASSERT_NE(nullptr, 
commandList0); CommandQueueImp *cmdQueue = reinterpret_cast(commandList0->cmdQImmediate); EXPECT_EQ(cmdQueue->getCsr(), neoDevice->getInternalEngine().commandStreamReceiver); void *srcBuffer = reinterpret_cast(0x1234); void *dstBuffer = reinterpret_cast(0x2345); uint32_t width = 16; uint32_t height = 16; ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 2; auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); std::vector events; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); events.push_back(event.get()); eventDesc.index = 1; auto event1 = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); events.push_back(event1.get()); ze_copy_region_t sr = {0U, 0U, 0U, width, height, 0U}; ze_copy_region_t dr = {0U, 0U, 0U, width, height, 0U}; result = commandList0->appendMemoryCopyRegion(dstBuffer, &dr, width, 0, srcBuffer, &sr, width, 0, events[0], 1u, &events[1]); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST_F(CommandListCreate, givenImmediateCommandListWhenMemoryCopyRegionWithSignalAndWaitEventsUsingRenderEngineInALoopThenSuccessIsReturned) { const ze_command_queue_desc_t desc = {}; bool internalEngine = true; ze_result_t ret = ZE_RESULT_SUCCESS; std::unique_ptr commandList0(CommandList::createImmediate(productFamily, device, &desc, internalEngine, NEO::EngineGroupType::RenderCompute, ret)); ASSERT_NE(nullptr, commandList0); CommandQueueImp *cmdQueue = reinterpret_cast(commandList0->cmdQImmediate); EXPECT_EQ(cmdQueue->getCsr(), neoDevice->getInternalEngine().commandStreamReceiver); void *srcBuffer = reinterpret_cast(0x1234); void *dstBuffer = reinterpret_cast(0x2345); uint32_t width = 16; uint32_t height = 16; ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 2; auto eventPool = 
std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, ret)); EXPECT_EQ(ZE_RESULT_SUCCESS, ret); std::vector events; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); events.push_back(event.get()); eventDesc.index = 1; auto event1 = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); events.push_back(event1.get()); ze_copy_region_t sr = {0U, 0U, 0U, width, height, 0U}; ze_copy_region_t dr = {0U, 0U, 0U, width, height, 0U}; for (auto i = 0; i < 2000; i++) { ret = commandList0->appendMemoryCopyRegion(dstBuffer, &dr, width, 0, srcBuffer, &sr, width, 0, events[0], 1u, &events[1]); } EXPECT_EQ(ZE_RESULT_SUCCESS, ret); } HWTEST2_F(CommandListCreate, givenImmediateCommandListWhenMemoryCopyRegionWithSignalAndWaitEventsUsingCopyEngineThenSuccessIsReturned, IsAtLeastSkl) { const ze_command_queue_desc_t desc = {}; bool internalEngine = true; ze_result_t returnValue; std::unique_ptr commandList0(CommandList::createImmediate(productFamily, device, &desc, internalEngine, NEO::EngineGroupType::Copy, returnValue)); ASSERT_NE(nullptr, commandList0); CommandQueueImp *cmdQueue = reinterpret_cast(commandList0->cmdQImmediate); if (neoDevice->getInternalCopyEngine()) { EXPECT_EQ(cmdQueue->getCsr(), neoDevice->getInternalCopyEngine()->commandStreamReceiver); } else { EXPECT_EQ(cmdQueue->getCsr(), neoDevice->getInternalEngine().commandStreamReceiver); } void *srcBuffer = reinterpret_cast(0x1234); void *dstBuffer = reinterpret_cast(0x2345); uint32_t width = 16; uint32_t height = 16; ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 2; auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); std::vector events; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.wait 
= ZE_EVENT_SCOPE_FLAG_HOST; auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); events.push_back(event.get()); eventDesc.index = 1; auto event1 = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); events.push_back(event1.get()); ze_copy_region_t sr = {0U, 0U, 0U, width, height, 0U}; ze_copy_region_t dr = {0U, 0U, 0U, width, height, 0U}; auto result = commandList0->appendMemoryCopyRegion(dstBuffer, &dr, width, 0, srcBuffer, &sr, width, 0, events[0], 1u, &events[1]); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } struct CommandListCreateWithBcs : public CommandListCreate { void SetUp() override { VariableBackup backupHwInfo(defaultHwInfo.get()); defaultHwInfo->capabilityTable.blitterOperationsSupported = true; CommandListCreate::SetUp(); } }; HWTEST2_F(CommandListCreateWithBcs, givenImmediateCommandListWhenCopyRegionFromImageToImageUsingRenderThenSuccessIsReturned, IsAtLeastXeHpCore) { const ze_command_queue_desc_t queueDesc = {}; bool internalEngine = true; ze_result_t returnValue; std::unique_ptr commandList0(CommandList::createImmediate(productFamily, device, &queueDesc, internalEngine, NEO::EngineGroupType::Copy, returnValue)); ASSERT_NE(nullptr, commandList0); CommandQueueImp *cmdQueue = reinterpret_cast(commandList0->cmdQImmediate); if (neoDevice->getInternalCopyEngine()) { EXPECT_EQ(cmdQueue->getCsr(), neoDevice->getInternalCopyEngine()->commandStreamReceiver); } else { EXPECT_EQ(cmdQueue->getCsr(), neoDevice->getInternalEngine().commandStreamReceiver); } ze_image_desc_t desc = {}; desc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC; desc.type = ZE_IMAGE_TYPE_3D; desc.format.layout = ZE_IMAGE_FORMAT_LAYOUT_8_8_8_8; desc.format.type = ZE_IMAGE_FORMAT_TYPE_UINT; desc.width = 11; desc.height = 13; desc.depth = 17; desc.format.x = ZE_IMAGE_FORMAT_SWIZZLE_A; desc.format.y = ZE_IMAGE_FORMAT_SWIZZLE_0; desc.format.z = ZE_IMAGE_FORMAT_SWIZZLE_1; desc.format.w = ZE_IMAGE_FORMAT_SWIZZLE_X; auto imageHWSrc = std::make_unique>>(); 
auto imageHWDst = std::make_unique>>(); imageHWSrc->initialize(device, &desc); imageHWDst->initialize(device, &desc); ze_image_region_t srcRegion = {4, 4, 4, 2, 2, 2}; ze_image_region_t dstRegion = {4, 4, 4, 2, 2, 2}; returnValue = commandList0->appendImageCopyRegion(imageHWDst->toHandle(), imageHWSrc->toHandle(), &dstRegion, &srcRegion, nullptr, 0, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); } HWTEST2_F(CommandListCreateWithBcs, givenImmediateCommandListWhenCopyRegionFromImageToImageUsingCopyWintInvalidRegionArguementsThenErrorIsReturned, IsAtLeastXeHpCore) { const ze_command_queue_desc_t queueDesc = {}; bool internalEngine = true; neoDevice->getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable.blitterOperationsSupported = true; ze_result_t returnValue; std::unique_ptr commandList0(CommandList::createImmediate(productFamily, device, &queueDesc, internalEngine, NEO::EngineGroupType::Copy, returnValue)); ASSERT_NE(nullptr, commandList0); CommandQueueImp *cmdQueue = reinterpret_cast(commandList0->cmdQImmediate); if (neoDevice->getInternalCopyEngine()) { EXPECT_EQ(cmdQueue->getCsr(), neoDevice->getInternalCopyEngine()->commandStreamReceiver); } else { EXPECT_EQ(cmdQueue->getCsr(), neoDevice->getInternalEngine().commandStreamReceiver); } ze_image_desc_t desc = {}; desc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC; desc.type = ZE_IMAGE_TYPE_3D; desc.format.layout = ZE_IMAGE_FORMAT_LAYOUT_8_8_8_8; desc.format.type = ZE_IMAGE_FORMAT_TYPE_UINT; desc.width = 11; desc.height = 13; desc.depth = 17; desc.format.x = ZE_IMAGE_FORMAT_SWIZZLE_A; desc.format.y = ZE_IMAGE_FORMAT_SWIZZLE_0; desc.format.z = ZE_IMAGE_FORMAT_SWIZZLE_1; desc.format.w = ZE_IMAGE_FORMAT_SWIZZLE_X; auto imageHWSrc = std::make_unique>>(); auto imageHWDst = std::make_unique>>(); imageHWSrc->initialize(device, &desc); imageHWDst->initialize(device, &desc); ze_image_region_t srcRegion = {4, 4, 4, 2, 2, 2}; ze_image_region_t dstRegion = {2, 2, 2, 4, 4, 4}; returnValue = 
commandList0->appendImageCopyRegion(imageHWDst->toHandle(), imageHWSrc->toHandle(), &dstRegion, &srcRegion, nullptr, 0, nullptr); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, returnValue); } HWTEST2_F(CommandListCreateWithBcs, givenImmediateCommandListWhenCopyFromImageToImageUsingRenderThenSuccessIsReturned, IsAtLeastXeHpCore) { const ze_command_queue_desc_t queueDesc = {}; bool internalEngine = true; neoDevice->getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable.blitterOperationsSupported = true; ze_result_t returnValue; std::unique_ptr commandList0(CommandList::createImmediate(productFamily, device, &queueDesc, internalEngine, NEO::EngineGroupType::Copy, returnValue)); ASSERT_NE(nullptr, commandList0); CommandQueueImp *cmdQueue = reinterpret_cast(commandList0->cmdQImmediate); if (neoDevice->getInternalCopyEngine()) { EXPECT_EQ(cmdQueue->getCsr(), neoDevice->getInternalCopyEngine()->commandStreamReceiver); } else { EXPECT_EQ(cmdQueue->getCsr(), neoDevice->getInternalEngine().commandStreamReceiver); } ze_image_desc_t desc = {}; desc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC; desc.type = ZE_IMAGE_TYPE_3D; desc.format.layout = ZE_IMAGE_FORMAT_LAYOUT_8_8_8_8; desc.format.type = ZE_IMAGE_FORMAT_TYPE_UINT; desc.width = 11; desc.height = 13; desc.depth = 17; desc.format.x = ZE_IMAGE_FORMAT_SWIZZLE_A; desc.format.y = ZE_IMAGE_FORMAT_SWIZZLE_0; desc.format.z = ZE_IMAGE_FORMAT_SWIZZLE_1; desc.format.w = ZE_IMAGE_FORMAT_SWIZZLE_X; auto imageHWSrc = std::make_unique>>(); auto imageHWDst = std::make_unique>>(); imageHWSrc->initialize(device, &desc); imageHWDst->initialize(device, &desc); returnValue = commandList0->appendImageCopy(imageHWDst->toHandle(), imageHWSrc->toHandle(), nullptr, 0, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); } HWTEST2_F(CommandListCreateWithBcs, givenImmediateCommandListWhenMemoryCopyRegionWithSignalAndInvalidWaitHandleUsingCopyEngineThenErrorIsReturned, IsAtLeastSkl) { const ze_command_queue_desc_t desc = {}; bool internalEngine = 
true; neoDevice->getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable.blitterOperationsSupported = true; ze_result_t result = ZE_RESULT_SUCCESS; std::unique_ptr commandList0(CommandList::createImmediate(productFamily, device, &desc, internalEngine, NEO::EngineGroupType::Copy, result)); ASSERT_NE(nullptr, commandList0); CommandQueueImp *cmdQueue = reinterpret_cast(commandList0->cmdQImmediate); if (neoDevice->getInternalCopyEngine()) { EXPECT_EQ(cmdQueue->getCsr(), neoDevice->getInternalCopyEngine()->commandStreamReceiver); } else { EXPECT_EQ(cmdQueue->getCsr(), neoDevice->getInternalEngine().commandStreamReceiver); } void *srcBuffer = reinterpret_cast(0x1234); void *dstBuffer = reinterpret_cast(0x2345); uint32_t width = 16; uint32_t height = 16; ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 2; auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); std::vector events; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); events.push_back(event.get()); eventDesc.index = 1; auto event1 = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); events.push_back(event1.get()); ze_copy_region_t sr = {0U, 0U, 0U, width, height, 0U}; ze_copy_region_t dr = {0U, 0U, 0U, width, height, 0U}; result = commandList0->appendMemoryCopyRegion(dstBuffer, &dr, width, 0, srcBuffer, &sr, width, 0, events[0], 1u, nullptr); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, result); } TEST_F(CommandListCreate, whenCreatingImmCmdListWithASyncModeAndAppendSignalEventWithTimestampThenUpdateTaskCountNeededFlagIsDisabled) { ze_command_queue_desc_t desc = {}; desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; ze_result_t returnValue; std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, 
NEO::EngineGroupType::RenderCompute, returnValue)); ASSERT_NE(nullptr, commandList); EXPECT_EQ(device, commandList->device); EXPECT_EQ(1u, commandList->cmdListType); EXPECT_NE(nullptr, commandList->cmdQImmediate); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.signal = 0; eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; ze_event_handle_t event = nullptr; std::unique_ptr eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); ASSERT_NE(nullptr, eventPool); eventPool->createEvent(&eventDesc, &event); std::unique_ptr event_object(L0::Event::fromHandle(event)); ASSERT_NE(nullptr, event_object->csr); ASSERT_EQ(device->getNEODevice()->getDefaultEngine().commandStreamReceiver, event_object->csr); commandList->appendSignalEvent(event); auto result = event_object->hostSignal(); ASSERT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(event_object->queryStatus(), ZE_RESULT_SUCCESS); } TEST_F(CommandListCreate, whenCreatingImmCmdListWithASyncModeAndAppendBarrierThenUpdateTaskCountNeededFlagIsDisabled) { ze_command_queue_desc_t desc = {}; desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; ze_result_t returnValue; std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue)); ASSERT_NE(nullptr, commandList); EXPECT_EQ(device, commandList->device); EXPECT_EQ(1u, commandList->cmdListType); EXPECT_NE(nullptr, commandList->cmdQImmediate); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST; eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; ze_event_handle_t event = 
nullptr; std::unique_ptr eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); ASSERT_NE(nullptr, eventPool); eventPool->createEvent(&eventDesc, &event); std::unique_ptr event_object(L0::Event::fromHandle(event)); ASSERT_NE(nullptr, event_object->csr); ASSERT_EQ(device->getNEODevice()->getDefaultEngine().commandStreamReceiver, event_object->csr); commandList->appendBarrier(event, 0, nullptr); auto result = event_object->hostSignal(); ASSERT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(event_object->queryStatus(), ZE_RESULT_SUCCESS); commandList->appendBarrier(nullptr, 0, nullptr); } TEST_F(CommandListCreate, whenCreatingImmCmdListWithASyncModeAndAppendEventResetThenUpdateTaskCountNeededFlagIsDisabled) { ze_command_queue_desc_t desc = {}; desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; ze_result_t returnValue; std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue)); ASSERT_NE(nullptr, commandList); EXPECT_EQ(device, commandList->device); EXPECT_EQ(1u, commandList->cmdListType); EXPECT_NE(nullptr, commandList->cmdQImmediate); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST; eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; ze_event_handle_t event = nullptr; std::unique_ptr eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); ASSERT_NE(nullptr, eventPool); eventPool->createEvent(&eventDesc, &event); std::unique_ptr event_object(L0::Event::fromHandle(event)); ASSERT_NE(nullptr, event_object->csr); ASSERT_EQ(device->getNEODevice()->getDefaultEngine().commandStreamReceiver, event_object->csr); 
commandList->appendEventReset(event); auto result = event_object->hostSignal(); ASSERT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(event_object->queryStatus(), ZE_RESULT_SUCCESS); } TEST_F(CommandListCreateWithBcs, givenQueueDescriptionwhenCreatingImmediateCommandListForCopyEnigneThenItHasImmediateCommandQueueCreated) { auto &engineGroups = neoDevice->getRegularEngineGroups(); for (uint32_t ordinal = 0; ordinal < engineGroups.size(); ordinal++) { for (uint32_t index = 0; index < engineGroups[ordinal].engines.size(); index++) { ze_command_queue_desc_t desc = {}; desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; desc.ordinal = ordinal; desc.index = index; ze_result_t returnValue; std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::Copy, returnValue)); ASSERT_NE(nullptr, commandList); EXPECT_EQ(device, commandList->device); EXPECT_EQ(CommandList::CommandListType::TYPE_IMMEDIATE, commandList->cmdListType); EXPECT_NE(nullptr, commandList->cmdQImmediate); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 3; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE; ze_event_desc_t eventDesc = {}; eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST; eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); auto event1 = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); auto event2 = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); ze_event_handle_t events[] = {event1->toHandle(), event2->toHandle()}; commandList->appendBarrier(nullptr, 0, nullptr); commandList->appendBarrier(event->toHandle(), 2, events); auto result = event->hostSignal(); ASSERT_EQ(ZE_RESULT_SUCCESS, result); result = event1->hostSignal(); 
ASSERT_EQ(ZE_RESULT_SUCCESS, result); result = event2->hostSignal(); ASSERT_EQ(ZE_RESULT_SUCCESS, result); } } } HWTEST2_F(CommandListCreate, whenGettingCommandsToPatchThenCorrectValuesAreReturned, IsAtLeastSkl) { auto commandList = std::make_unique>>(); EXPECT_EQ(&commandList->requiredStreamState, &commandList->getRequiredStreamState()); EXPECT_EQ(&commandList->finalStreamState, &commandList->getFinalStreamState()); EXPECT_EQ(&commandList->commandsToPatch, &commandList->getCommandsToPatch()); } HWTEST2_F(CommandListCreate, givenNonEmptyCommandsToPatchWhenClearCommandsToPatchIsCalledThenCommandsAreCorrectlyCleared, IsAtLeastSkl) { using VFE_STATE_TYPE = typename FamilyType::VFE_STATE_TYPE; auto pCommandList = std::make_unique>>(); EXPECT_TRUE(pCommandList->commandsToPatch.empty()); EXPECT_NO_THROW(pCommandList->clearCommandsToPatch()); EXPECT_TRUE(pCommandList->commandsToPatch.empty()); CommandList::CommandToPatch commandToPatch{}; pCommandList->commandsToPatch.push_back(commandToPatch); EXPECT_ANY_THROW(pCommandList->clearCommandsToPatch()); pCommandList->commandsToPatch.clear(); commandToPatch.type = CommandList::CommandToPatch::CommandType::FrontEndState; pCommandList->commandsToPatch.push_back(commandToPatch); EXPECT_ANY_THROW(pCommandList->clearCommandsToPatch()); pCommandList->commandsToPatch.clear(); commandToPatch.pCommand = new VFE_STATE_TYPE; pCommandList->commandsToPatch.push_back(commandToPatch); EXPECT_NO_THROW(pCommandList->clearCommandsToPatch()); EXPECT_TRUE(pCommandList->commandsToPatch.empty()); } template class MyDeviceMock : public Mock { public: NEO::GraphicsAllocation *allocateMemoryFromHostPtr(const void *buffer, size_t size, bool hostCopyAllowed) override { auto alloc = std::make_unique(const_cast(buffer), reinterpret_cast(buffer), size); alloc->allocationType = AllocType; return alloc.release(); } const NEO::HardwareInfo &getHwInfo() const override { return neoDevice->getHardwareInfo(); } }; HWTEST2_F(CommandListCreate, 
givenHostPtrAllocAllocWhenInternalMemCreatedThenNewAllocAddedToDealocationContainer, IsAtLeastSkl) { auto myDevice = std::make_unique>(); myDevice->neoDevice = device->getNEODevice(); auto commandList = std::make_unique>>(); commandList->initialize(myDevice.get(), NEO::EngineGroupType::Copy, 0u); auto buffer = std::make_unique(0x100); auto deallocationSize = commandList->commandContainer.getDeallocationContainer().size(); auto alloc = commandList->getHostPtrAlloc(buffer.get(), 0x80, true); EXPECT_EQ(deallocationSize + 1, commandList->commandContainer.getDeallocationContainer().size()); EXPECT_NE(alloc, nullptr); driverHandle.get()->getMemoryManager()->freeGraphicsMemory(alloc); commandList->commandContainer.getDeallocationContainer().clear(); } HWTEST2_F(CommandListCreate, givenHostPtrAllocAllocWhenExternalMemCreatedThenNewAllocAddedToHostPtrMap, IsAtLeastSkl) { auto myDevice = std::make_unique>(); myDevice->neoDevice = device->getNEODevice(); auto commandList = std::make_unique>>(); commandList->initialize(myDevice.get(), NEO::EngineGroupType::Copy, 0u); auto buffer = std::make_unique(0x100); auto hostPtrMapSize = commandList->getHostPtrMap().size(); auto alloc = commandList->getHostPtrAlloc(buffer.get(), 0x100, true); EXPECT_EQ(hostPtrMapSize + 1, commandList->getHostPtrMap().size()); EXPECT_NE(alloc, nullptr); driverHandle.get()->getMemoryManager()->freeGraphicsMemory(alloc); commandList->hostPtrMap.clear(); } HWTEST2_F(CommandListCreate, givenGetAlignedAllocationWhenInternalMemWithinDifferentAllocThenReturnNewAlloc, IsAtLeastSkl) { auto myDevice = std::make_unique>(); myDevice->neoDevice = device->getNEODevice(); auto commandList = std::make_unique>>(); commandList->initialize(myDevice.get(), NEO::EngineGroupType::Copy, 0u); auto buffer = std::make_unique(0x100); auto outData1 = commandList->getAlignedAllocation(device, buffer.get(), 0x100, true); auto outData2 = commandList->getAlignedAllocation(device, &buffer.get()[5], 0x1, true); EXPECT_NE(outData1.alloc, 
outData2.alloc); driverHandle.get()->getMemoryManager()->freeGraphicsMemory(outData1.alloc); driverHandle.get()->getMemoryManager()->freeGraphicsMemory(outData2.alloc); commandList->commandContainer.getDeallocationContainer().clear(); } HWTEST2_F(CommandListCreate, givenGetAlignedAllocationWhenExternalMemWithinDifferentAllocThenReturnPreviouslyAllocatedMem, IsAtLeastSkl) { auto myDevice = std::make_unique>(); myDevice->neoDevice = device->getNEODevice(); auto commandList = std::make_unique>>(); commandList->initialize(myDevice.get(), NEO::EngineGroupType::Copy, 0u); auto buffer = std::make_unique(0x100); auto outData1 = commandList->getAlignedAllocation(device, buffer.get(), 0x100, true); auto outData2 = commandList->getAlignedAllocation(device, &buffer.get()[5], 0x1, true); EXPECT_EQ(outData1.alloc, outData2.alloc); driverHandle.get()->getMemoryManager()->freeGraphicsMemory(outData1.alloc); commandList->hostPtrMap.clear(); } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_4.cpp000066400000000000000000001706611422164147700324120ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/source/image/image_hw.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/fixtures/host_pointer_manager_fixture.h" #include "level_zero/core/test/unit_tests/fixtures/module_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h" #include "level_zero/core/test/unit_tests/mocks/mock_module.h" #include "test_traits_common.h" namespace L0 { namespace ult { using CommandListCreate = Test; HWTEST2_F(CommandListCreate, 
givenCopyOnlyCommandListWhenAppendWriteGlobalTimestampCalledThenMiFlushDWWithTimestampEncoded, IsAtLeastSkl) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using MI_FLUSH_DW = typename GfxFamily::MI_FLUSH_DW; ze_result_t returnValue; std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::Copy, 0u, returnValue)); auto &commandContainer = commandList->commandContainer; uint64_t timestampAddress = 0xfffffffffff0L; uint64_t *dstptr = reinterpret_cast(timestampAddress); const auto commandStreamOffset = commandContainer.getCommandStream()->getUsed(); commandList->appendWriteGlobalTimestamp(dstptr, nullptr, 0, nullptr); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), commandStreamOffset), commandContainer.getCommandStream()->getUsed() - commandStreamOffset)); auto iterator = findAll(cmdList.begin(), cmdList.end()); bool postSyncFound = false; ASSERT_NE(0u, iterator.size()); for (auto it : iterator) { auto cmd = genCmdCast(*it); if ((cmd->getPostSyncOperation() == MI_FLUSH_DW::POST_SYNC_OPERATION_WRITE_TIMESTAMP_REGISTER) && (cmd->getDestinationAddress() == timestampAddress)) { postSyncFound = true; } } ASSERT_TRUE(postSyncFound); } HWTEST2_F(CommandListCreate, givenCommandListWhenAppendWriteGlobalTimestampCalledThenPipeControlWithTimestampWriteEncoded, IsAtLeastSkl) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION; ze_result_t returnValue; std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)); auto &commandContainer = commandList->commandContainer; uint64_t timestampAddress = 0x123456785500; uint64_t *dstptr = reinterpret_cast(timestampAddress); const auto commandStreamOffset = commandContainer.getCommandStream()->getUsed(); commandList->appendWriteGlobalTimestamp(dstptr, 
nullptr, 0, nullptr); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), commandStreamOffset), commandContainer.getCommandStream()->getUsed() - commandStreamOffset)); auto iterator = find(cmdList.begin(), cmdList.end()); auto cmd = genCmdCast(*iterator); EXPECT_TRUE(cmd->getCommandStreamerStallEnable()); EXPECT_FALSE(cmd->getDcFlushEnable()); EXPECT_EQ(timestampAddress, NEO::UnitTestHelper::getPipeControlPostSyncAddress(*cmd)); EXPECT_EQ(POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_TIMESTAMP, cmd->getPostSyncOperation()); } HWTEST2_F(CommandListCreate, givenCommandListWhenAppendWriteGlobalTimestampCalledThenTimestampAllocationIsInsideResidencyContainer, IsAtLeastSkl) { ze_result_t returnValue; std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)); uint64_t timestampAddress = 0x123456785500; uint64_t *dstptr = reinterpret_cast(timestampAddress); commandList->appendWriteGlobalTimestamp(dstptr, nullptr, 0, nullptr); auto &commandContainer = commandList->commandContainer; auto &residencyContainer = commandContainer.getResidencyContainer(); const bool addressIsInContainer = std::any_of(residencyContainer.begin(), residencyContainer.end(), [timestampAddress](NEO::GraphicsAllocation *alloc) { return alloc->getGpuAddress() == timestampAddress; }); EXPECT_TRUE(addressIsInContainer); } HWTEST2_F(CommandListCreate, givenImmediateCommandListWhenAppendWriteGlobalTimestampThenReturnsSuccess, IsAtLeastSkl) { const ze_command_queue_desc_t desc = {}; bool internalEngine = true; ze_result_t returnValue; std::unique_ptr commandList0(CommandList::createImmediate(productFamily, device, &desc, internalEngine, NEO::EngineGroupType::RenderCompute, returnValue)); ASSERT_NE(nullptr, commandList0); CommandQueueImp *cmdQueue = reinterpret_cast(commandList0->cmdQImmediate); EXPECT_EQ(cmdQueue->getCsr(), 
neoDevice->getInternalEngine().commandStreamReceiver); uint64_t timestampAddress = 0x123456785500; uint64_t *dstptr = reinterpret_cast(timestampAddress); auto result = commandList0->appendWriteGlobalTimestamp(dstptr, nullptr, 0, nullptr); ASSERT_EQ(ZE_RESULT_SUCCESS, result); } HWTEST2_F(CommandListCreate, givenUseCsrImmediateSubmissionEnabledForCopyImmediateCommandListThenAppendImageCopyRegionReturnsSuccess, IsAtLeastXeHpCore) { DebugManagerStateRestore restorer; NEO::DebugManager.flags.EnableFlushTaskSubmission.set(true); const ze_command_queue_desc_t queueDesc = {}; void *srcPtr = reinterpret_cast(0x1234); void *dstPtr = reinterpret_cast(0x2345); neoDevice->getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable.blitterOperationsSupported = true; ze_result_t returnValue; std::unique_ptr commandList0(CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::Copy, returnValue)); ASSERT_NE(nullptr, commandList0); ze_image_desc_t desc = {}; desc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC; desc.type = ZE_IMAGE_TYPE_3D; desc.format.layout = ZE_IMAGE_FORMAT_LAYOUT_8_8_8_8; desc.format.type = ZE_IMAGE_FORMAT_TYPE_UINT; desc.width = 11; desc.height = 13; desc.depth = 17; desc.format.x = ZE_IMAGE_FORMAT_SWIZZLE_A; desc.format.y = ZE_IMAGE_FORMAT_SWIZZLE_0; desc.format.z = ZE_IMAGE_FORMAT_SWIZZLE_1; desc.format.w = ZE_IMAGE_FORMAT_SWIZZLE_X; auto imageHWSrc = std::make_unique>>(); auto imageHWDst = std::make_unique>>(); imageHWSrc->initialize(device, &desc); imageHWDst->initialize(device, &desc); returnValue = commandList0->appendImageCopy(imageHWDst->toHandle(), imageHWSrc->toHandle(), nullptr, 0, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); returnValue = commandList0->appendImageCopyFromMemory(imageHWDst->toHandle(), srcPtr, nullptr, nullptr, 0, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); returnValue = commandList0->appendImageCopyToMemory(dstPtr, imageHWSrc->toHandle(), nullptr, nullptr, 0, nullptr); 
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); } HWTEST2_F(CommandListCreate, givenUseCsrImmediateSubmissionDisabledForCopyImmediateCommandListThenAppendImageCopyRegionReturnsSuccess, IsAtLeastXeHpCore) { DebugManagerStateRestore restorer; NEO::DebugManager.flags.EnableFlushTaskSubmission.set(false); const ze_command_queue_desc_t queueDesc = {}; void *srcPtr = reinterpret_cast(0x1234); void *dstPtr = reinterpret_cast(0x2345); neoDevice->getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable.blitterOperationsSupported = true; ze_result_t returnValue; std::unique_ptr commandList0(CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::Copy, returnValue)); ASSERT_NE(nullptr, commandList0); ze_image_desc_t desc = {}; desc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC; desc.type = ZE_IMAGE_TYPE_3D; desc.format.layout = ZE_IMAGE_FORMAT_LAYOUT_8_8_8_8; desc.format.type = ZE_IMAGE_FORMAT_TYPE_UINT; desc.width = 11; desc.height = 13; desc.depth = 17; desc.format.x = ZE_IMAGE_FORMAT_SWIZZLE_A; desc.format.y = ZE_IMAGE_FORMAT_SWIZZLE_0; desc.format.z = ZE_IMAGE_FORMAT_SWIZZLE_1; desc.format.w = ZE_IMAGE_FORMAT_SWIZZLE_X; auto imageHWSrc = std::make_unique>>(); auto imageHWDst = std::make_unique>>(); imageHWSrc->initialize(device, &desc); imageHWDst->initialize(device, &desc); returnValue = commandList0->appendImageCopy(imageHWDst->toHandle(), imageHWSrc->toHandle(), nullptr, 0, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); returnValue = commandList0->appendImageCopyFromMemory(imageHWDst->toHandle(), srcPtr, nullptr, nullptr, 0, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); returnValue = commandList0->appendImageCopyToMemory(dstPtr, imageHWSrc->toHandle(), nullptr, nullptr, 0, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); } HWTEST_F(CommandListCreate, givenUseCsrImmediateSubmissionEnabledForCopyImmediateCommandListthenAppendMemoryCopyRegionReturnsSuccess) { DebugManagerStateRestore restorer; 
NEO::DebugManager.flags.EnableFlushTaskSubmission.set(true); void *srcPtr = reinterpret_cast(0x1234); void *dstPtr = reinterpret_cast(0x2345); uint32_t width = 16; uint32_t height = 16; ze_copy_region_t sr = {0U, 0U, 0U, width, height, 0U}; ze_copy_region_t dr = {0U, 0U, 0U, width, height, 0U}; ze_command_queue_desc_t queueDesc = {}; ze_result_t returnValue = ZE_RESULT_SUCCESS; auto commandList = CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::Copy, returnValue); auto result = commandList->appendMemoryCopyRegion(dstPtr, &dr, 0, 0, srcPtr, &sr, 0, 0, nullptr, 0, nullptr); ASSERT_EQ(ZE_RESULT_SUCCESS, result); commandList->destroy(); } class CommandListImmediateFlushTaskTests : public DeviceFixture { public: void SetUp() { DeviceFixture::SetUp(); } void TearDown() { DeviceFixture::TearDown(); } DebugManagerStateRestore restorer; }; using CommandListImmediateFlushTaskComputeTests = Test; HWTEST2_F(CommandListImmediateFlushTaskComputeTests, givenDG2CommandListIsInititalizedThenByDefaultFlushTaskSubmissionEnabled, IsDG2) { ze_command_queue_desc_t queueDesc = {}; ze_result_t returnValue; std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::Compute, returnValue)); EXPECT_EQ(true, commandList->isFlushTaskSubmissionEnabled); } using MatchXeHpc = IsGfxCore; HWTEST2_F(CommandListImmediateFlushTaskComputeTests, givenXeHPCCommandListIsInititalizedThenByDefaultFlushTaskSubmissionEnabled, MatchXeHpc) { ze_command_queue_desc_t queueDesc = {}; ze_result_t returnValue; std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::Compute, returnValue)); EXPECT_EQ(true, commandList->isFlushTaskSubmissionEnabled); } HWTEST2_F(CommandListImmediateFlushTaskComputeTests, givenCommandListIsInititalizedThenByDefaultFlushTaskSubmissionDisabled, IsAtMostGen12lp) { ze_command_queue_desc_t queueDesc = {}; ze_result_t 
returnValue; std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::Compute, returnValue)); EXPECT_EQ(false, commandList->isFlushTaskSubmissionEnabled); } HWTEST_F(CommandListImmediateFlushTaskComputeTests, givenFlushTaskSubmissionDisabledWhenCommandListIsInititalizedThenFlushTaskIsSetToFalse) { NEO::DebugManager.flags.EnableFlushTaskSubmission.set(0); ze_command_queue_desc_t queueDesc = {}; ze_result_t returnValue; std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::Compute, returnValue)); EXPECT_EQ(false, commandList->isFlushTaskSubmissionEnabled); } HWTEST_F(CommandListImmediateFlushTaskComputeTests, givenUseCsrImmediateSubmissionDisabledForImmediateCommandListForAppendLaunchKernelThenSuccessIsReturned) { Mock<::L0::Kernel> kernel; NEO::DebugManager.flags.EnableFlushTaskSubmission.set(0); ze_command_queue_desc_t queueDesc = {}; ze_result_t returnValue = ZE_RESULT_SUCCESS; ze_group_count_t groupCount{1, 1, 1}; std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::Compute, returnValue)); auto result = commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr); ASSERT_EQ(ZE_RESULT_SUCCESS, result); } HWTEST_F(CommandListImmediateFlushTaskComputeTests, givenUseCsrImmediateSubmissionEnabledForImmediateCommandListForAppendPageFaultThenSuccessIsReturned) { NEO::DebugManager.flags.EnableFlushTaskSubmission.set(1); size_t size = 0x100000001; NEO::MockGraphicsAllocation mockAllocationSrc(0, NEO::AllocationType::INTERNAL_HOST_MEMORY, reinterpret_cast(0x1234), size, 0, sizeof(uint32_t), MemoryPool::System4KBPages); NEO::MockGraphicsAllocation mockAllocationDst(0, NEO::AllocationType::INTERNAL_HOST_MEMORY, reinterpret_cast(0x100003456), size, 0, sizeof(uint32_t), MemoryPool::System4KBPages); ze_command_queue_desc_t queueDesc = {}; ze_result_t 
returnValue = ZE_RESULT_SUCCESS; std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::Compute, returnValue)); auto result = commandList->appendPageFaultCopy(&mockAllocationDst, &mockAllocationSrc, size, false); ASSERT_EQ(ZE_RESULT_SUCCESS, result); } HWTEST_F(CommandListImmediateFlushTaskComputeTests, givenBindlessModeAndUseCsrImmediateSubmissionEnabledForImmediateCommandListForAppendPageFaultThenSuccessIsReturned) { NEO::DebugManager.flags.EnableFlushTaskSubmission.set(1); NEO::DebugManager.flags.UseBindlessMode.set(1); neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()]->createBindlessHeapsHelper(neoDevice->getMemoryManager(), neoDevice->getNumGenericSubDevices() > 1, neoDevice->getRootDeviceIndex(), neoDevice->getDeviceBitfield()); size_t size = 0x100000001; NEO::MockGraphicsAllocation mockAllocationSrc(0, NEO::AllocationType::INTERNAL_HOST_MEMORY, reinterpret_cast(0x1234), size, 0, sizeof(uint32_t), MemoryPool::System4KBPages); NEO::MockGraphicsAllocation mockAllocationDst(0, NEO::AllocationType::INTERNAL_HOST_MEMORY, reinterpret_cast(0x100003456), size, 0, sizeof(uint32_t), MemoryPool::System4KBPages); ze_command_queue_desc_t queueDesc = {}; ze_result_t returnValue = ZE_RESULT_SUCCESS; std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::Compute, returnValue)); auto result = commandList->appendPageFaultCopy(&mockAllocationDst, &mockAllocationSrc, size, false); ASSERT_EQ(ZE_RESULT_SUCCESS, result); } HWTEST_F(CommandListImmediateFlushTaskComputeTests, givenUseCsrImmediateSubmissionEnabledForImmediateWhenAppendEventResetThenSuccessIsReturned) { NEO::DebugManager.flags.EnableFlushTaskSubmission.set(1); ze_context_handle_t hContext; ze_context_desc_t desc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC, nullptr, 0}; ze_result_t res = driverHandle->createContext(&desc, 0u, nullptr, &hContext); 
EXPECT_EQ(ZE_RESULT_SUCCESS, res); auto context = static_cast(Context::fromHandle(hContext)); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.signal = ZE_EVENT_SCOPE_FLAG_DEVICE; ze_result_t result = ZE_RESULT_SUCCESS; auto eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); ze_command_queue_desc_t queueDesc = {}; ze_result_t returnValue = ZE_RESULT_SUCCESS; std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::Compute, returnValue)); result = commandList->appendEventReset(event->toHandle()); ASSERT_EQ(ZE_RESULT_SUCCESS, result); context->destroy(); } HWTEST_F(CommandListImmediateFlushTaskComputeTests, givenUseCsrImmediateSubmissionEnabledForImmediateWhenAppendEventResetWithTimestampThenSuccessIsReturned) { NEO::DebugManager.flags.EnableFlushTaskSubmission.set(1); ze_context_handle_t hContext; ze_context_desc_t desc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC, nullptr, 0}; ze_result_t res = driverHandle->createContext(&desc, 0u, nullptr, &hContext); EXPECT_EQ(ZE_RESULT_SUCCESS, res); auto context = static_cast(Context::fromHandle(hContext)); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.signal = ZE_EVENT_SCOPE_FLAG_DEVICE; ze_result_t result = ZE_RESULT_SUCCESS; auto eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); ze_command_queue_desc_t 
queueDesc = {}; ze_result_t returnValue = ZE_RESULT_SUCCESS; std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::Compute, returnValue)); result = commandList->appendEventReset(event->toHandle()); ASSERT_EQ(ZE_RESULT_SUCCESS, result); context->destroy(); } HWTEST_F(CommandListImmediateFlushTaskComputeTests, givenUseCsrImmediateSubmissionDisabledForImmediateWhenAppendEventResetWithTimestampThenSuccessIsReturned) { NEO::DebugManager.flags.EnableFlushTaskSubmission.set(0); ze_context_handle_t hContext; ze_context_desc_t desc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC, nullptr, 0}; ze_result_t res = driverHandle->createContext(&desc, 0u, nullptr, &hContext); EXPECT_EQ(ZE_RESULT_SUCCESS, res); auto context = static_cast(Context::fromHandle(hContext)); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.signal = ZE_EVENT_SCOPE_FLAG_DEVICE; ze_result_t result = ZE_RESULT_SUCCESS; auto eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); ze_command_queue_desc_t queueDesc = {}; ze_result_t returnValue = ZE_RESULT_SUCCESS; std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::Compute, returnValue)); result = commandList->appendEventReset(event->toHandle()); ASSERT_EQ(ZE_RESULT_SUCCESS, result); context->destroy(); } HWTEST_F(CommandListImmediateFlushTaskComputeTests, givenUseCsrImmediateSubmissionEnabledForImmediateWhenAppendSignalEventThenSuccessIsReturned) { NEO::DebugManager.flags.EnableFlushTaskSubmission.set(true); ze_context_handle_t hContext; ze_context_desc_t desc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC, 
nullptr, 0}; ze_result_t res = driverHandle->createContext(&desc, 0u, nullptr, &hContext); EXPECT_EQ(ZE_RESULT_SUCCESS, res); auto context = static_cast(Context::fromHandle(hContext)); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.signal = ZE_EVENT_SCOPE_FLAG_DEVICE; ze_result_t result = ZE_RESULT_SUCCESS; auto eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); ze_command_queue_desc_t queueDesc = {}; ze_result_t returnValue = ZE_RESULT_SUCCESS; std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::Compute, returnValue)); result = commandList->appendSignalEvent(event->toHandle()); ASSERT_EQ(ZE_RESULT_SUCCESS, result); context->destroy(); } HWTEST_F(CommandListImmediateFlushTaskComputeTests, givenUseCsrImmediateSubmissionEnabledForImmediateWhenAppendSignalEventWithTimestampThenSuccessIsReturned) { NEO::DebugManager.flags.EnableFlushTaskSubmission.set(1); ze_context_handle_t hContext; ze_context_desc_t desc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC, nullptr, 0}; ze_result_t res = driverHandle->createContext(&desc, 0u, nullptr, &hContext); EXPECT_EQ(ZE_RESULT_SUCCESS, res); auto context = static_cast(Context::fromHandle(hContext)); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.signal = ZE_EVENT_SCOPE_FLAG_DEVICE; ze_result_t result = ZE_RESULT_SUCCESS; auto eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto event = 
std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); ze_command_queue_desc_t queueDesc = {}; ze_result_t returnValue = ZE_RESULT_SUCCESS; std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::Compute, returnValue)); result = commandList->appendSignalEvent(event->toHandle()); ASSERT_EQ(ZE_RESULT_SUCCESS, result); context->destroy(); } HWTEST_F(CommandListImmediateFlushTaskComputeTests, givenUseCsrImmediateSubmissionDisabledForImmediateWhenAppendSignalEventWithTimestampThenSuccessIsReturned) { NEO::DebugManager.flags.EnableFlushTaskSubmission.set(0); ze_context_handle_t hContext; ze_context_desc_t desc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC, nullptr, 0}; ze_result_t res = driverHandle->createContext(&desc, 0u, nullptr, &hContext); EXPECT_EQ(ZE_RESULT_SUCCESS, res); auto context = static_cast(Context::fromHandle(hContext)); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.signal = ZE_EVENT_SCOPE_FLAG_DEVICE; ze_result_t result = ZE_RESULT_SUCCESS; auto eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); ze_command_queue_desc_t queueDesc = {}; ze_result_t returnValue = ZE_RESULT_SUCCESS; std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::Compute, returnValue)); result = commandList->appendSignalEvent(event->toHandle()); ASSERT_EQ(ZE_RESULT_SUCCESS, result); context->destroy(); } HWTEST_F(CommandListImmediateFlushTaskComputeTests, givenUseCsrImmediateSubmissionDisabledForImmediateWhenAppendWaitOnEventWithTimestampThenSuccessIsReturned) { 
NEO::DebugManager.flags.EnableFlushTaskSubmission.set(0); ze_context_handle_t hContext; ze_context_desc_t desc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC, nullptr, 0}; ze_result_t res = driverHandle->createContext(&desc, 0u, nullptr, &hContext); EXPECT_EQ(ZE_RESULT_SUCCESS, res); auto context = static_cast(Context::fromHandle(hContext)); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.signal = ZE_EVENT_SCOPE_FLAG_DEVICE; ze_result_t result = ZE_RESULT_SUCCESS; auto eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); ze_command_queue_desc_t queueDesc = {}; ze_result_t returnValue = ZE_RESULT_SUCCESS; std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::Compute, returnValue)); ze_event_handle_t hEventHandle = event->toHandle(); result = commandList->appendWaitOnEvents(1, &hEventHandle); ASSERT_EQ(ZE_RESULT_SUCCESS, result); context->destroy(); } HWTEST_F(CommandListImmediateFlushTaskComputeTests, givenUseCsrImmediateSubmissionEnabledForImmediateWhenAppendBarrierWithEventThenSuccessIsReturned) { NEO::DebugManager.flags.EnableFlushTaskSubmission.set(1); ze_context_handle_t hContext; ze_context_desc_t desc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC, nullptr, 0}; ze_result_t res = driverHandle->createContext(&desc, 0u, nullptr, &hContext); EXPECT_EQ(ZE_RESULT_SUCCESS, res); auto context = static_cast(Context::fromHandle(hContext)); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.signal = ZE_EVENT_SCOPE_FLAG_DEVICE; ze_result_t result = 
ZE_RESULT_SUCCESS; auto eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); ze_command_queue_desc_t queueDesc = {}; ze_result_t returnValue = ZE_RESULT_SUCCESS; std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::Compute, returnValue)); result = commandList->appendBarrier(event->toHandle(), 0, nullptr); ASSERT_EQ(ZE_RESULT_SUCCESS, result); context->destroy(); } HWTEST_F(CommandListImmediateFlushTaskComputeTests, givenUseCsrImmediateSubmissionDisabledForImmediateWhenAppendBarrierWithTimestampEventThenSuccessIsReturned) { NEO::DebugManager.flags.EnableFlushTaskSubmission.set(0); ze_context_handle_t hContext; ze_context_desc_t desc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC, nullptr, 0}; ze_result_t res = driverHandle->createContext(&desc, 0u, nullptr, &hContext); EXPECT_EQ(ZE_RESULT_SUCCESS, res); auto context = static_cast(Context::fromHandle(hContext)); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.signal = ZE_EVENT_SCOPE_FLAG_DEVICE; ze_result_t result = ZE_RESULT_SUCCESS; auto eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); ze_command_queue_desc_t queueDesc = {}; ze_result_t returnValue = ZE_RESULT_SUCCESS; std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::Compute, returnValue)); result = commandList->appendBarrier(event->toHandle(), 0, nullptr); ASSERT_EQ(ZE_RESULT_SUCCESS, result); 
context->destroy(); } HWTEST_F(CommandListImmediateFlushTaskComputeTests, givenUseCsrImmediateSubmissionEnabledForImmediateWhenAppendBarrierWithoutEventThenSuccessIsReturned) { NEO::DebugManager.flags.EnableFlushTaskSubmission.set(1); ze_command_queue_desc_t queueDesc = {}; ze_result_t returnValue = ZE_RESULT_SUCCESS; std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::Compute, returnValue)); auto result = commandList->appendBarrier(nullptr, 0, nullptr); ASSERT_EQ(ZE_RESULT_SUCCESS, result); } HWTEST_F(CommandListImmediateFlushTaskComputeTests, givenUseCsrImmediateSubmissionDisabledForImmediateWhenAppendBarrierWithEventThenSuccessIsReturned) { NEO::DebugManager.flags.EnableFlushTaskSubmission.set(0); ze_context_handle_t hContext; ze_context_desc_t desc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC, nullptr, 0}; ze_result_t res = driverHandle->createContext(&desc, 0u, nullptr, &hContext); EXPECT_EQ(ZE_RESULT_SUCCESS, res); auto context = static_cast(Context::fromHandle(hContext)); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.signal = ZE_EVENT_SCOPE_FLAG_DEVICE; ze_result_t result = ZE_RESULT_SUCCESS; auto eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); ze_command_queue_desc_t queueDesc = {}; ze_result_t returnValue = ZE_RESULT_SUCCESS; std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::Compute, returnValue)); result = commandList->appendBarrier(event->toHandle(), 0, nullptr); ASSERT_EQ(ZE_RESULT_SUCCESS, result); context->destroy(); } HWTEST_F(CommandListCreate, 
GivenCommandListWhenUnalignedPtrThenLeftMiddleAndRightCopyAdded) { using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT; ze_result_t returnValue; std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::Copy, 0u, returnValue)); ASSERT_NE(nullptr, commandList); EXPECT_EQ(device, commandList->device); void *srcPtr = reinterpret_cast(0x4321); void *dstPtr = reinterpret_cast(0x2345); auto result = commandList->appendMemoryCopy(dstPtr, srcPtr, 2 * MemoryConstants::cacheLineSize, nullptr, 0, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed())); auto itor = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), itor); itor = find(++itor, cmdList.end()); EXPECT_NE(cmdList.end(), itor); itor = find(++itor, cmdList.end()); EXPECT_NE(cmdList.end(), itor); } HWTEST2_F(CommandListCreate, whenCommandListIsCreatedThenFlagsAreCorrectlySet, IsAtLeastSkl) { ze_command_list_flags_t flags[] = {0b0, 0b1, 0b10, 0b11}; ze_result_t returnValue; for (auto flag : flags) { std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::Compute, flag, returnValue)); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); auto pCommandListCoreFamily = static_cast> *>(commandList.get()); EXPECT_EQ(flag, pCommandListCoreFamily->flags); } } using CommandListAppendLaunchKernel = Test; struct ProgramChangedFieldsInComputeMode { template static constexpr bool isMatched() { if (productFamily == IGFX_BROADWELL) return false; return TestTraits::get()>::programOnlyChangedFieldsInComputeStateMode; } }; HWTEST2_F(CommandListAppendLaunchKernel, GivenComputeModePropertiesWhenUpdateStreamPropertiesIsCalledTwiceThenChangedFieldsAreDirty, ProgramChangedFieldsInComputeMode) { DebugManagerStateRestore restorer; Mock<::L0::Kernel> kernel; auto 
pMockModule = std::unique_ptr(new Mock(device, nullptr)); kernel.module = pMockModule.get(); auto pCommandList = std::make_unique>>(); auto result = pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u); ASSERT_EQ(ZE_RESULT_SUCCESS, result); const_cast(&kernel.getKernelDescriptor())->kernelAttributes.numGrfRequired = 0x100; pCommandList->updateStreamProperties(kernel, false, false); EXPECT_TRUE(pCommandList->finalStreamState.stateComputeMode.isCoherencyRequired.isDirty); EXPECT_TRUE(pCommandList->finalStreamState.stateComputeMode.largeGrfMode.isDirty); const_cast(&kernel.getKernelDescriptor())->kernelAttributes.numGrfRequired = 0x80; pCommandList->updateStreamProperties(kernel, false, false); EXPECT_TRUE(pCommandList->finalStreamState.stateComputeMode.largeGrfMode.isDirty); EXPECT_FALSE(pCommandList->finalStreamState.stateComputeMode.isCoherencyRequired.isDirty); } struct ProgramAllFieldsInComputeMode { template static constexpr bool isMatched() { return !TestTraits::get()>::programOnlyChangedFieldsInComputeStateMode; } }; HWTEST2_F(CommandListAppendLaunchKernel, GivenComputeModeTraitsSetToFalsePropertiesWhenUpdateStreamPropertiesIsCalledTwiceThenAllFieldsAreDirty, ProgramAllFieldsInComputeMode) { DebugManagerStateRestore restorer; Mock<::L0::Kernel> kernel; auto pMockModule = std::unique_ptr(new Mock(device, nullptr)); kernel.module = pMockModule.get(); auto pCommandList = std::make_unique>>(); auto result = pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u); ASSERT_EQ(ZE_RESULT_SUCCESS, result); const_cast(&kernel.getKernelDescriptor())->kernelAttributes.numGrfRequired = 0x100; pCommandList->updateStreamProperties(kernel, false, false); EXPECT_TRUE(pCommandList->finalStreamState.stateComputeMode.isCoherencyRequired.isDirty); EXPECT_TRUE(pCommandList->finalStreamState.stateComputeMode.largeGrfMode.isDirty); const_cast(&kernel.getKernelDescriptor())->kernelAttributes.numGrfRequired = 0x80; pCommandList->updateStreamProperties(kernel, 
false, false); EXPECT_TRUE(pCommandList->finalStreamState.stateComputeMode.largeGrfMode.isDirty); EXPECT_FALSE(pCommandList->finalStreamState.stateComputeMode.isCoherencyRequired.isDirty); } HWTEST2_F(CommandListAppendLaunchKernel, GivenComputeModePropertiesWhenPropertesNotChangedThenAllFieldsAreNotDirty, IsAtLeastSkl) { DebugManagerStateRestore restorer; Mock<::L0::Kernel> kernel; auto pMockModule = std::unique_ptr(new Mock(device, nullptr)); kernel.module = pMockModule.get(); auto pCommandList = std::make_unique>>(); auto result = pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u); ASSERT_EQ(ZE_RESULT_SUCCESS, result); const_cast(&kernel.getKernelDescriptor())->kernelAttributes.numGrfRequired = 0x100; pCommandList->updateStreamProperties(kernel, false, false); EXPECT_TRUE(pCommandList->finalStreamState.stateComputeMode.isCoherencyRequired.isDirty); EXPECT_TRUE(pCommandList->finalStreamState.stateComputeMode.largeGrfMode.isDirty); pCommandList->updateStreamProperties(kernel, false, false); EXPECT_FALSE(pCommandList->finalStreamState.stateComputeMode.isCoherencyRequired.isDirty); EXPECT_FALSE(pCommandList->finalStreamState.stateComputeMode.largeGrfMode.isDirty); } using HostPointerManagerCommandListTest = Test; HWTEST2_F(HostPointerManagerCommandListTest, givenImportedHostPointerWhenAppendMemoryFillUsingHostPointerThenAppendFillUsingHostPointerAllocation, IsAtLeastSkl) { auto commandList = std::make_unique>>(); commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u); auto ret = hostDriverHandle->importExternalPointer(heapPointer, MemoryConstants::pageSize); EXPECT_EQ(ZE_RESULT_SUCCESS, ret); int pattern = 1; ret = commandList->appendMemoryFill(heapPointer, reinterpret_cast(&pattern), sizeof(pattern), 64u, nullptr, 0, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, ret); ret = hostDriverHandle->releaseImportedPointer(heapPointer); EXPECT_EQ(ZE_RESULT_SUCCESS, ret); } HWTEST2_F(HostPointerManagerCommandListTest, 
givenImportedHostPointerAndCopyEngineWhenAppendMemoryFillUsingHostPointerThenAppendFillUsingHostPointerAllocation, IsAtLeastSkl) { auto commandList = std::make_unique>>(); commandList->initialize(device, NEO::EngineGroupType::Copy, 0u); auto ret = hostDriverHandle->importExternalPointer(heapPointer, MemoryConstants::pageSize); EXPECT_EQ(ZE_RESULT_SUCCESS, ret); int pattern = 1; ret = commandList->appendMemoryFill(heapPointer, reinterpret_cast(&pattern), sizeof(pattern), 64u, nullptr, 0, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, ret); ret = hostDriverHandle->releaseImportedPointer(heapPointer); EXPECT_EQ(ZE_RESULT_SUCCESS, ret); } HWTEST2_F(HostPointerManagerCommandListTest, givenHostPointerImportedWhenGettingAlignedAllocationThenRetrieveProperOffsetAndAddress, IsAtLeastSkl) { auto commandList = std::make_unique<::L0::ult::CommandListCoreFamily>(); commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u); size_t mainOffset = 100; size_t importSize = 100; void *importPointer = ptrOffset(heapPointer, mainOffset); auto ret = hostDriverHandle->importExternalPointer(importPointer, importSize); EXPECT_EQ(ZE_RESULT_SUCCESS, ret); auto hostAllocation = hostDriverHandle->findHostPointerAllocation(importPointer, importSize, device->getRootDeviceIndex()); ASSERT_NE(nullptr, hostAllocation); size_t allocOffset = 10; size_t offsetSize = 20; void *offsetPointer = ptrOffset(importPointer, allocOffset); AlignedAllocationData outData = commandList->getAlignedAllocation(device, importPointer, importSize, false); auto gpuBaseAddress = static_cast(hostAllocation->getGpuAddress()); auto expectedAlignedAddress = alignDown(gpuBaseAddress, NEO::EncodeSurfaceState::getSurfaceBaseAddressAlignment()); size_t expectedOffset = gpuBaseAddress - expectedAlignedAddress; EXPECT_EQ(importPointer, hostAllocation->getUnderlyingBuffer()); EXPECT_EQ(expectedAlignedAddress, outData.alignedAllocationPtr); EXPECT_EQ(hostAllocation, outData.alloc); EXPECT_EQ(expectedOffset, outData.offset); 
outData = commandList->getAlignedAllocation(device, offsetPointer, offsetSize, false); expectedOffset += allocOffset; EXPECT_EQ(importPointer, hostAllocation->getUnderlyingBuffer()); EXPECT_EQ(expectedAlignedAddress, outData.alignedAllocationPtr); EXPECT_EQ(hostAllocation, outData.alloc); EXPECT_EQ(expectedOffset, outData.offset); ret = hostDriverHandle->releaseImportedPointer(importPointer); EXPECT_EQ(ZE_RESULT_SUCCESS, ret); } HWTEST2_F(HostPointerManagerCommandListTest, givenHostPointerImportedWhenGettingPointerFromAnotherPageThenRetrieveBaseAddressAndProperOffset, IsAtLeastSkl) { auto commandList = std::make_unique<::L0::ult::CommandListCoreFamily>(); commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u); size_t pointerSize = MemoryConstants::pageSize; size_t offset = 100u + 2 * MemoryConstants::pageSize; void *offsetPointer = ptrOffset(heapPointer, offset); auto ret = hostDriverHandle->importExternalPointer(heapPointer, heapSize); EXPECT_EQ(ZE_RESULT_SUCCESS, ret); auto hostAllocation = hostDriverHandle->findHostPointerAllocation(offsetPointer, pointerSize, device->getRootDeviceIndex()); ASSERT_NE(nullptr, hostAllocation); AlignedAllocationData outData = commandList->getAlignedAllocation(device, offsetPointer, pointerSize, false); auto expectedAlignedAddress = static_cast(hostAllocation->getGpuAddress()); EXPECT_EQ(heapPointer, hostAllocation->getUnderlyingBuffer()); EXPECT_EQ(expectedAlignedAddress, outData.alignedAllocationPtr); EXPECT_EQ(hostAllocation, outData.alloc); EXPECT_EQ(offset, outData.offset); ret = hostDriverHandle->releaseImportedPointer(heapPointer); EXPECT_EQ(ZE_RESULT_SUCCESS, ret); } HWTEST2_F(HostPointerManagerCommandListTest, givenCommandListWhenMemoryFillWithSignalAndWaitEventsUsingRenderEngineThenPipeControlIsFound, IsAtLeastSkl) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; ze_result_t result = ZE_RESULT_SUCCESS; auto commandList = std::make_unique>>(); commandList->initialize(device, 
NEO::EngineGroupType::RenderCompute, 0u); auto &commandContainer = commandList->commandContainer; auto ret = hostDriverHandle->importExternalPointer(heapPointer, MemoryConstants::pageSize); EXPECT_EQ(ZE_RESULT_SUCCESS, ret); int one = 1; size_t size = 16; ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 2; auto eventPool = std::unique_ptr(L0::EventPool::create(hostDriverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); std::vector events; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); events.push_back(event.get()); eventDesc.index = 1; auto event1 = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); events.push_back(event1.get()); result = commandList->appendMemoryFill(heapPointer, reinterpret_cast(&one), sizeof(one), size, events[0], 1u, &events[1]); EXPECT_EQ(ZE_RESULT_SUCCESS, result); ret = hostDriverHandle->releaseImportedPointer(heapPointer); EXPECT_EQ(ZE_RESULT_SUCCESS, ret); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); auto itor = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), itor); } using SupportedPlatformsSklIcllp = IsWithinProducts; HWTEST2_F(HostPointerManagerCommandListTest, givenCommandListWhenMemoryFillWithSignalAndInvalidWaitHandleUsingRenderEngineThenErrorIsReturnedAndPipeControlIsNotAdded, SupportedPlatformsSklIcllp) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; ze_result_t result = ZE_RESULT_SUCCESS; auto commandList = std::make_unique>>(); commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u); auto &commandContainer = commandList->commandContainer; auto ret = hostDriverHandle->importExternalPointer(heapPointer, 
MemoryConstants::pageSize); EXPECT_EQ(ZE_RESULT_SUCCESS, ret); int one = 1; size_t size = 16; ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 2; auto eventPool = std::unique_ptr(L0::EventPool::create(hostDriverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); std::vector events; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); events.push_back(event.get()); eventDesc.index = 1; auto event1 = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); events.push_back(event1.get()); auto offset = commandContainer.getCommandStream()->getUsed(); result = commandList->appendMemoryFill(heapPointer, reinterpret_cast(&one), sizeof(one), size, events[0], 1u, nullptr); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, result); ret = hostDriverHandle->releaseImportedPointer(heapPointer); EXPECT_EQ(ZE_RESULT_SUCCESS, ret); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), offset), commandContainer.getCommandStream()->getUsed() - offset)); auto itor = find(cmdList.begin(), cmdList.end()); EXPECT_EQ(cmdList.end(), itor); } HWTEST2_F(HostPointerManagerCommandListTest, givenCommandListWhenMemoryFillWithSignalAndWaitEventsUsingCopyEngineThenSuccessIsReturned, IsAtLeastSkl) { ze_result_t result = ZE_RESULT_SUCCESS; auto commandList = std::make_unique>>(); commandList->initialize(device, NEO::EngineGroupType::Copy, 0u); auto ret = hostDriverHandle->importExternalPointer(heapPointer, MemoryConstants::pageSize); EXPECT_EQ(ZE_RESULT_SUCCESS, ret); int one = 1; size_t size = 16; ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 2; auto eventPool = std::unique_ptr(L0::EventPool::create(hostDriverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, 
result); std::vector events; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); events.push_back(event.get()); eventDesc.index = 1; auto event1 = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); events.push_back(event1.get()); result = commandList->appendMemoryFill(heapPointer, reinterpret_cast(&one), sizeof(one), size, events[0], 1u, &events[1]); EXPECT_EQ(ZE_RESULT_SUCCESS, result); ret = hostDriverHandle->releaseImportedPointer(heapPointer); EXPECT_EQ(ZE_RESULT_SUCCESS, ret); } HWTEST2_F(HostPointerManagerCommandListTest, givenCommandListWhenMemoryFillWithSignalAndiInvalidWaitHandleUsingCopyEngineThenErrorIsReturned, SupportedPlatformsSklIcllp) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; ze_result_t result = ZE_RESULT_SUCCESS; auto commandList = std::make_unique>>(); commandList->initialize(device, NEO::EngineGroupType::Copy, 0u); auto &commandContainer = commandList->commandContainer; auto ret = hostDriverHandle->importExternalPointer(heapPointer, MemoryConstants::pageSize); EXPECT_EQ(ZE_RESULT_SUCCESS, ret); int one = 1; size_t size = 16; ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 2; auto eventPool = std::unique_ptr(L0::EventPool::create(hostDriverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); std::vector events; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); events.push_back(event.get()); eventDesc.index = 1; auto event1 = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); events.push_back(event1.get()); result = commandList->appendMemoryFill(heapPointer, reinterpret_cast(&one), sizeof(one), size, events[0], 1u, nullptr); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, 
result); ret = hostDriverHandle->releaseImportedPointer(heapPointer); EXPECT_EQ(ZE_RESULT_SUCCESS, ret); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); auto itor = find(cmdList.begin(), cmdList.end()); EXPECT_EQ(cmdList.end(), itor); } HWTEST2_F(HostPointerManagerCommandListTest, givenImmediateCommandListWhenMemoryFillWithSignalAndWaitEventsUsingRenderEngineThenSuccessIsReturned, IsAtLeastSkl) { const ze_command_queue_desc_t desc = {}; bool internalEngine = true; ze_result_t ret = ZE_RESULT_SUCCESS; std::unique_ptr commandList0(CommandList::createImmediate(productFamily, device, &desc, internalEngine, NEO::EngineGroupType::RenderCompute, ret)); ASSERT_NE(nullptr, commandList0); CommandQueueImp *cmdQueue = reinterpret_cast(commandList0->cmdQImmediate); EXPECT_EQ(cmdQueue->getCsr(), neoDevice->getInternalEngine().commandStreamReceiver); ret = hostDriverHandle->importExternalPointer(heapPointer, MemoryConstants::pageSize); EXPECT_EQ(ZE_RESULT_SUCCESS, ret); int one = 1; size_t size = 16; ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 2; auto eventPool = std::unique_ptr(L0::EventPool::create(hostDriverHandle.get(), context, 0, nullptr, &eventPoolDesc, ret)); EXPECT_EQ(ZE_RESULT_SUCCESS, ret); std::vector events; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); events.push_back(event.get()); eventDesc.index = 1; auto event1 = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); events.push_back(event1.get()); ret = commandList0->appendMemoryFill(heapPointer, reinterpret_cast(&one), sizeof(one), size, events[0], 1u, &events[1]); EXPECT_EQ(ZE_RESULT_SUCCESS, ret); ret = hostDriverHandle->releaseImportedPointer(heapPointer); EXPECT_EQ(ZE_RESULT_SUCCESS, ret); } 
HWTEST2_F(HostPointerManagerCommandListTest, givenImmediateCommandListWhenMemoryFillWithSignalAndWaitEventsUsingCopyEngineThenSuccessIsReturned, IsAtLeastSkl) { const ze_command_queue_desc_t desc = {}; bool internalEngine = true; ze_result_t ret = ZE_RESULT_SUCCESS; std::unique_ptr commandList0(CommandList::createImmediate(productFamily, device, &desc, internalEngine, NEO::EngineGroupType::Copy, ret)); ASSERT_NE(nullptr, commandList0); CommandQueueImp *cmdQueue = reinterpret_cast(commandList0->cmdQImmediate); if (neoDevice->getInternalCopyEngine()) { EXPECT_EQ(cmdQueue->getCsr(), neoDevice->getInternalCopyEngine()->commandStreamReceiver); } else { EXPECT_EQ(cmdQueue->getCsr(), neoDevice->getInternalEngine().commandStreamReceiver); } ret = hostDriverHandle->importExternalPointer(heapPointer, MemoryConstants::pageSize); EXPECT_EQ(ZE_RESULT_SUCCESS, ret); int one = 1; size_t size = 16; ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 2; auto eventPool = std::unique_ptr(L0::EventPool::create(hostDriverHandle.get(), context, 0, nullptr, &eventPoolDesc, ret)); EXPECT_EQ(ZE_RESULT_SUCCESS, ret); std::vector events; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); events.push_back(event.get()); eventDesc.index = 1; auto event1 = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); events.push_back(event1.get()); ret = commandList0->appendMemoryFill(heapPointer, reinterpret_cast(&one), sizeof(one), size, events[0], 1u, &events[1]); EXPECT_EQ(ZE_RESULT_SUCCESS, ret); ret = hostDriverHandle->releaseImportedPointer(heapPointer); EXPECT_EQ(ZE_RESULT_SUCCESS, ret); } HWTEST2_F(HostPointerManagerCommandListTest, givenImmediateCommandListWhenMemoryFillWithSignalAndInvalidWaitHandleUsingCopyEngineThenErrorIsReturned, SupportedPlatformsSklIcllp) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; ze_result_t ret 
= ZE_RESULT_SUCCESS; const ze_command_queue_desc_t desc = {}; bool internalEngine = true; std::unique_ptr commandList0(CommandList::createImmediate(productFamily, device, &desc, internalEngine, NEO::EngineGroupType::Copy, ret)); ASSERT_NE(nullptr, commandList0); CommandQueueImp *cmdQueue = reinterpret_cast(commandList0->cmdQImmediate); if (neoDevice->getInternalCopyEngine()) { EXPECT_EQ(cmdQueue->getCsr(), neoDevice->getInternalCopyEngine()->commandStreamReceiver); } else { EXPECT_EQ(cmdQueue->getCsr(), neoDevice->getInternalEngine().commandStreamReceiver); } ret = hostDriverHandle->importExternalPointer(heapPointer, MemoryConstants::pageSize); EXPECT_EQ(ZE_RESULT_SUCCESS, ret); int one = 1; size_t size = 16; ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 2; auto eventPool = std::unique_ptr(L0::EventPool::create(hostDriverHandle.get(), context, 0, nullptr, &eventPoolDesc, ret)); EXPECT_EQ(ZE_RESULT_SUCCESS, ret); std::vector events; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); events.push_back(event.get()); eventDesc.index = 1; auto event1 = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); events.push_back(event1.get()); ret = commandList0->appendMemoryFill(heapPointer, reinterpret_cast(&one), sizeof(one), size, events[0], 1u, nullptr); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, ret); ret = hostDriverHandle->releaseImportedPointer(heapPointer); EXPECT_EQ(ZE_RESULT_SUCCESS, ret); } HWTEST2_F(HostPointerManagerCommandListTest, givenDebugModeToRegisterAllHostPointerWhenFindIsCalledThenRegisterHappens, IsAtLeastSkl) { DebugManagerStateRestore restorer; DebugManager.flags.ForceHostPointerImport.set(1); void *testPtr = heapPointer; auto gfxAllocation = hostDriverHandle->findHostPointerAllocation(testPtr, 0x10u, device->getRootDeviceIndex()); EXPECT_NE(nullptr, gfxAllocation); EXPECT_EQ(testPtr, 
gfxAllocation->getUnderlyingBuffer()); auto result = hostDriverHandle->releaseImportedPointer(testPtr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } using SingleTileOnlyPlatforms = IsWithinGfxCore; HWTEST2_F(CommandListCreate, givenSingleTileOnlyPlatformsWhenProgrammingMultiTileBarrierThenNoProgrammingIsExpected, SingleTileOnlyPlatforms) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; auto neoDevice = device->getNEODevice(); auto &hwInfo = neoDevice->getHardwareInfo(); auto commandList = std::make_unique<::L0::ult::CommandListCoreFamily>(); ASSERT_NE(nullptr, commandList); ze_result_t returnValue = commandList->initialize(device, NEO::EngineGroupType::Compute, 0u); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); EXPECT_EQ(0u, commandList->estimateBufferSizeMultiTileBarrier(hwInfo)); auto cmdListStream = commandList->commandContainer.getCommandStream(); size_t usedBefore = cmdListStream->getUsed(); commandList->appendMultiTileBarrier(*neoDevice); size_t usedAfter = cmdListStream->getUsed(); EXPECT_EQ(usedBefore, usedAfter); } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_5.cpp000066400000000000000000001174321422164147700324100ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "shared/test/common/mocks/ult_device_factory.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/source/kernel/kernel_imp.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_builtin_functions_lib_impl_timestamps.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h" #include "level_zero/core/test/unit_tests/mocks/mock_device_for_spirv.h" #include "level_zero/core/test/unit_tests/mocks/mock_event.h" #include "level_zero/core/test/unit_tests/mocks/mock_kernel.h" #include 
"level_zero/core/test/unit_tests/mocks/mock_module.h" namespace L0 { namespace ult { using CommandListCreate = Test; HWTEST_F(CommandListCreate, givenCommandListWithInvalidWaitEventArgWhenAppendQueryKernelTimestampsThenProperErrorRetruned) { ze_result_t returnValue; std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)); device->getBuiltinFunctionsLib()->initBuiltinKernel(L0::Builtin::QueryKernelTimestamps); MockEvent event; event.waitScope = ZE_EVENT_SCOPE_FLAG_HOST; event.signalScope = ZE_EVENT_SCOPE_FLAG_HOST; void *alloc; ze_device_mem_alloc_desc_t deviceDesc = {}; auto result = context->allocDeviceMem(device, &deviceDesc, 128, 1, &alloc); EXPECT_EQ(result, ZE_RESULT_SUCCESS); auto eventHandle = event.toHandle(); result = commandList->appendQueryKernelTimestamps(1u, &eventHandle, alloc, nullptr, nullptr, 1u, nullptr); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, result); context->freeMem(alloc); } struct CmdListHelper { NEO::GraphicsAllocation *isaAllocation = nullptr; NEO::ResidencyContainer residencyContainer; ze_group_count_t threadGroupDimensions; const uint32_t *groupSize = nullptr; uint32_t useOnlyGlobalTimestamp = std::numeric_limits::max(); }; template class MockCommandListForAppendLaunchKernel : public WhiteBox<::L0::CommandListCoreFamily> { public: CmdListHelper cmdListHelper; ze_result_t appendLaunchKernel(ze_kernel_handle_t hKernel, const ze_group_count_t *pThreadGroupDimensions, ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override { const auto kernel = Kernel::fromHandle(hKernel); cmdListHelper.isaAllocation = kernel->getIsaAllocation(); cmdListHelper.residencyContainer = kernel->getResidencyContainer(); cmdListHelper.groupSize = kernel->getGroupSize(); cmdListHelper.threadGroupDimensions = *pThreadGroupDimensions; auto kernelName = kernel->getImmutableData()->getDescriptor().kernelMetadata.kernelName; NEO::ArgDescriptor arg; if (kernelName 
== "QueryKernelTimestamps") { arg = kernel->getImmutableData()->getDescriptor().payloadMappings.explicitArgs[2u]; } else if (kernelName == "QueryKernelTimestampsWithOffsets") { arg = kernel->getImmutableData()->getDescriptor().payloadMappings.explicitArgs[3u]; } else { return ZE_RESULT_SUCCESS; } auto crossThreadData = kernel->getCrossThreadData(); auto element = arg.as().elements[0]; auto pDst = ptrOffset(crossThreadData, element.offset); cmdListHelper.useOnlyGlobalTimestamp = *(uint32_t *)(pDst); return ZE_RESULT_SUCCESS; } }; using AppendQueryKernelTimestamps = CommandListCreate; HWTEST2_F(AppendQueryKernelTimestamps, givenCommandListWhenAppendQueryKernelTimestampsWithoutOffsetsThenProperBuiltinWasAdded, IsAtLeastSkl) { std::unique_ptr> testDevice = std::unique_ptr>(new MockDeviceForSpv(device->getNEODevice(), device->getNEODevice()->getExecutionEnvironment(), driverHandle.get())); testDevice->builtins.reset(new MockBuiltinFunctionsLibImplTimestamps(testDevice.get(), testDevice->getNEODevice()->getBuiltIns())); testDevice->getBuiltinFunctionsLib()->initBuiltinKernel(L0::Builtin::QueryKernelTimestamps); testDevice->getBuiltinFunctionsLib()->initBuiltinKernel(L0::Builtin::QueryKernelTimestampsWithOffsets); device = testDevice.get(); MockCommandListForAppendLaunchKernel commandList; commandList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); MockEvent event; event.waitScope = ZE_EVENT_SCOPE_FLAG_HOST; event.signalScope = ZE_EVENT_SCOPE_FLAG_HOST; void *alloc; ze_device_mem_alloc_desc_t deviceDesc = {}; context->getDevices().insert(std::make_pair(device->toHandle(), device)); auto result = context->allocDeviceMem(device, &deviceDesc, 128, 1, &alloc); EXPECT_EQ(result, ZE_RESULT_SUCCESS); ze_event_handle_t events[2] = {event.toHandle(), event.toHandle()}; result = commandList.appendQueryKernelTimestamps(2u, events, alloc, nullptr, nullptr, 0u, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); bool containsDstPtr = false; bool gpuTimeStampAlloc = false; for 
(auto &residentGfxAlloc : commandList.cmdListHelper.residencyContainer) { if (residentGfxAlloc != nullptr) { if (residentGfxAlloc->getGpuAddress() == reinterpret_cast(alloc)) { containsDstPtr = true; } if (residentGfxAlloc->getAllocationType() == NEO::AllocationType::GPU_TIMESTAMP_DEVICE_BUFFER) { gpuTimeStampAlloc = true; } } } EXPECT_TRUE(containsDstPtr); EXPECT_TRUE(gpuTimeStampAlloc); EXPECT_EQ(testDevice->getBuiltinFunctionsLib()->getFunction(Builtin::QueryKernelTimestamps)->getIsaAllocation()->getGpuAddress(), commandList.cmdListHelper.isaAllocation->getGpuAddress()); EXPECT_EQ(2u, commandList.cmdListHelper.groupSize[0]); EXPECT_EQ(1u, commandList.cmdListHelper.groupSize[1]); EXPECT_EQ(1u, commandList.cmdListHelper.groupSize[2]); EXPECT_EQ(NEO::HwHelper::get(device->getHwInfo().platform.eRenderCoreFamily).useOnlyGlobalTimestamps() ? 1u : 0u, commandList.cmdListHelper.useOnlyGlobalTimestamp); EXPECT_EQ(1u, commandList.cmdListHelper.threadGroupDimensions.groupCountX); EXPECT_EQ(1u, commandList.cmdListHelper.threadGroupDimensions.groupCountY); EXPECT_EQ(1u, commandList.cmdListHelper.threadGroupDimensions.groupCountZ); context->freeMem(alloc); } HWTEST2_F(AppendQueryKernelTimestamps, givenCommandListWhenAppendQueryKernelTimestampsWithOffsetsThenProperBuiltinWasAdded, IsAtLeastSkl) { std::unique_ptr> testDevice = std::unique_ptr>(new MockDeviceForSpv(device->getNEODevice(), device->getNEODevice()->getExecutionEnvironment(), driverHandle.get())); testDevice->builtins.reset(new MockBuiltinFunctionsLibImplTimestamps(testDevice.get(), testDevice->getNEODevice()->getBuiltIns())); testDevice->getBuiltinFunctionsLib()->initBuiltinKernel(L0::Builtin::QueryKernelTimestamps); testDevice->getBuiltinFunctionsLib()->initBuiltinKernel(L0::Builtin::QueryKernelTimestampsWithOffsets); device = testDevice.get(); MockCommandListForAppendLaunchKernel commandList; commandList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); MockEvent event; event.waitScope = 
ZE_EVENT_SCOPE_FLAG_HOST; event.signalScope = ZE_EVENT_SCOPE_FLAG_HOST; void *alloc; ze_device_mem_alloc_desc_t deviceDesc = {}; context->getDevices().insert(std::make_pair(device->toHandle(), device)); auto result = context->allocDeviceMem(device, &deviceDesc, 128, 1, &alloc); EXPECT_EQ(result, ZE_RESULT_SUCCESS); void *offsetAlloc; result = context->allocDeviceMem(device, &deviceDesc, 128, 1, &offsetAlloc); EXPECT_EQ(result, ZE_RESULT_SUCCESS); ze_event_handle_t events[2] = {event.toHandle(), event.toHandle()}; auto offsetSizes = reinterpret_cast(offsetAlloc); result = commandList.appendQueryKernelTimestamps(2u, events, alloc, offsetSizes, nullptr, 0u, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); bool containsDstPtr = false; for (auto &a : commandList.cmdListHelper.residencyContainer) { if (a != nullptr && a->getGpuAddress() == reinterpret_cast(alloc)) { containsDstPtr = true; } } EXPECT_TRUE(containsDstPtr); bool containOffsetPtr = false; for (auto &a : commandList.cmdListHelper.residencyContainer) { if (a != nullptr && a->getGpuAddress() == reinterpret_cast(offsetAlloc)) { containOffsetPtr = true; } } EXPECT_TRUE(containOffsetPtr); EXPECT_EQ(device->getBuiltinFunctionsLib()->getFunction(Builtin::QueryKernelTimestampsWithOffsets)->getIsaAllocation()->getGpuAddress(), commandList.cmdListHelper.isaAllocation->getGpuAddress()); EXPECT_EQ(2u, commandList.cmdListHelper.groupSize[0]); EXPECT_EQ(1u, commandList.cmdListHelper.groupSize[1]); EXPECT_EQ(1u, commandList.cmdListHelper.groupSize[2]); EXPECT_EQ(NEO::HwHelper::get(device->getHwInfo().platform.eRenderCoreFamily).useOnlyGlobalTimestamps() ? 
1u : 0u, commandList.cmdListHelper.useOnlyGlobalTimestamp); EXPECT_EQ(1u, commandList.cmdListHelper.threadGroupDimensions.groupCountX); EXPECT_EQ(1u, commandList.cmdListHelper.threadGroupDimensions.groupCountY); EXPECT_EQ(1u, commandList.cmdListHelper.threadGroupDimensions.groupCountZ); context->freeMem(alloc); context->freeMem(offsetAlloc); } HWTEST2_F(AppendQueryKernelTimestamps, givenCommandListWhenAppendQueryKernelTimestampsWithEventsNumberBiggerThanMaxWorkItemSizeThenProperGroupSizeAndGroupCountIsSet, IsAtLeastSkl) { std::unique_ptr> testDevice = std::unique_ptr>(new MockDeviceForSpv(device->getNEODevice(), device->getNEODevice()->getExecutionEnvironment(), driverHandle.get())); testDevice->builtins.reset(new MockBuiltinFunctionsLibImplTimestamps(testDevice.get(), testDevice->getNEODevice()->getBuiltIns())); testDevice->getBuiltinFunctionsLib()->initBuiltinKernel(L0::Builtin::QueryKernelTimestamps); device = testDevice.get(); MockCommandListForAppendLaunchKernel commandList; commandList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); MockEvent event; event.waitScope = ZE_EVENT_SCOPE_FLAG_HOST; event.signalScope = ZE_EVENT_SCOPE_FLAG_HOST; void *alloc; ze_device_mem_alloc_desc_t deviceDesc = {}; context->getDevices().insert(std::make_pair(device->toHandle(), device)); auto result = context->allocDeviceMem(device, &deviceDesc, 128, 1, &alloc); EXPECT_EQ(result, ZE_RESULT_SUCCESS); size_t eventCount = device->getNEODevice()->getDeviceInfo().maxWorkItemSizes[0] * 2u; std::unique_ptr events = std::make_unique(eventCount); for (size_t i = 0u; i < eventCount; ++i) { events[i] = event.toHandle(); } result = commandList.appendQueryKernelTimestamps(static_cast(eventCount), events.get(), alloc, nullptr, nullptr, 0u, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(device->getBuiltinFunctionsLib()->getFunction(Builtin::QueryKernelTimestamps)->getIsaAllocation()->getGpuAddress(), commandList.cmdListHelper.isaAllocation->getGpuAddress()); uint32_t 
groupSizeX = static_cast(eventCount); uint32_t groupSizeY = 1u; uint32_t groupSizeZ = 1u; device->getBuiltinFunctionsLib()->getFunction(Builtin::QueryKernelTimestamps)->suggestGroupSize(groupSizeX, groupSizeY, groupSizeZ, &groupSizeX, &groupSizeY, &groupSizeZ); EXPECT_EQ(groupSizeX, commandList.cmdListHelper.groupSize[0]); EXPECT_EQ(groupSizeY, commandList.cmdListHelper.groupSize[1]); EXPECT_EQ(groupSizeZ, commandList.cmdListHelper.groupSize[2]); EXPECT_EQ(NEO::HwHelper::get(device->getHwInfo().platform.eRenderCoreFamily).useOnlyGlobalTimestamps() ? 1u : 0u, commandList.cmdListHelper.useOnlyGlobalTimestamp); EXPECT_EQ(static_cast(eventCount) / groupSizeX, commandList.cmdListHelper.threadGroupDimensions.groupCountX); EXPECT_EQ(1u, commandList.cmdListHelper.threadGroupDimensions.groupCountY); EXPECT_EQ(1u, commandList.cmdListHelper.threadGroupDimensions.groupCountZ); context->freeMem(alloc); } HWTEST2_F(AppendQueryKernelTimestamps, givenCommandListWhenAppendQueryKernelTimestampsAndInvalidResultSuggestGroupSizeThenUnknownResultReturned, IsAtLeastSkl) { class MockQueryKernelTimestampsKernel : public L0::KernelImp { public: ze_result_t suggestGroupSize(uint32_t globalSizeX, uint32_t globalSizeY, uint32_t globalSizeZ, uint32_t *groupSizeX, uint32_t *groupSizeY, uint32_t *groupSizeZ) override { return ZE_RESULT_ERROR_UNKNOWN; } void setBufferSurfaceState(uint32_t argIndex, void *address, NEO::GraphicsAllocation *alloc) override { return; } void evaluateIfRequiresGenerationOfLocalIdsByRuntime(const NEO::KernelDescriptor &kernelDescriptor) override { return; } }; struct MockBuiltinFunctionsLibImpl : BuiltinFunctionsLibImpl { using BuiltinFunctionsLibImpl::builtins; using BuiltinFunctionsLibImpl::getFunction; using BuiltinFunctionsLibImpl::imageBuiltins; MockBuiltinFunctionsLibImpl(L0::Device *device, NEO::BuiltIns *builtInsLib) : BuiltinFunctionsLibImpl(device, builtInsLib) {} }; struct MockBuiltinFunctionsForQueryKernelTimestamps : BuiltinFunctionsLibImpl { 
MockBuiltinFunctionsForQueryKernelTimestamps(L0::Device *device, NEO::BuiltIns *builtInsLib) : BuiltinFunctionsLibImpl(device, builtInsLib) { tmpMockKernel = new MockQueryKernelTimestampsKernel; } MockQueryKernelTimestampsKernel *getFunction(Builtin func) override { return tmpMockKernel; } ~MockBuiltinFunctionsForQueryKernelTimestamps() override { delete tmpMockKernel; } MockQueryKernelTimestampsKernel *tmpMockKernel = nullptr; }; class MockDeviceHandle : public L0::DeviceImp { public: MockDeviceHandle() { } void initialize(L0::Device *device) { neoDevice = device->getNEODevice(); neoDevice->incRefInternal(); execEnvironment = device->getExecEnvironment(); driverHandle = device->getDriverHandle(); tmpMockBultinLib = new MockBuiltinFunctionsForQueryKernelTimestamps{nullptr, nullptr}; } MockBuiltinFunctionsForQueryKernelTimestamps *getBuiltinFunctionsLib() override { return tmpMockBultinLib; } ~MockDeviceHandle() override { delete tmpMockBultinLib; } MockBuiltinFunctionsForQueryKernelTimestamps *tmpMockBultinLib = nullptr; }; MockDeviceHandle mockDevice; mockDevice.initialize(device); MockCommandListForAppendLaunchKernel commandList; commandList.initialize(&mockDevice, NEO::EngineGroupType::RenderCompute, 0u); MockEvent event; ze_event_handle_t events[2] = {event.toHandle(), event.toHandle()}; event.waitScope = ZE_EVENT_SCOPE_FLAG_HOST; event.signalScope = ZE_EVENT_SCOPE_FLAG_HOST; void *alloc; ze_device_mem_alloc_desc_t deviceDesc = {}; context->getDevices().insert(std::make_pair(mockDevice.toHandle(), &mockDevice)); auto result = context->allocDeviceMem(&mockDevice, &deviceDesc, 128, 1, &alloc); EXPECT_EQ(result, ZE_RESULT_SUCCESS); result = commandList.appendQueryKernelTimestamps(2u, events, alloc, nullptr, nullptr, 0u, nullptr); EXPECT_EQ(ZE_RESULT_ERROR_UNKNOWN, result); context->freeMem(alloc); } HWTEST2_F(AppendQueryKernelTimestamps, givenCommandListWhenAppendQueryKernelTimestampsAndInvalidResultSetGroupSizeThenUnknownResultReturned, IsAtLeastSkl) { class 
MockQueryKernelTimestampsKernel : public L0::KernelImp {
      public:
        // Reports a 1x1x1 group size and succeeds.
        // NOTE(review): the `static_cast(1u)` calls have no template argument in this copy
        // of the file; confirm the intended type against upstream.
        ze_result_t suggestGroupSize(uint32_t globalSizeX, uint32_t globalSizeY, uint32_t globalSizeZ, uint32_t *groupSizeX,
                                     uint32_t *groupSizeY, uint32_t *groupSizeZ) override {
            *groupSizeX = static_cast(1u);
            *groupSizeY = static_cast(1u);
            *groupSizeZ = static_cast(1u);
            return ZE_RESULT_SUCCESS;
        }
        // Always fails; this is the failure the test wants to observe.
        ze_result_t setGroupSize(uint32_t groupSizeX, uint32_t groupSizeY, uint32_t groupSizeZ) override {
            return ZE_RESULT_ERROR_UNKNOWN;
        }
        void setBufferSurfaceState(uint32_t argIndex, void *address, NEO::GraphicsAllocation *alloc) override {
            return;
        }
        void evaluateIfRequiresGenerationOfLocalIdsByRuntime(const NEO::KernelDescriptor &kernelDescriptor) override {
            return;
        }
    };
    // Re-exports three BuiltinFunctionsLibImpl members through using-declarations.
    struct MockBuiltinFunctionsLibImpl : BuiltinFunctionsLibImpl {

        using BuiltinFunctionsLibImpl::builtins;
        using BuiltinFunctionsLibImpl::getFunction;
        using BuiltinFunctionsLibImpl::imageBuiltins;
        MockBuiltinFunctionsLibImpl(L0::Device *device, NEO::BuiltIns *builtInsLib) : BuiltinFunctionsLibImpl(device, builtInsLib) {}
    };
    // Builtin library mock that returns the same mock kernel for every builtin request.
    struct MockBuiltinFunctionsForQueryKernelTimestamps : BuiltinFunctionsLibImpl {
        MockBuiltinFunctionsForQueryKernelTimestamps(L0::Device *device, NEO::BuiltIns *builtInsLib) : BuiltinFunctionsLibImpl(device, builtInsLib) {
            // Raw-pointer ownership; released in the destructor.
            tmpMockKernel = new MockQueryKernelTimestampsKernel;
        }
        MockQueryKernelTimestampsKernel *getFunction(Builtin func) override {
            return tmpMockKernel;
        }
        ~MockBuiltinFunctionsForQueryKernelTimestamps() override {
            delete tmpMockKernel;
        }
        MockQueryKernelTimestampsKernel *tmpMockKernel = nullptr;
    };
    // Device mock that borrows state from an existing L0 device and substitutes the mocked
    // builtin library.
    class MockDeviceHandle : public L0::DeviceImp {
      public:
        MockDeviceHandle() {
        }
        void initialize(L0::Device *device) {
            neoDevice = device->getNEODevice();
            // Takes an internal reference on the borrowed NEO device.
            neoDevice->incRefInternal();
            execEnvironment = device->getExecEnvironment();
            driverHandle = device->getDriverHandle();
            tmpMockBultinLib = new MockBuiltinFunctionsForQueryKernelTimestamps{nullptr, nullptr};
        }
        MockBuiltinFunctionsForQueryKernelTimestamps *getBuiltinFunctionsLib()
override {
            return tmpMockBultinLib;
        }
        ~MockDeviceHandle() override {
            delete tmpMockBultinLib;
        }
        MockBuiltinFunctionsForQueryKernelTimestamps *tmpMockBultinLib = nullptr;
    };

    MockDeviceHandle mockDevice;
    mockDevice.initialize(device);

    MockCommandListForAppendLaunchKernel commandList;

    commandList.initialize(&mockDevice, NEO::EngineGroupType::RenderCompute, 0u);

    // The same event handle is queried twice.
    MockEvent event;
    ze_event_handle_t events[2] = {event.toHandle(), event.toHandle()};
    event.waitScope = ZE_EVENT_SCOPE_FLAG_HOST;
    event.signalScope = ZE_EVENT_SCOPE_FLAG_HOST;

    void *alloc;
    ze_device_mem_alloc_desc_t deviceDesc = {};
    // Register the mock device with the context before allocating device memory on it.
    context->getDevices().insert(std::make_pair(mockDevice.toHandle(), &mockDevice));
    auto result = context->allocDeviceMem(&mockDevice, &deviceDesc, 128, 1, &alloc);
    EXPECT_EQ(result, ZE_RESULT_SUCCESS);

    // The command list is expected to return ZE_RESULT_ERROR_UNKNOWN here.
    result = commandList.appendQueryKernelTimestamps(2u, events, alloc, nullptr, nullptr, 0u, nullptr);
    EXPECT_EQ(ZE_RESULT_ERROR_UNKNOWN, result);

    context->freeMem(alloc);
}

// Success path: captures the allocation bound to kernel argument 0 so the EventData written
// for each queried event can be inspected.
HWTEST2_F(AppendQueryKernelTimestamps, givenEventWhenAppendQueryIsCalledThenSetAllEventData, IsAtLeastSkl) {
    class MockQueryKernelTimestampsKernel : public L0::KernelImp {
      public:
        // Points the kernel at locally owned descriptor / immutable-data objects.
        MockQueryKernelTimestampsKernel(L0::Module *module) : KernelImp(module) {
            mockKernelImmutableData.kernelDescriptor = &mockKernelDescriptor;
            this->kernelImmData = &mockKernelImmutableData;
        }
        // Remembers the allocation passed for argument index 0; always succeeds.
        ze_result_t setArgBufferWithAlloc(uint32_t argIndex, uintptr_t argVal, NEO::GraphicsAllocation *allocation) override {
            if (argIndex == 0) {
                index0Allocation = allocation;
            }
            return ZE_RESULT_SUCCESS;
        }
        void setBufferSurfaceState(uint32_t argIndex, void *address, NEO::GraphicsAllocation *alloc) override {
            return;
        }
        void evaluateIfRequiresGenerationOfLocalIdsByRuntime(const NEO::KernelDescriptor &kernelDescriptor) override {
            return;
        }

        NEO::GraphicsAllocation *index0Allocation = nullptr;
        KernelDescriptor mockKernelDescriptor = {};
        WhiteBox<::L0::KernelImmutableData> mockKernelImmutableData = {};
    };

    struct MockBuiltinFunctionsForQueryKernelTimestamps :
BuiltinFunctionsLibImpl {
        // Builds a module and a mock kernel on top of it; both are held in unique_ptr members.
        // NOTE(review): the `std::make_unique(...)`, `static_cast(...)` and `std::unique_ptr`
        // uses below have no template arguments in this copy of the file - confirm upstream.
        MockBuiltinFunctionsForQueryKernelTimestamps(L0::Device *device, NEO::BuiltIns *builtInsLib) : BuiltinFunctionsLibImpl(device, builtInsLib) {
            tmpModule = std::make_unique(device, nullptr, ModuleType::Builtin);
            tmpMockKernel = std::make_unique(static_cast(tmpModule.get()));
        }
        // Every builtin request, whatever `func` is, yields the single mock kernel.
        MockQueryKernelTimestampsKernel *getFunction(Builtin func) override {
            return tmpMockKernel.get();
        }

        std::unique_ptr tmpModule;
        std::unique_ptr tmpMockKernel;
    };
    // Device mock that borrows state from an existing L0 device and substitutes the mocked
    // builtin library (constructed with `this` as its device).
    class MockDeviceHandle : public L0::DeviceImp {
      public:
        MockDeviceHandle() {
        }
        void initialize(L0::Device *device) {
            neoDevice = device->getNEODevice();
            // Takes an internal reference on the borrowed NEO device.
            neoDevice->incRefInternal();
            execEnvironment = device->getExecEnvironment();
            driverHandle = device->getDriverHandle();
            tmpMockBultinLib = std::make_unique(this, nullptr);
        }
        MockBuiltinFunctionsForQueryKernelTimestamps *getBuiltinFunctionsLib() override {
            return tmpMockBultinLib.get();
        }
        std::unique_ptr tmpMockBultinLib;
    };

    MockDeviceHandle mockDevice;
    mockDevice.initialize(device);

    MockCommandListForAppendLaunchKernel commandList;

    commandList.initialize(&mockDevice, NEO::EngineGroupType::RenderCompute, 0u);

    // The same event handle is queried twice, so both EventData entries describe `event`.
    MockEvent event;
    ze_event_handle_t events[2] = {event.toHandle(), event.toHandle()};
    event.waitScope = ZE_EVENT_SCOPE_FLAG_HOST;
    event.signalScope = ZE_EVENT_SCOPE_FLAG_HOST;

    void *alloc;
    ze_device_mem_alloc_desc_t deviceDesc = {};
    // Register the mock device with the context before allocating device memory on it.
    context->getDevices().insert(std::make_pair(mockDevice.toHandle(), &mockDevice));
    auto result = context->allocDeviceMem(&mockDevice, &deviceDesc, 128, 1, &alloc);
    EXPECT_EQ(result, ZE_RESULT_SUCCESS);

    result = commandList.appendQueryKernelTimestamps(2u, events, alloc, nullptr, nullptr, 0u, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    // The allocation captured by the mock kernel for argument 0 is read back as EventData.
    auto index0Allocation = mockDevice.tmpMockBultinLib->tmpMockKernel->index0Allocation;
    EXPECT_NE(nullptr, index0Allocation);

    EventData *eventData = reinterpret_cast(index0Allocation->getUnderlyingBuffer());

    EXPECT_EQ(eventData[0].address, event.getGpuAddress(&mockDevice));
EXPECT_EQ(eventData[0].packetsInUse, event.getPacketsInUse());
    EXPECT_EQ(eventData[0].timestampSizeInDw, event.getTimestampSizeInDw());

    // The second entry is compared against the same event's values.
    EXPECT_EQ(eventData[1].address, event.getGpuAddress(&mockDevice));
    EXPECT_EQ(eventData[1].packetsInUse, event.getPacketsInUse());
    EXPECT_EQ(eventData[1].timestampSizeInDw, event.getTimestampSizeInDw());

    context->freeMem(alloc);
}

// Copy engine-group command list: appends a signal for a host-scoped event, parses the
// command stream and expects the searched command to be present.
// NOTE(review): `find(...)` has no template argument in this copy of the file; the alias
// declared in the test suggests MI_FLUSH_DW - confirm against upstream.
HWTEST_F(CommandListCreate, givenCommandListWithCopyOnlyWhenAppendSignalEventThenMiFlushDWIsProgrammed) {
    using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW;
    ze_result_t returnValue;
    std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::Copy, 0u, returnValue));
    auto &commandContainer = commandList->commandContainer;
    MockEvent event;
    event.waitScope = ZE_EVENT_SCOPE_FLAG_HOST;
    event.signalScope = ZE_EVENT_SCOPE_FLAG_HOST;
    commandList->appendSignalEvent(event.toHandle());
    // Parse everything written to the stream so far, starting at offset 0.
    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
    auto itor = find(cmdList.begin(), cmdList.end());

    EXPECT_NE(cmdList.end(), itor);
}

// Same scenario on a RenderCompute engine-group command list.
// NOTE(review): `find(...)` has no template argument here either; the alias declared in
// the test suggests PIPE_CONTROL - confirm against upstream.
HWTEST_F(CommandListCreate, givenCommandListWhenAppendSignalEventWithScopeThenPipeControlIsProgrammed) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    ze_result_t returnValue;
    std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));
    auto &commandContainer = commandList->commandContainer;
    MockEvent event;
    event.waitScope = ZE_EVENT_SCOPE_FLAG_HOST;
    event.signalScope = ZE_EVENT_SCOPE_FLAG_HOST;
    commandList->appendSignalEvent(event.toHandle());
    // Parse everything written to the stream so far, starting at offset 0.
    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
    auto itor = find(cmdList.begin(), cmdList.end());

    EXPECT_NE(cmdList.end(), itor);
}

HWTEST_F(CommandListCreate,
givenCommandListWithCopyOnlyWhenAppendWaitEventsWithDcFlushThenMiFlushDWIsProgrammed) {
    // Copy engine-group command list waiting on one event with host wait scope. Whether the
    // searched command must be present depends on getDcFlushEnable(true, *defaultHwInfo).
    // NOTE(review): `find(...)` and `MemorySynchronizationCommands` have no template
    // arguments in this copy of the file - confirm against upstream.
    using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW;
    ze_result_t returnValue;
    std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::Copy, 0u, returnValue));
    auto &commandContainer = commandList->commandContainer;
    MockEvent event;
    event.signalScope = 0;
    event.waitScope = ZE_EVENT_SCOPE_FLAG_HOST;
    auto eventHandle = event.toHandle();
    commandList->appendWaitOnEvents(1, &eventHandle);
    // Parse everything written to the stream so far, starting at offset 0.
    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
    auto itor = find(cmdList.begin(), cmdList.end());

    if (MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo)) {
        EXPECT_NE(cmdList.end(), itor);
    } else {
        EXPECT_EQ(cmdList.end(), itor);
    }
}

// RenderCompute variant: the searched command is expected unconditionally.
// NOTE(review): `find(...)` has no template argument; the alias declared in the test
// suggests PIPE_CONTROL - confirm against upstream.
HWTEST_F(CommandListCreate, givenCommandListyWhenAppendWaitEventsWithDcFlushThenPipeControlIsProgrammed) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    ze_result_t returnValue;
    std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));
    auto &commandContainer = commandList->commandContainer;
    MockEvent event;
    event.signalScope = 0;
    event.waitScope = ZE_EVENT_SCOPE_FLAG_HOST;
    auto eventHandle = event.toHandle();
    commandList->appendWaitOnEvents(1, &eventHandle);
    // Parse everything written to the stream so far, starting at offset 0.
    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
    auto itor = find(cmdList.begin(), cmdList.end());

    EXPECT_NE(cmdList.end(), itor);
}

// Two events with host wait scope on a RenderCompute engine-group command list.
HWTEST_F(CommandListCreate, givenCommandListWhenAppendWaitEventsWithDcFlushThenPipeControlIsProgrammedOnlyOnce) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
    ze_result_t returnValue;
std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));
    auto &commandContainer = commandList->commandContainer;
    MockEvent event, event2;
    event.signalScope = 0;
    event.waitScope = ZE_EVENT_SCOPE_FLAG_HOST;
    event2.waitScope = ZE_EVENT_SCOPE_FLAG_HOST;
    ze_event_handle_t events[] = {&event, &event2};

    commandList->appendWaitOnEvents(2, events);
    // Parse everything written to the stream so far, starting at offset 0.
    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));

    // First search over the whole list, then a second search that starts just past the
    // first hit; both must succeed.
    // NOTE(review): neither `find(...)` call has a template argument in this copy of the
    // file; the aliases above suggest PIPE_CONTROL then SEMAPHORE_WAIT - confirm upstream.
    auto itor = find(cmdList.begin(), cmdList.end());
    EXPECT_NE(cmdList.end(), itor);
    itor++;
    auto itor2 = find(itor, cmdList.end());
    EXPECT_NE(cmdList.end(), itor2);
}

// Immediate Copy engine-group command list on an asynchronous queue, with the
// EnableFlushTaskSubmission debug flag forced to true for the duration of the test.
HWTEST_F(CommandListCreate, givenFlushTaskFlagEnabledAndAsyncCmdQueueAndCopyOnlyImmediateCommandListWhenAppendWaitEventsWithHostScopeThenMiFlushAndSemWaitAreAdded) {
    DebugManagerStateRestore restorer;
    NEO::DebugManager.flags.EnableFlushTaskSubmission.set(true);
    using SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
    ze_command_queue_desc_t desc = {};
    desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;

    ze_result_t returnValue;
    std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::Copy, returnValue));
    ASSERT_NE(nullptr, commandList);

    EXPECT_EQ(device, commandList->device);
    EXPECT_EQ(1u, commandList->cmdListType);
    EXPECT_NE(nullptr, commandList->cmdQImmediate);

    auto &commandContainer = commandList->commandContainer;
    MockEvent event, event2;
    event.signalScope = 0;
    event.waitScope = ZE_EVENT_SCOPE_FLAG_HOST;
    event2.waitScope = 0;
    ze_event_handle_t events[] = {&event, &event2};

    // Snapshot of the stream usage taken before the wait is appended.
    auto used = commandContainer.getCommandStream()->getUsed();
    commandList->appendWaitOnEvents(2, events);
    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0),
commandContainer.getCommandStream()->getUsed()));

    // The searched command must be absent from the parsed list and the stream usage must
    // equal the snapshot taken before appendWaitOnEvents().
    // NOTE(review): `find(...)` has no template argument in this copy of the file.
    auto itor = find(cmdList.begin(), cmdList.end());
    EXPECT_EQ(cmdList.end(), itor);
    EXPECT_EQ(used, commandContainer.getCommandStream()->getUsed());
}

// Same setup without touching the debug flag, and with every event scope set to 0.
// NOTE(review): the test name says commands "AreAdded...ViaFlushTask", but the assertions
// only check that the searched command is absent from the command list's own stream and
// that its used size is unchanged.
HWTEST_F(CommandListCreate, givenAsyncCmdQueueAndCopyOnlyImmediateCommandListWhenAppendWaitEventsWithSubdeviceScopeThenMiFlushAndSemWaitAreAddedViaFlushTask) {
    using SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
    ze_command_queue_desc_t desc = {};
    desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;

    ze_result_t returnValue;
    std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::Copy, returnValue));
    ASSERT_NE(nullptr, commandList);

    EXPECT_EQ(device, commandList->device);
    EXPECT_EQ(1u, commandList->cmdListType);
    EXPECT_NE(nullptr, commandList->cmdQImmediate);

    auto &commandContainer = commandList->commandContainer;
    MockEvent event, event2;
    event.signalScope = 0;
    event.waitScope = 0;
    event2.waitScope = 0;
    ze_event_handle_t events[] = {&event, &event2};

    // Snapshot of the stream usage taken before the wait is appended.
    auto used = commandContainer.getCommandStream()->getUsed();
    commandList->appendWaitOnEvents(2, events);
    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));

    auto itor = find(cmdList.begin(), cmdList.end());
    EXPECT_EQ(cmdList.end(), itor);
    EXPECT_EQ(used, commandContainer.getCommandStream()->getUsed());
}

// With EnableFlushTaskSubmission forced to true, an immediate Copy engine-group command
// list on an asynchronous queue is created and must be non-null.
HWTEST_F(CommandListCreate, givenFlushTaskFlagEnabledAndAsyncCmdQueueWithCopyOnlyImmediateCommandListCreatedThenSlushTaskSubmissionIsSetToFalse) {
    DebugManagerStateRestore restorer;
    NEO::DebugManager.flags.EnableFlushTaskSubmission.set(true);

    ze_command_queue_desc_t desc = {};
    desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;

    ze_result_t returnValue;
    std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::Copy, returnValue));
    ASSERT_NE(nullptr, commandList);
EXPECT_EQ(false, commandList->isFlushTaskSubmissionEnabled);
}

// Checks that reset() clears the four indirect-allocation flags after they were set to true.
// NOTE(review): `NEO::GfxFamilyMapper::GfxFamily` and `CommandListCoreFamily` appear without
// template arguments in this copy of the file - confirm against upstream.
HWTEST2_F(CommandListCreate, givenIndirectAccessFlagsAreChangedWhenResetingCommandListThenExpectAllFlagsSetToDefault, IsAtLeastSkl) {
    using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily;

    auto commandList = std::make_unique<::L0::ult::CommandListCoreFamily>();
    ASSERT_NE(nullptr, commandList);
    ze_result_t returnValue = commandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
    EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);

    // Freshly initialized: all flags are false.
    EXPECT_FALSE(commandList->indirectAllocationsAllowed);
    EXPECT_FALSE(commandList->unifiedMemoryControls.indirectHostAllocationsAllowed);
    EXPECT_FALSE(commandList->unifiedMemoryControls.indirectSharedAllocationsAllowed);
    EXPECT_FALSE(commandList->unifiedMemoryControls.indirectDeviceAllocationsAllowed);

    // Set every flag directly, then reset the command list.
    commandList->indirectAllocationsAllowed = true;
    commandList->unifiedMemoryControls.indirectHostAllocationsAllowed = true;
    commandList->unifiedMemoryControls.indirectSharedAllocationsAllowed = true;
    commandList->unifiedMemoryControls.indirectDeviceAllocationsAllowed = true;

    returnValue = commandList->reset();
    EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);

    // After reset: all flags are false again.
    EXPECT_FALSE(commandList->indirectAllocationsAllowed);
    EXPECT_FALSE(commandList->unifiedMemoryControls.indirectHostAllocationsAllowed);
    EXPECT_FALSE(commandList->unifiedMemoryControls.indirectSharedAllocationsAllowed);
    EXPECT_FALSE(commandList->unifiedMemoryControls.indirectDeviceAllocationsAllowed);
}

// For each value produced by ::testing::Bool(): containsCooperativeKernels() must return
// the flag that was set, and false after reset().
HWTEST2_F(CommandListCreate, whenContainsCooperativeKernelsIsCalledThenCorrectValueIsReturned, IsAtLeastSkl) {
    for (auto testValue : ::testing::Bool()) {
        MockCommandListForAppendLaunchKernel commandList;
        commandList.initialize(device, NEO::EngineGroupType::Compute, 0u);
        commandList.containsCooperativeKernelsFlag = testValue;
        EXPECT_EQ(testValue, commandList.containsCooperativeKernels());
        commandList.reset();
        EXPECT_FALSE(commandList.containsCooperativeKernels());
    }
}

HWTEST_F(CommandListCreate,
GivenSingleTileDeviceWhenCommandListIsResetThenPartitionCountIsReversedToOne) {
    // partitionCount is expected to be 1 both after creation and after reset().
    ze_result_t returnValue;
    std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::Compute, 0u, returnValue));
    EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);

    EXPECT_EQ(1u, commandList->partitionCount);

    returnValue = commandList->reset();
    EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
    EXPECT_EQ(1u, commandList->partitionCount);
}

// Reserves sizeof(MI_NOOP) bytes through reserveSpace(), writes an MI_NOOP tagged with a
// unique identification number into the returned memory, and then looks for that command
// in the parsed stream.
// NOTE(review): `reinterpret_cast(...)`, `find(...)` and `genCmdCast(...)` have no template
// arguments in this copy of the file - confirm against upstream.
HWTEST_F(CommandListCreate, WhenReservingSpaceThenCommandsAddedToBatchBuffer) {
    ze_result_t returnValue;
    std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));
    EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
    ASSERT_NE(nullptr, commandList);
    ASSERT_NE(nullptr, commandList->commandContainer.getCommandStream());
    auto commandStream = commandList->commandContainer.getCommandStream();

    auto usedSpaceBefore = commandStream->getUsed();

    using MI_NOOP = typename FamilyType::MI_NOOP;
    MI_NOOP cmd = FamilyType::cmdInitNoop;
    uint32_t uniqueIDforTest = 0x12345u;
    cmd.setIdentificationNumber(uniqueIDforTest);

    size_t sizeToReserveForCommand = sizeof(cmd);
    void *ptrToReservedMemory = nullptr;
    returnValue = commandList->reserveSpace(sizeToReserveForCommand, &ptrToReservedMemory);
    ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue);

    // Copy the tagged command into the reserved memory, if any was returned.
    if (ptrToReservedMemory != nullptr) {
        *reinterpret_cast(ptrToReservedMemory) = cmd;
    }

    // Reserving space must have advanced the stream.
    auto usedSpaceAfter = commandStream->getUsed();
    ASSERT_GT(usedSpaceAfter, usedSpaceBefore);

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, commandStream->getCpuBase(), usedSpaceAfter));

    // Walk the matches of find() one by one until the command carrying the unique
    // identification number is found; `itor` is left at end() when none matches.
    auto itor = cmdList.begin();
    while (itor != cmdList.end()) {
        using MI_NOOP = typename FamilyType::MI_NOOP;
        itor = find(itor, cmdList.end());
        if (itor == cmdList.end())
            break;

        auto cmd = genCmdCast(*itor);
        if (uniqueIDforTest == cmd->getIdentificationNumber()) {
            break;
        }

        itor++;
    }
    ASSERT_NE(itor, cmdList.end());
}

TEST_F(CommandListCreate,
givenOrdinalBiggerThanAvailableEnginesWhenCreatingCommandListThenInvalidArgumentErrorIsReturned) {
    // The number of regular engine groups is used as the ordinal, i.e. one past the last
    // valid zero-based group index.
    // NOTE(review): `static_cast(...)` has no template argument in this copy of the file.
    auto numAvailableEngineGroups = static_cast(neoDevice->getRegularEngineGroups().size());
    ze_command_list_handle_t commandList = nullptr;
    ze_command_list_desc_t desc = {ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC};
    desc.commandQueueGroupOrdinal = numAvailableEngineGroups;
    auto returnValue = device->createCommandList(&desc, &commandList);
    EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, returnValue);
    EXPECT_EQ(nullptr, commandList);

    // Same out-of-range ordinal through the immediate command list entry point.
    ze_command_queue_desc_t desc2 = {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC};
    desc2.ordinal = numAvailableEngineGroups;
    desc2.index = 0;
    returnValue = device->createCommandListImmediate(&desc2, &commandList);
    EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, returnValue);
    EXPECT_EQ(nullptr, commandList);

    // Ordinal 0 combined with queue index 0x1000 is also expected to be rejected.
    desc2.ordinal = 0;
    desc2.index = 0x1000;
    returnValue = device->createCommandListImmediate(&desc2, &commandList);
    EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, returnValue);
    EXPECT_EQ(nullptr, commandList);
}

// Builds a device factory with arguments {1, 2}, shrinks the root device's regular engine
// groups to one entry and appends a Compute group to sub-device 0, so that the ordinal
// computed below is the index of sub-device 0's last group.
TEST_F(CommandListCreate, givenRootDeviceAndImplicitScalingDisabledWhenCreatingCommandListThenValidateQueueOrdinalUsingSubDeviceEngines) {
    NEO::UltDeviceFactory deviceFactory{1, 2};
    auto &rootDevice = *deviceFactory.rootDevices[0];
    auto &subDevice0 = *deviceFactory.subDevices[0];
    rootDevice.regularEngineGroups.resize(1);
    subDevice0.getRegularEngineGroups().push_back(NEO::Device::EngineGroupT{});
    subDevice0.getRegularEngineGroups().back().engineGroupType = EngineGroupType::Compute;
    subDevice0.getRegularEngineGroups().back().engines.resize(1);
    // The added engine uses the root device's GPGPU command stream receiver.
    subDevice0.getRegularEngineGroups().back().engines[0].commandStreamReceiver = &rootDevice.getGpgpuCommandStreamReceiver();
    auto ordinal = static_cast(subDevice0.getRegularEngineGroups().size() - 1);
    // NOTE(review): `Mock l0RootDevice` has no template argument in this copy of the file.
    Mock l0RootDevice(&rootDevice, rootDevice.getExecutionEnvironment());

    ze_command_list_handle_t commandList = nullptr;
    ze_command_list_desc_t cmdDesc = {ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC};
    cmdDesc.commandQueueGroupOrdinal =
ordinal; ze_command_queue_desc_t queueDesc = {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC}; queueDesc.ordinal = ordinal; queueDesc.index = 0; l0RootDevice.driverHandle = driverHandle.get(); l0RootDevice.implicitScalingCapable = true; auto returnValue = l0RootDevice.createCommandList(&cmdDesc, &commandList); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, returnValue); EXPECT_EQ(nullptr, commandList); returnValue = l0RootDevice.createCommandListImmediate(&queueDesc, &commandList); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, returnValue); EXPECT_EQ(nullptr, commandList); l0RootDevice.implicitScalingCapable = false; returnValue = l0RootDevice.createCommandList(&cmdDesc, &commandList); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); EXPECT_NE(nullptr, commandList); L0::CommandList::fromHandle(commandList)->destroy(); commandList = nullptr; returnValue = l0RootDevice.createCommandListImmediate(&queueDesc, &commandList); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); EXPECT_NE(nullptr, commandList); L0::CommandList::fromHandle(commandList)->destroy(); } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp000066400000000000000000000141021422164147700323770ustar00rootroot00000000000000/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h" namespace L0 { namespace ult { using MultiTileImmediateCommandListTest = Test>; HWTEST2_F(MultiTileImmediateCommandListTest, GivenMultiTileDeviceWhenCreatingImmediateCommandListThenExpectPartitionCountMatchTileCount, IsWithinXeGfxFamily) { EXPECT_EQ(2u, device->getNEODevice()->getDeviceBitfield().count()); EXPECT_EQ(2u, commandList->partitionCount); auto returnValue = commandList->reset(); 
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); EXPECT_EQ(2u, commandList->partitionCount); } using MultiTileImmediateInternalCommandListTest = Test>; HWTEST2_F(MultiTileImmediateInternalCommandListTest, GivenMultiTileDeviceWhenCreatingInternalImmediateCommandListThenExpectPartitionCountEqualOne, IsWithinXeGfxFamily) { EXPECT_EQ(2u, device->getNEODevice()->getDeviceBitfield().count()); EXPECT_EQ(1u, commandList->partitionCount); auto returnValue = commandList->reset(); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); EXPECT_EQ(1u, commandList->partitionCount); } using MultiTileCopyEngineCommandListTest = Test>; HWTEST2_F(MultiTileCopyEngineCommandListTest, GivenMultiTileDeviceWhenCreatingCopyEngineCommandListThenExpectPartitionCountEqualOne, IsWithinXeGfxFamily) { EXPECT_EQ(2u, device->getNEODevice()->getDeviceBitfield().count()); EXPECT_EQ(1u, commandList->partitionCount); auto returnValue = commandList->reset(); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); EXPECT_EQ(1u, commandList->partitionCount); } using CommandListExecuteImmediate = Test; HWTEST2_F(CommandListExecuteImmediate, whenExecutingCommandListImmediateWithFlushTaskThenRequiredStreamStateIsCorrectlyReported, IsAtLeastSkl) { auto &hwHelper = NEO::HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily); auto &hwInfoConfig = *NEO::HwInfoConfig::get(defaultHwInfo->platform.eProductFamily); std::unique_ptr commandList; const ze_command_queue_desc_t desc = {}; ze_result_t returnValue; commandList.reset(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue)); auto &commandListImmediate = static_cast &>(*commandList); auto ¤tCsrStreamProperties = commandListImmediate.csr->getStreamProperties(); commandListImmediate.requiredStreamState.frontEndState.computeDispatchAllWalkerEnable.value = 1; commandListImmediate.requiredStreamState.frontEndState.disableEUFusion.value = 1; commandListImmediate.requiredStreamState.frontEndState.disableOverdispatch.value = 1; 
commandListImmediate.requiredStreamState.stateComputeMode.isCoherencyRequired.value = 1;
    commandListImmediate.requiredStreamState.stateComputeMode.largeGrfMode.value = 1;
    commandListImmediate.requiredStreamState.stateComputeMode.threadArbitrationPolicy.value = NEO::ThreadArbitrationPolicy::RoundRobin;
    commandListImmediate.executeCommandListImmediateWithFlushTask(false);

    // Two expectations are taken from the hardware helpers instead of being fixed at 1:
    // disableOverdispatch and isCoherencyRequired.
    int expectedDisableOverdispatch = hwInfoConfig.isDisableOverdispatchAvailable(*defaultHwInfo);
    bool expectedIsCoherencyRequired = hwHelper.forceNonGpuCoherencyWA(true);
    EXPECT_EQ(1, currentCsrStreamProperties.frontEndState.computeDispatchAllWalkerEnable.value);
    EXPECT_EQ(1, currentCsrStreamProperties.frontEndState.disableEUFusion.value);
    EXPECT_EQ(expectedDisableOverdispatch, currentCsrStreamProperties.frontEndState.disableOverdispatch.value);
    EXPECT_EQ(expectedIsCoherencyRequired, currentCsrStreamProperties.stateComputeMode.isCoherencyRequired.value);
    EXPECT_EQ(1, currentCsrStreamProperties.stateComputeMode.largeGrfMode.value);
    EXPECT_EQ(NEO::ThreadArbitrationPolicy::RoundRobin, currentCsrStreamProperties.stateComputeMode.threadArbitrationPolicy.value);

    // Second pass: request everything cleared (AgeBased arbitration) and flush again; every
    // CSR property checked afterwards is compared against the cleared value directly.
    commandListImmediate.requiredStreamState.frontEndState.computeDispatchAllWalkerEnable.value = 0;
    commandListImmediate.requiredStreamState.frontEndState.disableEUFusion.value = 0;
    commandListImmediate.requiredStreamState.frontEndState.disableOverdispatch.value = 0;
    commandListImmediate.requiredStreamState.stateComputeMode.isCoherencyRequired.value = 0;
    commandListImmediate.requiredStreamState.stateComputeMode.largeGrfMode.value = 0;
    commandListImmediate.requiredStreamState.stateComputeMode.threadArbitrationPolicy.value = NEO::ThreadArbitrationPolicy::AgeBased;
    commandListImmediate.executeCommandListImmediateWithFlushTask(false);

    EXPECT_EQ(0, currentCsrStreamProperties.frontEndState.computeDispatchAllWalkerEnable.value);
    EXPECT_EQ(0, currentCsrStreamProperties.frontEndState.disableEUFusion.value);
    EXPECT_EQ(0,
currentCsrStreamProperties.frontEndState.disableOverdispatch.value); EXPECT_EQ(0, currentCsrStreamProperties.stateComputeMode.isCoherencyRequired.value); EXPECT_EQ(0, currentCsrStreamProperties.stateComputeMode.largeGrfMode.value); EXPECT_EQ(NEO::ThreadArbitrationPolicy::AgeBased, currentCsrStreamProperties.stateComputeMode.threadArbitrationPolicy.value); } HWTEST2_F(CommandListExecuteImmediate, whenExecutingCommandListImmediateWithFlushTaskThenContainsAnyKernelFlagIsReset, IsAtLeastSkl) { std::unique_ptr commandList; const ze_command_queue_desc_t desc = {}; ze_result_t returnValue; commandList.reset(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue)); auto &commandListImmediate = static_cast &>(*commandList); commandListImmediate.containsAnyKernel = true; commandListImmediate.executeCommandListImmediateWithFlushTask(false); EXPECT_FALSE(commandListImmediate.containsAnyKernel); } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_api.cpp000066400000000000000000000074621422164147700330160ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h" #include "level_zero/core/test/unit_tests/mocks/mock_kernel.h" #include namespace L0 { namespace ult { TEST(zeCommandListClose, whenCalledThenRedirectedToObject) { MockCommandList commandList; auto result = zeCommandListClose(commandList.toHandle()); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST(zeCommandListAppendMemoryPrefetch, whenCalledThenRedirectedToObject) { MockCommandList commandList; auto res = zeCommandListAppendMemoryPrefetch(&commandList, reinterpret_cast(0x1000), 0x1000); ASSERT_EQ(ZE_RESULT_SUCCESS, res); } using 
zeCommandListAppendMemAdviseTest = Test;

// Each test below calls one zeCommandListAppend* entry point with a MockCommandList and
// expects ZE_RESULT_SUCCESS.
// NOTE(review): the `reinterpret_cast(...)` calls and the `Test` alias above have lost
// their template arguments in this copy of the file - confirm against upstream.
TEST_F(zeCommandListAppendMemAdviseTest, whenCalledThenRedirectedToObject) {
    MockCommandList commandList;

    auto res = zeCommandListAppendMemAdvise(&commandList, device->toHandle(), reinterpret_cast(0x1000), 0x1000, ZE_MEMORY_ADVICE_BIAS_CACHED);
    ASSERT_EQ(ZE_RESULT_SUCCESS, res);
}

TEST(zeCommandListAppendMemoryCopy, whenCalledThenRedirectedToObject) {
    MockCommandList commandList;

    auto res = zeCommandListAppendMemoryCopy(&commandList, reinterpret_cast(0x2000), reinterpret_cast(0x1000), 0x1000, nullptr, 0, nullptr);
    ASSERT_EQ(ZE_RESULT_SUCCESS, res);
}

TEST(zeCommandListAppendMemoryFill, whenCalledThenRedirectedToObject) {
    MockCommandList commandList;
    size_t bufferSize = 4096u;

    // The fill pattern is the address of a local int; its size is passed as sizeof(value).
    int value = 0;
    auto res = zeCommandListAppendMemoryFill(&commandList, reinterpret_cast(0x1000), reinterpret_cast(&value), sizeof(value), bufferSize, nullptr, 0, nullptr);
    ASSERT_EQ(ZE_RESULT_SUCCESS, res);
}

TEST(zeCommandListAppendWaitOnEvent, whenCalledThenRedirectedToObject) {
    MockCommandList commandList;
    ze_event_handle_t event = reinterpret_cast(0x2000);

    auto result = zeCommandListAppendWaitOnEvents(commandList.toHandle(), 1, &event);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
}

TEST(zeCommandListAppendWriteGlobalTimestamp, whenCalledThenRedirectedToObject) {
    MockCommandList commandList;

    auto result = zeCommandListAppendWriteGlobalTimestamp(commandList.toHandle(), nullptr, nullptr, 0, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
}

TEST(zeCommandListAppendLaunchKernel, whenCalledThenRedirectedToObject) {
    MockCommandList commandList;
    Mock<::L0::Kernel> kernel;
    // Value-initialized so the pointer handed to the API never refers to indeterminate
    // group counts.
    ze_group_count_t dispatchFunctionArguments = {};

    auto result =
        zeCommandListAppendLaunchKernel(commandList.toHandle(), kernel.toHandle(), &dispatchFunctionArguments, nullptr, 0, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
}

TEST(zeCommandListAppendEventReset, whenCalledThenRedirectedToObject) {
    MockCommandList commandList;
    ze_event_handle_t event = reinterpret_cast(0x2000);

    auto result =
zeCommandListAppendEventReset(commandList.toHandle(), event); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST(zeCommandListAppendExecutionBarrier, whenCalledThenRedirectedToObject) { MockCommandList commandList; auto result = zeCommandListAppendBarrier(commandList.toHandle(), nullptr, 0, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST(zeCommandListAppendSignalEvent, WhenAppendingSignalEventThenSuccessIsReturned) { MockCommandList commandList; ze_event_handle_t event = reinterpret_cast(0x2000); auto result = zeCommandListAppendSignalEvent(commandList.toHandle(), event); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } } // namespace ult } // namespace L0 test_cmdlist_append_barrier.cpp000066400000000000000000000432671422164147700351460ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/cmdlist/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_container/command_encoder.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.h" #include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" namespace L0 { namespace ult { using CommandListAppendBarrier = Test; HWTEST_F(CommandListAppendBarrier, WhenAppendingBarrierThenPipeControlIsGenerated) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; auto usedSpaceBefore = commandList->commandContainer.getCommandStream()->getUsed(); auto result = commandList->appendBarrier(nullptr, 0, nullptr); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed(); ASSERT_GT(usedSpaceAfter, usedSpaceBefore); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, 
ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), usedSpaceBefore), usedSpaceAfter - usedSpaceBefore)); // Find a PC w/ CS stall auto itorPC = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), itorPC); auto cmd = genCmdCast(*itorPC); EXPECT_TRUE(cmd->getCommandStreamerStallEnable()); EXPECT_FALSE(cmd->getDcFlushEnable()); } HWTEST_F(CommandListAppendBarrier, GivenEventVsNoEventWhenAppendingBarrierThenCorrectPipeControlsIsAddedToCommandStream) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; auto usedSpaceBefore = commandList->commandContainer.getCommandStream()->getUsed(); commandList->reset(); auto result = commandList->appendBarrier(event->toHandle(), 0, nullptr); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed(); ASSERT_GT(usedSpaceAfter, usedSpaceBefore); GenCmdList cmdList1, cmdList2; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList1, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), usedSpaceAfter)); auto itor1 = findAll(cmdList1.begin(), cmdList1.end()); ASSERT_FALSE(itor1.empty()); commandList->reset(); usedSpaceBefore = commandList->commandContainer.getCommandStream()->getUsed(); result = commandList->appendBarrier(nullptr, 0, nullptr); usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed(); ASSERT_EQ(ZE_RESULT_SUCCESS, result); ASSERT_GT(usedSpaceAfter, usedSpaceBefore); ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList2, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), usedSpaceAfter)); auto itor2 = findAll(cmdList2.begin(), cmdList2.end()); ASSERT_FALSE(itor2.empty()); auto sizeWithoutEvent = itor2.size(); auto sizeWithEvent = itor1.size(); ASSERT_LE(sizeWithoutEvent, sizeWithEvent); } using MultiTileCommandListAppendBarrier = Test>; HWTEST2_F(MultiTileCommandListAppendBarrier, WhenAppendingBarrierThenPipeControlIsGenerated, IsWithinXeGfxFamily) 
{ using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM; using MI_ATOMIC = typename FamilyType::MI_ATOMIC; using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; EXPECT_EQ(2u, device->getNEODevice()->getDeviceBitfield().count()); EXPECT_EQ(2u, commandList->partitionCount); size_t beforeControlSectionOffset = sizeof(MI_STORE_DATA_IMM) + sizeof(PIPE_CONTROL) + sizeof(MI_ATOMIC) + sizeof(MI_SEMAPHORE_WAIT) + sizeof(MI_BATCH_BUFFER_START); size_t startOffset = beforeControlSectionOffset + (2 * sizeof(uint32_t)); size_t expectedUseBuffer = startOffset + sizeof(MI_ATOMIC) + sizeof(MI_SEMAPHORE_WAIT) + sizeof(MI_STORE_DATA_IMM) + sizeof(MI_ATOMIC) + sizeof(MI_SEMAPHORE_WAIT); auto usedSpaceBefore = commandList->commandContainer.getCommandStream()->getUsed(); auto gpuBaseAddress = commandList->commandContainer.getCommandStream()->getGraphicsAllocation()->getGpuAddress() + usedSpaceBefore; auto gpuCrossTileSyncAddress = gpuBaseAddress + beforeControlSectionOffset; auto gpuFinalSyncAddress = gpuCrossTileSyncAddress + sizeof(uint32_t); auto gpuStartAddress = gpuBaseAddress + startOffset; auto result = commandList->appendBarrier(nullptr, 0, nullptr); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed(); ASSERT_GT(usedSpaceAfter, usedSpaceBefore); size_t usedBuffer = usedSpaceAfter - usedSpaceBefore; EXPECT_EQ(expectedUseBuffer, usedBuffer); void *cmdBuffer = ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), usedSpaceBefore); size_t parsedOffset = 0; { auto storeDataImm = genCmdCast(ptrOffset(cmdBuffer, parsedOffset)); ASSERT_NE(nullptr, storeDataImm); EXPECT_EQ(gpuFinalSyncAddress, storeDataImm->getAddress()); EXPECT_EQ(0u, storeDataImm->getDataDword0()); parsedOffset += sizeof(MI_STORE_DATA_IMM); } { auto pipeControl = 
genCmdCast(ptrOffset(cmdBuffer, parsedOffset)); ASSERT_NE(nullptr, pipeControl); EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); EXPECT_FALSE(pipeControl->getDcFlushEnable()); parsedOffset += sizeof(PIPE_CONTROL); } { auto miAtomic = genCmdCast(ptrOffset(cmdBuffer, parsedOffset)); ASSERT_NE(nullptr, miAtomic); auto miAtomicProgrammedAddress = NEO::UnitTestHelper::getAtomicMemoryAddress(*miAtomic); EXPECT_EQ(gpuCrossTileSyncAddress, miAtomicProgrammedAddress); EXPECT_FALSE(miAtomic->getReturnDataControl()); EXPECT_EQ(MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, miAtomic->getAtomicOpcode()); parsedOffset += sizeof(MI_ATOMIC); } { auto miSemaphore = genCmdCast(ptrOffset(cmdBuffer, parsedOffset)); ASSERT_NE(nullptr, miSemaphore); EXPECT_EQ(gpuCrossTileSyncAddress, miSemaphore->getSemaphoreGraphicsAddress()); EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD, miSemaphore->getCompareOperation()); EXPECT_EQ(2u, miSemaphore->getSemaphoreDataDword()); parsedOffset += sizeof(MI_SEMAPHORE_WAIT); } { auto bbStart = genCmdCast(ptrOffset(cmdBuffer, parsedOffset)); ASSERT_NE(nullptr, bbStart); EXPECT_EQ(gpuStartAddress, bbStart->getBatchBufferStartAddress()); EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH, bbStart->getSecondLevelBatchBuffer()); parsedOffset += sizeof(MI_BATCH_BUFFER_START); } { auto crossField = reinterpret_cast(ptrOffset(cmdBuffer, parsedOffset)); EXPECT_EQ(0u, *crossField); parsedOffset += sizeof(uint32_t); auto finalField = reinterpret_cast(ptrOffset(cmdBuffer, parsedOffset)); EXPECT_EQ(0u, *finalField); parsedOffset += sizeof(uint32_t); } { auto miAtomic = genCmdCast(ptrOffset(cmdBuffer, parsedOffset)); ASSERT_NE(nullptr, miAtomic); auto miAtomicProgrammedAddress = NEO::UnitTestHelper::getAtomicMemoryAddress(*miAtomic); EXPECT_EQ(gpuFinalSyncAddress, miAtomicProgrammedAddress); EXPECT_FALSE(miAtomic->getReturnDataControl()); 
EXPECT_EQ(MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, miAtomic->getAtomicOpcode()); parsedOffset += sizeof(MI_ATOMIC); } { auto miSemaphore = genCmdCast(ptrOffset(cmdBuffer, parsedOffset)); ASSERT_NE(nullptr, miSemaphore); EXPECT_EQ(gpuFinalSyncAddress, miSemaphore->getSemaphoreGraphicsAddress()); EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD, miSemaphore->getCompareOperation()); EXPECT_EQ(2u, miSemaphore->getSemaphoreDataDword()); parsedOffset += sizeof(MI_SEMAPHORE_WAIT); } { auto storeDataImm = genCmdCast(ptrOffset(cmdBuffer, parsedOffset)); ASSERT_NE(nullptr, storeDataImm); EXPECT_EQ(gpuCrossTileSyncAddress, storeDataImm->getAddress()); EXPECT_EQ(0u, storeDataImm->getDataDword0()); parsedOffset += sizeof(MI_STORE_DATA_IMM); } { auto miAtomic = genCmdCast(ptrOffset(cmdBuffer, parsedOffset)); ASSERT_NE(nullptr, miAtomic); auto miAtomicProgrammedAddress = NEO::UnitTestHelper::getAtomicMemoryAddress(*miAtomic); EXPECT_EQ(gpuFinalSyncAddress, miAtomicProgrammedAddress); EXPECT_FALSE(miAtomic->getReturnDataControl()); EXPECT_EQ(MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, miAtomic->getAtomicOpcode()); parsedOffset += sizeof(MI_ATOMIC); } { auto miSemaphore = genCmdCast(ptrOffset(cmdBuffer, parsedOffset)); ASSERT_NE(nullptr, miSemaphore); EXPECT_EQ(gpuFinalSyncAddress, miSemaphore->getSemaphoreGraphicsAddress()); EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD, miSemaphore->getCompareOperation()); EXPECT_EQ(4u, miSemaphore->getSemaphoreDataDword()); parsedOffset += sizeof(MI_SEMAPHORE_WAIT); } EXPECT_EQ(expectedUseBuffer, parsedOffset); } HWTEST2_F(MultiTileCommandListAppendBarrier, GivenCurrentCommandBufferExhaustedWhenAppendingMultiTileBarrierThenPipeControlAndCrossTileSyncIsGeneratedInNewBuffer, IsWithinXeGfxFamily) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; using 
MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM; using MI_ATOMIC = typename FamilyType::MI_ATOMIC; using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END; EXPECT_EQ(2u, device->getNEODevice()->getDeviceBitfield().count()); EXPECT_EQ(2u, commandList->partitionCount); LinearStream *cmdListStream = commandList->commandContainer.getCommandStream(); size_t beforeControlSectionOffset = sizeof(MI_STORE_DATA_IMM) + sizeof(PIPE_CONTROL) + sizeof(MI_ATOMIC) + sizeof(MI_SEMAPHORE_WAIT) + sizeof(MI_BATCH_BUFFER_START); size_t bbStartOffset = beforeControlSectionOffset + (2 * sizeof(uint32_t)); size_t expectedUseBuffer = bbStartOffset + sizeof(MI_ATOMIC) + sizeof(MI_SEMAPHORE_WAIT) + sizeof(MI_STORE_DATA_IMM) + sizeof(MI_ATOMIC) + sizeof(MI_SEMAPHORE_WAIT); auto firstBatchBufferAllocation = cmdListStream->getGraphicsAllocation(); auto useSize = cmdListStream->getAvailableSpace(); useSize -= (sizeof(MI_BATCH_BUFFER_END) + sizeof(MI_STORE_DATA_IMM) + sizeof(PIPE_CONTROL)); cmdListStream->getSpace(useSize); auto result = commandList->appendBarrier(nullptr, 0, nullptr); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto secondBatchBufferAllocation = cmdListStream->getGraphicsAllocation(); EXPECT_NE(firstBatchBufferAllocation, secondBatchBufferAllocation); auto gpuBaseAddress = secondBatchBufferAllocation->getGpuAddress(); auto gpuCrossTileSyncAddress = gpuBaseAddress + beforeControlSectionOffset; auto gpuFinalSyncAddress = gpuCrossTileSyncAddress + sizeof(uint32_t); auto gpuStartAddress = gpuBaseAddress + bbStartOffset; auto usedSpace = cmdListStream->getUsed(); EXPECT_EQ(expectedUseBuffer, usedSpace); void *cmdBuffer = cmdListStream->getCpuBase(); size_t parsedOffset = 0; { auto storeDataImm = genCmdCast(ptrOffset(cmdBuffer, parsedOffset)); ASSERT_NE(nullptr, storeDataImm); EXPECT_EQ(gpuFinalSyncAddress, storeDataImm->getAddress()); EXPECT_EQ(0u, storeDataImm->getDataDword0()); parsedOffset += 
sizeof(MI_STORE_DATA_IMM); } { auto pipeControl = genCmdCast(ptrOffset(cmdBuffer, parsedOffset)); ASSERT_NE(nullptr, pipeControl); EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); EXPECT_FALSE(pipeControl->getDcFlushEnable()); parsedOffset += sizeof(PIPE_CONTROL); } { auto miAtomic = genCmdCast(ptrOffset(cmdBuffer, parsedOffset)); ASSERT_NE(nullptr, miAtomic); auto miAtomicProgrammedAddress = NEO::UnitTestHelper::getAtomicMemoryAddress(*miAtomic); EXPECT_EQ(gpuCrossTileSyncAddress, miAtomicProgrammedAddress); EXPECT_FALSE(miAtomic->getReturnDataControl()); EXPECT_EQ(MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, miAtomic->getAtomicOpcode()); parsedOffset += sizeof(MI_ATOMIC); } { auto miSemaphore = genCmdCast(ptrOffset(cmdBuffer, parsedOffset)); ASSERT_NE(nullptr, miSemaphore); EXPECT_EQ(gpuCrossTileSyncAddress, miSemaphore->getSemaphoreGraphicsAddress()); EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD, miSemaphore->getCompareOperation()); EXPECT_EQ(2u, miSemaphore->getSemaphoreDataDword()); parsedOffset += sizeof(MI_SEMAPHORE_WAIT); } { auto bbStart = genCmdCast(ptrOffset(cmdBuffer, parsedOffset)); ASSERT_NE(nullptr, bbStart); EXPECT_EQ(gpuStartAddress, bbStart->getBatchBufferStartAddress()); EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH, bbStart->getSecondLevelBatchBuffer()); parsedOffset += sizeof(MI_BATCH_BUFFER_START); } { auto crossField = reinterpret_cast(ptrOffset(cmdBuffer, parsedOffset)); EXPECT_EQ(0u, *crossField); parsedOffset += sizeof(uint32_t); auto finalField = reinterpret_cast(ptrOffset(cmdBuffer, parsedOffset)); EXPECT_EQ(0u, *finalField); parsedOffset += sizeof(uint32_t); } { auto miAtomic = genCmdCast(ptrOffset(cmdBuffer, parsedOffset)); ASSERT_NE(nullptr, miAtomic); auto miAtomicProgrammedAddress = NEO::UnitTestHelper::getAtomicMemoryAddress(*miAtomic); EXPECT_EQ(gpuFinalSyncAddress, miAtomicProgrammedAddress); 
EXPECT_FALSE(miAtomic->getReturnDataControl()); EXPECT_EQ(MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, miAtomic->getAtomicOpcode()); parsedOffset += sizeof(MI_ATOMIC); } { auto miSemaphore = genCmdCast(ptrOffset(cmdBuffer, parsedOffset)); ASSERT_NE(nullptr, miSemaphore); EXPECT_EQ(gpuFinalSyncAddress, miSemaphore->getSemaphoreGraphicsAddress()); EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD, miSemaphore->getCompareOperation()); EXPECT_EQ(2u, miSemaphore->getSemaphoreDataDword()); parsedOffset += sizeof(MI_SEMAPHORE_WAIT); } { auto storeDataImm = genCmdCast(ptrOffset(cmdBuffer, parsedOffset)); ASSERT_NE(nullptr, storeDataImm); EXPECT_EQ(gpuCrossTileSyncAddress, storeDataImm->getAddress()); EXPECT_EQ(0u, storeDataImm->getDataDword0()); parsedOffset += sizeof(MI_STORE_DATA_IMM); } { auto miAtomic = genCmdCast(ptrOffset(cmdBuffer, parsedOffset)); ASSERT_NE(nullptr, miAtomic); auto miAtomicProgrammedAddress = NEO::UnitTestHelper::getAtomicMemoryAddress(*miAtomic); EXPECT_EQ(gpuFinalSyncAddress, miAtomicProgrammedAddress); EXPECT_FALSE(miAtomic->getReturnDataControl()); EXPECT_EQ(MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, miAtomic->getAtomicOpcode()); parsedOffset += sizeof(MI_ATOMIC); } { auto miSemaphore = genCmdCast(ptrOffset(cmdBuffer, parsedOffset)); ASSERT_NE(nullptr, miSemaphore); EXPECT_EQ(gpuFinalSyncAddress, miSemaphore->getSemaphoreGraphicsAddress()); EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD, miSemaphore->getCompareOperation()); EXPECT_EQ(4u, miSemaphore->getSemaphoreDataDword()); parsedOffset += sizeof(MI_SEMAPHORE_WAIT); } EXPECT_EQ(expectedUseBuffer, parsedOffset); } } // namespace ult } // namespace L0 test_cmdlist_append_event_reset.cpp000066400000000000000000000337421422164147700360400ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/cmdlist/* * Copyright (C) 2020-2022 Intel Corporation * * 
SPDX-License-Identifier: MIT * */ #include "shared/source/command_container/command_encoder.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.h" #include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h" namespace L0 { namespace ult { using CommandListAppendEventReset = Test; HWTEST_F(CommandListAppendEventReset, givenCmdlistWhenResetEventAppendedThenPostSyncWriteIsGenerated) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION; auto usedSpaceBefore = commandList->commandContainer.getCommandStream()->getUsed(); auto result = commandList->appendEventReset(event->toHandle()); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed(); ASSERT_GT(usedSpaceAfter, usedSpaceBefore); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), usedSpaceAfter)); auto itorPC = findAll(cmdList.begin(), cmdList.end()); ASSERT_NE(0u, itorPC.size()); bool postSyncFound = false; for (auto it : itorPC) { auto cmd = genCmdCast(*it); if (cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) { EXPECT_TRUE(cmd->getCommandStreamerStallEnable()); EXPECT_EQ(cmd->getImmediateData(), Event::STATE_INITIAL); auto gpuAddress = event->getGpuAddress(device); EXPECT_EQ(gpuAddress, NEO::UnitTestHelper::getPipeControlPostSyncAddress(*cmd)); postSyncFound = true; } } ASSERT_TRUE(postSyncFound); } HWTEST_F(CommandListAppendEventReset, 
whenResetEventIsAppendedAndNoSpaceIsAvailableThenNextCommandBufferIsCreated) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION; using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END; auto firstBatchBufferAllocation = commandList->commandContainer.getCommandStream()->getGraphicsAllocation(); auto useSize = commandList->commandContainer.getCommandStream()->getAvailableSpace(); useSize -= sizeof(MI_BATCH_BUFFER_END); commandList->commandContainer.getCommandStream()->getSpace(useSize); auto result = commandList->appendEventReset(event->toHandle()); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto secondBatchBufferAllocation = commandList->commandContainer.getCommandStream()->getGraphicsAllocation(); EXPECT_NE(firstBatchBufferAllocation, secondBatchBufferAllocation); } HWTEST_F(CommandListAppendEventReset, givenCopyOnlyCmdlistWhenResetEventAppendedThenMiFlushWithPostSyncIsGenerated) { using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW; ze_result_t returnValue; commandList.reset(whitebox_cast(CommandList::create(productFamily, device, NEO::EngineGroupType::Copy, 0u, returnValue))); auto usedSpaceBefore = commandList->commandContainer.getCommandStream()->getUsed(); auto result = commandList->appendEventReset(event->toHandle()); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed(); ASSERT_GT(usedSpaceAfter, usedSpaceBefore); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), usedSpaceAfter)); auto itorPC = findAll(cmdList.begin(), cmdList.end()); ASSERT_NE(0u, itorPC.size()); bool postSyncFound = false; for (auto it : itorPC) { auto cmd = genCmdCast(*it); if (cmd->getPostSyncOperation() == MI_FLUSH_DW::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA_QWORD) { EXPECT_EQ(cmd->getImmediateData(), Event::STATE_INITIAL); auto 
gpuAddress = event->getGpuAddress(device); EXPECT_EQ(cmd->getDestinationAddress(), gpuAddress); postSyncFound = true; } } ASSERT_TRUE(postSyncFound); } HWTEST_F(CommandListAppendEventReset, givenCmdlistWhenAppendingEventResetThenEventPoolGraphicsAllocationIsAddedToResidencyContainer) { auto result = commandList->appendEventReset(event->toHandle()); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto &residencyContainer = commandList->commandContainer.getResidencyContainer(); auto eventPoolAlloc = &eventPool->getAllocation(); for (auto alloc : eventPoolAlloc->getGraphicsAllocations()) { auto itor = std::find(std::begin(residencyContainer), std::end(residencyContainer), alloc); EXPECT_NE(itor, std::end(residencyContainer)); } } HWTEST2_F(CommandListAppendEventReset, givenImmediateCmdlistWhenAppendingEventResetThenCommandsAreExecuted, IsAtLeastSkl) { const ze_command_queue_desc_t desc = {}; bool internalEngine = true; ze_result_t returnValue; std::unique_ptr commandList0(CommandList::createImmediate(productFamily, device, &desc, internalEngine, NEO::EngineGroupType::RenderCompute, returnValue)); ASSERT_NE(nullptr, commandList0); auto result = commandList0->appendEventReset(event->toHandle()); ASSERT_EQ(ZE_RESULT_SUCCESS, result); } HWTEST2_F(CommandListAppendEventReset, givenTimestampEventUsedInResetThenPipeControlAppendedCorrectly, IsAtLeastSkl) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION; auto &commandContainer = commandList->commandContainer; ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; ze_result_t result = ZE_RESULT_SUCCESS; auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto event = 
std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); event->setPacketsInUse(16u); commandList->appendEventReset(event->toHandle()); ASSERT_EQ(1u, event->getPacketsInUse()); auto contextOffset = event->getContextEndOffset(); auto baseAddr = event->getGpuAddress(device); auto gpuAddress = ptrOffset(baseAddr, contextOffset); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); auto itorPC = findAll(cmdList.begin(), cmdList.end()); ASSERT_NE(0u, itorPC.size()); uint32_t postSyncFound = 0u; for (auto it : itorPC) { auto cmd = genCmdCast(*it); if (cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) { EXPECT_EQ(cmd->getImmediateData(), Event::STATE_CLEARED); EXPECT_TRUE(cmd->getCommandStreamerStallEnable()); EXPECT_EQ(gpuAddress, NEO::UnitTestHelper::getPipeControlPostSyncAddress(*cmd)); EXPECT_FALSE(cmd->getDcFlushEnable()); postSyncFound++; gpuAddress += event->getSinglePacketSize(); } } ASSERT_EQ(EventPacketsCount::eventPackets, postSyncFound); } HWTEST2_F(CommandListAppendEventReset, givenEventWithHostScopeUsedInResetThenPipeControlWithDcFlushAppended, IsAtLeastSkl) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION; auto &commandContainer = commandList->commandContainer; ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST; ze_result_t result = ZE_RESULT_SUCCESS; auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); 
commandList->appendEventReset(event->toHandle()); auto gpuAddress = event->getGpuAddress(device); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); auto itorPC = findAll(cmdList.begin(), cmdList.end()); ASSERT_NE(0u, itorPC.size()); bool postSyncFound = false; for (auto it : itorPC) { auto cmd = genCmdCast(*it); if (cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) { EXPECT_EQ(cmd->getImmediateData(), Event::STATE_CLEARED); EXPECT_TRUE(cmd->getCommandStreamerStallEnable()); EXPECT_EQ(gpuAddress, NEO::UnitTestHelper::getPipeControlPostSyncAddress(*cmd)); EXPECT_EQ(MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo), cmd->getDcFlushEnable()); postSyncFound = true; } } ASSERT_TRUE(postSyncFound); } HWTEST2_F(CommandListAppendEventReset, givenMultiTileCommandListWhenAppendingMultiPacketEventThenExpectSameNumberOfResetPostSyncAndMultiBarrierCommands, IsAtLeastXeHpCore) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION; using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END; using MI_ATOMIC = typename FamilyType::MI_ATOMIC; using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; auto commandList = std::make_unique<::L0::ult::CommandListCoreFamily>(); ASSERT_NE(nullptr, commandList); ze_result_t returnValue = commandList->initialize(device, NEO::EngineGroupType::Compute, 0u); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); auto cmdStream = commandList->commandContainer.getCommandStream(); size_t useSize = cmdStream->getAvailableSpace(); useSize -= sizeof(MI_BATCH_BUFFER_END); cmdStream->getSpace(useSize); constexpr uint32_t packets = 2u; event->setPacketsInUse(packets); event->setEventTimestampFlag(false); 
event->setPartitionedEvent(true); event->signalScope = ZE_EVENT_SCOPE_FLAG_HOST; commandList->partitionCount = packets; returnValue = commandList->appendEventReset(event->toHandle()); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); EXPECT_EQ(1u, event->getPacketsInUse()); auto gpuAddress = event->getGpuAddress(device) + event->getContextEndOffset(); auto &hwInfo = device->getNEODevice()->getHardwareInfo(); size_t expectedSize = NEO::MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(hwInfo) * packets + commandList->estimateBufferSizeMultiTileBarrier(hwInfo); size_t usedSize = cmdStream->getUsed(); EXPECT_EQ(expectedSize, usedSize); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, cmdStream->getCpuBase(), usedSize)); auto pipeControlList = findAll(cmdList.begin(), cmdList.end()); ASSERT_NE(0u, pipeControlList.size()); uint32_t postSyncFound = 0; auto postSyncPipeControlItor = cmdList.end(); for (auto &it : pipeControlList) { auto cmd = genCmdCast(*it); if (cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) { EXPECT_EQ(cmd->getImmediateData(), Event::STATE_CLEARED); EXPECT_TRUE(cmd->getCommandStreamerStallEnable()); EXPECT_EQ(gpuAddress, NEO::UnitTestHelper::getPipeControlPostSyncAddress(*cmd)); EXPECT_EQ(MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo), cmd->getDcFlushEnable()); postSyncFound++; gpuAddress += event->getSinglePacketSize(); postSyncPipeControlItor = it; } } EXPECT_EQ(packets, postSyncFound); postSyncPipeControlItor++; ASSERT_NE(cmdList.end(), postSyncPipeControlItor); //find multi tile barrier section: pipe control + atomic/semaphore auto itorPipeControl = find(postSyncPipeControlItor, cmdList.end()); ASSERT_NE(cmdList.end(), itorPipeControl); auto itorAtomic = find(itorPipeControl, cmdList.end()); ASSERT_NE(cmdList.end(), itorAtomic); auto itorSemaphore = find(itorAtomic, cmdList.end()); ASSERT_NE(cmdList.end(), itorSemaphore); } } // 
namespace ult } // namespace L0 test_cmdlist_append_launch_kernel_1.cpp000066400000000000000000002034721422164147700365460ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/cmdlist/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_container/command_encoder.h" #include "shared/source/helpers/api_specific_config.h" #include "shared/source/helpers/preamble.h" #include "shared/source/helpers/register_offsets.h" #include "shared/source/os_interface/hw_info_config.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.h" #include "level_zero/core/source/event/event.h" #include "level_zero/core/test/unit_tests/fixtures/module_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h" #include "level_zero/core/test/unit_tests/mocks/mock_module.h" namespace L0 { namespace ult { using CommandListAppendLaunchKernel = Test; HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithIndirectAllocationsAllowedThenCommandListReturnsExpectedIndirectAllocationsAllowed) { createKernel(); kernel->unifiedMemoryControls.indirectDeviceAllocationsAllowed = true; kernel->unifiedMemoryControls.indirectSharedAllocationsAllowed = true; kernel->unifiedMemoryControls.indirectHostAllocationsAllowed = true; EXPECT_TRUE(kernel->getUnifiedMemoryControls().indirectDeviceAllocationsAllowed); EXPECT_TRUE(kernel->hasIndirectAllocationsAllowed()); ze_group_count_t groupCount{1, 1, 1}; ze_result_t returnValue; std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)); auto result = 
commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr); ASSERT_EQ(ZE_RESULT_SUCCESS, result); ASSERT_TRUE(commandList->hasIndirectAllocationsAllowed()); } HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithIndirectAllocationsNotAllowedThenCommandListReturnsExpectedIndirectAllocationsAllowed) { createKernel(); kernel->unifiedMemoryControls.indirectDeviceAllocationsAllowed = false; kernel->unifiedMemoryControls.indirectSharedAllocationsAllowed = false; kernel->unifiedMemoryControls.indirectHostAllocationsAllowed = false; ze_group_count_t groupCount{1, 1, 1}; ze_result_t returnValue; std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)); auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr); ASSERT_EQ(ZE_RESULT_SUCCESS, result); ASSERT_FALSE(commandList->hasIndirectAllocationsAllowed()); } HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithOldestFirstThreadArbitrationPolicySetUsingSchedulingHintExtensionThenCorrectInternalPolicyIsReturned) { createKernel(); ze_scheduling_hint_exp_desc_t pHint{}; pHint.flags = ZE_SCHEDULING_HINT_EXP_FLAG_OLDEST_FIRST; kernel->setSchedulingHintExp(&pHint); ASSERT_EQ(kernel->getSchedulingHintExp(), NEO::ThreadArbitrationPolicy::AgeBased); } HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithRRThreadArbitrationPolicySetUsingSchedulingHintExtensionThenCorrectInternalPolicyIsReturned) { createKernel(); ze_scheduling_hint_exp_desc_t pHint{}; pHint.flags = ZE_SCHEDULING_HINT_EXP_FLAG_ROUND_ROBIN; kernel->setSchedulingHintExp(&pHint); ASSERT_EQ(kernel->getSchedulingHintExp(), NEO::ThreadArbitrationPolicy::RoundRobin); } HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithStallRRThreadArbitrationPolicySetUsingSchedulingHintExtensionThenCorrectInternalPolicyIsReturned) { createKernel(); ze_scheduling_hint_exp_desc_t pHint{}; pHint.flags = 
ZE_SCHEDULING_HINT_EXP_FLAG_STALL_BASED_ROUND_ROBIN;
    kernel->setSchedulingHintExp(&pHint);
    ASSERT_EQ(kernel->getSchedulingHintExp(), NEO::ThreadArbitrationPolicy::RoundRobinAfterDependency);
}

// Sets a ROUND_ROBIN scheduling hint on the kernel, appends it to a new command
// list and expects the command list's final stream state to report RoundRobin.
HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithThreadArbitrationPolicySetUsingSchedulingHintExtensionTheSameFlagIsUsedToSetCmdListThreadArbitrationPolicy) {
    createKernel();

    // Stack descriptor (as in the sibling scheduling-hint tests): a fatal ASSERT_*
    // below returns from the test body early, which would leak a heap allocation.
    ze_scheduling_hint_exp_desc_t pHint{};
    pHint.flags = ZE_SCHEDULING_HINT_EXP_FLAG_ROUND_ROBIN;
    kernel->setSchedulingHintExp(&pHint);

    ze_group_count_t groupCount{1, 1, 1};
    ze_result_t returnValue;
    std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));
    // Stop here instead of dereferencing a null command list if creation failed.
    ASSERT_NE(nullptr, commandList);

    auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);
    ASSERT_EQ(NEO::ThreadArbitrationPolicy::RoundRobin, commandList->getFinalStreamState().stateComputeMode.threadArbitrationPolicy.value);
}

// Same setup as above, but with the OverrideThreadArbitrationPolicy debug flag
// set to 0; the test expects AgeBased to be reported instead of the kernel's
// ROUND_ROBIN hint.
HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithThreadArbitrationPolicySetUsingSchedulingHintExtensionAndOverrideThreadArbitrationPolicyThenTheLatterIsUsedToSetCmdListThreadArbitrationPolicy) {
    createKernel();

    // Stack descriptor: no cleanup is needed when an ASSERT_* aborts the test.
    ze_scheduling_hint_exp_desc_t pHint{};
    pHint.flags = ZE_SCHEDULING_HINT_EXP_FLAG_ROUND_ROBIN;
    kernel->setSchedulingHintExp(&pHint);

    // restorer is declared before the flag is modified, as in the original test.
    DebugManagerStateRestore restorer;
    DebugManager.flags.OverrideThreadArbitrationPolicy.set(0);

    ze_group_count_t groupCount{1, 1, 1};
    ze_result_t returnValue;
    std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));
    // Stop here instead of dereferencing a null command list if creation failed.
    ASSERT_NE(nullptr, commandList);

    auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);
    ASSERT_EQ(NEO::ThreadArbitrationPolicy::AgeBased,
              commandList->getFinalStreamState().stateComputeMode.threadArbitrationPolicy.value);
}

// Leaves only sizeof(MI_BATCH_BUFFER_END) bytes in the command stream, encodes a
// kernel dispatch and expects the container to have switched to a second command
// buffer, with a command found at the position where the first buffer ran out.
HWTEST_F(CommandListAppendLaunchKernel, givenNotEnoughSpaceInCommandStreamWhenAppendingKernelThenBbEndIsAddedAndNewCmdBufferAllocated) {
    using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END;
    createKernel();

    ze_result_t returnValue;
    std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));

    auto &commandContainer = commandList->commandContainer;
    const auto stream = commandContainer.getCommandStream();
    const auto streamCpu = stream->getCpuBase();

    Vec3 groupCount{1, 1, 1};
    // Consume the stream so that exactly one MI_BATCH_BUFFER_END still fits.
    auto sizeLeftInStream = sizeof(MI_BATCH_BUFFER_END);
    auto available = stream->getAvailableSpace();
    stream->getSpace(available - sizeLeftInStream);
    // Zero-sized request: captures the current write position without consuming space.
    auto bbEndPosition = stream->getSpace(0);

    const uint32_t threadGroupDimensions[3] = {1, 1, 1};

    NEO::EncodeDispatchKernelArgs dispatchKernelArgs{
        0,
        device->getNEODevice(),
        kernel.get(),
        threadGroupDimensions,
        PreemptionMode::MidBatch,
        0,
        false,
        false,
        false,
        false,
        false,
        false,
        false,
        false};
    NEO::EncodeDispatchKernel::encode(commandContainer, dispatchKernelArgs);

    auto usedSpaceAfter = commandContainer.getCommandStream()->getUsed();
    ASSERT_GT(usedSpaceAfter, 0u);

    // The stream must now be backed by a different CPU buffer.
    const auto streamCpu2 = stream->getCpuBase();

    EXPECT_NE(nullptr, streamCpu2);
    EXPECT_NE(streamCpu, streamCpu2);

    EXPECT_EQ(2u, commandContainer.getCmdBufferAllocations().size());

    // Parse the bytes at the saved position in the first buffer.
    // NOTE(review): the template argument of find is not visible in this copy;
    // presumably MI_BATCH_BUFFER_END, per the test name - confirm upstream.
    GenCmdList cmdList;
    FamilyType::PARSE::parseCommandBuffer(cmdList, bbEndPosition, 2 * sizeof(MI_BATCH_BUFFER_END));
    auto itor = find(cmdList.begin(), cmdList.end());
    EXPECT_NE(cmdList.end(), itor);
}

HWCMDTEST_F(IGFX_GEN8_CORE, CommandListAppendLaunchKernel, givenFunctionWhenBindingTablePrefetchAllowedThenProgramBindingTableEntryCount) {
    using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD;
    using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
    createKernel();
ze_group_count_t groupCount{1, 1, 1}; ze_result_t returnValue; std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)); commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr); auto commandStream = commandList->commandContainer.getCommandStream(); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, commandStream->getCpuBase(), commandStream->getUsed())); auto itorMIDL = find(cmdList.begin(), cmdList.end()); ASSERT_NE(itorMIDL, cmdList.end()); auto cmd = genCmdCast(*itorMIDL); ASSERT_NE(cmd, nullptr); auto dsh = NEO::ApiSpecificConfig::getBindlessConfiguration() ? device->getNEODevice()->getBindlessHeapsHelper()->getHeap(BindlessHeapsHelper::GLOBAL_DSH) : commandList->commandContainer.getIndirectHeap(NEO::HeapType::DYNAMIC_STATE); auto idd = static_cast(ptrOffset(dsh->getCpuBase(), cmd->getInterfaceDescriptorDataStartAddress())); if (NEO::EncodeSurfaceState::doBindingTablePrefetch()) { uint32_t numArgs = kernel->kernelImmData->getDescriptor().payloadMappings.bindingTable.numEntries; EXPECT_EQ(numArgs, idd->getBindingTableEntryCount()); } else { EXPECT_EQ(0u, idd->getBindingTableEntryCount()); } } HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithPrintfUsedWhenAppendedToCommandListThenKernelIsStored) { createKernel(); ze_result_t returnValue; std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)); ze_group_count_t groupCount{1, 1, 1}; EXPECT_TRUE(kernel->kernelImmData->getDescriptor().kernelAttributes.flags.usesPrintf); auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(1u, commandList->getPrintfFunctionContainer().size()); EXPECT_EQ(kernel.get(), commandList->getPrintfFunctionContainer()[0]); } HWTEST_F(CommandListAppendLaunchKernel, 
// Launching the same printf-using kernel twice on one command list must leave exactly one
// entry in the printf function container.
givenKernelWithPrintfUsedWhenAppendedToCommandListMultipleTimesThenKernelIsStoredOnce) { createKernel(); ze_result_t returnValue; std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)); ze_group_count_t groupCount{1, 1, 1}; EXPECT_TRUE(kernel->kernelImmData->getDescriptor().kernelAttributes.flags.usesPrintf); auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(1u, commandList->getPrintfFunctionContainer().size()); EXPECT_EQ(kernel.get(), commandList->getPrintfFunctionContainer()[0]); result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(1u, commandList->getPrintfFunctionContainer().size()); }

// With the UseBindlessMode debug flag set to 0 and the kernel descriptor forced to
// BindfulAndStateless addressing, launches the kernel sshHeapSize / kernelSshSize + 1
// times. Every launch must succeed, and the surface state heap must end up backed by a
// graphics allocation different from the one it started with.
HWTEST_F(CommandListAppendLaunchKernel, WhenAppendingMultipleTimesThenSshIsNotDepletedButReallocated) { createKernel(); ze_result_t returnValue; DebugManagerStateRestore dbgRestorer; DebugManager.flags.UseBindlessMode.set(0); std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)); ze_group_count_t groupCount{1, 1, 1}; auto kernelSshSize = kernel->getSurfaceStateHeapDataSize(); auto ssh = commandList->commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE); auto sshHeapSize = ssh->getMaxAvailableSpace(); auto initialAllocation = ssh->getGraphicsAllocation(); EXPECT_NE(nullptr, initialAllocation); const_cast(kernel->getKernelDescriptor().kernelAttributes.bufferAddressingMode) = KernelDescriptor::BindfulAndStateless; for (size_t i = 0; i < sshHeapSize / kernelSshSize + 1; i++) { auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr); ASSERT_EQ(ZE_RESULT_SUCCESS, result); } auto reallocatedAllocation = ssh->getGraphicsAllocation(); EXPECT_NE(nullptr, reallocatedAllocation); EXPECT_NE(initialAllocation,
reallocatedAllocation); }

// Launches a mock kernel with a signal event taken from a host-visible event pool and
// walks the commands collected in itorPC. Any of them whose post-sync operation is
// POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA must carry Event::STATE_SIGNALED as immediate
// data and have the command-streamer stall bit enabled.
HWCMDTEST_F(IGFX_GEN8_CORE, CommandListAppendLaunchKernel, givenEventsWhenAppendingKernelThenPostSyncToEventIsGenerated) { using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION; Mock<::L0::Kernel> kernel; ze_result_t returnValue; std::unique_ptr commandList(L0::CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)); auto usedSpaceBefore = commandList->commandContainer.getCommandStream()->getUsed(); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE; eventPoolDesc.count = 1; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; auto eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); ze_group_count_t groupCount{1, 1, 1}; auto result = commandList->appendLaunchKernel( kernel.toHandle(), &groupCount, event->toHandle(), 0, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed(); EXPECT_GT(usedSpaceAfter, usedSpaceBefore); GenCmdList cmdList; EXPECT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), usedSpaceAfter)); auto itor = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), itor); auto itorPC = findAll(cmdList.begin(), cmdList.end()); EXPECT_NE(0u, itorPC.size()); bool postSyncFound = false; for (auto it : itorPC) { auto cmd = genCmdCast(*it); if (cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) { EXPECT_EQ(cmd->getImmediateData(), Event::STATE_SIGNALED); EXPECT_TRUE(cmd->getCommandStreamerStallEnable());
// Remaining post-sync checks: DC flush must be disabled, the post-sync address must equal
// the event's GPU address, at least one matching command must have been seen, and the
// event's allocation must be present in the command list's residency container.
EXPECT_FALSE(cmd->getDcFlushEnable()); auto gpuAddress = event->getGpuAddress(device); EXPECT_EQ(gpuAddress, NEO::UnitTestHelper::getPipeControlPostSyncAddress(*cmd)); postSyncFound = true; } } EXPECT_TRUE(postSyncFound); { auto itorEvent = std::find(std::begin(commandList->commandContainer.getResidencyContainer()), std::end(commandList->commandContainer.getResidencyContainer()), &event->getAllocation(device)); EXPECT_NE(itorEvent, std::end(commandList->commandContainer.getResidencyContainer())); } }

// Matcher alias passed as the third HWTEST2_F argument by the timestamp-event tests.
using TimestampEventSupport = IsWithinProducts;

// Launches a mock kernel with a device-scope signal event from a KERNEL_TIMESTAMP pool.
// The parsed stream is expected to contain register reads of REG_GLOBAL_TIMESTAMP_LDW and
// GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW both before and after the launch, a command with
// CS stall and DC flush enabled in between, and nothing returned by the final findAll.
HWTEST2_F(CommandListAppendLaunchKernel, givenTimestampEventsWhenAppendingKernelThenSRMAndPCEncoded, TimestampEventSupport) { using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION; using MI_LOAD_REGISTER_REG = typename FamilyType::MI_LOAD_REGISTER_REG; Mock<::L0::Kernel> kernel; ze_result_t returnValue; std::unique_ptr commandList(L0::CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)); auto usedSpaceBefore = commandList->commandContainer.getCommandStream()->getUsed(); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.signal = ZE_EVENT_SCOPE_FLAG_DEVICE; auto eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); ze_group_count_t groupCount{1, 1, 1}; auto result = commandList->appendLaunchKernel( kernel.toHandle(), &groupCount, event->toHandle(), 0, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed(); EXPECT_GT(usedSpaceAfter, usedSpaceBefore);
GenCmdList cmdList; EXPECT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), usedSpaceAfter)); auto itor = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), itor); { auto cmd = genCmdCast(*itor); EXPECT_EQ(REG_GLOBAL_TIMESTAMP_LDW, cmd->getSourceRegisterAddress()); } itor++; itor = find(itor, cmdList.end()); ASSERT_NE(cmdList.end(), itor); { auto cmd = genCmdCast(*itor); EXPECT_EQ(GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, cmd->getSourceRegisterAddress()); } itor++; itor = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), itor); itor++; itor = find(itor, cmdList.end()); ASSERT_NE(cmdList.end(), itor); { auto cmd = genCmdCast(*itor); EXPECT_TRUE(cmd->getCommandStreamerStallEnable()); EXPECT_TRUE(cmd->getDcFlushEnable()); } itor++; itor = find(itor, cmdList.end()); ASSERT_NE(cmdList.end(), itor); { auto cmd = genCmdCast(*itor); EXPECT_EQ(REG_GLOBAL_TIMESTAMP_LDW, cmd->getSourceRegisterAddress()); } itor++; itor = find(itor, cmdList.end()); EXPECT_NE(cmdList.end(), itor); { auto cmd = genCmdCast(*itor); EXPECT_EQ(GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, cmd->getSourceRegisterAddress()); } itor++; auto numPCs = findAll(itor, cmdList.end()); //we should not have PC when signal scope is device ASSERT_EQ(0u, numPCs.size()); { auto itorEvent = std::find(std::begin(commandList->commandContainer.getResidencyContainer()), std::end(commandList->commandContainer.getResidencyContainer()), &event->getAllocation(device)); EXPECT_NE(itorEvent, std::end(commandList->commandContainer.getResidencyContainer())); } } HWTEST2_F(CommandListAppendLaunchKernel, givenKernelLaunchWithTSEventAndScopeFlagHostThenPCWithDCFlushEncoded, TimestampEventSupport) { using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION; using MI_STORE_REGISTER_MEM = typename 
// Body of the host-scope timestamp-event test: create the pool and event, launch the
// kernel with the event as signal event, parse the stream and require that the last
// command returned by findAll has command-streamer stall and DC flush enabled.
FamilyType::MI_STORE_REGISTER_MEM; Mock<::L0::Kernel> kernel; ze_result_t returnValue; std::unique_ptr commandList(L0::CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)); auto usedSpaceBefore = commandList->commandContainer.getCommandStream()->getUsed(); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; const ze_event_desc_t eventDesc = { ZE_STRUCTURE_TYPE_EVENT_DESC, nullptr, 0, ZE_EVENT_SCOPE_FLAG_HOST, ZE_EVENT_SCOPE_FLAG_HOST}; auto eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); ze_group_count_t groupCount{1, 1, 1}; auto result = commandList->appendLaunchKernel( kernel.toHandle(), &groupCount, event->toHandle(), 0, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed(); EXPECT_GT(usedSpaceAfter, usedSpaceBefore); GenCmdList cmdList; EXPECT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), usedSpaceAfter)); auto itorPC = findAll(cmdList.begin(), cmdList.end()); ASSERT_NE(0u, itorPC.size()); PIPE_CONTROL *cmd = genCmdCast(*itorPC[itorPC.size() - 1]); EXPECT_TRUE(cmd->getCommandStreamerStallEnable()); EXPECT_TRUE(cmd->getDcFlushEnable()); }

// Counts the commands returned by findAll for a plain launch and for the same launch on a
// second command list created after DebugManager.flags.ForcePipeControlPriorToWalker is
// set to 1; the second count must be exactly one higher than the first.
HWTEST2_F(CommandListAppendLaunchKernel, givenForcePipeControlPriorToWalkerKeyThenAdditionalPCIsAdded, IsAtLeastXeHpCore) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; Mock<::L0::Kernel> kernel; ze_result_t result; std::unique_ptr commandListBase(L0::CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto usedSpaceBefore =
// Body of the ForcePipeControlPriorToWalker comparison: the baseline launch is parsed
// first (numberOfPCsBase), then the debug flag is set, a second command list is created,
// the same launch is repeated and its count (numberOfPCsWithDebugKey) is compared.
commandListBase->commandContainer.getCommandStream()->getUsed(); ze_group_count_t groupCount{1, 1, 1}; result = commandListBase->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto usedSpaceAfter = commandListBase->commandContainer.getCommandStream()->getUsed(); EXPECT_GT(usedSpaceAfter, usedSpaceBefore); GenCmdList cmdListBase; EXPECT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdListBase, ptrOffset(commandListBase->commandContainer.getCommandStream()->getCpuBase(), 0), usedSpaceAfter)); auto itorPC = findAll(cmdListBase.begin(), cmdListBase.end()); size_t numberOfPCsBase = itorPC.size(); DebugManagerStateRestore restorer; DebugManager.flags.ForcePipeControlPriorToWalker.set(1); std::unique_ptr commandListWithDebugKey(L0::CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); usedSpaceBefore = commandListWithDebugKey->commandContainer.getCommandStream()->getUsed(); result = commandListWithDebugKey->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); usedSpaceAfter = commandListWithDebugKey->commandContainer.getCommandStream()->getUsed(); EXPECT_GT(usedSpaceAfter, usedSpaceBefore); GenCmdList cmdListBaseWithDebugKey; EXPECT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdListBaseWithDebugKey, ptrOffset(commandListWithDebugKey->commandContainer.getCommandStream()->getCpuBase(), 0), usedSpaceAfter)); itorPC = findAll(cmdListBaseWithDebugKey.begin(), cmdListBaseWithDebugKey.end()); size_t numberOfPCsWithDebugKey = itorPC.size(); EXPECT_EQ(numberOfPCsWithDebugKey, numberOfPCsBase + 1); }

// With ForcePipeControlPriorToWalker set to 1 and all but sizeof(PIPE_CONTROL) bytes of
// the command stream consumed, a launch must leave the stream on a different graphics
// allocation than the one it started on.
HWTEST2_F(CommandListAppendLaunchKernel, givenForcePipeControlPriorToWalkerKeyAndNoSpaceThenNewBatchBufferAllocationIsUsed, IsAtLeastXeHpCore) { DebugManagerStateRestore restorer; DebugManager.flags.ForcePipeControlPriorToWalker.set(1); using PIPE_CONTROL = typename
// Body of the no-space test: remember the stream's graphics allocation, consume
// getAvailableSpace() - sizeof(PIPE_CONTROL) bytes, launch the kernel and compare the
// stream's graphics allocation afterwards with the remembered one.
FamilyType::PIPE_CONTROL; Mock<::L0::Kernel> kernel; ze_result_t result; std::unique_ptr commandList(L0::CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto firstBatchBufferAllocation = commandList->commandContainer.getCommandStream()->getGraphicsAllocation(); auto useSize = commandList->commandContainer.getCommandStream()->getAvailableSpace(); useSize -= sizeof(PIPE_CONTROL); commandList->commandContainer.getCommandStream()->getSpace(useSize); ze_group_count_t groupCount{1, 1, 1}; result = commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto secondBatchBufferAllocation = commandList->commandContainer.getCommandStream()->getGraphicsAllocation(); EXPECT_NE(firstBatchBufferAllocation, secondBatchBufferAllocation); }

// Immediate command list created with internalEngine = true: its immediate command queue
// must use the device's internal-engine command stream receiver, and appendLaunchKernel
// without events must return ZE_RESULT_SUCCESS.
HWTEST2_F(CommandListAppendLaunchKernel, givenImmediateCommandListWhenAppendingLaunchKernelThenKernelIsExecutedOnImmediateCmdQ, IsAtLeastSkl) { createKernel(); const ze_command_queue_desc_t desc = {}; bool internalEngine = true; ze_result_t result = ZE_RESULT_SUCCESS; std::unique_ptr commandList0(CommandList::createImmediate(productFamily, device, &desc, internalEngine, NEO::EngineGroupType::RenderCompute, result)); ASSERT_NE(nullptr, commandList0); CommandQueueImp *cmdQueue = reinterpret_cast(commandList0->cmdQImmediate); EXPECT_EQ(cmdQueue->getCsr(), neoDevice->getInternalEngine().commandStreamReceiver); ze_group_count_t groupCount{1, 1, 1}; result = commandList0->appendLaunchKernel( kernel->toHandle(), &groupCount, nullptr, 0, nullptr); ASSERT_EQ(ZE_RESULT_SUCCESS, result); }

// Same immediate command list setup, but the launch passes a wait-event count of 1 with a
// null event array and must be rejected with ZE_RESULT_ERROR_INVALID_ARGUMENT.
HWTEST2_F(CommandListAppendLaunchKernel, givenImmediateCommandListWhenAppendingLaunchKernelWithInvalidEventThenInvalidArgumentErrorIsReturned, IsAtLeastSkl) { createKernel(); const ze_command_queue_desc_t desc = {}; bool internalEngine = true; ze_result_t result = ZE_RESULT_SUCCESS; std::unique_ptr
// Body of the invalid-wait-event case for appendLaunchKernel: wait-event count 1 with a
// null event array must yield ZE_RESULT_ERROR_INVALID_ARGUMENT.
commandList0(CommandList::createImmediate(productFamily, device, &desc, internalEngine, NEO::EngineGroupType::RenderCompute, result)); ASSERT_NE(nullptr, commandList0); CommandQueueImp *cmdQueue = reinterpret_cast(commandList0->cmdQImmediate); EXPECT_EQ(cmdQueue->getCsr(), neoDevice->getInternalEngine().commandStreamReceiver); ze_group_count_t groupCount{1, 1, 1}; result = commandList0->appendLaunchKernel( kernel->toHandle(), &groupCount, nullptr, 1, nullptr); ASSERT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, result); }

// Indirect variant: appendLaunchKernelIndirect on an internal-engine immediate command
// list must return ZE_RESULT_SUCCESS. The group count passed here is a host-side
// ze_group_count_t local.
HWTEST2_F(CommandListAppendLaunchKernel, givenImmediateCommandListWhenAppendingLaunchKernelIndirectThenKernelIsExecutedOnImmediateCmdQ, IsAtLeastSkl) { createKernel(); const ze_command_queue_desc_t desc = {}; bool internalEngine = true; ze_result_t result = ZE_RESULT_SUCCESS; std::unique_ptr commandList0(CommandList::createImmediate(productFamily, device, &desc, internalEngine, NEO::EngineGroupType::RenderCompute, result)); ASSERT_NE(nullptr, commandList0); CommandQueueImp *cmdQueue = reinterpret_cast(commandList0->cmdQImmediate); EXPECT_EQ(cmdQueue->getCsr(), neoDevice->getInternalEngine().commandStreamReceiver); ze_group_count_t groupCount{1, 1, 1}; result = commandList0->appendLaunchKernelIndirect( kernel->toHandle(), &groupCount, nullptr, 0, nullptr); ASSERT_EQ(ZE_RESULT_SUCCESS, result); }

// Indirect variant of the invalid-wait-event case (wait-event count 1, null event array).
HWTEST2_F(CommandListAppendLaunchKernel, givenImmediateCommandListWhenAppendingLaunchKernelIndirectWithInvalidEventThenInvalidArgumentErrorIsReturned, IsAtLeastSkl) { createKernel(); const ze_command_queue_desc_t desc = {}; bool internalEngine = true; ze_result_t result = ZE_RESULT_SUCCESS; std::unique_ptr commandList0(CommandList::createImmediate(productFamily, device, &desc, internalEngine, NEO::EngineGroupType::RenderCompute, result)); ASSERT_NE(nullptr, commandList0); CommandQueueImp *cmdQueue = reinterpret_cast(commandList0->cmdQImmediate); EXPECT_EQ(cmdQueue->getCsr(), neoDevice->getInternalEngine().commandStreamReceiver); ze_group_count_t groupCount{1, 1,
1}; result = commandList0->appendLaunchKernelIndirect( kernel->toHandle(), &groupCount, nullptr, 1, nullptr); ASSERT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, result); }

// Matcher alias passed as the third HWTEST2_F argument by the test below.
using SupportedPlatforms = IsWithinProducts;

// Launches the kernel preferredPacketCount + 4 times with the same timestamp event as
// signal event; the event must report one packet in use both before and after.
HWTEST2_F(CommandListAppendLaunchKernel, givenCommandListWhenAppendLaunchKernelSeveralTimesThenAlwaysFirstEventPacketIsUsed, SupportedPlatforms) { createKernel(); ze_result_t returnValue; std::unique_ptr commandList(L0::CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP | ZE_EVENT_POOL_FLAG_HOST_VISIBLE; const ze_event_desc_t eventDesc = { ZE_STRUCTURE_TYPE_EVENT_DESC, nullptr, 0, ZE_EVENT_SCOPE_FLAG_HOST, ZE_EVENT_SCOPE_FLAG_HOST}; auto eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); EXPECT_EQ(1u, event->getPacketsInUse()); ze_group_count_t groupCount{1, 1, 1}; for (uint32_t i = 0; i < NEO::TimestampPacketSizeControl::preferredPacketCount + 4; i++) { auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, event->toHandle(), 0, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } EXPECT_EQ(1u, event->getPacketsInUse()); }

// Fixture extending CommandListAppendLaunchKernel with a helper that returns where the
// ImplicitArgs buffer sits inside the indirect heap: the implicit-args patching size minus
// sizeof(ImplicitArgs) when the family supports the IGFX_XE_HP_CORE command set, and 0
// otherwise.
struct CommandListAppendLaunchKernelWithImplicitArgs : CommandListAppendLaunchKernel { template uint64_t getIndirectHeapOffsetForImplicitArgsBuffer(const Mock<::L0::Kernel> &kernel) { if (FamilyType::supportsCmdSet(IGFX_XE_HP_CORE)) { auto implicitArgsProgrammingSize = ImplicitArgsHelper::getSizeForImplicitArgsPatching(kernel.pImplicitArgs.get(), kernel.getKernelDescriptor(), neoDevice->getHardwareInfo()); return implicitArgsProgrammingSize - sizeof(ImplicitArgs); } else { return 0u; } } };

HWTEST_F(CommandListAppendLaunchKernelWithImplicitArgs,
givenIndirectDispatchWithImplicitArgsWhenAppendingThenMiMathCommandsForWorkGroupCountAndGlobalWorkSizeAndWorkDimAreProgrammed) { using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM; using MI_LOAD_REGISTER_REG = typename FamilyType::MI_LOAD_REGISTER_REG; using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; using MI_LOAD_REGISTER_MEM = typename FamilyType::MI_LOAD_REGISTER_MEM; Mock<::L0::Kernel> kernel; auto pMockModule = std::unique_ptr(new Mock(device, nullptr)); kernel.module = pMockModule.get(); kernel.immutableData.crossThreadDataSize = sizeof(uint64_t); kernel.pImplicitArgs.reset(new ImplicitArgs()); UnitTestHelper::adjustKernelDescriptorForImplicitArgs(*kernel.immutableData.kernelDescriptor); kernel.setGroupSize(1, 1, 1); ze_result_t returnValue; std::unique_ptr commandList(L0::CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)); void *alloc = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; auto result = context->allocDeviceMem(device->toHandle(), &deviceDesc, 16384u, 4096u, &alloc); ASSERT_EQ(result, ZE_RESULT_SUCCESS); result = commandList->appendLaunchKernelIndirect(kernel.toHandle(), static_cast(alloc), nullptr, 0, nullptr); EXPECT_EQ(result, ZE_RESULT_SUCCESS); auto heap = commandList->commandContainer.getIndirectHeap(HeapType::INDIRECT_OBJECT); uint64_t pImplicitArgsGPUVA = heap->getGraphicsAllocation()->getGpuAddress() + getIndirectHeapOffsetForImplicitArgsBuffer(kernel); auto workDimStoreRegisterMemCmd = FamilyType::cmdInitStoreRegisterMem; workDimStoreRegisterMemCmd.setRegisterAddress(CS_GPR_R0); workDimStoreRegisterMemCmd.setMemoryAddress(pImplicitArgsGPUVA); auto groupCountXStoreRegisterMemCmd = FamilyType::cmdInitStoreRegisterMem; groupCountXStoreRegisterMemCmd.setRegisterAddress(GPUGPU_DISPATCHDIMX); groupCountXStoreRegisterMemCmd.setMemoryAddress(pImplicitArgsGPUVA + offsetof(ImplicitArgs, groupCountX)); auto groupCountYStoreRegisterMemCmd = 
FamilyType::cmdInitStoreRegisterMem; groupCountYStoreRegisterMemCmd.setRegisterAddress(GPUGPU_DISPATCHDIMY); groupCountYStoreRegisterMemCmd.setMemoryAddress(pImplicitArgsGPUVA + offsetof(ImplicitArgs, groupCountY)); auto groupCountZStoreRegisterMemCmd = FamilyType::cmdInitStoreRegisterMem; groupCountZStoreRegisterMemCmd.setRegisterAddress(GPUGPU_DISPATCHDIMZ); groupCountZStoreRegisterMemCmd.setMemoryAddress(pImplicitArgsGPUVA + offsetof(ImplicitArgs, groupCountZ)); auto globalSizeXStoreRegisterMemCmd = FamilyType::cmdInitStoreRegisterMem; globalSizeXStoreRegisterMemCmd.setRegisterAddress(CS_GPR_R1); globalSizeXStoreRegisterMemCmd.setMemoryAddress(pImplicitArgsGPUVA + offsetof(ImplicitArgs, globalSizeX)); auto globalSizeYStoreRegisterMemCmd = FamilyType::cmdInitStoreRegisterMem; globalSizeYStoreRegisterMemCmd.setRegisterAddress(CS_GPR_R1); globalSizeYStoreRegisterMemCmd.setMemoryAddress(pImplicitArgsGPUVA + offsetof(ImplicitArgs, globalSizeY)); auto globalSizeZStoreRegisterMemCmd = FamilyType::cmdInitStoreRegisterMem; globalSizeZStoreRegisterMemCmd.setRegisterAddress(CS_GPR_R1); globalSizeZStoreRegisterMemCmd.setMemoryAddress(pImplicitArgsGPUVA + offsetof(ImplicitArgs, globalSizeZ)); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed())); auto itor = find(cmdList.begin(), cmdList.end()); EXPECT_NE(itor, cmdList.end()); auto cmd = genCmdCast(*itor); EXPECT_EQ(cmd->getRegisterAddress(), groupCountXStoreRegisterMemCmd.getRegisterAddress()); EXPECT_EQ(cmd->getMemoryAddress(), groupCountXStoreRegisterMemCmd.getMemoryAddress()); itor = find(++itor, cmdList.end()); EXPECT_NE(itor, cmdList.end()); cmd = genCmdCast(*itor); EXPECT_EQ(cmd->getRegisterAddress(), groupCountYStoreRegisterMemCmd.getRegisterAddress()); EXPECT_EQ(cmd->getMemoryAddress(), groupCountYStoreRegisterMemCmd.getMemoryAddress()); itor = 
find(++itor, cmdList.end()); EXPECT_NE(itor, cmdList.end()); cmd = genCmdCast(*itor); EXPECT_EQ(cmd->getRegisterAddress(), groupCountZStoreRegisterMemCmd.getRegisterAddress()); EXPECT_EQ(cmd->getMemoryAddress(), groupCountZStoreRegisterMemCmd.getMemoryAddress()); itor = find(++itor, cmdList.end()); EXPECT_NE(itor, cmdList.end()); cmd = genCmdCast(*itor); EXPECT_EQ(cmd->getRegisterAddress(), globalSizeXStoreRegisterMemCmd.getRegisterAddress()); EXPECT_EQ(cmd->getMemoryAddress(), globalSizeXStoreRegisterMemCmd.getMemoryAddress()); itor = find(++itor, cmdList.end()); EXPECT_NE(itor, cmdList.end()); cmd = genCmdCast(*itor); EXPECT_EQ(cmd->getRegisterAddress(), globalSizeYStoreRegisterMemCmd.getRegisterAddress()); EXPECT_EQ(cmd->getMemoryAddress(), globalSizeYStoreRegisterMemCmd.getMemoryAddress()); itor = find(++itor, cmdList.end()); EXPECT_NE(itor, cmdList.end()); cmd = genCmdCast(*itor); EXPECT_EQ(cmd->getRegisterAddress(), globalSizeZStoreRegisterMemCmd.getRegisterAddress()); EXPECT_EQ(cmd->getMemoryAddress(), globalSizeZStoreRegisterMemCmd.getMemoryAddress()); itor = find(++itor, cmdList.end()); EXPECT_NE(itor, cmdList.end()); itor = find(++itor, cmdList.end()); EXPECT_NE(itor, cmdList.end()); auto cmd2 = genCmdCast(*itor); auto memoryMaskCmd = FamilyType::cmdInitLoadRegisterImm; memoryMaskCmd.setDataDword(0xFF00FFFF); EXPECT_EQ(cmd2->getDataDword(), memoryMaskCmd.getDataDword()); itor++; //MI_MATH_ALU_INST_INLINE doesn't have tagMI_COMMAND_OPCODE, can't find it in cmdList EXPECT_NE(itor, cmdList.end()); itor = find(++itor, cmdList.end()); EXPECT_NE(itor, cmdList.end()); cmd2 = genCmdCast(*itor); auto offsetCmd = FamilyType::cmdInitLoadRegisterImm; offsetCmd.setDataDword(0x0000FFFF); EXPECT_EQ(cmd2->getDataDword(), offsetCmd.getDataDword()); itor = find(++itor, cmdList.end()); EXPECT_NE(itor, cmdList.end()); itor = find(++itor, cmdList.end()); EXPECT_NE(itor, cmdList.end()); itor = find(++itor, cmdList.end()); EXPECT_NE(itor, cmdList.end()); itor++; 
//MI_MATH_ALU_INST_INLINE doesn't have tagMI_COMMAND_OPCODE, can't find it in cmdList EXPECT_NE(itor, cmdList.end()); itor++; EXPECT_NE(itor, cmdList.end()); itor = find(++itor, cmdList.end()); EXPECT_NE(itor, cmdList.end()); itor = find(++itor, cmdList.end()); EXPECT_NE(itor, cmdList.end()); itor++; //MI_MATH_ALU_INST_INLINE doesn't have tagMI_COMMAND_OPCODE, can't find it in cmdList EXPECT_NE(itor, cmdList.end()); itor++; EXPECT_NE(itor, cmdList.end()); itor++; EXPECT_NE(itor, cmdList.end()); itor++; EXPECT_NE(itor, cmdList.end()); itor++; EXPECT_NE(itor, cmdList.end()); itor++; EXPECT_NE(itor, cmdList.end()); itor++; EXPECT_NE(itor, cmdList.end()); itor++; EXPECT_NE(itor, cmdList.end()); itor++; EXPECT_NE(itor, cmdList.end()); itor = find(++itor, cmdList.end()); EXPECT_NE(itor, cmdList.end()); itor++; //MI_MATH_ALU_INST_INLINE doesn't have tagMI_COMMAND_OPCODE, can't find it in cmdList EXPECT_NE(itor, cmdList.end()); itor++; EXPECT_NE(itor, cmdList.end()); itor++; EXPECT_NE(itor, cmdList.end()); itor = find(++itor, cmdList.end()); EXPECT_NE(itor, cmdList.end()); cmd = genCmdCast(*itor); EXPECT_EQ(cmd->getRegisterAddress(), workDimStoreRegisterMemCmd.getRegisterAddress()); EXPECT_EQ(cmd->getMemoryAddress(), workDimStoreRegisterMemCmd.getMemoryAddress()); context->freeMem(alloc); } HWTEST_F(CommandListAppendLaunchKernel, givenIndirectDispatchWhenAppendingThenWorkGroupCountAndGlobalWorkSizeAndWorkDimIsSetInCrossThreadData) { using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM; using MI_LOAD_REGISTER_REG = typename FamilyType::MI_LOAD_REGISTER_REG; using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; Mock<::L0::Kernel> kernel; kernel.groupSize[0] = 2; kernel.descriptor.payloadMappings.dispatchTraits.numWorkGroups[0] = 2; kernel.descriptor.payloadMappings.dispatchTraits.globalWorkSize[0] = 2; kernel.descriptor.payloadMappings.dispatchTraits.workDim = 4; ze_result_t returnValue; std::unique_ptr 
commandList(L0::CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)); void *alloc = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; auto result = context->allocDeviceMem(device->toHandle(), &deviceDesc, 16384u, 4096u, &alloc); ASSERT_EQ(result, ZE_RESULT_SUCCESS); result = commandList->appendLaunchKernelIndirect(kernel.toHandle(), static_cast(alloc), nullptr, 0, nullptr); EXPECT_EQ(result, ZE_RESULT_SUCCESS); kernel.groupSize[2] = 2; result = commandList->appendLaunchKernelIndirect(kernel.toHandle(), static_cast(alloc), nullptr, 0, nullptr); EXPECT_EQ(result, ZE_RESULT_SUCCESS); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed())); auto itor = find(cmdList.begin(), cmdList.end()); EXPECT_NE(itor, cmdList.end()); itor = find(++itor, cmdList.end()); EXPECT_NE(itor, cmdList.end()); itor = find(++itor, cmdList.end()); EXPECT_NE(itor, cmdList.end()); itor = find(++itor, cmdList.end()); EXPECT_NE(itor, cmdList.end()); itor = find(++itor, cmdList.end()); EXPECT_NE(itor, cmdList.end()); itor = find(++itor, cmdList.end()); EXPECT_NE(itor, cmdList.end()); itor = find(++itor, cmdList.end()); EXPECT_NE(itor, cmdList.end()); itor++; //MI_MATH_ALU_INST_INLINE doesn't have tagMI_COMMAND_OPCODE, can't find it in cmdList EXPECT_NE(itor, cmdList.end()); itor++; EXPECT_NE(itor, cmdList.end()); itor = find(++itor, cmdList.end()); EXPECT_NE(itor, cmdList.end()); itor = find(++itor, cmdList.end()); EXPECT_NE(itor, cmdList.end()); itor++; //MI_MATH_ALU_INST_INLINE doesn't have tagMI_COMMAND_OPCODE, can't find it in cmdList EXPECT_NE(itor, cmdList.end()); itor++; EXPECT_NE(itor, cmdList.end()); itor++; EXPECT_NE(itor, cmdList.end()); itor++; EXPECT_NE(itor, cmdList.end()); itor = find(++itor, cmdList.end()); EXPECT_NE(itor, cmdList.end()); itor++; //MI_MATH_ALU_INST_INLINE 
doesn't have tagMI_COMMAND_OPCODE, can't find it in cmdList EXPECT_NE(itor, cmdList.end()); itor++; EXPECT_NE(itor, cmdList.end()); itor = find(++itor, cmdList.end()); EXPECT_NE(itor, cmdList.end()); itor = find(++itor, cmdList.end()); //kernel with groupSize[2] = 2 EXPECT_NE(itor, cmdList.end()); itor = find(++itor, cmdList.end()); EXPECT_NE(itor, cmdList.end()); itor = find(++itor, cmdList.end()); EXPECT_NE(itor, cmdList.end()); itor = find(++itor, cmdList.end()); EXPECT_NE(itor, cmdList.end()); itor = find(++itor, cmdList.end()); EXPECT_NE(itor, cmdList.end()); itor = find(++itor, cmdList.end()); EXPECT_NE(itor, cmdList.end()); context->freeMem(alloc); } HWTEST_F(CommandListAppendLaunchKernel, givenCommandListWhenResetCalledThenStateIsCleaned) { using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; createKernel(); ze_result_t returnValue; auto commandList = std::unique_ptr(whitebox_cast(L0::CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue))); ASSERT_NE(nullptr, commandList); ASSERT_NE(nullptr, commandList->commandContainer.getCommandStream()); auto commandListControl = std::unique_ptr(whitebox_cast(L0::CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue))); ASSERT_NE(nullptr, commandListControl); ASSERT_NE(nullptr, commandListControl->commandContainer.getCommandStream()); ze_group_count_t groupCount{1, 1, 1}; auto result = commandList->appendLaunchKernel( kernel->toHandle(), &groupCount, nullptr, 0, nullptr); ASSERT_EQ(ZE_RESULT_SUCCESS, result); result = commandList->close(); ASSERT_EQ(ZE_RESULT_SUCCESS, result); result = commandList->reset(); ASSERT_EQ(ZE_RESULT_SUCCESS, result); ASSERT_EQ(device, commandList->device); ASSERT_NE(nullptr, commandList->commandContainer.getCommandStream()); ASSERT_GE(commandListControl->commandContainer.getCmdBufferAllocations()[0]->getUnderlyingBufferSize(), 
commandList->commandContainer.getCmdBufferAllocations()[0]->getUnderlyingBufferSize());

// The reset list must match the control list in residency and deallocation container
// sizes, printf function container size, used command-stream bytes and slmSize.
ASSERT_EQ(commandListControl->commandContainer.getResidencyContainer().size(), commandList->commandContainer.getResidencyContainer().size()); ASSERT_EQ(commandListControl->commandContainer.getDeallocationContainer().size(), commandList->commandContainer.getDeallocationContainer().size()); ASSERT_EQ(commandListControl->getPrintfFunctionContainer().size(), commandList->getPrintfFunctionContainer().size()); ASSERT_EQ(commandListControl->commandContainer.getCommandStream()->getUsed(), commandList->commandContainer.getCommandStream()->getUsed()); ASSERT_EQ(commandListControl->commandContainer.slmSize, commandList->commandContainer.slmSize);

// Per-heap comparison. The DYNAMIC_STATE heap is expected to be absent when the device
// does not support images; every other heap must exist on both lists with equal
// allocation size, equal used bytes and equal dirty state.
// NOTE(review): the no-images branch only inspects commandListControl; the reset list
// itself is not checked there - confirm whether that is intentional.
for (uint32_t i = 0; i < NEO::HeapType::NUM_TYPES; i++) { auto heapType = static_cast(i); if (NEO::HeapType::DYNAMIC_STATE == heapType && !device->getHwInfo().capabilityTable.supportsImages) { ASSERT_EQ(nullptr, commandListControl->commandContainer.getIndirectHeapAllocation(heapType)); ASSERT_EQ(nullptr, commandListControl->commandContainer.getIndirectHeap(heapType)); } else { ASSERT_NE(nullptr, commandListControl->commandContainer.getIndirectHeapAllocation(heapType)); ASSERT_NE(nullptr, commandList->commandContainer.getIndirectHeapAllocation(heapType)); ASSERT_EQ(commandListControl->commandContainer.getIndirectHeapAllocation(heapType)->getUnderlyingBufferSize(), commandList->commandContainer.getIndirectHeapAllocation(heapType)->getUnderlyingBufferSize()); ASSERT_NE(nullptr, commandListControl->commandContainer.getIndirectHeap(heapType)); ASSERT_NE(nullptr, commandList->commandContainer.getIndirectHeap(heapType)); ASSERT_EQ(commandListControl->commandContainer.getIndirectHeap(heapType)->getUsed(), commandList->commandContainer.getIndirectHeap(heapType)->getUsed()); ASSERT_EQ(commandListControl->commandContainer.isHeapDirty(heapType), commandList->commandContainer.isHeapDirty(heapType)); } }

GenCmdList cmdList;
// Last step of the reset test: the command stream of the reset list is parsed and must
// still contain the command searched for (presumably STATE_BASE_ADDRESS, the only command
// type this test declares an alias for).
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed())); auto itor = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), itor); }

// Launches the same mock kernel four times, closes the command list and verifies that
// every allocation in the residency container occurs exactly once. The streamed index
// `it` identifies the offending position on failure.
HWTEST_F(CommandListAppendLaunchKernel, WhenAddingKernelsThenResidencyContainerDoesNotContainDuplicatesAfterClosingCommandList) { Mock<::L0::Kernel> kernel; ze_result_t returnValue; std::unique_ptr commandList(L0::CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)); ze_group_count_t groupCount{1, 1, 1}; for (int i = 0; i < 4; ++i) { auto result = commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } commandList->close(); uint32_t it = 0; const auto &residencyCont = commandList->commandContainer.getResidencyContainer(); for (auto alloc : residencyCont) { auto occurences = std::count(residencyCont.begin(), residencyCont.end(), alloc); EXPECT_EQ(1U, static_cast(occurences)) << it; ++it; } }

// Launches a mock kernel that waits on one event from a host-visible pool. The parsed
// stream must contain a semaphore wait with the SAD_NOT_EQUAL_SDD compare operation, a
// data dword of -1 and a graphics address equal to the event's GPU address (both masked
// with the device's gpuAddressSpace).
HWTEST_F(CommandListAppendLaunchKernel, givenSingleValidWaitEventsThenAddSemaphoreToCommandStream) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; Mock<::L0::Kernel> kernel; ze_result_t returnValue; auto commandList = std::unique_ptr(L0::CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)); ASSERT_NE(nullptr, commandList->commandContainer.getCommandStream()); auto usedSpaceBefore = commandList->commandContainer.getCommandStream()->getUsed(); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE; eventPoolDesc.count = 1; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; std::unique_ptr eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); std::unique_ptr event(Event::create(eventPool.get(),
&eventDesc, device)); ze_event_handle_t hEventHandle = event->toHandle(); ze_group_count_t groupCount{1, 1, 1}; auto result = commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 1, &hEventHandle); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed(); ASSERT_GT(usedSpaceAfter, usedSpaceBefore); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), usedSpaceAfter)); auto itor = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), itor); { auto cmd = genCmdCast(*itor); EXPECT_EQ(cmd->getCompareOperation(), MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD); EXPECT_EQ(static_cast(-1), cmd->getSemaphoreDataDword()); EXPECT_EQ(cmd->getSemaphoreGraphicsAddress() & device->getHwInfo().capabilityTable.gpuAddressSpace, event->getGpuAddress(device) & device->getHwInfo().capabilityTable.gpuAddressSpace); } } HWTEST_F(CommandListAppendLaunchKernel, givenMultipleValidWaitEventsThenAddSemaphoreCommands) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; Mock<::L0::Kernel> kernel; ze_result_t returnValue; auto commandList = std::unique_ptr(L0::CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)); ASSERT_NE(nullptr, commandList->commandContainer.getCommandStream()); auto usedSpaceBefore = commandList->commandContainer.getCommandStream()->getUsed(); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE; eventPoolDesc.count = 2; ze_event_desc_t eventDesc1 = {}; eventDesc1.index = 0; ze_event_desc_t eventDesc2 = {}; eventDesc2.index = 1; std::unique_ptr eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); std::unique_ptr event1(Event::create(eventPool.get(), &eventDesc1, 
device)); std::unique_ptr event2(Event::create(eventPool.get(), &eventDesc2, device)); ze_event_handle_t hEventHandle1 = event1->toHandle(); ze_event_handle_t hEventHandle2 = event2->toHandle(); ze_event_handle_t waitEvents[2]; waitEvents[0] = hEventHandle1; waitEvents[1] = hEventHandle2; ze_group_count_t groupCount{1, 1, 1}; auto result = commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 2, waitEvents); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed(); ASSERT_GT(usedSpaceAfter, usedSpaceBefore); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), usedSpaceAfter)); using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; auto itor = findAll(cmdList.begin(), cmdList.end()); ASSERT_FALSE(itor.empty()); ASSERT_EQ(2, static_cast(itor.size())); } HWCMDTEST_F(IGFX_GEN8_CORE, CommandListAppendLaunchKernel, givenAppendLaunchMultipleKernelsIndirectThenEnablesPredicate) { createKernel(); using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER; ze_result_t returnValue; auto commandList = std::unique_ptr(L0::CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)); const ze_kernel_handle_t launchFn = kernel->toHandle(); uint32_t *numLaunchArgs; ze_device_mem_alloc_desc_t deviceDesc = {}; auto result = context->allocDeviceMem( device->toHandle(), &deviceDesc, 16384u, 4096u, reinterpret_cast(&numLaunchArgs)); result = commandList->appendLaunchMultipleKernelsIndirect(1, &launchFn, numLaunchArgs, nullptr, nullptr, 0, nullptr); ASSERT_EQ(ZE_RESULT_SUCCESS, result); *numLaunchArgs = 0; auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed(); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), 
usedSpaceAfter)); auto itorWalker = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), itorWalker); auto cmd = genCmdCast(*itorWalker); EXPECT_TRUE(cmd->getPredicateEnable()); context->freeMem(reinterpret_cast(numLaunchArgs)); } HWCMDTEST_F(IGFX_GEN8_CORE, CommandListAppendLaunchKernel, givenAppendLaunchMultipleKernelsThenUsesMathAndWalker) { createKernel(); using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER; using MI_MATH = typename FamilyType::MI_MATH; ze_result_t returnValue; auto commandList = std::unique_ptr(L0::CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)); const ze_kernel_handle_t launchFn[3] = {kernel->toHandle(), kernel->toHandle(), kernel->toHandle()}; uint32_t *numLaunchArgs; const uint32_t numKernels = 3; ze_device_mem_alloc_desc_t deviceDesc = {}; auto result = context->allocDeviceMem( device->toHandle(), &deviceDesc, 16384u, 4096u, reinterpret_cast(&numLaunchArgs)); result = commandList->appendLaunchMultipleKernelsIndirect(numKernels, launchFn, numLaunchArgs, nullptr, nullptr, 0, nullptr); ASSERT_EQ(ZE_RESULT_SUCCESS, result); *numLaunchArgs = 2; auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed(); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), usedSpaceAfter)); auto itor = cmdList.begin(); for (uint32_t i = 0; i < numKernels; i++) { itor = find(itor, cmdList.end()); ASSERT_NE(cmdList.end(), itor); itor = find(itor, cmdList.end()); ASSERT_NE(cmdList.end(), itor); } itor = find(itor, cmdList.end()); ASSERT_EQ(cmdList.end(), itor); context->freeMem(reinterpret_cast(numLaunchArgs)); } HWTEST_F(CommandListAppendLaunchKernel, givenInvalidEventListWhenAppendLaunchCooperativeKernelIsCalledThenErrorIsReturned) { createKernel(); ze_group_count_t groupCount{1, 1, 1}; ze_result_t returnValue; std::unique_ptr commandList(CommandList::create(productFamily, 
device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)); returnValue = commandList->appendLaunchCooperativeKernel(kernel->toHandle(), &groupCount, nullptr, 1, nullptr); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, returnValue); } HWTEST2_F(CommandListAppendLaunchKernel, givenKernelUsingSyncBufferWhenAppendLaunchCooperativeKernelIsCalledThenCorrectValueIsReturned, IsAtLeastSkl) { Mock<::L0::Kernel> kernel; auto pMockModule = std::unique_ptr(new Mock(device, nullptr)); kernel.module = pMockModule.get(); kernel.setGroupSize(4, 1, 1); ze_group_count_t groupCount{8, 1, 1}; auto &kernelAttributes = kernel.immutableData.kernelDescriptor->kernelAttributes; kernelAttributes.flags.usesSyncBuffer = true; kernelAttributes.numGrfRequired = GrfConfig::DefaultGrfNumber; bool isCooperative = true; auto pCommandList = std::make_unique>>(); auto &hwHelper = HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily); auto engineGroupType = NEO::EngineGroupType::Compute; if (hwHelper.isCooperativeEngineSupported(*defaultHwInfo)) { engineGroupType = hwHelper.getEngineGroupType(aub_stream::EngineType::ENGINE_CCS, EngineUsage::Cooperative, *defaultHwInfo); } pCommandList->initialize(device, engineGroupType, 0u); auto result = pCommandList->appendLaunchCooperativeKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); pCommandList = std::make_unique>>(); pCommandList->initialize(device, engineGroupType, 0u); result = pCommandList->appendLaunchKernelWithParams(kernel.toHandle(), &groupCount, nullptr, false, false, isCooperative); EXPECT_EQ(ZE_RESULT_SUCCESS, result); { VariableBackup usesSyncBuffer{&kernelAttributes.flags.packed}; usesSyncBuffer = false; pCommandList = std::make_unique>>(); pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u); result = pCommandList->appendLaunchKernelWithParams(kernel.toHandle(), &groupCount, nullptr, false, false, isCooperative); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } { VariableBackup 
groupCountX{&groupCount.groupCountX}; uint32_t maximalNumberOfWorkgroupsAllowed; kernel.suggestMaxCooperativeGroupCount(&maximalNumberOfWorkgroupsAllowed, engineGroupType, false); groupCountX = maximalNumberOfWorkgroupsAllowed + 1; pCommandList = std::make_unique>>(); pCommandList->initialize(device, engineGroupType, 0u); result = pCommandList->appendLaunchKernelWithParams(kernel.toHandle(), &groupCount, nullptr, false, false, isCooperative); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, result); } { VariableBackup cooperative{&isCooperative}; cooperative = false; result = pCommandList->appendLaunchKernelWithParams(kernel.toHandle(), &groupCount, nullptr, false, false, isCooperative); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, result); } } HWTEST2_F(CommandListAppendLaunchKernel, whenUpdateStreamPropertiesIsCalledThenRequiredStateAndFinalStateAreCorrectlySet, IsAtLeastSkl) { Mock<::L0::Kernel> kernel; auto pMockModule = std::unique_ptr(new Mock(device, nullptr)); kernel.module = pMockModule.get(); auto pCommandList = std::make_unique>>(); auto result = pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u); ASSERT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(-1, pCommandList->requiredStreamState.frontEndState.disableOverdispatch.value); EXPECT_EQ(-1, pCommandList->finalStreamState.frontEndState.disableOverdispatch.value); const auto &hwInfoConfig = *NEO::HwInfoConfig::get(defaultHwInfo->platform.eProductFamily); int32_t expectedDisableOverdispatch = hwInfoConfig.isDisableOverdispatchAvailable(*defaultHwInfo); pCommandList->updateStreamProperties(kernel, false, false); EXPECT_EQ(expectedDisableOverdispatch, pCommandList->requiredStreamState.frontEndState.disableOverdispatch.value); EXPECT_EQ(expectedDisableOverdispatch, pCommandList->finalStreamState.frontEndState.disableOverdispatch.value); pCommandList->updateStreamProperties(kernel, false, false); EXPECT_EQ(expectedDisableOverdispatch, 
pCommandList->requiredStreamState.frontEndState.disableOverdispatch.value); EXPECT_EQ(expectedDisableOverdispatch, pCommandList->finalStreamState.frontEndState.disableOverdispatch.value); } HWTEST2_F(CommandListAppendLaunchKernel, givenCooperativeKernelWhenAppendLaunchCooperativeKernelIsCalledThenCommandListTypeIsProperlySet, IsAtLeastSkl) { createKernel(); kernel->setGroupSize(4, 1, 1); ze_group_count_t groupCount{8, 1, 1}; auto pCommandList = std::make_unique>>(); pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u); bool isCooperative = false; auto result = pCommandList->appendLaunchKernelWithParams(kernel->toHandle(), &groupCount, nullptr, false, false, isCooperative); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_TRUE(pCommandList->containsAnyKernel); EXPECT_FALSE(pCommandList->containsCooperativeKernelsFlag); pCommandList = std::make_unique>>(); pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u); isCooperative = true; result = pCommandList->appendLaunchKernelWithParams(kernel->toHandle(), &groupCount, nullptr, false, false, isCooperative); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_TRUE(pCommandList->containsAnyKernel); EXPECT_TRUE(pCommandList->containsCooperativeKernelsFlag); } HWTEST2_F(CommandListAppendLaunchKernel, givenAnyCooperativeKernelAndMixingAllowedWhenAppendLaunchCooperativeKernelIsCalledThenCommandListTypeIsProperlySet, IsAtLeastSkl) { DebugManagerStateRestore restorer; DebugManager.flags.AllowMixingRegularAndCooperativeKernels.set(1); createKernel(); kernel->setGroupSize(4, 1, 1); ze_group_count_t groupCount{8, 1, 1}; auto pCommandList = std::make_unique>>(); pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u); bool isCooperative = false; auto result = pCommandList->appendLaunchKernelWithParams(kernel->toHandle(), &groupCount, nullptr, false, false, isCooperative); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_TRUE(pCommandList->containsAnyKernel); 
EXPECT_FALSE(pCommandList->containsCooperativeKernelsFlag); isCooperative = true; result = pCommandList->appendLaunchKernelWithParams(kernel->toHandle(), &groupCount, nullptr, false, false, isCooperative); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_TRUE(pCommandList->containsAnyKernel); EXPECT_TRUE(pCommandList->containsCooperativeKernelsFlag); isCooperative = false; result = pCommandList->appendLaunchKernelWithParams(kernel->toHandle(), &groupCount, nullptr, false, false, isCooperative); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_TRUE(pCommandList->containsAnyKernel); EXPECT_TRUE(pCommandList->containsCooperativeKernelsFlag); } HWTEST2_F(CommandListAppendLaunchKernel, givenCooperativeAndNonCooperativeKernelsAndAllowMixingWhenAppendLaunchCooperativeKernelIsCalledThenReturnSuccess, IsAtLeastSkl) { DebugManagerStateRestore restorer; DebugManager.flags.AllowMixingRegularAndCooperativeKernels.set(1); Mock<::L0::Kernel> kernel; auto pMockModule = std::unique_ptr(new Mock(device, nullptr)); kernel.module = pMockModule.get(); kernel.setGroupSize(4, 1, 1); ze_group_count_t groupCount{8, 1, 1}; auto pCommandList = std::make_unique>>(); pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u); bool isCooperative = false; auto result = pCommandList->appendLaunchKernelWithParams(kernel.toHandle(), &groupCount, nullptr, false, false, isCooperative); EXPECT_EQ(ZE_RESULT_SUCCESS, result); isCooperative = true; result = pCommandList->appendLaunchKernelWithParams(kernel.toHandle(), &groupCount, nullptr, false, false, isCooperative); EXPECT_EQ(ZE_RESULT_SUCCESS, result); pCommandList = std::make_unique>>(); pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u); isCooperative = true; result = pCommandList->appendLaunchKernelWithParams(kernel.toHandle(), &groupCount, nullptr, false, false, isCooperative); EXPECT_EQ(ZE_RESULT_SUCCESS, result); isCooperative = false; result = pCommandList->appendLaunchKernelWithParams(kernel.toHandle(), &groupCount, nullptr, 
false, false, isCooperative); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } } // namespace ult } // namespace L0 test_cmdlist_append_launch_kernel_2.cpp000066400000000000000000002041471422164147700365470ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/cmdlist/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_container/implicit_scaling.h" #include "shared/source/gen9/reg_configs.h" #include "shared/source/helpers/local_id_gen.h" #include "shared/source/helpers/per_thread_data.h" #include "shared/source/utilities/software_tags_manager.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/mocks/mock_compilers.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/test/unit_tests/fixtures/module_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h" #include "level_zero/core/test/unit_tests/mocks/mock_module.h" namespace L0 { namespace ult { struct CommandListAppendLaunchKernelSWTags : public Test { void SetUp() override { NEO::MockCompilerEnableGuard mock(true); NEO::DebugManager.flags.EnableSWTags.set(true); ModuleFixture::SetUp(); } DebugManagerStateRestore dbgRestorer; }; struct CommandListDualStorage : public Test { void SetUp() override { NEO::MockCompilerEnableGuard mock(true); DebugManager.flags.EnableLocalMemory.set(1); DebugManager.flags.AllocateSharedAllocationsWithCpuAndGpuStorage.set(1); ModuleFixture::SetUp(); } void TearDown() override { ModuleFixture::TearDown(); } DebugManagerStateRestore restorer; }; HWTEST_F(CommandListDualStorage, givenIndirectDispatchWithSharedDualStorageMemoryWhenAppendingThenWorkGroupCountAndGlobalWorkSizeIsSetInCrossThreadData) { using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM; using MI_LOAD_REGISTER_MEM = typename FamilyType::MI_LOAD_REGISTER_MEM; using 
MI_LOAD_REGISTER_REG = typename FamilyType::MI_LOAD_REGISTER_REG; using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; Mock<::L0::Kernel> kernel; uint32_t globalWorkSizeXOffset = 0x20u; uint32_t globalWorkSizeYOffset = 0x24u; uint32_t globalWorkSizeZOffset = 0x28u; uint32_t numWorkGroupXOffset = 0x30u; uint32_t numWorkGroupYOffset = 0x34u; uint32_t numWorkGroupZOffset = 0x38u; kernel.descriptor.payloadMappings.dispatchTraits.globalWorkSize[0] = globalWorkSizeXOffset; kernel.descriptor.payloadMappings.dispatchTraits.globalWorkSize[1] = globalWorkSizeYOffset; kernel.descriptor.payloadMappings.dispatchTraits.globalWorkSize[2] = globalWorkSizeZOffset; kernel.descriptor.payloadMappings.dispatchTraits.numWorkGroups[0] = numWorkGroupXOffset; kernel.descriptor.payloadMappings.dispatchTraits.numWorkGroups[1] = numWorkGroupYOffset; kernel.descriptor.payloadMappings.dispatchTraits.numWorkGroups[2] = numWorkGroupZOffset; ze_result_t returnValue; std::unique_ptr commandList(L0::CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)); void *alloc = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_host_mem_alloc_desc_t hostDesc = {}; auto result = context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, 16384u, 4096u, &alloc); ASSERT_EQ(ZE_RESULT_SUCCESS, result); ze_group_count_t *pThreadGroupDimensions = static_cast(ptrOffset(alloc, sizeof(ze_group_count_t))); pThreadGroupDimensions->groupCountX = 3; pThreadGroupDimensions->groupCountY = 4; pThreadGroupDimensions->groupCountZ = 5; result = commandList->appendLaunchKernelIndirect(kernel.toHandle(), pThreadGroupDimensions, nullptr, 0, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto allocData = device->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(pThreadGroupDimensions); ASSERT_NE(nullptr, allocData->cpuAllocation); auto gpuAllocation = allocData->gpuAllocations.getGraphicsAllocation(device->getNEODevice()->getRootDeviceIndex()); 
ASSERT_NE(nullptr, gpuAllocation); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed())); uint32_t regAddress = 0; uint64_t gpuAddress = 0; auto expectedXAddress = reinterpret_cast(ptrOffset(pThreadGroupDimensions, offsetof(ze_group_count_t, groupCountX))); auto expectedYAddress = reinterpret_cast(ptrOffset(pThreadGroupDimensions, offsetof(ze_group_count_t, groupCountY))); auto expectedZAddress = reinterpret_cast(ptrOffset(pThreadGroupDimensions, offsetof(ze_group_count_t, groupCountZ))); auto itor = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), itor); auto cmd = genCmdCast(*itor); regAddress = cmd->getRegisterAddress(); gpuAddress = cmd->getMemoryAddress(); EXPECT_EQ(GPUGPU_DISPATCHDIMX, regAddress); EXPECT_EQ(expectedXAddress, gpuAddress); itor = find(++itor, cmdList.end()); EXPECT_NE(cmdList.end(), itor); cmd = genCmdCast(*itor); regAddress = cmd->getRegisterAddress(); gpuAddress = cmd->getMemoryAddress(); EXPECT_EQ(GPUGPU_DISPATCHDIMY, regAddress); EXPECT_EQ(expectedYAddress, gpuAddress); itor = find(++itor, cmdList.end()); EXPECT_NE(cmdList.end(), itor); cmd = genCmdCast(*itor); regAddress = cmd->getRegisterAddress(); gpuAddress = cmd->getMemoryAddress(); EXPECT_EQ(GPUGPU_DISPATCHDIMZ, regAddress); EXPECT_EQ(expectedZAddress, gpuAddress); MI_STORE_REGISTER_MEM *cmd2 = nullptr; // Find group count cmds do { itor = find(++itor, cmdList.end()); cmd2 = genCmdCast(*itor); } while (itor != cmdList.end() && cmd2->getRegisterAddress() != GPUGPU_DISPATCHDIMX); EXPECT_NE(cmdList.end(), itor); auto groupCountStoreRegisterMemCmd = FamilyType::cmdInitStoreRegisterMem; groupCountStoreRegisterMemCmd.setRegisterAddress(GPUGPU_DISPATCHDIMX); 
groupCountStoreRegisterMemCmd.setMemoryAddress(commandList->commandContainer.getIndirectHeap(HeapType::INDIRECT_OBJECT)->getGraphicsAllocation()->getGpuAddress() + numWorkGroupXOffset); EXPECT_EQ(cmd2->getRegisterAddress(), groupCountStoreRegisterMemCmd.getRegisterAddress()); EXPECT_EQ(cmd2->getMemoryAddress(), groupCountStoreRegisterMemCmd.getMemoryAddress()); itor = find(++itor, cmdList.end()); EXPECT_NE(cmdList.end(), itor); cmd2 = genCmdCast(*itor); groupCountStoreRegisterMemCmd.setRegisterAddress(GPUGPU_DISPATCHDIMY); groupCountStoreRegisterMemCmd.setMemoryAddress(commandList->commandContainer.getIndirectHeap(HeapType::INDIRECT_OBJECT)->getGraphicsAllocation()->getGpuAddress() + numWorkGroupYOffset); EXPECT_EQ(cmd2->getRegisterAddress(), groupCountStoreRegisterMemCmd.getRegisterAddress()); EXPECT_EQ(cmd2->getMemoryAddress(), groupCountStoreRegisterMemCmd.getMemoryAddress()); itor = find(++itor, cmdList.end()); EXPECT_NE(cmdList.end(), itor); cmd2 = genCmdCast(*itor); groupCountStoreRegisterMemCmd.setRegisterAddress(GPUGPU_DISPATCHDIMZ); groupCountStoreRegisterMemCmd.setMemoryAddress(commandList->commandContainer.getIndirectHeap(HeapType::INDIRECT_OBJECT)->getGraphicsAllocation()->getGpuAddress() + numWorkGroupZOffset); EXPECT_EQ(cmd2->getRegisterAddress(), groupCountStoreRegisterMemCmd.getRegisterAddress()); EXPECT_EQ(cmd2->getMemoryAddress(), groupCountStoreRegisterMemCmd.getMemoryAddress()); auto workSizeStoreRegisterMemCmd = FamilyType::cmdInitStoreRegisterMem; workSizeStoreRegisterMemCmd.setRegisterAddress(CS_GPR_R1); // Find workgroup size cmds itor = find(++itor, cmdList.end()); EXPECT_NE(cmdList.end(), itor); itor = find(++itor, cmdList.end()); EXPECT_NE(cmdList.end(), itor); itor = find(++itor, cmdList.end()); EXPECT_NE(cmdList.end(), itor); cmd2 = genCmdCast(*itor); workSizeStoreRegisterMemCmd.setMemoryAddress(commandList->commandContainer.getIndirectHeap(HeapType::INDIRECT_OBJECT)->getGraphicsAllocation()->getGpuAddress() + globalWorkSizeXOffset); 
EXPECT_EQ(cmd2->getRegisterAddress(), workSizeStoreRegisterMemCmd.getRegisterAddress()); EXPECT_EQ(cmd2->getMemoryAddress(), workSizeStoreRegisterMemCmd.getMemoryAddress()); itor = find(++itor, cmdList.end()); EXPECT_NE(cmdList.end(), itor); itor = find(++itor, cmdList.end()); EXPECT_NE(cmdList.end(), itor); itor = find(++itor, cmdList.end()); EXPECT_NE(cmdList.end(), itor); cmd2 = genCmdCast(*itor); workSizeStoreRegisterMemCmd.setMemoryAddress(commandList->commandContainer.getIndirectHeap(HeapType::INDIRECT_OBJECT)->getGraphicsAllocation()->getGpuAddress() + globalWorkSizeYOffset); EXPECT_EQ(cmd2->getRegisterAddress(), workSizeStoreRegisterMemCmd.getRegisterAddress()); EXPECT_EQ(cmd2->getMemoryAddress(), workSizeStoreRegisterMemCmd.getMemoryAddress()); EXPECT_EQ(cmd2->getRegisterAddress(), workSizeStoreRegisterMemCmd.getRegisterAddress()); EXPECT_EQ(cmd2->getMemoryAddress(), workSizeStoreRegisterMemCmd.getMemoryAddress()); itor = find(++itor, cmdList.end()); EXPECT_NE(cmdList.end(), itor); itor = find(++itor, cmdList.end()); EXPECT_NE(cmdList.end(), itor); itor = find(++itor, cmdList.end()); EXPECT_NE(cmdList.end(), itor); cmd2 = genCmdCast(*itor); workSizeStoreRegisterMemCmd.setMemoryAddress(commandList->commandContainer.getIndirectHeap(HeapType::INDIRECT_OBJECT)->getGraphicsAllocation()->getGpuAddress() + globalWorkSizeZOffset); EXPECT_EQ(cmd2->getRegisterAddress(), workSizeStoreRegisterMemCmd.getRegisterAddress()); EXPECT_EQ(cmd2->getMemoryAddress(), workSizeStoreRegisterMemCmd.getMemoryAddress()); context->freeMem(alloc); } HWCMDTEST_F(IGFX_XE_HP_CORE, CommandListDualStorage, givenIndirectDispatchWithSharedDualStorageMemoryAndInlineDataWhenAppendingThenWorkGroupCountAndGlobalWorkSizeIsSetInCrossThreadData) { using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM; using MI_LOAD_REGISTER_MEM = typename FamilyType::MI_LOAD_REGISTER_MEM; using MI_LOAD_REGISTER_REG = typename FamilyType::MI_LOAD_REGISTER_REG; using MI_LOAD_REGISTER_IMM = typename 
FamilyType::MI_LOAD_REGISTER_IMM; using INLINE_DATA = typename FamilyType::INLINE_DATA; Mock<::L0::Kernel> kernel; kernel.crossThreadDataSize = 0x60u; kernel.descriptor.kernelAttributes.flags.passInlineData = true; uint32_t globalWorkSizeXOffset = 0x40u; uint32_t globalWorkSizeYOffset = 0x44u; uint32_t globalWorkSizeZOffset = 0x48u; uint32_t numWorkGroupXOffset = 0x30u; uint32_t numWorkGroupYOffset = 0x34u; uint32_t numWorkGroupZOffset = 0x38u; kernel.descriptor.payloadMappings.dispatchTraits.globalWorkSize[0] = globalWorkSizeXOffset; kernel.descriptor.payloadMappings.dispatchTraits.globalWorkSize[1] = globalWorkSizeYOffset; kernel.descriptor.payloadMappings.dispatchTraits.globalWorkSize[2] = globalWorkSizeZOffset; kernel.descriptor.payloadMappings.dispatchTraits.numWorkGroups[0] = numWorkGroupXOffset; kernel.descriptor.payloadMappings.dispatchTraits.numWorkGroups[1] = numWorkGroupYOffset; kernel.descriptor.payloadMappings.dispatchTraits.numWorkGroups[2] = numWorkGroupZOffset; ze_result_t returnValue; std::unique_ptr commandList(L0::CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)); void *alloc = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_host_mem_alloc_desc_t hostDesc = {}; auto result = context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, 16384u, 4096u, &alloc); ASSERT_EQ(ZE_RESULT_SUCCESS, result); ze_group_count_t *pThreadGroupDimensions = static_cast(ptrOffset(alloc, sizeof(ze_group_count_t))); pThreadGroupDimensions->groupCountX = 3; pThreadGroupDimensions->groupCountY = 4; pThreadGroupDimensions->groupCountZ = 5; result = commandList->appendLaunchKernelIndirect(kernel.toHandle(), pThreadGroupDimensions, nullptr, 0, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto allocData = device->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(pThreadGroupDimensions); ASSERT_NE(nullptr, allocData->cpuAllocation); auto gpuAllocation = 
allocData->gpuAllocations.getGraphicsAllocation(device->getNEODevice()->getRootDeviceIndex()); ASSERT_NE(nullptr, gpuAllocation); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed())); uint32_t regAddress = 0; uint64_t gpuAddress = 0; auto expectedXAddress = reinterpret_cast(ptrOffset(pThreadGroupDimensions, offsetof(ze_group_count_t, groupCountX))); auto expectedYAddress = reinterpret_cast(ptrOffset(pThreadGroupDimensions, offsetof(ze_group_count_t, groupCountY))); auto expectedZAddress = reinterpret_cast(ptrOffset(pThreadGroupDimensions, offsetof(ze_group_count_t, groupCountZ))); auto itor = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), itor); auto cmd = genCmdCast(*itor); regAddress = cmd->getRegisterAddress(); gpuAddress = cmd->getMemoryAddress(); EXPECT_EQ(GPUGPU_DISPATCHDIMX, regAddress); EXPECT_EQ(expectedXAddress, gpuAddress); itor = find(++itor, cmdList.end()); EXPECT_NE(cmdList.end(), itor); cmd = genCmdCast(*itor); regAddress = cmd->getRegisterAddress(); gpuAddress = cmd->getMemoryAddress(); EXPECT_EQ(GPUGPU_DISPATCHDIMY, regAddress); EXPECT_EQ(expectedYAddress, gpuAddress); itor = find(++itor, cmdList.end()); EXPECT_NE(cmdList.end(), itor); cmd = genCmdCast(*itor); regAddress = cmd->getRegisterAddress(); gpuAddress = cmd->getMemoryAddress(); EXPECT_EQ(GPUGPU_DISPATCHDIMZ, regAddress); EXPECT_EQ(expectedZAddress, gpuAddress); MI_STORE_REGISTER_MEM *cmd2 = nullptr; // Find group count cmds do { itor = find(++itor, cmdList.end()); cmd2 = genCmdCast(*itor); } while (itor != cmdList.end() && cmd2->getRegisterAddress() != GPUGPU_DISPATCHDIMX); EXPECT_NE(cmdList.end(), itor); auto groupCountStoreRegisterMemCmd = FamilyType::cmdInitStoreRegisterMem; groupCountStoreRegisterMemCmd.setRegisterAddress(GPUGPU_DISPATCHDIMX); 
groupCountStoreRegisterMemCmd.setMemoryAddress(commandList->commandContainer.getIndirectHeap(HeapType::INDIRECT_OBJECT)->getGraphicsAllocation()->getGpuAddress() + numWorkGroupXOffset - sizeof(INLINE_DATA)); EXPECT_EQ(cmd2->getRegisterAddress(), groupCountStoreRegisterMemCmd.getRegisterAddress()); EXPECT_EQ(cmd2->getMemoryAddress(), groupCountStoreRegisterMemCmd.getMemoryAddress()); itor = find(++itor, cmdList.end()); EXPECT_NE(cmdList.end(), itor); cmd2 = genCmdCast(*itor); groupCountStoreRegisterMemCmd.setRegisterAddress(GPUGPU_DISPATCHDIMY); groupCountStoreRegisterMemCmd.setMemoryAddress(commandList->commandContainer.getIndirectHeap(HeapType::INDIRECT_OBJECT)->getGraphicsAllocation()->getGpuAddress() + numWorkGroupYOffset - sizeof(INLINE_DATA)); EXPECT_EQ(cmd2->getRegisterAddress(), groupCountStoreRegisterMemCmd.getRegisterAddress()); EXPECT_EQ(cmd2->getMemoryAddress(), groupCountStoreRegisterMemCmd.getMemoryAddress()); itor = find(++itor, cmdList.end()); EXPECT_NE(cmdList.end(), itor); cmd2 = genCmdCast(*itor); groupCountStoreRegisterMemCmd.setRegisterAddress(GPUGPU_DISPATCHDIMZ); groupCountStoreRegisterMemCmd.setMemoryAddress(commandList->commandContainer.getIndirectHeap(HeapType::INDIRECT_OBJECT)->getGraphicsAllocation()->getGpuAddress() + numWorkGroupZOffset - sizeof(INLINE_DATA)); EXPECT_EQ(cmd2->getRegisterAddress(), groupCountStoreRegisterMemCmd.getRegisterAddress()); EXPECT_EQ(cmd2->getMemoryAddress(), groupCountStoreRegisterMemCmd.getMemoryAddress()); auto workSizeStoreRegisterMemCmd = FamilyType::cmdInitStoreRegisterMem; workSizeStoreRegisterMemCmd.setRegisterAddress(CS_GPR_R1); // Find workgroup size cmds itor = find(++itor, cmdList.end()); EXPECT_NE(cmdList.end(), itor); itor = find(++itor, cmdList.end()); EXPECT_NE(cmdList.end(), itor); itor = find(++itor, cmdList.end()); EXPECT_NE(cmdList.end(), itor); cmd2 = genCmdCast(*itor); 
workSizeStoreRegisterMemCmd.setMemoryAddress(commandList->commandContainer.getIndirectHeap(HeapType::INDIRECT_OBJECT)->getGraphicsAllocation()->getGpuAddress() + globalWorkSizeXOffset - sizeof(INLINE_DATA)); EXPECT_EQ(cmd2->getRegisterAddress(), workSizeStoreRegisterMemCmd.getRegisterAddress()); EXPECT_EQ(cmd2->getMemoryAddress(), workSizeStoreRegisterMemCmd.getMemoryAddress()); itor = find(++itor, cmdList.end()); EXPECT_NE(cmdList.end(), itor); itor = find(++itor, cmdList.end()); EXPECT_NE(cmdList.end(), itor); itor = find(++itor, cmdList.end()); EXPECT_NE(cmdList.end(), itor); cmd2 = genCmdCast(*itor); workSizeStoreRegisterMemCmd.setMemoryAddress(commandList->commandContainer.getIndirectHeap(HeapType::INDIRECT_OBJECT)->getGraphicsAllocation()->getGpuAddress() + globalWorkSizeYOffset - sizeof(INLINE_DATA)); EXPECT_EQ(cmd2->getRegisterAddress(), workSizeStoreRegisterMemCmd.getRegisterAddress()); EXPECT_EQ(cmd2->getMemoryAddress(), workSizeStoreRegisterMemCmd.getMemoryAddress()); EXPECT_EQ(cmd2->getRegisterAddress(), workSizeStoreRegisterMemCmd.getRegisterAddress()); EXPECT_EQ(cmd2->getMemoryAddress(), workSizeStoreRegisterMemCmd.getMemoryAddress()); itor = find(++itor, cmdList.end()); EXPECT_NE(cmdList.end(), itor); itor = find(++itor, cmdList.end()); EXPECT_NE(cmdList.end(), itor); itor = find(++itor, cmdList.end()); EXPECT_NE(cmdList.end(), itor); cmd2 = genCmdCast(*itor); workSizeStoreRegisterMemCmd.setMemoryAddress(commandList->commandContainer.getIndirectHeap(HeapType::INDIRECT_OBJECT)->getGraphicsAllocation()->getGpuAddress() + globalWorkSizeZOffset - sizeof(INLINE_DATA)); EXPECT_EQ(cmd2->getRegisterAddress(), workSizeStoreRegisterMemCmd.getRegisterAddress()); EXPECT_EQ(cmd2->getMemoryAddress(), workSizeStoreRegisterMemCmd.getMemoryAddress()); context->freeMem(alloc); } HWTEST_F(CommandListAppendLaunchKernelSWTags, givenEnableSWTagsWhenAppendLaunchKernelThenTagsAreInserted) { using MI_NOOP = typename FamilyType::MI_NOOP; createKernel(); ze_group_count_t 
groupCount{1, 1, 1}; ze_result_t returnValue; std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)); auto cmdStream = commandList->commandContainer.getCommandStream(); auto usedSpaceBefore = cmdStream->getUsed(); auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto usedSpaceAfter = cmdStream->getUsed(); ASSERT_GT(usedSpaceAfter, usedSpaceBefore); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, ptrOffset(cmdStream->getCpuBase(), 0), usedSpaceAfter)); auto noops = findAll(cmdList.begin(), cmdList.end()); EXPECT_EQ(6u, noops.size()); bool tagFound = false; for (auto it = noops.begin(); it != noops.end() && !tagFound; ++it) { auto noop = genCmdCast(*(*it)); if (NEO::SWTags::BaseTag::getMarkerNoopID(SWTags::OpCode::KernelName) == noop->getIdentificationNumber() && noop->getIdentificationNumberRegisterWriteEnable() == true && ++it != noops.end()) { noop = genCmdCast(*(*it)); if (noop->getIdentificationNumber() & 1 << 21 && noop->getIdentificationNumberRegisterWriteEnable() == false) { tagFound = true; } } } EXPECT_TRUE(tagFound); tagFound = false; for (auto it = noops.begin(); it != noops.end() && !tagFound; ++it) { auto noop = genCmdCast(*(*it)); if (NEO::SWTags::BaseTag::getMarkerNoopID(SWTags::OpCode::CallNameBegin) == noop->getIdentificationNumber() && noop->getIdentificationNumberRegisterWriteEnable() == true && ++it != noops.end()) { noop = genCmdCast(*(*it)); if (noop->getIdentificationNumber() & 1 << 21 && noop->getIdentificationNumberRegisterWriteEnable() == false) { tagFound = true; } } } EXPECT_TRUE(tagFound); tagFound = false; for (auto it = noops.begin(); it != noops.end() && !tagFound; ++it) { auto noop = genCmdCast(*(*it)); if (NEO::SWTags::BaseTag::getMarkerNoopID(SWTags::OpCode::CallNameEnd) == noop->getIdentificationNumber() && 
noop->getIdentificationNumberRegisterWriteEnable() == true && ++it != noops.end()) { noop = genCmdCast(*(*it)); if (noop->getIdentificationNumber() & 1 << 21 && noop->getIdentificationNumberRegisterWriteEnable() == false) { tagFound = true; } } } EXPECT_TRUE(tagFound); } HWTEST_F(CommandListAppendLaunchKernelSWTags, givenEnableSWTagsWhenAppendEventResetIsCalledThenTagsAreInserted) { using MI_NOOP = typename FamilyType::MI_NOOP; ze_result_t returnValue; std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)); auto cmdStream = commandList->commandContainer.getCommandStream(); auto usedSpaceBefore = cmdStream->getUsed(); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST; eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; ze_event_handle_t hEvent = nullptr; std::unique_ptr eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); ASSERT_NE(nullptr, eventPool); eventPool->createEvent(&eventDesc, &hEvent); auto result = commandList->appendEventReset(hEvent); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto usedSpaceAfter = cmdStream->getUsed(); ASSERT_GT(usedSpaceAfter, usedSpaceBefore); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, ptrOffset(cmdStream->getCpuBase(), 0), usedSpaceAfter)); auto noops = findAll(cmdList.begin(), cmdList.end()); uint32_t expecteNumberOfNops = 4u; EXPECT_EQ(expecteNumberOfNops, noops.size()); bool tagFound = false; for (auto it = noops.begin(); it != noops.end() && !tagFound; ++it) { auto noop = genCmdCast(*(*it)); if (NEO::SWTags::BaseTag::getMarkerNoopID(SWTags::OpCode::CallNameBegin) == noop->getIdentificationNumber() && 
noop->getIdentificationNumberRegisterWriteEnable() == true && ++it != noops.end()) { noop = genCmdCast(*(*it)); if (noop->getIdentificationNumber() & 1 << 21 && noop->getIdentificationNumberRegisterWriteEnable() == false) { tagFound = true; } } } EXPECT_TRUE(tagFound); tagFound = false; for (auto it = noops.begin(); it != noops.end() && !tagFound; ++it) { auto noop = genCmdCast(*(*it)); if (NEO::SWTags::BaseTag::getMarkerNoopID(SWTags::OpCode::CallNameEnd) == noop->getIdentificationNumber() && noop->getIdentificationNumberRegisterWriteEnable() == true && ++it != noops.end()) { noop = genCmdCast(*(*it)); if (noop->getIdentificationNumber() & 1 << 21 && noop->getIdentificationNumberRegisterWriteEnable() == false) { tagFound = true; } } } EXPECT_TRUE(tagFound); Event *event = Event::fromHandle(hEvent); event->destroy(); } HWTEST_F(CommandListAppendLaunchKernelSWTags, givenEnableSWTagsWhenAppendSignalEventThenThenTagsAreInserted) { using MI_NOOP = typename FamilyType::MI_NOOP; ze_result_t returnValue; std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)); auto cmdStream = commandList->commandContainer.getCommandStream(); auto usedSpaceBefore = cmdStream->getUsed(); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST; eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; ze_event_handle_t hEvent = nullptr; std::unique_ptr eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); ASSERT_NE(nullptr, eventPool); eventPool->createEvent(&eventDesc, &hEvent); auto result = commandList->appendSignalEvent(hEvent); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto usedSpaceAfter = cmdStream->getUsed(); ASSERT_GT(usedSpaceAfter, 
usedSpaceBefore); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, ptrOffset(cmdStream->getCpuBase(), 0), usedSpaceAfter)); auto noops = findAll(cmdList.begin(), cmdList.end()); uint32_t expecteNumberOfNops = 4u; EXPECT_EQ(expecteNumberOfNops, noops.size()); bool tagFound = false; for (auto it = noops.begin(); it != noops.end() && !tagFound; ++it) { auto noop = genCmdCast(*(*it)); if (NEO::SWTags::BaseTag::getMarkerNoopID(SWTags::OpCode::CallNameBegin) == noop->getIdentificationNumber() && noop->getIdentificationNumberRegisterWriteEnable() == true && ++it != noops.end()) { noop = genCmdCast(*(*it)); if (noop->getIdentificationNumber() & 1 << 21 && noop->getIdentificationNumberRegisterWriteEnable() == false) { tagFound = true; } } } EXPECT_TRUE(tagFound); tagFound = false; for (auto it = noops.begin(); it != noops.end() && !tagFound; ++it) { auto noop = genCmdCast(*(*it)); if (NEO::SWTags::BaseTag::getMarkerNoopID(SWTags::OpCode::CallNameEnd) == noop->getIdentificationNumber() && noop->getIdentificationNumberRegisterWriteEnable() == true && ++it != noops.end()) { noop = genCmdCast(*(*it)); if (noop->getIdentificationNumber() & 1 << 21 && noop->getIdentificationNumberRegisterWriteEnable() == false) { tagFound = true; } } } EXPECT_TRUE(tagFound); Event *event = Event::fromHandle(hEvent); event->destroy(); } HWTEST_F(CommandListAppendLaunchKernelSWTags, givenEnableSWTagsWhenAppendWaitOnEventsThenThenTagsAreInserted) { using MI_NOOP = typename FamilyType::MI_NOOP; ze_result_t returnValue; std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)); auto cmdStream = commandList->commandContainer.getCommandStream(); auto usedSpaceBefore = cmdStream->getUsed(); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; 
eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST; eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; ze_event_handle_t hEvent = nullptr; std::unique_ptr eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); ASSERT_NE(nullptr, eventPool); eventPool->createEvent(&eventDesc, &hEvent); auto result = commandList->appendWaitOnEvents(1, &hEvent); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto usedSpaceAfter = cmdStream->getUsed(); ASSERT_GT(usedSpaceAfter, usedSpaceBefore); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, ptrOffset(cmdStream->getCpuBase(), 0), usedSpaceAfter)); auto noops = findAll(cmdList.begin(), cmdList.end()); uint32_t expecteNumberOfNops = 4u; EXPECT_EQ(expecteNumberOfNops, noops.size()); bool tagFound = false; for (auto it = noops.begin(); it != noops.end() && !tagFound; ++it) { auto noop = genCmdCast(*(*it)); if (NEO::SWTags::BaseTag::getMarkerNoopID(SWTags::OpCode::CallNameBegin) == noop->getIdentificationNumber() && noop->getIdentificationNumberRegisterWriteEnable() == true && ++it != noops.end()) { noop = genCmdCast(*(*it)); if (noop->getIdentificationNumber() & 1 << 21 && noop->getIdentificationNumberRegisterWriteEnable() == false) { tagFound = true; } } } EXPECT_TRUE(tagFound); tagFound = false; for (auto it = noops.begin(); it != noops.end() && !tagFound; ++it) { auto noop = genCmdCast(*(*it)); if (NEO::SWTags::BaseTag::getMarkerNoopID(SWTags::OpCode::CallNameEnd) == noop->getIdentificationNumber() && noop->getIdentificationNumberRegisterWriteEnable() == true && ++it != noops.end()) { noop = genCmdCast(*(*it)); if (noop->getIdentificationNumber() & 1 << 21 && noop->getIdentificationNumberRegisterWriteEnable() == false) { tagFound = true; } } } EXPECT_TRUE(tagFound); Event *event = Event::fromHandle(hEvent); event->destroy(); } HWTEST_F(CommandListAppendLaunchKernelSWTags, givenEnableSWTagsWhenAppendMemoryCopyThenThenTagsAreInserted) { using 
MI_NOOP = typename FamilyType::MI_NOOP; ze_result_t returnValue; std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)); auto cmdStream = commandList->commandContainer.getCommandStream(); auto usedSpaceBefore = cmdStream->getUsed(); void *srcBuffer = reinterpret_cast(0x0F000000); void *dstBuffer = reinterpret_cast(0x0FF00000); size_t size = 1024; auto result = commandList->appendMemoryCopy(dstBuffer, srcBuffer, size, nullptr, 0, nullptr); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto usedSpaceAfter = cmdStream->getUsed(); ASSERT_GT(usedSpaceAfter, usedSpaceBefore); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, ptrOffset(cmdStream->getCpuBase(), 0), usedSpaceAfter)); auto noops = findAll(cmdList.begin(), cmdList.end()); uint32_t expecteNumberOfNops = 6u; EXPECT_EQ(expecteNumberOfNops, noops.size()); bool tagFound = false; for (auto it = noops.begin(); it != noops.end() && !tagFound; ++it) { auto noop = genCmdCast(*(*it)); if (NEO::SWTags::BaseTag::getMarkerNoopID(SWTags::OpCode::CallNameBegin) == noop->getIdentificationNumber() && noop->getIdentificationNumberRegisterWriteEnable() == true && ++it != noops.end()) { noop = genCmdCast(*(*it)); if (noop->getIdentificationNumber() & 1 << 21 && noop->getIdentificationNumberRegisterWriteEnable() == false) { tagFound = true; } } } EXPECT_TRUE(tagFound); tagFound = false; for (auto it = noops.begin(); it != noops.end() && !tagFound; ++it) { auto noop = genCmdCast(*(*it)); if (NEO::SWTags::BaseTag::getMarkerNoopID(SWTags::OpCode::CallNameEnd) == noop->getIdentificationNumber() && noop->getIdentificationNumberRegisterWriteEnable() == true && ++it != noops.end()) { noop = genCmdCast(*(*it)); if (noop->getIdentificationNumber() & 1 << 21 && noop->getIdentificationNumberRegisterWriteEnable() == false) { tagFound = true; } } } EXPECT_TRUE(tagFound); } HWTEST_F(CommandListAppendLaunchKernelSWTags, 
givenEnableSWTagsWhenAppendMemoryCopyRegionThenThenTagsAreInserted) { using MI_NOOP = typename FamilyType::MI_NOOP; ze_result_t returnValue; std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)); auto cmdStream = commandList->commandContainer.getCommandStream(); auto usedSpaceBefore = cmdStream->getUsed(); void *srcBuffer = reinterpret_cast(0x0F000000); void *dstBuffer = reinterpret_cast(0x0FF00000); uint32_t width = 16; uint32_t height = 16; ze_copy_region_t sr = {0U, 0U, 0U, width, height, 0U}; ze_copy_region_t dr = {0U, 0U, 0U, width, height, 0U}; ze_result_t result = commandList->appendMemoryCopyRegion(dstBuffer, &dr, width, 0, srcBuffer, &sr, width, 0, nullptr, 0, nullptr); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto usedSpaceAfter = cmdStream->getUsed(); ASSERT_GT(usedSpaceAfter, usedSpaceBefore); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, ptrOffset(cmdStream->getCpuBase(), 0), usedSpaceAfter)); auto noops = findAll(cmdList.begin(), cmdList.end()); uint32_t expecteNumberOfNops = 10u; EXPECT_EQ(expecteNumberOfNops, noops.size()); bool tagFound = false; for (auto it = noops.begin(); it != noops.end() && !tagFound; ++it) { auto noop = genCmdCast(*(*it)); if (NEO::SWTags::BaseTag::getMarkerNoopID(SWTags::OpCode::CallNameBegin) == noop->getIdentificationNumber() && noop->getIdentificationNumberRegisterWriteEnable() == true && ++it != noops.end()) { noop = genCmdCast(*(*it)); if (noop->getIdentificationNumber() & 1 << 21 && noop->getIdentificationNumberRegisterWriteEnable() == false) { tagFound = true; } } } EXPECT_TRUE(tagFound); tagFound = false; for (auto it = noops.begin(); it != noops.end() && !tagFound; ++it) { auto noop = genCmdCast(*(*it)); if (NEO::SWTags::BaseTag::getMarkerNoopID(SWTags::OpCode::CallNameEnd) == noop->getIdentificationNumber() && noop->getIdentificationNumberRegisterWriteEnable() == true && ++it != noops.end()) { noop = 
genCmdCast(*(*it)); if (noop->getIdentificationNumber() & 1 << 21 && noop->getIdentificationNumberRegisterWriteEnable() == false) { tagFound = true; } } } EXPECT_TRUE(tagFound); } using CommandListArbitrationPolicyTest = Test; HWTEST_F(CommandListArbitrationPolicyTest, whenCreatingCommandListThenDefaultThreadArbitrationPolicyIsUsed) { using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; ze_result_t returnValue; auto commandList = std::unique_ptr(whitebox_cast(L0::CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue))); EXPECT_NE(nullptr, commandList); EXPECT_NE(nullptr, commandList->commandContainer.getCommandStream()); GenCmdList parsedCommandList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( parsedCommandList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed())); using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; auto miLoadImm = findAll(parsedCommandList.begin(), parsedCommandList.end()); EXPECT_GE(2u, miLoadImm.size()); for (auto it : miLoadImm) { auto cmd = genCmdCast(*it); if (cmd->getRegisterOffset() == NEO::DebugControlReg2::address) { EXPECT_EQ(NEO::DebugControlReg2::getRegData(NEO::ThreadArbitrationPolicy::RoundRobin), cmd->getDataDword()); } } } HWTEST_F(CommandListArbitrationPolicyTest, whenCreatingCommandListThenChosenThreadArbitrationPolicyIsUsed) { DebugManagerStateRestore restorer; DebugManager.flags.OverrideThreadArbitrationPolicy.set(0); using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; ze_result_t returnValue; auto commandList = std::unique_ptr(whitebox_cast(L0::CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue))); EXPECT_NE(nullptr, commandList); EXPECT_NE(nullptr, commandList->commandContainer.getCommandStream()); GenCmdList parsedCommandList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( parsedCommandList, 
ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed())); using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; auto miLoadImm = findAll(parsedCommandList.begin(), parsedCommandList.end()); EXPECT_GE(2u, miLoadImm.size()); for (auto it : miLoadImm) { auto cmd = genCmdCast(*it); if (cmd->getRegisterOffset() == NEO::DebugControlReg2::address) { EXPECT_EQ(NEO::DebugControlReg2::getRegData(NEO::ThreadArbitrationPolicy::AgeBased), cmd->getDataDword()); } } } HWTEST_F(CommandListArbitrationPolicyTest, whenCommandListIsResetThenOriginalThreadArbitrationPolicyIsKept) { using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; ze_result_t returnValue; auto commandList = std::unique_ptr(whitebox_cast(L0::CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue))); EXPECT_NE(nullptr, commandList); EXPECT_NE(nullptr, commandList->commandContainer.getCommandStream()); bool found; uint64_t originalThreadArbitrationPolicy = std::numeric_limits::max(); { GenCmdList parsedCommandList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( parsedCommandList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed())); using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; auto miLoadImm = findAll(parsedCommandList.begin(), parsedCommandList.end()); EXPECT_GE(2u, miLoadImm.size()); for (auto it : miLoadImm) { auto cmd = genCmdCast(*it); if (cmd->getRegisterOffset() == NEO::DebugControlReg2::address) { EXPECT_EQ(NEO::DebugControlReg2::getRegData(NEO::ThreadArbitrationPolicy::RoundRobin), cmd->getDataDword()); originalThreadArbitrationPolicy = cmd->getDataDword(); found = false; } } } commandList->reset(); { GenCmdList parsedCommandList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( parsedCommandList, 
ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed())); using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; auto miLoadImm = findAll(parsedCommandList.begin(), parsedCommandList.end()); EXPECT_GE(2u, miLoadImm.size()); uint64_t newThreadArbitrationPolicy = std::numeric_limits::max(); for (auto it : miLoadImm) { auto cmd = genCmdCast(*it); if (cmd->getRegisterOffset() == NEO::DebugControlReg2::address) { EXPECT_EQ(NEO::DebugControlReg2::getRegData(NEO::ThreadArbitrationPolicy::RoundRobin), cmd->getDataDword()); newThreadArbitrationPolicy = cmd->getDataDword(); EXPECT_EQ(originalThreadArbitrationPolicy, newThreadArbitrationPolicy); } } } } using CmdlistAppendLaunchKernelTests = Test; struct CmdlistAppendLaunchKernelWithImplicitArgsTests : CmdlistAppendLaunchKernelTests { void SetUp() override { CmdlistAppendLaunchKernelTests::SetUp(); expectedImplicitArgs.numWorkDim = 3; expectedImplicitArgs.simdWidth = 32; expectedImplicitArgs.localSizeX = 2; expectedImplicitArgs.localSizeY = 3; expectedImplicitArgs.localSizeZ = 4; expectedImplicitArgs.globalOffsetX = 1; expectedImplicitArgs.globalOffsetY = 2; expectedImplicitArgs.globalOffsetZ = 3; expectedImplicitArgs.groupCountX = 2; expectedImplicitArgs.groupCountY = 1; expectedImplicitArgs.groupCountZ = 3; } template void dispatchKernelWithImplicitArgs() { expectedImplicitArgs.globalSizeX = expectedImplicitArgs.localSizeX * expectedImplicitArgs.groupCountX; expectedImplicitArgs.globalSizeY = expectedImplicitArgs.localSizeY * expectedImplicitArgs.groupCountY; expectedImplicitArgs.globalSizeZ = expectedImplicitArgs.localSizeZ * expectedImplicitArgs.groupCountZ; std::unique_ptr mockKernelImmData = std::make_unique(0u); auto kernelDescriptor = mockKernelImmData->kernelDescriptor; UnitTestHelper::adjustKernelDescriptorForImplicitArgs(*kernelDescriptor); kernelDescriptor->kernelAttributes.simdSize = expectedImplicitArgs.simdWidth; 
kernelDescriptor->kernelAttributes.workgroupDimensionsOrder[0] = workgroupDimOrder[0]; kernelDescriptor->kernelAttributes.workgroupDimensionsOrder[1] = workgroupDimOrder[1]; kernelDescriptor->kernelAttributes.workgroupDimensionsOrder[2] = workgroupDimOrder[2]; createModuleFromBinary(0u, false, mockKernelImmData.get()); auto kernel = std::make_unique(module.get()); ze_kernel_desc_t kernelDesc{ZE_STRUCTURE_TYPE_KERNEL_DESC}; kernel->initialize(&kernelDesc); kernel->kernelRequiresGenerationOfLocalIdsByRuntime = kernelRequiresGenerationOfLocalIdsByRuntime; kernel->requiredWorkgroupOrder = requiredWorkgroupOrder; kernel->setCrossThreadData(sizeof(uint64_t)); EXPECT_TRUE(kernel->getKernelDescriptor().kernelAttributes.flags.requiresImplicitArgs); ASSERT_NE(nullptr, kernel->getImplicitArgs()); kernel->setGroupSize(expectedImplicitArgs.localSizeX, expectedImplicitArgs.localSizeY, expectedImplicitArgs.localSizeZ); kernel->setGlobalOffsetExp(static_cast(expectedImplicitArgs.globalOffsetX), static_cast(expectedImplicitArgs.globalOffsetY), static_cast(expectedImplicitArgs.globalOffsetZ)); kernel->patchGlobalOffset(); ze_result_t result{}; commandList.reset(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto indirectHeap = commandList->commandContainer.getIndirectHeap(NEO::HeapType::INDIRECT_OBJECT); indirectHeapAllocation = indirectHeap->getGraphicsAllocation(); ze_group_count_t groupCount{expectedImplicitArgs.groupCountX, expectedImplicitArgs.groupCountY, expectedImplicitArgs.groupCountZ}; result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); implicitArgsProgrammingSize = ImplicitArgsHelper::getSizeForImplicitArgsPatching(&expectedImplicitArgs, *kernelDescriptor, neoDevice->getHardwareInfo()); auto sizeCrossThreadData = kernel->getCrossThreadDataSize(); auto sizePerThreadDataForWholeGroup = 
kernel->getPerThreadDataSizeForWholeThreadGroup(); EXPECT_EQ(indirectHeap->getUsed(), sizeCrossThreadData + sizePerThreadDataForWholeGroup + implicitArgsProgrammingSize); if (FamilyType::supportsCmdSet(IGFX_XE_HP_CORE)) { expectedImplicitArgs.localIdTablePtr = indirectHeapAllocation->getGpuAddress(); } expectedImplicitArgs.printfBufferPtr = kernel->getPrintfBufferAllocation()->getGpuAddress(); } std::unique_ptr commandList; GraphicsAllocation *indirectHeapAllocation = nullptr; ImplicitArgs expectedImplicitArgs = {sizeof(ImplicitArgs)}; std::array workgroupDimOrder{0, 1, 2}; uint32_t implicitArgsProgrammingSize = 0u; bool kernelRequiresGenerationOfLocalIdsByRuntime = true; uint32_t requiredWorkgroupOrder = 0; }; HWCMDTEST_F(IGFX_XE_HP_CORE, CmdlistAppendLaunchKernelWithImplicitArgsTests, givenXeHpAndLaterPlatformWhenAppendLaunchKernelWithImplicitArgsThenImplicitArgsAreSentToIndirectHeapWithLocalIds) { std::array localSize{2, 3, 4}; size_t totalLocalSize = localSize[0] * localSize[1] * localSize[2]; expectedImplicitArgs.localSizeX = localSize[0]; expectedImplicitArgs.localSizeY = localSize[1]; expectedImplicitArgs.localSizeZ = localSize[2]; dispatchKernelWithImplicitArgs(); auto grfSize = ImplicitArgsHelper::getGrfSize(expectedImplicitArgs.simdWidth, sizeof(typename FamilyType::GRF)); auto expectedLocalIds = alignedMalloc(implicitArgsProgrammingSize - sizeof(ImplicitArgs), MemoryConstants::cacheLineSize); generateLocalIDs(expectedLocalIds, expectedImplicitArgs.simdWidth, localSize, workgroupDimOrder, false, grfSize); auto localIdsProgrammingSize = implicitArgsProgrammingSize - sizeof(ImplicitArgs); size_t sizeForLocalIds = NEO::PerThreadDataHelper::getPerThreadDataSizeTotal(expectedImplicitArgs.simdWidth, grfSize, 3u, totalLocalSize); EXPECT_EQ(0, memcmp(expectedLocalIds, indirectHeapAllocation->getUnderlyingBuffer(), sizeForLocalIds)); alignedFree(expectedLocalIds); auto implicitArgsInIndirectData = ptrOffset(indirectHeapAllocation->getUnderlyingBuffer(), 
localIdsProgrammingSize); EXPECT_EQ(0, memcmp(implicitArgsInIndirectData, &expectedImplicitArgs, sizeof(ImplicitArgs))); } HWCMDTEST_F(IGFX_GEN8_CORE, CmdlistAppendLaunchKernelWithImplicitArgsTests, givenPreXeHpPlatformWhenAppendLaunchKernelWithImplicitArgsThenImplicitArgsAreSentToIndirectHeapWithoutLocalIds) { dispatchKernelWithImplicitArgs(); auto implicitArgsInIndirectData = indirectHeapAllocation->getUnderlyingBuffer(); EXPECT_EQ(0, memcmp(implicitArgsInIndirectData, &expectedImplicitArgs, sizeof(ImplicitArgs))); auto crossThreadDataInIndirectData = ptrOffset(indirectHeapAllocation->getUnderlyingBuffer(), 0x80); auto programmedImplicitArgsGpuVA = reinterpret_cast(crossThreadDataInIndirectData)[0]; EXPECT_EQ(indirectHeapAllocation->getGpuAddress(), programmedImplicitArgsGpuVA); } HWCMDTEST_F(IGFX_XE_HP_CORE, CmdlistAppendLaunchKernelWithImplicitArgsTests, givenXeHpAndLaterPlatformAndHwGeneratedLocalIdsWhenAppendLaunchKernelWithImplicitArgsThenImplicitArgsLocalIdsRespectWalkOrder) { workgroupDimOrder[0] = 2; workgroupDimOrder[1] = 1; workgroupDimOrder[2] = 0; kernelRequiresGenerationOfLocalIdsByRuntime = false; requiredWorkgroupOrder = 2; // walk order 1 0 2 std::array expectedDimOrder = {1, 0, 2}; std::array localSize{2, 3, 4}; size_t totalLocalSize = localSize[0] * localSize[1] * localSize[2]; expectedImplicitArgs.localSizeX = localSize[0]; expectedImplicitArgs.localSizeY = localSize[1]; expectedImplicitArgs.localSizeZ = localSize[2]; dispatchKernelWithImplicitArgs(); auto grfSize = ImplicitArgsHelper::getGrfSize(expectedImplicitArgs.simdWidth, sizeof(typename FamilyType::GRF)); auto expectedLocalIds = alignedMalloc(implicitArgsProgrammingSize - sizeof(ImplicitArgs), MemoryConstants::cacheLineSize); generateLocalIDs(expectedLocalIds, expectedImplicitArgs.simdWidth, localSize, expectedDimOrder, false, grfSize); auto localIdsProgrammingSize = implicitArgsProgrammingSize - sizeof(ImplicitArgs); size_t sizeForLocalIds = 
NEO::PerThreadDataHelper::getPerThreadDataSizeTotal(expectedImplicitArgs.simdWidth, grfSize, 3u, totalLocalSize); EXPECT_EQ(0, memcmp(expectedLocalIds, indirectHeapAllocation->getUnderlyingBuffer(), sizeForLocalIds)); alignedFree(expectedLocalIds); auto implicitArgsInIndirectData = ptrOffset(indirectHeapAllocation->getUnderlyingBuffer(), localIdsProgrammingSize); EXPECT_EQ(0, memcmp(implicitArgsInIndirectData, &expectedImplicitArgs, sizeof(ImplicitArgs))); } HWCMDTEST_F(IGFX_XE_HP_CORE, CmdlistAppendLaunchKernelWithImplicitArgsTests, givenXeHpAndLaterPlatformWhenAppendLaunchKernelWithImplicitArgsAndSimd1ThenLocalIdsAreGeneratedCorrectly) { workgroupDimOrder[0] = 2; workgroupDimOrder[1] = 1; workgroupDimOrder[2] = 0; expectedImplicitArgs.simdWidth = 1; expectedImplicitArgs.localSizeX = 2; expectedImplicitArgs.localSizeY = 2; expectedImplicitArgs.localSizeZ = 1; dispatchKernelWithImplicitArgs(); uint16_t expectedLocalIds[][3] = {{0, 0, 0}, {0, 1, 0}, {0, 0, 1}, {0, 1, 1}}; EXPECT_EQ(0, memcmp(expectedLocalIds, indirectHeapAllocation->getUnderlyingBuffer(), sizeof(expectedLocalIds))); auto localIdsProgrammingSize = implicitArgsProgrammingSize - sizeof(ImplicitArgs); EXPECT_EQ(alignUp(sizeof(expectedLocalIds), MemoryConstants::cacheLineSize), localIdsProgrammingSize); auto implicitArgsInIndirectData = ptrOffset(indirectHeapAllocation->getUnderlyingBuffer(), localIdsProgrammingSize); EXPECT_EQ(0, memcmp(implicitArgsInIndirectData, &expectedImplicitArgs, sizeof(ImplicitArgs))); } HWTEST_F(CmdlistAppendLaunchKernelTests, givenKernelWithoutImplicitArgsWhenAppendLaunchKernelThenImplicitArgsAreNotSentToIndirectHeap) { std::unique_ptr mockKernelImmData = std::make_unique(0u); auto kernelDescriptor = mockKernelImmData->kernelDescriptor; kernelDescriptor->kernelAttributes.flags.requiresImplicitArgs = false; createModuleFromBinary(0u, false, mockKernelImmData.get()); auto kernel = std::make_unique(module.get()); ze_kernel_desc_t kernelDesc{ZE_STRUCTURE_TYPE_KERNEL_DESC}; 
kernel->initialize(&kernelDesc); EXPECT_FALSE(kernel->getKernelDescriptor().kernelAttributes.flags.requiresImplicitArgs); EXPECT_EQ(nullptr, kernel->getImplicitArgs()); kernel->setGroupSize(4, 5, 6); kernel->setGroupCount(3, 2, 1); kernel->setGlobalOffsetExp(1, 2, 3); kernel->patchGlobalOffset(); ze_result_t result{}; std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); ze_group_count_t groupCount = {3, 2, 1}; result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto indirectHeap = commandList->commandContainer.getIndirectHeap(NEO::HeapType::INDIRECT_OBJECT); auto sizeCrossThreadData = kernel->getCrossThreadDataSize(); auto sizePerThreadDataForWholeGroup = kernel->getPerThreadDataSizeForWholeThreadGroup(); EXPECT_EQ(indirectHeap->getUsed(), sizeCrossThreadData + sizePerThreadDataForWholeGroup); } HWTEST2_F(CmdlistAppendLaunchKernelTests, givenKernelWitchScratchAndPrivateWhenAppendLaunchKernelThenCmdListHasCorrectPrivateAndScratchSizesSet, IsAtLeastXeHpCore) { std::unique_ptr mockKernelImmData = std::make_unique(0u); auto kernelDescriptor = mockKernelImmData->kernelDescriptor; kernelDescriptor->kernelAttributes.flags.requiresImplicitArgs = false; kernelDescriptor->kernelAttributes.perThreadScratchSize[0] = 0x200; kernelDescriptor->kernelAttributes.perThreadScratchSize[1] = 0x100; createModuleFromBinary(0u, false, mockKernelImmData.get()); auto kernel = std::make_unique(module.get()); ze_kernel_desc_t kernelDesc{ZE_STRUCTURE_TYPE_KERNEL_DESC}; kernel->initialize(&kernelDesc); EXPECT_FALSE(kernel->getKernelDescriptor().kernelAttributes.flags.requiresImplicitArgs); EXPECT_EQ(nullptr, kernel->getImplicitArgs()); kernel->setGroupSize(4, 5, 6); kernel->setGroupCount(3, 2, 1); kernel->setGlobalOffsetExp(1, 2, 3); kernel->patchGlobalOffset(); ze_result_t result{}; std::unique_ptr 
commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); ze_group_count_t groupCount = {3, 2, 1}; result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(commandList->getCommandListPerThreadPrivateScratchSize(), static_cast(0x100)); EXPECT_EQ(commandList->getCommandListPerThreadScratchSize(), static_cast(0x200)); } HWTEST_F(CmdlistAppendLaunchKernelTests, whenEncodingWorkDimForIndirectDispatchThenSizeIsProperlyEstimated) { Mock<::L0::Kernel> kernel; ze_result_t returnValue; std::unique_ptr commandList(L0::CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)); { uint32_t groupSize[] = {1, 1, 1}; auto estimate = EncodeIndirectParams::getCmdsSizeForSetWorkDimIndirect(groupSize, false); auto sizeBefore = commandList->commandContainer.getCommandStream()->getUsed(); EncodeIndirectParams::setWorkDimIndirect(commandList->commandContainer, 0x4, 0u, groupSize); auto sizeAfter = commandList->commandContainer.getCommandStream()->getUsed(); EXPECT_LE(sizeAfter - sizeBefore, estimate); } { uint32_t groupSize[] = {1, 1, 2}; auto estimate = EncodeIndirectParams::getCmdsSizeForSetWorkDimIndirect(groupSize, false); auto sizeBefore = commandList->commandContainer.getCommandStream()->getUsed(); EncodeIndirectParams::setWorkDimIndirect(commandList->commandContainer, 0x4, 0u, groupSize); auto sizeAfter = commandList->commandContainer.getCommandStream()->getUsed(); EXPECT_LE(sizeAfter - sizeBefore, estimate); } { uint32_t groupSize[] = {1, 1, 1}; auto estimate = EncodeIndirectParams::getCmdsSizeForSetWorkDimIndirect(groupSize, true); auto sizeBefore = commandList->commandContainer.getCommandStream()->getUsed(); EncodeIndirectParams::setWorkDimIndirect(commandList->commandContainer, 0x2, 0u, groupSize); auto sizeAfter = 
commandList->commandContainer.getCommandStream()->getUsed(); EXPECT_LE(sizeAfter - sizeBefore, estimate); } { uint32_t groupSize[] = {1, 1, 2}; auto estimate = EncodeIndirectParams::getCmdsSizeForSetWorkDimIndirect(groupSize, true); auto sizeBefore = commandList->commandContainer.getCommandStream()->getUsed(); EncodeIndirectParams::setWorkDimIndirect(commandList->commandContainer, 0x2, 0u, groupSize); auto sizeAfter = commandList->commandContainer.getCommandStream()->getUsed(); EXPECT_LE(sizeAfter - sizeBefore, estimate); } } using CommandListAppendLaunchKernel = Test; HWTEST2_F(CommandListAppendLaunchKernel, givenCooperativeAndNonCooperativeKernelsWhenAppendLaunchCooperativeKernelIsCalledThenReturnError, IsAtLeastSkl) { Mock<::L0::Kernel> kernel; auto pMockModule = std::unique_ptr(new Mock(device, nullptr)); kernel.module = pMockModule.get(); kernel.setGroupSize(4, 1, 1); ze_group_count_t groupCount{8, 1, 1}; auto pCommandList = std::make_unique>>(); pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u); bool isCooperative = false; auto result = pCommandList->appendLaunchKernelWithParams(kernel.toHandle(), &groupCount, nullptr, false, false, isCooperative); EXPECT_EQ(ZE_RESULT_SUCCESS, result); isCooperative = true; result = pCommandList->appendLaunchKernelWithParams(kernel.toHandle(), &groupCount, nullptr, false, false, isCooperative); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, result); pCommandList = std::make_unique>>(); pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u); isCooperative = true; result = pCommandList->appendLaunchKernelWithParams(kernel.toHandle(), &groupCount, nullptr, false, false, isCooperative); EXPECT_EQ(ZE_RESULT_SUCCESS, result); isCooperative = false; result = pCommandList->appendLaunchKernelWithParams(kernel.toHandle(), &groupCount, nullptr, false, false, isCooperative); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, result); } HWTEST2_F(CommandListAppendLaunchKernel, 
GivenDebugToggleSetWhenUpdateStreamPropertiesIsCalledThenCorrectThreadArbitrationPolicyIsSet, IsAtLeastSkl) { DebugManagerStateRestore restorer; auto &hwHelper = NEO::HwHelper::get(device->getHwInfo().platform.eRenderCoreFamily); auto defaultThreadArbitrationPolicy = hwHelper.getDefaultThreadArbitrationPolicy(); auto nonDefaultThreadArbitrationPolicy = defaultThreadArbitrationPolicy + 1; Mock<::L0::Kernel> kernel; auto pMockModule = std::unique_ptr(new Mock(device, nullptr)); kernel.module = pMockModule.get(); auto pCommandList = std::make_unique>>(); auto result = pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u); ASSERT_EQ(ZE_RESULT_SUCCESS, result); // initial kernel with no policy preference pCommandList->updateStreamProperties(kernel, false, false); EXPECT_EQ(defaultThreadArbitrationPolicy, pCommandList->finalStreamState.stateComputeMode.threadArbitrationPolicy.value); // policy changed to non-default state pCommandList->finalStreamState.stateComputeMode.threadArbitrationPolicy.value = nonDefaultThreadArbitrationPolicy; // another kernel with no policy preference - do not update policy pCommandList->updateStreamProperties(kernel, false, false); EXPECT_EQ(nonDefaultThreadArbitrationPolicy, pCommandList->finalStreamState.stateComputeMode.threadArbitrationPolicy.value); // another kernel with no policy preference, this time with debug toggle set - update policy back to default value DebugManager.flags.ForceDefaultThreadArbitrationPolicyIfNotSpecified.set(true); pCommandList->updateStreamProperties(kernel, false, false); EXPECT_EQ(defaultThreadArbitrationPolicy, pCommandList->finalStreamState.stateComputeMode.threadArbitrationPolicy.value); } struct MultiTileCommandListAppendLaunchFunctionXeHpCoreFixture : public MultiDeviceModuleFixture { void SetUp() { DebugManager.flags.EnableImplicitScaling.set(1); MultiDeviceFixture::numRootDevices = 1u; MultiDeviceFixture::numSubDevices = 4u; MultiDeviceModuleFixture::SetUp(); createModuleFromBinary(0u); 
createKernel(0u); device = driverHandle->devices[0]; ze_context_handle_t hContext; ze_context_desc_t desc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC, nullptr, 0}; ze_result_t res = device->getDriverHandle()->createContext(&desc, 0u, nullptr, &hContext); EXPECT_EQ(ZE_RESULT_SUCCESS, res); contextImp = static_cast(Context::fromHandle(hContext)); ze_result_t returnValue; commandList = whitebox_cast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); } void TearDown() { commandList->destroy(); contextImp->destroy(); MultiDeviceModuleFixture::TearDown(); } ContextImp *contextImp = nullptr; WhiteBox<::L0::CommandList> *commandList = nullptr; L0::Device *device = nullptr; VariableBackup backup{&NEO::ImplicitScaling::apiSupport, true}; }; using MultiTileCommandListAppendLaunchFunctionXeHpCoreTest = Test; HWCMDTEST_F(IGFX_XE_HP_CORE, MultiTileCommandListAppendLaunchFunctionXeHpCoreTest, givenImplicitScalingEnabledWhenAppendingKernelWithEventThenAllEventPacketsAreUsed) { ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.stype = ZE_STRUCTURE_TYPE_EVENT_POOL_DESC; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; eventPoolDesc.count = 1; ze_event_desc_t eventDesc = {}; eventDesc.stype = ZE_STRUCTURE_TYPE_EVENT_DESC; eventDesc.index = 0; auto deviceHandle = device->toHandle(); ze_result_t result = ZE_RESULT_SUCCESS; std::unique_ptr eventPool(EventPool::create(device->getDriverHandle(), context, 1, &deviceHandle, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); std::unique_ptr event(Event::create(eventPool.get(), &eventDesc, device)); ze_event_handle_t hEventHandle = event->toHandle(); EXPECT_EQ(4u, commandList->partitionCount); ze_group_count_t groupCount{256, 1, 1}; result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, hEventHandle, 0, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(4u, event->getPacketsInUse()); EXPECT_EQ(4u, 
commandList->partitionCount); } HWTEST2_F(MultiTileCommandListAppendLaunchFunctionXeHpCoreTest, givenCooperativeKernelWhenAppendingKernelsThenDoNotUseImplicitScaling, IsAtLeastXeHpCore) { ze_group_count_t groupCount{1, 1, 1}; auto commandListWithNonCooperativeKernel = std::make_unique>>(); auto result = commandListWithNonCooperativeKernel->initialize(device, NEO::EngineGroupType::RenderCompute, 0u); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto sizeBefore = commandListWithNonCooperativeKernel->commandContainer.getCommandStream()->getUsed(); result = commandListWithNonCooperativeKernel->appendLaunchKernelWithParams(kernel->toHandle(), &groupCount, nullptr, false, false, false); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto sizeAfter = commandListWithNonCooperativeKernel->commandContainer.getCommandStream()->getUsed(); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandListWithNonCooperativeKernel->commandContainer.getCommandStream()->getCpuBase(), sizeBefore), sizeAfter - sizeBefore)); auto itorWalker = find(cmdList.begin(), cmdList.end()); auto cmd = genCmdCast(*itorWalker); EXPECT_TRUE(cmd->getWorkloadPartitionEnable()); auto commandListWithCooperativeKernel = std::make_unique>>(); result = commandListWithCooperativeKernel->initialize(device, NEO::EngineGroupType::RenderCompute, 0u); ASSERT_EQ(ZE_RESULT_SUCCESS, result); sizeBefore = commandListWithCooperativeKernel->commandContainer.getCommandStream()->getUsed(); result = commandListWithCooperativeKernel->appendLaunchKernelWithParams(kernel->toHandle(), &groupCount, nullptr, false, false, true); ASSERT_EQ(ZE_RESULT_SUCCESS, result); sizeAfter = commandListWithCooperativeKernel->commandContainer.getCommandStream()->getUsed(); cmdList.clear(); ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandListWithNonCooperativeKernel->commandContainer.getCommandStream()->getCpuBase(), sizeBefore), sizeAfter - sizeBefore)); itorWalker = find(cmdList.begin(), 
cmdList.end()); cmd = genCmdCast(*itorWalker); EXPECT_TRUE(cmd->getWorkloadPartitionEnable()); } } // namespace ult } // namespace L0 test_cmdlist_append_memory.cpp000066400000000000000000000476021422164147700350250ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/cmdlist/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h" namespace L0 { namespace ult { using AppendMemoryCopy = Test; template class MockAppendMemoryCopy : public WhiteBox<::L0::CommandListCoreFamily> { public: ADDMETHOD_NOBASE(appendMemoryCopyKernelWithGA, ze_result_t, ZE_RESULT_SUCCESS, (void *dstPtr, NEO::GraphicsAllocation *dstPtrAlloc, uint64_t dstOffset, void *srcPtr, NEO::GraphicsAllocation *srcPtrAlloc, uint64_t srcOffset, uint64_t size, uint64_t elementSize, Builtin builtin, ze_event_handle_t hSignalEvent, bool isStateless)); ADDMETHOD_NOBASE(appendMemoryCopyBlit, ze_result_t, ZE_RESULT_SUCCESS, (uintptr_t dstPtr, NEO::GraphicsAllocation *dstPtrAlloc, uint64_t dstOffset, uintptr_t srcPtr, NEO::GraphicsAllocation *srcPtrAlloc, uint64_t srcOffset, uint64_t size)); AlignedAllocationData getAlignedAllocation(L0::Device *device, const void *buffer, uint64_t bufferSize, bool allowHostCopy) override { return L0::CommandListCoreFamily::getAlignedAllocation(device, buffer, bufferSize, allowHostCopy); } ze_result_t appendMemoryCopyKernel2d(AlignedAllocationData *dstAlignedAllocation, AlignedAllocationData *srcAlignedAllocation, Builtin builtin, const ze_copy_region_t *dstRegion, uint32_t dstPitch, size_t dstOffset, const ze_copy_region_t *srcRegion, uint32_t srcPitch, size_t srcOffset, ze_event_handle_t hSignalEvent, uint32_t 
numWaitEvents, ze_event_handle_t *phWaitEvents) override { srcAlignedPtr = srcAlignedAllocation->alignedAllocationPtr; dstAlignedPtr = dstAlignedAllocation->alignedAllocationPtr; return L0::CommandListCoreFamily::appendMemoryCopyKernel2d(dstAlignedAllocation, srcAlignedAllocation, builtin, dstRegion, dstPitch, dstOffset, srcRegion, srcPitch, srcOffset, hSignalEvent, numWaitEvents, phWaitEvents); } ze_result_t appendMemoryCopyKernel3d(AlignedAllocationData *dstAlignedAllocation, AlignedAllocationData *srcAlignedAllocation, Builtin builtin, const ze_copy_region_t *dstRegion, uint32_t dstPitch, uint32_t dstSlicePitch, size_t dstOffset, const ze_copy_region_t *srcRegion, uint32_t srcPitch, uint32_t srcSlicePitch, size_t srcOffset, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override { srcAlignedPtr = srcAlignedAllocation->alignedAllocationPtr; dstAlignedPtr = dstAlignedAllocation->alignedAllocationPtr; return L0::CommandListCoreFamily::appendMemoryCopyKernel3d(dstAlignedAllocation, srcAlignedAllocation, builtin, dstRegion, dstPitch, dstSlicePitch, dstOffset, srcRegion, srcPitch, srcSlicePitch, srcOffset, hSignalEvent, numWaitEvents, phWaitEvents); } ze_result_t appendMemoryCopyBlitRegion(NEO::GraphicsAllocation *srcAllocation, NEO::GraphicsAllocation *dstAllocation, size_t srcOffset, size_t dstOffset, ze_copy_region_t srcRegion, ze_copy_region_t dstRegion, const Vec3 ©Size, size_t srcRowPitch, size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch, const Vec3 &srcSize, const Vec3 &dstSize, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override { srcBlitCopyRegionOffset = srcOffset; dstBlitCopyRegionOffset = dstOffset; return L0::CommandListCoreFamily::appendMemoryCopyBlitRegion(srcAllocation, dstAllocation, srcOffset, dstOffset, srcRegion, dstRegion, copySize, srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, srcSize, dstSize, hSignalEvent, numWaitEvents, 
phWaitEvents); } uintptr_t srcAlignedPtr; uintptr_t dstAlignedPtr; size_t srcBlitCopyRegionOffset = 0; size_t dstBlitCopyRegionOffset = 0; }; HWTEST2_F(AppendMemoryCopy, givenCommandListAndHostPointersWhenMemoryCopyRegionCalledThenTwoNewAllocationAreAddedToHostMapPtr, IsAtLeastSkl) { MockAppendMemoryCopy cmdList; cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); void *srcPtr = reinterpret_cast(0x1234); void *dstPtr = reinterpret_cast(0x2345); ze_copy_region_t dstRegion = {4, 4, 4, 2, 2, 2}; ze_copy_region_t srcRegion = {4, 4, 4, 2, 2, 2}; cmdList.appendMemoryCopyRegion(dstPtr, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr, 0, nullptr); EXPECT_EQ(cmdList.hostPtrMap.size(), 2u); } HWTEST2_F(AppendMemoryCopy, givenCommandListAndUnalignedHostPointersWhenMemoryCopyRegion2DCalledThenSrcDstPointersArePageAligned, IsAtLeastSkl) { MockAppendMemoryCopy cmdList; cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); void *srcPtr = reinterpret_cast(0x1234); void *dstPtr = reinterpret_cast(0x2345); ze_copy_region_t dstRegion = {4, 4, 0, 2, 2, 0}; ze_copy_region_t srcRegion = {4, 4, 0, 2, 2, 0}; cmdList.appendMemoryCopyRegion(dstPtr, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr, 0, nullptr); auto sshAlignmentMask = NEO::EncodeSurfaceState::getSurfaceBaseAddressAlignmentMask(); EXPECT_TRUE(cmdList.srcAlignedPtr == (cmdList.srcAlignedPtr & sshAlignmentMask)); EXPECT_TRUE(cmdList.dstAlignedPtr == (cmdList.dstAlignedPtr & sshAlignmentMask)); } HWTEST2_F(AppendMemoryCopy, givenCommandListAndUnalignedHostPointersWhenMemoryCopyRegion3DCalledThenSrcDstPointersArePageAligned, IsAtLeastSkl) { MockAppendMemoryCopy cmdList; cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); void *srcPtr = reinterpret_cast(0x1234); void *dstPtr = reinterpret_cast(0x2345); ze_copy_region_t dstRegion = {4, 4, 4, 2, 2, 2}; ze_copy_region_t srcRegion = {4, 4, 4, 2, 2, 2}; cmdList.appendMemoryCopyRegion(dstPtr, &dstRegion, 0, 0, srcPtr, &srcRegion, 
0, 0, nullptr, 0, nullptr); auto sshAlignmentMask = NEO::EncodeSurfaceState::getSurfaceBaseAddressAlignmentMask(); EXPECT_TRUE(cmdList.srcAlignedPtr == (cmdList.srcAlignedPtr & sshAlignmentMask)); EXPECT_TRUE(cmdList.dstAlignedPtr == (cmdList.dstAlignedPtr & sshAlignmentMask)); } HWTEST2_F(AppendMemoryCopy, givenCommandListAndUnalignedHostPointersWhenBlitMemoryCopyRegion2DCalledThenSrcDstNotZeroOffsetsArePassed, IsAtLeastSkl) { MockAppendMemoryCopy cmdList; cmdList.initialize(device, NEO::EngineGroupType::Copy, 0u); void *srcPtr = reinterpret_cast(0x1233); void *dstPtr = reinterpret_cast(0x2345); ze_copy_region_t dstRegion = {4, 4, 0, 2, 2, 0}; ze_copy_region_t srcRegion = {4, 4, 0, 2, 2, 0}; cmdList.appendMemoryCopyRegion(dstPtr, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr, 0, nullptr); EXPECT_GT(cmdList.srcBlitCopyRegionOffset, 0u); EXPECT_GT(cmdList.dstBlitCopyRegionOffset, 0u); } HWTEST2_F(AppendMemoryCopy, givenCommandListAndUnalignedHostPointersWhenBlitMemoryCopyRegion3DCalledThenSrcDstNotZeroOffsetsArePassed, IsAtLeastSkl) { MockAppendMemoryCopy cmdList; cmdList.initialize(device, NEO::EngineGroupType::Copy, 0u); void *srcPtr = reinterpret_cast(0x1233); void *dstPtr = reinterpret_cast(0x2345); ze_copy_region_t dstRegion = {4, 4, 4, 2, 2, 2}; ze_copy_region_t srcRegion = {4, 4, 4, 2, 2, 2}; cmdList.appendMemoryCopyRegion(dstPtr, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr, 0, nullptr); EXPECT_GT(cmdList.srcBlitCopyRegionOffset, 0u); EXPECT_GT(cmdList.dstBlitCopyRegionOffset, 0u); } HWTEST2_F(AppendMemoryCopy, givenCommandListAndAlignedHostPointersWhenBlitMemoryCopyRegion3DCalledThenSrcDstZeroOffsetsArePassed, IsAtLeastSkl) { MockAppendMemoryCopy cmdList; cmdList.initialize(device, NEO::EngineGroupType::Copy, 0u); void *srcPtr = alignDown(reinterpret_cast(0x1233), NEO::EncodeSurfaceState::getSurfaceBaseAddressAlignment()); void *dstPtr = alignDown(reinterpret_cast(0x2345), NEO::EncodeSurfaceState::getSurfaceBaseAddressAlignment()); 
ze_copy_region_t dstRegion = {4, 4, 4, 2, 2, 2}; ze_copy_region_t srcRegion = {4, 4, 4, 2, 2, 2}; cmdList.appendMemoryCopyRegion(dstPtr, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr, 0, nullptr); EXPECT_EQ(cmdList.srcBlitCopyRegionOffset, 0u); EXPECT_EQ(cmdList.dstBlitCopyRegionOffset, 0u); } HWTEST2_F(AppendMemoryCopy, givenCommandListAndHostPointersWhenMemoryCopyRegionCalledThenPipeControlWithDcFlushAdded, IsAtLeastSkl) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; MockAppendMemoryCopy cmdList; cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); void *srcPtr = reinterpret_cast(0x1234); void *dstPtr = reinterpret_cast(0x2345); ze_copy_region_t dstRegion = {4, 4, 4, 2, 2, 2}; ze_copy_region_t srcRegion = {4, 4, 4, 2, 2, 2}; cmdList.appendMemoryCopyRegion(dstPtr, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr, 0, nullptr); auto &commandContainer = cmdList.commandContainer; GenCmdList genCmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( genCmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); auto itor = find(genCmdList.begin(), genCmdList.end()); ASSERT_NE(genCmdList.end(), itor); PIPE_CONTROL *cmd = nullptr; while (itor != genCmdList.end()) { cmd = genCmdCast(*itor); itor = find(++itor, genCmdList.end()); } EXPECT_EQ(MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo), cmd->getDcFlushEnable()); } HWTEST2_F(AppendMemoryCopy, givenImmediateCommandListWhenAppendingMemoryCopyThenSuccessIsReturned, IsAtLeastSkl) { Mock cmdQueue; void *srcPtr = reinterpret_cast(0x1234); void *dstPtr = reinterpret_cast(0x2345); auto commandList = std::make_unique>>(); ASSERT_NE(nullptr, commandList); ze_result_t ret = commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u); ASSERT_EQ(ZE_RESULT_SUCCESS, ret); commandList->device = device; commandList->cmdQImmediate = &cmdQueue; commandList->cmdListType = 
CommandList::CommandListType::TYPE_IMMEDIATE; auto result = commandList->appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 0, nullptr); ASSERT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(1u, cmdQueue.executeCommandListsCalled); EXPECT_EQ(1u, cmdQueue.synchronizeCalled); commandList->cmdQImmediate = nullptr; } HWTEST2_F(AppendMemoryCopy, givenImmediateCommandListWhenAppendingMemoryCopyWithInvalidEventThenInvalidArgumentErrorIsReturned, IsAtLeastSkl) { Mock cmdQueue; void *srcPtr = reinterpret_cast(0x1234); void *dstPtr = reinterpret_cast(0x2345); auto commandList = std::make_unique>>(); ASSERT_NE(nullptr, commandList); ze_result_t ret = commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u); ASSERT_EQ(ZE_RESULT_SUCCESS, ret); commandList->device = device; commandList->cmdQImmediate = &cmdQueue; commandList->cmdListType = CommandList::CommandListType::TYPE_IMMEDIATE; auto result = commandList->appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 1, nullptr); ASSERT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, result); commandList->cmdQImmediate = nullptr; } HWTEST2_F(AppendMemoryCopy, givenCommandListAndHostPointersWhenMemoryCopyCalledThenPipeControlWithDcFlushAdded, IsAtLeastSkl) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; MockAppendMemoryCopy cmdList; cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); void *srcPtr = reinterpret_cast(0x1234); void *dstPtr = reinterpret_cast(0x2345); cmdList.appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 0, nullptr); auto &commandContainer = cmdList.commandContainer; GenCmdList genCmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( genCmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); auto itor = find(genCmdList.begin(), genCmdList.end()); ASSERT_NE(genCmdList.end(), itor); PIPE_CONTROL *cmd = nullptr; while (itor != genCmdList.end()) { cmd = genCmdCast(*itor); itor = find(++itor, genCmdList.end()); } 
EXPECT_EQ(MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo), cmd->getDcFlushEnable()); } HWTEST2_F(AppendMemoryCopy, givenCopyCommandListWhenTimestampPassedToMemoryCopyThenAppendProfilingCalledOnceBeforeAndAfterCommand, IsAtLeastSkl) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM; using MI_FLUSH_DW = typename GfxFamily::MI_FLUSH_DW; MockAppendMemoryCopy commandList; commandList.initialize(device, NEO::EngineGroupType::Copy, 0u); void *srcPtr = reinterpret_cast(0x1234); void *dstPtr = reinterpret_cast(0x2345); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; eventPoolDesc.count = 1; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.signal = 0; eventDesc.wait = 0; ze_result_t result = ZE_RESULT_SUCCESS; auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); commandList.appendMemoryCopy(dstPtr, srcPtr, 0x100, event->toHandle(), 0, nullptr); EXPECT_GT(commandList.appendMemoryCopyBlitCalled, 1u); EXPECT_EQ(1u, event->getPacketsInUse()); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandList.commandContainer.getCommandStream()->getCpuBase(), 0), commandList.commandContainer.getCommandStream()->getUsed())); auto itor = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), itor); auto cmd = genCmdCast(*itor); EXPECT_EQ(cmd->getRegisterAddress(), REG_GLOBAL_TIMESTAMP_LDW); itor++; EXPECT_NE(cmdList.end(), itor); cmd = genCmdCast(*itor); EXPECT_EQ(cmd->getRegisterAddress(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW); itor = find(itor, cmdList.end()); EXPECT_NE(cmdList.end(), itor); itor = find(itor, cmdList.end()); EXPECT_NE(cmdList.end(), itor); cmd = 
genCmdCast(*itor); EXPECT_EQ(cmd->getRegisterAddress(), REG_GLOBAL_TIMESTAMP_LDW); itor++; EXPECT_NE(cmdList.end(), itor); cmd = genCmdCast(*itor); EXPECT_EQ(cmd->getRegisterAddress(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW); itor++; EXPECT_EQ(cmdList.end(), itor); } using SupportedPlatforms = IsWithinProducts; HWTEST2_F(AppendMemoryCopy, givenCommandListWhenTimestampPassedToMemoryCopyThenAppendProfilingCalledOnceBeforeAndAfterCommand, SupportedPlatforms) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using MI_LOAD_REGISTER_REG = typename GfxFamily::MI_LOAD_REGISTER_REG; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; MockAppendMemoryCopy commandList; commandList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); void *srcPtr = reinterpret_cast(0x1234); void *dstPtr = reinterpret_cast(0x2345); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; ze_result_t result = ZE_RESULT_SUCCESS; auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); commandList.appendMemoryCopy(dstPtr, srcPtr, 0x100, event->toHandle(), 0, nullptr); EXPECT_GT(commandList.appendMemoryCopyKernelWithGACalled, 0u); EXPECT_EQ(commandList.appendMemoryCopyBlitCalled, 0u); EXPECT_EQ(1u, event->getPacketsInUse()); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandList.commandContainer.getCommandStream()->getCpuBase(), 0), commandList.commandContainer.getCommandStream()->getUsed())); auto itor = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), itor); { auto cmd = genCmdCast(*itor); EXPECT_EQ(cmd->getSourceRegisterAddress(), REG_GLOBAL_TIMESTAMP_LDW); } itor++; itor = find(itor, cmdList.end()); 
EXPECT_NE(cmdList.end(), itor); { auto cmd = genCmdCast(*itor); EXPECT_EQ(cmd->getSourceRegisterAddress(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW); } itor++; itor = find(itor, cmdList.end()); EXPECT_NE(cmdList.end(), itor); { auto cmd = genCmdCast(*itor); EXPECT_FALSE(cmd->getDcFlushEnable()); } itor++; itor = find(itor, cmdList.end()); EXPECT_NE(cmdList.end(), itor); { auto cmd = genCmdCast(*itor); EXPECT_EQ(cmd->getSourceRegisterAddress(), REG_GLOBAL_TIMESTAMP_LDW); } itor++; itor = find(itor, cmdList.end()); EXPECT_NE(cmdList.end(), itor); { auto cmd = genCmdCast(*itor); EXPECT_EQ(cmd->getSourceRegisterAddress(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW); } auto temp = itor; auto numPCs = findAll(temp, cmdList.end()); //we should have only one PC with dcFlush added ASSERT_EQ(1u, numPCs.size()); itor = find(itor, cmdList.end()); EXPECT_NE(cmdList.end(), itor); { auto cmd = genCmdCast(*itor); EXPECT_EQ(MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo), cmd->getDcFlushEnable()); } } } // namespace ult } // namespace L0 test_cmdlist_append_multipartition_prologue.cpp000066400000000000000000000131351422164147700405070ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/cmdlist/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_container/implicit_scaling.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h" namespace L0 { namespace ult { using MultiPartitionPrologueTest = Test; HWTEST2_F(MultiPartitionPrologueTest, whenAppendMultiPartitionPrologueIsCalledThenCommandListIsUpdated, IsAtLeastXeHpCore) { using MI_LOAD_REGISTER_MEM = typename FamilyType::MI_LOAD_REGISTER_MEM; using MI_LOAD_REGISTER_IMM = 
typename FamilyType::MI_LOAD_REGISTER_IMM; ze_result_t returnValue; std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::Compute, 0u, returnValue)); auto &commandContainer = commandList->commandContainer; ASSERT_NE(nullptr, commandContainer.getCommandStream()); auto usedSpaceBefore = commandContainer.getCommandStream()->getUsed(); CommandListImp *cmdListImp = static_cast(commandList.get()); uint32_t dataPartitionSize = 16; cmdListImp->appendMultiPartitionPrologue(dataPartitionSize); auto usedSpaceAfter = commandContainer.getCommandStream()->getUsed(); ASSERT_GT(usedSpaceAfter, usedSpaceBefore); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), usedSpaceAfter)); auto itorLrm = find(cmdList.begin(), cmdList.end()); EXPECT_EQ(cmdList.end(), itorLrm); auto itorLri = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), itorLri); auto lriCmd = genCmdCast(*itorLri); EXPECT_EQ(NEO::PartitionRegisters::addressOffsetCCSOffset, static_cast(lriCmd->getRegisterOffset())); EXPECT_EQ(dataPartitionSize, static_cast(lriCmd->getDataDword())); EXPECT_EQ(true, lriCmd->getMmioRemapEnable()); auto result = commandList->close(); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } HWTEST2_F(MultiPartitionPrologueTest, whenAppendMultiPartitionPrologueIsCalledThenCommandListIsNotUpdated, IsAtMostGen12lp) { ze_result_t returnValue; std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::Compute, 0u, returnValue)); auto &commandContainer = commandList->commandContainer; ASSERT_NE(nullptr, commandContainer.getCommandStream()); auto usedSpaceBefore = commandContainer.getCommandStream()->getUsed(); CommandListImp *cmdListImp = static_cast(commandList.get()); uint32_t dataPartitionSize = 16; cmdListImp->appendMultiPartitionPrologue(dataPartitionSize); auto usedSpaceAfter = commandContainer.getCommandStream()->getUsed(); 
ASSERT_EQ(usedSpaceAfter, usedSpaceBefore); auto result = commandList->close(); ASSERT_EQ(ZE_RESULT_SUCCESS, result); } using MultiPartitionEpilogueTest = Test; HWTEST2_F(MultiPartitionEpilogueTest, whenAppendMultiPartitionEpilogueIsCalledThenCommandListIsUpdated, IsAtLeastXeHpCore) { using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; ze_result_t returnValue; std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::Compute, 0u, returnValue)); auto &commandContainer = commandList->commandContainer; ASSERT_NE(nullptr, commandContainer.getCommandStream()); auto usedSpaceBefore = commandContainer.getCommandStream()->getUsed(); CommandListImp *cmdListImp = static_cast(commandList.get()); cmdListImp->appendMultiPartitionEpilogue(); auto usedSpaceAfter = commandContainer.getCommandStream()->getUsed(); ASSERT_GT(usedSpaceAfter, usedSpaceBefore); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), usedSpaceAfter)); auto itorLri = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), itorLri); auto lriCmd = genCmdCast(*itorLri); EXPECT_EQ(NEO::PartitionRegisters::addressOffsetCCSOffset, static_cast(lriCmd->getRegisterOffset())); EXPECT_EQ(NEO::ImplicitScalingDispatch::getPostSyncOffset(), static_cast(lriCmd->getDataDword())); EXPECT_EQ(true, lriCmd->getMmioRemapEnable()); auto result = commandList->close(); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } HWTEST2_F(MultiPartitionEpilogueTest, whenAppendMultiPartitionPrologueIsCalledThenCommandListIsNotUpdated, IsAtMostGen12lp) { ze_result_t returnValue; std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::Compute, 0u, returnValue)); auto &commandContainer = commandList->commandContainer; ASSERT_NE(nullptr, commandContainer.getCommandStream()); auto usedSpaceBefore = commandContainer.getCommandStream()->getUsed(); CommandListImp 
*cmdListImp = static_cast(commandList.get()); cmdListImp->appendMultiPartitionEpilogue(); auto usedSpaceAfter = commandContainer.getCommandStream()->getUsed(); ASSERT_EQ(usedSpaceAfter, usedSpaceBefore); auto result = commandList->close(); ASSERT_EQ(ZE_RESULT_SUCCESS, result); } } // namespace ult } // namespace L0 test_cmdlist_append_signal_event.cpp000066400000000000000000000371631422164147700361740ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/cmdlist/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_container/command_encoder.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.h" #include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h" namespace L0 { namespace ult { using CommandListAppendSignalEvent = Test; HWTEST_F(CommandListAppendSignalEvent, WhenAppendingSignalEventWithoutScopeThenMiStoreImmIsGenerated) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION; using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM; auto usedSpaceBefore = commandList->commandContainer.getCommandStream()->getUsed(); auto result = commandList->appendSignalEvent(event->toHandle()); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed(); ASSERT_GT(usedSpaceAfter, usedSpaceBefore); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), 
usedSpaceAfter)); auto baseAddr = event->getGpuAddress(device); auto itor = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), itor); auto cmd = genCmdCast(*itor); EXPECT_EQ(cmd->getAddress(), baseAddr); } HWTEST_F(CommandListAppendSignalEvent, givenCmdlistWhenAppendingSignalEventThenEventPoolGraphicsAllocationIsAddedToResidencyContainer) { auto result = commandList->appendSignalEvent(event->toHandle()); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto &residencyContainer = commandList->commandContainer.getResidencyContainer(); auto eventPoolAlloc = &eventPool->getAllocation(); for (auto alloc : eventPoolAlloc->getGraphicsAllocations()) { auto itor = std::find(std::begin(residencyContainer), std::end(residencyContainer), alloc); EXPECT_NE(itor, std::end(residencyContainer)); } } HWTEST_F(CommandListAppendSignalEvent, givenEventWithScopeFlagDeviceWhenAppendingSignalEventThenPipeControlHasNoDcFlush) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION; ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.signal = ZE_EVENT_SCOPE_FLAG_DEVICE; ze_result_t result = ZE_RESULT_SUCCESS; auto eventPoolHostVisible = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto eventHostVisible = std::unique_ptr(Event::create(eventPoolHostVisible.get(), &eventDesc, device)); auto usedSpaceBefore = commandList->commandContainer.getCommandStream()->getUsed(); result = commandList->appendSignalEvent(eventHostVisible->toHandle()); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed(); ASSERT_GT(usedSpaceAfter, usedSpaceBefore); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, 
ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), usedSpaceAfter)); auto itorPC = findAll(cmdList.begin(), cmdList.end()); ASSERT_NE(0u, itorPC.size()); bool postSyncFound = false; for (auto it : itorPC) { auto cmd = genCmdCast(*it); if (cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) { EXPECT_EQ(cmd->getImmediateData(), Event::STATE_SIGNALED); EXPECT_TRUE(cmd->getCommandStreamerStallEnable()); EXPECT_EQ(MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo), cmd->getDcFlushEnable()); postSyncFound = true; } } ASSERT_TRUE(postSyncFound); } HWTEST2_F(CommandListAppendSignalEvent, givenCommandListWhenAppendWriteGlobalTimestampCalledWithSignalEventThenPipeControlForTimestampAndSignalEncoded, IsAtLeastSkl) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION; auto &commandContainer = commandList->commandContainer; uint64_t timestampAddress = 0x12345678555500; uint64_t *dstptr = reinterpret_cast(timestampAddress); commandList->appendWriteGlobalTimestamp(dstptr, event->toHandle(), 0, nullptr); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); auto itorPC = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), itorPC); auto cmd = genCmdCast(*itorPC); while (cmd->getPostSyncOperation() != POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_TIMESTAMP) { itorPC++; itorPC = find(itorPC, cmdList.end()); EXPECT_NE(cmdList.end(), itorPC); cmd = genCmdCast(*itorPC); } EXPECT_TRUE(cmd->getCommandStreamerStallEnable()); EXPECT_FALSE(cmd->getDcFlushEnable()); EXPECT_EQ(timestampAddress, NEO::UnitTestHelper::getPipeControlPostSyncAddress(*cmd)); itorPC++; itorPC = find(itorPC, cmdList.end()); EXPECT_NE(cmdList.end(), itorPC); cmd = genCmdCast(*itorPC); while 
(cmd->getPostSyncOperation() != POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) { itorPC++; itorPC = find(itorPC, cmdList.end()); EXPECT_NE(cmdList.end(), itorPC); cmd = genCmdCast(*itorPC); } EXPECT_EQ(cmd->getImmediateData(), Event::STATE_SIGNALED); EXPECT_TRUE(cmd->getCommandStreamerStallEnable()); EXPECT_FALSE(cmd->getDcFlushEnable()); } HWTEST2_F(CommandListAppendSignalEvent, givenTimestampEventUsedInSignalThenPipeControlAppendedCorrectly, IsAtLeastSkl) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION; auto &commandContainer = commandList->commandContainer; ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; ze_result_t result = ZE_RESULT_SUCCESS; auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); commandList->appendSignalEvent(event->toHandle()); auto contextOffset = event->getContextEndOffset(); auto baseAddr = event->getGpuAddress(device); auto gpuAddress = ptrOffset(baseAddr, contextOffset); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); auto itorPC = findAll(cmdList.begin(), cmdList.end()); ASSERT_NE(0u, itorPC.size()); bool postSyncFound = false; for (auto it : itorPC) { auto cmd = genCmdCast(*it); if (cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) { EXPECT_EQ(cmd->getImmediateData(), Event::STATE_SIGNALED); EXPECT_TRUE(cmd->getCommandStreamerStallEnable()); EXPECT_EQ(gpuAddress, 
NEO::UnitTestHelper::getPipeControlPostSyncAddress(*cmd)); EXPECT_FALSE(cmd->getDcFlushEnable()); postSyncFound = true; } } ASSERT_TRUE(postSyncFound); } HWTEST2_F(CommandListAppendSignalEvent, givenMultiTileCommandListWhenAppendingScopeEventSignalThenExpectPartitionedPipeControl, IsAtLeastXeHpCore) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION; using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END; auto cmdStream = commandList->commandContainer.getCommandStream(); size_t useSize = cmdStream->getAvailableSpace(); useSize -= sizeof(MI_BATCH_BUFFER_END); cmdStream->getSpace(useSize); constexpr uint32_t packets = 2u; event->setEventTimestampFlag(false); event->signalScope = ZE_EVENT_SCOPE_FLAG_HOST; commandList->partitionCount = packets; ze_result_t returnValue = commandList->appendSignalEvent(event->toHandle()); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); EXPECT_EQ(packets, event->getPacketsInUse()); auto gpuAddress = event->getGpuAddress(device) + event->getContextEndOffset(); auto &hwInfo = device->getNEODevice()->getHardwareInfo(); size_t expectedSize = NEO::MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(hwInfo); size_t usedSize = cmdStream->getUsed(); EXPECT_EQ(expectedSize, usedSize); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, cmdStream->getCpuBase(), usedSize)); auto pipeControlList = findAll(cmdList.begin(), cmdList.end()); ASSERT_NE(0u, pipeControlList.size()); uint32_t postSyncFound = 0; for (auto &it : pipeControlList) { auto cmd = genCmdCast(*it); if (cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) { EXPECT_EQ(Event::STATE_SIGNALED, cmd->getImmediateData()); EXPECT_TRUE(cmd->getCommandStreamerStallEnable()); EXPECT_EQ(gpuAddress, NEO::UnitTestHelper::getPipeControlPostSyncAddress(*cmd)); 
EXPECT_EQ(MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo), cmd->getDcFlushEnable()); EXPECT_TRUE(cmd->getWorkloadPartitionIdOffsetEnable()); postSyncFound++; gpuAddress += event->getSinglePacketSize(); } } EXPECT_EQ(1u, postSyncFound); } HWTEST2_F(CommandListAppendSignalEvent, givenMultiTileCommandListWhenAppendingNonScopeEventSignalThenExpectPartitionedStoreDataImm, IsAtLeastXeHpCore) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM; using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END; auto cmdStream = commandList->commandContainer.getCommandStream(); size_t useSize = cmdStream->getAvailableSpace(); useSize -= sizeof(MI_BATCH_BUFFER_END); cmdStream->getSpace(useSize); constexpr uint32_t packets = 2u; event->setEventTimestampFlag(false); event->signalScope = 0; commandList->partitionCount = packets; ze_result_t returnValue = commandList->appendSignalEvent(event->toHandle()); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); EXPECT_EQ(packets, event->getPacketsInUse()); auto gpuAddress = event->getGpuAddress(device) + event->getContextEndOffset(); size_t expectedSize = NEO::EncodeStoreMemory::getStoreDataImmSize(); size_t usedSize = cmdStream->getUsed(); EXPECT_EQ(expectedSize, usedSize); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, cmdStream->getCpuBase(), usedSize)); auto storeDataImmList = findAll(cmdList.begin(), cmdList.end()); ASSERT_NE(0u, storeDataImmList.size()); uint32_t postSyncFound = 0; for (auto &it : storeDataImmList) { auto cmd = genCmdCast(*it); EXPECT_EQ(gpuAddress, cmd->getAddress()); EXPECT_FALSE(cmd->getStoreQword()); EXPECT_EQ(Event::STATE_SIGNALED, cmd->getDataDword0()); EXPECT_EQ(0u, cmd->getDataDword1()); EXPECT_EQ(MI_STORE_DATA_IMM::DWORD_LENGTH::DWORD_LENGTH_STORE_DWORD, cmd->getDwordLength()); EXPECT_TRUE(cmd->getWorkloadPartitionIdOffsetEnable()); postSyncFound++; gpuAddress += 
event->getSinglePacketSize(); } EXPECT_EQ(1u, postSyncFound); } HWTEST2_F(CommandListAppendSignalEvent, givenMultiTileCommandListWhenAppendingScopeEventSignalAfterWalkerThenExpectPartitionedPipeControl, IsAtLeastXeHpCore) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION; using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END; auto commandList = std::make_unique<::L0::ult::CommandListCoreFamily>(); ASSERT_NE(nullptr, commandList); ze_result_t returnValue = commandList->initialize(device, NEO::EngineGroupType::Compute, 0u); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); auto cmdStream = commandList->commandContainer.getCommandStream(); size_t useSize = cmdStream->getAvailableSpace(); useSize -= sizeof(MI_BATCH_BUFFER_END); cmdStream->getSpace(useSize); constexpr uint32_t packets = 2u; event->setEventTimestampFlag(false); event->signalScope = ZE_EVENT_SCOPE_FLAG_HOST; commandList->partitionCount = packets; commandList->appendSignalEventPostWalker(event->toHandle()); EXPECT_EQ(packets, event->getPacketsInUse()); auto gpuAddress = event->getGpuAddress(device) + event->getContextEndOffset(); auto &hwInfo = device->getNEODevice()->getHardwareInfo(); size_t expectedSize = NEO::MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(hwInfo); size_t usedSize = cmdStream->getUsed(); EXPECT_EQ(expectedSize, usedSize); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, cmdStream->getCpuBase(), usedSize)); auto pipeControlList = findAll(cmdList.begin(), cmdList.end()); ASSERT_NE(0u, pipeControlList.size()); uint32_t postSyncFound = 0; for (auto &it : pipeControlList) { auto cmd = genCmdCast(*it); if (cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) { EXPECT_EQ(Event::STATE_SIGNALED, cmd->getImmediateData()); 
EXPECT_TRUE(cmd->getCommandStreamerStallEnable()); EXPECT_EQ(gpuAddress, NEO::UnitTestHelper::getPipeControlPostSyncAddress(*cmd)); EXPECT_EQ(MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo), cmd->getDcFlushEnable()); EXPECT_TRUE(cmd->getWorkloadPartitionIdOffsetEnable()); postSyncFound++; gpuAddress += event->getSinglePacketSize(); } } EXPECT_EQ(1u, postSyncFound); } } // namespace ult } // namespace L0 test_cmdlist_append_wait_on_events.cpp000066400000000000000000000445321422164147700365400ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/cmdlist/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_container/command_encoder.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.h" #include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h" namespace L0 { namespace ult { using CommandListAppendWaitOnEvent = Test; HWTEST_F(CommandListAppendWaitOnEvent, WhenAppendingWaitOnEventThenSemaphoreWaitCmdIsGenerated) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; auto usedSpaceBefore = commandList->commandContainer.getCommandStream()->getUsed(); ze_event_handle_t hEventHandle = event->toHandle(); auto result = commandList->appendWaitOnEvents(1, &hEventHandle); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed(); ASSERT_GT(usedSpaceAfter, usedSpaceBefore); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, 
ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), usedSpaceAfter)); auto itor = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), itor); { auto cmd = genCmdCast(*itor); EXPECT_EQ(cmd->getCompareOperation(), MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD); EXPECT_EQ(static_cast(-1), cmd->getSemaphoreDataDword()); auto addressSpace = device->getHwInfo().capabilityTable.gpuAddressSpace; EXPECT_EQ(cmd->getSemaphoreGraphicsAddress() & addressSpace, event->getGpuAddress(device) & addressSpace); EXPECT_EQ(cmd->getWaitMode(), MI_SEMAPHORE_WAIT::WAIT_MODE::WAIT_MODE_POLLING_MODE); } } HWTEST_F(CommandListAppendWaitOnEvent, givenTwoEventsWhenWaitOnEventsAppendedThenTwoSemaphoreWaitCmdsAreGenerated) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; auto usedSpaceBefore = commandList->commandContainer.getCommandStream()->getUsed(); ze_event_handle_t handles[2] = {event->toHandle(), event->toHandle()}; auto result = commandList->appendWaitOnEvents(2, handles); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed(); ASSERT_GT(usedSpaceAfter, usedSpaceBefore); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), usedSpaceAfter)); auto itor = findAll(cmdList.begin(), cmdList.end()); ASSERT_EQ(2u, itor.size()); for (int i = 0; i < 2; i++) { auto cmd = genCmdCast(*itor[i]); EXPECT_EQ(cmd->getCompareOperation(), MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD); EXPECT_EQ(static_cast(-1), cmd->getSemaphoreDataDword()); auto addressSpace = device->getHwInfo().capabilityTable.gpuAddressSpace; EXPECT_EQ(cmd->getSemaphoreGraphicsAddress() & addressSpace, event->getGpuAddress(device) & addressSpace); EXPECT_EQ(cmd->getWaitMode(), MI_SEMAPHORE_WAIT::WAIT_MODE::WAIT_MODE_POLLING_MODE); } } 
// NOTE(review): in this copy of the file every template-argument list appears to have been
// stripped (e.g. `static_cast(-1)`, `findAll(...)`, `genCmdCast(...)`, `std::unique_ptr(...)`).
// They are intentionally left as found; restore them from the upstream sources before compiling.

// Appending a wait on one event must place every graphics allocation that backs the
// fixture's event pool into the command list's residency container.
HWTEST_F(CommandListAppendWaitOnEvent, WhenAppendingWaitOnEventsThenEventGraphicsAllocationIsAddedToResidencyContainer) {
    ze_event_handle_t hEventHandle = event->toHandle();
    auto result = commandList->appendWaitOnEvents(1, &hEventHandle);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    auto &residencyContainer = commandList->commandContainer.getResidencyContainer();
    auto eventPoolAlloc = &eventPool->getAllocation();
    for (auto alloc : eventPoolAlloc->getGraphicsAllocations()) {
        auto itor = std::find(std::begin(residencyContainer), std::end(residencyContainer), alloc);
        EXPECT_NE(itor, std::end(residencyContainer));
    }
}

// Waiting on an event whose descriptor carries ZE_EVENT_SCOPE_FLAG_DEVICE (the last initializer
// below; the test name identifies it as the wait scope) must leave, as the last PIPE_CONTROL in
// the stream, one with CS stall enabled and a DC-flush bit equal to what
// MemorySynchronizationCommands::getDcFlushEnable(true, ...) reports for the default hardware.
HWTEST_F(CommandListAppendWaitOnEvent, givenEventWithWaitScopeFlagDeviceWhenAppendingWaitOnEventThenPCWithDcFlushIsGenerated) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    auto usedSpaceBefore = commandList->commandContainer.getCommandStream()->getUsed();

    const ze_event_desc_t eventDesc = {
        ZE_STRUCTURE_TYPE_EVENT_DESC,
        nullptr,
        0,
        0,
        ZE_EVENT_SCOPE_FLAG_DEVICE};

    auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device));
    ze_event_handle_t hEventHandle = event->toHandle();

    auto result = commandList->appendWaitOnEvents(1, &hEventHandle);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed();
    ASSERT_GT(usedSpaceAfter, usedSpaceBefore);

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
                                                      ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0),
                                                      usedSpaceAfter));

    // Keep the whole result instead of calling .back() on the temporary: .back() on an empty
    // vector is undefined behaviour, and comparing the returned element against cmdList.end()
    // could never detect that case because findAll only yields iterators it actually found.
    auto pipeControls = findAll(cmdList.begin(), cmdList.end());
    const auto dcFlushExpected = MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo);
    if (dcFlushExpected) {
        // A DC flush can only be programmed through a PIPE_CONTROL, so one must be present.
        ASSERT_FALSE(pipeControls.empty());
    }
    if (!pipeControls.empty()) {
        auto itorPC = pipeControls.back();
        auto cmd = genCmdCast(*itorPC);
        ASSERT_NE(cmd, nullptr);
        EXPECT_TRUE(cmd->getCommandStreamerStallEnable());
        EXPECT_EQ(dcFlushExpected, cmd->getDcFlushEnable());
    }
}

// A kernel-timestamp event with three packets in use must produce three MI_SEMAPHORE_WAIT
// commands. Each is expected to poll (SAD_NOT_EQUAL_SDD against an all-ones dword) on
// consecutive packet addresses, starting at the event's GPU address plus its context-end
// offset and advancing by getSinglePacketSize().
HWTEST_F(CommandListAppendWaitOnEvent, WhenAppendingWaitOnTimestampEventWithThreePacketsThenSemaphoreWaitCmdIsGeneratedThreeTimes) {
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
    auto usedSpaceBefore = commandList->commandContainer.getCommandStream()->getUsed();

    // Local pool/event shadow the fixture's members so the event is timestamp-enabled.
    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.count = 1;
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;

    ze_event_desc_t eventDesc = {};
    eventDesc.index = 0;

    ze_result_t result = ZE_RESULT_SUCCESS;
    std::unique_ptr eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device));

    event->setPacketsInUse(3u);
    ze_event_handle_t hEventHandle = event->toHandle();

    result = commandList->appendWaitOnEvents(1, &hEventHandle);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed();
    ASSERT_GT(usedSpaceAfter, usedSpaceBefore);

    auto gpuAddress = event->getGpuAddress(device) + event->getContextEndOffset();

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
                                                      ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0),
                                                      usedSpaceAfter));

    auto itorSW = findAll(cmdList.begin(), cmdList.end());
    ASSERT_NE(0u, itorSW.size());
    uint32_t semaphoreWaitsFound = 0;
    for (auto it : itorSW) {
        auto cmd = genCmdCast(*it);
        // Addresses are compared only within the device's reported GPU address space mask.
        auto addressSpace = device->getHwInfo().capabilityTable.gpuAddressSpace;

        EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD,
                  cmd->getCompareOperation());
        EXPECT_EQ(cmd->getSemaphoreDataDword(), static_cast(-1));
        EXPECT_EQ(gpuAddress & addressSpace, cmd->getSemaphoreGraphicsAddress() & addressSpace);
        EXPECT_EQ(MI_SEMAPHORE_WAIT::WAIT_MODE::WAIT_MODE_POLLING_MODE, cmd->getWaitMode());

        semaphoreWaitsFound++;
        // The next wait is expected on the following packet of the same event.
        gpuAddress += event->getSinglePacketSize();
    }
    ASSERT_EQ(3u, semaphoreWaitsFound);
}

// Same as the three-packet case, but the event records three packets for each of three
// kernels (the test asserts nine packets in use), so nine MI_SEMAPHORE_WAIT commands on
// consecutive packet addresses are expected.
HWTEST_F(CommandListAppendWaitOnEvent, WhenAppendingWaitOnTimestampEventWithThreeKernelsThenSemaphoreWaitCmdIsGeneratedCorrectly) {
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
    auto usedSpaceBefore = commandList->commandContainer.getCommandStream()->getUsed();

    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.count = 1;
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;

    ze_event_desc_t eventDesc = {};
    eventDesc.index = 0;

    ze_result_t result = ZE_RESULT_SUCCESS;
    std::unique_ptr eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device));

    // presumably setPacketsInUse() applies to the kernel currently selected by kernelCount;
    // the assertion below only establishes the resulting total of nine packets.
    event->setPacketsInUse(3u);
    event->kernelCount = 2;
    event->setPacketsInUse(3u);
    event->kernelCount = 3;
    event->setPacketsInUse(3u);
    ASSERT_EQ(9u, event->getPacketsInUse());

    ze_event_handle_t hEventHandle = event->toHandle();

    result = commandList->appendWaitOnEvents(1, &hEventHandle);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed();
    ASSERT_GT(usedSpaceAfter, usedSpaceBefore);

    auto gpuAddress = event->getGpuAddress(device) + event->getContextEndOffset();

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
                                                      ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0),
                                                      usedSpaceAfter));

    auto itorSW = findAll(cmdList.begin(), cmdList.end());
    ASSERT_NE(0u, itorSW.size());
    uint32_t semaphoreWaitsFound = 0;
    for (auto it : itorSW) {
        auto cmd = genCmdCast(*it);
        auto addressSpace = device->getHwInfo().capabilityTable.gpuAddressSpace;

        EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD,
                  cmd->getCompareOperation());
        EXPECT_EQ(cmd->getSemaphoreDataDword(), static_cast(-1));
        EXPECT_EQ(gpuAddress & addressSpace, cmd->getSemaphoreGraphicsAddress() & addressSpace);
        EXPECT_EQ(MI_SEMAPHORE_WAIT::WAIT_MODE::WAIT_MODE_POLLING_MODE, cmd->getWaitMode());

        semaphoreWaitsFound++;
        gpuAddress += event->getSinglePacketSize();
    }
    ASSERT_EQ(9u, semaphoreWaitsFound);
}
HWTEST2_F(CommandListAppendWaitOnEvent, givenCommandListWhenAppendWriteGlobalTimestampCalledWithWaitOnEventsThenSemaphoreWaitAndPipeControlForTimestampEncoded, IsAtLeastSkl) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION; uint64_t timestampAddress = 0x12345678555500; uint64_t *dstptr = reinterpret_cast(timestampAddress); ze_event_handle_t hEventHandle = event->toHandle(); commandList->appendWriteGlobalTimestamp(dstptr, nullptr, 1, &hEventHandle); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed())); auto itor = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), itor); auto cmd = genCmdCast(*itor); EXPECT_EQ(cmd->getCompareOperation(), MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD); EXPECT_EQ(static_cast(-1), cmd->getSemaphoreDataDword()); auto addressSpace = device->getHwInfo().capabilityTable.gpuAddressSpace; EXPECT_EQ(cmd->getSemaphoreGraphicsAddress() & addressSpace, event->getGpuAddress(device) & addressSpace); EXPECT_EQ(cmd->getWaitMode(), MI_SEMAPHORE_WAIT::WAIT_MODE::WAIT_MODE_POLLING_MODE); itor++; auto itorPC = findAll(itor, cmdList.end()); ASSERT_NE(0u, itorPC.size()); bool postSyncFound = false; for (auto it : itorPC) { auto cmdPC = genCmdCast(*it); if (cmdPC->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_TIMESTAMP) { EXPECT_TRUE(cmdPC->getCommandStreamerStallEnable()); EXPECT_FALSE(cmdPC->getDcFlushEnable()); EXPECT_EQ(timestampAddress, NEO::UnitTestHelper::getPipeControlPostSyncAddress(*cmdPC)); postSyncFound = true; } } ASSERT_TRUE(postSyncFound); } HWTEST_F(CommandListAppendWaitOnEvent, givenCommandBufferIsEmptyWhenAppendingWaitOnEventThenAllocateNewCommandBuffer) { using 
PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END; auto consumeSpace = commandList->commandContainer.getCommandStream()->getAvailableSpace(); consumeSpace -= sizeof(MI_BATCH_BUFFER_END); commandList->commandContainer.getCommandStream()->getSpace(consumeSpace); size_t expectedConsumedSpace = sizeof(MI_SEMAPHORE_WAIT); if (MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo)) { expectedConsumedSpace += sizeof(PIPE_CONTROL); } const ze_event_desc_t eventDesc = { ZE_STRUCTURE_TYPE_EVENT_DESC, nullptr, 0, 0, ZE_EVENT_SCOPE_FLAG_DEVICE}; auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); ze_event_handle_t hEventHandle = event->toHandle(); auto oldCommandBuffer = commandList->commandContainer.getCommandStream()->getGraphicsAllocation(); auto result = commandList->appendWaitOnEvents(1, &hEventHandle); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed(); auto newCommandBuffer = commandList->commandContainer.getCommandStream()->getGraphicsAllocation(); EXPECT_EQ(expectedConsumedSpace, usedSpaceAfter); EXPECT_NE(oldCommandBuffer, newCommandBuffer); auto gpuAddress = event->getGpuAddress(device); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, commandList->commandContainer.getCommandStream()->getCpuBase(), usedSpaceAfter)); auto itorPC = find(cmdList.begin(), cmdList.end()); if (MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo)) { ASSERT_NE(cmdList.end(), itorPC); { auto cmd = genCmdCast(*itorPC); ASSERT_NE(cmd, nullptr); EXPECT_TRUE(cmd->getCommandStreamerStallEnable()); EXPECT_EQ(MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo), cmd->getDcFlushEnable()); } } else { EXPECT_EQ(cmdList.end(), itorPC); } auto itorSW = findAll(cmdList.begin(), cmdList.end()); 
ASSERT_NE(0u, itorSW.size()); uint32_t semaphoreWaitsFound = 0; for (auto it : itorSW) { auto cmd = genCmdCast(*it); auto addressSpace = device->getHwInfo().capabilityTable.gpuAddressSpace; EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD, cmd->getCompareOperation()); EXPECT_EQ(cmd->getSemaphoreDataDword(), std::numeric_limits::max()); EXPECT_EQ(gpuAddress & addressSpace, cmd->getSemaphoreGraphicsAddress() & addressSpace); EXPECT_EQ(MI_SEMAPHORE_WAIT::WAIT_MODE::WAIT_MODE_POLLING_MODE, cmd->getWaitMode()); semaphoreWaitsFound++; gpuAddress += event->getSinglePacketSize(); } EXPECT_EQ(1u, semaphoreWaitsFound); } using MultTileCommandListAppendWaitOnEvent = Test>; HWTEST2_F(MultTileCommandListAppendWaitOnEvent, GivenMultiTileCmdListWhenPartitionedEventUsedToWaitThenExpectProperGpuAddressAndSemaphoreCount, IsAtLeastXeHpCore) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; size_t expectedSize = commandList->partitionCount * sizeof(MI_SEMAPHORE_WAIT); event->setPacketsInUse(commandList->partitionCount); event->setPartitionedEvent(true); ze_event_handle_t eventHandle = event->toHandle(); auto usedSpaceBefore = commandList->commandContainer.getCommandStream()->getUsed(); auto result = commandList->appendWaitOnEvents(1, &eventHandle); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed(); EXPECT_EQ(expectedSize, (usedSpaceAfter - usedSpaceBefore)); auto gpuAddress = event->getGpuAddress(device) + event->getContextEndOffset(); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), usedSpaceBefore), expectedSize)); auto itorSW = findAll(cmdList.begin(), cmdList.end()); ASSERT_NE(0u, itorSW.size()); uint32_t semaphoreWaitsFound = 0; for (auto it : itorSW) { auto cmd = genCmdCast(*it); 
EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD, cmd->getCompareOperation()); EXPECT_EQ(cmd->getSemaphoreDataDword(), std::numeric_limits::max()); EXPECT_EQ(gpuAddress, cmd->getSemaphoreGraphicsAddress()); EXPECT_EQ(MI_SEMAPHORE_WAIT::WAIT_MODE::WAIT_MODE_POLLING_MODE, cmd->getWaitMode()); semaphoreWaitsFound++; gpuAddress += event->getSinglePacketSize(); } EXPECT_EQ(2u, semaphoreWaitsFound); } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_blit.cpp000066400000000000000000000415121422164147700331710ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/register_offsets.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/source/builtin/builtin_functions_lib_impl.h" #include "level_zero/core/source/image/image_hw.h" #include "level_zero/core/source/kernel/kernel_imp.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h" namespace L0 { namespace ult { template class MockCommandListForMemFill : public WhiteBox<::L0::CommandListCoreFamily> { public: MockCommandListForMemFill() : WhiteBox<::L0::CommandListCoreFamily>() {} AlignedAllocationData getAlignedAllocation(L0::Device *device, const void *buffer, uint64_t bufferSize, bool allowHostCopy) override { return {0, 0, nullptr, true}; } ze_result_t appendMemoryCopyBlit(uintptr_t dstPtr, NEO::GraphicsAllocation *dstPtrAlloc, uint64_t dstOffset, uintptr_t srcPtr, NEO::GraphicsAllocation *srcPtrAlloc, uint64_t srcOffset, uint64_t size) override { appendMemoryCopyBlitCalledTimes++; return ZE_RESULT_SUCCESS; } uint32_t appendMemoryCopyBlitCalledTimes = 0; }; class MockDriverHandle : public 
L0::DriverHandleImp { public: bool findAllocationDataForRange(const void *buffer, size_t size, NEO::SvmAllocationData **allocData) override { mockAllocation.reset(new NEO::MockGraphicsAllocation(rootDeviceIndex, NEO::AllocationType::INTERNAL_HOST_MEMORY, reinterpret_cast(0x1234), 0x1000, 0, sizeof(uint32_t), MemoryPool::System4KBPages)); data.gpuAllocations.addAllocation(mockAllocation.get()); if (allocData) { *allocData = &data; } return true; } const uint32_t rootDeviceIndex = 0u; std::unique_ptr mockAllocation; NEO::SvmAllocationData data{rootDeviceIndex}; }; using AppendMemoryCopy = Test; HWTEST2_F(AppendMemoryCopy, givenCopyOnlyCommandListWhenAppenBlitFillCalledWithLargePatternSizeThenMemCopyWasCalled, IsAtLeastSkl) { MockCommandListForMemFill cmdList; cmdList.initialize(device, NEO::EngineGroupType::Copy, 0u); uint64_t pattern[4] = {1, 2, 3, 4}; void *ptr = reinterpret_cast(0x1234); auto ret = cmdList.appendMemoryFill(ptr, reinterpret_cast(&pattern), sizeof(pattern), 0x1000, nullptr, 0, nullptr); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_SIZE, ret); } HWTEST2_F(AppendMemoryCopy, givenCopyOnlyCommandListWhenAppenBlitFillToNotDeviceMemThenInvalidArgumentReturned, IsAtLeastSkl) { MockCommandListForMemFill cmdList; cmdList.initialize(device, NEO::EngineGroupType::Copy, 0u); uint8_t pattern = 1; void *ptr = reinterpret_cast(0x1234); auto ret = cmdList.appendMemoryFill(ptr, reinterpret_cast(&pattern), sizeof(pattern), 0x1000, nullptr, 0, nullptr); EXPECT_EQ(ret, ZE_RESULT_ERROR_INVALID_ARGUMENT); } using MemFillPlatforms = IsWithinProducts; HWTEST2_F(AppendMemoryCopy, givenCopyOnlyCommandListWhenAppenBlitFillThenCopyBltIsProgrammed, MemFillPlatforms) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using XY_COLOR_BLT = typename GfxFamily::XY_COLOR_BLT; MockCommandListForMemFill commandList; MockDriverHandle driverHandleMock; NEO::DeviceVector neoDevices; neoDevices.push_back(std::unique_ptr(neoDevice)); driverHandleMock.initialize(std::move(neoDevices)); 
device->setDriverHandle(&driverHandleMock); commandList.initialize(device, NEO::EngineGroupType::Copy, 0u); uint16_t pattern = 1; void *ptr = reinterpret_cast(0x1234); commandList.appendMemoryFill(ptr, reinterpret_cast(&pattern), sizeof(pattern), 0x1000, nullptr, 0, nullptr); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandList.commandContainer.getCommandStream()->getCpuBase(), 0), commandList.commandContainer.getCommandStream()->getUsed())); auto itor = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), itor); device->setDriverHandle(driverHandle.get()); } HWTEST2_F(AppendMemoryCopy, givenCopyOnlyCommandListAndHostPointersWhenMemoryCopyCalledThenPipeControlWithDcFlushAddedIsNotAddedAfterBlitCopy, IsAtLeastSkl) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using XY_COPY_BLT = typename GfxFamily::XY_COPY_BLT; auto commandList = std::make_unique>>(); commandList->initialize(device, NEO::EngineGroupType::Copy, 0u); void *srcPtr = reinterpret_cast(0x1234); void *dstPtr = reinterpret_cast(0x2345); commandList->appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 0, nullptr); auto &commandContainer = commandList->commandContainer; GenCmdList genCmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( genCmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); auto itor = find(genCmdList.begin(), genCmdList.end()); ASSERT_NE(genCmdList.end(), itor); itor = find(++itor, genCmdList.end()); EXPECT_EQ(genCmdList.end(), itor); } HWTEST2_F(AppendMemoryCopy, givenCopyOnlyCommandListAndHostPointersWhenMemoryCopyRegionCalledThenPipeControlWithDcFlushAddedIsNotAddedAfterBlitCopy, IsAtLeastSkl) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using XY_COPY_BLT = typename GfxFamily::XY_COPY_BLT; auto commandList = 
std::make_unique>>(); commandList->initialize(device, NEO::EngineGroupType::Copy, 0u); void *srcPtr = reinterpret_cast(0x1234); void *dstPtr = reinterpret_cast(0x2345); ze_copy_region_t dstRegion = {4, 4, 0, 2, 2, 1}; ze_copy_region_t srcRegion = {4, 4, 0, 2, 2, 1}; commandList->appendMemoryCopyRegion(dstPtr, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr, 0, nullptr); auto &commandContainer = commandList->commandContainer; GenCmdList genCmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( genCmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); auto itor = find(genCmdList.begin(), genCmdList.end()); ASSERT_NE(genCmdList.end(), itor); itor = find(++itor, genCmdList.end()); EXPECT_EQ(genCmdList.end(), itor); } HWTEST2_F(AppendMemoryCopy, givenCopyOnlyCommandListThenDcFlushIsNotAddedAfterBlitCopy, IsAtLeastSkl) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using XY_COPY_BLT = typename GfxFamily::XY_COPY_BLT; auto commandList = std::make_unique>>(); commandList->initialize(device, NEO::EngineGroupType::Copy, 0u); uintptr_t srcPtr = 0x5001; uintptr_t dstPtr = 0x7001; uint64_t srcOffset = 0x101; uint64_t dstOffset = 0x201; uint64_t copySize = 0x301; NEO::MockGraphicsAllocation mockAllocationSrc(0, NEO::AllocationType::INTERNAL_HOST_MEMORY, reinterpret_cast(srcPtr), 0x1000, 0, sizeof(uint32_t), MemoryPool::System4KBPages); NEO::MockGraphicsAllocation mockAllocationDst(0, NEO::AllocationType::INTERNAL_HOST_MEMORY, reinterpret_cast(dstPtr), 0x1000, 0, sizeof(uint32_t), MemoryPool::System4KBPages); commandList->appendMemoryCopyBlit(ptrOffset(dstPtr, dstOffset), &mockAllocationDst, 0, ptrOffset(srcPtr, srcOffset), &mockAllocationSrc, 0, copySize); auto &commandContainer = commandList->commandContainer; GenCmdList genCmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( genCmdList, 
ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); auto itor = find(genCmdList.begin(), genCmdList.end()); ASSERT_NE(genCmdList.end(), itor); auto cmd = genCmdCast(*itor); EXPECT_EQ(cmd->getDestinationBaseAddress(), ptrOffset(dstPtr, dstOffset)); EXPECT_EQ(cmd->getSourceBaseAddress(), ptrOffset(srcPtr, srcOffset)); } HWTEST2_F(AppendMemoryCopy, givenCopyCommandListWhenTimestampPassedToMemoryCopyRegionBlitThenTimeStampRegistersAreAdded, IsAtLeastSkl) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM; auto commandList = std::make_unique>>(); commandList->initialize(device, NEO::EngineGroupType::Copy, 0u); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; ze_result_t result = ZE_RESULT_SUCCESS; auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); ze_copy_region_t srcRegion = {4, 4, 4, 2, 2, 2}; ze_copy_region_t dstRegion = {4, 4, 4, 2, 2, 2}; NEO::MockGraphicsAllocation mockAllocationSrc(0, NEO::AllocationType::INTERNAL_HOST_MEMORY, reinterpret_cast(0x1234), 0x1000, 0, sizeof(uint32_t), MemoryPool::System4KBPages); NEO::MockGraphicsAllocation mockAllocationDst(0, NEO::AllocationType::INTERNAL_HOST_MEMORY, reinterpret_cast(0x1234), 0x1000, 0, sizeof(uint32_t), MemoryPool::System4KBPages); commandList->appendMemoryCopyBlitRegion(&mockAllocationDst, &mockAllocationSrc, 0, 0, srcRegion, dstRegion, {0, 0, 0}, 0, 0, 0, 0, 0, 0, event->toHandle(), 0, nullptr); GenCmdList cmdList; auto baseAddr = event->getGpuAddress(device); auto contextStartOffset = event->getContextStartOffset(); auto globalStartOffset = 
event->getGlobalStartOffset(); auto contextEndOffset = event->getContextEndOffset(); auto globalEndOffset = event->getGlobalEndOffset(); ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed())); auto itor = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), itor); auto cmd = genCmdCast(*itor); EXPECT_EQ(cmd->getRegisterAddress(), REG_GLOBAL_TIMESTAMP_LDW); EXPECT_EQ(cmd->getMemoryAddress(), ptrOffset(baseAddr, globalStartOffset)); itor++; EXPECT_NE(cmdList.end(), itor); cmd = genCmdCast(*itor); EXPECT_EQ(cmd->getRegisterAddress(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW); EXPECT_EQ(cmd->getMemoryAddress(), ptrOffset(baseAddr, contextStartOffset)); itor++; itor = find(itor, cmdList.end()); EXPECT_NE(cmdList.end(), itor); cmd = genCmdCast(*itor); EXPECT_EQ(cmd->getRegisterAddress(), REG_GLOBAL_TIMESTAMP_LDW); EXPECT_EQ(cmd->getMemoryAddress(), ptrOffset(baseAddr, globalEndOffset)); itor++; EXPECT_NE(cmdList.end(), itor); cmd = genCmdCast(*itor); EXPECT_EQ(cmd->getRegisterAddress(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW); EXPECT_EQ(cmd->getMemoryAddress(), ptrOffset(baseAddr, contextEndOffset)); itor++; EXPECT_EQ(cmdList.end(), itor); } HWTEST2_F(AppendMemoryCopy, givenCopyCommandListWhenTimestampPassedToImageCopyBlitThenTimeStampRegistersAreAdded, IsAtLeastSkl) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM; auto commandList = std::make_unique>>(); commandList->initialize(device, NEO::EngineGroupType::Copy, 0u); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; ze_result_t result = ZE_RESULT_SUCCESS; auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, 
&eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); NEO::MockGraphicsAllocation mockAllocationSrc(0, NEO::AllocationType::INTERNAL_HOST_MEMORY, reinterpret_cast(0x1234), 0x1000, 0, sizeof(uint32_t), MemoryPool::System4KBPages); NEO::MockGraphicsAllocation mockAllocationDst(0, NEO::AllocationType::INTERNAL_HOST_MEMORY, reinterpret_cast(0x1234), 0x1000, 0, sizeof(uint32_t), MemoryPool::System4KBPages); commandList->appendCopyImageBlit(&mockAllocationDst, &mockAllocationSrc, {0, 0, 0}, {0, 0, 0}, 1, 1, 1, 1, 1, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, event->toHandle()); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed())); auto itor = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), itor); auto cmd = genCmdCast(*itor); EXPECT_EQ(cmd->getRegisterAddress(), REG_GLOBAL_TIMESTAMP_LDW); } using ImageSupport = IsWithinProducts; HWTEST2_F(AppendMemoryCopy, givenCopyCommandListWhenCopyFromImagBlitThenCommandAddedToStream, ImageSupport) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using XY_COPY_BLT = typename GfxFamily::XY_COPY_BLT; ze_result_t returnValue; std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::Copy, 0u, returnValue)); ze_image_desc_t zeDesc = {}; zeDesc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC; auto imageHWSrc = std::make_unique>>(); auto imageHWDst = std::make_unique>>(); imageHWSrc->initialize(device, &zeDesc); imageHWDst->initialize(device, &zeDesc); commandList->appendImageCopyRegion(imageHWDst->toHandle(), imageHWSrc->toHandle(), nullptr, nullptr, nullptr, 0, nullptr); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), 
commandList->commandContainer.getCommandStream()->getUsed())); auto itor = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), itor); } using AppendMemoryCopyFromContext = AppendMemoryCopy; HWTEST2_F(AppendMemoryCopyFromContext, givenCommandListThenUpOnPerformingAppendMemoryCopyFromContextSuccessIsReturned, IsAtLeastSkl) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; auto commandList = std::make_unique>>(); commandList->initialize(device, NEO::EngineGroupType::Copy, 0u); void *srcPtr = reinterpret_cast(0x1234); void *dstPtr = reinterpret_cast(0x2345); auto result = commandList->appendMemoryCopyFromContext(dstPtr, nullptr, srcPtr, 8, nullptr, 0, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_fill.cpp000066400000000000000000000230461422164147700331670ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/memory_manager/memory_manager.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/source/builtin/builtin_functions_lib_impl.h" #include "level_zero/core/source/kernel/kernel_imp.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_built_ins.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h" #include namespace L0 { namespace ult { class AppendFillFixture : public DeviceFixture { public: class MockDriverFillHandle : public L0::DriverHandleImp { public: bool findAllocationDataForRange(const void *buffer, size_t size, NEO::SvmAllocationData **allocData) override { mockAllocation.reset(new NEO::MockGraphicsAllocation(const_cast(buffer), size)); data.gpuAllocations.addAllocation(mockAllocation.get()); if (allocData) { *allocData = &data; } return true; } const 
uint32_t rootDeviceIndex = 0u; std::unique_ptr mockAllocation; NEO::SvmAllocationData data{rootDeviceIndex}; }; template class MockCommandList : public WhiteBox<::L0::CommandListCoreFamily> { public: MockCommandList() : WhiteBox<::L0::CommandListCoreFamily>() {} ze_result_t appendLaunchKernelWithParams(ze_kernel_handle_t hKernel, const ze_group_count_t *pThreadGroupDimensions, ze_event_handle_t hEvent, bool isIndirect, bool isPredicate, bool isCooperative) override { if (numberOfCallsToAppendLaunchKernelWithParams == thresholdOfCallsToAppendLaunchKernelWithParamsToFail) { return ZE_RESULT_ERROR_UNKNOWN; } numberOfCallsToAppendLaunchKernelWithParams++; return CommandListCoreFamily::appendLaunchKernelWithParams(hKernel, pThreadGroupDimensions, hEvent, isIndirect, isPredicate, isCooperative); } uint32_t thresholdOfCallsToAppendLaunchKernelWithParamsToFail = std::numeric_limits::max(); uint32_t numberOfCallsToAppendLaunchKernelWithParams = 0; }; void SetUp() { dstPtr = new uint8_t[allocSize]; immediateDstPtr = new uint8_t[allocSize]; neoDevice = NEO::MockDevice::createWithNewExecutionEnvironment(NEO::defaultHwInfo.get()); auto mockBuiltIns = new MockBuiltins(); neoDevice->executionEnvironment->rootDeviceEnvironments[0]->builtins.reset(mockBuiltIns); NEO::DeviceVector devices; devices.push_back(std::unique_ptr(neoDevice)); driverHandle = std::make_unique>(); driverHandle->initialize(std::move(devices)); device = driverHandle->devices[0]; } void TearDown() { delete[] immediateDstPtr; delete[] dstPtr; } std::unique_ptr> driverHandle; NEO::MockDevice *neoDevice = nullptr; L0::Device *device = nullptr; static constexpr size_t allocSize = 70; static constexpr size_t patternSize = 8; uint8_t *dstPtr = nullptr; uint8_t pattern[patternSize] = {1, 2, 3, 4}; static constexpr size_t immediateAllocSize = 106; uint8_t immediatePattern = 4; uint8_t *immediateDstPtr = nullptr; }; using AppendFillTest = Test; HWTEST2_F(AppendFillTest, 
givenCallToAppendMemoryFillWithImmediateValueThenSuccessIsReturned, IsAtLeastSkl) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; auto commandList = std::make_unique>>(); commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u); auto result = commandList->appendMemoryFill(immediateDstPtr, &immediatePattern, sizeof(immediatePattern), immediateAllocSize, nullptr, 0, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } HWTEST2_F(AppendFillTest, givenCallToAppendMemoryFillThenSuccessIsReturned, IsAtLeastSkl) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; auto commandList = std::make_unique>>(); commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u); auto result = commandList->appendMemoryFill(dstPtr, pattern, patternSize, allocSize, nullptr, 0, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } HWTEST2_F(AppendFillTest, givenCallToAppendMemoryFillWithAppendLaunchKernelFailureThenSuccessIsNotReturned, IsAtLeastSkl) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; auto commandList = std::make_unique>>(); commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u); commandList->thresholdOfCallsToAppendLaunchKernelWithParamsToFail = 0; auto result = commandList->appendMemoryFill(dstPtr, pattern, patternSize, allocSize, nullptr, 0, nullptr); EXPECT_NE(ZE_RESULT_SUCCESS, result); } HWTEST2_F(AppendFillTest, givenTwoCallsToAppendMemoryFillWithSamePatternThenAllocationIsCreatedForEachCall, IsAtLeastSkl) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; auto commandList = std::make_unique>>(); commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u); ze_result_t result = commandList->appendMemoryFill(dstPtr, pattern, 4, allocSize, nullptr, 0, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); size_t patternAllocationsVectorSize = commandList->patternAllocations.size(); EXPECT_EQ(patternAllocationsVectorSize, 1u); uint8_t *newDstPtr = new uint8_t[allocSize]; result = 
commandList->appendMemoryFill(newDstPtr, pattern, patternSize, allocSize, nullptr, 0, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); size_t newPatternAllocationsVectorSize = commandList->patternAllocations.size(); EXPECT_GT(newPatternAllocationsVectorSize, patternAllocationsVectorSize); delete[] newDstPtr; } HWTEST2_F(AppendFillTest, givenTwoCallsToAppendMemoryFillWithDifferentPatternsThenAllocationIsCreatedForEachPattern, IsAtLeastSkl) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; auto commandList = std::make_unique>>(); commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u); ze_result_t result = commandList->appendMemoryFill(dstPtr, pattern, 4, allocSize, nullptr, 0, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); size_t patternAllocationsVectorSize = commandList->patternAllocations.size(); EXPECT_EQ(patternAllocationsVectorSize, 1u); uint8_t newPattern[patternSize] = {1, 2, 3, 4}; result = commandList->appendMemoryFill(dstPtr, newPattern, patternSize, allocSize, nullptr, 0, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); size_t newPatternAllocationsVectorSize = commandList->patternAllocations.size(); EXPECT_EQ(patternAllocationsVectorSize + 1u, newPatternAllocationsVectorSize); } HWTEST2_F(AppendFillTest, givenCallToAppendMemoryFillWithSizeNotMultipleOfPatternSizeThenSuccessIsReturned, IsAtLeastSkl) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; auto commandList = std::make_unique>>(); commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u); size_t nonMultipleSize = allocSize + 1; uint8_t *nonMultipleDstPtr = new uint8_t[nonMultipleSize]; auto result = commandList->appendMemoryFill(nonMultipleDstPtr, pattern, 4, nonMultipleSize, nullptr, 0, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); delete[] nonMultipleDstPtr; } HWTEST2_F(AppendFillTest, givenCallToAppendMemoryFillWithSizeNotMultipleOfPatternSizeAndAppendLaunchKernelFailureOnRemainderThenSuccessIsNotReturned, IsAtLeastSkl) { using GfxFamily 
= typename NEO::GfxFamilyMapper::GfxFamily; auto commandList = std::make_unique>>(); commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u); commandList->thresholdOfCallsToAppendLaunchKernelWithParamsToFail = 1; size_t nonMultipleSize = allocSize + 1; uint8_t *nonMultipleDstPtr = new uint8_t[nonMultipleSize]; auto result = commandList->appendMemoryFill(nonMultipleDstPtr, pattern, 4, nonMultipleSize, nullptr, 0, nullptr); EXPECT_NE(ZE_RESULT_SUCCESS, result); delete[] nonMultipleDstPtr; } } // namespace ult } // namespace L0 test_cmdlist_xehp_and_later.cpp000066400000000000000000000337301422164147700351400ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/cmdlist/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/preamble.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/source/cmdlist/cmdlist_hw.h" #include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/fixtures/module_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h" #include "level_zero/core/test/unit_tests/mocks/mock_module.h" #include "test_traits_common.h" namespace L0 { namespace ult { using CommandListTests = Test; HWCMDTEST_F(IGFX_XE_HP_CORE, CommandListTests, whenCommandListIsCreatedThenPCAndStateBaseAddressCmdsAreAddedAndCorrectlyProgrammed) { using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; ze_result_t returnValue; std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::Compute, 0u, returnValue)); auto &commandContainer = 
commandList->commandContainer; auto gmmHelper = commandContainer.getDevice()->getGmmHelper(); ASSERT_NE(nullptr, commandContainer.getCommandStream()); auto usedSpaceBefore = commandContainer.getCommandStream()->getUsed(); auto result = commandList->close(); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto usedSpaceAfter = commandContainer.getCommandStream()->getUsed(); ASSERT_GT(usedSpaceAfter, usedSpaceBefore); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), usedSpaceAfter)); auto itorPc = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), itorPc); auto cmdPc = genCmdCast(*itorPc); EXPECT_TRUE(UnitTestHelper::getPipeControlHdcPipelineFlush(*cmdPc)); EXPECT_TRUE(cmdPc->getTextureCacheInvalidationEnable()); EXPECT_TRUE(cmdPc->getCommandStreamerStallEnable()); auto itor = find(itorPc, cmdList.end()); ASSERT_NE(cmdList.end(), itor); auto cmdSba = genCmdCast(*itor); if constexpr (FamilyType::supportsSampler) { auto dsh = commandContainer.getIndirectHeap(NEO::HeapType::DYNAMIC_STATE); EXPECT_TRUE(cmdSba->getDynamicStateBaseAddressModifyEnable()); EXPECT_TRUE(cmdSba->getDynamicStateBufferSizeModifyEnable()); EXPECT_EQ(dsh->getHeapGpuBase(), cmdSba->getDynamicStateBaseAddress()); EXPECT_EQ(dsh->getHeapSizeInPages(), cmdSba->getDynamicStateBufferSize()); } else { EXPECT_FALSE(cmdSba->getDynamicStateBaseAddressModifyEnable()); EXPECT_FALSE(cmdSba->getDynamicStateBufferSizeModifyEnable()); } auto ssh = commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE); EXPECT_TRUE(cmdSba->getSurfaceStateBaseAddressModifyEnable()); EXPECT_EQ(ssh->getHeapGpuBase(), cmdSba->getSurfaceStateBaseAddress()); EXPECT_EQ(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST), cmdSba->getStatelessDataPortAccessMemoryObjectControlState()); EXPECT_TRUE(cmdSba->getDisableSupportForMultiGpuPartialWritesForStatelessMessages()); 
EXPECT_TRUE(cmdSba->getDisableSupportForMultiGpuAtomicsForStatelessAccesses()); } HWTEST2_F(CommandListTests, whenCommandListIsCreatedAndProgramExtendedPipeControlPriorToNonPipelinedStateCommandIsEnabledThenPCAndStateBaseAddressCmdsAreAddedAndCorrectlyProgrammed, IsAtLeastXeHpCore) { using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; DebugManagerStateRestore restorer; DebugManager.flags.ProgramExtendedPipeControlPriorToNonPipelinedStateCommand.set(1); ze_result_t returnValue; std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::Compute, 0u, returnValue)); auto &commandContainer = commandList->commandContainer; auto gmmHelper = commandContainer.getDevice()->getGmmHelper(); ASSERT_NE(nullptr, commandContainer.getCommandStream()); auto usedSpaceBefore = commandContainer.getCommandStream()->getUsed(); auto result = commandList->close(); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto usedSpaceAfter = commandContainer.getCommandStream()->getUsed(); ASSERT_GT(usedSpaceAfter, usedSpaceBefore); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), usedSpaceAfter)); auto itorPc = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), itorPc); auto cmdPc = genCmdCast(*itorPc); EXPECT_TRUE(UnitTestHelper::getPipeControlHdcPipelineFlush(*cmdPc)); EXPECT_TRUE(cmdPc->getTextureCacheInvalidationEnable()); EXPECT_TRUE(cmdPc->getCommandStreamerStallEnable()); if constexpr (TestTraits::isPipeControlExtendedPriorToNonPipelinedStateCommandSupported) { EXPECT_TRUE(cmdPc->getAmfsFlushEnable()); EXPECT_TRUE(cmdPc->getInstructionCacheInvalidateEnable()); EXPECT_TRUE(cmdPc->getConstantCacheInvalidationEnable()); EXPECT_TRUE(cmdPc->getStateCacheInvalidationEnable()); if constexpr (TestTraits::isUnTypedDataPortCacheFlushSupported) { EXPECT_TRUE(cmdPc->getUnTypedDataPortCacheFlush()); } } 
auto itor = find(itorPc, cmdList.end()); ASSERT_NE(cmdList.end(), itor); auto cmdSba = genCmdCast(*itor); if constexpr (FamilyType::supportsSampler) { auto dsh = commandContainer.getIndirectHeap(NEO::HeapType::DYNAMIC_STATE); EXPECT_TRUE(cmdSba->getDynamicStateBaseAddressModifyEnable()); EXPECT_TRUE(cmdSba->getDynamicStateBufferSizeModifyEnable()); EXPECT_EQ(dsh->getHeapGpuBase(), cmdSba->getDynamicStateBaseAddress()); EXPECT_EQ(dsh->getHeapSizeInPages(), cmdSba->getDynamicStateBufferSize()); } else { EXPECT_FALSE(cmdSba->getDynamicStateBaseAddressModifyEnable()); EXPECT_FALSE(cmdSba->getDynamicStateBufferSizeModifyEnable()); } auto ssh = commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE); EXPECT_TRUE(cmdSba->getSurfaceStateBaseAddressModifyEnable()); EXPECT_EQ(ssh->getHeapGpuBase(), cmdSba->getSurfaceStateBaseAddress()); EXPECT_EQ(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST), cmdSba->getStatelessDataPortAccessMemoryObjectControlState()); EXPECT_TRUE(cmdSba->getDisableSupportForMultiGpuPartialWritesForStatelessMessages()); EXPECT_TRUE(cmdSba->getDisableSupportForMultiGpuAtomicsForStatelessAccesses()); } using MultiTileCommandListTests = Test>; HWCMDTEST_F(IGFX_XE_HP_CORE, MultiTileCommandListTests, givenPartitionedCommandListWhenCommandListIsCreatedThenStateBaseAddressCmdWithMultiPartialAndAtomicsCorrectlyProgrammed) { using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; ze_result_t returnValue; std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::Compute, 0u, returnValue)); EXPECT_EQ(2u, commandList->partitionCount); auto &commandContainer = commandList->commandContainer; ASSERT_NE(nullptr, commandContainer.getCommandStream()); auto usedSpaceBefore = commandContainer.getCommandStream()->getUsed(); auto result = commandList->close(); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto usedSpaceAfter = commandContainer.getCommandStream()->getUsed(); ASSERT_GT(usedSpaceAfter, usedSpaceBefore); 
GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), usedSpaceAfter)); auto itorSba = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), itorSba); auto cmdSba = genCmdCast(*itorSba); EXPECT_FALSE(cmdSba->getDisableSupportForMultiGpuPartialWritesForStatelessMessages()); EXPECT_TRUE(cmdSba->getDisableSupportForMultiGpuAtomicsForStatelessAccesses()); } using CommandListTestsReserveSize = Test; HWTEST2_F(CommandListTestsReserveSize, givenCommandListWhenGetReserveSshSizeThen4PagesReturned, IsAtLeastXeHpCore) { L0::CommandListCoreFamily commandList(1u); EXPECT_EQ(commandList.getReserveSshSize(), 4 * MemoryConstants::pageSize); } using CommandListAppendLaunchKernel = Test; HWTEST2_F(CommandListAppendLaunchKernel, givenVariousKernelsWhenUpdateStreamPropertiesIsCalledThenRequiredStateFinalStateAndCommandsToPatchAreCorrectlySet, IsAtLeastXeHpCore) { DebugManagerStateRestore restorer; DebugManager.flags.AllowMixingRegularAndCooperativeKernels.set(1); DebugManager.flags.AllowPatchingVfeStateInCommandLists.set(1); Mock<::L0::Kernel> defaultKernel; auto pMockModule1 = std::unique_ptr(new Mock(device, nullptr)); defaultKernel.module = pMockModule1.get(); Mock<::L0::Kernel> cooperativeKernel; auto pMockModule2 = std::unique_ptr(new Mock(device, nullptr)); cooperativeKernel.module = pMockModule2.get(); cooperativeKernel.immutableData.kernelDescriptor->kernelAttributes.flags.usesSyncBuffer = true; auto pCommandList = std::make_unique>>(); auto result = pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u); ASSERT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(-1, pCommandList->requiredStreamState.frontEndState.computeDispatchAllWalkerEnable.value); EXPECT_EQ(-1, pCommandList->finalStreamState.frontEndState.computeDispatchAllWalkerEnable.value); EXPECT_EQ(0u, pCommandList->commandsToPatch.size()); pCommandList->updateStreamProperties(defaultKernel, false, false); 
EXPECT_EQ(0, pCommandList->requiredStreamState.frontEndState.computeDispatchAllWalkerEnable.value); EXPECT_EQ(0, pCommandList->finalStreamState.frontEndState.computeDispatchAllWalkerEnable.value); EXPECT_EQ(0u, pCommandList->commandsToPatch.size()); pCommandList->reset(); pCommandList->updateStreamProperties(cooperativeKernel, false, true); pCommandList->updateStreamProperties(cooperativeKernel, false, true); EXPECT_EQ(1, pCommandList->requiredStreamState.frontEndState.computeDispatchAllWalkerEnable.value); EXPECT_EQ(1, pCommandList->finalStreamState.frontEndState.computeDispatchAllWalkerEnable.value); EXPECT_EQ(0u, pCommandList->commandsToPatch.size()); pCommandList->reset(); pCommandList->updateStreamProperties(defaultKernel, false, false); pCommandList->updateStreamProperties(cooperativeKernel, false, true); EXPECT_EQ(0, pCommandList->requiredStreamState.frontEndState.computeDispatchAllWalkerEnable.value); EXPECT_EQ(1, pCommandList->finalStreamState.frontEndState.computeDispatchAllWalkerEnable.value); EXPECT_EQ(1u, pCommandList->commandsToPatch.size()); pCommandList->reset(); pCommandList->updateStreamProperties(cooperativeKernel, false, true); pCommandList->updateStreamProperties(defaultKernel, false, false); pCommandList->updateStreamProperties(cooperativeKernel, false, true); EXPECT_EQ(1, pCommandList->requiredStreamState.frontEndState.computeDispatchAllWalkerEnable.value); EXPECT_EQ(1, pCommandList->finalStreamState.frontEndState.computeDispatchAllWalkerEnable.value); EXPECT_EQ(2u, pCommandList->commandsToPatch.size()); pCommandList->reset(); pCommandList->updateStreamProperties(defaultKernel, false, false); pCommandList->updateStreamProperties(defaultKernel, false, false); pCommandList->updateStreamProperties(cooperativeKernel, false, true); EXPECT_EQ(0, pCommandList->requiredStreamState.frontEndState.computeDispatchAllWalkerEnable.value); EXPECT_EQ(1, pCommandList->finalStreamState.frontEndState.computeDispatchAllWalkerEnable.value); EXPECT_EQ(1u, 
pCommandList->commandsToPatch.size()); pCommandList->reset(); EXPECT_EQ(-1, pCommandList->requiredStreamState.frontEndState.computeDispatchAllWalkerEnable.value); EXPECT_EQ(-1, pCommandList->finalStreamState.frontEndState.computeDispatchAllWalkerEnable.value); EXPECT_EQ(0u, pCommandList->commandsToPatch.size()); } HWTEST2_F(CommandListAppendLaunchKernel, givenVariousKernelsAndPatchingDisallowedWhenUpdateStreamPropertiesIsCalledThenCommandsToPatchAreEmpty, IsAtLeastXeHpCore) { DebugManagerStateRestore restorer; DebugManager.flags.AllowMixingRegularAndCooperativeKernels.set(1); Mock<::L0::Kernel> defaultKernel; auto pMockModule1 = std::unique_ptr(new Mock(device, nullptr)); defaultKernel.module = pMockModule1.get(); Mock<::L0::Kernel> cooperativeKernel; auto pMockModule2 = std::unique_ptr(new Mock(device, nullptr)); cooperativeKernel.module = pMockModule2.get(); cooperativeKernel.immutableData.kernelDescriptor->kernelAttributes.flags.usesSyncBuffer = true; auto pCommandList = std::make_unique>>(); auto result = pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u); ASSERT_EQ(ZE_RESULT_SUCCESS, result); pCommandList->updateStreamProperties(defaultKernel, false, false); pCommandList->updateStreamProperties(cooperativeKernel, false, true); EXPECT_EQ(0u, pCommandList->commandsToPatch.size()); pCommandList->reset(); DebugManager.flags.AllowPatchingVfeStateInCommandLists.set(1); pCommandList->updateStreamProperties(defaultKernel, false, false); pCommandList->updateStreamProperties(cooperativeKernel, false, true); EXPECT_EQ(1u, pCommandList->commandsToPatch.size()); pCommandList->reset(); } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/cmdqueue/000077500000000000000000000000001422164147700271235ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/cmdqueue/CMakeLists.txt000066400000000000000000000010331422164147700316600ustar00rootroot00000000000000# # 
Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT # target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/test_cmdqueue_1.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_cmdqueue_2.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_cmdqueue_enqueue_cmdlist.cpp ) if(TESTS_DG2) target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/test_cmdqueue_dg2.cpp ) endif() add_subdirectories() compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_1.cpp000066400000000000000000003231141422164147700327220ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/state_base_address.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "shared/test/common/libult/ult_command_stream_receiver.h" #include "shared/test/common/mocks/mock_bindless_heaps_helper.h" #include "shared/test/common/mocks/mock_command_stream_receiver.h" #include "shared/test/common/mocks/ult_device_factory.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/fixtures/module_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h" #include "level_zero/core/test/unit_tests/mocks/mock_memory_manager.h" namespace L0 { namespace ult { using CommandQueueCreate = Test; TEST_F(CommandQueueCreate, whenCreatingCommandQueueThenItIsInitialized) { auto csr = std::unique_ptr(neoDevice->createCommandStreamReceiver()); csr->setupContext(*neoDevice->getDefaultEngine().osContext); const ze_command_queue_desc_t desc{}; ze_result_t returnValue; auto commandQueue = whitebox_cast(CommandQueue::create(productFamily, device, csr.get(), &desc, false, false, returnValue)); EXPECT_EQ(returnValue, ZE_RESULT_SUCCESS); ASSERT_NE(nullptr, commandQueue); 
size_t commandStreamSize = MemoryConstants::kiloByte * 128u; ASSERT_NE(nullptr, commandQueue->commandStream); EXPECT_EQ(commandStreamSize, commandQueue->commandStream->getMaxAvailableSpace()); EXPECT_EQ(commandQueue->buffers.getCurrentBufferAllocation(), commandQueue->commandStream->getGraphicsAllocation()); EXPECT_LT(0u, commandQueue->commandStream->getAvailableSpace()); EXPECT_EQ(csr.get(), commandQueue->getCsr()); EXPECT_EQ(device, commandQueue->getDevice()); EXPECT_EQ(0u, commandQueue->getTaskCount()); EXPECT_NE(nullptr, commandQueue->buffers.getCurrentBufferAllocation()); size_t expectedCommandBufferAllocationSize = commandStreamSize + MemoryConstants::cacheLineSize + NEO::CSRequirements::csOverfetchSize; expectedCommandBufferAllocationSize = alignUp(expectedCommandBufferAllocationSize, MemoryConstants::pageSize64k); size_t actualCommandBufferSize = commandQueue->buffers.getCurrentBufferAllocation()->getUnderlyingBufferSize(); EXPECT_EQ(expectedCommandBufferAllocationSize, actualCommandBufferSize); returnValue = commandQueue->destroy(); EXPECT_EQ(returnValue, ZE_RESULT_SUCCESS); } TEST_F(CommandQueueCreate, whenSynchronizeByPollingTaskCountThenCallsPrintOutputOnPrintfFunctionsStoredAndClearsFunctionContainer) { const ze_command_queue_desc_t desc{}; ze_result_t returnValue; auto commandQueue = whitebox_cast(CommandQueue::create(productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &desc, false, false, returnValue)); Mock kernel1, kernel2; commandQueue->printfFunctionContainer.push_back(&kernel1); commandQueue->printfFunctionContainer.push_back(&kernel2); commandQueue->synchronizeByPollingForTaskCount(0u); EXPECT_EQ(0u, commandQueue->printfFunctionContainer.size()); EXPECT_EQ(1u, kernel1.printPrintfOutputCalledTimes); EXPECT_EQ(1u, kernel2.printPrintfOutputCalledTimes); commandQueue->destroy(); } HWTEST_F(CommandQueueCreate, whenReserveLinearStreamThenBufferAllocationSwitched) { const ze_command_queue_desc_t desc{}; ze_result_t returnValue; 
// Body of whenReserveLinearStreamThenBufferAllocationSwitched (header is on the previous line).
    auto commandQueue = whitebox_cast(CommandQueue::create(productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &desc, false, false, returnValue));
    size_t maxSize = commandQueue->commandStream->getMaxAvailableSpace();
    auto firstAllocation = commandQueue->commandStream->getGraphicsAllocation();
    EXPECT_EQ(firstAllocation, commandQueue->buffers.getCurrentBufferAllocation());
    uint32_t currentTaskCount = 33u;
    // NOTE(review): getUltCommandStreamReceiver has no template argument in this text; it looks stripped.
    auto &csr = neoDevice->getUltCommandStreamReceiver();
    csr.latestWaitForCompletionWithTimeoutTaskCount = currentTaskCount;

    // First switch: leave only 16 bytes free, stamp the current buffer with 121, then ask
    // for 32 bytes. The stream must move to a different allocation and the CSR's recorded
    // wait task count is expected to stay at its initial value.
    commandQueue->commandStream->getSpace(maxSize - 16u);
    commandQueue->buffers.setCurrentFlushStamp(121u, 121u);
    size_t nextSize = 16u + 16u;
    commandQueue->reserveLinearStreamSize(nextSize);
    auto secondAllocation = commandQueue->commandStream->getGraphicsAllocation();
    EXPECT_EQ(secondAllocation, commandQueue->buffers.getCurrentBufferAllocation());
    EXPECT_NE(firstAllocation, secondAllocation);
    EXPECT_EQ(csr.latestWaitForCompletionWithTimeoutTaskCount, currentTaskCount);

    // Second switch: stamp the second buffer with 244 and overflow again. The stream is
    // expected to return to the first allocation, and the CSR is expected to have recorded
    // a wait for 121, the stamp given to the first buffer above.
    commandQueue->commandStream->getSpace(maxSize - 16u);
    commandQueue->buffers.setCurrentFlushStamp(244u, 244u);
    commandQueue->reserveLinearStreamSize(nextSize);
    auto thirdAllocation = commandQueue->commandStream->getGraphicsAllocation();
    EXPECT_EQ(thirdAllocation, commandQueue->buffers.getCurrentBufferAllocation());
    EXPECT_EQ(thirdAllocation, firstAllocation);
    EXPECT_NE(thirdAllocation, secondAllocation);
    EXPECT_EQ(csr.latestWaitForCompletionWithTimeoutTaskCount, 121u);

    // Third switch: back to the second allocation; the recorded wait is expected to be 244.
    commandQueue->commandStream->getSpace(maxSize - 16u);
    commandQueue->reserveLinearStreamSize(nextSize);
    auto fourthAllocation = commandQueue->commandStream->getGraphicsAllocation();
    EXPECT_EQ(fourthAllocation, commandQueue->buffers.getCurrentBufferAllocation());
    EXPECT_EQ(fourthAllocation, secondAllocation);
    EXPECT_NE(fourthAllocation, firstAllocation);
    EXPECT_EQ(csr.latestWaitForCompletionWithTimeoutTaskCount, 244u);
    commandQueue->destroy();
}

// Header of the next test; its name and body follow on the next line of the file.
TEST_F(CommandQueueCreate,
// Second macro argument of the TEST_F opened on the previous line. The test passes
// PRODUCT_FAMILY::IGFX_MAX_PRODUCT to CommandQueue::create and expects nullptr back.
       whenCreatingCommandQueueWithInvalidProductFamilyThenFailureIsReturned) {
    const ze_command_queue_desc_t desc = {};
    ze_result_t returnValue;
    // NOTE(review): `std::unique_ptr(` has no template argument in this text; it looks stripped.
    auto csr = std::unique_ptr(neoDevice->createCommandStreamReceiver());
    csr->setupContext(*neoDevice->getDefaultEngine().osContext);
    L0::CommandQueue *commandQueue = CommandQueue::create(PRODUCT_FAMILY::IGFX_MAX_PRODUCT, device, csr.get(), &desc, false, false, returnValue);
    ASSERT_EQ(nullptr, commandQueue);
}

// Checks that the underlying size of the current buffer allocation is at least the stream's
// max available space, both initially and after reserveLinearStreamSize is asked for more
// room than the 16 bytes left in the stream.
TEST_F(CommandQueueCreate, whenCmdBuffersAllocationsAreCreatedThenSizeIsNotLessThanQueuesLinearStreamSize) {
    const ze_command_queue_desc_t desc = {};
    ze_result_t returnValue;
    auto commandQueue = whitebox_cast(CommandQueue::create(productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &desc, false, false, returnValue));
    size_t maxSize = commandQueue->commandStream->getMaxAvailableSpace();
    auto sizeFirstBuffer = commandQueue->buffers.getCurrentBufferAllocation()->getUnderlyingBufferSize();
    EXPECT_LE(maxSize, sizeFirstBuffer);
    commandQueue->commandStream->getSpace(maxSize - 16u);
    size_t nextSize = 16u + 16u;
    commandQueue->reserveLinearStreamSize(nextSize);
    auto sizeSecondBuffer = commandQueue->buffers.getCurrentBufferAllocation()->getUnderlyingBufferSize();
    EXPECT_LE(maxSize, sizeSecondBuffer);
    commandQueue->destroy();
}

// Executes the same command list handle 100 times in one call and bounds the amount of
// queue command stream consumed (assertions are on the next line of the file).
HWTEST_F(CommandQueueCreate, given100CmdListsWhenExecutingThenCommandStreamIsNotDepleted) {
    using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
    using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END;
    // NOTE(review): PIPE_CONTROL is not referenced in the visible remainder of this test.
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    const ze_command_queue_desc_t desc = {};
    ze_result_t returnValue;
    auto commandQueue = whitebox_cast(CommandQueue::create(productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &desc, false, false, returnValue));
    ASSERT_NE(nullptr, commandQueue);
    Mock kernel;
    kernel.immutableData.device = device;
    auto commandList = std::unique_ptr(whitebox_cast(CommandList::create(productFamily,
// Remaining arguments of the CommandList::create call opened on the previous line
// (test given100CmdListsWhenExecutingThenCommandStreamIsNotDepleted).
                                                                    device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)));
    ASSERT_NE(nullptr, commandList);
    ze_group_count_t dispatchFunctionArguments{1, 1, 1};
    commandList->appendLaunchKernel(kernel.toHandle(), &dispatchFunctionArguments, nullptr, 0, nullptr);
    // Every entry refers to the same command list.
    const size_t numHandles = 100;
    ze_command_list_handle_t cmdListHandles[numHandles];
    for (size_t i = 0; i < numHandles; i++) {
        cmdListHandles[i] = commandList->toHandle();
    }
    auto sizeBefore = commandQueue->commandStream->getUsed();
    commandQueue->executeCommandLists(numHandles, cmdListHandles, nullptr, false);
    auto sizeAfter = commandQueue->commandStream->getUsed();
    EXPECT_LT(sizeBefore, sizeAfter);
    // Lower bound: one MI_BATCH_BUFFER_START per handle plus one MI_BATCH_BUFFER_END.
    size_t streamSizeMinimum = sizeof(MI_BATCH_BUFFER_END) + numHandles * sizeof(MI_BATCH_BUFFER_START);
    EXPECT_LE(streamSizeMinimum, sizeAfter - sizeBefore);
    // Upper bound used by the test: strictly less than twice the lower bound.
    size_t maxSize = 2 * streamSizeMinimum;
    EXPECT_GT(maxSize, sizeAfter - sizeBefore);
    commandQueue->destroy();
}

// With the UpdateTaskCountFromWait debug flag set to 3, executes an empty command list and
// then scans the queue's command stream for PIPE_CONTROLs (scan continues on the next line).
HWTEST_F(CommandQueueCreate, givenUpdateTaskCountFromWaitWhenDispatchTaskCountWriteThenNoPipeControlFlushed) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using POST_SYNC_OPERATION = typename FamilyType::PIPE_CONTROL::POST_SYNC_OPERATION;
    DebugManagerStateRestore restorer;
    DebugManager.flags.UpdateTaskCountFromWait.set(3);
    const ze_command_queue_desc_t desc = {};
    ze_result_t returnValue;
    auto commandQueue = whitebox_cast(CommandQueue::create(productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &desc, false, false, returnValue));
    // NOTE(review): `std::unique_ptr(` has no template argument in this text; it looks stripped.
    auto commandList = std::unique_ptr(whitebox_cast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)));
    ASSERT_NE(nullptr, commandList);
    ze_command_list_handle_t cmdListHandle = commandList->toHandle();
    commandQueue->executeCommandLists(1, &cmdListHandle, nullptr, false);
    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandQueue->commandStream->getCpuBase(), 0), commandQueue->commandStream->getUsed()));
    auto
// Continues givenUpdateTaskCountFromWaitWhenDispatchTaskCountWriteThenNoPipeControlFlushed:
// the declaration's `auto` is on the previous line. The loop fails the test if any parsed
// PIPE_CONTROL uses the WRITE_IMMEDIATE_DATA post-sync operation.
// NOTE(review): `findAll(` and `reinterpret_cast(` have no template/type argument in this
// text; they look stripped — confirm against the original source.
         pipeControls = findAll(cmdList.begin(), cmdList.end());
    bool pipeControlsPostSync = false;
    for (size_t i = 0; i < pipeControls.size(); i++) {
        auto pipeControl = reinterpret_cast(*pipeControls[i]);
        if (pipeControl->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) {
            pipeControlsPostSync = true;
        }
    }
    EXPECT_FALSE(pipeControlsPostSync);
    commandQueue->destroy();
}

// Submits a batch buffer with an empty ResidencyContainer and expects the CSR's
// makeResidentCalledTimes counter to remain 0.
HWTEST_F(CommandQueueCreate, givenContainerWithAllocationsWhenResidencyContainerIsEmptyThenMakeResidentWasNotCalled) {
    // NOTE(review): `std::make_unique(` names no type in this text; the CSR mock type looks stripped.
    auto csr = std::make_unique(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield());
    csr->setupContext(*neoDevice->getDefaultEngine().osContext);
    const ze_command_queue_desc_t desc = {};
    ze_result_t returnValue;
    auto commandQueue = whitebox_cast(CommandQueue::create(productFamily, device, csr.get(), &desc, false, false, returnValue));
    ResidencyContainer container;
    commandQueue->submitBatchBuffer(0, container, nullptr, false);
    EXPECT_EQ(csr->makeResidentCalledTimes, 0u);
    // The stream's graphics allocation is expected to carry the CSR's current task count.
    EXPECT_EQ(commandQueue->commandStream->getGraphicsAllocation()->getTaskCount(commandQueue->csr->getOsContext().getContextId()), commandQueue->csr->peekTaskCount());
    // NOTE(review): this assertion is token-for-token identical to the one above; possibly a
    // different getter (e.g. the residency task count) was intended — confirm.
    EXPECT_EQ(commandQueue->commandStream->getGraphicsAllocation()->getTaskCount(commandQueue->csr->getOsContext().getContextId()), commandQueue->csr->peekTaskCount());
    commandQueue->destroy();
}

// Expects submitBatchBuffer to return NEO::SubmissionStatus::FAILED with the CSR used here.
// NOTE(review): presumably the (stripped) CSR mock type is one whose submission fails — the
// type name is not visible in this text.
HWTEST_F(CommandQueueCreate, givenCommandStreamReceiverFailsThenSubmitBatchBufferReturnsError) {
    auto csr = std::make_unique(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield());
    csr->setupContext(*neoDevice->getDefaultEngine().osContext);
    const ze_command_queue_desc_t desc = {};
    ze_result_t returnValue;
    auto commandQueue = whitebox_cast(CommandQueue::create(productFamily, device, csr.get(), &desc, false, false, returnValue));
    ResidencyContainer container;
    NEO::SubmissionStatus ret = commandQueue->submitBatchBuffer(0, container, nullptr, false);
    EXPECT_EQ(ret, NEO::SubmissionStatus::FAILED);
// Closes givenCommandStreamReceiverFailsThenSubmitBatchBufferReturnsError (previous line).
    commandQueue->destroy();
}

// Expects submitBatchBuffer to return NEO::SubmissionStatus::OUT_OF_MEMORY with the CSR used here.
// NOTE(review): `std::make_unique(` names no type in this text; the CSR mock type looks
// stripped, so what makes the submission report out-of-memory is not visible here.
HWTEST_F(CommandQueueCreate, givenOutOfMemoryThenSubmitBatchBufferReturnsOutOfMemoryError) {
    auto csr = std::make_unique(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield());
    csr->setupContext(*neoDevice->getDefaultEngine().osContext);
    const ze_command_queue_desc_t desc = {};
    ze_result_t returnValue;
    auto commandQueue = whitebox_cast(CommandQueue::create(productFamily, device, csr.get(), &desc, false, false, returnValue));
    ResidencyContainer container;
    NEO::SubmissionStatus ret = commandQueue->submitBatchBuffer(0, container, nullptr, false);
    EXPECT_EQ(ret, NEO::SubmissionStatus::OUT_OF_MEMORY);
    commandQueue->destroy();
}

// Expects a freshly created queue to expose a 128 KB linear stream, backed by an allocation
// of stream size + cache line + CS overfetch, aligned up to 64 KB.
TEST_F(CommandQueueCreate, whenCommandQueueCreatedThenExpectLinearStreamInitializedWithExpectedSize) {
    const ze_command_queue_desc_t desc = {};
    ze_result_t returnValue;
    auto commandQueue = whitebox_cast(CommandQueue::create(productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &desc, false, false, returnValue));
    ASSERT_NE(commandQueue, nullptr);
    size_t commandStreamSize = MemoryConstants::kiloByte * 128u;
    EXPECT_EQ(commandStreamSize, commandQueue->commandStream->getMaxAvailableSpace());
    size_t expectedCommandBufferAllocationSize = commandStreamSize + MemoryConstants::cacheLineSize + NEO::CSRequirements::csOverfetchSize;
    expectedCommandBufferAllocationSize = alignUp(expectedCommandBufferAllocationSize, MemoryConstants::pageSize64k);
    size_t actualCommandBufferSize = commandQueue->buffers.getCurrentBufferAllocation()->getUnderlyingBufferSize();
    EXPECT_EQ(expectedCommandBufferAllocationSize, actualCommandBufferSize);
    commandQueue->destroy();
}

// Creates a queue in ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS mode; the rest of the body is on the
// next line of the file.
HWTEST_F(CommandQueueCreate, givenQueueInAsyncModeAndRugularCmdListWithAppendBarrierThenFlushTaskIsNotUsed) {
    ze_command_queue_desc_t desc = {};
    desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;
    ze_result_t returnValue;
    auto commandQueue = whitebox_cast(CommandQueue::create(productFamily, device,
// Remaining arguments of the CommandQueue::create call opened on the previous line
// (test givenQueueInAsyncModeAndRugularCmdListWithAppendBarrierThenFlushTaskIsNotUsed).
                                                           neoDevice->getDefaultEngine().commandStreamReceiver, &desc, false, false, returnValue));
    ASSERT_NE(nullptr, commandQueue);
    // NOTE(review): `std::unique_ptr(` has no template argument in this text; it looks stripped.
    auto commandList = std::unique_ptr(whitebox_cast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)));
    ASSERT_NE(nullptr, commandList);
    // NOTE(review): the list is never executed on the queue and nothing is asserted about
    // flush task; as written, only creation and appendBarrier are exercised.
    commandList->appendBarrier(nullptr, 0, nullptr);
    commandQueue->destroy();
}

// Same sequence as the asynchronous variant, with ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS.
// NOTE(review): likewise contains no assertion about flush task — confirm intended checks.
HWTEST_F(CommandQueueCreate, givenQueueInSyncModeAndRugularCmdListWithAppendBarrierThenFlushTaskIsNotUsed) {
    ze_command_queue_desc_t desc = {};
    desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS;
    ze_result_t returnValue;
    auto commandQueue = whitebox_cast(CommandQueue::create(productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &desc, false, false, returnValue));
    ASSERT_NE(nullptr, commandQueue);
    auto commandList = std::unique_ptr(whitebox_cast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)));
    ASSERT_NE(nullptr, commandList);
    commandList->appendBarrier(nullptr, 0, nullptr);
    commandQueue->destroy();
}

// Matcher passed as the last argument of several HWTEST2_F tests below.
// NOTE(review): the product range arguments of IsWithinProducts are missing in this text.
using CommandQueueSBASupport = IsWithinProducts;

// Memory manager mock that records every getInternalHeapBaseAddress call (call count and
// the arguments of each call) and returns a configurable result.
struct MockMemoryManagerCommandQueueSBA : public MemoryManagerMock {
    // NOTE(review): `const_cast(` has no target type in this text; it looks stripped.
    MockMemoryManagerCommandQueueSBA(NEO::ExecutionEnvironment &executionEnvironment) : MemoryManagerMock(const_cast(executionEnvironment)) {}
    // Counts the call, stores its arguments, and returns getInternalHeapBaseAddressResult.
    uint64_t getInternalHeapBaseAddress(uint32_t rootDeviceIndex, bool useLocalMemory) override {
        getInternalHeapBaseAddressCalled++;
        getInternalHeapBaseAddressParamsPassed.push_back({rootDeviceIndex, useLocalMemory});
        return getInternalHeapBaseAddressResult;
    }

    // Arguments captured from one getInternalHeapBaseAddress call.
    struct GetInternalHeapBaseAddressParams {
        uint32_t rootDeviceIndex{};
        bool useLocalMemory{};
    };

    uint32_t getInternalHeapBaseAddressCalled = 0u; // number of calls seen so far
    uint64_t getInternalHeapBaseAddressResult = 0u; // value handed back to the caller
    // NOTE(review): StackVec's template arguments are missing in this text.
    StackVec getInternalHeapBaseAddressParamsPassed{}; // per-call arguments, in call order
};

// Fixture that builds its own execution environment with MockMemoryManagerCommandQueueSBA
// installed as the memory manager (SetUp continues on the next line of the file).
struct CommandQueueProgramSBATest : public ::testing::Test {
    void SetUp() override {
        executionEnvironment = new NEO::ExecutionEnvironment();
// Continues CommandQueueProgramSBATest::SetUp (opened on the previous line): prepares
// numRootDevices root device environments with the default hardware info, installs the
// recording memory manager, and creates a mock device at rootDeviceIndex.
        executionEnvironment->prepareRootDeviceEnvironments(numRootDevices);
        for (uint32_t i = 0; i < numRootDevices; i++) {
            executionEnvironment->rootDeviceEnvironments[i]->setHwInfo(NEO::defaultHwInfo.get());
        }
        memoryManager = new MockMemoryManagerCommandQueueSBA(*executionEnvironment);
        // The execution environment takes the pointer via reset(); `memoryManager` is kept
        // as a raw pointer so tests can read the recorded calls.
        executionEnvironment->memoryManager.reset(memoryManager);
        neoDevice = NEO::MockDevice::create(executionEnvironment, rootDeviceIndex);
        // NOTE(review): `std::vector>`, `std::unique_ptr(` and `std::make_unique>()` are
        // missing their template arguments in this text; they look stripped.
        std::vector> devices;
        devices.push_back(std::unique_ptr(neoDevice));
        driverHandle = std::make_unique>();
        driverHandle->initialize(std::move(devices));
        device = driverHandle->devices[0];
    }
    void TearDown() override { }

    NEO::ExecutionEnvironment *executionEnvironment = nullptr;
    std::unique_ptr> driverHandle;
    NEO::MockDevice *neoDevice = nullptr;
    L0::Device *device = nullptr;
    MockMemoryManagerCommandQueueSBA *memoryManager = nullptr;
    // The device under test is created at root index 1 of 2, so assertions on the recorded
    // rootDeviceIndex can tell it apart from the default index 0.
    const uint32_t rootDeviceIndex = 1u;
    const uint32_t numRootDevices = 2u;
};

// Calls programStateBaseAddress and inspects the getInternalHeapBaseAddress calls recorded
// by the mock memory manager: two calls per invocation, each with the fixture's
// rootDeviceIndex. The expected useLocalMemory values depend on isaInLocalMemory.
HWTEST2_F(CommandQueueProgramSBATest, whenCreatingCommandQueueThenItIsInitialized, CommandQueueSBASupport) {
    ze_command_queue_desc_t desc = {};
    auto csr = std::unique_ptr(neoDevice->createCommandStreamReceiver());
    csr->setupContext(*neoDevice->getDefaultEngine().osContext);
    // NOTE(review): MockCommandQueueHw has no template argument in this text; it looks stripped.
    auto commandQueue = new MockCommandQueueHw(device, csr.get(), &desc);
    commandQueue->initialize(false, false);
    uint32_t alignedSize = 4096u;
    NEO::LinearStream child(commandQueue->commandStream->getSpace(alignedSize), alignedSize);
    auto &hwHelper = HwHelper::get(neoDevice->getHardwareInfo().platform.eRenderCoreFamily);
    const bool isaInLocalMemory = !hwHelper.useSystemMemoryPlacementForISA(neoDevice->getHardwareInfo());
    // First invocation: second argument is `true`.
    commandQueue->programStateBaseAddress(0u, true, child, true);
    EXPECT_EQ(2u, memoryManager->getInternalHeapBaseAddressCalled);
    EXPECT_EQ(rootDeviceIndex, memoryManager->getInternalHeapBaseAddressParamsPassed[0].rootDeviceIndex);
    EXPECT_EQ(rootDeviceIndex, memoryManager->getInternalHeapBaseAddressParamsPassed[1].rootDeviceIndex);
    if (isaInLocalMemory) {
// Continues whenCreatingCommandQueueThenItIsInitialized inside the `if (isaInLocalMemory)`
// opened on the previous line. After the first programStateBaseAddress call, recorded call 0
// is expected to use local memory in both branches; call 1 only when ISA is in local memory.
        EXPECT_TRUE(memoryManager->getInternalHeapBaseAddressParamsPassed[0].useLocalMemory);
        EXPECT_TRUE(memoryManager->getInternalHeapBaseAddressParamsPassed[1].useLocalMemory);
    } else {
        EXPECT_TRUE(memoryManager->getInternalHeapBaseAddressParamsPassed[0].useLocalMemory);
        EXPECT_FALSE(memoryManager->getInternalHeapBaseAddressParamsPassed[1].useLocalMemory);
    }
    // Second invocation: second argument is `false`; two more calls are expected (4 in total).
    commandQueue->programStateBaseAddress(0u, false, child, true);
    EXPECT_EQ(4u, memoryManager->getInternalHeapBaseAddressCalled);
    EXPECT_EQ(rootDeviceIndex, memoryManager->getInternalHeapBaseAddressParamsPassed[2].rootDeviceIndex);
    EXPECT_EQ(rootDeviceIndex, memoryManager->getInternalHeapBaseAddressParamsPassed[3].rootDeviceIndex);
    if (isaInLocalMemory) {
        EXPECT_TRUE(memoryManager->getInternalHeapBaseAddressParamsPassed[2].useLocalMemory);
        EXPECT_FALSE(memoryManager->getInternalHeapBaseAddressParamsPassed[3].useLocalMemory);
    } else {
        EXPECT_FALSE(memoryManager->getInternalHeapBaseAddressParamsPassed[2].useLocalMemory);
        EXPECT_FALSE(memoryManager->getInternalHeapBaseAddressParamsPassed[3].useLocalMemory);
    }
    commandQueue->destroy();
}

// Programs state base address and compares the stateless data port MOCS read from a
// STATE_BASE_ADDRESS command against the GMM MOCS for
// GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED.
HWTEST2_F(CommandQueueProgramSBATest, whenProgrammingStateBaseAddressWithcontainsStatelessUncachedResourceThenCorrectMocsAreSet, CommandQueueSBASupport) {
    using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;
    ze_command_queue_desc_t desc = {};
    // NOTE(review): `std::unique_ptr(`, `MockCommandQueueHw` and `static_cast(` are missing
    // their template/type arguments in this text; they look stripped.
    auto csr = std::unique_ptr(neoDevice->createCommandStreamReceiver());
    csr->setupContext(*neoDevice->getDefaultEngine().osContext);
    auto commandQueue = new MockCommandQueueHw(device, csr.get(), &desc);
    commandQueue->initialize(false, false);
    uint32_t alignedSize = 4096u;
    NEO::LinearStream child(commandQueue->commandStream->getSpace(alignedSize), alignedSize);
    commandQueue->programStateBaseAddress(0u, true, child, true);
    // NOTE(review): pSbaCmd points at space newly reserved from commandStream after the call,
    // not at a location inside `child` — confirm this is where the command is expected.
    auto pSbaCmd = static_cast(commandQueue->commandStream->getSpace(sizeof(STATE_BASE_ADDRESS)));
    uint32_t statelessMocsIndex = pSbaCmd->getStatelessDataPortAccessMemoryObjectControlState();
    auto gmmHelper =
// Initializer of `gmmHelper`, declared at the end of the previous line (test
// whenProgrammingStateBaseAddressWithcontainsStatelessUncachedResourceThenCorrectMocsAreSet).
                     device->getNEODevice()->getGmmHelper();
    uint32_t expectedMocs = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED);
    EXPECT_EQ(statelessMocsIndex, expectedMocs);
    commandQueue->destroy();
}

// With UseBindlessMode set to 1 and a mock bindless heaps helper installed on the root
// device environment, programs state base address and checks the bindless surface state
// fields of the STATE_BASE_ADDRESS command found in the queue's command stream.
HWTEST2_F(CommandQueueProgramSBATest, givenBindlessModeEnabledWhenProgrammingStateBaseAddressThenBindlessBaseAddressAndSizeAreSet, IsAtLeastSkl) {
    using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;
    DebugManagerStateRestore dbgRestorer;
    DebugManager.flags.UseBindlessMode.set(1);
    // NOTE(review): `std::make_unique(`, `std::unique_ptr(`, `MockCommandQueueHw`, `find(` and
    // `genCmdCast(` are missing their template arguments in this text; they look stripped.
    auto bindlessHeapsHelper = std::make_unique(neoDevice->getMemoryManager(), neoDevice->getNumGenericSubDevices() > 1, neoDevice->getRootDeviceIndex(), neoDevice->getDeviceBitfield());
    // Keep a raw pointer for later access; ownership moves to the root device environment.
    MockBindlesHeapsHelper *bindlessHeapsHelperPtr = bindlessHeapsHelper.get();
    neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()]->bindlessHeapsHelper.reset(bindlessHeapsHelper.release());
    NEO::MockGraphicsAllocation baseAllocation;
    bindlessHeapsHelperPtr->surfaceStateHeaps[NEO::BindlessHeapsHelper::GLOBAL_SSH].reset(new IndirectHeap(&baseAllocation, true));
    baseAllocation.setGpuBaseAddress(0x123000);
    ze_command_queue_desc_t desc = {};
    auto csr = std::unique_ptr(neoDevice->createCommandStreamReceiver());
    csr->setupContext(*neoDevice->getDefaultEngine().osContext);
    auto commandQueue = new MockCommandQueueHw(device, csr.get(), &desc);
    commandQueue->initialize(false, false);
    uint32_t alignedSize = 4096u;
    NEO::LinearStream child(commandQueue->commandStream->getSpace(alignedSize), alignedSize);
    commandQueue->programStateBaseAddress(0u, true, child, true);
    auto usedSpaceAfter = commandQueue->commandStream->getUsed();
    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandQueue->commandStream->getCpuBase(), 0), usedSpaceAfter));
    auto itor = find(cmdList.begin(), cmdList.end());
    ASSERT_NE(cmdList.end(), itor);
    auto cmdSba = genCmdCast(*itor);
    EXPECT_EQ(cmdSba->getBindlessSurfaceStateBaseAddressModifyEnable(), true);
// Final assertions of givenBindlessModeEnabledWhenProgrammingStateBaseAddressThenBindlessBaseAddressAndSizeAreSet:
// the programmed bindless base must equal the helper's global heaps base and the size must
// equal the helper-reported maximum number of bindless surface states.
    EXPECT_EQ(cmdSba->getBindlessSurfaceStateBaseAddress(), neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()]->getBindlessHeapsHelper()->getGlobalHeapsBase());
    // NOTE(review): StateBaseAddressHelper has no template argument in this text; it looks stripped.
    auto surfaceStateCount = StateBaseAddressHelper::getMaxBindlessSurfaceStates();
    EXPECT_EQ(surfaceStateCount, cmdSba->getBindlessSurfaceStateSize());
    commandQueue->destroy();
}

// Same setup as the bindless-enabled test but with UseBindlessMode set to 0; the assertion
// (on the next line of the file) expects the programmed bindless base address to differ
// from the helper's global heaps base.
HWTEST2_F(CommandQueueProgramSBATest, givenBindlessModeDisabledWhenProgrammingStateBaseAddressThenBindlessBaseAddressNotPassed, CommandQueueSBASupport) {
    using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;
    DebugManagerStateRestore dbgRestorer;
    DebugManager.flags.UseBindlessMode.set(0);
    // NOTE(review): `std::make_unique(`, `std::unique_ptr(` and `MockCommandQueueHw` are
    // missing their template arguments in this text; they look stripped.
    auto bindlessHeapsHelper = std::make_unique(neoDevice->getMemoryManager(), neoDevice->getNumGenericSubDevices() > 1, neoDevice->getRootDeviceIndex(), neoDevice->getDeviceBitfield());
    // Keep a raw pointer for later access; ownership moves to the root device environment.
    MockBindlesHeapsHelper *bindlessHeapsHelperPtr = bindlessHeapsHelper.get();
    neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()]->bindlessHeapsHelper.reset(bindlessHeapsHelper.release());
    NEO::MockGraphicsAllocation baseAllocation;
    bindlessHeapsHelperPtr->surfaceStateHeaps[NEO::BindlessHeapsHelper::GLOBAL_SSH].reset(new IndirectHeap(&baseAllocation, true));
    baseAllocation.setGpuBaseAddress(0x123000);
    ze_command_queue_desc_t desc = {};
    auto csr = std::unique_ptr(neoDevice->createCommandStreamReceiver());
    csr->setupContext(*neoDevice->getDefaultEngine().osContext);
    auto commandQueue = new MockCommandQueueHw(device, csr.get(), &desc);
    commandQueue->initialize(false, false);
    uint32_t alignedSize = 4096u;
    NEO::LinearStream child(commandQueue->commandStream->getSpace(alignedSize), alignedSize);
    commandQueue->programStateBaseAddress(0u, true, child, true);
    auto usedSpaceAfter = commandQueue->commandStream->getUsed();
    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandQueue->commandStream->getCpuBase(), 0), usedSpaceAfter));
    auto itor =
// Initializer of `itor`, declared at the end of the previous line (test
// givenBindlessModeDisabledWhenProgrammingStateBaseAddressThenBindlessBaseAddressNotPassed).
// NOTE(review): `find(` and `genCmdCast(` have no template argument in this text; they look stripped.
                find(cmdList.begin(), cmdList.end());
    ASSERT_NE(cmdList.end(), itor);
    auto cmdSba = genCmdCast(*itor);
    EXPECT_NE(cmdSba->getBindlessSurfaceStateBaseAddress(), neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()]->getBindlessHeapsHelper()->getGlobalHeapsBase());
    commandQueue->destroy();
}

// Creates a queue with the fifth create() argument set to true and executes a command list
// created for EngineGroupType::RenderCompute on it; the expected status is
// ZE_RESULT_ERROR_INVALID_COMMAND_LIST_TYPE.
// NOTE(review): presumably that fifth argument selects a copy-only (blit) queue, as the test
// name suggests — the parameter name is not visible in this text.
TEST_F(CommandQueueCreate, givenCmdQueueWithBlitCopyWhenExecutingNonCopyBlitCommandListThenWrongCommandListStatusReturned) {
    const ze_command_queue_desc_t desc = {};
    ze_result_t returnValue;
    auto csr = std::unique_ptr(neoDevice->createCommandStreamReceiver());
    csr->setupContext(*neoDevice->getDefaultEngine().osContext);
    L0::CommandQueue *commandQueue = CommandQueue::create(productFamily, device, csr.get(), &desc, true, false, returnValue);
    ASSERT_NE(nullptr, commandQueue);
    std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));
    ASSERT_NE(nullptr, commandList);
    auto commandListHandle = commandList->toHandle();
    auto status = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false);
    EXPECT_EQ(status, ZE_RESULT_ERROR_INVALID_COMMAND_LIST_TYPE);
    commandQueue->destroy();
}

// Counterpart of the test above: the command list is created for EngineGroupType::Copy and
// execution is expected to return ZE_RESULT_SUCCESS.
TEST_F(CommandQueueCreate, givenCmdQueueWithBlitCopyWhenExecutingCopyBlitCommandListThenSuccessReturned) {
    const ze_command_queue_desc_t desc = {};
    ze_result_t returnValue;
    L0::CommandQueue *commandQueue = CommandQueue::create(productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &desc, true, false, returnValue);
    ASSERT_NE(nullptr, commandQueue);
    std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::Copy, 0u, returnValue));
    ASSERT_NE(nullptr, commandList);
    auto commandListHandle = commandList->toHandle();
    auto status = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false);
    EXPECT_EQ(status, ZE_RESULT_SUCCESS);
    commandQueue->destroy();
}

// NOTE(review): `Test` has no template argument in this text; it looks stripped.
using CommandQueueDestroy = Test;

// Fixture template for the single-tile / multi-tile command queue tests; base classes and
// body follow on the next line of the file.
// NOTE(review): the template parameter list is missing in this text; the body refers to a
// name `multiTile`, which is presumably that parameter — confirm.
template struct CommandQueueCommands :
// Base-class list and body of `struct CommandQueueCommands`, whose head is on the previous line.
                                     DeviceFixture, ::testing::Test {
    // Debug flags are set before DeviceFixture::SetUp() runs: preemption is forced to
    // Disabled and the number of sub-devices is 2 when `multiTile` is true, otherwise 1.
    void SetUp() override {
        // NOTE(review): `static_cast(` has no target type in this text; it looks stripped.
        DebugManager.flags.ForcePreemptionMode.set(static_cast(NEO::PreemptionMode::Disabled));
        DebugManager.flags.CreateMultipleSubDevices.set(multiTile ? 2 : 1);
        DeviceFixture::SetUp();
    }
    void TearDown() override {
        DeviceFixture::TearDown();
    }

    // Returns true when `graphicsAllocation` appears in csr.copyOfAllocations.
    // NOTE(review): the template parameter list and MockCsrHw2's argument are missing here.
    template bool isAllocationInResidencyContainer(MockCsrHw2 &csr, NEO::GraphicsAllocation *graphicsAllocation) {
        for (auto alloc : csr.copyOfAllocations) {
            if (alloc == graphicsAllocation) {
                return true;
            }
        }
        return false;
    }

    const ze_command_queue_desc_t desc = {};
    DebugManagerStateRestore restore{};
    // Sets NEO::MockDevice::createSingleDevice to false for the fixture's lifetime.
    VariableBackup mockDeviceFlagBackup{&NEO::MockDevice::createSingleDevice, false};
};

// NOTE(review): both aliases read identically in this text; the template arguments that
// would distinguish single-tile from multi-tile look stripped.
using CommandQueueCommandsSingleTile = CommandQueueCommands;
using CommandQueueCommandsMultiTile = CommandQueueCommands;

// Executes a Copy command list on a queue backed by a MockCsrHw2 and expects success,
// programHardwareContextCalled to be set, and — when the CSR has a global fence allocation —
// that allocation to be present in the CSR's copy of submitted allocations.
HWTEST_F(CommandQueueCommandsSingleTile, givenCommandQueueWhenExecutingCommandListsThenHardwareContextIsProgrammedAndGlobalAllocationResident) {
    MockCsrHw2 csr(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield());
    csr.initializeTagAllocation();
    csr.setupContext(*neoDevice->getDefaultEngine().osContext);
    ze_result_t returnValue;
    L0::CommandQueue *commandQueue = CommandQueue::create(productFamily, device, &csr, &desc, true, false, returnValue);
    ASSERT_NE(nullptr, commandQueue);
    std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::Copy, 0u, returnValue));
    auto commandListHandle = commandList->toHandle();
    auto status = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false);
    // The global fence allocation may be absent; residency is only checked when it exists.
    auto globalFence = csr.getGlobalFenceAllocation();
    if (globalFence) {
        EXPECT_TRUE(isAllocationInResidencyContainer(csr, globalFence));
    }
    EXPECT_EQ(status, ZE_RESULT_SUCCESS);
    EXPECT_TRUE(csr.programHardwareContextCalled);
    commandQueue->destroy();
}

// Multi-tile test with walker partitioning enabled; the body continues on the next line.
HWTEST2_F(CommandQueueCommandsMultiTile, givenCommandQueueOnMultiTileWhenExecutingCommandListsThenWorkPartitionAllocationIsMadeResident, IsAtLeastXeHpCore) {
    DebugManagerStateRestore restorer;
// Continues givenCommandQueueOnMultiTileWhenExecutingCommandListsThenWorkPartitionAllocationIsMadeResident
// (header on the previous line). EnableWalkerPartition is set to 1.
    DebugManager.flags.EnableWalkerPartition.set(1);

    // CSR mock whose makeResident only records whether `expectedGa` was passed in.
    // NOTE(review): MockCsrHw2's template argument is missing in this text.
    class MyCsrMock : public MockCsrHw2 {
        using MockCsrHw2::MockCsrHw2;

      public:
        void makeResident(GraphicsAllocation &graphicsAllocation) override {
            if (expectedGa == &graphicsAllocation) {
                expectedGAWasMadeResident = true;
            }
        }
        GraphicsAllocation *expectedGa = nullptr;
        bool expectedGAWasMadeResident = false;
    };

    MyCsrMock csr(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield());
    EXPECT_EQ(2u, csr.activePartitions);
    csr.initializeTagAllocation();
    csr.createWorkPartitionAllocation(*neoDevice);
    csr.setupContext(*neoDevice->getDefaultEngine().osContext);
    ze_result_t returnValue;
    L0::CommandQueue *commandQueue = CommandQueue::create(productFamily, device, &csr, &desc, false, false, returnValue);
    ASSERT_NE(nullptr, commandQueue);
    std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::Compute, 0u, returnValue));
    auto commandListHandle = commandList->toHandle();
    // Register the work partition allocation as the one makeResident should observe.
    auto workPartitionAllocation = csr.getWorkPartitionAllocation();
    csr.expectedGa = workPartitionAllocation;
    auto status = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false);
    EXPECT_EQ(status, ZE_RESULT_SUCCESS);
    EXPECT_EQ(2u, csr.activePartitionsConfig);
    ASSERT_NE(nullptr, workPartitionAllocation);
    EXPECT_TRUE(csr.expectedGAWasMadeResident);
    commandQueue->destroy();
}

// With EnableWalkerPartition set to 0, createWorkPartitionAllocation is expected to leave the
// CSR without a work partition allocation (assertion is on the next line of the file).
HWTEST2_F(CommandQueueCommandsMultiTile, givenCommandQueueOnMultiTileWhenWalkerPartitionIsDisabledThenWorkPartitionAllocationIsNotCreated, IsAtLeastXeHpCore) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.EnableWalkerPartition.set(0);

    // Same recording mock as in the previous test; makeResident is not exercised in the
    // visible part of this test.
    class MyCsrMock : public MockCsrHw2 {
        using MockCsrHw2::MockCsrHw2;

      public:
        void makeResident(GraphicsAllocation &graphicsAllocation) override {
            if (expectedGa == &graphicsAllocation) {
                expectedGAWasMadeResident = true;
            }
        }
        GraphicsAllocation *expectedGa = nullptr;
        bool expectedGAWasMadeResident = false;
    };

    MyCsrMock csr(*neoDevice->getExecutionEnvironment(), 0,
// Last constructor argument of `MyCsrMock csr(...)` opened on the previous line (test
// givenCommandQueueOnMultiTileWhenWalkerPartitionIsDisabledThenWorkPartitionAllocationIsNotCreated).
                  neoDevice->getDeviceBitfield());
    csr.initializeTagAllocation();
    csr.createWorkPartitionAllocation(*neoDevice);
    auto workPartitionAllocation = csr.getWorkPartitionAllocation();
    EXPECT_EQ(nullptr, workPartitionAllocation);
}

// NOTE(review): `Test` has no template argument in this text; the fixture type looks stripped.
using CommandQueueIndirectAllocations = Test;

// Allocates device memory, launches a kernel flagged as allowed to access device allocations
// indirectly, and checks the command list's residency container: the allocation must be
// absent after appendLaunchKernel and present after executeCommandLists.
HWTEST_F(CommandQueueIndirectAllocations, givenCommandQueueWhenExecutingCommandListsThenExpectedIndirectAllocationsAddedToResidencyContainer) {
    const ze_command_queue_desc_t desc = {};
    // NOTE(review): `MockCsrHw2` and `std::unique_ptr` have no template arguments in this text.
    MockCsrHw2 csr(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield());
    csr.initializeTagAllocation();
    csr.setupContext(*neoDevice->getDefaultEngine().osContext);
    // A preemption allocation is created only for mid-thread preemption or an active debugger.
    if (device->getNEODevice()->getPreemptionMode() == PreemptionMode::MidThread || device->getNEODevice()->isDebuggerActive()) {
        csr.createPreemptionAllocation();
    }
    ze_result_t returnValue;
    L0::CommandQueue *commandQueue = CommandQueue::create(productFamily, device, &csr, &desc, false, false, returnValue);
    ASSERT_NE(nullptr, commandQueue);
    std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::Compute, 0u, returnValue));
    void *deviceAlloc = nullptr;
    ze_device_mem_alloc_desc_t deviceDesc = {};
    auto result = context->allocDeviceMem(device->toHandle(), &deviceDesc, 16384u, 4096u, &deviceAlloc);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);
    // Look up the graphics allocation backing the device pointer for this root device.
    auto gpuAlloc = device->getDriverHandle()->getSvmAllocsManager()->getSVMAllocs()->get(deviceAlloc)->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex());
    ASSERT_NE(nullptr, gpuAlloc);
    createKernel();
    kernel->unifiedMemoryControls.indirectDeviceAllocationsAllowed = true;
    EXPECT_TRUE(kernel->getUnifiedMemoryControls().indirectDeviceAllocationsAllowed);
    ze_group_count_t groupCount{1, 1, 1};
    result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);
    auto itorEvent = std::find(std::begin(commandList->commandContainer.getResidencyContainer()),
// Remaining arguments of the std::find call opened on the previous line (test
// givenCommandQueueWhenExecutingCommandListsThenExpectedIndirectAllocationsAddedToResidencyContainer).
                               std::end(commandList->commandContainer.getResidencyContainer()), gpuAlloc);
    // Before execution the allocation must not be in the list's residency container.
    EXPECT_EQ(itorEvent, std::end(commandList->commandContainer.getResidencyContainer()));
    auto commandListHandle = commandList->toHandle();
    result = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);
    // After execution it must be there.
    itorEvent = std::find(std::begin(commandList->commandContainer.getResidencyContainer()), std::end(commandList->commandContainer.getResidencyContainer()), gpuAlloc);
    EXPECT_NE(itorEvent, std::end(commandList->commandContainer.getResidencyContainer()));
    device->getDriverHandle()->getSvmAllocsManager()->freeSVMAlloc(deviceAlloc);
    commandQueue->destroy();
}

// With MakeIndirectAllocationsResidentAsPack set to 1, the device allocation is expected to
// stay out of the list's residency container after appendLaunchKernel and to be non-resident
// before execution; the post-execution checks are on the next line of the file.
HWTEST_F(CommandQueueIndirectAllocations, givenDebugModeToTreatIndirectAllocationsAsOnePackWhenIndirectAccessIsUsedThenWholePackIsMadeResident) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.MakeIndirectAllocationsResidentAsPack.set(1);
    const ze_command_queue_desc_t desc = {};
    // NOTE(review): `MockCsrHw2` and `std::unique_ptr` have no template arguments in this text.
    MockCsrHw2 csr(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield());
    csr.initializeTagAllocation();
    csr.setupContext(*neoDevice->getDefaultEngine().osContext);
    // A preemption allocation is created only for mid-thread preemption or an active debugger.
    if (device->getNEODevice()->getPreemptionMode() == PreemptionMode::MidThread || device->getNEODevice()->isDebuggerActive()) {
        csr.createPreemptionAllocation();
    }
    ze_result_t returnValue;
    L0::CommandQueue *commandQueue = CommandQueue::create(productFamily, device, &csr, &desc, false, false, returnValue);
    ASSERT_NE(nullptr, commandQueue);
    std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::Compute, 0u, returnValue));
    void *deviceAlloc = nullptr;
    ze_device_mem_alloc_desc_t deviceDesc = {};
    auto result = context->allocDeviceMem(device->toHandle(), &deviceDesc, 16384u, 4096u, &deviceAlloc);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);
    auto gpuAlloc = device->getDriverHandle()->getSvmAllocsManager()->getSVMAllocs()->get(deviceAlloc)->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex());
// Continues givenDebugModeToTreatIndirectAllocationsAsOnePackWhenIndirectAccessIsUsedThenWholePackIsMadeResident
// (started on the previous line).
    ASSERT_NE(nullptr, gpuAlloc);
    createKernel();
    kernel->unifiedMemoryControls.indirectDeviceAllocationsAllowed = true;
    EXPECT_TRUE(kernel->getUnifiedMemoryControls().indirectDeviceAllocationsAllowed);
    ze_group_count_t groupCount{1, 1, 1};
    result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);
    // The allocation must not be in the list's residency container.
    auto itorEvent = std::find(std::begin(commandList->commandContainer.getResidencyContainer()), std::end(commandList->commandContainer.getResidencyContainer()), gpuAlloc);
    EXPECT_EQ(itorEvent, std::end(commandList->commandContainer.getResidencyContainer()));
    auto commandListHandle = commandList->toHandle();
    EXPECT_FALSE(gpuAlloc->isResident(csr.getOsContext().getContextId()));
    result = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);
    // After execution the allocation is expected to be resident for this CSR's context with
    // its residency task count equal to GraphicsAllocation::objectAlwaysResident.
    EXPECT_TRUE(gpuAlloc->isResident(csr.getOsContext().getContextId()));
    EXPECT_EQ(GraphicsAllocation::objectAlwaysResident, gpuAlloc->getResidencyTaskCount(csr.getOsContext().getContextId()));
    device->getDriverHandle()->getSvmAllocsManager()->freeSVMAlloc(deviceAlloc);
    commandQueue->destroy();
}

// Variant of the test above that does not set the MakeIndirectAllocationsResidentAsPack
// debug flag; instead (on the next line of the file) it sets overrideAllocateAsPackReturn on
// the memory manager before executing.
HWTEST_F(CommandQueueIndirectAllocations, givenDeviceThatSupportsSubmittingIndirectAllocationsAsPackWhenIndirectAccessIsUsedThenWholePackIsMadeResident) {
    const ze_command_queue_desc_t desc = {};
    // NOTE(review): `MockCsrHw2` and `std::unique_ptr` have no template arguments in this text.
    MockCsrHw2 csr(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield());
    csr.initializeTagAllocation();
    csr.setupContext(*neoDevice->getDefaultEngine().osContext);
    // A preemption allocation is created only for mid-thread preemption or an active debugger.
    if (device->getNEODevice()->getPreemptionMode() == PreemptionMode::MidThread || device->getNEODevice()->isDebuggerActive()) {
        csr.createPreemptionAllocation();
    }
    ze_result_t returnValue;
    L0::CommandQueue *commandQueue = CommandQueue::create(productFamily, device, &csr, &desc, false, false, returnValue);
    ASSERT_NE(nullptr, commandQueue);
    std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::Compute, 0u,
// Last argument of the CommandList::create call opened on the previous line (test
// givenDeviceThatSupportsSubmittingIndirectAllocationsAsPackWhenIndirectAccessIsUsedThenWholePackIsMadeResident).
                                                    returnValue));
    void *deviceAlloc = nullptr;
    ze_device_mem_alloc_desc_t deviceDesc = {};
    auto result = context->allocDeviceMem(device->toHandle(), &deviceDesc, 16384u, 4096u, &deviceAlloc);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);
    auto gpuAlloc = device->getDriverHandle()->getSvmAllocsManager()->getSVMAllocs()->get(deviceAlloc)->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex());
    ASSERT_NE(nullptr, gpuAlloc);
    createKernel();
    kernel->unifiedMemoryControls.indirectDeviceAllocationsAllowed = true;
    EXPECT_TRUE(kernel->getUnifiedMemoryControls().indirectDeviceAllocationsAllowed);
    ze_group_count_t groupCount{1, 1, 1};
    result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);
    // The allocation must not be in the list's residency container and not yet resident.
    auto itorEvent = std::find(std::begin(commandList->commandContainer.getResidencyContainer()), std::end(commandList->commandContainer.getResidencyContainer()), gpuAlloc);
    EXPECT_EQ(itorEvent, std::end(commandList->commandContainer.getResidencyContainer()));
    auto commandListHandle = commandList->toHandle();
    EXPECT_FALSE(gpuAlloc->isResident(csr.getOsContext().getContextId()));
    // NOTE(review): `static_cast(` has no target type in this text; it looks stripped. The
    // cast result's overrideAllocateAsPackReturn member is set to 1 before execution.
    static_cast(driverHandle.get()->getMemoryManager())->overrideAllocateAsPackReturn = 1u;
    result = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_TRUE(gpuAlloc->isResident(csr.getOsContext().getContextId()));
    EXPECT_EQ(GraphicsAllocation::objectAlwaysResident, gpuAlloc->getResidencyTaskCount(csr.getOsContext().getContextId()));
    device->getDriverHandle()->getSvmAllocsManager()->freeSVMAlloc(deviceAlloc);
    commandQueue->destroy();
}

// Immediate-command-list variant with EnableFlushTaskSubmission set to true; the body
// continues on the next line of the file.
HWTEST_F(CommandQueueIndirectAllocations, givenDeviceThatSupportsSubmittingIndirectAllocationsAsPackWhenIndirectAccessIsUsedThenWholePackIsMadeResidentWithImmediateCommandListAndFlushTask) {
    DebugManagerStateRestore restorer;
    NEO::DebugManager.flags.EnableFlushTaskSubmission.set(true);
    MockCsrHw2 csr(*neoDevice->getExecutionEnvironment(), 0,
neoDevice->getDeviceBitfield()); csr.initializeTagAllocation(); csr.setupContext(*neoDevice->getDefaultEngine().osContext); ze_result_t returnValue; ze_command_queue_desc_t desc = {}; desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::Compute, returnValue)); ASSERT_NE(nullptr, commandList); EXPECT_EQ(1u, commandList->cmdListType); EXPECT_NE(nullptr, commandList->cmdQImmediate); void *deviceAlloc = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; auto result = context->allocDeviceMem(device->toHandle(), &deviceDesc, 16384u, 4096u, &deviceAlloc); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto gpuAlloc = device->getDriverHandle()->getSvmAllocsManager()->getSVMAllocs()->get(deviceAlloc)->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex()); ASSERT_NE(nullptr, gpuAlloc); createKernel(); kernel->unifiedMemoryControls.indirectDeviceAllocationsAllowed = true; EXPECT_TRUE(kernel->getUnifiedMemoryControls().indirectDeviceAllocationsAllowed); static_cast(driverHandle.get()->getMemoryManager())->overrideAllocateAsPackReturn = 1u; ze_group_count_t groupCount{1, 1, 1}; result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr); ASSERT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_TRUE(gpuAlloc->isResident(csr.getOsContext().getContextId())); EXPECT_EQ(GraphicsAllocation::objectAlwaysResident, gpuAlloc->getResidencyTaskCount(csr.getOsContext().getContextId())); device->getDriverHandle()->getSvmAllocsManager()->freeSVMAlloc(deviceAlloc); } HWTEST_F(CommandQueueIndirectAllocations, givenImmediateCommandListAndFlushTaskWithIndirectAllocsAsPackDisabledThenLaunchKernelWorks) { DebugManagerStateRestore restorer; NEO::DebugManager.flags.EnableFlushTaskSubmission.set(true); NEO::DebugManager.flags.MakeIndirectAllocationsResidentAsPack.set(0); MockCsrHw2 csr(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield()); 
csr.initializeTagAllocation(); csr.setupContext(*neoDevice->getDefaultEngine().osContext); ze_result_t returnValue; ze_command_queue_desc_t desc = {}; desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::Compute, returnValue)); ASSERT_NE(nullptr, commandList); EXPECT_EQ(1u, commandList->cmdListType); EXPECT_NE(nullptr, commandList->cmdQImmediate); void *deviceAlloc = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; auto result = context->allocDeviceMem(device->toHandle(), &deviceDesc, 16384u, 4096u, &deviceAlloc); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto gpuAlloc = device->getDriverHandle()->getSvmAllocsManager()->getSVMAllocs()->get(deviceAlloc)->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex()); ASSERT_NE(nullptr, gpuAlloc); createKernel(); kernel->unifiedMemoryControls.indirectDeviceAllocationsAllowed = true; EXPECT_TRUE(kernel->getUnifiedMemoryControls().indirectDeviceAllocationsAllowed); static_cast(driverHandle.get()->getMemoryManager())->overrideAllocateAsPackReturn = 1u; ze_group_count_t groupCount{1, 1, 1}; result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr); ASSERT_EQ(ZE_RESULT_SUCCESS, result); device->getDriverHandle()->getSvmAllocsManager()->freeSVMAlloc(deviceAlloc); } using DeviceCreateCommandQueueTest = Test; TEST_F(DeviceCreateCommandQueueTest, givenLowPriorityDescWhenCreateCommandQueueIsCalledThenLowPriorityCsrIsAssigned) { ze_command_queue_desc_t desc{}; desc.ordinal = 0u; desc.index = 0u; desc.priority = ZE_COMMAND_QUEUE_PRIORITY_PRIORITY_LOW; ze_command_queue_handle_t commandQueueHandle = {}; ze_result_t res = device->createCommandQueue(&desc, &commandQueueHandle); EXPECT_EQ(ZE_RESULT_SUCCESS, res); auto commandQueue = static_cast(L0::CommandQueue::fromHandle(commandQueueHandle)); EXPECT_NE(commandQueue, nullptr); 
EXPECT_TRUE(commandQueue->getCsr()->getOsContext().isLowPriority()); NEO::CommandStreamReceiver *csr = nullptr; device->getCsrForLowPriority(&csr); EXPECT_EQ(commandQueue->getCsr(), csr); commandQueue->destroy(); } TEST_F(DeviceCreateCommandQueueTest, givenCopyOrdinalWhenCreateCommandQueueWithLowPriorityDescIsCalledThenCopyCsrIsAssigned) { auto copyCsr = std::unique_ptr(neoDevice->createCommandStreamReceiver()); EngineDescriptor copyEngineDescriptor({aub_stream::ENGINE_BCS, EngineUsage::Regular}, neoDevice->getDeviceBitfield(), neoDevice->getPreemptionMode(), false, false); auto copyOsContext = neoDevice->getExecutionEnvironment()->memoryManager->createAndRegisterOsContext(copyCsr.get(), copyEngineDescriptor); copyCsr->setupContext(*copyOsContext); auto computeCsr = std::unique_ptr(neoDevice->createCommandStreamReceiver()); EngineDescriptor computeEngineDescriptor({aub_stream::ENGINE_CCS, EngineUsage::LowPriority}, neoDevice->getDeviceBitfield(), neoDevice->getPreemptionMode(), false, false); auto computeOsContext = neoDevice->getExecutionEnvironment()->memoryManager->createAndRegisterOsContext(computeCsr.get(), computeEngineDescriptor); computeCsr->setupContext(*computeOsContext); auto &engineGroups = neoDevice->getRegularEngineGroups(); engineGroups.clear(); auto &allEngines = const_cast &>(neoDevice->getAllEngines()); allEngines.clear(); engineGroups.push_back(NEO::Device::EngineGroupT{}); engineGroups.back().engineGroupType = EngineGroupType::Copy; engineGroups.back().engines.resize(1); engineGroups.back().engines[0].commandStreamReceiver = copyCsr.get(); EngineControl copyEngine{copyCsr.get(), copyOsContext}; allEngines.push_back(copyEngine); engineGroups.push_back(NEO::Device::EngineGroupT{}); engineGroups.back().engineGroupType = EngineGroupType::Compute; engineGroups.back().engines.resize(1); engineGroups.back().engines[0].commandStreamReceiver = computeCsr.get(); EngineControl computeEngine{computeCsr.get(), computeOsContext}; 
allEngines.push_back(computeEngine); uint32_t count = 0u; ze_result_t res = device->getCommandQueueGroupProperties(&count, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, res); EXPECT_GT(count, 0u); std::vector properties(count); res = device->getCommandQueueGroupProperties(&count, properties.data()); EXPECT_EQ(ZE_RESULT_SUCCESS, res); uint32_t ordinal = 0u; for (ordinal = 0u; ordinal < count; ordinal++) { if ((properties[ordinal].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY) && !(properties[ordinal].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE)) { if (properties[ordinal].numQueues == 0) continue; break; } } EXPECT_LT(ordinal, count); ze_command_queue_desc_t desc{}; desc.ordinal = ordinal; desc.index = 0u; desc.priority = ZE_COMMAND_QUEUE_PRIORITY_PRIORITY_LOW; ze_command_queue_handle_t commandQueueHandle = {}; res = device->createCommandQueue(&desc, &commandQueueHandle); EXPECT_EQ(ZE_RESULT_SUCCESS, res); auto commandQueue = static_cast(L0::CommandQueue::fromHandle(commandQueueHandle)); EXPECT_NE(commandQueue, nullptr); EXPECT_EQ(copyCsr.get(), commandQueue->getCsr()); commandQueue->destroy(); engineGroups.clear(); allEngines.clear(); } struct DeferredContextCreationDeviceCreateCommandQueueTest : DeviceCreateCommandQueueTest { void SetUp() override { DebugManager.flags.DeferOsContextInitialization.set(1); DeviceCreateCommandQueueTest::SetUp(); } DebugManagerStateRestore restore; }; TEST_F(DeferredContextCreationDeviceCreateCommandQueueTest, givenLowPriorityEngineNotInitializedWhenCreateLowPriorityCommandQueueIsCalledThenEngineIsInitialized) { NEO::CommandStreamReceiver *lowPriorityCsr = nullptr; device->getCsrForLowPriority(&lowPriorityCsr); ASSERT_FALSE(lowPriorityCsr->getOsContext().isInitialized()); ze_command_queue_desc_t desc{}; desc.ordinal = 0u; desc.index = 0u; desc.priority = ZE_COMMAND_QUEUE_PRIORITY_PRIORITY_LOW; ze_command_queue_handle_t commandQueueHandle = {}; ze_result_t res = device->createCommandQueue(&desc, &commandQueueHandle); 
EXPECT_EQ(ZE_RESULT_SUCCESS, res); EXPECT_TRUE(lowPriorityCsr->getOsContext().isInitialized()); auto commandQueue = static_cast(L0::CommandQueue::fromHandle(commandQueueHandle)); commandQueue->destroy(); } TEST_F(DeviceCreateCommandQueueTest, givenNormalPriorityDescWhenCreateCommandQueueIsCalledWithValidArgumentThenCsrIsAssignedWithOrdinalAndIndex) { ze_command_queue_desc_t desc{}; desc.ordinal = 0u; desc.index = 0u; desc.priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL; ze_command_queue_handle_t commandQueueHandle = {}; ze_result_t res = device->createCommandQueue(&desc, &commandQueueHandle); EXPECT_EQ(ZE_RESULT_SUCCESS, res); auto commandQueue = static_cast(L0::CommandQueue::fromHandle(commandQueueHandle)); EXPECT_NE(commandQueue, nullptr); EXPECT_FALSE(commandQueue->getCsr()->getOsContext().isLowPriority()); NEO::CommandStreamReceiver *csr = nullptr; device->getCsrForOrdinalAndIndex(&csr, 0u, 0u); EXPECT_EQ(commandQueue->getCsr(), csr); commandQueue->destroy(); } TEST_F(DeviceCreateCommandQueueTest, whenCallingGetCsrForOrdinalAndIndexWithInvalidOrdinalThenInvalidArgumentIsReturned) { ze_command_queue_desc_t desc{}; desc.ordinal = 0u; desc.index = 0u; desc.priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL; ze_command_queue_handle_t commandQueueHandle = {}; ze_result_t res = device->createCommandQueue(&desc, &commandQueueHandle); EXPECT_EQ(ZE_RESULT_SUCCESS, res); auto commandQueue = static_cast(L0::CommandQueue::fromHandle(commandQueueHandle)); EXPECT_NE(commandQueue, nullptr); EXPECT_FALSE(commandQueue->getCsr()->getOsContext().isLowPriority()); NEO::CommandStreamReceiver *csr = nullptr; res = device->getCsrForOrdinalAndIndex(&csr, std::numeric_limits::max(), 0u); EXPECT_EQ(res, ZE_RESULT_ERROR_INVALID_ARGUMENT); commandQueue->destroy(); } TEST_F(DeviceCreateCommandQueueTest, whenCallingGetCsrForOrdinalAndIndexWithInvalidIndexThenInvalidArgumentIsReturned) { ze_command_queue_desc_t desc{}; desc.ordinal = 0u; desc.index = 0u; desc.priority = 
ZE_COMMAND_QUEUE_PRIORITY_NORMAL; ze_command_queue_handle_t commandQueueHandle = {}; ze_result_t res = device->createCommandQueue(&desc, &commandQueueHandle); EXPECT_EQ(ZE_RESULT_SUCCESS, res); auto commandQueue = static_cast(L0::CommandQueue::fromHandle(commandQueueHandle)); EXPECT_NE(commandQueue, nullptr); EXPECT_FALSE(commandQueue->getCsr()->getOsContext().isLowPriority()); NEO::CommandStreamReceiver *csr = nullptr; res = device->getCsrForOrdinalAndIndex(&csr, 0u, std::numeric_limits::max()); EXPECT_EQ(res, ZE_RESULT_ERROR_INVALID_ARGUMENT); commandQueue->destroy(); } TEST_F(DeviceCreateCommandQueueTest, givenLowPriorityDescAndWithoutLowPriorityCsrWhenCreateCommandQueueIsCalledThenAbortIsThrown) { // remove low priority EngineControl objects for negative testing neoDevice->allEngines.erase(std::remove_if( neoDevice->allEngines.begin(), neoDevice->allEngines.end(), [](EngineControl &p) { return p.osContext->isLowPriority(); })); ze_command_queue_desc_t desc{}; desc.ordinal = 0u; desc.index = 0u; desc.priority = ZE_COMMAND_QUEUE_PRIORITY_PRIORITY_LOW; ze_command_queue_handle_t commandQueueHandle = {}; ze_result_t res{}; EXPECT_THROW(res = device->createCommandQueue(&desc, &commandQueueHandle), std::exception); } using MultiDeviceCreateCommandQueueTest = Test; TEST_F(MultiDeviceCreateCommandQueueTest, givenLowPriorityDescWhenCreateCommandQueueIsCalledThenLowPriorityCsrIsAssigned) { auto device = driverHandle->devices[0]; ze_command_queue_desc_t desc{}; desc.ordinal = 0u; desc.index = 0u; desc.priority = ZE_COMMAND_QUEUE_PRIORITY_PRIORITY_LOW; ze_command_queue_handle_t commandQueueHandle = {}; ze_result_t res = device->createCommandQueue(&desc, &commandQueueHandle); EXPECT_EQ(ZE_RESULT_SUCCESS, res); auto commandQueue = static_cast(L0::CommandQueue::fromHandle(commandQueueHandle)); EXPECT_NE(commandQueue, nullptr); EXPECT_TRUE(commandQueue->getCsr()->getOsContext().isLowPriority()); NEO::CommandStreamReceiver *csr = nullptr; device->getCsrForLowPriority(&csr); 
EXPECT_EQ(commandQueue->getCsr(), csr); commandQueue->destroy(); } template class MockCommandQueue : public L0::CommandQueueHw { public: using L0::CommandQueueHw::CommandQueueHw; MockCommandQueue(L0::Device *device, NEO::CommandStreamReceiver *csr, const ze_command_queue_desc_t *desc) : L0::CommandQueueHw(device, csr, desc) {} using BaseClass = ::L0::CommandQueueHw; using BaseClass::csr; using BaseClass::heapContainer; NEO::HeapContainer mockHeapContainer; void handleScratchSpace(NEO::HeapContainer &heapContainer, NEO::ScratchSpaceController *scratchController, bool &gsbaState, bool &frontEndState, uint32_t perThreadScratchSpaceSize, uint32_t perThreadPrivateScratchSize) override { this->mockHeapContainer = heapContainer; } void programFrontEnd(uint64_t scratchAddress, uint32_t perThreadScratchSpaceSize, NEO::LinearStream &commandStream) override { return; } }; using CommandQueueExecuteTest = Test; HWTEST2_F(CommandQueueDestroy, givenCommandQueueAndCommandListWithSshAndScratchWhenExecuteThenSshWasUsed, IsAtLeastSkl) { ze_command_queue_desc_t desc = {}; NEO::CommandStreamReceiver *csr; device->getCsrForOrdinalAndIndex(&csr, 0u, 0u); auto commandQueue = new MockCommandQueue(device, csr, &desc); commandQueue->initialize(false, false); auto commandList = new CommandListCoreFamily(); commandList->initialize(device, NEO::EngineGroupType::Compute, 0u); commandList->setCommandListPerThreadScratchSize(100u); auto commandListHandle = commandList->toHandle(); void *alloc = alignedMalloc(0x100, 0x100); NEO::GraphicsAllocation graphicsAllocation1(0, NEO::AllocationType::BUFFER, alloc, 0u, 0u, 1u, MemoryPool::System4KBPages, 1u); NEO::GraphicsAllocation graphicsAllocation2(0, NEO::AllocationType::BUFFER, alloc, 0u, 0u, 1u, MemoryPool::System4KBPages, 1u); commandList->commandContainer.sshAllocations.push_back(&graphicsAllocation1); commandList->commandContainer.sshAllocations.push_back(&graphicsAllocation2); commandQueue->executeCommandLists(1, &commandListHandle, nullptr, 
false); EXPECT_EQ(commandQueue->mockHeapContainer.size(), 3u); commandQueue->destroy(); commandList->destroy(); alignedFree(alloc); } using CommandQueueExecuteTest = Test; HWTEST2_F(CommandQueueDestroy, givenCommandQueueAndCommandListWithSshAndPrivateScratchWhenExecuteThenSshWasUsed, IsAtLeastXeHpCore) { ze_command_queue_desc_t desc = {}; NEO::CommandStreamReceiver *csr; device->getCsrForOrdinalAndIndex(&csr, 0u, 0u); auto commandQueue = new MockCommandQueue(device, csr, &desc); commandQueue->initialize(false, false); auto commandList = new CommandListCoreFamily(); commandList->initialize(device, NEO::EngineGroupType::Compute, 0u); commandList->setCommandListPerThreadPrivateScratchSize(100u); auto commandListHandle = commandList->toHandle(); void *alloc = alignedMalloc(0x100, 0x100); NEO::GraphicsAllocation graphicsAllocation1(0, NEO::AllocationType::BUFFER, alloc, 0u, 0u, 1u, MemoryPool::System4KBPages, 1u); NEO::GraphicsAllocation graphicsAllocation2(0, NEO::AllocationType::BUFFER, alloc, 0u, 0u, 1u, MemoryPool::System4KBPages, 1u); commandList->commandContainer.sshAllocations.push_back(&graphicsAllocation1); commandList->commandContainer.sshAllocations.push_back(&graphicsAllocation2); commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false); EXPECT_EQ(commandQueue->mockHeapContainer.size(), 3u); commandQueue->destroy(); commandList->destroy(); alignedFree(alloc); } HWTEST2_F(CommandQueueDestroy, givenCommandQueueAndCommandListWithWhenBindlessEnabledThenHeapContainerIsEmpty, IsAtLeastSkl) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.UseBindlessMode.set(1); auto bindlessHeapsHelper = std::make_unique(neoDevice->getMemoryManager(), neoDevice->getNumGenericSubDevices() > 1, neoDevice->getRootDeviceIndex(), neoDevice->getDeviceBitfield()); neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()]->bindlessHeapsHelper.reset(bindlessHeapsHelper.release()); ze_command_queue_desc_t desc = {}; 
NEO::CommandStreamReceiver *csr; device->getCsrForOrdinalAndIndex(&csr, 0u, 0u); auto commandQueue = new MockCommandQueue(device, csr, &desc); commandQueue->initialize(false, false); auto commandList = new CommandListCoreFamily(); commandList->initialize(device, NEO::EngineGroupType::Compute, 0u); commandList->setCommandListPerThreadScratchSize(100u); auto commandListHandle = commandList->toHandle(); commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false); EXPECT_EQ(commandQueue->mockHeapContainer.size(), 0u); commandQueue->destroy(); commandList->destroy(); } using ExecuteCommandListTests = Test; HWTEST2_F(ExecuteCommandListTests, givenExecuteCommandListWhenItReturnsThenContainersAreEmpty, IsAtLeastSkl) { ze_command_queue_desc_t desc = {}; NEO::CommandStreamReceiver *csr; device->getCsrForOrdinalAndIndex(&csr, 0u, 0u); auto commandQueue = new MockCommandQueue(device, csr, &desc); commandQueue->initialize(false, false); auto commandList = new CommandListCoreFamily(); commandList->initialize(device, NEO::EngineGroupType::Compute, 0u); commandList->setCommandListPerThreadScratchSize(100u); auto commandListHandle = commandList->toHandle(); void *alloc = alignedMalloc(0x100, 0x100); NEO::GraphicsAllocation graphicsAllocation1(0, NEO::AllocationType::BUFFER, alloc, 0u, 0u, 1u, MemoryPool::System4KBPages, 1u); NEO::GraphicsAllocation graphicsAllocation2(0, NEO::AllocationType::BUFFER, alloc, 0u, 0u, 1u, MemoryPool::System4KBPages, 1u); commandList->commandContainer.sshAllocations.push_back(&graphicsAllocation1); commandList->commandContainer.sshAllocations.push_back(&graphicsAllocation2); commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false); EXPECT_EQ(0u, commandQueue->csr->getResidencyAllocations().size()); EXPECT_EQ(0u, commandQueue->heapContainer.size()); commandQueue->destroy(); commandList->destroy(); alignedFree(alloc); } template class MockCommandQueueSubmitBatchBuffer : public MockCommandQueue { public: 
MockCommandQueueSubmitBatchBuffer(L0::Device *device, NEO::CommandStreamReceiver *csr, const ze_command_queue_desc_t *desc) : MockCommandQueue(device, csr, desc) {}

    // Mocked submitBatchBuffer declared through ADDMETHOD_NOBASE with a default result of
    // SUCCESS; the tests below select the outcome by assigning `submitBatchBufferResult`.
    ADDMETHOD_NOBASE(submitBatchBuffer, NEO::SubmissionStatus, NEO::SubmissionStatus::SUCCESS,
                     (size_t offset, NEO::ResidencyContainer &residencyContainer, void *endingCmdPtr, bool isCooperative));
};

// With submitBatchBuffer reporting OUT_OF_MEMORY, executeCommandLists() is expected to
// return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY.
// NOTE(review): `new MockCommandQueueSubmitBatchBuffer(` and `new CommandListCoreFamily()`
// carry no template arguments in this copy of the file; they look stripped by extraction.
HWTEST2_F(ExecuteCommandListTests, givenOutOfMemorySubmitBatchBufferThenExecuteCommandListReturnsOutOfMemoryError, IsAtLeastSkl) {
    ze_command_queue_desc_t desc = {};

    // Start from nullptr and verify the lookup filled it in, so the mock queue is never
    // constructed from an indeterminate pointer. Nothing has been allocated yet, so the
    // early return of a fatal assertion cannot leak.
    NEO::CommandStreamReceiver *csr = nullptr;
    device->getCsrForOrdinalAndIndex(&csr, 0u, 0u);
    ASSERT_NE(nullptr, csr);

    auto commandQueue = new MockCommandQueueSubmitBatchBuffer(device, csr, &desc);
    commandQueue->submitBatchBufferResult = NEO::SubmissionStatus::OUT_OF_MEMORY;
    commandQueue->initialize(false, false);

    auto commandList = new CommandListCoreFamily();
    commandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
    auto commandListHandle = commandList->toHandle();

    auto res = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false);
    EXPECT_EQ(ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY, res);

    commandQueue->destroy();
    commandList->destroy();
}

// With submitBatchBuffer reporting FAILED, executeCommandLists() is expected to return
// ZE_RESULT_ERROR_UNKNOWN.
HWTEST2_F(ExecuteCommandListTests, givenFailingSubmitBatchBufferThenExecuteCommandListReturnsErrorUnknown, IsAtLeastSkl) {
    ze_command_queue_desc_t desc = {};

    // Same guard as above: validate csr before any manual allocation happens.
    NEO::CommandStreamReceiver *csr = nullptr;
    device->getCsrForOrdinalAndIndex(&csr, 0u, 0u);
    ASSERT_NE(nullptr, csr);

    auto commandQueue = new MockCommandQueueSubmitBatchBuffer(device, csr, &desc);
    commandQueue->submitBatchBufferResult = NEO::SubmissionStatus::FAILED;
    commandQueue->initialize(false, false);

    auto commandList = new CommandListCoreFamily();
    commandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
    auto commandListHandle = commandList->toHandle();

    auto res = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false);
    EXPECT_EQ(ZE_RESULT_ERROR_UNKNOWN, res);

    commandQueue->destroy();
    commandList->destroy();
}

HWTEST2_F(ExecuteCommandListTests,
givenSuccessfulSubmitBatchBufferThenExecuteCommandListReturnsSuccess, IsAtLeastSkl) { ze_command_queue_desc_t desc = {}; NEO::CommandStreamReceiver *csr; device->getCsrForOrdinalAndIndex(&csr, 0u, 0u); auto commandQueue = new MockCommandQueueSubmitBatchBuffer(device, csr, &desc); commandQueue->submitBatchBufferResult = NEO::SubmissionStatus::SUCCESS; commandQueue->initialize(false, false); auto commandList = new CommandListCoreFamily(); commandList->initialize(device, NEO::EngineGroupType::Compute, 0u); auto commandListHandle = commandList->toHandle(); auto res = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false); EXPECT_EQ(ZE_RESULT_SUCCESS, res); commandQueue->destroy(); commandList->destroy(); } HWTEST2_F(ExecuteCommandListTests, givenCommandQueueHavingTwoB2BCommandListsThenMVSDirtyFlagAndGSBADirtyFlagAreSetOnlyOnce, IsAtLeastSkl) { ze_command_queue_desc_t desc = {}; NEO::CommandStreamReceiver *csr; device->getCsrForOrdinalAndIndex(&csr, 0u, 0u); ze_result_t returnValue; auto commandQueue = CommandQueue::create(productFamily, device, csr, &desc, false, false, returnValue); auto commandList0 = new CommandListCoreFamily(); commandList0->initialize(device, NEO::EngineGroupType::Compute, 0u); commandList0->setCommandListPerThreadScratchSize(0u); auto commandList1 = new CommandListCoreFamily(); commandList1->initialize(device, NEO::EngineGroupType::Compute, 0u); commandList1->setCommandListPerThreadScratchSize(0u); auto commandListHandle0 = commandList0->toHandle(); auto commandListHandle1 = commandList1->toHandle(); EXPECT_EQ(true, csr->getMediaVFEStateDirty()); EXPECT_EQ(true, csr->getGSBAStateDirty()); commandQueue->executeCommandLists(1, &commandListHandle0, nullptr, false); EXPECT_EQ(false, csr->getMediaVFEStateDirty()); EXPECT_EQ(false, csr->getGSBAStateDirty()); commandQueue->executeCommandLists(1, &commandListHandle1, nullptr, false); EXPECT_EQ(false, csr->getMediaVFEStateDirty()); EXPECT_EQ(false, csr->getGSBAStateDirty()); 
commandQueue->destroy(); commandList0->destroy(); commandList1->destroy(); } using CommandQueueExecuteSupport = IsWithinProducts; HWTEST2_F(ExecuteCommandListTests, givenCommandQueueHavingTwoB2BCommandListsThenMVSIsProgrammedOnlyOnce, CommandQueueExecuteSupport) { using MEDIA_VFE_STATE = typename FamilyType::MEDIA_VFE_STATE; using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; ze_command_queue_desc_t desc = {}; NEO::CommandStreamReceiver *csr; device->getCsrForOrdinalAndIndex(&csr, 0u, 0u); ze_result_t returnValue; auto commandQueue = whitebox_cast(CommandQueue::create(productFamily, device, csr, &desc, false, false, returnValue)); auto commandList0 = std::unique_ptr(whitebox_cast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue))); commandList0->setCommandListPerThreadScratchSize(0u); auto commandList1 = std::unique_ptr(whitebox_cast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue))); commandList1->setCommandListPerThreadScratchSize(0u); auto commandListHandle0 = commandList0->toHandle(); auto commandListHandle1 = commandList1->toHandle(); ASSERT_NE(nullptr, commandQueue->commandStream); commandQueue->executeCommandLists(1, &commandListHandle0, nullptr, false); commandQueue->executeCommandLists(1, &commandListHandle1, nullptr, false); auto usedSpaceAfter = commandQueue->commandStream->getUsed(); GenCmdList cmdList1; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList1, ptrOffset(commandQueue->commandStream->getCpuBase(), 0), usedSpaceAfter)); auto mediaVfeStates = findAll(cmdList1.begin(), cmdList1.end()); auto GSBAStates = findAll(cmdList1.begin(), cmdList1.end()); // We should have only 1 state added ASSERT_EQ(1u, mediaVfeStates.size()); ASSERT_EQ(1u, GSBAStates.size()); commandQueue->destroy(); } struct EngineInstancedDeviceExecuteTests : public ::testing::Test { void SetUp() override { DebugManager.flags.EngineInstancedSubDevices.set(true); } 
bool createDevices(uint32_t numGenericSubDevices, uint32_t numCcs) { DebugManager.flags.CreateMultipleSubDevices.set(numGenericSubDevices); auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); auto hwInfo = executionEnvironment->rootDeviceEnvironments[0]->getMutableHardwareInfo(); hwInfo->gtSystemInfo.CCSInfo.NumberOfCCSEnabled = numCcs; hwInfo->featureTable.flags.ftrCCSNode = (numCcs > 0); HwHelper::get(hwInfo->platform.eRenderCoreFamily).adjustDefaultEngineType(hwInfo); if (!multiCcsDevice(*hwInfo, numCcs)) { return false; } executionEnvironment->parseAffinityMask(); deviceFactory = std::make_unique(1, numGenericSubDevices, *executionEnvironment.release()); rootDevice = deviceFactory->rootDevices[0]; EXPECT_NE(nullptr, rootDevice); return true; } bool multiCcsDevice(const HardwareInfo &hwInfo, uint32_t expectedNumCcs) { auto gpgpuEngines = HwHelper::get(hwInfo.platform.eRenderCoreFamily).getGpgpuEngineInstances(hwInfo); uint32_t numCcs = 0; for (auto &engine : gpgpuEngines) { if (EngineHelpers::isCcs(engine.first) && (engine.second == EngineUsage::Regular)) { numCcs++; } } return (numCcs == expectedNumCcs); } DebugManagerStateRestore restorer; std::unique_ptr deviceFactory; MockDevice *rootDevice = nullptr; }; HWTEST2_F(EngineInstancedDeviceExecuteTests, givenEngineInstancedDeviceWhenExecutingThenEnableSingleSliceDispatch, IsAtLeastXeHpCore) { using CFE_STATE = typename FamilyType::CFE_STATE; constexpr uint32_t genericDevicesCount = 1; constexpr uint32_t ccsCount = 2; DebugManager.flags.AllowSingleTileEngineInstancedSubDevices.set(true); if (!createDevices(genericDevicesCount, ccsCount)) { GTEST_SKIP(); } auto subDevice = static_cast(rootDevice->getSubDevice(0)); auto defaultEngine = subDevice->getDefaultEngine(); EXPECT_TRUE(defaultEngine.osContext->isEngineInstanced()); std::vector> devices; 
devices.push_back(std::unique_ptr(subDevice)); auto driverHandle = std::make_unique>(); driverHandle->initialize(std::move(devices)); auto l0Device = driverHandle->devices[0]; ze_command_queue_desc_t desc = {}; NEO::CommandStreamReceiver *csr; l0Device->getCsrForOrdinalAndIndex(&csr, 0u, 0u); ze_result_t returnValue; auto commandQueue = whitebox_cast(CommandQueue::create(productFamily, l0Device, csr, &desc, false, false, returnValue)); auto commandList = std::unique_ptr(whitebox_cast(CommandList::create(productFamily, l0Device, NEO::EngineGroupType::Compute, 0u, returnValue))); auto commandListHandle = commandList->toHandle(); commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false); GenCmdList cmdList; FamilyType::PARSE::parseCommandBuffer(cmdList, commandQueue->commandStream->getCpuBase(), commandQueue->commandStream->getUsed()); auto cfeStates = findAll(cmdList.begin(), cmdList.end()); EXPECT_NE(0u, cfeStates.size()); for (auto &cmd : cfeStates) { auto cfeState = reinterpret_cast(*cmd); EXPECT_TRUE(cfeState->getSingleSliceDispatchCcsMode()); } commandQueue->destroy(); } HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandListsWithPTSSsetForFirstCmdListThenMVSAndGSBAAreProgrammedOnlyOnce, CommandQueueExecuteSupport) { using MEDIA_VFE_STATE = typename FamilyType::MEDIA_VFE_STATE; using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; ze_command_queue_desc_t desc = {}; NEO::CommandStreamReceiver *csr; device->getCsrForOrdinalAndIndex(&csr, 0u, 0u); ze_result_t returnValue; auto commandQueue = whitebox_cast(CommandQueue::create(productFamily, device, csr, &desc, false, false, returnValue)); auto commandList0 = std::unique_ptr(whitebox_cast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue))); auto commandList1 = std::unique_ptr(whitebox_cast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue))); 
commandList0->setCommandListPerThreadScratchSize(512u); commandList1->setCommandListPerThreadScratchSize(0u); auto commandListHandle0 = commandList0->toHandle(); auto commandListHandle1 = commandList1->toHandle(); commandQueue->executeCommandLists(1, &commandListHandle0, nullptr, false); EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSize()); commandQueue->executeCommandLists(1, &commandListHandle1, nullptr, false); EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSize()); auto usedSpaceAfter = commandQueue->commandStream->getUsed(); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandQueue->commandStream->getCpuBase(), 0), usedSpaceAfter)); auto mediaVfeStates = findAll(cmdList.begin(), cmdList.end()); auto GSBAStates = findAll(cmdList.begin(), cmdList.end()); // We should have only 1 state added ASSERT_EQ(1u, mediaVfeStates.size()); ASSERT_EQ(1u, GSBAStates.size()); commandList0->reset(); commandList0->setCommandListPerThreadScratchSize(0u); commandList1->reset(); commandList1->setCommandListPerThreadScratchSize(0u); auto commandQueue1 = whitebox_cast(CommandQueue::create(productFamily, device, csr, &desc, false, false, returnValue)); commandQueue1->executeCommandLists(1, &commandListHandle0, nullptr, false); EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSize()); commandQueue1->executeCommandLists(1, &commandListHandle1, nullptr, false); EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSize()); usedSpaceAfter = commandQueue1->commandStream->getUsed(); GenCmdList cmdList1; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList1, ptrOffset(commandQueue1->commandStream->getCpuBase(), 0), usedSpaceAfter)); mediaVfeStates = findAll(cmdList1.begin(), cmdList1.end()); GSBAStates = findAll(cmdList1.begin(), cmdList1.end()); // We should have no state added ASSERT_EQ(0u, mediaVfeStates.size()); ASSERT_EQ(0u, 
GSBAStates.size()); commandQueue->destroy(); commandQueue1->destroy(); } HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandListsAndWithPTSSsetForSecondCmdListThenMVSandGSBAAreProgrammedTwice, CommandQueueExecuteSupport) { using MEDIA_VFE_STATE = typename FamilyType::MEDIA_VFE_STATE; using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; ze_command_queue_desc_t desc = {}; NEO::CommandStreamReceiver *csr; device->getCsrForOrdinalAndIndex(&csr, 0u, 0u); ze_result_t returnValue; auto commandQueue = whitebox_cast(CommandQueue::create(productFamily, device, csr, &desc, false, false, returnValue)); auto commandList0 = std::unique_ptr(whitebox_cast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue))); auto commandList1 = std::unique_ptr(whitebox_cast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue))); commandList0->setCommandListPerThreadScratchSize(0u); commandList1->setCommandListPerThreadScratchSize(512u); auto commandListHandle0 = commandList0->toHandle(); auto commandListHandle1 = commandList1->toHandle(); commandQueue->executeCommandLists(1, &commandListHandle0, nullptr, false); EXPECT_EQ(0u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSize()); commandQueue->executeCommandLists(1, &commandListHandle1, nullptr, false); EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSize()); auto usedSpaceAfter = commandQueue->commandStream->getUsed(); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandQueue->commandStream->getCpuBase(), 0), usedSpaceAfter)); auto mediaVfeStates = findAll(cmdList.begin(), cmdList.end()); auto GSBAStates = findAll(cmdList.begin(), cmdList.end()); // We should have 2 states added ASSERT_EQ(2u, mediaVfeStates.size()); ASSERT_EQ(2u, GSBAStates.size()); commandList0->reset(); commandList0->setCommandListPerThreadScratchSize(512u); 
commandList1->reset(); commandList1->setCommandListPerThreadScratchSize(0u); auto commandQueue1 = whitebox_cast(CommandQueue::create(productFamily, device, csr, &desc, false, false, returnValue)); commandQueue1->executeCommandLists(1, &commandListHandle0, nullptr, false); EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSize()); commandQueue1->executeCommandLists(1, &commandListHandle1, nullptr, false); EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSize()); usedSpaceAfter = commandQueue1->commandStream->getUsed(); GenCmdList cmdList1; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList1, ptrOffset(commandQueue1->commandStream->getCpuBase(), 0), usedSpaceAfter)); mediaVfeStates = findAll(cmdList1.begin(), cmdList1.end()); GSBAStates = findAll(cmdList1.begin(), cmdList1.end()); // We should have no state added ASSERT_EQ(0u, mediaVfeStates.size()); ASSERT_EQ(0u, GSBAStates.size()); commandQueue->destroy(); commandQueue1->destroy(); } HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandListsAndWithPTSSGrowingThenMVSAndGSBAAreProgrammedTwice, CommandQueueExecuteSupport) { using MEDIA_VFE_STATE = typename FamilyType::MEDIA_VFE_STATE; using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; ze_command_queue_desc_t desc = {}; NEO::CommandStreamReceiver *csr; device->getCsrForOrdinalAndIndex(&csr, 0u, 0u); ze_result_t returnValue; auto commandQueue = whitebox_cast(CommandQueue::create(productFamily, device, csr, &desc, false, false, returnValue)); auto commandList0 = std::unique_ptr(whitebox_cast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue))); auto commandList1 = std::unique_ptr(whitebox_cast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue))); commandList0->setCommandListPerThreadScratchSize(512u); commandList1->setCommandListPerThreadScratchSize(512u); auto commandListHandle0 = 
commandList0->toHandle(); auto commandListHandle1 = commandList1->toHandle(); commandQueue->executeCommandLists(1, &commandListHandle0, nullptr, false); EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSize()); commandQueue->executeCommandLists(1, &commandListHandle1, nullptr, false); EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSize()); auto usedSpaceAfter = commandQueue->commandStream->getUsed(); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandQueue->commandStream->getCpuBase(), 0), usedSpaceAfter)); auto mediaVfeStates = findAll(cmdList.begin(), cmdList.end()); auto GSBAStates = findAll(cmdList.begin(), cmdList.end()); // We should have only 1 state added ASSERT_EQ(1u, mediaVfeStates.size()); ASSERT_EQ(1u, GSBAStates.size()); commandList0->reset(); commandList0->setCommandListPerThreadScratchSize(1024u); commandList1->reset(); commandList1->setCommandListPerThreadScratchSize(1024u); auto commandQueue1 = whitebox_cast(CommandQueue::create(productFamily, device, csr, &desc, false, false, returnValue)); commandQueue1->executeCommandLists(1, &commandListHandle0, nullptr, false); EXPECT_EQ(1024u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSize()); commandQueue1->executeCommandLists(1, &commandListHandle1, nullptr, false); EXPECT_EQ(1024u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSize()); usedSpaceAfter = commandQueue1->commandStream->getUsed(); GenCmdList cmdList1; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList1, ptrOffset(commandQueue1->commandStream->getCpuBase(), 0), usedSpaceAfter)); mediaVfeStates = findAll(cmdList1.begin(), cmdList1.end()); GSBAStates = findAll(cmdList1.begin(), cmdList1.end()); // We should have only 1 state added ASSERT_EQ(1u, mediaVfeStates.size()); ASSERT_EQ(1u, GSBAStates.size()); commandQueue->destroy(); commandQueue1->destroy(); } HWTEST2_F(ExecuteCommandListTests, 
givenTwoCommandQueuesHavingTwoB2BCommandListsAndWithPTSSUniquePerCmdListThenMVSAndGSBAAreProgrammedOncePerSubmission, CommandQueueExecuteSupport) { using MEDIA_VFE_STATE = typename FamilyType::MEDIA_VFE_STATE; using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; ze_command_queue_desc_t desc = {}; NEO::CommandStreamReceiver *csr; device->getCsrForOrdinalAndIndex(&csr, 0u, 0u); ze_result_t returnValue; auto commandQueue = whitebox_cast(CommandQueue::create(productFamily, device, csr, &desc, false, false, returnValue)); auto commandList0 = std::unique_ptr(whitebox_cast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue))); auto commandList1 = std::unique_ptr(whitebox_cast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue))); commandList0->setCommandListPerThreadScratchSize(0u); commandList1->setCommandListPerThreadScratchSize(512u); auto commandListHandle0 = commandList0->toHandle(); auto commandListHandle1 = commandList1->toHandle(); commandQueue->executeCommandLists(1, &commandListHandle0, nullptr, false); EXPECT_EQ(0u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSize()); commandQueue->executeCommandLists(1, &commandListHandle1, nullptr, false); EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSize()); auto usedSpaceAfter = commandQueue->commandStream->getUsed(); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandQueue->commandStream->getCpuBase(), 0), usedSpaceAfter)); auto mediaVfeStates = findAll(cmdList.begin(), cmdList.end()); auto GSBAStates = findAll(cmdList.begin(), cmdList.end()); // We should have 2 states added ASSERT_EQ(2u, mediaVfeStates.size()); ASSERT_EQ(2u, GSBAStates.size()); commandList0->reset(); commandList0->setCommandListPerThreadScratchSize(1024u); commandList1->reset(); commandList1->setCommandListPerThreadScratchSize(2048u); auto commandQueue1 = 
whitebox_cast(CommandQueue::create(productFamily, device, csr, &desc, false, false, returnValue)); commandQueue1->executeCommandLists(1, &commandListHandle0, nullptr, false); EXPECT_EQ(1024u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSize()); commandQueue1->executeCommandLists(1, &commandListHandle1, nullptr, false); EXPECT_EQ(2048u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSize()); usedSpaceAfter = commandQueue1->commandStream->getUsed(); GenCmdList cmdList1; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList1, ptrOffset(commandQueue1->commandStream->getCpuBase(), 0), usedSpaceAfter)); mediaVfeStates = findAll(cmdList1.begin(), cmdList1.end()); GSBAStates = findAll(cmdList1.begin(), cmdList1.end()); // We should have 2 states added ASSERT_EQ(2u, mediaVfeStates.size()); ASSERT_EQ(2u, GSBAStates.size()); commandQueue->destroy(); commandQueue1->destroy(); } HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandListsAndWithPrivateScratchUniquePerCmdListThenCFEIsProgrammedOncePerSubmission, IsAtLeastXeHpCore) { using CFE_STATE = typename FamilyType::CFE_STATE; ze_command_queue_desc_t desc = {}; NEO::CommandStreamReceiver *csr; device->getCsrForOrdinalAndIndex(&csr, 0u, 0u); ze_result_t returnValue; auto commandQueue = whitebox_cast(CommandQueue::create(productFamily, device, csr, &desc, false, false, returnValue)); auto commandList0 = std::unique_ptr(whitebox_cast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue))); auto commandList1 = std::unique_ptr(whitebox_cast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue))); commandList0->setCommandListPerThreadPrivateScratchSize(0u); commandList1->setCommandListPerThreadPrivateScratchSize(512u); auto commandListHandle0 = commandList0->toHandle(); auto commandListHandle1 = commandList1->toHandle(); commandQueue->executeCommandLists(1, &commandListHandle0, nullptr, false); 
EXPECT_EQ(0u, csr->getScratchSpaceController()->getPerThreadPrivateScratchSize()); commandQueue->executeCommandLists(1, &commandListHandle1, nullptr, false); EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadPrivateScratchSize()); auto usedSpaceAfter = commandQueue->commandStream->getUsed(); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandQueue->commandStream->getCpuBase(), 0), usedSpaceAfter)); auto mediaVfeStates = findAll(cmdList.begin(), cmdList.end()); ASSERT_EQ(2u, mediaVfeStates.size()); commandList0->reset(); commandList0->setCommandListPerThreadPrivateScratchSize(1024u); commandList1->reset(); commandList1->setCommandListPerThreadPrivateScratchSize(2048u); auto commandQueue1 = whitebox_cast(CommandQueue::create(productFamily, device, csr, &desc, false, false, returnValue)); commandQueue1->executeCommandLists(1, &commandListHandle0, nullptr, false); EXPECT_EQ(1024u, csr->getScratchSpaceController()->getPerThreadPrivateScratchSize()); commandQueue1->executeCommandLists(1, &commandListHandle1, nullptr, false); EXPECT_EQ(2048u, csr->getScratchSpaceController()->getPerThreadPrivateScratchSize()); usedSpaceAfter = commandQueue1->commandStream->getUsed(); GenCmdList cmdList1; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList1, ptrOffset(commandQueue1->commandStream->getCpuBase(), 0), usedSpaceAfter)); mediaVfeStates = findAll(cmdList1.begin(), cmdList1.end()); ASSERT_EQ(2u, mediaVfeStates.size()); commandQueue->destroy(); commandQueue1->destroy(); } HWTEST_F(ExecuteCommandListTests, givenDirectSubmissionEnabledWhenExecutingCmdListThenSetNonZeroBatchBufferStartAddress) { using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; ze_command_queue_desc_t desc = {}; NEO::CommandStreamReceiver *csr; device->getCsrForOrdinalAndIndex(&csr, 0u, 0u); static_cast *>(csr)->directSubmissionAvailable = true; ze_result_t returnValue; auto commandQueue = 
whitebox_cast(CommandQueue::create(productFamily, device, csr, &desc, false, false, returnValue)); auto commandList = std::unique_ptr(whitebox_cast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue))); commandList->setCommandListPerThreadPrivateScratchSize(0u); auto commandListHandle = commandList->toHandle(); commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false); auto usedSpaceAfter = commandQueue->commandStream->getUsed(); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandQueue->commandStream->getCpuBase(), 0), usedSpaceAfter)); auto bbStartCmds = findAll(cmdList.begin(), cmdList.end()); ASSERT_EQ(2u, bbStartCmds.size()); for (auto &cmd : bbStartCmds) { auto bbStart = genCmdCast(*cmd); EXPECT_NE(0u, bbStart->getBatchBufferStartAddress()); } commandQueue->destroy(); } HWTEST_F(ExecuteCommandListTests, givenDirectSubmissionEnabledAndDebugFlagSetWhenExecutingCmdListThenSetZeroBatchBufferStartAddress) { DebugManagerStateRestore restore; NEO::DebugManager.flags.BatchBufferStartPrepatchingWaEnabled.set(0); using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; ze_command_queue_desc_t desc = {}; NEO::CommandStreamReceiver *csr; device->getCsrForOrdinalAndIndex(&csr, 0u, 0u); static_cast *>(csr)->directSubmissionAvailable = true; ze_result_t returnValue; auto commandQueue = whitebox_cast(CommandQueue::create(productFamily, device, csr, &desc, false, false, returnValue)); auto commandList = std::unique_ptr(whitebox_cast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue))); commandList->setCommandListPerThreadPrivateScratchSize(0u); auto commandListHandle = commandList->toHandle(); commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false); auto usedSpaceAfter = commandQueue->commandStream->getUsed(); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, 
ptrOffset(commandQueue->commandStream->getCpuBase(), 0), usedSpaceAfter)); auto bbStartCmds = findAll(cmdList.begin(), cmdList.end()); EXPECT_EQ(2u, bbStartCmds.size()); for (auto &cmd : bbStartCmds) { auto bbStart = genCmdCast(*cmd); if (cmd == bbStartCmds.back()) { EXPECT_EQ(0u, bbStart->getBatchBufferStartAddress()); } else { EXPECT_NE(0u, bbStart->getBatchBufferStartAddress()); } } commandQueue->destroy(); } TEST_F(CommandQueueCreate, givenOverrideCmdQueueSyncModeToDefaultWhenCommandQueueIsCreatedWithSynchronousModeThenDefaultModeIsSelected) { DebugManagerStateRestore restore; NEO::DebugManager.flags.OverrideCmdQueueSynchronousMode.set(0); ze_command_queue_desc_t desc = {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC}; desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS; ze_result_t returnValue = ZE_RESULT_ERROR_DEVICE_LOST; L0::CommandQueue *commandQueue = CommandQueue::create(productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &desc, false, false, returnValue); ASSERT_NE(nullptr, commandQueue); EXPECT_EQ(returnValue, ZE_RESULT_SUCCESS); auto cmdQueueSynchronousMode = reinterpret_cast(commandQueue)->getSynchronousMode(); EXPECT_EQ(ZE_COMMAND_QUEUE_MODE_DEFAULT, cmdQueueSynchronousMode); commandQueue->destroy(); } TEST_F(CommandQueueCreate, givenOverrideCmdQueueSyncModeToAsynchronousWhenCommandQueueIsCreatedWithSynchronousModeThenAsynchronousModeIsSelected) { DebugManagerStateRestore restore; NEO::DebugManager.flags.OverrideCmdQueueSynchronousMode.set(2); ze_command_queue_desc_t desc = {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC}; desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS; ze_result_t returnValue = ZE_RESULT_ERROR_DEVICE_LOST; L0::CommandQueue *commandQueue = CommandQueue::create(productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &desc, false, false, returnValue); ASSERT_NE(nullptr, commandQueue); EXPECT_EQ(returnValue, ZE_RESULT_SUCCESS); auto cmdQueueSynchronousMode = reinterpret_cast(commandQueue)->getSynchronousMode(); 
EXPECT_EQ(ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS, cmdQueueSynchronousMode); commandQueue->destroy(); } TEST_F(CommandQueueCreate, givenOverrideCmdQueueSyncModeToSynchronousWhenCommandQueueIsCreatedWithAsynchronousModeThenSynchronousModeIsSelected) { DebugManagerStateRestore restore; NEO::DebugManager.flags.OverrideCmdQueueSynchronousMode.set(1); ze_command_queue_desc_t desc = {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC}; desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; ze_result_t returnValue = ZE_RESULT_ERROR_DEVICE_LOST; L0::CommandQueue *commandQueue = CommandQueue::create(productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &desc, false, false, returnValue); ASSERT_NE(nullptr, commandQueue); EXPECT_EQ(returnValue, ZE_RESULT_SUCCESS); auto cmdQueueSynchronousMode = reinterpret_cast(commandQueue)->getSynchronousMode(); EXPECT_EQ(ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS, cmdQueueSynchronousMode); commandQueue->destroy(); } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_2.cpp000066400000000000000000001334511422164147700327260ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/scratch_space_controller_xehp_and_later.h" #include "shared/source/command_stream/wait_status.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "shared/test/common/libult/ult_command_stream_receiver.h" #include "shared/test/common/mocks/mock_command_stream_receiver.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "shared/test/common/mocks/mock_memory_operations_handler.h" #include "shared/test/common/mocks/ult_device_factory.h" #include "shared/test/common/test_macros/mock_method_macros.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/test/unit_tests/fixtures/aub_csr_fixture.h" #include 
"level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h" #include "level_zero/core/test/unit_tests/mocks/mock_kernel.h" #include "test_traits_common.h" namespace L0 { namespace ult { using ContextCreateCommandQueueTest = Test; TEST_F(ContextCreateCommandQueueTest, givenCallToContextCreateCommandQueueThenCallSucceeds) { ze_command_queue_desc_t desc = {}; desc.ordinal = 0u; ze_command_queue_handle_t commandQueue = {}; ze_result_t res = context->createCommandQueue(device, &desc, &commandQueue); EXPECT_EQ(ZE_RESULT_SUCCESS, res); EXPECT_NE(nullptr, commandQueue); L0::CommandQueue::fromHandle(commandQueue)->destroy(); } HWTEST_F(ContextCreateCommandQueueTest, givenEveryPossibleGroupIndexWhenCreatingCommandQueueThenCommandQueueIsCreated) { ze_command_queue_handle_t commandQueue = {}; auto &engineGroups = neoDevice->getRegularEngineGroups(); for (uint32_t ordinal = 0; ordinal < engineGroups.size(); ordinal++) { for (uint32_t index = 0; index < engineGroups[ordinal].engines.size(); index++) { ze_command_queue_desc_t desc = {}; desc.ordinal = ordinal; desc.index = index; ze_result_t res = context->createCommandQueue(device, &desc, &commandQueue); EXPECT_EQ(ZE_RESULT_SUCCESS, res); EXPECT_NE(nullptr, commandQueue); L0::CommandQueue::fromHandle(commandQueue)->destroy(); } } } HWTEST_F(ContextCreateCommandQueueTest, givenOrdinalBiggerThanAvailableEnginesWhenCreatingCommandQueueThenInvalidArgumentErrorIsReturned) { ze_command_queue_handle_t commandQueue = {}; auto &engineGroups = neoDevice->getRegularEngineGroups(); ze_command_queue_desc_t desc = {}; desc.ordinal = static_cast(engineGroups.size()); desc.index = 0; ze_result_t res = context->createCommandQueue(device, &desc, &commandQueue); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, res); EXPECT_EQ(nullptr, commandQueue); desc.ordinal = 0; desc.index = 0x1000; res = 
context->createCommandQueue(device, &desc, &commandQueue); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, res); EXPECT_EQ(nullptr, commandQueue); } HWTEST_F(ContextCreateCommandQueueTest, givenRootDeviceAndImplicitScalingDisabledWhenCreatingCommandQueueThenValidateQueueOrdinalUsingSubDeviceEngines) { NEO::UltDeviceFactory deviceFactory{1, 2}; auto &rootDevice = *deviceFactory.rootDevices[0]; auto &subDevice0 = *deviceFactory.subDevices[0]; rootDevice.regularEngineGroups.resize(1); subDevice0.getRegularEngineGroups().push_back(NEO::Device::EngineGroupT{}); subDevice0.getRegularEngineGroups().back().engineGroupType = EngineGroupType::Compute; subDevice0.getRegularEngineGroups().back().engines.resize(1); subDevice0.getRegularEngineGroups().back().engines[0].commandStreamReceiver = &rootDevice.getGpgpuCommandStreamReceiver(); auto ordinal = static_cast(subDevice0.getRegularEngineGroups().size() - 1); Mock l0RootDevice(&rootDevice, rootDevice.getExecutionEnvironment()); l0RootDevice.driverHandle = driverHandle.get(); ze_command_queue_handle_t commandQueue = nullptr; ze_command_queue_desc_t desc = {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC}; desc.ordinal = ordinal; desc.index = 0; l0RootDevice.implicitScalingCapable = true; ze_result_t res = context->createCommandQueue(l0RootDevice.toHandle(), &desc, &commandQueue); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, res); EXPECT_EQ(nullptr, commandQueue); l0RootDevice.implicitScalingCapable = false; res = context->createCommandQueue(l0RootDevice.toHandle(), &desc, &commandQueue); EXPECT_EQ(ZE_RESULT_SUCCESS, res); EXPECT_NE(nullptr, commandQueue); L0::CommandQueue::fromHandle(commandQueue)->destroy(); } using AubCsrTest = Test; HWTEST_TEMPLATED_F(AubCsrTest, givenAubCsrWhenCallingExecuteCommandListsThenPollForCompletionIsCalled) { auto csr = neoDevice->getDefaultEngine().commandStreamReceiver; ze_result_t returnValue; ze_command_queue_desc_t desc = {}; ze_command_queue_handle_t commandQueue = {}; ze_result_t res = 
context->createCommandQueue(device, &desc, &commandQueue); ASSERT_EQ(ZE_RESULT_SUCCESS, res); ASSERT_NE(nullptr, commandQueue); auto aub_csr = static_cast *>(csr); CommandQueue *queue = static_cast(L0::CommandQueue::fromHandle(commandQueue)); queue->setCommandQueuePreemptionMode(PreemptionMode::Disabled); EXPECT_EQ(aub_csr->pollForCompletionCalled, 0u); std::unique_ptr commandList(L0::CommandList::create(productFamily, device, NEO::EngineGroupType::Compute, 0u, returnValue)); ASSERT_NE(nullptr, commandList); auto commandListHandle = commandList->toHandle(); queue->executeCommandLists(1, &commandListHandle, nullptr, false); EXPECT_EQ(aub_csr->pollForCompletionCalled, 1u); L0::CommandQueue::fromHandle(commandQueue)->destroy(); } using CommandQueueSynchronizeTest = Test; using MultiTileCommandQueueSynchronizeTest = Test; template struct SynchronizeCsr : public NEO::UltCommandStreamReceiver { SynchronizeCsr(const NEO::ExecutionEnvironment &executionEnvironment, const DeviceBitfield deviceBitfield) : NEO::UltCommandStreamReceiver(const_cast(executionEnvironment), 0, deviceBitfield) { CommandStreamReceiver::tagAddress = &tagAddressData[0]; memset(const_cast(CommandStreamReceiver::tagAddress), 0xFFFFFFFF, tagSize * sizeof(uint32_t)); } WaitStatus waitForCompletionWithTimeout(const WaitParams ¶ms, uint32_t taskCountToWait) override { enableTimeoutSet = params.enableTimeout; waitForComplitionCalledTimes++; partitionCountSet = this->activePartitions; return waitForCompletionWithTimeoutResult; } WaitStatus waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool quickKmdSleep, NEO::QueueThrottle throttle) override { waitForTaskCountWithKmdNotifyFallbackCalled++; return NEO::UltCommandStreamReceiver::waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, quickKmdSleep, throttle); } static constexpr size_t tagSize = 128; static volatile uint32_t tagAddressData[tagSize]; uint32_t waitForComplitionCalledTimes = 0; 
uint32_t waitForTaskCountWithKmdNotifyFallbackCalled = 0; uint32_t partitionCountSet = 0; bool enableTimeoutSet = false; WaitStatus waitForCompletionWithTimeoutResult = WaitStatus::Ready; }; template volatile uint32_t SynchronizeCsr::tagAddressData[SynchronizeCsr::tagSize]; HWTEST_F(CommandQueueSynchronizeTest, givenCallToSynchronizeThenCorrectEnableTimeoutAndTimeoutValuesAreUsed) { auto csr = std::unique_ptr>(new SynchronizeCsr(*device->getNEODevice()->getExecutionEnvironment(), device->getNEODevice()->getDeviceBitfield())); ze_command_queue_desc_t desc = {}; ze_command_queue_handle_t commandQueue = {}; ze_result_t res = context->createCommandQueue(device, &desc, &commandQueue); EXPECT_EQ(ZE_RESULT_SUCCESS, res); EXPECT_NE(nullptr, commandQueue); CommandQueue *queue = reinterpret_cast(L0::CommandQueue::fromHandle(commandQueue)); queue->csr = csr.get(); uint64_t timeout = 10; int64_t timeoutMicrosecondsExpected = timeout; queue->synchronize(timeout); EXPECT_EQ(1u, csr->waitForComplitionCalledTimes); EXPECT_EQ(0u, csr->waitForTaskCountWithKmdNotifyFallbackCalled); EXPECT_TRUE(csr->enableTimeoutSet); timeout = std::numeric_limits::max(); timeoutMicrosecondsExpected = NEO::TimeoutControls::maxTimeout; queue->synchronize(timeout); EXPECT_EQ(2u, csr->waitForComplitionCalledTimes); EXPECT_EQ(0u, csr->waitForTaskCountWithKmdNotifyFallbackCalled); EXPECT_FALSE(csr->enableTimeoutSet); L0::CommandQueue::fromHandle(commandQueue)->destroy(); } HWTEST_F(CommandQueueSynchronizeTest, givenGpuHangWhenCallingSynchronizeThenErrorIsPropagated) { auto csr = std::unique_ptr>(new SynchronizeCsr(*device->getNEODevice()->getExecutionEnvironment(), device->getNEODevice()->getDeviceBitfield())); csr->waitForCompletionWithTimeoutResult = NEO::WaitStatus::GpuHang; ze_command_queue_desc_t desc{}; ze_command_queue_handle_t commandQueue{}; ze_result_t res = context->createCommandQueue(device, &desc, &commandQueue); ASSERT_EQ(ZE_RESULT_SUCCESS, res); ASSERT_NE(nullptr, commandQueue); auto queue = 
whitebox_cast(L0::CommandQueue::fromHandle(commandQueue)); queue->csr = csr.get(); constexpr auto timeout{std::numeric_limits::max()}; const auto synchronizationResult{queue->synchronize(timeout)}; EXPECT_EQ(ZE_RESULT_ERROR_DEVICE_LOST, synchronizationResult); EXPECT_EQ(1u, csr->waitForComplitionCalledTimes); EXPECT_EQ(0u, csr->waitForTaskCountWithKmdNotifyFallbackCalled); EXPECT_FALSE(csr->enableTimeoutSet); L0::CommandQueue::fromHandle(commandQueue)->destroy(); } HWTEST_F(CommandQueueSynchronizeTest, givenDebugOverrideEnabledAndGpuHangWhenCallingSynchronizeThenErrorIsPropagated) { DebugManagerStateRestore restore; NEO::DebugManager.flags.OverrideUseKmdWaitFunction.set(1); auto csr = std::unique_ptr>(new SynchronizeCsr(*device->getNEODevice()->getExecutionEnvironment(), device->getNEODevice()->getDeviceBitfield())); csr->waitForCompletionWithTimeoutResult = NEO::WaitStatus::GpuHang; ze_command_queue_desc_t desc{}; ze_command_queue_handle_t commandQueue{}; ze_result_t res = context->createCommandQueue(device, &desc, &commandQueue); ASSERT_EQ(ZE_RESULT_SUCCESS, res); ASSERT_NE(nullptr, commandQueue); auto queue = whitebox_cast(L0::CommandQueue::fromHandle(commandQueue)); queue->csr = csr.get(); constexpr auto timeout{std::numeric_limits::max()}; const auto synchronizationResult{queue->synchronize(timeout)}; EXPECT_EQ(ZE_RESULT_ERROR_DEVICE_LOST, synchronizationResult); EXPECT_EQ(1u, csr->waitForComplitionCalledTimes); EXPECT_EQ(1u, csr->waitForTaskCountWithKmdNotifyFallbackCalled); EXPECT_FALSE(csr->enableTimeoutSet); L0::CommandQueue::fromHandle(commandQueue)->destroy(); } HWTEST_F(CommandQueueSynchronizeTest, givenDebugOverrideEnabledWhenCallToSynchronizeThenCorrectEnableTimeoutAndTimeoutValuesAreUsed) { DebugManagerStateRestore restore; NEO::DebugManager.flags.OverrideUseKmdWaitFunction.set(1); auto csr = std::unique_ptr>(new SynchronizeCsr(*device->getNEODevice()->getExecutionEnvironment(), device->getNEODevice()->getDeviceBitfield())); ze_command_queue_desc_t 
desc = {}; ze_command_queue_handle_t commandQueue = {}; ze_result_t res = context->createCommandQueue(device, &desc, &commandQueue); EXPECT_EQ(ZE_RESULT_SUCCESS, res); EXPECT_NE(nullptr, commandQueue); CommandQueue *queue = reinterpret_cast(L0::CommandQueue::fromHandle(commandQueue)); queue->csr = csr.get(); uint64_t timeout = 10; bool enableTimeoutExpected = true; int64_t timeoutMicrosecondsExpected = timeout; queue->synchronize(timeout); EXPECT_EQ(1u, csr->waitForComplitionCalledTimes); EXPECT_EQ(0u, csr->waitForTaskCountWithKmdNotifyFallbackCalled); EXPECT_TRUE(csr->enableTimeoutSet); timeout = std::numeric_limits::max(); enableTimeoutExpected = false; timeoutMicrosecondsExpected = NEO::TimeoutControls::maxTimeout; queue->synchronize(timeout); EXPECT_EQ(2u, csr->waitForComplitionCalledTimes); EXPECT_EQ(1u, csr->waitForTaskCountWithKmdNotifyFallbackCalled); EXPECT_FALSE(csr->enableTimeoutSet); L0::CommandQueue::fromHandle(commandQueue)->destroy(); } HWTEST2_F(MultiTileCommandQueueSynchronizeTest, givenMultiplePartitionCountWhenCallingSynchronizeThenExpectTheSameNumberCsrSynchronizeCalls, IsAtLeastXeHpCore) { const ze_command_queue_desc_t desc{}; ze_result_t returnValue; auto csr = reinterpret_cast *>(neoDevice->getDefaultEngine().commandStreamReceiver); if (device->getNEODevice()->getPreemptionMode() == PreemptionMode::MidThread || device->getNEODevice()->isDebuggerActive()) { csr->createPreemptionAllocation(); } EXPECT_NE(0u, csr->getPostSyncWriteOffset()); volatile uint32_t *tagAddress = csr->getTagAddress(); for (uint32_t i = 0; i < 2; i++) { *tagAddress = 0xFF; tagAddress = ptrOffset(tagAddress, csr->getPostSyncWriteOffset()); } csr->activePartitions = 2u; auto commandQueue = whitebox_cast(CommandQueue::create(productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &desc, false, false, returnValue)); EXPECT_EQ(returnValue, ZE_RESULT_SUCCESS); ASSERT_NE(nullptr, commandQueue); EXPECT_EQ(2u, commandQueue->activeSubDevices); auto commandList 
= std::unique_ptr(whitebox_cast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue))); ASSERT_NE(nullptr, commandList); commandList->partitionCount = 2; ze_command_list_handle_t cmdListHandle = commandList->toHandle(); returnValue = commandQueue->executeCommandLists(1, &cmdListHandle, nullptr, false); EXPECT_EQ(returnValue, ZE_RESULT_SUCCESS); uint64_t timeout = std::numeric_limits::max(); commandQueue->synchronize(timeout); L0::CommandQueue::fromHandle(commandQueue)->destroy(); } HWTEST2_F(MultiTileCommandQueueSynchronizeTest, givenCsrHasMultipleActivePartitionWhenExecutingCmdListOnNewCmdQueueThenExpectCmdPartitionCountMatchCsrActivePartitions, IsAtLeastXeHpCore) { const ze_command_queue_desc_t desc{}; ze_result_t returnValue; auto csr = reinterpret_cast *>(neoDevice->getDefaultEngine().commandStreamReceiver); if (device->getNEODevice()->getPreemptionMode() == PreemptionMode::MidThread || device->getNEODevice()->isDebuggerActive()) { csr->createPreemptionAllocation(); } EXPECT_NE(0u, csr->getPostSyncWriteOffset()); volatile uint32_t *tagAddress = csr->getTagAddress(); for (uint32_t i = 0; i < 2; i++) { *tagAddress = 0xFF; tagAddress = ptrOffset(tagAddress, csr->getPostSyncWriteOffset()); } csr->activePartitions = 2u; auto commandQueue = whitebox_cast(CommandQueue::create(productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &desc, false, false, returnValue)); EXPECT_EQ(returnValue, ZE_RESULT_SUCCESS); ASSERT_NE(nullptr, commandQueue); EXPECT_EQ(2u, commandQueue->activeSubDevices); auto commandList = std::unique_ptr(whitebox_cast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue))); ASSERT_NE(nullptr, commandList); ze_command_list_handle_t cmdListHandle = commandList->toHandle(); commandQueue->executeCommandLists(1, &cmdListHandle, nullptr, false); EXPECT_EQ(returnValue, ZE_RESULT_SUCCESS); EXPECT_EQ(2u, commandQueue->partitionCount); 
L0::CommandQueue::fromHandle(commandQueue)->destroy(); } HWTEST_F(CommandQueueSynchronizeTest, givenSingleTileCsrWhenExecutingMultiTileCommandListThenExpectErrorOnExecute) { const ze_command_queue_desc_t desc{}; ze_result_t returnValue; auto commandQueue = whitebox_cast(CommandQueue::create(productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &desc, false, false, returnValue)); EXPECT_EQ(returnValue, ZE_RESULT_SUCCESS); ASSERT_NE(nullptr, commandQueue); EXPECT_EQ(1u, commandQueue->activeSubDevices); auto commandList = std::unique_ptr(whitebox_cast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue))); ASSERT_NE(nullptr, commandList); commandList->partitionCount = 2; ze_command_list_handle_t cmdListHandle = commandList->toHandle(); returnValue = commandQueue->executeCommandLists(1, &cmdListHandle, nullptr, false); EXPECT_EQ(returnValue, ZE_RESULT_ERROR_INVALID_COMMAND_LIST_TYPE); L0::CommandQueue::fromHandle(commandQueue)->destroy(); } template struct TestCmdQueueCsr : public NEO::UltCommandStreamReceiver { TestCmdQueueCsr(const NEO::ExecutionEnvironment &executionEnvironment, const DeviceBitfield deviceBitfield) : NEO::UltCommandStreamReceiver(const_cast(executionEnvironment), 0, deviceBitfield) { } ADDMETHOD_NOBASE(waitForCompletionWithTimeout, NEO::WaitStatus, NEO::WaitStatus::NotReady, (const WaitParams ¶ms, uint32_t taskCountToWait)); }; HWTEST_F(CommandQueueSynchronizeTest, givenSinglePartitionCountWhenWaitFunctionFailsThenReturnNotReady) { auto csr = std::unique_ptr>(new TestCmdQueueCsr(*device->getNEODevice()->getExecutionEnvironment(), device->getNEODevice()->getDeviceBitfield())); csr->setupContext(*device->getNEODevice()->getDefaultEngine().osContext); const ze_command_queue_desc_t desc{}; ze_result_t returnValue; auto commandQueue = whitebox_cast(CommandQueue::create(productFamily, device, csr.get(), &desc, false, false, returnValue)); EXPECT_EQ(returnValue, ZE_RESULT_SUCCESS); 
ASSERT_NE(nullptr, commandQueue); uint64_t timeout = std::numeric_limits::max(); returnValue = commandQueue->synchronize(timeout); EXPECT_EQ(returnValue, ZE_RESULT_NOT_READY); commandQueue->destroy(); EXPECT_EQ(1u, csr->waitForCompletionWithTimeoutCalled); } using CommandQueuePowerHintTest = Test; HWTEST_F(CommandQueuePowerHintTest, givenDriverHandleWithPowerHintAndOsContextPowerHintUnsetThenSuccessIsReturned) { auto csr = std::unique_ptr>(new TestCmdQueueCsr(*device->getNEODevice()->getExecutionEnvironment(), device->getNEODevice()->getDeviceBitfield())); csr->setupContext(*device->getNEODevice()->getDefaultEngine().osContext); DriverHandleImp *driverHandleImp = static_cast(device->getDriverHandle()); driverHandleImp->powerHint = 1; const ze_command_queue_desc_t desc{}; ze_result_t returnValue; auto commandQueue = whitebox_cast(CommandQueue::create(productFamily, device, csr.get(), &desc, false, false, returnValue)); EXPECT_EQ(returnValue, ZE_RESULT_SUCCESS); ASSERT_NE(nullptr, commandQueue); commandQueue->destroy(); } HWTEST_F(CommandQueuePowerHintTest, givenDriverHandleWithPowerHintAndOsContextPowerHintAlreadySetThenSuccessIsReturned) { auto csr = std::unique_ptr>(new TestCmdQueueCsr(*device->getNEODevice()->getExecutionEnvironment(), device->getNEODevice()->getDeviceBitfield())); csr->setupContext(*device->getNEODevice()->getDefaultEngine().osContext); DriverHandleImp *driverHandleImp = static_cast(device->getDriverHandle()); driverHandleImp->powerHint = 1; auto &osContext = csr->getOsContext(); osContext.setUmdPowerHintValue(1); const ze_command_queue_desc_t desc{}; ze_result_t returnValue; auto commandQueue = whitebox_cast(CommandQueue::create(productFamily, device, csr.get(), &desc, false, false, returnValue)); EXPECT_EQ(returnValue, ZE_RESULT_SUCCESS); ASSERT_NE(nullptr, commandQueue); commandQueue->destroy(); } struct MemoryManagerCommandQueueCreateNegativeTest : public NEO::MockMemoryManager { 
MemoryManagerCommandQueueCreateNegativeTest(NEO::ExecutionEnvironment &executionEnvironment) : NEO::MockMemoryManager(const_cast(executionEnvironment)) {} NEO::GraphicsAllocation *allocateGraphicsMemoryWithProperties(const NEO::AllocationProperties &properties) override { if (forceFailureInPrimaryAllocation) { return nullptr; } return NEO::MemoryManager::allocateGraphicsMemoryWithProperties(properties); } bool forceFailureInPrimaryAllocation = false; }; struct CommandQueueCreateNegativeTest : public ::testing::Test { void SetUp() override { executionEnvironment = new NEO::ExecutionEnvironment(); executionEnvironment->prepareRootDeviceEnvironments(numRootDevices); for (uint32_t i = 0; i < numRootDevices; i++) { executionEnvironment->rootDeviceEnvironments[i]->setHwInfo(NEO::defaultHwInfo.get()); } memoryManager = new MemoryManagerCommandQueueCreateNegativeTest(*executionEnvironment); executionEnvironment->memoryManager.reset(memoryManager); std::vector> devices; for (uint32_t i = 0; i < numRootDevices; i++) { neoDevice = NEO::MockDevice::create(executionEnvironment, i); devices.push_back(std::unique_ptr(neoDevice)); } driverHandle = std::make_unique>(); driverHandle->initialize(std::move(devices)); device = driverHandle->devices[0]; } void TearDown() override { } NEO::ExecutionEnvironment *executionEnvironment = nullptr; std::unique_ptr> driverHandle; NEO::MockDevice *neoDevice = nullptr; L0::Device *device = nullptr; MemoryManagerCommandQueueCreateNegativeTest *memoryManager = nullptr; const uint32_t numRootDevices = 1u; }; TEST_F(CommandQueueCreateNegativeTest, whenDeviceAllocationFailsDuringCommandQueueCreateThenAppropriateValueIsReturned) { const ze_command_queue_desc_t desc = {}; auto csr = std::unique_ptr(neoDevice->createCommandStreamReceiver()); csr->setupContext(*neoDevice->getDefaultEngine().osContext); memoryManager->forceFailureInPrimaryAllocation = true; ze_result_t returnValue; L0::CommandQueue *commandQueue = CommandQueue::create(productFamily, 
device, csr.get(), &desc, false, false, returnValue); EXPECT_EQ(ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY, returnValue); ASSERT_EQ(nullptr, commandQueue); } struct CommandQueueInitTests : public ::testing::Test { class MyMemoryManager : public OsAgnosticMemoryManager { public: using OsAgnosticMemoryManager::OsAgnosticMemoryManager; NEO::GraphicsAllocation *allocateGraphicsMemoryWithProperties(const AllocationProperties &properties) override { storedAllocationProperties.push_back(properties); return OsAgnosticMemoryManager::allocateGraphicsMemoryWithProperties(properties); } std::vector storedAllocationProperties; }; void SetUp() override { DebugManager.flags.CreateMultipleSubDevices.set(numSubDevices); auto executionEnvironment = new NEO::ExecutionEnvironment(); executionEnvironment->prepareRootDeviceEnvironments(numRootDevices); executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(NEO::defaultHwInfo.get()); memoryManager = new MyMemoryManager(*executionEnvironment); executionEnvironment->memoryManager.reset(memoryManager); neoDevice = NEO::MockDevice::create(executionEnvironment, 0); std::vector> devices; devices.push_back(std::unique_ptr(neoDevice)); driverHandle = std::make_unique>(); driverHandle->initialize(std::move(devices)); device = driverHandle->devices[0]; } VariableBackup mockDeviceFlagBackup{&NEO::MockDevice::createSingleDevice, false}; DebugManagerStateRestore restore; NEO::MockDevice *neoDevice = nullptr; std::unique_ptr> driverHandle; L0::Device *device = nullptr; MyMemoryManager *memoryManager = nullptr; const uint32_t numRootDevices = 1; const uint32_t numSubDevices = 4; }; TEST_F(CommandQueueInitTests, givenMultipleSubDevicesWhenInitializingThenAllocateForAllSubDevices) { ze_command_queue_desc_t desc = {}; auto csr = std::unique_ptr(neoDevice->createCommandStreamReceiver()); csr->setupContext(*neoDevice->getDefaultEngine().osContext); ze_result_t returnValue; L0::CommandQueue *commandQueue = CommandQueue::create(productFamily, device, csr.get(), 
&desc, false, false, returnValue); EXPECT_NE(nullptr, commandQueue); const uint64_t expectedBitfield = maxNBitValue(numSubDevices); uint32_t cmdBufferAllocationsFound = 0; for (auto &allocationProperties : memoryManager->storedAllocationProperties) { if (allocationProperties.allocationType == NEO::AllocationType::COMMAND_BUFFER) { cmdBufferAllocationsFound++; EXPECT_EQ(expectedBitfield, allocationProperties.subDevicesBitfield.to_ulong()); EXPECT_EQ(1u, allocationProperties.flags.multiOsContextCapable); } } EXPECT_EQ(static_cast(CommandQueueImp::CommandBufferManager::BUFFER_ALLOCATION::COUNT), cmdBufferAllocationsFound); commandQueue->destroy(); } TEST_F(CommandQueueInitTests, whenDestroyCommandQueueThenStoreCommandBuffersAsReusableAllocations) { ze_command_queue_desc_t desc = {}; auto csr = std::unique_ptr(neoDevice->createCommandStreamReceiver()); csr->setupContext(*neoDevice->getDefaultEngine().osContext); ze_result_t returnValue; L0::CommandQueue *commandQueue = CommandQueue::create(productFamily, device, csr.get(), &desc, false, false, returnValue); EXPECT_NE(nullptr, commandQueue); auto deviceImp = static_cast(device); EXPECT_TRUE(deviceImp->allocationsForReuse->peekIsEmpty()); commandQueue->destroy(); EXPECT_FALSE(deviceImp->allocationsForReuse->peekIsEmpty()); } struct DeviceWithDualStorage : Test { void SetUp() override { NEO::MockCompilerEnableGuard mock(true); DebugManager.flags.EnableLocalMemory.set(1); DebugManager.flags.AllocateSharedAllocationsWithCpuAndGpuStorage.set(1); DeviceFixture::SetUp(); } void TearDown() override { DeviceFixture::TearDown(); } DebugManagerStateRestore restorer; }; HWTEST2_F(DeviceWithDualStorage, givenCmdListWithAppendedKernelAndUsmTransferAndBlitterDisabledWhenExecuteCmdListThenCfeStateOnceProgrammed, IsAtLeastXeHpCore) { using CFE_STATE = typename FamilyType::CFE_STATE; neoDevice->executionEnvironment->rootDeviceEnvironments[0]->memoryOperationsInterface = std::make_unique(); ze_result_t res = ZE_RESULT_SUCCESS; const 
ze_command_queue_desc_t desc = {}; auto commandQueue = whitebox_cast(CommandQueue::create(productFamily, device, neoDevice->getInternalEngine().commandStreamReceiver, &desc, false, false, res)); EXPECT_EQ(ZE_RESULT_SUCCESS, res); ASSERT_NE(nullptr, commandQueue); auto commandList = std::unique_ptr(whitebox_cast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, res))); EXPECT_EQ(ZE_RESULT_SUCCESS, res); ASSERT_NE(nullptr, commandList); Mock kernel; kernel.immutableData.device = device; size_t size = 10; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_host_mem_alloc_desc_t hostDesc = {}; res = context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, res); auto gpuAlloc = device->getDriverHandle()->getSvmAllocsManager()->getSVMAllocs()->get(ptr)->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex()); kernel.residencyContainer.push_back(gpuAlloc); ze_group_count_t dispatchFunctionArguments{1, 1, 1}; commandList->appendLaunchKernel(kernel.toHandle(), &dispatchFunctionArguments, nullptr, 0, nullptr); auto deviceImp = static_cast(device); auto pageFaultCmdQueue = whitebox_cast(deviceImp->pageFaultCommandList->cmdQImmediate); auto sizeBefore = commandQueue->commandStream->getUsed(); auto pageFaultSizeBefore = pageFaultCmdQueue->commandStream->getUsed(); auto handle = commandList->toHandle(); commandQueue->executeCommandLists(1, &handle, nullptr, true); auto sizeAfter = commandQueue->commandStream->getUsed(); auto pageFaultSizeAfter = pageFaultCmdQueue->commandStream->getUsed(); EXPECT_LT(sizeBefore, sizeAfter); EXPECT_LT(pageFaultSizeBefore, pageFaultSizeAfter); GenCmdList commands; CmdParse::parseCommandBuffer(commands, ptrOffset(commandQueue->commandStream->getCpuBase(), 0), sizeAfter); auto count = findAll(commands.begin(), commands.end()).size(); EXPECT_EQ(0u, count); CmdParse::parseCommandBuffer(commands, 
ptrOffset(pageFaultCmdQueue->commandStream->getCpuBase(), 0), pageFaultSizeAfter); count = findAll(commands.begin(), commands.end()).size(); EXPECT_EQ(1u, count); res = context->freeMem(ptr); ASSERT_EQ(ZE_RESULT_SUCCESS, res); commandQueue->destroy(); } using CommandQueueScratchTests = Test; using Platforms = IsAtLeastProduct; HWTEST2_F(CommandQueueScratchTests, givenCommandQueueWhenHandleScratchSpaceThenProperScratchSlotIsSetAndScratchAllocationReturned, Platforms) { class MockScratchSpaceControllerXeHPAndLater : public NEO::ScratchSpaceControllerXeHPAndLater { public: uint32_t scratchSlot = 0u; bool programHeapsCalled = false; NEO::GraphicsAllocation *scratchAllocation = nullptr; MockScratchSpaceControllerXeHPAndLater(uint32_t rootDeviceIndex, NEO::ExecutionEnvironment &environment, InternalAllocationStorage &allocationStorage) : NEO::ScratchSpaceControllerXeHPAndLater(rootDeviceIndex, environment, allocationStorage) {} void programHeaps(HeapContainer &heapContainer, uint32_t scratchSlot, uint32_t requiredPerThreadScratchSize, uint32_t requiredPerThreadPrivateScratchSize, uint32_t currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, bool &vfeStateDirty) override { this->scratchSlot = scratchSlot; programHeapsCalled = true; } NEO::GraphicsAllocation *getScratchSpaceAllocation() override { return scratchAllocation; } protected: }; MockCommandStreamReceiver csr(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield()); csr.initializeTagAllocation(); csr.setupContext(*neoDevice->getDefaultEngine().osContext); NEO::ExecutionEnvironment *execEnv = static_cast(device->getExecEnvironment()); std::unique_ptr scratchController = std::make_unique(device->getRootDeviceIndex(), *execEnv, *csr.getInternalAllocationStorage()); const ze_command_queue_desc_t desc = {}; std::unique_ptr commandQueue = std::make_unique>(device, &csr, &desc); auto commandQueueHw = static_cast *>(commandQueue.get()); NEO::ResidencyContainer residencyContainer; 
NEO::HeapContainer heapContainer; void *surfaceHeap = alignedMalloc(0x1000, 0x1000); NEO::GraphicsAllocation graphicsAllocationHeap(0, NEO::AllocationType::BUFFER, surfaceHeap, 0u, 0u, 1u, MemoryPool::System4KBPages, 1u); heapContainer.push_back(&graphicsAllocationHeap); bool gsbaStateDirty = false; bool frontEndStateDirty = false; NEO::GraphicsAllocation graphicsAllocation(1u, NEO::AllocationType::BUFFER, nullptr, 0u, 0u, 0u, MemoryPool::System4KBPages, 0u); auto scratch = static_cast(scratchController.get()); scratch->scratchAllocation = &graphicsAllocation; commandQueueHw->handleScratchSpace(heapContainer, scratchController.get(), gsbaStateDirty, frontEndStateDirty, 0x1000, 0u); EXPECT_TRUE(scratch->programHeapsCalled); EXPECT_GT(csr.makeResidentCalledTimes, 0u); alignedFree(surfaceHeap); } HWTEST2_F(CommandQueueScratchTests, givenCommandQueueWhenHandleScratchSpaceAndHeapContainerIsZeroSizeThenNoFunctionIsCalled, Platforms) { class MockScratchSpaceControllerXeHPAndLater : public NEO::ScratchSpaceControllerXeHPAndLater { public: using NEO::ScratchSpaceControllerXeHPAndLater::scratchAllocation; bool programHeapsCalled = false; MockScratchSpaceControllerXeHPAndLater(uint32_t rootDeviceIndex, NEO::ExecutionEnvironment &environment, InternalAllocationStorage &allocationStorage) : NEO::ScratchSpaceControllerXeHPAndLater(rootDeviceIndex, environment, allocationStorage) {} void programHeaps(HeapContainer &heapContainer, uint32_t scratchSlot, uint32_t requiredPerThreadScratchSize, uint32_t requiredPerThreadPrivateScratchSize, uint32_t currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, bool &vfeStateDirty) override { programHeapsCalled = true; } protected: }; MockCommandStreamReceiver csr(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield()); csr.initializeTagAllocation(); csr.setupContext(*neoDevice->getDefaultEngine().osContext); NEO::ExecutionEnvironment *execEnv = static_cast(device->getExecEnvironment()); std::unique_ptr 
scratchController = std::make_unique(device->getRootDeviceIndex(), *execEnv, *csr.getInternalAllocationStorage()); const ze_command_queue_desc_t desc = {}; std::unique_ptr commandQueue = std::make_unique>(device, &csr, &desc); auto commandQueueHw = static_cast *>(commandQueue.get()); NEO::ResidencyContainer residencyContainer; NEO::HeapContainer heapContainer; bool gsbaStateDirty = false; bool frontEndStateDirty = false; NEO::GraphicsAllocation graphicsAllocation(1u, NEO::AllocationType::BUFFER, nullptr, 0u, 0u, 0u, MemoryPool::System4KBPages, 0u); auto scratch = static_cast(scratchController.get()); scratch->scratchAllocation = &graphicsAllocation; commandQueueHw->handleScratchSpace(heapContainer, scratchController.get(), gsbaStateDirty, frontEndStateDirty, 0x1000, 0u); EXPECT_FALSE(scratch->programHeapsCalled); scratch->scratchAllocation = nullptr; } HWTEST2_F(CommandQueueScratchTests, givenCommandQueueWhenBindlessEnabledThenHandleScratchSpaceCallsProgramBindlessSurfaceStateForScratch, Platforms) { DebugManagerStateRestore restorer; DebugManager.flags.UseBindlessMode.set(1); class MockScratchSpaceControllerXeHPAndLater : public NEO::ScratchSpaceControllerXeHPAndLater { public: bool programHeapsCalled = false; NEO::MockGraphicsAllocation alloc; MockScratchSpaceControllerXeHPAndLater(uint32_t rootDeviceIndex, NEO::ExecutionEnvironment &environment, InternalAllocationStorage &allocationStorage) : NEO::ScratchSpaceControllerXeHPAndLater(rootDeviceIndex, environment, allocationStorage) {} void programBindlessSurfaceStateForScratch(BindlessHeapsHelper *heapsHelper, uint32_t requiredPerThreadScratchSize, uint32_t requiredPerThreadPrivateScratchSize, uint32_t currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, bool &vfeStateDirty, NEO::CommandStreamReceiver *csr) override { programHeapsCalled = true; } NEO::GraphicsAllocation *getScratchSpaceAllocation() override { return &alloc; } protected: }; MockCsrHw2 csr(*neoDevice->getExecutionEnvironment(), 0, 
neoDevice->getDeviceBitfield()); csr.initializeTagAllocation(); csr.setupContext(*neoDevice->getDefaultEngine().osContext); NEO::ExecutionEnvironment *execEnv = static_cast(device->getExecEnvironment()); std::unique_ptr scratchController = std::make_unique(device->getRootDeviceIndex(), *execEnv, *csr.getInternalAllocationStorage()); const ze_command_queue_desc_t desc = {}; std::unique_ptr commandQueue = std::make_unique>(device, &csr, &desc); auto commandQueueHw = static_cast *>(commandQueue.get()); bool gsbaStateDirty = false; bool frontEndStateDirty = false; NEO::ResidencyContainer residency; NEO::HeapContainer heapContainer; // scratch part commandQueueHw->handleScratchSpace(heapContainer, scratchController.get(), gsbaStateDirty, frontEndStateDirty, 0x1000, 0u); EXPECT_TRUE(static_cast(scratchController.get())->programHeapsCalled); // private part static_cast(scratchController.get())->programHeapsCalled = false; commandQueueHw->handleScratchSpace(heapContainer, scratchController.get(), gsbaStateDirty, frontEndStateDirty, 0x0, 0x1000); EXPECT_TRUE(static_cast(scratchController.get())->programHeapsCalled); } HWTEST2_F(CommandQueueScratchTests, whenPatchCommandsIsCalledThenCommandsAreCorrectlyPatched, IsAtLeastXeHpCore) { using CFE_STATE = typename FamilyType::CFE_STATE; ze_command_queue_desc_t desc = {}; NEO::CommandStreamReceiver *csr = nullptr; device->getCsrForOrdinalAndIndex(&csr, 0u, 0u); auto commandQueue = std::make_unique>(device, csr, &desc); auto commandList = std::make_unique>>(); EXPECT_NO_THROW(commandQueue->patchCommands(*commandList, 0)); commandList->commandsToPatch.push_back({}); EXPECT_ANY_THROW(commandQueue->patchCommands(*commandList, 0)); commandList->commandsToPatch.clear(); CFE_STATE destinationCfeStates[4]; int32_t initialScratchAddress = 0x123400; for (size_t i = 0; i < 4; i++) { auto sourceCfeState = new CFE_STATE; *sourceCfeState = FamilyType::cmdInitCfeState; if constexpr (TestTraits::numberOfWalkersInCfeStateSupported) { 
sourceCfeState->setNumberOfWalkers(2); } sourceCfeState->setMaximumNumberOfThreads(16); sourceCfeState->setScratchSpaceBuffer(initialScratchAddress); destinationCfeStates[i] = FamilyType::cmdInitCfeState; if constexpr (TestTraits::numberOfWalkersInCfeStateSupported) { EXPECT_NE(destinationCfeStates[i].getNumberOfWalkers(), sourceCfeState->getNumberOfWalkers()); } EXPECT_NE(destinationCfeStates[i].getMaximumNumberOfThreads(), sourceCfeState->getMaximumNumberOfThreads()); CommandList::CommandToPatch commandToPatch; commandToPatch.pDestination = &destinationCfeStates[i]; commandToPatch.pCommand = sourceCfeState; commandToPatch.type = CommandList::CommandToPatch::CommandType::FrontEndState; commandList->commandsToPatch.push_back(commandToPatch); } uint64_t patchedScratchAddress = 0xABCD00; commandQueue->patchCommands(*commandList, patchedScratchAddress); for (size_t i = 0; i < 4; i++) { EXPECT_EQ(patchedScratchAddress, destinationCfeStates[i].getScratchSpaceBuffer()); auto &sourceCfeState = *reinterpret_cast(commandList->commandsToPatch[i].pCommand); if constexpr (TestTraits::numberOfWalkersInCfeStateSupported) { EXPECT_EQ(destinationCfeStates[i].getNumberOfWalkers(), sourceCfeState.getNumberOfWalkers()); } EXPECT_EQ(destinationCfeStates[i].getMaximumNumberOfThreads(), sourceCfeState.getMaximumNumberOfThreads()); EXPECT_EQ(destinationCfeStates[i].getScratchSpaceBuffer(), sourceCfeState.getScratchSpaceBuffer()); } } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_dg2.cpp000066400000000000000000000027131422164147700332350ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h" namespace L0 { 
namespace ult { using CommandQueueTestDG2 = Test; HWTEST2_F(CommandQueueTestDG2, givenBindlessEnabledWhenEstimateStateBaseAddressCmdSizeCalledOnDG2ThenReturnedSizeOf2SBAAndPCAnd3DBindingTablePoolPool, IsDG2) { DebugManagerStateRestore restorer; DebugManager.flags.UseBindlessMode.set(1); using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using STATE_BASE_ADDRESS = typename GfxFamily::STATE_BASE_ADDRESS; using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; using _3DSTATE_BINDING_TABLE_POOL_ALLOC = typename GfxFamily::_3DSTATE_BINDING_TABLE_POOL_ALLOC; ze_command_queue_desc_t desc = {}; auto csr = std::unique_ptr(neoDevice->createCommandStreamReceiver()); auto commandQueue = std::make_unique>(device, csr.get(), &desc); auto size = commandQueue->estimateStateBaseAddressCmdSize(); auto expectedSize = 2 * sizeof(STATE_BASE_ADDRESS) + sizeof(PIPE_CONTROL) + sizeof(_3DSTATE_BINDING_TABLE_POOL_ALLOC); EXPECT_EQ(size, expectedSize); } } // namespace ult } // namespace L0 test_cmdqueue_enqueue_cmdlist.cpp000066400000000000000000001431311422164147700356700ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/cmdqueue/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_container/implicit_scaling.h" #include "shared/source/command_stream/command_stream_receiver_hw.h" #include "shared/source/command_stream/preemption.h" #include "shared/source/utilities/software_tags_manager.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/libult/ult_command_stream_receiver.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/mocks/ult_device_factory.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/source/fence/fence.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include 
"level_zero/core/test/unit_tests/mocks/mock_cmdlist.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h" #include "level_zero/core/test/unit_tests/mocks/mock_fence.h" #include "level_zero/core/test/unit_tests/mocks/mock_kernel.h" #include "level_zero/core/test/unit_tests/mocks/mock_module.h" namespace L0 { namespace ult { struct CommandQueueExecuteCommandLists : public Test { void SetUp() override { DeviceFixture::SetUp(); ze_result_t returnValue; commandLists[0] = CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)->toHandle(); ASSERT_NE(nullptr, commandLists[0]); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); commandLists[1] = CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)->toHandle(); ASSERT_NE(nullptr, commandLists[1]); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); } void TearDown() override { for (auto i = 0u; i < numCommandLists; i++) { auto commandList = CommandList::fromHandle(commandLists[i]); commandList->destroy(); } DeviceFixture::TearDown(); } template void twoCommandListCommandPreemptionTest(bool preemptionCmdProgramming); const static uint32_t numCommandLists = 2; ze_command_list_handle_t commandLists[numCommandLists]; }; struct MultiDeviceCommandQueueExecuteCommandLists : public Test { void SetUp() override { DebugManager.flags.EnableWalkerPartition.set(1); numRootDevices = 1u; MultiDeviceFixture::SetUp(); uint32_t deviceCount = 1; ze_device_handle_t deviceHandle; driverHandle->getDevice(&deviceCount, &deviceHandle); device = Device::fromHandle(deviceHandle); ASSERT_NE(nullptr, device); ze_result_t returnValue; commandLists[0] = CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)->toHandle(); ASSERT_NE(nullptr, commandLists[0]); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); EXPECT_EQ(2u, CommandList::fromHandle(commandLists[0])->partitionCount); commandLists[1] = CommandList::create(productFamily, device, 
NEO::EngineGroupType::RenderCompute, 0u, returnValue)->toHandle(); ASSERT_NE(nullptr, commandLists[1]); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); EXPECT_EQ(2u, CommandList::fromHandle(commandLists[1])->partitionCount); } void TearDown() override { for (auto i = 0u; i < numCommandLists; i++) { auto commandList = CommandList::fromHandle(commandLists[i]); commandList->destroy(); } MultiDeviceFixture::TearDown(); } L0::Device *device = nullptr; const static uint32_t numCommandLists = 2; ze_command_list_handle_t commandLists[numCommandLists]; }; HWTEST_F(CommandQueueExecuteCommandLists, whenACommandListExecutedRequiresUncachedMOCSThenSuccessisReturned) { using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END; using PARSE = typename FamilyType::PARSE; const ze_command_queue_desc_t desc{}; ze_result_t returnValue; auto commandQueue = whitebox_cast(CommandQueue::create(productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &desc, false, false, returnValue)); ASSERT_NE(nullptr, commandQueue->commandStream); auto commandList1 = whitebox_cast(CommandList::fromHandle(commandLists[0])); auto commandList2 = whitebox_cast(CommandList::fromHandle(commandLists[1])); commandList1->requiresQueueUncachedMocs = true; commandList2->requiresQueueUncachedMocs = true; auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true); ASSERT_EQ(ZE_RESULT_SUCCESS, result); commandQueue->destroy(); } HWTEST_F(CommandQueueExecuteCommandLists, givenCommandListThatRequiresDisabledEUFusionWhenExecutingCommandListsThenCommandQueueHasProperStreamProperties) { struct WhiteBoxCommandList : public L0::CommandList { using CommandList::CommandList; using CommandList::requiredStreamState; }; const ze_command_queue_desc_t desc{}; ze_result_t returnValue; auto commandQueue = whitebox_cast(CommandQueue::create(productFamily, device, 
neoDevice->getDefaultEngine().commandStreamReceiver, &desc, false, false, returnValue)); ASSERT_NE(nullptr, commandQueue->commandStream); auto commandList1 = static_cast(CommandList::fromHandle(commandLists[0])); commandList1->requiredStreamState.frontEndState.disableEUFusion.set(true); auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true); ASSERT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(1, commandQueue->getCsr()->getStreamProperties().frontEndState.disableEUFusion.value); commandQueue->destroy(); } HWTEST_F(CommandQueueExecuteCommandLists, whenASecondLevelBatchBufferPerCommandListAddedThenProperSizeExpected) { using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END; using PARSE = typename FamilyType::PARSE; const ze_command_queue_desc_t desc{}; ze_result_t returnValue; auto commandQueue = whitebox_cast(CommandQueue::create(productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &desc, false, false, returnValue)); ASSERT_NE(nullptr, commandQueue->commandStream); auto usedSpaceBefore = commandQueue->commandStream->getUsed(); auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto usedSpaceAfter = commandQueue->commandStream->getUsed(); ASSERT_GT(usedSpaceAfter, usedSpaceBefore); GenCmdList cmdList; ASSERT_TRUE(PARSE::parseCommandBuffer(cmdList, ptrOffset(commandQueue->commandStream->getCpuBase(), 0), usedSpaceAfter)); auto itorCurrent = cmdList.begin(); for (auto i = 0u; i < numCommandLists; i++) { auto commandList = CommandList::fromHandle(commandLists[i]); auto allocation = commandList->commandContainer.getCmdBufferAllocations()[0]; itorCurrent = find(itorCurrent, cmdList.end()); ASSERT_NE(cmdList.end(), itorCurrent); auto bbs = genCmdCast(*itorCurrent++); ASSERT_NE(nullptr, bbs); 
EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH, bbs->getSecondLevelBatchBuffer()); EXPECT_EQ(MI_BATCH_BUFFER_START::ADDRESS_SPACE_INDICATOR_PPGTT, bbs->getAddressSpaceIndicator()); EXPECT_EQ(allocation->getGpuAddress(), bbs->getBatchBufferStartAddress()); } auto itorBBE = find(itorCurrent, cmdList.end()); EXPECT_NE(cmdList.end(), itorBBE); commandQueue->destroy(); } HWTEST_F(CommandQueueExecuteCommandLists, givenFenceWhenExecutingCmdListThenFenceStatusIsCorrect) { const ze_command_queue_desc_t desc{}; ze_result_t returnValue; auto commandQueue = whitebox_cast(CommandQueue::create(productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &desc, false, false, returnValue)); ASSERT_NE(nullptr, commandQueue->commandStream); auto &csr = neoDevice->getUltCommandStreamReceiver(); *csr.tagAddress = 10; csr.taskCount = 10; ze_fence_desc_t fenceDesc{}; auto fence = whitebox_cast(Fence::create(commandQueue, &fenceDesc)); ASSERT_NE(nullptr, fence); EXPECT_EQ(ZE_RESULT_NOT_READY, fence->queryStatus()); auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, fence, true); *csr.tagAddress = 11; ASSERT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(*csr.tagAddress, fence->taskCount); EXPECT_EQ(ZE_RESULT_SUCCESS, fence->queryStatus()); //reset fence fence->assignTaskCountFromCsr(); EXPECT_EQ(ZE_RESULT_NOT_READY, fence->queryStatus()); fence->destroy(); commandQueue->destroy(); } HWTEST2_F(CommandQueueExecuteCommandLists, whenUsingFenceThenExpectEndingPipeControlUpdatingTagAllocation, IsGen9) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using POST_SYNC_OPERATION = typename FamilyType::PIPE_CONTROL::POST_SYNC_OPERATION; using PARSE = typename FamilyType::PARSE; ze_command_queue_desc_t desc{}; ze_result_t returnValue; desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; auto commandQueue = whitebox_cast(CommandQueue::create(productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &desc, false, 
false, returnValue)); ASSERT_NE(nullptr, commandQueue->commandStream); ze_fence_desc_t fenceDesc{}; auto fence = whitebox_cast(Fence::create(commandQueue, &fenceDesc)); ASSERT_NE(nullptr, fence); ze_fence_handle_t fenceHandle = fence->toHandle(); auto usedSpaceBefore = commandQueue->commandStream->getUsed(); auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, fenceHandle, true); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto usedSpaceAfter = commandQueue->commandStream->getUsed(); ASSERT_GT(usedSpaceAfter, usedSpaceBefore); GenCmdList cmdList; ASSERT_TRUE(PARSE::parseCommandBuffer(cmdList, ptrOffset(commandQueue->commandStream->getCpuBase(), 0), usedSpaceAfter)); auto pipeControls = findAll(cmdList.begin(), cmdList.end()); size_t pipeControlsPostSyncNumber = 0u; for (size_t i = 0; i < pipeControls.size(); i++) { auto pipeControl = reinterpret_cast(*pipeControls[i]); if (pipeControl->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) { EXPECT_EQ(commandQueue->getCsr()->getTagAllocation()->getGpuAddress(), NEO::UnitTestHelper::getPipeControlPostSyncAddress(*pipeControl)); EXPECT_EQ(fence->taskCount, pipeControl->getImmediateData()); pipeControlsPostSyncNumber++; } } EXPECT_EQ(1u, pipeControlsPostSyncNumber); fence->destroy(); commandQueue->destroy(); } HWTEST_F(CommandQueueExecuteCommandLists, whenExecutingCommandListsThenEndingPipeControlCommandIsExpected) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using POST_SYNC_OPERATION = typename FamilyType::PIPE_CONTROL::POST_SYNC_OPERATION; using PARSE = typename FamilyType::PARSE; const ze_command_queue_desc_t desc{}; ze_result_t returnValue; auto commandQueue = whitebox_cast(CommandQueue::create(productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &desc, false, false, returnValue)); ASSERT_NE(nullptr, commandQueue->commandStream); auto usedSpaceBefore = commandQueue->commandStream->getUsed(); auto result = 
commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto usedSpaceAfter = commandQueue->commandStream->getUsed(); ASSERT_GT(usedSpaceAfter, usedSpaceBefore); GenCmdList cmdList; ASSERT_TRUE(PARSE::parseCommandBuffer(cmdList, ptrOffset(commandQueue->commandStream->getCpuBase(), 0), usedSpaceAfter)); // Pipe control w/ Post-sync operation should be the last command auto pipeControls = findAll(cmdList.begin(), cmdList.end()); // We require at least one PIPE_CONTROL ASSERT_LE(1u, pipeControls.size()); PIPE_CONTROL *taskCountToWriteCmd = genCmdCast(*pipeControls[pipeControls.size() - 1]); EXPECT_EQ(POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, taskCountToWriteCmd->getPostSyncOperation()); uint64_t taskCountToWrite = neoDevice->getDefaultEngine().commandStreamReceiver->peekTaskCount(); EXPECT_EQ(taskCountToWrite, taskCountToWriteCmd->getImmediateData()); commandQueue->destroy(); } using CommandQueueExecuteSupport = IsWithinProducts; HWTEST2_F(CommandQueueExecuteCommandLists, givenCommandQueueHaving2CommandListsThenMVSIsProgrammedWithMaxPTSS, CommandQueueExecuteSupport) { using MEDIA_VFE_STATE = typename FamilyType::MEDIA_VFE_STATE; using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; using PARSE = typename FamilyType::PARSE; ze_command_queue_desc_t desc = {}; ze_result_t returnValue; auto commandQueue = whitebox_cast(CommandQueue::create(productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &desc, false, false, returnValue)); CommandList::fromHandle(commandLists[0])->setCommandListPerThreadScratchSize(512u); CommandList::fromHandle(commandLists[1])->setCommandListPerThreadScratchSize(1024u); ASSERT_NE(nullptr, commandQueue->commandStream); auto usedSpaceBefore = commandQueue->commandStream->getUsed(); auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true); ASSERT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(1024u, 
neoDevice->getDefaultEngine().commandStreamReceiver->getScratchSpaceController()->getPerThreadScratchSpaceSize()); auto usedSpaceAfter = commandQueue->commandStream->getUsed(); ASSERT_GT(usedSpaceAfter, usedSpaceBefore); GenCmdList cmdList; ASSERT_TRUE(PARSE::parseCommandBuffer(cmdList, ptrOffset(commandQueue->commandStream->getCpuBase(), 0), usedSpaceAfter)); auto mediaVfeStates = findAll(cmdList.begin(), cmdList.end()); auto GSBAStates = findAll(cmdList.begin(), cmdList.end()); // We should have only 1 state added ASSERT_EQ(1u, mediaVfeStates.size()); ASSERT_EQ(1u, GSBAStates.size()); CommandList::fromHandle(commandLists[0])->reset(); CommandList::fromHandle(commandLists[1])->reset(); CommandList::fromHandle(commandLists[0])->setCommandListPerThreadScratchSize(2048u); CommandList::fromHandle(commandLists[1])->setCommandListPerThreadScratchSize(1024u); ASSERT_NE(nullptr, commandQueue->commandStream); usedSpaceBefore = commandQueue->commandStream->getUsed(); result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true); ASSERT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(2048u, neoDevice->getDefaultEngine().commandStreamReceiver->getScratchSpaceController()->getPerThreadScratchSpaceSize()); usedSpaceAfter = commandQueue->commandStream->getUsed(); ASSERT_GT(usedSpaceAfter, usedSpaceBefore); GenCmdList cmdList1; ASSERT_TRUE(PARSE::parseCommandBuffer(cmdList1, ptrOffset(commandQueue->commandStream->getCpuBase(), 0), usedSpaceAfter)); mediaVfeStates = findAll(cmdList1.begin(), cmdList1.end()); GSBAStates = findAll(cmdList1.begin(), cmdList1.end()); // We should have 2 states added ASSERT_EQ(2u, mediaVfeStates.size()); ASSERT_EQ(2u, GSBAStates.size()); commandQueue->destroy(); } HWTEST_F(CommandQueueExecuteCommandLists, givenMidThreadPreemptionWhenCommandsAreExecutedThenStateSipIsAdded) { using STATE_SIP = typename FamilyType::STATE_SIP; using PARSE = typename FamilyType::PARSE; ze_command_queue_desc_t desc{}; desc.ordinal = 0u; desc.index = 0u; 
desc.priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL;

    // The same scenario is run once with the "internal" creation flag set and once cleared.
    std::array testedInternalFlags = {true, false};

    for (auto flagInternal : testedInternalFlags) {
        ze_result_t returnValue;
        auto commandQueue = whitebox_cast(CommandQueue::create(productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &desc, false, flagInternal, returnValue));
        EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
        ASSERT_NE(nullptr, commandQueue->commandStream);

        auto usedSpaceBefore = commandQueue->commandStream->getUsed();

        auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true);
        ASSERT_EQ(ZE_RESULT_SUCCESS, result);

        auto usedSpaceAfter = commandQueue->commandStream->getUsed();
        ASSERT_GT(usedSpaceAfter, usedSpaceBefore);

        GenCmdList cmdList;
        ASSERT_TRUE(PARSE::parseCommandBuffer(cmdList, ptrOffset(commandQueue->commandStream->getCpuBase(), 0), usedSpaceAfter));

        auto itorSip = find(cmdList.begin(), cmdList.end());

        auto preemptionMode = neoDevice->getPreemptionMode();
        if (preemptionMode == NEO::PreemptionMode::MidThread) {
            EXPECT_NE(cmdList.end(), itorSip);
            // EXPECT_NE is non-fatal, so guard the dereference: reading through a
            // past-the-end iterator is undefined behaviour. The failure above is
            // still reported and the queue is still destroyed below.
            if (itorSip != cmdList.end()) {
                auto sipAllocation = SipKernel::getSipKernel(*neoDevice).getSipAllocation();
                STATE_SIP *stateSipCmd = reinterpret_cast(*itorSip);
                EXPECT_EQ(sipAllocation->getGpuAddressToPatch(), stateSipCmd->getSystemInstructionPointer());
            }
        } else {
            EXPECT_EQ(cmdList.end(), itorSip);
        }
        commandQueue->destroy();
    }
}

HWTEST2_F(CommandQueueExecuteCommandLists, givenMidThreadPreemptionWhenCommandsAreExecutedTwoTimesThenStateSipIsAddedOnlyTheFirstTime, IsAtLeastSkl) {
    using STATE_SIP = typename FamilyType::STATE_SIP;
    using PARSE = typename FamilyType::PARSE;
    ze_command_queue_desc_t desc{};
    desc.ordinal = 0u;
    desc.index = 0u;
    desc.priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL;

    std::array testedInternalFlags = {true, false};

    for (auto flagInternal : testedInternalFlags) {
        ze_result_t returnValue;
        auto commandQueue = whitebox_cast(CommandQueue::create(productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &desc,
false, flagInternal, returnValue)); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); ASSERT_NE(nullptr, commandQueue->commandStream); auto usedSpaceBefore = commandQueue->commandStream->getUsed(); auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true); ASSERT_EQ(ZE_RESULT_SUCCESS, result); result = commandQueue->synchronize(0); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto usedSpaceAfter = commandQueue->commandStream->getUsed(); ASSERT_GT(usedSpaceAfter, usedSpaceBefore); GenCmdList cmdList; ASSERT_TRUE(PARSE::parseCommandBuffer(cmdList, ptrOffset(commandQueue->commandStream->getCpuBase(), 0), usedSpaceAfter)); auto itorSip = find(cmdList.begin(), cmdList.end()); auto preemptionMode = neoDevice->getPreemptionMode(); if (preemptionMode == NEO::PreemptionMode::MidThread) { EXPECT_NE(cmdList.end(), itorSip); auto sipAllocation = SipKernel::getSipKernel(*neoDevice).getSipAllocation(); STATE_SIP *stateSipCmd = reinterpret_cast(*itorSip); EXPECT_EQ(sipAllocation->getGpuAddressToPatch(), stateSipCmd->getSystemInstructionPointer()); } else { EXPECT_EQ(cmdList.end(), itorSip); } result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true); ASSERT_EQ(ZE_RESULT_SUCCESS, result); result = commandQueue->synchronize(0); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto usedSpaceAfterSecondExec = commandQueue->commandStream->getUsed(); GenCmdList cmdList2; ASSERT_TRUE(PARSE::parseCommandBuffer(cmdList2, ptrOffset(commandQueue->commandStream->getCpuBase(), usedSpaceAfter), usedSpaceAfterSecondExec)); itorSip = find(cmdList2.begin(), cmdList2.end()); EXPECT_EQ(cmdList2.end(), itorSip); // No preemption reprogramming auto secondExecMmioCount = countMmio(cmdList2.begin(), cmdList2.end(), 0x2580u); EXPECT_EQ(0u, secondExecMmioCount); commandQueue->destroy(); } } HWTEST2_F(CommandQueueExecuteCommandLists, givenCommandListsWithCooperativeAndNonCooperativeKernelsWhenExecuteCommandListsIsCalledThenErrorIsReturned, IsAtLeastSkl) { 
ze_command_queue_desc_t desc = {}; NEO::CommandStreamReceiver *csr; device->getCsrForOrdinalAndIndex(&csr, 0u, 0u); auto pCommandQueue = new MockCommandQueueHw{device, csr, &desc}; pCommandQueue->initialize(false, false); Mock<::L0::Kernel> kernel; auto pMockModule = std::unique_ptr(new Mock(device, nullptr)); kernel.module = pMockModule.get(); ze_group_count_t threadGroupDimensions{1, 1, 1}; auto pCommandListWithCooperativeKernels = std::make_unique>>(); pCommandListWithCooperativeKernels->initialize(device, NEO::EngineGroupType::Compute, 0u); pCommandListWithCooperativeKernels->appendLaunchKernelWithParams(&kernel, &threadGroupDimensions, nullptr, false, false, true); auto pCommandListWithNonCooperativeKernels = std::make_unique>>(); pCommandListWithNonCooperativeKernels->initialize(device, NEO::EngineGroupType::Compute, 0u); pCommandListWithNonCooperativeKernels->appendLaunchKernelWithParams(&kernel, &threadGroupDimensions, nullptr, false, false, false); { ze_command_list_handle_t commandLists[] = {pCommandListWithCooperativeKernels->toHandle(), pCommandListWithNonCooperativeKernels->toHandle()}; auto result = pCommandQueue->executeCommandLists(2, commandLists, nullptr, false); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_COMMAND_LIST_TYPE, result); } { ze_command_list_handle_t commandLists[] = {pCommandListWithNonCooperativeKernels->toHandle(), pCommandListWithCooperativeKernels->toHandle()}; auto result = pCommandQueue->executeCommandLists(2, commandLists, nullptr, false); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_COMMAND_LIST_TYPE, result); } DebugManagerStateRestore restorer; DebugManager.flags.AllowMixingRegularAndCooperativeKernels.set(1); { ze_command_list_handle_t commandLists[] = {pCommandListWithCooperativeKernels->toHandle(), pCommandListWithNonCooperativeKernels->toHandle()}; auto result = pCommandQueue->executeCommandLists(2, commandLists, nullptr, false); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } { ze_command_list_handle_t commandLists[] = 
{pCommandListWithNonCooperativeKernels->toHandle(), pCommandListWithCooperativeKernels->toHandle()}; auto result = pCommandQueue->executeCommandLists(2, commandLists, nullptr, false); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } pCommandQueue->destroy(); } HWTEST2_F(CommandQueueExecuteCommandLists, givenCommandListWithCooperativeKernelsWhenExecuteCommandListsIsCalledThenCorrectBatchBufferIsSubmitted, IsAtLeastXeHpCore) { struct MockCsr : NEO::CommandStreamReceiverHw { using NEO::CommandStreamReceiverHw::CommandStreamReceiverHw; NEO::SubmissionStatus submitBatchBuffer(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override { useSingleSubdeviceValue = batchBuffer.useSingleSubdevice; submitBatchBufferCalled++; return NEO::CommandStreamReceiver::submitBatchBuffer(batchBuffer, allocationsForResidency); } bool useSingleSubdeviceValue = false; uint32_t submitBatchBufferCalled = 0; }; NEO::UltDeviceFactory deviceFactory{1, 4}; auto pNeoDevice = deviceFactory.rootDevices[0]; ze_command_queue_desc_t desc = {}; MockCsr *pMockCsr = new MockCsr{*pNeoDevice->getExecutionEnvironment(), pNeoDevice->getRootDeviceIndex(), pNeoDevice->getDeviceBitfield()}; pNeoDevice->resetCommandStreamReceiver(pMockCsr); Mock device{pNeoDevice, pNeoDevice->getExecutionEnvironment()}; auto pCommandQueue = new MockCommandQueueHw{&device, pMockCsr, &desc}; pCommandQueue->initialize(false, false); Mock<::L0::Kernel> kernel; auto pMockModule = std::unique_ptr(new Mock(&device, nullptr)); kernel.module = pMockModule.get(); ze_group_count_t threadGroupDimensions{1, 1, 1}; auto pCommandListWithCooperativeKernels = std::make_unique>>(); pCommandListWithCooperativeKernels->initialize(&device, NEO::EngineGroupType::Compute, 0u); pCommandListWithCooperativeKernels->appendLaunchKernelWithParams(&kernel, &threadGroupDimensions, nullptr, false, false, true); ze_command_list_handle_t commandListCooperative[] = {pCommandListWithCooperativeKernels->toHandle()}; auto result = 
pCommandQueue->executeCommandLists(1, commandListCooperative, nullptr, false); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(1u, pMockCsr->submitBatchBufferCalled); EXPECT_TRUE(pMockCsr->useSingleSubdeviceValue); auto pCommandListWithNonCooperativeKernels = std::make_unique>>(); pCommandListWithNonCooperativeKernels->initialize(&device, NEO::EngineGroupType::Compute, 0u); pCommandListWithNonCooperativeKernels->appendLaunchKernelWithParams(&kernel, &threadGroupDimensions, nullptr, false, false, false); ze_command_list_handle_t commandListNonCooperative[] = {pCommandListWithNonCooperativeKernels->toHandle()}; result = pCommandQueue->executeCommandLists(1, commandListNonCooperative, nullptr, false); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(2u, pMockCsr->submitBatchBufferCalled); EXPECT_FALSE(pMockCsr->useSingleSubdeviceValue); pCommandQueue->destroy(); } template void CommandQueueExecuteCommandLists::twoCommandListCommandPreemptionTest(bool preemptionCmdProgramming) { ze_command_queue_desc_t desc = {}; desc.stype = ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC; desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; ze_result_t returnValue; auto commandQueue = whitebox_cast(CommandQueue::create( productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &desc, false, false, returnValue)); ASSERT_NE(nullptr, commandQueue->commandStream); commandQueue->preemptionCmdSyncProgramming = preemptionCmdProgramming; preemptionCmdProgramming = NEO::PreemptionHelper::getRequiredCmdStreamSize(NEO::PreemptionMode::ThreadGroup, NEO::PreemptionMode::Disabled) > 0u; auto usedSpaceBefore = commandQueue->commandStream->getUsed(); auto commandListDisabled = whitebox_cast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)); commandListDisabled->commandListPreemptionMode = NEO::PreemptionMode::Disabled; auto commandListThreadGroup = whitebox_cast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, 
returnValue)); commandListThreadGroup->commandListPreemptionMode = NEO::PreemptionMode::ThreadGroup; ze_command_list_handle_t commandLists[] = {commandListDisabled->toHandle(), commandListThreadGroup->toHandle(), commandListDisabled->toHandle()}; uint32_t numCommandLists = sizeof(commandLists) / sizeof(commandLists[0]); auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true); ASSERT_EQ(ZE_RESULT_SUCCESS, result); result = commandQueue->synchronize(0); ASSERT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(NEO::PreemptionMode::Disabled, commandQueue->commandQueuePreemptionMode); result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true); ASSERT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(NEO::PreemptionMode::Disabled, commandQueue->commandQueuePreemptionMode); auto usedSpaceAfter = commandQueue->commandStream->getUsed(); ASSERT_GT(usedSpaceAfter, usedSpaceBefore); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, commandQueue->commandStream->getCpuBase(), usedSpaceAfter)); using STATE_SIP = typename FamilyType::STATE_SIP; using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using POST_SYNC_OPERATION = typename FamilyType::PIPE_CONTROL::POST_SYNC_OPERATION; auto preemptionMode = neoDevice->getPreemptionMode(); GenCmdList::iterator itor = cmdList.begin(); GenCmdList::iterator itorStateSip = find(cmdList.begin(), cmdList.end()); if (preemptionMode == NEO::PreemptionMode::MidThread) { EXPECT_NE(itorStateSip, cmdList.end()); itor = itorStateSip; } else { EXPECT_EQ(itorStateSip, cmdList.end()); } MI_LOAD_REGISTER_IMM *lriCmd = nullptr; auto itorLri = find(itor, cmdList.end()); if (preemptionCmdProgramming) { EXPECT_NE(itorLri, cmdList.end()); //Initial cmdQ preemption 
lriCmd = static_cast(*itorLri); EXPECT_EQ(0x2580u, lriCmd->getRegisterOffset()); itor = itorLri; } else { EXPECT_EQ(itorLri, cmdList.end()); } uint32_t data = 0; //next should be BB_START to 1st Disabled preemption Cmd List auto itorBBStart = find(itor, cmdList.end()); EXPECT_NE(itorBBStart, cmdList.end()); itor = itorBBStart; itorLri = find(itor, cmdList.end()); if (preemptionCmdProgramming) { EXPECT_NE(itorLri, cmdList.end()); lriCmd = static_cast(*itorLri); EXPECT_EQ(0x2580u, lriCmd->getRegisterOffset()); data = (1 << 1) | (((1 << 1) | (1 << 2)) << 16); EXPECT_EQ(data, lriCmd->getDataDword()); //verify presence of sync PIPE_CONTROL just before LRI switching to thread-group if (commandQueue->preemptionCmdSyncProgramming) { auto itorPipeControl = find(itor, itorLri); EXPECT_NE(itorPipeControl, cmdList.end()); } itor = itorLri; } else { EXPECT_EQ(itorLri, cmdList.end()); } //start of thread-group command list itorBBStart = find(itor, cmdList.end()); EXPECT_NE(itorBBStart, cmdList.end()); itor = itorBBStart; itorLri = find(itor, cmdList.end()); if (preemptionCmdProgramming) { EXPECT_NE(itorLri, cmdList.end()); lriCmd = static_cast(*itorLri); EXPECT_EQ(0x2580u, lriCmd->getRegisterOffset()); data = (1 << 2) | (((1 << 1) | (1 << 2)) << 16); EXPECT_EQ(data, lriCmd->getDataDword()); //verify presence of sync PIPE_CONTROL just before LRI switching to thread-group if (commandQueue->preemptionCmdSyncProgramming) { auto itorPipeControl = find(itor, itorLri); EXPECT_NE(itorPipeControl, cmdList.end()); } itor = itorLri; } else { EXPECT_EQ(itorLri, cmdList.end()); } //start of thread-group command list itorBBStart = find(itor, cmdList.end()); EXPECT_NE(itorBBStart, cmdList.end()); itor = itorBBStart; // BB end auto itorBBEnd = find(itor, cmdList.end()); EXPECT_NE(itorBBStart, cmdList.end()); auto allStateSips = findAll(cmdList.begin(), cmdList.end()); if (preemptionMode == NEO::PreemptionMode::MidThread) { EXPECT_EQ(1u, allStateSips.size()); } else { EXPECT_EQ(0u, 
allStateSips.size()); } auto firstExecMmioCount = countMmio(cmdList.begin(), itorBBEnd, 0x2580u); size_t expectedMmioCount = preemptionCmdProgramming ? 4u : 0u; EXPECT_EQ(expectedMmioCount, firstExecMmioCount); // Count next MMIOs for preemption - only two should be present as last cmdlist from 1st exec // and first cmdlist from 2nd exec has the same mode - cmdQ state should remember it auto secondExecMmioCount = countMmio(itorBBEnd, cmdList.end(), 0x2580u); expectedMmioCount = preemptionCmdProgramming ? 2u : 0u; EXPECT_EQ(expectedMmioCount, secondExecMmioCount); commandListDisabled->destroy(); commandListThreadGroup->destroy(); commandQueue->destroy(); } HWTEST2_F(CommandQueueExecuteCommandLists, GivenCmdListsWithDifferentPreemptionModesWhenExecutingThenQueuePreemptionIsSwitchedAndStateSipProgrammedOnce, IsAtLeastSkl) { twoCommandListCommandPreemptionTest(false); } HWTEST2_F(CommandQueueExecuteCommandLists, GivenCmdListsWithDifferentPreemptionModesWhenNoCmdStreamPreemptionRequiredThenNoCmdStreamProgrammingAndStateSipProgrammedOnce, IsAtLeastSkl) { twoCommandListCommandPreemptionTest(true); } struct CommandQueueExecuteCommandListSWTagsTests : public Test { void SetUp() override { DebugManager.flags.EnableSWTags.set(true); DeviceFixture::SetUp(); ze_result_t returnValue; commandLists[0] = CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)->toHandle(); ASSERT_NE(nullptr, commandLists[0]); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); ze_command_queue_desc_t desc = {}; commandQueue = whitebox_cast(CommandQueue::create(productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &desc, false, false, returnValue)); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); ASSERT_NE(nullptr, commandQueue->commandStream); } void TearDown() override { commandQueue->destroy(); for (auto i = 0u; i < numCommandLists; i++) { auto commandList = CommandList::fromHandle(commandLists[i]); commandList->destroy(); } 
DeviceFixture::TearDown(); } DebugManagerStateRestore dbgRestorer; const static uint32_t numCommandLists = 1; ze_command_list_handle_t commandLists[numCommandLists]; L0::ult::CommandQueue *commandQueue; }; HWTEST_F(CommandQueueExecuteCommandListSWTagsTests, givenEnableSWTagsWhenExecutingCommandListThenHeapAddressesAreInserted) { using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM; using PARSE = typename FamilyType::PARSE; auto usedSpaceBefore = commandQueue->commandStream->getUsed(); auto result = commandQueue->executeCommandLists(1, commandLists, nullptr, false); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto usedSpaceAfter = commandQueue->commandStream->getUsed(); ASSERT_GT(usedSpaceAfter, usedSpaceBefore); GenCmdList cmdList; ASSERT_TRUE(PARSE::parseCommandBuffer(cmdList, ptrOffset(commandQueue->commandStream->getCpuBase(), 0), usedSpaceAfter)); auto sdis = findAll(cmdList.begin(), cmdList.end()); ASSERT_LE(2u, sdis.size()); auto dbgdocSDI = genCmdCast(*sdis[0]); auto dbgddiSDI = genCmdCast(*sdis[1]); EXPECT_EQ(dbgdocSDI->getAddress(), neoDevice->getRootDeviceEnvironment().tagsManager->getBXMLHeapAllocation()->getGpuAddress()); EXPECT_EQ(dbgddiSDI->getAddress(), neoDevice->getRootDeviceEnvironment().tagsManager->getSWTagHeapAllocation()->getGpuAddress()); } HWTEST_F(CommandQueueExecuteCommandListSWTagsTests, givenEnableSWTagsAndCommandListWithDifferentPreemtpionWhenExecutingCommandListThenPipeControlReasonTagIsInserted) { using MI_NOOP = typename FamilyType::MI_NOOP; using PARSE = typename FamilyType::PARSE; whitebox_cast(CommandList::fromHandle(commandLists[0]))->commandListPreemptionMode = PreemptionMode::Disabled; auto usedSpaceBefore = commandQueue->commandStream->getUsed(); auto result = commandQueue->executeCommandLists(1, commandLists, nullptr, false); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto usedSpaceAfter = commandQueue->commandStream->getUsed(); ASSERT_GT(usedSpaceAfter, usedSpaceBefore); GenCmdList cmdList; 
ASSERT_TRUE(PARSE::parseCommandBuffer(cmdList, ptrOffset(commandQueue->commandStream->getCpuBase(), 0), usedSpaceAfter)); auto noops = findAll(cmdList.begin(), cmdList.end()); ASSERT_LE(2u, noops.size()); bool tagFound = false; for (auto it = noops.begin(); it != noops.end() && !tagFound; ++it) { auto noop = genCmdCast(*(*it)); if (NEO::SWTags::BaseTag::getMarkerNoopID(SWTags::OpCode::PipeControlReason) == noop->getIdentificationNumber() && noop->getIdentificationNumberRegisterWriteEnable() == true && ++it != noops.end()) { noop = genCmdCast(*(*it)); if (noop->getIdentificationNumber() & 1 << 21 && noop->getIdentificationNumberRegisterWriteEnable() == false) { tagFound = true; } } } EXPECT_TRUE(tagFound); } template void findPartitionRegister(GenCmdList &cmdList, bool expectToFind) { using MI_LOAD_REGISTER_MEM = typename GfxFamily::MI_LOAD_REGISTER_MEM; using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM; auto loadRegisterMemList = findAll(cmdList.begin(), cmdList.end()); bool wparidRegisterFound = false; for (size_t i = 0; i < loadRegisterMemList.size(); i++) { auto loadRegMem = reinterpret_cast(*loadRegisterMemList[i]); if (NEO::PartitionRegisters::wparidCCSOffset == loadRegMem->getRegisterAddress()) { wparidRegisterFound = true; } } auto loadRegisterImmList = findAll(cmdList.begin(), cmdList.end()); bool offsetRegisterFound = false; for (size_t i = 0; i < loadRegisterImmList.size(); i++) { auto loadRegImm = reinterpret_cast(*loadRegisterImmList[i]); if (NEO::PartitionRegisters::addressOffsetCCSOffset == loadRegImm->getRegisterOffset()) { offsetRegisterFound = true; } } if (expectToFind) { EXPECT_TRUE(wparidRegisterFound); EXPECT_TRUE(offsetRegisterFound); } else { EXPECT_FALSE(wparidRegisterFound); EXPECT_FALSE(offsetRegisterFound); } } HWTEST2_F(MultiDeviceCommandQueueExecuteCommandLists, givenMultiplePartitionCountWhenExecutingCmdListThenExpectMmioProgrammingAndCorrectEstimation, IsAtLeastXeHpCore) { using PIPE_CONTROL = typename 
FamilyType::PIPE_CONTROL; using POST_SYNC_OPERATION = typename FamilyType::PIPE_CONTROL::POST_SYNC_OPERATION; using PARSE = typename FamilyType::PARSE; auto neoDevice = device->getNEODevice(); auto csr = reinterpret_cast *>(neoDevice->getDefaultEngine().commandStreamReceiver); csr->useNotifyEnableForPostSync = true; ze_command_queue_desc_t desc{}; desc.ordinal = 0u; desc.index = 0u; desc.priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL; desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; ze_result_t returnValue; auto commandQueue = whitebox_cast(CommandQueue::create(productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &desc, false, false, returnValue)); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); EXPECT_EQ(2u, commandQueue->partitionCount); ASSERT_NE(nullptr, commandQueue->commandStream); auto &commandStreamReceiver = device->getNEODevice()->getDefaultEngine().commandStreamReceiver; if (neoDevice->getPreemptionMode() == PreemptionMode::MidThread || neoDevice->isDebuggerActive()) { commandStreamReceiver->createPreemptionAllocation(); } ze_fence_desc_t fenceDesc{}; auto fence = whitebox_cast(Fence::create(commandQueue, &fenceDesc)); ASSERT_NE(nullptr, fence); ze_fence_handle_t fenceHandle = fence->toHandle(); //1st execute call initialized pipeline auto usedSpaceBefore = commandQueue->commandStream->getUsed(); auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, fenceHandle, true); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); auto usedSpaceAfter = commandQueue->commandStream->getUsed(); //1st call then initialize registers GenCmdList cmdList; ASSERT_TRUE(PARSE::parseCommandBuffer(cmdList, ptrOffset(commandQueue->commandStream->getCpuBase(), usedSpaceBefore), usedSpaceAfter)); findPartitionRegister(cmdList, true); usedSpaceBefore = commandQueue->commandStream->getUsed(); result = commandQueue->executeCommandLists(numCommandLists, commandLists, fenceHandle, true); ASSERT_EQ(ZE_RESULT_SUCCESS, result); usedSpaceAfter = 
commandQueue->commandStream->getUsed(); ASSERT_GT(usedSpaceAfter, usedSpaceBefore); size_t cmdBufferSizeWithoutMmioProgramming = usedSpaceAfter - usedSpaceBefore; for (auto i = 0u; i < numCommandLists; i++) { auto commandList = CommandList::fromHandle(commandLists[i]); commandList->partitionCount = 2; } cmdList.clear(); ASSERT_TRUE(PARSE::parseCommandBuffer(cmdList, ptrOffset(commandQueue->commandStream->getCpuBase(), usedSpaceBefore), usedSpaceAfter)); findPartitionRegister(cmdList, false); usedSpaceBefore = commandQueue->commandStream->getUsed(); result = commandQueue->executeCommandLists(numCommandLists, commandLists, fenceHandle, true); ASSERT_EQ(ZE_RESULT_SUCCESS, result); usedSpaceAfter = commandQueue->commandStream->getUsed(); ASSERT_GT(usedSpaceAfter, usedSpaceBefore); size_t cmdBufferSizeWithtMmioProgramming = usedSpaceAfter - usedSpaceBefore; size_t expectedSizeWithMmioProgramming = cmdBufferSizeWithoutMmioProgramming; EXPECT_GE(expectedSizeWithMmioProgramming, cmdBufferSizeWithtMmioProgramming); cmdList.clear(); ASSERT_TRUE(PARSE::parseCommandBuffer(cmdList, ptrOffset(commandQueue->commandStream->getCpuBase(), usedSpaceBefore), usedSpaceAfter)); findPartitionRegister(cmdList, false); auto pipeControlList = findAll(cmdList.begin(), cmdList.end()); uint32_t foundPostSyncPipeControl = 0u; for (size_t i = 0; i < pipeControlList.size(); i++) { auto pipeControl = reinterpret_cast(*pipeControlList[i]); if (pipeControl->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) { EXPECT_TRUE(pipeControl->getWorkloadPartitionIdOffsetEnable()); foundPostSyncPipeControl++; EXPECT_TRUE(pipeControl->getNotifyEnable()); } } EXPECT_EQ(1u, foundPostSyncPipeControl); fence->destroy(); commandQueue->destroy(); } HWTEST_F(CommandQueueExecuteCommandLists, GivenCopyCommandQueueWhenExecutingCopyCommandListWithFenceThenExpectSingleCopyPostSyncCommand) { using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW; using PARSE = typename FamilyType::PARSE; 
ze_result_t returnValue; std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::Copy, 0u, returnValue)); ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue); auto csr = reinterpret_cast *>(neoDevice->getDefaultEngine().commandStreamReceiver); csr->useNotifyEnableForPostSync = true; const ze_command_queue_desc_t desc{}; auto commandQueue = whitebox_cast(CommandQueue::create(productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &desc, true, false, returnValue)); ASSERT_NE(nullptr, commandQueue); ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue); ze_fence_desc_t fenceDesc{}; auto fence = whitebox_cast(Fence::create(commandQueue, &fenceDesc)); ASSERT_NE(nullptr, fence); ze_fence_handle_t fenceHandle = fence->toHandle(); zet_command_list_handle_t cmdListHandle = commandList->toHandle(); returnValue = commandQueue->executeCommandLists(1, &cmdListHandle, fenceHandle, false); ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue); size_t usedSpaceAfter = commandQueue->commandStream->getUsed(); GenCmdList cmdList; ASSERT_TRUE(PARSE::parseCommandBuffer(cmdList, commandQueue->commandStream->getCpuBase(), usedSpaceAfter)); uint32_t foundPostSyncMiFlush = 0u; auto miFlushList = findAll(cmdList.begin(), cmdList.end()); for (auto cmdIt : miFlushList) { auto miFlush = reinterpret_cast(*cmdIt); if (miFlush->getPostSyncOperation() == MI_FLUSH_DW::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA_QWORD) { foundPostSyncMiFlush++; EXPECT_TRUE(miFlush->getNotifyEnable()); } } EXPECT_EQ(1u, foundPostSyncMiFlush); fence->destroy(); commandQueue->destroy(); } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/context/000077500000000000000000000000001422164147700267775ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/context/CMakeLists.txt000066400000000000000000000005041422164147700315360ustar00rootroot00000000000000# # Copyright (C) 2020-2022 Intel Corporation # # 
SPDX-License-Identifier: MIT # target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/test_context.cpp ${CMAKE_CURRENT_SOURCE_DIR}/context_${DRIVER_MODEL}/test_context.cpp ) compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/context/context_drm/000077500000000000000000000000001422164147700313255ustar00rootroot00000000000000test_context.cpp000066400000000000000000000052571422164147700345060ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/context/context_drm/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/mocks/mock_command_stream_receiver.h" #include "shared/test/common/mocks/mock_compilers.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "shared/test/common/mocks/mock_svm_manager.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/page_fault_manager/mock_cpu_page_fault_manager.h" #include "level_zero/core/source/context/context_imp.h" #include "level_zero/core/source/driver/driver_handle_imp.h" #include "level_zero/core/source/image/image.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/fixtures/host_pointer_manager_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_driver_handle.h" #include "gtest/gtest.h" namespace L0 { namespace ult { using ContextIsShareable = Test; TEST_F(ContextIsShareable, whenCallingisSharedMemoryThenCorrectResultIsReturned) { ze_context_handle_t hContext; ze_context_desc_t desc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC, nullptr, 0}; ze_result_t res = driverHandle->createContext(&desc, 0u, nullptr, &hContext); EXPECT_EQ(ZE_RESULT_SUCCESS, res); ContextImp *contextImp = static_cast(L0::Context::fromHandle(hContext)); bool exportableMemoryFalse = false; bool exportableMemoryTrue 
= true; EXPECT_EQ(exportableMemoryFalse, contextImp->isShareableMemory(nullptr, exportableMemoryFalse, neoDevice)); EXPECT_EQ(exportableMemoryTrue, contextImp->isShareableMemory(nullptr, exportableMemoryTrue, neoDevice)); res = contextImp->destroy(); EXPECT_EQ(ZE_RESULT_SUCCESS, res); } using GetMemHandlePtrTest = Test; TEST_F(GetMemHandlePtrTest, whenCallingGetMemHandlePtrWithValidHandleThenSuccessIsReturned) { MemoryManagerMemHandleMock *fixtureMemoryManager = static_cast(currMemoryManager); uint64_t handle = 57; // Test Successfully returning fd Handle fixtureMemoryManager->NTHandle = false; EXPECT_NE(nullptr, context->getMemHandlePtr(device, handle, 0)); } TEST_F(GetMemHandlePtrTest, whenCallingGetMemHandlePtrWithInvalidHandleThenNullptrIsReturned) { MemoryManagerMemHandleMock *fixtureMemoryManager = static_cast(currMemoryManager); uint64_t handle = 57; driverHandle->failHandleLookup = true; // Test Failing returning fd Handle fixtureMemoryManager->NTHandle = false; EXPECT_EQ(nullptr, context->getMemHandlePtr(device, handle, 0)); } } // namespace ult } // namespace L0compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/context/context_drm_or_wddm/000077500000000000000000000000001422164147700330405ustar00rootroot00000000000000test_context.cpp000066400000000000000000000121771422164147700362200ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/context/context_drm_or_wddm/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/mocks/mock_command_stream_receiver.h" #include "shared/test/common/mocks/mock_compilers.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "shared/test/common/mocks/mock_svm_manager.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/page_fault_manager/mock_cpu_page_fault_manager.h" #include 
"level_zero/core/source/context/context_imp.h" #include "level_zero/core/source/driver/driver_handle_imp.h" #include "level_zero/core/source/image/image.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/fixtures/host_pointer_manager_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_driver_handle.h" #include "gtest/gtest.h" namespace L0 { namespace ult { using ContextIsShareable = Test; TEST_F(ContextIsShareable, whenCallingisSharedMemoryThenCorrectResultIsReturned) { ze_context_handle_t hContext; ze_context_desc_t desc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC, nullptr, 0}; ze_result_t res = driverHandle->createContext(&desc, 0u, nullptr, &hContext); EXPECT_EQ(ZE_RESULT_SUCCESS, res); ContextImp *contextImp = static_cast(L0::Context::fromHandle(hContext)); bool exportableMemoryFalse = false; bool exportableMemoryTrue = true; neoDevice->executionEnvironment->rootDeviceEnvironments[0]->osInterface.reset(new NEO::OSInterface()); neoDevice->executionEnvironment->rootDeviceEnvironments[0]->osInterface->setDriverModel(std::make_unique(512)); EXPECT_EQ(exportableMemoryFalse, contextImp->isShareableMemory(nullptr, exportableMemoryFalse, neoDevice)); EXPECT_EQ(exportableMemoryTrue, contextImp->isShareableMemory(nullptr, exportableMemoryTrue, neoDevice)); // exportDesc set && neoDevice is NOT WDDM EXPECT_EQ(exportableMemoryFalse, contextImp->isShareableMemory(&desc, exportableMemoryFalse, neoDevice)); // exportDesc unset && neoDevice is NOT WDDM EXPECT_EQ(exportableMemoryFalse, contextImp->isShareableMemory(nullptr, exportableMemoryFalse, neoDevice)); // exportDesc unset && neoDevice is WDDM neoDevice->executionEnvironment->rootDeviceEnvironments[0]->osInterface->setDriverModel(std::make_unique(512)); EXPECT_EQ(exportableMemoryTrue, contextImp->isShareableMemory(nullptr, exportableMemoryFalse, neoDevice)); // exportDesc is set && Exportable Memory is False && neoDevice is WDDM 
EXPECT_EQ(exportableMemoryFalse, contextImp->isShareableMemory(&desc, exportableMemoryFalse, neoDevice)); res = contextImp->destroy(); EXPECT_EQ(ZE_RESULT_SUCCESS, res); } using GetMemHandlePtrTest = Test; TEST_F(GetMemHandlePtrTest, whenCallingGetMemHandlePtrWithValidNTHandleThenSuccessIsReturned) { MemoryManagerMemHandleMock *fixtureMemoryManager = static_cast(currMemoryManager); uint64_t handle = 57; // Test Successfully returning NT Handle fixtureMemoryManager->NTHandle = true; EXPECT_NE(nullptr, context->getMemHandlePtr(device, handle, 0)); } TEST_F(GetMemHandlePtrTest, whenCallingGetMemHandlePtrWithInvalidHandleThenNullptrIsReturned) { MemoryManagerMemHandleMock *fixtureMemoryManager = static_cast(currMemoryManager); uint64_t handle = 57; driverHandle->failHandleLookup = true; // Test Failing returning NT Handle fixtureMemoryManager->NTHandle = true; EXPECT_EQ(nullptr, context->getMemHandlePtr(device, handle, 0)); // Test Failing returning fd Handle fixtureMemoryManager->NTHandle = false; EXPECT_EQ(nullptr, context->getMemHandlePtr(device, handle, 0)); } TEST_F(GetMemHandlePtrTest, whenCallingGetMemHandlePtrWithDRMDriverTypeWithNonNTHandleThenSuccessIsReturned) { MemoryManagerMemHandleMock *fixtureMemoryManager = static_cast(currMemoryManager); neoDevice->executionEnvironment->rootDeviceEnvironments[0]->osInterface.reset(new NEO::OSInterface()); neoDevice->executionEnvironment->rootDeviceEnvironments[0]->osInterface->setDriverModel(std::make_unique(512)); uint64_t handle = 57; // Test Successfully returning fd Handle fixtureMemoryManager->NTHandle = false; EXPECT_NE(nullptr, context->getMemHandlePtr(device, handle, 0)); } TEST_F(GetMemHandlePtrTest, whenCallingGetMemHandlePtrWithWDDMDriverTypeWithNonNTHandleThenNullPtrIsReturned) { MemoryManagerMemHandleMock *fixtureMemoryManager = static_cast(currMemoryManager); neoDevice->executionEnvironment->rootDeviceEnvironments[0]->osInterface.reset(new NEO::OSInterface()); 
neoDevice->executionEnvironment->rootDeviceEnvironments[0]->osInterface->setDriverModel(std::make_unique(512)); uint64_t handle = 57; // Test Successfully returning fd Handle fixtureMemoryManager->NTHandle = false; EXPECT_EQ(nullptr, context->getMemHandlePtr(device, handle, 0)); } } // namespace ult } // namespace L0compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/context/context_wddm/000077500000000000000000000000001422164147700314765ustar00rootroot00000000000000test_context.cpp000066400000000000000000000052551422164147700346550ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/context/context_wddm/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/mocks/mock_command_stream_receiver.h" #include "shared/test/common/mocks/mock_compilers.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "shared/test/common/mocks/mock_svm_manager.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/page_fault_manager/mock_cpu_page_fault_manager.h" #include "level_zero/core/source/context/context_imp.h" #include "level_zero/core/source/driver/driver_handle_imp.h" #include "level_zero/core/source/image/image.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/fixtures/host_pointer_manager_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_driver_handle.h" #include "gtest/gtest.h" namespace L0 { namespace ult { using ContextIsShareable = Test; TEST_F(ContextIsShareable, whenCallingisSharedMemoryThenCorrectResultIsReturned) { ze_context_handle_t hContext; ze_context_desc_t desc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC, nullptr, 0}; ze_result_t res = driverHandle->createContext(&desc, 0u, nullptr, &hContext); EXPECT_EQ(ZE_RESULT_SUCCESS, res); ContextImp *contextImp = 
static_cast(L0::Context::fromHandle(hContext)); bool exportableMemoryFalse = false; bool exportableMemoryTrue = true; EXPECT_EQ(exportableMemoryFalse, contextImp->isShareableMemory(nullptr, exportableMemoryFalse, neoDevice)); EXPECT_EQ(exportableMemoryTrue, contextImp->isShareableMemory(nullptr, exportableMemoryTrue, neoDevice)); res = contextImp->destroy(); EXPECT_EQ(ZE_RESULT_SUCCESS, res); } using GetMemHandlePtrTest = Test; TEST_F(GetMemHandlePtrTest, whenCallingGetMemHandlePtrWithValidHandleThenSuccessIsReturned) { MemoryManagerMemHandleMock *fixtureMemoryManager = static_cast(currMemoryManager); uint64_t handle = 57; // Test Successfully returning NT Handle fixtureMemoryManager->NTHandle = true; EXPECT_NE(nullptr, context->getMemHandlePtr(device, handle, 0)); } TEST_F(GetMemHandlePtrTest, whenCallingGetMemHandlePtrWithInvalidHandleThenNullptrIsReturned) { MemoryManagerMemHandleMock *fixtureMemoryManager = static_cast(currMemoryManager); uint64_t handle = 57; driverHandle->failHandleLookup = true; // Test Failing returning NT Handle fixtureMemoryManager->NTHandle = true; EXPECT_EQ(nullptr, context->getMemHandlePtr(device, handle, 0)); } } // namespace ult } // namespace L0compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/context/test_context.cpp000066400000000000000000001153241422164147700322340ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/mocks/mock_command_stream_receiver.h" #include "shared/test/common/mocks/mock_compilers.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "shared/test/common/mocks/mock_svm_manager.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/page_fault_manager/mock_cpu_page_fault_manager.h" #include "level_zero/core/source/context/context_imp.h" #include "level_zero/core/source/driver/driver_handle_imp.h" 
#include "level_zero/core/source/image/image.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/fixtures/host_pointer_manager_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h" #include "level_zero/core/test/unit_tests/mocks/mock_driver_handle.h" #include "gtest/gtest.h" namespace L0 { namespace ult { using MultiDeviceContextTests = Test; TEST_F(MultiDeviceContextTests, whenCreatingContextWithZeroNumDevicesThenAllDevicesAreAssociatedWithTheContext) { ze_context_handle_t hContext; ze_context_desc_t desc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC, nullptr, 0}; ze_result_t res = driverHandle->createContext(&desc, 0u, nullptr, &hContext); EXPECT_EQ(ZE_RESULT_SUCCESS, res); ContextImp *contextImp = static_cast(Context::fromHandle(hContext)); for (size_t i = 0; i < driverHandle->devices.size(); i++) { EXPECT_NE(contextImp->getDevices().find(driverHandle->devices[i]->toHandle()), contextImp->getDevices().end()); } res = L0::Context::fromHandle(hContext)->destroy(); EXPECT_EQ(ZE_RESULT_SUCCESS, res); } TEST_F(MultiDeviceContextTests, whenCreatingContextWithNonZeroNumDevicesThenOnlySpecifiedDeviceAndItsSubDevicesAreAssociatedWithTheContext) { ze_context_handle_t hContext; ze_context_desc_t desc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC, nullptr, 0}; ze_device_handle_t device0 = driverHandle->devices[0]->toHandle(); DeviceImp *deviceImp0 = static_cast(device0); uint32_t subDeviceCount0 = 0; ze_result_t res = deviceImp0->getSubDevices(&subDeviceCount0, nullptr); EXPECT_EQ(res, ZE_RESULT_SUCCESS); EXPECT_EQ(subDeviceCount0, numSubDevices); std::vector subDevices0(subDeviceCount0); res = deviceImp0->getSubDevices(&subDeviceCount0, subDevices0.data()); EXPECT_EQ(res, ZE_RESULT_SUCCESS); ze_device_handle_t device1 = driverHandle->devices[1]->toHandle(); DeviceImp *deviceImp1 = static_cast(device1); uint32_t subDeviceCount1 = 0; res = deviceImp1->getSubDevices(&subDeviceCount1, nullptr); EXPECT_EQ(res, 
ZE_RESULT_SUCCESS); EXPECT_EQ(subDeviceCount1, numSubDevices); std::vector subDevices1(subDeviceCount1); res = deviceImp1->getSubDevices(&subDeviceCount1, subDevices1.data()); EXPECT_EQ(res, ZE_RESULT_SUCCESS); uint32_t subSubDeviceCount1 = 0; res = static_cast(subDevices1[0])->getSubDevices(&subSubDeviceCount1, nullptr); EXPECT_EQ(res, ZE_RESULT_SUCCESS); res = driverHandle->createContext(&desc, 1u, &device1, &hContext); EXPECT_EQ(ZE_RESULT_SUCCESS, res); ContextImp *contextImp = static_cast(Context::fromHandle(hContext)); uint32_t expectedDeviceCountInContext = 1 + subDeviceCount1 + (subDeviceCount1 * subSubDeviceCount1); EXPECT_EQ(contextImp->getDevices().size(), expectedDeviceCountInContext); EXPECT_FALSE(contextImp->isDeviceDefinedForThisContext(L0::Device::fromHandle(device0))); for (auto subDevice : subDevices0) { EXPECT_FALSE(contextImp->isDeviceDefinedForThisContext(L0::Device::fromHandle(subDevice))); } EXPECT_TRUE(contextImp->isDeviceDefinedForThisContext(L0::Device::fromHandle(device1))); for (auto subDevice : subDevices1) { auto l0SubDevice = static_cast(subDevice); EXPECT_TRUE(contextImp->isDeviceDefinedForThisContext(l0SubDevice)); for (auto &subSubDevice : l0SubDevice->subDevices) { EXPECT_TRUE(contextImp->isDeviceDefinedForThisContext(L0::Device::fromHandle(subSubDevice))); } } res = L0::Context::fromHandle(hContext)->destroy(); EXPECT_EQ(ZE_RESULT_SUCCESS, res); } TEST_F(MultiDeviceContextTests, whenAllocatingDeviceMemoryWithDeviceNotDefinedForContextThenDeviceLostIsReturned) { ze_context_handle_t hContext; ze_context_desc_t desc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC, nullptr, 0}; ze_device_handle_t device = driverHandle->devices[1]->toHandle(); ze_result_t res = driverHandle->createContext(&desc, 1u, &device, &hContext); EXPECT_EQ(ZE_RESULT_SUCCESS, res); ContextImp *contextImp = static_cast(Context::fromHandle(hContext)); ze_device_mem_alloc_desc_t deviceDesc = {}; size_t size = 4096; void *ptr = nullptr; res = 
contextImp->allocDeviceMem(driverHandle->devices[0]->toHandle(), &deviceDesc, size, 0u, &ptr); EXPECT_EQ(ZE_RESULT_ERROR_DEVICE_LOST, res); res = L0::Context::fromHandle(hContext)->destroy(); EXPECT_EQ(ZE_RESULT_SUCCESS, res); } TEST_F(MultiDeviceContextTests, whenAllocatingSharedMemoryWithDeviceNotDefinedForContextThenDeviceLostIsReturned) { ze_context_handle_t hContext; ze_context_desc_t desc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC, nullptr, 0}; ze_device_handle_t device = driverHandle->devices[1]->toHandle(); ze_result_t res = driverHandle->createContext(&desc, 1u, &device, &hContext); EXPECT_EQ(ZE_RESULT_SUCCESS, res); ContextImp *contextImp = static_cast(Context::fromHandle(hContext)); ze_device_mem_alloc_desc_t deviceDesc = {}; ze_host_mem_alloc_desc_t hostDesc = {}; size_t size = 4096; void *ptr = nullptr; res = contextImp->allocSharedMem(driverHandle->devices[0]->toHandle(), &deviceDesc, &hostDesc, size, 0u, &ptr); EXPECT_EQ(ZE_RESULT_ERROR_DEVICE_LOST, res); res = L0::Context::fromHandle(hContext)->destroy(); EXPECT_EQ(ZE_RESULT_SUCCESS, res); } struct SVMAllocsManagerContextMock : public NEO::SVMAllocsManager { SVMAllocsManagerContextMock(MemoryManager *memoryManager) : NEO::SVMAllocsManager(memoryManager, false) {} void *createHostUnifiedMemoryAllocation(size_t size, const UnifiedMemoryProperties &memoryProperties) override { EXPECT_EQ(expectedRootDeviceIndexes.size(), memoryProperties.rootDeviceIndices.size()); EXPECT_NE(memoryProperties.rootDeviceIndices.find(expectedRootDeviceIndexes[0]), memoryProperties.rootDeviceIndices.end()); EXPECT_NE(memoryProperties.rootDeviceIndices.find(expectedRootDeviceIndexes[1]), memoryProperties.rootDeviceIndices.end()); return NEO::SVMAllocsManager::createHostUnifiedMemoryAllocation(size, memoryProperties); } std::vector expectedRootDeviceIndexes; }; struct ContextHostAllocTests : public ::testing::Test { void SetUp() override { NEO::MockCompilerEnableGuard mock(true); 
DebugManager.flags.CreateMultipleRootDevices.set(numRootDevices); auto executionEnvironment = new NEO::ExecutionEnvironment; auto devices = NEO::DeviceFactory::createDevices(*executionEnvironment); driverHandle = std::make_unique(); ze_result_t res = driverHandle->initialize(std::move(devices)); EXPECT_EQ(ZE_RESULT_SUCCESS, res); prevSvmAllocsManager = driverHandle->svmAllocsManager; currSvmAllocsManager = new SVMAllocsManagerContextMock(driverHandle->memoryManager); driverHandle->svmAllocsManager = currSvmAllocsManager; zeDevices.resize(numberOfDevicesInContext); driverHandle->getDevice(&numberOfDevicesInContext, zeDevices.data()); for (uint32_t i = 0; i < numberOfDevicesInContext; i++) { L0::DeviceImp *deviceImp = static_cast(L0::Device::fromHandle(zeDevices[i])); currSvmAllocsManager->expectedRootDeviceIndexes.push_back(deviceImp->getRootDeviceIndex()); } } void TearDown() override { driverHandle->svmAllocsManager = prevSvmAllocsManager; delete currSvmAllocsManager; } DebugManagerStateRestore restorer; NEO::SVMAllocsManager *prevSvmAllocsManager; SVMAllocsManagerContextMock *currSvmAllocsManager; std::unique_ptr driverHandle; std::vector zeDevices; const uint32_t numRootDevices = 4u; uint32_t numberOfDevicesInContext = 2u; }; TEST_F(ContextHostAllocTests, whenAllocatingHostMemoryOnlyIndexesOfDevicesWithinTheContextAreUsed) { L0::ContextImp *context = nullptr; ze_context_handle_t hContext; ze_context_desc_t desc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC, nullptr, 0}; ze_result_t res = driverHandle->createContext(&desc, numberOfDevicesInContext, zeDevices.data(), &hContext); EXPECT_EQ(ZE_RESULT_SUCCESS, res); context = static_cast(Context::fromHandle(hContext)); void *hostPtr = nullptr; ze_host_mem_alloc_desc_t hostDesc = {}; size_t size = 1024; res = context->allocHostMem(&hostDesc, size, 0u, &hostPtr); EXPECT_EQ(ZE_RESULT_SUCCESS, res); EXPECT_NE(nullptr, hostPtr); res = context->freeMem(hostPtr); EXPECT_EQ(ZE_RESULT_SUCCESS, res); context->destroy(); } using 
ContextGetStatusTest = Test; TEST_F(ContextGetStatusTest, givenCallToContextGetStatusThenCorrectErrorCodeIsReturnedWhenResourcesHaveBeenReleased) { ze_context_handle_t hContext; ze_context_desc_t desc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC, nullptr, 0}; ze_result_t res = driverHandle->createContext(&desc, 0u, nullptr, &hContext); EXPECT_EQ(ZE_RESULT_SUCCESS, res); L0::Context *context = L0::Context::fromHandle(hContext); res = context->getStatus(); EXPECT_EQ(ZE_RESULT_SUCCESS, res); for (auto device : driverHandle->devices) { L0::DeviceImp *deviceImp = static_cast(device); deviceImp->releaseResources(); } res = context->getStatus(); EXPECT_EQ(ZE_RESULT_ERROR_DEVICE_LOST, res); context->destroy(); } using ContextPowerSavingHintTest = Test; TEST_F(ContextPowerSavingHintTest, givenCallToContextCreateWithPowerHintDescThenPowerHintSetInDriverHandle) { ze_context_handle_t hContext; ze_context_desc_t ctxtDesc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC}; ze_context_power_saving_hint_exp_desc_t powerHintContext = {}; powerHintContext.stype = ZE_STRUCTURE_TYPE_POWER_SAVING_HINT_EXP_DESC; powerHintContext.hint = 1; ctxtDesc.pNext = &powerHintContext; ze_result_t res = driverHandle->createContext(&ctxtDesc, 0u, nullptr, &hContext); EXPECT_EQ(ZE_RESULT_SUCCESS, res); EXPECT_EQ(powerHintContext.hint, driverHandle->powerHint); L0::Context *context = L0::Context::fromHandle(hContext); context->destroy(); } TEST_F(ContextPowerSavingHintTest, givenCallToContextCreateWithPowerHintMinimumThenPowerHintSetInDriverHandle) { ze_context_handle_t hContext; ze_context_desc_t ctxtDesc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC}; ze_context_power_saving_hint_exp_desc_t powerHintContext = {}; powerHintContext.stype = ZE_STRUCTURE_TYPE_POWER_SAVING_HINT_EXP_DESC; powerHintContext.hint = ZE_POWER_SAVING_HINT_TYPE_MIN; ctxtDesc.pNext = &powerHintContext; ze_result_t res = driverHandle->createContext(&ctxtDesc, 0u, nullptr, &hContext); EXPECT_EQ(ZE_RESULT_SUCCESS, res); EXPECT_EQ(powerHintContext.hint, 
driverHandle->powerHint); L0::Context *context = L0::Context::fromHandle(hContext); context->destroy(); } TEST_F(ContextPowerSavingHintTest, givenCallToContextCreateWithPowerHintMaximumThenPowerHintSetInDriverHandle) { ze_context_handle_t hContext; ze_context_desc_t ctxtDesc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC}; ze_context_power_saving_hint_exp_desc_t powerHintContext = {}; powerHintContext.stype = ZE_STRUCTURE_TYPE_POWER_SAVING_HINT_EXP_DESC; powerHintContext.hint = ZE_POWER_SAVING_HINT_TYPE_MAX; ctxtDesc.pNext = &powerHintContext; ze_result_t res = driverHandle->createContext(&ctxtDesc, 0u, nullptr, &hContext); EXPECT_EQ(ZE_RESULT_SUCCESS, res); EXPECT_EQ(powerHintContext.hint, driverHandle->powerHint); L0::Context *context = L0::Context::fromHandle(hContext); context->destroy(); } TEST_F(ContextPowerSavingHintTest, givenCallToContextCreateWithPowerHintGreaterThanMaxHintThenErrorIsReturned) { ze_context_handle_t hContext; ze_context_desc_t ctxtDesc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC, nullptr, 0}; ze_context_power_saving_hint_exp_desc_t powerHintContext = {}; powerHintContext.stype = ZE_STRUCTURE_TYPE_POWER_SAVING_HINT_EXP_DESC; powerHintContext.hint = ZE_POWER_SAVING_HINT_TYPE_MAX + 1; ctxtDesc.pNext = &powerHintContext; ze_result_t res = driverHandle->createContext(&ctxtDesc, 0u, nullptr, &hContext); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ENUMERATION, res); } TEST_F(ContextPowerSavingHintTest, givenCallToContextCreateWithoutPowerHintDescThenPowerHintIsNotSetInDriverHandle) { ze_context_handle_t hContext; ze_context_desc_t ctxtDesc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC, nullptr, 0}; ze_scheduling_hint_exp_desc_t invalidExpContext = {}; invalidExpContext.stype = ZE_STRUCTURE_TYPE_SCHEDULING_HINT_EXP_DESC; ctxtDesc.pNext = &invalidExpContext; ze_result_t res = driverHandle->createContext(&ctxtDesc, 0u, nullptr, &hContext); EXPECT_EQ(ZE_RESULT_SUCCESS, res); EXPECT_EQ(0, driverHandle->powerHint); L0::Context *context = L0::Context::fromHandle(hContext); context->destroy(); } 
using ContextTest = Test; TEST_F(ContextTest, whenCreatingAndDestroyingContextThenSuccessIsReturned) { ze_context_handle_t hContext; ze_context_desc_t desc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC, nullptr, 0}; ze_result_t res = driverHandle->createContext(&desc, 0u, nullptr, &hContext); EXPECT_EQ(ZE_RESULT_SUCCESS, res); res = L0::Context::fromHandle(hContext)->destroy(); EXPECT_EQ(ZE_RESULT_SUCCESS, res); } using ContextMakeMemoryResidentTests = Test; TEST_F(ContextMakeMemoryResidentTests, givenUknownPointerPassedToMakeMemoryResidentThenInvalidArgumentIsReturned) { const size_t size = 4096; uint8_t *ptr = new uint8_t[size]; ze_result_t res = context->makeMemoryResident(device, ptr, size); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, res); delete[] ptr; } TEST_F(ContextMakeMemoryResidentTests, givenValidPointerPassedToMakeMemoryResidentThenSuccessIsReturned) { const size_t size = 4096; void *ptr = nullptr; ze_host_mem_alloc_desc_t hostDesc = {}; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_result_t res = context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, size, 0, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, res); mockMemoryInterface->makeResidentResult = NEO::MemoryOperationsStatus::SUCCESS; res = context->makeMemoryResident(device, ptr, size); EXPECT_EQ(ZE_RESULT_SUCCESS, res); mockMemoryInterface->evictResult = NEO::MemoryOperationsStatus::SUCCESS; res = context->evictMemory(device, ptr, size); EXPECT_EQ(ZE_RESULT_SUCCESS, res); context->freeMem(ptr); } TEST_F(ContextMakeMemoryResidentTests, whenMakingASharedMemoryResidentThenIsAddedToVectorOfResidentAllocations) { const size_t size = 4096; void *ptr = nullptr; ze_host_mem_alloc_desc_t hostDesc = {}; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_result_t res = context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, size, 0, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, res); DriverHandleImp *driverHandleImp = static_cast(hostDriverHandle.get()); size_t previousSize = 
driverHandleImp->sharedMakeResidentAllocations.size(); mockMemoryInterface->makeResidentResult = NEO::MemoryOperationsStatus::SUCCESS; res = context->makeMemoryResident(device, ptr, size); EXPECT_EQ(ZE_RESULT_SUCCESS, res); size_t currentSize = driverHandleImp->sharedMakeResidentAllocations.size(); EXPECT_EQ(previousSize + 1, currentSize); mockMemoryInterface->evictResult = NEO::MemoryOperationsStatus::SUCCESS; res = context->evictMemory(device, ptr, size); EXPECT_EQ(ZE_RESULT_SUCCESS, res); size_t finalSize = driverHandleImp->sharedMakeResidentAllocations.size(); EXPECT_EQ(previousSize, finalSize); context->freeMem(ptr); } TEST_F(ContextMakeMemoryResidentTests, whenMakingADeviceMemoryResidentThenIsNotAddedToVectorOfResidentAllocations) { const size_t size = 4096; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_result_t res = context->allocDeviceMem(device->toHandle(), &deviceDesc, size, 0, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, res); DriverHandleImp *driverHandleImp = static_cast(hostDriverHandle.get()); size_t previousSize = driverHandleImp->sharedMakeResidentAllocations.size(); mockMemoryInterface->makeResidentResult = NEO::MemoryOperationsStatus::SUCCESS; res = context->makeMemoryResident(device, ptr, size); EXPECT_EQ(ZE_RESULT_SUCCESS, res); size_t currentSize = driverHandleImp->sharedMakeResidentAllocations.size(); EXPECT_EQ(previousSize, currentSize); mockMemoryInterface->evictResult = NEO::MemoryOperationsStatus::SUCCESS; res = context->evictMemory(device, ptr, size); EXPECT_EQ(ZE_RESULT_SUCCESS, res); context->freeMem(ptr); } TEST_F(ContextMakeMemoryResidentTests, whenMakingASharedMemoryResidentButMemoryInterfaceFailsThenIsNotAddedToVectorOfResidentAllocations) { const size_t size = 4096; void *ptr = nullptr; ze_host_mem_alloc_desc_t hostDesc = {}; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_result_t res = context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, size, 0, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, res); 
DriverHandleImp *driverHandleImp = static_cast(hostDriverHandle.get()); size_t previousSize = driverHandleImp->sharedMakeResidentAllocations.size(); mockMemoryInterface->makeResidentResult = NEO::MemoryOperationsStatus::FAILED; res = context->makeMemoryResident(device, ptr, size); EXPECT_EQ(ZE_RESULT_ERROR_DEVICE_LOST, res); size_t currentSize = driverHandleImp->sharedMakeResidentAllocations.size(); EXPECT_EQ(previousSize, currentSize); context->freeMem(ptr); } struct ContextMakeMemoryResidentAndMigrationTests : public ContextMakeMemoryResidentTests { struct MockResidentTestsPageFaultManager : public MockPageFaultManager { void moveAllocationToGpuDomain(void *ptr) override { moveAllocationToGpuDomainCalledTimes++; migratedAddress = ptr; } void moveAllocationsWithinUMAllocsManagerToGpuDomain(SVMAllocsManager *unifiedMemoryManager) override { moveAllocationsWithinUMAllocsManagerToGpuDomainCalled++; } uint32_t moveAllocationToGpuDomainCalledTimes = 0; uint32_t moveAllocationsWithinUMAllocsManagerToGpuDomainCalled = 0; void *migratedAddress = nullptr; }; void SetUp() override { ContextMakeMemoryResidentTests::SetUp(); mockMemoryManager = std::make_unique(); mockPageFaultManager = new MockResidentTestsPageFaultManager; svmManager = std::make_unique(mockMemoryManager.get(), false); mockMemoryManager->pageFaultManager.reset(mockPageFaultManager); memoryManager = device->getDriverHandle()->getMemoryManager(); device->getDriverHandle()->setMemoryManager(mockMemoryManager.get()); ze_host_mem_alloc_desc_t hostDesc = {}; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_result_t res = context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, size, 0, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, res); } void TearDown() override { device->getDriverHandle()->setMemoryManager(memoryManager); ContextMakeMemoryResidentTests::TearDown(); } const size_t size = 4096; void *ptr = nullptr; std::unique_ptr mockMemoryManager; std::unique_ptr svmManager; MockResidentTestsPageFaultManager 
*mockPageFaultManager = nullptr; NEO::MemoryManager *memoryManager = nullptr; }; HWTEST_F(ContextMakeMemoryResidentAndMigrationTests, whenExecutingCommandListsWithMigrationThenMemoryFromMakeResidentIsMovedToGpu) { DriverHandleImp *driverHandleImp = static_cast(hostDriverHandle.get()); size_t previousSize = driverHandleImp->sharedMakeResidentAllocations.size(); mockMemoryInterface->makeResidentResult = NEO::MemoryOperationsStatus::SUCCESS; ze_result_t res = context->makeMemoryResident(device, ptr, size); EXPECT_EQ(ZE_RESULT_SUCCESS, res); size_t currentSize = driverHandleImp->sharedMakeResidentAllocations.size(); EXPECT_EQ(previousSize + 1, currentSize); const ze_command_queue_desc_t desc = {}; MockCsrHw2 csr(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield()); csr.initializeTagAllocation(); csr.setupContext(*neoDevice->getDefaultEngine().osContext); ze_result_t returnValue; L0::CommandQueue *commandQueue = CommandQueue::create(productFamily, device, &csr, &desc, true, false, returnValue); EXPECT_NE(nullptr, commandQueue); EXPECT_EQ(mockPageFaultManager->moveAllocationToGpuDomainCalledTimes, 0u); std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::Copy, 0u, returnValue)); auto commandListHandle = commandList->toHandle(); res = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, true); EXPECT_EQ(ZE_RESULT_SUCCESS, res); EXPECT_EQ(mockPageFaultManager->moveAllocationToGpuDomainCalledTimes, 1u); EXPECT_EQ(mockPageFaultManager->migratedAddress, ptr); mockMemoryInterface->evictResult = NEO::MemoryOperationsStatus::SUCCESS; res = context->evictMemory(device, ptr, size); EXPECT_EQ(ZE_RESULT_SUCCESS, res); commandQueue->destroy(); context->freeMem(ptr); } HWTEST2_F(ContextMakeMemoryResidentAndMigrationTests, whenExecutingKernelWithIndirectAccessThenSharedAllocationsAreMigrated, IsAtLeastSkl) { DriverHandleImp *driverHandleImp = static_cast(hostDriverHandle.get()); size_t previousSize = 
driverHandleImp->sharedMakeResidentAllocations.size(); mockMemoryInterface->makeResidentResult = NEO::MemoryOperationsStatus::SUCCESS; ze_result_t res = context->makeMemoryResident(device, ptr, size); EXPECT_EQ(ZE_RESULT_SUCCESS, res); size_t currentSize = driverHandleImp->sharedMakeResidentAllocations.size(); EXPECT_EQ(previousSize + 1, currentSize); const ze_command_queue_desc_t desc = {}; MockCsrHw2 csr(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield()); csr.initializeTagAllocation(); csr.setupContext(*neoDevice->getDefaultEngine().osContext); ze_result_t returnValue; L0::CommandQueue *commandQueue = CommandQueue::create(productFamily, device, &csr, &desc, true, false, returnValue); EXPECT_NE(nullptr, commandQueue); EXPECT_EQ(mockPageFaultManager->moveAllocationToGpuDomainCalledTimes, 0u); auto commandList = std::make_unique>>(); commandList->initialize(device, NEO::EngineGroupType::Copy, 0u); commandList->unifiedMemoryControls.indirectSharedAllocationsAllowed = true; commandList->indirectAllocationsAllowed = true; commandList->close(); auto commandListHandle = commandList->toHandle(); res = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, true); EXPECT_EQ(mockPageFaultManager->moveAllocationsWithinUMAllocsManagerToGpuDomainCalled, 1u); mockMemoryInterface->evictResult = NEO::MemoryOperationsStatus::SUCCESS; res = context->evictMemory(device, ptr, size); EXPECT_EQ(ZE_RESULT_SUCCESS, res); commandQueue->destroy(); context->freeMem(ptr); } HWTEST_F(ContextMakeMemoryResidentAndMigrationTests, whenExecutingCommandListsWithNoMigrationThenMemoryFromMakeResidentIsNotMovedToGpu) { DriverHandleImp *driverHandleImp = static_cast(hostDriverHandle.get()); size_t previousSize = driverHandleImp->sharedMakeResidentAllocations.size(); mockMemoryInterface->makeResidentResult = NEO::MemoryOperationsStatus::SUCCESS; ze_result_t res = context->makeMemoryResident(device, ptr, size); EXPECT_EQ(ZE_RESULT_SUCCESS, res); size_t currentSize = 
driverHandleImp->sharedMakeResidentAllocations.size(); EXPECT_EQ(previousSize + 1, currentSize); const ze_command_queue_desc_t desc = {}; MockCsrHw2 csr(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield()); csr.initializeTagAllocation(); csr.setupContext(*neoDevice->getDefaultEngine().osContext); ze_result_t returnValue; L0::CommandQueue *commandQueue = CommandQueue::create(productFamily, device, &csr, &desc, true, false, returnValue); EXPECT_NE(nullptr, commandQueue); EXPECT_EQ(mockPageFaultManager->moveAllocationToGpuDomainCalledTimes, 0u); std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::Copy, 0u, returnValue)); auto commandListHandle = commandList->toHandle(); res = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false); EXPECT_EQ(ZE_RESULT_SUCCESS, res); EXPECT_EQ(mockPageFaultManager->moveAllocationToGpuDomainCalledTimes, 0u); EXPECT_EQ(mockPageFaultManager->migratedAddress, nullptr); mockMemoryInterface->evictResult = NEO::MemoryOperationsStatus::SUCCESS; res = context->evictMemory(device, ptr, size); EXPECT_EQ(ZE_RESULT_SUCCESS, res); commandQueue->destroy(); context->freeMem(ptr); } HWTEST_F(ContextMakeMemoryResidentAndMigrationTests, whenExecutingImmediateCommandListsHavingSharedAllocationWithMigrationThenMemoryFromMakeResidentIsMovedToGpu) { DriverHandleImp *driverHandleImp = static_cast(hostDriverHandle.get()); size_t previousSize = driverHandleImp->sharedMakeResidentAllocations.size(); mockMemoryInterface->makeResidentResult = NEO::MemoryOperationsStatus::SUCCESS; ze_result_t res = context->makeMemoryResident(device, ptr, size); EXPECT_EQ(ZE_RESULT_SUCCESS, res); size_t currentSize = driverHandleImp->sharedMakeResidentAllocations.size(); EXPECT_EQ(previousSize + 1, currentSize); const ze_command_queue_desc_t desc = {}; MockCsrHw2 csr(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield()); csr.initializeTagAllocation(); 
csr.setupContext(*neoDevice->getDefaultEngine().osContext); ze_result_t result = ZE_RESULT_SUCCESS; DebugManagerStateRestore restorer; NEO::DebugManager.flags.EnableFlushTaskSubmission.set(true); std::unique_ptr commandList0(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, result)); ASSERT_NE(nullptr, commandList0); void *dst_buffer = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_host_mem_alloc_desc_t hostDesc = {}; result = context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, 16384u, 4090u, &dst_buffer); ASSERT_EQ(ZE_RESULT_SUCCESS, result); int one = 1; result = commandList0->appendMemoryFill(dst_buffer, reinterpret_cast(&one), sizeof(one), 4090u, nullptr, 0, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(mockPageFaultManager->moveAllocationToGpuDomainCalledTimes, 1u); EXPECT_EQ(mockPageFaultManager->migratedAddress, ptr); mockMemoryInterface->evictResult = NEO::MemoryOperationsStatus::SUCCESS; res = context->evictMemory(device, ptr, size); EXPECT_EQ(ZE_RESULT_SUCCESS, res); context->freeMem(ptr); context->freeMem(dst_buffer); } HWTEST_F(ContextMakeMemoryResidentAndMigrationTests, whenExecutingImmediateCommandListsHavingHostAllocationWithMigrationThenMemoryFromMakeResidentIsMovedToGpu) { DriverHandleImp *driverHandleImp = static_cast(hostDriverHandle.get()); size_t previousSize = driverHandleImp->sharedMakeResidentAllocations.size(); mockMemoryInterface->makeResidentResult = NEO::MemoryOperationsStatus::SUCCESS; ze_result_t res = context->makeMemoryResident(device, ptr, size); EXPECT_EQ(ZE_RESULT_SUCCESS, res); size_t currentSize = driverHandleImp->sharedMakeResidentAllocations.size(); EXPECT_EQ(previousSize + 1, currentSize); const ze_command_queue_desc_t desc = {}; MockCsrHw2 csr(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield()); csr.initializeTagAllocation(); csr.setupContext(*neoDevice->getDefaultEngine().osContext); ze_result_t result = 
ZE_RESULT_SUCCESS; DebugManagerStateRestore restorer; NEO::DebugManager.flags.EnableFlushTaskSubmission.set(true); std::unique_ptr commandList0(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, result)); ASSERT_NE(nullptr, commandList0); DebugManagerStateRestore restore; DebugManager.flags.AllocateSharedAllocationsWithCpuAndGpuStorage.set(true); std::set rootDeviceIndices{mockRootDeviceIndex}; std::map deviceBitfields{{mockRootDeviceIndex, mockDeviceBitfield}}; NEO::SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::SHARED_UNIFIED_MEMORY, rootDeviceIndices, deviceBitfields); auto sharedPtr = svmManager->createSharedUnifiedMemoryAllocation(4096u, unifiedMemoryProperties, device); EXPECT_NE(nullptr, sharedPtr); auto allocation = svmManager->getSVMAlloc(sharedPtr); auto gpuAllocation = allocation->gpuAllocations.getGraphicsAllocation(mockRootDeviceIndex); auto &commandContainer = commandList0->commandContainer; commandContainer.addToResidencyContainer(gpuAllocation); commandContainer.addToResidencyContainer(allocation->cpuAllocation); void *dst_buffer = nullptr; ze_host_mem_alloc_desc_t hostDesc = {}; result = context->allocHostMem(&hostDesc, 4096u, 0u, &dst_buffer); ASSERT_EQ(ZE_RESULT_SUCCESS, result); int one = 1; result = commandList0->appendMemoryFill(dst_buffer, reinterpret_cast(&one), sizeof(one), 4090u, nullptr, 0, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(mockPageFaultManager->moveAllocationToGpuDomainCalledTimes, 3u); context->freeMem(ptr); svmManager->freeSVMAlloc(sharedPtr); context->freeMem(dst_buffer); } TEST_F(ContextTest, whenGettingDriverThenDriverIsRetrievedSuccessfully) { ze_context_handle_t hContext; ze_context_desc_t desc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC, nullptr, 0}; ze_result_t res = driverHandle->createContext(&desc, 0u, nullptr, &hContext); EXPECT_EQ(ZE_RESULT_SUCCESS, res); ContextImp *contextImp = 
static_cast(L0::Context::fromHandle(hContext)); L0::DriverHandle *driverHandleFromContext = contextImp->getDriverHandle(); EXPECT_EQ(driverHandleFromContext, driverHandle.get()); res = contextImp->destroy(); EXPECT_EQ(ZE_RESULT_SUCCESS, res); } TEST_F(ContextTest, whenCallingVirtualMemInterfacesThenUnsupportedIsReturned) { ze_context_handle_t hContext; ze_context_desc_t desc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC, nullptr, 0}; ze_result_t res = driverHandle->createContext(&desc, 0u, nullptr, &hContext); EXPECT_EQ(ZE_RESULT_SUCCESS, res); ContextImp *contextImp = static_cast(L0::Context::fromHandle(hContext)); void *pStart = 0x0; size_t size = 0u; void *ptr = nullptr; res = contextImp->reserveVirtualMem(pStart, size, &ptr); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, res); size_t pagesize = 0u; res = contextImp->queryVirtualMemPageSize(device, size, &pagesize); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, res); res = contextImp->freeVirtualMem(ptr, size); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, res); res = contextImp->destroy(); EXPECT_EQ(ZE_RESULT_SUCCESS, res); } TEST_F(ContextTest, whenCallingPhysicalMemInterfacesThenUnsupportedIsReturned) { ze_context_handle_t hContext; ze_context_desc_t desc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC, nullptr, 0}; ze_result_t res = driverHandle->createContext(&desc, 0u, nullptr, &hContext); EXPECT_EQ(ZE_RESULT_SUCCESS, res); ContextImp *contextImp = static_cast(L0::Context::fromHandle(hContext)); ze_physical_mem_desc_t descMem = {}; ze_physical_mem_handle_t mem = {}; res = contextImp->createPhysicalMem(device, &descMem, &mem); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, res); res = contextImp->destroyPhysicalMem(mem); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, res); res = contextImp->destroy(); EXPECT_EQ(ZE_RESULT_SUCCESS, res); } TEST_F(ContextTest, whenCallingMappingVirtualInterfacesThenUnsupportedIsReturned) { ze_context_handle_t hContext; ze_context_desc_t desc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC, nullptr, 0}; 
ze_result_t res = driverHandle->createContext(&desc, 0u, nullptr, &hContext); EXPECT_EQ(ZE_RESULT_SUCCESS, res); ContextImp *contextImp = static_cast(L0::Context::fromHandle(hContext)); ze_physical_mem_desc_t descMem = {}; ze_physical_mem_handle_t mem = {}; res = contextImp->createPhysicalMem(device, &descMem, &mem); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, res); ze_memory_access_attribute_t access = {}; size_t offset = 0; void *ptr = nullptr; size_t size = 0; res = contextImp->mapVirtualMem(ptr, size, mem, offset, access); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, res); res = contextImp->setVirtualMemAccessAttribute(ptr, size, access); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, res); ze_memory_access_attribute_t outAccess = {}; size_t outSize = 0; res = contextImp->getVirtualMemAccessAttribute(ptr, size, &outAccess, &outSize); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, res); res = contextImp->unMapVirtualMem(ptr, size); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, res); res = contextImp->destroyPhysicalMem(mem); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, res); res = contextImp->destroy(); EXPECT_EQ(ZE_RESULT_SUCCESS, res); } using IsAtMostProductDG1 = IsAtMostProduct; HWTEST2_F(ContextTest, WhenCreatingImageThenSuccessIsReturned, IsAtMostProductDG1) { ze_context_handle_t hContext; ze_context_desc_t desc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC, nullptr, 0}; ze_result_t res = driverHandle->createContext(&desc, 0u, nullptr, &hContext); EXPECT_EQ(ZE_RESULT_SUCCESS, res); ContextImp *contextImp = static_cast(L0::Context::fromHandle(hContext)); ze_image_handle_t image = {}; ze_image_desc_t imageDesc = {}; imageDesc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC; res = contextImp->createImage(device, &imageDesc, &image); EXPECT_EQ(ZE_RESULT_SUCCESS, res); EXPECT_NE(nullptr, image); Image::fromHandle(image)->destroy(); res = contextImp->destroy(); EXPECT_EQ(ZE_RESULT_SUCCESS, res); } } // namespace ult } // namespace L0 
compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/debugger/000077500000000000000000000000001422164147700270775ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/debugger/CMakeLists.txt000066400000000000000000000012711422164147700316400ustar00rootroot00000000000000# # Copyright (C) 2020-2022 Intel Corporation # # SPDX-License-Identifier: MIT # target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/active_debugger_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/l0_debugger_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/test_source_level_debugger.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_l0_debugger_1.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_l0_debugger_2.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_l0_debugger_single_address_space.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_module_with_debug.cpp ) add_subdirectories() active_debugger_fixture.h000066400000000000000000000064251422164147700340650ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/debugger/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/test/common/mocks/mock_compiler_interface.h" #include "shared/test/common/mocks/mock_compilers.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/mocks/mock_os_library.h" #include "shared/test/common/mocks/mock_sip.h" #include "shared/test/common/mocks/mock_source_level_debugger.h" #include "level_zero/core/source/cmdqueue/cmdqueue_hw.h" #include "level_zero/core/source/driver/driver_handle_imp.h" #include "level_zero/core/source/fence/fence.h" #include "level_zero/core/source/module/module.h" #include "level_zero/core/test/unit_tests/mocks/mock_built_ins.h" #include "level_zero/core/test/unit_tests/mocks/mock_device.h" #include "level_zero/core/test/unit_tests/mocks/mock_driver.h" #include "level_zero/core/test/unit_tests/mocks/mock_driver_handle.h" #include 
"level_zero/core/test/unit_tests/mocks/mock_memory_manager.h" namespace L0 { namespace ult { struct ActiveDebuggerFixture { void SetUp() { // NOLINT(readability-identifier-naming) NEO::MockCompilerEnableGuard mock(true); ze_result_t returnValue; auto executionEnvironment = new NEO::ExecutionEnvironment(); auto mockBuiltIns = new MockBuiltins(); executionEnvironment->prepareRootDeviceEnvironments(1); hwInfo = *defaultHwInfo.get(); executionEnvironment->rootDeviceEnvironments[0]->builtins.reset(mockBuiltIns); executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(&hwInfo); auto isHexadecimalArrayPrefered = HwHelper::get(hwInfo.platform.eRenderCoreFamily).isSipKernelAsHexadecimalArrayPreferred(); if (isHexadecimalArrayPrefered) { MockSipData::useMockSip = true; } debugger = new MockActiveSourceLevelDebugger(new MockOsLibrary); executionEnvironment->rootDeviceEnvironments[0]->debugger.reset(debugger); executionEnvironment->initializeMemoryManager(); device = NEO::MockDevice::create(executionEnvironment, 0u); device->setDebuggerActive(true); std::vector> devices; devices.push_back(std::unique_ptr(device)); auto driverHandleUlt = whitebox_cast(DriverHandle::create(std::move(devices), L0EnvVariables{}, &returnValue)); driverHandle.reset(driverHandleUlt); ASSERT_NE(nullptr, driverHandle); ze_device_handle_t hDevice; uint32_t count = 1; ze_result_t result = driverHandle->getDevice(&count, &hDevice); EXPECT_EQ(ZE_RESULT_SUCCESS, result); deviceL0 = L0::Device::fromHandle(hDevice); ASSERT_NE(nullptr, deviceL0); } void TearDown() { // NOLINT(readability-identifier-naming) L0::GlobalDriver = nullptr; } std::unique_ptr> driverHandle; NEO::MockDevice *device = nullptr; L0::Device *deviceL0; MockActiveSourceLevelDebugger *debugger = nullptr; HardwareInfo hwInfo; VariableBackup mockSipCalled{&NEO::MockSipData::called}; VariableBackup mockSipCalledType{&NEO::MockSipData::calledType}; VariableBackup backupSipInitType{&MockSipData::useMockSip}; }; } // namespace ult } // namespace 
L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/debugger/l0_debugger_fixture.h000066400000000000000000000063111422164147700331760ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/mocks/mock_compilers.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/mocks/mock_memory_operations_handler.h" #include "shared/test/common/mocks/mock_sip.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_built_ins.h" #include "level_zero/core/test/unit_tests/mocks/mock_l0_debugger.h" namespace L0 { namespace ult { struct L0DebuggerFixture { void SetUp() { NEO::MockCompilerEnableGuard mock(true); auto executionEnvironment = new NEO::ExecutionEnvironment(); auto mockBuiltIns = new MockBuiltins(); executionEnvironment->prepareRootDeviceEnvironments(1); executionEnvironment->rootDeviceEnvironments[0]->builtins.reset(mockBuiltIns); memoryOperationsHandler = new NEO::MockMemoryOperations(); executionEnvironment->rootDeviceEnvironments[0]->memoryOperationsInterface.reset(memoryOperationsHandler); executionEnvironment->setDebuggingEnabled(); hwInfo = *NEO::defaultHwInfo.get(); hwInfo.featureTable.flags.ftrLocalMemory = true; auto isHexadecimalArrayPrefered = HwHelper::get(hwInfo.platform.eRenderCoreFamily).isSipKernelAsHexadecimalArrayPreferred(); if (isHexadecimalArrayPrefered) { MockSipData::useMockSip = true; } executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(&hwInfo); executionEnvironment->initializeMemoryManager(); neoDevice = NEO::MockDevice::create(executionEnvironment, 0u); NEO::DeviceVector devices; devices.push_back(std::unique_ptr(neoDevice)); driverHandle = std::make_unique>(); driverHandle->enableProgramDebugging = true; driverHandle->initialize(std::move(devices)); device = 
driverHandle->devices[0]; } void TearDown() { } std::unique_ptr> driverHandle; NEO::MockDevice *neoDevice = nullptr; L0::Device *device = nullptr; NEO::HardwareInfo hwInfo; MockMemoryOperations *memoryOperationsHandler = nullptr; VariableBackup mockSipCalled{&NEO::MockSipData::called}; VariableBackup mockSipCalledType{&NEO::MockSipData::calledType}; VariableBackup backupSipInitType{&MockSipData::useMockSip}; }; struct L0DebuggerHwFixture : public L0DebuggerFixture { void SetUp() { L0DebuggerFixture::SetUp(); debuggerHw = static_cast(neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()]->debugger.get()); neoDevice->setPreemptionMode(PreemptionMode::Disabled); } void TearDown() { L0DebuggerFixture::TearDown(); debuggerHw = nullptr; } template MockDebuggerL0Hw *getMockDebuggerL0Hw() { return static_cast *>(debuggerHw); } DebuggerL0 *debuggerHw = nullptr; }; } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/debugger/linux/000077500000000000000000000000001422164147700302365ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/debugger/linux/CMakeLists.txt000066400000000000000000000004161422164147700327770ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(UNIX) target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/test_l0_debugger_linux.cpp ) endif() test_l0_debugger_linux.cpp000066400000000000000000000250671422164147700353320ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/debugger/linux/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/kernel/debug_data.h" #include "shared/source/os_interface/os_interface.h" #include 
"shared/test/common/libult/linux/drm_mock.h" #include "shared/test/common/mocks/linux/mock_drm_allocation.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h" #include "level_zero/core/test/unit_tests/sources/debugger/l0_debugger_fixture.h" #include #include using namespace NEO; namespace L0 { namespace ult { struct L0DebuggerLinuxFixture { void SetUp() { auto executionEnvironment = new NEO::ExecutionEnvironment(); auto mockBuiltIns = new MockBuiltins(); executionEnvironment->prepareRootDeviceEnvironments(1); executionEnvironment->setDebuggingEnabled(); executionEnvironment->rootDeviceEnvironments[0]->builtins.reset(mockBuiltIns); executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); executionEnvironment->initializeMemoryManager(); auto osInterface = new OSInterface(); drmMock = new DrmMockResources(*executionEnvironment->rootDeviceEnvironments[0]); executionEnvironment->rootDeviceEnvironments[0]->osInterface.reset(osInterface); executionEnvironment->rootDeviceEnvironments[0]->osInterface->setDriverModel(std::unique_ptr(drmMock)); neoDevice = NEO::MockDevice::create(executionEnvironment, 0u); NEO::DeviceVector devices; devices.push_back(std::unique_ptr(neoDevice)); driverHandle = std::make_unique>(); driverHandle->enableProgramDebugging = true; driverHandle->initialize(std::move(devices)); device = driverHandle->devices[0]; } void TearDown() { } std::unique_ptr> driverHandle; NEO::MockDevice *neoDevice = nullptr; L0::Device *device = nullptr; DrmMockResources *drmMock = nullptr; }; using L0DebuggerLinuxTest = Test; TEST_F(L0DebuggerLinuxTest, givenProgramDebuggingEnabledWhenDriverHandleIsCreatedThenItAllocatesL0Debugger) { EXPECT_NE(nullptr, neoDevice->getDebugger()); EXPECT_FALSE(neoDevice->getDebugger()->isLegacy()); EXPECT_EQ(nullptr, neoDevice->getSourceLevelDebugger()); } TEST_F(L0DebuggerLinuxTest, whenDebuggerIsCreatedThenItCallsDrmToRegisterResourceClasses) { 
EXPECT_NE(nullptr, neoDevice->getDebugger()); EXPECT_TRUE(drmMock->registerClassesCalled); } TEST(L0DebuggerLinux, givenVmBindAndPerContextVmEnabledInDrmWhenInitializingDebuggingInOsThenRegisterResourceClassesIsCalled) { auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); executionEnvironment->setDebuggingEnabled(); executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); executionEnvironment->initializeMemoryManager(); auto osInterface = new OSInterface(); auto drmMock = new DrmMockResources(*executionEnvironment->rootDeviceEnvironments[0]); drmMock->bindAvailable = true; drmMock->setPerContextVMRequired(true); executionEnvironment->rootDeviceEnvironments[0]->osInterface.reset(osInterface); executionEnvironment->rootDeviceEnvironments[0]->osInterface->setDriverModel(std::unique_ptr(drmMock)); auto result = WhiteBox<::L0::DebuggerL0>::initDebuggingInOs(osInterface); EXPECT_TRUE(result); EXPECT_TRUE(drmMock->registerClassesCalled); } TEST(L0DebuggerLinux, givenVmBindNotAvailableInDrmWhenInitializingDebuggingInOsThenRegisterResourceClassesIsNotCalled) { auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); executionEnvironment->setDebuggingEnabled(); executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); executionEnvironment->initializeMemoryManager(); auto osInterface = new OSInterface(); auto drmMock = new DrmMockResources(*executionEnvironment->rootDeviceEnvironments[0]); drmMock->bindAvailable = false; drmMock->setPerContextVMRequired(true); executionEnvironment->rootDeviceEnvironments[0]->osInterface.reset(osInterface); executionEnvironment->rootDeviceEnvironments[0]->osInterface->setDriverModel(std::unique_ptr(drmMock)); auto result = WhiteBox<::L0::DebuggerL0>::initDebuggingInOs(osInterface); EXPECT_FALSE(result); EXPECT_FALSE(drmMock->registerClassesCalled); } TEST(L0DebuggerLinux, 
givenPerContextVmNotEnabledWhenInitializingDebuggingInOsThenRegisterResourceClassesIsNotCalled) { auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); executionEnvironment->setDebuggingEnabled(); executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); executionEnvironment->initializeMemoryManager(); auto osInterface = new OSInterface(); auto drmMock = new DrmMockResources(*executionEnvironment->rootDeviceEnvironments[0]); drmMock->bindAvailable = true; drmMock->setPerContextVMRequired(false); executionEnvironment->rootDeviceEnvironments[0]->osInterface.reset(osInterface); executionEnvironment->rootDeviceEnvironments[0]->osInterface->setDriverModel(std::unique_ptr(drmMock)); auto result = WhiteBox<::L0::DebuggerL0>::initDebuggingInOs(osInterface); EXPECT_FALSE(result); EXPECT_FALSE(drmMock->registerClassesCalled); } TEST_F(L0DebuggerLinuxTest, whenRegisterElfisCalledThenItRegistersBindExtHandles) { NEO::DebugData debugData; debugData.vIsa = "01234567890"; debugData.vIsaSize = 10; MockDrmAllocation isaAllocation(AllocationType::KERNEL_ISA, MemoryPool::System4KBPages); MockBufferObject bo(drmMock, 0, 0, 1); isaAllocation.bufferObjects[0] = &bo; device->getL0Debugger()->registerElf(&debugData, &isaAllocation); EXPECT_EQ(static_cast(10), drmMock->registeredDataSize); auto &bos = isaAllocation.getBOs(); for (auto bo : bos) { if (bo) { auto extBindHandles = bo->getBindExtHandles(); EXPECT_NE(static_cast(0), extBindHandles.size()); } } } TEST_F(L0DebuggerLinuxTest, whenRegisterElfisCalledInAllocationWithNoBOThenItRegistersBindExtHandles) { NEO::DebugData debugData; debugData.vIsa = "01234567890"; debugData.vIsaSize = 10; MockDrmAllocation isaAllocation(AllocationType::KERNEL_ISA, MemoryPool::System4KBPages); device->getL0Debugger()->registerElf(&debugData, &isaAllocation); EXPECT_EQ(static_cast(10u), drmMock->registeredDataSize); } TEST_F(L0DebuggerLinuxTest, 
givenNoOSInterfaceThenRegisterElfDoesNothing) { NEO::OSInterface *OSInterface_tmp = neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[0]->osInterface.release(); NEO::DebugData debugData; debugData.vIsa = "01234567890"; debugData.vIsaSize = 10; drmMock->registeredDataSize = 0; MockDrmAllocation isaAllocation(AllocationType::KERNEL_ISA, MemoryPool::System4KBPages); device->getL0Debugger()->registerElf(&debugData, &isaAllocation); EXPECT_EQ(static_cast(0u), drmMock->registeredDataSize); neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[0]->osInterface.reset(OSInterface_tmp); } TEST_F(L0DebuggerLinuxTest, givenAllocationsWhenAttachingZebinModuleThenAllAllocationsHaveRegisteredHandle) { MockDrmAllocation isaAllocation(AllocationType::KERNEL_ISA, MemoryPool::System4KBPages); MockBufferObject bo(drmMock, 0, 0, 1); isaAllocation.bufferObjects[0] = &bo; MockDrmAllocation isaAllocation2(AllocationType::KERNEL_ISA, MemoryPool::System4KBPages); MockBufferObject bo2(drmMock, 0, 0, 1); isaAllocation2.bufferObjects[0] = &bo2; uint32_t handle = 0; StackVec kernelAllocs; kernelAllocs.push_back(&isaAllocation); kernelAllocs.push_back(&isaAllocation2); drmMock->registeredDataSize = 0; drmMock->registeredClass = NEO::Drm::ResourceClass::MaxSize; EXPECT_TRUE(device->getL0Debugger()->attachZebinModuleToSegmentAllocations(kernelAllocs, handle)); EXPECT_EQ(sizeof(uint32_t), drmMock->registeredDataSize); EXPECT_EQ(NEO::Drm::ResourceClass::L0ZebinModule, drmMock->registeredClass); const auto containsModuleHandle = [handle](const auto &bufferObject) { const auto &bindExtHandles = bufferObject.getBindExtHandles(); return std::find(bindExtHandles.begin(), bindExtHandles.end(), handle) != bindExtHandles.end(); }; EXPECT_TRUE(containsModuleHandle(bo)); EXPECT_TRUE(containsModuleHandle(bo2)); } TEST_F(L0DebuggerLinuxTest, givenModuleHandleWhenRemoveZebinModuleIsCalledThenHandleIsUnregistered) { uint32_t handle = 20; 
EXPECT_TRUE(device->getL0Debugger()->removeZebinModule(handle)); EXPECT_EQ(1u, drmMock->unregisterCalledCount); EXPECT_EQ(20u, drmMock->unregisteredHandle); } TEST_F(L0DebuggerLinuxTest, givenModuleHandleZeroWhenRemoveZebinModuleIsCalledThenDrmUnregisterIsNotCalled) { uint32_t handle = 0; EXPECT_FALSE(device->getL0Debugger()->removeZebinModule(handle)); EXPECT_EQ(0u, drmMock->unregisterCalledCount); } HWTEST_F(L0DebuggerLinuxTest, givenDebuggingEnabledAndCommandQueuesAreCreatedAndDestroyedThanDebuggerL0IsNotified) { auto debuggerL0Hw = static_cast *>(device->getL0Debugger()); neoDevice->getDefaultEngine().commandStreamReceiver->getOsContext().ensureContextInitialized(); drmMock->ioctlCallsCount = 0; ze_command_queue_desc_t queueDesc = {}; ze_result_t returnValue; auto commandQueue1 = CommandQueue::create(productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &queueDesc, false, false, returnValue); EXPECT_EQ(1u, drmMock->ioctlCallsCount); EXPECT_EQ(1u, debuggerL0Hw->commandQueueCreatedCount); auto commandQueue2 = CommandQueue::create(productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &queueDesc, false, false, returnValue); EXPECT_EQ(1u, drmMock->ioctlCallsCount); EXPECT_EQ(2u, debuggerL0Hw->commandQueueCreatedCount); commandQueue1->destroy(); EXPECT_EQ(1u, drmMock->ioctlCallsCount); EXPECT_EQ(1u, debuggerL0Hw->commandQueueDestroyedCount); commandQueue2->destroy(); EXPECT_EQ(1u, drmMock->unregisterCalledCount); EXPECT_EQ(2u, debuggerL0Hw->commandQueueDestroyedCount); } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/debugger/test_l0_debugger_1.cpp000066400000000000000000001507271422164147700332550ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen_common/reg_configs_common.h" #include "shared/source/helpers/preamble.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h" 
#include "shared/test/common/mocks/mock_gmm_helper.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h" #include "level_zero/core/test/unit_tests/mocks/mock_kernel.h" #include "level_zero/core/test/unit_tests/sources/debugger/l0_debugger_fixture.h" namespace L0 { namespace ult { using L0DebuggerTest = Test; TEST_F(L0DebuggerTest, givenL0DebuggerWhenCallingIsLegacyThenFalseIsReturned) { EXPECT_FALSE(neoDevice->getDebugger()->isLegacy()); } TEST_F(L0DebuggerTest, givenL0DebuggerWhenGettingSourceLevelDebuggerThenNullptrReturned) { EXPECT_EQ(nullptr, neoDevice->getSourceLevelDebugger()); } TEST_F(L0DebuggerTest, givenL0DebuggerWhenGettingL0DebuggerThenValidDebuggerInstanceIsReturned) { EXPECT_NE(nullptr, device->getL0Debugger()); } TEST_F(L0DebuggerTest, givenL0DebuggerWhenGettingSipAllocationThenValidSipTypeIsReturned) { neoDevice->setDebuggerActive(true); auto systemRoutine = SipKernel::getSipKernel(*neoDevice).getSipAllocation(); ASSERT_NE(nullptr, systemRoutine); auto sipType = SipKernel::getSipKernelType(*neoDevice); auto isHexadecimalArrayPreferred = HwHelper::get(hwInfo.platform.eRenderCoreFamily).isSipKernelAsHexadecimalArrayPreferred(); auto expectedSipAllocation = isHexadecimalArrayPreferred ? 
NEO::MockSipData::mockSipKernel->getSipAllocation() : neoDevice->getBuiltIns()->getSipKernel(sipType, *neoDevice).getSipAllocation(); EXPECT_EQ(expectedSipAllocation, systemRoutine); } TEST_F(L0DebuggerTest, givenL0DebuggerWhenGettingStateSaveAreaHeaderThenValidSipTypeIsReturned) { auto &stateSaveAreaHeader = SipKernel::getSipKernel(*neoDevice).getStateSaveAreaHeader(); auto sipType = SipKernel::getSipKernelType(*neoDevice); auto &expectedStateSaveAreaHeader = neoDevice->getBuiltIns()->getSipKernel(sipType, *neoDevice).getStateSaveAreaHeader(); EXPECT_EQ(expectedStateSaveAreaHeader, stateSaveAreaHeader); } TEST_F(L0DebuggerTest, givenProgramDebuggingEnabledWhenDebuggerIsCreatedThenFusedEusAreDisabled) { EXPECT_TRUE(driverHandle->enableProgramDebugging); EXPECT_FALSE(neoDevice->getHardwareInfo().capabilityTable.fusedEuEnabled); } TEST(Debugger, givenL0DebuggerOFFWhenGettingStateSaveAreaHeaderThenValidSipTypeIsReturned) { auto executionEnvironment = new NEO::ExecutionEnvironment(); executionEnvironment->prepareRootDeviceEnvironments(1); auto isHexadecimalArrayPreferred = HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily).isSipKernelAsHexadecimalArrayPreferred(); if (!isHexadecimalArrayPreferred) { auto mockBuiltIns = new MockBuiltins(); executionEnvironment->rootDeviceEnvironments[0]->builtins.reset(mockBuiltIns); } auto hwInfo = *NEO::defaultHwInfo.get(); hwInfo.featureTable.flags.ftrLocalMemory = true; executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(&hwInfo); executionEnvironment->initializeMemoryManager(); auto neoDevice = NEO::MockDevice::create(executionEnvironment, 0u); NEO::DeviceVector devices; devices.push_back(std::unique_ptr(neoDevice)); auto driverHandle = std::make_unique>(); driverHandle->enableProgramDebugging = false; driverHandle->initialize(std::move(devices)); auto sipType = SipKernel::getSipKernelType(*neoDevice); if (isHexadecimalArrayPreferred) { SipKernel::initSipKernel(sipType, *neoDevice); } auto &stateSaveAreaHeader = 
SipKernel::getSipKernel(*neoDevice).getStateSaveAreaHeader(); if (isHexadecimalArrayPreferred) { auto sipKernel = neoDevice->getRootDeviceEnvironment().sipKernels[static_cast(sipType)].get(); ASSERT_NE(sipKernel, nullptr); auto &expectedStateSaveAreaHeader = sipKernel->getStateSaveAreaHeader(); EXPECT_EQ(expectedStateSaveAreaHeader, stateSaveAreaHeader); } else { auto &expectedStateSaveAreaHeader = neoDevice->getBuiltIns()->getSipKernel(sipType, *neoDevice).getStateSaveAreaHeader(); EXPECT_EQ(expectedStateSaveAreaHeader, stateSaveAreaHeader); } } TEST(Debugger, givenDebuggingEnabledInExecEnvWhenAllocatingIsaThenSingleBankIsUsed) { auto executionEnvironment = new NEO::ExecutionEnvironment(); executionEnvironment->prepareRootDeviceEnvironments(1); executionEnvironment->setDebuggingEnabled(); auto hwInfo = *NEO::defaultHwInfo.get(); hwInfo.featureTable.flags.ftrLocalMemory = true; executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(&hwInfo); executionEnvironment->initializeMemoryManager(); std::unique_ptr neoDevice(NEO::MockDevice::create(executionEnvironment, 0u)); auto allocation = neoDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties( {neoDevice->getRootDeviceIndex(), 4096, NEO::AllocationType::KERNEL_ISA, neoDevice->getDeviceBitfield()}); if (allocation->getMemoryPool() == MemoryPool::LocalMemory) { EXPECT_EQ(1u, allocation->storageInfo.getMemoryBanks()); } else { EXPECT_EQ(0u, allocation->storageInfo.getMemoryBanks()); } neoDevice->getMemoryManager()->freeGraphicsMemory(allocation); } HWTEST_F(L0DebuggerTest, givenL0DebuggerWhenCreatedThenPerContextSbaTrackingBuffersAreAllocated) { auto debugger = device->getL0Debugger(); ASSERT_NE(nullptr, debugger); EXPECT_NE(0u, debugger->getSbaTrackingGpuVa()); std::vector allocations; auto &allEngines = device->getNEODevice()->getMemoryManager()->getRegisteredEngines(); for (auto &engine : allEngines) { auto sbaAllocation = debugger->getSbaTrackingBuffer(engine.osContext->getContextId()); 
ASSERT_NE(nullptr, sbaAllocation); allocations.push_back(sbaAllocation); EXPECT_EQ(NEO::AllocationType::DEBUG_SBA_TRACKING_BUFFER, sbaAllocation->getAllocationType()); EXPECT_EQ(MemoryPool::System4KBPages, sbaAllocation->getMemoryPool()); } for (uint32_t i = 0; i < allocations.size() - 1; i++) { EXPECT_NE(allocations[i], allocations[i + 1]); } EXPECT_EQ(allEngines.size(), getMockDebuggerL0Hw()->perContextSbaAllocations.size()); } HWTEST_F(L0DebuggerTest, givenCreatedL0DebuggerThenSbaTrackingBuffersContainValidHeader) { auto debugger = device->getL0Debugger(); ASSERT_NE(nullptr, debugger); for (auto &sbaBuffer : getMockDebuggerL0Hw()->perContextSbaAllocations) { auto sbaAllocation = sbaBuffer.second; ASSERT_NE(nullptr, sbaAllocation); auto sbaHeader = reinterpret_cast(sbaAllocation->getUnderlyingBuffer()); EXPECT_STREQ("sbaarea", sbaHeader->magic); EXPECT_EQ(0u, sbaHeader->BindlessSamplerStateBaseAddress); EXPECT_EQ(0u, sbaHeader->BindlessSurfaceStateBaseAddress); EXPECT_EQ(0u, sbaHeader->DynamicStateBaseAddress); EXPECT_EQ(0u, sbaHeader->GeneralStateBaseAddress); EXPECT_EQ(0u, sbaHeader->IndirectObjectBaseAddress); EXPECT_EQ(0u, sbaHeader->InstructionBaseAddress); EXPECT_EQ(0u, sbaHeader->SurfaceStateBaseAddress); EXPECT_EQ(0u, sbaHeader->Version); } } HWTEST_F(L0DebuggerTest, givenDebuggingEnabledWhenCommandListIsExecutedThenValidKernelDebugCommandsAreAdded) { using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; using STATE_SIP = typename FamilyType::STATE_SIP; ze_command_queue_desc_t queueDesc = {}; ze_result_t returnValue; auto commandQueue = whitebox_cast(CommandQueue::create(productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &queueDesc, false, false, returnValue)); ASSERT_NE(nullptr, commandQueue->commandStream); auto usedSpaceBefore = commandQueue->commandStream->getUsed(); ze_command_list_handle_t commandLists[] = { CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, 
returnValue)->toHandle()}; uint32_t numCommandLists = sizeof(commandLists) / sizeof(commandLists[0]); auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto usedSpaceAfter = commandQueue->commandStream->getUsed(); ASSERT_GT(usedSpaceAfter, usedSpaceBefore); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandQueue->commandStream->getCpuBase(), 0), usedSpaceAfter)); auto miLoadImm = findAll(cmdList.begin(), cmdList.end()); size_t debugModeRegisterCount = 0; size_t tdDebugControlRegisterCount = 0; for (size_t i = 0; i < miLoadImm.size(); i++) { MI_LOAD_REGISTER_IMM *miLoad = genCmdCast(*miLoadImm[i]); ASSERT_NE(nullptr, miLoad); if (miLoad->getRegisterOffset() == DebugModeRegisterOffset::registerOffset) { EXPECT_EQ(DebugModeRegisterOffset::debugEnabledValue, miLoad->getDataDword()); debugModeRegisterCount++; } if (miLoad->getRegisterOffset() == TdDebugControlRegisterOffset::registerOffset) { EXPECT_EQ(TdDebugControlRegisterOffset::debugEnabledValue, miLoad->getDataDword()); tdDebugControlRegisterCount++; } } // those register should not be used EXPECT_EQ(0u, debugModeRegisterCount); EXPECT_EQ(0u, tdDebugControlRegisterCount); if (!HwHelper::get(hwInfo.platform.eRenderCoreFamily).isSipWANeeded(hwInfo)) { auto stateSipCmds = findAll(cmdList.begin(), cmdList.end()); ASSERT_EQ(1u, stateSipCmds.size()); STATE_SIP *stateSip = genCmdCast(*stateSipCmds[0]); auto systemRoutine = SipKernel::getSipKernel(*neoDevice).getSipAllocation(); ASSERT_NE(nullptr, systemRoutine); EXPECT_EQ(systemRoutine->getGpuAddressToPatch(), stateSip->getSystemInstructionPointer()); } for (auto i = 0u; i < numCommandLists; i++) { auto commandList = CommandList::fromHandle(commandLists[i]); commandList->destroy(); } commandQueue->destroy(); } using NotGen8Or11 = AreNotGfxCores; HWTEST2_F(L0DebuggerTest, 
givenDebuggingEnabledAndRequiredGsbaWhenCommandListIsExecutedThenProgramGsbaWritesToSbaTrackingBuffer, NotGen8Or11) { using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM; using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; ze_command_queue_desc_t queueDesc = {}; ze_result_t returnValue; auto cmdQ = CommandQueue::create(productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &queueDesc, false, false, returnValue); ASSERT_NE(nullptr, cmdQ); auto commandQueue = whitebox_cast(cmdQ); auto cmdQHw = static_cast *>(cmdQ); if (cmdQHw->estimateStateBaseAddressCmdSize() == 0) { commandQueue->destroy(); GTEST_SKIP(); } auto usedSpaceBefore = commandQueue->commandStream->getUsed(); ze_command_list_handle_t commandLists[] = { CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)->toHandle()}; CommandList::fromHandle(commandLists[0])->setCommandListPerThreadScratchSize(4096); uint32_t numCommandLists = sizeof(commandLists) / sizeof(commandLists[0]); auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto usedSpaceAfter = commandQueue->commandStream->getUsed(); ASSERT_GT(usedSpaceAfter, usedSpaceBefore); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandQueue->commandStream->getCpuBase(), 0), usedSpaceAfter)); auto sbaItor = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), sbaItor); auto cmdSba = genCmdCast(*sbaItor); auto sdiItor = find(sbaItor, cmdList.end()); ASSERT_NE(cmdList.end(), sdiItor); auto cmdSdi = genCmdCast(*sdiItor); uint64_t gsbaGpuVa = cmdSba->getGeneralStateBaseAddress(); EXPECT_EQ(static_cast(gsbaGpuVa & 0x0000FFFFFFFFULL), cmdSdi->getDataDword0()); EXPECT_EQ(static_cast(gsbaGpuVa >> 32), cmdSdi->getDataDword1()); auto expectedGpuVa = GmmHelper::decanonize(device->getL0Debugger()->getSbaTrackingGpuVa()) + 
offsetof(SbaTrackedAddresses, GeneralStateBaseAddress); EXPECT_EQ(expectedGpuVa, cmdSdi->getAddress()); for (auto i = 0u; i < numCommandLists; i++) { auto commandList = CommandList::fromHandle(commandLists[i]); commandList->destroy(); } commandQueue->destroy(); } HWTEST_F(L0DebuggerTest, givenDebuggingEnabledAndDebuggerLogsWhenCommandQueueIsSynchronizedThenSbaAddressesArePrinted) { DebugManagerStateRestore restorer; NEO::DebugManager.flags.DebuggerLogBitmask.set(255); testing::internal::CaptureStdout(); ze_command_queue_desc_t queueDesc = {}; ze_result_t returnValue; auto commandQueue = whitebox_cast(CommandQueue::create(productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &queueDesc, false, false, returnValue)); ASSERT_NE(nullptr, commandQueue->commandStream); ze_command_list_handle_t commandLists[] = { CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)->toHandle()}; const uint32_t numCommandLists = sizeof(commandLists) / sizeof(commandLists[0]); auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true); EXPECT_EQ(ZE_RESULT_SUCCESS, result); commandQueue->synchronize(0); std::string output = testing::internal::GetCapturedStdout(); size_t pos = output.find("INFO: Debugger: SBA stored ssh"); EXPECT_NE(std::string::npos, pos); pos = output.find("Debugger: SBA ssh"); EXPECT_NE(std::string::npos, pos); auto commandList = CommandList::fromHandle(commandLists[0]); commandList->destroy(); commandQueue->destroy(); } using L0DebuggerSimpleTest = Test; HWTEST_F(L0DebuggerSimpleTest, givenNullL0DebuggerAndDebuggerLogsWhenCommandQueueIsSynchronizedThenSbaAddressesAreNotPrinted) { DebugManagerStateRestore restorer; NEO::DebugManager.flags.DebuggerLogBitmask.set(255); EXPECT_EQ(nullptr, device->getL0Debugger()); testing::internal::CaptureStdout(); ze_command_queue_desc_t queueDesc = {}; ze_result_t returnValue; auto commandQueue = 
whitebox_cast(CommandQueue::create(productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &queueDesc, false, false, returnValue)); ASSERT_NE(nullptr, commandQueue->commandStream); ze_command_list_handle_t commandLists[] = { CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)->toHandle()}; const uint32_t numCommandLists = sizeof(commandLists) / sizeof(commandLists[0]); auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true); EXPECT_EQ(ZE_RESULT_SUCCESS, result); commandQueue->synchronize(0); std::string output = testing::internal::GetCapturedStdout(); size_t pos = output.find("Debugger: SBA"); EXPECT_EQ(std::string::npos, pos); auto commandList = CommandList::fromHandle(commandLists[0]); commandList->destroy(); commandQueue->destroy(); } HWTEST_F(L0DebuggerTest, givenL0DebuggerAndDebuggerLogsDisabledWhenCommandQueueIsSynchronizedThenSbaAddressesAreNotPrinted) { DebugManagerStateRestore restorer; NEO::DebugManager.flags.DebuggerLogBitmask.set(0); EXPECT_NE(nullptr, device->getL0Debugger()); testing::internal::CaptureStdout(); ze_command_queue_desc_t queueDesc = {}; ze_result_t returnValue; auto commandQueue = whitebox_cast(CommandQueue::create(productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &queueDesc, false, false, returnValue)); ASSERT_NE(nullptr, commandQueue->commandStream); ze_command_list_handle_t commandLists[] = { CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)->toHandle()}; const uint32_t numCommandLists = sizeof(commandLists) / sizeof(commandLists[0]); auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true); EXPECT_EQ(ZE_RESULT_SUCCESS, result); commandQueue->synchronize(0); std::string output = testing::internal::GetCapturedStdout(); size_t pos = output.find("Debugger: SBA"); EXPECT_EQ(std::string::npos, pos); auto commandList = 
CommandList::fromHandle(commandLists[0]); commandList->destroy(); commandQueue->destroy(); } HWTEST2_F(L0DebuggerTest, givenDebuggingEnabledWhenNonCopyCommandListIsInititalizedOrResetThenSSHAddressIsTracked, NotGen8Or11) { using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; size_t usedSpaceBefore = 0; ze_result_t returnValue; ze_command_list_handle_t commandListHandle = CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)->toHandle(); auto commandList = CommandList::fromHandle(commandListHandle); auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed(); ASSERT_GT(usedSpaceAfter, usedSpaceBefore); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, commandList->commandContainer.getCommandStream()->getCpuBase(), usedSpaceAfter)); auto sbaItor = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), sbaItor); auto cmdSba = genCmdCast(*sbaItor); uint64_t sshGpuVa = cmdSba->getSurfaceStateBaseAddress(); auto expectedGpuVa = commandList->commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE)->getHeapGpuBase(); EXPECT_EQ(expectedGpuVa, sshGpuVa); EXPECT_EQ(1u, getMockDebuggerL0Hw()->captureStateBaseAddressCount); commandList->reset(); EXPECT_EQ(2u, getMockDebuggerL0Hw()->captureStateBaseAddressCount); commandList->destroy(); } HWTEST_F(L0DebuggerTest, givenDebuggerWhenAppendingKernelToCommandListThenBindlessSurfaceStateForDebugSurfaceIsProgrammedAtOffsetZero) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; Mock<::L0::Kernel> kernel; ze_result_t returnValue; std::unique_ptr commandList(L0::CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)); ze_group_count_t groupCount{1, 1, 1}; auto result = commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); commandList->close(); auto *ssh = 
commandList->commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE); auto debugSurfaceState = reinterpret_cast(ssh->getCpuBase()); auto debugSurface = static_cast(device)->getDebugSurface(); SURFACE_STATE_BUFFER_LENGTH length; length.Length = static_cast(debugSurface->getUnderlyingBufferSize() - 1); EXPECT_EQ(length.SurfaceState.Depth + 1u, debugSurfaceState->getDepth()); EXPECT_EQ(length.SurfaceState.Width + 1u, debugSurfaceState->getWidth()); EXPECT_EQ(length.SurfaceState.Height + 1u, debugSurfaceState->getHeight()); EXPECT_EQ(debugSurface->getGpuAddress(), debugSurfaceState->getSurfaceBaseAddress()); EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_BUFFER, debugSurfaceState->getSurfaceType()); EXPECT_EQ(RENDER_SURFACE_STATE::COHERENCY_TYPE_GPU_COHERENT, debugSurfaceState->getCoherencyType()); } HWTEST_F(L0DebuggerTest, givenDebuggerWhenAppendingKernelToCommandListThenDebugSurfaceiIsProgrammedWithL3DisabledMOCS) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; Mock<::L0::Kernel> kernel; ze_result_t returnValue; std::unique_ptr commandList(L0::CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)); ze_group_count_t groupCount{1, 1, 1}; auto result = commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); commandList->close(); auto *ssh = commandList->commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE); auto debugSurfaceState = reinterpret_cast(ssh->getCpuBase()); const auto mocsNoCache = device->getNEODevice()->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED) >> 1; const auto actualMocs = debugSurfaceState->getMemoryObjectControlState(); EXPECT_EQ(actualMocs, mocsNoCache); } HWTEST2_F(L0DebuggerTest, givenDebuggingEnabledWhenCommandListIsExecutedThenSbaBufferIsPushedToResidencyContainer, IsAtLeastSkl) { ze_command_queue_desc_t queueDesc = {}; std::unique_ptr, Deleter> 
commandQueue(new MockCommandQueueHw(device, neoDevice->getDefaultEngine().commandStreamReceiver, &queueDesc)); commandQueue->initialize(false, false); ze_result_t returnValue; ze_command_list_handle_t commandLists[] = { CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)->toHandle()}; uint32_t numCommandLists = sizeof(commandLists) / sizeof(commandLists[0]); auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto sbaBuffer = device->getL0Debugger()->getSbaTrackingBuffer(neoDevice->getDefaultEngine().commandStreamReceiver->getOsContext().getContextId()); bool sbaFound = false; for (auto iter : commandQueue->residencyContainerSnapshot) { if (iter == sbaBuffer) { sbaFound = true; } } EXPECT_TRUE(sbaFound); auto commandList = CommandList::fromHandle(commandLists[0]); commandList->destroy(); } HWTEST_F(L0DebuggerSimpleTest, givenUseCsrImmediateSubmissionEnabledWithImmediateCommandListToInvokeNonKernelOperationsThenSuccessIsReturned) { DebugManagerStateRestore restorer; NEO::DebugManager.flags.EnableFlushTaskSubmission.set(true); void *dstPtr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_host_mem_alloc_desc_t hostDesc = {}; auto result = context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, 16384u, 4096u, &dstPtr); ASSERT_EQ(ZE_RESULT_SUCCESS, result); ze_command_queue_desc_t desc = {}; desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS; ze_result_t returnValue; std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue)); ASSERT_NE(nullptr, commandList); EXPECT_EQ(device, commandList->device); EXPECT_EQ(1u, commandList->cmdListType); EXPECT_NE(nullptr, commandList->cmdQImmediate); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; 
ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST; eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; ze_event_handle_t event = nullptr; std::unique_ptr eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); ASSERT_NE(nullptr, eventPool); eventPool->createEvent(&eventDesc, &event); std::unique_ptr event_object(L0::Event::fromHandle(event)); ASSERT_NE(nullptr, event_object->csr); ASSERT_EQ(static_cast(device)->getNEODevice()->getDefaultEngine().commandStreamReceiver, event_object->csr); returnValue = commandList->appendWaitOnEvents(1, &event); EXPECT_EQ(returnValue, ZE_RESULT_SUCCESS); returnValue = commandList->appendBarrier(nullptr, 1, &event); EXPECT_EQ(returnValue, ZE_RESULT_SUCCESS); returnValue = commandList->appendSignalEvent(event); EXPECT_EQ(returnValue, ZE_RESULT_SUCCESS); returnValue = event_object->hostSignal(); ASSERT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(event_object->queryStatus(), ZE_RESULT_SUCCESS); returnValue = commandList->appendWriteGlobalTimestamp(reinterpret_cast(dstPtr), nullptr, 0, nullptr); EXPECT_EQ(returnValue, ZE_RESULT_SUCCESS); returnValue = commandList->appendEventReset(event); EXPECT_EQ(returnValue, ZE_RESULT_SUCCESS); context->freeMem(dstPtr); } HWTEST_F(L0DebuggerSimpleTest, givenUseCsrImmediateSubmissionDisabledWithImmediateCommandListToInvokeNonKernelOperationsThenSuccessIsReturned) { DebugManagerStateRestore restorer; NEO::DebugManager.flags.EnableFlushTaskSubmission.set(false); void *dstPtr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_host_mem_alloc_desc_t hostDesc = {}; auto result = context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, 16384u, 4096u, &dstPtr); ASSERT_EQ(ZE_RESULT_SUCCESS, result); ze_command_queue_desc_t desc = {}; desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS; ze_result_t returnValue; std::unique_ptr 
commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue)); ASSERT_NE(nullptr, commandList); EXPECT_EQ(device, commandList->device); EXPECT_EQ(1u, commandList->cmdListType); EXPECT_NE(nullptr, commandList->cmdQImmediate); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST; eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; ze_event_handle_t event = nullptr; std::unique_ptr eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); ASSERT_NE(nullptr, eventPool); eventPool->createEvent(&eventDesc, &event); std::unique_ptr event_object(L0::Event::fromHandle(event)); ASSERT_NE(nullptr, event_object->csr); ASSERT_EQ(static_cast(device)->getNEODevice()->getDefaultEngine().commandStreamReceiver, event_object->csr); returnValue = commandList->appendWaitOnEvents(1, &event); EXPECT_EQ(returnValue, ZE_RESULT_SUCCESS); returnValue = commandList->appendBarrier(nullptr, 1, &event); EXPECT_EQ(returnValue, ZE_RESULT_SUCCESS); returnValue = commandList->appendSignalEvent(event); EXPECT_EQ(returnValue, ZE_RESULT_SUCCESS); returnValue = event_object->hostSignal(); ASSERT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(event_object->queryStatus(), ZE_RESULT_SUCCESS); returnValue = commandList->appendWriteGlobalTimestamp(reinterpret_cast(dstPtr), nullptr, 0, nullptr); EXPECT_EQ(returnValue, ZE_RESULT_SUCCESS); returnValue = commandList->appendEventReset(event); EXPECT_EQ(returnValue, ZE_RESULT_SUCCESS); context->freeMem(dstPtr); } HWTEST_F(L0DebuggerSimpleTest, givenUseCsrImmediateSubmissionEnabledForImmediateCommandListForAppendMemoryFillWithDeviceMemoryThenSuccessIsReturned) { DebugManagerStateRestore restorer; NEO::DebugManager.flags.EnableFlushTaskSubmission.set(true); void *dstPtr 
= nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; auto result = context->allocDeviceMem(device->toHandle(), &deviceDesc, 16384u, 4096u, &dstPtr); ASSERT_EQ(ZE_RESULT_SUCCESS, result); int pattern = 1; ze_command_queue_desc_t queueDesc = {}; queueDesc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS; ze_result_t returnValue = ZE_RESULT_SUCCESS; auto commandList = CommandList::createImmediate(productFamily, device, &queueDesc, true, NEO::EngineGroupType::RenderCompute, returnValue); result = commandList->appendMemoryFill(dstPtr, reinterpret_cast(&pattern), sizeof(pattern), 4096u, nullptr, 0, nullptr); ASSERT_EQ(ZE_RESULT_SUCCESS, result); context->freeMem(dstPtr); commandList->destroy(); } HWTEST_F(L0DebuggerSimpleTest, givenUseCsrImmediateSubmissionEnabledForImmediateCommandListForAppendMemoryFillThenSuccessIsReturned) { DebugManagerStateRestore restorer; NEO::DebugManager.flags.EnableFlushTaskSubmission.set(true); void *dstPtr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_host_mem_alloc_desc_t hostDesc = {}; auto result = context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, 16384u, 4096u, &dstPtr); ASSERT_EQ(ZE_RESULT_SUCCESS, result); int pattern = 1; ze_command_queue_desc_t queueDesc = {}; queueDesc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS; ze_result_t returnValue = ZE_RESULT_SUCCESS; auto commandList = CommandList::createImmediate(productFamily, device, &queueDesc, true, NEO::EngineGroupType::RenderCompute, returnValue); result = commandList->appendMemoryFill(dstPtr, reinterpret_cast(&pattern), sizeof(pattern), 4096u, nullptr, 0, nullptr); ASSERT_EQ(ZE_RESULT_SUCCESS, result); context->freeMem(dstPtr); commandList->destroy(); } HWTEST_F(L0DebuggerSimpleTest, givenUseCsrImmediateSubmissionDisabledForImmediateCommandListForAppendMemoryFillThenSuccessIsReturned) { DebugManagerStateRestore restorer; NEO::DebugManager.flags.EnableFlushTaskSubmission.set(false); void *dstPtr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; 
ze_host_mem_alloc_desc_t hostDesc = {}; auto result = context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, 16384u, 4096u, &dstPtr); ASSERT_EQ(ZE_RESULT_SUCCESS, result); int pattern = 1; ze_command_queue_desc_t queueDesc = {}; queueDesc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS; ze_result_t returnValue = ZE_RESULT_SUCCESS; auto commandList = CommandList::createImmediate(productFamily, device, &queueDesc, true, NEO::EngineGroupType::RenderCompute, returnValue); result = commandList->appendMemoryFill(dstPtr, reinterpret_cast(&pattern), sizeof(pattern), 4096u, nullptr, 0, nullptr); ASSERT_EQ(ZE_RESULT_SUCCESS, result); context->freeMem(dstPtr); commandList->destroy(); } HWTEST_F(L0DebuggerSimpleTest, givenUseCsrImmediateSubmissionEnabledForRegularCommandListForAppendMemoryFillThenSuccessIsReturned) { DebugManagerStateRestore restorer; NEO::DebugManager.flags.EnableFlushTaskSubmission.set(true); ze_command_queue_desc_t queueDesc = {}; ze_result_t returnValue; auto commandQueue = whitebox_cast(CommandQueue::create(productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &queueDesc, false, false, returnValue)); ASSERT_NE(nullptr, commandQueue->commandStream); ze_command_list_handle_t commandLists[] = { CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)->toHandle()}; const uint32_t numCommandLists = sizeof(commandLists) / sizeof(commandLists[0]); void *dstPtr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; auto result = context->allocDeviceMem(device->toHandle(), &deviceDesc, 16384u, 4096u, &dstPtr); ASSERT_EQ(ZE_RESULT_SUCCESS, result); int pattern = 1; auto commandList = CommandList::fromHandle(commandLists[0]); result = commandList->appendMemoryFill(dstPtr, reinterpret_cast(&pattern), sizeof(pattern), 4096u, nullptr, 0, nullptr); ASSERT_EQ(ZE_RESULT_SUCCESS, result); result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true); EXPECT_EQ(ZE_RESULT_SUCCESS, 
result); commandQueue->synchronize(0); context->freeMem(dstPtr); commandList->destroy(); commandQueue->destroy(); } HWTEST_F(L0DebuggerSimpleTest, givenUseCsrImmediateSubmissionDisabledForRegularCommandListForAppendMemoryFillThenSuccessIsReturned) { DebugManagerStateRestore restorer; NEO::DebugManager.flags.EnableFlushTaskSubmission.set(false); ze_command_queue_desc_t queueDesc = {}; ze_result_t returnValue; auto commandQueue = whitebox_cast(CommandQueue::create(productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &queueDesc, false, false, returnValue)); ASSERT_NE(nullptr, commandQueue->commandStream); ze_command_list_handle_t commandLists[] = { CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)->toHandle()}; const uint32_t numCommandLists = sizeof(commandLists) / sizeof(commandLists[0]); void *dstPtr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_host_mem_alloc_desc_t hostDesc = {}; auto result = context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, 16384u, 4096u, &dstPtr); ASSERT_EQ(ZE_RESULT_SUCCESS, result); int pattern = 1; auto commandList = CommandList::fromHandle(commandLists[0]); result = commandList->appendMemoryFill(dstPtr, reinterpret_cast(&pattern), sizeof(pattern), 4096u, nullptr, 0, nullptr); ASSERT_EQ(ZE_RESULT_SUCCESS, result); result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true); EXPECT_EQ(ZE_RESULT_SUCCESS, result); commandQueue->synchronize(0); context->freeMem(dstPtr); commandList->destroy(); commandQueue->destroy(); } HWTEST2_F(L0DebuggerSimpleTest, givenUseCsrImmediateSubmissionEnabledCommandListAndAppendPageFaultCopyThenSuccessIsReturned, IsAtLeastSkl) { DebugManagerStateRestore restorer; NEO::DebugManager.flags.EnableFlushTaskSubmission.set(true); size_t size = (sizeof(uint32_t) * 4); ze_command_queue_desc_t queueDesc = {}; ze_result_t returnValue = ZE_RESULT_SUCCESS; auto commandList = 
CommandList::createImmediate(productFamily, device, &queueDesc, true, NEO::EngineGroupType::RenderCompute, returnValue); ASSERT_NE(nullptr, commandList); NEO::GraphicsAllocation srcPtr(0, NEO::AllocationType::INTERNAL_HOST_MEMORY, reinterpret_cast(0x1234), size, 0, sizeof(uint32_t), MemoryPool::System4KBPages); NEO::GraphicsAllocation dstPtr(0, NEO::AllocationType::INTERNAL_HOST_MEMORY, reinterpret_cast(0x2345), size, 0, sizeof(uint32_t), MemoryPool::System4KBPages); auto result = commandList->appendPageFaultCopy(&dstPtr, &srcPtr, 0x100, false); ASSERT_EQ(ZE_RESULT_SUCCESS, result); commandList->destroy(); } HWTEST2_F(L0DebuggerSimpleTest, givenUseCsrImmediateSubmissionDisabledCommandListAndAppendPageFaultCopyThenSuccessIsReturned, IsAtLeastSkl) { DebugManagerStateRestore restorer; NEO::DebugManager.flags.EnableFlushTaskSubmission.set(false); size_t size = (sizeof(uint32_t) * 4); ze_command_queue_desc_t queueDesc = {}; ze_result_t returnValue = ZE_RESULT_SUCCESS; auto commandList = CommandList::createImmediate(productFamily, device, &queueDesc, true, NEO::EngineGroupType::RenderCompute, returnValue); ASSERT_NE(nullptr, commandList); NEO::GraphicsAllocation srcPtr(0, NEO::AllocationType::INTERNAL_HOST_MEMORY, reinterpret_cast(0x1234), size, 0, sizeof(uint32_t), MemoryPool::System4KBPages); NEO::GraphicsAllocation dstPtr(0, NEO::AllocationType::INTERNAL_HOST_MEMORY, reinterpret_cast(0x2345), size, 0, sizeof(uint32_t), MemoryPool::System4KBPages); auto result = commandList->appendPageFaultCopy(&dstPtr, &srcPtr, 0x100, false); ASSERT_EQ(ZE_RESULT_SUCCESS, result); commandList->destroy(); } HWTEST_F(L0DebuggerSimpleTest, givenNonZeroGpuVasWhenProgrammingSbaTrackingThenCorrectCmdsAreAddedToStream) { using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM; auto debugger = std::make_unique>(neoDevice); debugger->sbaTrackingGpuVa.address = 0x45670000; auto expectedGpuVa = debugger->sbaTrackingGpuVa.address + offsetof(SbaTrackedAddresses, GeneralStateBaseAddress); 
StackVec buffer(4096); NEO::LinearStream cmdStream(buffer.begin(), buffer.size()); uint64_t gsba = 0x60000; uint64_t ssba = 0x1234567000; uint64_t iba = 0xfff80000; uint64_t ioba = 0x8100000; uint64_t dsba = 0xffff0000aaaa0000; NEO::Debugger::SbaAddresses sbaAddresses = {}; sbaAddresses.GeneralStateBaseAddress = gsba; sbaAddresses.SurfaceStateBaseAddress = ssba; sbaAddresses.InstructionBaseAddress = iba; sbaAddresses.IndirectObjectBaseAddress = ioba; sbaAddresses.DynamicStateBaseAddress = dsba; sbaAddresses.BindlessSurfaceStateBaseAddress = ssba; debugger->programSbaTrackingCommands(cmdStream, sbaAddresses); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, cmdStream.getCpuBase(), cmdStream.getUsed())); EXPECT_EQ(6 * sizeof(MI_STORE_DATA_IMM), cmdStream.getUsed()); auto sdiItor = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), sdiItor); auto cmdSdi = genCmdCast(*sdiItor); EXPECT_EQ(static_cast(gsba & 0x0000FFFFFFFFULL), cmdSdi->getDataDword0()); EXPECT_EQ(static_cast(gsba >> 32), cmdSdi->getDataDword1()); EXPECT_EQ(expectedGpuVa, cmdSdi->getAddress()); EXPECT_TRUE(cmdSdi->getStoreQword()); sdiItor++; cmdSdi = genCmdCast(*sdiItor); expectedGpuVa = debugger->sbaTrackingGpuVa.address + offsetof(SbaTrackedAddresses, SurfaceStateBaseAddress); EXPECT_EQ(static_cast(ssba & 0x0000FFFFFFFFULL), cmdSdi->getDataDword0()); EXPECT_EQ(static_cast(ssba >> 32), cmdSdi->getDataDword1()); EXPECT_EQ(expectedGpuVa, cmdSdi->getAddress()); EXPECT_TRUE(cmdSdi->getStoreQword()); sdiItor++; cmdSdi = genCmdCast(*sdiItor); expectedGpuVa = debugger->sbaTrackingGpuVa.address + offsetof(SbaTrackedAddresses, DynamicStateBaseAddress); auto decanonizedAddress = GmmHelper::decanonize(dsba); EXPECT_EQ(static_cast(decanonizedAddress & 0x0000FFFFFFFFULL), cmdSdi->getDataDword0()); EXPECT_EQ(static_cast(decanonizedAddress >> 32), cmdSdi->getDataDword1()); EXPECT_EQ(expectedGpuVa, cmdSdi->getAddress()); EXPECT_TRUE(cmdSdi->getStoreQword()); sdiItor++; cmdSdi = 
genCmdCast(*sdiItor); expectedGpuVa = debugger->sbaTrackingGpuVa.address + offsetof(SbaTrackedAddresses, IndirectObjectBaseAddress); EXPECT_EQ(static_cast(ioba & 0x0000FFFFFFFFULL), cmdSdi->getDataDword0()); EXPECT_EQ(static_cast(ioba >> 32), cmdSdi->getDataDword1()); EXPECT_EQ(expectedGpuVa, cmdSdi->getAddress()); EXPECT_TRUE(cmdSdi->getStoreQword()); sdiItor++; cmdSdi = genCmdCast(*sdiItor); expectedGpuVa = debugger->sbaTrackingGpuVa.address + offsetof(SbaTrackedAddresses, InstructionBaseAddress); EXPECT_EQ(static_cast(iba & 0x0000FFFFFFFFULL), cmdSdi->getDataDword0()); EXPECT_EQ(static_cast(iba >> 32), cmdSdi->getDataDword1()); EXPECT_EQ(expectedGpuVa, cmdSdi->getAddress()); EXPECT_TRUE(cmdSdi->getStoreQword()); sdiItor++; cmdSdi = genCmdCast(*sdiItor); expectedGpuVa = debugger->sbaTrackingGpuVa.address + offsetof(SbaTrackedAddresses, BindlessSurfaceStateBaseAddress); EXPECT_EQ(static_cast(ssba & 0x0000FFFFFFFFULL), cmdSdi->getDataDword0()); EXPECT_EQ(static_cast(ssba >> 32), cmdSdi->getDataDword1()); EXPECT_EQ(expectedGpuVa, cmdSdi->getAddress()); EXPECT_TRUE(cmdSdi->getStoreQword()); } HWTEST_F(L0DebuggerSimpleTest, givenCanonizedGpuVasWhenProgrammingSbaTrackingThenNonCanonicalAddressesAreStored) { using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM; auto debugger = std::make_unique>(neoDevice); debugger->sbaTrackingGpuVa.address = 0x45670000; auto expectedGpuVa = debugger->sbaTrackingGpuVa.address + offsetof(SbaTrackedAddresses, GeneralStateBaseAddress); StackVec buffer(4096); NEO::LinearStream cmdStream(buffer.begin(), buffer.size()); uint64_t gsba = 0xffff800000060000; uint64_t ssba = 0xffff801234567000; uint64_t iba = 0xffff8000fff80000; uint64_t ioba = 0xffff800008100000; uint64_t dsba = 0xffff8000aaaa0000; NEO::Debugger::SbaAddresses sbaAddresses = {}; sbaAddresses.GeneralStateBaseAddress = gsba; sbaAddresses.SurfaceStateBaseAddress = ssba; sbaAddresses.InstructionBaseAddress = iba; sbaAddresses.IndirectObjectBaseAddress = ioba; 
sbaAddresses.DynamicStateBaseAddress = dsba; sbaAddresses.BindlessSurfaceStateBaseAddress = ssba; debugger->programSbaTrackingCommands(cmdStream, sbaAddresses); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, cmdStream.getCpuBase(), cmdStream.getUsed())); EXPECT_EQ(6 * sizeof(MI_STORE_DATA_IMM), cmdStream.getUsed()); auto sdiItor = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), sdiItor); auto cmdSdi = genCmdCast(*sdiItor); auto decanonizedAddress = GmmHelper::decanonize(gsba); EXPECT_EQ(static_cast(decanonizedAddress & 0x0000FFFFFFFFULL), cmdSdi->getDataDword0()); EXPECT_EQ(static_cast(decanonizedAddress >> 32), cmdSdi->getDataDword1()); EXPECT_EQ(expectedGpuVa, cmdSdi->getAddress()); EXPECT_TRUE(cmdSdi->getStoreQword()); sdiItor++; cmdSdi = genCmdCast(*sdiItor); expectedGpuVa = debugger->sbaTrackingGpuVa.address + offsetof(SbaTrackedAddresses, SurfaceStateBaseAddress); decanonizedAddress = GmmHelper::decanonize(ssba); EXPECT_EQ(static_cast(decanonizedAddress & 0x0000FFFFFFFFULL), cmdSdi->getDataDword0()); EXPECT_EQ(static_cast(decanonizedAddress >> 32), cmdSdi->getDataDword1()); EXPECT_EQ(expectedGpuVa, cmdSdi->getAddress()); EXPECT_TRUE(cmdSdi->getStoreQword()); sdiItor++; cmdSdi = genCmdCast(*sdiItor); expectedGpuVa = debugger->sbaTrackingGpuVa.address + offsetof(SbaTrackedAddresses, DynamicStateBaseAddress); decanonizedAddress = GmmHelper::decanonize(dsba); EXPECT_EQ(static_cast(decanonizedAddress & 0x0000FFFFFFFFULL), cmdSdi->getDataDword0()); EXPECT_EQ(static_cast(decanonizedAddress >> 32), cmdSdi->getDataDword1()); EXPECT_EQ(expectedGpuVa, cmdSdi->getAddress()); EXPECT_TRUE(cmdSdi->getStoreQword()); sdiItor++; cmdSdi = genCmdCast(*sdiItor); expectedGpuVa = debugger->sbaTrackingGpuVa.address + offsetof(SbaTrackedAddresses, IndirectObjectBaseAddress); decanonizedAddress = GmmHelper::decanonize(ioba); EXPECT_EQ(static_cast(decanonizedAddress & 0x0000FFFFFFFFULL), cmdSdi->getDataDword0()); 
EXPECT_EQ(static_cast(decanonizedAddress >> 32), cmdSdi->getDataDword1()); EXPECT_EQ(expectedGpuVa, cmdSdi->getAddress()); EXPECT_TRUE(cmdSdi->getStoreQword()); sdiItor++; cmdSdi = genCmdCast(*sdiItor); expectedGpuVa = debugger->sbaTrackingGpuVa.address + offsetof(SbaTrackedAddresses, InstructionBaseAddress); decanonizedAddress = GmmHelper::decanonize(iba); EXPECT_EQ(static_cast(decanonizedAddress & 0x0000FFFFFFFFULL), cmdSdi->getDataDword0()); EXPECT_EQ(static_cast(decanonizedAddress >> 32), cmdSdi->getDataDword1()); EXPECT_EQ(expectedGpuVa, cmdSdi->getAddress()); EXPECT_TRUE(cmdSdi->getStoreQword()); sdiItor++; cmdSdi = genCmdCast(*sdiItor); expectedGpuVa = debugger->sbaTrackingGpuVa.address + offsetof(SbaTrackedAddresses, BindlessSurfaceStateBaseAddress); decanonizedAddress = GmmHelper::decanonize(ssba); EXPECT_EQ(static_cast(decanonizedAddress & 0x0000FFFFFFFFULL), cmdSdi->getDataDword0()); EXPECT_EQ(static_cast(decanonizedAddress >> 32), cmdSdi->getDataDword1()); EXPECT_EQ(expectedGpuVa, cmdSdi->getAddress()); EXPECT_TRUE(cmdSdi->getStoreQword()); } HWTEST_F(L0DebuggerSimpleTest, givenZeroGpuVasWhenProgrammingSbaTrackingThenStreamIsNotUsed) { using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM; auto debugger = std::make_unique>(neoDevice); debugger->sbaTrackingGpuVa.address = 0x45670000; StackVec buffer(4096); NEO::LinearStream cmdStream(buffer.begin(), buffer.size()); uint64_t gsba = 0; uint64_t ssba = 0; NEO::Debugger::SbaAddresses sbaAddresses = {}; sbaAddresses.GeneralStateBaseAddress = gsba; sbaAddresses.SurfaceStateBaseAddress = ssba; debugger->programSbaTrackingCommands(cmdStream, sbaAddresses); EXPECT_EQ(0u, cmdStream.getUsed()); } HWTEST_F(L0DebuggerSimpleTest, whenAllocateCalledThenDebuggerIsCreated) { auto debugger = DebuggerL0Hw::allocate(neoDevice); EXPECT_NE(nullptr, debugger); delete debugger; } HWTEST_F(L0DebuggerSimpleTest, givenNotChangedSurfaceStateWhenCapturingSBAThenNoTrackingCmdsAreAdded) { auto debugger = 
std::make_unique>(neoDevice); debugger->sbaTrackingGpuVa.address = 0x45670000; NEO::CommandContainer container; container.initialize(neoDevice, nullptr, true); NEO::Debugger::SbaAddresses sba = {}; sba.SurfaceStateBaseAddress = 0x123456000; debugger->captureStateBaseAddress(container, sba); auto sizeUsed = container.getCommandStream()->getUsed(); EXPECT_NE(0u, sizeUsed); sba.SurfaceStateBaseAddress = 0; debugger->captureStateBaseAddress(container, sba); auto sizeUsed2 = container.getCommandStream()->getUsed(); EXPECT_EQ(sizeUsed, sizeUsed2); } HWTEST_F(L0DebuggerSimpleTest, givenChangedBaseAddressesWhenCapturingSBAThenNoTrackingCmdsAreAdded) { auto debugger = std::make_unique>(neoDevice); debugger->sbaTrackingGpuVa.address = 0x45670000; { NEO::CommandContainer container; container.initialize(neoDevice, nullptr, true); NEO::Debugger::SbaAddresses sba = {}; sba.SurfaceStateBaseAddress = 0x123456000; debugger->captureStateBaseAddress(container, sba); auto sizeUsed = container.getCommandStream()->getUsed(); EXPECT_NE(0u, sizeUsed); } { NEO::CommandContainer container; container.initialize(neoDevice, nullptr, true); NEO::Debugger::SbaAddresses sba = {}; sba.GeneralStateBaseAddress = 0x123456000; debugger->captureStateBaseAddress(container, sba); auto sizeUsed = container.getCommandStream()->getUsed(); EXPECT_NE(0u, sizeUsed); } { NEO::CommandContainer container; container.initialize(neoDevice, nullptr, true); NEO::Debugger::SbaAddresses sba = {}; sba.BindlessSurfaceStateBaseAddress = 0x123456000; debugger->captureStateBaseAddress(container, sba); auto sizeUsed = container.getCommandStream()->getUsed(); EXPECT_NE(0u, sizeUsed); } } HWTEST_F(L0DebuggerTest, givenDebuggerWhenCreatedThenModuleHeapDebugAreaIsCreated) { auto mockBlitMemoryToAllocation = [](const NEO::Device &device, NEO::GraphicsAllocation *memory, size_t offset, const void *hostPtr, Vec3 size) -> NEO::BlitOperationResult { memcpy(memory->getUnderlyingBuffer(), hostPtr, size.x); return 
BlitOperationResult::Success; }; VariableBackup blitMemoryToAllocationFuncBackup( &NEO::BlitHelperFunctions::blitMemoryToAllocation, mockBlitMemoryToAllocation); memoryOperationsHandler->makeResidentCalledCount = 0; auto debugger = std::make_unique>(neoDevice); auto debugArea = debugger->getModuleDebugArea(); EXPECT_EQ(1, memoryOperationsHandler->makeResidentCalledCount); auto allocation = neoDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties( {neoDevice->getRootDeviceIndex(), 4096, NEO::AllocationType::KERNEL_ISA, neoDevice->getDeviceBitfield()}); EXPECT_EQ(allocation->storageInfo.getMemoryBanks(), debugArea->storageInfo.getMemoryBanks()); DebugAreaHeader *header = reinterpret_cast(debugArea->getUnderlyingBuffer()); EXPECT_EQ(1u, header->pgsize); uint64_t isShared = debugArea->storageInfo.getNumBanks() == 1 ? 1 : 0; EXPECT_EQ(isShared, header->isShared); EXPECT_STREQ("dbgarea", header->magic); EXPECT_EQ(sizeof(DebugAreaHeader), header->size); EXPECT_EQ(sizeof(DebugAreaHeader), header->scratchBegin); EXPECT_EQ(MemoryConstants::pageSize64k - sizeof(DebugAreaHeader), header->scratchEnd); neoDevice->getMemoryManager()->freeGraphicsMemory(allocation); } HWTEST_F(L0DebuggerTest, givenBindlessSipWhenModuleHeapDebugAreaIsCreatedThenReservedFieldIsSet) { DebugManagerStateRestore restorer; NEO::DebugManager.flags.UseBindlessDebugSip.set(1); auto mockBlitMemoryToAllocation = [](const NEO::Device &device, NEO::GraphicsAllocation *memory, size_t offset, const void *hostPtr, Vec3 size) -> NEO::BlitOperationResult { memcpy(memory->getUnderlyingBuffer(), hostPtr, size.x); return BlitOperationResult::Success; }; VariableBackup blitMemoryToAllocationFuncBackup( &NEO::BlitHelperFunctions::blitMemoryToAllocation, mockBlitMemoryToAllocation); memoryOperationsHandler->makeResidentCalledCount = 0; auto debugger = std::make_unique>(neoDevice); auto debugArea = debugger->getModuleDebugArea(); DebugAreaHeader *header = reinterpret_cast(debugArea->getUnderlyingBuffer()); 
EXPECT_EQ(1u, header->reserved1); } HWTEST_F(L0DebuggerTest, givenBindfulSipWhenModuleHeapDebugAreaIsCreatedThenReservedFieldIsNotSet) { DebugManagerStateRestore restorer; NEO::DebugManager.flags.UseBindlessDebugSip.set(0); auto mockBlitMemoryToAllocation = [](const NEO::Device &device, NEO::GraphicsAllocation *memory, size_t offset, const void *hostPtr, Vec3 size) -> NEO::BlitOperationResult { memcpy(memory->getUnderlyingBuffer(), hostPtr, size.x); return BlitOperationResult::Success; }; VariableBackup blitMemoryToAllocationFuncBackup( &NEO::BlitHelperFunctions::blitMemoryToAllocation, mockBlitMemoryToAllocation); memoryOperationsHandler->makeResidentCalledCount = 0; auto debugger = std::make_unique>(neoDevice); auto debugArea = debugger->getModuleDebugArea(); DebugAreaHeader *header = reinterpret_cast(debugArea->getUnderlyingBuffer()); EXPECT_EQ(0u, header->reserved1); } TEST(Debugger, givenNonLegacyDebuggerWhenInitializingDeviceCapsThenUnrecoverableIsCalled) { class MockDebugger : public NEO::Debugger { public: MockDebugger() { isLegacyMode = false; } void captureStateBaseAddress(CommandContainer &container, SbaAddresses sba) override{}; }; auto executionEnvironment = new NEO::ExecutionEnvironment(); auto mockBuiltIns = new MockBuiltins(); executionEnvironment->prepareRootDeviceEnvironments(1); executionEnvironment->rootDeviceEnvironments[0]->builtins.reset(mockBuiltIns); executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); auto debugger = new MockDebugger; executionEnvironment->rootDeviceEnvironments[0]->debugger.reset(debugger); executionEnvironment->initializeMemoryManager(); EXPECT_THROW(NEO::MockDevice::create(executionEnvironment, 0u), std::exception); } using NotXeHPOrDG2 = AreNotGfxCores; HWTEST2_F(L0DebuggerTest, givenNotAtsOrDg2AndDebugIsActiveThenDisableL3CacheInGmmHelperIsNotSet, NotXeHPOrDG2) { EXPECT_FALSE(static_cast(neoDevice->getGmmHelper())->allResourcesUncached); } using ATSOrDG2 = IsWithinGfxCore; 
HWTEST2_F(L0DebuggerTest, givenAtsOrDg2AndDebugIsActiveThenDisableL3CacheInGmmHelperIsSet, ATSOrDG2) { EXPECT_TRUE(static_cast(neoDevice->getGmmHelper())->allResourcesUncached); } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/debugger/test_l0_debugger_2.cpp000066400000000000000000000371351422164147700332530ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/source/image/image_hw.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h" #include "level_zero/core/test/unit_tests/mocks/mock_kernel.h" #include "level_zero/core/test/unit_tests/sources/debugger/l0_debugger_fixture.h" namespace L0 { namespace ult { using L0DebuggerTest = Test; struct L0DebuggerInternalUsageTest : public L0DebuggerTest { void SetUp() override { VariableBackup backupHwInfo(defaultHwInfo.get()); defaultHwInfo->capabilityTable.blitterOperationsSupported = true; L0DebuggerTest::SetUp(); } }; HWTEST_F(L0DebuggerInternalUsageTest, givenFlushTaskSubmissionEnabledWhenCommandListIsInititalizedOrResetThenCaptureSbaIsNotCalled) { using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; DebugManagerStateRestore restorer; NEO::DebugManager.flags.EnableFlushTaskSubmission.set(true); size_t usedSpaceBefore = 0; ze_command_queue_desc_t queueDesc = {}; ze_result_t returnValue = ZE_RESULT_SUCCESS; auto commandList = CommandList::createImmediate(productFamily, device, &queueDesc, true, NEO::EngineGroupType::RenderCompute, returnValue); auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed(); ASSERT_GE(usedSpaceAfter, usedSpaceBefore); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, commandList->commandContainer.getCommandStream()->getCpuBase(), usedSpaceAfter)); auto sbaItor = 
find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), sbaItor); EXPECT_EQ(0u, getMockDebuggerL0Hw()->captureStateBaseAddressCount); commandList->reset(); EXPECT_EQ(0u, getMockDebuggerL0Hw()->captureStateBaseAddressCount); commandList->destroy(); } HWTEST_F(L0DebuggerInternalUsageTest, givenFlushTaskSubmissionDisabledWhenCommandListIsInititalizedOrResetThenCaptureSbaIsCalled) { using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; DebugManagerStateRestore restorer; NEO::DebugManager.flags.EnableFlushTaskSubmission.set(false); size_t usedSpaceBefore = 0; ze_command_queue_desc_t queueDesc = {}; ze_result_t returnValue = ZE_RESULT_SUCCESS; auto commandList = CommandList::createImmediate(productFamily, device, &queueDesc, true, NEO::EngineGroupType::RenderCompute, returnValue); auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed(); ASSERT_GT(usedSpaceAfter, usedSpaceBefore); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, commandList->commandContainer.getCommandStream()->getCpuBase(), usedSpaceAfter)); auto sbaItor = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), sbaItor); EXPECT_EQ(0u, getMockDebuggerL0Hw()->captureStateBaseAddressCount); commandList->reset(); EXPECT_EQ(0u, getMockDebuggerL0Hw()->captureStateBaseAddressCount); commandList->destroy(); } HWTEST_F(L0DebuggerInternalUsageTest, givenDebuggerLogsDisabledWhenCommandListIsSynchronizedThenSbaAddressesAreNotPrinted) { DebugManagerStateRestore restorer; NEO::DebugManager.flags.DebuggerLogBitmask.set(0); EXPECT_NE(nullptr, device->getL0Debugger()); testing::internal::CaptureStdout(); ze_command_queue_desc_t queueDesc = {}; ze_result_t returnValue = ZE_RESULT_SUCCESS; auto commandList = CommandList::createImmediate(productFamily, device, &queueDesc, true, NEO::EngineGroupType::RenderCompute, returnValue); commandList->executeCommandListImmediate(false); std::string output = testing::internal::GetCapturedStdout(); 
size_t pos = output.find("Debugger: SBA"); EXPECT_EQ(std::string::npos, pos); commandList->destroy(); } HWTEST_F(L0DebuggerInternalUsageTest, givenUseCsrImmediateSubmissionEnabledForImmediateCommandListForAppendLaunchKernelThenSuccessIsReturned) { Mock<::L0::Kernel> kernel; DebugManagerStateRestore restorer; NEO::DebugManager.flags.EnableFlushTaskSubmission.set(true); ze_command_queue_desc_t queueDesc = {}; ze_result_t returnValue = ZE_RESULT_SUCCESS; ze_group_count_t groupCount{1, 1, 1}; auto commandList = CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::RenderCompute, returnValue); auto result = commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr); ASSERT_EQ(ZE_RESULT_SUCCESS, result); commandList->destroy(); } HWTEST_F(L0DebuggerInternalUsageTest, givenUseCsrImmediateSubmissionEnabledWithInternalCommandListForImmediateWhenAppendLaunchKernelThenSuccessIsReturned) { Mock<::L0::Kernel> kernel; DebugManagerStateRestore restorer; NEO::DebugManager.flags.EnableFlushTaskSubmission.set(true); ze_command_queue_desc_t queueDesc = {}; ze_result_t returnValue = ZE_RESULT_SUCCESS; ze_group_count_t groupCount{1, 1, 1}; auto commandList = CommandList::createImmediate(productFamily, device, &queueDesc, true, NEO::EngineGroupType::RenderCompute, returnValue); auto result = commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr); ASSERT_EQ(ZE_RESULT_SUCCESS, result); commandList->destroy(); } HWTEST_F(L0DebuggerInternalUsageTest, givenUseCsrImmediateSubmissionEnabledForImmediateCommandListForAppendLaunchKernelIndirectThenSuccessIsReturned) { Mock<::L0::Kernel> kernel; DebugManagerStateRestore restorer; NEO::DebugManager.flags.EnableFlushTaskSubmission.set(true); ze_command_queue_desc_t queueDesc = {}; queueDesc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS; ze_result_t returnValue = ZE_RESULT_SUCCESS; ze_group_count_t groupCount{1, 1, 1}; auto commandList = 
CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::RenderCompute, returnValue); auto result = commandList->appendLaunchKernelIndirect(kernel.toHandle(), &groupCount, nullptr, 0, nullptr); ASSERT_EQ(ZE_RESULT_SUCCESS, result); commandList->destroy(); } HWTEST_F(L0DebuggerInternalUsageTest, givenUseCsrImmediateSubmissionDisabledForImmediateCommandListForAppendLaunchKernelIndirectThenSuccessIsReturned) { Mock<::L0::Kernel> kernel; DebugManagerStateRestore restorer; NEO::DebugManager.flags.EnableFlushTaskSubmission.set(false); ze_command_queue_desc_t queueDesc = {}; queueDesc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS; ze_result_t returnValue = ZE_RESULT_SUCCESS; ze_group_count_t groupCount{1, 1, 1}; auto commandList = CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::RenderCompute, returnValue); auto result = commandList->appendLaunchKernelIndirect(kernel.toHandle(), &groupCount, nullptr, 0, nullptr); ASSERT_EQ(ZE_RESULT_SUCCESS, result); commandList->destroy(); } HWTEST_F(L0DebuggerInternalUsageTest, givenUseCsrImmediateSubmissionEnabledForImmediateCommandListForAppendMemoryCopyThenSuccessIsReturned) { DebugManagerStateRestore restorer; NEO::DebugManager.flags.EnableFlushTaskSubmission.set(true); void *srcPtr = reinterpret_cast(0x1234); void *dstPtr = reinterpret_cast(0x2345); ze_command_queue_desc_t queueDesc = {}; ze_result_t returnValue = ZE_RESULT_SUCCESS; auto commandList = CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::RenderCompute, returnValue); auto result = commandList->appendMemoryCopy(dstPtr, srcPtr, 0x100, nullptr, 0, nullptr); ASSERT_EQ(ZE_RESULT_SUCCESS, result); commandList->destroy(); } HWTEST_F(L0DebuggerInternalUsageTest, givenUseCsrImmediateSubmissionDisabledForImmediateCommandListForAppendMemoryCopyThenSuccessIsReturned) { DebugManagerStateRestore restorer; NEO::DebugManager.flags.EnableFlushTaskSubmission.set(false); 
void *srcPtr = reinterpret_cast(0x1234); void *dstPtr = reinterpret_cast(0x2345); ze_command_queue_desc_t queueDesc = {}; ze_result_t returnValue = ZE_RESULT_SUCCESS; auto commandList = CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::RenderCompute, returnValue); auto result = commandList->appendMemoryCopy(dstPtr, srcPtr, 0x100, nullptr, 0, nullptr); ASSERT_EQ(ZE_RESULT_SUCCESS, result); commandList->destroy(); } HWTEST_F(L0DebuggerInternalUsageTest, givenUseCsrImmediateSubmissionEnabledForImmediateCommandListForAppendMemoryCopyRegionThenSuccessIsReturned) { DebugManagerStateRestore restorer; NEO::DebugManager.flags.EnableFlushTaskSubmission.set(true); void *srcPtr = reinterpret_cast(0x1234); void *dstPtr = reinterpret_cast(0x2345); uint32_t width = 16; uint32_t height = 16; ze_copy_region_t sr = {0U, 0U, 0U, width, height, 0U}; ze_copy_region_t dr = {0U, 0U, 0U, width, height, 0U}; ze_command_queue_desc_t queueDesc = {}; ze_result_t returnValue = ZE_RESULT_SUCCESS; auto commandList = CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::RenderCompute, returnValue); auto result = commandList->appendMemoryCopyRegion(dstPtr, &dr, 0, 0, srcPtr, &sr, 0, 0, nullptr, 0, nullptr); ASSERT_EQ(ZE_RESULT_SUCCESS, result); commandList->destroy(); } HWTEST_F(L0DebuggerInternalUsageTest, givenUseCsrImmediateSubmissionEnabledForRegularCommandListForAppendMemoryCopyRegionThenSuccessIsReturned) { DebugManagerStateRestore restorer; NEO::DebugManager.flags.EnableFlushTaskSubmission.set(true); void *srcPtr = reinterpret_cast(0x1234); void *dstPtr = reinterpret_cast(0x2345); uint32_t width = 16; uint32_t height = 16; ze_copy_region_t sr = {0U, 0U, 0U, width, height, 0U}; ze_copy_region_t dr = {0U, 0U, 0U, width, height, 0U}; ze_command_queue_desc_t queueDesc = {}; ze_result_t returnValue; auto commandQueue = whitebox_cast(CommandQueue::create(productFamily, device, 
neoDevice->getDefaultEngine().commandStreamReceiver, &queueDesc, false, false, returnValue)); ASSERT_NE(nullptr, commandQueue->commandStream); ze_command_list_handle_t commandLists[] = { CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)->toHandle()}; const uint32_t numCommandLists = sizeof(commandLists) / sizeof(commandLists[0]); auto commandList = CommandList::fromHandle(commandLists[0]); auto result = commandList->appendMemoryCopyRegion(dstPtr, &dr, 0, 0, srcPtr, &sr, 0, 0, nullptr, 0, nullptr); ASSERT_EQ(ZE_RESULT_SUCCESS, result); result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true); EXPECT_EQ(ZE_RESULT_SUCCESS, result); commandQueue->synchronize(0); commandList->destroy(); commandQueue->destroy(); } HWTEST_F(L0DebuggerInternalUsageTest, givenUseCsrImmediateSubmissionDisabledForImmediateCommandListForAppendMemoryCopyRegionThenSuccessIsReturned) { DebugManagerStateRestore restorer; NEO::DebugManager.flags.EnableFlushTaskSubmission.set(false); void *srcPtr = reinterpret_cast(0x1234); void *dstPtr = reinterpret_cast(0x2345); ze_copy_region_t dstRegion = {}; ze_copy_region_t srcRegion = {}; ze_command_queue_desc_t queueDesc = {}; ze_result_t returnValue = ZE_RESULT_SUCCESS; auto commandList = CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::RenderCompute, returnValue); auto result = commandList->appendMemoryCopyRegion(dstPtr, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr, 0, nullptr); ASSERT_EQ(ZE_RESULT_SUCCESS, result); commandList->destroy(); } HWTEST2_F(L0DebuggerInternalUsageTest, givenUseCsrImmediateSubmissionEnabledCommandListAndAppendMemoryCopyCalledInLoopThenMultipleCommandBufferAreUsedAndSuccessIsReturned, IsAtLeastSkl) { DebugManagerStateRestore restorer; NEO::DebugManager.flags.EnableFlushTaskSubmission.set(true); void *srcPtr = reinterpret_cast(0x1234); void *dstPtr = reinterpret_cast(0x2345); ze_command_queue_desc_t 
queueDesc = {}; ze_result_t returnValue = ZE_RESULT_SUCCESS; auto commandList = CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::RenderCompute, returnValue); ASSERT_NE(nullptr, commandList); for (uint32_t count = 0; count < 2048; count++) { auto result = commandList->appendMemoryCopy(dstPtr, srcPtr, 0x100, nullptr, 0, nullptr); ASSERT_EQ(ZE_RESULT_SUCCESS, result); } commandList->destroy(); } HWTEST2_F(L0DebuggerInternalUsageTest, givenUseCsrImmediateSubmissionDisabledCommandListAndAppendMemoryCopyCalledInLoopThenMultipleCommandBufferAreUsedAndSuccessIsReturned, IsAtLeastSkl) { DebugManagerStateRestore restorer; NEO::DebugManager.flags.EnableFlushTaskSubmission.set(false); void *srcPtr = reinterpret_cast(0x1234); void *dstPtr = reinterpret_cast(0x2345); ze_command_queue_desc_t queueDesc = {}; ze_result_t returnValue = ZE_RESULT_SUCCESS; auto commandList = CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::RenderCompute, returnValue); ASSERT_NE(nullptr, commandList); for (uint32_t count = 0; count < 2048; count++) { auto result = commandList->appendMemoryCopy(dstPtr, srcPtr, 0x100, nullptr, 0, nullptr); ASSERT_EQ(ZE_RESULT_SUCCESS, result); } commandList->destroy(); } HWTEST2_F(L0DebuggerInternalUsageTest, givenDebuggingEnabledWhenInternalCmdQIsUsedThenDebuggerPathsAreNotExecuted, IsAtLeastSkl) { ze_command_queue_desc_t queueDesc = {}; std::unique_ptr, Deleter> commandQueue(new MockCommandQueueHw(device, neoDevice->getDefaultEngine().commandStreamReceiver, &queueDesc)); commandQueue->initialize(false, true); EXPECT_TRUE(commandQueue->internalUsage); ze_result_t returnValue; ze_command_list_handle_t commandLists[] = { CommandList::createImmediate(productFamily, device, &queueDesc, true, NEO::EngineGroupType::RenderCompute, returnValue)->toHandle()}; uint32_t numCommandLists = sizeof(commandLists) / sizeof(commandLists[0]); auto result = 
commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto sbaBuffer = device->getL0Debugger()->getSbaTrackingBuffer(neoDevice->getDefaultEngine().commandStreamReceiver->getOsContext().getContextId()); auto sipIsa = NEO::SipKernel::getSipKernel(*neoDevice).getSipAllocation(); auto debugSurface = device->getDebugSurface(); bool sbaFound = false; bool sipFound = false; bool debugSurfaceFound = false; for (auto iter : commandQueue->residencyContainerSnapshot) { if (iter == sbaBuffer) { sbaFound = true; } if (iter == sipIsa) { sipFound = true; } if (iter == debugSurface) { debugSurfaceFound = true; } } EXPECT_FALSE(sbaFound); EXPECT_FALSE(sipFound); EXPECT_FALSE(debugSurfaceFound); EXPECT_EQ(0u, getMockDebuggerL0Hw()->captureStateBaseAddressCount); EXPECT_EQ(0u, getMockDebuggerL0Hw()->programSbaTrackingCommandsCount); EXPECT_EQ(0u, getMockDebuggerL0Hw()->getSbaTrackingCommandsSizeCount); auto commandList = CommandList::fromHandle(commandLists[0]); commandList->destroy(); } } // namespace ult } // namespace L0 test_l0_debugger_single_address_space.cpp000066400000000000000000000414761422164147700371770ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/debugger/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/source/image/image_hw.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h" #include "level_zero/core/test/unit_tests/mocks/mock_kernel.h" #include "level_zero/core/test/unit_tests/sources/debugger/l0_debugger_fixture.h" namespace L0 { namespace ult { struct SingleAddressSpaceFixture : public Test { void SetUp() override { NEO::DebugManager.flags.DebuggerForceSbaTrackingMode.set(1); Test::SetUp(); } void TearDown() override { Test::TearDown(); } DebugManagerStateRestore restorer; 
}; struct L0DebuggerSingleAddressSpace : public Test { void SetUp() override { NEO::DebugManager.flags.DebuggerForceSbaTrackingMode.set(1); Test::SetUp(); } void TearDown() override { Test::TearDown(); } DebugManagerStateRestore restorer; }; HWTEST_F(SingleAddressSpaceFixture, givenDebugFlagForceSbaTrackingModeSetWhenDebuggerIsCreatedThenItHasCorrectSingleAddressSpaceValue) { DebugManagerStateRestore restorer; NEO::DebugManager.flags.DebuggerForceSbaTrackingMode.set(1); auto debugger = std::make_unique>(neoDevice); EXPECT_TRUE(debugger->singleAddressSpaceSbaTracking); NEO::DebugManager.flags.DebuggerForceSbaTrackingMode.set(0); debugger = std::make_unique>(neoDevice); EXPECT_FALSE(debugger->singleAddressSpaceSbaTracking); } HWTEST_F(SingleAddressSpaceFixture, givenSingleAddressSpaceWhenDebuggerIsCreatedThenSbaTrackingGpuVaIsNotReserved) { auto debugger = std::make_unique>(neoDevice); EXPECT_EQ(0u, debugger->sbaTrackingGpuVa.address); EXPECT_EQ(0u, debugger->sbaTrackingGpuVa.size); EXPECT_EQ(0u, debugger->getSbaTrackingGpuVa()); std::vector allocations; auto &allEngines = device->getNEODevice()->getMemoryManager()->getRegisteredEngines(); for (auto &engine : allEngines) { auto sbaAllocation = debugger->getSbaTrackingBuffer(engine.osContext->getContextId()); ASSERT_NE(nullptr, sbaAllocation); allocations.push_back(sbaAllocation); EXPECT_EQ(NEO::AllocationType::DEBUG_SBA_TRACKING_BUFFER, sbaAllocation->getAllocationType()); } for (uint32_t i = 0; i < allocations.size() - 1; i++) { EXPECT_NE(allocations[i]->getGpuAddress(), allocations[i + 1]->getGpuAddress()); } } HWTEST2_F(SingleAddressSpaceFixture, WhenProgrammingSbaTrackingCommandsForSingleAddressSpaceThenAbortIsCalledAndNoCommandsAreAddedToStream, IsAtMostGen11) { auto debugger = std::make_unique>(neoDevice); using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM; using MI_ARB_CHECK = typename FamilyType::MI_ARB_CHECK; using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; using 
MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM; using MI_MATH = typename FamilyType::MI_MATH; StackVec buffer(4096); NEO::LinearStream cmdStream(buffer.begin(), buffer.size()); uint64_t gsba = 0x60000; uint64_t ssba = 0x1234567000; uint64_t iba = 0xfff80000; uint64_t ioba = 0x8100000; uint64_t dsba = 0xffff0000aaaa0000; NEO::Debugger::SbaAddresses sbaAddresses = {}; sbaAddresses.GeneralStateBaseAddress = gsba; sbaAddresses.SurfaceStateBaseAddress = ssba; sbaAddresses.InstructionBaseAddress = iba; sbaAddresses.IndirectObjectBaseAddress = ioba; sbaAddresses.DynamicStateBaseAddress = dsba; sbaAddresses.BindlessSurfaceStateBaseAddress = ssba; EXPECT_THROW(debugger->programSbaTrackingCommandsSingleAddressSpace(cmdStream, sbaAddresses), std::exception); EXPECT_EQ(0u, cmdStream.getUsed()); EXPECT_THROW(debugger->getSbaTrackingCommandsSize(6), std::exception); } HWTEST2_F(SingleAddressSpaceFixture, GivenNonZeroSbaAddressesWhenProgrammingSbaTrackingCommandsForSingleAddressSpaceThenCorrectSequenceOfCommandsAreAddedToStream, IsAtLeastGen12lp) { auto debugger = std::make_unique>(neoDevice); using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM; using MI_ARB_CHECK = typename FamilyType::MI_ARB_CHECK; using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM; using MI_MATH = typename FamilyType::MI_MATH; using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; AllocationProperties commandBufferProperties = {device->getRootDeviceIndex(), true, MemoryConstants::pageSize, AllocationType::COMMAND_BUFFER, false, device->getNEODevice()->getDeviceBitfield()}; auto streamAllocation = device->getNEODevice()->getMemoryManager()->allocateGraphicsMemoryWithProperties(commandBufferProperties); ASSERT_NE(nullptr, streamAllocation); NEO::LinearStream cmdStream; cmdStream.replaceGraphicsAllocation(streamAllocation); 
cmdStream.replaceBuffer(streamAllocation->getUnderlyingBuffer(), streamAllocation->getUnderlyingBufferSize()); uint64_t gsba = 0x60000; uint64_t ssba = 0x1234567000; uint64_t iba = 0xfff80000; uint64_t ioba = 0x8100000; uint64_t dsba = 0xffff0000aaaa0000; NEO::Debugger::SbaAddresses sbaAddresses = {}; sbaAddresses.GeneralStateBaseAddress = gsba; sbaAddresses.SurfaceStateBaseAddress = ssba; sbaAddresses.InstructionBaseAddress = iba; sbaAddresses.IndirectObjectBaseAddress = ioba; sbaAddresses.DynamicStateBaseAddress = dsba; sbaAddresses.BindlessSurfaceStateBaseAddress = ssba; debugger->programSbaTrackingCommandsSingleAddressSpace(cmdStream, sbaAddresses); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, cmdStream.getCpuBase(), cmdStream.getUsed())); size_t sizeExpected = sizeof(MI_ARB_CHECK) + sizeof(MI_BATCH_BUFFER_START); for (int i = 0; i < 6; i++) { sizeExpected += NEO::EncodeSetMMIO::sizeIMM; sizeExpected += NEO::EncodeMath::streamCommandSize; sizeExpected += 2 * sizeof(MI_STORE_REGISTER_MEM); sizeExpected += 2 * sizeof(MI_STORE_DATA_IMM); sizeExpected += sizeof(MI_ARB_CHECK); sizeExpected += sizeof(MI_BATCH_BUFFER_START); sizeExpected += sizeof(MI_STORE_DATA_IMM); } sizeExpected += sizeof(MI_ARB_CHECK) + sizeof(MI_BATCH_BUFFER_START); EXPECT_EQ(sizeExpected, cmdStream.getUsed()); EXPECT_EQ(sizeExpected, debugger->getSbaTrackingCommandsSize(6)); auto itor = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), itor); itor = find(itor, cmdList.end()); ASSERT_NE(cmdList.end(), itor); for (int i = 0; i < 6; i++) { itor = find(itor, cmdList.end()); ASSERT_NE(cmdList.end(), itor); auto lri = genCmdCast(*itor); EXPECT_EQ(CS_GPR_R0, lri->getRegisterOffset()); itor = find(itor, cmdList.end()); ASSERT_NE(cmdList.end(), itor); itor = find(itor, cmdList.end()); ASSERT_NE(cmdList.end(), itor); itor = find(itor, cmdList.end()); ASSERT_NE(cmdList.end(), itor); itor = find(itor, cmdList.end()); ASSERT_NE(cmdList.end(), itor); itor = 
find(itor, cmdList.end()); ASSERT_NE(cmdList.end(), itor); itor = find(itor, cmdList.end()); ASSERT_NE(cmdList.end(), itor); itor = find(itor, cmdList.end()); ASSERT_NE(cmdList.end(), itor); itor = find(itor, cmdList.end()); ASSERT_NE(cmdList.end(), itor); } itor = find(itor, cmdList.end()); ASSERT_NE(cmdList.end(), itor); itor = find(itor, cmdList.end()); ASSERT_NE(cmdList.end(), itor); auto miArb = genCmdCast(*itor); EXPECT_FALSE(miArb->getPreParserDisable()); device->getNEODevice()->getMemoryManager()->freeGraphicsMemory(streamAllocation); } HWTEST2_F(SingleAddressSpaceFixture, GivenOneNonZeroSbaAddressesWhenProgrammingSbaTrackingCommandsForSingleAddressSpaceThenONlyPartOfCommandsAreAddedToStream, IsAtLeastGen12lp) { auto debugger = std::make_unique>(neoDevice); using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM; using MI_ARB_CHECK = typename FamilyType::MI_ARB_CHECK; using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM; using MI_MATH = typename FamilyType::MI_MATH; using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; AllocationProperties commandBufferProperties = {device->getRootDeviceIndex(), true, MemoryConstants::pageSize, AllocationType::COMMAND_BUFFER, false, device->getNEODevice()->getDeviceBitfield()}; auto streamAllocation = device->getNEODevice()->getMemoryManager()->allocateGraphicsMemoryWithProperties(commandBufferProperties); ASSERT_NE(nullptr, streamAllocation); NEO::LinearStream cmdStream; cmdStream.replaceGraphicsAllocation(streamAllocation); cmdStream.replaceBuffer(streamAllocation->getUnderlyingBuffer(), streamAllocation->getUnderlyingBufferSize()); uint64_t ssba = 0x1234567000; NEO::Debugger::SbaAddresses sbaAddresses = {0}; sbaAddresses.SurfaceStateBaseAddress = ssba; debugger->programSbaTrackingCommandsSingleAddressSpace(cmdStream, sbaAddresses); GenCmdList cmdList; 
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, cmdStream.getCpuBase(), cmdStream.getUsed())); size_t sizeExpected = sizeof(MI_ARB_CHECK) + sizeof(MI_BATCH_BUFFER_START); sizeExpected += NEO::EncodeSetMMIO::sizeIMM; sizeExpected += NEO::EncodeMath::streamCommandSize; sizeExpected += 2 * sizeof(MI_STORE_REGISTER_MEM); sizeExpected += 2 * sizeof(MI_STORE_DATA_IMM); sizeExpected += sizeof(MI_ARB_CHECK); sizeExpected += sizeof(MI_BATCH_BUFFER_START); sizeExpected += sizeof(MI_STORE_DATA_IMM); sizeExpected += sizeof(MI_ARB_CHECK) + sizeof(MI_BATCH_BUFFER_START); EXPECT_EQ(sizeExpected, cmdStream.getUsed()); EXPECT_EQ(sizeExpected, debugger->getSbaTrackingCommandsSize(1)); auto itor = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), itor); itor = find(itor, cmdList.end()); ASSERT_NE(cmdList.end(), itor); itor = find(itor, cmdList.end()); ASSERT_NE(cmdList.end(), itor); auto lri = genCmdCast(*itor); EXPECT_EQ(CS_GPR_R0, lri->getRegisterOffset()); itor = find(itor, cmdList.end()); ASSERT_NE(cmdList.end(), itor); itor = find(itor, cmdList.end()); ASSERT_NE(cmdList.end(), itor); itor = find(itor, cmdList.end()); ASSERT_NE(cmdList.end(), itor); itor = find(itor, cmdList.end()); ASSERT_NE(cmdList.end(), itor); itor = find(itor, cmdList.end()); ASSERT_NE(cmdList.end(), itor); itor = find(itor, cmdList.end()); ASSERT_NE(cmdList.end(), itor); itor = find(itor, cmdList.end()); ASSERT_NE(cmdList.end(), itor); itor = find(itor, cmdList.end()); ASSERT_NE(cmdList.end(), itor); itor = find(itor, cmdList.end()); ASSERT_NE(cmdList.end(), itor); itor = find(itor, cmdList.end()); ASSERT_NE(cmdList.end(), itor); auto miArb = genCmdCast(*itor); EXPECT_FALSE(miArb->getPreParserDisable()); device->getNEODevice()->getMemoryManager()->freeGraphicsMemory(streamAllocation); } HWTEST2_F(SingleAddressSpaceFixture, GivenAllZeroSbaAddressesWhenProgrammingSbaTrackingCommandsForSingleAddressSpaceThenNoCommandsAreAddedToStream, IsAtLeastGen12lp) { auto debugger = 
std::make_unique>(neoDevice); AllocationProperties commandBufferProperties = {device->getRootDeviceIndex(), true, MemoryConstants::pageSize, AllocationType::COMMAND_BUFFER, false, device->getNEODevice()->getDeviceBitfield()}; auto streamAllocation = device->getNEODevice()->getMemoryManager()->allocateGraphicsMemoryWithProperties(commandBufferProperties); ASSERT_NE(nullptr, streamAllocation); NEO::LinearStream cmdStream; cmdStream.replaceGraphicsAllocation(streamAllocation); cmdStream.replaceBuffer(streamAllocation->getUnderlyingBuffer(), streamAllocation->getUnderlyingBufferSize()); NEO::Debugger::SbaAddresses sbaAddresses = {0}; debugger->programSbaTrackingCommandsSingleAddressSpace(cmdStream, sbaAddresses); size_t sizeExpected = 0; EXPECT_EQ(sizeExpected, cmdStream.getUsed()); device->getNEODevice()->getMemoryManager()->freeGraphicsMemory(streamAllocation); } HWTEST2_F(L0DebuggerSingleAddressSpace, givenDebuggingEnabledWhenCommandListIsExecutedThenValidKernelDebugCommandsAreAdded, IsAtLeastGen12lp) { using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; using STATE_SIP = typename FamilyType::STATE_SIP; ze_command_queue_desc_t queueDesc = {}; ze_result_t returnValue; auto commandQueue = whitebox_cast(CommandQueue::create(productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &queueDesc, false, false, returnValue)); ASSERT_NE(nullptr, commandQueue->commandStream); auto usedSpaceBefore = commandQueue->commandStream->getUsed(); ze_command_list_handle_t commandLists[] = { CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)->toHandle()}; uint32_t numCommandLists = sizeof(commandLists) / sizeof(commandLists[0]); auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto usedSpaceAfter = commandQueue->commandStream->getUsed(); ASSERT_GT(usedSpaceAfter, usedSpaceBefore); GenCmdList cmdList; 
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandQueue->commandStream->getCpuBase(), 0), usedSpaceAfter)); auto miLoadImm = findAll(cmdList.begin(), cmdList.end()); size_t gpr15RegisterCount = 0; size_t gprMiLoadindex = std::numeric_limits::max(); for (size_t i = 0; i < miLoadImm.size(); i++) { MI_LOAD_REGISTER_IMM *miLoad = genCmdCast(*miLoadImm[i]); ASSERT_NE(nullptr, miLoad); if (miLoad->getRegisterOffset() == CS_GPR_R15) { gpr15RegisterCount++; gprMiLoadindex = i; } if (miLoad->getRegisterOffset() == CS_GPR_R15 + 4) { gpr15RegisterCount++; } } // 2 LRI commands to store SBA buffer address EXPECT_EQ(2u, gpr15RegisterCount); auto sbaGpuVa = getMockDebuggerL0Hw()->getSbaTrackingBuffer(commandQueue->getCsr()->getOsContext().getContextId())->getGpuAddress(); uint32_t low = sbaGpuVa & 0xffffffff; uint32_t high = (sbaGpuVa >> 32) & 0xffffffff; MI_LOAD_REGISTER_IMM *miLoad = genCmdCast(*miLoadImm[gprMiLoadindex]); EXPECT_EQ(CS_GPR_R15, miLoad->getRegisterOffset()); EXPECT_EQ(low, miLoad->getDataDword()); miLoad = genCmdCast(*miLoadImm[gprMiLoadindex + 1]); EXPECT_EQ(CS_GPR_R15 + 4, miLoad->getRegisterOffset()); EXPECT_EQ(high, miLoad->getDataDword()); for (auto i = 0u; i < numCommandLists; i++) { auto commandList = CommandList::fromHandle(commandLists[i]); commandList->destroy(); } commandQueue->destroy(); } } // namespace ult } // namespace L0 test_module_with_debug.cpp000066400000000000000000001170351422164147700342600ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/debugger/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/device_binary_format/patchtokens_decoder.h" #include "shared/source/kernel/kernel_descriptor_from_patchtokens.h" #include "shared/source/program/kernel_info.h" #include "shared/source/program/kernel_info_from_patchtokens.h" #include "shared/test/common/helpers/unit_test_helper.h" #include 
"shared/test/common/mocks/mock_compilers.h" #include "shared/test/common/mocks/mock_elf.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/compiler_interface/linker_mock.h" #include "level_zero/core/source/module/module_imp.h" #include "level_zero/core/test/unit_tests/fixtures/module_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_l0_debugger.h" #include "level_zero/core/test/unit_tests/mocks/mock_module.h" #include "level_zero/core/test/unit_tests/sources/debugger/l0_debugger_fixture.h" #include "active_debugger_fixture.h" namespace L0 { namespace ult { using DeviceWithDebuggerEnabledTest = Test; TEST_F(DeviceWithDebuggerEnabledTest, givenDebuggingEnabledWhenModuleIsCreatedThenDebugOptionsAreUsed) { NEO::MockCompilerEnableGuard mock(true); auto cip = new NEO::MockCompilerInterfaceCaptureBuildOptions(); device->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]->compilerInterface.reset(cip); debugger->isOptDisabled = true; uint8_t binary[10]; ze_module_desc_t moduleDesc = {}; moduleDesc.format = ZE_MODULE_FORMAT_IL_SPIRV; moduleDesc.pInputModule = binary; moduleDesc.inputSize = 10; ModuleBuildLog *moduleBuildLog = nullptr; auto module = std::unique_ptr(new L0::ModuleImp(deviceL0, moduleBuildLog, ModuleType::User)); ASSERT_NE(nullptr, module.get()); module->initialize(&moduleDesc, device); EXPECT_TRUE(CompilerOptions::contains(cip->buildInternalOptions, L0::BuildOptions::debugKernelEnable)); EXPECT_TRUE(CompilerOptions::contains(cip->buildOptions, NEO::CompilerOptions::generateDebugInfo)); EXPECT_TRUE(CompilerOptions::contains(cip->buildOptions, L0::BuildOptions::optDisable)); }; TEST_F(DeviceWithDebuggerEnabledTest, GivenDebugVarDebuggerOptDisableZeroWhenOptDisableIsTrueFromDebuggerThenOptDisableIsNotAdded) { DebugManagerStateRestore dgbRestorer; NEO::DebugManager.flags.DebuggerOptDisable.set(0); NEO::MockCompilerEnableGuard mock(true); auto cip = new 
NEO::MockCompilerInterfaceCaptureBuildOptions(); device->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]->compilerInterface.reset(cip); debugger->isOptDisabled = true; uint8_t binary[10]; ze_module_desc_t moduleDesc = {}; moduleDesc.format = ZE_MODULE_FORMAT_IL_SPIRV; moduleDesc.pInputModule = binary; moduleDesc.inputSize = 10; ModuleBuildLog *moduleBuildLog = nullptr; auto module = std::unique_ptr(new L0::ModuleImp(deviceL0, moduleBuildLog, ModuleType::User)); ASSERT_NE(nullptr, module.get()); module->initialize(&moduleDesc, device); EXPECT_FALSE(CompilerOptions::contains(cip->buildOptions, L0::BuildOptions::optDisable)); }; TEST_F(DeviceWithDebuggerEnabledTest, GivenDebugVarDebuggerOptDisableOneWhenOptDisableIsFalseFromDebuggerThenOptDisableIsAdded) { DebugManagerStateRestore dgbRestorer; NEO::DebugManager.flags.DebuggerOptDisable.set(1); NEO::MockCompilerEnableGuard mock(true); auto cip = new NEO::MockCompilerInterfaceCaptureBuildOptions(); device->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]->compilerInterface.reset(cip); debugger->isOptDisabled = false; uint8_t binary[10]; ze_module_desc_t moduleDesc = {}; moduleDesc.format = ZE_MODULE_FORMAT_IL_SPIRV; moduleDesc.pInputModule = binary; moduleDesc.inputSize = 10; ModuleBuildLog *moduleBuildLog = nullptr; auto module = std::unique_ptr(new L0::ModuleImp(deviceL0, moduleBuildLog, ModuleType::User)); ASSERT_NE(nullptr, module.get()); module->initialize(&moduleDesc, device); EXPECT_TRUE(CompilerOptions::contains(cip->buildOptions, L0::BuildOptions::optDisable)); }; TEST_F(DeviceWithDebuggerEnabledTest, GivenDebuggeableKernelWhenModuleIsInitializedThenDebugEnabledIsTrue) { NEO::MockCompilerEnableGuard mock(true); auto cip = new NEO::MockCompilerInterfaceCaptureBuildOptions(); device->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]->compilerInterface.reset(cip); uint8_t binary[10]; ze_module_desc_t moduleDesc = {}; 
moduleDesc.format = ZE_MODULE_FORMAT_IL_SPIRV; moduleDesc.pInputModule = binary; moduleDesc.inputSize = 10; ModuleBuildLog *moduleBuildLog = nullptr; auto module = std::make_unique>(deviceL0, moduleBuildLog, ModuleType::User); ASSERT_NE(nullptr, module.get()); NEO::PatchTokenBinary::KernelFromPatchtokens kernelTokens; iOpenCL::SKernelBinaryHeaderCommon kernelHeader; kernelTokens.header = &kernelHeader; iOpenCL::SPatchAllocateSystemThreadSurface systemThreadSurface = {}; systemThreadSurface.Offset = 2; systemThreadSurface.PerThreadSystemThreadSurfaceSize = 3; kernelTokens.tokens.allocateSystemThreadSurface = &systemThreadSurface; auto kernelInfo = std::make_unique(); populateKernelInfo(*kernelInfo, kernelTokens, sizeof(size_t)); module->translationUnit->programInfo.kernelInfos.push_back(kernelInfo.release()); module->initialize(&moduleDesc, device); EXPECT_TRUE(module->isDebugEnabled()); } TEST_F(DeviceWithDebuggerEnabledTest, GivenNonDebuggeableKernelWhenModuleIsInitializedThenDebugEnabledIsFalse) { NEO::MockCompilerEnableGuard mock(true); auto cip = new NEO::MockCompilerInterfaceCaptureBuildOptions(); device->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]->compilerInterface.reset(cip); uint8_t binary[10]; ze_module_desc_t moduleDesc = {}; moduleDesc.format = ZE_MODULE_FORMAT_IL_SPIRV; moduleDesc.pInputModule = binary; moduleDesc.inputSize = 10; ModuleBuildLog *moduleBuildLog = nullptr; auto module = std::make_unique>(deviceL0, moduleBuildLog, ModuleType::User); ASSERT_NE(nullptr, module.get()); NEO::PatchTokenBinary::KernelFromPatchtokens kernelTokens; iOpenCL::SKernelBinaryHeaderCommon kernelHeader; kernelTokens.header = &kernelHeader; kernelTokens.tokens.allocateSystemThreadSurface = nullptr; auto kernelInfo = std::make_unique(); populateKernelInfo(*kernelInfo, kernelTokens, sizeof(size_t)); module->translationUnit->programInfo.kernelInfos.push_back(kernelInfo.release()); module->initialize(&moduleDesc, device); 
EXPECT_FALSE(module->isDebugEnabled()); } using ModuleWithSLDTest = Test; TEST_F(ModuleWithSLDTest, GivenNoDebugDataWhenInitializingModuleThenRelocatedDebugDataIsNotCreated) { NEO::MockCompilerEnableGuard mock(true); auto cip = new NEO::MockCompilerInterfaceCaptureBuildOptions(); neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]->compilerInterface.reset(cip); auto debugger = new MockActiveSourceLevelDebugger(new MockOsLibrary); neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[0]->debugger.reset(debugger); uint8_t binary[10]; ze_module_desc_t moduleDesc = {}; moduleDesc.format = ZE_MODULE_FORMAT_IL_SPIRV; moduleDesc.pInputModule = binary; moduleDesc.inputSize = 10; ModuleBuildLog *moduleBuildLog = nullptr; std::unique_ptr module = std::make_unique(device, moduleBuildLog, ModuleType::User); module->translationUnit = std::make_unique(device); uint32_t kernelHeap = 0; auto kernelInfo = new KernelInfo(); kernelInfo->heapInfo.KernelHeapSize = 1; kernelInfo->heapInfo.pKernelHeap = &kernelHeap; Mock<::L0::Kernel> kernel; kernel.module = module.get(); kernel.immutableData.kernelInfo = kernelInfo; kernel.immutableData.surfaceStateHeapSize = 64; kernel.immutableData.surfaceStateHeapTemplate.reset(new uint8_t[64]); kernelInfo->kernelDescriptor.payloadMappings.implicitArgs.systemThreadSurfaceAddress.bindful = 0; module->kernelImmData = &kernel.immutableData; module->translationUnit->programInfo.kernelInfos.push_back(kernelInfo); EXPECT_EQ(nullptr, module->translationUnit->debugData.get()); auto result = module->initialize(&moduleDesc, neoDevice); EXPECT_TRUE(result); EXPECT_EQ(nullptr, kernelInfo->kernelDescriptor.external.relocatedDebugData); } TEST_F(ModuleWithSLDTest, GivenDebugDataWithSingleRelocationWhenInitializingModuleThenRelocatedDebugDataIsNotCreated) { NEO::MockCompilerEnableGuard mock(true); auto cip = new NEO::MockCompilerInterfaceCaptureBuildOptions(); 
neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]->compilerInterface.reset(cip); auto debugger = new MockActiveSourceLevelDebugger(new MockOsLibrary); neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[0]->debugger.reset(debugger); createKernel(); uint8_t binary[10]; ze_module_desc_t moduleDesc = {}; moduleDesc.format = ZE_MODULE_FORMAT_IL_SPIRV; moduleDesc.pInputModule = binary; moduleDesc.inputSize = 10; ModuleBuildLog *moduleBuildLog = nullptr; std::unique_ptr moduleMock = std::make_unique(device, moduleBuildLog, ModuleType::User); moduleMock->translationUnit = std::make_unique(device); uint32_t kernelHeap = 0; auto kernelInfo = new KernelInfo(); kernelInfo->heapInfo.KernelHeapSize = 1; kernelInfo->heapInfo.pKernelHeap = &kernelHeap; Mock<::L0::Kernel> kernelMock; kernelMock.module = moduleMock.get(); kernelMock.immutableData.kernelInfo = kernelInfo; kernelMock.immutableData.surfaceStateHeapSize = 64; kernelMock.immutableData.surfaceStateHeapTemplate.reset(new uint8_t[64]); kernelInfo->kernelDescriptor.payloadMappings.implicitArgs.systemThreadSurfaceAddress.bindful = 0; moduleMock->kernelImmData = &kernelMock.immutableData; moduleMock->translationUnit->programInfo.kernelInfos.push_back(kernelInfo); kernelInfo->kernelDescriptor.external.debugData = std::make_unique(); kernelInfo->kernelDescriptor.external.debugData->vIsa = kernel->getKernelDescriptor().external.debugData->vIsa; kernelInfo->kernelDescriptor.external.debugData->vIsaSize = kernel->getKernelDescriptor().external.debugData->vIsaSize; kernelInfo->kernelDescriptor.external.debugData->genIsa = nullptr; kernelInfo->kernelDescriptor.external.debugData->genIsaSize = 0; auto result = moduleMock->initialize(&moduleDesc, neoDevice); EXPECT_TRUE(result); EXPECT_EQ(nullptr, kernelInfo->kernelDescriptor.external.relocatedDebugData); } TEST_F(ModuleWithSLDTest, GivenDebugDataWithMultipleRelocationsWhenInitializingModuleThenRelocatedDebugDataIsCreated) { 
NEO::MockCompilerEnableGuard mock(true); auto cip = new NEO::MockCompilerInterfaceCaptureBuildOptions(); neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]->compilerInterface.reset(cip); auto debugger = new MockActiveSourceLevelDebugger(new MockOsLibrary); neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[0]->debugger.reset(debugger); uint8_t binary[10]; ze_module_desc_t moduleDesc = {}; moduleDesc.format = ZE_MODULE_FORMAT_IL_SPIRV; moduleDesc.pInputModule = binary; moduleDesc.inputSize = 10; ModuleBuildLog *moduleBuildLog = nullptr; std::unique_ptr moduleMock = std::make_unique(device, moduleBuildLog, ModuleType::User); moduleMock->translationUnit = std::make_unique(device); uint32_t kernelHeap = 0; auto kernelInfo = new KernelInfo(); kernelInfo->heapInfo.KernelHeapSize = 1; kernelInfo->heapInfo.pKernelHeap = &kernelHeap; Mock<::L0::Kernel> kernelMock; kernelMock.module = moduleMock.get(); kernelMock.immutableData.kernelInfo = kernelInfo; kernelInfo->kernelDescriptor.payloadMappings.implicitArgs.systemThreadSurfaceAddress.bindful = 0; moduleMock->kernelImmData = &kernelMock.immutableData; moduleMock->translationUnit->programInfo.kernelInfos.push_back(kernelInfo); kernelInfo->kernelDescriptor.external.debugData = std::make_unique(); auto debugData = MockElfEncoder<>::createRelocateableDebugDataElf(); kernelInfo->kernelDescriptor.external.debugData->vIsaSize = static_cast(debugData.size()); kernelInfo->kernelDescriptor.external.debugData->vIsa = reinterpret_cast(debugData.data()); kernelInfo->kernelDescriptor.external.debugData->genIsa = nullptr; kernelInfo->kernelDescriptor.external.debugData->genIsaSize = 0; EXPECT_EQ(nullptr, kernelInfo->kernelDescriptor.external.relocatedDebugData); auto result = moduleMock->initialize(&moduleDesc, neoDevice); EXPECT_TRUE(result); EXPECT_NE(nullptr, kernelInfo->kernelDescriptor.external.relocatedDebugData); } using ModuleWithZebinAndSLDTest = Test; 
TEST_F(ModuleWithZebinAndSLDTest, GivenZebinThenCreateDebugZebinAndPassToSLD) { module->addEmptyZebin(); auto debugger = new MockActiveSourceLevelDebugger(new MockOsLibrary); neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[0]->debugger.reset(debugger); module->passDebugData(); EXPECT_TRUE(module->translationUnit->debugData); } using KernelDebugSurfaceTest = Test; HWTEST_F(KernelDebugSurfaceTest, givenDebuggerAndBindfulKernelWhenAppendingKernelToCommandListThenBindfulSurfaceStateForDebugSurfaceIsProgrammed) { NEO::MockCompilerEnableGuard mock(true); using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; auto debugger = MockDebuggerL0Hw::allocate(neoDevice); neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[0]->debugger.reset(debugger); auto &hwInfo = *NEO::defaultHwInfo.get(); auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); auto maxDbgSurfaceSize = hwHelper.getSipKernelMaxDbgSurfaceSize(hwInfo); auto debugSurface = neoDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties( {device->getRootDeviceIndex(), true, maxDbgSurfaceSize, NEO::AllocationType::DEBUG_CONTEXT_SAVE_AREA, false, false, device->getNEODevice()->getDeviceBitfield()}); static_cast(device)->setDebugSurface(debugSurface); uint8_t binary[10]; ze_module_desc_t moduleDesc = {}; moduleDesc.format = ZE_MODULE_FORMAT_NATIVE; moduleDesc.pInputModule = binary; moduleDesc.inputSize = 10; ModuleBuildLog *moduleBuildLog = nullptr; std::unique_ptr module = std::make_unique(device, moduleBuildLog, ModuleType::User); module->debugEnabled = true; uint32_t kernelHeap = 0; KernelInfo kernelInfo; kernelInfo.heapInfo.KernelHeapSize = 1; kernelInfo.heapInfo.pKernelHeap = &kernelHeap; Mock<::L0::Kernel> kernel; kernel.module = module.get(); kernel.immutableData.kernelInfo = &kernelInfo; ze_kernel_desc_t desc = {}; kernel.immutableData.kernelDescriptor->payloadMappings.implicitArgs.systemThreadSurfaceAddress.bindful = sizeof(RENDER_SURFACE_STATE); 
kernel.immutableData.surfaceStateHeapSize = 2 * sizeof(RENDER_SURFACE_STATE); kernel.immutableData.surfaceStateHeapTemplate.reset(new uint8_t[2 * sizeof(RENDER_SURFACE_STATE)]); module->kernelImmData = &kernel.immutableData; kernel.initialize(&desc); auto debugSurfaceState = reinterpret_cast(kernel.surfaceStateHeapData.get()); debugSurfaceState = ptrOffset(debugSurfaceState, sizeof(RENDER_SURFACE_STATE)); SURFACE_STATE_BUFFER_LENGTH length; length.Length = static_cast(debugSurface->getUnderlyingBufferSize() - 1); EXPECT_EQ(length.SurfaceState.Depth + 1u, debugSurfaceState->getDepth()); EXPECT_EQ(length.SurfaceState.Width + 1u, debugSurfaceState->getWidth()); EXPECT_EQ(length.SurfaceState.Height + 1u, debugSurfaceState->getHeight()); EXPECT_EQ(debugSurface->getGpuAddress(), debugSurfaceState->getSurfaceBaseAddress()); EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_BUFFER, debugSurfaceState->getSurfaceType()); } using ModuleWithDebuggerL0Test = Test; TEST_F(ModuleWithDebuggerL0Test, givenDebuggingEnabledWhenModuleIsCreatedThenDebugOptionsAreNotUsed) { NEO::MockCompilerEnableGuard mock(true); auto cip = new NEO::MockCompilerInterfaceCaptureBuildOptions(); neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()]->compilerInterface.reset(cip); uint8_t binary[10]; ze_module_desc_t moduleDesc = {}; moduleDesc.format = ZE_MODULE_FORMAT_IL_SPIRV; moduleDesc.pInputModule = binary; moduleDesc.inputSize = 10; ModuleBuildLog *moduleBuildLog = nullptr; auto module = std::unique_ptr(new L0::ModuleImp(device, moduleBuildLog, ModuleType::User)); ASSERT_NE(nullptr, module.get()); module->initialize(&moduleDesc, neoDevice); EXPECT_TRUE(module->isDebugEnabled()); EXPECT_FALSE(CompilerOptions::contains(cip->buildInternalOptions, L0::BuildOptions::debugKernelEnable)); EXPECT_FALSE(CompilerOptions::contains(cip->buildOptions, NEO::CompilerOptions::generateDebugInfo)); EXPECT_FALSE(CompilerOptions::contains(cip->buildOptions, 
L0::BuildOptions::optDisable)); } using KernelInitializeTest = Test; TEST_F(KernelInitializeTest, givenDebuggingEnabledWhenKernelsAreInitializedThenAllocationsAreNotResidentAndNotCopied) { uint32_t kernelHeap = 0xDEAD; KernelInfo kernelInfo; kernelInfo.heapInfo.KernelHeapSize = 4; kernelInfo.heapInfo.pKernelHeap = &kernelHeap; KernelImmutableData kernelImmutableData(device); memoryOperationsHandler->makeResidentCalledCount = 0; kernelImmutableData.initialize(&kernelInfo, device, 0, nullptr, nullptr, false); EXPECT_EQ(0, memoryOperationsHandler->makeResidentCalledCount); auto isa = kernelImmutableData.getIsaGraphicsAllocation()->getUnderlyingBuffer(); EXPECT_NE(0, memcmp(isa, &kernelHeap, sizeof(kernelHeap))); }; HWTEST_F(ModuleWithDebuggerL0Test, GivenDebugDataWithRelocationsWhenInitializingModuleThenRegisterElfWithRelocatedElf) { NEO::MockCompilerEnableGuard mock(true); auto cip = new NEO::MockCompilerInterfaceCaptureBuildOptions(); neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]->compilerInterface.reset(cip); uint8_t binary[10]; ze_module_desc_t moduleDesc = {}; moduleDesc.format = ZE_MODULE_FORMAT_IL_SPIRV; moduleDesc.pInputModule = binary; moduleDesc.inputSize = 10; ModuleBuildLog *moduleBuildLog = nullptr; std::unique_ptr moduleMock = std::make_unique(device, moduleBuildLog, ModuleType::User); moduleMock->translationUnit = std::make_unique(device); uint32_t kernelHeap = 0; auto kernelInfo = new KernelInfo(); kernelInfo->heapInfo.KernelHeapSize = 1; kernelInfo->heapInfo.pKernelHeap = &kernelHeap; Mock<::L0::Kernel> kernelMock; kernelMock.module = moduleMock.get(); kernelMock.immutableData.kernelInfo = kernelInfo; kernelInfo->kernelDescriptor.payloadMappings.implicitArgs.systemThreadSurfaceAddress.bindful = 0; moduleMock->kernelImmData = &kernelMock.immutableData; moduleMock->translationUnit->programInfo.kernelInfos.push_back(kernelInfo); kernelInfo->kernelDescriptor.external.debugData = std::make_unique(); auto 
debugData = MockElfEncoder<>::createRelocateableDebugDataElf(); kernelInfo->kernelDescriptor.external.debugData->vIsaSize = static_cast(debugData.size()); kernelInfo->kernelDescriptor.external.debugData->vIsa = reinterpret_cast(debugData.data()); kernelInfo->kernelDescriptor.external.debugData->genIsa = nullptr; kernelInfo->kernelDescriptor.external.debugData->genIsaSize = 0; EXPECT_EQ(0u, getMockDebuggerL0Hw()->registerElfCount); EXPECT_TRUE(moduleMock->initialize(&moduleDesc, neoDevice)); EXPECT_EQ(1u, getMockDebuggerL0Hw()->registerElfCount); EXPECT_NE(nullptr, kernelInfo->kernelDescriptor.external.relocatedDebugData.get()); EXPECT_EQ(reinterpret_cast(kernelInfo->kernelDescriptor.external.relocatedDebugData.get()), getMockDebuggerL0Hw()->lastReceivedElf); } HWTEST_F(ModuleWithDebuggerL0Test, GivenDebugDataWithoutRelocationsWhenInitializingModuleThenRegisterElfWithUnrelocatedElf) { NEO::MockCompilerEnableGuard mock(true); auto cip = new NEO::MockCompilerInterfaceCaptureBuildOptions(); neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]->compilerInterface.reset(cip); uint8_t binary[10]; ze_module_desc_t moduleDesc = {}; moduleDesc.format = ZE_MODULE_FORMAT_IL_SPIRV; moduleDesc.pInputModule = binary; moduleDesc.inputSize = 10; ModuleBuildLog *moduleBuildLog = nullptr; std::unique_ptr moduleMock = std::make_unique(device, moduleBuildLog, ModuleType::User); moduleMock->translationUnit = std::make_unique(device); uint32_t kernelHeap = 0; auto kernelInfo = new KernelInfo(); kernelInfo->heapInfo.KernelHeapSize = 1; kernelInfo->heapInfo.pKernelHeap = &kernelHeap; Mock<::L0::Kernel> kernelMock; kernelMock.module = moduleMock.get(); kernelMock.immutableData.kernelInfo = kernelInfo; kernelInfo->kernelDescriptor.payloadMappings.implicitArgs.systemThreadSurfaceAddress.bindful = 0; moduleMock->kernelImmData = &kernelMock.immutableData; moduleMock->translationUnit->programInfo.kernelInfos.push_back(kernelInfo); 
kernelInfo->kernelDescriptor.external.debugData = std::make_unique(); std::vector data; data.resize(4); NEO::Elf::ElfEncoder<> elfEncoder; elfEncoder.getElfFileHeader().type = NEO::Elf::SHT_PROGBITS; elfEncoder.appendSection(NEO::Elf::SHT_PROGBITS, NEO::Elf::SectionsNamesZebin::textPrefix, data); auto elfBinary = elfEncoder.encode(); kernelInfo->kernelDescriptor.external.debugData->vIsaSize = static_cast(elfBinary.size()); kernelInfo->kernelDescriptor.external.debugData->vIsa = reinterpret_cast(elfBinary.data()); kernelInfo->kernelDescriptor.external.debugData->genIsa = nullptr; kernelInfo->kernelDescriptor.external.debugData->genIsaSize = 0; EXPECT_EQ(0u, getMockDebuggerL0Hw()->registerElfCount); EXPECT_TRUE(moduleMock->initialize(&moduleDesc, neoDevice)); EXPECT_EQ(1u, getMockDebuggerL0Hw()->registerElfCount); EXPECT_EQ(nullptr, kernelInfo->kernelDescriptor.external.relocatedDebugData.get()); EXPECT_EQ(kernelInfo->kernelDescriptor.external.debugData->vIsa, getMockDebuggerL0Hw()->lastReceivedElf); } HWTEST_F(ModuleWithDebuggerL0Test, GivenNoDebugDataWhenInitializingModuleThenDoNotRegisterElf) { NEO::MockCompilerEnableGuard mock(true); auto cip = new NEO::MockCompilerInterfaceCaptureBuildOptions(); neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]->compilerInterface.reset(cip); uint8_t binary[10]; ze_module_desc_t moduleDesc = {}; moduleDesc.format = ZE_MODULE_FORMAT_IL_SPIRV; moduleDesc.pInputModule = binary; moduleDesc.inputSize = 10; ModuleBuildLog *moduleBuildLog = nullptr; std::unique_ptr moduleMock = std::make_unique(device, moduleBuildLog, ModuleType::User); moduleMock->translationUnit = std::make_unique(device); uint32_t kernelHeap = 0; auto kernelInfo = new KernelInfo(); kernelInfo->heapInfo.KernelHeapSize = 1; kernelInfo->heapInfo.pKernelHeap = &kernelHeap; Mock<::L0::Kernel> kernelMock; kernelMock.module = moduleMock.get(); kernelMock.immutableData.kernelInfo = kernelInfo; 
kernelInfo->kernelDescriptor.payloadMappings.implicitArgs.systemThreadSurfaceAddress.bindful = 0; moduleMock->kernelImmData = &kernelMock.immutableData; moduleMock->translationUnit->programInfo.kernelInfos.push_back(kernelInfo); EXPECT_EQ(0u, getMockDebuggerL0Hw()->registerElfCount); EXPECT_TRUE(moduleMock->initialize(&moduleDesc, neoDevice)); EXPECT_EQ(0u, getMockDebuggerL0Hw()->registerElfCount); } using ModuleWithZebinAndL0DebuggerTest = Test; HWTEST_F(ModuleWithZebinAndL0DebuggerTest, GivenZebinDebugDataWhenInitializingModuleThenRegisterElf) { NEO::MockCompilerEnableGuard mock(true); auto cip = new NEO::MockCompilerInterfaceCaptureBuildOptions(); neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()]->compilerInterface.reset(cip); uint8_t binary[10]; ze_module_desc_t moduleDesc = {}; moduleDesc.format = ZE_MODULE_FORMAT_IL_SPIRV; moduleDesc.pInputModule = binary; moduleDesc.inputSize = 10; uint32_t kernelHeap = 0; auto kernelInfo = std::make_unique(); kernelInfo->heapInfo.KernelHeapSize = 1; kernelInfo->heapInfo.pKernelHeap = &kernelHeap; auto kernelImmutableData = ::std::make_unique(device); kernelImmutableData->initialize(kernelInfo.get(), device, 0, nullptr, nullptr, false); std::unique_ptr moduleMock = std::make_unique(device, nullptr, ModuleType::User); moduleMock->translationUnit = std::make_unique(device); moduleMock->kernelImmDatas.push_back(std::move(kernelImmutableData)); kernelImmutableData = ::std::make_unique(device); kernelImmutableData->initialize(kernelInfo.get(), device, 0, nullptr, nullptr, false); moduleMock->kernelImmDatas.push_back(std::move(kernelImmutableData)); auto zebin = ZebinTestData::ValidEmptyProgram(); moduleMock->translationUnit = std::make_unique(device); moduleMock->translationUnit->unpackedDeviceBinarySize = zebin.storage.size(); moduleMock->translationUnit->unpackedDeviceBinary.reset(new char[zebin.storage.size()]); memcpy_s(moduleMock->translationUnit->unpackedDeviceBinary.get(), 
moduleMock->translationUnit->unpackedDeviceBinarySize, zebin.storage.data(), zebin.storage.size()); EXPECT_EQ(0u, getMockDebuggerL0Hw()->registerElfCount); EXPECT_TRUE(moduleMock->initialize(&moduleDesc, neoDevice)); EXPECT_EQ(2u, getMockDebuggerL0Hw()->registerElfCount); } HWTEST_F(ModuleWithZebinAndL0DebuggerTest, GivenZebinNoDebugDataWhenInitializingModuleThenDoNotRegisterElf) { NEO::MockCompilerEnableGuard mock(true); auto cip = new NEO::MockCompilerInterfaceCaptureBuildOptions(); neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()]->compilerInterface.reset(cip); uint8_t binary[10]; ze_module_desc_t moduleDesc = {}; moduleDesc.format = ZE_MODULE_FORMAT_IL_SPIRV; moduleDesc.pInputModule = binary; moduleDesc.inputSize = 10; std::unique_ptr moduleMock = std::make_unique(device, nullptr, ModuleType::User); moduleMock->translationUnit = std::make_unique(device); EXPECT_EQ(0u, getMockDebuggerL0Hw()->registerElfCount); EXPECT_TRUE(moduleMock->initialize(&moduleDesc, neoDevice)); EXPECT_EQ(0u, getMockDebuggerL0Hw()->registerElfCount); } HWTEST_F(ModuleWithZebinAndL0DebuggerTest, GivenZebinWhenModuleIsInitializedAndDestroyedThenModuleHandleIsAttachedAndRemoved) { NEO::MockCompilerEnableGuard mock(true); auto cip = new NEO::MockCompilerInterfaceCaptureBuildOptions(); neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()]->compilerInterface.reset(cip); uint8_t binary[10]; ze_module_desc_t moduleDesc = {}; moduleDesc.format = ZE_MODULE_FORMAT_IL_SPIRV; moduleDesc.pInputModule = binary; moduleDesc.inputSize = 10; uint32_t kernelHeap = 0; auto kernelInfo = std::make_unique(); kernelInfo->heapInfo.KernelHeapSize = 1; kernelInfo->heapInfo.pKernelHeap = &kernelHeap; auto kernelImmutableData = ::std::make_unique(device); kernelImmutableData->initialize(kernelInfo.get(), device, 0, nullptr, nullptr, false); std::unique_ptr moduleMock = std::make_unique(device, nullptr, ModuleType::User); 
moduleMock->translationUnit = std::make_unique(device); moduleMock->kernelImmDatas.push_back(std::move(kernelImmutableData)); auto zebin = ZebinTestData::ValidEmptyProgram(); moduleMock->translationUnit = std::make_unique(device); moduleMock->translationUnit->unpackedDeviceBinarySize = zebin.storage.size(); moduleMock->translationUnit->unpackedDeviceBinary.reset(new char[zebin.storage.size()]); memcpy_s(moduleMock->translationUnit->unpackedDeviceBinary.get(), moduleMock->translationUnit->unpackedDeviceBinarySize, zebin.storage.data(), zebin.storage.size()); getMockDebuggerL0Hw()->moduleHandleToReturn = 6; EXPECT_TRUE(moduleMock->initialize(&moduleDesc, neoDevice)); auto expectedSegmentAllocationCount = 1u; expectedSegmentAllocationCount += moduleMock->translationUnit->globalConstBuffer != nullptr ? 1 : 0; expectedSegmentAllocationCount += moduleMock->translationUnit->globalVarBuffer != nullptr ? 1 : 0; EXPECT_EQ(expectedSegmentAllocationCount, getMockDebuggerL0Hw()->segmentCountWithAttachedModuleHandle); EXPECT_EQ(getMockDebuggerL0Hw()->moduleHandleToReturn, moduleMock->debugModuleHandle); moduleMock->destroy(); moduleMock.release(); EXPECT_EQ(6u, getMockDebuggerL0Hw()->removedZebinModuleHandle); } HWTEST_F(ModuleWithZebinAndL0DebuggerTest, GivenModuleDebugHandleZeroWhenInitializingAndDestoryingModuleThenHandleIsNotPassedToDebugger) { NEO::MockCompilerEnableGuard mock(true); auto cip = new NEO::MockCompilerInterfaceCaptureBuildOptions(); neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()]->compilerInterface.reset(cip); uint8_t binary[10]; ze_module_desc_t moduleDesc = {}; moduleDesc.format = ZE_MODULE_FORMAT_IL_SPIRV; moduleDesc.pInputModule = binary; moduleDesc.inputSize = 10; uint32_t kernelHeap = 0; auto kernelInfo = std::make_unique(); kernelInfo->heapInfo.KernelHeapSize = 1; kernelInfo->heapInfo.pKernelHeap = &kernelHeap; auto kernelImmutableData = ::std::make_unique(device); 
kernelImmutableData->initialize(kernelInfo.get(), device, 0, nullptr, nullptr, false); std::unique_ptr moduleMock = std::make_unique(device, nullptr, ModuleType::User); moduleMock->translationUnit = std::make_unique(device); moduleMock->kernelImmDatas.push_back(std::move(kernelImmutableData)); auto zebin = ZebinTestData::ValidEmptyProgram(); moduleMock->translationUnit = std::make_unique(device); moduleMock->translationUnit->unpackedDeviceBinarySize = zebin.storage.size(); moduleMock->translationUnit->unpackedDeviceBinary.reset(new char[zebin.storage.size()]); memcpy_s(moduleMock->translationUnit->unpackedDeviceBinary.get(), moduleMock->translationUnit->unpackedDeviceBinarySize, zebin.storage.data(), zebin.storage.size()); getMockDebuggerL0Hw()->moduleHandleToReturn = 0u; EXPECT_TRUE(moduleMock->initialize(&moduleDesc, neoDevice)); EXPECT_EQ(1u, getMockDebuggerL0Hw()->segmentCountWithAttachedModuleHandle); EXPECT_EQ(getMockDebuggerL0Hw()->moduleHandleToReturn, moduleMock->debugModuleHandle); getMockDebuggerL0Hw()->removedZebinModuleHandle = std::numeric_limits::max(); moduleMock->destroy(); moduleMock.release(); EXPECT_EQ(std::numeric_limits::max(), getMockDebuggerL0Hw()->removedZebinModuleHandle); } using NotifyModuleLoadTest = Test; HWTEST_F(NotifyModuleLoadTest, givenDebuggingEnabledWhenModuleIsCreatedAndFullyLinkedThenIsaAllocationsAreCopiedAndResident) { NEO::MockCompilerEnableGuard mock(true); auto cip = new NEO::MockCompilerInterfaceCaptureBuildOptions(); neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()]->compilerInterface.reset(cip); auto memoryOperationsHandler = new NEO::MockMemoryOperations(); neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[0]->memoryOperationsInterface.reset(memoryOperationsHandler); auto debugger = MockDebuggerL0Hw::allocate(neoDevice); neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[0]->debugger.reset(debugger); std::string testFile; 
retrieveBinaryKernelFilenameApiSpecific(testFile, binaryFilename + "_", ".bin"); size_t size = 0; auto src = loadDataFromFile(testFile.c_str(), size); ASSERT_NE(0u, size); ASSERT_NE(nullptr, src); ze_module_desc_t moduleDesc = {}; moduleDesc.format = ZE_MODULE_FORMAT_NATIVE; moduleDesc.pInputModule = reinterpret_cast(src.get()); moduleDesc.inputSize = size; ModuleBuildLog *moduleBuildLog = nullptr; auto module = std::unique_ptr(new L0::ModuleImp(device, moduleBuildLog, ModuleType::User)); ASSERT_NE(nullptr, module.get()); memoryOperationsHandler->makeResidentCalledCount = 0; neoDevice->executionEnvironment->rootDeviceEnvironments[0]->osInterface.reset(nullptr); module->initialize(&moduleDesc, neoDevice); EXPECT_EQ(4, memoryOperationsHandler->makeResidentCalledCount); for (auto &ki : module->getKernelImmutableDataVector()) { EXPECT_TRUE(ki->isIsaCopiedToAllocation()); } } HWTEST_F(NotifyModuleLoadTest, givenDebuggingEnabledWhenModuleWithUnresolvedSymbolsIsCreatedThenIsaAllocationsAreNotCopiedAndNotResident) { NEO::MockCompilerEnableGuard mock(true); auto cip = new NEO::MockCompilerInterfaceCaptureBuildOptions(); neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()]->compilerInterface.reset(cip); auto memoryOperationsHandler = new NEO::MockMemoryOperations(); neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[0]->memoryOperationsInterface.reset(memoryOperationsHandler); auto debugger = MockDebuggerL0Hw::allocate(neoDevice); neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[0]->debugger.reset(debugger); std::string testFile; retrieveBinaryKernelFilenameApiSpecific(testFile, binaryFilename + "_", ".bin"); size_t size = 0; auto src = loadDataFromFile(testFile.c_str(), size); ASSERT_NE(0u, size); ASSERT_NE(nullptr, src); ze_module_desc_t moduleDesc = {}; moduleDesc.format = ZE_MODULE_FORMAT_NATIVE; moduleDesc.pInputModule = reinterpret_cast(src.get()); moduleDesc.inputSize = size; ModuleBuildLog *moduleBuildLog 
= nullptr; auto module = std::unique_ptr(new Module(device, moduleBuildLog, ModuleType::User)); ASSERT_NE(nullptr, module.get()); NEO::Linker::RelocationInfo unresolvedRelocation; unresolvedRelocation.symbolName = "unresolved"; auto linkerInput = std::make_unique<::WhiteBox>(); linkerInput->dataRelocations.push_back(unresolvedRelocation); linkerInput->traits.requiresPatchingOfGlobalVariablesBuffer = true; module->unresolvedExternalsInfo.push_back({unresolvedRelocation}); module->translationUnit->programInfo.linkerInput = std::move(linkerInput); memoryOperationsHandler->makeResidentCalledCount = 0; neoDevice->executionEnvironment->rootDeviceEnvironments[0]->osInterface.reset(nullptr); module->initialize(&moduleDesc, neoDevice); EXPECT_EQ(0, memoryOperationsHandler->makeResidentCalledCount); for (auto &ki : module->getKernelImmutableDataVector()) { EXPECT_FALSE(ki->isIsaCopiedToAllocation()); } EXPECT_FALSE(module->isFullyLinked); } HWTEST_F(NotifyModuleLoadTest, givenDebuggingEnabledWhenModuleWithUnresolvedSymbolsIsDynamicallyLinkedThenIsaAllocationsAreCopiedAndMadeResident) { NEO::MockCompilerEnableGuard mock(true); auto cip = new NEO::MockCompilerInterfaceCaptureBuildOptions(); neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()]->compilerInterface.reset(cip); auto memoryOperationsHandler = new NEO::MockMemoryOperations(); neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[0]->memoryOperationsInterface.reset(memoryOperationsHandler); auto debugger = MockDebuggerL0Hw::allocate(neoDevice); neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[0]->debugger.reset(debugger); std::string testFile; retrieveBinaryKernelFilenameApiSpecific(testFile, binaryFilename + "_", ".bin"); size_t size = 0; auto src = loadDataFromFile(testFile.c_str(), size); ASSERT_NE(0u, size); ASSERT_NE(nullptr, src); ze_module_desc_t moduleDesc = {}; moduleDesc.format = ZE_MODULE_FORMAT_NATIVE; moduleDesc.pInputModule = 
reinterpret_cast(src.get()); moduleDesc.inputSize = size; ModuleBuildLog *moduleBuildLog = nullptr; auto module = std::unique_ptr(new Module(device, moduleBuildLog, ModuleType::User)); ASSERT_NE(nullptr, module.get()); constexpr uint64_t gpuAddress = 0x12345; constexpr uint32_t offset = 0x20; NEO::Linker::RelocationInfo unresolvedRelocation; unresolvedRelocation.symbolName = "unresolved"; unresolvedRelocation.offset = offset; unresolvedRelocation.type = NEO::Linker::RelocationInfo::Type::Address; NEO::Linker::UnresolvedExternal unresolvedExternal; unresolvedExternal.unresolvedRelocation = unresolvedRelocation; auto linkerInput = std::make_unique<::WhiteBox>(); linkerInput->traits.requiresPatchingOfInstructionSegments = true; linkerInput->relocations.push_back({unresolvedRelocation}); module->getTranslationUnit()->programInfo.linkerInput = std::move(linkerInput); module->unresolvedExternalsInfo.push_back({unresolvedRelocation}); module->unresolvedExternalsInfo[0].instructionsSegmentId = 0u; memoryOperationsHandler->makeResidentCalledCount = 0; module->initialize(&moduleDesc, neoDevice); EXPECT_EQ(0, memoryOperationsHandler->makeResidentCalledCount); for (auto &ki : module->getKernelImmutableDataVector()) { EXPECT_FALSE(ki->isIsaCopiedToAllocation()); } NEO::SymbolInfo symbolInfo{}; NEO::Linker::RelocatedSymbol relocatedSymbol{symbolInfo, gpuAddress}; auto module1 = std::make_unique(device, nullptr, ModuleType::User); module1->symbols[unresolvedRelocation.symbolName] = relocatedSymbol; std::vector hModules = {module->toHandle(), module1->toHandle()}; ze_result_t res = module->performDynamicLink(2, hModules.data(), nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, res); EXPECT_TRUE(module->isFullyLinked); for (auto &ki : module->getKernelImmutableDataVector()) { EXPECT_TRUE(ki->isIsaCopiedToAllocation()); } EXPECT_EQ(4, memoryOperationsHandler->makeResidentCalledCount); } } // namespace ult } // namespace L0 
test_source_level_debugger.cpp000066400000000000000000000323631422164147700351250ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/debugger/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/linear_stream.h" #include "shared/source/gen_common/reg_configs_common.h" #include "shared/source/helpers/preamble.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/mocks/mock_compilers.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/source/cmdqueue/cmdqueue_hw.h" #include "level_zero/core/source/fence/fence.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h" #include #include "active_debugger_fixture.h" #include "gtest/gtest.h" namespace L0 { namespace ult { using CommandQueueDebugCommandsTest = Test; HWTEST2_F(CommandQueueDebugCommandsTest, givenDebuggingEnabledWhenCommandListIsExecutedThenKernelDebugCommandsAreAdded, IsAtMostGen12lp) { NEO::MockCompilerEnableGuard mock(true); ze_command_queue_desc_t queueDesc = {}; ze_result_t returnValue; auto commandQueue = whitebox_cast(CommandQueue::create(productFamily, deviceL0, device->getDefaultEngine().commandStreamReceiver, &queueDesc, false, false, returnValue)); ASSERT_NE(nullptr, commandQueue->commandStream); auto usedSpaceBefore = commandQueue->commandStream->getUsed(); ze_command_list_handle_t commandLists[] = { CommandList::create(productFamily, deviceL0, NEO::EngineGroupType::RenderCompute, 0u, returnValue)->toHandle()}; uint32_t numCommandLists = sizeof(commandLists) / sizeof(commandLists[0]); auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto usedSpaceAfter = commandQueue->commandStream->getUsed(); ASSERT_GT(usedSpaceAfter, usedSpaceBefore); GenCmdList cmdList; 
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandQueue->commandStream->getCpuBase(), 0), usedSpaceAfter)); using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; auto miLoadImm = findAll(cmdList.begin(), cmdList.end()); ASSERT_LE(2u, miLoadImm.size()); MI_LOAD_REGISTER_IMM *miLoad = genCmdCast(*miLoadImm[0]); ASSERT_NE(nullptr, miLoad); EXPECT_EQ(DebugModeRegisterOffset::registerOffset, miLoad->getRegisterOffset()); EXPECT_EQ(DebugModeRegisterOffset::debugEnabledValue, miLoad->getDataDword()); miLoad = genCmdCast(*miLoadImm[1]); ASSERT_NE(nullptr, miLoad); EXPECT_EQ(TdDebugControlRegisterOffset::registerOffset, miLoad->getRegisterOffset()); EXPECT_EQ(TdDebugControlRegisterOffset::debugEnabledValue, miLoad->getDataDword()); for (auto i = 0u; i < numCommandLists; i++) { auto commandList = CommandList::fromHandle(commandLists[i]); commandList->destroy(); } commandQueue->destroy(); } HWTEST2_F(CommandQueueDebugCommandsTest, givenDebuggingEnabledWhenCommandListIsExecutedTwiceThenKernelDebugCommandsAreAddedOnlyOnce, IsAtMostGen12lp) { using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; using STATE_SIP = typename FamilyType::STATE_SIP; ze_command_queue_desc_t queueDesc = {}; ze_result_t returnValue; auto commandQueue = whitebox_cast(CommandQueue::create(productFamily, deviceL0, device->getDefaultEngine().commandStreamReceiver, &queueDesc, false, false, returnValue)); ASSERT_NE(nullptr, commandQueue->commandStream); auto usedSpaceBefore = commandQueue->commandStream->getUsed(); ze_command_list_handle_t commandLists[] = { CommandList::create(productFamily, deviceL0, NEO::EngineGroupType::RenderCompute, 0u, returnValue)->toHandle()}; uint32_t numCommandLists = sizeof(commandLists) / sizeof(commandLists[0]); auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto usedSpaceAfter = commandQueue->commandStream->getUsed(); 
ASSERT_GT(usedSpaceAfter, usedSpaceBefore); size_t debugModeRegisterCount = 0; size_t tdDebugControlRegisterCount = 0; { GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandQueue->commandStream->getCpuBase(), 0), usedSpaceAfter)); auto miLoadImm = findAll(cmdList.begin(), cmdList.end()); for (size_t i = 0; i < miLoadImm.size(); i++) { MI_LOAD_REGISTER_IMM *miLoad = genCmdCast(*miLoadImm[i]); ASSERT_NE(nullptr, miLoad); if (miLoad->getRegisterOffset() == DebugModeRegisterOffset::registerOffset) { EXPECT_EQ(DebugModeRegisterOffset::debugEnabledValue, miLoad->getDataDword()); debugModeRegisterCount++; } if (miLoad->getRegisterOffset() == TdDebugControlRegisterOffset::registerOffset) { EXPECT_EQ(TdDebugControlRegisterOffset::debugEnabledValue, miLoad->getDataDword()); tdDebugControlRegisterCount++; } } EXPECT_EQ(1u, debugModeRegisterCount); EXPECT_EQ(1u, tdDebugControlRegisterCount); } result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto usedSpaceAfter2 = commandQueue->commandStream->getUsed(); ASSERT_GT(usedSpaceAfter2, usedSpaceAfter); { GenCmdList cmdList2; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList2, ptrOffset(commandQueue->commandStream->getCpuBase(), usedSpaceAfter), usedSpaceAfter2 - usedSpaceAfter)); auto miLoadImm2 = findAll(cmdList2.begin(), cmdList2.end()); for (size_t i = 0; i < miLoadImm2.size(); i++) { MI_LOAD_REGISTER_IMM *miLoad = genCmdCast(*miLoadImm2[i]); ASSERT_NE(nullptr, miLoad); if (miLoad->getRegisterOffset() == DebugModeRegisterOffset::registerOffset) { debugModeRegisterCount++; } if (miLoad->getRegisterOffset() == TdDebugControlRegisterOffset::registerOffset) { tdDebugControlRegisterCount++; } } EXPECT_EQ(1u, debugModeRegisterCount); EXPECT_EQ(1u, tdDebugControlRegisterCount); } for (auto i = 0u; i < numCommandLists; i++) { auto commandList = CommandList::fromHandle(commandLists[i]); 
commandList->destroy(); } commandQueue->destroy(); } using SLDebuggerInternalUsageTest = Test; HWTEST2_F(SLDebuggerInternalUsageTest, givenDebuggingEnabledWhenInternalCmdQIsUsedThenDebuggerPathsAreNotExecuted, IsAtLeastSkl) { using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; using STATE_SIP = typename FamilyType::STATE_SIP; ze_command_queue_desc_t queueDesc = {}; device->setPreemptionMode(NEO::PreemptionMode::Disabled); std::unique_ptr, Deleter> commandQueue(new MockCommandQueueHw(deviceL0, device->getDefaultEngine().commandStreamReceiver, &queueDesc)); commandQueue->initialize(false, true); EXPECT_TRUE(commandQueue->internalUsage); ze_result_t returnValue; ze_command_list_handle_t commandLists[] = { CommandList::createImmediate(productFamily, deviceL0, &queueDesc, true, NEO::EngineGroupType::RenderCompute, returnValue)->toHandle()}; uint32_t numCommandLists = sizeof(commandLists) / sizeof(commandLists[0]); auto usedSpaceBefore = commandQueue->commandStream->getUsed(); auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto usedSpaceAfter = commandQueue->commandStream->getUsed(); ASSERT_GT(usedSpaceAfter, usedSpaceBefore); size_t debugModeRegisterCount = 0; size_t tdDebugControlRegisterCount = 0; { GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandQueue->commandStream->getCpuBase(), 0), usedSpaceAfter)); auto miLoadImm = findAll(cmdList.begin(), cmdList.end()); for (size_t i = 0; i < miLoadImm.size(); i++) { MI_LOAD_REGISTER_IMM *miLoad = genCmdCast(*miLoadImm[i]); if (miLoad) { if (miLoad->getRegisterOffset() == DebugModeRegisterOffset::registerOffset) { debugModeRegisterCount++; } if (miLoad->getRegisterOffset() == TdDebugControlRegisterOffset::registerOffset) { tdDebugControlRegisterCount++; } } } EXPECT_EQ(0u, debugModeRegisterCount); EXPECT_EQ(0u, tdDebugControlRegisterCount); auto stateSip = 
findAll(cmdList.begin(), cmdList.end()); EXPECT_EQ(0u, stateSip.size()); } auto sipIsa = NEO::SipKernel::getSipKernel(*device).getSipAllocation(); auto debugSurface = deviceL0->getDebugSurface(); bool sipFound = false; bool debugSurfaceFound = false; for (auto iter : commandQueue->residencyContainerSnapshot) { if (iter == sipIsa) { sipFound = true; } if (iter == debugSurface) { debugSurfaceFound = true; } } EXPECT_FALSE(sipFound); EXPECT_FALSE(debugSurfaceFound); auto commandList = CommandList::fromHandle(commandLists[0]); commandList->destroy(); } using DeviceWithDebuggerEnabledTest = Test; TEST_F(DeviceWithDebuggerEnabledTest, givenDebuggingEnabledWhenDeviceIsCreatedThenItHasDebugSurfaceCreatedWithCorrectAllocationType) { ASSERT_NE(nullptr, deviceL0->getDebugSurface()); EXPECT_EQ(NEO::AllocationType::DEBUG_CONTEXT_SAVE_AREA, deviceL0->getDebugSurface()->getAllocationType()); } TEST_F(DeviceWithDebuggerEnabledTest, givenSldDebuggerWhenGettingL0DebuggerThenNullptrIsReturned) { EXPECT_EQ(nullptr, deviceL0->getL0Debugger()); } struct TwoSubDevicesDebuggerEnabledTest : public ActiveDebuggerFixture, public ::testing::Test { void SetUp() override { // NOLINT(readability-identifier-naming) DebugManager.flags.CreateMultipleSubDevices.set(2); VariableBackup mockDeviceFlagBackup(&MockDevice::createSingleDevice, false); ActiveDebuggerFixture::SetUp(); } void TearDown() override { // NOLINT(readability-identifier-naming) ActiveDebuggerFixture::TearDown(); } DebugManagerStateRestore restorer; }; TEST_F(TwoSubDevicesDebuggerEnabledTest, givenDebuggingEnabledWhenSubDevicesAreCreatedThenDebugSurfaceFromRootDeviceIsSet) { NEO::MockCompilerEnableGuard mock(true); auto subDevice0 = static_cast(deviceL0)->subDevices[0]; auto subDevice1 = static_cast(deviceL0)->subDevices[1]; EXPECT_NE(nullptr, subDevice0->getDebugSurface()); EXPECT_NE(nullptr, subDevice1->getDebugSurface()); EXPECT_EQ(subDevice0->getDebugSurface(), subDevice1->getDebugSurface()); 
EXPECT_EQ(deviceL0->getDebugSurface(), subDevice0->getDebugSurface()); } TEST_F(TwoSubDevicesDebuggerEnabledTest, givenDebuggingEnabledWhenSubDevicesAreCreatedThenDebugSurfaceIsProperlyInitialized) { NEO::MockCompilerEnableGuard mock(true); auto debugSurface = deviceL0->getDebugSurface(); EXPECT_NE(nullptr, debugSurface); auto &stateSaveAreaHeader = SipKernel::getSipKernel(*deviceL0->getNEODevice()).getStateSaveAreaHeader(); for (auto i = 0u; i < debugSurface->storageInfo.getNumBanks(); ++i) { EXPECT_EQ(0, memcmp(static_cast(debugSurface->getUnderlyingBuffer()) + i * debugSurface->getUnderlyingBufferSize(), stateSaveAreaHeader.data(), stateSaveAreaHeader.size())); } } TEST(Debugger, GivenLegacyDebuggerAndProgramDebuggingEnabledWhenInitializingDriverThenAbortIsCalledAfterPrintingError) { DebugManagerStateRestore restorer; NEO::DebugManager.flags.PrintDebugMessages.set(1); ::testing::internal::CaptureStderr(); auto executionEnvironment = new NEO::ExecutionEnvironment(); executionEnvironment->prepareRootDeviceEnvironments(1); executionEnvironment->rootDeviceEnvironments[0]->debugger.reset(new MockSourceLevelDebugger()); auto hwInfo = *NEO::defaultHwInfo.get(); executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(&hwInfo); executionEnvironment->initializeMemoryManager(); executionEnvironment->setDebuggingEnabled(); auto neoDevice = NEO::MockDevice::create(executionEnvironment, 0u); NEO::DeviceVector devices; devices.push_back(std::unique_ptr(neoDevice)); auto driverHandle = std::make_unique>(); driverHandle->enableProgramDebugging = true; EXPECT_THROW(driverHandle->initialize(std::move(devices)), std::exception); std::string output = testing::internal::GetCapturedStderr(); EXPECT_EQ(std::string("Source Level Debugger cannot be used with Environment Variable enabling program debugging.\n"), output); } } // namespace ult } // namespace L0 
compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/deferred_deleter_test.cpp000066400000000000000000000005401422164147700323410ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/deferred_deleter_helper.h" #include "gtest/gtest.h" using namespace NEO; TEST(DeferredDeleterHelper, GivenDeferredDeleterHelperWhenCheckIFDeferrDeleterIsEnabledThenFalseIsReturned) { EXPECT_FALSE(isDeferredDeleterEnabled()); } compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/device/000077500000000000000000000000001422164147700265525ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/device/CMakeLists.txt000066400000000000000000000006261422164147700313160ustar00rootroot00000000000000# # Copyright (C) 2020-2022 Intel Corporation # # SPDX-License-Identifier: MIT # target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/test_device.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_device_pci_speed_info.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_device_pci_speed_info.h ) add_subdirectories() compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/device/linux/000077500000000000000000000000001422164147700277115ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/device/linux/CMakeLists.txt000066400000000000000000000005371422164147700324560ustar00rootroot00000000000000# # Copyright (C) 2021-2022 Intel Corporation # # SPDX-License-Identifier: MIT # if(UNIX) target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/test_device_uuid.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_device_pci_speed_info_linux.cpp ) endif() 
test_device_pci_speed_info_linux.cpp000066400000000000000000000030621422164147700371020ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/device/linux/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "shared/test/common/mocks/ult_device_factory.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/sources/device/test_device_pci_speed_info.h" #include "gtest/gtest.h" namespace L0 { namespace ult { class PciSpeedInfoTestDriverModel : public MockDriverModel { public: PciSpeedInfoTestDriverModel() : MockDriverModel(0) {} void setExpectedPciSpeedInfo(const PhyicalDevicePciSpeedInfo &pciSpeedInfo) { returnedSpeedInfo = pciSpeedInfo; } NEO::PhyicalDevicePciSpeedInfo getPciSpeedInfo() const override { return returnedSpeedInfo; } NEO::PhysicalDevicePciBusInfo getPciBusInfo() const override { return NEO::PhysicalDevicePciBusInfo(0, 1, 2, 3); } PhyicalDevicePciSpeedInfo returnedSpeedInfo = {-1, -1, -1}; }; void PciSpeedInfoTest::setPciSpeedInfo(NEO::ExecutionEnvironment *executionEnvironment, const NEO::PhyicalDevicePciSpeedInfo &pciSpeedInfo) { executionEnvironment->rootDeviceEnvironments[0]->osInterface->setDriverModel( std::make_unique()); PciSpeedInfoTestDriverModel *driverModel = static_cast(executionEnvironment->rootDeviceEnvironments[0]->osInterface->getDriverModel()); driverModel->setExpectedPciSpeedInfo(pciSpeedInfo); } } // namespace ult } // namespace L0 test_device_uuid.cpp000066400000000000000000000162331422164147700336670ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/device/linux/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/device/root_device.h" #include 
"shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/libult/linux/drm_mock.h" #include "shared/test/common/mocks/linux/mock_drm_allocation.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "shared/test/common/mocks/ult_device_factory.h" #include "shared/test/common/os_interface/linux/sys_calls_linux_ult.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_built_ins.h" #include "gtest/gtest.h" namespace L0 { namespace ult { struct TestDeviceUuid : public ::testing::Test { void SetUp() override {} void TearDown() override {} DebugManagerStateRestore restorer; }; HWTEST2_F(TestDeviceUuid, GivenCorrectTelemetryNodesAreAvailableWhenRetrievingDeviceAndSubDevicePropertiesThenCorrectUuidIsReceived, IsXEHP) { VariableBackup mockReadLink(&NEO::SysCalls::sysCallsReadlink, [](const char *path, char *buf, size_t bufsize) -> int { std::map fileNameLinkMap = { {"/sys/dev/char/226:128", "../../devices/pci0000:37/0000:37:01.0/0000:38:00.0/0000:39:01.0/0000:3a:00.0/drm/renderD128"}, {"/sys/class/intel_pmt/telem3", "./../devices/pci0000:37/0000:37:01.0/0000:38:00.0/0000:39:02.0/0000:3c:00.1/intel-dvsec-2.1.auto/intel_pmt/telem3/"}, {"/sys/class/intel_pmt/telem1", "./../devices/pci0000:37/0000:37:01.0/0000:38:00.0/0000:39:02.0/0000:3c:00.1/intel-dvsec-2.1.auto/intel_pmt/telem1/"}, {"/sys/class/intel_pmt/telem2", "./../devices/pci0000:37/0000:37:01.0/0000:38:00.0/0000:39:02.0/0000:3c:00.1/intel-dvsec-2.1.auto/intel_pmt/telem2/"}, }; auto it = fileNameLinkMap.find(std::string(path)); if (it != fileNameLinkMap.end()) { std::memcpy(buf, it->second.c_str(), it->second.size()); return static_cast(it->second.size()); } return -1; }); VariableBackup mockOpen(&NEO::SysCalls::sysCallsOpen, [](const char *pathname, int flags) -> int { std::vector supportedFiles = { 
"/sys/class/intel_pmt/telem1/guid", "/sys/class/intel_pmt/telem1/offset", "/sys/class/intel_pmt/telem1/telem", }; auto itr = std::find(supportedFiles.begin(), supportedFiles.end(), std::string(pathname)); if (itr != supportedFiles.end()) { // skipping "0" return static_cast(std::distance(supportedFiles.begin(), itr)) + 1; } return 0; }); VariableBackup mockPread(&NEO::SysCalls::sysCallsPread, [](int fd, void *buf, size_t count, off_t offset) -> ssize_t { std::vector> supportedFiles = { {"/sys/class/intel_pmt/telem1/guid", "0xfdc76195"}, {"/sys/class/intel_pmt/telem1/offset", "0\n"}, {"/sys/class/intel_pmt/telem1/telem", "dummy"}, }; fd -= 1; if ((fd >= 0) && (fd < static_cast(supportedFiles.size()))) { if (supportedFiles[fd].second == "dummy") { uint64_t data = 0xFEEDBEADDEABDEEF; memcpy(buf, &data, sizeof(data)); return sizeof(data); } memcpy(buf, supportedFiles[fd].second.c_str(), supportedFiles[fd].second.size()); return supportedFiles[fd].second.size(); } return -1; }); DebugManager.flags.EnableChipsetUniqueUUID.set(1); DebugManager.flags.CreateMultipleSubDevices.set(2); std::unique_ptr> driverHandle; NEO::MockDevice *neoDevice = nullptr; L0::Device *device = nullptr; DrmMockResources *drmMock = nullptr; auto executionEnvironment = new NEO::ExecutionEnvironment(); auto mockBuiltIns = new MockBuiltins(); executionEnvironment->prepareRootDeviceEnvironments(1); executionEnvironment->rootDeviceEnvironments[0]->builtins.reset(mockBuiltIns); executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); auto osInterface = new OSInterface(); drmMock = new DrmMockResources(*executionEnvironment->rootDeviceEnvironments[0]); executionEnvironment->rootDeviceEnvironments[0]->osInterface.reset(osInterface); std::vector pciPaths = { "0000:3a:00.0"}; drmMock->setPciPath(pciPaths[0].c_str()); executionEnvironment->rootDeviceEnvironments[0]->osInterface->setDriverModel(std::unique_ptr(drmMock)); neoDevice = NEO::MockDevice::create(executionEnvironment, 0u); 
NEO::DeviceVector devices; devices.push_back(std::unique_ptr(neoDevice)); driverHandle = std::make_unique>(); driverHandle->initialize(std::move(devices)); device = driverHandle->devices[0]; uint64_t expectedVal = 0xFEEDBEADDEABDEEF; ze_device_properties_t deviceProps; deviceProps = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES}; EXPECT_EQ(ZE_RESULT_SUCCESS, device->getProperties(&deviceProps)); EXPECT_TRUE(0 == std::memcmp(deviceProps.uuid.id, &expectedVal, sizeof(expectedVal))); uint32_t subdeviceCount = neoDevice->getNumGenericSubDevices(); std::vector subdevices; subdevices.resize(subdeviceCount); device->getSubDevices(&subdeviceCount, subdevices.data()); uint8_t expectedUuid[16] = {0}; std::memcpy(expectedUuid, &expectedVal, sizeof(uint64_t)); expectedUuid[15] = 1; EXPECT_EQ(ZE_RESULT_SUCCESS, static_cast(subdevices[0])->getProperties(&deviceProps)); EXPECT_TRUE(0 == std::memcmp(deviceProps.uuid.id, expectedUuid, sizeof(expectedUuid))); expectedUuid[15] = 2; EXPECT_EQ(ZE_RESULT_SUCCESS, static_cast(subdevices[1])->getProperties(&deviceProps)); EXPECT_TRUE(0 == std::memcmp(deviceProps.uuid.id, expectedUuid, sizeof(expectedUuid))); } TEST_F(TestDeviceUuid, GivenEnableChipsetUniqueUuidIsSetWhenOsInterfaceIsNotSetThenUuidOfFallbackPathIsReceived) { DebugManager.flags.EnableChipsetUniqueUUID.set(1); auto neoDevice = NEO::MockDevice::createWithNewExecutionEnvironment(NEO::defaultHwInfo.get()); NEO::DeviceVector devices; devices.push_back(std::unique_ptr(neoDevice)); auto driverHandle = std::make_unique>(); driverHandle->initialize(std::move(devices)); auto device = driverHandle->devices[0]; ze_device_properties_t deviceProperties, devicePropertiesBefore; deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES}; memset(&deviceProperties.uuid, std::numeric_limits::max(), sizeof(deviceProperties.uuid)); devicePropertiesBefore = deviceProperties; EXPECT_EQ(ZE_RESULT_SUCCESS, device->getProperties(&deviceProperties)); EXPECT_NE(0, memcmp(&deviceProperties.uuid, 
&devicePropertiesBefore.uuid, sizeof(devicePropertiesBefore.uuid))); } } // namespace ult } // namespace L0compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/device/test_device.cpp000066400000000000000000004121551422164147700315640ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_container/implicit_scaling.h" #include "shared/source/device/root_device.h" #include "shared/source/helpers/bindless_heaps_helper.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/preamble.h" #include "shared/source/os_interface/hw_info_config.h" #include "shared/source/os_interface/os_inc_base.h" #include "shared/source/os_interface/os_time.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/mocks/mock_compilers.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/mocks/mock_driver_info.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "shared/test/common/mocks/mock_sip.h" #include "shared/test/common/mocks/ult_device_factory.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/source/cmdqueue/cmdqueue_imp.h" #include "level_zero/core/source/context/context_imp.h" #include "level_zero/core/source/driver/driver_handle_imp.h" #include "level_zero/core/source/driver/host_pointer_manager.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_built_ins.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h" #include "level_zero/core/test/unit_tests/mocks/mock_driver_handle.h" #include "level_zero/core/test/unit_tests/mocks/mock_memory_manager.h" #include "gtest/gtest.h" #include namespace NEO { extern HwHelper *hwHelperFactory[IGFX_MAX_CORE]; } // namespace NEO namespace L0 { namespace ult 
{ TEST(L0DeviceTest, GivenCreatedDeviceHandleWhenCallingdeviceReinitThenNewDeviceHandleIsNotCreated) { ze_result_t returnValue = ZE_RESULT_SUCCESS; std::unique_ptr driverHandle(new DriverHandleImp); auto hwInfo = *NEO::defaultHwInfo; auto neoDevice = std::unique_ptr(NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); auto device = Device::create(driverHandle.get(), neoDevice.release(), false, &returnValue); ASSERT_NE(nullptr, device); static_cast(device)->releaseResources(); auto newNeoDevice = std::unique_ptr(NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); EXPECT_EQ(device, Device::deviceReinit(device->getDriverHandle(), device, newNeoDevice, &returnValue)); delete device; } TEST(L0DeviceTest, GivenDualStorageSharedMemorySupportedWhenCreatingDeviceThenPageFaultCmdListImmediateWithInitializedCmdQIsCreated) { ze_result_t returnValue = ZE_RESULT_SUCCESS; DebugManagerStateRestore restorer; NEO::DebugManager.flags.AllocateSharedAllocationsWithCpuAndGpuStorage.set(1); std::unique_ptr driverHandle(new DriverHandleImp); auto hwInfo = *NEO::defaultHwInfo; hwInfo.featureTable.flags.ftrLocalMemory = true; auto neoDevice = std::unique_ptr(NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); auto device = std::unique_ptr(Device::create(driverHandle.get(), neoDevice.release(), false, &returnValue)); ASSERT_NE(nullptr, device); auto deviceImp = static_cast(device.get()); ASSERT_NE(nullptr, deviceImp->pageFaultCommandList); ASSERT_NE(nullptr, deviceImp->pageFaultCommandList->cmdQImmediate); EXPECT_NE(nullptr, static_cast(deviceImp->pageFaultCommandList->cmdQImmediate)->getCsr()); EXPECT_EQ(ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS, static_cast(deviceImp->pageFaultCommandList->cmdQImmediate)->getSynchronousMode()); } TEST(L0DeviceTest, givenMultipleMaskedSubDevicesWhenCreatingL0DeviceThenDontAddDisabledNeoDevies) { constexpr uint32_t numSubDevices = 3; constexpr uint32_t numMaskedSubDevices = 2; DebugManagerStateRestore restorer; 
DebugManager.flags.CreateMultipleSubDevices.set(numSubDevices); DebugManager.flags.ZE_AFFINITY_MASK.set("0.0,0.2"); auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); executionEnvironment->parseAffinityMask(); auto deviceFactory = std::make_unique(1, numSubDevices, *executionEnvironment.release()); auto rootDevice = deviceFactory->rootDevices[0]; EXPECT_NE(nullptr, rootDevice); EXPECT_EQ(numMaskedSubDevices, rootDevice->getNumSubDevices()); auto driverHandle = std::make_unique(); ze_result_t returnValue = ZE_RESULT_SUCCESS; auto device = std::unique_ptr(Device::create(driverHandle.get(), rootDevice, false, &returnValue)); ASSERT_NE(nullptr, device); auto deviceImp = static_cast(device.get()); ASSERT_EQ(numMaskedSubDevices, deviceImp->numSubDevices); EXPECT_EQ(0b1u, deviceImp->subDevices[0]->getNEODevice()->getDeviceBitfield().to_ulong()); EXPECT_EQ(0b100u, deviceImp->subDevices[1]->getNEODevice()->getDeviceBitfield().to_ulong()); } TEST(L0DeviceTest, givenMidThreadPreemptionWhenCreatingDeviceThenSipKernelIsInitialized) { NEO::MockCompilerEnableGuard mock(true); ze_result_t returnValue = ZE_RESULT_SUCCESS; VariableBackup mockSipCalled(&NEO::MockSipData::called, false); VariableBackup mockSipCalledType(&NEO::MockSipData::calledType, NEO::SipKernelType::COUNT); VariableBackup backupSipInitType(&MockSipData::useMockSip, true); std::unique_ptr driverHandle(new DriverHandleImp); auto hwInfo = *NEO::defaultHwInfo; hwInfo.capabilityTable.defaultPreemptionMode = NEO::PreemptionMode::MidThread; auto neoDevice = std::unique_ptr(NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); EXPECT_EQ(NEO::SipKernelType::COUNT, NEO::MockSipData::calledType); EXPECT_FALSE(NEO::MockSipData::called); auto device = std::unique_ptr(Device::create(driverHandle.get(), neoDevice.release(), false, &returnValue)); ASSERT_NE(nullptr, device); 
EXPECT_EQ(NEO::SipKernelType::Csr, NEO::MockSipData::calledType); EXPECT_TRUE(NEO::MockSipData::called); } TEST(L0DeviceTest, givenDebuggerEnabledButIGCNotReturnsSSAHThenSSAHIsNotCopied) { NEO::MockCompilerEnableGuard mock(true); auto executionEnvironment = new NEO::ExecutionEnvironment(); auto mockBuiltIns = new MockBuiltins(); mockBuiltIns->stateSaveAreaHeader.clear(); executionEnvironment->prepareRootDeviceEnvironments(1); executionEnvironment->rootDeviceEnvironments[0]->builtins.reset(mockBuiltIns); auto hwInfo = *NEO::defaultHwInfo.get(); hwInfo.featureTable.flags.ftrLocalMemory = true; executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(&hwInfo); executionEnvironment->initializeMemoryManager(); auto neoDevice = NEO::MockDevice::create(executionEnvironment, 0u); NEO::DeviceVector devices; devices.push_back(std::unique_ptr(neoDevice)); auto driverHandle = std::make_unique>(); driverHandle->enableProgramDebugging = true; driverHandle->initialize(std::move(devices)); auto sipType = SipKernel::getSipKernelType(*neoDevice); auto &stateSaveAreaHeader = neoDevice->getBuiltIns()->getSipKernel(sipType, *neoDevice).getStateSaveAreaHeader(); EXPECT_EQ(static_cast(0), stateSaveAreaHeader.size()); } TEST(L0DeviceTest, givenDisabledPreemptionWhenCreatingDeviceThenSipKernelIsNotInitialized) { ze_result_t returnValue = ZE_RESULT_SUCCESS; VariableBackup mockSipCalled(&NEO::MockSipData::called, false); VariableBackup mockSipCalledType(&NEO::MockSipData::calledType, NEO::SipKernelType::COUNT); VariableBackup backupSipInitType(&MockSipData::useMockSip, true); std::unique_ptr driverHandle(new DriverHandleImp); auto hwInfo = *NEO::defaultHwInfo; hwInfo.capabilityTable.defaultPreemptionMode = NEO::PreemptionMode::Disabled; auto neoDevice = std::unique_ptr(NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); EXPECT_EQ(NEO::SipKernelType::COUNT, NEO::MockSipData::calledType); EXPECT_FALSE(NEO::MockSipData::called); auto device = 
std::unique_ptr(Device::create(driverHandle.get(), neoDevice.release(), false, &returnValue)); ASSERT_NE(nullptr, device); EXPECT_EQ(NEO::SipKernelType::COUNT, NEO::MockSipData::calledType); EXPECT_FALSE(NEO::MockSipData::called); } TEST(L0DeviceTest, givenDeviceWithoutFCLCompilerLibraryThenInvalidDependencyReturned) { ze_result_t returnValue = ZE_RESULT_SUCCESS; std::unique_ptr driverHandle(new DriverHandleImp); auto hwInfo = *NEO::defaultHwInfo; auto neoDevice = std::unique_ptr(NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); auto oldFclDllName = Os::frontEndDllName; Os::frontEndDllName = "_invalidFCL"; auto device = std::unique_ptr(Device::create(driverHandle.get(), neoDevice.release(), false, &returnValue)); ASSERT_NE(nullptr, device); EXPECT_EQ(returnValue, ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE); Os::frontEndDllName = oldFclDllName; } TEST(L0DeviceTest, givenDeviceWithoutIGCCompilerLibraryThenInvalidDependencyReturned) { ze_result_t returnValue = ZE_RESULT_SUCCESS; std::unique_ptr driverHandle(new DriverHandleImp); auto hwInfo = *NEO::defaultHwInfo; auto neoDevice = std::unique_ptr(NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); auto oldIgcDllName = Os::igcDllName; Os::igcDllName = "_invalidIGC"; auto device = std::unique_ptr(Device::create(driverHandle.get(), neoDevice.release(), false, &returnValue)); ASSERT_NE(nullptr, device); EXPECT_EQ(returnValue, ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE); Os::igcDllName = oldIgcDllName; } TEST(L0DeviceTest, givenDeviceWithoutAnyCompilerLibraryThenInvalidDependencyReturned) { ze_result_t returnValue = ZE_RESULT_SUCCESS; std::unique_ptr driverHandle(new DriverHandleImp); auto hwInfo = *NEO::defaultHwInfo; auto neoDevice = std::unique_ptr(NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); auto oldFclDllName = Os::frontEndDllName; auto oldIgcDllName = Os::igcDllName; Os::frontEndDllName = "_invalidFCL"; Os::igcDllName = "_invalidIGC"; auto device = 
std::unique_ptr(Device::create(driverHandle.get(), neoDevice.release(), false, &returnValue)); ASSERT_NE(nullptr, device); EXPECT_EQ(returnValue, ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE); Os::igcDllName = oldIgcDllName; Os::frontEndDllName = oldFclDllName; } TEST(L0DeviceTest, givenFilledTopologyWhenGettingApiSliceThenCorrectSliceIdIsReturned) { NEO::MockCompilerEnableGuard mock(true); std::unique_ptr driverHandle(new DriverHandleImp); auto hwInfo = *NEO::defaultHwInfo; auto neoDevice = std::unique_ptr(NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); auto device = std::unique_ptr(Device::create(driverHandle.get(), neoDevice.release(), false, nullptr)); ASSERT_NE(nullptr, device); auto deviceImp = static_cast(device.get()); NEO::TopologyMap map; TopologyMapping mapping; mapping.sliceIndices.resize(hwInfo.gtSystemInfo.SliceCount); for (uint32_t i = 0; i < hwInfo.gtSystemInfo.SliceCount; i++) { mapping.sliceIndices[i] = i; } mapping.subsliceIndices.resize(hwInfo.gtSystemInfo.SubSliceCount / hwInfo.gtSystemInfo.SliceCount); for (uint32_t i = 0; i < hwInfo.gtSystemInfo.SubSliceCount / hwInfo.gtSystemInfo.SliceCount; i++) { mapping.subsliceIndices[i] = i; } map[0] = mapping; uint32_t sliceId = hwInfo.gtSystemInfo.SliceCount - 1; uint32_t subsliceId = 0; auto ret = deviceImp->toApiSliceId(map, sliceId, subsliceId, 0); EXPECT_EQ(hwInfo.gtSystemInfo.SliceCount - 1, sliceId); EXPECT_TRUE(ret); for (uint32_t i = 0; i < hwInfo.gtSystemInfo.SliceCount; i++) { mapping.sliceIndices[i] = i + 1; } map[0] = mapping; sliceId = 1; ret = deviceImp->toApiSliceId(map, sliceId, subsliceId, 0); EXPECT_EQ(0u, sliceId); EXPECT_TRUE(ret); } TEST(L0DeviceTest, givenFilledTopologyForZeroSubDeviceWhenGettingApiSliceForHigherSubDevicesThenFalseIsReturned) { NEO::MockCompilerEnableGuard mock(true); std::unique_ptr driverHandle(new DriverHandleImp); auto hwInfo = *NEO::defaultHwInfo; auto neoDevice = std::unique_ptr(NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); 
auto device = std::unique_ptr(Device::create(driverHandle.get(), neoDevice.release(), false, nullptr)); ASSERT_NE(nullptr, device); auto deviceImp = static_cast(device.get()); NEO::TopologyMap map; TopologyMapping mapping; mapping.sliceIndices.resize(hwInfo.gtSystemInfo.SliceCount); for (uint32_t i = 0; i < hwInfo.gtSystemInfo.SliceCount; i++) { mapping.sliceIndices[i] = i + 1; } mapping.subsliceIndices.resize(hwInfo.gtSystemInfo.SubSliceCount / hwInfo.gtSystemInfo.SliceCount); for (uint32_t i = 0; i < hwInfo.gtSystemInfo.SubSliceCount / hwInfo.gtSystemInfo.SliceCount; i++) { mapping.subsliceIndices[i] = i; } map[0] = mapping; uint32_t sliceId = 1; uint32_t subsliceId = 0; const uint32_t deviceIndex = 2; auto ret = deviceImp->toApiSliceId(map, sliceId, subsliceId, deviceIndex); EXPECT_FALSE(ret); } TEST(L0DeviceTest, givenInvalidPhysicalSliceIdWhenGettingApiSliceIdThenFalseIsReturned) { NEO::MockCompilerEnableGuard mock(true); std::unique_ptr driverHandle(new DriverHandleImp); auto hwInfo = *NEO::defaultHwInfo; auto neoDevice = std::unique_ptr(NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); auto device = std::unique_ptr(Device::create(driverHandle.get(), neoDevice.release(), false, nullptr)); ASSERT_NE(nullptr, device); auto deviceImp = static_cast(device.get()); NEO::TopologyMap map; TopologyMapping mapping; mapping.sliceIndices.resize(hwInfo.gtSystemInfo.SliceCount); for (uint32_t i = 0; i < hwInfo.gtSystemInfo.SliceCount; i++) { mapping.sliceIndices[i] = i; } mapping.subsliceIndices.resize(hwInfo.gtSystemInfo.SubSliceCount / hwInfo.gtSystemInfo.SliceCount); for (uint32_t i = 0; i < hwInfo.gtSystemInfo.SubSliceCount / hwInfo.gtSystemInfo.SliceCount; i++) { mapping.subsliceIndices[i] = i; } map[0] = mapping; uint32_t sliceId = hwInfo.gtSystemInfo.SliceCount + 1; uint32_t subsliceId = 0; auto ret = deviceImp->toApiSliceId(map, sliceId, subsliceId, 0); EXPECT_FALSE(ret); } TEST(L0DeviceTest, 
givenInvalidApiSliceIdWhenGettingPhysicalSliceIdThenFalseIsReturned) { NEO::MockCompilerEnableGuard mock(true); std::unique_ptr driverHandle(new DriverHandleImp); auto hwInfo = *NEO::defaultHwInfo; auto neoDevice = std::unique_ptr(NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); auto device = std::unique_ptr(Device::create(driverHandle.get(), neoDevice.release(), false, nullptr)); ASSERT_NE(nullptr, device); auto deviceImp = static_cast(device.get()); NEO::TopologyMap map; TopologyMapping mapping; mapping.sliceIndices.resize(hwInfo.gtSystemInfo.SliceCount); for (uint32_t i = 0; i < hwInfo.gtSystemInfo.SliceCount; i++) { mapping.sliceIndices[i] = i; } mapping.subsliceIndices.resize(hwInfo.gtSystemInfo.SubSliceCount / hwInfo.gtSystemInfo.SliceCount); for (uint32_t i = 0; i < hwInfo.gtSystemInfo.SubSliceCount / hwInfo.gtSystemInfo.SliceCount; i++) { mapping.subsliceIndices[i] = i; } map[0] = mapping; uint32_t sliceId = hwInfo.gtSystemInfo.SliceCount + 1; uint32_t subsliceId = 1; uint32_t deviceIndex = 0; auto ret = deviceImp->toPhysicalSliceId(map, sliceId, subsliceId, deviceIndex); EXPECT_FALSE(ret); } TEST(L0DeviceTest, givenEmptyTopologyWhenGettingApiSliceIdThenFalseIsReturned) { NEO::MockCompilerEnableGuard mock(true); std::unique_ptr driverHandle(new DriverHandleImp); auto hwInfo = *NEO::defaultHwInfo; auto neoDevice = std::unique_ptr(NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); auto device = std::unique_ptr(Device::create(driverHandle.get(), neoDevice.release(), false, nullptr)); ASSERT_NE(nullptr, device); auto deviceImp = static_cast(device.get()); NEO::TopologyMap map; uint32_t sliceId = hwInfo.gtSystemInfo.SliceCount - 1; uint32_t subsliceId = 0; auto ret = deviceImp->toApiSliceId(map, sliceId, subsliceId, 0); EXPECT_FALSE(ret); } TEST(L0DeviceTest, givenDeviceWithoutSubDevicesWhenGettingPhysicalSliceIdThenCorrectValuesAreReturned) { NEO::MockCompilerEnableGuard mock(true); std::unique_ptr driverHandle(new 
DriverHandleImp); auto hwInfo = *NEO::defaultHwInfo; auto neoDevice = std::unique_ptr(NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); auto device = std::unique_ptr(Device::create(driverHandle.get(), neoDevice.release(), false, nullptr)); ASSERT_NE(nullptr, device); auto deviceImp = static_cast(device.get()); NEO::TopologyMap map; TopologyMapping mapping; mapping.sliceIndices.resize(hwInfo.gtSystemInfo.SliceCount); for (uint32_t i = 0; i < hwInfo.gtSystemInfo.SliceCount; i++) { mapping.sliceIndices[i] = i + 1; } mapping.subsliceIndices.resize(hwInfo.gtSystemInfo.SubSliceCount / hwInfo.gtSystemInfo.SliceCount); for (uint32_t i = 0; i < hwInfo.gtSystemInfo.SubSliceCount / hwInfo.gtSystemInfo.SliceCount; i++) { mapping.subsliceIndices[i] = i; } map[0] = mapping; uint32_t sliceId = 0; uint32_t subsliceId = 0; uint32_t deviceIndex = 10; auto ret = deviceImp->toPhysicalSliceId(map, sliceId, subsliceId, deviceIndex); EXPECT_TRUE(ret); EXPECT_EQ(1u, sliceId); EXPECT_EQ(0u, subsliceId); EXPECT_EQ(0u, deviceIndex); } TEST(L0DeviceTest, givenTopologyNotAvaialbleWhenGettingPhysicalSliceIdThenFalseIsReturned) { NEO::MockCompilerEnableGuard mock(true); std::unique_ptr driverHandle(new DriverHandleImp); auto hwInfo = *NEO::defaultHwInfo; auto neoDevice = std::unique_ptr(NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); auto device = std::unique_ptr(Device::create(driverHandle.get(), neoDevice.release(), false, nullptr)); ASSERT_NE(nullptr, device); auto deviceImp = static_cast(device.get()); NEO::TopologyMap map; TopologyMapping mapping; uint32_t sliceId = 0; uint32_t subsliceId = 0; uint32_t deviceIndex = 10; auto ret = deviceImp->toPhysicalSliceId(map, sliceId, subsliceId, deviceIndex); EXPECT_FALSE(ret); } TEST(L0DeviceTest, givenSingleSliceTopologyWhenConvertingToApiIdsThenSubsliceIdsAreRemapped) { NEO::MockCompilerEnableGuard mock(true); std::unique_ptr driverHandle(new DriverHandleImp); auto hwInfo = *NEO::defaultHwInfo; 
hwInfo.gtSystemInfo.SliceCount = 1; auto neoDevice = std::unique_ptr(NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); auto device = std::unique_ptr(Device::create(driverHandle.get(), neoDevice.release(), false, nullptr)); ASSERT_NE(nullptr, device); auto deviceImp = static_cast(device.get()); NEO::TopologyMap map; TopologyMapping mapping; mapping.sliceIndices.resize(hwInfo.gtSystemInfo.SliceCount); for (uint32_t i = 0; i < hwInfo.gtSystemInfo.SliceCount; i++) { mapping.sliceIndices[i] = i; } mapping.subsliceIndices.resize(hwInfo.gtSystemInfo.SubSliceCount / hwInfo.gtSystemInfo.SliceCount); //disable 5 physical subslices, shift subslice ids by 5 for (uint32_t i = 0; i < hwInfo.gtSystemInfo.SubSliceCount / hwInfo.gtSystemInfo.SliceCount; i++) { mapping.subsliceIndices[i] = i + 5; } map[0] = mapping; uint32_t sliceId = 0; uint32_t subsliceId = 5; auto ret = deviceImp->toApiSliceId(map, sliceId, subsliceId, 0); EXPECT_EQ(0u, sliceId); EXPECT_EQ(0u, subsliceId); EXPECT_TRUE(ret); for (uint32_t i = 0; i < hwInfo.gtSystemInfo.SliceCount; i++) { mapping.sliceIndices[i] = i + 1; } map[0] = mapping; sliceId = 1; subsliceId = 5; ret = deviceImp->toApiSliceId(map, sliceId, subsliceId, 0); EXPECT_EQ(0u, sliceId); EXPECT_EQ(0u, subsliceId); EXPECT_TRUE(ret); } TEST(L0DeviceTest, givenSingleSliceTopologyWhenConvertingToPhysicalIdsThenSubsliceIdsAreRemapped) { NEO::MockCompilerEnableGuard mock(true); std::unique_ptr driverHandle(new DriverHandleImp); auto hwInfo = *NEO::defaultHwInfo; hwInfo.gtSystemInfo.SliceCount = 1; auto neoDevice = std::unique_ptr(NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); auto device = std::unique_ptr(Device::create(driverHandle.get(), neoDevice.release(), false, nullptr)); ASSERT_NE(nullptr, device); auto deviceImp = static_cast(device.get()); NEO::TopologyMap map; TopologyMapping mapping; mapping.sliceIndices.resize(hwInfo.gtSystemInfo.SliceCount); for (uint32_t i = 0; i < hwInfo.gtSystemInfo.SliceCount; i++) { 
mapping.sliceIndices[i] = i; } mapping.subsliceIndices.resize(hwInfo.gtSystemInfo.SubSliceCount / hwInfo.gtSystemInfo.SliceCount); //disable 5 physical subslices, shift subslice ids by 5 for (uint32_t i = 0; i < hwInfo.gtSystemInfo.SubSliceCount / hwInfo.gtSystemInfo.SliceCount; i++) { mapping.subsliceIndices[i] = i + 5; } map[0] = mapping; uint32_t sliceId = 0; uint32_t subsliceId = 0; uint32_t deviceIndex = 0; auto ret = deviceImp->toPhysicalSliceId(map, sliceId, subsliceId, deviceIndex); EXPECT_EQ(0u, sliceId); EXPECT_EQ(5u, subsliceId); EXPECT_EQ(0u, deviceIndex); EXPECT_TRUE(ret); for (uint32_t i = 0; i < hwInfo.gtSystemInfo.SliceCount; i++) { mapping.sliceIndices[i] = i + 1; } map[0] = mapping; sliceId = 0; subsliceId = 0; ret = deviceImp->toPhysicalSliceId(map, sliceId, subsliceId, deviceIndex); EXPECT_EQ(1u, sliceId); EXPECT_EQ(5u, subsliceId); EXPECT_EQ(0u, deviceIndex); EXPECT_TRUE(ret); } struct DeviceTest : public ::testing::Test { void SetUp() override { NEO::MockCompilerEnableGuard mock(true); DebugManager.flags.CreateMultipleRootDevices.set(numRootDevices); neoDevice = NEO::MockDevice::createWithNewExecutionEnvironment(NEO::defaultHwInfo.get(), rootDeviceIndex); NEO::DeviceVector devices; devices.push_back(std::unique_ptr(neoDevice)); driverHandle = std::make_unique>(); driverHandle->initialize(std::move(devices)); device = driverHandle->devices[0]; } DebugManagerStateRestore restorer; std::unique_ptr> driverHandle; NEO::Device *neoDevice = nullptr; L0::Device *device = nullptr; const uint32_t rootDeviceIndex = 1u; const uint32_t numRootDevices = 2u; }; TEST_F(DeviceTest, givenEmptySVmAllocStorageWhenAllocateManagedMemoryFromHostPtrThenBufferHostAllocationIsCreated) { int data; auto allocation = device->allocateManagedMemoryFromHostPtr(&data, sizeof(data), nullptr); EXPECT_NE(nullptr, allocation); EXPECT_EQ(NEO::AllocationType::BUFFER_HOST_MEMORY, allocation->getAllocationType()); EXPECT_EQ(rootDeviceIndex, allocation->getRootDeviceIndex()); 
neoDevice->getMemoryManager()->freeGraphicsMemory(allocation); } TEST_F(DeviceTest, givenEmptySVmAllocStorageWhenAllocateMemoryFromHostPtrThenValidExternalHostPtrAllocationIsCreated) { DebugManager.flags.EnableHostPtrTracking.set(0); constexpr auto dataSize = 1024u; auto data = std::make_unique(dataSize); constexpr auto allocationSize = sizeof(int) * dataSize; auto allocation = device->allocateMemoryFromHostPtr(data.get(), allocationSize, false); EXPECT_NE(nullptr, allocation); EXPECT_EQ(NEO::AllocationType::EXTERNAL_HOST_PTR, allocation->getAllocationType()); EXPECT_EQ(rootDeviceIndex, allocation->getRootDeviceIndex()); auto alignedPtr = alignDown(data.get(), MemoryConstants::pageSize); auto offsetInPage = ptrDiff(data.get(), alignedPtr); EXPECT_EQ(allocation->getAllocationOffset(), offsetInPage); EXPECT_EQ(allocation->getUnderlyingBufferSize(), allocationSize); EXPECT_EQ(allocation->isFlushL3Required(), true); neoDevice->getMemoryManager()->freeGraphicsMemory(allocation); } TEST_F(DeviceTest, givenNonEmptyAllocationsListWhenRequestingAllocationSmallerOrEqualInSizeThenAllocationFromListIsReturned) { auto deviceImp = static_cast(device); constexpr auto dataSize = 1024u; auto data = std::make_unique(dataSize); constexpr auto allocationSize = sizeof(int) * dataSize; auto allocation = device->getDriverHandle()->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getNEODevice()->getRootDeviceIndex(), allocationSize, NEO::AllocationType::FILL_PATTERN, neoDevice->getDeviceBitfield()}); device->storeReusableAllocation(*allocation); EXPECT_FALSE(deviceImp->allocationsForReuse->peekIsEmpty()); auto obtaindedAllocation = device->obtainReusableAllocation(dataSize, NEO::AllocationType::FILL_PATTERN); EXPECT_TRUE(deviceImp->allocationsForReuse->peekIsEmpty()); EXPECT_NE(nullptr, obtaindedAllocation); EXPECT_EQ(allocation, obtaindedAllocation); neoDevice->getMemoryManager()->freeGraphicsMemory(allocation); } TEST_F(DeviceTest, 
givenNonEmptyAllocationsListWhenRequestingAllocationBiggerInSizeThenNullptrIsReturned) { auto deviceImp = static_cast(device); constexpr auto dataSize = 1024u; auto data = std::make_unique(dataSize); constexpr auto allocationSize = sizeof(int) * dataSize; auto allocation = device->getDriverHandle()->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getNEODevice()->getRootDeviceIndex(), allocationSize, NEO::AllocationType::FILL_PATTERN, neoDevice->getDeviceBitfield()}); device->storeReusableAllocation(*allocation); EXPECT_FALSE(deviceImp->allocationsForReuse->peekIsEmpty()); auto obtaindedAllocation = device->obtainReusableAllocation(4 * dataSize + 1u, NEO::AllocationType::FILL_PATTERN); EXPECT_EQ(nullptr, obtaindedAllocation); EXPECT_FALSE(deviceImp->allocationsForReuse->peekIsEmpty()); } TEST_F(DeviceTest, givenNonEmptyAllocationsListAndUnproperAllocationTypeWhenRequestingAllocationThenNullptrIsReturned) { auto deviceImp = static_cast(device); constexpr auto dataSize = 1024u; auto data = std::make_unique(dataSize); constexpr auto allocationSize = sizeof(int) * dataSize; auto allocation = device->getDriverHandle()->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getNEODevice()->getRootDeviceIndex(), allocationSize, NEO::AllocationType::BUFFER, neoDevice->getDeviceBitfield()}); device->storeReusableAllocation(*allocation); EXPECT_FALSE(deviceImp->allocationsForReuse->peekIsEmpty()); auto obtaindedAllocation = device->obtainReusableAllocation(4 * dataSize + 1u, NEO::AllocationType::FILL_PATTERN); EXPECT_EQ(nullptr, obtaindedAllocation); EXPECT_FALSE(deviceImp->allocationsForReuse->peekIsEmpty()); } struct DeviceHostPointerTest : public ::testing::Test { void SetUp() override { executionEnvironment = new NEO::ExecutionEnvironment(); executionEnvironment->prepareRootDeviceEnvironments(numRootDevices); for (uint32_t i = 0; i < numRootDevices; i++) { executionEnvironment->rootDeviceEnvironments[i]->setHwInfo(NEO::defaultHwInfo.get()); } 
neoDevice = NEO::MockDevice::create(executionEnvironment, rootDeviceIndex); std::vector> devices; devices.push_back(std::unique_ptr(neoDevice)); driverHandle = std::make_unique>(); driverHandle->initialize(std::move(devices)); static_cast(driverHandle.get()->getMemoryManager())->isMockHostMemoryManager = true; static_cast(driverHandle.get()->getMemoryManager())->forceFailureInAllocationWithHostPointer = true; device = driverHandle->devices[0]; } void TearDown() override { } NEO::ExecutionEnvironment *executionEnvironment = nullptr; std::unique_ptr> driverHandle; NEO::MockDevice *neoDevice = nullptr; L0::Device *device = nullptr; const uint32_t rootDeviceIndex = 1u; const uint32_t numRootDevices = 2u; }; TEST_F(DeviceHostPointerTest, givenHostPointerNotAcceptedByKernelThenNewAllocationIsCreatedAndHostPointerCopied) { size_t size = 55; uint64_t *buffer = new uint64_t[size]; for (uint32_t i = 0; i < size; i++) { buffer[i] = i + 10; } auto allocation = device->allocateMemoryFromHostPtr(buffer, size, true); EXPECT_NE(nullptr, allocation); EXPECT_EQ(NEO::AllocationType::INTERNAL_HOST_MEMORY, allocation->getAllocationType()); EXPECT_EQ(rootDeviceIndex, allocation->getRootDeviceIndex()); EXPECT_NE(allocation->getUnderlyingBuffer(), reinterpret_cast(buffer)); EXPECT_EQ(allocation->getUnderlyingBufferSize(), size); EXPECT_EQ(0, memcmp(buffer, allocation->getUnderlyingBuffer(), size)); neoDevice->getMemoryManager()->freeGraphicsMemory(allocation); delete[] buffer; } TEST_F(DeviceHostPointerTest, givenHostPointerNotAcceptedByKernelAndHostPointerCopyIsNotAllowedThenAllocationIsNull) { size_t size = 55; uint64_t *buffer = new uint64_t[size]; for (uint32_t i = 0; i < size; i++) { buffer[i] = i + 10; } auto allocation = device->allocateMemoryFromHostPtr(buffer, size, false); EXPECT_EQ(nullptr, allocation); delete[] buffer; } TEST_F(DeviceTest, givenKernelExtendedPropertiesStructureWhenKernelPropertiesCalledThenSuccessIsReturnedAndPropertiesAreSet) { ze_device_module_properties_t 
kernelProperties = {};
    ze_float_atomic_ext_properties_t kernelExtendedProperties = {};
    kernelExtendedProperties.stype = ZE_STRUCTURE_TYPE_FLOAT_ATOMIC_EXT_PROPERTIES;
    // Pre-fill the flag fields with a sentinel so a write by getKernelProperties is detectable.
    uint32_t maxValue = static_cast(std::numeric_limits::max());
    kernelExtendedProperties.fp16Flags = maxValue;
    kernelExtendedProperties.fp32Flags = maxValue;
    kernelExtendedProperties.fp64Flags = maxValue;
    kernelProperties.pNext = &kernelExtendedProperties;
    ze_result_t res = device->getKernelProperties(&kernelProperties);
    EXPECT_EQ(res, ZE_RESULT_SUCCESS);
    EXPECT_NE(maxValue, kernelExtendedProperties.fp16Flags);
    EXPECT_NE(maxValue, kernelExtendedProperties.fp32Flags);
    EXPECT_NE(maxValue, kernelExtendedProperties.fp64Flags);
}

// NOTE(review): template argument lists (e.g. after static_cast / std::numeric_limits /
// std::vector / VariableBackup / Mock) look stripped in this file; those tokens are
// reproduced as found - confirm against upstream.

// When the chained extension struct carries an stype other than
// ZE_STRUCTURE_TYPE_FLOAT_ATOMIC_EXT_PROPERTIES, the call must still succeed but leave the
// sentinel values in the fp16/fp32/fp64 flag fields untouched.
TEST_F(DeviceTest, givenKernelExtendedPropertiesStructureWhenKernelPropertiesCalledWithIncorrectsStypeThenSuccessIsReturnedButPropertiesAreNotSet) {
    ze_device_module_properties_t kernelProperties = {};
    ze_float_atomic_ext_properties_t kernelExtendedProperties = {};
    kernelExtendedProperties.stype = ZE_STRUCTURE_TYPE_FORCE_UINT32;
    uint32_t maxValue = static_cast(std::numeric_limits::max());
    kernelExtendedProperties.fp16Flags = maxValue;
    kernelExtendedProperties.fp32Flags = maxValue;
    kernelExtendedProperties.fp64Flags = maxValue;
    kernelProperties.pNext = &kernelExtendedProperties;
    ze_result_t res = device->getKernelProperties(&kernelProperties);
    EXPECT_EQ(res, ZE_RESULT_SUCCESS);
    EXPECT_EQ(maxValue, kernelExtendedProperties.fp16Flags);
    EXPECT_EQ(maxValue, kernelExtendedProperties.fp32Flags);
    EXPECT_EQ(maxValue, kernelExtendedProperties.fp64Flags);
}

// Chains a scheduling-hint extension struct and checks that getKernelProperties overwrites
// the sentinel flags and sets the flag bit matching every policy returned by
// PreambleHelper::getSupportedThreadArbitrationPolicies().
HWTEST_F(DeviceTest, whenPassingSchedulingHintExpStructToGetPropertiesThenPropertiesWithCorrectFlagIsReturned) {
    ze_device_module_properties_t kernelProperties = {};
    kernelProperties.stype = ZE_STRUCTURE_TYPE_KERNEL_PROPERTIES;

    ze_scheduling_hint_exp_properties_t schedulingHintProperties = {};
    schedulingHintProperties.stype = ZE_STRUCTURE_TYPE_SCHEDULING_HINT_EXP_PROPERTIES;
    // Sentinel: must not survive the call.
    schedulingHintProperties.schedulingHintFlags = ZE_SCHEDULING_HINT_EXP_FLAG_FORCE_UINT32;

    kernelProperties.pNext = &schedulingHintProperties;

    ze_result_t res = device->getKernelProperties(&kernelProperties);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    EXPECT_NE(ZE_SCHEDULING_HINT_EXP_FLAG_FORCE_UINT32, schedulingHintProperties.schedulingHintFlags);
    auto supportedThreadArbitrationPolicies = NEO::PreambleHelper::getSupportedThreadArbitrationPolicies();
    for (auto &p : supportedThreadArbitrationPolicies) {
        switch (p) {
        case ThreadArbitrationPolicy::AgeBased:
            EXPECT_NE(0u, (schedulingHintProperties.schedulingHintFlags &
                           ZE_SCHEDULING_HINT_EXP_FLAG_OLDEST_FIRST));
            break;
        case ThreadArbitrationPolicy::RoundRobin:
            EXPECT_NE(0u, (schedulingHintProperties.schedulingHintFlags &
                           ZE_SCHEDULING_HINT_EXP_FLAG_ROUND_ROBIN));
            break;
        case ThreadArbitrationPolicy::RoundRobinAfterDependency:
            EXPECT_NE(0u, (schedulingHintProperties.schedulingHintFlags &
                           ZE_SCHEDULING_HINT_EXP_FLAG_STALL_BASED_ROUND_ROBIN));
            break;
        default:
            // Any policy without a mapped flag is treated as a test failure.
            FAIL();
        }
    }
}

// Installs a mock HwInfoConfig reporting all three arbitration policies and expects exactly
// the three corresponding scheduling-hint flags to be returned.
HWTEST2_F(DeviceTest, givenAllThreadArbitrationPoliciesWhenPassingSchedulingHintExpStructToGetPropertiesThenPropertiesWithAllFlagsAreReturned, MatchAny) {
    // Mock that returns whatever policy list the test stores in threadArbPolicies.
    struct MockHwInfoConfig : NEO::HwInfoConfigHw {
        std::vector getKernelSupportedThreadArbitrationPolicies() override {
            return threadArbPolicies;
        }
        std::vector threadArbPolicies;
    };

    const uint32_t rootDeviceIndex = 0u;
    auto hwInfo = *NEO::defaultHwInfo;
    auto *neoMockDevice = NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo,
                                                                             rootDeviceIndex);

    Mock deviceImp(neoMockDevice, neoMockDevice->getExecutionEnvironment());

    MockHwInfoConfig hwInfoConfig{};
    hwInfoConfig.threadArbPolicies = {ThreadArbitrationPolicy::AgeBased,
                                      ThreadArbitrationPolicy::RoundRobin,
                                      ThreadArbitrationPolicy::RoundRobinAfterDependency};
    // Swap the mock into the factory slot for this product family through the backup object.
    // NOTE(review): presumably the backup restores the previous entry on destruction - confirm.
    VariableBackup hwInfoConfigFactoryBackup{&NEO::hwInfoConfigFactory[static_cast(hwInfo.platform.eProductFamily)]};
    hwInfoConfigFactoryBackup = &hwInfoConfig;

    ze_device_module_properties_t kernelProperties = {};
    kernelProperties.stype = ZE_STRUCTURE_TYPE_KERNEL_PROPERTIES;

    ze_scheduling_hint_exp_properties_t schedulingHintProperties = {};
    schedulingHintProperties.stype = ZE_STRUCTURE_TYPE_SCHEDULING_HINT_EXP_PROPERTIES;
    schedulingHintProperties.schedulingHintFlags = ZE_SCHEDULING_HINT_EXP_FLAG_FORCE_UINT32;

    kernelProperties.pNext = &schedulingHintProperties;

    ze_result_t res = deviceImp.getKernelProperties(&kernelProperties);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    ze_scheduling_hint_exp_flags_t expected = (ZE_SCHEDULING_HINT_EXP_FLAG_OLDEST_FIRST |
                                               ZE_SCHEDULING_HINT_EXP_FLAG_ROUND_ROBIN |
                                               ZE_SCHEDULING_HINT_EXP_FLAG_STALL_BASED_ROUND_ROBIN);
    EXPECT_EQ(expected, schedulingHintProperties.schedulingHintFlags);
}

// Same setup with the mock reporting only ThreadArbitrationPolicy::NotPresent: the returned
// scheduling-hint flags are expected to be 0 (asserted after the call below).
HWTEST2_F(DeviceTest, givenIncorrectThreadArbitrationPolicyWhenPassingSchedulingHintExpStructToGetPropertiesThenNoneIsReturned, MatchAny) {
    struct MockHwInfoConfig : NEO::HwInfoConfigHw {
        std::vector getKernelSupportedThreadArbitrationPolicies() override {
            return threadArbPolicies;
        }
        std::vector threadArbPolicies;
    };

    const uint32_t rootDeviceIndex = 0u;
    auto hwInfo = *NEO::defaultHwInfo;
    auto *neoMockDevice = NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo,
                                                                             rootDeviceIndex);

    Mock deviceImp(neoMockDevice, neoMockDevice->getExecutionEnvironment());

    MockHwInfoConfig hwInfoConfig{};
    hwInfoConfig.threadArbPolicies = {ThreadArbitrationPolicy::NotPresent};
    VariableBackup hwInfoConfigFactoryBackup{&NEO::hwInfoConfigFactory[static_cast(hwInfo.platform.eProductFamily)]};
    hwInfoConfigFactoryBackup = &hwInfoConfig;

    ze_device_module_properties_t kernelProperties = {};
    kernelProperties.stype = ZE_STRUCTURE_TYPE_KERNEL_PROPERTIES;

    ze_scheduling_hint_exp_properties_t schedulingHintProperties = {};
    schedulingHintProperties.stype = ZE_STRUCTURE_TYPE_SCHEDULING_HINT_EXP_PROPERTIES;
    schedulingHintProperties.schedulingHintFlags = ZE_SCHEDULING_HINT_EXP_FLAG_FORCE_UINT32;

    kernelProperties.pNext = &schedulingHintProperties;

    ze_result_t res = deviceImp.getKernelProperties(&kernelProperties);
EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    EXPECT_EQ(0u, schedulingHintProperties.schedulingHintFlags);
}

// NOTE(review): template argument lists (e.g. after std::numeric_limits) look stripped in
// this file; those tokens are reproduced as found - confirm against upstream.

// Fills ze_device_module_properties_t with a sentinel byte pattern, calls getKernelProperties
// and checks that the call changed the fields and reports the expected capability flags.
TEST_F(DeviceTest, givenKernelPropertiesStructureWhenKernelPropertiesCalledThenAllPropertiesAreAssigned) {
    const auto &hardwareInfo = this->neoDevice->getHardwareInfo();

    ze_device_module_properties_t kernelProperties = {};
    ze_device_module_properties_t kernelPropertiesBefore = {};
    memset(&kernelProperties, std::numeric_limits::max(), sizeof(ze_device_module_properties_t));
    // pNext must stay a valid chain terminator after the memset.
    kernelProperties.pNext = nullptr;
    kernelPropertiesBefore = kernelProperties;
    ze_result_t res = device->getKernelProperties(&kernelProperties);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);

    EXPECT_NE(kernelPropertiesBefore.spirvVersionSupported, kernelProperties.spirvVersionSupported);
    // NOTE(review): if `id` is an array member, this compares decayed pointers of two distinct
    // objects and is always unequal - confirm and consider memcmp.
    EXPECT_NE(kernelPropertiesBefore.nativeKernelSupported.id, kernelProperties.nativeKernelSupported.id);

    // Inspect the struct written by getKernelProperties; asserting on the pre-filled
    // "before" copy would never look at what the call produced.
    EXPECT_TRUE(kernelProperties.flags & ZE_DEVICE_MODULE_FLAG_FP16);
    if (hardwareInfo.capabilityTable.ftrSupportsInteger64BitAtomics) {
        EXPECT_TRUE(kernelProperties.flags & ZE_DEVICE_MODULE_FLAG_INT64_ATOMICS);
    }

    EXPECT_NE(kernelPropertiesBefore.maxArgumentsSize, kernelProperties.maxArgumentsSize);
    EXPECT_NE(kernelPropertiesBefore.printfBufferSize, kernelProperties.printfBufferSize);
}

// getCacheProperties is expected to report one entry when queried for the count and then to
// overwrite the sentinel cacheSize when given a buffer.
TEST_F(DeviceTest, givenDeviceCachePropertiesThenAllPropertiesAreAssigned) {
    ze_device_cache_properties_t deviceCacheProperties = {};
    ze_device_cache_properties_t deviceCachePropertiesBefore = {};

    deviceCacheProperties.cacheSize = std::numeric_limits::max();

    deviceCachePropertiesBefore = deviceCacheProperties;

    uint32_t count = 0;
    ze_result_t res = device->getCacheProperties(&count, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    EXPECT_EQ(count, 1u);

    res = device->getCacheProperties(&count, &deviceCacheProperties);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);

    EXPECT_NE(deviceCacheProperties.cacheSize, deviceCachePropertiesBefore.cacheSize);
}

// Pre-fills every checked field of ze_device_properties_t with a sentinel (type = FPGA,
// maxMemAllocSize = 0, the rest memset) and expects getProperties to change each of them.
TEST_F(DeviceTest, givenDevicePropertiesStructureWhenDevicePropertiesCalledThenAllPropertiesAreAssigned) {
    ze_device_properties_t deviceProperties, devicePropertiesBefore;

    deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES};

    deviceProperties.type = ZE_DEVICE_TYPE_FPGA;
    memset(&deviceProperties.vendorId, std::numeric_limits::max(), sizeof(deviceProperties.vendorId));
    memset(&deviceProperties.deviceId, std::numeric_limits::max(), sizeof(deviceProperties.deviceId));
    memset(&deviceProperties.uuid, std::numeric_limits::max(), sizeof(deviceProperties.uuid));
    memset(&deviceProperties.subdeviceId, std::numeric_limits::max(), sizeof(deviceProperties.subdeviceId));
    memset(&deviceProperties.coreClockRate, std::numeric_limits::max(), sizeof(deviceProperties.coreClockRate));
    memset(&deviceProperties.maxCommandQueuePriority, std::numeric_limits::max(), sizeof(deviceProperties.maxCommandQueuePriority));
    memset(&deviceProperties.maxHardwareContexts, std::numeric_limits::max(), sizeof(deviceProperties.maxHardwareContexts));
    memset(&deviceProperties.numThreadsPerEU, std::numeric_limits::max(), sizeof(deviceProperties.numThreadsPerEU));
    memset(&deviceProperties.physicalEUSimdWidth, std::numeric_limits::max(), sizeof(deviceProperties.physicalEUSimdWidth));
    memset(&deviceProperties.numEUsPerSubslice, std::numeric_limits::max(), sizeof(deviceProperties.numEUsPerSubslice));
    memset(&deviceProperties.numSubslicesPerSlice, std::numeric_limits::max(), sizeof(deviceProperties.numSubslicesPerSlice));
    memset(&deviceProperties.numSlices, std::numeric_limits::max(), sizeof(deviceProperties.numSlices));
    memset(&deviceProperties.timerResolution, std::numeric_limits::max(), sizeof(deviceProperties.timerResolution));
    memset(&deviceProperties.timestampValidBits, std::numeric_limits::max(), sizeof(deviceProperties.timestampValidBits));
    memset(&deviceProperties.kernelTimestampValidBits, std::numeric_limits::max(), sizeof(deviceProperties.kernelTimestampValidBits));
    memset(&deviceProperties.name, std::numeric_limits::max(), sizeof(deviceProperties.name));
    deviceProperties.maxMemAllocSize = 0;

    devicePropertiesBefore = deviceProperties;
    device->getProperties(&deviceProperties);

    EXPECT_NE(deviceProperties.type, devicePropertiesBefore.type);
    EXPECT_NE(deviceProperties.vendorId, devicePropertiesBefore.vendorId);
    EXPECT_NE(deviceProperties.deviceId, devicePropertiesBefore.deviceId);
    EXPECT_NE(0, memcmp(&deviceProperties.uuid, &devicePropertiesBefore.uuid, sizeof(devicePropertiesBefore.uuid)));
    EXPECT_NE(deviceProperties.subdeviceId, devicePropertiesBefore.subdeviceId);
    EXPECT_NE(deviceProperties.coreClockRate, devicePropertiesBefore.coreClockRate);
    EXPECT_NE(deviceProperties.maxCommandQueuePriority, devicePropertiesBefore.maxCommandQueuePriority);
    EXPECT_NE(deviceProperties.maxHardwareContexts, devicePropertiesBefore.maxHardwareContexts);
    EXPECT_NE(deviceProperties.numThreadsPerEU, devicePropertiesBefore.numThreadsPerEU);
    EXPECT_NE(deviceProperties.physicalEUSimdWidth, devicePropertiesBefore.physicalEUSimdWidth);
    EXPECT_NE(deviceProperties.numEUsPerSubslice, devicePropertiesBefore.numEUsPerSubslice);
    EXPECT_NE(deviceProperties.numSubslicesPerSlice, devicePropertiesBefore.numSubslicesPerSlice);
    EXPECT_NE(deviceProperties.numSlices, devicePropertiesBefore.numSlices);
    EXPECT_NE(deviceProperties.timerResolution, devicePropertiesBefore.timerResolution);
    EXPECT_NE(deviceProperties.timestampValidBits, devicePropertiesBefore.timestampValidBits);
    EXPECT_NE(deviceProperties.kernelTimestampValidBits, devicePropertiesBefore.kernelTimestampValidBits);
    EXPECT_NE(0, memcmp(&deviceProperties.name, &devicePropertiesBefore.name, sizeof(devicePropertiesBefore.name)));
    EXPECT_NE(deviceProperties.maxMemAllocSize, devicePropertiesBefore.maxMemAllocSize);
}

// With SubSliceCount = 8 and SliceCount = 1 (MaxSubSlicesSupported = 48, MaxSlicesSupported = 3),
// getProperties is expected to report 8 subslices per slice.
TEST_F(DeviceTest, WhenGettingDevicePropertiesThenSubslicesPerSliceIsBasedOnSubslicesSupported) {
    ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES};
    deviceProperties.type = ZE_DEVICE_TYPE_GPU;

    // One lookup of the mutable system info instead of repeating the accessor chain per field.
    auto &gtSystemInfo = device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo()->gtSystemInfo;
    gtSystemInfo.MaxSubSlicesSupported = 48;
    gtSystemInfo.MaxSlicesSupported = 3;
    gtSystemInfo.SubSliceCount = 8;
    gtSystemInfo.SliceCount = 1;

    device->getProperties(&deviceProperties);

    EXPECT_EQ(8u, deviceProperties.numSubslicesPerSlice);
}

// Same hardware setup with the DebugApiUsed flag set: 16 subslices per slice are expected
// (48 / 3 from the Max*Supported values).
TEST_F(DeviceTest, GivenDebugApiUsedSetWhenGettingDevicePropertiesThenSubslicesPerSliceIsBasedOnMaxSubslicesSupported) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.DebugApiUsed.set(1);
    ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES};
    deviceProperties.type = ZE_DEVICE_TYPE_GPU;

    auto &gtSystemInfo = device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo()->gtSystemInfo;
    gtSystemInfo.MaxSubSlicesSupported = 48;
    gtSystemInfo.MaxSlicesSupported = 3;
    gtSystemInfo.SubSliceCount = 8;
    gtSystemInfo.SliceCount = 1;

    device->getProperties(&deviceProperties);

    EXPECT_EQ(16u, deviceProperties.numSubslicesPerSlice);
}

// maxMemAllocSize from getProperties must match the NEO device info value, and also the
// global memory size, capped by HwHelper::getMaxMemAllocSize() when shared system
// allocations are not allowed.
TEST_F(DeviceTest, givenCallToDevicePropertiesThenMaximumMemoryToBeAllocatedIsCorrectlyReturned) {
    ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES};
    deviceProperties.maxMemAllocSize = 0;
    device->getProperties(&deviceProperties);
    EXPECT_EQ(deviceProperties.maxMemAllocSize, this->neoDevice->getDeviceInfo().maxMemAllocSize);

    auto &hwHelper = HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily);
    auto expectedSize = this->neoDevice->getDeviceInfo().globalMemSize;
    if (!this->neoDevice->areSharedSystemAllocationsAllowed()) {
        expectedSize = std::min(expectedSize, hwHelper.getMaxMemAllocSize());
    }
    EXPECT_EQ(deviceProperties.maxMemAllocSize, expectedSize);
}
TEST_F(DeviceTest, whenCheckingIfStatelessCompressionIsSupportedThenReturnFalse) { const auto &hwInfoConfig = *HwInfoConfig::get(defaultHwInfo->platform.eProductFamily); EXPECT_FALSE(hwInfoConfig.allowStatelessCompression(*defaultHwInfo)); } struct DeviceHwInfoTest : public ::testing::Test { void SetUp() override { executionEnvironment = new NEO::ExecutionEnvironment(); executionEnvironment->prepareRootDeviceEnvironments(1U); } void TearDown() override { } void setDriverAndDevice() { NEO::MockCompilerEnableGuard mock(true); std::vector> devices; neoDevice = NEO::MockDevice::create(executionEnvironment, 0); EXPECT_NE(neoDevice, nullptr); devices.push_back(std::unique_ptr(neoDevice)); driverHandle = std::make_unique>(); ze_result_t res = driverHandle->initialize(std::move(devices)); EXPECT_EQ(res, ZE_RESULT_SUCCESS); device = driverHandle->devices[0]; } NEO::ExecutionEnvironment *executionEnvironment = nullptr; std::unique_ptr> driverHandle; NEO::MockDevice *neoDevice = nullptr; L0::Device *device = nullptr; }; TEST_F(DeviceHwInfoTest, givenDeviceWithNoPageFaultSupportThenFlagIsNotSet) { NEO::HardwareInfo hardwareInfo = *NEO::defaultHwInfo; hardwareInfo.capabilityTable.supportsOnDemandPageFaults = false; executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(&hardwareInfo); setDriverAndDevice(); ze_device_properties_t deviceProps = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES}; device->getProperties(&deviceProps); EXPECT_FALSE(deviceProps.flags & ZE_DEVICE_PROPERTY_FLAG_ONDEMANDPAGING); } TEST_F(DeviceHwInfoTest, givenDeviceWithPageFaultSupportThenFlagIsSet) { NEO::HardwareInfo hardwareInfo = *NEO::defaultHwInfo; hardwareInfo.capabilityTable.supportsOnDemandPageFaults = true; executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(&hardwareInfo); setDriverAndDevice(); ze_device_properties_t deviceProps = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES}; device->getProperties(&deviceProps); EXPECT_TRUE(deviceProps.flags & ZE_DEVICE_PROPERTY_FLAG_ONDEMANDPAGING); } 
// Level Zero device unit tests: device / PCI / external-memory properties, global
// timestamps and timer resolution, memory properties, FP64 and 64-bit-atomic
// kernel properties, and multi-device (sub-device, implicit scaling) queries.
// The physical lines below break in the middle of definitions; each comment group
// says what the line that follows it contains.
// NOTE(review): template argument lists look stripped throughout this text (e.g.
// `static_cast(device)`, `std::vector queueProperties(count)`); code kept as found.
//
// Next line: ECC flag mirrors getDeviceInfo().errorCorrectionSupport; command
// queue group properties via the count-then-fill call pair; timestamp valid bits
// match the capability table; getPciProperties() with driverInfo reset to nullptr
// expects ZE_RESULT_ERROR_UNINITIALIZED; start of the valid-PCI-bus-info test.
TEST_F(DeviceTest, whenGetDevicePropertiesCalledThenCorrectDevicePropertyEccFlagSet) { ze_device_properties_t deviceProps = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES}; device->getProperties(&deviceProps); auto expected = (this->neoDevice->getDeviceInfo().errorCorrectionSupport) ? ZE_DEVICE_PROPERTY_FLAG_ECC : static_cast(0u); EXPECT_EQ(expected, deviceProps.flags & ZE_DEVICE_PROPERTY_FLAG_ECC); } TEST_F(DeviceTest, givenCommandQueuePropertiesCallThenCallSucceeds) { uint32_t count = 0; ze_result_t res = device->getCommandQueueGroupProperties(&count, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, res); EXPECT_GE(count, 1u); std::vector queueProperties(count); res = device->getCommandQueueGroupProperties(&count, queueProperties.data()); EXPECT_EQ(ZE_RESULT_SUCCESS, res); } TEST_F(DeviceTest, givenCallToDevicePropertiesThenTimestampValidBitsAreCorrectlyAssigned) { ze_device_properties_t deviceProps = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES}; device->getProperties(&deviceProps); EXPECT_EQ(device->getHwInfo().capabilityTable.timestampValidBits, deviceProps.timestampValidBits); EXPECT_EQ(device->getHwInfo().capabilityTable.kernelTimestampValidBits, deviceProps.kernelTimestampValidBits); } TEST_F(DeviceTest, givenNullDriverInfowhenPciPropertiesIsCalledThenUninitializedErrorIsReturned) { auto deviceImp = static_cast(device); ze_pci_ext_properties_t pciProperties = {}; deviceImp->driverInfo.reset(nullptr); ze_result_t res = device->getPciProperties(&pciProperties); EXPECT_EQ(ZE_RESULT_ERROR_UNINITIALIZED, res); } TEST_F(DeviceTest, givenValidPciExtPropertiesWhenPciPropertiesIsCalledThenSuccessIsReturned) { auto deviceImp = static_cast(device); const NEO::PhysicalDevicePciBusInfo pciBusInfo(0, 1, 2, 3); NEO::DriverInfoMock *driverInfo = new DriverInfoMock(); ze_pci_ext_properties_t pciProperties = {}; driverInfo->setPciBusInfo(pciBusInfo); deviceImp->driverInfo.reset(driverInfo); ze_result_t res = device->getPciProperties(&pciProperties); EXPECT_EQ(ZE_RESULT_SUCCESS, res); 
// Next line: rest of the valid-PCI test (domain/bus/device/function are copied
// into pciProperties.address); the invalid-PCI test, which tries four bus infos
// that each carry PhysicalDevicePciBusInfo::InvalidValue in exactly one field and
// expects ZE_RESULT_ERROR_UNINITIALIZED for every one; start of the
// external-memory-properties test.
// NOTE(review): externalMemoryProperties is declared without an initializer, and
// the test name says "NoPropertiesAreReturned" although DMA_BUF is expected in
// memoryAllocationExportTypes / memoryAllocationImportTypes - confirm intent.
EXPECT_EQ(pciBusInfo.pciDomain, pciProperties.address.domain); EXPECT_EQ(pciBusInfo.pciBus, pciProperties.address.bus); EXPECT_EQ(pciBusInfo.pciDevice, pciProperties.address.device); EXPECT_EQ(pciBusInfo.pciFunction, pciProperties.address.function); } TEST_F(DeviceTest, givenInvalidPciBusInfoWhenPciPropertiesIsCalledThenUninitializedErrorIsReturned) { constexpr uint32_t INVALID = NEO::PhysicalDevicePciBusInfo::InvalidValue; auto deviceImp = static_cast(device); ze_pci_ext_properties_t pciProperties = {}; std::vector pciBusInfos; pciBusInfos.push_back(NEO::PhysicalDevicePciBusInfo(0, 1, 2, INVALID)); pciBusInfos.push_back(NEO::PhysicalDevicePciBusInfo(0, 1, INVALID, 3)); pciBusInfos.push_back(NEO::PhysicalDevicePciBusInfo(0, INVALID, 2, 3)); pciBusInfos.push_back(NEO::PhysicalDevicePciBusInfo(INVALID, 1, 2, 3)); for (auto pciBusInfo : pciBusInfos) { NEO::DriverInfoMock *driverInfo = new DriverInfoMock(); driverInfo->setPciBusInfo(pciBusInfo); deviceImp->driverInfo.reset(driverInfo); ze_result_t res = device->getPciProperties(&pciProperties); EXPECT_EQ(ZE_RESULT_ERROR_UNINITIALIZED, res); } } TEST_F(DeviceTest, whenGetExternalMemoryPropertiesIsCalledThenSuccessIsReturnedAndNoPropertiesAreReturned) { ze_device_external_memory_properties_t externalMemoryProperties; ze_result_t result = device->getExternalMemoryProperties(&externalMemoryProperties); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_FALSE(externalMemoryProperties.imageExportTypes & ZE_EXTERNAL_MEMORY_TYPE_FLAG_OPAQUE_FD); EXPECT_FALSE(externalMemoryProperties.imageExportTypes & ZE_EXTERNAL_MEMORY_TYPE_FLAG_DMA_BUF); EXPECT_FALSE(externalMemoryProperties.imageImportTypes & ZE_EXTERNAL_MEMORY_TYPE_FLAG_OPAQUE_FD); EXPECT_FALSE(externalMemoryProperties.imageImportTypes & ZE_EXTERNAL_MEMORY_TYPE_FLAG_DMA_BUF); EXPECT_FALSE(externalMemoryProperties.memoryAllocationExportTypes & ZE_EXTERNAL_MEMORY_TYPE_FLAG_OPAQUE_FD); EXPECT_TRUE(externalMemoryProperties.memoryAllocationExportTypes & 
// Next line: rest of the external-memory test; getGlobalTimestamps() success case
// (both timestamps non-zero); FalseCpuGpuDeviceTime, a DeviceTime whose
// getCpuGpuTime() returns false, and FalseCpuGpuTime, an OSTime that installs it
// as deviceTime while its own getCpuTime() returns true; the GlobalTimestampTest
// fixture, which sets CreateMultipleRootDevices to 2 and creates a MockDevice for
// root index 1 without building a driver handle (each test does that itself);
// start of the getCpuGpuTime-failure test.
ZE_EXTERNAL_MEMORY_TYPE_FLAG_DMA_BUF); EXPECT_FALSE(externalMemoryProperties.memoryAllocationImportTypes & ZE_EXTERNAL_MEMORY_TYPE_FLAG_OPAQUE_FD); EXPECT_TRUE(externalMemoryProperties.memoryAllocationImportTypes & ZE_EXTERNAL_MEMORY_TYPE_FLAG_DMA_BUF); } TEST_F(DeviceTest, whenGetGlobalTimestampIsCalledThenSuccessIsReturnedAndValuesSetCorrectly) { uint64_t hostTs = 0u; uint64_t deviceTs = 0u; ze_result_t result = device->getGlobalTimestamps(&hostTs, &deviceTs); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(0u, hostTs); EXPECT_NE(0u, deviceTs); } class FalseCpuGpuDeviceTime : public NEO::DeviceTime { public: bool getCpuGpuTime(TimeStampData *pGpuCpuTime, OSTime *osTime) override { return false; } double getDynamicDeviceTimerResolution(HardwareInfo const &hwInfo) const override { return NEO::OSTime::getDeviceTimerResolution(hwInfo); } uint64_t getDynamicDeviceTimerClock(HardwareInfo const &hwInfo) const override { return static_cast(1000000000.0 / OSTime::getDeviceTimerResolution(hwInfo)); } }; class FalseCpuGpuTime : public NEO::OSTime { public: FalseCpuGpuTime() { this->deviceTime = std::make_unique(); } bool getCpuTime(uint64_t *timeStamp) override { return true; }; double getHostTimerResolution() const override { return 0; } uint64_t getCpuRawTimestamp() override { return 0; } static std::unique_ptr create() { return std::unique_ptr(new FalseCpuGpuTime()); } }; struct GlobalTimestampTest : public ::testing::Test { void SetUp() override { DebugManager.flags.CreateMultipleRootDevices.set(numRootDevices); neoDevice = NEO::MockDevice::createWithNewExecutionEnvironment(NEO::defaultHwInfo.get(), rootDeviceIndex); } DebugManagerStateRestore restorer; std::unique_ptr> driverHandle; NEO::MockDevice *neoDevice = nullptr; L0::Device *device = nullptr; const uint32_t rootDeviceIndex = 1u; const uint32_t numRootDevices = 2u; }; TEST_F(GlobalTimestampTest, whenGetGlobalTimestampCalledAndGetCpuGpuTimeIsFalseReturnError) { uint64_t hostTs = 0u; uint64_t deviceTs = 0u; 
// Next line: rest of that test (with FalseCpuGpuTime installed,
// getGlobalTimestamps() is expected to return ZE_RESULT_ERROR_DEVICE_LOST); a
// test that the profiling timer clock equals 1e9 / profiling timer resolution; a
// test that ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES reports timerResolution equal to
// the profiling timer resolution; start of the _1_2 struct variant.
neoDevice->setOSTime(new FalseCpuGpuTime()); NEO::DeviceVector devices; devices.push_back(std::unique_ptr(neoDevice)); driverHandle = std::make_unique>(); driverHandle->initialize(std::move(devices)); device = driverHandle->devices[0]; ze_result_t result = device->getGlobalTimestamps(&hostTs, &deviceTs); EXPECT_EQ(ZE_RESULT_ERROR_DEVICE_LOST, result); } TEST_F(GlobalTimestampTest, whenGetProfilingTimerClockandProfilingTimerResolutionThenVerifyRelation) { neoDevice->setOSTime(new FalseCpuGpuTime()); NEO::DeviceVector devices; devices.push_back(std::unique_ptr(neoDevice)); driverHandle = std::make_unique>(); driverHandle->initialize(std::move(devices)); uint64_t timerClock = neoDevice->getProfilingTimerClock(); EXPECT_NE(timerClock, 0u); double timerResolution = neoDevice->getProfilingTimerResolution(); EXPECT_NE(timerResolution, 0.0); EXPECT_EQ(timerClock, static_cast(1000000000.0 / timerResolution)); } TEST_F(GlobalTimestampTest, whenQueryingForTimerResolutionWithLegacyDevicePropertiesStructThenDefaultTimerResolutionInNanoSecondsIsReturned) { neoDevice->setOSTime(new FalseCpuGpuTime()); NEO::DeviceVector devices; devices.push_back(std::unique_ptr(neoDevice)); std::unique_ptr driverHandle = std::make_unique(); driverHandle->initialize(std::move(devices)); double timerResolution = neoDevice->getProfilingTimerResolution(); EXPECT_NE(timerResolution, 0.0); ze_device_properties_t deviceProps = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES}; ze_result_t res = driverHandle.get()->devices[0]->getProperties(&deviceProps); EXPECT_EQ(ZE_RESULT_SUCCESS, res); EXPECT_EQ(deviceProps.timerResolution, static_cast(timerResolution)); } TEST_F(GlobalTimestampTest, whenQueryingForTimerResolutionWithDeviceProperties_1_2_StructThenDefaultTimerResolutionInCyclesPerSecondsIsReturned) { neoDevice->setOSTime(new FalseCpuGpuTime()); NEO::DeviceVector devices; devices.push_back(std::unique_ptr(neoDevice)); std::unique_ptr driverHandle = std::make_unique(); driverHandle->initialize(std::move(devices)); 
// Next line: rest of the _1_2 test (ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES_1_2
// reports timerResolution equal to the profiling timer clock); the same
// expectation for the legacy struct type when UseCyclesPerSecondTimer is set to 1;
// FalseCpuDeviceTime (getCpuGpuTime() returns true) and FalseCpuTime (an OSTime
// whose getCpuTime() returns false); header of the getCpuTime-failure test.
// NOTE(review): the UseCyclesPerSecondTimer test declares a local
// DebugManagerStateRestore named `restorer`, shadowing the fixture member of the
// same name.
uint64_t timerClock = neoDevice->getProfilingTimerClock(); EXPECT_NE(timerClock, 0u); ze_device_properties_t deviceProps = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES_1_2}; ze_result_t res = driverHandle.get()->devices[0]->getProperties(&deviceProps); EXPECT_EQ(ZE_RESULT_SUCCESS, res); EXPECT_EQ(deviceProps.timerResolution, timerClock); } TEST_F(GlobalTimestampTest, whenQueryingForTimerResolutionWithUseCyclesPerSecondTimerSetThenTimerResolutionInCyclesPerSecondsIsReturned) { DebugManagerStateRestore restorer; DebugManager.flags.UseCyclesPerSecondTimer.set(1u); neoDevice->setOSTime(new FalseCpuGpuTime()); NEO::DeviceVector devices; devices.push_back(std::unique_ptr(neoDevice)); std::unique_ptr driverHandle = std::make_unique(); driverHandle->initialize(std::move(devices)); uint64_t timerClock = neoDevice->getProfilingTimerClock(); EXPECT_NE(timerClock, 0u); ze_device_properties_t deviceProps = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES}; ze_result_t res = driverHandle.get()->devices[0]->getProperties(&deviceProps); EXPECT_EQ(ZE_RESULT_SUCCESS, res); EXPECT_EQ(deviceProps.timerResolution, timerClock); } class FalseCpuDeviceTime : public NEO::DeviceTime { public: bool getCpuGpuTime(TimeStampData *pGpuCpuTime, NEO::OSTime *) override { return true; } double getDynamicDeviceTimerResolution(HardwareInfo const &hwInfo) const override { return NEO::OSTime::getDeviceTimerResolution(hwInfo); } uint64_t getDynamicDeviceTimerClock(HardwareInfo const &hwInfo) const override { return static_cast(1000000000.0 / OSTime::getDeviceTimerResolution(hwInfo)); } }; class FalseCpuTime : public NEO::OSTime { public: FalseCpuTime() { this->deviceTime = std::make_unique(); } bool getCpuTime(uint64_t *timeStamp) override { return false; }; double getHostTimerResolution() const override { return 0; } uint64_t getCpuRawTimestamp() override { return 0; } static std::unique_ptr create() { return std::unique_ptr(new FalseCpuTime()); } }; TEST_F(GlobalTimestampTest, 
// Next line: body of the getCpuTime-failure test (ZE_RESULT_ERROR_DEVICE_LOST
// expected); DeviceGetMemoryTests (alias of DeviceTest): a zero count with a null
// pointer yields count == 1; a count of 2 with a null pointer yields
// ZE_RESULT_ERROR_INVALID_ARGUMENT and count is 1 afterwards; a filled query is
// compared against HwInfoConfig::getDeviceMemoryMaxClkRate and the NEO device
// info (addressBits, globalMemSize) with flags == 0; start of the
// DeviceHasNoDoubleFp64Test fixture.
whenGetGlobalTimestampCalledAndGetCpuTimeIsFalseReturnError) { uint64_t hostTs = 0u; uint64_t deviceTs = 0u; neoDevice->setOSTime(new FalseCpuTime()); NEO::DeviceVector devices; devices.push_back(std::unique_ptr(neoDevice)); driverHandle = std::make_unique>(); driverHandle->initialize(std::move(devices)); device = driverHandle->devices[0]; ze_result_t result = device->getGlobalTimestamps(&hostTs, &deviceTs); EXPECT_EQ(ZE_RESULT_ERROR_DEVICE_LOST, result); } using DeviceGetMemoryTests = DeviceTest; TEST_F(DeviceGetMemoryTests, whenCallingGetMemoryPropertiesWithCountZeroThenOneIsReturned) { uint32_t count = 0; ze_result_t res = device->getMemoryProperties(&count, nullptr); EXPECT_EQ(res, ZE_RESULT_SUCCESS); EXPECT_EQ(1u, count); } TEST_F(DeviceGetMemoryTests, whenCallingGetMemoryPropertiesWithNullPtrThenInvalidArgumentIsReturned) { uint32_t count = 0; ze_result_t res = device->getMemoryProperties(&count, nullptr); EXPECT_EQ(res, ZE_RESULT_SUCCESS); EXPECT_EQ(1u, count); count++; res = device->getMemoryProperties(&count, nullptr); EXPECT_EQ(res, ZE_RESULT_ERROR_INVALID_ARGUMENT); EXPECT_EQ(1u, count); } TEST_F(DeviceGetMemoryTests, whenCallingGetMemoryPropertiesWithNonNullPtrThenPropertiesAreReturned) { uint32_t count = 0; ze_result_t res = device->getMemoryProperties(&count, nullptr); EXPECT_EQ(res, ZE_RESULT_SUCCESS); EXPECT_EQ(1u, count); ze_device_memory_properties_t memProperties = {}; res = device->getMemoryProperties(&count, &memProperties); EXPECT_EQ(res, ZE_RESULT_SUCCESS); EXPECT_EQ(1u, count); auto hwInfo = *NEO::defaultHwInfo; auto &hwInfoConfig = *NEO::HwInfoConfig::get(hwInfo.platform.eProductFamily); EXPECT_EQ(memProperties.maxClockRate, hwInfoConfig.getDeviceMemoryMaxClkRate(&hwInfo)); EXPECT_EQ(memProperties.maxBusWidth, this->neoDevice->getDeviceInfo().addressBits); EXPECT_EQ(memProperties.totalSize, this->neoDevice->getDeviceInfo().globalMemSize); EXPECT_EQ(0u, memProperties.flags); } struct DeviceHasNoDoubleFp64Test : public ::testing::Test { void 
// Next line: DeviceHasNoDoubleFp64Test::SetUp (ftrSupportsFP64 and
// ftrSupports64BitMath both false, device created for root index 1 of 2) and its
// test. The test pre-fills kernelProperties with a non-zero byte pattern via
// memset so that fields the driver leaves untouched would be noticed, then
// expects no FP64 flag and fp64flags == 0; after OverrideDefaultFP64Settings is
// set to 1 the FP64 module flag and all listed fp64 flags are expected. Then the
// start of the DeviceHasFp64Test fixture.
SetUp() override { DebugManager.flags.CreateMultipleRootDevices.set(numRootDevices); HardwareInfo nonFp64Device = *defaultHwInfo; nonFp64Device.capabilityTable.ftrSupportsFP64 = false; nonFp64Device.capabilityTable.ftrSupports64BitMath = false; neoDevice = NEO::MockDevice::createWithNewExecutionEnvironment(&nonFp64Device, rootDeviceIndex); NEO::DeviceVector devices; devices.push_back(std::unique_ptr(neoDevice)); driverHandle = std::make_unique>(); driverHandle->initialize(std::move(devices)); device = driverHandle->devices[0]; } DebugManagerStateRestore restorer; std::unique_ptr> driverHandle; NEO::Device *neoDevice = nullptr; L0::Device *device = nullptr; const uint32_t rootDeviceIndex = 1u; const uint32_t numRootDevices = 2u; }; TEST_F(DeviceHasNoDoubleFp64Test, givenDeviceThatDoesntHaveFp64WhenDbgFlagEnablesFp64ThenReportFp64Flags) { ze_device_module_properties_t kernelProperties = {}; memset(&kernelProperties, std::numeric_limits::max(), sizeof(ze_device_module_properties_t)); kernelProperties.pNext = nullptr; device->getKernelProperties(&kernelProperties); EXPECT_FALSE(kernelProperties.flags & ZE_DEVICE_MODULE_FLAG_FP64); EXPECT_EQ(0u, kernelProperties.fp64flags); DebugManagerStateRestore dbgRestorer; DebugManager.flags.OverrideDefaultFP64Settings.set(1); device->getKernelProperties(&kernelProperties); EXPECT_TRUE(kernelProperties.flags & ZE_DEVICE_MODULE_FLAG_FP64); EXPECT_TRUE(kernelProperties.fp64flags & ZE_DEVICE_FP_FLAG_ROUNDED_DIVIDE_SQRT); EXPECT_TRUE(kernelProperties.fp64flags & ZE_DEVICE_FP_FLAG_ROUND_TO_NEAREST); EXPECT_TRUE(kernelProperties.fp64flags & ZE_DEVICE_FP_FLAG_ROUND_TO_ZERO); EXPECT_TRUE(kernelProperties.fp64flags & ZE_DEVICE_FP_FLAG_ROUND_TO_INF); EXPECT_TRUE(kernelProperties.fp64flags & ZE_DEVICE_FP_FLAG_INF_NAN); EXPECT_TRUE(kernelProperties.fp64flags & ZE_DEVICE_FP_FLAG_DENORM); EXPECT_TRUE(kernelProperties.fp64flags & ZE_DEVICE_FP_FLAG_FMA); } struct DeviceHasFp64Test : public ::testing::Test { void SetUp() override { 
// Next line: rest of DeviceHasFp64Test::SetUp (ftrSupportsFP64 and
// ftrSupports64BitMath both true) and most of its test, which expects the FP64
// module flag plus the full set of fp64 and fp32 flags.
// NOTE(review): getKernelProperties() is called twice in a row here with the same
// argument; looks redundant - confirm whether the repeat is deliberate.
DebugManager.flags.CreateMultipleRootDevices.set(numRootDevices); HardwareInfo fp64DeviceInfo = *defaultHwInfo; fp64DeviceInfo.capabilityTable.ftrSupportsFP64 = true; fp64DeviceInfo.capabilityTable.ftrSupports64BitMath = true; neoDevice = NEO::MockDevice::createWithNewExecutionEnvironment(&fp64DeviceInfo, rootDeviceIndex); NEO::DeviceVector devices; devices.push_back(std::unique_ptr(neoDevice)); driverHandle = std::make_unique>(); driverHandle->initialize(std::move(devices)); device = driverHandle->devices[0]; } DebugManagerStateRestore restorer; std::unique_ptr> driverHandle; NEO::Device *neoDevice = nullptr; L0::Device *device = nullptr; const uint32_t rootDeviceIndex = 1u; const uint32_t numRootDevices = 2u; }; TEST_F(DeviceHasFp64Test, givenDeviceWithFp64ThenReportCorrectFp64Flags) { ze_device_module_properties_t kernelProperties = {}; memset(&kernelProperties, std::numeric_limits::max(), sizeof(ze_device_module_properties_t)); kernelProperties.pNext = nullptr; device->getKernelProperties(&kernelProperties); device->getKernelProperties(&kernelProperties); EXPECT_TRUE(kernelProperties.flags & ZE_DEVICE_MODULE_FLAG_FP64); EXPECT_TRUE(kernelProperties.fp64flags & ZE_DEVICE_FP_FLAG_ROUNDED_DIVIDE_SQRT); EXPECT_TRUE(kernelProperties.fp64flags & ZE_DEVICE_FP_FLAG_ROUND_TO_NEAREST); EXPECT_TRUE(kernelProperties.fp64flags & ZE_DEVICE_FP_FLAG_ROUND_TO_ZERO); EXPECT_TRUE(kernelProperties.fp64flags & ZE_DEVICE_FP_FLAG_ROUND_TO_INF); EXPECT_TRUE(kernelProperties.fp64flags & ZE_DEVICE_FP_FLAG_INF_NAN); EXPECT_TRUE(kernelProperties.fp64flags & ZE_DEVICE_FP_FLAG_DENORM); EXPECT_TRUE(kernelProperties.fp64flags & ZE_DEVICE_FP_FLAG_FMA); EXPECT_TRUE(kernelProperties.fp32flags & ZE_DEVICE_FP_FLAG_ROUNDED_DIVIDE_SQRT); EXPECT_TRUE(kernelProperties.fp32flags & ZE_DEVICE_FP_FLAG_ROUND_TO_NEAREST); EXPECT_TRUE(kernelProperties.fp32flags & ZE_DEVICE_FP_FLAG_ROUND_TO_ZERO); EXPECT_TRUE(kernelProperties.fp32flags & ZE_DEVICE_FP_FLAG_ROUND_TO_INF); EXPECT_TRUE(kernelProperties.fp32flags 
// Next line: end of the DeviceHasFp64Test test; the DeviceHasFp64ButNoBitMathTest
// fixture (ftrSupportsFP64 true, ftrSupports64BitMath false) and most of its test.
// With 64-bit math off, ZE_DEVICE_FP_FLAG_ROUNDED_DIVIDE_SQRT is the one flag
// expected to be absent from both fp64flags and fp32flags; the other listed flags
// are still expected.
& ZE_DEVICE_FP_FLAG_INF_NAN); EXPECT_TRUE(kernelProperties.fp32flags & ZE_DEVICE_FP_FLAG_DENORM); EXPECT_TRUE(kernelProperties.fp32flags & ZE_DEVICE_FP_FLAG_FMA); } struct DeviceHasFp64ButNoBitMathTest : public ::testing::Test { void SetUp() override { DebugManager.flags.CreateMultipleRootDevices.set(numRootDevices); HardwareInfo fp64DeviceInfo = *defaultHwInfo; fp64DeviceInfo.capabilityTable.ftrSupportsFP64 = true; fp64DeviceInfo.capabilityTable.ftrSupports64BitMath = false; neoDevice = NEO::MockDevice::createWithNewExecutionEnvironment(&fp64DeviceInfo, rootDeviceIndex); NEO::DeviceVector devices; devices.push_back(std::unique_ptr(neoDevice)); driverHandle = std::make_unique>(); driverHandle->initialize(std::move(devices)); device = driverHandle->devices[0]; } DebugManagerStateRestore restorer; std::unique_ptr> driverHandle; NEO::Device *neoDevice = nullptr; L0::Device *device = nullptr; const uint32_t rootDeviceIndex = 1u; const uint32_t numRootDevices = 2u; }; TEST_F(DeviceHasFp64ButNoBitMathTest, givenDeviceWithFp64ButNoBitMathThenReportCorrectFp64Flags) { ze_device_module_properties_t kernelProperties = {}; memset(&kernelProperties, std::numeric_limits::max(), sizeof(ze_device_module_properties_t)); kernelProperties.pNext = nullptr; device->getKernelProperties(&kernelProperties); device->getKernelProperties(&kernelProperties); EXPECT_TRUE(kernelProperties.flags & ZE_DEVICE_MODULE_FLAG_FP64); EXPECT_FALSE(kernelProperties.fp64flags & ZE_DEVICE_FP_FLAG_ROUNDED_DIVIDE_SQRT); EXPECT_TRUE(kernelProperties.fp64flags & ZE_DEVICE_FP_FLAG_ROUND_TO_NEAREST); EXPECT_TRUE(kernelProperties.fp64flags & ZE_DEVICE_FP_FLAG_ROUND_TO_ZERO); EXPECT_TRUE(kernelProperties.fp64flags & ZE_DEVICE_FP_FLAG_ROUND_TO_INF); EXPECT_TRUE(kernelProperties.fp64flags & ZE_DEVICE_FP_FLAG_INF_NAN); EXPECT_TRUE(kernelProperties.fp64flags & ZE_DEVICE_FP_FLAG_DENORM); EXPECT_TRUE(kernelProperties.fp64flags & ZE_DEVICE_FP_FLAG_FMA); EXPECT_FALSE(kernelProperties.fp32flags & 
// Next line: end of the no-bit-math test; DeviceHasNo64BitAtomicTest fixture
// (ftrSupportsInteger64BitAtomics false, single root device at index 0) and its
// test (FP16 module flag set, INT64_ATOMICS clear); start of the
// DeviceHas64BitAtomicTest fixture (same setup with the feature set to true).
// NOTE(review): both fixtures name their HardwareInfo local `nonFp64Device`
// although they only change the 64-bit-atomics capability.
ZE_DEVICE_FP_FLAG_ROUNDED_DIVIDE_SQRT); EXPECT_TRUE(kernelProperties.fp32flags & ZE_DEVICE_FP_FLAG_ROUND_TO_NEAREST); EXPECT_TRUE(kernelProperties.fp32flags & ZE_DEVICE_FP_FLAG_ROUND_TO_ZERO); EXPECT_TRUE(kernelProperties.fp32flags & ZE_DEVICE_FP_FLAG_ROUND_TO_INF); EXPECT_TRUE(kernelProperties.fp32flags & ZE_DEVICE_FP_FLAG_INF_NAN); EXPECT_TRUE(kernelProperties.fp32flags & ZE_DEVICE_FP_FLAG_DENORM); EXPECT_TRUE(kernelProperties.fp32flags & ZE_DEVICE_FP_FLAG_FMA); } struct DeviceHasNo64BitAtomicTest : public ::testing::Test { void SetUp() override { HardwareInfo nonFp64Device = *defaultHwInfo; nonFp64Device.capabilityTable.ftrSupportsInteger64BitAtomics = false; neoDevice = NEO::MockDevice::createWithNewExecutionEnvironment(&nonFp64Device, rootDeviceIndex); NEO::DeviceVector devices; devices.push_back(std::unique_ptr(neoDevice)); driverHandle = std::make_unique>(); driverHandle->initialize(std::move(devices)); device = driverHandle->devices[rootDeviceIndex]; } std::unique_ptr> driverHandle; NEO::Device *neoDevice = nullptr; L0::Device *device = nullptr; const uint32_t rootDeviceIndex = 0u; }; TEST_F(DeviceHasNo64BitAtomicTest, givenDeviceWithNoSupportForInteger64BitAtomicsThenFlagsAreSetCorrectly) { ze_device_module_properties_t kernelProperties = {}; memset(&kernelProperties, std::numeric_limits::max(), sizeof(ze_device_module_properties_t)); kernelProperties.pNext = nullptr; device->getKernelProperties(&kernelProperties); EXPECT_TRUE(kernelProperties.flags & ZE_DEVICE_MODULE_FLAG_FP16); EXPECT_FALSE(kernelProperties.flags & ZE_DEVICE_MODULE_FLAG_INT64_ATOMICS); } struct DeviceHas64BitAtomicTest : public ::testing::Test { void SetUp() override { HardwareInfo nonFp64Device = *defaultHwInfo; nonFp64Device.capabilityTable.ftrSupportsInteger64BitAtomics = true; neoDevice = NEO::MockDevice::createWithNewExecutionEnvironment(&nonFp64Device, rootDeviceIndex); NEO::DeviceVector devices; devices.push_back(std::unique_ptr(neoDevice)); driverHandle = std::make_unique>(); 
// Next line: rest of DeviceHas64BitAtomicTest and its test (FP16 and
// INT64_ATOMICS module flags both set); MockMemoryManagerMultiDevice, a thin
// MemoryManagerMock wrapper taking the execution environment by reference; most
// of MultipleDevicesFixture::SetUp, which sets EnableWalkerPartition from the
// fixture's `enablePartitionWalker` value and CreateMultipleSubDevices from
// numSubDevices, gives every root device environment the default HardwareInfo,
// installs the mock memory manager, and collects deviceFactory->rootDevices for
// the driver handle.
driverHandle->initialize(std::move(devices)); device = driverHandle->devices[rootDeviceIndex]; } std::unique_ptr> driverHandle; NEO::Device *neoDevice = nullptr; L0::Device *device = nullptr; const uint32_t rootDeviceIndex = 0u; }; TEST_F(DeviceHas64BitAtomicTest, givenDeviceWithSupportForInteger64BitAtomicsThenFlagsAreSetCorrectly) { ze_device_module_properties_t kernelProperties = {}; memset(&kernelProperties, std::numeric_limits::max(), sizeof(ze_device_module_properties_t)); kernelProperties.pNext = nullptr; device->getKernelProperties(&kernelProperties); EXPECT_TRUE(kernelProperties.flags & ZE_DEVICE_MODULE_FLAG_FP16); EXPECT_TRUE(kernelProperties.flags & ZE_DEVICE_MODULE_FLAG_INT64_ATOMICS); } struct MockMemoryManagerMultiDevice : public MemoryManagerMock { MockMemoryManagerMultiDevice(NEO::ExecutionEnvironment &executionEnvironment) : MemoryManagerMock(const_cast(executionEnvironment)) {} }; template struct MultipleDevicesFixture : public ::testing::Test { void SetUp() override { NEO::MockCompilerEnableGuard mock(true); DebugManager.flags.EnableWalkerPartition.set(enablePartitionWalker); DebugManager.flags.CreateMultipleSubDevices.set(numSubDevices); VariableBackup mockDeviceFlagBackup(&MockDevice::createSingleDevice, false); std::vector> devices; NEO::ExecutionEnvironment *executionEnvironment = new NEO::ExecutionEnvironment(); executionEnvironment->prepareRootDeviceEnvironments(numRootDevices); for (auto i = 0u; i < executionEnvironment->rootDeviceEnvironments.size(); i++) { executionEnvironment->rootDeviceEnvironments[i]->setHwInfo(NEO::defaultHwInfo.get()); } memoryManager = new MockMemoryManagerMultiDevice(*executionEnvironment); executionEnvironment->memoryManager.reset(memoryManager); deviceFactory = std::make_unique(numRootDevices, numSubDevices, *executionEnvironment); for (auto i = 0u; i < executionEnvironment->rootDeviceEnvironments.size(); i++) { devices.push_back(std::unique_ptr(deviceFactory->rootDevices[i])); } driverHandle = 
// Next line: rest of MultipleDevicesFixture::SetUp (initializes the driver
// handle, creates the context and registers each root device's handle, root
// device index and device bitfield in it) plus the fixture members (2 root
// devices, 2 sub-devices each); aliases MultipleDevicesTest (<-1>),
// MultipleDevicesDisabledImplicitScalingTest (<0>) and
// MultipleDevicesEnabledImplicitScalingTest (<1>); two getMemoryProperties tests:
// with <0> totalSize is globalMemSize / numSubDevices, with <1> it is the full
// globalMemSize; header of the sub-slice test.
std::make_unique>(); driverHandle->initialize(std::move(devices)); context = std::make_unique(driverHandle.get()); EXPECT_NE(context, nullptr); for (auto i = 0u; i < numRootDevices; i++) { auto device = driverHandle->devices[i]; context->getDevices().insert(std::make_pair(device->toHandle(), device)); auto neoDevice = device->getNEODevice(); context->rootDeviceIndices.insert(neoDevice->getRootDeviceIndex()); context->deviceBitfields.insert({neoDevice->getRootDeviceIndex(), neoDevice->getDeviceBitfield()}); } } DebugManagerStateRestore restorer; std::unique_ptr> driverHandle; MockMemoryManagerMultiDevice *memoryManager = nullptr; std::unique_ptr deviceFactory; std::unique_ptr context; const uint32_t numRootDevices = 2u; const uint32_t numSubDevices = 2u; }; using MultipleDevicesTest = MultipleDevicesFixture<-1>; using MultipleDevicesDisabledImplicitScalingTest = MultipleDevicesFixture<0>; using MultipleDevicesEnabledImplicitScalingTest = MultipleDevicesFixture<1>; TEST_F(MultipleDevicesDisabledImplicitScalingTest, whenCallingGetMemoryPropertiesWithSubDevicesThenCorrectSizeReturned) { L0::Device *device0 = driverHandle->devices[0]; uint32_t count = 1; ze_device_memory_properties_t memProperties = {}; ze_result_t res = device0->getMemoryProperties(&count, &memProperties); EXPECT_EQ(res, ZE_RESULT_SUCCESS); EXPECT_EQ(1u, count); EXPECT_EQ(memProperties.totalSize, device0->getNEODevice()->getDeviceInfo().globalMemSize / numSubDevices); } TEST_F(MultipleDevicesEnabledImplicitScalingTest, whenCallingGetMemoryPropertiesWithSubDevicesThenCorrectSizeReturned) { L0::Device *device0 = driverHandle->devices[0]; uint32_t count = 1; ze_device_memory_properties_t memProperties = {}; ze_result_t res = device0->getMemoryProperties(&count, &memProperties); EXPECT_EQ(res, ZE_RESULT_SUCCESS); EXPECT_EQ(1u, count); EXPECT_EQ(memProperties.totalSize, device0->getNEODevice()->getDeviceInfo().globalMemSize); } TEST_F(MultipleDevicesDisabledImplicitScalingTest, 
// Next line: the two sub-slice tests. Both overwrite gtSystemInfo on the root
// device's mutable HardwareInfo (MaxSubSlicesSupported 48, MaxSlicesSupported 3,
// SubSliceCount 8, SliceCount 2) before calling getProperties(). The <0> variant
// expects numSubslicesPerSlice == SubSliceCount / SliceCount and
// numSlices == SliceCount; the <1> variant starts here and finishes on the
// following line.
// NOTE(review): `auto >SysInfo` looks like a damaged `gtSysInfo` reference
// declaration (the variable is used as gtSysInfo below); kept as found.
GivenImplicitScalingDisabledWhenGettingDevicePropertiesGetSubslicesPerSliceThenCorrectValuesReturned) { L0::Device *device = driverHandle->devices[0]; ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES}; deviceProperties.type = ZE_DEVICE_TYPE_GPU; device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo()->gtSystemInfo.MaxSubSlicesSupported = 48; device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo()->gtSystemInfo.MaxSlicesSupported = 3; device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo()->gtSystemInfo.SubSliceCount = 8; device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo()->gtSystemInfo.SliceCount = 2; auto >SysInfo = device->getNEODevice()->getHardwareInfo().gtSystemInfo; device->getProperties(&deviceProperties); EXPECT_EQ(((gtSysInfo.SubSliceCount / gtSysInfo.SliceCount)), deviceProperties.numSubslicesPerSlice); EXPECT_EQ(gtSysInfo.SliceCount, deviceProperties.numSlices); } TEST_F(MultipleDevicesEnabledImplicitScalingTest, GivenImplicitScalingEnabledWhenGettingDevicePropertiesGetSubslicesPerSliceThenCorrectValuesReturned) { L0::Device *device = driverHandle->devices[0]; ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES}; deviceProperties.type = ZE_DEVICE_TYPE_GPU; device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo()->gtSystemInfo.MaxSubSlicesSupported = 48; device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo()->gtSystemInfo.MaxSlicesSupported = 3; device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo()->gtSystemInfo.SubSliceCount = 8; device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo()->gtSystemInfo.SliceCount = 2; auto >SysInfo = device->getNEODevice()->getHardwareInfo().gtSystemInfo; device->getProperties(&deviceProperties); EXPECT_EQ((gtSysInfo.SubSliceCount / gtSysInfo.SliceCount), deviceProperties.numSubslicesPerSlice); 
// Next line: end of the <1> sub-slice test (numSlices is expected to be
// SliceCount * numSubDevices); sub-device enumeration: a zero-count query returns
// numSubDevices, and a follow-up call with count deliberately raised by one still
// succeeds and reports numSubDevices again, with every returned handle non-null
// and marked isSubdevice; a non-zero count with a null pointer yields
// ZE_RESULT_ERROR_INVALID_ARGUMENT; a sub-device reports
// ZE_DEVICE_PROPERTY_FLAG_SUBDEVICE; canAccessPeer() on the device itself
// succeeds and sets canAccess to true.
EXPECT_EQ((gtSysInfo.SliceCount * numSubDevices), deviceProperties.numSlices); } TEST_F(MultipleDevicesTest, whenRetrievingNumberOfSubdevicesThenCorrectNumberIsReturned) { L0::Device *device0 = driverHandle->devices[0]; uint32_t count = 0; auto result = device0->getSubDevices(&count, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(numSubDevices, count); std::vector subDevices(count); count++; result = device0->getSubDevices(&count, subDevices.data()); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(numSubDevices, count); for (auto subDevice : subDevices) { EXPECT_NE(nullptr, subDevice); EXPECT_TRUE(static_cast(subDevice)->isSubdevice); } } TEST_F(MultipleDevicesTest, givenNonZeroNumbersOfSubdevicesWhenGetSubDevicesIsCalledWithNullPointerThenInvalidArgumentIsReturned) { L0::Device *device0 = driverHandle->devices[0]; uint32_t count = 1; auto result = device0->getSubDevices(&count, nullptr); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, result); } TEST_F(MultipleDevicesTest, whenRetriecingSubDevicePropertiesThenCorrectFlagIsSet) { L0::Device *device0 = driverHandle->devices[0]; uint32_t count = 0; auto result = device0->getSubDevices(&count, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(numSubDevices, count); std::vector subDevices(count); count++; result = device0->getSubDevices(&count, subDevices.data()); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(numSubDevices, count); ze_device_properties_t deviceProps = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES}; L0::Device *subdevice0 = static_cast(subDevices[0]); subdevice0->getProperties(&deviceProps); EXPECT_EQ(ZE_DEVICE_PROPERTY_FLAG_SUBDEVICE, deviceProps.flags & ZE_DEVICE_PROPERTY_FLAG_SUBDEVICE); } TEST_F(MultipleDevicesTest, givenTheSameDeviceThenCanAccessPeerReturnsTrue) { L0::Device *device0 = driverHandle->devices[0]; ze_bool_t canAccess = false; ze_result_t res = device0->canAccessPeer(device0->toHandle(), &canAccess); EXPECT_EQ(ZE_RESULT_SUCCESS, res); EXPECT_TRUE(canAccess); } 
TEST_F(MultipleDevicesTest, givenTwoRootDevicesFromSameFamilyThenCanAccessPeerSuccessfullyCompletes) { L0::Device *device0 = driverHandle->devices[0]; L0::Device *device1 = driverHandle->devices[1]; GFXCORE_FAMILY device0Family = device0->getNEODevice()->getHardwareInfo().platform.eRenderCoreFamily; GFXCORE_FAMILY device1Family = device1->getNEODevice()->getHardwareInfo().platform.eRenderCoreFamily; EXPECT_EQ(device0Family, device1Family); ze_bool_t canAccess = true; ze_result_t res = device0->canAccessPeer(device1->toHandle(), &canAccess); EXPECT_EQ(ZE_RESULT_SUCCESS, res); } using DeviceTests = Test; TEST_F(DeviceTests, WhenGettingMemoryAccessPropertiesThenSuccessIsReturned) { ze_device_memory_access_properties_t properties; auto result = device->getMemoryAccessProperties(&properties); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto &hwInfo = device->getHwInfo(); auto &hwInfoConfig = *NEO::HwInfoConfig::get(hwInfo.platform.eProductFamily); auto expectedHostAllocCapabilities = static_cast(hwInfoConfig.getHostMemCapabilities(&hwInfo)); EXPECT_EQ(expectedHostAllocCapabilities, properties.hostAllocCapabilities); auto expectedDeviceAllocCapabilities = static_cast(hwInfoConfig.getDeviceMemCapabilities()); EXPECT_EQ(expectedDeviceAllocCapabilities, properties.deviceAllocCapabilities); auto expectedSharedSingleDeviceAllocCapabilities = static_cast(hwInfoConfig.getSingleDeviceSharedMemCapabilities()); EXPECT_EQ(expectedSharedSingleDeviceAllocCapabilities, properties.sharedSingleDeviceAllocCapabilities); auto expectedSharedSystemAllocCapabilities = static_cast(hwInfoConfig.getSharedSystemMemCapabilities(&hwInfo)); EXPECT_EQ(expectedSharedSystemAllocCapabilities, properties.sharedSystemAllocCapabilities); } template struct MultipleDevicesP2PFixture : public ::testing::Test { void SetUp() override { NEO::MockCompilerEnableGuard mock(true); VariableBackup mockDeviceFlagBackup(&MockDevice::createSingleDevice, false); std::vector> devices; NEO::ExecutionEnvironment 
*executionEnvironment = new NEO::ExecutionEnvironment(); executionEnvironment->prepareRootDeviceEnvironments(numRootDevices); NEO::HardwareInfo hardwareInfo = *NEO::defaultHwInfo; hardwareInfo.capabilityTable.p2pAccessSupported = p2pAccessDevice0; hardwareInfo.capabilityTable.p2pAtomicAccessSupported = p2pAtomicAccessDevice0; executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(&hardwareInfo); hardwareInfo.capabilityTable.p2pAccessSupported = p2pAccessDevice1; hardwareInfo.capabilityTable.p2pAtomicAccessSupported = p2pAtomicAccessDevice1; executionEnvironment->rootDeviceEnvironments[1]->setHwInfo(&hardwareInfo); memoryManager = new MockMemoryManagerMultiDevice(*executionEnvironment); executionEnvironment->memoryManager.reset(memoryManager); deviceFactory = std::make_unique(numRootDevices, numSubDevices, *executionEnvironment); for (auto i = 0u; i < executionEnvironment->rootDeviceEnvironments.size(); i++) { devices.push_back(std::unique_ptr(deviceFactory->rootDevices[i])); } driverHandle = std::make_unique>(); driverHandle->initialize(std::move(devices)); context = std::make_unique(driverHandle.get()); EXPECT_NE(context, nullptr); for (auto i = 0u; i < numRootDevices; i++) { auto device = driverHandle->devices[i]; context->getDevices().insert(std::make_pair(device->toHandle(), device)); auto neoDevice = device->getNEODevice(); context->rootDeviceIndices.insert(neoDevice->getRootDeviceIndex()); context->deviceBitfields.insert({neoDevice->getRootDeviceIndex(), neoDevice->getDeviceBitfield()}); } } DebugManagerStateRestore restorer; std::unique_ptr> driverHandle; MockMemoryManagerMultiDevice *memoryManager = nullptr; std::unique_ptr deviceFactory; std::unique_ptr context; const uint32_t numRootDevices = 2u; const uint32_t numSubDevices = 2u; }; using MemoryAccessPropertieP2PAccess0Atomic0 = MultipleDevicesP2PFixture<0, 0, 0, 0>; TEST_F(MemoryAccessPropertieP2PAccess0Atomic0, 
WhenCallingGetMemoryAccessPropertiesWithDevicesHavingNoAccessSupportThenNoSupportIsReturned) { L0::Device *device = driverHandle->devices[0]; ze_device_memory_access_properties_t properties; auto result = device->getMemoryAccessProperties(&properties); EXPECT_EQ(ZE_RESULT_SUCCESS, result); ze_memory_access_cap_flags_t expectedSharedCrossDeviceAllocCapabilities = {}; EXPECT_EQ(expectedSharedCrossDeviceAllocCapabilities, properties.sharedCrossDeviceAllocCapabilities); } using MemoryAccessPropertieP2PAccess1Atomic0 = MultipleDevicesP2PFixture<1, 0, 0, 0>; TEST_F(MemoryAccessPropertieP2PAccess1Atomic0, WhenCallingGetMemoryAccessPropertiesWithDevicesHavingP2PAccessSupportThenSupportIsReturned) { L0::Device *device = driverHandle->devices[0]; ze_device_memory_access_properties_t properties; auto result = device->getMemoryAccessProperties(&properties); EXPECT_EQ(ZE_RESULT_SUCCESS, result); ze_memory_access_cap_flags_t expectedSharedCrossDeviceAllocCapabilities = ZE_MEMORY_ACCESS_CAP_FLAG_RW | ZE_MEMORY_ACCESS_CAP_FLAG_CONCURRENT; EXPECT_EQ(expectedSharedCrossDeviceAllocCapabilities, properties.sharedCrossDeviceAllocCapabilities); } using MemoryAccessPropertieP2PAccess1Atomic1 = MultipleDevicesP2PFixture<1, 1, 0, 0>; TEST_F(MemoryAccessPropertieP2PAccess1Atomic1, WhenCallingGetMemoryAccessPropertiesWithDevicesHavingP2PAndAtomicAccessSupportThenSupportIsReturned) { L0::Device *device = driverHandle->devices[0]; ze_device_memory_access_properties_t properties; auto result = device->getMemoryAccessProperties(&properties); EXPECT_EQ(ZE_RESULT_SUCCESS, result); ze_memory_access_cap_flags_t expectedSharedCrossDeviceAllocCapabilities = ZE_MEMORY_ACCESS_CAP_FLAG_RW | ZE_MEMORY_ACCESS_CAP_FLAG_CONCURRENT | ZE_MEMORY_ACCESS_CAP_FLAG_ATOMIC | ZE_MEMORY_ACCESS_CAP_FLAG_CONCURRENT_ATOMIC; EXPECT_EQ(expectedSharedCrossDeviceAllocCapabilities, properties.sharedCrossDeviceAllocCapabilities); } using MultipleDevicesP2PDevice0Access0Atomic0Device1Access0Atomic0Test = 
MultipleDevicesP2PFixture<0, 0, 0, 0>; TEST_F(MultipleDevicesP2PDevice0Access0Atomic0Device1Access0Atomic0Test, WhenCallingGetP2PPropertiesWithBothDevicesHavingNoAccessSupportThenNoSupportIsReturned) { L0::Device *device0 = driverHandle->devices[0]; L0::Device *device1 = driverHandle->devices[1]; ze_device_p2p_properties_t p2pProperties = {}; device0->getP2PProperties(device1, &p2pProperties); EXPECT_FALSE(p2pProperties.flags & ZE_DEVICE_P2P_PROPERTY_FLAG_ACCESS); EXPECT_FALSE(p2pProperties.flags & ZE_DEVICE_P2P_PROPERTY_FLAG_ATOMICS); } using MultipleDevicesP2PDevice0Access0Atomic0Device1Access1Atomic0Test = MultipleDevicesP2PFixture<0, 0, 1, 0>; TEST_F(MultipleDevicesP2PDevice0Access0Atomic0Device1Access1Atomic0Test, WhenCallingGetP2PPropertiesWithOnlyOneDeviceHavingAccessSupportThenNoSupportIsReturned) { L0::Device *device0 = driverHandle->devices[0]; L0::Device *device1 = driverHandle->devices[1]; ze_device_p2p_properties_t p2pProperties = {}; device0->getP2PProperties(device1, &p2pProperties); EXPECT_FALSE(p2pProperties.flags & ZE_DEVICE_P2P_PROPERTY_FLAG_ACCESS); EXPECT_FALSE(p2pProperties.flags & ZE_DEVICE_P2P_PROPERTY_FLAG_ATOMICS); } using MultipleDevicesP2PDevice0Access1Atomic0Device1Access0Atomic0Test = MultipleDevicesP2PFixture<1, 0, 0, 0>; TEST_F(MultipleDevicesP2PDevice0Access1Atomic0Device1Access0Atomic0Test, WhenCallingGetP2PPropertiesWithOnlyFirstDeviceHavingAccessSupportThenNoSupportIsReturned) { L0::Device *device0 = driverHandle->devices[0]; L0::Device *device1 = driverHandle->devices[1]; ze_device_p2p_properties_t p2pProperties = {}; device0->getP2PProperties(device1, &p2pProperties); EXPECT_FALSE(p2pProperties.flags & ZE_DEVICE_P2P_PROPERTY_FLAG_ACCESS); EXPECT_FALSE(p2pProperties.flags & ZE_DEVICE_P2P_PROPERTY_FLAG_ATOMICS); } using MultipleDevicesP2PDevice0Access1Atomic0Device1Access1Atomic0Test = MultipleDevicesP2PFixture<1, 0, 1, 0>; TEST_F(MultipleDevicesP2PDevice0Access1Atomic0Device1Access1Atomic0Test, 
WhenCallingGetP2PPropertiesWithBothDevicesHavingAccessSupportThenSupportIsReturned) {
    L0::Device *device0 = driverHandle->devices[0];
    L0::Device *device1 = driverHandle->devices[1];

    ze_device_p2p_properties_t p2pProperties = {};
    device0->getP2PProperties(device1, &p2pProperties);

    EXPECT_TRUE(p2pProperties.flags & ZE_DEVICE_P2P_PROPERTY_FLAG_ACCESS);
    EXPECT_FALSE(p2pProperties.flags & ZE_DEVICE_P2P_PROPERTY_FLAG_ATOMICS);
}

using MultipleDevicesP2PDevice0Access1Atomic0Device1Access1Atomic1Test = MultipleDevicesP2PFixture<1, 0, 1, 1>;

// Fixture <1, 0, 1, 1>: ACCESS is expected, ATOMICS is not.
TEST_F(MultipleDevicesP2PDevice0Access1Atomic0Device1Access1Atomic1Test, WhenCallingGetP2PPropertiesWithBothDevicesHavingAccessSupportAndOnlyOneWithAtomicThenSupportIsReturnedOnlyForAccess) {
    L0::Device *device0 = driverHandle->devices[0];
    L0::Device *device1 = driverHandle->devices[1];

    ze_device_p2p_properties_t p2pProperties = {};
    device0->getP2PProperties(device1, &p2pProperties);

    EXPECT_TRUE(p2pProperties.flags & ZE_DEVICE_P2P_PROPERTY_FLAG_ACCESS);
    EXPECT_FALSE(p2pProperties.flags & ZE_DEVICE_P2P_PROPERTY_FLAG_ATOMICS);
}

using MultipleDevicesP2PDevice0Access1Atomic1Device1Access1Atomic0Test = MultipleDevicesP2PFixture<1, 1, 1, 0>;

// Fixture <1, 1, 1, 0>: ACCESS is expected, ATOMICS is not.
TEST_F(MultipleDevicesP2PDevice0Access1Atomic1Device1Access1Atomic0Test, WhenCallingGetP2PPropertiesWithBothDevicesHavingAccessSupportAndOnlyFirstWithAtomicThenSupportIsReturnedOnlyForAccess) {
    L0::Device *device0 = driverHandle->devices[0];
    L0::Device *device1 = driverHandle->devices[1];

    ze_device_p2p_properties_t p2pProperties = {};
    device0->getP2PProperties(device1, &p2pProperties);

    EXPECT_TRUE(p2pProperties.flags & ZE_DEVICE_P2P_PROPERTY_FLAG_ACCESS);
    EXPECT_FALSE(p2pProperties.flags & ZE_DEVICE_P2P_PROPERTY_FLAG_ATOMICS);
}

using MultipleDevicesP2PDevice0Access1Atomic1Device1Access1Atomic1Test = MultipleDevicesP2PFixture<1, 1, 1, 1>;

// Fixture <1, 1, 1, 1>: both ACCESS and ATOMICS are expected.
TEST_F(MultipleDevicesP2PDevice0Access1Atomic1Device1Access1Atomic1Test, WhenCallingGetP2PPropertiesWithBothDevicesHavingAccessAndAtomicSupportThenSupportIsReturned) {
L0::Device *device0 = driverHandle->devices[0];
    L0::Device *device1 = driverHandle->devices[1];

    ze_device_p2p_properties_t p2pProperties = {};
    device0->getP2PProperties(device1, &p2pProperties);

    EXPECT_TRUE(p2pProperties.flags & ZE_DEVICE_P2P_PROPERTY_FLAG_ACCESS);
    EXPECT_TRUE(p2pProperties.flags & ZE_DEVICE_P2P_PROPERTY_FLAG_ATOMICS);
}

// Two root devices with the same render core family: canAccessPeer succeeds and reports true.
TEST_F(MultipleDevicesTest, givenTwoRootDevicesFromSameFamilyThenCanAccessPeerReturnsTrue) {
    L0::Device *device0 = driverHandle->devices[0];
    L0::Device *device1 = driverHandle->devices[1];

    GFXCORE_FAMILY device0Family = device0->getNEODevice()->getHardwareInfo().platform.eRenderCoreFamily;
    GFXCORE_FAMILY device1Family = device1->getNEODevice()->getHardwareInfo().platform.eRenderCoreFamily;
    EXPECT_EQ(device0Family, device1Family);

    ze_bool_t canAccess = false;
    ze_result_t res = device0->canAccessPeer(device1->toHandle(), &canAccess);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    EXPECT_TRUE(canAccess);
}

// Calling canAccessPeer twice on the same pair yields success and true both times.
TEST_F(MultipleDevicesTest, givenCanAccessPeerCalledTwiceThenCanAccessPeerReturnsSameValueEachTime) {
    L0::Device *device0 = driverHandle->devices[0];
    L0::Device *device1 = driverHandle->devices[1];

    GFXCORE_FAMILY device0Family = device0->getNEODevice()->getHardwareInfo().platform.eRenderCoreFamily;
    GFXCORE_FAMILY device1Family = device1->getNEODevice()->getHardwareInfo().platform.eRenderCoreFamily;
    EXPECT_EQ(device0Family, device1Family);

    ze_bool_t canAccess = false;
    ze_result_t res = device0->canAccessPeer(device1->toHandle(), &canAccess);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    EXPECT_TRUE(canAccess);

    res = device0->canAccessPeer(device1->toHandle(), &canAccess);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    EXPECT_TRUE(canAccess);
}

// When the mock command list fails appendMemoryCopy, canAccessPeer still returns
// ZE_RESULT_SUCCESS but reports canAccess == false.
TEST_F(MultipleDevicesTest, givenDeviceFailsAppendMemoryCopyThenCanAccessPeerReturnsFalse) {
    // NOTE(review): the template argument lists after `Mock` and `static_cast` appear to be
    // missing in this text - confirm against the upstream file.
    struct MockDeviceFail : public Mock {
        MockDeviceFail(L0::Device *device) : Mock(device->getNEODevice(), static_cast(device->getExecEnvironment())) {
            this->driverHandle = device->getDriverHandle();
            // Force the copy that canAccessPeer issues to fail.
            this->commandList.appendMemoryCopyResult
= ZE_RESULT_ERROR_UNKNOWN;
        }

        // Hand out the embedded mock queue instead of creating a real one.
        ze_result_t createCommandQueue(const ze_command_queue_desc_t *desc, ze_command_queue_handle_t *commandQueue) override {
            *commandQueue = &this->commandQueue;
            return ZE_RESULT_SUCCESS;
        }

        // Hand out the embedded mock command list instead of creating a real one.
        ze_result_t createCommandList(const ze_command_list_desc_t *desc, ze_command_list_handle_t *commandList) override {
            *commandList = &this->commandList;
            return ZE_RESULT_SUCCESS;
        }

        MockCommandList commandList;
        Mock commandQueue;
    };

    MockDeviceFail *device0 = new MockDeviceFail(driverHandle->devices[0]);
    L0::Device *device1 = driverHandle->devices[1];

    ze_bool_t canAccess = false;
    ze_result_t res = device0->canAccessPeer(device1->toHandle(), &canAccess);
    // The copy must have been attempted; the call itself succeeds but access is denied.
    EXPECT_GT(device0->commandList.appendMemoryCopyCalled, 0u);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    EXPECT_FALSE(canAccess);
    delete device0;
}

// When the mock command queue fails executeCommandLists, canAccessPeer still returns
// ZE_RESULT_SUCCESS but reports canAccess == false.
TEST_F(MultipleDevicesTest, givenDeviceFailsExecuteCommandListThenCanAccessPeerReturnsFalse) {
    struct MockDeviceFail : public Mock {
        // Queue mock whose submission always fails; destroy() is a no-op because the queue
        // is a member of the device mock, not a separately allocated object.
        struct MockCommandQueueImp : public Mock {
            ze_result_t destroy() override {
                return ZE_RESULT_SUCCESS;
            }

            ze_result_t executeCommandLists(uint32_t numCommandLists, ze_command_list_handle_t *phCommandLists, ze_fence_handle_t hFence, bool performMigration) override {
                return ZE_RESULT_ERROR_UNKNOWN;
            }
        };

        MockDeviceFail(L0::Device *device) : Mock(device->getNEODevice(), static_cast(device->getExecEnvironment())) {
            this->driverHandle = device->getDriverHandle();
        }

        ze_result_t createCommandQueue(const ze_command_queue_desc_t *desc, ze_command_queue_handle_t *commandQueue) override {
            *commandQueue = &this->commandQueue;
            return ZE_RESULT_SUCCESS;
        }

        ze_result_t createCommandList(const ze_command_list_desc_t *desc, ze_command_list_handle_t *commandList) override {
            *commandList = &this->commandList;
            return ZE_RESULT_SUCCESS;
        }

        MockCommandList commandList;
        MockCommandQueueImp commandQueue;
    };

    MockDeviceFail *device0 = new MockDeviceFail(driverHandle->devices[0]);
    L0::Device *device1 = driverHandle->devices[1];

    ze_bool_t canAccess = false;
    ze_result_t res =
device0->canAccessPeer(device1->toHandle(), &canAccess); EXPECT_EQ(ZE_RESULT_SUCCESS, res); EXPECT_FALSE(canAccess); delete device0; } TEST_F(MultipleDevicesTest, givenTwoSubDevicesFromTheSameRootDeviceThenCanAccessPeerReturnsTrue) { L0::Device *device0 = driverHandle->devices[0]; L0::Device *device1 = driverHandle->devices[1]; uint32_t subDeviceCount = 0; ze_result_t res = device0->getSubDevices(&subDeviceCount, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, res); EXPECT_EQ(numSubDevices, subDeviceCount); std::vector subDevices0(subDeviceCount); res = device0->getSubDevices(&subDeviceCount, subDevices0.data()); EXPECT_EQ(ZE_RESULT_SUCCESS, res); subDeviceCount = 0; res = device1->getSubDevices(&subDeviceCount, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, res); EXPECT_EQ(numSubDevices, subDeviceCount); std::vector subDevices1(subDeviceCount); res = device1->getSubDevices(&subDeviceCount, subDevices1.data()); EXPECT_EQ(ZE_RESULT_SUCCESS, res); ze_bool_t canAccess = false; L0::Device *subDevice0_0 = Device::fromHandle(subDevices0[0]); subDevice0_0->canAccessPeer(subDevices0[1], &canAccess); EXPECT_TRUE(canAccess); canAccess = false; L0::Device *subDevice1_0 = Device::fromHandle(subDevices1[0]); subDevice1_0->canAccessPeer(subDevices1[1], &canAccess); EXPECT_TRUE(canAccess); } TEST_F(MultipleDevicesTest, givenTopologyForTwoSubdevicesWhenGettingApiSliceIdWithRootDeviceThenCorrectMappingIsUsedAndApiSliceIdsForSubdeviceReturned) { L0::Device *device0 = driverHandle->devices[0]; auto deviceImp0 = static_cast(device0); auto hwInfo = device0->getHwInfo(); ze_device_properties_t deviceProperties = {}; deviceImp0->getProperties(&deviceProperties); NEO::TopologyMap map; TopologyMapping mapping; EXPECT_EQ(hwInfo.gtSystemInfo.SliceCount, deviceProperties.numSlices); mapping.sliceIndices.resize(hwInfo.gtSystemInfo.SliceCount); for (uint32_t i = 0; i < hwInfo.gtSystemInfo.SliceCount; i++) { mapping.sliceIndices[i] = i; } mapping.subsliceIndices.resize(hwInfo.gtSystemInfo.SubSliceCount / 
hwInfo.gtSystemInfo.SliceCount);
    for (uint32_t i = 0; i < hwInfo.gtSystemInfo.SubSliceCount / hwInfo.gtSystemInfo.SliceCount; i++) {
        mapping.subsliceIndices[i] = i;
    }
    map[0] = mapping;

    // Second entry reuses the same mapping object with slice indices shifted by 10.
    for (uint32_t i = 0; i < hwInfo.gtSystemInfo.SliceCount; i++) {
        mapping.sliceIndices[i] = i + 10;
    }
    map[1] = mapping;

    uint32_t sliceId = 0;
    uint32_t subsliceId = 0;
    auto ret = deviceImp0->toApiSliceId(map, sliceId, subsliceId, 0);

    EXPECT_TRUE(ret);
    EXPECT_EQ(0u, sliceId);

    // Slice 10 looked up with device index 1 is expected to come back as SliceCount + 0.
    sliceId = 10;
    ret = deviceImp0->toApiSliceId(map, sliceId, subsliceId, 1);

    EXPECT_TRUE(ret);
    EXPECT_EQ(hwInfo.gtSystemInfo.SliceCount + 0u, sliceId);
}

// Only map[0] is populated: the lookup with device index 0 succeeds, the lookup with
// device index 1 (no map entry) is expected to return false.
TEST_F(MultipleDevicesTest, givenTopologyForSingleSubdeviceWhenGettingApiSliceIdWithRootDeviceThenCorrectApiSliceIdsForFirstSubDeviceIsReturned) {
    L0::Device *device0 = driverHandle->devices[0];
    auto deviceImp0 = static_cast(device0);
    auto hwInfo = device0->getHwInfo();

    ze_device_properties_t deviceProperties = {};
    deviceImp0->getProperties(&deviceProperties);

    NEO::TopologyMap map;
    TopologyMapping mapping;

    EXPECT_EQ(hwInfo.gtSystemInfo.SliceCount, deviceProperties.numSlices);

    mapping.sliceIndices.resize(hwInfo.gtSystemInfo.SliceCount);
    for (uint32_t i = 0; i < hwInfo.gtSystemInfo.SliceCount; i++) {
        mapping.sliceIndices[i] = i;
    }

    mapping.subsliceIndices.resize(hwInfo.gtSystemInfo.SubSliceCount / hwInfo.gtSystemInfo.SliceCount);
    for (uint32_t i = 0; i < hwInfo.gtSystemInfo.SubSliceCount / hwInfo.gtSystemInfo.SliceCount; i++) {
        mapping.subsliceIndices[i] = i;
    }
    map[0] = mapping;

    uint32_t sliceId = 0;
    uint32_t subsliceId = 0;
    auto ret = deviceImp0->toApiSliceId(map, sliceId, subsliceId, 0);

    EXPECT_TRUE(ret);
    EXPECT_EQ(0u, sliceId);

    sliceId = 0;
    ret = deviceImp0->toApiSliceId(map, sliceId, subsliceId, 1);

    EXPECT_FALSE(ret);
}

// Sub-device variant of the two-sub-device toApiSliceId test: each sub-device is expected
// to translate its own first mapped physical slice (0 and 10) to API slice 0.
TEST_F(MultipleDevicesTest, givenTopologyForTwoSubdevicesWhenGettingApiSliceIdWithSubDeviceThenCorrectSliceIdsReturned) {
    L0::Device *device0 = driverHandle->devices[0];
    auto hwInfo = device0->getHwInfo();

    uint32_t subDeviceCount = numSubDevices;
std::vector subDevices0(subDeviceCount);
    auto res = device0->getSubDevices(&subDeviceCount, subDevices0.data());
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);

    L0::Device *subDevice0 = Device::fromHandle(subDevices0[0]);
    L0::Device *subDevice1 = Device::fromHandle(subDevices0[1]);

    L0::DeviceImp *subDeviceImp0 = static_cast(subDevice0);
    L0::DeviceImp *subDeviceImp1 = static_cast(subDevice1);

    NEO::TopologyMap map;
    TopologyMapping mapping;

    // map[0]: slice i -> i, sub-slice i -> i.
    mapping.sliceIndices.resize(hwInfo.gtSystemInfo.SliceCount);
    for (uint32_t i = 0; i < hwInfo.gtSystemInfo.SliceCount; i++) {
        mapping.sliceIndices[i] = i;
    }

    mapping.subsliceIndices.resize(hwInfo.gtSystemInfo.SubSliceCount / hwInfo.gtSystemInfo.SliceCount);
    for (uint32_t i = 0; i < hwInfo.gtSystemInfo.SubSliceCount / hwInfo.gtSystemInfo.SliceCount; i++) {
        mapping.subsliceIndices[i] = i;
    }
    map[0] = mapping;

    // map[1]: slice i -> i + 10, same sub-slice indices.
    for (uint32_t i = 0; i < hwInfo.gtSystemInfo.SliceCount; i++) {
        mapping.sliceIndices[i] = i + 10;
    }
    map[1] = mapping;

    uint32_t sliceId = 0;
    uint32_t subsliceId = 0;
    auto ret = subDeviceImp0->toApiSliceId(map, sliceId, subsliceId, 0);

    EXPECT_TRUE(ret);
    EXPECT_EQ(0u, sliceId);

    // On sub-device 1, physical slice 10 is expected to map back to API slice 0.
    sliceId = 10;
    ret = subDeviceImp1->toApiSliceId(map, sliceId, subsliceId, 1);

    EXPECT_TRUE(ret);
    EXPECT_EQ(0u, sliceId);
}

// Root device, two sub-device maps (slice i -> i and slice i -> i + 10): API slice 0 is
// expected to resolve to physical slice 0 on device index 0, and API slice SliceCount to
// physical slice 10 on device index 1. deviceIndex is pre-set to sentinel values (100, 200)
// so the test shows that the call overwrites it.
TEST_F(MultipleDevicesTest, givenTopologyForTwoSubdevicesWhenGettingPhysicalSliceIdWithRootDeviceThenCorrectSliceIdAndDeviceIndexIsReturned) {
    L0::Device *device0 = driverHandle->devices[0];
    auto deviceImp0 = static_cast(device0);
    auto hwInfo = device0->getHwInfo();

    NEO::TopologyMap map;
    TopologyMapping mapping;

    mapping.sliceIndices.resize(hwInfo.gtSystemInfo.SliceCount);
    for (uint32_t i = 0; i < hwInfo.gtSystemInfo.SliceCount; i++) {
        mapping.sliceIndices[i] = i;
    }

    mapping.subsliceIndices.resize(hwInfo.gtSystemInfo.SubSliceCount / hwInfo.gtSystemInfo.SliceCount);
    for (uint32_t i = 0; i < hwInfo.gtSystemInfo.SubSliceCount / hwInfo.gtSystemInfo.SliceCount; i++) {
        mapping.subsliceIndices[i] = i;
    }
    map[0] = mapping;

    for (uint32_t i = 0; i <
hwInfo.gtSystemInfo.SliceCount; i++) {
        mapping.sliceIndices[i] = i + 10;
    }
    map[1] = mapping;

    uint32_t sliceId = 0;
    uint32_t subsliceId = 0;
    uint32_t deviceIndex = 100; // sentinel, expected to be overwritten with 0
    auto ret = deviceImp0->toPhysicalSliceId(map, sliceId, subsliceId, deviceIndex);

    EXPECT_TRUE(ret);
    EXPECT_EQ(0u, sliceId);
    EXPECT_EQ(0u, subsliceId);
    EXPECT_EQ(0u, deviceIndex);

    // First API slice past sub-device 0's range: expected on device index 1, physical slice 10.
    sliceId = hwInfo.gtSystemInfo.SliceCount;
    deviceIndex = 200; // sentinel, expected to be overwritten with 1
    ret = deviceImp0->toPhysicalSliceId(map, sliceId, subsliceId, deviceIndex);

    EXPECT_TRUE(ret);
    EXPECT_EQ(10u, sliceId);
    EXPECT_EQ(0u, subsliceId);
    EXPECT_EQ(1u, deviceIndex);
}

// Sub-device variant: map[0] maps slice i -> i + 5, map[1] maps slice i -> i + 10.
// API slice 0 / sub-slice 1 is expected to resolve to physical slice 5 on sub-device 0
// (device index 0) and to physical slice 10 on sub-device 1 (device index 1).
TEST_F(MultipleDevicesTest, givenTopologyForTwoSubdevicesWhenGettingPhysicalSliceIdWithSubDeviceThenCorrectSliceIdAndDeviceIndexIsReturned) {
    L0::Device *device0 = driverHandle->devices[0];
    auto hwInfo = device0->getHwInfo();

    uint32_t subDeviceCount = numSubDevices;
    std::vector subDevices0(subDeviceCount);
    auto res = device0->getSubDevices(&subDeviceCount, subDevices0.data());
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);

    L0::Device *subDevice0 = Device::fromHandle(subDevices0[0]);
    L0::Device *subDevice1 = Device::fromHandle(subDevices0[1]);

    L0::DeviceImp *subDeviceImp0 = static_cast(subDevice0);
    L0::DeviceImp *subDeviceImp1 = static_cast(subDevice1);

    NEO::TopologyMap map;
    TopologyMapping mapping;

    mapping.sliceIndices.resize(hwInfo.gtSystemInfo.SliceCount);
    for (uint32_t i = 0; i < hwInfo.gtSystemInfo.SliceCount; i++) {
        mapping.sliceIndices[i] = i + 5;
    }

    mapping.subsliceIndices.resize(hwInfo.gtSystemInfo.SubSliceCount / hwInfo.gtSystemInfo.SliceCount);
    for (uint32_t i = 0; i < hwInfo.gtSystemInfo.SubSliceCount / hwInfo.gtSystemInfo.SliceCount; i++) {
        mapping.subsliceIndices[i] = i;
    }
    map[0] = mapping;

    for (uint32_t i = 0; i < hwInfo.gtSystemInfo.SliceCount; i++) {
        mapping.sliceIndices[i] = i + 10;
    }
    map[1] = mapping;

    uint32_t sliceId = 0;
    uint32_t subsliceId = 1;
    uint32_t deviceIndex = 0;
    auto ret = subDeviceImp0->toPhysicalSliceId(map, sliceId, subsliceId, deviceIndex);

    EXPECT_TRUE(ret);
    EXPECT_EQ(5u,
sliceId);
    EXPECT_EQ(1u, subsliceId);
    EXPECT_EQ(0u, deviceIndex);

    // Same API ids on sub-device 1; deviceIndex starts at a sentinel (100) and is expected
    // to be overwritten with 1.
    sliceId = 0;
    subsliceId = 1;
    deviceIndex = 100;
    ret = subDeviceImp1->toPhysicalSliceId(map, sliceId, subsliceId, deviceIndex);

    EXPECT_TRUE(ret);
    EXPECT_EQ(10u, sliceId);
    EXPECT_EQ(1u, subsliceId);
    EXPECT_EQ(1u, deviceIndex);
}

// An API slice id equal to SliceCount (one past the last valid index of the mapping) on
// sub-device 1 is expected to make toPhysicalSliceId return false. Note that no sub-slice
// indices are populated in this test's mappings.
TEST_F(MultipleDevicesTest, givenInvalidApiSliceIdWhenGettingPhysicalSliceIdThenFalseIsReturned) {
    L0::Device *device0 = driverHandle->devices[0];
    auto hwInfo = device0->getHwInfo();

    uint32_t subDeviceCount = numSubDevices;
    std::vector subDevices0(subDeviceCount);
    auto res = device0->getSubDevices(&subDeviceCount, subDevices0.data());
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);

    L0::Device *subDevice1 = Device::fromHandle(subDevices0[1]);
    L0::DeviceImp *subDeviceImp1 = static_cast(subDevice1);

    NEO::TopologyMap map;
    TopologyMapping mapping;

    mapping.sliceIndices.resize(hwInfo.gtSystemInfo.SliceCount);
    for (uint32_t i = 0; i < hwInfo.gtSystemInfo.SliceCount; i++) {
        mapping.sliceIndices[i] = i + 5;
    }
    map[0] = mapping;

    for (uint32_t i = 0; i < hwInfo.gtSystemInfo.SliceCount; i++) {
        mapping.sliceIndices[i] = i + 10;
    }
    map[1] = mapping;

    uint32_t sliceId = hwInfo.gtSystemInfo.SliceCount;
    uint32_t subsliceId = 0;
    uint32_t deviceIndex = 1;
    auto ret = subDeviceImp1->toPhysicalSliceId(map, sliceId, subsliceId, deviceIndex);

    EXPECT_FALSE(ret);
}

// Only map[0] is populated; a lookup made through sub-device 1 is expected to fail.
TEST_F(MultipleDevicesTest, givenTopologyMapForSubdeviceZeroWhenGettingPhysicalSliceIdForSubdeviceOneThenFalseIsReturned) {
    L0::Device *device0 = driverHandle->devices[0];
    auto hwInfo = device0->getHwInfo();

    uint32_t subDeviceCount = numSubDevices;
    std::vector subDevices0(subDeviceCount);
    auto res = device0->getSubDevices(&subDeviceCount, subDevices0.data());
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);

    L0::Device *subDevice1 = Device::fromHandle(subDevices0[1]);
    L0::DeviceImp *subDeviceImp1 = static_cast(subDevice1);

    NEO::TopologyMap map;
    TopologyMapping mapping;

    mapping.sliceIndices.resize(hwInfo.gtSystemInfo.SliceCount);
    for (uint32_t i = 0; i <
hwInfo.gtSystemInfo.SliceCount; i++) {
        mapping.sliceIndices[i] = i + 5;
    }

    mapping.subsliceIndices.resize(hwInfo.gtSystemInfo.SubSliceCount / hwInfo.gtSystemInfo.SliceCount);
    for (uint32_t i = 0; i < hwInfo.gtSystemInfo.SubSliceCount / hwInfo.gtSystemInfo.SliceCount; i++) {
        mapping.subsliceIndices[i] = i;
    }
    map[0] = mapping;

    uint32_t sliceId = 0;
    uint32_t subsliceId = 3;
    uint32_t deviceIndex = 1;
    auto ret = subDeviceImp1->toPhysicalSliceId(map, sliceId, subsliceId, deviceIndex);

    EXPECT_FALSE(ret);
}

// NOTE(review): the template argument of `Test` (and of the static_cast / std::vector
// uses below) appears to be missing in this text - confirm against the upstream file.
using MultipleDevicesWithCustomHwInfoTest = Test;

// Round trip on the root device and on sub-device 1: API ids are converted to physical ids
// with toPhysicalSliceId and back with toApiSliceId, and must match the starting ids.
// map[0]: slice i -> i, sub-slice i -> i + 10; map[1]: slice i -> i + 1, same sub-slices.
TEST_F(MultipleDevicesWithCustomHwInfoTest, givenTopologyWhenMappingToAndFromApiAndPhysicalSliceIdThenIdsAreMatching) {
    L0::Device *device0 = driverHandle->devices[0];
    auto hwInfo = device0->getHwInfo();

    uint32_t subDeviceCount = numSubDevices;
    std::vector subDevices0(subDeviceCount);
    auto res = device0->getSubDevices(&subDeviceCount, subDevices0.data());
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);

    L0::Device *subDevice1 = Device::fromHandle(subDevices0[1]);
    L0::DeviceImp *subDeviceImp1 = static_cast(subDevice1);
    L0::DeviceImp *device = static_cast(device0);

    NEO::TopologyMap map;
    TopologyMapping mapping;

    mapping.sliceIndices.resize(hwInfo.gtSystemInfo.SliceCount);
    for (uint32_t i = 0; i < hwInfo.gtSystemInfo.SliceCount; i++) {
        mapping.sliceIndices[i] = i;
    }

    mapping.subsliceIndices.resize(hwInfo.gtSystemInfo.SubSliceCount / hwInfo.gtSystemInfo.SliceCount);
    for (uint32_t i = 0; i < hwInfo.gtSystemInfo.SubSliceCount / hwInfo.gtSystemInfo.SliceCount; i++) {
        mapping.subsliceIndices[i] = i + 10;
    }
    map[0] = mapping;

    for (uint32_t i = 0; i < hwInfo.gtSystemInfo.SliceCount; i++) {
        mapping.sliceIndices[i] = i + 1;
    }
    map[1] = mapping;

    uint32_t deviceIndex = 0;

    ze_device_properties_t deviceProperties = {};
    device->getProperties(&deviceProperties);

    // Root device: walk every API slice it reports.
    for (uint32_t i = 0; i < deviceProperties.numSlices; i++) {
        uint32_t sliceId = i;
        uint32_t subsliceId = deviceProperties.numSubslicesPerSlice / 2;
        auto ret = device->toPhysicalSliceId(map, sliceId,
subsliceId, deviceIndex);

        EXPECT_TRUE(ret);
        // Slices below sliceCount are expected on device index 0 with an unchanged id; the
        // rest are expected on device index 1, rebased and shifted by the +1 offset of map[1].
        if (i < sliceCount) {
            EXPECT_EQ(i, sliceId);
            EXPECT_EQ(0u, deviceIndex);
        } else {
            EXPECT_EQ(i + 1 - (deviceProperties.numSlices / subDeviceCount), sliceId);
            EXPECT_EQ(1u, deviceIndex);
        }

        // Converting back must reproduce the original API ids.
        ret = device->toApiSliceId(map, sliceId, subsliceId, deviceIndex);

        EXPECT_TRUE(ret);
        EXPECT_EQ(i, sliceId);
        EXPECT_EQ(deviceProperties.numSubslicesPerSlice / 2, subsliceId);
    }

    // Sub-device 1: every API slice i is expected at physical slice i + 1, device index 1.
    subDeviceImp1->getProperties(&deviceProperties);

    for (uint32_t i = 0; i < deviceProperties.numSlices; i++) {
        uint32_t sliceId = i;
        uint32_t subsliceId = deviceProperties.numSubslicesPerSlice - 1;
        auto ret = subDeviceImp1->toPhysicalSliceId(map, sliceId, subsliceId, deviceIndex);

        EXPECT_TRUE(ret);

        EXPECT_EQ(i + 1, sliceId);
        EXPECT_EQ(1u, deviceIndex);

        ret = subDeviceImp1->toApiSliceId(map, sliceId, subsliceId, deviceIndex);

        EXPECT_TRUE(ret);
        EXPECT_EQ(i, sliceId);
        EXPECT_EQ(deviceProperties.numSubslicesPerSlice - 1, subsliceId);
    }
}

// Single-slice topology per sub-device: map[0] has one slice (physical 1) with sub-slices
// i -> i + 10; map[1] has one slice (physical 0) with sub-slices i -> i + 20.
// `hwInfo` is not declared locally here; it presumably comes from the fixture.
TEST_F(MultipleDevicesWithCustomHwInfoTest, givenSingleSliceTopologyWhenMappingToAndFromApiAndPhysicalSubSliceIdThenIdsAreMatching) {
    L0::Device *device0 = driverHandle->devices[0];

    uint32_t subDeviceCount = numSubDevices;
    std::vector subDevices0(subDeviceCount);
    auto res = device0->getSubDevices(&subDeviceCount, subDevices0.data());
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);

    L0::Device *subDevice1 = Device::fromHandle(subDevices0[1]);
    L0::DeviceImp *subDeviceImp1 = static_cast(subDevice1);
    L0::DeviceImp *device = static_cast(device0);

    NEO::TopologyMap map;
    TopologyMapping mapping;

    mapping.sliceIndices.resize(1);
    mapping.sliceIndices[0] = 1;

    mapping.subsliceIndices.resize(hwInfo.gtSystemInfo.SubSliceCount / hwInfo.gtSystemInfo.SliceCount);
    for (uint32_t i = 0; i < hwInfo.gtSystemInfo.SubSliceCount / hwInfo.gtSystemInfo.SliceCount; i++) {
        mapping.subsliceIndices[i] = i + 10;
    }
    map[0] = mapping;

    mapping.sliceIndices.resize(1);
    mapping.sliceIndices[0] = 0;

    for (uint32_t i = 0; i < hwInfo.gtSystemInfo.SubSliceCount / hwInfo.gtSystemInfo.SliceCount;
i++) { mapping.subsliceIndices[i] = i + 20; } map[1] = mapping; uint32_t deviceIndex = 0; for (uint32_t i = 0; i < 2; i++) { uint32_t sliceId = i; uint32_t subsliceId = 2; auto ret = device->toPhysicalSliceId(map, sliceId, subsliceId, deviceIndex); EXPECT_TRUE(ret); if (i < 1) { EXPECT_EQ(1u, sliceId); EXPECT_EQ(12u, subsliceId); EXPECT_EQ(0u, deviceIndex); } else { EXPECT_EQ(0u, sliceId); EXPECT_EQ(22u, subsliceId); EXPECT_EQ(1u, deviceIndex); } ret = device->toApiSliceId(map, sliceId, subsliceId, deviceIndex); EXPECT_TRUE(ret); EXPECT_EQ(i, sliceId); EXPECT_EQ(2u, subsliceId); } // subdevice 1 { uint32_t sliceId = 0; uint32_t subsliceId = 1; auto ret = subDeviceImp1->toPhysicalSliceId(map, sliceId, subsliceId, deviceIndex); EXPECT_TRUE(ret); EXPECT_EQ(0u, sliceId); EXPECT_EQ(21u, subsliceId); EXPECT_EQ(1u, deviceIndex); ret = subDeviceImp1->toApiSliceId(map, sliceId, subsliceId, deviceIndex); EXPECT_TRUE(ret); EXPECT_EQ(0u, sliceId); EXPECT_EQ(1u, subsliceId); } } struct MultipleDevicesDifferentLocalMemorySupportTest : public MultipleDevicesTest { void SetUp() override { MultipleDevicesTest::SetUp(); memoryManager->localMemorySupported[0] = 1; deviceWithLocalMemory = driverHandle->devices[0]; deviceWithoutLocalMemory = driverHandle->devices[1]; } L0::Device *deviceWithLocalMemory = nullptr; L0::Device *deviceWithoutLocalMemory = nullptr; }; struct MultipleDevicesDifferentFamilyAndLocalMemorySupportTest : public MultipleDevicesTest { void SetUp() override { if ((NEO::HwInfoConfig::get(IGFX_SKYLAKE) == nullptr) || (NEO::HwInfoConfig::get(IGFX_KABYLAKE) == nullptr)) { GTEST_SKIP(); } MultipleDevicesTest::SetUp(); memoryManager->localMemorySupported[0] = 1; memoryManager->localMemorySupported[1] = 1; deviceSKL = driverHandle->devices[0]; deviceKBL = driverHandle->devices[1]; deviceSKL->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo()->platform.eProductFamily = IGFX_SKYLAKE; 
deviceKBL->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo()->platform.eProductFamily = IGFX_KABYLAKE;
    }

    L0::Device *deviceSKL = nullptr;
    L0::Device *deviceKBL = nullptr;
};

// Fixture: both root devices are flagged as having local memory; product families are
// left as created by the base fixture.
struct MultipleDevicesSameFamilyAndLocalMemorySupportTest : public MultipleDevicesTest {
    void SetUp() override {
        MultipleDevicesTest::SetUp();

        memoryManager->localMemorySupported[0] = 1;
        memoryManager->localMemorySupported[1] = 1;

        device0 = driverHandle->devices[0];
        device1 = driverHandle->devices[1];
    }

    L0::Device *device0 = nullptr;
    L0::Device *device1 = nullptr;
};

// The fixture device is expected to expose no source-level debugger.
TEST_F(DeviceTest, givenNoActiveSourceLevelDebuggerWhenGetIsCalledThenNullptrIsReturned) {
    EXPECT_EQ(nullptr, device->getSourceLevelDebugger());
}

// The fixture device is expected to expose no L0 debugger.
TEST_F(DeviceTest, givenNoL0DebuggerWhenGettingL0DebuggerThenNullptrReturned) {
    EXPECT_EQ(nullptr, device->getL0Debugger());
}

// releaseResources() sets resourcesReleased and nulls the NEO device, page-fault command
// list and debug surface; a second call is made and the flag is expected to stay set.
TEST_F(DeviceTest, givenValidDeviceWhenCallingReleaseResourcesThenResourcesReleased) {
    auto deviceImp = static_cast(device);
    EXPECT_FALSE(deviceImp->resourcesReleased);
    EXPECT_FALSE(nullptr == deviceImp->getNEODevice());
    deviceImp->releaseResources();
    EXPECT_TRUE(deviceImp->resourcesReleased);
    EXPECT_TRUE(nullptr == deviceImp->getNEODevice());
    EXPECT_TRUE(nullptr == deviceImp->pageFaultCommandList);
    EXPECT_TRUE(nullptr == deviceImp->getDebugSurface());
    deviceImp->releaseResources();
    EXPECT_TRUE(deviceImp->resourcesReleased);
}

// Checks the COOPERATIVE_KERNELS flag in the command queue group properties of the
// RenderCompute and Compute groups while the HW helper is replaced by a mock.
HWTEST_F(DeviceTest, givenCooperativeDispatchSupportedWhenQueryingPropertiesFlagsThenCooperativeKernelsAreSupported) {
    // Mock helper whose answer is controlled by isCooperativeDispatchSupportedValue.
    struct MockHwHelper : NEO::HwHelperHw {
        bool isCooperativeDispatchSupported(const EngineGroupType engineGroupType, const HardwareInfo &hwInfo) const override {
            return isCooperativeDispatchSupportedValue;
        }
        bool isCooperativeDispatchSupportedValue = true;
    };

    const uint32_t rootDeviceIndex = 0u;
    auto hwInfo = *NEO::defaultHwInfo;
    hwInfo.featureTable.flags.ftrCCSNode = true;
    hwInfo.capabilityTable.defaultEngineType = aub_stream::ENGINE_CCS;
    auto *neoMockDevice =
NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo, rootDeviceIndex);
    // NOTE(review): template argument lists (after Mock, VariableBackup, static_cast,
    // std::vector, std::unique_ptr, std::make_unique) appear to be missing in this text -
    // confirm against the upstream file.
    Mock deviceImp(neoMockDevice, neoMockDevice->getExecutionEnvironment());

    // Swap the mock helper into the factory slot of this core family.
    // VariableBackup presumably restores the original entry at scope exit.
    MockHwHelper hwHelper{};
    VariableBackup hwHelperFactoryBackup{&NEO::hwHelperFactory[static_cast(hwInfo.platform.eRenderCoreFamily)]};
    hwHelperFactoryBackup = &hwHelper;

    // First call only retrieves the number of queue groups.
    uint32_t count = 0;
    ze_result_t res = deviceImp.getCommandQueueGroupProperties(&count, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);

    NEO::EngineGroupType engineGroupTypes[] = {NEO::EngineGroupType::RenderCompute, NEO::EngineGroupType::Compute};
    for (auto isCooperativeDispatchSupported : ::testing::Bool()) {
        hwHelper.isCooperativeDispatchSupportedValue = isCooperativeDispatchSupported;

        std::vector properties(count);
        res = deviceImp.getCommandQueueGroupProperties(&count, properties.data());
        EXPECT_EQ(ZE_RESULT_SUCCESS, res);

        for (auto engineGroupType : engineGroupTypes) {
            // The engine group type value is used directly as the index into `properties`;
            // groups beyond the reported count are skipped.
            auto groupOrdinal = static_cast(engineGroupType);
            if (groupOrdinal >= count) {
                continue;
            }
            // The flag is asserted as set for both values of the mock switch.
            auto actualValue = NEO::isValueSet(properties[groupOrdinal].flags, ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COOPERATIVE_KERNELS);
            EXPECT_TRUE(actualValue);
        }
    }
}

// With capabilityTable.isIntegratedDevice == true, getProperties must set
// ZE_DEVICE_PROPERTY_FLAG_INTEGRATED.
TEST(DevicePropertyFlagIsIntegratedTest, givenIntegratedDeviceThenCorrectDevicePropertyFlagSet) {
    std::unique_ptr> driverHandle;
    NEO::HardwareInfo hwInfo = *NEO::defaultHwInfo.get();
    hwInfo.capabilityTable.isIntegratedDevice = true;

    NEO::MockDevice *neoDevice = NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo);
    NEO::DeviceVector devices;
    devices.push_back(std::unique_ptr(neoDevice));
    driverHandle = std::make_unique>();
    driverHandle->initialize(std::move(devices));
    auto device = driverHandle->devices[0];

    ze_device_properties_t deviceProps = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES};

    device->getProperties(&deviceProps);
    EXPECT_EQ(ZE_DEVICE_PROPERTY_FLAG_INTEGRATED, deviceProps.flags & ZE_DEVICE_PROPERTY_FLAG_INTEGRATED);
}

// With capabilityTable.isIntegratedDevice == false, the INTEGRATED flag must be clear.
TEST(DevicePropertyFlagDiscreteDeviceTest, givenDiscreteDeviceThenCorrectDevicePropertyFlagSet) {
    std::unique_ptr>
driverHandle;
    NEO::HardwareInfo hwInfo = *NEO::defaultHwInfo.get();
    hwInfo.capabilityTable.isIntegratedDevice = false;

    NEO::MockDevice *neoDevice = NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo);
    NEO::DeviceVector devices;
    devices.push_back(std::unique_ptr(neoDevice));
    driverHandle = std::make_unique>();
    driverHandle->initialize(std::move(devices));
    auto device = driverHandle->devices[0];

    ze_device_properties_t deviceProps = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES};

    device->getProperties(&deviceProps);
    EXPECT_EQ(0u, deviceProps.flags & ZE_DEVICE_PROPERTY_FLAG_INTEGRATED);
}

// zeDeviceGetImageProperties forwards to the device exactly once and returns success.
// NOTE(review): the template argument lists after `Mock`, `std::unique_ptr` and
// `static_cast` appear to be missing in this text - confirm against the upstream file.
TEST(zeDevice, givenValidImagePropertiesStructWhenGettingImagePropertiesThenSuccessIsReturned) {
    Mock device;
    ze_result_t result;
    ze_device_image_properties_t imageProperties;

    result = zeDeviceGetImageProperties(device.toHandle(), &imageProperties);
    EXPECT_EQ(1u, device.getDeviceImagePropertiesCalled);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
}

// With imageSupport enabled, the reported image limits must mirror the NEO deviceInfo
// values. Each field is set to a distinct value (1..8) so a mix-up between fields would be
// detected. maxImageDims1D is compared against image2DMaxWidth and maxImageDims2D against
// image2DMaxHeight.
TEST(zeDevice, givenImagesSupportedWhenGettingImagePropertiesThenValidValuesAreReturned) {
    ze_result_t errorValue;
    NEO::MockCompilerEnableGuard mock(true);
    DriverHandleImp driverHandle{};
    NEO::MockDevice *neoDevice = (NEO::MockDevice::createWithNewExecutionEnvironment(NEO::defaultHwInfo.get(), 0));
    auto device = std::unique_ptr(Device::create(&driverHandle, neoDevice, false, &errorValue));

    DeviceInfo &deviceInfo = neoDevice->deviceInfo;

    deviceInfo.imageSupport = true;
    deviceInfo.image2DMaxWidth = 1;
    deviceInfo.image2DMaxHeight = 2;
    deviceInfo.image3DMaxDepth = 3;
    deviceInfo.imageMaxBufferSize = 4;
    deviceInfo.imageMaxArraySize = 5;
    deviceInfo.maxSamplers = 6;
    deviceInfo.maxReadImageArgs = 7;
    deviceInfo.maxWriteImageArgs = 8;

    ze_device_image_properties_t properties{};
    ze_result_t result = zeDeviceGetImageProperties(device->toHandle(), &properties);

    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_EQ(deviceInfo.image2DMaxWidth, static_cast(properties.maxImageDims1D));
    EXPECT_EQ(deviceInfo.image2DMaxHeight, static_cast(properties.maxImageDims2D));
EXPECT_EQ(deviceInfo.image3DMaxDepth, static_cast(properties.maxImageDims3D));
    EXPECT_EQ(deviceInfo.imageMaxBufferSize, properties.maxImageBufferSize);
    EXPECT_EQ(deviceInfo.imageMaxArraySize, static_cast(properties.maxImageArraySlices));
    EXPECT_EQ(deviceInfo.maxSamplers, properties.maxSamplers);
    EXPECT_EQ(deviceInfo.maxReadImageArgs, properties.maxReadImageArgs);
    EXPECT_EQ(deviceInfo.maxWriteImageArgs, properties.maxWriteImageArgs);
}

// With imageSupport disabled, every reported image limit must be zero even though the
// underlying deviceInfo fields hold non-zero values (1..8).
TEST(zeDevice, givenNoImagesSupportedWhenGettingImagePropertiesThenZeroValuesAreReturned) {
    ze_result_t errorValue;
    NEO::MockCompilerEnableGuard mock(true);
    DriverHandleImp driverHandle{};
    NEO::MockDevice *neoDevice = (NEO::MockDevice::createWithNewExecutionEnvironment(NEO::defaultHwInfo.get(), 0));
    auto device = std::unique_ptr(Device::create(&driverHandle, neoDevice, false, &errorValue));

    DeviceInfo &deviceInfo = neoDevice->deviceInfo;

    neoDevice->deviceInfo.imageSupport = false;
    deviceInfo.image2DMaxWidth = 1;
    deviceInfo.image2DMaxHeight = 2;
    deviceInfo.image3DMaxDepth = 3;
    deviceInfo.imageMaxBufferSize = 4;
    deviceInfo.imageMaxArraySize = 5;
    deviceInfo.maxSamplers = 6;
    deviceInfo.maxReadImageArgs = 7;
    deviceInfo.maxWriteImageArgs = 8;

    ze_device_image_properties_t properties{};
    ze_result_t result = zeDeviceGetImageProperties(device->toHandle(), &properties);

    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_EQ(0u, properties.maxImageDims1D);
    EXPECT_EQ(0u, properties.maxImageDims2D);
    EXPECT_EQ(0u, properties.maxImageDims3D);
    EXPECT_EQ(0u, properties.maxImageBufferSize);
    EXPECT_EQ(0u, properties.maxImageArraySlices);
    EXPECT_EQ(0u, properties.maxSamplers);
    EXPECT_EQ(0u, properties.maxReadImageArgs);
    EXPECT_EQ(0u, properties.maxWriteImageArgs);
}

// Test double for CacheReservation. It records the arguments it receives and returns
// `isInitialized` from reserveCache/setCacheAdvice. The `device` constructor argument is
// accepted but not used.
class MockCacheReservation : public CacheReservation {
  public:
    ~MockCacheReservation() override = default;
    MockCacheReservation(L0::Device &device, bool initialize) : isInitialized(initialize){};

    // Records the requested cache level; the reservation size is ignored.
    bool reserveCache(size_t cacheLevel, size_t cacheReservationSize) override {
        receivedCacheLevel =
cacheLevel;
        return isInitialized;
    }

    // Records the requested cache region; pointer and size are ignored.
    bool setCacheAdvice(void *ptr, size_t regionSize, ze_cache_ext_region_t cacheRegion) override {
        receivedCacheRegion = cacheRegion;
        return isInitialized;
    }

    // Returns the class-wide value below, which individual tests override.
    size_t getMaxCacheReservationSize() override {
        return maxCacheReservationSize;
    }

    static size_t maxCacheReservationSize;

    bool isInitialized = false;
    size_t receivedCacheLevel = 3;
    ze_cache_ext_region_t receivedCacheRegion = ze_cache_ext_region_t::ZE_CACHE_EXT_REGION_ZE_CACHE_REGION_DEFAULT;
};

// Default maximum reported by the mock; tests set it to 0 to model "no support".
size_t MockCacheReservation::maxCacheReservationSize = 1024;

using zeDeviceCacheReservationTest = DeviceTest;

// A pNext extension whose stype is left zero-initialized must be rejected with
// ZE_RESULT_ERROR_UNSUPPORTED_ENUMERATION.
TEST_F(zeDeviceCacheReservationTest, givenDeviceCacheExtendedDescriptorWhenGetCachePropertiesCalledWithIncorrectStructureTypeThenReturnErrorUnsupportedEnumeration) {
    ze_cache_reservation_ext_desc_t cacheReservationExtDesc = {};

    ze_device_cache_properties_t deviceCacheProperties = {};
    deviceCacheProperties.pNext = &cacheReservationExtDesc;

    uint32_t count = 1;
    ze_result_t res = device->getCacheProperties(&count, &deviceCacheProperties);
    EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_ENUMERATION, res);
}

// Passing count = 10 must succeed and clamp count to 1.
TEST_F(zeDeviceCacheReservationTest, givenGreaterThanOneCountOfDeviceCachePropertiesWhenGetCachePropertiesIsCalledThenSetCountToOne) {
    static_cast(device)->cacheReservation.reset(new MockCacheReservation(*device, true));
    ze_device_cache_properties_t deviceCacheProperties = {};

    uint32_t count = 10;
    ze_result_t res = device->getCacheProperties(&count, &deviceCacheProperties);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    EXPECT_EQ(count, 1u);
}

// With the mock's maximum forced to 0, the extension descriptor must report
// maxCacheReservationSize == 0.
TEST_F(zeDeviceCacheReservationTest, givenDeviceCacheExtendedDescriptorWhenGetCachePropertiesCalledOnDeviceWithNoSupportForCacheReservationThenReturnZeroMaxCacheReservationSize) {
    // VariableBackup presumably restores the previous value at scope exit.
    VariableBackup maxCacheReservationSizeBackup{&MockCacheReservation::maxCacheReservationSize, 0};
    static_cast(device)->cacheReservation.reset(new MockCacheReservation(*device, true));

    ze_cache_reservation_ext_desc_t cacheReservationExtDesc = {};
    cacheReservationExtDesc.stype =
ZE_STRUCTURE_TYPE_CACHE_RESERVATION_EXT_DESC;

    ze_device_cache_properties_t deviceCacheProperties = {};
    deviceCacheProperties.pNext = &cacheReservationExtDesc;

    uint32_t count = 1;
    ze_result_t res = device->getCacheProperties(&count, &deviceCacheProperties);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    EXPECT_EQ(0u, cacheReservationExtDesc.maxCacheReservationSize);
}

// With the mock's default maximum (non-zero), the extension descriptor must report a
// non-zero maxCacheReservationSize.
TEST_F(zeDeviceCacheReservationTest, givenDeviceCacheExtendedDescriptorWhenGetCachePropertiesCalledOnDeviceWithSupportForCacheReservationThenReturnNonZeroMaxCacheReservationSize) {
    static_cast(device)->cacheReservation.reset(new MockCacheReservation(*device, true));

    ze_cache_reservation_ext_desc_t cacheReservationExtDesc = {};
    cacheReservationExtDesc.stype = ZE_STRUCTURE_TYPE_CACHE_RESERVATION_EXT_DESC;

    ze_device_cache_properties_t deviceCacheProperties = {};
    deviceCacheProperties.pNext = &cacheReservationExtDesc;

    uint32_t count = 1;
    ze_result_t res = device->getCacheProperties(&count, &deviceCacheProperties);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    EXPECT_NE(0u, cacheReservationExtDesc.maxCacheReservationSize);
}

// With the mock's maximum forced to 0, zeDeviceReserveCacheExt must return
// ZE_RESULT_ERROR_UNSUPPORTED_FEATURE.
TEST_F(zeDeviceCacheReservationTest, WhenCallingZeDeviceReserveCacheExtOnDeviceWithNoSupportForCacheReservationThenReturnErrorUnsupportedFeature) {
    VariableBackup maxCacheReservationSizeBackup{&MockCacheReservation::maxCacheReservationSize, 0};
    static_cast(device)->cacheReservation.reset(new MockCacheReservation(*device, true));

    size_t cacheLevel = 3;
    size_t cacheReservationSize = 1024;

    auto result = zeDeviceReserveCacheExt(device->toHandle(), cacheLevel, cacheReservationSize);
    EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, result);
}

// A request with cacheLevel 0 must reach the reservation object as cache level 3.
// The mock pointer is kept so the recorded level can be read after ownership is handed to
// the device via reset().
TEST_F(zeDeviceCacheReservationTest, WhenCallingZeDeviceReserveCacheExtWithCacheLevel0ThenDriverShouldDefaultToCacheLevel3) {
    auto mockCacheReservation = new MockCacheReservation(*device, true);
    static_cast(device)->cacheReservation.reset(mockCacheReservation);

    size_t cacheLevel = 0;
    size_t cacheReservationSize = 1024;

    auto result = zeDeviceReserveCacheExt(device->toHandle(),
cacheLevel, cacheReservationSize); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(3u, mockCacheReservation->receivedCacheLevel); } TEST_F(zeDeviceCacheReservationTest, WhenCallingZeDeviceReserveCacheExtFailsToReserveCacheOnDeviceThenReturnErrorUninitialized) { size_t cacheLevel = 3; size_t cacheReservationSize = 1024; for (auto initialize : {false, true}) { auto mockCacheReservation = new MockCacheReservation(*device, initialize); static_cast(device)->cacheReservation.reset(mockCacheReservation); auto result = zeDeviceReserveCacheExt(device->toHandle(), cacheLevel, cacheReservationSize); if (initialize) { EXPECT_EQ(ZE_RESULT_SUCCESS, result); } else { EXPECT_EQ(ZE_RESULT_ERROR_UNINITIALIZED, result); } EXPECT_EQ(3u, mockCacheReservation->receivedCacheLevel); } } TEST_F(zeDeviceCacheReservationTest, WhenCallingZeDeviceSetCacheAdviceExtWithDefaultCacheRegionThenDriverShouldDefaultToNonReservedRegion) { auto mockCacheReservation = new MockCacheReservation(*device, true); static_cast(device)->cacheReservation.reset(mockCacheReservation); void *ptr = reinterpret_cast(0x123456789); size_t regionSize = 512; ze_cache_ext_region_t cacheRegion = ze_cache_ext_region_t::ZE_CACHE_EXT_REGION_ZE_CACHE_REGION_DEFAULT; auto result = zeDeviceSetCacheAdviceExt(device->toHandle(), ptr, regionSize, cacheRegion); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(ze_cache_ext_region_t::ZE_CACHE_EXT_REGION_ZE_CACHE_NON_RESERVED_REGION, mockCacheReservation->receivedCacheRegion); } TEST_F(zeDeviceCacheReservationTest, WhenCallingZeDeviceSetCacheAdviceExtOnDeviceWithNoSupportForCacheReservationThenReturnErrorUnsupportedFeature) { VariableBackup maxCacheReservationSizeBackup{&MockCacheReservation::maxCacheReservationSize, 0}; static_cast(device)->cacheReservation.reset(new MockCacheReservation(*device, true)); void *ptr = reinterpret_cast(0x123456789); size_t regionSize = 512; ze_cache_ext_region_t cacheRegion = ze_cache_ext_region_t::ZE_CACHE_EXT_REGION_ZE_CACHE_REGION_DEFAULT; auto result = 
zeDeviceSetCacheAdviceExt(device->toHandle(), ptr, regionSize, cacheRegion); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, result); } TEST_F(zeDeviceCacheReservationTest, WhenCallingZeDeviceSetCacheAdviceExtFailsToSetCacheRegionThenReturnErrorUnitialized) { void *ptr = reinterpret_cast(0x123456789); size_t regionSize = 512; ze_cache_ext_region_t cacheRegion = ze_cache_ext_region_t::ZE_CACHE_EXT_REGION_ZE_CACHE_RESERVE_REGION; for (auto initialize : {false, true}) { auto mockCacheReservation = new MockCacheReservation(*device, initialize); static_cast(device)->cacheReservation.reset(mockCacheReservation); auto result = zeDeviceSetCacheAdviceExt(device->toHandle(), ptr, regionSize, cacheRegion); if (initialize) { EXPECT_EQ(ZE_RESULT_SUCCESS, result); } else { EXPECT_EQ(ZE_RESULT_ERROR_UNINITIALIZED, result); } EXPECT_EQ(ze_cache_ext_region_t::ZE_CACHE_EXT_REGION_ZE_CACHE_RESERVE_REGION, mockCacheReservation->receivedCacheRegion); } } using zeDeviceSystemBarrierTest = DeviceTest; TEST_F(zeDeviceSystemBarrierTest, whenCallingSystemBarrierThenReturnErrorUnsupportedFeature) { auto result = static_cast(device)->systemBarrier(); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, result); } template struct MultiSubDeviceFixture : public DeviceFixture { void SetUp() { DebugManager.flags.CreateMultipleSubDevices.set(2); DebugManager.flags.EnableWalkerPartition.set(enablePartitionWalker); DebugManager.flags.EnableImplicitScaling.set(enableImplicitScaling); osLocalMemoryBackup = std::make_unique>(&NEO::OSInterface::osEnableLocalMemory, osLocalMemory); apiSupportBackup = std::make_unique>(&NEO::ImplicitScaling::apiSupport, apiSupport); DeviceFixture::SetUp(); deviceImp = reinterpret_cast(device); subDevice = neoDevice->getSubDevice(0); } L0::DeviceImp *deviceImp = nullptr; NEO::Device *subDevice = nullptr; DebugManagerStateRestore restorer; std::unique_ptr> osLocalMemoryBackup; std::unique_ptr> apiSupportBackup; }; using MultiSubDeviceTest = Test>; TEST_F(MultiSubDeviceTest, 
GivenApiSupportAndLocalMemoryEnabledWhenDeviceContainsSubDevicesThenItIsImplicitScalingCapable) { if (NEO::HwHelper::get(neoDevice->getHardwareInfo().platform.eRenderCoreFamily).platformSupportsImplicitScaling(neoDevice->getHardwareInfo())) { EXPECT_TRUE(device->isImplicitScalingCapable()); EXPECT_EQ(neoDevice, deviceImp->getActiveDevice()); } else { EXPECT_FALSE(device->isImplicitScalingCapable()); EXPECT_EQ(subDevice, deviceImp->getActiveDevice()); } } using MultiSubDeviceTestNoApi = Test>; TEST_F(MultiSubDeviceTestNoApi, GivenNoApiSupportAndLocalMemoryEnabledWhenDeviceContainsSubDevicesThenItIsNotImplicitScalingCapable) { EXPECT_FALSE(device->isImplicitScalingCapable()); EXPECT_EQ(subDevice, deviceImp->getActiveDevice()); } using MultiSubDeviceTestNoLocalMemory = Test>; TEST_F(MultiSubDeviceTestNoLocalMemory, GivenApiSupportAndLocalMemoryDisabledWhenDeviceContainsSubDevicesThenItIsNotImplicitScalingCapable) { EXPECT_FALSE(device->isImplicitScalingCapable()); EXPECT_EQ(subDevice, deviceImp->getActiveDevice()); } using MultiSubDeviceTestNoApiForceOn = Test>; TEST_F(MultiSubDeviceTestNoApiForceOn, GivenNoApiSupportAndLocalMemoryEnabledWhenForcedImplicitScalingThenItIsImplicitScalingCapable) { EXPECT_TRUE(device->isImplicitScalingCapable()); EXPECT_EQ(neoDevice, deviceImp->getActiveDevice()); } using MultiSubDeviceEnabledImplicitScalingTest = Test>; TEST_F(MultiSubDeviceEnabledImplicitScalingTest, GivenApiSupportAndLocalMemoryEnabledWhenDeviceContainsSubDevicesAndSupportsImplicitScalingThenItIsImplicitScalingCapable) { EXPECT_TRUE(device->isImplicitScalingCapable()); EXPECT_EQ(neoDevice, deviceImp->getActiveDevice()); } TEST_F(MultiSubDeviceEnabledImplicitScalingTest, GivenEnabledImplicitScalingWhenDeviceReturnsLowPriorityCsrThenItIsDefaultCsr) { auto &defaultEngine = deviceImp->getActiveDevice()->getDefaultEngine(); NEO::CommandStreamReceiver *csr = nullptr; auto ret = deviceImp->getCsrForLowPriority(&csr); EXPECT_EQ(ZE_RESULT_SUCCESS, ret); 
EXPECT_EQ(defaultEngine.commandStreamReceiver, csr); } } // namespace ult } // namespace L0 test_device_pci_speed_info.cpp000066400000000000000000000051071422164147700345260ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/device/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/test/unit_tests/sources/device/test_device_pci_speed_info.h" #include "shared/source/os_interface/driver_info.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/mocks/ult_device_factory.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" namespace L0 { namespace ult { std::unique_ptr PciSpeedInfoTest::createDevices(uint32_t numSubDevices, const NEO::PhyicalDevicePciSpeedInfo &pciSpeedInfo) { DebugManager.flags.CreateMultipleSubDevices.set(numSubDevices); NEO::ExecutionEnvironment *executionEnvironment = new MockExecutionEnvironment(defaultHwInfo.get(), false, 1); executionEnvironment->rootDeviceEnvironments[0]->osInterface.reset(new OSInterface); executionEnvironment->memoryManager.reset(new MockMemoryManagerOsAgnosticContext(*executionEnvironment)); setPciSpeedInfo(executionEnvironment, pciSpeedInfo); return std::make_unique(1, numSubDevices, *executionEnvironment); } TEST_F(PciSpeedInfoTest, givenSuccessfulReadingOfSpeedValuesCorrectValuesAreReturned) { NEO::PhyicalDevicePciSpeedInfo expectedSpeedInfo; expectedSpeedInfo.genVersion = 4; expectedSpeedInfo.width = 1024; expectedSpeedInfo.maxBandwidth = 4096; auto deviceFactory = createDevices(2, expectedSpeedInfo); auto driverHandle = std::make_unique(); ze_result_t returnValue = ZE_RESULT_SUCCESS; auto device = std::unique_ptr(L0::Device::create(driverHandle.get(), deviceFactory->rootDevices[0], false, &returnValue)); ze_pci_ext_properties_t pciProperties = {}; EXPECT_EQ(ZE_RESULT_SUCCESS, device->getPciProperties(&pciProperties)); EXPECT_EQ(4, pciProperties.maxSpeed.genVersion); 
EXPECT_EQ(1024, pciProperties.maxSpeed.width); EXPECT_EQ(4096, pciProperties.maxSpeed.maxBandwidth); uint32_t subDeviceCount = 0; device->getSubDevices(&subDeviceCount, nullptr); EXPECT_EQ(subDeviceCount, 2u); std::vector subDevices; subDevices.resize(subDeviceCount); EXPECT_EQ(ZE_RESULT_SUCCESS, device->getSubDevices(&subDeviceCount, subDevices.data())); for (auto subDevice : subDevices) { EXPECT_EQ(ZE_RESULT_SUCCESS, zeDevicePciGetPropertiesExt(subDevice, &pciProperties)); EXPECT_EQ(4, pciProperties.maxSpeed.genVersion); EXPECT_EQ(1024, pciProperties.maxSpeed.width); EXPECT_EQ(4096, pciProperties.maxSpeed.maxBandwidth); } } } // namespace ult } // namespace L0 test_device_pci_speed_info.h000066400000000000000000000013441422164147700341720ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/device/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "gtest/gtest.h" namespace NEO { struct PhyicalDevicePciSpeedInfo; struct UltDeviceFactory; class ExecutionEnvironment; } // namespace NEO namespace L0 { namespace ult { struct PciSpeedInfoTest : public ::testing::Test { std::unique_ptr createDevices(uint32_t numSubDevices, const NEO::PhyicalDevicePciSpeedInfo &pciSpeedInfo); DebugManagerStateRestore restorer; private: void setPciSpeedInfo(NEO::ExecutionEnvironment *executionEnvironment, const NEO::PhyicalDevicePciSpeedInfo &pciSpeedInfo); }; } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/device/windows/000077500000000000000000000000001422164147700302445ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/device/windows/CMakeLists.txt000066400000000000000000000004331422164147700330040ustar00rootroot00000000000000# # Copyright (C) 2022 Intel Corporation # # SPDX-License-Identifier: MIT # if(WIN32) target_sources(${TARGET_NAME} 
PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/test_device_pci_speed_info_windows.cpp ) endif() test_device_pci_speed_info_windows.cpp000066400000000000000000000033141422164147700377700ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/device/windows/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "shared/test/common/mocks/ult_device_factory.h" #include "shared/test/common/os_interface/windows/wddm_fixture.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/sources/device/test_device_pci_speed_info.h" #include "gtest/gtest.h" namespace L0 { namespace ult { class PciSpeedInfoTestDriverModel : public WddmMock { public: PciSpeedInfoTestDriverModel(RootDeviceEnvironment &rootDeviceEnvironment) : WddmMock(rootDeviceEnvironment) {} void setExpectedPciSpeedInfo(const PhyicalDevicePciSpeedInfo &pciSpeedInfo) { returnedSpeedInfo = pciSpeedInfo; } PhyicalDevicePciSpeedInfo getPciSpeedInfo() const override { return returnedSpeedInfo; } PhysicalDevicePciBusInfo getPciBusInfo() const override { return NEO::PhysicalDevicePciBusInfo(0, 1, 2, 3); } PhyicalDevicePciSpeedInfo returnedSpeedInfo = {-1, -1, -1}; }; void PciSpeedInfoTest::setPciSpeedInfo(NEO::ExecutionEnvironment *executionEnvironment, const NEO::PhyicalDevicePciSpeedInfo &pciSpeedInfo) { executionEnvironment->rootDeviceEnvironments[0]->osInterface->setDriverModel( std::make_unique(*executionEnvironment->rootDeviceEnvironments[0])); PciSpeedInfoTestDriverModel *driverModel = static_cast(executionEnvironment->rootDeviceEnvironments[0]->osInterface->getDriverModel()); driverModel->setExpectedPciSpeedInfo(pciSpeedInfo); } } // namespace ult } // namespace L0 
compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/driver/000077500000000000000000000000001422164147700266065ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/driver/CMakeLists.txt000066400000000000000000000005121422164147700313440ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/host_pointer_manager_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_driver.cpp ) add_subdirectories() host_pointer_manager_tests.cpp000066400000000000000000000532471422164147700346770ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/driver/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/mocks/mock_memory_manager.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/source/driver/driver_handle_imp.h" #include "level_zero/core/test/unit_tests/fixtures/host_pointer_manager_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_host_pointer_manager.h" namespace L0 { namespace ult { using HostPointerManagerTest = Test; TEST_F(HostPointerManagerTest, givenMultipleGraphicsAllocationWhenCopyingHostPointerDataThenCopyOnlyExistingAllocations) { EXPECT_NE(nullptr, hostDriverHandle->hostPointerManager.get()); HostPointerData originData(4); auto gfxAllocation = openHostPointerManager->createHostPointerAllocation(device->getRootDeviceIndex(), heapPointer, MemoryConstants::pageSize, device->getNEODevice()->getDeviceBitfield()); originData.hostPtrAllocations.addAllocation(gfxAllocation); HostPointerData copyData(originData); for (auto allocation : copyData.hostPtrAllocations.getGraphicsAllocations()) { if (allocation != nullptr) { EXPECT_EQ(device->getRootDeviceIndex(), allocation->getRootDeviceIndex()); EXPECT_EQ(gfxAllocation, allocation); } 
} hostDriverHandle->getMemoryManager()->freeGraphicsMemory(gfxAllocation); } TEST_F(HostPointerManagerTest, givenHostPointerImportedWhenGettingExistingAllocationThenRetrieveProperGpuAddress) { EXPECT_NE(nullptr, hostDriverHandle->hostPointerManager.get()); void *testPtr = heapPointer; auto result = hostDriverHandle->importExternalPointer(testPtr, MemoryConstants::pageSize); EXPECT_EQ(ZE_RESULT_SUCCESS, result); uintptr_t gpuAddress = 0u; auto gfxAllocation = hostDriverHandle->getDriverSystemMemoryAllocation(testPtr, 1u, device->getRootDeviceIndex(), &gpuAddress); ASSERT_NE(nullptr, gfxAllocation); EXPECT_EQ(testPtr, gfxAllocation->getUnderlyingBuffer()); EXPECT_EQ(static_cast(gfxAllocation->getGpuAddress()), gpuAddress); size_t offset = 10u; testPtr = ptrOffset(testPtr, offset); gfxAllocation = hostDriverHandle->getDriverSystemMemoryAllocation(testPtr, 1u, device->getRootDeviceIndex(), &gpuAddress); ASSERT_NE(nullptr, gfxAllocation); EXPECT_EQ(heapPointer, gfxAllocation->getUnderlyingBuffer()); auto expectedGpuAddress = static_cast(gfxAllocation->getGpuAddress()) + offset; EXPECT_EQ(expectedGpuAddress, gpuAddress); result = hostDriverHandle->releaseImportedPointer(heapPointer); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST_F(HostPointerManagerTest, givenPointerRegisteredWhenSvmAllocationExistsThenRetrieveSvmFirst) { EXPECT_NE(nullptr, hostDriverHandle->hostPointerManager.get()); void *testPtr = heapPointer; size_t usmSize = MemoryConstants::pageSize; void *usmBuffer = hostDriverHandle->getMemoryManager()->allocateSystemMemory(usmSize, usmSize); NEO::GraphicsAllocation *usmAllocation = hostDriverHandle->getMemoryManager()->allocateGraphicsMemoryWithProperties( {device->getRootDeviceIndex(), false, usmSize, NEO::AllocationType::BUFFER_HOST_MEMORY, false, neoDevice->getDeviceBitfield()}, usmBuffer); ASSERT_NE(nullptr, usmAllocation); NEO::SvmAllocationData allocData(device->getRootDeviceIndex()); allocData.gpuAllocations.addAllocation(usmAllocation); 
allocData.cpuAllocation = nullptr; allocData.size = usmSize; allocData.memoryType = InternalMemoryType::NOT_SPECIFIED; allocData.device = nullptr; hostDriverHandle->getSvmAllocsManager()->insertSVMAlloc(allocData); auto result = hostDriverHandle->importExternalPointer(testPtr, MemoryConstants::pageSize); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto gfxAllocation = hostDriverHandle->getDriverSystemMemoryAllocation(usmBuffer, 1u, device->getRootDeviceIndex(), nullptr); ASSERT_NE(nullptr, gfxAllocation); EXPECT_EQ(usmBuffer, gfxAllocation->getUnderlyingBuffer()); result = hostDriverHandle->releaseImportedPointer(testPtr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); hostDriverHandle->getMemoryManager()->freeGraphicsMemory(usmAllocation); hostDriverHandle->getMemoryManager()->freeSystemMemory(usmBuffer); } TEST_F(HostPointerManagerTest, givenSvmAllocationExistsWhenGettingExistingAllocationThenRetrieveProperGpuAddress) { size_t usmSize = MemoryConstants::pageSize; void *usmBuffer = hostDriverHandle->getMemoryManager()->allocateSystemMemory(usmSize, usmSize); NEO::GraphicsAllocation *usmAllocation = hostDriverHandle->getMemoryManager()->allocateGraphicsMemoryWithProperties( {device->getRootDeviceIndex(), false, usmSize, NEO::AllocationType::BUFFER_HOST_MEMORY, false, neoDevice->getDeviceBitfield()}, usmBuffer); ASSERT_NE(nullptr, usmAllocation); NEO::SvmAllocationData allocData(device->getRootDeviceIndex()); allocData.gpuAllocations.addAllocation(usmAllocation); allocData.cpuAllocation = nullptr; allocData.size = usmSize; allocData.memoryType = InternalMemoryType::NOT_SPECIFIED; allocData.device = nullptr; hostDriverHandle->getSvmAllocsManager()->insertSVMAlloc(allocData); uintptr_t gpuAddress = 0u; auto gfxAllocation = hostDriverHandle->getDriverSystemMemoryAllocation(usmBuffer, 1u, device->getRootDeviceIndex(), &gpuAddress); ASSERT_NE(nullptr, gfxAllocation); EXPECT_EQ(usmBuffer, gfxAllocation->getUnderlyingBuffer()); EXPECT_EQ(static_cast(gfxAllocation->getGpuAddress()), 
gpuAddress); size_t offset = 10u; void *offsetUsmBuffer = ptrOffset(usmBuffer, offset); gfxAllocation = hostDriverHandle->getDriverSystemMemoryAllocation(offsetUsmBuffer, 1u, device->getRootDeviceIndex(), &gpuAddress); ASSERT_NE(nullptr, gfxAllocation); EXPECT_EQ(usmBuffer, gfxAllocation->getUnderlyingBuffer()); EXPECT_EQ(reinterpret_cast(offsetUsmBuffer), gpuAddress); hostDriverHandle->getMemoryManager()->freeGraphicsMemory(usmAllocation); hostDriverHandle->getMemoryManager()->freeSystemMemory(usmBuffer); } TEST_F(HostPointerManagerTest, WhenSizeIsZeroThenExpectInvalidArgument) { void *testPtr = heapPointer; auto result = hostDriverHandle->importExternalPointer(testPtr, 0u); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, result); } TEST_F(HostPointerManagerTest, WhenPointerIsNullThenExpectInvalidArgument) { auto result = hostDriverHandle->importExternalPointer(nullptr, 0x10); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, result); } TEST_F(HostPointerManagerTest, givenNoPointerWhenImportAddressThenRegisterNewHostData) { void *testPtr = heapPointer; void *baseAddress; auto result = hostDriverHandle->getHostPointerBaseAddress(testPtr, nullptr); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, result); result = hostDriverHandle->releaseImportedPointer(testPtr); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, result); result = hostDriverHandle->importExternalPointer(testPtr, MemoryConstants::pageSize); EXPECT_EQ(ZE_RESULT_SUCCESS, result); result = hostDriverHandle->getHostPointerBaseAddress(testPtr, &baseAddress); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(heapPointer, baseAddress); result = hostDriverHandle->releaseImportedPointer(testPtr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); result = hostDriverHandle->getHostPointerBaseAddress(testPtr, nullptr); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, result); } TEST_F(HostPointerManagerTest, givenNoPointerWhenImportMisalignedAddressThenRegisterNewHostData) { void *testPtr = heapPointer; testPtr = 
reinterpret_cast(reinterpret_cast(testPtr) + 0x10); size_t size = 0x10; void *baseAddress = nullptr; auto result = hostDriverHandle->getHostPointerBaseAddress(testPtr, nullptr); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, result); result = hostDriverHandle->releaseImportedPointer(testPtr); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, result); result = hostDriverHandle->importExternalPointer(testPtr, size); EXPECT_EQ(ZE_RESULT_SUCCESS, result); result = hostDriverHandle->getHostPointerBaseAddress(testPtr, &baseAddress); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(testPtr, baseAddress); auto hostPointerData = openHostPointerManager->hostPointerAllocations.get(testPtr); ASSERT_NE(nullptr, hostPointerData); EXPECT_EQ(size, hostPointerData->size); result = hostDriverHandle->releaseImportedPointer(testPtr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST_F(HostPointerManagerTest, givenBiggerAddressImportedWhenImportingWithinThenReturnSuccess) { void *testPtr = heapPointer; auto result = hostDriverHandle->importExternalPointer(testPtr, MemoryConstants::pageSize); EXPECT_EQ(ZE_RESULT_SUCCESS, result); testPtr = reinterpret_cast(reinterpret_cast(testPtr) + 0x10); size_t size = 0x10; result = hostDriverHandle->importExternalPointer(testPtr, size); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(1u, openHostPointerManager->hostPointerAllocations.getNumAllocs()); result = hostDriverHandle->getHostPointerBaseAddress(testPtr, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); result = hostDriverHandle->releaseImportedPointer(testPtr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST_F(HostPointerManagerTest, givenPointerRegisteredWhenUsingInvalidAddressThenReturnError) { void *testPtr = heapPointer; auto result = hostDriverHandle->importExternalPointer(testPtr, MemoryConstants::pageSize); EXPECT_EQ(ZE_RESULT_SUCCESS, result); result = hostDriverHandle->getHostPointerBaseAddress(nullptr, nullptr); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, result); result = 
hostDriverHandle->releaseImportedPointer(heapPointer); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST_F(HostPointerManagerTest, givenPointerRegisteredWhenSizeEndsInNoAllocationThenExpectObjectInUseError) { void *testPtr = heapPointer; auto result = hostDriverHandle->importExternalPointer(testPtr, MemoryConstants::pageSize); EXPECT_EQ(ZE_RESULT_SUCCESS, result); testPtr = reinterpret_cast(reinterpret_cast(testPtr) + 0x10); size_t size = MemoryConstants::pageSize + 0x10; result = hostDriverHandle->importExternalPointer(testPtr, size); EXPECT_EQ(ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE, result); result = hostDriverHandle->releaseImportedPointer(heapPointer); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST_F(HostPointerManagerTest, givenPointerRegisteredWhenSizeEndsInDifferentAllocationThenExpectOverlappingError) { void *testPtr = heapPointer; auto result = hostDriverHandle->importExternalPointer(testPtr, MemoryConstants::pageSize); EXPECT_EQ(ZE_RESULT_SUCCESS, result); testPtr = reinterpret_cast(reinterpret_cast(testPtr) + MemoryConstants::pageSize); size_t size = MemoryConstants::pageSize; result = hostDriverHandle->importExternalPointer(testPtr, size); EXPECT_EQ(ZE_RESULT_SUCCESS, result); void *errorPtr = reinterpret_cast(reinterpret_cast(heapPointer) + 0x10); result = hostDriverHandle->importExternalPointer(errorPtr, size); EXPECT_EQ(ZE_RESULT_ERROR_OVERLAPPING_REGIONS, result); result = hostDriverHandle->releaseImportedPointer(heapPointer); EXPECT_EQ(ZE_RESULT_SUCCESS, result); result = hostDriverHandle->releaseImportedPointer(testPtr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST_F(HostPointerManagerTest, givenPointerNotRegisteredWhenSizeEndsInDifferentAllocationThenExpectInvalidSizeError) { void *testPtr = reinterpret_cast(reinterpret_cast(heapPointer) + MemoryConstants::pageSize); auto result = hostDriverHandle->importExternalPointer(testPtr, MemoryConstants::pageSize); EXPECT_EQ(ZE_RESULT_SUCCESS, result); void *errorPtr = 
reinterpret_cast(reinterpret_cast(heapPointer) + 0x10); size_t size = MemoryConstants::pageSize; result = hostDriverHandle->importExternalPointer(errorPtr, size); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_SIZE, result); result = hostDriverHandle->releaseImportedPointer(testPtr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST_F(HostPointerManagerTest, givenPointerUsesTwoPagesThenBothPagesAreAvailableAndSizeIsCorrect) { void *testPtr = heapPointer; testPtr = reinterpret_cast(reinterpret_cast(testPtr) + 0x10); size_t size = MemoryConstants::pageSize; void *baseAddress = nullptr; auto result = hostDriverHandle->importExternalPointer(testPtr, size); EXPECT_EQ(ZE_RESULT_SUCCESS, result); result = hostDriverHandle->getHostPointerBaseAddress(testPtr, &baseAddress); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(testPtr, baseAddress); auto hostPointerData = openHostPointerManager->hostPointerAllocations.get(testPtr); ASSERT_NE(nullptr, hostPointerData); EXPECT_EQ(size, hostPointerData->size); void *testPtr2 = reinterpret_cast(reinterpret_cast(testPtr) + MemoryConstants::pageSize); size = 0x010; result = hostDriverHandle->importExternalPointer(testPtr2, size); EXPECT_EQ(ZE_RESULT_SUCCESS, result); result = hostDriverHandle->getHostPointerBaseAddress(testPtr2, &baseAddress); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(testPtr2, baseAddress); auto hostPointerData2 = openHostPointerManager->hostPointerAllocations.get(testPtr2); ASSERT_NE(nullptr, hostPointerData2); EXPECT_EQ(size, hostPointerData2->size); EXPECT_EQ(hostPointerData->hostPtrAllocations.getGraphicsAllocations().size(), hostPointerData2->hostPtrAllocations.getGraphicsAllocations().size()); for (uint32_t i = 0; i < hostPointerData->hostPtrAllocations.getGraphicsAllocations().size(); i++) { auto hostPointerAllocation = hostPointerData->hostPtrAllocations.getGraphicsAllocation(i); auto hostPointerAllocation2 = hostPointerData2->hostPtrAllocations.getGraphicsAllocation(i); EXPECT_NE(hostPointerAllocation, 
hostPointerAllocation2); } result = hostDriverHandle->releaseImportedPointer(testPtr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); result = hostDriverHandle->releaseImportedPointer(testPtr2); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST_F(HostPointerManagerTest, givenPointerRegisteredWhenSizeFitsThenReturnGraphicsAllocation) { void *testPtr = heapPointer; auto result = hostDriverHandle->importExternalPointer(testPtr, MemoryConstants::pageSize); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto gfxAllocation = hostDriverHandle->findHostPointerAllocation(testPtr, 0x10u, device->getRootDeviceIndex()); EXPECT_NE(nullptr, gfxAllocation); EXPECT_EQ(testPtr, gfxAllocation->getUnderlyingBuffer()); result = hostDriverHandle->releaseImportedPointer(testPtr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST_F(HostPointerManagerTest, givenPointerNotRegisteredThenReturnNullptrGraphicsAllocation) { void *testPtr = heapPointer; auto result = hostDriverHandle->importExternalPointer(testPtr, MemoryConstants::pageSize); EXPECT_EQ(ZE_RESULT_SUCCESS, result); testPtr = reinterpret_cast(reinterpret_cast(testPtr) + MemoryConstants::pageSize); auto gfxAllocation = hostDriverHandle->findHostPointerAllocation(testPtr, 0x10u, device->getRootDeviceIndex()); EXPECT_EQ(nullptr, gfxAllocation); result = hostDriverHandle->releaseImportedPointer(heapPointer); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST_F(HostPointerManagerTest, givenPointerRegisteredWhenSizeExceedsAllocationThenReturnNullptrGraphicsAllocation) { void *testPtr = heapPointer; auto result = hostDriverHandle->importExternalPointer(testPtr, MemoryConstants::pageSize); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto gfxAllocation = hostDriverHandle->findHostPointerAllocation(testPtr, MemoryConstants::pageSize + 0x10u, device->getRootDeviceIndex()); EXPECT_EQ(nullptr, gfxAllocation); result = hostDriverHandle->releaseImportedPointer(testPtr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST_F(HostPointerManagerTest, 
givenNoPointerRegisteredWhenAllocationCreationFailThenExpectOutOfMemoryError) { std::unique_ptr failMemoryManager = std::make_unique(0, *neoDevice->executionEnvironment); openHostPointerManager->memoryManager = failMemoryManager.get(); auto result = hostDriverHandle->importExternalPointer(heapPointer, MemoryConstants::pageSize); EXPECT_EQ(ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY, result); } TEST_F(HostPointerManagerTest, givenHostAllocationImportedWhenMakingResidentAddressThenAllocationMadeResident) { void *testPtr = heapPointer; mockMemoryInterface->makeResidentResult = NEO::MemoryOperationsStatus::SUCCESS; mockMemoryInterface->evictResult = NEO::MemoryOperationsStatus::SUCCESS; auto result = context->makeMemoryResident(device, testPtr, MemoryConstants::pageSize); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, result); result = hostDriverHandle->importExternalPointer(testPtr, MemoryConstants::pageSize); EXPECT_EQ(ZE_RESULT_SUCCESS, result); result = context->makeMemoryResident(device, testPtr, MemoryConstants::pageSize); EXPECT_EQ(ZE_RESULT_SUCCESS, result); result = context->evictMemory(device, testPtr, MemoryConstants::pageSize); EXPECT_EQ(ZE_RESULT_SUCCESS, result); result = hostDriverHandle->releaseImportedPointer(testPtr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); result = context->evictMemory(device, testPtr, MemoryConstants::pageSize); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, result); EXPECT_EQ(1u, mockMemoryInterface->makeResidentCalled); EXPECT_EQ(1u, mockMemoryInterface->evictCalled); } TEST_F(HostPointerManagerTest, givenMisalignedPointerRegisteredWhenGettingRelativeOffsetAddressThenRetrieveMisalignedPointerAsBaseAddress) { size_t mainOffset = 0x10; void *testPtr = reinterpret_cast(reinterpret_cast(heapPointer) + mainOffset); size_t size = MemoryConstants::pageSize + 0x10; auto result = hostDriverHandle->importExternalPointer(testPtr, size); EXPECT_EQ(ZE_RESULT_SUCCESS, result); size_t relativeOffset = 0x20; void *relativeAddress = ptrOffset(testPtr, 
relativeOffset); void *baseAddress = nullptr; result = hostDriverHandle->getHostPointerBaseAddress(relativeAddress, &baseAddress); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(testPtr, baseAddress); auto gfxAllocation = hostDriverHandle->findHostPointerAllocation(testPtr, 0x10u, device->getRootDeviceIndex()); ASSERT_NE(nullptr, gfxAllocation); size_t gpuVA = static_cast(gfxAllocation->getGpuAddress()); size_t gpuAddressOffset = gpuVA - alignDown(gpuVA, MemoryConstants::pageSize); EXPECT_EQ(mainOffset, gpuAddressOffset); result = hostDriverHandle->releaseImportedPointer(testPtr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } using ForceDisabledHostPointerManagerTest = Test; TEST_F(ForceDisabledHostPointerManagerTest, givenHostPointerManagerForceDisabledThenReturnFeatureUnsupported) { EXPECT_EQ(nullptr, hostDriverHandle->hostPointerManager.get()); void *testPtr = heapPointer; auto result = hostDriverHandle->importExternalPointer(testPtr, MemoryConstants::pageSize); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, result); void *basePtr = nullptr; result = hostDriverHandle->getHostPointerBaseAddress(testPtr, &basePtr); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, result); uintptr_t gpuAddress = 0; auto gfxAllocation = hostDriverHandle->getDriverSystemMemoryAllocation(testPtr, 1u, device->getRootDeviceIndex(), &gpuAddress); EXPECT_EQ(nullptr, gfxAllocation); result = hostDriverHandle->releaseImportedPointer(testPtr); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, result); } using ForceEnabledHostPointerManagerTest = Test; TEST_F(ForceEnabledHostPointerManagerTest, givenHostPointerManagerForceEnabledThenReturnSuccess) { EXPECT_NE(nullptr, hostDriverHandle->hostPointerManager.get()); void *testPtr = heapPointer; auto result = hostDriverHandle->importExternalPointer(testPtr, MemoryConstants::pageSize); EXPECT_EQ(ZE_RESULT_SUCCESS, result); void *basePtr = nullptr; result = hostDriverHandle->getHostPointerBaseAddress(testPtr, &basePtr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); 
EXPECT_EQ(testPtr, basePtr); uintptr_t gpuAddress = 0; auto gfxAllocation = hostDriverHandle->getDriverSystemMemoryAllocation(testPtr, 1u, device->getRootDeviceIndex(), &gpuAddress); EXPECT_NE(nullptr, gfxAllocation); result = hostDriverHandle->releaseImportedPointer(testPtr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/driver/linux/000077500000000000000000000000001422164147700277455ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/driver/linux/CMakeLists.txt000066400000000000000000000004241422164147700325050ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(UNIX) target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/test_driver_handle_imp_linux.cpp ) endif() test_driver_handle_imp_linux.cpp000066400000000000000000000371531422164147700363340ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/driver/linux/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/os_interface/device_factory.h" #include "shared/source/os_interface/linux/drm_neo.h" #include "shared/source/os_interface/os_interface.h" #include "shared/test/common/mocks/mock_compilers.h" #include "shared/test/common/mocks/ult_device_factory.h" #include "level_zero/core/source/driver/driver_imp.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" using namespace NEO; namespace L0 { namespace ult { constexpr int mockFd = 0; class TestDriverMockDrm : public Drm { public: TestDriverMockDrm(std::string &bdf, RootDeviceEnvironment &rootDeviceEnvironment) : Drm(std::make_unique(mockFd, bdf.c_str()), rootDeviceEnvironment) {} }; class DriverLinuxFixture : public 
::testing::Test { public: void SetUp() override { NEO::MockCompilerEnableGuard mock(true); auto executionEnvironment = new NEO::ExecutionEnvironment(); executionEnvironment->prepareRootDeviceEnvironments(numRootDevices); NEO::HardwareInfo hwInfo = *NEO::defaultHwInfo.get(); for (auto i = 0u; i < executionEnvironment->rootDeviceEnvironments.size(); i++) { executionEnvironment->rootDeviceEnvironments[i]->setHwInfo(&hwInfo); } deviceFactory = std::make_unique(numRootDevices, numSubDevices, *executionEnvironment); for (auto i = 0u; i < executionEnvironment->rootDeviceEnvironments.size(); i++) { devices.push_back(std::unique_ptr(deviceFactory->rootDevices[i])); } for (auto i = 0u; i < devices.size(); i++) { devices[i]->getExecutionEnvironment()->rootDeviceEnvironments[i]->osInterface = std::make_unique(); auto osInterface = devices[i]->getExecutionEnvironment()->rootDeviceEnvironments[i]->osInterface.get(); osInterface->setDriverModel(std::make_unique(bdf[i], const_cast(devices[i]->getRootDeviceEnvironment()))); } executionEnvironment->sortNeoDevices(); } void TearDown() override {} static constexpr uint32_t numRootDevices = 5u; static constexpr uint32_t numSubDevices = 2u; std::vector> devices; std::string bdf[numRootDevices] = {"0000:03:04.0", "0000:08:02.0", "0000:08:03.1", "0000:10:03.0", "0000:02:01.0"}; std::string sortedBdf[numRootDevices] = {"0000:02:01.0", "0000:03:04.0", "0000:08:02.0", "0000:08:03.1", "0000:10:03.0"}; std::unique_ptr deviceFactory; }; class DriverLinuxWithPciOrderTests : public DriverLinuxFixture { public: void SetUp() override { DebugManagerStateRestore restorer; DebugManager.flags.ZE_ENABLE_PCI_ID_DEVICE_ORDER.set(1); DriverLinuxFixture::SetUp(); } void TearDown() override { DriverLinuxFixture::TearDown(); } }; TEST_F(DriverLinuxWithPciOrderTests, GivenEnvironmentVariableForDeviceOrderAccordingToPciSetWhenRetrievingNeoDevicesThenNeoDevicesAccordingToBusOrderRetrieved) { NEO::MockCompilerEnableGuard mock(true); DriverHandleImp *driverHandle 
= new DriverHandleImp; EXPECT_EQ(ZE_RESULT_SUCCESS, driverHandle->initialize(std::move(devices))); for (uint32_t i = 0; i < numRootDevices; i++) { auto L0Device = driverHandle->devices[i]; if (L0Device != nullptr) { auto pDrm = L0Device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[L0Device->getRootDeviceIndex()]->osInterface->getDriverModel()->as(); EXPECT_NE(pDrm, nullptr); EXPECT_TRUE(!pDrm->getPciPath().compare(sortedBdf[i])); } } delete driverHandle; } class DriverLinuxWithouthPciOrderTests : public DriverLinuxFixture { public: void SetUp() override { DebugManagerStateRestore restorer; DebugManager.flags.ZE_ENABLE_PCI_ID_DEVICE_ORDER.set(0); DriverLinuxFixture::SetUp(); } void TearDown() override { DriverLinuxFixture::TearDown(); } }; TEST_F(DriverLinuxWithouthPciOrderTests, GivenNoEnvironmentVariableForDeviceOrderAccordingToPciSetWhenRetrievingNeoDevicesThenNeoDevicesAreNotSorted) { NEO::MockCompilerEnableGuard mock(true); DriverHandleImp *driverHandle = new DriverHandleImp; EXPECT_EQ(ZE_RESULT_SUCCESS, driverHandle->initialize(std::move(devices))); for (uint32_t i = 0; i < numRootDevices; i++) { auto L0Device = driverHandle->devices[i]; if (L0Device != nullptr) { auto pDrm = L0Device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[L0Device->getRootDeviceIndex()]->osInterface->getDriverModel()->as(); EXPECT_NE(pDrm, nullptr); EXPECT_FALSE(!pDrm->getPciPath().compare(sortedBdf[i])); } } delete driverHandle; } class DriverPciOrderWitSimilarBusLinuxFixture : public ::testing::Test { public: void SetUp() override { DebugManagerStateRestore restorer; DebugManager.flags.ZE_ENABLE_PCI_ID_DEVICE_ORDER.set(1); NEO::MockCompilerEnableGuard mock(true); auto executionEnvironment = new NEO::ExecutionEnvironment(); executionEnvironment->prepareRootDeviceEnvironments(numRootDevices); NEO::HardwareInfo hwInfo = *NEO::defaultHwInfo.get(); for (auto i = 0u; i < executionEnvironment->rootDeviceEnvironments.size(); i++) { 
executionEnvironment->rootDeviceEnvironments[i]->setHwInfo(&hwInfo); } deviceFactory = std::make_unique(numRootDevices, numSubDevices, *executionEnvironment); for (auto i = 0u; i < executionEnvironment->rootDeviceEnvironments.size(); i++) { devices.push_back(std::unique_ptr(deviceFactory->rootDevices[i])); } for (auto i = 0u; i < devices.size(); i++) { devices[i]->getExecutionEnvironment()->rootDeviceEnvironments[i]->osInterface = std::make_unique(); auto osInterface = devices[i]->getExecutionEnvironment()->rootDeviceEnvironments[i]->osInterface.get(); osInterface->setDriverModel(std::make_unique(bdf[i], const_cast(devices[i]->getRootDeviceEnvironment()))); } executionEnvironment->sortNeoDevices(); } void TearDown() override {} static constexpr uint32_t numRootDevices = 4u; static constexpr uint32_t numSubDevices = 2u; std::vector> devices; std::string bdf[numRootDevices] = {"0000:03:04.0", "0000:03:05.0", "0000:03:06.0", "0000:03:01.0"}; std::string sortedBdf[numRootDevices] = {"0000:03:01.0", "0000:03:04.0", "0000:03:05.0", "0000:03:06.0"}; std::unique_ptr deviceFactory; }; TEST_F(DriverPciOrderWitSimilarBusLinuxFixture, GivenEnvironmentVariableForDeviceOrderAccordingToPciSetWhenRetrievingNeoDevicesThenNeoDevicesAccordingToBusOrderRetrieved) { NEO::MockCompilerEnableGuard mock(true); DriverHandleImp *driverHandle = new DriverHandleImp; EXPECT_EQ(ZE_RESULT_SUCCESS, driverHandle->initialize(std::move(devices))); for (uint32_t i = 0; i < numRootDevices; i++) { auto L0Device = driverHandle->devices[i]; if (L0Device != nullptr) { auto pDrm = L0Device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[L0Device->getRootDeviceIndex()]->osInterface->getDriverModel()->as(); EXPECT_NE(pDrm, nullptr); EXPECT_TRUE(!pDrm->getPciPath().compare(sortedBdf[i])); } } delete driverHandle; } class DriverPciOrderWitDifferentDeviceLinuxFixture : public ::testing::Test { public: void SetUp() override { DebugManagerStateRestore restorer; 
DebugManager.flags.ZE_ENABLE_PCI_ID_DEVICE_ORDER.set(1); NEO::MockCompilerEnableGuard mock(true); auto executionEnvironment = new NEO::ExecutionEnvironment(); executionEnvironment->prepareRootDeviceEnvironments(numRootDevices); NEO::HardwareInfo hwInfo = *NEO::defaultHwInfo.get(); for (auto i = 0u; i < executionEnvironment->rootDeviceEnvironments.size(); i++) { executionEnvironment->rootDeviceEnvironments[i]->setHwInfo(&hwInfo); } deviceFactory = std::make_unique(numRootDevices, numSubDevices, *executionEnvironment); for (auto i = 0u; i < executionEnvironment->rootDeviceEnvironments.size(); i++) { devices.push_back(std::unique_ptr(deviceFactory->rootDevices[i])); } for (auto i = 0u; i < devices.size(); i++) { devices[i]->getExecutionEnvironment()->rootDeviceEnvironments[i]->osInterface = std::make_unique(); auto osInterface = devices[i]->getExecutionEnvironment()->rootDeviceEnvironments[i]->osInterface.get(); osInterface->setDriverModel(std::make_unique(bdf[i], const_cast(devices[i]->getRootDeviceEnvironment()))); } executionEnvironment->sortNeoDevices(); } void TearDown() override {} static constexpr uint32_t numRootDevices = 2u; static constexpr uint32_t numSubDevices = 2u; std::vector> devices; std::string bdf[numRootDevices] = {"0000:03:05.0", "0000:03:04.0"}; std::string sortedBdf[numRootDevices] = {"0000:03:04.0", "0000:03:05.0"}; std::unique_ptr deviceFactory; }; TEST_F(DriverPciOrderWitDifferentDeviceLinuxFixture, GivenEnvironmentVariableForDeviceOrderAccordingToPciSetWhenRetrievingNeoDevicesThenNeoDevicesAccordingToBusOrderRetrieved) { NEO::MockCompilerEnableGuard mock(true); DriverHandleImp *driverHandle = new DriverHandleImp; EXPECT_EQ(ZE_RESULT_SUCCESS, driverHandle->initialize(std::move(devices))); for (uint32_t i = 0; i < numRootDevices; i++) { auto L0Device = driverHandle->devices[i]; if (L0Device != nullptr) { auto pDrm = 
L0Device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[L0Device->getRootDeviceIndex()]->osInterface->getDriverModel()->as(); EXPECT_NE(pDrm, nullptr); EXPECT_TRUE(!pDrm->getPciPath().compare(sortedBdf[i])); } } delete driverHandle; } class DriverPciOrderWitSimilarBusAndDeviceLinuxFixture : public ::testing::Test { public: void SetUp() override { DebugManagerStateRestore restorer; DebugManager.flags.ZE_ENABLE_PCI_ID_DEVICE_ORDER.set(1); NEO::MockCompilerEnableGuard mock(true); auto executionEnvironment = new NEO::ExecutionEnvironment(); executionEnvironment->prepareRootDeviceEnvironments(numRootDevices); NEO::HardwareInfo hwInfo = *NEO::defaultHwInfo.get(); for (auto i = 0u; i < executionEnvironment->rootDeviceEnvironments.size(); i++) { executionEnvironment->rootDeviceEnvironments[i]->setHwInfo(&hwInfo); } deviceFactory = std::make_unique(numRootDevices, numSubDevices, *executionEnvironment); for (auto i = 0u; i < executionEnvironment->rootDeviceEnvironments.size(); i++) { devices.push_back(std::unique_ptr(deviceFactory->rootDevices[i])); } for (auto i = 0u; i < devices.size(); i++) { devices[i]->getExecutionEnvironment()->rootDeviceEnvironments[i]->osInterface = std::make_unique(); auto osInterface = devices[i]->getExecutionEnvironment()->rootDeviceEnvironments[i]->osInterface.get(); osInterface->setDriverModel(std::make_unique(bdf[i], const_cast(devices[i]->getRootDeviceEnvironment()))); } executionEnvironment->sortNeoDevices(); } void TearDown() override {} static constexpr uint32_t numRootDevices = 2u; static constexpr uint32_t numSubDevices = 2u; std::vector> devices; std::string bdf[numRootDevices] = {"0000:03:04.1", "0000:03:04.0"}; std::string sortedBdf[numRootDevices] = {"0000:03:04.0", "0000:03:04.1"}; std::unique_ptr deviceFactory; }; TEST_F(DriverPciOrderWitSimilarBusAndDeviceLinuxFixture, GivenEnvironmentVariableForDeviceOrderAccordingToPciSetWhenRetrievingNeoDevicesThenNeoDevicesAccordingToBusOrderRetrieved) { 
NEO::MockCompilerEnableGuard mock(true); DriverHandleImp *driverHandle = new DriverHandleImp; EXPECT_EQ(ZE_RESULT_SUCCESS, driverHandle->initialize(std::move(devices))); for (uint32_t i = 0; i < numRootDevices; i++) { auto L0Device = driverHandle->devices[i]; if (L0Device != nullptr) { auto pDrm = L0Device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[L0Device->getRootDeviceIndex()]->osInterface->getDriverModel()->as(); EXPECT_NE(pDrm, nullptr); EXPECT_TRUE(!pDrm->getPciPath().compare(sortedBdf[i])); } } delete driverHandle; } class DriverPciOrderWitSimilarBDFLinuxFixture : public ::testing::Test { public: void SetUp() override { DebugManagerStateRestore restorer; DebugManager.flags.ZE_ENABLE_PCI_ID_DEVICE_ORDER.set(1); NEO::MockCompilerEnableGuard mock(true); auto executionEnvironment = new NEO::ExecutionEnvironment(); executionEnvironment->prepareRootDeviceEnvironments(numRootDevices); NEO::HardwareInfo hwInfo = *NEO::defaultHwInfo.get(); for (auto i = 0u; i < executionEnvironment->rootDeviceEnvironments.size(); i++) { executionEnvironment->rootDeviceEnvironments[i]->setHwInfo(&hwInfo); } deviceFactory = std::make_unique(numRootDevices, numSubDevices, *executionEnvironment); for (auto i = 0u; i < executionEnvironment->rootDeviceEnvironments.size(); i++) { devices.push_back(std::unique_ptr(deviceFactory->rootDevices[i])); } for (auto i = 0u; i < devices.size(); i++) { devices[i]->getExecutionEnvironment()->rootDeviceEnvironments[i]->osInterface = std::make_unique(); auto osInterface = devices[i]->getExecutionEnvironment()->rootDeviceEnvironments[i]->osInterface.get(); osInterface->setDriverModel(std::make_unique(bdf[i], const_cast(devices[i]->getRootDeviceEnvironment()))); } executionEnvironment->sortNeoDevices(); } void TearDown() override {} static constexpr uint32_t numRootDevices = 2u; static constexpr uint32_t numSubDevices = 2u; std::vector> devices; std::string bdf[numRootDevices] = {"0001:03:04.0", "0000:03:04.0"}; std::string 
sortedBdf[numRootDevices] = {"0000:03:04.0", "0001:03:04.0"}; std::unique_ptr deviceFactory; }; TEST_F(DriverPciOrderWitSimilarBDFLinuxFixture, GivenEnvironmentVariableForDeviceOrderAccordingToPciSetWhenRetrievingNeoDevicesThenNeoDevicesAccordingToDomainOrderRetrieved) { NEO::MockCompilerEnableGuard mock(true); DriverHandleImp *driverHandle = new DriverHandleImp; EXPECT_EQ(ZE_RESULT_SUCCESS, driverHandle->initialize(std::move(devices))); for (uint32_t i = 0; i < numRootDevices; i++) { auto L0Device = driverHandle->devices[i]; if (L0Device != nullptr) { auto pDrm = L0Device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[L0Device->getRootDeviceIndex()]->osInterface->getDriverModel()->as(); EXPECT_NE(pDrm, nullptr); EXPECT_TRUE(!pDrm->getPciPath().compare(sortedBdf[i])); } } delete driverHandle; } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/driver/test_driver.cpp000066400000000000000000000626501422164147700316550ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/string.h" #include "shared/source/os_interface/device_factory.h" #include "shared/source/os_interface/hw_info_config.h" #include "shared/source/os_interface/os_inc_base.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/ult_hw_config.h" #include "shared/test/common/mocks/mock_compilers.h" #include "shared/test/common/mocks/mock_io_functions.h" #include "shared/test/common/mocks/ult_device_factory.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/source/driver/driver_handle_imp.h" #include "level_zero/core/source/driver/driver_imp.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_driver.h" #include namespace L0 { namespace ult { TEST(zeInit, 
whenCallingZeInitThenInitializeOnDriverIsCalled) { Mock driver; auto result = zeInit(ZE_INIT_FLAG_GPU_ONLY); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(1u, driver.initCalledCount); } TEST(zeInit, whenCallingZeInitWithNoFlagsThenInitializeOnDriverIsCalled) { Mock driver; auto result = zeInit(0); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(1u, driver.initCalledCount); } TEST(zeInit, whenCallingZeInitWithoutGpuOnlyFlagThenInitializeOnDriverIsNotCalled) { Mock driver; auto result = zeInit(ZE_INIT_FLAG_VPU_ONLY); EXPECT_EQ(ZE_RESULT_ERROR_UNINITIALIZED, result); EXPECT_EQ(0u, driver.initCalledCount); } using DriverHandleImpTest = Test; TEST_F(DriverHandleImpTest, givenDriverImpWhenCallingupdateRootDeviceBitFieldsThendeviceBitfieldsAreUpdatedInAccordanceWithNeoDevice) { auto hwInfo = *NEO::defaultHwInfo; auto newNeoDevice = std::unique_ptr(NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); driverHandle->updateRootDeviceBitFields(newNeoDevice); const auto rootDeviceIndex = neoDevice->getRootDeviceIndex(); auto entry = driverHandle->deviceBitfields.find(rootDeviceIndex); EXPECT_EQ(newNeoDevice->getDeviceBitfield(), entry->second); } using DriverVersionTest = Test; TEST_F(DriverVersionTest, givenCallToGetExtensionPropertiesThenSupportedExtensionsAreReturned) { uint32_t count = 0; ze_result_t res = driverHandle->getExtensionProperties(&count, nullptr); EXPECT_EQ(count, static_cast(driverHandle->extensionsSupported.size())); EXPECT_EQ(ZE_RESULT_SUCCESS, res); ze_driver_extension_properties_t *extensionProperties = new ze_driver_extension_properties_t[count]; count++; res = driverHandle->getExtensionProperties(&count, extensionProperties); EXPECT_EQ(count, static_cast(driverHandle->extensionsSupported.size())); EXPECT_EQ(ZE_RESULT_SUCCESS, res); DriverHandleImp *driverHandleImp = static_cast(driverHandle.get()); for (uint32_t i = 0; i < count; i++) { auto extension = extensionProperties[i]; EXPECT_EQ(0, strcmp(extension.name, 
driverHandleImp->extensionsSupported[i].first.c_str())); EXPECT_EQ(extension.version, driverHandleImp->extensionsSupported[i].second); } delete[] extensionProperties; } TEST_F(DriverVersionTest, WhenGettingDriverVersionThenExpectedDriverVersionIsReturned) { ze_driver_properties_t properties; ze_result_t res = driverHandle->getProperties(&properties); EXPECT_EQ(ZE_RESULT_SUCCESS, res); uint32_t versionMajor = static_cast((properties.driverVersion & 0xFF000000) >> 24); uint32_t versionMinor = static_cast((properties.driverVersion & 0x00FF0000) >> 16); uint32_t versionBuild = static_cast(properties.driverVersion & 0x0000FFFF); EXPECT_EQ(static_cast(strtoul(L0_PROJECT_VERSION_MAJOR, NULL, 10)), versionMajor); EXPECT_EQ(static_cast(strtoul(L0_PROJECT_VERSION_MINOR, NULL, 10)), versionMinor); EXPECT_EQ(static_cast(strtoul(NEO_VERSION_BUILD, NULL, 10)), versionBuild); } TEST_F(DriverVersionTest, givenCallToGetDriverPropertiesThenUuidIsSet) { NEO::MockCompilerEnableGuard mock(true); ze_driver_properties_t properties; ze_result_t res = driverHandle->getProperties(&properties); EXPECT_EQ(ZE_RESULT_SUCCESS, res); uint64_t uuid = 0u; memcpy_s(&uuid, sizeof(uuid), properties.uuid.id, sizeof(uuid)); uint32_t uniqueId = static_cast((uuid & 0xFFFFFFFF00000000) >> 32); uint32_t versionMajor = static_cast((uuid & 0xFF000000) >> 24); uint32_t versionMinor = static_cast((uuid & 0x00FF0000) >> 16); uint32_t versionBuild = static_cast(uuid & 0x0000FFFF); EXPECT_NE(0u, uniqueId); EXPECT_EQ(static_cast(strtoul(L0_PROJECT_VERSION_MAJOR, NULL, 10)), versionMajor); EXPECT_EQ(static_cast(strtoul(L0_PROJECT_VERSION_MINOR, NULL, 10)), versionMinor); EXPECT_EQ(static_cast(strtoul(NEO_VERSION_BUILD, NULL, 10)), versionBuild); } TEST_F(DriverVersionTest, whenCallingGetDriverPropertiesRepeatedlyThenTheSameUuidIsReturned) { NEO::MockCompilerEnableGuard mock(true); ze_driver_properties_t properties; ze_result_t res = driverHandle->getProperties(&properties); EXPECT_EQ(ZE_RESULT_SUCCESS, res); 
uint64_t uuid = 0u; memcpy_s(&uuid, sizeof(uuid), properties.uuid.id, sizeof(uuid)); uint32_t uniqueId = static_cast((uuid & 0xFFFFFFFF00000000) >> 32); uint32_t versionMajor = static_cast((uuid & 0xFF000000) >> 24); uint32_t versionMinor = static_cast((uuid & 0x00FF0000) >> 16); uint32_t versionBuild = static_cast(uuid & 0x0000FFFF); EXPECT_NE(0u, uniqueId); EXPECT_EQ(static_cast(strtoul(L0_PROJECT_VERSION_MAJOR, NULL, 10)), versionMajor); EXPECT_EQ(static_cast(strtoul(L0_PROJECT_VERSION_MINOR, NULL, 10)), versionMinor); EXPECT_EQ(static_cast(strtoul(NEO_VERSION_BUILD, NULL, 10)), versionBuild); for (uint32_t i = 0; i < 32; i++) { ze_driver_properties_t newProperties; res = driverHandle->getProperties(&newProperties); EXPECT_EQ(ZE_RESULT_SUCCESS, res); EXPECT_EQ(0, memcmp(properties.uuid.id, newProperties.uuid.id, sizeof(uint64_t))); } } using ImportNTHandle = Test; class MemoryManagerNTHandleMock : public NEO::OsAgnosticMemoryManager { public: MemoryManagerNTHandleMock(NEO::ExecutionEnvironment &executionEnvironment) : NEO::OsAgnosticMemoryManager(executionEnvironment) {} NEO::GraphicsAllocation *createGraphicsAllocationFromNTHandle(void *handle, uint32_t rootDeviceIndex, AllocationType allocType) override { auto graphicsAllocation = createMemoryAllocation(allocType, nullptr, reinterpret_cast(1), 1, 4096u, reinterpret_cast(handle), MemoryPool::SystemCpuInaccessible, rootDeviceIndex, false, false, false); graphicsAllocation->setSharedHandle(static_cast(reinterpret_cast(handle))); graphicsAllocation->set32BitAllocation(false); graphicsAllocation->setDefaultGmm(new Gmm(executionEnvironment.rootDeviceEnvironments[0]->getGmmClientContext(), nullptr, 1, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true)); return graphicsAllocation; } }; HWTEST_F(ImportNTHandle, givenNTHandleWhenCreatingDeviceMemoryThenSuccessIsReturned) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; ze_device_mem_alloc_desc_t devProperties = {}; devProperties.stype = 
ZE_STRUCTURE_TYPE_DEVICE_MEMORY_PROPERTIES; uint64_t imageHandle = 0x1; ze_external_memory_import_win32_handle_t importNTHandle = {}; importNTHandle.handle = &imageHandle; importNTHandle.flags = ZE_EXTERNAL_MEMORY_TYPE_FLAG_OPAQUE_WIN32; importNTHandle.stype = ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMPORT_WIN32; devProperties.pNext = &importNTHandle; NEO::MockDevice *neoDevice = nullptr; auto executionEnvironment = NEO::MockDevice::prepareExecutionEnvironment(NEO::defaultHwInfo.get(), 0); executionEnvironment->memoryManager.reset(new MemoryManagerNTHandleMock(*executionEnvironment)); neoDevice = NEO::MockDevice::createWithExecutionEnvironment(NEO::defaultHwInfo.get(), executionEnvironment, 0); driverHandle->setMemoryManager(executionEnvironment->memoryManager.get()); ze_result_t result = ZE_RESULT_SUCCESS; auto device = L0::Device::create(driverHandle.get(), neoDevice, false, &result); context->addDeviceAndSubDevices(device); void *ptr; result = context->allocDeviceMem(device, &devProperties, 100, 1, &ptr); EXPECT_EQ(result, ZE_RESULT_SUCCESS); auto alloc = driverHandle->getSvmAllocsManager()->getSVMAlloc(ptr); ASSERT_EQ(alloc->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex())->peekSharedHandle(), NEO::toOsHandle(importNTHandle.handle)); result = context->freeMem(ptr); EXPECT_EQ(result, ZE_RESULT_SUCCESS); delete device; } HWTEST_F(ImportNTHandle, givenNotExistingNTHandleWhenCreatingDeviceMemoryThenErrorIsReturned) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; ze_device_mem_alloc_desc_t devProperties = {}; devProperties.stype = ZE_STRUCTURE_TYPE_DEVICE_MEMORY_PROPERTIES; uint64_t imageHandle = 0x1; ze_external_memory_import_win32_handle_t importNTHandle = {}; importNTHandle.handle = &imageHandle; importNTHandle.flags = ZE_EXTERNAL_MEMORY_TYPE_FLAG_OPAQUE_WIN32; importNTHandle.stype = ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMPORT_WIN32; devProperties.pNext = &importNTHandle; void *ptr; auto result = context->allocDeviceMem(device, 
&devProperties, 100, 1, &ptr); EXPECT_EQ(result, ZE_RESULT_ERROR_INVALID_ARGUMENT); } TEST(DriverTestFamilySupport, whenInitializingDriverOnSupportedFamilyThenDriverIsCreated) { NEO::MockCompilerEnableGuard mock(true); ze_result_t returnValue; NEO::HardwareInfo hwInfo = *NEO::defaultHwInfo.get(); hwInfo.capabilityTable.levelZeroSupported = true; NEO::MockDevice *neoDevice = NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo); NEO::DeviceVector devices; devices.push_back(std::unique_ptr(neoDevice)); auto driverHandle = DriverHandle::create(std::move(devices), L0EnvVariables{}, &returnValue); EXPECT_NE(nullptr, driverHandle); delete driverHandle; L0::GlobalDriver = nullptr; } TEST(DriverTestFamilySupport, whenInitializingDriverOnNotSupportedFamilyThenDriverIsNotCreated) { ze_result_t returnValue; NEO::HardwareInfo hwInfo = *NEO::defaultHwInfo.get(); hwInfo.capabilityTable.levelZeroSupported = false; NEO::MockDevice *neoDevice = NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo); NEO::DeviceVector devices; devices.push_back(std::unique_ptr(neoDevice)); auto driverHandle = DriverHandle::create(std::move(devices), L0EnvVariables{}, &returnValue); EXPECT_EQ(nullptr, driverHandle); } TEST(DriverTest, givenNullEnvVariableWhenCreatingDriverThenEnableProgramDebuggingIsFalse) { NEO::MockCompilerEnableGuard mock(true); ze_result_t returnValue; NEO::HardwareInfo hwInfo = *NEO::defaultHwInfo.get(); hwInfo.capabilityTable.levelZeroSupported = true; NEO::MockDevice *neoDevice = NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo); NEO::DeviceVector devices; devices.push_back(std::unique_ptr(neoDevice)); L0EnvVariables envVariables = {}; envVariables.programDebugging = false; auto driverHandle = whitebox_cast(DriverHandle::create(std::move(devices), envVariables, &returnValue)); EXPECT_NE(nullptr, driverHandle); EXPECT_FALSE(driverHandle->enableProgramDebugging); delete driverHandle; L0::GlobalDriver = nullptr; } TEST(DriverImpTest, 
givenDriverImpWhenInitializedThenEnvVariablesAreRead) { NEO::MockCompilerEnableGuard mock(true); NEO::HardwareInfo hwInfo = *NEO::defaultHwInfo.get(); hwInfo.capabilityTable.levelZeroSupported = true; VariableBackup mockGetenvCalledBackup(&IoFunctions::mockGetenvCalled, 0); ze_result_t result = ZE_RESULT_ERROR_UNINITIALIZED; DriverImp driverImp; driverImp.initialize(&result); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_LE(3u, IoFunctions::mockGetenvCalled); delete L0::GlobalDriver; L0::GlobalDriverHandle = nullptr; L0::GlobalDriver = nullptr; } TEST(DriverImpTest, givenMissingMetricApiDependenciesWhenInitializingDriverImpThenGlobalDriverHandleIsNull) { NEO::MockCompilerEnableGuard mock(true); NEO::HardwareInfo hwInfo = *NEO::defaultHwInfo.get(); hwInfo.capabilityTable.levelZeroSupported = true; VariableBackup mockGetenvCalledBackup(&IoFunctions::mockGetenvCalled, 0); std::unordered_map mockableEnvs = {{"ZET_ENABLE_METRICS", "1"}}; VariableBackup *> mockableEnvValuesBackup(&IoFunctions::mockableEnvValues, &mockableEnvs); ze_result_t result = ZE_RESULT_ERROR_UNINITIALIZED; DriverImp driverImp; driverImp.initialize(&result); EXPECT_NE(ZE_RESULT_SUCCESS, result); EXPECT_EQ(nullptr, L0::GlobalDriverHandle); EXPECT_EQ(nullptr, L0::GlobalDriver); } TEST(DriverImpTest, givenEnabledProgramDebuggingWhenCreatingExecutionEnvironmentThenDebuggingEnabledIsTrue) { NEO::MockCompilerEnableGuard mock(true); NEO::HardwareInfo hwInfo = *NEO::defaultHwInfo.get(); hwInfo.capabilityTable.levelZeroSupported = true; VariableBackup mockGetenvCalledBackup(&IoFunctions::mockGetenvCalled, 0); std::unordered_map mockableEnvs = {{"ZET_ENABLE_PROGRAM_DEBUGGING", "1"}}; VariableBackup *> mockableEnvValuesBackup(&IoFunctions::mockableEnvValues, &mockableEnvs); ze_result_t result = ZE_RESULT_ERROR_UNINITIALIZED; DriverImp driverImp; driverImp.initialize(&result); ASSERT_NE(nullptr, L0::GlobalDriver); ASSERT_NE(0u, L0::GlobalDriver->numDevices); 
EXPECT_TRUE(L0::GlobalDriver->devices[0]->getNEODevice()->getExecutionEnvironment()->isDebuggingEnabled()); delete L0::GlobalDriver; L0::GlobalDriverHandle = nullptr; L0::GlobalDriver = nullptr; } TEST(DriverImpTest, givenNoProgramDebuggingEnvVarWhenCreatingExecutionEnvironmentThenDebuggingEnabledIsFalse) { NEO::MockCompilerEnableGuard mock(true); NEO::HardwareInfo hwInfo = *NEO::defaultHwInfo.get(); hwInfo.capabilityTable.levelZeroSupported = true; ze_result_t result = ZE_RESULT_ERROR_UNINITIALIZED; DriverImp driverImp; driverImp.initialize(&result); ASSERT_NE(nullptr, L0::GlobalDriver); ASSERT_NE(0u, L0::GlobalDriver->numDevices); EXPECT_FALSE(L0::GlobalDriver->devices[0]->getNEODevice()->getExecutionEnvironment()->isDebuggingEnabled()); delete L0::GlobalDriver; L0::GlobalDriverHandle = nullptr; L0::GlobalDriver = nullptr; } TEST(DriverTest, givenProgramDebuggingEnvVarNonZeroWhenCreatingDriverThenEnableProgramDebuggingIsSetTrue) { NEO::MockCompilerEnableGuard mock(true); ze_result_t returnValue; NEO::HardwareInfo hwInfo = *NEO::defaultHwInfo.get(); hwInfo.capabilityTable.levelZeroSupported = true; NEO::MockDevice *neoDevice = NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo); NEO::DeviceVector devices; devices.push_back(std::unique_ptr(neoDevice)); L0EnvVariables envVariables = {}; envVariables.programDebugging = true; auto driverHandle = whitebox_cast(DriverHandle::create(std::move(devices), envVariables, &returnValue)); EXPECT_NE(nullptr, driverHandle); EXPECT_TRUE(driverHandle->enableProgramDebugging); delete driverHandle; L0::GlobalDriver = nullptr; } TEST(DriverTest, givenInvalidCompilerEnvironmentThenDependencyUnavailableErrorIsReturned) { NEO::HardwareInfo hwInfo = *NEO::defaultHwInfo.get(); hwInfo.capabilityTable.levelZeroSupported = true; ze_result_t result = ZE_RESULT_ERROR_UNINITIALIZED; DriverImp driverImp; auto oldFclDllName = Os::frontEndDllName; auto oldIgcDllName = Os::igcDllName; Os::frontEndDllName = "_invalidFCL"; Os::igcDllName = 
"_invalidIGC"; driverImp.initialize(&result); EXPECT_EQ(result, ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE); Os::igcDllName = oldIgcDllName; Os::frontEndDllName = oldFclDllName; ASSERT_EQ(nullptr, L0::GlobalDriver); } struct DriverTestMultipleFamilySupport : public ::testing::Test { void SetUp() override { NEO::MockCompilerEnableGuard mock(true); VariableBackup mockDeviceFlagBackup(&MockDevice::createSingleDevice, false); deviceFactory = std::make_unique(numRootDevices, numSubDevices); for (auto i = 0u; i < numRootDevices; i++) { devices.push_back(std::unique_ptr(deviceFactory->rootDevices[i])); if (i < numSupportedRootDevices) { devices[i]->getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable.levelZeroSupported = true; } else { deviceFactory->rootDevices.erase(deviceFactory->rootDevices.begin() + i); devices[i]->getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable.levelZeroSupported = false; } } } DebugManagerStateRestore restorer; std::vector> devices; std::unique_ptr deviceFactory; const uint32_t numRootDevices = 3u; const uint32_t numSubDevices = 2u; const uint32_t numSupportedRootDevices = 2u; }; TEST_F(DriverTestMultipleFamilySupport, whenInitializingDriverWithArrayOfDevicesThenDriverIsInitializedOnlyWithThoseSupported) { NEO::MockCompilerEnableGuard mock(true); ze_result_t returnValue; auto driverHandle = DriverHandle::create(std::move(devices), L0EnvVariables{}, &returnValue); EXPECT_NE(nullptr, driverHandle); L0::DriverHandleImp *driverHandleImp = reinterpret_cast(driverHandle); EXPECT_EQ(numSupportedRootDevices, driverHandleImp->devices.size()); for (auto d : driverHandleImp->devices) { EXPECT_TRUE(d->getNEODevice()->getHardwareInfo().capabilityTable.levelZeroSupported); } delete driverHandle; L0::GlobalDriver = nullptr; } struct DriverTestMultipleFamilyNoSupport : public ::testing::Test { void SetUp() override { NEO::MockCompilerEnableGuard mock(true); VariableBackup mockDeviceFlagBackup(&MockDevice::createSingleDevice, 
false); NEO::ExecutionEnvironment *executionEnvironment = new NEO::ExecutionEnvironment(); executionEnvironment->prepareRootDeviceEnvironments(numRootDevices); for (auto i = 0u; i < executionEnvironment->rootDeviceEnvironments.size(); i++) { executionEnvironment->rootDeviceEnvironments[i]->setHwInfo(NEO::defaultHwInfo.get()); } for (auto i = 0u; i < executionEnvironment->rootDeviceEnvironments.size(); i++) { devices.push_back(std::unique_ptr(NEO::MockDevice::createWithExecutionEnvironment(NEO::defaultHwInfo.get(), executionEnvironment, i))); devices[i]->getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable.levelZeroSupported = false; } } DebugManagerStateRestore restorer; std::vector> devices; const uint32_t numRootDevices = 3u; }; TEST_F(DriverTestMultipleFamilyNoSupport, whenInitializingDriverWithArrayOfNotSupportedDevicesThenDriverIsNull) { NEO::MockCompilerEnableGuard mock(true); ze_result_t returnValue; auto driverHandle = DriverHandle::create(std::move(devices), L0EnvVariables{}, &returnValue); EXPECT_EQ(nullptr, driverHandle); } struct MaskArray { const std::string masks[4] = {"0", "1", "2", "3"}; // fixture has 4 subDevices }; struct DriverHandleTest : public ::testing::Test { void SetUp() override { NEO::MockCompilerEnableGuard mock(true); ze_result_t returnValue; NEO::HardwareInfo hwInfo = *NEO::defaultHwInfo.get(); hwInfo.capabilityTable.levelZeroSupported = true; NEO::MockDevice *neoDevice = NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo); NEO::DeviceVector devices; devices.push_back(std::unique_ptr(neoDevice)); L0EnvVariables envVariables = {}; envVariables.programDebugging = true; driverHandle = whitebox_cast(DriverHandle::create(std::move(devices), envVariables, &returnValue)); L0::GlobalDriverHandle = driverHandle; } void TearDown() override { delete driverHandle; L0::GlobalDriver = nullptr; L0::GlobalDriverHandle = nullptr; } L0::DriverHandle *driverHandle; }; TEST_F(DriverHandleTest, 
givenInitializedDriverWhenZeDriverGetIsCalledThenDriverHandleCountIsObtained) { uint32_t count = 0; auto result = zeDriverGet(&count, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(1U, count); } TEST_F(DriverHandleTest, givenInitializedDriverWhenZeDriverGetIsCalledWithGreaterThanCountAvailableThenCorrectCountIsReturned) { uint32_t count = 0; ze_result_t result = zeDriverGet(&count, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(1U, count); count++; ze_driver_handle_t driverHandle = {}; result = zeDriverGet(&count, &driverHandle); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(1U, count); EXPECT_NE(nullptr, driverHandle); } TEST_F(DriverHandleTest, givenInitializedDriverWhenZeDriverGetIsCalledWithGreaterThanZeroCountAndNullDriverHandleThenInvalidNullPointerIsReturned) { uint32_t count = 0; ze_result_t result = zeDriverGet(&count, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(1U, count); result = zeDriverGet(&count, nullptr); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_NULL_POINTER, result); } TEST_F(DriverHandleTest, givenInitializedDriverWhenZeDriverGetIsCalledThenDriverHandleIsObtained) { ze_result_t result; uint32_t count = 0; result = zeDriverGet(&count, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(1U, count); ze_driver_handle_t *phDriverHandles = new ze_driver_handle_t[count]; result = zeDriverGet(&count, phDriverHandles); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(driverHandle->toHandle(), phDriverHandles[0]); delete[] phDriverHandles; } TEST_F(DriverHandleTest, givenInitializedDriverWhenZeDriverGetIsCalledThenGlobalDriverHandleIsObtained) { ze_result_t result; uint32_t count = 1; ze_driver_handle_t hDriverHandle = reinterpret_cast(&hDriverHandle); result = zeDriverGet(&count, &hDriverHandle); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, hDriverHandle); EXPECT_EQ(hDriverHandle, GlobalDriver); } TEST_F(DriverHandleTest, givenInitializedDriverWhenGetDeviceIsCalledThenOneDeviceIsObtained) { ze_result_t 
result; uint32_t count = 1; ze_device_handle_t device; result = driverHandle->getDevice(&count, &device); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, &device); } TEST_F(DriverHandleTest, whenQueryingForApiVersionThenExpectedVersionIsReturned) { ze_api_version_t version = {}; ze_result_t result = driverHandle->getApiVersion(&version); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(ZE_API_VERSION_1_3, version); } TEST_F(DriverHandleTest, whenQueryingForDevicesWithCountGreaterThanZeroAndNullDevicePointerThenNullHandleIsReturned) { uint32_t count = 1; ze_result_t result = driverHandle->getDevice(&count, nullptr); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_NULL_HANDLE, result); } TEST_F(DriverHandleTest, givenValidDriverHandleWhenGetSvmAllocManagerIsCalledThenSvmAllocsManagerIsObtained) { auto svmAllocsManager = driverHandle->getSvmAllocsManager(); EXPECT_NE(nullptr, svmAllocsManager); } TEST(zeDriverHandleGetProperties, whenZeDriverGetPropertiesIsCalledThenGetPropertiesIsCalled) { ze_result_t result; Mock driverHandle; ze_driver_properties_t properties; ze_result_t expectedResult = ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS; driverHandle.getPropertiesResult = expectedResult; result = zeDriverGetProperties(driverHandle.toHandle(), &properties); EXPECT_EQ(expectedResult, result); EXPECT_EQ(1u, driverHandle.getPropertiesCalled); } TEST(zeDriverHandleGetApiVersion, whenZeDriverGetApiIsCalledThenGetApiVersionIsCalled) { ze_result_t result; Mock driverHandle; ze_api_version_t version; ze_result_t expectedResult = ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS; driverHandle.getApiVersionResult = expectedResult; result = zeDriverGetApiVersion(driverHandle.toHandle(), &version); EXPECT_EQ(expectedResult, result); EXPECT_EQ(1u, driverHandle.getApiVersionCalled); } TEST(zeDriverGetIpcProperties, whenZeDriverGetIpcPropertiesIsCalledThenGetIPCPropertiesIsCalled) { ze_result_t result; Mock driverHandle; ze_driver_ipc_properties_t ipcProperties; ze_result_t expectedResult = 
ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS; driverHandle.getIPCPropertiesResult = expectedResult; result = zeDriverGetIpcProperties(driverHandle.toHandle(), &ipcProperties); EXPECT_EQ(expectedResult, result); EXPECT_EQ(1u, driverHandle.getIPCPropertiesCalled); } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/event/000077500000000000000000000000001422164147700264345ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/event/CMakeLists.txt000066400000000000000000000003511422164147700311730ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/test_event.cpp ) compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/event/test_event.cpp000066400000000000000000002210761422164147700313300ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/variable_backup.h" #include "shared/test/common/mocks/mock_compilers.h" #include "shared/test/common/mocks/mock_csr.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "shared/test/common/mocks/mock_memory_operations_handler.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/source/context/context_imp.h" #include "level_zero/core/source/driver/driver_handle_imp.h" #include "level_zero/core/source/event/event.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_built_ins.h" #include "level_zero/core/test/unit_tests/mocks/mock_device.h" #include "level_zero/core/test/unit_tests/mocks/mock_event.h" #include #include #include #include #include using namespace std::chrono_literals; namespace 
CpuIntrinsicsTests { extern std::atomic pauseCounter; extern volatile uint32_t *pauseAddress; extern uint32_t pauseValue; extern uint32_t pauseOffset; extern std::function setupPauseAddress; } // namespace CpuIntrinsicsTests namespace L0 { namespace ult { using EventPoolCreate = Test; using EventCreate = Test; class MemoryManagerEventPoolFailMock : public NEO::MemoryManager { public: MemoryManagerEventPoolFailMock(NEO::ExecutionEnvironment &executionEnvironment) : NEO::MemoryManager(executionEnvironment) {} void *createMultiGraphicsAllocationInSystemMemoryPool(std::vector &rootDeviceIndices, AllocationProperties &properties, NEO::MultiGraphicsAllocation &multiGraphicsAllocation) override { return nullptr; }; NEO::GraphicsAllocation *createGraphicsAllocationFromSharedHandle(osHandle handle, const AllocationProperties &properties, bool requireSpecificBitness, bool isHostIpcAllocation) override { return nullptr; } void addAllocationToHostPtrManager(NEO::GraphicsAllocation *memory) override{}; void removeAllocationFromHostPtrManager(NEO::GraphicsAllocation *memory) override{}; NEO::GraphicsAllocation *createGraphicsAllocationFromNTHandle(void *handle, uint32_t rootDeviceIndex, AllocationType allocType) override { return nullptr; }; AllocationStatus populateOsHandles(NEO::OsHandleStorage &handleStorage, uint32_t rootDeviceIndex) override { return AllocationStatus::Success; }; void cleanOsHandles(NEO::OsHandleStorage &handleStorage, uint32_t rootDeviceIndex) override{}; void freeGraphicsMemoryImpl(NEO::GraphicsAllocation *gfxAllocation) override{}; void freeGraphicsMemoryImpl(GraphicsAllocation *gfxAllocation, bool isImportedAllocation) override{}; uint64_t getSystemSharedMemory(uint32_t rootDeviceIndex) override { return 0; }; uint64_t getLocalMemorySize(uint32_t rootDeviceIndex, uint32_t deviceBitfield) override { return 0; }; double getPercentOfGlobalMemoryAvailable(uint32_t rootDeviceIndex) override { return 0; } AddressRange reserveGpuAddress(size_t size, uint32_t 
rootDeviceIndex) override { return {}; } void freeGpuAddress(AddressRange addressRange, uint32_t rootDeviceIndex) override{}; NEO::GraphicsAllocation *createGraphicsAllocation(OsHandleStorage &handleStorage, const NEO::AllocationData &allocationData) override { return nullptr; }; NEO::GraphicsAllocation *allocateGraphicsMemoryForNonSvmHostPtr(const NEO::AllocationData &allocationData) override { return nullptr; }; NEO::GraphicsAllocation *allocateGraphicsMemoryWithAlignment(const NEO::AllocationData &allocationData) override { return nullptr; }; NEO::GraphicsAllocation *allocateUSMHostGraphicsMemory(const NEO::AllocationData &allocationData) override { return nullptr; }; NEO::GraphicsAllocation *allocateGraphicsMemory64kb(const NEO::AllocationData &allocationData) override { return nullptr; }; NEO::GraphicsAllocation *allocate32BitGraphicsMemoryImpl(const NEO::AllocationData &allocationData, bool useLocalMemory) override { return nullptr; }; NEO::GraphicsAllocation *allocateGraphicsMemoryInDevicePool(const NEO::AllocationData &allocationData, AllocationStatus &status) override { return nullptr; }; NEO::GraphicsAllocation *allocateGraphicsMemoryWithGpuVa(const NEO::AllocationData &allocationData) override { return nullptr; }; NEO::GraphicsAllocation *allocateGraphicsMemoryForImageImpl(const NEO::AllocationData &allocationData, std::unique_ptr gmm) override { return nullptr; }; NEO::GraphicsAllocation *allocateMemoryByKMD(const NEO::AllocationData &allocationData) override { return nullptr; }; void *lockResourceImpl(NEO::GraphicsAllocation &graphicsAllocation) override { return nullptr; }; void unlockResourceImpl(NEO::GraphicsAllocation &graphicsAllocation) override{}; }; struct EventPoolFailTests : public ::testing::Test { void SetUp() override { NEO::MockCompilerEnableGuard mock(true); neoDevice = NEO::MockDevice::createWithNewExecutionEnvironment(NEO::defaultHwInfo.get()); auto mockBuiltIns = new MockBuiltins(); 
neoDevice->executionEnvironment->rootDeviceEnvironments[0]->builtins.reset(mockBuiltIns); NEO::DeviceVector devices; devices.push_back(std::unique_ptr(neoDevice)); driverHandle = std::make_unique(); driverHandle->initialize(std::move(devices)); prevMemoryManager = driverHandle->getMemoryManager(); currMemoryManager = new MemoryManagerEventPoolFailMock(*neoDevice->executionEnvironment); driverHandle->setMemoryManager(currMemoryManager); device = driverHandle->devices[0]; context = std::make_unique(driverHandle.get()); EXPECT_NE(context, nullptr); context->getDevices().insert(std::make_pair(device->toHandle(), device)); auto neoDevice = device->getNEODevice(); context->rootDeviceIndices.insert(neoDevice->getRootDeviceIndex()); context->deviceBitfields.insert({neoDevice->getRootDeviceIndex(), neoDevice->getDeviceBitfield()}); } void TearDown() override { driverHandle->setMemoryManager(prevMemoryManager); delete currMemoryManager; } NEO::MemoryManager *prevMemoryManager = nullptr; NEO::MemoryManager *currMemoryManager = nullptr; std::unique_ptr driverHandle; NEO::MockDevice *neoDevice = nullptr; L0::Device *device = nullptr; std::unique_ptr context; }; TEST_F(EventPoolFailTests, whenCreatingEventPoolAndAllocationFailsThenOutOfDeviceMemoryIsReturned) { ze_event_pool_desc_t eventPoolDesc = { ZE_STRUCTURE_TYPE_EVENT_POOL_DESC, nullptr, ZE_EVENT_POOL_FLAG_HOST_VISIBLE, 1}; ze_event_pool_handle_t eventPool = {}; ze_result_t res = context->createEventPool(&eventPoolDesc, 0, nullptr, &eventPool); EXPECT_EQ(res, ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY); } TEST_F(EventPoolCreate, GivenEventPoolThenAllocationContainsAtLeast16Bytes) { ze_event_pool_desc_t eventPoolDesc = { ZE_STRUCTURE_TYPE_EVENT_POOL_DESC, nullptr, ZE_EVENT_POOL_FLAG_HOST_VISIBLE, 1}; ze_result_t result = ZE_RESULT_SUCCESS; std::unique_ptr eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); ASSERT_NE(nullptr, eventPool); auto 
allocation = &eventPool->getAllocation(); ASSERT_NE(nullptr, allocation); uint32_t minAllocationSize = eventPool->getEventSize(); EXPECT_GE(allocation->getGraphicsAllocation(device->getNEODevice()->getRootDeviceIndex())->getUnderlyingBufferSize(), minAllocationSize); } HWTEST_F(EventPoolCreate, givenTimestampEventsThenEventSizeSufficientForAllKernelTimestamps) { ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; ze_result_t result = ZE_RESULT_SUCCESS; std::unique_ptr eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); ASSERT_NE(nullptr, eventPool); uint32_t maxKernelSplit = 3; uint32_t packetsSize = maxKernelSplit * NEO::TimestampPacketSizeControl::preferredPacketCount * static_cast(NEO::TimestampPackets::getSinglePacketSize()); uint32_t kernelTimestampsSize = static_cast(alignUp(packetsSize, 4 * MemoryConstants::cacheLineSize)); EXPECT_EQ(kernelTimestampsSize, eventPool->getEventSize()); } TEST_F(EventPoolCreate, givenEventPoolCreatedWithTimestampFlagThenHasTimestampEventsReturnsTrue) { ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; ze_result_t result = ZE_RESULT_SUCCESS; std::unique_ptr eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); ASSERT_NE(nullptr, eventPool); EventPoolImp *eventPoolImp = static_cast(eventPool.get()); EXPECT_TRUE(eventPoolImp->isEventPoolTimestampFlagSet()); } TEST_F(EventPoolCreate, givenEventPoolCreatedWithNoTimestampFlagThenHasTimestampEventsReturnsFalse) { ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; ze_result_t result = ZE_RESULT_SUCCESS; std::unique_ptr eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); ASSERT_NE(nullptr, 
eventPool); EventPoolImp *eventPoolImp = static_cast(eventPool.get()); EXPECT_FALSE(eventPoolImp->isEventPoolTimestampFlagSet()); } TEST_F(EventPoolCreate, givenEventPoolCreatedWithTimestampFlagAndOverrideTimestampEventsFlagThenHasTimestampEventsReturnsFalse) { DebugManagerStateRestore restore; NEO::DebugManager.flags.OverrideTimestampEvents.set(0); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; ze_result_t result = ZE_RESULT_SUCCESS; std::unique_ptr eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); ASSERT_NE(nullptr, eventPool); EventPoolImp *eventPoolImp = static_cast(eventPool.get()); EXPECT_FALSE(eventPoolImp->isEventPoolTimestampFlagSet()); } TEST_F(EventPoolCreate, givenEventPoolCreatedWithoutTimestampFlagAndOverrideTimestampEventsFlagThenHasTimestampEventsReturnsTrue) { DebugManagerStateRestore restore; NEO::DebugManager.flags.OverrideTimestampEvents.set(1); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; eventPoolDesc.flags = 0; ze_result_t result = ZE_RESULT_SUCCESS; std::unique_ptr eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); ASSERT_NE(nullptr, eventPool); EventPoolImp *eventPoolImp = static_cast(eventPool.get()); EXPECT_TRUE(eventPoolImp->isEventPoolTimestampFlagSet()); } TEST_F(EventPoolCreate, givenAnEventIsCreatedFromThisEventPoolThenEventContainsDeviceCommandStreamReceiver) { ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST; eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; ze_event_handle_t event = nullptr; ze_result_t result = ZE_RESULT_SUCCESS; std::unique_ptr eventPool(EventPool::create(driverHandle.get(), context, 0, 
nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); ASSERT_NE(nullptr, eventPool); eventPool->createEvent(&eventDesc, &event); std::unique_ptr event_object(L0::Event::fromHandle(event)); ASSERT_NE(nullptr, event_object->csr); ASSERT_EQ(device->getNEODevice()->getDefaultEngine().commandStreamReceiver, event_object->csr); } TEST_F(EventPoolCreate, GivenNoDeviceThenEventPoolIsCreated) { ze_event_pool_desc_t eventPoolDesc = { ZE_STRUCTURE_TYPE_EVENT_POOL_DESC, nullptr, ZE_EVENT_POOL_FLAG_HOST_VISIBLE, 4}; ze_result_t result = ZE_RESULT_SUCCESS; auto eventPool = EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result); EXPECT_EQ(ZE_RESULT_SUCCESS, result); ASSERT_NE(nullptr, eventPool); eventPool->destroy(); } TEST_F(EventPoolCreate, GivenDeviceThenEventPoolIsCreated) { ze_event_pool_desc_t eventPoolDesc = { ZE_STRUCTURE_TYPE_EVENT_POOL_DESC, nullptr, ZE_EVENT_POOL_FLAG_HOST_VISIBLE, 4}; auto deviceHandle = device->toHandle(); ze_result_t result = ZE_RESULT_SUCCESS; auto eventPool = EventPool::create(driverHandle.get(), context, 1, &deviceHandle, &eventPoolDesc, result); EXPECT_EQ(ZE_RESULT_SUCCESS, result); ASSERT_NE(nullptr, eventPool); eventPool->destroy(); } using EventPoolIPCHandleTests = Test; TEST_F(EventPoolIPCHandleTests, whenGettingIpcHandleForEventPoolThenHandleAndNumberOfEventsAreReturnedInHandle) { uint32_t numEvents = 4; ze_event_pool_desc_t eventPoolDesc = { ZE_STRUCTURE_TYPE_EVENT_POOL_DESC, nullptr, ZE_EVENT_POOL_FLAG_HOST_VISIBLE, numEvents}; auto deviceHandle = device->toHandle(); ze_result_t result = ZE_RESULT_SUCCESS; auto eventPool = EventPool::create(driverHandle.get(), context, 1, &deviceHandle, &eventPoolDesc, result); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, eventPool); ze_ipc_event_pool_handle_t ipcHandle = {}; ze_result_t res = eventPool->getIpcHandle(&ipcHandle); EXPECT_EQ(res, ZE_RESULT_SUCCESS); int handle = -1; memcpy_s(&handle, sizeof(int), ipcHandle.data, sizeof(int)); 
EXPECT_NE(handle, -1); uint32_t expectedNumEvents = 0; memcpy_s(&expectedNumEvents, sizeof(expectedNumEvents), ipcHandle.data + sizeof(int), sizeof(expectedNumEvents)); EXPECT_EQ(numEvents, expectedNumEvents); res = eventPool->destroy(); EXPECT_EQ(res, ZE_RESULT_SUCCESS); } TEST_F(EventPoolIPCHandleTests, whenOpeningIpcHandleForEventPoolThenEventPoolIsCreatedAndEventSizesAreTheSame) { uint32_t numEvents = 4; ze_event_pool_desc_t eventPoolDesc = { ZE_STRUCTURE_TYPE_EVENT_POOL_DESC, nullptr, ZE_EVENT_POOL_FLAG_HOST_VISIBLE, numEvents}; auto deviceHandle = device->toHandle(); ze_result_t result = ZE_RESULT_SUCCESS; auto eventPool = EventPool::create(driverHandle.get(), context, 1, &deviceHandle, &eventPoolDesc, result); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, eventPool); ze_ipc_event_pool_handle_t ipcHandle = {}; ze_result_t res = eventPool->getIpcHandle(&ipcHandle); EXPECT_EQ(res, ZE_RESULT_SUCCESS); ze_event_pool_handle_t ipcEventPoolHandle = {}; res = context->openEventPoolIpcHandle(ipcHandle, &ipcEventPoolHandle); EXPECT_EQ(res, ZE_RESULT_SUCCESS); L0::EventPool *ipcEventPool = L0::EventPool::fromHandle(ipcEventPoolHandle); EXPECT_EQ(ipcEventPool->getEventSize(), eventPool->getEventSize()); res = ipcEventPool->closeIpcHandle(); EXPECT_EQ(res, ZE_RESULT_SUCCESS); res = eventPool->destroy(); EXPECT_EQ(res, ZE_RESULT_SUCCESS); } using EventPoolOpenIPCHandleFailTests = Test; TEST_F(EventPoolOpenIPCHandleFailTests, givenFailureToAllocateMemoryWhenOpeningIpcHandleForEventPoolThenInvalidArgumentIsReturned) { uint32_t numEvents = 4; ze_event_pool_desc_t eventPoolDesc = { ZE_STRUCTURE_TYPE_EVENT_POOL_DESC, nullptr, ZE_EVENT_POOL_FLAG_HOST_VISIBLE, numEvents}; auto deviceHandle = device->toHandle(); ze_result_t result = ZE_RESULT_SUCCESS; auto eventPool = EventPool::create(driverHandle.get(), context, 1, &deviceHandle, &eventPoolDesc, result); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, eventPool); ze_ipc_event_pool_handle_t ipcHandle = {}; 
ze_result_t res = eventPool->getIpcHandle(&ipcHandle); EXPECT_EQ(res, ZE_RESULT_SUCCESS); { NEO::MemoryManager *prevMemoryManager = nullptr; NEO::MemoryManager *currMemoryManager = nullptr; prevMemoryManager = driverHandle->getMemoryManager(); currMemoryManager = new FailMemoryManager(*neoDevice->executionEnvironment); driverHandle->setMemoryManager(currMemoryManager); ze_event_pool_handle_t ipcEventPoolHandle = {}; res = context->openEventPoolIpcHandle(ipcHandle, &ipcEventPoolHandle); EXPECT_EQ(res, ZE_RESULT_ERROR_INVALID_ARGUMENT); driverHandle->setMemoryManager(prevMemoryManager); delete currMemoryManager; } res = eventPool->destroy(); EXPECT_EQ(res, ZE_RESULT_SUCCESS); } class MultiDeviceEventPoolOpenIPCHandleFailTestsMemoryManager : public FailMemoryManager { public: MultiDeviceEventPoolOpenIPCHandleFailTestsMemoryManager(NEO::ExecutionEnvironment &executionEnvironment) : FailMemoryManager(executionEnvironment) {} GraphicsAllocation *createGraphicsAllocationFromSharedHandle(osHandle handle, const AllocationProperties &properties, bool requireSpecificBitness, bool isHostIpcAllocation) override { return &mockAllocation0; } GraphicsAllocation *createGraphicsAllocationFromExistingStorage(AllocationProperties &properties, void *ptr, MultiGraphicsAllocation &multiGraphicsAllocation) override { if (calls == 0) { calls++; return &mockAllocation1; } return nullptr; } void freeGraphicsMemory(GraphicsAllocation *gfxAllocation) override { } NEO::MockGraphicsAllocation mockAllocation0; NEO::MockGraphicsAllocation mockAllocation1; uint32_t calls = 0; }; using MultiDeviceEventPoolOpenIPCHandleFailTests = Test; TEST_F(MultiDeviceEventPoolOpenIPCHandleFailTests, givenFailureToAllocateMemoryWhenOpeningIpcHandleForEventPoolWithMultipleDevicesThenOutOfHostMemoryIsReturned) { uint32_t numEvents = 4; ze_event_pool_desc_t eventPoolDesc = { ZE_STRUCTURE_TYPE_EVENT_POOL_DESC, nullptr, ZE_EVENT_POOL_FLAG_HOST_VISIBLE, numEvents}; auto deviceHandle = 
driverHandle->devices[0]->toHandle(); ze_result_t result = ZE_RESULT_SUCCESS; auto eventPool = EventPool::create(driverHandle.get(), context, 1, &deviceHandle, &eventPoolDesc, result); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, eventPool); ze_ipc_event_pool_handle_t ipcHandle = {}; ze_result_t res = eventPool->getIpcHandle(&ipcHandle); EXPECT_EQ(res, ZE_RESULT_SUCCESS); { NEO::MemoryManager *prevMemoryManager = nullptr; NEO::MemoryManager *currMemoryManager = nullptr; prevMemoryManager = driverHandle->getMemoryManager(); NEO::MockDevice *neoDevice = static_cast(driverHandle->devices[0]->getNEODevice()); currMemoryManager = new MultiDeviceEventPoolOpenIPCHandleFailTestsMemoryManager(*neoDevice->executionEnvironment); driverHandle->setMemoryManager(currMemoryManager); ze_event_pool_handle_t ipcEventPoolHandle = {}; res = context->openEventPoolIpcHandle(ipcHandle, &ipcEventPoolHandle); EXPECT_EQ(res, ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY); driverHandle->setMemoryManager(prevMemoryManager); delete currMemoryManager; } res = eventPool->destroy(); EXPECT_EQ(res, ZE_RESULT_SUCCESS); } TEST_F(EventPoolCreate, GivenNullptrDeviceAndNumberOfDevicesWhenCreatingEventPoolThenReturnError) { ze_event_pool_desc_t eventPoolDesc = { ZE_STRUCTURE_TYPE_EVENT_POOL_DESC, nullptr, ZE_EVENT_POOL_FLAG_HOST_VISIBLE, 1}; ze_device_handle_t devices[] = {nullptr, device->toHandle()}; ze_result_t result = ZE_RESULT_SUCCESS; std::unique_ptr eventPool(EventPool::create(driverHandle.get(), context, 2, devices, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, result); ASSERT_EQ(nullptr, eventPool); } TEST_F(EventPoolCreate, GivenNullptrDeviceWithoutNumberOfDevicesWhenCreatingEventPoolThenEventPoolCreated) { ze_event_pool_desc_t eventPoolDesc = { ZE_STRUCTURE_TYPE_EVENT_POOL_DESC, nullptr, ZE_EVENT_POOL_FLAG_HOST_VISIBLE, 1}; ze_device_handle_t devices[] = {nullptr, device->toHandle()}; ze_result_t result = ZE_RESULT_SUCCESS; std::unique_ptr 
eventPool(EventPool::create(driverHandle.get(), context, 0, devices, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); ASSERT_NE(nullptr, eventPool); } TEST_F(EventPoolCreate, whenHostVisibleFlagNotSetThenEventAllocationIsOnDevice) { ze_event_pool_desc_t eventPoolDesc = { ZE_STRUCTURE_TYPE_EVENT_POOL_DESC, nullptr, 0u, 4}; ze_device_handle_t devices[] = {nullptr, device->toHandle()}; auto memoryManager = static_cast(neoDevice->getMemoryManager()); memoryManager->recentlyPassedDeviceBitfield = systemMemoryBitfield; ze_result_t result = ZE_RESULT_SUCCESS; std::unique_ptr eventPool(EventPool::create(driverHandle.get(), context, 0, devices, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); ASSERT_NE(nullptr, eventPool); EXPECT_EQ(NEO::AllocationType::GPU_TIMESTAMP_DEVICE_BUFFER, eventPool->getAllocation().getAllocationType()); EXPECT_NE(systemMemoryBitfield, memoryManager->recentlyPassedDeviceBitfield); EXPECT_EQ(neoDevice->getDeviceBitfield(), memoryManager->recentlyPassedDeviceBitfield); } TEST_F(EventPoolCreate, whenAllocationMemoryFailsThenEventAllocationIsNotCreated) { ze_event_pool_desc_t eventPoolDesc = { ZE_STRUCTURE_TYPE_EVENT_POOL_DESC, nullptr, 0u, 4}; ze_device_handle_t devices[] = {nullptr, device->toHandle()}; auto memoryManager = static_cast(neoDevice->getMemoryManager()); memoryManager->isMockHostMemoryManager = true; memoryManager->forceFailureInPrimaryAllocation = true; ze_result_t result = ZE_RESULT_SUCCESS; std::unique_ptr eventPool(EventPool::create(driverHandle.get(), context, 0, devices, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY, result); EXPECT_EQ(nullptr, eventPool); } TEST_F(EventCreate, givenAnEventCreatedThenTheEventHasTheDeviceCommandStreamReceiverSet) { ze_event_pool_desc_t eventPoolDesc = { ZE_STRUCTURE_TYPE_EVENT_POOL_DESC, nullptr, ZE_EVENT_POOL_FLAG_HOST_VISIBLE, 1}; const ze_event_desc_t eventDesc = { ZE_STRUCTURE_TYPE_EVENT_DESC, nullptr, 0, ZE_EVENT_SCOPE_FLAG_DEVICE, 
ZE_EVENT_SCOPE_FLAG_DEVICE}; ze_result_t result = ZE_RESULT_SUCCESS; std::unique_ptr eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); ASSERT_NE(nullptr, eventPool); std::unique_ptr event(Event::create(eventPool.get(), &eventDesc, device)); ASSERT_NE(nullptr, event); ASSERT_NE(nullptr, event.get()->csr); ASSERT_EQ(device->getNEODevice()->getDefaultEngine().commandStreamReceiver, event.get()->csr); } TEST_F(EventCreate, givenEventWhenSignaledAndResetFromTheHostThenCorrectDataAndOffsetAreSet) { ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST; ze_result_t result = ZE_RESULT_SUCCESS; auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); ASSERT_NE(nullptr, eventPool); auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); ASSERT_NE(nullptr, event); result = event->queryStatus(); EXPECT_EQ(ZE_RESULT_NOT_READY, result); uint64_t gpuAddr = event->getGpuAddress(device); EXPECT_EQ(gpuAddr, event->getPacketAddress(device)); event->hostSignal(); result = event->queryStatus(); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(gpuAddr, event->getPacketAddress(device)); event->reset(); result = event->queryStatus(); EXPECT_EQ(gpuAddr, event->getPacketAddress(device)); EXPECT_EQ(ZE_RESULT_NOT_READY, result); } TEST_F(EventCreate, givenAnEventCreateWithInvalidIndexUsingThisEventPoolThenErrorIsReturned) { ze_event_pool_desc_t eventPoolDesc = { ZE_STRUCTURE_TYPE_EVENT_POOL_DESC, nullptr, ZE_EVENT_POOL_FLAG_HOST_VISIBLE, 1}; const ze_event_desc_t eventDesc = { ZE_STRUCTURE_TYPE_EVENT_DESC, nullptr, 2, ZE_EVENT_SCOPE_FLAG_DEVICE, ZE_EVENT_SCOPE_FLAG_DEVICE}; ze_event_handle_t event = nullptr; ze_result_t 
result = ZE_RESULT_SUCCESS; std::unique_ptr eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); ASSERT_NE(nullptr, eventPool); ze_result_t value = eventPool->createEvent(&eventDesc, &event); ASSERT_EQ(nullptr, event); ASSERT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, value); } class EventSynchronizeTest : public Test { public: void SetUp() override { DeviceFixture::SetUp(); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.signal = 0; eventDesc.wait = 0; ze_result_t result = ZE_RESULT_SUCCESS; eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); ASSERT_NE(nullptr, eventPool); event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); ASSERT_NE(nullptr, event); } void TearDown() override { DeviceFixture::TearDown(); } std::unique_ptr eventPool = nullptr; std::unique_ptr event; }; TEST_F(EventSynchronizeTest, GivenGpuHangWhenHostSynchronizeIsCalledThenDeviceLostIsReturned) { const auto csr = std::make_unique(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield()); csr->isGpuHangDetectedReturnValue = true; event->csr = csr.get(); event->gpuHangCheckPeriod = 0ms; constexpr uint64_t timeout = std::numeric_limits::max(); auto result = event->hostSynchronize(timeout); EXPECT_EQ(ZE_RESULT_ERROR_DEVICE_LOST, result); } TEST_F(EventSynchronizeTest, GivenNoGpuHangAndOneNanosecondTimeoutWhenHostSynchronizeIsCalledThenResultNotReadyIsReturnedDueToTimeout) { const auto csr = std::make_unique(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield()); csr->isGpuHangDetectedReturnValue = false; event->csr = csr.get(); event->gpuHangCheckPeriod = 0ms; constexpr uint64_t timeoutNanoseconds = 1; auto result = 
event->hostSynchronize(timeoutNanoseconds); EXPECT_EQ(ZE_RESULT_NOT_READY, result); } TEST_F(EventSynchronizeTest, GivenLongPeriodOfGpuCheckAndOneNanosecondTimeoutWhenHostSynchronizeIsCalledThenResultNotReadyIsReturnedDueToTimeout) { const auto csr = std::make_unique(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield()); event->csr = csr.get(); event->gpuHangCheckPeriod = 50000000ms; constexpr uint64_t timeoutNanoseconds = 1; auto result = event->hostSynchronize(timeoutNanoseconds); EXPECT_EQ(ZE_RESULT_NOT_READY, result); } TEST_F(EventSynchronizeTest, givenCallToEventHostSynchronizeWithTimeoutZeroAndStateInitialHostSynchronizeReturnsNotReady) { ze_result_t result = event->hostSynchronize(0); EXPECT_EQ(ZE_RESULT_NOT_READY, result); } TEST_F(EventSynchronizeTest, givenCallToEventHostSynchronizeWithNonZeroTimeoutAndStateInitialHostSynchronizeReturnsNotReady) { ze_result_t result = event->hostSynchronize(10); EXPECT_EQ(ZE_RESULT_NOT_READY, result); } TEST_F(EventSynchronizeTest, givenCallToEventHostSynchronizeWithTimeoutZeroAndStateSignaledHostSynchronizeReturnsSuccess) { uint32_t *hostAddr = static_cast(event->getHostAddress()); *hostAddr = Event::STATE_SIGNALED; ze_result_t result = event->hostSynchronize(0); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST_F(EventSynchronizeTest, givenCallToEventHostSynchronizeWithTimeoutNonZeroAndStateSignaledHostSynchronizeReturnsSuccess) { uint32_t *hostAddr = static_cast(event->getHostAddress()); *hostAddr = Event::STATE_SIGNALED; ze_result_t result = event->hostSynchronize(10); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST_F(EventSynchronizeTest, givenInfiniteTimeoutWhenWaitingForNonTimestampEventCompletionThenReturnOnlyAfterAllEventPacketsAreCompleted) { constexpr uint32_t packetsInUse = 2; event->setPacketsInUse(packetsInUse); const size_t eventPacketSize = event->getSinglePacketSize(); const size_t eventCompletionOffset = event->getContextStartOffset(); VariableBackup 
backupPauseAddress(&CpuIntrinsicsTests::pauseAddress); VariableBackup backupPauseValue(&CpuIntrinsicsTests::pauseValue, Event::STATE_CLEARED); VariableBackup backupPauseOffset(&CpuIntrinsicsTests::pauseOffset); VariableBackup> backupSetupPauseAddress(&CpuIntrinsicsTests::setupPauseAddress); CpuIntrinsicsTests::pauseCounter = 0u; CpuIntrinsicsTests::pauseAddress = static_cast(ptrOffset(event->getHostAddress(), eventCompletionOffset)); uint32_t *hostAddr = static_cast(ptrOffset(event->getHostAddress(), eventCompletionOffset)); for (uint32_t i = 0; i < packetsInUse; i++) { *hostAddr = Event::STATE_CLEARED; hostAddr = ptrOffset(hostAddr, eventPacketSize); } CpuIntrinsicsTests::setupPauseAddress = [&]() { if (CpuIntrinsicsTests::pauseCounter > 10) { volatile uint32_t *nextPacket = CpuIntrinsicsTests::pauseAddress; for (uint32_t i = 0; i < packetsInUse; i++) { *nextPacket = Event::STATE_SIGNALED; nextPacket = ptrOffset(nextPacket, eventPacketSize); } } }; constexpr uint64_t infiniteTimeout = std::numeric_limits::max(); ze_result_t result = event->hostSynchronize(infiniteTimeout); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST_F(EventSynchronizeTest, givenInfiniteTimeoutWhenWaitingForPartitionedNonTimestampEventCompletionThenReturnOnlyAfterAllEventPacketsAreCompleted) { constexpr uint32_t packetsInUse = 2; event->setPacketsInUse(packetsInUse); event->setPartitionedEvent(true); const size_t eventPacketSize = event->getSinglePacketSize(); const size_t eventCompletionOffset = event->getContextEndOffset(); VariableBackup backupPauseAddress(&CpuIntrinsicsTests::pauseAddress); VariableBackup backupPauseValue(&CpuIntrinsicsTests::pauseValue, Event::STATE_CLEARED); VariableBackup backupPauseOffset(&CpuIntrinsicsTests::pauseOffset); VariableBackup> backupSetupPauseAddress(&CpuIntrinsicsTests::setupPauseAddress); CpuIntrinsicsTests::pauseCounter = 0u; CpuIntrinsicsTests::pauseAddress = static_cast(ptrOffset(event->getHostAddress(), eventCompletionOffset)); uint32_t *hostAddr = 
static_cast(ptrOffset(event->getHostAddress(), eventCompletionOffset)); for (uint32_t i = 0; i < packetsInUse; i++) { *hostAddr = Event::STATE_CLEARED; hostAddr = ptrOffset(hostAddr, eventPacketSize); } CpuIntrinsicsTests::setupPauseAddress = [&]() { if (CpuIntrinsicsTests::pauseCounter > 10) { volatile uint32_t *nextPacket = CpuIntrinsicsTests::pauseAddress; for (uint32_t i = 0; i < packetsInUse; i++) { *nextPacket = Event::STATE_SIGNALED; nextPacket = ptrOffset(nextPacket, eventPacketSize); } } }; constexpr uint64_t infiniteTimeout = std::numeric_limits::max(); ze_result_t result = event->hostSynchronize(infiniteTimeout); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST_F(EventSynchronizeTest, givenInfiniteTimeoutWhenWaitingForTimestampEventCompletionThenReturnOnlyAfterAllEventPacketsAreCompleted) { constexpr uint32_t packetsInUse = 2; event->setPacketsInUse(packetsInUse); event->setEventTimestampFlag(true); const size_t eventPacketSize = event->getSinglePacketSize(); const size_t eventCompletionOffset = event->getContextEndOffset(); VariableBackup backupPauseAddress(&CpuIntrinsicsTests::pauseAddress); VariableBackup backupPauseValue(&CpuIntrinsicsTests::pauseValue, Event::STATE_CLEARED); VariableBackup backupPauseOffset(&CpuIntrinsicsTests::pauseOffset); VariableBackup> backupSetupPauseAddress(&CpuIntrinsicsTests::setupPauseAddress); CpuIntrinsicsTests::pauseCounter = 0u; CpuIntrinsicsTests::pauseAddress = static_cast(ptrOffset(event->getHostAddress(), eventCompletionOffset)); uint32_t *hostAddr = static_cast(ptrOffset(event->getHostAddress(), eventCompletionOffset)); for (uint32_t i = 0; i < packetsInUse; i++) { *hostAddr = Event::STATE_CLEARED; hostAddr = ptrOffset(hostAddr, eventPacketSize); } CpuIntrinsicsTests::setupPauseAddress = [&]() { if (CpuIntrinsicsTests::pauseCounter > 10) { volatile uint32_t *nextPacket = CpuIntrinsicsTests::pauseAddress; for (uint32_t i = 0; i < packetsInUse; i++) { *nextPacket = Event::STATE_SIGNALED; nextPacket = 
ptrOffset(nextPacket, eventPacketSize); } } }; constexpr uint64_t infiniteTimeout = std::numeric_limits::max(); ze_result_t result = event->hostSynchronize(infiniteTimeout); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } using EventPoolIPCEventResetTests = Test; TEST_F(EventPoolIPCEventResetTests, whenOpeningIpcHandleForEventPoolCreateWithIpcFlagThenEventsInNewPoolAreNotReset) { std::unique_ptr eventPool = nullptr; ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE; ze_result_t result = ZE_RESULT_SUCCESS; eventPool = std::unique_ptr(static_cast(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result))); EXPECT_NE(nullptr, eventPool); EXPECT_EQ(ZE_RESULT_SUCCESS, result); std::unique_ptr> event0; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.signal = 0; eventDesc.wait = 0; event0 = std::unique_ptr>(static_cast *>(L0::Event::create(eventPool.get(), &eventDesc, device))); EXPECT_NE(nullptr, event0); uint32_t *hostAddr = static_cast(event0->getHostAddress()); EXPECT_EQ(*hostAddr, Event::STATE_INITIAL); // change state event0->hostSignal(); hostAddr = static_cast(event0->getHostAddress()); EXPECT_EQ(*hostAddr, Event::STATE_SIGNALED); // create an event from the pool with the same index as event0, but this time, since isImportedIpcPool is true, no reset should happen eventPool->isImportedIpcPool = true; std::unique_ptr> event1; event1 = std::unique_ptr>(static_cast *>(L0::Event::create(eventPool.get(), &eventDesc, device))); EXPECT_NE(nullptr, event1); uint32_t *hostAddr1 = static_cast(event1->getHostAddress()); EXPECT_EQ(*hostAddr1, Event::STATE_SIGNALED); // create another event from the pool with the same index, but this time, since isImportedIpcPool is false, reset should happen eventPool->isImportedIpcPool = false; std::unique_ptr> event2; event2 = std::unique_ptr>(static_cast *>(L0::Event::create(eventPool.get(), &eventDesc, device))); EXPECT_NE(nullptr, 
event2); uint32_t *hostAddr2 = static_cast(event2->getHostAddress()); EXPECT_EQ(*hostAddr2, Event::STATE_INITIAL); } using EventAubCsrTest = Test; HWTEST_F(EventAubCsrTest, givenCallToEventHostSynchronizeWithAubModeCsrReturnsSuccess) { std::unique_ptr> driverHandle; NEO::MockDevice *neoDevice = nullptr; L0::Device *device = nullptr; neoDevice = NEO::MockDevice::createWithNewExecutionEnvironment(NEO::defaultHwInfo.get()); auto mockBuiltIns = new MockBuiltins(); neoDevice->executionEnvironment->rootDeviceEnvironments[0]->builtins.reset(mockBuiltIns); NEO::DeviceVector devices; devices.push_back(std::unique_ptr(neoDevice)); driverHandle = std::make_unique>(); driverHandle->initialize(std::move(devices)); device = driverHandle->devices[0]; int32_t tag; auto aubCsr = new MockCsrAub(tag, *neoDevice->executionEnvironment, neoDevice->getRootDeviceIndex(), neoDevice->getDeviceBitfield()); neoDevice->resetCommandStreamReceiver(aubCsr); std::unique_ptr eventPool = nullptr; std::unique_ptr event; ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.signal = 0; eventDesc.wait = 0; ze_result_t result = ZE_RESULT_SUCCESS; eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); ASSERT_NE(nullptr, eventPool); event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); ASSERT_NE(nullptr, event); result = event->hostSynchronize(10); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } struct EventCreateAllocationResidencyTest : public ::testing::Test { void SetUp() override { neoDevice = NEO::MockDevice::createWithNewExecutionEnvironment(NEO::defaultHwInfo.get()); mockMemoryOperationsHandler = new NEO::MockMemoryOperationsHandlerTests; neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[0]->memoryOperationsInterface.reset( 
mockMemoryOperationsHandler); NEO::DeviceVector devices; devices.push_back(std::unique_ptr(neoDevice)); driverHandle = std::make_unique>(); driverHandle->initialize(std::move(devices)); device = driverHandle->devices[0]; } void TearDown() override { } NEO::MockMemoryOperationsHandlerTests *mockMemoryOperationsHandler; std::unique_ptr> driverHandle; NEO::MockDevice *neoDevice = nullptr; L0::Device *device = nullptr; }; class TimestampEventCreate : public Test { public: void SetUp() override { DeviceFixture::SetUp(); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.signal = 0; eventDesc.wait = 0; ze_result_t result = ZE_RESULT_SUCCESS; eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); ASSERT_NE(nullptr, eventPool); event = std::unique_ptr>(static_cast *>(L0::Event::create(eventPool.get(), &eventDesc, device))); ASSERT_NE(nullptr, event); } void TearDown() override { DeviceFixture::TearDown(); } std::unique_ptr eventPool; std::unique_ptr> event; }; TEST_F(TimestampEventCreate, givenEventCreatedWithTimestampThenIsTimestampEventFlagSet) { EXPECT_TRUE(event->isEventTimestampFlagSet()); } TEST_F(TimestampEventCreate, givenEventTimestampsCreatedWhenResetIsInvokeThenCorrectDataAreSet) { EXPECT_NE(nullptr, event->kernelEventCompletionData); for (auto j = 0u; j < EventPacketsCount::maxKernelSplit; j++) { for (auto i = 0u; i < NEO::TimestampPacketSizeControl::preferredPacketCount; i++) { EXPECT_EQ(static_cast(Event::State::STATE_INITIAL), event->kernelEventCompletionData[j].getContextStartValue(i)); EXPECT_EQ(static_cast(Event::State::STATE_INITIAL), event->kernelEventCompletionData[j].getGlobalStartValue(i)); EXPECT_EQ(static_cast(Event::State::STATE_INITIAL), 
event->kernelEventCompletionData[j].getContextEndValue(i)); EXPECT_EQ(static_cast(Event::State::STATE_INITIAL), event->kernelEventCompletionData[j].getGlobalEndValue(i)); } EXPECT_EQ(1u, event->kernelEventCompletionData[j].getPacketsUsed()); } EXPECT_EQ(1u, event->kernelCount); } TEST_F(TimestampEventCreate, givenSingleTimestampEventThenAllocationSizeCreatedForAllTimestamps) { auto allocation = &eventPool->getAllocation(); ASSERT_NE(nullptr, allocation); uint32_t minTimestampEventAllocation = eventPool->getEventSize(); EXPECT_GE(allocation->getGraphicsAllocation(device->getNEODevice()->getRootDeviceIndex())->getUnderlyingBufferSize(), minTimestampEventAllocation); } TEST_F(TimestampEventCreate, givenTimestampEventThenAllocationsIsOfPacketTagBufferType) { auto allocation = &eventPool->getAllocation(); ASSERT_NE(nullptr, allocation); EXPECT_EQ(NEO::AllocationType::TIMESTAMP_PACKET_TAG_BUFFER, allocation->getAllocationType()); } TEST_F(TimestampEventCreate, givenEventTimestampWhenPacketCountIsSetThenCorrectOffsetIsReturned) { EXPECT_EQ(1u, event->getPacketsInUse()); auto gpuAddr = event->getGpuAddress(device); EXPECT_EQ(gpuAddr, event->getPacketAddress(device)); event->setPacketsInUse(4u); EXPECT_EQ(4u, event->getPacketsInUse()); gpuAddr += (4u * event->getSinglePacketSize()); event->kernelCount = 2; event->setPacketsInUse(2u); EXPECT_EQ(6u, event->getPacketsInUse()); EXPECT_EQ(gpuAddr, event->getPacketAddress(device)); gpuAddr += (2u * event->getSinglePacketSize()); event->kernelCount = 3; EXPECT_EQ(gpuAddr, event->getPacketAddress(device)); EXPECT_EQ(7u, event->getPacketsInUse()); } TEST_F(TimestampEventCreate, givenEventWhenSignaledAndResetFromTheHostThenCorrectDataAreSet) { EXPECT_NE(nullptr, event->kernelEventCompletionData); event->hostSignal(); ze_result_t result = event->queryStatus(); EXPECT_EQ(ZE_RESULT_SUCCESS, result); event->reset(); result = event->queryStatus(); EXPECT_EQ(ZE_RESULT_NOT_READY, result); for (auto j = 0u; j < 
EventPacketsCount::maxKernelSplit; j++) { for (auto i = 0u; i < NEO::TimestampPacketSizeControl::preferredPacketCount; i++) { EXPECT_EQ(Event::State::STATE_INITIAL, event->kernelEventCompletionData[j].getContextStartValue(i)); EXPECT_EQ(Event::State::STATE_INITIAL, event->kernelEventCompletionData[j].getGlobalStartValue(i)); EXPECT_EQ(Event::State::STATE_INITIAL, event->kernelEventCompletionData[j].getContextEndValue(i)); EXPECT_EQ(Event::State::STATE_INITIAL, event->kernelEventCompletionData[j].getGlobalEndValue(i)); } EXPECT_EQ(1u, event->kernelEventCompletionData[j].getPacketsUsed()); } EXPECT_EQ(1u, event->kernelCount); } TEST_F(TimestampEventCreate, givenpCountZeroCallingQueryTimestampExpThenpCountSetProperly) { uint32_t pCount = 0; auto result = event->queryTimestampsExp(device, &pCount, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(0u, pCount); } TEST_F(TimestampEventCreate, givenpCountLargerThanSupportedWhenCallingQueryTimestampExpThenpCountSetProperly) { uint32_t pCount = 10; event->setPacketsInUse(2u); auto result = event->queryTimestampsExp(device, &pCount, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(2u, pCount); } TEST_F(TimestampEventCreate, givenEventWithStaticPartitionOffThenQueryTimestampExpReturnsUnsupported) { DebugManagerStateRestore restore; NEO::DebugManager.flags.EnableStaticPartitioning.set(0); uint32_t pCount = 0; auto result = event->queryTimestampsExp(device, &pCount, nullptr); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, result); } class TimestampDeviceEventCreate : public Test { public: void SetUp() override { DeviceFixture::SetUp(); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.signal = 0; eventDesc.wait = 0; ze_result_t result = ZE_RESULT_SUCCESS; eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); 
EXPECT_EQ(ZE_RESULT_SUCCESS, result); ASSERT_NE(nullptr, eventPool); event = std::unique_ptr>(static_cast *>(L0::Event::create(eventPool.get(), &eventDesc, device))); ASSERT_NE(nullptr, event); } void TearDown() override { DeviceFixture::TearDown(); } std::unique_ptr eventPool; std::unique_ptr> event; }; TEST_F(TimestampDeviceEventCreate, givenTimestampDeviceEventThenAllocationsIsOfGpuDeviceTimestampType) { auto allocation = &eventPool->getAllocation(); ASSERT_NE(nullptr, allocation); EXPECT_EQ(NEO::AllocationType::GPU_TIMESTAMP_DEVICE_BUFFER, allocation->getAllocationType()); } using EventQueryTimestampExpWithSubDevice = Test; TEST_F(EventQueryTimestampExpWithSubDevice, givenEventWhenQuerytimestampExpWithSubDeviceThenReturnsCorrectValueReturned) { std::unique_ptr eventPool; std::unique_ptr> event; uint32_t deviceCount = 1; ze_device_handle_t rootDeviceHandle; ze_result_t result = zeDeviceGet(driverHandle.get(), &deviceCount, &rootDeviceHandle); EXPECT_EQ(ZE_RESULT_SUCCESS, result); deviceCount = 0; result = zeDeviceGetSubDevices(rootDeviceHandle, &deviceCount, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_TRUE(deviceCount >= 2); auto subDeviceHandle = std::make_unique(deviceCount); result = zeDeviceGetSubDevices(rootDeviceHandle, &deviceCount, subDeviceHandle.get()); EXPECT_EQ(ZE_RESULT_SUCCESS, result); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.signal = 0; eventDesc.wait = 0; auto subDeviceId = 0u; auto subdevice = L0::Device::fromHandle(subDeviceHandle[subDeviceId]); eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 1, &subDeviceHandle[0], &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); ASSERT_NE(nullptr, eventPool); event = std::unique_ptr>(static_cast *>(L0::Event::create(eventPool.get(), &eventDesc, subdevice))); 
ASSERT_NE(nullptr, event); class MockTimestampPackets32 : public TimestampPackets { public: using typename TimestampPackets::Packet; }; typename MockTimestampPackets32::Packet packetData[2]; event->setPacketsInUse(2u); packetData[0].contextStart = 1u; packetData[0].contextEnd = 2u; packetData[0].globalStart = 3u; packetData[0].globalEnd = 4u; packetData[1].contextStart = 5u; packetData[1].contextEnd = 6u; packetData[1].globalStart = 7u; packetData[1].globalEnd = 8u; event->hostAddress = packetData; ze_kernel_timestamp_result_t results[2]; uint32_t numPackets = 2; for (uint32_t packetId = 0; packetId < numPackets; packetId++) { event->kernelEventCompletionData[0].assignDataToAllTimestamps(packetId, event->hostAddress); event->hostAddress = ptrOffset(event->hostAddress, NEO::TimestampPackets::getSinglePacketSize()); } uint32_t pCount = 0; result = event->queryTimestampsExp(subdevice, &pCount, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(1u, pCount); result = event->queryTimestampsExp(subdevice, &pCount, results); EXPECT_EQ(ZE_RESULT_SUCCESS, result); for (uint32_t i = 0; i < pCount; i++) { EXPECT_EQ(packetData[subDeviceId].contextStart, results[i].context.kernelStart); EXPECT_EQ(packetData[subDeviceId].contextEnd, results[i].context.kernelEnd); EXPECT_EQ(packetData[subDeviceId].globalStart, results[i].global.kernelStart); EXPECT_EQ(packetData[subDeviceId].globalEnd, results[i].global.kernelEnd); } } HWCMDTEST_F(IGFX_GEN9_CORE, TimestampEventCreate, givenEventTimestampsWhenQueryKernelTimestampThenCorrectDataAreSet) { class MockTimestampPackets32 : public TimestampPackets { public: using typename TimestampPackets::Packet; }; typename MockTimestampPackets32::Packet data = {}; data.contextStart = 1u; data.contextEnd = 2u; data.globalStart = 3u; data.globalEnd = 4u; event->hostAddress = &data; ze_kernel_timestamp_result_t result = {}; event->queryKernelTimestamp(&result); EXPECT_EQ(data.contextStart, result.context.kernelStart); EXPECT_EQ(data.contextEnd, 
result.context.kernelEnd); EXPECT_EQ(data.globalStart, result.global.kernelStart); EXPECT_EQ(data.globalEnd, result.global.kernelEnd); } TEST_F(TimestampEventCreate, givenEventWhenQueryingTimestampExpThenCorrectDataSet) { class MockTimestampPackets32 : public TimestampPackets { public: using typename TimestampPackets::Packet; }; typename MockTimestampPackets32::Packet packetData[2]; event->setPacketsInUse(2u); packetData[0].contextStart = 1u; packetData[0].contextEnd = 2u; packetData[0].globalStart = 3u; packetData[0].globalEnd = 4u; packetData[1].contextStart = 5u; packetData[1].contextEnd = 6u; packetData[1].globalStart = 7u; packetData[1].globalEnd = 8u; event->hostAddress = packetData; ze_kernel_timestamp_result_t results[2]; uint32_t pCount = 2; for (uint32_t packetId = 0; packetId < pCount; packetId++) { event->kernelEventCompletionData[0].assignDataToAllTimestamps(packetId, event->hostAddress); event->hostAddress = ptrOffset(event->hostAddress, NEO::TimestampPackets::getSinglePacketSize()); } auto result = event->queryTimestampsExp(device, &pCount, results); EXPECT_EQ(ZE_RESULT_SUCCESS, result); for (uint32_t i = 0; i < pCount; i++) { EXPECT_EQ(packetData[i].contextStart, results[i].context.kernelStart); EXPECT_EQ(packetData[i].contextEnd, results[i].context.kernelEnd); EXPECT_EQ(packetData[i].globalStart, results[i].global.kernelStart); EXPECT_EQ(packetData[i].globalEnd, results[i].global.kernelEnd); } } HWTEST_EXCLUDE_PRODUCT(TimestampEventCreate, givenEventTimestampsWhenQueryKernelTimestampThenCorrectDataAreSet, IGFX_GEN12LP_CORE); TEST_F(TimestampEventCreate, givenEventWhenQueryKernelTimestampThenNotReadyReturned) { struct MockEventQuery : public EventImp { MockEventQuery(L0::EventPool *eventPool, int index, L0::Device *device) : EventImp(eventPool, index, device) {} ze_result_t queryStatus() override { return ZE_RESULT_NOT_READY; } }; auto mockEvent = std::make_unique(eventPool.get(), 1u, device); ze_kernel_timestamp_result_t resultTimestamp = {}; auto 
result = mockEvent->queryKernelTimestamp(&resultTimestamp); EXPECT_EQ(ZE_RESULT_NOT_READY, result); EXPECT_EQ(0u, resultTimestamp.context.kernelStart); EXPECT_EQ(0u, resultTimestamp.context.kernelEnd); EXPECT_EQ(0u, resultTimestamp.global.kernelStart); EXPECT_EQ(0u, resultTimestamp.global.kernelEnd); } using EventPoolCreateMultiDevice = Test; TEST_F(EventPoolCreateMultiDevice, whenCreatingEventPoolWithMultipleDevicesThenEventPoolCreateSucceeds) { ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.stype = ZE_STRUCTURE_TYPE_EVENT_POOL_DESC; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE; eventPoolDesc.count = 32; uint32_t deviceCount = 0; ze_result_t result = zeDeviceGet(driverHandle.get(), &deviceCount, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(deviceCount, numRootDevices); ze_device_handle_t *devices = new ze_device_handle_t[deviceCount]; result = zeDeviceGet(driverHandle.get(), &deviceCount, devices); EXPECT_EQ(ZE_RESULT_SUCCESS, result); std::unique_ptr eventPool(EventPool::create(driverHandle.get(), context, deviceCount, devices, &eventPoolDesc, result)); EXPECT_NE(nullptr, eventPool); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto allocation = &eventPool->getAllocation(); EXPECT_NE(nullptr, allocation); EXPECT_EQ(allocation->getGraphicsAllocations().size(), numRootDevices); delete[] devices; } TEST_F(EventPoolCreateMultiDevice, whenCreatingEventPoolWithMultipleDevicesThenDontDuplicateRootDeviceIndices) { ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.stype = ZE_STRUCTURE_TYPE_EVENT_POOL_DESC; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE; eventPoolDesc.count = 32; uint32_t deviceCount = 1; ze_device_handle_t rootDeviceHandle; ze_result_t result = zeDeviceGet(driverHandle.get(), &deviceCount, &rootDeviceHandle); EXPECT_EQ(ZE_RESULT_SUCCESS, result); deviceCount = 0; result = zeDeviceGetSubDevices(rootDeviceHandle, &deviceCount, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_TRUE(deviceCount >= 2); auto 
subDeviceHandle = std::make_unique(deviceCount); result = zeDeviceGetSubDevices(rootDeviceHandle, &deviceCount, subDeviceHandle.get()); EXPECT_EQ(ZE_RESULT_SUCCESS, result); std::unique_ptr eventPool(EventPool::create(driverHandle.get(), context, deviceCount, subDeviceHandle.get(), &eventPoolDesc, result)); EXPECT_NE(nullptr, eventPool); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto allocation = &eventPool->getAllocation(); EXPECT_NE(nullptr, allocation); EXPECT_EQ(allocation->getGraphicsAllocations().size(), 1u); } TEST_F(EventPoolCreateMultiDevice, whenCreatingEventPoolWithNoDevicesThenEventPoolCreateSucceedsAndAllDeviceAreUsed) { ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.stype = ZE_STRUCTURE_TYPE_EVENT_POOL_DESC; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE; eventPoolDesc.count = 32; ze_result_t result = ZE_RESULT_SUCCESS; std::unique_ptr eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_NE(nullptr, eventPool); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto allocation = &eventPool->getAllocation(); EXPECT_NE(nullptr, allocation); EXPECT_EQ(allocation->getGraphicsAllocations().size(), numRootDevices); } using EventPoolCreateSingleDevice = Test; TEST_F(EventPoolCreateSingleDevice, whenCreatingEventPoolWithNoDevicesThenEventPoolCreateSucceedsAndSingleDeviceIsUsed) { ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.stype = ZE_STRUCTURE_TYPE_EVENT_POOL_DESC; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE; eventPoolDesc.count = 32; ze_result_t result = ZE_RESULT_SUCCESS; std::unique_ptr eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_NE(nullptr, eventPool); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto allocation = &eventPool->getAllocation(); EXPECT_NE(nullptr, allocation); EXPECT_EQ(allocation->getGraphicsAllocations().size(), 1u); } struct EventPoolCreateNegativeTest : public ::testing::Test { void SetUp() override { 
NEO::MockCompilerEnableGuard mock(true); executionEnvironment = new NEO::ExecutionEnvironment(); executionEnvironment->prepareRootDeviceEnvironments(numRootDevices); for (uint32_t i = 0; i < numRootDevices; i++) { executionEnvironment->rootDeviceEnvironments[i]->setHwInfo(NEO::defaultHwInfo.get()); } std::vector> devices; for (uint32_t i = 0; i < numRootDevices; i++) { neoDevice = NEO::MockDevice::create(executionEnvironment, i); devices.push_back(std::unique_ptr(neoDevice)); } driverHandle = std::make_unique>(); driverHandle->initialize(std::move(devices)); static_cast(driverHandle.get()->getMemoryManager())->isMockEventPoolCreateMemoryManager = true; device = driverHandle->devices[0]; ze_context_handle_t hContext; ze_context_desc_t desc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC, nullptr, 0}; ze_result_t res = driverHandle->createContext(&desc, 0u, nullptr, &hContext); EXPECT_EQ(ZE_RESULT_SUCCESS, res); context = static_cast(Context::fromHandle(hContext)); } void TearDown() override { context->destroy(); } NEO::ExecutionEnvironment *executionEnvironment = nullptr; std::unique_ptr> driverHandle; NEO::MockDevice *neoDevice = nullptr; L0::Device *device = nullptr; const uint32_t numRootDevices = 2u; L0::ContextImp *context = nullptr; }; TEST_F(EventPoolCreateNegativeTest, whenCreatingEventPoolButMemoryManagerFailsThenErrorIsReturned) { ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.stype = ZE_STRUCTURE_TYPE_EVENT_POOL_DESC; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE; eventPoolDesc.count = 32; uint32_t deviceCount = 0; ze_result_t result = zeDeviceGet(driverHandle.get(), &deviceCount, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(deviceCount, numRootDevices); ze_device_handle_t *devices = new ze_device_handle_t[deviceCount]; result = zeDeviceGet(driverHandle.get(), &deviceCount, devices); EXPECT_EQ(ZE_RESULT_SUCCESS, result); std::unique_ptr eventPool(EventPool::create(driverHandle.get(), context, deviceCount, devices, &eventPoolDesc, result)); 
EXPECT_EQ(ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY, result); EXPECT_EQ(nullptr, eventPool); delete[] devices; } TEST_F(EventPoolCreateNegativeTest, whenInitializingEventPoolButMemoryManagerFailsThenErrorIsReturned) { ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.stype = ZE_STRUCTURE_TYPE_EVENT_POOL_DESC; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE; eventPoolDesc.count = 32; uint32_t deviceCount = 0; ze_result_t result = zeDeviceGet(driverHandle.get(), &deviceCount, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(deviceCount, numRootDevices); ze_device_handle_t *devices = new ze_device_handle_t[deviceCount]; result = zeDeviceGet(driverHandle.get(), &deviceCount, devices); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto eventPool = new L0::EventPoolImp(&eventPoolDesc); EXPECT_NE(nullptr, eventPool); result = eventPool->initialize(driverHandle.get(), context, numRootDevices, devices); EXPECT_EQ(ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY, result); delete eventPool; delete[] devices; } class EventFixture : public DeviceFixture { public: void SetUp() { DeviceFixture::SetUp(); auto hDevice = device->toHandle(); ze_result_t result = ZE_RESULT_SUCCESS; eventPool = whitebox_cast(EventPool::create(device->getDriverHandle(), context, 1, &hDevice, &eventPoolDesc, result)); } void TearDown() { eventPool->destroy(); DeviceFixture::TearDown(); } ze_event_pool_desc_t eventPoolDesc = { ZE_STRUCTURE_TYPE_EVENT_POOL_DESC, nullptr, ZE_EVENT_POOL_FLAG_HOST_VISIBLE, 4}; ze_event_desc_t eventDesc = {}; EventPool *eventPool; }; using EventTests = Test; TEST_F(EventTests, WhenQueryingStatusThenSuccessIsReturned) { auto event = whitebox_cast(Event::create(eventPool, &eventDesc, device)); ASSERT_NE(event, nullptr); auto result = event->hostSignal(); ASSERT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(event->queryStatus(), ZE_RESULT_SUCCESS); event->destroy(); } TEST_F(EventTests, GivenResetWhenQueryingStatusThenNotReadyIsReturned) { auto event = 
whitebox_cast(Event::create(eventPool, &eventDesc, device)); ASSERT_NE(event, nullptr); auto result = event->hostSignal(); ASSERT_EQ(ZE_RESULT_SUCCESS, result); event->setPartitionedEvent(true); result = event->reset(); ASSERT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_FALSE(event->isPartitionedEvent()); EXPECT_EQ(event->queryStatus(), ZE_RESULT_NOT_READY); event->destroy(); } TEST_F(EventTests, WhenDestroyingAnEventThenSuccessIsReturned) { auto event = whitebox_cast(Event::create(eventPool, &eventDesc, device)); ASSERT_NE(event, nullptr); auto result = event->destroy(); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST_F(EventTests, givenTwoEventsCreatedThenTheyHaveDifferentAddresses) { ze_event_desc_t eventDesc0 = {}; eventDesc0.index = 0; eventDesc.index = 0; ze_event_desc_t eventDesc1 = {}; eventDesc1.index = 1; eventDesc.index = 1; auto event0 = whitebox_cast(Event::create(eventPool, &eventDesc0, device)); ASSERT_NE(event0, nullptr); auto event1 = whitebox_cast(Event::create(eventPool, &eventDesc1, device)); ASSERT_NE(event1, nullptr); EXPECT_NE(event0->hostAddress, event1->hostAddress); EXPECT_NE(event0->getGpuAddress(device), event1->getGpuAddress(device)); event0->destroy(); event1->destroy(); } TEST_F(EventTests, givenRegularEventUseMultiplePacketsWhenHostSignalThenExpectAllPacketsAreSignaled) { eventDesc.index = 0; eventDesc.signal = 0; eventDesc.wait = 0; auto event = std::unique_ptr>(static_cast *>(L0::Event::create(eventPool, &eventDesc, device))); ASSERT_NE(event, nullptr); uint32_t *hostAddr = static_cast(event->getHostAddress()); EXPECT_EQ(*hostAddr, Event::STATE_INITIAL); EXPECT_EQ(1u, event->getPacketsInUse()); constexpr uint32_t packetsUsed = 4u; event->setPacketsInUse(packetsUsed); event->setEventTimestampFlag(false); event->hostSignal(); for (uint32_t i = 0; i < packetsUsed; i++) { EXPECT_EQ(Event::STATE_SIGNALED, *hostAddr); hostAddr = ptrOffset(hostAddr, event->getSinglePacketSize()); } } TEST_F(EventTests, 
givenPartitionedEventUseMultiplePacketsWhenHostSignalThenExpectAllPacketsAreSignaled) { eventDesc.index = 0; eventDesc.signal = 0; eventDesc.wait = 0; auto event = std::unique_ptr>(static_cast *>(L0::Event::create(eventPool, &eventDesc, device))); ASSERT_NE(event, nullptr); uint32_t *hostAddr = static_cast(ptrOffset(event->getHostAddress(), event->getContextEndOffset())); EXPECT_EQ(Event::STATE_INITIAL, *hostAddr); EXPECT_EQ(1u, event->getPacketsInUse()); constexpr uint32_t packetsUsed = 4u; event->setPacketsInUse(packetsUsed); event->setEventTimestampFlag(false); event->setPartitionedEvent(true); event->hostSignal(); for (uint32_t i = 0; i < packetsUsed; i++) { EXPECT_EQ(Event::STATE_SIGNALED, *hostAddr); hostAddr = ptrOffset(hostAddr, event->getSinglePacketSize()); } } struct EventSizeFixture : public DeviceFixture { void SetUp() { DeviceFixture::SetUp(); hDevice = device->toHandle(); } void TearDown() { DeviceFixture::TearDown(); } void createEvents() { ze_event_handle_t hEvent0 = 0; ze_event_handle_t hEvent1 = 0; ze_event_desc_t eventDesc0 = {}; ze_event_desc_t eventDesc1 = {}; eventDesc0.index = 0; eventDesc1.index = 1; auto result = eventPool->createEvent(&eventDesc0, &hEvent0); ASSERT_EQ(ZE_RESULT_SUCCESS, result); result = eventPool->createEvent(&eventDesc1, &hEvent1); ASSERT_EQ(ZE_RESULT_SUCCESS, result); eventObj0.reset(L0::Event::fromHandle(hEvent0)); eventObj1.reset(L0::Event::fromHandle(hEvent1)); } ze_event_pool_desc_t eventPoolDesc = { ZE_STRUCTURE_TYPE_EVENT_POOL_DESC, nullptr, ZE_EVENT_POOL_FLAG_HOST_VISIBLE, 4}; DebugManagerStateRestore restore; ze_device_handle_t hDevice = 0; std::unique_ptr eventPool; std::unique_ptr eventObj0; std::unique_ptr eventObj1; }; using EventSizeTests = Test; HWTEST_F(EventSizeTests, whenCreatingEventPoolThenUseCorrectSizeAndAlignment) { ze_result_t result = ZE_RESULT_SUCCESS; eventPool.reset(static_cast(EventPool::create(device->getDriverHandle(), context, 1, &hDevice, &eventPoolDesc, result))); 
EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto &hwHelper = device->getHwHelper(); auto expectedAlignment = static_cast(hwHelper.getTimestampPacketAllocatorAlignment()); auto singlePacketSize = TimestampPackets::getSinglePacketSize(); auto expectedSize = static_cast(alignUp(EventPacketsCount::eventPackets * singlePacketSize, expectedAlignment)); EXPECT_EQ(expectedSize, eventPool->getEventSize()); createEvents(); constexpr size_t timestampPacketTypeSize = sizeof(typename FamilyType::TimestampPacketType); EXPECT_EQ(timestampPacketTypeSize / 4, eventObj0->getTimestampSizeInDw()); EXPECT_EQ(timestampPacketTypeSize / 4, eventObj1->getTimestampSizeInDw()); EXPECT_EQ(0u, eventObj0->getContextStartOffset()); EXPECT_EQ(timestampPacketTypeSize, eventObj0->getGlobalStartOffset()); EXPECT_EQ(timestampPacketTypeSize * 2, eventObj0->getContextEndOffset()); EXPECT_EQ(timestampPacketTypeSize * 3, eventObj0->getGlobalEndOffset()); EXPECT_EQ(timestampPacketTypeSize * 4, eventObj0->getSinglePacketSize()); auto hostPtrDiff = ptrDiff(eventObj1->getHostAddress(), eventObj0->getHostAddress()); EXPECT_EQ(expectedSize, hostPtrDiff); } HWTEST_F(EventSizeTests, givenDebugFlagwhenCreatingEventPoolThenUseCorrectSizeAndAlignment) { auto &hwHelper = device->getHwHelper(); auto expectedAlignment = static_cast(hwHelper.getTimestampPacketAllocatorAlignment()); { DebugManager.flags.OverrideTimestampPacketSize.set(4); ze_result_t result = ZE_RESULT_SUCCESS; eventPool.reset(static_cast(EventPool::create(device->getDriverHandle(), context, 1, &hDevice, &eventPoolDesc, result))); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto singlePacketSize = TimestampPackets::getSinglePacketSize(); auto expectedSize = static_cast(alignUp(EventPacketsCount::eventPackets * singlePacketSize, expectedAlignment)); EXPECT_EQ(expectedSize, eventPool->getEventSize()); createEvents(); EXPECT_EQ(1u, eventObj0->getTimestampSizeInDw()); EXPECT_EQ(1u, eventObj1->getTimestampSizeInDw()); auto hostPtrDiff = 
ptrDiff(eventObj1->getHostAddress(), eventObj0->getHostAddress()); EXPECT_EQ(expectedSize, hostPtrDiff); } { DebugManager.flags.OverrideTimestampPacketSize.set(8); ze_result_t result = ZE_RESULT_SUCCESS; eventPool.reset(static_cast(EventPool::create(device->getDriverHandle(), context, 1, &hDevice, &eventPoolDesc, result))); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto singlePacketSize = TimestampPackets::getSinglePacketSize(); auto expectedSize = static_cast(alignUp(EventPacketsCount::eventPackets * singlePacketSize, expectedAlignment)); EXPECT_EQ(expectedSize, eventPool->getEventSize()); createEvents(); EXPECT_EQ(2u, eventObj0->getTimestampSizeInDw()); EXPECT_EQ(2u, eventObj1->getTimestampSizeInDw()); auto hostPtrDiff = ptrDiff(eventObj1->getHostAddress(), eventObj0->getHostAddress()); EXPECT_EQ(expectedSize, hostPtrDiff); } { DebugManager.flags.OverrideTimestampPacketSize.set(12); ze_result_t result = ZE_RESULT_SUCCESS; EXPECT_ANY_THROW(EventPool::create(device->getDriverHandle(), context, 1, &hDevice, &eventPoolDesc, result)); EXPECT_ANY_THROW(createEvents()); } } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/fence/000077500000000000000000000000001422164147700263735ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/fence/CMakeLists.txt000066400000000000000000000003511422164147700311320ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/test_fence.cpp ) compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/fence/test_fence.cpp000066400000000000000000000276561422164147700312360ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/mocks/mock_command_stream_receiver.h" #include 
"shared/test/common/mocks/mock_csr.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/source/fence/fence.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_built_ins.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h" #include "level_zero/core/test/unit_tests/mocks/mock_fence.h" #include #include #include #include using namespace std::chrono_literals; namespace CpuIntrinsicsTests { extern std::atomic pauseCounter; extern volatile uint32_t *pauseAddress; extern uint32_t pauseValue; extern uint32_t pauseOffset; extern std::function setupPauseAddress; } // namespace CpuIntrinsicsTests namespace L0 { namespace ult { using FenceTest = Test; TEST_F(FenceTest, whenQueryingStatusThenCsrAllocationsAreDownloaded) { auto csr = std::make_unique(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield()); *csr->tagAddress = 0; Mock cmdQueue(device, csr.get()); ze_fence_desc_t fenceDesc = {}; auto fence = Fence::create(&cmdQueue, &fenceDesc); EXPECT_NE(nullptr, fence); EXPECT_FALSE(csr->downloadAllocationsCalled); auto status = fence->queryStatus(); EXPECT_EQ(ZE_RESULT_NOT_READY, status); EXPECT_TRUE(csr->downloadAllocationsCalled); status = fence->destroy(); EXPECT_EQ(ZE_RESULT_SUCCESS, status); } TEST_F(FenceTest, givenFenceSignalFlagUsedWhenQueryingFenceAfterCreationThenReturnReadyStatus) { auto csr = std::make_unique(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield()); *csr->tagAddress = 0; Mock cmdQueue(device, csr.get()); ze_fence_desc_t fenceDesc = {}; fenceDesc.stype = ZE_STRUCTURE_TYPE_FENCE_DESC; fenceDesc.pNext = nullptr; fenceDesc.flags = ZE_FENCE_FLAG_SIGNALED; auto fence = Fence::create(&cmdQueue, &fenceDesc); EXPECT_NE(nullptr, fence); EXPECT_FALSE(csr->downloadAllocationsCalled); auto status = fence->queryStatus(); EXPECT_EQ(ZE_RESULT_SUCCESS, status); status = fence->destroy(); EXPECT_EQ(ZE_RESULT_SUCCESS, 
status); } TEST_F(FenceTest, whenQueryingStatusWithoutCsrAndFenceUnsignaledThenReturnsNotReady) { auto csr = std::make_unique(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield()); *csr->tagAddress = 0; Mock cmdQueue(device, csr.get()); ze_fence_desc_t fenceDesc = {}; auto fence = Fence::create(&cmdQueue, &fenceDesc); EXPECT_NE(nullptr, fence); auto status = fence->queryStatus(); EXPECT_EQ(ZE_RESULT_NOT_READY, status); fence->destroy(); } TEST_F(FenceTest, GivenGpuHangWhenHostSynchronizeIsCalledThenDeviceLostIsReturned) { const auto csr = std::make_unique(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield()); csr->isGpuHangDetectedReturnValue = true; csr->testTaskCountReadyReturnValue = false; Mock cmdqueue(device, csr.get()); ze_fence_desc_t desc = {}; std::unique_ptr> fence; fence.reset(whitebox_cast(Fence::create(&cmdqueue, &desc))); ASSERT_NE(nullptr, fence); fence->taskCount = 1; fence->gpuHangCheckPeriod = 0ms; constexpr uint64_t timeout = std::numeric_limits::max(); auto result = fence->hostSynchronize(timeout); EXPECT_EQ(ZE_RESULT_ERROR_DEVICE_LOST, result); } TEST_F(FenceTest, GivenNoGpuHangAndOneNanosecondTimeoutWhenHostSynchronizeIsCalledThenResultNotReadyIsReturnedDueToTimeout) { const auto csr = std::make_unique(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield()); csr->isGpuHangDetectedReturnValue = false; csr->testTaskCountReadyReturnValue = false; Mock cmdqueue(device, csr.get()); ze_fence_desc_t desc = {}; std::unique_ptr> fence; fence.reset(whitebox_cast(Fence::create(&cmdqueue, &desc))); ASSERT_NE(nullptr, fence); fence->taskCount = 1; fence->gpuHangCheckPeriod = 0ms; constexpr uint64_t timeoutNanoseconds = 1; auto result = fence->hostSynchronize(timeoutNanoseconds); EXPECT_EQ(ZE_RESULT_NOT_READY, result); } TEST_F(FenceTest, GivenLongPeriodOfGpuCheckAndOneNanosecondTimeoutWhenHostSynchronizeIsCalledThenResultNotReadyIsReturnedDueToTimeout) { const auto csr = 
std::make_unique(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield()); csr->testTaskCountReadyReturnValue = false; Mock cmdqueue(device, csr.get()); ze_fence_desc_t desc = {}; std::unique_ptr> fence; fence.reset(whitebox_cast(Fence::create(&cmdqueue, &desc))); ASSERT_NE(nullptr, fence); fence->taskCount = 1; fence->gpuHangCheckPeriod = 50000000ms; constexpr uint64_t timeoutNanoseconds = 1; auto result = fence->hostSynchronize(timeoutNanoseconds); EXPECT_EQ(ZE_RESULT_NOT_READY, result); } TEST_F(FenceTest, GivenSuccessfulQueryResultAndNoTimeoutWhenHostSynchronizeIsCalledThenResultSuccessIsReturned) { const auto csr = std::make_unique(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield()); csr->testTaskCountReadyReturnValue = true; Mock cmdqueue(device, csr.get()); ze_fence_desc_t desc = {}; std::unique_ptr> fence; fence.reset(whitebox_cast(Fence::create(&cmdqueue, &desc))); ASSERT_NE(nullptr, fence); fence->taskCount = 1; constexpr uint64_t timeout = std::numeric_limits::max(); auto result = fence->hostSynchronize(timeout); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } using FenceSynchronizeTest = Test; TEST_F(FenceSynchronizeTest, givenCallToFenceHostSynchronizeWithTimeoutZeroAndStateInitialThenHostSynchronizeReturnsNotReady) { std::unique_ptr csr = nullptr; csr = std::make_unique(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield()); Mock cmdQueue(device, csr.get()); std::unique_ptr fence; ze_fence_desc_t fenceDesc = {}; fence = std::unique_ptr(L0::Fence::create(&cmdQueue, &fenceDesc)); EXPECT_NE(nullptr, fence); *csr->tagAddress = 0; ze_result_t result = fence->hostSynchronize(0); EXPECT_EQ(ZE_RESULT_NOT_READY, result); } TEST_F(FenceSynchronizeTest, givenCallToFenceHostSynchronizeWithNonZeroTimeoutAndStateInitialThenHostSynchronizeReturnsNotReady) { std::unique_ptr csr = nullptr; csr = std::make_unique(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield()); Mock cmdQueue(device, csr.get()); 
std::unique_ptr fence; ze_fence_desc_t fenceDesc = {}; fence = std::unique_ptr(L0::Fence::create(&cmdQueue, &fenceDesc)); EXPECT_NE(nullptr, fence); *csr->tagAddress = 0; ze_result_t result = fence->hostSynchronize(10); EXPECT_EQ(ZE_RESULT_NOT_READY, result); } TEST_F(FenceSynchronizeTest, givenCallToFenceHostSynchronizeWithTimeoutZeroAndTaskCountEqualsTagAllocationThenHostSynchronizeReturnsSuccess) { std::unique_ptr csr = nullptr; csr = std::make_unique(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield()); Mock cmdQueue(device, csr.get()); ze_fence_desc_t fenceDesc = {}; auto fence = std::unique_ptr(whitebox_cast(Fence::create(&cmdQueue, &fenceDesc))); EXPECT_NE(nullptr, fence); fence->taskCount = 1; *csr->tagAddress = 1; ze_result_t result = fence->hostSynchronize(0); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST_F(FenceSynchronizeTest, givenCallToFenceHostSynchronizeWithTimeoutNonZeroAndTaskCountEqualsTagAllocationThenHostSynchronizeReturnsSuccess) { std::unique_ptr csr = nullptr; csr = std::make_unique(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield()); Mock cmdQueue(device, csr.get()); ze_fence_desc_t fenceDesc = {}; auto fence = std::unique_ptr(whitebox_cast(Fence::create(&cmdQueue, &fenceDesc))); EXPECT_NE(nullptr, fence); fence->taskCount = 1; *csr->tagAddress = 1; ze_result_t result = fence->hostSynchronize(10); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST_F(FenceSynchronizeTest, givenInfiniteTimeoutWhenWaitingForFenceCompletionThenReturnOnlyAfterAllCsrPartitionsCompleted) { constexpr uint32_t activePartitions = 2; constexpr uint32_t postSyncOffset = 16; const auto csr = std::make_unique(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield()); ASSERT_NE(nullptr, csr->getTagAddress()); csr->postSyncWriteOffset = postSyncOffset; csr->activePartitions = activePartitions; Mock cmdqueue(device, csr.get()); ze_fence_desc_t desc = {}; std::unique_ptr> fence; 
fence.reset(whitebox_cast(Fence::create(&cmdqueue, &desc))); ASSERT_NE(nullptr, fence); fence->taskCount = 1; VariableBackup backupPauseAddress(&CpuIntrinsicsTests::pauseAddress); VariableBackup backupPauseValue(&CpuIntrinsicsTests::pauseValue, 0); VariableBackup backupPauseOffset(&CpuIntrinsicsTests::pauseOffset); VariableBackup> backupSetupPauseAddress(&CpuIntrinsicsTests::setupPauseAddress); CpuIntrinsicsTests::pauseCounter = 0u; CpuIntrinsicsTests::pauseAddress = csr->getTagAddress(); volatile uint32_t *hostAddr = csr->getTagAddress(); for (uint32_t i = 0; i < activePartitions; i++) { *hostAddr = 0; hostAddr = ptrOffset(hostAddr, postSyncOffset); } CpuIntrinsicsTests::setupPauseAddress = [&]() { if (CpuIntrinsicsTests::pauseCounter > 10) { volatile uint32_t *nextPacket = CpuIntrinsicsTests::pauseAddress; for (uint32_t i = 0; i < activePartitions; i++) { *nextPacket = 1; nextPacket = ptrOffset(nextPacket, postSyncOffset); } } }; constexpr uint64_t infiniteTimeout = std::numeric_limits::max(); auto result = fence->hostSynchronize(infiniteTimeout); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } using FenceAubCsrTest = Test; HWTEST_F(FenceAubCsrTest, givenCallToFenceHostSynchronizeWithAubModeCsrReturnsSuccess) { std::unique_ptr> driverHandle; NEO::MockDevice *neoDevice = nullptr; L0::Device *device = nullptr; neoDevice = NEO::MockDevice::createWithNewExecutionEnvironment(NEO::defaultHwInfo.get()); auto mockBuiltIns = new MockBuiltins(); neoDevice->executionEnvironment->rootDeviceEnvironments[0]->builtins.reset(mockBuiltIns); NEO::DeviceVector devices; devices.push_back(std::unique_ptr(neoDevice)); driverHandle = std::make_unique>(); driverHandle->initialize(std::move(devices)); device = driverHandle->devices[0]; int32_t tag = 1; auto aubCsr = new MockCsrAub(tag, *neoDevice->executionEnvironment, neoDevice->getRootDeviceIndex(), neoDevice->getDeviceBitfield()); neoDevice->resetCommandStreamReceiver(aubCsr); Mock cmdQueue(device, aubCsr); ze_fence_desc_t fenceDesc = {}; 
auto fence = std::unique_ptr(Fence::create(&cmdQueue, &fenceDesc)); ze_result_t result = fence->hostSynchronize(10); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/helper/000077500000000000000000000000001422164147700265725ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/helper/CMakeLists.txt000066400000000000000000000007441422164147700313370ustar00rootroot00000000000000# # Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT # target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/api_specific_config_l0_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/heap_assigner_l0_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/l0_hw_helper_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/properties_parser_tests.cpp ) add_subdirectories() api_specific_config_l0_tests.cpp000066400000000000000000000026171422164147700350050ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/helper/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_container/implicit_scaling.h" #include "shared/source/helpers/api_specific_config.h" #include "level_zero/core/source/compiler_interface/l0_reg_path.h" #include "gtest/gtest.h" namespace NEO { TEST(ApiSpecificConfigL0Tests, WhenGettingApiTypeThenCorrectTypeIsReturned) { EXPECT_EQ(ApiSpecificConfig::L0, ApiSpecificConfig::getApiType()); } TEST(ApiSpecificConfigL0Tests, WhenGettingAUBPrefixByApiTypeL0IsReturned) { EXPECT_EQ(0, strcmp("l0_", ApiSpecificConfig::getAubPrefixForSpecificApi().c_str())); } TEST(ApiSpecificConfigL0Tests, WhenGettingNameL0IsReturned) { EXPECT_EQ(0, strcmp("l0", ApiSpecificConfig::getName().c_str())); } TEST(ApiSpecificConfigL0Tests, WhenCheckingIfStatelessCompressionIsSupportedThenReturnFalse) { 
EXPECT_FALSE(ApiSpecificConfig::isStatelessCompressionSupported()); } TEST(ApiSpecificConfigL0Tests, givenMaxAllocSizeWhenGettingReducedMaxAllocSizeThenReturnSameValue) { EXPECT_EQ(1024u, ApiSpecificConfig::getReducedMaxAllocSize(1024)); } TEST(ApiSpecificConfigL0Tests, WhenGettingRegistryPathThenL0RegistryPathIsReturned) { EXPECT_STREQ(L0::registryPath, ApiSpecificConfig::getRegistryPath()); } TEST(ImplicitScalingApiTests, givenLevelZeroApiUsedThenSupportEnabled) { EXPECT_TRUE(ImplicitScaling::apiSupport); } } // namespace NEO heap_assigner_l0_tests.cpp000066400000000000000000000116731422164147700336540ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/helper/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/heap_assigner.h" #include "shared/source/os_interface/hw_info_config.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_memory_manager.h" namespace L0 { namespace ult { using AlocationHelperTests = Test; using Platforms = IsAtMostProduct; HWTEST2_F(AlocationHelperTests, givenLinearStreamTypeWhenUseExternalAllocatorForSshAndDshDisabledThenUse32BitIsFalse, Platforms) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.UseExternalAllocatorForSshAndDsh.set(false); HeapAssigner heapAssigner = {}; EXPECT_FALSE(heapAssigner.use32BitHeap(AllocationType::LINEAR_STREAM)); } HWTEST2_F(AlocationHelperTests, givenLinearStreamTypeWhenUseExternalAllocatorForSshAndDshEnabledThenUse32BitIsTrue, Platforms) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.UseExternalAllocatorForSshAndDsh.set(true); HeapAssigner heapAssigner = {}; EXPECT_TRUE(heapAssigner.use32BitHeap(AllocationType::LINEAR_STREAM)); } HWTEST2_F(AlocationHelperTests, 
givenLinearStreamTypeWhenUseIternalAllocatorThenUseHeapExternal, Platforms) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.UseExternalAllocatorForSshAndDsh.set(true); HeapAssigner heapAssigner = {}; auto heapIndex = heapAssigner.get32BitHeapIndex(AllocationType::LINEAR_STREAM, true, *defaultHwInfo.get(), false); EXPECT_EQ(heapIndex, NEO::HeapIndex::HEAP_EXTERNAL_DEVICE_MEMORY); } struct MockMemoryManagerAllocationHelper : public MemoryManagerMock { MockMemoryManagerAllocationHelper(NEO::ExecutionEnvironment &executionEnvironment) : MemoryManagerMock(const_cast(executionEnvironment)) {} GraphicsAllocation *allocate32BitGraphicsMemoryImpl(const AllocationData &allocationData, bool useLocalMemory) override { passedUseLocalMem = useLocalMemory; return nullptr; } bool passedUseLocalMem = false; }; TEST_F(AlocationHelperTests, GivenLinearStreamAllocTypeWhenUseExternalAllocatorForSshAndDshEnabledThenUseLocalMemEqualHwHelperValue) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.UseExternalAllocatorForSshAndDsh.set(true); AllocationData allocationData; allocationData.type = AllocationType::LINEAR_STREAM; std::unique_ptr mockMemoryManager(new MockMemoryManagerAllocationHelper(*device->getNEODevice()->getExecutionEnvironment())); mockMemoryManager->allocateGraphicsMemory(allocationData); EXPECT_EQ(mockMemoryManager->passedUseLocalMem, HwInfoConfig::get(device->getHwInfo().platform.eProductFamily)->heapInLocalMem(device->getHwInfo())); } TEST_F(AlocationHelperTests, GivenInternalAllocTypeWhenUseExternalAllocatorForSshAndDshDisabledThenUseLocalMemEqualFalse) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.UseExternalAllocatorForSshAndDsh.set(false); AllocationData allocationData; allocationData.type = AllocationType::KERNEL_ISA; std::unique_ptr mockMemoryManager(new MockMemoryManagerAllocationHelper(*device->getNEODevice()->getExecutionEnvironment())); mockMemoryManager->allocateGraphicsMemory(allocationData); 
EXPECT_FALSE(mockMemoryManager->passedUseLocalMem); } TEST_F(AlocationHelperTests, givenLinearStreamAllocationWhenSelectingHeapWithUseExternalAllocatorForSshAndDshEnabledThenExternalHeapIsUsed) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.UseExternalAllocatorForSshAndDsh.set(true); std::unique_ptr mockMemoryManager(new MockMemoryManagerAllocationHelper(*device->getNEODevice()->getExecutionEnvironment())); GraphicsAllocation allocation{0, AllocationType::LINEAR_STREAM, nullptr, 0, 0, 0, MemoryPool::MemoryNull}; allocation.set32BitAllocation(false); EXPECT_EQ(MemoryManager::selectExternalHeap(allocation.isAllocatedInLocalMemoryPool()), mockMemoryManager->selectHeap(&allocation, false, false, false)); } TEST_F(AlocationHelperTests, givenExternalHeapIndexWhenMapingToExternalFrontWindowThenEternalFrontWindowReturned) { EXPECT_EQ(HeapIndex::HEAP_EXTERNAL_FRONT_WINDOW, HeapAssigner::mapExternalWindowIndex(HeapIndex::HEAP_EXTERNAL)); } TEST_F(AlocationHelperTests, givenExternalDeviceHeapIndexWhenMapingToExternalFrontWindowThenEternalDeviceFrontWindowReturned) { EXPECT_EQ(HeapIndex::HEAP_EXTERNAL_DEVICE_FRONT_WINDOW, HeapAssigner::mapExternalWindowIndex(HeapIndex::HEAP_EXTERNAL_DEVICE_MEMORY)); } TEST_F(AlocationHelperTests, givenOtherThanExternalHeapIndexWhenMapingToExternalFrontWindowThenAbortHasBeenThrown) { EXPECT_THROW(HeapAssigner::mapExternalWindowIndex(HeapIndex::HEAP_STANDARD), std::exception); } } // namespace ult } // namespace L0compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/helper/l0_hw_helper_tests.cpp000066400000000000000000000542001422164147700330710ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/ptr_math.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/default_hw_info.h" #include "shared/test/common/test_macros/test.h" #include 
"shared/test/unit_test/helpers/gtest_helpers.h" #include "level_zero/core/source/hw_helpers/l0_hw_helper.h" namespace L0 { namespace ult { using L0HwHelperTest = ::testing::Test; using PlatformsWithWa = IsWithinGfxCore; HWTEST2_F(L0HwHelperTest, givenResumeWANotNeededThenFalseIsReturned, IsAtMostGen11) { auto &l0HwHelper = L0::L0HwHelper::get(NEO::defaultHwInfo->platform.eRenderCoreFamily); EXPECT_FALSE(l0HwHelper.isResumeWARequired()); } HWTEST2_F(L0HwHelperTest, givenResumeWANeededThenTrueIsReturned, PlatformsWithWa) { auto &l0HwHelper = L0::L0HwHelper::get(NEO::defaultHwInfo->platform.eRenderCoreFamily); EXPECT_TRUE(l0HwHelper.isResumeWARequired()); } static void printAttentionBitmask(uint8_t *expected, uint8_t *actual, uint32_t maxSlices, uint32_t maxSubSlicesPerSlice, uint32_t maxEuPerSubslice, uint32_t threadsPerEu, bool printBitmask = false) { auto bytesPerThread = threadsPerEu > 8 ? 2u : 1u; auto maxEUsInAtt = maxEuPerSubslice > 8 ? 8 : maxEuPerSubslice; auto bytesPerSlice = maxSubSlicesPerSlice * maxEUsInAtt * bytesPerThread; auto bytesPerSubSlice = maxEUsInAtt * bytesPerThread; for (uint32_t slice = 0; slice < maxSlices; slice++) { for (uint32_t subslice = 0; subslice < maxSubSlicesPerSlice; subslice++) { for (uint32_t eu = 0; eu < maxEUsInAtt; eu++) { for (uint32_t byte = 0; byte < bytesPerThread; byte++) { if (printBitmask) { std::bitset<8> bits(actual[slice * bytesPerSlice + subslice * bytesPerSubSlice + eu * bytesPerThread + byte]); std::cout << " slice = " << slice << " subslice = " << subslice << " eu = " << eu << " threads bitmask = " << bits << "\n"; } if (expected[slice * bytesPerSlice + subslice * bytesPerSubSlice + eu * bytesPerThread + byte] != actual[slice * bytesPerSlice + subslice * bytesPerSubSlice + eu * bytesPerThread + byte]) { std::bitset<8> bits(actual[slice * bytesPerSlice + subslice * bytesPerSubSlice + eu * bytesPerThread + byte]); std::bitset<8> bitsExpected(expected[slice * bytesPerSlice + subslice * bytesPerSubSlice + eu * 
bytesPerThread + byte]); ASSERT_FALSE(true) << " got: slice = " << slice << " subslice = " << subslice << " eu = " << eu << " threads bitmask = " << bits << "\n" << " expected: slice = " << slice << " subslice = " << subslice << " eu = " << eu << " threads bitmask = " << bitsExpected << "\n"; ; } } } } } if (printBitmask) { std::cout << "\n\n"; } } HWTEST_F(L0HwHelperTest, givenL0HwHelperWhenAskingForImageCompressionSupportThenReturnFalse) { DebugManagerStateRestore restore; auto &l0HwHelper = L0::L0HwHelper::get(NEO::defaultHwInfo->platform.eRenderCoreFamily); EXPECT_FALSE(l0HwHelper.imageCompressionSupported(*NEO::defaultHwInfo)); NEO::DebugManager.flags.RenderCompressedImagesEnabled.set(1); EXPECT_TRUE(l0HwHelper.imageCompressionSupported(*NEO::defaultHwInfo)); NEO::DebugManager.flags.RenderCompressedImagesEnabled.set(0); EXPECT_FALSE(l0HwHelper.imageCompressionSupported(*NEO::defaultHwInfo)); } HWTEST_F(L0HwHelperTest, givenL0HwHelperWhenAskingForUsmCompressionSupportThenReturnFalse) { DebugManagerStateRestore restore; auto &l0HwHelper = L0::L0HwHelper::get(NEO::defaultHwInfo->platform.eRenderCoreFamily); EXPECT_FALSE(l0HwHelper.forceDefaultUsmCompressionSupport()); HardwareInfo hwInfo = *NEO::defaultHwInfo; hwInfo.capabilityTable.ftrRenderCompressedBuffers = true; EXPECT_FALSE(l0HwHelper.usmCompressionSupported(hwInfo)); hwInfo.capabilityTable.ftrRenderCompressedBuffers = false; EXPECT_FALSE(l0HwHelper.usmCompressionSupported(hwInfo)); NEO::DebugManager.flags.RenderCompressedBuffersEnabled.set(1); EXPECT_TRUE(l0HwHelper.usmCompressionSupported(hwInfo)); hwInfo.capabilityTable.ftrRenderCompressedBuffers = true; NEO::DebugManager.flags.RenderCompressedBuffersEnabled.set(0); EXPECT_FALSE(l0HwHelper.usmCompressionSupported(hwInfo)); } HWTEST_F(L0HwHelperTest, givenSliceSubsliceEuAndThreadIdsWhenGettingBitmaskThenCorrectBitmaskIsReturned) { auto hwInfo = *NEO::defaultHwInfo.get(); auto &l0HwHelper = L0::L0HwHelper::get(hwInfo.platform.eRenderCoreFamily); 
std::unique_ptr bitmask; size_t size = 0; uint32_t subslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported; uint32_t subslice = subslicesPerSlice > 1 ? subslicesPerSlice - 1 : 0; const auto threadsPerEu = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount); const auto bytesPerEu = 1; const auto maxEUsInAtt = hwInfo.gtSystemInfo.MaxEuPerSubSlice > 8 ? 8 : hwInfo.gtSystemInfo.MaxEuPerSubSlice; const auto threadsSizePerSubSlice = maxEUsInAtt * bytesPerEu; const auto threadsSizePerSlice = threadsSizePerSubSlice * subslicesPerSlice; std::vector threads; threads.push_back({0, 0, 0, 6}); l0HwHelper.getAttentionBitmaskForSingleThreads(threads, hwInfo, bitmask, size); auto expectedBitmask = std::make_unique(size); uint8_t *data = nullptr; memset(expectedBitmask.get(), 0, size); auto returnedBitmask = bitmask.get(); EXPECT_EQ(uint8_t(1u << 6), returnedBitmask[0]); threads.clear(); threads.push_back({0, 0, 1, 3}); l0HwHelper.getAttentionBitmaskForSingleThreads(threads, hwInfo, bitmask, size); returnedBitmask = bitmask.get(); returnedBitmask += bytesPerEu; EXPECT_EQ(uint8_t(1u << 3), returnedBitmask[0]); threads.clear(); threads.push_back({0, subslice, 3, 6}); l0HwHelper.getAttentionBitmaskForSingleThreads(threads, hwInfo, bitmask, size); data = expectedBitmask.get(); memset(expectedBitmask.get(), 0, size); data = ptrOffset(data, subslice * threadsSizePerSubSlice); data = ptrOffset(data, 3 * bytesPerEu); data[0] = 1 << 6; printAttentionBitmask(expectedBitmask.get(), bitmask.get(), hwInfo.gtSystemInfo.MaxSlicesSupported, subslicesPerSlice, hwInfo.gtSystemInfo.MaxEuPerSubSlice, threadsPerEu); EXPECT_EQ(0, memcmp(bitmask.get(), expectedBitmask.get(), size)); threads.clear(); threads.push_back({hwInfo.gtSystemInfo.MaxSlicesSupported - 1, subslice, 3, 6}); l0HwHelper.getAttentionBitmaskForSingleThreads(threads, hwInfo, bitmask, size); data = expectedBitmask.get(); memset(expectedBitmask.get(), 0, size); data = 
ptrOffset(data, (hwInfo.gtSystemInfo.MaxSlicesSupported - 1) * threadsSizePerSlice); data = ptrOffset(data, subslice * threadsSizePerSubSlice); data = ptrOffset(data, 3 * bytesPerEu); data[0] = 1 << 6; printAttentionBitmask(expectedBitmask.get(), bitmask.get(), hwInfo.gtSystemInfo.MaxSlicesSupported, subslicesPerSlice, hwInfo.gtSystemInfo.MaxEuPerSubSlice, threadsPerEu); EXPECT_EQ(0, memcmp(bitmask.get(), expectedBitmask.get(), size)); threads.clear(); threads.push_back({hwInfo.gtSystemInfo.MaxSlicesSupported - 1, subslice, maxEUsInAtt - 1, 0}); l0HwHelper.getAttentionBitmaskForSingleThreads(threads, hwInfo, bitmask, size); data = expectedBitmask.get(); memset(expectedBitmask.get(), 0, size); data = ptrOffset(data, (hwInfo.gtSystemInfo.MaxSlicesSupported - 1) * threadsSizePerSlice); data = ptrOffset(data, subslice * threadsSizePerSubSlice); if (l0HwHelper.isResumeWARequired()) { data = ptrOffset(data, (maxEUsInAtt - 1) % 4 * bytesPerEu); } else { data = ptrOffset(data, maxEUsInAtt - 1 * bytesPerEu); } data[0] = 1; printAttentionBitmask(expectedBitmask.get(), bitmask.get(), hwInfo.gtSystemInfo.MaxSlicesSupported, subslicesPerSlice, hwInfo.gtSystemInfo.MaxEuPerSubSlice, threadsPerEu); EXPECT_EQ(0, memcmp(bitmask.get(), expectedBitmask.get(), size)); } HWTEST_F(L0HwHelperTest, givenSingleThreadsWhenGettingBitmaskThenCorrectBitsAreSet) { auto hwInfo = *NEO::defaultHwInfo.get(); auto &l0HwHelper = L0::L0HwHelper::get(hwInfo.platform.eRenderCoreFamily); std::unique_ptr bitmask; size_t size = 0; std::vector threads; threads.push_back({0, 0, 0, 3}); threads.push_back({0, 0, 1, 0}); l0HwHelper.getAttentionBitmaskForSingleThreads(threads, hwInfo, bitmask, size); auto data = bitmask.get(); EXPECT_EQ(1u << 3, data[0]); EXPECT_EQ(1u, data[1]); EXPECT_TRUE(memoryZeroed(&data[2], size - 2)); } HWTEST_F(L0HwHelperTest, givenBitmaskWithAttentionBitsForSingleThreadWhenGettingThreadsThenSingleCorrectThreadReturned) { auto hwInfo = *NEO::defaultHwInfo.get(); auto &l0HwHelper = 
L0::L0HwHelper::get(hwInfo.platform.eRenderCoreFamily); std::unique_ptr bitmask; size_t size = 0; uint32_t subslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported; uint32_t subsliceID = subslicesPerSlice > 2 ? subslicesPerSlice - 2 : 0; uint32_t threadID = 3; std::vector threadsWithAtt; threadsWithAtt.push_back({0, subsliceID, 0, threadID}); l0HwHelper.getAttentionBitmaskForSingleThreads(threadsWithAtt, hwInfo, bitmask, size); auto threads = l0HwHelper.getThreadsFromAttentionBitmask(hwInfo, bitmask.get(), size); ASSERT_EQ(1u, threads.size()); EXPECT_EQ(0u, threads[0].slice); EXPECT_EQ(subsliceID, threads[0].subslice); EXPECT_EQ(0u, threads[0].eu); EXPECT_EQ(threadID, threads[0].thread); } HWTEST_F(L0HwHelperTest, givenBitmaskWithAttentionBitsForAllSubslicesWhenGettingThreadsThenCorrectThreadsAreReturned) { auto hwInfo = *NEO::defaultHwInfo.get(); auto &l0HwHelper = L0::L0HwHelper::get(hwInfo.platform.eRenderCoreFamily); std::unique_ptr bitmask; size_t size = 0; uint32_t subslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported; uint32_t threadID = 0; std::vector threadsWithAtt; for (uint32_t subsliceID = 0; subsliceID < subslicesPerSlice; subsliceID++) { threadsWithAtt.push_back({0, subsliceID, 0, threadID}); } l0HwHelper.getAttentionBitmaskForSingleThreads(threadsWithAtt, hwInfo, bitmask, size); auto threads = l0HwHelper.getThreadsFromAttentionBitmask(hwInfo, bitmask.get(), size); ASSERT_EQ(subslicesPerSlice, threads.size()); for (uint32_t i = 0; i < subslicesPerSlice; i++) { EXPECT_EQ(0u, threads[i].slice); EXPECT_EQ(i, threads[i].subslice); EXPECT_EQ(0u, threads[i].eu); EXPECT_EQ(threadID, threads[i].thread); } } HWTEST_F(L0HwHelperTest, givenBitmaskWithAttentionBitsForAllEUsWhenGettingThreadsThenCorrectThreadsAreReturned) { auto hwInfo = *NEO::defaultHwInfo.get(); auto &l0HwHelper = L0::L0HwHelper::get(hwInfo.platform.eRenderCoreFamily); std::unique_ptr bitmask; size_t 
size = 0; const auto numEUsPerSS = hwInfo.gtSystemInfo.MaxEuPerSubSlice; uint32_t threadID = 3; std::vector threadsWithAtt; for (uint32_t euId = 0; euId < numEUsPerSS; euId++) { threadsWithAtt.push_back({0, 0, euId, threadID}); } l0HwHelper.getAttentionBitmaskForSingleThreads(threadsWithAtt, hwInfo, bitmask, size); auto threads = l0HwHelper.getThreadsFromAttentionBitmask(hwInfo, bitmask.get(), size); ASSERT_EQ(numEUsPerSS, threads.size()); for (uint32_t i = 0; i < numEUsPerSS; i++) { EXPECT_EQ(0u, threads[i].slice); EXPECT_EQ(0u, threads[i].subslice); EXPECT_EQ(i, threads[i].eu); EXPECT_EQ(threadID, threads[i].thread); } } HWTEST_F(L0HwHelperTest, givenEu0To1Threads0To3BitmaskWhenGettingThreadsThenCorrectThreadsAreReturned) { auto hwInfo = *NEO::defaultHwInfo.get(); auto &l0HwHelper = L0::L0HwHelper::get(hwInfo.platform.eRenderCoreFamily); uint8_t data[2] = {0x0f, 0x0f}; auto threads = l0HwHelper.getThreadsFromAttentionBitmask(hwInfo, data, sizeof(data)); ASSERT_EQ(8u, threads.size()); ze_device_thread_t expectedThreads[] = { {0, 0, 0, 0}, {0, 0, 0, 1}, {0, 0, 0, 2}, {0, 0, 0, 3}, {0, 0, 1, 0}, {0, 0, 1, 1}, {0, 0, 1, 2}, {0, 0, 1, 3}}; for (uint32_t i = 0; i < 8u; i++) { EXPECT_EQ(expectedThreads[i].slice, threads[i].slice); EXPECT_EQ(expectedThreads[i].subslice, threads[i].subslice); EXPECT_EQ(expectedThreads[i].eu, threads[i].eu); EXPECT_EQ(expectedThreads[i].thread, threads[i].thread); } } HWTEST_F(L0HwHelperTest, givenBitmaskWithAttentionBitsForHalfOfThreadsWhenGettingThreadsThenCorrectThreadsAreReturned) { auto hwInfo = *NEO::defaultHwInfo.get(); auto &l0HwHelper = L0::L0HwHelper::get(hwInfo.platform.eRenderCoreFamily); std::unique_ptr bitmask; size_t size = 0; uint32_t subslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported; uint32_t threadID = 3; auto numOfActiveSubslices = ((subslicesPerSlice + 1) / 2); std::vector threadsWithAtt; for (uint32_t subsliceID = 0; subsliceID < numOfActiveSubslices; subsliceID++) { 
threadsWithAtt.push_back({0, subsliceID, 0, threadID}); } l0HwHelper.getAttentionBitmaskForSingleThreads(threadsWithAtt, hwInfo, bitmask, size); auto bitmaskSizePerSingleSubslice = size / hwInfo.gtSystemInfo.MaxSlicesSupported / subslicesPerSlice; auto threads = l0HwHelper.getThreadsFromAttentionBitmask(hwInfo, bitmask.get(), bitmaskSizePerSingleSubslice * numOfActiveSubslices); ASSERT_EQ(numOfActiveSubslices, threads.size()); for (uint32_t i = 0; i < numOfActiveSubslices; i++) { EXPECT_EQ(0u, threads[i].slice); EXPECT_EQ(i, threads[i].subslice); EXPECT_EQ(0u, threads[i].eu); EXPECT_EQ(threadID, threads[i].thread); } } using PlatformsWithFusedEus = IsWithinGfxCore; using L0HwHelperFusedEuTest = ::testing::Test; HWTEST2_F(L0HwHelperFusedEuTest, givenBitmaskWithAttentionBitsForSingleThreadWhenGettingThreadsThenThreadForTwoEUsReturned, PlatformsWithFusedEus) { auto hwInfo = *NEO::defaultHwInfo.get(); if (hwInfo.gtSystemInfo.MaxEuPerSubSlice <= 8) { GTEST_SKIP(); } auto &l0HwHelper = L0::L0HwHelper::get(hwInfo.platform.eRenderCoreFamily); std::unique_ptr bitmask; size_t size = 0; uint32_t subslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported; uint32_t subsliceID = subslicesPerSlice > 2 ? 
subslicesPerSlice - 2 : 0; uint32_t threadID = 3; std::vector threadsWithAtt; threadsWithAtt.push_back({0, subsliceID, 0, threadID}); l0HwHelper.getAttentionBitmaskForSingleThreads(threadsWithAtt, hwInfo, bitmask, size); auto threads = l0HwHelper.getThreadsFromAttentionBitmask(hwInfo, bitmask.get(), size); ASSERT_EQ(2u, threads.size()); EXPECT_EQ(0u, threads[0].slice); EXPECT_EQ(subsliceID, threads[0].subslice); EXPECT_EQ(0u, threads[0].eu); EXPECT_EQ(threadID, threads[0].thread); EXPECT_EQ(0u, threads[1].slice); EXPECT_EQ(subsliceID, threads[1].subslice); EXPECT_EQ(4u, threads[1].eu); EXPECT_EQ(threadID, threads[1].thread); } HWTEST2_F(L0HwHelperFusedEuTest, givenBitmaskWithAttentionBitsForAllSubslicesWhenGettingThreadsThenCorrectThreadsForTwoEUsAreReturned, PlatformsWithFusedEus) { auto hwInfo = *NEO::defaultHwInfo.get(); if (hwInfo.gtSystemInfo.MaxEuPerSubSlice <= 8) { GTEST_SKIP(); } auto &l0HwHelper = L0::L0HwHelper::get(hwInfo.platform.eRenderCoreFamily); std::unique_ptr bitmask; size_t size = 0; uint32_t subslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported; uint32_t threadID = 0; std::vector threadsWithAtt; for (uint32_t subsliceID = 0; subsliceID < subslicesPerSlice; subsliceID++) { threadsWithAtt.push_back({0, subsliceID, 0, threadID}); } l0HwHelper.getAttentionBitmaskForSingleThreads(threadsWithAtt, hwInfo, bitmask, size); auto threads = l0HwHelper.getThreadsFromAttentionBitmask(hwInfo, bitmask.get(), size); ASSERT_EQ(2 * subslicesPerSlice, threads.size()); auto threadIndex = 0; for (uint32_t i = 0; i < subslicesPerSlice; i++) { EXPECT_EQ(0u, threads[threadIndex].slice); EXPECT_EQ(i, threads[threadIndex].subslice); EXPECT_EQ(threadID, threads[threadIndex].thread); EXPECT_EQ(0u, threads[threadIndex].eu); threadIndex++; EXPECT_EQ(threadID, threads[threadIndex].thread); EXPECT_EQ(4u, threads[threadIndex].eu); threadIndex++; } } HWTEST2_F(L0HwHelperFusedEuTest, 
givenBitmaskWithAttentionBitsForAllEUsWhenGettingThreadsThenCorrectThreadsAreReturned, PlatformsWithFusedEus) { auto hwInfo = *NEO::defaultHwInfo.get(); if (hwInfo.gtSystemInfo.MaxEuPerSubSlice <= 8) { GTEST_SKIP(); } auto &l0HwHelper = L0::L0HwHelper::get(hwInfo.platform.eRenderCoreFamily); std::unique_ptr bitmask; size_t size = 0; const auto maxEUsInAtt = 8u; uint32_t threadID = 3; std::vector threadsWithAtt; for (uint32_t euId = 0; euId < maxEUsInAtt; euId++) { threadsWithAtt.push_back({0, 0, euId, threadID}); } l0HwHelper.getAttentionBitmaskForSingleThreads(threadsWithAtt, hwInfo, bitmask, size); auto threads = l0HwHelper.getThreadsFromAttentionBitmask(hwInfo, bitmask.get(), size); ASSERT_EQ(maxEUsInAtt, threads.size()); uint32_t expectedEUs[] = {0, 4, 1, 5, 2, 6, 3, 7}; for (uint32_t i = 0; i < threads.size(); i++) { EXPECT_EQ(0u, threads[i].slice); EXPECT_EQ(0u, threads[i].subslice); EXPECT_EQ(expectedEUs[i], threads[i].eu); EXPECT_EQ(threadID, threads[i].thread); } } HWTEST2_F(L0HwHelperFusedEuTest, givenEu0To1Threads0To3BitmaskWhenGettingThreadsThenCorrectThreadsAreReturned, PlatformsWithFusedEus) { auto hwInfo = *NEO::defaultHwInfo.get(); if (hwInfo.gtSystemInfo.MaxEuPerSubSlice <= 8) { GTEST_SKIP(); } auto &l0HwHelper = L0::L0HwHelper::get(hwInfo.platform.eRenderCoreFamily); uint8_t data[2] = {0x0f, 0x0f}; auto threads = l0HwHelper.getThreadsFromAttentionBitmask(hwInfo, data, sizeof(data)); ASSERT_EQ(16u, threads.size()); ze_device_thread_t expectedThreads[] = { {0, 0, 0, 0}, {0, 0, 4, 0}, {0, 0, 0, 1}, {0, 0, 4, 1}, {0, 0, 0, 2}, {0, 0, 4, 2}, {0, 0, 0, 3}, {0, 0, 4, 3}, {0, 0, 1, 0}, {0, 0, 5, 0}, {0, 0, 1, 1}, {0, 0, 5, 1}, {0, 0, 1, 2}, {0, 0, 5, 2}, {0, 0, 1, 3}, {0, 0, 5, 3}}; for (uint32_t i = 0; i < 16u; i++) { EXPECT_EQ(expectedThreads[i].slice, threads[i].slice); EXPECT_EQ(expectedThreads[i].subslice, threads[i].subslice); EXPECT_EQ(expectedThreads[i].eu, threads[i].eu); EXPECT_EQ(expectedThreads[i].thread, threads[i].thread); } } 
HWTEST2_F(L0HwHelperFusedEuTest, givenEu8To9Threads0To3BitmaskWhenGettingThreadsThenCorrectThreadsAreReturned, PlatformsWithFusedEus) { auto hwInfo = *NEO::defaultHwInfo.get(); if (hwInfo.gtSystemInfo.MaxEuPerSubSlice <= 8) { GTEST_SKIP(); } auto &l0HwHelper = L0::L0HwHelper::get(hwInfo.platform.eRenderCoreFamily); uint8_t data[] = {0x00, 0x00, 0x00, 0x00, 0x0f, 0x0f}; auto threads = l0HwHelper.getThreadsFromAttentionBitmask(hwInfo, data, sizeof(data)); ASSERT_EQ(16u, threads.size()); ze_device_thread_t expectedThreads[] = { {0, 0, 8, 0}, {0, 0, 12, 0}, {0, 0, 8, 1}, {0, 0, 12, 1}, {0, 0, 8, 2}, {0, 0, 12, 2}, {0, 0, 8, 3}, {0, 0, 12, 3}, {0, 0, 9, 0}, {0, 0, 13, 0}, {0, 0, 9, 1}, {0, 0, 13, 1}, {0, 0, 9, 2}, {0, 0, 13, 2}, {0, 0, 9, 3}, {0, 0, 13, 3}}; for (uint32_t i = 0; i < 16u; i++) { EXPECT_EQ(expectedThreads[i].slice, threads[i].slice); EXPECT_EQ(expectedThreads[i].subslice, threads[i].subslice); EXPECT_EQ(expectedThreads[i].eu, threads[i].eu); EXPECT_EQ(expectedThreads[i].thread, threads[i].thread); } } HWTEST2_F(L0HwHelperFusedEuTest, givenBitmaskWithAttentionBitsForHalfOfThreadsWhenGettingThreadsThenCorrectThreadsAreReturned, PlatformsWithFusedEus) { auto hwInfo = *NEO::defaultHwInfo.get(); if (hwInfo.gtSystemInfo.MaxEuPerSubSlice <= 8) { GTEST_SKIP(); } auto &l0HwHelper = L0::L0HwHelper::get(hwInfo.platform.eRenderCoreFamily); std::unique_ptr bitmask; size_t size = 0; uint32_t subslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported; uint32_t threadID = 3; auto numOfActiveSubslices = ((subslicesPerSlice + 1) / 2); std::vector threadsWithAtt; for (uint32_t subsliceID = 0; subsliceID < numOfActiveSubslices; subsliceID++) { threadsWithAtt.push_back({0, subsliceID, 0, threadID}); } l0HwHelper.getAttentionBitmaskForSingleThreads(threadsWithAtt, hwInfo, bitmask, size); auto bitmaskSizePerSingleSubslice = size / hwInfo.gtSystemInfo.MaxSlicesSupported / subslicesPerSlice; auto threads = 
l0HwHelper.getThreadsFromAttentionBitmask(hwInfo, bitmask.get(), bitmaskSizePerSingleSubslice * numOfActiveSubslices); ASSERT_EQ(2 * numOfActiveSubslices, threads.size()); uint32_t subsliceIndex = 0; for (uint32_t i = 0; i < threads.size(); i++) { if (i % 2 == 0) { EXPECT_EQ(0u, threads[i].slice); EXPECT_EQ(subsliceIndex, threads[i].subslice); EXPECT_EQ(0u, threads[i].eu); EXPECT_EQ(threadID, threads[i].thread); } else { EXPECT_EQ(0u, threads[i].slice); EXPECT_EQ(subsliceIndex, threads[i].subslice); EXPECT_EQ(4u, threads[i].eu); EXPECT_EQ(threadID, threads[i].thread); subsliceIndex++; } } } } // namespace ult } // namespace L0properties_parser_tests.cpp000066400000000000000000000241141422164147700342130ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/helper/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "level_zero/core/source/helpers/properties_parser.h" namespace L0 { namespace ult { struct ImageStaticFunctionConvertTypeTest : public testing::TestWithParam> { void SetUp() override { } void TearDown() override { } }; TEST_P(ImageStaticFunctionConvertTypeTest, givenZeImageFormatTypeWhenConvertTypeThenCorrectImageTypeReturned) { auto params = GetParam(); EXPECT_EQ(convertType(params.first), params.second); } std::pair validTypes[] = { {ZE_IMAGE_TYPE_2D, NEO::ImageType::Image2D}, {ZE_IMAGE_TYPE_3D, NEO::ImageType::Image3D}, {ZE_IMAGE_TYPE_2DARRAY, NEO::ImageType::Image2DArray}, {ZE_IMAGE_TYPE_1D, NEO::ImageType::Image1D}, {ZE_IMAGE_TYPE_1DARRAY, NEO::ImageType::Image1DArray}, {ZE_IMAGE_TYPE_BUFFER, NEO::ImageType::Image1DBuffer}}; INSTANTIATE_TEST_CASE_P( imageTypeFlags, ImageStaticFunctionConvertTypeTest, testing::ValuesIn(validTypes)); TEST(ImageStaticFunctionConvertInvalidType, givenInvalidZeImageFormatTypeWhenConvertTypeThenInvalidFormatIsRetrurned) { EXPECT_EQ(convertType(ZE_IMAGE_TYPE_FORCE_UINT32), NEO::ImageType::Invalid); 
}

// convertDescriptor must copy the dimensions, array size and mip count of the Level Zero
// descriptor and leave pitches, sample count and fromParent at zero/false.
TEST(ConvertDescriptorTest, givenZeImageDescWhenConvertDescriptorThenCorrectImageDescriptorReturned) {
    ze_image_desc_t zeDesc = {};
    zeDesc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC;
    zeDesc.arraylevels = 1u;
    zeDesc.depth = 1u;
    zeDesc.height = 1u;
    zeDesc.width = 1u;
    zeDesc.miplevels = 1u;
    zeDesc.type = ZE_IMAGE_TYPE_2DARRAY;

    NEO::ImageDescriptor desc = convertDescriptor(zeDesc);

    EXPECT_EQ(desc.fromParent, false);
    EXPECT_EQ(desc.imageArraySize, zeDesc.arraylevels);
    EXPECT_EQ(desc.imageDepth, zeDesc.depth);
    EXPECT_EQ(desc.imageHeight, zeDesc.height);
    EXPECT_EQ(desc.imageRowPitch, 0u);
    EXPECT_EQ(desc.imageSlicePitch, 0u);
    EXPECT_EQ(desc.imageType, NEO::ImageType::Image2DArray);
    EXPECT_EQ(desc.imageWidth, zeDesc.width);
    EXPECT_EQ(desc.numMipLevels, zeDesc.miplevels);
    EXPECT_EQ(desc.numSamples, 0u);
}

// Walks a three-element pNext chain (image desc -> DMA-BUF fd import -> planar view) and
// checks that every element is reflected in the lookup table.
TEST(L0StructuresLookupTableTests, givenL0StructuresWithFDWhenPrepareLookupTableThenProperFieldsInLookupTableAreSet) {
    ze_image_desc_t imageDesc = {};
    imageDesc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC;
    imageDesc.height = 10;
    imageDesc.width = 10;
    imageDesc.depth = 10;

    ze_external_memory_import_fd_t fdStructure = {};
    fdStructure.stype = ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMPORT_FD;
    fdStructure.fd = 1;
    fdStructure.flags = ZE_EXTERNAL_MEMORY_TYPE_FLAG_DMA_BUF;

    ze_image_view_planar_exp_desc_t imageView = {};
    imageView.stype = ZE_STRUCTURE_TYPE_IMAGE_VIEW_PLANAR_EXP_DESC;
    imageView.planeIndex = 1u;

    imageDesc.pNext = &fdStructure;
    fdStructure.pNext = &imageView;
    imageView.pNext = nullptr;

    StructuresLookupTable l0LookupTable = {};
    auto result = prepareL0StructuresLookupTable(l0LookupTable, &imageDesc);

    EXPECT_EQ(result, ZE_RESULT_SUCCESS);

    EXPECT_TRUE(l0LookupTable.isSharedHandle);
    EXPECT_TRUE(l0LookupTable.sharedHandleType.isSupportedHandle);
    EXPECT_TRUE(l0LookupTable.sharedHandleType.isDMABUFHandle);
    EXPECT_EQ(l0LookupTable.sharedHandleType.fd, fdStructure.fd);

    EXPECT_TRUE(l0LookupTable.areImageProperties);

    EXPECT_EQ(l0LookupTable.imageProperties.planeIndex, imageView.planeIndex);
    EXPECT_EQ(l0LookupTable.imageProperties.imageDescriptor.imageWidth, imageDesc.width);
    EXPECT_EQ(l0LookupTable.imageProperties.imageDescriptor.imageHeight, imageDesc.height);
    EXPECT_EQ(l0LookupTable.imageProperties.imageDescriptor.imageDepth, imageDesc.depth);
}

// An opaque Win32 import descriptor must be recorded as a supported NT handle.
TEST(L0StructuresLookupTableTests, givenL0StructuresWithNTHandleWhenPrepareLookupTableThenProperFieldsInLookupTableAreSet) {
    uint64_t handle = 0x02;
    ze_external_memory_import_win32_handle_t importNTHandle = {};
    importNTHandle.stype = ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMPORT_WIN32;
    importNTHandle.handle = &handle;
    importNTHandle.flags = ZE_EXTERNAL_MEMORY_TYPE_FLAG_OPAQUE_WIN32;

    StructuresLookupTable l0LookupTable = {};
    auto result = prepareL0StructuresLookupTable(l0LookupTable, &importNTHandle);

    EXPECT_EQ(result, ZE_RESULT_SUCCESS);

    EXPECT_TRUE(l0LookupTable.isSharedHandle);
    EXPECT_TRUE(l0LookupTable.sharedHandleType.isSupportedHandle);
    EXPECT_TRUE(l0LookupTable.sharedHandleType.isNTHandle);
    EXPECT_EQ(l0LookupTable.sharedHandleType.ntHnadle, importNTHandle.handle);
}

// Export descriptors flagged OPAQUE_WIN32 or DMA_BUF must both set exportMemory.
TEST(L0StructuresLookupTableTests, givenL0StructuresWithSupportedExportHandlesWhenPrepareLookupTableThenProperFieldsInLookupTableAreSet) {
    // Declared with the struct type that matches the stype tag below.
    ze_external_memory_export_desc_t exportStruct = {};
    exportStruct.stype = ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_EXPORT_DESC;
    exportStruct.flags = ZE_EXTERNAL_MEMORY_TYPE_FLAG_OPAQUE_WIN32;

    StructuresLookupTable l0LookupTable = {};
    auto result = prepareL0StructuresLookupTable(l0LookupTable, &exportStruct);

    EXPECT_EQ(result, ZE_RESULT_SUCCESS);
    EXPECT_TRUE(l0LookupTable.exportMemory);

    exportStruct.flags = ZE_EXTERNAL_MEMORY_TYPE_FLAG_DMA_BUF;
    l0LookupTable = {};

    result = prepareL0StructuresLookupTable(l0LookupTable, &exportStruct);

    EXPECT_EQ(result, ZE_RESULT_SUCCESS);
    EXPECT_TRUE(l0LookupTable.exportMemory);
}

// COMPRESSED and UNCOMPRESSED each set exactly one hint; requesting both is rejected.
TEST(L0StructuresLookupTableTests, givenL0StructuresWithSupportedCompressionHintsWhenPrepareLookupTableThenProperFieldsInLookupTableAreSet) {
    {
        ze_memory_compression_hints_ext_desc_t hintsDesc = {};
        hintsDesc.stype = ZE_STRUCTURE_TYPE_MEMORY_COMPRESSION_HINTS_EXT_DESC;
        hintsDesc.flags = ZE_MEMORY_COMPRESSION_HINTS_EXT_FLAG_COMPRESSED;

        StructuresLookupTable l0LookupTable = {};
        auto result = prepareL0StructuresLookupTable(l0LookupTable, &hintsDesc);

        EXPECT_EQ(result, ZE_RESULT_SUCCESS);
        EXPECT_TRUE(l0LookupTable.compressedHint);
        EXPECT_FALSE(l0LookupTable.uncompressedHint);
    }

    {
        ze_memory_compression_hints_ext_desc_t hintsDesc = {};
        hintsDesc.stype = ZE_STRUCTURE_TYPE_MEMORY_COMPRESSION_HINTS_EXT_DESC;
        hintsDesc.flags = ZE_MEMORY_COMPRESSION_HINTS_EXT_FLAG_UNCOMPRESSED;

        StructuresLookupTable l0LookupTable = {};
        auto result = prepareL0StructuresLookupTable(l0LookupTable, &hintsDesc);

        EXPECT_EQ(result, ZE_RESULT_SUCCESS);
        EXPECT_FALSE(l0LookupTable.compressedHint);
        EXPECT_TRUE(l0LookupTable.uncompressedHint);
    }

    {
        ze_memory_compression_hints_ext_desc_t hintsDesc = {};
        hintsDesc.stype = ZE_STRUCTURE_TYPE_MEMORY_COMPRESSION_HINTS_EXT_DESC;
        hintsDesc.flags = ZE_MEMORY_COMPRESSION_HINTS_EXT_FLAG_COMPRESSED | ZE_MEMORY_COMPRESSION_HINTS_EXT_FLAG_UNCOMPRESSED;

        StructuresLookupTable l0LookupTable = {};
        auto result = prepareL0StructuresLookupTable(l0LookupTable, &hintsDesc);

        EXPECT_EQ(result, ZE_RESULT_ERROR_UNSUPPORTED_ENUMERATION);
    }
}

// Export descriptors flagged D3D11_TEXTURE or OPAQUE_FD must be rejected without setting
// exportMemory.
TEST(L0StructuresLookupTableTests, givenL0StructuresWithUnsupportedExportHandlesWhenPrepareLookupTableThenUnsuppoertedErrorIsReturned) {
    // Declared with the struct type that matches the stype tag below.
    ze_external_memory_export_desc_t exportStruct = {};
    exportStruct.stype = ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_EXPORT_DESC;
    exportStruct.flags = ZE_EXTERNAL_MEMORY_TYPE_FLAG_D3D11_TEXTURE;

    StructuresLookupTable l0LookupTable = {};
    auto result = prepareL0StructuresLookupTable(l0LookupTable, &exportStruct);

    EXPECT_EQ(result, ZE_RESULT_ERROR_UNSUPPORTED_ENUMERATION);
    EXPECT_FALSE(l0LookupTable.exportMemory);

    exportStruct.flags = ZE_EXTERNAL_MEMORY_TYPE_FLAG_OPAQUE_FD;
    l0LookupTable = {};

    result = prepareL0StructuresLookupTable(l0LookupTable, &exportStruct);
EXPECT_EQ(result, ZE_RESULT_ERROR_UNSUPPORTED_ENUMERATION); EXPECT_FALSE(l0LookupTable.exportMemory); } TEST(L0StructuresLookupTableTests, givenL0StructuresWithSupportedExportHandlesAndImageDescWhenPrepareLookupTableThenUnsupportedErrorIsReturned) { ze_image_desc_t imageDesc = {}; imageDesc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC; ze_external_memory_import_win32_handle_t exportStruct = {}; exportStruct.stype = ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_EXPORT_DESC; exportStruct.flags = ZE_EXTERNAL_MEMORY_TYPE_FLAG_OPAQUE_WIN32; exportStruct.pNext = &imageDesc; StructuresLookupTable l0LookupTable = {}; auto result = prepareL0StructuresLookupTable(l0LookupTable, &exportStruct); EXPECT_EQ(result, ZE_RESULT_ERROR_UNSUPPORTED_ENUMERATION); EXPECT_TRUE(l0LookupTable.exportMemory); EXPECT_TRUE(l0LookupTable.areImageProperties); } TEST(L0StructuresLookupTableTests, givenL0StructuresWithUnsuportedOptionsWhenPrepareLookupTableThenProperFieldsInLookupTableAreSet) { uint64_t handle = 0x02; ze_external_memory_import_win32_handle_t importNTHandle = {}; importNTHandle.stype = ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMPORT_WIN32; importNTHandle.handle = &handle; importNTHandle.flags = ZE_EXTERNAL_MEMORY_TYPE_FLAG_OPAQUE_WIN32_KMT; StructuresLookupTable l0LookupTable = {}; auto result = prepareL0StructuresLookupTable(l0LookupTable, &importNTHandle); EXPECT_EQ(result, ZE_RESULT_ERROR_UNSUPPORTED_ENUMERATION); EXPECT_TRUE(l0LookupTable.isSharedHandle); EXPECT_FALSE(l0LookupTable.sharedHandleType.isSupportedHandle); ze_external_memory_import_fd_t fdStructure = {}; fdStructure.stype = ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMPORT_FD; fdStructure.fd = 1; fdStructure.flags = ZE_EXTERNAL_MEMORY_TYPE_FLAG_OPAQUE_FD; l0LookupTable = {}; result = prepareL0StructuresLookupTable(l0LookupTable, &importNTHandle); EXPECT_EQ(result, ZE_RESULT_ERROR_UNSUPPORTED_ENUMERATION); EXPECT_TRUE(l0LookupTable.isSharedHandle); EXPECT_FALSE(l0LookupTable.sharedHandleType.isSupportedHandle); l0LookupTable = {}; 
ze_device_module_properties_t moduleProperties = {}; moduleProperties.stype = ZE_STRUCTURE_TYPE_DEVICE_MODULE_PROPERTIES; result = prepareL0StructuresLookupTable(l0LookupTable, &importNTHandle); EXPECT_EQ(result, ZE_RESULT_ERROR_UNSUPPORTED_ENUMERATION); } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/image/000077500000000000000000000000001422164147700263755ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/image/CMakeLists.txt000066400000000000000000000003511422164147700311340ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/test_image.cpp ) compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/image/test_image.cpp000066400000000000000000001417421422164147700312330ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gmm_helper/gmm.h" #include "shared/source/gmm_helper/resource_info.h" #include "shared/source/helpers/surface_format_info.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/default_hw_info.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/mocks/mock_gmm_client_context.h" #include "shared/test/common/mocks/mock_sip.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/source/hw_helpers/l0_hw_helper.h" #include "level_zero/core/source/image/image_format_desc_helper.h" #include "level_zero/core/source/image/image_formats.h" #include "level_zero/core/source/image/image_hw.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_device.h" #include "third_party/opencl_headers/CL/cl_ext_intel.h" namespace L0 { namespace 
ult { using ImageCreate = Test; using ImageView = Test; HWTEST2_F(ImageCreate, givenValidImageDescriptionWhenImageCreateThenImageIsCreatedCorrectly, IsAtLeastSkl) { ze_image_desc_t zeDesc = {}; zeDesc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC; zeDesc.arraylevels = 1u; zeDesc.depth = 1u; zeDesc.height = 1u; zeDesc.width = 1u; zeDesc.miplevels = 1u; zeDesc.type = ZE_IMAGE_TYPE_2DARRAY; zeDesc.flags = ZE_IMAGE_FLAG_BIAS_UNCACHED; zeDesc.format = {ZE_IMAGE_FORMAT_LAYOUT_32, ZE_IMAGE_FORMAT_TYPE_UINT, ZE_IMAGE_FORMAT_SWIZZLE_R, ZE_IMAGE_FORMAT_SWIZZLE_G, ZE_IMAGE_FORMAT_SWIZZLE_B, ZE_IMAGE_FORMAT_SWIZZLE_A}; Image *image_ptr; auto result = Image::create(productFamily, device, &zeDesc, &image_ptr); EXPECT_EQ(result, ZE_RESULT_SUCCESS); std::unique_ptr image(image_ptr); ASSERT_NE(image, nullptr); auto imageInfo = image->getImageInfo(); EXPECT_EQ(imageInfo.imgDesc.fromParent, false); EXPECT_EQ(imageInfo.imgDesc.imageArraySize, zeDesc.arraylevels); EXPECT_EQ(imageInfo.imgDesc.imageDepth, zeDesc.depth); EXPECT_EQ(imageInfo.imgDesc.imageHeight, zeDesc.height); EXPECT_EQ(imageInfo.imgDesc.imageType, NEO::ImageType::Image2DArray); EXPECT_EQ(imageInfo.imgDesc.imageWidth, zeDesc.width); EXPECT_EQ(imageInfo.imgDesc.numMipLevels, zeDesc.miplevels); EXPECT_EQ(imageInfo.imgDesc.numSamples, 0u); EXPECT_EQ(imageInfo.baseMipLevel, 0u); EXPECT_EQ(imageInfo.linearStorage, false); EXPECT_EQ(imageInfo.mipCount, 0u); EXPECT_EQ(imageInfo.plane, GMM_NO_PLANE); EXPECT_EQ(imageInfo.useLocalMemory, false); } HWTEST2_F(ImageCreate, givenValidImageDescriptionWhenImageCreateWithUnsupportedImageThenNullPtrImageIsReturned, IsAtLeastSkl) { ze_image_desc_t zeDesc = {}; zeDesc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC; zeDesc.arraylevels = 1u; zeDesc.depth = 1u; zeDesc.height = 1u; zeDesc.width = 1u; zeDesc.miplevels = 1u; zeDesc.type = ZE_IMAGE_TYPE_2DARRAY; zeDesc.flags = ZE_IMAGE_FLAG_BIAS_UNCACHED; zeDesc.format = {ZE_IMAGE_FORMAT_LAYOUT_P216}; Image *image_ptr; auto result = Image::create(productFamily, 
device, &zeDesc, &image_ptr); ASSERT_EQ(result, ZE_RESULT_ERROR_UNSUPPORTED_IMAGE_FORMAT); ASSERT_EQ(image_ptr, nullptr); } class TestImageFormats : public DeviceFixture, public testing::TestWithParam> { public: void SetUp() override { DeviceFixture::SetUp(); } void TearDown() override { DeviceFixture::TearDown(); } }; template struct WhiteBox<::L0::ImageCoreFamily> : public ::L0::ImageCoreFamily { using BaseClass = ::L0::ImageCoreFamily; using BaseClass::redescribedSurfaceState; using BaseClass::surfaceState; }; HWTEST2_F(ImageCreate, givenDifferentSwizzleFormatWhenImageInitializeThenCorrectSwizzleInRSSIsSet, IsAtLeastSkl) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; ze_image_desc_t desc = {}; desc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC; desc.type = ZE_IMAGE_TYPE_3D; desc.format.layout = ZE_IMAGE_FORMAT_LAYOUT_8_8_8_8; desc.format.type = ZE_IMAGE_FORMAT_TYPE_UINT; desc.width = 11; desc.height = 13; desc.depth = 17; desc.format.x = ZE_IMAGE_FORMAT_SWIZZLE_A; desc.format.y = ZE_IMAGE_FORMAT_SWIZZLE_0; desc.format.z = ZE_IMAGE_FORMAT_SWIZZLE_1; desc.format.w = ZE_IMAGE_FORMAT_SWIZZLE_X; auto imageHW = std::make_unique>>(); auto ret = imageHW->initialize(device, &desc); ASSERT_EQ(ZE_RESULT_SUCCESS, ret); auto surfaceState = &imageHW->surfaceState; ASSERT_EQ(surfaceState->getShaderChannelSelectRed(), RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_ALPHA); ASSERT_EQ(surfaceState->getShaderChannelSelectGreen(), RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_ZERO); ASSERT_EQ(surfaceState->getShaderChannelSelectBlue(), RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_ONE); ASSERT_EQ(surfaceState->getShaderChannelSelectAlpha(), RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_ZERO); } HWTEST2_F(ImageView, givenPlanarImageWhenCreateImageViewThenProperPlaneIsCreated, IsAtLeastSkl) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; const size_t width = 32; const size_t height = 32; const size_t depth = 1; ze_image_desc_t srcImgDesc = 
{ZE_STRUCTURE_TYPE_IMAGE_DESC, nullptr, (ZE_IMAGE_FLAG_KERNEL_WRITE | ZE_IMAGE_FLAG_BIAS_UNCACHED), ZE_IMAGE_TYPE_2D, {ZE_IMAGE_FORMAT_LAYOUT_NV12, ZE_IMAGE_FORMAT_TYPE_UINT, ZE_IMAGE_FORMAT_SWIZZLE_R, ZE_IMAGE_FORMAT_SWIZZLE_G, ZE_IMAGE_FORMAT_SWIZZLE_B, ZE_IMAGE_FORMAT_SWIZZLE_A}, width, height, depth, 0, 0}; auto imageHW = std::make_unique>>(); auto ret = imageHW->initialize(device, &srcImgDesc); ASSERT_EQ(ZE_RESULT_SUCCESS, ret); ze_image_view_planar_exp_desc_t planeYdesc = {}; planeYdesc.stype = ZE_STRUCTURE_TYPE_IMAGE_VIEW_PLANAR_EXP_DESC; planeYdesc.planeIndex = 0u; // Y plane ze_image_desc_t imageViewDescPlaneY = {ZE_STRUCTURE_TYPE_IMAGE_DESC, &planeYdesc, (ZE_IMAGE_FLAG_KERNEL_WRITE | ZE_IMAGE_FLAG_BIAS_UNCACHED), ZE_IMAGE_TYPE_2D, {ZE_IMAGE_FORMAT_LAYOUT_8, ZE_IMAGE_FORMAT_TYPE_UINT, ZE_IMAGE_FORMAT_SWIZZLE_A, ZE_IMAGE_FORMAT_SWIZZLE_B, ZE_IMAGE_FORMAT_SWIZZLE_G, ZE_IMAGE_FORMAT_SWIZZLE_R}, width, height, depth, 0, 0}; ze_image_handle_t planeY; ret = imageHW->createView(device, &imageViewDescPlaneY, &planeY); ASSERT_EQ(ZE_RESULT_SUCCESS, ret); ze_image_view_planar_exp_desc_t planeUVdesc = {}; planeUVdesc.stype = ZE_STRUCTURE_TYPE_IMAGE_VIEW_PLANAR_EXP_DESC; planeUVdesc.planeIndex = 1u; // UV plane ze_image_desc_t imageViewDescPlaneUV = {ZE_STRUCTURE_TYPE_IMAGE_DESC, &planeUVdesc, (ZE_IMAGE_FLAG_KERNEL_WRITE | ZE_IMAGE_FLAG_BIAS_UNCACHED), ZE_IMAGE_TYPE_2D, {ZE_IMAGE_FORMAT_LAYOUT_8_8, ZE_IMAGE_FORMAT_TYPE_UINT, ZE_IMAGE_FORMAT_SWIZZLE_A, ZE_IMAGE_FORMAT_SWIZZLE_B, ZE_IMAGE_FORMAT_SWIZZLE_G, ZE_IMAGE_FORMAT_SWIZZLE_R}, width / 2, height / 2, depth, 0, 0}; ze_image_handle_t planeUV; ret = imageHW->createView(device, &imageViewDescPlaneUV, &planeUV); ASSERT_EQ(ZE_RESULT_SUCCESS, ret); auto nv12Allocation = imageHW->getAllocation(); auto planeYAllocation = Image::fromHandle(planeY)->getAllocation(); auto planeUVAllocation = Image::fromHandle(planeUV)->getAllocation(); EXPECT_EQ(nv12Allocation->getGpuBaseAddress(), planeYAllocation->getGpuBaseAddress()); 
EXPECT_EQ(nv12Allocation->getGpuBaseAddress(), planeUVAllocation->getGpuBaseAddress()); zeImageDestroy(planeY); zeImageDestroy(planeUV); } HWTEST2_F(ImageView, givenPlanarImageWhenCreateImageWithInvalidStructViewThenProperErrorIsReturned, IsAtLeastSkl) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; const size_t width = 32; const size_t height = 32; const size_t depth = 1; ze_image_desc_t srcImgDesc = {ZE_STRUCTURE_TYPE_IMAGE_DESC, nullptr, (ZE_IMAGE_FLAG_KERNEL_WRITE | ZE_IMAGE_FLAG_BIAS_UNCACHED), ZE_IMAGE_TYPE_2D, {ZE_IMAGE_FORMAT_LAYOUT_NV12, ZE_IMAGE_FORMAT_TYPE_UINT, ZE_IMAGE_FORMAT_SWIZZLE_R, ZE_IMAGE_FORMAT_SWIZZLE_G, ZE_IMAGE_FORMAT_SWIZZLE_B, ZE_IMAGE_FORMAT_SWIZZLE_A}, width, height, depth, 0, 0}; auto imageHW = std::make_unique>>(); auto ret = imageHW->initialize(device, &srcImgDesc); ASSERT_EQ(ZE_RESULT_SUCCESS, ret); ze_image_view_planar_exp_desc_t planeYdesc = {}; planeYdesc.stype = ZE_STRUCTURE_TYPE_DEVICE_CACHE_PROPERTIES; planeYdesc.planeIndex = 0u; // Y plane ze_image_desc_t imageViewDescPlaneY = {ZE_STRUCTURE_TYPE_IMAGE_DESC, &planeYdesc, (ZE_IMAGE_FLAG_KERNEL_WRITE | ZE_IMAGE_FLAG_BIAS_UNCACHED), ZE_IMAGE_TYPE_2D, {ZE_IMAGE_FORMAT_LAYOUT_8, ZE_IMAGE_FORMAT_TYPE_UINT, ZE_IMAGE_FORMAT_SWIZZLE_A, ZE_IMAGE_FORMAT_SWIZZLE_B, ZE_IMAGE_FORMAT_SWIZZLE_G, ZE_IMAGE_FORMAT_SWIZZLE_R}, width, height, depth, 0, 0}; ze_image_handle_t planeY; ret = imageHW->createView(device, &imageViewDescPlaneY, &planeY); ASSERT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_ENUMERATION, ret); } HWTEST2_F(ImageCreate, givenFDWhenCreatingImageThenSuccessIsReturned, IsAtLeastSkl) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; ze_image_desc_t desc = {}; desc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC; desc.type = ZE_IMAGE_TYPE_3D; desc.format.layout = ZE_IMAGE_FORMAT_LAYOUT_8_8_8_8; desc.format.type = ZE_IMAGE_FORMAT_TYPE_UINT; desc.width = 11; desc.height = 13; desc.depth = 17; desc.format.x = ZE_IMAGE_FORMAT_SWIZZLE_A; desc.format.y = 
ZE_IMAGE_FORMAT_SWIZZLE_0; desc.format.z = ZE_IMAGE_FORMAT_SWIZZLE_1; desc.format.w = ZE_IMAGE_FORMAT_SWIZZLE_X; ze_external_memory_import_fd_t importFd = {}; importFd.fd = 1; importFd.flags = ZE_EXTERNAL_MEMORY_TYPE_FLAG_DMA_BUF; importFd.stype = ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMPORT_FD; desc.pNext = &importFd; auto imageHW = std::make_unique>>(); auto ret = imageHW->initialize(device, &desc); ASSERT_EQ(ZE_RESULT_SUCCESS, ret); ASSERT_EQ(static_cast(imageHW->getAllocation()->peekSharedHandle()), importFd.fd); } HWTEST2_F(ImageCreate, givenOpaqueFdWhenCreatingImageThenUnsuportedErrorIsReturned, IsAtLeastSkl) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; ze_image_desc_t desc = {}; desc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC; desc.type = ZE_IMAGE_TYPE_3D; desc.format.layout = ZE_IMAGE_FORMAT_LAYOUT_8_8_8_8; desc.format.type = ZE_IMAGE_FORMAT_TYPE_UINT; desc.width = 11; desc.height = 13; desc.depth = 17; desc.format.x = ZE_IMAGE_FORMAT_SWIZZLE_A; desc.format.y = ZE_IMAGE_FORMAT_SWIZZLE_0; desc.format.z = ZE_IMAGE_FORMAT_SWIZZLE_1; desc.format.w = ZE_IMAGE_FORMAT_SWIZZLE_X; ze_external_memory_import_fd_t importFd = {}; importFd.fd = 1; importFd.flags = ZE_EXTERNAL_MEMORY_TYPE_FLAG_OPAQUE_FD; importFd.stype = ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMPORT_FD; desc.pNext = &importFd; auto imageHW = std::make_unique>>(); auto ret = imageHW->initialize(device, &desc); ASSERT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_ENUMERATION, ret); } HWTEST2_F(ImageCreate, givenExportStructWhenCreatingImageThenUnsupportedErrorIsReturned, IsAtLeastSkl) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; ze_image_desc_t desc = {}; desc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC; desc.type = ZE_IMAGE_TYPE_3D; desc.format.layout = ZE_IMAGE_FORMAT_LAYOUT_8_8_8_8; desc.format.type = ZE_IMAGE_FORMAT_TYPE_UINT; desc.width = 11; desc.height = 13; desc.depth = 17; desc.format.x = ZE_IMAGE_FORMAT_SWIZZLE_A; desc.format.y = ZE_IMAGE_FORMAT_SWIZZLE_0; desc.format.z = 
ZE_IMAGE_FORMAT_SWIZZLE_1; desc.format.w = ZE_IMAGE_FORMAT_SWIZZLE_X; ze_external_memory_export_fd_t exportFd = {}; exportFd.fd = 1; exportFd.stype = ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_EXPORT_DESC; exportFd.flags = ZE_EXTERNAL_MEMORY_TYPE_FLAG_OPAQUE_WIN32; desc.pNext = &exportFd; auto imageHW = std::make_unique>>(); auto ret = imageHW->initialize(device, &desc); ASSERT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_ENUMERATION, ret); } class MemoryManagerNTHandleMock : public NEO::OsAgnosticMemoryManager { public: MemoryManagerNTHandleMock(NEO::ExecutionEnvironment &executionEnvironment) : NEO::OsAgnosticMemoryManager(executionEnvironment) {} NEO::GraphicsAllocation *createGraphicsAllocationFromNTHandle(void *handle, uint32_t rootDeviceIndex, AllocationType allocType) override { auto graphicsAllocation = createMemoryAllocation(AllocationType::INTERNAL_HOST_MEMORY, nullptr, reinterpret_cast(1), 1, 4096u, reinterpret_cast(handle), MemoryPool::SystemCpuInaccessible, rootDeviceIndex, false, false, false); graphicsAllocation->setSharedHandle(static_cast(reinterpret_cast(handle))); graphicsAllocation->set32BitAllocation(false); graphicsAllocation->setDefaultGmm(new Gmm(executionEnvironment.rootDeviceEnvironments[0]->getGmmClientContext(), nullptr, 1, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true)); return graphicsAllocation; } }; HWTEST2_F(ImageCreate, givenNTHandleWhenCreatingImageThenSuccessIsReturned, IsAtLeastSkl) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; ze_image_desc_t desc = {}; desc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC; desc.type = ZE_IMAGE_TYPE_3D; desc.format.layout = ZE_IMAGE_FORMAT_LAYOUT_8_8_8_8; desc.format.type = ZE_IMAGE_FORMAT_TYPE_UINT; desc.width = 11; desc.height = 13; desc.depth = 17; desc.format.x = ZE_IMAGE_FORMAT_SWIZZLE_A; desc.format.y = ZE_IMAGE_FORMAT_SWIZZLE_0; desc.format.z = ZE_IMAGE_FORMAT_SWIZZLE_1; desc.format.w = ZE_IMAGE_FORMAT_SWIZZLE_X; uint64_t imageHandle = 0x1; ze_external_memory_import_win32_handle_t 
importNTHandle = {}; importNTHandle.handle = &imageHandle; importNTHandle.flags = ZE_EXTERNAL_MEMORY_TYPE_FLAG_OPAQUE_WIN32; importNTHandle.stype = ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMPORT_WIN32; desc.pNext = &importNTHandle; NEO::MockDevice *neoDevice = nullptr; auto executionEnvironment = NEO::MockDevice::prepareExecutionEnvironment(NEO::defaultHwInfo.get(), 0); executionEnvironment->memoryManager.reset(new MemoryManagerNTHandleMock(*executionEnvironment)); neoDevice = NEO::MockDevice::createWithExecutionEnvironment(NEO::defaultHwInfo.get(), executionEnvironment, 0); driverHandle->setMemoryManager(executionEnvironment->memoryManager.get()); ze_result_t result = ZE_RESULT_SUCCESS; auto device = L0::Device::create(driverHandle.get(), neoDevice, false, &result); auto imageHW = std::make_unique>>(); auto ret = imageHW->initialize(device, &desc); ASSERT_EQ(ZE_RESULT_SUCCESS, ret); ASSERT_EQ(imageHW->getAllocation()->peekSharedHandle(), NEO::toOsHandle(importNTHandle.handle)); imageHW.reset(nullptr); delete device; } class FailMemoryManagerMock : public NEO::OsAgnosticMemoryManager { public: FailMemoryManagerMock(NEO::ExecutionEnvironment &executionEnvironment) : NEO::OsAgnosticMemoryManager(executionEnvironment) {} NEO::GraphicsAllocation *allocateGraphicsMemoryWithProperties(const AllocationProperties &properties) override { if (fail) { return nullptr; } return OsAgnosticMemoryManager::allocateGraphicsMemoryWithProperties(properties); } bool fail = false; }; HWTEST2_F(ImageCreate, givenImageDescWhenFailImageAllocationThenProperErrorIsReturned, IsAtLeastSkl) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; VariableBackup backupSipInitType{&MockSipData::useMockSip}; ze_image_desc_t desc = {}; desc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC; desc.type = ZE_IMAGE_TYPE_3D; desc.format.layout = ZE_IMAGE_FORMAT_LAYOUT_8_8_8_8; desc.format.type = ZE_IMAGE_FORMAT_TYPE_UINT; desc.width = 11; desc.height = 13; desc.depth = 17; desc.format.x = 
ZE_IMAGE_FORMAT_SWIZZLE_A; desc.format.y = ZE_IMAGE_FORMAT_SWIZZLE_0; desc.format.z = ZE_IMAGE_FORMAT_SWIZZLE_1; desc.format.w = ZE_IMAGE_FORMAT_SWIZZLE_X; auto isHexadecimalArrayPrefered = NEO::HwHelper::get(NEO::defaultHwInfo->platform.eRenderCoreFamily).isSipKernelAsHexadecimalArrayPreferred(); if (isHexadecimalArrayPrefered) { backupSipInitType = true; } NEO::MockDevice *neoDevice = nullptr; auto executionEnvironment = NEO::MockDevice::prepareExecutionEnvironment(NEO::defaultHwInfo.get(), 0); auto failMemMngr = new FailMemoryManagerMock(*executionEnvironment); executionEnvironment->memoryManager.reset(failMemMngr); neoDevice = NEO::MockDevice::createWithExecutionEnvironment(NEO::defaultHwInfo.get(), executionEnvironment, 0); driverHandle->setMemoryManager(executionEnvironment->memoryManager.get()); ze_result_t result = ZE_RESULT_SUCCESS; auto device = L0::Device::create(driverHandle.get(), neoDevice, false, &result); L0::Image *imageHandle = nullptr; failMemMngr->fail = true; auto ret = L0::Image::create(neoDevice->getHardwareInfo().platform.eProductFamily, device, &desc, &imageHandle); ASSERT_EQ(ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY, ret); EXPECT_EQ(imageHandle, nullptr); delete device; } HWTEST2_F(ImageCreate, givenMediaBlockOptionWhenCopySurfaceStateThenSurfaceStateIsSet, IsAtLeastSkl) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; ze_image_desc_t desc = {}; desc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC; desc.type = ZE_IMAGE_TYPE_3D; desc.format.layout = ZE_IMAGE_FORMAT_LAYOUT_8_8_8_8; desc.format.type = ZE_IMAGE_FORMAT_TYPE_UINT; desc.width = 11; desc.height = 13; desc.depth = 17; desc.format.x = ZE_IMAGE_FORMAT_SWIZZLE_A; desc.format.y = ZE_IMAGE_FORMAT_SWIZZLE_0; desc.format.z = ZE_IMAGE_FORMAT_SWIZZLE_1; desc.format.w = ZE_IMAGE_FORMAT_SWIZZLE_X; auto imageHW = std::make_unique>>(); auto ret = imageHW->initialize(device, &desc); ASSERT_EQ(ZE_RESULT_SUCCESS, ret); auto surfaceState = &imageHW->surfaceState; RENDER_SURFACE_STATE rss = 
{}; imageHW->copySurfaceStateToSSH(&rss, 0u, true); EXPECT_EQ(surfaceState->getWidth(), (static_cast(imageHW->getImageInfo().surfaceFormat->ImageElementSizeInBytes) * static_cast(imageHW->getImageInfo().imgDesc.imageWidth)) / sizeof(uint32_t)); } HWTEST2_P(TestImageFormats, givenValidLayoutAndTypeWhenCreateImageCoreFamilyThenValidImageIsCreated, IsAtLeastSkl) { auto params = GetParam(); ze_image_desc_t zeDesc = {}; zeDesc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC; zeDesc.arraylevels = 1u; zeDesc.depth = 10u; zeDesc.height = 10u; zeDesc.width = 10u; zeDesc.miplevels = 1u; zeDesc.type = ZE_IMAGE_TYPE_2D; zeDesc.flags = ZE_IMAGE_FLAG_KERNEL_WRITE; zeDesc.format = {}; zeDesc.format.layout = params.first; zeDesc.format.type = params.second; zeDesc.format.x = ZE_IMAGE_FORMAT_SWIZZLE_R; zeDesc.format.y = ZE_IMAGE_FORMAT_SWIZZLE_G; zeDesc.format.z = ZE_IMAGE_FORMAT_SWIZZLE_B; zeDesc.format.w = ZE_IMAGE_FORMAT_SWIZZLE_A; auto imageHW = std::make_unique>>(); imageHW->initialize(device, &zeDesc); EXPECT_EQ(imageHW->getAllocation()->getAllocationType(), NEO::AllocationType::IMAGE); auto RSS = imageHW->surfaceState; EXPECT_EQ(RSS.getSurfaceType(), FamilyType::RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_2D); EXPECT_EQ(RSS.getAuxiliarySurfaceMode(), FamilyType::RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE); EXPECT_EQ(RSS.getRenderTargetViewExtent(), 1u); auto hAlign = static_cast(imageHW->getAllocation()->getDefaultGmm()->gmmResourceInfo->getHAlignSurfaceState()); auto vAlign = static_cast(imageHW->getAllocation()->getDefaultGmm()->gmmResourceInfo->getVAlignSurfaceState()); EXPECT_EQ(RSS.getSurfaceHorizontalAlignment(), hAlign); EXPECT_EQ(RSS.getSurfaceVerticalAlignment(), vAlign); auto isMediaFormatLayout = imageHW->isMediaFormat(params.first); if (isMediaFormatLayout) { auto imgInfo = imageHW->getImageInfo(); EXPECT_EQ(RSS.getShaderChannelSelectAlpha(), FamilyType::RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_ZERO); 
EXPECT_EQ(RSS.getYOffsetForUOrUvPlane(), imgInfo.yOffsetForUVPlane); EXPECT_EQ(RSS.getXOffsetForUOrUvPlane(), imgInfo.xOffset); } else { EXPECT_EQ(RSS.getYOffsetForUOrUvPlane(), 0u); EXPECT_EQ(RSS.getXOffsetForUOrUvPlane(), 0u); } EXPECT_EQ(RSS.getSurfaceMinLod(), 0u); EXPECT_EQ(RSS.getMipCountLod(), 0u); if (!isMediaFormatLayout) { EXPECT_EQ(RSS.getShaderChannelSelectRed(), FamilyType::RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT::SHADER_CHANNEL_SELECT_RED); EXPECT_EQ(RSS.getShaderChannelSelectGreen(), FamilyType::RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT::SHADER_CHANNEL_SELECT_GREEN); EXPECT_EQ(RSS.getShaderChannelSelectBlue(), FamilyType::RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT::SHADER_CHANNEL_SELECT_BLUE); EXPECT_EQ(RSS.getShaderChannelSelectAlpha(), FamilyType::RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT::SHADER_CHANNEL_SELECT_ALPHA); } else { EXPECT_EQ(RSS.getShaderChannelSelectRed(), FamilyType::RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT::SHADER_CHANNEL_SELECT_RED); EXPECT_EQ(RSS.getShaderChannelSelectGreen(), FamilyType::RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT::SHADER_CHANNEL_SELECT_GREEN); EXPECT_EQ(RSS.getShaderChannelSelectBlue(), FamilyType::RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT::SHADER_CHANNEL_SELECT_BLUE); } EXPECT_EQ(RSS.getNumberOfMultisamples(), FamilyType::RENDER_SURFACE_STATE::NUMBER_OF_MULTISAMPLES::NUMBER_OF_MULTISAMPLES_MULTISAMPLECOUNT_1); } std::pair validFormats[] = { {ZE_IMAGE_FORMAT_LAYOUT_8, ZE_IMAGE_FORMAT_TYPE_UINT}, {ZE_IMAGE_FORMAT_LAYOUT_8, ZE_IMAGE_FORMAT_TYPE_SINT}, {ZE_IMAGE_FORMAT_LAYOUT_8, ZE_IMAGE_FORMAT_TYPE_UNORM}, {ZE_IMAGE_FORMAT_LAYOUT_8, ZE_IMAGE_FORMAT_TYPE_SNORM}, {ZE_IMAGE_FORMAT_LAYOUT_16, ZE_IMAGE_FORMAT_TYPE_UINT}, {ZE_IMAGE_FORMAT_LAYOUT_16, ZE_IMAGE_FORMAT_TYPE_SINT}, {ZE_IMAGE_FORMAT_LAYOUT_16, ZE_IMAGE_FORMAT_TYPE_UNORM}, {ZE_IMAGE_FORMAT_LAYOUT_16, ZE_IMAGE_FORMAT_TYPE_SNORM}, {ZE_IMAGE_FORMAT_LAYOUT_16, ZE_IMAGE_FORMAT_TYPE_FLOAT}, {ZE_IMAGE_FORMAT_LAYOUT_32, ZE_IMAGE_FORMAT_TYPE_UINT}, 
{ZE_IMAGE_FORMAT_LAYOUT_32, ZE_IMAGE_FORMAT_TYPE_SINT}, {ZE_IMAGE_FORMAT_LAYOUT_32, ZE_IMAGE_FORMAT_TYPE_UNORM}, {ZE_IMAGE_FORMAT_LAYOUT_32, ZE_IMAGE_FORMAT_TYPE_SNORM}, {ZE_IMAGE_FORMAT_LAYOUT_32, ZE_IMAGE_FORMAT_TYPE_FLOAT}, {ZE_IMAGE_FORMAT_LAYOUT_8_8, ZE_IMAGE_FORMAT_TYPE_UINT}, {ZE_IMAGE_FORMAT_LAYOUT_8_8, ZE_IMAGE_FORMAT_TYPE_SINT}, {ZE_IMAGE_FORMAT_LAYOUT_8_8, ZE_IMAGE_FORMAT_TYPE_UNORM}, {ZE_IMAGE_FORMAT_LAYOUT_8_8, ZE_IMAGE_FORMAT_TYPE_SNORM}, {ZE_IMAGE_FORMAT_LAYOUT_8_8_8_8, ZE_IMAGE_FORMAT_TYPE_UINT}, {ZE_IMAGE_FORMAT_LAYOUT_8_8_8_8, ZE_IMAGE_FORMAT_TYPE_SINT}, {ZE_IMAGE_FORMAT_LAYOUT_8_8_8_8, ZE_IMAGE_FORMAT_TYPE_UNORM}, {ZE_IMAGE_FORMAT_LAYOUT_8_8_8_8, ZE_IMAGE_FORMAT_TYPE_SNORM}, {ZE_IMAGE_FORMAT_LAYOUT_16_16, ZE_IMAGE_FORMAT_TYPE_UINT}, {ZE_IMAGE_FORMAT_LAYOUT_16_16, ZE_IMAGE_FORMAT_TYPE_SINT}, {ZE_IMAGE_FORMAT_LAYOUT_16_16, ZE_IMAGE_FORMAT_TYPE_UNORM}, {ZE_IMAGE_FORMAT_LAYOUT_16_16, ZE_IMAGE_FORMAT_TYPE_SNORM}, {ZE_IMAGE_FORMAT_LAYOUT_16_16, ZE_IMAGE_FORMAT_TYPE_FLOAT}, {ZE_IMAGE_FORMAT_LAYOUT_16_16_16_16, ZE_IMAGE_FORMAT_TYPE_UINT}, {ZE_IMAGE_FORMAT_LAYOUT_16_16_16_16, ZE_IMAGE_FORMAT_TYPE_SINT}, {ZE_IMAGE_FORMAT_LAYOUT_16_16_16_16, ZE_IMAGE_FORMAT_TYPE_UNORM}, {ZE_IMAGE_FORMAT_LAYOUT_16_16_16_16, ZE_IMAGE_FORMAT_TYPE_SNORM}, {ZE_IMAGE_FORMAT_LAYOUT_16_16_16_16, ZE_IMAGE_FORMAT_TYPE_FLOAT}, {ZE_IMAGE_FORMAT_LAYOUT_32_32, ZE_IMAGE_FORMAT_TYPE_UINT}, {ZE_IMAGE_FORMAT_LAYOUT_32_32, ZE_IMAGE_FORMAT_TYPE_SINT}, {ZE_IMAGE_FORMAT_LAYOUT_32_32, ZE_IMAGE_FORMAT_TYPE_UNORM}, {ZE_IMAGE_FORMAT_LAYOUT_32_32, ZE_IMAGE_FORMAT_TYPE_SNORM}, {ZE_IMAGE_FORMAT_LAYOUT_32_32, ZE_IMAGE_FORMAT_TYPE_FLOAT}, {ZE_IMAGE_FORMAT_LAYOUT_32_32_32_32, ZE_IMAGE_FORMAT_TYPE_UINT}, {ZE_IMAGE_FORMAT_LAYOUT_32_32_32_32, ZE_IMAGE_FORMAT_TYPE_SINT}, {ZE_IMAGE_FORMAT_LAYOUT_32_32_32_32, ZE_IMAGE_FORMAT_TYPE_UNORM}, {ZE_IMAGE_FORMAT_LAYOUT_32_32_32_32, ZE_IMAGE_FORMAT_TYPE_SNORM}, {ZE_IMAGE_FORMAT_LAYOUT_32_32_32_32, ZE_IMAGE_FORMAT_TYPE_FLOAT}, {ZE_IMAGE_FORMAT_LAYOUT_10_10_10_2, 
ZE_IMAGE_FORMAT_TYPE_UINT}, {ZE_IMAGE_FORMAT_LAYOUT_10_10_10_2, ZE_IMAGE_FORMAT_TYPE_SINT}, {ZE_IMAGE_FORMAT_LAYOUT_10_10_10_2, ZE_IMAGE_FORMAT_TYPE_UNORM}, {ZE_IMAGE_FORMAT_LAYOUT_10_10_10_2, ZE_IMAGE_FORMAT_TYPE_SNORM}, {ZE_IMAGE_FORMAT_LAYOUT_11_11_10, ZE_IMAGE_FORMAT_TYPE_FLOAT}, {ZE_IMAGE_FORMAT_LAYOUT_5_6_5, ZE_IMAGE_FORMAT_TYPE_UNORM}, {ZE_IMAGE_FORMAT_LAYOUT_5_5_5_1, ZE_IMAGE_FORMAT_TYPE_UNORM}, {ZE_IMAGE_FORMAT_LAYOUT_4_4_4_4, ZE_IMAGE_FORMAT_TYPE_UNORM}, {ZE_IMAGE_FORMAT_LAYOUT_Y8, ZE_IMAGE_FORMAT_TYPE_UNORM}, {ZE_IMAGE_FORMAT_LAYOUT_NV12, ZE_IMAGE_FORMAT_TYPE_UNORM}, {ZE_IMAGE_FORMAT_LAYOUT_YUYV, ZE_IMAGE_FORMAT_TYPE_UNORM}, {ZE_IMAGE_FORMAT_LAYOUT_VYUY, ZE_IMAGE_FORMAT_TYPE_UNORM}, {ZE_IMAGE_FORMAT_LAYOUT_YVYU, ZE_IMAGE_FORMAT_TYPE_UNORM}, {ZE_IMAGE_FORMAT_LAYOUT_UYVY, ZE_IMAGE_FORMAT_TYPE_UNORM}, {ZE_IMAGE_FORMAT_LAYOUT_AYUV, ZE_IMAGE_FORMAT_TYPE_UNORM}, {ZE_IMAGE_FORMAT_LAYOUT_P010, ZE_IMAGE_FORMAT_TYPE_UNORM}, {ZE_IMAGE_FORMAT_LAYOUT_Y410, ZE_IMAGE_FORMAT_TYPE_UNORM}, {ZE_IMAGE_FORMAT_LAYOUT_P012, ZE_IMAGE_FORMAT_TYPE_UNORM}, {ZE_IMAGE_FORMAT_LAYOUT_Y216, ZE_IMAGE_FORMAT_TYPE_UNORM}, {ZE_IMAGE_FORMAT_LAYOUT_P016, ZE_IMAGE_FORMAT_TYPE_UNORM}, {ZE_IMAGE_FORMAT_LAYOUT_RGBP, ZE_IMAGE_FORMAT_TYPE_UNORM}, {ZE_IMAGE_FORMAT_LAYOUT_BRGP, ZE_IMAGE_FORMAT_TYPE_UNORM}}; INSTANTIATE_TEST_CASE_P( validImageFormats, TestImageFormats, testing::ValuesIn(validFormats)); TEST(ImageFormatDescHelperTest, givenUnsupportedImageFormatLayoutAndTypeThenProperClEnumIsReturned) { auto invalid = static_cast(CL_INVALID_VALUE); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_8, ZE_IMAGE_FORMAT_TYPE_FLOAT}), invalid); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_8_8, ZE_IMAGE_FORMAT_TYPE_FLOAT}), invalid); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_8_8_8_8, ZE_IMAGE_FORMAT_TYPE_FLOAT}), invalid); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_32, ZE_IMAGE_FORMAT_TYPE_UNORM}), invalid); 
EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_10_10_10_2, ZE_IMAGE_FORMAT_TYPE_FLOAT}), invalid); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_11_11_10, ZE_IMAGE_FORMAT_TYPE_UINT}), invalid); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_11_11_10, ZE_IMAGE_FORMAT_TYPE_SINT}), invalid); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_11_11_10, ZE_IMAGE_FORMAT_TYPE_UNORM}), invalid); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_11_11_10, ZE_IMAGE_FORMAT_TYPE_SNORM}), invalid); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_5_6_5, ZE_IMAGE_FORMAT_TYPE_UINT}), invalid); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_5_6_5, ZE_IMAGE_FORMAT_TYPE_SINT}), invalid); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_5_6_5, ZE_IMAGE_FORMAT_TYPE_SNORM}), invalid); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_5_6_5, ZE_IMAGE_FORMAT_TYPE_FLOAT}), invalid); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_5_5_5_1, ZE_IMAGE_FORMAT_TYPE_UINT}), invalid); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_5_5_5_1, ZE_IMAGE_FORMAT_TYPE_SINT}), invalid); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_5_5_5_1, ZE_IMAGE_FORMAT_TYPE_SNORM}), invalid); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_5_5_5_1, ZE_IMAGE_FORMAT_TYPE_FLOAT}), invalid); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_4_4_4_4, ZE_IMAGE_FORMAT_TYPE_UINT}), invalid); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_4_4_4_4, ZE_IMAGE_FORMAT_TYPE_SINT}), invalid); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_4_4_4_4, ZE_IMAGE_FORMAT_TYPE_SNORM}), invalid); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_4_4_4_4, ZE_IMAGE_FORMAT_TYPE_FLOAT}), invalid); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_NV12, ZE_IMAGE_FORMAT_TYPE_UINT}), invalid); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_NV12, ZE_IMAGE_FORMAT_TYPE_SINT}), invalid); 
EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_NV12, ZE_IMAGE_FORMAT_TYPE_SNORM}), invalid); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_NV12, ZE_IMAGE_FORMAT_TYPE_FLOAT}), invalid); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_YUYV, ZE_IMAGE_FORMAT_TYPE_UINT}), invalid); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_YUYV, ZE_IMAGE_FORMAT_TYPE_SINT}), invalid); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_YUYV, ZE_IMAGE_FORMAT_TYPE_SNORM}), invalid); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_YUYV, ZE_IMAGE_FORMAT_TYPE_FLOAT}), invalid); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_VYUY, ZE_IMAGE_FORMAT_TYPE_UINT}), invalid); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_VYUY, ZE_IMAGE_FORMAT_TYPE_SINT}), invalid); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_VYUY, ZE_IMAGE_FORMAT_TYPE_SNORM}), invalid); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_VYUY, ZE_IMAGE_FORMAT_TYPE_FLOAT}), invalid); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_YVYU, ZE_IMAGE_FORMAT_TYPE_UINT}), invalid); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_YVYU, ZE_IMAGE_FORMAT_TYPE_SINT}), invalid); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_YVYU, ZE_IMAGE_FORMAT_TYPE_SNORM}), invalid); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_YVYU, ZE_IMAGE_FORMAT_TYPE_FLOAT}), invalid); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_UYVY, ZE_IMAGE_FORMAT_TYPE_UINT}), invalid); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_UYVY, ZE_IMAGE_FORMAT_TYPE_SINT}), invalid); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_UYVY, ZE_IMAGE_FORMAT_TYPE_SNORM}), invalid); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_UYVY, ZE_IMAGE_FORMAT_TYPE_FLOAT}), invalid); } TEST(ImageFormatDescHelperTest, givenSupportedImageFormatLayoutAndTypeThenProperClEnumIsReturned) { EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_8, ZE_IMAGE_FORMAT_TYPE_UINT}), 
static_cast(CL_UNSIGNED_INT8)); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_8, ZE_IMAGE_FORMAT_TYPE_SINT}), static_cast(CL_SIGNED_INT8)); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_8, ZE_IMAGE_FORMAT_TYPE_UNORM}), static_cast(CL_UNORM_INT8)); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_8, ZE_IMAGE_FORMAT_TYPE_SNORM}), static_cast(CL_SNORM_INT8)); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_16, ZE_IMAGE_FORMAT_TYPE_UINT}), static_cast(CL_UNSIGNED_INT16)); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_16, ZE_IMAGE_FORMAT_TYPE_SINT}), static_cast(CL_SIGNED_INT16)); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_16, ZE_IMAGE_FORMAT_TYPE_UNORM}), static_cast(CL_UNORM_INT16)); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_16, ZE_IMAGE_FORMAT_TYPE_SNORM}), static_cast(CL_SNORM_INT16)); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_16, ZE_IMAGE_FORMAT_TYPE_FLOAT}), static_cast(CL_HALF_FLOAT)); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_32, ZE_IMAGE_FORMAT_TYPE_UINT}), static_cast(CL_UNSIGNED_INT32)); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_32, ZE_IMAGE_FORMAT_TYPE_SINT}), static_cast(CL_SIGNED_INT32)); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_32, ZE_IMAGE_FORMAT_TYPE_FLOAT}), static_cast(CL_FLOAT)); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_8_8, ZE_IMAGE_FORMAT_TYPE_UINT}), static_cast(CL_UNSIGNED_INT8)); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_8_8, ZE_IMAGE_FORMAT_TYPE_SINT}), static_cast(CL_SIGNED_INT8)); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_8_8, ZE_IMAGE_FORMAT_TYPE_UNORM}), static_cast(CL_UNORM_INT8)); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_8_8, ZE_IMAGE_FORMAT_TYPE_SNORM}), static_cast(CL_SNORM_INT8)); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_8_8_8_8, ZE_IMAGE_FORMAT_TYPE_UINT}), static_cast(CL_UNSIGNED_INT8)); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_8_8_8_8, 
ZE_IMAGE_FORMAT_TYPE_SINT}), static_cast(CL_SIGNED_INT8)); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_8_8_8_8, ZE_IMAGE_FORMAT_TYPE_UNORM}), static_cast(CL_UNORM_INT8)); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_8_8_8_8, ZE_IMAGE_FORMAT_TYPE_SNORM}), static_cast(CL_SNORM_INT8)); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_16_16, ZE_IMAGE_FORMAT_TYPE_UINT}), static_cast(CL_UNSIGNED_INT16)); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_16_16, ZE_IMAGE_FORMAT_TYPE_SINT}), static_cast(CL_SIGNED_INT16)); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_16_16, ZE_IMAGE_FORMAT_TYPE_UNORM}), static_cast(CL_UNORM_INT16)); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_16_16, ZE_IMAGE_FORMAT_TYPE_SNORM}), static_cast(CL_SNORM_INT16)); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_16_16, ZE_IMAGE_FORMAT_TYPE_FLOAT}), static_cast(CL_HALF_FLOAT)); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_16_16_16_16, ZE_IMAGE_FORMAT_TYPE_UINT}), static_cast(CL_UNSIGNED_INT16)); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_16_16_16_16, ZE_IMAGE_FORMAT_TYPE_SINT}), static_cast(CL_SIGNED_INT16)); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_16_16_16_16, ZE_IMAGE_FORMAT_TYPE_UNORM}), static_cast(CL_UNORM_INT16)); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_16_16_16_16, ZE_IMAGE_FORMAT_TYPE_SNORM}), static_cast(CL_SNORM_INT16)); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_16_16_16_16, ZE_IMAGE_FORMAT_TYPE_FLOAT}), static_cast(CL_HALF_FLOAT)); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_32_32, ZE_IMAGE_FORMAT_TYPE_UINT}), static_cast(CL_UNSIGNED_INT32)); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_32_32, ZE_IMAGE_FORMAT_TYPE_SINT}), static_cast(CL_SIGNED_INT32)); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_32_32, ZE_IMAGE_FORMAT_TYPE_FLOAT}), static_cast(CL_FLOAT)); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_32_32_32_32, 
ZE_IMAGE_FORMAT_TYPE_UINT}), static_cast(CL_UNSIGNED_INT32)); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_32_32_32_32, ZE_IMAGE_FORMAT_TYPE_SINT}), static_cast(CL_SIGNED_INT32)); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_32_32_32_32, ZE_IMAGE_FORMAT_TYPE_FLOAT}), static_cast(CL_FLOAT)); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_10_10_10_2, ZE_IMAGE_FORMAT_TYPE_UNORM}), static_cast(CL_UNORM_INT_101010_2)); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_5_6_5, ZE_IMAGE_FORMAT_TYPE_UNORM}), static_cast(CL_UNORM_SHORT_565)); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_5_5_5_1, ZE_IMAGE_FORMAT_TYPE_UNORM}), static_cast(CL_UNORM_SHORT_555)); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_NV12, ZE_IMAGE_FORMAT_TYPE_UNORM}), static_cast(CL_NV12_INTEL)); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_YUYV, ZE_IMAGE_FORMAT_TYPE_UNORM}), static_cast(CL_YUYV_INTEL)); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_VYUY, ZE_IMAGE_FORMAT_TYPE_UNORM}), static_cast(CL_VYUY_INTEL)); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_YVYU, ZE_IMAGE_FORMAT_TYPE_UNORM}), static_cast(CL_YVYU_INTEL)); EXPECT_EQ(getClChannelDataType({ZE_IMAGE_FORMAT_LAYOUT_UYVY, ZE_IMAGE_FORMAT_TYPE_UNORM}), static_cast(CL_UYVY_INTEL)); } TEST(ImageFormatDescHelperTest, givenSwizzlesThenEqualityIsProperlyDetermined) { swizzles ref{ZE_IMAGE_FORMAT_SWIZZLE_R, ZE_IMAGE_FORMAT_SWIZZLE_G, ZE_IMAGE_FORMAT_SWIZZLE_B, ZE_IMAGE_FORMAT_SWIZZLE_A}; EXPECT_FALSE((ref == swizzles{ZE_IMAGE_FORMAT_SWIZZLE_0, ZE_IMAGE_FORMAT_SWIZZLE_0, ZE_IMAGE_FORMAT_SWIZZLE_0, ZE_IMAGE_FORMAT_SWIZZLE_0})); EXPECT_FALSE((ref == swizzles{ZE_IMAGE_FORMAT_SWIZZLE_R, ZE_IMAGE_FORMAT_SWIZZLE_0, ZE_IMAGE_FORMAT_SWIZZLE_0, ZE_IMAGE_FORMAT_SWIZZLE_0})); EXPECT_FALSE((ref == swizzles{ZE_IMAGE_FORMAT_SWIZZLE_R, ZE_IMAGE_FORMAT_SWIZZLE_G, ZE_IMAGE_FORMAT_SWIZZLE_0, ZE_IMAGE_FORMAT_SWIZZLE_0})); EXPECT_FALSE((ref == swizzles{ZE_IMAGE_FORMAT_SWIZZLE_R, 
ZE_IMAGE_FORMAT_SWIZZLE_G, ZE_IMAGE_FORMAT_SWIZZLE_B, ZE_IMAGE_FORMAT_SWIZZLE_0})); EXPECT_TRUE((ref == swizzles{ZE_IMAGE_FORMAT_SWIZZLE_R, ZE_IMAGE_FORMAT_SWIZZLE_G, ZE_IMAGE_FORMAT_SWIZZLE_B, ZE_IMAGE_FORMAT_SWIZZLE_A})); } TEST(ImageFormatDescHelperTest, givenSupportedSwizzlesThenProperClEnumIsReturned) { ze_image_format_t format{}; format.x = ZE_IMAGE_FORMAT_SWIZZLE_R; format.y = ZE_IMAGE_FORMAT_SWIZZLE_0; format.z = ZE_IMAGE_FORMAT_SWIZZLE_0; format.w = ZE_IMAGE_FORMAT_SWIZZLE_1; EXPECT_EQ(getClChannelOrder(format), static_cast(CL_R)); format.x = ZE_IMAGE_FORMAT_SWIZZLE_0; format.y = ZE_IMAGE_FORMAT_SWIZZLE_0; format.z = ZE_IMAGE_FORMAT_SWIZZLE_0; format.w = ZE_IMAGE_FORMAT_SWIZZLE_A; EXPECT_EQ(getClChannelOrder(format), static_cast(CL_A)); format.x = ZE_IMAGE_FORMAT_SWIZZLE_R; format.y = ZE_IMAGE_FORMAT_SWIZZLE_G; format.z = ZE_IMAGE_FORMAT_SWIZZLE_0; format.w = ZE_IMAGE_FORMAT_SWIZZLE_1; EXPECT_EQ(getClChannelOrder(format), static_cast(CL_RG)); format.x = ZE_IMAGE_FORMAT_SWIZZLE_R; format.y = ZE_IMAGE_FORMAT_SWIZZLE_0; format.z = ZE_IMAGE_FORMAT_SWIZZLE_0; format.w = ZE_IMAGE_FORMAT_SWIZZLE_A; EXPECT_EQ(getClChannelOrder(format), static_cast(CL_RA)); format.x = ZE_IMAGE_FORMAT_SWIZZLE_R; format.y = ZE_IMAGE_FORMAT_SWIZZLE_G; format.z = ZE_IMAGE_FORMAT_SWIZZLE_B; format.w = ZE_IMAGE_FORMAT_SWIZZLE_1; EXPECT_EQ(getClChannelOrder(format), static_cast(CL_RGB)); format.x = ZE_IMAGE_FORMAT_SWIZZLE_R; format.y = ZE_IMAGE_FORMAT_SWIZZLE_G; format.z = ZE_IMAGE_FORMAT_SWIZZLE_B; format.w = ZE_IMAGE_FORMAT_SWIZZLE_A; EXPECT_EQ(getClChannelOrder(format), static_cast(CL_RGBA)); format.x = ZE_IMAGE_FORMAT_SWIZZLE_A; format.y = ZE_IMAGE_FORMAT_SWIZZLE_R; format.z = ZE_IMAGE_FORMAT_SWIZZLE_G; format.w = ZE_IMAGE_FORMAT_SWIZZLE_B; EXPECT_EQ(getClChannelOrder(format), static_cast(CL_ARGB)); format.x = ZE_IMAGE_FORMAT_SWIZZLE_A; format.y = ZE_IMAGE_FORMAT_SWIZZLE_B; format.z = ZE_IMAGE_FORMAT_SWIZZLE_G; format.w = ZE_IMAGE_FORMAT_SWIZZLE_R; EXPECT_EQ(getClChannelOrder(format), 
static_cast(CL_ABGR)); format.x = ZE_IMAGE_FORMAT_SWIZZLE_B; format.y = ZE_IMAGE_FORMAT_SWIZZLE_G; format.z = ZE_IMAGE_FORMAT_SWIZZLE_R; format.w = ZE_IMAGE_FORMAT_SWIZZLE_A; EXPECT_EQ(getClChannelOrder(format), static_cast(CL_BGRA)); } using ImageGetMemoryProperties = Test; HWTEST2_F(ImageGetMemoryProperties, givenImageMemoryPropertiesExpStructureWhenGetMemroyPropertiesThenProperDataAreSet, IsAtLeastSkl) { ze_image_desc_t zeDesc = {}; zeDesc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC; zeDesc.arraylevels = 1u; zeDesc.depth = 1u; zeDesc.height = 1u; zeDesc.width = 1u; zeDesc.miplevels = 1u; zeDesc.type = ZE_IMAGE_TYPE_2DARRAY; zeDesc.flags = ZE_IMAGE_FLAG_BIAS_UNCACHED; zeDesc.format = {ZE_IMAGE_FORMAT_LAYOUT_32, ZE_IMAGE_FORMAT_TYPE_UINT, ZE_IMAGE_FORMAT_SWIZZLE_R, ZE_IMAGE_FORMAT_SWIZZLE_G, ZE_IMAGE_FORMAT_SWIZZLE_B, ZE_IMAGE_FORMAT_SWIZZLE_A}; Image *image_ptr; auto result = Image::create(productFamily, device, &zeDesc, &image_ptr); EXPECT_EQ(result, ZE_RESULT_SUCCESS); std::unique_ptr image(image_ptr); ASSERT_NE(image, nullptr); ze_image_memory_properties_exp_t imageMemoryPropertiesExp = {}; image->getMemoryProperties(&imageMemoryPropertiesExp); auto imageInfo = image->getImageInfo(); EXPECT_EQ(imageInfo.surfaceFormat->ImageElementSizeInBytes, imageMemoryPropertiesExp.size); EXPECT_EQ(imageInfo.slicePitch, imageMemoryPropertiesExp.slicePitch); EXPECT_EQ(imageInfo.rowPitch, imageMemoryPropertiesExp.rowPitch); } HWTEST2_F(ImageGetMemoryProperties, givenDebugFlagSetWhenCreatingImageThenEnableCompression, IsAtLeastSkl) { DebugManagerStateRestore restore; device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable.ftrRenderCompressedImages = true; ze_image_desc_t zeDesc = {}; zeDesc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC; zeDesc.arraylevels = 1u; zeDesc.depth = 1u; zeDesc.height = 1u; zeDesc.width = 1u; zeDesc.miplevels = 1u; zeDesc.type = ZE_IMAGE_TYPE_2DARRAY; zeDesc.flags = ZE_IMAGE_FLAG_BIAS_UNCACHED; zeDesc.format = 
{ZE_IMAGE_FORMAT_LAYOUT_32, ZE_IMAGE_FORMAT_TYPE_UINT, ZE_IMAGE_FORMAT_SWIZZLE_R, ZE_IMAGE_FORMAT_SWIZZLE_G, ZE_IMAGE_FORMAT_SWIZZLE_B, ZE_IMAGE_FORMAT_SWIZZLE_A}; { Image *image_ptr = nullptr; auto result = Image::create(productFamily, device, &zeDesc, &image_ptr); EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_NE(nullptr, image_ptr); std::unique_ptr image(image_ptr); EXPECT_EQ(L0HwHelperHw::get().imageCompressionSupported(device->getHwInfo()), image->getAllocation()->isCompressionEnabled()); } { NEO::DebugManager.flags.RenderCompressedImagesEnabled.set(1); ze_external_memory_import_win32_handle_t compressionHint = {}; compressionHint.stype = ZE_STRUCTURE_TYPE_MEMORY_COMPRESSION_HINTS_EXT_DESC; compressionHint.flags = ZE_MEMORY_COMPRESSION_HINTS_EXT_FLAG_UNCOMPRESSED; zeDesc.pNext = &compressionHint; Image *image_ptr = nullptr; auto result = Image::create(productFamily, device, &zeDesc, &image_ptr); EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_NE(nullptr, image_ptr); std::unique_ptr image(image_ptr); EXPECT_FALSE(image->getAllocation()->isCompressionEnabled()); zeDesc.pNext = nullptr; } { NEO::DebugManager.flags.RenderCompressedImagesEnabled.set(1); Image *image_ptr = nullptr; auto result = Image::create(productFamily, device, &zeDesc, &image_ptr); EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_NE(nullptr, image_ptr); std::unique_ptr image(image_ptr); EXPECT_TRUE(image->getAllocation()->isCompressionEnabled()); } { NEO::DebugManager.flags.RenderCompressedImagesEnabled.set(0); Image *image_ptr = nullptr; auto result = Image::create(productFamily, device, &zeDesc, &image_ptr); EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_NE(nullptr, image_ptr); std::unique_ptr image(image_ptr); EXPECT_FALSE(image->getAllocation()->isCompressionEnabled()); } } HWTEST2_F(ImageGetMemoryProperties, givenDebugFlagSetWhenCreatingLinearImageThenDontEnableCompression, IsAtLeastSkl) { DebugManagerStateRestore restore; 
device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable.ftrRenderCompressedImages = true; ze_image_desc_t zeDesc = {}; zeDesc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC; zeDesc.arraylevels = 1u; zeDesc.depth = 1u; zeDesc.height = 1u; zeDesc.width = 1u; zeDesc.miplevels = 1u; zeDesc.type = ZE_IMAGE_TYPE_1D; zeDesc.flags = ZE_IMAGE_FLAG_BIAS_UNCACHED; zeDesc.format = {ZE_IMAGE_FORMAT_LAYOUT_32, ZE_IMAGE_FORMAT_TYPE_UINT, ZE_IMAGE_FORMAT_SWIZZLE_R, ZE_IMAGE_FORMAT_SWIZZLE_G, ZE_IMAGE_FORMAT_SWIZZLE_B, ZE_IMAGE_FORMAT_SWIZZLE_A}; Image *image_ptr = nullptr; auto result = Image::create(productFamily, device, &zeDesc, &image_ptr); EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_NE(nullptr, image_ptr); std::unique_ptr image(image_ptr); EXPECT_FALSE(image->getAllocation()->isCompressionEnabled()); } HWTEST2_F(ImageCreate, givenImageSizeZeroThenDummyImageIsCreated, IsAtMostXeHpgCore) { ze_image_desc_t desc = {}; desc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC; desc.type = ZE_IMAGE_TYPE_3D; desc.format.layout = ZE_IMAGE_FORMAT_LAYOUT_8_8_8_8; desc.format.type = ZE_IMAGE_FORMAT_TYPE_UINT; desc.format.x = desc.format.y = desc.format.z = desc.format.w = ZE_IMAGE_FORMAT_SWIZZLE_R; desc.width = 0; desc.height = 0; desc.depth = 0; L0::Image *image_ptr; auto result = Image::create(productFamily, device, &desc, &image_ptr); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto image = whitebox_cast(image_ptr); ASSERT_NE(nullptr, image); auto alloc = image->getAllocation(); ASSERT_NE(nullptr, alloc); auto renderSurfaceState = FamilyType::cmdInitRenderSurfaceState; image->copySurfaceStateToSSH(&renderSurfaceState, 0u, false); EXPECT_EQ(1u, renderSurfaceState.getWidth()); EXPECT_EQ(1u, renderSurfaceState.getHeight()); EXPECT_EQ(1u, renderSurfaceState.getDepth()); image->destroy(); } } // namespace ult } // namespace L0 
compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/kernel/000077500000000000000000000000001422164147700265735ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/kernel/CMakeLists.txt000066400000000000000000000004671422164147700313420ustar00rootroot00000000000000# # Copyright (C) 2020-2022 Intel Corporation # # SPDX-License-Identifier: MIT # target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/test_kernel.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_sampler_patch_value.cpp ) compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp000066400000000000000000003445051422164147700316310ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/device_binary_format/patchtokens_decoder.h" #include "shared/source/helpers/local_memory_access_modes.h" #include "shared/source/helpers/ray_tracing_helper.h" #include "shared/source/kernel/kernel_descriptor.h" #include "shared/source/program/kernel_info.h" #include "shared/source/program/kernel_info_from_patchtokens.h" #include "shared/source/utilities/stackvec.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/engine_descriptor_helper.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/compiler_interface/linker_mock.h" #include "shared/test/unit_test/device_binary_format/patchtokens_tests.h" #include "level_zero/core/source/debugger/debugger_l0.h" #include "level_zero/core/source/image/image_format_desc_helper.h" #include "level_zero/core/source/image/image_hw.h" #include "level_zero/core/source/kernel/kernel_hw.h" #include "level_zero/core/source/kernel/sampler_patch_values.h" #include 
"level_zero/core/source/module/module_imp.h" #include "level_zero/core/source/printf_handler/printf_handler.h" #include "level_zero/core/source/sampler/sampler_hw.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/fixtures/module_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_device.h" #include "level_zero/core/test/unit_tests/mocks/mock_kernel.h" #include "level_zero/core/test/unit_tests/mocks/mock_module.h" namespace NEO { void populatePointerKernelArg(ArgDescPointer &dst, CrossThreadDataOffset stateless, uint8_t pointerSize, SurfaceStateHeapOffset bindful, CrossThreadDataOffset bindless, KernelDescriptor::AddressingMode addressingMode); } namespace L0 { namespace ult { using KernelInitTest = Test; TEST_F(KernelInitTest, givenKernelToInitWhenItHasUnknownArgThenUnknowKernelArgHandlerAssigned) { uint32_t perHwThreadPrivateMemorySizeRequested = 32u; std::unique_ptr mockKernelImmData = std::make_unique(perHwThreadPrivateMemorySizeRequested); createModuleFromBinary(perHwThreadPrivateMemorySizeRequested, false, mockKernelImmData.get()); std::unique_ptr kernel; kernel = std::make_unique(module.get()); ze_kernel_desc_t desc = {}; desc.pKernelName = kernelName.c_str(); mockKernelImmData->resizeExplicitArgs(1); kernel->initialize(&desc); EXPECT_EQ(kernel->kernelArgHandlers[0], &KernelImp::setArgUnknown); EXPECT_EQ(mockKernelImmData->getDescriptor().payloadMappings.explicitArgs[0].type, NEO::ArgDescriptor::ArgTUnknown); } using KernelBaseAddressTests = Test; TEST_F(KernelBaseAddressTests, whenQueryingKernelBaseAddressThenCorrectAddressIsReturned) { uint32_t perHwThreadPrivateMemorySizeRequested = 32u; std::unique_ptr mockKernelImmData = std::make_unique(perHwThreadPrivateMemorySizeRequested); createModuleFromBinary(perHwThreadPrivateMemorySizeRequested, false, mockKernelImmData.get()); std::unique_ptr kernel; kernel = std::make_unique(module.get()); ze_kernel_desc_t desc = {}; desc.pKernelName = 
kernelName.c_str(); mockKernelImmData->resizeExplicitArgs(1); kernel->initialize(&desc); uint64_t baseAddress = 0; ze_result_t res = kernel->getBaseAddress(nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, res); res = kernel->getBaseAddress(&baseAddress); EXPECT_EQ(ZE_RESULT_SUCCESS, res); EXPECT_NE(baseAddress, 0u); EXPECT_EQ(baseAddress, kernel->getImmutableData()->getKernelInfo()->kernelAllocation->getGpuAddress()); } TEST(KernelArgTest, givenKernelWhenSetArgUnknownCalledThenSuccessRteurned) { Mock mockKernel; EXPECT_EQ(mockKernel.setArgUnknown(0, 0, nullptr), ZE_RESULT_SUCCESS); } struct MockKernelWithCallTracking : Mock<::L0::Kernel> { using ::L0::KernelImp::kernelArgInfos; ze_result_t setArgBufferWithAlloc(uint32_t argIndex, uintptr_t argVal, NEO::GraphicsAllocation *allocation) override { ++setArgBufferWithAllocCalled; return KernelImp::setArgBufferWithAlloc(argIndex, argVal, allocation); } size_t setArgBufferWithAllocCalled = 0u; }; using SetKernelArgCacheTest = Test; TEST_F(SetKernelArgCacheTest, givenValidBufferArgumentWhenSetMultipleTimesThenSetArgBufferWithAllocOnlyCalledIfNeeded) { MockKernelWithCallTracking mockKernel; mockKernel.module = module.get(); ze_kernel_desc_t desc = {}; desc.pKernelName = kernelName.c_str(); mockKernel.initialize(&desc); auto svmAllocsManager = device->getDriverHandle()->getSvmAllocsManager(); auto allocationProperties = NEO::SVMAllocsManager::SvmAllocationProperties{}; auto svmAllocation = svmAllocsManager->createSVMAlloc(4096, allocationProperties, context->rootDeviceIndices, context->deviceBitfields); size_t callCounter = 0u; //first setArg - called EXPECT_EQ(ZE_RESULT_SUCCESS, mockKernel.setArgBuffer(0, sizeof(svmAllocation), &svmAllocation)); EXPECT_EQ(++callCounter, mockKernel.setArgBufferWithAllocCalled); //same setArg but allocationCounter == 0 - called EXPECT_EQ(ZE_RESULT_SUCCESS, mockKernel.setArgBuffer(0, sizeof(svmAllocation), &svmAllocation)); EXPECT_EQ(++callCounter, mockKernel.setArgBufferWithAllocCalled); //same setArg 
- not called and argInfo.allocationCounter is updated ++svmAllocsManager->allocationsCounter; EXPECT_EQ(0u, mockKernel.kernelArgInfos[0].allocIdMemoryManagerCounter); EXPECT_EQ(ZE_RESULT_SUCCESS, mockKernel.setArgBuffer(0, sizeof(svmAllocation), &svmAllocation)); EXPECT_EQ(callCounter, mockKernel.setArgBufferWithAllocCalled); EXPECT_EQ(svmAllocsManager->allocationsCounter, mockKernel.kernelArgInfos[0].allocIdMemoryManagerCounter); //same setArg and allocationCounter - not called EXPECT_EQ(ZE_RESULT_SUCCESS, mockKernel.setArgBuffer(0, sizeof(svmAllocation), &svmAllocation)); EXPECT_EQ(callCounter, mockKernel.setArgBufferWithAllocCalled); //same setArg but different allocId - called svmAllocsManager->getSVMAlloc(svmAllocation)->setAllocId(1u); ++svmAllocsManager->allocationsCounter; EXPECT_EQ(ZE_RESULT_SUCCESS, mockKernel.setArgBuffer(0, sizeof(svmAllocation), &svmAllocation)); EXPECT_EQ(++callCounter, mockKernel.setArgBufferWithAllocCalled); //different value - called auto secondSvmAllocation = svmAllocsManager->createSVMAlloc(4096, allocationProperties, context->rootDeviceIndices, context->deviceBitfields); EXPECT_EQ(ZE_RESULT_SUCCESS, mockKernel.setArgBuffer(0, sizeof(secondSvmAllocation), &secondSvmAllocation)); EXPECT_EQ(++callCounter, mockKernel.setArgBufferWithAllocCalled); //same value but no svmData - ZE_RESULT_ERROR_INVALID_ARGUMENT svmAllocsManager->freeSVMAlloc(secondSvmAllocation); ++svmAllocsManager->allocationsCounter; EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, mockKernel.setArgBuffer(0, sizeof(secondSvmAllocation), &secondSvmAllocation)); EXPECT_EQ(callCounter, mockKernel.setArgBufferWithAllocCalled); svmAllocsManager->freeSVMAlloc(svmAllocation); } using KernelImpSetGroupSizeTest = Test; TEST_F(KernelImpSetGroupSizeTest, WhenCalculatingLocalIdsThenGrfSizeIsTakenFromCapabilityTable) { Mock mockKernel; Mock mockModule(this->device, nullptr); mockKernel.descriptor.kernelAttributes.simdSize = 1; mockKernel.descriptor.kernelAttributes.numLocalIdChannels 
= 3; mockKernel.module = &mockModule; auto grfSize = mockModule.getDevice()->getHwInfo().capabilityTable.grfSize; uint32_t groupSize[3] = {2, 3, 5}; auto ret = mockKernel.setGroupSize(groupSize[0], groupSize[1], groupSize[2]); EXPECT_EQ(ZE_RESULT_SUCCESS, ret); EXPECT_EQ(groupSize[0] * groupSize[1] * groupSize[2], mockKernel.numThreadsPerThreadGroup); EXPECT_EQ(grfSize * groupSize[0] * groupSize[1] * groupSize[2], mockKernel.perThreadDataSizeForWholeThreadGroup); ASSERT_LE(grfSize * groupSize[0] * groupSize[1] * groupSize[2], mockKernel.perThreadDataSizeForWholeThreadGroup); using LocalIdT = unsigned short; auto threadOffsetInLocalIds = grfSize / sizeof(LocalIdT); auto generatedLocalIds = reinterpret_cast(mockKernel.perThreadDataForWholeThreadGroup); uint32_t threadId = 0; for (uint32_t z = 0; z < groupSize[2]; ++z) { for (uint32_t y = 0; y < groupSize[1]; ++y) { for (uint32_t x = 0; x < groupSize[0]; ++x) { EXPECT_EQ(x, generatedLocalIds[0 + threadId * threadOffsetInLocalIds]) << " thread : " << threadId; EXPECT_EQ(y, generatedLocalIds[1 + threadId * threadOffsetInLocalIds]) << " thread : " << threadId; EXPECT_EQ(z, generatedLocalIds[2 + threadId * threadOffsetInLocalIds]) << " thread : " << threadId; ++threadId; } } } } TEST_F(KernelImpSetGroupSizeTest, givenLocalIdGenerationByRuntimeDisabledWhenSettingGroupSizeThenLocalIdsAreNotGenerated) { Mock mockKernel; Mock mockModule(this->device, nullptr); mockKernel.descriptor.kernelAttributes.simdSize = 1; mockKernel.module = &mockModule; mockKernel.kernelRequiresGenerationOfLocalIdsByRuntime = false; uint32_t groupSize[3] = {2, 3, 5}; auto ret = mockKernel.setGroupSize(groupSize[0], groupSize[1], groupSize[2]); EXPECT_EQ(ZE_RESULT_SUCCESS, ret); EXPECT_EQ(groupSize[0] * groupSize[1] * groupSize[2], mockKernel.numThreadsPerThreadGroup); EXPECT_EQ(0u, mockKernel.perThreadDataSizeForWholeThreadGroup); EXPECT_EQ(0u, mockKernel.perThreadDataSize); EXPECT_EQ(nullptr, mockKernel.perThreadDataForWholeThreadGroup); } 
// A group size that differs from the kernel's required work-group size
// (2 in every dimension here) is rejected with INVALID_GROUP_SIZE_DIMENSION.
TEST_F(KernelImpSetGroupSizeTest, givenIncorrectGroupSizeWhenSettingGroupSizeThenInvalidGroupSizeDimensionErrorIsReturned) {
    Mock mockKernel;
    Mock mockModule(this->device, nullptr);
    mockKernel.module = &mockModule;

    for (uint32_t dim = 0u; dim < 3u; ++dim) {
        mockKernel.descriptor.kernelAttributes.requiredWorkgroupSize[dim] = 2;
    }

    EXPECT_EQ(ZE_RESULT_ERROR_INVALID_GROUP_SIZE_DIMENSION, mockKernel.setGroupSize(1u, 1u, 1u));
}

// A group size of zero in every dimension is rejected with INVALID_ARGUMENT.
TEST_F(KernelImpSetGroupSizeTest, givenZeroGroupSizeWhenSettingGroupSizeThenInvalidArgumentErrorIsReturned) {
    Mock mockKernel;
    Mock mockModule(this->device, nullptr);
    mockKernel.module = &mockModule;

    for (uint32_t dim = 0u; dim < 3u; ++dim) {
        mockKernel.descriptor.kernelAttributes.requiredWorkgroupSize[dim] = 2;
    }

    EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, mockKernel.setGroupSize(0u, 0u, 0u));
}

using SetKernelArg = Test; using ImageSupport = IsWithinProducts; HWTEST2_F(SetKernelArg, givenImageAndKernelWhenSetArgImageThenCrossThreadDataIsSet, ImageSupport) { createKernel(); auto &imageArg = const_cast(kernel->kernelImmData->getDescriptor().payloadMappings.explicitArgs[3].as()); imageArg.metadataPayload.imgWidth = 0x1c; imageArg.metadataPayload.imgHeight = 0x18; imageArg.metadataPayload.imgDepth = 0x14; imageArg.metadataPayload.arraySize = 0x10; imageArg.metadataPayload.numSamples = 0xc; imageArg.metadataPayload.channelDataType = 0x8; imageArg.metadataPayload.channelOrder = 0x4; imageArg.metadataPayload.numMipLevels = 0x0; imageArg.metadataPayload.flatWidth = 0x30; imageArg.metadataPayload.flatHeight = 0x2c; imageArg.metadataPayload.flatPitch = 0x28; imageArg.metadataPayload.flatBaseOffset = 0x20; ze_image_desc_t desc = {}; desc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC; desc.type = ZE_IMAGE_TYPE_3D; desc.format.layout = ZE_IMAGE_FORMAT_LAYOUT_8_8_8_8; desc.format.type = ZE_IMAGE_FORMAT_TYPE_UINT; desc.width = 11;
// (Continuation of the image-argument test.) Finishes the image descriptor, initializes the image, binds it
// as argument 3 and compares each metadata slot in the cross-thread data with the image info: width, height,
// depth, array size, sample count, mip levels, GPU base address, and the "flat" width/height values, which
// are expected as (dimension * pixelSize) - 1.
desc.height = 13; desc.depth = 17; desc.format.x = ZE_IMAGE_FORMAT_SWIZZLE_A; desc.format.y = ZE_IMAGE_FORMAT_SWIZZLE_0; desc.format.z = ZE_IMAGE_FORMAT_SWIZZLE_1; desc.format.w = ZE_IMAGE_FORMAT_SWIZZLE_X; auto imageHW = std::make_unique>>(); auto ret = imageHW->initialize(device, &desc); ASSERT_EQ(ZE_RESULT_SUCCESS, ret); auto handle = imageHW->toHandle(); auto imgInfo = imageHW->getImageInfo(); auto pixelSize = imgInfo.surfaceFormat->ImageElementSizeInBytes; kernel->setArgImage(3, sizeof(imageHW.get()), &handle); auto crossThreadData = kernel->getCrossThreadData(); auto pImgWidth = ptrOffset(crossThreadData, imageArg.metadataPayload.imgWidth); EXPECT_EQ(imgInfo.imgDesc.imageWidth, *pImgWidth); auto pImgHeight = ptrOffset(crossThreadData, imageArg.metadataPayload.imgHeight); EXPECT_EQ(imgInfo.imgDesc.imageHeight, *pImgHeight); auto pImgDepth = ptrOffset(crossThreadData, imageArg.metadataPayload.imgDepth); EXPECT_EQ(imgInfo.imgDesc.imageDepth, *pImgDepth); auto pArraySize = ptrOffset(crossThreadData, imageArg.metadataPayload.arraySize); EXPECT_EQ(imgInfo.imgDesc.imageArraySize, *pArraySize); auto pNumSamples = ptrOffset(crossThreadData, imageArg.metadataPayload.numSamples); EXPECT_EQ(imgInfo.imgDesc.numSamples, *pNumSamples); auto pNumMipLevels = ptrOffset(crossThreadData, imageArg.metadataPayload.numMipLevels); EXPECT_EQ(imgInfo.imgDesc.numMipLevels, *pNumMipLevels); auto pFlatBaseOffset = ptrOffset(crossThreadData, imageArg.metadataPayload.flatBaseOffset); EXPECT_EQ(imageHW->getAllocation()->getGpuAddress(), *reinterpret_cast(pFlatBaseOffset)); auto pFlatWidth = ptrOffset(crossThreadData, imageArg.metadataPayload.flatWidth); EXPECT_EQ((imgInfo.imgDesc.imageWidth * pixelSize) - 1u, *pFlatWidth); auto pFlatHeight = ptrOffset(crossThreadData, imageArg.metadataPayload.flatHeight); EXPECT_EQ((imgInfo.imgDesc.imageHeight * pixelSize) - 1u, *pFlatHeight); auto pFlatPitch = ptrOffset(crossThreadData, imageArg.metadataPayload.flatPitch);
// (End of the image-argument test.) Checks the flat pitch (row pitch - 1) and that the channel data type
// and channel order slots match getClChannelDataType / getClChannelOrder for the image format.
EXPECT_EQ(imgInfo.imgDesc.imageRowPitch - 1u, *pFlatPitch); auto pChannelDataType = ptrOffset(crossThreadData, imageArg.metadataPayload.channelDataType); EXPECT_EQ(getClChannelDataType(desc.format), *reinterpret_cast(pChannelDataType)); auto pChannelOrder = ptrOffset(crossThreadData, imageArg.metadataPayload.channelOrder); EXPECT_EQ(getClChannelOrder(desc.format), *reinterpret_cast(pChannelOrder)); }
// Sampler-argument test: gives explicit argument 5 distinct offsets for addressing mode, normalized coords
// and snap WA, binds a clamp-to-border / nearest / normalized sampler, and checks the values patched into
// the cross-thread data (snap WA slot is expected to hold the numeric-limits max value).
HWTEST2_F(SetKernelArg, givenSamplerAndKernelWhenSetArgSamplerThenCrossThreadDataIsSet, ImageSupport) { createKernel(); auto &samplerArg = const_cast(kernel->kernelImmData->getDescriptor().payloadMappings.explicitArgs[5].as()); samplerArg.metadataPayload.samplerAddressingMode = 0x0; samplerArg.metadataPayload.samplerNormalizedCoords = 0x4; samplerArg.metadataPayload.samplerSnapWa = 0x8; ze_sampler_desc_t desc = {}; desc.addressMode = ZE_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER; desc.filterMode = ZE_SAMPLER_FILTER_MODE_NEAREST; desc.isNormalized = true; auto sampler = std::make_unique>>(); auto ret = sampler->initialize(device, &desc); ASSERT_EQ(ZE_RESULT_SUCCESS, ret); auto handle = sampler->toHandle(); kernel->setArgSampler(5, sizeof(sampler.get()), &handle); auto crossThreadData = kernel->getCrossThreadData(); auto pSamplerSnapWa = ptrOffset(crossThreadData, samplerArg.metadataPayload.samplerSnapWa); EXPECT_EQ(std::numeric_limits::max(), *reinterpret_cast(pSamplerSnapWa)); auto pSamplerAddressingMode = ptrOffset(crossThreadData, samplerArg.metadataPayload.samplerAddressingMode); EXPECT_EQ(static_cast(SamplerPatchValues::AddressClampToBorder), *pSamplerAddressingMode); auto pSamplerNormalizedCoords = ptrOffset(crossThreadData, samplerArg.metadataPayload.samplerNormalizedCoords); EXPECT_EQ(static_cast(SamplerPatchValues::NormalizedCoordsTrue), *pSamplerNormalizedCoords); }
// Product filter for the buffer-argument test.
using ArgSupport = IsWithinProducts;
// Buffer-argument test: passes an arbitrary host address (0x1234) as a buffer argument and expects
// ZE_RESULT_ERROR_INVALID_ARGUMENT. The test body continues on the next line.
HWTEST2_F(SetKernelArg, givenBufferArgumentWhichHasNotBeenAllocatedByRuntimeThenInvalidArgumentIsReturned, ArgSupport) { createKernel(); uint64_t hostAddress =
// (End of the buffer-argument test.)
0x1234; ze_result_t res = kernel->setArgBuffer(0, sizeof(hostAddress), &hostAddress); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, res); }
// Thin fixture that forwards SetUp/TearDown to ModuleImmutableDataFixture.
class KernelImmutableDataFixture : public ModuleImmutableDataFixture { public: void SetUp() { ModuleImmutableDataFixture::SetUp(); } void TearDown() { ModuleImmutableDataFixture::TearDown(); } };
using KernelImmutableDataTests = Test;
// With 0 bytes of per-HW-thread private memory requested, the created kernel has no private memory allocation.
TEST_F(KernelImmutableDataTests, givenKernelInitializedWithNoPrivateMemoryThenPrivateMemoryIsNull) { uint32_t perHwThreadPrivateMemorySizeRequested = 0u; bool isInternal = false; std::unique_ptr mockKernelImmData = std::make_unique(perHwThreadPrivateMemorySizeRequested); createModuleFromBinary(perHwThreadPrivateMemorySizeRequested, isInternal, mockKernelImmData.get()); std::unique_ptr kernel; kernel = std::make_unique(module.get()); createKernel(kernel.get()); EXPECT_EQ(nullptr, kernel->privateMemoryGraphicsAllocation); }
// With 32 bytes requested, a private memory allocation exists and its size equals
// requested size * computeUnitsUsedForScratch.
TEST_F(KernelImmutableDataTests, givenKernelInitializedWithPrivateMemoryThenPrivateMemoryIsCreated) { uint32_t perHwThreadPrivateMemorySizeRequested = 32u; bool isInternal = false; std::unique_ptr mockKernelImmData = std::make_unique(perHwThreadPrivateMemorySizeRequested); createModuleFromBinary(perHwThreadPrivateMemorySizeRequested, isInternal, mockKernelImmData.get()); std::unique_ptr kernel; kernel = std::make_unique(module.get()); createKernel(kernel.get()); EXPECT_NE(nullptr, kernel->privateMemoryGraphicsAllocation); size_t expectedSize = perHwThreadPrivateMemorySizeRequested * device->getNEODevice()->getDeviceInfo().computeUnitsUsedForScratch; EXPECT_EQ(expectedSize, kernel->privateMemoryGraphicsAllocation->getUnderlyingBufferSize()); }
using KernelImmutableDataIsaCopyTests = KernelImmutableDataTests;
// For a user (non-internal) module, counts copyMemoryToAllocation calls: module creation is expected to add
// one copy for the global surface plus one per kernel immutable data entry, and kernel creation adds none.
// The test body continues on the next line.
TEST_F(KernelImmutableDataIsaCopyTests, whenUserKernelIsCreatedThenIsaIsCopiedWhenModuleIsCreated) { MockImmutableMemoryManager *mockMemoryManager = static_cast(device->getNEODevice()->getMemoryManager()); uint32_t perHwThreadPrivateMemorySizeRequested = 32u; bool isInternal =
// (Continuation of whenUserKernelIsCreatedThenIsaIsCopiedWhenModuleIsCreated.) Compares the copy counter
// after module creation and again after kernel creation against the same expected value.
false; size_t previouscopyMemoryToAllocationCalledTimes = mockMemoryManager->copyMemoryToAllocationCalledTimes; std::unique_ptr mockKernelImmData = std::make_unique(perHwThreadPrivateMemorySizeRequested); createModuleFromBinary(perHwThreadPrivateMemorySizeRequested, isInternal, mockKernelImmData.get()); size_t copyForGlobalSurface = 1u; auto copyForIsa = module->getKernelImmutableDataVector().size(); size_t expectedPreviouscopyMemoryToAllocationCalledTimes = previouscopyMemoryToAllocationCalledTimes + copyForGlobalSurface + copyForIsa; EXPECT_EQ(expectedPreviouscopyMemoryToAllocationCalledTimes, mockMemoryManager->copyMemoryToAllocationCalledTimes); std::unique_ptr kernel; kernel = std::make_unique(module.get()); createKernel(kernel.get()); EXPECT_EQ(expectedPreviouscopyMemoryToAllocationCalledTimes, mockMemoryManager->copyMemoryToAllocationCalledTimes); }
// Re-initializing the immutable data of a user module must not trigger any further copyMemoryToAllocation call.
TEST_F(KernelImmutableDataIsaCopyTests, whenImmutableDataIsInitializedForUserKernelThenIsaIsNotCopied) { MockImmutableMemoryManager *mockMemoryManager = static_cast(device->getNEODevice()->getMemoryManager()); uint32_t perHwThreadPrivateMemorySizeRequested = 32u; bool isInternal = false; std::unique_ptr mockKernelImmData = std::make_unique(perHwThreadPrivateMemorySizeRequested); createModuleFromBinary(perHwThreadPrivateMemorySizeRequested, isInternal, mockKernelImmData.get()); uint32_t previouscopyMemoryToAllocationCalledTimes = mockMemoryManager->copyMemoryToAllocationCalledTimes; mockKernelImmData->initialize(mockKernelImmData->mockKernelInfo, device, device->getNEODevice()->getDeviceInfo().computeUnitsUsedForScratch, module.get()->translationUnit->globalConstBuffer, module.get()->translationUnit->globalVarBuffer, isInternal); EXPECT_EQ(previouscopyMemoryToAllocationCalledTimes, mockMemoryManager->copyMemoryToAllocationCalledTimes); }
// Same check as above for an internal module (isInternal == true). The test body continues on the next line.
TEST_F(KernelImmutableDataIsaCopyTests, whenImmutableDataIsInitializedForInternalKernelThenIsaIsNotCopied) { MockImmutableMemoryManager *mockMemoryManager =
// (Continuation of whenImmutableDataIsInitializedForInternalKernelThenIsaIsNotCopied.) Initializes the
// immutable data with isInternal == true and expects the copy counter to stay unchanged.
static_cast(device->getNEODevice()->getMemoryManager()); uint32_t perHwThreadPrivateMemorySizeRequested = 32u; bool isInternal = true; std::unique_ptr mockKernelImmData = std::make_unique(perHwThreadPrivateMemorySizeRequested); createModuleFromBinary(perHwThreadPrivateMemorySizeRequested, isInternal, mockKernelImmData.get()); uint32_t previouscopyMemoryToAllocationCalledTimes = mockMemoryManager->copyMemoryToAllocationCalledTimes; mockKernelImmData->initialize(mockKernelImmData->mockKernelInfo, device, device->getNEODevice()->getDeviceInfo().computeUnitsUsedForScratch, module.get()->translationUnit->globalConstBuffer, module.get()->translationUnit->globalVarBuffer, isInternal); EXPECT_EQ(previouscopyMemoryToAllocationCalledTimes, mockMemoryManager->copyMemoryToAllocationCalledTimes); }
// NOTE(review): this alias is not referenced by any test visible in this part of the file.
using KernelImmutableDataWithNullHeapTests = KernelImmutableDataTests;
// Internal module with a KERNEL_ISA_INTERNAL allocation: module creation is expected to perform only the
// global-surface copy (0 ISA copies); the expected counter is then incremented by one before the kernel is
// created. The test body continues on the next line.
TEST_F(KernelImmutableDataTests, givenInternalModuleWhenKernelIsCreatedThenIsaIsCopiedOnce) { MockImmutableMemoryManager *mockMemoryManager = static_cast(device->getNEODevice()->getMemoryManager()); uint32_t perHwThreadPrivateMemorySizeRequested = 32u; bool isInternal = true; std::unique_ptr mockKernelImmData = std::make_unique(perHwThreadPrivateMemorySizeRequested); mockKernelImmData->getIsaGraphicsAllocation()->setAllocationType(AllocationType::KERNEL_ISA_INTERNAL); size_t previouscopyMemoryToAllocationCalledTimes = mockMemoryManager->copyMemoryToAllocationCalledTimes; createModuleFromBinary(perHwThreadPrivateMemorySizeRequested, isInternal, mockKernelImmData.get()); size_t copyForGlobalSurface = 1u; size_t copyForPatchingIsa = 0u; size_t expectedPreviouscopyMemoryToAllocationCalledTimes = previouscopyMemoryToAllocationCalledTimes + copyForGlobalSurface + copyForPatchingIsa; EXPECT_EQ(expectedPreviouscopyMemoryToAllocationCalledTimes, mockMemoryManager->copyMemoryToAllocationCalledTimes); std::unique_ptr kernel; kernel = std::make_unique(module.get()); expectedPreviouscopyMemoryToAllocationCalledTimes++;
// (End of givenInternalModuleWhenKernelIsCreatedThenIsaIsCopiedOnce.) Creating the kernel is expected to
// account for exactly one additional copyMemoryToAllocation call.
createKernel(kernel.get()); EXPECT_EQ(expectedPreviouscopyMemoryToAllocationCalledTimes, mockMemoryManager->copyMemoryToAllocationCalledTimes); }
// Builtin module built from a dummy SPIR-V blob with a linker input that requires patching of the global
// variables buffer: module initialization must not copy ISA (counter unchanged, no kernel reports its ISA
// as copied); the copy is expected only when the kernel itself is initialized.
// The dummy input buffer is zero-initialized so that the 10 bytes announced through inputSize never expose
// indeterminate values. The test body continues on the next line.
TEST_F(KernelImmutableDataTests, givenInternalModuleWhenKernelIsCreatedIsaIsNotCopiedDuringLinking) { NEO::MockCompilerEnableGuard mock(true); auto cip = new NEO::MockCompilerInterfaceCaptureBuildOptions(); neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]->compilerInterface.reset(cip); MockImmutableMemoryManager *mockMemoryManager = static_cast(device->getNEODevice()->getMemoryManager()); uint8_t binary[16] = {}; ze_module_desc_t moduleDesc = {}; moduleDesc.format = ZE_MODULE_FORMAT_IL_SPIRV; moduleDesc.pInputModule = binary; moduleDesc.inputSize = 10; ModuleBuildLog *moduleBuildLog = nullptr; auto linkerInput = std::make_unique<::WhiteBox>(); linkerInput->traits.requiresPatchingOfGlobalVariablesBuffer = true; std::unique_ptr moduleMock = std::make_unique(device, moduleBuildLog, ModuleType::Builtin); moduleMock->translationUnit = std::make_unique(device); moduleMock->translationUnit->programInfo.linkerInput = std::move(linkerInput); uint32_t kernelHeap = 0; auto kernelInfo = new KernelInfo(); kernelInfo->heapInfo.KernelHeapSize = 1; kernelInfo->heapInfo.pKernelHeap = &kernelHeap; Mock<::L0::Kernel> kernelMock; kernelMock.module = moduleMock.get(); kernelMock.immutableData.kernelInfo = kernelInfo; kernelMock.immutableData.surfaceStateHeapSize = 64; kernelMock.immutableData.surfaceStateHeapTemplate.reset(new uint8_t[64]); kernelMock.immutableData.getIsaGraphicsAllocation()->setAllocationType(AllocationType::KERNEL_ISA_INTERNAL); kernelInfo->kernelDescriptor.payloadMappings.implicitArgs.systemThreadSurfaceAddress.bindful = 0; moduleMock->translationUnit->programInfo.kernelInfos.push_back(kernelInfo); moduleMock->kernelImmData = &kernelMock.immutableData; size_t previouscopyMemoryToAllocationCalledTimes = mockMemoryManager->copyMemoryToAllocationCalledTimes; auto result =
// (Continuation of givenInternalModuleWhenKernelIsCreatedIsaIsNotCopiedDuringLinking.) After module
// initialization the copy counter is unchanged and no kernel immutable data reports a copied ISA; after
// kernelMock.initialize the counter is expected to be one higher.
moduleMock->initialize(&moduleDesc, neoDevice); EXPECT_TRUE(result); size_t expectedPreviouscopyMemoryToAllocationCalledTimes = previouscopyMemoryToAllocationCalledTimes; EXPECT_EQ(expectedPreviouscopyMemoryToAllocationCalledTimes, mockMemoryManager->copyMemoryToAllocationCalledTimes); for (auto &ki : moduleMock->kernelImmDatas) { EXPECT_FALSE(ki->isIsaCopiedToAllocation()); } expectedPreviouscopyMemoryToAllocationCalledTimes++; ze_kernel_desc_t desc = {}; desc.pKernelName = ""; moduleMock->kernelImmData = moduleMock->kernelImmDatas[0].get(); kernelMock.initialize(&desc); EXPECT_EQ(expectedPreviouscopyMemoryToAllocationCalledTimes, mockMemoryManager->copyMemoryToAllocationCalledTimes); }
// Loads the native test binary and builds two modules from it: one requesting 32 bytes of per-HW-thread
// private memory and one requesting none. The residency containers of the resulting kernels are compared
// at the end of the test, which continues on the next line.
TEST_F(KernelImmutableDataTests, givenKernelInitializedWithPrivateMemoryThenContainerHasOneExtraSpaceForAllocation) { std::string testFile; retrieveBinaryKernelFilenameApiSpecific(testFile, binaryFilename + "_", ".bin"); size_t size = 0; auto src = loadDataFromFile( testFile.c_str(), size); ASSERT_NE(0u, size); ASSERT_NE(nullptr, src); ze_module_desc_t moduleDesc = {}; moduleDesc.format = ZE_MODULE_FORMAT_NATIVE; moduleDesc.pInputModule = reinterpret_cast(src.get()); moduleDesc.inputSize = size; ModuleBuildLog *moduleBuildLog = nullptr; uint32_t perHwThreadPrivateMemorySizeRequested = 32u; std::unique_ptr mockKernelImmData = std::make_unique(perHwThreadPrivateMemorySizeRequested); std::unique_ptr moduleWithPrivateMemory = std::make_unique(device, moduleBuildLog, ModuleType::User, perHwThreadPrivateMemorySizeRequested, mockKernelImmData.get()); bool result = moduleWithPrivateMemory->initialize(&moduleDesc, device->getNEODevice()); EXPECT_TRUE(result); std::unique_ptr kernelWithPrivateMemory; kernelWithPrivateMemory = std::make_unique(moduleWithPrivateMemory.get()); createKernel(kernelWithPrivateMemory.get()); EXPECT_NE(nullptr, kernelWithPrivateMemory->privateMemoryGraphicsAllocation); size_t sizeContainerWithPrivateMemory = kernelWithPrivateMemory->getResidencyContainer().size();
// (Continuation of givenKernelInitializedWithPrivateMemoryThenContainerHasOneExtraSpaceForAllocation.)
// Builds the module without private memory and expects its kernel's residency container to be exactly
// one entry smaller than the container of the kernel with private memory.
perHwThreadPrivateMemorySizeRequested = 0u; std::unique_ptr mockKernelImmDataForModuleWithoutPrivateMemory = std::make_unique(perHwThreadPrivateMemorySizeRequested); std::unique_ptr moduleWithoutPrivateMemory = std::make_unique(device, moduleBuildLog, ModuleType::User, perHwThreadPrivateMemorySizeRequested, mockKernelImmDataForModuleWithoutPrivateMemory.get()); result = moduleWithoutPrivateMemory->initialize(&moduleDesc, device->getNEODevice()); EXPECT_TRUE(result); std::unique_ptr kernelWithoutPrivateMemory; kernelWithoutPrivateMemory = std::make_unique(moduleWithoutPrivateMemory.get()); createKernel(kernelWithoutPrivateMemory.get()); EXPECT_EQ(nullptr, kernelWithoutPrivateMemory->privateMemoryGraphicsAllocation); size_t sizeContainerWithoutPrivateMemory = kernelWithoutPrivateMemory->getResidencyContainer().size(); EXPECT_EQ(sizeContainerWithoutPrivateMemory + 1u, sizeContainerWithPrivateMemory); }
// Requests the numeric-limits maximum as per-HW-thread private memory: the module is expected to report
// shouldAllocatePrivateMemoryPerDispatch() and the kernel must end up without a private memory allocation.
// The test body continues on the next line.
TEST_F(KernelImmutableDataTests, givenKernelWithPrivateMemoryBiggerThanGlobalMemoryThenPrivateMemoryIsNotAllocated) { std::string testFile; retrieveBinaryKernelFilenameApiSpecific(testFile, binaryFilename + "_", ".bin"); size_t size = 0; auto src = loadDataFromFile( testFile.c_str(), size); ASSERT_NE(0u, size); ASSERT_NE(nullptr, src); ze_module_desc_t moduleDesc = {}; moduleDesc.format = ZE_MODULE_FORMAT_NATIVE; moduleDesc.pInputModule = reinterpret_cast(src.get()); moduleDesc.inputSize = size; ModuleBuildLog *moduleBuildLog = nullptr; uint32_t perHwThreadPrivateMemorySizeRequested = std::numeric_limits::max(); std::unique_ptr mockKernelImmData = std::make_unique(perHwThreadPrivateMemorySizeRequested); std::unique_ptr module = std::make_unique(device, moduleBuildLog, ModuleType::User, perHwThreadPrivateMemorySizeRequested, mockKernelImmData.get()); bool result = module->initialize(&moduleDesc, device->getNEODevice()); EXPECT_TRUE(result); EXPECT_TRUE(module->shouldAllocatePrivateMemoryPerDispatch()); std::unique_ptr kernel; kernel = std::make_unique(module.get());
// (End of givenKernelWithPrivateMemoryBiggerThanGlobalMemoryThenPrivateMemoryIsNotAllocated.)
createKernel(kernel.get()); EXPECT_EQ(nullptr, kernel->getPrivateMemoryGraphicsAllocation()); }
// Kernel descriptor stub whose hasRTCalls() always returns true.
class KernelDescriptorRTCallsTrue : public NEO::KernelDescriptor { bool hasRTCalls() const override { return true; } };
// Kernel descriptor stub whose hasRTCalls() always returns false.
class KernelDescriptorRTCallsFalse : public NEO::KernelDescriptor { bool hasRTCalls() const override { return false; } };
// With hasRTCalls() == true, an rtDispatchGlobals pointer size of 4 and forced RTDispatchGlobals allocation,
// kernel initialization must succeed, the device must have an RT memory backed buffer, and the
// RTDispatchGlobals allocation must be the last entry of the kernel's residency container.
TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueThenRayTracingIsInitialized) { KernelDescriptorRTCallsTrue mockDescriptor = {}; mockDescriptor.kernelMetadata.kernelName = "rt_test"; for (auto i = 0u; i < 3u; i++) { mockDescriptor.kernelAttributes.requiredWorkgroupSize[i] = 0; } std::unique_ptr mockKernelImmutableData = std::make_unique(32u); mockKernelImmutableData->kernelDescriptor = &mockDescriptor; mockDescriptor.payloadMappings.implicitArgs.rtDispatchGlobals.pointerSize = 4; ModuleBuildLog *moduleBuildLog = nullptr; module = std::make_unique(device, moduleBuildLog, ModuleType::User, 32u, mockKernelImmutableData.get()); module->maxGroupSize = 10; std::unique_ptr kernel; kernel = std::make_unique(module.get()); ze_kernel_desc_t kernelDesc = {}; kernelDesc.pKernelName = "rt_test"; auto immDataVector = const_cast> *>(&module.get()->getKernelImmutableDataVector()); immDataVector->push_back(std::move(mockKernelImmutableData)); neoDevice->setRTDispatchGlobalsForceAllocation(); auto result = kernel->initialize(&kernelDesc); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, module.get()->getDevice()->getNEODevice()->getRTMemoryBackedBuffer()); auto rtDispatchGlobals = neoDevice->getRTDispatchGlobals(NEO::RayTracingHelper::maxBvhLevels); EXPECT_NE(nullptr, rtDispatchGlobals); size_t residencySize = kernel->getResidencyContainer().size(); EXPECT_NE(0u, residencySize); EXPECT_EQ(kernel->getResidencyContainer()[residencySize - 1], rtDispatchGlobals); }
// Same setup but without setting the rtDispatchGlobals pointer size and without forcing the allocation:
// initialization still succeeds and the RT memory backed buffer exists, but no RTDispatchGlobals are
// returned. The test body continues on the next line.
TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueButKernelDoesNotHaveRTDGAllocationTokenThenRayTracingStillEnabledWithoutAllocation) { KernelDescriptorRTCallsTrue mockDescriptor
// (Continuation of whenHasRTCallsIsTrueButKernelDoesNotHaveRTDGAllocationTokenThenRayTracingStillEnabledWithoutAllocation.)
= {}; mockDescriptor.kernelMetadata.kernelName = "rt_test"; for (auto i = 0u; i < 3u; i++) { mockDescriptor.kernelAttributes.requiredWorkgroupSize[i] = 0; } std::unique_ptr mockKernelImmutableData = std::make_unique(32u); mockKernelImmutableData->kernelDescriptor = &mockDescriptor; ModuleBuildLog *moduleBuildLog = nullptr; module = std::make_unique(device, moduleBuildLog, ModuleType::User, 32u, mockKernelImmutableData.get()); module->maxGroupSize = 10; std::unique_ptr kernel; kernel = std::make_unique(module.get()); ze_kernel_desc_t kernelDesc = {}; kernelDesc.pKernelName = "rt_test"; auto immDataVector = const_cast> *>(&module.get()->getKernelImmutableDataVector()); immDataVector->push_back(std::move(mockKernelImmutableData)); auto result = kernel->initialize(&kernelDesc); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, module.get()->getDevice()->getNEODevice()->getRTMemoryBackedBuffer()); auto rtDispatchGlobals = neoDevice->getRTDispatchGlobals(NEO::RayTracingHelper::maxBvhLevels); EXPECT_EQ(nullptr, rtDispatchGlobals); }
// With hasRTCalls() == true and a FailMemoryManager injected into the device, kernel initialization is
// expected to return ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY. The test body continues on the next line.
TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueAndNoRTDispatchGlobalsIsAllocatedThenRayTracingIsNotInitialized) { KernelDescriptorRTCallsTrue mockDescriptor = {}; mockDescriptor.kernelMetadata.kernelName = "rt_test"; for (auto i = 0u; i < 3u; i++) { mockDescriptor.kernelAttributes.requiredWorkgroupSize[i] = 0; } mockDescriptor.payloadMappings.implicitArgs.rtDispatchGlobals.pointerSize = 4; NEO::MemoryManager *currMemoryManager = new NEO::FailMemoryManager(0, *neoDevice->executionEnvironment); std::unique_ptr mockKernelImmutableData = std::make_unique(32u); mockKernelImmutableData->kernelDescriptor = &mockDescriptor; ModuleBuildLog *moduleBuildLog = nullptr; module = std::make_unique(device, moduleBuildLog, ModuleType::User, 32u, mockKernelImmutableData.get()); module->maxGroupSize = 10; std::unique_ptr kernel; kernel = std::make_unique(module.get()); ze_kernel_desc_t kernelDesc = {}; kernelDesc.pKernelName = "rt_test"; auto
// (Continuation of whenHasRTCallsIsTrueAndNoRTDispatchGlobalsIsAllocatedThenRayTracingIsNotInitialized.)
// The bindless heaps helper is reset before the failing memory manager is injected.
immDataVector = const_cast> *>(&module.get()->getKernelImmutableDataVector()); immDataVector->push_back(std::move(mockKernelImmutableData)); neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()]->bindlessHeapsHelper.reset(nullptr); neoDevice->injectMemoryManager(currMemoryManager); EXPECT_EQ(ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY, kernel->initialize(&kernelDesc)); }
// With hasRTCalls() == false, kernel initialization succeeds and the device has no RT memory backed buffer.
TEST_F(KernelImmutableDataTests, whenHasRTCallsIsFalseThenRayTracingIsNotInitialized) { KernelDescriptorRTCallsFalse mockDescriptor = {}; mockDescriptor.kernelMetadata.kernelName = "rt_test"; for (auto i = 0u; i < 3u; i++) { mockDescriptor.kernelAttributes.requiredWorkgroupSize[i] = 0; } std::unique_ptr mockKernelImmutableData = std::make_unique(32u); mockKernelImmutableData->kernelDescriptor = &mockDescriptor; ModuleBuildLog *moduleBuildLog = nullptr; module = std::make_unique(device, moduleBuildLog, ModuleType::User, 32u, mockKernelImmutableData.get()); module->maxGroupSize = 10; std::unique_ptr kernel; kernel = std::make_unique(module.get()); ze_kernel_desc_t kernelDesc = {}; kernelDesc.pKernelName = "rt_test"; auto immDataVector = const_cast> *>(&module.get()->getKernelImmutableDataVector()); immDataVector->push_back(std::move(mockKernelImmutableData)); EXPECT_EQ(ZE_RESULT_SUCCESS, kernel->initialize(&kernelDesc)); EXPECT_EQ(nullptr, module.get()->getDevice()->getNEODevice()->getRTMemoryBackedBuffer()); }
// With hasRTCalls() == true, checks that the RTDispatchGlobals GPU address is patched into the kernel's
// cross-thread data. The test body continues on the next line.
TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueThenCrossThreadDataIsPatched) { KernelDescriptorRTCallsTrue mockDescriptor = {}; mockDescriptor.kernelMetadata.kernelName = "rt_test"; for (auto i = 0u; i < 3u; i++) { mockDescriptor.kernelAttributes.requiredWorkgroupSize[i] = 0; } std::unique_ptr mockKernelImmutableData = std::make_unique(32u); mockKernelImmutableData->kernelDescriptor = &mockDescriptor; mockDescriptor.payloadMappings.implicitArgs.rtDispatchGlobals.pointerSize = 4; ModuleBuildLog *moduleBuildLog = nullptr; module = std::make_unique(device,
// (Continuation of whenHasRTCallsIsTrueThenCrossThreadDataIsPatched.) The kernel's crossThreadData pointer
// is pointed at a locally owned buffer, so it is release()d at the end of the test; the local
// `crossThreadData` object remains the owner of that memory.
moduleBuildLog, ModuleType::User, 32u, mockKernelImmutableData.get()); module->maxGroupSize = 10; std::unique_ptr kernel; kernel = std::make_unique(module.get()); ze_kernel_desc_t kernelDesc = {}; kernelDesc.pKernelName = "rt_test"; auto immDataVector = const_cast> *>(&module.get()->getKernelImmutableDataVector()); immDataVector->push_back(std::move(mockKernelImmutableData)); auto crossThreadData = std::make_unique(4); kernel->crossThreadData.reset(reinterpret_cast(crossThreadData.get())); kernel->crossThreadDataSize = sizeof(uint32_t[4]); neoDevice->setRTDispatchGlobalsForceAllocation(); auto result = kernel->initialize(&kernelDesc); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto rtDispatchGlobals = neoDevice->getRTDispatchGlobals(NEO::RayTracingHelper::maxBvhLevels); EXPECT_NE(nullptr, rtDispatchGlobals); auto dispatchGlobalsAddressPatched = *reinterpret_cast(crossThreadData.get()); auto dispatchGlobalsGpuAddressOffset = static_cast(rtDispatchGlobals->getGpuAddressToPatch()); EXPECT_EQ(dispatchGlobalsGpuAddressOffset, dispatchGlobalsAddressPatched); kernel->crossThreadData.release(); }
using KernelIndirectPropertiesFromIGCTests = KernelImmutableDataTests;
// With DisableIndirectAccess == 0 and all three hasNonKernelArg{Load,Store,Atomic} attributes false, the
// initialized kernel must report hasIndirectAccess() == false. The test body continues on the next line.
TEST_F(KernelIndirectPropertiesFromIGCTests, whenInitializingKernelWithNoKernelLoadAndNoStoreAndNoAtomicThenHasIndirectAccessIsSetToFalse) { DebugManagerStateRestore restorer; NEO::DebugManager.flags.DisableIndirectAccess.set(0); uint32_t perHwThreadPrivateMemorySizeRequested = 32u; bool isInternal = false; std::unique_ptr mockKernelImmData = std::make_unique(perHwThreadPrivateMemorySizeRequested); createModuleFromBinary(perHwThreadPrivateMemorySizeRequested, isInternal, mockKernelImmData.get()); std::unique_ptr kernel; kernel = std::make_unique(module.get()); ze_kernel_desc_t desc = {}; desc.pKernelName = kernelName.c_str(); module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgLoad = false; module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgStore = false;
// (End of whenInitializingKernelWithNoKernelLoadAndNoStoreAndNoAtomicThenHasIndirectAccessIsSetToFalse.)
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgAtomic = false; kernel->initialize(&desc); EXPECT_FALSE(kernel->hasIndirectAccess()); }
// With DisableIndirectAccess == 0, sets exactly one of hasNonKernelArgLoad / Store / Atomic at a time
// (three scoped sub-cases, each with a fresh kernel) and expects hasIndirectAccess() == true every time.
// The third sub-case continues on the next line.
TEST_F(KernelIndirectPropertiesFromIGCTests, whenInitializingKernelWithKernelLoadStoreAtomicThenHasIndirectAccessIsSetToTrue) { DebugManagerStateRestore restorer; NEO::DebugManager.flags.DisableIndirectAccess.set(0); uint32_t perHwThreadPrivateMemorySizeRequested = 32u; bool isInternal = false; std::unique_ptr mockKernelImmData = std::make_unique(perHwThreadPrivateMemorySizeRequested); createModuleFromBinary(perHwThreadPrivateMemorySizeRequested, isInternal, mockKernelImmData.get()); { std::unique_ptr kernel; kernel = std::make_unique(module.get()); ze_kernel_desc_t desc = {}; desc.pKernelName = kernelName.c_str(); module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgLoad = true; module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgStore = false; module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgAtomic = false; kernel->initialize(&desc); EXPECT_TRUE(kernel->hasIndirectAccess()); } { std::unique_ptr kernel; kernel = std::make_unique(module.get()); ze_kernel_desc_t desc = {}; desc.pKernelName = kernelName.c_str(); module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgLoad = false; module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgStore = true; module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgAtomic = false; kernel->initialize(&desc); EXPECT_TRUE(kernel->hasIndirectAccess()); } { std::unique_ptr kernel; kernel = std::make_unique(module.get()); ze_kernel_desc_t desc = {}; desc.pKernelName = kernelName.c_str(); module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgLoad = false; module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgStore = false;
module->mockKernelImmData->mockKernelDescriptor->kernelAttributes.hasNonKernelArgAtomic = true; kernel->initialize(&desc); EXPECT_TRUE(kernel->hasIndirectAccess()); } } class KernelPropertiesTests : public ModuleFixture, public ::testing::Test { public: class MockKernel : public KernelImp { public: using KernelImp::kernelHasIndirectAccess; }; void SetUp() override { ModuleFixture::SetUp(); ze_kernel_desc_t kernelDesc = {}; kernelDesc.pKernelName = kernelName.c_str(); ze_result_t res = module->createKernel(&kernelDesc, &kernelHandle); EXPECT_EQ(ZE_RESULT_SUCCESS, res); kernel = static_cast(L0::Kernel::fromHandle(kernelHandle)); kernel->kernelHasIndirectAccess = true; } void TearDown() override { Kernel::fromHandle(kernelHandle)->destroy(); ModuleFixture::TearDown(); } ze_kernel_handle_t kernelHandle; MockKernel *kernel = nullptr; }; TEST_F(KernelPropertiesTests, givenKernelThenCorrectNameIsRetrieved) { size_t kernelSize = 0; ze_result_t res = kernel->getKernelName(&kernelSize, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, res); EXPECT_EQ(kernelSize, kernelName.length() + 1); size_t alteredKernelSize = kernelSize * 2; res = kernel->getKernelName(&alteredKernelSize, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, res); EXPECT_EQ(alteredKernelSize, kernelSize); char *kernelNameRetrieved = new char[kernelSize]; res = kernel->getKernelName(&kernelSize, kernelNameRetrieved); EXPECT_EQ(ZE_RESULT_SUCCESS, res); EXPECT_EQ(0, strncmp(kernelName.c_str(), kernelNameRetrieved, kernelSize)); delete[] kernelNameRetrieved; } TEST_F(KernelPropertiesTests, givenValidKernelThenPropertiesAreRetrieved) { ze_kernel_properties_t kernelProperties = {}; kernelProperties.requiredNumSubGroups = std::numeric_limits::max(); kernelProperties.requiredSubgroupSize = std::numeric_limits::max(); kernelProperties.maxSubgroupSize = std::numeric_limits::max(); kernelProperties.maxNumSubgroups = std::numeric_limits::max(); kernelProperties.localMemSize = std::numeric_limits::max(); kernelProperties.privateMemSize = 
// (Continuation of givenValidKernelThenPropertiesAreRetrieved.) Finishes the sentinel pre-fill, calls
// getProperties and checks: 6 kernel args, no required subgroup count/size, max subgroup size equal to the
// kernel's SIMD size, max subgroup count equal to maxWorkGroupSize / SIMD size, local memory of
// sizeof(float) * 16, zero private/spill memory and zeroed kernel/module UUIDs.
// The former `kernelPropertiesBefore` copy was never read and has been dropped.
std::numeric_limits::max(); kernelProperties.spillMemSize = std::numeric_limits::max(); kernelProperties.numKernelArgs = std::numeric_limits::max(); memset(&kernelProperties.uuid.kid, std::numeric_limits::max(), sizeof(kernelProperties.uuid.kid)); memset(&kernelProperties.uuid.mid, std::numeric_limits::max(), sizeof(kernelProperties.uuid.mid)); ze_result_t res = kernel->getProperties(&kernelProperties); EXPECT_EQ(ZE_RESULT_SUCCESS, res); EXPECT_EQ(6U, kernelProperties.numKernelArgs); EXPECT_EQ(0U, kernelProperties.requiredNumSubGroups); EXPECT_EQ(0U, kernelProperties.requiredSubgroupSize); uint32_t maxSubgroupSize = this->kernel->getKernelDescriptor().kernelAttributes.simdSize; ASSERT_NE(0U, maxSubgroupSize); EXPECT_EQ(maxSubgroupSize, kernelProperties.maxSubgroupSize); uint32_t maxKernelWorkGroupSize = static_cast(this->module->getDevice()->getNEODevice()->getDeviceInfo().maxWorkGroupSize); uint32_t maxNumSubgroups = maxKernelWorkGroupSize / maxSubgroupSize; EXPECT_EQ(maxNumSubgroups, kernelProperties.maxNumSubgroups); EXPECT_EQ(sizeof(float) * 16U, kernelProperties.localMemSize); EXPECT_EQ(0U, kernelProperties.privateMemSize); EXPECT_EQ(0U, kernelProperties.spillMemSize); uint8_t zeroKid[ZE_MAX_KERNEL_UUID_SIZE]; uint8_t zeroMid[ZE_MAX_MODULE_UUID_SIZE]; memset(&zeroKid, 0, ZE_MAX_KERNEL_UUID_SIZE); memset(&zeroMid, 0, ZE_MAX_MODULE_UUID_SIZE); EXPECT_EQ(0, memcmp(&kernelProperties.uuid.kid, &zeroKid, sizeof(kernelProperties.uuid.kid))); EXPECT_EQ(0, memcmp(&kernelProperties.uuid.mid, &zeroMid, sizeof(kernelProperties.uuid.mid))); }
// Chains a ze_kernel_preferred_group_size_properties_t through pNext and expects preferredMultiple to be
// the kernel's max SIMD size, doubled when fused EU dispatch is enabled. The test body continues on the
// next line.
TEST_F(KernelPropertiesTests, whenPassingPreferredGroupSizeStructToGetPropertiesThenPreferredMultipleIsReturned) { ze_kernel_properties_t kernelProperties = {}; kernelProperties.stype = ZE_STRUCTURE_TYPE_KERNEL_PROPERTIES; ze_kernel_preferred_group_size_properties_t preferredGroupProperties = {}; preferredGroupProperties.stype =
// (Continuation of whenPassingPreferredGroupSizeStructToGetPropertiesThenPreferredMultipleIsReturned.)
ZE_STRUCTURE_TYPE_KERNEL_PREFERRED_GROUP_SIZE_PROPERTIES; kernelProperties.pNext = &preferredGroupProperties; ze_result_t res = kernel->getProperties(&kernelProperties); EXPECT_EQ(ZE_RESULT_SUCCESS, res); auto &hwHelper = NEO::HwHelper::get(module->getDevice()->getHwInfo().platform.eRenderCoreFamily); if (hwHelper.isFusedEuDispatchEnabled(module->getDevice()->getHwInfo(), false)) { EXPECT_EQ(preferredGroupProperties.preferredMultiple, static_cast(kernel->getImmutableData()->getKernelInfo()->getMaxSimdSize()) * 2); } else { EXPECT_EQ(preferredGroupProperties.preferredMultiple, static_cast(kernel->getImmutableData()->getKernelInfo()->getMaxSimdSize())); } }
// A pNext struct carrying an unrelated stype must be left untouched: getProperties succeeds and the
// pre-set preferredMultiple (101) is preserved.
TEST_F(KernelPropertiesTests, whenPassingPreferredGroupSizeStructWithWrongStypeSuccessIsReturnedAndNoFieldsInPreferredGroupSizeStructAreSet) { ze_kernel_properties_t kernelProperties = {}; kernelProperties.stype = ZE_STRUCTURE_TYPE_KERNEL_PROPERTIES; ze_kernel_preferred_group_size_properties_t preferredGroupProperties = {}; preferredGroupProperties.stype = ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMPORT_WIN32; kernelProperties.pNext = &preferredGroupProperties; uint32_t dummyPreferredMultiple = 101; preferredGroupProperties.preferredMultiple = dummyPreferredMultiple; ze_result_t res = kernel->getProperties(&kernelProperties); EXPECT_EQ(ZE_RESULT_SUCCESS, res); EXPECT_EQ(preferredGroupProperties.preferredMultiple, dummyPreferredMultiple); }
// getProfileInfo must succeed and overwrite the sentinel flags / numTokens values with 0.
TEST_F(KernelPropertiesTests, givenValidKernelThenProfilePropertiesAreRetrieved) { zet_profile_properties_t kernelProfileProperties = {}; kernelProfileProperties.flags = std::numeric_limits::max(); kernelProfileProperties.numTokens = std::numeric_limits::max(); ze_result_t res = kernel->getProfileInfo(&kernelProfileProperties); EXPECT_EQ(ZE_RESULT_SUCCESS, res); EXPECT_EQ(0U, kernelProfileProperties.flags); EXPECT_EQ(0U, kernelProfileProperties.numTokens); }
// All three indirect-allocation controls start out false and become true after setIndirectAccess with the
// DEVICE | HOST | SHARED flags. The test body continues on the next line.
TEST_F(KernelPropertiesTests, whenSettingValidKernelIndirectAccessFlagsThenFlagsAreSetCorrectly) { UnifiedMemoryControls
// (Continuation of whenSettingValidKernelIndirectAccessFlagsThenFlagsAreSetCorrectly.)
unifiedMemoryControls = kernel->getUnifiedMemoryControls(); EXPECT_EQ(false, unifiedMemoryControls.indirectDeviceAllocationsAllowed); EXPECT_EQ(false, unifiedMemoryControls.indirectHostAllocationsAllowed); EXPECT_EQ(false, unifiedMemoryControls.indirectSharedAllocationsAllowed); ze_kernel_indirect_access_flags_t flags = ZE_KERNEL_INDIRECT_ACCESS_FLAG_DEVICE | ZE_KERNEL_INDIRECT_ACCESS_FLAG_HOST | ZE_KERNEL_INDIRECT_ACCESS_FLAG_SHARED; auto res = kernel->setIndirectAccess(flags); EXPECT_EQ(ZE_RESULT_SUCCESS, res); unifiedMemoryControls = kernel->getUnifiedMemoryControls(); EXPECT_EQ(true, unifiedMemoryControls.indirectDeviceAllocationsAllowed); EXPECT_EQ(true, unifiedMemoryControls.indirectHostAllocationsAllowed); EXPECT_EQ(true, unifiedMemoryControls.indirectSharedAllocationsAllowed); }
// After setting only the DEVICE flag, getIndirectAccess must report DEVICE and neither HOST nor SHARED.
TEST_F(KernelPropertiesTests, whenCallingGetIndirectAccessAfterSetIndirectAccessWithDeviceFlagThenCorrectFlagIsReturned) { ze_kernel_indirect_access_flags_t flags = ZE_KERNEL_INDIRECT_ACCESS_FLAG_DEVICE; auto res = kernel->setIndirectAccess(flags); EXPECT_EQ(ZE_RESULT_SUCCESS, res); ze_kernel_indirect_access_flags_t returnedFlags; res = kernel->getIndirectAccess(&returnedFlags); EXPECT_EQ(ZE_RESULT_SUCCESS, res); EXPECT_TRUE(returnedFlags & ZE_KERNEL_INDIRECT_ACCESS_FLAG_DEVICE); EXPECT_FALSE(returnedFlags & ZE_KERNEL_INDIRECT_ACCESS_FLAG_HOST); EXPECT_FALSE(returnedFlags & ZE_KERNEL_INDIRECT_ACCESS_FLAG_SHARED); }
// After setting only the HOST flag, getIndirectAccess must report HOST and neither DEVICE nor SHARED.
// The test body continues on the next line.
TEST_F(KernelPropertiesTests, whenCallingGetIndirectAccessAfterSetIndirectAccessWithHostFlagThenCorrectFlagIsReturned) { ze_kernel_indirect_access_flags_t flags = ZE_KERNEL_INDIRECT_ACCESS_FLAG_HOST; auto res = kernel->setIndirectAccess(flags); EXPECT_EQ(ZE_RESULT_SUCCESS, res); ze_kernel_indirect_access_flags_t returnedFlags; res = kernel->getIndirectAccess(&returnedFlags); EXPECT_EQ(ZE_RESULT_SUCCESS, res); EXPECT_FALSE(returnedFlags & ZE_KERNEL_INDIRECT_ACCESS_FLAG_DEVICE); EXPECT_TRUE(returnedFlags & ZE_KERNEL_INDIRECT_ACCESS_FLAG_HOST); EXPECT_FALSE(returnedFlags &
ZE_KERNEL_INDIRECT_ACCESS_FLAG_SHARED);
}

// Sets only the SHARED flag and expects getIndirectAccess() to report SHARED and neither DEVICE nor HOST.
TEST_F(KernelPropertiesTests, whenCallingGetIndirectAccessAfterSetIndirectAccessWithSharedFlagThenCorrectFlagIsReturned) {
    ze_kernel_indirect_access_flags_t flags = ZE_KERNEL_INDIRECT_ACCESS_FLAG_SHARED;
    auto res = kernel->setIndirectAccess(flags);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    ze_kernel_indirect_access_flags_t returnedFlags;
    res = kernel->getIndirectAccess(&returnedFlags);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    EXPECT_FALSE(returnedFlags & ZE_KERNEL_INDIRECT_ACCESS_FLAG_DEVICE);
    EXPECT_FALSE(returnedFlags & ZE_KERNEL_INDIRECT_ACCESS_FLAG_HOST);
    EXPECT_TRUE(returnedFlags & ZE_KERNEL_INDIRECT_ACCESS_FLAG_SHARED);
}

// With the DisableIndirectAccess debug flag explicitly set to 0, requesting all three indirect
// access kinds is expected to turn all three controls on.
TEST_F(KernelPropertiesTests, givenValidKernelWithIndirectAccessFlagsAndDisableIndirectAccessSetToZeroThenFlagsAreSet) {
    // Declared first so it outlives the flag change made on the next line.
    DebugManagerStateRestore restorer;
    NEO::DebugManager.flags.DisableIndirectAccess.set(0);
    UnifiedMemoryControls unifiedMemoryControls = kernel->getUnifiedMemoryControls();
    EXPECT_EQ(false, unifiedMemoryControls.indirectDeviceAllocationsAllowed);
    EXPECT_EQ(false, unifiedMemoryControls.indirectHostAllocationsAllowed);
    EXPECT_EQ(false, unifiedMemoryControls.indirectSharedAllocationsAllowed);
    ze_kernel_indirect_access_flags_t flags = ZE_KERNEL_INDIRECT_ACCESS_FLAG_DEVICE | ZE_KERNEL_INDIRECT_ACCESS_FLAG_HOST | ZE_KERNEL_INDIRECT_ACCESS_FLAG_SHARED;
    auto res = kernel->setIndirectAccess(flags);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    unifiedMemoryControls = kernel->getUnifiedMemoryControls();
    EXPECT_TRUE(unifiedMemoryControls.indirectDeviceAllocationsAllowed);
    EXPECT_TRUE(unifiedMemoryControls.indirectHostAllocationsAllowed);
    EXPECT_TRUE(unifiedMemoryControls.indirectSharedAllocationsAllowed);
}

// Points a mock kernel at immutable data that uses a KernelDescriptorRTCallsTrue descriptor and
// expects usesRayTracing() to return true.
HWTEST2_F(KernelPropertiesTests, whenHasRTCallsIsTrueThenUsesRayTracingIsTrue, MatchAny) {
    WhiteBoxKernelHw mockKernel;
    KernelDescriptorRTCallsTrue mockDescriptor = {};
    WhiteBox<::L0::KernelImmutableData> mockKernelImmutableData = {};
    mockKernelImmutableData.kernelDescriptor = &mockDescriptor;
mockKernel.kernelImmData = &mockKernelImmutableData;
    EXPECT_TRUE(mockKernel.usesRayTracing());
}

// Counterpart of the test above: with a KernelDescriptorRTCallsFalse descriptor,
// usesRayTracing() is expected to return false.
HWTEST2_F(KernelPropertiesTests, whenHasRTCallsIsFalseThenUsesRayTracingIsFalse, MatchAny) {
    WhiteBoxKernelHw mockKernel;
    KernelDescriptorRTCallsFalse mockDescriptor = {};
    WhiteBox<::L0::KernelImmutableData> mockKernelImmutableData = {};
    mockKernelImmutableData.kernelDescriptor = &mockDescriptor;
    mockKernel.kernelImmData = &mockKernelImmutableData;
    EXPECT_FALSE(mockKernel.usesRayTracing());
}

// Same fixture as KernelPropertiesTests; the alias only groups the tests that toggle
// kernel->kernelHasIndirectAccess.
using KernelIndirectPropertiesTests = KernelPropertiesTests;

// kernelHasIndirectAccess = true and DisableIndirectAccess = 0: all three controls are expected
// to be enabled by setIndirectAccess().
TEST_F(KernelIndirectPropertiesTests, whenCallingSetIndirectAccessWithKernelThatHasIndirectAccessThenIndirectAccessIsSet) {
    DebugManagerStateRestore restorer;
    NEO::DebugManager.flags.DisableIndirectAccess.set(0);
    kernel->kernelHasIndirectAccess = true;
    UnifiedMemoryControls unifiedMemoryControls = kernel->getUnifiedMemoryControls();
    EXPECT_EQ(false, unifiedMemoryControls.indirectDeviceAllocationsAllowed);
    EXPECT_EQ(false, unifiedMemoryControls.indirectHostAllocationsAllowed);
    EXPECT_EQ(false, unifiedMemoryControls.indirectSharedAllocationsAllowed);
    ze_kernel_indirect_access_flags_t flags = ZE_KERNEL_INDIRECT_ACCESS_FLAG_DEVICE | ZE_KERNEL_INDIRECT_ACCESS_FLAG_HOST | ZE_KERNEL_INDIRECT_ACCESS_FLAG_SHARED;
    auto res = kernel->setIndirectAccess(flags);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    unifiedMemoryControls = kernel->getUnifiedMemoryControls();
    EXPECT_TRUE(unifiedMemoryControls.indirectDeviceAllocationsAllowed);
    EXPECT_TRUE(unifiedMemoryControls.indirectHostAllocationsAllowed);
    EXPECT_TRUE(unifiedMemoryControls.indirectSharedAllocationsAllowed);
}

// kernelHasIndirectAccess = true but DisableIndirectAccess = 1: setIndirectAccess() is still
// expected to succeed, yet none of the controls may be enabled afterwards.
TEST_F(KernelIndirectPropertiesTests, whenCallingSetIndirectAccessWithKernelThatHasIndirectAccessButWithDisableIndirectAccessSetThenIndirectAccessIsNotSet) {
    DebugManagerStateRestore restorer;
    NEO::DebugManager.flags.DisableIndirectAccess.set(1);
    kernel->kernelHasIndirectAccess = true;
    UnifiedMemoryControls unifiedMemoryControls = kernel->getUnifiedMemoryControls();
EXPECT_EQ(false, unifiedMemoryControls.indirectDeviceAllocationsAllowed);
    EXPECT_EQ(false, unifiedMemoryControls.indirectHostAllocationsAllowed);
    EXPECT_EQ(false, unifiedMemoryControls.indirectSharedAllocationsAllowed);
    ze_kernel_indirect_access_flags_t flags = ZE_KERNEL_INDIRECT_ACCESS_FLAG_DEVICE | ZE_KERNEL_INDIRECT_ACCESS_FLAG_HOST | ZE_KERNEL_INDIRECT_ACCESS_FLAG_SHARED;
    auto res = kernel->setIndirectAccess(flags);
    // The call itself must still report success even though the flags are expected to be ignored.
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    unifiedMemoryControls = kernel->getUnifiedMemoryControls();
    EXPECT_FALSE(unifiedMemoryControls.indirectDeviceAllocationsAllowed);
    EXPECT_FALSE(unifiedMemoryControls.indirectHostAllocationsAllowed);
    EXPECT_FALSE(unifiedMemoryControls.indirectSharedAllocationsAllowed);
}

// kernelHasIndirectAccess = true and DisableIndirectAccess = 0: all three controls are expected
// to be enabled after setIndirectAccess().
TEST_F(KernelIndirectPropertiesTests, whenCallingSetIndirectAccessWithKernelThatHasIndirectAccessAndDisableIndirectAccessNotSetThenIndirectAccessIsSet) {
    DebugManagerStateRestore restorer;
    NEO::DebugManager.flags.DisableIndirectAccess.set(0);
    kernel->kernelHasIndirectAccess = true;
    UnifiedMemoryControls unifiedMemoryControls = kernel->getUnifiedMemoryControls();
    EXPECT_EQ(false, unifiedMemoryControls.indirectDeviceAllocationsAllowed);
    EXPECT_EQ(false, unifiedMemoryControls.indirectHostAllocationsAllowed);
    EXPECT_EQ(false, unifiedMemoryControls.indirectSharedAllocationsAllowed);
    ze_kernel_indirect_access_flags_t flags = ZE_KERNEL_INDIRECT_ACCESS_FLAG_DEVICE | ZE_KERNEL_INDIRECT_ACCESS_FLAG_HOST | ZE_KERNEL_INDIRECT_ACCESS_FLAG_SHARED;
    auto res = kernel->setIndirectAccess(flags);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    unifiedMemoryControls = kernel->getUnifiedMemoryControls();
    EXPECT_TRUE(unifiedMemoryControls.indirectDeviceAllocationsAllowed);
    EXPECT_TRUE(unifiedMemoryControls.indirectHostAllocationsAllowed);
    EXPECT_TRUE(unifiedMemoryControls.indirectSharedAllocationsAllowed);
}

// kernelHasIndirectAccess = false (set on the next source line) with DisableIndirectAccess = 0:
// the controls are expected to stay disabled even though all flags are requested.
TEST_F(KernelIndirectPropertiesTests, whenCallingSetIndirectAccessWithKernelThatDoesNotHaveIndirectAccessThenIndirectAccessIsNotSet) {
    DebugManagerStateRestore restorer;
NEO::DebugManager.flags.DisableIndirectAccess.set(0);
    kernel->kernelHasIndirectAccess = false;
    UnifiedMemoryControls unifiedMemoryControls = kernel->getUnifiedMemoryControls();
    EXPECT_EQ(false, unifiedMemoryControls.indirectDeviceAllocationsAllowed);
    EXPECT_EQ(false, unifiedMemoryControls.indirectHostAllocationsAllowed);
    EXPECT_EQ(false, unifiedMemoryControls.indirectSharedAllocationsAllowed);
    ze_kernel_indirect_access_flags_t flags = ZE_KERNEL_INDIRECT_ACCESS_FLAG_DEVICE | ZE_KERNEL_INDIRECT_ACCESS_FLAG_HOST | ZE_KERNEL_INDIRECT_ACCESS_FLAG_SHARED;
    auto res = kernel->setIndirectAccess(flags);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    unifiedMemoryControls = kernel->getUnifiedMemoryControls();
    EXPECT_FALSE(unifiedMemoryControls.indirectDeviceAllocationsAllowed);
    EXPECT_FALSE(unifiedMemoryControls.indirectHostAllocationsAllowed);
    EXPECT_FALSE(unifiedMemoryControls.indirectSharedAllocationsAllowed);
}

// hasIndirectAllocationsAllowed() is expected to flip from false to true once a single
// indirect access flag (DEVICE) has been set.
TEST_F(KernelPropertiesTests, givenValidKernelIndirectAccessFlagsSetThenExpectKernelIndirectAllocationsAllowedTrue) {
    EXPECT_EQ(false, kernel->hasIndirectAllocationsAllowed());
    ze_kernel_indirect_access_flags_t flags = ZE_KERNEL_INDIRECT_ACCESS_FLAG_DEVICE;
    auto res = kernel->setIndirectAccess(flags);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    EXPECT_EQ(true, kernel->hasIndirectAllocationsAllowed());
}

// Primes spillMemSize with numeric_limits max() and expects getProperties() to report 0 for it.
// NOTE(review): kernelPropertiesBefore is assigned but never read in the visible code, and the
// loop below only searches for a matching KernelInfo (`ki`) that no assertion uses - both look
// like leftovers; confirm before removing.
TEST_F(KernelPropertiesTests, givenValidKernelAndNoMediavfestateThenSpillMemSizeIsZero) {
    ze_kernel_properties_t kernelProperties = {};
    kernelProperties.spillMemSize = std::numeric_limits::max();
    ze_kernel_properties_t kernelPropertiesBefore = {};
    kernelPropertiesBefore = kernelProperties;
    ze_result_t res = kernel->getProperties(&kernelProperties);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    L0::ModuleImp *moduleImp = reinterpret_cast(module.get());
    NEO::KernelInfo *ki = nullptr;
    for (uint32_t i = 0; i < moduleImp->getTranslationUnit()->programInfo.kernelInfos.size(); i++) {
        ki = moduleImp->getTranslationUnit()->programInfo.kernelInfos[i];
        if
(ki->kernelDescriptor.kernelMetadata.kernelName.compare(0, ki->kernelDescriptor.kernelMetadata.kernelName.size(), kernel->getImmutableData()->getDescriptor().kernelMetadata.kernelName) == 0) { break; } } EXPECT_EQ(0u, kernelProperties.spillMemSize); } TEST_F(KernelPropertiesTests, givenValidKernelAndNollocateStatelessPrivateSurfaceThenPrivateMemSizeIsZero) { ze_kernel_properties_t kernelProperties = {}; kernelProperties.spillMemSize = std::numeric_limits::max(); ze_kernel_properties_t kernelPropertiesBefore = {}; kernelPropertiesBefore = kernelProperties; ze_result_t res = kernel->getProperties(&kernelProperties); EXPECT_EQ(ZE_RESULT_SUCCESS, res); L0::ModuleImp *moduleImp = reinterpret_cast(module.get()); NEO::KernelInfo *ki = nullptr; for (uint32_t i = 0; i < moduleImp->getTranslationUnit()->programInfo.kernelInfos.size(); i++) { ki = moduleImp->getTranslationUnit()->programInfo.kernelInfos[i]; if (ki->kernelDescriptor.kernelMetadata.kernelName.compare(0, ki->kernelDescriptor.kernelMetadata.kernelName.size(), kernel->getImmutableData()->getDescriptor().kernelMetadata.kernelName) == 0) { break; } } EXPECT_EQ(0u, kernelProperties.privateMemSize); } TEST_F(KernelPropertiesTests, givenValidKernelAndLargeSlmIsSetThenForceLargeSlmIsTrue) { EXPECT_EQ(NEO::SlmPolicy::SlmPolicyNone, kernel->getSlmPolicy()); ze_result_t res = kernel->setCacheConfig(ZE_CACHE_CONFIG_FLAG_LARGE_SLM); EXPECT_EQ(ZE_RESULT_SUCCESS, res); EXPECT_EQ(NEO::SlmPolicy::SlmPolicyLargeSlm, kernel->getSlmPolicy()); } TEST_F(KernelPropertiesTests, givenValidKernelAndLargeDataIsSetThenForceLargeDataIsTrue) { EXPECT_EQ(NEO::SlmPolicy::SlmPolicyNone, kernel->getSlmPolicy()); ze_result_t res = kernel->setCacheConfig(ZE_CACHE_CONFIG_FLAG_LARGE_DATA); EXPECT_EQ(ZE_RESULT_SUCCESS, res); EXPECT_EQ(NEO::SlmPolicy::SlmPolicyLargeData, kernel->getSlmPolicy()); } using KernelLocalIdsTest = Test; TEST_F(KernelLocalIdsTest, WhenKernelIsCreatedThenDefaultLocalIdGenerationbyRuntimeIsTrue) { createKernel(); 
EXPECT_TRUE(kernel->requiresGenerationOfLocalIdsByRuntime());
}

// Fixture for the kernel ISA allocation tests.
// SetUp() forces capabilityTable.blitterOperationsSupported to true. If the capability was not
// already set, it creates an OsContext with an ENGINE_BCS / Regular descriptor and installs it on
// the device's engine 0 (both as engine.osContext and via commandStreamReceiver->setupContext).
// The fixture keeps ownership of that context in bcsOsContext.
// NOTE(review): the template arguments of `Test` and `std::unique_ptr` are missing in this copy
// of the file (looks stripped during extraction); kept as found.
struct KernelIsaTests : Test {
    void SetUp() override {
        Test::SetUp();
        auto &capabilityTable = device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable;
        // Remember the original capability before overriding it.
        bool createBcsEngine = !capabilityTable.blitterOperationsSupported;
        capabilityTable.blitterOperationsSupported = true;
        if (createBcsEngine) {
            auto &engine = device->getNEODevice()->getEngine(0);
            bcsOsContext.reset(OsContext::create(nullptr, 0, EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS, EngineUsage::Regular}, device->getNEODevice()->getDeviceBitfield())));
            engine.osContext = bcsOsContext.get();
            engine.commandStreamReceiver->setupContext(*bcsOsContext);
        }
    }
    std::unique_ptr bcsOsContext;
};

// With CPU access to local memory disallowed, initializing KernelImmutableData is expected to
// bump the BCS command stream receiver's task count by one when the ISA allocation landed in the
// local memory pool, and to leave it unchanged otherwise.
TEST_F(KernelIsaTests, givenKernelAllocationInLocalMemoryWhenCreatingWithoutAllowedCpuAccessThenUseBcsForTransfer) {
    DebugManagerStateRestore restore;
    DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast(LocalMemoryAccessMode::CpuAccessDisallowed));
    DebugManager.flags.ForceNonSystemMemoryPlacement.set(1 << (static_cast(NEO::AllocationType::KERNEL_ISA) - 1));
    // Minimal one-byte kernel heap backed by a local variable.
    uint32_t kernelHeap = 0;
    KernelInfo kernelInfo;
    kernelInfo.heapInfo.KernelHeapSize = 1;
    kernelInfo.heapInfo.pKernelHeap = &kernelHeap;
    KernelImmutableData kernelImmutableData(device);
    auto bcsCsr = device->getNEODevice()->getEngine(aub_stream::EngineType::ENGINE_BCS, EngineUsage::Regular).commandStreamReceiver;
    auto initialTaskCount = bcsCsr->peekTaskCount();
    kernelImmutableData.initialize(&kernelInfo, device, 0, nullptr, nullptr, false);
    if (kernelImmutableData.getIsaGraphicsAllocation()->isAllocatedInLocalMemoryPool()) {
        EXPECT_EQ(initialTaskCount + 1, bcsCsr->peekTaskCount());
    } else {
        EXPECT_EQ(initialTaskCount, bcsCsr->peekTaskCount());
    }
    device->getNEODevice()->getMemoryManager()->freeGraphicsMemory(kernelInfo.kernelAllocation);
}

TEST_F(KernelIsaTests,
givenKernelAllocationInLocalMemoryWhenCreatingWithAllowedCpuAccessThenDontUseBcsForTransfer) {
    // With CPU access to local memory allowed, the BCS task count must not change during initialize().
    DebugManagerStateRestore restore;
    DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast(LocalMemoryAccessMode::CpuAccessAllowed));
    DebugManager.flags.ForceNonSystemMemoryPlacement.set(1 << (static_cast(NEO::AllocationType::KERNEL_ISA) - 1));
    uint32_t kernelHeap = 0;
    KernelInfo kernelInfo;
    kernelInfo.heapInfo.KernelHeapSize = 1;
    kernelInfo.heapInfo.pKernelHeap = &kernelHeap;
    KernelImmutableData kernelImmutableData(device);
    auto bcsCsr = device->getNEODevice()->getEngine(aub_stream::EngineType::ENGINE_BCS, EngineUsage::Regular).commandStreamReceiver;
    auto initialTaskCount = bcsCsr->peekTaskCount();
    kernelImmutableData.initialize(&kernelInfo, device, 0, nullptr, nullptr, false);
    EXPECT_EQ(initialTaskCount, bcsCsr->peekTaskCount());
    device->getNEODevice()->getMemoryManager()->freeGraphicsMemory(kernelInfo.kernelAllocation);
}

// CPU access is disallowed AND blitterOperationsSupported is switched back to false for root
// device environment 0; the BCS task count is expected to stay unchanged (per the test name, the
// copy falls back to the CPU).
TEST_F(KernelIsaTests, givenKernelAllocationInLocalMemoryWhenCreatingWithDisallowedCpuAccessAndDisabledBlitterThenFallbackToCpuCopy) {
    DebugManagerStateRestore restore;
    DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast(LocalMemoryAccessMode::CpuAccessDisallowed));
    DebugManager.flags.ForceNonSystemMemoryPlacement.set(1 << (static_cast(NEO::AllocationType::KERNEL_ISA) - 1));
    // Undo the capability override done by the fixture's SetUp().
    device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[0]->getMutableHardwareInfo()->capabilityTable.blitterOperationsSupported = false;
    uint32_t kernelHeap = 0;
    KernelInfo kernelInfo;
    kernelInfo.heapInfo.KernelHeapSize = 1;
    kernelInfo.heapInfo.pKernelHeap = &kernelHeap;
    KernelImmutableData kernelImmutableData(device);
    auto bcsCsr = device->getNEODevice()->getEngine(aub_stream::EngineType::ENGINE_BCS, EngineUsage::Regular).commandStreamReceiver;
    auto initialTaskCount = bcsCsr->peekTaskCount();
    kernelImmutableData.initialize(&kernelInfo, device, 0, nullptr, nullptr, false);
    EXPECT_EQ(initialTaskCount, bcsCsr->peekTaskCount());
device->getNEODevice()->getMemoryManager()->freeGraphicsMemory(kernelInfo.kernelAllocation);
}

// Passing `true` as the last initialize() argument is expected to yield an ISA allocation of
// type KERNEL_ISA_INTERNAL.
TEST_F(KernelIsaTests, givenKernelInfoWhenInitializingImmutableDataWithInternalIsaThenCorrectAllocationTypeIsUsed) {
    uint32_t kernelHeap = 0;
    KernelInfo kernelInfo;
    kernelInfo.heapInfo.KernelHeapSize = 1;
    kernelInfo.heapInfo.pKernelHeap = &kernelHeap;
    KernelImmutableData kernelImmutableData(device);
    kernelImmutableData.initialize(&kernelInfo, device, 0, nullptr, nullptr, true);
    EXPECT_EQ(NEO::AllocationType::KERNEL_ISA_INTERNAL, kernelImmutableData.getIsaGraphicsAllocation()->getAllocationType());
}

// Passing `false` as the last initialize() argument is expected to yield an ISA allocation of
// type KERNEL_ISA.
TEST_F(KernelIsaTests, givenKernelInfoWhenInitializingImmutableDataWithNonInternalIsaThenCorrectAllocationTypeIsUsed) {
    uint32_t kernelHeap = 0;
    KernelInfo kernelInfo;
    kernelInfo.heapInfo.KernelHeapSize = 1;
    kernelInfo.heapInfo.pKernelHeap = &kernelHeap;
    KernelImmutableData kernelImmutableData(device);
    kernelImmutableData.initialize(&kernelInfo, device, 0, nullptr, nullptr, false);
    EXPECT_EQ(NEO::AllocationType::KERNEL_ISA, kernelImmutableData.getIsaGraphicsAllocation()->getAllocationType());
}

// The ISA allocation's underlying buffer size is expected to equal the kernel heap size plus the
// padding reported by HwHelper::getPaddingForISAAllocation().
TEST_F(KernelIsaTests, givenKernelInfoWhenInitializingImmutableDataWithIsaThenPaddingIsAdded) {
    uint32_t kernelHeap = 0;
    KernelInfo kernelInfo;
    kernelInfo.heapInfo.KernelHeapSize = 1;
    kernelInfo.heapInfo.pKernelHeap = &kernelHeap;
    KernelImmutableData kernelImmutableData(device);
    kernelImmutableData.initialize(&kernelInfo, device, 0, nullptr, nullptr, false);
    auto graphicsAllocation = kernelImmutableData.getIsaGraphicsAllocation();
    auto &hwHelper = NEO::HwHelper::get(device->getHwInfo().platform.eRenderCoreFamily);
    size_t isaPadding = hwHelper.getPaddingForISAAllocation();
    EXPECT_EQ(graphicsAllocation->getUnderlyingBufferSize(), kernelInfo.heapInfo.KernelHeapSize + isaPadding);
}

// Passes global constant and global variable buffers to initialize() and expects each of them to
// appear exactly once in the residency container.
TEST_F(KernelIsaTests, givenGlobalBuffersWhenCreatingKernelImmutableDataThenBuffersAreAddedToResidencyContainer) {
    uint32_t kernelHeap = 0;
    KernelInfo kernelInfo;
    kernelInfo.heapInfo.KernelHeapSize = 1;
kernelInfo.heapInfo.pKernelHeap = &kernelHeap;
    KernelImmutableData kernelImmutableData(device);
    // Both mock allocations wrap the same fake address; only their identity (pointer) matters below.
    uint64_t gpuAddress = 0x1200;
    void *buffer = reinterpret_cast(gpuAddress);
    size_t size = 0x1100;
    NEO::MockGraphicsAllocation globalVarBuffer(buffer, gpuAddress, size);
    NEO::MockGraphicsAllocation globalConstBuffer(buffer, gpuAddress, size);
    kernelImmutableData.initialize(&kernelInfo, device, 0, &globalConstBuffer, &globalVarBuffer, false);
    auto &resCont = kernelImmutableData.getResidencyContainer();
    EXPECT_EQ(1, std::count(resCont.begin(), resCont.end(), &globalVarBuffer));
    EXPECT_EQ(1, std::count(resCont.begin(), resCont.end(), &globalConstBuffer));
}

// NOTE(review): the fixture argument of `Test` (and the arguments of `Mock` below) are missing in
// this copy of the file - looks stripped during extraction; kept as found.
using KernelImpPatchBindlessTest = Test;

// Allocates a surface state in the global bindless heap, then calls patchBindlessSurfaceState()
// for the same allocation and expects: the same surface-state pointer to be returned, the
// cross-thread data at offset `bindless` to hold the extended message descriptor value for the
// surface-state offset, and the heap allocation to be present in the kernel's residency container.
TEST_F(KernelImpPatchBindlessTest, GivenKernelImpWhenPatchBindlessOffsetCalledThenOffsetPatchedCorrectly) {
    Mock kernel;
    // Balanced by decRefInternal() at the end of the test.
    neoDevice->incRefInternal();
    neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()]->createBindlessHeapsHelper(neoDevice->getMemoryManager(), neoDevice->getNumGenericSubDevices() > 1, neoDevice->getRootDeviceIndex(), neoDevice->getDeviceBitfield());
    Mock mockModule(device, nullptr);
    kernel.module = &mockModule;
    NEO::MockGraphicsAllocation alloc;
    uint32_t bindless = 0x40;
    auto &hwHelper = NEO::HwHelper::get(device->getHwInfo().platform.eRenderCoreFamily);
    size_t size = hwHelper.getRenderSurfaceStateSize();
    auto expectedSsInHeap = device->getNEODevice()->getBindlessHeapsHelper()->allocateSSInHeap(size, &alloc, NEO::BindlessHeapsHelper::GLOBAL_SSH);
    auto patchLocation = ptrOffset(kernel.getCrossThreadData(), bindless);
    auto patchValue = hwHelper.getBindlessSurfaceExtendedMessageDescriptorValue(static_cast(expectedSsInHeap.surfaceStateOffset));
    auto ssPtr = kernel.patchBindlessSurfaceState(&alloc, bindless);
    EXPECT_EQ(ssPtr, expectedSsInHeap.ssPtr);
    EXPECT_TRUE(memcmp(const_cast(patchLocation), &patchValue, sizeof(patchValue)) == 0);
    EXPECT_TRUE(std::find(kernel.getResidencyContainer().begin(),
kernel.getResidencyContainer().end(), expectedSsInHeap.heapAllocation) != kernel.getResidencyContainer().end());
    neoDevice->decRefInternal();
}

// Marks explicit argument 0 as bindless (bindless = 0x40, bindful = undefined), zeroes a surface
// state allocated in the global bindless heap, and expects setBufferSurfaceState() to change the
// bytes of that surface state.
HWTEST2_F(KernelImpPatchBindlessTest, GivenKernelImpWhenSetSurfaceStateBindlessThenSurfaceStateUpdated, MatchAny) {
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
    ze_kernel_desc_t desc = {};
    desc.pKernelName = kernelName.c_str();
    WhiteBoxKernelHw mockKernel;
    mockKernel.module = module.get();
    mockKernel.initialize(&desc);
    // The descriptor is const; the cast lets the test rewrite the argument's offsets in place.
    auto &arg = const_cast(mockKernel.kernelImmData->getDescriptor().payloadMappings.explicitArgs[0].template as());
    arg.bindless = 0x40;
    arg.bindful = undefined;
    neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()]->createBindlessHeapsHelper(neoDevice->getMemoryManager(), neoDevice->getNumGenericSubDevices() > 1, neoDevice->getRootDeviceIndex(), neoDevice->getDeviceBitfield());
    auto &hwHelper = NEO::HwHelper::get(device->getHwInfo().platform.eRenderCoreFamily);
    size_t size = hwHelper.getRenderSurfaceStateSize();
    uint64_t gpuAddress = 0x2000;
    void *buffer = reinterpret_cast(gpuAddress);
    NEO::MockGraphicsAllocation mockAllocation(buffer, gpuAddress, size);
    auto expectedSsInHeap = device->getNEODevice()->getBindlessHeapsHelper()->allocateSSInHeap(size, &mockAllocation, NEO::BindlessHeapsHelper::GLOBAL_SSH);
    // Start from an all-zero surface state so that any write is detectable by memcmp.
    memset(expectedSsInHeap.ssPtr, 0, size);
    auto surfaceStateBefore = *reinterpret_cast(expectedSsInHeap.ssPtr);
    mockKernel.setBufferSurfaceState(0, buffer, &mockAllocation);
    auto surfaceStateAfter = *reinterpret_cast(expectedSsInHeap.ssPtr);
    EXPECT_FALSE(memcmp(&surfaceStateAfter, &surfaceStateBefore, size) == 0);
}

// Bindful counterpart: with bindless = undefined and bindful = 0x40, the surface state in the
// bindless heap is expected to remain byte-identical after setBufferSurfaceState().
HWTEST2_F(KernelImpPatchBindlessTest, GivenKernelImpWhenSetSurfaceStateBindfulThenSurfaceStateNotUpdated, MatchAny) {
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
    ze_kernel_desc_t desc = {};
    desc.pKernelName = kernelName.c_str();
    WhiteBoxKernelHw mockKernel;
    mockKernel.module = module.get();
mockKernel.initialize(&desc);
    auto &arg = const_cast(mockKernel.kernelImmData->getDescriptor().payloadMappings.explicitArgs[0].template as());
    arg.bindless = undefined;
    arg.bindful = 0x40;
    neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()]->createBindlessHeapsHelper(neoDevice->getMemoryManager(), neoDevice->getNumGenericSubDevices() > 1, neoDevice->getRootDeviceIndex(), neoDevice->getDeviceBitfield());
    auto &hwHelper = NEO::HwHelper::get(device->getHwInfo().platform.eRenderCoreFamily);
    size_t size = hwHelper.getRenderSurfaceStateSize();
    uint64_t gpuAddress = 0x2000;
    void *buffer = reinterpret_cast(gpuAddress);
    NEO::MockGraphicsAllocation mockAllocation(buffer, gpuAddress, size);
    auto expectedSsInHeap = device->getNEODevice()->getBindlessHeapsHelper()->allocateSSInHeap(size, &mockAllocation, NEO::BindlessHeapsHelper::GLOBAL_SSH);
    // Zero the heap surface state so an unexpected write would show up in the memcmp below.
    memset(expectedSsInHeap.ssPtr, 0, size);
    auto surfaceStateBefore = *reinterpret_cast(expectedSsInHeap.ssPtr);
    mockKernel.setBufferSurfaceState(0, buffer, &mockAllocation);
    auto surfaceStateAfter = *reinterpret_cast(expectedSsInHeap.ssPtr);
    EXPECT_TRUE(memcmp(&surfaceStateAfter, &surfaceStateBefore, size) == 0);
}

// NOTE(review): the fixture argument of `Test` is missing in this copy of the file (looks
// stripped during extraction); kept as found.
using KernelImpL3CachingTests = Test;

// Calls setBufferSurfaceState() with a host address that the test name describes as unaligned and
// expects getKernelRequiresQueueUncachedMocs() to report true afterwards.
HWTEST2_F(KernelImpL3CachingTests, GivenKernelImpWhenSetSurfaceStateWithUnalignedMemoryThenL3CachingIsDisabled, MatchAny) {
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
    ze_kernel_desc_t desc = {};
    desc.pKernelName = kernelName.c_str();
    WhiteBoxKernelHw mockKernel;
    mockKernel.module = module.get();
    mockKernel.initialize(&desc);
    auto &arg = const_cast(mockKernel.kernelImmData->getDescriptor().payloadMappings.explicitArgs[0].template as());
    arg.bindless = undefined;
    arg.bindful = 0x40;
    neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()]->createBindlessHeapsHelper(neoDevice->getMemoryManager(), neoDevice->getNumGenericSubDevices() > 1, neoDevice->getRootDeviceIndex(),
neoDevice->getDeviceBitfield());
    auto &hwHelper = NEO::HwHelper::get(device->getHwInfo().platform.eRenderCoreFamily);
    size_t size = hwHelper.getRenderSurfaceStateSize();
    uint64_t gpuAddress = 0x2000;
    // Unlike the neighbouring tests, the host pointer (0x20123) differs from gpuAddress.
    void *buffer = reinterpret_cast(0x20123);
    NEO::MockGraphicsAllocation mockAllocation(buffer, gpuAddress, size);
    auto expectedSsInHeap = device->getNEODevice()->getBindlessHeapsHelper()->allocateSSInHeap(size, &mockAllocation, NEO::BindlessHeapsHelper::GLOBAL_SSH);
    memset(expectedSsInHeap.ssPtr, 0, size);
    mockKernel.setBufferSurfaceState(0, buffer, &mockAllocation);
    EXPECT_EQ(mockKernel.getKernelRequiresQueueUncachedMocs(), true);
}

// Kernel mock used by the tests below.
// - setBufferSurfaceState() is replaced by a stub that only records that it was called.
// - setArgBufferWithAlloc() forwards to the real KernelImp implementation, so the tests exercise
//   the production argument-setting path while observing the stubbed surface-state call.
// NOTE(review): the template argument of `Mock` is missing in this copy of the file (looks
// stripped during extraction); kept as found.
struct MyMockKernel : public Mock {
    void setBufferSurfaceState(uint32_t argIndex, void *address, NEO::GraphicsAllocation *alloc) override {
        setSurfaceStateCalled = true;
    }
    ze_result_t setArgBufferWithAlloc(uint32_t argIndex, uintptr_t argVal, NEO::GraphicsAllocation *allocation) override {
        return KernelImp::setArgBufferWithAlloc(argIndex, argVal, allocation);
    }
    // Set by the stubbed setBufferSurfaceState().
    bool setSurfaceStateCalled = false;
};

// With a valid bindless offset (0x40) and an undefined bindful offset, setArgBufferWithAlloc() is
// expected to call setBufferSurfaceState().
TEST_F(KernelImpPatchBindlessTest, GivenValidBindlessOffsetWhenSetArgBufferWithAllocThensetBufferSurfaceStateCalled) {
    ze_kernel_desc_t desc = {};
    desc.pKernelName = kernelName.c_str();
    MyMockKernel mockKernel;
    mockKernel.module = module.get();
    mockKernel.initialize(&desc);
    auto &arg = const_cast(mockKernel.kernelImmData->getDescriptor().payloadMappings.explicitArgs[0].as());
    arg.bindless = 0x40;
    arg.bindful = undefined;
    NEO::MockGraphicsAllocation alloc;
    mockKernel.setArgBufferWithAlloc(0, 0x1234, &alloc);
    EXPECT_TRUE(mockKernel.setSurfaceStateCalled);
}

// With a valid bindful offset (0x40, set on the next source line) and an undefined bindless
// offset, setArgBufferWithAlloc() is likewise expected to call setBufferSurfaceState().
TEST_F(KernelImpPatchBindlessTest, GivenValidBindfulOffsetWhenSetArgBufferWithAllocThensetBufferSurfaceStateCalled) {
    ze_kernel_desc_t desc = {};
    desc.pKernelName = kernelName.c_str();
    MyMockKernel mockKernel;
    mockKernel.module = module.get();
    mockKernel.initialize(&desc);
    auto &arg = const_cast(mockKernel.kernelImmData->getDescriptor().payloadMappings.explicitArgs[0].as());
    arg.bindless =
undefined;
    arg.bindful = 0x40;
    NEO::MockGraphicsAllocation alloc;
    mockKernel.setArgBufferWithAlloc(0, 0x1234, &alloc);
    EXPECT_TRUE(mockKernel.setSurfaceStateCalled);
}

// With both the bindful and the bindless offsets undefined, setArgBufferWithAlloc() is expected
// NOT to call setBufferSurfaceState().
TEST_F(KernelImpPatchBindlessTest, GivenUndefiedBidfulAndBindlesstOffsetWhenSetArgBufferWithAllocThenSetBufferSurfaceStateIsNotCalled) {
    ze_kernel_desc_t desc = {};
    desc.pKernelName = kernelName.c_str();
    MyMockKernel mockKernel;
    mockKernel.module = module.get();
    mockKernel.initialize(&desc);
    auto &arg = const_cast(mockKernel.kernelImmData->getDescriptor().payloadMappings.explicitArgs[0].as());
    arg.bindless = undefined;
    arg.bindful = undefined;
    NEO::MockGraphicsAllocation alloc;
    mockKernel.setArgBufferWithAlloc(0, 0x1234, &alloc);
    EXPECT_FALSE(mockKernel.setSurfaceStateCalled);
}

// NOTE(review): the fixture argument of `Test` is missing in this copy of the file (looks
// stripped during extraction); kept as found.
using KernelBindlessUncachedMemoryTests = Test;

// Sets a mock allocation (not obtained from the context's allocators) as argument 0 and expects
// getKernelRequiresUncachedMocs() to be false.
// NOTE(review): the test name ends in "IsSet" but the assertion expects false - confirm which of
// the two reflects the intended behaviour.
TEST_F(KernelBindlessUncachedMemoryTests, givenBindlessKernelAndAllocDataNoTfoundThenKernelRequiresUncachedMocsIsSet) {
    ze_kernel_desc_t desc = {};
    desc.pKernelName = kernelName.c_str();
    MyMockKernel mockKernel;
    mockKernel.module = module.get();
    mockKernel.initialize(&desc);
    auto &arg = const_cast(mockKernel.kernelImmData->getDescriptor().payloadMappings.explicitArgs[0].as());
    arg.bindless = undefined;
    arg.bindful = undefined;
    NEO::MockGraphicsAllocation alloc;
    mockKernel.setArgBufferWithAlloc(0, 0x1234, &alloc);
    EXPECT_FALSE(mockKernel.getKernelRequiresUncachedMocs());
}

// Sets two device allocations created with default (non-uncached) flags as argument 0, one after
// the other, and expects getKernelRequiresUncachedMocs() to be false after each.
TEST_F(KernelBindlessUncachedMemoryTests, givenNonUncachedAllocationSetAsArgumentFollowedByNonUncachedAllocationThenRequiresUncachedMocsIsCorrectlySet) {
    ze_kernel_desc_t desc = {};
    desc.pKernelName = kernelName.c_str();
    MyMockKernel mockKernel;
    mockKernel.module = module.get();
    mockKernel.initialize(&desc);
    auto &arg = const_cast(mockKernel.kernelImmData->getDescriptor().payloadMappings.explicitArgs[0].as());
    arg.bindless = undefined;
    arg.bindful = undefined;
    {
        void *devicePtr = nullptr;
        ze_device_mem_alloc_desc_t deviceDesc = {};
        ze_result_t res = context->allocDeviceMem(device->toHandle(), &deviceDesc,
16384u, 0u, &devicePtr);
        EXPECT_EQ(ZE_RESULT_SUCCESS, res);
        // Look up the graphics allocation that backs the freshly allocated pointer.
        auto alloc = device->getDriverHandle()->getSvmAllocsManager()->getSVMAllocs()->get(devicePtr)->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex());
        EXPECT_NE(nullptr, alloc);
        mockKernel.setArgBufferWithAlloc(0, 0x1234, alloc);
        EXPECT_FALSE(mockKernel.getKernelRequiresUncachedMocs());
        context->freeMem(devicePtr);
    }
    {
        // Second default-flag allocation set on the same argument index.
        void *devicePtr = nullptr;
        ze_device_mem_alloc_desc_t deviceDesc = {};
        ze_result_t res = context->allocDeviceMem(device->toHandle(), &deviceDesc, 16384u, 0u, &devicePtr);
        EXPECT_EQ(ZE_RESULT_SUCCESS, res);
        auto alloc = device->getDriverHandle()->getSvmAllocsManager()->getSVMAllocs()->get(devicePtr)->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex());
        EXPECT_NE(nullptr, alloc);
        mockKernel.setArgBufferWithAlloc(0, 0x1234, alloc);
        EXPECT_FALSE(mockKernel.getKernelRequiresUncachedMocs());
        context->freeMem(devicePtr);
    }
}

// Sets two device allocations created with ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_UNCACHED as argument 0,
// one after the other, and expects getKernelRequiresUncachedMocs() to be true after each.
TEST_F(KernelBindlessUncachedMemoryTests, givenUncachedAllocationSetAsArgumentFollowedByUncachedAllocationThenRequiresUncachedMocsIsCorrectlySet) {
    ze_kernel_desc_t desc = {};
    desc.pKernelName = kernelName.c_str();
    MyMockKernel mockKernel;
    mockKernel.module = module.get();
    mockKernel.initialize(&desc);
    auto &arg = const_cast(mockKernel.kernelImmData->getDescriptor().payloadMappings.explicitArgs[0].as());
    arg.bindless = undefined;
    arg.bindful = undefined;
    {
        void *devicePtr = nullptr;
        ze_device_mem_alloc_desc_t deviceDesc = {};
        deviceDesc.flags = ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_UNCACHED;
        ze_result_t res = context->allocDeviceMem(device->toHandle(), &deviceDesc, 16384u, 0u, &devicePtr);
        EXPECT_EQ(ZE_RESULT_SUCCESS, res);
        auto alloc = device->getDriverHandle()->getSvmAllocsManager()->getSVMAllocs()->get(devicePtr)->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex());
        EXPECT_NE(nullptr, alloc);
        mockKernel.setArgBufferWithAlloc(0, 0x1234, alloc);
        EXPECT_TRUE(mockKernel.getKernelRequiresUncachedMocs());
        context->freeMem(devicePtr);
    }
    {
        void
*devicePtr = nullptr;
        ze_device_mem_alloc_desc_t deviceDesc = {};
        deviceDesc.flags = ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_UNCACHED;
        ze_result_t res = context->allocDeviceMem(device->toHandle(), &deviceDesc, 16384u, 0u, &devicePtr);
        EXPECT_EQ(ZE_RESULT_SUCCESS, res);
        auto alloc = device->getDriverHandle()->getSvmAllocsManager()->getSVMAllocs()->get(devicePtr)->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex());
        EXPECT_NE(nullptr, alloc);
        mockKernel.setArgBufferWithAlloc(0, 0x1234, alloc);
        EXPECT_TRUE(mockKernel.getKernelRequiresUncachedMocs());
        context->freeMem(devicePtr);
    }
}

// Sets an uncached-biased device allocation as argument 0 (expects the uncached-MOCS requirement
// to be true), then replaces it with a default-flag allocation on the same index and expects the
// requirement to drop back to false.
TEST_F(KernelBindlessUncachedMemoryTests, givenUncachedAllocationSetAsArgumentFollowedByNonUncachedAllocationThenRequiresUncachedMocsIsCorrectlySet) {
    ze_kernel_desc_t desc = {};
    desc.pKernelName = kernelName.c_str();
    MyMockKernel mockKernel;
    mockKernel.module = module.get();
    mockKernel.initialize(&desc);
    auto &arg = const_cast(mockKernel.kernelImmData->getDescriptor().payloadMappings.explicitArgs[0].as());
    arg.bindless = undefined;
    arg.bindful = undefined;
    {
        void *devicePtr = nullptr;
        ze_device_mem_alloc_desc_t deviceDesc = {};
        deviceDesc.flags = ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_UNCACHED;
        ze_result_t res = context->allocDeviceMem(device->toHandle(), &deviceDesc, 16384u, 0u, &devicePtr);
        EXPECT_EQ(ZE_RESULT_SUCCESS, res);
        auto alloc = device->getDriverHandle()->getSvmAllocsManager()->getSVMAllocs()->get(devicePtr)->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex());
        EXPECT_NE(nullptr, alloc);
        mockKernel.setArgBufferWithAlloc(0, 0x1234, alloc);
        EXPECT_TRUE(mockKernel.getKernelRequiresUncachedMocs());
        context->freeMem(devicePtr);
    }
    {
        // Default flags this time: no uncached bias.
        void *devicePtr = nullptr;
        ze_device_mem_alloc_desc_t deviceDesc = {};
        ze_result_t res = context->allocDeviceMem(device->toHandle(), &deviceDesc, 16384u, 0u, &devicePtr);
        EXPECT_EQ(ZE_RESULT_SUCCESS, res);
        auto alloc =
device->getDriverHandle()->getSvmAllocsManager()->getSVMAllocs()->get(devicePtr)->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex()); EXPECT_NE(nullptr, alloc); mockKernel.setArgBufferWithAlloc(0, 0x1234, alloc); EXPECT_FALSE(mockKernel.getKernelRequiresUncachedMocs()); context->freeMem(devicePtr); } } TEST_F(KernelBindlessUncachedMemoryTests, givenUncachedHostAllocationSetAsArgumentFollowedByNonUncachedHostAllocationThenRequiresUncachedMocsIsCorrectlySet) { ze_kernel_desc_t desc = {}; desc.pKernelName = kernelName.c_str(); MyMockKernel mockKernel; mockKernel.module = module.get(); mockKernel.initialize(&desc); auto &arg = const_cast(mockKernel.kernelImmData->getDescriptor().payloadMappings.explicitArgs[0].as()); arg.bindless = undefined; arg.bindful = undefined; { void *ptr = nullptr; ze_host_mem_alloc_desc_t hostDesc = {}; hostDesc.flags = ZE_HOST_MEM_ALLOC_FLAG_BIAS_UNCACHED; ze_result_t res = context->allocHostMem(&hostDesc, 16384u, 0u, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, res); auto alloc = device->getDriverHandle()->getSvmAllocsManager()->getSVMAllocs()->get(ptr)->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex()); EXPECT_NE(nullptr, alloc); mockKernel.setArgBufferWithAlloc(0, 0x1234, alloc); EXPECT_TRUE(mockKernel.getKernelRequiresUncachedMocs()); context->freeMem(ptr); } { void *ptr = nullptr; ze_host_mem_alloc_desc_t hostDesc = {}; ze_result_t res = context->allocHostMem(&hostDesc, 16384u, 0u, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, res); auto alloc = device->getDriverHandle()->getSvmAllocsManager()->getSVMAllocs()->get(ptr)->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex()); EXPECT_NE(nullptr, alloc); mockKernel.setArgBufferWithAlloc(0, 0x1234, alloc); EXPECT_FALSE(mockKernel.getKernelRequiresUncachedMocs()); context->freeMem(ptr); } } template struct MyMockImage : public WhiteBox<::L0::ImageCoreFamily> { //MyMockImage() : WhiteBox<::L0::ImageCoreFamily>(); void copySurfaceStateToSSH(void *surfaceStateHeap, 
const uint32_t surfaceStateOffset, bool isMediaBlockArg) override { passedSurfaceStateHeap = surfaceStateHeap; passedSurfaceStateOffset = surfaceStateOffset; } void *passedSurfaceStateHeap = nullptr; uint32_t passedSurfaceStateOffset = 0; }; HWTEST2_F(SetKernelArg, givenImageAndBindlessKernelWhenSetArgImageThenCopySurfaceStateToSSHCalledWithCorrectArgs, ImageSupport) { createKernel(); neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()]->createBindlessHeapsHelper(neoDevice->getMemoryManager(), neoDevice->getNumGenericSubDevices() > 1, neoDevice->getRootDeviceIndex(), neoDevice->getDeviceBitfield()); auto &imageArg = const_cast(kernel->kernelImmData->getDescriptor().payloadMappings.explicitArgs[3].template as()); auto &addressingMode = kernel->kernelImmData->getDescriptor().kernelAttributes.imageAddressingMode; const_cast(addressingMode) = NEO::KernelDescriptor::Bindless; imageArg.bindless = 0x0; imageArg.bindful = undefined; ze_image_desc_t desc = {}; desc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC; auto &hwHelper = NEO::HwHelper::get(neoDevice->getHardwareInfo().platform.eRenderCoreFamily); auto surfaceStateSize = hwHelper.getRenderSurfaceStateSize(); auto imageHW = std::make_unique>(); auto ret = imageHW->initialize(device, &desc); auto handle = imageHW->toHandle(); ASSERT_EQ(ZE_RESULT_SUCCESS, ret); auto expectedSsInHeap = neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()]->getBindlessHeapsHelper()->allocateSSInHeap(surfaceStateSize, imageHW->getAllocation(), BindlessHeapsHelper::BindlesHeapType::GLOBAL_SSH); kernel->setArgImage(3, sizeof(imageHW.get()), &handle); EXPECT_EQ(imageHW->passedSurfaceStateHeap, expectedSsInHeap.ssPtr); EXPECT_EQ(imageHW->passedSurfaceStateOffset, 0u); } HWTEST2_F(SetKernelArg, givenImageAndBindfulKernelWhenSetArgImageThenCopySurfaceStateToSSHCalledWithCorrectArgs, ImageSupport) { createKernel(); auto &imageArg = 
const_cast(kernel->kernelImmData->getDescriptor().payloadMappings.explicitArgs[3].template as()); auto addressingMode = const_cast(kernel->kernelImmData->getDescriptor().kernelAttributes.imageAddressingMode); addressingMode = NEO::KernelDescriptor::Bindful; imageArg.bindless = undefined; imageArg.bindful = 0x40; ze_image_desc_t desc = {}; desc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC; auto imageHW = std::make_unique>(); auto ret = imageHW->initialize(device, &desc); auto handle = imageHW->toHandle(); ASSERT_EQ(ZE_RESULT_SUCCESS, ret); kernel->setArgImage(3, sizeof(imageHW.get()), &handle); EXPECT_EQ(imageHW->passedSurfaceStateHeap, kernel->getSurfaceStateHeapData()); EXPECT_EQ(imageHW->passedSurfaceStateOffset, imageArg.bindful); } template struct MyMockImageMediaBlock : public WhiteBox<::L0::ImageCoreFamily> { void copySurfaceStateToSSH(void *surfaceStateHeap, const uint32_t surfaceStateOffset, bool isMediaBlockArg) override { isMediaBlockPassedValue = isMediaBlockArg; } bool isMediaBlockPassedValue = false; }; HWTEST2_F(SetKernelArg, givenSupportsMediaBlockAndIsMediaBlockImageWhenSetArgImageIsCalledThenIsMediaBlockArgIsPassedCorrectly, ImageSupport) { auto hwInfo = device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo(); createKernel(); auto argIndex = 3u; auto &arg = const_cast(kernel->kernelImmData->getDescriptor().payloadMappings.explicitArgs[argIndex]); auto imageHW = std::make_unique>(); ze_image_desc_t desc = {}; desc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC; auto ret = imageHW->initialize(device, &desc); ASSERT_EQ(ZE_RESULT_SUCCESS, ret); auto handle = imageHW->toHandle(); { hwInfo->capabilityTable.supportsMediaBlock = true; arg.getExtendedTypeInfo().isMediaBlockImage = true; kernel->setArgImage(argIndex, sizeof(imageHW.get()), &handle); EXPECT_TRUE(imageHW->isMediaBlockPassedValue); } { hwInfo->capabilityTable.supportsMediaBlock = false; arg.getExtendedTypeInfo().isMediaBlockImage = true; kernel->setArgImage(argIndex, sizeof(imageHW.get()), 
&handle); EXPECT_FALSE(imageHW->isMediaBlockPassedValue); } { hwInfo->capabilityTable.supportsMediaBlock = true; arg.getExtendedTypeInfo().isMediaBlockImage = false; kernel->setArgImage(argIndex, sizeof(imageHW.get()), &handle); EXPECT_FALSE(imageHW->isMediaBlockPassedValue); } { hwInfo->capabilityTable.supportsMediaBlock = false; arg.getExtendedTypeInfo().isMediaBlockImage = false; kernel->setArgImage(argIndex, sizeof(imageHW.get()), &handle); EXPECT_FALSE(imageHW->isMediaBlockPassedValue); } } using ImportHostPointerSetKernelArg = Test; TEST_F(ImportHostPointerSetKernelArg, givenHostPointerImportedWhenSettingKernelArgThenUseHostPointerAllocation) { createKernel(); auto ret = driverHandle->importExternalPointer(hostPointer, MemoryConstants::pageSize); EXPECT_EQ(ZE_RESULT_SUCCESS, ret); ret = kernel->setArgBuffer(0, sizeof(hostPointer), &hostPointer); EXPECT_EQ(ZE_RESULT_SUCCESS, ret); ret = driverHandle->releaseImportedPointer(hostPointer); EXPECT_EQ(ZE_RESULT_SUCCESS, ret); } class KernelGlobalWorkOffsetTests : public ModuleFixture, public ::testing::Test { public: void SetUp() override { ModuleFixture::SetUp(); ze_kernel_desc_t kernelDesc = {}; kernelDesc.pKernelName = kernelName.c_str(); ze_result_t res = module->createKernel(&kernelDesc, &kernelHandle); EXPECT_EQ(ZE_RESULT_SUCCESS, res); kernel = L0::Kernel::fromHandle(kernelHandle); } void TearDown() override { Kernel::fromHandle(kernelHandle)->destroy(); ModuleFixture::TearDown(); } ze_kernel_handle_t kernelHandle; L0::Kernel *kernel = nullptr; }; TEST_F(KernelGlobalWorkOffsetTests, givenCallToSetGlobalWorkOffsetThenOffsetsAreSet) { uint32_t globalOffsetx = 10; uint32_t globalOffsety = 20; uint32_t globalOffsetz = 30; ze_result_t res = kernel->setGlobalOffsetExp(globalOffsetx, globalOffsety, globalOffsetz); EXPECT_EQ(ZE_RESULT_SUCCESS, res); KernelImp *kernelImp = static_cast(kernel); EXPECT_EQ(globalOffsetx, kernelImp->getGlobalOffsets()[0]); EXPECT_EQ(globalOffsety, kernelImp->getGlobalOffsets()[1]); 
EXPECT_EQ(globalOffsetz, kernelImp->getGlobalOffsets()[2]); } TEST_F(KernelGlobalWorkOffsetTests, whenSettingGlobalOffsetThenCrossThreadDataIsPatched) { uint32_t globalOffsetx = 10; uint32_t globalOffsety = 20; uint32_t globalOffsetz = 30; ze_result_t res = kernel->setGlobalOffsetExp(globalOffsetx, globalOffsety, globalOffsetz); EXPECT_EQ(ZE_RESULT_SUCCESS, res); KernelImp *kernelImp = static_cast(kernel); kernelImp->patchGlobalOffset(); const NEO::KernelDescriptor &desc = kernelImp->getImmutableData()->getDescriptor(); auto dst = ArrayRef(kernelImp->getCrossThreadData(), kernelImp->getCrossThreadDataSize()); EXPECT_EQ(*(dst.begin() + desc.payloadMappings.dispatchTraits.globalWorkOffset[0]), globalOffsetx); EXPECT_EQ(*(dst.begin() + desc.payloadMappings.dispatchTraits.globalWorkOffset[1]), globalOffsety); EXPECT_EQ(*(dst.begin() + desc.payloadMappings.dispatchTraits.globalWorkOffset[2]), globalOffsetz); } using KernelWorkDimTests = Test; TEST_F(KernelWorkDimTests, givenGroupCountsWhenPatchingWorkDimThenCrossThreadDataIsPatched) { uint32_t perHwThreadPrivateMemorySizeRequested = 32u; std::unique_ptr mockKernelImmData = std::make_unique(perHwThreadPrivateMemorySizeRequested); createModuleFromBinary(perHwThreadPrivateMemorySizeRequested, false, mockKernelImmData.get()); auto kernel = std::make_unique(module.get()); createKernel(kernel.get()); kernel->setCrossThreadData(sizeof(uint32_t)); mockKernelImmData->mockKernelDescriptor->payloadMappings.dispatchTraits.workDim = 0x0u; auto destinationBuffer = ArrayRef(kernel->getCrossThreadData(), kernel->getCrossThreadDataSize()); auto &kernelDescriptor = mockKernelImmData->getDescriptor(); auto workDimInCrossThreadDataPtr = destinationBuffer.begin() + kernelDescriptor.payloadMappings.dispatchTraits.workDim; EXPECT_EQ(*workDimInCrossThreadDataPtr, 0u); std::array, 8> sizesCountsWorkDim = {{{2, 1, 1, 1, 1, 1, 1}, {1, 1, 1, 1, 1, 1, 1}, {1, 2, 1, 2, 1, 1, 2}, {1, 2, 1, 1, 1, 1, 2}, {1, 1, 1, 1, 2, 1, 2}, {1, 1, 1, 2, 2, 2, 3}, 
{1, 1, 2, 1, 1, 1, 3}, {1, 1, 1, 1, 1, 2, 3}}}; for (auto &[groupSizeX, groupSizeY, groupSizeZ, groupCountX, groupCountY, groupCountZ, expectedWorkDim] : sizesCountsWorkDim) { ze_result_t res = kernel->setGroupSize(groupSizeX, groupSizeY, groupSizeZ); EXPECT_EQ(res, ZE_RESULT_SUCCESS); kernel->setGroupCount(groupCountX, groupCountY, groupCountZ); EXPECT_EQ(*workDimInCrossThreadDataPtr, expectedWorkDim); } } using KernelPrintHandlerTest = Test; struct MyPrintfHandler : public PrintfHandler { static uint32_t getPrintfSurfaceInitialDataSize() { return PrintfHandler::printfSurfaceInitialDataSize; } }; TEST_F(KernelPrintHandlerTest, whenPrintPrintfOutputIsCalledThenPrintfBufferIsUsed) { ze_kernel_desc_t desc = {}; desc.pKernelName = kernelName.c_str(); kernel = std::make_unique>(); kernel->module = module.get(); kernel->initialize(&desc); EXPECT_FALSE(kernel->printfBuffer == nullptr); kernel->printPrintfOutput(); auto buffer = *reinterpret_cast(kernel->printfBuffer->getUnderlyingBuffer()); EXPECT_EQ(buffer, MyPrintfHandler::getPrintfSurfaceInitialDataSize()); } using PrintfTest = Test; TEST_F(PrintfTest, givenKernelWithPrintfThenPrintfBufferIsCreated) { Mock mockModule(this->device, nullptr); Mock mockKernel; mockKernel.descriptor.kernelAttributes.flags.usesPrintf = true; mockKernel.module = &mockModule; EXPECT_TRUE(mockKernel.getImmutableData()->getDescriptor().kernelAttributes.flags.usesPrintf); ze_kernel_desc_t kernelDesc = {}; kernelDesc.pKernelName = "mock"; mockKernel.createPrintfBuffer(); EXPECT_NE(nullptr, mockKernel.getPrintfBufferAllocation()); } TEST_F(PrintfTest, GivenKernelNotUsingPrintfWhenCreatingPrintfBufferThenAllocationIsNotCreated) { Mock mockModule(this->device, nullptr); Mock mockKernel; mockKernel.descriptor.kernelAttributes.flags.usesPrintf = false; mockKernel.module = &mockModule; ze_kernel_desc_t kernelDesc = {}; kernelDesc.pKernelName = "mock"; mockKernel.createPrintfBuffer(); EXPECT_EQ(nullptr, mockKernel.getPrintfBufferAllocation()); } 
// NOTE(review): throughout this span the template argument lists (e.g. of Test, Mock,
// std::make_unique, reinterpret_cast, static_cast, const_cast) are absent from the text as
// extracted; they look stripped by the extraction and need restoring from upstream — TODO confirm.

// Checks that createPrintfBuffer() on a kernel flagged usesPrintf yields a non-null printf
// allocation and that this allocation is the last entry of the kernel's residency container.
// NOTE(review): kernelDesc is filled in but never passed to anything in this test.
// NOTE(review): the size check below is a non-fatal EXPECT_NE, so the following
// `size() - 1` index would still be evaluated on an empty container — consider ASSERT_NE.
TEST_F(PrintfTest, WhenCreatingPrintfBufferThenAllocationAddedToResidencyContainer) {
    Mock mockModule(this->device, nullptr);
    Mock mockKernel;
    mockKernel.descriptor.kernelAttributes.flags.usesPrintf = true;
    mockKernel.module = &mockModule;
    ze_kernel_desc_t kernelDesc = {};
    kernelDesc.pKernelName = "mock";
    mockKernel.createPrintfBuffer();
    auto printfBufferAllocation = mockKernel.getPrintfBufferAllocation();
    EXPECT_NE(nullptr, printfBufferAllocation);
    EXPECT_NE(0u, mockKernel.residencyContainer.size());
    EXPECT_EQ(mockKernel.residencyContainer[mockKernel.residencyContainer.size() - 1], printfBufferAllocation);
}

// Checks that createPrintfBuffer() writes the printf allocation's getGpuAddressToPatch() value
// into cross-thread data at the offset configured in printfSurfaceAddress.stateless (0 here).
// NOTE(review): kernelDesc is filled in but never passed to anything in this test.
TEST_F(PrintfTest, WhenCreatingPrintfBufferThenCrossThreadDataIsPatched) {
    Mock mockModule(this->device, nullptr);
    Mock mockKernel;
    mockKernel.descriptor.kernelAttributes.flags.usesPrintf = true;
    mockKernel.module = &mockModule;
    ze_kernel_desc_t kernelDesc = {};
    kernelDesc.pKernelName = "mock";
    // Test-owned backing storage that stands in for the kernel's cross-thread data.
    auto crossThreadData = std::make_unique(4);
    mockKernel.descriptor.payloadMappings.implicitArgs.printfSurfaceAddress.stateless = 0;
    mockKernel.descriptor.payloadMappings.implicitArgs.printfSurfaceAddress.pointerSize = sizeof(uintptr_t);
    // The same buffer is handed to the kernel without giving up the local owner; the
    // matching release() at the end of the test takes it back out of the kernel.
    mockKernel.crossThreadData.reset(reinterpret_cast(crossThreadData.get()));
    mockKernel.crossThreadDataSize = sizeof(uint32_t[4]);
    mockKernel.createPrintfBuffer();
    auto printfBufferAllocation = mockKernel.getPrintfBufferAllocation();
    // NOTE(review): non-fatal check, yet printfBufferAllocation is dereferenced below.
    EXPECT_NE(nullptr, printfBufferAllocation);
    // Read back what was patched at the start of the buffer and compare with the allocation.
    auto printfBufferAddressPatched = *reinterpret_cast(crossThreadData.get());
    auto printfBufferGpuAddressOffset = static_cast(printfBufferAllocation->getGpuAddressToPatch());
    EXPECT_EQ(printfBufferGpuAddressOffset, printfBufferAddressPatched);
    // Presumably mockKernel.crossThreadData is an owning smart pointer, so it must let go of
    // the borrowed buffer here to avoid freeing it twice — TODO confirm. Any fatal assertion
    // added above this line would skip the release.
    mockKernel.crossThreadData.release();
}

// Fixture alias for the printf string-map tests below.
using KernelPatchtokensPrintfStringMapTests = Test;

// With usesPrintf and usesStringMapForPrintf both set on a Patchtokens-format kernel, printing
// the printf buffer must emit the string registered under index 0 in printfStringsMap.
TEST_F(KernelPatchtokensPrintfStringMapTests, givenKernelWithPrintfStringsMapUsageEnabledWhenPrintOutputThenProperStringIsPrinted) {
    std::unique_ptr mockKernelImmData = std::make_unique(0u);
    auto kernelDescriptor = mockKernelImmData->kernelDescriptor;
    kernelDescriptor->kernelAttributes.flags.usesPrintf = true;
    kernelDescriptor->kernelAttributes.flags.usesStringMapForPrintf = true;
    kernelDescriptor->kernelAttributes.binaryFormat = DeviceBinaryFormat::Patchtokens;
    std::string expectedString("test123");
    kernelDescriptor->kernelMetadata.printfStringsMap.insert(std::make_pair(0u, expectedString));
    createModuleFromBinary(0u, false, mockKernelImmData.get());
    auto kernel = std::make_unique(module.get());
    ze_kernel_desc_t kernelDesc{ZE_STRUCTURE_TYPE_KERNEL_DESC};
    kernel->initialize(&kernelDesc);
    // Hand-craft the printf buffer contents.
    // NOTE(review): presumably element 0 is the number of bytes in use and element 1 is the
    // string-map index (0 -> "test123") — confirm against the printf buffer layout.
    auto printfAllocation = reinterpret_cast(kernel->getPrintfBufferAllocation()->getUnderlyingBuffer());
    printfAllocation[0] = 8;
    printfAllocation[1] = 0;
    testing::internal::CaptureStdout();
    kernel->printPrintfOutput();
    std::string output = testing::internal::GetCapturedStdout();
    EXPECT_STREQ(expectedString.c_str(), output.c_str());
}

// Same setup as above but with usesStringMapForPrintf and requiresImplicitArgs both cleared:
// the captured stdout must be empty even though the string map holds an entry.
TEST_F(KernelPatchtokensPrintfStringMapTests, givenKernelWithPrintfStringsMapUsageDisabledAndNoImplicitArgsWhenPrintOutputThenNothingIsPrinted) {
    std::unique_ptr mockKernelImmData = std::make_unique(0u);
    auto kernelDescriptor = mockKernelImmData->kernelDescriptor;
    kernelDescriptor->kernelAttributes.flags.usesPrintf = true;
    kernelDescriptor->kernelAttributes.flags.usesStringMapForPrintf = false;
    kernelDescriptor->kernelAttributes.flags.requiresImplicitArgs = false;
    kernelDescriptor->kernelAttributes.binaryFormat = DeviceBinaryFormat::Patchtokens;
    std::string expectedString("test123");
    kernelDescriptor->kernelMetadata.printfStringsMap.insert(std::make_pair(0u, expectedString));
    createModuleFromBinary(0u, false, mockKernelImmData.get());
    auto kernel = std::make_unique(module.get());
    ze_kernel_desc_t kernelDesc{ZE_STRUCTURE_TYPE_KERNEL_DESC};
    kernel->initialize(&kernelDesc);
    // Same hand-crafted buffer contents as in the "usage enabled" test.
    auto printfAllocation = reinterpret_cast(kernel->getPrintfBufferAllocation()->getUnderlyingBuffer());
    printfAllocation[0] = 8;
    printfAllocation[1] = 0;
    testing::internal::CaptureStdout();
    kernel->printPrintfOutput();
    std::string output = testing::internal::GetCapturedStdout();
    EXPECT_STREQ("", output.c_str());
}

// String-map usage is cleared but requiresImplicitArgs is set: the registered string must
// still be printed.
TEST_F(KernelPatchtokensPrintfStringMapTests, givenKernelWithPrintfStringsMapUsageDisabledAndWithImplicitArgsWhenPrintOutputThenOutputIsPrinted) {
    std::unique_ptr mockKernelImmData = std::make_unique(0u);
    auto kernelDescriptor = mockKernelImmData->kernelDescriptor;
    kernelDescriptor->kernelAttributes.flags.usesPrintf = true;
    kernelDescriptor->kernelAttributes.flags.usesStringMapForPrintf = false;
    kernelDescriptor->kernelAttributes.flags.requiresImplicitArgs = true;
    kernelDescriptor->kernelAttributes.binaryFormat = DeviceBinaryFormat::Patchtokens;
    std::string expectedString("test123");
    kernelDescriptor->kernelMetadata.printfStringsMap.insert(std::make_pair(0u, expectedString));
    createModuleFromBinary(0u, false, mockKernelImmData.get());
    auto kernel = std::make_unique(module.get());
    ze_kernel_desc_t kernelDesc{ZE_STRUCTURE_TYPE_KERNEL_DESC};
    kernel->initialize(&kernelDesc);
    // Same hand-crafted buffer contents as in the "usage enabled" test.
    auto printfAllocation = reinterpret_cast(kernel->getPrintfBufferAllocation()->getUnderlyingBuffer());
    printfAllocation[0] = 8;
    printfAllocation[1] = 0;
    testing::internal::CaptureStdout();
    kernel->printPrintfOutput();
    std::string output = testing::internal::GetCapturedStdout();
    EXPECT_STREQ(expectedString.c_str(), output.c_str());
}

// Fixture alias for the implicit-args tests below.
using KernelImplicitArgTests = Test;

// A kernel that requires implicit args but does not itself use printf must still end up with a
// printf buffer allocation after initialize(), and the implicit args' printfBufferPtr must equal
// that allocation's GPU address.
TEST_F(KernelImplicitArgTests, givenKernelWithImplicitArgsWhenInitializeThenPrintfSurfaceIsCreatedAndProperlyPatchedInImplicitArgs) {
    std::unique_ptr mockKernelImmData = std::make_unique(0u);
    mockKernelImmData->kernelDescriptor->kernelAttributes.flags.requiresImplicitArgs = true;
    mockKernelImmData->kernelDescriptor->kernelAttributes.flags.usesPrintf = false;
    createModuleFromBinary(0u, false, mockKernelImmData.get());
    auto kernel = std::make_unique(module.get());
    ze_kernel_desc_t kernelDesc{ZE_STRUCTURE_TYPE_KERNEL_DESC};
    kernel->initialize(&kernelDesc);
    EXPECT_TRUE(kernel->getKernelDescriptor().kernelAttributes.flags.requiresImplicitArgs);
    // Fatal assertions: both pointers are dereferenced right after.
    auto pImplicitArgs = kernel->getImplicitArgs();
    ASSERT_NE(nullptr, pImplicitArgs);
    auto printfSurface = kernel->getPrintfBufferAllocation();
    ASSERT_NE(nullptr, printfSurface);
    EXPECT_NE(0u, pImplicitArgs->printfBufferPtr);
    EXPECT_EQ(printfSurface->getGpuAddress(), pImplicitArgs->printfBufferPtr);
}

// After initialize() on a kernel requiring implicit args, the implicit-args struct must exist
// with structSize == sizeof(ImplicitArgs) and structVersion == 0.
TEST_F(KernelImplicitArgTests, givenImplicitArgsRequiredWhenCreatingKernelThenImplicitArgsAreCreated) {
    std::unique_ptr mockKernelImmData = std::make_unique(0u);
    mockKernelImmData->kernelDescriptor->kernelAttributes.flags.requiresImplicitArgs = true;
    createModuleFromBinary(0u, false, mockKernelImmData.get());
    auto kernel = std::make_unique(module.get());
    ze_kernel_desc_t kernelDesc{ZE_STRUCTURE_TYPE_KERNEL_DESC};
    kernel->initialize(&kernelDesc);
    EXPECT_TRUE(kernel->getKernelDescriptor().kernelAttributes.flags.requiresImplicitArgs);
    auto pImplicitArgs = kernel->getImplicitArgs();
    ASSERT_NE(nullptr, pImplicitArgs);
    EXPECT_EQ(sizeof(ImplicitArgs), pImplicitArgs->structSize);
    EXPECT_EQ(0u, pImplicitArgs->structVersion);
}

// Sets group size, group count and global offset on the kernel and then compares the kernel's
// implicit args byte-for-byte (memcmp) against a locally built expectation.
TEST_F(KernelImplicitArgTests, givenKernelWithImplicitArgsWhenSettingKernelParamsThenImplicitArgsAreUpdated) {
    std::unique_ptr mockKernelImmData = std::make_unique(0u);
    mockKernelImmData->kernelDescriptor->kernelAttributes.flags.requiresImplicitArgs = true;
    // Captured before module creation; used as the expected simdWidth below.
    auto simd = mockKernelImmData->kernelDescriptor->kernelAttributes.simdSize;
    createModuleFromBinary(0u, false, mockKernelImmData.get());
    auto kernel = std::make_unique(module.get());
    ze_kernel_desc_t kernelDesc{ZE_STRUCTURE_TYPE_KERNEL_DESC};
    kernel->initialize(&kernelDesc);
    EXPECT_TRUE(kernel->getKernelDescriptor().kernelAttributes.flags.requiresImplicitArgs);
    auto pImplicitArgs = kernel->getImplicitArgs();
    ASSERT_NE(nullptr, pImplicitArgs);
    // Expected values mirror the setter calls further down: local size (4, 5, 6),
    // group count (3, 2, 1), global offset (1, 2, 3). The expected global sizes
    // (12, 10, 6) equal local size times group count in each dimension.
    ImplicitArgs expectedImplicitArgs{sizeof(ImplicitArgs)};
    expectedImplicitArgs.numWorkDim = 3;
    expectedImplicitArgs.simdWidth = simd;
    expectedImplicitArgs.localSizeX = 4;
    expectedImplicitArgs.localSizeY = 5;
    expectedImplicitArgs.localSizeZ = 6;
    expectedImplicitArgs.globalSizeX = 12;
    expectedImplicitArgs.globalSizeY = 10;
    expectedImplicitArgs.globalSizeZ = 6;
    expectedImplicitArgs.globalOffsetX = 1;
    expectedImplicitArgs.globalOffsetY = 2;
    expectedImplicitArgs.globalOffsetZ = 3;
    expectedImplicitArgs.groupCountX = 3;
    expectedImplicitArgs.groupCountY = 2;
    expectedImplicitArgs.groupCountZ = 1;
    expectedImplicitArgs.printfBufferPtr = kernel->getPrintfBufferAllocation()->getGpuAddress();
    kernel->setGroupSize(4, 5, 6);
    kernel->setGroupCount(3, 2, 1);
    kernel->setGlobalOffsetExp(1, 2, 3);
    kernel->patchGlobalOffset();
    // NOTE(review): memcmp also compares any padding/reserved bytes of ImplicitArgs — confirm
    // the struct is fully initialised on both sides.
    EXPECT_EQ(0, memcmp(pImplicitArgs, &expectedImplicitArgs, sizeof(ImplicitArgs)));
}

// Fixture alias for the multi-tile module test below.
using MultiTileModuleTest = Test;

// Runs only for the IsXeHpCore matcher. Programs a bindful buffer surface state for argument 0
// through setBufferSurfaceState() and checks that neither of the two
// "disable support for multi-GPU ..." fields is set in the resulting RENDER_SURFACE_STATE.
HWTEST2_F(MultiTileModuleTest, GivenMultiTileDeviceWhenSettingKernelArgAndSurfaceStateThenMultiTileFlagsAreSetCorrectly, IsXeHpCore) {
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
    ze_kernel_desc_t desc = {};
    desc.pKernelName = kernelName.c_str();
    WhiteBoxKernelHw mockKernel;
    mockKernel.module = modules[0].get();
    mockKernel.initialize(&desc);
    // Force argument 0 onto the bindful path at surface-state-heap offset 0x40.
    auto &arg = const_cast(mockKernel.kernelImmData->getDescriptor().payloadMappings.explicitArgs[0].template as());
    arg.bindless = undefined;
    arg.bindful = 0x40;
    // Stack buffer wrapped in a mock allocation; the GPU address is an arbitrary test value.
    constexpr size_t size = 128;
    uint64_t gpuAddress = 0x2000;
    char bufferArray[size] = {};
    void *buffer = reinterpret_cast(bufferArray);
    NEO::MockGraphicsAllocation mockAllocation(buffer, gpuAddress, size);
    mockKernel.setBufferSurfaceState(0, buffer, &mockAllocation);
    // Locate the surface state that was written for the argument inside the kernel's heap copy.
    void *surfaceStateAddress = ptrOffset(mockKernel.surfaceStateHeapData.get(), arg.bindful);
    RENDER_SURFACE_STATE *surfaceState = reinterpret_cast(surfaceStateAddress);
    EXPECT_FALSE(surfaceState->getDisableSupportForMultiGpuAtomics());
    EXPECT_FALSE(surfaceState->getDisableSupportForMultiGpuPartialWrites());
}
} // namespace ult } // namespace L0
test_sampler_patch_value.cpp000066400000000000000000000016371422164147700343040ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/kernel/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "level_zero/core/source/kernel/sampler_patch_values.h" using namespace L0; TEST(SamplerPatchValueTest, givenSamplerAddressingModeWhenGetingPathValueThenCorrectValueReturned) { EXPECT_EQ(getAddrMode(ZE_SAMPLER_ADDRESS_MODE_REPEAT), SamplerPatchValues::AddressRepeat); EXPECT_EQ(getAddrMode(ZE_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER), SamplerPatchValues::AddressClampToBorder); EXPECT_EQ(getAddrMode(ZE_SAMPLER_ADDRESS_MODE_CLAMP), SamplerPatchValues::AddressClampToEdge); EXPECT_EQ(getAddrMode(ZE_SAMPLER_ADDRESS_MODE_NONE), SamplerPatchValues::AddressNone); EXPECT_EQ(getAddrMode(ZE_SAMPLER_ADDRESS_MODE_MIRROR), SamplerPatchValues::AddressMirroredRepeat); EXPECT_EQ(getAddrMode(ZE_SAMPLER_ADDRESS_MODE_FORCE_UINT32), SamplerPatchValues::AddressNone); }compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/loader/000077500000000000000000000000001422164147700265615ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/loader/CMakeLists.txt000066400000000000000000000003521422164147700313210ustar00rootroot00000000000000# # Copyright (C) 2021 Intel Corporation # # SPDX-License-Identifier: MIT # target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/test_loader.cpp ) compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/loader/test_loader.cpp000066400000000000000000000333761422164147700316060ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include #include "ze_ddi_tables.h" namespace L0 { namespace ult { 
TEST(zeGetDriverProcAddrTableTest, whenCallingZeGetDriverProcAddrTableWithCorrectMajorVersionThenSuccessIsReturnedAndMinorVersionIsIgnored) { ze_api_version_t version = static_cast(ZE_MAKE_VERSION(1u, 64u)); ze_driver_dditable_t pDdiTable = {}; ze_result_t result = zeGetDriverProcAddrTable(version, &pDdiTable); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST(zeGetMemProcAddrTableTest, whenCallingZeGetMemProcAddrTableWithCorrectMajorVersionThenSuccessIsReturnedAndMinorVersionIsIgnored) { ze_api_version_t version = static_cast(ZE_MAKE_VERSION(1u, 64u)); ze_mem_dditable_t pDdiTable = {}; ze_result_t result = zeGetMemProcAddrTable(version, &pDdiTable); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST(zeGetContextProcAddrTableTest, whenCallingZeGetContextProcAddrTableWithCorrectMajorVersionThenSuccessIsReturnedAndMinorVersionIsIgnored) { ze_api_version_t version = static_cast(ZE_MAKE_VERSION(1u, 64u)); ze_context_dditable_t pDdiTable = {}; ze_result_t result = zeGetContextProcAddrTable(version, &pDdiTable); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST(zeGetPhysicalMemProcAddrTableTest, whenCallingZeGetPhysicalMemProcAddrTableWithCorrectMajorVersionThenSuccessIsReturnedAndMinorVersionIsIgnored) { ze_api_version_t version = static_cast(ZE_MAKE_VERSION(1u, 64u)); ze_physical_mem_dditable_t pDdiTable = {}; ze_result_t result = zeGetPhysicalMemProcAddrTable(version, &pDdiTable); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST(zeGetVirtualMemProcAddrTableTest, whenCallingZeGetVirtualMemProcAddrTableWithCorrectMajorVersionThenSuccessIsReturnedAndMinorVersionIsIgnored) { ze_api_version_t version = static_cast(ZE_MAKE_VERSION(1u, 64u)); ze_virtual_mem_dditable_t pDdiTable = {}; ze_result_t result = zeGetVirtualMemProcAddrTable(version, &pDdiTable); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST(zeGetGlobalProcAddrTableTest, whenCallingZeGetGlobalProcAddrTableWithCorrectMajorVersionThenSuccessIsReturnedAndMinorVersionIsIgnored) { ze_api_version_t version = static_cast(ZE_MAKE_VERSION(1u, 
64u)); ze_global_dditable_t pDdiTable = {}; ze_result_t result = zeGetGlobalProcAddrTable(version, &pDdiTable); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST(zeGetDeviceProcAddrTableTest, whenCallingZeGetDeviceProcAddrTableWithCorrectMajorVersionThenSuccessIsReturnedAndMinorVersionIsIgnored) { ze_api_version_t version = static_cast(ZE_MAKE_VERSION(1u, 64u)); ze_device_dditable_t pDdiTable = {}; ze_result_t result = zeGetDeviceProcAddrTable(version, &pDdiTable); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST(zeGetCommandQueueProcAddrTableTest, whenCallingZeGetCommandQueueProcAddrTableWithCorrectMajorVersionThenSuccessIsReturnedAndMinorVersionIsIgnored) { ze_api_version_t version = static_cast(ZE_MAKE_VERSION(1u, 64u)); ze_command_queue_dditable_t pDdiTable = {}; ze_result_t result = zeGetCommandQueueProcAddrTable(version, &pDdiTable); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST(zeGetCommandListProcAddrTableTest, whenCallingZeGetCommandListProcAddrTableWithCorrectMajorVersionThenSuccessIsReturnedAndMinorVersionIsIgnored) { ze_api_version_t version = static_cast(ZE_MAKE_VERSION(1u, 64u)); ze_command_list_dditable_t pDdiTable = {}; ze_result_t result = zeGetCommandListProcAddrTable(version, &pDdiTable); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST(zeGetFenceProcAddrTableTest, whenCallingZeGetFenceProcAddrTableWithCorrectMajorVersionThenSuccessIsReturnedAndMinorVersionIsIgnored) { ze_api_version_t version = static_cast(ZE_MAKE_VERSION(1u, 64u)); ze_fence_dditable_t pDdiTable = {}; ze_result_t result = zeGetFenceProcAddrTable(version, &pDdiTable); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST(zeGetEventPoolProcAddrTableTest, whenCallingZeGetEventPoolProcAddrTableWithCorrectMajorVersionThenSuccessIsReturnedAndMinorVersionIsIgnored) { ze_api_version_t version = static_cast(ZE_MAKE_VERSION(1u, 64u)); ze_event_pool_dditable_t pDdiTable = {}; ze_result_t result = zeGetEventPoolProcAddrTable(version, &pDdiTable); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } 
TEST(zeGetEventProcAddrTableTest, whenCallingZeGetEventProcAddrTableWithCorrectMajorVersionThenSuccessIsReturnedAndMinorVersionIsIgnored) { ze_api_version_t version = static_cast(ZE_MAKE_VERSION(1u, 64u)); ze_event_dditable_t pDdiTable = {}; ze_result_t result = zeGetEventProcAddrTable(version, &pDdiTable); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST(zeGetImageProcAddrTableTest, whenCallingZeGetImageProcAddrTableWithCorrectMajorVersionThenSuccessIsReturnedAndMinorVersionIsIgnored) { ze_api_version_t version = static_cast(ZE_MAKE_VERSION(1u, 64u)); ze_image_dditable_t pDdiTable = {}; ze_result_t result = zeGetImageProcAddrTable(version, &pDdiTable); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST(zeGetModuleProcAddrTableTest, whenCallingZeGetModuleProcAddrTableWithCorrectMajorVersionThenSuccessIsReturnedAndMinorVersionIsIgnored) { ze_api_version_t version = static_cast(ZE_MAKE_VERSION(1u, 64u)); ze_module_dditable_t pDdiTable = {}; ze_result_t result = zeGetModuleProcAddrTable(version, &pDdiTable); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST(zeGetModuleBuildLogProcAddrTableTest, whenCallingZeGetModuleBuildLogProcAddrTableWithCorrectMajorVersionThenSuccessIsReturnedAndMinorVersionIsIgnored) { ze_api_version_t version = static_cast(ZE_MAKE_VERSION(1u, 64u)); ze_module_build_log_dditable_t pDdiTable = {}; ze_result_t result = zeGetModuleBuildLogProcAddrTable(version, &pDdiTable); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST(zeGetKernelProcAddrTableTest, whenCallingZeGetKernelProcAddrTableWithCorrectMajorVersionThenSuccessIsReturnedAndMinorVersionIsIgnored) { ze_api_version_t version = static_cast(ZE_MAKE_VERSION(1u, 64u)); ze_kernel_dditable_t pDdiTable = {}; ze_result_t result = zeGetKernelProcAddrTable(version, &pDdiTable); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST(zeGetSamplerProcAddrTableTest, whenCallingZeGetSamplerProcAddrTableWithCorrectMajorVersionThenSuccessIsReturnedAndMinorVersionIsIgnored) { ze_api_version_t version = static_cast(ZE_MAKE_VERSION(1u, 
64u)); ze_sampler_dditable_t pDdiTable = {}; ze_result_t result = zeGetSamplerProcAddrTable(version, &pDdiTable); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST(zeGetDriverProcAddrTableTest, whenCallingZeGetDriverProcAddrTableWithGreaterThanAllowedMajorVersionThenUnitializedIsReturned) { ze_api_version_t version = static_cast(ZE_MAKE_VERSION(64u, 0u)); ze_driver_dditable_t pDdiTable = {}; ze_result_t result = zeGetDriverProcAddrTable(version, &pDdiTable); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_VERSION, result); } TEST(zeGetMemProcAddrTableTest, whenCallingZeGetMemProcAddrTableWithGreaterThanAllowedMajorVersionThenUnitializedIsReturned) { ze_api_version_t version = static_cast(ZE_MAKE_VERSION(64u, 0u)); ze_mem_dditable_t pDdiTable = {}; ze_result_t result = zeGetMemProcAddrTable(version, &pDdiTable); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_VERSION, result); } TEST(zeGetContextProcAddrTableTest, whenCallingZeGetContextProcAddrTableWithGreaterThanAllowedMajorVersionThenUnitializedIsReturned) { ze_api_version_t version = static_cast(ZE_MAKE_VERSION(64u, 0u)); ze_context_dditable_t pDdiTable = {}; ze_result_t result = zeGetContextProcAddrTable(version, &pDdiTable); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_VERSION, result); } TEST(zeGetPhysicalMemProcAddrTableTest, whenCallingZeGetPhysicalMemProcAddrTableWithGreaterThanAllowedMajorVersionThenUnitializedIsReturned) { ze_api_version_t version = static_cast(ZE_MAKE_VERSION(64u, 0u)); ze_physical_mem_dditable_t pDdiTable = {}; ze_result_t result = zeGetPhysicalMemProcAddrTable(version, &pDdiTable); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_VERSION, result); } TEST(zeGetVirtualMemProcAddrTableTest, whenCallingZeGetVirtualMemProcAddrTableWithGreaterThanAllowedMajorVersionThenUnitializedIsReturned) { ze_api_version_t version = static_cast(ZE_MAKE_VERSION(64u, 0u)); ze_virtual_mem_dditable_t pDdiTable = {}; ze_result_t result = zeGetVirtualMemProcAddrTable(version, &pDdiTable); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_VERSION, result); } 
TEST(zeGetGlobalProcAddrTableTest, whenCallingZeGetGlobalProcAddrTableWithGreaterThanAllowedMajorVersionThenUnitializedIsReturned) { ze_api_version_t version = static_cast(ZE_MAKE_VERSION(64u, 0u)); ze_global_dditable_t pDdiTable = {}; ze_result_t result = zeGetGlobalProcAddrTable(version, &pDdiTable); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_VERSION, result); } TEST(zeGetDeviceProcAddrTableTest, whenCallingZeGetDeviceProcAddrTableWithGreaterThanAllowedMajorVersionThenUnitializedIsReturned) { ze_api_version_t version = static_cast(ZE_MAKE_VERSION(64u, 0u)); ze_device_dditable_t pDdiTable = {}; ze_result_t result = zeGetDeviceProcAddrTable(version, &pDdiTable); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_VERSION, result); } TEST(zeGetCommandQueueProcAddrTableTest, whenCallingZeGetCommandQueueProcAddrTableWithGreaterThanAllowedMajorVersionThenUnitializedIsReturned) { ze_api_version_t version = static_cast(ZE_MAKE_VERSION(64u, 0u)); ze_command_queue_dditable_t pDdiTable = {}; ze_result_t result = zeGetCommandQueueProcAddrTable(version, &pDdiTable); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_VERSION, result); } TEST(zeGetCommandListProcAddrTableTest, whenCallingZeGetCommandListProcAddrTableWithGreaterThanAllowedMajorVersionThenUnitializedIsReturned) { ze_api_version_t version = static_cast(ZE_MAKE_VERSION(64u, 0u)); ze_command_list_dditable_t pDdiTable = {}; ze_result_t result = zeGetCommandListProcAddrTable(version, &pDdiTable); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_VERSION, result); } TEST(zeGetFenceProcAddrTableTest, whenCallingZeGetFenceProcAddrTableWithGreaterThanAllowedMajorVersionThenUnitializedIsReturned) { ze_api_version_t version = static_cast(ZE_MAKE_VERSION(64u, 0u)); ze_fence_dditable_t pDdiTable = {}; ze_result_t result = zeGetFenceProcAddrTable(version, &pDdiTable); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_VERSION, result); } TEST(zeGetEventPoolProcAddrTableTest, whenCallingZeGetEventPoolProcAddrTableWithGreaterThanAllowedMajorVersionThenUnitializedIsReturned) { 
ze_api_version_t version = static_cast(ZE_MAKE_VERSION(64u, 0u)); ze_event_pool_dditable_t pDdiTable = {}; ze_result_t result = zeGetEventPoolProcAddrTable(version, &pDdiTable); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_VERSION, result); } TEST(zeGetEventProcAddrTableTest, whenCallingZeGetEventProcAddrTableWithGreaterThanAllowedMajorVersionThenUnitializedIsReturned) { ze_api_version_t version = static_cast(ZE_MAKE_VERSION(64u, 0u)); ze_event_dditable_t pDdiTable = {}; ze_result_t result = zeGetEventProcAddrTable(version, &pDdiTable); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_VERSION, result); } TEST(zeGetImageProcAddrTableTest, whenCallingZeGetImageProcAddrTableWithGreaterThanAllowedMajorVersionThenUnitializedIsReturned) { ze_api_version_t version = static_cast(ZE_MAKE_VERSION(64u, 0u)); ze_image_dditable_t pDdiTable = {}; ze_result_t result = zeGetImageProcAddrTable(version, &pDdiTable); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_VERSION, result); } TEST(zeGetModuleProcAddrTableTest, whenCallingZeGetModuleProcAddrTableWithGreaterThanAllowedMajorVersionThenUnitializedIsReturned) { ze_api_version_t version = static_cast(ZE_MAKE_VERSION(64u, 0u)); ze_module_dditable_t pDdiTable = {}; ze_result_t result = zeGetModuleProcAddrTable(version, &pDdiTable); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_VERSION, result); } TEST(zeGetModuleBuildLogProcAddrTableTest, whenCallingZeGetModuleBuildLogProcAddrTableWithGreaterThanAllowedMajorVersionThenUnitializedIsReturned) { ze_api_version_t version = static_cast(ZE_MAKE_VERSION(64u, 0u)); ze_module_build_log_dditable_t pDdiTable = {}; ze_result_t result = zeGetModuleBuildLogProcAddrTable(version, &pDdiTable); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_VERSION, result); } TEST(zeGetKernelProcAddrTableTest, whenCallingZeGetKernelProcAddrTableWithGreaterThanAllowedMajorVersionThenUnitializedIsReturned) { ze_api_version_t version = static_cast(ZE_MAKE_VERSION(64u, 0u)); ze_kernel_dditable_t pDdiTable = {}; ze_result_t result = 
zeGetKernelProcAddrTable(version, &pDdiTable); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_VERSION, result); } TEST(zeGetSamplerProcAddrTableTest, whenCallingZeGetSamplerProcAddrTableWithGreaterThanAllowedMajorVersionThenUnitializedIsReturned) { ze_api_version_t version = static_cast(ZE_MAKE_VERSION(64u, 0u)); ze_sampler_dditable_t pDdiTable = {}; ze_result_t result = zeGetSamplerProcAddrTable(version, &pDdiTable); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_VERSION, result); } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/memory/000077500000000000000000000000001422164147700266235ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/memory/CMakeLists.txt000066400000000000000000000005061422164147700313640ustar00rootroot00000000000000# # Copyright (C) 2020-2022 Intel Corporation # # SPDX-License-Identifier: MIT # target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/test_memory.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_memory_${DRIVER_MODEL}/test_memory.cpp ) compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/memory/test_memory.cpp000066400000000000000000004366431422164147700317160ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/file_io.h" #include "shared/source/helpers/string.h" #include "shared/source/memory_manager/memory_operations_status.h" #include "shared/source/os_interface/device_factory.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/test_files.h" #include "shared/test/common/mocks/mock_compilers.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "shared/test/common/mocks/ult_device_factory.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/source/cmdlist/cmdlist_hw.h" #include 
"level_zero/core/source/context/context_imp.h" #include "level_zero/core/source/device/device_imp.h" #include "level_zero/core/source/driver/host_pointer_manager.h" #include "level_zero/core/source/hw_helpers/l0_hw_helper.h" #include "level_zero/core/source/image/image.h" #include "level_zero/core/source/image/image_hw.h" #include "level_zero/core/source/memory/memory_operations_helper.h" #include "level_zero/core/source/module/module.h" #include "level_zero/core/source/module/module_imp.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/fixtures/memory_ipc_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_built_ins.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h" #include "level_zero/core/test/unit_tests/mocks/mock_kernel.h" namespace L0 { namespace ult { using MemoryTest = Test; struct CompressionMemoryTest : public MemoryTest { GraphicsAllocation *allocDeviceMem(size_t size) { ptr = nullptr; ze_result_t result = context->allocDeviceMem(device->toHandle(), &deviceDesc, size, 4096, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); NEO::SvmAllocationData *allocData = device->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(ptr); EXPECT_NE(nullptr, allocData); auto allocation = allocData->gpuAllocations.getDefaultGraphicsAllocation(); EXPECT_NE(nullptr, allocation); return allocation; } DebugManagerStateRestore restore; ze_device_mem_alloc_desc_t deviceDesc = {}; void *ptr = nullptr; }; HWTEST2_F(CompressionMemoryTest, givenDeviceUsmWhenAllocatingThenEnableCompressionIfPossible, IsAtLeastSkl) { device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable.ftrRenderCompressedBuffers = true; auto &hwInfo = device->getHwInfo(); auto &l0HwHelper = L0HwHelperHw::get(); auto &hwHelper = NEO::HwHelperHw::get(); // Default path { auto allocation = allocDeviceMem(2048); auto supportedByDefault = 
l0HwHelper.usmCompressionSupported(hwInfo) && l0HwHelper.forceDefaultUsmCompressionSupport(); EXPECT_EQ(supportedByDefault, allocation->isCompressionEnabled()); context->freeMem(ptr); } // Compressed hint { NEO::DebugManager.flags.RenderCompressedBuffersEnabled.set(1); ze_external_memory_import_win32_handle_t compressionHint = {}; compressionHint.stype = ZE_STRUCTURE_TYPE_MEMORY_COMPRESSION_HINTS_EXT_DESC; compressionHint.flags = ZE_MEMORY_COMPRESSION_HINTS_EXT_FLAG_COMPRESSED; deviceDesc.pNext = &compressionHint; auto allocation = allocDeviceMem(2048); EXPECT_EQ(hwHelper.isBufferSizeSuitableForCompression(2048, hwInfo), allocation->isCompressionEnabled()); context->freeMem(ptr); deviceDesc.pNext = nullptr; NEO::DebugManager.flags.RenderCompressedBuffersEnabled.set(-1); } // Compressed hint { NEO::DebugManager.flags.RenderCompressedBuffersEnabled.set(1); NEO::DebugManager.flags.OverrideBufferSuitableForRenderCompression.set(1); ze_external_memory_import_win32_handle_t compressionHint = {}; compressionHint.stype = ZE_STRUCTURE_TYPE_MEMORY_COMPRESSION_HINTS_EXT_DESC; compressionHint.flags = ZE_MEMORY_COMPRESSION_HINTS_EXT_FLAG_COMPRESSED; deviceDesc.pNext = &compressionHint; auto allocation = allocDeviceMem(2048); EXPECT_TRUE(allocation->isCompressionEnabled()); context->freeMem(ptr); deviceDesc.pNext = nullptr; NEO::DebugManager.flags.RenderCompressedBuffersEnabled.set(-1); NEO::DebugManager.flags.OverrideBufferSuitableForRenderCompression.set(-1); } // Compressed hint without debug flag { ze_external_memory_import_win32_handle_t compressionHint = {}; compressionHint.stype = ZE_STRUCTURE_TYPE_MEMORY_COMPRESSION_HINTS_EXT_DESC; compressionHint.flags = ZE_MEMORY_COMPRESSION_HINTS_EXT_FLAG_COMPRESSED; deviceDesc.pNext = &compressionHint; auto allocation = allocDeviceMem(2048); EXPECT_EQ(l0HwHelper.usmCompressionSupported(hwInfo), allocation->isCompressionEnabled()); context->freeMem(ptr); deviceDesc.pNext = nullptr; 
NEO::DebugManager.flags.RenderCompressedBuffersEnabled.set(-1); } // Uncompressed hint { NEO::DebugManager.flags.RenderCompressedBuffersEnabled.set(1); ze_external_memory_import_win32_handle_t compressionHint = {}; compressionHint.stype = ZE_STRUCTURE_TYPE_MEMORY_COMPRESSION_HINTS_EXT_DESC; compressionHint.flags = ZE_MEMORY_COMPRESSION_HINTS_EXT_FLAG_UNCOMPRESSED; deviceDesc.pNext = &compressionHint; auto allocation = allocDeviceMem(2048); EXPECT_FALSE(allocation->isCompressionEnabled()); context->freeMem(ptr); deviceDesc.pNext = nullptr; NEO::DebugManager.flags.RenderCompressedBuffersEnabled.set(-1); } // Debug flag == 0 { NEO::DebugManager.flags.RenderCompressedBuffersEnabled.set(0); auto allocation = allocDeviceMem(2048); EXPECT_FALSE(allocation->isCompressionEnabled()); context->freeMem(ptr); NEO::DebugManager.flags.RenderCompressedBuffersEnabled.set(-1); } // Size restriction { NEO::DebugManager.flags.RenderCompressedBuffersEnabled.set(1); auto allocation = allocDeviceMem(1); if (!hwHelper.isBufferSizeSuitableForCompression(1, hwInfo)) { EXPECT_FALSE(allocation->isCompressionEnabled()); } context->freeMem(ptr); } } TEST_F(MemoryTest, givenDevicePointerThenDriverGetAllocPropertiesReturnsExpectedProperties) { size_t size = 10; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_result_t result = context->allocDeviceMem(device->toHandle(), &deviceDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); ze_memory_allocation_properties_t memoryProperties = {}; ze_device_handle_t deviceHandle; result = context->getMemAllocProperties(ptr, &memoryProperties, &deviceHandle); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(memoryProperties.type, ZE_MEMORY_TYPE_DEVICE); EXPECT_EQ(deviceHandle, device->toHandle()); EXPECT_EQ(memoryProperties.id, context->getDriverHandle()->getSvmAllocsManager()->allocationsCounter - 1); auto alloc = 
context->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(ptr); EXPECT_NE(alloc, nullptr); EXPECT_NE(alloc->pageSizeForAlignment, 0u); EXPECT_EQ(alloc->pageSizeForAlignment, memoryProperties.pageSize); result = context->freeMem(ptr); ASSERT_EQ(result, ZE_RESULT_SUCCESS); } TEST_F(MemoryTest, givenHostPointerThenDriverGetAllocPropertiesReturnsExpectedProperties) { size_t size = 128; size_t alignment = 4096; void *ptr = nullptr; ze_host_mem_alloc_desc_t hostDesc = {}; ze_result_t result = context->allocHostMem(&hostDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); ze_memory_allocation_properties_t memoryProperties = {}; ze_device_handle_t deviceHandle; result = context->getMemAllocProperties(ptr, &memoryProperties, &deviceHandle); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(memoryProperties.type, ZE_MEMORY_TYPE_HOST); EXPECT_EQ(memoryProperties.id, context->getDriverHandle()->getSvmAllocsManager()->allocationsCounter - 1); auto alloc = context->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(ptr); EXPECT_NE(alloc, nullptr); EXPECT_NE(alloc->pageSizeForAlignment, 0u); EXPECT_EQ(alloc->pageSizeForAlignment, memoryProperties.pageSize); result = context->freeMem(ptr); ASSERT_EQ(result, ZE_RESULT_SUCCESS); } TEST_F(MemoryTest, givenSharedPointerThenDriverGetAllocPropertiesReturnsExpectedProperties) { size_t size = 10; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_host_mem_alloc_desc_t hostDesc = {}; ze_result_t result = context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); ze_memory_allocation_properties_t memoryProperties = {}; ze_device_handle_t deviceHandle; result = context->getMemAllocProperties(ptr, &memoryProperties, &deviceHandle); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(memoryProperties.type, ZE_MEMORY_TYPE_SHARED); EXPECT_EQ(deviceHandle, 
device->toHandle()); EXPECT_EQ(memoryProperties.id, context->getDriverHandle()->getSvmAllocsManager()->allocationsCounter - 1); auto alloc = context->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(ptr); EXPECT_NE(alloc, nullptr); EXPECT_NE(alloc->pageSizeForAlignment, 0u); EXPECT_EQ(alloc->pageSizeForAlignment, memoryProperties.pageSize); result = context->freeMem(ptr); ASSERT_EQ(result, ZE_RESULT_SUCCESS); } TEST_F(MemoryTest, givenForceExtendedUSMBufferSizeDebugFlagWhenUSMAllocationIsCreatedThenSizeIsProperlyExtended) { DebugManagerStateRestore restorer; constexpr auto bufferSize = 16; auto pageSizeNumber = 2; NEO::DebugManager.flags.ForceExtendedUSMBufferSize.set(pageSizeNumber); auto extendedBufferSize = bufferSize + MemoryConstants::pageSize * pageSizeNumber; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_host_mem_alloc_desc_t hostDesc = {}; ze_result_t result = context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, bufferSize, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); auto alloc = context->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(ptr); EXPECT_NE(alloc, nullptr); EXPECT_EQ(alloc->size, extendedBufferSize); result = context->freeMem(ptr); ASSERT_EQ(result, ZE_RESULT_SUCCESS); pageSizeNumber = 4; NEO::DebugManager.flags.ForceExtendedUSMBufferSize.set(pageSizeNumber); extendedBufferSize = bufferSize + MemoryConstants::pageSize * pageSizeNumber; hostDesc = {}; result = context->allocHostMem(&hostDesc, bufferSize, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); alloc = context->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(ptr); EXPECT_NE(alloc, nullptr); EXPECT_EQ(alloc->size, extendedBufferSize); result = context->freeMem(ptr); ASSERT_EQ(result, ZE_RESULT_SUCCESS); pageSizeNumber = 8; NEO::DebugManager.flags.ForceExtendedUSMBufferSize.set(pageSizeNumber); extendedBufferSize = bufferSize + 
MemoryConstants::pageSize * pageSizeNumber; deviceDesc = {}; result = context->allocDeviceMem(device->toHandle(), &deviceDesc, bufferSize, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); alloc = context->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(ptr); EXPECT_NE(alloc, nullptr); EXPECT_EQ(alloc->size, extendedBufferSize); result = context->freeMem(ptr); ASSERT_EQ(result, ZE_RESULT_SUCCESS); } TEST_F(MemoryTest, givenHostPointerThenDriverGetAllocPropertiesReturnsMemoryId) { size_t size = 10; size_t alignment = 1u; void *ptr = nullptr; ze_host_mem_alloc_desc_t hostDesc = {}; ze_result_t result = context->allocHostMem(&hostDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); ze_memory_allocation_properties_t memoryProperties = {}; ze_device_handle_t deviceHandle; result = context->getMemAllocProperties(ptr, &memoryProperties, &deviceHandle); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(memoryProperties.type, ZE_MEMORY_TYPE_HOST); EXPECT_EQ(deviceHandle, nullptr); EXPECT_EQ(memoryProperties.id, context->getDriverHandle()->getSvmAllocsManager()->allocationsCounter - 1); result = context->freeMem(ptr); ASSERT_EQ(result, ZE_RESULT_SUCCESS); } TEST_F(MemoryTest, givenSharedPointerThenDriverGetAllocPropertiesReturnsMemoryId) { size_t size = 10; size_t alignment = 1u; void *ptr = nullptr; ze_host_mem_alloc_desc_t hostDesc = {}; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_result_t result = context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); ze_memory_allocation_properties_t memoryProperties = {}; ze_device_handle_t deviceHandle; result = context->getMemAllocProperties(ptr, &memoryProperties, &deviceHandle); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(memoryProperties.type, ZE_MEMORY_TYPE_SHARED); EXPECT_EQ(deviceHandle, device->toHandle()); EXPECT_EQ(memoryProperties.id, 
context->getDriverHandle()->getSvmAllocsManager()->allocationsCounter - 1); result = context->freeMem(ptr); ASSERT_EQ(result, ZE_RESULT_SUCCESS); } TEST_F(MemoryTest, whenAllocatingDeviceMemoryWithUncachedFlagThenLocallyUncachedResourceIsSet) { size_t size = 10; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; deviceDesc.flags = ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_UNCACHED; ze_result_t result = context->allocDeviceMem(device->toHandle(), &deviceDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); auto allocData = driverHandle->getSvmAllocsManager()->getSVMAlloc(ptr); EXPECT_NE(nullptr, allocData); EXPECT_EQ(allocData->allocationFlagsProperty.flags.locallyUncachedResource, 1u); result = context->freeMem(ptr); ASSERT_EQ(result, ZE_RESULT_SUCCESS); } TEST_F(MemoryTest, whenAllocatingSharedMemoryWithUncachedFlagThenLocallyUncachedResourceIsSet) { size_t size = 10; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; deviceDesc.flags = ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_UNCACHED; ze_host_mem_alloc_desc_t hostDesc = {}; ze_result_t result = context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); auto allocData = driverHandle->getSvmAllocsManager()->getSVMAlloc(ptr); EXPECT_NE(nullptr, allocData); EXPECT_EQ(allocData->allocationFlagsProperty.flags.locallyUncachedResource, 1u); result = context->freeMem(ptr); ASSERT_EQ(result, ZE_RESULT_SUCCESS); } TEST_F(MemoryTest, whenAllocatingSharedMemoryWithDeviceInitialPlacementBiasFlagThenFlagsAreSetupCorrectly) { size_t size = 10; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; deviceDesc.flags = ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_INITIAL_PLACEMENT; ze_host_mem_alloc_desc_t hostDesc = {}; ze_result_t result = context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, size, alignment, 
&ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); auto allocData = driverHandle->getSvmAllocsManager()->getSVMAlloc(ptr); EXPECT_NE(nullptr, allocData); EXPECT_EQ(0u, allocData->allocationFlagsProperty.allocFlags.usmInitialPlacementCpu); EXPECT_EQ(1u, allocData->allocationFlagsProperty.allocFlags.usmInitialPlacementGpu); result = context->freeMem(ptr); ASSERT_EQ(result, ZE_RESULT_SUCCESS); } TEST_F(MemoryTest, whenAllocatingSharedMemoryWithHostInitialPlacementBiasFlagThenFlagsAreSetupCorrectly) { size_t size = 10; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_host_mem_alloc_desc_t hostDesc = {}; hostDesc.flags = ZE_HOST_MEM_ALLOC_FLAG_BIAS_INITIAL_PLACEMENT; ze_result_t result = context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); auto allocData = driverHandle->getSvmAllocsManager()->getSVMAlloc(ptr); EXPECT_NE(nullptr, allocData); EXPECT_EQ(1u, allocData->allocationFlagsProperty.allocFlags.usmInitialPlacementCpu); EXPECT_EQ(0u, allocData->allocationFlagsProperty.allocFlags.usmInitialPlacementGpu); result = context->freeMem(ptr); ASSERT_EQ(result, ZE_RESULT_SUCCESS); } struct SVMAllocsManagerFreeExtMock : public NEO::SVMAllocsManager { SVMAllocsManagerFreeExtMock(MemoryManager *memoryManager) : NEO::SVMAllocsManager(memoryManager, false) {} bool freeSVMAlloc(void *ptr, bool blocking) override { if (blocking) { blockingCallsMade++; } return SVMAllocsManager::freeSVMAlloc(ptr, blocking); } uint32_t blockingCallsMade = 0; }; struct FreeExtTests : public ::testing::Test { void SetUp() override { NEO::MockCompilerEnableGuard mock(true); neoDevice = NEO::MockDevice::createWithNewExecutionEnvironment(NEO::defaultHwInfo.get()); auto mockBuiltIns = new MockBuiltins(); neoDevice->executionEnvironment->rootDeviceEnvironments[0]->builtins.reset(mockBuiltIns); NEO::DeviceVector devices; 
devices.push_back(std::unique_ptr(neoDevice)); driverHandle = std::make_unique(); driverHandle->initialize(std::move(devices)); prevSvmAllocsManager = driverHandle->svmAllocsManager; currSvmAllocsManager = new SVMAllocsManagerFreeExtMock(driverHandle->memoryManager); driverHandle->svmAllocsManager = currSvmAllocsManager; device = driverHandle->devices[0]; context = std::make_unique(driverHandle.get()); EXPECT_NE(context, nullptr); context->getDevices().insert(std::make_pair(device->toHandle(), device)); auto neoDevice = device->getNEODevice(); context->rootDeviceIndices.insert(neoDevice->getRootDeviceIndex()); context->deviceBitfields.insert({neoDevice->getRootDeviceIndex(), neoDevice->getDeviceBitfield()}); } void TearDown() override { driverHandle->svmAllocsManager = prevSvmAllocsManager; delete currSvmAllocsManager; } NEO::SVMAllocsManager *prevSvmAllocsManager; NEO::SVMAllocsManager *currSvmAllocsManager; std::unique_ptr driverHandle; NEO::MockDevice *neoDevice = nullptr; L0::Device *device = nullptr; std::unique_ptr context; }; TEST_F(FreeExtTests, whenFreeMemIsCalledWithoutArgumentThenNoBlockingCallIsMade) { size_t size = 1024; size_t alignment = 1u; void *ptr = nullptr; ze_host_mem_alloc_desc_t hostDesc = {}; ze_result_t result = context->allocHostMem(&hostDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); result = context->freeMem(ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); SVMAllocsManagerFreeExtMock *memManager = reinterpret_cast(currSvmAllocsManager); EXPECT_EQ(0u, memManager->blockingCallsMade); } TEST_F(FreeExtTests, whenFreeMemExtIsCalledWithBlockingFreePolicyThenBlockingCallIsMade) { size_t size = 1024; size_t alignment = 1u; void *ptr = nullptr; ze_host_mem_alloc_desc_t hostDesc = {}; ze_result_t result = context->allocHostMem(&hostDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); ze_memory_free_ext_desc_t memFreeDesc = {}; memFreeDesc.freePolicy = 
ZE_DRIVER_MEMORY_FREE_POLICY_EXT_FLAG_BLOCKING_FREE; result = context->freeMemExt(&memFreeDesc, ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); SVMAllocsManagerFreeExtMock *memManager = reinterpret_cast(currSvmAllocsManager); EXPECT_EQ(1u, memManager->blockingCallsMade); } TEST_F(FreeExtTests, whenFreeMemExtIsCalledWithDeferFreePolicyThenUnsuportedIsReturned) { size_t size = 1024; size_t alignment = 1u; void *ptr = nullptr; ze_host_mem_alloc_desc_t hostDesc = {}; ze_result_t result = context->allocHostMem(&hostDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); ze_memory_free_ext_desc_t memFreeDesc = {}; memFreeDesc.freePolicy = ZE_DRIVER_MEMORY_FREE_POLICY_EXT_FLAG_DEFER_FREE; result = context->freeMemExt(&memFreeDesc, ptr); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, result); result = context->freeMem(ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST_F(FreeExtTests, whenFreeMemExtIsCalledWithDefaultFreePolicyThenNonBlockingCallIsMade) { size_t size = 1024; size_t alignment = 1u; void *ptr = nullptr; ze_host_mem_alloc_desc_t hostDesc = {}; ze_result_t result = context->allocHostMem(&hostDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); ze_memory_free_ext_desc_t memFreeDesc = {}; result = context->freeMemExt(&memFreeDesc, ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); SVMAllocsManagerFreeExtMock *memManager = reinterpret_cast(currSvmAllocsManager); EXPECT_EQ(0u, memManager->blockingCallsMade); } struct SVMAllocsManagerOutOFMemoryMock : public NEO::SVMAllocsManager { SVMAllocsManagerOutOFMemoryMock(MemoryManager *memoryManager) : NEO::SVMAllocsManager(memoryManager, false) {} void *createUnifiedMemoryAllocation(size_t size, const UnifiedMemoryProperties &svmProperties) override { return nullptr; } }; struct OutOfMemoryTests : public ::testing::Test { void SetUp() override { NEO::MockCompilerEnableGuard mock(true); neoDevice = 
NEO::MockDevice::createWithNewExecutionEnvironment(NEO::defaultHwInfo.get()); auto mockBuiltIns = new MockBuiltins(); neoDevice->executionEnvironment->rootDeviceEnvironments[0]->builtins.reset(mockBuiltIns); NEO::DeviceVector devices; devices.push_back(std::unique_ptr(neoDevice)); driverHandle = std::make_unique(); driverHandle->initialize(std::move(devices)); prevSvmAllocsManager = driverHandle->svmAllocsManager; currSvmAllocsManager = new SVMAllocsManagerOutOFMemoryMock(driverHandle->memoryManager); driverHandle->svmAllocsManager = currSvmAllocsManager; device = driverHandle->devices[0]; context = std::make_unique(driverHandle.get()); EXPECT_NE(context, nullptr); context->getDevices().insert(std::make_pair(device->toHandle(), device)); auto neoDevice = device->getNEODevice(); context->rootDeviceIndices.insert(neoDevice->getRootDeviceIndex()); context->deviceBitfields.insert({neoDevice->getRootDeviceIndex(), neoDevice->getDeviceBitfield()}); } void TearDown() override { driverHandle->svmAllocsManager = prevSvmAllocsManager; delete currSvmAllocsManager; } NEO::SVMAllocsManager *prevSvmAllocsManager; NEO::SVMAllocsManager *currSvmAllocsManager; std::unique_ptr driverHandle; NEO::MockDevice *neoDevice = nullptr; L0::Device *device = nullptr; std::unique_ptr context; }; TEST_F(OutOfMemoryTests, givenCallToDeviceAllocAndFailureToAllocateThenOutOfDeviceMemoryIsReturned) { size_t size = device->getNEODevice()->getDeviceInfo().maxMemAllocSize - 1; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_result_t result = context->allocDeviceMem(device->toHandle(), &deviceDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY, result); } struct SVMAllocsManagerRelaxedSizeMock : public NEO::SVMAllocsManager { SVMAllocsManagerRelaxedSizeMock(MemoryManager *memoryManager) : NEO::SVMAllocsManager(memoryManager, false) {} void *createUnifiedMemoryAllocation(size_t size, const UnifiedMemoryProperties &svmProperties) 
override { return alignedMalloc(4096u, 4096u); } void *createSharedUnifiedMemoryAllocation(size_t size, const UnifiedMemoryProperties &svmProperties, void *cmdQ) override { return alignedMalloc(4096u, 4096u); } void *createHostUnifiedMemoryAllocation(size_t size, const UnifiedMemoryProperties &memoryProperties) override { return alignedMalloc(4096u, 4096u); } }; struct ContextRelaxedSizeMock : public ContextImp { ContextRelaxedSizeMock(L0::DriverHandleImp *driverHandle) : ContextImp(driverHandle) {} ze_result_t freeMem(const void *ptr) override { alignedFree(const_cast(ptr)); return ZE_RESULT_SUCCESS; } }; struct MemoryRelaxedSizeTests : public ::testing::Test { void SetUp() override { NEO::MockCompilerEnableGuard mock(true); neoDevice = NEO::MockDevice::createWithNewExecutionEnvironment(NEO::defaultHwInfo.get()); auto mockBuiltIns = new MockBuiltins(); neoDevice->executionEnvironment->rootDeviceEnvironments[0]->builtins.reset(mockBuiltIns); NEO::DeviceVector devices; devices.push_back(std::unique_ptr(neoDevice)); driverHandle = std::make_unique(); driverHandle->initialize(std::move(devices)); prevSvmAllocsManager = driverHandle->svmAllocsManager; currSvmAllocsManager = new SVMAllocsManagerRelaxedSizeMock(driverHandle->memoryManager); driverHandle->svmAllocsManager = currSvmAllocsManager; device = driverHandle->devices[0]; context = std::make_unique(driverHandle.get()); EXPECT_NE(context, nullptr); context->getDevices().insert(std::make_pair(device->toHandle(), device)); auto neoDevice = device->getNEODevice(); context->rootDeviceIndices.insert(neoDevice->getRootDeviceIndex()); context->deviceBitfields.insert({neoDevice->getRootDeviceIndex(), neoDevice->getDeviceBitfield()}); } void TearDown() override { driverHandle->svmAllocsManager = prevSvmAllocsManager; delete currSvmAllocsManager; } NEO::SVMAllocsManager *prevSvmAllocsManager; NEO::SVMAllocsManager *currSvmAllocsManager; std::unique_ptr driverHandle; NEO::MockDevice *neoDevice = nullptr; L0::Device *device = 
nullptr; std::unique_ptr context; }; TEST_F(MemoryRelaxedSizeTests, givenCallToHostAllocWithAllowedSizeAndWithoutRelaxedFlagThenAllocationIsMade) { size_t size = device->getNEODevice()->getDeviceInfo().maxMemAllocSize - 1; size_t alignment = 1u; void *ptr = nullptr; ze_host_mem_alloc_desc_t hostDesc = {}; ze_result_t result = context->allocHostMem(&hostDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); result = context->freeMem(ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST_F(MemoryRelaxedSizeTests, givenCallToHostAllocWithLargerThanAllowedSizeAndWithoutRelaxedFlagThenAllocationIsNotMade) { size_t size = device->getNEODevice()->getDeviceInfo().maxMemAllocSize + 1; size_t alignment = 1u; void *ptr = nullptr; ze_host_mem_alloc_desc_t hostDesc = {}; ze_result_t result = context->allocHostMem(&hostDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_SIZE, result); EXPECT_EQ(nullptr, ptr); } TEST_F(MemoryRelaxedSizeTests, givenCallToHostAllocWithLargerThanAllowedSizeAndRelaxedFlagThenAllocationIsMade) { size_t size = device->getNEODevice()->getDeviceInfo().maxMemAllocSize + 1; size_t alignment = 1u; void *ptr = nullptr; ze_host_mem_alloc_desc_t hostDesc = {}; hostDesc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC; ze_relaxed_allocation_limits_exp_desc_t relaxedSizeDesc = {}; relaxedSizeDesc.stype = ZE_STRUCTURE_TYPE_RELAXED_ALLOCATION_LIMITS_EXP_DESC; relaxedSizeDesc.flags = ZE_RELAXED_ALLOCATION_LIMITS_EXP_FLAG_MAX_SIZE; hostDesc.pNext = &relaxedSizeDesc; ze_result_t result = context->allocHostMem(&hostDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); result = context->freeMem(ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST_F(MemoryRelaxedSizeTests, givenCallToHostAllocWithLargerThanAllowedSizeAndDebugFlagThenAllocationIsMade) { DebugManagerStateRestore restorer; DebugManager.flags.AllowUnrestrictedSize.set(1); size_t size = 
device->getNEODevice()->getDeviceInfo().maxMemAllocSize + 1; size_t alignment = 1u; void *ptr = nullptr; ze_host_mem_alloc_desc_t hostDesc = {}; hostDesc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC; ze_result_t result = context->allocHostMem(&hostDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); result = context->freeMem(ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST_F(MemoryRelaxedSizeTests, givenCallToHostAllocWithLargerThanAllowedSizeAndRelaxedFlagWithIncorrectFlagThenAllocationIsNotMade) { size_t size = device->getNEODevice()->getDeviceInfo().maxMemAllocSize + 1; size_t alignment = 1u; void *ptr = nullptr; ze_host_mem_alloc_desc_t hostDesc = {}; hostDesc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC; ze_relaxed_allocation_limits_exp_desc_t relaxedSizeDesc = {}; relaxedSizeDesc.stype = ZE_STRUCTURE_TYPE_RELAXED_ALLOCATION_LIMITS_EXP_DESC; relaxedSizeDesc.flags = static_cast(ZE_BIT(1)); hostDesc.pNext = &relaxedSizeDesc; ze_result_t result = context->allocHostMem(&hostDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, result); EXPECT_EQ(nullptr, ptr); } TEST_F(MemoryRelaxedSizeTests, givenCallToHostAllocWithLargerThanAllowedSizeAndRelaxedDescriptorWithWrongStypeThenUnsupportedSizeIsReturned) { size_t size = device->getNEODevice()->getDeviceInfo().maxMemAllocSize + 1; size_t alignment = 1u; void *ptr = nullptr; ze_host_mem_alloc_desc_t hostDesc = {}; hostDesc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC; ze_relaxed_allocation_limits_exp_desc_t relaxedSizeDesc = {}; relaxedSizeDesc.stype = ZE_STRUCTURE_TYPE_DRIVER_PROPERTIES; relaxedSizeDesc.flags = static_cast(ZE_BIT(1)); hostDesc.pNext = &relaxedSizeDesc; ze_result_t result = context->allocHostMem(&hostDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_SIZE, result); EXPECT_EQ(nullptr, ptr); } TEST_F(MemoryRelaxedSizeTests, givenCallToDeviceAllocWithAllowedSizeAndWithoutRelaxedFlagThenAllocationIsMade) { size_t size = 
device->getNEODevice()->getDeviceInfo().maxMemAllocSize - 1; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_result_t result = context->allocDeviceMem(device->toHandle(), &deviceDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); result = context->freeMem(ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST_F(MemoryRelaxedSizeTests, givenCallToDeviceAllocWithLargerThanAllowedSizeAndWithoutRelaxedFlagThenAllocationIsNotMade) { size_t size = device->getNEODevice()->getDeviceInfo().maxMemAllocSize + 1; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_result_t result = context->allocDeviceMem(device->toHandle(), &deviceDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_SIZE, result); EXPECT_EQ(nullptr, ptr); } TEST_F(MemoryRelaxedSizeTests, givenCallToDeviceAllocWithLargerThanAllowedSizeAndRelaxedFlagThenAllocationIsMade) { if (device->getNEODevice()->areSharedSystemAllocationsAllowed()) { GTEST_SKIP(); } size_t size = device->getNEODevice()->getDeviceInfo().maxMemAllocSize + 1; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; deviceDesc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC; ze_relaxed_allocation_limits_exp_desc_t relaxedSizeDesc = {}; relaxedSizeDesc.stype = ZE_STRUCTURE_TYPE_RELAXED_ALLOCATION_LIMITS_EXP_DESC; relaxedSizeDesc.flags = ZE_RELAXED_ALLOCATION_LIMITS_EXP_FLAG_MAX_SIZE; deviceDesc.pNext = &relaxedSizeDesc; ze_result_t result = context->allocDeviceMem(device->toHandle(), &deviceDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); result = context->freeMem(ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST_F(MemoryRelaxedSizeTests, givenCallToDeviceAllocWithLargerThanAllowedSizeAndDebugFlagThenAllocationIsMade) { if (device->getNEODevice()->areSharedSystemAllocationsAllowed()) { GTEST_SKIP(); } DebugManagerStateRestore restorer; 
DebugManager.flags.AllowUnrestrictedSize.set(1); size_t size = device->getNEODevice()->getDeviceInfo().maxMemAllocSize + 1; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; deviceDesc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC; ze_result_t result = context->allocDeviceMem(device->toHandle(), &deviceDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); result = context->freeMem(ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST_F(MemoryRelaxedSizeTests, givenCallToDeviceAllocWithLargerThanGlobalMemSizeAndRelaxedFlagThenAllocationIsNotMade) { size_t size = device->getNEODevice()->getDeviceInfo().globalMemSize + 1; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; deviceDesc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC; ze_relaxed_allocation_limits_exp_desc_t relaxedSizeDesc = {}; relaxedSizeDesc.stype = ZE_STRUCTURE_TYPE_RELAXED_ALLOCATION_LIMITS_EXP_DESC; relaxedSizeDesc.flags = ZE_RELAXED_ALLOCATION_LIMITS_EXP_FLAG_MAX_SIZE; deviceDesc.pNext = &relaxedSizeDesc; ze_result_t result = context->allocDeviceMem(device->toHandle(), &deviceDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_SIZE, result); EXPECT_EQ(nullptr, ptr); } TEST_F(MemoryRelaxedSizeTests, givenCallToDeviceAllocWithLargerThanAllowedSizeAndRelaxedFlagWithIncorrectFlagThenAllocationIsNotMade) { size_t size = device->getNEODevice()->getDeviceInfo().maxMemAllocSize + 1; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; deviceDesc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC; ze_relaxed_allocation_limits_exp_desc_t relaxedSizeDesc = {}; relaxedSizeDesc.stype = ZE_STRUCTURE_TYPE_RELAXED_ALLOCATION_LIMITS_EXP_DESC; relaxedSizeDesc.flags = static_cast(ZE_BIT(1)); deviceDesc.pNext = &relaxedSizeDesc; ze_result_t result = context->allocDeviceMem(device->toHandle(), &deviceDesc, size, alignment, &ptr); 
EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, result); EXPECT_EQ(nullptr, ptr); } /* pNext points at a descriptor whose stype is ZE_STRUCTURE_TYPE_DRIVER_PROPERTIES instead of the relaxed-limits stype; the device allocation is expected to return ZE_RESULT_ERROR_UNSUPPORTED_ENUMERATION and leave ptr null. */ TEST_F(MemoryRelaxedSizeTests, givenCallToDeviceAllocWithLargerThanAllowedSizeAndRelaxedDescriptorWithWrongStypeThenUnsupportedEnumerationIsReturned) { size_t size = device->getNEODevice()->getDeviceInfo().maxMemAllocSize + 1; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; deviceDesc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC; ze_relaxed_allocation_limits_exp_desc_t relaxedSizeDesc = {}; relaxedSizeDesc.stype = ZE_STRUCTURE_TYPE_DRIVER_PROPERTIES; relaxedSizeDesc.flags = static_cast(ZE_BIT(1)); deviceDesc.pNext = &relaxedSizeDesc; ze_result_t result = context->allocDeviceMem(device->toHandle(), &deviceDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_ENUMERATION, result); EXPECT_EQ(nullptr, ptr); } /* Shared allocation of maxMemAllocSize - 1 with default device and host descriptors is expected to succeed and to be freeable. */ TEST_F(MemoryRelaxedSizeTests, givenCallToSharedAllocWithAllowedSizeAndWithoutRelaxedFlagThenAllocationIsMade) { size_t size = device->getNEODevice()->getDeviceInfo().maxMemAllocSize - 1; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_host_mem_alloc_desc_t hostDesc = {}; ze_result_t result = context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); result = context->freeMem(ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } /* Shared allocation of maxMemAllocSize + 1 with default descriptors is expected to fail with ZE_RESULT_ERROR_UNSUPPORTED_SIZE and leave ptr null. */ TEST_F(MemoryRelaxedSizeTests, givenCallToSharedAllocWithLargerThanAllowedSizeAndWithoutRelaxedFlagThenAllocationIsNotMade) { size_t size = device->getNEODevice()->getDeviceInfo().maxMemAllocSize + 1; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_host_mem_alloc_desc_t hostDesc = {}; ze_result_t result = context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_SIZE, result); EXPECT_EQ(nullptr, ptr); } /* Shared-allocation variant of the relaxed-flag case: the MAX_SIZE relaxed-limits descriptor is chained on the device descriptor and an allocation of maxMemAllocSize + 1 is expected to succeed. Skipped when areSharedSystemAllocationsAllowed() returns true. */ TEST_F(MemoryRelaxedSizeTests, 
givenCallToSharedAllocWithLargerThanAllowedSizeAndRelaxedFlagThenAllocationIsMade) { if (device->getNEODevice()->areSharedSystemAllocationsAllowed()) { GTEST_SKIP(); } size_t size = device->getNEODevice()->getDeviceInfo().maxMemAllocSize + 1; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; deviceDesc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC; ze_relaxed_allocation_limits_exp_desc_t relaxedSizeDesc = {}; relaxedSizeDesc.stype = ZE_STRUCTURE_TYPE_RELAXED_ALLOCATION_LIMITS_EXP_DESC; relaxedSizeDesc.flags = ZE_RELAXED_ALLOCATION_LIMITS_EXP_FLAG_MAX_SIZE; deviceDesc.pNext = &relaxedSizeDesc; ze_host_mem_alloc_desc_t hostDesc = {}; ze_result_t result = context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); result = context->freeMem(ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } /* Sets the AllowUnrestrictedSize debug flag to 1 and expects a shared allocation of maxMemAllocSize + 1 to succeed without a relaxed-limits descriptor. Skipped when areSharedSystemAllocationsAllowed() returns true. */ TEST_F(MemoryRelaxedSizeTests, givenCallToSharedAllocWithLargerThanAllowedSizeAndDebugFlagThenAllocationIsMade) { if (device->getNEODevice()->areSharedSystemAllocationsAllowed()) { GTEST_SKIP(); } DebugManagerStateRestore restorer; DebugManager.flags.AllowUnrestrictedSize.set(1); size_t size = device->getNEODevice()->getDeviceInfo().maxMemAllocSize + 1; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; deviceDesc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC; ze_host_mem_alloc_desc_t hostDesc = {}; ze_result_t result = context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); result = context->freeMem(ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } /* Even with the MAX_SIZE relaxed-limits flag chained via pNext, a shared allocation of globalMemSize + 1 is expected to fail with ZE_RESULT_ERROR_UNSUPPORTED_SIZE and leave ptr null. */ TEST_F(MemoryRelaxedSizeTests, givenCallToSharedAllocWithLargerThanGlobalMemSizeAndRelaxedFlagThenAllocationIsNotMade) { size_t size = device->getNEODevice()->getDeviceInfo().globalMemSize + 1; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = 
{}; deviceDesc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC; ze_relaxed_allocation_limits_exp_desc_t relaxedSizeDesc = {}; relaxedSizeDesc.stype = ZE_STRUCTURE_TYPE_RELAXED_ALLOCATION_LIMITS_EXP_DESC; relaxedSizeDesc.flags = ZE_RELAXED_ALLOCATION_LIMITS_EXP_FLAG_MAX_SIZE; deviceDesc.pNext = &relaxedSizeDesc; ze_host_mem_alloc_desc_t hostDesc = {}; ze_result_t result = context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_SIZE, result); EXPECT_EQ(nullptr, ptr); } /* Relaxed-limits descriptor whose flags field is set to ZE_BIT(1) rather than the MAX_SIZE flag: the shared allocation of maxMemAllocSize + 1 is expected to fail with ZE_RESULT_ERROR_INVALID_ARGUMENT and leave ptr null. */ TEST_F(MemoryRelaxedSizeTests, givenCallToSharedAllocWithLargerThanAllowedSizeAndRelaxedFlagWithIncorrectFlagThenAllocationIsNotMade) { size_t size = device->getNEODevice()->getDeviceInfo().maxMemAllocSize + 1; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; deviceDesc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC; ze_relaxed_allocation_limits_exp_desc_t relaxedSizeDesc = {}; relaxedSizeDesc.stype = ZE_STRUCTURE_TYPE_RELAXED_ALLOCATION_LIMITS_EXP_DESC; relaxedSizeDesc.flags = static_cast(ZE_BIT(1)); deviceDesc.pNext = &relaxedSizeDesc; ze_host_mem_alloc_desc_t hostDesc = {}; ze_result_t result = context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, result); EXPECT_EQ(nullptr, ptr); } /* pNext points at a descriptor whose stype is ZE_STRUCTURE_TYPE_DRIVER_PROPERTIES; for the shared allocation of maxMemAllocSize + 1 this test expects ZE_RESULT_ERROR_UNSUPPORTED_SIZE (the device-allocation counterpart of this test expects ZE_RESULT_ERROR_UNSUPPORTED_ENUMERATION instead). */ TEST_F(MemoryRelaxedSizeTests, givenCallToSharedAllocWithLargerThanAllowedSizeAndRelaxedDescriptorWithWrongStypeThenUnsupportedSizeIsReturned) { size_t size = device->getNEODevice()->getDeviceInfo().maxMemAllocSize + 1; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; deviceDesc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC; ze_relaxed_allocation_limits_exp_desc_t relaxedSizeDesc = {}; relaxedSizeDesc.stype = ZE_STRUCTURE_TYPE_DRIVER_PROPERTIES; relaxedSizeDesc.flags = static_cast(ZE_BIT(1)); deviceDesc.pNext = &relaxedSizeDesc; ze_host_mem_alloc_desc_t hostDesc = {}; ze_result_t result = 
context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_SIZE, result); EXPECT_EQ(nullptr, ptr); } /* Fixture derived from MemoryRelaxedSizeTests: sets AllowUnrestrictedSize to true and CreateMultipleSubDevices to 4 before running the base SetUp(), then checks that the NEO device reports 4 generic sub-devices. The DebugManagerStateRestore member lives as long as the fixture (presumably it restores the debug flags on destruction). */ struct ContextMemoryTests : public MemoryRelaxedSizeTests { void SetUp() override { DebugManager.flags.AllowUnrestrictedSize.set(true); DebugManager.flags.CreateMultipleSubDevices.set(4); MemoryRelaxedSizeTests::SetUp(); EXPECT_EQ(4u, device->getNEODevice()->getNumGenericSubDevices()); } DebugManagerStateRestore restore; }; /* With four sub-devices configured, shared and device allocations of the full globalMemSize are both expected to fail with ZE_RESULT_ERROR_UNSUPPORTED_SIZE, while allocations of globalMemSize / 4 are expected to succeed for both. The results of the freeMem() calls are not checked here. */ TEST_F(ContextMemoryTests, givenMultipleSubDevicesWhenAllocatingThenUseCorrectGlobalMemorySize) { size_t allocationSize = neoDevice->getDeviceInfo().globalMemSize; size_t alignment = 1u; void *ptr = nullptr; ze_host_mem_alloc_desc_t hostDesc = {}; ze_device_mem_alloc_desc_t deviceDesc = {}; deviceDesc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC; ze_result_t result = context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, allocationSize, alignment, &ptr); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_SIZE, result); EXPECT_EQ(nullptr, ptr); result = context->allocDeviceMem(device->toHandle(), &deviceDesc, allocationSize, alignment, &ptr); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_SIZE, result); EXPECT_EQ(nullptr, ptr); allocationSize /= 4; result = context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, allocationSize, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); context->freeMem(ptr); result = context->allocDeviceMem(device->toHandle(), &deviceDesc, allocationSize, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); context->freeMem(ptr); } /* Driver-handle mock: importFdHandle() increments importFdHandleCalledTimes on every call and returns the pointer stored in allocationMap only when the stored fd equals mockFd (57); otherwise it returns nullptr. The hDevice, flags, handle and pAloc arguments are not used by the override. */ struct DriverHandleFailGetFdMock : public L0::DriverHandleImp { void *importFdHandle(ze_device_handle_t hDevice, ze_ipc_memory_flags_t flags, uint64_t handle, NEO::GraphicsAllocation **pAloc) override { importFdHandleCalledTimes++; if (mockFd == allocationMap.second) { return allocationMap.first; } return nullptr; } const int mockFd = 57; std::pair 
allocationMap; uint32_t importFdHandleCalledTimes = 0; }; /* Context mock used with DriverHandleFailGetFdMock: after a successful base-class allocDeviceMem() it records the returned pointer together with mockFd in the driver handle's allocationMap; closeIpcMemHandle() unconditionally reports ZE_RESULT_SUCCESS. */ struct ContextFailFdMock : public L0::ContextImp { ContextFailFdMock(DriverHandleFailGetFdMock *inDriverHandle) : L0::ContextImp(static_cast(inDriverHandle)) { driverHandle = inDriverHandle; } ze_result_t allocDeviceMem(ze_device_handle_t hDevice, const ze_device_mem_alloc_desc_t *deviceDesc, size_t size, size_t alignment, void **ptr) override { ze_result_t res = L0::ContextImp::allocDeviceMem(hDevice, deviceDesc, size, alignment, ptr); if (ZE_RESULT_SUCCESS == res) { driverHandle->allocationMap.first = *ptr; driverHandle->allocationMap.second = driverHandle->mockFd; } return res; } ze_result_t closeIpcMemHandle(const void *ptr) override { return ZE_RESULT_SUCCESS; } DriverHandleFailGetFdMock *driverHandle = nullptr; }; /* Fixture: creates a mock NEO device with MockBuiltins installed, initializes the driver handle with it, creates the context and registers the device, its root device index and its device bitfield in that context. TearDown() is empty. NOTE(review): the local `neoDevice` declared near the end of SetUp() shadows the member of the same name, and hContext is never assigned in the visible code. */ struct MemoryExportImportFailTest : public ::testing::Test { void SetUp() override { NEO::MockCompilerEnableGuard mock(true); neoDevice = NEO::MockDevice::createWithNewExecutionEnvironment(NEO::defaultHwInfo.get()); auto mockBuiltIns = new MockBuiltins(); neoDevice->executionEnvironment->rootDeviceEnvironments[0]->builtins.reset(mockBuiltIns); NEO::DeviceVector devices; devices.push_back(std::unique_ptr(neoDevice)); driverHandle = std::make_unique(); driverHandle->initialize(std::move(devices)); device = driverHandle->devices[0]; context = std::make_unique(driverHandle.get()); EXPECT_NE(context, nullptr); context->getDevices().insert(std::make_pair(device->toHandle(), device)); auto neoDevice = device->getNEODevice(); context->rootDeviceIndices.insert(neoDevice->getRootDeviceIndex()); context->deviceBitfields.insert({neoDevice->getRootDeviceIndex(), neoDevice->getDeviceBitfield()}); } void TearDown() override { } std::unique_ptr driverHandle; NEO::MockDevice *neoDevice = nullptr; L0::Device *device = nullptr; ze_context_handle_t hContext; std::unique_ptr context; }; /* Export-fd properties struct carrying stype ZE_STRUCTURE_TYPE_DRIVER_PROPERTIES: getMemAllocProperties() is expected to return ZE_RESULT_ERROR_INVALID_ENUMERATION, still report ZE_MEMORY_TYPE_DEVICE and the owning device handle, and leave the fd field at its initial value. */ TEST_F(MemoryExportImportFailTest, 
givenCallToMemAllocPropertiesWithExtendedExportPropertiesAndIncorrectStypeThenFileDescriptorIsNotReturned) { size_t size = 10; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_result_t result = context->allocDeviceMem(device->toHandle(), &deviceDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); ze_memory_allocation_properties_t memoryProperties = {}; ze_external_memory_export_fd_t extendedProperties = {}; extendedProperties.stype = ZE_STRUCTURE_TYPE_DRIVER_PROPERTIES; extendedProperties.flags = ZE_EXTERNAL_MEMORY_TYPE_FLAG_DMA_BUF; extendedProperties.fd = std::numeric_limits::max(); memoryProperties.pNext = &extendedProperties; ze_device_handle_t deviceHandle; result = context->getMemAllocProperties(ptr, &memoryProperties, &deviceHandle); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ENUMERATION, result); EXPECT_EQ(memoryProperties.type, ZE_MEMORY_TYPE_DEVICE); EXPECT_EQ(deviceHandle, device->toHandle()); EXPECT_EQ(extendedProperties.fd, std::numeric_limits::max()); EXPECT_NE(extendedProperties.fd, driverHandle->mockFd); result = context->freeMem(ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } /* L0::Context::parseUSMType(InternalMemoryType::NOT_SPECIFIED) is expected to yield ZE_MEMORY_TYPE_UNKNOWN. */ TEST_F(MemoryExportImportFailTest, whenParsingMemoryTypeWithNotSpecifidTypeThenUnknownTypeIsReturned) { InternalMemoryType memoryType = InternalMemoryType::NOT_SPECIFIED; ze_memory_type_t usmType = L0::Context::parseUSMType(memoryType); EXPECT_EQ(usmType, ZE_MEMORY_TYPE_UNKNOWN); } /* Export descriptor requesting ZE_EXTERNAL_MEMORY_TYPE_FLAG_OPAQUE_FD chained on the device descriptor: allocDeviceMem() is expected to return ZE_RESULT_ERROR_UNSUPPORTED_ENUMERATION and leave ptr null. */ TEST_F(MemoryExportImportTest, givenCallToDeviceAllocWithExtendedExportDescriptorAndNonSupportedFlagThenUnsuportedEnumerationIsReturned) { size_t size = 10; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_external_memory_export_desc_t extendedDesc = {}; extendedDesc.stype = ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_EXPORT_DESC; extendedDesc.flags = ZE_EXTERNAL_MEMORY_TYPE_FLAG_OPAQUE_FD; deviceDesc.pNext = &extendedDesc; ze_result_t result = context->allocDeviceMem(device->toHandle(), 
&deviceDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_ENUMERATION, result); EXPECT_EQ(nullptr, ptr); } /* Export descriptor requesting ZE_EXTERNAL_MEMORY_TYPE_FLAG_DMA_BUF chained on the device descriptor: the allocation is expected to succeed and to be freeable. */ TEST_F(MemoryExportImportTest, givenCallToDeviceAllocWithExtendedExportDescriptorAndSupportedFlagThenAllocationIsMade) { size_t size = 10; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_external_memory_export_desc_t extendedDesc = {}; extendedDesc.stype = ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_EXPORT_DESC; extendedDesc.flags = ZE_EXTERNAL_MEMORY_TYPE_FLAG_DMA_BUF; deviceDesc.pNext = &extendedDesc; ze_result_t result = context->allocDeviceMem(device->toHandle(), &deviceDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); result = context->freeMem(ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } /* Export-fd query on a device allocation with the OPAQUE_FD flag: getMemAllocProperties() is expected to return ZE_RESULT_ERROR_UNSUPPORTED_ENUMERATION and leave the fd field at its initial value. */ TEST_F(MemoryExportImportTest, givenCallToMemAllocPropertiesWithExtendedExportPropertiesAndUnsupportedFlagThenUnsupportedEnumerationIsReturned) { size_t size = 10; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_result_t result = context->allocDeviceMem(device->toHandle(), &deviceDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); ze_memory_allocation_properties_t memoryProperties = {}; ze_external_memory_export_fd_t extendedProperties = {}; extendedProperties.stype = ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_EXPORT_FD; extendedProperties.flags = ZE_EXTERNAL_MEMORY_TYPE_FLAG_OPAQUE_FD; extendedProperties.fd = std::numeric_limits::max(); memoryProperties.pNext = &extendedProperties; ze_device_handle_t deviceHandle; result = context->getMemAllocProperties(ptr, &memoryProperties, &deviceHandle); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_ENUMERATION, result); EXPECT_EQ(extendedProperties.fd, std::numeric_limits::max()); result = context->freeMem(ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } /* Export-fd query (DMA_BUF flag) on a host allocation: getMemAllocProperties() is expected to return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE and leave the fd field at its initial value. */ TEST_F(MemoryExportImportTest, 
givenCallToMemAllocPropertiesWithExtendedExportPropertiesForNonDeviceAllocationThenUnsupportedFeatureIsReturned) { size_t size = 10; size_t alignment = 1u; void *ptr = nullptr; ze_host_mem_alloc_desc_t hostDesc = {}; ze_result_t result = context->allocHostMem(&hostDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); ze_memory_allocation_properties_t memoryProperties = {}; ze_external_memory_export_fd_t extendedProperties = {}; extendedProperties.stype = ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_EXPORT_FD; extendedProperties.flags = ZE_EXTERNAL_MEMORY_TYPE_FLAG_DMA_BUF; extendedProperties.fd = std::numeric_limits::max(); memoryProperties.pNext = &extendedProperties; result = context->getMemAllocProperties(ptr, &memoryProperties, nullptr); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, result); EXPECT_EQ(extendedProperties.fd, std::numeric_limits::max()); result = context->freeMem(ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } /* Export-fd query (DMA_BUF flag) on a device allocation: expected to succeed, report ZE_MEMORY_TYPE_DEVICE and the owning device handle, and overwrite the fd field with driverHandle->mockFd. */ TEST_F(MemoryExportImportTest, givenCallToMemAllocPropertiesWithExtendedExportPropertiesAndSupportedFlagThenValidFileDescriptorIsReturned) { size_t size = 10; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_result_t result = context->allocDeviceMem(device->toHandle(), &deviceDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); ze_memory_allocation_properties_t memoryProperties = {}; ze_external_memory_export_fd_t extendedProperties = {}; extendedProperties.stype = ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_EXPORT_FD; extendedProperties.flags = ZE_EXTERNAL_MEMORY_TYPE_FLAG_DMA_BUF; extendedProperties.fd = std::numeric_limits::max(); memoryProperties.pNext = &extendedProperties; ze_device_handle_t deviceHandle; result = context->getMemAllocProperties(ptr, &memoryProperties, &deviceHandle); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(memoryProperties.type, ZE_MEMORY_TYPE_DEVICE); EXPECT_EQ(deviceHandle, device->toHandle()); 
EXPECT_NE(extendedProperties.fd, std::numeric_limits::max()); EXPECT_EQ(extendedProperties.fd, driverHandle->mockFd); result = context->freeMem(ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } /* Allocates device memory with a DMA_BUF export descriptor, queries the exported fd (expected to equal driverHandle->mockFd), then attempts to import that fd through an import descriptor flagged ZE_EXTERNAL_MEMORY_TYPE_FLAG_OPAQUE_FD; the importing allocDeviceMem() is expected to return ZE_RESULT_ERROR_UNSUPPORTED_ENUMERATION and leave importedPtr null. */ TEST_F(MemoryExportImportTest, givenCallToDeviceAllocWithExtendedImportDescriptorAndNonSupportedFlagThenUnsupportedEnumerationIsReturned) { size_t size = 10; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_external_memory_export_desc_t extendedDesc = {}; extendedDesc.stype = ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_EXPORT_DESC; extendedDesc.flags = ZE_EXTERNAL_MEMORY_TYPE_FLAG_DMA_BUF; deviceDesc.pNext = &extendedDesc; ze_result_t result = context->allocDeviceMem(device->toHandle(), &deviceDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); ze_memory_allocation_properties_t memoryProperties = {}; ze_external_memory_export_fd_t extendedProperties = {}; extendedProperties.stype = ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_EXPORT_FD; extendedProperties.flags = ZE_EXTERNAL_MEMORY_TYPE_FLAG_DMA_BUF; extendedProperties.fd = std::numeric_limits::max(); memoryProperties.pNext = &extendedProperties; ze_device_handle_t deviceHandle; result = context->getMemAllocProperties(ptr, &memoryProperties, &deviceHandle); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(memoryProperties.type, ZE_MEMORY_TYPE_DEVICE); EXPECT_EQ(deviceHandle, device->toHandle()); EXPECT_NE(extendedProperties.fd, std::numeric_limits::max()); EXPECT_EQ(extendedProperties.fd, driverHandle->mockFd); ze_device_mem_alloc_desc_t importDeviceDesc = {}; ze_external_memory_import_fd_t extendedImportDesc = {}; extendedImportDesc.stype = ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMPORT_FD; extendedImportDesc.flags = ZE_EXTERNAL_MEMORY_TYPE_FLAG_OPAQUE_FD; extendedImportDesc.fd = extendedProperties.fd; importDeviceDesc.pNext = &extendedImportDesc; void *importedPtr = nullptr; result = context->allocDeviceMem(device->toHandle(), &importDeviceDesc, size, alignment, 
&importedPtr); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_ENUMERATION, result); EXPECT_EQ(nullptr, importedPtr); result = context->freeMem(ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } /* Export descriptor requesting ZE_EXTERNAL_MEMORY_TYPE_FLAG_OPAQUE_WIN32 chained on the device descriptor: the allocation is expected to succeed and to be freeable. */ TEST_F(MemoryExportImportWinHandleTest, givenCallToDeviceAllocWithExtendedExportDescriptorAndNTHandleFlagThenAllocationIsMade) { size_t size = 10; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_external_memory_export_desc_t extendedDesc = {}; extendedDesc.stype = ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_EXPORT_DESC; extendedDesc.flags = ZE_EXTERNAL_MEMORY_TYPE_FLAG_OPAQUE_WIN32; deviceDesc.pNext = &extendedDesc; ze_result_t result = context->allocDeviceMem(device->toHandle(), &deviceDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); result = context->freeMem(ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } /* win32 export query with the D3D12_HEAP flag: getMemAllocProperties() is expected to return ZE_RESULT_ERROR_UNSUPPORTED_ENUMERATION and leave the handle field null. */ TEST_F(MemoryExportImportWinHandleTest, givenCallToMemAllocPropertiesWithExtendedExportPropertiesAndUnsupportedFlagThenUnsupportedEnumerationIsReturned) { size_t size = 10; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_result_t result = context->allocDeviceMem(device->toHandle(), &deviceDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); ze_memory_allocation_properties_t memoryProperties = {}; ze_external_memory_export_win32_handle_t extendedProperties = {}; extendedProperties.stype = ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_EXPORT_WIN32; extendedProperties.flags = ZE_EXTERNAL_MEMORY_TYPE_FLAG_D3D12_HEAP; extendedProperties.handle = nullptr; memoryProperties.pNext = &extendedProperties; ze_device_handle_t deviceHandle; result = context->getMemAllocProperties(ptr, &memoryProperties, &deviceHandle); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_ENUMERATION, result); EXPECT_EQ(extendedProperties.handle, nullptr); result = context->freeMem(ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } /* win32 export query with the OPAQUE_WIN32 flag: getMemAllocProperties() is expected to succeed and to return a handle equal to driverHandle->mockHandle. */ TEST_F(MemoryExportImportWinHandleTest, 
givenCallToMemAllocPropertiesWithExtendedExportPropertiesAndSupportedFlagThenValidHandleIsReturned) { size_t size = 10; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_result_t result = context->allocDeviceMem(device->toHandle(), &deviceDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); ze_memory_allocation_properties_t memoryProperties = {}; ze_external_memory_export_win32_handle_t extendedProperties = {}; extendedProperties.stype = ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_EXPORT_WIN32; extendedProperties.flags = ZE_EXTERNAL_MEMORY_TYPE_FLAG_OPAQUE_WIN32; memoryProperties.pNext = &extendedProperties; ze_device_handle_t deviceHandle; result = context->getMemAllocProperties(ptr, &memoryProperties, &deviceHandle); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(extendedProperties.handle, reinterpret_cast(reinterpret_cast(driverHandle->mockHandle))); result = context->freeMem(ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } /* getImageAllocProperties() on a value-initialized image object (which, per the comment in the body, has no backing graphics allocation) is expected to return ZE_RESULT_ERROR_UNKNOWN. NOTE(review): in this single-line layout the `//` comment inside the body extends to the end of the physical line, so everything after it on this line, including the start of the ImageWindowsExportImportTest fixture, is lexically comment text. */ HWTEST2_F(MemoryTest, givenCallToGetImageAllocPropertiesWithNoBackingAllocationErrorIsReturned, IsAtLeastSkl) { ze_image_allocation_ext_properties_t imageProperties = {}; imageProperties.stype = ZE_STRUCTURE_TYPE_IMAGE_ALLOCATION_EXT_PROPERTIES; // uninitialized, so no backing graphics allocation struct ImageCoreFamily image = {}; ze_result_t result = context->getImageAllocProperties(&image, &imageProperties); EXPECT_EQ(ZE_RESULT_ERROR_UNKNOWN, result); } struct ImageWindowsExportImportTest : public MemoryExportImportWinHandleTest { void SetUp() override { MemoryExportImportWinHandleTest::SetUp(); ze_image_desc_t zeDesc = {}; zeDesc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC; zeDesc.arraylevels = 1u; zeDesc.depth = 1u; zeDesc.height = 1u; zeDesc.width = 1u; zeDesc.miplevels = 1u; zeDesc.type = ZE_IMAGE_TYPE_2DARRAY; zeDesc.flags = ZE_IMAGE_FLAG_BIAS_UNCACHED; zeDesc.format = {ZE_IMAGE_FORMAT_LAYOUT_32, ZE_IMAGE_FORMAT_TYPE_UINT, ZE_IMAGE_FORMAT_SWIZZLE_R, 
ZE_IMAGE_FORMAT_SWIZZLE_G, ZE_IMAGE_FORMAT_SWIZZLE_B, ZE_IMAGE_FORMAT_SWIZZLE_A}; auto result = Image::create(productFamily, device, &zeDesc, &image); /* Fatal assertion: the tests below dereference `image`, so stop the test here if creation failed. */ ASSERT_EQ(result, ZE_RESULT_SUCCESS); } void TearDown() override { /* TearDown() still runs after a failed SetUp(); only destroy an image that was actually created. */ if (image != nullptr) { image->destroy(); } MemoryExportImportWinHandleTest::TearDown(); } /* Image created in SetUp(); initialized to nullptr so TearDown() never reads an indeterminate pointer. */ L0::Image *image = nullptr; }; /* getImageAllocProperties() on the fixture image without any extension struct is expected to succeed and report id 0. */ TEST_F(ImageWindowsExportImportTest, givenCallToGetImageAllocPropertiesThenIdIsReturned) { ze_image_allocation_ext_properties_t imageProperties = {}; imageProperties.stype = ZE_STRUCTURE_TYPE_IMAGE_ALLOCATION_EXT_PROPERTIES; ze_result_t result = context->getImageAllocProperties(image, &imageProperties); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(imageProperties.id, 0ul); } /* win32 export query (OPAQUE_WIN32 flag) chained on the image properties: expected to succeed, report id 0 and return a non-null handle equal to driverHandle->mockHandle. */ TEST_F(ImageWindowsExportImportTest, givenCallToGetImageAllocPropertiesWithExtendedExportPropertiesThenValidHandleIsReturned) { ze_image_allocation_ext_properties_t imageProperties = {}; imageProperties.stype = ZE_STRUCTURE_TYPE_IMAGE_ALLOCATION_EXT_PROPERTIES; ze_external_memory_export_win32_handle_t extendedProperties = {}; extendedProperties.stype = ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_EXPORT_WIN32; extendedProperties.flags = ZE_EXTERNAL_MEMORY_TYPE_FLAG_OPAQUE_WIN32; extendedProperties.handle = nullptr; imageProperties.pNext = &extendedProperties; ze_result_t result = context->getImageAllocProperties(image, &imageProperties); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(imageProperties.id, 0ul); EXPECT_NE(extendedProperties.handle, nullptr); EXPECT_EQ(extendedProperties.handle, reinterpret_cast(reinterpret_cast(driverHandle->mockHandle))); } /* Extension struct carrying stype ZE_STRUCTURE_TYPE_FLOAT_ATOMIC_EXT_PROPERTIES chained on the image properties: getImageAllocProperties() is expected to return ZE_RESULT_ERROR_INVALID_ENUMERATION. */ TEST_F(ImageWindowsExportImportTest, givenCallToGetImageAllocPropertiesWithExtendedExportPropertiesAndInvalidStructureTypeThenErrorIsReturned) { ze_image_allocation_ext_properties_t imageProperties = {}; imageProperties.stype = ZE_STRUCTURE_TYPE_IMAGE_ALLOCATION_EXT_PROPERTIES; ze_external_memory_export_win32_handle_t extendedProperties = {}; extendedProperties.stype = ZE_STRUCTURE_TYPE_FLOAT_ATOMIC_EXT_PROPERTIES; extendedProperties.flags = 
ZE_EXTERNAL_MEMORY_TYPE_FLAG_OPAQUE_WIN32; extendedProperties.handle = nullptr; imageProperties.pNext = &extendedProperties; ze_result_t result = context->getImageAllocProperties(image, &imageProperties); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ENUMERATION, result); } /* win32 export query with the D3D12_RESOURCE flag chained on the image properties: getImageAllocProperties() is expected to return ZE_RESULT_ERROR_UNSUPPORTED_ENUMERATION. */ TEST_F(ImageWindowsExportImportTest, givenCallToGetImageAllocPropertiesWithExtendedExportPropertiesAndUnsupportedFlagsThenErrorIsReturned) { ze_image_allocation_ext_properties_t imageProperties = {}; imageProperties.stype = ZE_STRUCTURE_TYPE_IMAGE_ALLOCATION_EXT_PROPERTIES; ze_external_memory_export_win32_handle_t extendedProperties = {}; extendedProperties.stype = ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_EXPORT_WIN32; extendedProperties.flags = ZE_EXTERNAL_MEMORY_TYPE_FLAG_D3D12_RESOURCE; extendedProperties.handle = nullptr; imageProperties.pNext = &extendedProperties; ze_result_t result = context->getImageAllocProperties(image, &imageProperties); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_ENUMERATION, result); } /* Fixture on top of MemoryExportImportTest that creates a 1x1x1 2D-array image (32-bit UINT layout, RGBA swizzle, uncached bias flag) in SetUp() and destroys it in TearDown(). */ struct ImageFdExportImportTest : public MemoryExportImportTest { void SetUp() override { MemoryExportImportTest::SetUp(); ze_image_desc_t zeDesc = {}; zeDesc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC; zeDesc.arraylevels = 1u; zeDesc.depth = 1u; zeDesc.height = 1u; zeDesc.width = 1u; zeDesc.miplevels = 1u; zeDesc.type = ZE_IMAGE_TYPE_2DARRAY; zeDesc.flags = ZE_IMAGE_FLAG_BIAS_UNCACHED; zeDesc.format = {ZE_IMAGE_FORMAT_LAYOUT_32, ZE_IMAGE_FORMAT_TYPE_UINT, ZE_IMAGE_FORMAT_SWIZZLE_R, ZE_IMAGE_FORMAT_SWIZZLE_G, ZE_IMAGE_FORMAT_SWIZZLE_B, ZE_IMAGE_FORMAT_SWIZZLE_A}; auto result = Image::create(productFamily, device, &zeDesc, &image); /* Fatal assertion: the tests dereference `image`, so stop the test here if creation failed. */ ASSERT_EQ(result, ZE_RESULT_SUCCESS); } void TearDown() override { /* TearDown() still runs after a failed SetUp(); only destroy an image that was actually created. */ if (image != nullptr) { image->destroy(); } MemoryExportImportTest::TearDown(); } /* Image created in SetUp(); initialized to nullptr so TearDown() never reads an indeterminate pointer. */ L0::Image *image = nullptr; }; /* Export-fd query (DMA_BUF flag) chained on the image properties: expected to succeed, report id 0 and overwrite the fd field with driverHandle->mockFd. */ TEST_F(ImageFdExportImportTest, givenCallToGetImageAllocPropertiesWithExtendedExportPropertiesThenValidFileDescriptorIsReturned) { ze_image_allocation_ext_properties_t imageProperties = {}; imageProperties.stype = 
ZE_STRUCTURE_TYPE_IMAGE_ALLOCATION_EXT_PROPERTIES; ze_external_memory_export_fd_t extendedProperties = {}; extendedProperties.stype = ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_EXPORT_FD; extendedProperties.flags = ZE_EXTERNAL_MEMORY_TYPE_FLAG_DMA_BUF; extendedProperties.fd = std::numeric_limits::max(); imageProperties.pNext = &extendedProperties; ze_result_t result = context->getImageAllocProperties(image, &imageProperties); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(imageProperties.id, 0ul); EXPECT_EQ(extendedProperties.fd, driverHandle->mockFd); } /* Fixture: prepares numRootDevices (2) root device environments with the default hardware info, creates the root devices through a device factory (0 sub-devices requested), initializes the driver handle with them, creates the context and registers every device, root device index and device bitfield in it. NOTE(review): setMemoryManager(getMemoryManager()) re-assigns the manager the driver handle already reports; confirm whether that call is intentional. */ struct MultipleDevicePeerAllocationFailTest : public ::testing::Test { void SetUp() override { NEO::MockCompilerEnableGuard mock(true); std::vector> devices; NEO::ExecutionEnvironment *executionEnvironment = new NEO::ExecutionEnvironment(); executionEnvironment->prepareRootDeviceEnvironments(numRootDevices); for (auto i = 0u; i < executionEnvironment->rootDeviceEnvironments.size(); i++) { executionEnvironment->rootDeviceEnvironments[i]->setHwInfo(NEO::defaultHwInfo.get()); } deviceFactory = std::make_unique(numRootDevices, 0, *executionEnvironment); for (auto i = 0u; i < executionEnvironment->rootDeviceEnvironments.size(); i++) { devices.push_back(std::unique_ptr(deviceFactory->rootDevices[i])); } driverHandle = std::make_unique(); driverHandle->initialize(std::move(devices)); driverHandle->setMemoryManager(driverHandle->getMemoryManager()); context = std::make_unique(driverHandle.get()); EXPECT_NE(context, nullptr); for (auto i = 0u; i < numRootDevices; i++) { auto device = driverHandle->devices[i]; context->getDevices().insert(std::make_pair(device->toHandle(), device)); auto neoDevice = device->getNEODevice(); context->rootDeviceIndices.insert(neoDevice->getRootDeviceIndex()); context->deviceBitfields.insert({neoDevice->getRootDeviceIndex(), neoDevice->getDeviceBitfield()}); } } DebugManagerStateRestore restorer; std::unique_ptr driverHandle; std::unique_ptr deviceFactory; std::unique_ptr context; const uint32_t 
numRootDevices = 2u; }; /* Allocates 1024 bytes on device 0, looks up its SVM allocation data, then requests a peer allocation for device 1; importFdHandle() is expected to have been called at least once and the peer allocation is expected to be nullptr. */ TEST_F(MultipleDevicePeerAllocationFailTest, givenImportFdHandleFailedThenPeerAllocationReturnsNullptr) { L0::Device *device0 = driverHandle->devices[0]; L0::Device *device1 = driverHandle->devices[1]; size_t size = 1024; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_result_t result = context->allocDeviceMem(device0->toHandle(), &deviceDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); uintptr_t peerGpuAddress = 0u; auto allocData = context->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(ptr); EXPECT_NE(allocData, nullptr); DriverHandleFailGetFdMock *driverHandleFailGetFdMock = static_cast(context->getDriverHandle()); auto peerAlloc = driverHandle->getPeerAllocation(device1, allocData, ptr, &peerGpuAddress); EXPECT_GT(driverHandleFailGetFdMock->importFdHandleCalledTimes, 0u); EXPECT_EQ(peerAlloc, nullptr); result = context->freeMem(ptr); ASSERT_EQ(result, ZE_RESULT_SUCCESS); } /* Fixture with 2 root devices and 2 sub-devices each. SetUp() builds the devices, initializes the driver handle, swaps in a MemoryManagerOpenIpcMock (remembering the previous manager) and registers all devices in the context; TearDown() restores the previous memory manager and deletes the mock. createModuleFromBinary() loads the "test_kernel" binary into `module`, createKernel() creates and initializes `kernel` for the kernel named "test". */ struct MultipleDevicePeerAllocationTest : public ::testing::Test { void createModuleFromBinary(L0::Device *device, ModuleType type = ModuleType::User) { std::string testFile; retrieveBinaryKernelFilenameApiSpecific(testFile, binaryFilename + "_", ".bin"); size_t size = 0; auto src = loadDataFromFile( testFile.c_str(), size); ASSERT_NE(0u, size); ASSERT_NE(nullptr, src); ze_module_desc_t moduleDesc = {}; moduleDesc.format = ZE_MODULE_FORMAT_NATIVE; moduleDesc.pInputModule = reinterpret_cast(src.get()); moduleDesc.inputSize = size; ModuleBuildLog *moduleBuildLog = nullptr; module.reset(Module::create(device, &moduleDesc, moduleBuildLog, type)); } void SetUp() override { NEO::MockCompilerEnableGuard mock(true); DebugManager.flags.CreateMultipleSubDevices.set(numSubDevices); VariableBackup mockDeviceFlagBackup(&MockDevice::createSingleDevice, false); std::vector> devices; NEO::ExecutionEnvironment *executionEnvironment = new NEO::ExecutionEnvironment(); 
executionEnvironment->prepareRootDeviceEnvironments(numRootDevices); for (auto i = 0u; i < executionEnvironment->rootDeviceEnvironments.size(); i++) { executionEnvironment->rootDeviceEnvironments[i]->setHwInfo(NEO::defaultHwInfo.get()); } deviceFactory = std::make_unique(numRootDevices, numSubDevices, *executionEnvironment); for (auto i = 0u; i < executionEnvironment->rootDeviceEnvironments.size(); i++) { devices.push_back(std::unique_ptr(deviceFactory->rootDevices[i])); } driverHandle = std::make_unique(); driverHandle->initialize(std::move(devices)); /* Remember the manager installed by initialize() so TearDown() can put it back, then install the mock. */ prevMemoryManager = driverHandle->getMemoryManager(); currMemoryManager = new MemoryManagerOpenIpcMock(*executionEnvironment); driverHandle->setMemoryManager(currMemoryManager); context = std::make_unique(driverHandle.get()); EXPECT_NE(context, nullptr); for (auto i = 0u; i < numRootDevices; i++) { auto device = driverHandle->devices[i]; context->getDevices().insert(std::make_pair(device->toHandle(), device)); auto neoDevice = device->getNEODevice(); context->rootDeviceIndices.insert(neoDevice->getRootDeviceIndex()); context->deviceBitfields.insert({neoDevice->getRootDeviceIndex(), neoDevice->getDeviceBitfield()}); } } void createKernel() { ze_kernel_desc_t desc = {}; desc.pKernelName = kernelName.c_str(); kernel = std::make_unique>(); kernel->module = module.get(); kernel->initialize(&desc); } void TearDown() override { /* Restore the original manager before deleting the mock that SetUp() allocated with new. */ driverHandle->setMemoryManager(prevMemoryManager); delete currMemoryManager; } DebugManagerStateRestore restorer; NEO::MemoryManager *prevMemoryManager = nullptr; NEO::MemoryManager *currMemoryManager = nullptr; std::unique_ptr driverHandle; std::unique_ptr deviceFactory; std::unique_ptr context; const std::string binaryFilename = "test_kernel"; const std::string kernelName = "test"; const uint32_t numKernelArguments = 6; std::unique_ptr module; std::unique_ptr> kernel; const uint32_t numRootDevices = 2u; const uint32_t numSubDevices = 2u; }; /* With failOnCreateGraphicsAllocationFromSharedHandle set on the mock memory manager, prepareIndirectAllocationForDestruction() on device 1's allocation is expected to leave the task count of its graphics allocation at index 1 unchanged for the context id of device 0's CSR. */ HWTEST2_F(MultipleDevicePeerAllocationTest, 
givenCallToMPrepareIndirectAllocationForDestructionThenOnlyValidAllocationCountsAreUpdated, IsAtLeastSkl) { MemoryManagerOpenIpcMock *fixtureMemoryManager = static_cast(currMemoryManager); fixtureMemoryManager->failOnCreateGraphicsAllocationFromSharedHandle = true; L0::Device *device0 = driverHandle->devices[0]; L0::Device *device1 = driverHandle->devices[1]; auto svmManager = driverHandle->getSvmAllocsManager(); NEO::CommandStreamReceiver *csr0 = nullptr; L0::DeviceImp *deviceImp0 = static_cast(device0); auto ret = deviceImp0->getCsrForOrdinalAndIndex(&csr0, 0u, 0u); ASSERT_EQ(ret, ZE_RESULT_SUCCESS); size_t size = 1024; size_t alignment = 1u; ze_device_mem_alloc_desc_t deviceDesc = {}; void *ptr0 = nullptr; ze_result_t result = context->allocDeviceMem(device0->toHandle(), &deviceDesc, size, alignment, &ptr0); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr0); void *ptr1 = nullptr; result = context->allocDeviceMem(device1->toHandle(), &deviceDesc, size, alignment, &ptr1); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr1); auto allocationData1 = svmManager->getSVMAlloc(ptr1); /* Task count is sampled before and after the call, for the context id of device 0's CSR. */ uint32_t prevPeekTaskCount1 = allocationData1->gpuAllocations.getGraphicsAllocation(1u)->getTaskCount(csr0->getOsContext().getContextId()); svmManager->prepareIndirectAllocationForDestruction(allocationData1); uint32_t postPeekTaskCount1 = allocationData1->gpuAllocations.getGraphicsAllocation(1u)->getTaskCount(csr0->getOsContext().getContextId()); EXPECT_EQ(postPeekTaskCount1, prevPeekTaskCount1); ret = context->freeMem(ptr0); ASSERT_EQ(ret, ZE_RESULT_SUCCESS); ret = context->freeMem(ptr1); ASSERT_EQ(ret, ZE_RESULT_SUCCESS); } /* Checks four input combinations of isRemoteResourceNeeded(): a null graphics allocation yields true; device 0's allocation at index 0 with its own alloc data queried for device 0 yields false; the allocation at index 1 with null alloc data yields true; device 0's allocation queried for device 1 yields true. */ HWTEST2_F(MultipleDevicePeerAllocationTest, whenisRemoteResourceNeededIsCalledWithDifferentCombinationsOfInputsThenExpectedOutputIsReturned, IsAtLeastSkl) { MemoryManagerOpenIpcMock *fixtureMemoryManager = static_cast(currMemoryManager); fixtureMemoryManager->failOnCreateGraphicsAllocationFromSharedHandle = true; L0::Device *device0 = 
driverHandle->devices[0]; L0::Device *device1 = driverHandle->devices[1]; auto svmManager = driverHandle->getSvmAllocsManager(); NEO::CommandStreamReceiver *csr0 = nullptr; L0::DeviceImp *deviceImp0 = static_cast(device0); auto ret = deviceImp0->getCsrForOrdinalAndIndex(&csr0, 0u, 0u); ASSERT_EQ(ret, ZE_RESULT_SUCCESS); size_t size = 1024; size_t alignment = 1u; ze_device_mem_alloc_desc_t deviceDesc = {}; void *ptr0 = nullptr; ze_result_t result = context->allocDeviceMem(device0->toHandle(), &deviceDesc, size, alignment, &ptr0); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr0); void *ptr1 = nullptr; result = context->allocDeviceMem(device1->toHandle(), &deviceDesc, size, alignment, &ptr1); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr1); auto allocationData0 = svmManager->getSVMAlloc(ptr0); auto allocationData1 = svmManager->getSVMAlloc(ptr1); bool isNeeded = driverHandle->isRemoteResourceNeeded(ptr0, nullptr, allocationData1, device0); EXPECT_TRUE(isNeeded); isNeeded = driverHandle->isRemoteResourceNeeded(ptr0, allocationData0->gpuAllocations.getGraphicsAllocation(0u), allocationData0, device0); EXPECT_FALSE(isNeeded); isNeeded = driverHandle->isRemoteResourceNeeded(ptr0, allocationData0->gpuAllocations.getGraphicsAllocation(1u), nullptr, device0); EXPECT_TRUE(isNeeded); isNeeded = driverHandle->isRemoteResourceNeeded(ptr0, allocationData0->gpuAllocations.getGraphicsAllocation(0u), allocationData0, device1); EXPECT_TRUE(isNeeded); ret = context->freeMem(ptr0); ASSERT_EQ(ret, ZE_RESULT_SUCCESS); ret = context->freeMem(ptr1); ASSERT_EQ(ret, ZE_RESULT_SUCCESS); } HWTEST2_F(MultipleDevicePeerAllocationTest, givenCallToMakeIndirectAllocationsResidentThenOnlyValidAllocationsAreMadeResident, IsAtLeastSkl) { MemoryManagerOpenIpcMock *fixtureMemoryManager = static_cast(currMemoryManager); fixtureMemoryManager->failOnCreateGraphicsAllocationFromSharedHandle = true; L0::Device *device0 = driverHandle->devices[0]; L0::Device *device1 = 
driverHandle->devices[1]; auto svmManager = driverHandle->getSvmAllocsManager(); NEO::CommandStreamReceiver *csr = nullptr; L0::DeviceImp *deviceImp1 = static_cast(device1); auto ret = deviceImp1->getCsrForOrdinalAndIndex(&csr, 0u, 0u); ASSERT_EQ(ret, ZE_RESULT_SUCCESS); size_t size = 1024; size_t alignment = 1u; ze_device_mem_alloc_desc_t deviceDesc = {}; void *ptr0 = nullptr; ze_result_t result = context->allocDeviceMem(device0->toHandle(), &deviceDesc, size, alignment, &ptr0); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr0); void *ptr1 = nullptr; result = context->allocDeviceMem(device1->toHandle(), &deviceDesc, size, alignment, &ptr1); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr1); auto &residentAllocations = csr->getResidencyAllocations(); EXPECT_EQ(0u, residentAllocations.size()); svmManager->makeIndirectAllocationsResident(*csr, 1u); EXPECT_EQ(1u, residentAllocations.size()); EXPECT_EQ(residentAllocations[0]->getGpuAddress(), reinterpret_cast(ptr1)); ret = context->freeMem(ptr0); ASSERT_EQ(ret, ZE_RESULT_SUCCESS); ret = context->freeMem(ptr1); ASSERT_EQ(ret, ZE_RESULT_SUCCESS); } HWTEST2_F(MultipleDevicePeerAllocationTest, givenCallToMakeInternalAllocationsResidentThenOnlyValidAllocationsAreMadeResident, IsAtLeastSkl) { MemoryManagerOpenIpcMock *fixtureMemoryManager = static_cast(currMemoryManager); fixtureMemoryManager->failOnCreateGraphicsAllocationFromSharedHandle = true; L0::Device *device0 = driverHandle->devices[0]; L0::Device *device1 = driverHandle->devices[1]; auto svmManager = driverHandle->getSvmAllocsManager(); NEO::CommandStreamReceiver *csr = nullptr; L0::DeviceImp *deviceImp1 = static_cast(device1); auto ret = deviceImp1->getCsrForOrdinalAndIndex(&csr, 0u, 0u); ASSERT_EQ(ret, ZE_RESULT_SUCCESS); size_t size = 1024; size_t alignment = 1u; ze_device_mem_alloc_desc_t deviceDesc = {}; void *ptr0 = nullptr; ze_result_t result = context->allocDeviceMem(device0->toHandle(), &deviceDesc, size, alignment, &ptr0); 
EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr0); void *ptr1 = nullptr; result = context->allocDeviceMem(device1->toHandle(), &deviceDesc, size, alignment, &ptr1); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr1); auto &residentAllocations = csr->getResidencyAllocations(); EXPECT_EQ(0u, residentAllocations.size()); svmManager->makeInternalAllocationsResident(*csr, InternalMemoryType::DEVICE_UNIFIED_MEMORY); EXPECT_EQ(1u, residentAllocations.size()); EXPECT_EQ(residentAllocations[0]->getGpuAddress(), reinterpret_cast(ptr1)); ret = context->freeMem(ptr0); ASSERT_EQ(ret, ZE_RESULT_SUCCESS); ret = context->freeMem(ptr1); ASSERT_EQ(ret, ZE_RESULT_SUCCESS); } HWTEST2_F(MultipleDevicePeerAllocationTest, givenDeviceAllocationPassedToAppendBlitFillAndImportFdHandleFailingThenInvalidArgumentIsReturned, IsAtLeastSkl) { MemoryManagerOpenIpcMock *fixtureMemoryManager = static_cast(currMemoryManager); fixtureMemoryManager->failOnCreateGraphicsAllocationFromSharedHandle = true; L0::Device *device0 = driverHandle->devices[0]; L0::Device *device1 = driverHandle->devices[1]; size_t size = 1024; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_result_t result = context->allocDeviceMem(device0->toHandle(), &deviceDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); auto commandList = std::make_unique<::L0::ult::CommandListCoreFamily>(); commandList->initialize(device1, NEO::EngineGroupType::RenderCompute, 0u); uint32_t pattern = 1; result = commandList->appendBlitFill(ptr, &pattern, sizeof(pattern), size, nullptr, 0, nullptr); EXPECT_EQ(result, ZE_RESULT_ERROR_INVALID_ARGUMENT); result = context->freeMem(ptr); ASSERT_EQ(result, ZE_RESULT_SUCCESS); } HWTEST2_F(MultipleDevicePeerAllocationTest, givenDeviceAllocationPassedToAppendBlitFillUsingSameDeviceThenSuccessIsReturned, IsAtLeastSkl) { L0::Device *device0 = driverHandle->devices[0]; size_t size = 1024; size_t alignment = 1u; 
void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_result_t result = context->allocDeviceMem(device0->toHandle(), &deviceDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); auto commandList = std::make_unique<::L0::ult::CommandListCoreFamily>(); commandList->initialize(device0, NEO::EngineGroupType::RenderCompute, 0u); uint32_t pattern = 1; result = commandList->appendBlitFill(ptr, &pattern, sizeof(pattern), size, nullptr, 0, nullptr); EXPECT_EQ(result, ZE_RESULT_SUCCESS); result = context->freeMem(ptr); ASSERT_EQ(result, ZE_RESULT_SUCCESS); } HWTEST2_F(MultipleDevicePeerAllocationTest, givenDeviceAllocationPassedToAppendBlitFillUsingDevice1ThenSuccessIsReturned, IsAtLeastSkl) { L0::Device *device0 = driverHandle->devices[0]; L0::Device *device1 = driverHandle->devices[1]; size_t size = 1024; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_result_t result = context->allocDeviceMem(device0->toHandle(), &deviceDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); auto commandList = std::make_unique<::L0::ult::CommandListCoreFamily>(); commandList->initialize(device1, NEO::EngineGroupType::RenderCompute, 0u); uint32_t pattern = 1; result = commandList->appendBlitFill(ptr, &pattern, sizeof(pattern), size, nullptr, 0, nullptr); EXPECT_EQ(result, ZE_RESULT_SUCCESS); result = context->freeMem(ptr); ASSERT_EQ(result, ZE_RESULT_SUCCESS); } HWTEST2_F(MultipleDevicePeerAllocationTest, givenDeviceAllocationPassedToAppendBlitFillUsingDevice0ThenSuccessIsReturned, IsAtLeastSkl) { L0::Device *device0 = driverHandle->devices[0]; L0::Device *device1 = driverHandle->devices[1]; size_t size = 1024; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_result_t result = context->allocDeviceMem(device1->toHandle(), &deviceDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, 
ptr); auto commandList = std::make_unique<::L0::ult::CommandListCoreFamily>(); commandList->initialize(device0, NEO::EngineGroupType::RenderCompute, 0u); uint32_t pattern = 1; result = commandList->appendBlitFill(ptr, &pattern, sizeof(pattern), size, nullptr, 0, nullptr); EXPECT_EQ(result, ZE_RESULT_SUCCESS); result = context->freeMem(ptr); ASSERT_EQ(result, ZE_RESULT_SUCCESS); } HWTEST2_F(MultipleDevicePeerAllocationTest, givenHostPointerAllocationPassedToAppendBlitFillUsingDevice0ThenInvalidArgumentIsReturned, IsAtLeastSkl) { L0::Device *device0 = driverHandle->devices[0]; size_t size = 1024; uint8_t *ptr = new uint8_t[size]; auto commandList = std::make_unique<::L0::ult::CommandListCoreFamily>(); commandList->initialize(device0, NEO::EngineGroupType::RenderCompute, 0u); uint32_t pattern = 1; ze_result_t result = commandList->appendBlitFill(ptr, &pattern, sizeof(pattern), size, nullptr, 0, nullptr); EXPECT_EQ(result, ZE_RESULT_ERROR_INVALID_ARGUMENT); delete[] ptr; } HWTEST2_F(MultipleDevicePeerAllocationTest, givenDeviceAllocationPassedToGetAllignedAllocationAndImportFdHandleFailingThenPeerAllocNotFoundReturnsTrue, IsAtLeastSkl) { MemoryManagerOpenIpcMock *fixtureMemoryManager = static_cast(currMemoryManager); fixtureMemoryManager->failOnCreateGraphicsAllocationFromSharedHandle = true; L0::Device *device0 = driverHandle->devices[0]; L0::Device *device1 = driverHandle->devices[1]; size_t size = 1024; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_result_t result = context->allocDeviceMem(device0->toHandle(), &deviceDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); auto commandList = std::make_unique<::L0::ult::CommandListCoreFamily>(); commandList->initialize(device1, NEO::EngineGroupType::RenderCompute, 0u); AlignedAllocationData outData = commandList->getAlignedAllocation(device1, ptr, size, false); EXPECT_EQ(nullptr, outData.alloc); result = context->freeMem(ptr); 
ASSERT_EQ(result, ZE_RESULT_SUCCESS); } HWTEST2_F(MultipleDevicePeerAllocationTest, givenDeviceAllocationPassedToGetAllignedAllocationUsingDevice1ThenAlignedAllocationWithPeerAllocationIsReturned, IsAtLeastSkl) { L0::Device *device0 = driverHandle->devices[0]; L0::Device *device1 = driverHandle->devices[1]; size_t size = 1024; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_result_t result = context->allocDeviceMem(device0->toHandle(), &deviceDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); auto commandList = std::make_unique<::L0::ult::CommandListCoreFamily>(); commandList->initialize(device1, NEO::EngineGroupType::RenderCompute, 0u); AlignedAllocationData outData = commandList->getAlignedAllocation(device1, ptr, size, false); EXPECT_NE(outData.alignedAllocationPtr, 0u); result = context->freeMem(ptr); ASSERT_EQ(result, ZE_RESULT_SUCCESS); } HWTEST2_F(MultipleDevicePeerAllocationTest, givenDeviceAllocationPassedToGetAllignedAllocationUsingDevice0ThenAlignedAllocationWithPeerAllocationIsReturned, IsAtLeastSkl) { L0::Device *device0 = driverHandle->devices[0]; L0::Device *device1 = driverHandle->devices[1]; size_t size = 1024; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_result_t result = context->allocDeviceMem(device1->toHandle(), &deviceDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); auto commandList = std::make_unique<::L0::ult::CommandListCoreFamily>(); commandList->initialize(device0, NEO::EngineGroupType::RenderCompute, 0u); AlignedAllocationData outData = commandList->getAlignedAllocation(device0, ptr, size, false); EXPECT_NE(outData.alignedAllocationPtr, 0u); result = context->freeMem(ptr); ASSERT_EQ(result, ZE_RESULT_SUCCESS); } HWTEST_F(MultipleDevicePeerAllocationTest, givenDeviceAllocationPassedAsArgumentToKernelInPeerDeviceThenPeerAllocationIsUsed) { L0::Device *device0 = 
driverHandle->devices[0]; L0::Device *device1 = driverHandle->devices[1]; L0::DeviceImp *deviceImp1 = static_cast(device1); /* Both allocations below are made on device0, while the module and kernel are created for device1. */ size_t size = 1024; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_result_t result = context->allocDeviceMem(device0->toHandle(), &deviceDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); void *ptr1 = nullptr; result = context->allocDeviceMem(device0->toHandle(), &deviceDesc, size, alignment, &ptr1); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr1); createModuleFromBinary(device1); createKernel(); // set argument in device 1's list with ptr from device 0: peer allocation is created result = kernel->setArgBuffer(0, sizeof(ptr), &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(static_cast(deviceImp1->peerAllocations.getNumAllocs()), 1u); // set argument in device 1's list with ptr1 from device 0: another peer allocation is created result = kernel->setArgBuffer(0, sizeof(ptr), &ptr1); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(static_cast(deviceImp1->peerAllocations.getNumAllocs()), 2u); // set argument in device 1's list with ptr from device 0 plus offset: no new peer allocation is created // since a peer allocation is already available void *ptrOffset = reinterpret_cast(reinterpret_cast(ptr) + 4); result = kernel->setArgBuffer(0, sizeof(ptr), &ptrOffset); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(static_cast(deviceImp1->peerAllocations.getNumAllocs()), 2u); result = context->freeMem(ptr1); ASSERT_EQ(result, ZE_RESULT_SUCCESS); result = context->freeMem(ptr); ASSERT_EQ(result, ZE_RESULT_SUCCESS); } HWTEST_F(MultipleDevicePeerAllocationTest, givenDeviceAllocationPassedAsArgumentToKernelInPeerDeviceAndCreationOfSharedHandleAllocationFailedThenInvalidArgumentIsReturned) { MemoryManagerOpenIpcMock *fixtureMemoryManager = static_cast(currMemoryManager); fixtureMemoryManager->failOnCreateGraphicsAllocationFromSharedHandle = true; 
L0::Device *device0 = driverHandle->devices[0]; L0::Device *device1 = driverHandle->devices[1]; size_t size = 1024; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_result_t result = context->allocDeviceMem(device0->toHandle(), &deviceDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); createModuleFromBinary(device1); createKernel(); result = kernel->setArgBuffer(0, sizeof(ptr), &ptr); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, result); result = context->freeMem(ptr); ASSERT_EQ(result, ZE_RESULT_SUCCESS); } TEST_F(MultipleDevicePeerAllocationTest, whenPeerAllocationForDeviceAllocationIsRequestedAndImportFdHandleFailingThenNullptrIsReturned) { MemoryManagerOpenIpcMock *fixtureMemoryManager = static_cast(currMemoryManager); fixtureMemoryManager->failOnCreateGraphicsAllocationFromSharedHandle = true; L0::Device *device0 = driverHandle->devices[0]; L0::Device *device1 = driverHandle->devices[1]; size_t size = 1024; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_result_t result = context->allocDeviceMem(device0->toHandle(), &deviceDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); uintptr_t peerGpuAddress = 0u; auto allocData = context->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(ptr); EXPECT_NE(allocData, nullptr); auto peerAlloc = driverHandle->getPeerAllocation(device1, allocData, ptr, &peerGpuAddress); EXPECT_EQ(peerAlloc, nullptr); result = context->freeMem(ptr); ASSERT_EQ(result, ZE_RESULT_SUCCESS); } TEST_F(MultipleDevicePeerAllocationTest, whenPeerAllocationForDeviceAllocationIsRequestedThenPeerAllocationIsReturned) { L0::Device *device0 = driverHandle->devices[0]; L0::Device *device1 = driverHandle->devices[1]; size_t size = 1024; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_result_t result = context->allocDeviceMem(device0->toHandle(), 
&deviceDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); uintptr_t peerGpuAddress = 0u; auto allocData = context->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(ptr); EXPECT_NE(allocData, nullptr); auto peerAlloc = driverHandle->getPeerAllocation(device1, allocData, ptr, &peerGpuAddress); EXPECT_NE(peerAlloc, nullptr); result = context->freeMem(ptr); ASSERT_EQ(result, ZE_RESULT_SUCCESS); } TEST_F(MultipleDevicePeerAllocationTest, whenPeerAllocationForDeviceAllocationIsRequestedThenPeerAllocationIsAddedToDeviceMapAndRemovedWhenAllocationIsFreed) { L0::Device *device0 = driverHandle->devices[0]; L0::Device *device1 = driverHandle->devices[1]; size_t size = 1024; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_result_t result = context->allocDeviceMem(device0->toHandle(), &deviceDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); uintptr_t peerGpuAddress = 0u; auto allocData = context->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(ptr); EXPECT_NE(allocData, nullptr); auto peerAlloc = driverHandle->getPeerAllocation(device1, allocData, ptr, &peerGpuAddress); EXPECT_NE(peerAlloc, nullptr); DeviceImp *deviceImp1 = static_cast(device1); { auto iter = deviceImp1->peerAllocations.allocations.find(ptr); EXPECT_NE(iter, deviceImp1->peerAllocations.allocations.end()); } result = context->freeMem(ptr); { auto iter = deviceImp1->peerAllocations.allocations.find(ptr); EXPECT_EQ(iter, deviceImp1->peerAllocations.allocations.end()); } ASSERT_EQ(result, ZE_RESULT_SUCCESS); } TEST_F(MultipleDevicePeerAllocationTest, whenPeerAllocationForDeviceAllocationIsRequestedThenPeerAllocationIsAddedToDeviceMapAndReturnedWhenLookingForPeerAllocationAgain) { L0::Device *device0 = driverHandle->devices[0]; L0::Device *device1 = driverHandle->devices[1]; size_t size = 1024; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = 
{}; ze_result_t result = context->allocDeviceMem(device0->toHandle(), &deviceDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); uintptr_t peerGpuAddress = 0u; auto allocData = context->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(ptr); EXPECT_NE(allocData, nullptr); DeviceImp *deviceImp1 = static_cast(device1); EXPECT_EQ(0u, deviceImp1->peerAllocations.allocations.size()); auto peerAlloc = driverHandle->getPeerAllocation(device1, allocData, ptr, &peerGpuAddress); EXPECT_NE(peerAlloc, nullptr); EXPECT_EQ(1u, deviceImp1->peerAllocations.allocations.size()); { auto iter = deviceImp1->peerAllocations.allocations.find(ptr); EXPECT_NE(iter, deviceImp1->peerAllocations.allocations.end()); } uintptr_t peerGpuAddress2 = 0u; peerAlloc = driverHandle->getPeerAllocation(device1, allocData, ptr, &peerGpuAddress2); EXPECT_NE(peerAlloc, nullptr); EXPECT_EQ(1u, deviceImp1->peerAllocations.allocations.size()); EXPECT_EQ(peerGpuAddress, peerGpuAddress2); result = context->freeMem(ptr); { auto iter = deviceImp1->peerAllocations.allocations.find(ptr); EXPECT_EQ(iter, deviceImp1->peerAllocations.allocations.end()); } ASSERT_EQ(result, ZE_RESULT_SUCCESS); } TEST_F(MultipleDevicePeerAllocationTest, whenPeerAllocationForDeviceAllocationIsRequestedWithoutPassingPeerGpuAddressParameterThenPeerAllocationIsReturned) { L0::Device *device0 = driverHandle->devices[0]; L0::Device *device1 = driverHandle->devices[1]; size_t size = 1024; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_result_t result = context->allocDeviceMem(device0->toHandle(), &deviceDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); auto allocData = context->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(ptr); EXPECT_NE(allocData, nullptr); auto peerAlloc = driverHandle->getPeerAllocation(device1, allocData, ptr, nullptr); EXPECT_NE(peerAlloc, nullptr); result = context->freeMem(ptr); 
ASSERT_EQ(result, ZE_RESULT_SUCCESS); } TEST_F(MultipleDevicePeerAllocationTest, whenPeerAllocationForDeviceAllocationIsRequestedTwiceThenSamePeerAllocationIsReturned) { L0::Device *device0 = driverHandle->devices[0]; L0::Device *device1 = driverHandle->devices[1]; size_t size = 1024; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_result_t result = context->allocDeviceMem(device0->toHandle(), &deviceDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); auto allocData = context->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(ptr); EXPECT_NE(allocData, nullptr); uintptr_t peerGpuAddress0 = 0u; auto peerAlloc0 = driverHandle->getPeerAllocation(device1, allocData, ptr, &peerGpuAddress0); EXPECT_NE(peerAlloc0, nullptr); uintptr_t peerGpuAddress1 = 0u; auto peerAlloc1 = driverHandle->getPeerAllocation(device1, allocData, ptr, &peerGpuAddress1); EXPECT_NE(peerAlloc1, nullptr); EXPECT_EQ(peerAlloc0, peerAlloc1); EXPECT_EQ(peerGpuAddress0, peerGpuAddress1); result = context->freeMem(ptr); ASSERT_EQ(result, ZE_RESULT_SUCCESS); } struct MemoryFailedOpenIpcHandleTest : public ::testing::Test { void SetUp() override { NEO::MockCompilerEnableGuard mock(true); neoDevice = NEO::MockDevice::createWithNewExecutionEnvironment(NEO::defaultHwInfo.get()); auto mockBuiltIns = new MockBuiltins(); neoDevice->executionEnvironment->rootDeviceEnvironments[0]->builtins.reset(mockBuiltIns); NEO::DeviceVector devices; devices.push_back(std::unique_ptr(neoDevice)); driverHandle = std::make_unique(); driverHandle->initialize(std::move(devices)); prevMemoryManager = driverHandle->getMemoryManager(); currMemoryManager = new MemoryManagerIpcMock(*neoDevice->executionEnvironment); driverHandle->setMemoryManager(currMemoryManager); device = driverHandle->devices[0]; context = std::make_unique(driverHandle.get()); EXPECT_NE(context, nullptr); context->getDevices().insert(std::make_pair(device->toHandle(), device)); 
/* NOTE(review): this local shadows the fixture member neoDevice declared further down in this struct. */ auto neoDevice = device->getNEODevice(); context->rootDeviceIndices.insert(neoDevice->getRootDeviceIndex()); context->deviceBitfields.insert({neoDevice->getRootDeviceIndex(), neoDevice->getDeviceBitfield()}); } /* Hands prevMemoryManager back to the driver handle, then deletes the manager held in currMemoryManager. */ void TearDown() override { driverHandle->setMemoryManager(prevMemoryManager); delete currMemoryManager; } NEO::MemoryManager *prevMemoryManager = nullptr; NEO::MemoryManager *currMemoryManager = nullptr; std::unique_ptr driverHandle; NEO::MockDevice *neoDevice = nullptr; L0::Device *device = nullptr; std::unique_ptr context; }; /* Allocates device memory, obtains an IPC handle for it, and expects openIpcMemHandle to return ZE_RESULT_ERROR_INVALID_ARGUMENT with a null output pointer. */ TEST_F(MemoryFailedOpenIpcHandleTest, givenCallToOpenIpcMemHandleWithNullPtrFromCreateGraphicsAllocationFromSharedHandleThenInvalidArgumentIsReturned) { size_t size = 10; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_result_t result = context->allocDeviceMem(device->toHandle(), &deviceDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); ze_ipc_mem_handle_t ipcHandle = {}; result = context->getIpcMemHandle(ptr, &ipcHandle); EXPECT_EQ(ZE_RESULT_SUCCESS, result); ze_ipc_memory_flags_t flags = {}; /* NOTE(review): ipcPtr starts uninitialised, so the EXPECT_EQ(ipcPtr, nullptr) below is only meaningful if openIpcMemHandle writes the output on failure - confirm against the implementation. */ void *ipcPtr; result = context->openIpcMemHandle(device->toHandle(), ipcHandle, flags, &ipcPtr); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, result); EXPECT_EQ(ipcPtr, nullptr); result = context->freeMem(ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } using DeviceMemorySizeTest = Test; /* Requests one byte more than getDeviceInfo().maxMemAllocSize and expects ZE_RESULT_ERROR_UNSUPPORTED_SIZE. */ TEST_F(DeviceMemorySizeTest, givenSizeGreaterThanLimitThenDeviceAllocationFails) { size_t size = neoDevice->getDeviceInfo().maxMemAllocSize + 1; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_result_t result = context->allocDeviceMem(device, &deviceDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_SIZE, result); } /* Shared allocation on a device: getMemAllocProperties is expected to report ZE_MEMORY_TYPE_SHARED and that device's handle. */ TEST_F(MemoryTest, givenSharedPointerThenDriverGetAllocPropertiesReturnsDeviceHandle) { size_t size = 10; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; 
ze_host_mem_alloc_desc_t hostDesc = {}; ze_result_t result = context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); ze_memory_allocation_properties_t memoryProperties = {}; ze_device_handle_t deviceHandle; result = context->getMemAllocProperties(ptr, &memoryProperties, &deviceHandle); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(memoryProperties.type, ZE_MEMORY_TYPE_SHARED); EXPECT_EQ(deviceHandle, device->toHandle()); result = context->freeMem(ptr); ASSERT_EQ(result, ZE_RESULT_SUCCESS); } TEST_F(MemoryTest, givenHostPointerThenDriverGetAllocPropertiesReturnsNullDevice) { size_t size = 10; size_t alignment = 1u; void *ptr = nullptr; ze_host_mem_alloc_desc_t hostDesc = {}; ze_result_t result = context->allocHostMem(&hostDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); ze_memory_allocation_properties_t memoryProperties = {}; ze_device_handle_t deviceHandle; result = context->getMemAllocProperties(ptr, &memoryProperties, &deviceHandle); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(memoryProperties.type, ZE_MEMORY_TYPE_HOST); EXPECT_EQ(deviceHandle, nullptr); result = context->freeMem(ptr); ASSERT_EQ(result, ZE_RESULT_SUCCESS); } TEST_F(MemoryTest, givenSystemAllocatedPointerThenDriverGetAllocPropertiesReturnsUnknownType) { size_t size = 10; int *ptr = new int[size]; ze_memory_allocation_properties_t memoryProperties = {}; ze_device_handle_t deviceHandle; ze_result_t result = context->getMemAllocProperties(ptr, &memoryProperties, &deviceHandle); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(memoryProperties.type, ZE_MEMORY_TYPE_UNKNOWN); delete[] ptr; } TEST_F(MemoryTest, givenSharedPointerAndDeviceHandleAsNullThenDriverReturnsSuccessAndReturnsPointerToSharedAllocation) { size_t size = 10; size_t alignment = 1u; void *ptr = nullptr; ASSERT_NE(nullptr, device->toHandle()); ze_device_mem_alloc_desc_t deviceDesc = {}; 
ze_host_mem_alloc_desc_t hostDesc = {}; ze_result_t result = context->allocSharedMem(nullptr, &deviceDesc, &hostDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); result = context->freeMem(ptr); ASSERT_EQ(result, ZE_RESULT_SUCCESS); } TEST_F(MemoryTest, givenNoDeviceWhenAllocatingSharedMemoryThenDeviceInAllocationIsNullptr) { size_t size = 10; size_t alignment = 1u; void *ptr = nullptr; ASSERT_NE(nullptr, device->toHandle()); ze_device_mem_alloc_desc_t deviceDesc = {}; ze_host_mem_alloc_desc_t hostDesc = {}; ze_result_t result = context->allocSharedMem(nullptr, &deviceDesc, &hostDesc, size, alignment, &ptr); auto alloc = driverHandle->svmAllocsManager->getSVMAlloc(ptr); EXPECT_EQ(alloc->device, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); result = context->freeMem(ptr); ASSERT_EQ(result, ZE_RESULT_SUCCESS); } TEST_F(MemoryTest, givenCallToCheckMemoryAccessFromDeviceWithInvalidPointerThenInvalidArgumentIsReturned) { void *ptr = nullptr; ze_result_t res = driverHandle->checkMemoryAccessFromDevice(device, ptr); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, res); } TEST_F(MemoryTest, givenCallToCheckMemoryAccessFromDeviceWithValidDeviceAllocationPointerThenSuccessIsReturned) { size_t size = 10; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_result_t res = context->allocDeviceMem(device->toHandle(), &deviceDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, res); EXPECT_NE(nullptr, ptr); res = driverHandle->checkMemoryAccessFromDevice(device, ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, res); res = context->freeMem(ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, res); } TEST_F(MemoryTest, givenCallToCheckMemoryAccessFromDeviceWithValidSharedAllocationPointerThenSuccessIsReturned) { size_t size = 10; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_host_mem_alloc_desc_t hostDesc = {}; ze_result_t res = 
context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, res); EXPECT_NE(nullptr, ptr); res = driverHandle->checkMemoryAccessFromDevice(device, ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, res); res = context->freeMem(ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, res); } TEST_F(MemoryTest, givenCallToCheckMemoryAccessFromDeviceWithValidHostAllocationPointerThenSuccessIsReturned) { size_t size = 10; size_t alignment = 1u; void *ptr = nullptr; ze_host_mem_alloc_desc_t hostDesc = {}; ze_result_t res = context->allocHostMem(&hostDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, res); EXPECT_NE(nullptr, ptr); res = driverHandle->checkMemoryAccessFromDevice(device, ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, res); res = context->freeMem(ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, res); } struct MemoryBitfieldTest : testing::Test { void SetUp() override { NEO::MockCompilerEnableGuard mock(true); executionEnvironment = new NEO::ExecutionEnvironment(); executionEnvironment->prepareRootDeviceEnvironments(1); executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); memoryManager = new NEO::MockMemoryManager(*executionEnvironment); executionEnvironment->memoryManager.reset(memoryManager); neoDevice = NEO::Device::create(executionEnvironment, 0u); NEO::DeviceVector devices; devices.push_back(std::unique_ptr(neoDevice)); driverHandle = std::make_unique>(); driverHandle->initialize(std::move(devices)); device = driverHandle->devices[0]; memoryManager->recentlyPassedDeviceBitfield = {}; ASSERT_NE(nullptr, driverHandle->devices[0]->toHandle()); EXPECT_NE(neoDevice->getDeviceBitfield(), memoryManager->recentlyPassedDeviceBitfield); context = std::make_unique(driverHandle.get()); EXPECT_NE(context, nullptr); context->getDevices().insert(std::make_pair(device->toHandle(), device)); auto neoDevice = device->getNEODevice(); context->rootDeviceIndices.insert(neoDevice->getRootDeviceIndex()); 
context->deviceBitfields.insert({neoDevice->getRootDeviceIndex(), neoDevice->getDeviceBitfield()});
    }

    // Frees whatever allocation the test left in `ptr`; the ASSERT fails the test
    // if freeMem() does not return ZE_RESULT_SUCCESS.
    void TearDown() override {
        auto result = context->freeMem(ptr);
        ASSERT_EQ(result, ZE_RESULT_SUCCESS);
    }

    std::unique_ptr> driverHandle;
    NEO::Device *neoDevice = nullptr;
    L0::Device *device = nullptr;
    NEO::MockMemoryManager *memoryManager;
    size_t size = 10;
    size_t alignment = 1u;
    void *ptr = nullptr;
    std::unique_ptr context;
    NEO::ExecutionEnvironment *executionEnvironment;
};

// Allocates device memory through the fixture context and checks that the mock
// memory manager recorded the fixture device's bitfield. The allocation is left
// in the fixture's `ptr` member, which TearDown() frees.
TEST_F(MemoryBitfieldTest, givenDeviceWithValidBitfieldWhenAllocatingDeviceMemoryThenPassProperBitfield) {
    NEO::MockCompilerEnableGuard mock(true);
    ze_device_mem_alloc_desc_t deviceDesc = {};
    auto result = context->allocDeviceMem(device->toHandle(), &deviceDesc, size, alignment, &ptr);
    EXPECT_EQ(neoDevice->getDeviceBitfield(), memoryManager->recentlyPassedDeviceBitfield);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_NE(nullptr, ptr);
}

// Stand-alone test (no fixture): builds two root devices, registers only the
// first one with the context, and checks which bitfield reaches the mock memory
// manager when shared memory is allocated with a null and with an explicit
// device handle.
TEST(MemoryBitfieldTests, givenDeviceWithValidBitfieldWhenAllocatingSharedMemoryThenPassProperBitfield) {
    NEO::MockCompilerEnableGuard mock(true);
    DebugManagerStateRestore restorer;
    size_t size = 10;
    size_t alignment = 1u;
    void *ptr = nullptr;
    auto executionEnvironment = new NEO::ExecutionEnvironment();
    executionEnvironment->prepareRootDeviceEnvironments(2);
    for (size_t i = 0; i < executionEnvironment->rootDeviceEnvironments.size(); i++) {
        executionEnvironment->rootDeviceEnvironments[i]->setHwInfo(defaultHwInfo.get());
    }
    auto memoryManager = new NEO::MockMemoryManager(*executionEnvironment);
    executionEnvironment->memoryManager.reset(memoryManager);
    NEO::Device *neoDevice0 = NEO::Device::create(executionEnvironment, 0u);
    // The sub-device flag is changed between the two Device::create calls; the
    // EXPECT_NE on the two bitfields below relies on the devices differing.
    DebugManager.flags.CreateMultipleSubDevices.set(4);
    NEO::Device *neoDevice1 = NEO::Device::create(executionEnvironment, 1u);
    NEO::DeviceVector devices;
    devices.push_back(std::unique_ptr(neoDevice0));
    devices.push_back(std::unique_ptr(neoDevice1));
    auto driverHandle = std::make_unique>();
    driverHandle->initialize(std::move(devices));
    auto device = driverHandle->devices[0];
    std::unique_ptr context;
    context = std::make_unique(driverHandle.get());
    EXPECT_NE(context, nullptr);
    // Only devices[0] is inserted into the context's device map, root-device
    // index set and bitfield map.
    context->getDevices().insert(std::make_pair(device->toHandle(), device));
    auto neoDevice = device->getNEODevice();
    context->rootDeviceIndices.insert(neoDevice->getRootDeviceIndex());
    context->deviceBitfields.insert({neoDevice->getRootDeviceIndex(), neoDevice->getDeviceBitfield()});

    // Reset the recorded bitfield so the EXPECT_EQ after the allocation can only
    // pass if the allocation call itself stored it.
    memoryManager->recentlyPassedDeviceBitfield = {};
    ASSERT_NE(nullptr, driverHandle->devices[1]->toHandle());
    EXPECT_NE(neoDevice0->getDeviceBitfield(), neoDevice1->getDeviceBitfield());
    EXPECT_NE(neoDevice0->getDeviceBitfield(), memoryManager->recentlyPassedDeviceBitfield);
    ze_device_mem_alloc_desc_t deviceDesc = {};
    ze_host_mem_alloc_desc_t hostDesc = {};
    // First allocation: null device handle.
    auto result = context->allocSharedMem(nullptr, &deviceDesc, &hostDesc, size, alignment, &ptr);
    EXPECT_EQ(neoDevice0->getDeviceBitfield(), memoryManager->recentlyPassedDeviceBitfield);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_NE(nullptr, ptr);
    result = context->freeMem(ptr);
    ASSERT_EQ(result, ZE_RESULT_SUCCESS);

    memoryManager->recentlyPassedDeviceBitfield = {};
    EXPECT_NE(neoDevice1->getDeviceBitfield(), memoryManager->recentlyPassedDeviceBitfield);
    // Second allocation: explicit handle of devices[0].
    result = context->allocSharedMem(driverHandle->devices[0]->toHandle(), &deviceDesc, &hostDesc, size, alignment, &ptr);
    EXPECT_EQ(neoDevice0->getDeviceBitfield(), memoryManager->recentlyPassedDeviceBitfield);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_NE(nullptr, ptr);
    result = context->freeMem(ptr);
    ASSERT_EQ(result, ZE_RESULT_SUCCESS);
}

// Fixture with `numRootDevices` (2) mock root devices and a context obtained
// from DriverHandle::createContext(); used by the host-allocation tests below.
struct AllocHostMemoryTest : public ::testing::Test {
    void SetUp() override {
        NEO::MockCompilerEnableGuard mock(true);
        std::vector> devices;
        NEO::ExecutionEnvironment *executionEnvironment = new NEO::ExecutionEnvironment();
        executionEnvironment->prepareRootDeviceEnvironments(numRootDevices);
        for (auto i = 0u; i < executionEnvironment->rootDeviceEnvironments.size(); i++) {
            executionEnvironment->rootDeviceEnvironments[i]->setHwInfo(NEO::defaultHwInfo.get());
        }

        // One MockDevice per prepared root device environment.
        for (auto i = 0u; i < executionEnvironment->rootDeviceEnvironments.size(); i++) {
            devices.push_back(std::unique_ptr(NEO::MockDevice::createWithExecutionEnvironment(NEO::defaultHwInfo.get(), executionEnvironment, i)));
        }

        driverHandle = std::make_unique>();
        driverHandle->initialize(std::move(devices));

        ze_context_handle_t hContext;
        ze_context_desc_t desc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC, nullptr, 0};
        ze_result_t res = driverHandle->createContext(&desc, 0u, nullptr, &hContext);
        EXPECT_EQ(ZE_RESULT_SUCCESS, res);
        context = static_cast(Context::fromHandle(hContext));
    }

    // The context is a raw pointer obtained from a handle, so it is released
    // through destroy() rather than by a smart pointer.
    void TearDown() override {
        context->destroy();
    }

    DebugManagerStateRestore restorer;
    std::unique_ptr> driverHandle;
    const uint32_t numRootDevices = 2u;
    L0::ContextImp *context = nullptr;
};

// A successful host allocation is expected to bump the memory manager's
// allocateGraphicsMemoryWithPropertiesCount once per root device.
TEST_F(AllocHostMemoryTest, whenCallingAllocHostMemThenAllocateGraphicsMemoryWithPropertiesIsCalledTheNumberOfTimesOfRootDevices) {
    void *ptr = nullptr;

    static_cast(driverHandle.get()->getMemoryManager())->isMockHostMemoryManager = true;
    static_cast(driverHandle.get()->getMemoryManager())->allocateGraphicsMemoryWithPropertiesCount = 0;

    ze_host_mem_alloc_desc_t hostDesc = {};
    ze_result_t result = context->allocHostMem(&hostDesc, 4096u, 0u, &ptr);
    EXPECT_EQ(static_cast(driverHandle.get()->getMemoryManager())->allocateGraphicsMemoryWithPropertiesCount, numRootDevices);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_NE(nullptr, ptr);

    // NOTE(review): the freeMem() result is not checked here, unlike the other
    // tests in this file.
    context->freeMem(ptr);
}

// With forceFailureInPrimaryAllocation set, the counter is expected to stop at
// 1 and the call to report ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY with a null ptr.
TEST_F(AllocHostMemoryTest, whenCallingAllocHostMemAndFailingOnCreatingGraphicsAllocationThenNullIsReturned) {

    static_cast(driverHandle.get()->getMemoryManager())->isMockHostMemoryManager = true;
    static_cast(driverHandle.get()->getMemoryManager())->forceFailureInPrimaryAllocation = true;

    void *ptr = nullptr;

    static_cast(driverHandle.get()->getMemoryManager())->allocateGraphicsMemoryWithPropertiesCount = 0;

    ze_host_mem_alloc_desc_t hostDesc = {};
    ze_result_t result = context->allocHostMem(&hostDesc, 4096u, 0u, &ptr);
    EXPECT_EQ(static_cast(driverHandle.get()->getMemoryManager())->allocateGraphicsMemoryWithPropertiesCount, 1u);
    EXPECT_EQ(ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY, result);
    EXPECT_EQ(nullptr, ptr);
}

// With forceFailureInAllocationWithHostPointer set, the counter is expected to
// reach numRootDevices before the call reports
// ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY with a null ptr.
TEST_F(AllocHostMemoryTest, whenCallingAllocHostMemAndFailingOnCreatingGraphicsAllocationWithHostPointerThenNullIsReturned) {

    static_cast(driverHandle.get()->getMemoryManager())->isMockHostMemoryManager = true;
    static_cast(driverHandle.get()->getMemoryManager())->forceFailureInAllocationWithHostPointer = true;

    void *ptr = nullptr;

    static_cast(driverHandle.get()->getMemoryManager())->allocateGraphicsMemoryWithPropertiesCount = 0;

    ze_host_mem_alloc_desc_t hostDesc = {};
    ze_result_t result = context->allocHostMem(&hostDesc, 4096u, 0u, &ptr);
    EXPECT_EQ(static_cast(driverHandle.get()->getMemoryManager())->allocateGraphicsMemoryWithPropertiesCount, numRootDevices);
    EXPECT_EQ(ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY, result);
    EXPECT_EQ(nullptr, ptr);
}

// NOTE(review): the template argument of Test is not visible in this extract;
// the tests below use `context` and `device` members of that fixture.
using ContextMemoryTest = Test;

// Shared allocation with an explicit device handle succeeds and can be freed.
TEST_F(ContextMemoryTest, whenAllocatingSharedAllocationFromContextThenAllocationSucceeds) {
    size_t size = 10u;
    size_t alignment = 1u;
    void *ptr = nullptr;

    ze_device_mem_alloc_desc_t deviceDesc = {};
    ze_host_mem_alloc_desc_t hostDesc = {};
    ze_result_t result = context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, size, alignment, &ptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_NE(nullptr, ptr);

    result = context->freeMem(ptr);
    EXPECT_EQ(result, ZE_RESULT_SUCCESS);
}

// Host allocation succeeds and can be freed.
TEST_F(ContextMemoryTest, whenAllocatingHostAllocationFromContextThenAllocationSucceeds) {
    size_t size = 10u;
    size_t alignment = 1u;
    void *ptr = nullptr;

    ze_host_mem_alloc_desc_t hostDesc = {};
    ze_result_t result = context->allocHostMem(&hostDesc, size, alignment, &ptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_NE(nullptr, ptr);

    result = context->freeMem(ptr);
    EXPECT_EQ(result, ZE_RESULT_SUCCESS);
}

// Device allocation succeeds and can be freed.
TEST_F(ContextMemoryTest, whenAllocatingDeviceAllocationFromContextThenAllocationSucceeds) {
    size_t size = 10u;
    size_t alignment = 1u;
    void *ptr = nullptr;

    ze_device_mem_alloc_desc_t deviceDesc = {};
    ze_result_t result = context->allocDeviceMem(device->toHandle(), &deviceDesc, size, alignment, &ptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_NE(nullptr, ptr);

    result = context->freeMem(ptr);
    EXPECT_EQ(result, ZE_RESULT_SUCCESS);
}

// Queries the address range with a pointer 77 bytes into a 4096-byte device
// allocation; the reported base must be the allocation start and the reported
// size at least the requested size.
TEST_F(ContextMemoryTest, whenRetrievingAddressRangeForDeviceAllocationThenRangeIsCorrect) {
    size_t allocSize = 4096u;
    size_t alignment = 1u;
    void *allocPtr = nullptr;

    ze_device_mem_alloc_desc_t deviceDesc = {};
    ze_result_t result = context->allocDeviceMem(device->toHandle(), &deviceDesc, allocSize, alignment, &allocPtr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_NE(nullptr, allocPtr);

    void *base = nullptr;
    size_t size = 0u;
    void *pPtr = reinterpret_cast(reinterpret_cast(allocPtr) + 77);
    result = context->getMemAddressRange(pPtr, &base, &size);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_EQ(base, allocPtr);
    EXPECT_GE(size, allocSize);

    result = context->freeMem(allocPtr);
    EXPECT_EQ(result, ZE_RESULT_SUCCESS);
}

// Same query for a 100-byte allocation; here the reported size must equal the
// requested size exactly (EXPECT_EQ rather than EXPECT_GE).
TEST_F(ContextMemoryTest, whenRetrievingSizeForDeviceAllocationThenUserSizeIsReturned) {
    size_t allocSize = 100;
    size_t alignment = 1u;
    void *allocPtr = nullptr;

    ze_device_mem_alloc_desc_t deviceDesc = {};
    ze_result_t result = context->allocDeviceMem(device->toHandle(), &deviceDesc, allocSize, alignment, &allocPtr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_NE(nullptr, allocPtr);

    void *base = nullptr;
    size_t size = 0u;
    void *pPtr = reinterpret_cast(reinterpret_cast(allocPtr) + 77);
    result = context->getMemAddressRange(pPtr, &base, &size);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_EQ(base, allocPtr);
    EXPECT_EQ(size, allocSize);

    result = context->freeMem(allocPtr);
    EXPECT_EQ(result, ZE_RESULT_SUCCESS);
}

// Passing nullptr for the base out-parameter must still succeed and report the size.
TEST_F(ContextMemoryTest, whenRetrievingAddressRangeForDeviceAllocationWithNoBaseArgumentThenSizeIsCorrectAndSuccessIsReturned) {
    size_t allocSize = 4096u;
    size_t alignment = 1u;
    void *allocPtr = nullptr;

    ze_device_mem_alloc_desc_t deviceDesc = {};
    ze_result_t result = context->allocDeviceMem(device->toHandle(), &deviceDesc, allocSize, alignment, &allocPtr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_NE(nullptr, allocPtr);

    size_t size = 0u;
    void *pPtr = reinterpret_cast(reinterpret_cast(allocPtr) + 77);
    result = context->getMemAddressRange(pPtr, nullptr, &size);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_GE(size, allocSize);

    result = context->freeMem(allocPtr);
    EXPECT_EQ(result, ZE_RESULT_SUCCESS);
}

// Passing nullptr for the size out-parameter must still succeed and report the base.
TEST_F(ContextMemoryTest, whenRetrievingAddressRangeForDeviceAllocationWithNoSizeArgumentThenRangeIsCorrectAndSuccessIsReturned) {
    size_t allocSize = 4096u;
    size_t alignment = 1u;
    void *allocPtr = nullptr;

    ze_device_mem_alloc_desc_t deviceDesc = {};
    ze_result_t result = context->allocDeviceMem(device->toHandle(), &deviceDesc, allocSize, alignment, &allocPtr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_NE(nullptr, allocPtr);

    void *base = nullptr;
    void *pPtr = reinterpret_cast(reinterpret_cast(allocPtr) + 77);
    result = context->getMemAddressRange(pPtr, &base, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_EQ(base, allocPtr);

    result = context->freeMem(allocPtr);
    EXPECT_EQ(result, ZE_RESULT_SUCCESS);
}

// The address of a stack variable was not allocated through the context, so
// the range query is expected to return ZE_RESULT_ERROR_UNKNOWN.
TEST_F(ContextMemoryTest, whenRetrievingAddressRangeForUnknownDeviceAllocationThenResultUnknownIsReturned) {
    void *base = nullptr;
    size_t size = 0u;
    uint64_t var = 0;
    ze_result_t res = context->getMemAddressRange(&var, &base, &size);
    EXPECT_EQ(ZE_RESULT_ERROR_UNKNOWN, res);
}

// A plain `new[]` buffer must be reported as ZE_MEMORY_TYPE_UNKNOWN while the
// query itself still succeeds.
TEST_F(ContextMemoryTest, givenSystemAllocatedPointerThenGetAllocPropertiesReturnsUnknownType) {
    size_t size = 10;
    int *ptr = new int[size];

    ze_memory_allocation_properties_t memoryProperties = {};
    // NOTE(review): deviceHandle is passed uninitialized and never inspected afterwards.
    ze_device_handle_t deviceHandle;
    ze_result_t result = context->getMemAllocProperties(ptr, &memoryProperties, &deviceHandle);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_EQ(memoryProperties.type, ZE_MEMORY_TYPE_UNKNOWN);

    delete[] ptr;
}
// Walks through every NEO::MemoryOperationsStatus value used below and checks
// the ze_result_t that changeMemoryOperationStatusToL0ResultType() maps it to.
TEST_F(ContextMemoryTest, givenCallTochangeMemoryOperationStatusToL0ResultTypeThenExpectedValueIsReturned) {
    NEO::MemoryOperationsStatus status = NEO::MemoryOperationsStatus::SUCCESS;
    ze_result_t res = changeMemoryOperationStatusToL0ResultType(status);
    EXPECT_EQ(res, ZE_RESULT_SUCCESS);

    status = NEO::MemoryOperationsStatus::FAILED;
    res = changeMemoryOperationStatusToL0ResultType(status);
    EXPECT_EQ(res, ZE_RESULT_ERROR_DEVICE_LOST);

    status = NEO::MemoryOperationsStatus::MEMORY_NOT_FOUND;
    res = changeMemoryOperationStatusToL0ResultType(status);
    EXPECT_EQ(res, ZE_RESULT_ERROR_INVALID_ARGUMENT);

    status = NEO::MemoryOperationsStatus::OUT_OF_MEMORY;
    res = changeMemoryOperationStatusToL0ResultType(status);
    EXPECT_EQ(res, ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY);

    status = NEO::MemoryOperationsStatus::UNSUPPORTED;
    res = changeMemoryOperationStatusToL0ResultType(status);
    EXPECT_EQ(res, ZE_RESULT_ERROR_UNSUPPORTED_FEATURE);

    status = NEO::MemoryOperationsStatus::DEVICE_UNINITIALIZED;
    res = changeMemoryOperationStatusToL0ResultType(status);
    EXPECT_EQ(res, ZE_RESULT_ERROR_UNINITIALIZED);

    // One past DEVICE_UNINITIALIZED: a value with no dedicated case is expected
    // to map to ZE_RESULT_ERROR_UNKNOWN.
    status = static_cast(static_cast(NEO::MemoryOperationsStatus::DEVICE_UNINITIALIZED) + 1);
    res = changeMemoryOperationStatusToL0ResultType(status);
    EXPECT_EQ(res, ZE_RESULT_ERROR_UNKNOWN);
}

// The four tests below import fd handle 1 with different flag combinations and
// check the locallyUncachedResource bit on the resulting SVM allocation.
// NOTE(review): in each of them `allocData` is dereferenced after a non-fatal
// EXPECT_NE on `ptr`, and getSVMAlloc()'s result is not checked for null.
using ImportFdUncachedTests = MemoryOpenIpcHandleTest;

// Device-alloc "bias uncached" flag passed through the ze_ipc_memory_flags_t argument.
TEST_F(ImportFdUncachedTests,
       givenCallToImportFdHandleWithUncachedFlagsThenLocallyUncachedResourceIsSet) {
    ze_ipc_memory_flags_t flags = ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_UNCACHED;
    uint64_t handle = 1;
    void *ptr = driverHandle->importFdHandle(device->toHandle(), flags, handle, nullptr);
    EXPECT_NE(nullptr, ptr);

    auto allocData = driverHandle->svmAllocsManager->getSVMAlloc(ptr);
    EXPECT_EQ(allocData->allocationFlagsProperty.flags.locallyUncachedResource, 1u);

    context->freeMem(ptr);
}

// IPC "bias uncached" flag.
TEST_F(ImportFdUncachedTests,
       givenCallToImportFdHandleWithUncachedIpcFlagsThenLocallyUncachedResourceIsSet) {
    ze_ipc_memory_flags_t flags = ZE_IPC_MEMORY_FLAG_BIAS_UNCACHED;
    uint64_t handle = 1;
    void *ptr = driverHandle->importFdHandle(device->toHandle(), flags, handle, nullptr);
    EXPECT_NE(nullptr, ptr);

    auto allocData = driverHandle->svmAllocsManager->getSVMAlloc(ptr);
    EXPECT_EQ(allocData->allocationFlagsProperty.flags.locallyUncachedResource, 1u);

    context->freeMem(ptr);
}

// Both flags OR-ed together.
TEST_F(ImportFdUncachedTests,
       givenCallToImportFdHandleWithBothUncachedFlagsThenLocallyUncachedResourceIsSet) {
    ze_ipc_memory_flags_t flags = ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_UNCACHED | ZE_IPC_MEMORY_FLAG_BIAS_UNCACHED;
    uint64_t handle = 1;
    void *ptr = driverHandle->importFdHandle(device->toHandle(), flags, handle, nullptr);
    EXPECT_NE(nullptr, ptr);

    auto allocData = driverHandle->svmAllocsManager->getSVMAlloc(ptr);
    EXPECT_EQ(allocData->allocationFlagsProperty.flags.locallyUncachedResource, 1u);

    context->freeMem(ptr);
}

// No flags: the bit must stay clear.
TEST_F(ImportFdUncachedTests,
       givenCallToImportFdHandleWithoutUncachedFlagsThenLocallyUncachedResourceIsNotSet) {
    ze_ipc_memory_flags_t flags = {};
    uint64_t handle = 1;
    void *ptr = driverHandle->importFdHandle(device->toHandle(), flags, handle, nullptr);
    EXPECT_NE(nullptr, ptr);

    auto allocData = driverHandle->svmAllocsManager->getSVMAlloc(ptr);
    EXPECT_EQ(allocData->allocationFlagsProperty.flags.locallyUncachedResource, 0u);

    context->freeMem(ptr);
}

// SVM allocations manager whose shared-allocation entry point always fails by
// returning nullptr.
struct SVMAllocsManagerSharedAllocFailMock : public NEO::SVMAllocsManager {
    SVMAllocsManagerSharedAllocFailMock(MemoryManager *memoryManager) : NEO::SVMAllocsManager(memoryManager, false) {}
    void *createSharedUnifiedMemoryAllocation(size_t size,
                                              const UnifiedMemoryProperties &svmProperties,
                                              void *cmdQ) override {
        return nullptr;
    }
};

// Fixture: single mock device whose driver handle has its svmAllocsManager
// swapped for SVMAllocsManagerSharedAllocFailMock for the duration of the test.
struct SharedAllocFailTests : public ::testing::Test {
    void SetUp() override {
        NEO::MockCompilerEnableGuard mock(true);
        neoDevice = NEO::MockDevice::createWithNewExecutionEnvironment(NEO::defaultHwInfo.get());
        auto mockBuiltIns = new MockBuiltins();
        neoDevice->executionEnvironment->rootDeviceEnvironments[0]->builtins.reset(mockBuiltIns);
        NEO::DeviceVector devices;
        devices.push_back(std::unique_ptr(neoDevice));
        driverHandle = std::make_unique();
        driverHandle->initialize(std::move(devices));
        // Remember the manager created by initialize() so TearDown() can put it back.
        prevSvmAllocsManager = driverHandle->svmAllocsManager;
        currSvmAllocsManager = new SVMAllocsManagerSharedAllocFailMock(driverHandle->memoryManager);
        driverHandle->svmAllocsManager = currSvmAllocsManager;
        device = driverHandle->devices[0];

        context = std::make_unique(driverHandle.get());
        EXPECT_NE(context, nullptr);
        context->getDevices().insert(std::make_pair(device->toHandle(), device));
        // NOTE(review): this local shadows the fixture member `neoDevice`.
        auto neoDevice = device->getNEODevice();
        context->rootDeviceIndices.insert(neoDevice->getRootDeviceIndex());
        context->deviceBitfields.insert({neoDevice->getRootDeviceIndex(), neoDevice->getDeviceBitfield()});
    }

    // Restores the original manager on the driver handle, then deletes the mock.
    void TearDown() override {
        driverHandle->svmAllocsManager = prevSvmAllocsManager;
        delete currSvmAllocsManager;
    }
    NEO::SVMAllocsManager *prevSvmAllocsManager;
    NEO::SVMAllocsManager *currSvmAllocsManager;
    std::unique_ptr driverHandle;
    NEO::MockDevice *neoDevice = nullptr;
    L0::Device *device = nullptr;
    std::unique_ptr context;
};

// With the failing manager installed, allocSharedMem() with a null device
// handle is expected to return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY.
TEST_F(SharedAllocFailTests, whenAllocatinSharedMemoryAndAllocationFailsThenOutOfDeviceMemoryIsReturned) {
    ze_device_mem_alloc_desc_t deviceDesc = {};
    ze_host_mem_alloc_desc_t hostDesc = {};
    void *ptr = nullptr;
    size_t size = 1024;
    ze_result_t res = context->allocSharedMem(nullptr, &deviceDesc, &hostDesc, size, 0u, &ptr);
    EXPECT_EQ(res, ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY);
}

// SVM allocations manager that counts host-allocation requests and serves each
// one with alignedMalloc(4096u, 4096u); the `size` argument is not used.
struct SVMAllocsManagerSharedAllocMultiDeviceMock : public NEO::SVMAllocsManager {
    SVMAllocsManagerSharedAllocMultiDeviceMock(MemoryManager *memoryManager) : NEO::SVMAllocsManager(memoryManager, false) {}
    void *createHostUnifiedMemoryAllocation(size_t size,
                                            const UnifiedMemoryProperties &memoryProperties) override {
        createHostUnifiedMemoryAllocationTimes++;
        return alignedMalloc(4096u, 4096u);
    }

    uint32_t createHostUnifiedMemoryAllocationTimes = 0;
};

// Context that pairs with SVMAllocsManagerSharedAllocMultiDeviceMock.
// NOTE(review): freeMem() casts driverHandle->svmAllocsManager to the mock type
// unconditionally, so it presumably must only be used with that mock installed.
struct ContextMultiDeviceMock : public L0::ContextImp {
    ContextMultiDeviceMock(L0::DriverHandleImp *driverHandle) : L0::ContextImp(driverHandle) {}
    // While the mock has served no host allocation, defer to ContextImp::freeMem();
    // afterwards release the pointer with alignedFree() and report success.
    ze_result_t freeMem(const void *ptr) override {
        SVMAllocsManagerSharedAllocMultiDeviceMock *currSvmAllocsManager =
            static_cast(this->driverHandle->svmAllocsManager);
        if (currSvmAllocsManager->createHostUnifiedMemoryAllocationTimes == 0) {
            return ContextImp::freeMem(ptr);
        }
        alignedFree(const_cast(ptr));
        return ZE_RESULT_SUCCESS;
    }
    // Always reports memory as shareable, regardless of the arguments.
    bool isShareableMemory(const void *pNext, bool exportableMemory, NEO::Device *neoDevice) override {
        return true;
    }
};

// Fixture: `numRootDevices` (4) root devices created through DeviceFactory,
// the counting SVM manager mock installed on the driver handle, and every
// device registered with the context.
struct SharedAllocMultiDeviceTests : public ::testing::Test {
    void SetUp() override {
        NEO::MockCompilerEnableGuard mock(true);
        DebugManager.flags.CreateMultipleRootDevices.set(numRootDevices);
        auto executionEnvironment = new NEO::ExecutionEnvironment;
        auto devices = NEO::DeviceFactory::createDevices(*executionEnvironment);
        driverHandle = std::make_unique();
        ze_result_t res = driverHandle->initialize(std::move(devices));
        EXPECT_EQ(ZE_RESULT_SUCCESS, res);
        // Remember the manager created by initialize() so TearDown() can put it back.
        prevSvmAllocsManager = driverHandle->svmAllocsManager;
        currSvmAllocsManager = new SVMAllocsManagerSharedAllocMultiDeviceMock(driverHandle->memoryManager);
        driverHandle->svmAllocsManager = currSvmAllocsManager;

        context = std::make_unique(driverHandle.get());
        EXPECT_NE(context, nullptr);

        for (uint32_t i = 0; i < numRootDevices; i++) {
            auto device = driverHandle->devices[i];
            context->getDevices().insert(std::make_pair(device->toHandle(), device));
            auto neoDevice = device->getNEODevice();
            context->rootDeviceIndices.insert(neoDevice->getRootDeviceIndex());
            context->deviceBitfields.insert({neoDevice->getRootDeviceIndex(), neoDevice->getDeviceBitfield()});
        }
    }

    // Restores the original manager on the driver handle, then deletes the mock.
    void TearDown() override {
        driverHandle->svmAllocsManager = prevSvmAllocsManager;
        delete currSvmAllocsManager;
    }

    // Restores debug flags changed in SetUp() when the fixture is destroyed.
    DebugManagerStateRestore restorer;
    NEO::SVMAllocsManager *prevSvmAllocsManager;
    SVMAllocsManagerSharedAllocMultiDeviceMock *currSvmAllocsManager;
    std::unique_ptr driverHandle;
    std::unique_ptr context;
    const uint32_t numRootDevices = 4u;
};
// With a null device handle on a multi-root-device setup, allocSharedMem() is
// expected to go through createHostUnifiedMemoryAllocation exactly once.
TEST_F(SharedAllocMultiDeviceTests, whenAllocatinSharedMemoryWithNullDeviceInAMultiDeviceSystemThenHostAllocationIsCreated) {
    ze_device_mem_alloc_desc_t deviceDesc = {};
    ze_host_mem_alloc_desc_t hostDesc = {};
    void *ptr = nullptr;
    size_t size = 1024;
    EXPECT_EQ(currSvmAllocsManager->createHostUnifiedMemoryAllocationTimes, 0u);
    ze_result_t res = context->allocSharedMem(nullptr, &deviceDesc, &hostDesc, size, 0u, &ptr);
    EXPECT_EQ(res, ZE_RESULT_SUCCESS);
    EXPECT_EQ(currSvmAllocsManager->createHostUnifiedMemoryAllocationTimes, 1u);
    res = context->freeMem(ptr);
    EXPECT_EQ(res, ZE_RESULT_SUCCESS);
}

// With an explicit device handle, each root device in turn must yield a
// ZE_MEMORY_TYPE_SHARED allocation associated with that device, and the host
// allocation counter must remain 0 throughout.
TEST_F(SharedAllocMultiDeviceTests, whenAllocatinSharedMemoryWithNonNullDeviceInAMultiDeviceSystemThenDeviceAllocationIsCreated) {
    ze_device_mem_alloc_desc_t deviceDesc = {};
    ze_host_mem_alloc_desc_t hostDesc = {};
    void *ptr = nullptr;
    size_t size = 1024;
    ze_result_t res = ZE_RESULT_ERROR_UNKNOWN;
    ze_memory_allocation_properties_t memoryProperties = {};
    ze_device_handle_t deviceHandle;
    EXPECT_EQ(currSvmAllocsManager->createHostUnifiedMemoryAllocationTimes, 0u);
    for (uint32_t i = 0; i < numRootDevices; i++) {
        res = context->allocSharedMem(driverHandle->devices[i]->toHandle(), &deviceDesc, &hostDesc, size, 0u, &ptr);
        EXPECT_EQ(res, ZE_RESULT_SUCCESS);
        res = context->getMemAllocProperties(ptr, &memoryProperties, &deviceHandle);
        EXPECT_EQ(ZE_RESULT_SUCCESS, res);
        EXPECT_EQ(memoryProperties.type, ZE_MEMORY_TYPE_SHARED);
        EXPECT_EQ(deviceHandle, driverHandle->devices[i]->toHandle());
        res = context->freeMem(ptr);
        EXPECT_EQ(res, ZE_RESULT_SUCCESS);
    }
    EXPECT_EQ(currSvmAllocsManager->createHostUnifiedMemoryAllocationTimes, 0u);
}

// Fixture: one root device with `numSubDevices` (2) sub-devices and the
// counting SVM manager mock installed on the driver handle.
// NOTE(review): the template parameter list is not visible in this extract;
// presumably `enableWalkerPartition` is the non-type parameter supplied by the
// <0>/<1> aliases below — confirm against the original file.
template struct MemAllocMultiSubDeviceTests : public ::testing::Test {
    void SetUp() override {
        NEO::MockCompilerEnableGuard mock(true);
        DebugManager.flags.EnableWalkerPartition.set(enableWalkerPartition);
        DebugManager.flags.CreateMultipleSubDevices.set(numSubDevices);
        auto executionEnvironment = new NEO::ExecutionEnvironment;
        auto devices = NEO::DeviceFactory::createDevices(*executionEnvironment);
        driverHandle = std::make_unique();
        ze_result_t res = driverHandle->initialize(std::move(devices));
        EXPECT_EQ(ZE_RESULT_SUCCESS, res);
        // Remember the manager created by initialize() so TearDown() can put it back.
        prevSvmAllocsManager = driverHandle->svmAllocsManager;
        currSvmAllocsManager = new SVMAllocsManagerSharedAllocMultiDeviceMock(driverHandle->memoryManager);
        driverHandle->svmAllocsManager = currSvmAllocsManager;

        context = std::make_unique(driverHandle.get());
        EXPECT_NE(context, nullptr);

        for (uint32_t i = 0; i < numRootDevices; i++) {
            auto device = driverHandle->devices[i];
            context->getDevices().insert(std::make_pair(device->toHandle(), device));
            auto neoDevice = device->getNEODevice();
            context->rootDeviceIndices.insert(neoDevice->getRootDeviceIndex());
            context->deviceBitfields.insert({neoDevice->getRootDeviceIndex(), neoDevice->getDeviceBitfield()});
        }
    }

    // Restores the original manager on the driver handle, then deletes the mock.
    void TearDown() override {
        driverHandle->svmAllocsManager = prevSvmAllocsManager;
        delete currSvmAllocsManager;
    }

    // Restores debug flags changed in SetUp() when the fixture is destroyed.
    DebugManagerStateRestore restorer;
    NEO::SVMAllocsManager *prevSvmAllocsManager;
    SVMAllocsManagerSharedAllocMultiDeviceMock *currSvmAllocsManager;
    std::unique_ptr driverHandle;
    std::unique_ptr context;
    const uint32_t numSubDevices = 2u;
    const uint32_t numRootDevices = 1u;
};

using MemAllocMultiSubDeviceTestsDisabledImplicitScaling = MemAllocMultiSubDeviceTests<0>;
using MemAllocMultiSubDeviceTestsEnabledImplicitScaling = MemAllocMultiSubDeviceTests<1>;

// The tests below request an allocation of the device's full globalMemSize
// with the relaxed-limits MAX_SIZE extension chained through pNext, and check
// the error code expected for each fixture variant.

// Disabled variant, device memory: ZE_RESULT_ERROR_UNSUPPORTED_SIZE expected.
TEST_F(MemAllocMultiSubDeviceTestsDisabledImplicitScaling, GivenImplicitScalingDisabledWhenAllocatingDeviceMemorySubDeviceMemorySizeUsedThenExpectCorrectErrorReturned) {
    ze_device_mem_alloc_desc_t deviceDesc = {};
    void *ptr = nullptr;
    size_t size = driverHandle->devices[0]->getNEODevice()->getDeviceInfo().globalMemSize;
    deviceDesc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC;
    ze_relaxed_allocation_limits_exp_desc_t relaxedSizeDesc = {};
    relaxedSizeDesc.stype = ZE_STRUCTURE_TYPE_RELAXED_ALLOCATION_LIMITS_EXP_DESC;
    relaxedSizeDesc.flags = ZE_RELAXED_ALLOCATION_LIMITS_EXP_FLAG_MAX_SIZE;
    deviceDesc.pNext = &relaxedSizeDesc;

    ze_result_t res = context->allocDeviceMem(driverHandle->devices[0]->toHandle(), &deviceDesc, size, 0u, &ptr);
    EXPECT_EQ(res, ZE_RESULT_ERROR_UNSUPPORTED_SIZE);
}

// Enabled variant, device memory: ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY expected.
TEST_F(MemAllocMultiSubDeviceTestsEnabledImplicitScaling, GivenImplicitScalingEnabledWhenAllocatingDeviceMemorySubDeviceMemorySizeUsedThenExpectCorrectErrorReturned) {
    ze_device_mem_alloc_desc_t deviceDesc = {};
    void *ptr = nullptr;
    size_t size = driverHandle->devices[0]->getNEODevice()->getDeviceInfo().globalMemSize;
    deviceDesc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC;
    ze_relaxed_allocation_limits_exp_desc_t relaxedSizeDesc = {};
    relaxedSizeDesc.stype = ZE_STRUCTURE_TYPE_RELAXED_ALLOCATION_LIMITS_EXP_DESC;
    relaxedSizeDesc.flags = ZE_RELAXED_ALLOCATION_LIMITS_EXP_FLAG_MAX_SIZE;
    deviceDesc.pNext = &relaxedSizeDesc;

    ze_result_t res = context->allocDeviceMem(driverHandle->devices[0]->toHandle(), &deviceDesc, size, 0u, &ptr);
    EXPECT_EQ(res, ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY);
}

// Disabled variant, shared memory: ZE_RESULT_ERROR_UNSUPPORTED_SIZE expected.
TEST_F(MemAllocMultiSubDeviceTestsDisabledImplicitScaling, GivenImplicitScalingDisabledWhenAllocatingSharedMemorySubDeviceMemorySizeUsedThenExpectCorrectErrorReturned) {
    ze_device_mem_alloc_desc_t deviceDesc = {};
    ze_host_mem_alloc_desc_t hostDesc = {};
    void *ptr = nullptr;
    size_t size = driverHandle->devices[0]->getNEODevice()->getDeviceInfo().globalMemSize;
    deviceDesc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC;
    ze_relaxed_allocation_limits_exp_desc_t relaxedSizeDesc = {};
    relaxedSizeDesc.stype = ZE_STRUCTURE_TYPE_RELAXED_ALLOCATION_LIMITS_EXP_DESC;
    relaxedSizeDesc.flags = ZE_RELAXED_ALLOCATION_LIMITS_EXP_FLAG_MAX_SIZE;
    deviceDesc.pNext = &relaxedSizeDesc;

    ze_result_t res = context->allocSharedMem(driverHandle->devices[0]->toHandle(), &deviceDesc, &hostDesc, size, 0u, &ptr);
    EXPECT_EQ(res, ZE_RESULT_ERROR_UNSUPPORTED_SIZE);
}

// NOTE(review): the test opened here runs on the *Enabled* fixture, but its
// name (which continues on the following source line) starts with
// "GivenImplicitScalingDisabled" — looks like a copy/paste slip; confirm and rename.
TEST_F(MemAllocMultiSubDeviceTestsEnabledImplicitScaling,
GivenImplicitScalingDisabledWhenAllocatingSharedMemorySubDeviceMemorySizeUsedThenExpectCorrectErrorReturned) { ze_device_mem_alloc_desc_t deviceDesc = {}; ze_host_mem_alloc_desc_t hostDesc = {}; void *ptr = nullptr; size_t size = driverHandle->devices[0]->getNEODevice()->getDeviceInfo().globalMemSize; deviceDesc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC; ze_relaxed_allocation_limits_exp_desc_t relaxedSizeDesc = {}; relaxedSizeDesc.stype = ZE_STRUCTURE_TYPE_RELAXED_ALLOCATION_LIMITS_EXP_DESC; relaxedSizeDesc.flags = ZE_RELAXED_ALLOCATION_LIMITS_EXP_FLAG_MAX_SIZE; deviceDesc.pNext = &relaxedSizeDesc; ze_result_t res = context->allocSharedMem(driverHandle->devices[0]->toHandle(), &deviceDesc, &hostDesc, size, 0u, &ptr); EXPECT_EQ(res, ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY); } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/memory/test_memory_drm/000077500000000000000000000000001422164147700320345ustar00rootroot00000000000000test_memory.cpp000066400000000000000000000257151422164147700350420ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/memory/test_memory_drm/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/test/unit_tests/fixtures/memory_ipc_fixture.h" namespace L0 { namespace ult { using MemoryIPCTests = MemoryExportImportTest; TEST_F(MemoryIPCTests, givenCallToGetIpcHandleWithNotKnownPointerThenInvalidArgumentIsReturned) { uint32_t value = 0; ze_ipc_mem_handle_t ipcHandle; ze_result_t result = context->getIpcMemHandle(&value, &ipcHandle); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, result); } TEST_F(MemoryIPCTests, givenCallToGetIpcHandleWithDeviceAllocationThenIpcHandleIsReturned) { size_t size = 10; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_result_t result = context->allocDeviceMem(device->toHandle(), &deviceDesc, size, alignment, &ptr); 
EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); ze_ipc_mem_handle_t ipcHandle; result = context->getIpcMemHandle(ptr, &ipcHandle); EXPECT_EQ(ZE_RESULT_SUCCESS, result); result = context->freeMem(ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST_F(MemoryIPCTests, whenCallingOpenIpcHandleWithIpcHandleThenDeviceAllocationIsReturned) { size_t size = 10; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_result_t result = context->allocDeviceMem(device->toHandle(), &deviceDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); ze_ipc_mem_handle_t ipcHandle = {}; result = context->getIpcMemHandle(ptr, &ipcHandle); EXPECT_EQ(ZE_RESULT_SUCCESS, result); neoDevice->executionEnvironment->rootDeviceEnvironments[0]->osInterface.reset(new NEO::OSInterface()); neoDevice->executionEnvironment->rootDeviceEnvironments[0]->osInterface->setDriverModel(std::make_unique(512)); ze_ipc_memory_flags_t flags = {}; void *ipcPtr; result = context->openIpcMemHandle(device->toHandle(), ipcHandle, flags, &ipcPtr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(ipcPtr, ptr); result = context->closeIpcMemHandle(ipcPtr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); result = context->freeMem(ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST_F(MemoryIPCTests, whenCallingOpenIpcHandleWithIpcHandleAndUsingContextThenDeviceAllocationIsReturned) { size_t size = 10; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_result_t result = context->allocDeviceMem(device->toHandle(), &deviceDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); ze_ipc_mem_handle_t ipcHandle = {}; result = context->getIpcMemHandle(ptr, &ipcHandle); EXPECT_EQ(ZE_RESULT_SUCCESS, result); neoDevice->executionEnvironment->rootDeviceEnvironments[0]->osInterface.reset(new NEO::OSInterface()); 
neoDevice->executionEnvironment->rootDeviceEnvironments[0]->osInterface->setDriverModel(std::make_unique(512)); ze_ipc_memory_flags_t flags = {}; void *ipcPtr; result = context->openIpcMemHandle(device->toHandle(), ipcHandle, flags, &ipcPtr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(ipcPtr, ptr); result = context->closeIpcMemHandle(ipcPtr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); result = context->freeMem(ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST_F(MemoryIPCTests, givenCallingGetIpcHandleWithDeviceAllocationAndUsingContextThenIpcHandleIsReturned) { size_t size = 10; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_result_t result = context->allocDeviceMem(device->toHandle(), &deviceDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); ze_ipc_mem_handle_t ipcHandle; result = context->getIpcMemHandle(ptr, &ipcHandle); EXPECT_EQ(ZE_RESULT_SUCCESS, result); result = context->freeMem(ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST_F(MemoryIPCTests, whenCallingOpenIpcHandleWithIncorrectHandleThenInvalidArgumentIsReturned) { ze_ipc_mem_handle_t ipcHandle = {}; ze_ipc_memory_flags_t flags = {}; void *ipcPtr; ze_result_t res = context->openIpcMemHandle(device->toHandle(), ipcHandle, flags, &ipcPtr); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, res); } struct MemoryGetIpcHandleTest : public ::testing::Test { void SetUp() override { NEO::MockCompilerEnableGuard mock(true); neoDevice = NEO::MockDevice::createWithNewExecutionEnvironment(NEO::defaultHwInfo.get()); auto mockBuiltIns = new MockBuiltins(); neoDevice->executionEnvironment->rootDeviceEnvironments[0]->builtins.reset(mockBuiltIns); NEO::DeviceVector devices; devices.push_back(std::unique_ptr(neoDevice)); driverHandle = std::make_unique(); driverHandle->initialize(std::move(devices)); device = driverHandle->devices[0]; context = std::make_unique(driverHandle.get()); EXPECT_NE(context, nullptr); 
context->getDevices().insert(std::make_pair(device->toHandle(), device)); auto neoDevice = device->getNEODevice(); context->rootDeviceIndices.insert(neoDevice->getRootDeviceIndex()); context->deviceBitfields.insert({neoDevice->getRootDeviceIndex(), neoDevice->getDeviceBitfield()}); } void TearDown() override { } std::unique_ptr driverHandle; NEO::MockDevice *neoDevice = nullptr; L0::Device *device = nullptr; std::unique_ptr context; }; TEST_F(MemoryGetIpcHandleTest, whenCallingOpenIpcHandleWithIpcHandleThenFdHandleIsCorrectlyRead) { size_t size = 10; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_result_t result = context->allocDeviceMem(device->toHandle(), &deviceDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); ze_ipc_mem_handle_t ipcHandle = {}; result = context->getIpcMemHandle(ptr, &ipcHandle); EXPECT_EQ(ZE_RESULT_SUCCESS, result); neoDevice->executionEnvironment->rootDeviceEnvironments[0]->osInterface.reset(new NEO::OSInterface()); neoDevice->executionEnvironment->rootDeviceEnvironments[0]->osInterface->setDriverModel(std::make_unique(512)); ze_ipc_memory_flags_t flags = {}; void *ipcPtr; result = context->openIpcMemHandle(device->toHandle(), ipcHandle, flags, &ipcPtr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(ipcPtr, ptr); result = context->freeMem(ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST_F(MemoryOpenIpcHandleTest, givenCallToOpenIpcMemHandleItIsSuccessfullyOpenedAndClosed) { size_t size = 10; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_result_t result = context->allocDeviceMem(device->toHandle(), &deviceDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); ze_ipc_mem_handle_t ipcHandle = {}; result = context->getIpcMemHandle(ptr, &ipcHandle); EXPECT_EQ(ZE_RESULT_SUCCESS, result); neoDevice->executionEnvironment->rootDeviceEnvironments[0]->osInterface.reset(new 
NEO::OSInterface()); neoDevice->executionEnvironment->rootDeviceEnvironments[0]->osInterface->setDriverModel(std::make_unique(512)); ze_ipc_memory_flags_t flags = {}; void *ipcPtr; result = context->openIpcMemHandle(device->toHandle(), ipcHandle, flags, &ipcPtr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(ipcPtr, nullptr); result = context->closeIpcMemHandle(ipcPtr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); result = context->freeMem(ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST_F(MemoryExportImportTest, givenCallToDeviceAllocWithExtendedImportDescriptorAndSupportedFlagThenSuccessIsReturned) { size_t size = 10; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_external_memory_export_desc_t extendedDesc = {}; extendedDesc.stype = ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_EXPORT_DESC; extendedDesc.flags = ZE_EXTERNAL_MEMORY_TYPE_FLAG_DMA_BUF; deviceDesc.pNext = &extendedDesc; ze_result_t result = context->allocDeviceMem(device->toHandle(), &deviceDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); ze_memory_allocation_properties_t memoryProperties = {}; ze_external_memory_export_fd_t extendedProperties = {}; extendedProperties.stype = ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_EXPORT_FD; extendedProperties.flags = ZE_EXTERNAL_MEMORY_TYPE_FLAG_DMA_BUF; extendedProperties.fd = std::numeric_limits::max(); memoryProperties.pNext = &extendedProperties; ze_device_handle_t deviceHandle; result = context->getMemAllocProperties(ptr, &memoryProperties, &deviceHandle); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(memoryProperties.type, ZE_MEMORY_TYPE_DEVICE); EXPECT_EQ(deviceHandle, device->toHandle()); EXPECT_NE(extendedProperties.fd, std::numeric_limits::max()); EXPECT_EQ(extendedProperties.fd, driverHandle->mockFd); ze_device_mem_alloc_desc_t importDeviceDesc = {}; ze_external_memory_import_fd_t extendedImportDesc = {}; extendedImportDesc.stype = ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMPORT_FD; 
extendedImportDesc.flags = ZE_EXTERNAL_MEMORY_TYPE_FLAG_DMA_BUF; extendedImportDesc.fd = extendedProperties.fd; importDeviceDesc.pNext = &extendedImportDesc; neoDevice->executionEnvironment->rootDeviceEnvironments[0]->osInterface.reset(new NEO::OSInterface()); neoDevice->executionEnvironment->rootDeviceEnvironments[0]->osInterface->setDriverModel(std::make_unique(512)); void *importedPtr = nullptr; result = context->allocDeviceMem(device->toHandle(), &importDeviceDesc, size, alignment, &importedPtr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); result = context->freeMem(ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } } // namespace ult } // namespace L0compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/memory/test_memory_drm_or_wddm/000077500000000000000000000000001422164147700335475ustar00rootroot00000000000000test_memory.cpp000066400000000000000000000377561422164147700365650ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/memory/test_memory_drm_or_wddm/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/test/unit_tests/fixtures/memory_ipc_fixture.h" namespace L0 { namespace ult { using MemoryIPCTests = MemoryExportImportTest; TEST_F(MemoryIPCTests, givenCallToGetIpcHandleWithNotKnownPointerThenInvalidArgumentIsReturned) { uint32_t value = 0; ze_ipc_mem_handle_t ipcHandle; ze_result_t result = context->getIpcMemHandle(&value, &ipcHandle); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, result); } TEST_F(MemoryIPCTests, givenCallToGetIpcHandleWithDeviceAllocationThenIpcHandleIsReturned) { size_t size = 10; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_result_t result = context->allocDeviceMem(device->toHandle(), &deviceDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); ze_ipc_mem_handle_t ipcHandle; result = context->getIpcMemHandle(ptr, &ipcHandle); 
EXPECT_EQ(ZE_RESULT_SUCCESS, result); result = context->freeMem(ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST_F(MemoryIPCTests, whenCallingOpenIpcHandleWithIpcHandleThenDeviceAllocationIsReturned) { size_t size = 10; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_result_t result = context->allocDeviceMem(device->toHandle(), &deviceDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); ze_ipc_mem_handle_t ipcHandle = {}; result = context->getIpcMemHandle(ptr, &ipcHandle); EXPECT_EQ(ZE_RESULT_SUCCESS, result); neoDevice->executionEnvironment->rootDeviceEnvironments[0]->osInterface.reset(new NEO::OSInterface()); neoDevice->executionEnvironment->rootDeviceEnvironments[0]->osInterface->setDriverModel(std::make_unique(512)); ze_ipc_memory_flags_t flags = {}; void *ipcPtr; result = context->openIpcMemHandle(device->toHandle(), ipcHandle, flags, &ipcPtr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(ipcPtr, ptr); result = context->closeIpcMemHandle(ipcPtr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); result = context->freeMem(ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST_F(MemoryIPCTests, whenCallingOpenIpcHandleWithIpcHandleAndUsingContextThenDeviceAllocationIsReturned) { size_t size = 10; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_result_t result = context->allocDeviceMem(device->toHandle(), &deviceDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); ze_ipc_mem_handle_t ipcHandle = {}; result = context->getIpcMemHandle(ptr, &ipcHandle); EXPECT_EQ(ZE_RESULT_SUCCESS, result); neoDevice->executionEnvironment->rootDeviceEnvironments[0]->osInterface.reset(new NEO::OSInterface()); neoDevice->executionEnvironment->rootDeviceEnvironments[0]->osInterface->setDriverModel(std::make_unique(512)); ze_ipc_memory_flags_t flags = {}; void *ipcPtr; result = context->openIpcMemHandle(device->toHandle(), ipcHandle, 
flags, &ipcPtr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(ipcPtr, ptr); result = context->closeIpcMemHandle(ipcPtr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); result = context->freeMem(ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST_F(MemoryIPCTests, givenCallingGetIpcHandleWithDeviceAllocationAndUsingContextThenIpcHandleIsReturned) { size_t size = 10; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_result_t result = context->allocDeviceMem(device->toHandle(), &deviceDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); ze_ipc_mem_handle_t ipcHandle; result = context->getIpcMemHandle(ptr, &ipcHandle); EXPECT_EQ(ZE_RESULT_SUCCESS, result); result = context->freeMem(ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST_F(MemoryIPCTests, whenCallingOpenIpcHandleWithIncorrectHandleThenInvalidArgumentIsReturned) { ze_ipc_mem_handle_t ipcHandle = {}; ze_ipc_memory_flags_t flags = {}; void *ipcPtr; ze_result_t res = context->openIpcMemHandle(device->toHandle(), ipcHandle, flags, &ipcPtr); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, res); } struct MemoryGetIpcHandleTest : public ::testing::Test { void SetUp() override { NEO::MockCompilerEnableGuard mock(true); neoDevice = NEO::MockDevice::createWithNewExecutionEnvironment(NEO::defaultHwInfo.get()); auto mockBuiltIns = new MockBuiltins(); neoDevice->executionEnvironment->rootDeviceEnvironments[0]->builtins.reset(mockBuiltIns); NEO::DeviceVector devices; devices.push_back(std::unique_ptr(neoDevice)); driverHandle = std::make_unique(); driverHandle->initialize(std::move(devices)); device = driverHandle->devices[0]; context = std::make_unique(driverHandle.get()); EXPECT_NE(context, nullptr); context->getDevices().insert(std::make_pair(device->toHandle(), device)); auto neoDevice = device->getNEODevice(); context->rootDeviceIndices.insert(neoDevice->getRootDeviceIndex()); context->deviceBitfields.insert({neoDevice->getRootDeviceIndex(), 
neoDevice->getDeviceBitfield()}); } void TearDown() override { } std::unique_ptr driverHandle; NEO::MockDevice *neoDevice = nullptr; L0::Device *device = nullptr; std::unique_ptr context; }; TEST_F(MemoryGetIpcHandleTest, whenCallingOpenIpcHandleWithIpcHandleThenFdHandleIsCorrectlyRead) { size_t size = 10; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_result_t result = context->allocDeviceMem(device->toHandle(), &deviceDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); ze_ipc_mem_handle_t ipcHandle = {}; result = context->getIpcMemHandle(ptr, &ipcHandle); EXPECT_EQ(ZE_RESULT_SUCCESS, result); neoDevice->executionEnvironment->rootDeviceEnvironments[0]->osInterface.reset(new NEO::OSInterface()); neoDevice->executionEnvironment->rootDeviceEnvironments[0]->osInterface->setDriverModel(std::make_unique(512)); ze_ipc_memory_flags_t flags = {}; void *ipcPtr; result = context->openIpcMemHandle(device->toHandle(), ipcHandle, flags, &ipcPtr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(ipcPtr, ptr); result = context->freeMem(ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST_F(MemoryOpenIpcHandleTest, givenCallToOpenIpcMemHandleItIsSuccessfullyOpenedAndClosed) { size_t size = 10; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_result_t result = context->allocDeviceMem(device->toHandle(), &deviceDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); ze_ipc_mem_handle_t ipcHandle = {}; result = context->getIpcMemHandle(ptr, &ipcHandle); EXPECT_EQ(ZE_RESULT_SUCCESS, result); neoDevice->executionEnvironment->rootDeviceEnvironments[0]->osInterface.reset(new NEO::OSInterface()); neoDevice->executionEnvironment->rootDeviceEnvironments[0]->osInterface->setDriverModel(std::make_unique(512)); ze_ipc_memory_flags_t flags = {}; void *ipcPtr; result = context->openIpcMemHandle(device->toHandle(), ipcHandle, flags, 
&ipcPtr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(ipcPtr, nullptr); result = context->closeIpcMemHandle(ipcPtr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); result = context->freeMem(ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST_F(MemoryExportImportTest, givenCallToDeviceAllocWithExtendedImportDescriptorAndSupportedFlagThenSuccessIsReturned) { size_t size = 10; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_external_memory_export_desc_t extendedDesc = {}; extendedDesc.stype = ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_EXPORT_DESC; extendedDesc.flags = ZE_EXTERNAL_MEMORY_TYPE_FLAG_DMA_BUF; deviceDesc.pNext = &extendedDesc; ze_result_t result = context->allocDeviceMem(device->toHandle(), &deviceDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); ze_memory_allocation_properties_t memoryProperties = {}; ze_external_memory_export_fd_t extendedProperties = {}; extendedProperties.stype = ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_EXPORT_FD; extendedProperties.flags = ZE_EXTERNAL_MEMORY_TYPE_FLAG_DMA_BUF; extendedProperties.fd = std::numeric_limits::max(); memoryProperties.pNext = &extendedProperties; ze_device_handle_t deviceHandle; result = context->getMemAllocProperties(ptr, &memoryProperties, &deviceHandle); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(memoryProperties.type, ZE_MEMORY_TYPE_DEVICE); EXPECT_EQ(deviceHandle, device->toHandle()); EXPECT_NE(extendedProperties.fd, std::numeric_limits::max()); EXPECT_EQ(extendedProperties.fd, driverHandle->mockFd); ze_device_mem_alloc_desc_t importDeviceDesc = {}; ze_external_memory_import_fd_t extendedImportDesc = {}; extendedImportDesc.stype = ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMPORT_FD; extendedImportDesc.flags = ZE_EXTERNAL_MEMORY_TYPE_FLAG_DMA_BUF; extendedImportDesc.fd = extendedProperties.fd; importDeviceDesc.pNext = &extendedImportDesc; neoDevice->executionEnvironment->rootDeviceEnvironments[0]->osInterface.reset(new NEO::OSInterface()); 
neoDevice->executionEnvironment->rootDeviceEnvironments[0]->osInterface->setDriverModel(std::make_unique(512)); void *importedPtr = nullptr; result = context->allocDeviceMem(device->toHandle(), &importDeviceDesc, size, alignment, &importedPtr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); result = context->freeMem(ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST_F(MemoryExportImportTest, givenCallToDeviceAllocWithExtendedImportDescriptorAndWDDMDriverTypeThenFailureIsReturned) { size_t size = 10; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_external_memory_export_desc_t extendedDesc = {}; extendedDesc.stype = ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_EXPORT_DESC; extendedDesc.flags = ZE_EXTERNAL_MEMORY_TYPE_FLAG_DMA_BUF; deviceDesc.pNext = &extendedDesc; ze_result_t result = context->allocDeviceMem(device->toHandle(), &deviceDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); ze_memory_allocation_properties_t memoryProperties = {}; ze_external_memory_export_fd_t extendedProperties = {}; extendedProperties.stype = ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_EXPORT_FD; extendedProperties.flags = ZE_EXTERNAL_MEMORY_TYPE_FLAG_DMA_BUF; extendedProperties.fd = std::numeric_limits::max(); memoryProperties.pNext = &extendedProperties; ze_device_handle_t deviceHandle; result = context->getMemAllocProperties(ptr, &memoryProperties, &deviceHandle); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(memoryProperties.type, ZE_MEMORY_TYPE_DEVICE); EXPECT_EQ(deviceHandle, device->toHandle()); EXPECT_NE(extendedProperties.fd, std::numeric_limits::max()); EXPECT_EQ(extendedProperties.fd, driverHandle->mockFd); ze_device_mem_alloc_desc_t importDeviceDesc = {}; ze_external_memory_import_fd_t extendedImportDesc = {}; extendedImportDesc.stype = ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMPORT_FD; extendedImportDesc.flags = ZE_EXTERNAL_MEMORY_TYPE_FLAG_DMA_BUF; extendedImportDesc.fd = extendedProperties.fd; importDeviceDesc.pNext = 
&extendedImportDesc; neoDevice->executionEnvironment->rootDeviceEnvironments[0]->osInterface.reset(new NEO::OSInterface()); neoDevice->executionEnvironment->rootDeviceEnvironments[0]->osInterface->setDriverModel(std::make_unique(512)); void *importedPtr = nullptr; result = context->allocDeviceMem(device->toHandle(), &importDeviceDesc, size, alignment, &importedPtr); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, result); result = context->freeMem(ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST_F(MemoryExportImportWSLTest, givenCallToDeviceAllocWithExtendedImportDescriptorAndNTHandleWithWDDMThenSuccessIsReturned) { size_t size = 10; size_t alignment = 1u; void *ptr = nullptr; MemoryManagerMemHandleMock *fixtureMemoryManager = static_cast(currMemoryManager); fixtureMemoryManager->NTHandle = true; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_external_memory_export_desc_t extendedDesc = {}; extendedDesc.stype = ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_EXPORT_DESC; extendedDesc.flags = ZE_EXTERNAL_MEMORY_TYPE_FLAG_DMA_BUF; deviceDesc.pNext = &extendedDesc; ze_result_t result = context->allocDeviceMem(device->toHandle(), &deviceDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); ze_memory_allocation_properties_t memoryProperties = {}; ze_external_memory_export_fd_t extendedProperties = {}; extendedProperties.stype = ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_EXPORT_FD; extendedProperties.flags = ZE_EXTERNAL_MEMORY_TYPE_FLAG_DMA_BUF; extendedProperties.fd = std::numeric_limits::max(); memoryProperties.pNext = &extendedProperties; ze_device_handle_t deviceHandle; result = context->getMemAllocProperties(ptr, &memoryProperties, &deviceHandle); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(memoryProperties.type, ZE_MEMORY_TYPE_DEVICE); EXPECT_EQ(deviceHandle, device->toHandle()); EXPECT_NE(extendedProperties.fd, std::numeric_limits::max()); EXPECT_EQ(extendedProperties.fd, driverHandle->mockFd); ze_device_mem_alloc_desc_t importDeviceDesc = {}; 
ze_external_memory_import_fd_t extendedImportDesc = {}; extendedImportDesc.stype = ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMPORT_FD; extendedImportDesc.flags = ZE_EXTERNAL_MEMORY_TYPE_FLAG_DMA_BUF; extendedImportDesc.fd = extendedProperties.fd; importDeviceDesc.pNext = &extendedImportDesc; neoDevice->executionEnvironment->rootDeviceEnvironments[0]->osInterface.reset(new NEO::OSInterface()); neoDevice->executionEnvironment->rootDeviceEnvironments[0]->osInterface->setDriverModel(std::make_unique(512)); void *importedPtr = nullptr; result = context->allocDeviceMem(device->toHandle(), &importDeviceDesc, size, alignment, &importedPtr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); result = context->freeMem(ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } } // namespace ult } // namespace L0compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/memory/test_memory_wddm/000077500000000000000000000000001422164147700322055ustar00rootroot00000000000000test_memory.cpp000066400000000000000000000171061422164147700352060ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/memory/test_memory_wddm/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/test/unit_tests/fixtures/memory_ipc_fixture.h" namespace L0 { namespace ult { using MemoryIPCTests = MemoryExportImportWinHandleTest; TEST_F(MemoryIPCTests, givenCallToGetIpcHandleWithNotKnownPointerThenInvalidArgumentIsReturned) { uint32_t value = 0; ze_ipc_mem_handle_t ipcHandle; ze_result_t result = context->getIpcMemHandle(&value, &ipcHandle); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, result); } TEST_F(MemoryIPCTests, givenCallToGetIpcHandleWithDeviceAllocationThenIpcHandleIsReturned) { size_t size = 10; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_result_t result = context->allocDeviceMem(device->toHandle(), &deviceDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); 
EXPECT_NE(nullptr, ptr); ze_ipc_mem_handle_t ipcHandle; result = context->getIpcMemHandle(ptr, &ipcHandle); EXPECT_EQ(ZE_RESULT_SUCCESS, result); result = context->freeMem(ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST_F(MemoryIPCTests, whenCallingOpenIpcHandleWithIpcHandleThenDeviceAllocationIsReturned) { size_t size = 10; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_result_t result = context->allocDeviceMem(device->toHandle(), &deviceDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); ze_ipc_mem_handle_t ipcHandle = {}; result = context->getIpcMemHandle(ptr, &ipcHandle); EXPECT_EQ(ZE_RESULT_SUCCESS, result); ze_ipc_memory_flags_t flags = {}; void *ipcPtr; result = context->openIpcMemHandle(device->toHandle(), ipcHandle, flags, &ipcPtr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(ipcPtr, ptr); result = context->closeIpcMemHandle(ipcPtr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); result = context->freeMem(ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST_F(MemoryIPCTests, whenCallingOpenIpcHandleWithIpcHandleAndUsingContextThenDeviceAllocationIsReturned) { size_t size = 10; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_result_t result = context->allocDeviceMem(device->toHandle(), &deviceDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); ze_ipc_mem_handle_t ipcHandle = {}; result = context->getIpcMemHandle(ptr, &ipcHandle); EXPECT_EQ(ZE_RESULT_SUCCESS, result); ze_ipc_memory_flags_t flags = {}; void *ipcPtr; result = context->openIpcMemHandle(device->toHandle(), ipcHandle, flags, &ipcPtr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(ipcPtr, ptr); result = context->closeIpcMemHandle(ipcPtr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); result = context->freeMem(ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST_F(MemoryIPCTests, 
givenCallingGetIpcHandleWithDeviceAllocationAndUsingContextThenIpcHandleIsReturned) { size_t size = 10; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_result_t result = context->allocDeviceMem(device->toHandle(), &deviceDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); ze_ipc_mem_handle_t ipcHandle; result = context->getIpcMemHandle(ptr, &ipcHandle); EXPECT_EQ(ZE_RESULT_SUCCESS, result); result = context->freeMem(ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST_F(MemoryIPCTests, whenCallingOpenIpcHandleWithIncorrectHandleThenInvalidArgumentIsReturned) { ze_ipc_mem_handle_t ipcHandle = {}; ze_ipc_memory_flags_t flags = {}; void *ipcPtr; ze_result_t res = context->openIpcMemHandle(device->toHandle(), ipcHandle, flags, &ipcPtr); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, res); } struct MemoryGetIpcHandleTest : public ::testing::Test { void SetUp() override { NEO::MockCompilerEnableGuard mock(true); neoDevice = NEO::MockDevice::createWithNewExecutionEnvironment(NEO::defaultHwInfo.get()); auto mockBuiltIns = new MockBuiltins(); neoDevice->executionEnvironment->rootDeviceEnvironments[0]->builtins.reset(mockBuiltIns); NEO::DeviceVector devices; devices.push_back(std::unique_ptr(neoDevice)); driverHandle = std::make_unique(); driverHandle->initialize(std::move(devices)); device = driverHandle->devices[0]; context = std::make_unique(driverHandle.get()); EXPECT_NE(context, nullptr); context->getDevices().insert(std::make_pair(device->toHandle(), device)); auto neoDevice = device->getNEODevice(); context->rootDeviceIndices.insert(neoDevice->getRootDeviceIndex()); context->deviceBitfields.insert({neoDevice->getRootDeviceIndex(), neoDevice->getDeviceBitfield()}); } void TearDown() override { } std::unique_ptr driverHandle; NEO::MockDevice *neoDevice = nullptr; L0::Device *device = nullptr; std::unique_ptr context; }; TEST_F(MemoryGetIpcHandleTest, 
whenCallingOpenIpcHandleWithIpcHandleThenFdHandleIsCorrectlyRead) { size_t size = 10; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_result_t result = context->allocDeviceMem(device->toHandle(), &deviceDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); ze_ipc_mem_handle_t ipcHandle = {}; result = context->getIpcMemHandle(ptr, &ipcHandle); EXPECT_EQ(ZE_RESULT_SUCCESS, result); ze_ipc_memory_flags_t flags = {}; void *ipcPtr; result = context->openIpcMemHandle(device->toHandle(), ipcHandle, flags, &ipcPtr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(ipcPtr, ptr); result = context->freeMem(ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST_F(MemoryOpenIpcHandleTest, givenCallToOpenIpcMemHandleItIsSuccessfullyOpenedAndClosed) { size_t size = 10; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_result_t result = context->allocDeviceMem(device->toHandle(), &deviceDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); ze_ipc_mem_handle_t ipcHandle = {}; result = context->getIpcMemHandle(ptr, &ipcHandle); EXPECT_EQ(ZE_RESULT_SUCCESS, result); ze_ipc_memory_flags_t flags = {}; void *ipcPtr; result = context->openIpcMemHandle(device->toHandle(), ipcHandle, flags, &ipcPtr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(ipcPtr, nullptr); result = context->closeIpcMemHandle(ipcPtr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); result = context->freeMem(ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } } // namespace ult } // namespace L0compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/memory_manager/000077500000000000000000000000001422164147700303155ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/memory_manager/CMakeLists.txt000066400000000000000000000004211422164147700330520ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # 
SPDX-License-Identifier: MIT # target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/compression_selector_l0_tests.cpp ) add_subdirectories() compression_selector_l0_tests.cpp000066400000000000000000000060571422164147700370300ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/memory_manager/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/memory_manager/compression_selector.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/default_hw_info.h" #include "shared/test/common/test_macros/test.h" namespace L0 { namespace ult { TEST(CompressionSelectorL0Tests, GivenDefaultDebugFlagWhenProvidingUsmAllocationThenExpectCompressionDisabled) { DeviceBitfield deviceBitfield{0x0}; AllocationProperties properties(0, MemoryConstants::pageSize, AllocationType::BUFFER, deviceBitfield); properties.flags.isUSMDeviceAllocation = 1u; EXPECT_FALSE(NEO::CompressionSelector::preferCompressedAllocation(properties, *defaultHwInfo)); } TEST(CompressionSelectorL0Tests, GivenDisabledDebugFlagWhenProvidingUsmAllocationThenExpectCompressionDisabled) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.EnableUsmCompression.set(0); DeviceBitfield deviceBitfield{0x0}; AllocationProperties properties(0, MemoryConstants::pageSize, AllocationType::BUFFER, deviceBitfield); properties.flags.isUSMDeviceAllocation = 1u; EXPECT_FALSE(NEO::CompressionSelector::preferCompressedAllocation(properties, *defaultHwInfo)); } TEST(CompressionSelectorL0Tests, GivenEnabledDebugFlagWhenProvidingUsmAllocationThenExpectCompressionEnabled) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.EnableUsmCompression.set(1); DeviceBitfield deviceBitfield{0x0}; AllocationProperties properties(0, MemoryConstants::pageSize, AllocationType::BUFFER, deviceBitfield); properties.flags.isUSMDeviceAllocation = 
1u; EXPECT_TRUE(NEO::CompressionSelector::preferCompressedAllocation(properties, *defaultHwInfo)); } TEST(CompressionSelectorL0Tests, GivenEnabledDebugFlagWhenProvidingSvmGpuAllocationThenExpectCompressionEnabled) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.EnableUsmCompression.set(1); DeviceBitfield deviceBitfield{0x0}; AllocationProperties properties(0, MemoryConstants::pageSize, AllocationType::SVM_GPU, deviceBitfield); EXPECT_TRUE(NEO::CompressionSelector::preferCompressedAllocation(properties, *defaultHwInfo)); } TEST(CompressionSelectorL0Tests, GivenEnabledDebugFlagWhenProvidingOtherAllocationThenExpectCompressionDisabled) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.EnableUsmCompression.set(1); DeviceBitfield deviceBitfield{0x0}; AllocationProperties properties(0, MemoryConstants::pageSize, AllocationType::BUFFER_HOST_MEMORY, deviceBitfield); EXPECT_FALSE(NEO::CompressionSelector::preferCompressedAllocation(properties, *defaultHwInfo)); } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/module/000077500000000000000000000000001422164147700266005ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/module/CMakeLists.txt000066400000000000000000000003521422164147700313400ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/test_module.cpp ) compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/module/test_module.cpp000066400000000000000000003476141422164147700316470ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/compiler_interface/compiler_options/compiler_options.h" #include "shared/source/compiler_interface/compiler_warnings/compiler_warnings.h" #include 
"shared/source/device_binary_format/debug_zebin.h" #include "shared/source/gmm_helper/gmm.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/kernel/implicit_args.h" #include "shared/source/program/kernel_info.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/mocks/mock_elf.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/compiler_interface/linker_mock.h" #include "shared/test/unit_test/device_binary_format/zebin_tests.h" #include "level_zero/core/source/context/context.h" #include "level_zero/core/source/kernel/kernel_imp.h" #include "level_zero/core/source/module/module_imp.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/fixtures/module_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_kernel.h" #include "level_zero/core/test/unit_tests/mocks/mock_module.h" namespace L0 { namespace ult { using ModuleTest = Test; HWTEST_F(ModuleTest, givenBinaryWithDebugDataWhenModuleCreatedFromNativeBinaryThenDebugDataIsStored) { size_t size = 0; std::unique_ptr data; auto result = module->getDebugInfo(&size, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); data = std::make_unique(size); result = module->getDebugInfo(&size, data.get()); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, data.get()); EXPECT_NE(0u, size); } HWTEST_F(ModuleTest, WhenCreatingKernelThenSuccessIsReturned) { ze_kernel_handle_t kernelHandle; ze_kernel_desc_t kernelDesc = {}; kernelDesc.pKernelName = kernelName.c_str(); ze_result_t res = module->createKernel(&kernelDesc, &kernelHandle); EXPECT_EQ(ZE_RESULT_SUCCESS, res); Kernel::fromHandle(kernelHandle)->destroy(); } HWTEST_F(ModuleTest, givenZeroCountWhenGettingKernelNamesThenCountIsFilled) { uint32_t count = 0; auto result = module->getKernelNames(&count, nullptr); auto whiteboxModule = 
whitebox_cast(module.get()); EXPECT_EQ(whiteboxModule->kernelImmDatas.size(), count); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } HWTEST_F(ModuleTest, givenNonZeroCountWhenGettingKernelNamesThenNamesAreReturned) { uint32_t count = 1; const char *kernelNames = nullptr; auto result = module->getKernelNames(&count, &kernelNames); EXPECT_EQ(1u, count); EXPECT_STREQ(this->kernelName.c_str(), kernelNames); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } HWTEST_F(ModuleTest, givenUserModuleTypeWhenCreatingModuleThenCorrectTypeIsSet) { WhiteBox module(device, nullptr, ModuleType::User); EXPECT_EQ(ModuleType::User, module.type); } HWTEST_F(ModuleTest, givenBuiltinModuleTypeWhenCreatingModuleThenCorrectTypeIsSet) { WhiteBox module(device, nullptr, ModuleType::Builtin); EXPECT_EQ(ModuleType::Builtin, module.type); } HWTEST_F(ModuleTest, givenUserModuleWhenCreatedThenCorrectAllocationTypeIsUsedForIsa) { createKernel(); EXPECT_EQ(NEO::AllocationType::KERNEL_ISA, kernel->getIsaAllocation()->getAllocationType()); } HWTEST_F(ModuleTest, givenBuiltinModuleWhenCreatedThenCorrectAllocationTypeIsUsedForIsa) { createModuleFromBinary(ModuleType::Builtin); createKernel(); EXPECT_EQ(NEO::AllocationType::KERNEL_ISA_INTERNAL, kernel->getIsaAllocation()->getAllocationType()); } using ModuleTestSupport = IsWithinProducts; HWTEST2_F(ModuleTest, givenNonPatchedTokenThenSurfaceBaseAddressIsCorrectlySet, ModuleTestSupport) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; ze_kernel_handle_t kernelHandle; ze_kernel_desc_t kernelDesc = {}; kernelDesc.pKernelName = kernelName.c_str(); ze_result_t res = module->createKernel(&kernelDesc, &kernelHandle); EXPECT_EQ(ZE_RESULT_SUCCESS, res); auto kernelImp = reinterpret_cast(L0::Kernel::fromHandle(kernelHandle)); void *devicePtr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; res = context->allocDeviceMem(device->toHandle(), &deviceDesc, 16384u, 0u, &devicePtr); ASSERT_EQ(ZE_RESULT_SUCCESS, res); auto gpuAlloc = 
device->getDriverHandle()->getSvmAllocsManager()->getSVMAllocs()->get(devicePtr)->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex()); ASSERT_NE(nullptr, gpuAlloc); uint32_t argIndex = 0u; kernelImp->setBufferSurfaceState(argIndex, devicePtr, gpuAlloc); auto argInfo = kernelImp->getImmutableData()->getDescriptor().payloadMappings.explicitArgs[argIndex].as(); auto surfaceStateAddressRaw = ptrOffset(kernelImp->getSurfaceStateHeapData(), argInfo.bindful); auto surfaceStateAddress = reinterpret_cast(const_cast(surfaceStateAddressRaw)); EXPECT_EQ(devicePtr, reinterpret_cast(surfaceStateAddress->getSurfaceBaseAddress())); EXPECT_EQ(RENDER_SURFACE_STATE::COHERENCY_TYPE_GPU_COHERENT, surfaceStateAddress->getCoherencyType()); Kernel::fromHandle(kernelHandle)->destroy(); context->freeMem(devicePtr); } HWTEST_F(ModuleTest, givenStatefulBufferWhenOffsetIsPatchedThenAllocBaseAddressIsSet) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; ze_kernel_handle_t kernelHandle; ze_kernel_desc_t kernelDesc = {}; kernelDesc.pKernelName = kernelName.c_str(); ze_result_t res = module->createKernel(&kernelDesc, &kernelHandle); EXPECT_EQ(ZE_RESULT_SUCCESS, res); auto kernelImp = reinterpret_cast(L0::Kernel::fromHandle(kernelHandle)); void *devicePtr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; res = context->allocDeviceMem(device->toHandle(), &deviceDesc, 16384u, 0u, &devicePtr); ASSERT_EQ(ZE_RESULT_SUCCESS, res); auto gpuAlloc = device->getDriverHandle()->getSvmAllocsManager()->getSVMAllocs()->get(devicePtr)->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex()); ASSERT_NE(nullptr, gpuAlloc); uint32_t argIndex = 0u; uint32_t offset = 0x1234; const_cast(&(kernelImp->getImmutableData()->getDescriptor()))->payloadMappings.explicitArgs[argIndex].as().bufferOffset = 0; const_cast(&(kernelImp->getImmutableData()->getDescriptor()))->payloadMappings.explicitArgs[argIndex].as().bindful = 0x80; kernelImp->setBufferSurfaceState(argIndex, 
ptrOffset(devicePtr, offset), gpuAlloc); auto argInfo = kernelImp->getImmutableData()->getDescriptor().payloadMappings.explicitArgs[argIndex].as(); auto surfaceStateAddressRaw = ptrOffset(kernelImp->getSurfaceStateHeapData(), argInfo.bindful); auto surfaceStateAddress = reinterpret_cast(const_cast(surfaceStateAddressRaw)); EXPECT_EQ(devicePtr, reinterpret_cast(surfaceStateAddress->getSurfaceBaseAddress())); Kernel::fromHandle(kernelHandle)->destroy(); context->freeMem(devicePtr); } HWTEST_F(ModuleTest, givenBufferWhenOffsetIsNotPatchedThenPassedPtrIsSetAsBaseAddress) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; ze_kernel_handle_t kernelHandle; ze_kernel_desc_t kernelDesc = {}; kernelDesc.pKernelName = kernelName.c_str(); ze_result_t res = module->createKernel(&kernelDesc, &kernelHandle); EXPECT_EQ(ZE_RESULT_SUCCESS, res); auto kernelImp = reinterpret_cast(L0::Kernel::fromHandle(kernelHandle)); void *devicePtr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; res = context->allocDeviceMem(device->toHandle(), &deviceDesc, 16384u, 0u, &devicePtr); ASSERT_EQ(ZE_RESULT_SUCCESS, res); auto gpuAlloc = device->getDriverHandle()->getSvmAllocsManager()->getSVMAllocs()->get(devicePtr)->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex()); ASSERT_NE(nullptr, gpuAlloc); uint32_t argIndex = 0u; uint32_t offset = 0x1234; const_cast(&(kernelImp->getImmutableData()->getDescriptor()))->payloadMappings.explicitArgs[argIndex].as().bufferOffset = undefined; const_cast(&(kernelImp->getImmutableData()->getDescriptor()))->payloadMappings.explicitArgs[argIndex].as().bindful = 0x80; kernelImp->setBufferSurfaceState(argIndex, ptrOffset(devicePtr, offset), gpuAlloc); auto argInfo = kernelImp->getImmutableData()->getDescriptor().payloadMappings.explicitArgs[argIndex].as(); auto surfaceStateAddressRaw = ptrOffset(kernelImp->getSurfaceStateHeapData(), argInfo.bindful); auto surfaceStateAddress = reinterpret_cast(const_cast(surfaceStateAddressRaw)); 
EXPECT_EQ(ptrOffset(devicePtr, offset), reinterpret_cast(surfaceStateAddress->getSurfaceBaseAddress())); Kernel::fromHandle(kernelHandle)->destroy(); context->freeMem(devicePtr); } HWTEST_F(ModuleTest, givenBufferWhenOffsetIsNotPatchedThenSizeIsDecereasedByOffset) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; ze_kernel_handle_t kernelHandle; ze_kernel_desc_t kernelDesc = {}; kernelDesc.pKernelName = kernelName.c_str(); ze_result_t res = module->createKernel(&kernelDesc, &kernelHandle); EXPECT_EQ(ZE_RESULT_SUCCESS, res); auto kernelImp = reinterpret_cast(L0::Kernel::fromHandle(kernelHandle)); auto allocSize = 16384u; void *devicePtr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; res = context->allocDeviceMem(device->toHandle(), &deviceDesc, allocSize, 0u, &devicePtr); ASSERT_EQ(ZE_RESULT_SUCCESS, res); auto gpuAlloc = device->getDriverHandle()->getSvmAllocsManager()->getSVMAllocs()->get(devicePtr)->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex()); ASSERT_NE(nullptr, gpuAlloc); uint32_t argIndex = 0u; uint32_t offset = 0x1234; const_cast(&(kernelImp->getImmutableData()->getDescriptor()))->payloadMappings.explicitArgs[argIndex].as().bufferOffset = undefined; const_cast(&(kernelImp->getImmutableData()->getDescriptor()))->payloadMappings.explicitArgs[argIndex].as().bindful = 0x80; kernelImp->setBufferSurfaceState(argIndex, ptrOffset(devicePtr, offset), gpuAlloc); auto argInfo = kernelImp->getImmutableData()->getDescriptor().payloadMappings.explicitArgs[argIndex].as(); auto surfaceStateAddressRaw = ptrOffset(kernelImp->getSurfaceStateHeapData(), argInfo.bindful); auto surfaceStateAddress = reinterpret_cast(const_cast(surfaceStateAddressRaw)); SURFACE_STATE_BUFFER_LENGTH length = {0}; length.Length = static_cast((gpuAlloc->getUnderlyingBufferSize() - offset) - 1); EXPECT_EQ(surfaceStateAddress->getWidth(), static_cast(length.SurfaceState.Width + 1)); EXPECT_EQ(surfaceStateAddress->getHeight(), 
static_cast(length.SurfaceState.Height + 1)); EXPECT_EQ(surfaceStateAddress->getDepth(), static_cast(length.SurfaceState.Depth + 1)); Kernel::fromHandle(kernelHandle)->destroy(); context->freeMem(devicePtr); } HWTEST_F(ModuleTest, givenUnalignedHostBufferWhenSurfaceStateProgrammedThenUnalignedSizeIsAdded) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; ze_kernel_handle_t kernelHandle; ze_kernel_desc_t kernelDesc = {}; kernelDesc.pKernelName = kernelName.c_str(); ze_result_t res = module->createKernel(&kernelDesc, &kernelHandle); EXPECT_EQ(ZE_RESULT_SUCCESS, res); auto kernelImp = reinterpret_cast(L0::Kernel::fromHandle(kernelHandle)); void *address = reinterpret_cast(0x23000); MockGraphicsAllocation mockGa; mockGa.gpuAddress = 0x23000; mockGa.size = 0xc; auto alignment = NEO::EncodeSurfaceState::getSurfaceBaseAddressAlignment(); auto allocationOffset = alignment - 1; mockGa.allocationOffset = allocationOffset; uint32_t argIndex = 0u; const_cast(&(kernelImp->getImmutableData()->getDescriptor()))->payloadMappings.explicitArgs[argIndex].as().bufferOffset = undefined; const_cast(&(kernelImp->getImmutableData()->getDescriptor()))->payloadMappings.explicitArgs[argIndex].as().bindful = 0x80; kernelImp->setBufferSurfaceState(argIndex, address, &mockGa); auto argInfo = kernelImp->getImmutableData()->getDescriptor().payloadMappings.explicitArgs[argIndex].as(); auto surfaceStateAddressRaw = ptrOffset(kernelImp->getSurfaceStateHeapData(), argInfo.bindful); auto surfaceStateAddress = reinterpret_cast(const_cast(surfaceStateAddressRaw)); SURFACE_STATE_BUFFER_LENGTH length = {0}; length.Length = alignUp(static_cast((mockGa.getUnderlyingBufferSize() + allocationOffset)), alignment) - 1; EXPECT_EQ(surfaceStateAddress->getWidth(), static_cast(length.SurfaceState.Width + 1)); EXPECT_EQ(surfaceStateAddress->getHeight(), static_cast(length.SurfaceState.Height + 1)); EXPECT_EQ(surfaceStateAddress->getDepth(), static_cast(length.SurfaceState.Depth + 1)); 
Kernel::fromHandle(kernelHandle)->destroy(); } using ModuleUncachedBufferTest = Test; struct KernelImpUncachedTest : public KernelImp { using KernelImp::kernelRequiresUncachedMocsCount; }; HWTEST2_F(ModuleUncachedBufferTest, givenKernelWithNonUncachedArgumentAndPreviouslyNotSetUncachedThenUncachedMocsNotSet, ModuleTestSupport) { ze_kernel_handle_t kernelHandle; ze_kernel_desc_t kernelDesc = {}; kernelDesc.pKernelName = kernelName.c_str(); ze_result_t res = module->createKernel(&kernelDesc, &kernelHandle); EXPECT_EQ(ZE_RESULT_SUCCESS, res); auto kernelImp = reinterpret_cast(L0::Kernel::fromHandle(kernelHandle)); void *devicePtr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; res = context->allocDeviceMem(device->toHandle(), &deviceDesc, 16384u, 0u, &devicePtr); EXPECT_EQ(ZE_RESULT_SUCCESS, res); auto gpuAlloc = device->getDriverHandle()->getSvmAllocsManager()->getSVMAllocs()->get(devicePtr)->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex()); EXPECT_NE(nullptr, gpuAlloc); uint32_t argIndex = 0u; kernelImp->setArgBufferWithAlloc(argIndex, reinterpret_cast(devicePtr), gpuAlloc); EXPECT_FALSE(kernelImp->getKernelRequiresUncachedMocs()); Kernel::fromHandle(kernelHandle)->destroy(); context->freeMem(devicePtr); } HWTEST2_F(ModuleUncachedBufferTest, givenKernelWithNonUncachedArgumentAndPreviouslySetUncachedArgumentThenUncachedMocsNotSet, ModuleTestSupport) { ze_kernel_handle_t kernelHandle; ze_kernel_desc_t kernelDesc = {}; kernelDesc.pKernelName = kernelName.c_str(); ze_result_t res = module->createKernel(&kernelDesc, &kernelHandle); EXPECT_EQ(ZE_RESULT_SUCCESS, res); auto kernelImp = reinterpret_cast(L0::Kernel::fromHandle(kernelHandle)); void *devicePtr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; res = context->allocDeviceMem(device->toHandle(), &deviceDesc, 16384u, 0u, &devicePtr); EXPECT_EQ(ZE_RESULT_SUCCESS, res); auto gpuAlloc = 
device->getDriverHandle()->getSvmAllocsManager()->getSVMAllocs()->get(devicePtr)->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex()); EXPECT_NE(nullptr, gpuAlloc); uint32_t argIndex = 0u; kernelImp->setKernelArgUncached(argIndex, true); kernelImp->kernelRequiresUncachedMocsCount++; kernelImp->setArgBufferWithAlloc(argIndex, reinterpret_cast(devicePtr), gpuAlloc); EXPECT_FALSE(kernelImp->getKernelRequiresUncachedMocs()); Kernel::fromHandle(kernelHandle)->destroy(); context->freeMem(devicePtr); } HWTEST2_F(ModuleUncachedBufferTest, givenKernelWithUncachedArgumentAndPreviouslyNotSetUncachedArgumentThenUncachedMocsIsSet, ModuleTestSupport) { ze_kernel_handle_t kernelHandle; ze_kernel_desc_t kernelDesc = {}; kernelDesc.pKernelName = kernelName.c_str(); ze_result_t res = module->createKernel(&kernelDesc, &kernelHandle); EXPECT_EQ(ZE_RESULT_SUCCESS, res); auto kernelImp = reinterpret_cast(L0::Kernel::fromHandle(kernelHandle)); void *devicePtr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; deviceDesc.flags = ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_UNCACHED; res = context->allocDeviceMem(device->toHandle(), &deviceDesc, 16384u, 0u, &devicePtr); EXPECT_EQ(ZE_RESULT_SUCCESS, res); auto gpuAlloc = device->getDriverHandle()->getSvmAllocsManager()->getSVMAllocs()->get(devicePtr)->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex()); EXPECT_NE(nullptr, gpuAlloc); uint32_t argIndex = 0u; kernelImp->setArgBufferWithAlloc(argIndex, reinterpret_cast(devicePtr), gpuAlloc); EXPECT_TRUE(kernelImp->getKernelRequiresUncachedMocs()); Kernel::fromHandle(kernelHandle)->destroy(); context->freeMem(devicePtr); } HWTEST2_F(ModuleUncachedBufferTest, givenKernelWithUncachedArgumentAndPreviouslySetUncachedArgumentThenUncachedMocsIsSet, ModuleTestSupport) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; ze_kernel_handle_t kernelHandle; ze_kernel_desc_t kernelDesc = {}; kernelDesc.pKernelName = kernelName.c_str(); ze_result_t res = 
module->createKernel(&kernelDesc, &kernelHandle); EXPECT_EQ(ZE_RESULT_SUCCESS, res); auto kernelImp = reinterpret_cast(L0::Kernel::fromHandle(kernelHandle)); void *devicePtr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; deviceDesc.flags = ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_UNCACHED; res = context->allocDeviceMem(device->toHandle(), &deviceDesc, 16384u, 0u, &devicePtr); EXPECT_EQ(ZE_RESULT_SUCCESS, res); auto gpuAlloc = device->getDriverHandle()->getSvmAllocsManager()->getSVMAllocs()->get(devicePtr)->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex()); EXPECT_NE(nullptr, gpuAlloc); uint32_t argIndex = 0u; kernelImp->setKernelArgUncached(argIndex, true); kernelImp->kernelRequiresUncachedMocsCount++; kernelImp->setArgBufferWithAlloc(argIndex, reinterpret_cast(devicePtr), gpuAlloc); EXPECT_TRUE(kernelImp->getKernelRequiresUncachedMocs()); auto argInfo = kernelImp->getImmutableData()->getDescriptor().payloadMappings.explicitArgs[argIndex].as(); auto surfaceStateAddressRaw = ptrOffset(kernelImp->getSurfaceStateHeapData(), argInfo.bindful); auto surfaceStateAddress = reinterpret_cast(const_cast(surfaceStateAddressRaw)); EXPECT_EQ(devicePtr, reinterpret_cast(surfaceStateAddress->getSurfaceBaseAddress())); auto gmmHelper = device->getNEODevice()->getGmmHelper(); uint32_t expectedMocs = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED); EXPECT_EQ(expectedMocs, surfaceStateAddress->getMemoryObjectControlStateReserved()); Kernel::fromHandle(kernelHandle)->destroy(); context->freeMem(devicePtr); } HWTEST_F(ModuleTest, GivenIncorrectNameWhenCreatingKernelThenResultErrorInvalidArgumentErrorIsReturned) { ze_kernel_handle_t kernelHandle; ze_kernel_desc_t kernelDesc = {}; kernelDesc.pKernelName = "nonexistent_function"; ze_result_t res = module->createKernel(&kernelDesc, &kernelHandle); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_KERNEL_NAME, res); } template struct ModuleSpecConstantsFixture : public DeviceFixture { void SetUp() { 
DeviceFixture::SetUp(); mockCompiler = new MockCompilerInterfaceWithSpecConstants(moduleNumSpecConstants); auto rootDeviceEnvironment = neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[0].get(); rootDeviceEnvironment->compilerInterface.reset(mockCompiler); mockTranslationUnit = new MockModuleTranslationUnit(device); } void TearDown() { DeviceFixture::TearDown(); } void runTest() { std::string testFile; retrieveBinaryKernelFilenameApiSpecific(testFile, binaryFilename + "_", ".spv"); size_t size = 0; auto src = loadDataFromFile(testFile.c_str(), size); ASSERT_NE(0u, size); ASSERT_NE(nullptr, src); ze_module_desc_t moduleDesc = {}; moduleDesc.format = ZE_MODULE_FORMAT_IL_SPIRV; moduleDesc.pInputModule = reinterpret_cast(src.get()); moduleDesc.inputSize = size; specConstants.numConstants = mockCompiler->moduleNumSpecConstants; for (uint32_t i = mockCompiler->moduleNumSpecConstants / 2; i > 0; i--) { specConstantsPointerValues.push_back(&mockCompiler->moduleSpecConstantsValuesT1[i - 1]); specConstantsPointerValues.push_back(&mockCompiler->moduleSpecConstantsValuesT2[i - 1]); } for (uint32_t i = mockCompiler->moduleNumSpecConstants; i > 0; i--) { specConstantsPointerIds.push_back(mockCompiler->moduleSpecConstantsIds[i - 1]); } specConstants.pConstantIds = specConstantsPointerIds.data(); specConstants.pConstantValues = specConstantsPointerValues.data(); moduleDesc.pConstants = &specConstants; auto module = new Module(device, nullptr, ModuleType::User); module->translationUnit.reset(mockTranslationUnit); bool success = module->initialize(&moduleDesc, neoDevice); for (uint32_t i = 0; i < mockCompiler->moduleNumSpecConstants / 2; i++) { EXPECT_EQ(static_cast(module->translationUnit->specConstantsValues[mockCompiler->moduleSpecConstantsIds[2 * i]]), static_cast(mockCompiler->moduleSpecConstantsValuesT2[i])); EXPECT_EQ(static_cast(module->translationUnit->specConstantsValues[mockCompiler->moduleSpecConstantsIds[2 * i + 1]]), 
static_cast(mockCompiler->moduleSpecConstantsValuesT1[i])); } EXPECT_TRUE(success); module->destroy(); } void runTestStatic() { std::string testFile; retrieveBinaryKernelFilenameApiSpecific(testFile, binaryFilename + "_", ".spv"); size_t sizeModule1, sizeModule2 = 0; auto srcModule1 = loadDataFromFile(testFile.c_str(), sizeModule1); auto srcModule2 = loadDataFromFile(testFile.c_str(), sizeModule2); ASSERT_NE(0u, sizeModule1); ASSERT_NE(0u, sizeModule2); ASSERT_NE(nullptr, srcModule1); ASSERT_NE(nullptr, srcModule2); ze_module_desc_t combinedModuleDesc = {ZE_STRUCTURE_TYPE_MODULE_DESC}; ze_module_program_exp_desc_t staticLinkModuleDesc = {ZE_STRUCTURE_TYPE_MODULE_PROGRAM_EXP_DESC}; std::vector inputSpirVs; std::vector inputSizes; std::vector specConstantsArray; combinedModuleDesc.format = ZE_MODULE_FORMAT_IL_SPIRV; combinedModuleDesc.pNext = &staticLinkModuleDesc; specConstants.numConstants = mockCompiler->moduleNumSpecConstants; for (uint32_t i = mockCompiler->moduleNumSpecConstants / 2; i > 0; i--) { specConstantsPointerValues.push_back(&mockCompiler->moduleSpecConstantsValuesT1[i - 1]); specConstantsPointerValues.push_back(&mockCompiler->moduleSpecConstantsValuesT2[i - 1]); } for (uint32_t i = mockCompiler->moduleNumSpecConstants; i > 0; i--) { specConstantsPointerIds.push_back(mockCompiler->moduleSpecConstantsIds[i - 1]); } specConstants.pConstantIds = specConstantsPointerIds.data(); specConstants.pConstantValues = specConstantsPointerValues.data(); specConstantsArray.push_back(&specConstants); specConstantsArray.push_back(&specConstants); inputSizes.push_back(sizeModule1); inputSpirVs.push_back(reinterpret_cast(srcModule1.get())); inputSizes.push_back(sizeModule2); inputSpirVs.push_back(reinterpret_cast(srcModule2.get())); staticLinkModuleDesc.count = 2; staticLinkModuleDesc.inputSizes = inputSizes.data(); staticLinkModuleDesc.pInputModules = inputSpirVs.data(); staticLinkModuleDesc.pConstants = const_cast(specConstantsArray.data()); auto module = new 
Module(device, nullptr, ModuleType::User); module->translationUnit.reset(mockTranslationUnit); bool success = module->initialize(&combinedModuleDesc, neoDevice); for (uint32_t i = 0; i < mockCompiler->moduleNumSpecConstants / 2; i++) { EXPECT_EQ(static_cast(module->translationUnit->specConstantsValues[mockCompiler->moduleSpecConstantsIds[2 * i]]), static_cast(mockCompiler->moduleSpecConstantsValuesT2[i])); EXPECT_EQ(static_cast(module->translationUnit->specConstantsValues[mockCompiler->moduleSpecConstantsIds[2 * i + 1]]), static_cast(mockCompiler->moduleSpecConstantsValuesT1[i])); } EXPECT_TRUE(success); module->destroy(); } const uint32_t moduleNumSpecConstants = 3 * 2; ze_module_constants_t specConstants; std::vector specConstantsPointerValues; std::vector specConstantsPointerIds; const std::string binaryFilename = "test_kernel"; const std::string kernelName = "test"; MockCompilerInterfaceWithSpecConstants *mockCompiler; MockModuleTranslationUnit *mockTranslationUnit; }; template using ModuleSpecConstantsTests = Test>; using ModuleSpecConstantsLongTests = ModuleSpecConstantsTests; TEST_F(ModuleSpecConstantsLongTests, givenSpecializationConstantsSetWithLongSizeInDescriptorThenModuleCorrectlyPassesThemToTheCompiler) { runTest(); } using ModuleSpecConstantsCharTests = ModuleSpecConstantsTests; TEST_F(ModuleSpecConstantsCharTests, givenSpecializationConstantsSetWithCharSizeInDescriptorThenModuleCorrectlyPassesThemToTheCompiler) { runTest(); } TEST_F(ModuleSpecConstantsLongTests, givenSpecializationConstantsSetWhenCompilerReturnsErrorThenModuleInitFails) { class FailingMockCompilerInterfaceWithSpecConstants : public MockCompilerInterfaceWithSpecConstants { public: FailingMockCompilerInterfaceWithSpecConstants(uint32_t moduleNumSpecConstants) : MockCompilerInterfaceWithSpecConstants(moduleNumSpecConstants) {} NEO::TranslationOutput::ErrorCode getSpecConstantsInfo(const NEO::Device &device, ArrayRef srcSpirV, NEO::SpecConstantInfo &output) override { return 
NEO::TranslationOutput::ErrorCode::CompilerNotAvailable; } }; mockCompiler = new FailingMockCompilerInterfaceWithSpecConstants(moduleNumSpecConstants); auto rootDeviceEnvironment = neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[0].get(); rootDeviceEnvironment->compilerInterface.reset(mockCompiler); std::string testFile; retrieveBinaryKernelFilenameApiSpecific(testFile, binaryFilename + "_", ".spv"); size_t size = 0; auto src = loadDataFromFile(testFile.c_str(), size); ASSERT_NE(0u, size); ASSERT_NE(nullptr, src); ze_module_desc_t moduleDesc = {}; moduleDesc.format = ZE_MODULE_FORMAT_IL_SPIRV; moduleDesc.pInputModule = reinterpret_cast(src.get()); moduleDesc.inputSize = size; specConstants.numConstants = mockCompiler->moduleNumSpecConstants; for (uint32_t i = 0; i < mockCompiler->moduleNumSpecConstants / 2; i++) { specConstantsPointerValues.push_back(&mockCompiler->moduleSpecConstantsValuesT2[i]); specConstantsPointerValues.push_back(&mockCompiler->moduleSpecConstantsValuesT1[i]); } specConstants.pConstantIds = mockCompiler->moduleSpecConstantsIds.data(); specConstants.pConstantValues = specConstantsPointerValues.data(); moduleDesc.pConstants = &specConstants; auto module = new Module(device, nullptr, ModuleType::User); module->translationUnit.reset(mockTranslationUnit); bool success = module->initialize(&moduleDesc, neoDevice); EXPECT_FALSE(success); module->destroy(); } TEST_F(ModuleSpecConstantsLongTests, givenSpecializationConstantsSetWhenUserPassTooMuchConstsIdsThenModuleInitFails) { std::string testFile; retrieveBinaryKernelFilenameApiSpecific(testFile, binaryFilename + "_", ".spv"); size_t size = 0; auto src = loadDataFromFile(testFile.c_str(), size); ASSERT_NE(0u, size); ASSERT_NE(nullptr, src); ze_module_desc_t moduleDesc = {}; moduleDesc.format = ZE_MODULE_FORMAT_IL_SPIRV; moduleDesc.pInputModule = reinterpret_cast(src.get()); moduleDesc.inputSize = size; specConstants.numConstants = mockCompiler->moduleNumSpecConstants; for (uint32_t i = 
mockCompiler->moduleNumSpecConstants / 2; i > 0; i--) { specConstantsPointerValues.push_back(&mockCompiler->moduleSpecConstantsValuesT1[i - 1]); specConstantsPointerValues.push_back(&mockCompiler->moduleSpecConstantsValuesT2[i - 1]); } for (uint32_t i = mockCompiler->moduleNumSpecConstants; i > 0; i--) { specConstantsPointerIds.push_back(mockCompiler->moduleSpecConstantsIds[i - 1]); } specConstantsPointerIds.push_back(0x1000); specConstants.numConstants += 1; specConstants.pConstantIds = specConstantsPointerIds.data(); specConstants.pConstantValues = specConstantsPointerValues.data(); moduleDesc.pConstants = &specConstants; auto module = new Module(device, nullptr, ModuleType::User); module->translationUnit.reset(mockTranslationUnit); bool success = module->initialize(&moduleDesc, neoDevice); EXPECT_FALSE(success); module->destroy(); } using ModuleSpecConstantsLongTests = ModuleSpecConstantsTests; TEST_F(ModuleSpecConstantsLongTests, givenSpecializationConstantsSetWithLongSizeInExpDescriptorThenStaticLinkedModuleCorrectlyPassesThemToTheCompiler) { runTestStatic(); } using ModuleSpecConstantsCharTests = ModuleSpecConstantsTests; TEST_F(ModuleSpecConstantsCharTests, givenSpecializationConstantsSetWithCharSizeInExpDescriptorThenStaticLinkedModuleCorrectlyPassesThemToTheCompiler) { runTestStatic(); } TEST_F(ModuleSpecConstantsLongTests, givenSpecializationConstantsSetWhenCompilerReturnsErrorFromStaticLinkThenModuleInitFails) { class FailingMockCompilerInterfaceWithSpecConstants : public MockCompilerInterfaceWithSpecConstants { public: FailingMockCompilerInterfaceWithSpecConstants(uint32_t moduleNumSpecConstants) : MockCompilerInterfaceWithSpecConstants(moduleNumSpecConstants) {} NEO::TranslationOutput::ErrorCode getSpecConstantsInfo(const NEO::Device &device, ArrayRef srcSpirV, NEO::SpecConstantInfo &output) override { return NEO::TranslationOutput::ErrorCode::CompilerNotAvailable; } }; mockCompiler = new 
FailingMockCompilerInterfaceWithSpecConstants(moduleNumSpecConstants); auto rootDeviceEnvironment = neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[0].get(); rootDeviceEnvironment->compilerInterface.reset(mockCompiler); std::string testFile; retrieveBinaryKernelFilenameApiSpecific(testFile, binaryFilename + "_", ".spv"); size_t sizeModule1, sizeModule2 = 0; auto srcModule1 = loadDataFromFile(testFile.c_str(), sizeModule1); auto srcModule2 = loadDataFromFile(testFile.c_str(), sizeModule2); ASSERT_NE(0u, sizeModule1); ASSERT_NE(0u, sizeModule2); ASSERT_NE(nullptr, srcModule1); ASSERT_NE(nullptr, srcModule2); ze_module_desc_t combinedModuleDesc = {ZE_STRUCTURE_TYPE_MODULE_DESC}; ze_module_program_exp_desc_t staticLinkModuleDesc = {ZE_STRUCTURE_TYPE_MODULE_PROGRAM_EXP_DESC}; std::vector inputSpirVs; std::vector inputSizes; std::vector specConstantsArray; combinedModuleDesc.format = ZE_MODULE_FORMAT_IL_SPIRV; combinedModuleDesc.pNext = &staticLinkModuleDesc; specConstants.numConstants = mockCompiler->moduleNumSpecConstants; for (uint32_t i = mockCompiler->moduleNumSpecConstants / 2; i > 0; i--) { specConstantsPointerValues.push_back(&mockCompiler->moduleSpecConstantsValuesT1[i - 1]); specConstantsPointerValues.push_back(&mockCompiler->moduleSpecConstantsValuesT2[i - 1]); } for (uint32_t i = mockCompiler->moduleNumSpecConstants; i > 0; i--) { specConstantsPointerIds.push_back(mockCompiler->moduleSpecConstantsIds[i - 1]); } specConstants.pConstantIds = specConstantsPointerIds.data(); specConstants.pConstantValues = specConstantsPointerValues.data(); specConstantsArray.push_back(&specConstants); specConstantsArray.push_back(&specConstants); inputSizes.push_back(sizeModule1); inputSpirVs.push_back(reinterpret_cast(srcModule1.get())); inputSizes.push_back(sizeModule2); inputSpirVs.push_back(reinterpret_cast(srcModule2.get())); staticLinkModuleDesc.count = 2; staticLinkModuleDesc.inputSizes = inputSizes.data(); staticLinkModuleDesc.pInputModules = 
inputSpirVs.data(); staticLinkModuleDesc.pConstants = const_cast(specConstantsArray.data()); auto module = new Module(device, nullptr, ModuleType::User); module->translationUnit.reset(mockTranslationUnit); bool success = module->initialize(&combinedModuleDesc, neoDevice); EXPECT_FALSE(success); module->destroy(); } struct ModuleStaticLinkFixture : public DeviceFixture { void SetUp() { DeviceFixture::SetUp(); } void TearDown() { DeviceFixture::TearDown(); } void loadModules(bool multiple) { std::string testFile; retrieveBinaryKernelFilenameApiSpecific(testFile, binaryFilename + "_", ".spv"); srcModule1 = loadDataFromFile(testFile.c_str(), sizeModule1); if (multiple) { srcModule2 = loadDataFromFile(testFile.c_str(), sizeModule2); } ASSERT_NE(0u, sizeModule1); ASSERT_NE(nullptr, srcModule1); if (multiple) { ASSERT_NE(0u, sizeModule2); ASSERT_NE(nullptr, srcModule2); } } void setupExpProgramDesc(ze_module_format_t format, bool multiple) { combinedModuleDesc.format = format; combinedModuleDesc.pNext = &staticLinkModuleDesc; inputSizes.push_back(sizeModule1); inputSpirVs.push_back(reinterpret_cast(srcModule1.get())); staticLinkModuleDesc.count = 1; if (multiple) { inputSizes.push_back(sizeModule2); inputSpirVs.push_back(reinterpret_cast(srcModule2.get())); staticLinkModuleDesc.count = 2; } staticLinkModuleDesc.inputSizes = inputSizes.data(); staticLinkModuleDesc.pInputModules = inputSpirVs.data(); } void runLinkFailureTest() { MockCompilerInterfaceLinkFailure *mockCompiler; mockCompiler = new MockCompilerInterfaceLinkFailure(); auto rootDeviceEnvironment = neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[0].get(); rootDeviceEnvironment->compilerInterface.reset(mockCompiler); mockTranslationUnit = new MockModuleTranslationUnit(device); loadModules(testMultiple); setupExpProgramDesc(ZE_MODULE_FORMAT_IL_SPIRV, testMultiple); auto module = new Module(device, nullptr, ModuleType::User); module->translationUnit.reset(mockTranslationUnit); bool success = 
module->initialize(&combinedModuleDesc, neoDevice); EXPECT_FALSE(success); module->destroy(); } void runSpirvFailureTest() { MockCompilerInterface *mockCompiler; mockCompiler = new MockCompilerInterface(); auto rootDeviceEnvironment = neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[0].get(); rootDeviceEnvironment->compilerInterface.reset(mockCompiler); mockTranslationUnit = new MockModuleTranslationUnit(device); loadModules(testMultiple); setupExpProgramDesc(ZE_MODULE_FORMAT_NATIVE, testMultiple); auto module = new Module(device, nullptr, ModuleType::User); module->translationUnit.reset(mockTranslationUnit); bool success = module->initialize(&combinedModuleDesc, neoDevice); EXPECT_FALSE(success); module->destroy(); } void runExpDescFailureTest() { MockCompilerInterface *mockCompiler; mockCompiler = new MockCompilerInterface(); auto rootDeviceEnvironment = neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[0].get(); rootDeviceEnvironment->compilerInterface.reset(mockCompiler); mockTranslationUnit = new MockModuleTranslationUnit(device); ze_module_desc_t invalidExpDesc = {ZE_STRUCTURE_TYPE_MODULE_DESC}; combinedModuleDesc.format = ZE_MODULE_FORMAT_IL_SPIRV; combinedModuleDesc.pNext = &invalidExpDesc; auto module = new Module(device, nullptr, ModuleType::User); module->translationUnit.reset(mockTranslationUnit); bool success = module->initialize(&combinedModuleDesc, neoDevice); EXPECT_FALSE(success); module->destroy(); } void runSprivLinkBuildFlags() { MockCompilerInterface *mockCompiler; mockCompiler = new MockCompilerInterface(); auto rootDeviceEnvironment = neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[0].get(); rootDeviceEnvironment->compilerInterface.reset(mockCompiler); mockTranslationUnit = new MockModuleTranslationUnit(device); loadModules(testMultiple); setupExpProgramDesc(ZE_MODULE_FORMAT_IL_SPIRV, testMultiple); std::vector buildFlags; std::string module1BuildFlags("-ze-opt-disable"); std::string 
module2BuildFlags("-ze-opt-greater-than-4GB-buffer-required"); buildFlags.push_back(const_cast(module1BuildFlags.c_str())); buildFlags.push_back(const_cast(module2BuildFlags.c_str())); staticLinkModuleDesc.pBuildFlags = const_cast(buildFlags.data()); auto module = new Module(device, nullptr, ModuleType::User); module->translationUnit.reset(mockTranslationUnit); bool success = module->initialize(&combinedModuleDesc, neoDevice); EXPECT_TRUE(success); module->destroy(); } void runSprivLinkBuildWithOneModule() { MockCompilerInterface *mockCompiler; mockCompiler = new MockCompilerInterface(); auto rootDeviceEnvironment = neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[0].get(); rootDeviceEnvironment->compilerInterface.reset(mockCompiler); mockTranslationUnit = new MockModuleTranslationUnit(device); loadModules(testSingle); setupExpProgramDesc(ZE_MODULE_FORMAT_IL_SPIRV, testSingle); std::vector buildFlags; std::string module1BuildFlags("-ze-opt-disable"); buildFlags.push_back(const_cast(module1BuildFlags.c_str())); staticLinkModuleDesc.pBuildFlags = const_cast(buildFlags.data()); auto module = new Module(device, nullptr, ModuleType::User); module->translationUnit.reset(mockTranslationUnit); bool success = module->initialize(&combinedModuleDesc, neoDevice); EXPECT_TRUE(success); module->destroy(); } const std::string binaryFilename = "test_kernel"; const std::string kernelName = "test"; MockModuleTranslationUnit *mockTranslationUnit; std::unique_ptr srcModule1; std::unique_ptr srcModule2; size_t sizeModule1, sizeModule2 = 0; std::vector inputSpirVs; std::vector inputSizes; ze_module_desc_t combinedModuleDesc = {ZE_STRUCTURE_TYPE_MODULE_DESC}; ze_module_program_exp_desc_t staticLinkModuleDesc = {ZE_STRUCTURE_TYPE_MODULE_PROGRAM_EXP_DESC}; bool testMultiple = true; bool testSingle = false; }; using ModuleStaticLinkTests = Test; TEST_F(ModuleStaticLinkTests, givenMultipleModulesProvidedForSpirVStaticLinkAndCompilerFailsThenFailureIsReturned) { 
runLinkFailureTest();
}

// Runs the fixture's runSpirvFailureTest() scenario.
TEST_F(ModuleStaticLinkTests, givenMultipleModulesProvidedForSpirVStaticLinkAndFormatIsNotSpirvThenFailureisReturned) {
    runSpirvFailureTest();
}

// Runs the fixture's runExpDescFailureTest() scenario.
TEST_F(ModuleStaticLinkTests, givenInvalidExpDescForModuleCreateThenFailureisReturned) {
    runExpDescFailureTest();
}

// Runs the fixture's runSprivLinkBuildFlags() scenario.
TEST_F(ModuleStaticLinkTests, givenMultipleModulesProvidedForSpirVStaticLinkAndBuildFlagsRequestedThenSuccessisReturned) {
    runSprivLinkBuildFlags();
}

// Runs the fixture's runSprivLinkBuildWithOneModule() scenario.
TEST_F(ModuleStaticLinkTests, givenSingleModuleProvidedForSpirVStaticLinkAndBuildFlagsRequestedThenSuccessisReturned) {
    runSprivLinkBuildWithOneModule();
}

using ModuleLinkingTest = Test;

// Gives the mock module an exportedFunctionsSurface and a single
// KernelImmutableData built from a one-byte kernel heap, attaches a
// default-constructed LinkerInput and calls linkBinary(). Afterwards the kernel's
// residency container must hold exactly one entry: the exported-functions
// allocation.
HWTEST_F(ModuleLinkingTest, whenExternFunctionsAllocationIsPresentThenItsBeingAddedToResidencyContainer) {
    Mock module(device, nullptr);
    MockGraphicsAllocation alloc;
    module.exportedFunctionsSurface = &alloc;

    // Minimal kernel description: a heap consisting of the single byte `data`.
    uint8_t data{};
    KernelInfo kernelInfo{};
    kernelInfo.heapInfo.pKernelHeap = &data;
    kernelInfo.heapInfo.KernelHeapSize = sizeof(data);

    std::unique_ptr> kernelImmData{new WhiteBox<::L0::KernelImmutableData>(this->device)};
    kernelImmData->initialize(&kernelInfo, device, 0, nullptr, nullptr, false);
    module.kernelImmDatas.push_back(std::move(kernelImmData));

    module.translationUnit->programInfo.linkerInput.reset(new NEO::LinkerInput);
    module.linkBinary();

    // ASSERT so that index 0 is only read when the container has one element.
    ASSERT_EQ(1U, module.kernelImmDatas[0]->getResidencyContainer().size());
    EXPECT_EQ(&alloc, module.kernelImmDatas[0]->getResidencyContainer()[0]);
}

// Installs a MockCompilerInterface on root device environment 0 and gives the mock
// translation unit a linker input whose `valid` flag is false; Module::initialize()
// is then expected to return false.
HWTEST_F(ModuleLinkingTest, givenFailureDuringLinkingWhenCreatingModuleThenModuleInitialiationFails) {
    auto mockCompiler = new MockCompilerInterface();
    auto rootDeviceEnvironment = neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[0].get();
    // The mock is handed over to compilerInterface via reset().
    rootDeviceEnvironment->compilerInterface.reset(mockCompiler);

    auto mockTranslationUnit = new MockModuleTranslationUnit(device);

    auto linkerInput = std::make_unique<::WhiteBox>();
    linkerInput->valid = false;

    mockTranslationUnit->programInfo.linkerInput = std::move(linkerInput);
    uint8_t spirvData{};
ze_module_desc_t moduleDesc = {};
    moduleDesc.format = ZE_MODULE_FORMAT_IL_SPIRV;
    moduleDesc.pInputModule = &spirvData;
    moduleDesc.inputSize = sizeof(spirvData);

    Module module(device, nullptr, ModuleType::User);
    module.translationUnit.reset(mockTranslationUnit);

    bool success = module.initialize(&moduleDesc, neoDevice);
    EXPECT_FALSE(success);
}

// Gives the mock translation unit a linker input that carries one data relocation
// for the symbol "unresolved" and requests patching of the global variables
// buffer. Module::initialize() is expected to succeed while leaving isFullyLinked
// false.
HWTEST_F(ModuleLinkingTest, givenRemainingUnresolvedSymbolsDuringLinkingWhenCreatingModuleThenModuleIsNotLinkedFully) {
    auto mockCompiler = new MockCompilerInterface();
    auto rootDeviceEnvironment = neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[0].get();
    // The mock is handed over to compilerInterface via reset().
    rootDeviceEnvironment->compilerInterface.reset(mockCompiler);

    auto mockTranslationUnit = new MockModuleTranslationUnit(device);

    auto linkerInput = std::make_unique<::WhiteBox>();

    NEO::LinkerInput::RelocationInfo relocation;
    relocation.symbolName = "unresolved";
    linkerInput->dataRelocations.push_back(relocation);
    linkerInput->traits.requiresPatchingOfGlobalVariablesBuffer = true;

    mockTranslationUnit->programInfo.linkerInput = std::move(linkerInput);

    // One-byte placeholder passed as the SPIR-V input of the module descriptor.
    uint8_t spirvData{};
    ze_module_desc_t moduleDesc = {};
    moduleDesc.format = ZE_MODULE_FORMAT_IL_SPIRV;
    moduleDesc.pInputModule = &spirvData;
    moduleDesc.inputSize = sizeof(spirvData);

    Module module(device, nullptr, ModuleType::User);
    module.translationUnit.reset(mockTranslationUnit);

    bool success = module.initialize(&moduleDesc, neoDevice);
    EXPECT_TRUE(success);
    EXPECT_FALSE(module.isFullyLinked);
}

// createKernel() on a module whose isFullyLinked flag is false must return
// ZE_RESULT_ERROR_INVALID_MODULE_UNLINKED. The test passes null for both arguments.
HWTEST_F(ModuleLinkingTest, givenNotFullyLinkedModuleWhenCreatingKernelThenErrorIsReturned) {
    Module module(device, nullptr, ModuleType::User);
    module.isFullyLinked = false;

    auto retVal = module.createKernel(nullptr, nullptr);

    EXPECT_EQ(ZE_RESULT_ERROR_INVALID_MODULE_UNLINKED, retVal);
}

using ModulePropertyTest = Test;

// Calls the zeModuleGetProperties entry point on a mock module whose
// getPropertiesResult is preset, and expects that preset value to come back.
TEST_F(ModulePropertyTest, whenZeModuleGetPropertiesIsCalledThenGetPropertiesIsCalled) {
    Mock module(device, nullptr);
    ze_module_properties_t moduleProperties;
    moduleProperties.stype =
ZE_STRUCTURE_TYPE_MODULE_PROPERTIES; moduleProperties.pNext = nullptr; // returning error code that is unlikely to be returned by the function module.getPropertiesResult = ZE_RESULT_ERROR_UNSUPPORTED_IMAGE_FORMAT; ze_result_t res = zeModuleGetProperties(module.toHandle(), &moduleProperties); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_IMAGE_FORMAT, res); } TEST_F(ModulePropertyTest, givenCallToGetPropertiesWithoutUnresolvedSymbolsThenFlagIsNotSet) { ze_module_properties_t moduleProperties; ze_module_property_flags_t expectedFlags = 0; ze_result_t res = module->getProperties(&moduleProperties); moduleProperties.stype = ZE_STRUCTURE_TYPE_MODULE_PROPERTIES; moduleProperties.pNext = nullptr; EXPECT_EQ(ZE_RESULT_SUCCESS, res); EXPECT_EQ(expectedFlags, moduleProperties.flags); } TEST_F(ModulePropertyTest, givenCallToGetPropertiesWithUnresolvedSymbolsThenFlagIsSet) { NEO::Linker::RelocationInfo unresolvedRelocation; unresolvedRelocation.symbolName = "unresolved"; whitebox_cast(module.get())->unresolvedExternalsInfo.push_back({unresolvedRelocation}); ze_module_property_flags_t expectedFlags = 0; expectedFlags |= ZE_MODULE_PROPERTY_FLAG_IMPORTS; ze_module_properties_t moduleProperties; moduleProperties.stype = ZE_STRUCTURE_TYPE_MODULE_PROPERTIES; moduleProperties.pNext = nullptr; ze_result_t res = module->getProperties(&moduleProperties); EXPECT_EQ(ZE_RESULT_SUCCESS, res); EXPECT_EQ(expectedFlags, moduleProperties.flags); } struct ModuleDynamicLinkTests : public Test { void SetUp() override { Test::SetUp(); module0 = std::make_unique(device, nullptr, ModuleType::User); module1 = std::make_unique(device, nullptr, ModuleType::User); module2 = std::make_unique(device, nullptr, ModuleType::User); } std::unique_ptr module0; std::unique_ptr module1; std::unique_ptr module2; }; TEST_F(ModuleDynamicLinkTests, givenCallToDynamicLinkOnModulesWithoutUnresolvedSymbolsThenSuccessIsReturned) { std::vector hModules = {module0->toHandle(), module1->toHandle()}; ze_result_t res = 
module0->performDynamicLink(2, hModules.data(), nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
}

// module0 carries an unresolved external named "unresolved" and this test adds no
// symbols to module1, so performDynamicLink() is expected to return
// ZE_RESULT_ERROR_MODULE_LINK_FAILURE.
TEST_F(ModuleDynamicLinkTests, givenModuleWithUnresolvedSymbolsNotPresentInOtherModulesWhenDynamicLinkThenLinkFailureIsReturned) {

    NEO::Linker::RelocationInfo unresolvedRelocation;
    unresolvedRelocation.symbolName = "unresolved";

    module0->unresolvedExternalsInfo.push_back({unresolvedRelocation});

    std::vector hModules = {module0->toHandle(), module1->toHandle()};
    ze_result_t res = module0->performDynamicLink(2, hModules.data(), nullptr);
    EXPECT_EQ(ZE_RESULT_ERROR_MODULE_LINK_FAILURE, res);
}

// Same setup as the previous test, but module0 is flagged isFullyLinked = true
// before the call; performDynamicLink() is then expected to return
// ZE_RESULT_SUCCESS even though the unresolved external is still recorded.
TEST_F(ModuleDynamicLinkTests, whenModuleIsAlreadyLinkedThenThereIsNoSymbolsVerification) {

    NEO::Linker::RelocationInfo unresolvedRelocation;
    unresolvedRelocation.symbolName = "unresolved";

    module0->unresolvedExternalsInfo.push_back({unresolvedRelocation});
    module0->isFullyLinked = true;

    std::vector hModules = {module0->toHandle(), module1->toHandle()};
    ze_result_t res = module0->performDynamicLink(2, hModules.data(), nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
}

// Setup half of the "segment is patched" scenario: prepares an Address-type
// relocation at offset 0x20 for the symbol "unresolved", a RelocatedSymbol bound to
// gpuAddress 0x12345, a page-sized zeroed kernel heap registered in module0's
// program info, and a fresh linker input object.
TEST_F(ModuleDynamicLinkTests, givenModuleWithUnresolvedSymbolWhenTheOtherModuleDefinesTheSymbolThenTheSegmentIsPatched) {

    uint64_t gpuAddress = 0x12345;
    uint32_t offset = 0x20;

    NEO::Linker::RelocationInfo unresolvedRelocation;
    unresolvedRelocation.symbolName = "unresolved";
    unresolvedRelocation.offset = offset;
    unresolvedRelocation.type = NEO::Linker::RelocationInfo::Type::Address;
    // NOTE(review): unresolvedExternal is populated here but not referenced again
    // in the statements that follow on this line - confirm whether it is needed.
    NEO::Linker::UnresolvedExternal unresolvedExternal;
    unresolvedExternal.unresolvedRelocation = unresolvedRelocation;

    NEO::SymbolInfo symbolInfo{};
    NEO::Linker::RelocatedSymbol relocatedSymbol{symbolInfo, gpuAddress};

    char kernelHeap[MemoryConstants::pageSize] = {};

    auto kernelInfo = std::make_unique();
    kernelInfo->heapInfo.pKernelHeap = kernelHeap;
    kernelInfo->heapInfo.KernelHeapSize = MemoryConstants::pageSize;
    // release(): the raw KernelInfo pointer is stored in programInfo.kernelInfos.
    module0->getTranslationUnit()->programInfo.kernelInfos.push_back(kernelInfo.release());

    auto linkerInput = std::make_unique<::WhiteBox>();
linkerInput->traits.requiresPatchingOfInstructionSegments = true;
    module0->getTranslationUnit()->programInfo.linkerInput = std::move(linkerInput);
    module0->unresolvedExternalsInfo.push_back({unresolvedRelocation});
    // The relocation targets instruction segment 0, i.e. the only kernel added.
    module0->unresolvedExternalsInfo[0].instructionsSegmentId = 0u;

    auto kernelImmData = std::make_unique>(device);
    // Back the kernel with a page-sized KERNEL_ISA allocation.
    kernelImmData->isaGraphicsAllocation.reset(neoDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties(
        {device->getRootDeviceIndex(), MemoryConstants::pageSize, NEO::AllocationType::KERNEL_ISA, neoDevice->getDeviceBitfield()}));

    // Capture the buffer pointer before kernelImmData is moved into module0.
    auto isaPtr = kernelImmData->getIsaGraphicsAllocation()->getUnderlyingBuffer();

    module0->kernelImmDatas.push_back(std::move(kernelImmData));

    // module1 exports the symbol module0 is missing.
    module1->symbols[unresolvedRelocation.symbolName] = relocatedSymbol;

    std::vector hModules = {module0->toHandle(), module1->toHandle()};
    ze_result_t res = module0->performDynamicLink(2, hModules.data(), nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);

    // The relocation site must now hold the exporter's GPU address.
    EXPECT_EQ(gpuAddress, *reinterpret_cast(ptrOffset(isaPtr, offset)));
}

// Same import/export setup as the segment-patching scenario, with module1
// additionally given an exportedFunctionsSurface. After linking, the residency
// container of module0's first kernel is expected to have two entries, the
// last of which is that surface.
TEST_F(ModuleDynamicLinkTests, givenModuleWithUnresolvedSymbolWhenTheOtherModuleDefinesTheSymbolThenTheExportedFunctionSurfaceIntheExportModuleIsAddedToTheImportModuleResidencyContainer) {
    uint64_t gpuAddress = 0x12345;
    uint32_t offset = 0x20;

    NEO::Linker::RelocationInfo unresolvedRelocation;
    unresolvedRelocation.symbolName = "unresolved";
    unresolvedRelocation.offset = offset;
    unresolvedRelocation.type = NEO::Linker::RelocationInfo::Type::Address;
    // NOTE(review): unresolvedExternal is filled in but not referenced again in
    // this test.
    NEO::Linker::UnresolvedExternal unresolvedExternal;
    unresolvedExternal.unresolvedRelocation = unresolvedRelocation;

    NEO::SymbolInfo symbolInfo{};
    NEO::Linker::RelocatedSymbol relocatedSymbol{symbolInfo, gpuAddress};

    // One page of zeroed host memory serves as the kernel heap.
    char kernelHeap[MemoryConstants::pageSize] = {};

    auto kernelInfo = std::make_unique();
    kernelInfo->heapInfo.pKernelHeap = kernelHeap;
    kernelInfo->heapInfo.KernelHeapSize = MemoryConstants::pageSize;
    // release(): the kernelInfos container receives the raw pointer.
    module0->getTranslationUnit()->programInfo.kernelInfos.push_back(kernelInfo.release());

    auto linkerInput =
std::make_unique<::WhiteBox>();
    linkerInput->traits.requiresPatchingOfInstructionSegments = true;
    module0->getTranslationUnit()->programInfo.linkerInput = std::move(linkerInput);
    module0->unresolvedExternalsInfo.push_back({unresolvedRelocation});
    // The relocation targets instruction segment 0, i.e. the only kernel added.
    module0->unresolvedExternalsInfo[0].instructionsSegmentId = 0u;

    auto kernelImmData = std::make_unique>(device);
    // Back the kernel with a page-sized KERNEL_ISA allocation.
    kernelImmData->isaGraphicsAllocation.reset(neoDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties(
        {device->getRootDeviceIndex(), MemoryConstants::pageSize, NEO::AllocationType::KERNEL_ISA, neoDevice->getDeviceBitfield()}));

    module0->kernelImmDatas.push_back(std::move(kernelImmData));

    // module1 exports the symbol and owns an exported-functions surface.
    module1->symbols[unresolvedRelocation.symbolName] = relocatedSymbol;
    MockGraphicsAllocation alloc;
    module1->exportedFunctionsSurface = &alloc;

    std::vector hModules = {module0->toHandle(), module1->toHandle()};
    ze_result_t res = module0->performDynamicLink(2, hModules.data(), nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);

    // Two residency entries are expected, the exporter's surface being last.
    EXPECT_EQ((int)module0->kernelImmDatas[0]->getResidencyContainer().size(), 2);
    EXPECT_EQ(module0->kernelImmDatas[0]->getResidencyContainer().back(), &alloc);
}

// Three-module scenario:
//   - module0 imports "unresolved" and exports "unresolvedCircular";
//   - module1 imports "unresolvedCircular" and "unresolvedChained" and exports
//     "unresolved";
//   - module2 exports "unresolvedChained".
// Each module gets its own exportedFunctionsSurface. After linking, module0's
// first kernel residency container is expected to hold 4 entries and to
// contain all three surfaces.
TEST_F(ModuleDynamicLinkTests, givenMultipleModulesWithUnresolvedSymbolWhenTheEachModuleDefinesTheSymbolThenTheExportedFunctionSurfaceInBothModulesIsAddedToTheResidencyContainer) {
    uint64_t gpuAddress0 = 0x12345;
    uint64_t gpuAddress1 = 0x6789;
    uint64_t gpuAddress2 = 0x1479;
    // All three relocations below share this offset.
    uint32_t offset = 0x20;

    NEO::Linker::RelocationInfo unresolvedRelocation;
    unresolvedRelocation.symbolName = "unresolved";
    unresolvedRelocation.offset = offset;
    unresolvedRelocation.type = NEO::Linker::RelocationInfo::Type::Address;
    NEO::Linker::UnresolvedExternal unresolvedExternal;
    unresolvedExternal.unresolvedRelocation = unresolvedRelocation;

    NEO::Linker::RelocationInfo unresolvedRelocationCircular;
    unresolvedRelocationCircular.symbolName = "unresolvedCircular";
    unresolvedRelocationCircular.offset = offset;
    unresolvedRelocationCircular.type =
NEO::Linker::RelocationInfo::Type::Address;
    NEO::Linker::UnresolvedExternal unresolvedExternalCircular;
    unresolvedExternalCircular.unresolvedRelocation = unresolvedRelocationCircular;

    // Relocation for the symbol that only module2 exports.
    NEO::Linker::RelocationInfo unresolvedRelocationChained;
    unresolvedRelocationChained.symbolName = "unresolvedChained";
    unresolvedRelocationChained.offset = offset;
    unresolvedRelocationChained.type = NEO::Linker::RelocationInfo::Type::Address;
    NEO::Linker::UnresolvedExternal unresolvedExternalChained;
    unresolvedExternalChained.unresolvedRelocation = unresolvedRelocationChained;

    // One relocated symbol per module, each with its own GPU address.
    NEO::SymbolInfo module0SymbolInfo{};
    NEO::Linker::RelocatedSymbol module0RelocatedSymbol{module0SymbolInfo, gpuAddress0};

    NEO::SymbolInfo module1SymbolInfo{};
    NEO::Linker::RelocatedSymbol module1RelocatedSymbol{module1SymbolInfo, gpuAddress1};

    NEO::SymbolInfo module2SymbolInfo{};
    NEO::Linker::RelocatedSymbol module2RelocatedSymbol{module2SymbolInfo, gpuAddress2};

    // module0: one kernel backed by a zeroed, page-sized host heap.
    char kernelHeap[MemoryConstants::pageSize] = {};

    auto kernelInfo = std::make_unique();
    kernelInfo->heapInfo.pKernelHeap = kernelHeap;
    kernelInfo->heapInfo.KernelHeapSize = MemoryConstants::pageSize;
    // release(): the kernelInfos container receives the raw pointer.
    module0->getTranslationUnit()->programInfo.kernelInfos.push_back(kernelInfo.release());

    auto linkerInput = std::make_unique<::WhiteBox>();
    linkerInput->traits.requiresPatchingOfInstructionSegments = true;

    module0->getTranslationUnit()->programInfo.linkerInput = std::move(linkerInput);
    module0->unresolvedExternalsInfo.push_back({unresolvedRelocation});
    module0->unresolvedExternalsInfo[0].instructionsSegmentId = 0u;

    auto kernelImmData = std::make_unique>(device);
    // Back the kernel with a page-sized KERNEL_ISA allocation.
    kernelImmData->isaGraphicsAllocation.reset(neoDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties(
        {device->getRootDeviceIndex(), MemoryConstants::pageSize, NEO::AllocationType::KERNEL_ISA, neoDevice->getDeviceBitfield()}));

    module0->kernelImmDatas.push_back(std::move(kernelImmData));

    // module0 exports the symbol that module1 imports (the circular edge).
    module0->symbols[unresolvedRelocationCircular.symbolName] = module0RelocatedSymbol;
MockGraphicsAllocation alloc0;
    module0->exportedFunctionsSurface = &alloc0;

    // module1: one kernel backed by its own zeroed, page-sized host heap.
    char kernelHeap2[MemoryConstants::pageSize] = {};

    auto kernelInfo2 = std::make_unique();
    kernelInfo2->heapInfo.pKernelHeap = kernelHeap2;
    kernelInfo2->heapInfo.KernelHeapSize = MemoryConstants::pageSize;
    // release(): the kernelInfos container receives the raw pointer.
    module1->getTranslationUnit()->programInfo.kernelInfos.push_back(kernelInfo2.release());

    auto linkerInput1 = std::make_unique<::WhiteBox>();
    linkerInput1->traits.requiresPatchingOfInstructionSegments = true;

    module1->getTranslationUnit()->programInfo.linkerInput = std::move(linkerInput1);

    // module1 imports two symbols, both targeting instruction segment 0.
    module1->unresolvedExternalsInfo.push_back({unresolvedRelocationCircular});
    module1->unresolvedExternalsInfo[0].instructionsSegmentId = 0u;
    module1->unresolvedExternalsInfo.push_back({unresolvedRelocationChained});
    module1->unresolvedExternalsInfo[1].instructionsSegmentId = 0u;

    // module1 exports the symbol module0 imports.
    module1->symbols[unresolvedRelocation.symbolName] = module1RelocatedSymbol;
    MockGraphicsAllocation alloc1;
    module1->exportedFunctionsSurface = &alloc1;

    // module2 only exports; no kernel or linker input is added to it here.
    module2->symbols[unresolvedRelocationChained.symbolName] = module2RelocatedSymbol;
    MockGraphicsAllocation alloc2;
    module2->exportedFunctionsSurface = &alloc2;

    std::vector hModules = {module0->toHandle(), module1->toHandle(), module2->toHandle()};
    ze_result_t res = module0->performDynamicLink(3, hModules.data(), nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);

    // Four residency entries are expected; each exported-functions surface must
    // be present, in any position.
    EXPECT_EQ((int)module0->kernelImmDatas[0]->getResidencyContainer().size(), 4);
    EXPECT_TRUE(std::find(module0->kernelImmDatas[0]->getResidencyContainer().begin(), module0->kernelImmDatas[0]->getResidencyContainer().end(), &alloc0) != module0->kernelImmDatas[0]->getResidencyContainer().end());
    EXPECT_TRUE(std::find(module0->kernelImmDatas[0]->getResidencyContainer().begin(), module0->kernelImmDatas[0]->getResidencyContainer().end(), &alloc1) != module0->kernelImmDatas[0]->getResidencyContainer().end());
    EXPECT_TRUE(std::find(module0->kernelImmDatas[0]->getResidencyContainer().begin(),
module0->kernelImmDatas[0]->getResidencyContainer().end(), &alloc2) != module0->kernelImmDatas[0]->getResidencyContainer().end());
}

// module0 imports two symbols ("unresolved" at offset 0x20 and "unresolved2" at
// offset 0x40), both exported by module1. The link is run with a build-log
// out-parameter; it is expected to succeed and to yield a log whose reported
// size is greater than zero.
TEST_F(ModuleDynamicLinkTests, givenModuleWithUnresolvedSymbolWhenTheOtherModuleDefinesTheSymbolThenTheBuildLogContainsTheSuccessfulLinkage) {
    uint64_t gpuAddress = 0x12345;
    uint32_t offset = 0x20;
    uint32_t offset2 = 0x40;

    // Receives the log handle from performDynamicLink(); destroyed at the end
    // of the test.
    ze_module_build_log_handle_t dynLinkLog;

    NEO::Linker::RelocationInfo unresolvedRelocation;
    unresolvedRelocation.symbolName = "unresolved";
    unresolvedRelocation.offset = offset;
    unresolvedRelocation.type = NEO::Linker::RelocationInfo::Type::Address;
    NEO::Linker::UnresolvedExternal unresolvedExternal;
    unresolvedExternal.unresolvedRelocation = unresolvedRelocation;

    NEO::Linker::RelocationInfo unresolvedRelocation2;
    unresolvedRelocation2.symbolName = "unresolved2";
    unresolvedRelocation2.offset = offset2;
    unresolvedRelocation2.type = NEO::Linker::RelocationInfo::Type::Address;
    NEO::Linker::UnresolvedExternal unresolvedExternal2;
    unresolvedExternal2.unresolvedRelocation = unresolvedRelocation2;

    // Both exported symbols resolve to the same GPU address.
    NEO::SymbolInfo symbolInfo{};
    NEO::Linker::RelocatedSymbol relocatedSymbol{symbolInfo, gpuAddress};

    NEO::SymbolInfo symbolInfo2{};
    NEO::Linker::RelocatedSymbol relocatedSymbol2{symbolInfo2, gpuAddress};

    // One page of zeroed host memory serves as the kernel heap.
    char kernelHeap[MemoryConstants::pageSize] = {};

    auto kernelInfo = std::make_unique();
    kernelInfo->heapInfo.pKernelHeap = kernelHeap;
    kernelInfo->heapInfo.KernelHeapSize = MemoryConstants::pageSize;
    // release(): the kernelInfos container receives the raw pointer.
    module0->getTranslationUnit()->programInfo.kernelInfos.push_back(kernelInfo.release());

    auto linkerInput = std::make_unique<::WhiteBox>();
    linkerInput->traits.requiresPatchingOfInstructionSegments = true;

    module0->getTranslationUnit()->programInfo.linkerInput = std::move(linkerInput);
    module0->unresolvedExternalsInfo.push_back({unresolvedRelocation});
    module0->unresolvedExternalsInfo.push_back({unresolvedRelocation2});
    module0->unresolvedExternalsInfo[0].instructionsSegmentId = 0u;
module0->unresolvedExternalsInfo[1].instructionsSegmentId = 0u; auto kernelImmData = std::make_unique>(device); kernelImmData->isaGraphicsAllocation.reset(neoDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties( {device->getRootDeviceIndex(), MemoryConstants::pageSize, NEO::AllocationType::KERNEL_ISA, neoDevice->getDeviceBitfield()})); module0->kernelImmDatas.push_back(std::move(kernelImmData)); module1->symbols[unresolvedRelocation.symbolName] = relocatedSymbol; module1->symbols[unresolvedRelocation2.symbolName] = relocatedSymbol2; MockGraphicsAllocation alloc; module1->exportedFunctionsSurface = &alloc; std::vector hModules = {module0->toHandle(), module1->toHandle()}; ze_result_t res = module0->performDynamicLink(2, hModules.data(), &dynLinkLog); EXPECT_EQ(ZE_RESULT_SUCCESS, res); size_t buildLogSize; zeModuleBuildLogGetString(dynLinkLog, &buildLogSize, nullptr); EXPECT_GT((int)buildLogSize, 0); char *logBuffer = new char[buildLogSize](); zeModuleBuildLogGetString(dynLinkLog, &buildLogSize, logBuffer); EXPECT_NE(logBuffer, ""); delete[] logBuffer; zeModuleBuildLogDestroy(dynLinkLog); } TEST_F(ModuleDynamicLinkTests, givenModuleWithUnresolvedSymbolsNotPresentInAnotherModuleWhenDynamicLinkThenLinkFailureIsReturnedAndLogged) { uint32_t offset = 0x20; uint32_t offset2 = 0x40; ze_module_build_log_handle_t dynLinkLog; NEO::Linker::RelocationInfo unresolvedRelocation; unresolvedRelocation.symbolName = "unresolved"; unresolvedRelocation.offset = offset; unresolvedRelocation.type = NEO::Linker::RelocationInfo::Type::Address; NEO::Linker::UnresolvedExternal unresolvedExternal; unresolvedExternal.unresolvedRelocation = unresolvedRelocation; NEO::Linker::RelocationInfo unresolvedRelocation2; unresolvedRelocation2.symbolName = "unresolved2"; unresolvedRelocation2.offset = offset2; unresolvedRelocation2.type = NEO::Linker::RelocationInfo::Type::Address; NEO::Linker::UnresolvedExternal unresolvedExternal2; unresolvedExternal2.unresolvedRelocation = 
unresolvedRelocation2; char kernelHeap[MemoryConstants::pageSize] = {}; auto kernelInfo = std::make_unique(); kernelInfo->heapInfo.pKernelHeap = kernelHeap; kernelInfo->heapInfo.KernelHeapSize = MemoryConstants::pageSize; module0->getTranslationUnit()->programInfo.kernelInfos.push_back(kernelInfo.release()); auto linkerInput = std::make_unique<::WhiteBox>(); linkerInput->traits.requiresPatchingOfInstructionSegments = true; module0->getTranslationUnit()->programInfo.linkerInput = std::move(linkerInput); module0->unresolvedExternalsInfo.push_back({unresolvedRelocation}); module0->unresolvedExternalsInfo.push_back({unresolvedRelocation2}); module0->unresolvedExternalsInfo[0].instructionsSegmentId = 0u; module0->unresolvedExternalsInfo[1].instructionsSegmentId = 0u; auto kernelImmData = std::make_unique>(device); kernelImmData->isaGraphicsAllocation.reset(neoDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties( {device->getRootDeviceIndex(), MemoryConstants::pageSize, NEO::AllocationType::KERNEL_ISA, neoDevice->getDeviceBitfield()})); module0->kernelImmDatas.push_back(std::move(kernelImmData)); std::vector hModules = {module0->toHandle(), module1->toHandle()}; ze_result_t res = module0->performDynamicLink(2, hModules.data(), &dynLinkLog); EXPECT_EQ(ZE_RESULT_ERROR_MODULE_LINK_FAILURE, res); size_t buildLogSize; zeModuleBuildLogGetString(dynLinkLog, &buildLogSize, nullptr); EXPECT_GT((int)buildLogSize, 0); char *logBuffer = new char[buildLogSize](); zeModuleBuildLogGetString(dynLinkLog, &buildLogSize, logBuffer); EXPECT_NE(logBuffer, ""); delete[] logBuffer; zeModuleBuildLogDestroy(dynLinkLog); } TEST_F(ModuleDynamicLinkTests, givenUnresolvedSymbolsWhenModuleIsCreatedThenIsaAllocationsAreNotCopied) { NEO::MockCompilerEnableGuard mock(true); auto cip = new NEO::MockCompilerInterfaceCaptureBuildOptions(); neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()]->compilerInterface.reset(cip); std::string testFile; 
retrieveBinaryKernelFilenameApiSpecific(testFile, binaryFilename + "_", ".bin");

    size_t size = 0;
    auto src = loadDataFromFile(testFile.c_str(), size);

    // Abort early if the binary could not be loaded.
    ASSERT_NE(0u, size);
    ASSERT_NE(nullptr, src);

    ze_module_desc_t moduleDesc = {};
    moduleDesc.format = ZE_MODULE_FORMAT_NATIVE;
    moduleDesc.pInputModule = reinterpret_cast(src.get());
    moduleDesc.inputSize = size;

    // No build log is requested for this module.
    ModuleBuildLog *moduleBuildLog = nullptr;

    auto module = std::unique_ptr(new Module(device, moduleBuildLog, ModuleType::User));
    ASSERT_NE(nullptr, module.get());

    NEO::Linker::RelocationInfo unresolvedRelocation;
    unresolvedRelocation.symbolName = "unresolved";

    // The linker input carries one data relocation and requests patching of the
    // global-variables buffer.
    auto linkerInput = std::make_unique<::WhiteBox>();
    linkerInput->dataRelocations.push_back(unresolvedRelocation);
    linkerInput->traits.requiresPatchingOfGlobalVariablesBuffer = true;

    // Both are attached before initialize() is called.
    module->unresolvedExternalsInfo.push_back({unresolvedRelocation});
    module->translationUnit->programInfo.linkerInput = std::move(linkerInput);

    module->initialize(&moduleDesc, neoDevice);

    for (auto &ki : module->getKernelImmutableDataVector()) {
        EXPECT_FALSE(ki->isIsaCopiedToAllocation());
    }

    EXPECT_FALSE(module->isFullyLinked);
}

// module0 declares dependencies on "funMod1" for both its external function
// "funMod0" and its kernel "kernel"; module1 provides "funMod1". After a
// successful link both funMod0 and the kernel are expected to report
// barrierCount 3, the value supplied for funMod1.
// NOTE(review): presumably the second field of the externalFunctions
// initializer is the barrier count and the dependency pairs are
// {used function, user} — confirm against the struct definitions.
TEST_F(ModuleDynamicLinkTests, givenModuleWithFunctionDependenciesWhenOtherModuleDefinesThisFunctionThenBarrierCountIsProperlyResolved) {
    std::vector hModules = {module0->toHandle(), module1->toHandle()};

    // Raw pointer; handed to programInfo.linkerInput via reset() below.
    auto linkerInput = new ::WhiteBox();
    linkerInput->extFunDependencies.push_back({"funMod1", "funMod0"});
    linkerInput->kernelDependencies.push_back({"funMod1", "kernel"});
    module0->translationUnit->programInfo.linkerInput.reset(linkerInput);

    module0->translationUnit->programInfo.externalFunctions.push_back({"funMod0", 1U, 128U, 8U});
    // Raw pointer kept so the kernel's barrier count can be checked afterwards.
    KernelInfo *ki = new KernelInfo();
    ki->kernelDescriptor.kernelMetadata.kernelName = "kernel";
    module0->translationUnit->programInfo.kernelInfos.push_back(ki);

    module1->translationUnit->programInfo.externalFunctions.push_back({"funMod1", 3U, 128U, 8U});

    ze_result_t res = module0->performDynamicLink(2, hModules.data(), nullptr);
EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    EXPECT_EQ(3U, module0->translationUnit->programInfo.externalFunctions[0].barrierCount);
    EXPECT_EQ(3U, ki->kernelDescriptor.kernelAttributes.barrierCount);
}

// Same dependency setup on module0 as the barrier-count scenario, but module1
// is not given a "funMod1" external function in this test; the link is
// expected to fail with ZE_RESULT_ERROR_MODULE_LINK_FAILURE.
TEST_F(ModuleDynamicLinkTests, givenModuleWithFunctionDependenciesWhenOtherModuleDoesNotDefineThisFunctionThenLinkFailureIsReturned) {
    std::vector hModules = {module0->toHandle(), module1->toHandle()};

    // Raw pointer; handed to programInfo.linkerInput via reset() below.
    auto linkerInput = new ::WhiteBox();
    linkerInput->extFunDependencies.push_back({"funMod1", "funMod0"});
    linkerInput->kernelDependencies.push_back({"funMod1", "kernel"});
    module0->translationUnit->programInfo.linkerInput.reset(linkerInput);

    module0->translationUnit->programInfo.externalFunctions.push_back({"funMod0", 1U, 128U, 8U});
    KernelInfo *ki = new KernelInfo();
    ki->kernelDescriptor.kernelMetadata.kernelName = "kernel";
    module0->translationUnit->programInfo.kernelInfos.push_back(ki);

    ze_result_t res = module0->performDynamicLink(2, hModules.data(), nullptr);
    EXPECT_EQ(ZE_RESULT_ERROR_MODULE_LINK_FAILURE, res);
}

// Fixture combining ModuleFixture with ::testing::Test. SetUp/TearDown simply
// forward to ModuleFixture.
class DeviceModuleSetArgBufferTest : public ModuleFixture, public ::testing::Test {
  public:
    void SetUp() override {
        ModuleFixture::SetUp();
    }

    void TearDown() override {
        ModuleFixture::TearDown();
    }

    // Creates a kernel named `kernelName` from the fixture's module and
    // allocates 4096 bytes of host memory for the given root device index.
    // Both results are checked with EXPECT_EQ, so a failure is recorded but
    // does not stop the caller.
    //   rootDeviceIndex - forwarded to allocHostMem
    //   ptr             - out: receives the host allocation
    //   kernelHandle    - out: receives the created kernel handle
    void createKernelAndAllocMemory(uint32_t rootDeviceIndex, void **ptr, ze_kernel_handle_t *kernelHandle) {
        ze_kernel_desc_t kernelDesc = {};
        kernelDesc.pKernelName = kernelName.c_str();
        ze_result_t res = module.get()->createKernel(&kernelDesc, kernelHandle);
        EXPECT_EQ(ZE_RESULT_SUCCESS, res);

        ze_host_mem_alloc_desc_t hostDesc = {};
        res = context->allocHostMem(&hostDesc, 4096u, rootDeviceIndex, ptr);
        EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    }
};

// Sets argument 0 first to a valid host allocation and then to nullptr. After
// the first call the value at the argument's stateless offset in cross-thread
// data must equal the buffer address; after the second it must differ. Both
// calls are expected to return ZE_RESULT_SUCCESS.
HWTEST_F(DeviceModuleSetArgBufferTest,
         givenValidMemoryUsedinFirstCallToSetArgBufferThenNullptrSetOnTheSecondCallThenArgBufferisUpdatedInEachCallAndSuccessIsReturned) {
    uint32_t rootDeviceIndex = 0;
    createModuleFromBinary();

    ze_kernel_handle_t kernelHandle;
    void *validBufferPtr = nullptr;
    createKernelAndAllocMemory(rootDeviceIndex,
&validBufferPtr, &kernelHandle);

    // NOTE(review): the casts below show no target type in this text; confirm
    // against the repository whether template arguments were lost.
    L0::KernelImp *kernel = reinterpret_cast(Kernel::fromHandle(kernelHandle));
    ze_result_t res = kernel->setArgBuffer(0, sizeof(validBufferPtr), &validBufferPtr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);

    // Read back what was written at the argument's stateless offset.
    auto arg = kernel->getImmutableData()->getDescriptor().payloadMappings.explicitArgs[0].as();
    auto crossThreadData = kernel->getCrossThreadData();
    auto argBufferPtr = ptrOffset(crossThreadData, arg.stateless);
    auto argBufferValue = *reinterpret_cast(const_cast(argBufferPtr));
    EXPECT_EQ(argBufferValue, reinterpret_cast(validBufferPtr));

    // Only allocations whose GPU address matches the buffer are checked; if
    // none matches, this loop asserts nothing.
    for (auto alloc : kernel->getResidencyContainer()) {
        if (alloc && alloc->getGpuAddress() == reinterpret_cast(validBufferPtr)) {
            EXPECT_EQ(rootDeviceIndex, alloc->getRootDeviceIndex());
        }
    }

    // Second call passes nullptr as the argument value.
    res = kernel->setArgBuffer(0, sizeof(validBufferPtr), nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);

    arg = kernel->getImmutableData()->getDescriptor().payloadMappings.explicitArgs[0].as();
    crossThreadData = kernel->getCrossThreadData();
    argBufferPtr = ptrOffset(crossThreadData, arg.stateless);
    argBufferValue = *reinterpret_cast(const_cast(argBufferPtr));
    // The stored value must no longer be the old buffer address.
    EXPECT_NE(argBufferValue, reinterpret_cast(validBufferPtr));

    context->freeMem(validBufferPtr);
    Kernel::fromHandle(kernelHandle)->destroy();
}

// Fixture combining MultiDeviceModuleFixture with ::testing::Test. SetUp and
// TearDown simply forward to MultiDeviceModuleFixture.
class MultiDeviceModuleSetArgBufferTest : public MultiDeviceModuleFixture, public ::testing::Test {
  public:
    void SetUp() override {
        MultiDeviceModuleFixture::SetUp();
    }

    void TearDown() override {
        MultiDeviceModuleFixture::TearDown();
    }

    // Creates a kernel named `kernelName` from modules[rootDeviceIndex] and
    // allocates 4096 bytes of host memory for that root device index. Both
    // results are checked with EXPECT_EQ, so a failure is recorded but does
    // not stop the caller.
    //   rootDeviceIndex - selects the module and is forwarded to allocHostMem
    //   ptr             - out: receives the host allocation
    //   kernelHandle    - out: receives the created kernel handle
    void createKernelAndAllocMemory(uint32_t rootDeviceIndex, void **ptr, ze_kernel_handle_t *kernelHandle) {
        ze_kernel_desc_t kernelDesc = {};
        kernelDesc.pKernelName = kernelName.c_str();
        ze_result_t res = modules[rootDeviceIndex].get()->createKernel(&kernelDesc, kernelHandle);
        EXPECT_EQ(ZE_RESULT_SUCCESS, res);

        ze_host_mem_alloc_desc_t hostDesc = {};
        res = context->allocHostMem(&hostDesc, 4096u, rootDeviceIndex, ptr);
        EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    }
};

HWTEST_F(MultiDeviceModuleSetArgBufferTest,
givenCallsToSetArgBufferThenAllocationIsSetForCorrectDevice) {
    // For every root device: build a module, create a kernel, set a host
    // allocation as argument 0 and check that any residency entry matching the
    // allocation's address reports the same root device index.
    for (uint32_t rootDeviceIndex = 0; rootDeviceIndex < numRootDevices; rootDeviceIndex++) {
        createModuleFromBinary(rootDeviceIndex);

        ze_kernel_handle_t kernelHandle;
        void *ptr = nullptr;
        createKernelAndAllocMemory(rootDeviceIndex, &ptr, &kernelHandle);

        L0::KernelImp *kernel = reinterpret_cast(Kernel::fromHandle(kernelHandle));
        // NOTE(review): the return value of setArgBuffer is not checked here.
        kernel->setArgBuffer(0, sizeof(ptr), &ptr);

        // If no entry matches the address, this loop asserts nothing.
        for (auto alloc : kernel->getResidencyContainer()) {
            if (alloc && alloc->getGpuAddress() == reinterpret_cast(ptr)) {
                EXPECT_EQ(rootDeviceIndex, alloc->getRootDeviceIndex());
            }
        }

        context->freeMem(ptr);
        Kernel::fromHandle(kernelHandle)->destroy();
    }
}

using ContextModuleCreateTest = Test;

// Loads the "test_kernel_" native binary and creates a module through
// context->createModule(); creation and the subsequent destroy() are both
// expected to return ZE_RESULT_SUCCESS.
HWTEST_F(ContextModuleCreateTest, givenCallToCreateModuleThenModuleIsReturned) {
    std::string testFile;
    retrieveBinaryKernelFilenameApiSpecific(testFile, "test_kernel_", ".bin");

    size_t size = 0;
    auto src = loadDataFromFile(testFile.c_str(), size);

    // Abort early if the binary could not be loaded.
    ASSERT_NE(0u, size);
    ASSERT_NE(nullptr, src);

    ze_module_desc_t moduleDesc = {};
    moduleDesc.format = ZE_MODULE_FORMAT_NATIVE;
    moduleDesc.pInputModule = reinterpret_cast(src.get());
    moduleDesc.inputSize = size;

    ze_module_handle_t hModule;
    ze_device_handle_t hDevice = device->toHandle();
    ze_result_t res = context->createModule(hDevice, &moduleDesc, &hModule, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);

    L0::Module *pModule = L0::Module::fromHandle(hModule);
    res = pModule->destroy();
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
}

using ModuleTranslationUnitTest = Test;

// Translation-unit mock that records which build entry points were invoked.
struct MockModuleTU : public L0::ModuleTranslationUnit {
    MockModuleTU(L0::Device *device) : L0::ModuleTranslationUnit(device) {}

    // Does no compilation: only records the call and reports success.
    bool buildFromSpirV(const char *input, uint32_t inputSize, const char *buildOptions, const char *internalBuildOptions,
                        const ze_module_constants_t *pConstants) override {
        wasBuildFromSpirVCalled = true;
        return true;
    }

    // Records the call, then defers to the base-class implementation.
    bool createFromNativeBinary(const char *input, size_t inputSize) override {
        wasCreateFromNativeBinaryCalled = true;
return L0::ModuleTranslationUnit::createFromNativeBinary(input, inputSize);
    }

    // Call-tracking flags, set by the overrides of this mock.
    bool wasBuildFromSpirVCalled = false;
    bool wasCreateFromNativeBinaryCalled = false;
};

// With RebuildPrecompiledKernels enabled, initializes a module from the
// "test_kernel_" ".gen" file. The native-binary path must be taken and
// buildFromSpirV must not be called.
HWTEST_F(ModuleTranslationUnitTest, GivenRebuildPrecompiledKernelsFlagAndFileWithoutIntermediateCodeWhenCreatingModuleFromNativeBinaryThenModuleIsNotRecompiled) {
    DebugManagerStateRestore dgbRestorer;
    NEO::DebugManager.flags.RebuildPrecompiledKernels.set(true);

    std::string testFile;
    retrieveBinaryKernelFilenameApiSpecific(testFile, "test_kernel_", ".gen");

    size_t size = 0;
    auto src = loadDataFromFile(testFile.c_str(), size);

    // Abort early if the binary could not be loaded.
    ASSERT_NE(0u, size);
    ASSERT_NE(nullptr, src);

    ze_module_desc_t moduleDesc = {};
    moduleDesc.format = ZE_MODULE_FORMAT_NATIVE;
    moduleDesc.pInputModule = reinterpret_cast(src.get());
    moduleDesc.inputSize = size;

    Module module(device, nullptr, ModuleType::User);
    // The module's translationUnit is reset to the mock; `tu` stays a
    // non-owning observer used for the checks below.
    MockModuleTU *tu = new MockModuleTU(device);
    module.translationUnit.reset(tu);

    bool success = module.initialize(&moduleDesc, neoDevice);
    EXPECT_TRUE(success);
    EXPECT_TRUE(tu->wasCreateFromNativeBinaryCalled);
    EXPECT_FALSE(tu->wasBuildFromSpirVCalled);
}

// With RebuildPrecompiledKernels enabled, initializes a module from the
// "test_kernel_" ".bin" file. The native-binary path must be taken, and
// buildFromSpirV must have been called exactly when the translation unit
// reports a non-zero irBinarySize.
HWTEST_F(ModuleTranslationUnitTest, GivenRebuildPrecompiledKernelsFlagAndFileWithIntermediateCodeWhenCreatingModuleFromNativeBinaryThenModuleIsRecompiled) {
    DebugManagerStateRestore dgbRestorer;
    NEO::DebugManager.flags.RebuildPrecompiledKernels.set(true);

    std::string testFile;
    retrieveBinaryKernelFilenameApiSpecific(testFile, "test_kernel_", ".bin");

    size_t size = 0;
    auto src = loadDataFromFile(testFile.c_str(), size);

    // Abort early if the binary could not be loaded.
    ASSERT_NE(0u, size);
    ASSERT_NE(nullptr, src);

    ze_module_desc_t moduleDesc = {};
    moduleDesc.format = ZE_MODULE_FORMAT_NATIVE;
    moduleDesc.pInputModule = reinterpret_cast(src.get());
    moduleDesc.inputSize = size;

    Module module(device, nullptr, ModuleType::User);
    // The module's translationUnit is reset to the mock; `tu` stays a
    // non-owning observer used for the checks below.
    MockModuleTU *tu = new MockModuleTU(device);
    module.translationUnit.reset(tu);

    bool success = module.initialize(&moduleDesc, neoDevice);
    EXPECT_TRUE(success);
    EXPECT_TRUE(tu->wasCreateFromNativeBinaryCalled);
EXPECT_EQ(tu->irBinarySize != 0, tu->wasBuildFromSpirVCalled);
}

// With RebuildPrecompiledKernels enabled and a build log attached, the log
// produced by module.initialize() must contain the
// CompilerWarnings::recompiledFromIr text.
HWTEST_F(ModuleTranslationUnitTest, GivenRebuildFlagWhenCreatingModuleFromNativeBinaryThenModuleRecompilationWarningIsIssued) {
    DebugManagerStateRestore dgbRestorer;
    NEO::DebugManager.flags.RebuildPrecompiledKernels.set(true);

    std::string testFile;
    retrieveBinaryKernelFilenameApiSpecific(testFile, "test_kernel_", ".bin");

    size_t size = 0;
    auto src = loadDataFromFile(testFile.c_str(), size);

    // Abort early if the binary could not be loaded.
    ASSERT_NE(0u, size);
    ASSERT_NE(nullptr, src);

    ze_module_desc_t moduleDesc = {};
    moduleDesc.format = ZE_MODULE_FORMAT_NATIVE;
    moduleDesc.pInputModule = reinterpret_cast(src.get());
    moduleDesc.inputSize = size;

    std::unique_ptr moduleBuildLog{ModuleBuildLog::create()};
    Module module(device, moduleBuildLog.get(), ModuleType::User);
    // The module's translationUnit is reset to the mock.
    MockModuleTU *tu = new MockModuleTU(device);
    module.translationUnit.reset(tu);

    bool success = module.initialize(&moduleDesc, neoDevice);
    ASSERT_TRUE(success);

    // Two-step read: query the size with a null buffer, then fetch the text.
    size_t buildLogSize{};
    const auto querySizeResult{moduleBuildLog->getString(&buildLogSize, nullptr)};
    ASSERT_EQ(ZE_RESULT_SUCCESS, querySizeResult);

    std::string buildLog(buildLogSize, '\0');
    const auto queryBuildLogResult{moduleBuildLog->getString(&buildLogSize, buildLog.data())};
    ASSERT_EQ(ZE_RESULT_SUCCESS, queryBuildLogResult);

    const auto containsWarning{buildLog.find(CompilerWarnings::recompiledFromIr.data()) != std::string::npos};
    EXPECT_TRUE(containsWarning);
}

// Same flow as the warning-issued scenario, but pBuildFlags is set to
// CompilerOptions::noRecompiledFromIr; the build log must then NOT contain the
// recompilation warning.
HWTEST_F(ModuleTranslationUnitTest, GivenRebuildFlagWhenCreatingModuleFromNativeBinaryAndWarningSuppressionIsPresentThenModuleRecompilationWarningIsNotIssued) {
    DebugManagerStateRestore dgbRestorer;
    NEO::DebugManager.flags.RebuildPrecompiledKernels.set(true);

    std::string testFile;
    retrieveBinaryKernelFilenameApiSpecific(testFile, "test_kernel_", ".bin");

    size_t size = 0;
    auto src = loadDataFromFile(testFile.c_str(), size);

    // Abort early if the binary could not be loaded.
    ASSERT_NE(0u, size);
    ASSERT_NE(nullptr, src);

    ze_module_desc_t moduleDesc = {};
    moduleDesc.format = ZE_MODULE_FORMAT_NATIVE;
    moduleDesc.pInputModule =
reinterpret_cast(src.get());
    moduleDesc.inputSize = size;
    // Build flag that is expected to suppress the recompilation warning.
    moduleDesc.pBuildFlags = CompilerOptions::noRecompiledFromIr.data();

    std::unique_ptr moduleBuildLog{ModuleBuildLog::create()};
    Module module(device, moduleBuildLog.get(), ModuleType::User);
    // The module's translationUnit is reset to the mock.
    MockModuleTU *tu = new MockModuleTU(device);
    module.translationUnit.reset(tu);

    bool success = module.initialize(&moduleDesc, neoDevice);
    ASSERT_TRUE(success);

    // Two-step read: query the size with a null buffer, then fetch the text.
    size_t buildLogSize{};
    const auto querySizeResult{moduleBuildLog->getString(&buildLogSize, nullptr)};
    ASSERT_EQ(ZE_RESULT_SUCCESS, querySizeResult);

    std::string buildLog(buildLogSize, '\0');
    const auto queryBuildLogResult{moduleBuildLog->getString(&buildLogSize, buildLog.data())};
    ASSERT_EQ(ZE_RESULT_SUCCESS, queryBuildLogResult);

    const auto containsWarning{buildLog.find(CompilerWarnings::recompiledFromIr.data()) != std::string::npos};
    EXPECT_FALSE(containsWarning);
}

// createFromNativeBinary() must accept a zebin whose ELF `machine` field equals
// the device's product family and reject one whose field is that value plus
// one.
HWTEST_F(ModuleTranslationUnitTest, WhenCreatingFromNativeBinaryThenSetsUpRequiredTargetProductProperly) {
    ZebinTestData::ValidEmptyProgram emptyProgram;

    auto hwInfo = device->getNEODevice()->getHardwareInfo();

    // Matching product family: expected to succeed.
    emptyProgram.elfHeader->machine = hwInfo.platform.eProductFamily;
    L0::ModuleTranslationUnit moduleTuValid(this->device);
    bool success = moduleTuValid.createFromNativeBinary(reinterpret_cast(emptyProgram.storage.data()), emptyProgram.storage.size());
    EXPECT_TRUE(success);

    // Product family incremented by one: expected to be rejected.
    emptyProgram.elfHeader->machine = hwInfo.platform.eProductFamily;
    ++emptyProgram.elfHeader->machine;
    L0::ModuleTranslationUnit moduleTuInvalid(this->device);
    success = moduleTuInvalid.createFromNativeBinary(reinterpret_cast(emptyProgram.storage.data()), emptyProgram.storage.size());
    EXPECT_FALSE(success);
}

// Builds a zebin with a .ze_info section describing two kernels plus a text
// section for each, and checks that createFromNativeBinary() succeeds and
// leaves a non-null programInfo.linkerInput.
HWTEST_F(ModuleTranslationUnitTest, WhenCreatingFromZeBinaryThenLinkerInputIsCreated) {
    // NOTE(review): the YAML in this raw string sits on a single line in this
    // text; confirm that its line breaks and indentation are intact in the
    // repository.
    std::string validZeInfo = std::string("version :\'") + toString(zeInfoDecoderVersion) + R"===(' kernels: - name : some_kernel execution_env : simd_size : 8 - name : some_other_kernel execution_env : simd_size : 32 )===";
ZebinTestData::ValidEmptyProgram zebin;
    // Replace the existing .ze_info section with the two-kernel description.
    zebin.removeSection(NEO::Elf::SHT_ZEBIN::SHT_ZEBIN_ZEINFO, NEO::Elf::SectionsNamesZebin::zeInfo);
    zebin.appendSection(NEO::Elf::SHT_ZEBIN::SHT_ZEBIN_ZEINFO, NEO::Elf::SectionsNamesZebin::zeInfo, ArrayRef::fromAny(validZeInfo.data(), validZeInfo.size()));
    // One (empty) text section per kernel named in .ze_info.
    zebin.appendSection(NEO::Elf::SHT_PROGBITS, NEO::Elf::SectionsNamesZebin::textPrefix.str() + "some_kernel", {});
    zebin.appendSection(NEO::Elf::SHT_PROGBITS, NEO::Elf::SectionsNamesZebin::textPrefix.str() + "some_other_kernel", {});

    auto hwInfo = device->getNEODevice()->getHardwareInfo();
    zebin.elfHeader->machine = hwInfo.platform.eProductFamily;

    L0::ModuleTranslationUnit moduleTuValid(this->device);
    bool success = moduleTuValid.createFromNativeBinary(reinterpret_cast(zebin.storage.data()), zebin.storage.size());
    EXPECT_TRUE(success);

    EXPECT_NE(nullptr, moduleTuValid.programInfo.linkerInput.get());
}

// Encodes a zebin executable with one kernel, a const-data and a global-data
// section, and a symbol table that exposes one global OBJECT symbol in each
// data section. After processUnpackedBinary() both the global constant buffer
// and the global variable buffer must have allocation type SVM_ZERO_COPY.
TEST_F(ModuleTranslationUnitTest, WhenCreatingFromZeBinaryAndGlobalsAreExportedThenTheirAllocationTypeIsSVM) {
    // NOTE(review): the YAML in this raw string sits on a single line in this
    // text; confirm that its line breaks and indentation are intact in the
    // repository.
    std::string zeInfo = std::string("version :\'") + toString(zeInfoDecoderVersion) + R"===(' kernels: - name : kernel execution_env : simd_size : 8 )===";
    MockElfEncoder<> elfEncoder;
    elfEncoder.getElfFileHeader().type = NEO::Elf::ET_ZEBIN_EXE;
    elfEncoder.appendSection(NEO::Elf::SHT_PROGBITS, NEO::Elf::SectionsNamesZebin::textPrefix.str() + "kernel", std::string{});
    // Remember each data section's index so the symbols can refer to it.
    elfEncoder.appendSection(NEO::Elf::SHT_PROGBITS, NEO::Elf::SectionsNamesZebin::dataConst, std::string{"12345678"});
    auto dataConstSectionIndex = elfEncoder.getLastSectionHeaderIndex();
    elfEncoder.appendSection(NEO::Elf::SHT_PROGBITS, NEO::Elf::SectionsNamesZebin::dataGlobal, std::string{"12345678"});
    auto dataGlobalSectionIndex = elfEncoder.getLastSectionHeaderIndex();

    NEO::Elf::ElfSymbolEntry symbolTable[2] = {};
    // Symbol 0 lives in the const-data section.
    symbolTable[0].name = decltype(symbolTable[0].name)(elfEncoder.appendSectionName("const.data"));
    symbolTable[0].info = NEO::Elf::SYMBOL_TABLE_TYPE::STT_OBJECT | NEO::Elf::SYMBOL_TABLE_BIND::STB_GLOBAL
<< 4;
    symbolTable[0].shndx = decltype(symbolTable[0].shndx)(dataConstSectionIndex);
    symbolTable[0].size = 4;
    symbolTable[0].value = 0;

    // Symbol 1 lives in the global-data section.
    symbolTable[1].name = decltype(symbolTable[1].name)(elfEncoder.appendSectionName("global.data"));
    // `<<` binds tighter than `|`: the bind value is shifted left by 4 first,
    // then OR-ed with the symbol type.
    symbolTable[1].info = NEO::Elf::SYMBOL_TABLE_TYPE::STT_OBJECT | NEO::Elf::SYMBOL_TABLE_BIND::STB_GLOBAL << 4;
    symbolTable[1].shndx = decltype(symbolTable[1].shndx)(dataGlobalSectionIndex);
    symbolTable[1].size = 4;
    symbolTable[1].value = 0;
    // The symbol table is appended as raw bytes, followed by .ze_info.
    elfEncoder.appendSection(NEO::Elf::SHT_SYMTAB, NEO::Elf::SectionsNamesZebin::symtab,
                             ArrayRef(reinterpret_cast(symbolTable), sizeof(symbolTable)));
    elfEncoder.appendSection(NEO::Elf::SHT_ZEBIN_ZEINFO, NEO::Elf::SectionsNamesZebin::zeInfo, zeInfo);
    auto zebin = elfEncoder.encode();

    // Hand the encoded bytes to the translation unit as its unpacked binary.
    L0::ModuleTranslationUnit moduleTu(this->device);
    moduleTu.unpackedDeviceBinarySize = zebin.size();
    moduleTu.unpackedDeviceBinary = std::make_unique(moduleTu.unpackedDeviceBinarySize);
    memcpy_s(moduleTu.unpackedDeviceBinary.get(), moduleTu.unpackedDeviceBinarySize,
             zebin.data(), zebin.size());
    auto retVal = moduleTu.processUnpackedBinary();
    EXPECT_TRUE(retVal);
    EXPECT_EQ(AllocationType::SVM_ZERO_COPY, moduleTu.globalConstBuffer->getAllocationType());
    EXPECT_EQ(AllocationType::SVM_ZERO_COPY, moduleTu.globalVarBuffer->getAllocationType());
}

// With the translation unit's options preset to "abcd" and a null buildOptions
// argument, a (deliberately failing) buildFromSpirV() must leave the options
// untouched and must have passed "abcd" to the compiler interface.
HWTEST_F(ModuleTranslationUnitTest, WhenBuildOptionsAreNullThenReuseExistingOptions) {
    // The root device environment's compilerInterface is reset to this mock.
    auto *pMockCompilerInterface = new MockCompilerInterface;
    auto &rootDeviceEnvironment = this->neoDevice->executionEnvironment->rootDeviceEnvironments[this->neoDevice->getRootDeviceIndex()];
    rootDeviceEnvironment->compilerInterface.reset(pMockCompilerInterface);

    L0::ModuleTranslationUnit moduleTu(this->device);
    moduleTu.options = "abcd";
    // The build is made to fail; hence ret is expected to be false.
    pMockCompilerInterface->failBuild = true;

    auto ret = moduleTu.buildFromSpirV("", 0U, nullptr, "", nullptr);
    EXPECT_FALSE(ret);
    EXPECT_STREQ("abcd", moduleTu.options.c_str());
    EXPECT_STREQ("abcd", pMockCompilerInterface->receivedApiOptions.c_str());
}
// With the DisableStatelessToStatefulOptimization debug flag set to 1, the
// internal options handed to the compiler interface must contain
// "cl-intel-greater-than-4GB-buffer-required".
HWTEST_F(ModuleTranslationUnitTest, WhenBuildOptionsAreNullThenReuseExistingOptions2) {
    // The root device environment's compilerInterface is reset to this mock.
    auto pMockCompilerInterface = new MockCompilerInterface;
    auto &rootDeviceEnvironment = this->neoDevice->executionEnvironment->rootDeviceEnvironments[this->neoDevice->getRootDeviceIndex()];
    rootDeviceEnvironment->compilerInterface.reset(pMockCompilerInterface);

    DebugManagerStateRestore restorer;
    DebugManager.flags.DisableStatelessToStatefulOptimization.set(1);

    MockModuleTranslationUnit moduleTu(this->device);
    auto ret = moduleTu.buildFromSpirV("", 0U, nullptr, "", nullptr);
    EXPECT_TRUE(ret);
    EXPECT_NE(pMockCompilerInterface->inputInternalOptions.find("cl-intel-greater-than-4GB-buffer-required"), std::string::npos);
}

// Builds twice, toggling capabilityTable.sharedSystemMemCapabilities: with the
// value 1 the greater-than-4GB option must be present in the internal options,
// with the value 0 it must be absent.
HWTEST_F(ModuleTranslationUnitTest, givenSystemSharedAllocationAllowedWhenBuildingModuleThen4GbBuffersAreRequired) {
    // The root device environment's compilerInterface is reset to this mock.
    auto mockCompilerInterface = new MockCompilerInterface;
    auto &rootDeviceEnvironment = neoDevice->executionEnvironment->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()];
    rootDeviceEnvironment->compilerInterface.reset(mockCompilerInterface);

    {
        // Capability enabled: option expected.
        neoDevice->getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable.sharedSystemMemCapabilities = 1;

        MockModuleTranslationUnit moduleTu(device);
        auto ret = moduleTu.buildFromSpirV("", 0U, nullptr, "", nullptr);
        EXPECT_TRUE(ret);
        EXPECT_NE(mockCompilerInterface->inputInternalOptions.find("cl-intel-greater-than-4GB-buffer-required"), std::string::npos);
    }

    {
        // Capability disabled: option must not be present.
        neoDevice->getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable.sharedSystemMemCapabilities = 0;

        MockModuleTranslationUnit moduleTu(device);
        auto ret = moduleTu.buildFromSpirV("", 0U, nullptr, "", nullptr);
        EXPECT_TRUE(ret);
        EXPECT_EQ(mockCompilerInterface->inputInternalOptions.find("cl-intel-greater-than-4GB-buffer-required"), std::string::npos);
    }
}

// getString() is given a buffer 100 bytes larger than the size it reported; it
// must succeed and write the actual size (log length + 1) back into
// buildLogSize.
TEST(ModuleBuildLog, WhenGreaterBufferIsPassedToGetStringThenOutputSizeIsOverridden) {
    const auto infoLog{"[INFO] This is a log!"};
    const auto infoLogLength{strlen(infoLog)};

    const
auto moduleBuildLog{ModuleBuildLog::create()}; moduleBuildLog->appendString(infoLog, infoLogLength); size_t buildLogSize{0}; const auto querySizeResult{moduleBuildLog->getString(&buildLogSize, nullptr)}; EXPECT_EQ(ZE_RESULT_SUCCESS, querySizeResult); EXPECT_EQ(infoLogLength + 1, buildLogSize); const auto bufferSize{buildLogSize + 100}; std::string buffer(bufferSize, '\0'); buildLogSize = bufferSize; const auto queryBuildLogResult{moduleBuildLog->getString(&buildLogSize, buffer.data())}; EXPECT_EQ(ZE_RESULT_SUCCESS, queryBuildLogResult); EXPECT_GT(bufferSize, buildLogSize); EXPECT_EQ(infoLogLength + 1, buildLogSize); EXPECT_STREQ(infoLog, buffer.c_str()); const auto destroyResult{moduleBuildLog->destroy()}; EXPECT_EQ(ZE_RESULT_SUCCESS, destroyResult); } TEST(ModuleBuildLog, WhenTooSmallBufferIsPassedToGetStringThenErrorIsReturned) { const auto sampleLog{"Sample log!"}; const auto moduleBuildLog{ModuleBuildLog::create()}; moduleBuildLog->appendString(sampleLog, strlen(sampleLog)); std::array buffer{}; size_t buildLogSize{buffer.size()}; const auto queryBuildLogResult{moduleBuildLog->getString(&buildLogSize, buffer.data())}; EXPECT_EQ(ZE_RESULT_ERROR_INVALID_SIZE, queryBuildLogResult); const auto destroyResult{moduleBuildLog->destroy()}; EXPECT_EQ(ZE_RESULT_SUCCESS, destroyResult); } using PrintfModuleTest = Test; HWTEST_F(PrintfModuleTest, GivenModuleWithPrintfWhenKernelIsCreatedThenPrintfAllocationIsPlacedInResidencyContainer) { std::string testFile; retrieveBinaryKernelFilenameApiSpecific(testFile, "test_kernel_", ".gen"); size_t size = 0; auto src = loadDataFromFile(testFile.c_str(), size); ASSERT_NE(0u, size); ASSERT_NE(nullptr, src); ze_module_desc_t moduleDesc = {}; moduleDesc.format = ZE_MODULE_FORMAT_NATIVE; moduleDesc.pInputModule = reinterpret_cast(src.get()); moduleDesc.inputSize = size; auto module = std::unique_ptr(Module::create(device, &moduleDesc, nullptr, ModuleType::User)); auto kernel = std::make_unique>(); ASSERT_NE(nullptr, kernel); 
kernel->module = module.get(); ze_kernel_desc_t kernelDesc = {}; kernelDesc.pKernelName = "test"; kernel->initialize(&kernelDesc); auto &container = kernel->residencyContainer; auto printfPos = std::find(container.begin(), container.end(), kernel->getPrintfBufferAllocation()); EXPECT_NE(container.end(), printfPos); bool correctPos = printfPos >= container.begin() + kernel->getImmutableData()->getDescriptor().payloadMappings.explicitArgs.size(); EXPECT_TRUE(correctPos); } TEST(BuildOptions, givenNoSrcOptionNameInSrcNamesWhenMovingBuildOptionsThenFalseIsReturned) { std::string srcNames = NEO::CompilerOptions::concatenate(NEO::CompilerOptions::fastRelaxedMath, NEO::CompilerOptions::finiteMathOnly); std::string dstNames; auto result = moveBuildOption(dstNames, srcNames, BuildOptions::optDisable, NEO::CompilerOptions::optDisable); EXPECT_FALSE(result); } TEST(BuildOptions, givenSrcOptionNameInSrcNamesWhenMovingBuildOptionsThenOptionIsRemovedFromSrcNamesAndTranslatedOptionsStoredInDstNames) { std::string srcNames = NEO::CompilerOptions::concatenate(NEO::CompilerOptions::fastRelaxedMath, NEO::CompilerOptions::optDisable); std::string dstNames; auto result = moveBuildOption(dstNames, srcNames, BuildOptions::optDisable, NEO::CompilerOptions::optDisable); EXPECT_TRUE(result); EXPECT_EQ(BuildOptions::optDisable, dstNames); EXPECT_EQ(std::string::npos, srcNames.find(NEO::CompilerOptions::optDisable.str())); } TEST(BuildOptions, givenSrcOptLevelInSrcNamesWhenMovingBuildOptionsThenOptionIsRemovedFromSrcNamesAndTranslatedOptionsStoredInDstNames) { std::string srcNames = NEO::CompilerOptions::concatenate(NEO::CompilerOptions::fastRelaxedMath, BuildOptions::optLevel); srcNames += "=2"; std::string dstNames; auto result = moveBuildOption(dstNames, srcNames, NEO::CompilerOptions::optLevel, BuildOptions::optLevel); EXPECT_TRUE(result); EXPECT_EQ(NEO::CompilerOptions::optLevel.str() + std::string("2"), dstNames); EXPECT_EQ(std::string::npos, 
srcNames.find(BuildOptions::optLevel.str())); EXPECT_EQ(std::string::npos, srcNames.find(std::string("=2"))); } TEST(BuildOptions, givenSrcOptLevelWithoutLevelIntegerInSrcNamesWhenMovingBuildOptionsThenFalseIsReturned) { std::string srcNames = NEO::CompilerOptions::concatenate(NEO::CompilerOptions::fastRelaxedMath, BuildOptions::optLevel); std::string dstNames; auto result = moveBuildOption(dstNames, srcNames, NEO::CompilerOptions::optLevel, BuildOptions::optLevel); EXPECT_FALSE(result); } TEST_F(ModuleTest, givenInternalOptionsWhenBindlessEnabledThenBindlesOptionsPassed) { DebugManagerStateRestore restorer; DebugManager.flags.UseBindlessMode.set(1); auto module = std::make_unique(device, nullptr, ModuleType::User); ASSERT_NE(nullptr, module); std::string buildOptions; std::string internalBuildOptions; module->createBuildOptions("", buildOptions, internalBuildOptions); EXPECT_TRUE(NEO::CompilerOptions::contains(internalBuildOptions, NEO::CompilerOptions::bindlessMode)); } TEST_F(ModuleTest, givenInternalOptionsWhenBuildFlagsIsNullPtrAndBindlessEnabledThenBindlesOptionsPassed) { DebugManagerStateRestore restorer; DebugManager.flags.UseBindlessMode.set(1); auto module = std::make_unique(device, nullptr, ModuleType::User); ASSERT_NE(nullptr, module); std::string buildOptions; std::string internalBuildOptions; module->createBuildOptions(nullptr, buildOptions, internalBuildOptions); EXPECT_TRUE(NEO::CompilerOptions::contains(internalBuildOptions, NEO::CompilerOptions::bindlessMode)); } TEST_F(ModuleTest, givenInternalOptionsWhenBindlessDisabledThenBindlesOptionsNotPassed) { DebugManagerStateRestore restorer; DebugManager.flags.UseBindlessMode.set(0); auto module = std::make_unique(device, nullptr, ModuleType::User); ASSERT_NE(nullptr, module); std::string buildOptions; std::string internalBuildOptions; module->createBuildOptions("", buildOptions, internalBuildOptions); EXPECT_FALSE(NEO::CompilerOptions::contains(internalBuildOptions, 
NEO::CompilerOptions::bindlessMode)); } TEST_F(ModuleTest, GivenInjectInternalBuildOptionsWhenBuildingUserModuleThenInternalOptionsAreAppended) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.InjectInternalBuildOptions.set(" -abc"); NEO::MockCompilerEnableGuard mock(true); auto cip = new NEO::MockCompilerInterfaceCaptureBuildOptions(); device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]->compilerInterface.reset(cip); uint8_t binary[10]; ze_module_desc_t moduleDesc = {}; moduleDesc.format = ZE_MODULE_FORMAT_IL_SPIRV; moduleDesc.pInputModule = binary; moduleDesc.inputSize = 10; ModuleBuildLog *moduleBuildLog = nullptr; auto module = std::unique_ptr(new L0::ModuleImp(device, moduleBuildLog, ModuleType::User)); ASSERT_NE(nullptr, module.get()); module->initialize(&moduleDesc, device->getNEODevice()); EXPECT_TRUE(CompilerOptions::contains(cip->buildInternalOptions, "-abc")); }; TEST_F(ModuleTest, GivenInjectInternalBuildOptionsWhenBuildingBuiltinModuleThenInternalOptionsAreNotAppended) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.InjectInternalBuildOptions.set(" -abc"); NEO::MockCompilerEnableGuard mock(true); auto cip = new NEO::MockCompilerInterfaceCaptureBuildOptions(); device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]->compilerInterface.reset(cip); uint8_t binary[10]; ze_module_desc_t moduleDesc = {}; moduleDesc.format = ZE_MODULE_FORMAT_IL_SPIRV; moduleDesc.pInputModule = binary; moduleDesc.inputSize = 10; ModuleBuildLog *moduleBuildLog = nullptr; auto module = std::unique_ptr(new L0::ModuleImp(device, moduleBuildLog, ModuleType::Builtin)); ASSERT_NE(nullptr, module.get()); module->initialize(&moduleDesc, device->getNEODevice()); EXPECT_FALSE(CompilerOptions::contains(cip->buildInternalOptions, "-abc")); }; using ModuleDebugDataTest = Test; TEST_F(ModuleDebugDataTest, 
GivenDebugDataWithRelocationsWhenCreatingRelocatedDebugDataThenRelocationsAreApplied) { auto cip = new NEO::MockCompilerInterfaceCaptureBuildOptions(); neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]->compilerInterface.reset(cip); uint8_t binary[10]; ze_module_desc_t moduleDesc = {}; moduleDesc.format = ZE_MODULE_FORMAT_IL_SPIRV; moduleDesc.pInputModule = binary; moduleDesc.inputSize = 10; ModuleBuildLog *moduleBuildLog = nullptr; std::unique_ptr module = std::make_unique(device, moduleBuildLog, ModuleType::User); module->translationUnit = std::make_unique(device); module->translationUnit->globalVarBuffer = neoDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties( {device->getRootDeviceIndex(), MemoryConstants::pageSize, NEO::AllocationType::BUFFER, neoDevice->getDeviceBitfield()}); module->translationUnit->globalConstBuffer = neoDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties( {device->getRootDeviceIndex(), MemoryConstants::pageSize, NEO::AllocationType::BUFFER, neoDevice->getDeviceBitfield()}); uint32_t kernelHeap = 0; auto kernelInfo = new KernelInfo(); kernelInfo->heapInfo.KernelHeapSize = 1; kernelInfo->heapInfo.pKernelHeap = &kernelHeap; kernelInfo->kernelDescriptor.payloadMappings.implicitArgs.systemThreadSurfaceAddress.bindful = 0; kernelInfo->kernelDescriptor.external.debugData = std::make_unique(); auto debugData = MockElfEncoder<>::createRelocateableDebugDataElf(); kernelInfo->kernelDescriptor.external.debugData->vIsaSize = static_cast(debugData.size()); kernelInfo->kernelDescriptor.external.debugData->vIsa = reinterpret_cast(debugData.data()); // pass kernelInfo ownership to programInfo module->translationUnit->programInfo.kernelInfos.push_back(kernelInfo); std::unique_ptr> kernelImmData{new WhiteBox<::L0::KernelImmutableData>(this->device)}; kernelImmData->initialize(kernelInfo, device, 0, module->translationUnit->globalConstBuffer, module->translationUnit->globalVarBuffer, false); 
kernelImmData->createRelocatedDebugData(module->translationUnit->globalConstBuffer, module->translationUnit->globalVarBuffer); module->kernelImmDatas.push_back(std::move(kernelImmData)); EXPECT_NE(nullptr, kernelInfo->kernelDescriptor.external.relocatedDebugData); uint64_t *relocAddress = reinterpret_cast(kernelInfo->kernelDescriptor.external.relocatedDebugData.get() + 600); auto expectedValue = module->kernelImmDatas[0]->getIsaGraphicsAllocation()->getGpuAddress() + 0x1a8; EXPECT_EQ(expectedValue, *relocAddress); } TEST_F(ModuleTest, givenModuleWithSymbolWhenGettingGlobalPointerThenSizeAndPointerAreReurned) { uint64_t gpuAddress = 0x12345000; NEO::SymbolInfo symbolInfo{0, 1024u, SegmentType::GlobalVariables}; NEO::Linker::RelocatedSymbol relocatedSymbol{symbolInfo, gpuAddress}; auto module0 = std::make_unique(device, nullptr, ModuleType::User); module0->symbols["symbol"] = relocatedSymbol; size_t size = 0; void *ptr = nullptr; auto result = module0->getGlobalPointer("symbol", &size, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(1024u, size); EXPECT_EQ(gpuAddress, reinterpret_cast(ptr)); } TEST_F(ModuleTest, givenModuleWithSymbolWhenGettingGlobalPointerWithNullptrInputsThenSuccessIsReturned) { uint64_t gpuAddress = 0x12345000; NEO::SymbolInfo symbolInfo{0, 1024u, SegmentType::GlobalVariables}; NEO::Linker::RelocatedSymbol relocatedSymbol{symbolInfo, gpuAddress}; auto module0 = std::make_unique(device, nullptr, ModuleType::User); module0->symbols["symbol"] = relocatedSymbol; auto result = module0->getGlobalPointer("symbol", nullptr, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST_F(ModuleTest, givenModuleWithGlobalSymbolMapWhenGettingGlobalPointerByHostSymbolNameExistingInMapThenCorrectPointerAndSuccessIsReturned) { std::unordered_map mapping; mapping["devSymbolOne"] = "hostSymbolOne"; mapping["devSymbolTwo"] = "hostSymbolTwo"; size_t symbolsSize = 1024u; uint64_t globalVarGpuAddress = 0x12345000; NEO::SymbolInfo globalVariablesSymbolInfo{0, 
static_cast(symbolsSize), SegmentType::GlobalVariables}; NEO::Linker::RelocatedSymbol globalVariablesRelocatedSymbol{globalVariablesSymbolInfo, globalVarGpuAddress}; uint64_t globalConstGpuAddress = 0x12347000; NEO::SymbolInfo globalConstantsSymbolInfo{0, static_cast(symbolsSize), SegmentType::GlobalConstants}; NEO::Linker::RelocatedSymbol globalConstansRelocatedSymbol{globalConstantsSymbolInfo, globalConstGpuAddress}; auto module0 = std::make_unique(device, nullptr, ModuleType::User); module0->symbols["devSymbolOne"] = globalVariablesRelocatedSymbol; module0->symbols["devSymbolTwo"] = globalConstansRelocatedSymbol; auto success = module0->populateHostGlobalSymbolsMap(mapping); EXPECT_TRUE(success); EXPECT_TRUE(module0->getTranslationUnit()->buildLog.empty()); size_t size = 0; void *ptr = nullptr; auto result = module0->getGlobalPointer("hostSymbolOne", &size, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(symbolsSize, size); EXPECT_EQ(globalVarGpuAddress, reinterpret_cast(ptr)); result = module0->getGlobalPointer("hostSymbolTwo", &size, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(symbolsSize, size); EXPECT_EQ(globalConstGpuAddress, reinterpret_cast(ptr)); } TEST_F(ModuleTest, givenModuleWithGlobalSymbolsMapWhenPopulatingMapWithSymbolNotFoundInRelocatedSymbolsMapThenPrintErrorStringAndReturnFalse) { std::unordered_map mapping; std::string notFoundDevSymbolName = "anotherDevSymbolOne"; mapping[notFoundDevSymbolName] = "anotherHostSymbolOne"; auto module0 = std::make_unique(device, nullptr, ModuleType::User); EXPECT_EQ(0u, module0->symbols.count(notFoundDevSymbolName)); auto result = module0->populateHostGlobalSymbolsMap(mapping); EXPECT_FALSE(result); std::string expectedErrorOutput = "Error: No symbol found with given device name: " + notFoundDevSymbolName + ".\n"; EXPECT_STREQ(expectedErrorOutput.c_str(), module0->getTranslationUnit()->buildLog.c_str()); } TEST_F(ModuleTest, 
givenModuleWithGlobalSymbolsMapWhenPopulatingMapWithSymbolFromIncorrectSegmentThenPrintErrorStringAndReturnFalse) { std::unordered_map mapping; std::string incorrectDevSymbolName = "incorrectSegmentDevSymbolOne"; mapping[incorrectDevSymbolName] = "incorrectSegmentHostSymbolOne"; size_t symbolSize = 1024u; uint64_t gpuAddress = 0x12345000; NEO::SymbolInfo symbolInfo{0, static_cast(symbolSize), SegmentType::Instructions}; NEO::Linker::RelocatedSymbol relocatedSymbol{symbolInfo, gpuAddress}; auto module0 = std::make_unique(device, nullptr, ModuleType::User); module0->symbols[incorrectDevSymbolName] = relocatedSymbol; auto result = module0->populateHostGlobalSymbolsMap(mapping); EXPECT_FALSE(result); std::string expectedErrorOutput = "Error: Symbol with given device name: " + incorrectDevSymbolName + " is not in .data segment.\n"; EXPECT_STREQ(expectedErrorOutput.c_str(), module0->getTranslationUnit()->buildLog.c_str()); } using ModuleTests = Test; TEST_F(ModuleTests, whenCopyingPatchedSegmentsThenAllocationsAreSetWritableForTbxAndAub) { auto pModule = std::make_unique(device, nullptr, ModuleType::User); char data[1]{}; auto kernelInfo = std::make_unique(); kernelInfo->heapInfo.KernelHeapSize = 1; kernelInfo->heapInfo.pKernelHeap = data; std::unique_ptr> kernelImmData{new WhiteBox<::L0::KernelImmutableData>(this->device)}; kernelImmData->initialize(kernelInfo.get(), device, 0, nullptr, nullptr, false); pModule->kernelImmDatas.push_back(std::move(kernelImmData)); auto linkerInput = std::make_unique<::WhiteBox>(); linkerInput->traits.requiresPatchingOfInstructionSegments = true; pModule->translationUnit->programInfo.linkerInput = std::move(linkerInput); NEO::Linker::PatchableSegments segments{{data, 1}}; auto allocation = pModule->kernelImmDatas[0]->getIsaGraphicsAllocation(); allocation->setTbxWritable(false, std::numeric_limits::max()); allocation->setAubWritable(false, std::numeric_limits::max()); pModule->copyPatchedSegments(segments); 
EXPECT_TRUE(allocation->isTbxWritable(std::numeric_limits::max())); EXPECT_TRUE(allocation->isAubWritable(std::numeric_limits::max())); } TEST_F(ModuleTests, givenConstDataStringSectionWhenLinkingModuleThenSegmentIsPatched) { auto pModule = std::make_unique(device, nullptr, ModuleType::User); char data[64]{}; auto kernelInfo = new KernelInfo(); kernelInfo->heapInfo.KernelHeapSize = 64; kernelInfo->heapInfo.pKernelHeap = data; std::unique_ptr> kernelImmData{new WhiteBox<::L0::KernelImmutableData>(this->device)}; kernelImmData->initialize(kernelInfo, device, 0, nullptr, nullptr, false); auto patchAddr = reinterpret_cast(ptrOffset(kernelImmData->isaGraphicsAllocation->getUnderlyingBuffer(), 0x8)); pModule->kernelImmDatas.push_back(std::move(kernelImmData)); pModule->translationUnit->programInfo.kernelInfos.push_back(kernelInfo); auto linkerInput = std::make_unique<::WhiteBox>(); linkerInput->relocations.push_back({{".str", 0x8, LinkerInput::RelocationInfo::Type::Address, SegmentType::Instructions}}); linkerInput->symbols.insert({".str", {0x0, 0x8, SegmentType::GlobalStrings}}); linkerInput->traits.requiresPatchingOfInstructionSegments = true; pModule->translationUnit->programInfo.linkerInput = std::move(linkerInput); const char constStringData[] = "Hello World!\n"; auto stringsAddr = reinterpret_cast(constStringData); pModule->translationUnit->programInfo.globalStrings.initData = constStringData; pModule->translationUnit->programInfo.globalStrings.size = sizeof(constStringData); auto status = pModule->linkBinary(); EXPECT_TRUE(status); EXPECT_EQ(static_cast(stringsAddr), *reinterpret_cast(patchAddr)); } TEST_F(ModuleTests, givenImplicitArgsRelocationAndStackCallsWhenLinkingModuleThenSegmentIsPatchedAndImplicitArgsAreRequired) { auto pModule = std::make_unique(device, nullptr, ModuleType::User); char data[64]{}; auto kernelInfo = new KernelInfo(); kernelInfo->heapInfo.KernelHeapSize = 64; kernelInfo->heapInfo.pKernelHeap = data; std::unique_ptr> kernelImmData{new 
WhiteBox<::L0::KernelImmutableData>(this->device)}; kernelImmData->initialize(kernelInfo, device, 0, nullptr, nullptr, false); kernelImmData->kernelDescriptor->kernelAttributes.flags.useStackCalls = true; auto isaCpuPtr = reinterpret_cast(kernelImmData->isaGraphicsAllocation->getUnderlyingBuffer()); pModule->kernelImmDatas.push_back(std::move(kernelImmData)); pModule->translationUnit->programInfo.kernelInfos.push_back(kernelInfo); auto linkerInput = std::make_unique<::WhiteBox>(); linkerInput->traits.requiresPatchingOfInstructionSegments = true; linkerInput->relocations.push_back({{implicitArgsRelocationSymbolNames[0], 0x8, LinkerInput::RelocationInfo::Type::AddressLow, SegmentType::Instructions}}); pModule->translationUnit->programInfo.linkerInput = std::move(linkerInput); EXPECT_FALSE(kernelInfo->kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs); auto status = pModule->linkBinary(); EXPECT_TRUE(status); EXPECT_EQ(sizeof(ImplicitArgs), *reinterpret_cast(ptrOffset(isaCpuPtr, 0x8))); EXPECT_TRUE(kernelInfo->kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs); } TEST_F(ModuleTests, givenImplicitArgsRelocationAndDebuggerEnabledWhenLinkingModuleThenSegmentIsPatchedAndImplicitArgsAreRequired) { if (!defaultHwInfo->capabilityTable.debuggerSupported) { GTEST_SKIP(); } DebugManagerStateRestore restorer; DebugManager.flags.EnableMockSourceLevelDebugger.set(1); auto pModule = std::make_unique(device, nullptr, ModuleType::User); device->getNEODevice()->getRootDeviceEnvironmentRef().initDebugger(); EXPECT_NE(nullptr, neoDevice->getDebugger()); char data[64]{}; auto kernelInfo = new KernelInfo(); kernelInfo->heapInfo.KernelHeapSize = 64; kernelInfo->heapInfo.pKernelHeap = data; std::unique_ptr> kernelImmData{new WhiteBox<::L0::KernelImmutableData>(this->device)}; kernelImmData->initialize(kernelInfo, device, 0, nullptr, nullptr, false); kernelImmData->kernelDescriptor->kernelAttributes.flags.useStackCalls = false; auto isaCpuPtr = 
reinterpret_cast(kernelImmData->isaGraphicsAllocation->getUnderlyingBuffer()); pModule->kernelImmDatas.push_back(std::move(kernelImmData)); pModule->translationUnit->programInfo.kernelInfos.push_back(kernelInfo); auto linkerInput = std::make_unique<::WhiteBox>(); linkerInput->traits.requiresPatchingOfInstructionSegments = true; linkerInput->relocations.push_back({{implicitArgsRelocationSymbolNames[0], 0x8, LinkerInput::RelocationInfo::Type::AddressLow, SegmentType::Instructions}}); pModule->translationUnit->programInfo.linkerInput = std::move(linkerInput); EXPECT_FALSE(kernelInfo->kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs); auto status = pModule->linkBinary(); EXPECT_TRUE(status); EXPECT_EQ(sizeof(ImplicitArgs), *reinterpret_cast(ptrOffset(isaCpuPtr, 0x8))); EXPECT_TRUE(kernelInfo->kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs); } TEST_F(ModuleTests, givenImplicitArgsRelocationAndNoDebuggerOrStackCallsWhenLinkingModuleThenSegmentIsPatchedAndImplicitArgsAreNotRequired) { auto pModule = std::make_unique(device, nullptr, ModuleType::User); EXPECT_EQ(nullptr, neoDevice->getDebugger()); char data[64]{}; auto kernelInfo = new KernelInfo(); kernelInfo->heapInfo.KernelHeapSize = 64; kernelInfo->heapInfo.pKernelHeap = data; std::unique_ptr> kernelImmData{new WhiteBox<::L0::KernelImmutableData>(this->device)}; kernelImmData->initialize(kernelInfo, device, 0, nullptr, nullptr, false); kernelImmData->kernelDescriptor->kernelAttributes.flags.useStackCalls = false; auto isaCpuPtr = reinterpret_cast(kernelImmData->isaGraphicsAllocation->getUnderlyingBuffer()); pModule->kernelImmDatas.push_back(std::move(kernelImmData)); pModule->translationUnit->programInfo.kernelInfos.push_back(kernelInfo); auto linkerInput = std::make_unique<::WhiteBox>(); linkerInput->traits.requiresPatchingOfInstructionSegments = true; linkerInput->relocations.push_back({{implicitArgsRelocationSymbolNames[0], 0x8, LinkerInput::RelocationInfo::Type::AddressLow, 
SegmentType::Instructions}}); pModule->translationUnit->programInfo.linkerInput = std::move(linkerInput); EXPECT_FALSE(kernelInfo->kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs); auto status = pModule->linkBinary(); EXPECT_TRUE(status); EXPECT_EQ(0u, *reinterpret_cast(ptrOffset(isaCpuPtr, 0x8))); EXPECT_FALSE(kernelInfo->kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs); } using ModuleIsaCopyTest = Test; TEST_F(ModuleIsaCopyTest, whenModuleIsInitializedThenIsaIsCopied) { MockImmutableMemoryManager *mockMemoryManager = static_cast(device->getNEODevice()->getMemoryManager()); uint32_t perHwThreadPrivateMemorySizeRequested = 32u; bool isInternal = false; std::unique_ptr mockKernelImmData = std::make_unique(perHwThreadPrivateMemorySizeRequested); uint32_t previouscopyMemoryToAllocationCalledTimes = mockMemoryManager->copyMemoryToAllocationCalledTimes; createModuleFromBinary(perHwThreadPrivateMemorySizeRequested, isInternal, mockKernelImmData.get()); uint32_t numOfKernels = static_cast(module->getKernelImmutableDataVector().size()); const uint32_t numOfGlobalBuffers = 1; EXPECT_EQ(previouscopyMemoryToAllocationCalledTimes + numOfGlobalBuffers + numOfKernels, mockMemoryManager->copyMemoryToAllocationCalledTimes); for (auto &kid : module->getKernelImmutableDataVector()) { EXPECT_TRUE(kid->isIsaCopiedToAllocation()); } } using ModuleWithZebinTest = Test; TEST_F(ModuleWithZebinTest, givenNoZebinThenSegmentsAreEmpty) { auto segments = module->getZebinSegments(); EXPECT_EQ(std::numeric_limits::max(), segments.constData.address); EXPECT_EQ(0ULL, segments.constData.size); EXPECT_EQ(std::numeric_limits::max(), segments.varData.address); EXPECT_EQ(0ULL, segments.varData.size); EXPECT_EQ(std::numeric_limits::max(), segments.stringData.address); EXPECT_EQ(0ULL, segments.stringData.size); EXPECT_TRUE(segments.nameToSegMap.empty()); } TEST_F(ModuleWithZebinTest, givenZebinSegmentsThenSegmentsArePopulated) { module->addSegments(); auto segments = 
module->getZebinSegments(); auto checkGPUSeg = [](NEO::GraphicsAllocation *alloc, NEO::Debug::Segments::Segment segment) { EXPECT_EQ(alloc->getGpuAddress(), segment.address); EXPECT_EQ(alloc->getUnderlyingBufferSize(), segment.size); }; checkGPUSeg(module->translationUnit->globalConstBuffer, segments.constData); checkGPUSeg(module->translationUnit->globalConstBuffer, segments.varData); checkGPUSeg(module->kernelImmDatas[0]->getIsaGraphicsAllocation(), segments.nameToSegMap["kernel"]); EXPECT_EQ(reinterpret_cast(module->translationUnit->programInfo.globalStrings.initData), segments.stringData.address); EXPECT_EQ(module->translationUnit->programInfo.globalStrings.size, segments.stringData.size); } TEST_F(ModuleWithZebinTest, givenValidZebinWhenGettingDebugInfoThenDebugZebinIsCreatedAndReturned) { module->addEmptyZebin(); size_t debugDataSize; module->getDebugInfo(&debugDataSize, nullptr); auto debugData = std::make_unique(debugDataSize); ze_result_t retCode = module->getDebugInfo(&debugDataSize, debugData.get()); ASSERT_NE(nullptr, module->translationUnit->debugData.get()); EXPECT_EQ(0, memcmp(module->translationUnit->debugData.get(), debugData.get(), debugDataSize)); EXPECT_EQ(retCode, ZE_RESULT_SUCCESS); } TEST_F(ModuleWithZebinTest, givenValidZebinAndPassedDataSmallerThanDebugDataThenErrorIsReturned) { module->addEmptyZebin(); size_t debugDataSize; module->getDebugInfo(&debugDataSize, nullptr); auto debugData = std::make_unique(debugDataSize); debugDataSize = 0; ze_result_t errorCode = module->getDebugInfo(&debugDataSize, debugData.get()); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, errorCode); } TEST_F(ModuleWithZebinTest, givenNonZebinaryFormatWhenGettingDebugInfoThenDebugZebinIsNotCreated) { size_t mockProgramSize = sizeof(Elf::ElfFileHeader); module->translationUnit->unpackedDeviceBinary = std::make_unique(mockProgramSize); module->translationUnit->unpackedDeviceBinarySize = mockProgramSize; size_t debugDataSize; ze_result_t retCode = 
module->getDebugInfo(&debugDataSize, nullptr); EXPECT_EQ(debugDataSize, 0u); EXPECT_EQ(retCode, ZE_RESULT_SUCCESS); } HWTEST_F(ModuleWithZebinTest, givenZebinWithKernelCallingExternalFunctionThenUpdateKernelsBarrierCount) { ZebinTestData::ZebinWithExternalFunctionsInfo zebin; zebin.setProductFamily(static_cast(device->getHwInfo().platform.eProductFamily)); ze_module_desc_t moduleDesc = {}; moduleDesc.format = ZE_MODULE_FORMAT_NATIVE; moduleDesc.pInputModule = zebin.storage.data(); moduleDesc.inputSize = zebin.storage.size(); ModuleBuildLog *moduleBuildLog = nullptr; auto module = std::unique_ptr(new L0::ModuleImp(device, moduleBuildLog, ModuleType::User)); ASSERT_NE(nullptr, module.get()); auto moduleInitSuccess = module->initialize(&moduleDesc, device->getNEODevice()); EXPECT_TRUE(moduleInitSuccess); const auto &kernImmData = module->getKernelImmutableData("kernel"); ASSERT_NE(nullptr, kernImmData); EXPECT_EQ(zebin.barrierCount, kernImmData->getDescriptor().kernelAttributes.barrierCount); } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/printf_handler/000077500000000000000000000000001422164147700303125ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/printf_handler/CMakeLists.txt000066400000000000000000000003621422164147700330530ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/test_printf_handler.cpp ) test_printf_handler.cpp000066400000000000000000000016071422164147700350010ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/printf_handler/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/test_macros/test.h" #include 
"level_zero/core/source/printf_handler/printf_handler.h" #include "level_zero/core/test/unit_tests/mocks/mock_device.h" namespace L0 { namespace ult { TEST(PrintfHandler, whenPrintfBufferIscreatedThenCorrectAllocationTypeIsUsed) { NEO::Device *neoDevice(NEO::MockDevice::createWithNewExecutionEnvironment(NEO::defaultHwInfo.get(), 0)); Mock l0Device(neoDevice, neoDevice->getExecutionEnvironment()); auto allocation = PrintfHandler::createPrintfBuffer(&l0Device); EXPECT_EQ(NEO::AllocationType::PRINTF_SURFACE, allocation->getAllocationType()); neoDevice->getMemoryManager()->freeGraphicsMemory(allocation); } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/sampler/000077500000000000000000000000001422164147700267565ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/sampler/CMakeLists.txt000066400000000000000000000003531422164147700315170ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/test_sampler.cpp ) compute-runtime-22.14.22890/level_zero/core/test/unit_tests/sources/sampler/test_sampler.cpp000066400000000000000000000234661422164147700321770ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/utilities/numeric.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/source/sampler/sampler_hw.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_sampler.h" namespace L0 { namespace ult { const auto samplerAddressMode = ::testing::Values( ZE_SAMPLER_ADDRESS_MODE_NONE, ZE_SAMPLER_ADDRESS_MODE_REPEAT, ZE_SAMPLER_ADDRESS_MODE_CLAMP, ZE_SAMPLER_ADDRESS_MODE_MIRROR, ZE_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER); const auto samplerFilterMode = 
::testing::Values( ZE_SAMPLER_FILTER_MODE_NEAREST, ZE_SAMPLER_FILTER_MODE_LINEAR); const auto samplerIsNormalized = ::testing::Values( true, false); using SamplerCreateSupport = IsWithinProducts; class SamplerCreateTest : public Test, public ::testing::WithParamInterface> { public: void SetUp() override { Test::SetUp(); } void TearDown() override { Test::TearDown(); } }; HWTEST2_P(SamplerCreateTest, givenDifferentDescriptorValuesThenSamplerIsCorrectlyCreated, SamplerCreateSupport) { using SAMPLER_STATE = typename FamilyType::SAMPLER_STATE; ze_sampler_address_mode_t addressMode = std::get<0>(GetParam()); ze_sampler_filter_mode_t filterMode = std::get<1>(GetParam()); ze_bool_t isNormalized = std::get<2>(GetParam()); ze_sampler_desc_t desc = {}; desc.addressMode = addressMode; desc.filterMode = filterMode; desc.isNormalized = isNormalized; auto sampler = new MockSamplerHw(); EXPECT_NE(nullptr, sampler); sampler->initialize(device, &desc); EXPECT_EQ(SAMPLER_STATE::LOD_PRECLAMP_MODE::LOD_PRECLAMP_MODE_OGL, sampler->samplerState.getLodPreclampMode()); if (isNormalized == static_cast(true)) { EXPECT_FALSE(sampler->samplerState.getNonNormalizedCoordinateEnable()); } else { EXPECT_TRUE(sampler->samplerState.getNonNormalizedCoordinateEnable()); } if (addressMode == ZE_SAMPLER_ADDRESS_MODE_NONE) { EXPECT_EQ(SAMPLER_STATE::TEXTURE_COORDINATE_MODE_CLAMP_BORDER, sampler->samplerState.getTcxAddressControlMode()); EXPECT_EQ(SAMPLER_STATE::TEXTURE_COORDINATE_MODE_CLAMP_BORDER, sampler->samplerState.getTcyAddressControlMode()); EXPECT_EQ(SAMPLER_STATE::TEXTURE_COORDINATE_MODE_CLAMP_BORDER, sampler->samplerState.getTczAddressControlMode()); } else if (addressMode == ZE_SAMPLER_ADDRESS_MODE_REPEAT) { EXPECT_EQ(SAMPLER_STATE::TEXTURE_COORDINATE_MODE_WRAP, sampler->samplerState.getTcxAddressControlMode()); EXPECT_EQ(SAMPLER_STATE::TEXTURE_COORDINATE_MODE_WRAP, sampler->samplerState.getTcyAddressControlMode()); EXPECT_EQ(SAMPLER_STATE::TEXTURE_COORDINATE_MODE_WRAP, 
sampler->samplerState.getTczAddressControlMode()); } else if (addressMode == ZE_SAMPLER_ADDRESS_MODE_CLAMP) { EXPECT_EQ(SAMPLER_STATE::TEXTURE_COORDINATE_MODE_CLAMP, sampler->samplerState.getTcxAddressControlMode()); EXPECT_EQ(SAMPLER_STATE::TEXTURE_COORDINATE_MODE_CLAMP, sampler->samplerState.getTcyAddressControlMode()); EXPECT_EQ(SAMPLER_STATE::TEXTURE_COORDINATE_MODE_CLAMP, sampler->samplerState.getTczAddressControlMode()); } else if (addressMode == ZE_SAMPLER_ADDRESS_MODE_MIRROR) { EXPECT_EQ(SAMPLER_STATE::TEXTURE_COORDINATE_MODE_MIRROR, sampler->samplerState.getTcxAddressControlMode()); EXPECT_EQ(SAMPLER_STATE::TEXTURE_COORDINATE_MODE_MIRROR, sampler->samplerState.getTcyAddressControlMode()); EXPECT_EQ(SAMPLER_STATE::TEXTURE_COORDINATE_MODE_MIRROR, sampler->samplerState.getTczAddressControlMode()); } else if (addressMode == ZE_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER) { EXPECT_EQ(SAMPLER_STATE::TEXTURE_COORDINATE_MODE_CLAMP_BORDER, sampler->samplerState.getTcxAddressControlMode()); EXPECT_EQ(SAMPLER_STATE::TEXTURE_COORDINATE_MODE_CLAMP_BORDER, sampler->samplerState.getTcyAddressControlMode()); EXPECT_EQ(SAMPLER_STATE::TEXTURE_COORDINATE_MODE_CLAMP_BORDER, sampler->samplerState.getTczAddressControlMode()); } if (filterMode == ZE_SAMPLER_FILTER_MODE_NEAREST) { EXPECT_EQ(SAMPLER_STATE::MIN_MODE_FILTER_NEAREST, sampler->samplerState.getMinModeFilter()); EXPECT_EQ(SAMPLER_STATE::MAG_MODE_FILTER_NEAREST, sampler->samplerState.getMagModeFilter()); EXPECT_EQ(SAMPLER_STATE::MIP_MODE_FILTER_NEAREST, sampler->samplerState.getMipModeFilter()); EXPECT_FALSE(sampler->samplerState.getRAddressMinFilterRoundingEnable()); EXPECT_FALSE(sampler->samplerState.getRAddressMagFilterRoundingEnable()); EXPECT_FALSE(sampler->samplerState.getVAddressMinFilterRoundingEnable()); EXPECT_FALSE(sampler->samplerState.getVAddressMagFilterRoundingEnable()); EXPECT_FALSE(sampler->samplerState.getUAddressMinFilterRoundingEnable()); 
EXPECT_FALSE(sampler->samplerState.getUAddressMagFilterRoundingEnable()); } else if (filterMode == ZE_SAMPLER_FILTER_MODE_LINEAR) { EXPECT_EQ(SAMPLER_STATE::MIN_MODE_FILTER_LINEAR, sampler->samplerState.getMinModeFilter()); EXPECT_EQ(SAMPLER_STATE::MAG_MODE_FILTER_LINEAR, sampler->samplerState.getMagModeFilter()); EXPECT_EQ(SAMPLER_STATE::MIP_MODE_FILTER_NEAREST, sampler->samplerState.getMipModeFilter()); EXPECT_TRUE(sampler->samplerState.getRAddressMinFilterRoundingEnable()); EXPECT_TRUE(sampler->samplerState.getRAddressMagFilterRoundingEnable()); EXPECT_TRUE(sampler->samplerState.getVAddressMinFilterRoundingEnable()); EXPECT_TRUE(sampler->samplerState.getVAddressMagFilterRoundingEnable()); EXPECT_TRUE(sampler->samplerState.getUAddressMinFilterRoundingEnable()); EXPECT_TRUE(sampler->samplerState.getUAddressMagFilterRoundingEnable()); } NEO::FixedU4D8 minLodValue = NEO::FixedU4D8(std::min(sampler->getGenSamplerMaxLod(), sampler->lodMin)); NEO::FixedU4D8 maxLodValue = NEO::FixedU4D8(std::min(sampler->getGenSamplerMaxLod(), sampler->lodMax)); EXPECT_EQ(minLodValue.getRawAccess(), sampler->samplerState.getMinLod()); EXPECT_EQ(maxLodValue.getRawAccess(), sampler->samplerState.getMaxLod()); sampler->destroy(); } INSTANTIATE_TEST_CASE_P(SamplerDescCombinations, SamplerCreateTest, ::testing::Combine(samplerAddressMode, samplerFilterMode, samplerIsNormalized)); using ContextCreateSamplerTest = Test; HWTEST2_F(ContextCreateSamplerTest, givenDifferentDescriptorValuesThenSamplerIsCorrectlyCreated, SamplerCreateSupport) { ze_sampler_address_mode_t addressMode = ZE_SAMPLER_ADDRESS_MODE_NONE; ze_sampler_filter_mode_t filterMode = ZE_SAMPLER_FILTER_MODE_LINEAR; ze_bool_t isNormalized = false; ze_sampler_desc_t desc = {}; desc.addressMode = addressMode; desc.filterMode = filterMode; desc.isNormalized = isNormalized; ze_sampler_handle_t hSampler; ze_result_t res = context->createSampler(device, &desc, &hSampler); EXPECT_EQ(ZE_RESULT_SUCCESS, res); auto sampler = 
reinterpret_cast(L0::Sampler::fromHandle(hSampler)); EXPECT_NE(nullptr, sampler); sampler->destroy(); } HWTEST2_F(ContextCreateSamplerTest, givenInvalidHardwareFamilyThenSamplerIsNotCreated, SamplerCreateSupport) { ze_sampler_address_mode_t addressMode = ZE_SAMPLER_ADDRESS_MODE_NONE; ze_sampler_filter_mode_t filterMode = ZE_SAMPLER_FILTER_MODE_LINEAR; ze_bool_t isNormalized = false; ze_sampler_desc_t desc = {}; desc.addressMode = addressMode; desc.filterMode = filterMode; desc.isNormalized = isNormalized; L0::Sampler *sampler = Sampler::create(IGFX_MAX_PRODUCT, device, &desc); EXPECT_EQ(nullptr, sampler); } HWTEST2_F(ContextCreateSamplerTest, givenInvalidAddressModeThenSamplerIsNotCreated, SamplerCreateSupport) { auto addressModeArray = std::make_unique(sizeof(ze_sampler_address_mode_t)); addressModeArray[0] = 99; // out of range value auto addressMode = *reinterpret_cast(addressModeArray.get()); ze_sampler_filter_mode_t filterMode = ZE_SAMPLER_FILTER_MODE_LINEAR; ze_bool_t isNormalized = false; ze_sampler_desc_t desc = {}; desc.addressMode = addressMode; desc.filterMode = filterMode; desc.isNormalized = isNormalized; L0::Sampler *sampler = Sampler::create(gfxCoreFamily, device, &desc); EXPECT_EQ(nullptr, sampler); } HWTEST2_F(ContextCreateSamplerTest, givenInvalidFilterModeThenSamplerIsNotCreated, SamplerCreateSupport) { ze_sampler_address_mode_t addressMode = ZE_SAMPLER_ADDRESS_MODE_NONE; auto filterModeArray = std::make_unique(sizeof(ze_sampler_filter_mode_t)); filterModeArray[0] = 99; // out of range value ze_sampler_filter_mode_t filterMode = *reinterpret_cast(filterModeArray.get()); ze_bool_t isNormalized = false; ze_sampler_desc_t desc = {}; desc.addressMode = addressMode; desc.filterMode = filterMode; desc.isNormalized = isNormalized; L0::Sampler *sampler = Sampler::create(gfxCoreFamily, device, &desc); EXPECT_EQ(nullptr, sampler); } } // namespace ult } // namespace L0 
compute-runtime-22.14.22890/level_zero/core/test/unit_tests/white_box.h000066400000000000000000000011311422164147700257650ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once namespace L0 { namespace ult { template struct WhiteBox : public Type { using Type::Type; }; template WhiteBox *whitebox_cast(Type *obj) { return static_cast *>(obj); } template WhiteBox &whitebox_cast(Type &obj) { return static_cast &>(obj); } template Type *blackbox_cast(WhiteBox *obj) { return static_cast(obj); } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/xe_hp_core/000077500000000000000000000000001422164147700257435ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/xe_hp_core/CMakeLists.txt000066400000000000000000000013461422164147700305070ustar00rootroot00000000000000# # Copyright (C) 2021-2022 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_XE_HP_CORE) target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/test_cmdlist_xe_hp_core.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_cmdqueue_debugger_xe_hp_core.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_cmdqueue_enqueuecommandlist_xe_hp_core.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_device_xe_hp_core.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_l0_hw_helper_xe_hp_core.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_module_xe_hp_core.cpp ) add_subdirectoriesL0(${CMAKE_CURRENT_SOURCE_DIR} "*") endif() enable_l0_mocks_xe_hp_core.cpp000066400000000000000000000006031422164147700335770ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/xe_hp_core/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/test/unit_tests/mocks/mock_l0_debugger.h" namespace NEO { struct XeHpFamily; using GfxFamily = XeHpFamily; } // namespace NEO namespace L0 { namespace ult { static 
MockDebuggerL0HwPopulateFactory mockDebuggerXE_HP_CORE; } } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/xe_hp_core/test_cmdlist_xe_hp_core.cpp000066400000000000000000001151351422164147700333460ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/l3_range.h" #include "shared/source/helpers/register_offsets.h" #include "shared/source/helpers/state_base_address.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/fixtures/module_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h" #include "level_zero/core/test/unit_tests/mocks/mock_kernel.h" #include "level_zero/core/test/unit_tests/mocks/mock_module.h" namespace L0 { namespace ult { using CommandListCreate = Test; using CommandListAppendLaunchKernel = Test; using CommandListAppendLaunchKernelWithAtomics = Test; HWTEST2_F(CommandListAppendLaunchKernelWithAtomics, givenKernelWithNoGlobalAtomicsThenLastSentGlobalAtomicsInContainerStaysFalse, IsXeHpCore) { Mock<::L0::Kernel> kernel; auto pMockModule = std::unique_ptr(new Mock(device, nullptr)); kernel.module = pMockModule.get(); kernel.setGroupSize(1, 1, 1); ze_group_count_t groupCount{8, 1, 1}; auto pCommandList = std::make_unique>>(); auto result = pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u); ASSERT_EQ(ZE_RESULT_SUCCESS, result); pCommandList->partitionCount = 2; auto &kernelAttributes = kernel.immutableData.kernelDescriptor->kernelAttributes; kernelAttributes.flags.useGlobalAtomics = false; 
EXPECT_FALSE(pCommandList->commandContainer.lastSentUseGlobalAtomics); result = pCommandList->appendLaunchKernelWithParams(kernel.toHandle(), &groupCount, nullptr, false, false, false); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_FALSE(pCommandList->commandContainer.lastSentUseGlobalAtomics); } HWTEST2_F(CommandListAppendLaunchKernelWithAtomics, givenKernelWithGlobalAtomicsThenLastSentGlobalAtomicsInContainerIsSetToTrue, IsXeHpCore) { Mock<::L0::Kernel> kernel; auto pMockModule = std::unique_ptr(new Mock(device, nullptr)); kernel.module = pMockModule.get(); kernel.setGroupSize(1, 1, 1); ze_group_count_t groupCount{8, 1, 1}; auto pCommandList = std::make_unique>>(); auto result = pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u); ASSERT_EQ(ZE_RESULT_SUCCESS, result); pCommandList->partitionCount = 2; auto &kernelAttributes = kernel.immutableData.kernelDescriptor->kernelAttributes; kernelAttributes.flags.useGlobalAtomics = true; EXPECT_FALSE(pCommandList->commandContainer.lastSentUseGlobalAtomics); result = pCommandList->appendLaunchKernelWithParams(kernel.toHandle(), &groupCount, nullptr, false, false, false); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_TRUE(pCommandList->commandContainer.lastSentUseGlobalAtomics); } HWTEST2_F(CommandListAppendLaunchKernelWithAtomics, givenKernelWithGlobalAtomicsAndLastSentGlobalAtomicsInContainerTrueThenLastSentGlobalAtomicsStaysTrue, IsXeHpCore) { Mock<::L0::Kernel> kernel; auto pMockModule = std::unique_ptr(new Mock(device, nullptr)); kernel.module = pMockModule.get(); kernel.setGroupSize(1, 1, 1); ze_group_count_t groupCount{8, 1, 1}; auto pCommandList = std::make_unique>>(); auto result = pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u); ASSERT_EQ(ZE_RESULT_SUCCESS, result); pCommandList->partitionCount = 2; auto &kernelAttributes = kernel.immutableData.kernelDescriptor->kernelAttributes; kernelAttributes.flags.useGlobalAtomics = true; 
pCommandList->commandContainer.lastSentUseGlobalAtomics = true; result = pCommandList->appendLaunchKernelWithParams(kernel.toHandle(), &groupCount, nullptr, false, false, false); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_TRUE(pCommandList->commandContainer.lastSentUseGlobalAtomics); } HWTEST2_F(CommandListAppendLaunchKernelWithAtomics, givenKernelWithNoGlobalAtomicsAndLastSentGlobalAtomicsInContainerTrueThenLastSentGlobalAtomicsIsSetToFalse, IsXeHpCore) { Mock<::L0::Kernel> kernel; auto pMockModule = std::unique_ptr(new Mock(device, nullptr)); kernel.module = pMockModule.get(); kernel.setGroupSize(1, 1, 1); ze_group_count_t groupCount{8, 1, 1}; auto pCommandList = std::make_unique>>(); auto result = pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u); ASSERT_EQ(ZE_RESULT_SUCCESS, result); pCommandList->partitionCount = 2; auto &kernelAttributes = kernel.immutableData.kernelDescriptor->kernelAttributes; kernelAttributes.flags.useGlobalAtomics = false; pCommandList->commandContainer.lastSentUseGlobalAtomics = true; result = pCommandList->appendLaunchKernelWithParams(kernel.toHandle(), &groupCount, nullptr, false, false, false); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_FALSE(pCommandList->commandContainer.lastSentUseGlobalAtomics); } HWTEST2_F(CommandListAppendLaunchKernelWithAtomics, givenKernelWithGlobalAtomicsAndNoImplicitScalingThenLastSentGlobalAtomicsInContainerStaysFalse, IsXeHpCore) { Mock<::L0::Kernel> kernel; auto pMockModule = std::unique_ptr(new Mock(device, nullptr)); kernel.module = pMockModule.get(); kernel.setGroupSize(1, 1, 1); ze_group_count_t groupCount{8, 1, 1}; auto pCommandList = std::make_unique>>(); auto result = pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto &kernelAttributes = kernel.immutableData.kernelDescriptor->kernelAttributes; kernelAttributes.flags.useGlobalAtomics = true; EXPECT_FALSE(pCommandList->commandContainer.lastSentUseGlobalAtomics); result 
= pCommandList->appendLaunchKernelWithParams(kernel.toHandle(), &groupCount, nullptr, false, false, false); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_FALSE(pCommandList->commandContainer.lastSentUseGlobalAtomics); } using MultTileCommandListAppendLaunchKernelL3Flush = Test>; HWTEST2_F(MultTileCommandListAppendLaunchKernelL3Flush, givenKernelWithRegularEventAndWithWalkerPartitionThenProperCommandsEncoded, IsXeHpCore) { using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION; Mock<::L0::Kernel> kernel; auto pMockModule = std::unique_ptr(new Mock(device, nullptr)); kernel.module = pMockModule.get(); kernel.setGroupSize(1, 1, 1); ze_group_count_t groupCount{8, 1, 1}; auto pCommandList = std::make_unique>>(); auto result = pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u); ASSERT_EQ(ZE_RESULT_SUCCESS, result); pCommandList->partitionCount = 2; ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST; eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); result = pCommandList->appendLaunchKernelWithParams(kernel.toHandle(), &groupCount, event->toHandle(), false, false, false); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto gpuAddress = event->getGpuAddress(device) + (pCommandList->partitionCount * event->getSinglePacketSize()) + event->getContextEndOffset(); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(pCommandList->commandContainer.getCommandStream()->getCpuBase(), 0), 
pCommandList->commandContainer.getCommandStream()->getUsed())); EXPECT_EQ(2u, pCommandList->partitionCount); auto itorLri = find(cmdList.begin(), cmdList.end()); EXPECT_EQ(cmdList.end(), itorLri); auto itorPC = findAll(cmdList.begin(), cmdList.end()); ASSERT_NE(0u, itorPC.size()); uint32_t postSyncCount = 0u; for (auto it : itorPC) { auto cmd = genCmdCast(*it); if (cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) { EXPECT_EQ(gpuAddress, NEO::UnitTestHelper::getPipeControlPostSyncAddress(*cmd)); EXPECT_TRUE(cmd->getWorkloadPartitionIdOffsetEnable()); postSyncCount++; } } ASSERT_LE(1u, postSyncCount); } HWTEST2_F(MultTileCommandListAppendLaunchKernelL3Flush, givenKernelWithTimestampEventAndWithWalkerPartitionThenProperCommandsEncoded, IsXeHpCore) { using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION; Mock<::L0::Kernel> kernel; auto pMockModule = std::unique_ptr(new Mock(device, nullptr)); kernel.module = pMockModule.get(); kernel.setGroupSize(1, 1, 1); ze_group_count_t groupCount{8, 1, 1}; auto pCommandList = std::make_unique>>(); auto result = pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u); ASSERT_EQ(ZE_RESULT_SUCCESS, result); pCommandList->partitionCount = 2; ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST; eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); result = 
pCommandList->appendLaunchKernelWithParams(kernel.toHandle(), &groupCount, event->toHandle(), false, false, false); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto gpuAddress = event->getGpuAddress(device) + (pCommandList->partitionCount * event->getSinglePacketSize()) + event->getContextEndOffset(); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(pCommandList->commandContainer.getCommandStream()->getCpuBase(), 0), pCommandList->commandContainer.getCommandStream()->getUsed())); EXPECT_EQ(2u, pCommandList->partitionCount); auto itorLri = findAll(cmdList.begin(), cmdList.end()); EXPECT_EQ(0u, itorLri.size()); auto itorPC = findAll(cmdList.begin(), cmdList.end()); ASSERT_NE(0u, itorPC.size()); uint32_t postSyncCount = 0u; for (auto it : itorPC) { auto cmd = genCmdCast(*it); if (cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) { EXPECT_EQ(gpuAddress, NEO::UnitTestHelper::getPipeControlPostSyncAddress(*cmd)); EXPECT_TRUE(cmd->getWorkloadPartitionIdOffsetEnable()); postSyncCount++; } } ASSERT_LE(1u, postSyncCount); } using CommandListAppendLaunchKernelL3Flush = Test; HWTEST2_F(CommandListAppendLaunchKernelL3Flush, givenKernelWithEventAndWithoutWalkerPartitionThenProperCommandsEncoded, IsXeHpCore) { using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; Mock<::L0::Kernel> kernel; auto pMockModule = std::unique_ptr(new Mock(device, nullptr)); kernel.module = pMockModule.get(); kernel.setGroupSize(1, 1, 1); ze_group_count_t groupCount{8, 1, 1}; auto pCommandList = std::make_unique>>(); auto result = pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u); ASSERT_EQ(ZE_RESULT_SUCCESS, result); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, 
&eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); result = pCommandList->appendLaunchKernelWithParams(kernel.toHandle(), &groupCount, event->toHandle(), false, false, false); EXPECT_EQ(ZE_RESULT_SUCCESS, result); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(pCommandList->commandContainer.getCommandStream()->getCpuBase(), 0), pCommandList->commandContainer.getCommandStream()->getUsed())); EXPECT_EQ(1u, pCommandList->partitionCount); auto itorLri = find(cmdList.begin(), cmdList.end()); ASSERT_EQ(cmdList.end(), itorLri); } HWTEST2_F(CommandListAppendLaunchKernelL3Flush, givenKernelWithEventHostScopeWithoutWalkerPartitionThenEventL3FlushWaSet, IsXeHpCore) { using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; Mock<::L0::Kernel> kernel; auto pMockModule = std::unique_ptr(new Mock(device, nullptr)); kernel.module = pMockModule.get(); kernel.setGroupSize(1, 1, 1); ze_group_count_t groupCount{8, 1, 1}; auto pCommandList = std::make_unique>>(); auto result = pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u); ASSERT_EQ(ZE_RESULT_SUCCESS, result); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST; auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); result = pCommandList->appendLaunchKernelWithParams(kernel.toHandle(), &groupCount, event->toHandle(), false, false, false); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(true, event->l3FlushWaApplied); } HWTEST2_F(CommandListAppendLaunchKernelL3Flush, 
givenKernelWithEventZeroScopeWithoutWalkerPartitionThenEventL3FlushWaNotSet, IsXeHpCore) { using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; Mock<::L0::Kernel> kernel; auto pMockModule = std::unique_ptr(new Mock(device, nullptr)); kernel.module = pMockModule.get(); kernel.setGroupSize(1, 1, 1); ze_group_count_t groupCount{8, 1, 1}; auto pCommandList = std::make_unique>>(); auto result = pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u); ASSERT_EQ(ZE_RESULT_SUCCESS, result); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); result = pCommandList->appendLaunchKernelWithParams(kernel.toHandle(), &groupCount, event->toHandle(), false, false, false); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(false, event->l3FlushWaApplied); } HWTEST2_F(CommandListAppendLaunchKernelL3Flush, givenKernelWithEventHostScopeWithoutWalkerPartitionThenSkipOddPacketsDuringQuery, IsXeHpCore) { using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; class MockTimestampPackets32 : public TimestampPackets { public: using typename TimestampPackets::Packet; }; Mock<::L0::Kernel> kernel; auto pMockModule = std::unique_ptr(new Mock(device, nullptr)); kernel.module = pMockModule.get(); kernel.setGroupSize(1, 1, 1); ze_group_count_t groupCount{8, 1, 1}; auto pCommandList = std::make_unique>>(); auto result = pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u); ASSERT_EQ(ZE_RESULT_SUCCESS, result); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE | 
ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST; auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); result = pCommandList->appendLaunchKernelWithParams(kernel.toHandle(), &groupCount, event->toHandle(), false, false, false); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(true, event->l3FlushWaApplied); EXPECT_EQ(2u, event->getPacketsInUse()); typename MockTimestampPackets32::Packet data[3] = {}; data[0].contextStart = 3u; data[0].contextEnd = 4u; data[0].globalStart = 5u; data[0].globalEnd = 6u; data[1].contextStart = 2u; data[1].contextEnd = 6u; data[1].globalStart = 4u; data[1].globalEnd = 8u; event->hostAddress = &data; ze_kernel_timestamp_result_t tsResult = {}; event->queryKernelTimestamp(&tsResult); EXPECT_EQ(data[0].contextStart, tsResult.context.kernelStart); EXPECT_EQ(data[0].contextEnd, tsResult.context.kernelEnd); EXPECT_EQ(data[0].globalStart, tsResult.global.kernelStart); EXPECT_EQ(data[0].globalEnd, tsResult.global.kernelEnd); } HWTEST2_F(CommandListCreate, WhenCreatingCommandListThenBindingTablePoolAllocAddedToBatchBuffer, IsXeHpCore) { using _3DSTATE_BINDING_TABLE_POOL_ALLOC = typename FamilyType::_3DSTATE_BINDING_TABLE_POOL_ALLOC; ze_result_t returnValue; std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::Compute, 0u, returnValue)); auto &commandContainer = commandList->commandContainer; auto gmmHelper = commandContainer.getDevice()->getGmmHelper(); ASSERT_NE(nullptr, commandContainer.getCommandStream()); auto usedSpaceBefore = commandContainer.getCommandStream()->getUsed(); auto result = commandList->close(); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto usedSpaceAfter = commandContainer.getCommandStream()->getUsed(); 
ASSERT_GT(usedSpaceAfter, usedSpaceBefore); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), usedSpaceAfter)); auto itor = find<_3DSTATE_BINDING_TABLE_POOL_ALLOC *>(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), itor); { uint32_t streamBuffer[50] = {}; NEO::LinearStream linearStream(streamBuffer, sizeof(streamBuffer)); NEO::StateBaseAddressHelper::programBindingTableBaseAddress( linearStream, *commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE), gmmHelper); auto expectedCommand = reinterpret_cast<_3DSTATE_BINDING_TABLE_POOL_ALLOC *>(streamBuffer); auto programmedCommand = genCmdCast<_3DSTATE_BINDING_TABLE_POOL_ALLOC *>(*itor); EXPECT_EQ(0, memcmp(expectedCommand, programmedCommand, sizeof(_3DSTATE_BINDING_TABLE_POOL_ALLOC))); } } HWTEST2_F(CommandListCreate, givenNotCopyCommandListWhenProfilingEventBeforeCommandThenStoreRegMemAdded, IsXeHpCore) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using MI_LOAD_REGISTER_REG = typename GfxFamily::MI_LOAD_REGISTER_REG; auto commandList = std::make_unique>>(); commandList->initialize(device, NEO::EngineGroupType::Compute, 0u); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.signal = 0; eventDesc.wait = 0; ze_result_t result = ZE_RESULT_SUCCESS; auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); commandList->appendEventForProfiling(event->toHandle(), true); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), 
commandList->commandContainer.getCommandStream()->getUsed())); auto itor = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), itor); auto cmd = genCmdCast(*itor); EXPECT_EQ(cmd->getSourceRegisterAddress(), REG_GLOBAL_TIMESTAMP_LDW); itor++; itor = find(itor, cmdList.end()); EXPECT_NE(cmdList.end(), itor); cmd = genCmdCast(*itor); EXPECT_EQ(cmd->getSourceRegisterAddress(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW); } HWTEST2_F(CommandListCreate, givenNotCopyCommandListWhenProfilingEventAfterCommandThenPipeControlAndStoreRegMemAdded, IsXeHpCore) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using MI_LOAD_REGISTER_REG = typename GfxFamily::MI_LOAD_REGISTER_REG; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; auto commandList = std::make_unique>>(); commandList->initialize(device, NEO::EngineGroupType::Compute, 0u); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.signal = 0; eventDesc.wait = 0; ze_result_t result = ZE_RESULT_SUCCESS; auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); commandList->appendEventForProfiling(event->toHandle(), false); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed())); auto itor = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), itor); itor++; itor = find(itor, cmdList.end()); EXPECT_NE(cmdList.end(), itor); auto cmd = genCmdCast(*itor); EXPECT_EQ(cmd->getSourceRegisterAddress(), REG_GLOBAL_TIMESTAMP_LDW); itor++; itor = find(itor, cmdList.end()); EXPECT_NE(cmdList.end(), itor); cmd = 
genCmdCast(*itor); EXPECT_EQ(cmd->getSourceRegisterAddress(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW); } HWTEST2_F(CommandListCreate, givenCopyCommandListWhenProfilingEventThenStoreRegCommandIsAdded, IsXeHpCore) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM; auto commandList = std::make_unique>>(); commandList->initialize(device, NEO::EngineGroupType::Copy, 0u); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; ze_result_t result = ZE_RESULT_SUCCESS; auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); commandList->appendEventForProfiling(event->toHandle(), false); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed())); auto itor = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), itor); } HWTEST2_F(CommandListCreate, givenAllocationsWhenAppendRangesBarrierThenL3ControlIsProgrammed, IsXeHpCore) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using L3_CONTROL = typename GfxFamily::L3_CONTROL; auto commandList = std::make_unique>>(); commandList->initialize(device, NEO::EngineGroupType::Copy, 0u); uint64_t gpuAddress = 0x1200; void *buffer = reinterpret_cast(gpuAddress); size_t size = 0x1100; NEO::MockGraphicsAllocation mockAllocation(buffer, gpuAddress, size); NEO::SvmAllocationData allocData(0); allocData.size = size; allocData.gpuAllocations.addAllocation(&mockAllocation); device->getDriverHandle()->getSvmAllocsManager()->insertSVMAlloc(allocData); const void *ranges[] = 
{buffer}; const size_t sizes[] = {size}; commandList->applyMemoryRangesBarrier(1, sizes, ranges); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed())); auto itor = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), itor); EXPECT_EQ(cmdList.end(), ++itor); } HWTEST2_F(CommandListCreate, givenAllocationWithSizeTooBigForL3ControlWhenAppendRangesBarrierThenTwoL3ControlAreProgrammed, IsXeHpCore) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using L3_CONTROL = typename GfxFamily::L3_CONTROL; auto commandList = std::make_unique>>(); commandList->initialize(device, NEO::EngineGroupType::Copy, 0u); uint64_t gpuAddress = 0x2000; void *buffer = reinterpret_cast(gpuAddress); size_t size = NEO::L3Range::maxSingleRange * (NEO::maxFlushSubrangeCount + 1); NEO::MockGraphicsAllocation mockAllocation(buffer, gpuAddress, size); NEO::SvmAllocationData allocData(0); allocData.size = size; allocData.gpuAllocations.addAllocation(&mockAllocation); device->getDriverHandle()->getSvmAllocsManager()->insertSVMAlloc(allocData); const void *ranges[] = {buffer}; const size_t sizes[] = {size}; commandList->applyMemoryRangesBarrier(1, sizes, ranges); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed())); auto itor = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), itor); EXPECT_NE(cmdList.end(), ++itor); } HWTEST2_F(CommandListCreate, givenRangeSizeTwiceBiggerThanAllocWhenAppendRangesBarrierThenL3ControlIsNotProgrammed, IsXeHpCore) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using L3_CONTROL = typename GfxFamily::L3_CONTROL; auto commandList = std::make_unique>>(); commandList->initialize(device, 
NEO::EngineGroupType::Copy, 0u); uint64_t gpuAddress = 0x1200; void *buffer = reinterpret_cast(gpuAddress); size_t size = 0x1000; NEO::MockGraphicsAllocation mockAllocation(buffer, gpuAddress, size); NEO::SvmAllocationData allocData(0); allocData.size = size; allocData.gpuAllocations.addAllocation(&mockAllocation); device->getDriverHandle()->getSvmAllocsManager()->insertSVMAlloc(allocData); const void *ranges[] = {buffer}; const size_t sizes[] = {2 * size}; commandList->applyMemoryRangesBarrier(1, sizes, ranges); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed())); auto itor = find(cmdList.begin(), cmdList.end()); EXPECT_EQ(cmdList.end(), itor); } HWTEST2_F(CommandListCreate, givenRangeNotInSvmManagerThanAllocWhenAppendRangesBarrierThenL3ControlIsNotProgrammed, IsXeHpCore) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using L3_CONTROL = typename GfxFamily::L3_CONTROL; auto commandList = std::make_unique>>(); commandList->initialize(device, NEO::EngineGroupType::Copy, 0u); uint64_t gpuAddress = 0x1200; void *buffer = reinterpret_cast(gpuAddress); size_t size = 0x1000; const void *ranges[] = {buffer}; const size_t sizes[] = {size}; commandList->applyMemoryRangesBarrier(1, sizes, ranges); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed())); auto itor = find(cmdList.begin(), cmdList.end()); EXPECT_EQ(cmdList.end(), itor); } HWTEST2_F(CommandListCreate, givenRangeNotAlignedToPageWhenAppendRangesBarrierThenCommandAdressIsAligned, IsXeHpCore) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using L3_CONTROL = typename GfxFamily::L3_CONTROL; using L3_FLUSH_ADDRESS_RANGE = typename GfxFamily::L3_FLUSH_ADDRESS_RANGE; 
auto commandList = std::make_unique>>(); commandList->initialize(device, NEO::EngineGroupType::Copy, 0u); uint64_t gpuAddress = 0x1200; void *buffer = reinterpret_cast(gpuAddress); size_t size = 0x1100; NEO::MockGraphicsAllocation mockAllocation(buffer, gpuAddress, size); NEO::SvmAllocationData allocData(0); allocData.size = size; allocData.gpuAllocations.addAllocation(&mockAllocation); device->getDriverHandle()->getSvmAllocsManager()->insertSVMAlloc(allocData); const void *ranges[] = {buffer}; const size_t sizes[] = {size}; commandList->applyMemoryRangesBarrier(1, sizes, ranges); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed())); auto itor = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), itor); auto programmedCommand = genCmdCast(*itor); programmedCommand++; L3_FLUSH_ADDRESS_RANGE *l3Ranges = reinterpret_cast(programmedCommand); EXPECT_EQ(l3Ranges->getAddress(), alignDown(gpuAddress, MemoryConstants::pageSize)); } HWTEST2_F(CommandListCreate, givenRangeBetweenTwoPagesWhenAppendRangesBarrierThenAddressMaskIsCorrect, IsXeHpCore) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using L3_CONTROL = typename GfxFamily::L3_CONTROL; using L3_FLUSH_ADDRESS_RANGE = typename GfxFamily::L3_FLUSH_ADDRESS_RANGE; auto commandList = std::make_unique>>(); commandList->initialize(device, NEO::EngineGroupType::Copy, 0u); uint64_t gpuAddress = 2 * MemoryConstants::pageSize + MemoryConstants::pageSize / 2; void *buffer = reinterpret_cast(gpuAddress); size_t size = MemoryConstants::pageSize / 2 + 1; NEO::MockGraphicsAllocation mockAllocation(buffer, gpuAddress, size); NEO::SvmAllocationData allocData(0); allocData.size = size; allocData.gpuAllocations.addAllocation(&mockAllocation); device->getDriverHandle()->getSvmAllocsManager()->insertSVMAlloc(allocData); const void *ranges[] = {buffer}; 
const size_t sizes[] = {size}; commandList->applyMemoryRangesBarrier(1, sizes, ranges); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed())); auto itor = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), itor); auto programmedCommand = genCmdCast(*itor); programmedCommand++; L3_FLUSH_ADDRESS_RANGE *l3Ranges = reinterpret_cast(programmedCommand); EXPECT_EQ(l3Ranges->getAddressMask(), NEO::L3Range::getMaskFromSize(2 * MemoryConstants::pageSize)); } HWTEST2_F(CommandListAppendLaunchKernel, givenEventWhenInvokingAppendLaunchKernelThenPostSyncIsAdded, IsXeHpCore) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using POSTSYNC_DATA = typename FamilyType::POSTSYNC_DATA; using WALKER_TYPE = typename FamilyType::WALKER_TYPE; createKernel(); ze_result_t returnValue; std::unique_ptr commandList(L0::CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)); auto usedSpaceBefore = commandList->commandContainer.getCommandStream()->getUsed(); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE; eventPoolDesc.count = 1; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST; eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; auto eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); ze_group_count_t groupCount{1, 1, 1}; auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, event->toHandle(), 0, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed(); EXPECT_GT(usedSpaceAfter, usedSpaceBefore); 
GenCmdList cmdList; EXPECT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), usedSpaceAfter)); bool postSyncFound = false; auto gpuAddress = event->getGpuAddress(device); auto itorPS = findAll(cmdList.begin(), cmdList.end()); ASSERT_NE(0u, itorPS.size()); for (auto it : itorPS) { auto cmd = genCmdCast(*it); auto &postSync = cmd->getPostSync(); EXPECT_EQ(POSTSYNC_DATA::OPERATION_WRITE_IMMEDIATE_DATA, postSync.getOperation()); EXPECT_EQ(gpuAddress, postSync.getDestinationAddress()); postSyncFound = true; } EXPECT_TRUE(postSyncFound); } HWTEST2_F(CommandListAppendLaunchKernel, givenTimestampEventWhenInvokingAppendLaunchKernelThenPostSyncIsAdded, IsXeHpCore) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using POSTSYNC_DATA = typename FamilyType::POSTSYNC_DATA; using WALKER_TYPE = typename FamilyType::WALKER_TYPE; createKernel(); ze_result_t returnValue; std::unique_ptr commandList(L0::CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)); auto usedSpaceBefore = commandList->commandContainer.getCommandStream()->getUsed(); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; auto eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); ze_group_count_t groupCount{1, 1, 1}; auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, event->toHandle(), 0, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed(); EXPECT_GT(usedSpaceAfter, usedSpaceBefore); GenCmdList cmdList; EXPECT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, 
ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), usedSpaceAfter)); bool postSyncFound = false; auto gpuAddress = event->getGpuAddress(device); auto itorPS = findAll(cmdList.begin(), cmdList.end()); ASSERT_NE(0u, itorPS.size()); for (auto it : itorPS) { auto cmd = genCmdCast(*it); auto &postSync = cmd->getPostSync(); EXPECT_EQ(POSTSYNC_DATA::OPERATION_WRITE_TIMESTAMP, postSync.getOperation()); EXPECT_EQ(gpuAddress, postSync.getDestinationAddress()); postSyncFound = true; } EXPECT_TRUE(postSyncFound); } } // namespace ult } // namespace L0 test_cmdqueue_debugger_xe_hp_core.cpp000066400000000000000000000170101422164147700352750ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/xe_hp_core/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/linear_stream.h" #include "shared/source/gen_common/reg_configs_common.h" #include "shared/source/helpers/preamble.h" #include "shared/source/os_interface/hw_info_config.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/source/cmdqueue/cmdqueue_hw.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h" #include "level_zero/core/test/unit_tests/sources/debugger/active_debugger_fixture.h" #include "level_zero/core/test/unit_tests/sources/debugger/l0_debugger_fixture.h" #include namespace L0 { namespace ult { using CommandQueueDebugCommandsForSldXeHP = Test; using CommandQueueDebugCommandsDebuggerL0XeHP = Test; XEHPTEST_F(CommandQueueDebugCommandsForSldXeHP, givenSteppingA0OrBWhenGlobalSipIsUsedThenMmioIsRestoredAtTheEndOfCmdBuffer) { using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; ze_command_queue_desc_t queueDesc = {}; ze_result_t returnValue; const auto &hwInfoConfig = *HwInfoConfig::get(hwInfo.platform.eProductFamily); std::array revisions = 
{hwInfoConfig.getHwRevIdFromStepping(REVID::REVISION_A0, hwInfo), hwInfoConfig.getHwRevIdFromStepping(REVID::REVISION_B, hwInfo)}; for (auto revision : revisions) { hwInfo.platform.usRevId = revision; auto commandQueue = whitebox_cast(CommandQueue::create(productFamily, deviceL0, device->getDefaultEngine().commandStreamReceiver, &queueDesc, false, false, returnValue)); ASSERT_NE(nullptr, commandQueue->commandStream); ze_command_list_handle_t commandLists[] = { CommandList::create(productFamily, deviceL0, NEO::EngineGroupType::RenderCompute, 0u, returnValue)->toHandle()}; uint32_t globalSipFound = 0; uint32_t debugModeFound = 0; uint32_t tdCtlFound = 0; std::vector globalSip; for (uint32_t execCount = 0; execCount < 2; execCount++) { auto startPointer = ptrOffset(commandQueue->commandStream->getCpuBase(), commandQueue->commandStream->getUsed()); auto usedSpaceBefore = commandQueue->commandStream->getUsed(); auto result = commandQueue->executeCommandLists(1, commandLists, nullptr, true); ASSERT_EQ(ZE_RESULT_SUCCESS, result); commandQueue->synchronize(0); auto usedSpaceAfter = commandQueue->commandStream->getUsed(); EXPECT_GT(usedSpaceAfter, usedSpaceBefore); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, startPointer, usedSpaceAfter - usedSpaceBefore)); auto miLoadImm = findAll(cmdList.begin(), cmdList.end()); for (size_t i = 0; i < miLoadImm.size(); i++) { MI_LOAD_REGISTER_IMM *miLoad = genCmdCast(*miLoadImm[i]); ASSERT_NE(nullptr, miLoad); if (miLoad->getRegisterOffset() == GlobalSipRegister::registerOffset) { globalSip.push_back(miLoad); globalSipFound++; } else if (miLoad->getRegisterOffset() == 0x20d8u) { debugModeFound++; } else if (miLoad->getRegisterOffset() == TdDebugControlRegisterOffset::registerOffset) { tdCtlFound++; } } } EXPECT_EQ(1u, debugModeFound); EXPECT_EQ(1u, tdCtlFound); ASSERT_EQ(4u, globalSipFound); auto sipAddress = globalSip[0]->getDataDword(); auto sipAllocation = 
SipKernel::getSipKernel(*device).getSipAllocation(); EXPECT_EQ(sipAllocation->getGpuAddressToPatch(), sipAddress & 0xfffffff8); auto sipAddress2 = globalSip[1]->getDataDword(); EXPECT_EQ(0u, sipAddress2); auto sipAddress3 = globalSip[2]->getDataDword(); EXPECT_EQ(sipAllocation->getGpuAddressToPatch(), sipAddress3 & 0xfffffff8); auto sipAddress4 = globalSip[3]->getDataDword(); EXPECT_EQ(0u, sipAddress4); auto commandList = CommandList::fromHandle(commandLists[0]); commandList->destroy(); commandQueue->destroy(); } } XEHPTEST_F(CommandQueueDebugCommandsDebuggerL0XeHP, givenSteppingA0OrBWhenGlobalSipIsUsedThenMmioIsRestoredAtTheEndOfCmdBuffer) { using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; ze_command_queue_desc_t queueDesc = {}; ze_result_t returnValue; const auto &hwInfoConfig = *HwInfoConfig::get(hwInfo.platform.eProductFamily); std::array revisions = {hwInfoConfig.getHwRevIdFromStepping(REVID::REVISION_A0, hwInfo), hwInfoConfig.getHwRevIdFromStepping(REVID::REVISION_B, hwInfo)}; for (auto revision : revisions) { hwInfo.platform.usRevId = revision; auto commandQueue = whitebox_cast(CommandQueue::create(productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &queueDesc, false, false, returnValue)); ASSERT_NE(nullptr, commandQueue->commandStream); ze_command_list_handle_t commandLists[] = { CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)->toHandle()}; uint32_t globalSipFound = 0; std::vector globalSip; for (uint32_t execCount = 0; execCount < 2; execCount++) { auto startPointer = ptrOffset(commandQueue->commandStream->getCpuBase(), commandQueue->commandStream->getUsed()); auto usedSpaceBefore = commandQueue->commandStream->getUsed(); auto result = commandQueue->executeCommandLists(1, commandLists, nullptr, true); ASSERT_EQ(ZE_RESULT_SUCCESS, result); commandQueue->synchronize(0); auto usedSpaceAfter = commandQueue->commandStream->getUsed(); EXPECT_GT(usedSpaceAfter, 
usedSpaceBefore); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, startPointer, usedSpaceAfter - usedSpaceBefore)); auto miLoadImm = findAll(cmdList.begin(), cmdList.end()); for (size_t i = 0; i < miLoadImm.size(); i++) { MI_LOAD_REGISTER_IMM *miLoad = genCmdCast(*miLoadImm[i]); ASSERT_NE(nullptr, miLoad); if (miLoad->getRegisterOffset() == GlobalSipRegister::registerOffset) { globalSip.push_back(miLoad); globalSipFound++; } } } ASSERT_EQ(4u, globalSipFound); auto sipAddress = globalSip[0]->getDataDword(); auto sipAllocation = SipKernel::getSipKernel(*neoDevice).getSipAllocation(); EXPECT_EQ(sipAllocation->getGpuAddressToPatch(), sipAddress & 0xfffffff8); auto sipAddress2 = globalSip[1]->getDataDword(); EXPECT_EQ(0u, sipAddress2); auto sipAddress3 = globalSip[2]->getDataDword(); EXPECT_EQ(sipAllocation->getGpuAddressToPatch(), sipAddress3 & 0xfffffff8); auto sipAddress4 = globalSip[3]->getDataDword(); EXPECT_EQ(0u, sipAddress4); auto commandList = CommandList::fromHandle(commandLists[0]); commandList->destroy(); commandQueue->destroy(); } } } // namespace ult } // namespace L0 test_cmdqueue_enqueuecommandlist_xe_hp_core.cpp000066400000000000000000000101271422164147700374150ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/xe_hp_core/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/linear_stream.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h" #include "level_zero/core/test/unit_tests/mocks/mock_device.h" #include "level_zero/core/test/unit_tests/mocks/mock_memory_manager.h" #include #include "gtest/gtest.h" 
namespace L0 { namespace ult { using CommandQueueExecuteCommandListsXE_HP_CORE = Test; XE_HP_CORE_TEST_F(CommandQueueExecuteCommandListsXE_HP_CORE, WhenExecutingCmdListsThenPipelineSelectAndCfeStateAreAddedToCmdBuffer) { const ze_command_queue_desc_t desc = {}; ze_result_t returnValue; auto commandQueue = whitebox_cast(CommandQueue::create( productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &desc, false, false, returnValue)); ASSERT_NE(nullptr, commandQueue->commandStream); auto usedSpaceBefore = commandQueue->commandStream->getUsed(); ze_command_list_handle_t commandLists[] = { CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)->toHandle()}; uint32_t numCommandLists = sizeof(commandLists) / sizeof(commandLists[0]); auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto usedSpaceAfter = commandQueue->commandStream->getUsed(); ASSERT_GT(usedSpaceAfter, usedSpaceBefore); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandQueue->commandStream->getCpuBase(), 0), usedSpaceAfter)); using CFE_STATE = typename FamilyType::CFE_STATE; auto itorCFE = find(cmdList.begin(), cmdList.end()); ASSERT_NE(itorCFE, cmdList.end()); // Should have a PS before a CFE using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT; auto itorPS = find(cmdList.begin(), itorCFE); ASSERT_NE(itorPS, itorCFE); { auto cmd = genCmdCast(*itorPS); EXPECT_EQ(cmd->getMaskBits() & 3u, 3u); EXPECT_EQ(cmd->getPipelineSelection(), PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU); } CommandList::fromHandle(commandLists[0])->destroy(); commandQueue->destroy(); } XE_HP_CORE_TEST_F(CommandQueueExecuteCommandListsXE_HP_CORE, WhenExecutingCmdListsThenStateBaseAddressForGeneralStateBaseAddressIsNotAdded) { const ze_command_queue_desc_t desc = {}; ze_result_t returnValue; auto commandQueue = 
whitebox_cast(CommandQueue::create( productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &desc, false, false, returnValue)); ASSERT_NE(nullptr, commandQueue->commandStream); auto usedSpaceBefore = commandQueue->commandStream->getUsed(); ze_command_list_handle_t commandLists[] = { CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)->toHandle()}; uint32_t numCommandLists = sizeof(commandLists) / sizeof(commandLists[0]); auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto usedSpaceAfter = commandQueue->commandStream->getUsed(); ASSERT_GT(usedSpaceAfter, usedSpaceBefore); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandQueue->commandStream->getCpuBase(), 0), usedSpaceAfter)); using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; auto itorSba = find(cmdList.begin(), cmdList.end()); EXPECT_EQ(itorSba, cmdList.end()); CommandList::fromHandle(commandLists[0])->destroy(); commandQueue->destroy(); } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/xe_hp_core/test_device_xe_hp_core.cpp000066400000000000000000000153541422164147700331500ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" namespace L0 { namespace ult { using DeviceFixtureXeHpCore = Test; HWTEST2_F(DeviceFixtureXeHpCore, GivenTargetXeHpCoreaWhenGettingMemoryPropertiesThenMemoryNameComesAsHBM, IsXeHpCore) { ze_device_memory_properties_t memProperties = {}; uint32_t pCount = 1u; EXPECT_EQ(ZE_RESULT_SUCCESS, device->getMemoryProperties(&pCount, &memProperties)); EXPECT_EQ(0, strcmp(memProperties.name, "HBM")); } HWTEST2_F(DeviceFixtureXeHpCore, 
givenReturnedDevicePropertiesThenExpectedPropertyFlagsSet, IsXeHpCore) { ze_device_properties_t deviceProps = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES}; device->getProperties(&deviceProps); EXPECT_EQ(0u, deviceProps.flags & ZE_DEVICE_PROPERTY_FLAG_ONDEMANDPAGING); EXPECT_EQ(0u, deviceProps.flags & ZE_DEVICE_PROPERTY_FLAG_ECC); EXPECT_EQ(0u, deviceProps.flags & ZE_DEVICE_PROPERTY_FLAG_SUBDEVICE); EXPECT_EQ(0u, deviceProps.flags & ZE_DEVICE_PROPERTY_FLAG_INTEGRATED); } using CommandQueueGroupTest = Test; HWTEST2_F(CommandQueueGroupTest, givenNoBlitterSupportAndNoCCSThenOneQueueGroupIsReturned, IsXeHpCore) { const uint32_t rootDeviceIndex = 0u; NEO::HardwareInfo hwInfo = *NEO::defaultHwInfo.get(); hwInfo.featureTable.flags.ftrCCSNode = false; hwInfo.capabilityTable.blitterOperationsSupported = false; auto *neoMockDevice = NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo, rootDeviceIndex); Mock deviceImp(neoMockDevice, neoMockDevice->getExecutionEnvironment()); uint32_t count = 0; ze_result_t res = deviceImp.getCommandQueueGroupProperties(&count, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, res); EXPECT_GE(count, 1u); } HWTEST2_F(CommandQueueGroupTest, givenNoBlitterSupportAndCCSThenTwoQueueGroupsAreReturned, IsXeHpCore) { const uint32_t rootDeviceIndex = 0u; NEO::HardwareInfo hwInfo = *NEO::defaultHwInfo.get(); hwInfo.featureTable.flags.ftrCCSNode = true; hwInfo.capabilityTable.blitterOperationsSupported = false; auto *neoMockDevice = NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo, rootDeviceIndex); Mock deviceImp(neoMockDevice, neoMockDevice->getExecutionEnvironment()); uint32_t count = 0; ze_result_t res = deviceImp.getCommandQueueGroupProperties(&count, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, res); EXPECT_GE(count, 2u); } HWTEST2_F(CommandQueueGroupTest, givenBlitterSupportAndCCSThenThreeQueueGroupsAreReturned, IsXeHpCore) { const uint32_t rootDeviceIndex = 0u; NEO::HardwareInfo hwInfo = *NEO::defaultHwInfo.get(); hwInfo.featureTable.flags.ftrCCSNode = 
true; hwInfo.capabilityTable.blitterOperationsSupported = true; hwInfo.featureTable.ftrBcsInfo.set(0); auto *neoMockDevice = NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo, rootDeviceIndex); Mock deviceImp(neoMockDevice, neoMockDevice->getExecutionEnvironment()); uint32_t count = 0; ze_result_t res = deviceImp.getCommandQueueGroupProperties(&count, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, res); EXPECT_GE(count, 3u); std::vector properties(count); res = deviceImp.getCommandQueueGroupProperties(&count, properties.data()); EXPECT_EQ(ZE_RESULT_SUCCESS, res); auto &engineGroups = neoMockDevice->getRegularEngineGroups(); for (uint32_t i = 0; i < count; i++) { if (engineGroups[i].engineGroupType == NEO::EngineGroupType::RenderCompute) { EXPECT_TRUE(properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE); EXPECT_TRUE(properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY); EXPECT_TRUE(properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COOPERATIVE_KERNELS); EXPECT_TRUE(properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_METRICS); EXPECT_EQ(properties[i].numQueues, 1u); EXPECT_EQ(properties[i].maxMemoryFillPatternSize, std::numeric_limits::max()); } else if (engineGroups[i].engineGroupType == NEO::EngineGroupType::Compute) { EXPECT_TRUE(properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE); EXPECT_TRUE(properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY); EXPECT_TRUE(properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COOPERATIVE_KERNELS); uint32_t numerOfCCSEnabled = hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled; EXPECT_EQ(properties[i].numQueues, numerOfCCSEnabled); EXPECT_EQ(properties[i].maxMemoryFillPatternSize, std::numeric_limits::max()); } else if (engineGroups[i].engineGroupType == NEO::EngineGroupType::Copy) { EXPECT_TRUE(properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY); EXPECT_EQ(properties[i].numQueues, hwInfo.featureTable.ftrBcsInfo.count()); 
EXPECT_EQ(properties[i].maxMemoryFillPatternSize, 4 * sizeof(uint32_t)); } } } class DeviceCopyQueueGroupFixture : public DeviceFixture { public: void SetUp() { DebugManager.flags.EnableBlitterOperationsSupport.set(0); DeviceFixture::SetUp(); } void TearDown() { DeviceFixture::TearDown(); } DebugManagerStateRestore restorer; }; using DeviceCopyQueueGroupTest = Test; HWTEST2_F(DeviceCopyQueueGroupTest, givenBlitterSupportAndEnableBlitterOperationsSupportSetToZeroThenNoCopyEngineIsReturned, IsXeHpCore) { const uint32_t rootDeviceIndex = 0u; NEO::HardwareInfo hwInfo = *NEO::defaultHwInfo.get(); hwInfo.featureTable.flags.ftrCCSNode = false; hwInfo.capabilityTable.blitterOperationsSupported = true; hwInfo.featureTable.ftrBcsInfo.set(0); auto *neoMockDevice = NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo, rootDeviceIndex); Mock deviceImp(neoMockDevice, neoMockDevice->getExecutionEnvironment()); uint32_t count = 0; ze_result_t res = deviceImp.getCommandQueueGroupProperties(&count, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, res); std::vector properties(count); res = deviceImp.getCommandQueueGroupProperties(&count, properties.data()); EXPECT_EQ(ZE_RESULT_SUCCESS, res); for (auto &engineGroup : neoMockDevice->getRegularEngineGroups()) { EXPECT_NE(NEO::EngineGroupType::Copy, engineGroup.engineGroupType); } } } // namespace ult } // namespace L0 test_l0_hw_helper_xe_hp_core.cpp000066400000000000000000000017601422164147700341760ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/xe_hp_core/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" namespace L0 { namespace ult { HWTEST_EXCLUDE_PRODUCT(L0HwHelperTest, givenBitmaskWithAttentionBitsForSingleThreadWhenGettingThreadsThenSingleCorrectThreadReturned, IGFX_XE_HP_CORE); HWTEST_EXCLUDE_PRODUCT(L0HwHelperTest, 
givenBitmaskWithAttentionBitsForAllSubslicesWhenGettingThreadsThenCorrectThreadsAreReturned, IGFX_XE_HP_CORE); HWTEST_EXCLUDE_PRODUCT(L0HwHelperTest, givenBitmaskWithAttentionBitsForAllEUsWhenGettingThreadsThenCorrectThreadsAreReturned, IGFX_XE_HP_CORE); HWTEST_EXCLUDE_PRODUCT(L0HwHelperTest, givenEu0To1Threads0To3BitmaskWhenGettingThreadsThenCorrectThreadsAreReturned, IGFX_XE_HP_CORE); HWTEST_EXCLUDE_PRODUCT(L0HwHelperTest, givenBitmaskWithAttentionBitsForHalfOfThreadsWhenGettingThreadsThenCorrectThreadsAreReturned, IGFX_XE_HP_CORE); } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/xe_hp_core/test_module_xe_hp_core.cpp000066400000000000000000000047031422164147700331720ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/kernel/kernel_properties.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" namespace L0 { namespace ult { using KernelPropertyTest = Test; HWTEST2_F(KernelPropertyTest, givenKernelExtendedPropertiesStructureWhenKernelPropertiesCalledThenPropertiesAreCorrectlySet, IsXeHpCore) { ze_device_module_properties_t kernelProperties = {}; ze_float_atomic_ext_properties_t kernelExtendedProperties = {}; kernelExtendedProperties.stype = ZE_STRUCTURE_TYPE_FLOAT_ATOMIC_EXT_PROPERTIES; kernelProperties.pNext = &kernelExtendedProperties; ze_result_t res = device->getKernelProperties(&kernelProperties); EXPECT_EQ(res, ZE_RESULT_SUCCESS); EXPECT_FALSE(kernelExtendedProperties.fp16Flags & FP_ATOMIC_EXT_FLAG_GLOBAL_LOAD_STORE); EXPECT_FALSE(kernelExtendedProperties.fp16Flags & FP_ATOMIC_EXT_FLAG_GLOBAL_ADD); EXPECT_FALSE(kernelExtendedProperties.fp16Flags & FP_ATOMIC_EXT_FLAG_GLOBAL_MIN_MAX); EXPECT_FALSE(kernelExtendedProperties.fp16Flags & FP_ATOMIC_EXT_FLAG_LOCAL_LOAD_STORE); EXPECT_FALSE(kernelExtendedProperties.fp16Flags & FP_ATOMIC_EXT_FLAG_LOCAL_ADD); 
EXPECT_FALSE(kernelExtendedProperties.fp16Flags & FP_ATOMIC_EXT_FLAG_LOCAL_MIN_MAX); EXPECT_FALSE(kernelExtendedProperties.fp32Flags & FP_ATOMIC_EXT_FLAG_GLOBAL_LOAD_STORE); EXPECT_TRUE(kernelExtendedProperties.fp32Flags & FP_ATOMIC_EXT_FLAG_GLOBAL_ADD); EXPECT_FALSE(kernelExtendedProperties.fp32Flags & FP_ATOMIC_EXT_FLAG_GLOBAL_MIN_MAX); EXPECT_FALSE(kernelExtendedProperties.fp32Flags & FP_ATOMIC_EXT_FLAG_LOCAL_LOAD_STORE); EXPECT_FALSE(kernelExtendedProperties.fp32Flags & FP_ATOMIC_EXT_FLAG_LOCAL_ADD); EXPECT_FALSE(kernelExtendedProperties.fp32Flags & FP_ATOMIC_EXT_FLAG_LOCAL_MIN_MAX); EXPECT_FALSE(kernelExtendedProperties.fp64Flags & FP_ATOMIC_EXT_FLAG_GLOBAL_LOAD_STORE); EXPECT_FALSE(kernelExtendedProperties.fp64Flags & FP_ATOMIC_EXT_FLAG_GLOBAL_ADD); EXPECT_FALSE(kernelExtendedProperties.fp64Flags & FP_ATOMIC_EXT_FLAG_GLOBAL_MIN_MAX); EXPECT_FALSE(kernelExtendedProperties.fp64Flags & FP_ATOMIC_EXT_FLAG_LOCAL_LOAD_STORE); EXPECT_FALSE(kernelExtendedProperties.fp64Flags & FP_ATOMIC_EXT_FLAG_LOCAL_ADD); EXPECT_FALSE(kernelExtendedProperties.fp64Flags & FP_ATOMIC_EXT_FLAG_LOCAL_MIN_MAX); } } // namespace ult } // namespace L0compute-runtime-22.14.22890/level_zero/core/test/unit_tests/xe_hpc_core/000077500000000000000000000000001422164147700261065ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/xe_hpc_core/CMakeLists.txt000066400000000000000000000011451422164147700306470ustar00rootroot00000000000000# # Copyright (C) 2021-2022 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_XE_HPC_CORE) target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/built_in_xe_hpc_core_tests_l0.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_cmdlist_xe_hpc_core.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_cmdqueue_xe_hpc_core.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_device_xe_hpc_core.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_module_xe_hpc_core.cpp ) add_subdirectories() endif() 
built_in_xe_hpc_core_tests_l0.cpp000066400000000000000000000067701422164147700345250ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/xe_hpc_core/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/built_ins/built_ins.h" #include "shared/test/common/fixtures/device_fixture.h" #include "shared/test/common/mocks/mock_builtinslib.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/source/builtin/builtin_functions_lib_impl.h" #include "level_zero/core/test/unit_tests/mocks/mock_device_recompile_built_ins.h" namespace L0 { namespace ult { using XeHpcCoreBuiltInTestL0 = Test; HWTEST_EXCLUDE_PRODUCT(BuiltInSharedTest, GivenBuiltinTypeBinaryWhenGettingBuiltinResourceForNotRegisteredRevisionThenBuiltinFromDefaultRevisionIsTaken, IGFX_XE_HPC_CORE); HWTEST_EXCLUDE_PRODUCT(BuiltInTestL0, givenDeviceWithUnregisteredBinaryBuiltinWhenGettingBuiltinKernelThenTakeBinaryBuiltinFromDefaultRevision, IGFX_XE_HPC_CORE); HWTEST2_F(XeHpcCoreBuiltInTestL0, givenDeviceWithUnregisteredBinaryBuiltinWhenGettingBuiltinKernelThenFallbackToIntermediate, IsXeHpcCore) { pDevice->incRefInternal(); L0::ult::MockDeviceForRebuildBuilins deviceL0(pDevice); pDevice->getRootDeviceEnvironment().getMutableHardwareInfo()->platform.usRevId += 0xdead; L0::BuiltinFunctionsLibImpl builtinFunctionsLib{&deviceL0, pDevice->getBuiltIns()}; for (uint32_t builtId = 0; builtId < static_cast(L0::Builtin::COUNT); builtId++) { deviceL0.formatForModule = {}; ASSERT_NE(nullptr, builtinFunctionsLib.getFunction(static_cast(builtId))); EXPECT_EQ(ZE_MODULE_FORMAT_IL_SPIRV, deviceL0.formatForModule); } } HWTEST2_F(XeHpcCoreBuiltInTestL0, GivenBuiltinTypeBinaryWhenGettingBuiltinResourceForNotRegisteredRevisionThenResourceSizeIsZero, IsXeHpcCore) { pDevice->getRootDeviceEnvironment().getMutableHardwareInfo()->platform.usRevId += 0xdead; auto mockBuiltinsLib = std::unique_ptr(new MockBuiltinsLib()); EXPECT_EQ(0u, 
mockBuiltinsLib->getBuiltinResource(EBuiltInOps::CopyBufferToBuffer, BuiltinCode::ECodeType::Binary, *pDevice).size()); EXPECT_EQ(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::CopyBufferRect, BuiltinCode::ECodeType::Binary, *pDevice).size()); EXPECT_EQ(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::FillBuffer, BuiltinCode::ECodeType::Binary, *pDevice).size()); EXPECT_EQ(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::CopyBufferToImage3d, BuiltinCode::ECodeType::Binary, *pDevice).size()); EXPECT_EQ(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::CopyImage3dToBuffer, BuiltinCode::ECodeType::Binary, *pDevice).size()); EXPECT_EQ(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::CopyImageToImage1d, BuiltinCode::ECodeType::Binary, *pDevice).size()); EXPECT_EQ(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::CopyImageToImage2d, BuiltinCode::ECodeType::Binary, *pDevice).size()); EXPECT_EQ(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::CopyImageToImage3d, BuiltinCode::ECodeType::Binary, *pDevice).size()); EXPECT_EQ(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::FillImage1d, BuiltinCode::ECodeType::Binary, *pDevice).size()); EXPECT_EQ(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::FillImage2d, BuiltinCode::ECodeType::Binary, *pDevice).size()); EXPECT_EQ(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::FillImage3d, BuiltinCode::ECodeType::Binary, *pDevice).size()); EXPECT_EQ(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::COUNT, BuiltinCode::ECodeType::Binary, *pDevice).size()); } } // namespace ult } // namespace L0enable_l0_mocks_xe_hpc_core.cpp000066400000000000000000000006201422164147700341040ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/xe_hpc_core/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/test/unit_tests/mocks/mock_l0_debugger.h" namespace NEO { struct XE_HPC_COREFamily; using GfxFamily = XE_HPC_COREFamily; } 
// namespace NEO namespace L0 { namespace ult { static MockDebuggerL0HwPopulateFactory mockDebuggerXeHpcCore; } } // namespace L0compute-runtime-22.14.22890/level_zero/core/test/unit_tests/xe_hpc_core/test_cmdlist_xe_hpc_core.cpp000066400000000000000000000541031422164147700336510ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/hw_info_config.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/test/unit_tests/fixtures/module_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h" #include "level_zero/core/test/unit_tests/mocks/mock_module.h" namespace L0 { namespace ult { using CommandListAppendLaunchKernelXeHpcCore = Test; HWTEST2_F(CommandListAppendLaunchKernelXeHpcCore, givenKernelUsingSyncBufferWhenAppendLaunchCooperativeKernelIsCalledThenCorrectValueIsReturned, IsXeHpcCore) { auto &hwInfo = *device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo(); auto &hwConfig = *NEO::HwInfoConfig::get(hwInfo.platform.eProductFamily); Mock<::L0::Kernel> kernel; auto pMockModule = std::unique_ptr(new Mock(device, nullptr)); kernel.module = pMockModule.get(); kernel.setGroupSize(1, 1, 1); ze_group_count_t groupCount{8, 1, 1}; auto pCommandList = std::make_unique>>(); auto result = pCommandList->initialize(device, NEO::EngineGroupType::CooperativeCompute, 0u); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto &kernelAttributes = kernel.immutableData.kernelDescriptor->kernelAttributes; kernelAttributes.flags.usesSyncBuffer = true; kernelAttributes.numGrfRequired = GrfConfig::DefaultGrfNumber; bool isCooperative = true; result = pCommandList->appendLaunchKernelWithParams(kernel.toHandle(), &groupCount, nullptr, false, false, isCooperative); EXPECT_EQ(ZE_RESULT_SUCCESS, result); { VariableBackup 
engineGroupType{&pCommandList->engineGroupType}; VariableBackup hwRevId{&hwInfo.platform.usRevId}; engineGroupType = EngineGroupType::RenderCompute; hwRevId = hwConfig.getHwRevIdFromStepping(REVISION_B, hwInfo); result = pCommandList->appendLaunchKernelWithParams(kernel.toHandle(), &groupCount, nullptr, false, false, isCooperative); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, result); ze_group_count_t groupCount1{1, 1, 1}; result = pCommandList->appendLaunchKernelWithParams(kernel.toHandle(), &groupCount1, nullptr, false, false, isCooperative); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } } using CommandListStatePrefetchXeHpcCore = Test; HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenDebugFlagSetWhenPrefetchApiCalledThenProgramStatePrefetch, IsXeHpcCore) { using STATE_PREFETCH = typename FamilyType::STATE_PREFETCH; DebugManagerStateRestore restore; auto pCommandList = std::make_unique>>(); auto result = pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u); ASSERT_EQ(ZE_RESULT_SUCCESS, result); constexpr size_t size = MemoryConstants::cacheLineSize * 2; constexpr size_t alignment = MemoryConstants::pageSize64k; constexpr size_t offset = MemoryConstants::cacheLineSize; constexpr uint32_t mocsIndexForL3 = (2 << 1); void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; context->allocDeviceMem(device->toHandle(), &deviceDesc, size + offset, alignment, &ptr); EXPECT_NE(nullptr, ptr); auto hwInfo = device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo(); hwInfo->platform.usRevId |= FamilyType::pvcBaseDieRevMask; auto cmdListBaseOffset = pCommandList->commandContainer.getCommandStream()->getUsed(); { auto ret = pCommandList->appendMemoryPrefetch(ptrOffset(ptr, offset), size); EXPECT_EQ(ZE_RESULT_SUCCESS, ret); EXPECT_EQ(cmdListBaseOffset, pCommandList->commandContainer.getCommandStream()->getUsed()); } { DebugManager.flags.AddStatePrefetchCmdToMemoryPrefetchAPI.set(1); auto ret = pCommandList->appendMemoryPrefetch(ptrOffset(ptr, 
offset), size); EXPECT_EQ(ZE_RESULT_SUCCESS, ret); EXPECT_EQ(cmdListBaseOffset + sizeof(STATE_PREFETCH), pCommandList->commandContainer.getCommandStream()->getUsed()); auto statePrefetchCmd = reinterpret_cast(ptrOffset(pCommandList->commandContainer.getCommandStream()->getCpuBase(), cmdListBaseOffset)); EXPECT_EQ(statePrefetchCmd->getAddress(), reinterpret_cast(ptrOffset(ptr, offset))); EXPECT_FALSE(statePrefetchCmd->getKernelInstructionPrefetch()); EXPECT_EQ(mocsIndexForL3, statePrefetchCmd->getMemoryObjectControlState()); EXPECT_EQ(1u, statePrefetchCmd->getPrefetchSize()); EXPECT_EQ(reinterpret_cast(ptr), pCommandList->commandContainer.getResidencyContainer().back()->getGpuAddress()); } context->freeMem(ptr); } HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenUnifiedSharedMemoryWhenPrefetchApiCalledThenDontSetMemPrefetch, IsXeHpcCore) { auto pCommandList = std::make_unique>>(); auto result = pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u); ASSERT_EQ(ZE_RESULT_SUCCESS, result); size_t size = 10; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_host_mem_alloc_desc_t hostDesc = {}; auto res = context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, res); EXPECT_NE(nullptr, ptr); auto ret = pCommandList->appendMemoryPrefetch(ptr, size); EXPECT_EQ(ZE_RESULT_SUCCESS, ret); auto memoryManager = static_cast(device->getDriverHandle()->getMemoryManager()); EXPECT_FALSE(memoryManager->setMemPrefetchCalled); context->freeMem(ptr); } HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenAppendMemoryPrefetchForKmdMigratedSharedAllocationsWhenPrefetchApiCalledThenDontCallSetMemPrefetchByDefault, IsXeHpcCore) { DebugManagerStateRestore restore; DebugManager.flags.AppendMemoryPrefetchForKmdMigratedSharedAllocations.set(1); auto pCommandList = std::make_unique>>(); auto result = pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u); 
ASSERT_EQ(ZE_RESULT_SUCCESS, result); size_t size = 10; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_host_mem_alloc_desc_t hostDesc = {}; auto res = context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, res); EXPECT_NE(nullptr, ptr); auto ret = pCommandList->appendMemoryPrefetch(ptr, size); EXPECT_EQ(ZE_RESULT_SUCCESS, ret); auto memoryManager = static_cast(device->getDriverHandle()->getMemoryManager()); EXPECT_FALSE(memoryManager->setMemPrefetchCalled); context->freeMem(ptr); } HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenAppendMemoryPrefetchForKmdMigratedSharedAllocationsSetWhenPrefetchApiCalledOnUnifiedSharedMemoryThenCallSetMemPrefetch, IsXeHpcCore) { DebugManagerStateRestore restore; DebugManager.flags.AppendMemoryPrefetchForKmdMigratedSharedAllocations.set(1); DebugManager.flags.UseKmdMigration.set(1); EXPECT_EQ(0b0001u, neoDevice->deviceBitfield.to_ulong()); auto pCommandList = std::make_unique>>(); auto result = pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u); ASSERT_EQ(ZE_RESULT_SUCCESS, result); size_t size = 10; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_host_mem_alloc_desc_t hostDesc = {}; auto res = context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, res); EXPECT_NE(nullptr, ptr); auto ret = pCommandList->appendMemoryPrefetch(ptr, size); EXPECT_EQ(ZE_RESULT_SUCCESS, ret); auto memoryManager = static_cast(device->getDriverHandle()->getMemoryManager()); EXPECT_TRUE(memoryManager->setMemPrefetchCalled); EXPECT_EQ(0u, memoryManager->memPrefetchSubDeviceId); context->freeMem(ptr); } HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenAppendMemoryPrefetchForKmdMigratedSharedAllocationsSetWhenPrefetchApiCalledOnUnifiedSharedMemoryThenCallSetMemPrefetchOnTheAssociatedDevice, IsXeHpcCore) { 
DebugManagerStateRestore restore; DebugManager.flags.AppendMemoryPrefetchForKmdMigratedSharedAllocations.set(1); DebugManager.flags.UseKmdMigration.set(1); neoDevice->deviceBitfield = 0b0010; auto pCommandList = std::make_unique>>(); auto result = pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u); ASSERT_EQ(ZE_RESULT_SUCCESS, result); size_t size = 10; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_host_mem_alloc_desc_t hostDesc = {}; auto res = context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, res); EXPECT_NE(nullptr, ptr); auto ret = pCommandList->appendMemoryPrefetch(ptr, size); EXPECT_EQ(ZE_RESULT_SUCCESS, ret); auto memoryManager = static_cast(device->getDriverHandle()->getMemoryManager()); EXPECT_TRUE(memoryManager->setMemPrefetchCalled); EXPECT_EQ(1u, memoryManager->memPrefetchSubDeviceId); context->freeMem(ptr); } HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenAppendMemoryPrefetchForKmdMigratedSharedAllocationsSetWhenPrefetchApiCalledOnUnifiedDeviceMemoryThenDontCallSetMemPrefetch, IsXeHpcCore) { DebugManagerStateRestore restore; DebugManager.flags.AppendMemoryPrefetchForKmdMigratedSharedAllocations.set(1); DebugManager.flags.UseKmdMigration.set(1); auto pCommandList = std::make_unique>>(); auto result = pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u); ASSERT_EQ(ZE_RESULT_SUCCESS, result); size_t size = 10; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; context->allocDeviceMem(device->toHandle(), &deviceDesc, size, alignment, &ptr); EXPECT_NE(nullptr, ptr); auto ret = pCommandList->appendMemoryPrefetch(ptr, size); EXPECT_EQ(ZE_RESULT_SUCCESS, ret); auto memoryManager = static_cast(device->getDriverHandle()->getMemoryManager()); EXPECT_FALSE(memoryManager->setMemPrefetchCalled); context->freeMem(ptr); } HWTEST2_F(CommandListStatePrefetchXeHpcCore, 
givenAppendMemoryPrefetchForKmdMigratedSharedAllocationsSetWhenPrefetchApiCalledOnUnifiedHostMemoryThenDontCallSetMemPrefetch, IsXeHpcCore) { DebugManagerStateRestore restore; DebugManager.flags.AppendMemoryPrefetchForKmdMigratedSharedAllocations.set(1); DebugManager.flags.UseKmdMigration.set(1); auto pCommandList = std::make_unique>>(); auto result = pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u); ASSERT_EQ(ZE_RESULT_SUCCESS, result); size_t size = 10; size_t alignment = 1u; void *ptr = nullptr; ze_host_mem_alloc_desc_t hostDesc = {}; context->allocHostMem(&hostDesc, size, alignment, &ptr); EXPECT_NE(nullptr, ptr); auto ret = pCommandList->appendMemoryPrefetch(ptr, size); EXPECT_EQ(ZE_RESULT_SUCCESS, ret); auto memoryManager = static_cast(device->getDriverHandle()->getMemoryManager()); EXPECT_FALSE(memoryManager->setMemPrefetchCalled); context->freeMem(ptr); } HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenCommandBufferIsExhaustedWhenPrefetchApiCalledThenProgramStatePrefetch, IsXeHpcCore) { using STATE_PREFETCH = typename FamilyType::STATE_PREFETCH; using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END; DebugManagerStateRestore restore; auto pCommandList = std::make_unique>>(); auto result = pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u); ASSERT_EQ(ZE_RESULT_SUCCESS, result); constexpr size_t size = MemoryConstants::cacheLineSize * 2; constexpr size_t alignment = MemoryConstants::pageSize64k; constexpr size_t offset = MemoryConstants::cacheLineSize; constexpr uint32_t mocsIndexForL3 = (2 << 1); void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; context->allocDeviceMem(device->toHandle(), &deviceDesc, size + offset, alignment, &ptr); EXPECT_NE(nullptr, ptr); auto hwInfo = device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo(); hwInfo->platform.usRevId |= FamilyType::pvcBaseDieRevMask; auto firstBatchBufferAllocation = 
pCommandList->commandContainer.getCommandStream()->getGraphicsAllocation(); auto useSize = pCommandList->commandContainer.getCommandStream()->getAvailableSpace(); useSize -= sizeof(MI_BATCH_BUFFER_END); pCommandList->commandContainer.getCommandStream()->getSpace(useSize); DebugManager.flags.AddStatePrefetchCmdToMemoryPrefetchAPI.set(1); auto ret = pCommandList->appendMemoryPrefetch(ptrOffset(ptr, offset), size); EXPECT_EQ(ZE_RESULT_SUCCESS, ret); auto secondBatchBufferAllocation = pCommandList->commandContainer.getCommandStream()->getGraphicsAllocation(); EXPECT_NE(firstBatchBufferAllocation, secondBatchBufferAllocation); auto statePrefetchCmd = reinterpret_cast(pCommandList->commandContainer.getCommandStream()->getCpuBase()); EXPECT_EQ(statePrefetchCmd->getAddress(), reinterpret_cast(ptrOffset(ptr, offset))); EXPECT_FALSE(statePrefetchCmd->getKernelInstructionPrefetch()); EXPECT_EQ(mocsIndexForL3, statePrefetchCmd->getMemoryObjectControlState()); EXPECT_EQ(1u, statePrefetchCmd->getPrefetchSize()); NEO::ResidencyContainer::iterator it = pCommandList->commandContainer.getResidencyContainer().end(); it--; EXPECT_EQ(secondBatchBufferAllocation->getGpuAddress(), (*it)->getGpuAddress()); it--; EXPECT_EQ(reinterpret_cast(ptr), (*it)->getGpuAddress()); context->freeMem(ptr); } using CommandListEventFenceTestsXeHpcCore = Test; HWTEST2_F(CommandListEventFenceTestsXeHpcCore, givenCommandListWithProfilingEventAfterCommandOnPvcRev00ThenMiFenceIsNotAdded, IsXeHpcCore) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using MI_MEM_FENCE = typename FamilyType::MI_MEM_FENCE; if (defaultHwInfo->platform.eProductFamily != IGFX_PVC) { GTEST_SKIP(); } auto commandList = std::make_unique>>(); commandList->initialize(device, NEO::EngineGroupType::Compute, 0u); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; auto hwInfo = 
commandList->commandContainer.getDevice()->getExecutionEnvironment()->rootDeviceEnvironments[0]->getMutableHardwareInfo(); hwInfo->platform.usRevId = 0x00; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.signal = 0; eventDesc.wait = 0; ze_result_t result = ZE_RESULT_SUCCESS; auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); commandList->appendEventForProfiling(event->toHandle(), false); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed())); auto itor = find(cmdList.begin(), cmdList.end()); EXPECT_EQ(cmdList.end(), itor); } HWTEST2_F(CommandListEventFenceTestsXeHpcCore, givenCommandListWithProfilingEventAfterCommandOnPvcRev03ThenMiFenceIsAdded, IsXeHpcCore) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using MI_MEM_FENCE = typename FamilyType::MI_MEM_FENCE; auto commandList = std::make_unique>>(); commandList->initialize(device, NEO::EngineGroupType::Compute, 0u); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; auto hwInfo = commandList->commandContainer.getDevice()->getExecutionEnvironment()->rootDeviceEnvironments[0]->getMutableHardwareInfo(); hwInfo->platform.usRevId = 0x03; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.signal = 0; eventDesc.wait = 0; ze_result_t result = ZE_RESULT_SUCCESS; auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); 
commandList->appendEventForProfiling(event->toHandle(), false); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed())); auto itor = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), itor); } HWTEST2_F(CommandListEventFenceTestsXeHpcCore, givenCommandListWithRegularEventAfterCommandOnPvcRev03ThenMiFenceIsAdded, IsXeHpcCore) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using MI_MEM_FENCE = typename FamilyType::MI_MEM_FENCE; auto commandList = std::make_unique>>(); commandList->initialize(device, NEO::EngineGroupType::Compute, 0u); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; auto hwInfo = commandList->commandContainer.getDevice()->getExecutionEnvironment()->rootDeviceEnvironments[0]->getMutableHardwareInfo(); hwInfo->platform.usRevId = 0x03; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.signal = 0; eventDesc.wait = 0; ze_result_t result = ZE_RESULT_SUCCESS; auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); commandList->appendSignalEventPostWalker(event->toHandle()); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed())); auto itor = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), itor); } using CommandListAppendRangesBarrierXeHpcCore = Test; HWTEST2_F(CommandListAppendRangesBarrierXeHpcCore, givenCallToAppendRangesBarrierThenPipeControlProgrammed, IsXeHpcCore) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using PIPE_CONTROL = typename 
GfxFamily::PIPE_CONTROL; auto commandList = std::make_unique>>(); commandList->initialize(device, NEO::EngineGroupType::Copy, 0u); uint64_t gpuAddress = 0x1200; void *buffer = reinterpret_cast(gpuAddress); size_t size = 0x1100; NEO::MockGraphicsAllocation mockAllocation(buffer, gpuAddress, size); NEO::SvmAllocationData allocData(0); allocData.size = size; allocData.gpuAllocations.addAllocation(&mockAllocation); device->getDriverHandle()->getSvmAllocsManager()->insertSVMAlloc(allocData); const void *ranges[] = {buffer}; const size_t sizes[] = {size}; commandList->applyMemoryRangesBarrier(1, sizes, ranges); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed())); auto itor = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), itor); auto pipeControlCmd = reinterpret_cast(*itor); EXPECT_TRUE(pipeControlCmd->getHdcPipelineFlush()); EXPECT_TRUE(pipeControlCmd->getUnTypedDataPortCacheFlush()); } using CommandListAppendBarrierXeHpcCore = Test; HWTEST2_F(CommandListAppendBarrierXeHpcCore, givenCommandListWhenAppendingBarrierThenPipeControlIsProgrammedAndHdcAndUnTypedFlushesAreSet, IsPVC) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; auto commandList = std::make_unique>>(); commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u); ze_result_t returnValue = commandList->appendBarrier(nullptr, 0, nullptr); EXPECT_EQ(returnValue, ZE_RESULT_SUCCESS); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed())); // PC for STATE_BASE_ADDRESS from list initialization auto itor = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), itor); itor++; // PC for 
appendBarrier itor = find(itor, cmdList.end()); EXPECT_NE(cmdList.end(), itor); auto pipeControlCmd = reinterpret_cast(*itor); EXPECT_TRUE(pipeControlCmd->getHdcPipelineFlush()); EXPECT_TRUE(pipeControlCmd->getUnTypedDataPortCacheFlush()); } } // namespace ult } // namespace L0 test_cmdqueue_xe_hpc_core.cpp000066400000000000000000000155631422164147700337520ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/xe_hpc_core/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/hw_info_config.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "shared/test/common/helpers/default_hw_info.h" #include "shared/test/common/mocks/mock_command_stream_receiver.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/source/driver/driver_handle_imp.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_built_ins.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h" #include "level_zero/core/test/unit_tests/mocks/mock_kernel.h" #include "level_zero/core/test/unit_tests/mocks/mock_module.h" namespace L0 { namespace ult { struct CommandQueueCreateMultiOrdinalFixture : public DeviceFixture { void SetUp() { NEO::HardwareInfo hwInfo = *NEO::defaultHwInfo; hwInfo.featureTable.flags.ftrCCSNode = true; hwInfo.featureTable.ftrBcsInfo = 0; hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled = 4; neoDevice = NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo); NEO::DeviceVector devices; devices.push_back(std::unique_ptr(neoDevice)); driverHandle = std::make_unique>(); driverHandle->initialize(std::move(devices)); device = driverHandle->devices[0]; } void TearDown() { } std::unique_ptr> driverHandle; NEO::MockDevice *neoDevice = nullptr; L0::Device *device = nullptr; }; using CommandQueueCommandsPvc = 
Test; HWTEST2_F(CommandQueueCommandsPvc, givenCommandQueueWhenExecutingCommandListsThenGlobalFenceAllocationIsResident, IsXeHpcCore) { using STATE_SYSTEM_MEM_FENCE_ADDRESS = typename FamilyType::STATE_SYSTEM_MEM_FENCE_ADDRESS; ze_command_queue_desc_t desc = {}; MockCsrHw2 csr(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield()); csr.initializeTagAllocation(); csr.setupContext(*neoDevice->getDefaultEngine().osContext); csr.createGlobalFenceAllocation(); auto commandQueue = new MockCommandQueueHw(device, &csr, &desc); commandQueue->initialize(false, false); ze_result_t returnValue; std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::Compute, 0u, returnValue)); auto commandListHandle = commandList->toHandle(); commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false); auto globalFence = csr.getGlobalFenceAllocation(); bool found = false; for (auto alloc : csr.copyOfAllocations) { if (alloc == globalFence) { found = true; break; } } EXPECT_TRUE(found); commandQueue->destroy(); } HWTEST2_F(CommandQueueCommandsPvc, givenCommandQueueWhenExecutingCommandListsThenStateSystemMemFenceAddressCmdIsGenerated, IsXeHpcCore) { using STATE_SYSTEM_MEM_FENCE_ADDRESS = typename FamilyType::STATE_SYSTEM_MEM_FENCE_ADDRESS; ze_command_queue_desc_t desc = {}; auto csr = neoDevice->getDefaultEngine().commandStreamReceiver; auto commandQueue = new MockCommandQueueHw(device, csr, &desc); commandQueue->initialize(false, false); ze_result_t returnValue; std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::Compute, 0u, returnValue)); auto commandListHandle = commandList->toHandle(); commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false); auto globalFence = csr->getGlobalFenceAllocation(); auto used = commandQueue->commandStream->getUsed(); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, commandQueue->commandStream->getCpuBase(), 
used)); auto itor = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), itor); auto systemMemFenceAddressCmd = genCmdCast(*itor); EXPECT_EQ(globalFence->getGpuAddress(), systemMemFenceAddressCmd->getSystemMemoryFenceAddress()); commandQueue->destroy(); } HWTEST2_F(CommandQueueCommandsPvc, givenCommandQueueWhenExecutingCommandListsForTheSecondTimeThenStateSystemMemFenceAddressCmdIsNotGenerated, IsXeHpcCore) { using STATE_SYSTEM_MEM_FENCE_ADDRESS = typename FamilyType::STATE_SYSTEM_MEM_FENCE_ADDRESS; ze_command_queue_desc_t desc = {}; auto csr = neoDevice->getDefaultEngine().commandStreamReceiver; auto commandQueue = new MockCommandQueueHw(device, csr, &desc); commandQueue->initialize(false, false); ze_result_t returnValue; std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::Compute, 0u, returnValue)); auto commandListHandle = commandList->toHandle(); commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false); auto used = commandQueue->commandStream->getUsed(); commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false); auto sizeUsed2 = commandQueue->commandStream->getUsed(); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandQueue->commandStream->getCpuBase(), used), sizeUsed2)); auto itor = find(cmdList.begin(), cmdList.end()); EXPECT_EQ(cmdList.end(), itor); commandQueue->destroy(); } HWTEST2_F(CommandQueueCommandsPvc, givenLinkedCopyEngineOrdinalWhenCreatingThenSetAsCopyOnly, IsXeHpcCore) { ze_result_t returnValue; auto hwInfo = *NEO::defaultHwInfo; hwInfo.featureTable.ftrBcsInfo.set(1, true); hwInfo.capabilityTable.blitterOperationsSupported = true; auto testNeoDevice = NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo); auto testL0Device = std::unique_ptr(L0::Device::create(driverHandle.get(), testNeoDevice, false, &returnValue)); ze_context_handle_t hContext; ze_context_desc_t desc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC, nullptr, 
0}; ze_result_t result = driverHandle->createContext(&desc, 0u, nullptr, &hContext); EXPECT_EQ(ZE_RESULT_SUCCESS, result); ze_command_queue_desc_t cmdQueueDesc = {}; ze_command_queue_handle_t cmdQueue; cmdQueueDesc.ordinal = static_cast(testNeoDevice->getEngineGroupIndexFromEngineGroupType(NEO::EngineGroupType::LinkedCopy)); result = zeCommandQueueCreate(hContext, testL0Device.get(), &cmdQueueDesc, &cmdQueue); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto cmdQ = L0::CommandQueue::fromHandle(cmdQueue); EXPECT_TRUE(cmdQ->peekIsCopyOnlyCommandQueue()); cmdQ->destroy(); L0::Context::fromHandle(hContext)->destroy(); } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/xe_hpc_core/test_device_xe_hpc_core.cpp000066400000000000000000000225151422164147700334530ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_container/implicit_scaling.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/test/common/helpers/variable_backup.h" #include "shared/test/common/mocks/ult_device_factory.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/source/hw_helpers/l0_hw_helper.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" namespace L0 { namespace ult { HWTEST_EXCLUDE_PRODUCT(AppendMemoryCopy, givenCopyOnlyCommandListAndHostPointersWhenMemoryCopyCalledThenPipeControlWithDcFlushAddedIsNotAddedAfterBlitCopy, IGFX_XE_HPC_CORE); using DeviceTestXeHpc = Test; HWTEST2_F(DeviceTestXeHpc, whenCallingGetMemoryPropertiesWithNonNullPtrThenPropertiesAreReturned, IsXeHpcCore) { uint32_t count = 0; ze_result_t res = device->getMemoryProperties(&count, nullptr); EXPECT_EQ(res, ZE_RESULT_SUCCESS); EXPECT_EQ(1u, count); if (defaultHwInfo->platform.eProductFamily != IGFX_PVC) { GTEST_SKIP(); } ze_device_memory_properties_t memProperties = {}; res = device->getMemoryProperties(&count, 
&memProperties); EXPECT_EQ(res, ZE_RESULT_SUCCESS); EXPECT_EQ(1u, count); EXPECT_EQ(memProperties.maxClockRate, 3200u); EXPECT_EQ(memProperties.maxBusWidth, this->neoDevice->getDeviceInfo().addressBits); EXPECT_EQ(memProperties.totalSize, this->neoDevice->getDeviceInfo().globalMemSize); } HWTEST2_F(DeviceTestXeHpc, whenCallingGetMemoryPropertiesWithNonNullPtrAndBdRevisionIsNotA0ThenmaxClockRateReturnedIsZero, IsXeHpcCore) { uint32_t count = 0; auto device = driverHandle->devices[0]; auto hwInfo = device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo(); hwInfo->platform.usRevId = FamilyType::pvcBaseDieA0Masked ^ FamilyType::pvcBaseDieRevMask; ze_result_t res = device->getMemoryProperties(&count, nullptr); EXPECT_EQ(res, ZE_RESULT_SUCCESS); EXPECT_EQ(1u, count); ze_device_memory_properties_t memProperties = {}; res = device->getMemoryProperties(&count, &memProperties); EXPECT_EQ(res, ZE_RESULT_SUCCESS); EXPECT_EQ(1u, count); EXPECT_EQ(memProperties.maxClockRate, 0u); } HWTEST2_F(DeviceTestXeHpc, givenXeHpcAStepWhenCreatingMultiTileDeviceThenExpectImplicitScalingDisabled, IsXeHpcCore) { DebugManagerStateRestore restorer; DebugManager.flags.CreateMultipleSubDevices.set(2); VariableBackup apiSupportBackup(&NEO::ImplicitScaling::apiSupport, true); ze_result_t returnValue = ZE_RESULT_SUCCESS; std::unique_ptr driverHandle(new DriverHandleImp); auto hwInfo = *NEO::defaultHwInfo; hwInfo.platform.usRevId = 0x3; if (hwInfo.platform.eProductFamily != IGFX_PVC) { GTEST_SKIP(); } auto neoDevice = std::unique_ptr(NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); auto device = Device::create(driverHandle.get(), neoDevice.release(), false, &returnValue); ASSERT_NE(nullptr, device); EXPECT_FALSE(device->isImplicitScalingCapable()); static_cast(device)->releaseResources(); delete device; } HWTEST2_F(DeviceTestXeHpc, givenXeHpcAStepAndDebugFlagOverridesWhenCreatingMultiTileDeviceThenExpectImplicitScalingEnabled, IsXeHpcCore) { DebugManagerStateRestore 
restorer; DebugManager.flags.CreateMultipleSubDevices.set(2); DebugManager.flags.EnableImplicitScaling.set(1); VariableBackup apiSupportBackup(&NEO::ImplicitScaling::apiSupport, true); ze_result_t returnValue = ZE_RESULT_SUCCESS; std::unique_ptr driverHandle(new DriverHandleImp); auto hwInfo = *NEO::defaultHwInfo; hwInfo.platform.usRevId = 0x3; auto neoDevice = std::unique_ptr(NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); auto device = Device::create(driverHandle.get(), neoDevice.release(), false, &returnValue); ASSERT_NE(nullptr, device); EXPECT_TRUE(device->isImplicitScalingCapable()); static_cast(device)->releaseResources(); delete device; } HWTEST2_F(DeviceTestXeHpc, givenXeHpcBStepWhenCreatingMultiTileDeviceThenExpectImplicitScalingEnabled, IsXeHpcCore) { DebugManagerStateRestore restorer; DebugManager.flags.CreateMultipleSubDevices.set(2); VariableBackup apiSupportBackup(&NEO::ImplicitScaling::apiSupport, true); ze_result_t returnValue = ZE_RESULT_SUCCESS; std::unique_ptr driverHandle(new DriverHandleImp); auto hwInfo = *NEO::defaultHwInfo; hwInfo.platform.usRevId = 0x6; auto neoDevice = std::unique_ptr(NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); auto device = Device::create(driverHandle.get(), neoDevice.release(), false, &returnValue); ASSERT_NE(nullptr, device); EXPECT_TRUE(device->isImplicitScalingCapable()); static_cast(device)->releaseResources(); delete device; } using CommandQueueGroupTest = Test; HWTEST2_F(CommandQueueGroupTest, givenNoBlitterSupportAndNoCCSThenOneQueueGroupIsReturned, IsXeHpcCore) { const uint32_t rootDeviceIndex = 0u; NEO::HardwareInfo hwInfo = *NEO::defaultHwInfo.get(); hwInfo.featureTable.flags.ftrCCSNode = false; hwInfo.capabilityTable.blitterOperationsSupported = false; auto *neoMockDevice = NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo, rootDeviceIndex); Mock deviceImp(neoMockDevice, neoMockDevice->getExecutionEnvironment()); uint32_t count = 0; ze_result_t res = 
deviceImp.getCommandQueueGroupProperties(&count, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, res); EXPECT_GE(count, 1u); } HWTEST2_F(CommandQueueGroupTest, givenNoBlitterSupportAndCCSThenTwoQueueGroupsAreReturned, IsXeHpcCore) { const uint32_t rootDeviceIndex = 0u; NEO::HardwareInfo hwInfo = *NEO::defaultHwInfo.get(); hwInfo.featureTable.flags.ftrCCSNode = true; hwInfo.capabilityTable.blitterOperationsSupported = false; auto *neoMockDevice = NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo, rootDeviceIndex); Mock deviceImp(neoMockDevice, neoMockDevice->getExecutionEnvironment()); uint32_t count = 0; ze_result_t res = deviceImp.getCommandQueueGroupProperties(&count, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, res); EXPECT_GE(count, 2u); } HWTEST2_F(CommandQueueGroupTest, givenBlitterDisabledAndAllBcsSetThenTwoQueueGroupsAreReturned, IsXeHpcCore) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.EnableBlitterOperationsSupport.set(0); const uint32_t rootDeviceIndex = 0u; NEO::HardwareInfo hwInfo = *NEO::defaultHwInfo.get(); hwInfo.featureTable.flags.ftrCCSNode = true; hwInfo.featureTable.ftrBcsInfo.set(); auto *neoMockDevice = NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo, rootDeviceIndex); Mock deviceImp(neoMockDevice, neoMockDevice->getExecutionEnvironment()); uint32_t count = 0; ze_result_t res = deviceImp.getCommandQueueGroupProperties(&count, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, res); EXPECT_EQ(count, 2u); } class DeviceCopyQueueGroupFixture : public DeviceFixture { public: void SetUp() { DebugManager.flags.EnableBlitterOperationsSupport.set(0); DeviceFixture::SetUp(); } void TearDown() { DeviceFixture::TearDown(); } DebugManagerStateRestore restorer; }; using DeviceCopyQueueGroupTest = Test; HWTEST2_F(DeviceCopyQueueGroupTest, givenBlitterSupportAndEnableBlitterOperationsSupportSetToZeroThenNoCopyEngineIsReturned, IsXeHpcCore) { const uint32_t rootDeviceIndex = 0u; NEO::HardwareInfo hwInfo = *NEO::defaultHwInfo.get(); 
hwInfo.featureTable.flags.ftrCCSNode = false; hwInfo.capabilityTable.blitterOperationsSupported = true; hwInfo.featureTable.ftrBcsInfo.set(0); auto *neoMockDevice = NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo, rootDeviceIndex); Mock deviceImp(neoMockDevice, neoMockDevice->getExecutionEnvironment()); uint32_t count = 0; ze_result_t res = deviceImp.getCommandQueueGroupProperties(&count, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, res); std::vector properties(count); res = deviceImp.getCommandQueueGroupProperties(&count, properties.data()); EXPECT_EQ(ZE_RESULT_SUCCESS, res); for (auto &engineGroup : neoMockDevice->getRegularEngineGroups()) { EXPECT_NE(NEO::EngineGroupType::Copy, engineGroup.engineGroupType); } } HWTEST2_F(DeviceTestXeHpc, givenReturnedDevicePropertiesThenExpectedPropertyFlagsSet, IsXeHpcCore) { ze_device_properties_t deviceProps = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES}; device->getProperties(&deviceProps); EXPECT_EQ(ZE_DEVICE_PROPERTY_FLAG_ONDEMANDPAGING, deviceProps.flags & ZE_DEVICE_PROPERTY_FLAG_ONDEMANDPAGING); EXPECT_EQ(0u, deviceProps.flags & ZE_DEVICE_PROPERTY_FLAG_ECC); EXPECT_EQ(0u, deviceProps.flags & ZE_DEVICE_PROPERTY_FLAG_SUBDEVICE); EXPECT_EQ(0u, deviceProps.flags & ZE_DEVICE_PROPERTY_FLAG_INTEGRATED); } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/xe_hpc_core/test_module_xe_hpc_core.cpp000066400000000000000000000046731422164147700335060ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/kernel/kernel_properties.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" namespace L0 { namespace ult { using KernelPropertyTest = Test; HWTEST2_F(KernelPropertyTest, givenKernelExtendedPropertiesStructureWhenKernelPropertiesCalledThenPropertiesAreCorrectlySet, IsXeHpcCore) { ze_device_module_properties_t kernelProperties = {}; 
ze_float_atomic_ext_properties_t kernelExtendedProperties = {}; kernelExtendedProperties.stype = ZE_STRUCTURE_TYPE_FLOAT_ATOMIC_EXT_PROPERTIES; kernelProperties.pNext = &kernelExtendedProperties; ze_result_t res = device->getKernelProperties(&kernelProperties); EXPECT_EQ(res, ZE_RESULT_SUCCESS); EXPECT_TRUE(kernelExtendedProperties.fp16Flags & FP_ATOMIC_EXT_FLAG_GLOBAL_LOAD_STORE); EXPECT_TRUE(kernelExtendedProperties.fp16Flags & FP_ATOMIC_EXT_FLAG_GLOBAL_ADD); EXPECT_TRUE(kernelExtendedProperties.fp16Flags & FP_ATOMIC_EXT_FLAG_GLOBAL_MIN_MAX); EXPECT_FALSE(kernelExtendedProperties.fp16Flags & FP_ATOMIC_EXT_FLAG_LOCAL_LOAD_STORE); EXPECT_FALSE(kernelExtendedProperties.fp16Flags & FP_ATOMIC_EXT_FLAG_LOCAL_ADD); EXPECT_FALSE(kernelExtendedProperties.fp16Flags & FP_ATOMIC_EXT_FLAG_LOCAL_MIN_MAX); EXPECT_TRUE(kernelExtendedProperties.fp32Flags & FP_ATOMIC_EXT_FLAG_GLOBAL_LOAD_STORE); EXPECT_TRUE(kernelExtendedProperties.fp32Flags & FP_ATOMIC_EXT_FLAG_GLOBAL_ADD); EXPECT_TRUE(kernelExtendedProperties.fp32Flags & FP_ATOMIC_EXT_FLAG_GLOBAL_MIN_MAX); EXPECT_FALSE(kernelExtendedProperties.fp32Flags & FP_ATOMIC_EXT_FLAG_LOCAL_LOAD_STORE); EXPECT_FALSE(kernelExtendedProperties.fp32Flags & FP_ATOMIC_EXT_FLAG_LOCAL_ADD); EXPECT_FALSE(kernelExtendedProperties.fp32Flags & FP_ATOMIC_EXT_FLAG_LOCAL_MIN_MAX); EXPECT_TRUE(kernelExtendedProperties.fp64Flags & FP_ATOMIC_EXT_FLAG_GLOBAL_LOAD_STORE); EXPECT_TRUE(kernelExtendedProperties.fp64Flags & FP_ATOMIC_EXT_FLAG_GLOBAL_ADD); EXPECT_TRUE(kernelExtendedProperties.fp64Flags & FP_ATOMIC_EXT_FLAG_GLOBAL_MIN_MAX); EXPECT_FALSE(kernelExtendedProperties.fp64Flags & FP_ATOMIC_EXT_FLAG_LOCAL_LOAD_STORE); EXPECT_FALSE(kernelExtendedProperties.fp64Flags & FP_ATOMIC_EXT_FLAG_LOCAL_ADD); EXPECT_FALSE(kernelExtendedProperties.fp64Flags & FP_ATOMIC_EXT_FLAG_LOCAL_MIN_MAX); } } // namespace ult } // namespace 
L0compute-runtime-22.14.22890/level_zero/core/test/unit_tests/xe_hpg_core/000077500000000000000000000000001422164147700261125ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/xe_hpg_core/CMakeLists.txt000066400000000000000000000013411422164147700306510ustar00rootroot00000000000000# # Copyright (C) 2021-2022 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_XE_HPG_CORE) target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/test_cmdlist_xe_hpg_core.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_cmdqueue_enqueuecommandlist_xe_hpg_core.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_device_xe_hpg_core.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_image_xe_hpg_core.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_module_xe_hpg_core.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_l0_hw_helper_xe_hpg_core.cpp ) add_subdirectoriesL0(${CMAKE_CURRENT_SOURCE_DIR} "*") endif() compute-runtime-22.14.22890/level_zero/core/test/unit_tests/xe_hpg_core/dg2/000077500000000000000000000000001422164147700265665ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/xe_hpg_core/dg2/CMakeLists.txt000066400000000000000000000005461422164147700313330ustar00rootroot00000000000000# # Copyright (C) 2021 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_DG2) target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/test_sampler_dg2.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_cmdlist_dg2.cpp ) add_subdirectories() endif() compute-runtime-22.14.22890/level_zero/core/test/unit_tests/xe_hpg_core/dg2/test_cmdlist_dg2.cpp000066400000000000000000000047101422164147700325260ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/hw_info_config.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "shared/test/common/test_macros/test.h" #include 
"level_zero/core/source/cmdlist/cmdlist_hw.h" #include "level_zero/core/source/xe_hpg_core/cmdlist_xe_hpg_core.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/fixtures/module_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_module.h" namespace L0 { namespace ult { using CommandListTests = Test; HWTEST2_F(CommandListTests, givenDG2WithBSteppingWhenCreatingCommandListThenAdditionalStateBaseAddressCmdIsAdded, IsDG2) { using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; ze_result_t returnValue; auto &hwInfo = *neoDevice->getRootDeviceEnvironment().getMutableHardwareInfo(); const auto &hwInfoConfig = *NEO::HwInfoConfig::get(hwInfo.platform.eProductFamily); hwInfo.platform.usRevId = hwInfoConfig.getHwRevIdFromStepping(REVISION_B, hwInfo); std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::Compute, 0u, returnValue)); auto &commandContainer = commandList->commandContainer; ASSERT_NE(nullptr, commandContainer.getCommandStream()); auto usedSpaceBefore = commandContainer.getCommandStream()->getUsed(); auto result = commandList->close(); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto usedSpaceAfter = commandContainer.getCommandStream()->getUsed(); ASSERT_GT(usedSpaceAfter, usedSpaceBefore); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), usedSpaceAfter)); auto itor = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), itor); auto cmdSba = genCmdCast(*itor); EXPECT_TRUE(cmdSba->getDynamicStateBaseAddressModifyEnable()); EXPECT_TRUE(cmdSba->getDynamicStateBufferSizeModifyEnable()); itor++; itor = find(itor, cmdList.end()); ASSERT_NE(cmdList.end(), itor); cmdSba = genCmdCast(*itor); EXPECT_TRUE(cmdSba->getDynamicStateBaseAddressModifyEnable()); EXPECT_TRUE(cmdSba->getDynamicStateBufferSizeModifyEnable()); } } // namespace ult } // namespace 
L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/xe_hpg_core/dg2/test_sampler_dg2.cpp000066400000000000000000000045101422164147700325300ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/utilities/numeric.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/source/sampler/sampler_hw.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_sampler.h" namespace L0 { namespace ult { using SamplerCreateTest = Test; HWTEST2_F(SamplerCreateTest, givenDg2WhenInitializeSamplerAndForceSamplerLowFilteringPrecisionIsFalseThenLowQualityFilterIsDisabled, IsDG2) { using SAMPLER_STATE = typename NEO::XE_HPG_COREFamily::SAMPLER_STATE; EXPECT_FALSE(DebugManager.flags.ForceSamplerLowFilteringPrecision.get()); ze_sampler_address_mode_t addressMode = ZE_SAMPLER_ADDRESS_MODE_REPEAT; ze_sampler_filter_mode_t filterMode = ZE_SAMPLER_FILTER_MODE_NEAREST; ze_bool_t isNormalized = true; ze_sampler_desc_t desc = {}; desc.addressMode = addressMode; desc.filterMode = filterMode; desc.isNormalized = isNormalized; auto sampler = static_cast *>((*samplerFactory[IGFX_DG2])()); sampler->initialize(device, &desc); EXPECT_EQ(SAMPLER_STATE::LOW_QUALITY_FILTER_DISABLE, sampler->samplerState.getLowQualityFilter()); sampler->destroy(); } HWTEST2_F(SamplerCreateTest, givenDg2WhenInitializeSamplerAndForceSamplerLowFilteringPrecisionIsTrueThenLowQualityFilterIsEnabled, IsDG2) { using SAMPLER_STATE = typename NEO::XE_HPG_COREFamily::SAMPLER_STATE; DebugManagerStateRestore dbgRestore; DebugManager.flags.ForceSamplerLowFilteringPrecision.set(true); EXPECT_TRUE(DebugManager.flags.ForceSamplerLowFilteringPrecision.get()); ze_sampler_address_mode_t addressMode = ZE_SAMPLER_ADDRESS_MODE_REPEAT; ze_sampler_filter_mode_t filterMode = ZE_SAMPLER_FILTER_MODE_NEAREST; ze_bool_t isNormalized = true; ze_sampler_desc_t desc = {}; 
desc.addressMode = addressMode; desc.filterMode = filterMode; desc.isNormalized = isNormalized; auto sampler = static_cast *>((*samplerFactory[IGFX_DG2])()); sampler->initialize(device, &desc); EXPECT_EQ(SAMPLER_STATE::LOW_QUALITY_FILTER_ENABLE, sampler->samplerState.getLowQualityFilter()); sampler->destroy(); } } // namespace ult } // namespace L0 enable_l0_mocks_xe_hpg_core.cpp000066400000000000000000000006201422164147700341140ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/xe_hpg_core/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/test/unit_tests/mocks/mock_l0_debugger.h" namespace NEO { struct XE_HPG_COREFamily; using GfxFamily = XE_HPG_COREFamily; } // namespace NEO namespace L0 { namespace ult { static MockDebuggerL0HwPopulateFactory mockDebuggerXeHpgCore; } } // namespace L0compute-runtime-22.14.22890/level_zero/core/test/unit_tests/xe_hpg_core/test_cmdlist_xe_hpg_core.cpp000066400000000000000000000475151422164147700336720ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/l3_range.h" #include "shared/source/helpers/register_offsets.h" #include "shared/source/helpers/state_base_address.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/source/xe_hpg_core/cmdlist_xe_hpg_core.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h" #include "level_zero/core/test/unit_tests/mocks/mock_kernel.h" #include "level_zero/core/test/unit_tests/mocks/mock_module.h" #include "test_traits_platforms_common.h" namespace L0 { namespace ult { using CommandListCreate = Test; HWTEST2_F(CommandListCreate, 
WhenCreatingCommandListThenBindingTablePoolAllocAddedToBatchBuffer, IsXeHpgCore) { using _3DSTATE_BINDING_TABLE_POOL_ALLOC = typename FamilyType::_3DSTATE_BINDING_TABLE_POOL_ALLOC; ze_result_t returnValue; std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::Compute, 0u, returnValue)); auto &commandContainer = commandList->commandContainer; auto gmmHelper = commandContainer.getDevice()->getGmmHelper(); ASSERT_NE(nullptr, commandContainer.getCommandStream()); auto usedSpaceBefore = commandContainer.getCommandStream()->getUsed(); auto result = commandList->close(); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto usedSpaceAfter = commandContainer.getCommandStream()->getUsed(); ASSERT_GT(usedSpaceAfter, usedSpaceBefore); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), usedSpaceAfter)); auto itor = find<_3DSTATE_BINDING_TABLE_POOL_ALLOC *>(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), itor); { uint32_t streamBuffer[50] = {}; NEO::LinearStream linearStream(streamBuffer, sizeof(streamBuffer)); NEO::StateBaseAddressHelper::programBindingTableBaseAddress( linearStream, *commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE), gmmHelper); auto expectedCommand = reinterpret_cast<_3DSTATE_BINDING_TABLE_POOL_ALLOC *>(streamBuffer); auto programmedCommand = genCmdCast<_3DSTATE_BINDING_TABLE_POOL_ALLOC *>(*itor); EXPECT_EQ(0, memcmp(expectedCommand, programmedCommand, sizeof(_3DSTATE_BINDING_TABLE_POOL_ALLOC))); } } HWTEST2_F(CommandListCreate, givenNotCopyCommandListWhenProfilingEventBeforeCommandThenStoreRegMemAdded, IsXeHpgCore) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using MI_LOAD_REGISTER_REG = typename GfxFamily::MI_LOAD_REGISTER_REG; auto commandList = std::make_unique>>(); commandList->initialize(device, NEO::EngineGroupType::Compute, 0u); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 
1; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.signal = 0; eventDesc.wait = 0; ze_result_t result = ZE_RESULT_SUCCESS; auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); commandList->appendEventForProfiling(event->toHandle(), true); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed())); auto itor = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), itor); auto cmd = genCmdCast(*itor); EXPECT_EQ(cmd->getSourceRegisterAddress(), REG_GLOBAL_TIMESTAMP_LDW); itor++; itor = find(itor, cmdList.end()); EXPECT_NE(cmdList.end(), itor); cmd = genCmdCast(*itor); EXPECT_EQ(cmd->getSourceRegisterAddress(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW); } HWTEST2_F(CommandListCreate, givenNotCopyCommandListWhenProfilingEventAfterCommandThenPipeControlAndStoreRegMemAdded, IsXeHpgCore) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using MI_LOAD_REGISTER_REG = typename GfxFamily::MI_LOAD_REGISTER_REG; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; auto commandList = std::make_unique>>(); commandList->initialize(device, NEO::EngineGroupType::Compute, 0u); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.signal = 0; eventDesc.wait = 0; ze_result_t result = ZE_RESULT_SUCCESS; auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto event = 
std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); commandList->appendEventForProfiling(event->toHandle(), false); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed())); auto itor = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), itor); itor++; itor = find(itor, cmdList.end()); EXPECT_NE(cmdList.end(), itor); auto cmd = genCmdCast(*itor); EXPECT_EQ(cmd->getSourceRegisterAddress(), REG_GLOBAL_TIMESTAMP_LDW); itor++; itor = find(itor, cmdList.end()); EXPECT_NE(cmdList.end(), itor); cmd = genCmdCast(*itor); EXPECT_EQ(cmd->getSourceRegisterAddress(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW); } HWTEST2_F(CommandListCreate, givenCopyCommandListWhenProfilingEventThenStoreRegCommandIsAdded, IsXeHpgCore) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM; auto commandList = std::make_unique>>(); commandList->initialize(device, NEO::EngineGroupType::Copy, 0u); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; ze_result_t result = ZE_RESULT_SUCCESS; auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); commandList->appendEventForProfiling(event->toHandle(), false); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed())); auto itor = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), itor); } 
HWTEST2_F(CommandListCreate, givenAllocationsWhenAppendRangesBarrierThenL3ControlIsProgrammed, IsXeHpgCore) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using L3_CONTROL = typename GfxFamily::L3_CONTROL; auto commandList = std::make_unique>>(); commandList->initialize(device, NEO::EngineGroupType::Copy, 0u); uint64_t gpuAddress = 0x1200; void *buffer = reinterpret_cast(gpuAddress); size_t size = 0x1100; NEO::MockGraphicsAllocation mockAllocation(buffer, gpuAddress, size); NEO::SvmAllocationData allocData(0); allocData.size = size; allocData.gpuAllocations.addAllocation(&mockAllocation); device->getDriverHandle()->getSvmAllocsManager()->insertSVMAlloc(allocData); const void *ranges[] = {buffer}; const size_t sizes[] = {size}; commandList->applyMemoryRangesBarrier(1, sizes, ranges); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed())); auto itor = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), itor); EXPECT_EQ(cmdList.end(), ++itor); } HWTEST2_F(CommandListCreate, givenAllocationWithSizeTooBigForL3ControlWhenAppendRangesBarrierThenTwoL3ControlAreProgrammed, IsXeHpgCore) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using L3_CONTROL = typename GfxFamily::L3_CONTROL; auto commandList = std::make_unique>>(); commandList->initialize(device, NEO::EngineGroupType::Copy, 0u); uint64_t gpuAddress = 0x2000; void *buffer = reinterpret_cast(gpuAddress); size_t size = NEO::L3Range::maxSingleRange * (NEO::maxFlushSubrangeCount + 1); NEO::MockGraphicsAllocation mockAllocation(buffer, gpuAddress, size); NEO::SvmAllocationData allocData(0); allocData.size = size; allocData.gpuAllocations.addAllocation(&mockAllocation); device->getDriverHandle()->getSvmAllocsManager()->insertSVMAlloc(allocData); const void *ranges[] = {buffer}; const size_t sizes[] = {size}; 
commandList->applyMemoryRangesBarrier(1, sizes, ranges); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed())); auto itor = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), itor); EXPECT_NE(cmdList.end(), ++itor); } HWTEST2_F(CommandListCreate, givenRangeSizeTwiceBiggerThanAllocWhenAppendRangesBarrierThenL3ControlIsNotProgrammed, IsXeHpgCore) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using L3_CONTROL = typename GfxFamily::L3_CONTROL; auto commandList = std::make_unique>>(); commandList->initialize(device, NEO::EngineGroupType::Copy, 0u); uint64_t gpuAddress = 0x1200; void *buffer = reinterpret_cast(gpuAddress); size_t size = 0x1000; NEO::MockGraphicsAllocation mockAllocation(buffer, gpuAddress, size); NEO::SvmAllocationData allocData(0); allocData.size = size; allocData.gpuAllocations.addAllocation(&mockAllocation); device->getDriverHandle()->getSvmAllocsManager()->insertSVMAlloc(allocData); const void *ranges[] = {buffer}; const size_t sizes[] = {2 * size}; commandList->applyMemoryRangesBarrier(1, sizes, ranges); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed())); auto itor = find(cmdList.begin(), cmdList.end()); EXPECT_EQ(cmdList.end(), itor); } HWTEST2_F(CommandListCreate, givenRangeNotInSvmManagerThanAllocWhenAppendRangesBarrierThenL3ControlIsNotProgrammed, IsXeHpgCore) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using L3_CONTROL = typename GfxFamily::L3_CONTROL; auto commandList = std::make_unique>>(); commandList->initialize(device, NEO::EngineGroupType::Copy, 0u); uint64_t gpuAddress = 0x1200; void *buffer = reinterpret_cast(gpuAddress); size_t size = 0x1000; const void 
*ranges[] = {buffer}; const size_t sizes[] = {size}; commandList->applyMemoryRangesBarrier(1, sizes, ranges); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed())); auto itor = find(cmdList.begin(), cmdList.end()); EXPECT_EQ(cmdList.end(), itor); } HWTEST2_F(CommandListCreate, givenRangeNotAlignedToPageWhenAppendRangesBarrierThenCommandAdressIsAligned, IsXeHpgCore) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using L3_CONTROL = typename GfxFamily::L3_CONTROL; using L3_FLUSH_ADDRESS_RANGE = typename GfxFamily::L3_FLUSH_ADDRESS_RANGE; auto commandList = std::make_unique>>(); commandList->initialize(device, NEO::EngineGroupType::Copy, 0u); uint64_t gpuAddress = 0x1200; void *buffer = reinterpret_cast(gpuAddress); size_t size = 0x1100; NEO::MockGraphicsAllocation mockAllocation(buffer, gpuAddress, size); NEO::SvmAllocationData allocData(0); allocData.size = size; allocData.gpuAllocations.addAllocation(&mockAllocation); device->getDriverHandle()->getSvmAllocsManager()->insertSVMAlloc(allocData); const void *ranges[] = {buffer}; const size_t sizes[] = {size}; commandList->applyMemoryRangesBarrier(1, sizes, ranges); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed())); auto itor = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), itor); auto programmedCommand = genCmdCast(*itor); programmedCommand++; L3_FLUSH_ADDRESS_RANGE *l3Ranges = reinterpret_cast(programmedCommand); EXPECT_EQ(l3Ranges->getAddress(), alignDown(gpuAddress, MemoryConstants::pageSize)); } HWTEST2_F(CommandListCreate, givenRangeBetweenTwoPagesWhenAppendRangesBarrierThenAddressMaskIsCorrect, IsXeHpgCore) { using GfxFamily = typename 
NEO::GfxFamilyMapper::GfxFamily; using L3_CONTROL = typename GfxFamily::L3_CONTROL; using L3_FLUSH_ADDRESS_RANGE = typename GfxFamily::L3_FLUSH_ADDRESS_RANGE; auto commandList = std::make_unique>>(); commandList->initialize(device, NEO::EngineGroupType::Copy, 0u); uint64_t gpuAddress = 2 * MemoryConstants::pageSize + MemoryConstants::pageSize / 2; void *buffer = reinterpret_cast(gpuAddress); size_t size = MemoryConstants::pageSize / 2 + 1; NEO::MockGraphicsAllocation mockAllocation(buffer, gpuAddress, size); NEO::SvmAllocationData allocData(0); allocData.size = size; allocData.gpuAllocations.addAllocation(&mockAllocation); device->getDriverHandle()->getSvmAllocsManager()->insertSVMAlloc(allocData); const void *ranges[] = {buffer}; const size_t sizes[] = {size}; commandList->applyMemoryRangesBarrier(1, sizes, ranges); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed())); auto itor = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), itor); auto programmedCommand = genCmdCast(*itor); programmedCommand++; L3_FLUSH_ADDRESS_RANGE *l3Ranges = reinterpret_cast(programmedCommand); EXPECT_EQ(l3Ranges->getAddressMask(), NEO::L3Range::getMaskFromSize(2 * MemoryConstants::pageSize)); } template struct CommandListAdjustStateComputeMode : public WhiteBox<::L0::CommandListProductFamily> { CommandListAdjustStateComputeMode() : WhiteBox<::L0::CommandListProductFamily>(1) {} using ::L0::CommandListProductFamily::updateStreamProperties; using ::L0::CommandListProductFamily::finalStreamState; }; struct ProgramAllFieldsInComputeMode { template static constexpr bool isMatched() { if constexpr (NEO::ToGfxCoreFamily::get() != IGFX_XE_HPG_CORE) { return false; } else { return !TestTraitsPlatforms::programOnlyChangedFieldsInComputeStateMode; } } }; HWTEST2_F(CommandListCreate, 
GivenComputeModePropertiesWhenUpdateStreamPropertiesIsCalledTwiceThenFieldsChanged, ProgramAllFieldsInComputeMode) { DebugManagerStateRestore restorer; Mock<::L0::Kernel> kernel; auto pMockModule = std::unique_ptr(new Mock(device, nullptr)); kernel.module = pMockModule.get(); auto pCommandList = std::make_unique>(); auto result = pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u); ASSERT_EQ(ZE_RESULT_SUCCESS, result); const_cast(&kernel.getKernelDescriptor())->kernelAttributes.numGrfRequired = 0x100; pCommandList->updateStreamProperties(kernel, false, false); EXPECT_TRUE(pCommandList->finalStreamState.stateComputeMode.isCoherencyRequired.isDirty); EXPECT_TRUE(pCommandList->finalStreamState.stateComputeMode.largeGrfMode.isDirty); const_cast(&kernel.getKernelDescriptor())->kernelAttributes.numGrfRequired = 0x80; pCommandList->updateStreamProperties(kernel, false, false); EXPECT_TRUE(pCommandList->finalStreamState.stateComputeMode.largeGrfMode.isDirty); EXPECT_FALSE(pCommandList->finalStreamState.stateComputeMode.isCoherencyRequired.isDirty); } struct ProgramDirtyFieldsInComputeMode { template static constexpr bool isMatched() { if constexpr (NEO::ToGfxCoreFamily::get() != IGFX_XE_HPG_CORE) { return false; } else { return TestTraitsPlatforms::programOnlyChangedFieldsInComputeStateMode; } } }; HWTEST2_F(CommandListCreate, GivenComputeModePropertiesWhenUpdateStreamPropertiesIsCalledTwiceDirtyFieldsChanged, ProgramDirtyFieldsInComputeMode) { DebugManagerStateRestore restorer; Mock<::L0::Kernel> kernel; auto pMockModule = std::unique_ptr(new Mock(device, nullptr)); kernel.module = pMockModule.get(); auto pCommandList = std::make_unique>(); auto result = pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u); ASSERT_EQ(ZE_RESULT_SUCCESS, result); const_cast(&kernel.getKernelDescriptor())->kernelAttributes.numGrfRequired = 0x100; pCommandList->updateStreamProperties(kernel, false, false); 
EXPECT_TRUE(pCommandList->finalStreamState.stateComputeMode.isCoherencyRequired.isDirty); EXPECT_TRUE(pCommandList->finalStreamState.stateComputeMode.largeGrfMode.isDirty); const_cast(&kernel.getKernelDescriptor())->kernelAttributes.numGrfRequired = 0x80; pCommandList->updateStreamProperties(kernel, false, false); EXPECT_TRUE(pCommandList->finalStreamState.stateComputeMode.largeGrfMode.isDirty); EXPECT_FALSE(pCommandList->finalStreamState.stateComputeMode.isCoherencyRequired.isDirty); } } // namespace ult } // namespace L0 test_cmdqueue_enqueuecommandlist_xe_hpg_core.cpp000066400000000000000000000101241422164147700377300ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/xe_hpg_core/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/linear_stream.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h" #include "level_zero/core/test/unit_tests/mocks/mock_device.h" #include "level_zero/core/test/unit_tests/mocks/mock_memory_manager.h" #include #include "gtest/gtest.h" namespace L0 { namespace ult { using CommandQueueExecuteCommandListsXeHpgCore = Test; XE_HPG_CORETEST_F(CommandQueueExecuteCommandListsXeHpgCore, WhenExecutingCmdListsThenPipelineSelectAndCfeStateAreAddedToCmdBuffer) { const ze_command_queue_desc_t desc = {}; ze_result_t returnValue; auto commandQueue = whitebox_cast(CommandQueue::create( productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &desc, false, false, returnValue)); ASSERT_NE(nullptr, commandQueue->commandStream); auto usedSpaceBefore = commandQueue->commandStream->getUsed(); ze_command_list_handle_t 
commandLists[] = { CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)->toHandle()}; uint32_t numCommandLists = sizeof(commandLists) / sizeof(commandLists[0]); auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto usedSpaceAfter = commandQueue->commandStream->getUsed(); ASSERT_GT(usedSpaceAfter, usedSpaceBefore); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandQueue->commandStream->getCpuBase(), 0), usedSpaceAfter)); using CFE_STATE = typename FamilyType::CFE_STATE; auto itorCFE = find(cmdList.begin(), cmdList.end()); ASSERT_NE(itorCFE, cmdList.end()); // Should have a PS before a CFE using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT; auto itorPS = find(cmdList.begin(), itorCFE); ASSERT_NE(itorPS, itorCFE); { auto cmd = genCmdCast(*itorPS); EXPECT_EQ(cmd->getMaskBits() & 3u, 3u); EXPECT_EQ(cmd->getPipelineSelection(), PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU); } CommandList::fromHandle(commandLists[0])->destroy(); commandQueue->destroy(); } XE_HPG_CORETEST_F(CommandQueueExecuteCommandListsXeHpgCore, WhenExecutingCmdListsThenStateBaseAddressForGeneralStateBaseAddressIsNotAdded) { const ze_command_queue_desc_t desc = {}; ze_result_t returnValue; auto commandQueue = whitebox_cast(CommandQueue::create( productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &desc, false, false, returnValue)); ASSERT_NE(nullptr, commandQueue->commandStream); auto usedSpaceBefore = commandQueue->commandStream->getUsed(); ze_command_list_handle_t commandLists[] = { CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)->toHandle()}; uint32_t numCommandLists = sizeof(commandLists) / sizeof(commandLists[0]); auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto 
usedSpaceAfter = commandQueue->commandStream->getUsed(); ASSERT_GT(usedSpaceAfter, usedSpaceBefore); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandQueue->commandStream->getCpuBase(), 0), usedSpaceAfter)); using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; auto itorSba = find(cmdList.begin(), cmdList.end()); EXPECT_EQ(itorSba, cmdList.end()); CommandList::fromHandle(commandLists[0])->destroy(); commandQueue->destroy(); } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/xe_hpg_core/test_device_xe_hpg_core.cpp000066400000000000000000000151251422164147700334620ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" namespace L0 { namespace ult { using CommandQueueGroupTest = Test; HWTEST2_F(CommandQueueGroupTest, givenNoBlitterSupportAndNoCCSThenOneQueueGroupIsReturned, IsXeHpgCore) { const uint32_t rootDeviceIndex = 0u; NEO::HardwareInfo hwInfo = *NEO::defaultHwInfo.get(); hwInfo.featureTable.flags.ftrCCSNode = false; hwInfo.capabilityTable.blitterOperationsSupported = false; auto *neoMockDevice = NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo, rootDeviceIndex); Mock deviceImp(neoMockDevice, neoMockDevice->getExecutionEnvironment()); uint32_t count = 0; ze_result_t res = deviceImp.getCommandQueueGroupProperties(&count, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, res); EXPECT_GE(count, 1u); } HWTEST2_F(CommandQueueGroupTest, givenNoBlitterSupportAndCCSThenTwoQueueGroupsAreReturned, IsXeHpgCore) { const uint32_t rootDeviceIndex = 0u; NEO::HardwareInfo hwInfo = *NEO::defaultHwInfo.get(); hwInfo.featureTable.flags.ftrCCSNode = true; hwInfo.capabilityTable.blitterOperationsSupported = false; auto *neoMockDevice = NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo, 
rootDeviceIndex); Mock deviceImp(neoMockDevice, neoMockDevice->getExecutionEnvironment()); uint32_t count = 0; ze_result_t res = deviceImp.getCommandQueueGroupProperties(&count, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, res); EXPECT_GE(count, 2u); } HWTEST2_F(CommandQueueGroupTest, givenBlitterSupportAndCCSThenThreeQueueGroupsAreReturned, IsXeHpgCore) { const uint32_t rootDeviceIndex = 0u; NEO::HardwareInfo hwInfo = *NEO::defaultHwInfo.get(); hwInfo.featureTable.flags.ftrCCSNode = true; hwInfo.capabilityTable.blitterOperationsSupported = true; hwInfo.featureTable.ftrBcsInfo.set(0); auto *neoMockDevice = NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo, rootDeviceIndex); Mock deviceImp(neoMockDevice, neoMockDevice->getExecutionEnvironment()); uint32_t count = 0; ze_result_t res = deviceImp.getCommandQueueGroupProperties(&count, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, res); EXPECT_GE(count, 3u); std::vector properties(count); res = deviceImp.getCommandQueueGroupProperties(&count, properties.data()); EXPECT_EQ(ZE_RESULT_SUCCESS, res); auto &engineGroups = neoMockDevice->getRegularEngineGroups(); for (uint32_t i = 0; i < count; i++) { if (engineGroups[i].engineGroupType == NEO::EngineGroupType::RenderCompute) { EXPECT_TRUE(properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE); EXPECT_TRUE(properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY); EXPECT_TRUE(properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COOPERATIVE_KERNELS); EXPECT_TRUE(properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_METRICS); EXPECT_EQ(properties[i].numQueues, 1u); EXPECT_EQ(properties[i].maxMemoryFillPatternSize, std::numeric_limits::max()); } else if (engineGroups[i].engineGroupType == NEO::EngineGroupType::Compute) { EXPECT_TRUE(properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE); EXPECT_TRUE(properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY); EXPECT_TRUE(properties[i].flags & 
ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COOPERATIVE_KERNELS); uint32_t numerOfCCSEnabled = hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled; EXPECT_EQ(properties[i].numQueues, numerOfCCSEnabled); EXPECT_EQ(properties[i].maxMemoryFillPatternSize, std::numeric_limits::max()); } else if (engineGroups[i].engineGroupType == NEO::EngineGroupType::Copy) { EXPECT_TRUE(properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY); EXPECT_EQ(properties[i].numQueues, hwInfo.featureTable.ftrBcsInfo.count()); EXPECT_EQ(properties[i].maxMemoryFillPatternSize, 4 * sizeof(uint32_t)); } } } class DeviceCopyQueueGroupFixture : public DeviceFixture { public: void SetUp() { DebugManager.flags.EnableBlitterOperationsSupport.set(0); DeviceFixture::SetUp(); } void TearDown() { DeviceFixture::TearDown(); } DebugManagerStateRestore restorer; }; using DeviceCopyQueueGroupTest = Test; HWTEST2_F(DeviceCopyQueueGroupTest, givenBlitterSupportAndEnableBlitterOperationsSupportSetToZeroThenNoCopyEngineIsReturned, IsXeHpgCore) { const uint32_t rootDeviceIndex = 0u; NEO::HardwareInfo hwInfo = *NEO::defaultHwInfo.get(); hwInfo.featureTable.flags.ftrCCSNode = false; hwInfo.capabilityTable.blitterOperationsSupported = true; hwInfo.featureTable.ftrBcsInfo.set(0); auto *neoMockDevice = NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo, rootDeviceIndex); Mock deviceImp(neoMockDevice, neoMockDevice->getExecutionEnvironment()); uint32_t count = 0; ze_result_t res = deviceImp.getCommandQueueGroupProperties(&count, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, res); std::vector properties(count); res = deviceImp.getCommandQueueGroupProperties(&count, properties.data()); EXPECT_EQ(ZE_RESULT_SUCCESS, res); for (auto &engineGroup : neoMockDevice->getRegularEngineGroups()) { EXPECT_NE(NEO::EngineGroupType::Copy, engineGroup.engineGroupType); } } using TestDeviceXeHpgCore = Test; HWTEST2_F(TestDeviceXeHpgCore, givenReturnedDevicePropertiesThenExpectedPropertyFlagsSet, IsXeHpgCore) { ze_device_properties_t 
deviceProps = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES}; device->getProperties(&deviceProps); EXPECT_EQ(0u, deviceProps.flags & ZE_DEVICE_PROPERTY_FLAG_ONDEMANDPAGING); EXPECT_EQ(0u, deviceProps.flags & ZE_DEVICE_PROPERTY_FLAG_ECC); EXPECT_EQ(0u, deviceProps.flags & ZE_DEVICE_PROPERTY_FLAG_SUBDEVICE); if (defaultHwInfo->capabilityTable.isIntegratedDevice) { EXPECT_EQ(ZE_DEVICE_PROPERTY_FLAG_INTEGRATED, deviceProps.flags & ZE_DEVICE_PROPERTY_FLAG_INTEGRATED); } else { EXPECT_EQ(0u, deviceProps.flags & ZE_DEVICE_PROPERTY_FLAG_INTEGRATED); } } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/xe_hpg_core/test_image_xe_hpg_core.cpp000066400000000000000000000071121422164147700333020ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/linear_stream.h" #include "shared/source/device/device.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/indirect_heap/indirect_heap.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/source/image/image.h" #include "level_zero/core/source/image/image_hw.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" namespace L0 { namespace ult { using ImageCreate = Test; HWTEST2_F(ImageCreate, WhenGettingImagePropertiesThenPropertiesSetCorrectly, IsXeHpgCore) { ze_image_properties_t properties; ze_image_desc_t desc = {}; desc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC; desc.type = ZE_IMAGE_TYPE_3D; desc.format.layout = ZE_IMAGE_FORMAT_LAYOUT_8_8_8_8; desc.format.type = ZE_IMAGE_FORMAT_TYPE_UINT; desc.format.x = desc.format.y = desc.format.z = desc.format.w = ZE_IMAGE_FORMAT_SWIZZLE_R; desc.width = 10; desc.height = 10; desc.depth = 10; auto result = device->imageGetProperties(&desc, &properties); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto samplerFilterFlagsValid = (properties.samplerFilterFlags 
== ZE_IMAGE_SAMPLER_FILTER_FLAG_POINT) || (properties.samplerFilterFlags == ZE_IMAGE_SAMPLER_FILTER_FLAG_LINEAR); EXPECT_TRUE(samplerFilterFlagsValid); } HWTEST2_F(ImageCreate, WhenDestroyingImageThenSuccessIsReturned, IsXeHpgCore) { ze_image_desc_t desc = {}; desc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC; L0::Image *image_ptr; auto result = Image::create(productFamily, device, &desc, &image_ptr); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto image = whitebox_cast(image_ptr); ASSERT_NE(nullptr, image); result = zeImageDestroy(image->toHandle()); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } HWTEST2_F(ImageCreate, WhenCreatingImageThenSuccessIsReturned, IsXeHpgCore) { ze_image_desc_t desc = {}; desc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC; L0::Image *image_ptr; auto result = Image::create(productFamily, device, &desc, &image_ptr); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto image = whitebox_cast(image_ptr); ASSERT_NE(nullptr, image); image->destroy(); } HWTEST2_F(ImageCreate, givenInvalidProductFamilyThenReturnNullPointer, IsXeHpgCore) { ze_image_desc_t desc = {}; desc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC; L0::Image *image_ptr; auto result = Image::create(IGFX_UNKNOWN, device, &desc, &image_ptr); ASSERT_NE(ZE_RESULT_SUCCESS, result); auto image = whitebox_cast(image_ptr); ASSERT_EQ(nullptr, image); } HWTEST2_F(ImageCreate, WhenImagesIsCreatedThenParamsSetCorrectly, IsXeHpgCore) { ze_image_desc_t desc = {}; desc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC; desc.type = ZE_IMAGE_TYPE_3D; desc.format.layout = ZE_IMAGE_FORMAT_LAYOUT_8_8_8_8; desc.format.type = ZE_IMAGE_FORMAT_TYPE_UINT; desc.format.x = desc.format.y = desc.format.z = desc.format.w = ZE_IMAGE_FORMAT_SWIZZLE_R; desc.width = 10; desc.height = 10; desc.depth = 10; L0::Image *image_ptr; auto result = Image::create(productFamily, device, &desc, &image_ptr); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto image = whitebox_cast(image_ptr); ASSERT_NE(nullptr, image); auto alloc = image->getAllocation(); ASSERT_NE(nullptr, alloc); 
image->destroy(); } } // namespace ult } // namespace L0 test_l0_hw_helper_xe_hpg_core.cpp000066400000000000000000000017651422164147700345210ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/core/test/unit_tests/xe_hpg_core/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" namespace L0 { namespace ult { HWTEST_EXCLUDE_PRODUCT(L0HwHelperTest, givenBitmaskWithAttentionBitsForSingleThreadWhenGettingThreadsThenSingleCorrectThreadReturned, IGFX_XE_HPG_CORE); HWTEST_EXCLUDE_PRODUCT(L0HwHelperTest, givenBitmaskWithAttentionBitsForAllSubslicesWhenGettingThreadsThenCorrectThreadsAreReturned, IGFX_XE_HPG_CORE); HWTEST_EXCLUDE_PRODUCT(L0HwHelperTest, givenBitmaskWithAttentionBitsForAllEUsWhenGettingThreadsThenCorrectThreadsAreReturned, IGFX_XE_HPG_CORE); HWTEST_EXCLUDE_PRODUCT(L0HwHelperTest, givenEu0To1Threads0To3BitmaskWhenGettingThreadsThenCorrectThreadsAreReturned, IGFX_XE_HPG_CORE); HWTEST_EXCLUDE_PRODUCT(L0HwHelperTest, givenBitmaskWithAttentionBitsForHalfOfThreadsWhenGettingThreadsThenCorrectThreadsAreReturned, IGFX_XE_HPG_CORE); } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/core/test/unit_tests/xe_hpg_core/test_module_xe_hpg_core.cpp000066400000000000000000000047041422164147700335110ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/kernel/kernel_properties.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" namespace L0 { namespace ult { using KernelPropertyTest = Test; HWTEST2_F(KernelPropertyTest, givenKernelExtendedPropertiesStructureWhenKernelPropertiesCalledThenPropertiesAreCorrectlySet, IsXeHpgCore) { ze_device_module_properties_t kernelProperties = {}; ze_float_atomic_ext_properties_t kernelExtendedProperties = {}; 
kernelExtendedProperties.stype = ZE_STRUCTURE_TYPE_FLOAT_ATOMIC_EXT_PROPERTIES; kernelProperties.pNext = &kernelExtendedProperties; ze_result_t res = device->getKernelProperties(&kernelProperties); EXPECT_EQ(res, ZE_RESULT_SUCCESS); EXPECT_FALSE(kernelExtendedProperties.fp16Flags & FP_ATOMIC_EXT_FLAG_GLOBAL_LOAD_STORE); EXPECT_FALSE(kernelExtendedProperties.fp16Flags & FP_ATOMIC_EXT_FLAG_GLOBAL_ADD); EXPECT_FALSE(kernelExtendedProperties.fp16Flags & FP_ATOMIC_EXT_FLAG_GLOBAL_MIN_MAX); EXPECT_FALSE(kernelExtendedProperties.fp16Flags & FP_ATOMIC_EXT_FLAG_LOCAL_LOAD_STORE); EXPECT_FALSE(kernelExtendedProperties.fp16Flags & FP_ATOMIC_EXT_FLAG_LOCAL_ADD); EXPECT_FALSE(kernelExtendedProperties.fp16Flags & FP_ATOMIC_EXT_FLAG_LOCAL_MIN_MAX); EXPECT_FALSE(kernelExtendedProperties.fp32Flags & FP_ATOMIC_EXT_FLAG_GLOBAL_LOAD_STORE); EXPECT_TRUE(kernelExtendedProperties.fp32Flags & FP_ATOMIC_EXT_FLAG_GLOBAL_ADD); EXPECT_FALSE(kernelExtendedProperties.fp32Flags & FP_ATOMIC_EXT_FLAG_GLOBAL_MIN_MAX); EXPECT_FALSE(kernelExtendedProperties.fp32Flags & FP_ATOMIC_EXT_FLAG_LOCAL_LOAD_STORE); EXPECT_FALSE(kernelExtendedProperties.fp32Flags & FP_ATOMIC_EXT_FLAG_LOCAL_ADD); EXPECT_FALSE(kernelExtendedProperties.fp32Flags & FP_ATOMIC_EXT_FLAG_LOCAL_MIN_MAX); EXPECT_FALSE(kernelExtendedProperties.fp64Flags & FP_ATOMIC_EXT_FLAG_GLOBAL_LOAD_STORE); EXPECT_FALSE(kernelExtendedProperties.fp64Flags & FP_ATOMIC_EXT_FLAG_GLOBAL_ADD); EXPECT_FALSE(kernelExtendedProperties.fp64Flags & FP_ATOMIC_EXT_FLAG_GLOBAL_MIN_MAX); EXPECT_FALSE(kernelExtendedProperties.fp64Flags & FP_ATOMIC_EXT_FLAG_LOCAL_LOAD_STORE); EXPECT_FALSE(kernelExtendedProperties.fp64Flags & FP_ATOMIC_EXT_FLAG_LOCAL_ADD); EXPECT_FALSE(kernelExtendedProperties.fp64Flags & FP_ATOMIC_EXT_FLAG_LOCAL_MIN_MAX); } } // namespace ult } // namespace L0 
compute-runtime-22.14.22890/level_zero/ddi/000077500000000000000000000000001422164147700202605ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/ddi/ze_ddi_tables.h000066400000000000000000000012651422164147700232250ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include #include #include #include #include #include extern "C" { typedef struct _ze_gpu_driver_dditable_t { ze_dditable_t ddiTable; ze_dditable_t core_ddiTable; ze_dditable_t tracing_ddiTable; zet_dditable_t tools_ddiTable; zes_dditable_t sysman_ddiTable; ze_api_version_t version = ZE_API_VERSION_1_0; bool enableTracing; } ze_gpu_driver_dditable_t; extern ze_gpu_driver_dditable_t driver_ddiTable; } // extern "C" compute-runtime-22.14.22890/level_zero/doc/000077500000000000000000000000001422164147700202655ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/doc/BUILD.md000066400000000000000000000041641422164147700214530ustar00rootroot00000000000000 # Building Level Zero These instructions have been tested on Ubuntu* and complement those existing for NEO in the top-level BUILD.md file. 1. Install/build Level Zero loader and Level Zero headers Install Level Zero loader and headers from [https://github.com/oneapi-src/level-zero/releases](https://github.com/oneapi-src/level-zero/releases). For execution, only the level-zero package is needed; for compilation, level-zero-devel package is also required. Alternatively, build Level Zero loader from source, as indicated in [https://github.com/oneapi-src/level-zero](https://github.com/oneapi-src/level-zero). Build will generate ze_loader library and symlinks, as well as those for ze_validation_layer. 2. Install/build Level Zero driver Install Level Zero package from [https://github.com/intel/compute-runtime/releases](https://github.com/intel/compute-runtime/releases). Alternatively, follow instructions in top-level BUILD.md file to build NEO. 
Level Zero is built by default. When built, ze_intel_gpu library and symlinks are generated. 3. Build your application Compilation needs to include the Level Zero headers and to link against the loader library: ```shell g++ zello_world_gpu.cpp -o zello_world_gpu -lze_loader ``` If libraries not installed in system paths, include Level Zero headers and path to Level Zero loader: ```shell g++ -I zello_world_gpu.cpp -o zello_world_gpu -L -lze_loader ``` 4. Execute your application If Level Zero loader packages have been built and installed in the system, then they will be present in system paths: ```shell ./zello_world_gpu ``` Sample output: ```shell Device : * name : Intel(R) Graphics Gen9 [0x5912] * type : GPU * vendorId : 8086 Zello World Results validation PASSED ``` If libraries not installed in system paths, add paths to ze_loader and ze_intel_gpu libraries: ```shell LD_LIBRARY_PATH=: ./zello_world_gpu ``` ___(*) Other names and brands may be claimed as property of others.___compute-runtime-22.14.22890/level_zero/doc/RELEASENOTES_CORE.md000066400000000000000000000321341422164147700233130ustar00rootroot00000000000000 # Release Notes v1.3 Level Zero Core API. January 2022 ## Changes in this release: ### Implict Scaling Implicit scaling has been enabled by default on Level Zero on Xe HPC (PVC) B and later steppings. The `EnableImplicitScaling` debug key may be used to enable (`EnableImplicitScaling=1`) or disable (`EnableImplicitScaling=0`) implicit scaling on on Xe HPC and other multi-tile architectures. ### [Blocking Free](https://spec.oneapi.io/level-zero/latest/core/api.html#zememfreeext) The blocking free memory policy has been implemented for `zeMemFreeExt` extension. Defer free policy will be added in upcoming releases. ### [PCI Properties Extension](https://spec.oneapi.io/level-zero/latest/core/EXT_PCIProperties.html#pci-properties-extension) Support for PCI properties extension has been added via `zeDevicePciGetPropertiesExt` interface. 
This currently provides access to the device's BDF address only. The device bandwidth property will be exposed in the future, based on support from underlying components. ### [Memory Compression Hints](https://spec.oneapi.io/level-zero/latest/core/EXT_MemoryCompressionHints.html#memory-compression-hints-extension) Memory compression hints for shared and device memory allocations and images have been added. ### Sampler Address Modes Fix Level Zero driver had a bug in the implementation of the ZE_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER and ZE_SAMPLER_ADDRESS_MODE_CLAMP address modes, where the two modes were implemented inverted (each behaved as the other). This is now fixed and users can use the driver's version to determine which address mode to use. Details on how DPC++ is handling this can be found in: [https://github.com/intel/llvm/blob/756c2e8fb45e44b51b32bd8a22b3c325f17bb5c9/sycl/plugins/level_zero/pi_level_zero.cpp#L5264?] # Release Notes v1.2 Level Zero Core API. August 2021 ## Changes in this release: ### [Extension to create image views for planar formats](https://spec.oneapi.com/level-zero/latest/core/api.html?highlight=relaxed#relaxedalloclimits-enums) This extension allows accessing each plane for planar formats and having different interpretations of created images. Sample code: [https://github.com/intel/compute-runtime/blob/master/level_zero/core/test/black_box_tests/zello_image_view.cpp](https://github.com/intel/compute-runtime/blob/master/level_zero/core/test/black_box_tests/zello_image_view.cpp) ### [Extension for querying image properties](https://spec.oneapi.io/level-zero/latest/core/api.html?highlight=ze_image_memory_properties_exp_t#_CPPv432ze_image_memory_properties_exp_t) This extension allows querying the different properties of an image, such as size, row pitch, and slice pitch. 
### [Definition of ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES_1_2 properties](https://spec.oneapi.io/level-zero/latest/core/api.html?highlight=ze_structure_type_device_properties_1_2#_CPPv439ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES_1_2) `ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES_1_2` properties allow users to request that the driver return the timer resolution in cycles per second, as defined in the v1.2 specification: ```cpp ze_api_version_t version; zeDriverGetApiVersion(hDriver, &version); ... ze_device_properties_t devProperties = {}; devProperties.stype = ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES_1_2; zeDeviceGetProperties(device, &devProperties); uint64_t timerResolutionInCyclesPerSecond = devProperties.timerResolution; ``` If `ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES_1_2` is not set, then timer resolution is returned in nanoseconds, as defined in v1.1. ```cpp ze_api_version_t version; zeDriverGetApiVersion(hDriver, &version); ... ze_device_properties_t devProperties = {}; zeDeviceGetProperties(device, &devProperties); uint64_t timerResolutionInNanoSeconds = devProperties.timerResolution; ``` ### Extension to set preferred allocation for USM shared allocations [`ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_INITIAL_PLACEMENT`](https://spec.oneapi.io/level-zero/latest/core/api.html?highlight=mem_alloc_flag_bias_initial_placement#_CPPv447ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_INITIAL_PLACEMENT) and [`ZE_HOST_MEM_ALLOC_FLAG_BIAS_INITIAL_PLACEMENT`](https://spec.oneapi.io/level-zero/latest/core/api.html?highlight=mem_alloc_flag_bias_initial_placement#_CPPv445ZE_HOST_MEM_ALLOC_FLAG_BIAS_INITIAL_PLACEMENT) can now be set in `ze_device_mem_alloc_flags_t` and `ze_host_mem_alloc_flags_t`, respectively, when creating a shared-allocation, to indicate to the driver where a shared-allocation should be initially placed. 
### [IPC Memory Cache Bias Flags](https://spec.oneapi.io/level-zero/latest/core/api.html?highlight=ze_ipc_memory_flag_bias_cached#ze-ipc-memory-flags-t) `ZE_IPC_MEMORY_FLAG_BIAS_CACHED` and `ZE_IPC_MEMORY_FLAG_BIAS_UNCACHED ` can be passed when opening an IPC memory handle with `zeMemOpenIpcHandle` to set the cache settings of the imported allocation. ### [Support for preferred group size](https://spec.oneapi.io/level-zero/latest/core/api.html?highlight=ze_kernel_preferred_group_size_properties_t#ze-kernel-preferred-group-size-properties-t) `ze_kernel_preferred_group_size_properties_t` can be used through `zeKernelGetProperties` to query for the preferred multiple group size of a kernel for submission. Submitting a kernel with the preferred group size returned by the driver may improve performance in certain platforms. ### [Module compilation options](https://spec.oneapi.io/level-zero/latest/core/PROG.html#module-build-options) Optimization levels can now be passed to `zeModuleCreate` using the `-ze-opt-level` option, which are then communicated to the underlying graphics compiler as hint to indicate the level of optimization desired. ### [Extension to read the timestamps of each subdevice](https://spec.oneapi.io/level-zero/latest/core/api.html?highlight=zeeventquerytimestampsexp#zeeventquerytimestampsexp) This extension defines the `zeEventQueryTimestampsExp` interface to query for timestamps of the parent device or all of the available subdevices. ### [Extension to set thread arbitration policy](https://spec.oneapi.io/level-zero/latest/core/api.html?highlight=ze_structure_type_device_properties_1_2#kernelschedulinghints) The `zeKernelSchedulingHintExp` interface allows applications to set the thread arbitration policy desired for the target kernel. 
Available policies can be queried by the application through `zeDeviceGetModuleProperties` with the [`ze_scheduling_hint_exp_properties_t`](https://spec.oneapi.io/level-zero/latest/core/api.html?highlight=ze_scheduling_hint_exp_properties_t#_CPPv435ze_scheduling_hint_exp_properties_t) structure. Policies include: * `ZE_SCHEDULING_HINT_EXP_FLAG_OLDEST_FIRST` * `ZE_SCHEDULING_HINT_EXP_FLAG_ROUND_ROBIN` * `ZE_SCHEDULING_HINT_EXP_FLAG_STALL_BASED_ROUND_ROBIN` ### [Extension for cache reservation](https://spec.oneapi.io/level-zero/latest/core/EXT_CacheReservation.html#cache-reservation-extension) With `zeDeviceReserveCacheExt`, applications can reserve sections of the GPU cache for exclusive use. Cache level support varies between platforms. Likewise, `zeDeviceSetCacheAdviceExt` can be used to set a region of the cache as a reserved or non-reserved region. If the default behavior is selected, then non-reserved is used, where the region is accessible to all clients or applications. # Release Notes v1.1 Level Zero Core API. April 2021 ## Changes in this release: ### Device allocations larger than 4GB size. https://spec.oneapi.com/level-zero/latest/core/api.html?highlight=relaxed#relaxedalloclimits-enums L0 driver now allows the allocation of buffers larger than 4GB. To use, the `ze_relaxed_allocation_limits_exp_desc_t` structure needs to be passed to `zeMemAllocHost` or `zeMemAllocShared` as a linked descriptor. Sample code: ```cpp ze_relaxed_allocation_limits_exp_desc_t relaxedDesc = {}; relaxedDesc.stype = ZE_STRUCTURE_TYPE_RELAXED_ALLOCATION_LIMITS_EXP_DESC; relaxedDesc.flags = ZE_RELAXED_ALLOCATION_LIMITS_EXP_FLAG_MAX_SIZE; ze_device_mem_alloc_desc_t deviceDesc = {}; deviceDesc.pNext = &relaxedDesc; zeMemAllocDevice(context, &deviceDesc, size, 0, device, &ptr); ``` In addition to this, kernels need to be compiled with `ze-opt-greater-than-4GB-buffer-required`. This needs to be passed in the `pBuildFlags` field of the `ze_module_desc_t` descriptor while calling `zeModuleCreate`. 
### zeDeviceGetGlobalTimestamps for CPU/GPU synchronized time. https://spec.oneapi.com/level-zero/latest/core/api.html?highlight=zedevicegetglobaltimestamps#_CPPv427zeDeviceGetGlobalTimestamps18ze_device_handle_tP8uint64_tP8uint64_t Returns synchronized Host and device global timestamps. Sample code: ```cpp ze_relaxed_allocation_limits_exp_desc_t relaxedDesc = {}; relaxedDesc.stype = ZE_STRUCTURE_TYPE_RELAXED_ALLOCATION_LIMITS_EXP_DESC; relaxedDesc.flags = ZE_RELAXED_ALLOCATION_LIMITS_EXP_FLAG_MAX_SIZE; ze_device_mem_alloc_desc_t deviceDesc = {}; deviceDesc.pNext = &relaxedDesc; zeMemAllocDevice(context, &deviceDesc, size, 0, device, &ptr); ``` ### Global work offset https://spec.oneapi.com/level-zero/latest/core/api.html?highlight=globaloffset#_CPPv426zeKernelSetGlobalOffsetExp18ze_kernel_handle_t8uint32_t8uint32_t8uint32_t Applications now can set a global work offset to kernels. Sample code: ```cpp ... uint32_t groupSizeX = sizeX; uint32_t groupSizeY = 1u; uint32_t groupSizeZ = 1u; zeKernelSetGroupSize(kernel, groupSizeX, groupSizeY, groupSizeZ); uint32_t offsetx = offset; uint32_t offsety = 0; uint32_t offsetz = 0; zeKernelSetGlobalOffsetExp(kernel, offsetx, offsety, offsetz); ... ``` ### Atomic floating point properties https://spec.oneapi.com/level-zero/latest/core/api.html?highlight=ze_structure_type_float_atomic_ext_properties#_CPPv432ze_float_atomic_ext_properties_t Applications now can query for floating atomic properties supported by the device in a kernel. This is done by passing `ze_float_atomic_ext_properties_t` to zeDeviceGetModuleProperties as a linked property structure. 
Sample code: ```cpp ze_device_module_properties_t kernelProperties = {}; ze_float_atomic_ext_properties_t extendedProperties = {}; extendedProperties.stype = ZE_STRUCTURE_TYPE_FLOAT_ATOMIC_EXT_PROPERTIES; kernelProperties.pNext = &extendedProperties; zeDeviceGetModuleProperties(hDevice, &kernelProperties); if (extendedProperties.fp16Flags & ZE_DEVICE_FP_ATOMIC_EXT_FLAG_GLOBAL_ADD) { // kernel supports floating atomic add and subtract } ``` ### Context Creation for specific devices https://spec.oneapi.com/level-zero/latest/core/api.html?highlight=zecontextcreate#_CPPv417zeContextCreateEx18ze_driver_handle_tPK17ze_context_desc_t8uint32_tP18ze_device_handle_tP19ze_context_handle_t Added `zeContextCreateEx` to create a context with a set of devices. Resources allocated against that context are visible only to the devices for which the context was created. Sample code: ```cpp std::vector<ze_device_handle_t> devices; devices.push_back(device0); devices.push_back(device1); ... zeContextCreateEx(hDriver, &desc, devices.size(), devices.data(), &phContext); ``` ### Change on timer resolution https://spec.oneapi.com/level-zero/latest/core/api.html?highlight=timerresolution#_CPPv4N22ze_device_properties_t15timerResolutionE Timer resolution returned by device properties has been changed to cycles/second (v1.0 has a resolution of nano-seconds). To help libraries with the transition to the new resolution, the `UseCyclesPerSecondTimer` variable has been defined. When set to 1, the driver will return the resolution defined for v1.1 (cycles/second), otherwise, it will still return the resolution for v1.0 (nanoseconds). The use of this environment variable is only temporary while applications and libraries complete their transition to v1.1 and will be eventually eliminated, leaving the resolution for v1.1 as default. When querying for the timer resolution, applications then need to keep in mind: * If `ZE_API_VERSION_1_0` returned by `zeDriverGetApiVersion`: Timer resolution is nanoseconds. 
* If `ZE_API_VERSION_1_1` returned by `zeDriverGetApiVersion`: Timer resolution is nanoseconds, as in v1.0. * If `ZE_API_VERSION_1_1` returned by `zeDriverGetApiVersion` and `UseCyclesPerSecondTimer=1`: Timer resolution is cycles per second, as in v1.1. Note: In Release builds, `NEOReadDebugKeys=1` may be needed to read environment variables. To confirm the L0 driver is reading the environment variables, please use `PrintDebugSettings=1`, which will print them at the beginning of the application. See below: ```sh $ PrintDebugSettings=1 UseCyclesPerSecondTimer=1 ./zello_world_gpu Non-default value of debug variable: PrintDebugSettings = 1 Non-default value of debug variable: UseCyclesPerSecondTimer = 1 ... ``` Sample code: if `UseCyclesPerSecondTimer=1` is set ```cpp ze_api_version_t version; zeDriverGetApiVersion(hDriver, &version); ... ze_device_properties_t devProperties = {}; zeDeviceGetProperties(device, &devProperties); if (version == ZE_API_VERSION_1_1) { uint64_t timerResolutionInCyclesPerSecond = devProperties.timerResolution; } else { uint64_t timerResolutionInNanoSeconds = devProperties.timerResolution; } ... ``` if `UseCyclesPerSecondTimer` is not set ```cpp ze_api_version_t version; zeDriverGetApiVersion(hDriver, &version); ... ze_device_properties_t devProperties = {}; zeDeviceGetProperties(device, &devProperties); uint64_t timerResolutionInNanoSeconds = devProperties.timerResolution; ... 
``` compute-runtime-22.14.22890/level_zero/experimental/000077500000000000000000000000001422164147700222155ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/experimental/source/000077500000000000000000000000001422164147700235155ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/experimental/source/CMakeLists.txt000066400000000000000000000011501422164147700262520ustar00rootroot00000000000000# # Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT # add_subdirectory(tracing) set(L0_EXPERIMENTAL_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/experimental.cpp ) target_include_directories(${L0_STATIC_LIB_NAME} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/ ) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_EXPERIMENTAL_SOURCES} ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_EXPERIMENTAL_SOURCES ${L0_EXPERIMENTAL_SOURCES}) compute-runtime-22.14.22890/level_zero/experimental/source/experimental.cpp000066400000000000000000000001741422164147700267200ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ namespace L0 { } // namespace L0 compute-runtime-22.14.22890/level_zero/experimental/source/tracing/000077500000000000000000000000001422164147700251445ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/experimental/source/tracing/CMakeLists.txt000066400000000000000000000040311422164147700277020ustar00rootroot00000000000000# # Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_EXPERIMENTAL_TRACING ${CMAKE_CURRENT_SOURCE_DIR}/tracing.h ${CMAKE_CURRENT_SOURCE_DIR}/tracing_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/tracing_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/tracing_barrier_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/tracing_barrier_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/tracing_cmdlist_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/tracing_cmdlist_imp.h 
${CMAKE_CURRENT_SOURCE_DIR}/tracing_cmdqueue_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/tracing_cmdqueue_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/tracing_copy_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/tracing_copy_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/tracing_device_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/tracing_device_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/tracing_driver_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/tracing_driver_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/tracing_event_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/tracing_event_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/tracing_fence_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/tracing_fence_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/tracing_global_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/tracing_global_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/tracing_sampler_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/tracing_sampler_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/tracing_residency_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/tracing_residency_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/tracing_image_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/tracing_image_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/tracing_memory_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/tracing_memory_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/tracing_module_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/tracing_module_imp.h ) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_EXPERIMENTAL_TRACING} ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_EXPERIMENTAL_TRACING ${L0_SRCS_EXPERIMENTAL_TRACING}) compute-runtime-22.14.22890/level_zero/experimental/source/tracing/tracing.h000066400000000000000000000020741422164147700267470ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include struct _zet_tracer_exp_handle_t {}; namespace L0 { struct APITracer : _zet_tracer_exp_handle_t { static APITracer *create(); virtual ~APITracer() = default; static APITracer *fromHandle(zet_tracer_exp_handle_t handle) { return static_cast(handle); } inline 
zet_tracer_exp_handle_t toHandle() { return this; } virtual ze_result_t destroyTracer(zet_tracer_exp_handle_t phTracer) = 0; virtual ze_result_t setPrologues(zet_core_callbacks_t *pCoreCbs) = 0; virtual ze_result_t setEpilogues(zet_core_callbacks_t *pCoreCbs) = 0; virtual ze_result_t enableTracer(ze_bool_t enable) = 0; }; ze_result_t createAPITracer(zet_context_handle_t hContext, const zet_tracer_exp_desc_t *desc, zet_tracer_exp_handle_t *phTracer); struct APITracerContext { virtual ~APITracerContext() = default; virtual void *getActiveTracersList() = 0; virtual void releaseActivetracersList() = 0; }; } // namespace L0 compute-runtime-22.14.22890/level_zero/experimental/source/tracing/tracing_barrier_imp.cpp000066400000000000000000000101721422164147700316530ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/experimental/source/tracing/tracing_imp.h" ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendBarrier_Tracing(ze_command_list_handle_t hCommandList, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.CommandList.pfnAppendBarrier, hCommandList, hSignalEvent, numWaitEvents, phWaitEvents); ze_command_list_append_barrier_params_t tracerParams; tracerParams.phCommandList = &hCommandList; tracerParams.phSignalEvent = &hSignalEvent; tracerParams.pnumWaitEvents = &numWaitEvents; tracerParams.pphWaitEvents = &phWaitEvents; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnCommandListAppendBarrierCb_t, CommandList, pfnAppendBarrierCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.CommandList.pfnAppendBarrier, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phCommandList, *tracerParams.phSignalEvent, *tracerParams.pnumWaitEvents, 
*tracerParams.pphWaitEvents); } ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendMemoryRangesBarrier_Tracing(ze_command_list_handle_t hCommandList, uint32_t numRanges, const size_t *pRangeSizes, const void **pRanges, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.CommandList.pfnAppendMemoryRangesBarrier, hCommandList, numRanges, pRangeSizes, pRanges, hSignalEvent, numWaitEvents, phWaitEvents); ze_command_list_append_memory_ranges_barrier_params_t tracerParams; tracerParams.phCommandList = &hCommandList; tracerParams.pnumRanges = &numRanges; tracerParams.ppRangeSizes = &pRangeSizes; tracerParams.ppRanges = &pRanges; tracerParams.phSignalEvent = &hSignalEvent; tracerParams.pnumWaitEvents = &numWaitEvents; tracerParams.pphWaitEvents = &phWaitEvents; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnCommandListAppendMemoryRangesBarrierCb_t, CommandList, pfnAppendMemoryRangesBarrierCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.CommandList.pfnAppendMemoryRangesBarrier, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phCommandList, *tracerParams.pnumRanges, *tracerParams.ppRangeSizes, *tracerParams.ppRanges, *tracerParams.phSignalEvent, *tracerParams.pnumWaitEvents, *tracerParams.pphWaitEvents); } compute-runtime-22.14.22890/level_zero/experimental/source/tracing/tracing_barrier_imp.h000066400000000000000000000017311422164147700313210ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once extern "C" { ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendBarrier_Tracing(ze_command_list_handle_t hCommandList, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents); ZE_APIEXPORT ze_result_t ZE_APICALL 
zeCommandListAppendMemoryRangesBarrier_Tracing(ze_command_list_handle_t hCommandList, uint32_t numRanges, const size_t *pRangeSizes, const void **pRanges, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents); } compute-runtime-22.14.22890/level_zero/experimental/source/tracing/tracing_cmdlist_imp.cpp000066400000000000000000000246001422164147700316650ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/experimental/source/tracing/tracing_imp.h" ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListCreate_Tracing(ze_context_handle_t hContext, ze_device_handle_t hDevice, const ze_command_list_desc_t *desc, ze_command_list_handle_t *phCommandList) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.CommandList.pfnCreate, hContext, hDevice, desc, phCommandList); ze_command_list_create_params_t tracerParams; tracerParams.phContext = &hContext; tracerParams.phDevice = &hDevice; tracerParams.pdesc = &desc; tracerParams.pphCommandList = &phCommandList; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnCommandListCreateCb_t, CommandList, pfnCreateCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.CommandList.pfnCreate, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phContext, *tracerParams.phDevice, *tracerParams.pdesc, *tracerParams.pphCommandList); } ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListCreateImmediate_Tracing(ze_context_handle_t hContext, ze_device_handle_t hDevice, const ze_command_queue_desc_t *altdesc, ze_command_list_handle_t *phCommandList) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.CommandList.pfnCreateImmediate, hContext, hDevice, altdesc, phCommandList); ze_command_list_create_immediate_params_t tracerParams; tracerParams.phContext = &hContext; tracerParams.phDevice = &hDevice; 
tracerParams.paltdesc = &altdesc; tracerParams.pphCommandList = &phCommandList; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnCommandListCreateImmediateCb_t, CommandList, pfnCreateImmediateCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.CommandList.pfnCreateImmediate, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phContext, *tracerParams.phDevice, *tracerParams.paltdesc, *tracerParams.pphCommandList); } ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListDestroy_Tracing(ze_command_list_handle_t hCommandList) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.CommandList.pfnDestroy, hCommandList); ze_command_list_destroy_params_t tracerParams; tracerParams.phCommandList = &hCommandList; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnCommandListDestroyCb_t, CommandList, pfnDestroyCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.CommandList.pfnDestroy, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phCommandList); } ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListClose_Tracing(ze_command_list_handle_t hCommandList) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.CommandList.pfnClose, hCommandList); ze_command_list_close_params_t tracerParams; tracerParams.phCommandList = &hCommandList; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnCommandListCloseCb_t, CommandList, pfnCloseCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.CommandList.pfnClose, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phCommandList); } ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListReset_Tracing(ze_command_list_handle_t hCommandList) { 
ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.CommandList.pfnReset, hCommandList); ze_command_list_reset_params_t tracerParams; tracerParams.phCommandList = &hCommandList; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnCommandListResetCb_t, CommandList, pfnResetCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.CommandList.pfnReset, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phCommandList); } ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendWriteGlobalTimestamp_Tracing(ze_command_list_handle_t hCommandList, uint64_t *dstptr, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.CommandList.pfnAppendWriteGlobalTimestamp, hCommandList, dstptr, hSignalEvent, numWaitEvents, phWaitEvents); ze_command_list_append_write_global_timestamp_params_t tracerParams; tracerParams.phCommandList = &hCommandList; tracerParams.pdstptr = &dstptr; tracerParams.phSignalEvent = &hSignalEvent; tracerParams.pnumWaitEvents = &numWaitEvents; tracerParams.pphWaitEvents = &phWaitEvents; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnCommandListAppendWriteGlobalTimestampCb_t, CommandList, pfnAppendWriteGlobalTimestampCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.CommandList.pfnAppendWriteGlobalTimestamp, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phCommandList, *tracerParams.pdstptr, *tracerParams.phSignalEvent, *tracerParams.pnumWaitEvents, *tracerParams.pphWaitEvents); } ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendQueryKernelTimestamps_Tracing(ze_command_list_handle_t hCommandList, uint32_t numEvents, ze_event_handle_t *phEvents, void *dstptr, const size_t *pOffsets, 
ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.CommandList.pfnAppendQueryKernelTimestamps, hCommandList, numEvents, phEvents, dstptr, pOffsets, hSignalEvent, numWaitEvents, phWaitEvents); ze_command_list_append_query_kernel_timestamps_params_t tracerParams; tracerParams.phCommandList = &hCommandList; tracerParams.pnumEvents = &numEvents; tracerParams.pphEvents = &phEvents; tracerParams.pdstptr = &dstptr; tracerParams.ppOffsets = &pOffsets; tracerParams.phSignalEvent = &hSignalEvent; tracerParams.pnumWaitEvents = &numWaitEvents; tracerParams.pphWaitEvents = &phWaitEvents; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnCommandListAppendQueryKernelTimestampsCb_t, CommandList, pfnAppendQueryKernelTimestampsCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.CommandList.pfnAppendQueryKernelTimestamps, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phCommandList, *tracerParams.pnumEvents, *tracerParams.pphEvents, *tracerParams.pdstptr, *tracerParams.ppOffsets, *tracerParams.phSignalEvent, *tracerParams.pnumWaitEvents, *tracerParams.pphWaitEvents); } compute-runtime-22.14.22890/level_zero/experimental/source/tracing/tracing_cmdlist_imp.h000066400000000000000000000040731422164147700313340ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once extern "C" { ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListCreate_Tracing(ze_context_handle_t hContext, ze_device_handle_t hDevice, const ze_command_list_desc_t *desc, ze_command_list_handle_t *phCommandList); ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListCreateImmediate_Tracing(ze_context_handle_t hContext, ze_device_handle_t hDevice, const ze_command_queue_desc_t *altdesc, ze_command_list_handle_t *phCommandList); 
ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListDestroy_Tracing(ze_command_list_handle_t hCommandList); ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListClose_Tracing(ze_command_list_handle_t hCommandList); ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListReset_Tracing(ze_command_list_handle_t hCommandList); ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendWriteGlobalTimestamp_Tracing(ze_command_list_handle_t hCommandList, uint64_t *dstptr, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents); ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendQueryKernelTimestamps_Tracing(ze_command_list_handle_t hCommandList, uint32_t numEvents, ze_event_handle_t *phEvents, void *dstptr, const size_t *pOffsets, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents); } compute-runtime-22.14.22890/level_zero/experimental/source/tracing/tracing_cmdqueue_imp.cpp000066400000000000000000000126221422164147700320370ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/experimental/source/tracing/tracing_imp.h" ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandQueueCreate_Tracing(ze_context_handle_t hContext, ze_device_handle_t hDevice, const ze_command_queue_desc_t *desc, ze_command_queue_handle_t *phCommandQueue) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.CommandQueue.pfnCreate, hContext, hDevice, desc, phCommandQueue); ze_command_queue_create_params_t tracerParams; tracerParams.phContext = &hContext; tracerParams.phDevice = &hDevice; tracerParams.pdesc = &desc; tracerParams.pphCommandQueue = &phCommandQueue; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnCommandQueueCreateCb_t, CommandQueue, pfnCreateCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.CommandQueue.pfnCreate, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, 
apiCallbackData.epilogCallbacks, *tracerParams.phContext, *tracerParams.phDevice, *tracerParams.pdesc, *tracerParams.pphCommandQueue); } ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandQueueDestroy_Tracing(ze_command_queue_handle_t hCommandQueue) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.CommandQueue.pfnDestroy, hCommandQueue); ze_command_queue_destroy_params_t tracerParams; tracerParams.phCommandQueue = &hCommandQueue; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnCommandQueueDestroyCb_t, CommandQueue, pfnDestroyCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.CommandQueue.pfnDestroy, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phCommandQueue); } ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandQueueExecuteCommandLists_Tracing(ze_command_queue_handle_t hCommandQueue, uint32_t numCommandLists, ze_command_list_handle_t *phCommandLists, ze_fence_handle_t hFence) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.CommandQueue.pfnExecuteCommandLists, hCommandQueue, numCommandLists, phCommandLists, hFence); ze_command_queue_execute_command_lists_params_t tracerParams; tracerParams.phCommandQueue = &hCommandQueue; tracerParams.pnumCommandLists = &numCommandLists; tracerParams.pphCommandLists = &phCommandLists; tracerParams.phFence = &hFence; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnCommandQueueExecuteCommandListsCb_t, CommandQueue, pfnExecuteCommandListsCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.CommandQueue.pfnExecuteCommandLists, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phCommandQueue, *tracerParams.pnumCommandLists, *tracerParams.pphCommandLists, *tracerParams.phFence); } ZE_APIEXPORT ze_result_t ZE_APICALL 
zeCommandQueueSynchronize_Tracing(ze_command_queue_handle_t hCommandQueue, uint64_t timeout) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.CommandQueue.pfnSynchronize, hCommandQueue, timeout); ze_command_queue_synchronize_params_t tracerParams; tracerParams.phCommandQueue = &hCommandQueue; tracerParams.ptimeout = &timeout; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnCommandQueueSynchronizeCb_t, CommandQueue, pfnSynchronizeCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.CommandQueue.pfnSynchronize, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phCommandQueue, *tracerParams.ptimeout); } compute-runtime-22.14.22890/level_zero/experimental/source/tracing/tracing_cmdqueue_imp.h000066400000000000000000000017761422164147700315140ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once extern "C" { ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandQueueCreate_Tracing(ze_context_handle_t hContext, ze_device_handle_t hDevice, const ze_command_queue_desc_t *desc, ze_command_queue_handle_t *phCommandQueue); ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandQueueDestroy_Tracing(ze_command_queue_handle_t hCommandQueue); ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandQueueExecuteCommandLists_Tracing(ze_command_queue_handle_t hCommandQueue, uint32_t numCommandLists, ze_command_list_handle_t *phCommandLists, ze_fence_handle_t hFence); ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandQueueSynchronize_Tracing(ze_command_queue_handle_t hCommandQueue, uint64_t timeout); } compute-runtime-22.14.22890/level_zero/experimental/source/tracing/tracing_copy_imp.cpp000066400000000000000000000560421422164147700312050ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include 
"level_zero/experimental/source/tracing/tracing_imp.h"

// Every wrapper in this file performs the same four steps, in this order:
//   1. ZE_HANDLE_TRACER_RECURSION is given the driver DDI entry point and the
//      original arguments.
//      NOTE(review): the macro is defined outside this file; presumably it
//      forwards straight to the driver when a tracer callback re-enters the
//      API - confirm against its definition.
//   2. tracerParams is filled with the ADDRESS of every argument.
//   3. ZE_GEN_PER_API_CALLBACK_STATE is handed apiCallbackData together with
//      the callback type and callback-table slot; apiCallbackData's
//      apiOrdinal / prologCallbacks / epilogCallbacks are read afterwards.
//   4. L0::APITracerWrapperImp is called with the DDI entry point, the params
//      struct, the ordinal, both callback lists, and the arguments read back
//      through tracerParams.
// Each statement ends with ';' - do not chain the assignments with ','.

// Tracing wrapper around driver_ddiTable.core_ddiTable.CommandList.pfnAppendMemoryCopy.
ZE_APIEXPORT ze_result_t ZE_APICALL
zeCommandListAppendMemoryCopy_Tracing(ze_command_list_handle_t hCommandList,
                                      void *dstptr,
                                      const void *srcptr,
                                      size_t size,
                                      ze_event_handle_t hSignalEvent,
                                      uint32_t numWaitEvents,
                                      ze_event_handle_t *phWaitEvents) {

    ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.CommandList.pfnAppendMemoryCopy,
                               hCommandList,
                               dstptr,
                               srcptr,
                               size,
                               hSignalEvent,
                               numWaitEvents,
                               phWaitEvents);

    ze_command_list_append_memory_copy_params_t tracerParams;
    tracerParams.phCommandList = &hCommandList;
    tracerParams.pdstptr = &dstptr;
    tracerParams.psrcptr = &srcptr;
    tracerParams.psize = &size;
    tracerParams.phSignalEvent = &hSignalEvent;
    tracerParams.pnumWaitEvents = &numWaitEvents;
    tracerParams.pphWaitEvents = &phWaitEvents;

    L0::APITracerCallbackDataImp apiCallbackData;

    ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnCommandListAppendMemoryCopyCb_t, CommandList, pfnAppendMemoryCopyCb);

    return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.CommandList.pfnAppendMemoryCopy,
                                   &tracerParams,
                                   apiCallbackData.apiOrdinal,
                                   apiCallbackData.prologCallbacks,
                                   apiCallbackData.epilogCallbacks,
                                   *tracerParams.phCommandList,
                                   *tracerParams.pdstptr,
                                   *tracerParams.psrcptr,
                                   *tracerParams.psize,
                                   *tracerParams.phSignalEvent,
                                   *tracerParams.pnumWaitEvents,
                                   *tracerParams.pphWaitEvents);
}

// Tracing wrapper around driver_ddiTable.core_ddiTable.CommandList.pfnAppendMemoryFill.
// Note the params-struct field for patternSize is spelled ppattern_size.
ZE_APIEXPORT ze_result_t ZE_APICALL
zeCommandListAppendMemoryFill_Tracing(ze_command_list_handle_t hCommandList,
                                      void *ptr,
                                      const void *pattern,
                                      size_t patternSize,
                                      size_t size,
                                      ze_event_handle_t hSignalEvent,
                                      uint32_t numWaitEvents,
                                      ze_event_handle_t *phWaitEvents) {

    ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.CommandList.pfnAppendMemoryFill,
                               hCommandList,
                               ptr,
                               pattern,
                               patternSize,
                               size,
                               hSignalEvent,
                               numWaitEvents,
                               phWaitEvents);

    ze_command_list_append_memory_fill_params_t tracerParams;
    tracerParams.phCommandList = &hCommandList;
    tracerParams.pptr = &ptr;
    tracerParams.ppattern = &pattern;
    tracerParams.ppattern_size = &patternSize;
    tracerParams.psize = &size;
    tracerParams.phSignalEvent = &hSignalEvent;
    tracerParams.pnumWaitEvents = &numWaitEvents;
    tracerParams.pphWaitEvents = &phWaitEvents;

    L0::APITracerCallbackDataImp apiCallbackData;

    ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnCommandListAppendMemoryFillCb_t, CommandList, pfnAppendMemoryFillCb);

    return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.CommandList.pfnAppendMemoryFill,
                                   &tracerParams,
                                   apiCallbackData.apiOrdinal,
                                   apiCallbackData.prologCallbacks,
                                   apiCallbackData.epilogCallbacks,
                                   *tracerParams.phCommandList,
                                   *tracerParams.pptr,
                                   *tracerParams.ppattern,
                                   *tracerParams.ppattern_size,
                                   *tracerParams.psize,
                                   *tracerParams.phSignalEvent,
                                   *tracerParams.pnumWaitEvents,
                                   *tracerParams.pphWaitEvents);
}

// Tracing wrapper around driver_ddiTable.core_ddiTable.CommandList.pfnAppendMemoryCopyRegion.
ZE_APIEXPORT ze_result_t ZE_APICALL
zeCommandListAppendMemoryCopyRegion_Tracing(ze_command_list_handle_t hCommandList,
                                            void *dstptr,
                                            const ze_copy_region_t *dstRegion,
                                            uint32_t dstPitch,
                                            uint32_t dstSlicePitch,
                                            const void *srcptr,
                                            const ze_copy_region_t *srcRegion,
                                            uint32_t srcPitch,
                                            uint32_t srcSlicePitch,
                                            ze_event_handle_t hSignalEvent,
                                            uint32_t numWaitEvents,
                                            ze_event_handle_t *phWaitEvents) {

    ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.CommandList.pfnAppendMemoryCopyRegion,
                               hCommandList,
                               dstptr,
                               dstRegion,
                               dstPitch,
                               dstSlicePitch,
                               srcptr,
                               srcRegion,
                               srcPitch,
                               srcSlicePitch,
                               hSignalEvent,
                               numWaitEvents,
                               phWaitEvents);

    ze_command_list_append_memory_copy_region_params_t tracerParams;
    tracerParams.phCommandList = &hCommandList;
    tracerParams.pdstptr = &dstptr;
    tracerParams.pdstRegion = &dstRegion;
    tracerParams.pdstPitch = &dstPitch;
    tracerParams.pdstSlicePitch = &dstSlicePitch;
    tracerParams.psrcptr = &srcptr;
    tracerParams.psrcRegion = &srcRegion;
    tracerParams.psrcPitch = &srcPitch;
    tracerParams.psrcSlicePitch = &srcSlicePitch;
    tracerParams.phSignalEvent = &hSignalEvent;
    tracerParams.pnumWaitEvents = &numWaitEvents;
    tracerParams.pphWaitEvents = &phWaitEvents;

    L0::APITracerCallbackDataImp apiCallbackData;

    ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnCommandListAppendMemoryCopyRegionCb_t, CommandList, pfnAppendMemoryCopyRegionCb);

    return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.CommandList.pfnAppendMemoryCopyRegion,
                                   &tracerParams,
                                   apiCallbackData.apiOrdinal,
                                   apiCallbackData.prologCallbacks,
                                   apiCallbackData.epilogCallbacks,
                                   *tracerParams.phCommandList,
                                   *tracerParams.pdstptr,
                                   *tracerParams.pdstRegion,
                                   *tracerParams.pdstPitch,
                                   *tracerParams.pdstSlicePitch,
                                   *tracerParams.psrcptr,
                                   *tracerParams.psrcRegion,
                                   *tracerParams.psrcPitch,
                                   *tracerParams.psrcSlicePitch,
                                   *tracerParams.phSignalEvent,
                                   *tracerParams.pnumWaitEvents,
                                   *tracerParams.pphWaitEvents);
}

// Tracing wrapper around driver_ddiTable.core_ddiTable.CommandList.pfnAppendMemoryCopyFromContext.
ZE_APIEXPORT ze_result_t ZE_APICALL
zeCommandListAppendMemoryCopyFromContext_Tracing(ze_command_list_handle_t hCommandList,
                                                 void *dstptr,
                                                 ze_context_handle_t hContextSrc,
                                                 const void *srcptr,
                                                 size_t size,
                                                 ze_event_handle_t hSignalEvent,
                                                 uint32_t numWaitEvents,
                                                 ze_event_handle_t *phWaitEvents) {

    ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.CommandList.pfnAppendMemoryCopyFromContext,
                               hCommandList,
                               dstptr,
                               hContextSrc,
                               srcptr,
                               size,
                               hSignalEvent,
                               numWaitEvents,
                               phWaitEvents);

    ze_command_list_append_memory_copy_from_context_params_t tracerParams;
    tracerParams.phCommandList = &hCommandList;
    tracerParams.pdstptr = &dstptr;
    // These three were previously joined by the comma operator; they are now
    // separate statements like every other assignment in the file.
    tracerParams.phContextSrc = &hContextSrc;
    tracerParams.psrcptr = &srcptr;
    tracerParams.psize = &size;
    tracerParams.phSignalEvent = &hSignalEvent;
    tracerParams.pnumWaitEvents = &numWaitEvents;
    tracerParams.pphWaitEvents = &phWaitEvents;

    L0::APITracerCallbackDataImp apiCallbackData;

    ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnCommandListAppendMemoryCopyFromContextCb_t, CommandList, pfnAppendMemoryCopyFromContextCb);

    return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.CommandList.pfnAppendMemoryCopyFromContext,
                                   &tracerParams,
                                   apiCallbackData.apiOrdinal,
                                   apiCallbackData.prologCallbacks,
                                   apiCallbackData.epilogCallbacks,
                                   *tracerParams.phCommandList,
                                   *tracerParams.pdstptr,
                                   *tracerParams.phContextSrc,
                                   *tracerParams.psrcptr,
                                   *tracerParams.psize,
                                   *tracerParams.phSignalEvent,
                                   *tracerParams.pnumWaitEvents,
                                   *tracerParams.pphWaitEvents);
}

// Tracing wrapper around driver_ddiTable.core_ddiTable.CommandList.pfnAppendImageCopy.
ZE_APIEXPORT ze_result_t ZE_APICALL
zeCommandListAppendImageCopy_Tracing(ze_command_list_handle_t hCommandList,
                                     ze_image_handle_t hDstImage,
                                     ze_image_handle_t hSrcImage,
                                     ze_event_handle_t hSignalEvent,
                                     uint32_t numWaitEvents,
                                     ze_event_handle_t *phWaitEvents) {

    ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.CommandList.pfnAppendImageCopy,
                               hCommandList,
                               hDstImage,
                               hSrcImage,
                               hSignalEvent,
                               numWaitEvents,
                               phWaitEvents);

    ze_command_list_append_image_copy_params_t tracerParams;
    tracerParams.phCommandList = &hCommandList;
    tracerParams.phDstImage = &hDstImage;
    tracerParams.phSrcImage = &hSrcImage;
    tracerParams.phSignalEvent = &hSignalEvent;
    tracerParams.pnumWaitEvents = &numWaitEvents;
    tracerParams.pphWaitEvents = &phWaitEvents;

    L0::APITracerCallbackDataImp apiCallbackData;

    ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnCommandListAppendImageCopyCb_t, CommandList, pfnAppendImageCopyCb);

    return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.CommandList.pfnAppendImageCopy,
                                   &tracerParams,
                                   apiCallbackData.apiOrdinal,
                                   apiCallbackData.prologCallbacks,
                                   apiCallbackData.epilogCallbacks,
                                   *tracerParams.phCommandList,
                                   *tracerParams.phDstImage,
                                   *tracerParams.phSrcImage,
                                   *tracerParams.phSignalEvent,
                                   *tracerParams.pnumWaitEvents,
                                   *tracerParams.pphWaitEvents);
}

// Tracing wrapper around driver_ddiTable.core_ddiTable.CommandList.pfnAppendImageCopyRegion.
ZE_APIEXPORT ze_result_t ZE_APICALL
zeCommandListAppendImageCopyRegion_Tracing(ze_command_list_handle_t hCommandList,
                                           ze_image_handle_t hDstImage,
                                           ze_image_handle_t hSrcImage,
                                           const ze_image_region_t *pDstRegion,
                                           const ze_image_region_t *pSrcRegion,
                                           ze_event_handle_t hSignalEvent,
                                           uint32_t numWaitEvents,
                                           ze_event_handle_t *phWaitEvents) {

    ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.CommandList.pfnAppendImageCopyRegion,
                               hCommandList,
                               hDstImage,
                               hSrcImage,
                               pDstRegion,
                               pSrcRegion,
                               hSignalEvent,
                               numWaitEvents,
                               phWaitEvents);

    ze_command_list_append_image_copy_region_params_t tracerParams;
    tracerParams.phCommandList = &hCommandList;
    tracerParams.phDstImage = &hDstImage;
    tracerParams.phSrcImage = &hSrcImage;
    tracerParams.ppDstRegion = &pDstRegion;
    tracerParams.ppSrcRegion = &pSrcRegion;
    tracerParams.phSignalEvent = &hSignalEvent;
    tracerParams.pnumWaitEvents = &numWaitEvents;
    tracerParams.pphWaitEvents = &phWaitEvents;

    L0::APITracerCallbackDataImp apiCallbackData;

    ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnCommandListAppendImageCopyRegionCb_t, CommandList, pfnAppendImageCopyRegionCb);

    return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.CommandList.pfnAppendImageCopyRegion,
                                   &tracerParams,
                                   apiCallbackData.apiOrdinal,
                                   apiCallbackData.prologCallbacks,
                                   apiCallbackData.epilogCallbacks,
                                   *tracerParams.phCommandList,
                                   *tracerParams.phDstImage,
                                   *tracerParams.phSrcImage,
                                   *tracerParams.ppDstRegion,
                                   *tracerParams.ppSrcRegion,
                                   *tracerParams.phSignalEvent,
                                   *tracerParams.pnumWaitEvents,
                                   *tracerParams.pphWaitEvents);
}

// Tracing wrapper around driver_ddiTable.core_ddiTable.CommandList.pfnAppendImageCopyToMemory.
ZE_APIEXPORT ze_result_t ZE_APICALL
zeCommandListAppendImageCopyToMemory_Tracing(ze_command_list_handle_t hCommandList,
                                             void *dstptr,
                                             ze_image_handle_t hSrcImage,
                                             const ze_image_region_t *pSrcRegion,
                                             ze_event_handle_t hSignalEvent,
                                             uint32_t numWaitEvents,
                                             ze_event_handle_t *phWaitEvents) {

    ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.CommandList.pfnAppendImageCopyToMemory,
                               hCommandList,
                               dstptr,
                               hSrcImage,
                               pSrcRegion,
                               hSignalEvent,
                               numWaitEvents,
                               phWaitEvents);

    ze_command_list_append_image_copy_to_memory_params_t tracerParams;
    tracerParams.phCommandList = &hCommandList;
    tracerParams.pdstptr = &dstptr;
    tracerParams.phSrcImage = &hSrcImage;
    tracerParams.ppSrcRegion = &pSrcRegion;
    tracerParams.phSignalEvent = &hSignalEvent;
    tracerParams.pnumWaitEvents = &numWaitEvents;
    tracerParams.pphWaitEvents = &phWaitEvents;

    L0::APITracerCallbackDataImp apiCallbackData;

    ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnCommandListAppendImageCopyToMemoryCb_t, CommandList, pfnAppendImageCopyToMemoryCb);

    return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.CommandList.pfnAppendImageCopyToMemory,
                                   &tracerParams,
                                   apiCallbackData.apiOrdinal,
                                   apiCallbackData.prologCallbacks,
                                   apiCallbackData.epilogCallbacks,
                                   *tracerParams.phCommandList,
                                   *tracerParams.pdstptr,
                                   *tracerParams.phSrcImage,
                                   *tracerParams.ppSrcRegion,
                                   *tracerParams.phSignalEvent,
                                   *tracerParams.pnumWaitEvents,
                                   *tracerParams.pphWaitEvents);
}

// Tracing wrapper around driver_ddiTable.core_ddiTable.CommandList.pfnAppendImageCopyFromMemory.
ZE_APIEXPORT ze_result_t ZE_APICALL
zeCommandListAppendImageCopyFromMemory_Tracing(ze_command_list_handle_t hCommandList,
                                               ze_image_handle_t hDstImage,
                                               const void *srcptr,
                                               const ze_image_region_t *pDstRegion,
                                               ze_event_handle_t hSignalEvent,
                                               uint32_t numWaitEvents,
                                               ze_event_handle_t *phWaitEvents) {

    ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.CommandList.pfnAppendImageCopyFromMemory,
                               hCommandList,
                               hDstImage,
                               srcptr,
                               pDstRegion,
                               hSignalEvent,
                               numWaitEvents,
                               phWaitEvents);

    ze_command_list_append_image_copy_from_memory_params_t tracerParams;
    tracerParams.phCommandList = &hCommandList;
    tracerParams.phDstImage = &hDstImage;
    tracerParams.psrcptr = &srcptr;
    tracerParams.ppDstRegion = &pDstRegion;
    tracerParams.phSignalEvent = &hSignalEvent;
    tracerParams.pnumWaitEvents = &numWaitEvents;
    tracerParams.pphWaitEvents = &phWaitEvents;

    L0::APITracerCallbackDataImp apiCallbackData;

    ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnCommandListAppendImageCopyFromMemoryCb_t, CommandList, pfnAppendImageCopyFromMemoryCb);

    return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.CommandList.pfnAppendImageCopyFromMemory,
                                   &tracerParams,
                                   apiCallbackData.apiOrdinal,
                                   apiCallbackData.prologCallbacks,
                                   apiCallbackData.epilogCallbacks,
                                   *tracerParams.phCommandList,
                                   *tracerParams.phDstImage,
                                   *tracerParams.psrcptr,
                                   *tracerParams.ppDstRegion,
                                   *tracerParams.phSignalEvent,
                                   *tracerParams.pnumWaitEvents,
                                   *tracerParams.pphWaitEvents);
}
// Tracing wrapper around driver_ddiTable.core_ddiTable.CommandList.pfnAppendMemoryPrefetch.
// Sequence: recursion macro, params struct filled with argument addresses,
// per-API callback state macro, then the call through L0::APITracerWrapperImp.
// NOTE(review): both macros are defined outside this file; their exact
// behaviour (e.g. an early return inside ZE_HANDLE_TRACER_RECURSION) should be
// confirmed against their definitions before reordering anything here.
ZE_APIEXPORT ze_result_t ZE_APICALL
zeCommandListAppendMemoryPrefetch_Tracing(ze_command_list_handle_t hCommandList,
                                          const void *ptr,
                                          size_t size) {

    ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.CommandList.pfnAppendMemoryPrefetch,
                               hCommandList,
                               ptr,
                               size);

    // The params struct holds pointers to this function's own arguments.
    ze_command_list_append_memory_prefetch_params_t tracerParams;
    tracerParams.phCommandList = &hCommandList;
    tracerParams.pptr = &ptr;
    tracerParams.psize = &size;

    L0::APITracerCallbackDataImp apiCallbackData;

    ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData,
                                  ze_pfnCommandListAppendMemoryPrefetchCb_t,
                                  CommandList,
                                  pfnAppendMemoryPrefetchCb);

    // Arguments are passed by reading them back through tracerParams.
    return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.CommandList.pfnAppendMemoryPrefetch,
                                   &tracerParams,
                                   apiCallbackData.apiOrdinal,
                                   apiCallbackData.prologCallbacks,
                                   apiCallbackData.epilogCallbacks,
                                   *tracerParams.phCommandList,
                                   *tracerParams.pptr,
                                   *tracerParams.psize);
}

// Tracing wrapper around driver_ddiTable.core_ddiTable.CommandList.pfnAppendMemAdvise.
// Same four-step sequence as the prefetch wrapper, with hDevice and advice
// carried through the params struct as well.
ZE_APIEXPORT ze_result_t ZE_APICALL
zeCommandListAppendMemAdvise_Tracing(ze_command_list_handle_t hCommandList,
                                     ze_device_handle_t hDevice,
                                     const void *ptr,
                                     size_t size,
                                     ze_memory_advice_t advice) {

    ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.CommandList.pfnAppendMemAdvise,
                               hCommandList,
                               hDevice,
                               ptr,
                               size,
                               advice);

    // The params struct holds pointers to this function's own arguments.
    ze_command_list_append_mem_advise_params_t tracerParams;
    tracerParams.phCommandList = &hCommandList;
    tracerParams.phDevice = &hDevice;
    tracerParams.pptr = &ptr;
    tracerParams.psize = &size;
    tracerParams.padvice = &advice;

    L0::APITracerCallbackDataImp apiCallbackData;

    ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData,
                                  ze_pfnCommandListAppendMemAdviseCb_t,
                                  CommandList,
                                  pfnAppendMemAdviseCb);

    // Arguments are passed by reading them back through tracerParams.
    return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.CommandList.pfnAppendMemAdvise,
                                   &tracerParams,
                                   apiCallbackData.apiOrdinal,
                                   apiCallbackData.prologCallbacks,
                                   apiCallbackData.epilogCallbacks,
                                   *tracerParams.phCommandList,
                                   *tracerParams.phDevice,
                                   *tracerParams.pptr,
                                   *tracerParams.psize,
                                   *tracerParams.padvice);
}
compute-runtime-22.14.22890/level_zero/experimental/source/tracing/tracing_copy_imp.h000066400000000000000000000125511422164147700306470ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once /* C-linkage declarations of the tracing wrappers for the command-list memory copy, fill, image copy, prefetch and mem-advise entry points. */ extern "C" { ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendMemoryCopy_Tracing(ze_command_list_handle_t hCommandList, void *dstptr, const void *srcptr, size_t size, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents); ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendMemoryFill_Tracing(ze_command_list_handle_t hCommandList, void *ptr, const void *pattern, size_t patternSize, size_t size, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents); ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendMemoryCopyRegion_Tracing(ze_command_list_handle_t hCommandList, void *dstptr, const ze_copy_region_t *dstRegion, uint32_t dstPitch, uint32_t dstSlicePitch, const void *srcptr, const ze_copy_region_t *srcRegion, uint32_t srcPitch, uint32_t srcSlicePitch, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents); ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendMemoryCopyFromContext_Tracing(ze_command_list_handle_t hCommandList, void *dstptr, ze_context_handle_t hContextSrc, const void *srcptr, size_t size, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents); ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendImageCopy_Tracing(ze_command_list_handle_t hCommandList, ze_image_handle_t hDstImage, ze_image_handle_t hSrcImage, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents); ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendImageCopyRegion_Tracing(ze_command_list_handle_t hCommandList, ze_image_handle_t hDstImage, ze_image_handle_t hSrcImage, const ze_image_region_t *pDstRegion, const ze_image_region_t *pSrcRegion, 
ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents); ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendImageCopyToMemory_Tracing(ze_command_list_handle_t hCommandList, void *dstptr, ze_image_handle_t hSrcImage, const ze_image_region_t *pSrcRegion, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents); ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendImageCopyFromMemory_Tracing(ze_command_list_handle_t hCommandList, ze_image_handle_t hDstImage, const void *srcptr, const ze_image_region_t *pDstRegion, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents); ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendMemoryPrefetch_Tracing(ze_command_list_handle_t hCommandList, const void *ptr, size_t size); ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendMemAdvise_Tracing(ze_command_list_handle_t hCommandList, ze_device_handle_t hDevice, const void *ptr, size_t size, ze_memory_advice_t advice); } // extern "C" compute-runtime-22.14.22890/level_zero/experimental/source/tracing/tracing_device_imp.cpp000066400000000000000000000455021422164147700314710ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/experimental/source/tracing/tracing_imp.h" /* Tracing wrappers for the device APIs. Every wrapper follows one sequence: ZE_HANDLE_TRACER_RECURSION is invoked with the DDI function pointer and the original arguments (NOTE(review): macro is defined elsewhere; presumably it bypasses tracing on re-entrant calls - confirm), the address of each argument is stored in the API's *_params_t struct, ZE_GEN_PER_API_CALLBACK_STATE is applied to a local L0::APITracerCallbackDataImp, and the result of L0::APITracerWrapperImp is returned. That call receives the DDI pointer, the params struct, the API ordinal, the prolog and epilog callback lists, and the arguments dereferenced from the params struct in signature order. */ /* Wraps Device.pfnGet; argument addresses go into ze_device_get_params_t. */ ZE_APIEXPORT ze_result_t ZE_APICALL zeDeviceGet_Tracing(ze_driver_handle_t hDriver, uint32_t *pCount, ze_device_handle_t *phDevices) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Device.pfnGet, hDriver, pCount, phDevices); ze_device_get_params_t tracerParams; tracerParams.phDriver = &hDriver; tracerParams.ppCount = &pCount; tracerParams.pphDevices = &phDevices; L0::APITracerCallbackDataImp api_callbackData; ZE_GEN_PER_API_CALLBACK_STATE(api_callbackData, ze_pfnDeviceGetCb_t, Device, pfnGetCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Device.pfnGet, &tracerParams, api_callbackData.apiOrdinal, 
api_callbackData.prologCallbacks, api_callbackData.epilogCallbacks, *tracerParams.phDriver, *tracerParams.ppCount, *tracerParams.pphDevices); } /* Wraps Device.pfnGetProperties; argument addresses go into ze_device_get_properties_params_t. */ ZE_APIEXPORT ze_result_t ZE_APICALL zeDeviceGetProperties_Tracing(ze_device_handle_t hDevice, ze_device_properties_t *pDeviceProperties) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Device.pfnGetProperties, hDevice, pDeviceProperties); ze_device_get_properties_params_t tracerParams; tracerParams.phDevice = &hDevice; tracerParams.ppDeviceProperties = &pDeviceProperties; L0::APITracerCallbackDataImp api_callbackData; ZE_GEN_PER_API_CALLBACK_STATE(api_callbackData, ze_pfnDeviceGetPropertiesCb_t, Device, pfnGetPropertiesCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Device.pfnGetProperties, &tracerParams, api_callbackData.apiOrdinal, api_callbackData.prologCallbacks, api_callbackData.epilogCallbacks, *tracerParams.phDevice, *tracerParams.ppDeviceProperties); } /* Wraps Device.pfnGetComputeProperties; argument addresses go into ze_device_get_compute_properties_params_t. */ ZE_APIEXPORT ze_result_t ZE_APICALL zeDeviceGetComputeProperties_Tracing(ze_device_handle_t hDevice, ze_device_compute_properties_t *pComputeProperties) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Device.pfnGetComputeProperties, hDevice, pComputeProperties); ze_device_get_compute_properties_params_t tracerParams; tracerParams.phDevice = &hDevice; tracerParams.ppComputeProperties = &pComputeProperties; L0::APITracerCallbackDataImp api_callbackData; ZE_GEN_PER_API_CALLBACK_STATE(api_callbackData, ze_pfnDeviceGetComputePropertiesCb_t, Device, pfnGetComputePropertiesCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Device.pfnGetComputeProperties, &tracerParams, api_callbackData.apiOrdinal, api_callbackData.prologCallbacks, api_callbackData.epilogCallbacks, *tracerParams.phDevice, *tracerParams.ppComputeProperties); } /* Wraps Device.pfnGetMemoryProperties; argument addresses go into ze_device_get_memory_properties_params_t. */ ZE_APIEXPORT ze_result_t ZE_APICALL zeDeviceGetMemoryProperties_Tracing(ze_device_handle_t hDevice, uint32_t *pCount, ze_device_memory_properties_t *pMemProperties) { 
ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Device.pfnGetMemoryProperties, hDevice, pCount, pMemProperties); ze_device_get_memory_properties_params_t tracerParams; tracerParams.phDevice = &hDevice; tracerParams.ppCount = &pCount; tracerParams.ppMemProperties = &pMemProperties; L0::APITracerCallbackDataImp api_callbackData; ZE_GEN_PER_API_CALLBACK_STATE(api_callbackData, ze_pfnDeviceGetMemoryPropertiesCb_t, Device, pfnGetMemoryPropertiesCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Device.pfnGetMemoryProperties, &tracerParams, api_callbackData.apiOrdinal, api_callbackData.prologCallbacks, api_callbackData.epilogCallbacks, *tracerParams.phDevice, *tracerParams.ppCount, *tracerParams.ppMemProperties); } /* Wraps Device.pfnGetCacheProperties; argument addresses go into ze_device_get_cache_properties_params_t. */ ZE_APIEXPORT ze_result_t ZE_APICALL zeDeviceGetCacheProperties_Tracing(ze_device_handle_t hDevice, uint32_t *pCount, ze_device_cache_properties_t *pCacheProperties) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Device.pfnGetCacheProperties, hDevice, pCount, pCacheProperties); ze_device_get_cache_properties_params_t tracerParams; tracerParams.phDevice = &hDevice; tracerParams.ppCount = &pCount; tracerParams.ppCacheProperties = &pCacheProperties; L0::APITracerCallbackDataImp api_callbackData; ZE_GEN_PER_API_CALLBACK_STATE(api_callbackData, ze_pfnDeviceGetCachePropertiesCb_t, Device, pfnGetCachePropertiesCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Device.pfnGetCacheProperties, &tracerParams, api_callbackData.apiOrdinal, api_callbackData.prologCallbacks, api_callbackData.epilogCallbacks, *tracerParams.phDevice, *tracerParams.ppCount, *tracerParams.ppCacheProperties); } /* Wraps Device.pfnGetImageProperties; argument addresses go into ze_device_get_image_properties_params_t. */ ZE_APIEXPORT ze_result_t ZE_APICALL zeDeviceGetImageProperties_Tracing(ze_device_handle_t hDevice, ze_device_image_properties_t *pImageProperties) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Device.pfnGetImageProperties, hDevice, pImageProperties); ze_device_get_image_properties_params_t tracerParams; tracerParams.phDevice = 
&hDevice; tracerParams.ppImageProperties = &pImageProperties; L0::APITracerCallbackDataImp api_callbackData; ZE_GEN_PER_API_CALLBACK_STATE(api_callbackData, ze_pfnDeviceGetImagePropertiesCb_t, Device, pfnGetImagePropertiesCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Device.pfnGetImageProperties, &tracerParams, api_callbackData.apiOrdinal, api_callbackData.prologCallbacks, api_callbackData.epilogCallbacks, *tracerParams.phDevice, *tracerParams.ppImageProperties); } /* Wraps Device.pfnGetSubDevices; argument addresses go into ze_device_get_sub_devices_params_t. */ ZE_APIEXPORT ze_result_t ZE_APICALL zeDeviceGetSubDevices_Tracing(ze_device_handle_t hDevice, uint32_t *pCount, ze_device_handle_t *phSubdevices) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Device.pfnGetSubDevices, hDevice, pCount, phSubdevices); ze_device_get_sub_devices_params_t tracerParams; tracerParams.phDevice = &hDevice; tracerParams.ppCount = &pCount; tracerParams.pphSubdevices = &phSubdevices; L0::APITracerCallbackDataImp api_callbackData; ZE_GEN_PER_API_CALLBACK_STATE(api_callbackData, ze_pfnDeviceGetSubDevicesCb_t, Device, pfnGetSubDevicesCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Device.pfnGetSubDevices, &tracerParams, api_callbackData.apiOrdinal, api_callbackData.prologCallbacks, api_callbackData.epilogCallbacks, *tracerParams.phDevice, *tracerParams.ppCount, *tracerParams.pphSubdevices); } /* Wraps Device.pfnGetP2PProperties; argument addresses go into ze_device_get_p2_p_properties_params_t. */ ZE_APIEXPORT ze_result_t ZE_APICALL zeDeviceGetP2PProperties_Tracing(ze_device_handle_t hDevice, ze_device_handle_t hPeerDevice, ze_device_p2p_properties_t *pP2PProperties) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Device.pfnGetP2PProperties, hDevice, hPeerDevice, pP2PProperties); ze_device_get_p2_p_properties_params_t tracerParams; tracerParams.phDevice = &hDevice; tracerParams.phPeerDevice = &hPeerDevice; tracerParams.ppP2PProperties = &pP2PProperties; L0::APITracerCallbackDataImp api_callbackData; ZE_GEN_PER_API_CALLBACK_STATE(api_callbackData, ze_pfnDeviceGetP2PPropertiesCb_t, Device, pfnGetP2PPropertiesCb); return 
L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Device.pfnGetP2PProperties, &tracerParams, api_callbackData.apiOrdinal, api_callbackData.prologCallbacks, api_callbackData.epilogCallbacks, *tracerParams.phDevice, *tracerParams.phPeerDevice, *tracerParams.ppP2PProperties); } /* Wraps Device.pfnCanAccessPeer; argument addresses go into ze_device_can_access_peer_params_t. */ ZE_APIEXPORT ze_result_t ZE_APICALL zeDeviceCanAccessPeer_Tracing(ze_device_handle_t hDevice, ze_device_handle_t hPeerDevice, ze_bool_t *value) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Device.pfnCanAccessPeer, hDevice, hPeerDevice, value); ze_device_can_access_peer_params_t tracerParams; tracerParams.phDevice = &hDevice; tracerParams.phPeerDevice = &hPeerDevice; tracerParams.pvalue = &value; L0::APITracerCallbackDataImp api_callbackData; ZE_GEN_PER_API_CALLBACK_STATE(api_callbackData, ze_pfnDeviceCanAccessPeerCb_t, Device, pfnCanAccessPeerCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Device.pfnCanAccessPeer, &tracerParams, api_callbackData.apiOrdinal, api_callbackData.prologCallbacks, api_callbackData.epilogCallbacks, *tracerParams.phDevice, *tracerParams.phPeerDevice, *tracerParams.pvalue); } /* Kernel API wrapper that sits among the device wrappers: wraps Kernel.pfnSetCacheConfig (the Kernel DDI table, not Device); argument addresses go into ze_kernel_set_cache_config_params_t. */ ZE_APIEXPORT ze_result_t ZE_APICALL zeKernelSetCacheConfig_Tracing(ze_kernel_handle_t hKernel, ze_cache_config_flags_t flags) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Kernel.pfnSetCacheConfig, hKernel, flags); ze_kernel_set_cache_config_params_t tracerParams; tracerParams.phKernel = &hKernel; tracerParams.pflags = &flags; L0::APITracerCallbackDataImp api_callbackData; ZE_GEN_PER_API_CALLBACK_STATE(api_callbackData, ze_pfnKernelSetCacheConfigCb_t, Kernel, pfnSetCacheConfigCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Kernel.pfnSetCacheConfig, &tracerParams, api_callbackData.apiOrdinal, api_callbackData.prologCallbacks, api_callbackData.epilogCallbacks, *tracerParams.phKernel, *tracerParams.pflags); } /* Wraps Device.pfnGetMemoryAccessProperties; argument addresses go into ze_device_get_memory_access_properties_params_t. */ ZE_APIEXPORT ze_result_t ZE_APICALL zeDeviceGetMemoryAccessProperties_Tracing(ze_device_handle_t hDevice, 
ze_device_memory_access_properties_t *pMemAccessProperties) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Device.pfnGetMemoryAccessProperties, hDevice, pMemAccessProperties); ze_device_get_memory_access_properties_params_t tracerParams; tracerParams.phDevice = &hDevice; tracerParams.ppMemAccessProperties = &pMemAccessProperties; L0::APITracerCallbackDataImp api_callbackData; ZE_GEN_PER_API_CALLBACK_STATE(api_callbackData, ze_pfnDeviceGetMemoryAccessPropertiesCb_t, Device, pfnGetMemoryAccessPropertiesCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Device.pfnGetMemoryAccessProperties, &tracerParams, api_callbackData.apiOrdinal, api_callbackData.prologCallbacks, api_callbackData.epilogCallbacks, *tracerParams.phDevice, *tracerParams.ppMemAccessProperties); } /* Wraps Device.pfnGetModuleProperties; argument addresses go into ze_device_get_module_properties_params_t. */ ZE_APIEXPORT ze_result_t ZE_APICALL zeDeviceGetModuleProperties_Tracing(ze_device_handle_t hDevice, ze_device_module_properties_t *pModuleProperties) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Device.pfnGetModuleProperties, hDevice, pModuleProperties); ze_device_get_module_properties_params_t tracerParams; tracerParams.phDevice = &hDevice; tracerParams.ppModuleProperties = &pModuleProperties; L0::APITracerCallbackDataImp api_callbackData; ZE_GEN_PER_API_CALLBACK_STATE(api_callbackData, ze_pfnDeviceGetModulePropertiesCb_t, Device, pfnGetModulePropertiesCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Device.pfnGetModuleProperties, &tracerParams, api_callbackData.apiOrdinal, api_callbackData.prologCallbacks, api_callbackData.epilogCallbacks, *tracerParams.phDevice, *tracerParams.ppModuleProperties); } /* Wraps Device.pfnGetCommandQueueGroupProperties; argument addresses go into ze_device_get_command_queue_group_properties_params_t. */ ZE_APIEXPORT ze_result_t ZE_APICALL zeDeviceGetCommandQueueGroupProperties_Tracing(ze_device_handle_t hDevice, uint32_t *pCount, ze_command_queue_group_properties_t *pCommandQueueGroupProperties) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Device.pfnGetCommandQueueGroupProperties, hDevice, pCount, pCommandQueueGroupProperties); 
ze_device_get_command_queue_group_properties_params_t tracerParams; tracerParams.phDevice = &hDevice; tracerParams.ppCount = &pCount; tracerParams.ppCommandQueueGroupProperties = &pCommandQueueGroupProperties; L0::APITracerCallbackDataImp api_callbackData; ZE_GEN_PER_API_CALLBACK_STATE(api_callbackData, ze_pfnDeviceGetCommandQueueGroupPropertiesCb_t, Device, pfnGetCommandQueueGroupPropertiesCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Device.pfnGetCommandQueueGroupProperties, &tracerParams, api_callbackData.apiOrdinal, api_callbackData.prologCallbacks, api_callbackData.epilogCallbacks, *tracerParams.phDevice, *tracerParams.ppCount, *tracerParams.ppCommandQueueGroupProperties); } /* Wraps Device.pfnGetExternalMemoryProperties; argument addresses go into ze_device_get_external_memory_properties_params_t. */ ZE_APIEXPORT ze_result_t ZE_APICALL zeDeviceGetExternalMemoryProperties_Tracing(ze_device_handle_t hDevice, ze_device_external_memory_properties_t *pExternalMemoryProperties) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Device.pfnGetExternalMemoryProperties, hDevice, pExternalMemoryProperties); ze_device_get_external_memory_properties_params_t tracerParams; tracerParams.phDevice = &hDevice; tracerParams.ppExternalMemoryProperties = &pExternalMemoryProperties; L0::APITracerCallbackDataImp api_callbackData; ZE_GEN_PER_API_CALLBACK_STATE(api_callbackData, ze_pfnDeviceGetExternalMemoryPropertiesCb_t, Device, pfnGetExternalMemoryPropertiesCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Device.pfnGetExternalMemoryProperties, &tracerParams, api_callbackData.apiOrdinal, api_callbackData.prologCallbacks, api_callbackData.epilogCallbacks, *tracerParams.phDevice, *tracerParams.ppExternalMemoryProperties); } /* Wraps Device.pfnGetStatus; the single argument's address goes into ze_device_get_status_params_t. */ ZE_APIEXPORT ze_result_t ZE_APICALL zeDeviceGetStatus_Tracing(ze_device_handle_t hDevice) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Device.pfnGetStatus, hDevice); ze_device_get_status_params_t tracerParams; tracerParams.phDevice = &hDevice; L0::APITracerCallbackDataImp api_callbackData; 
ZE_GEN_PER_API_CALLBACK_STATE(api_callbackData, ze_pfnDeviceGetStatusCb_t, Device, pfnGetStatusCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Device.pfnGetStatus, &tracerParams, api_callbackData.apiOrdinal, api_callbackData.prologCallbacks, api_callbackData.epilogCallbacks, *tracerParams.phDevice); } compute-runtime-22.14.22890/level_zero/experimental/source/tracing/tracing_device_imp.h000066400000000000000000000060751422164147700311400ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once /* C-linkage declarations of the device tracing wrappers (plus zeKernelSetCacheConfig_Tracing, which is declared alongside them). */ extern "C" { ZE_APIEXPORT ze_result_t ZE_APICALL zeDeviceGet_Tracing(ze_driver_handle_t hDriver, uint32_t *pCount, ze_device_handle_t *phDevices); ZE_APIEXPORT ze_result_t ZE_APICALL zeDeviceGetProperties_Tracing(ze_device_handle_t hDevice, ze_device_properties_t *pDeviceProperties); ZE_APIEXPORT ze_result_t ZE_APICALL zeDeviceGetComputeProperties_Tracing(ze_device_handle_t hDevice, ze_device_compute_properties_t *pComputeProperties); ZE_APIEXPORT ze_result_t ZE_APICALL zeDeviceGetMemoryProperties_Tracing(ze_device_handle_t hDevice, uint32_t *pCount, ze_device_memory_properties_t *pMemProperties); ZE_APIEXPORT ze_result_t ZE_APICALL zeDeviceGetCacheProperties_Tracing(ze_device_handle_t hDevice, uint32_t *pCount, ze_device_cache_properties_t *pCacheProperties); ZE_APIEXPORT ze_result_t ZE_APICALL zeDeviceGetImageProperties_Tracing(ze_device_handle_t hDevice, ze_device_image_properties_t *pImageProperties); ZE_APIEXPORT ze_result_t ZE_APICALL zeDeviceGetSubDevices_Tracing(ze_device_handle_t hDevice, uint32_t *pCount, ze_device_handle_t *phSubdevices); ZE_APIEXPORT ze_result_t ZE_APICALL zeDeviceGetP2PProperties_Tracing(ze_device_handle_t hDevice, ze_device_handle_t hPeerDevice, ze_device_p2p_properties_t *pP2PProperties); ZE_APIEXPORT ze_result_t ZE_APICALL zeDeviceCanAccessPeer_Tracing(ze_device_handle_t hDevice, ze_device_handle_t hPeerDevice, ze_bool_t *value); ZE_APIEXPORT ze_result_t 
ZE_APICALL zeKernelSetCacheConfig_Tracing(ze_kernel_handle_t hKernel, ze_cache_config_flags_t flags); ZE_APIEXPORT ze_result_t ZE_APICALL zeDeviceGetMemoryAccessProperties_Tracing(ze_device_handle_t hDevice, ze_device_memory_access_properties_t *pMemAccessProperties); ZE_APIEXPORT ze_result_t ZE_APICALL zeDeviceGetModuleProperties_Tracing(ze_device_handle_t hDevice, ze_device_module_properties_t *pModuleProperties); ZE_APIEXPORT ze_result_t ZE_APICALL zeDeviceGetCommandQueueGroupProperties_Tracing(ze_device_handle_t hDevice, uint32_t *pCount, ze_command_queue_group_properties_t *pCommandQueueGroupProperties); ZE_APIEXPORT ze_result_t ZE_APICALL zeDeviceGetExternalMemoryProperties_Tracing(ze_device_handle_t hDevice, ze_device_external_memory_properties_t *pExternalMemoryProperties); ZE_APIEXPORT ze_result_t ZE_APICALL zeDeviceGetStatus_Tracing(ze_device_handle_t hDevice); } compute-runtime-22.14.22890/level_zero/experimental/source/tracing/tracing_driver_imp.cpp000066400000000000000000000135501422164147700315230ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/experimental/source/tracing/tracing_imp.h" /* Tracing wrappers for the driver APIs. Each one invokes ZE_HANDLE_TRACER_RECURSION with the DDI pointer and the original arguments (NOTE(review): macro defined elsewhere - confirm its early-return behavior), stores the argument addresses in the API's *_params_t struct, applies ZE_GEN_PER_API_CALLBACK_STATE to a local L0::APITracerCallbackDataImp, and returns the result of L0::APITracerWrapperImp. */ /* Wraps Driver.pfnGet; argument addresses go into ze_driver_get_params_t. */ ZE_APIEXPORT ze_result_t ZE_APICALL zeDriverGet_Tracing(uint32_t *pCount, ze_driver_handle_t *phDrivers) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Driver.pfnGet, pCount, phDrivers); ze_driver_get_params_t tracerParams; tracerParams.ppCount = &pCount; tracerParams.pphDrivers = &phDrivers; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnDriverGetCb_t, Driver, pfnGetCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Driver.pfnGet, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.ppCount, *tracerParams.pphDrivers); } /* Wraps Driver.pfnGetProperties; note the parameter is named `properties` while the params-struct field it is stored in is ppDriverProperties. */ ZE_APIEXPORT ze_result_t ZE_APICALL zeDriverGetProperties_Tracing(ze_driver_handle_t hDriver, ze_driver_properties_t 
*properties) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Driver.pfnGetProperties, hDriver, properties); ze_driver_get_properties_params_t tracerParams; tracerParams.phDriver = &hDriver; tracerParams.ppDriverProperties = &properties; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnDriverGetPropertiesCb_t, Driver, pfnGetPropertiesCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Driver.pfnGetProperties, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phDriver, *tracerParams.ppDriverProperties); } /* Wraps Driver.pfnGetApiVersion; the handle parameter is named `hDrivers` here but is stored in the phDriver field. */ ZE_APIEXPORT ze_result_t ZE_APICALL zeDriverGetApiVersion_Tracing(ze_driver_handle_t hDrivers, ze_api_version_t *version) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Driver.pfnGetApiVersion, hDrivers, version); ze_driver_get_api_version_params_t tracerParams; tracerParams.phDriver = &hDrivers; tracerParams.pversion = &version; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnDriverGetApiVersionCb_t, Driver, pfnGetApiVersionCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Driver.pfnGetApiVersion, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phDriver, *tracerParams.pversion); } /* Wraps Driver.pfnGetIpcProperties; argument addresses go into ze_driver_get_ipc_properties_params_t. */ ZE_APIEXPORT ze_result_t ZE_APICALL zeDriverGetIpcProperties_Tracing(ze_driver_handle_t hDriver, ze_driver_ipc_properties_t *pIpcProperties) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Driver.pfnGetIpcProperties, hDriver, pIpcProperties); ze_driver_get_ipc_properties_params_t tracerParams; tracerParams.phDriver = &hDriver; tracerParams.ppIpcProperties = &pIpcProperties; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnDriverGetIpcPropertiesCb_t, Driver, pfnGetIpcPropertiesCb); return 
L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Driver.pfnGetIpcProperties, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phDriver, *tracerParams.ppIpcProperties); } /* Wraps Driver.pfnGetExtensionProperties; argument addresses go into ze_driver_get_extension_properties_params_t. */ ZE_APIEXPORT ze_result_t ZE_APICALL zeDriverGetExtensionProperties_Tracing(ze_driver_handle_t hDriver, uint32_t *pCount, ze_driver_extension_properties_t *pExtensionProperties) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Driver.pfnGetExtensionProperties, hDriver, pCount, pExtensionProperties); ze_driver_get_extension_properties_params_t tracerParams; tracerParams.phDriver = &hDriver; tracerParams.ppCount = &pCount; tracerParams.ppExtensionProperties = &pExtensionProperties; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnDriverGetExtensionPropertiesCb_t, Driver, pfnGetExtensionPropertiesCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Driver.pfnGetExtensionProperties, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phDriver, *tracerParams.ppCount, *tracerParams.ppExtensionProperties); } compute-runtime-22.14.22890/level_zero/experimental/source/tracing/tracing_driver_imp.h000066400000000000000000000017441422164147700311720ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once /* C-linkage declarations of the driver tracing wrappers. */ extern "C" { ZE_APIEXPORT ze_result_t ZE_APICALL zeDriverGet_Tracing(uint32_t *pCount, ze_driver_handle_t *phDrivers); ZE_APIEXPORT ze_result_t ZE_APICALL zeDriverGetProperties_Tracing(ze_driver_handle_t hDriver, ze_driver_properties_t *properties); ZE_APIEXPORT ze_result_t ZE_APICALL zeDriverGetApiVersion_Tracing(ze_driver_handle_t hDrivers, ze_api_version_t *version); ZE_APIEXPORT ze_result_t ZE_APICALL zeDriverGetIpcProperties_Tracing(ze_driver_handle_t hDriver, ze_driver_ipc_properties_t *pIpcProperties); ZE_APIEXPORT 
ze_result_t ZE_APICALL zeDriverGetExtensionProperties_Tracing(ze_driver_handle_t hDriver, uint32_t *pCount, ze_driver_extension_properties_t *pExtensionProperties); } compute-runtime-22.14.22890/level_zero/experimental/source/tracing/tracing_event_imp.cpp000066400000000000000000000412501422164147700313470ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/experimental/source/tracing/tracing_imp.h" /* Tracing wrappers for the event-pool, event and event-related command-list APIs. Each one invokes ZE_HANDLE_TRACER_RECURSION with the DDI pointer and the original arguments (NOTE(review): macro defined elsewhere - confirm its early-return behavior), stores the argument addresses in the API's *_params_t struct, applies ZE_GEN_PER_API_CALLBACK_STATE to a local L0::APITracerCallbackDataImp, and returns the result of L0::APITracerWrapperImp. */ /* Wraps EventPool.pfnCreate; argument addresses go into ze_event_pool_create_params_t. */ ZE_APIEXPORT ze_result_t ZE_APICALL zeEventPoolCreate_Tracing(ze_context_handle_t hContext, const ze_event_pool_desc_t *desc, uint32_t numDevices, ze_device_handle_t *phDevices, ze_event_pool_handle_t *phEventPool) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.EventPool.pfnCreate, hContext, desc, numDevices, phDevices, phEventPool); ze_event_pool_create_params_t tracerParams; tracerParams.phContext = &hContext; tracerParams.pdesc = &desc; tracerParams.pnumDevices = &numDevices; tracerParams.pphDevices = &phDevices; tracerParams.pphEventPool = &phEventPool; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnEventPoolCreateCb_t, EventPool, pfnCreateCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.EventPool.pfnCreate, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phContext, *tracerParams.pdesc, *tracerParams.pnumDevices, *tracerParams.pphDevices, *tracerParams.pphEventPool); } /* Wraps EventPool.pfnDestroy; the single argument's address goes into ze_event_pool_destroy_params_t. */ ZE_APIEXPORT ze_result_t ZE_APICALL zeEventPoolDestroy_Tracing(ze_event_pool_handle_t hEventPool) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.EventPool.pfnDestroy, hEventPool); ze_event_pool_destroy_params_t tracerParams; tracerParams.phEventPool = &hEventPool; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnEventPoolDestroyCb_t, EventPool, pfnDestroyCb); return 
L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.EventPool.pfnDestroy, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phEventPool); } /* Wraps Event.pfnCreate; argument addresses go into ze_event_create_params_t. */ ZE_APIEXPORT ze_result_t ZE_APICALL zeEventCreate_Tracing(ze_event_pool_handle_t hEventPool, const ze_event_desc_t *desc, ze_event_handle_t *phEvent) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Event.pfnCreate, hEventPool, desc, phEvent); ze_event_create_params_t tracerParams; tracerParams.phEventPool = &hEventPool; tracerParams.pdesc = &desc; tracerParams.pphEvent = &phEvent; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnEventCreateCb_t, Event, pfnCreateCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Event.pfnCreate, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phEventPool, *tracerParams.pdesc, *tracerParams.pphEvent); } /* Wraps Event.pfnDestroy; the single argument's address goes into ze_event_destroy_params_t. */ ZE_APIEXPORT ze_result_t ZE_APICALL zeEventDestroy_Tracing(ze_event_handle_t hEvent) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Event.pfnDestroy, hEvent); ze_event_destroy_params_t tracerParams; tracerParams.phEvent = &hEvent; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnEventDestroyCb_t, Event, pfnDestroyCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Event.pfnDestroy, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phEvent); } /* Wraps EventPool.pfnGetIpcHandle; argument addresses go into ze_event_pool_get_ipc_handle_params_t. */ ZE_APIEXPORT ze_result_t ZE_APICALL zeEventPoolGetIpcHandle_Tracing(ze_event_pool_handle_t hEventPool, ze_ipc_event_pool_handle_t *phIpc) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.EventPool.pfnGetIpcHandle, hEventPool, phIpc); ze_event_pool_get_ipc_handle_params_t tracerParams; tracerParams.phEventPool = &hEventPool; tracerParams.pphIpc = &phIpc; L0::APITracerCallbackDataImp 
apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnEventPoolGetIpcHandleCb_t, EventPool, pfnGetIpcHandleCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.EventPool.pfnGetIpcHandle, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phEventPool, *tracerParams.pphIpc); } /* Wraps EventPool.pfnOpenIpcHandle; hIpc is taken by value and its address is stored in the phIpc field. */ ZE_APIEXPORT ze_result_t ZE_APICALL zeEventPoolOpenIpcHandle_Tracing(ze_context_handle_t hContext, ze_ipc_event_pool_handle_t hIpc, ze_event_pool_handle_t *phEventPool) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.EventPool.pfnOpenIpcHandle, hContext, hIpc, phEventPool); ze_event_pool_open_ipc_handle_params_t tracerParams; tracerParams.phContext = &hContext; tracerParams.phIpc = &hIpc; tracerParams.pphEventPool = &phEventPool; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnEventPoolOpenIpcHandleCb_t, EventPool, pfnOpenIpcHandleCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.EventPool.pfnOpenIpcHandle, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phContext, *tracerParams.phIpc, *tracerParams.pphEventPool); } /* Wraps EventPool.pfnCloseIpcHandle; the single argument's address goes into ze_event_pool_close_ipc_handle_params_t. */ ZE_APIEXPORT ze_result_t ZE_APICALL zeEventPoolCloseIpcHandle_Tracing(ze_event_pool_handle_t hEventPool) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.EventPool.pfnCloseIpcHandle, hEventPool); ze_event_pool_close_ipc_handle_params_t tracerParams; tracerParams.phEventPool = &hEventPool; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnEventPoolCloseIpcHandleCb_t, EventPool, pfnCloseIpcHandleCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.EventPool.pfnCloseIpcHandle, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phEventPool); } /* Wraps CommandList.pfnAppendSignalEvent; argument addresses go into ze_command_list_append_signal_event_params_t. */ ZE_APIEXPORT ze_result_t ZE_APICALL 
zeCommandListAppendSignalEvent_Tracing(ze_command_list_handle_t hCommandList, ze_event_handle_t hEvent) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.CommandList.pfnAppendSignalEvent, hCommandList, hEvent); ze_command_list_append_signal_event_params_t tracerParams; tracerParams.phCommandList = &hCommandList; tracerParams.phEvent = &hEvent; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnCommandListAppendSignalEventCb_t, CommandList, pfnAppendSignalEventCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.CommandList.pfnAppendSignalEvent, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phCommandList, *tracerParams.phEvent); } /* Wraps CommandList.pfnAppendWaitOnEvents; argument addresses go into ze_command_list_append_wait_on_events_params_t. */ ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendWaitOnEvents_Tracing(ze_command_list_handle_t hCommandList, uint32_t numEvents, ze_event_handle_t *phEvents) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.CommandList.pfnAppendWaitOnEvents, hCommandList, numEvents, phEvents); ze_command_list_append_wait_on_events_params_t tracerParams; tracerParams.phCommandList = &hCommandList; tracerParams.pnumEvents = &numEvents; tracerParams.pphEvents = &phEvents; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnCommandListAppendWaitOnEventsCb_t, CommandList, pfnAppendWaitOnEventsCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.CommandList.pfnAppendWaitOnEvents, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phCommandList, *tracerParams.pnumEvents, *tracerParams.pphEvents); } /* Wraps Event.pfnHostSignal; the single argument's address goes into ze_event_host_signal_params_t. */ ZE_APIEXPORT ze_result_t ZE_APICALL zeEventHostSignal_Tracing(ze_event_handle_t hEvent) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Event.pfnHostSignal, hEvent); ze_event_host_signal_params_t tracerParams; tracerParams.phEvent = &hEvent; 
L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnEventHostSignalCb_t, Event, pfnHostSignalCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Event.pfnHostSignal, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phEvent); } /* Wraps Event.pfnHostSynchronize; argument addresses go into ze_event_host_synchronize_params_t. */ ZE_APIEXPORT ze_result_t ZE_APICALL zeEventHostSynchronize_Tracing(ze_event_handle_t hEvent, uint64_t timeout) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Event.pfnHostSynchronize, hEvent, timeout); ze_event_host_synchronize_params_t tracerParams; tracerParams.phEvent = &hEvent; tracerParams.ptimeout = &timeout; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnEventHostSynchronizeCb_t, Event, pfnHostSynchronizeCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Event.pfnHostSynchronize, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phEvent, *tracerParams.ptimeout); } /* Wraps Event.pfnQueryStatus; the single argument's address goes into ze_event_query_status_params_t. */ ZE_APIEXPORT ze_result_t ZE_APICALL zeEventQueryStatus_Tracing(ze_event_handle_t hEvent) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Event.pfnQueryStatus, hEvent); ze_event_query_status_params_t tracerParams; tracerParams.phEvent = &hEvent; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnEventQueryStatusCb_t, Event, pfnQueryStatusCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Event.pfnQueryStatus, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phEvent); } /* Wraps Event.pfnHostReset; the single argument's address goes into ze_event_host_reset_params_t. */ ZE_APIEXPORT ze_result_t ZE_APICALL zeEventHostReset_Tracing(ze_event_handle_t hEvent) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Event.pfnHostReset, hEvent); ze_event_host_reset_params_t tracerParams; tracerParams.phEvent = &hEvent; L0::APITracerCallbackDataImp 
apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnEventHostResetCb_t, Event, pfnHostResetCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Event.pfnHostReset, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phEvent); } /* Wraps CommandList.pfnAppendEventReset; argument addresses go into ze_command_list_append_event_reset_params_t. */ ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendEventReset_Tracing(ze_command_list_handle_t hCommandList, ze_event_handle_t hEvent) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.CommandList.pfnAppendEventReset, hCommandList, hEvent); ze_command_list_append_event_reset_params_t tracerParams; tracerParams.phCommandList = &hCommandList; tracerParams.phEvent = &hEvent; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnCommandListAppendEventResetCb_t, CommandList, pfnAppendEventResetCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.CommandList.pfnAppendEventReset, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phCommandList, *tracerParams.phEvent); } /* Wraps Event.pfnQueryKernelTimestamp; argument addresses go into ze_event_query_kernel_timestamp_params_t. */ ZE_APIEXPORT ze_result_t ZE_APICALL zeEventQueryKernelTimestamp_Tracing(ze_event_handle_t hEvent, ze_kernel_timestamp_result_t *dstptr) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Event.pfnQueryKernelTimestamp, hEvent, dstptr); ze_event_query_kernel_timestamp_params_t tracerParams; tracerParams.phEvent = &hEvent; tracerParams.pdstptr = &dstptr; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnEventQueryKernelTimestampCb_t, Event, pfnQueryKernelTimestampCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Event.pfnQueryKernelTimestamp, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phEvent, *tracerParams.pdstptr); } 
compute-runtime-22.14.22890/level_zero/experimental/source/tracing/tracing_event_imp.h000066400000000000000000000047461422164147700310250ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once extern "C" { ZE_APIEXPORT ze_result_t ZE_APICALL zeEventPoolCreate_Tracing(ze_context_handle_t hContext, const ze_event_pool_desc_t *desc, uint32_t numDevices, ze_device_handle_t *phDevices, ze_event_pool_handle_t *phEventPool); ZE_APIEXPORT ze_result_t ZE_APICALL zeEventPoolDestroy_Tracing(ze_event_pool_handle_t hEventPool); ZE_APIEXPORT ze_result_t ZE_APICALL zeEventCreate_Tracing(ze_event_pool_handle_t hEventPool, const ze_event_desc_t *desc, ze_event_handle_t *phEvent); ZE_APIEXPORT ze_result_t ZE_APICALL zeEventDestroy_Tracing(ze_event_handle_t hEvent); ZE_APIEXPORT ze_result_t ZE_APICALL zeEventPoolGetIpcHandle_Tracing(ze_event_pool_handle_t hEventPool, ze_ipc_event_pool_handle_t *phIpc); ZE_APIEXPORT ze_result_t ZE_APICALL zeEventPoolOpenIpcHandle_Tracing(ze_context_handle_t hContext, ze_ipc_event_pool_handle_t hIpc, ze_event_pool_handle_t *phEventPool); ZE_APIEXPORT ze_result_t ZE_APICALL zeEventPoolCloseIpcHandle_Tracing(ze_event_pool_handle_t hEventPool); ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendSignalEvent_Tracing(ze_command_list_handle_t hCommandList, ze_event_handle_t hEvent); ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendWaitOnEvents_Tracing(ze_command_list_handle_t hCommandList, uint32_t numEvents, ze_event_handle_t *phEvents); ZE_APIEXPORT ze_result_t ZE_APICALL zeEventHostSignal_Tracing(ze_event_handle_t hEvent); ZE_APIEXPORT ze_result_t ZE_APICALL zeEventHostSynchronize_Tracing(ze_event_handle_t hEvent, uint64_t timeout); ZE_APIEXPORT ze_result_t ZE_APICALL zeEventQueryStatus_Tracing(ze_event_handle_t hEvent); ZE_APIEXPORT ze_result_t ZE_APICALL zeEventHostReset_Tracing(ze_event_handle_t hEvent); ZE_APIEXPORT ze_result_t ZE_APICALL 
zeCommandListAppendEventReset_Tracing(ze_command_list_handle_t hCommandList, ze_event_handle_t hEvent); ZE_APIEXPORT ze_result_t ZE_APICALL zeEventQueryKernelTimestamp_Tracing(ze_event_handle_t hEvent, ze_kernel_timestamp_result_t *dstptr); } compute-runtime-22.14.22890/level_zero/experimental/source/tracing/tracing_fence_imp.cpp000066400000000000000000000117441422164147700313130ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/experimental/source/tracing/tracing_imp.h" ZE_APIEXPORT ze_result_t ZE_APICALL zeFenceCreate_Tracing(ze_command_queue_handle_t hCommandQueue, const ze_fence_desc_t *desc, ze_fence_handle_t *phFence) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Fence.pfnCreate, hCommandQueue, desc, phFence); ze_fence_create_params_t tracerParams; tracerParams.phCommandQueue = &hCommandQueue; tracerParams.pdesc = &desc; tracerParams.pphFence = &phFence; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnFenceCreateCb_t, Fence, pfnCreateCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Fence.pfnCreate, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phCommandQueue, *tracerParams.pdesc, *tracerParams.pphFence); } ZE_APIEXPORT ze_result_t ZE_APICALL zeFenceDestroy_Tracing(ze_fence_handle_t hFence) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Fence.pfnDestroy, hFence); ze_fence_destroy_params_t tracerParams; tracerParams.phFence = &hFence; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnFenceDestroyCb_t, Fence, pfnDestroyCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Fence.pfnDestroy, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phFence); } ZE_APIEXPORT ze_result_t 
ZE_APICALL zeFenceHostSynchronize_Tracing(ze_fence_handle_t hFence, uint64_t timeout) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Fence.pfnHostSynchronize, hFence, timeout); ze_fence_host_synchronize_params_t tracerParams; tracerParams.phFence = &hFence; tracerParams.ptimeout = &timeout; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnFenceHostSynchronizeCb_t, Fence, pfnHostSynchronizeCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Fence.pfnHostSynchronize, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phFence, *tracerParams.ptimeout); } ZE_APIEXPORT ze_result_t ZE_APICALL zeFenceQueryStatus_Tracing(ze_fence_handle_t hFence) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Fence.pfnQueryStatus, hFence); ze_fence_query_status_params_t tracerParams; tracerParams.phFence = &hFence; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnFenceQueryStatusCb_t, Fence, pfnQueryStatusCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Fence.pfnQueryStatus, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phFence); } ZE_APIEXPORT ze_result_t ZE_APICALL zeFenceReset_Tracing(ze_fence_handle_t hFence) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Fence.pfnReset, hFence); ze_fence_reset_params_t tracerParams; tracerParams.phFence = &hFence; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnFenceResetCb_t, Fence, pfnResetCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Fence.pfnReset, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phFence); } 
compute-runtime-22.14.22890/level_zero/experimental/source/tracing/tracing_fence_imp.h000066400000000000000000000013241422164147700307510ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once extern "C" { ZE_APIEXPORT ze_result_t ZE_APICALL zeFenceCreate_Tracing(ze_command_queue_handle_t hCommandQueue, const ze_fence_desc_t *desc, ze_fence_handle_t *phFence); ZE_APIEXPORT ze_result_t ZE_APICALL zeFenceDestroy_Tracing(ze_fence_handle_t hFence); ZE_APIEXPORT ze_result_t ZE_APICALL zeFenceHostSynchronize_Tracing(ze_fence_handle_t hFence, uint64_t timeout); ZE_APIEXPORT ze_result_t ZE_APICALL zeFenceQueryStatus_Tracing(ze_fence_handle_t hFence); ZE_APIEXPORT ze_result_t ZE_APICALL zeFenceReset_Tracing(ze_fence_handle_t hFence); } compute-runtime-22.14.22890/level_zero/experimental/source/tracing/tracing_global_imp.cpp000066400000000000000000000016771422164147700314770ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/experimental/source/tracing/tracing_imp.h" ZE_APIEXPORT ze_result_t ZE_APICALL zeInit_Tracing(ze_init_flags_t flags) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Global.pfnInit, flags); ze_init_params_t tracerParams; tracerParams.pflags = &flags; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnInitCb_t, Global, pfnInitCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Global.pfnInit, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.pflags); } compute-runtime-22.14.22890/level_zero/experimental/source/tracing/tracing_global_imp.h000066400000000000000000000002771422164147700311370ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once extern "C" { ZE_APIEXPORT ze_result_t ZE_APICALL 
zeInit_Tracing(ze_init_flags_t flags); } compute-runtime-22.14.22890/level_zero/experimental/source/tracing/tracing_image_imp.cpp000066400000000000000000000072511422164147700313130ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/experimental/source/tracing/tracing_imp.h" ZE_APIEXPORT ze_result_t ZE_APICALL zeImageGetProperties_Tracing(ze_device_handle_t hDevice, const ze_image_desc_t *desc, ze_image_properties_t *pImageProperties) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Image.pfnGetProperties, hDevice, desc, pImageProperties); ze_image_get_properties_params_t tracerParams; tracerParams.phDevice = &hDevice; tracerParams.pdesc = &desc; tracerParams.ppImageProperties = &pImageProperties; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnImageGetPropertiesCb_t, Image, pfnGetPropertiesCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Image.pfnGetProperties, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phDevice, *tracerParams.pdesc, *tracerParams.ppImageProperties); } ZE_APIEXPORT ze_result_t ZE_APICALL zeImageCreate_Tracing(ze_context_handle_t hContext, ze_device_handle_t hDevice, const ze_image_desc_t *desc, ze_image_handle_t *phImage) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Image.pfnCreate, hContext, hDevice, desc, phImage); ze_image_create_params_t tracerParams; tracerParams.phContext = &hContext; tracerParams.phDevice = &hDevice; tracerParams.pdesc = &desc; tracerParams.pphImage = &phImage; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnImageCreateCb_t, Image, pfnCreateCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Image.pfnCreate, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, 
apiCallbackData.epilogCallbacks, *tracerParams.phContext, *tracerParams.phDevice, *tracerParams.pdesc, *tracerParams.pphImage); } ZE_APIEXPORT ze_result_t ZE_APICALL zeImageDestroy_Tracing(ze_image_handle_t hImage) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Image.pfnDestroy, hImage); ze_image_destroy_params_t tracerParams; tracerParams.phImage = &hImage; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnImageDestroyCb_t, Image, pfnDestroyCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Image.pfnDestroy, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phImage); } compute-runtime-22.14.22890/level_zero/experimental/source/tracing/tracing_image_imp.h000066400000000000000000000012321422164147700307510ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once extern "C" { ZE_APIEXPORT ze_result_t ZE_APICALL zeImageGetProperties_Tracing(ze_device_handle_t hDevice, const ze_image_desc_t *desc, ze_image_properties_t *pImageProperties); ZE_APIEXPORT ze_result_t ZE_APICALL zeImageCreate_Tracing(ze_context_handle_t hContext, ze_device_handle_t hDevice, const ze_image_desc_t *desc, ze_image_handle_t *phImage); ZE_APIEXPORT ze_result_t ZE_APICALL zeImageDestroy_Tracing(ze_image_handle_t hImage); } compute-runtime-22.14.22890/level_zero/experimental/source/tracing/tracing_imp.cpp000066400000000000000000000241371422164147700301530ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/experimental/source/tracing/tracing_imp.h" #include "shared/source/helpers/debug_helpers.h" namespace L0 { thread_local ze_bool_t tracingInProgress = 0; struct APITracerContextImp globalAPITracerContextImp; struct APITracerContextImp *pGlobalAPITracerContextImp = &globalAPITracerContextImp; APITracer 
*APITracer::create() { APITracerImp *tracer = new APITracerImp; tracer->tracingState = disabledState; tracer->tracerFunctions = {}; UNRECOVERABLE_IF(tracer == nullptr); return tracer; } ze_result_t createAPITracer(zet_context_handle_t hContext, const zet_tracer_exp_desc_t *desc, zet_tracer_exp_handle_t *phTracer) { if (!pGlobalAPITracerContextImp->isTracingEnabled()) { return ZE_RESULT_ERROR_UNINITIALIZED; } APITracerImp *tracer = static_cast(APITracer::create()); tracer->tracerFunctions.pUserData = desc->pUserData; *phTracer = tracer->toHandle(); return ZE_RESULT_SUCCESS; } // This destructor will be called only during at-exit processing, // Hence, this function is executing in a single threaded environment, // and requires no mutex. APITracerContextImp::~APITracerContextImp() { std::list::iterator itr = threadTracerDataList.begin(); while (itr != threadTracerDataList.end()) { (*itr)->clearThreadTracerDataOnList(); itr = threadTracerDataList.erase(itr); } } ze_result_t APITracerImp::destroyTracer(zet_tracer_exp_handle_t phTracer) { APITracerImp *tracer = static_cast(phTracer); ze_result_t result = pGlobalAPITracerContextImp->finalizeDisableImpTracingWait(tracer); if (result == ZE_RESULT_SUCCESS) { delete L0::APITracer::fromHandle(phTracer); } return result; } ze_result_t APITracerImp::setPrologues(zet_core_callbacks_t *pCoreCbs) { if (this->tracingState != disabledState) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } this->tracerFunctions.corePrologues = *pCoreCbs; return ZE_RESULT_SUCCESS; } ze_result_t APITracerImp::setEpilogues(zet_core_callbacks_t *pCoreCbs) { if (this->tracingState != disabledState) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } this->tracerFunctions.coreEpilogues = *pCoreCbs; return ZE_RESULT_SUCCESS; } ze_result_t APITracerImp::enableTracer(ze_bool_t enable) { return pGlobalAPITracerContextImp->enableTracingImp(this, enable); } void APITracerContextImp::addThreadTracerDataToList(ThreadPrivateTracerData *threadDataP) { std::lock_guard 
lock(threadTracerDataListMutex); threadTracerDataList.push_back(threadDataP); } void APITracerContextImp::removeThreadTracerDataFromList(ThreadPrivateTracerData *threadDataP) { std::lock_guard lock(threadTracerDataListMutex); if (threadTracerDataList.empty()) return; threadTracerDataList.remove(threadDataP); } thread_local ThreadPrivateTracerData myThreadPrivateTracerData; ThreadPrivateTracerData::ThreadPrivateTracerData() { isInitialized = false; onList = false; tracerArrayPointer.store(nullptr, std::memory_order_relaxed); } ThreadPrivateTracerData::~ThreadPrivateTracerData() { if (onList) { globalAPITracerContextImp.removeThreadTracerDataFromList(this); onList = false; } tracerArrayPointer.store(nullptr, std::memory_order_relaxed); } void ThreadPrivateTracerData::removeThreadTracerDataFromList(void) { if (onList) { globalAPITracerContextImp.removeThreadTracerDataFromList(this); onList = false; } tracerArrayPointer.store(nullptr, std::memory_order_relaxed); } bool ThreadPrivateTracerData::testAndSetThreadTracerDataInitializedAndOnList(void) { if (!isInitialized) { isInitialized = true; onList = true; globalAPITracerContextImp.addThreadTracerDataToList(&myThreadPrivateTracerData); } return onList; } bool APITracerContextImp::isTracingEnabled() { return driver_ddiTable.enableTracing; } // // Walk the list of per-thread private data structures, testing // whether any of them reference this array. // // Return 1 if a reference is found. Otherwise return 0. // ze_bool_t APITracerContextImp::testForTracerArrayReferences(tracer_array_t *tracerArray) { std::lock_guard lock(threadTracerDataListMutex); std::list::iterator itr; for (itr = threadTracerDataList.begin(); itr != threadTracerDataList.end(); itr++) { if ((*itr)->tracerArrayPointer.load(std::memory_order_relaxed) == tracerArray) return 1; } return 0; } // // Walk the retiring_tracer_array_list, checking each member of the list for // references by per thread tracer array pointer. 
Delete and free // each tracer array that has no per-thread references. // // Return the number of entries on the retiring tracer array list. // size_t APITracerContextImp::testAndFreeRetiredTracers() { std::list::iterator itr = this->retiringTracerArrayList.begin(); while (itr != this->retiringTracerArrayList.end()) { tracer_array_t *retiringTracerArray = *itr; itr++; if (testForTracerArrayReferences(retiringTracerArray)) continue; this->retiringTracerArrayList.remove(retiringTracerArray); delete[] retiringTracerArray->tracerArrayEntries; delete retiringTracerArray; } return this->retiringTracerArrayList.size(); } size_t APITracerContextImp::updateTracerArrays() { tracer_array_t *newTracerArray; size_t newTracerArrayCount = this->enabledTracerImpList.size(); if (newTracerArrayCount != 0) { newTracerArray = new tracer_array_t; newTracerArray->tracerArrayCount = newTracerArrayCount; newTracerArray->tracerArrayEntries = new tracer_array_entry_t[newTracerArrayCount]; // // iterate over the list of enabled tracers, copying their entries into the // new tracer array // size_t i = 0; std::list::iterator itr; for (itr = enabledTracerImpList.begin(); itr != enabledTracerImpList.end(); itr++) { newTracerArray->tracerArrayEntries[i] = (*itr)->tracerFunctions; i++; } } else { newTracerArray = &emptyTracerArray; } // // active_tracer_array.load can use memory_order_relaxed here because // there is logically no transfer of other memory context between // threads in this case. // tracer_array_t *active_tracer_array_shadow = activeTracerArray.load(std::memory_order_relaxed); if (active_tracer_array_shadow != &emptyTracerArray) { retiringTracerArrayList.push_back(active_tracer_array_shadow); } // // This active_tracer_array.store must use memory_order_release. // This store DOES signal a logical transfer of tracer state information // from this thread to the tracing threads. 
// activeTracerArray.store(newTracerArray, std::memory_order_release); return testAndFreeRetiredTracers(); } ze_result_t APITracerContextImp::enableTracingImp(struct APITracerImp *tracerImp, ze_bool_t enable) { std::lock_guard lock(traceTableMutex); ze_result_t result; switch (tracerImp->tracingState) { case disabledState: if (enable) { enabledTracerImpList.push_back(tracerImp); tracerImp->tracingState = enabledState; updateTracerArrays(); } result = ZE_RESULT_SUCCESS; break; case enabledState: if (!enable) { enabledTracerImpList.remove(tracerImp); tracerImp->tracingState = disabledWaitingState; if (updateTracerArrays() == 0) tracerImp->tracingState = disabledState; } result = ZE_RESULT_SUCCESS; break; case disabledWaitingState: result = ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE; break; default: result = ZE_RESULT_ERROR_UNINITIALIZED; UNRECOVERABLE_IF(true); break; } return result; } // This is called by the destroy tracer method. // // This routine will return ZE_RESULT_SUCCESS // state if either it has never been enabled, // or if it has been enabled and then disabled. // // On ZE_RESULT_SUCESS, the destroy tracer method // can free the tracer's memory. // // ZE_RESULT_ERROR_UNINITIALIZED is returned // if the tracer has been enabled but not // disabled. The destroy tracer method // should NOT free this tracer's memory. 
// ze_result_t APITracerContextImp::finalizeDisableImpTracingWait(struct APITracerImp *tracerImp) { std::lock_guard lock(traceTableMutex); ze_result_t result; switch (tracerImp->tracingState) { case disabledState: result = ZE_RESULT_SUCCESS; break; case enabledState: result = ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE; break; case disabledWaitingState: while (testAndFreeRetiredTracers() != 0) { std::this_thread::sleep_for(std::chrono::milliseconds(1)); } tracerImp->tracingState = disabledState; result = ZE_RESULT_SUCCESS; break; default: result = ZE_RESULT_ERROR_UNINITIALIZED; UNRECOVERABLE_IF(true); break; } return result; } void *APITracerContextImp::getActiveTracersList() { tracer_array_t *stableTracerArray = nullptr; if (!myThreadPrivateTracerData.testAndSetThreadTracerDataInitializedAndOnList()) { return nullptr; } do { stableTracerArray = pGlobalAPITracerContextImp->activeTracerArray.load(std::memory_order_acquire); myThreadPrivateTracerData.tracerArrayPointer.store(stableTracerArray, std::memory_order_relaxed); } while (stableTracerArray != pGlobalAPITracerContextImp->activeTracerArray.load(std::memory_order_relaxed)); return (void *)stableTracerArray; } void APITracerContextImp::releaseActivetracersList() { if (myThreadPrivateTracerData.testAndSetThreadTracerDataInitializedAndOnList()) myThreadPrivateTracerData.tracerArrayPointer.store(nullptr, std::memory_order_relaxed); } } // namespace L0 compute-runtime-22.14.22890/level_zero/experimental/source/tracing/tracing_imp.h000066400000000000000000000235741422164147700276240ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/experimental/source/tracing/tracing.h" #include "level_zero/experimental/source/tracing/tracing_barrier_imp.h" #include "level_zero/experimental/source/tracing/tracing_cmdlist_imp.h" #include "level_zero/experimental/source/tracing/tracing_cmdqueue_imp.h" #include 
"level_zero/experimental/source/tracing/tracing_copy_imp.h" #include "level_zero/experimental/source/tracing/tracing_device_imp.h" #include "level_zero/experimental/source/tracing/tracing_driver_imp.h" #include "level_zero/experimental/source/tracing/tracing_event_imp.h" #include "level_zero/experimental/source/tracing/tracing_fence_imp.h" #include "level_zero/experimental/source/tracing/tracing_global_imp.h" #include "level_zero/experimental/source/tracing/tracing_image_imp.h" #include "level_zero/experimental/source/tracing/tracing_memory_imp.h" #include "level_zero/experimental/source/tracing/tracing_module_imp.h" #include "level_zero/experimental/source/tracing/tracing_residency_imp.h" #include "level_zero/experimental/source/tracing/tracing_sampler_imp.h" #include #include #include "ze_ddi_tables.h" #include #include #include #include #include #include extern ze_gpu_driver_dditable_t driver_ddiTable; namespace L0 { extern thread_local ze_bool_t tracingInProgress; extern struct APITracerContextImp *pGlobalAPITracerContextImp; typedef struct tracer_array_entry { zet_core_callbacks_t corePrologues; zet_core_callbacks_t coreEpilogues; zet_device_handle_t hDevice; void *pUserData; } tracer_array_entry_t; typedef struct tracerArray { size_t tracerArrayCount; tracer_array_entry_t *tracerArrayEntries; } tracer_array_t; typedef enum tracingState { disabledState, // tracing has never been enabled enabledState, // tracing is enabled. 
disabledWaitingState, // tracing has been disabled, but not waited for } tracingState_t; struct APITracerImp : APITracer { ze_result_t destroyTracer(zet_tracer_exp_handle_t phTracer) override; ze_result_t setPrologues(zet_core_callbacks_t *pCoreCbs) override; ze_result_t setEpilogues(zet_core_callbacks_t *pCoreCbs) override; ze_result_t enableTracer(ze_bool_t enable) override; tracer_array_entry_t tracerFunctions{}; tracingState_t tracingState = disabledState; private: }; class ThreadPrivateTracerData { public: void clearThreadTracerDataOnList(void) { onList = false; } void removeThreadTracerDataFromList(void); bool testAndSetThreadTracerDataInitializedAndOnList(void); bool onList; bool isInitialized; ThreadPrivateTracerData(); ~ThreadPrivateTracerData(); std::atomic tracerArrayPointer; private: ThreadPrivateTracerData(const ThreadPrivateTracerData &); ThreadPrivateTracerData &operator=(const ThreadPrivateTracerData &); }; struct APITracerContextImp : APITracerContext { public: APITracerContextImp() { activeTracerArray.store(&emptyTracerArray, std::memory_order_relaxed); }; ~APITracerContextImp() override; static void apiTracingEnable(ze_init_flag_t flag); void *getActiveTracersList() override; void releaseActivetracersList() override; ze_result_t enableTracingImp(struct APITracerImp *newTracer, ze_bool_t enable); ze_result_t finalizeDisableImpTracingWait(struct APITracerImp *oldTracer); bool isTracingEnabled(); void addThreadTracerDataToList(ThreadPrivateTracerData *threadDataP); void removeThreadTracerDataFromList(ThreadPrivateTracerData *threadDataP); private: std::mutex traceTableMutex; tracer_array_t emptyTracerArray = {0, NULL}; std::atomic activeTracerArray; // // a list of tracer arrays that were once active, but // have been replaced by a new active array. These // once-active tracer arrays may continue for some time // to have references to them among the per-thread // tracer array pointers. 
// std::list retiringTracerArrayList; std::list enabledTracerImpList; ze_bool_t testForTracerArrayReferences(tracer_array_t *tracerArray); size_t testAndFreeRetiredTracers(); size_t updateTracerArrays(); std::list threadTracerDataList; std::mutex threadTracerDataListMutex; }; extern thread_local ThreadPrivateTracerData myThreadPrivateTracerData; template class APITracerCallbackStateImp { public: T current_api_callback; void *pUserData; }; template class APITracerCallbackDataImp { public: T apiOrdinal = {}; std::vector> prologCallbacks; std::vector> epilogCallbacks; }; #define ZE_HANDLE_TRACER_RECURSION(ze_api_ptr, ...) \ do { \ if (L0::tracingInProgress) { \ return ze_api_ptr(__VA_ARGS__); \ } \ L0::tracingInProgress = 1; \ } while (0) #define ZE_GEN_TRACER_ARRAY_ENTRY(callbackPtr, tracerArray, tracerArrayIndex, callbackType, callbackCategory, callbackFunction) \ do { \ callbackPtr = tracerArray->tracerArrayEntries[tracerArrayIndex].callbackType.callbackCategory.callbackFunction; \ } while (0) #define ZE_GEN_PER_API_CALLBACK_STATE(perApiCallbackData, tracerType, callbackCategory, callbackFunctionType) \ L0::tracer_array_t *currentTracerArray; \ currentTracerArray = (L0::tracer_array_t *)L0::pGlobalAPITracerContextImp->getActiveTracersList(); \ if (currentTracerArray) { \ for (size_t i = 0; i < currentTracerArray->tracerArrayCount; i++) { \ tracerType prologueCallbackPtr; \ tracerType epilogue_callback_ptr; \ ZE_GEN_TRACER_ARRAY_ENTRY(prologueCallbackPtr, currentTracerArray, i, corePrologues, callbackCategory, callbackFunctionType); \ ZE_GEN_TRACER_ARRAY_ENTRY(epilogue_callback_ptr, currentTracerArray, i, coreEpilogues, callbackCategory, callbackFunctionType); \ \ L0::APITracerCallbackStateImp prologCallback; \ prologCallback.current_api_callback = prologueCallbackPtr; \ prologCallback.pUserData = currentTracerArray->tracerArrayEntries[i].pUserData; \ perApiCallbackData.prologCallbacks.push_back(prologCallback); \ \ L0::APITracerCallbackStateImp epilogCallback; \ 
epilogCallback.current_api_callback = epilogue_callback_ptr; \ epilogCallback.pUserData = currentTracerArray->tracerArrayEntries[i].pUserData; \ perApiCallbackData.epilogCallbacks.push_back(epilogCallback); \ } \ } template ze_result_t APITracerWrapperImp(TFunction_pointer zeApiPtr, TParams paramsStruct, TTracer apiOrdinal, TTracerPrologCallbacks prologCallbacks, TTracerEpilogCallbacks epilogCallbacks, Args &&...args) { ze_result_t ret = ZE_RESULT_SUCCESS; std::vector> *callbacks_prologs = &prologCallbacks; std::vector ppTracerInstanceUserData; ppTracerInstanceUserData.resize(callbacks_prologs->size()); for (size_t i = 0; i < callbacks_prologs->size(); i++) { if (callbacks_prologs->at(i).current_api_callback != nullptr) callbacks_prologs->at(i).current_api_callback(paramsStruct, ret, callbacks_prologs->at(i).pUserData, &ppTracerInstanceUserData[i]); } ret = zeApiPtr(args...); std::vector> *callbacksEpilogs = &epilogCallbacks; for (size_t i = 0; i < callbacksEpilogs->size(); i++) { if (callbacksEpilogs->at(i).current_api_callback != nullptr) callbacksEpilogs->at(i).current_api_callback(paramsStruct, ret, callbacksEpilogs->at(i).pUserData, &ppTracerInstanceUserData[i]); } L0::tracingInProgress = 0; L0::pGlobalAPITracerContextImp->releaseActivetracersList(); return ret; } } // namespace L0 compute-runtime-22.14.22890/level_zero/experimental/source/tracing/tracing_memory_imp.cpp000066400000000000000000000640131422164147700315400ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/experimental/source/tracing/tracing_imp.h" ZE_APIEXPORT ze_result_t ZE_APICALL zeMemAllocShared_Tracing(ze_context_handle_t hContext, const ze_device_mem_alloc_desc_t *deviceDesc, const ze_host_mem_alloc_desc_t *hostDesc, size_t size, size_t alignment, ze_device_handle_t hDevice, void **pptr) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Mem.pfnAllocShared, hContext, deviceDesc, hostDesc, size, 
alignment, hDevice, pptr); ze_mem_alloc_shared_params_t tracerParams; tracerParams.phContext = &hContext; tracerParams.pdevice_desc = &deviceDesc; tracerParams.phost_desc = &hostDesc; tracerParams.psize = &size; tracerParams.palignment = &alignment; tracerParams.phDevice = &hDevice; tracerParams.ppptr = &pptr; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnMemAllocSharedCb_t, Mem, pfnAllocSharedCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Mem.pfnAllocShared, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phContext, *tracerParams.pdevice_desc, *tracerParams.phost_desc, *tracerParams.psize, *tracerParams.palignment, *tracerParams.phDevice, *tracerParams.ppptr); } ZE_APIEXPORT ze_result_t ZE_APICALL zeMemAllocDevice_Tracing(ze_context_handle_t hContext, const ze_device_mem_alloc_desc_t *deviceDesc, size_t size, size_t alignment, ze_device_handle_t hDevice, void **pptr) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Mem.pfnAllocDevice, hContext, deviceDesc, size, alignment, hDevice, pptr); ze_mem_alloc_device_params_t tracerParams; tracerParams.phContext = &hContext; tracerParams.pdevice_desc = &deviceDesc; tracerParams.psize = &size; tracerParams.palignment = &alignment; tracerParams.phDevice = &hDevice; tracerParams.ppptr = &pptr; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnMemAllocDeviceCb_t, Mem, pfnAllocDeviceCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Mem.pfnAllocDevice, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phContext, *tracerParams.pdevice_desc, *tracerParams.psize, *tracerParams.palignment, *tracerParams.phDevice, *tracerParams.ppptr); } ZE_APIEXPORT ze_result_t ZE_APICALL zeMemAllocHost_Tracing(ze_context_handle_t hContext, const 
ze_host_mem_alloc_desc_t *hostDesc, size_t size, size_t alignment, void **pptr) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Mem.pfnAllocHost, hContext, hostDesc, size, alignment, pptr); ze_mem_alloc_host_params_t tracerParams; tracerParams.phContext = &hContext; tracerParams.phost_desc = &hostDesc; tracerParams.psize = &size; tracerParams.palignment = &alignment; tracerParams.ppptr = &pptr; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnMemAllocHostCb_t, Mem, pfnAllocHostCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Mem.pfnAllocHost, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phContext, *tracerParams.phost_desc, *tracerParams.psize, *tracerParams.palignment, *tracerParams.ppptr); } ZE_APIEXPORT ze_result_t ZE_APICALL zeMemFree_Tracing(ze_context_handle_t hContext, void *ptr) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Mem.pfnFree, hContext, ptr); ze_mem_free_params_t tracerParams; tracerParams.phContext = &hContext; tracerParams.pptr = &ptr; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnMemFreeCb_t, Mem, pfnFreeCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Mem.pfnFree, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phContext, *tracerParams.pptr); } ZE_APIEXPORT ze_result_t ZE_APICALL zeMemGetAllocProperties_Tracing(ze_context_handle_t hContext, const void *ptr, ze_memory_allocation_properties_t *pMemAllocProperties, ze_device_handle_t *phDevice) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Mem.pfnGetAllocProperties, hContext, ptr, pMemAllocProperties, phDevice); ze_mem_get_alloc_properties_params_t tracerParams; tracerParams.phContext = &hContext; tracerParams.pptr = &ptr; tracerParams.ppMemAllocProperties = 
&pMemAllocProperties; tracerParams.pphDevice = &phDevice; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnMemGetAllocPropertiesCb_t, Mem, pfnGetAllocPropertiesCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Mem.pfnGetAllocProperties, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phContext, *tracerParams.pptr, *tracerParams.ppMemAllocProperties, *tracerParams.pphDevice); }
/* Tracing wrapper for Mem.pfnGetAddressRange. Invokes ZE_HANDLE_TRACER_RECURSION with the DDI pointer and the raw arguments (presumably a re-entrancy bypass; confirm against the macro), stores the address of each argument in ze_mem_get_address_range_params_t, passes apiCallbackData to ZE_GEN_PER_API_CALLBACK_STATE for pfnGetAddressRangeCb, and returns the result of L0::APITracerWrapperImp, whose trailing arguments are read back through the tracerParams pointers. */
ZE_APIEXPORT ze_result_t ZE_APICALL zeMemGetAddressRange_Tracing(ze_context_handle_t hContext, const void *ptr, void **pBase, size_t *pSize) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Mem.pfnGetAddressRange, hContext, ptr, pBase, pSize); ze_mem_get_address_range_params_t tracerParams; tracerParams.phContext = &hContext; tracerParams.pptr = &ptr; tracerParams.ppBase = &pBase; tracerParams.ppSize = &pSize; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnMemGetAddressRangeCb_t, Mem, pfnGetAddressRangeCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Mem.pfnGetAddressRange, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phContext, *tracerParams.pptr, *tracerParams.ppBase, *tracerParams.ppSize); }
/* Tracing wrapper for Mem.pfnGetIpcHandle. Invokes ZE_HANDLE_TRACER_RECURSION with the DDI pointer and the raw arguments (presumably a re-entrancy bypass; confirm against the macro), stores the address of each argument in ze_mem_get_ipc_handle_params_t, passes apiCallbackData to ZE_GEN_PER_API_CALLBACK_STATE for pfnGetIpcHandleCb, and returns the result of L0::APITracerWrapperImp, whose trailing arguments are read back through the tracerParams pointers. */
ZE_APIEXPORT ze_result_t ZE_APICALL zeMemGetIpcHandle_Tracing(ze_context_handle_t hContext, const void *ptr, ze_ipc_mem_handle_t *pIpcHandle) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Mem.pfnGetIpcHandle, hContext, ptr, pIpcHandle); ze_mem_get_ipc_handle_params_t tracerParams; tracerParams.phContext = &hContext; tracerParams.pptr = &ptr; tracerParams.ppIpcHandle = &pIpcHandle; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnMemGetIpcHandleCb_t, Mem, pfnGetIpcHandleCb); return
L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Mem.pfnGetIpcHandle, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phContext, *tracerParams.pptr, *tracerParams.ppIpcHandle); }
/* Tracing wrapper for Mem.pfnOpenIpcHandle. Invokes ZE_HANDLE_TRACER_RECURSION with the DDI pointer and the raw arguments (presumably a re-entrancy bypass; confirm against the macro), stores the address of each argument (including the by-value IPC handle and flags) in ze_mem_open_ipc_handle_params_t, passes apiCallbackData to ZE_GEN_PER_API_CALLBACK_STATE for pfnOpenIpcHandleCb, and returns the result of L0::APITracerWrapperImp, whose trailing arguments are read back through the tracerParams pointers. */
ZE_APIEXPORT ze_result_t ZE_APICALL zeMemOpenIpcHandle_Tracing(ze_context_handle_t hContext, ze_device_handle_t hDevice, ze_ipc_mem_handle_t handle, ze_ipc_memory_flags_t flags, void **pptr) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Mem.pfnOpenIpcHandle, hContext, hDevice, handle, flags, pptr); ze_mem_open_ipc_handle_params_t tracerParams; tracerParams.phContext = &hContext; tracerParams.phDevice = &hDevice; tracerParams.phandle = &handle; tracerParams.pflags = &flags; tracerParams.ppptr = &pptr; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnMemOpenIpcHandleCb_t, Mem, pfnOpenIpcHandleCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Mem.pfnOpenIpcHandle, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phContext, *tracerParams.phDevice, *tracerParams.phandle, *tracerParams.pflags, *tracerParams.ppptr); }
/* Tracing wrapper for Mem.pfnCloseIpcHandle. Invokes ZE_HANDLE_TRACER_RECURSION with the DDI pointer and the raw arguments (presumably a re-entrancy bypass; confirm against the macro), stores the address of each argument in ze_mem_close_ipc_handle_params_t, passes apiCallbackData to ZE_GEN_PER_API_CALLBACK_STATE for pfnCloseIpcHandleCb, and returns the result of L0::APITracerWrapperImp, whose trailing arguments are read back through the tracerParams pointers. */
ZE_APIEXPORT ze_result_t ZE_APICALL zeMemCloseIpcHandle_Tracing(ze_context_handle_t hContext, const void *ptr) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Mem.pfnCloseIpcHandle, hContext, ptr); ze_mem_close_ipc_handle_params_t tracerParams; tracerParams.phContext = &hContext; tracerParams.pptr = &ptr; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnMemCloseIpcHandleCb_t, Mem, pfnCloseIpcHandleCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Mem.pfnCloseIpcHandle, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phContext, *tracerParams.pptr); }
/* Tracing wrapper for VirtualMem.pfnReserve. Invokes ZE_HANDLE_TRACER_RECURSION with the DDI pointer and the raw arguments (presumably a re-entrancy bypass; confirm against the macro), stores the address of each argument in ze_virtual_mem_reserve_params_t, passes apiCallbackData to ZE_GEN_PER_API_CALLBACK_STATE for pfnReserveCb, and returns the result of L0::APITracerWrapperImp, whose trailing arguments are read back through the tracerParams pointers. */
ZE_APIEXPORT ze_result_t ZE_APICALL
zeVirtualMemReserve_Tracing(ze_context_handle_t hContext, const void *pStart, size_t size, void **pptr) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.VirtualMem.pfnReserve, hContext, pStart, size, pptr); ze_virtual_mem_reserve_params_t tracerParams; tracerParams.phContext = &hContext; tracerParams.ppStart = &pStart; tracerParams.psize = &size; tracerParams.ppptr = &pptr; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnVirtualMemReserveCb_t, VirtualMem, pfnReserveCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.VirtualMem.pfnReserve, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phContext, *tracerParams.ppStart, *tracerParams.psize, *tracerParams.ppptr); }
/* Tracing wrapper for VirtualMem.pfnFree. Invokes ZE_HANDLE_TRACER_RECURSION with the DDI pointer and the raw arguments (presumably a re-entrancy bypass; confirm against the macro), stores the address of each argument in ze_virtual_mem_free_params_t, passes apiCallbackData to ZE_GEN_PER_API_CALLBACK_STATE for pfnFreeCb, and returns the result of L0::APITracerWrapperImp, whose trailing arguments are read back through the tracerParams pointers. */
ZE_APIEXPORT ze_result_t ZE_APICALL zeVirtualMemFree_Tracing(ze_context_handle_t hContext, const void *ptr, size_t size) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.VirtualMem.pfnFree, hContext, ptr, size); ze_virtual_mem_free_params_t tracerParams; tracerParams.phContext = &hContext; tracerParams.pptr = &ptr; tracerParams.psize = &size; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnVirtualMemFreeCb_t, VirtualMem, pfnFreeCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.VirtualMem.pfnFree, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phContext, *tracerParams.pptr, *tracerParams.psize); }
/* Tracing wrapper for VirtualMem.pfnQueryPageSize. Invokes ZE_HANDLE_TRACER_RECURSION with the DDI pointer and the raw arguments (presumably a re-entrancy bypass; confirm against the macro), stores the address of each argument in ze_virtual_mem_query_page_size_params_t, passes apiCallbackData to ZE_GEN_PER_API_CALLBACK_STATE for pfnQueryPageSizeCb, and returns the result of L0::APITracerWrapperImp, whose trailing arguments are read back through the tracerParams pointers. */
ZE_APIEXPORT ze_result_t ZE_APICALL zeVirtualMemQueryPageSize_Tracing(ze_context_handle_t hContext, ze_device_handle_t hDevice, size_t size, size_t *pagesize) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.VirtualMem.pfnQueryPageSize, hContext, hDevice, size, pagesize); ze_virtual_mem_query_page_size_params_t tracerParams; tracerParams.phContext = &hContext; tracerParams.phDevice = &hDevice;
tracerParams.psize = &size; tracerParams.ppagesize = &pagesize; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnVirtualMemQueryPageSizeCb_t, VirtualMem, pfnQueryPageSizeCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.VirtualMem.pfnQueryPageSize, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phContext, *tracerParams.phDevice, *tracerParams.psize, *tracerParams.ppagesize); }
/* Tracing wrapper for VirtualMem.pfnMap. Invokes ZE_HANDLE_TRACER_RECURSION with the DDI pointer and the raw arguments (presumably a re-entrancy bypass; confirm against the macro), stores the address of each argument in ze_virtual_mem_map_params_t, passes apiCallbackData to ZE_GEN_PER_API_CALLBACK_STATE for pfnMapCb, and returns the result of L0::APITracerWrapperImp, whose trailing arguments are read back through the tracerParams pointers. */
ZE_APIEXPORT ze_result_t ZE_APICALL zeVirtualMemMap_Tracing(ze_context_handle_t hContext, const void *ptr, size_t size, ze_physical_mem_handle_t hPhysicalMemory, size_t offset, ze_memory_access_attribute_t access) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.VirtualMem.pfnMap, hContext, ptr, size, hPhysicalMemory, offset, access); ze_virtual_mem_map_params_t tracerParams; tracerParams.phContext = &hContext; tracerParams.pptr = &ptr; tracerParams.psize = &size; tracerParams.phPhysicalMemory = &hPhysicalMemory; tracerParams.poffset = &offset; tracerParams.paccess = &access; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnVirtualMemMapCb_t, VirtualMem, pfnMapCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.VirtualMem.pfnMap, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phContext, *tracerParams.pptr, *tracerParams.psize, *tracerParams.phPhysicalMemory, *tracerParams.poffset, *tracerParams.paccess); }
/* Tracing wrapper for VirtualMem.pfnUnmap. Invokes ZE_HANDLE_TRACER_RECURSION with the DDI pointer and the raw arguments (presumably a re-entrancy bypass; confirm against the macro), stores the address of each argument in ze_virtual_mem_unmap_params_t, passes apiCallbackData to ZE_GEN_PER_API_CALLBACK_STATE for pfnUnmapCb, and returns the result of L0::APITracerWrapperImp, whose trailing arguments are read back through the tracerParams pointers. */
ZE_APIEXPORT ze_result_t ZE_APICALL zeVirtualMemUnmap_Tracing(ze_context_handle_t hContext, const void *ptr, size_t size) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.VirtualMem.pfnUnmap, hContext, ptr, size); ze_virtual_mem_unmap_params_t tracerParams; tracerParams.phContext = &hContext; tracerParams.pptr = &ptr; tracerParams.psize = &size; L0::APITracerCallbackDataImp apiCallbackData;
ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnVirtualMemUnmapCb_t, VirtualMem, pfnUnmapCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.VirtualMem.pfnUnmap, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phContext, *tracerParams.pptr, *tracerParams.psize); }
/* Tracing wrapper for VirtualMem.pfnSetAccessAttribute. Invokes ZE_HANDLE_TRACER_RECURSION with the DDI pointer and the raw arguments (presumably a re-entrancy bypass; confirm against the macro), stores the address of each argument in ze_virtual_mem_set_access_attribute_params_t, passes apiCallbackData to ZE_GEN_PER_API_CALLBACK_STATE for pfnSetAccessAttributeCb, and returns the result of L0::APITracerWrapperImp, whose trailing arguments are read back through the tracerParams pointers. */
ZE_APIEXPORT ze_result_t ZE_APICALL zeVirtualMemSetAccessAttribute_Tracing(ze_context_handle_t hContext, const void *ptr, size_t size, ze_memory_access_attribute_t access) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.VirtualMem.pfnSetAccessAttribute, hContext, ptr, size, access); ze_virtual_mem_set_access_attribute_params_t tracerParams; tracerParams.phContext = &hContext; tracerParams.pptr = &ptr; tracerParams.psize = &size; tracerParams.paccess = &access; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnVirtualMemSetAccessAttributeCb_t, VirtualMem, pfnSetAccessAttributeCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.VirtualMem.pfnSetAccessAttribute, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phContext, *tracerParams.pptr, *tracerParams.psize, *tracerParams.paccess); }
/* Tracing wrapper for VirtualMem.pfnGetAccessAttribute. Invokes ZE_HANDLE_TRACER_RECURSION with the DDI pointer and the raw arguments (presumably a re-entrancy bypass; confirm against the macro), stores the address of each argument in ze_virtual_mem_get_access_attribute_params_t, passes apiCallbackData to ZE_GEN_PER_API_CALLBACK_STATE for pfnGetAccessAttributeCb, and returns the result of L0::APITracerWrapperImp, whose trailing arguments are read back through the tracerParams pointers. */
ZE_APIEXPORT ze_result_t ZE_APICALL zeVirtualMemGetAccessAttribute_Tracing(ze_context_handle_t hContext, const void *ptr, size_t size, ze_memory_access_attribute_t *access, size_t *outSize) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.VirtualMem.pfnGetAccessAttribute, hContext, ptr, size, access, outSize); ze_virtual_mem_get_access_attribute_params_t tracerParams; tracerParams.phContext = &hContext; tracerParams.pptr = &ptr; tracerParams.psize = &size; tracerParams.paccess = &access; tracerParams.poutSize = &outSize; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnVirtualMemGetAccessAttributeCb_t, VirtualMem,
pfnGetAccessAttributeCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.VirtualMem.pfnGetAccessAttribute, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phContext, *tracerParams.pptr, *tracerParams.psize, *tracerParams.paccess, *tracerParams.poutSize); }
/* Tracing wrapper for PhysicalMem.pfnCreate. Invokes ZE_HANDLE_TRACER_RECURSION with the DDI pointer and the raw arguments (presumably a re-entrancy bypass; confirm against the macro), stores the address of each argument in ze_physical_mem_create_params_t, passes apiCallbackData to ZE_GEN_PER_API_CALLBACK_STATE for pfnCreateCb, and returns the result of L0::APITracerWrapperImp, whose trailing arguments are read back through the tracerParams pointers. */
ZE_APIEXPORT ze_result_t ZE_APICALL zePhysicalMemCreate_Tracing(ze_context_handle_t hContext, ze_device_handle_t hDevice, ze_physical_mem_desc_t *desc, ze_physical_mem_handle_t *phPhysicalMemory) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.PhysicalMem.pfnCreate, hContext, hDevice, desc, phPhysicalMemory); ze_physical_mem_create_params_t tracerParams; tracerParams.phContext = &hContext; tracerParams.phDevice = &hDevice; tracerParams.pdesc = &desc; tracerParams.pphPhysicalMemory = &phPhysicalMemory; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnPhysicalMemCreateCb_t, PhysicalMem, pfnCreateCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.PhysicalMem.pfnCreate, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phContext, *tracerParams.phDevice, *tracerParams.pdesc, *tracerParams.pphPhysicalMemory); }
/* Tracing wrapper for PhysicalMem.pfnDestroy. Invokes ZE_HANDLE_TRACER_RECURSION with the DDI pointer and the raw arguments (presumably a re-entrancy bypass; confirm against the macro), stores the address of each argument in ze_physical_mem_destroy_params_t, passes apiCallbackData to ZE_GEN_PER_API_CALLBACK_STATE for pfnDestroyCb, and returns the result of L0::APITracerWrapperImp, whose trailing arguments are read back through the tracerParams pointers. */
ZE_APIEXPORT ze_result_t ZE_APICALL zePhysicalMemDestroy_Tracing(ze_context_handle_t hContext, ze_physical_mem_handle_t hPhysicalMemory) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.PhysicalMem.pfnDestroy, hContext, hPhysicalMemory); ze_physical_mem_destroy_params_t tracerParams; tracerParams.phContext = &hContext; tracerParams.phPhysicalMemory = &hPhysicalMemory; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnPhysicalMemDestroyCb_t, PhysicalMem, pfnDestroyCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.PhysicalMem.pfnDestroy, &tracerParams, apiCallbackData.apiOrdinal,
apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phContext, *tracerParams.phPhysicalMemory); }
compute-runtime-22.14.22890/level_zero/experimental/source/tracing/tracing_memory_imp.h000066400000000000000000000111011422164147700311730ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */
#pragma once
/* Prototypes, with C linkage, for the Mem, VirtualMem and PhysicalMem *_Tracing entry points. Each is marked ZE_APIEXPORT / ZE_APICALL and returns ze_result_t. */
extern "C" {
ZE_APIEXPORT ze_result_t ZE_APICALL zeMemAllocShared_Tracing(ze_context_handle_t hContext, const ze_device_mem_alloc_desc_t *deviceDesc, const ze_host_mem_alloc_desc_t *hostDesc, size_t size, size_t alignment, ze_device_handle_t hDevice, void **pptr);
ZE_APIEXPORT ze_result_t ZE_APICALL zeMemAllocDevice_Tracing(ze_context_handle_t hContext, const ze_device_mem_alloc_desc_t *deviceDesc, size_t size, size_t alignment, ze_device_handle_t hDevice, void **pptr);
ZE_APIEXPORT ze_result_t ZE_APICALL zeMemAllocHost_Tracing(ze_context_handle_t hContext, const ze_host_mem_alloc_desc_t *hostDesc, size_t size, size_t alignment, void **pptr);
ZE_APIEXPORT ze_result_t ZE_APICALL zeMemFree_Tracing(ze_context_handle_t hContext, void *ptr);
ZE_APIEXPORT ze_result_t ZE_APICALL zeMemGetAllocProperties_Tracing(ze_context_handle_t hContext, const void *ptr, ze_memory_allocation_properties_t *pMemAllocProperties, ze_device_handle_t *phDevice);
ZE_APIEXPORT ze_result_t ZE_APICALL zeMemGetAddressRange_Tracing(ze_context_handle_t hContext, const void *ptr, void **pBase, size_t *pSize);
ZE_APIEXPORT ze_result_t ZE_APICALL zeMemGetIpcHandle_Tracing(ze_context_handle_t hContext, const void *ptr, ze_ipc_mem_handle_t *pIpcHandle);
ZE_APIEXPORT ze_result_t ZE_APICALL zeMemOpenIpcHandle_Tracing(ze_context_handle_t hContext, ze_device_handle_t hDevice, ze_ipc_mem_handle_t handle, ze_ipc_memory_flags_t flags, void **pptr);
ZE_APIEXPORT ze_result_t ZE_APICALL zeMemCloseIpcHandle_Tracing(ze_context_handle_t hContext, const void *ptr);
ZE_APIEXPORT ze_result_t ZE_APICALL
zeVirtualMemReserve_Tracing(ze_context_handle_t hContext, const void *pStart, size_t size, void **pptr);
/* Prototypes for the remaining VirtualMem and PhysicalMem tracing entry points. */
ZE_APIEXPORT ze_result_t ZE_APICALL zeVirtualMemFree_Tracing(ze_context_handle_t hContext, const void *ptr, size_t size);
ZE_APIEXPORT ze_result_t ZE_APICALL zeVirtualMemQueryPageSize_Tracing(ze_context_handle_t hContext, ze_device_handle_t hDevice, size_t size, size_t *pagesize);
ZE_APIEXPORT ze_result_t ZE_APICALL zeVirtualMemMap_Tracing(ze_context_handle_t hContext, const void *ptr, size_t size, ze_physical_mem_handle_t hPhysicalMemory, size_t offset, ze_memory_access_attribute_t access);
ZE_APIEXPORT ze_result_t ZE_APICALL zeVirtualMemUnmap_Tracing(ze_context_handle_t hContext, const void *ptr, size_t size);
ZE_APIEXPORT ze_result_t ZE_APICALL zeVirtualMemSetAccessAttribute_Tracing(ze_context_handle_t hContext, const void *ptr, size_t size, ze_memory_access_attribute_t access);
ZE_APIEXPORT ze_result_t ZE_APICALL zeVirtualMemGetAccessAttribute_Tracing(ze_context_handle_t hContext, const void *ptr, size_t size, ze_memory_access_attribute_t *access, size_t *outSize);
ZE_APIEXPORT ze_result_t ZE_APICALL zePhysicalMemCreate_Tracing(ze_context_handle_t hContext, ze_device_handle_t hDevice, ze_physical_mem_desc_t *desc, ze_physical_mem_handle_t *phPhysicalMemory);
ZE_APIEXPORT ze_result_t ZE_APICALL zePhysicalMemDestroy_Tracing(ze_context_handle_t hContext, ze_physical_mem_handle_t hPhysicalMemory);
}
compute-runtime-22.14.22890/level_zero/experimental/source/tracing/tracing_module_imp.cpp000066400000000000000000001113021422164147700315070ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */
#include "level_zero/experimental/source/tracing/tracing_imp.h"
/* Tracing wrapper for Module.pfnCreate. Invokes ZE_HANDLE_TRACER_RECURSION with the DDI pointer and the raw arguments (presumably a re-entrancy bypass; confirm against the macro), stores the address of each argument in ze_module_create_params_t, passes apiCallbackData to ZE_GEN_PER_API_CALLBACK_STATE for pfnCreateCb, and returns the result of L0::APITracerWrapperImp, whose trailing arguments are read back through the tracerParams pointers. */
ZE_APIEXPORT ze_result_t ZE_APICALL zeModuleCreate_Tracing(ze_context_handle_t hContext, ze_device_handle_t hDevice, const ze_module_desc_t *desc, ze_module_handle_t *phModule, ze_module_build_log_handle_t *phBuildLog) {
ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Module.pfnCreate, hContext, hDevice, desc, phModule, phBuildLog); ze_module_create_params_t tracerParams; tracerParams.phContext = &hContext; tracerParams.phDevice = &hDevice; tracerParams.pdesc = &desc; tracerParams.pphModule = &phModule; tracerParams.pphBuildLog = &phBuildLog; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnModuleCreateCb_t, Module, pfnCreateCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Module.pfnCreate, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phContext, *tracerParams.phDevice, *tracerParams.pdesc, *tracerParams.pphModule, *tracerParams.pphBuildLog); }
/* Tracing wrapper for Module.pfnDestroy. Invokes ZE_HANDLE_TRACER_RECURSION with the DDI pointer and the raw argument (presumably a re-entrancy bypass; confirm against the macro), stores the address of hModule in ze_module_destroy_params_t, passes apiCallbackData to ZE_GEN_PER_API_CALLBACK_STATE for pfnDestroyCb, and returns the result of L0::APITracerWrapperImp, whose trailing argument is read back through the tracerParams pointer. */
ZE_APIEXPORT ze_result_t ZE_APICALL zeModuleDestroy_Tracing(ze_module_handle_t hModule) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Module.pfnDestroy, hModule); ze_module_destroy_params_t tracerParams; tracerParams.phModule = &hModule; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnModuleDestroyCb_t, Module, pfnDestroyCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Module.pfnDestroy, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phModule); }
/* Tracing wrapper for ModuleBuildLog.pfnDestroy. Invokes ZE_HANDLE_TRACER_RECURSION with the DDI pointer and the raw argument (presumably a re-entrancy bypass; confirm against the macro), stores the address of hModuleBuildLog in ze_module_build_log_destroy_params_t, passes apiCallbackData to ZE_GEN_PER_API_CALLBACK_STATE for the ModuleBuildLog pfnDestroyCb, and returns the result of L0::APITracerWrapperImp, whose trailing argument is read back through the tracerParams pointer. */
ZE_APIEXPORT ze_result_t ZE_APICALL zeModuleBuildLogDestroy_Tracing(ze_module_build_log_handle_t hModuleBuildLog) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.ModuleBuildLog.pfnDestroy, hModuleBuildLog); ze_module_build_log_destroy_params_t tracerParams; tracerParams.phModuleBuildLog = &hModuleBuildLog; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnModuleBuildLogDestroyCb_t, ModuleBuildLog, pfnDestroyCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.ModuleBuildLog.pfnDestroy, &tracerParams,
apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phModuleBuildLog); }
/* Tracing wrapper for ModuleBuildLog.pfnGetString. Invokes ZE_HANDLE_TRACER_RECURSION with the DDI pointer and the raw arguments (presumably a re-entrancy bypass; confirm against the macro), stores the address of each argument in ze_module_build_log_get_string_params_t, passes apiCallbackData to ZE_GEN_PER_API_CALLBACK_STATE for pfnGetStringCb, and returns the result of L0::APITracerWrapperImp, whose trailing arguments are read back through the tracerParams pointers. */
ZE_APIEXPORT ze_result_t ZE_APICALL zeModuleBuildLogGetString_Tracing(ze_module_build_log_handle_t hModuleBuildLog, size_t *pSize, char *pBuildLog) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.ModuleBuildLog.pfnGetString, hModuleBuildLog, pSize, pBuildLog); ze_module_build_log_get_string_params_t tracerParams; tracerParams.phModuleBuildLog = &hModuleBuildLog; tracerParams.ppSize = &pSize; tracerParams.ppBuildLog = &pBuildLog; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnModuleBuildLogGetStringCb_t, ModuleBuildLog, pfnGetStringCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.ModuleBuildLog.pfnGetString, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phModuleBuildLog, *tracerParams.ppSize, *tracerParams.ppBuildLog); }
/* Tracing wrapper for Module.pfnGetNativeBinary. Invokes ZE_HANDLE_TRACER_RECURSION with the DDI pointer and the raw arguments (presumably a re-entrancy bypass; confirm against the macro), stores the address of each argument in ze_module_get_native_binary_params_t, passes apiCallbackData to ZE_GEN_PER_API_CALLBACK_STATE for pfnGetNativeBinaryCb, and returns the result of L0::APITracerWrapperImp, whose trailing arguments are read back through the tracerParams pointers. */
ZE_APIEXPORT ze_result_t ZE_APICALL zeModuleGetNativeBinary_Tracing(ze_module_handle_t hModule, size_t *pSize, uint8_t *pModuleNativeBinary) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Module.pfnGetNativeBinary, hModule, pSize, pModuleNativeBinary); ze_module_get_native_binary_params_t tracerParams; tracerParams.phModule = &hModule; tracerParams.ppSize = &pSize; tracerParams.ppModuleNativeBinary = &pModuleNativeBinary; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnModuleGetNativeBinaryCb_t, Module, pfnGetNativeBinaryCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Module.pfnGetNativeBinary, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phModule, *tracerParams.ppSize, *tracerParams.ppModuleNativeBinary); }
/* Tracing wrapper for Module.pfnGetGlobalPointer. Invokes ZE_HANDLE_TRACER_RECURSION with the DDI pointer and the raw arguments (presumably a re-entrancy bypass; confirm against the macro), stores the address of each argument in ze_module_get_global_pointer_params_t, passes apiCallbackData to ZE_GEN_PER_API_CALLBACK_STATE for pfnGetGlobalPointerCb, and returns the result of L0::APITracerWrapperImp, whose trailing arguments are read back through the tracerParams pointers. */
ZE_APIEXPORT ze_result_t ZE_APICALL zeModuleGetGlobalPointer_Tracing(ze_module_handle_t hModule, const
char *pGlobalName, size_t *pSize, void **pptr) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Module.pfnGetGlobalPointer, hModule, pGlobalName, pSize, pptr); ze_module_get_global_pointer_params_t tracerParams; tracerParams.phModule = &hModule; tracerParams.ppGlobalName = &pGlobalName; tracerParams.ppSize = &pSize; tracerParams.ppptr = &pptr; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnModuleGetGlobalPointerCb_t, Module, pfnGetGlobalPointerCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Module.pfnGetGlobalPointer, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phModule, *tracerParams.ppGlobalName, *tracerParams.ppSize, *tracerParams.ppptr); }
/* Tracing wrapper for Module.pfnDynamicLink. Invokes ZE_HANDLE_TRACER_RECURSION with the DDI pointer and the raw arguments (presumably a re-entrancy bypass; confirm against the macro), stores the address of each argument in ze_module_dynamic_link_params_t, passes apiCallbackData to ZE_GEN_PER_API_CALLBACK_STATE for pfnDynamicLinkCb, and returns the result of L0::APITracerWrapperImp, whose trailing arguments are read back through the tracerParams pointers. */
ZE_APIEXPORT ze_result_t ZE_APICALL zeModuleDynamicLink_Tracing(uint32_t numModules, ze_module_handle_t *phModules, ze_module_build_log_handle_t *phLinkLog) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Module.pfnDynamicLink, numModules, phModules, phLinkLog); ze_module_dynamic_link_params_t tracerParams; tracerParams.pnumModules = &numModules; tracerParams.pphModules = &phModules; tracerParams.pphLinkLog = &phLinkLog; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnModuleDynamicLinkCb_t, Module, pfnDynamicLinkCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Module.pfnDynamicLink, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.pnumModules, *tracerParams.pphModules, *tracerParams.pphLinkLog); }
/* Tracing wrapper for Module.pfnGetProperties. Invokes ZE_HANDLE_TRACER_RECURSION with the DDI pointer and the raw arguments (presumably a re-entrancy bypass; confirm against the macro), stores the address of each argument in ze_module_get_properties_params_t, passes apiCallbackData to ZE_GEN_PER_API_CALLBACK_STATE for pfnGetPropertiesCb, and returns the result of L0::APITracerWrapperImp, whose trailing arguments are read back through the tracerParams pointers. */
ZE_APIEXPORT ze_result_t ZE_APICALL zeModuleGetProperties_Tracing(ze_module_handle_t hModule, ze_module_properties_t *pModuleProperties) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Module.pfnGetProperties, hModule, pModuleProperties); ze_module_get_properties_params_t tracerParams; tracerParams.phModule =
&hModule; tracerParams.ppModuleProperties = &pModuleProperties; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnModuleGetPropertiesCb_t, Module, pfnGetPropertiesCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Module.pfnGetProperties, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phModule, *tracerParams.ppModuleProperties); }
/* Tracing wrapper for Kernel.pfnCreate. Invokes ZE_HANDLE_TRACER_RECURSION with the DDI pointer and the raw arguments (presumably a re-entrancy bypass; confirm against the macro), stores the address of each argument in ze_kernel_create_params_t, passes apiCallbackData to ZE_GEN_PER_API_CALLBACK_STATE for pfnCreateCb, and returns the result of L0::APITracerWrapperImp, whose trailing arguments are read back through the tracerParams pointers. */
ZE_APIEXPORT ze_result_t ZE_APICALL zeKernelCreate_Tracing(ze_module_handle_t hModule, const ze_kernel_desc_t *desc, ze_kernel_handle_t *phKernel) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Kernel.pfnCreate, hModule, desc, phKernel); ze_kernel_create_params_t tracerParams; tracerParams.phModule = &hModule; tracerParams.pdesc = &desc; tracerParams.pphKernel = &phKernel; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnKernelCreateCb_t, Kernel, pfnCreateCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Kernel.pfnCreate, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phModule, *tracerParams.pdesc, *tracerParams.pphKernel); }
/* Tracing wrapper for Kernel.pfnDestroy. Invokes ZE_HANDLE_TRACER_RECURSION with the DDI pointer and the raw argument (presumably a re-entrancy bypass; confirm against the macro), stores the address of hKernel in ze_kernel_destroy_params_t, passes apiCallbackData to ZE_GEN_PER_API_CALLBACK_STATE for pfnDestroyCb, and returns the result of L0::APITracerWrapperImp, whose trailing argument is read back through the tracerParams pointer. */
ZE_APIEXPORT ze_result_t ZE_APICALL zeKernelDestroy_Tracing(ze_kernel_handle_t hKernel) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Kernel.pfnDestroy, hKernel); ze_kernel_destroy_params_t tracerParams; tracerParams.phKernel = &hKernel; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnKernelDestroyCb_t, Kernel, pfnDestroyCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Kernel.pfnDestroy, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phKernel); }
/* Tracing wrapper for Module.pfnGetFunctionPointer. Invokes ZE_HANDLE_TRACER_RECURSION with the DDI pointer and the raw arguments (presumably a re-entrancy bypass; confirm against the macro), stores the address of each argument in ze_module_get_function_pointer_params_t (the pKernelName parameter is recorded in the ppFunctionName field), passes apiCallbackData to ZE_GEN_PER_API_CALLBACK_STATE for pfnGetFunctionPointerCb, and returns the result of L0::APITracerWrapperImp, whose trailing arguments are read back through the tracerParams pointers. */
ZE_APIEXPORT ze_result_t ZE_APICALL zeModuleGetFunctionPointer_Tracing(ze_module_handle_t hModule, const char *pKernelName, void
**pfnFunction) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Module.pfnGetFunctionPointer, hModule, pKernelName, pfnFunction); ze_module_get_function_pointer_params_t tracerParams; tracerParams.phModule = &hModule; tracerParams.ppFunctionName = &pKernelName; tracerParams.ppfnFunction = &pfnFunction; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnModuleGetFunctionPointerCb_t, Module, pfnGetFunctionPointerCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Module.pfnGetFunctionPointer, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phModule, *tracerParams.ppFunctionName, *tracerParams.ppfnFunction); }
/* Tracing wrapper for Kernel.pfnSetGroupSize. Invokes ZE_HANDLE_TRACER_RECURSION with the DDI pointer and the raw arguments (presumably a re-entrancy bypass; confirm against the macro), stores the address of each argument in ze_kernel_set_group_size_params_t, passes apiCallbackData to ZE_GEN_PER_API_CALLBACK_STATE for pfnSetGroupSizeCb, and returns the result of L0::APITracerWrapperImp, whose trailing arguments are read back through the tracerParams pointers. */
ZE_APIEXPORT ze_result_t ZE_APICALL zeKernelSetGroupSize_Tracing(ze_kernel_handle_t hKernel, uint32_t groupSizeX, uint32_t groupSizeY, uint32_t groupSizeZ) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Kernel.pfnSetGroupSize, hKernel, groupSizeX, groupSizeY, groupSizeZ); ze_kernel_set_group_size_params_t tracerParams; tracerParams.phKernel = &hKernel; tracerParams.pgroupSizeX = &groupSizeX; tracerParams.pgroupSizeY = &groupSizeY; tracerParams.pgroupSizeZ = &groupSizeZ; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnKernelSetGroupSizeCb_t, Kernel, pfnSetGroupSizeCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Kernel.pfnSetGroupSize, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phKernel, *tracerParams.pgroupSizeX, *tracerParams.pgroupSizeY, *tracerParams.pgroupSizeZ); }
/* Tracing wrapper for Kernel.pfnSuggestGroupSize. Invokes ZE_HANDLE_TRACER_RECURSION with the DDI pointer and the raw arguments (presumably a re-entrancy bypass; confirm against the macro), stores the address of each argument in ze_kernel_suggest_group_size_params_t (here groupSizeX/Y/Z are uint32_t pointers, unlike in zeKernelSetGroupSize_Tracing), passes apiCallbackData to ZE_GEN_PER_API_CALLBACK_STATE for pfnSuggestGroupSizeCb, and returns the result of L0::APITracerWrapperImp, whose trailing arguments are read back through the tracerParams pointers. */
ZE_APIEXPORT ze_result_t ZE_APICALL zeKernelSuggestGroupSize_Tracing(ze_kernel_handle_t hKernel, uint32_t globalSizeX, uint32_t globalSizeY, uint32_t globalSizeZ, uint32_t *groupSizeX, uint32_t *groupSizeY, uint32_t *groupSizeZ) {
ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Kernel.pfnSuggestGroupSize, hKernel, globalSizeX, globalSizeY, globalSizeZ, groupSizeX, groupSizeY, groupSizeZ); ze_kernel_suggest_group_size_params_t tracerParams; tracerParams.phKernel = &hKernel; tracerParams.pglobalSizeX = &globalSizeX; tracerParams.pglobalSizeY = &globalSizeY; tracerParams.pglobalSizeZ = &globalSizeZ; tracerParams.pgroupSizeX = &groupSizeX; tracerParams.pgroupSizeY = &groupSizeY; tracerParams.pgroupSizeZ = &groupSizeZ; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnKernelSuggestGroupSizeCb_t, Kernel, pfnSuggestGroupSizeCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Kernel.pfnSuggestGroupSize, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phKernel, *tracerParams.pglobalSizeX, *tracerParams.pglobalSizeY, *tracerParams.pglobalSizeZ, *tracerParams.pgroupSizeX, *tracerParams.pgroupSizeY, *tracerParams.pgroupSizeZ); }
/* Tracing wrapper for Kernel.pfnSetArgumentValue. Invokes ZE_HANDLE_TRACER_RECURSION with the DDI pointer and the raw arguments (presumably a re-entrancy bypass; confirm against the macro), stores the address of each argument in ze_kernel_set_argument_value_params_t, passes apiCallbackData to ZE_GEN_PER_API_CALLBACK_STATE for pfnSetArgumentValueCb, and returns the result of L0::APITracerWrapperImp, whose trailing arguments are read back through the tracerParams pointers. */
ZE_APIEXPORT ze_result_t ZE_APICALL zeKernelSetArgumentValue_Tracing(ze_kernel_handle_t hKernel, uint32_t argIndex, size_t argSize, const void *pArgValue) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Kernel.pfnSetArgumentValue, hKernel, argIndex, argSize, pArgValue); ze_kernel_set_argument_value_params_t tracerParams; tracerParams.phKernel = &hKernel; tracerParams.pargIndex = &argIndex; tracerParams.pargSize = &argSize; tracerParams.ppArgValue = &pArgValue; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnKernelSetArgumentValueCb_t, Kernel, pfnSetArgumentValueCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Kernel.pfnSetArgumentValue, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phKernel, *tracerParams.pargIndex, *tracerParams.pargSize, *tracerParams.ppArgValue); }
/* Tracing wrapper for Kernel.pfnGetProperties. Invokes ZE_HANDLE_TRACER_RECURSION with the DDI pointer and the raw arguments (presumably a re-entrancy bypass; confirm against the macro), stores the address of each argument in ze_kernel_get_properties_params_t, passes apiCallbackData to ZE_GEN_PER_API_CALLBACK_STATE for pfnGetPropertiesCb, and returns the result of L0::APITracerWrapperImp, whose trailing arguments are read back through the tracerParams pointers. */
ZE_APIEXPORT
ze_result_t ZE_APICALL zeKernelGetProperties_Tracing(ze_kernel_handle_t hKernel, ze_kernel_properties_t *pKernelProperties) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Kernel.pfnGetProperties, hKernel, pKernelProperties); ze_kernel_get_properties_params_t tracerParams; tracerParams.phKernel = &hKernel; tracerParams.ppKernelProperties = &pKernelProperties; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnKernelGetPropertiesCb_t, Kernel, pfnGetPropertiesCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Kernel.pfnGetProperties, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phKernel, *tracerParams.ppKernelProperties); }
/* Tracing wrapper for CommandList.pfnAppendLaunchKernel. Invokes ZE_HANDLE_TRACER_RECURSION with the DDI pointer and the raw arguments (presumably a re-entrancy bypass; confirm against the macro), stores the address of each argument in ze_command_list_append_launch_kernel_params_t, passes apiCallbackData to ZE_GEN_PER_API_CALLBACK_STATE for pfnAppendLaunchKernelCb, and returns the result of L0::APITracerWrapperImp, whose trailing arguments are read back through the tracerParams pointers. */
ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendLaunchKernel_Tracing(ze_command_list_handle_t hCommandList, ze_kernel_handle_t hKernel, const ze_group_count_t *pLaunchFuncArgs, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.CommandList.pfnAppendLaunchKernel, hCommandList, hKernel, pLaunchFuncArgs, hSignalEvent, numWaitEvents, phWaitEvents); ze_command_list_append_launch_kernel_params_t tracerParams; tracerParams.phCommandList = &hCommandList; tracerParams.phKernel = &hKernel; tracerParams.ppLaunchFuncArgs = &pLaunchFuncArgs; tracerParams.phSignalEvent = &hSignalEvent; tracerParams.pnumWaitEvents = &numWaitEvents; tracerParams.pphWaitEvents = &phWaitEvents; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnCommandListAppendLaunchKernelCb_t, CommandList, pfnAppendLaunchKernelCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.CommandList.pfnAppendLaunchKernel, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phCommandList, *tracerParams.phKernel,
*tracerParams.ppLaunchFuncArgs, *tracerParams.phSignalEvent, *tracerParams.pnumWaitEvents, *tracerParams.pphWaitEvents); }
/* Tracing wrapper for CommandList.pfnAppendLaunchKernelIndirect. Invokes ZE_HANDLE_TRACER_RECURSION with the DDI pointer and the raw arguments (presumably a re-entrancy bypass; confirm against the macro), stores the address of each argument in ze_command_list_append_launch_kernel_indirect_params_t, passes apiCallbackData to ZE_GEN_PER_API_CALLBACK_STATE for pfnAppendLaunchKernelIndirectCb, and returns the result of L0::APITracerWrapperImp, whose trailing arguments are read back through the tracerParams pointers. */
ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendLaunchKernelIndirect_Tracing(ze_command_list_handle_t hCommandList, ze_kernel_handle_t hKernel, const ze_group_count_t *pLaunchArgumentsBuffer, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.CommandList.pfnAppendLaunchKernelIndirect, hCommandList, hKernel, pLaunchArgumentsBuffer, hSignalEvent, numWaitEvents, phWaitEvents); ze_command_list_append_launch_kernel_indirect_params_t tracerParams; tracerParams.phCommandList = &hCommandList; tracerParams.phKernel = &hKernel; tracerParams.ppLaunchArgumentsBuffer = &pLaunchArgumentsBuffer; tracerParams.phSignalEvent = &hSignalEvent; tracerParams.pnumWaitEvents = &numWaitEvents; tracerParams.pphWaitEvents = &phWaitEvents; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnCommandListAppendLaunchKernelIndirectCb_t, CommandList, pfnAppendLaunchKernelIndirectCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.CommandList.pfnAppendLaunchKernelIndirect, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phCommandList, *tracerParams.phKernel, *tracerParams.ppLaunchArgumentsBuffer, *tracerParams.phSignalEvent, *tracerParams.pnumWaitEvents, *tracerParams.pphWaitEvents); }
/* Tracing wrapper for CommandList.pfnAppendLaunchMultipleKernelsIndirect. Invokes ZE_HANDLE_TRACER_RECURSION with the DDI pointer and the raw arguments (presumably a re-entrancy bypass; confirm against the macro), stores the address of each argument in ze_command_list_append_launch_multiple_kernels_indirect_params_t, passes apiCallbackData to ZE_GEN_PER_API_CALLBACK_STATE for pfnAppendLaunchMultipleKernelsIndirectCb, and returns the result of L0::APITracerWrapperImp, whose trailing arguments are read back through the tracerParams pointers. */
ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendLaunchMultipleKernelsIndirect_Tracing(ze_command_list_handle_t hCommandList, uint32_t numKernels, ze_kernel_handle_t *phKernels, const uint32_t *pCountBuffer, const ze_group_count_t *pLaunchArgumentsBuffer, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.CommandList.pfnAppendLaunchMultipleKernelsIndirect,
hCommandList, numKernels, phKernels, pCountBuffer, pLaunchArgumentsBuffer, hSignalEvent, numWaitEvents, phWaitEvents); ze_command_list_append_launch_multiple_kernels_indirect_params_t tracerParams; tracerParams.phCommandList = &hCommandList; tracerParams.pnumKernels = &numKernels; tracerParams.pphKernels = &phKernels; tracerParams.ppCountBuffer = &pCountBuffer; tracerParams.ppLaunchArgumentsBuffer = &pLaunchArgumentsBuffer; tracerParams.phSignalEvent = &hSignalEvent; tracerParams.pnumWaitEvents = &numWaitEvents; tracerParams.pphWaitEvents = &phWaitEvents; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnCommandListAppendLaunchMultipleKernelsIndirectCb_t, CommandList, pfnAppendLaunchMultipleKernelsIndirectCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.CommandList.pfnAppendLaunchMultipleKernelsIndirect, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phCommandList, *tracerParams.pnumKernels, *tracerParams.pphKernels, *tracerParams.ppCountBuffer, *tracerParams.ppLaunchArgumentsBuffer, *tracerParams.phSignalEvent, *tracerParams.pnumWaitEvents, *tracerParams.pphWaitEvents); }
/* Tracing wrapper for CommandList.pfnAppendLaunchCooperativeKernel. Invokes ZE_HANDLE_TRACER_RECURSION with the DDI pointer and the raw arguments (presumably a re-entrancy bypass; confirm against the macro), stores the address of each argument in ze_command_list_append_launch_cooperative_kernel_params_t, passes apiCallbackData to ZE_GEN_PER_API_CALLBACK_STATE for pfnAppendLaunchCooperativeKernelCb, and returns the result of L0::APITracerWrapperImp, whose trailing arguments are read back through the tracerParams pointers. */
ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendLaunchCooperativeKernel_Tracing(ze_command_list_handle_t hCommandList, ze_kernel_handle_t hKernel, const ze_group_count_t *pLaunchFuncArgs, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.CommandList.pfnAppendLaunchCooperativeKernel, hCommandList, hKernel, pLaunchFuncArgs, hSignalEvent, numWaitEvents, phWaitEvents); ze_command_list_append_launch_cooperative_kernel_params_t tracerParams; tracerParams.phCommandList = &hCommandList; tracerParams.phKernel = &hKernel; tracerParams.ppLaunchFuncArgs = &pLaunchFuncArgs; tracerParams.phSignalEvent = &hSignalEvent; tracerParams.pnumWaitEvents = &numWaitEvents;
tracerParams.pphWaitEvents = &phWaitEvents; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnCommandListAppendLaunchCooperativeKernelCb_t, CommandList, pfnAppendLaunchCooperativeKernelCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.CommandList.pfnAppendLaunchCooperativeKernel, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phCommandList, *tracerParams.phKernel, *tracerParams.ppLaunchFuncArgs, *tracerParams.phSignalEvent, *tracerParams.pnumWaitEvents, *tracerParams.pphWaitEvents); } ZE_APIEXPORT ze_result_t ZE_APICALL zeModuleGetKernelNames_Tracing(ze_module_handle_t hModule, uint32_t *pCount, const char **pNames) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Module.pfnGetKernelNames, hModule, pCount, pNames); ze_module_get_kernel_names_params_t tracerParams; tracerParams.phModule = &hModule; tracerParams.ppCount = &pCount; tracerParams.ppNames = &pNames; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnModuleGetKernelNamesCb_t, Module, pfnGetKernelNamesCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Module.pfnGetKernelNames, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phModule, *tracerParams.ppCount, *tracerParams.ppNames); } ZE_APIEXPORT ze_result_t ZE_APICALL zeKernelSuggestMaxCooperativeGroupCount_Tracing(ze_kernel_handle_t hKernel, uint32_t *totalGroupCount) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Kernel.pfnSuggestMaxCooperativeGroupCount, hKernel, totalGroupCount); ze_kernel_suggest_max_cooperative_group_count_params_t tracerParams; tracerParams.phKernel = &hKernel; tracerParams.ptotalGroupCount = &totalGroupCount; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, 
ze_pfnKernelSuggestMaxCooperativeGroupCountCb_t, Kernel, pfnSuggestMaxCooperativeGroupCountCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Kernel.pfnSuggestMaxCooperativeGroupCount, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phKernel, *tracerParams.ptotalGroupCount); } ZE_APIEXPORT ze_result_t ZE_APICALL zeKernelGetIndirectAccess_Tracing(ze_kernel_handle_t hKernel, ze_kernel_indirect_access_flags_t *pFlags) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Kernel.pfnGetIndirectAccess, hKernel, pFlags); ze_kernel_get_indirect_access_params_t tracerParams; tracerParams.phKernel = &hKernel; tracerParams.ppFlags = &pFlags; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnKernelGetIndirectAccessCb_t, Kernel, pfnGetIndirectAccessCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Kernel.pfnGetIndirectAccess, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phKernel, *tracerParams.ppFlags); } ZE_APIEXPORT ze_result_t ZE_APICALL zeKernelGetName_Tracing(ze_kernel_handle_t hKernel, size_t *pSize, char *pName) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Kernel.pfnGetName, hKernel, pSize, pName); ze_kernel_get_name_params_t tracerParams; tracerParams.phKernel = &hKernel; tracerParams.ppSize = &pSize; tracerParams.ppName = &pName; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnKernelGetNameCb_t, Kernel, pfnGetNameCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Kernel.pfnGetName, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phKernel, *tracerParams.ppSize, *tracerParams.ppName); } ZE_APIEXPORT ze_result_t ZE_APICALL zeKernelGetSourceAttributes_Tracing(ze_kernel_handle_t hKernel, 
uint32_t *pSize, char **pString) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Kernel.pfnGetSourceAttributes, hKernel, pSize, pString); ze_kernel_get_source_attributes_params_t tracerParams; tracerParams.phKernel = &hKernel; tracerParams.ppSize = &pSize; tracerParams.ppString = &pString; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnKernelGetSourceAttributesCb_t, Kernel, pfnGetSourceAttributesCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Kernel.pfnGetSourceAttributes, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phKernel, *tracerParams.ppSize, *tracerParams.ppString); } ZE_APIEXPORT ze_result_t ZE_APICALL zeKernelSetIndirectAccess_Tracing(ze_kernel_handle_t hKernel, ze_kernel_indirect_access_flags_t flags) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Kernel.pfnSetIndirectAccess, hKernel, flags); ze_kernel_set_indirect_access_params_t tracerParams; tracerParams.phKernel = &hKernel; tracerParams.pflags = &flags; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnKernelSetIndirectAccessCb_t, Kernel, pfnSetIndirectAccessCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Kernel.pfnSetIndirectAccess, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phKernel, *tracerParams.pflags); } compute-runtime-22.14.22890/level_zero/experimental/source/tracing/tracing_module_imp.h000066400000000000000000000150671422164147700311670ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once extern "C" { ZE_APIEXPORT ze_result_t ZE_APICALL zeModuleCreate_Tracing(ze_context_handle_t hContext, ze_device_handle_t hDevice, const ze_module_desc_t *desc, ze_module_handle_t *phModule, ze_module_build_log_handle_t 
*phBuildLog); ZE_APIEXPORT ze_result_t ZE_APICALL zeModuleDestroy_Tracing(ze_module_handle_t hModule); ZE_APIEXPORT ze_result_t ZE_APICALL zeModuleBuildLogDestroy_Tracing(ze_module_build_log_handle_t hModuleBuildLog); ZE_APIEXPORT ze_result_t ZE_APICALL zeModuleBuildLogGetString_Tracing(ze_module_build_log_handle_t hModuleBuildLog, size_t *pSize, char *pBuildLog); ZE_APIEXPORT ze_result_t ZE_APICALL zeModuleGetNativeBinary_Tracing(ze_module_handle_t hModule, size_t *pSize, uint8_t *pModuleNativeBinary); ZE_APIEXPORT ze_result_t ZE_APICALL zeModuleGetGlobalPointer_Tracing(ze_module_handle_t hModule, const char *pGlobalName, size_t *pSize, void **pptr); ZE_APIEXPORT ze_result_t ZE_APICALL zeModuleDynamicLink_Tracing(uint32_t numModules, ze_module_handle_t *phModules, ze_module_build_log_handle_t *phLinkLog); ZE_APIEXPORT ze_result_t ZE_APICALL zeModuleGetProperties_Tracing(ze_module_handle_t hModule, ze_module_properties_t *pModuleProperties); ZE_APIEXPORT ze_result_t ZE_APICALL zeKernelCreate_Tracing(ze_module_handle_t hModule, const ze_kernel_desc_t *desc, ze_kernel_handle_t *phFunction); ZE_APIEXPORT ze_result_t ZE_APICALL zeKernelDestroy_Tracing(ze_kernel_handle_t hKernel); ZE_APIEXPORT ze_result_t ZE_APICALL zeModuleGetFunctionPointer_Tracing(ze_module_handle_t hModule, const char *pKernelName, void **pfnFunction); ZE_APIEXPORT ze_result_t ZE_APICALL zeKernelSetGroupSize_Tracing(ze_kernel_handle_t hKernel, uint32_t groupSizeX, uint32_t groupSizeY, uint32_t groupSizeZ); ZE_APIEXPORT ze_result_t ZE_APICALL zeKernelSuggestGroupSize_Tracing(ze_kernel_handle_t hKernel, uint32_t globalSizeX, uint32_t globalSizeY, uint32_t globalSizeZ, uint32_t *groupSizeX, uint32_t *groupSizeY, uint32_t *groupSizeZ); ZE_APIEXPORT ze_result_t ZE_APICALL zeKernelSetArgumentValue_Tracing(ze_kernel_handle_t hKernel, uint32_t argIndex, size_t argSize, const void *pArgValue); ZE_APIEXPORT ze_result_t ZE_APICALL zeKernelGetProperties_Tracing(ze_kernel_handle_t hKernel, ze_kernel_properties_t 
*pKernelProperties); ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendLaunchKernel_Tracing(ze_command_list_handle_t hCommandList, ze_kernel_handle_t hKernel, const ze_group_count_t *pLaunchFuncArgs, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents); ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendLaunchKernelIndirect_Tracing(ze_command_list_handle_t hCommandList, ze_kernel_handle_t hKernel, const ze_group_count_t *pLaunchArgumentsBuffer, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents); ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendLaunchMultipleKernelsIndirect_Tracing(ze_command_list_handle_t hCommandList, uint32_t numKernels, ze_kernel_handle_t *phKernels, const uint32_t *pCountBuffer, const ze_group_count_t *pLaunchArgumentsBuffer, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents); ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendLaunchCooperativeKernel_Tracing(ze_command_list_handle_t hCommandList, ze_kernel_handle_t hKernel, const ze_group_count_t *pLaunchFuncArgs, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents); ZE_APIEXPORT ze_result_t ZE_APICALL zeModuleGetKernelNames_Tracing(ze_module_handle_t hModule, uint32_t *pCount, const char **pNames); ZE_APIEXPORT ze_result_t ZE_APICALL zeKernelSuggestMaxCooperativeGroupCount_Tracing(ze_kernel_handle_t hKernel, uint32_t *totalGroupCount); ZE_APIEXPORT ze_result_t ZE_APICALL zeKernelGetIndirectAccess_Tracing(ze_kernel_handle_t hKernel, ze_kernel_indirect_access_flags_t *pFlags); ZE_APIEXPORT ze_result_t ZE_APICALL zeKernelGetName_Tracing(ze_kernel_handle_t hKernel, size_t *pSize, char *pName); ZE_APIEXPORT ze_result_t ZE_APICALL zeKernelGetSourceAttributes_Tracing(ze_kernel_handle_t hKernel, uint32_t *pSize, char **pString); ZE_APIEXPORT ze_result_t ZE_APICALL zeKernelSetIndirectAccess_Tracing(ze_kernel_handle_t hKernel, 
ze_kernel_indirect_access_flags_t flags); } compute-runtime-22.14.22890/level_zero/experimental/source/tracing/tracing_residency_imp.cpp000066400000000000000000000232721422164147700322170ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/experimental/source/tracing/tracing_imp.h" /* Tracing wrapper for Context.pfnCreate. Pattern shared by the context wrappers that follow: ZE_HANDLE_TRACER_RECURSION is applied to the driver entry point first (macro defined elsewhere; presumably it forwards straight to the driver when re-entered from a tracer callback - TODO confirm), the address of each argument is stored in the params struct, ZE_GEN_PER_API_CALLBACK_STATE prepares api_callbackData for this API's callback slot, and the result of L0::APITracerWrapperImp is returned; that call receives the driver function, the params, the API ordinal, the prolog/epilog callbacks and the arguments read back through the params pointers. */ ZE_APIEXPORT ze_result_t ZE_APICALL zeContextCreate_Tracing(ze_driver_handle_t hDriver, const ze_context_desc_t *desc, ze_context_handle_t *phContext) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Context.pfnCreate, hDriver, desc, phContext); ze_context_create_params_t tracerParams; tracerParams.phDriver = &hDriver; tracerParams.pdesc = &desc; tracerParams.pphContext = &phContext; L0::APITracerCallbackDataImp api_callbackData; ZE_GEN_PER_API_CALLBACK_STATE(api_callbackData, ze_pfnContextCreateCb_t, Context, pfnCreateCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Context.pfnCreate, &tracerParams, api_callbackData.apiOrdinal, api_callbackData.prologCallbacks, api_callbackData.epilogCallbacks, *tracerParams.phDriver, *tracerParams.pdesc, *tracerParams.pphContext); } /* Tracing wrapper for Context.pfnDestroy; same pattern. */ ZE_APIEXPORT ze_result_t ZE_APICALL zeContextDestroy_Tracing(ze_context_handle_t hContext) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Context.pfnDestroy, hContext); ze_context_destroy_params_t tracerParams; tracerParams.phContext = &hContext; L0::APITracerCallbackDataImp api_callbackData; ZE_GEN_PER_API_CALLBACK_STATE(api_callbackData, ze_pfnContextDestroyCb_t, Context, pfnDestroyCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Context.pfnDestroy, &tracerParams, api_callbackData.apiOrdinal, api_callbackData.prologCallbacks, api_callbackData.epilogCallbacks, *tracerParams.phContext); } /* Tracing wrapper for Context.pfnGetStatus; same pattern. */ ZE_APIEXPORT ze_result_t ZE_APICALL zeContextGetStatus_Tracing(ze_context_handle_t hContext) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Context.pfnGetStatus, hContext); 
ze_context_get_status_params_t tracerParams; tracerParams.phContext = &hContext; L0::APITracerCallbackDataImp api_callbackData; ZE_GEN_PER_API_CALLBACK_STATE(api_callbackData, ze_pfnContextGetStatusCb_t, Context, pfnGetStatusCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Context.pfnGetStatus, &tracerParams, api_callbackData.apiOrdinal, api_callbackData.prologCallbacks, api_callbackData.epilogCallbacks, *tracerParams.phContext); } /* Tracing wrapper for Context.pfnSystemBarrier; same pattern. */ ZE_APIEXPORT ze_result_t ZE_APICALL zeContextSystemBarrier_Tracing(ze_context_handle_t hContext, ze_device_handle_t hDevice) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Context.pfnSystemBarrier, hContext, hDevice); ze_context_system_barrier_params_t tracerParams; tracerParams.phContext = &hContext; tracerParams.phDevice = &hDevice; L0::APITracerCallbackDataImp api_callbackData; ZE_GEN_PER_API_CALLBACK_STATE(api_callbackData, ze_pfnContextSystemBarrierCb_t, Context, pfnSystemBarrierCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Context.pfnSystemBarrier, &tracerParams, api_callbackData.apiOrdinal, api_callbackData.prologCallbacks, api_callbackData.epilogCallbacks, *tracerParams.phContext, *tracerParams.phDevice); } /* Tracing wrapper for Context.pfnMakeMemoryResident; same pattern. */ ZE_APIEXPORT ze_result_t ZE_APICALL zeContextMakeMemoryResident_Tracing(ze_context_handle_t hContext, ze_device_handle_t hDevice, void *ptr, size_t size) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Context.pfnMakeMemoryResident, hContext, hDevice, ptr, size); ze_context_make_memory_resident_params_t tracerParams; tracerParams.phContext = &hContext; tracerParams.phDevice = &hDevice; tracerParams.pptr = &ptr; tracerParams.psize = &size; L0::APITracerCallbackDataImp api_callbackData; ZE_GEN_PER_API_CALLBACK_STATE(api_callbackData, ze_pfnContextMakeMemoryResidentCb_t, Context, pfnMakeMemoryResidentCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Context.pfnMakeMemoryResident, &tracerParams, api_callbackData.apiOrdinal, 
api_callbackData.prologCallbacks, api_callbackData.epilogCallbacks, *tracerParams.phContext, *tracerParams.phDevice, *tracerParams.pptr, *tracerParams.psize); } /* Tracing wrapper for Context.pfnEvictMemory; same pattern. */ ZE_APIEXPORT ze_result_t ZE_APICALL zeContextEvictMemory_Tracing(ze_context_handle_t hContext, ze_device_handle_t hDevice, void *ptr, size_t size) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Context.pfnEvictMemory, hContext, hDevice, ptr, size); ze_context_evict_memory_params_t tracerParams; tracerParams.phContext = &hContext; tracerParams.phDevice = &hDevice; tracerParams.pptr = &ptr; tracerParams.psize = &size; L0::APITracerCallbackDataImp api_callbackData; ZE_GEN_PER_API_CALLBACK_STATE(api_callbackData, ze_pfnContextEvictMemoryCb_t, Context, pfnEvictMemoryCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Context.pfnEvictMemory, &tracerParams, api_callbackData.apiOrdinal, api_callbackData.prologCallbacks, api_callbackData.epilogCallbacks, *tracerParams.phContext, *tracerParams.phDevice, *tracerParams.pptr, *tracerParams.psize); } /* Tracing wrapper for Context.pfnMakeImageResident; same pattern. */ ZE_APIEXPORT ze_result_t ZE_APICALL zeContextMakeImageResident_Tracing(ze_context_handle_t hContext, ze_device_handle_t hDevice, ze_image_handle_t hImage) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Context.pfnMakeImageResident, hContext, hDevice, hImage); ze_context_make_image_resident_params_t tracerParams; tracerParams.phContext = &hContext; tracerParams.phDevice = &hDevice; tracerParams.phImage = &hImage; L0::APITracerCallbackDataImp api_callbackData; ZE_GEN_PER_API_CALLBACK_STATE(api_callbackData, ze_pfnContextMakeImageResidentCb_t, Context, pfnMakeImageResidentCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Context.pfnMakeImageResident, &tracerParams, api_callbackData.apiOrdinal, api_callbackData.prologCallbacks, api_callbackData.epilogCallbacks, *tracerParams.phContext, *tracerParams.phDevice, *tracerParams.phImage); } /* Tracing wrapper for Context.pfnEvictImage; same pattern. */ ZE_APIEXPORT ze_result_t ZE_APICALL zeContextEvictImage_Tracing(ze_context_handle_t 
hContext, ze_device_handle_t hDevice, ze_image_handle_t hImage) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Context.pfnEvictImage, hContext, hDevice, hImage); ze_context_evict_image_params_t tracerParams; tracerParams.phContext = &hContext; tracerParams.phDevice = &hDevice; tracerParams.phImage = &hImage; L0::APITracerCallbackDataImp api_callbackData; ZE_GEN_PER_API_CALLBACK_STATE(api_callbackData, ze_pfnContextEvictImageCb_t, Context, pfnEvictImageCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Context.pfnEvictImage, &tracerParams, api_callbackData.apiOrdinal, api_callbackData.prologCallbacks, api_callbackData.epilogCallbacks, *tracerParams.phContext, *tracerParams.phDevice, *tracerParams.phImage); } compute-runtime-22.14.22890/level_zero/experimental/source/tracing/tracing_residency_imp.h000066400000000000000000000031171422164147700316600ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once /* C-linkage prototypes of the context and residency tracing wrappers. */ extern "C" { ZE_APIEXPORT ze_result_t ZE_APICALL zeContextCreate_Tracing(ze_driver_handle_t hDriver, const ze_context_desc_t *desc, ze_context_handle_t *phContext); ZE_APIEXPORT ze_result_t ZE_APICALL zeContextDestroy_Tracing(ze_context_handle_t hContext); ZE_APIEXPORT ze_result_t ZE_APICALL zeContextGetStatus_Tracing(ze_context_handle_t hContext); ZE_APIEXPORT ze_result_t ZE_APICALL zeContextSystemBarrier_Tracing(ze_context_handle_t hContext, ze_device_handle_t hDevice); ZE_APIEXPORT ze_result_t ZE_APICALL zeContextMakeMemoryResident_Tracing(ze_context_handle_t hContext, ze_device_handle_t hDevice, void *ptr, size_t size); ZE_APIEXPORT ze_result_t ZE_APICALL zeContextEvictMemory_Tracing(ze_context_handle_t hContext, ze_device_handle_t hDevice, void *ptr, size_t size); ZE_APIEXPORT ze_result_t ZE_APICALL zeContextMakeImageResident_Tracing(ze_context_handle_t hContext, ze_device_handle_t hDevice, ze_image_handle_t hImage); ZE_APIEXPORT ze_result_t ZE_APICALL 
zeContextEvictImage_Tracing(ze_context_handle_t hContext, ze_device_handle_t hDevice, ze_image_handle_t hImage); } // extern "C" compute-runtime-22.14.22890/level_zero/experimental/source/tracing/tracing_sampler_imp.cpp000066400000000000000000000046651422164147700317020ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/experimental/source/tracing/tracing_imp.h" /* Tracing wrapper for Sampler.pfnCreate; same recursion-check / params / callback-state / APITracerWrapperImp pattern. The pDesc argument is stored in the params field named pdesc. */ ZE_APIEXPORT ze_result_t ZE_APICALL zeSamplerCreate_Tracing(ze_context_handle_t hContext, ze_device_handle_t hDevice, const ze_sampler_desc_t *pDesc, ze_sampler_handle_t *phSampler) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Sampler.pfnCreate, hContext, hDevice, pDesc, phSampler); ze_sampler_create_params_t tracerParams; tracerParams.phContext = &hContext; tracerParams.phDevice = &hDevice; tracerParams.pdesc = &pDesc; tracerParams.pphSampler = &phSampler; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnSamplerCreateCb_t, Sampler, pfnCreateCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Sampler.pfnCreate, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phContext, *tracerParams.phDevice, *tracerParams.pdesc, *tracerParams.pphSampler); } /* Tracing wrapper for Sampler.pfnDestroy; same pattern. */ ZE_APIEXPORT ze_result_t ZE_APICALL zeSamplerDestroy_Tracing(ze_sampler_handle_t hSampler) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Sampler.pfnDestroy, hSampler); ze_sampler_destroy_params_t tracerParams; tracerParams.phSampler = &hSampler; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnSamplerDestroyCb_t, Sampler, pfnDestroyCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Sampler.pfnDestroy, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phSampler); } 
compute-runtime-22.14.22890/level_zero/experimental/source/tracing/tracing_sampler_imp.h000066400000000000000000000007171422164147700313410ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once /* C-linkage prototypes of the two sampler tracing wrappers (create and destroy). */ extern "C" { ZE_APIEXPORT ze_result_t ZE_APICALL zeSamplerCreate_Tracing(ze_context_handle_t hContext, ze_device_handle_t hDevice, const ze_sampler_desc_t *desc, ze_sampler_handle_t *phSampler); ZE_APIEXPORT ze_result_t ZE_APICALL zeSamplerDestroy_Tracing(ze_sampler_handle_t hSampler); } compute-runtime-22.14.22890/level_zero/experimental/test/000077500000000000000000000000001422164147700231745ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/experimental/test/CMakeLists.txt000066400000000000000000000001421422164147700257310ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # add_subdirectories() compute-runtime-22.14.22890/level_zero/experimental/test/unit_tests/000077500000000000000000000000001422164147700253755ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/experimental/test/unit_tests/CMakeLists.txt000066400000000000000000000070111422164147700301340ustar00rootroot00000000000000# # Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT # link_libraries(${ASAN_LIBS} ${TSAN_LIBS}) set(TARGET_NAME ${TARGET_NAME_L0}_exp_tests) include(${NEO_SOURCE_DIR}/cmake/setup_ult_global_flags.cmake) function(ADD_SUPPORTED_TEST_PRODUCT_FAMILIES_DEFINITION) set(L0_TESTED_PRODUCT_FAMILIES ${ALL_TESTED_PRODUCT_FAMILY}) string(REPLACE ";" "," L0_TESTED_PRODUCT_FAMILIES "${L0_TESTED_PRODUCT_FAMILIES}" ) add_definitions( -DSUPPORTED_TEST_PRODUCT_FAMILIES=${L0_TESTED_PRODUCT_FAMILIES} ) endfunction() add_supported_test_product_families_definition() add_executable( ${TARGET_NAME} ${NEO_SOURCE_DIR}/level_zero/core/source/dll/disallow_deferred_deleter.cpp 
${NEO_SOURCE_DIR}/level_zero/core/test/unit_tests/sources/builtin/create_ult_builtin_functions_lib.cpp ${NEO_SOURCE_DIR}/level_zero/tools/test/unit_tests/sources/debug/debug_session_helper.cpp ) target_sources( ${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${NEO_SOURCE_DIR}/level_zero/core/test/unit_tests/main.cpp ${NEO_SOURCE_DIR}/level_zero/core/test/unit_tests/mock.h ${NEO_SOURCE_DIR}/level_zero/core/test/unit_tests/white_box.h ${NEO_SHARED_TEST_DIRECTORY}/unit_test/tests_configuration.h ) target_sources( ${TARGET_NAME} PRIVATE $ $ $ $ $ $ ) set_target_properties(${TARGET_NAME} PROPERTIES FOLDER ${TARGET_NAME_L0}) add_subdirectoriesl0(${CMAKE_CURRENT_SOURCE_DIR} "*") target_compile_definitions( ${TARGET_NAME} PRIVATE $ ) target_include_directories( ${TARGET_NAME} BEFORE PRIVATE $/experimental ${NEO_SHARED_TEST_DIRECTORY}/common/test_macros/header${BRANCH_DIR_SUFFIX} ${NEO_SHARED_TEST_DIRECTORY}/common/test_configuration/unit_tests ) if(WIN32) target_link_libraries(${TARGET_NAME} dbghelp) add_dependencies(${TARGET_NAME} mock_gdi) endif() target_link_libraries( ${TARGET_NAME} ${NEO_SHARED_MOCKABLE_LIB_NAME} ${HW_LIBS_ULT} gmock-gtest ${NEO_EXTRA_LIBS} ) target_sources(${TARGET_NAME} PRIVATE $ $ ) option(L0_ULT_VERBOSE "Use the default/verbose test output" OFF) if(NOT L0_ULT_VERBOSE) set(L0_TESTS_LISTENER_OPTION "--disable_default_listener") else() set(L0_TESTS_LISTENER_OPTION "--enable_default_listener") endif() if(MSVC) set_target_properties( ${TARGET_NAME} PROPERTIES VS_DEBUGGER_COMMAND_ARGUMENTS "${L0_TESTS_FILTER_OPTION} --gtest_catch_exceptions=0 ${L0_TESTS_LISTENER_OPTION}" VS_DEBUGGER_WORKING_DIRECTORY "$(OutDir )") endif() add_dependencies(unit_tests ${TARGET_NAME}) create_source_tree(${TARGET_NAME} ${L0_ROOT_DIR}/..) 
compute-runtime-22.14.22890/level_zero/experimental/test/unit_tests/sources/000077500000000000000000000000001422164147700270605ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/experimental/test/unit_tests/sources/CMakeLists.txt000066400000000000000000000005261422164147700316230ustar00rootroot00000000000000# # Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT # target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/test_cmdlist.cpp $ ) add_subdirectories() compute-runtime-22.14.22890/level_zero/experimental/test/unit_tests/sources/test_cmdlist.cpp000066400000000000000000000061351422164147700322670ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_container/command_encoder.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "shared/test/common/fixtures/device_fixture.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/source/context/context_imp.h" #include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_built_ins.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h" #include "level_zero/core/test/unit_tests/mocks/mock_device.h" #include "level_zero/core/test/unit_tests/mocks/mock_driver_handle.h" namespace L0 { namespace ult { /* Fixture layered on DeviceFixture. SetUp creates a command list, a host-visible event pool with count 2 plus one event from it, and a sizeof(uint32_t)-sized device allocation stored in ptr; TearDown frees ptr before tearing down the base fixture. NOTE(review): the template argument lists after std::unique_ptr and Test appear to have been lost in this copy of the text - restore them from the upstream file. */ class CommandListMemoryExtensionFixture : public DeviceFixture { public: /* Builds the command list, event pool, event and device allocation used by the tests. */ void SetUp() { DeviceFixture::SetUp(); ze_result_t returnValue; commandList.reset(whitebox_cast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue))); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE; eventPoolDesc.count = 2; 
ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; eventDesc.signal = 0; eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); size_t size = sizeof(uint32_t); size_t alignment = 1u; ze_device_mem_alloc_desc_t deviceDesc = {}; auto result = context->allocDeviceMem(device->toHandle(), &deviceDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); } /* Releases the device allocation first, then the base fixture. */ void TearDown() { context->freeMem(ptr); DeviceFixture::TearDown(); } std::unique_ptr commandList; std::unique_ptr eventPool; std::unique_ptr event; uint32_t waitMemData = 1u; void *ptr = nullptr; }; using CommandListAppendWaitOnMemExtension = Test; /* appendWaitOnMemory called with null pointers is expected to return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE. */ TEST_F(CommandListAppendWaitOnMemExtension, givenAppendWaitOnMemReturnsUnsupported) { ze_result_t result = ZE_RESULT_SUCCESS; result = commandList->appendWaitOnMemory(nullptr, nullptr, 1u, nullptr); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, result); } using CommandListAppendWriteToMemExtension = Test; /* appendWriteToMemory called with null pointers is expected to return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE. */ TEST_F(CommandListAppendWriteToMemExtension, givenAppendWriteToMemReturnsUnsupported) { ze_result_t result = ZE_RESULT_SUCCESS; uint64_t data = 0xabc; result = commandList->appendWriteToMemory(nullptr, nullptr, data); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, result); } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/experimental/test/unit_tests/sources/tracing/000077500000000000000000000000001422164147700305075ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/experimental/test/unit_tests/sources/tracing/CMakeLists.txt000066400000000000000000000027071422164147700332550ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # target_sources( ${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/test_api_tracing_common.h 
${CMAKE_CURRENT_SOURCE_DIR}/test_core_api_tracing.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_global_api_tracing.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_copy_api_tracing.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_device_api_tracing.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_driver_api_tracing.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_event_api_tracing.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_event_api_multi_tracing.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_fence_api_tracing.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_cmdlist_api_tracing.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_cmdqueue_api_tracing.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_barrier_api_tracing.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_image_api_tracing.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_memory_api_tracing.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_sampler_api_tracing.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_residency_api_tracing.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_module_api_tracing.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_module_api_multi_tracing.cpp ) test_api_tracing_common.h000066400000000000000000000217101422164147700354710ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/experimental/test/unit_tests/sources/tracing/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/core/source/device/device_imp.h" #include "level_zero/core/source/driver/driver_handle_imp.h" #include "level_zero/core/source/driver/host_pointer_manager.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h" #include "level_zero/core/test/unit_tests/mocks/mock_device.h" #include "level_zero/core/test/unit_tests/mocks/mock_driver.h" #include "level_zero/core/test/unit_tests/mocks/mock_memory_manager.h" #include "level_zero/core/test/unit_tests/mocks/mock_module.h" #include "level_zero/experimental/source/tracing/tracing.h" #include "level_zero/experimental/source/tracing/tracing_imp.h" #include #include "gtest/gtest.h" #include #include 
namespace L0 { extern struct APITracerContextImp *pGlobalAPITracerContextImp; namespace ult { template ze_result_t callHandleTracerRecursion(TFunctionPointer zeApiPtr, Args &&...args) { ZE_HANDLE_TRACER_RECURSION(zeApiPtr, args...); return ZE_RESULT_ERROR_UNKNOWN; } class ZeAPITracingCoreTestsFixture { public: ZeAPITracingCoreTestsFixture(){}; protected: virtual void SetUp() { //NOLINT driver_ddiTable.enableTracing = true; myThreadPrivateTracerData.onList = false; myThreadPrivateTracerData.isInitialized = false; myThreadPrivateTracerData.testAndSetThreadTracerDataInitializedAndOnList(); } virtual void TearDown() { //NOLINT myThreadPrivateTracerData.removeThreadTracerDataFromList(); driver_ddiTable.enableTracing = false; } }; class zeAPITracingCoreTests : public ZeAPITracingCoreTestsFixture, public ::testing::Test { protected: void SetUp() override { //NOLINT ZeAPITracingCoreTestsFixture::SetUp(); } void TearDown() override { //NOLINT ZeAPITracingCoreTestsFixture::TearDown(); } }; class zeAPITracingRuntimeTests : public ZeAPITracingCoreTestsFixture, public ::testing::Test { protected: zet_core_callbacks_t prologCbs = {}; zet_core_callbacks_t epilogCbs = {}; zet_tracer_exp_handle_t apiTracerHandle; zet_tracer_exp_desc_t tracerDesc; int defaultUserData = 0; void *userData; void SetUp() override { //NOLINT ze_result_t result; ZeAPITracingCoreTestsFixture::SetUp(); userData = &defaultUserData; tracerDesc.pUserData = userData; result = zetTracerExpCreate(nullptr, &tracerDesc, &apiTracerHandle); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, apiTracerHandle); } void TearDown() override { //NOLINT ze_result_t result; result = zetTracerExpSetEnabled(apiTracerHandle, false); EXPECT_EQ(ZE_RESULT_SUCCESS, result); result = zetTracerExpDestroy(apiTracerHandle); EXPECT_EQ(ZE_RESULT_SUCCESS, result); ZeAPITracingCoreTestsFixture::TearDown(); } void setTracerCallbacksAndEnableTracer() { ze_result_t result; result = zetTracerExpSetPrologues(apiTracerHandle, &prologCbs); 
EXPECT_EQ(ZE_RESULT_SUCCESS, result); result = zetTracerExpSetEpilogues(apiTracerHandle, &epilogCbs); EXPECT_EQ(ZE_RESULT_SUCCESS, result); result = zetTracerExpSetEnabled(apiTracerHandle, true); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } }; class zeAPITracingRuntimeMultipleArgumentsTests : public ZeAPITracingCoreTestsFixture, public ::testing::Test { protected: zet_core_callbacks_t prologCbs0 = {}; zet_core_callbacks_t epilogCbs0 = {}; zet_core_callbacks_t prologCbs1 = {}; zet_core_callbacks_t epilogCbs2 = {}; zet_core_callbacks_t prologCbs3 = {}; zet_core_callbacks_t epilogCbs3 = {}; zet_tracer_exp_handle_t apiTracerHandle0; zet_tracer_exp_handle_t apiTracerHandle1; zet_tracer_exp_handle_t apiTracerHandle2; zet_tracer_exp_handle_t apiTracerHandle3; zet_tracer_exp_desc_t tracerDesc0; zet_tracer_exp_desc_t tracerDesc1; zet_tracer_exp_desc_t tracerDesc2; zet_tracer_exp_desc_t tracerDesc3; int defaultUserData0 = 1; void *pUserData0; int defaultUserData1 = 11; void *pUserData1; int defaultUserdata2 = 21; void *pUserData2; int defaultUserData3 = 31; void *pUserData3; void SetUp() override { //NOLINT ze_result_t result; ZeAPITracingCoreTestsFixture::SetUp(); pUserData0 = &defaultUserData0; tracerDesc0.pUserData = pUserData0; result = zetTracerExpCreate(nullptr, &tracerDesc0, &apiTracerHandle0); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, apiTracerHandle0); pUserData1 = &defaultUserData1; tracerDesc1.pUserData = pUserData1; result = zetTracerExpCreate(nullptr, &tracerDesc1, &apiTracerHandle1); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, apiTracerHandle1); pUserData2 = &defaultUserdata2; tracerDesc2.pUserData = pUserData2; result = zetTracerExpCreate(nullptr, &tracerDesc2, &apiTracerHandle2); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, apiTracerHandle2); pUserData3 = &defaultUserData3; tracerDesc3.pUserData = pUserData3; result = zetTracerExpCreate(nullptr, &tracerDesc3, &apiTracerHandle3); EXPECT_EQ(ZE_RESULT_SUCCESS, result); 
EXPECT_NE(nullptr, apiTracerHandle3); } void TearDown() override { //NOLINT ze_result_t result; result = zetTracerExpSetEnabled(apiTracerHandle0, false); EXPECT_EQ(ZE_RESULT_SUCCESS, result); result = zetTracerExpDestroy(apiTracerHandle0); EXPECT_EQ(ZE_RESULT_SUCCESS, result); result = zetTracerExpSetEnabled(apiTracerHandle1, false); EXPECT_EQ(ZE_RESULT_SUCCESS, result); result = zetTracerExpDestroy(apiTracerHandle1); EXPECT_EQ(ZE_RESULT_SUCCESS, result); result = zetTracerExpSetEnabled(apiTracerHandle2, false); EXPECT_EQ(ZE_RESULT_SUCCESS, result); result = zetTracerExpDestroy(apiTracerHandle2); EXPECT_EQ(ZE_RESULT_SUCCESS, result); result = zetTracerExpSetEnabled(apiTracerHandle3, false); EXPECT_EQ(ZE_RESULT_SUCCESS, result); result = zetTracerExpDestroy(apiTracerHandle3); EXPECT_EQ(ZE_RESULT_SUCCESS, result); ZeAPITracingCoreTestsFixture::TearDown(); } void setTracerCallbacksAndEnableTracer() { ze_result_t result; /* Both prolog and epilog, pass instance data from prolog to epilog */ result = zetTracerExpSetPrologues(apiTracerHandle0, &prologCbs0); EXPECT_EQ(ZE_RESULT_SUCCESS, result); result = zetTracerExpSetEpilogues(apiTracerHandle0, &epilogCbs0); EXPECT_EQ(ZE_RESULT_SUCCESS, result); result = zetTracerExpSetEnabled(apiTracerHandle0, true); EXPECT_EQ(ZE_RESULT_SUCCESS, result); /* prolog only */ result = zetTracerExpSetPrologues(apiTracerHandle1, &prologCbs1); EXPECT_EQ(ZE_RESULT_SUCCESS, result); result = zetTracerExpSetEnabled(apiTracerHandle1, true); EXPECT_EQ(ZE_RESULT_SUCCESS, result); /* epilog only */ result = zetTracerExpSetEpilogues(apiTracerHandle2, &epilogCbs2); EXPECT_EQ(ZE_RESULT_SUCCESS, result); result = zetTracerExpSetEnabled(apiTracerHandle2, true); EXPECT_EQ(ZE_RESULT_SUCCESS, result); /* Both prolog and epilog, pass instance data from prolog to epilog */ result = zetTracerExpSetPrologues(apiTracerHandle3, &prologCbs3); EXPECT_EQ(ZE_RESULT_SUCCESS, result); result = zetTracerExpSetEpilogues(apiTracerHandle3, &epilogCbs3); 
EXPECT_EQ(ZE_RESULT_SUCCESS, result); result = zetTracerExpSetEnabled(apiTracerHandle3, true); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } void validateDefaultUserDataFinal() { EXPECT_EQ(defaultUserData0, 3); EXPECT_EQ(defaultUserData1, 22); EXPECT_EQ(defaultUserdata2, 42); EXPECT_EQ(defaultUserData3, 93); } }; template void genericPrologCallbackPtr(Tparams params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); *val += 1; } template void genericEpilogCallbackPtr(Tparams params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 1); } template THandleType generateRandomHandle() { return reinterpret_cast(static_cast(rand() % (RAND_MAX - 1) + 1)); } template TSizeType generateRandomSize() { return static_cast(rand()); } struct instanceDataStruct { void *instanceDataValue; }; } // namespace ult } // namespace L0 test_barrier_api_tracing.cpp000066400000000000000000000045541422164147700361710ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/experimental/test/unit_tests/sources/tracing/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "test_api_tracing_common.h" namespace L0 { namespace ult { TEST_F(zeAPITracingRuntimeTests, WhenCallingCommandListAppendBarrierTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.CommandList.pfnAppendBarrier = [](ze_command_list_handle_t hCommandList, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { return ZE_RESULT_SUCCESS; }; ze_event_handle_t hSignalEvent = nullptr; uint32_t numWaitEvents = 0; prologCbs.CommandList.pfnAppendBarrierCb = genericPrologCallbackPtr; epilogCbs.CommandList.pfnAppendBarrierCb = genericEpilogCallbackPtr; 
setTracerCallbacksAndEnableTracer(); result = zeCommandListAppendBarrier_Tracing(nullptr, hSignalEvent, numWaitEvents, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingCommandListAppendMemoryRangesBarrierTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.CommandList.pfnAppendMemoryRangesBarrier = [](ze_command_list_handle_t hCommandList, uint32_t numRanges, const size_t *pRangeSizes, const void **pRanges, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { return ZE_RESULT_SUCCESS; }; uint32_t numRanges = 1; const size_t pRangeSizes[] = {1}; const void **pRanges = new const void *[1]; driver_ddiTable.core_ddiTable.CommandList.pfnAppendMemoryRangesBarrier = [](ze_command_list_handle_t hCommandList, uint32_t numRanges, const size_t *pRangeSizes, const void **pRanges, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { return ZE_RESULT_SUCCESS; }; prologCbs.CommandList.pfnAppendMemoryRangesBarrierCb = genericPrologCallbackPtr; epilogCbs.CommandList.pfnAppendMemoryRangesBarrierCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeCommandListAppendMemoryRangesBarrier_Tracing(nullptr, numRanges, pRangeSizes, pRanges, nullptr, 0, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); delete[] pRanges; } } // namespace ult } // namespace L0 test_cmdlist_api_tracing.cpp000066400000000000000000000141621422164147700361760ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/experimental/test/unit_tests/sources/tracing/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "test_api_tracing_common.h" namespace L0 { namespace ult { TEST_F(zeAPITracingRuntimeTests, WhenCallingCommandListCreateTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; 
driver_ddiTable.core_ddiTable.CommandList.pfnCreate = [](ze_context_handle_t hContext, ze_device_handle_t hDevice, const ze_command_list_desc_t *desc, ze_command_list_handle_t *phCommandList) { return ZE_RESULT_SUCCESS; }; ze_command_list_desc_t desc = {}; ze_command_list_handle_t commandList = {}; prologCbs.CommandList.pfnCreateCb = genericPrologCallbackPtr; epilogCbs.CommandList.pfnCreateCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeCommandListCreate_Tracing(nullptr, nullptr, &desc, &commandList); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingCommandListCreateImmediateTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.CommandList.pfnCreateImmediate = [](ze_context_handle_t hContext, ze_device_handle_t hDevice, const ze_command_queue_desc_t *desc, ze_command_list_handle_t *phCommandList) { return ZE_RESULT_SUCCESS; }; ze_command_queue_desc_t desc = {}; ze_command_list_handle_t commandList = {}; prologCbs.CommandList.pfnCreateImmediateCb = genericPrologCallbackPtr; epilogCbs.CommandList.pfnCreateImmediateCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeCommandListCreateImmediate_Tracing(nullptr, nullptr, &desc, &commandList); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingCommandListDestroyTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.CommandList.pfnDestroy = [](ze_command_list_handle_t hCommandList) { return ZE_RESULT_SUCCESS; }; prologCbs.CommandList.pfnDestroyCb = genericPrologCallbackPtr; epilogCbs.CommandList.pfnDestroyCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeCommandListDestroy_Tracing(nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, 
WhenCallingCommandListResetTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.CommandList.pfnReset = [](ze_command_list_handle_t hCommandList) { return ZE_RESULT_SUCCESS; }; prologCbs.CommandList.pfnResetCb = genericPrologCallbackPtr; epilogCbs.CommandList.pfnResetCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeCommandListReset_Tracing(nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingCommandListAppendMemoryPrefetchTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.CommandList.pfnAppendMemoryPrefetch = [](ze_command_list_handle_t hCommandList, const void *ptr, size_t size) { return ZE_RESULT_SUCCESS; }; prologCbs.CommandList.pfnAppendMemoryPrefetchCb = genericPrologCallbackPtr; epilogCbs.CommandList.pfnAppendMemoryPrefetchCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeCommandListAppendMemoryPrefetch_Tracing(nullptr, nullptr, 0); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingCommandListCloseTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.CommandList.pfnClose = [](ze_command_list_handle_t hCommandList) { return ZE_RESULT_SUCCESS; }; prologCbs.CommandList.pfnCloseCb = genericPrologCallbackPtr; epilogCbs.CommandList.pfnCloseCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeCommandListClose_Tracing(nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingCommandListAppendQueryKernelTimestampsTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.CommandList.pfnAppendQueryKernelTimestamps = [](ze_command_list_handle_t hCommandList, uint32_t 
numEvents, ze_event_handle_t *phEvents, void *dstptr, const size_t *pOffsets, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { return ZE_RESULT_SUCCESS; }; prologCbs.CommandList.pfnAppendQueryKernelTimestampsCb = genericPrologCallbackPtr; epilogCbs.CommandList.pfnAppendQueryKernelTimestampsCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeCommandListAppendQueryKernelTimestamps_Tracing(nullptr, 1U, nullptr, nullptr, nullptr, nullptr, 1U, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingCommandListAppendWriteGlobalTimestampTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.CommandList.pfnAppendWriteGlobalTimestamp = [](ze_command_list_handle_t hCommandList, uint64_t *dstptr, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { return ZE_RESULT_SUCCESS; }; prologCbs.CommandList.pfnAppendWriteGlobalTimestampCb = genericPrologCallbackPtr; epilogCbs.CommandList.pfnAppendWriteGlobalTimestampCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeCommandListAppendWriteGlobalTimestamp_Tracing(nullptr, nullptr, nullptr, 1U, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } } // namespace ult } // namespace L0 test_cmdqueue_api_tracing.cpp000066400000000000000000000062731422164147700363530ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/experimental/test/unit_tests/sources/tracing/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "test_api_tracing_common.h" namespace L0 { namespace ult { TEST_F(zeAPITracingRuntimeTests, WhenCallingCommandQueueCreateTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.CommandQueue.pfnCreate = [](ze_context_handle_t hContext, 
ze_device_handle_t hDevice, const ze_command_queue_desc_t *desc, ze_command_queue_handle_t *phCommandQueue) { return ZE_RESULT_SUCCESS; }; ze_command_queue_desc_t desc = {}; ze_command_queue_handle_t commandQueue = {}; prologCbs.CommandQueue.pfnCreateCb = genericPrologCallbackPtr; epilogCbs.CommandQueue.pfnCreateCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeCommandQueueCreate_Tracing(nullptr, nullptr, &desc, &commandQueue); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingCommandQueueDestroyTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.CommandQueue.pfnDestroy = [](ze_command_queue_handle_t hCommandQueue) { return ZE_RESULT_SUCCESS; }; prologCbs.CommandQueue.pfnDestroyCb = genericPrologCallbackPtr; epilogCbs.CommandQueue.pfnDestroyCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeCommandQueueDestroy_Tracing(nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingCommandQueueExecuteCommandListsTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; uint32_t numCommandList = 0; ze_command_list_handle_t phCommandLists = {}; ze_fence_handle_t hFence = nullptr; driver_ddiTable.core_ddiTable.CommandQueue.pfnExecuteCommandLists = [](ze_command_queue_handle_t hCommandQueue, uint32_t numCommandLists, ze_command_list_handle_t *phCommandLists, ze_fence_handle_t hFence) { return ZE_RESULT_SUCCESS; }; prologCbs.CommandQueue.pfnExecuteCommandListsCb = genericPrologCallbackPtr; epilogCbs.CommandQueue.pfnExecuteCommandListsCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeCommandQueueExecuteCommandLists_Tracing(nullptr, numCommandList, &phCommandLists, hFence); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, 
WhenCallingCommandQueueSynchronizeTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.CommandQueue.pfnSynchronize = [](ze_command_queue_handle_t hCommandQueue, uint64_t timeout) { return ZE_RESULT_SUCCESS; }; uint64_t timeout = 100; prologCbs.CommandQueue.pfnSynchronizeCb = genericPrologCallbackPtr; epilogCbs.CommandQueue.pfnSynchronizeCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeCommandQueueSynchronize_Tracing(nullptr, timeout); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } } // namespace ult } // namespace L0 test_copy_api_tracing.cpp000066400000000000000000000247671422164147700355250ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/experimental/test/unit_tests/sources/tracing/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "test_api_tracing_common.h" namespace L0 { namespace ult { TEST_F(zeAPITracingRuntimeTests, WhenCallingCommandListAppendMemoryCopyTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.CommandList.pfnAppendMemoryCopy = [](ze_command_list_handle_t hCommandList, void *dstptr, const void *srcptr, size_t size, ze_event_handle_t hSignalEvent, uint32_t numWaitevents, ze_event_handle_t *phWaitEvents) { return ZE_RESULT_SUCCESS; }; size_t bufferSize = 4096u; void *dst = malloc(bufferSize); void *src = malloc(bufferSize); prologCbs.CommandList.pfnAppendMemoryCopyCb = genericPrologCallbackPtr; epilogCbs.CommandList.pfnAppendMemoryCopyCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeCommandListAppendMemoryCopy_Tracing(nullptr, dst, static_cast(src), bufferSize, nullptr, 0U, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); free(dst); free(src); } TEST_F(zeAPITracingRuntimeTests, 
WhenCallingCommandListAppendMemoryFillTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.CommandList.pfnAppendMemoryFill = [](ze_command_list_handle_t hCommandList, void *ptr, const void *pattern, size_t patternSize, size_t size, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { return ZE_RESULT_SUCCESS; }; size_t bufferSize = 4096u; void *dst = malloc(bufferSize); int pattern = 1; prologCbs.CommandList.pfnAppendMemoryFillCb = genericPrologCallbackPtr; epilogCbs.CommandList.pfnAppendMemoryFillCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeCommandListAppendMemoryFill_Tracing(nullptr, dst, &pattern, sizeof(pattern), bufferSize, nullptr, 0, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); free(dst); } TEST_F(zeAPITracingRuntimeTests, WhenCallingCommandListAppendMemoryCopyRegionTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.CommandList.pfnAppendMemoryCopyRegion = [](ze_command_list_handle_t hCommandList, void *dstptr, const ze_copy_region_t *dstRegion, uint32_t dstPitch, uint32_t dstSlicePitch, const void *srcptr, const ze_copy_region_t *srcRegion, uint32_t srcPitch, uint32_t srcSlicePitch, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { return ZE_RESULT_SUCCESS; }; size_t bufferSize = 4096u; void *dst = malloc(bufferSize); ze_copy_region_t dstRegion; uint32_t dstPitch = 1; void *src = malloc(bufferSize); ze_copy_region_t srcRegion; uint32_t srcPitch = 1; uint32_t dstSlicePitch = 0; uint32_t srcSlicePitch = 0; prologCbs.CommandList.pfnAppendMemoryCopyRegionCb = genericPrologCallbackPtr; epilogCbs.CommandList.pfnAppendMemoryCopyRegionCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeCommandListAppendMemoryCopyRegion_Tracing(nullptr, dst, &dstRegion, dstPitch, dstSlicePitch, 
static_cast(src), &srcRegion, srcPitch, srcSlicePitch, nullptr, 0, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); free(dst); free(src); } TEST_F(zeAPITracingRuntimeTests, WhenCallingCommandListAppendImageCopyTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.CommandList.pfnAppendImageCopy = [](ze_command_list_handle_t hCommandList, ze_image_handle_t hDstImage, ze_image_handle_t hSrcImage, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { return ZE_RESULT_SUCCESS; }; ze_image_handle_t hDstImage = static_cast(malloc(1)); ze_image_handle_t hSrcImage = static_cast(malloc(1)); prologCbs.CommandList.pfnAppendImageCopyCb = genericPrologCallbackPtr; epilogCbs.CommandList.pfnAppendImageCopyCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeCommandListAppendImageCopy_Tracing(nullptr, hDstImage, hSrcImage, nullptr, 0U, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); free(hDstImage); free(hSrcImage); } TEST_F(zeAPITracingRuntimeTests, WhenCallingCommandListAppendImageCopyRegionTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.CommandList.pfnAppendImageCopyRegion = [](ze_command_list_handle_t hCommandList, ze_image_handle_t hDstImage, ze_image_handle_t hSrcImage, const ze_image_region_t *pDstRegion, const ze_image_region_t *pSrcRegion, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { return ZE_RESULT_SUCCESS; }; ze_image_handle_t hDstImage = static_cast(malloc(1)); ze_image_handle_t hSrcImage = static_cast(malloc(1)); prologCbs.CommandList.pfnAppendImageCopyRegionCb = genericPrologCallbackPtr; epilogCbs.CommandList.pfnAppendImageCopyRegionCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeCommandListAppendImageCopyRegion_Tracing(nullptr, hDstImage, hSrcImage, 
nullptr, nullptr, nullptr, 0U, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); free(hDstImage); free(hSrcImage); } TEST_F(zeAPITracingRuntimeTests, WhenCallingCommandListAppendImageCopyToMemoryTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.CommandList.pfnAppendImageCopyToMemory = [](ze_command_list_handle_t hCommandList, void *dstptr, ze_image_handle_t hSrcImage, const ze_image_region_t *pSrcRegion, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { return ZE_RESULT_SUCCESS; }; ze_image_handle_t hSrcImage = static_cast(malloc(1)); void *dstptr = malloc(1); prologCbs.CommandList.pfnAppendImageCopyToMemoryCb = genericPrologCallbackPtr; epilogCbs.CommandList.pfnAppendImageCopyToMemoryCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeCommandListAppendImageCopyToMemory_Tracing(nullptr, dstptr, hSrcImage, nullptr, nullptr, 0U, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); free(hSrcImage); free(dstptr); } TEST_F(zeAPITracingRuntimeTests, WhenCallingCommandListAppendImageCopyFromMemoryTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.CommandList.pfnAppendImageCopyFromMemory = [](ze_command_list_handle_t hCommandList, ze_image_handle_t hDstImage, const void *srcptr, const ze_image_region_t *pDstRegion, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { return ZE_RESULT_SUCCESS; }; ze_image_handle_t hDstImage = static_cast(malloc(1)); void *srcptr = malloc(1); prologCbs.CommandList.pfnAppendImageCopyFromMemoryCb = genericPrologCallbackPtr; epilogCbs.CommandList.pfnAppendImageCopyFromMemoryCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeCommandListAppendImageCopyFromMemory_Tracing(nullptr, hDstImage, srcptr, nullptr, nullptr, 0U, nullptr); 
EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); free(hDstImage); free(srcptr); } TEST_F(zeAPITracingRuntimeTests, WhenCallingCommandListAppendMemAdviseTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.CommandList.pfnAppendMemAdvise = [](ze_command_list_handle_t hCommandList, ze_device_handle_t hDevice, const void *ptr, size_t size, ze_memory_advice_t advice) { return ZE_RESULT_SUCCESS; }; size_t bufferSize = 4096u; void *ptr = malloc(bufferSize); ze_memory_advice_t advice = ZE_MEMORY_ADVICE_SET_READ_MOSTLY; prologCbs.CommandList.pfnAppendMemAdviseCb = genericPrologCallbackPtr; epilogCbs.CommandList.pfnAppendMemAdviseCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeCommandListAppendMemAdvise_Tracing(nullptr, nullptr, ptr, bufferSize, advice); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); free(ptr); } TEST_F(zeAPITracingRuntimeTests, WhenCallingCommandListAppendMemoryCopyFromContextTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.CommandList.pfnAppendMemoryCopyFromContext = [](ze_command_list_handle_t hCommandList, void *dstptr, ze_context_handle_t hContextSrc, const void *srcptr, size_t size, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { return ZE_RESULT_SUCCESS; }; prologCbs.CommandList.pfnAppendMemoryCopyFromContextCb = genericPrologCallbackPtr; epilogCbs.CommandList.pfnAppendMemoryCopyFromContextCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeCommandListAppendMemoryCopyFromContext_Tracing(nullptr, nullptr, nullptr, nullptr, 0U, nullptr, 1u, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } } // namespace ult } // namespace L0 
test_core_api_tracing.cpp000066400000000000000000000327601422164147700354730ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/experimental/test/unit_tests/sources/tracing/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "test_api_tracing_common.h" namespace L0 { namespace ult { void OnEnterCommandListAppendLaunchFunction( ze_command_list_append_launch_kernel_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { int a = 0; a++; } void OnExitCommandListAppendLaunchFunction( ze_command_list_append_launch_kernel_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { int a = 0; a++; } void OnEnterCommandListCreateWithUserData( ze_command_list_create_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(5, *val); } void OnExitCommandListCreateWithUserData( ze_command_list_create_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(5, *val); } void OnEnterCommandListCloseWithUserData( ze_command_list_close_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(5, *val); } void OnExitCommandListCloseWithUserData( ze_command_list_close_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(5, *val); } void OnEnterCommandListCloseWithUserDataRecursion( ze_command_list_close_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { ASSERT_NE(nullptr, pTracerUserData); int *val = 
static_cast(pTracerUserData); EXPECT_EQ(5, *val); *val += 5; } void OnExitCommandListCloseWithUserDataRecursion( ze_command_list_close_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(10, *val); *val += 5; } void OnEnterCommandListCloseWithUserDataAndAllocateInstanceData( ze_command_list_close_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { ASSERT_NE(nullptr, pTracerUserData); int *userdata = static_cast(pTracerUserData); EXPECT_EQ(5, *userdata); int *instanceData = new int; ppTracerInstanceUserData[0] = instanceData; *instanceData = 0x1234; } void OnExitCommandListCloseWithUserDataAndReadInstanceData( ze_command_list_close_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { ASSERT_NE(nullptr, pTracerUserData); int *userdata = static_cast(pTracerUserData); EXPECT_EQ(5, *userdata); ASSERT_NE(nullptr, ppTracerInstanceUserData); ASSERT_NE(nullptr, ppTracerInstanceUserData[0]); int *instanceData = static_cast(ppTracerInstanceUserData[0]); ASSERT_NE(nullptr, instanceData); if (nullptr == instanceData) return; int data = *instanceData; EXPECT_EQ(0x1234, data); delete instanceData; } void OnEnterCommandListCloseWithoutUserDataAndAllocateInstanceData( ze_command_list_close_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { ASSERT_EQ(nullptr, pTracerUserData); int *instanceData = new int; ppTracerInstanceUserData[0] = instanceData; *instanceData = 0x1234; } void OnExitCommandListCloseWithoutUserDataAndReadInstanceData( ze_command_list_close_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { ASSERT_EQ(nullptr, pTracerUserData); ASSERT_NE(nullptr, ppTracerInstanceUserData); ASSERT_NE(nullptr, ppTracerInstanceUserData[0]); int *instanceData = 
static_cast(ppTracerInstanceUserData[0]); ASSERT_NE(nullptr, instanceData); if (nullptr == instanceData) return; int data = *instanceData; EXPECT_EQ(0x1234, data); delete instanceData; } TEST(zeAPITracingCoreTestsNoSetup, WhenCreateTracerAndNoZetInitThenReturnFailure) { ze_result_t result; zet_tracer_exp_handle_t APITracerHandle; zet_tracer_exp_desc_t tracer_desc = {}; result = zetTracerExpCreate(nullptr, &tracer_desc, &APITracerHandle); EXPECT_EQ(ZE_RESULT_ERROR_UNINITIALIZED, result); } TEST_F(zeAPITracingCoreTests, WhenCreateTracerAndsetCallbacksAndEnableTracingAndDisableTracingAndDestroyTracerThenReturnSuccess) { ze_result_t result; zet_tracer_exp_handle_t APITracerHandle; zet_tracer_exp_desc_t tracer_desc = {}; result = zetTracerExpCreate(nullptr, &tracer_desc, &APITracerHandle); EXPECT_EQ(ZE_RESULT_SUCCESS, result); ASSERT_NE(nullptr, APITracerHandle); zet_core_callbacks_t prologCbs = {}; zet_core_callbacks_t epilogCbs = {}; prologCbs.CommandList.pfnAppendLaunchKernelCb = OnEnterCommandListAppendLaunchFunction; epilogCbs.CommandList.pfnAppendLaunchKernelCb = OnExitCommandListAppendLaunchFunction; result = zetTracerExpSetPrologues(APITracerHandle, &prologCbs); EXPECT_EQ(ZE_RESULT_SUCCESS, result); result = zetTracerExpSetEpilogues(APITracerHandle, &epilogCbs); EXPECT_EQ(ZE_RESULT_SUCCESS, result); result = zetTracerExpSetEnabled(APITracerHandle, true); EXPECT_EQ(ZE_RESULT_SUCCESS, result); result = zetTracerExpSetEnabled(APITracerHandle, false); EXPECT_EQ(ZE_RESULT_SUCCESS, result); result = zetTracerExpDestroy(APITracerHandle); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST_F(zeAPITracingCoreTests, WhenCallingTracerWrapperWithOnePrologAndNoEpilogWithUserDataAndUserDataMatchingInPrologThenReturnSuccess) { MockCommandList commandList; ze_result_t result; int user_data = 5; ze_command_list_close_params_t tracerParams; zet_core_callbacks_t prologCbs = {}; prologCbs.CommandList.pfnCloseCb = OnEnterCommandListCloseWithUserData; ze_command_list_handle_t 
command_list_handle = commandList.toHandle(); tracerParams.phCommandList = &command_list_handle; std::vector> prologCallbacks; std::vector> epilogCallbacks; APITracerCallbackStateImp prologCallback; prologCallback.current_api_callback = prologCbs.CommandList.pfnCloseCb; prologCallback.pUserData = &user_data; prologCallbacks.push_back(prologCallback); ze_pfnCommandListCloseCb_t apiOrdinal = {}; result = APITracerWrapperImp(zeCommandListClose, &tracerParams, apiOrdinal, prologCallbacks, epilogCallbacks, *tracerParams.phCommandList); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST_F(zeAPITracingCoreTests, WhenCallingTracerWrapperWithOneSetOfPrologEpilogsWithUserDataAndUserDataMatchingInPrologAndEpilogThenReturnSuccess) { MockCommandList commandList; ze_result_t result; int user_data = 5; ze_command_list_close_params_t tracerParams; zet_core_callbacks_t prologCbs = {}; zet_core_callbacks_t epilogCbs = {}; prologCbs.CommandList.pfnCloseCb = OnEnterCommandListCloseWithUserData; epilogCbs.CommandList.pfnCloseCb = OnExitCommandListCloseWithUserData; ze_command_list_handle_t command_list_handle = commandList.toHandle(); tracerParams.phCommandList = &command_list_handle; std::vector> prologCallbacks; std::vector> epilogCallbacks; APITracerCallbackStateImp prologCallback; APITracerCallbackStateImp epilogCallback; prologCallback.current_api_callback = prologCbs.CommandList.pfnCloseCb; epilogCallback.current_api_callback = epilogCbs.CommandList.pfnCloseCb; prologCallback.pUserData = &user_data; epilogCallback.pUserData = &user_data; prologCallbacks.push_back(prologCallback); epilogCallbacks.push_back(epilogCallback); ze_pfnCommandListCloseCb_t apiOrdinal = {}; result = APITracerWrapperImp(zeCommandListClose, &tracerParams, apiOrdinal, prologCallbacks, epilogCallbacks, *tracerParams.phCommandList); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST_F(zeAPITracingCoreTests, 
WhenCallingTracerWrapperWithOneSetOfPrologEpilogsWithUserDataAndInstanceDataUserDataMatchingInPrologAndEpilogThenReturnSuccess) { MockCommandList commandList; ze_result_t result; int user_data = 5; ze_command_list_close_params_t tracerParams; zet_core_callbacks_t prologCbs = {}; zet_core_callbacks_t epilogCbs = {}; prologCbs.CommandList.pfnCloseCb = OnEnterCommandListCloseWithUserDataAndAllocateInstanceData; epilogCbs.CommandList.pfnCloseCb = OnExitCommandListCloseWithUserDataAndReadInstanceData; ze_command_list_handle_t command_list_handle = commandList.toHandle(); tracerParams.phCommandList = &command_list_handle; std::vector> prologCallbacks; std::vector> epilogCallbacks; APITracerCallbackStateImp prologCallback; APITracerCallbackStateImp epilogCallback; prologCallback.current_api_callback = prologCbs.CommandList.pfnCloseCb; epilogCallback.current_api_callback = epilogCbs.CommandList.pfnCloseCb; prologCallback.pUserData = &user_data; epilogCallback.pUserData = &user_data; prologCallbacks.push_back(prologCallback); epilogCallbacks.push_back(epilogCallback); ze_pfnCommandListCloseCb_t apiOrdinal = {}; result = APITracerWrapperImp(zeCommandListClose, &tracerParams, apiOrdinal, prologCallbacks, epilogCallbacks, *tracerParams.phCommandList); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST_F(zeAPITracingCoreTests, WhenCallingTracerWrapperWithOneSetOfPrologEpilogsWithInstanceDataThenReturnSuccess) { MockCommandList commandList; ze_result_t result; ze_command_list_close_params_t tracerParams; zet_core_callbacks_t prologCbs = {}; zet_core_callbacks_t epilogCbs = {}; prologCbs.CommandList.pfnCloseCb = OnEnterCommandListCloseWithoutUserDataAndAllocateInstanceData; epilogCbs.CommandList.pfnCloseCb = OnExitCommandListCloseWithoutUserDataAndReadInstanceData; ze_command_list_handle_t command_list_handle = commandList.toHandle(); tracerParams.phCommandList = &command_list_handle; std::vector> prologCallbacks; std::vector> epilogCallbacks; APITracerCallbackStateImp prologCallback; 
APITracerCallbackStateImp epilogCallback; prologCallback.current_api_callback = prologCbs.CommandList.pfnCloseCb; epilogCallback.current_api_callback = epilogCbs.CommandList.pfnCloseCb; prologCallback.pUserData = nullptr; epilogCallback.pUserData = nullptr; prologCallbacks.push_back(prologCallback); epilogCallbacks.push_back(epilogCallback); ze_pfnCommandListCloseCb_t apiOrdinal = {}; result = APITracerWrapperImp(zeCommandListClose, &tracerParams, apiOrdinal, prologCallbacks, epilogCallbacks, *tracerParams.phCommandList); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST_F(zeAPITracingCoreTests, WhenCallingTracerWrapperWithOneSetOfPrologEpilogsWithRecursionHandledThenSuccessIsReturned) { MockCommandList commandList; ze_result_t result; int user_data = 5; ze_command_list_close_params_t tracerParams; zet_core_callbacks_t prologCbs = {}; zet_core_callbacks_t epilogCbs = {}; prologCbs.CommandList.pfnCloseCb = OnEnterCommandListCloseWithUserDataRecursion; epilogCbs.CommandList.pfnCloseCb = OnExitCommandListCloseWithUserDataRecursion; ze_command_list_handle_t command_list_handle = commandList.toHandle(); tracerParams.phCommandList = &command_list_handle; std::vector> prologCallbacks; std::vector> epilogCallbacks; APITracerCallbackStateImp prologCallback; APITracerCallbackStateImp epilogCallback; prologCallback.current_api_callback = prologCbs.CommandList.pfnCloseCb; epilogCallback.current_api_callback = epilogCbs.CommandList.pfnCloseCb; prologCallback.pUserData = &user_data; epilogCallback.pUserData = &user_data; prologCallbacks.push_back(prologCallback); epilogCallbacks.push_back(epilogCallback); ze_pfnCommandListCloseCb_t apiOrdinal = {}; result = callHandleTracerRecursion(zeCommandListClose, command_list_handle); EXPECT_EQ(ZE_RESULT_ERROR_UNKNOWN, result); result = APITracerWrapperImp(zeCommandListClose, &tracerParams, apiOrdinal, prologCallbacks, epilogCallbacks, *tracerParams.phCommandList); EXPECT_EQ(ZE_RESULT_SUCCESS, result); result = 
callHandleTracerRecursion(zeCommandListClose, command_list_handle); EXPECT_EQ(ZE_RESULT_ERROR_UNKNOWN, result); result = callHandleTracerRecursion(zeCommandListClose, command_list_handle); EXPECT_EQ(ZE_RESULT_SUCCESS, result); L0::tracingInProgress = 0; } } // namespace ult } // namespace L0 test_device_api_tracing.cpp000066400000000000000000000245621422164147700360030ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/experimental/test/unit_tests/sources/tracing/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "test_api_tracing_common.h" namespace L0 { namespace ult { TEST_F(zeAPITracingRuntimeTests, WhenCallingDeviceGetTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.Device.pfnGet = [](ze_driver_handle_t hDriver, uint32_t *pCount, ze_device_handle_t *phDevices) { return ZE_RESULT_SUCCESS; }; prologCbs.Device.pfnGetCb = genericPrologCallbackPtr; epilogCbs.Device.pfnGetCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeDeviceGet_Tracing(nullptr, nullptr, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingDeviceGetPropertiesTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.Device.pfnGetProperties = [](ze_device_handle_t hDevice, ze_device_properties_t *pDeviceProperties) { return ZE_RESULT_SUCCESS; }; prologCbs.Device.pfnGetPropertiesCb = genericPrologCallbackPtr; epilogCbs.Device.pfnGetPropertiesCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeDeviceGetProperties_Tracing(nullptr, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingDeviceGetComputePropertiesTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; 
driver_ddiTable.core_ddiTable.Device.pfnGetComputeProperties = [](ze_device_handle_t hDevice, ze_device_compute_properties_t *pComputeProperties) { return ZE_RESULT_SUCCESS; }; prologCbs.Device.pfnGetComputePropertiesCb = genericPrologCallbackPtr; epilogCbs.Device.pfnGetComputePropertiesCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeDeviceGetComputeProperties_Tracing(nullptr, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingDeviceGetMemoryPropertiesTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.Device.pfnGetMemoryProperties = [](ze_device_handle_t hDevice, uint32_t *pCount, ze_device_memory_properties_t *pMemProperties) { return ZE_RESULT_SUCCESS; }; prologCbs.Device.pfnGetMemoryPropertiesCb = genericPrologCallbackPtr; epilogCbs.Device.pfnGetMemoryPropertiesCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeDeviceGetMemoryProperties_Tracing(nullptr, nullptr, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingDeviceGetCachePropertiesTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.Device.pfnGetCacheProperties = [](ze_device_handle_t hDevice, uint32_t *pCount, ze_device_cache_properties_t *pCacheProperties) { return ZE_RESULT_SUCCESS; }; prologCbs.Device.pfnGetCachePropertiesCb = genericPrologCallbackPtr; epilogCbs.Device.pfnGetCachePropertiesCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeDeviceGetCacheProperties_Tracing(nullptr, nullptr, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingDeviceGetImagePropertiesTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; 
driver_ddiTable.core_ddiTable.Device.pfnGetImageProperties = [](ze_device_handle_t hDevice, ze_device_image_properties_t *pImageProperties) { return ZE_RESULT_SUCCESS; }; prologCbs.Device.pfnGetImagePropertiesCb = genericPrologCallbackPtr; epilogCbs.Device.pfnGetImagePropertiesCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeDeviceGetImageProperties_Tracing(nullptr, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingDeviceGetSubDevicesTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.Device.pfnGetSubDevices = [](ze_device_handle_t hDevice, uint32_t *pCount, ze_device_handle_t *phSubdevices) { return ZE_RESULT_SUCCESS; }; uint32_t pcount = 1; prologCbs.Device.pfnGetSubDevicesCb = genericPrologCallbackPtr; epilogCbs.Device.pfnGetSubDevicesCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeDeviceGetSubDevices_Tracing(nullptr, &pcount, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingDeviceGetP2PPropertiesTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.Device.pfnGetP2PProperties = [](ze_device_handle_t hDevice, ze_device_handle_t hPeerDevice, ze_device_p2p_properties_t *pP2PProperties) { return ZE_RESULT_SUCCESS; }; ze_device_p2p_properties_t pP2PProperties; prologCbs.Device.pfnGetP2PPropertiesCb = genericPrologCallbackPtr; epilogCbs.Device.pfnGetP2PPropertiesCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeDeviceGetP2PProperties_Tracing(nullptr, nullptr, &pP2PProperties); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingDeviceCanAccessPeerTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; 
driver_ddiTable.core_ddiTable.Device.pfnCanAccessPeer = [](ze_device_handle_t hDevice, ze_device_handle_t hPeerDevice, ze_bool_t *value) { return ZE_RESULT_SUCCESS; }; ze_bool_t value; prologCbs.Device.pfnCanAccessPeerCb = genericPrologCallbackPtr; epilogCbs.Device.pfnCanAccessPeerCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeDeviceCanAccessPeer_Tracing(nullptr, nullptr, &value); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingKernelSetCacheConfigTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.Kernel.pfnSetCacheConfig = [](ze_kernel_handle_t hKernel, ze_cache_config_flags_t flags) { return ZE_RESULT_SUCCESS; }; ze_cache_config_flags_t flags = {}; prologCbs.Kernel.pfnSetCacheConfigCb = genericPrologCallbackPtr; epilogCbs.Kernel.pfnSetCacheConfigCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeKernelSetCacheConfig_Tracing(nullptr, flags); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingDeviceGetModulePropertiesTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.Device.pfnGetModuleProperties = [](ze_device_handle_t hDevice, ze_device_module_properties_t *pModuleProperties) { return ZE_RESULT_SUCCESS; }; prologCbs.Device.pfnGetModulePropertiesCb = genericPrologCallbackPtr; epilogCbs.Device.pfnGetModulePropertiesCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeDeviceGetModuleProperties_Tracing(nullptr, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingDeviceGetMemoryAccessPropertiesTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.Device.pfnGetMemoryAccessProperties = [](ze_device_handle_t 
hDevice, ze_device_memory_access_properties_t *pMemAccessProperties) { return ZE_RESULT_SUCCESS; }; prologCbs.Device.pfnGetMemoryAccessPropertiesCb = genericPrologCallbackPtr; epilogCbs.Device.pfnGetMemoryAccessPropertiesCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeDeviceGetMemoryAccessProperties_Tracing(nullptr, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingDeviceGetCommandQueueGroupPropertiesTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.Device.pfnGetCommandQueueGroupProperties = [](ze_device_handle_t hDevice, uint32_t *pCount, ze_command_queue_group_properties_t *pCommandQueueGroupProperties) { return ZE_RESULT_SUCCESS; }; prologCbs.Device.pfnGetCommandQueueGroupPropertiesCb = genericPrologCallbackPtr; epilogCbs.Device.pfnGetCommandQueueGroupPropertiesCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeDeviceGetCommandQueueGroupProperties_Tracing(nullptr, nullptr, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingDeviceGetExternalMemoryPropertiesTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.Device.pfnGetExternalMemoryProperties = [](ze_device_handle_t hDevice, ze_device_external_memory_properties_t *pExternalMemoryProperties) { return ZE_RESULT_SUCCESS; }; prologCbs.Device.pfnGetExternalMemoryPropertiesCb = genericPrologCallbackPtr; epilogCbs.Device.pfnGetExternalMemoryPropertiesCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeDeviceGetExternalMemoryProperties_Tracing(nullptr, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingDeviceGetStatusTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; 
driver_ddiTable.core_ddiTable.Device.pfnGetStatus = [](ze_device_handle_t hDevice) { return ZE_RESULT_SUCCESS; }; prologCbs.Device.pfnGetStatusCb = genericPrologCallbackPtr; epilogCbs.Device.pfnGetStatusCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeDeviceGetStatus_Tracing(nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } } // namespace ult } // namespace L0 test_driver_api_tracing.cpp000066400000000000000000000065341422164147700360360ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/experimental/test/unit_tests/sources/tracing/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "test_api_tracing_common.h" namespace L0 { namespace ult { TEST_F(zeAPITracingRuntimeTests, WhenCallingzeDriverGetTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.Driver.pfnGet = [](uint32_t *pCount, ze_driver_handle_t *phDrivers) { return ZE_RESULT_SUCCESS; }; prologCbs.Driver.pfnGetCb = genericPrologCallbackPtr; epilogCbs.Driver.pfnGetCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeDriverGet_Tracing(nullptr, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingzeDriverGetPropertiesTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.Driver.pfnGetProperties = [](ze_driver_handle_t hDriver, ze_driver_properties_t *properties) { return ZE_RESULT_SUCCESS; }; prologCbs.Device.pfnGetSubDevicesCb = genericPrologCallbackPtr; epilogCbs.Device.pfnGetSubDevicesCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeDriverGetProperties_Tracing(nullptr, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST_F(zeAPITracingRuntimeTests, WhenCallingzeDriverGetApiVersionTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; 
driver_ddiTable.core_ddiTable.Driver.pfnGetApiVersion = [](ze_driver_handle_t hDrivers, ze_api_version_t *version) { return ZE_RESULT_SUCCESS; }; prologCbs.Driver.pfnGetApiVersionCb = genericPrologCallbackPtr; epilogCbs.Driver.pfnGetApiVersionCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeDriverGetApiVersion_Tracing(nullptr, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingzeDriverGetIpcPropertiesTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.Driver.pfnGetIpcProperties = [](ze_driver_handle_t hDrivers, ze_driver_ipc_properties_t *pIpcProperties) { return ZE_RESULT_SUCCESS; }; prologCbs.Driver.pfnGetIpcPropertiesCb = genericPrologCallbackPtr; epilogCbs.Driver.pfnGetIpcPropertiesCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeDriverGetIpcProperties_Tracing(nullptr, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingzeDriverGetExtensionPropertiesTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.Driver.pfnGetExtensionProperties = [](ze_driver_handle_t hDrivers, uint32_t *pCount, ze_driver_extension_properties_t *pExtensionProperties) { return ZE_RESULT_SUCCESS; }; prologCbs.Driver.pfnGetExtensionPropertiesCb = genericPrologCallbackPtr; epilogCbs.Driver.pfnGetExtensionPropertiesCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeDriverGetExtensionProperties_Tracing(nullptr, nullptr, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } } // namespace ult } // namespace L0 test_event_api_multi_tracing.cpp000066400000000000000000003102041422164147700370660ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/experimental/test/unit_tests/sources/tracing/* * Copyright (C) 2020 
Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "test_api_tracing_common.h" namespace L0 { namespace ult { // Multi prolog/epilog Event Tests struct { ze_event_pool_handle_t hEventPool0; ze_event_desc_t desc0; ze_event_handle_t hEvent0; ze_event_pool_handle_t hEventPool1; ze_event_desc_t desc1; ze_event_handle_t hEvent1; ze_event_handle_t hEventAPI; void *instanceData0; void *instanceData3; } event_create_args; TEST_F(zeAPITracingRuntimeMultipleArgumentsTests, WhenCallingEventCreateTracingWrapperWithMultiplePrologEpilogsThenReturnSuccess) { ze_result_t result; // initialize initial argument set event_create_args.hEventPool0 = generateRandomHandle(); event_create_args.hEvent0 = generateRandomHandle(); // initialize replacement argument set event_create_args.hEventPool1 = generateRandomHandle(); event_create_args.hEvent1 = generateRandomHandle(); // initialize user instance data event_create_args.instanceData0 = generateRandomHandle(); event_create_args.instanceData3 = generateRandomHandle(); driver_ddiTable.core_ddiTable.Event.pfnCreate = [](ze_event_pool_handle_t hEventPool, const ze_event_desc_t *desc, ze_event_handle_t *phEvent) { EXPECT_EQ(event_create_args.hEventPool1, hEventPool); EXPECT_EQ(&event_create_args.desc1, desc); EXPECT_EQ(&event_create_args.hEvent1, phEvent); EXPECT_EQ(event_create_args.hEvent1, *phEvent); event_create_args.hEventAPI = generateRandomHandle(); *phEvent = event_create_args.hEventAPI; return ZE_RESULT_SUCCESS; }; // // The 0th prolog replaces the orignal API arguments with a new set // Create instance data, pass it to corresponding epilog. 
// prologCbs0.Event.pfnCreateCb = [](ze_event_create_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(event_create_args.hEventPool0, *params->phEventPool); EXPECT_EQ(&event_create_args.desc0, *params->pdesc); EXPECT_EQ(&event_create_args.hEvent0, *params->pphEvent); ze_event_handle_t **ppHandle; ASSERT_NE(nullptr, params); ppHandle = params->pphEvent; ze_event_handle_t *pHandle; ASSERT_NE(nullptr, ppHandle); pHandle = *ppHandle; ze_event_handle_t handle; ASSERT_NE(nullptr, pHandle); handle = *pHandle; EXPECT_EQ(event_create_args.hEvent0, handle); *params->phEventPool = event_create_args.hEventPool1; *params->pdesc = &event_create_args.desc1; *params->pphEvent = &event_create_args.hEvent1; ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 1); *val += 1; struct instanceDataStruct *instanceData = new struct instanceDataStruct; instanceData->instanceDataValue = event_create_args.instanceData0; *ppTracerInstanceUserData = instanceData; }; // // The 0th epilog expects to see the API argument replacements // Expect to receive instance data from corresponding prolog // epilogCbs0.Event.pfnCreateCb = [](ze_event_create_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { struct instanceDataStruct *instanceData; EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(event_create_args.hEventPool1, *params->phEventPool); EXPECT_EQ(&event_create_args.desc1, *params->pdesc); EXPECT_EQ(&event_create_args.hEvent1, *params->pphEvent); ze_event_handle_t **ppHandle; ASSERT_NE(nullptr, params); ppHandle = params->pphEvent; ze_event_handle_t *pHandle; ASSERT_NE(nullptr, ppHandle); pHandle = *ppHandle; ze_event_handle_t handle; ASSERT_NE(nullptr, pHandle); handle = *pHandle; EXPECT_EQ(event_create_args.hEvent1, handle); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 2); *val += 1; instanceData = (struct 
instanceDataStruct *)*ppTracerInstanceUserData; EXPECT_EQ(instanceData->instanceDataValue, event_create_args.instanceData0); delete instanceData; }; // // The 1st prolog sees the arguments as replaced by the 0th prolog. // There is no epilog for this prolog, so don't allocate instance data // prologCbs1.Event.pfnCreateCb = [](ze_event_create_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(event_create_args.hEventPool1, *params->phEventPool); EXPECT_EQ(&event_create_args.desc1, *params->pdesc); EXPECT_EQ(&event_create_args.hEvent1, *params->pphEvent); ze_event_handle_t **ppHandle; ASSERT_NE(nullptr, params); ppHandle = params->pphEvent; ze_event_handle_t *pHandle; ASSERT_NE(nullptr, ppHandle); pHandle = *ppHandle; ze_event_handle_t handle; ASSERT_NE(nullptr, pHandle); handle = *pHandle; EXPECT_EQ(event_create_args.hEvent1, handle); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 11); *val += 11; }; // // The 2nd epilog expects to see the API argument replacements // There is no corresponding prolog, so there is no instance data // epilogCbs2.Event.pfnCreateCb = [](ze_event_create_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(event_create_args.hEventPool1, *params->phEventPool); EXPECT_EQ(&event_create_args.desc1, *params->pdesc); EXPECT_EQ(&event_create_args.hEvent1, *params->pphEvent); ze_event_handle_t **ppHandle; ASSERT_NE(nullptr, params); ppHandle = params->pphEvent; ze_event_handle_t *pHandle; ASSERT_NE(nullptr, ppHandle); pHandle = *ppHandle; ze_event_handle_t handle; ASSERT_NE(nullptr, pHandle); handle = *pHandle; EXPECT_EQ(event_create_args.hEvent1, handle); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 21); *val += 21; }; // // The 3rd prolog expects to see the API argument replacements and doesn't modify them 
// Create instance data and pass to corresponding epilog // prologCbs3.Event.pfnCreateCb = [](ze_event_create_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(event_create_args.hEventPool1, *params->phEventPool); EXPECT_EQ(&event_create_args.desc1, *params->pdesc); EXPECT_EQ(&event_create_args.hEvent1, *params->pphEvent); ze_event_handle_t **ppHandle; ASSERT_NE(nullptr, params); ppHandle = params->pphEvent; ze_event_handle_t *pHandle; ASSERT_NE(nullptr, ppHandle); pHandle = *ppHandle; ze_event_handle_t handle; ASSERT_NE(nullptr, pHandle); handle = *pHandle; EXPECT_EQ(event_create_args.hEvent1, handle); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 31); *val += 31; struct instanceDataStruct *instanceData = new struct instanceDataStruct; instanceData->instanceDataValue = event_create_args.instanceData3; *ppTracerInstanceUserData = instanceData; }; // // The 3rd epilog expects to see the API argument replacements // Expect to see instance data from corresponding prolog // epilogCbs3.Event.pfnCreateCb = [](ze_event_create_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { struct instanceDataStruct *instanceData; EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(event_create_args.hEventPool1, *params->phEventPool); EXPECT_EQ(&event_create_args.desc1, *params->pdesc); EXPECT_EQ(&event_create_args.hEvent1, *params->pphEvent); ze_event_handle_t **ppHandle; ASSERT_NE(nullptr, params); ppHandle = params->pphEvent; ze_event_handle_t *pHandle; ASSERT_NE(nullptr, ppHandle); pHandle = *ppHandle; ze_event_handle_t handle; ASSERT_NE(nullptr, pHandle); handle = *pHandle; EXPECT_EQ(event_create_args.hEvent1, handle); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 62); *val += 31; instanceData = (struct instanceDataStruct *)*ppTracerInstanceUserData; EXPECT_EQ(instanceData->instanceDataValue, 
event_create_args.instanceData3); delete instanceData; }; setTracerCallbacksAndEnableTracer(); result = zeEventCreate_Tracing(event_create_args.hEventPool0, &event_create_args.desc0, &event_create_args.hEvent0); EXPECT_EQ(ZE_RESULT_SUCCESS, result); validateDefaultUserDataFinal(); } struct { ze_event_handle_t hEvent0; ze_event_handle_t hEvent1; void *instanceData0; void *instanceData3; } event_destroy_args; TEST_F(zeAPITracingRuntimeMultipleArgumentsTests, WhenCallingEventDestroyTracingWrapperWithMultiplePrologEpilogsThenReturnSuccess) { ze_result_t result; // initialize initial argument set event_destroy_args.hEvent0 = generateRandomHandle(); // initialize replacement argument set event_destroy_args.hEvent1 = generateRandomHandle(); // initialize user instance data event_destroy_args.instanceData0 = generateRandomHandle(); event_destroy_args.instanceData3 = generateRandomHandle(); driver_ddiTable.core_ddiTable.Event.pfnDestroy = [](ze_event_handle_t hEvent) { EXPECT_EQ(event_destroy_args.hEvent1, hEvent); return ZE_RESULT_SUCCESS; }; // // The 0th prolog replaces the orignal API arguments with a new set // Create instance data, pass it to corresponding epilog. 
// prologCbs0.Event.pfnDestroyCb = [](ze_event_destroy_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(event_destroy_args.hEvent0, *params->phEvent); *params->phEvent = event_destroy_args.hEvent1; ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 1); *val += 1; struct instanceDataStruct *instanceData = new struct instanceDataStruct; instanceData->instanceDataValue = event_destroy_args.instanceData0; *ppTracerInstanceUserData = instanceData; }; // // The 0th epilog expects to see the API argument replacements // Expect to receive instance data from corresponding prolog // epilogCbs0.Event.pfnDestroyCb = [](ze_event_destroy_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { struct instanceDataStruct *instanceData; EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(event_destroy_args.hEvent1, *params->phEvent); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 2); *val += 1; instanceData = (struct instanceDataStruct *)*ppTracerInstanceUserData; EXPECT_EQ(instanceData->instanceDataValue, event_destroy_args.instanceData0); delete instanceData; }; // // The 1st prolog sees the arguments as replaced by the 0th prolog. 
// There is no epilog for this prolog, so don't allocate instance data // prologCbs1.Event.pfnDestroyCb = [](ze_event_destroy_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(event_destroy_args.hEvent1, *params->phEvent); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 11); *val += 11; }; // // The 2nd epilog expects to see the API argument replacements // There is no corresponding prolog, so there is no instance data // epilogCbs2.Event.pfnDestroyCb = [](ze_event_destroy_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(event_destroy_args.hEvent1, *params->phEvent); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 21); *val += 21; }; // // The 3rd prolog expects to see the API argument replacements and doesn't modify them // Create instance data and pass to corresponding epilog // prologCbs3.Event.pfnDestroyCb = [](ze_event_destroy_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(event_destroy_args.hEvent1, *params->phEvent); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 31); *val += 31; struct instanceDataStruct *instanceData = new struct instanceDataStruct; instanceData->instanceDataValue = event_destroy_args.instanceData3; *ppTracerInstanceUserData = instanceData; }; // // The 3rd epilog expects to see the API argument replacements // Expect to see instance data from corresponding prolog // epilogCbs3.Event.pfnDestroyCb = [](ze_event_destroy_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { struct instanceDataStruct *instanceData; EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(event_destroy_args.hEvent1, *params->phEvent); ASSERT_NE(nullptr, pTracerUserData); int *val = 
static_cast(pTracerUserData); EXPECT_EQ(*val, 62); *val += 31; instanceData = (struct instanceDataStruct *)*ppTracerInstanceUserData; EXPECT_EQ(instanceData->instanceDataValue, event_destroy_args.instanceData3); delete instanceData; }; setTracerCallbacksAndEnableTracer(); result = zeEventDestroy_Tracing(event_destroy_args.hEvent0); EXPECT_EQ(ZE_RESULT_SUCCESS, result); validateDefaultUserDataFinal(); } struct { ze_event_handle_t hEvent0; ze_event_handle_t hEvent1; void *instanceData0; void *instanceData3; } event_host_signal_args; TEST_F(zeAPITracingRuntimeMultipleArgumentsTests, WhenCallingEventHostSignalTracingWrapperWithMultiplePrologEpilogsThenReturnSuccess) { ze_result_t result; // initialize initial argument set event_host_signal_args.hEvent0 = generateRandomHandle(); // initialize replacement argument set event_host_signal_args.hEvent1 = generateRandomHandle(); // initialize user instance data event_host_signal_args.instanceData0 = generateRandomHandle(); event_host_signal_args.instanceData3 = generateRandomHandle(); driver_ddiTable.core_ddiTable.Event.pfnHostSignal = [](ze_event_handle_t hEvent) { EXPECT_EQ(event_host_signal_args.hEvent1, hEvent); return ZE_RESULT_SUCCESS; }; // // The 0th prolog replaces the orignal API arguments with a new set // Create instance data, pass it to corresponding epilog. 
// prologCbs0.Event.pfnHostSignalCb = [](ze_event_host_signal_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(event_host_signal_args.hEvent0, *params->phEvent); *params->phEvent = event_host_signal_args.hEvent1; ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 1); *val += 1; struct instanceDataStruct *instanceData = new struct instanceDataStruct; instanceData->instanceDataValue = event_host_signal_args.instanceData0; *ppTracerInstanceUserData = instanceData; }; // // The 0th epilog expects to see the API argument replacements // Expect to receive instance data from corresponding prolog // epilogCbs0.Event.pfnHostSignalCb = [](ze_event_host_signal_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { struct instanceDataStruct *instanceData; EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(event_host_signal_args.hEvent1, *params->phEvent); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 2); *val += 1; instanceData = (struct instanceDataStruct *)*ppTracerInstanceUserData; EXPECT_EQ(instanceData->instanceDataValue, event_host_signal_args.instanceData0); delete instanceData; }; // // The 1st prolog sees the arguments as replaced by the 0th prolog. 
// There is no epilog for this prolog, so don't allocate instance data // prologCbs1.Event.pfnHostSignalCb = [](ze_event_host_signal_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(event_host_signal_args.hEvent1, *params->phEvent); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 11); *val += 11; }; // // The 2nd epilog expects to see the API argument replacements // There is no corresponding prolog, so there is no instance data // epilogCbs2.Event.pfnHostSignalCb = [](ze_event_host_signal_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(event_host_signal_args.hEvent1, *params->phEvent); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 21); *val += 21; }; // // The 3rd prolog expects to see the API argument replacements and doesn't modify them // Create instance data and pass to corresponding epilog // prologCbs3.Event.pfnHostSignalCb = [](ze_event_host_signal_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(event_host_signal_args.hEvent1, *params->phEvent); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 31); *val += 31; struct instanceDataStruct *instanceData = new struct instanceDataStruct; instanceData->instanceDataValue = event_host_signal_args.instanceData3; *ppTracerInstanceUserData = instanceData; }; // // The 3rd epilog expects to see the API argument replacements // Expect to see instance data from corresponding prolog // epilogCbs3.Event.pfnHostSignalCb = [](ze_event_host_signal_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { struct instanceDataStruct *instanceData; EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(event_host_signal_args.hEvent1, *params->phEvent); 
ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 62); *val += 31; instanceData = (struct instanceDataStruct *)*ppTracerInstanceUserData; EXPECT_EQ(instanceData->instanceDataValue, event_host_signal_args.instanceData3); delete instanceData; }; setTracerCallbacksAndEnableTracer(); result = zeEventHostSignal_Tracing(event_host_signal_args.hEvent0); EXPECT_EQ(ZE_RESULT_SUCCESS, result); validateDefaultUserDataFinal(); } struct { ze_event_handle_t hEvent0; uint32_t timeout0; ze_event_handle_t hEvent1; uint32_t timeout1; void *instanceData0; void *instanceData3; } event_host_synchronize_args; TEST_F(zeAPITracingRuntimeMultipleArgumentsTests, WhenCallingEventHostSynchronizeTracingWrapperWithMultiplePrologEpilogsThenReturnSuccess) { ze_result_t result; // initialize initial argument set event_host_synchronize_args.hEvent0 = generateRandomHandle(); event_host_synchronize_args.timeout0 = generateRandomSize(); // initialize replacement argument set event_host_synchronize_args.hEvent1 = generateRandomHandle(); event_host_synchronize_args.timeout1 = generateRandomSize(); // initialize user instance data event_host_synchronize_args.instanceData0 = generateRandomHandle(); event_host_synchronize_args.instanceData3 = generateRandomHandle(); driver_ddiTable.core_ddiTable.Event.pfnHostSynchronize = [](ze_event_handle_t hEvent, uint64_t timeout) { EXPECT_EQ(event_host_synchronize_args.hEvent1, hEvent); EXPECT_EQ(event_host_synchronize_args.timeout1, timeout); return ZE_RESULT_SUCCESS; }; // // The 0th prolog replaces the orignal API arguments with a new set // Create instance data, pass it to corresponding epilog. 
// prologCbs0.Event.pfnHostSynchronizeCb = [](ze_event_host_synchronize_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(event_host_synchronize_args.hEvent0, *params->phEvent); EXPECT_EQ(event_host_synchronize_args.timeout0, *params->ptimeout); *params->phEvent = event_host_synchronize_args.hEvent1; *params->ptimeout = event_host_synchronize_args.timeout1; ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 1); *val += 1; struct instanceDataStruct *instanceData = new struct instanceDataStruct; instanceData->instanceDataValue = event_host_synchronize_args.instanceData0; *ppTracerInstanceUserData = instanceData; }; // // The 0th epilog expects to see the API argument replacements // Expect to receive instance data from corresponding prolog // epilogCbs0.Event.pfnHostSynchronizeCb = [](ze_event_host_synchronize_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { struct instanceDataStruct *instanceData; EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(event_host_synchronize_args.hEvent1, *params->phEvent); EXPECT_EQ(event_host_synchronize_args.timeout1, *params->ptimeout); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 2); *val += 1; instanceData = (struct instanceDataStruct *)*ppTracerInstanceUserData; EXPECT_EQ(instanceData->instanceDataValue, event_host_synchronize_args.instanceData0); delete instanceData; }; // // The 1st prolog sees the arguments as replaced by the 0th prolog. 
// There is no epilog for this prolog, so don't allocate instance data // prologCbs1.Event.pfnHostSynchronizeCb = [](ze_event_host_synchronize_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(event_host_synchronize_args.hEvent1, *params->phEvent); EXPECT_EQ(event_host_synchronize_args.timeout1, *params->ptimeout); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 11); *val += 11; }; // // The 2nd epilog expects to see the API argument replacements // There is no corresponding prolog, so there is no instance data // epilogCbs2.Event.pfnHostSynchronizeCb = [](ze_event_host_synchronize_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(result, ZE_RESULT_SUCCESS); ASSERT_NE(nullptr, pTracerUserData); EXPECT_EQ(event_host_synchronize_args.hEvent1, *params->phEvent); EXPECT_EQ(event_host_synchronize_args.timeout1, *params->ptimeout); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 21); *val += 21; }; // // The 3rd prolog expects to see the API argument replacements and doesn't modify them // Create instance data and pass to corresponding epilog // prologCbs3.Event.pfnHostSynchronizeCb = [](ze_event_host_synchronize_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(event_host_synchronize_args.hEvent1, *params->phEvent); EXPECT_EQ(event_host_synchronize_args.timeout1, *params->ptimeout); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 31); *val += 31; struct instanceDataStruct *instanceData = new struct instanceDataStruct; instanceData->instanceDataValue = event_host_synchronize_args.instanceData3; *ppTracerInstanceUserData = instanceData; }; // // The 3rd epilog expects to see the API argument replacements // Expect to see instance data from corresponding prolog // epilogCbs3.Event.pfnHostSynchronizeCb = 
[](ze_event_host_synchronize_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { struct instanceDataStruct *instanceData; EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(event_host_synchronize_args.hEvent1, *params->phEvent); EXPECT_EQ(event_host_synchronize_args.timeout1, *params->ptimeout); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 62); *val += 31; instanceData = (struct instanceDataStruct *)*ppTracerInstanceUserData; EXPECT_EQ(instanceData->instanceDataValue, event_host_synchronize_args.instanceData3); delete instanceData; }; setTracerCallbacksAndEnableTracer(); result = zeEventHostSynchronize_Tracing(event_host_synchronize_args.hEvent0, event_host_synchronize_args.timeout0); EXPECT_EQ(ZE_RESULT_SUCCESS, result); validateDefaultUserDataFinal(); } struct { ze_event_handle_t hEvent0; ze_event_handle_t hEvent1; void *instanceData0; void *instanceData3; } event_query_status_args; TEST_F(zeAPITracingRuntimeMultipleArgumentsTests, WhenCallingEventQueryStatusTracingWrapperWithMultiplePrologEpilogsThenReturnSuccess) { ze_result_t result; // initialize initial argument set event_query_status_args.hEvent0 = generateRandomHandle(); // initialize replacement argument set event_query_status_args.hEvent1 = generateRandomHandle(); // initialize user instance data event_query_status_args.instanceData0 = generateRandomHandle(); event_query_status_args.instanceData3 = generateRandomHandle(); driver_ddiTable.core_ddiTable.Event.pfnQueryStatus = [](ze_event_handle_t hEvent) { EXPECT_EQ(event_query_status_args.hEvent1, hEvent); return ZE_RESULT_SUCCESS; }; // // The 0th prolog replaces the orignal API arguments with a new set // Create instance data, pass it to corresponding epilog. 
// prologCbs0.Event.pfnQueryStatusCb = [](ze_event_query_status_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(event_query_status_args.hEvent0, *params->phEvent); *params->phEvent = event_query_status_args.hEvent1; ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 1); *val += 1; struct instanceDataStruct *instanceData = new struct instanceDataStruct; instanceData->instanceDataValue = event_query_status_args.instanceData0; *ppTracerInstanceUserData = instanceData; }; // // The 0th epilog expects to see the API argument replacements // Expect to receive instance data from corresponding prolog // epilogCbs0.Event.pfnQueryStatusCb = [](ze_event_query_status_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { struct instanceDataStruct *instanceData; EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(event_query_status_args.hEvent1, *params->phEvent); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 2); *val += 1; instanceData = (struct instanceDataStruct *)*ppTracerInstanceUserData; EXPECT_EQ(instanceData->instanceDataValue, event_query_status_args.instanceData0); delete instanceData; }; // // The 1st prolog sees the arguments as replaced by the 0th prolog. 
// There is no epilog for this prolog, so don't allocate instance data // prologCbs1.Event.pfnQueryStatusCb = [](ze_event_query_status_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(event_query_status_args.hEvent1, *params->phEvent); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 11); *val += 11; }; // // The 2nd epilog expects to see the API argument replacements // There is no corresponding prolog, so there is no instance data // epilogCbs2.Event.pfnQueryStatusCb = [](ze_event_query_status_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(result, ZE_RESULT_SUCCESS); ASSERT_NE(nullptr, pTracerUserData); EXPECT_EQ(event_query_status_args.hEvent1, *params->phEvent); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 21); *val += 21; }; // // The 3rd prolog expects to see the API argument replacements and doesn't modify them // Create instance data and pass to corresponding epilog // prologCbs3.Event.pfnQueryStatusCb = [](ze_event_query_status_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(event_query_status_args.hEvent1, *params->phEvent); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 31); *val += 31; struct instanceDataStruct *instanceData = new struct instanceDataStruct; instanceData->instanceDataValue = event_query_status_args.instanceData3; *ppTracerInstanceUserData = instanceData; }; // // The 3rd epilog expects to see the API argument replacements // Expect to see instance data from corresponding prolog // epilogCbs3.Event.pfnQueryStatusCb = [](ze_event_query_status_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { struct instanceDataStruct *instanceData; EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(event_query_status_args.hEvent1, 
*params->phEvent); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 62); *val += 31; instanceData = (struct instanceDataStruct *)*ppTracerInstanceUserData; EXPECT_EQ(instanceData->instanceDataValue, event_query_status_args.instanceData3); delete instanceData; }; setTracerCallbacksAndEnableTracer(); result = zeEventQueryStatus_Tracing(event_query_status_args.hEvent0); EXPECT_EQ(ZE_RESULT_SUCCESS, result); validateDefaultUserDataFinal(); } struct { ze_event_handle_t hEvent0; ze_event_handle_t hEvent1; void *instanceData0; void *instanceData3; } event_reset_args; TEST_F(zeAPITracingRuntimeMultipleArgumentsTests, WhenCallingEventHostResetTracingWrapperWithMultiplePrologEpilogsThenReturnSuccess) { ze_result_t result; // initialize initial argument set event_reset_args.hEvent0 = generateRandomHandle(); // initialize replacement argument set event_reset_args.hEvent1 = generateRandomHandle(); // initialize user instance data event_reset_args.instanceData0 = generateRandomHandle(); event_reset_args.instanceData3 = generateRandomHandle(); driver_ddiTable.core_ddiTable.Event.pfnHostReset = [](ze_event_handle_t hEvent) { EXPECT_EQ(event_reset_args.hEvent1, hEvent); return ZE_RESULT_SUCCESS; }; // // The 0th prolog replaces the orignal API arguments with a new set // Create instance data, pass it to corresponding epilog. 
// prologCbs0.Event.pfnHostResetCb = [](ze_event_host_reset_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(event_reset_args.hEvent0, *params->phEvent); *params->phEvent = event_reset_args.hEvent1; ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 1); *val += 1; struct instanceDataStruct *instanceData = new struct instanceDataStruct; instanceData->instanceDataValue = event_reset_args.instanceData0; *ppTracerInstanceUserData = instanceData; }; // // The 0th epilog expects to see the API argument replacements // Expect to receive instance data from corresponding prolog // epilogCbs0.Event.pfnHostResetCb = [](ze_event_host_reset_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { struct instanceDataStruct *instanceData; EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(event_reset_args.hEvent1, *params->phEvent); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 2); *val += 1; instanceData = (struct instanceDataStruct *)*ppTracerInstanceUserData; EXPECT_EQ(instanceData->instanceDataValue, event_reset_args.instanceData0); delete instanceData; }; // // The 1st prolog sees the arguments as replaced by the 0th prolog. 
// There is no epilog for this prolog, so don't allocate instance data // prologCbs1.Event.pfnHostResetCb = [](ze_event_host_reset_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(event_reset_args.hEvent1, *params->phEvent); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 11); *val += 11; }; // // The 2nd epilog expects to see the API argument replacements // There is no corresponding prolog, so there is no instance data // epilogCbs2.Event.pfnHostResetCb = [](ze_event_host_reset_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(result, ZE_RESULT_SUCCESS); ASSERT_NE(nullptr, pTracerUserData); EXPECT_EQ(event_reset_args.hEvent1, *params->phEvent); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 21); *val += 21; }; // // The 3rd prolog expects to see the API argument replacements and doesn't modify them // Create instance data and pass to corresponding epilog // prologCbs3.Event.pfnHostResetCb = [](ze_event_host_reset_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(event_reset_args.hEvent1, *params->phEvent); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 31); *val += 31; struct instanceDataStruct *instanceData = new struct instanceDataStruct; instanceData->instanceDataValue = event_reset_args.instanceData3; *ppTracerInstanceUserData = instanceData; }; // // The 3rd epilog expects to see the API argument replacements // Expect to see instance data from corresponding prolog // epilogCbs3.Event.pfnHostResetCb = [](ze_event_host_reset_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { struct instanceDataStruct *instanceData; EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(event_reset_args.hEvent1, *params->phEvent); ASSERT_NE(nullptr, pTracerUserData); int 
*val = static_cast(pTracerUserData); EXPECT_EQ(*val, 62); *val += 31; instanceData = (struct instanceDataStruct *)*ppTracerInstanceUserData; EXPECT_EQ(instanceData->instanceDataValue, event_reset_args.instanceData3); delete instanceData; }; setTracerCallbacksAndEnableTracer(); result = zeEventHostReset_Tracing(event_reset_args.hEvent0); EXPECT_EQ(ZE_RESULT_SUCCESS, result); validateDefaultUserDataFinal(); } // Multi prolog/epilog Event Pool Tests #define NUM_EVENT_POOL_CREATE_DEVICES_0 2 #define NUM_EVENT_POOL_CREATE_DEVICES_1 4 struct { ze_context_handle_t hContext0; ze_event_pool_desc_t desc0; uint32_t numDevices0 = NUM_EVENT_POOL_CREATE_DEVICES_0; ze_device_handle_t hDevices0[NUM_EVENT_POOL_CREATE_DEVICES_0]; ze_event_pool_handle_t hEventPool0; ze_context_handle_t hContext1; ze_event_pool_desc_t desc1; uint32_t numDevices1 = NUM_EVENT_POOL_CREATE_DEVICES_1; ze_device_handle_t hDevices1[NUM_EVENT_POOL_CREATE_DEVICES_1]; ze_event_pool_handle_t hEventPool1; ze_event_pool_handle_t hEventPoolAPI; void *instanceData0; void *instanceData3; } event_pool_create_args; TEST_F(zeAPITracingRuntimeMultipleArgumentsTests, WhenCallingEventPoolCreateTracingWrapperWithMultiplePrologEpilogsThenReturnSuccess) { ze_result_t result; // initialize initial argument set event_pool_create_args.hContext0 = generateRandomHandle(); for (int i = 0; i < NUM_EVENT_POOL_CREATE_DEVICES_0; i++) { event_pool_create_args.hDevices0[i] = generateRandomHandle(); } event_pool_create_args.hEventPool0 = generateRandomHandle(); // initialize replacement argument set event_pool_create_args.hContext1 = generateRandomHandle(); for (int i = 0; i < NUM_EVENT_POOL_CREATE_DEVICES_1; i++) { event_pool_create_args.hDevices1[i] = generateRandomHandle(); } event_pool_create_args.hEventPool1 = generateRandomHandle(); // initialize user instance data event_pool_create_args.instanceData0 = generateRandomHandle(); event_pool_create_args.instanceData3 = generateRandomHandle(); 
driver_ddiTable.core_ddiTable.EventPool.pfnCreate = [](ze_context_handle_t hContext, const ze_event_pool_desc_t *desc, uint32_t numDevices, ze_device_handle_t *phDevices, ze_event_pool_handle_t *phEventPool) { EXPECT_EQ(event_pool_create_args.hContext1, hContext); EXPECT_EQ(&event_pool_create_args.desc1, desc); EXPECT_EQ(event_pool_create_args.numDevices1, numDevices); EXPECT_EQ(event_pool_create_args.hDevices1, phDevices); for (int i = 0; i < NUM_EVENT_POOL_CREATE_DEVICES_1; i++) { EXPECT_EQ(event_pool_create_args.hDevices1[i], phDevices[i]); } EXPECT_EQ(event_pool_create_args.hEventPool1, *phEventPool); EXPECT_EQ(&event_pool_create_args.hEventPool1, phEventPool); event_pool_create_args.hEventPoolAPI = generateRandomHandle(); *phEventPool = event_pool_create_args.hEventPoolAPI; return ZE_RESULT_SUCCESS; }; // // The 0th prolog replaces the orignal API arguments with a new set // Create instance data, pass it to corresponding epilog. // prologCbs0.EventPool.pfnCreateCb = [](ze_event_pool_create_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(event_pool_create_args.hContext0, *params->phContext); EXPECT_EQ(&event_pool_create_args.desc0, *params->pdesc); EXPECT_EQ(event_pool_create_args.numDevices0, *params->pnumDevices); EXPECT_EQ(event_pool_create_args.hDevices0, *params->pphDevices); for (int i = 0; i < NUM_EVENT_POOL_CREATE_DEVICES_0; i++) { EXPECT_EQ(event_pool_create_args.hDevices0[i], (*(params->pphDevices))[i]); } ze_event_pool_handle_t **ppHandle; ASSERT_NE(nullptr, params); ppHandle = params->pphEventPool; ze_event_pool_handle_t *pHandle; ASSERT_NE(nullptr, ppHandle); pHandle = *ppHandle; EXPECT_EQ(&event_pool_create_args.hEventPool0, pHandle); ze_event_pool_handle_t handle; ASSERT_NE(nullptr, pHandle); handle = *pHandle; EXPECT_EQ(event_pool_create_args.hEventPool0, handle); *params->phContext = event_pool_create_args.hContext1; *params->pdesc = &event_pool_create_args.desc1; *params->pnumDevices = 
event_pool_create_args.numDevices1; *params->pphDevices = event_pool_create_args.hDevices1; *params->pphEventPool = &event_pool_create_args.hEventPool1; ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 1); *val += 1; struct instanceDataStruct *instanceData = new struct instanceDataStruct; instanceData->instanceDataValue = event_create_args.instanceData0; *ppTracerInstanceUserData = instanceData; }; // // The 0th epilog expects to see the API argument replacements // Expect to receive instance data from corresponding prolog // epilogCbs0.EventPool.pfnCreateCb = [](ze_event_pool_create_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { struct instanceDataStruct *instanceData; EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(event_pool_create_args.hContext1, *params->phContext); EXPECT_EQ(&event_pool_create_args.desc1, *params->pdesc); EXPECT_EQ(event_pool_create_args.numDevices1, *params->pnumDevices); EXPECT_EQ(event_pool_create_args.hDevices1, *params->pphDevices); for (int i = 0; i < NUM_EVENT_POOL_CREATE_DEVICES_0; i++) { EXPECT_EQ(event_pool_create_args.hDevices1[i], (*(params->pphDevices))[i]); } ze_event_pool_handle_t **ppHandle; ASSERT_NE(nullptr, params); ppHandle = params->pphEventPool; ze_event_pool_handle_t *pHandle; ASSERT_NE(nullptr, ppHandle); pHandle = *ppHandle; EXPECT_EQ(&event_pool_create_args.hEventPool1, pHandle); ze_event_pool_handle_t handle; ASSERT_NE(nullptr, pHandle); handle = *pHandle; EXPECT_EQ(event_pool_create_args.hEventPool1, handle); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 2); *val += 1; instanceData = (struct instanceDataStruct *)*ppTracerInstanceUserData; EXPECT_EQ(instanceData->instanceDataValue, event_create_args.instanceData0); delete instanceData; }; // // The 1st prolog sees the arguments as replaced by the 0th prolog. 
// There is no epilog for this prolog, so don't allocate instance data // prologCbs1.EventPool.pfnCreateCb = [](ze_event_pool_create_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(event_pool_create_args.hContext1, *params->phContext); EXPECT_EQ(&event_pool_create_args.desc1, *params->pdesc); EXPECT_EQ(event_pool_create_args.numDevices1, *params->pnumDevices); EXPECT_EQ(event_pool_create_args.hDevices1, *params->pphDevices); for (int i = 0; i < NUM_EVENT_POOL_CREATE_DEVICES_0; i++) { EXPECT_EQ(event_pool_create_args.hDevices1[i], (*(params->pphDevices))[i]); } ze_event_pool_handle_t **ppHandle; ASSERT_NE(nullptr, params); ppHandle = params->pphEventPool; ze_event_pool_handle_t *pHandle; ASSERT_NE(nullptr, ppHandle); pHandle = *ppHandle; EXPECT_EQ(&event_pool_create_args.hEventPool1, pHandle); ze_event_pool_handle_t handle; ASSERT_NE(nullptr, pHandle); handle = *pHandle; EXPECT_EQ(event_pool_create_args.hEventPool1, handle); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 11); *val += 11; }; // // The 2nd epilog expects to see the API argument replacements // There is no corresponding prolog, so there is no instance data // epilogCbs2.EventPool.pfnCreateCb = [](ze_event_pool_create_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(event_pool_create_args.hContext1, *params->phContext); EXPECT_EQ(&event_pool_create_args.desc1, *params->pdesc); EXPECT_EQ(event_pool_create_args.numDevices1, *params->pnumDevices); EXPECT_EQ(event_pool_create_args.hDevices1, *params->pphDevices); for (int i = 0; i < NUM_EVENT_POOL_CREATE_DEVICES_0; i++) { EXPECT_EQ(event_pool_create_args.hDevices1[i], (*(params->pphDevices))[i]); } ze_event_pool_handle_t **ppHandle; ASSERT_NE(nullptr, params); ppHandle = params->pphEventPool; ze_event_pool_handle_t *pHandle; ASSERT_NE(nullptr, ppHandle); 
pHandle = *ppHandle; EXPECT_EQ(&event_pool_create_args.hEventPool1, pHandle); ze_event_pool_handle_t handle; ASSERT_NE(nullptr, pHandle); handle = *pHandle; EXPECT_EQ(event_pool_create_args.hEventPool1, handle); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 21); *val += 21; }; // // The 3rd prolog expects to see the API argument replacements and doesn't modify them // Create instance data and pass to corresponding epilog // prologCbs3.EventPool.pfnCreateCb = [](ze_event_pool_create_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(event_pool_create_args.hContext1, *params->phContext); EXPECT_EQ(&event_pool_create_args.desc1, *params->pdesc); EXPECT_EQ(event_pool_create_args.numDevices1, *params->pnumDevices); EXPECT_EQ(event_pool_create_args.hDevices1, *params->pphDevices); for (int i = 0; i < NUM_EVENT_POOL_CREATE_DEVICES_0; i++) { EXPECT_EQ(event_pool_create_args.hDevices1[i], (*(params->pphDevices))[i]); } ze_event_pool_handle_t **ppHandle; ASSERT_NE(nullptr, params); ppHandle = params->pphEventPool; ze_event_pool_handle_t *pHandle; ASSERT_NE(nullptr, ppHandle); pHandle = *ppHandle; EXPECT_EQ(&event_pool_create_args.hEventPool1, pHandle); ze_event_pool_handle_t handle; ASSERT_NE(nullptr, pHandle); handle = *pHandle; EXPECT_EQ(event_pool_create_args.hEventPool1, handle); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 31); *val += 31; struct instanceDataStruct *instanceData = new struct instanceDataStruct; instanceData->instanceDataValue = event_create_args.instanceData3; *ppTracerInstanceUserData = instanceData; }; // // The 3rd epilog expects to see the API argument replacements // Expect to see instance data from corresponding prolog // epilogCbs3.EventPool.pfnCreateCb = [](ze_event_pool_create_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { struct 
instanceDataStruct *instanceData; EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(event_pool_create_args.hContext1, *params->phContext); EXPECT_EQ(&event_pool_create_args.desc1, *params->pdesc); EXPECT_EQ(event_pool_create_args.numDevices1, *params->pnumDevices); EXPECT_EQ(event_pool_create_args.hDevices1, *params->pphDevices); for (int i = 0; i < NUM_EVENT_POOL_CREATE_DEVICES_0; i++) { EXPECT_EQ(event_pool_create_args.hDevices1[i], (*(params->pphDevices))[i]); } ze_event_pool_handle_t **ppHandle; ASSERT_NE(nullptr, params); ppHandle = params->pphEventPool; ze_event_pool_handle_t *pHandle; ASSERT_NE(nullptr, ppHandle); pHandle = *ppHandle; EXPECT_EQ(&event_pool_create_args.hEventPool1, pHandle); ze_event_pool_handle_t handle; ASSERT_NE(nullptr, pHandle); handle = *pHandle; EXPECT_EQ(event_pool_create_args.hEventPool1, handle); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 62); *val += 31; instanceData = (struct instanceDataStruct *)*ppTracerInstanceUserData; EXPECT_EQ(instanceData->instanceDataValue, event_create_args.instanceData3); delete instanceData; }; setTracerCallbacksAndEnableTracer(); result = zeEventPoolCreate_Tracing(event_pool_create_args.hContext0, &event_pool_create_args.desc0, event_pool_create_args.numDevices0, event_pool_create_args.hDevices0, &event_pool_create_args.hEventPool0); EXPECT_EQ(ZE_RESULT_SUCCESS, result); validateDefaultUserDataFinal(); } struct { ze_event_pool_handle_t hEventPool0; ze_event_pool_handle_t hEventPool1; void *instanceData0; void *instanceData3; } event_pool_destroy_args; TEST_F(zeAPITracingRuntimeMultipleArgumentsTests, WhenCallingEventPoolDestroyTracingWrapperWithMultiplePrologEpilogsThenReturnSuccess) { ze_result_t result; // initialize initial argument set event_pool_destroy_args.hEventPool0 = generateRandomHandle(); // initialize replacement argument set event_pool_destroy_args.hEventPool1 = generateRandomHandle(); // initialize user instance data 
event_pool_destroy_args.instanceData0 = generateRandomHandle(); event_pool_destroy_args.instanceData3 = generateRandomHandle(); driver_ddiTable.core_ddiTable.EventPool.pfnDestroy = [](ze_event_pool_handle_t hEventPool) { EXPECT_EQ(event_pool_destroy_args.hEventPool1, hEventPool); return ZE_RESULT_SUCCESS; }; // // The 0th prolog replaces the orignal API arguments with a new set // Create instance data, pass it to corresponding epilog. // prologCbs0.EventPool.pfnDestroyCb = [](ze_event_pool_destroy_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(event_pool_destroy_args.hEventPool0, *params->phEventPool); *params->phEventPool = event_pool_destroy_args.hEventPool1; ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 1); *val += 1; struct instanceDataStruct *instanceData = new struct instanceDataStruct; instanceData->instanceDataValue = event_pool_destroy_args.instanceData0; *ppTracerInstanceUserData = instanceData; }; // // The 0th epilog expects to see the API argument replacements // Expect to receive instance data from corresponding prolog // epilogCbs0.EventPool.pfnDestroyCb = [](ze_event_pool_destroy_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { struct instanceDataStruct *instanceData; EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(event_pool_destroy_args.hEventPool1, *params->phEventPool); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 2); *val += 1; instanceData = (struct instanceDataStruct *)*ppTracerInstanceUserData; EXPECT_EQ(instanceData->instanceDataValue, event_pool_destroy_args.instanceData0); delete instanceData; }; // // The 1st prolog sees the arguments as replaced by the 0th prolog. 
// There is no epilog for this prolog, so don't allocate instance data // prologCbs1.EventPool.pfnDestroyCb = [](ze_event_pool_destroy_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(event_pool_destroy_args.hEventPool1, *params->phEventPool); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 11); *val += 11; }; // // The 2nd epilog expects to see the API argument replacements // There is no corresponding prolog, so there is no instance data // epilogCbs2.EventPool.pfnDestroyCb = [](ze_event_pool_destroy_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(event_pool_destroy_args.hEventPool1, *params->phEventPool); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 21); *val += 21; }; // // The 3rd prolog expects to see the API argument replacements and doesn't modify them // Create instance data and pass to corresponding epilog // prologCbs3.EventPool.pfnDestroyCb = [](ze_event_pool_destroy_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(event_pool_destroy_args.hEventPool1, *params->phEventPool); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 31); *val += 31; struct instanceDataStruct *instanceData = new struct instanceDataStruct; instanceData->instanceDataValue = event_pool_destroy_args.instanceData3; *ppTracerInstanceUserData = instanceData; }; // // The 3rd epilog expects to see the API argument replacements // Expect to see instance data from corresponding prolog // epilogCbs3.EventPool.pfnDestroyCb = [](ze_event_pool_destroy_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { struct instanceDataStruct *instanceData; EXPECT_EQ(result, ZE_RESULT_SUCCESS); 
EXPECT_EQ(event_pool_destroy_args.hEventPool1, *params->phEventPool); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 62); *val += 31; instanceData = (struct instanceDataStruct *)*ppTracerInstanceUserData; EXPECT_EQ(instanceData->instanceDataValue, event_pool_destroy_args.instanceData3); delete instanceData; }; setTracerCallbacksAndEnableTracer(); result = zeEventPoolDestroy_Tracing(event_pool_destroy_args.hEventPool0); EXPECT_EQ(ZE_RESULT_SUCCESS, result); validateDefaultUserDataFinal(); } struct { ze_event_pool_handle_t hEventPool0; ze_ipc_event_pool_handle_t hIpc0; ze_event_pool_handle_t hEventPool1; ze_ipc_event_pool_handle_t hIpc1; ze_ipc_event_pool_handle_t hIpcAPI; void *instanceData0; void *instanceData3; } event_pool_get_ipc_handle_args; static void event_pool_get_ipc_handle_init_random(ze_ipc_event_pool_handle_t *phIpc) { uint8_t *ptr = (uint8_t *)phIpc; for (size_t i = 0; i < sizeof(*phIpc); i++, ptr++) { *ptr = generateRandomSize(); } } static bool event_pool_get_ipc_handles_compare(ze_ipc_event_pool_handle_t *phIpc0, ze_ipc_event_pool_handle_t *phIpc1) { if (nullptr == phIpc0) { return false; } if (nullptr == phIpc1) { return false; } return (memcmp((void *)phIpc0, (void *)phIpc1, sizeof(ze_ipc_event_pool_handle_t)) == 0); } TEST_F(zeAPITracingRuntimeMultipleArgumentsTests, WhenCallingEventPoolGetIpcHandleTracingWrapperWithMultiplePrologEpilogsThenReturnSuccess) { ze_result_t result; // initialize initial argument set event_pool_get_ipc_handle_args.hEventPool0 = generateRandomHandle(); event_pool_get_ipc_handle_init_random(&event_pool_get_ipc_handle_args.hIpc0); // initialize replacement argument set event_pool_get_ipc_handle_args.hEventPool1 = generateRandomHandle(); event_pool_get_ipc_handle_init_random(&event_pool_get_ipc_handle_args.hIpc1); // initialize user instance data event_pool_get_ipc_handle_args.instanceData0 = generateRandomHandle(); event_pool_get_ipc_handle_args.instanceData3 = 
generateRandomHandle(); driver_ddiTable.core_ddiTable.EventPool.pfnGetIpcHandle = [](ze_event_pool_handle_t hEventPool, ze_ipc_event_pool_handle_t *phIpc) { EXPECT_EQ(event_pool_get_ipc_handle_args.hEventPool1, hEventPool); EXPECT_EQ(&event_pool_get_ipc_handle_args.hIpc1, phIpc); EXPECT_TRUE(event_pool_get_ipc_handles_compare(&event_pool_get_ipc_handle_args.hIpc1, phIpc)); event_pool_get_ipc_handle_init_random(&event_pool_get_ipc_handle_args.hIpcAPI); *phIpc = event_pool_get_ipc_handle_args.hIpcAPI; return ZE_RESULT_SUCCESS; }; // // The 0th prolog replaces the orignal API arguments with a new set // Create instance data, pass it to corresponding epilog. // prologCbs0.EventPool.pfnGetIpcHandleCb = [](ze_event_pool_get_ipc_handle_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(event_pool_get_ipc_handle_args.hEventPool0, *params->phEventPool); EXPECT_EQ(&event_pool_get_ipc_handle_args.hIpc0, *params->pphIpc); EXPECT_TRUE(event_pool_get_ipc_handles_compare(&event_pool_get_ipc_handle_args.hIpc0, *params->pphIpc)); *params->phEventPool = event_pool_get_ipc_handle_args.hEventPool1; *params->pphIpc = &event_pool_get_ipc_handle_args.hIpc1; ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 1); *val += 1; struct instanceDataStruct *instanceData = new struct instanceDataStruct; instanceData->instanceDataValue = event_pool_get_ipc_handle_args.instanceData0; *ppTracerInstanceUserData = instanceData; }; // // The 0th epilog expects to see the API argument replacements // Expect to receive instance data from corresponding prolog // epilogCbs0.EventPool.pfnGetIpcHandleCb = [](ze_event_pool_get_ipc_handle_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { struct instanceDataStruct *instanceData; EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(event_pool_get_ipc_handle_args.hEventPool1, *params->phEventPool); 
EXPECT_EQ(&event_pool_get_ipc_handle_args.hIpc1, *params->pphIpc); EXPECT_TRUE(event_pool_get_ipc_handles_compare(&event_pool_get_ipc_handle_args.hIpc1, *params->pphIpc)); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 2); *val += 1; instanceData = (struct instanceDataStruct *)*ppTracerInstanceUserData; EXPECT_EQ(instanceData->instanceDataValue, event_pool_get_ipc_handle_args.instanceData0); delete instanceData; }; // // The 1st prolog sees the arguments as replaced by the 0th prolog. // There is no epilog for this prolog, so don't allocate instance data // prologCbs1.EventPool.pfnGetIpcHandleCb = [](ze_event_pool_get_ipc_handle_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(event_pool_get_ipc_handle_args.hEventPool1, *params->phEventPool); EXPECT_EQ(&event_pool_get_ipc_handle_args.hIpc1, *params->pphIpc); EXPECT_TRUE(event_pool_get_ipc_handles_compare(&event_pool_get_ipc_handle_args.hIpc1, *params->pphIpc)); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 11); *val += 11; }; // // The 2nd epilog expects to see the API argument replacements // There is no corresponding prolog, so there is no instance data // epilogCbs2.EventPool.pfnGetIpcHandleCb = [](ze_event_pool_get_ipc_handle_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(event_pool_get_ipc_handle_args.hEventPool1, *params->phEventPool); EXPECT_EQ(&event_pool_get_ipc_handle_args.hIpc1, *params->pphIpc); EXPECT_TRUE(event_pool_get_ipc_handles_compare(&event_pool_get_ipc_handle_args.hIpc1, *params->pphIpc)); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 21); *val += 21; }; // // The 3rd prolog expects to see the API argument replacements and doesn't modify them // Create instance data and pass to corresponding epilog // 
prologCbs3.EventPool.pfnGetIpcHandleCb = [](ze_event_pool_get_ipc_handle_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(event_pool_get_ipc_handle_args.hEventPool1, *params->phEventPool); EXPECT_EQ(&event_pool_get_ipc_handle_args.hIpc1, *params->pphIpc); EXPECT_TRUE(event_pool_get_ipc_handles_compare(&event_pool_get_ipc_handle_args.hIpc1, *params->pphIpc)); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 31); *val += 31; struct instanceDataStruct *instanceData = new struct instanceDataStruct; instanceData->instanceDataValue = event_pool_get_ipc_handle_args.instanceData3; *ppTracerInstanceUserData = instanceData; }; // // The 3rd epilog expects to see the API argument replacements // Expect to see instance data from corresponding prolog // epilogCbs3.EventPool.pfnGetIpcHandleCb = [](ze_event_pool_get_ipc_handle_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { struct instanceDataStruct *instanceData; EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(event_pool_get_ipc_handle_args.hEventPool1, *params->phEventPool); EXPECT_EQ(&event_pool_get_ipc_handle_args.hIpc1, *params->pphIpc); EXPECT_TRUE(event_pool_get_ipc_handles_compare(&event_pool_get_ipc_handle_args.hIpc1, *params->pphIpc)); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 62); *val += 31; instanceData = (struct instanceDataStruct *)*ppTracerInstanceUserData; EXPECT_EQ(instanceData->instanceDataValue, event_pool_get_ipc_handle_args.instanceData3); delete instanceData; }; setTracerCallbacksAndEnableTracer(); result = zeEventPoolGetIpcHandle_Tracing(event_pool_get_ipc_handle_args.hEventPool0, &event_pool_get_ipc_handle_args.hIpc0); EXPECT_EQ(ZE_RESULT_SUCCESS, result); validateDefaultUserDataFinal(); } struct { ze_context_handle_t hContext0; ze_ipc_event_pool_handle_t hIpc0; ze_event_pool_handle_t hEventPool0; 
ze_context_handle_t hContext1; ze_ipc_event_pool_handle_t hIpc1; ze_event_pool_handle_t hEventPool1; ze_event_pool_handle_t hEventPoolAPI; void *instanceData0; void *instanceData3; } event_pool_open_ipc_handle_args; static void event_pool_open_ipc_handle_init_random(ze_ipc_event_pool_handle_t *phIpc) { uint8_t *ptr = (uint8_t *)phIpc; for (size_t i = 0; i < sizeof(*phIpc); i++, ptr++) { *ptr = generateRandomSize(); } } static bool event_pool_open_ipc_handles_compare(ze_ipc_event_pool_handle_t *phIpc0, ze_ipc_event_pool_handle_t *phIpc1) { return (memcmp((void *)phIpc0, (void *)phIpc1, sizeof(ze_ipc_event_pool_handle_t)) == 0); } TEST_F(zeAPITracingRuntimeMultipleArgumentsTests, WhenCallingEventPoolOpenIpcHandleTracingWrapperWithMultiplePrologEpilogsThenReturnSuccess) { ze_result_t result; // initialize initial argument set event_pool_open_ipc_handle_args.hContext0 = generateRandomHandle(); event_pool_open_ipc_handle_init_random(&event_pool_open_ipc_handle_args.hIpc0); event_pool_open_ipc_handle_args.hEventPool0 = generateRandomHandle(); // initialize replacement argument set event_pool_open_ipc_handle_args.hContext1 = generateRandomHandle(); event_pool_open_ipc_handle_init_random(&event_pool_open_ipc_handle_args.hIpc1); event_pool_open_ipc_handle_args.hEventPool1 = generateRandomHandle(); // initialize user instance data event_pool_open_ipc_handle_args.instanceData0 = generateRandomHandle(); event_pool_open_ipc_handle_args.instanceData3 = generateRandomHandle(); driver_ddiTable.core_ddiTable.EventPool.pfnOpenIpcHandle = [](ze_context_handle_t hContext, ze_ipc_event_pool_handle_t hIpc, ze_event_pool_handle_t *phEventPool) { EXPECT_EQ(event_pool_open_ipc_handle_args.hContext1, hContext); EXPECT_TRUE(event_pool_open_ipc_handles_compare(&event_pool_open_ipc_handle_args.hIpc1, &hIpc)); EXPECT_EQ(event_pool_open_ipc_handle_args.hEventPool1, *phEventPool); EXPECT_EQ(&event_pool_open_ipc_handle_args.hEventPool1, phEventPool); event_pool_open_ipc_handle_args.hEventPoolAPI = 
generateRandomHandle(); *phEventPool = event_pool_open_ipc_handle_args.hEventPoolAPI; return ZE_RESULT_SUCCESS; }; // // The 0th prolog replaces the orignal API arguments with a new set // Create instance data, pass it to corresponding epilog. // prologCbs0.EventPool.pfnOpenIpcHandleCb = [](ze_event_pool_open_ipc_handle_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(event_pool_open_ipc_handle_args.hContext0, *params->phContext); EXPECT_TRUE(event_pool_open_ipc_handles_compare(&event_pool_open_ipc_handle_args.hIpc0, params->phIpc)); ze_event_pool_handle_t **ppHandle; ASSERT_NE(nullptr, params); ppHandle = params->pphEventPool; ze_event_pool_handle_t *pHandle; ASSERT_NE(nullptr, ppHandle); pHandle = *ppHandle; EXPECT_EQ(&event_pool_open_ipc_handle_args.hEventPool0, pHandle); ze_event_pool_handle_t handle; ASSERT_NE(nullptr, pHandle); handle = *pHandle; EXPECT_EQ(event_pool_open_ipc_handle_args.hEventPool0, handle); *params->phContext = event_pool_open_ipc_handle_args.hContext1; *params->phIpc = event_pool_open_ipc_handle_args.hIpc1; *params->pphEventPool = &event_pool_open_ipc_handle_args.hEventPool1; ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 1); *val += 1; struct instanceDataStruct *instanceData = new struct instanceDataStruct; instanceData->instanceDataValue = event_pool_open_ipc_handle_args.instanceData0; *ppTracerInstanceUserData = instanceData; }; // // The 0th epilog expects to see the API argument replacements // Expect to receive instance data from corresponding prolog // epilogCbs0.EventPool.pfnOpenIpcHandleCb = [](ze_event_pool_open_ipc_handle_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { struct instanceDataStruct *instanceData; EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(event_pool_open_ipc_handle_args.hContext1, *params->phContext); 
EXPECT_TRUE(event_pool_open_ipc_handles_compare(&event_pool_open_ipc_handle_args.hIpc1, params->phIpc)); ze_event_pool_handle_t **ppHandle; ASSERT_NE(nullptr, params); ppHandle = params->pphEventPool; ze_event_pool_handle_t *pHandle; ASSERT_NE(nullptr, ppHandle); pHandle = *ppHandle; EXPECT_EQ(&event_pool_open_ipc_handle_args.hEventPool1, pHandle); ze_event_pool_handle_t handle; ASSERT_NE(nullptr, pHandle); handle = *pHandle; EXPECT_EQ(event_pool_open_ipc_handle_args.hEventPool1, handle); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 2); *val += 1; instanceData = (struct instanceDataStruct *)*ppTracerInstanceUserData; EXPECT_EQ(instanceData->instanceDataValue, event_pool_open_ipc_handle_args.instanceData0); delete instanceData; }; // // The 1st prolog sees the arguments as replaced by the 0th prolog. // There is no epilog for this prolog, so don't allocate instance data // prologCbs1.EventPool.pfnOpenIpcHandleCb = [](ze_event_pool_open_ipc_handle_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(event_pool_open_ipc_handle_args.hContext1, *params->phContext); EXPECT_TRUE(event_pool_open_ipc_handles_compare(&event_pool_open_ipc_handle_args.hIpc1, params->phIpc)); ze_event_pool_handle_t **ppHandle; ASSERT_NE(nullptr, params); ppHandle = params->pphEventPool; ze_event_pool_handle_t *pHandle; ASSERT_NE(nullptr, ppHandle); pHandle = *ppHandle; EXPECT_EQ(&event_pool_open_ipc_handle_args.hEventPool1, pHandle); ze_event_pool_handle_t handle; ASSERT_NE(nullptr, pHandle); handle = *pHandle; EXPECT_EQ(event_pool_open_ipc_handle_args.hEventPool1, handle); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 11); *val += 11; }; // // The 2nd epilog expects to see the API argument replacements // There is no corresponding prolog, so there is no instance data // epilogCbs2.EventPool.pfnOpenIpcHandleCb = 
[](ze_event_pool_open_ipc_handle_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(event_pool_open_ipc_handle_args.hContext1, *params->phContext); EXPECT_TRUE(event_pool_open_ipc_handles_compare(&event_pool_open_ipc_handle_args.hIpc1, params->phIpc)); ze_event_pool_handle_t **ppHandle; ASSERT_NE(nullptr, params); ppHandle = params->pphEventPool; ze_event_pool_handle_t *pHandle; ASSERT_NE(nullptr, ppHandle); pHandle = *ppHandle; EXPECT_EQ(&event_pool_open_ipc_handle_args.hEventPool1, pHandle); ze_event_pool_handle_t handle; ASSERT_NE(nullptr, pHandle); handle = *pHandle; EXPECT_EQ(event_pool_open_ipc_handle_args.hEventPool1, handle); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 21); *val += 21; }; // // The 3rd prolog expects to see the API argument replacements and doesn't modify them // Create instance data and pass to corresponding epilog // prologCbs3.EventPool.pfnOpenIpcHandleCb = [](ze_event_pool_open_ipc_handle_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(event_pool_open_ipc_handle_args.hContext1, *params->phContext); EXPECT_TRUE(event_pool_open_ipc_handles_compare(&event_pool_open_ipc_handle_args.hIpc1, params->phIpc)); ze_event_pool_handle_t **ppHandle; ASSERT_NE(nullptr, params); ppHandle = params->pphEventPool; ze_event_pool_handle_t *pHandle; ASSERT_NE(nullptr, ppHandle); pHandle = *ppHandle; EXPECT_EQ(&event_pool_open_ipc_handle_args.hEventPool1, pHandle); ze_event_pool_handle_t handle; ASSERT_NE(nullptr, pHandle); handle = *pHandle; EXPECT_EQ(event_pool_open_ipc_handle_args.hEventPool1, handle); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 31); *val += 31; struct instanceDataStruct *instanceData = new struct instanceDataStruct; instanceData->instanceDataValue = 
event_pool_open_ipc_handle_args.instanceData3; *ppTracerInstanceUserData = instanceData; }; // // The 3rd epilog expects to see the API argument replacements // Expect to see instance data from corresponding prolog // epilogCbs3.EventPool.pfnOpenIpcHandleCb = [](ze_event_pool_open_ipc_handle_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { struct instanceDataStruct *instanceData; EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(event_pool_open_ipc_handle_args.hContext1, *params->phContext); EXPECT_TRUE(event_pool_open_ipc_handles_compare(&event_pool_open_ipc_handle_args.hIpc1, params->phIpc)); ze_event_pool_handle_t **ppHandle; ASSERT_NE(nullptr, params); ppHandle = params->pphEventPool; ze_event_pool_handle_t *pHandle; ASSERT_NE(nullptr, ppHandle); pHandle = *ppHandle; EXPECT_EQ(&event_pool_open_ipc_handle_args.hEventPool1, pHandle); ze_event_pool_handle_t handle; ASSERT_NE(nullptr, pHandle); handle = *pHandle; EXPECT_EQ(event_pool_open_ipc_handle_args.hEventPool1, handle); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 62); *val += 31; instanceData = (struct instanceDataStruct *)*ppTracerInstanceUserData; EXPECT_EQ(instanceData->instanceDataValue, event_pool_open_ipc_handle_args.instanceData3); delete instanceData; }; setTracerCallbacksAndEnableTracer(); result = zeEventPoolOpenIpcHandle_Tracing(event_pool_open_ipc_handle_args.hContext0, event_pool_open_ipc_handle_args.hIpc0, &event_pool_open_ipc_handle_args.hEventPool0); EXPECT_EQ(ZE_RESULT_SUCCESS, result); validateDefaultUserDataFinal(); } struct { ze_event_pool_handle_t hEventPool0; ze_event_pool_handle_t hEventPool1; void *instanceData0; void *instanceData3; } event_pool_close_ipc_handle_args; TEST_F(zeAPITracingRuntimeMultipleArgumentsTests, WhenCallingEventPoolCloseIpcHandleTracingWrapperWithMultiplePrologEpilogsThenReturnSuccess) { ze_result_t result; // initialize initial argument set 
event_pool_close_ipc_handle_args.hEventPool0 = generateRandomHandle(); // initialize replacement argument set event_pool_close_ipc_handle_args.hEventPool1 = generateRandomHandle(); // initialize user instance data event_pool_close_ipc_handle_args.instanceData0 = generateRandomHandle(); event_pool_close_ipc_handle_args.instanceData3 = generateRandomHandle(); driver_ddiTable.core_ddiTable.EventPool.pfnCloseIpcHandle = [](ze_event_pool_handle_t hEventPool) { EXPECT_EQ(event_pool_close_ipc_handle_args.hEventPool1, hEventPool); return ZE_RESULT_SUCCESS; }; // // The 0th prolog replaces the orignal API arguments with a new set // Create instance data, pass it to corresponding epilog. // prologCbs0.EventPool.pfnCloseIpcHandleCb = [](ze_event_pool_close_ipc_handle_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(event_pool_close_ipc_handle_args.hEventPool0, *params->phEventPool); *params->phEventPool = event_pool_close_ipc_handle_args.hEventPool1; ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 1); *val += 1; struct instanceDataStruct *instanceData = new struct instanceDataStruct; instanceData->instanceDataValue = event_pool_close_ipc_handle_args.instanceData0; *ppTracerInstanceUserData = instanceData; }; // // The 0th epilog expects to see the API argument replacements // Expect to receive instance data from corresponding prolog // epilogCbs0.EventPool.pfnCloseIpcHandleCb = [](ze_event_pool_close_ipc_handle_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { struct instanceDataStruct *instanceData; EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(event_pool_close_ipc_handle_args.hEventPool1, *params->phEventPool); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 2); *val += 1; instanceData = (struct instanceDataStruct *)*ppTracerInstanceUserData; 
EXPECT_EQ(instanceData->instanceDataValue, event_pool_close_ipc_handle_args.instanceData0); delete instanceData; }; // // The 1st prolog sees the arguments as replaced by the 0th prolog. // There is no epilog for this prolog, so don't allocate instance data // prologCbs1.EventPool.pfnCloseIpcHandleCb = [](ze_event_pool_close_ipc_handle_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(event_pool_close_ipc_handle_args.hEventPool1, *params->phEventPool); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 11); *val += 11; }; // // The 2nd epilog expects to see the API argument replacements // There is no corresponding prolog, so there is no instance data // epilogCbs2.EventPool.pfnCloseIpcHandleCb = [](ze_event_pool_close_ipc_handle_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(event_pool_close_ipc_handle_args.hEventPool1, *params->phEventPool); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 21); *val += 21; }; // // The 3rd prolog expects to see the API argument replacements and doesn't modify them // Create instance data and pass to corresponding epilog // prologCbs3.EventPool.pfnCloseIpcHandleCb = [](ze_event_pool_close_ipc_handle_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(event_pool_close_ipc_handle_args.hEventPool1, *params->phEventPool); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 31); *val += 31; struct instanceDataStruct *instanceData = new struct instanceDataStruct; instanceData->instanceDataValue = event_pool_close_ipc_handle_args.instanceData3; *ppTracerInstanceUserData = instanceData; }; // // The 3rd epilog expects to see the API argument replacements // Expect to see instance data from corresponding 
prolog // epilogCbs3.EventPool.pfnCloseIpcHandleCb = [](ze_event_pool_close_ipc_handle_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { struct instanceDataStruct *instanceData; EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(event_pool_close_ipc_handle_args.hEventPool1, *params->phEventPool); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 62); *val += 31; instanceData = (struct instanceDataStruct *)*ppTracerInstanceUserData; EXPECT_EQ(instanceData->instanceDataValue, event_pool_close_ipc_handle_args.instanceData3); delete instanceData; }; setTracerCallbacksAndEnableTracer(); result = zeEventPoolCloseIpcHandle_Tracing(event_pool_close_ipc_handle_args.hEventPool0); EXPECT_EQ(ZE_RESULT_SUCCESS, result); validateDefaultUserDataFinal(); } // Command List API with Events struct { ze_command_list_handle_t hCommandList0; ze_event_handle_t hEvent0; ze_command_list_handle_t hCommandList1; ze_event_handle_t hEvent1; void *instanceData0; void *instanceData3; } command_list_append_signal_event_args; TEST_F(zeAPITracingRuntimeMultipleArgumentsTests, WhenCallingCommandListAppendSignalEventTracingWrapperWithMultiplePrologEpilogsThenReturnSuccess) { ze_result_t result; // initialize initial argument set command_list_append_signal_event_args.hCommandList0 = generateRandomHandle(); command_list_append_signal_event_args.hEvent0 = generateRandomHandle(); // initialize replacement argument set command_list_append_signal_event_args.hCommandList1 = generateRandomHandle(); command_list_append_signal_event_args.hEvent1 = generateRandomHandle(); // initialize user instance data command_list_append_signal_event_args.instanceData0 = generateRandomHandle(); command_list_append_signal_event_args.instanceData3 = generateRandomHandle(); driver_ddiTable.core_ddiTable.CommandList.pfnAppendSignalEvent = [](ze_command_list_handle_t hCommandList, ze_event_handle_t hEvent) { 
EXPECT_EQ(command_list_append_signal_event_args.hCommandList1, hCommandList); EXPECT_EQ(command_list_append_signal_event_args.hEvent1, hEvent); return ZE_RESULT_SUCCESS; }; // // The 0th prolog replaces the orignal API arguments with a new set // Create instance data, pass it to corresponding epilog. // prologCbs0.CommandList.pfnAppendSignalEventCb = [](ze_command_list_append_signal_event_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(command_list_append_signal_event_args.hCommandList0, *params->phCommandList); EXPECT_EQ(command_list_append_signal_event_args.hEvent0, *params->phEvent); *params->phCommandList = command_list_append_signal_event_args.hCommandList1; *params->phEvent = command_list_append_signal_event_args.hEvent1; ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 1); *val += 1; struct instanceDataStruct *instanceData = new struct instanceDataStruct; instanceData->instanceDataValue = command_list_append_signal_event_args.instanceData0; *ppTracerInstanceUserData = instanceData; }; // // The 0th epilog expects to see the API argument replacements // Expect to receive instance data from corresponding prolog // epilogCbs0.CommandList.pfnAppendSignalEventCb = [](ze_command_list_append_signal_event_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { struct instanceDataStruct *instanceData; EXPECT_EQ(command_list_append_signal_event_args.hCommandList1, *params->phCommandList); EXPECT_EQ(command_list_append_signal_event_args.hEvent1, *params->phEvent); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 2); *val += 1; instanceData = (struct instanceDataStruct *)*ppTracerInstanceUserData; EXPECT_EQ(instanceData->instanceDataValue, command_list_append_signal_event_args.instanceData0); delete instanceData; }; // // The 1st prolog sees the arguments as replaced by the 0th prolog. 
// There is no epilog for this prolog, so don't allocate instance data // prologCbs1.CommandList.pfnAppendSignalEventCb = [](ze_command_list_append_signal_event_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(command_list_append_signal_event_args.hCommandList1, *params->phCommandList); EXPECT_EQ(command_list_append_signal_event_args.hEvent1, *params->phEvent); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 11); *val += 11; }; // // The 2nd epilog expects to see the API argument replacements // There is no corresponding prolog, so there is no instance data // epilogCbs2.CommandList.pfnAppendSignalEventCb = [](ze_command_list_append_signal_event_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(command_list_append_signal_event_args.hCommandList1, *params->phCommandList); EXPECT_EQ(command_list_append_signal_event_args.hEvent1, *params->phEvent); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 21); *val += 21; }; // // The 3rd prolog expects to see the API argument replacements and doesn't modify them // Allocate instance data and pass to corresponding epilog // prologCbs3.CommandList.pfnAppendSignalEventCb = [](ze_command_list_append_signal_event_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(command_list_append_signal_event_args.hCommandList1, *params->phCommandList); EXPECT_EQ(command_list_append_signal_event_args.hEvent1, *params->phEvent); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 31); *val += 31; struct instanceDataStruct *instanceData = new struct instanceDataStruct; instanceData->instanceDataValue = command_list_append_signal_event_args.instanceData0; *ppTracerInstanceUserData = instanceData; }; 
epilogCbs3.CommandList.pfnAppendSignalEventCb = [](ze_command_list_append_signal_event_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { struct instanceDataStruct *instanceData; EXPECT_EQ(command_list_append_signal_event_args.hCommandList1, *params->phCommandList); EXPECT_EQ(command_list_append_signal_event_args.hEvent1, *params->phEvent); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 62); *val += 31; instanceData = (struct instanceDataStruct *)*ppTracerInstanceUserData; EXPECT_EQ(instanceData->instanceDataValue, command_list_append_signal_event_args.instanceData0); delete instanceData; }; setTracerCallbacksAndEnableTracer(); result = zeCommandListAppendSignalEvent_Tracing(command_list_append_signal_event_args.hCommandList0, command_list_append_signal_event_args.hEvent0); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(ZE_RESULT_SUCCESS, result); validateDefaultUserDataFinal(); } #define NUM_COMMAND_LIST_APPEND_WAIT_ON_EVENTS_0 2 #define NUM_COMMAND_LIST_APPEND_WAIT_ON_EVENTS_1 4 struct { ze_command_list_handle_t hCommandList0; uint32_t numEvents0 = NUM_COMMAND_LIST_APPEND_WAIT_ON_EVENTS_0; ze_event_handle_t hEvents0[NUM_COMMAND_LIST_APPEND_WAIT_ON_EVENTS_0]; ze_command_list_handle_t hCommandList1; uint32_t numEvents1 = NUM_COMMAND_LIST_APPEND_WAIT_ON_EVENTS_1; ze_event_handle_t hEvents1[NUM_COMMAND_LIST_APPEND_WAIT_ON_EVENTS_1]; void *instanceData0; void *instanceData3; } command_list_append_wait_on_events_args; TEST_F(zeAPITracingRuntimeMultipleArgumentsTests, WhenCallingCommandListAppendWaitOnEventsTracingWrapperWithMultiplePrologEpilogsThenReturnSuccess) { ze_result_t result; // initialize initial argument set command_list_append_wait_on_events_args.hCommandList0 = generateRandomHandle(); for (int i = 0; i < NUM_COMMAND_LIST_APPEND_WAIT_ON_EVENTS_0; i++) { command_list_append_wait_on_events_args.hEvents0[i] = generateRandomHandle(); } // initialize replacement argument 
set command_list_append_wait_on_events_args.hCommandList1 = generateRandomHandle(); for (int i = 0; i < NUM_COMMAND_LIST_APPEND_WAIT_ON_EVENTS_1; i++) { command_list_append_wait_on_events_args.hEvents1[i] = generateRandomHandle(); } // initialize user instance data command_list_append_wait_on_events_args.instanceData0 = generateRandomHandle(); command_list_append_wait_on_events_args.instanceData3 = generateRandomHandle(); driver_ddiTable.core_ddiTable.CommandList.pfnAppendWaitOnEvents = [](ze_command_list_handle_t hCommandList, uint32_t numEvents, ze_event_handle_t *phEvents) { EXPECT_EQ(command_list_append_wait_on_events_args.hCommandList1, hCommandList); EXPECT_EQ(numEvents, (uint32_t)NUM_COMMAND_LIST_APPEND_WAIT_ON_EVENTS_1); for (int i = 0; i < NUM_COMMAND_LIST_APPEND_WAIT_ON_EVENTS_1; i++) { EXPECT_EQ(command_list_append_wait_on_events_args.hEvents1[i], phEvents[i]); } return ZE_RESULT_SUCCESS; }; // // The 0th prolog replaces the orignal API arguments with a new set // Create instance data, pass it to corresponding epilog. 
// prologCbs0.CommandList.pfnAppendWaitOnEventsCb = [](ze_command_list_append_wait_on_events_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(command_list_append_wait_on_events_args.hCommandList0, *params->phCommandList); EXPECT_EQ((uint32_t)NUM_COMMAND_LIST_APPEND_WAIT_ON_EVENTS_0, *params->pnumEvents); for (int i = 0; i < NUM_COMMAND_LIST_APPEND_WAIT_ON_EVENTS_0; i++) { EXPECT_EQ(command_list_append_wait_on_events_args.hEvents0[i], (*(params->pphEvents))[i]); } *params->phCommandList = command_list_append_wait_on_events_args.hCommandList1; *params->pnumEvents = NUM_COMMAND_LIST_APPEND_WAIT_ON_EVENTS_1; *params->pphEvents = command_list_append_wait_on_events_args.hEvents1; ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 1); *val += 1; struct instanceDataStruct *instanceData = new struct instanceDataStruct; instanceData->instanceDataValue = command_list_append_wait_on_events_args.instanceData0; *ppTracerInstanceUserData = instanceData; }; // // The 0th epilog expects to see the API argument replacements // Expect to receive instance data from corresponding prolog // epilogCbs0.CommandList.pfnAppendWaitOnEventsCb = [](ze_command_list_append_wait_on_events_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { struct instanceDataStruct *instanceData; EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(command_list_append_wait_on_events_args.hCommandList1, *params->phCommandList); EXPECT_EQ((uint32_t)NUM_COMMAND_LIST_APPEND_WAIT_ON_EVENTS_1, *params->pnumEvents); for (int i = 0; i < NUM_COMMAND_LIST_APPEND_WAIT_ON_EVENTS_1; i++) { EXPECT_EQ(command_list_append_wait_on_events_args.hEvents1[i], (*(params->pphEvents))[i]); } ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 2); *val += 1; instanceData = (struct instanceDataStruct *)*ppTracerInstanceUserData; 
EXPECT_EQ(instanceData->instanceDataValue, command_list_append_wait_on_events_args.instanceData0); delete instanceData; }; // // The 1st prolog sees the arguments as replaced by the 0th prolog. // There is no epilog for this prolog, so don't allocate instance data // prologCbs1.CommandList.pfnAppendWaitOnEventsCb = [](ze_command_list_append_wait_on_events_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(command_list_append_wait_on_events_args.hCommandList1, *params->phCommandList); EXPECT_EQ((uint32_t)NUM_COMMAND_LIST_APPEND_WAIT_ON_EVENTS_1, *params->pnumEvents); for (int i = 0; i < NUM_COMMAND_LIST_APPEND_WAIT_ON_EVENTS_1; i++) { EXPECT_EQ(command_list_append_wait_on_events_args.hEvents1[i], (*(params->pphEvents))[i]); } ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 11); *val += 11; }; // // The 2nd epilog expects to see the API argument replacements // There is no corresponding prolog, so there is no instance data // epilogCbs2.CommandList.pfnAppendWaitOnEventsCb = [](ze_command_list_append_wait_on_events_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(command_list_append_wait_on_events_args.hCommandList1, *params->phCommandList); EXPECT_EQ((uint32_t)NUM_COMMAND_LIST_APPEND_WAIT_ON_EVENTS_1, *params->pnumEvents); for (int i = 0; i < NUM_COMMAND_LIST_APPEND_WAIT_ON_EVENTS_1; i++) { EXPECT_EQ(command_list_append_wait_on_events_args.hEvents1[i], (*(params->pphEvents))[i]); } ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 21); *val += 21; }; // // The 3rd prolog expects to see the API argument replacements and doesn't modify them // Allocate instance data and pass to corresponding epilog // prologCbs3.CommandList.pfnAppendWaitOnEventsCb = [](ze_command_list_append_wait_on_events_params_t *params, ze_result_t result, void 
*pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(command_list_append_wait_on_events_args.hCommandList1, *params->phCommandList); EXPECT_EQ((uint32_t)NUM_COMMAND_LIST_APPEND_WAIT_ON_EVENTS_1, *params->pnumEvents); for (int i = 0; i < NUM_COMMAND_LIST_APPEND_WAIT_ON_EVENTS_1; i++) { EXPECT_EQ(command_list_append_wait_on_events_args.hEvents1[i], (*(params->pphEvents))[i]); } ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 31); *val += 31; struct instanceDataStruct *instanceData = new struct instanceDataStruct; instanceData->instanceDataValue = command_list_append_wait_on_events_args.instanceData0; *ppTracerInstanceUserData = instanceData; }; epilogCbs3.CommandList.pfnAppendWaitOnEventsCb = [](ze_command_list_append_wait_on_events_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { struct instanceDataStruct *instanceData; EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(command_list_append_wait_on_events_args.hCommandList1, *params->phCommandList); EXPECT_EQ((uint32_t)NUM_COMMAND_LIST_APPEND_WAIT_ON_EVENTS_1, *params->pnumEvents); for (int i = 0; i < NUM_COMMAND_LIST_APPEND_WAIT_ON_EVENTS_1; i++) { EXPECT_EQ(command_list_append_wait_on_events_args.hEvents1[i], (*(params->pphEvents))[i]); } ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 62); *val += 31; instanceData = (struct instanceDataStruct *)*ppTracerInstanceUserData; EXPECT_EQ(instanceData->instanceDataValue, command_list_append_wait_on_events_args.instanceData0); delete instanceData; }; setTracerCallbacksAndEnableTracer(); result = zeCommandListAppendWaitOnEvents_Tracing(command_list_append_wait_on_events_args.hCommandList0, NUM_COMMAND_LIST_APPEND_WAIT_ON_EVENTS_0, command_list_append_wait_on_events_args.hEvents0); EXPECT_EQ(ZE_RESULT_SUCCESS, result); validateDefaultUserDataFinal(); } } // namespace ult } // namespace L0 
test_event_api_tracing.cpp000066400000000000000000000235271422164147700356650ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/experimental/test/unit_tests/sources/tracing/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "test_api_tracing_common.h" namespace L0 { namespace ult { TEST_F(zeAPITracingRuntimeTests, WhenCallingEventCreateTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.Event.pfnCreate = [](ze_event_pool_handle_t hEventPool, const ze_event_desc_t *desc, ze_event_handle_t *phEvent) { return ZE_RESULT_SUCCESS; }; ze_event_handle_t event = {}; ze_event_desc_t desc = {}; prologCbs.Event.pfnCreateCb = genericPrologCallbackPtr; epilogCbs.Event.pfnCreateCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeEventCreate_Tracing(nullptr, &desc, &event); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingEventDestroyTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.Event.pfnDestroy = [](ze_event_handle_t hEvent) { return ZE_RESULT_SUCCESS; }; prologCbs.Event.pfnDestroyCb = genericPrologCallbackPtr; epilogCbs.Event.pfnDestroyCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeEventDestroy_Tracing(nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingEventHostSignalTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.Event.pfnHostSignal = [](ze_event_handle_t hEvent) { return ZE_RESULT_SUCCESS; }; prologCbs.Event.pfnHostSignalCb = genericPrologCallbackPtr; epilogCbs.Event.pfnHostSignalCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeEventHostSignal_Tracing(nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); 
EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingEventHostSynchronizeTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.Event.pfnHostSynchronize = [](ze_event_handle_t hEvent, uint64_t timeout) { return ZE_RESULT_SUCCESS; }; prologCbs.Event.pfnHostSynchronizeCb = genericPrologCallbackPtr; epilogCbs.Event.pfnHostSynchronizeCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeEventHostSynchronize_Tracing(nullptr, 1U); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingEventQueryStatusTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.Event.pfnQueryStatus = [](ze_event_handle_t hEvent) { return ZE_RESULT_SUCCESS; }; prologCbs.Event.pfnQueryStatusCb = genericPrologCallbackPtr; epilogCbs.Event.pfnQueryStatusCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeEventQueryStatus_Tracing(nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingEventHostResetTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.Event.pfnHostReset = [](ze_event_handle_t hEvent) { return ZE_RESULT_SUCCESS; }; prologCbs.Event.pfnHostResetCb = genericPrologCallbackPtr; epilogCbs.Event.pfnHostResetCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeEventHostReset_Tracing(nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingEventPoolCreateTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.EventPool.pfnCreate = [](ze_context_handle_t hContext, const ze_event_pool_desc_t *desc, uint32_t numDevices, ze_device_handle_t *phDevices, ze_event_pool_handle_t *phEventPool) 
{ return ZE_RESULT_SUCCESS; }; prologCbs.EventPool.pfnCreateCb = genericPrologCallbackPtr; epilogCbs.EventPool.pfnCreateCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeEventPoolCreate_Tracing(nullptr, nullptr, 1U, nullptr, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingEventPoolDestroyTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.EventPool.pfnDestroy = [](ze_event_pool_handle_t hEventPool) { return ZE_RESULT_SUCCESS; }; prologCbs.EventPool.pfnDestroyCb = genericPrologCallbackPtr; epilogCbs.EventPool.pfnDestroyCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeEventPoolDestroy_Tracing(nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingEventPoolGetIpcHandleTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.EventPool.pfnGetIpcHandle = [](ze_event_pool_handle_t hEventPool, ze_ipc_event_pool_handle_t *phIpc) { return ZE_RESULT_SUCCESS; }; ze_ipc_event_pool_handle_t phIpc; prologCbs.EventPool.pfnGetIpcHandleCb = genericPrologCallbackPtr; epilogCbs.EventPool.pfnGetIpcHandleCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeEventPoolGetIpcHandle_Tracing(nullptr, &phIpc); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingEventPoolOpenIpcHandleTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.EventPool.pfnOpenIpcHandle = [](ze_context_handle_t hDriver, ze_ipc_event_pool_handle_t hIpc, ze_event_pool_handle_t *phEventPool) { return ZE_RESULT_SUCCESS; }; ze_ipc_event_pool_handle_t hIpc = {}; ze_event_pool_handle_t phEventPool; prologCbs.EventPool.pfnOpenIpcHandleCb = genericPrologCallbackPtr; 
epilogCbs.EventPool.pfnOpenIpcHandleCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeEventPoolOpenIpcHandle_Tracing(nullptr, hIpc, &phEventPool); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingEventPoolCloseIpcHandleTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.EventPool.pfnCloseIpcHandle = [](ze_event_pool_handle_t hEventPool) { return ZE_RESULT_SUCCESS; }; prologCbs.EventPool.pfnCloseIpcHandleCb = genericPrologCallbackPtr; epilogCbs.EventPool.pfnCloseIpcHandleCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeEventPoolCloseIpcHandle_Tracing(nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } // Command List API with Events TEST_F(zeAPITracingRuntimeTests, WhenCallingCommandListAppendSignalEventTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.CommandList.pfnAppendSignalEvent = [](ze_command_list_handle_t hCommandList, ze_event_handle_t hEvent) { return ZE_RESULT_SUCCESS; }; prologCbs.CommandList.pfnAppendSignalEventCb = genericPrologCallbackPtr; epilogCbs.CommandList.pfnAppendSignalEventCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeCommandListAppendSignalEvent_Tracing(nullptr, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingCommandListAppendWaitOnEventsTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.CommandList.pfnAppendWaitOnEvents = [](ze_command_list_handle_t hCommandList, uint32_t numEvents, ze_event_handle_t *phEvents) { return ZE_RESULT_SUCCESS; }; ze_event_handle_t phEvents = {}; prologCbs.CommandList.pfnAppendWaitOnEventsCb = genericPrologCallbackPtr; epilogCbs.CommandList.pfnAppendWaitOnEventsCb = 
genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeCommandListAppendWaitOnEvents_Tracing(nullptr, 1, &phEvents); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingCommandListAppendEventResetTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.CommandList.pfnAppendEventReset = [](ze_command_list_handle_t hCommandList, ze_event_handle_t hEvent) { return ZE_RESULT_SUCCESS; }; prologCbs.CommandList.pfnAppendEventResetCb = genericPrologCallbackPtr; epilogCbs.CommandList.pfnAppendEventResetCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeCommandListAppendEventReset_Tracing(nullptr, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingEventQueryKernelTimestampTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.Event.pfnQueryKernelTimestamp = [](ze_event_handle_t hEvent, ze_kernel_timestamp_result_t *dstptr) { return ZE_RESULT_SUCCESS; }; prologCbs.Event.pfnQueryKernelTimestampCb = genericPrologCallbackPtr; epilogCbs.Event.pfnQueryKernelTimestampCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeEventQueryKernelTimestamp_Tracing(nullptr, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } // See test_event_api_multi_tracing.cpp for more tests } // namespace ult } // namespace L0 test_fence_api_tracing.cpp000066400000000000000000001101041422164147700356100ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/experimental/test/unit_tests/sources/tracing/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "test_api_tracing_common.h" namespace L0 { namespace ult { TEST_F(zeAPITracingRuntimeTests, 
WhenCallingFenceCreateTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.Fence.pfnCreate = [](ze_command_queue_handle_t hCommandQueue, const ze_fence_desc_t *desc, ze_fence_handle_t *phFence) { return ZE_RESULT_SUCCESS; }; ze_fence_handle_t fence = {}; ze_fence_desc_t desc = {}; prologCbs.Fence.pfnCreateCb = genericPrologCallbackPtr; epilogCbs.Fence.pfnCreateCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeFenceCreate_Tracing(nullptr, &desc, &fence); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingFenceDestroyTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.Fence.pfnDestroy = [](ze_fence_handle_t hFence) { return ZE_RESULT_SUCCESS; }; prologCbs.Fence.pfnDestroyCb = genericPrologCallbackPtr; epilogCbs.Fence.pfnDestroyCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeFenceDestroy_Tracing(nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingFenceHostSynchronizeTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.Fence.pfnHostSynchronize = [](ze_fence_handle_t hFence, uint64_t timeout) { return ZE_RESULT_SUCCESS; }; prologCbs.Fence.pfnHostSynchronizeCb = genericPrologCallbackPtr; epilogCbs.Fence.pfnHostSynchronizeCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeFenceHostSynchronize_Tracing(nullptr, 1U); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingFenceQueryStatusTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.Fence.pfnQueryStatus = [](ze_fence_handle_t hFence) { return ZE_RESULT_SUCCESS; }; prologCbs.Fence.pfnQueryStatusCb = 
genericPrologCallbackPtr; epilogCbs.Fence.pfnQueryStatusCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeFenceQueryStatus_Tracing(nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingFenceResetTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.Fence.pfnReset = [](ze_fence_handle_t hFence) { return ZE_RESULT_SUCCESS; }; prologCbs.Fence.pfnResetCb = genericPrologCallbackPtr; epilogCbs.Fence.pfnResetCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeFenceReset_Tracing(nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } struct { ze_command_queue_handle_t hCommandQueue0; ze_fence_desc_t desc0{}; ze_fence_handle_t hFence0; ze_command_queue_handle_t hCommandQueue1; ze_fence_desc_t desc1{}; ze_fence_handle_t hFence1; ze_fence_handle_t hFenceAPI; void *instanceData0; void *instanceData3; } fence_create_args; TEST_F(zeAPITracingRuntimeMultipleArgumentsTests, WhenCallingFenceCreateTracingWrapperWithMultiplePrologEpilogsThenReturnSuccess) { ze_result_t result; // initialize initial argument set fence_create_args.hCommandQueue0 = generateRandomHandle(); fence_create_args.hFence0 = generateRandomHandle(); // initialize replacement argument set fence_create_args.hCommandQueue1 = generateRandomHandle(); fence_create_args.hFence1 = generateRandomHandle(); // initialize user instance data fence_create_args.instanceData0 = generateRandomHandle(); fence_create_args.instanceData3 = generateRandomHandle(); driver_ddiTable.core_ddiTable.Fence.pfnCreate = [](ze_command_queue_handle_t hCommandQueue, const ze_fence_desc_t *desc, ze_fence_handle_t *phFence) { EXPECT_EQ(fence_create_args.hCommandQueue1, hCommandQueue); EXPECT_EQ(&fence_create_args.desc1, desc); EXPECT_EQ(&fence_create_args.hFence1, phFence); EXPECT_EQ(fence_create_args.hFence1, *phFence); 
fence_create_args.hFenceAPI = generateRandomHandle(); *phFence = fence_create_args.hFenceAPI; return ZE_RESULT_SUCCESS; }; // // The 0th prolog replaces the orignal API arguments with a new set // Create instance data, pass it to corresponding epilog. // prologCbs0.Fence.pfnCreateCb = [](ze_fence_create_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(fence_create_args.hCommandQueue0, *params->phCommandQueue); EXPECT_EQ(&fence_create_args.desc0, *params->pdesc); EXPECT_EQ(&fence_create_args.hFence0, *params->pphFence); ze_fence_handle_t **ppHandle; ASSERT_NE(nullptr, params); ppHandle = params->pphFence; ze_fence_handle_t *pHandle; ASSERT_NE(nullptr, ppHandle); pHandle = *ppHandle; ze_fence_handle_t handle; ASSERT_NE(nullptr, pHandle); handle = *pHandle; EXPECT_EQ(fence_create_args.hFence0, handle); *params->phCommandQueue = fence_create_args.hCommandQueue1; *params->pdesc = &fence_create_args.desc1; *params->pphFence = &fence_create_args.hFence1; ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 1); *val += 1; struct instanceDataStruct *instanceData = new struct instanceDataStruct; instanceData->instanceDataValue = fence_create_args.instanceData0; *ppTracerInstanceUserData = instanceData; }; // // The 0th epilog expects to see the API argument replacements // Expect to receive instance data from corresponding prolog // epilogCbs0.Fence.pfnCreateCb = [](ze_fence_create_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { struct instanceDataStruct *instanceData; EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(fence_create_args.hCommandQueue1, *params->phCommandQueue); EXPECT_EQ(&fence_create_args.desc1, *params->pdesc); EXPECT_EQ(&fence_create_args.hFence1, *params->pphFence); ze_fence_handle_t **ppHandle; ASSERT_NE(nullptr, params); ppHandle = params->pphFence; ze_fence_handle_t *pHandle; ASSERT_NE(nullptr, ppHandle); 
pHandle = *ppHandle; ze_fence_handle_t handle; ASSERT_NE(nullptr, pHandle); handle = *pHandle; EXPECT_EQ(fence_create_args.hFence1, handle); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 2); *val += 1; instanceData = (struct instanceDataStruct *)*ppTracerInstanceUserData; EXPECT_EQ(instanceData->instanceDataValue, fence_create_args.instanceData0); delete instanceData; }; // // The 1st prolog sees the arguments as replaced by the 0th prolog. // There is no epilog for this prolog, so don't allocate instance data // prologCbs1.Fence.pfnCreateCb = [](ze_fence_create_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(fence_create_args.hCommandQueue1, *params->phCommandQueue); EXPECT_EQ(&fence_create_args.desc1, *params->pdesc); EXPECT_EQ(&fence_create_args.hFence1, *params->pphFence); ze_fence_handle_t **ppHandle; ASSERT_NE(nullptr, params); ppHandle = params->pphFence; ze_fence_handle_t *pHandle; ASSERT_NE(nullptr, ppHandle); pHandle = *ppHandle; ze_fence_handle_t handle; ASSERT_NE(nullptr, pHandle); handle = *pHandle; EXPECT_EQ(fence_create_args.hFence1, handle); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 11); *val += 11; }; // // The 2nd epilog expects to see the API argument replacements // There is no corresponding prolog, so there is no instance data // epilogCbs2.Fence.pfnCreateCb = [](ze_fence_create_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(fence_create_args.hCommandQueue1, *params->phCommandQueue); EXPECT_EQ(&fence_create_args.desc1, *params->pdesc); EXPECT_EQ(&fence_create_args.hFence1, *params->pphFence); ze_fence_handle_t **ppHandle; ASSERT_NE(nullptr, params); ppHandle = params->pphFence; ze_fence_handle_t *pHandle; ASSERT_NE(nullptr, ppHandle); pHandle = *ppHandle; ze_fence_handle_t handle; 
ASSERT_NE(nullptr, pHandle); handle = *pHandle; EXPECT_EQ(fence_create_args.hFence1, handle); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 21); *val += 21; }; // // The 3rd prolog expects to see the API argument replacements and doesn't modify them // Create instance data and pass to corresponding epilog // prologCbs3.Fence.pfnCreateCb = [](ze_fence_create_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(fence_create_args.hCommandQueue1, *params->phCommandQueue); EXPECT_EQ(&fence_create_args.desc1, *params->pdesc); EXPECT_EQ(&fence_create_args.hFence1, *params->pphFence); ze_fence_handle_t **ppHandle; ASSERT_NE(nullptr, params); ppHandle = params->pphFence; ze_fence_handle_t *pHandle; ASSERT_NE(nullptr, ppHandle); pHandle = *ppHandle; ze_fence_handle_t handle; ASSERT_NE(nullptr, pHandle); handle = *pHandle; EXPECT_EQ(fence_create_args.hFence1, handle); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 31); *val += 31; struct instanceDataStruct *instanceData = new struct instanceDataStruct; instanceData->instanceDataValue = fence_create_args.instanceData3; *ppTracerInstanceUserData = instanceData; }; // // The 3rd epilog expects to see the API argument replacements // Expect to see instance data from corresponding prolog // epilogCbs3.Fence.pfnCreateCb = [](ze_fence_create_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { struct instanceDataStruct *instanceData; EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(fence_create_args.hCommandQueue1, *params->phCommandQueue); EXPECT_EQ(&fence_create_args.desc1, *params->pdesc); EXPECT_EQ(&fence_create_args.hFence1, *params->pphFence); ze_fence_handle_t **ppHandle; ASSERT_NE(nullptr, params); ppHandle = params->pphFence; ze_fence_handle_t *pHandle; ASSERT_NE(nullptr, ppHandle); pHandle = *ppHandle; ze_fence_handle_t handle; 
ASSERT_NE(nullptr, pHandle); handle = *pHandle; ASSERT_NE(nullptr, handle); EXPECT_EQ(fence_create_args.hFence1, handle); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 62); *val += 31; instanceData = (struct instanceDataStruct *)*ppTracerInstanceUserData; EXPECT_EQ(instanceData->instanceDataValue, fence_create_args.instanceData3); delete instanceData; }; setTracerCallbacksAndEnableTracer(); result = zeFenceCreate_Tracing(fence_create_args.hCommandQueue0, &fence_create_args.desc0, &fence_create_args.hFence0); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(fence_create_args.hFence1, fence_create_args.hFenceAPI); validateDefaultUserDataFinal(); } struct { ze_fence_handle_t hFence0; ze_fence_handle_t hFence1; void *instanceData0; void *instanceData3; } fence_destroy_args; TEST_F(zeAPITracingRuntimeMultipleArgumentsTests, WhenCallingFenceDestroyTracingWrapperWithMultiplePrologEpilogsThenReturnSuccess) { ze_result_t result; // initialize initial argument set fence_destroy_args.hFence0 = generateRandomHandle(); // initialize replacement argument set fence_destroy_args.hFence1 = generateRandomHandle(); // initialize user instance data fence_destroy_args.instanceData0 = generateRandomHandle(); fence_destroy_args.instanceData3 = generateRandomHandle(); driver_ddiTable.core_ddiTable.Fence.pfnDestroy = [](ze_fence_handle_t hFence) { EXPECT_EQ(fence_destroy_args.hFence1, hFence); return ZE_RESULT_SUCCESS; }; // // The 0th prolog replaces the orignal API arguments with a new set // Create instance data, pass it to corresponding epilog. 
// prologCbs0.Fence.pfnDestroyCb = [](ze_fence_destroy_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(fence_destroy_args.hFence0, *params->phFence); *params->phFence = fence_destroy_args.hFence1; ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 1); *val += 1; struct instanceDataStruct *instanceData = new struct instanceDataStruct; instanceData->instanceDataValue = fence_destroy_args.instanceData0; *ppTracerInstanceUserData = instanceData; }; // // The 0th epilog expects to see the API argument replacements // Expect to receive instance data from corresponding prolog // epilogCbs0.Fence.pfnDestroyCb = [](ze_fence_destroy_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { struct instanceDataStruct *instanceData; EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(fence_destroy_args.hFence1, *params->phFence); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 2); *val += 1; instanceData = (struct instanceDataStruct *)*ppTracerInstanceUserData; EXPECT_EQ(instanceData->instanceDataValue, fence_destroy_args.instanceData0); delete instanceData; }; // // The 1st prolog sees the arguments as replaced by the 0th prolog. 
// There is no epilog for this prolog, so don't allocate instance data // prologCbs1.Fence.pfnDestroyCb = [](ze_fence_destroy_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(fence_destroy_args.hFence1, *params->phFence); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 11); *val += 11; }; // // The 2nd epilog expects to see the API argument replacements // There is no corresponding prolog, so there is no instance data // epilogCbs2.Fence.pfnDestroyCb = [](ze_fence_destroy_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(fence_destroy_args.hFence1, *params->phFence); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 21); *val += 21; }; // // The 3rd prolog expects to see the API argument replacements and doesn't modify them // Create instance data and pass to corresponding epilog // prologCbs3.Fence.pfnDestroyCb = [](ze_fence_destroy_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(fence_destroy_args.hFence1, *params->phFence); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 31); *val += 31; struct instanceDataStruct *instanceData = new struct instanceDataStruct; instanceData->instanceDataValue = fence_destroy_args.instanceData3; *ppTracerInstanceUserData = instanceData; }; // // The 3rd epilog expects to see the API argument replacements // Expect to see instance data from corresponding prolog // epilogCbs3.Fence.pfnDestroyCb = [](ze_fence_destroy_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { struct instanceDataStruct *instanceData; EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(fence_destroy_args.hFence1, *params->phFence); ASSERT_NE(nullptr, pTracerUserData); int *val = 
static_cast(pTracerUserData); EXPECT_EQ(*val, 62); *val += 31; instanceData = (struct instanceDataStruct *)*ppTracerInstanceUserData; EXPECT_EQ(instanceData->instanceDataValue, fence_destroy_args.instanceData3); delete instanceData; }; setTracerCallbacksAndEnableTracer(); result = zeFenceDestroy_Tracing(fence_destroy_args.hFence0); EXPECT_EQ(ZE_RESULT_SUCCESS, result); validateDefaultUserDataFinal(); } struct { ze_fence_handle_t hFence0; uint64_t timeout0; ze_fence_handle_t hFence1; uint64_t timeout1; void *instanceData0; void *instanceData3; } fence_host_synchronize_args; TEST_F(zeAPITracingRuntimeMultipleArgumentsTests, WhenCallingFenceHostSynchronizeTracingWrapperWithMultiplePrologEpilogsThenReturnSuccess) { ze_result_t result; // initialize initial argument set fence_host_synchronize_args.hFence0 = generateRandomHandle(); fence_host_synchronize_args.timeout0 = generateRandomSize(); // initialize replacement argument set fence_host_synchronize_args.hFence1 = generateRandomHandle(); fence_host_synchronize_args.timeout1 = generateRandomSize(); // initialize user instance data fence_host_synchronize_args.instanceData0 = generateRandomHandle(); fence_host_synchronize_args.instanceData3 = generateRandomHandle(); driver_ddiTable.core_ddiTable.Fence.pfnHostSynchronize = [](ze_fence_handle_t hFence, uint64_t timeout) { EXPECT_EQ(fence_host_synchronize_args.hFence1, hFence); EXPECT_EQ(fence_host_synchronize_args.timeout1, timeout); return ZE_RESULT_SUCCESS; }; // // The 0th prolog replaces the orignal API arguments with a new set // Create instance data, pass it to corresponding epilog. 
// prologCbs0.Fence.pfnHostSynchronizeCb = [](ze_fence_host_synchronize_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(fence_host_synchronize_args.hFence0, *params->phFence); EXPECT_EQ(fence_host_synchronize_args.timeout0, *params->ptimeout); *params->phFence = fence_host_synchronize_args.hFence1; *params->ptimeout = fence_host_synchronize_args.timeout1; ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 1); *val += 1; struct instanceDataStruct *instanceData = new struct instanceDataStruct; instanceData->instanceDataValue = fence_host_synchronize_args.instanceData0; *ppTracerInstanceUserData = instanceData; }; // // The 0th epilog expects to see the API argument replacements // Expect to receive instance data from corresponding prolog // epilogCbs0.Fence.pfnHostSynchronizeCb = [](ze_fence_host_synchronize_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { struct instanceDataStruct *instanceData; EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(fence_host_synchronize_args.hFence1, *params->phFence); EXPECT_EQ(fence_host_synchronize_args.timeout1, *params->ptimeout); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 2); *val += 1; instanceData = (struct instanceDataStruct *)*ppTracerInstanceUserData; EXPECT_EQ(instanceData->instanceDataValue, fence_host_synchronize_args.instanceData0); delete instanceData; }; // // The 1st prolog sees the arguments as replaced by the 0th prolog. 
// There is no epilog for this prolog, so don't allocate instance data // prologCbs1.Fence.pfnHostSynchronizeCb = [](ze_fence_host_synchronize_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(fence_host_synchronize_args.hFence1, *params->phFence); EXPECT_EQ(fence_host_synchronize_args.timeout1, *params->ptimeout); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 11); *val += 11; }; // // The 2nd epilog expects to see the API argument replacements // There is no corresponding prolog, so there is no instance data // epilogCbs2.Fence.pfnHostSynchronizeCb = [](ze_fence_host_synchronize_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(fence_host_synchronize_args.hFence1, *params->phFence); EXPECT_EQ(fence_host_synchronize_args.timeout1, *params->ptimeout); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 21); *val += 21; }; // // The 3rd prolog expects to see the API argument replacements and doesn't modify them // Create instance data and pass to corresponding epilog // prologCbs3.Fence.pfnHostSynchronizeCb = [](ze_fence_host_synchronize_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(fence_host_synchronize_args.hFence1, *params->phFence); EXPECT_EQ(fence_host_synchronize_args.timeout1, *params->ptimeout); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 31); *val += 31; struct instanceDataStruct *instanceData = new struct instanceDataStruct; instanceData->instanceDataValue = fence_host_synchronize_args.instanceData3; *ppTracerInstanceUserData = instanceData; }; // // The 3rd epilog expects to see the API argument replacements // Expect to see instance data from corresponding prolog // epilogCbs3.Fence.pfnHostSynchronizeCb = 
[](ze_fence_host_synchronize_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { struct instanceDataStruct *instanceData; EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(fence_host_synchronize_args.hFence1, *params->phFence); EXPECT_EQ(fence_host_synchronize_args.timeout1, *params->ptimeout); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 62); *val += 31; instanceData = (struct instanceDataStruct *)*ppTracerInstanceUserData; EXPECT_EQ(instanceData->instanceDataValue, fence_host_synchronize_args.instanceData3); delete instanceData; }; setTracerCallbacksAndEnableTracer(); result = zeFenceHostSynchronize_Tracing(fence_host_synchronize_args.hFence0, fence_host_synchronize_args.timeout0); EXPECT_EQ(ZE_RESULT_SUCCESS, result); validateDefaultUserDataFinal(); } struct { ze_fence_handle_t hFence0; ze_fence_handle_t hFence1; void *instanceData0; void *instanceData3; } fence_query_status_args; TEST_F(zeAPITracingRuntimeMultipleArgumentsTests, WhenCallingFenceQueryStatusTracingWrapperWithMultiplePrologEpilogsThenReturnSuccess) { ze_result_t result; // initialize initial argument set fence_query_status_args.hFence0 = generateRandomHandle(); // initialize replacement argument set fence_query_status_args.hFence1 = generateRandomHandle(); // initialize user instance data fence_query_status_args.instanceData0 = generateRandomHandle(); fence_query_status_args.instanceData3 = generateRandomHandle(); driver_ddiTable.core_ddiTable.Fence.pfnQueryStatus = [](ze_fence_handle_t hFence) { EXPECT_EQ(fence_query_status_args.hFence1, hFence); return ZE_RESULT_SUCCESS; }; // // The 0th prolog replaces the orignal API arguments with a new set // Create instance data, pass it to corresponding epilog. 
// prologCbs0.Fence.pfnQueryStatusCb = [](ze_fence_query_status_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(fence_query_status_args.hFence0, *params->phFence); *params->phFence = fence_query_status_args.hFence1; ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 1); *val += 1; struct instanceDataStruct *instanceData = new struct instanceDataStruct; instanceData->instanceDataValue = fence_query_status_args.instanceData0; *ppTracerInstanceUserData = instanceData; }; // // The 0th epilog expects to see the API argument replacements // Expect to receive instance data from corresponding prolog // epilogCbs0.Fence.pfnQueryStatusCb = [](ze_fence_query_status_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { struct instanceDataStruct *instanceData; EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(fence_query_status_args.hFence1, *params->phFence); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 2); *val += 1; instanceData = (struct instanceDataStruct *)*ppTracerInstanceUserData; EXPECT_EQ(instanceData->instanceDataValue, fence_query_status_args.instanceData0); delete instanceData; }; // // The 1st prolog sees the arguments as replaced by the 0th prolog. 
// There is no epilog for this prolog, so don't allocate instance data // prologCbs1.Fence.pfnQueryStatusCb = [](ze_fence_query_status_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(fence_query_status_args.hFence1, *params->phFence); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 11); *val += 11; }; // // The 2nd epilog expects to see the API argument replacements // There is no corresponding prolog, so there is no instance data // epilogCbs2.Fence.pfnQueryStatusCb = [](ze_fence_query_status_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(fence_query_status_args.hFence1, *params->phFence); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 21); *val += 21; }; // // The 3rd prolog expects to see the API argument replacements and doesn't modify them // Create instance data and pass to corresponding epilog // prologCbs3.Fence.pfnQueryStatusCb = [](ze_fence_query_status_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(fence_query_status_args.hFence1, *params->phFence); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 31); *val += 31; struct instanceDataStruct *instanceData = new struct instanceDataStruct; instanceData->instanceDataValue = fence_query_status_args.instanceData3; *ppTracerInstanceUserData = instanceData; }; // // The 3rd epilog expects to see the API argument replacements // Expect to see instance data from corresponding prolog // epilogCbs3.Fence.pfnQueryStatusCb = [](ze_fence_query_status_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { struct instanceDataStruct *instanceData; EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(fence_query_status_args.hFence1, 
*params->phFence); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 62); *val += 31; instanceData = (struct instanceDataStruct *)*ppTracerInstanceUserData; EXPECT_EQ(instanceData->instanceDataValue, fence_query_status_args.instanceData3); delete instanceData; }; setTracerCallbacksAndEnableTracer(); result = zeFenceQueryStatus_Tracing(fence_query_status_args.hFence0); EXPECT_EQ(ZE_RESULT_SUCCESS, result); validateDefaultUserDataFinal(); } struct { ze_fence_handle_t hFence0; ze_fence_handle_t hFence1; void *instanceData0; void *instanceData3; } fence_reset_args; TEST_F(zeAPITracingRuntimeMultipleArgumentsTests, WhenCallingFenceResetTracingWrapperWithMultiplePrologEpilogsThenReturnSuccess) { ze_result_t result; // initialize initial argument set fence_reset_args.hFence0 = generateRandomHandle(); // initialize replacement argument set fence_reset_args.hFence1 = generateRandomHandle(); // initialize user instance data fence_reset_args.instanceData0 = generateRandomHandle(); fence_reset_args.instanceData3 = generateRandomHandle(); driver_ddiTable.core_ddiTable.Fence.pfnReset = [](ze_fence_handle_t hFence) { EXPECT_EQ(fence_reset_args.hFence1, hFence); return ZE_RESULT_SUCCESS; }; // // The 0th prolog replaces the orignal API arguments with a new set // Create instance data, pass it to corresponding epilog. 
// prologCbs0.Fence.pfnResetCb = [](ze_fence_reset_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(fence_reset_args.hFence0, *params->phFence); *params->phFence = fence_reset_args.hFence1; ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 1); *val += 1; struct instanceDataStruct *instanceData = new struct instanceDataStruct; instanceData->instanceDataValue = fence_reset_args.instanceData0; *ppTracerInstanceUserData = instanceData; }; // // The 0th epilog expects to see the API argument replacements // Expect to receive instance data from corresponding prolog // epilogCbs0.Fence.pfnResetCb = [](ze_fence_reset_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { struct instanceDataStruct *instanceData; EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(fence_reset_args.hFence1, *params->phFence); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 2); *val += 1; instanceData = (struct instanceDataStruct *)*ppTracerInstanceUserData; EXPECT_EQ(instanceData->instanceDataValue, fence_reset_args.instanceData0); delete instanceData; }; // // The 1st prolog sees the arguments as replaced by the 0th prolog. 
// There is no epilog for this prolog, so don't allocate instance data // prologCbs1.Fence.pfnResetCb = [](ze_fence_reset_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(fence_reset_args.hFence1, *params->phFence); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 11); *val += 11; }; // // The 2nd epilog expects to see the API argument replacements // There is no corresponding prolog, so there is no instance data // epilogCbs2.Fence.pfnResetCb = [](ze_fence_reset_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(fence_reset_args.hFence1, *params->phFence); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 21); *val += 21; }; // // The 3rd prolog expects to see the API argument replacements and doesn't modify them // Create instance data and pass to corresponding epilog // prologCbs3.Fence.pfnResetCb = [](ze_fence_reset_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(fence_reset_args.hFence1, *params->phFence); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 31); *val += 31; struct instanceDataStruct *instanceData = new struct instanceDataStruct; instanceData->instanceDataValue = fence_reset_args.instanceData3; *ppTracerInstanceUserData = instanceData; }; // // The 3rd epilog expects to see the API argument replacements // Expect to see instance data from corresponding prolog // epilogCbs3.Fence.pfnResetCb = [](ze_fence_reset_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { struct instanceDataStruct *instanceData; EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(fence_reset_args.hFence1, *params->phFence); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); 
EXPECT_EQ(*val, 62); *val += 31; instanceData = (struct instanceDataStruct *)*ppTracerInstanceUserData; EXPECT_EQ(instanceData->instanceDataValue, fence_reset_args.instanceData3); delete instanceData; }; setTracerCallbacksAndEnableTracer(); result = zeFenceReset_Tracing(fence_reset_args.hFence0); EXPECT_EQ(ZE_RESULT_SUCCESS, result); validateDefaultUserDataFinal(); } } // namespace ult } // namespace L0 test_global_api_tracing.cpp000066400000000000000000000013341422164147700357740ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/experimental/test/unit_tests/sources/tracing/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "test_api_tracing_common.h" namespace L0 { namespace ult { TEST_F(zeAPITracingRuntimeTests, WhenCallingInitTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.Global.pfnInit = [](ze_init_flags_t flags) { return ZE_RESULT_SUCCESS; }; prologCbs.Global.pfnInitCb = genericPrologCallbackPtr; epilogCbs.Global.pfnInitCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeInit_Tracing(ZE_INIT_FLAG_GPU_ONLY); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } } // namespace ult } // namespace L0 test_image_api_tracing.cpp000066400000000000000000000631701422164147700356240ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/experimental/test/unit_tests/sources/tracing/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "test_api_tracing_common.h" namespace L0 { namespace ult { TEST_F(zeAPITracingRuntimeTests, WhenCallingImageGetPropertiesTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.Image.pfnGetProperties = [](ze_device_handle_t hDevice, const ze_image_desc_t *desc, ze_image_properties_t *pImageProperties) { return ZE_RESULT_SUCCESS; }; const ze_image_desc_t desc = {}; 
ze_image_properties_t pImageProperties = {}; prologCbs.Image.pfnGetPropertiesCb = genericPrologCallbackPtr; epilogCbs.Image.pfnGetPropertiesCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeImageGetProperties_Tracing(nullptr, &desc, &pImageProperties); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingImageCreateTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.Image.pfnCreate = [](ze_context_handle_t hContext, ze_device_handle_t hDevice, const ze_image_desc_t *desc, ze_image_handle_t *phImage) { return ZE_RESULT_SUCCESS; }; const ze_image_desc_t desc = {}; ze_image_handle_t phImage = {}; prologCbs.Image.pfnCreateCb = genericPrologCallbackPtr; epilogCbs.Image.pfnCreateCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeImageCreate_Tracing(nullptr, nullptr, &desc, &phImage); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingImageDestroyTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.Image.pfnDestroy = [](ze_image_handle_t hImage) { return ZE_RESULT_SUCCESS; }; prologCbs.Image.pfnDestroyCb = genericPrologCallbackPtr; epilogCbs.Image.pfnDestroyCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeImageDestroy_Tracing(nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } // More complex tracing test. 
struct { ze_device_handle_t hDevice0; ze_image_desc_t desc0; ze_image_properties_t ImageProperties0; ze_device_handle_t hDevice1; ze_image_desc_t desc1; ze_image_properties_t ImageProperties1; void *instanceData0; void *instanceData3; } ImageGetProperties_args; TEST_F(zeAPITracingRuntimeMultipleArgumentsTests, WhenCallingImageGetPropertiesTracingWrapperWithMultiplePrologEpilogsThenReturnSuccess) { ze_result_t result; // initialize initial argument set ImageGetProperties_args.hDevice0 = generateRandomHandle(); // initialize replacement argument set ImageGetProperties_args.hDevice1 = generateRandomHandle(); // initialize user instance data ImageGetProperties_args.instanceData0 = generateRandomHandle(); ImageGetProperties_args.instanceData3 = generateRandomHandle(); driver_ddiTable.core_ddiTable.Image.pfnGetProperties = [](ze_device_handle_t hDevice, const ze_image_desc_t *desc, ze_image_properties_t *pImageProperties) { EXPECT_EQ(ImageGetProperties_args.hDevice1, hDevice); EXPECT_EQ(&ImageGetProperties_args.desc1, desc); EXPECT_EQ(&ImageGetProperties_args.ImageProperties1, pImageProperties); return ZE_RESULT_SUCCESS; }; // // The 0th prolog replaces the orignal API arguments with a new set // Create instance data, pass it to corresponding epilog. 
// prologCbs0.Image.pfnGetPropertiesCb = [](ze_image_get_properties_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(ImageGetProperties_args.hDevice0, *params->phDevice); EXPECT_EQ(&ImageGetProperties_args.desc0, *params->pdesc); EXPECT_EQ(&ImageGetProperties_args.ImageProperties0, *params->ppImageProperties); *params->phDevice = ImageGetProperties_args.hDevice1; *params->pdesc = &ImageGetProperties_args.desc1; *params->ppImageProperties = &ImageGetProperties_args.ImageProperties1; ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 1); *val += 1; struct instanceDataStruct *instanceData = new struct instanceDataStruct; instanceData->instanceDataValue = ImageGetProperties_args.instanceData0; *ppTracerInstanceUserData = instanceData; }; // // The 0th epilog expects to see the API argument replacements // Expect to receive instance data from corresponding prolog // epilogCbs0.Image.pfnGetPropertiesCb = [](ze_image_get_properties_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { struct instanceDataStruct *instanceData; EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(ImageGetProperties_args.hDevice1, *params->phDevice); EXPECT_EQ(&ImageGetProperties_args.desc1, *params->pdesc); EXPECT_EQ(&ImageGetProperties_args.ImageProperties1, *params->ppImageProperties); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 2); *val += 1; instanceData = (struct instanceDataStruct *)*ppTracerInstanceUserData; EXPECT_EQ(instanceData->instanceDataValue, ImageGetProperties_args.instanceData0); delete instanceData; }; // // The 1st prolog sees the arguments as replaced by the 0th prolog. 
// There is no epilog for this prolog, so don't allocate instance data // prologCbs1.Image.pfnGetPropertiesCb = [](ze_image_get_properties_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(ImageGetProperties_args.hDevice1, *params->phDevice); EXPECT_EQ(&ImageGetProperties_args.desc1, *params->pdesc); EXPECT_EQ(&ImageGetProperties_args.ImageProperties1, *params->ppImageProperties); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 11); *val += 11; }; // // The 2nd epilog expects to see the API argument replacements // There is no corresponding prolog, so there is no instance data // epilogCbs2.Image.pfnGetPropertiesCb = [](ze_image_get_properties_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(ImageGetProperties_args.hDevice1, *params->phDevice); EXPECT_EQ(&ImageGetProperties_args.desc1, *params->pdesc); EXPECT_EQ(&ImageGetProperties_args.ImageProperties1, *params->ppImageProperties); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 21); *val += 21; }; // // The 3rd prolog expects to see the API argument replacements and doesn't modify them // Create instance data and pass to corresponding epilog // prologCbs3.Image.pfnGetPropertiesCb = [](ze_image_get_properties_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(ImageGetProperties_args.hDevice1, *params->phDevice); EXPECT_EQ(&ImageGetProperties_args.desc1, *params->pdesc); EXPECT_EQ(&ImageGetProperties_args.ImageProperties1, *params->ppImageProperties); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 31); *val += 31; struct instanceDataStruct *instanceData = new struct instanceDataStruct; instanceData->instanceDataValue = ImageGetProperties_args.instanceData3; 
*ppTracerInstanceUserData = instanceData; }; // // The 3rd epilog expects to see the API argument replacements // Expect to see instance data from corresponding prolog // epilogCbs3.Image.pfnGetPropertiesCb = [](ze_image_get_properties_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { struct instanceDataStruct *instanceData; EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(ImageGetProperties_args.hDevice1, *params->phDevice); EXPECT_EQ(&ImageGetProperties_args.desc1, *params->pdesc); EXPECT_EQ(&ImageGetProperties_args.ImageProperties1, *params->ppImageProperties); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 62); *val += 31; instanceData = (struct instanceDataStruct *)*ppTracerInstanceUserData; EXPECT_EQ(instanceData->instanceDataValue, ImageGetProperties_args.instanceData3); delete instanceData; }; setTracerCallbacksAndEnableTracer(); result = zeImageGetProperties_Tracing(ImageGetProperties_args.hDevice0, &ImageGetProperties_args.desc0, &ImageGetProperties_args.ImageProperties0); EXPECT_EQ(ZE_RESULT_SUCCESS, result); validateDefaultUserDataFinal(); } struct { ze_context_handle_t hContext0; ze_device_handle_t hDevice0; ze_image_desc_t desc0; ze_image_handle_t hImage0; ze_context_handle_t hContext1; ze_device_handle_t hDevice1; ze_image_desc_t desc1; ze_image_handle_t hImage1; ze_image_handle_t hImageAPI; void *instanceData0; void *instanceData3; } ImageCreate_args; TEST_F(zeAPITracingRuntimeMultipleArgumentsTests, WhenCallingImageCreateTracingWrapperWithMultiplePrologEpilogsThenReturnSuccess) { ze_result_t result; // initialize initial argument set ImageCreate_args.hContext0 = generateRandomHandle(); ImageCreate_args.hDevice0 = generateRandomHandle(); ImageCreate_args.hImage0 = generateRandomHandle(); // initialize replacement argument set ImageCreate_args.hContext1 = generateRandomHandle(); ImageCreate_args.hDevice1 = generateRandomHandle(); ImageCreate_args.hImage1 = 
generateRandomHandle(); // initialize user instance data ImageCreate_args.instanceData0 = generateRandomHandle(); ImageCreate_args.instanceData3 = generateRandomHandle(); driver_ddiTable.core_ddiTable.Image.pfnCreate = [](ze_context_handle_t hContext, ze_device_handle_t hDevice, const ze_image_desc_t *desc, ze_image_handle_t *phImage) { EXPECT_EQ(ImageCreate_args.hContext1, hContext); EXPECT_EQ(ImageCreate_args.hDevice1, hDevice); EXPECT_EQ(&ImageCreate_args.desc1, desc); EXPECT_EQ(&ImageCreate_args.hImage1, phImage); EXPECT_EQ(ImageCreate_args.hImage1, *phImage); ImageCreate_args.hImageAPI = generateRandomHandle(); *phImage = ImageCreate_args.hImageAPI; return ZE_RESULT_SUCCESS; }; // // The 0th prolog replaces the orignal API arguments with a new set // Create instance data, pass it to corresponding epilog. // prologCbs0.Image.pfnCreateCb = [](ze_image_create_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { ASSERT_NE(nullptr, params); ASSERT_NE(nullptr, params->phContext); ASSERT_NE(nullptr, params->phDevice); ASSERT_NE(nullptr, *params->phContext); ASSERT_NE(nullptr, *params->phDevice); EXPECT_EQ(ImageCreate_args.hContext0, *params->phContext); EXPECT_EQ(ImageCreate_args.hDevice0, *params->phDevice); EXPECT_EQ(&ImageCreate_args.desc0, *params->pdesc); ze_image_handle_t **ppHandle; ASSERT_NE(nullptr, params); ppHandle = params->pphImage; ze_image_handle_t *pHandle; ASSERT_NE(nullptr, ppHandle); pHandle = *ppHandle; EXPECT_EQ(&ImageCreate_args.hImage0, pHandle); ze_image_handle_t handle; ASSERT_NE(nullptr, pHandle); handle = *pHandle; EXPECT_EQ(ImageCreate_args.hImage0, handle); *params->phContext = ImageCreate_args.hContext1; *params->phDevice = ImageCreate_args.hDevice1; *params->pdesc = &ImageCreate_args.desc1; *params->pphImage = &ImageCreate_args.hImage1; ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 1); *val += 1; struct instanceDataStruct *instanceData = new 
struct instanceDataStruct; instanceData->instanceDataValue = ImageCreate_args.instanceData0; *ppTracerInstanceUserData = instanceData; }; // // The 0th epilog expects to see the API argument replacements // Expect to receive instance data from corresponding prolog // epilogCbs0.Image.pfnCreateCb = [](ze_image_create_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { struct instanceDataStruct *instanceData; EXPECT_EQ(result, ZE_RESULT_SUCCESS); ASSERT_NE(nullptr, params); ASSERT_NE(nullptr, params->phContext); ASSERT_NE(nullptr, params->phDevice); ASSERT_NE(nullptr, *params->phContext); ASSERT_NE(nullptr, *params->phDevice); EXPECT_EQ(ImageCreate_args.hContext1, *params->phContext); EXPECT_EQ(ImageCreate_args.hDevice1, *params->phDevice); EXPECT_EQ(&ImageCreate_args.desc1, *params->pdesc); ze_image_handle_t **ppHandle; ASSERT_NE(nullptr, params); ppHandle = params->pphImage; ze_image_handle_t *pHandle; ASSERT_NE(nullptr, ppHandle); pHandle = *ppHandle; EXPECT_EQ(&ImageCreate_args.hImage1, pHandle); ze_image_handle_t handle; ASSERT_NE(nullptr, pHandle); handle = *pHandle; EXPECT_EQ(ImageCreate_args.hImage1, handle); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 2); *val += 1; instanceData = (struct instanceDataStruct *)*ppTracerInstanceUserData; EXPECT_EQ(instanceData->instanceDataValue, ImageCreate_args.instanceData0); delete instanceData; }; // // The 1st prolog sees the arguments as replaced by the 0th prolog. 
// There is no epilog for this prolog, so don't allocate instance data // prologCbs1.Image.pfnCreateCb = [](ze_image_create_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(ImageCreate_args.hContext1, *params->phContext); EXPECT_EQ(ImageCreate_args.hDevice1, *params->phDevice); EXPECT_EQ(&ImageCreate_args.desc1, *params->pdesc); ze_image_handle_t **ppHandle; ASSERT_NE(nullptr, params); ppHandle = params->pphImage; ze_image_handle_t *pHandle; ASSERT_NE(nullptr, ppHandle); pHandle = *ppHandle; EXPECT_EQ(&ImageCreate_args.hImage1, pHandle); ze_image_handle_t handle; ASSERT_NE(nullptr, pHandle); handle = *pHandle; EXPECT_EQ(ImageCreate_args.hImage1, handle); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 11); *val += 11; }; // // The 2nd epilog expects to see the API argument replacements // There is no corresponding prolog, so there is no instance data // epilogCbs2.Image.pfnCreateCb = [](ze_image_create_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(ImageCreate_args.hContext1, *params->phContext); EXPECT_EQ(ImageCreate_args.hDevice1, *params->phDevice); EXPECT_EQ(&ImageCreate_args.desc1, *params->pdesc); ze_image_handle_t **ppHandle; ASSERT_NE(nullptr, params); ppHandle = params->pphImage; ze_image_handle_t *pHandle; ASSERT_NE(nullptr, ppHandle); pHandle = *ppHandle; EXPECT_EQ(&ImageCreate_args.hImage1, pHandle); ze_image_handle_t handle; ASSERT_NE(nullptr, pHandle); handle = *pHandle; EXPECT_EQ(ImageCreate_args.hImage1, handle); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 21); *val += 21; }; // // The 3rd prolog expects to see the API argument replacements and doesn't modify them // Create instance data and pass to corresponding epilog // prologCbs3.Image.pfnCreateCb = [](ze_image_create_params_t *params, ze_result_t 
result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(ImageCreate_args.hContext1, *params->phContext); EXPECT_EQ(ImageCreate_args.hDevice1, *params->phDevice); EXPECT_EQ(&ImageCreate_args.desc1, *params->pdesc); ze_image_handle_t **ppHandle; ASSERT_NE(nullptr, params); ppHandle = params->pphImage; ze_image_handle_t *pHandle; ASSERT_NE(nullptr, ppHandle); pHandle = *ppHandle; EXPECT_EQ(&ImageCreate_args.hImage1, pHandle); ze_image_handle_t handle; ASSERT_NE(nullptr, pHandle); handle = *pHandle; EXPECT_EQ(ImageCreate_args.hImage1, handle); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 31); *val += 31; struct instanceDataStruct *instanceData = new struct instanceDataStruct; instanceData->instanceDataValue = ImageCreate_args.instanceData3; *ppTracerInstanceUserData = instanceData; }; // // The 3rd epilog expects to see the API argument replacements // Expect to see instance data from corresponding prolog // epilogCbs3.Image.pfnCreateCb = [](ze_image_create_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { struct instanceDataStruct *instanceData; EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(ImageCreate_args.hContext1, *params->phContext); EXPECT_EQ(ImageCreate_args.hDevice1, *params->phDevice); EXPECT_EQ(&ImageCreate_args.desc1, *params->pdesc); ze_image_handle_t **ppHandle; ASSERT_NE(nullptr, params); ppHandle = params->pphImage; ze_image_handle_t *pHandle; ASSERT_NE(nullptr, ppHandle); pHandle = *ppHandle; EXPECT_EQ(&ImageCreate_args.hImage1, pHandle); ze_image_handle_t handle; ASSERT_NE(nullptr, pHandle); handle = *pHandle; EXPECT_EQ(ImageCreate_args.hImage1, handle); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 62); *val += 31; instanceData = (struct instanceDataStruct *)*ppTracerInstanceUserData; EXPECT_EQ(instanceData->instanceDataValue, ImageCreate_args.instanceData3); delete instanceData; }; 
setTracerCallbacksAndEnableTracer(); result = zeImageCreate_Tracing(ImageCreate_args.hContext0, ImageCreate_args.hDevice0, &ImageCreate_args.desc0, &ImageCreate_args.hImage0); EXPECT_EQ(ZE_RESULT_SUCCESS, result); validateDefaultUserDataFinal(); } struct { ze_image_handle_t hImage0; ze_image_handle_t hImage1; void *instanceData0; void *instanceData3; } ImageDestroy_args; TEST_F(zeAPITracingRuntimeMultipleArgumentsTests, WhenCallingImageDestroyTracingWrapperWithMultiplePrologEpilogsThenReturnSuccess) { ze_result_t result; // initialize initial argument set ImageDestroy_args.hImage0 = generateRandomHandle(); // initialize replacement argument set ImageDestroy_args.hImage1 = generateRandomHandle(); // initialize user instance data ImageDestroy_args.instanceData0 = generateRandomHandle(); ImageDestroy_args.instanceData3 = generateRandomHandle(); driver_ddiTable.core_ddiTable.Image.pfnDestroy = [](ze_image_handle_t hImage) { EXPECT_EQ(ImageDestroy_args.hImage1, hImage); return ZE_RESULT_SUCCESS; }; // // The 0th prolog replaces the orignal API arguments with a new set // Create instance data, pass it to corresponding epilog. 
// prologCbs0.Image.pfnDestroyCb = [](ze_image_destroy_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(ImageDestroy_args.hImage0, *params->phImage); *params->phImage = ImageDestroy_args.hImage1; ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 1); *val += 1; struct instanceDataStruct *instanceData = new struct instanceDataStruct; instanceData->instanceDataValue = ImageDestroy_args.instanceData0; *ppTracerInstanceUserData = instanceData; }; // // The 0th epilog expects to see the API argument replacements // Expect to receive instance data from corresponding prolog // epilogCbs0.Image.pfnDestroyCb = [](ze_image_destroy_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { struct instanceDataStruct *instanceData; EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(ImageDestroy_args.hImage1, *params->phImage); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 2); *val += 1; instanceData = (struct instanceDataStruct *)*ppTracerInstanceUserData; EXPECT_EQ(instanceData->instanceDataValue, ImageDestroy_args.instanceData0); delete instanceData; }; // // The 1st prolog sees the arguments as replaced by the 0th prolog. 
// There is no epilog for this prolog, so don't allocate instance data // prologCbs1.Image.pfnDestroyCb = [](ze_image_destroy_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(ImageDestroy_args.hImage1, *params->phImage); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 11); *val += 11; }; // // The 2nd epilog expects to see the API argument replacements // There is no corresponding prolog, so there is no instance data // epilogCbs2.Image.pfnDestroyCb = [](ze_image_destroy_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(ImageDestroy_args.hImage1, *params->phImage); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 21); *val += 21; }; // // The 3rd prolog expects to see the API argument replacements and doesn't modify them // Create instance data and pass to corresponding epilog // prologCbs3.Image.pfnDestroyCb = [](ze_image_destroy_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(ImageDestroy_args.hImage1, *params->phImage); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 31); *val += 31; struct instanceDataStruct *instanceData = new struct instanceDataStruct; instanceData->instanceDataValue = ImageDestroy_args.instanceData3; *ppTracerInstanceUserData = instanceData; }; // // The 3rd epilog expects to see the API argument replacements // Expect to see instance data from corresponding prolog // epilogCbs3.Image.pfnDestroyCb = [](ze_image_destroy_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { struct instanceDataStruct *instanceData; EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(ImageDestroy_args.hImage1, *params->phImage); ASSERT_NE(nullptr, pTracerUserData); int *val = 
static_cast(pTracerUserData); EXPECT_EQ(*val, 62); *val += 31; instanceData = (struct instanceDataStruct *)*ppTracerInstanceUserData; EXPECT_EQ(instanceData->instanceDataValue, ImageDestroy_args.instanceData3); delete instanceData; }; setTracerCallbacksAndEnableTracer(); result = zeImageDestroy_Tracing(ImageDestroy_args.hImage0); EXPECT_EQ(ZE_RESULT_SUCCESS, result); validateDefaultUserDataFinal(); } } // namespace ult } // namespace L0 test_memory_api_tracing.cpp000066400000000000000000000311731422164147700360500ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/experimental/test/unit_tests/sources/tracing/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "test_api_tracing_common.h" namespace L0 { namespace ult { TEST_F(zeAPITracingRuntimeTests, WhenCallingMemAllocSharedTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.Mem.pfnAllocShared = [](ze_context_handle_t hContext, const ze_device_mem_alloc_desc_t *deviceDesc, const ze_host_mem_alloc_desc_t *hostDesc, size_t size, size_t alignment, ze_device_handle_t hDevice, void **pptr) { return ZE_RESULT_SUCCESS; }; ze_device_mem_alloc_desc_t deviceDesc = {}; ze_host_mem_alloc_desc_t hostDesc; size_t size = 1024; size_t alignment = 4096; void *pptr = nullptr; prologCbs.Mem.pfnAllocSharedCb = genericPrologCallbackPtr; epilogCbs.Mem.pfnAllocSharedCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeMemAllocShared_Tracing(nullptr, &deviceDesc, &hostDesc, size, alignment, nullptr, &pptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingMemAllocDeviceTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.Mem.pfnAllocDevice = [](ze_context_handle_t hContext, const ze_device_mem_alloc_desc_t *deviceDesc, size_t size, size_t alignment, ze_device_handle_t hDevice, 
void **pptr) { return ZE_RESULT_SUCCESS; }; size_t size = 1024; size_t alignment = 4096; ze_device_mem_alloc_desc_t deviceDesc = {}; void *pptr = nullptr; prologCbs.Mem.pfnAllocDeviceCb = genericPrologCallbackPtr; epilogCbs.Mem.pfnAllocDeviceCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeMemAllocDevice_Tracing(nullptr, &deviceDesc, size, alignment, nullptr, &pptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingMemAllocHostTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { driver_ddiTable.core_ddiTable.Mem.pfnAllocHost = [](ze_context_handle_t hContext, const ze_host_mem_alloc_desc_t *hostDesc, size_t size, size_t alignment, void **pptr) { return ZE_RESULT_SUCCESS; }; ze_result_t result; size_t size = 1024; size_t alignment = 4096; ze_host_mem_alloc_desc_t hostDesc = {}; void *pptr = nullptr; prologCbs.Mem.pfnAllocHostCb = genericPrologCallbackPtr; epilogCbs.Mem.pfnAllocHostCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeMemAllocHost_Tracing(nullptr, &hostDesc, size, alignment, &pptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingMemFreeTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { driver_ddiTable.core_ddiTable.Mem.pfnFree = [](ze_context_handle_t hContext, void *ptr) { return ZE_RESULT_SUCCESS; }; ze_result_t result; prologCbs.Mem.pfnFreeCb = genericPrologCallbackPtr; epilogCbs.Mem.pfnFreeCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeMemFree_Tracing(nullptr, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingMemGetAllocPropertiesTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { driver_ddiTable.core_ddiTable.Mem.pfnGetAllocProperties = [](ze_context_handle_t hContext, const void *ptr, ze_memory_allocation_properties_t 
*pMemAllocProperties, ze_device_handle_t *phDevice) { return ZE_RESULT_SUCCESS; }; ze_result_t result; prologCbs.Mem.pfnGetAllocPropertiesCb = genericPrologCallbackPtr; epilogCbs.Mem.pfnGetAllocPropertiesCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeMemGetAllocProperties_Tracing(nullptr, nullptr, nullptr, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingMemGetAddressRangeTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { driver_ddiTable.core_ddiTable.Mem.pfnGetAddressRange = [](ze_context_handle_t hContext, const void *ptr, void **pBase, size_t *pSize) { return ZE_RESULT_SUCCESS; }; ze_result_t result; prologCbs.Mem.pfnGetAddressRangeCb = genericPrologCallbackPtr; epilogCbs.Mem.pfnGetAddressRangeCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeMemGetAddressRange_Tracing(nullptr, nullptr, nullptr, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingMemGetIpcHandleTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { driver_ddiTable.core_ddiTable.Mem.pfnGetIpcHandle = [](ze_context_handle_t hContext, const void *ptr, ze_ipc_mem_handle_t *pIpcHandle) { return ZE_RESULT_SUCCESS; }; ze_result_t result; prologCbs.Mem.pfnGetIpcHandleCb = genericPrologCallbackPtr; epilogCbs.Mem.pfnGetIpcHandleCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeMemGetIpcHandle_Tracing(nullptr, nullptr, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingMemOpenIpcHandleTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { driver_ddiTable.core_ddiTable.Mem.pfnOpenIpcHandle = [](ze_context_handle_t hContext, ze_device_handle_t hDevice, ze_ipc_mem_handle_t handle, ze_ipc_memory_flags_t flags, void **pptr) { return ZE_RESULT_SUCCESS; }; ze_result_t result; 
ze_ipc_mem_handle_t ipchandle = {}; prologCbs.Mem.pfnOpenIpcHandleCb = genericPrologCallbackPtr; epilogCbs.Mem.pfnOpenIpcHandleCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeMemOpenIpcHandle_Tracing(nullptr, nullptr, ipchandle, 0, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingMemCloseIpcHandleTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { driver_ddiTable.core_ddiTable.Mem.pfnCloseIpcHandle = [](ze_context_handle_t hContext, const void *ptr) { return ZE_RESULT_SUCCESS; }; ze_result_t result; prologCbs.Mem.pfnCloseIpcHandleCb = genericPrologCallbackPtr; epilogCbs.Mem.pfnCloseIpcHandleCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeMemCloseIpcHandle_Tracing(nullptr, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingPhysicalMemCreateTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { driver_ddiTable.core_ddiTable.PhysicalMem.pfnCreate = [](ze_context_handle_t hContext, ze_device_handle_t hDevice, ze_physical_mem_desc_t *desc, ze_physical_mem_handle_t *phPhysicalMemory) { return ZE_RESULT_SUCCESS; }; ze_result_t result; prologCbs.PhysicalMem.pfnCreateCb = genericPrologCallbackPtr; epilogCbs.PhysicalMem.pfnCreateCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zePhysicalMemCreate_Tracing(nullptr, nullptr, nullptr, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingPhysicalMemDestroyTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { driver_ddiTable.core_ddiTable.PhysicalMem.pfnDestroy = [](ze_context_handle_t hContext, ze_physical_mem_handle_t hPhysicalMemory) { return ZE_RESULT_SUCCESS; }; ze_result_t result; prologCbs.PhysicalMem.pfnDestroyCb = genericPrologCallbackPtr; epilogCbs.PhysicalMem.pfnDestroyCb = 
genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zePhysicalMemDestroy_Tracing(nullptr, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingVirtualMemFreeTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { driver_ddiTable.core_ddiTable.VirtualMem.pfnFree = [](ze_context_handle_t hContext, const void *ptr, size_t size) { return ZE_RESULT_SUCCESS; }; ze_result_t result; prologCbs.VirtualMem.pfnFreeCb = genericPrologCallbackPtr; epilogCbs.VirtualMem.pfnFreeCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeVirtualMemFree_Tracing(nullptr, nullptr, 1U); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingVirtualMemGetAccessAttributeTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { driver_ddiTable.core_ddiTable.VirtualMem.pfnGetAccessAttribute = [](ze_context_handle_t hContext, const void *ptr, size_t size, ze_memory_access_attribute_t *access, size_t *outSize) { return ZE_RESULT_SUCCESS; }; ze_result_t result; prologCbs.VirtualMem.pfnGetAccessAttributeCb = genericPrologCallbackPtr; epilogCbs.VirtualMem.pfnGetAccessAttributeCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeVirtualMemGetAccessAttribute_Tracing(nullptr, nullptr, 1U, nullptr, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingVirtualMemMapTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { driver_ddiTable.core_ddiTable.VirtualMem.pfnMap = [](ze_context_handle_t hContext, const void *ptr, size_t size, ze_physical_mem_handle_t hPhysicalMemory, size_t offset, ze_memory_access_attribute_t access) { return ZE_RESULT_SUCCESS; }; ze_result_t result; prologCbs.VirtualMem.pfnMapCb = genericPrologCallbackPtr; epilogCbs.VirtualMem.pfnMapCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); 
result = zeVirtualMemMap_Tracing(nullptr, nullptr, 1U, nullptr, 1U, ZE_MEMORY_ACCESS_ATTRIBUTE_NONE); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingVirtualMemQueryPageSizeTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { driver_ddiTable.core_ddiTable.VirtualMem.pfnQueryPageSize = [](ze_context_handle_t hContext, ze_device_handle_t hDevice, size_t size, size_t *pagesize) { return ZE_RESULT_SUCCESS; }; ze_result_t result; prologCbs.VirtualMem.pfnQueryPageSizeCb = genericPrologCallbackPtr; epilogCbs.VirtualMem.pfnQueryPageSizeCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeVirtualMemQueryPageSize_Tracing(nullptr, nullptr, 1U, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingVirtualMemReserveTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { driver_ddiTable.core_ddiTable.VirtualMem.pfnReserve = [](ze_context_handle_t hContext, const void *pStart, size_t size, void **pptr) { return ZE_RESULT_SUCCESS; }; ze_result_t result; prologCbs.VirtualMem.pfnReserveCb = genericPrologCallbackPtr; epilogCbs.VirtualMem.pfnReserveCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeVirtualMemReserve_Tracing(nullptr, nullptr, 1U, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingVirtualMemSetAccessAttributeTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { driver_ddiTable.core_ddiTable.VirtualMem.pfnSetAccessAttribute = [](ze_context_handle_t hContext, const void *ptr, size_t size, ze_memory_access_attribute_t access) { return ZE_RESULT_SUCCESS; }; ze_result_t result; prologCbs.VirtualMem.pfnSetAccessAttributeCb = genericPrologCallbackPtr; epilogCbs.VirtualMem.pfnSetAccessAttributeCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = 
zeVirtualMemSetAccessAttribute_Tracing(nullptr, nullptr, 1U, ZE_MEMORY_ACCESS_ATTRIBUTE_NONE); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingVirtualMemUnmapTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { driver_ddiTable.core_ddiTable.VirtualMem.pfnUnmap = [](ze_context_handle_t hContext, const void *ptr, size_t size) { return ZE_RESULT_SUCCESS; }; ze_result_t result; prologCbs.VirtualMem.pfnUnmapCb = genericPrologCallbackPtr; epilogCbs.VirtualMem.pfnUnmapCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeVirtualMemUnmap_Tracing(nullptr, nullptr, 1U); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } } // namespace ult } // namespace L0 test_module_api_multi_tracing.cpp000066400000000000000000001030501422164147700372310ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/experimental/test/unit_tests/sources/tracing/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "test_api_tracing_common.h" #include namespace L0 { namespace ult { struct { ze_context_handle_t hContext0; ze_device_handle_t hDevice0; ze_module_desc_t desc0; ze_module_handle_t hModule0; ze_module_build_log_handle_t hBuildLog0; ze_context_handle_t hContext1; ze_device_handle_t hDevice1; ze_module_desc_t desc1; ze_module_handle_t hModule1; ze_module_build_log_handle_t hBuildLog1; ze_module_handle_t hModuleAPI; ze_module_build_log_handle_t hBuildLogAPI; void *instanceData0; void *instanceData3; } module_create_args; static void moduleCreateDescInitRandom(ze_module_desc_t *desc) { uint8_t *ptr = (uint8_t *)desc; for (size_t i = 0; i < sizeof(*desc); i++, ptr++) { *ptr = generateRandomSize(); } } static bool moduleCreateDescCompare(const ze_module_desc_t *phIpc0, const ze_module_desc_t *phIpc1) { if (nullptr == phIpc0) { return false; } if (nullptr == phIpc1) { return false; } return (memcmp((void *)phIpc0, (void *)phIpc1, 
sizeof(ze_module_desc_t)) == 0); } TEST_F(zeAPITracingRuntimeMultipleArgumentsTests, WhenCallingModuleCreateTracingWrapperWithMultiplePrologEpilogsThenReturnSuccess) { ze_result_t result; // initialize initial argument set module_create_args.hContext0 = generateRandomHandle(); module_create_args.hDevice0 = generateRandomHandle(); moduleCreateDescInitRandom(&module_create_args.desc0); module_create_args.hModule0 = generateRandomHandle(); module_create_args.hBuildLog0 = generateRandomHandle(); // initialize replacement argument set module_create_args.hContext1 = generateRandomHandle(); module_create_args.hDevice1 = generateRandomHandle(); moduleCreateDescInitRandom(&module_create_args.desc1); module_create_args.hModule1 = generateRandomHandle(); module_create_args.hBuildLog1 = generateRandomHandle(); // initialize user instance data module_create_args.instanceData0 = generateRandomHandle(); module_create_args.instanceData3 = generateRandomHandle(); driver_ddiTable.core_ddiTable.Module.pfnCreate = [](ze_context_handle_t hContext, ze_device_handle_t hDevice, const ze_module_desc_t *desc, ze_module_handle_t *phModule, ze_module_build_log_handle_t *phBuildLog) { EXPECT_EQ(module_create_args.hContext1, hContext); EXPECT_EQ(module_create_args.hDevice1, hDevice); EXPECT_EQ(&module_create_args.desc1, desc); EXPECT_TRUE(moduleCreateDescCompare(&module_create_args.desc1, desc)); EXPECT_EQ(&module_create_args.hModule1, phModule); EXPECT_EQ(module_create_args.hModule1, *phModule); EXPECT_EQ(&module_create_args.hBuildLog1, phBuildLog); EXPECT_EQ(module_create_args.hBuildLog1, *phBuildLog); module_create_args.hModuleAPI = generateRandomHandle(); module_create_args.hBuildLogAPI = generateRandomHandle(); *phModule = module_create_args.hModuleAPI; *phBuildLog = module_create_args.hBuildLogAPI; return ZE_RESULT_SUCCESS; }; // // The 0th prolog replaces the orignal API arguments with a new set // Create instance data, pass it to corresponding epilog. 
// prologCbs0.Module.pfnCreateCb = [](ze_module_create_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(module_create_args.hContext0, *params->phContext); EXPECT_EQ(module_create_args.hDevice0, *params->phDevice); EXPECT_EQ(&module_create_args.desc0, *params->pdesc); EXPECT_TRUE(moduleCreateDescCompare(&module_create_args.desc0, *params->pdesc)); ze_module_handle_t **ppHandle; ASSERT_NE(nullptr, params); ppHandle = params->pphModule; ze_module_handle_t *pHandle; ASSERT_NE(nullptr, ppHandle); pHandle = *ppHandle; EXPECT_EQ(&module_create_args.hModule0, pHandle); ze_module_handle_t handle; ASSERT_NE(nullptr, pHandle); handle = *pHandle; EXPECT_EQ(module_create_args.hModule0, handle); ze_module_build_log_handle_t **ppLogHandle; ppLogHandle = params->pphBuildLog; ze_module_build_log_handle_t *pLogHandle; ASSERT_NE(nullptr, ppLogHandle); pLogHandle = *ppLogHandle; EXPECT_EQ(&module_create_args.hBuildLog0, pLogHandle); ze_module_build_log_handle_t logHandle; logHandle = *pLogHandle; ASSERT_NE(nullptr, logHandle); EXPECT_EQ(module_create_args.hBuildLog0, logHandle); *params->phContext = module_create_args.hContext1; *params->phDevice = module_create_args.hDevice1; *params->pdesc = &module_create_args.desc1; *params->pphModule = &module_create_args.hModule1; *params->pphBuildLog = &module_create_args.hBuildLog1; ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 1); *val += 1; struct instanceDataStruct *instanceData = new struct instanceDataStruct; instanceData->instanceDataValue = module_create_args.instanceData0; *ppTracerInstanceUserData = instanceData; }; // // The 0th epilog expects to see the API argument replacements // Expect to receive instance data from corresponding prolog // epilogCbs0.Module.pfnCreateCb = [](ze_module_create_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { struct instanceDataStruct *instanceData; 
EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(module_create_args.hContext1, *params->phContext); EXPECT_EQ(module_create_args.hDevice1, *params->phDevice); EXPECT_EQ(&module_create_args.desc1, *params->pdesc); EXPECT_TRUE(moduleCreateDescCompare(&module_create_args.desc1, *params->pdesc)); ze_module_handle_t **ppHandle; ASSERT_NE(nullptr, params); ppHandle = params->pphModule; ze_module_handle_t *pHandle; ASSERT_NE(nullptr, ppHandle); pHandle = *ppHandle; EXPECT_EQ(&module_create_args.hModule1, pHandle); ze_module_handle_t handle; ASSERT_NE(nullptr, pHandle); handle = *pHandle; EXPECT_EQ(module_create_args.hModule1, handle); ze_module_build_log_handle_t **ppLogHandle; ppLogHandle = params->pphBuildLog; ze_module_build_log_handle_t *pLogHandle; ASSERT_NE(nullptr, ppLogHandle); pLogHandle = *ppLogHandle; EXPECT_EQ(&module_create_args.hBuildLog1, pLogHandle); ze_module_build_log_handle_t logHandle; logHandle = *pLogHandle; ASSERT_NE(nullptr, logHandle); EXPECT_EQ(module_create_args.hBuildLog1, logHandle); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 2); *val += 1; instanceData = (struct instanceDataStruct *)*ppTracerInstanceUserData; EXPECT_EQ(instanceData->instanceDataValue, module_create_args.instanceData0); delete instanceData; }; // // The 1st prolog sees the arguments as replaced by the 0th prolog. 
// There is no epilog for this prolog, so don't allocate instance data // prologCbs1.Module.pfnCreateCb = [](ze_module_create_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(module_create_args.hContext1, *params->phContext); EXPECT_EQ(module_create_args.hDevice1, *params->phDevice); EXPECT_EQ(&module_create_args.desc1, *params->pdesc); EXPECT_TRUE(moduleCreateDescCompare(&module_create_args.desc1, *params->pdesc)); ze_module_handle_t **ppHandle; ASSERT_NE(nullptr, params); ppHandle = params->pphModule; ze_module_handle_t *pHandle; ASSERT_NE(nullptr, ppHandle); pHandle = *ppHandle; EXPECT_EQ(&module_create_args.hModule1, pHandle); ze_module_handle_t handle; ASSERT_NE(nullptr, pHandle); handle = *pHandle; EXPECT_EQ(module_create_args.hModule1, handle); ze_module_build_log_handle_t **ppLogHandle; ppLogHandle = params->pphBuildLog; ze_module_build_log_handle_t *pLogHandle; ASSERT_NE(nullptr, ppLogHandle); pLogHandle = *ppLogHandle; EXPECT_EQ(&module_create_args.hBuildLog1, pLogHandle); ze_module_build_log_handle_t logHandle; logHandle = *pLogHandle; ASSERT_NE(nullptr, logHandle); EXPECT_EQ(module_create_args.hBuildLog1, logHandle); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 11); *val += 11; }; // // The 2nd epilog expects to see the API argument replacements // There is no corresponding prolog, so there is no instance data // epilogCbs2.Module.pfnCreateCb = [](ze_module_create_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(module_create_args.hContext1, *params->phContext); EXPECT_EQ(module_create_args.hDevice1, *params->phDevice); EXPECT_EQ(&module_create_args.desc1, *params->pdesc); EXPECT_TRUE(moduleCreateDescCompare(&module_create_args.desc1, *params->pdesc)); ze_module_handle_t **ppHandle; ASSERT_NE(nullptr, params); ppHandle = params->pphModule; 
ze_module_handle_t *pHandle; ASSERT_NE(nullptr, ppHandle); pHandle = *ppHandle; EXPECT_EQ(&module_create_args.hModule1, pHandle); ze_module_handle_t handle; ASSERT_NE(nullptr, pHandle); handle = *pHandle; EXPECT_EQ(module_create_args.hModule1, handle); ze_module_build_log_handle_t **ppLogHandle; ppLogHandle = params->pphBuildLog; ze_module_build_log_handle_t *pLogHandle; ASSERT_NE(nullptr, ppLogHandle); pLogHandle = *ppLogHandle; EXPECT_EQ(&module_create_args.hBuildLog1, pLogHandle); ze_module_build_log_handle_t logHandle; logHandle = *pLogHandle; ASSERT_NE(nullptr, logHandle); EXPECT_EQ(module_create_args.hBuildLog1, logHandle); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 21); *val += 21; }; // // The 3rd prolog expects to see the API argument replacements and doesn't modify them // Create instance data and pass to corresponding epilog // prologCbs3.Module.pfnCreateCb = [](ze_module_create_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(module_create_args.hContext1, *params->phContext); EXPECT_EQ(module_create_args.hDevice1, *params->phDevice); EXPECT_EQ(&module_create_args.desc1, *params->pdesc); EXPECT_TRUE(moduleCreateDescCompare(&module_create_args.desc1, *params->pdesc)); ze_module_handle_t **ppHandle; ASSERT_NE(nullptr, params); ppHandle = params->pphModule; ze_module_handle_t *pHandle; ASSERT_NE(nullptr, ppHandle); pHandle = *ppHandle; EXPECT_EQ(&module_create_args.hModule1, pHandle); ze_module_handle_t handle; ASSERT_NE(nullptr, pHandle); handle = *pHandle; EXPECT_EQ(module_create_args.hModule1, handle); ze_module_build_log_handle_t **ppLogHandle; ppLogHandle = params->pphBuildLog; ze_module_build_log_handle_t *pLogHandle; ASSERT_NE(nullptr, ppLogHandle); pLogHandle = *ppLogHandle; EXPECT_EQ(&module_create_args.hBuildLog1, pLogHandle); ze_module_build_log_handle_t logHandle; logHandle = *pLogHandle; ASSERT_NE(nullptr, logHandle); 
EXPECT_EQ(module_create_args.hBuildLog1, logHandle); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 31); *val += 31; struct instanceDataStruct *instanceData = new struct instanceDataStruct; instanceData->instanceDataValue = module_create_args.instanceData3; *ppTracerInstanceUserData = instanceData; }; // // The 3rd epilog expects to see the API argument replacements // Expect to see instance data from corresponding prolog // epilogCbs3.Module.pfnCreateCb = [](ze_module_create_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { struct instanceDataStruct *instanceData; EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(module_create_args.hContext1, *params->phContext); EXPECT_EQ(module_create_args.hDevice1, *params->phDevice); EXPECT_EQ(&module_create_args.desc1, *params->pdesc); EXPECT_TRUE(moduleCreateDescCompare(&module_create_args.desc1, *params->pdesc)); ze_module_handle_t **ppHandle; ASSERT_NE(nullptr, params); ppHandle = params->pphModule; ze_module_handle_t *pHandle; ASSERT_NE(nullptr, ppHandle); pHandle = *ppHandle; EXPECT_EQ(&module_create_args.hModule1, pHandle); ze_module_handle_t handle; ASSERT_NE(nullptr, pHandle); handle = *pHandle; EXPECT_EQ(module_create_args.hModule1, handle); ze_module_build_log_handle_t **ppLogHandle; ppLogHandle = params->pphBuildLog; ze_module_build_log_handle_t *pLogHandle; ASSERT_NE(nullptr, ppLogHandle); pLogHandle = *ppLogHandle; EXPECT_EQ(&module_create_args.hBuildLog1, pLogHandle); ze_module_build_log_handle_t logHandle; logHandle = *pLogHandle; ASSERT_NE(nullptr, logHandle); EXPECT_EQ(module_create_args.hBuildLog1, logHandle); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 62); *val += 31; instanceData = (struct instanceDataStruct *)*ppTracerInstanceUserData; EXPECT_EQ(instanceData->instanceDataValue, module_create_args.instanceData3); delete instanceData; }; 
setTracerCallbacksAndEnableTracer(); result = zeModuleCreate_Tracing(module_create_args.hContext0, module_create_args.hDevice0, &module_create_args.desc0, &module_create_args.hModule0, &module_create_args.hBuildLog0); EXPECT_EQ(ZE_RESULT_SUCCESS, result); validateDefaultUserDataFinal(); } struct { ze_module_handle_t hModule0; ze_module_handle_t hModule1; void *instanceData0; void *instanceData3; } module_destroy_args; TEST_F(zeAPITracingRuntimeMultipleArgumentsTests, WhenCallingModuleDestroyTracingWrapperWithMultiplePrologEpilogsThenReturnSuccess) { ze_result_t result; // initialize initial argument set module_destroy_args.hModule0 = generateRandomHandle(); // initialize replacement argument set module_destroy_args.hModule1 = generateRandomHandle(); // initialize user instance data module_destroy_args.instanceData0 = generateRandomHandle(); module_destroy_args.instanceData3 = generateRandomHandle(); driver_ddiTable.core_ddiTable.Module.pfnDestroy = [](ze_module_handle_t hModule) { EXPECT_EQ(module_destroy_args.hModule1, hModule); return ZE_RESULT_SUCCESS; }; // // The 0th prolog replaces the orignal API arguments with a new set // Create instance data, pass it to corresponding epilog. 
// prologCbs0.Module.pfnDestroyCb = [](ze_module_destroy_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(module_destroy_args.hModule0, *params->phModule); *params->phModule = module_destroy_args.hModule1; ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 1); *val += 1; struct instanceDataStruct *instanceData = new struct instanceDataStruct; instanceData->instanceDataValue = module_destroy_args.instanceData0; *ppTracerInstanceUserData = instanceData; }; // // The 0th epilog expects to see the API argument replacements // Expect to receive instance data from corresponding prolog // epilogCbs0.Module.pfnDestroyCb = [](ze_module_destroy_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { struct instanceDataStruct *instanceData; EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(module_destroy_args.hModule1, *params->phModule); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 2); *val += 1; instanceData = (struct instanceDataStruct *)*ppTracerInstanceUserData; EXPECT_EQ(instanceData->instanceDataValue, module_destroy_args.instanceData0); delete instanceData; }; // // The 1st prolog sees the arguments as replaced by the 0th prolog. 
// There is no epilog for this prolog, so don't allocate instance data // prologCbs1.Module.pfnDestroyCb = [](ze_module_destroy_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(module_destroy_args.hModule1, *params->phModule); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 11); *val += 11; }; // // The 2nd epilog expects to see the API argument replacements // There is no corresponding prolog, so there is no instance data // epilogCbs2.Module.pfnDestroyCb = [](ze_module_destroy_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(module_destroy_args.hModule1, *params->phModule); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 21); *val += 21; }; // // The 3rd prolog expects to see the API argument replacements and doesn't modify them // Create instance data and pass to corresponding epilog // prologCbs3.Module.pfnDestroyCb = [](ze_module_destroy_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(module_destroy_args.hModule1, *params->phModule); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 31); *val += 31; struct instanceDataStruct *instanceData = new struct instanceDataStruct; instanceData->instanceDataValue = module_destroy_args.instanceData3; *ppTracerInstanceUserData = instanceData; }; // // The 3rd epilog expects to see the API argument replacements // Expect to see instance data from corresponding prolog // epilogCbs3.Module.pfnDestroyCb = [](ze_module_destroy_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { struct instanceDataStruct *instanceData; EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(module_destroy_args.hModule1, *params->phModule); ASSERT_NE(nullptr, 
pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 62); *val += 31; instanceData = (struct instanceDataStruct *)*ppTracerInstanceUserData; EXPECT_EQ(instanceData->instanceDataValue, module_destroy_args.instanceData3); delete instanceData; }; setTracerCallbacksAndEnableTracer(); result = zeModuleDestroy_Tracing(module_destroy_args.hModule0); EXPECT_EQ(ZE_RESULT_SUCCESS, result); validateDefaultUserDataFinal(); } static constexpr size_t moduleGetNativeBinarySize0 = 64; static constexpr size_t moduleGetNativeBinarySize1 = 128; struct { ze_module_handle_t hModule0; size_t size0 = moduleGetNativeBinarySize0; uint8_t moduleNativeBinary0[moduleGetNativeBinarySize0]; ze_module_handle_t hModule1; size_t size1 = moduleGetNativeBinarySize1; uint8_t moduleNativeBinary1[moduleGetNativeBinarySize1]; void *instanceData0; void *instanceData3; } module_get_native_binary_args; static void module_get_native_binary_native_binary_init_random(uint8_t *binary, size_t size) { uint8_t *ptr = binary; for (size_t i = 0; i < size; i++) { *ptr = generateRandomSize(); } } static bool module_get_native_binary_native_binary_compare(uint8_t *binary0, uint8_t *binary1, size_t size) { if (binary0 == nullptr) { return false; } if (binary1 == nullptr) { return false; } return (memcmp(static_cast(binary0), static_cast(binary1), size) == 0); } TEST_F(zeAPITracingRuntimeMultipleArgumentsTests, WhenCallingModuleGetNativeBinaryTracingWrapperWithMultiplePrologEpilogsThenReturnSuccess) { ze_result_t result; // initialize initial argument set module_get_native_binary_args.hModule0 = generateRandomHandle(); module_get_native_binary_native_binary_init_random(module_get_native_binary_args.moduleNativeBinary0, moduleGetNativeBinarySize0); // initialize replacement argument set module_get_native_binary_args.hModule1 = generateRandomHandle(); module_get_native_binary_native_binary_init_random(module_get_native_binary_args.moduleNativeBinary1, moduleGetNativeBinarySize1); // initialize user 
instance data module_get_native_binary_args.instanceData0 = generateRandomHandle(); module_get_native_binary_args.instanceData3 = generateRandomHandle(); driver_ddiTable.core_ddiTable.Module.pfnGetNativeBinary = [](ze_module_handle_t hModule, size_t *pSize, uint8_t *pModuleNativeBinary) { EXPECT_EQ(module_get_native_binary_args.hModule1, hModule); EXPECT_EQ(&module_get_native_binary_args.size1, pSize); EXPECT_EQ(*pSize, moduleGetNativeBinarySize1); EXPECT_EQ(module_get_native_binary_args.moduleNativeBinary1, pModuleNativeBinary); EXPECT_TRUE(module_get_native_binary_native_binary_compare(module_get_native_binary_args.moduleNativeBinary1, pModuleNativeBinary, moduleGetNativeBinarySize0)); return ZE_RESULT_SUCCESS; }; // // The 0th prolog replaces the orignal API arguments with a new set // Create instance data, pass it to corresponding epilog. // prologCbs0.Module.pfnGetNativeBinaryCb = [](ze_module_get_native_binary_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(module_get_native_binary_args.hModule0, *params->phModule); size_t **ppSize; ASSERT_NE(nullptr, params); ppSize = params->ppSize; size_t *pSize; ASSERT_NE(nullptr, ppSize); pSize = *ppSize; EXPECT_EQ(&module_get_native_binary_args.size0, *params->ppSize); size_t size; ASSERT_NE(nullptr, pSize); size = *pSize; EXPECT_EQ(size, moduleGetNativeBinarySize0); EXPECT_EQ(module_get_native_binary_args.moduleNativeBinary0, *params->ppModuleNativeBinary); EXPECT_TRUE(module_get_native_binary_native_binary_compare(module_get_native_binary_args.moduleNativeBinary0, *params->ppModuleNativeBinary, moduleGetNativeBinarySize0)); *params->phModule = module_get_native_binary_args.hModule1; *params->ppSize = &module_get_native_binary_args.size1; *params->ppModuleNativeBinary = module_get_native_binary_args.moduleNativeBinary1; ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 1); *val += 1; struct instanceDataStruct 
*instanceData = new struct instanceDataStruct; instanceData->instanceDataValue = module_get_native_binary_args.instanceData0; *ppTracerInstanceUserData = instanceData; }; // // The 0th epilog expects to see the API argument replacements // Expect to receive instance data from corresponding prolog // epilogCbs0.Module.pfnGetNativeBinaryCb = [](ze_module_get_native_binary_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { struct instanceDataStruct *instanceData; EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(module_get_native_binary_args.hModule1, *params->phModule); size_t **ppSize; ASSERT_NE(nullptr, params); ppSize = params->ppSize; size_t *pSize; ASSERT_NE(nullptr, ppSize); pSize = *ppSize; EXPECT_EQ(&module_get_native_binary_args.size1, *params->ppSize); size_t size; ASSERT_NE(nullptr, pSize); size = *pSize; EXPECT_EQ(size, moduleGetNativeBinarySize1); EXPECT_EQ(module_get_native_binary_args.moduleNativeBinary1, *params->ppModuleNativeBinary); EXPECT_TRUE(module_get_native_binary_native_binary_compare(module_get_native_binary_args.moduleNativeBinary1, *params->ppModuleNativeBinary, moduleGetNativeBinarySize1)); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 2); *val += 1; instanceData = (struct instanceDataStruct *)*ppTracerInstanceUserData; EXPECT_EQ(instanceData->instanceDataValue, module_get_native_binary_args.instanceData0); delete instanceData; }; // // The 1st prolog sees the arguments as replaced by the 0th prolog. 
// There is no epilog for this prolog, so don't allocate instance data // prologCbs1.Module.pfnGetNativeBinaryCb = [](ze_module_get_native_binary_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(module_get_native_binary_args.hModule1, *params->phModule); size_t **ppSize; ASSERT_NE(nullptr, params); ppSize = params->ppSize; size_t *pSize; ASSERT_NE(nullptr, ppSize); pSize = *ppSize; EXPECT_EQ(&module_get_native_binary_args.size1, *params->ppSize); size_t size; ASSERT_NE(nullptr, pSize); size = *pSize; EXPECT_EQ(size, moduleGetNativeBinarySize1); EXPECT_EQ(module_get_native_binary_args.moduleNativeBinary1, *params->ppModuleNativeBinary); EXPECT_TRUE(module_get_native_binary_native_binary_compare(module_get_native_binary_args.moduleNativeBinary1, *params->ppModuleNativeBinary, moduleGetNativeBinarySize1)); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 11); *val += 11; }; // // The 2nd epilog expects to see the API argument replacements // There is no corresponding prolog, so there is no instance data // epilogCbs2.Module.pfnGetNativeBinaryCb = [](ze_module_get_native_binary_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(module_get_native_binary_args.hModule1, *params->phModule); size_t **ppSize; ASSERT_NE(nullptr, params); ppSize = params->ppSize; size_t *pSize; ASSERT_NE(nullptr, ppSize); pSize = *ppSize; EXPECT_EQ(&module_get_native_binary_args.size1, *params->ppSize); size_t size; ASSERT_NE(nullptr, pSize); size = *pSize; EXPECT_EQ(size, moduleGetNativeBinarySize1); EXPECT_EQ(module_get_native_binary_args.moduleNativeBinary1, *params->ppModuleNativeBinary); EXPECT_TRUE(module_get_native_binary_native_binary_compare(module_get_native_binary_args.moduleNativeBinary1, *params->ppModuleNativeBinary, moduleGetNativeBinarySize1)); ASSERT_NE(nullptr, pTracerUserData); int 
*val = static_cast(pTracerUserData); EXPECT_EQ(*val, 21); *val += 21; }; // // The 3rd prolog expects to see the API argument replacements and doesn't modify them // Create instance data and pass to corresponding epilog // prologCbs3.Module.pfnGetNativeBinaryCb = [](ze_module_get_native_binary_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(module_get_native_binary_args.hModule1, *params->phModule); EXPECT_EQ(&module_get_native_binary_args.size1, *params->ppSize); size_t **ppSize; ASSERT_NE(nullptr, params); ppSize = params->ppSize; size_t *pSize; ASSERT_NE(nullptr, ppSize); pSize = *ppSize; EXPECT_EQ(&module_get_native_binary_args.size1, *params->ppSize); size_t size; ASSERT_NE(nullptr, pSize); size = *pSize; EXPECT_EQ(size, moduleGetNativeBinarySize1); EXPECT_EQ(module_get_native_binary_args.moduleNativeBinary1, *params->ppModuleNativeBinary); EXPECT_TRUE(module_get_native_binary_native_binary_compare(module_get_native_binary_args.moduleNativeBinary1, *params->ppModuleNativeBinary, moduleGetNativeBinarySize1)); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 31); *val += 31; struct instanceDataStruct *instanceData = new struct instanceDataStruct; instanceData->instanceDataValue = module_get_native_binary_args.instanceData3; *ppTracerInstanceUserData = instanceData; }; // // The 3rd epilog expects to see the API argument replacements // Expect to see instance data from corresponding prolog // epilogCbs3.Module.pfnGetNativeBinaryCb = [](ze_module_get_native_binary_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { struct instanceDataStruct *instanceData; EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(module_get_native_binary_args.hModule1, *params->phModule); size_t **ppSize; ASSERT_NE(nullptr, params); ppSize = params->ppSize; size_t *pSize; ASSERT_NE(nullptr, ppSize); pSize = *ppSize; 
EXPECT_EQ(&module_get_native_binary_args.size1, *params->ppSize); size_t size; ASSERT_NE(nullptr, pSize); size = *pSize; EXPECT_EQ(size, moduleGetNativeBinarySize1); EXPECT_EQ(module_get_native_binary_args.moduleNativeBinary1, *params->ppModuleNativeBinary); EXPECT_TRUE(module_get_native_binary_native_binary_compare(module_get_native_binary_args.moduleNativeBinary1, *params->ppModuleNativeBinary, moduleGetNativeBinarySize1)); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 62); *val += 31; instanceData = (struct instanceDataStruct *)*ppTracerInstanceUserData; EXPECT_EQ(instanceData->instanceDataValue, module_get_native_binary_args.instanceData3); delete instanceData; }; setTracerCallbacksAndEnableTracer(); result = zeModuleGetNativeBinary_Tracing(module_get_native_binary_args.hModule0, &module_get_native_binary_args.size0, module_get_native_binary_args.moduleNativeBinary0); EXPECT_EQ(ZE_RESULT_SUCCESS, result); validateDefaultUserDataFinal(); } } // namespace ult } // namespace L0 test_module_api_tracing.cpp000066400000000000000000000457421422164147700360340ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/experimental/test/unit_tests/sources/tracing/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "test_api_tracing_common.h" namespace L0 { namespace ult { TEST_F(zeAPITracingRuntimeTests, WhenCallingModuleCreateTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.Module.pfnCreate = [](ze_context_handle_t hContext, ze_device_handle_t hDevice, const ze_module_desc_t *pDesc, ze_module_handle_t *phModule, ze_module_build_log_handle_t *phBuildLog) { return ZE_RESULT_SUCCESS; }; ze_module_desc_t desc = {}; ze_module_handle_t phModule = {}; ze_module_build_log_handle_t phBuildLog = {}; prologCbs.Module.pfnCreateCb = genericPrologCallbackPtr; epilogCbs.Module.pfnCreateCb = genericEpilogCallbackPtr; 
setTracerCallbacksAndEnableTracer(); result = zeModuleCreate_Tracing(nullptr, nullptr, &desc, &phModule, &phBuildLog); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingModuleDestroyTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.Module.pfnDestroy = [](ze_module_handle_t hModule) { return ZE_RESULT_SUCCESS; }; prologCbs.Module.pfnDestroyCb = genericPrologCallbackPtr; epilogCbs.Module.pfnDestroyCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeModuleDestroy_Tracing(nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingModuleBuildLogDestroyTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.ModuleBuildLog.pfnDestroy = [](ze_module_build_log_handle_t hModuleBuildLog) { return ZE_RESULT_SUCCESS; }; prologCbs.ModuleBuildLog.pfnDestroyCb = genericPrologCallbackPtr; epilogCbs.ModuleBuildLog.pfnDestroyCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeModuleBuildLogDestroy_Tracing(nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingModuleBuildLogGetStringTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.ModuleBuildLog.pfnGetString = [](ze_module_build_log_handle_t hModuleBuildLog, size_t *pSize, char *pBuildLog) { return ZE_RESULT_SUCCESS; }; size_t pSize = {}; char pBuildLog = {}; prologCbs.ModuleBuildLog.pfnGetStringCb = genericPrologCallbackPtr; epilogCbs.ModuleBuildLog.pfnGetStringCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeModuleBuildLogGetString_Tracing(nullptr, &pSize, &pBuildLog); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, 
WhenCallingModuleGetNativeBinaryTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) {
    ze_result_t result;
    // Mocked driver entry point: ignores its arguments and reports success.
    driver_ddiTable.core_ddiTable.Module.pfnGetNativeBinary =
        [](ze_module_handle_t hModule, size_t *pSize, uint8_t *pModuleNativeBinary) { return ZE_RESULT_SUCCESS; };
    size_t pSize = {};
    uint8_t pModuleNativeBinary = {};

    prologCbs.Module.pfnGetNativeBinaryCb = genericPrologCallbackPtr;
    epilogCbs.Module.pfnGetNativeBinaryCb = genericEpilogCallbackPtr;

    setTracerCallbacksAndEnableTracer();

    result = zeModuleGetNativeBinary_Tracing(nullptr, &pSize, &pModuleNativeBinary);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_EQ(defaultUserData, 1);
}

// Installs one generic prolog/epilog pair for Module.GetGlobalPointer, calls the tracing
// wrapper against a mocked driver entry that returns success, and expects the wrapper to
// return success with defaultUserData equal to 1 afterwards.
TEST_F(zeAPITracingRuntimeTests, WhenCallingModuleGetGlobalPointerTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) {
    ze_result_t result;
    // Mocked driver entry point: ignores its arguments and reports success.
    driver_ddiTable.core_ddiTable.Module.pfnGetGlobalPointer =
        [](ze_module_handle_t hModule, const char *pGlobalName, size_t *pSize, void **pPtr) { return ZE_RESULT_SUCCESS; };
    const char pGlobalName = {};
    // Value-initialise like the sibling tests; the address is handed to the wrapper.
    size_t size = {};
    void *pptr = nullptr;

    prologCbs.Module.pfnGetGlobalPointerCb = genericPrologCallbackPtr;
    epilogCbs.Module.pfnGetGlobalPointerCb = genericEpilogCallbackPtr;

    setTracerCallbacksAndEnableTracer();

    result = zeModuleGetGlobalPointer_Tracing(nullptr, &pGlobalName, &size, &pptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_EQ(defaultUserData, 1);
}

TEST_F(zeAPITracingRuntimeTests, WhenCallingKernelCreateTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) {
    ze_result_t result;
    // Mocked driver entry point: ignores its arguments and reports success.
    driver_ddiTable.core_ddiTable.Kernel.pfnCreate =
        [](ze_module_handle_t hModule, const ze_kernel_desc_t *pDesc, ze_kernel_handle_t *phKernel) { return ZE_RESULT_SUCCESS; };
    const ze_kernel_desc_t desc = {};
    ze_kernel_handle_t phKernel = {};

    prologCbs.Kernel.pfnCreateCb = genericPrologCallbackPtr;
    epilogCbs.Kernel.pfnCreateCb = genericEpilogCallbackPtr;

    setTracerCallbacksAndEnableTracer();

    result = zeKernelCreate_Tracing(nullptr, &desc, &phKernel);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingKernelDestroyTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.Kernel.pfnDestroy = [](ze_kernel_handle_t hKernel) { return ZE_RESULT_SUCCESS; }; prologCbs.Kernel.pfnDestroyCb = genericPrologCallbackPtr; epilogCbs.Kernel.pfnDestroyCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeKernelDestroy_Tracing(nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingModuleGetFunctionPointerTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.Module.pfnGetFunctionPointer = [](ze_module_handle_t hModule, const char *pKernelName, void **pfnFunction) { return ZE_RESULT_SUCCESS; }; const char pKernelName = {}; void *pfnFunction = nullptr; prologCbs.Module.pfnGetFunctionPointerCb = genericPrologCallbackPtr; epilogCbs.Module.pfnGetFunctionPointerCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeModuleGetFunctionPointer_Tracing(nullptr, &pKernelName, &pfnFunction); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingKernelSetGroupSizeTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.Kernel.pfnSetGroupSize = [](ze_kernel_handle_t hKernel, uint32_t groupSizeX, uint32_t groupSizeY, uint32_t groupSizeZ) { return ZE_RESULT_SUCCESS; }; uint32_t groupSizeX = {}; uint32_t groupSizeY = {}; uint32_t groupSizeZ = {}; prologCbs.Kernel.pfnSetGroupSizeCb = genericPrologCallbackPtr; epilogCbs.Kernel.pfnSetGroupSizeCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeKernelSetGroupSize_Tracing(nullptr, groupSizeX, groupSizeY, groupSizeZ); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } 
TEST_F(zeAPITracingRuntimeTests, WhenCallingKernelSuggestGroupSizeTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.Kernel.pfnSuggestGroupSize = [](ze_kernel_handle_t hKernel, uint32_t globalSizeX, uint32_t globalSizeY, uint32_t globalSizeZ, uint32_t *groupSizeX, uint32_t *groupSizeY, uint32_t *groupSizeZ) { return ZE_RESULT_SUCCESS; }; uint32_t globalSizeX = {}; uint32_t globalSizeY = {}; uint32_t globalSizeZ = {}; uint32_t groupSizeX = {}; uint32_t groupSizeY = {}; uint32_t groupSizeZ = {}; prologCbs.Kernel.pfnSuggestGroupSizeCb = genericPrologCallbackPtr; epilogCbs.Kernel.pfnSuggestGroupSizeCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeKernelSuggestGroupSize_Tracing(nullptr, globalSizeX, globalSizeY, globalSizeZ, &groupSizeX, &groupSizeY, &groupSizeZ); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingKernelSetArgumentValueTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.Kernel.pfnSetArgumentValue = [](ze_kernel_handle_t hKernel, uint32_t argIndex, size_t argSize, const void *pArgValue) { return ZE_RESULT_SUCCESS; }; uint32_t argIndex = {}; size_t argSize = {}; const void *pArgValue = nullptr; prologCbs.Kernel.pfnSetArgumentValueCb = genericPrologCallbackPtr; epilogCbs.Kernel.pfnSetArgumentValueCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeKernelSetArgumentValue_Tracing(nullptr, argIndex, argSize, &pArgValue); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingKernelGetPropertiesTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.Kernel.pfnGetProperties = [](ze_kernel_handle_t hKernel, ze_kernel_properties_t *pKernelProperties) { return ZE_RESULT_SUCCESS; }; prologCbs.Kernel.pfnGetPropertiesCb = 
genericPrologCallbackPtr; epilogCbs.Kernel.pfnGetPropertiesCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeKernelGetProperties_Tracing(nullptr, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingCommandListAppendLaunchKernelTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.CommandList.pfnAppendLaunchKernel = [](ze_command_list_handle_t hCommandList, ze_kernel_handle_t hKernel, const ze_group_count_t *pLaunchFuncArgs, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { return ZE_RESULT_SUCCESS; }; const ze_group_count_t pLaunchFuncArgs = {}; ze_event_handle_t hSignalEvent = {}; uint32_t numWaitEvents = {}; ze_event_handle_t phWaitEvents = {}; prologCbs.CommandList.pfnAppendLaunchKernelCb = genericPrologCallbackPtr; epilogCbs.CommandList.pfnAppendLaunchKernelCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeCommandListAppendLaunchKernel_Tracing(nullptr, nullptr, &pLaunchFuncArgs, hSignalEvent, numWaitEvents, &phWaitEvents); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingCommandListAppendLaunchKernelIndirectTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.CommandList.pfnAppendLaunchKernelIndirect = [](ze_command_list_handle_t hCommandList, ze_kernel_handle_t hKernel, const ze_group_count_t *pLaunchArgumentsBuffer, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { return ZE_RESULT_SUCCESS; }; const ze_group_count_t pLaunchArgumentsBuffer = {}; ze_event_handle_t hSignalEvent = {}; uint32_t numWaitEvents = {}; ze_event_handle_t phWaitEvents = {}; prologCbs.CommandList.pfnAppendLaunchKernelIndirectCb = genericPrologCallbackPtr; 
epilogCbs.CommandList.pfnAppendLaunchKernelIndirectCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeCommandListAppendLaunchKernelIndirect_Tracing(nullptr, nullptr, &pLaunchArgumentsBuffer, hSignalEvent, numWaitEvents, &phWaitEvents); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingCommandListAppendLaunchMultipleKernelsIndirectTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.CommandList.pfnAppendLaunchMultipleKernelsIndirect = [](ze_command_list_handle_t hCommandList, uint32_t numKernels, ze_kernel_handle_t *phKernels, const uint32_t *pNumLaunchArguments, const ze_group_count_t *pLaunchArgumentsBuffer, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { return ZE_RESULT_SUCCESS; }; uint32_t numKernels = {}; ze_kernel_handle_t phKernels = {}; const uint32_t pNumLaunchArguments = {}; const ze_group_count_t pLaunchArgumentsBuffer = {}; ze_event_handle_t hSignalEvent = {}; uint32_t numWaitEvents = {}; ze_event_handle_t phWaitEvents = {}; prologCbs.CommandList.pfnAppendLaunchMultipleKernelsIndirectCb = genericPrologCallbackPtr; epilogCbs.CommandList.pfnAppendLaunchMultipleKernelsIndirectCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeCommandListAppendLaunchMultipleKernelsIndirect_Tracing(nullptr, numKernels, &phKernels, &pNumLaunchArguments, &pLaunchArgumentsBuffer, hSignalEvent, numWaitEvents, &phWaitEvents); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingCommandListAppendLaunchCooperativeKernelTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.CommandList.pfnAppendLaunchCooperativeKernel = [](ze_command_list_handle_t hCommandList, ze_kernel_handle_t hKernel, const ze_group_count_t *pLaunchFuncArgs, ze_event_handle_t 
hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { return ZE_RESULT_SUCCESS; }; prologCbs.CommandList.pfnAppendLaunchCooperativeKernelCb = genericPrologCallbackPtr; epilogCbs.CommandList.pfnAppendLaunchCooperativeKernelCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeCommandListAppendLaunchCooperativeKernel_Tracing(nullptr, nullptr, nullptr, nullptr, 1, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingModuleGetKernelNamesTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.Module.pfnGetKernelNames = [](ze_module_handle_t hDevice, uint32_t *pCount, const char **pNames) { return ZE_RESULT_SUCCESS; }; prologCbs.Module.pfnGetKernelNamesCb = genericPrologCallbackPtr; epilogCbs.Module.pfnGetKernelNamesCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeModuleGetKernelNames_Tracing(nullptr, nullptr, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingKernelSuggestMaxCooperativeGroupCountTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.Kernel.pfnSuggestMaxCooperativeGroupCount = [](ze_kernel_handle_t hKernel, uint32_t *totalGroupCount) { return ZE_RESULT_SUCCESS; }; prologCbs.Kernel.pfnSuggestMaxCooperativeGroupCountCb = genericPrologCallbackPtr; epilogCbs.Kernel.pfnSuggestMaxCooperativeGroupCountCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeKernelSuggestMaxCooperativeGroupCount_Tracing(nullptr, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingKernelGetIndirectAccessTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.Kernel.pfnGetIndirectAccess = [](ze_kernel_handle_t 
hKernel, ze_kernel_indirect_access_flags_t *pFlags) { return ZE_RESULT_SUCCESS; }; prologCbs.Kernel.pfnGetIndirectAccessCb = genericPrologCallbackPtr; epilogCbs.Kernel.pfnGetIndirectAccessCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeKernelGetIndirectAccess_Tracing(nullptr, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingKernelGetNameTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.Kernel.pfnGetName = [](ze_kernel_handle_t hKernel, size_t *pSize, char *pName) { return ZE_RESULT_SUCCESS; }; prologCbs.Kernel.pfnGetNameCb = genericPrologCallbackPtr; epilogCbs.Kernel.pfnGetNameCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeKernelGetName_Tracing(nullptr, nullptr, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingKernelGetSourceAttributesTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.Kernel.pfnGetSourceAttributes = [](ze_kernel_handle_t hKernel, uint32_t *pSize, char **pString) { return ZE_RESULT_SUCCESS; }; prologCbs.Kernel.pfnGetSourceAttributesCb = genericPrologCallbackPtr; epilogCbs.Kernel.pfnGetSourceAttributesCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeKernelGetSourceAttributes_Tracing(nullptr, nullptr, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingKernelSetIndirectAccessTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.Kernel.pfnSetIndirectAccess = [](ze_kernel_handle_t hKernel, ze_kernel_indirect_access_flags_t flags) { return ZE_RESULT_SUCCESS; }; prologCbs.Kernel.pfnSetIndirectAccessCb = genericPrologCallbackPtr; epilogCbs.Kernel.pfnSetIndirectAccessCb = 
genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeKernelSetIndirectAccess_Tracing(nullptr, ZE_KERNEL_INDIRECT_ACCESS_FLAG_HOST); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingModuleDynamicLinkTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.Module.pfnDynamicLink = [](uint32_t numModules, ze_module_handle_t *phModules, ze_module_build_log_handle_t *phLinkLog) { return ZE_RESULT_SUCCESS; }; prologCbs.Module.pfnDynamicLinkCb = genericPrologCallbackPtr; epilogCbs.Module.pfnDynamicLinkCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeModuleDynamicLink_Tracing(1U, nullptr, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingModuleGetPropertiesTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.Module.pfnGetProperties = [](ze_module_handle_t hModule, ze_module_properties_t *pModuleProperties) { return ZE_RESULT_SUCCESS; }; prologCbs.Module.pfnGetPropertiesCb = genericPrologCallbackPtr; epilogCbs.Module.pfnGetPropertiesCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeModuleGetProperties_Tracing(nullptr, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } } // namespace ult } // namespace L0 test_residency_api_tracing.cpp000066400000000000000000001075201422164147700365250ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/experimental/test/unit_tests/sources/tracing/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "test_api_tracing_common.h" namespace L0 { namespace ult { TEST_F(zeAPITracingRuntimeTests, WhenCallingContextCreateTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; 
driver_ddiTable.core_ddiTable.Context.pfnCreate = [](ze_driver_handle_t hContext, const ze_context_desc_t *desc, ze_context_handle_t *phContext) { return ZE_RESULT_SUCCESS; }; prologCbs.Context.pfnCreateCb = genericPrologCallbackPtr; epilogCbs.Context.pfnCreateCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeContextCreate_Tracing(nullptr, nullptr, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingContextDestroyTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.Context.pfnDestroy = [](ze_context_handle_t hContext) { return ZE_RESULT_SUCCESS; }; prologCbs.Context.pfnDestroyCb = genericPrologCallbackPtr; epilogCbs.Context.pfnDestroyCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeContextDestroy_Tracing(nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingContextGetStatusTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.Context.pfnGetStatus = [](ze_context_handle_t hContext) { return ZE_RESULT_SUCCESS; }; prologCbs.Context.pfnGetStatusCb = genericPrologCallbackPtr; epilogCbs.Context.pfnGetStatusCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeContextGetStatus_Tracing(nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingContextSystemBarrierTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.Context.pfnSystemBarrier = [](ze_context_handle_t hContext, ze_device_handle_t hDevice) { return ZE_RESULT_SUCCESS; }; prologCbs.Context.pfnSystemBarrierCb = genericPrologCallbackPtr; epilogCbs.Context.pfnSystemBarrierCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = 
zeContextSystemBarrier_Tracing(nullptr, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingContextMakeMemoryResidentTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.Context.pfnMakeMemoryResident = [](ze_context_handle_t hContext, ze_device_handle_t hDevice, void *ptr, size_t size) { return ZE_RESULT_SUCCESS; }; prologCbs.Context.pfnMakeMemoryResidentCb = genericPrologCallbackPtr; epilogCbs.Context.pfnMakeMemoryResidentCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeContextMakeMemoryResident_Tracing(nullptr, nullptr, nullptr, 1024); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingContextEvictMemoryTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.Context.pfnEvictMemory = [](ze_context_handle_t hContext, ze_device_handle_t hDevice, void *ptr, size_t size) { return ZE_RESULT_SUCCESS; }; prologCbs.Context.pfnEvictMemoryCb = genericPrologCallbackPtr; epilogCbs.Context.pfnEvictMemoryCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeContextEvictMemory_Tracing(nullptr, nullptr, nullptr, 1024); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingContextMakeImageResidentTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.Context.pfnMakeImageResident = [](ze_context_handle_t hContext, ze_device_handle_t hDevice, ze_image_handle_t hImage) { return ZE_RESULT_SUCCESS; }; prologCbs.Context.pfnMakeImageResidentCb = genericPrologCallbackPtr; epilogCbs.Context.pfnMakeImageResidentCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeContextMakeImageResident_Tracing(nullptr, nullptr, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, 
result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingContextEvictImageTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.Context.pfnEvictImage = [](ze_context_handle_t hContext, ze_device_handle_t hDevice, ze_image_handle_t hImage) { return ZE_RESULT_SUCCESS; }; prologCbs.Context.pfnEvictImageCb = genericPrologCallbackPtr; epilogCbs.Context.pfnEvictImageCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeContextEvictImage_Tracing(nullptr, nullptr, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } struct { ze_context_handle_t hContext0; ze_device_handle_t hDevice0; void *ptr0; size_t size0; ze_context_handle_t hContext1; ze_device_handle_t hDevice1; void *ptr1; size_t size1; void *instanceData0; void *instanceData3; } MakeMemoryResident_args; TEST_F(zeAPITracingRuntimeMultipleArgumentsTests, WhenCallingContextMakeMemoryResidentTracingWrapperWithMultiplePrologEpilogsThenReturnSuccess) { ze_result_t result; // initialize initial argument set MakeMemoryResident_args.hContext0 = generateRandomHandle(); MakeMemoryResident_args.hDevice0 = generateRandomHandle(); MakeMemoryResident_args.ptr0 = generateRandomHandle(); MakeMemoryResident_args.size0 = generateRandomSize(); // initialize replacement argument set MakeMemoryResident_args.hContext1 = generateRandomHandle(); MakeMemoryResident_args.hDevice1 = generateRandomHandle(); MakeMemoryResident_args.ptr1 = generateRandomHandle(); MakeMemoryResident_args.size1 = generateRandomSize(); // initialize user instance data MakeMemoryResident_args.instanceData0 = generateRandomHandle(); MakeMemoryResident_args.instanceData3 = generateRandomHandle(); // arguments are expeted to be passed in from first prolog callback driver_ddiTable.core_ddiTable.Context.pfnMakeMemoryResident = [](ze_context_handle_t hContext, ze_device_handle_t hDevice, void *ptr, size_t size) { EXPECT_EQ(hContext, 
MakeMemoryResident_args.hContext1); EXPECT_EQ(hDevice, MakeMemoryResident_args.hDevice1); EXPECT_EQ(ptr, MakeMemoryResident_args.ptr1); EXPECT_EQ(size, MakeMemoryResident_args.size1); return ZE_RESULT_SUCCESS; }; // // The 0th prolog replaces the orignal API arguments with a new set // Allocate instance data, pass it to corresponding epilog. // prologCbs0.Context.pfnMakeMemoryResidentCb = [](ze_context_make_memory_resident_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(*params->phContext, MakeMemoryResident_args.hContext0); EXPECT_EQ(*params->phDevice, MakeMemoryResident_args.hDevice0); EXPECT_EQ(*params->pptr, MakeMemoryResident_args.ptr0); EXPECT_EQ(*params->psize, MakeMemoryResident_args.size0); *params->phContext = MakeMemoryResident_args.hContext1; *params->phDevice = MakeMemoryResident_args.hDevice1; *params->pptr = MakeMemoryResident_args.ptr1; *params->psize = MakeMemoryResident_args.size1; ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 1); *val += 1; struct instanceDataStruct *instanceData = new struct instanceDataStruct; instanceData->instanceDataValue = MakeMemoryResident_args.instanceData0; *ppTracerInstanceUserData = instanceData; }; // // The 0th epilog expects to see the API argument replacements // Expect to receive instance data from corresponding prolog // epilogCbs0.Context.pfnMakeMemoryResidentCb = [](ze_context_make_memory_resident_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { struct instanceDataStruct *instanceData; EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(*params->phContext, MakeMemoryResident_args.hContext1); EXPECT_EQ(*params->phDevice, MakeMemoryResident_args.hDevice1); EXPECT_EQ(*params->pptr, MakeMemoryResident_args.ptr1); EXPECT_EQ(*params->psize, MakeMemoryResident_args.size1); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 2); 
*val += 1; instanceData = (struct instanceDataStruct *)*ppTracerInstanceUserData; EXPECT_EQ(instanceData->instanceDataValue, MakeMemoryResident_args.instanceData0); delete instanceData; }; // // The 1st prolog sees the arguments as replaced by the 0th prolog. // There is no epilog for this prolog, so don't allocate instance data // prologCbs1.Context.pfnMakeMemoryResidentCb = [](ze_context_make_memory_resident_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(*params->phContext, MakeMemoryResident_args.hContext1); EXPECT_EQ(*params->phDevice, MakeMemoryResident_args.hDevice1); EXPECT_EQ(*params->pptr, MakeMemoryResident_args.ptr1); EXPECT_EQ(*params->psize, MakeMemoryResident_args.size1); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 11); *val += 11; }; // // The 2nd epilog expects to see the API argument replacements // There is no corresponding prolog, so there is no instance data // epilogCbs2.Context.pfnMakeMemoryResidentCb = [](ze_context_make_memory_resident_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(*params->phContext, MakeMemoryResident_args.hContext1); EXPECT_EQ(*params->phDevice, MakeMemoryResident_args.hDevice1); EXPECT_EQ(*params->pptr, MakeMemoryResident_args.ptr1); EXPECT_EQ(*params->psize, MakeMemoryResident_args.size1); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 21); *val += 21; }; // // The 3rd prolog expects to see the API argument replacements and doesn't modify them // Allocate instance data and pass to corresponding epilog // prologCbs3.Context.pfnMakeMemoryResidentCb = [](ze_context_make_memory_resident_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(*params->phContext, MakeMemoryResident_args.hContext1); EXPECT_EQ(*params->phDevice, 
MakeMemoryResident_args.hDevice1); EXPECT_EQ(*params->pptr, MakeMemoryResident_args.ptr1); EXPECT_EQ(*params->psize, MakeMemoryResident_args.size1); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 31); *val += 31; struct instanceDataStruct *instanceData = new struct instanceDataStruct; instanceData->instanceDataValue = MakeMemoryResident_args.instanceData3; *ppTracerInstanceUserData = instanceData; }; // // The 3rd prolog expects to see the API argument replacements and doesn't modify them // Create instance data and pass to corresponding epilog // epilogCbs3.Context.pfnMakeMemoryResidentCb = [](ze_context_make_memory_resident_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { struct instanceDataStruct *instanceData; EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(*params->phContext, MakeMemoryResident_args.hContext1); EXPECT_EQ(*params->phDevice, MakeMemoryResident_args.hDevice1); EXPECT_EQ(*params->pptr, MakeMemoryResident_args.ptr1); EXPECT_EQ(*params->psize, MakeMemoryResident_args.size1); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 62); *val += 31; instanceData = (struct instanceDataStruct *)*ppTracerInstanceUserData; EXPECT_EQ(instanceData->instanceDataValue, MakeMemoryResident_args.instanceData3); delete instanceData; }; setTracerCallbacksAndEnableTracer(); result = zeContextMakeMemoryResident_Tracing(MakeMemoryResident_args.hContext0, MakeMemoryResident_args.hDevice0, MakeMemoryResident_args.ptr0, MakeMemoryResident_args.size0); EXPECT_EQ(ZE_RESULT_SUCCESS, result); validateDefaultUserDataFinal(); } struct { ze_context_handle_t hContext0; ze_device_handle_t hDevice0; void *ptr0; size_t size0; ze_context_handle_t hContext1; ze_device_handle_t hDevice1; void *ptr1; size_t size1; void *instanceData0; void *instanceData3; } EvictMemory_args; TEST_F(zeAPITracingRuntimeMultipleArgumentsTests, 
WhenCallingContextEvictMemoryTracingWrapperWithMultiplePrologEpilogsThenReturnSuccess) { ze_result_t result; // initialize initial argument set EvictMemory_args.hContext0 = generateRandomHandle(); EvictMemory_args.hDevice0 = generateRandomHandle(); EvictMemory_args.ptr0 = generateRandomHandle(); EvictMemory_args.size0 = generateRandomSize(); // initialize replacement argument set EvictMemory_args.hContext1 = generateRandomHandle(); EvictMemory_args.hDevice1 = generateRandomHandle(); EvictMemory_args.ptr1 = generateRandomHandle(); EvictMemory_args.size1 = generateRandomSize(); // initialize user instance data EvictMemory_args.instanceData0 = generateRandomHandle(); EvictMemory_args.instanceData3 = generateRandomHandle(); driver_ddiTable.core_ddiTable.Context.pfnEvictMemory = [](ze_context_handle_t hContext, ze_device_handle_t hDevice, void *ptr, size_t size) { EXPECT_EQ(hContext, EvictMemory_args.hContext1); EXPECT_EQ(hDevice, EvictMemory_args.hDevice1); EXPECT_EQ(ptr, EvictMemory_args.ptr1); EXPECT_EQ(size, EvictMemory_args.size1); return ZE_RESULT_SUCCESS; }; // // The 0th prolog replaces the orignal API arguments with a new set // Create instance data, pass it to corresponding epilog. 
// prologCbs0.Context.pfnEvictMemoryCb = [](ze_context_evict_memory_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(EvictMemory_args.hContext0, *params->phContext); EXPECT_EQ(EvictMemory_args.hDevice0, *params->phDevice); EXPECT_EQ(EvictMemory_args.ptr0, *params->pptr); EXPECT_EQ(EvictMemory_args.size0, *params->psize); *params->phContext = EvictMemory_args.hContext1; *params->phDevice = EvictMemory_args.hDevice1; *params->pptr = EvictMemory_args.ptr1; *params->psize = EvictMemory_args.size1; ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 1); *val += 1; struct instanceDataStruct *instanceData = new struct instanceDataStruct; instanceData->instanceDataValue = EvictMemory_args.instanceData0; *ppTracerInstanceUserData = instanceData; }; // // The 0th epilog expects to see the API argument replacements // Expect to receive instance data from corresponding prolog // epilogCbs0.Context.pfnEvictMemoryCb = [](ze_context_evict_memory_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { struct instanceDataStruct *instanceData; EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(*params->phContext, EvictMemory_args.hContext1); EXPECT_EQ(*params->phDevice, EvictMemory_args.hDevice1); EXPECT_EQ(*params->pptr, EvictMemory_args.ptr1); EXPECT_EQ(*params->psize, EvictMemory_args.size1); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 2); *val += 1; instanceData = (struct instanceDataStruct *)*ppTracerInstanceUserData; EXPECT_EQ(instanceData->instanceDataValue, EvictMemory_args.instanceData0); delete instanceData; }; // // The 1st prolog sees the arguments as replaced by the 0th prolog. 
// There is no epilog for this prolog, so don't allocate instance data // prologCbs1.Context.pfnEvictMemoryCb = [](ze_context_evict_memory_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(*params->phContext, EvictMemory_args.hContext1); EXPECT_EQ(*params->phDevice, EvictMemory_args.hDevice1); EXPECT_EQ(*params->pptr, EvictMemory_args.ptr1); EXPECT_EQ(*params->psize, EvictMemory_args.size1); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 11); *val += 11; }; // // The 2nd epilog expects to see the API argument replacements // There is no corresponding prolog, so there is no instance data // epilogCbs2.Context.pfnEvictMemoryCb = [](ze_context_evict_memory_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(*params->phContext, EvictMemory_args.hContext1); EXPECT_EQ(*params->phDevice, EvictMemory_args.hDevice1); EXPECT_EQ(*params->pptr, EvictMemory_args.ptr1); EXPECT_EQ(*params->psize, EvictMemory_args.size1); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 21); *val += 21; }; // // The 3rd prolog expects to see the API argument replacements and doesn't modify them // Allocate instance data and pass to corresponding epilog // prologCbs3.Context.pfnEvictMemoryCb = [](ze_context_evict_memory_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(*params->phContext, EvictMemory_args.hContext1); EXPECT_EQ(*params->phDevice, EvictMemory_args.hDevice1); EXPECT_EQ(*params->pptr, EvictMemory_args.ptr1); EXPECT_EQ(*params->psize, EvictMemory_args.size1); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 31); *val += 31; struct instanceDataStruct *instanceData = new struct instanceDataStruct; instanceData->instanceDataValue = 
EvictMemory_args.instanceData3; *ppTracerInstanceUserData = instanceData; }; epilogCbs3.Context.pfnEvictMemoryCb = [](ze_context_evict_memory_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { struct instanceDataStruct *instanceData; EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(*params->phContext, EvictMemory_args.hContext1); EXPECT_EQ(*params->phDevice, EvictMemory_args.hDevice1); EXPECT_EQ(*params->pptr, EvictMemory_args.ptr1); EXPECT_EQ(*params->psize, EvictMemory_args.size1); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 62); *val += 31; instanceData = (struct instanceDataStruct *)*ppTracerInstanceUserData; EXPECT_EQ(instanceData->instanceDataValue, EvictMemory_args.instanceData3); delete instanceData; }; setTracerCallbacksAndEnableTracer(); result = zeContextEvictMemory_Tracing(EvictMemory_args.hContext0, EvictMemory_args.hDevice0, EvictMemory_args.ptr0, EvictMemory_args.size0); EXPECT_EQ(ZE_RESULT_SUCCESS, result); validateDefaultUserDataFinal(); } struct { ze_context_handle_t hContext0; ze_device_handle_t hDevice0; ze_image_handle_t hImage0; ze_context_handle_t hContext1; ze_device_handle_t hDevice1; ze_image_handle_t hImage1; void *instanceData0; void *instanceData3; } MakeImageResident_args; TEST_F(zeAPITracingRuntimeMultipleArgumentsTests, WhenCallingContextMakeImageResidentTracingWrapperWithMultiplePrologEpilogsThenReturnSuccess) { ze_result_t result; // initialize initial argument set MakeImageResident_args.hContext0 = generateRandomHandle(); MakeImageResident_args.hDevice0 = generateRandomHandle(); MakeImageResident_args.hImage0 = generateRandomHandle(); // initialize replacement argument set MakeImageResident_args.hContext1 = generateRandomHandle(); MakeImageResident_args.hDevice1 = generateRandomHandle(); MakeImageResident_args.hImage1 = generateRandomHandle(); // initialize user instance data MakeImageResident_args.instanceData0 = generateRandomHandle(); 
MakeImageResident_args.instanceData3 = generateRandomHandle(); // arguments are expeted to be passed in from first prolog callback driver_ddiTable.core_ddiTable.Context.pfnMakeImageResident = [](ze_context_handle_t hContext, ze_device_handle_t hDevice, ze_image_handle_t hImage) { EXPECT_EQ(hContext, MakeImageResident_args.hContext1); EXPECT_EQ(hDevice, MakeImageResident_args.hDevice1); EXPECT_EQ(hImage, MakeImageResident_args.hImage1); return ZE_RESULT_SUCCESS; }; // // The 0th prolog replaces the orignal API arguments with a new set // Allocate instance data, pass it to corresponding epilog. // prologCbs0.Context.pfnMakeImageResidentCb = [](ze_context_make_image_resident_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(*params->phContext, MakeImageResident_args.hContext0); EXPECT_EQ(*params->phDevice, MakeImageResident_args.hDevice0); EXPECT_EQ(*params->phImage, MakeImageResident_args.hImage0); *params->phContext = MakeImageResident_args.hContext1; *params->phDevice = MakeImageResident_args.hDevice1; *params->phImage = MakeImageResident_args.hImage1; ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 1); *val += 1; struct instanceDataStruct *instanceData = new struct instanceDataStruct; instanceData->instanceDataValue = MakeImageResident_args.instanceData0; *ppTracerInstanceUserData = instanceData; }; // // The 0th epilog expects to see the API argument replacements // Expect to receive instance data from corresponding prolog // epilogCbs0.Context.pfnMakeImageResidentCb = [](ze_context_make_image_resident_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { struct instanceDataStruct *instanceData; EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(*params->phContext, MakeImageResident_args.hContext1); EXPECT_EQ(*params->phDevice, MakeImageResident_args.hDevice1); EXPECT_EQ(*params->phImage, MakeImageResident_args.hImage1); 
ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 2); *val += 1; instanceData = (struct instanceDataStruct *)*ppTracerInstanceUserData; EXPECT_EQ(instanceData->instanceDataValue, MakeImageResident_args.instanceData0); delete instanceData; }; // // The 1st prolog sees the arguments as replaced by the 0th prolog. // There is no epilog for this prolog, so don't allocate instance data // prologCbs1.Context.pfnMakeImageResidentCb = [](ze_context_make_image_resident_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(*params->phContext, MakeImageResident_args.hContext1); EXPECT_EQ(*params->phDevice, MakeImageResident_args.hDevice1); EXPECT_EQ(*params->phImage, MakeImageResident_args.hImage1); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 11); *val += 11; }; // // The 2nd epilog expects to see the API argument replacements // There is no corresponding prolog, so there is no instance data // epilogCbs2.Context.pfnMakeImageResidentCb = [](ze_context_make_image_resident_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(*params->phContext, MakeImageResident_args.hContext1); EXPECT_EQ(*params->phDevice, MakeImageResident_args.hDevice1); EXPECT_EQ(*params->phImage, MakeImageResident_args.hImage1); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 21); *val += 21; }; // // The 3rd prolog expects to see the API argument replacements and doesn't modify them // Allocate instance data and pass to corresponding epilog // prologCbs3.Context.pfnMakeImageResidentCb = [](ze_context_make_image_resident_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(*params->phContext, MakeImageResident_args.hContext1); EXPECT_EQ(*params->phDevice, 
MakeImageResident_args.hDevice1); EXPECT_EQ(*params->phImage, MakeImageResident_args.hImage1); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 31); *val += 31; struct instanceDataStruct *instanceData = new struct instanceDataStruct; instanceData->instanceDataValue = MakeImageResident_args.instanceData3; *ppTracerInstanceUserData = instanceData; }; // // The 3rd prolog expects to see the API argument replacements and doesn't modify them // Create instance data and pass to corresponding epilog // epilogCbs3.Context.pfnMakeImageResidentCb = [](ze_context_make_image_resident_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { struct instanceDataStruct *instanceData; EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(*params->phDevice, MakeImageResident_args.hDevice1); EXPECT_EQ(*params->phContext, MakeImageResident_args.hContext1); EXPECT_EQ(*params->phImage, MakeImageResident_args.hImage1); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 62); *val += 31; instanceData = (struct instanceDataStruct *)*ppTracerInstanceUserData; EXPECT_EQ(instanceData->instanceDataValue, MakeImageResident_args.instanceData3); delete instanceData; }; setTracerCallbacksAndEnableTracer(); result = zeContextMakeImageResident_Tracing(MakeImageResident_args.hContext0, MakeImageResident_args.hDevice0, MakeImageResident_args.hImage0); EXPECT_EQ(ZE_RESULT_SUCCESS, result); validateDefaultUserDataFinal(); } struct { ze_context_handle_t hContext0; ze_device_handle_t hDevice0; ze_image_handle_t hImage0; ze_context_handle_t hContext1; ze_device_handle_t hDevice1; ze_image_handle_t hImage1; void *instanceData0; void *instanceData3; } EvictImage_args; TEST_F(zeAPITracingRuntimeMultipleArgumentsTests, WhenCallingContextMakeImageResidentTracingWrapperWithMultiplefPrologEpilogsThenReturnSuccess) { ze_result_t result; // initialize initial argument set EvictImage_args.hContext0 
= generateRandomHandle(); EvictImage_args.hDevice0 = generateRandomHandle(); EvictImage_args.hImage0 = generateRandomHandle(); // initialize replacement argument set EvictImage_args.hContext1 = generateRandomHandle(); EvictImage_args.hDevice1 = generateRandomHandle(); EvictImage_args.hImage1 = generateRandomHandle(); // initialize user instance data EvictImage_args.instanceData0 = generateRandomHandle(); EvictImage_args.instanceData3 = generateRandomHandle(); // arguments are expeted to be passed in from first prolog callback driver_ddiTable.core_ddiTable.Context.pfnMakeImageResident = [](ze_context_handle_t hContext, ze_device_handle_t hDevice, ze_image_handle_t hImage) { EXPECT_EQ(hContext, EvictImage_args.hContext1); EXPECT_EQ(hDevice, EvictImage_args.hDevice1); EXPECT_EQ(hImage, EvictImage_args.hImage1); return ZE_RESULT_SUCCESS; }; // // The 0th prolog replaces the orignal API arguments with a new set // Allocate instance data, pass it to corresponding epilog. // prologCbs0.Context.pfnMakeImageResidentCb = [](ze_context_make_image_resident_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(*params->phContext, EvictImage_args.hContext0); EXPECT_EQ(*params->phDevice, EvictImage_args.hDevice0); EXPECT_EQ(*params->phImage, EvictImage_args.hImage0); *params->phContext = EvictImage_args.hContext1; *params->phDevice = EvictImage_args.hDevice1; *params->phImage = EvictImage_args.hImage1; ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 1); *val += 1; struct instanceDataStruct *instanceData = new struct instanceDataStruct; instanceData->instanceDataValue = EvictImage_args.instanceData0; *ppTracerInstanceUserData = instanceData; }; // // The 0th epilog expects to see the API argument replacements // Expect to receive instance data from corresponding prolog // epilogCbs0.Context.pfnMakeImageResidentCb = [](ze_context_make_image_resident_params_t *params, ze_result_t 
result, void *pTracerUserData, void **ppTracerInstanceUserData) { struct instanceDataStruct *instanceData; EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(*params->phContext, EvictImage_args.hContext1); EXPECT_EQ(*params->phDevice, EvictImage_args.hDevice1); EXPECT_EQ(*params->phImage, EvictImage_args.hImage1); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 2); *val += 1; instanceData = (struct instanceDataStruct *)*ppTracerInstanceUserData; EXPECT_EQ(instanceData->instanceDataValue, EvictImage_args.instanceData0); delete instanceData; }; // // The 1st prolog sees the arguments as replaced by the 0th prolog. // There is no epilog for this prolog, so don't allocate instance data // prologCbs1.Context.pfnMakeImageResidentCb = [](ze_context_make_image_resident_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(*params->phContext, EvictImage_args.hContext1); EXPECT_EQ(*params->phDevice, EvictImage_args.hDevice1); EXPECT_EQ(*params->phImage, EvictImage_args.hImage1); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 11); *val += 11; }; // // The 2nd epilog expects to see the API argument replacements // There is no corresponding prolog, so there is no instance data // epilogCbs2.Context.pfnMakeImageResidentCb = [](ze_context_make_image_resident_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(*params->phContext, EvictImage_args.hContext1); EXPECT_EQ(*params->phDevice, EvictImage_args.hDevice1); EXPECT_EQ(*params->phImage, EvictImage_args.hImage1); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 21); *val += 21; }; // // The 3rd prolog expects to see the API argument replacements and doesn't modify them // Allocate instance data and pass to corresponding epilog // 
prologCbs3.Context.pfnMakeImageResidentCb = [](ze_context_make_image_resident_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(*params->phContext, EvictImage_args.hContext1); EXPECT_EQ(*params->phDevice, EvictImage_args.hDevice1); EXPECT_EQ(*params->phImage, EvictImage_args.hImage1); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 31); *val += 31; struct instanceDataStruct *instanceData = new struct instanceDataStruct; instanceData->instanceDataValue = EvictImage_args.instanceData3; *ppTracerInstanceUserData = instanceData; }; // // The 3rd prolog expects to see the API argument replacements and doesn't modify them // Create instance data and pass to corresponding epilog // epilogCbs3.Context.pfnMakeImageResidentCb = [](ze_context_make_image_resident_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { struct instanceDataStruct *instanceData; EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(*params->phContext, EvictImage_args.hContext1); EXPECT_EQ(*params->phDevice, EvictImage_args.hDevice1); EXPECT_EQ(*params->phImage, EvictImage_args.hImage1); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 62); *val += 31; instanceData = (struct instanceDataStruct *)*ppTracerInstanceUserData; EXPECT_EQ(instanceData->instanceDataValue, EvictImage_args.instanceData3); delete instanceData; }; setTracerCallbacksAndEnableTracer(); result = zeContextMakeImageResident_Tracing(EvictImage_args.hContext0, EvictImage_args.hDevice0, EvictImage_args.hImage0); EXPECT_EQ(ZE_RESULT_SUCCESS, result); validateDefaultUserDataFinal(); } } // namespace ult } // namespace L0 test_sampler_api_tracing.cpp000066400000000000000000000435771422164147700362160ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/experimental/test/unit_tests/sources/tracing/* * Copyright (C) 2020-2022 Intel Corporation * * 
SPDX-License-Identifier: MIT * */ #include "test_api_tracing_common.h" namespace L0 { namespace ult { TEST_F(zeAPITracingRuntimeTests, WhenCallingSamplerCreateTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.Sampler.pfnCreate = [](ze_context_handle_t hContext, ze_device_handle_t hDevice, const ze_sampler_desc_t *pDesc, ze_sampler_handle_t *phSampler) { return ZE_RESULT_SUCCESS; }; prologCbs.Sampler.pfnCreateCb = genericPrologCallbackPtr; epilogCbs.Sampler.pfnCreateCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeSamplerCreate_Tracing(nullptr, nullptr, nullptr, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } TEST_F(zeAPITracingRuntimeTests, WhenCallingSamplerDestroyTracingWrapperWithOneSetOfPrologEpilogsThenReturnSuccess) { ze_result_t result; driver_ddiTable.core_ddiTable.Sampler.pfnDestroy = [](ze_sampler_handle_t hSampler) { return ZE_RESULT_SUCCESS; }; prologCbs.Sampler.pfnDestroyCb = genericPrologCallbackPtr; epilogCbs.Sampler.pfnDestroyCb = genericEpilogCallbackPtr; setTracerCallbacksAndEnableTracer(); result = zeSamplerDestroy_Tracing(nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(defaultUserData, 1); } struct { ze_context_handle_t hContext0; ze_device_handle_t hDevice0; ze_sampler_desc_t Desc0; ze_sampler_handle_t hSampler0; ze_context_handle_t hContext1; ze_device_handle_t hDevice1; ze_sampler_desc_t Desc1; ze_sampler_handle_t hSampler1; ze_sampler_handle_t hSamplerAPI; void *instanceData0; void *instanceData3; } sampler_create_args; TEST_F(zeAPITracingRuntimeMultipleArgumentsTests, WhenCallingSamplerCreateTracingWrapperWithTwoSetsOfPrologEpilogsCheckArgumentsThenReturnSuccess) { ze_result_t result; // initialize initial argument set sampler_create_args.hContext0 = generateRandomHandle(); sampler_create_args.hDevice0 = generateRandomHandle(); sampler_create_args.hSampler0 = generateRandomHandle(); // initialize replacement 
argument set sampler_create_args.hContext1 = generateRandomHandle(); sampler_create_args.hDevice1 = generateRandomHandle(); sampler_create_args.hSampler1 = generateRandomHandle(); // initialize user instance data sampler_create_args.instanceData0 = generateRandomHandle(); sampler_create_args.instanceData3 = generateRandomHandle(); // Arguments are expected to be passed in by the first prolog callback driver_ddiTable.core_ddiTable.Sampler.pfnCreate = [](ze_context_handle_t hContext, ze_device_handle_t hDevice, const ze_sampler_desc_t *pDesc, ze_sampler_handle_t *phSampler) { EXPECT_EQ(sampler_create_args.hContext1, hContext); EXPECT_EQ(sampler_create_args.hDevice1, hDevice); EXPECT_EQ(&sampler_create_args.Desc1, pDesc); EXPECT_EQ(&sampler_create_args.hSampler1, phSampler); EXPECT_EQ(sampler_create_args.hSampler1, *phSampler); sampler_create_args.hSamplerAPI = generateRandomHandle(); *phSampler = sampler_create_args.hSamplerAPI; return ZE_RESULT_SUCCESS; }; // // The 0th prolog replaces the orignal API arguments with a new set // Create instance data, pass it to corresponding epilog. 
// prologCbs0.Sampler.pfnCreateCb = [](ze_sampler_create_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(sampler_create_args.hContext0, *params->phContext); EXPECT_EQ(sampler_create_args.hDevice0, *params->phDevice); EXPECT_EQ(&sampler_create_args.Desc0, *params->pdesc); EXPECT_EQ(&sampler_create_args.hSampler0, *params->pphSampler); ze_sampler_handle_t **ppHandle; ASSERT_NE(nullptr, params); ppHandle = params->pphSampler; ze_sampler_handle_t *pHandle; ASSERT_NE(nullptr, ppHandle); pHandle = *ppHandle; ze_sampler_handle_t handle; ASSERT_NE(nullptr, pHandle); handle = *pHandle; EXPECT_EQ(sampler_create_args.hSampler0, handle); *params->phContext = sampler_create_args.hContext1; *params->phDevice = sampler_create_args.hDevice1; *params->pdesc = &sampler_create_args.Desc1; *params->pphSampler = &sampler_create_args.hSampler1; ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 1); *val += 1; struct instanceDataStruct *instanceData = new struct instanceDataStruct; instanceData->instanceDataValue = sampler_create_args.instanceData0; *ppTracerInstanceUserData = instanceData; }; // // The 0th epilog expects to see the API argument replacements // Expect to receive instance data from corresponding prolog // epilogCbs0.Sampler.pfnCreateCb = [](ze_sampler_create_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { struct instanceDataStruct *instanceData; EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(sampler_create_args.hContext1, *params->phContext); EXPECT_EQ(sampler_create_args.hDevice1, *params->phDevice); EXPECT_EQ(&sampler_create_args.Desc1, *params->pdesc); EXPECT_EQ(&sampler_create_args.hSampler1, *params->pphSampler); ze_sampler_handle_t **ppHandle; ASSERT_NE(nullptr, params); ppHandle = params->pphSampler; ze_sampler_handle_t *pHandle; ASSERT_NE(nullptr, ppHandle); pHandle = *ppHandle; ze_sampler_handle_t handle; 
ASSERT_NE(nullptr, pHandle); handle = *pHandle; EXPECT_EQ(sampler_create_args.hSampler1, handle); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 2); *val += 1; instanceData = (struct instanceDataStruct *)*ppTracerInstanceUserData; EXPECT_EQ(instanceData->instanceDataValue, sampler_create_args.instanceData0); delete instanceData; }; // // The 1st prolog sees the arguments as replaced by the 0th prolog. // There is no epilog for this prolog, so don't allocate instance data // prologCbs1.Sampler.pfnCreateCb = [](ze_sampler_create_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(sampler_create_args.hContext1, *params->phContext); EXPECT_EQ(sampler_create_args.hDevice1, *params->phDevice); EXPECT_EQ(&sampler_create_args.Desc1, *params->pdesc); EXPECT_EQ(&sampler_create_args.hSampler1, *params->pphSampler); ze_sampler_handle_t **ppHandle; ASSERT_NE(nullptr, params); ppHandle = params->pphSampler; ze_sampler_handle_t *pHandle; ASSERT_NE(nullptr, ppHandle); pHandle = *ppHandle; ze_sampler_handle_t handle; ASSERT_NE(nullptr, pHandle); handle = *pHandle; EXPECT_EQ(sampler_create_args.hSampler1, handle); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 11); *val += 11; }; // // The 2nd epilog expects to see the API argument replacements // There is no corresponding prolog, so there is no instance data // epilogCbs2.Sampler.pfnCreateCb = [](ze_sampler_create_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(sampler_create_args.hContext1, *params->phContext); EXPECT_EQ(sampler_create_args.hDevice1, *params->phDevice); EXPECT_EQ(&sampler_create_args.Desc1, *params->pdesc); EXPECT_EQ(&sampler_create_args.hSampler1, *params->pphSampler); ze_sampler_handle_t **ppHandle; ASSERT_NE(nullptr, params); ppHandle = params->pphSampler; 
ze_sampler_handle_t *pHandle; ASSERT_NE(nullptr, ppHandle); pHandle = *ppHandle; ze_sampler_handle_t handle; ASSERT_NE(nullptr, pHandle); handle = *pHandle; EXPECT_EQ(sampler_create_args.hSampler1, handle); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 21); *val += 21; }; // // The 3rd prolog expects to see the API argument replacements and doesn't modify them // Create instance data and pass to corresponding epilog // prologCbs3.Sampler.pfnCreateCb = [](ze_sampler_create_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(sampler_create_args.hContext1, *params->phContext); EXPECT_EQ(sampler_create_args.hDevice1, *params->phDevice); EXPECT_EQ(&sampler_create_args.Desc1, *params->pdesc); EXPECT_EQ(&sampler_create_args.hSampler1, *params->pphSampler); ze_sampler_handle_t **ppHandle; ASSERT_NE(nullptr, params); ppHandle = params->pphSampler; ze_sampler_handle_t *pHandle; ASSERT_NE(nullptr, ppHandle); pHandle = *ppHandle; ze_sampler_handle_t handle; ASSERT_NE(nullptr, pHandle); handle = *pHandle; EXPECT_EQ(sampler_create_args.hSampler1, handle); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 31); *val += 31; struct instanceDataStruct *instanceData = new struct instanceDataStruct; instanceData->instanceDataValue = sampler_create_args.instanceData3; *ppTracerInstanceUserData = instanceData; }; // // The 3rd epilog expects to see the API argument replacements // Expect to see instance data from corresponding prolog // epilogCbs3.Sampler.pfnCreateCb = [](ze_sampler_create_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { struct instanceDataStruct *instanceData; EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(sampler_create_args.hContext1, *params->phContext); EXPECT_EQ(sampler_create_args.hDevice1, *params->phDevice); EXPECT_EQ(&sampler_create_args.Desc1, *params->pdesc); 
EXPECT_EQ(&sampler_create_args.hSampler1, *params->pphSampler); ze_sampler_handle_t **ppHandle; ASSERT_NE(nullptr, params); ppHandle = params->pphSampler; ze_sampler_handle_t *pHandle; ASSERT_NE(nullptr, ppHandle); pHandle = *ppHandle; ze_sampler_handle_t handle; ASSERT_NE(nullptr, pHandle); handle = *pHandle; EXPECT_EQ(sampler_create_args.hSampler1, handle); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 62); *val += 31; instanceData = (struct instanceDataStruct *)*ppTracerInstanceUserData; EXPECT_EQ(instanceData->instanceDataValue, sampler_create_args.instanceData3); delete instanceData; }; setTracerCallbacksAndEnableTracer(); result = zeSamplerCreate_Tracing(sampler_create_args.hContext0, sampler_create_args.hDevice0, &sampler_create_args.Desc0, &sampler_create_args.hSampler0); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(sampler_create_args.hSampler1, sampler_create_args.hSamplerAPI); validateDefaultUserDataFinal(); } struct { ze_sampler_handle_t hSampler0; ze_sampler_handle_t hSampler1; void *instanceData0; void *instanceData3; } sampler_destroy_args; TEST_F(zeAPITracingRuntimeMultipleArgumentsTests, WhenCallingSamplerDestroyTracingWrapperWithTwoSetsOfPrologEpilogsCheckArgumentsThenReturnSuccess) { ze_result_t result; // initialize initial argument set sampler_destroy_args.hSampler0 = generateRandomHandle(); // initialize replacement argument set sampler_destroy_args.hSampler1 = generateRandomHandle(); // initialize user instance data sampler_destroy_args.instanceData0 = generateRandomHandle(); sampler_destroy_args.instanceData3 = generateRandomHandle(); // Arguments are expected to be passed in by the first prolog callback driver_ddiTable.core_ddiTable.Sampler.pfnDestroy = [](ze_sampler_handle_t hSampler) { EXPECT_EQ(sampler_destroy_args.hSampler1, hSampler); return ZE_RESULT_SUCCESS; }; // // The 0th prolog replaces the orignal API arguments with a new set // Allocate instance data, pass it to corresponding 
epilog. // prologCbs0.Sampler.pfnDestroyCb = [](ze_sampler_destroy_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(sampler_destroy_args.hSampler0, *params->phSampler); *params->phSampler = sampler_destroy_args.hSampler1; ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 1); *val += 1; struct instanceDataStruct *instanceData = new struct instanceDataStruct; instanceData->instanceDataValue = sampler_destroy_args.instanceData0; *ppTracerInstanceUserData = instanceData; }; // // The 0th epilog expects to see the API argument replacements // Expect to receive instance data from corresponding prolog // epilogCbs0.Sampler.pfnDestroyCb = [](ze_sampler_destroy_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { struct instanceDataStruct *instanceData; EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(sampler_destroy_args.hSampler1, *params->phSampler); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 2); *val += 1; instanceData = (struct instanceDataStruct *)*ppTracerInstanceUserData; EXPECT_EQ(instanceData->instanceDataValue, sampler_destroy_args.instanceData0); delete instanceData; }; // // The 1st prolog sees the arguments as replaced by the 0th prolog. 
// There is no epilog for this prolog, so don't allocate instance data // prologCbs1.Sampler.pfnDestroyCb = [](ze_sampler_destroy_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(sampler_destroy_args.hSampler1, *params->phSampler); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 11); *val += 11; }; // // The 2nd epilog expects to see the API argument replacements // There is no corresponding prolog, so there is no instance data // epilogCbs2.Sampler.pfnDestroyCb = [](ze_sampler_destroy_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(sampler_destroy_args.hSampler1, *params->phSampler); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 21); *val += 21; }; // // The 3rd prolog expects to see the API argument replacements and doesn't modify them // Allocate instance data and pass to corresponding epilog // prologCbs3.Sampler.pfnDestroyCb = [](ze_sampler_destroy_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { EXPECT_EQ(sampler_destroy_args.hSampler1, *params->phSampler); ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 31); *val += 31; struct instanceDataStruct *instanceData = new struct instanceDataStruct; instanceData->instanceDataValue = sampler_destroy_args.instanceData3; *ppTracerInstanceUserData = instanceData; }; // // The 3rd epilog expects to see the API argument replacements // Expect to see instance data from corresponding prolog // epilogCbs3.Sampler.pfnDestroyCb = [](ze_sampler_destroy_params_t *params, ze_result_t result, void *pTracerUserData, void **ppTracerInstanceUserData) { struct instanceDataStruct *instanceData; EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(sampler_destroy_args.hSampler1, *params->phSampler); 
ASSERT_NE(nullptr, pTracerUserData); int *val = static_cast(pTracerUserData); EXPECT_EQ(*val, 62); *val += 31; instanceData = (struct instanceDataStruct *)*ppTracerInstanceUserData; EXPECT_EQ(instanceData->instanceDataValue, sampler_destroy_args.instanceData3); delete instanceData; }; setTracerCallbacksAndEnableTracer(); result = zeSamplerDestroy_Tracing(sampler_destroy_args.hSampler0); EXPECT_EQ(ZE_RESULT_SUCCESS, result); validateDefaultUserDataFinal(); } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/os_release_info.cmake000066400000000000000000000133061422164147700236610ustar00rootroot00000000000000# # Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT # if(NOT DEFINED _os_release_info) set(_os_release_info TRUE) # os_release_info.cmake - Function to dump OS name and version # This file has no dependencies on other files (e.g., functions or definitions) # of the local cmake environment. # Set cmake policies for at least this level: cmake_minimum_required(VERSION 3.2.0) # Function get_os_release_info - Determine and return OS name and version # # Args: # 1. the name of a variable to receive os_name # 2. the name of a variable to receive os_version # # Return values: (Quotation marks are always stripped). # Upon failure, return values are null strings. # # Examples: # os_name os_version # -------------- ------- # clear-linux-os 21180 (Changes twice daily) # ubuntu 12.04 16.04 17.10 18.04 # fedora 27 # centos 6.9 7.4.1708 # # Potential sources are tried (in order of preference) until a # suitable one is found. # Implementation documentation: # # The potential sources, in order, are as follows. # - /etc/centos-release # Centos 7 also has /etc/os-release. File /etc/os-release is less # precise about the Centos version (e.g., "7" instead of "7.4.1708"). # For that reason, this file is checked first. 
# Examples: # CentOS release 6.9 (Final) # CentOS Linux release 7.4.1708 (Core) # - /usr/lib/os-release # Present for Clear Linux, modern Fedora, and Ubuntu since some time # between 14.04 and 16.04. The ID and VERSION_ID values are used. # Examples: # ID=clear-linux-os VERSION_ID=21180 # ID=fedora VERSION_ID=27 # ID=ubuntu VERSION_ID="14.04" # ID=ubuntu VERSION_ID="16.04" # ID="ubuntu" VERSION_ID="17.10" # - /etc/os-release - Same form as (sometimes a link to) /usr/lib/os-release # ID="Ubuntu" VERSION_ID="12.04" # ID="Ubuntu" VERSION_ID="14.04" # with a symbolic link: /etc/os-release -> ../usr/lib/os-release # ID="CentOS Linux" VERSION_ID="7" Also: ID_LIKE="rhel fedora" # - /etc/lsb-release # For Centos, not too meaningful. # Other "OS"s are more reasonable: # DISTRIB_ID=Ubuntu DISTRIB_RELEASE=12.04 # DISTRIB_ID=Ubuntu DISTRIB_RELEASE=14.04 # DISTRIB_ID=Ubuntu DISTRIB_RELEASE=17.10 function(get_os_release_info _vn_id _vn_version_id _vn_codename) set(_var_id "") set(_var_version_id "") set(_var_codename "") if("${_var_id}" STREQUAL "") set(file_path "/etc/centos-release") if(EXISTS "${file_path}") # Example: CentOS release 6.9 (Final) file(STRINGS "${file_path}" file_list LIMIT_COUNT 1) list(GET file_list 0 file_line) # Remove all parenthesized items. string(REGEX REPLACE "\\([^)]+\\)" "" file_line "${file_line}") # Extract start and end, discard optional "version" or "release" string(REGEX MATCH "^([A-Za-z0-9_]+)( +(version|release))? +(.*)$" _dummy "${file_line}") # 1 2 3 4 set(_var_id "${CMAKE_MATCH_1}") set(_var_version_id "${CMAKE_MATCH_4}") endif() endif() if("${_var_id}" STREQUAL "") if(EXISTS "/usr/lib/os-release") set(file_path "/usr/lib/os-release") elseif(EXISTS "/etc/os-release") set(file_path "/etc/os-release") else() set(file_path "") endif() if(NOT "${file_path}" STREQUAL "") file(STRINGS "${file_path}" data_list REGEX "^(ID|VERSION_ID|VERSION_CODENAME)=") # Look for lines like "ID="..." and VERSION_ID="..." 
foreach(_var ${data_list}) if("${_var}" MATCHES "^(ID)=(.*)$") set(_var_id "${CMAKE_MATCH_2}") elseif("${_var}" MATCHES "^(VERSION_ID)=(.*)$") set(_var_version_id "${CMAKE_MATCH_2}") elseif("${_var}" MATCHES "^(VERSION_CODENAME)=(.*)$") set(_var_codename "${CMAKE_MATCH_2}") endif() endforeach() endif() endif() if("${_var_id}" STREQUAL "") set(file_path "/etc/lsb-release") if(EXISTS "${file_path}") file(STRINGS "${file_path}" data_list REGEX "^(DISTRIB_ID|DISTRIB_RELEASE|DISTRIB_CODENAME)=") # Look for lines like "DISTRIB_ID="..." and DISTRIB_RELEASE="..." foreach(_var ${data_list}) if("${_var}" MATCHES "^(DISTRIB_ID)=(.*)$") set(_var_id "${CMAKE_MATCH_2}") elseif("${_var}" MATCHES "^(DISTRIB_RELEASE)=(.*)$") set(_var_version_id "${CMAKE_MATCH_2}") elseif("${_var}" MATCHES "^(DISTRIB_CODENAME)=(.*)$") set(_var_codename "${CMAKE_MATCH_2}") endif() endforeach() endif() endif() string(TOLOWER "${_var_id}" "_var_id") string(STRIP "${_var_id}" _var_id) string(STRIP "${_var_version_id}" _var_version_id) string(STRIP "${_var_codename}" _var_codename) # Remove any enclosing quotation marks string(REGEX REPLACE "^\"(.*)\"$" "\\1" _var_id "${_var_id}") string(REGEX REPLACE "^\"(.*)\"$" "\\1" _var_version_id "${_var_version_id}") string(REGEX REPLACE "^\"(.*)\"$" "\\1" _var_codename "${_var_codename}") if(NOT "${_vn_id}" STREQUAL "") set(${_vn_id} "${_var_id}" PARENT_SCOPE) endif() if(NOT "${_vn_version_id}" STREQUAL "") set(${_vn_version_id} "${_var_version_id}" PARENT_SCOPE) endif() if(NOT "${_vn_codename}" STREQUAL "") set(${_vn_codename} "${_var_codename}" PARENT_SCOPE) endif() endfunction() endif() compute-runtime-22.14.22890/level_zero/source/000077500000000000000000000000001422164147700210205ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/source/CMakeLists.txt000066400000000000000000000001501422164147700235540ustar00rootroot00000000000000# # Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT # add_subdirectories() 
compute-runtime-22.14.22890/level_zero/source/inc/000077500000000000000000000000001422164147700215715ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/source/inc/ze_intel_gpu.h000066400000000000000000000005441422164147700244310ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include #include #include inline bool getenv_tobool(const char *name) { const char *env = getenv(name); if ((nullptr == env) || (0 == strcmp("0", env))) return false; return (0 == strcmp("1", env)); } compute-runtime-22.14.22890/level_zero/tools/000077500000000000000000000000001422164147700206605ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/000077500000000000000000000000001422164147700221605ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/CMakeLists.txt000066400000000000000000000012071422164147700247200ustar00rootroot00000000000000# # Copyright (C) 2020-2022 Intel Corporation # # SPDX-License-Identifier: MIT # add_subdirectory(debug) add_subdirectory(metrics) add_subdirectory(sysman) add_subdirectory(pin) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_TOOLS_SOURCES} ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) if((NEO_ENABLE_i915_PRELIM_DETECTION) AND ("${BRANCH_TYPE}" STREQUAL "")) target_include_directories(${L0_STATIC_LIB_NAME} PUBLIC ${I915_INCLUDES_DIR}/prelim ) endif() # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_TOOLS_SOURCES ${L0_TOOLS_SOURCES}) compute-runtime-22.14.22890/level_zero/tools/source/debug/000077500000000000000000000000001422164147700232465ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/debug/CMakeLists.txt000066400000000000000000000010411422164147700260020ustar00rootroot00000000000000# # Copyright (C) 2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_TOOLS_DEBUG ${CMAKE_CURRENT_SOURCE_DIR}/debug_session.cpp 
${CMAKE_CURRENT_SOURCE_DIR}/debug_session.h ${CMAKE_CURRENT_SOURCE_DIR}/debug_handlers.h ${CMAKE_CURRENT_SOURCE_DIR}/eu_thread.h ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}debug_handlers.cpp ) add_subdirectories() target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_DEBUG} ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) compute-runtime-22.14.22890/level_zero/tools/source/debug/debug_handlers.cpp000066400000000000000000000045431422164147700267260ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/debug/debug_handlers.h" #include "level_zero/tools/source/debug/debug_session.h" namespace L0 { DebugSession *DebugSession::create(const zet_debug_config_t &config, Device *device, ze_result_t &result) { result = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; return nullptr; } namespace DebugApiHandlers { ze_result_t debugAttach(zet_device_handle_t hDevice, const zet_debug_config_t *config, zet_debug_session_handle_t *phDebug) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t debugDetach(zet_debug_session_handle_t hDebug) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t debugReadEvent(zet_debug_session_handle_t hDebug, uint64_t timeout, zet_debug_event_t *event) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t debugInterrupt(zet_debug_session_handle_t hDebug, ze_device_thread_t thread) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t debugResume(zet_debug_session_handle_t hDebug, ze_device_thread_t thread) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t debugReadMemory(zet_debug_session_handle_t hDebug, ze_device_thread_t thread, const zet_debug_memory_space_desc_t *desc, size_t size, void *buffer) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t debugWriteMemory(zet_debug_session_handle_t hDebug, ze_device_thread_t thread, const zet_debug_memory_space_desc_t *desc, size_t size, const void *buffer) { return 
ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t debugAcknowledgeEvent(zet_debug_session_handle_t hDebug, const zet_debug_event_t *event) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t debugGetRegisterSetProperties(zet_device_handle_t hDevice, uint32_t *pCount, zet_debug_regset_properties_t *pRegisterSetProperties) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t debugReadRegisters(zet_debug_session_handle_t hDebug, ze_device_thread_t thread, uint32_t type, uint32_t start, uint32_t count, void *pRegisterValues) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t debugWriteRegisters(zet_debug_session_handle_t hDebug, ze_device_thread_t thread, uint32_t type, uint32_t start, uint32_t count, void *pRegisterValues) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } } // namespace DebugApiHandlers } // namespace L0compute-runtime-22.14.22890/level_zero/tools/source/debug/debug_handlers.h000066400000000000000000000030771422164147700263740ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include namespace L0 { namespace DebugApiHandlers { ze_result_t debugAttach(zet_device_handle_t hDevice, const zet_debug_config_t *config, zet_debug_session_handle_t *phDebug); ze_result_t debugDetach(zet_debug_session_handle_t hDebug); ze_result_t debugReadEvent(zet_debug_session_handle_t hDebug, uint64_t timeout, zet_debug_event_t *event); ze_result_t debugInterrupt(zet_debug_session_handle_t hDebug, ze_device_thread_t thread); ze_result_t debugResume(zet_debug_session_handle_t hDebug, ze_device_thread_t thread); ze_result_t debugReadMemory(zet_debug_session_handle_t hDebug, ze_device_thread_t thread, const zet_debug_memory_space_desc_t *desc, size_t size, void *buffer); ze_result_t debugWriteMemory(zet_debug_session_handle_t hDebug, ze_device_thread_t thread, const zet_debug_memory_space_desc_t *desc, size_t size, const void *buffer); ze_result_t 
debugAcknowledgeEvent(zet_debug_session_handle_t hDebug, const zet_debug_event_t *event); ze_result_t debugGetRegisterSetProperties(zet_device_handle_t hDevice, uint32_t *pCount, zet_debug_regset_properties_t *pRegisterSetProperties); ze_result_t debugReadRegisters(zet_debug_session_handle_t hDebug, ze_device_thread_t thread, uint32_t type, uint32_t start, uint32_t count, void *pRegisterValues); ze_result_t debugWriteRegisters(zet_debug_session_handle_t hDebug, ze_device_thread_t thread, uint32_t type, uint32_t start, uint32_t count, void *pRegisterValues); } // namespace DebugApiHandlers } // namespace L0compute-runtime-22.14.22890/level_zero/tools/source/debug/debug_session.cpp000066400000000000000000000210661422164147700266100ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/debug/debug_session.h" #include "shared/source/helpers/hw_info.h" #include "level_zero/core/source/device/device_imp.h" namespace L0 { ze_device_thread_t DebugSession::convertToPhysical(ze_device_thread_t thread, uint32_t &deviceIndex) { auto &hwInfo = connectedDevice->getHwInfo(); auto deviceBitfield = connectedDevice->getNEODevice()->getDeviceBitfield(); if (connectedDevice->getNEODevice()->isSubDevice()) { deviceIndex = Math::log2(static_cast(deviceBitfield.to_ulong())); } else if (thread.slice != UINT32_MAX) { deviceIndex = thread.slice / hwInfo.gtSystemInfo.SliceCount; thread.slice = thread.slice % hwInfo.gtSystemInfo.SliceCount; } return thread; } EuThread::ThreadId DebugSession::convertToThreadId(ze_device_thread_t thread) { auto &hwInfo = connectedDevice->getHwInfo(); auto deviceBitfield = connectedDevice->getNEODevice()->getDeviceBitfield(); UNRECOVERABLE_IF(!DebugSession::isSingleThread(thread)); uint32_t deviceIndex = 0; if (connectedDevice->getNEODevice()->isSubDevice()) { deviceIndex = Math::log2(static_cast(deviceBitfield.to_ulong())); } else { deviceIndex = thread.slice / 
hwInfo.gtSystemInfo.SliceCount; thread.slice = thread.slice % hwInfo.gtSystemInfo.SliceCount; } EuThread::ThreadId threadId(deviceIndex, thread.slice, thread.subslice, thread.eu, thread.thread); return threadId; } ze_device_thread_t DebugSession::convertToApi(EuThread::ThreadId threadId) { auto &hwInfo = connectedDevice->getHwInfo(); ze_device_thread_t thread = {static_cast(threadId.slice), static_cast(threadId.subslice), static_cast(threadId.eu), static_cast(threadId.thread)}; if (!connectedDevice->getNEODevice()->isSubDevice()) { thread.slice = thread.slice + static_cast(threadId.tileIndex * hwInfo.gtSystemInfo.SliceCount); } return thread; } DebugSession::DebugSession(const zet_debug_config_t &config, Device *device) : connectedDevice(device) { if (connectedDevice) { auto &hwInfo = connectedDevice->getHwInfo(); const uint32_t numSubslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported; const uint32_t numEuPerSubslice = hwInfo.gtSystemInfo.MaxEuPerSubSlice; const uint32_t numThreadsPerEu = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount); uint32_t subDeviceCount = std::max(1u, connectedDevice->getNEODevice()->getNumSubDevices()); for (uint32_t tileIndex = 0; tileIndex < subDeviceCount; tileIndex++) { for (uint32_t sliceID = 0; sliceID < hwInfo.gtSystemInfo.MaxSlicesSupported; sliceID++) { for (uint32_t subsliceID = 0; subsliceID < numSubslicesPerSlice; subsliceID++) { for (uint32_t euID = 0; euID < numEuPerSubslice; euID++) { for (uint32_t threadID = 0; threadID < numThreadsPerEu; threadID++) { EuThread::ThreadId thread = {tileIndex, sliceID, subsliceID, euID, threadID}; allThreads[uint64_t(thread)] = std::make_unique(thread); } } } } } } } std::vector DebugSession::getSingleThreadsForDevice(uint32_t deviceIndex, ze_device_thread_t physicalThread, const NEO::HardwareInfo &hwInfo) { const uint32_t numSubslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported; 
const uint32_t numEuPerSubslice = hwInfo.gtSystemInfo.MaxEuPerSubSlice; const uint32_t numThreadsPerEu = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount); UNRECOVERABLE_IF(numThreadsPerEu > 8); std::vector threads; const uint32_t slice = physicalThread.slice; const uint32_t subslice = physicalThread.subslice; const uint32_t eu = physicalThread.eu; const uint32_t thread = physicalThread.thread; for (uint32_t sliceID = 0; sliceID < hwInfo.gtSystemInfo.MaxSlicesSupported; sliceID++) { if (slice != UINT32_MAX) { sliceID = slice; } for (uint32_t subsliceID = 0; subsliceID < numSubslicesPerSlice; subsliceID++) { if (subslice != UINT32_MAX) { subsliceID = subslice; } for (uint32_t euID = 0; euID < numEuPerSubslice; euID++) { if (eu != UINT32_MAX) { euID = eu; } for (uint32_t threadID = 0; threadID < numThreadsPerEu; threadID++) { if (thread != UINT32_MAX) { threadID = thread; } threads.push_back({deviceIndex, sliceID, subsliceID, euID, threadID}); if (thread != UINT32_MAX) { break; } } if (eu != UINT32_MAX) { break; } } if (subslice != UINT32_MAX) { break; } } if (slice != UINT32_MAX) { break; } } return threads; } bool DebugSession::areRequestedThreadsStopped(ze_device_thread_t thread) { auto &hwInfo = connectedDevice->getHwInfo(); uint32_t deviceIndex = 0; auto physicalThread = convertToPhysical(thread, deviceIndex); auto singleThreads = getSingleThreadsForDevice(deviceIndex, physicalThread, hwInfo); bool requestedThreadsStopped = true; for (auto &threadId : singleThreads) { if (allThreads[threadId]->isStopped()) { continue; } requestedThreadsStopped = false; } return requestedThreadsStopped; } ze_result_t DebugSession::sanityMemAccessThreadCheck(ze_device_thread_t thread, const zet_debug_memory_space_desc_t *desc) { if (DebugSession::isThreadAll(thread)) { if (desc->type != ZET_DEBUG_MEMORY_SPACE_TYPE_DEFAULT) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } else { return ZE_RESULT_SUCCESS; } } else if (DebugSession::isSingleThread(thread)) { if (desc->type 
!= ZET_DEBUG_MEMORY_SPACE_TYPE_DEFAULT) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } if (!areRequestedThreadsStopped(thread)) { return ZE_RESULT_ERROR_NOT_AVAILABLE; } else { return ZE_RESULT_SUCCESS; } } return ZE_RESULT_ERROR_INVALID_ARGUMENT; } void DebugSession::fillDevicesFromThread(ze_device_thread_t thread, std::vector &devices) { auto deviceCount = std::max(1u, connectedDevice->getNEODevice()->getNumSubDevices()); UNRECOVERABLE_IF(devices.size() < deviceCount); uint32_t deviceIndex = 0; convertToPhysical(thread, deviceIndex); bool singleDevice = (thread.slice != UINT32_MAX && deviceCount > 1) || deviceCount == 1; if (singleDevice) { devices[deviceIndex] = 1; } else { for (uint32_t i = 0; i < deviceCount; i++) { devices[i] = 1; } } } bool DebugSession::isBindlessSystemRoutine() { if (debugArea.reserved1 &= 1) { return true; } return false; } size_t DebugSession::getPerThreadScratchOffset(size_t ptss, EuThread::ThreadId threadId) { auto &hwInfo = connectedDevice->getHwInfo(); const uint32_t numSubslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported; const uint32_t numEuPerSubslice = hwInfo.gtSystemInfo.MaxEuPerSubSlice; const uint32_t numThreadsPerEu = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount); auto threadOffset = (((threadId.slice * numSubslicesPerSlice + threadId.subslice) * numEuPerSubslice + threadId.eu) * numThreadsPerEu + threadId.thread) * ptss; return threadOffset; } void DebugSession::printBitmask(uint8_t *bitmask, size_t bitmaskSize) { if (NEO::DebugManager.flags.DebuggerLogBitmask.get() & NEO::DebugVariables::DEBUGGER_LOG_BITMASK::LOG_INFO) { DEBUG_BREAK_IF(bitmaskSize % sizeof(uint64_t) != 0); PRINT_DEBUGGER_LOG(stdout, "\nINFO: Bitmask: ", ""); for (size_t i = 0; i < bitmaskSize / sizeof(uint64_t); i++) { uint64_t bitmask64 = 0; memcpy_s(&bitmask64, sizeof(uint64_t), &bitmask[i * sizeof(uint64_t)], sizeof(uint64_t)); PRINT_DEBUGGER_LOG(stdout, "\n [%lu] = %#018" PRIx64, 
static_cast(i), bitmask64); } } } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/debug/debug_session.h000066400000000000000000000106621422164147700262550ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/core/source/debugger/debugger_l0.h" #include "level_zero/tools/source/debug/eu_thread.h" #include #include #include struct _zet_debug_session_handle_t {}; namespace L0 { struct Device; struct DebugSession : _zet_debug_session_handle_t { virtual ~DebugSession() = default; DebugSession() = delete; static DebugSession *create(const zet_debug_config_t &config, Device *device, ze_result_t &result); static DebugSession *fromHandle(zet_debug_session_handle_t handle) { return static_cast(handle); } inline zet_debug_session_handle_t toHandle() { return this; } virtual bool closeConnection() = 0; virtual ze_result_t initialize() = 0; virtual ze_result_t readEvent(uint64_t timeout, zet_debug_event_t *event) = 0; virtual ze_result_t interrupt(ze_device_thread_t thread) = 0; virtual ze_result_t resume(ze_device_thread_t thread) = 0; virtual ze_result_t readMemory(ze_device_thread_t thread, const zet_debug_memory_space_desc_t *desc, size_t size, void *buffer) = 0; virtual ze_result_t writeMemory(ze_device_thread_t thread, const zet_debug_memory_space_desc_t *desc, size_t size, const void *buffer) = 0; virtual ze_result_t acknowledgeEvent(const zet_debug_event_t *event) = 0; virtual ze_result_t readRegisters(ze_device_thread_t thread, uint32_t type, uint32_t start, uint32_t count, void *pRegisterValues) = 0; virtual ze_result_t writeRegisters(ze_device_thread_t thread, uint32_t type, uint32_t start, uint32_t count, void *pRegisterValues) = 0; static ze_result_t getRegisterSetProperties(Device *device, uint32_t *pCount, zet_debug_regset_properties_t *pRegisterSetProperties); MOCKABLE_VIRTUAL bool areRequestedThreadsStopped(ze_device_thread_t thread); Device 
*getConnectedDevice() { return connectedDevice; } static bool isThreadAll(ze_device_thread_t thread) { return thread.slice == UINT32_MAX && thread.subslice == UINT32_MAX && thread.eu == UINT32_MAX && thread.thread == UINT32_MAX; } static bool isSingleThread(ze_device_thread_t thread) { return thread.slice != UINT32_MAX && thread.subslice != UINT32_MAX && thread.eu != UINT32_MAX && thread.thread != UINT32_MAX; } static bool areThreadsEqual(ze_device_thread_t thread, ze_device_thread_t thread2) { return thread.slice == thread2.slice && thread.subslice == thread2.subslice && thread.eu == thread2.eu && thread.thread == thread2.thread; } static bool checkSingleThreadWithinDeviceThread(ze_device_thread_t checkedThread, ze_device_thread_t thread) { if (DebugSession::isThreadAll(thread)) { return true; } bool threadMatch = (thread.thread == checkedThread.thread) || thread.thread == UINT32_MAX; bool euMatch = (thread.eu == checkedThread.eu) || thread.eu == UINT32_MAX; bool subsliceMatch = (thread.subslice == checkedThread.subslice) || thread.subslice == UINT32_MAX; bool sliceMatch = (thread.slice == checkedThread.slice) || thread.slice == UINT32_MAX; return threadMatch && euMatch && subsliceMatch && sliceMatch; } static void printBitmask(uint8_t *bitmask, size_t bitmaskSize); virtual ze_device_thread_t convertToPhysical(ze_device_thread_t thread, uint32_t &deviceIndex); virtual EuThread::ThreadId convertToThreadId(ze_device_thread_t thread); virtual ze_device_thread_t convertToApi(EuThread::ThreadId threadId); ze_result_t sanityMemAccessThreadCheck(ze_device_thread_t thread, const zet_debug_memory_space_desc_t *desc); protected: DebugSession(const zet_debug_config_t &config, Device *device); virtual void startAsyncThread() = 0; virtual bool isBindlessSystemRoutine(); virtual bool readModuleDebugArea() = 0; virtual ze_result_t readSbaBuffer(EuThread::ThreadId threadId, SbaTrackedAddresses &sbaBuffer) = 0; void fillDevicesFromThread(ze_device_thread_t thread, std::vector 
&devices); std::vector getSingleThreadsForDevice(uint32_t deviceIndex, ze_device_thread_t physicalThread, const NEO::HardwareInfo &hwInfo); size_t getPerThreadScratchOffset(size_t ptss, EuThread::ThreadId threadId); DebugAreaHeader debugArea; Device *connectedDevice = nullptr; std::map> allThreads; }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/debug/eu_thread.h000066400000000000000000000113661422164147700253660ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/debug_settings/debug_settings_manager.h" #include #include #include #include #include namespace L0 { class EuThread { public: enum class State { Running, Stopped, Unavailable }; struct ThreadId { union { struct { uint64_t thread : 4; uint64_t eu : 5; uint64_t subslice : 10; uint64_t slice : 10; uint64_t tileIndex : 2; uint64_t reserved : 33; }; uint64_t packed; }; ThreadId(uint32_t tile, uint32_t slice, uint32_t subslice, uint32_t eu, uint32_t thread) { this->packed = 0; this->tileIndex = tile; this->slice = slice; this->subslice = subslice; this->eu = eu; this->thread = thread; } ThreadId(uint32_t tile, ze_device_thread_t thread) { this->packed = 0; this->tileIndex = tile; this->slice = thread.slice; this->subslice = thread.subslice; this->eu = thread.eu; this->thread = thread.thread; } operator uint64_t() const { return packed; } }; virtual ~EuThread() = default; EuThread(ThreadId threadId) : threadId(threadId) {} bool stopThread(uint64_t memHandle) { memoryHandle = memHandle; if (state == State::Stopped) { return false; } state = State::Stopped; PRINT_DEBUGGER_THREAD_LOG("Stopped thread: %s", toString().c_str()); return true; } bool verifyStopped(uint8_t newCounter) { PRINT_DEBUGGER_THREAD_LOG("EuThread::verifyStopped() Thread: %s newCounter == %d oldCounter == %d", toString().c_str(), (int32_t)newCounter, (int32_t)systemRoutineCounter); if (newCounter == systemRoutineCounter) { if 
(newCounter % 2 != 0) { if (state == State::Running) { PRINT_DEBUGGER_ERROR_LOG("Thread: %s state RUNNING when thread is stopped. Switching to STOPPED", toString().c_str()); DEBUG_BREAK_IF(true); } state = State::Stopped; return true; } } if (newCounter == (systemRoutineCounter + 2)) { state = State::Stopped; systemRoutineCounter = newCounter; return true; } else if (newCounter > systemRoutineCounter + 2) { PRINT_DEBUGGER_ERROR_LOG("Thread: %s state out of sync.", toString().c_str()); DEBUG_BREAK_IF(true); } if (newCounter % 2 == 0) { if (state == State::Stopped) { PRINT_DEBUGGER_ERROR_LOG("Thread: %s state STOPPED when thread is running. Switching to RUNNING", toString().c_str()); DEBUG_BREAK_IF(true); } state = State::Running; systemRoutineCounter = newCounter; return false; } state = State::Stopped; systemRoutineCounter = newCounter; return true; } bool resumeThread() { if (state != State::Stopped) { PRINT_DEBUGGER_THREAD_LOG("Resuming already RUNNING thread: %s", toString().c_str()); return false; } PRINT_DEBUGGER_THREAD_LOG("Resumed thread: %s", toString().c_str()); state = State::Running; memoryHandle = invalidHandle; return true; } bool isStopped() const { return state == State::Stopped; } bool isRunning() const { return state != State::Stopped; } static std::string toString(ThreadId threadId) { std::stringstream threadString; threadString << "device index = " << threadId.tileIndex << " slice = " << threadId.slice << " subslice = " << threadId.subslice << " eu = " << threadId.eu << " thread = " << threadId.thread; return threadString.str(); } std::string toString() const { return toString(threadId); } ThreadId getThreadId() const { return threadId; } uint64_t getMemoryHandle() const { return memoryHandle; } uint8_t getLastCounter() const { return systemRoutineCounter; } public: static constexpr uint64_t invalidHandle = std::numeric_limits::max(); protected: ThreadId threadId; std::atomic state = State::Unavailable; uint8_t systemRoutineCounter = 0; 
std::atomic memoryHandle = invalidHandle; }; static_assert(sizeof(EuThread::ThreadId) == sizeof(uint64_t)); } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/debug/linux/000077500000000000000000000000001422164147700244055ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/debug/linux/debug_session_linux_helper.cpp000066400000000000000000000005161422164147700325220ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/debug/debug_session.h" #include namespace L0 { DebugSession *createDebugSessionHelper(const zet_debug_config_t &config, Device *device, int debugFd) { return nullptr; } } // namespace L0compute-runtime-22.14.22890/level_zero/tools/source/metrics/000077500000000000000000000000001422164147700236265ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/metrics/CMakeLists.txt000066400000000000000000000024501422164147700263670ustar00rootroot00000000000000# # Copyright (C) 2020-2022 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_TOOLS_METRICS ) list(APPEND L0_SRCS_TOOLS_METRICS ${CMAKE_CURRENT_SOURCE_DIR}/metric.cpp ${CMAKE_CURRENT_SOURCE_DIR}/metric_oa_enumeration_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/metric_oa_enumeration_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/metric_oa_streamer_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/metric_oa_streamer_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/metric_oa_query_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/metric_oa_query_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/metric_oa_source.cpp ${CMAKE_CURRENT_SOURCE_DIR}/metric_oa_source.h ${CMAKE_CURRENT_SOURCE_DIR}/os_metric_ip_sampling.h ${CMAKE_CURRENT_SOURCE_DIR}/metric_ip_sampling_source.h ${CMAKE_CURRENT_SOURCE_DIR}/metric_ip_sampling_source.cpp ${CMAKE_CURRENT_SOURCE_DIR}/metric_ip_sampling_streamer.h ${CMAKE_CURRENT_SOURCE_DIR}/metric_ip_sampling_streamer.cpp ) if(UNIX) add_subdirectory(linux) else() add_subdirectory(windows) endif() 
target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_METRICS} ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_METRICS ${L0_SRCS_TOOLS_METRICS}) compute-runtime-22.14.22890/level_zero/tools/source/metrics/linux/000077500000000000000000000000001422164147700247655ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/metrics/linux/CMakeLists.txt000066400000000000000000000011351422164147700275250ustar00rootroot00000000000000# # Copyright (C) 2020-2022 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_TOOLS_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/os_metric_oa_query_imp_linux.cpp ${CMAKE_CURRENT_SOURCE_DIR}/os_metric_oa_enumeration_imp_linux.cpp ${CMAKE_CURRENT_SOURCE_DIR}/os_metric_ip_sampling_imp_linux.cpp ) if(UNIX) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_LINUX} ) # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_LINUX ${L0_SRCS_TOOLS_LINUX}) endif() os_metric_ip_sampling_imp_linux.cpp000066400000000000000000000172161422164147700340530ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/metrics/linux/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/helpers/constants.h" #include "shared/source/os_interface/linux/drm_neo.h" #include "shared/source/os_interface/linux/ioctl_helper.h" #include "shared/source/os_interface/linux/sys_calls.h" #include "shared/source/os_interface/os_interface.h" #include "level_zero/core/source/device/device.h" #include "level_zero/core/source/device/device_imp.h" #include "level_zero/core/source/hw_helpers/l0_hw_helper.h" #include "level_zero/tools/source/metrics/os_metric_ip_sampling.h" #include namespace L0 { constexpr uint32_t maxDssBufferSize = 512 * KB; constexpr uint32_t 
defaultPollPeriodNs = 10000000u; constexpr uint32_t unitReportSize = 64u; class MetricIpSamplingLinuxImp : public MetricIpSamplingOsInterface { public: MetricIpSamplingLinuxImp(Device &device); ~MetricIpSamplingLinuxImp() override = default; ze_result_t startMeasurement(uint32_t ¬ifyEveryNReports, uint32_t &samplingPeriodNs) override; ze_result_t stopMeasurement() override; ze_result_t readData(uint8_t *pRawData, size_t *pRawDataSize) override; uint32_t getRequiredBufferSize(const uint32_t maxReportCount) override; uint32_t getUnitReportSize() override; bool isNReportsAvailable() override; bool isDependencyAvailable() override; private: int32_t stream = -1; Device &device; ze_result_t getNearestSupportedSamplingUnit(uint32_t &samplingPeriodNs, uint32_t &samplingRate); }; MetricIpSamplingLinuxImp::MetricIpSamplingLinuxImp(Device &device) : device(device) {} ze_result_t MetricIpSamplingLinuxImp::getNearestSupportedSamplingUnit(uint32_t &samplingPeriodNs, uint32_t &samplingUnit) { static constexpr uint64_t nsecPerSec = 1000000000ull; static constexpr uint32_t samplingClockGranularity = 251u; static constexpr uint32_t minSamplingUnit = 1u; static constexpr uint32_t maxSamplingUnit = 7u; const auto drm = device.getOsInterface().getDriverModel()->as(); int32_t gpuTimeStampfrequency = 0; int32_t ret = drm->getTimestampFrequency(gpuTimeStampfrequency); if (ret < 0) { return ZE_RESULT_ERROR_UNKNOWN; } uint64_t gpuClockPeriodNs = nsecPerSec / static_cast(gpuTimeStampfrequency); uint64_t numberOfClocks = samplingPeriodNs / gpuClockPeriodNs; samplingUnit = std::clamp(static_cast(numberOfClocks / samplingClockGranularity), minSamplingUnit, maxSamplingUnit); samplingPeriodNs = samplingUnit * samplingClockGranularity * static_cast(gpuClockPeriodNs); return ZE_RESULT_SUCCESS; } ze_result_t MetricIpSamplingLinuxImp::startMeasurement(uint32_t ¬ifyEveryNReports, uint32_t &samplingPeriodNs) { const auto drm = device.getOsInterface().getDriverModel()->as(); uint32_t samplingUnit = 0; 
if (getNearestSupportedSamplingUnit(samplingPeriodNs, samplingUnit) != ZE_RESULT_SUCCESS) { return ZE_RESULT_ERROR_UNKNOWN; } DeviceImp &deviceImp = static_cast(device); auto ioctlHelper = drm->getIoctlHelper(); uint32_t euStallFdParameter = ioctlHelper->getEuStallFdParameter(); std::array properties; auto engineInfo = drm->getEngineInfo(); if (engineInfo == nullptr) { return ZE_RESULT_ERROR_UNKNOWN; } auto classInstance = engineInfo->getEngineInstance(deviceImp.getPhysicalSubDeviceId(), aub_stream::ENGINE_CCS); if (classInstance == nullptr) { return ZE_RESULT_ERROR_UNKNOWN; } if (!ioctlHelper->getEuStallProperties(properties, maxDssBufferSize, samplingUnit, defaultPollPeriodNs, classInstance->engineInstance)) { return ZE_RESULT_ERROR_UNKNOWN; } struct drm_i915_perf_open_param param = { .flags = I915_PERF_FLAG_FD_CLOEXEC | euStallFdParameter | I915_PERF_FLAG_FD_NONBLOCK, .num_properties = sizeof(properties) / 16, .properties_ptr = reinterpret_cast(properties.data()), }; stream = NEO::SysCalls::ioctl(drm->getFileDescriptor(), DRM_IOCTL_I915_PERF_OPEN, ¶m); if (stream < 0) { return ZE_RESULT_ERROR_UNKNOWN; } int32_t ret = NEO::SysCalls::ioctl(stream, I915_PERF_IOCTL_ENABLE, 0); PRINT_DEBUG_STRING(NEO::DebugManager.flags.PrintDebugMessages.get() && (ret < 0), stderr, "PRELIM_I915_PERF_IOCTL_ENABLE failed errno = %d | ret = %d \n", errno, ret); return (ret == 0) ? 
ZE_RESULT_SUCCESS : ZE_RESULT_ERROR_UNKNOWN; } ze_result_t MetricIpSamplingLinuxImp::stopMeasurement() { int32_t disableStatus = NEO::SysCalls::ioctl(stream, I915_PERF_IOCTL_DISABLE, 0); PRINT_DEBUG_STRING(NEO::DebugManager.flags.PrintDebugMessages.get() && (disableStatus < 0), stderr, "I915_PERF_IOCTL_DISABLE failed errno = %d | ret = %d \n", errno, disableStatus); int32_t closeStatus = NEO::SysCalls::close(stream); PRINT_DEBUG_STRING(NEO::DebugManager.flags.PrintDebugMessages.get() && (closeStatus < 0), stderr, "close() failed errno = %d | ret = %d \n", errno, closeStatus); stream = -1; return ((closeStatus == 0) && (disableStatus == 0)) ? ZE_RESULT_SUCCESS : ZE_RESULT_ERROR_UNKNOWN; } ze_result_t MetricIpSamplingLinuxImp::readData(uint8_t *pRawData, size_t *pRawDataSize) { ssize_t ret = NEO::SysCalls::read(stream, pRawData, *pRawDataSize); PRINT_DEBUG_STRING(NEO::DebugManager.flags.PrintDebugMessages.get() && (ret < 0), stderr, "read() failed errno = %d | ret = %d \n", errno, ret); if (ret >= 0) { *pRawDataSize = ret; return ZE_RESULT_SUCCESS; } *pRawDataSize = 0; // If read needs to try again, do not return error if (errno == EINTR || errno == EAGAIN || errno == EBUSY) { return ZE_RESULT_SUCCESS; } return ZE_RESULT_ERROR_UNKNOWN; } uint32_t MetricIpSamplingLinuxImp::getRequiredBufferSize(const uint32_t maxReportCount) { uint32_t requiredBufferSize = getUnitReportSize() * maxReportCount; const auto hwInfo = device.getNEODevice()->getHardwareInfo(); return std::min(requiredBufferSize, maxDssBufferSize * hwInfo.gtSystemInfo.MaxDualSubSlicesSupported); } uint32_t MetricIpSamplingLinuxImp::getUnitReportSize() { return unitReportSize; } bool MetricIpSamplingLinuxImp::isNReportsAvailable() { struct pollfd pollParams; memset(&pollParams, 0, sizeof(pollParams)); DEBUG_BREAK_IF(stream == -1); pollParams.fd = stream; pollParams.revents = 0; pollParams.events = POLLIN; int32_t pollResult = NEO::SysCalls::poll(&pollParams, 1, 0u); 
PRINT_DEBUG_STRING(NEO::DebugManager.flags.PrintDebugMessages.get() && (pollResult < 0), stderr, "poll() failed errno = %d | pollResult = %d \n", errno, pollResult); if (pollResult > 0) { return true; } return false; } bool MetricIpSamplingLinuxImp::isDependencyAvailable() { const auto &hardwareInfo = device.getNEODevice()->getHardwareInfo(); auto &l0HwHelper = L0HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); if (!l0HwHelper.isIpSamplingSupported(hardwareInfo)) { return false; } uint32_t notifyEveryNReports = 1u; uint32_t samplingPeriod = 100; ze_result_t status = startMeasurement(notifyEveryNReports, samplingPeriod); if (stream != -1) { stopMeasurement(); } return status == ZE_RESULT_SUCCESS ? true : false; } std::unique_ptr MetricIpSamplingOsInterface::create(Device &device) { return std::make_unique(device); } } // namespace L0 os_metric_oa_enumeration_imp_linux.cpp000066400000000000000000000053011422164147700345460ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/metrics/linux/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/linux/drm_neo.h" #include "shared/source/os_interface/linux/sys_calls.h" #include "shared/source/os_interface/os_interface.h" #include "level_zero/tools/source/metrics/metric_oa_enumeration_imp.h" #include "level_zero/tools/source/metrics/metric_oa_source.h" #include #include namespace L0 { const char *MetricEnumeration::getMetricsDiscoveryFilename() { return "libmd.so.1"; } bool MetricEnumeration::getAdapterId(uint32_t &adapterMajor, uint32_t &adapterMinor) { auto &device = metricSource.getMetricDeviceContext().getDevice(); auto &osInterface = device.getOsInterface(); auto drm = osInterface.getDriverModel()->as(); auto drmFile = drm->getFileDescriptor(); struct stat drmStat = {}; int32_t result = NEO::SysCalls::fstat(drmFile, &drmStat); adapterMajor = major(drmStat.st_rdev); adapterMinor = minor(drmStat.st_rdev); return result == 
0; } MetricsDiscovery::IAdapter_1_9 *MetricEnumeration::getMetricsAdapter() { UNRECOVERABLE_IF(pAdapterGroup == nullptr); // Obtain drm minor / major version. uint32_t drmMajor = 0; uint32_t drmMinor = 0; UNRECOVERABLE_IF(getAdapterId(drmMajor, drmMinor) == false); // Driver drm major/minor version. const int32_t drmNodePrimary = 0; // From xf86drm.h const int32_t drmNodeRender = 2; // From xf86drm.h const int32_t drmMaxDevices = 64; // From drm_drv.c#110 const int32_t drmMinorRender = drmMinor - (drmNodeRender * drmMaxDevices); const int32_t drmMinorPrimary = drmMinor - (drmNodePrimary * drmMaxDevices); // Enumerate metrics discovery adapters. for (uint32_t index = 0, count = pAdapterGroup->GetParams()->AdapterCount; index < count; ++index) { UNRECOVERABLE_IF(pAdapterGroup->GetAdapter(index) == nullptr); UNRECOVERABLE_IF(pAdapterGroup->GetAdapter(index)->GetParams() == nullptr); auto adapter = pAdapterGroup->GetAdapter(index); auto adapterParams = adapter->GetParams(); const bool validAdapterType = adapterParams->SystemId.Type == MetricsDiscovery::ADAPTER_ID_TYPE_MAJOR_MINOR; const bool validAdapterMajor = adapterParams->SystemId.MajorMinor.Major == static_cast(drmMajor); const bool validAdapterMinor = (adapterParams->SystemId.MajorMinor.Minor == drmMinorRender) || (adapterParams->SystemId.MajorMinor.Minor == drmMinorPrimary); if (validAdapterType && validAdapterMajor && validAdapterMinor) { return adapter; } } return nullptr; } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/metrics/linux/os_metric_oa_query_imp_linux.cpp000066400000000000000000000044531422164147700334530ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/linux/drm_neo.h" #include "shared/source/os_interface/os_interface.h" #include "level_zero/core/source/device/device.h" #include "level_zero/tools/source/metrics/metric_oa_query_imp.h" #include 
"level_zero/tools/source/metrics/metric_oa_source.h" using namespace MetricsLibraryApi; namespace L0 { const char *MetricsLibrary::getFilename() { return "libigdml.so.1"; } bool MetricsLibrary::getContextData(Device &device, ContextCreateData_1_0 &contextData) { auto &osInterface = device.getOsInterface(); auto drm = osInterface.getDriverModel()->as(); auto drmFileDescriptor = drm->getFileDescriptor(); auto &osData = contextData.ClientData->Linux; osData.Adapter->Type = LinuxAdapterType::DrmFileDescriptor; osData.Adapter->DrmFileDescriptor = drmFileDescriptor; return drmFileDescriptor != -1; } bool MetricsLibrary::activateConfiguration(const ConfigurationHandle_1_0 configurationHandle) { ConfigurationActivateData_1_0 activateData = {}; activateData.Type = GpuConfigurationActivationType::Tbs; const bool validMetricsLibrary = isInitialized(); const bool validConfiguration = configurationHandle.IsValid(); const bool result = validMetricsLibrary && validConfiguration && (api.ConfigurationActivate(configurationHandle, &activateData) == StatusCode::Success); DEBUG_BREAK_IF(!result); return result; } bool MetricsLibrary::deactivateConfiguration(const ConfigurationHandle_1_0 configurationHandle) { const bool validMetricsLibrary = isInitialized(); const bool validConfiguration = configurationHandle.IsValid(); const bool result = validMetricsLibrary && validConfiguration && (api.ConfigurationDeactivate(configurationHandle) == StatusCode::Success); DEBUG_BREAK_IF(!result); return result; } void MetricsLibrary::cacheConfiguration(zet_metric_group_handle_t metricGroup, ConfigurationHandle_1_0 configurationHandle) { // Linux does not support configuration cache. // Any previous configuration should be deleted. deleteAllConfigurations(); // Cache only a single configuration. 
configurations[metricGroup] = configurationHandle; } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/metrics/metric.cpp000066400000000000000000000173631422164147700256270ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/metrics/metric.h" #include "level_zero/core/source/device/device.h" #include "level_zero/core/source/device/device_imp.h" #include "level_zero/core/source/driver/driver.h" #include "level_zero/core/source/driver/driver_handle_imp.h" #include "level_zero/source/inc/ze_intel_gpu.h" #include "level_zero/tools/source/metrics/metric_ip_sampling_source.h" #include "level_zero/tools/source/metrics/metric_oa_source.h" #include #include namespace L0 { std::unique_ptr MetricDeviceContext::create(Device &device) { return std::make_unique(device); } MetricDeviceContext::MetricDeviceContext(Device &inputDevice) : device(inputDevice) { auto deviceNeo = device.getNEODevice(); bool isSubDevice = deviceNeo->isSubDevice(); subDeviceIndex = isSubDevice ? 
static_cast(deviceNeo)->getSubDeviceIndex() : 0; multiDeviceCapable = !isSubDevice && device.isImplicitScalingCapable(); metricSources[MetricSource::SourceType::Oa] = OaMetricSourceImp::create(*this); metricSources[MetricSource::SourceType::IpSampling] = IpSamplingMetricSourceImp::create(*this); } bool MetricDeviceContext::enable() { bool status = false; for (auto const &entry : metricSources) { auto const &metricSource = entry.second; metricSource->enable(); status |= metricSource->isAvailable(); } return status; } ze_result_t MetricDeviceContext::metricGroupGet(uint32_t *pCount, zet_metric_group_handle_t *phMetricGroups) { ze_result_t result = ZE_RESULT_SUCCESS; uint32_t availableCount = 0; uint32_t requestCount = *pCount; for (auto const &entry : metricSources) { auto const &metricSource = entry.second; if (!metricSource->isAvailable()) { continue; } result = metricSource->metricGroupGet(&requestCount, phMetricGroups); if (result == ZE_RESULT_ERROR_UNSUPPORTED_FEATURE) { result = ZE_RESULT_SUCCESS; continue; } if (result != ZE_RESULT_SUCCESS) { break; } availableCount += requestCount; if (*pCount == 0) { requestCount = 0; } else { DEBUG_BREAK_IF(availableCount > *pCount); phMetricGroups += requestCount; requestCount = *pCount - availableCount; if (requestCount == 0) { break; } } } *pCount = availableCount; return result; } ze_result_t MetricDeviceContext::activateMetricGroupsDeferred(uint32_t count, zet_metric_group_handle_t *phMetricGroups) { // Activation: postpone until zetMetricStreamerOpen or zeCommandQueueExecuteCommandLists // Deactivation: execute immediately. 
if (phMetricGroups == nullptr) { return deActivateAllDomains(); } for (auto index = 0u; index < count; index++) { zet_metric_group_handle_t hMetricGroup = MetricGroup::fromHandle(phMetricGroups[index])->getMetricGroupForSubDevice(subDeviceIndex); zet_metric_group_properties_t properties = {ZET_STRUCTURE_TYPE_METRIC_GROUP_PROPERTIES}; MetricGroup::fromHandle(hMetricGroup)->getProperties(&properties); auto domain = properties.domain; // Domain already associated with the same handle. if (domains[domain].first == hMetricGroup) { continue; } // Domain empty; So create new deactiavted association. if (domains[domain].first == nullptr) { domains[domain].first = hMetricGroup; domains[domain].second = false; continue; } // Attempt to overwrite a previous association is an error. return ZE_RESULT_ERROR_UNKNOWN; } return ZE_RESULT_SUCCESS; } ze_result_t MetricDeviceContext::activateAllDomains() { for (auto &entry : domains) { auto &metricGroup = entry.second; MetricGroup::fromHandle(metricGroup.first)->activate(); metricGroup.second = true; } return ZE_RESULT_SUCCESS; } ze_result_t MetricDeviceContext::deActivateAllDomains() { for (auto &entry : domains) { auto &metricGroup = entry.second; if (metricGroup.second == true) { MetricGroup::fromHandle(metricGroup.first)->deactivate(); } metricGroup = {}; } return ZE_RESULT_SUCCESS; } ze_result_t MetricDeviceContext::appendMetricMemoryBarrier(CommandList &commandList) { bool isSuccess = false; for (auto const &entry : metricSources) { auto const &metricSource = entry.second; if (!metricSource->isAvailable()) { continue; } ze_result_t result = metricSource->appendMetricMemoryBarrier(commandList); if (result == ZE_RESULT_SUCCESS) { isSuccess = true; } else if (result != ZE_RESULT_ERROR_UNSUPPORTED_FEATURE) { return result; } } return isSuccess == false ? 
ZE_RESULT_ERROR_UNSUPPORTED_FEATURE : ZE_RESULT_SUCCESS; } bool MetricDeviceContext::isMetricGroupActivated(const zet_metric_group_handle_t hMetricGroup) const { for (auto const &entry : domains) { auto const &metricGroup = entry.second; if (metricGroup.first == hMetricGroup) { return true; } } return false; } bool MetricDeviceContext::isMetricGroupActivated() const { for (auto const &entry : domains) { auto const &metricGroup = entry.second; if (metricGroup.second == true) { return true; } } return false; } bool MetricDeviceContext::isImplicitScalingCapable() const { return multiDeviceCapable; } ze_result_t MetricDeviceContext::activateMetricGroups() { return activateAllDomains(); } uint32_t MetricDeviceContext::getSubDeviceIndex() const { return subDeviceIndex; } Device &MetricDeviceContext::getDevice() const { return device; } ze_result_t MetricDeviceContext::enableMetricApi() { bool failed = false; auto driverHandle = L0::DriverHandle::fromHandle(GlobalDriverHandle); auto rootDevices = std::vector(); auto subDevices = std::vector(); // Obtain root devices. uint32_t rootDeviceCount = 0; driverHandle->getDevice(&rootDeviceCount, nullptr); rootDevices.resize(rootDeviceCount); driverHandle->getDevice(&rootDeviceCount, rootDevices.data()); for (auto rootDeviceHandle : rootDevices) { auto rootDevice = static_cast(L0::Device::fromHandle(rootDeviceHandle)); // Initialize root device. failed |= !rootDevice->metricContext->enable(); if (failed) { break; } // Initialize sub devices. for (uint32_t i = 0; i < rootDevice->numSubDevices; ++i) { failed |= !rootDevice->subDevices[i]->getMetricDeviceContext().enable(); } } return failed ? 
ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE : ZE_RESULT_SUCCESS; } ze_result_t metricGroupGet(zet_device_handle_t hDevice, uint32_t *pCount, zet_metric_group_handle_t *phMetricGroups) { auto device = Device::fromHandle(hDevice); return device->getMetricDeviceContext().metricGroupGet(pCount, phMetricGroups); } ze_result_t metricStreamerOpen(zet_context_handle_t hContext, zet_device_handle_t hDevice, zet_metric_group_handle_t hMetricGroup, zet_metric_streamer_desc_t *pDesc, ze_event_handle_t hNotificationEvent, zet_metric_streamer_handle_t *phMetricStreamer) { return MetricGroup::fromHandle(hMetricGroup)->streamerOpen(hContext, hDevice, pDesc, hNotificationEvent, phMetricStreamer); } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/metrics/metric.h000066400000000000000000000155001422164147700252630ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/core/source/event/event.h" #include #include "metrics_discovery_api.h" #include struct _zet_metric_group_handle_t {}; struct _zet_metric_handle_t {}; struct _zet_metric_streamer_handle_t {}; struct _zet_metric_query_pool_handle_t {}; struct _zet_metric_query_handle_t {}; namespace L0 { struct CommandList; struct MetricStreamer; class MetricSource { public: enum class SourceType { Undefined, Oa, IpSampling }; virtual void enable() = 0; virtual bool isAvailable() = 0; virtual ze_result_t appendMetricMemoryBarrier(CommandList &commandList) = 0; virtual ze_result_t metricGroupGet(uint32_t *pCount, zet_metric_group_handle_t *phMetricGroups) = 0; virtual ~MetricSource() = default; }; class MetricDeviceContext { public: MetricDeviceContext(Device &device); ze_result_t metricGroupGet(uint32_t *pCount, zet_metric_group_handle_t *phMetricGroups); ze_result_t activateMetricGroupsDeferred(uint32_t count, zet_metric_group_handle_t *phMetricGroups); ze_result_t activateMetricGroups(); ze_result_t 
appendMetricMemoryBarrier(CommandList &commandList); bool isMetricGroupActivated(const zet_metric_group_handle_t hMetricGroup) const; bool isMetricGroupActivated() const; bool isImplicitScalingCapable() const; Device &getDevice() const; uint32_t getSubDeviceIndex() const; template T &getMetricSource() const; void setSubDeviceIndex(uint32_t subDeviceIndex) { this->subDeviceIndex = subDeviceIndex; } static std::unique_ptr create(Device &device); static ze_result_t enableMetricApi(); private: bool enable(); ze_result_t activateAllDomains(); ze_result_t deActivateAllDomains(); struct Device &device; std::map> domains; bool multiDeviceCapable = false; uint32_t subDeviceIndex = 0; std::map> metricSources; }; struct Metric : _zet_metric_handle_t { virtual ~Metric() = default; virtual ze_result_t getProperties(zet_metric_properties_t *pProperties) = 0; static Metric *fromHandle(zet_metric_handle_t handle) { return static_cast(handle); } inline zet_metric_handle_t toHandle() { return this; } }; struct MetricGroup : _zet_metric_group_handle_t { virtual ~MetricGroup() = default; virtual ze_result_t getProperties(zet_metric_group_properties_t *pProperties) = 0; virtual ze_result_t metricGet(uint32_t *pCount, zet_metric_handle_t *phMetrics) = 0; virtual ze_result_t calculateMetricValues(const zet_metric_group_calculation_type_t type, size_t rawDataSize, const uint8_t *pRawData, uint32_t *pMetricValueCount, zet_typed_value_t *pMetricValues) = 0; virtual ze_result_t calculateMetricValuesExp(const zet_metric_group_calculation_type_t type, size_t rawDataSize, const uint8_t *pRawData, uint32_t *pSetCount, uint32_t *pTotalMetricValueCount, uint32_t *pMetricCounts, zet_typed_value_t *pMetricValues) = 0; static MetricGroup *fromHandle(zet_metric_group_handle_t handle) { return static_cast(handle); } zet_metric_group_handle_t toHandle() { return this; } virtual bool activate() = 0; virtual bool deactivate() = 0; virtual zet_metric_group_handle_t getMetricGroupForSubDevice(const uint32_t 
subDeviceIndex) = 0; virtual ze_result_t streamerOpen( zet_context_handle_t hContext, zet_device_handle_t hDevice, zet_metric_streamer_desc_t *desc, ze_event_handle_t hNotificationEvent, zet_metric_streamer_handle_t *phMetricStreamer) = 0; virtual ze_result_t metricQueryPoolCreate( zet_context_handle_t hContext, zet_device_handle_t hDevice, const zet_metric_query_pool_desc_t *desc, zet_metric_query_pool_handle_t *phMetricQueryPool) = 0; }; struct MetricGroupCalculateHeader { static constexpr uint32_t magicValue = 0xFFFEDCBA; uint32_t magic; uint32_t dataCount; uint32_t rawDataOffsets; uint32_t rawDataSizes; uint32_t rawDataOffset; }; struct MetricStreamer : _zet_metric_streamer_handle_t { virtual ~MetricStreamer() = default; virtual ze_result_t readData(uint32_t maxReportCount, size_t *pRawDataSize, uint8_t *pRawData) = 0; virtual ze_result_t close() = 0; static MetricStreamer *fromHandle(zet_metric_streamer_handle_t handle) { return static_cast(handle); } virtual ze_result_t appendStreamerMarker(CommandList &commandList, uint32_t value) = 0; virtual Event::State getNotificationState() = 0; inline zet_metric_streamer_handle_t toHandle() { return this; } }; struct MetricQueryPool : _zet_metric_query_pool_handle_t { virtual ~MetricQueryPool() = default; virtual ze_result_t destroy() = 0; virtual ze_result_t metricQueryCreate(uint32_t index, zet_metric_query_handle_t *phMetricQuery) = 0; static MetricQueryPool *fromHandle(zet_metric_query_pool_handle_t handle); zet_metric_query_pool_handle_t toHandle(); }; struct MetricQuery : _zet_metric_query_handle_t { virtual ~MetricQuery() = default; virtual ze_result_t appendBegin(CommandList &commandList) = 0; virtual ze_result_t appendEnd(CommandList &commandList, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) = 0; virtual ze_result_t getData(size_t *pRawDataSize, uint8_t *pRawData) = 0; virtual ze_result_t reset() = 0; virtual ze_result_t destroy() = 0; static MetricQuery 
*fromHandle(zet_metric_query_handle_t handle); zet_metric_query_handle_t toHandle(); }; // MetricGroup. ze_result_t metricGroupGet(zet_device_handle_t hDevice, uint32_t *pCount, zet_metric_group_handle_t *phMetricGroups); // MetricStreamer. ze_result_t metricStreamerOpen(zet_context_handle_t hContext, zet_device_handle_t hDevice, zet_metric_group_handle_t hMetricGroup, zet_metric_streamer_desc_t *pDesc, ze_event_handle_t hNotificationEvent, zet_metric_streamer_handle_t *phMetricStreamer); // MetricQueryPool. ze_result_t metricQueryPoolCreate(zet_context_handle_t hContext, zet_device_handle_t hDevice, zet_metric_group_handle_t hMetricGroup, const zet_metric_query_pool_desc_t *pDesc, zet_metric_query_pool_handle_t *phMetricQueryPool); } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/metrics/metric_ip_sampling_source.cpp000066400000000000000000000167461422164147700315750ustar00rootroot00000000000000/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/metrics/metric_ip_sampling_source.h" #include "level_zero/tools/source/metrics/metric.h" #include "level_zero/tools/source/metrics/os_metric_ip_sampling.h" #include namespace L0 { constexpr uint32_t ipSamplinMetricCount = 10u; constexpr uint32_t ipSamplinDomainId = 100u; std::unique_ptr IpSamplingMetricSourceImp::create(const MetricDeviceContext &metricDeviceContext) { return std::unique_ptr(new (std::nothrow) IpSamplingMetricSourceImp(metricDeviceContext)); } IpSamplingMetricSourceImp::IpSamplingMetricSourceImp(const MetricDeviceContext &metricDeviceContext) : metricDeviceContext(metricDeviceContext) { metricOsInterface = MetricIpSamplingOsInterface::create(metricDeviceContext.getDevice()); } void IpSamplingMetricSourceImp::enable() { isEnabled = metricOsInterface->isDependencyAvailable(); } bool IpSamplingMetricSourceImp::isAvailable() { return isEnabled; } void IpSamplingMetricSourceImp::cacheMetricGroup() { std::vector metrics = {}; 
metrics.reserve(ipSamplinMetricCount); zet_metric_properties_t metricProperties = {}; metricProperties.stype = ZET_STRUCTURE_TYPE_METRIC_PROPERTIES; metricProperties.pNext = nullptr; strcpy_s(metricProperties.component, ZET_MAX_METRIC_COMPONENT, "XVE"); metricProperties.tierNumber = 4; metricProperties.resultType = ZET_VALUE_TYPE_UINT64; // Preparing properties for IP seperately because of unique values strcpy_s(metricProperties.name, ZET_MAX_METRIC_NAME, "IP"); strcpy_s(metricProperties.description, ZET_MAX_METRIC_DESCRIPTION, "IP address"); metricProperties.metricType = ZET_METRIC_TYPE_IP_EXP; strcpy_s(metricProperties.resultUnits, ZET_MAX_METRIC_RESULT_UNITS, "Address"); metrics.push_back(IpSamplingMetricImp(metricProperties)); std::vector> metricPropertiesList = { {"Active", "Active cycles"}, {"ControlStall", "Stall on control"}, {"PipeStall", "Stall on pipe"}, {"SendStall", "Stall on send"}, {"DistStall", "Stall on distance"}, {"SbidStall", "Stall on scoreboard"}, {"SyncStall", "Stall on sync"}, {"InstrFetchStall", "Stall on instruction fetch"}, {"OtherStall", "Stall on other condition"}, }; // Preparing properties for others because of common values metricProperties.metricType = ZET_METRIC_TYPE_EVENT; strcpy_s(metricProperties.resultUnits, ZET_MAX_METRIC_RESULT_UNITS, "Events"); for (auto &property : metricPropertiesList) { strcpy_s(metricProperties.name, ZET_MAX_METRIC_NAME, property.first); strcpy_s(metricProperties.description, ZET_MAX_METRIC_DESCRIPTION, property.second); metrics.push_back(IpSamplingMetricImp(metricProperties)); } cachedMetricGroup = IpSamplingMetricGroupImp::create(metrics); DEBUG_BREAK_IF(cachedMetricGroup == nullptr); } ze_result_t IpSamplingMetricSourceImp::metricGroupGet(uint32_t *pCount, zet_metric_group_handle_t *phMetricGroups) { if (!isEnabled) { *pCount = 0; return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } if (*pCount == 0) { *pCount = 1; return ZE_RESULT_SUCCESS; } if (cachedMetricGroup == nullptr) { cacheMetricGroup(); } 
DEBUG_BREAK_IF(phMetricGroups == nullptr); phMetricGroups[0] = cachedMetricGroup->toHandle(); *pCount = 1; return ZE_RESULT_SUCCESS; } ze_result_t IpSamplingMetricSourceImp::appendMetricMemoryBarrier(CommandList &commandList) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } void IpSamplingMetricSourceImp::setMetricOsInterface(std::unique_ptr &metricOsInterface) { this->metricOsInterface = std::move(metricOsInterface); } IpSamplingMetricGroupImp::IpSamplingMetricGroupImp(std::vector &metrics) { this->metrics.reserve(metrics.size()); for (const auto &metric : metrics) { this->metrics.push_back(std::make_unique(metric)); } properties.stype = ZET_STRUCTURE_TYPE_METRIC_GROUP_PROPERTIES; properties.pNext = nullptr; strcpy_s(properties.name, ZET_MAX_METRIC_GROUP_NAME, "EuStallSampling"); strcpy_s(properties.description, ZET_MAX_METRIC_GROUP_DESCRIPTION, "EU stall sampling"); properties.samplingType = ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_TIME_BASED; properties.domain = ipSamplinDomainId; properties.metricCount = ipSamplinMetricCount; } ze_result_t IpSamplingMetricGroupImp::getProperties(zet_metric_group_properties_t *pProperties) { *pProperties = properties; return ZE_RESULT_SUCCESS; } ze_result_t IpSamplingMetricGroupImp::metricGet(uint32_t *pCount, zet_metric_handle_t *phMetrics) { if (*pCount == 0) { *pCount = static_cast(metrics.size()); return ZE_RESULT_SUCCESS; } // User is expected to allocate space. 
DEBUG_BREAK_IF(phMetrics == nullptr); *pCount = std::min(*pCount, static_cast(metrics.size())); for (uint32_t i = 0; i < *pCount; i++) { phMetrics[i] = metrics[i]->toHandle(); } return ZE_RESULT_SUCCESS; } ze_result_t IpSamplingMetricGroupImp::calculateMetricValues(const zet_metric_group_calculation_type_t type, size_t rawDataSize, const uint8_t *pRawData, uint32_t *pMetricValueCount, zet_typed_value_t *pMetricValues) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t IpSamplingMetricGroupImp::calculateMetricValuesExp(const zet_metric_group_calculation_type_t type, size_t rawDataSize, const uint8_t *pRawData, uint32_t *pSetCount, uint32_t *pTotalMetricValueCount, uint32_t *pMetricCounts, zet_typed_value_t *pMetricValues) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } bool IpSamplingMetricGroupImp::activate() { // There is no hardware specific activation, since metric collection starts in streamer open return true; } bool IpSamplingMetricGroupImp::deactivate() { return true; } zet_metric_group_handle_t IpSamplingMetricGroupImp::getMetricGroupForSubDevice(const uint32_t subDeviceIndex) { return toHandle(); } ze_result_t IpSamplingMetricGroupImp::metricQueryPoolCreate( zet_context_handle_t hContext, zet_device_handle_t hDevice, const zet_metric_query_pool_desc_t *desc, zet_metric_query_pool_handle_t *phMetricQueryPool) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } std::unique_ptr IpSamplingMetricGroupImp::create(std::vector &ipSamplingMetrics) { return std::unique_ptr(new (std::nothrow) IpSamplingMetricGroupImp(ipSamplingMetrics)); } IpSamplingMetricImp::IpSamplingMetricImp(zet_metric_properties_t &properties) : properties(properties) { } ze_result_t IpSamplingMetricImp::getProperties(zet_metric_properties_t *pProperties) { *pProperties = properties; return ZE_RESULT_SUCCESS; } template <> IpSamplingMetricSourceImp &MetricDeviceContext::getMetricSource() const { return static_cast(*metricSources.at(MetricSource::SourceType::IpSampling)); } } // namespace 
L0 compute-runtime-22.14.22890/level_zero/tools/source/metrics/metric_ip_sampling_source.h000066400000000000000000000072401422164147700312270ustar00rootroot00000000000000/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/tools/source/metrics/metric.h" #include "level_zero/tools/source/metrics/os_metric_ip_sampling.h" namespace L0 { struct IpSamplingMetricImp; struct IpSamplingMetricGroupImp; struct IpSamplingMetricStreamerImp; class IpSamplingMetricSourceImp : public MetricSource { public: IpSamplingMetricSourceImp(const MetricDeviceContext &metricDeviceContext); virtual ~IpSamplingMetricSourceImp() = default; void enable() override; bool isAvailable() override; ze_result_t metricGroupGet(uint32_t *pCount, zet_metric_group_handle_t *phMetricGroups) override; ze_result_t appendMetricMemoryBarrier(CommandList &commandList) override; void setMetricOsInterface(std::unique_ptr &metricOsInterface); static std::unique_ptr create(const MetricDeviceContext &metricDeviceContext); MetricIpSamplingOsInterface *getMetricOsInterface() { return metricOsInterface.get(); } IpSamplingMetricStreamerImp *pActiveStreamer = nullptr; protected: void cacheMetricGroup(); bool isEnabled = false; const MetricDeviceContext &metricDeviceContext; std::unique_ptr metricOsInterface = nullptr; std::unique_ptr cachedMetricGroup = nullptr; }; struct IpSamplingMetricGroupImp : public MetricGroup { IpSamplingMetricGroupImp(std::vector &metrics); virtual ~IpSamplingMetricGroupImp() = default; ze_result_t getProperties(zet_metric_group_properties_t *pProperties) override; ze_result_t metricGet(uint32_t *pCount, zet_metric_handle_t *phMetrics) override; ze_result_t calculateMetricValues(const zet_metric_group_calculation_type_t type, size_t rawDataSize, const uint8_t *pRawData, uint32_t *pMetricValueCount, zet_typed_value_t *pMetricValues) override; ze_result_t calculateMetricValuesExp(const zet_metric_group_calculation_type_t type, size_t 
rawDataSize, const uint8_t *pRawData, uint32_t *pSetCount, uint32_t *pTotalMetricValueCount, uint32_t *pMetricCounts, zet_typed_value_t *pMetricValues) override; bool activate() override; bool deactivate() override; zet_metric_group_handle_t getMetricGroupForSubDevice(const uint32_t subDeviceIndex) override; ze_result_t streamerOpen( zet_context_handle_t hContext, zet_device_handle_t hDevice, zet_metric_streamer_desc_t *desc, ze_event_handle_t hNotificationEvent, zet_metric_streamer_handle_t *phMetricStreamer) override; ze_result_t metricQueryPoolCreate( zet_context_handle_t hContext, zet_device_handle_t hDevice, const zet_metric_query_pool_desc_t *desc, zet_metric_query_pool_handle_t *phMetricQueryPool) override; static std::unique_ptr create(std::vector &ipSamplingMetrics); private: std::vector> metrics = {}; zet_metric_group_properties_t properties = {}; }; struct IpSamplingMetricImp : public Metric { virtual ~IpSamplingMetricImp() = default; IpSamplingMetricImp(zet_metric_properties_t &properties); ze_result_t getProperties(zet_metric_properties_t *pProperties) override; private: zet_metric_properties_t properties; }; template <> IpSamplingMetricSourceImp &MetricDeviceContext::getMetricSource() const; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/metrics/metric_ip_sampling_streamer.cpp000066400000000000000000000071031422164147700321020ustar00rootroot00000000000000/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/metrics/metric_ip_sampling_streamer.h" #include "level_zero/tools/source/metrics/metric.h" #include "level_zero/tools/source/metrics/metric_ip_sampling_source.h" #include "level_zero/tools/source/metrics/os_metric_ip_sampling.h" #include namespace L0 { ze_result_t IpSamplingMetricGroupImp::streamerOpen( zet_context_handle_t hContext, zet_device_handle_t hDevice, zet_metric_streamer_desc_t *desc, ze_event_handle_t hNotificationEvent, zet_metric_streamer_handle_t 
*phMetricStreamer) { auto device = Device::fromHandle(hDevice); // Check whether metric group is activated. if (!device->getMetricDeviceContext().isMetricGroupActivated(this->toHandle())) { return ZE_RESULT_NOT_READY; } auto &metricSource = device->getMetricDeviceContext().getMetricSource(); // Check whether metric streamer is already open. if (metricSource.pActiveStreamer != nullptr) { return ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE; } auto pStreamerImp = new IpSamplingMetricStreamerImp(metricSource); UNRECOVERABLE_IF(pStreamerImp == nullptr); const ze_result_t result = metricSource.getMetricOsInterface()->startMeasurement(desc->notifyEveryNReports, desc->samplingPeriod); if (result == ZE_RESULT_SUCCESS) { metricSource.pActiveStreamer = pStreamerImp; pStreamerImp->attachEvent(hNotificationEvent); } else { delete pStreamerImp; pStreamerImp = nullptr; return result; } *phMetricStreamer = pStreamerImp->toHandle(); return ZE_RESULT_SUCCESS; } ze_result_t IpSamplingMetricStreamerImp::readData(uint32_t maxReportCount, size_t *pRawDataSize, uint8_t *pRawData) { // Return required size if requested. if (*pRawDataSize == 0) { *pRawDataSize = ipSamplingSource.getMetricOsInterface()->getRequiredBufferSize(maxReportCount); return ZE_RESULT_SUCCESS; } // If there is a difference in pRawDataSize and maxReportCount, use the minimum value for reading. 
if (maxReportCount != UINT32_MAX) { size_t maxSizeRequired = ipSamplingSource.getMetricOsInterface()->getRequiredBufferSize(maxReportCount); *pRawDataSize = std::min(maxSizeRequired, *pRawDataSize); } return ipSamplingSource.getMetricOsInterface()->readData(pRawData, pRawDataSize); } ze_result_t IpSamplingMetricStreamerImp::close() { const ze_result_t result = ipSamplingSource.getMetricOsInterface()->stopMeasurement(); detachEvent(); ipSamplingSource.pActiveStreamer = nullptr; delete this; return result; } Event::State IpSamplingMetricStreamerImp::getNotificationState() { return ipSamplingSource.getMetricOsInterface()->isNReportsAvailable() ? Event::State::STATE_SIGNALED : Event::State::STATE_INITIAL; } ze_result_t IpSamplingMetricStreamerImp::appendStreamerMarker(CommandList &commandList, uint32_t value) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } void IpSamplingMetricStreamerImp::attachEvent(ze_event_handle_t hNotificationEvent) { // Associate notification event with metric streamer. pNotificationEvent = Event::fromHandle(hNotificationEvent); if (pNotificationEvent != nullptr) { pNotificationEvent->metricStreamer = this; } } void IpSamplingMetricStreamerImp::detachEvent() { // Release notification event. 
if (pNotificationEvent != nullptr) { pNotificationEvent->metricStreamer = nullptr; } } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/metrics/metric_ip_sampling_streamer.h000066400000000000000000000017151422164147700315520ustar00rootroot00000000000000/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/tools/source/metrics/metric.h" #include "level_zero/tools/source/metrics/os_metric_ip_sampling.h" namespace L0 { class IpSamplingMetricSourceImp; struct IpSamplingMetricStreamerImp : MetricStreamer { IpSamplingMetricStreamerImp(IpSamplingMetricSourceImp &ipSamplingSource) : ipSamplingSource(ipSamplingSource) {} ~IpSamplingMetricStreamerImp() override{}; ze_result_t readData(uint32_t maxReportCount, size_t *pRawDataSize, uint8_t *pRawData) override; ze_result_t close() override; Event::State getNotificationState() override; ze_result_t appendStreamerMarker(CommandList &commandList, uint32_t value) override; void attachEvent(ze_event_handle_t hNotificationEvent); void detachEvent(); protected: Event *pNotificationEvent = nullptr; IpSamplingMetricSourceImp &ipSamplingSource; }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/metrics/metric_oa_enumeration_imp.cpp000066400000000000000000001102601422164147700315470ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/metrics/metric_oa_enumeration_imp.h" #include "shared/source/helpers/debug_helpers.h" #include "shared/source/helpers/string.h" #include "shared/source/os_interface/os_library.h" #include "level_zero/core/source/device/device_imp.h" #include "level_zero/tools/source/metrics/metric_oa_query_imp.h" #include "level_zero/tools/source/metrics/metric_oa_source.h" #include namespace L0 { const char *MetricEnumeration::oaConcurrentGroupName = "OA"; MetricEnumeration::MetricEnumeration(OaMetricSourceImp &metricSourceInput) 
: metricSource(metricSourceInput) {} MetricEnumeration::~MetricEnumeration() { cleanupMetricsDiscovery(); initializationState = ZE_RESULT_ERROR_UNINITIALIZED; } ze_result_t MetricEnumeration::metricGroupGet(uint32_t &count, zet_metric_group_handle_t *phMetricGroups) { ze_result_t result = initialize(); if (result != ZE_RESULT_SUCCESS) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } if (count == 0) { count = static_cast(metricGroups.size()); return ZE_RESULT_SUCCESS; } else if (count > metricGroups.size()) { count = static_cast(metricGroups.size()); } for (uint32_t i = 0; i < count; i++) { phMetricGroups[i] = metricGroups[i]->toHandle(); } return ZE_RESULT_SUCCESS; } MetricGroup *MetricEnumeration::getMetricGroupByIndex(const uint32_t index) { return metricGroups[index]; } uint32_t MetricEnumeration::getMetricGroupCount() { return static_cast(metricGroups.size()); } bool MetricEnumeration::isInitialized() { if (initializationState == ZE_RESULT_ERROR_UNINITIALIZED) { initialize(); } return initializationState == ZE_RESULT_SUCCESS; } ze_result_t MetricEnumeration::initialize() { if (initializationState == ZE_RESULT_ERROR_UNINITIALIZED) { if (hMetricsDiscovery && openMetricsDiscovery() == ZE_RESULT_SUCCESS && cacheMetricInformation() == ZE_RESULT_SUCCESS) { initializationState = ZE_RESULT_SUCCESS; } else { initializationState = ZE_RESULT_ERROR_UNKNOWN; cleanupMetricsDiscovery(); } } return initializationState; } ze_result_t MetricEnumeration::loadMetricsDiscovery() { // Load library. hMetricsDiscovery.reset(OaMetricSourceImp::osLibraryLoadFunction(getMetricsDiscoveryFilename())); // Load exported functions. 
if (hMetricsDiscovery) { openAdapterGroup = reinterpret_cast( hMetricsDiscovery->getProcAddress("OpenAdapterGroup")); } if (openAdapterGroup == nullptr) { NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "cannot load %s exported functions\n", MetricEnumeration::getMetricsDiscoveryFilename()); cleanupMetricsDiscovery(); return ZE_RESULT_ERROR_NOT_AVAILABLE; } // Return success if exported functions have been loaded. return ZE_RESULT_SUCCESS; } ze_result_t MetricEnumeration::openMetricsDiscovery() { UNRECOVERABLE_IF(openAdapterGroup == nullptr); const uint32_t subDeviceIndex = metricSource.getSubDeviceIndex(); // Clean up members. pAdapterGroup = nullptr; pAdapter = nullptr; pMetricsDevice = nullptr; // Open adapter group. openAdapterGroup((MetricsDiscovery::IAdapterGroupLatest **)&pAdapterGroup); if (pAdapterGroup == nullptr) { NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "unable to open metrics adapter groups %s\n", " "); cleanupMetricsDiscovery(); return ZE_RESULT_ERROR_UNKNOWN; } // Obtain metrics adapter that matches adapter used by l0. pAdapter = getMetricsAdapter(); if (pAdapter == nullptr) { NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "unable to open metrics adapter %s\n", " "); cleanupMetricsDiscovery(); return ZE_RESULT_ERROR_NOT_AVAILABLE; } auto &device = metricSource.getDevice(); const auto &deviceImp = *static_cast(&device); if (metricSource.isImplicitScalingCapable()) { // Open metrics device for each sub device. 
for (size_t i = 0; i < deviceImp.numSubDevices; i++) { auto &metricsDevice = deviceImp.subDevices[i]->getMetricDeviceContext().getMetricSource().getMetricEnumeration().pMetricsDevice; pAdapter->OpenMetricsSubDevice(static_cast(i), &metricsDevice); deviceImp.subDevices[i]->getMetricDeviceContext().getMetricSource().getMetricEnumeration().pAdapter = pAdapter; if (metricsDevice == nullptr) { NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "unable to open metrics device %u\n", i); cleanupMetricsDiscovery(); return ZE_RESULT_ERROR_NOT_AVAILABLE; } } } else { if (subDeviceIndex == 0) { // Open metrics device for root device or sub device with index 0. pAdapter->OpenMetricsDevice(&pMetricsDevice); } else { // Open metrics device for a given sub device index. pAdapter->OpenMetricsSubDevice(subDeviceIndex, &pMetricsDevice); } if (pMetricsDevice == nullptr) { NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "unable to open metrics device %u\n", subDeviceIndex); cleanupMetricsDiscovery(); return ZE_RESULT_ERROR_NOT_AVAILABLE; } } return ZE_RESULT_SUCCESS; } ze_result_t MetricEnumeration::cleanupMetricsDiscovery() { if (pAdapter) { auto &device = metricSource.getDevice(); const auto &deviceImp = *static_cast(&device); if (metricSource.isImplicitScalingCapable()) { for (size_t i = 0; i < deviceImp.numSubDevices; i++) { deviceImp.subDevices[i]->getMetricDeviceContext().getMetricSource().getMetricEnumeration().cleanupMetricsDiscovery(); } } else if (pMetricsDevice) { // Close metrics device for one sub device or root device. 
pAdapter->CloseMetricsDevice(pMetricsDevice); pMetricsDevice = nullptr; } } for (size_t i = 0; i < metricGroups.size(); ++i) { delete metricGroups[i]; } metricGroups.clear(); if (hMetricsDiscovery != nullptr) { if (pAdapterGroup != nullptr) { pAdapterGroup->Close(); } pAdapterGroup = nullptr; openAdapterGroup = nullptr; hMetricsDiscovery.reset(); } return ZE_RESULT_SUCCESS; } // namespace L0 ze_result_t MetricEnumeration::cacheMetricInformation() { auto &device = metricSource.getDevice(); const auto &deviceImp = *static_cast(&device); if (metricSource.isImplicitScalingCapable()) { ze_result_t result = ZE_RESULT_SUCCESS; // Get metric information from all sub devices. for (auto subDevice : deviceImp.subDevices) { result = subDevice->getMetricDeviceContext().getMetricSource().getMetricEnumeration().cacheMetricInformation(); if (ZE_RESULT_SUCCESS != result) { return result; } } // Get metric groups count for one sub device. const uint32_t metricGroupCount = deviceImp.subDevices[0]->getMetricDeviceContext().getMetricSource().getMetricEnumeration().getMetricGroupCount(); // Cache and aggregate all metric groups from all sub devices. for (uint32_t i = 0; i < metricGroupCount; i++) { auto metricGroupRootDevice = new OaMetricGroupImp(); for (auto subDevice : deviceImp.subDevices) { MetricGroup *metricGroupSubDevice = subDevice->getMetricDeviceContext().getMetricSource().getMetricEnumeration().getMetricGroupByIndex(i); metricGroupRootDevice->getMetricGroups().push_back(metricGroupSubDevice); } metricGroups.push_back(metricGroupRootDevice); } return result; } DEBUG_BREAK_IF(pMetricsDevice == nullptr); MetricsDiscovery::TMetricsDeviceParams_1_2 *pMetricsDeviceParams = pMetricsDevice->GetParams(); DEBUG_BREAK_IF(pMetricsDeviceParams == nullptr); // Check required Metrics Discovery API version - should be at least 1.5. 
const bool unsupportedMajorVersion = pMetricsDeviceParams->Version.MajorNumber < requiredMetricsDiscoveryMajorVersion; const bool unsupportedMinorVersion = (pMetricsDeviceParams->Version.MajorNumber == requiredMetricsDiscoveryMajorVersion) && (pMetricsDeviceParams->Version.MinorNumber < requiredMetricsDiscoveryMinorVersion); if (unsupportedMajorVersion || unsupportedMinorVersion) { // Metrics Discovery API version too low return ZE_RESULT_ERROR_UNKNOWN; } // 1. Iterate over concurrent groups. MetricsDiscovery::IConcurrentGroup_1_5 *pConcurrentGroup = nullptr; for (uint32_t i = 0; i < pMetricsDeviceParams->ConcurrentGroupsCount; ++i) { pConcurrentGroup = pMetricsDevice->GetConcurrentGroup(i); DEBUG_BREAK_IF(pConcurrentGroup == nullptr); MetricsDiscovery::TConcurrentGroupParams_1_0 *pConcurrentGroupParams = pConcurrentGroup->GetParams(); DEBUG_BREAK_IF(pConcurrentGroupParams == nullptr); // 2. Find "OA" concurrent group. if (strcmp(pConcurrentGroupParams->SymbolName, oaConcurrentGroupName) == 0) { // Reserve memory for metric groups metricGroups.reserve(pConcurrentGroupParams->MetricSetsCount); // 3. Iterate over metric sets. 
for (uint32_t j = 0; j < pConcurrentGroupParams->MetricSetsCount; ++j) { MetricsDiscovery::IMetricSet_1_5 *pMetricSet = pConcurrentGroup->GetMetricSet(j); DEBUG_BREAK_IF(pMetricSet == nullptr); cacheMetricGroup(*pMetricSet, *pConcurrentGroup, i, ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_TIME_BASED); cacheMetricGroup(*pMetricSet, *pConcurrentGroup, i, ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_EVENT_BASED); } } } return ZE_RESULT_SUCCESS; } ze_result_t MetricEnumeration::cacheMetricGroup(MetricsDiscovery::IMetricSet_1_5 &metricSet, MetricsDiscovery::IConcurrentGroup_1_5 &concurrentGroup, const uint32_t domain, const zet_metric_group_sampling_type_flag_t samplingType) { MetricsDiscovery::TMetricSetParams_1_4 *pMetricSetParams = metricSet.GetParams(); DEBUG_BREAK_IF(pMetricSetParams == nullptr); const uint32_t sourceApiMask = OaMetricGroupImp::getApiMask(samplingType); // Map metric groups to level zero format and cache them. if (pMetricSetParams->ApiMask & sourceApiMask) { metricSet.SetApiFiltering(sourceApiMask); // Obtain params once again - updated after SetApiFiltering pMetricSetParams = metricSet.GetParams(); zet_metric_group_properties_t properties = {}; properties.stype = ZET_STRUCTURE_TYPE_METRIC_GROUP_PROPERTIES; snprintf(properties.name, sizeof(properties.name), "%s", pMetricSetParams->SymbolName); // To always have null-terminated string snprintf(properties.description, sizeof(properties.description), "%s", pMetricSetParams->ShortName); properties.samplingType = samplingType; properties.domain = domain; // Concurrent group number properties.metricCount = pMetricSetParams->MetricsCount + pMetricSetParams->InformationCount; std::vector metrics; createMetrics(metricSet, metrics); auto pMetricGroup = OaMetricGroupImp::create(properties, metricSet, concurrentGroup, metrics, metricSource); DEBUG_BREAK_IF(pMetricGroup == nullptr); metricGroups.push_back(pMetricGroup); // Disable api filtering metricSet.SetApiFiltering(MetricsDiscovery::API_TYPE_ALL); } return 
ZE_RESULT_SUCCESS; } ze_result_t MetricEnumeration::createMetrics(MetricsDiscovery::IMetricSet_1_5 &metricSet, std::vector &metrics) { MetricsDiscovery::TMetricSetParams_1_4 *pMetricSetParams = metricSet.GetParams(); DEBUG_BREAK_IF(pMetricSetParams == nullptr); metrics.reserve(pMetricSetParams->MetricsCount + pMetricSetParams->InformationCount); // Map metrics to level zero format and add them to 'metrics' vector. for (uint32_t i = 0; i < pMetricSetParams->MetricsCount; ++i) { MetricsDiscovery::IMetric_1_0 *pSourceMetric = metricSet.GetMetric(i); DEBUG_BREAK_IF(pSourceMetric == nullptr); MetricsDiscovery::TMetricParams_1_0 *pSourceMetricParams = pSourceMetric->GetParams(); DEBUG_BREAK_IF(pSourceMetricParams == nullptr); zet_metric_properties_t properties = {}; properties.stype = ZET_STRUCTURE_TYPE_METRIC_PROPERTIES; snprintf(properties.name, sizeof(properties.name), "%s", pSourceMetricParams->SymbolName); // To always have a null-terminated string snprintf(properties.description, sizeof(properties.description), "%s", pSourceMetricParams->LongName); snprintf(properties.component, sizeof(properties.component), "%s", pSourceMetricParams->GroupName); snprintf(properties.resultUnits, sizeof(properties.resultUnits), "%s", pSourceMetricParams->MetricResultUnits); properties.tierNumber = getMetricTierNumber(pSourceMetricParams->UsageFlagsMask); properties.metricType = getMetricType(pSourceMetricParams->MetricType); properties.resultType = getMetricResultType(pSourceMetricParams->ResultType); auto pMetric = OaMetricImp::create(properties); UNRECOVERABLE_IF(pMetric == nullptr); metrics.push_back(pMetric); } // Map information to level zero format and add them to 'metrics' vector (as metrics). 
for (uint32_t i = 0; i < pMetricSetParams->InformationCount; ++i) { MetricsDiscovery::IInformation_1_0 *pSourceInformation = metricSet.GetInformation(i); DEBUG_BREAK_IF(pSourceInformation == nullptr); MetricsDiscovery::TInformationParams_1_0 *pSourceInformationParams = pSourceInformation->GetParams(); DEBUG_BREAK_IF(pSourceInformationParams == nullptr); zet_metric_properties_t properties = {}; properties.stype = ZET_STRUCTURE_TYPE_METRIC_PROPERTIES; snprintf(properties.name, sizeof(properties.name), "%s", pSourceInformationParams->SymbolName); // To always have a null-terminated string snprintf(properties.description, sizeof(properties.description), "%s", pSourceInformationParams->LongName); snprintf(properties.component, sizeof(properties.component), "%s", pSourceInformationParams->GroupName); snprintf(properties.resultUnits, sizeof(properties.resultUnits), "%s", pSourceInformationParams->InfoUnits); properties.tierNumber = 1; properties.metricType = getMetricType(pSourceInformationParams->InfoType); properties.resultType = properties.metricType == ZET_METRIC_TYPE_FLAG ? 
ZET_VALUE_TYPE_BOOL8 : ZET_VALUE_TYPE_UINT64;

        auto pMetric = OaMetricImp::create(properties);
        UNRECOVERABLE_IF(pMetric == nullptr);

        metrics.push_back(pMetric);
    }

    return ZE_RESULT_SUCCESS;
}

// Maps a Metrics Discovery usage-flags mask to a tier number.
// Tier bits are tested in ascending order, so if several tier bits are set the
// lowest tier wins. Returns 0 when none of TIER_1..TIER_4 is set.
uint32_t MetricEnumeration::getMetricTierNumber(const uint32_t sourceUsageFlagsMask) const {
    uint32_t tierNumber = 0;
    if (sourceUsageFlagsMask & MetricsDiscovery::USAGE_FLAG_TIER_1) {
        tierNumber = 1;
    } else if (sourceUsageFlagsMask & MetricsDiscovery::USAGE_FLAG_TIER_2) {
        tierNumber = 2;
    } else if (sourceUsageFlagsMask & MetricsDiscovery::USAGE_FLAG_TIER_3) {
        tierNumber = 3;
    } else if (sourceUsageFlagsMask & MetricsDiscovery::USAGE_FLAG_TIER_4) {
        tierNumber = 4;
    } else {
        // No tier - some metrics may have this undefined
        tierNumber = 0;
    }
    return tierNumber;
}

// Translates a Metrics Discovery metric type to the Level Zero metric type.
// Unmapped source types fall back to ZET_METRIC_TYPE_RAW.
zet_metric_type_t MetricEnumeration::getMetricType(const MetricsDiscovery::TMetricType sourceMetricType) const {
    switch (sourceMetricType) {
    case MetricsDiscovery::METRIC_TYPE_DURATION:
        return ZET_METRIC_TYPE_DURATION;
    case MetricsDiscovery::METRIC_TYPE_EVENT:
        return ZET_METRIC_TYPE_EVENT;
    case MetricsDiscovery::METRIC_TYPE_EVENT_WITH_RANGE:
        return ZET_METRIC_TYPE_EVENT_WITH_RANGE;
    case MetricsDiscovery::METRIC_TYPE_THROUGHPUT:
        return ZET_METRIC_TYPE_THROUGHPUT;
    case MetricsDiscovery::METRIC_TYPE_TIMESTAMP:
        return ZET_METRIC_TYPE_TIMESTAMP;
    case MetricsDiscovery::METRIC_TYPE_FLAG:
        return ZET_METRIC_TYPE_FLAG;
    case MetricsDiscovery::METRIC_TYPE_RATIO:
        return ZET_METRIC_TYPE_RATIO;
    case MetricsDiscovery::METRIC_TYPE_RAW:
        return ZET_METRIC_TYPE_RAW;
    default:
        // The condition `!false` is the constant true.
        DEBUG_BREAK_IF(!false);
        return ZET_METRIC_TYPE_RAW;
    }
}

// Translates a Metrics Discovery information type to the Level Zero metric type.
// VALUE, CONTEXT_ID_TAG, SAMPLE_PHASE and GPU_NODE are all reported as RAW;
// unmapped source types also fall back to ZET_METRIC_TYPE_RAW.
zet_metric_type_t MetricEnumeration::getMetricType(
    const MetricsDiscovery::TInformationType sourceInformationType) const {
    switch (sourceInformationType) {
    case MetricsDiscovery::INFORMATION_TYPE_REPORT_REASON:
        return ZET_METRIC_TYPE_EVENT;
    case MetricsDiscovery::INFORMATION_TYPE_VALUE:
    case MetricsDiscovery::INFORMATION_TYPE_CONTEXT_ID_TAG:
    case MetricsDiscovery::INFORMATION_TYPE_SAMPLE_PHASE:
    case MetricsDiscovery::INFORMATION_TYPE_GPU_NODE:
        return ZET_METRIC_TYPE_RAW;
    case MetricsDiscovery::INFORMATION_TYPE_FLAG:
        return ZET_METRIC_TYPE_FLAG;
    case MetricsDiscovery::INFORMATION_TYPE_TIMESTAMP:
        return ZET_METRIC_TYPE_TIMESTAMP;
    default:
        DEBUG_BREAK_IF(!false);
        return ZET_METRIC_TYPE_RAW;
    }
}

// Translates a Metrics Discovery result type to the Level Zero value type.
// Unmapped source types fall back to ZET_VALUE_TYPE_UINT64.
zet_value_type_t MetricEnumeration::getMetricResultType(
    const MetricsDiscovery::TMetricResultType sourceMetricResultType) const {
    switch (sourceMetricResultType) {
    case MetricsDiscovery::RESULT_UINT32:
        return ZET_VALUE_TYPE_UINT32;
    case MetricsDiscovery::RESULT_UINT64:
        return ZET_VALUE_TYPE_UINT64;
    case MetricsDiscovery::RESULT_BOOL:
        return ZET_VALUE_TYPE_BOOL8;
    case MetricsDiscovery::RESULT_FLOAT:
        return ZET_VALUE_TYPE_FLOAT32;
    default:
        DEBUG_BREAK_IF(!false);
        return ZET_VALUE_TYPE_UINT64;
    }
}

// Deletes every cached metric object held in 'metrics'.
OaMetricGroupImp ::~OaMetricGroupImp() {
    for (size_t i = 0; i < metrics.size(); ++i) {
        delete metrics[i];
    }
    metrics.clear();
};

// Writes the group properties to *pProperties.
// If this group holds sub-device groups (metricGroups is non-empty), the
// properties of the first one are returned; otherwise this group's own
// cached properties are copied.
ze_result_t OaMetricGroupImp::getProperties(zet_metric_group_properties_t *pProperties) {
    if (metricGroups.size() > 0) {
        *pProperties = OaMetricGroupImp::getProperties(metricGroups[0]);
    } else {
        copyProperties(properties, *pProperties);
    }
    return ZE_RESULT_SUCCESS;
}

// Returns, by value, the properties of the metric group behind 'handle'.
// Aborts via UNRECOVERABLE_IF when the handle does not resolve to a group.
zet_metric_group_properties_t OaMetricGroupImp::getProperties(const zet_metric_group_handle_t handle) {
    auto metricGroup = MetricGroup::fromHandle(handle);
    UNRECOVERABLE_IF(!metricGroup);

    zet_metric_group_properties_t properties = {ZET_STRUCTURE_TYPE_METRIC_GROUP_PROPERTIES};
    metricGroup->getProperties(&properties);

    return properties;
}

// Returns the metric handles of this group.
// If this group holds sub-device groups, the call is forwarded to the first one.
// A zero *pCount is a size query: only the number of metrics is written back.
ze_result_t OaMetricGroupImp::metricGet(uint32_t *pCount, zet_metric_handle_t *phMetrics) {
    if (metricGroups.size() > 0) {
        auto metricGroupSubDevice = MetricGroup::fromHandle(metricGroups[0]);
        return metricGroupSubDevice->metricGet(pCount, phMetrics);
    }

    if (*pCount == 0) {
        // NOTE(review): the cast's template argument looks lost in extraction - confirm against the upstream file.
        *pCount = static_cast(metrics.size());
        return ZE_RESULT_SUCCESS;
    }
    // User is expected to allocate space.
DEBUG_BREAK_IF(phMetrics == nullptr);

    // Never report more metrics than the group actually holds.
    if (*pCount > metrics.size()) {
        *pCount = static_cast(metrics.size());
    }

    for (uint32_t i = 0; i < *pCount; i++) {
        phMetrics[i] = metrics[i]->toHandle();
    }

    return ZE_RESULT_SUCCESS;
}

// Activates this metric group's configuration through the metrics library.
// Only event-based groups need this; any other sampling type returns true
// without touching the metrics library.
// Returns false when the configuration handle is invalid or activation fails.
bool OaMetricGroupImp::activate() {
    if (properties.samplingType != ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_EVENT_BASED) {
        return true;
    }

    auto hConfiguration = metricSource->getMetricsLibrary().getConfiguration(toHandle());
    // Validate metrics library handle.
    if (!hConfiguration.IsValid()) {
        DEBUG_BREAK_IF(true);
        return false;
    }

    // Write metric group configuration to gpu.
    const bool result = metricSource->getMetricsLibrary().activateConfiguration(hConfiguration);
    DEBUG_BREAK_IF(!result);
    return result;
}

// Deactivates this metric group's configuration through the metrics library.
// Only event-based groups need this; any other sampling type returns true
// without touching the metrics library.
bool OaMetricGroupImp::deactivate() {
    if (properties.samplingType != ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_EVENT_BASED) {
        return true;
    }

    auto hConfiguration = metricSource->getMetricsLibrary().getConfiguration(toHandle());
    // Deactivate metric group configuration using metrics library.
    metricSource->getMetricsLibrary().deactivateConfiguration(hConfiguration);

    // Release Metrics Library if it is not used.
if (metricSource->getMetricsLibrary().getMetricQueryCount() == 0) { if (metricSource->getMetricsLibrary().getInitializationState() != ZE_RESULT_ERROR_UNINITIALIZED) { metricSource->getMetricsLibrary().release(); } } return true; } bool OaMetricGroupImp::activateMetricSet() { DEBUG_BREAK_IF(pReferenceMetricSet == nullptr); const bool result = pReferenceMetricSet->Activate() == MetricsDiscovery::CC_OK; DEBUG_BREAK_IF(!result); return result; } bool OaMetricGroupImp::deactivateMetricSet() { DEBUG_BREAK_IF(pReferenceMetricSet == nullptr); const bool result = pReferenceMetricSet->Deactivate() == MetricsDiscovery::CC_OK; return result; } uint32_t OaMetricGroupImp::getApiMask(const zet_metric_group_sampling_type_flags_t samplingType) { switch (samplingType) { case ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_TIME_BASED: return MetricsDiscovery::API_TYPE_IOSTREAM; case ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_EVENT_BASED: return MetricsDiscovery::API_TYPE_OCL | MetricsDiscovery::API_TYPE_OGL4_X; default: DEBUG_BREAK_IF(true); return 0; } } zet_metric_group_handle_t OaMetricGroupImp::getMetricGroupForSubDevice(const uint32_t subDeviceIndex) { if (metricGroups.size() > 0) { return metricGroups[subDeviceIndex]; } return toHandle(); } ze_result_t OaMetricGroupImp::openIoStream(uint32_t &timerPeriodNs, uint32_t &oaBufferSize) { const auto openResult = pReferenceConcurrentGroup->OpenIoStream(pReferenceMetricSet, 0, &timerPeriodNs, &oaBufferSize); return (openResult == MetricsDiscovery::CC_OK) ? ZE_RESULT_SUCCESS : ZE_RESULT_ERROR_UNKNOWN; } ze_result_t OaMetricGroupImp::waitForReports(const uint32_t timeoutMs) { return (pReferenceConcurrentGroup->WaitForReports(timeoutMs) == MetricsDiscovery::TCompletionCode::CC_OK) ? 
ZE_RESULT_SUCCESS : ZE_RESULT_NOT_READY;
}

// Reads reports from the IO stream of the referenced concurrent group into the
// buffer that starts at reportData. reportCount is passed by address to ReadIoStream.
// Both CC_OK and CC_READ_PENDING are treated as success.
ze_result_t OaMetricGroupImp::readIoStream(uint32_t &reportCount, uint8_t &reportData) {
    // NOTE(review): the cast's template argument looks lost in extraction - confirm against the upstream file.
    char *castedReportData = reinterpret_cast(&reportData);

    const auto readResult = pReferenceConcurrentGroup->ReadIoStream(&reportCount, castedReportData, 0);

    switch (readResult) {
    case MetricsDiscovery::CC_OK:
    case MetricsDiscovery::CC_READ_PENDING:
        return ZE_RESULT_SUCCESS;

    default:
        return ZE_RESULT_ERROR_UNKNOWN;
    }
}

// Closes the IO stream of the referenced concurrent group.
// Returns ZE_RESULT_ERROR_UNKNOWN unless CloseIoStream reports CC_OK.
ze_result_t OaMetricGroupImp::closeIoStream() {
    const auto closeResult = pReferenceConcurrentGroup->CloseIoStream();
    return (closeResult == MetricsDiscovery::CC_OK) ? ZE_RESULT_SUCCESS : ZE_RESULT_ERROR_UNKNOWN;
}

// Calculates metric values (or only their count) from single-device raw data.
// Raw data whose leading bytes match MetricGroupCalculateHeader::magicValue is
// rejected with ZE_RESULT_ERROR_UNKNOWN and a debug message.
// A zero *pMetricValueCount is a size query: only the count is written back.
ze_result_t OaMetricGroupImp::calculateMetricValues(const zet_metric_group_calculation_type_t type, size_t rawDataSize,
                                                    const uint8_t *pRawData, uint32_t *pMetricValueCount,
                                                    zet_typed_value_t *pMetricValues) {
    // NOTE(review): the header is read without checking rawDataSize against
    // sizeof(MetricGroupCalculateHeader) - confirm callers always pass a large enough buffer.
    const MetricGroupCalculateHeader *pRawHeader = reinterpret_cast(pRawData);
    if (pRawHeader->magic == MetricGroupCalculateHeader::magicValue) {
        NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "%s",
                              "INFO: The call is not supported for multiple devices\n"
                              "INFO: Please use zetMetricGroupCalculateMultipleMetricValuesExp instead\n");
        return ZE_RESULT_ERROR_UNKNOWN;
    }

    const bool calculateCountOnly = *pMetricValueCount == 0;
    const bool result = calculateCountOnly
                            ? getCalculatedMetricCount(rawDataSize, *pMetricValueCount)
                            : getCalculatedMetricValues(type, rawDataSize, pRawData, *pMetricValueCount, pMetricValues);

    return result ?
ZE_RESULT_SUCCESS : ZE_RESULT_ERROR_UNKNOWN; } ze_result_t OaMetricGroupImp::calculateMetricValuesExp(const zet_metric_group_calculation_type_t type, size_t rawDataSize, const uint8_t *pRawData, uint32_t *pSetCount, uint32_t *pTotalMetricValueCount, uint32_t *pMetricCounts, zet_typed_value_t *pMetricValues) { const MetricGroupCalculateHeader *pRawHeader = reinterpret_cast(pRawData); if (pRawHeader->magic != MetricGroupCalculateHeader::magicValue) { const bool calculationCountOnly = *pTotalMetricValueCount == 0; ze_result_t result = calculateMetricValues(type, rawDataSize, pRawData, pTotalMetricValueCount, pMetricValues); if (result == ZE_RESULT_SUCCESS) { *pSetCount = 1; if (!calculationCountOnly) { pMetricCounts[0] = *pTotalMetricValueCount; } } else { if (calculationCountOnly) { *pSetCount = 0; *pTotalMetricValueCount = 0; } else { pMetricCounts[0] = 0; } } return result; } bool result = true; const size_t metricGroupCount = metricGroups.size(); if (*pSetCount == 0 || *pTotalMetricValueCount == 0) { const uint32_t *pRawDataSizesUnpacked = reinterpret_cast(pRawData + pRawHeader->rawDataSizes); if (metricGroupCount == 0) { result = getCalculatedMetricCount(*pRawDataSizesUnpacked, *pTotalMetricValueCount); if (result) { *pSetCount = 1; } else { *pSetCount = 0; *pTotalMetricValueCount = 0; } } else { *pSetCount = static_cast(metricGroupCount); *pTotalMetricValueCount = 0; for (size_t i = 0; i < metricGroupCount; i++) { uint32_t metricCount = 0; auto &metricGroup = *static_cast(metricGroups[i]); result = metricGroup.getCalculatedMetricCount(pRawDataSizesUnpacked[i], metricCount); if (!result) { *pSetCount = 0; *pTotalMetricValueCount = 0; break; } *pTotalMetricValueCount += metricCount; } } } else { const uint32_t *pRawDataSizesUnpacked = reinterpret_cast(pRawData + pRawHeader->rawDataSizes); const uint32_t *pRawDataOffsetsUnpacked = reinterpret_cast(pRawData + pRawHeader->rawDataOffsets); const uint8_t *pRawDataOffsetUnpacked = reinterpret_cast(pRawData + 
pRawHeader->rawDataOffset); if (metricGroupCount == 0) { result = getCalculatedMetricValues(type, pRawDataSizesUnpacked[0], pRawDataOffsetUnpacked, *pTotalMetricValueCount, pMetricValues); pMetricCounts[0] = *pTotalMetricValueCount; } else { UNRECOVERABLE_IF(*pSetCount > metricGroupCount); const uint32_t maxTotalMetricValueCount = *pTotalMetricValueCount; *pTotalMetricValueCount = 0; for (size_t i = 0; i < *pSetCount; i++) { auto &metricGroup = *static_cast(metricGroups[i]); const uint32_t dataSize = pRawDataSizesUnpacked[i]; const uint8_t *pRawDataOffset = pRawDataOffsetUnpacked + pRawDataOffsetsUnpacked[i]; pMetricCounts[i] = maxTotalMetricValueCount; result = metricGroup.getCalculatedMetricValues(type, dataSize, pRawDataOffset, pMetricCounts[i], pMetricValues); if (!result) { for (size_t j = 0; j <= i; j++) { pMetricCounts[j] = 0; } break; } *pTotalMetricValueCount += pMetricCounts[i]; pMetricValues += pMetricCounts[i]; } } } return result ? ZE_RESULT_SUCCESS : ZE_RESULT_ERROR_UNKNOWN; } bool OaMetricGroupImp::getCalculatedMetricCount(const size_t rawDataSize, uint32_t &metricValueCount) { uint32_t rawReportSize = getRawReportSize(); if (rawReportSize == 0) { return false; } if ((rawDataSize % rawReportSize) != 0) { return false; } const uint32_t rawReportCount = static_cast(rawDataSize) / rawReportSize; metricValueCount = rawReportCount * properties.metricCount; return true; } bool OaMetricGroupImp::getCalculatedMetricValues(const zet_metric_group_calculation_type_t type, const size_t rawDataSize, const uint8_t *pRawData, uint32_t &metricValueCount, zet_typed_value_t *pCalculatedData) { uint32_t calculatedReportCount = 0; uint32_t expectedMetricValueCount = 0; if (pCalculatedData == nullptr) { return false; } if (getCalculatedMetricCount(rawDataSize, expectedMetricValueCount) == false) { return false; } // Calculated metrics / maximum values container. 
std::vector calculatedMetrics(expectedMetricValueCount);
    std::vector maximumValues(expectedMetricValueCount);
    // NOTE(review): the element types of the two vectors above and the cast targets
    // below look lost in extraction - confirm against the upstream file.

    // Set filtering type.
    pReferenceMetricSet->SetApiFiltering(OaMetricGroupImp::getApiMask(properties.samplingType));

    // Calculate metrics.
    // The same byte size is passed for both output containers.
    const uint32_t outMetricsSize = static_cast(calculatedMetrics.size()) * sizeof(MetricsDiscovery::TTypedValue_1_0);
    bool result = pReferenceMetricSet->CalculateMetrics(
                      reinterpret_cast(const_cast(pRawData)), static_cast(rawDataSize),
                      calculatedMetrics.data(),
                      outMetricsSize,
                      &calculatedReportCount, maximumValues.data(), outMetricsSize) == MetricsDiscovery::CC_OK;

    if (result) {

        // Adjust copied reports to buffer provided by the user.
        metricValueCount = std::min(metricValueCount, calculatedReportCount * properties.metricCount);

        // Translate metrics from metrics discovery to oneAPI format.
        switch (type) {
        case ZET_METRIC_GROUP_CALCULATION_TYPE_METRIC_VALUES:
            for (size_t i = 0; i < metricValueCount; ++i) {
                copyValue(calculatedMetrics[i], pCalculatedData[i]);
            }
            break;
        case ZET_METRIC_GROUP_CALCULATION_TYPE_MAX_METRIC_VALUES:
            for (size_t i = 0; i < metricValueCount; ++i) {
                copyValue(maximumValues[i], pCalculatedData[i]);
            }
            break;
        default:
            // Unknown calculation type: nothing is copied and the call fails.
            result = false;
            break;
        }
    }

    return result;
}

// Stores the group's properties, the metric set and concurrent group it refers
// to (by address - no copy is made), its metric list and its metric source.
ze_result_t OaMetricGroupImp::initialize(const zet_metric_group_properties_t &sourceProperties,
                                         MetricsDiscovery::IMetricSet_1_5 &metricSet,
                                         MetricsDiscovery::IConcurrentGroup_1_5 &concurrentGroup,
                                         const std::vector &groupMetrics,
                                         OaMetricSourceImp &metricSource) {
    copyProperties(sourceProperties, properties);
    pReferenceMetricSet = &metricSet;
    pReferenceConcurrentGroup = &concurrentGroup;
    metrics = groupMetrics;
    this->metricSource = &metricSource;
    return ZE_RESULT_SUCCESS;
}

// Returns the size of one raw report of the referenced metric set:
// RawReportSize for time-based groups, QueryReportSize otherwise.
uint32_t OaMetricGroupImp::getRawReportSize() {
    auto pMetricSetParams = pReferenceMetricSet->GetParams();

    return (properties.samplingType == ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_TIME_BASED)
               ? pMetricSetParams->RawReportSize
               : pMetricSetParams->QueryReportSize;
}

// Gives mutable access to the list of sub-device metric group handles.
std::vector &OaMetricGroupImp::getMetricGroups() {
    return metricGroups;
}

// Copies metric group properties from 'source' to 'destination'.
// The name and description buffers are copied again explicitly after the
// struct assignment.
void OaMetricGroupImp::copyProperties(const zet_metric_group_properties_t &source,
                                      zet_metric_group_properties_t &destination) {
    destination = source;
    memcpy_s(destination.name, sizeof(destination.name),
             source.name, sizeof(destination.name));
    memcpy_s(destination.description, sizeof(destination.description),
             source.description, sizeof(destination.description));
}

// Converts one Metrics Discovery typed value to a Level Zero typed value.
// Unknown source value types are reported as UINT64 with value 0.
void OaMetricGroupImp::copyValue(const MetricsDiscovery::TTypedValue_1_0 &source,
                                 zet_typed_value_t &destination) const {

    destination = {};

    switch (source.ValueType) {
    case MetricsDiscovery::VALUE_TYPE_UINT32:
        destination.type = ZET_VALUE_TYPE_UINT32;
        destination.value.ui32 = source.ValueUInt32;
        break;

    case MetricsDiscovery::VALUE_TYPE_UINT64:
        destination.type = ZET_VALUE_TYPE_UINT64;
        destination.value.ui64 = source.ValueUInt64;
        break;

    case MetricsDiscovery::VALUE_TYPE_FLOAT:
        destination.type = ZET_VALUE_TYPE_FLOAT32;
        destination.value.fp32 = source.ValueFloat;
        break;

    case MetricsDiscovery::VALUE_TYPE_BOOL:
        destination.type = ZET_VALUE_TYPE_BOOL8;
        destination.value.b8 = source.ValueBool;
        break;

    default:
        destination.type = ZET_VALUE_TYPE_UINT64;
        destination.value.ui64 = 0;
        DEBUG_BREAK_IF(true);
        break;
    }
}

// Writes this metric's cached properties to *pProperties.
ze_result_t OaMetricImp::getProperties(zet_metric_properties_t *pProperties) {
    copyProperties(properties, *pProperties);
    return ZE_RESULT_SUCCESS;
}

// Caches a copy of 'sourceProperties' in this metric.
ze_result_t OaMetricImp::initialize(const zet_metric_properties_t &sourceProperties) {
    copyProperties(sourceProperties, properties);
    return ZE_RESULT_SUCCESS;
}

// Copies metric properties from 'source' to 'destination'.
// The character buffers are copied again explicitly after the struct assignment.
void OaMetricImp::copyProperties(const zet_metric_properties_t &source,
                                 zet_metric_properties_t &destination) {
    destination = source;
    memcpy_s(destination.name, sizeof(destination.name),
             source.name, sizeof(destination.name));
    memcpy_s(destination.description, sizeof(destination.description),
             source.description, sizeof(destination.description));
memcpy_s(destination.component, sizeof(destination.component), source.component, sizeof(destination.component)); memcpy_s(destination.resultUnits, sizeof(destination.resultUnits), source.resultUnits, sizeof(destination.resultUnits)); } MetricGroup *OaMetricGroupImp::create(zet_metric_group_properties_t &properties, MetricsDiscovery::IMetricSet_1_5 &metricSet, MetricsDiscovery::IConcurrentGroup_1_5 &concurrentGroup, const std::vector &metrics, MetricSource &metricSource) { auto pMetricGroup = new OaMetricGroupImp(); UNRECOVERABLE_IF(pMetricGroup == nullptr); pMetricGroup->initialize(properties, metricSet, concurrentGroup, metrics, static_cast(metricSource)); return pMetricGroup; } Metric *OaMetricImp::create(zet_metric_properties_t &properties) { auto pMetric = new OaMetricImp(); UNRECOVERABLE_IF(pMetric == nullptr); pMetric->initialize(properties); return pMetric; } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/metrics/metric_oa_enumeration_imp.h000066400000000000000000000164131422164147700312210ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/os_interface/os_library.h" #include "level_zero/tools/source/metrics/metric.h" #include namespace L0 { class OaMetricSourceImp; struct MetricEnumeration { MetricEnumeration(OaMetricSourceImp &metricSource); virtual ~MetricEnumeration(); ze_result_t metricGroupGet(uint32_t &count, zet_metric_group_handle_t *phMetricGroups); MetricGroup *getMetricGroupByIndex(const uint32_t index); uint32_t getMetricGroupCount(); virtual bool isInitialized(); virtual ze_result_t loadMetricsDiscovery(); static const char *getMetricsDiscoveryFilename(); protected: ze_result_t initialize(); virtual ze_result_t openMetricsDiscovery(); virtual bool getAdapterId(uint32_t &major, uint32_t &minor); virtual MetricsDiscovery::IAdapter_1_9 *getMetricsAdapter(); ze_result_t cleanupMetricsDiscovery(); ze_result_t 
cacheMetricInformation(); ze_result_t cacheMetricGroup(MetricsDiscovery::IMetricSet_1_5 &metricSet, MetricsDiscovery::IConcurrentGroup_1_5 &pConcurrentGroup, const uint32_t domain, const zet_metric_group_sampling_type_flag_t samplingType); ze_result_t createMetrics(MetricsDiscovery::IMetricSet_1_5 &metricSet, std::vector &metrics); // Metrics Discovery types mapping. uint32_t getMetricTierNumber(const uint32_t sourceUsageFlagsMask) const; zet_metric_type_t getMetricType(const MetricsDiscovery::TMetricType sourceMetricType) const; zet_metric_type_t getMetricType(const MetricsDiscovery::TInformationType sourceInformationType) const; zet_value_type_t getMetricResultType(const MetricsDiscovery::TMetricResultType sourceMetricResultType) const; protected: OaMetricSourceImp &metricSource; std::vector metricGroups; // Cached metric groups ze_result_t initializationState = ZE_RESULT_ERROR_UNINITIALIZED; // Metrics Discovery API. std::unique_ptr hMetricsDiscovery = nullptr; MetricsDiscovery::OpenAdapterGroup_fn openAdapterGroup = nullptr; MetricsDiscovery::IAdapterGroup_1_9 *pAdapterGroup = nullptr; MetricsDiscovery::IAdapter_1_9 *pAdapter = nullptr; MetricsDiscovery::IMetricsDevice_1_5 *pMetricsDevice = nullptr; public: // Metrics Discovery version should be at least 1.5. 
static const uint32_t requiredMetricsDiscoveryMajorVersion = 1; static const uint32_t requiredMetricsDiscoveryMinorVersion = 5; static const char *oaConcurrentGroupName; }; struct OaMetricGroupImp : MetricGroup { ~OaMetricGroupImp() override; ze_result_t getProperties(zet_metric_group_properties_t *pProperties) override; ze_result_t metricGet(uint32_t *pCount, zet_metric_handle_t *phMetrics) override; ze_result_t calculateMetricValues(const zet_metric_group_calculation_type_t type, size_t rawDataSize, const uint8_t *pRawData, uint32_t *pMetricValueCount, zet_typed_value_t *pCalculatedData) override; ze_result_t calculateMetricValuesExp(const zet_metric_group_calculation_type_t type, size_t rawDataSize, const uint8_t *pRawData, uint32_t *pSetCount, uint32_t *pTotalMetricValueCount, uint32_t *pMetricCounts, zet_typed_value_t *pMetricValues) override; ze_result_t initialize(const zet_metric_group_properties_t &sourceProperties, MetricsDiscovery::IMetricSet_1_5 &metricSet, MetricsDiscovery::IConcurrentGroup_1_5 &concurrentGroup, const std::vector &groupMetrics, OaMetricSourceImp &metricSource); bool activate() override; bool deactivate() override; bool activateMetricSet(); bool deactivateMetricSet(); static uint32_t getApiMask(const zet_metric_group_sampling_type_flags_t samplingType); zet_metric_group_handle_t getMetricGroupForSubDevice(const uint32_t subDeviceIndex) override; // Time based measurements. 
ze_result_t openIoStream(uint32_t &timerPeriodNs, uint32_t &oaBufferSize); ze_result_t waitForReports(const uint32_t timeoutMs); ze_result_t readIoStream(uint32_t &reportCount, uint8_t &reportData); ze_result_t closeIoStream(); std::vector &getMetricGroups(); ze_result_t streamerOpen( zet_context_handle_t hContext, zet_device_handle_t hDevice, zet_metric_streamer_desc_t *desc, ze_event_handle_t hNotificationEvent, zet_metric_streamer_handle_t *phMetricStreamer) override; ze_result_t metricQueryPoolCreate( zet_context_handle_t hContext, zet_device_handle_t hDevice, const zet_metric_query_pool_desc_t *desc, zet_metric_query_pool_handle_t *phMetricQueryPool) override; static MetricGroup *create(zet_metric_group_properties_t &properties, MetricsDiscovery::IMetricSet_1_5 &metricSet, MetricsDiscovery::IConcurrentGroup_1_5 &concurrentGroup, const std::vector &metrics, MetricSource &metricSource); static zet_metric_group_properties_t getProperties(const zet_metric_group_handle_t handle); uint32_t getRawReportSize(); protected: void copyProperties(const zet_metric_group_properties_t &source, zet_metric_group_properties_t &destination); void copyValue(const MetricsDiscovery::TTypedValue_1_0 &source, zet_typed_value_t &destination) const; bool getCalculatedMetricCount(const size_t rawDataSize, uint32_t &metricValueCount); bool getCalculatedMetricValues(const zet_metric_group_calculation_type_t, const size_t rawDataSize, const uint8_t *pRawData, uint32_t &metricValueCount, zet_typed_value_t *pCalculatedData); // Cached metrics. 
std::vector metrics; zet_metric_group_properties_t properties{ ZET_STRUCTURE_TYPE_METRIC_GROUP_PROPERTIES, }; MetricsDiscovery::IMetricSet_1_5 *pReferenceMetricSet = nullptr; MetricsDiscovery::IConcurrentGroup_1_5 *pReferenceConcurrentGroup = nullptr; std::vector metricGroups; OaMetricSourceImp *metricSource; private: ze_result_t openForDevice(Device *pDevice, zet_metric_streamer_desc_t &desc, zet_metric_streamer_handle_t *phMetricStreamer); }; struct OaMetricImp : Metric { ~OaMetricImp() override{}; ze_result_t getProperties(zet_metric_properties_t *pProperties) override; ze_result_t initialize(const zet_metric_properties_t &sourceProperties); static Metric *create(zet_metric_properties_t &properties); protected: void copyProperties(const zet_metric_properties_t &source, zet_metric_properties_t &destination); zet_metric_properties_t properties{ ZET_STRUCTURE_TYPE_METRIC_PROPERTIES}; }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/metrics/metric_oa_query_imp.cpp000066400000000000000000001052661422164147700304000ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/metrics/metric_oa_query_imp.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/device/device.h" #include "shared/source/helpers/engine_node_helper.h" #include "shared/source/memory_manager/allocation_properties.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/os_interface/os_context.h" #include "shared/source/os_interface/os_library.h" #include "level_zero/core/source/cmdlist/cmdlist.h" #include "level_zero/core/source/cmdlist/cmdlist_imp.h" #include "level_zero/core/source/device/device.h" #include "level_zero/core/source/device/device_imp.h" #include "level_zero/tools/source/metrics/metric_oa_enumeration_imp.h" #include "level_zero/tools/source/metrics/metric_oa_source.h" using namespace MetricsLibraryApi; 
namespace L0 { MetricsLibrary::MetricsLibrary(OaMetricSourceImp &metricSourceInput) : metricSource(metricSourceInput) {} MetricsLibrary::~MetricsLibrary() { release(); } ze_result_t MetricsLibrary::getInitializationState() { return initializationState; } bool MetricsLibrary::isInitialized() { // Try to initialize metrics library only once. if (initializationState == ZE_RESULT_ERROR_UNINITIALIZED) { initialize(); } return initializationState == ZE_RESULT_SUCCESS; } uint32_t MetricsLibrary::getQueryReportGpuSize() { TypedValue_1_0 gpuReportSize = {}; // Obtain gpu report size. if (!isInitialized() || api.GetParameter(ParameterType::QueryHwCountersReportGpuSize, &gpuReportSize.Type, &gpuReportSize) != StatusCode::Success) { DEBUG_BREAK_IF(true); return 0; } // Validate gpu report size. if (!gpuReportSize.ValueUInt32) { DEBUG_BREAK_IF(true); return 0; } return gpuReportSize.ValueUInt32; } bool MetricsLibrary::createMetricQuery(const uint32_t slotsCount, QueryHandle_1_0 &query, NEO::GraphicsAllocation *&pAllocation) { std::lock_guard lock(mutex); // Validate metrics library state. if (!isInitialized()) { DEBUG_BREAK_IF(true); return false; } QueryCreateData_1_0 queryData = {}; queryData.HandleContext = context; queryData.Type = ObjectType::QueryHwCounters; queryData.Slots = slotsCount; // Create query pool within metrics library. if (api.QueryCreate(&queryData, &query) != StatusCode::Success) { DEBUG_BREAK_IF(true); return false; } // Register created query. 
queries.push_back(query); return true; } uint32_t MetricsLibrary::getMetricQueryCount() { std::lock_guard lock(mutex); return static_cast(queries.size()); } bool MetricsLibrary::destroyMetricQuery(QueryHandle_1_0 &query) { std::lock_guard lock(mutex); DEBUG_BREAK_IF(!query.IsValid()); const bool result = isInitialized() && (api.QueryDelete(query) == StatusCode::Success); auto iter = std::find_if(queries.begin(), queries.end(), [&](const QueryHandle_1_0 &element) { return element.data == query.data; }); // Unregister query. if (iter != queries.end()) { queries.erase(iter); } return result; } bool MetricsLibrary::getMetricQueryReportSize(size_t &rawDataSize) { ValueType valueType = ValueType::Last; TypedValue_1_0 value = {}; const bool result = isInitialized() && (api.GetParameter(ParameterType::QueryHwCountersReportApiSize, &valueType, &value) == StatusCode::Success); rawDataSize = static_cast(value.ValueUInt32); DEBUG_BREAK_IF(!result); return result; } bool MetricsLibrary::getMetricQueryReport(QueryHandle_1_0 &query, const uint32_t slot, const size_t rawDataSize, uint8_t *pData) { GetReportData_1_0 report = {}; report.Type = ObjectType::QueryHwCounters; report.Query.Handle = query; report.Query.Slot = slot; report.Query.SlotsCount = 1; report.Query.Data = pData; report.Query.DataSize = static_cast(rawDataSize); const bool result = isInitialized() && (api.GetData(&report) == StatusCode::Success); DEBUG_BREAK_IF(!result); return result; } void MetricsLibrary::initialize() { auto &metricsEnumeration = metricSource.getMetricEnumeration(); // Function should be called only once. DEBUG_BREAK_IF(initializationState != ZE_RESULT_ERROR_UNINITIALIZED); // Metrics Enumeration needs to be initialized before Metrics Library const bool validMetricsEnumeration = metricsEnumeration.isInitialized(); const bool validMetricsLibrary = validMetricsEnumeration && handle && createContext(); // Load metrics library and exported functions. initializationState = validMetricsLibrary ? 
ZE_RESULT_SUCCESS : ZE_RESULT_ERROR_UNKNOWN; DEBUG_BREAK_IF(initializationState != ZE_RESULT_SUCCESS); } void MetricsLibrary::release() { // Delete metric group configurations. deleteAllConfigurations(); // Destroy context. if (context.IsValid() && contextDeleteFunction) { contextDeleteFunction(context); } // Reset metric query state to not initialized. api = {}; callbacks = {}; context = {}; isWorkloadPartitionEnabled = false; initializationState = ZE_RESULT_ERROR_UNINITIALIZED; } bool MetricsLibrary::load() { // Load library. handle = OaMetricSourceImp::osLibraryLoadFunction(getFilename()); // Load exported functions. if (handle) { contextCreateFunction = reinterpret_cast( handle->getProcAddress(METRICS_LIBRARY_CONTEXT_CREATE_1_0)); contextDeleteFunction = reinterpret_cast( handle->getProcAddress(METRICS_LIBRARY_CONTEXT_DELETE_1_0)); } if (contextCreateFunction == nullptr || contextDeleteFunction == nullptr) { PRINT_DEBUG_STRING(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "cannot load %s exported functions\n", MetricsLibrary::getFilename()); return false; } // Return success if exported functions have been loaded. return true; } void MetricsLibrary::enableWorkloadPartition() { isWorkloadPartitionEnabled = true; } void MetricsLibrary::getSubDeviceClientOptions( ClientOptionsData_1_0 &subDevice, ClientOptionsData_1_0 &subDeviceIndex, ClientOptionsData_1_0 &subDeviceCount, ClientOptionsData_1_0 &workloadPartition) { const auto &deviceImp = *static_cast(&metricSource.getDevice()); if (!deviceImp.isSubdevice) { // Root device. 
subDevice.Type = ClientOptionsType::SubDevice; subDevice.SubDevice.Enabled = false; subDeviceIndex.Type = ClientOptionsType::SubDeviceIndex; subDeviceIndex.SubDeviceIndex.Index = 0; subDeviceCount.Type = ClientOptionsType::SubDeviceCount; subDeviceCount.SubDeviceCount.Count = std::max(deviceImp.getNEODevice()->getRootDevice()->getNumSubDevices(), 1u); workloadPartition.Type = ClientOptionsType::WorkloadPartition; workloadPartition.WorkloadPartition.Enabled = false; } else { // Sub device. subDevice.Type = ClientOptionsType::SubDevice; subDevice.SubDevice.Enabled = true; subDeviceIndex.Type = ClientOptionsType::SubDeviceIndex; subDeviceIndex.SubDeviceIndex.Index = static_cast(deviceImp.getNEODevice())->getSubDeviceIndex(); subDeviceCount.Type = ClientOptionsType::SubDeviceCount; subDeviceCount.SubDeviceCount.Count = std::max(deviceImp.getNEODevice()->getRootDevice()->getNumSubDevices(), 1u); workloadPartition.Type = ClientOptionsType::WorkloadPartition; workloadPartition.WorkloadPartition.Enabled = isWorkloadPartitionEnabled; } } bool MetricsLibrary::createContext() { auto &device = metricSource.getDevice(); const auto &hwHelper = device.getHwHelper(); const auto &asyncComputeEngines = hwHelper.getGpgpuEngineInstances(device.getHwInfo()); ContextCreateData_1_0 createData = {}; ClientOptionsData_1_0 clientOptions[6] = {}; ClientData_1_0 clientData = {}; ClientType_1_0 clientType = {}; ClientDataLinuxAdapter_1_0 adapter = {}; // Check if compute command streamer is used. 
auto asyncComputeEngine = std::find_if(asyncComputeEngines.begin(), asyncComputeEngines.end(), [&](const auto &engine) { return engine.first == aub_stream::ENGINE_CCS; }); const auto &deviceImp = *static_cast(&device); const auto &commandStreamReceiver = *deviceImp.getNEODevice()->getDefaultEngine().commandStreamReceiver; const auto engineType = commandStreamReceiver.getOsContext().getEngineType(); const bool isComputeUsed = NEO::EngineHelpers::isCcs(engineType); metricSource.setUseCompute(isComputeUsed); // Create metrics library context. DEBUG_BREAK_IF(!contextCreateFunction); clientType.Api = ClientApi::OneApi; clientType.Gen = getGenType(device.getPlatformInfo()); clientOptions[0].Type = ClientOptionsType::Compute; clientOptions[0].Compute.Asynchronous = asyncComputeEngine != asyncComputeEngines.end(); clientOptions[1].Type = ClientOptionsType::Tbs; clientOptions[1].Tbs.Enabled = metricSource.getMetricStreamer() != nullptr; // Sub device client options #2 getSubDeviceClientOptions(clientOptions[2], clientOptions[3], clientOptions[4], clientOptions[5]); clientData.Linux.Adapter = &adapter; clientData.ClientOptions = clientOptions; clientData.ClientOptionsCount = sizeof(clientOptions) / sizeof(ClientOptionsData_1_0); createData.Api = &api; createData.ClientCallbacks = &callbacks; createData.ClientData = &clientData; const bool result = getContextData(device, createData) && contextCreateFunction(clientType, &createData, &context) == StatusCode::Success; DEBUG_BREAK_IF(!result); return result; } ClientGen MetricsLibrary::getGenType(const uint32_t gen) const { auto &hwHelper = NEO::HwHelper::get(static_cast(gen)); return static_cast(hwHelper.getMetricsLibraryGenId()); } uint32_t MetricsLibrary::getGpuCommandsSize(CommandBufferData_1_0 &commandBuffer) { CommandBufferSize_1_0 commandBufferSize = {}; bool result = isInitialized(); // Validate metrics library initialization state. 
if (result) { commandBuffer.HandleContext = context; result = api.CommandBufferGetSize(&commandBuffer, &commandBufferSize) == StatusCode::Success; } DEBUG_BREAK_IF(!result); return result ? commandBufferSize.GpuMemorySize : 0; } bool MetricsLibrary::getGpuCommands(CommandBufferData_1_0 &commandBuffer) { // Obtain gpu commands from metrics library. const bool result = isInitialized() && (api.CommandBufferGet(&commandBuffer) == StatusCode::Success); DEBUG_BREAK_IF(!result); return result; } bool MetricsLibrary::getGpuCommands(CommandList &commandList, CommandBufferData_1_0 &commandBuffer) { // Obtain required command buffer size. commandBuffer.Size = getGpuCommandsSize(commandBuffer); // Validate gpu commands size. if (!commandBuffer.Size) { DEBUG_BREAK_IF(true); return false; } // Allocate command buffer. auto stream = commandList.commandContainer.getCommandStream(); auto buffer = stream->getSpace(commandBuffer.Size); // Fill attached command buffer with gpu commands. commandBuffer.Data = buffer; // Obtain gpu commands from metrics library. const bool result = isInitialized() && (api.CommandBufferGet(&commandBuffer) == StatusCode::Success); DEBUG_BREAK_IF(!result); return result; } ConfigurationHandle_1_0 MetricsLibrary::createConfiguration(const zet_metric_group_handle_t metricGroupHandle, const zet_metric_group_properties_t properties) { // Metric group internal data. auto metricGroup = static_cast(MetricGroup::fromHandle(metricGroupHandle)); auto metricGroupDummy = ConfigurationHandle_1_0{}; DEBUG_BREAK_IF(!metricGroup); // Metrics library configuration creation data. ConfigurationHandle_1_0 handle = {}; ConfigurationCreateData_1_0 handleData = {}; handleData.HandleContext = context; handleData.Type = ObjectType::ConfigurationHwCountersOa; // Check supported sampling types. 
const bool validSampling = properties.samplingType == ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_EVENT_BASED || properties.samplingType == ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_TIME_BASED; // Activate metric group through metrics discovery to send metric group // configuration to kernel driver. const bool validActivate = isInitialized() && validSampling && metricGroup->activateMetricSet(); if (validActivate) { // Use metrics library to create configuration for the activated metric group. api.ConfigurationCreate(&handleData, &handle); // Use metrics discovery to deactivate metric group. metricGroup->deactivateMetricSet(); } return validActivate ? handle : metricGroupDummy; } ConfigurationHandle_1_0 MetricsLibrary::getConfiguration(zet_metric_group_handle_t handle) { auto iter = configurations.find(handle); auto configuration = (iter != end(configurations)) ? iter->second : addConfiguration(handle); DEBUG_BREAK_IF(!configuration.IsValid()); return configuration; } ConfigurationHandle_1_0 MetricsLibrary::addConfiguration(zet_metric_group_handle_t handle) { ConfigurationHandle_1_0 libraryHandle = {}; DEBUG_BREAK_IF(!handle); // Create metrics library configuration. auto metricGroup = MetricGroup::fromHandle(handle); auto properties = OaMetricGroupImp::getProperties(handle); auto configuration = createConfiguration(metricGroup, properties); // Cache configuration if valid. 
if (configuration.IsValid()) { libraryHandle = configuration; cacheConfiguration(handle, libraryHandle); } DEBUG_BREAK_IF(!libraryHandle.IsValid()); return libraryHandle; } void MetricsLibrary::deleteAllConfigurations() { if (api.ConfigurationDelete) { for (auto &configuration : configurations) { if (configuration.second.IsValid()) { api.ConfigurationDelete(configuration.second); } } } configurations.clear(); } ze_result_t OaMetricGroupImp::metricQueryPoolCreate( zet_context_handle_t hContext, zet_device_handle_t hDevice, const zet_metric_query_pool_desc_t *desc, zet_metric_query_pool_handle_t *phMetricQueryPool) { return OaMetricQueryPoolImp::metricQueryPoolCreate(hContext, hDevice, toHandle(), desc, phMetricQueryPool); } ze_result_t OaMetricQueryPoolImp::metricQueryPoolCreate(zet_context_handle_t hContext, zet_device_handle_t hDevice, zet_metric_group_handle_t hMetricGroup, const zet_metric_query_pool_desc_t *pDesc, zet_metric_query_pool_handle_t *phMetricQueryPool) { auto device = Device::fromHandle(hDevice); auto &metricSource = device->getMetricDeviceContext().getMetricSource(); // Metric query cannot be used with streamer simultaneously // (due to oa buffer usage constraints). if (metricSource.getMetricStreamer() != nullptr) { return ZE_RESULT_ERROR_NOT_AVAILABLE; } const auto &deviceImp = *static_cast(device); auto metricPoolImp = new OaMetricQueryPoolImp(metricSource, hMetricGroup, *pDesc); if (metricSource.isImplicitScalingCapable()) { auto emptyMetricGroups = std::vector(); auto &metricGroups = hMetricGroup ? 
static_cast(MetricGroup::fromHandle(hMetricGroup))->getMetricGroups() : emptyMetricGroups; const bool useMetricGroupSubDevice = metricGroups.size() > 0; auto &metricPools = metricPoolImp->getMetricQueryPools(); for (size_t i = 0; i < deviceImp.numSubDevices; ++i) { auto &subDevice = deviceImp.subDevices[i]; auto &subDeviceMetricSource = subDevice->getMetricDeviceContext().getMetricSource(); subDeviceMetricSource.getMetricsLibrary().enableWorkloadPartition(); zet_metric_group_handle_t metricGroupHandle = useMetricGroupSubDevice ? metricGroups[subDeviceMetricSource.getSubDeviceIndex()] : hMetricGroup; auto metricPoolSubdeviceImp = new OaMetricQueryPoolImp(subDeviceMetricSource, metricGroupHandle, *pDesc); // Create metric query pool. if (!metricPoolSubdeviceImp->create()) { metricPoolSubdeviceImp->destroy(); metricPoolImp->destroy(); metricPoolSubdeviceImp = nullptr; metricPoolImp = nullptr; *phMetricQueryPool = nullptr; return ZE_RESULT_ERROR_INVALID_ARGUMENT; } metricPools.push_back(metricPoolSubdeviceImp); } } else { // Create metric query pool. if (!metricPoolImp->create()) { metricPoolImp->destroy(); metricPoolImp = nullptr; *phMetricQueryPool = nullptr; return ZE_RESULT_ERROR_INVALID_ARGUMENT; } } // Allocate gpu memory. 
if (!metricPoolImp->allocateGpuMemory()) { metricPoolImp->destroy(); metricPoolImp = nullptr; *phMetricQueryPool = nullptr; return ZE_RESULT_ERROR_INVALID_ARGUMENT; } *phMetricQueryPool = metricPoolImp; return ZE_RESULT_SUCCESS; } ze_result_t metricQueryPoolCreate(zet_context_handle_t hContext, zet_device_handle_t hDevice, zet_metric_group_handle_t hMetricGroup, const zet_metric_query_pool_desc_t *pDesc, zet_metric_query_pool_handle_t *phMetricQueryPool) { if (pDesc->type == ZET_METRIC_QUERY_POOL_TYPE_EXECUTION) { return OaMetricQueryPoolImp::metricQueryPoolCreate(hContext, hDevice, hMetricGroup, pDesc, phMetricQueryPool); } else { UNRECOVERABLE_IF(hMetricGroup == nullptr); return MetricGroup::fromHandle(hMetricGroup)->metricQueryPoolCreate(hContext, hDevice, pDesc, phMetricQueryPool); } } OaMetricQueryPoolImp::OaMetricQueryPoolImp(OaMetricSourceImp &metricSourceInput, zet_metric_group_handle_t hEventMetricGroupInput, const zet_metric_query_pool_desc_t &poolDescription) : metricSource(metricSourceInput), metricsLibrary(metricSource.getMetricsLibrary()), description(poolDescription), hMetricGroup(hEventMetricGroupInput) {} bool OaMetricQueryPoolImp::create() { switch (description.type) { case ZET_METRIC_QUERY_POOL_TYPE_PERFORMANCE: return createMetricQueryPool(); case ZET_METRIC_QUERY_POOL_TYPE_EXECUTION: return createSkipExecutionQueryPool(); default: DEBUG_BREAK_IF(true); return false; } } ze_result_t OaMetricQueryPoolImp::destroy() { switch (description.type) { case ZET_METRIC_QUERY_POOL_TYPE_PERFORMANCE: if (metricQueryPools.size() > 0) { for (auto &metricQueryPool : metricQueryPools) { MetricQueryPool::fromHandle(metricQueryPool)->destroy(); } } if (query.IsValid()) { metricsLibrary.destroyMetricQuery(query); } if (pAllocation) { metricSource.getDevice().getDriverHandle()->getMemoryManager()->freeGraphicsMemory(pAllocation); } break; case ZET_METRIC_QUERY_POOL_TYPE_EXECUTION: for (auto &metricQueryPool : metricQueryPools) { 
MetricQueryPool::fromHandle(metricQueryPool)->destroy(); } break; default: DEBUG_BREAK_IF(true); break; } // Check open queries. if (metricSource.getMetricsLibrary().getMetricQueryCount() == 0) { if (!metricSource.isMetricGroupActivated()) { metricSource.getMetricsLibrary().release(); } } delete this; return ZE_RESULT_SUCCESS; } bool OaMetricQueryPoolImp::allocateGpuMemory() { if (description.type == ZET_METRIC_QUERY_POOL_TYPE_PERFORMANCE) { // Get allocation size. const auto &deviceImp = *static_cast(&metricSource.getDevice()); allocationSize = (metricSource.isImplicitScalingCapable()) ? deviceImp.subDevices[0]->getMetricDeviceContext().getMetricSource().getMetricsLibrary().getQueryReportGpuSize() * description.count * deviceImp.numSubDevices : metricsLibrary.getQueryReportGpuSize() * description.count; if (allocationSize == 0) { return false; } // Allocate gpu memory. NEO::AllocationProperties properties( metricSource.getDevice().getRootDeviceIndex(), allocationSize, NEO::AllocationType::BUFFER_HOST_MEMORY, metricSource.getDevice().getNEODevice()->getDeviceBitfield()); properties.alignment = 64u; pAllocation = metricSource.getDevice().getDriverHandle()->getMemoryManager()->allocateGraphicsMemoryWithProperties(properties); UNRECOVERABLE_IF(pAllocation == nullptr); // Clear allocation. memset(pAllocation->getUnderlyingBuffer(), 0, allocationSize); } return true; } bool OaMetricQueryPoolImp::createMetricQueryPool() { // Validate metric group query - only event based is supported. auto metricGroupProperites = OaMetricGroupImp::getProperties(hMetricGroup); const bool validMetricGroup = metricGroupProperites.samplingType == ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_EVENT_BASED; if (!validMetricGroup) { return false; } // Pool initialization. pool.reserve(description.count); for (uint32_t i = 0; i < description.count; ++i) { pool.push_back({metricSource, *this, i}); } // Metrics library query object initialization. 
return metricsLibrary.createMetricQuery(description.count, query, pAllocation); } bool OaMetricQueryPoolImp::createSkipExecutionQueryPool() { pool.reserve(description.count); for (uint32_t i = 0; i < description.count; ++i) { pool.push_back({metricSource, *this, i}); } return true; } MetricQueryPool *MetricQueryPool::fromHandle(zet_metric_query_pool_handle_t handle) { return static_cast(handle); } zet_metric_query_pool_handle_t MetricQueryPool::toHandle() { return this; } ze_result_t OaMetricQueryPoolImp::metricQueryCreate(uint32_t index, zet_metric_query_handle_t *phMetricQuery) { if (index >= description.count) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } if (metricQueryPools.size() > 0) { auto pMetricQueryImp = new OaMetricQueryImp(metricSource, *this, index); for (auto metricQueryPoolHandle : metricQueryPools) { auto &metricQueries = pMetricQueryImp->getMetricQueries(); auto metricQueryPoolImp = static_cast(MetricQueryPool::fromHandle(metricQueryPoolHandle)); metricQueries.push_back(&metricQueryPoolImp->pool[index]); } *phMetricQuery = pMetricQueryImp; return ZE_RESULT_SUCCESS; } else { *phMetricQuery = &(pool[index]); return ZE_RESULT_SUCCESS; } } std::vector &OaMetricQueryPoolImp::getMetricQueryPools() { return metricQueryPools; } OaMetricQueryImp::OaMetricQueryImp(OaMetricSourceImp &metricSourceInput, OaMetricQueryPoolImp &poolInput, const uint32_t slotInput) : metricSource(metricSourceInput), metricsLibrary(metricSource.getMetricsLibrary()), pool(poolInput), slot(slotInput) {} ze_result_t OaMetricQueryImp::appendBegin(CommandList &commandList) { switch (pool.description.type) { case ZET_METRIC_QUERY_POOL_TYPE_PERFORMANCE: return writeMetricQuery(commandList, nullptr, 0, nullptr, true); case ZET_METRIC_QUERY_POOL_TYPE_EXECUTION: return writeSkipExecutionQuery(commandList, nullptr, 0, nullptr, true); default: DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_INVALID_ARGUMENT; } } ze_result_t OaMetricQueryImp::appendEnd(CommandList &commandList, ze_event_handle_t 
// Appends the "end" part of the query to `commandList`, forwarding the wait
// events and the optional completion event to the writer matching the pool
// type. Returns ZE_RESULT_ERROR_INVALID_ARGUMENT for an unknown pool type.
ze_result_t OaMetricQueryImp::appendEnd(CommandList &commandList, ze_event_handle_t hSignalEvent,
                                        uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) {
    const auto poolType = pool.description.type;

    if (poolType == ZET_METRIC_QUERY_POOL_TYPE_PERFORMANCE) {
        return writeMetricQuery(commandList, hSignalEvent, numWaitEvents, phWaitEvents, false);
    }

    if (poolType == ZET_METRIC_QUERY_POOL_TYPE_EXECUTION) {
        return writeSkipExecutionQuery(commandList, hSignalEvent, numWaitEvents, phWaitEvents, false);
    }

    // Unknown pool type.
    DEBUG_BREAK_IF(true);
    return ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
pRawDataHeader->rawDataOffsets = sizeof(MetricGroupCalculateHeader); pRawDataHeader->rawDataSizes = static_cast(pRawDataHeader->rawDataOffsets + (sizeof(uint32_t) * metricQueriesSize)); pRawDataHeader->rawDataOffset = static_cast(pRawDataHeader->rawDataSizes + (sizeof(uint32_t) * metricQueriesSize)); const size_t sizePerSubDevice = (*pRawDataSize - pRawDataHeader->rawDataOffset) / metricQueriesSize; DEBUG_BREAK_IF(sizePerSubDevice == 0); *pRawDataSize = pRawDataHeader->rawDataOffset; uint32_t *pRawDataOffsetsUnpacked = reinterpret_cast(pRawData + pRawDataHeader->rawDataOffsets); uint32_t *pRawDataSizesUnpacked = reinterpret_cast(pRawData + pRawDataHeader->rawDataSizes); uint8_t *pRawDataUnpacked = reinterpret_cast(pRawData + pRawDataHeader->rawDataOffset); for (size_t i = 0; i < metricQueriesSize; ++i) { size_t getDataSize = sizePerSubDevice; const uint32_t rawDataOffset = (i != 0) ? (pRawDataSizesUnpacked[i - 1] + pRawDataOffsetsUnpacked[i - 1]) : 0; auto pMetricQuery = MetricQuery::fromHandle(metricQueries[i]); ze_result_t tmpResult = pMetricQuery->getData(&getDataSize, pRawDataUnpacked + rawDataOffset); // Return at first error. if (tmpResult != ZE_RESULT_SUCCESS) { return tmpResult; } pRawDataSizesUnpacked[i] = static_cast(getDataSize); pRawDataOffsetsUnpacked[i] = (i != 0) ? pRawDataOffsetsUnpacked[i - 1] + pRawDataSizesUnpacked[i - 1] : 0; *pRawDataSize += getDataSize; } } } else { result = calculateSizeOnly ? metricsLibrary.getMetricQueryReportSize(*pRawDataSize) : metricsLibrary.getMetricQueryReport(pool.query, slot, *pRawDataSize, pRawData); } return result ? 
ZE_RESULT_SUCCESS : ZE_RESULT_ERROR_UNKNOWN; } ze_result_t OaMetricQueryImp::reset() { return ZE_RESULT_SUCCESS; } ze_result_t OaMetricQueryImp::destroy() { if (metricQueries.size() > 0) { delete this; } return ZE_RESULT_SUCCESS; } std::vector &OaMetricQueryImp::getMetricQueries() { return metricQueries; } ze_result_t OaMetricQueryImp::writeMetricQuery(CommandList &commandList, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, const bool begin) { bool result = true; const bool writeCompletionEvent = hSignalEvent && !begin; const size_t metricQueriesSize = metricQueries.size(); // Make gpu allocation visible. commandList.commandContainer.addToResidencyContainer(pool.pAllocation); // Wait for events before executing query. commandList.appendWaitOnEvents(numWaitEvents, phWaitEvents); if (metricQueriesSize) { const size_t allocationSizeForSubDevice = pool.allocationSize / metricQueriesSize; static_cast(commandList).appendMultiPartitionPrologue(static_cast(allocationSizeForSubDevice)); void *buffer = nullptr; bool gpuCommandStatus = true; // Revert iteration to be ensured that the last set of gpu commands overwrite the previous written sets of gpu commands, // so only one of the sub-device contexts will be used to append to command list. for (int32_t i = static_cast(metricQueriesSize - 1); i >= 0; --i) { // Adjust cpu and gpu addresses for each sub-device's query object. uint64_t gpuAddress = pool.pAllocation->getGpuAddress() + (i * allocationSizeForSubDevice); uint8_t *cpuAddress = static_cast(pool.pAllocation->getUnderlyingBuffer()) + (i * allocationSizeForSubDevice); auto &metricQueryImp = *static_cast(MetricQuery::fromHandle(metricQueries[i])); auto &metricLibrarySubDevice = metricQueryImp.metricsLibrary; auto &metricSourceSubDevice = metricQueryImp.metricSource; // Obtain gpu commands. 
CommandBufferData_1_0 commandBuffer = {}; commandBuffer.CommandsType = ObjectType::QueryHwCounters; commandBuffer.QueryHwCounters.Handle = metricQueryImp.pool.query; commandBuffer.QueryHwCounters.Begin = begin; commandBuffer.QueryHwCounters.Slot = slot; commandBuffer.Allocation.GpuAddress = gpuAddress; commandBuffer.Allocation.CpuAddress = cpuAddress; commandBuffer.Type = metricSourceSubDevice.isComputeUsed() ? GpuCommandBufferType::Compute : GpuCommandBufferType::Render; // Obtain required command buffer size. commandBuffer.Size = metricLibrarySubDevice.getGpuCommandsSize(commandBuffer); // Validate gpu commands size. if (!commandBuffer.Size) { return ZE_RESULT_ERROR_UNKNOWN; } // Allocate command buffer only once. if (buffer == nullptr) { auto stream = commandList.commandContainer.getCommandStream(); buffer = stream->getSpace(commandBuffer.Size); } // Fill attached command buffer with gpu commands. commandBuffer.Data = buffer; // Obtain gpu commands from metrics library for each sub-device to update cpu and gpu addresses for // each query object in metrics library, so that get data works properly. gpuCommandStatus = metricLibrarySubDevice.getGpuCommands(commandBuffer); if (!gpuCommandStatus) { break; } } static_cast(commandList).appendMultiPartitionEpilogue(); if (!gpuCommandStatus) { return ZE_RESULT_ERROR_UNKNOWN; } // Write gpu commands for sub device index 0. } else { // Obtain gpu commands. CommandBufferData_1_0 commandBuffer = {}; commandBuffer.CommandsType = ObjectType::QueryHwCounters; commandBuffer.QueryHwCounters.Handle = pool.query; commandBuffer.QueryHwCounters.Begin = begin; commandBuffer.QueryHwCounters.Slot = slot; commandBuffer.Allocation.GpuAddress = pool.pAllocation->getGpuAddress(); commandBuffer.Allocation.CpuAddress = pool.pAllocation->getUnderlyingBuffer(); commandBuffer.Type = metricSource.isComputeUsed() ? GpuCommandBufferType::Compute : GpuCommandBufferType::Render; // Get query commands. 
result = metricsLibrary.getGpuCommands(commandList, commandBuffer); } // Write completion event. if (result && writeCompletionEvent) { result = commandList.appendSignalEvent(hSignalEvent) == ZE_RESULT_SUCCESS; } return result ? ZE_RESULT_SUCCESS : ZE_RESULT_ERROR_UNKNOWN; } ze_result_t OaMetricQueryImp::writeSkipExecutionQuery(CommandList &commandList, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, const bool begin) { bool writeCompletionEvent = hSignalEvent && !begin; bool result = false; // Obtain gpu commands. CommandBufferData_1_0 commandBuffer = {}; commandBuffer.CommandsType = ObjectType::OverrideNullHardware; commandBuffer.Override.Enable = begin; commandBuffer.Type = metricSource.isComputeUsed() ? GpuCommandBufferType::Compute : GpuCommandBufferType::Render; // Wait for events before executing query. zeCommandListAppendWaitOnEvents(commandList.toHandle(), numWaitEvents, phWaitEvents); // Get query commands. result = metricsLibrary.getGpuCommands(commandList, commandBuffer); // Write completion event. if (result && writeCompletionEvent) { result = zeCommandListAppendSignalEvent(commandList.toHandle(), hSignalEvent) == ZE_RESULT_SUCCESS; } return result ? 
ZE_RESULT_SUCCESS : ZE_RESULT_ERROR_UNKNOWN; } MetricQuery *MetricQuery::fromHandle(zet_metric_query_handle_t handle) { return static_cast(handle); } zet_metric_query_handle_t MetricQuery::toHandle() { return this; } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/metrics/metric_oa_query_imp.h000066400000000000000000000144721422164147700300430ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/tools/source/metrics/metric.h" #include #include using MetricsLibraryApi::ClientCallbacks_1_0; using MetricsLibraryApi::ClientGen; using MetricsLibraryApi::ClientOptionsData_1_0; using MetricsLibraryApi::CommandBufferData_1_0; using MetricsLibraryApi::ConfigurationHandle_1_0; using MetricsLibraryApi::ContextCreateData_1_0; using MetricsLibraryApi::ContextCreateFunction_1_0; using MetricsLibraryApi::ContextDeleteFunction_1_0; using MetricsLibraryApi::ContextHandle_1_0; using MetricsLibraryApi::Interface_1_0; using MetricsLibraryApi::QueryHandle_1_0; namespace L0 { struct Device; struct CommandList; struct MetricGroup; class OaMetricSourceImp; } // namespace L0 namespace NEO { class OsLibrary; class GraphicsAllocation; } // namespace NEO namespace L0 { struct MetricsLibrary { public: MetricsLibrary(OaMetricSourceImp &metricSource); virtual ~MetricsLibrary(); // Initialization. virtual bool load(); bool isInitialized(); ze_result_t getInitializationState(); void enableWorkloadPartition(); void getSubDeviceClientOptions(ClientOptionsData_1_0 &subDevice, ClientOptionsData_1_0 &subDeviceIndex, ClientOptionsData_1_0 &subDeviceCount, ClientOptionsData_1_0 &workloadPartition); static const char *getFilename(); // Deinitialization. void release(); // Metric query. 
uint32_t getQueryReportGpuSize(); bool createMetricQuery(const uint32_t slotsCount, QueryHandle_1_0 &query, NEO::GraphicsAllocation *&pAllocation); uint32_t getMetricQueryCount(); bool getMetricQueryReport(QueryHandle_1_0 &query, const uint32_t slot, const size_t rawDataSize, uint8_t *pData); virtual bool getMetricQueryReportSize(size_t &rawDataSize); bool destroyMetricQuery(QueryHandle_1_0 &query); // Command buffer. bool getGpuCommands(CommandList &commandList, CommandBufferData_1_0 &commandBuffer); bool getGpuCommands(CommandBufferData_1_0 &commandBuffer); uint32_t getGpuCommandsSize(CommandBufferData_1_0 &commandBuffer); // Metric group configuration. ConfigurationHandle_1_0 getConfiguration(const zet_metric_group_handle_t metricGroup); bool activateConfiguration(const ConfigurationHandle_1_0 configurationHandle); bool deactivateConfiguration(const ConfigurationHandle_1_0 configurationHandle); void cacheConfiguration(zet_metric_group_handle_t metricGroup, ConfigurationHandle_1_0 configurationHandle); void deleteAllConfigurations(); protected: void initialize(); bool createContext(); virtual bool getContextData(Device &device, ContextCreateData_1_0 &contextData); ConfigurationHandle_1_0 createConfiguration(const zet_metric_group_handle_t metricGroup, const zet_metric_group_properties_t properties); ConfigurationHandle_1_0 addConfiguration(const zet_metric_group_handle_t metricGroup); ClientGen getGenType(const uint32_t gen) const; protected: NEO::OsLibrary *handle = nullptr; OaMetricSourceImp &metricSource; ze_result_t initializationState = ZE_RESULT_ERROR_UNINITIALIZED; bool isWorkloadPartitionEnabled = false; std::mutex mutex; // Metrics library types. 
Interface_1_0 api = {}; ClientCallbacks_1_0 callbacks = {}; ContextHandle_1_0 context = {}; ContextCreateFunction_1_0 contextCreateFunction = nullptr; ContextDeleteFunction_1_0 contextDeleteFunction = nullptr; std::map configurations; std::vector queries; }; struct OaMetricQueryImp : MetricQuery { public: OaMetricQueryImp(OaMetricSourceImp &metricSource, struct OaMetricQueryPoolImp &pool, const uint32_t slot); ze_result_t appendBegin(CommandList &commandList) override; ze_result_t appendEnd(CommandList &commandList, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override; ze_result_t getData(size_t *pRawDataSize, uint8_t *pRawData) override; ze_result_t reset() override; ze_result_t destroy() override; std::vector &getMetricQueries(); protected: ze_result_t writeMetricQuery(CommandList &commandList, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, const bool begin); ze_result_t writeSkipExecutionQuery(CommandList &commandList, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, const bool begin); protected: OaMetricSourceImp &metricSource; MetricsLibrary &metricsLibrary; OaMetricQueryPoolImp &pool; uint32_t slot; std::vector metricQueries; }; struct OaMetricQueryPoolImp : MetricQueryPool { public: OaMetricQueryPoolImp(OaMetricSourceImp &metricSource, zet_metric_group_handle_t hEventMetricGroup, const zet_metric_query_pool_desc_t &poolDescription); bool create(); ze_result_t destroy() override; ze_result_t metricQueryCreate(uint32_t index, zet_metric_query_handle_t *phMetricQuery) override; bool allocateGpuMemory(); std::vector &getMetricQueryPools(); static ze_result_t metricQueryPoolCreate(zet_context_handle_t hContext, zet_device_handle_t hDevice, zet_metric_group_handle_t hMetricGroup, const zet_metric_query_pool_desc_t *pDesc, zet_metric_query_pool_handle_t *phMetricQueryPool); protected: bool createMetricQueryPool(); bool 
createSkipExecutionQueryPool(); public: OaMetricSourceImp &metricSource; MetricsLibrary &metricsLibrary; std::vector pool; NEO::GraphicsAllocation *pAllocation = nullptr; uint32_t allocationSize = 0; zet_metric_query_pool_desc_t description = {}; zet_metric_group_handle_t hMetricGroup = nullptr; QueryHandle_1_0 query = {}; protected: std::vector metricQueryPools; }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/metrics/metric_oa_source.cpp000066400000000000000000000122711422164147700276570ustar00rootroot00000000000000/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/metrics/metric_oa_source.h" #include "shared/source/os_interface/os_library.h" #include "level_zero/core/source/device/device_imp.h" #include "level_zero/tools/source/metrics/metric.h" #include "level_zero/tools/source/metrics/metric_oa_enumeration_imp.h" #include "level_zero/tools/source/metrics/metric_oa_query_imp.h" namespace L0 { OaMetricSourceImp::OsLibraryLoadPtr OaMetricSourceImp::osLibraryLoadFunction(NEO::OsLibrary::load); std::unique_ptr OaMetricSourceImp::create(const MetricDeviceContext &metricDeviceContext) { return std::unique_ptr(new (std::nothrow) OaMetricSourceImp(metricDeviceContext)); } OaMetricSourceImp::OaMetricSourceImp(const MetricDeviceContext &metricDeviceContext) : metricDeviceContext(metricDeviceContext), metricEnumeration(std::unique_ptr(new (std::nothrow) MetricEnumeration(*this))), metricsLibrary(std::unique_ptr(new (std::nothrow) MetricsLibrary(*this))) { } OaMetricSourceImp::~OaMetricSourceImp() = default; void OaMetricSourceImp::enable() { loadDependencies(); } bool OaMetricSourceImp::isAvailable() { return isInitialized(); } ze_result_t OaMetricSourceImp::appendMetricMemoryBarrier(CommandList &commandList) { DeviceImp *pDeviceImp = static_cast(commandList.device); if (pDeviceImp->metricContext->isImplicitScalingCapable()) { // Use one of the sub-device contexts to append to command 
list. pDeviceImp = static_cast(pDeviceImp->subDevices[0]); } auto &metricContext = pDeviceImp->getMetricDeviceContext(); auto &metricsLibrary = metricContext.getMetricSource().getMetricsLibrary(); // Obtain gpu commands. CommandBufferData_1_0 commandBuffer = {}; commandBuffer.CommandsType = MetricsLibraryApi::ObjectType::OverrideFlushCaches; commandBuffer.Override.Enable = true; commandBuffer.Type = metricContext.getMetricSource().isComputeUsed() ? MetricsLibraryApi::GpuCommandBufferType::Compute : MetricsLibraryApi::GpuCommandBufferType::Render; return metricsLibrary.getGpuCommands(commandList, commandBuffer) ? ZE_RESULT_SUCCESS : ZE_RESULT_ERROR_UNKNOWN; } bool OaMetricSourceImp::loadDependencies() { bool result = true; if (metricEnumeration->loadMetricsDiscovery() != ZE_RESULT_SUCCESS) { result = false; DEBUG_BREAK_IF(!result); } if (result && !metricsLibrary->load()) { result = false; DEBUG_BREAK_IF(!result); } // Set metric context initialization state. setInitializationState(result ? 
ZE_RESULT_SUCCESS : ZE_RESULT_ERROR_UNKNOWN); return result; } bool OaMetricSourceImp::isInitialized() { return initializationState == ZE_RESULT_SUCCESS; } void OaMetricSourceImp::setInitializationState(const ze_result_t state) { initializationState = state; } Device &OaMetricSourceImp::getDevice() { return metricDeviceContext.getDevice(); } MetricsLibrary &OaMetricSourceImp::getMetricsLibrary() { return *metricsLibrary; } MetricEnumeration &OaMetricSourceImp::getMetricEnumeration() { return *metricEnumeration; } MetricStreamer *OaMetricSourceImp::getMetricStreamer() { return pMetricStreamer; } void OaMetricSourceImp::setMetricStreamer(MetricStreamer *pMetricStreamer) { this->pMetricStreamer = pMetricStreamer; } void OaMetricSourceImp::setMetricsLibrary(MetricsLibrary &metricsLibrary) { this->metricsLibrary.release(); this->metricsLibrary.reset(&metricsLibrary); } void OaMetricSourceImp::setMetricEnumeration(MetricEnumeration &metricEnumeration) { this->metricEnumeration.release(); this->metricEnumeration.reset(&metricEnumeration); } void OaMetricSourceImp::setUseCompute(const bool useCompute) { this->useCompute = useCompute; } bool OaMetricSourceImp::isComputeUsed() const { return useCompute; } ze_result_t OaMetricSourceImp::metricGroupGet(uint32_t *pCount, zet_metric_group_handle_t *phMetricGroups) { return getMetricEnumeration().metricGroupGet(*pCount, phMetricGroups); } uint32_t OaMetricSourceImp::getSubDeviceIndex() { return metricDeviceContext.getSubDeviceIndex(); } bool OaMetricSourceImp::isMetricGroupActivated(const zet_metric_group_handle_t hMetricGroup) const { return metricDeviceContext.isMetricGroupActivated(hMetricGroup); } bool OaMetricSourceImp::isMetricGroupActivated() const { return metricDeviceContext.isMetricGroupActivated(); } bool OaMetricSourceImp::isImplicitScalingCapable() const { return metricDeviceContext.isImplicitScalingCapable(); } template <> OaMetricSourceImp &MetricDeviceContext::getMetricSource() const { return 
static_cast(*metricSources.at(MetricSource::SourceType::Oa)); } } // namespace L0compute-runtime-22.14.22890/level_zero/tools/source/metrics/metric_oa_source.h000066400000000000000000000050531422164147700273240ustar00rootroot00000000000000/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/tools/source/metrics/metric.h" #include "level_zero/tools/source/metrics/metric_oa_enumeration_imp.h" #include "level_zero/tools/source/metrics/metric_oa_query_imp.h" #include "level_zero/tools/source/metrics/metric_oa_streamer_imp.h" namespace NEO { class OsLibrary; } // namespace NEO namespace L0 { class OaMetricSourceImp : public MetricSource { public: OaMetricSourceImp(const MetricDeviceContext &metricDeviceContext); virtual ~OaMetricSourceImp(); void enable() override; bool isAvailable() override; ze_result_t metricGroupGet(uint32_t *pCount, zet_metric_group_handle_t *phMetricGroups) override; ze_result_t appendMetricMemoryBarrier(CommandList &commandList) override; bool loadDependencies(); bool isInitialized(); void setInitializationState(const ze_result_t state); Device &getDevice(); MetricsLibrary &getMetricsLibrary(); MetricEnumeration &getMetricEnumeration(); MetricStreamer *getMetricStreamer(); void setMetricStreamer(MetricStreamer *pMetricStreamer); void setMetricsLibrary(MetricsLibrary &metricsLibrary); void setMetricEnumeration(MetricEnumeration &metricEnumeration); ze_result_t activateMetricGroups(); ze_result_t activateMetricGroupsDeferred(const uint32_t count, zet_metric_group_handle_t *phMetricGroups); bool isMetricGroupActivated(const zet_metric_group_handle_t hMetricGroup) const; bool isMetricGroupActivated() const; void setUseCompute(const bool useCompute); bool isComputeUsed() const; uint32_t getSubDeviceIndex(); bool isImplicitScalingCapable() const; const MetricDeviceContext &getMetricDeviceContext() const { return metricDeviceContext; } static bool checkDependencies(); static std::unique_ptr 
create(const MetricDeviceContext &metricDeviceContext); using OsLibraryLoadPtr = std::add_pointer::type; static OsLibraryLoadPtr osLibraryLoadFunction; protected: ze_result_t initializationState = ZE_RESULT_ERROR_UNINITIALIZED; const MetricDeviceContext &metricDeviceContext; std::unique_ptr metricEnumeration = nullptr; std::unique_ptr metricsLibrary = nullptr; MetricStreamer *pMetricStreamer = nullptr; bool useCompute = false; }; template <> OaMetricSourceImp &MetricDeviceContext::getMetricSource() const; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/metrics/metric_oa_streamer_imp.cpp000066400000000000000000000352071422164147700310520ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/metrics/metric_oa_streamer_imp.h" #include "shared/source/helpers/debug_helpers.h" #include "level_zero/core/source/device/device_imp.h" #include "level_zero/tools/source/metrics/metric_oa_enumeration_imp.h" #include "level_zero/tools/source/metrics/metric_oa_query_imp.h" #include "level_zero/tools/source/metrics/metric_oa_source.h" using namespace MetricsLibraryApi; namespace L0 { ze_result_t OaMetricStreamerImp::readData(uint32_t maxReportCount, size_t *pRawDataSize, uint8_t *pRawData) { ze_result_t result = ZE_RESULT_SUCCESS; const size_t metricStreamerSize = metricStreamers.size(); if (metricStreamerSize > 0) { auto pMetricStreamer = MetricStreamer::fromHandle(metricStreamers[0]); // Return required size if requested. 
if (*pRawDataSize == 0) { const size_t headerSize = sizeof(MetricGroupCalculateHeader); const size_t rawDataOffsetsRequiredSize = sizeof(uint32_t) * metricStreamerSize; const size_t rawDataSizesRequiredSize = sizeof(uint32_t) * metricStreamerSize; const size_t rawDataRequiredSize = static_cast(pMetricStreamer)->getRequiredBufferSize(maxReportCount) * metricStreamerSize; *pRawDataSize = headerSize + rawDataOffsetsRequiredSize + rawDataSizesRequiredSize + rawDataRequiredSize; return ZE_RESULT_SUCCESS; } MetricGroupCalculateHeader *pRawDataHeader = reinterpret_cast(pRawData); pRawDataHeader->magic = MetricGroupCalculateHeader::magicValue; pRawDataHeader->dataCount = static_cast(metricStreamerSize); // Relative offsets in the header allow to move/copy the buffer. pRawDataHeader->rawDataOffsets = sizeof(MetricGroupCalculateHeader); pRawDataHeader->rawDataSizes = static_cast(pRawDataHeader->rawDataOffsets + (sizeof(uint32_t) * metricStreamerSize)); pRawDataHeader->rawDataOffset = static_cast(pRawDataHeader->rawDataSizes + (sizeof(uint32_t) * metricStreamerSize)); const size_t sizePerSubDevice = (*pRawDataSize - pRawDataHeader->rawDataOffset) / metricStreamerSize; DEBUG_BREAK_IF(sizePerSubDevice == 0); *pRawDataSize = pRawDataHeader->rawDataOffset; uint32_t *pRawDataOffsetsUnpacked = reinterpret_cast(pRawData + pRawDataHeader->rawDataOffsets); uint32_t *pRawDataSizesUnpacked = reinterpret_cast(pRawData + pRawDataHeader->rawDataSizes); uint8_t *pRawDataUnpacked = reinterpret_cast(pRawData + pRawDataHeader->rawDataOffset); for (size_t i = 0; i < metricStreamerSize; ++i) { size_t readSize = sizePerSubDevice; const uint32_t rawDataOffset = (i != 0) ? (pRawDataSizesUnpacked[i - 1] + pRawDataOffsetsUnpacked[i - 1]) : 0; pMetricStreamer = MetricStreamer::fromHandle(metricStreamers[i]); result = pMetricStreamer->readData(maxReportCount, &readSize, pRawDataUnpacked + rawDataOffset); // Return at first error. 
if (result != ZE_RESULT_SUCCESS) { return result; } pRawDataSizesUnpacked[i] = static_cast(readSize); pRawDataOffsetsUnpacked[i] = (i != 0) ? pRawDataOffsetsUnpacked[i - 1] + pRawDataSizesUnpacked[i - 1] : 0; *pRawDataSize += readSize; } } else { DEBUG_BREAK_IF(rawReportSize == 0); auto metricGroup = static_cast(MetricGroup::fromHandle(hMetricGroup)); // Return required size if requested. if (*pRawDataSize == 0) { *pRawDataSize = getRequiredBufferSize(maxReportCount); return ZE_RESULT_SUCCESS; } // User is expected to allocate space. DEBUG_BREAK_IF(pRawData == nullptr); // Retrieve the number of reports that fit into the buffer. uint32_t reportCount = static_cast(*pRawDataSize / rawReportSize); // Read streamer data. result = metricGroup->readIoStream(reportCount, *pRawData); if (result == ZE_RESULT_SUCCESS) { *pRawDataSize = reportCount * rawReportSize; } } return result; } ze_result_t OaMetricStreamerImp::close() { ze_result_t result = ZE_RESULT_SUCCESS; if (metricStreamers.size() > 0) { for (auto metricStreamerHandle : metricStreamers) { auto metricStreamer = MetricStreamer::fromHandle(metricStreamerHandle); auto tmpResult = metricStreamer->close(); // Hold the first error result. if (result == ZE_RESULT_SUCCESS) result = tmpResult; } // Delete metric streamer aggregator. if (result == ZE_RESULT_SUCCESS) { detachEvent(); delete this; } } else { result = stopMeasurements(); if (result == ZE_RESULT_SUCCESS) { auto device = Device::fromHandle(hDevice); auto &metricSource = device->getMetricDeviceContext().getMetricSource(); auto &metricsLibrary = metricSource.getMetricsLibrary(); // Clear metric streamer reference in context. // Another metric streamer instance or query can be used. metricSource.setMetricStreamer(nullptr); // Close metrics library (if was used to generate streamer's marker gpu commands). // It will allow metric query to use Linux Tbs stream exclusively // (to activate metric sets and to read context switch reports). 
metricsLibrary.release(); detachEvent(); // Delete metric streamer. delete this; } } return result; } ze_result_t OaMetricStreamerImp::initialize(ze_device_handle_t hDevice, zet_metric_group_handle_t hMetricGroup) { this->hDevice = hDevice; this->hMetricGroup = hMetricGroup; auto metricGroup = static_cast(MetricGroup::fromHandle(this->hMetricGroup)); rawReportSize = metricGroup->getRawReportSize(); return ZE_RESULT_SUCCESS; } ze_result_t OaMetricStreamerImp::startMeasurements(uint32_t ¬ifyEveryNReports, uint32_t &samplingPeriodNs) { auto metricGroup = static_cast(MetricGroup::fromHandle(hMetricGroup)); uint32_t requestedOaBufferSize = getOaBufferSize(notifyEveryNReports); const ze_result_t result = metricGroup->openIoStream(samplingPeriodNs, requestedOaBufferSize); // Return oa buffer size and notification event aligned to gpu capabilities. if (result == ZE_RESULT_SUCCESS) { oaBufferSize = requestedOaBufferSize; notifyEveryNReports = getNotifyEveryNReports(requestedOaBufferSize); } return result; } void OaMetricStreamerImp::attachEvent(ze_event_handle_t hNotificationEvent) { // Associate notification event with metric streamer. pNotificationEvent = Event::fromHandle(hNotificationEvent); if (pNotificationEvent != nullptr) { pNotificationEvent->metricStreamer = this; } } void OaMetricStreamerImp::detachEvent() { // Release notification event. if (pNotificationEvent != nullptr) { pNotificationEvent->metricStreamer = nullptr; } } ze_result_t OaMetricStreamerImp::stopMeasurements() { auto metricGroup = static_cast(MetricGroup::fromHandle(hMetricGroup)); const ze_result_t result = metricGroup->closeIoStream(); if (result == ZE_RESULT_SUCCESS) { oaBufferSize = 0; } return result; } uint32_t OaMetricStreamerImp::getOaBufferSize(const uint32_t notifyEveryNReports) const { // Notification is on half full buffer, hence multiplication by 2. 
return notifyEveryNReports * rawReportSize * 2; } uint32_t OaMetricStreamerImp::getNotifyEveryNReports(const uint32_t oaBufferSize) const { // Notification is on half full buffer, hence division by 2. return rawReportSize ? oaBufferSize / (rawReportSize * 2) : 0; } Event::State OaMetricStreamerImp::getNotificationState() { if (metricStreamers.size() > 0) { for (auto metricStreamer : metricStreamers) { // Return Signalled if report is available on any subdevice. if (MetricStreamer::fromHandle(metricStreamer)->getNotificationState() == Event::State::STATE_SIGNALED) { return Event::State::STATE_SIGNALED; } } return Event::State::STATE_INITIAL; } auto metricGroup = static_cast(MetricGroup::fromHandle(hMetricGroup)); bool reportsReady = metricGroup->waitForReports(0) == ZE_RESULT_SUCCESS; return reportsReady ? Event::State::STATE_SIGNALED : Event::State::STATE_INITIAL; } std::vector &OaMetricStreamerImp::getMetricStreamers() { return metricStreamers; } uint32_t OaMetricStreamerImp::getRequiredBufferSize(const uint32_t maxReportCount) const { DEBUG_BREAK_IF(rawReportSize == 0); uint32_t maxOaBufferReportCount = oaBufferSize / rawReportSize; // Trim to OA buffer size if needed. return maxReportCount > maxOaBufferReportCount ? oaBufferSize : maxReportCount * rawReportSize; } ze_result_t OaMetricGroupImp::openForDevice(Device *pDevice, zet_metric_streamer_desc_t &desc, zet_metric_streamer_handle_t *phMetricStreamer) { auto &metricSource = pDevice->getMetricDeviceContext().getMetricSource(); *phMetricStreamer = nullptr; // Check whether metric streamer is already open. if (metricSource.getMetricStreamer() != nullptr) { return ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE; } // metric streamer cannot be used with query simultaneously // (oa buffer cannot be shared). if (metricSource.getMetricsLibrary().getMetricQueryCount() > 0) { return ZE_RESULT_ERROR_NOT_AVAILABLE; } // Unload metrics library if there are no active queries. // It will allow to open metric streamer. 
Query and streamer cannot be used // simultaneously since they use the same exclusive resource (oa buffer). if (metricSource.getMetricsLibrary().getInitializationState() == ZE_RESULT_SUCCESS) { metricSource.getMetricsLibrary().release(); } // Check metric group sampling type. zet_metric_group_properties_t metricGroupProperties = {ZET_STRUCTURE_TYPE_METRIC_GROUP_PROPERTIES}; getProperties(&metricGroupProperties); if (metricGroupProperties.samplingType != ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_TIME_BASED) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } // Check whether metric group is activated. if (!metricSource.isMetricGroupActivated(toHandle())) { return ZE_RESULT_NOT_READY; } auto pMetricStreamer = new OaMetricStreamerImp(); UNRECOVERABLE_IF(pMetricStreamer == nullptr); pMetricStreamer->initialize(pDevice->toHandle(), toHandle()); const ze_result_t result = pMetricStreamer->startMeasurements( desc.notifyEveryNReports, desc.samplingPeriod); if (result == ZE_RESULT_SUCCESS) { metricSource.setMetricStreamer(pMetricStreamer); } else { delete pMetricStreamer; pMetricStreamer = nullptr; return ZE_RESULT_ERROR_UNKNOWN; } *phMetricStreamer = pMetricStreamer->toHandle(); return ZE_RESULT_SUCCESS; } ze_result_t OaMetricGroupImp::streamerOpen( zet_context_handle_t hContext, zet_device_handle_t hDevice, zet_metric_streamer_desc_t *desc, ze_event_handle_t hNotificationEvent, zet_metric_streamer_handle_t *phMetricStreamer) { ze_result_t result = ZE_RESULT_SUCCESS; auto pDevice = Device::fromHandle(hDevice); const auto pDeviceImp = static_cast(pDevice); if (pDeviceImp->metricContext->isImplicitScalingCapable()) { const uint32_t subDeviceCount = pDeviceImp->numSubDevices; auto pMetricStreamer = new OaMetricStreamerImp(); UNRECOVERABLE_IF(pMetricStreamer == nullptr); auto &metricStreamers = pMetricStreamer->getMetricStreamers(); metricStreamers.resize(subDeviceCount); for (uint32_t i = 0; i < subDeviceCount; i++) { auto metricGroupsSubDevice = 
static_cast(MetricGroup::fromHandle(getMetricGroups()[i])); result = metricGroupsSubDevice->openForDevice(pDeviceImp->subDevices[i], *desc, &metricStreamers[i]); if (result != ZE_RESULT_SUCCESS) { for (uint32_t j = 0; j < i; j++) { auto metricStreamerSubDevice = MetricStreamer::fromHandle(metricStreamers[j]); delete metricStreamerSubDevice; } delete pMetricStreamer; return result; } } *phMetricStreamer = pMetricStreamer->toHandle(); } else { result = openForDevice(pDevice, *desc, phMetricStreamer); } if (result == ZE_RESULT_SUCCESS) { OaMetricStreamerImp *metImp = static_cast(MetricStreamer::fromHandle(*phMetricStreamer)); metImp->attachEvent(hNotificationEvent); } return result; } ze_result_t OaMetricStreamerImp::appendStreamerMarker(CommandList &commandList, uint32_t value) { DeviceImp *pDeviceImp = static_cast(commandList.device); if (pDeviceImp->metricContext->isImplicitScalingCapable()) { // Use one of the sub-device contexts to append to command list. pDeviceImp = static_cast(pDeviceImp->subDevices[0]); pDeviceImp->metricContext->getMetricSource().getMetricsLibrary().enableWorkloadPartition(); } OaMetricSourceImp &metricSource = pDeviceImp->metricContext->getMetricSource(); auto &metricsLibrary = metricSource.getMetricsLibrary(); const uint32_t streamerMarkerHighBitsShift = 25; // Obtain gpu commands. CommandBufferData_1_0 commandBuffer = {}; commandBuffer.CommandsType = ObjectType::MarkerStreamUser; commandBuffer.MarkerStreamUser.Value = value; commandBuffer.MarkerStreamUser.Reserved = (value >> streamerMarkerHighBitsShift); commandBuffer.Type = metricSource.isComputeUsed() ? GpuCommandBufferType::Compute : GpuCommandBufferType::Render; return metricsLibrary.getGpuCommands(commandList, commandBuffer) ? 
ZE_RESULT_SUCCESS : ZE_RESULT_ERROR_UNKNOWN; } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/metrics/metric_oa_streamer_imp.h000066400000000000000000000026471422164147700305210ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/tools/source/metrics/metric.h" struct Event; namespace L0 { struct OaMetricStreamerImp : MetricStreamer { ~OaMetricStreamerImp() override{}; ze_result_t readData(uint32_t maxReportCount, size_t *pRawDataSize, uint8_t *pRawData) override; ze_result_t close() override; ze_result_t initialize(ze_device_handle_t hDevice, zet_metric_group_handle_t hMetricGroup); ze_result_t startMeasurements(uint32_t ¬ifyEveryNReports, uint32_t &samplingPeriodNs); Event::State getNotificationState() override; void attachEvent(ze_event_handle_t hNotificationEvent); void detachEvent(); ze_result_t appendStreamerMarker(CommandList &commandList, uint32_t value) override; std::vector &getMetricStreamers(); protected: ze_result_t stopMeasurements(); uint32_t getOaBufferSize(const uint32_t notifyEveryNReports) const; uint32_t getNotifyEveryNReports(const uint32_t oaBufferSize) const; uint32_t getRequiredBufferSize(const uint32_t maxReportCount) const; ze_device_handle_t hDevice = nullptr; zet_metric_group_handle_t hMetricGroup = nullptr; Event *pNotificationEvent = nullptr; uint32_t rawReportSize = 0; uint32_t oaBufferSize = 0; std::vector metricStreamers; }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/metrics/os_metric_ip_sampling.h000066400000000000000000000015001422164147700303410ustar00rootroot00000000000000/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include #include namespace L0 { struct Device; class MetricIpSamplingOsInterface { public: virtual ~MetricIpSamplingOsInterface() = default; virtual ze_result_t startMeasurement(uint32_t ¬ifyEveryNReports, uint32_t 
&samplingPeriodNs) = 0; virtual ze_result_t stopMeasurement() = 0; virtual ze_result_t readData(uint8_t *pRawData, size_t *pRawDataSize) = 0; virtual uint32_t getRequiredBufferSize(const uint32_t maxReportCount) = 0; virtual uint32_t getUnitReportSize() = 0; virtual bool isNReportsAvailable() = 0; virtual bool isDependencyAvailable() = 0; static std::unique_ptr create(Device &device); }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/metrics/windows/000077500000000000000000000000001422164147700253205ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/metrics/windows/CMakeLists.txt000066400000000000000000000011541422164147700300610ustar00rootroot00000000000000# # Copyright (C) 2020-2022 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_TOOLS_WINDOWS ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/os_metric_oa_query_imp_windows.cpp ${CMAKE_CURRENT_SOURCE_DIR}/os_metric_oa_enumeration_imp_windows.cpp ${CMAKE_CURRENT_SOURCE_DIR}/os_metric_ip_sampling_imp_windows.cpp ) if(WIN32) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_WINDOWS} ) # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_WINDOWS ${L0_SRCS_TOOLS_WINDOWS}) endif() os_metric_ip_sampling_imp_windows.cpp000066400000000000000000000023211422164147700347300ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/metrics/windows/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/device/device.h" #include "level_zero/tools/source/metrics/os_metric_ip_sampling.h" namespace L0 { class MetricIpSamplingWindowsImp : public MetricIpSamplingOsInterface { public: MetricIpSamplingWindowsImp() {} ze_result_t startMeasurement(uint32_t ¬ifyEveryNReports, uint32_t &samplingPeriodNs) override { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t stopMeasurement() override { return 
ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t readData(uint8_t *pRawData, size_t *pRawDataSize) override { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } uint32_t getRequiredBufferSize(const uint32_t maxReportCount) override { return 0u; } uint32_t getUnitReportSize() override { return 0u; } bool isNReportsAvailable() override { return false; } bool isDependencyAvailable() override { return false; } }; std::unique_ptr MetricIpSamplingOsInterface::create(Device &device) { return std::make_unique(); } } // namespace L0 os_metric_oa_enumeration_imp_windows.cpp000066400000000000000000000040001422164147700354270ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/metrics/windows/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/os_interface.h" #include "shared/source/os_interface/windows/wddm/wddm.h" #include "level_zero/tools/source/metrics/metric_oa_enumeration_imp.h" #include "level_zero/tools/source/metrics/metric_oa_source.h" #if defined(_WIN64) #define METRICS_DISCOVERY_NAME "igdmd64.dll" #elif defined(_WIN32) #define METRICS_DISCOVERY_NAME "igdmd32.dll" #else #error "Unsupported OS" #endif namespace L0 { const char *MetricEnumeration::getMetricsDiscoveryFilename() { return METRICS_DISCOVERY_NAME; } bool MetricEnumeration::getAdapterId(uint32_t &major, uint32_t &minor) { auto &device = metricSource.getMetricDeviceContext().getDevice(); auto wddm = device.getOsInterface().getDriverModel()->as(); auto luid = wddm->getAdapterLuid(); major = luid.HighPart; minor = luid.LowPart; return true; } MetricsDiscovery::IAdapter_1_9 *MetricEnumeration::getMetricsAdapter() { uint32_t major = 0; uint32_t minor = 0; UNRECOVERABLE_IF(pAdapterGroup == nullptr); UNRECOVERABLE_IF(getAdapterId(major, minor) == false); // Enumerate metrics discovery adapters. 
for (uint32_t index = 0, count = pAdapterGroup->GetParams()->AdapterCount; index < count; ++index) { UNRECOVERABLE_IF(pAdapterGroup->GetAdapter(index) == nullptr); UNRECOVERABLE_IF(pAdapterGroup->GetAdapter(index)->GetParams() == nullptr); auto adapter = pAdapterGroup->GetAdapter(index); auto adapterParams = adapter->GetParams(); const bool validAdapterInfo = adapterParams->SystemId.Type == MetricsDiscovery::ADAPTER_ID_TYPE_LUID; const bool validAdapterMatch = (adapterParams->SystemId.Luid.HighPart == major) && (adapterParams->SystemId.Luid.LowPart == minor); if (validAdapterInfo && validAdapterMatch) { return adapter; } } return nullptr; } } // namespace L0 os_metric_oa_query_imp_windows.cpp000066400000000000000000000046231422164147700342610ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/metrics/windows/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/os_interface.h" #include "shared/source/os_interface/windows/wddm/wddm.h" #include "level_zero/core/source/device/device.h" #include "level_zero/tools/source/metrics/metric_oa_query_imp.h" #if defined(_WIN64) #define METRICS_LIBRARY_NAME "igdml64.dll" #elif defined(_WIN32) #define METRICS_LIBRARY_NAME "igdml32.dll" #else #error "Unsupported OS" #endif using namespace MetricsLibraryApi; namespace L0 { const char *MetricsLibrary::getFilename() { return METRICS_LIBRARY_NAME; } bool MetricsLibrary::getContextData(Device &device, ContextCreateData_1_0 &contextData) { auto wddm = device.getOsInterface().getDriverModel()->as(); auto &osData = contextData.ClientData->Windows; // Copy escape data (adapter/device/escape function). 
osData.KmdInstrumentationEnabled = true; osData.Device = reinterpret_cast(static_cast(wddm->getDeviceHandle())); osData.Escape = wddm->getEscapeHandle(); osData.Adapter = reinterpret_cast(static_cast(wddm->getAdapter())); return osData.Device && osData.Escape && osData.Adapter; } bool MetricsLibrary::activateConfiguration(const ConfigurationHandle_1_0 configurationHandle) { ConfigurationActivateData_1_0 activateData = {}; activateData.Type = GpuConfigurationActivationType::EscapeCode; const bool validMetricsLibrary = isInitialized(); const bool validConfiguration = configurationHandle.IsValid(); const bool result = validMetricsLibrary && validConfiguration && (api.ConfigurationActivate(configurationHandle, &activateData) == StatusCode::Success); DEBUG_BREAK_IF(!result); return result; } bool MetricsLibrary::deactivateConfiguration(const ConfigurationHandle_1_0 configurationHandle) { const bool validMetricsLibrary = isInitialized(); const bool validConfiguration = configurationHandle.IsValid(); const bool result = validMetricsLibrary && validConfiguration && (api.ConfigurationDeactivate(configurationHandle) == StatusCode::Success); DEBUG_BREAK_IF(!result); return result; } void MetricsLibrary::cacheConfiguration(zet_metric_group_handle_t metricGroup, ConfigurationHandle_1_0 configurationHandle) { configurations[metricGroup] = configurationHandle; } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/pin/000077500000000000000000000000001422164147700227465ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/pin/CMakeLists.txt000066400000000000000000000007331422164147700255110ustar00rootroot00000000000000# # Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_TOOLS_PIN ${CMAKE_CURRENT_SOURCE_DIR}/pin.cpp ${CMAKE_CURRENT_SOURCE_DIR}/pin.h ) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_PIN} ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) add_subdirectories() # Make our source 
files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_PIN ${L0_SRCS_TOOLS_PIN}) compute-runtime-22.14.22890/level_zero/tools/source/pin/linux/000077500000000000000000000000001422164147700241055ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/pin/linux/CMakeLists.txt000066400000000000000000000011231422164147700266420ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_TOOLS_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/os_pin.h ) if(UNIX) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_LINUX} ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) target_include_directories(${L0_STATIC_LIB_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} ) endif() # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_LINUX ${L0_SRCS_TOOLS_LINUX}) compute-runtime-22.14.22890/level_zero/tools/source/pin/linux/os_pin.h000066400000000000000000000004551422164147700255510ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/tools/source/pin/pin.h" namespace L0 { typedef uint32_t (*OpenGTPin_fn)(void *gtPinInit); const std::string PinContext::gtPinLibraryFilename = "libgtpin.so"; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/pin/pin.cpp000066400000000000000000000030641422164147700242430ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "pin.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/helpers/debug_helpers.h" #include "level_zero/source/inc/ze_intel_gpu.h" #include "os_pin.h" const std::string gtPinOpenFunctionName = "OpenGTPin"; namespace L0 { ze_result_t PinContext::init() { std::unique_ptr hGtPinLibrary = nullptr; hGtPinLibrary.reset(NEO::OsLibrary::load(gtPinLibraryFilename.c_str())); if (hGtPinLibrary.get() == nullptr) { 
PRINT_DEBUG_STRING(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Unable to find gtpin library %s\n", gtPinLibraryFilename.c_str()); return ZE_RESULT_ERROR_UNKNOWN; } OpenGTPin_fn openGTPin = reinterpret_cast(hGtPinLibrary.get()->getProcAddress(gtPinOpenFunctionName.c_str())); if (openGTPin == nullptr) { hGtPinLibrary.reset(nullptr); PRINT_DEBUG_STRING(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Unable to find gtpin library open function symbol %s\n", gtPinOpenFunctionName.c_str()); return ZE_RESULT_ERROR_UNKNOWN; } uint32_t openResult = openGTPin(nullptr); if (openResult != 0) { hGtPinLibrary.reset(nullptr); PRINT_DEBUG_STRING(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "gtpin library open %s failed with status %u\n", gtPinOpenFunctionName.c_str(), openResult); return ZE_RESULT_ERROR_UNKNOWN; } return ZE_RESULT_SUCCESS; } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/pin/pin.h000066400000000000000000000005751422164147700237140ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/os_interface/os_library.h" #include #include namespace L0 { class PinContext { public: static ze_result_t init(); private: static const std::string gtPinLibraryFilename; }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/pin/windows/000077500000000000000000000000001422164147700244405ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/pin/windows/CMakeLists.txt000066400000000000000000000011351422164147700272000ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_TOOLS_WINDOWS ${CMAKE_CURRENT_SOURCE_DIR}/os_pin.h ) if(WIN32) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_WINDOWS} ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) target_include_directories(${L0_STATIC_LIB_NAME} PRIVATE 
${CMAKE_CURRENT_SOURCE_DIR} ) endif() # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_WINDOWS ${L0_SRCS_TOOLS_WINDOWS}) compute-runtime-22.14.22890/level_zero/tools/source/pin/windows/os_pin.h000066400000000000000000000004651422164147700261050ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/tools/source/pin/pin.h" namespace L0 { typedef uint32_t(__fastcall *OpenGTPin_fn)(void *gtPinInit); const std::string PinContext::gtPinLibraryFilename = "gtpin.dll"; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/000077500000000000000000000000001422164147700234725ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/CMakeLists.txt000066400000000000000000000011111422164147700262240ustar00rootroot00000000000000# # Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_TOOLS_SYSMAN ${CMAKE_CURRENT_SOURCE_DIR}/sysman.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sysman.h ${CMAKE_CURRENT_SOURCE_DIR}/sysman_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sysman_imp.h ) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_SYSMAN} ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) add_subdirectories() # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_SYSMAN ${L0_SRCS_TOOLS_SYSMAN}) compute-runtime-22.14.22890/level_zero/tools/source/sysman/diagnostics/000077500000000000000000000000001422164147700260015ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/diagnostics/CMakeLists.txt000066400000000000000000000012721422164147700305430ustar00rootroot00000000000000# # Copyright (C) 2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_TOOLS_SYSMAN_DIAGNOSTICS ${CMAKE_CURRENT_SOURCE_DIR}/diagnostics.cpp ${CMAKE_CURRENT_SOURCE_DIR}/diagnostics.h ${CMAKE_CURRENT_SOURCE_DIR}/diagnostics_imp.cpp 
${CMAKE_CURRENT_SOURCE_DIR}/diagnostics_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/os_diagnostics.h ) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_SYSMAN_DIAGNOSTICS} ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) add_subdirectories() # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_SYSMAN_DIAGNOSTICS ${L0_SRCS_TOOLS_SYSMAN_DIAGNOSTICS}) compute-runtime-22.14.22890/level_zero/tools/source/sysman/diagnostics/diagnostics.cpp000066400000000000000000000031531422164147700310160ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/basic_math.h" #include "level_zero/tools/source/sysman/diagnostics/diagnostics_imp.h" namespace L0 { class OsDiagnostics; DiagnosticsHandleContext::~DiagnosticsHandleContext() { releaseDiagnosticsHandles(); } void DiagnosticsHandleContext::releaseDiagnosticsHandles() { for (Diagnostics *pDiagnostics : handleList) { delete pDiagnostics; } handleList.clear(); } void DiagnosticsHandleContext::createHandle(ze_device_handle_t deviceHandle, const std::string &diagTests) { Diagnostics *pDiagnostics = new DiagnosticsImp(pOsSysman, diagTests, deviceHandle); handleList.push_back(pDiagnostics); } void DiagnosticsHandleContext::init(std::vector &deviceHandles) { OsDiagnostics::getSupportedDiagTestsFromFW(pOsSysman, supportedDiagTests); for (const auto &deviceHandle : deviceHandles) { for (const std::string &diagTests : supportedDiagTests) { createHandle(deviceHandle, diagTests); } } } ze_result_t DiagnosticsHandleContext::diagnosticsGet(uint32_t *pCount, zes_diag_handle_t *phDiagnostics) { uint32_t handleListSize = static_cast(handleList.size()); uint32_t numToCopy = std::min(*pCount, handleListSize); if (0 == *pCount || *pCount > handleListSize) { *pCount = handleListSize; } if (nullptr != phDiagnostics) { for (uint32_t i = 0; i < numToCopy; i++) { phDiagnostics[i] = handleList[i]->toHandle(); } } return ZE_RESULT_SUCCESS; } } // 
namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/diagnostics/diagnostics.h000066400000000000000000000027371422164147700304720ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/core/source/device/device.h" #include #include #include struct _zes_diag_handle_t { virtual ~_zes_diag_handle_t() = default; }; namespace L0 { struct OsSysman; class Diagnostics : _zes_diag_handle_t { public: virtual ~Diagnostics() {} virtual ze_result_t diagnosticsGetProperties(zes_diag_properties_t *pProperties) = 0; virtual ze_result_t diagnosticsGetTests(uint32_t *pCount, zes_diag_test_t *pTests) = 0; virtual ze_result_t diagnosticsRunTests(uint32_t start, uint32_t end, zes_diag_result_t *pResult) = 0; inline zes_diag_handle_t toHandle() { return this; } static Diagnostics *fromHandle(zes_diag_handle_t handle) { return static_cast(handle); } }; struct DiagnosticsHandleContext { DiagnosticsHandleContext(OsSysman *pOsSysman) : pOsSysman(pOsSysman){}; void releaseDiagnosticsHandles(); MOCKABLE_VIRTUAL ~DiagnosticsHandleContext(); MOCKABLE_VIRTUAL void init(std::vector &deviceHandles); ze_result_t diagnosticsGet(uint32_t *pCount, zes_diag_handle_t *phDiagnostics); std::vector supportedDiagTests = {}; OsSysman *pOsSysman = nullptr; std::vector handleList = {}; private: void createHandle(ze_device_handle_t deviceHandle, const std::string &DiagTests); }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/diagnostics/diagnostics_imp.cpp000066400000000000000000000024371422164147700316670ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "diagnostics_imp.h" #include "shared/source/helpers/debug_helpers.h" #include "os_diagnostics.h" #include namespace L0 { ze_result_t DiagnosticsImp::diagnosticsGetProperties(zes_diag_properties_t *pProperties) { pOsDiagnostics->osGetDiagProperties(pProperties); 
return ZE_RESULT_SUCCESS; } ze_result_t DiagnosticsImp::diagnosticsGetTests(uint32_t *pCount, zes_diag_test_t *pTests) { return pOsDiagnostics->osGetDiagTests(pCount, pTests); } ze_result_t DiagnosticsImp::diagnosticsRunTests(uint32_t start, uint32_t end, zes_diag_result_t *pResult) { return pOsDiagnostics->osRunDiagTests(start, end, pResult); } DiagnosticsImp::DiagnosticsImp(OsSysman *pOsSysman, const std::string &initalizedDiagTest, ze_device_handle_t handle) : deviceHandle(handle) { ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES}; Device::fromHandle(deviceHandle)->getProperties(&deviceProperties); pOsDiagnostics = OsDiagnostics::create(pOsSysman, initalizedDiagTest, deviceProperties.flags & ZE_DEVICE_PROPERTY_FLAG_SUBDEVICE, deviceProperties.subdeviceId); UNRECOVERABLE_IF(nullptr == pOsDiagnostics); } DiagnosticsImp::~DiagnosticsImp() { } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/diagnostics/diagnostics_imp.h000066400000000000000000000021031422164147700313220ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/non_copyable_or_moveable.h" #include "shared/source/helpers/string.h" #include "level_zero/tools/source/sysman/diagnostics/diagnostics.h" #include "level_zero/tools/source/sysman/diagnostics/os_diagnostics.h" #include namespace L0 { class OsDiagnostics; class DiagnosticsImp : public Diagnostics, NEO::NonCopyableOrMovableClass { public: ze_result_t diagnosticsGetProperties(zes_diag_properties_t *pProperties) override; ze_result_t diagnosticsGetTests(uint32_t *pCount, zes_diag_test_t *pTests) override; ze_result_t diagnosticsRunTests(uint32_t start, uint32_t end, zes_diag_result_t *pResult) override; DiagnosticsImp() = default; DiagnosticsImp(OsSysman *pOsSysman, const std::string &initalizedDiagTest, ze_device_handle_t handle); ~DiagnosticsImp() override; std::unique_ptr pOsDiagnostics = 
nullptr; private: ze_device_handle_t deviceHandle = nullptr; }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/diagnostics/linux/000077500000000000000000000000001422164147700271405ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/diagnostics/linux/CMakeLists.txt000066400000000000000000000015551422164147700317060ustar00rootroot00000000000000# # Copyright (C) 2021-2022 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_TOOLS_SYSMAN_DIAGNOSTICS_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/os_diagnostics_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/os_diagnostics_imp.h ) if(NEO_ENABLE_i915_PRELIM_DETECTION) list(APPEND L0_SRCS_TOOLS_SYSMAN_DIAGNOSTICS_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/os_diagnostics_helper_prelim.cpp ) else() list(APPEND L0_SRCS_TOOLS_SYSMAN_DIAGNOSTICS_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/os_diagnostics_helper.cpp ) endif() if(UNIX) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_SYSMAN_DIAGNOSTICS_LINUX} ) endif() # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_SYSMAN_DIAGNOSTICS_LINUX ${L0_SRCS_TOOLS_SYSMAN_DIAGNOSTICS_LINUX}) os_diagnostics_helper.cpp000066400000000000000000000007541422164147700341420ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/diagnostics/linux/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/string.h" #include "level_zero/tools/source/sysman/diagnostics/linux/os_diagnostics_imp.h" namespace L0 { void OsDiagnostics::getSupportedDiagTestsFromFW(void *pOsSysman, std::vector &supportedDiagTests) { } ze_result_t LinuxDiagnosticsImp::osRunDiagTestsinFW(zes_diag_result_t *pResult) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } } // namespace 
L0os_diagnostics_helper_prelim.cpp000066400000000000000000000057531422164147700355160ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/diagnostics/linux/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/string.h" #include "shared/source/os_interface/device_factory.h" #include "level_zero/core/source/device/device_imp.h" #include "level_zero/tools/source/sysman/diagnostics/linux/os_diagnostics_imp.h" #include namespace L0 { //All memory mappings where LMEMBAR is being referenced are invalidated. //Also prevents new ones from being created. //It will invalidate LMEM memory mappings only when sysfs entry quiesce_gpu is set. //the sysfs node will be at /sys/class/drm/card/invalidate_lmem_mmaps const std::string LinuxDiagnosticsImp::invalidateLmemFile("invalidate_lmem_mmaps"); // the sysfs node will be at /sys/class/drm/card/quiesce_gpu const std::string LinuxDiagnosticsImp::quiescentGpuFile("quiesce_gpu"); void OsDiagnostics::getSupportedDiagTestsFromFW(void *pOsSysman, std::vector &supportedDiagTests) { LinuxSysmanImp *pLinuxSysmanImp = static_cast(pOsSysman); if (IGFX_PVC == pLinuxSysmanImp->getProductFamily()) { FirmwareUtil *pFwInterface = pLinuxSysmanImp->getFwUtilInterface(); if (pFwInterface != nullptr) { if (ZE_RESULT_SUCCESS == static_cast(pFwInterface)->fwDeviceInit()) { static_cast(pFwInterface)->fwSupportedDiagTests(supportedDiagTests); } } } } ze_result_t LinuxDiagnosticsImp::osRunDiagTestsinFW(zes_diag_result_t *pResult) { const int intVal = 1; // before running diagnostics need to close all active workloads // writing 1 to /sys/class/drm/card/quiesce_gpu will signal KMD //GPU (every gt in the card) will be wedged. 
// GPU will only be unwedged after warm/cold reset ::pid_t myPid = pProcfsAccess->myProcessId(); std::vector<::pid_t> processes; ze_result_t result = pProcfsAccess->listProcesses(processes); if (ZE_RESULT_SUCCESS != result) { return result; } for (auto &&pid : processes) { std::vector fds; pLinuxSysmanImp->getPidFdsForOpenDevice(pProcfsAccess, pSysfsAccess, pid, fds); if (pid == myPid) { // L0 is expected to have this file open. // Keep list of fds. Close before unbind. continue; } if (!fds.empty()) { pProcfsAccess->kill(pid); } } result = pSysfsAccess->write(quiescentGpuFile, intVal); if (ZE_RESULT_SUCCESS != result) { return result; } result = pSysfsAccess->write(invalidateLmemFile, intVal); if (ZE_RESULT_SUCCESS != result) { return result; } pFwInterface->fwRunDiagTests(osDiagType, pResult); pLinuxSysmanImp->diagnosticsReset = true; if (*pResult == ZES_DIAG_RESULT_REBOOT_FOR_REPAIR) { return pLinuxSysmanImp->osColdReset(); } return pLinuxSysmanImp->osWarmReset(); // we need to at least do a Warm reset to bring the machine out of wedged state } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/diagnostics/linux/os_diagnostics_imp.cpp000066400000000000000000000040561422164147700335260ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/diagnostics/linux/os_diagnostics_imp.h" #include "shared/source/helpers/string.h" #include "level_zero/core/source/device/device_imp.h" namespace L0 { const std::string LinuxDiagnosticsImp::deviceDir("device"); void LinuxDiagnosticsImp::osGetDiagProperties(zes_diag_properties_t *pProperties) { pProperties->onSubdevice = isSubdevice; pProperties->subdeviceId = subdeviceId; pProperties->haveTests = 0; // osGetDiagTests is Unsupported strncpy_s(pProperties->name, ZES_STRING_PROPERTY_SIZE, osDiagType.c_str(), osDiagType.size()); return; } ze_result_t LinuxDiagnosticsImp::osGetDiagTests(uint32_t *pCount, 
zes_diag_test_t *pTests) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t LinuxDiagnosticsImp::osRunDiagTests(uint32_t start, uint32_t end, zes_diag_result_t *pResult) { return osRunDiagTestsinFW(pResult); } LinuxDiagnosticsImp::LinuxDiagnosticsImp(OsSysman *pOsSysman, const std::string &diagTests, ze_bool_t onSubdevice, uint32_t subdeviceId) : osDiagType(diagTests), isSubdevice(onSubdevice), subdeviceId(subdeviceId) { pLinuxSysmanImp = static_cast(pOsSysman); pFwInterface = pLinuxSysmanImp->getFwUtilInterface(); pSysfsAccess = &pLinuxSysmanImp->getSysfsAccess(); pFsAccess = &pLinuxSysmanImp->getFsAccess(); pProcfsAccess = &pLinuxSysmanImp->getProcfsAccess(); pDevice = pLinuxSysmanImp->getDeviceHandle(); auto device = static_cast(pDevice); executionEnvironment = device->getNEODevice()->getExecutionEnvironment(); rootDeviceIndex = device->getNEODevice()->getRootDeviceIndex(); } std::unique_ptr OsDiagnostics::create(OsSysman *pOsSysman, const std::string &diagTests, ze_bool_t onSubdevice, uint32_t subdeviceId) { std::unique_ptr pLinuxDiagnosticsImp = std::make_unique(pOsSysman, diagTests, onSubdevice, subdeviceId); return pLinuxDiagnosticsImp; } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/diagnostics/linux/os_diagnostics_imp.h000066400000000000000000000040611422164147700331670ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/non_copyable_or_moveable.h" #include "sysman/diagnostics/diagnostics_imp.h" #include "sysman/diagnostics/os_diagnostics.h" #include "sysman/linux/os_sysman_imp.h" namespace L0 { class LinuxDiagnosticsImp : public OsDiagnostics, NEO::NonCopyableOrMovableClass { public: void osGetDiagProperties(zes_diag_properties_t *pProperties) override; ze_result_t osGetDiagTests(uint32_t *pCount, zes_diag_test_t *pTests) override; ze_result_t osRunDiagTests(uint32_t start, uint32_t end, zes_diag_result_t 
*pResult) override; ze_result_t osRunDiagTestsinFW(zes_diag_result_t *pResult); ze_result_t osWarmReset(); LinuxDiagnosticsImp() = default; LinuxDiagnosticsImp(OsSysman *pOsSysman, const std::string &diagTests, ze_bool_t onSubdevice, uint32_t subdeviceId); ~LinuxDiagnosticsImp() override = default; std::string osDiagType = "unknown"; ze_result_t osColdReset(); protected: LinuxSysmanImp *pLinuxSysmanImp = nullptr; FirmwareUtil *pFwInterface = nullptr; SysfsAccess *pSysfsAccess = nullptr; FsAccess *pFsAccess = nullptr; ProcfsAccess *pProcfsAccess = nullptr; Device *pDevice = nullptr; std::string devicePciBdf = ""; NEO::ExecutionEnvironment *executionEnvironment = nullptr; uint32_t rootDeviceIndex = 0u; decltype(&NEO::SysCalls::open) openFunction = NEO::SysCalls::open; decltype(&NEO::SysCalls::close) closeFunction = NEO::SysCalls::close; decltype(&NEO::SysCalls::pread) preadFunction = NEO::SysCalls::pread; decltype(&NEO::SysCalls::pwrite) pwriteFunction = NEO::SysCalls::pwrite; void releaseSysmanDeviceResources(); void releaseDeviceResources(); ze_result_t initDevice(); void reInitSysmanDeviceResources(); private: static const std::string quiescentGpuFile; bool isSubdevice = false; uint32_t subdeviceId = 0; static const std::string invalidateLmemFile; static const std::string deviceDir; }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/diagnostics/os_diagnostics.h000066400000000000000000000015411422164147700311630ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/tools/source/sysman/os_sysman.h" #include #include #include #include namespace L0 { class OsDiagnostics { public: virtual void osGetDiagProperties(zes_diag_properties_t *pProperties) = 0; virtual ze_result_t osGetDiagTests(uint32_t *pCount, zes_diag_test_t *pTests) = 0; virtual ze_result_t osRunDiagTests(uint32_t start, uint32_t end, zes_diag_result_t *pResult) = 0; static std::unique_ptr 
create(OsSysman *pOsSysman, const std::string &DiagTests, ze_bool_t onSubdevice, uint32_t subdeviceId); static void getSupportedDiagTestsFromFW(void *pOsSysman, std::vector &supportedDiagTests); virtual ~OsDiagnostics() {} }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/diagnostics/windows/000077500000000000000000000000001422164147700274735ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/diagnostics/windows/CMakeLists.txt000066400000000000000000000010241422164147700322300ustar00rootroot00000000000000# # Copyright (C) 2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_TOOLS_SYSMAN_DIAGNOSTICS_WINDOWS ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/os_diagnostics_imp.cpp ) if(WIN32) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_SYSMAN_DIAGNOSTICS_WINDOWS} ) endif() # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_SYSMAN_DIAGNOSTICS_WINDOWS ${L0_SRCS_TOOLS_SYSMAN_DIAGNOSTICS_WINDOWS}) os_diagnostics_imp.cpp000066400000000000000000000017411422164147700340000ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/diagnostics/windows/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/diagnostics/windows/os_diagnostics_imp.h" namespace L0 { void WddmDiagnosticsImp::osGetDiagProperties(zes_diag_properties_t *pProperties){}; ze_result_t WddmDiagnosticsImp::osGetDiagTests(uint32_t *pCount, zes_diag_test_t *pTests) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t WddmDiagnosticsImp::osRunDiagTests(uint32_t start, uint32_t end, zes_diag_result_t *pResult) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } std::unique_ptr OsDiagnostics::create(OsSysman *pOsSysman, const std::string &DiagTests, ze_bool_t onSubdevice, uint32_t subdeviceId) { std::unique_ptr pWddmDiagnosticsImp = std::make_unique(); return 
pWddmDiagnosticsImp; } void OsDiagnostics::getSupportedDiagTestsFromFW(void *pOsSysman, std::vector &supportedDiagTests) { } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/diagnostics/windows/os_diagnostics_imp.h000066400000000000000000000011351422164147700335210ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/non_copyable_or_moveable.h" #include "sysman/diagnostics/os_diagnostics.h" #include "sysman/windows/os_sysman_imp.h" namespace L0 { class WddmDiagnosticsImp : public OsDiagnostics { public: void osGetDiagProperties(zes_diag_properties_t *pProperties) override; ze_result_t osGetDiagTests(uint32_t *pCount, zes_diag_test_t *pTests) override; ze_result_t osRunDiagTests(uint32_t start, uint32_t end, zes_diag_result_t *pResult) override; }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/engine/000077500000000000000000000000001422164147700247375ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/engine/CMakeLists.txt000077500000000000000000000012141422164147700275000ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_TOOLS_SYSMAN_ENGINE ${CMAKE_CURRENT_SOURCE_DIR}/engine.cpp ${CMAKE_CURRENT_SOURCE_DIR}/engine.h ${CMAKE_CURRENT_SOURCE_DIR}/engine_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/engine_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/os_engine.h ) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_SYSMAN_ENGINE} ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) add_subdirectories() # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_SYSMAN_ENGINE ${L0_SRCS_TOOLS_SYSMAN_ENGINE}) compute-runtime-22.14.22890/level_zero/tools/source/sysman/engine/engine.cpp000066400000000000000000000036731422164147700267210ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * 
SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/engine/engine.h" #include "shared/source/helpers/basic_math.h" #include "shared/source/helpers/debug_helpers.h" #include "level_zero/tools/source/sysman/engine/engine_imp.h" class OsEngine; namespace L0 { EngineHandleContext::EngineHandleContext(OsSysman *pOsSysman) { this->pOsSysman = pOsSysman; } EngineHandleContext::~EngineHandleContext() { releaseEngines(); } void EngineHandleContext::createHandle(zes_engine_group_t engineType, uint32_t engineInstance, uint32_t subDeviceId) { Engine *pEngine = new EngineImp(pOsSysman, engineType, engineInstance, subDeviceId); if (pEngine->initSuccess == true) { handleList.push_back(pEngine); } else { delete pEngine; } } void EngineHandleContext::init() { std::set> engineGroupInstance = {}; //set contains pair of engine group and struct containing engine instance and subdeviceId OsEngine::getNumEngineTypeAndInstances(engineGroupInstance, pOsSysman); for (auto itr = engineGroupInstance.begin(); itr != engineGroupInstance.end(); ++itr) { createHandle(itr->first, itr->second.first, itr->second.second); } } void EngineHandleContext::releaseEngines() { for (Engine *pEngine : handleList) { delete pEngine; } handleList.clear(); } ze_result_t EngineHandleContext::engineGet(uint32_t *pCount, zes_engine_handle_t *phEngine) { uint32_t handleListSize = static_cast(handleList.size()); uint32_t numToCopy = std::min(*pCount, handleListSize); if (0 == *pCount || *pCount > handleListSize) { *pCount = handleListSize; } if (nullptr != phEngine) { for (uint32_t i = 0; i < numToCopy; i++) { phEngine[i] = handleList[i]->toHandle(); } } return ZE_RESULT_SUCCESS; } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/engine/engine.h000066400000000000000000000023271422164147700263610ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include #include #include struct 
_zes_engine_handle_t { virtual ~_zes_engine_handle_t() = default; }; namespace L0 { using EngineInstanceSubDeviceId = std::pair; struct OsSysman; class Engine : _zes_engine_handle_t { public: virtual ze_result_t engineGetProperties(zes_engine_properties_t *pProperties) = 0; virtual ze_result_t engineGetActivity(zes_engine_stats_t *pStats) = 0; static Engine *fromHandle(zes_engine_handle_t handle) { return static_cast(handle); } inline zes_engine_handle_t toHandle() { return this; } bool initSuccess = false; }; struct EngineHandleContext { EngineHandleContext(OsSysman *pOsSysman); MOCKABLE_VIRTUAL ~EngineHandleContext(); MOCKABLE_VIRTUAL void init(); void releaseEngines(); ze_result_t engineGet(uint32_t *pCount, zes_engine_handle_t *phEngine); OsSysman *pOsSysman = nullptr; std::vector handleList = {}; private: void createHandle(zes_engine_group_t engineType, uint32_t engineInstance, uint32_t subDeviceId); }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/engine/engine_imp.cpp000066400000000000000000000017741422164147700275660ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/engine/engine_imp.h" #include "shared/source/helpers/debug_helpers.h" namespace L0 { ze_result_t EngineImp::engineGetActivity(zes_engine_stats_t *pStats) { return pOsEngine->getActivity(pStats); } ze_result_t EngineImp::engineGetProperties(zes_engine_properties_t *pProperties) { *pProperties = engineProperties; return ZE_RESULT_SUCCESS; } void EngineImp::init() { if (pOsEngine->isEngineModuleSupported()) { pOsEngine->getProperties(engineProperties); this->initSuccess = true; } } EngineImp::EngineImp(OsSysman *pOsSysman, zes_engine_group_t engineType, uint32_t engineInstance, uint32_t subDeviceId) { pOsEngine = OsEngine::create(pOsSysman, engineType, engineInstance, subDeviceId); init(); } EngineImp::~EngineImp() { if (nullptr != pOsEngine) { delete pOsEngine; 
pOsEngine = nullptr; } } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/engine/engine_imp.h000066400000000000000000000015471422164147700272310ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/non_copyable_or_moveable.h" #include "level_zero/tools/source/sysman/engine/engine.h" #include "level_zero/tools/source/sysman/engine/os_engine.h" #include namespace L0 { class EngineImp : public Engine, NEO::NonCopyableOrMovableClass { public: ze_result_t engineGetProperties(zes_engine_properties_t *pProperties) override; ze_result_t engineGetActivity(zes_engine_stats_t *pStats) override; EngineImp() = default; EngineImp(OsSysman *pOsSysman, zes_engine_group_t engineType, uint32_t engineInstance, uint32_t subDeviceId); ~EngineImp() override; OsEngine *pOsEngine = nullptr; void init(); private: zes_engine_properties_t engineProperties = {}; }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/engine/linux/000077500000000000000000000000001422164147700260765ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/engine/linux/CMakeLists.txt000077500000000000000000000014031422164147700306370ustar00rootroot00000000000000# # Copyright (C) 2020-2022 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_TOOLS_SYSMAN_ENGINE_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/os_engine_imp.h ) if(NEO_ENABLE_i915_PRELIM_DETECTION) list(APPEND L0_SRCS_TOOLS_SYSMAN_ENGINE_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/os_engine_imp_prelim.cpp ) else() list(APPEND L0_SRCS_TOOLS_SYSMAN_ENGINE_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/os_engine_imp.cpp ) endif() if(UNIX) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_SYSMAN_ENGINE_LINUX} ) endif() # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_SYSMAN_ENGINE_LINUX 
${L0_SRCS_TOOLS_SYSMAN_ENGINE_LINUX}) compute-runtime-22.14.22890/level_zero/tools/source/sysman/engine/linux/os_engine_imp.cpp000066400000000000000000000103501422164147700314140ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/engine/linux/os_engine_imp.h" #include "shared/source/os_interface/linux/engine_info.h" #include "sysman/linux/os_sysman_imp.h" namespace L0 { static const std::multimap<__u16, zes_engine_group_t> i915ToEngineMap = { {static_cast<__u16>(I915_ENGINE_CLASS_RENDER), ZES_ENGINE_GROUP_RENDER_SINGLE}, {static_cast<__u16>(I915_ENGINE_CLASS_VIDEO), ZES_ENGINE_GROUP_MEDIA_DECODE_SINGLE}, {static_cast<__u16>(I915_ENGINE_CLASS_VIDEO), ZES_ENGINE_GROUP_MEDIA_ENCODE_SINGLE}, {static_cast<__u16>(I915_ENGINE_CLASS_COPY), ZES_ENGINE_GROUP_COPY_SINGLE}, {static_cast<__u16>(I915_ENGINE_CLASS_VIDEO_ENHANCE), ZES_ENGINE_GROUP_MEDIA_ENHANCEMENT_SINGLE}}; static const std::multimap engineToI915Map = { {ZES_ENGINE_GROUP_RENDER_SINGLE, static_cast<__u16>(I915_ENGINE_CLASS_RENDER)}, {ZES_ENGINE_GROUP_MEDIA_DECODE_SINGLE, static_cast<__u16>(I915_ENGINE_CLASS_VIDEO)}, {ZES_ENGINE_GROUP_MEDIA_ENCODE_SINGLE, static_cast<__u16>(I915_ENGINE_CLASS_VIDEO)}, {ZES_ENGINE_GROUP_COPY_SINGLE, static_cast<__u16>(I915_ENGINE_CLASS_COPY)}, {ZES_ENGINE_GROUP_MEDIA_ENHANCEMENT_SINGLE, static_cast<__u16>(I915_ENGINE_CLASS_VIDEO_ENHANCE)}}; ze_result_t OsEngine::getNumEngineTypeAndInstances(std::set> &engineGroupInstance, OsSysman *pOsSysman) { LinuxSysmanImp *pLinuxSysmanImp = static_cast(pOsSysman); NEO::Drm *pDrm = &pLinuxSysmanImp->getDrm(); if (pDrm->sysmanQueryEngineInfo() == false) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } auto engineInfo = pDrm->getEngineInfo(); for (auto itr = engineInfo->engines.begin(); itr != engineInfo->engines.end(); ++itr) { auto i915ToEngineMapRange = i915ToEngineMap.equal_range(static_cast<__u16>(itr->engine.engineClass)); for (auto 
L0EngineEntryInMap = i915ToEngineMapRange.first; L0EngineEntryInMap != i915ToEngineMapRange.second; L0EngineEntryInMap++) { auto L0EngineType = L0EngineEntryInMap->second; engineGroupInstance.insert({L0EngineType, {static_cast(itr->engine.engineInstance), 0}}); } } return ZE_RESULT_SUCCESS; } ze_result_t LinuxEngineImp::getActivity(zes_engine_stats_t *pStats) { if (fd < 0) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } uint64_t data[2] = {}; if (pPmuInterface->pmuRead(static_cast(fd), data, sizeof(data)) < 0) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } // In data[], First u64 is "active time", And second u64 is "timestamp". Both in nanoseconds pStats->activeTime = data[0] / microSecondsToNanoSeconds; pStats->timestamp = data[1] / microSecondsToNanoSeconds; return ZE_RESULT_SUCCESS; } ze_result_t LinuxEngineImp::getProperties(zes_engine_properties_t &properties) { properties.type = engineGroup; properties.onSubdevice = 0; properties.subdeviceId = subDeviceId; return ZE_RESULT_SUCCESS; } void LinuxEngineImp::init() { auto i915EngineClass = engineToI915Map.find(engineGroup); // I915_PMU_ENGINE_BUSY macro provides the perf type config which we want to listen to get the engine busyness. 
fd = pPmuInterface->pmuInterfaceOpen(I915_PMU_ENGINE_BUSY(i915EngineClass->second, engineInstance), -1, PERF_FORMAT_TOTAL_TIME_ENABLED); } bool LinuxEngineImp::isEngineModuleSupported() { if (fd < 0) { return false; } return true; } LinuxEngineImp::LinuxEngineImp(OsSysman *pOsSysman, zes_engine_group_t type, uint32_t engineInstance, uint32_t subDeviceId) : engineGroup(type), engineInstance(engineInstance), subDeviceId(subDeviceId) { LinuxSysmanImp *pLinuxSysmanImp = static_cast(pOsSysman); pDrm = &pLinuxSysmanImp->getDrm(); pDevice = pLinuxSysmanImp->getDeviceHandle(); pPmuInterface = pLinuxSysmanImp->getPmuInterface(); init(); } OsEngine *OsEngine::create(OsSysman *pOsSysman, zes_engine_group_t type, uint32_t engineInstance, uint32_t subDeviceId) { LinuxEngineImp *pLinuxEngineImp = new LinuxEngineImp(pOsSysman, type, engineInstance, subDeviceId); return static_cast(pLinuxEngineImp); } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/engine/linux/os_engine_imp.h000066400000000000000000000024701422164147700310650ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/non_copyable_or_moveable.h" #include "shared/source/os_interface/linux/drm_neo.h" #include "level_zero/tools/source/sysman/sysman_const.h" #include "sysman/engine/os_engine.h" namespace L0 { class PmuInterface; struct Device; class LinuxEngineImp : public OsEngine, NEO::NonCopyableOrMovableClass { public: ze_result_t getActivity(zes_engine_stats_t *pStats) override; ze_result_t getProperties(zes_engine_properties_t &properties) override; bool isEngineModuleSupported() override; static zes_engine_group_t getGroupFromEngineType(zes_engine_group_t type); LinuxEngineImp() = default; LinuxEngineImp(OsSysman *pOsSysman, zes_engine_group_t type, uint32_t engineInstance, uint32_t subDeviceId); ~LinuxEngineImp() override { if (fd != -1) { close(static_cast(fd)); fd = -1; } } 
protected: zes_engine_group_t engineGroup = ZES_ENGINE_GROUP_ALL; uint32_t engineInstance = 0; PmuInterface *pPmuInterface = nullptr; NEO::Drm *pDrm = nullptr; Device *pDevice = nullptr; uint32_t subDeviceId = 0; uint32_t onSubDevice = 0; private: void init(); int64_t fd = -1; }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/engine/linux/os_engine_imp_prelim.cpp000066400000000000000000000137731422164147700330000ustar00rootroot00000000000000/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/linux/engine_info.h" #include "level_zero/tools/source/sysman/engine/linux/os_engine_imp.h" #include "sysman/linux/os_sysman_imp.h" #include "third_party/uapi/prelim/drm/i915_drm_prelim.h" namespace L0 { static const std::multimap<__u16, zes_engine_group_t> i915ToEngineMap = { {static_cast<__u16>(I915_ENGINE_CLASS_RENDER), ZES_ENGINE_GROUP_RENDER_SINGLE}, {static_cast<__u16>(I915_ENGINE_CLASS_VIDEO), ZES_ENGINE_GROUP_MEDIA_DECODE_SINGLE}, {static_cast<__u16>(I915_ENGINE_CLASS_VIDEO), ZES_ENGINE_GROUP_MEDIA_ENCODE_SINGLE}, {static_cast<__u16>(I915_ENGINE_CLASS_COPY), ZES_ENGINE_GROUP_COPY_SINGLE}, {static_cast<__u16>(PRELIM_I915_ENGINE_CLASS_COMPUTE), ZES_ENGINE_GROUP_COMPUTE_SINGLE}, {static_cast<__u16>(I915_ENGINE_CLASS_VIDEO_ENHANCE), ZES_ENGINE_GROUP_MEDIA_ENHANCEMENT_SINGLE}}; static const std::multimap engineToI915Map = { {ZES_ENGINE_GROUP_RENDER_SINGLE, static_cast<__u16>(I915_ENGINE_CLASS_RENDER)}, {ZES_ENGINE_GROUP_MEDIA_DECODE_SINGLE, static_cast<__u16>(I915_ENGINE_CLASS_VIDEO)}, {ZES_ENGINE_GROUP_MEDIA_ENCODE_SINGLE, static_cast<__u16>(I915_ENGINE_CLASS_VIDEO)}, {ZES_ENGINE_GROUP_COPY_SINGLE, static_cast<__u16>(I915_ENGINE_CLASS_COPY)}, {ZES_ENGINE_GROUP_COMPUTE_SINGLE, static_cast<__u16>(PRELIM_I915_ENGINE_CLASS_COMPUTE)}, {ZES_ENGINE_GROUP_MEDIA_ENHANCEMENT_SINGLE, static_cast<__u16>(I915_ENGINE_CLASS_VIDEO_ENHANCE)}}; zes_engine_group_t 
LinuxEngineImp::getGroupFromEngineType(zes_engine_group_t type) { if (type == ZES_ENGINE_GROUP_RENDER_SINGLE) { return ZES_ENGINE_GROUP_RENDER_ALL; } if (type == ZES_ENGINE_GROUP_COMPUTE_SINGLE) { return ZES_ENGINE_GROUP_COMPUTE_ALL; } if (type == ZES_ENGINE_GROUP_COPY_SINGLE) { return ZES_ENGINE_GROUP_COPY_ALL; } if (type == ZES_ENGINE_GROUP_MEDIA_DECODE_SINGLE || type == ZES_ENGINE_GROUP_MEDIA_ENCODE_SINGLE || type == ZES_ENGINE_GROUP_MEDIA_ENHANCEMENT_SINGLE) { return ZES_ENGINE_GROUP_MEDIA_ALL; } return ZES_ENGINE_GROUP_ALL; } ze_result_t OsEngine::getNumEngineTypeAndInstances(std::set> &engineGroupInstance, OsSysman *pOsSysman) { LinuxSysmanImp *pLinuxSysmanImp = static_cast(pOsSysman); NEO::Drm *pDrm = &pLinuxSysmanImp->getDrm(); if (pDrm->sysmanQueryEngineInfo() == false) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } auto engineInfo = pDrm->getEngineInfo(); for (auto itr = engineInfo->engines.begin(); itr != engineInfo->engines.end(); ++itr) { uint32_t subDeviceId = engineInfo->getEngineTileIndex(itr->engine); auto i915ToEngineMapRange = i915ToEngineMap.equal_range(static_cast<__u16>(itr->engine.engineClass)); for (auto L0EngineEntryInMap = i915ToEngineMapRange.first; L0EngineEntryInMap != i915ToEngineMapRange.second; L0EngineEntryInMap++) { auto L0EngineType = L0EngineEntryInMap->second; engineGroupInstance.insert({L0EngineType, {static_cast(itr->engine.engineInstance), subDeviceId}}); engineGroupInstance.insert({LinuxEngineImp::getGroupFromEngineType(L0EngineType), {0u, subDeviceId}}); engineGroupInstance.insert({ZES_ENGINE_GROUP_ALL, {0u, subDeviceId}}); } } return ZE_RESULT_SUCCESS; } ze_result_t LinuxEngineImp::getActivity(zes_engine_stats_t *pStats) { if (fd < 0) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } uint64_t data[2] = {}; if (pPmuInterface->pmuRead(static_cast(fd), data, sizeof(data)) < 0) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } // In data[], First u64 is "active time", And second u64 is "timestamp". 
Both in nanoseconds pStats->activeTime = data[0] / microSecondsToNanoSeconds; pStats->timestamp = data[1] / microSecondsToNanoSeconds; return ZE_RESULT_SUCCESS; } ze_result_t LinuxEngineImp::getProperties(zes_engine_properties_t &properties) { properties.subdeviceId = subDeviceId; properties.onSubdevice = onSubDevice; properties.type = engineGroup; return ZE_RESULT_SUCCESS; } void LinuxEngineImp::init() { uint32_t subDeviceCount = 0; pDevice->getSubDevices(&subDeviceCount, nullptr); onSubDevice = (subDeviceCount == 0) ? 0 : 1; uint64_t config = UINT64_MAX; switch (engineGroup) { case ZES_ENGINE_GROUP_ALL: config = __PRELIM_I915_PMU_ANY_ENGINE_GROUP_BUSY(subDeviceId); break; case ZES_ENGINE_GROUP_COMPUTE_ALL: case ZES_ENGINE_GROUP_RENDER_ALL: config = __PRELIM_I915_PMU_RENDER_GROUP_BUSY(subDeviceId); break; case ZES_ENGINE_GROUP_COPY_ALL: config = __PRELIM_I915_PMU_COPY_GROUP_BUSY(subDeviceId); break; case ZES_ENGINE_GROUP_MEDIA_ALL: config = __PRELIM_I915_PMU_MEDIA_GROUP_BUSY(subDeviceId); break; default: auto i915EngineClass = engineToI915Map.find(engineGroup); config = I915_PMU_ENGINE_BUSY(i915EngineClass->second, engineInstance); break; } fd = pPmuInterface->pmuInterfaceOpen(config, -1, PERF_FORMAT_TOTAL_TIME_ENABLED); } bool LinuxEngineImp::isEngineModuleSupported() { if (fd < 0) { return false; } return true; } LinuxEngineImp::LinuxEngineImp(OsSysman *pOsSysman, zes_engine_group_t type, uint32_t engineInstance, uint32_t subDeviceId) : engineGroup(type), engineInstance(engineInstance), subDeviceId(subDeviceId) { LinuxSysmanImp *pLinuxSysmanImp = static_cast(pOsSysman); pDrm = &pLinuxSysmanImp->getDrm(); pDevice = pLinuxSysmanImp->getDeviceHandle(); pPmuInterface = pLinuxSysmanImp->getPmuInterface(); init(); } OsEngine *OsEngine::create(OsSysman *pOsSysman, zes_engine_group_t type, uint32_t engineInstance, uint32_t subDeviceId) { LinuxEngineImp *pLinuxEngineImp = new LinuxEngineImp(pOsSysman, type, engineInstance, subDeviceId); return 
static_cast(pLinuxEngineImp); } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/engine/os_engine.h000066400000000000000000000015071422164147700270610ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/tools/source/sysman/engine/engine.h" #include #include namespace L0 { struct OsSysman; using namespace std; class OsEngine { public: virtual ze_result_t getActivity(zes_engine_stats_t *pStats) = 0; virtual ze_result_t getProperties(zes_engine_properties_t &properties) = 0; virtual bool isEngineModuleSupported() = 0; static OsEngine *create(OsSysman *pOsSysman, zes_engine_group_t engineType, uint32_t engineInstance, uint32_t subDeviceId); static ze_result_t getNumEngineTypeAndInstances(std::set> &engineGroupInstance, OsSysman *pOsSysman); virtual ~OsEngine() = default; }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/engine/windows/000077500000000000000000000000001422164147700264315ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/engine/windows/CMakeLists.txt000077500000000000000000000010531422164147700311730ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_TOOLS_SYSMAN_ENGINE_WINDOWS ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/os_engine_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/os_engine_imp.cpp ) if(WIN32) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_SYSMAN_ENGINE_WINDOWS} ) endif() # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_SYSMAN_ENGINE_WINDOWS ${L0_SRCS_TOOLS_SYSMAN_ENGINE_WINDOWS}) compute-runtime-22.14.22890/level_zero/tools/source/sysman/engine/windows/os_engine_imp.cpp000066400000000000000000000067361422164147700317640ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * 
*/ #include "sysman/engine/windows/os_engine_imp.h" namespace L0 { ze_result_t WddmEngineImp::getActivity(zes_engine_stats_t *pStats) { uint64_t activeTime = 0; uint64_t timeStamp = 0; KmdSysman::RequestProperty request; KmdSysman::ResponseProperty response; request.commandId = KmdSysman::Command::Get; request.componentId = KmdSysman::Component::ActivityComponent; switch (this->engineGroup) { case ZES_ENGINE_GROUP_ALL: request.paramInfo = KmdSysman::ActivityDomainsType::ActitvityDomainGT; break; case ZES_ENGINE_GROUP_COMPUTE_ALL: request.paramInfo = KmdSysman::ActivityDomainsType::ActivityDomainRenderCompute; break; case ZES_ENGINE_GROUP_MEDIA_ALL: request.paramInfo = KmdSysman::ActivityDomainsType::ActivityDomainMedia; break; default: return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; break; } request.requestId = KmdSysman::Requests::Activity::CurrentActivityCounter; ze_result_t status = pKmdSysManager->requestSingle(request, response); if (status != ZE_RESULT_SUCCESS) { return status; } memcpy_s(&activeTime, sizeof(uint64_t), response.dataBuffer, sizeof(uint64_t)); memcpy_s(&timeStamp, sizeof(uint64_t), (response.dataBuffer + sizeof(uint64_t)), sizeof(uint64_t)); pStats->activeTime = activeTime; pStats->timestamp = timeStamp; return status; } ze_result_t WddmEngineImp::getProperties(zes_engine_properties_t &properties) { properties.type = engineGroup; properties.onSubdevice = false; properties.subdeviceId = 0; return ZE_RESULT_SUCCESS; } bool WddmEngineImp::isEngineModuleSupported() { return true; } WddmEngineImp::WddmEngineImp(OsSysman *pOsSysman, zes_engine_group_t engineType, uint32_t engineInstance, uint32_t subDeviceId) { WddmSysmanImp *pWddmSysmanImp = static_cast(pOsSysman); this->engineGroup = engineType; pKmdSysManager = &pWddmSysmanImp->getKmdSysManager(); } OsEngine *OsEngine::create(OsSysman *pOsSysman, zes_engine_group_t engineType, uint32_t engineInstance, uint32_t subDeviceId) { WddmEngineImp *pWddmEngineImp = new WddmEngineImp(pOsSysman, engineType, 
engineInstance, subDeviceId); return static_cast(pWddmEngineImp); } ze_result_t OsEngine::getNumEngineTypeAndInstances(std::set> &engineGroupInstance, OsSysman *pOsSysman) { WddmSysmanImp *pWddmSysmanImp = static_cast(pOsSysman); KmdSysManager *pKmdSysManager = &pWddmSysmanImp->getKmdSysManager(); KmdSysman::RequestProperty request; KmdSysman::ResponseProperty response; request.commandId = KmdSysman::Command::Get; request.componentId = KmdSysman::Component::ActivityComponent; request.requestId = KmdSysman::Requests::Activity::NumActivityDomains; ze_result_t status = pKmdSysManager->requestSingle(request, response); if (status != ZE_RESULT_SUCCESS) { return status; } uint32_t maxNumEnginesSupported = 0; memcpy_s(&maxNumEnginesSupported, sizeof(uint32_t), response.dataBuffer, sizeof(uint32_t)); if (maxNumEnginesSupported == 0) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } for (uint32_t i = 0; i < maxNumEnginesSupported; i++) { engineGroupInstance.insert({static_cast(i), {0, 0}}); } return status; } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/engine/windows/os_engine_imp.h000066400000000000000000000015531422164147700314210ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/non_copyable_or_moveable.h" #include "sysman/engine/os_engine.h" #include "sysman/windows/os_sysman_imp.h" namespace L0 { class KmdSysManager; class WddmEngineImp : public OsEngine, NEO::NonCopyableOrMovableClass { public: ze_result_t getActivity(zes_engine_stats_t *pStats) override; ze_result_t getProperties(zes_engine_properties_t &properties) override; bool isEngineModuleSupported() override; WddmEngineImp() = default; WddmEngineImp(OsSysman *pOsSysman, zes_engine_group_t type, uint32_t engineInstance, uint32_t subDeviceId); ~WddmEngineImp() override = default; protected: KmdSysManager *pKmdSysManager = nullptr; zes_engine_group_t engineGroup = 
ZES_ENGINE_GROUP_ALL; }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/events/000077500000000000000000000000001422164147700247765ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/events/CMakeLists.txt000066400000000000000000000012321422164147700275340ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_TOOLS_SYSMAN_EVENTS ${CMAKE_CURRENT_SOURCE_DIR}/events.h ${CMAKE_CURRENT_SOURCE_DIR}/events_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/events_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/os_events.h ) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_SYSMAN_EVENTS} ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) if(UNIX) add_subdirectory(linux) else() add_subdirectory(windows) endif() # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_SYSMAN_EVENTS ${L0_SRCS_TOOLS_SYSMAN_EVENTS}) compute-runtime-22.14.22890/level_zero/tools/source/sysman/events/events.h000066400000000000000000000006331422164147700264550ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include namespace L0 { class Events { public: virtual ~Events(){}; virtual ze_result_t eventRegister(zes_event_type_flags_t events) = 0; virtual bool eventListen(zes_event_type_flags_t &pEvent, uint64_t timeout) = 0; virtual void init() = 0; }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/events/events_imp.cpp000066400000000000000000000012271422164147700276550ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "events_imp.h" #include "shared/source/helpers/debug_helpers.h" namespace L0 { ze_result_t EventsImp::eventRegister(zes_event_type_flags_t events) { return pOsEvents->eventRegister(events); } bool EventsImp::eventListen(zes_event_type_flags_t &pEvent, uint64_t timeout) { return 
pOsEvents->eventListen(pEvent, timeout); } void EventsImp::init() { pOsEvents = OsEvents::create(pOsSysman); UNRECOVERABLE_IF(nullptr == pOsEvents); } EventsImp::~EventsImp() { if (nullptr != pOsEvents) { delete pOsEvents; } } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/events/events_imp.h000066400000000000000000000012731422164147700273230ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/non_copyable_or_moveable.h" #include "events.h" #include "os_events.h" namespace L0 { class EventsImp : public Events, NEO::NonCopyableOrMovableClass { public: void init() override; ze_result_t eventRegister(zes_event_type_flags_t events) override; bool eventListen(zes_event_type_flags_t &pEvent, uint64_t timeout) override; OsEvents *pOsEvents = nullptr; EventsImp() = default; EventsImp(OsSysman *pOsSysman) : pOsSysman(pOsSysman){}; ~EventsImp() override; private: OsSysman *pOsSysman = nullptr; }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/events/linux/000077500000000000000000000000001422164147700261355ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/events/linux/CMakeLists.txt000066400000000000000000000015001422164147700306710ustar00rootroot00000000000000# # Copyright (C) 2020-2022 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_TOOLS_SYSMAN_EVENTS_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) if(NEO_ENABLE_i915_PRELIM_DETECTION) list(APPEND L0_SRCS_TOOLS_SYSMAN_EVENTS_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/os_events_imp_prelim.cpp ${CMAKE_CURRENT_SOURCE_DIR}/os_events_imp_prelim.h ) else() list(APPEND L0_SRCS_TOOLS_SYSMAN_EVENTS_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/os_events_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/os_events_imp.h ) endif() if(UNIX) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_SYSMAN_EVENTS_LINUX} ) endif() # Make our source 
files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_SYSMAN_EVENTS_LINUX ${L0_SRCS_TOOLS_SYSMAN_EVENTS_LINUX}) compute-runtime-22.14.22890/level_zero/tools/source/sysman/events/linux/os_events_imp.cpp000066400000000000000000000114541422164147700315200ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/events/linux/os_events_imp.h" #include "sysman/events/events_imp.h" #include "sysman/linux/os_sysman_imp.h" namespace L0 { const std::string LinuxEventsImp::varFs("/var/lib/libze_intel_gpu/"); const std::string LinuxEventsImp::detachEvent("remove"); const std::string LinuxEventsImp::attachEvent("add"); bool LinuxEventsImp::isResetRequired(zes_event_type_flags_t &pEvent) { zes_device_state_t pState = {}; pLinuxSysmanImp->getSysmanDeviceImp()->deviceGetState(&pState); if (pState.reset) { pEvent |= ZES_EVENT_TYPE_FLAG_DEVICE_RESET_REQUIRED; return true; } return false; } bool LinuxEventsImp::checkDeviceDetachEvent(zes_event_type_flags_t &pEvent) { // When device detach uevent is generated, then L0 udev rules will create a file: // /var/lib/libze_intel_gpu/remove- // For , check comment in LinuxEventsImp::init() const std::string deviceDetachFile = detachEvent + "-" + pciIdPathTag; const std::string deviceDetachFileAbsolutePath = varFs + deviceDetachFile; uint32_t val = 0; auto result = pFsAccess->read(deviceDetachFileAbsolutePath, val); if (result != ZE_RESULT_SUCCESS) { return false; } if (val == 1) { pEvent |= ZES_EVENT_TYPE_FLAG_DEVICE_DETACH; return true; } return false; } bool LinuxEventsImp::checkDeviceAttachEvent(zes_event_type_flags_t &pEvent) { // When device detach uevent is generated, then L0 udev rules will create a file: // /var/lib/libze_intel_gpu/add- // For , check comment in LinuxEventsImp::init() const std::string deviceAttachFile = attachEvent + "-" + pciIdPathTag; const std::string deviceAttachFileAbsolutePath = varFs + 
deviceAttachFile; uint32_t val = 0; auto result = pFsAccess->read(deviceAttachFileAbsolutePath, val); if (result != ZE_RESULT_SUCCESS) { return false; } if (val == 1) { pEvent |= ZES_EVENT_TYPE_FLAG_DEVICE_ATTACH; return true; } return false; } bool LinuxEventsImp::checkIfMemHealthChanged(zes_event_type_flags_t &pEvent) { if (currentMemHealth() != memHealthAtEventRegister) { pEvent |= ZES_EVENT_TYPE_FLAG_MEM_HEALTH; return true; } return false; } bool LinuxEventsImp::eventListen(zes_event_type_flags_t &pEvent, uint64_t timeout) { if (registeredEvents & ZES_EVENT_TYPE_FLAG_DEVICE_RESET_REQUIRED) { if (isResetRequired(pEvent)) { registeredEvents &= ~(ZES_EVENT_TYPE_FLAG_DEVICE_RESET_REQUIRED); //After receiving event unregister it return true; } } if (registeredEvents & ZES_EVENT_TYPE_FLAG_DEVICE_DETACH) { if (checkDeviceDetachEvent(pEvent)) { registeredEvents &= ~(ZES_EVENT_TYPE_FLAG_DEVICE_DETACH); return true; } } if (registeredEvents & ZES_EVENT_TYPE_FLAG_DEVICE_ATTACH) { if (checkDeviceAttachEvent(pEvent)) { registeredEvents &= ~(ZES_EVENT_TYPE_FLAG_DEVICE_ATTACH); return true; } } if (registeredEvents & ZES_EVENT_TYPE_FLAG_MEM_HEALTH) { if (checkIfMemHealthChanged(pEvent)) { registeredEvents &= ~(ZES_EVENT_TYPE_FLAG_MEM_HEALTH); return true; } } return false; } ze_result_t LinuxEventsImp::eventRegister(zes_event_type_flags_t events) { if (0x7fff < events) { return ZE_RESULT_ERROR_INVALID_ENUMERATION; } registeredEvents |= events; if (registeredEvents & ZES_EVENT_TYPE_FLAG_MEM_HEALTH) { memHealthAtEventRegister = currentMemHealth(); } return ZE_RESULT_SUCCESS; } zes_mem_health_t LinuxEventsImp::currentMemHealth() { return ZES_MEM_HEALTH_UNKNOWN; } void LinuxEventsImp::getPciIdPathTag() { std::string bdfDir; ze_result_t result = pSysfsAccess->readSymLink("device", bdfDir); if (ZE_RESULT_SUCCESS != result) { return; } const auto loc = bdfDir.find_last_of('/'); auto bdf = bdfDir.substr(loc + 1); std::replace(bdf.begin(), bdf.end(), ':', '_'); 
std::replace(bdf.begin(), bdf.end(), '.', '_'); // ID_PATH_TAG key is received when uevent related to device add/remove is generated. // Example of ID_PATH_TAG is: // ID_PATH_TAG=pci-0000_8c_00_0 pciIdPathTag = "pci-" + bdf; } LinuxEventsImp::LinuxEventsImp(OsSysman *pOsSysman) { pLinuxSysmanImp = static_cast(pOsSysman); pSysfsAccess = &pLinuxSysmanImp->getSysfsAccess(); pFsAccess = &pLinuxSysmanImp->getFsAccess(); getPciIdPathTag(); } OsEvents *OsEvents::create(OsSysman *pOsSysman) { LinuxEventsImp *pLinuxEventsImp = new LinuxEventsImp(pOsSysman); return static_cast(pLinuxEventsImp); } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/events/linux/os_events_imp.h000066400000000000000000000025211422164147700311600ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/tools/source/sysman/events/os_events.h" #include "level_zero/tools/source/sysman/linux/os_sysman_imp.h" namespace L0 { class LinuxEventsImp : public OsEvents, NEO::NonCopyableOrMovableClass { public: bool eventListen(zes_event_type_flags_t &pEvent, uint64_t timeout) override; ze_result_t eventRegister(zes_event_type_flags_t events) override; LinuxEventsImp() = default; LinuxEventsImp(OsSysman *pOsSysman); ~LinuxEventsImp() override = default; protected: LinuxSysmanImp *pLinuxSysmanImp = nullptr; void getPciIdPathTag(); zes_mem_health_t currentMemHealth(); bool isResetRequired(zes_event_type_flags_t &pEvent); bool checkDeviceDetachEvent(zes_event_type_flags_t &pEvent); bool checkDeviceAttachEvent(zes_event_type_flags_t &pEvent); bool checkIfMemHealthChanged(zes_event_type_flags_t &pEvent); std::string pciIdPathTag; zes_mem_health_t memHealthAtEventRegister = ZES_MEM_HEALTH_UNKNOWN; private: FsAccess *pFsAccess = nullptr; SysfsAccess *pSysfsAccess = nullptr; static const std::string varFs; static const std::string detachEvent; static const std::string attachEvent; zes_event_type_flags_t 
registeredEvents = 0; }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/events/linux/os_events_imp_prelim.cpp000066400000000000000000000212771422164147700330740ustar00rootroot00000000000000/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/events/linux/os_events_imp_prelim.h" #include "level_zero/tools/source/sysman/memory/linux/os_memory_imp_prelim.h" #include "sysman/events/events_imp.h" #include "sysman/linux/os_sysman_imp.h" namespace L0 { const std::string LinuxEventsImp::deviceMemoryHealth("device_memory_health"); const std::string LinuxEventsImp::varFs("/var/lib/libze_intel_gpu/"); const std::string LinuxEventsImp::detachEvent("remove"); const std::string LinuxEventsImp::attachEvent("add"); static bool checkRasEventOccured(Ras *rasHandle) { zes_ras_config_t config = {}; zes_ras_state_t state = {}; rasHandle->rasGetConfig(&config); if (ZE_RESULT_SUCCESS == rasHandle->rasGetState(&state, 0)) { uint64_t totalCategoryThreshold = 0; for (int i = 0; i < ZES_MAX_RAS_ERROR_CATEGORY_COUNT; i++) { totalCategoryThreshold += state.category[i]; if ((config.detailedThresholds.category[i] > 0) && (state.category[i] > config.detailedThresholds.category[i])) { return true; } } if ((config.totalThreshold > 0) && (totalCategoryThreshold > config.totalThreshold)) { return true; } } return false; } bool LinuxEventsImp::checkRasEvent(zes_event_type_flags_t &pEvent) { for (auto rasHandle : pLinuxSysmanImp->getSysmanDeviceImp()->pRasHandleContext->handleList) { zes_ras_properties_t properties = {}; rasHandle->rasGetProperties(&properties); if ((registeredEvents & ZES_EVENT_TYPE_FLAG_RAS_CORRECTABLE_ERRORS) && (properties.type == ZES_RAS_ERROR_TYPE_CORRECTABLE)) { if (checkRasEventOccured(rasHandle) == true) { pEvent |= ZES_EVENT_TYPE_FLAG_RAS_CORRECTABLE_ERRORS; return true; } } if ((registeredEvents & ZES_EVENT_TYPE_FLAG_RAS_UNCORRECTABLE_ERRORS) && (properties.type == 
ZES_RAS_ERROR_TYPE_UNCORRECTABLE)) { if (checkRasEventOccured(rasHandle) == true) { pEvent |= ZES_EVENT_TYPE_FLAG_RAS_UNCORRECTABLE_ERRORS; return true; } } } return false; } bool LinuxEventsImp::isResetRequired(zes_event_type_flags_t &pEvent) { zes_device_state_t pState = {}; pLinuxSysmanImp->getSysmanDeviceImp()->deviceGetState(&pState); if (pState.reset) { pEvent |= ZES_EVENT_TYPE_FLAG_DEVICE_RESET_REQUIRED; return true; } return false; } bool LinuxEventsImp::checkDeviceDetachEvent(zes_event_type_flags_t &pEvent) { // When device detach uevent is generated, then L0 udev rules will create a file: // /var/lib/libze_intel_gpu/remove- // For , check comment in LinuxEventsImp::init() const std::string deviceDetachFile = detachEvent + "-" + pciIdPathTag; const std::string deviceDetachFileAbsolutePath = varFs + deviceDetachFile; uint32_t val = 0; auto result = pFsAccess->read(deviceDetachFileAbsolutePath, val); if (result != ZE_RESULT_SUCCESS) { return false; } if (val == 1) { pEvent |= ZES_EVENT_TYPE_FLAG_DEVICE_DETACH; return true; } return false; } bool LinuxEventsImp::checkDeviceAttachEvent(zes_event_type_flags_t &pEvent) { // When device detach uevent is generated, then L0 udev rules will create a file: // /var/lib/libze_intel_gpu/add- // For , check comment in LinuxEventsImp::init() const std::string deviceAttachFile = attachEvent + "-" + pciIdPathTag; const std::string deviceAttachFileAbsolutePath = varFs + deviceAttachFile; uint32_t val = 0; auto result = pFsAccess->read(deviceAttachFileAbsolutePath, val); if (result != ZE_RESULT_SUCCESS) { return false; } if (val == 1) { pEvent |= ZES_EVENT_TYPE_FLAG_DEVICE_ATTACH; return true; } return false; } bool LinuxEventsImp::checkIfMemHealthChanged(zes_event_type_flags_t &pEvent) { if (currentMemHealth() != memHealthAtEventRegister) { pEvent |= ZES_EVENT_TYPE_FLAG_MEM_HEALTH; return true; } return false; } bool LinuxEventsImp::checkIfFabricPortStatusChanged(zes_event_type_flags_t &pEvent) { uint32_t 
currentFabricEventStatusVal = 0; if (currentFabricEventStatus(currentFabricEventStatusVal) != ZE_RESULT_SUCCESS) { return false; } if (currentFabricEventStatusVal != fabricEventTrackAtRegister) { pEvent |= ZES_EVENT_TYPE_FLAG_FABRIC_PORT_HEALTH; return true; } return false; } bool LinuxEventsImp::eventListen(zes_event_type_flags_t &pEvent, uint64_t timeout) { if (registeredEvents & ZES_EVENT_TYPE_FLAG_DEVICE_RESET_REQUIRED) { if (isResetRequired(pEvent)) { registeredEvents &= ~(ZES_EVENT_TYPE_FLAG_DEVICE_RESET_REQUIRED); //After receiving event unregister it return true; } } if (registeredEvents & ZES_EVENT_TYPE_FLAG_DEVICE_DETACH) { if (checkDeviceDetachEvent(pEvent)) { registeredEvents &= ~(ZES_EVENT_TYPE_FLAG_DEVICE_DETACH); return true; } } if (registeredEvents & ZES_EVENT_TYPE_FLAG_DEVICE_ATTACH) { if (checkDeviceAttachEvent(pEvent)) { registeredEvents &= ~(ZES_EVENT_TYPE_FLAG_DEVICE_ATTACH); return true; } } if (registeredEvents & ZES_EVENT_TYPE_FLAG_MEM_HEALTH) { if (checkIfMemHealthChanged(pEvent)) { registeredEvents &= ~(ZES_EVENT_TYPE_FLAG_MEM_HEALTH); return true; } } if ((registeredEvents & ZES_EVENT_TYPE_FLAG_RAS_CORRECTABLE_ERRORS) || (registeredEvents & ZES_EVENT_TYPE_FLAG_RAS_UNCORRECTABLE_ERRORS)) { if (checkRasEvent(pEvent)) { if (pEvent & ZES_EVENT_TYPE_FLAG_RAS_CORRECTABLE_ERRORS) { registeredEvents &= ~(ZES_EVENT_TYPE_FLAG_RAS_CORRECTABLE_ERRORS); } else { registeredEvents &= ~(ZES_EVENT_TYPE_FLAG_RAS_UNCORRECTABLE_ERRORS); } return true; } } if (registeredEvents & ZES_EVENT_TYPE_FLAG_FABRIC_PORT_HEALTH) { if (checkIfFabricPortStatusChanged(pEvent)) { registeredEvents &= ~(ZES_EVENT_TYPE_FLAG_FABRIC_PORT_HEALTH); return true; } } return false; } ze_result_t LinuxEventsImp::eventRegister(zes_event_type_flags_t events) { if (0x7fff < events) { return ZE_RESULT_ERROR_INVALID_ENUMERATION; } registeredEvents |= events; if (registeredEvents & ZES_EVENT_TYPE_FLAG_MEM_HEALTH) { memHealthAtEventRegister = currentMemHealth(); } if (registeredEvents & 
ZES_EVENT_TYPE_FLAG_FABRIC_PORT_HEALTH) { currentFabricEventStatus(fabricEventTrackAtRegister); } return ZE_RESULT_SUCCESS; } ze_result_t LinuxEventsImp::currentFabricEventStatus(uint32_t &val) { // When Fabric port status change uevent is generated, then L0 udev rules will create a file: // /var/lib/libze_intel_gpu/fabric- // For , check comment in LinuxEventsImp::init() const std::string fabric = "fabric"; const std::string fabricEventFile = fabric + "-" + pciIdPathTag; const std::string fabricEventFileAbsolutePath = varFs + fabricEventFile; return pFsAccess->read(fabricEventFileAbsolutePath, val); } zes_mem_health_t LinuxEventsImp::currentMemHealth() { std::string memHealth; ze_result_t result = pSysfsAccess->read(deviceMemoryHealth, memHealth); if (ZE_RESULT_SUCCESS != result) { return ZES_MEM_HEALTH_UNKNOWN; } auto health = i915ToL0MemHealth.find(memHealth); if (health != i915ToL0MemHealth.end()) { return i915ToL0MemHealth.at(memHealth); } return ZES_MEM_HEALTH_UNKNOWN; } void LinuxEventsImp::getPciIdPathTag() { std::string bdfDir; ze_result_t result = pSysfsAccess->readSymLink("device", bdfDir); if (ZE_RESULT_SUCCESS != result) { return; } const auto loc = bdfDir.find_last_of('/'); auto bdf = bdfDir.substr(loc + 1); std::replace(bdf.begin(), bdf.end(), ':', '_'); std::replace(bdf.begin(), bdf.end(), '.', '_'); // ID_PATH_TAG key is received when uevent related to device add/remove is generated. 
// Example of ID_PATH_TAG is: // ID_PATH_TAG=pci-0000_8c_00_0 pciIdPathTag = "pci-" + bdf; } LinuxEventsImp::LinuxEventsImp(OsSysman *pOsSysman) { pLinuxSysmanImp = static_cast(pOsSysman); pSysfsAccess = &pLinuxSysmanImp->getSysfsAccess(); pFsAccess = &pLinuxSysmanImp->getFsAccess(); getPciIdPathTag(); } OsEvents *OsEvents::create(OsSysman *pOsSysman) { LinuxEventsImp *pLinuxEventsImp = new LinuxEventsImp(pOsSysman); return static_cast(pLinuxEventsImp); } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/events/linux/os_events_imp_prelim.h000066400000000000000000000031441422164147700325320ustar00rootroot00000000000000/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/tools/source/sysman/events/os_events.h" #include "level_zero/tools/source/sysman/linux/os_sysman_imp.h" namespace L0 { class LinuxEventsImp : public OsEvents, NEO::NonCopyableOrMovableClass { public: bool eventListen(zes_event_type_flags_t &pEvent, uint64_t timeout) override; ze_result_t eventRegister(zes_event_type_flags_t events) override; LinuxEventsImp() = default; LinuxEventsImp(OsSysman *pOsSysman); ~LinuxEventsImp() override = default; protected: LinuxSysmanImp *pLinuxSysmanImp = nullptr; void getPciIdPathTag(); zes_mem_health_t currentMemHealth(); ze_result_t currentFabricEventStatus(uint32_t &val); bool isResetRequired(zes_event_type_flags_t &pEvent); bool checkDeviceDetachEvent(zes_event_type_flags_t &pEvent); bool checkDeviceAttachEvent(zes_event_type_flags_t &pEvent); bool checkIfMemHealthChanged(zes_event_type_flags_t &pEvent); bool checkIfFabricPortStatusChanged(zes_event_type_flags_t &pEvent); bool checkRasEvent(zes_event_type_flags_t &pEvent); std::string pciIdPathTag; zes_mem_health_t memHealthAtEventRegister = ZES_MEM_HEALTH_UNKNOWN; uint32_t fabricEventTrackAtRegister = 0; private: FsAccess *pFsAccess = nullptr; SysfsAccess *pSysfsAccess = nullptr; static const std::string varFs; static const 
std::string detachEvent; static const std::string attachEvent; static const std::string deviceMemoryHealth; zes_event_type_flags_t registeredEvents = 0; }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/events/os_events.h000066400000000000000000000007541422164147700271620ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/tools/source/sysman/os_sysman.h" #include namespace L0 { class OsEvents { public: static OsEvents *create(OsSysman *pOsSysman); virtual bool eventListen(zes_event_type_flags_t &pEvent, uint64_t timeout) = 0; virtual ze_result_t eventRegister(zes_event_type_flags_t events) = 0; virtual ~OsEvents() {} }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/events/windows/000077500000000000000000000000001422164147700264705ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/events/windows/CMakeLists.txt000066400000000000000000000010531422164147700312270ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_TOOLS_SYSMAN_EVENTS_WINDOWS ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/os_events_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/os_events_imp.cpp ) if(WIN32) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_SYSMAN_EVENTS_WINDOWS} ) endif() # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_SYSMAN_EVENTS_WINDOWS ${L0_SRCS_TOOLS_SYSMAN_EVENTS_WINDOWS}) compute-runtime-22.14.22890/level_zero/tools/source/sysman/events/windows/os_events_imp.cpp000066400000000000000000000117741422164147700320600ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/events/windows/os_events_imp.h" #include "level_zero/tools/source/sysman/windows/os_sysman_imp.h" 
namespace L0 { void WddmEventsImp::registerEvents(zes_event_type_flags_t eventId, uint32_t requestId) { ze_result_t status = ZE_RESULT_SUCCESS; EventHandler event; KmdSysman::RequestProperty request; KmdSysman::ResponseProperty response; request.requestId = requestId; request.commandId = KmdSysman::Command::RegisterEvent; request.componentId = KmdSysman::Component::InterfaceProperties; request.dataSize = sizeof(HANDLE); event.requestId = requestId; event.id = eventId; event.windowsHandle = CreateEvent(NULL, FALSE, FALSE, NULL); memcpy_s(request.dataBuffer, sizeof(HANDLE), &event.windowsHandle, sizeof(HANDLE)); status = pKmdSysManager->requestSingle(request, response); if (status != ZE_RESULT_SUCCESS) { CloseHandle(event.windowsHandle); return; } eventList.push_back(event); } void WddmEventsImp::unregisterEvents() { ze_result_t status = ZE_RESULT_SUCCESS; EventHandler event; KmdSysman::RequestProperty request; KmdSysman::ResponseProperty response; SetEvent(exitHandle); request.commandId = KmdSysman::Command::UnregisterEvent; request.componentId = KmdSysman::Component::InterfaceProperties; request.dataSize = sizeof(HANDLE); for (uint32_t i = 0; i < eventList.size(); i++) { request.requestId = eventList[i].requestId; event.windowsHandle = eventList[i].windowsHandle; memcpy_s(request.dataBuffer, sizeof(HANDLE), &event.windowsHandle, sizeof(HANDLE)); status = pKmdSysManager->requestSingle(request, response); if (status == ZE_RESULT_SUCCESS) { CloseHandle(event.windowsHandle); } } eventList.clear(); } ze_result_t WddmEventsImp::eventRegister(zes_event_type_flags_t events) { unregisterEvents(); if (events & ZES_EVENT_TYPE_FLAG_ENERGY_THRESHOLD_CROSSED) { registerEvents(ZES_EVENT_TYPE_FLAG_ENERGY_THRESHOLD_CROSSED, KmdSysman::Events::EnergyThresholdCrossed); } if (events & ZES_EVENT_TYPE_FLAG_DEVICE_SLEEP_STATE_ENTER) { registerEvents(ZES_EVENT_TYPE_FLAG_DEVICE_SLEEP_STATE_ENTER, KmdSysman::Events::EnterD3); } if (events & ZES_EVENT_TYPE_FLAG_DEVICE_SLEEP_STATE_EXIT) { 
registerEvents(ZES_EVENT_TYPE_FLAG_DEVICE_SLEEP_STATE_EXIT, KmdSysman::Events::EnterD0); } if (events & ZES_EVENT_TYPE_FLAG_DEVICE_DETACH) { registerEvents(ZES_EVENT_TYPE_FLAG_DEVICE_DETACH, KmdSysman::Events::EnterTDR); } if (events & ZES_EVENT_TYPE_FLAG_DEVICE_ATTACH) { registerEvents(ZES_EVENT_TYPE_FLAG_DEVICE_ATTACH, KmdSysman::Events::ExitTDR); } ResetEvent(exitHandle); return (eventList.size() == 0) ? ZE_RESULT_ERROR_UNSUPPORTED_FEATURE : ZE_RESULT_SUCCESS; } bool WddmEventsImp::eventListen(zes_event_type_flags_t &pEvent, uint64_t timeout) { HANDLE events[MAXIMUM_WAIT_OBJECTS]; pEvent = 0; // Note: whatever happens on this function, it should return true. If that's not the case, the upper loop in sysman.cpp will // cause an infinite loop for the case of "Infinite timeout". This may work on Linux since the implementation is poll based, // windows uses WaitForMultipleObjects, which is a blocking call. // no events no listen. Less than MAXIMUM_WAIT_OBJECTS - 2 to left space for the exit handle. if (eventList.size() == 0 || (eventList.size() >= (MAXIMUM_WAIT_OBJECTS - 2))) { pEvent = ZES_EVENT_TYPE_FLAG_FORCE_UINT32; return true; } // set every handle from pos 1 onwards... for (uint32_t i = 0; i < eventList.size(); i++) { events[i] = eventList[i].windowsHandle; } events[eventList.size()] = exitHandle; // Setting the last handle for the exit handle, then the exit handle is signaled, it breaks from the wait. uint32_t signaledEvent = WaitForMultipleObjects(static_cast(eventList.size() + 1), events, FALSE, static_cast(timeout)); ResetEvent(exitHandle); // Was a timeout, exit event loop. if (signaledEvent == WAIT_TIMEOUT) { return true; } // Was the exit event and exit event loop. if (signaledEvent == eventList.size()) { pEvent = ZES_EVENT_TYPE_FLAG_FORCE_UINT32; } else { pEvent = eventList[signaledEvent].id; } // Whatever reason exit the loop, WaitForMultipleObjects exited, exit from the loop must follow. 
return true; } WddmEventsImp::WddmEventsImp(OsSysman *pOsSysman) { WddmSysmanImp *pWddmSysmanImp = static_cast(pOsSysman); pKmdSysManager = &pWddmSysmanImp->getKmdSysManager(); exitHandle = CreateEvent(NULL, FALSE, FALSE, NULL); ResetEvent(exitHandle); } OsEvents *OsEvents::create(OsSysman *pOsSysman) { WddmEventsImp *pWddmEventsImp = new WddmEventsImp(pOsSysman); return static_cast(pWddmEventsImp); } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/events/windows/os_events_imp.h000066400000000000000000000023451422164147700315170ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/non_copyable_or_moveable.h" #include "level_zero/tools/source/sysman/events/os_events.h" #include "level_zero/tools/source/sysman/windows/os_sysman_imp.h" #include "sysman/windows/os_sysman_imp.h" namespace L0 { class KmdSysManager; struct EventHandler { HANDLE windowsHandle; zes_event_type_flags_t id; uint32_t requestId; }; class WddmEventsImp : public OsEvents, NEO::NonCopyableOrMovableClass { public: bool eventListen(zes_event_type_flags_t &pEvent, uint64_t timeout) override; ze_result_t eventRegister(zes_event_type_flags_t events) override; WddmEventsImp(OsSysman *pOsSysman); ~WddmEventsImp() { CloseHandle(exitHandle); } // Don't allow copies of the WddmEventsImp object WddmEventsImp(const WddmEventsImp &obj) = delete; WddmEventsImp &operator=(const WddmEventsImp &obj) = delete; private: void registerEvents(zes_event_type_flags_t eventId, uint32_t requestId); void unregisterEvents(); HANDLE exitHandle; protected: KmdSysManager *pKmdSysManager = nullptr; std::vector eventList; }; } // namespace L0 
compute-runtime-22.14.22890/level_zero/tools/source/sysman/fabric_port/000077500000000000000000000000001422164147700257645ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/fabric_port/CMakeLists.txt000066400000000000000000000012651422164147700305300ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_TOOLS_SYSMAN_FABRICPORT ${CMAKE_CURRENT_SOURCE_DIR}/fabric_port.cpp ${CMAKE_CURRENT_SOURCE_DIR}/fabric_port.h ${CMAKE_CURRENT_SOURCE_DIR}/fabric_port_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/fabric_port_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/os_fabric_port.h ) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_SYSMAN_FABRICPORT} ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) add_subdirectories() # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_SYSMAN_FABRICPORT ${L0_SRCS_TOOLS_SYSMAN_FABRICPORT}) compute-runtime-22.14.22890/level_zero/tools/source/sysman/fabric_port/fabric_port.cpp000066400000000000000000000033361422164147700307670ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/fabric_port/fabric_port.h" #include "shared/source/helpers/basic_math.h" #include "shared/source/helpers/debug_helpers.h" #include "level_zero/tools/source/sysman/fabric_port/fabric_port_imp.h" namespace L0 { FabricPortHandleContext::FabricPortHandleContext(OsSysman *pOsSysman) { pFabricDevice = new FabricDeviceImp(pOsSysman); UNRECOVERABLE_IF(nullptr == pFabricDevice); handleList.clear(); } FabricPortHandleContext::~FabricPortHandleContext() { UNRECOVERABLE_IF(nullptr == pFabricDevice); for (FabricPort *pFabricPort : handleList) { delete pFabricPort; } handleList.clear(); delete pFabricDevice; pFabricDevice = nullptr; } ze_result_t FabricPortHandleContext::init() { UNRECOVERABLE_IF(nullptr == pFabricDevice); uint32_t numPorts = pFabricDevice->getNumPorts(); for 
(uint32_t portNum = 0; portNum < numPorts; portNum++) { FabricPort *pFabricPort = new FabricPortImp(pFabricDevice, portNum); UNRECOVERABLE_IF(nullptr == pFabricPort); handleList.push_back(pFabricPort); } return ZE_RESULT_SUCCESS; } ze_result_t FabricPortHandleContext::fabricPortGet(uint32_t *pCount, zes_fabric_port_handle_t *phPort) { uint32_t handleListSize = static_cast(handleList.size()); uint32_t numToCopy = std::min(*pCount, handleListSize); if (0 == *pCount || *pCount > handleListSize) { *pCount = handleListSize; } if (nullptr != phPort) { for (uint32_t i = 0; i < numToCopy; i++) { phPort[i] = handleList[i]->toZesHandle(); } } return ZE_RESULT_SUCCESS; } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/fabric_port/fabric_port.h000066400000000000000000000032711422164147700304320ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/non_copyable_or_moveable.h" #include #include struct _zes_fabric_port_handle_t { virtual ~_zes_fabric_port_handle_t() = default; }; namespace L0 { struct OsSysman; class OsFabricDevice; class FabricDevice { public: virtual ~FabricDevice() = default; virtual OsFabricDevice *getOsFabricDevice() = 0; virtual uint32_t getNumPorts() = 0; }; class FabricPort : _zes_fabric_port_handle_t { public: virtual ~FabricPort() = default; virtual ze_result_t fabricPortGetProperties(zes_fabric_port_properties_t *pProperties) = 0; virtual ze_result_t fabricPortGetLinkType(zes_fabric_link_type_t *pLinkType) = 0; virtual ze_result_t fabricPortGetConfig(zes_fabric_port_config_t *pConfig) = 0; virtual ze_result_t fabricPortSetConfig(const zes_fabric_port_config_t *pConfig) = 0; virtual ze_result_t fabricPortGetState(zes_fabric_port_state_t *pState) = 0; virtual ze_result_t fabricPortGetThroughput(zes_fabric_port_throughput_t *pThroughput) = 0; inline zes_fabric_port_handle_t toZesHandle() { return this; } static FabricPort 
*fromHandle(zes_fabric_port_handle_t handle) { return static_cast(handle); } }; struct FabricPortHandleContext : NEO::NonCopyableOrMovableClass { FabricPortHandleContext(OsSysman *pOsSysman); ~FabricPortHandleContext(); ze_result_t init(); ze_result_t fabricPortGet(uint32_t *pCount, zes_fabric_port_handle_t *phPort); FabricDevice *pFabricDevice = nullptr; std::vector handleList = {}; }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/fabric_port/fabric_port_imp.cpp000066400000000000000000000041661422164147700316360ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/fabric_port/fabric_port_imp.h" #include "shared/source/helpers/debug_helpers.h" #include namespace L0 { uint32_t FabricDeviceImp::getNumPorts() { UNRECOVERABLE_IF(nullptr == pOsFabricDevice); return pOsFabricDevice->getNumPorts(); } FabricDeviceImp::FabricDeviceImp(OsSysman *pOsSysman) { pOsFabricDevice = OsFabricDevice::create(pOsSysman); UNRECOVERABLE_IF(nullptr == pOsFabricDevice); } FabricDeviceImp::~FabricDeviceImp() { delete pOsFabricDevice; pOsFabricDevice = nullptr; } void fabricPortGetTimestamp(uint64_t ×tamp) { std::chrono::time_point ts = std::chrono::steady_clock::now(); timestamp = std::chrono::duration_cast(ts.time_since_epoch()).count(); } ze_result_t FabricPortImp::fabricPortGetProperties(zes_fabric_port_properties_t *pProperties) { return pOsFabricPort->getProperties(pProperties); } ze_result_t FabricPortImp::fabricPortGetLinkType(zes_fabric_link_type_t *pLinkType) { return pOsFabricPort->getLinkType(pLinkType); } ze_result_t FabricPortImp::fabricPortGetConfig(zes_fabric_port_config_t *pConfig) { return pOsFabricPort->getConfig(pConfig); } ze_result_t FabricPortImp::fabricPortSetConfig(const zes_fabric_port_config_t *pConfig) { return pOsFabricPort->setConfig(pConfig); } ze_result_t FabricPortImp::fabricPortGetState(zes_fabric_port_state_t *pState) { return 
pOsFabricPort->getState(pState); } ze_result_t FabricPortImp::fabricPortGetThroughput(zes_fabric_port_throughput_t *pThroughput) { fabricPortGetTimestamp(pThroughput->timestamp); return pOsFabricPort->getThroughput(pThroughput); } void FabricPortImp::init() { } FabricPortImp::FabricPortImp(FabricDevice *pFabricDevice, uint32_t portNum) { pOsFabricPort = OsFabricPort::create(pFabricDevice->getOsFabricDevice(), portNum); UNRECOVERABLE_IF(nullptr == pOsFabricPort); init(); } FabricPortImp::~FabricPortImp() { delete pOsFabricPort; pOsFabricPort = nullptr; } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/fabric_port/fabric_port_imp.h000066400000000000000000000030121422164147700312700ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/non_copyable_or_moveable.h" #include "level_zero/tools/source/sysman/fabric_port/fabric_port.h" #include "level_zero/tools/source/sysman/fabric_port/os_fabric_port.h" #include namespace L0 { class FabricDeviceImp : public FabricDevice, NEO::NonCopyableOrMovableClass { public: FabricDeviceImp() = delete; FabricDeviceImp(OsSysman *pOsSysman); ~FabricDeviceImp() override; uint32_t getNumPorts() override; OsFabricDevice *getOsFabricDevice() override { return pOsFabricDevice; } protected: OsFabricDevice *pOsFabricDevice = nullptr; }; class FabricPortImp : public FabricPort, NEO::NonCopyableOrMovableClass { public: ze_result_t fabricPortGetProperties(zes_fabric_port_properties_t *pProperties) override; ze_result_t fabricPortGetLinkType(zes_fabric_link_type_t *pLinkType) override; ze_result_t fabricPortGetConfig(zes_fabric_port_config_t *pConfig) override; ze_result_t fabricPortSetConfig(const zes_fabric_port_config_t *pConfig) override; ze_result_t fabricPortGetState(zes_fabric_port_state_t *pState) override; ze_result_t fabricPortGetThroughput(zes_fabric_port_throughput_t *pThroughput) override; FabricPortImp() = 
delete; FabricPortImp(FabricDevice *pFabricDevice, uint32_t portNum); ~FabricPortImp() override; protected: void init(); OsFabricPort *pOsFabricPort = nullptr; }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/fabric_port/linux/000077500000000000000000000000001422164147700271235ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/fabric_port/linux/CMakeLists.txt000066400000000000000000000027541422164147700316730ustar00rootroot00000000000000# # Copyright (C) 2020-2022 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_TOOLS_SYSMAN_FABRICPORT_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) if(NEO_ENABLE_i915_PRELIM_DETECTION) list(APPEND L0_SRCS_TOOLS_SYSMAN_FABRICPORT_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/os_fabric_port_imp_prelim.cpp ${CMAKE_CURRENT_SOURCE_DIR}/os_fabric_port_imp_prelim.h ) if(LIBGENL_FOUND) set(L0_SRCS_TOOLS_SYSMAN_FABRICPORT_LINUX_ACCESS ${CMAKE_CURRENT_SOURCE_DIR}/iaf_nl_api.cpp ${CMAKE_CURRENT_SOURCE_DIR}/iaf_nl_api.h ${CMAKE_CURRENT_SOURCE_DIR}/fabric_device_access.h ${CMAKE_CURRENT_SOURCE_DIR}/fabric_device_access_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/fabric_device_access_imp.h ) else() set(L0_SRCS_TOOLS_SYSMAN_FABRICPORT_LINUX_ACCESS ${CMAKE_CURRENT_SOURCE_DIR}/fabric_device_access_stub.cpp ${CMAKE_CURRENT_SOURCE_DIR}/fabric_device_access_stub.h ) endif() else() list(APPEND L0_SRCS_TOOLS_SYSMAN_FABRICPORT_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/os_fabric_port_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/os_fabric_port_imp.h ) endif() if(UNIX) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_SYSMAN_FABRICPORT_LINUX} ${L0_SRCS_TOOLS_SYSMAN_FABRICPORT_LINUX_ACCESS} ) endif() # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_SYSMAN_FABRICPORT_LINUX ${L0_SRCS_TOOLS_SYSMAN_FABRICPORT_LINUX}) 
compute-runtime-22.14.22890/level_zero/tools/source/sysman/fabric_port/linux/fabric_device_access.h000066400000000000000000000034141422164147700333640ustar00rootroot00000000000000/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/non_copyable_or_moveable.h" #include "level_zero/zes_api.h" #include "sysman/linux/fs_access.h" #include namespace L0 { struct OsSysman; class FabricDeviceAccess : NEO::NonCopyableOrMovableClass { public: virtual ze_result_t getState(const zes_fabric_port_id_t portId, zes_fabric_port_state_t &state) = 0; virtual ze_result_t getThroughput(const zes_fabric_port_id_t portId, zes_fabric_port_throughput_t &througput) = 0; virtual ze_result_t getPortEnabledState(const zes_fabric_port_id_t portId, bool &enabled) = 0; virtual ze_result_t getPortBeaconState(const zes_fabric_port_id_t portId, bool &enabled) = 0; virtual ze_result_t enablePortBeaconing(const zes_fabric_port_id_t portId) = 0; virtual ze_result_t disablePortBeaconing(const zes_fabric_port_id_t portId) = 0; virtual ze_result_t enable(const zes_fabric_port_id_t portId) = 0; virtual ze_result_t disable(const zes_fabric_port_id_t portId) = 0; virtual ze_result_t enableUsage(const zes_fabric_port_id_t portId) = 0; virtual ze_result_t disableUsage(const zes_fabric_port_id_t portId) = 0; virtual ze_result_t forceSweep() = 0; virtual ze_result_t routingQuery(uint32_t &start, uint32_t &end) = 0; virtual ze_result_t getPorts(std::vector &ports) = 0; virtual void getProperties(const zes_fabric_port_id_t portId, std::string &model, bool &onSubdevice, uint32_t &subdeviceId, zes_fabric_port_speed_t &maxRxSpeed, zes_fabric_port_speed_t &maxTxSpeed) = 0; virtual ~FabricDeviceAccess() = default; static FabricDeviceAccess *create(OsSysman *pOsSysman); }; } // namespace L0 
fabric_device_access_imp.cpp000066400000000000000000000212401422164147700345020ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/fabric_port/linux/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "fabric_device_access_imp.h" #include "level_zero/tools/source/sysman/linux/os_sysman_imp.h" #include namespace L0 { const std::string iafPath = "device/"; const std::string iafDirectory = "iaf."; const std::string fabricIdFile = "/iaf_fabric_id"; ze_result_t FabricDeviceAccessNl::getState(const zes_fabric_port_id_t portId, zes_fabric_port_state_t &state) { ze_result_t result = pIafNlApi->fPortStatusQuery(portId, state); if (ZE_RESULT_SUCCESS != result) { return result; } uint64_t guid; uint8_t portNumber; zes_fabric_port_speed_t maxRxSpeed; zes_fabric_port_speed_t maxTxSpeed; result = pIafNlApi->fportProperties(portId, guid, portNumber, maxRxSpeed, maxTxSpeed, state.rxSpeed, state.txSpeed); if (ZE_RESULT_SUCCESS != result) { return result; } switch (state.status) { case ZES_FABRIC_PORT_STATUS_HEALTHY: case ZES_FABRIC_PORT_STATUS_DEGRADED: case ZES_FABRIC_PORT_STATUS_FAILED: { auto it = guidMap.find(guid); if (guidMap.end() == it) { populateGuidMap(); it = guidMap.find(guid); } if (guidMap.end() != it) { state.remotePortId = it->second; state.remotePortId.portNumber = portNumber; } } break; default: break; } return result; } ze_result_t FabricDeviceAccessNl::getThroughput(const zes_fabric_port_id_t portId, zes_fabric_port_throughput_t &througput) { return pIafNlApi->getThroughput(portId, througput); } ze_result_t FabricDeviceAccessNl::getPortEnabledState(const zes_fabric_port_id_t portId, bool &enabled) { return pIafNlApi->portStateQuery(portId, enabled); } ze_result_t FabricDeviceAccessNl::getPortBeaconState(const zes_fabric_port_id_t portId, bool &enabled) { return pIafNlApi->portBeaconStateQuery(portId, enabled); } ze_result_t FabricDeviceAccessNl::enablePortBeaconing(const zes_fabric_port_id_t 
portId) { return pIafNlApi->portBeaconEnable(portId); } ze_result_t FabricDeviceAccessNl::disablePortBeaconing(const zes_fabric_port_id_t portId) { return pIafNlApi->portBeaconDisable(portId); } ze_result_t FabricDeviceAccessNl::enable(const zes_fabric_port_id_t portId) { return pIafNlApi->portEnable(portId); } ze_result_t FabricDeviceAccessNl::disable(const zes_fabric_port_id_t portId) { return pIafNlApi->portDisable(portId); } ze_result_t FabricDeviceAccessNl::enableUsage(const zes_fabric_port_id_t portId) { return pIafNlApi->portUsageEnable(portId); } ze_result_t FabricDeviceAccessNl::disableUsage(const zes_fabric_port_id_t portId) { return pIafNlApi->portUsageDisable(portId); } ze_result_t FabricDeviceAccessNl::forceSweep() { return pIafNlApi->remRequest(); } ze_result_t FabricDeviceAccessNl::routingQuery(uint32_t &start, uint32_t &end) { return pIafNlApi->routingGenQuery(start, end); } ze_result_t FabricDeviceAccessNl::getPorts(std::vector &ports) { ze_result_t result; result = init(); if (ZE_RESULT_SUCCESS != result) { return result; } ports.clear(); for (auto port : myPorts) { ports.push_back(port.portId); } return ZE_RESULT_SUCCESS; } void FabricDeviceAccessNl::getProperties(const zes_fabric_port_id_t portId, std::string &model, bool &onSubdevice, uint32_t &subdeviceId, zes_fabric_port_speed_t &maxRxSpeed, zes_fabric_port_speed_t &maxTxSpeed) { for (auto port : myPorts) { UNRECOVERABLE_IF(portId.fabricId != port.portId.fabricId); if (portId.attachId == port.portId.attachId && portId.portNumber == port.portId.portNumber) { model = port.model; onSubdevice = port.onSubdevice; subdeviceId = port.portId.attachId; maxRxSpeed = port.maxRxSpeed; maxTxSpeed = port.maxTxSpeed; return; } } } ze_result_t FabricDeviceAccessNl::getAllFabricIds(std::vector &fabricIds) { return pIafNlApi->deviceEnum(fabricIds); } ze_result_t FabricDeviceAccessNl::getNumSubdevices(const uint32_t fabricId, uint32_t &numSubdevices) { return pIafNlApi->fabricDeviceProperties(fabricId, 
numSubdevices); } ze_result_t FabricDeviceAccessNl::getSubdevice(const uint32_t fabricId, const uint32_t subdevice, uint64_t &guid, std::vector &ports) { return pIafNlApi->subdevicePropertiesGet(fabricId, subdevice, guid, ports); } ze_result_t FabricDeviceAccessNl::getPortSpeeds(const zes_fabric_port_id_t portId, zes_fabric_port_speed_t &maxRxSpeed, zes_fabric_port_speed_t &maxTxSpeed) { uint64_t guid; uint8_t portNumber; zes_fabric_port_speed_t rxSpeed; zes_fabric_port_speed_t txSpeed; return pIafNlApi->fportProperties(portId, guid, portNumber, maxRxSpeed, maxTxSpeed, rxSpeed, txSpeed); } ze_result_t FabricDeviceAccessNl::initMyPorts(const uint32_t fabricId) { uint32_t numSubdevices; if (ZE_RESULT_SUCCESS != getNumSubdevices(fabricId, numSubdevices)) { return ZE_RESULT_ERROR_UNKNOWN; } for (uint32_t subdevice = 0; subdevice < numSubdevices; subdevice++) { uint64_t guid; std::vector ports; if (ZE_RESULT_SUCCESS != getSubdevice(fabricId, subdevice, guid, ports)) { myPorts.clear(); return ZE_RESULT_ERROR_UNKNOWN; } for (auto port : ports) { Port p; p.onSubdevice = numSubdevices > 1; p.portId.fabricId = fabricId; p.portId.attachId = subdevice; p.portId.portNumber = port; p.model = "XeLink"; if (ZE_RESULT_SUCCESS != getPortSpeeds(p.portId, p.maxRxSpeed, p.maxTxSpeed)) { myPorts.clear(); return ZE_RESULT_ERROR_UNKNOWN; } myPorts.push_back(p); } } return ZE_RESULT_SUCCESS; } void FabricDeviceAccessNl::populateGuidMap() { std::vector fabricIds; if (ZE_RESULT_SUCCESS != getAllFabricIds(fabricIds)) { return; } for (auto fabricId : fabricIds) { uint32_t numSubdevices = 0; if (ZE_RESULT_SUCCESS != getNumSubdevices(fabricId, numSubdevices)) { return; } for (uint32_t subdevice = 0; subdevice < numSubdevices; subdevice++) { uint64_t guid; std::vector ports; if (ZE_RESULT_SUCCESS != getSubdevice(fabricId, subdevice, guid, ports)) { return; } zes_fabric_port_id_t portId; portId.fabricId = fabricId; portId.attachId = subdevice; portId.portNumber = ports.size(); guidMap[guid] = 
portId; } } return; } ze_result_t FabricDeviceAccessNl::init() { if (myPorts.empty()) { std::string path; path.clear(); std::vector list; if (ZE_RESULT_SUCCESS != pLinuxSysmanImp->getSysfsAccess().scanDirEntries(iafPath, list)) { // There should be a device directory return ZE_RESULT_ERROR_UNKNOWN; } for (auto entry : list) { if (!iafDirectory.compare(entry.substr(0, iafDirectory.length()))) { // device/iaf.X/iaf_fabric_id, where X is the hardware slot number path = iafPath + entry + fabricIdFile; } } if (path.empty()) { // This device does not have a fabric return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } std::string fabricIdStr; fabricIdStr.clear(); if (ZE_RESULT_SUCCESS != pLinuxSysmanImp->getSysfsAccess().read(path, fabricIdStr)) { // This device has a fabric, but the iaf module isn't running return ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE; } unsigned long myFabricId = 0UL; size_t end = 0; myFabricId = std::stoul(fabricIdStr, &end, 16); if (fabricIdStr.length() != end || myFabricId > std::numeric_limits::max()) { return ZE_RESULT_ERROR_UNKNOWN; } if (ZE_RESULT_SUCCESS != initMyPorts(static_cast(myFabricId))) { return ZE_RESULT_ERROR_UNKNOWN; } } return ZE_RESULT_SUCCESS; } FabricDeviceAccessNl::FabricDeviceAccessNl(OsSysman *pOsSysman) { pLinuxSysmanImp = static_cast(pOsSysman); pIafNlApi = new IafNlApi; UNRECOVERABLE_IF(nullptr == pIafNlApi); } FabricDeviceAccessNl::~FabricDeviceAccessNl() { if (nullptr != pIafNlApi) { delete pIafNlApi; pIafNlApi = nullptr; } } FabricDeviceAccess *FabricDeviceAccess::create(OsSysman *pOsSysman) { return new FabricDeviceAccessNl(pOsSysman); } } // namespace L0 fabric_device_access_imp.h000066400000000000000000000050541422164147700341540ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/fabric_port/linux/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "fabric_device_access.h" #include "iaf_nl_api.h" #include "sysman/linux/os_sysman_imp.h" 
namespace L0 { struct Port { bool onSubdevice; zes_fabric_port_id_t portId; std::string model; zes_fabric_port_speed_t maxRxSpeed; zes_fabric_port_speed_t maxTxSpeed; }; class FabricDeviceAccessNl : public FabricDeviceAccess { public: FabricDeviceAccessNl() = delete; FabricDeviceAccessNl(OsSysman *pOsSysman); virtual ~FabricDeviceAccessNl(); ze_result_t getState(const zes_fabric_port_id_t portId, zes_fabric_port_state_t &state) override; ze_result_t getThroughput(const zes_fabric_port_id_t portId, zes_fabric_port_throughput_t &througput) override; ze_result_t getPortEnabledState(const zes_fabric_port_id_t portId, bool &enabled) override; ze_result_t getPortBeaconState(const zes_fabric_port_id_t portId, bool &enabled) override; ze_result_t enablePortBeaconing(const zes_fabric_port_id_t portId) override; ze_result_t disablePortBeaconing(const zes_fabric_port_id_t portId) override; ze_result_t enable(const zes_fabric_port_id_t portId) override; ze_result_t disable(const zes_fabric_port_id_t portId) override; ze_result_t enableUsage(const zes_fabric_port_id_t portId) override; ze_result_t disableUsage(const zes_fabric_port_id_t portId) override; ze_result_t forceSweep() override; ze_result_t routingQuery(uint32_t &start, uint32_t &end) override; ze_result_t getPorts(std::vector &ports) override; void getProperties(const zes_fabric_port_id_t portId, std::string &model, bool &onSubdevice, uint32_t &subdeviceId, zes_fabric_port_speed_t &maxRxSpeed, zes_fabric_port_speed_t &maxTxSpeed) override; private: ze_result_t init(); ze_result_t initMyPorts(const uint32_t fabricId); void populateGuidMap(); ze_result_t getAllFabricIds(std::vector &fabricIds); ze_result_t getNumSubdevices(const uint32_t fabricId, uint32_t &numSubdevices); ze_result_t getSubdevice(const uint32_t fabricId, const uint32_t subdeviceId, uint64_t &guid, std::vector &ports); ze_result_t getPortSpeeds(const zes_fabric_port_id_t portId, zes_fabric_port_speed_t &maxRxSpeed, zes_fabric_port_speed_t &maxTxSpeed); 
LinuxSysmanImp *pLinuxSysmanImp = nullptr; std::map guidMap = {}; protected: IafNlApi *pIafNlApi = nullptr; std::vector myPorts = {}; }; } // namespace L0 fabric_device_access_stub.cpp000066400000000000000000000047571422164147700347100ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/fabric_port/linux/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "fabric_device_access_stub.h" #include "level_zero/tools/source/sysman/linux/os_sysman_imp.h" namespace L0 { ze_result_t FabricDeviceAccessStub::getState(const zes_fabric_port_id_t portId, zes_fabric_port_state_t &state) { return ZE_RESULT_SUCCESS; } ze_result_t FabricDeviceAccessStub::getThroughput(const zes_fabric_port_id_t portId, zes_fabric_port_throughput_t &througput) { return ZE_RESULT_SUCCESS; } ze_result_t FabricDeviceAccessStub::getPortEnabledState(const zes_fabric_port_id_t portId, bool &enabled) { return ZE_RESULT_SUCCESS; } ze_result_t FabricDeviceAccessStub::getPortBeaconState(const zes_fabric_port_id_t portId, bool &enabled) { return ZE_RESULT_SUCCESS; } ze_result_t FabricDeviceAccessStub::enablePortBeaconing(const zes_fabric_port_id_t portId) { return ZE_RESULT_SUCCESS; } ze_result_t FabricDeviceAccessStub::disablePortBeaconing(const zes_fabric_port_id_t portId) { return ZE_RESULT_SUCCESS; } ze_result_t FabricDeviceAccessStub::enable(const zes_fabric_port_id_t portId) { return ZE_RESULT_SUCCESS; } ze_result_t FabricDeviceAccessStub::disable(const zes_fabric_port_id_t portId) { return ZE_RESULT_SUCCESS; } ze_result_t FabricDeviceAccessStub::enableUsage(const zes_fabric_port_id_t portId) { return ZE_RESULT_SUCCESS; } ze_result_t FabricDeviceAccessStub::disableUsage(const zes_fabric_port_id_t portId) { return ZE_RESULT_SUCCESS; } ze_result_t FabricDeviceAccessStub::forceSweep() { return ZE_RESULT_SUCCESS; } ze_result_t FabricDeviceAccessStub::routingQuery(uint32_t &start, uint32_t &end) { return ZE_RESULT_SUCCESS; } ze_result_t 
FabricDeviceAccessStub::getPorts(std::vector &ports) { return ZE_RESULT_SUCCESS; } void FabricDeviceAccessStub::getProperties(const zes_fabric_port_id_t portId, std::string &model, bool &onSubdevice, uint32_t &subdeviceId, zes_fabric_port_speed_t &maxRxSpeed, zes_fabric_port_speed_t &maxTxSpeed) { model = ""; onSubdevice = false; subdeviceId = 0U; maxRxSpeed.width = -1; maxRxSpeed.bitRate = -1L; maxTxSpeed.width = -1; maxTxSpeed.bitRate = -1L; } FabricDeviceAccessStub::FabricDeviceAccessStub(OsSysman *pOsSysman) { } FabricDeviceAccessStub::~FabricDeviceAccessStub() { } FabricDeviceAccess *FabricDeviceAccess::create(OsSysman *pOsSysman) { return new FabricDeviceAccessStub(pOsSysman); } } // namespace L0 fabric_device_access_stub.h000066400000000000000000000031421422164147700343400ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/fabric_port/linux/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "fabric_device_access.h" namespace L0 { class FabricDeviceAccessStub : public FabricDeviceAccess { public: FabricDeviceAccessStub() = delete; FabricDeviceAccessStub(OsSysman *pOsSysman); ~FabricDeviceAccessStub(); ze_result_t getState(const zes_fabric_port_id_t portId, zes_fabric_port_state_t &state) override; ze_result_t getThroughput(const zes_fabric_port_id_t portId, zes_fabric_port_throughput_t &througput) override; ze_result_t getPortEnabledState(const zes_fabric_port_id_t portId, bool &enabled) override; ze_result_t getPortBeaconState(const zes_fabric_port_id_t portId, bool &enabled) override; ze_result_t enablePortBeaconing(const zes_fabric_port_id_t portId) override; ze_result_t disablePortBeaconing(const zes_fabric_port_id_t portId) override; ze_result_t enable(const zes_fabric_port_id_t portId) override; ze_result_t disable(const zes_fabric_port_id_t portId) override; ze_result_t enableUsage(const zes_fabric_port_id_t portId) override; ze_result_t disableUsage(const 
zes_fabric_port_id_t portId) override; ze_result_t forceSweep() override; ze_result_t routingQuery(uint32_t &start, uint32_t &end) override; ze_result_t getPorts(std::vector &ports) override; void getProperties(const zes_fabric_port_id_t portId, std::string &model, bool &onSubdevice, uint32_t &subdeviceId, zes_fabric_port_speed_t &maxRxSpeed, zes_fabric_port_speed_t &maxTxSpeed) override; }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/fabric_port/linux/iaf_nl_api.cpp000066400000000000000000001007151422164147700317140ustar00rootroot00000000000000/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "iaf_nl_api.h" #include "level_zero/tools/source/sysman/linux/os_sysman_imp.h" #include #include #include #include namespace L0 { extern "C" { // C linkage callback routines for Netlink static int globalHandleMsg(struct nl_msg *msg, void *arg) { IafNlApi *pIafNlApi = reinterpret_cast(arg); return pIafNlApi->handleMsg(msg); } static int globalNlOperation(struct nl_cache_ops *ops, struct genl_cmd *cmd, struct genl_info *info, void *arg) { IafNlApi *pIafNlApi = reinterpret_cast(arg); return pIafNlApi->nlOperation(ops, cmd, info); } } static const struct { int id; char *name; } iafCmds[] = { {.id = IAF_CMD_OP_DEVICE_ENUM, .name = const_cast("DEVICE_ENUM")}, {.id = IAF_CMD_OP_PORT_ENABLE, .name = const_cast("PORT_ENABLE")}, {.id = IAF_CMD_OP_PORT_DISABLE, .name = const_cast("PORT_DISABLE")}, {.id = IAF_CMD_OP_PORT_STATE_QUERY, .name = const_cast("PORT_STATE_QUERY")}, {.id = IAF_CMD_OP_PORT_USAGE_ENABLE, .name = const_cast("PORT_USAGE_ENABLE")}, {.id = IAF_CMD_OP_PORT_USAGE_DISABLE, .name = const_cast("PORT_USAGE_DISABLE")}, {.id = IAF_CMD_OP_PORT_USAGE_STATE_QUERY, .name = const_cast("PORT_USAGE_STATE_QUERY")}, {.id = IAF_CMD_OP_PORT_BEACON_ENABLE, .name = const_cast("PORT_BEACON_ENABLE")}, {.id = IAF_CMD_OP_PORT_BEACON_DISABLE, .name = const_cast("PORT_BEACON_DISABLE")}, {.id = 
IAF_CMD_OP_PORT_BEACON_STATE_QUERY, .name = const_cast("PORT_BEACON_STATE_QUERY")}, {.id = IAF_CMD_OP_PORT_ROUTED_QUERY, .name = const_cast("PORT_ROUTED_QUERY")}, {.id = IAF_CMD_OP_REM_REQUEST, .name = const_cast("REM_REQUEST")}, {.id = IAF_CMD_OP_ROUTING_GEN_QUERY, .name = const_cast("ROUTING_GEN_QUERY")}, {.id = IAF_CMD_OP_FABRIC_DEVICE_PROPERTIES, .name = const_cast("FABRIC_DEVICE_PROPERTIES")}, {.id = IAF_CMD_OP_SUB_DEVICE_PROPERTIES_GET, .name = const_cast("SUB_DEVICE_PROPERTIES_GET")}, {.id = IAF_CMD_OP_FPORT_STATUS_QUERY, .name = const_cast("FPORT_STATUS_QUERY")}, {.id = IAF_CMD_OP_SUB_DEVICE_TRAP_COUNT_QUERY, .name = const_cast("FPORT_EVENT_COUNT_QUERY")}, {.id = IAF_CMD_OP_FPORT_PROPERTIES, .name = const_cast("FPORT_PROPERTIES")}, {.id = IAF_CMD_OP_FPORT_XMIT_RECV_COUNTS, .name = const_cast("FPORT_XMIT_RECV_COUNTS")}, {.id = 0, .name = nullptr}, }; struct PortProperties { uint64_t neighborGuid; uint8_t neighborPortNumber; int32_t enabledRxWidth; int64_t enabledRxBitrate; int32_t activeRxWidth; int64_t activeRxBitrate; int32_t enabledTxWidth; int64_t enabledTxBitrate; int32_t activeTxWidth; int64_t activeTxBitrate; }; struct Subdevice { uint64_t guid; std::vector ports; }; struct Generation { uint32_t start; uint32_t end; }; ze_result_t IafNlApi::allocMsg(const uint16_t cmdOp, struct nl_msg *&msg) { msg = pNlApi->nlmsgAlloc(); if (nullptr == msg) { return ZE_RESULT_ERROR_UNKNOWN; } if (nullptr == pNlApi->genlmsgPut(msg, NL_AUTO_PID, NL_AUTO_SEQ, familyId, 0, 0, cmdOp, 1)) { pNlApi->nlmsgFree(msg); msg = nullptr; return ZE_RESULT_ERROR_UNKNOWN; } return ZE_RESULT_SUCCESS; } ze_result_t IafNlApi::issueRequest(const uint16_t cmdOp, const uint32_t fabricId, const uint32_t attachId, const uint8_t portNumber, void *pOutput) { ze_result_t result = init(); if (ZE_RESULT_SUCCESS != result) { return result; } struct nl_msg *msg; result = allocMsg(cmdOp, msg); if (ZE_RESULT_SUCCESS == result) { pNlApi->nlaPutU32(msg, IAF_ATTR_FABRIC_ID, fabricId); 
pNlApi->nlaPutU8(msg, IAF_ATTR_SD_INDEX, attachId); pNlApi->nlaPutU8(msg, IAF_ATTR_FABRIC_PORT_NUMBER, portNumber); result = performTransaction(cmdOp, msg, pOutput); } cleanup(); return result; } ze_result_t IafNlApi::issueRequest(const uint16_t cmdOp, const uint32_t fabricId, const uint32_t attachId, void *pOutput) { ze_result_t result = init(); if (ZE_RESULT_SUCCESS != result) { return result; } struct nl_msg *msg; result = allocMsg(cmdOp, msg); if (ZE_RESULT_SUCCESS == result) { pNlApi->nlaPutU32(msg, IAF_ATTR_FABRIC_ID, fabricId); pNlApi->nlaPutU8(msg, IAF_ATTR_SD_INDEX, attachId); result = performTransaction(cmdOp, msg, pOutput); } cleanup(); return result; } ze_result_t IafNlApi::issueRequest(const uint16_t cmdOp, const uint32_t fabricId, void *pOutput) { ze_result_t result = init(); if (ZE_RESULT_SUCCESS != result) { return result; } struct nl_msg *msg; result = allocMsg(cmdOp, msg); if (ZE_RESULT_SUCCESS == result) { pNlApi->nlaPutU32(msg, IAF_ATTR_FABRIC_ID, fabricId); result = performTransaction(cmdOp, msg, pOutput); } cleanup(); return result; } ze_result_t IafNlApi::issueRequest(const uint16_t cmdOp, void *pOutput) { ze_result_t result = init(); if (ZE_RESULT_SUCCESS != result) { return result; } struct nl_msg *msg; result = allocMsg(cmdOp, msg); if (ZE_RESULT_SUCCESS == result) { result = performTransaction(cmdOp, msg, pOutput); } cleanup(); return result; } ze_result_t IafNlApi::performTransaction(const uint16_t cmdOp, struct nl_msg *msg, void *pOutput) { Operation *pOperation = new Operation(cmdOp, pOutput); uint64_t context = reinterpret_cast(pOperation); pNlApi->nlaPutU64(msg, IAF_ATTR_CMD_OP_CONTEXT, context); pNlApi->nlaPutU8(msg, IAF_ATTR_CMD_OP_MSG_TYPE, IAF_CMD_MSG_REQUEST); validContexts.push_back(context); if (0 > pNlApi->nlSendAuto(nlSock, msg)) { pOperation->done = true; } pNlApi->nlmsgFree(msg); while (!pOperation->done) { int res = pNlApi->nlRecvmsgsDefault(nlSock); if (0 > res) { if (-NLE_PERM == res) { pOperation->result = 
ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS; } pOperation->done = true; } } validContexts.remove(context); ze_result_t result = pOperation->result; delete pOperation; return result; } ze_result_t IafNlApi::handleResponse(const uint16_t cmdOp, struct genl_info *info, void *pOutput) { switch (cmdOp) { case IAF_CMD_OP_FPORT_STATUS_QUERY: return fPortStatusQueryRsp(info, pOutput); case IAF_CMD_OP_FPORT_XMIT_RECV_COUNTS: return getThroughputRsp(info, pOutput); case IAF_CMD_OP_PORT_STATE_QUERY: return portStateQueryRsp(info, pOutput); case IAF_CMD_OP_PORT_BEACON_STATE_QUERY: return portBeaconStateQueryRsp(info, pOutput); case IAF_CMD_OP_ROUTING_GEN_QUERY: return routingGenQueryRsp(info, pOutput); case IAF_CMD_OP_DEVICE_ENUM: return deviceEnumRsp(info, pOutput); case IAF_CMD_OP_FABRIC_DEVICE_PROPERTIES: return fabricDevicePropertiesRsp(info, pOutput); case IAF_CMD_OP_SUB_DEVICE_PROPERTIES_GET: return subdevicePropertiesGetRsp(info, pOutput); case IAF_CMD_OP_FPORT_PROPERTIES: return fportPropertiesRsp(info, pOutput); case IAF_CMD_OP_PORT_BEACON_ENABLE: case IAF_CMD_OP_PORT_BEACON_DISABLE: case IAF_CMD_OP_PORT_ENABLE: case IAF_CMD_OP_PORT_DISABLE: case IAF_CMD_OP_PORT_USAGE_ENABLE: case IAF_CMD_OP_PORT_USAGE_DISABLE: case IAF_CMD_OP_REM_REQUEST: return ZE_RESULT_SUCCESS; default: return ZE_RESULT_ERROR_UNKNOWN; } } ze_result_t IafNlApi::fPortStatusQueryRsp(struct genl_info *info, void *pOutput) { zes_fabric_port_state_t *pState = reinterpret_cast(pOutput); const struct nlmsghdr *nlh = info->nlh; auto nla = pNlApi->nlmsgAttrdata(nlh, GENL_HDRLEN); auto rem = pNlApi->nlmsgAttrlen(nlh, GENL_HDRLEN); for (; pNlApi->nlaOk(nla, rem); nla = pNlApi->nlaNext(nla, &(rem))) { if (pNlApi->nlaType(nla) == IAF_ATTR_FABRIC_PORT) { uint8_t healthStatus = 0; uint8_t lqi = 0; uint8_t lwd = 0; uint8_t rate = 0; uint8_t failed = 0; uint8_t isolated = 0; uint8_t flapping = 0; uint8_t linkDown = 0; uint8_t didNotTrain = 0; auto cur = (struct nlattr *)pNlApi->nlaData(nla); auto rem = 
pNlApi->nlaLen(nla); for (; pNlApi->nlaOk(cur, rem); cur = pNlApi->nlaNext(cur, &(rem))) { switch (pNlApi->nlaType(cur)) { case IAF_ATTR_FPORT_HEALTH: healthStatus = pNlApi->nlaGetU8(cur); break; case IAF_ATTR_FPORT_ISSUE_LQI: lqi = pNlApi->nlaGetU8(cur); break; case IAF_ATTR_FPORT_ISSUE_LWD: lwd = pNlApi->nlaGetU8(cur); break; case IAF_ATTR_FPORT_ISSUE_RATE: rate = pNlApi->nlaGetU8(cur); break; case IAF_ATTR_FPORT_ERROR_FAILED: failed = pNlApi->nlaGetU8(cur); break; case IAF_ATTR_FPORT_ERROR_ISOLATED: isolated = pNlApi->nlaGetU8(cur); break; case IAF_ATTR_FPORT_ERROR_FLAPPING: flapping = pNlApi->nlaGetU8(cur); break; case IAF_ATTR_FPORT_ERROR_LINK_DOWN: linkDown = pNlApi->nlaGetU8(cur); break; case IAF_ATTR_FPORT_ERROR_DID_NOT_TRAIN: didNotTrain = pNlApi->nlaGetU8(cur); break; default: break; } } switch (healthStatus) { case IAF_FPORT_HEALTH_OFF: pState->status = ZES_FABRIC_PORT_STATUS_DISABLED; break; case IAF_FPORT_HEALTH_FAILED: pState->status = ZES_FABRIC_PORT_STATUS_FAILED; pState->failureReasons = 0; if (1 == failed || 1 == isolated || 1 == linkDown) { pState->failureReasons |= ZES_FABRIC_PORT_FAILURE_FLAG_FAILED; } if (1 == didNotTrain) { pState->failureReasons |= ZES_FABRIC_PORT_FAILURE_FLAG_TRAINING_TIMEOUT; } if (1 == flapping) { pState->failureReasons |= ZES_FABRIC_PORT_FAILURE_FLAG_FLAPPING; } break; case IAF_FPORT_HEALTH_DEGRADED: pState->status = ZES_FABRIC_PORT_STATUS_DEGRADED; pState->qualityIssues = 0; if (1 == lqi) { pState->qualityIssues |= ZES_FABRIC_PORT_QUAL_ISSUE_FLAG_LINK_ERRORS; } if (1 == lwd || 1 == rate) { pState->qualityIssues |= ZES_FABRIC_PORT_QUAL_ISSUE_FLAG_SPEED; } break; case IAF_FPORT_HEALTH_HEALTHY: pState->status = ZES_FABRIC_PORT_STATUS_HEALTHY; break; default: pState->status = ZES_FABRIC_PORT_STATUS_UNKNOWN; break; } } } return ZE_RESULT_SUCCESS; } ze_result_t IafNlApi::getThroughputRsp(struct genl_info *info, void *pOutput) { zes_fabric_port_throughput_t *pThroughput = reinterpret_cast(pOutput); pThroughput->txCounter = 
0UL; pThroughput->rxCounter = 0UL; if (info->attrs[IAF_ATTR_FPORT_TX_BYTES]) { pThroughput->txCounter = pNlApi->nlaGetU64(info->attrs[IAF_ATTR_FPORT_TX_BYTES]); } if (info->attrs[IAF_ATTR_FPORT_RX_BYTES]) { pThroughput->rxCounter = pNlApi->nlaGetU64(info->attrs[IAF_ATTR_FPORT_RX_BYTES]); } return ZE_RESULT_SUCCESS; } ze_result_t IafNlApi::portStateQueryRsp(struct genl_info *info, void *pOutput) { bool *pEnabled = reinterpret_cast(pOutput); const struct nlmsghdr *nlh = info->nlh; *pEnabled = false; auto nla = pNlApi->nlmsgAttrdata(nlh, GENL_HDRLEN); auto rem = pNlApi->nlmsgAttrlen(nlh, GENL_HDRLEN); for (; pNlApi->nlaOk(nla, rem); nla = pNlApi->nlaNext(nla, &(rem))) { if (pNlApi->nlaIsNested(nla)) { auto cur = (struct nlattr *)pNlApi->nlaData(nla); auto rem = pNlApi->nlaLen(nla); for (; pNlApi->nlaOk(cur, rem); cur = pNlApi->nlaNext(cur, &(rem))) { switch (pNlApi->nlaType(cur)) { case IAF_ATTR_ENABLED_STATE: *pEnabled = (0 != pNlApi->nlaGetU8(cur)); break; default: break; } } } } return ZE_RESULT_SUCCESS; } ze_result_t IafNlApi::portBeaconStateQueryRsp(struct genl_info *info, void *pOutput) { bool *pEnabled = reinterpret_cast(pOutput); const struct nlmsghdr *nlh = info->nlh; *pEnabled = false; auto nla = pNlApi->nlmsgAttrdata(nlh, GENL_HDRLEN); auto rem = pNlApi->nlmsgAttrlen(nlh, GENL_HDRLEN); for (; pNlApi->nlaOk(nla, rem); nla = pNlApi->nlaNext(nla, &(rem))) { if (pNlApi->nlaIsNested(nla)) { auto cur = (struct nlattr *)pNlApi->nlaData(nla); auto rem = pNlApi->nlaLen(nla); for (; pNlApi->nlaOk(cur, rem); cur = pNlApi->nlaNext(cur, &(rem))) { switch (pNlApi->nlaType(cur)) { case IAF_ATTR_ENABLED_STATE: *pEnabled = (0 != pNlApi->nlaGetU8(cur)); break; default: break; } } } } return ZE_RESULT_SUCCESS; } ze_result_t IafNlApi::routingGenQueryRsp(struct genl_info *info, void *pOutput) { Generation *pGeneration = reinterpret_cast(pOutput); if (info->attrs[IAF_ATTR_ROUTING_GEN_START]) { pGeneration->start = pNlApi->nlaGetU32(info->attrs[IAF_ATTR_ROUTING_GEN_START]); } if 
(info->attrs[IAF_ATTR_ROUTING_GEN_END]) { pGeneration->end = pNlApi->nlaGetU32(info->attrs[IAF_ATTR_ROUTING_GEN_END]); } return ZE_RESULT_SUCCESS; } ze_result_t IafNlApi::deviceEnumRsp(struct genl_info *info, void *pOutput) { std::vector *pFabricIds = reinterpret_cast *>(pOutput); const struct nlmsghdr *nlh = info->nlh; pFabricIds->clear(); auto nla = pNlApi->nlmsgAttrdata(nlh, GENL_HDRLEN); auto rem = pNlApi->nlmsgAttrlen(nlh, GENL_HDRLEN); for (; pNlApi->nlaOk(nla, rem); nla = pNlApi->nlaNext(nla, &(rem))) { if (pNlApi->nlaIsNested(nla)) { auto cur = (struct nlattr *)pNlApi->nlaData(nla); auto rem = pNlApi->nlaLen(nla); for (; pNlApi->nlaOk(cur, rem); cur = pNlApi->nlaNext(cur, &(rem))) { switch (pNlApi->nlaType(cur)) { case IAF_ATTR_FABRIC_ID: pFabricIds->push_back(pNlApi->nlaGetU32(cur)); break; default: break; } } } } return ZE_RESULT_SUCCESS; } ze_result_t IafNlApi::fabricDevicePropertiesRsp(struct genl_info *info, void *pOutput) { uint32_t *pNumSubdevices = reinterpret_cast(pOutput); *pNumSubdevices = 0; if (info->attrs[IAF_ATTR_SUBDEVICE_COUNT]) { *pNumSubdevices = pNlApi->nlaGetU8(info->attrs[IAF_ATTR_SUBDEVICE_COUNT]); } return ZE_RESULT_SUCCESS; } ze_result_t IafNlApi::subdevicePropertiesGetRsp(struct genl_info *info, void *pOutput) { Subdevice *pSubdevice = reinterpret_cast(pOutput); const struct nlmsghdr *nlh = info->nlh; if (info->attrs[IAF_ATTR_GUID]) { pSubdevice->guid = pNlApi->nlaGetU64(info->attrs[IAF_ATTR_GUID]); } auto nla = pNlApi->nlmsgAttrdata(nlh, GENL_HDRLEN); auto rem = pNlApi->nlmsgAttrlen(nlh, GENL_HDRLEN); for (; pNlApi->nlaOk(nla, rem); nla = pNlApi->nlaNext(nla, &(rem))) { if (pNlApi->nlaType(nla) == IAF_ATTR_FABRIC_PORT) { uint8_t port = 0; uint8_t type = IAF_FPORT_TYPE_DISCONNECTED; auto cur = (struct nlattr *)pNlApi->nlaData(nla); auto rem = pNlApi->nlaLen(nla); for (; pNlApi->nlaOk(cur, rem); cur = pNlApi->nlaNext(cur, &(rem))) { switch (pNlApi->nlaType(cur)) { case IAF_ATTR_FABRIC_PORT_NUMBER: port = pNlApi->nlaGetU8(cur); 
break; case IAF_ATTR_FABRIC_PORT_TYPE: type = pNlApi->nlaGetU8(cur); break; default: break; } } if (0 != port && IAF_FPORT_TYPE_DISCONNECTED != type) { pSubdevice->ports.push_back(port); } } } return ZE_RESULT_SUCCESS; } ze_result_t IafNlApi::fportPropertiesRsp(struct genl_info *info, void *pOutput) { PortProperties *pPortProperties = reinterpret_cast(pOutput); const struct nlmsghdr *nlh = info->nlh; auto nla = pNlApi->nlmsgAttrdata(nlh, GENL_HDRLEN); auto rem = pNlApi->nlmsgAttrlen(nlh, GENL_HDRLEN); for (; pNlApi->nlaOk(nla, rem); nla = pNlApi->nlaNext(nla, &(rem))) { if (pNlApi->nlaType(nla) == IAF_ATTR_FABRIC_PORT) { int32_t activeWidth = -1; int32_t degradedRxWidth = -1; int32_t degradedTxWidth = -1; int64_t activeBitrate = -1; int64_t maxBitrate = -1; auto cur = (struct nlattr *)pNlApi->nlaData(nla); auto rem = pNlApi->nlaLen(nla); for (; pNlApi->nlaOk(cur, rem); cur = pNlApi->nlaNext(cur, &(rem))) { switch (pNlApi->nlaType(cur)) { case IAF_ATTR_FPORT_NEIGHBOR_GUID: pPortProperties->neighborGuid = pNlApi->nlaGetU64(cur); break; case IAF_ATTR_FPORT_NEIGHBOR_PORT_NUMBER: pPortProperties->neighborPortNumber = pNlApi->nlaGetU8(cur); break; case IAF_ATTR_FPORT_LINK_WIDTH_ENABLED: pPortProperties->enabledRxWidth = pPortProperties->enabledTxWidth = translateWidth(pNlApi->nlaGetU8(cur)); break; case IAF_ATTR_FPORT_LINK_WIDTH_ACTIVE: activeWidth = translateWidth(pNlApi->nlaGetU8(cur)); break; case IAF_ATTR_FPORT_BPS_LINK_SPEED_ACTIVE: activeBitrate = pNlApi->nlaGetU64(cur); break; case IAF_ATTR_FPORT_LINK_WIDTH_DOWNGRADE_RX_ACTIVE: degradedRxWidth = translateWidth(pNlApi->nlaGetU8(cur)); break; case IAF_ATTR_FPORT_LINK_WIDTH_DOWNGRADE_TX_ACTIVE: degradedTxWidth = translateWidth(pNlApi->nlaGetU8(cur)); break; case IAF_ATTR_FPORT_BPS_LINK_SPEED_MAX: maxBitrate = pNlApi->nlaGetU64(cur); break; default: break; } } if (-1 != degradedRxWidth) { pPortProperties->activeRxWidth = degradedRxWidth; } else { pPortProperties->activeRxWidth = activeWidth; } if (-1 != 
degradedTxWidth) { pPortProperties->activeTxWidth = degradedTxWidth; } else { pPortProperties->activeTxWidth = activeWidth; } if (0 != activeBitrate) { pPortProperties->activeRxBitrate = pPortProperties->activeTxBitrate = activeBitrate; } if (0 != maxBitrate) { pPortProperties->enabledRxBitrate = pPortProperties->enabledTxBitrate = maxBitrate; } } } return ZE_RESULT_SUCCESS; } int32_t IafNlApi::translateWidth(uint8_t width) { if (width & 0x8) { return 4; } if (width & 0x4) { return 3; } if (width & 0x2) { return 2; } if (width & 0x1) { return 1; } return -1; } ze_result_t IafNlApi::fPortStatusQuery(const zes_fabric_port_id_t portId, zes_fabric_port_state_t &state) { return issueRequest(IAF_CMD_OP_FPORT_STATUS_QUERY, portId.fabricId, portId.attachId, portId.portNumber, reinterpret_cast(&state)); } ze_result_t IafNlApi::getThroughput(const zes_fabric_port_id_t portId, zes_fabric_port_throughput_t &throughput) { return issueRequest(IAF_CMD_OP_FPORT_XMIT_RECV_COUNTS, portId.fabricId, portId.attachId, portId.portNumber, reinterpret_cast(&throughput)); } ze_result_t IafNlApi::portStateQuery(const zes_fabric_port_id_t portId, bool &enabled) { return issueRequest(IAF_CMD_OP_PORT_STATE_QUERY, portId.fabricId, portId.attachId, portId.portNumber, reinterpret_cast(&enabled)); } ze_result_t IafNlApi::portBeaconStateQuery(const zes_fabric_port_id_t portId, bool &enabled) { return issueRequest(IAF_CMD_OP_PORT_BEACON_STATE_QUERY, portId.fabricId, portId.attachId, portId.portNumber, reinterpret_cast(&enabled)); } ze_result_t IafNlApi::portBeaconEnable(const zes_fabric_port_id_t portId) { return issueRequest(IAF_CMD_OP_PORT_BEACON_ENABLE, portId.fabricId, portId.attachId, portId.portNumber, nullptr); } ze_result_t IafNlApi::portBeaconDisable(const zes_fabric_port_id_t portId) { return issueRequest(IAF_CMD_OP_PORT_BEACON_DISABLE, portId.fabricId, portId.attachId, portId.portNumber, nullptr); } ze_result_t IafNlApi::portEnable(const zes_fabric_port_id_t portId) { return 
issueRequest(IAF_CMD_OP_PORT_ENABLE, portId.fabricId, portId.attachId, portId.portNumber, nullptr); } ze_result_t IafNlApi::portDisable(const zes_fabric_port_id_t portId) { return issueRequest(IAF_CMD_OP_PORT_DISABLE, portId.fabricId, portId.attachId, portId.portNumber, nullptr); } ze_result_t IafNlApi::portUsageEnable(const zes_fabric_port_id_t portId) { return issueRequest(IAF_CMD_OP_PORT_USAGE_ENABLE, portId.fabricId, portId.attachId, portId.portNumber, nullptr); } ze_result_t IafNlApi::portUsageDisable(const zes_fabric_port_id_t portId) { return issueRequest(IAF_CMD_OP_PORT_USAGE_DISABLE, portId.fabricId, portId.attachId, portId.portNumber, nullptr); } ze_result_t IafNlApi::remRequest() { return issueRequest(IAF_CMD_OP_REM_REQUEST, nullptr); } ze_result_t IafNlApi::routingGenQuery(uint32_t &start, uint32_t &end) { Generation gen; gen.start = gen.end = 0; ze_result_t result = issueRequest(IAF_CMD_OP_ROUTING_GEN_QUERY, reinterpret_cast(&gen)); if (ZE_RESULT_SUCCESS == result) { start = gen.start; end = gen.end; } return result; } ze_result_t IafNlApi::deviceEnum(std::vector &fabricIds) { return issueRequest(IAF_CMD_OP_DEVICE_ENUM, reinterpret_cast(&fabricIds)); } ze_result_t IafNlApi::fabricDeviceProperties(const uint32_t fabricId, uint32_t &numSubdevices) { return issueRequest(IAF_CMD_OP_FABRIC_DEVICE_PROPERTIES, fabricId, reinterpret_cast(&numSubdevices)); } ze_result_t IafNlApi::subdevicePropertiesGet(const uint32_t fabricId, const uint32_t attachId, uint64_t &guid, std::vector &ports) { Subdevice sd; sd.guid = 0; sd.ports.clear(); ze_result_t result = issueRequest(IAF_CMD_OP_SUB_DEVICE_PROPERTIES_GET, fabricId, attachId, reinterpret_cast(&sd)); if (ZE_RESULT_SUCCESS == result) { guid = sd.guid; ports = sd.ports; } return result; } ze_result_t IafNlApi::fportProperties(const zes_fabric_port_id_t portId, uint64_t &neighborGuid, uint8_t &neighborPortNumber, zes_fabric_port_speed_t &maxRxSpeed, zes_fabric_port_speed_t &maxTxSpeed, zes_fabric_port_speed_t 
&rxSpeed, zes_fabric_port_speed_t &txSpeed) { PortProperties portProperties; portProperties.neighborGuid = 0UL; portProperties.neighborPortNumber = 0U; portProperties.enabledRxWidth = -1; portProperties.enabledRxBitrate = -1L; portProperties.activeRxWidth = -1; portProperties.activeRxBitrate = -1L; portProperties.enabledTxWidth = -1; portProperties.enabledTxBitrate = -1L; portProperties.activeTxWidth = -1; portProperties.activeTxBitrate = -1L; ze_result_t result = issueRequest(IAF_CMD_OP_FPORT_PROPERTIES, portId.fabricId, portId.attachId, portId.portNumber, reinterpret_cast(&portProperties)); if (ZE_RESULT_SUCCESS == result) { neighborGuid = portProperties.neighborGuid; neighborPortNumber = portProperties.neighborPortNumber; maxRxSpeed.width = portProperties.enabledRxWidth; maxRxSpeed.bitRate = portProperties.enabledRxBitrate; rxSpeed.width = portProperties.activeRxWidth; rxSpeed.bitRate = portProperties.activeRxBitrate; maxTxSpeed.width = portProperties.enabledTxWidth; maxTxSpeed.bitRate = portProperties.enabledTxBitrate; txSpeed.width = portProperties.activeTxWidth; txSpeed.bitRate = portProperties.activeTxBitrate; } return result; } int IafNlApi::handleMsg(struct nl_msg *msg) { return pNlApi->genlHandleMsg(msg, reinterpret_cast(this)); } int IafNlApi::nlOperation(struct nl_cache_ops *ops, struct genl_cmd *cmd, struct genl_info *info) { if (info->attrs[IAF_ATTR_CMD_OP_CONTEXT]) { uint64_t context = pNlApi->nlaGetU64(info->attrs[IAF_ATTR_CMD_OP_CONTEXT]); bool found = false; for (auto i : validContexts) { if (context == i) { found = true; break; } } if (!found) { return NL_STOP; } Operation *pOperation = reinterpret_cast(context); if (cmd->c_id == pOperation->cmdOp && info->attrs[IAF_ATTR_CMD_OP_RESULT] && info->attrs[IAF_ATTR_CMD_OP_MSG_TYPE]) { if ((pNlApi->nlaGetU8(info->attrs[IAF_ATTR_CMD_OP_MSG_TYPE]) == IAF_CMD_MSG_RESPONSE) && (pNlApi->nlaGetU8(info->attrs[IAF_ATTR_CMD_OP_RESULT]) == IAF_CMD_RSP_SUCCESS)) { pOperation->result = 
handleResponse(pOperation->cmdOp, info, pOperation->pOutput); } } pOperation->done = true; } return NL_OK; } ze_result_t IafNlApi::init() { if (!pNlApi) { return ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE; } if (!initted) { if (!pNlApi->loadEntryPoints()) { pNlApi.reset(nullptr); return ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE; } initted = true; } int retval = pNlApi->genlRegisterFamily(&ops); if (-NLE_EXIST == retval) { // Temporary error return ZE_RESULT_NOT_READY; } else if (!retval) { nlSock = pNlApi->nlSocketAlloc(); if (nullptr != nlSock) { if (!pNlApi->genlConnect(nlSock)) { if (!pNlApi->genlOpsResolve(nlSock, &ops)) { familyId = pNlApi->genlCtrlResolve(nlSock, std::string(ops.o_name).c_str()); if (0 <= familyId) { if (!pNlApi->nlSocketModifyCb(nlSock, NL_CB_VALID, NL_CB_CUSTOM, globalHandleMsg, reinterpret_cast(this))) { pNlApi->nlSocketDisableSeqCheck(nlSock); return ZE_RESULT_SUCCESS; } } } } pNlApi->nlSocketFree(nlSock); nlSock = nullptr; } pNlApi->genlUnregisterFamily(&ops); } return ZE_RESULT_ERROR_UNKNOWN; } void IafNlApi::cleanup() { pNlApi->nlSocketFree(nlSock); nlSock = nullptr; pNlApi->genlUnregisterFamily(&ops); } IafNlApi::IafNlApi() { validContexts.clear(); pNlApi.reset(new NlApi); memset(policy, 0, sizeof(nla_policy) * _IAF_ATTR_COUNT); policy[IAF_ATTR_CMD_OP_MSG_TYPE].type = NLA_U8; policy[IAF_ATTR_CMD_OP_CONTEXT].type = NLA_U64; policy[IAF_ATTR_CMD_OP_RESULT].type = NLA_U8; policy[IAF_ATTR_FABRIC_ID].type = NLA_U32; policy[IAF_ATTR_SD_INDEX].type = NLA_U8; policy[IAF_ATTR_ENTRIES].type = NLA_U16; policy[IAF_ATTR_FABRIC_DEVICE].type = NLA_NESTED; policy[IAF_ATTR_DEV_NAME].type = NLA_NUL_STRING; policy[IAF_ATTR_PARENT_DEV_NAME].type = NLA_NUL_STRING; policy[IAF_ATTR_SOCKET_ID].type = NLA_U8; policy[IAF_ATTR_PCI_SLOT_NUM].type = NLA_U8; policy[IAF_ATTR_SUBDEVICE_COUNT].type = NLA_U8; policy[IAF_ATTR_VERSION].type = NLA_U8; policy[IAF_ATTR_PRODUCT_TYPE].type = NLA_U8; policy[IAF_ATTR_SUB_DEVICE].type = NLA_NESTED; policy[IAF_ATTR_GUID].type = NLA_U64; 
policy[IAF_ATTR_EXTENDED_PORT_COUNT].type = NLA_U8; policy[IAF_ATTR_FABRIC_PORT_COUNT].type = NLA_U8; policy[IAF_ATTR_SWITCH_LIFETIME].type = NLA_U8; policy[IAF_ATTR_ROUTING_MODE_SUPPORTED].type = NLA_U8; policy[IAF_ATTR_ROUTING_MODE_ENABLED].type = NLA_U8; policy[IAF_ATTR_EHHANCED_PORT_0_PRESENT].type = NLA_U8; policy[IAF_ATTR_FABRIC_PORT].type = NLA_NESTED; policy[IAF_ATTR_FABRIC_PORT_NUMBER].type = NLA_U8; policy[IAF_ATTR_FABRIC_PORT_TYPE].type = NLA_U8; policy[IAF_ATTR_BRIDGE_PORT_NUMBER].type = NLA_U8; policy[IAF_ATTR_ENABLED_STATE].type = NLA_U8; policy[IAF_ATTR_ROUTING_GEN_START].type = NLA_U32; policy[IAF_ATTR_ROUTING_GEN_END].type = NLA_U32; policy[IAF_ATTR_FPORT_HEALTH].type = NLA_U8; policy[IAF_ATTR_FPORT_ISSUE_LQI].type = NLA_U8; policy[IAF_ATTR_FPORT_ISSUE_LWD].type = NLA_U8; policy[IAF_ATTR_FPORT_ISSUE_RATE].type = NLA_U8; policy[IAF_ATTR_FPORT_ERROR_FAILED].type = NLA_U8; policy[IAF_ATTR_FPORT_ERROR_ISOLATED].type = NLA_U8; policy[IAF_ATTR_FPORT_ERROR_FLAPPING].type = NLA_U8; policy[IAF_ATTR_FPORT_ERROR_LINK_DOWN].type = NLA_U8; policy[IAF_ATTR_FPORT_ERROR_DID_NOT_TRAIN].type = NLA_U8; policy[IAF_ATTR_SUB_DEVICE_TRAP_COUNT].type = NLA_U64; policy[IAF_ATTR_FPORT_PM_PORT_STATE].type = NLA_U8; policy[IAF_ATTR_FPORT_ROUTED].type = NLA_U8; policy[IAF_ATTR_FPORT_LOGICAL_STATE].type = NLA_U8; policy[IAF_ATTR_FPORT_PHYSICAL_STATE].type = NLA_U8; policy[IAF_ATTR_FPORT_FID].type = NLA_U32; policy[IAF_ATTR_FPORT_LINK_DOWN_COUNT].type = NLA_U32; policy[IAF_ATTR_FPORT_NEIGHBOR_GUID].type = NLA_U64; policy[IAF_ATTR_FPORT_PORT_ERROR_ACTION].type = NLA_U32; policy[IAF_ATTR_FPORT_NEIGHBOR_PORT_NUMBER].type = NLA_U8; policy[IAF_ATTR_FPORT_PORT_LINK_MODE_ACTIVE].type = NLA_U8; policy[IAF_ATTR_FPORT_NEIGHBOR_LINK_DOWN_REASON].type = NLA_U8; policy[IAF_ATTR_FPORT_H_O_Q_LIFETIME].type = NLA_U8; policy[IAF_ATTR_FPORT_VL_CAP].type = NLA_U8; policy[IAF_ATTR_FPORT_OPERATIONAL_VLS].type = NLA_U8; policy[IAF_ATTR_FPORT_NEIGHBOR_MTU].type = NLA_U8; 
policy[IAF_ATTR_FPORT_LTP_CRC_MODE_SUPPORTED].type = NLA_U8; policy[IAF_ATTR_FPORT_LTP_CRC_MODE_ENABLED].type = NLA_U8; policy[IAF_ATTR_FPORT_LTP_CRC_MODE_ACTIVE].type = NLA_U8; policy[IAF_ATTR_FPORT_LINK_WIDTH_SUPPORTED].type = NLA_U8; policy[IAF_ATTR_FPORT_LINK_WIDTH_ENABLED].type = NLA_U8; policy[IAF_ATTR_FPORT_LINK_WIDTH_ACTIVE].type = NLA_U8; policy[IAF_ATTR_FPORT_LINK_SPEED_SUPPORTED].type = NLA_U8; policy[IAF_ATTR_FPORT_LINK_SPEED_ENABLED].type = NLA_U8; policy[IAF_ATTR_FPORT_LINK_SPEED_ACTIVE].type = NLA_U8; policy[IAF_ATTR_FPORT_LINK_WIDTH_DOWNGRADE_RX_ACTIVE].type = NLA_U8; policy[IAF_ATTR_FPORT_LINK_WIDTH_DOWNGRADE_TX_ACTIVE].type = NLA_U8; policy[IAF_ATTR_FPORT_LINK_INIT_REASON].type = NLA_U8; policy[IAF_ATTR_FPORT_LINK_DOWN_REASON].type = NLA_U8; policy[IAF_ATTR_FPORT_LQI_OFFLINE_DISABLED_REASON].type = NLA_U8; policy[IAF_ATTR_FPORT_LQI_NEIGHBOR_NORMAL].type = NLA_U8; policy[IAF_ATTR_FPORT_LINK_QUALITY_INDICATOR].type = NLA_U8; policy[IAF_ATTR_FPORT_BPS_LINK_SPEED_ACTIVE].type = NLA_U64; policy[IAF_ATTR_TIMESTAMP].type = NLA_U64; policy[IAF_ATTR_FPORT_TX_BYTES].type = NLA_U64; policy[IAF_ATTR_FPORT_RX_BYTES].type = NLA_U64; policy[IAF_ATTR_FPORT_BPS_LINK_SPEED_MAX].type = NLA_U64; policy[IAF_ATTR_FPORT_LQI_CHANGE_COUNT].type = NLA_U32; int i; memset(cmds, 0, sizeof(genl_cmd) * _IAF_CMD_OP_COUNT); for (i = 0; nullptr != iafCmds[i].name; i++) { cmds[i].c_id = iafCmds[i].id; cmds[i].c_name = iafCmds[i].name; cmds[i].c_maxattr = _IAF_ATTR_COUNT - 1, cmds[i].c_msg_parser = &globalNlOperation, cmds[i].c_attr_policy = policy; } ops.o_name = const_cast("iaf_ze"); ops.o_hdrsize = 0U; ops.o_cmds = cmds; ops.o_ncmds = i; } IafNlApi::~IafNlApi() { if (nullptr != nlSock) { pNlApi->nlSocketFree(nlSock); nlSock = nullptr; } } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/fabric_port/linux/iaf_nl_api.h000066400000000000000000000105271422164147700313620ustar00rootroot00000000000000/* * Copyright (C) 2022 Intel Corporation * * 
SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/tools/source/sysman/linux/nl_api/nl_api.h" #include "iaf/iaf_netlink.h" #include "sysman/linux/os_sysman_imp.h" #include #include #include #include #include namespace L0 { class IafNlApi; class Operation { public: uint16_t cmdOp; bool done; void *pOutput; ze_result_t result; Operation(uint16_t cmdOp, void *pOutput) : cmdOp(cmdOp), done(false), pOutput(pOutput), result(ZE_RESULT_ERROR_UNKNOWN) {} }; class IafNlApi { public: IafNlApi(); virtual ~IafNlApi(); MOCKABLE_VIRTUAL ze_result_t handleResponse(const uint16_t cmdOp, struct genl_info *info, void *pOutput); MOCKABLE_VIRTUAL ze_result_t fPortStatusQuery(const zes_fabric_port_id_t portId, zes_fabric_port_state_t &state); MOCKABLE_VIRTUAL ze_result_t getThroughput(const zes_fabric_port_id_t portId, zes_fabric_port_throughput_t &throughput); MOCKABLE_VIRTUAL ze_result_t portStateQuery(const zes_fabric_port_id_t portId, bool &enabled); MOCKABLE_VIRTUAL ze_result_t portBeaconStateQuery(const zes_fabric_port_id_t portId, bool &enabled); MOCKABLE_VIRTUAL ze_result_t portBeaconEnable(const zes_fabric_port_id_t portId); MOCKABLE_VIRTUAL ze_result_t portBeaconDisable(const zes_fabric_port_id_t portId); MOCKABLE_VIRTUAL ze_result_t portEnable(const zes_fabric_port_id_t portId); MOCKABLE_VIRTUAL ze_result_t portDisable(const zes_fabric_port_id_t portId); MOCKABLE_VIRTUAL ze_result_t portUsageEnable(const zes_fabric_port_id_t portId); MOCKABLE_VIRTUAL ze_result_t portUsageDisable(const zes_fabric_port_id_t portId); MOCKABLE_VIRTUAL ze_result_t remRequest(); MOCKABLE_VIRTUAL ze_result_t routingGenQuery(uint32_t &start, uint32_t &end); MOCKABLE_VIRTUAL ze_result_t deviceEnum(std::vector &fabricIds); MOCKABLE_VIRTUAL ze_result_t fabricDeviceProperties(const uint32_t fabricId, uint32_t &numSubdevices); MOCKABLE_VIRTUAL ze_result_t subdevicePropertiesGet(const uint32_t fabricId, const uint32_t attachId, uint64_t &guid, std::vector &ports); MOCKABLE_VIRTUAL 
ze_result_t fportProperties(const zes_fabric_port_id_t portId, uint64_t &neighborGuid, uint8_t &neighborPortNumber, zes_fabric_port_speed_t &maxRxSpeed, zes_fabric_port_speed_t &maxTxSpeed, zes_fabric_port_speed_t &rxSpeed, zes_fabric_port_speed_t &txSpeed); std::list validContexts = {}; int handleMsg(struct nl_msg *msg); int nlOperation(struct nl_cache_ops *ops, struct genl_cmd *cmd, struct genl_info *info); protected: std::unique_ptr pNlApi; ze_result_t init(); void cleanup(); private: ze_result_t allocMsg(const uint16_t cmdOp, struct nl_msg *&msg); ze_result_t issueRequest(const uint16_t cmdOp, const uint32_t fabricId, const uint32_t attachId, const uint8_t portNumber, void *pOutput); ze_result_t issueRequest(const uint16_t cmdOp, const uint32_t fabricId, const uint32_t attachId, void *pOutput); ze_result_t issueRequest(const uint16_t cmdOp, const uint32_t fabricId, void *pOutput); ze_result_t issueRequest(const uint16_t cmdOp, void *pOutput); ze_result_t performTransaction(const uint16_t cmdOp, struct nl_msg *msg, void *pOutput); ze_result_t fPortStatusQueryRsp(struct genl_info *info, void *pOutput); ze_result_t getThroughputRsp(struct genl_info *info, void *pOutput); ze_result_t portStateQueryRsp(struct genl_info *info, void *pOutput); ze_result_t portBeaconStateQueryRsp(struct genl_info *info, void *pOutput); ze_result_t routingGenQueryRsp(struct genl_info *info, void *pOutput); ze_result_t deviceEnumRsp(struct genl_info *info, void *pOutput); ze_result_t fabricDevicePropertiesRsp(struct genl_info *info, void *pOutput); ze_result_t subdevicePropertiesGetRsp(struct genl_info *info, void *pOutput); ze_result_t fportPropertiesRsp(struct genl_info *info, void *pOutput); int32_t translateWidth(uint8_t width); bool initted = false; struct nl_sock *nlSock = nullptr; int familyId = 0; struct nla_policy policy[_IAF_ATTR_COUNT] = {}; struct genl_cmd cmds[_IAF_CMD_OP_COUNT] = {}; struct genl_ops ops = {}; }; } // namespace L0 
compute-runtime-22.14.22890/level_zero/tools/source/sysman/fabric_port/linux/os_fabric_port_imp.cpp000066400000000000000000000050631422164147700334730ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/fabric_port/linux/os_fabric_port_imp.h" #include namespace L0 { uint32_t LinuxFabricDeviceImp::getNumPorts() { return numPorts; } LinuxFabricDeviceImp::LinuxFabricDeviceImp(OsSysman *pOsSysman) { } LinuxFabricDeviceImp::~LinuxFabricDeviceImp() { } ze_result_t LinuxFabricPortImp::getLinkType(zes_fabric_link_type_t *pLinkType) { ::snprintf(pLinkType->desc, ZES_MAX_FABRIC_LINK_TYPE_SIZE, "%s", "SAMPLE LINK, VERBOSE"); return ZE_RESULT_SUCCESS; } ze_result_t LinuxFabricPortImp::getConfig(zes_fabric_port_config_t *pConfig) { *pConfig = config; return ZE_RESULT_SUCCESS; } ze_result_t LinuxFabricPortImp::setConfig(const zes_fabric_port_config_t *pConfig) { config = *pConfig; return ZE_RESULT_SUCCESS; } ze_result_t LinuxFabricPortImp::getState(zes_fabric_port_state_t *pState) { pState->status = ZES_FABRIC_PORT_STATUS_UNKNOWN; pState->qualityIssues = 0U; pState->failureReasons = 0U; pState->remotePortId.fabricId = 0U; pState->remotePortId.attachId = 0U; pState->remotePortId.portNumber = 0U; pState->rxSpeed.bitRate = 0LU; pState->rxSpeed.width = 0U; pState->txSpeed.bitRate = 0LU; pState->txSpeed.width = 0U; return ZE_RESULT_SUCCESS; } ze_result_t LinuxFabricPortImp::getThroughput(zes_fabric_port_throughput_t *pThroughput) { pThroughput->rxCounter = 0LU; pThroughput->txCounter = 0LU; return ZE_RESULT_SUCCESS; } ze_result_t LinuxFabricPortImp::getProperties(zes_fabric_port_properties_t *pProperties) { ::snprintf(pProperties->model, ZES_MAX_FABRIC_PORT_MODEL_SIZE, "%s", this->model.c_str()); pProperties->onSubdevice = false; pProperties->subdeviceId = 0U; pProperties->portId = this->portId; pProperties->maxRxSpeed = this->maxRxSpeed; pProperties->maxTxSpeed = this->maxTxSpeed; return 
ZE_RESULT_SUCCESS; } LinuxFabricPortImp::LinuxFabricPortImp(OsFabricDevice *pOsFabricDevice, uint32_t portNum) { this->portNum = portNum; model = std::string("EXAMPLE"); } LinuxFabricPortImp::~LinuxFabricPortImp() { } OsFabricDevice *OsFabricDevice::create(OsSysman *pOsSysman) { LinuxFabricDeviceImp *pLinuxFabricDeviceImp = new LinuxFabricDeviceImp(pOsSysman); return pLinuxFabricDeviceImp; } OsFabricPort *OsFabricPort::create(OsFabricDevice *pOsFabricDevice, uint32_t portNum) { LinuxFabricPortImp *pLinuxFabricPortImp = new LinuxFabricPortImp(pOsFabricDevice, portNum); return pLinuxFabricPortImp; } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/fabric_port/linux/os_fabric_port_imp.h000066400000000000000000000030251422164147700331340ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/non_copyable_or_moveable.h" #include "sysman/fabric_port/fabric_port_imp.h" #include "sysman/fabric_port/os_fabric_port.h" #include "sysman/linux/fs_access.h" namespace L0 { class LinuxFabricDeviceImp : public OsFabricDevice, NEO::NonCopyableOrMovableClass { public: uint32_t getNumPorts() override; LinuxFabricDeviceImp() = delete; LinuxFabricDeviceImp(OsSysman *pOsSysman); ~LinuxFabricDeviceImp() override; private: uint32_t numPorts = 0; }; class LinuxFabricPortImp : public OsFabricPort, NEO::NonCopyableOrMovableClass { public: ze_result_t getProperties(zes_fabric_port_properties_t *pProperties) override; ze_result_t getLinkType(zes_fabric_link_type_t *pLinkType) override; ze_result_t getConfig(zes_fabric_port_config_t *pConfig) override; ze_result_t setConfig(const zes_fabric_port_config_t *pConfig) override; ze_result_t getState(zes_fabric_port_state_t *pState) override; ze_result_t getThroughput(zes_fabric_port_throughput_t *pThroughput) override; LinuxFabricPortImp() = delete; LinuxFabricPortImp(OsFabricDevice *pOsFabricDevice, uint32_t portNum); 
~LinuxFabricPortImp() override; private: uint32_t portNum = 0; std::string model = ""; zes_fabric_port_id_t portId = {}; zes_fabric_port_speed_t maxRxSpeed = {}; zes_fabric_port_speed_t maxTxSpeed = {}; zes_fabric_port_config_t config = {}; }; } // namespace L0 os_fabric_port_imp_prelim.cpp000066400000000000000000000177111422164147700347670ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/fabric_port/linux/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "os_fabric_port_imp_prelim.h" #include "shared/source/helpers/debug_helpers.h" #include namespace L0 { uint32_t LinuxFabricDeviceImp::getNumPorts() { pFabricDeviceAccess->getPorts(portIds); return static_cast(portIds.size()); } void LinuxFabricDeviceImp::getPortId(uint32_t portNumber, zes_fabric_port_id_t &portId) { UNRECOVERABLE_IF(getNumPorts() <= portNumber); portId = portIds[portNumber]; } void LinuxFabricDeviceImp::getProperties(const zes_fabric_port_id_t portId, std::string &model, bool &onSubdevice, uint32_t &subdeviceId, zes_fabric_port_speed_t &maxRxSpeed, zes_fabric_port_speed_t &maxTxSpeed) { pFabricDeviceAccess->getProperties(portId, model, onSubdevice, subdeviceId, maxRxSpeed, maxTxSpeed); } ze_result_t LinuxFabricDeviceImp::getState(const zes_fabric_port_id_t portId, zes_fabric_port_state_t *pState) { return pFabricDeviceAccess->getState(portId, *pState); } ze_result_t LinuxFabricDeviceImp::getThroughput(const zes_fabric_port_id_t portId, zes_fabric_port_throughput_t *pThroughput) { return pFabricDeviceAccess->getThroughput(portId, *pThroughput); } ze_result_t LinuxFabricDeviceImp::performSweep() { uint32_t start = 0U; uint32_t end = 0U; ze_result_t result = ZE_RESULT_SUCCESS; result = forceSweep(); if (ZE_RESULT_SUCCESS != result) { return result; } result = routingQuery(start, end); if (ZE_RESULT_SUCCESS != result) { return result; } while (end < start) { uint32_t newStart; result = routingQuery(newStart, end); if 
(ZE_RESULT_SUCCESS != result) { return result; } } return result; } ze_result_t LinuxFabricDeviceImp::getPortEnabledState(const zes_fabric_port_id_t portId, bool &enabled) { return pFabricDeviceAccess->getPortEnabledState(portId, enabled); } ze_result_t LinuxFabricDeviceImp::enablePort(const zes_fabric_port_id_t portId) { ze_result_t result = enable(portId); // usage should be enabled, but make sure in case of previous errors enableUsage(portId); return result; } ze_result_t LinuxFabricDeviceImp::disablePort(const zes_fabric_port_id_t portId) { ze_result_t result = ZE_RESULT_SUCCESS; result = disableUsage(portId); if (ZE_RESULT_SUCCESS == result) { result = disable(portId); if (ZE_RESULT_SUCCESS == result) { return enableUsage(portId); } } // Try not so leave port usage disabled on an error enableUsage(portId); return result; } ze_result_t LinuxFabricDeviceImp::getPortBeaconState(const zes_fabric_port_id_t portId, bool &enabled) { return pFabricDeviceAccess->getPortBeaconState(portId, enabled); } ze_result_t LinuxFabricDeviceImp::enablePortBeaconing(const zes_fabric_port_id_t portId) { return pFabricDeviceAccess->enablePortBeaconing(portId); } ze_result_t LinuxFabricDeviceImp::disablePortBeaconing(const zes_fabric_port_id_t portId) { return pFabricDeviceAccess->disablePortBeaconing(portId); } ze_result_t LinuxFabricDeviceImp::enable(const zes_fabric_port_id_t portId) { ze_result_t result = ZE_RESULT_SUCCESS; result = pFabricDeviceAccess->enable(portId); if (ZE_RESULT_SUCCESS != result) { return result; } return performSweep(); } ze_result_t LinuxFabricDeviceImp::disable(const zes_fabric_port_id_t portId) { ze_result_t result = ZE_RESULT_SUCCESS; result = pFabricDeviceAccess->disable(portId); if (ZE_RESULT_SUCCESS != result) { return result; } return performSweep(); } ze_result_t LinuxFabricDeviceImp::enableUsage(const zes_fabric_port_id_t portId) { ze_result_t result = ZE_RESULT_SUCCESS; result = pFabricDeviceAccess->enableUsage(portId); if (ZE_RESULT_SUCCESS != 
result) { return result; } return performSweep(); } ze_result_t LinuxFabricDeviceImp::disableUsage(const zes_fabric_port_id_t portId) { ze_result_t result = ZE_RESULT_SUCCESS; result = pFabricDeviceAccess->disableUsage(portId); if (ZE_RESULT_SUCCESS != result) { return result; } return performSweep(); } ze_result_t LinuxFabricDeviceImp::forceSweep() { return pFabricDeviceAccess->forceSweep(); } ze_result_t LinuxFabricDeviceImp::routingQuery(uint32_t &start, uint32_t &end) { return pFabricDeviceAccess->routingQuery(start, end); } LinuxFabricDeviceImp::LinuxFabricDeviceImp(OsSysman *pOsSysman) { pFabricDeviceAccess = FabricDeviceAccess::create(pOsSysman); UNRECOVERABLE_IF(nullptr == pFabricDeviceAccess); } LinuxFabricDeviceImp::~LinuxFabricDeviceImp() { delete pFabricDeviceAccess; } ze_result_t LinuxFabricPortImp::getLinkType(zes_fabric_link_type_t *pLinkType) { ::snprintf(pLinkType->desc, ZES_MAX_FABRIC_LINK_TYPE_SIZE, "%s", "XeLink"); return ZE_RESULT_SUCCESS; } ze_result_t LinuxFabricPortImp::getConfig(zes_fabric_port_config_t *pConfig) { ze_result_t result = ZE_RESULT_SUCCESS; bool enabled = false; result = pLinuxFabricDeviceImp->getPortEnabledState(portId, enabled); if (ZE_RESULT_SUCCESS != result) { return result; } pConfig->enabled = enabled == true; result = pLinuxFabricDeviceImp->getPortBeaconState(portId, enabled); if (ZE_RESULT_SUCCESS != result) { return result; } pConfig->beaconing = enabled == true; return result; } ze_result_t LinuxFabricPortImp::setConfig(const zes_fabric_port_config_t *pConfig) { ze_result_t result = ZE_RESULT_SUCCESS; bool enabled = false; result = pLinuxFabricDeviceImp->getPortEnabledState(portId, enabled); if (ZE_RESULT_SUCCESS == result && enabled != pConfig->enabled) { if (pConfig->enabled) { result = pLinuxFabricDeviceImp->enablePort(portId); } else { result = pLinuxFabricDeviceImp->disablePort(portId); } } if (ZE_RESULT_SUCCESS != result) { return result; } bool beaconing = false; result = 
pLinuxFabricDeviceImp->getPortBeaconState(portId, beaconing); if (ZE_RESULT_SUCCESS == result && beaconing != pConfig->beaconing) { if (pConfig->beaconing) { result = pLinuxFabricDeviceImp->enablePortBeaconing(portId); } else { result = pLinuxFabricDeviceImp->disablePortBeaconing(portId); } } return result; } ze_result_t LinuxFabricPortImp::getState(zes_fabric_port_state_t *pState) { return pLinuxFabricDeviceImp->getState(portId, pState); } ze_result_t LinuxFabricPortImp::getThroughput(zes_fabric_port_throughput_t *pThroughput) { return pLinuxFabricDeviceImp->getThroughput(portId, pThroughput); } ze_result_t LinuxFabricPortImp::getProperties(zes_fabric_port_properties_t *pProperties) { ::snprintf(pProperties->model, ZES_MAX_FABRIC_PORT_MODEL_SIZE, "%s", this->model.c_str()); pProperties->onSubdevice = this->onSubdevice; pProperties->subdeviceId = this->subdeviceId; pProperties->portId = this->portId; pProperties->maxRxSpeed = this->maxRxSpeed; pProperties->maxTxSpeed = this->maxTxSpeed; return ZE_RESULT_SUCCESS; } LinuxFabricPortImp::LinuxFabricPortImp(OsFabricDevice *pOsFabricDevice, uint32_t portNum) { pLinuxFabricDeviceImp = static_cast(pOsFabricDevice); this->portNum = portNum; pLinuxFabricDeviceImp->getPortId(this->portNum, this->portId); pLinuxFabricDeviceImp->getProperties(this->portId, this->model, this->onSubdevice, this->subdeviceId, this->maxRxSpeed, this->maxTxSpeed); } LinuxFabricPortImp::~LinuxFabricPortImp() { } OsFabricDevice *OsFabricDevice::create(OsSysman *pOsSysman) { LinuxFabricDeviceImp *pLinuxFabricDeviceImp = new LinuxFabricDeviceImp(pOsSysman); return pLinuxFabricDeviceImp; } OsFabricPort *OsFabricPort::create(OsFabricDevice *pOsFabricDevice, uint32_t portNum) { LinuxFabricPortImp *pLinuxFabricPortImp = new LinuxFabricPortImp(pOsFabricDevice, portNum); return pLinuxFabricPortImp; } } // namespace L0 
os_fabric_port_imp_prelim.h000066400000000000000000000057641422164147700344410ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/fabric_port/linux/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/non_copyable_or_moveable.h" #include "fabric_device_access.h" #include "sysman/fabric_port/fabric_port_imp.h" #include "sysman/fabric_port/os_fabric_port.h" #include namespace L0 { class LinuxFabricDeviceImp : public OsFabricDevice, NEO::NonCopyableOrMovableClass { public: uint32_t getNumPorts() override; ze_result_t performSweep(); ze_result_t getPortEnabledState(const zes_fabric_port_id_t portId, bool &enabled); ze_result_t enablePort(const zes_fabric_port_id_t portId); ze_result_t disablePort(const zes_fabric_port_id_t portId); ze_result_t getPortBeaconState(const zes_fabric_port_id_t portId, bool &enabled); ze_result_t enablePortBeaconing(const zes_fabric_port_id_t portId); ze_result_t disablePortBeaconing(const zes_fabric_port_id_t portId); ze_result_t getState(const zes_fabric_port_id_t portId, zes_fabric_port_state_t *pState); ze_result_t getThroughput(const zes_fabric_port_id_t portId, zes_fabric_port_throughput_t *pThroughput); void getPortId(const uint32_t portNumber, zes_fabric_port_id_t &portId); void getProperties(const zes_fabric_port_id_t portId, std::string &model, bool &onSubdevice, uint32_t &subdeviceId, zes_fabric_port_speed_t &maxRxSpeed, zes_fabric_port_speed_t &maxTxSpeed); LinuxFabricDeviceImp() = delete; LinuxFabricDeviceImp(OsSysman *pOsSysman); ~LinuxFabricDeviceImp() override; private: std::vector portIds = {}; ze_result_t forceSweep(); ze_result_t routingQuery(uint32_t &start, uint32_t &end); ze_result_t enable(const zes_fabric_port_id_t portId); ze_result_t disable(const zes_fabric_port_id_t portId); ze_result_t enableUsage(const zes_fabric_port_id_t portId); ze_result_t disableUsage(const zes_fabric_port_id_t portId); protected: 
FabricDeviceAccess *pFabricDeviceAccess = nullptr; }; class LinuxFabricPortImp : public OsFabricPort, NEO::NonCopyableOrMovableClass { public: ze_result_t getProperties(zes_fabric_port_properties_t *pProperties) override; ze_result_t getLinkType(zes_fabric_link_type_t *pLinkType) override; ze_result_t getConfig(zes_fabric_port_config_t *pConfig) override; ze_result_t setConfig(const zes_fabric_port_config_t *pConfig) override; ze_result_t getState(zes_fabric_port_state_t *pState) override; ze_result_t getThroughput(zes_fabric_port_throughput_t *pThroughput) override; LinuxFabricPortImp() = delete; LinuxFabricPortImp(OsFabricDevice *pOsFabricDevice, uint32_t portNum); ~LinuxFabricPortImp() override; private: LinuxFabricDeviceImp *pLinuxFabricDeviceImp = nullptr; uint32_t portNum = 0; zes_fabric_port_id_t portId = {}; std::string model = ""; bool onSubdevice = false; uint32_t subdeviceId; zes_fabric_port_speed_t maxRxSpeed = {}; zes_fabric_port_speed_t maxTxSpeed = {}; }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/fabric_port/os_fabric_port.h000066400000000000000000000020061422164147700311260ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/tools/source/sysman/os_sysman.h" #include namespace L0 { class OsFabricDevice { public: virtual uint32_t getNumPorts() = 0; static OsFabricDevice *create(OsSysman *pOsSysman); virtual ~OsFabricDevice() = default; }; class OsFabricPort { public: virtual ze_result_t getProperties(zes_fabric_port_properties_t *pProperties) = 0; virtual ze_result_t getLinkType(zes_fabric_link_type_t *pLinkType) = 0; virtual ze_result_t getConfig(zes_fabric_port_config_t *pConfig) = 0; virtual ze_result_t setConfig(const zes_fabric_port_config_t *pConfig) = 0; virtual ze_result_t getState(zes_fabric_port_state_t *pState) = 0; virtual ze_result_t getThroughput(zes_fabric_port_throughput_t *pThroughput) = 0; static OsFabricPort 
*create(OsFabricDevice *pOsFabricDevice, uint32_t portNum); virtual ~OsFabricPort() = default; }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/fabric_port/windows/000077500000000000000000000000001422164147700274565ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/fabric_port/windows/CMakeLists.txt000066400000000000000000000011061422164147700322140ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_TOOLS_SYSMAN_FABRICPORT_WINDOWS ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/os_fabric_port_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/os_fabric_port_imp.h ) if(WIN32) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_SYSMAN_FABRICPORT_WINDOWS} ) endif() ## Make our source files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_SYSMAN_FABRICPORT_WINDOWS ${L0_SRCS_TOOLS_SYSMAN_FABRICPORT_WINDOWS}) os_fabric_port_imp.cpp000066400000000000000000000041121422164147700337410ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/fabric_port/windows/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/fabric_port/windows/os_fabric_port_imp.h" #include namespace L0 { uint32_t WddmFabricDeviceImp::getNumPorts() { return numPorts; } WddmFabricDeviceImp::WddmFabricDeviceImp(OsSysman *pOsSysman) { } WddmFabricDeviceImp::~WddmFabricDeviceImp() { } ze_result_t WddmFabricPortImp::getLinkType(zes_fabric_link_type_t *pLinkType) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t WddmFabricPortImp::getConfig(zes_fabric_port_config_t *pConfig) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t WddmFabricPortImp::setConfig(const zes_fabric_port_config_t *pConfig) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t WddmFabricPortImp::getState(zes_fabric_port_state_t *pState) { return 
ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t WddmFabricPortImp::getThroughput(zes_fabric_port_throughput_t *pThroughput) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t WddmFabricPortImp::getProperties(zes_fabric_port_properties_t *pProperties) { ::memset(pProperties->model, '\0', ZES_MAX_FABRIC_PORT_MODEL_SIZE); pProperties->onSubdevice = false; pProperties->subdeviceId = 0U; ::memset(&pProperties->portId, '\0', sizeof(pProperties->portId)); ::memset(&pProperties->maxRxSpeed, '\0', sizeof(pProperties->maxRxSpeed)); ::memset(&pProperties->maxTxSpeed, '\0', sizeof(pProperties->maxTxSpeed)); return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } WddmFabricPortImp::WddmFabricPortImp(OsFabricDevice *pOsFabricDevice, uint32_t portNum) { } WddmFabricPortImp::~WddmFabricPortImp() { } OsFabricDevice *OsFabricDevice::create(OsSysman *pOsSysman) { WddmFabricDeviceImp *pWddmFabricDeviceImp = new WddmFabricDeviceImp(pOsSysman); return pWddmFabricDeviceImp; } OsFabricPort *OsFabricPort::create(OsFabricDevice *pOsFabricDevice, uint32_t portNum) { WddmFabricPortImp *pWddmFabricPortImp = new WddmFabricPortImp(pOsFabricDevice, portNum); return pWddmFabricPortImp; } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/fabric_port/windows/os_fabric_port_imp.h000066400000000000000000000024201422164147700334650ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/non_copyable_or_moveable.h" #include "sysman/fabric_port/fabric_port_imp.h" #include "sysman/fabric_port/os_fabric_port.h" #include namespace L0 { class WddmFabricDeviceImp : public OsFabricDevice, NEO::NonCopyableOrMovableClass { public: uint32_t getNumPorts() override; WddmFabricDeviceImp() = delete; WddmFabricDeviceImp(OsSysman *pOsSysman); ~WddmFabricDeviceImp() override; private: uint32_t numPorts = 0; }; class WddmFabricPortImp : public OsFabricPort, NEO::NonCopyableOrMovableClass { public: 
ze_result_t getProperties(zes_fabric_port_properties_t *pProperties) override; ze_result_t getLinkType(zes_fabric_link_type_t *pLinkType) override; ze_result_t getConfig(zes_fabric_port_config_t *pConfig) override; ze_result_t setConfig(const zes_fabric_port_config_t *pConfig) override; ze_result_t getState(zes_fabric_port_state_t *pState) override; ze_result_t getThroughput(zes_fabric_port_throughput_t *pThroughput) override; WddmFabricPortImp() = delete; WddmFabricPortImp(OsFabricDevice *pOsFabricDevice, uint32_t portNum); ~WddmFabricPortImp() override; }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/fan/000077500000000000000000000000001422164147700242365ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/fan/CMakeLists.txt000066400000000000000000000011611422164147700267750ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_TOOLS_SYSMAN_FAN ${CMAKE_CURRENT_SOURCE_DIR}/fan.cpp ${CMAKE_CURRENT_SOURCE_DIR}/fan.h ${CMAKE_CURRENT_SOURCE_DIR}/fan_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/fan_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/os_fan.h ) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_SYSMAN_FAN} ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) add_subdirectories() # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_SYSMAN_FAN ${L0_SRCS_TOOLS_SYSMAN_FAN}) compute-runtime-22.14.22890/level_zero/tools/source/sysman/fan/fan.cpp000066400000000000000000000020351422164147700255060ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/fan/fan.h" #include "shared/source/helpers/basic_math.h" #include "level_zero/tools/source/sysman/fan/fan_imp.h" namespace L0 { FanHandleContext::~FanHandleContext() { for (Fan *pFan : handleList) { delete pFan; } } void FanHandleContext::init() { Fan *pFan = new FanImp(pOsSysman); if 
(pFan->initSuccess == true) { handleList.push_back(pFan); } else { delete pFan; } } ze_result_t FanHandleContext::fanGet(uint32_t *pCount, zes_fan_handle_t *phFan) { uint32_t handleListSize = static_cast(handleList.size()); uint32_t numToCopy = std::min(*pCount, handleListSize); if (0 == *pCount || *pCount > handleListSize) { *pCount = handleListSize; } if (nullptr != phFan) { for (uint32_t i = 0; i < numToCopy; i++) { phFan[i] = handleList[i]->toHandle(); } } return ZE_RESULT_SUCCESS; } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/fan/fan.h000066400000000000000000000023471422164147700251610ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include #include struct _zes_fan_handle_t { virtual ~_zes_fan_handle_t() = default; }; namespace L0 { struct OsSysman; class Fan : _zes_fan_handle_t { public: virtual ze_result_t fanGetProperties(zes_fan_properties_t *pProperties) = 0; virtual ze_result_t fanGetConfig(zes_fan_config_t *pConfig) = 0; virtual ze_result_t fanSetDefaultMode() = 0; virtual ze_result_t fanSetFixedSpeedMode(const zes_fan_speed_t *pSpeed) = 0; virtual ze_result_t fanSetSpeedTableMode(const zes_fan_speed_table_t *pSpeedTable) = 0; virtual ze_result_t fanGetState(zes_fan_speed_units_t units, int32_t *pSpeed) = 0; static Fan *fromHandle(zes_fan_handle_t handle) { return static_cast(handle); } inline zes_fan_handle_t toHandle() { return this; } bool initSuccess = false; }; struct FanHandleContext { FanHandleContext(OsSysman *pOsSysman) : pOsSysman(pOsSysman){}; ~FanHandleContext(); void init(); ze_result_t fanGet(uint32_t *pCount, zes_fan_handle_t *phFan); OsSysman *pOsSysman = nullptr; std::vector handleList = {}; }; } // namespace L0compute-runtime-22.14.22890/level_zero/tools/source/sysman/fan/fan_imp.cpp000066400000000000000000000024641422164147700263610ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: 
MIT * */ #include "level_zero/tools/source/sysman/fan/fan_imp.h" #include "shared/source/helpers/debug_helpers.h" namespace L0 { FanImp::FanImp(OsSysman *pOsSysman) { pOsFan = OsFan::create(pOsSysman); UNRECOVERABLE_IF(nullptr == pOsFan); init(); } ze_result_t FanImp::fanGetProperties(zes_fan_properties_t *pProperties) { *pProperties = properties; return ZE_RESULT_SUCCESS; } ze_result_t FanImp::fanGetConfig(zes_fan_config_t *pConfig) { return pOsFan->getConfig(pConfig); } ze_result_t FanImp::fanSetDefaultMode() { return pOsFan->setDefaultMode(); } ze_result_t FanImp::fanSetFixedSpeedMode(const zes_fan_speed_t *pSpeed) { return pOsFan->setFixedSpeedMode(pSpeed); } ze_result_t FanImp::fanSetSpeedTableMode(const zes_fan_speed_table_t *pSpeedTable) { return pOsFan->setSpeedTableMode(pSpeedTable); } ze_result_t FanImp::fanGetState(zes_fan_speed_units_t units, int32_t *pSpeed) { return pOsFan->getState(units, pSpeed); } void FanImp::init() { if (pOsFan->isFanModuleSupported()) { pOsFan->getProperties(&properties); this->initSuccess = true; } } FanImp::~FanImp() { if (nullptr != pOsFan) { delete pOsFan; pOsFan = nullptr; } } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/fan/fan_imp.h000066400000000000000000000017461422164147700260300ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/non_copyable_or_moveable.h" #include "level_zero/tools/source/sysman/fan/fan.h" #include "level_zero/tools/source/sysman/fan/os_fan.h" namespace L0 { class FanImp : public Fan, NEO::NonCopyableOrMovableClass { public: ze_result_t fanGetProperties(zes_fan_properties_t *pProperties) override; ze_result_t fanGetConfig(zes_fan_config_t *pConfig) override; ze_result_t fanSetDefaultMode() override; ze_result_t fanSetFixedSpeedMode(const zes_fan_speed_t *pSpeed) override; ze_result_t fanSetSpeedTableMode(const zes_fan_speed_table_t *pSpeedTable) override; ze_result_t 
fanGetState(zes_fan_speed_units_t units, int32_t *pSpeed) override; FanImp() = default; FanImp(OsSysman *pOsSysman); ~FanImp() override; OsFan *pOsFan = nullptr; void init(); private: zes_fan_properties_t properties = {}; }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/fan/linux/000077500000000000000000000000001422164147700253755ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/fan/linux/CMakeLists.txt000066400000000000000000000007411422164147700301370ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_TOOLS_SYSMAN_FAN_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/os_fan_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/os_fan_imp.h ) if(UNIX) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_SYSMAN_FAN_LINUX} ) endif() # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_SYSMAN_FAN_LINUX ${L0_SRCS_TOOLS_SYSMAN_FAN_LINUX}) compute-runtime-22.14.22890/level_zero/tools/source/sysman/fan/linux/os_fan_imp.cpp000066400000000000000000000024161422164147700302160ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/fan/linux/os_fan_imp.h" #include "level_zero/tools/source/sysman/linux/pmt/pmt.h" #include "sysman/linux/os_sysman_imp.h" namespace L0 { ze_result_t LinuxFanImp::getProperties(zes_fan_properties_t *pProperties) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t LinuxFanImp::getConfig(zes_fan_config_t *pConfig) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t LinuxFanImp::setDefaultMode() { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t LinuxFanImp::setFixedSpeedMode(const zes_fan_speed_t *pSpeed) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t LinuxFanImp::setSpeedTableMode(const zes_fan_speed_table_t *pSpeedTable) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } 
ze_result_t LinuxFanImp::getState(zes_fan_speed_units_t units, int32_t *pSpeed) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } bool LinuxFanImp::isFanModuleSupported() { return false; } LinuxFanImp::LinuxFanImp(OsSysman *pOsSysman) { } OsFan *OsFan::create(OsSysman *pOsSysman) { LinuxFanImp *pLinuxFanImp = new LinuxFanImp(pOsSysman); return static_cast(pLinuxFanImp); } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/fan/linux/os_fan_imp.h000066400000000000000000000016311422164147700276610ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/non_copyable_or_moveable.h" #include "level_zero/tools/source/sysman/fan/os_fan.h" namespace L0 { class SysfsAccess; class LinuxFanImp : public OsFan, NEO::NonCopyableOrMovableClass { public: ze_result_t getProperties(zes_fan_properties_t *pProperties) override; ze_result_t getConfig(zes_fan_config_t *pConfig) override; ze_result_t setDefaultMode() override; ze_result_t setFixedSpeedMode(const zes_fan_speed_t *pSpeed) override; ze_result_t setSpeedTableMode(const zes_fan_speed_table_t *pSpeedTable) override; ze_result_t getState(zes_fan_speed_units_t units, int32_t *pSpeed) override; bool isFanModuleSupported() override; LinuxFanImp(OsSysman *pOsSysman); LinuxFanImp() = default; ~LinuxFanImp() override = default; }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/fan/os_fan.h000066400000000000000000000014111422164147700256510ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include namespace L0 { struct OsSysman; class OsFan { public: virtual ze_result_t getProperties(zes_fan_properties_t *pProperties) = 0; virtual ze_result_t getConfig(zes_fan_config_t *pConfig) = 0; virtual ze_result_t setDefaultMode() = 0; virtual ze_result_t setFixedSpeedMode(const zes_fan_speed_t *pSpeed) = 0; virtual ze_result_t 
setSpeedTableMode(const zes_fan_speed_table_t *pSpeedTable) = 0; virtual ze_result_t getState(zes_fan_speed_units_t units, int32_t *pSpeed) = 0; virtual bool isFanModuleSupported() = 0; static OsFan *create(OsSysman *pOsSysman); virtual ~OsFan() = default; }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/fan/windows/000077500000000000000000000000001422164147700257305ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/fan/windows/CMakeLists.txt000066400000000000000000000010311422164147700304630ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_TOOLS_SYSMAN_FAN_WINDOWS ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/os_fan_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/os_fan_imp.cpp ) if(WIN32) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_SYSMAN_FAN_WINDOWS} ) endif() # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_SYSMAN_FAN_WINDOWS ${L0_SRCS_TOOLS_SYSMAN_FAN_WINDOWS}) compute-runtime-22.14.22890/level_zero/tools/source/sysman/fan/windows/os_fan_imp.cpp000066400000000000000000000125031422164147700305470ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "sysman/fan/windows/os_fan_imp.h" namespace L0 { struct FanPoint { union { struct { int32_t temperatureDegreesCelsius : 16; int32_t fanSpeedPercent : 16; }; int32_t data; }; }; ze_result_t WddmFanImp::getProperties(zes_fan_properties_t *pProperties) { pProperties->onSubdevice = false; pProperties->subdeviceId = 0; std::vector vRequests = {}; std::vector vResponses = {}; KmdSysman::RequestProperty request = {}; request.commandId = KmdSysman::Command::Set; request.componentId = KmdSysman::Component::FanComponent; request.requestId = KmdSysman::Requests::Fans::CurrentNumOfControlPoints; request.dataSize = sizeof(uint32_t); uint32_t FanPoints = 2; 
memcpy_s(request.dataBuffer, sizeof(uint32_t), &FanPoints, sizeof(uint32_t)); vRequests.push_back(request); request.dataSize = 0; memset(request.dataBuffer, request.dataSize, sizeof(request.dataBuffer)); request.commandId = KmdSysman::Command::Get; request.requestId = KmdSysman::Requests::Fans::MaxFanControlPointsSupported; vRequests.push_back(request); ze_result_t status = pKmdSysManager->requestMultiple(vRequests, vResponses); if ((status != ZE_RESULT_SUCCESS) || (vResponses.size() != vRequests.size())) { return status; } pProperties->canControl = (vResponses[0].returnCode == KmdSysman::Success); if (vResponses[1].returnCode == KmdSysman::Success) { memcpy_s(&FanPoints, sizeof(uint32_t), vResponses[1].dataBuffer, sizeof(uint32_t)); pProperties->maxPoints = maxPoints = static_cast(FanPoints); } pProperties->maxRPM = -1; pProperties->supportedModes = zes_fan_speed_mode_t::ZES_FAN_SPEED_MODE_TABLE; pProperties->supportedUnits = zes_fan_speed_units_t::ZES_FAN_SPEED_UNITS_PERCENT; return ZE_RESULT_SUCCESS; } ze_result_t WddmFanImp::getConfig(zes_fan_config_t *pConfig) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t WddmFanImp::setDefaultMode() { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t WddmFanImp::setFixedSpeedMode(const zes_fan_speed_t *pSpeed) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t WddmFanImp::setSpeedTableMode(const zes_fan_speed_table_t *pSpeedTable) { if (pSpeedTable->numPoints == 0 || pSpeedTable->numPoints > maxPoints) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } for (int32_t i = 0; i < pSpeedTable->numPoints; i++) { if (pSpeedTable->table[i].speed.units == zes_fan_speed_units_t::ZES_FAN_SPEED_UNITS_RPM) { return ze_result_t::ZE_RESULT_ERROR_INVALID_ARGUMENT; } } std::vector vRequests = {}; std::vector vResponses = {}; KmdSysman::RequestProperty request = {}; uint32_t value = pSpeedTable->numPoints; request.commandId = KmdSysman::Command::Set; request.componentId = KmdSysman::Component::FanComponent; 
request.requestId = KmdSysman::Requests::Fans::CurrentNumOfControlPoints; request.dataSize = sizeof(uint32_t); memcpy_s(request.dataBuffer, sizeof(uint32_t), &value, sizeof(uint32_t)); vRequests.push_back(request); request.requestId = KmdSysman::Requests::Fans::CurrentFanPoint; for (int32_t i = 0; i < pSpeedTable->numPoints; i++) { FanPoint point = {}; point.fanSpeedPercent = pSpeedTable->table[i].speed.speed; point.temperatureDegreesCelsius = pSpeedTable->table[i].temperature; value = point.data; memcpy_s(request.dataBuffer, sizeof(uint32_t), &value, sizeof(uint32_t)); vRequests.push_back(request); } return pKmdSysManager->requestMultiple(vRequests, vResponses); ; } ze_result_t WddmFanImp::getState(zes_fan_speed_units_t units, int32_t *pSpeed) { if (units == ZES_FAN_SPEED_UNITS_PERCENT) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } KmdSysman::RequestProperty request; KmdSysman::ResponseProperty response; request.commandId = KmdSysman::Command::Get; request.componentId = KmdSysman::Component::FanComponent; request.requestId = KmdSysman::Requests::Fans::CurrentFanSpeed; ze_result_t status = pKmdSysManager->requestSingle(request, response); if (status != ZE_RESULT_SUCCESS) { return status; } uint32_t value = 0; memcpy_s(&value, sizeof(uint32_t), response.dataBuffer, sizeof(uint32_t)); *pSpeed = static_cast(value); return status; } bool WddmFanImp::isFanModuleSupported() { KmdSysman::RequestProperty request = {}; KmdSysman::ResponseProperty response = {}; request.commandId = KmdSysman::Command::Get; request.componentId = KmdSysman::Component::FanComponent; request.requestId = KmdSysman::Requests::Fans::CurrentFanSpeed; return (pKmdSysManager->requestSingle(request, response) == ZE_RESULT_SUCCESS); } WddmFanImp::WddmFanImp(OsSysman *pOsSysman) { WddmSysmanImp *pWddmSysmanImp = static_cast(pOsSysman); pKmdSysManager = &pWddmSysmanImp->getKmdSysManager(); } OsFan *OsFan::create(OsSysman *pOsSysman) { WddmFanImp *pWddmFanImp = new WddmFanImp(pOsSysman); return 
static_cast(pWddmFanImp); } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/fan/windows/os_fan_imp.h000066400000000000000000000021071422164147700302130ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/non_copyable_or_moveable.h" #include "sysman/fan/os_fan.h" #include "sysman/windows/os_sysman_imp.h" namespace L0 { class KmdSysManager; class WddmFanImp : public OsFan, NEO::NonCopyableOrMovableClass { public: ze_result_t getProperties(zes_fan_properties_t *pProperties) override; ze_result_t getConfig(zes_fan_config_t *pConfig) override; ze_result_t setDefaultMode() override; ze_result_t setFixedSpeedMode(const zes_fan_speed_t *pSpeed) override; ze_result_t setSpeedTableMode(const zes_fan_speed_table_t *pSpeedTable) override; ze_result_t getState(zes_fan_speed_units_t units, int32_t *pSpeed) override; bool isFanModuleSupported() override; WddmFanImp(OsSysman *pOsSysman); WddmFanImp() = default; ~WddmFanImp() override = default; protected: KmdSysManager *pKmdSysManager = nullptr; private: uint64_t prevTS = 0; uint32_t prevPulses = 0; int32_t maxPoints = 0; }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/firmware/000077500000000000000000000000001422164147700253065ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/firmware/CMakeLists.txt000066400000000000000000000012371422164147700300510ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_TOOLS_SYSMAN_FIRMWARE ${CMAKE_CURRENT_SOURCE_DIR}/firmware.cpp ${CMAKE_CURRENT_SOURCE_DIR}/firmware.h ${CMAKE_CURRENT_SOURCE_DIR}/firmware_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/firmware_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/os_firmware.h ) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_SYSMAN_FIRMWARE} ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) 
add_subdirectories() # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_SYSMAN_FIRMWARE ${L0_SRCS_TOOLS_SYSMAN_FIRMWARE}) compute-runtime-22.14.22890/level_zero/tools/source/sysman/firmware/firmware.cpp000066400000000000000000000027561422164147700276400ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/basic_math.h" #include "level_zero/tools/source/sysman/firmware/firmware_imp.h" namespace L0 { class OsFirmware; FirmwareHandleContext::~FirmwareHandleContext() { releaseFwHandles(); } void FirmwareHandleContext::releaseFwHandles() { for (Firmware *pFirmware : handleList) { delete pFirmware; } handleList.clear(); } void FirmwareHandleContext::createHandle(const std::string &fwType) { Firmware *pFirmware = new FirmwareImp(pOsSysman, fwType); if (pFirmware->isFirmwareEnabled == true) { handleList.push_back(pFirmware); } else { delete pFirmware; } } void FirmwareHandleContext::init() { std::vector supportedFwTypes = {}; OsFirmware::getSupportedFwTypes(supportedFwTypes, pOsSysman); for (const std::string &fwType : supportedFwTypes) { createHandle(fwType); } } ze_result_t FirmwareHandleContext::firmwareGet(uint32_t *pCount, zes_firmware_handle_t *phFirmware) { uint32_t handleListSize = static_cast(handleList.size()); uint32_t numToCopy = std::min(*pCount, handleListSize); if (0 == *pCount || *pCount > handleListSize) { *pCount = handleListSize; } if (nullptr != phFirmware) { for (uint32_t i = 0; i < numToCopy; i++) { phFirmware[i] = handleList[i]->toHandle(); } } return ZE_RESULT_SUCCESS; } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/firmware/firmware.h000066400000000000000000000023031422164147700272710ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include #include #include struct _zes_firmware_handle_t { virtual 
~_zes_firmware_handle_t() = default; }; namespace L0 { struct OsSysman; class Firmware : _zes_firmware_handle_t { public: virtual ~Firmware() {} virtual ze_result_t firmwareGetProperties(zes_firmware_properties_t *pProperties) = 0; virtual ze_result_t firmwareFlash(void *pImage, uint32_t size) = 0; inline zes_firmware_handle_t toHandle() { return this; } static Firmware *fromHandle(zes_firmware_handle_t handle) { return static_cast(handle); } bool isFirmwareEnabled = false; }; struct FirmwareHandleContext { FirmwareHandleContext(OsSysman *pOsSysman) : pOsSysman(pOsSysman){}; MOCKABLE_VIRTUAL ~FirmwareHandleContext(); void releaseFwHandles(); MOCKABLE_VIRTUAL void init(); ze_result_t firmwareGet(uint32_t *pCount, zes_firmware_handle_t *phFirmware); OsSysman *pOsSysman = nullptr; std::vector handleList = {}; private: void createHandle(const std::string &fwType); }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/firmware/firmware_imp.cpp000066400000000000000000000017611422164147700305000ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "firmware_imp.h" #include "shared/source/helpers/debug_helpers.h" #include "os_firmware.h" #include namespace L0 { ze_result_t FirmwareImp::firmwareGetProperties(zes_firmware_properties_t *pProperties) { pOsFirmware->osGetFwProperties(pProperties); strncpy_s(pProperties->name, ZES_STRING_PROPERTY_SIZE, fwType.c_str(), fwType.size()); return ZE_RESULT_SUCCESS; } ze_result_t FirmwareImp::firmwareFlash(void *pImage, uint32_t size) { return pOsFirmware->osFirmwareFlash(pImage, size); } void FirmwareImp::init() { this->isFirmwareEnabled = pOsFirmware->isFirmwareSupported(); } FirmwareImp::FirmwareImp(OsSysman *pOsSysman, const std::string &initalizedFwType) { pOsFirmware = OsFirmware::create(pOsSysman, initalizedFwType); fwType = initalizedFwType; UNRECOVERABLE_IF(nullptr == pOsFirmware); init(); } FirmwareImp::~FirmwareImp() { } } // 
namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/firmware/firmware_imp.h000066400000000000000000000015751422164147700301500ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/non_copyable_or_moveable.h" #include "shared/source/helpers/string.h" #include "level_zero/tools/source/sysman/firmware/firmware.h" #include "level_zero/tools/source/sysman/firmware/os_firmware.h" #include namespace L0 { class OsFirmware; class FirmwareImp : public Firmware, NEO::NonCopyableOrMovableClass { public: ze_result_t firmwareGetProperties(zes_firmware_properties_t *pProperties) override; ze_result_t firmwareFlash(void *pImage, uint32_t size) override; FirmwareImp() = default; FirmwareImp(OsSysman *pOsSysman, const std::string &fwType); ~FirmwareImp() override; std::unique_ptr pOsFirmware = nullptr; std::string fwType = "Unknown"; void init(); }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/firmware/linux/000077500000000000000000000000001422164147700264455ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/firmware/linux/CMakeLists.txt000066400000000000000000000015271422164147700312120ustar00rootroot00000000000000# # Copyright (C) 2020-2022 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_TOOLS_SYSMAN_FIRMWARE_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/os_firmware_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/os_firmware_imp.h ) if(NEO_ENABLE_i915_PRELIM_DETECTION) list(APPEND L0_SRCS_TOOLS_SYSMAN_FIRMWARE_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/os_firmware_imp_helper_prelim.cpp ) else() list(APPEND L0_SRCS_TOOLS_SYSMAN_FIRMWARE_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/os_firmware_imp_helper.cpp ) endif() if(UNIX) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_SYSMAN_FIRMWARE_LINUX} ) endif() # Make our source files visible to parent 
set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_SYSMAN_FIRMWARE_LINUX ${L0_SRCS_TOOLS_SYSMAN_FIRMWARE_LINUX}) compute-runtime-22.14.22890/level_zero/tools/source/sysman/firmware/linux/os_firmware_imp.cpp000066400000000000000000000053031422164147700323340ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/firmware/linux/os_firmware_imp.h" #include "shared/source/helpers/string.h" namespace L0 { static const std::string mtdDescriptor("/proc/mtd"); ze_result_t OsFirmware::getSupportedFwTypes(std::vector &supportedFwTypes, OsSysman *pOsSysman) { LinuxSysmanImp *pLinuxSysmanImp = static_cast(pOsSysman); FirmwareUtil *pFwInterface = pLinuxSysmanImp->getFwUtilInterface(); std::vector deviceSupportedFwTypes; if (pFwInterface != nullptr) { pFwInterface->getDeviceSupportedFwTypes(deviceSupportedFwTypes); } FsAccess *pFsAccess = &pLinuxSysmanImp->getFsAccess(); std::vector mtdDescriptorStrings = {}; ze_result_t result = pFsAccess->read(mtdDescriptor, mtdDescriptorStrings); if (result != ZE_RESULT_SUCCESS) { return result; } for (const auto &readByteLine : mtdDescriptorStrings) { for (const auto &fwType : deviceSupportedFwTypes) { if (std::string::npos != readByteLine.find(fwType)) { if (std::find(supportedFwTypes.begin(), supportedFwTypes.end(), fwType) == supportedFwTypes.end()) { supportedFwTypes.push_back(fwType); } } } } return ZE_RESULT_SUCCESS; } bool LinuxFirmwareImp::isFirmwareSupported(void) { if (pFwInterface != nullptr) { isFWInitalized = ((ZE_RESULT_SUCCESS == pFwInterface->fwDeviceInit()) ? 
true : false); return this->isFWInitalized; } return false; } void LinuxFirmwareImp::osGetFwProperties(zes_firmware_properties_t *pProperties) { if (ZE_RESULT_SUCCESS != getFirmwareVersion(osFwType, pProperties)) { strncpy_s(static_cast(pProperties->version), ZES_STRING_PROPERTY_SIZE, unknown.c_str(), ZES_STRING_PROPERTY_SIZE); } pProperties->canControl = true; //Assuming that user has permission to flash the firmware } ze_result_t LinuxFirmwareImp::osFirmwareFlash(void *pImage, uint32_t size) { return pFwInterface->flashFirmware(osFwType, pImage, size); } LinuxFirmwareImp::LinuxFirmwareImp(OsSysman *pOsSysman, const std::string &fwType) : osFwType(fwType) { LinuxSysmanImp *pLinuxSysmanImp = static_cast(pOsSysman); pSysfsAccess = &pLinuxSysmanImp->getSysfsAccess(); pFwInterface = pLinuxSysmanImp->getFwUtilInterface(); } std::unique_ptr OsFirmware::create(OsSysman *pOsSysman, const std::string &fwType) { std::unique_ptr pLinuxFirmwareImp = std::make_unique(pOsSysman, fwType); return pLinuxFirmwareImp; } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/firmware/linux/os_firmware_imp.h000066400000000000000000000017571422164147700320120ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/non_copyable_or_moveable.h" #include "sysman/firmware/firmware_imp.h" #include "sysman/firmware/os_firmware.h" #include "sysman/linux/os_sysman_imp.h" namespace L0 { class LinuxFirmwareImp : public OsFirmware, NEO::NonCopyableOrMovableClass { public: bool isFirmwareSupported(void) override; void osGetFwProperties(zes_firmware_properties_t *pProperties) override; ze_result_t osFirmwareFlash(void *pImage, uint32_t size) override; ze_result_t getFirmwareVersion(std::string fwType, zes_firmware_properties_t *pProperties); LinuxFirmwareImp() = default; LinuxFirmwareImp(OsSysman *pOsSysman, const std::string &fwType); ~LinuxFirmwareImp() override = default; 
protected: FirmwareUtil *pFwInterface = nullptr; SysfsAccess *pSysfsAccess = nullptr; bool isFWInitalized = false; std::string osFwType; }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/firmware/linux/os_firmware_imp_helper.cpp000066400000000000000000000011351422164147700336720ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/firmware/linux/os_firmware_imp.h" namespace L0 { ze_result_t LinuxFirmwareImp::getFirmwareVersion(std::string fwType, zes_firmware_properties_t *pProperties) { std::string fwVersion; ze_result_t result = pFwInterface->getFwVersion(fwType, fwVersion); if (ZE_RESULT_SUCCESS == result) { strncpy_s(static_cast(pProperties->version), ZES_STRING_PROPERTY_SIZE, fwVersion.c_str(), ZES_STRING_PROPERTY_SIZE); } return result; } } // namespace L0os_firmware_imp_helper_prelim.cpp000066400000000000000000000036741422164147700351750ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/firmware/linux/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/firmware/linux/os_firmware_imp.h" const std::string iafPath = "device/"; const std::string iafDirectory = "iaf."; const std::string pscbin_version = "/pscbin_version"; namespace L0 { ze_result_t LinuxFirmwareImp::getFirmwareVersion(std::string fwType, zes_firmware_properties_t *pProperties) { std::string fwVersion; if (fwType == "PSC") { std::string path; path.clear(); std::vector list; // scans the directories present in /sys/class/drm/cardX/device/ ze_result_t result = pSysfsAccess->scanDirEntries(iafPath, list); if (ZE_RESULT_SUCCESS != result) { // There should be a device directory return result; } for (const auto &entry : list) { if (!iafDirectory.compare(entry.substr(0, iafDirectory.length()))) { // device/iaf.X/pscbin_version, where X is the hardware slot number path = 
iafPath + entry + pscbin_version; } } if (path.empty()) { // This device does not have a PSC Version return ZE_RESULT_ERROR_NOT_AVAILABLE; } std::string pscVersion; pscVersion.clear(); result = pSysfsAccess->read(path, pscVersion); if (ZE_RESULT_SUCCESS != result) { // not able to read PSC version from iaf.x return result; } strncpy_s(static_cast(pProperties->version), ZES_STRING_PROPERTY_SIZE, pscVersion.c_str(), ZES_STRING_PROPERTY_SIZE); return result; } ze_result_t result = pFwInterface->getFwVersion(fwType, fwVersion); if (result == ZE_RESULT_SUCCESS) { strncpy_s(static_cast(pProperties->version), ZES_STRING_PROPERTY_SIZE, fwVersion.c_str(), ZES_STRING_PROPERTY_SIZE); } return result; } } // namespace L0compute-runtime-22.14.22890/level_zero/tools/source/sysman/firmware/os_firmware.h000066400000000000000000000013551422164147700300000ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/tools/source/sysman/os_sysman.h" #include #include #include #include namespace L0 { class OsFirmware { public: virtual bool isFirmwareSupported(void) = 0; virtual void osGetFwProperties(zes_firmware_properties_t *pProperties) = 0; virtual ze_result_t osFirmwareFlash(void *pImage, uint32_t size) = 0; static std::unique_ptr create(OsSysman *pOsSysman, const std::string &fwType); static ze_result_t getSupportedFwTypes(std::vector &supportedFwTypes, OsSysman *pOsSysman); virtual ~OsFirmware() {} }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/firmware/windows/000077500000000000000000000000001422164147700270005ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/firmware/windows/CMakeLists.txt000066400000000000000000000010051422164147700315340ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_TOOLS_SYSMAN_FIRMWARE_WINDOWS 
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/os_firmware_imp.cpp ) if(WIN32) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_SYSMAN_FIRMWARE_WINDOWS} ) endif() # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_SYSMAN_FIRMWARE_WINDOWS ${L0_SRCS_TOOLS_SYSMAN_FIRMWARE_WINDOWS}) compute-runtime-22.14.22890/level_zero/tools/source/sysman/firmware/windows/os_firmware_imp.cpp000066400000000000000000000015451422164147700326730ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/firmware/windows/os_firmware_imp.h" namespace L0 { bool WddmFirmwareImp::isFirmwareSupported(void) { return false; } void WddmFirmwareImp::osGetFwProperties(zes_firmware_properties_t *pProperties){}; ze_result_t WddmFirmwareImp::osFirmwareFlash(void *pImage, uint32_t size) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; }; std::unique_ptr OsFirmware::create(OsSysman *pOsSysman, const std::string &fwType) { std::unique_ptr pWddmFirmwareImp = std::make_unique(); return pWddmFirmwareImp; } ze_result_t OsFirmware::getSupportedFwTypes(std::vector &supportedFwTypes, OsSysman *pOsSysman) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/firmware/windows/os_firmware_imp.h000066400000000000000000000010251422164147700323310ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/non_copyable_or_moveable.h" #include "sysman/firmware/os_firmware.h" #include "sysman/windows/os_sysman_imp.h" namespace L0 { class WddmFirmwareImp : public OsFirmware { public: bool isFirmwareSupported(void) override; void osGetFwProperties(zes_firmware_properties_t *pProperties) override; ze_result_t osFirmwareFlash(void *pImage, uint32_t size) override; }; } // namespace L0 
compute-runtime-22.14.22890/level_zero/tools/source/sysman/frequency/000077500000000000000000000000001422164147700254735ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/frequency/CMakeLists.txt000066400000000000000000000013451422164147700302360ustar00rootroot00000000000000# # Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_TOOLS_SYSMAN_FREQUENCY ${CMAKE_CURRENT_SOURCE_DIR}/frequency.cpp ${CMAKE_CURRENT_SOURCE_DIR}/frequency.h ${CMAKE_CURRENT_SOURCE_DIR}/frequency_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/frequency_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/os_frequency.h ) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_SYSMAN_FREQUENCY} ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) if(UNIX) add_subdirectory(linux) else() add_subdirectory(windows) endif() # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_SYSMAN_FREQUENCY ${L0_SRCS_TOOLS_SYSMAN_FREQUENCY}) compute-runtime-22.14.22890/level_zero/tools/source/sysman/frequency/frequency.cpp000066400000000000000000000033611422164147700302030ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/frequency/frequency.h" #include "shared/source/helpers/basic_math.h" #include "level_zero/tools/source/sysman/frequency/frequency_imp.h" #include "level_zero/tools/source/sysman/frequency/os_frequency.h" namespace L0 { FrequencyHandleContext::~FrequencyHandleContext() { for (Frequency *pFrequency : handleList) { delete pFrequency; } } void FrequencyHandleContext::createHandle(ze_device_handle_t deviceHandle, zes_freq_domain_t frequencyDomain) { Frequency *pFrequency = new FrequencyImp(pOsSysman, deviceHandle, frequencyDomain); handleList.push_back(pFrequency); } ze_result_t FrequencyHandleContext::init(std::vector &deviceHandles) { for (const auto &deviceHandle : deviceHandles) { auto totalDomains = 
OsFrequency::getNumberOfFreqDoainsSupported(pOsSysman); UNRECOVERABLE_IF(totalDomains > 2); for (uint32_t frequencyDomain = 0; frequencyDomain < totalDomains; frequencyDomain++) { createHandle(deviceHandle, static_cast(frequencyDomain)); } } return ZE_RESULT_SUCCESS; } ze_result_t FrequencyHandleContext::frequencyGet(uint32_t *pCount, zes_freq_handle_t *phFrequency) { uint32_t handleListSize = static_cast(handleList.size()); uint32_t numToCopy = std::min(*pCount, handleListSize); if (0 == *pCount || *pCount > handleListSize) { *pCount = handleListSize; } if (nullptr != phFrequency) { for (uint32_t i = 0; i < numToCopy; i++) { phFrequency[i] = handleList[i]->toZesFreqHandle(); } } return ZE_RESULT_SUCCESS; } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/frequency/frequency.h000066400000000000000000000050201422164147700276420ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/core/source/device/device.h" #include #include struct _zes_freq_handle_t { virtual ~_zes_freq_handle_t() = default; }; namespace L0 { constexpr double unsupportedProperty = -1.0; struct OsSysman; class Frequency : _zes_freq_handle_t { public: virtual ~Frequency() {} virtual ze_result_t frequencyGetProperties(zes_freq_properties_t *pProperties) = 0; virtual ze_result_t frequencyGetAvailableClocks(uint32_t *pCount, double *phFrequency) = 0; virtual ze_result_t frequencyGetRange(zes_freq_range_t *pLimits) = 0; virtual ze_result_t frequencySetRange(const zes_freq_range_t *pLimits) = 0; virtual ze_result_t frequencyGetState(zes_freq_state_t *pState) = 0; virtual ze_result_t frequencyGetThrottleTime(zes_freq_throttle_time_t *pThrottleTime) = 0; // Overclocking virtual ze_result_t frequencyOcGetCapabilities(zes_oc_capabilities_t *pOcCapabilities) = 0; virtual ze_result_t frequencyOcGetFrequencyTarget(double *pCurrentOcfrequency) = 0; virtual ze_result_t 
frequencyOcSetFrequencyTarget(double currentOcfrequency) = 0; virtual ze_result_t frequencyOcGetVoltageTarget(double *pCurrentVoltageTarget, double *pCurrentVoltageOffset) = 0; virtual ze_result_t frequencyOcSetVoltageTarget(double currentVoltageTarget, double currentVoltageOffset) = 0; virtual ze_result_t frequencyOcGetMode(zes_oc_mode_t *pCurrentOcMode) = 0; virtual ze_result_t frequencyOcSetMode(zes_oc_mode_t currentOcMode) = 0; virtual ze_result_t frequencyOcGetIccMax(double *pOcIccMax) = 0; virtual ze_result_t frequencyOcSetIccMax(double ocIccMax) = 0; virtual ze_result_t frequencyOcGeTjMax(double *pOcTjMax) = 0; virtual ze_result_t frequencyOcSetTjMax(double ocTjMax) = 0; static Frequency *fromHandle(zes_freq_handle_t handle) { return static_cast(handle); } inline zes_freq_handle_t toZesFreqHandle() { return this; } }; struct FrequencyHandleContext { FrequencyHandleContext(OsSysman *pOsSysman) : pOsSysman(pOsSysman){}; ~FrequencyHandleContext(); ze_result_t init(std::vector &deviceHandles); ze_result_t frequencyGet(uint32_t *pCount, zes_freq_handle_t *phFrequency); OsSysman *pOsSysman = nullptr; std::vector handleList = {}; private: void createHandle(ze_device_handle_t deviceHandle, zes_freq_domain_t frequencyDomain); }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/frequency/frequency_imp.cpp000066400000000000000000000111531422164147700310460ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/frequency/frequency_imp.h" #include "shared/source/helpers/basic_math.h" #include "shared/source/helpers/debug_helpers.h" #include namespace L0 { ze_result_t FrequencyImp::frequencyGetProperties(zes_freq_properties_t *pProperties) { *pProperties = zesFrequencyProperties; return ZE_RESULT_SUCCESS; } ze_result_t FrequencyImp::frequencyGetAvailableClocks(uint32_t *pCount, double *phFrequency) { uint32_t numToCopy = std::min(*pCount, 
numClocks); if (0 == *pCount || *pCount > numClocks) { *pCount = numClocks; } if (nullptr != phFrequency) { for (uint32_t i = 0; i < numToCopy; i++) { phFrequency[i] = pClocks[i]; } } return ZE_RESULT_SUCCESS; } ze_result_t FrequencyImp::frequencyGetRange(zes_freq_range_t *pLimits) { return pOsFrequency->osFrequencyGetRange(pLimits); } ze_result_t FrequencyImp::frequencySetRange(const zes_freq_range_t *pLimits) { double newMin = round(pLimits->min); double newMax = round(pLimits->max); // No need to check if the frequency is inside the clocks array: // 1. GuC will cap this, GuC has an internal range. Hw too rounds to the next step, no need to do that check. // 2. For Overclocking, Oc frequency will be higher than the zesFrequencyProperties.max frequency, so it would be outside // the clocks array too. Pcode at the end will decide the granted frequency, no need for the check. if (newMin > newMax) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } return pOsFrequency->osFrequencySetRange(pLimits); } ze_result_t FrequencyImp::frequencyGetState(zes_freq_state_t *pState) { return pOsFrequency->osFrequencyGetState(pState); } ze_result_t FrequencyImp::frequencyGetThrottleTime(zes_freq_throttle_time_t *pThrottleTime) { return pOsFrequency->osFrequencyGetThrottleTime(pThrottleTime); } ze_result_t FrequencyImp::frequencyOcGetCapabilities(zes_oc_capabilities_t *pOcCapabilities) { return pOsFrequency->getOcCapabilities(pOcCapabilities); } ze_result_t FrequencyImp::frequencyOcGetFrequencyTarget(double *pCurrentOcFrequency) { return pOsFrequency->getOcFrequencyTarget(pCurrentOcFrequency); } ze_result_t FrequencyImp::frequencyOcSetFrequencyTarget(double currentOcFrequency) { return pOsFrequency->setOcFrequencyTarget(currentOcFrequency); } ze_result_t FrequencyImp::frequencyOcGetVoltageTarget(double *pCurrentVoltageTarget, double *pCurrentVoltageOffset) { return pOsFrequency->getOcVoltageTarget(pCurrentVoltageTarget, pCurrentVoltageOffset); } ze_result_t 
FrequencyImp::frequencyOcSetVoltageTarget(double currentVoltageTarget, double currentVoltageOffset) { return pOsFrequency->setOcVoltageTarget(currentVoltageTarget, currentVoltageOffset); } ze_result_t FrequencyImp::frequencyOcGetMode(zes_oc_mode_t *pCurrentOcMode) { return pOsFrequency->getOcMode(pCurrentOcMode); } ze_result_t FrequencyImp::frequencyOcSetMode(zes_oc_mode_t currentOcMode) { return pOsFrequency->setOcMode(currentOcMode); } ze_result_t FrequencyImp::frequencyOcGetIccMax(double *pOcIccMax) { return pOsFrequency->getOcIccMax(pOcIccMax); } ze_result_t FrequencyImp::frequencyOcSetIccMax(double ocIccMax) { return pOsFrequency->setOcIccMax(ocIccMax); } ze_result_t FrequencyImp::frequencyOcGeTjMax(double *pOcTjMax) { return pOsFrequency->getOcTjMax(pOcTjMax); } ze_result_t FrequencyImp::frequencyOcSetTjMax(double ocTjMax) { return pOsFrequency->setOcTjMax(ocTjMax); } void FrequencyImp::init() { pOsFrequency->osFrequencyGetProperties(zesFrequencyProperties); double step = pOsFrequency->osFrequencyGetStepSize(); double freqRange = zesFrequencyProperties.max - zesFrequencyProperties.min; numClocks = static_cast(round(freqRange / step)) + 1; pClocks = new double[numClocks]; for (unsigned int i = 0; i < numClocks; i++) { pClocks[i] = round(zesFrequencyProperties.min + (step * i)); } } FrequencyImp::FrequencyImp(OsSysman *pOsSysman, ze_device_handle_t handle, zes_freq_domain_t frequencyDomainNumber) : deviceHandle(handle) { ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES}; Device::fromHandle(deviceHandle)->getProperties(&deviceProperties); pOsFrequency = OsFrequency::create(pOsSysman, deviceProperties.flags & ZE_DEVICE_PROPERTY_FLAG_SUBDEVICE, deviceProperties.subdeviceId, frequencyDomainNumber); UNRECOVERABLE_IF(nullptr == pOsFrequency); init(); } FrequencyImp::~FrequencyImp() { delete pOsFrequency; delete[] pClocks; } } // namespace L0 
compute-runtime-22.14.22890/level_zero/tools/source/sysman/frequency/frequency_imp.h000066400000000000000000000043141422164147700305140ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/non_copyable_or_moveable.h" #include "level_zero/tools/source/sysman/frequency/frequency.h" #include "level_zero/tools/source/sysman/frequency/os_frequency.h" #include namespace L0 { class FrequencyImp : public Frequency, NEO::NonCopyableOrMovableClass { public: ze_result_t frequencyGetProperties(zes_freq_properties_t *pProperties) override; ze_result_t frequencyGetAvailableClocks(uint32_t *pCount, double *phFrequency) override; ze_result_t frequencyGetRange(zes_freq_range_t *pLimits) override; ze_result_t frequencySetRange(const zes_freq_range_t *pLimits) override; ze_result_t frequencyGetState(zes_freq_state_t *pState) override; ze_result_t frequencyGetThrottleTime(zes_freq_throttle_time_t *pThrottleTime) override; // Overclocking ze_result_t frequencyOcGetCapabilities(zes_oc_capabilities_t *pOcCapabilities) override; ze_result_t frequencyOcGetFrequencyTarget(double *pCurrentOcFrequency) override; ze_result_t frequencyOcSetFrequencyTarget(double currentOcFrequency) override; ze_result_t frequencyOcGetVoltageTarget(double *pCurrentVoltageTarget, double *pCurrentVoltageOffset) override; ze_result_t frequencyOcSetVoltageTarget(double currentVoltageTarget, double currentVoltageOffset) override; ze_result_t frequencyOcGetMode(zes_oc_mode_t *pCurrentOcMode) override; ze_result_t frequencyOcSetMode(zes_oc_mode_t currentOcMode) override; ze_result_t frequencyOcGetIccMax(double *pOcIccMax) override; ze_result_t frequencyOcSetIccMax(double ocIccMax) override; ze_result_t frequencyOcGeTjMax(double *pOcTjMax) override; ze_result_t frequencyOcSetTjMax(double ocTjMax) override; FrequencyImp() = default; FrequencyImp(OsSysman *pOsSysman, ze_device_handle_t handle, zes_freq_domain_t 
frequencyDomainNumber); ~FrequencyImp() override; OsFrequency *pOsFrequency = nullptr; void init(); private: zes_freq_properties_t zesFrequencyProperties = {}; double *pClocks = nullptr; uint32_t numClocks = 0; ze_device_handle_t deviceHandle = nullptr; }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/frequency/linux/000077500000000000000000000000001422164147700266325ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/frequency/linux/CMakeLists.txt000066400000000000000000000015361422164147700313770ustar00rootroot00000000000000# # Copyright (C) 2020-2022 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_TOOLS_SYSMAN_FREQUENCY_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) if(NEO_ENABLE_i915_PRELIM_DETECTION) list(APPEND L0_SRCS_TOOLS_SYSMAN_FREQUENCY_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/os_frequency_imp_prelim.cpp ${CMAKE_CURRENT_SOURCE_DIR}/os_frequency_imp_prelim.h ) else() list(APPEND L0_SRCS_TOOLS_SYSMAN_FREQUENCY_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/os_frequency_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/os_frequency_imp.h ) endif() if(UNIX) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_SYSMAN_FREQUENCY_LINUX} ) endif() # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_SYSMAN_FREQUENCY_LINUX ${L0_SRCS_TOOLS_SYSMAN_FREQUENCY_LINUX}) compute-runtime-22.14.22890/level_zero/tools/source/sysman/frequency/linux/os_frequency_imp.cpp000066400000000000000000000236371422164147700327200ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/frequency/linux/os_frequency_imp.h" #include "sysman/linux/os_sysman_imp.h" namespace L0 { const bool LinuxFrequencyImp::canControl = true; // canControl is true on i915 (GEN9 Hardcode) ze_result_t LinuxFrequencyImp::osFrequencyGetProperties(zes_freq_properties_t &properties) { properties.pNext = nullptr; 
properties.canControl = canControl; properties.type = frequencyDomainNumber; ze_result_t result1 = getMinVal(properties.min); ze_result_t result2 = getMaxVal(properties.max); // If can't figure out the valid range, then can't control it. if (ZE_RESULT_SUCCESS != result1 || ZE_RESULT_SUCCESS != result2) { properties.canControl = false; properties.min = 0.0; properties.max = 0.0; } properties.isThrottleEventSupported = false; properties.onSubdevice = isSubdevice; properties.subdeviceId = subdeviceId; return ZE_RESULT_SUCCESS; } double LinuxFrequencyImp::osFrequencyGetStepSize() { return 50.0 / 3; // Step of 16.6666667 Mhz (GEN9 Hardcode); } ze_result_t LinuxFrequencyImp::osFrequencyGetRange(zes_freq_range_t *pLimits) { ze_result_t result = getMax(pLimits->max); if (ZE_RESULT_SUCCESS != result) { pLimits->max = -1; } result = getMin(pLimits->min); if (ZE_RESULT_SUCCESS != result) { pLimits->min = -1; } return ZE_RESULT_SUCCESS; } ze_result_t LinuxFrequencyImp::osFrequencySetRange(const zes_freq_range_t *pLimits) { double newMin = round(pLimits->min); double newMax = round(pLimits->max); double currentMax = 0.0; ze_result_t result = getMax(currentMax); if (ZE_RESULT_SUCCESS != result) { return result; } if (newMin > currentMax) { // set the max first ze_result_t result = setMax(newMax); if (ZE_RESULT_SUCCESS != result) { return result; } return setMin(newMin); } // set the min first result = setMin(newMin); if (ZE_RESULT_SUCCESS != result) { return result; } return setMax(newMax); } ze_result_t LinuxFrequencyImp::osFrequencyGetState(zes_freq_state_t *pState) { ze_result_t result; result = getRequest(pState->request); if (ZE_RESULT_SUCCESS != result) { pState->request = -1; } result = getTdp(pState->tdp); if (ZE_RESULT_SUCCESS != result) { pState->tdp = -1; } result = getEfficient(pState->efficient); if (ZE_RESULT_SUCCESS != result) { pState->efficient = -1; } result = getActual(pState->actual); if (ZE_RESULT_SUCCESS != result) { pState->actual = -1; } pState->pNext = 
nullptr; pState->currentVoltage = -1.0; pState->throttleReasons = 0u; return ZE_RESULT_SUCCESS; } ze_result_t LinuxFrequencyImp::osFrequencyGetThrottleTime(zes_freq_throttle_time_t *pThrottleTime) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t LinuxFrequencyImp::getOcCapabilities(zes_oc_capabilities_t *pOcCapabilities) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t LinuxFrequencyImp::getOcFrequencyTarget(double *pCurrentOcFrequency) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t LinuxFrequencyImp::setOcFrequencyTarget(double currentOcFrequency) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t LinuxFrequencyImp::getOcVoltageTarget(double *pCurrentVoltageTarget, double *pCurrentVoltageOffset) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t LinuxFrequencyImp::setOcVoltageTarget(double currentVoltageTarget, double currentVoltageOffset) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t LinuxFrequencyImp::getOcMode(zes_oc_mode_t *pCurrentOcMode) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t LinuxFrequencyImp::setOcMode(zes_oc_mode_t currentOcMode) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t LinuxFrequencyImp::getOcIccMax(double *pOcIccMax) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t LinuxFrequencyImp::setOcIccMax(double ocIccMax) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t LinuxFrequencyImp::getOcTjMax(double *pOcTjMax) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t LinuxFrequencyImp::setOcTjMax(double ocTjMax) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t LinuxFrequencyImp::getMin(double &min) { double intval; ze_result_t result = pSysfsAccess->read(minFreqFile, intval); if (ZE_RESULT_SUCCESS != result) { if (result == ZE_RESULT_ERROR_NOT_AVAILABLE) { result = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } return result; } min = intval; return ZE_RESULT_SUCCESS; } ze_result_t LinuxFrequencyImp::setMin(double min) { 
ze_result_t result = pSysfsAccess->write(minFreqFile, min); if (ZE_RESULT_SUCCESS != result) { if (result == ZE_RESULT_ERROR_NOT_AVAILABLE) { result = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } return result; } return ZE_RESULT_SUCCESS; } ze_result_t LinuxFrequencyImp::getMax(double &max) { double intval; ze_result_t result = pSysfsAccess->read(maxFreqFile, intval); if (ZE_RESULT_SUCCESS != result) { if (result == ZE_RESULT_ERROR_NOT_AVAILABLE) { result = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } return result; } max = intval; return ZE_RESULT_SUCCESS; } ze_result_t LinuxFrequencyImp::setMax(double max) { ze_result_t result = pSysfsAccess->write(maxFreqFile, max); if (ZE_RESULT_SUCCESS != result) { if (result == ZE_RESULT_ERROR_NOT_AVAILABLE) { result = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } return result; } return ZE_RESULT_SUCCESS; } ze_result_t LinuxFrequencyImp::getRequest(double &request) { double intval; ze_result_t result = pSysfsAccess->read(requestFreqFile, intval); if (ZE_RESULT_SUCCESS != result) { if (result == ZE_RESULT_ERROR_NOT_AVAILABLE) { result = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } return result; } request = intval; return ZE_RESULT_SUCCESS; } ze_result_t LinuxFrequencyImp::getTdp(double &tdp) { double intval; ze_result_t result = pSysfsAccess->read(tdpFreqFile, intval); if (ZE_RESULT_SUCCESS != result) { if (result == ZE_RESULT_ERROR_NOT_AVAILABLE) { result = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } return result; } tdp = intval; return ZE_RESULT_SUCCESS; } ze_result_t LinuxFrequencyImp::getActual(double &actual) { double intval; ze_result_t result = pSysfsAccess->read(actualFreqFile, intval); if (ZE_RESULT_SUCCESS != result) { if (result == ZE_RESULT_ERROR_NOT_AVAILABLE) { result = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } return result; } actual = intval; return ZE_RESULT_SUCCESS; } ze_result_t LinuxFrequencyImp::getEfficient(double &efficient) { double intval; ze_result_t result = pSysfsAccess->read(efficientFreqFile, intval); if (ZE_RESULT_SUCCESS != 
result) { if (result == ZE_RESULT_ERROR_NOT_AVAILABLE) { result = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } return result; } efficient = intval; return ZE_RESULT_SUCCESS; } ze_result_t LinuxFrequencyImp::getMaxVal(double &maxVal) { double intval; ze_result_t result = pSysfsAccess->read(maxValFreqFile, intval); if (ZE_RESULT_SUCCESS != result) { if (result == ZE_RESULT_ERROR_NOT_AVAILABLE) { result = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } return result; } maxVal = intval; return ZE_RESULT_SUCCESS; } ze_result_t LinuxFrequencyImp::getMinVal(double &minVal) { double intval; ze_result_t result = pSysfsAccess->read(minValFreqFile, intval); if (ZE_RESULT_SUCCESS != result) { if (result == ZE_RESULT_ERROR_NOT_AVAILABLE) { result = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } return result; } minVal = intval; return ZE_RESULT_SUCCESS; } void LinuxFrequencyImp::init() { const std::string baseDir = "gt/gt" + std::to_string(subdeviceId) + "/"; if (pSysfsAccess->directoryExists(baseDir)) { minFreqFile = baseDir + "rps_min_freq_mhz"; maxFreqFile = baseDir + "rps_max_freq_mhz"; requestFreqFile = baseDir + "punit_req_freq_mhz"; tdpFreqFile = baseDir + "rapl_PL1_freq_mhz"; actualFreqFile = baseDir + "rps_act_freq_mhz"; efficientFreqFile = baseDir + "rps_RP1_freq_mhz"; maxValFreqFile = baseDir + "rps_RP0_freq_mhz"; minValFreqFile = baseDir + "rps_RPn_freq_mhz"; } else { minFreqFile = "gt_min_freq_mhz"; maxFreqFile = "gt_max_freq_mhz"; requestFreqFile = "gt_cur_freq_mhz"; tdpFreqFile = "rapl_PL1_freq_mhz"; actualFreqFile = "gt_act_freq_mhz"; efficientFreqFile = "gt_RP1_freq_mhz"; maxValFreqFile = "gt_RP0_freq_mhz"; minValFreqFile = "gt_RPn_freq_mhz"; } } LinuxFrequencyImp::LinuxFrequencyImp(OsSysman *pOsSysman, ze_bool_t onSubdevice, uint32_t subdeviceId, zes_freq_domain_t frequencyDomainNumber) : isSubdevice(onSubdevice), subdeviceId(subdeviceId), frequencyDomainNumber(frequencyDomainNumber) { LinuxSysmanImp *pLinuxSysmanImp = static_cast(pOsSysman); pSysfsAccess = 
&pLinuxSysmanImp->getSysfsAccess(); init(); } OsFrequency *OsFrequency::create(OsSysman *pOsSysman, ze_bool_t onSubdevice, uint32_t subdeviceId, zes_freq_domain_t frequencyDomainNumber) { LinuxFrequencyImp *pLinuxFrequencyImp = new LinuxFrequencyImp(pOsSysman, onSubdevice, subdeviceId, frequencyDomainNumber); return static_cast(pLinuxFrequencyImp); } uint16_t OsFrequency::getNumberOfFreqDoainsSupported(OsSysman *pOsSysman) { return 1; // hardcode for now to support only ZES_FREQ_DOMAIN_GPU } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/frequency/linux/os_frequency_imp.h000066400000000000000000000053261422164147700323600ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/non_copyable_or_moveable.h" #include "sysman/frequency/frequency_imp.h" #include "sysman/frequency/os_frequency.h" #include "sysman/linux/fs_access.h" namespace L0 { class LinuxFrequencyImp : public OsFrequency, NEO::NonCopyableOrMovableClass { public: ze_result_t osFrequencyGetProperties(zes_freq_properties_t &properties) override; double osFrequencyGetStepSize() override; ze_result_t osFrequencyGetRange(zes_freq_range_t *pLimits) override; ze_result_t osFrequencySetRange(const zes_freq_range_t *pLimits) override; ze_result_t osFrequencyGetState(zes_freq_state_t *pState) override; ze_result_t osFrequencyGetThrottleTime(zes_freq_throttle_time_t *pThrottleTime) override; ze_result_t getOcCapabilities(zes_oc_capabilities_t *pOcCapabilities) override; ze_result_t getOcFrequencyTarget(double *pCurrentOcFrequency) override; ze_result_t setOcFrequencyTarget(double currentOcFrequency) override; ze_result_t getOcVoltageTarget(double *pCurrentVoltageTarget, double *pCurrentVoltageOffset) override; ze_result_t setOcVoltageTarget(double currentVoltageTarget, double currentVoltageOffset) override; ze_result_t getOcMode(zes_oc_mode_t *pCurrentOcMode) override; ze_result_t 
setOcMode(zes_oc_mode_t currentOcMode) override; ze_result_t getOcIccMax(double *pOcIccMax) override; ze_result_t setOcIccMax(double ocIccMax) override; ze_result_t getOcTjMax(double *pOcTjMax) override; ze_result_t setOcTjMax(double ocTjMax) override; LinuxFrequencyImp() = default; LinuxFrequencyImp(OsSysman *pOsSysman, ze_bool_t onSubdevice, uint32_t subdeviceId, zes_freq_domain_t frequencyDomainNumber); ~LinuxFrequencyImp() override = default; protected: SysfsAccess *pSysfsAccess = nullptr; ze_result_t getMin(double &min); ze_result_t setMin(double min); ze_result_t getMax(double &max); ze_result_t setMax(double max); ze_result_t getRequest(double &request); ze_result_t getTdp(double &tdp); ze_result_t getActual(double &actual); ze_result_t getEfficient(double &efficient); ze_result_t getMaxVal(double &maxVal); ze_result_t getMinVal(double &minVal); private: std::string minFreqFile; std::string maxFreqFile; std::string requestFreqFile; std::string tdpFreqFile; std::string actualFreqFile; std::string efficientFreqFile; std::string maxValFreqFile; std::string minValFreqFile; static const bool canControl; bool isSubdevice = false; uint32_t subdeviceId = 0; zes_freq_domain_t frequencyDomainNumber = ZES_FREQ_DOMAIN_GPU; void init(); }; } // namespace L0 os_frequency_imp_prelim.cpp000066400000000000000000000313361422164147700342040ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/frequency/linux/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/frequency/linux/os_frequency_imp_prelim.h" #include "igfxfmid.h" #include "sysman/linux/os_sysman_imp.h" namespace L0 { const bool LinuxFrequencyImp::canControl = true; // canControl is true on i915 (GEN9 Hardcode) ze_result_t LinuxFrequencyImp::osFrequencyGetProperties(zes_freq_properties_t &properties) { properties.pNext = nullptr; properties.canControl = canControl; properties.type = frequencyDomainNumber; ze_result_t 
result1 = getMinVal(properties.min); ze_result_t result2 = getMaxVal(properties.max); // If can't figure out the valid range, then can't control it. if (ZE_RESULT_SUCCESS != result1 || ZE_RESULT_SUCCESS != result2) { properties.canControl = false; properties.min = 0.0; properties.max = 0.0; } properties.isThrottleEventSupported = false; properties.onSubdevice = isSubdevice; properties.subdeviceId = subdeviceId; return ZE_RESULT_SUCCESS; } double LinuxFrequencyImp::osFrequencyGetStepSize() { auto productFamily = pDevice->getNEODevice()->getHardwareInfo().platform.eProductFamily; double stepSize; if (productFamily >= IGFX_XE_HP_SDV) { stepSize = 50.0; } else { stepSize = 50.0 / 3; // Step of 16.6666667 Mhz (GEN9 Hardcode) } return stepSize; } ze_result_t LinuxFrequencyImp::osFrequencyGetRange(zes_freq_range_t *pLimits) { ze_result_t result = getMax(pLimits->max); if (ZE_RESULT_SUCCESS != result) { pLimits->max = -1; } result = getMin(pLimits->min); if (ZE_RESULT_SUCCESS != result) { pLimits->min = -1; } return ZE_RESULT_SUCCESS; } ze_result_t LinuxFrequencyImp::osFrequencySetRange(const zes_freq_range_t *pLimits) { double newMin = round(pLimits->min); double newMax = round(pLimits->max); if (newMax == -1 && newMin == -1) { double MaxDefault = 0, MinDefault = 0; ze_result_t result1, result2, result; result1 = pSysfsAccess->read(maxDefaultFreqFile, MaxDefault); result2 = pSysfsAccess->read(minDefaultFreqFile, MinDefault); if (result1 == ZE_RESULT_SUCCESS && result2 == ZE_RESULT_SUCCESS) { result = setMax(MaxDefault); if (ZE_RESULT_SUCCESS != result) { return result; } return setMin(MinDefault); } } double currentMax = 0.0; ze_result_t result = getMax(currentMax); if (ZE_RESULT_SUCCESS != result) { return result; } if (newMin > currentMax) { // set the max first ze_result_t result = setMax(newMax); if (ZE_RESULT_SUCCESS != result) { return result; } return setMin(newMin); } // set the min first result = setMin(newMin); if (ZE_RESULT_SUCCESS != result) { return result; } 
return setMax(newMax); } bool LinuxFrequencyImp::getThrottleReasonStatus(void) { uint32_t val = 0; auto result = pSysfsAccess->read(throttleReasonStatusFile, val); if (ZE_RESULT_SUCCESS == result) { return (val == 0 ? false : true); } else { return false; } } ze_result_t LinuxFrequencyImp::osFrequencyGetState(zes_freq_state_t *pState) { ze_result_t result; result = getRequest(pState->request); if (ZE_RESULT_SUCCESS != result) { pState->request = -1; } result = getTdp(pState->tdp); if (ZE_RESULT_SUCCESS != result) { pState->tdp = -1; } result = getEfficient(pState->efficient); if (ZE_RESULT_SUCCESS != result) { pState->efficient = -1; } result = getActual(pState->actual); if (ZE_RESULT_SUCCESS != result) { pState->actual = -1; } pState->pNext = nullptr; pState->currentVoltage = -1.0; pState->throttleReasons = 0u; if (getThrottleReasonStatus()) { uint32_t val = 0; ze_result_t result; result = pSysfsAccess->read(throttleReasonPL1File, val); if (val && (result == ZE_RESULT_SUCCESS)) { pState->throttleReasons |= ZES_FREQ_THROTTLE_REASON_FLAG_AVE_PWR_CAP; } result = pSysfsAccess->read(throttleReasonPL2File, val); if (val && (result == ZE_RESULT_SUCCESS)) { pState->throttleReasons |= ZES_FREQ_THROTTLE_REASON_FLAG_BURST_PWR_CAP; } result = pSysfsAccess->read(throttleReasonPL4File, val); if (val && (result == ZE_RESULT_SUCCESS)) { pState->throttleReasons |= ZES_FREQ_THROTTLE_REASON_FLAG_CURRENT_LIMIT; } result = pSysfsAccess->read(throttleReasonThermalFile, val); if (val && (result == ZE_RESULT_SUCCESS)) { pState->throttleReasons |= ZES_FREQ_THROTTLE_REASON_FLAG_THERMAL_LIMIT; } } return ZE_RESULT_SUCCESS; } ze_result_t LinuxFrequencyImp::osFrequencyGetThrottleTime(zes_freq_throttle_time_t *pThrottleTime) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t LinuxFrequencyImp::getOcCapabilities(zes_oc_capabilities_t *pOcCapabilities) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t LinuxFrequencyImp::getOcFrequencyTarget(double *pCurrentOcFrequency) { 
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t LinuxFrequencyImp::setOcFrequencyTarget(double currentOcFrequency) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t LinuxFrequencyImp::getOcVoltageTarget(double *pCurrentVoltageTarget, double *pCurrentVoltageOffset) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t LinuxFrequencyImp::setOcVoltageTarget(double currentVoltageTarget, double currentVoltageOffset) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t LinuxFrequencyImp::getOcMode(zes_oc_mode_t *pCurrentOcMode) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t LinuxFrequencyImp::setOcMode(zes_oc_mode_t currentOcMode) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t LinuxFrequencyImp::getOcIccMax(double *pOcIccMax) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t LinuxFrequencyImp::setOcIccMax(double ocIccMax) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t LinuxFrequencyImp::getOcTjMax(double *pOcTjMax) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t LinuxFrequencyImp::setOcTjMax(double ocTjMax) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t LinuxFrequencyImp::getMin(double &min) { double intval = 0; ze_result_t result = pSysfsAccess->read(minFreqFile, intval); if (ZE_RESULT_SUCCESS != result) { if (result == ZE_RESULT_ERROR_NOT_AVAILABLE) { result = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } return result; } min = intval; return ZE_RESULT_SUCCESS; } ze_result_t LinuxFrequencyImp::setMin(double min) { ze_result_t result = pSysfsAccess->write(minFreqFile, min); if (ZE_RESULT_SUCCESS != result) { if (result == ZE_RESULT_ERROR_NOT_AVAILABLE) { result = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } return result; } return ZE_RESULT_SUCCESS; } ze_result_t LinuxFrequencyImp::getMax(double &max) { double intval = 0; ze_result_t result = pSysfsAccess->read(maxFreqFile, intval); if (ZE_RESULT_SUCCESS != result) { if (result == ZE_RESULT_ERROR_NOT_AVAILABLE) { result = 
ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } return result; } max = intval; return ZE_RESULT_SUCCESS; } ze_result_t LinuxFrequencyImp::setMax(double max) { ze_result_t result = pSysfsAccess->write(maxFreqFile, max); if (ZE_RESULT_SUCCESS != result) { if (result == ZE_RESULT_ERROR_NOT_AVAILABLE) { result = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } return result; } return ZE_RESULT_SUCCESS; } ze_result_t LinuxFrequencyImp::getRequest(double &request) { double intval = 0; ze_result_t result = pSysfsAccess->read(requestFreqFile, intval); if (ZE_RESULT_SUCCESS != result) { if (result == ZE_RESULT_ERROR_NOT_AVAILABLE) { result = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } return result; } request = intval; return ZE_RESULT_SUCCESS; } ze_result_t LinuxFrequencyImp::getTdp(double &tdp) { double intval = 0; ze_result_t result = pSysfsAccess->read(tdpFreqFile, intval); if (ZE_RESULT_SUCCESS != result) { if (result == ZE_RESULT_ERROR_NOT_AVAILABLE) { result = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } return result; } tdp = intval; return ZE_RESULT_SUCCESS; } ze_result_t LinuxFrequencyImp::getActual(double &actual) { double intval = 0; ze_result_t result = pSysfsAccess->read(actualFreqFile, intval); if (ZE_RESULT_SUCCESS != result) { if (result == ZE_RESULT_ERROR_NOT_AVAILABLE) { result = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } return result; } actual = intval; return ZE_RESULT_SUCCESS; } ze_result_t LinuxFrequencyImp::getEfficient(double &efficient) { double intval = 0; ze_result_t result = pSysfsAccess->read(efficientFreqFile, intval); if (ZE_RESULT_SUCCESS != result) { if (result == ZE_RESULT_ERROR_NOT_AVAILABLE) { result = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } return result; } efficient = intval; return ZE_RESULT_SUCCESS; } ze_result_t LinuxFrequencyImp::getMaxVal(double &maxVal) { double intval = 0; ze_result_t result = pSysfsAccess->read(maxValFreqFile, intval); if (ZE_RESULT_SUCCESS != result) { if (result == ZE_RESULT_ERROR_NOT_AVAILABLE) { result = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; 
} return result; } maxVal = intval; return ZE_RESULT_SUCCESS; } ze_result_t LinuxFrequencyImp::getMinVal(double &minVal) { double intval = 0; ze_result_t result = pSysfsAccess->read(minValFreqFile, intval); if (ZE_RESULT_SUCCESS != result) { if (result == ZE_RESULT_ERROR_NOT_AVAILABLE) { result = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } return result; } minVal = intval; return ZE_RESULT_SUCCESS; } void LinuxFrequencyImp::init() { const std::string baseDir = "gt/gt" + std::to_string(subdeviceId) + "/"; if (pSysfsAccess->directoryExists(baseDir)) { minFreqFile = baseDir + "rps_min_freq_mhz"; minDefaultFreqFile = baseDir + ".defaults/rps_min_freq_mhz"; maxFreqFile = baseDir + "rps_max_freq_mhz"; maxDefaultFreqFile = baseDir + ".defaults/rps_max_freq_mhz"; requestFreqFile = baseDir + "punit_req_freq_mhz"; tdpFreqFile = baseDir + "rapl_PL1_freq_mhz"; actualFreqFile = baseDir + "rps_act_freq_mhz"; efficientFreqFile = baseDir + "rps_RP1_freq_mhz"; maxValFreqFile = baseDir + "rps_RP0_freq_mhz"; minValFreqFile = baseDir + "rps_RPn_freq_mhz"; throttleReasonStatusFile = "throttle_reason_status"; throttleReasonPL1File = "throttle_reason_pl1"; throttleReasonPL2File = "throttle_reason_pl2"; throttleReasonPL4File = "throttle_reason_pl4"; throttleReasonThermalFile = "throttle_reason_thermal"; } else { minFreqFile = "gt_min_freq_mhz"; maxFreqFile = "gt_max_freq_mhz"; requestFreqFile = "gt_cur_freq_mhz"; tdpFreqFile = "rapl_PL1_freq_mhz"; actualFreqFile = "gt_act_freq_mhz"; efficientFreqFile = "gt_RP1_freq_mhz"; maxValFreqFile = "gt_RP0_freq_mhz"; minValFreqFile = "gt_RPn_freq_mhz"; throttleReasonStatusFile = "gt_throttle_reason_status"; throttleReasonPL1File = "gt_throttle_reason_status_pl1"; throttleReasonPL2File = "gt_throttle_reason_status_pl2"; throttleReasonPL4File = "gt_throttle_reason_status_pl4"; throttleReasonThermalFile = "gt_throttle_reason_status_thermal"; } } LinuxFrequencyImp::LinuxFrequencyImp(OsSysman *pOsSysman, ze_bool_t onSubdevice, uint32_t subdeviceId, 
zes_freq_domain_t frequencyDomainNumber) : isSubdevice(onSubdevice), subdeviceId(subdeviceId), frequencyDomainNumber(frequencyDomainNumber) { LinuxSysmanImp *pLinuxSysmanImp = static_cast(pOsSysman); pSysfsAccess = &pLinuxSysmanImp->getSysfsAccess(); pDevice = Device::fromHandle(pLinuxSysmanImp->getSysmanDeviceImp()->hCoreDevice); init(); } OsFrequency *OsFrequency::create(OsSysman *pOsSysman, ze_bool_t onSubdevice, uint32_t subdeviceId, zes_freq_domain_t frequencyDomainNumber) { LinuxFrequencyImp *pLinuxFrequencyImp = new LinuxFrequencyImp(pOsSysman, onSubdevice, subdeviceId, frequencyDomainNumber); return static_cast(pLinuxFrequencyImp); } uint16_t OsFrequency::getNumberOfFreqDoainsSupported(OsSysman *pOsSysman) { return 1; // hardcode for now to support only ZES_FREQ_DOMAIN_GPU } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/frequency/linux/os_frequency_imp_prelim.h000066400000000000000000000060521422164147700337250ustar00rootroot00000000000000/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/non_copyable_or_moveable.h" #include "sysman/frequency/frequency_imp.h" #include "sysman/frequency/os_frequency.h" #include "sysman/linux/fs_access.h" namespace L0 { class LinuxFrequencyImp : public OsFrequency, NEO::NonCopyableOrMovableClass { public: ze_result_t osFrequencyGetProperties(zes_freq_properties_t &properties) override; double osFrequencyGetStepSize() override; ze_result_t osFrequencyGetRange(zes_freq_range_t *pLimits) override; ze_result_t osFrequencySetRange(const zes_freq_range_t *pLimits) override; ze_result_t osFrequencyGetState(zes_freq_state_t *pState) override; ze_result_t osFrequencyGetThrottleTime(zes_freq_throttle_time_t *pThrottleTime) override; ze_result_t getOcCapabilities(zes_oc_capabilities_t *pOcCapabilities) override; ze_result_t getOcFrequencyTarget(double *pCurrentOcFrequency) override; ze_result_t setOcFrequencyTarget(double 
currentOcFrequency) override; ze_result_t getOcVoltageTarget(double *pCurrentVoltageTarget, double *pCurrentVoltageOffset) override; ze_result_t setOcVoltageTarget(double currentVoltageTarget, double currentVoltageOffset) override; ze_result_t getOcMode(zes_oc_mode_t *pCurrentOcMode) override; ze_result_t setOcMode(zes_oc_mode_t currentOcMode) override; ze_result_t getOcIccMax(double *pOcIccMax) override; ze_result_t setOcIccMax(double ocIccMax) override; ze_result_t getOcTjMax(double *pOcTjMax) override; ze_result_t setOcTjMax(double ocTjMax) override; LinuxFrequencyImp() = default; LinuxFrequencyImp(OsSysman *pOsSysman, ze_bool_t onSubdevice, uint32_t subdeviceId, zes_freq_domain_t frequencyDomainNumber); ~LinuxFrequencyImp() override = default; protected: SysfsAccess *pSysfsAccess = nullptr; ze_result_t getMin(double &min); ze_result_t setMin(double min); ze_result_t getMax(double &max); ze_result_t setMax(double max); ze_result_t getRequest(double &request); ze_result_t getTdp(double &tdp); ze_result_t getActual(double &actual); ze_result_t getEfficient(double &efficient); ze_result_t getMaxVal(double &maxVal); ze_result_t getMinVal(double &minVal); bool getThrottleReasonStatus(void); Device *pDevice = nullptr; private: std::string minFreqFile; std::string maxFreqFile; std::string minDefaultFreqFile; std::string maxDefaultFreqFile; std::string requestFreqFile; std::string tdpFreqFile; std::string actualFreqFile; std::string efficientFreqFile; std::string maxValFreqFile; std::string minValFreqFile; std::string throttleReasonStatusFile; std::string throttleReasonPL1File; std::string throttleReasonPL2File; std::string throttleReasonPL4File; std::string throttleReasonThermalFile; static const bool canControl; bool isSubdevice = false; uint32_t subdeviceId = 0; zes_freq_domain_t frequencyDomainNumber = ZES_FREQ_DOMAIN_GPU; void init(); }; } // namespace L0 
compute-runtime-22.14.22890/level_zero/tools/source/sysman/frequency/os_frequency.h000066400000000000000000000033261422164147700303520ustar00rootroot00000000000000/*
 * Copyright (C) 2020-2021 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#pragma once
#include "level_zero/tools/source/sysman/os_sysman.h"

namespace L0 {

// OS abstraction for one Sysman frequency domain.
//
// Every operation is pure virtual; the platform back-ends (LinuxFrequencyImp,
// WddmFrequencyImp) derive from this class and override all of them. Objects
// are obtained through the static create() factory declared below.
class OsFrequency {
  public:
    // Fills `properties` for this domain (type, controllability, min/max,
    // throttle-event support, sub-device placement).
    virtual ze_result_t osFrequencyGetProperties(zes_freq_properties_t &properties) = 0;

    // Returns the frequency step size. The Linux and Windows back-ends return
    // 50.0 / 3, commented there as a 16.67 MHz GEN9 hard-code.
    virtual double osFrequencyGetStepSize() = 0;

    // Reads the currently programmed min/max limits into `pLimits`.
    // NOTE: the Linux back-end stores -1 in a limit it cannot read and still
    // returns ZE_RESULT_SUCCESS, so callers should inspect the values too.
    virtual ze_result_t osFrequencyGetRange(zes_freq_range_t *pLimits) = 0;

    // Programs new min/max limits taken from `pLimits`.
    virtual ze_result_t osFrequencySetRange(const zes_freq_range_t *pLimits) = 0;

    // Fills `pState` with the current request/tdp/efficient/actual frequencies,
    // voltage and throttle reasons, as far as the back-end can supply them.
    virtual ze_result_t osFrequencyGetState(zes_freq_state_t *pState) = 0;

    // Reports throttle time. Both back-ends in this tree return
    // ZE_RESULT_ERROR_UNSUPPORTED_FEATURE.
    virtual ze_result_t osFrequencyGetThrottleTime(zes_freq_throttle_time_t *pThrottleTime) = 0;

    // Overclocking (OC) getters and setters. The Linux back-end returns
    // ZE_RESULT_ERROR_UNSUPPORTED_FEATURE for every one of these; the Windows
    // back-end services them through its KMD sysman manager.
    virtual ze_result_t getOcCapabilities(zes_oc_capabilities_t *pOcCapabilities) = 0;
    virtual ze_result_t getOcFrequencyTarget(double *pCurrentOcFrequency) = 0;
    virtual ze_result_t setOcFrequencyTarget(double currentOcFrequency) = 0;
    virtual ze_result_t getOcVoltageTarget(double *pCurrentVoltageTarget, double *pCurrentVoltageOffset) = 0;
    virtual ze_result_t setOcVoltageTarget(double currentVoltageTarget, double currentVoltageOffset) = 0;
    virtual ze_result_t getOcMode(zes_oc_mode_t *pCurrentOcMode) = 0;
    virtual ze_result_t setOcMode(zes_oc_mode_t currentOcMode) = 0;
    virtual ze_result_t getOcIccMax(double *pOcIccMax) = 0;
    virtual ze_result_t setOcIccMax(double ocIccMax) = 0;
    virtual ze_result_t getOcTjMax(double *pOcTjMax) = 0;
    virtual ze_result_t setOcTjMax(double ocTjMax) = 0;

    // Factory for the platform-specific implementation of domain `type`.
    // The Linux definition allocates the object with `new` and returns the raw
    // pointer; presumably the caller takes ownership and must delete it --
    // TODO confirm against the owner of the returned pointer.
    static OsFrequency *create(OsSysman *pOsSysman, ze_bool_t onSubdevice, uint32_t subdeviceId, zes_freq_domain_t type);

    // Number of frequency domains the platform exposes. The Linux definition
    // is hard-coded to 1 (ZES_FREQ_DOMAIN_GPU only).
    // NOTE(review): "Doains" is a misspelling of "Domains"; the name is kept
    // because the per-OS definitions use this exact spelling.
    static uint16_t getNumberOfFreqDoainsSupported(OsSysman *pOsSysman);

    // Virtual so implementations can be destroyed through an OsFrequency pointer.
    virtual ~OsFrequency() {}
};

} // namespace L0
compute-runtime-22.14.22890/level_zero/tools/source/sysman/frequency/windows/000077500000000000000000000000001422164147700271655ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/frequency/windows/CMakeLists.txt000066400000000000000000000010161422164147700317230ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_TOOLS_SYSMAN_FREQUENCY_WINDOWS ${CMAKE_CURRENT_SOURCE_DIR}/os_frequency_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/os_frequency_imp.cpp ) if(WIN32) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_SYSMAN_FREQUENCY_WINDOWS} ) endif() # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_SYSMAN_FREQUENCY_WINDOWS ${L0_SRCS_TOOLS_SYSMAN_FREQUENCY_WINDOWS}) compute-runtime-22.14.22890/level_zero/tools/source/sysman/frequency/windows/os_frequency_imp.cpp000066400000000000000000000610361422164147700332460ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/frequency/windows/os_frequency_imp.h" #include "level_zero/tools/source/sysman/sysman_const.h" namespace L0 { ze_result_t WddmFrequencyImp::osFrequencyGetProperties(zes_freq_properties_t &properties) { readOverclockingInfo(); uint32_t value = 0; std::vector vRequests = {}; std::vector vResponses = {}; KmdSysman::RequestProperty request = {}; request.commandId = KmdSysman::Command::Get; request.componentId = KmdSysman::Component::FrequencyComponent; request.requestId = KmdSysman::Requests::Frequency::FrequencyThrottledEventSupported; request.paramInfo = static_cast(frequencyDomainNumber); vRequests.push_back(request); request.requestId = KmdSysman::Requests::Frequency::FrequencyRangeMinDefault; request.paramInfo = static_cast(frequencyDomainNumber); vRequests.push_back(request); request.requestId = KmdSysman::Requests::Frequency::FrequencyRangeMaxDefault; request.paramInfo = 
static_cast(frequencyDomainNumber); vRequests.push_back(request); request.requestId = KmdSysman::Requests::Frequency::CanControlFrequency; request.paramInfo = static_cast(frequencyDomainNumber); vRequests.push_back(request); ze_result_t status = pKmdSysManager->requestMultiple(vRequests, vResponses); if ((status != ZE_RESULT_SUCCESS) || (vResponses.size() != vRequests.size())) { return status; } properties.isThrottleEventSupported = false; if (vResponses[0].returnCode == ZE_RESULT_SUCCESS) { memcpy_s(&value, sizeof(uint32_t), vResponses[0].dataBuffer, sizeof(uint32_t)); properties.isThrottleEventSupported = static_cast(value); } properties.min = unsupportedProperty; if (vResponses[1].returnCode == ZE_RESULT_SUCCESS) { value = 0; memcpy_s(&value, sizeof(uint32_t), vResponses[1].dataBuffer, sizeof(uint32_t)); properties.min = static_cast(value); } properties.max = unsupportedProperty; if (vResponses[2].returnCode == ZE_RESULT_SUCCESS) { value = 0; memcpy_s(&value, sizeof(uint32_t), vResponses[2].dataBuffer, sizeof(uint32_t)); properties.max = static_cast(value); } properties.canControl = false; if (vResponses[3].returnCode == ZE_RESULT_SUCCESS) { value = 0; memcpy_s(&value, sizeof(uint32_t), vResponses[3].dataBuffer, sizeof(uint32_t)); properties.canControl = (value == 1); } properties.onSubdevice = false; properties.subdeviceId = 0; properties.type = frequencyDomainNumber; return ZE_RESULT_SUCCESS; } double WddmFrequencyImp::osFrequencyGetStepSize() { return 50.0 / 3; // Step of 16.6666667 Mhz (GEN9 Hardcode); } ze_result_t WddmFrequencyImp::osFrequencyGetRange(zes_freq_range_t *pLimits) { return getRange(&pLimits->min, &pLimits->max); } ze_result_t WddmFrequencyImp::osFrequencySetRange(const zes_freq_range_t *pLimits) { return setRange(pLimits->min, pLimits->max); } ze_result_t WddmFrequencyImp::osFrequencyGetState(zes_freq_state_t *pState) { uint32_t value = 0; std::vector vRequests = {}; std::vector vResponses = {}; KmdSysman::RequestProperty request = {}; 
request.commandId = KmdSysman::Command::Get; request.componentId = KmdSysman::Component::FrequencyComponent; request.requestId = KmdSysman::Requests::Frequency::CurrentRequestedFrequency; request.paramInfo = static_cast(this->frequencyDomainNumber); vRequests.push_back(request); request.requestId = KmdSysman::Requests::Frequency::CurrentTdpFrequency; vRequests.push_back(request); request.requestId = KmdSysman::Requests::Frequency::CurrentResolvedFrequency; vRequests.push_back(request); request.requestId = KmdSysman::Requests::Frequency::CurrentEfficientFrequency; vRequests.push_back(request); request.requestId = KmdSysman::Requests::Frequency::CurrentVoltage; vRequests.push_back(request); request.requestId = KmdSysman::Requests::Frequency::CurrentThrottleReasons; vRequests.push_back(request); ze_result_t status = pKmdSysManager->requestMultiple(vRequests, vResponses); if ((status != ZE_RESULT_SUCCESS) || (vResponses.size() != vRequests.size())) { return status; } pState->request = unsupportedProperty; if (vResponses[0].returnCode == KmdSysman::Success) { memcpy_s(&value, sizeof(uint32_t), vResponses[0].dataBuffer, sizeof(uint32_t)); pState->request = static_cast(value); } pState->tdp = unsupportedProperty; if (vResponses[1].returnCode == KmdSysman::Success) { memcpy_s(&value, sizeof(uint32_t), vResponses[1].dataBuffer, sizeof(uint32_t)); pState->tdp = static_cast(value); } pState->actual = unsupportedProperty; if (vResponses[2].returnCode == KmdSysman::Success) { memcpy_s(&value, sizeof(uint32_t), vResponses[2].dataBuffer, sizeof(uint32_t)); pState->actual = static_cast(value); } pState->efficient = unsupportedProperty; if (vResponses[3].returnCode == KmdSysman::Success) { memcpy_s(&value, sizeof(uint32_t), vResponses[3].dataBuffer, sizeof(uint32_t)); pState->efficient = static_cast(value); } pState->currentVoltage = unsupportedProperty; if (vResponses[4].returnCode == KmdSysman::Success) { memcpy_s(&value, sizeof(uint32_t), vResponses[4].dataBuffer, 
sizeof(uint32_t)); pState->currentVoltage = static_cast(value); pState->currentVoltage /= milliVoltsFactor; } if (vResponses[5].returnCode == KmdSysman::Success) { KmdThrottleReasons value = {0}; pState->throttleReasons = {0}; memcpy_s(&value, sizeof(uint32_t), vResponses[5].dataBuffer, sizeof(uint32_t)); if (value.power3) { pState->throttleReasons |= ZES_FREQ_THROTTLE_REASON_FLAG_AVE_PWR_CAP; } if (value.power4) { pState->throttleReasons |= ZES_FREQ_THROTTLE_REASON_FLAG_BURST_PWR_CAP; } if (value.current1) { pState->throttleReasons |= ZES_FREQ_THROTTLE_REASON_FLAG_CURRENT_LIMIT; } if (value.thermal1 || value.thermal2 || value.thermal3 || value.thermal4) { pState->throttleReasons |= ZES_FREQ_THROTTLE_REASON_FLAG_THERMAL_LIMIT; } } return ZE_RESULT_SUCCESS; } ze_result_t WddmFrequencyImp::osFrequencyGetThrottleTime(zes_freq_throttle_time_t *pThrottleTime) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t WddmFrequencyImp::getOcCapabilities(zes_oc_capabilities_t *pOcCapabilities) { *pOcCapabilities = ocCapabilities; return ZE_RESULT_SUCCESS; } ze_result_t WddmFrequencyImp::getOcFrequencyTarget(double *pCurrentOcFrequency) { ze_result_t status = ZE_RESULT_SUCCESS; uint32_t value = 0; KmdSysman::RequestProperty request; KmdSysman::ResponseProperty response; request.commandId = KmdSysman::Command::Get; request.componentId = KmdSysman::Component::FrequencyComponent; request.requestId = KmdSysman::Requests::Frequency::CurrentFrequencyTarget; request.paramInfo = static_cast(this->frequencyDomainNumber); status = pKmdSysManager->requestSingle(request, response); if (status != ZE_RESULT_SUCCESS) { return status; } memcpy_s(&value, sizeof(uint32_t), response.dataBuffer, sizeof(uint32_t)); *pCurrentOcFrequency = currentFrequencyTarget = static_cast(value); return status; } ze_result_t WddmFrequencyImp::setOcFrequencyTarget(double currentOcFrequency) { this->currentFrequencyTarget = currentOcFrequency; return applyOcSettings(); } ze_result_t 
WddmFrequencyImp::getOcVoltageTarget(double *pCurrentVoltageTarget, double *pCurrentVoltageOffset) { uint32_t unsignedValue = 0; int32_t signedValue = 0; std::vector vRequests = {}; std::vector vResponses = {}; KmdSysman::RequestProperty request = {}; request.commandId = KmdSysman::Command::Get; request.componentId = KmdSysman::Component::FrequencyComponent; request.requestId = KmdSysman::Requests::Frequency::CurrentVoltageTarget; request.paramInfo = static_cast(this->frequencyDomainNumber); vRequests.push_back(request); request.requestId = KmdSysman::Requests::Frequency::CurrentVoltageOffset; vRequests.push_back(request); ze_result_t status = pKmdSysManager->requestMultiple(vRequests, vResponses); if ((status != ZE_RESULT_SUCCESS) || (vResponses.size() != vRequests.size())) { return status; } if (vResponses[0].returnCode == KmdSysman::Success) { memcpy_s(&unsignedValue, sizeof(uint32_t), vResponses[0].dataBuffer, sizeof(uint32_t)); *pCurrentVoltageTarget = currentVoltageTarget = static_cast(unsignedValue); } if (vResponses[1].returnCode == KmdSysman::Success) { memcpy_s(&signedValue, sizeof(int32_t), vResponses[1].dataBuffer, sizeof(int32_t)); *pCurrentVoltageOffset = currentVoltageOffset = static_cast(signedValue); } return ZE_RESULT_SUCCESS; } ze_result_t WddmFrequencyImp::setOcVoltageTarget(double currentVoltageTarget, double currentVoltageOffset) { this->currentVoltageTarget = currentVoltageTarget; this->currentVoltageOffset = currentVoltageOffset; return applyOcSettings(); } ze_result_t WddmFrequencyImp::getOcMode(zes_oc_mode_t *pCurrentOcMode) { ze_result_t status = ZE_RESULT_SUCCESS; uint32_t value = 0; KmdSysman::RequestProperty request; KmdSysman::ResponseProperty response; request.commandId = KmdSysman::Command::Get; request.componentId = KmdSysman::Component::FrequencyComponent; request.requestId = KmdSysman::Requests::Frequency::CurrentVoltageMode; request.paramInfo = static_cast(this->frequencyDomainNumber); status = 
pKmdSysManager->requestSingle(request, response); if (status != ZE_RESULT_SUCCESS) { return status; } memcpy_s(&value, sizeof(uint32_t), response.dataBuffer, sizeof(uint32_t)); currentVoltageMode = value ? ZES_OC_MODE_OVERRIDE : ZES_OC_MODE_INTERPOLATIVE; *pCurrentOcMode = currentVoltageMode; return status; } ze_result_t WddmFrequencyImp::setOcMode(zes_oc_mode_t currentOcMode) { if (currentOcMode == ZES_OC_MODE_FIXED) { this->currentVoltageMode = ZES_OC_MODE_INTERPOLATIVE; return ze_result_t::ZE_RESULT_ERROR_UNSUPPORTED_ENUMERATION; } if (currentOcMode == ZES_OC_MODE_OFF) { this->currentVoltageMode = ZES_OC_MODE_INTERPOLATIVE; return applyOcSettings(); } if (currentOcMode == ZES_OC_MODE_INTERPOLATIVE || currentOcMode == ZES_OC_MODE_OVERRIDE) { this->currentVoltageMode = currentOcMode; return applyOcSettings(); } return ZE_RESULT_SUCCESS; } ze_result_t WddmFrequencyImp::getOcIccMax(double *pOcIccMax) { ze_result_t status = ZE_RESULT_SUCCESS; uint32_t value = 0; KmdSysman::RequestProperty request; KmdSysman::ResponseProperty response; request.commandId = KmdSysman::Command::Get; request.componentId = KmdSysman::Component::FrequencyComponent; request.requestId = KmdSysman::Requests::Frequency::CurrentIccMax; request.paramInfo = static_cast(this->frequencyDomainNumber); status = pKmdSysManager->requestSingle(request, response); if (status != ZE_RESULT_SUCCESS) { return status; } memcpy_s(&value, sizeof(uint32_t), response.dataBuffer, sizeof(uint32_t)); *pOcIccMax = static_cast(value); return status; } ze_result_t WddmFrequencyImp::setOcIccMax(double ocIccMax) { uint32_t value = 0; KmdSysman::RequestProperty request; KmdSysman::ResponseProperty response; request.commandId = KmdSysman::Command::Set; request.componentId = KmdSysman::Component::FrequencyComponent; request.requestId = KmdSysman::Requests::Frequency::CurrentIccMax; request.paramInfo = static_cast(this->frequencyDomainNumber); request.dataSize = sizeof(uint32_t); value = static_cast(ocIccMax); 
memcpy_s(request.dataBuffer, sizeof(uint32_t), &value, sizeof(uint32_t)); return pKmdSysManager->requestSingle(request, response); } ze_result_t WddmFrequencyImp::getOcTjMax(double *pOcTjMax) { ze_result_t status = ZE_RESULT_SUCCESS; uint32_t value = 0; KmdSysman::RequestProperty request; KmdSysman::ResponseProperty response; request.commandId = KmdSysman::Command::Get; request.componentId = KmdSysman::Component::FrequencyComponent; request.requestId = KmdSysman::Requests::Frequency::CurrentTjMax; request.paramInfo = static_cast(this->frequencyDomainNumber); status = pKmdSysManager->requestSingle(request, response); if (status != ZE_RESULT_SUCCESS) { return status; } memcpy_s(&value, sizeof(uint32_t), response.dataBuffer, sizeof(uint32_t)); *pOcTjMax = static_cast(value); return status; } ze_result_t WddmFrequencyImp::setOcTjMax(double ocTjMax) { uint32_t value = 0; KmdSysman::RequestProperty request; KmdSysman::ResponseProperty response; request.commandId = KmdSysman::Command::Set; request.componentId = KmdSysman::Component::FrequencyComponent; request.requestId = KmdSysman::Requests::Frequency::CurrentTjMax; request.paramInfo = static_cast(this->frequencyDomainNumber); request.dataSize = sizeof(uint32_t); value = static_cast(ocTjMax); memcpy_s(request.dataBuffer, sizeof(uint32_t), &value, sizeof(uint32_t)); return pKmdSysManager->requestSingle(request, response); } ze_result_t WddmFrequencyImp::setRange(double min, double max) { uint32_t value = 0; KmdSysman::RequestProperty request; KmdSysman::ResponseProperty response; request.commandId = KmdSysman::Command::Set; request.componentId = KmdSysman::Component::FrequencyComponent; request.requestId = KmdSysman::Requests::Frequency::CurrentFrequencyRange; request.paramInfo = static_cast(this->frequencyDomainNumber); request.dataSize = 2 * sizeof(uint32_t); value = static_cast(min); memcpy_s(request.dataBuffer, sizeof(uint32_t), &value, sizeof(uint32_t)); value = static_cast(max); memcpy_s((request.dataBuffer + 
sizeof(uint32_t)), sizeof(uint32_t), &value, sizeof(uint32_t)); return pKmdSysManager->requestSingle(request, response); } ze_result_t WddmFrequencyImp::getRange(double *min, double *max) { ze_result_t status = ZE_RESULT_SUCCESS; KmdSysman::RequestProperty request; KmdSysman::ResponseProperty response; request.commandId = KmdSysman::Command::Get; request.componentId = KmdSysman::Component::FrequencyComponent; request.requestId = KmdSysman::Requests::Frequency::CurrentFrequencyRange; request.paramInfo = static_cast(this->frequencyDomainNumber); status = pKmdSysManager->requestSingle(request, response); if (status != ZE_RESULT_SUCCESS) { return status; } uint32_t value = 0; memcpy_s(&value, sizeof(uint32_t), response.dataBuffer, sizeof(uint32_t)); *min = static_cast(value); memcpy_s(&value, sizeof(uint32_t), (response.dataBuffer + sizeof(uint32_t)), sizeof(uint32_t)); *max = static_cast(value); return status; } ze_result_t WddmFrequencyImp::applyOcSettings() { int32_t value = 0; std::vector vRequests = {}; std::vector vResponses = {}; KmdSysman::RequestProperty request = {}; request.commandId = KmdSysman::Command::Set; request.componentId = KmdSysman::Component::FrequencyComponent; request.paramInfo = static_cast(this->frequencyDomainNumber); request.dataSize = sizeof(int32_t); // Fixed mode not supported. request.requestId = KmdSysman::Requests::Frequency::CurrentFixedMode; value = 0; memcpy_s(request.dataBuffer, sizeof(int32_t), &value, sizeof(int32_t)); vRequests.push_back(request); request.requestId = KmdSysman::Requests::Frequency::CurrentVoltageMode; value = (currentVoltageMode == ZES_OC_MODE_OVERRIDE) ? 
1 : 0; memcpy_s(request.dataBuffer, sizeof(int32_t), &value, sizeof(int32_t)); vRequests.push_back(request); request.requestId = KmdSysman::Requests::Frequency::CurrentVoltageOffset; value = static_cast(currentVoltageOffset); memcpy_s(request.dataBuffer, sizeof(int32_t), &value, sizeof(int32_t)); vRequests.push_back(request); request.requestId = KmdSysman::Requests::Frequency::CurrentVoltageTarget; value = static_cast(currentVoltageTarget); memcpy_s(request.dataBuffer, sizeof(int32_t), &value, sizeof(int32_t)); vRequests.push_back(request); request.requestId = KmdSysman::Requests::Frequency::CurrentFrequencyTarget; value = static_cast(currentFrequencyTarget); memcpy_s(request.dataBuffer, sizeof(int32_t), &value, sizeof(int32_t)); vRequests.push_back(request); ze_result_t status = pKmdSysManager->requestMultiple(vRequests, vResponses); if ((status != ZE_RESULT_SUCCESS) || (vResponses.size() != vRequests.size())) { return status; } for (uint32_t i = 0; i < vResponses.size(); i++) { if (vResponses[i].returnCode != KmdSysman::ReturnCodes::Success) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } } return status; } void WddmFrequencyImp::readOverclockingInfo() { uint32_t value = 0; std::vector vRequests = {}; std::vector vResponses = {}; KmdSysman::RequestProperty request = {}; request.commandId = KmdSysman::Command::Get; request.componentId = KmdSysman::Component::FrequencyComponent; request.paramInfo = static_cast(this->frequencyDomainNumber); request.requestId = KmdSysman::Requests::Frequency::ExtendedOcSupported; vRequests.push_back(request); request.requestId = KmdSysman::Requests::Frequency::FixedModeSupported; vRequests.push_back(request); request.requestId = KmdSysman::Requests::Frequency::HighVoltageModeSupported; vRequests.push_back(request); request.requestId = KmdSysman::Requests::Frequency::HighVoltageEnabled; vRequests.push_back(request); request.requestId = KmdSysman::Requests::Frequency::CurrentIccMax; vRequests.push_back(request); request.requestId = 
KmdSysman::Requests::Frequency::FrequencyOcSupported; vRequests.push_back(request); request.requestId = KmdSysman::Requests::Frequency::CurrentTjMax; vRequests.push_back(request); request.requestId = KmdSysman::Requests::Frequency::MaxNonOcFrequencyDefault; vRequests.push_back(request); request.requestId = KmdSysman::Requests::Frequency::MaxNonOcVoltageDefault; vRequests.push_back(request); request.requestId = KmdSysman::Requests::Frequency::MaxOcFrequencyDefault; vRequests.push_back(request); request.requestId = KmdSysman::Requests::Frequency::MaxOcVoltageDefault; vRequests.push_back(request); request.requestId = KmdSysman::Requests::Frequency::CurrentFrequencyTarget; vRequests.push_back(request); request.requestId = KmdSysman::Requests::Frequency::CurrentVoltageTarget; vRequests.push_back(request); request.requestId = KmdSysman::Requests::Frequency::CurrentVoltageOffset; vRequests.push_back(request); request.requestId = KmdSysman::Requests::Frequency::CurrentVoltageMode; vRequests.push_back(request); ze_result_t status = pKmdSysManager->requestMultiple(vRequests, vResponses); if ((status != ZE_RESULT_SUCCESS) || (vResponses.size() != vRequests.size())) { return; } if (vResponses[0].returnCode == KmdSysman::Success) { memcpy_s(&value, sizeof(uint32_t), vResponses[0].dataBuffer, sizeof(uint32_t)); ocCapabilities.isExtendedModeSupported = static_cast(value); } if (vResponses[1].returnCode == KmdSysman::Success) { memcpy_s(&value, sizeof(uint32_t), vResponses[1].dataBuffer, sizeof(uint32_t)); ocCapabilities.isFixedModeSupported = static_cast(value); } if (vResponses[2].returnCode == KmdSysman::Success) { memcpy_s(&value, sizeof(uint32_t), vResponses[2].dataBuffer, sizeof(uint32_t)); ocCapabilities.isHighVoltModeCapable = static_cast(value); } if (vResponses[3].returnCode == KmdSysman::Success) { memcpy_s(&value, sizeof(uint32_t), vResponses[3].dataBuffer, sizeof(uint32_t)); ocCapabilities.isHighVoltModeEnabled = static_cast(value); } if (vResponses[4].returnCode == 
KmdSysman::Success) { memcpy_s(&value, sizeof(uint32_t), vResponses[4].dataBuffer, sizeof(uint32_t)); ocCapabilities.isIccMaxSupported = static_cast(value > 0); } if (vResponses[5].returnCode == KmdSysman::Success) { memcpy_s(&value, sizeof(uint32_t), vResponses[5].dataBuffer, sizeof(uint32_t)); ocCapabilities.isOcSupported = static_cast(value); } if (vResponses[6].returnCode == KmdSysman::Success) { memcpy_s(&value, sizeof(uint32_t), vResponses[6].dataBuffer, sizeof(uint32_t)); ocCapabilities.isTjMaxSupported = static_cast(value > 0); } if (vResponses[7].returnCode == KmdSysman::Success) { memcpy_s(&value, sizeof(uint32_t), vResponses[7].dataBuffer, sizeof(uint32_t)); ocCapabilities.maxFactoryDefaultFrequency = static_cast(value); } if (vResponses[8].returnCode == KmdSysman::Success) { memcpy_s(&value, sizeof(uint32_t), vResponses[8].dataBuffer, sizeof(uint32_t)); ocCapabilities.maxFactoryDefaultVoltage = static_cast(value); } if (vResponses[9].returnCode == KmdSysman::Success) { memcpy_s(&value, sizeof(uint32_t), vResponses[9].dataBuffer, sizeof(uint32_t)); ocCapabilities.maxOcFrequency = static_cast(value); } if (vResponses[10].returnCode == KmdSysman::Success) { memcpy_s(&value, sizeof(uint32_t), vResponses[10].dataBuffer, sizeof(uint32_t)); ocCapabilities.maxOcVoltage = static_cast(value); } if (vResponses[11].returnCode == KmdSysman::Success) { memcpy_s(&value, sizeof(uint32_t), vResponses[11].dataBuffer, sizeof(uint32_t)); currentFrequencyTarget = static_cast(value); } if (vResponses[12].returnCode == KmdSysman::Success) { memcpy_s(&value, sizeof(uint32_t), vResponses[12].dataBuffer, sizeof(uint32_t)); currentVoltageTarget = static_cast(value); } if (vResponses[13].returnCode == KmdSysman::Success) { memcpy_s(&value, sizeof(uint32_t), vResponses[13].dataBuffer, sizeof(uint32_t)); currentVoltageOffset = static_cast(value); } if (vResponses[14].returnCode == KmdSysman::Success) { memcpy_s(&value, sizeof(uint32_t), vResponses[14].dataBuffer, sizeof(uint32_t)); 
currentVoltageMode = value ? ZES_OC_MODE_OVERRIDE : ZES_OC_MODE_INTERPOLATIVE; } } WddmFrequencyImp::WddmFrequencyImp(OsSysman *pOsSysman, ze_bool_t onSubdevice, uint32_t subdeviceId, zes_freq_domain_t frequencyDomainNumber) { WddmSysmanImp *pWddmSysmanImp = static_cast(pOsSysman); this->frequencyDomainNumber = frequencyDomainNumber; pKmdSysManager = &pWddmSysmanImp->getKmdSysManager(); } OsFrequency *OsFrequency::create(OsSysman *pOsSysman, ze_bool_t onSubdevice, uint32_t subdeviceId, zes_freq_domain_t frequencyDomainNumber) { WddmFrequencyImp *pWddmFrequencyImp = new WddmFrequencyImp(pOsSysman, onSubdevice, subdeviceId, frequencyDomainNumber); return static_cast(pWddmFrequencyImp); } uint16_t OsFrequency::getNumberOfFreqDoainsSupported(OsSysman *pOsSysman) { WddmSysmanImp *pWddmSysmanImp = static_cast(pOsSysman); KmdSysManager *pKmdSysManager = &pWddmSysmanImp->getKmdSysManager(); KmdSysman::RequestProperty request; KmdSysman::ResponseProperty response; request.commandId = KmdSysman::Command::Get; request.componentId = KmdSysman::Component::FrequencyComponent; request.requestId = KmdSysman::Requests::Frequency::NumFrequencyDomains; ze_result_t status = pKmdSysManager->requestSingle(request, response); if (status != ZE_RESULT_SUCCESS) { return 0; } uint32_t maxNumEnginesSupported = 0; memcpy_s(&maxNumEnginesSupported, sizeof(uint32_t), response.dataBuffer, sizeof(uint32_t)); return static_cast(maxNumEnginesSupported); } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/frequency/windows/os_frequency_imp.h000066400000000000000000000065331422164147700327140ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/non_copyable_or_moveable.h" #include "sysman/frequency/os_frequency.h" #include "sysman/windows/os_sysman_imp.h" #define KMD_BIT_RANGE(endbit, startbit) ((endbit) - (startbit) + 1) namespace L0 { struct KmdThrottleReasons { union { 
uint32_t bitfield; struct { uint32_t thermal1 : KMD_BIT_RANGE(0, 0); uint32_t thermal2 : KMD_BIT_RANGE(1, 1); uint32_t reserved1 : KMD_BIT_RANGE(3, 2); uint32_t power1 : KMD_BIT_RANGE(4, 4); uint32_t power2 : KMD_BIT_RANGE(5, 5); uint32_t thermal3 : KMD_BIT_RANGE(6, 6); uint32_t thermal4 : KMD_BIT_RANGE(7, 7); uint32_t current1 : KMD_BIT_RANGE(8, 8); uint32_t reserved2 : KMD_BIT_RANGE(9, 9); uint32_t power3 : KMD_BIT_RANGE(10, 10); uint32_t power4 : KMD_BIT_RANGE(11, 11); uint32_t inefficient1 : KMD_BIT_RANGE(12, 12); uint32_t reserved3 : KMD_BIT_RANGE(13, 13); uint32_t inefficient2 : KMD_BIT_RANGE(14, 14); uint32_t reserved4 : KMD_BIT_RANGE(31, 15); }; }; }; class KmdSysManager; class WddmFrequencyImp : public OsFrequency, NEO::NonCopyableOrMovableClass { public: ze_result_t osFrequencyGetProperties(zes_freq_properties_t &properties) override; double osFrequencyGetStepSize() override; ze_result_t osFrequencyGetRange(zes_freq_range_t *pLimits) override; ze_result_t osFrequencySetRange(const zes_freq_range_t *pLimits) override; ze_result_t osFrequencyGetState(zes_freq_state_t *pState) override; ze_result_t osFrequencyGetThrottleTime(zes_freq_throttle_time_t *pThrottleTime) override; ze_result_t getOcCapabilities(zes_oc_capabilities_t *pOcCapabilities) override; ze_result_t getOcFrequencyTarget(double *pCurrentOcFrequency) override; ze_result_t setOcFrequencyTarget(double currentOcFrequency) override; ze_result_t getOcVoltageTarget(double *pCurrentVoltageTarget, double *pCurrentVoltageOffset) override; ze_result_t setOcVoltageTarget(double currentVoltageTarget, double currentVoltageOffset) override; ze_result_t getOcMode(zes_oc_mode_t *pCurrentOcMode) override; ze_result_t setOcMode(zes_oc_mode_t currentOcMode) override; ze_result_t getOcIccMax(double *pOcIccMax) override; ze_result_t setOcIccMax(double ocIccMax) override; ze_result_t getOcTjMax(double *pOcTjMax) override; ze_result_t setOcTjMax(double ocTjMax) override; WddmFrequencyImp(OsSysman *pOsSysman, 
ze_bool_t onSubdevice, uint32_t subdeviceId, zes_freq_domain_t type); WddmFrequencyImp() = default; ~WddmFrequencyImp() override = default; private: ze_result_t setRange(double min, double max); ze_result_t getRange(double *min, double *max); void readOverclockingInfo(); ze_result_t applyOcSettings(); double minRangeFreq = -1.0; double maxRangeFreq = -1.0; zes_oc_capabilities_t ocCapabilities = {}; zes_oc_mode_t currentVoltageMode = ZES_OC_MODE_OFF; double currentFrequencyTarget = -1.0; double currentVoltageTarget = -1.0; double currentVoltageOffset = -1.0; protected: KmdSysManager *pKmdSysManager = nullptr; zes_freq_domain_t frequencyDomainNumber = ZES_FREQ_DOMAIN_GPU; }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/global_operations/000077500000000000000000000000001422164147700271755ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/global_operations/CMakeLists.txt000066400000000000000000000015441422164147700317410ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_TOOLS_GLOBAL_OPERATIONS ${CMAKE_CURRENT_SOURCE_DIR}/global_operations.h ${CMAKE_CURRENT_SOURCE_DIR}/global_operations_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/global_operations_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/os_global_operations.h ) target_include_directories(${L0_STATIC_LIB_NAME} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/ ) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_GLOBAL_OPERATIONS} ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) if(UNIX) add_subdirectory(linux) else() add_subdirectory(windows) endif() # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_GLOBAL_OPERATIONS ${L0_SRCS_TOOLS_GLOBAL_OPERATIONS}) compute-runtime-22.14.22890/level_zero/tools/source/sysman/global_operations/global_operations.h000066400000000000000000000011031422164147700330440ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * 
SPDX-License-Identifier: MIT * */ #pragma once #include namespace L0 { class GlobalOperations { public: virtual ~GlobalOperations(){}; virtual ze_result_t reset(ze_bool_t force) = 0; virtual ze_result_t deviceGetProperties(zes_device_properties_t *pProperties) = 0; virtual ze_result_t processesGetState(uint32_t *pCount, zes_process_state_t *pProcesses) = 0; virtual ze_result_t deviceGetState(zes_device_state_t *pState) = 0; virtual void init() = 0; }; } // namespace L0 global_operations_imp.cpp000066400000000000000000000053071422164147700341770ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/global_operations/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "global_operations_imp.h" #include "shared/source/helpers/basic_math.h" #include "shared/source/helpers/debug_helpers.h" #include "level_zero/core/source/device/device.h" namespace L0 { ze_result_t GlobalOperationsImp::processesGetState(uint32_t *pCount, zes_process_state_t *pProcesses) { std::vector pProcessList; ze_result_t result = pOsGlobalOperations->scanProcessesState(pProcessList); if (result != ZE_RESULT_SUCCESS) { return result; } if ((*pCount > 0) && (*pCount < pProcessList.size())) { result = ZE_RESULT_ERROR_INVALID_SIZE; } if (pProcesses != nullptr) { uint32_t limit = std::min(*pCount, static_cast(pProcessList.size())); for (uint32_t i = 0; i < limit; i++) { pProcesses[i].processId = pProcessList[i].processId; pProcesses[i].engines = pProcessList[i].engines; pProcesses[i].memSize = pProcessList[i].memSize; pProcesses[i].sharedSize = pProcessList[i].sharedSize; } } *pCount = static_cast(pProcessList.size()); return result; } ze_result_t GlobalOperationsImp::deviceGetProperties(zes_device_properties_t *pProperties) { Device *device = pOsGlobalOperations->getDevice(); ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES}; device->getProperties(&deviceProperties); sysmanProperties.core = 
deviceProperties; uint32_t count = 0; device->getSubDevices(&count, nullptr); sysmanProperties.numSubdevices = count; *pProperties = sysmanProperties; return ZE_RESULT_SUCCESS; } ze_result_t GlobalOperationsImp::reset(ze_bool_t force) { return pOsGlobalOperations->reset(force); } ze_result_t GlobalOperationsImp::deviceGetState(zes_device_state_t *pState) { return pOsGlobalOperations->deviceGetState(pState); } void GlobalOperationsImp::init() { if (pOsGlobalOperations == nullptr) { pOsGlobalOperations = OsGlobalOperations::create(pOsSysman); } UNRECOVERABLE_IF(nullptr == pOsGlobalOperations); pOsGlobalOperations->getVendorName(sysmanProperties.vendorName); pOsGlobalOperations->getDriverVersion(sysmanProperties.driverVersion); pOsGlobalOperations->getModelName(sysmanProperties.modelName); pOsGlobalOperations->getBrandName(sysmanProperties.brandName); pOsGlobalOperations->getBoardNumber(sysmanProperties.boardNumber); pOsGlobalOperations->getSerialNumber(sysmanProperties.serialNumber); } GlobalOperationsImp::~GlobalOperationsImp() { if (nullptr != pOsGlobalOperations) { delete pOsGlobalOperations; } } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/global_operations/global_operations_imp.h000066400000000000000000000020121422164147700337110ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/non_copyable_or_moveable.h" #include #include "global_operations.h" #include "os_global_operations.h" #include namespace L0 { class GlobalOperationsImp : public GlobalOperations, NEO::NonCopyableOrMovableClass { public: void init() override; ze_result_t reset(ze_bool_t force) override; ze_result_t deviceGetProperties(zes_device_properties_t *pProperties) override; ze_result_t processesGetState(uint32_t *pCount, zes_process_state_t *pProcesses) override; ze_result_t deviceGetState(zes_device_state_t *pState) override; OsGlobalOperations 
*pOsGlobalOperations = nullptr; GlobalOperationsImp() = default; GlobalOperationsImp(OsSysman *pOsSysman) : pOsSysman(pOsSysman){}; ~GlobalOperationsImp() override; private: OsSysman *pOsSysman = nullptr; zes_device_properties_t sysmanProperties = {}; }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/global_operations/linux/000077500000000000000000000000001422164147700303345ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/global_operations/linux/CMakeLists.txt000066400000000000000000000015771422164147700331060ustar00rootroot00000000000000# # Copyright (C) 2020-2022 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_TOOLS_GLOBAL_OPERATIONS_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/os_global_operations_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/os_global_operations_imp.h ) if(NEO_ENABLE_i915_PRELIM_DETECTION) list(APPEND L0_SRCS_TOOLS_GLOBAL_OPERATIONS_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/os_global_operations_helper_prelim.cpp ) else() list(APPEND L0_SRCS_TOOLS_GLOBAL_OPERATIONS_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/os_global_operations_helper.cpp ) endif() if(UNIX) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_GLOBAL_OPERATIONS_LINUX} ) endif() # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_GLOBAL_OPERATIONS_LINUX ${L0_SRCS_TOOLS_GLOBAL_OPERATIONS_LINUX}) os_global_operations_helper.cpp000066400000000000000000000004421422164147700365240ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/global_operations/linux/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/global_operations/linux/os_global_operations_imp.h" namespace L0 { void LinuxGlobalOperationsImp::getRepairStatus(zes_device_state_t *pState) {} } // namespace 
L0os_global_operations_helper_prelim.cpp000066400000000000000000000017441422164147700401020ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/global_operations/linux/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/global_operations/linux/os_global_operations_imp.h" namespace L0 { void LinuxGlobalOperationsImp::getRepairStatus(zes_device_state_t *pState) { bool ifrStatus = false; if (IGFX_PVC == pLinuxSysmanImp->getProductFamily()) { auto pFwInterface = pLinuxSysmanImp->getFwUtilInterface(); if (pFwInterface != nullptr) { if (ZE_RESULT_SUCCESS == pFwInterface->fwDeviceInit()) { auto result = pFwInterface->fwIfrApplied(ifrStatus); if (result == ZE_RESULT_SUCCESS) { pState->repaired = ZES_REPAIR_STATUS_NOT_PERFORMED; if (ifrStatus) { pState->reset |= ZES_RESET_REASON_FLAG_REPAIR; pState->repaired = ZES_REPAIR_STATUS_PERFORMED; } } } } } } } // namespace L0 os_global_operations_imp.cpp000066400000000000000000000427421422164147700360430ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/global_operations/linux/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/global_operations/linux/os_global_operations_imp.h" #include "shared/source/os_interface/device_factory.h" #include "level_zero/core/source/device/device_imp.h" #include "level_zero/tools/source/sysman/global_operations/global_operations_imp.h" #include "level_zero/tools/source/sysman/linux/fs_access.h" #include "level_zero/tools/source/sysman/sysman_const.h" #include #include #include namespace L0 { const std::string LinuxGlobalOperationsImp::deviceDir("device"); const std::string LinuxGlobalOperationsImp::subsystemVendorFile("device/subsystem_vendor"); const std::string LinuxGlobalOperationsImp::driverFile("device/driver"); const std::string 
LinuxGlobalOperationsImp::functionLevelReset("device/reset"); const std::string LinuxGlobalOperationsImp::clientsDir("clients"); const std::string LinuxGlobalOperationsImp::srcVersionFile("/sys/module/i915/srcversion"); const std::string LinuxGlobalOperationsImp::agamaVersionFile("/sys/module/i915/agama_version"); const std::string LinuxGlobalOperationsImp::ueventWedgedFile("/var/lib/libze_intel_gpu/wedged_file"); // Map engine entries(numeric values) present in /sys/class/drm/card/clients//busy, // with engine enum defined in leve-zero spec // Note that entries with int 2 and 3(represented by i915 as CLASS_VIDEO and CLASS_VIDEO_ENHANCE) // are both mapped to MEDIA, as CLASS_VIDEO represents any media fixed-function hardware. static const std::map engineMap = { {0, ZES_ENGINE_TYPE_FLAG_3D}, {1, ZES_ENGINE_TYPE_FLAG_DMA}, {2, ZES_ENGINE_TYPE_FLAG_MEDIA}, {3, ZES_ENGINE_TYPE_FLAG_MEDIA}, {4, ZES_ENGINE_TYPE_FLAG_COMPUTE}}; void LinuxGlobalOperationsImp::getSerialNumber(char (&serialNumber)[ZES_STRING_PROPERTY_SIZE]) { std::strncpy(serialNumber, unknown.c_str(), ZES_STRING_PROPERTY_SIZE); } Device *LinuxGlobalOperationsImp::getDevice() { return pDevice; } void LinuxGlobalOperationsImp::getBoardNumber(char (&boardNumber)[ZES_STRING_PROPERTY_SIZE]) { std::strncpy(boardNumber, unknown.c_str(), ZES_STRING_PROPERTY_SIZE); } void LinuxGlobalOperationsImp::getBrandName(char (&brandName)[ZES_STRING_PROPERTY_SIZE]) { std::string strVal; ze_result_t result = pSysfsAccess->read(subsystemVendorFile, strVal); if (ZE_RESULT_SUCCESS != result) { std::strncpy(brandName, unknown.c_str(), ZES_STRING_PROPERTY_SIZE); return; } if (strVal.compare(intelPciId) == 0) { std::strncpy(brandName, vendorIntel.c_str(), ZES_STRING_PROPERTY_SIZE); } else { std::strncpy(brandName, unknown.c_str(), ZES_STRING_PROPERTY_SIZE); } } void LinuxGlobalOperationsImp::getModelName(char (&modelName)[ZES_STRING_PROPERTY_SIZE]) { NEO::Device *neoDevice = pDevice->getNEODevice(); std::string deviceModelName = 
neoDevice->getDeviceName(neoDevice->getHardwareInfo()); std::strncpy(modelName, deviceModelName.c_str(), ZES_STRING_PROPERTY_SIZE); } void LinuxGlobalOperationsImp::getVendorName(char (&vendorName)[ZES_STRING_PROPERTY_SIZE]) { ze_device_properties_t coreDeviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES}; pDevice->getProperties(&coreDeviceProperties); std::stringstream pciId; pciId << std::hex << coreDeviceProperties.vendorId; if (("0x" + pciId.str()).compare(intelPciId) == 0) { std::strncpy(vendorName, vendorIntel.c_str(), ZES_STRING_PROPERTY_SIZE); } else { std::strncpy(vendorName, unknown.c_str(), ZES_STRING_PROPERTY_SIZE); } } void LinuxGlobalOperationsImp::getDriverVersion(char (&driverVersion)[ZES_STRING_PROPERTY_SIZE]) { std::string strVal; std::strncpy(driverVersion, unknown.c_str(), ZES_STRING_PROPERTY_SIZE); ze_result_t result = pFsAccess->read(agamaVersionFile, strVal); if (ZE_RESULT_SUCCESS != result) { if (ZE_RESULT_ERROR_NOT_AVAILABLE != result) { return; } result = pFsAccess->read(srcVersionFile, strVal); if (ZE_RESULT_SUCCESS != result) { return; } } std::strncpy(driverVersion, strVal.c_str(), ZES_STRING_PROPERTY_SIZE); return; } ze_result_t LinuxGlobalOperationsImp::reset(ze_bool_t force) { if (!pSysfsAccess->isRootUser()) { return ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS; } std::string resetPath; std::string resetName; ze_result_t result = ZE_RESULT_SUCCESS; ::pid_t myPid = pProcfsAccess->myProcessId(); std::vector myPidFds; std::vector<::pid_t> processes; result = pProcfsAccess->listProcesses(processes); if (ZE_RESULT_SUCCESS != result) { return result; } for (auto &&pid : processes) { std::vector fds; pLinuxSysmanImp->getPidFdsForOpenDevice(pProcfsAccess, pSysfsAccess, pid, fds); if (pid == myPid) { // L0 is expected to have this file open. // Keep list of fds. Close before unbind. myPidFds = fds; } else if (!fds.empty()) { if (force) { pProcfsAccess->kill(pid); } else { // Device is in use by another process. // Don't reset while in use. 
return ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE; } } } pSysfsAccess->getRealPath(deviceDir, resetName); resetName = pFsAccess->getBaseName(resetName); ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES}; pDevice->getProperties(&deviceProperties); if (!(deviceProperties.flags & ZE_DEVICE_PROPERTY_FLAG_INTEGRATED)) { result = pSysfsAccess->unbindDevice(resetName); if (ZE_RESULT_SUCCESS != result) { return result; } return pLinuxSysmanImp->osWarmReset(); } pSysfsAccess->getRealPath(functionLevelReset, resetPath); // Must run as root. Verify permission to perform reset. result = pFsAccess->canWrite(resetPath); if (ZE_RESULT_SUCCESS != result) { return result; } ExecutionEnvironmentRefCountRestore restorer(executionEnvironment); pLinuxSysmanImp->releaseDeviceResources(); for (auto &&fd : myPidFds) { // Close open filedescriptors to the device // before unbinding device. // From this point forward, there is no // graceful way to fail the reset call. // All future ze calls by this process for this // device will fail. ::close(fd); } // Unbind the device from the kernel driver. result = pSysfsAccess->unbindDevice(resetName); if (ZE_RESULT_SUCCESS != result) { return result; } // If someone opened the device // after we check, kill them here. result = pProcfsAccess->listProcesses(processes); if (ZE_RESULT_SUCCESS != result) { return result; } std::vector<::pid_t> deviceUsingPids; deviceUsingPids.clear(); for (auto &&pid : processes) { std::vector fds; pLinuxSysmanImp->getPidFdsForOpenDevice(pProcfsAccess, pSysfsAccess, pid, fds); if (!fds.empty()) { // Kill all processes that have the device open. pProcfsAccess->kill(pid); deviceUsingPids.push_back(pid); } } // Wait for all the processes to exit // If they don't all exit within resetTimeout // just fail reset. 
auto start = std::chrono::steady_clock::now(); auto end = start; for (auto &&pid : deviceUsingPids) { while (pProcfsAccess->isAlive(pid)) { if (std::chrono::duration_cast(end - start).count() > resetTimeout) { return ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE; } struct ::timespec timeout = {.tv_sec = 0, .tv_nsec = 1000}; ::nanosleep(&timeout, NULL); end = std::chrono::steady_clock::now(); } } // Reset the device. result = pFsAccess->write(resetPath, "1"); if (ZE_RESULT_SUCCESS != result) { return result; } // Rebind the device to the kernel driver. result = pSysfsAccess->bindDevice(resetName); if (ZE_RESULT_SUCCESS != result) { return result; } return pLinuxSysmanImp->initDevice(); } // Processes in the form of clients are present in sysfs like this: // # /sys/class/drm/card0/clients$ ls // 4 5 // # /sys/class/drm/card0/clients/4$ ls // busy name pid // # /sys/class/drm/card0/clients/4/busy$ ls // 0 1 2 3 // // Number of processes(If one process opened drm device multiple times, then multiple entries will be // present for same process in clients directory) will be the number of clients // (For example from above example, processes dirs are 4,5) // Thus total number of times drm connection opened with this device will be 2. // process.pid = pid (from above example) // process.engines -> For each client's busy dir, numbers 0,1,2,3 represent engines and they contain // accumulated nanoseconds each client spent on engines. // Thus we traverse each file in busy dir for non-zero time and if we find that file say 0,then we could say that // this engine 0 is used by process. 
ze_result_t LinuxGlobalOperationsImp::scanProcessesState(std::vector &pProcessList) { std::vector clientIds; struct deviceMemStruct { uint64_t deviceMemorySize; uint64_t deviceSharedMemorySize; }; struct engineMemoryPairType { int64_t engineTypeField; deviceMemStruct deviceMemStructField; }; ze_result_t result = pSysfsAccess->scanDirEntries(clientsDir, clientIds); if (ZE_RESULT_SUCCESS != result) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } // Create a map with unique pid as key and engineType as value std::map pidClientMap; for (const auto &clientId : clientIds) { // realClientPidPath will be something like: clients//pid std::string realClientPidPath = clientsDir + "/" + clientId + "/" + "pid"; uint64_t pid; result = pSysfsAccess->read(realClientPidPath, pid); if (ZE_RESULT_SUCCESS != result) { std::string bPidString; result = pSysfsAccess->read(realClientPidPath, bPidString); if (result == ZE_RESULT_SUCCESS) { size_t start = bPidString.find("<"); size_t end = bPidString.find(">"); std::string bPid = bPidString.substr(start + 1, end - start - 1); pid = std::stoull(bPid, nullptr, 10); } } if (ZE_RESULT_SUCCESS != result) { if (ZE_RESULT_ERROR_NOT_AVAILABLE == result) { // update the result as Success as ZE_RESULT_ERROR_NOT_AVAILABLE is expected if the "realClientPidPath" folder is empty // this condition(when encountered) must not prevent the information accumulated for other clientIds // this situation occurs when there is no call modifying result, result = ZE_RESULT_SUCCESS; continue; } else { return result; } } // Traverse the clients//busy directory to get accelerator engines used by process std::vector engineNums = {}; int64_t engineType = 0; std::string busyDirForEngines = clientsDir + "/" + clientId + "/" + "busy"; result = pSysfsAccess->scanDirEntries(busyDirForEngines, engineNums); if (ZE_RESULT_SUCCESS != result) { if (ZE_RESULT_ERROR_NOT_AVAILABLE == result) { // update the result as Success as ZE_RESULT_ERROR_NOT_AVAILABLE is expected if the 
"realClientPidPath" folder is empty // this condition(when encountered) must not prevent the information accumulated for other clientIds // this situation occurs when there is no call modifying result, // Here its seen when the last element of clientIds returns ZE_RESULT_ERROR_NOT_AVAILABLE for some reason. engineType = ZES_ENGINE_TYPE_FLAG_OTHER; // When busy node is absent assign engine type with ZES_ENGINE_TYPE_FLAG_OTHER } else { return result; } } // Scan all engine files present in /sys/class/drm/card0/clients//busy and check // whether that engine is used by process for (const auto &engineNum : engineNums) { uint64_t timeSpent = 0; std::string engine = busyDirForEngines + "/" + engineNum; result = pSysfsAccess->read(engine, timeSpent); if (ZE_RESULT_SUCCESS != result) { if (ZE_RESULT_ERROR_NOT_AVAILABLE == result) { continue; } else { return result; } } if (timeSpent > 0) { int i915EnginNumber = stoi(engineNum); auto i915MapToL0EngineType = engineMap.find(i915EnginNumber); zes_engine_type_flags_t val = ZES_ENGINE_TYPE_FLAG_OTHER; if (i915MapToL0EngineType != engineMap.end()) { // Found a valid map val = i915MapToL0EngineType->second; } // In this for loop we want to retrieve the overall engines used by process engineType = engineType | val; } } uint64_t memSize = 0; std::string realClientTotalMemoryPath = clientsDir + "/" + clientId + "/" + "total_device_memory_buffer_objects" + "/" + "created_bytes"; result = pSysfsAccess->read(realClientTotalMemoryPath, memSize); if (ZE_RESULT_SUCCESS != result) { if (ZE_RESULT_ERROR_NOT_AVAILABLE != result) { return result; } } uint64_t sharedMemSize = 0; std::string realClientTotalSharedMemoryPath = clientsDir + "/" + clientId + "/" + "total_device_memory_buffer_objects" + "/" + "imported_bytes"; result = pSysfsAccess->read(realClientTotalSharedMemoryPath, sharedMemSize); if (ZE_RESULT_SUCCESS != result) { if (ZE_RESULT_ERROR_NOT_AVAILABLE != result) { return result; } } deviceMemStruct totalDeviceMem = {memSize, 
sharedMemSize}; engineMemoryPairType engineMemoryPair = {engineType, totalDeviceMem}; auto ret = pidClientMap.insert(std::make_pair(pid, engineMemoryPair)); if (ret.second == false) { // insertion failed as entry with same pid already exists in map // Now update the engineMemoryPairType field for the existing pid entry engineMemoryPairType updateEngineMemoryPair; auto pidEntryFromMap = pidClientMap.find(pid); auto existingEngineType = pidEntryFromMap->second.engineTypeField; auto existingdeviceMemorySize = pidEntryFromMap->second.deviceMemStructField.deviceMemorySize; auto existingdeviceSharedMemorySize = pidEntryFromMap->second.deviceMemStructField.deviceSharedMemorySize; updateEngineMemoryPair.engineTypeField = existingEngineType | engineMemoryPair.engineTypeField; updateEngineMemoryPair.deviceMemStructField.deviceMemorySize = existingdeviceMemorySize + engineMemoryPair.deviceMemStructField.deviceMemorySize; updateEngineMemoryPair.deviceMemStructField.deviceSharedMemorySize = existingdeviceSharedMemorySize + engineMemoryPair.deviceMemStructField.deviceSharedMemorySize; pidClientMap[pid] = updateEngineMemoryPair; } result = ZE_RESULT_SUCCESS; } // iterate through all elements of pidClientMap for (auto itr = pidClientMap.begin(); itr != pidClientMap.end(); ++itr) { zes_process_state_t process; process.processId = static_cast(itr->first); process.memSize = itr->second.deviceMemStructField.deviceMemorySize; process.sharedSize = itr->second.deviceMemStructField.deviceSharedMemorySize; process.engines = static_cast(itr->second.engineTypeField); pProcessList.push_back(process); } return result; } void LinuxGlobalOperationsImp::getWedgedStatus(zes_device_state_t *pState) { uint32_t valWedged = 0; if (ZE_RESULT_SUCCESS == pFsAccess->read(ueventWedgedFile, valWedged)) { if (valWedged != 0) { pState->reset |= ZES_RESET_REASON_FLAG_WEDGED; } } } ze_result_t LinuxGlobalOperationsImp::deviceGetState(zes_device_state_t *pState) { memset(pState, 0, sizeof(zes_device_state_t)); 
pState->repaired = ZES_REPAIR_STATUS_UNSUPPORTED; getWedgedStatus(pState); getRepairStatus(pState); return ZE_RESULT_SUCCESS; } LinuxGlobalOperationsImp::LinuxGlobalOperationsImp(OsSysman *pOsSysman) { pLinuxSysmanImp = static_cast(pOsSysman); pSysfsAccess = &pLinuxSysmanImp->getSysfsAccess(); pProcfsAccess = &pLinuxSysmanImp->getProcfsAccess(); pFsAccess = &pLinuxSysmanImp->getFsAccess(); pDevice = pLinuxSysmanImp->getDeviceHandle(); auto device = static_cast(pDevice); devicePciBdf = device->getNEODevice()->getRootDeviceEnvironment().osInterface->getDriverModel()->as()->getPciPath(); executionEnvironment = device->getNEODevice()->getExecutionEnvironment(); rootDeviceIndex = device->getNEODevice()->getRootDeviceIndex(); } OsGlobalOperations *OsGlobalOperations::create(OsSysman *pOsSysman) { LinuxGlobalOperationsImp *pLinuxGlobalOperationsImp = new LinuxGlobalOperationsImp(pOsSysman); return static_cast(pLinuxGlobalOperationsImp); } } // namespace L0 os_global_operations_imp.h000066400000000000000000000044401422164147700355010ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/global_operations/linux/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/tools/source/sysman/global_operations/os_global_operations.h" #include "level_zero/tools/source/sysman/linux/os_sysman_imp.h" namespace L0 { class SysfsAccess; struct Device; class LinuxGlobalOperationsImp : public OsGlobalOperations, NEO::NonCopyableOrMovableClass { public: void getSerialNumber(char (&serialNumber)[ZES_STRING_PROPERTY_SIZE]) override; void getBoardNumber(char (&boardNumber)[ZES_STRING_PROPERTY_SIZE]) override; void getBrandName(char (&brandName)[ZES_STRING_PROPERTY_SIZE]) override; void getModelName(char (&modelName)[ZES_STRING_PROPERTY_SIZE]) override; void getVendorName(char (&vendorName)[ZES_STRING_PROPERTY_SIZE]) override; void getDriverVersion(char (&driverVersion)[ZES_STRING_PROPERTY_SIZE]) 
override; void getWedgedStatus(zes_device_state_t *pState) override; void getRepairStatus(zes_device_state_t *pState) override; Device *getDevice() override; ze_result_t reset(ze_bool_t force) override; ze_result_t scanProcessesState(std::vector &pProcessList) override; ze_result_t deviceGetState(zes_device_state_t *pState) override; LinuxGlobalOperationsImp() = default; LinuxGlobalOperationsImp(OsSysman *pOsSysman); ~LinuxGlobalOperationsImp() override = default; protected: FsAccess *pFsAccess = nullptr; ProcfsAccess *pProcfsAccess = nullptr; SysfsAccess *pSysfsAccess = nullptr; LinuxSysmanImp *pLinuxSysmanImp = nullptr; Device *pDevice = nullptr; int resetTimeout = 10000; // in milliseconds void releaseSysmanDeviceResources(); void releaseDeviceResources(); ze_result_t initDevice(); void reInitSysmanDeviceResources(); private: static const std::string deviceDir; static const std::string subsystemVendorFile; static const std::string driverFile; static const std::string functionLevelReset; static const std::string clientsDir; static const std::string srcVersionFile; static const std::string agamaVersionFile; static const std::string ueventWedgedFile; std::string devicePciBdf = ""; NEO::ExecutionEnvironment *executionEnvironment = nullptr; uint32_t rootDeviceIndex = 0u; }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/global_operations/os_global_operations.h000066400000000000000000000024741422164147700335610ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/core/source/device/device.h" #include "level_zero/tools/source/sysman/os_sysman.h" #include #include #include namespace L0 { class OsGlobalOperations { public: virtual void getSerialNumber(char (&serialNumber)[ZES_STRING_PROPERTY_SIZE]) = 0; virtual void getBoardNumber(char (&boardNumber)[ZES_STRING_PROPERTY_SIZE]) = 0; virtual void getBrandName(char 
(&brandName)[ZES_STRING_PROPERTY_SIZE]) = 0; virtual void getModelName(char (&modelName)[ZES_STRING_PROPERTY_SIZE]) = 0; virtual void getVendorName(char (&vendorName)[ZES_STRING_PROPERTY_SIZE]) = 0; virtual void getDriverVersion(char (&driverVersion)[ZES_STRING_PROPERTY_SIZE]) = 0; virtual void getWedgedStatus(zes_device_state_t *pState) = 0; virtual void getRepairStatus(zes_device_state_t *pState) = 0; virtual Device *getDevice() = 0; virtual ze_result_t reset(ze_bool_t force) = 0; virtual ze_result_t scanProcessesState(std::vector &pProcessList) = 0; virtual ze_result_t deviceGetState(zes_device_state_t *pState) = 0; static OsGlobalOperations *create(OsSysman *pOsSysman); virtual ~OsGlobalOperations() {} }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/global_operations/windows/000077500000000000000000000000001422164147700306675ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/global_operations/windows/CMakeLists.txt000066400000000000000000000011271422164147700334300ustar00rootroot00000000000000# # Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_TOOLS_GLOBAL_OPERATIONS_WINDOWS ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/os_global_operations_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/os_global_operations_imp.cpp ) if(WIN32) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_GLOBAL_OPERATIONS_WINDOWS} ) endif() # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_GLOBAL_OPERATIONS_WINDOWS ${L0_SRCS_TOOLS_GLOBAL_OPERATIONS_WINDOWS}) os_global_operations_imp.cpp000066400000000000000000000045631422164147700363750ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/global_operations/windows/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/global_operations/windows/os_global_operations_imp.h" 
namespace L0 { Device *WddmGlobalOperationsImp::getDevice() { return pDevice; } void WddmGlobalOperationsImp::getSerialNumber(char (&serialNumber)[ZES_STRING_PROPERTY_SIZE]) { } void WddmGlobalOperationsImp::getBoardNumber(char (&boardNumber)[ZES_STRING_PROPERTY_SIZE]) { } void WddmGlobalOperationsImp::getBrandName(char (&brandName)[ZES_STRING_PROPERTY_SIZE]) { } void WddmGlobalOperationsImp::getModelName(char (&modelName)[ZES_STRING_PROPERTY_SIZE]) { } void WddmGlobalOperationsImp::getVendorName(char (&vendorName)[ZES_STRING_PROPERTY_SIZE]) { } void WddmGlobalOperationsImp::getDriverVersion(char (&driverVersion)[ZES_STRING_PROPERTY_SIZE]) { } void WddmGlobalOperationsImp::getWedgedStatus(zes_device_state_t *pState) { } void WddmGlobalOperationsImp::getRepairStatus(zes_device_state_t *pState) { } ze_result_t WddmGlobalOperationsImp::reset(ze_bool_t force) { uint32_t value = 0; KmdSysman::RequestProperty request; KmdSysman::ResponseProperty response; request.commandId = KmdSysman::Command::Set; request.componentId = KmdSysman::Component::GlobalOperationsComponent; request.requestId = KmdSysman::Requests::GlobalOperation::TriggerDeviceLevelReset; request.dataSize = sizeof(uint32_t); value = static_cast(force); memcpy_s(request.dataBuffer, sizeof(uint32_t), &value, sizeof(uint32_t)); return pKmdSysManager->requestSingle(request, response); } ze_result_t WddmGlobalOperationsImp::scanProcessesState(std::vector &pProcessList) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t WddmGlobalOperationsImp::deviceGetState(zes_device_state_t *pState) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } WddmGlobalOperationsImp::WddmGlobalOperationsImp(OsSysman *pOsSysman) { WddmSysmanImp *pWddmSysmanImp = static_cast(pOsSysman); pDevice = pWddmSysmanImp->getDeviceHandle(); pKmdSysManager = &pWddmSysmanImp->getKmdSysManager(); } OsGlobalOperations *OsGlobalOperations::create(OsSysman *pOsSysman) { WddmGlobalOperationsImp *pWddmGlobalOperationsImp = new 
WddmGlobalOperationsImp(pOsSysman); return static_cast(pWddmGlobalOperationsImp); } } // namespace L0 os_global_operations_imp.h000066400000000000000000000032201422164147700360270ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/global_operations/windows/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/non_copyable_or_moveable.h" #include "sysman/global_operations/os_global_operations.h" #include "sysman/windows/os_sysman_imp.h" namespace L0 { class KmdSysManager; class WddmGlobalOperationsImp : public OsGlobalOperations, NEO::NonCopyableOrMovableClass { public: void getSerialNumber(char (&serialNumber)[ZES_STRING_PROPERTY_SIZE]) override; void getBoardNumber(char (&boardNumber)[ZES_STRING_PROPERTY_SIZE]) override; void getBrandName(char (&brandName)[ZES_STRING_PROPERTY_SIZE]) override; void getModelName(char (&modelName)[ZES_STRING_PROPERTY_SIZE]) override; void getVendorName(char (&vendorName)[ZES_STRING_PROPERTY_SIZE]) override; void getDriverVersion(char (&driverVersion)[ZES_STRING_PROPERTY_SIZE]) override; void getWedgedStatus(zes_device_state_t *pState) override; void getRepairStatus(zes_device_state_t *pState) override; Device *getDevice() override; ze_result_t reset(ze_bool_t force) override; ze_result_t scanProcessesState(std::vector &pProcessList) override; ze_result_t deviceGetState(zes_device_state_t *pState) override; WddmGlobalOperationsImp(OsSysman *pOsSysman); WddmGlobalOperationsImp(const WddmGlobalOperationsImp &obj) = delete; WddmGlobalOperationsImp() = default; WddmGlobalOperationsImp &operator=(const WddmGlobalOperationsImp &obj) = delete; ~WddmGlobalOperationsImp() override = default; private: Device *pDevice = nullptr; protected: KmdSysManager *pKmdSysManager = nullptr; }; } // namespace L0 
compute-runtime-22.14.22890/level_zero/tools/source/sysman/linux/000077500000000000000000000000001422164147700246315ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/linux/CMakeLists.txt000066400000000000000000000021531422164147700273720ustar00rootroot00000000000000# # Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_TOOLS_SYSMAN_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/os_sysman_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/os_sysman_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/fs_access.cpp ) if(UNIX) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_SYSMAN_LINUX} ) endif() add_subdirectories() # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_SYSMAN_LINUX ${L0_SRCS_TOOLS_SYSMAN_LINUX}) if(L0_INSTALL_UDEV_RULES) install( FILES "${CMAKE_CURRENT_SOURCE_DIR}/udev/99-drm_ze_intel_gpu.rules" DESTINATION ${UDEV_RULES_DIR} COMPONENT ${PROJECT_NAME} ) install( FILES "${CMAKE_CURRENT_SOURCE_DIR}/udev/wedged_file" DESTINATION "/var/lib/libze_intel_gpu/" COMPONENT ${PROJECT_NAME} ) install( FILES "${CMAKE_CURRENT_SOURCE_DIR}/udev/pci_bind_status_file" DESTINATION "/var/lib/libze_intel_gpu/" COMPONENT ${PROJECT_NAME} ) endif() compute-runtime-22.14.22890/level_zero/tools/source/sysman/linux/firmware_util/000077500000000000000000000000001422164147700275025ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/linux/firmware_util/CMakeLists.txt000066400000000000000000000024221422164147700322420ustar00rootroot00000000000000# # Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT # if(igsc_FOUND) set(L0_SRCS_TOOLS_SYSMAN_LINUX_FIRMWARE_UTIL ${CMAKE_CURRENT_SOURCE_DIR}/firmware_util_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/firmware_util_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/firmware_util.h ) if(igsc_VERSION VERSION_LESS 0.3) message(STATUS "default igsc version: ${igsc_VERSION}") 
set(L0_SRCS_TOOLS_SYSMAN_LINUX_FIRMWARE_UTIL_HELPER ${CMAKE_CURRENT_SOURCE_DIR}/firmware_util_imp_helper.cpp ) else() message(STATUS "IGSC version: ${igsc_VERSION}") set(L0_SRCS_TOOLS_SYSMAN_LINUX_FIRMWARE_UTIL_HELPER ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}firmware_util_imp_helper.cpp ) endif() else() set(L0_SRCS_TOOLS_SYSMAN_LINUX_FIRMWARE_UTIL ${CMAKE_CURRENT_SOURCE_DIR}/firmware_util_imp_stub.cpp ${CMAKE_CURRENT_SOURCE_DIR}/firmware_util.h ) endif() if(UNIX) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_SYSMAN_LINUX_FIRMWARE_UTIL} ${L0_SRCS_TOOLS_SYSMAN_LINUX_FIRMWARE_UTIL_HELPER} ) endif() # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_SYSMAN_FIRMWARE_UTIL_LINUX ${L0_SRCS_TOOLS_SYSMAN_FIRMWARE_UTIL_LINUX}) compute-runtime-22.14.22890/level_zero/tools/source/sysman/linux/firmware_util/firmware_util.h000066400000000000000000000024301422164147700325230ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/non_copyable_or_moveable.h" #include "level_zero/core/source/device/device.h" #ifdef IGSC_PRESENT #include "igsc_lib.h" #else typedef struct igsc_device_info { } igsc_device_info_t; #endif #include #include namespace L0 { class FirmwareUtil { public: static FirmwareUtil *create(const std::string &pciBDF); virtual ze_result_t fwDeviceInit() = 0; virtual ze_result_t getFirstDevice(igsc_device_info *) = 0; virtual ze_result_t getFwVersion(std::string fwType, std::string &firmwareVersion) = 0; virtual ze_result_t flashFirmware(std::string fwType, void *pImage, uint32_t size) = 0; virtual ze_result_t fwIfrApplied(bool &ifrStatus) = 0; virtual ze_result_t fwSupportedDiagTests(std::vector &supportedDiagTests) = 0; virtual ze_result_t fwRunDiagTests(std::string &osDiagType, zes_diag_result_t *pDiagResult) = 0; virtual ze_result_t fwGetMemoryErrorCount(zes_ras_error_type_t type, uint32_t subDeviceCount, 
uint32_t subDeviceId, uint64_t &count) = 0; virtual void getDeviceSupportedFwTypes(std::vector &fwTypes) = 0; virtual ~FirmwareUtil() = default; }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/linux/firmware_util/firmware_util_imp.cpp000066400000000000000000000167501422164147700337350ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/linux/firmware_util/firmware_util_imp.h" #include "shared/source/utilities/directory.h" namespace L0 { const std::string fwUtilLibraryFile = "libigsc.so.0"; const std::string fwDeviceInitByDevice = "igsc_device_init_by_device_info"; const std::string fwDeviceGetDeviceInfo = "igsc_device_get_device_info"; const std::string fwDeviceFwVersion = "igsc_device_fw_version"; const std::string fwDeviceIteratorCreate = "igsc_device_iterator_create"; const std::string fwDeviceIteratorNext = "igsc_device_iterator_next"; const std::string fwDeviceIteratorDestroy = "igsc_device_iterator_destroy"; const std::string fwDeviceFwUpdate = "igsc_device_fw_update"; const std::string fwImageOpromInit = "igsc_image_oprom_init"; const std::string fwImageOpromType = "igsc_image_oprom_type"; const std::string fwDeviceOpromUpdate = "igsc_device_oprom_update"; const std::string fwDeviceOpromVersion = "igsc_device_oprom_version"; const std::string fwDeviceClose = "igsc_device_close"; pIgscDeviceInitByDevice deviceInitByDevice; pIgscDeviceGetDeviceInfo deviceGetDeviceInfo; pIgscDeviceFwVersion deviceGetFwVersion; pIgscDeviceIteratorCreate deviceIteratorCreate; pIgscDeviceIteratorNext deviceItreatorNext; pIgscDeviceIteratorDestroy deviceItreatorDestroy; pIgscDeviceFwUpdate deviceFwUpdate; pIgscImageOpromInit imageOpromInit; pIgscImageOpromType imageOpromType; pIgscDeviceOpromUpdate deviceOpromUpdate; pIgscDeviceOpromVersion deviceOpromVersion; pIgscDeviceClose deviceClose; bool FirmwareUtilImp::loadEntryPoints() { bool ok = 
getSymbolAddr(fwDeviceInitByDevice, deviceInitByDevice); ok = ok && getSymbolAddr(fwDeviceGetDeviceInfo, deviceGetDeviceInfo); ok = ok && getSymbolAddr(fwDeviceFwVersion, deviceGetFwVersion); ok = ok && getSymbolAddr(fwDeviceIteratorCreate, deviceIteratorCreate); ok = ok && getSymbolAddr(fwDeviceIteratorNext, deviceItreatorNext); ok = ok && getSymbolAddr(fwDeviceIteratorDestroy, deviceItreatorDestroy); ok = ok && getSymbolAddr(fwDeviceFwUpdate, deviceFwUpdate); ok = ok && getSymbolAddr(fwImageOpromInit, imageOpromInit); ok = ok && getSymbolAddr(fwImageOpromType, imageOpromType); ok = ok && getSymbolAddr(fwDeviceOpromUpdate, deviceOpromUpdate); ok = ok && getSymbolAddr(fwDeviceOpromVersion, deviceOpromVersion); ok = ok && getSymbolAddr(fwDeviceClose, deviceClose); ok = ok && loadEntryPointsExt(); return ok; } static void progressFunc(uint32_t done, uint32_t total, void *ctx) { uint32_t percent = (done * 100) / total; PRINT_DEBUG_STRING(NEO::DebugManager.flags.PrintDebugMessages.get(), stdout, "Progess: %d/%d:%d/%\n", done, total, percent); } ze_result_t FirmwareUtilImp::getFirstDevice(igsc_device_info *info) { igsc_device_iterator *iter; int ret = deviceIteratorCreate(&iter); if (ret != IGSC_SUCCESS) { return ZE_RESULT_ERROR_UNINITIALIZED; } info->name[0] = '\0'; do { ret = deviceItreatorNext(iter, info); if (ret != IGSC_SUCCESS) { deviceItreatorDestroy(iter); return ZE_RESULT_ERROR_UNINITIALIZED; } if (info->domain == domain && info->bus == bus && info->dev == device && info->func == function) { fwDevicePath.assign(info->name); break; } } while (1); deviceItreatorDestroy(iter); return ZE_RESULT_SUCCESS; } ze_result_t FirmwareUtilImp::fwDeviceInit() { int ret; igsc_device_info info; ze_result_t result = getFirstDevice(&info); if (result != ZE_RESULT_SUCCESS) { return result; } ret = deviceInitByDevice(&fwDeviceHandle, fwDevicePath.c_str()); if (ret != 0) { return ZE_RESULT_ERROR_UNINITIALIZED; } return ZE_RESULT_SUCCESS; } ze_result_t 
FirmwareUtilImp::fwGetVersion(std::string &fwVersion) { igsc_fw_version deviceFwVersion; memset(&deviceFwVersion, 0, sizeof(deviceFwVersion)); int ret = deviceGetFwVersion(&fwDeviceHandle, &deviceFwVersion); if (ret != IGSC_SUCCESS) { return ZE_RESULT_ERROR_UNINITIALIZED; } fwVersion.append(deviceFwVersion.project); fwVersion.append("_"); fwVersion.append(std::to_string(deviceFwVersion.hotfix)); fwVersion.append("."); fwVersion.append(std::to_string(deviceFwVersion.build)); return ZE_RESULT_SUCCESS; } ze_result_t FirmwareUtilImp::opromGetVersion(std::string &fwVersion) { igsc_oprom_version opromVersion; memset(&opromVersion, 0, sizeof(opromVersion)); int ret = deviceOpromVersion(&fwDeviceHandle, IGSC_OPROM_CODE, &opromVersion); if (ret != IGSC_SUCCESS) { return ZE_RESULT_ERROR_UNINITIALIZED; } fwVersion.append("OPROM CODE VERSION:"); for (int i = 0; i < IGSC_OPROM_VER_SIZE; i++) { fwVersion.append(std::to_string(static_cast(opromVersion.version[i]))); } fwVersion.append("_"); memset(&opromVersion, 0, sizeof(opromVersion)); ret = deviceOpromVersion(&fwDeviceHandle, IGSC_OPROM_DATA, &opromVersion); if (ret != IGSC_SUCCESS) { return ZE_RESULT_ERROR_UNINITIALIZED; } fwVersion.append("OPROM DATA VERSION:"); for (int i = 0; i < IGSC_OPROM_VER_SIZE; i++) { fwVersion.append(std::to_string(static_cast(opromVersion.version[i]))); } return ZE_RESULT_SUCCESS; } ze_result_t FirmwareUtilImp::fwFlashGSC(void *pImage, uint32_t size) { int ret = deviceFwUpdate(&fwDeviceHandle, static_cast(pImage), size, progressFunc, nullptr); if (ret != IGSC_SUCCESS) { return ZE_RESULT_ERROR_UNINITIALIZED; } return ZE_RESULT_SUCCESS; } ze_result_t FirmwareUtilImp::fwFlashOprom(void *pImage, uint32_t size) { struct igsc_oprom_image *opromImg = nullptr; uint32_t opromImgType = 0; int retData = 0, retCode = 0; int ret = imageOpromInit(&opromImg, static_cast(pImage), size); if (ret != IGSC_SUCCESS) { return ZE_RESULT_ERROR_UNINITIALIZED; } ret = imageOpromType(opromImg, &opromImgType); if (ret != 
IGSC_SUCCESS) { return ZE_RESULT_ERROR_UNINITIALIZED; } if (opromImgType & IGSC_OPROM_DATA) { retData = deviceOpromUpdate(&fwDeviceHandle, IGSC_OPROM_DATA, opromImg, progressFunc, nullptr); } if (opromImgType & IGSC_OPROM_CODE) { retCode = deviceOpromUpdate(&fwDeviceHandle, IGSC_OPROM_CODE, opromImg, progressFunc, nullptr); } if ((retData != IGSC_SUCCESS) && (retCode != IGSC_SUCCESS)) { return ZE_RESULT_ERROR_UNINITIALIZED; } return ZE_RESULT_SUCCESS; } FirmwareUtilImp::FirmwareUtilImp(const std::string &pciBDF) { NEO::parseBdfString(pciBDF.c_str(), domain, bus, device, function); }; FirmwareUtilImp::~FirmwareUtilImp() { if (nullptr != libraryHandle) { deviceClose(&fwDeviceHandle); delete libraryHandle; libraryHandle = nullptr; } }; FirmwareUtil *FirmwareUtil::create(const std::string &pciBDF) { FirmwareUtilImp *pFwUtilImp = new FirmwareUtilImp(pciBDF); UNRECOVERABLE_IF(nullptr == pFwUtilImp); pFwUtilImp->libraryHandle = NEO::OsLibrary::load(fwUtilLibraryFile); if (pFwUtilImp->libraryHandle == nullptr || pFwUtilImp->loadEntryPoints() == false) { if (nullptr != pFwUtilImp->libraryHandle) { delete pFwUtilImp->libraryHandle; pFwUtilImp->libraryHandle = nullptr; } delete pFwUtilImp; return nullptr; } return static_cast(pFwUtilImp); } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/linux/firmware_util/firmware_util_imp.h000066400000000000000000000113631422164147700333750ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/non_copyable_or_moveable.h" #include "level_zero/core/source/device/device.h" #include "level_zero/tools/source/sysman/linux/firmware_util/firmware_util.h" #include #include #include #include namespace L0 { typedef int (*pIgscDeviceInitByDevice)(struct igsc_device_handle *handle, const char *devicePath); typedef int (*pIgscDeviceGetDeviceInfo)(struct igsc_device_handle *handle, struct igsc_info_device *info); typedef 
int (*pIgscDeviceFwVersion)(struct igsc_device_handle *handle, struct igsc_fw_version *version); typedef int (*pIgscDeviceIteratorCreate)(struct igsc_device_iterator **iter); typedef int (*pIgscDeviceIteratorNext)(struct igsc_device_iterator *iter, struct igsc_device_info *info); typedef void (*pIgscDeviceIteratorDestroy)(struct igsc_device_iterator *iter); typedef int (*pIgscDeviceFwUpdate)(struct igsc_device_handle *handle, const uint8_t *buffer, const uint32_t bufferLen, igsc_progress_func_t progressFunc, void *ctx); typedef int (*pIgscImageOpromInit)(struct igsc_oprom_image **img, const uint8_t *buffer, uint32_t bufferLen); typedef int (*pIgscImageOpromType)(struct igsc_oprom_image *img, uint32_t *opromType); typedef int (*pIgscDeviceOpromUpdate)(struct igsc_device_handle *handle, uint32_t opromType, struct igsc_oprom_image *img, igsc_progress_func_t progressFunc, void *ctx); typedef int (*pIgscDeviceOpromVersion)(struct igsc_device_handle *handle, uint32_t opromType, struct igsc_oprom_version *version); typedef int (*pIgscDeviceClose)(struct igsc_device_handle *handle); extern pIgscDeviceInitByDevice deviceInitByDevice; extern pIgscDeviceGetDeviceInfo deviceGetDeviceInfo; extern pIgscDeviceFwVersion deviceGetFwVersion; extern pIgscDeviceIteratorCreate deviceIteratorCreate; extern pIgscDeviceIteratorNext deviceItreatorNext; extern pIgscDeviceIteratorDestroy deviceItreatorDestroy; extern pIgscDeviceFwUpdate deviceFwUpdate; extern pIgscImageOpromInit imageOpromInit; extern pIgscImageOpromType imageOpromType; extern pIgscDeviceOpromUpdate deviceOpromUpdate; extern pIgscDeviceOpromVersion deviceOpromVersion; extern pIgscDeviceClose deviceClose; class FirmwareUtilImp : public FirmwareUtil, NEO::NonCopyableOrMovableClass { public: FirmwareUtilImp(const std::string &pciBDF); ~FirmwareUtilImp(); ze_result_t fwDeviceInit() override; ze_result_t getFirstDevice(igsc_device_info *) override; ze_result_t getFwVersion(std::string fwType, std::string &firmwareVersion) 
override; ze_result_t flashFirmware(std::string fwType, void *pImage, uint32_t size) override; ze_result_t fwIfrApplied(bool &ifrStatus) override; ze_result_t fwSupportedDiagTests(std::vector &supportedDiagTests) override; ze_result_t fwRunDiagTests(std::string &osDiagType, zes_diag_result_t *pDiagResult) override; virtual ze_result_t fwGetMemoryErrorCount(zes_ras_error_type_t type, uint32_t subDeviceCount, uint32_t subDeviceId, uint64_t &count) override; void getDeviceSupportedFwTypes(std::vector &fwTypes) override; ze_result_t fwGetVersion(std::string &fwVersion); ze_result_t opromGetVersion(std::string &fwVersion); ze_result_t pscGetVersion(std::string &fwVersion); ze_result_t fwFlashGSC(void *pImage, uint32_t size); ze_result_t fwFlashOprom(void *pImage, uint32_t size); ze_result_t fwFlashIafPsc(void *pImage, uint32_t size); ze_result_t fwCallGetstatusExt(uint32_t &supportedTests, uint32_t &ifrApplied, uint32_t &prevErrors, uint32_t &pendingReset); std::string fwDevicePath{}; struct igsc_device_handle fwDeviceHandle = {}; bool loadEntryPoints(); bool loadEntryPointsExt(); NEO::OsLibrary *libraryHandle = nullptr; template bool getSymbolAddr(const std::string name, T &proc) { void *addr = libraryHandle->getProcAddress(name); proc = reinterpret_cast(addr); return nullptr != proc; } private: uint16_t domain = 0; uint8_t bus = 0; uint8_t device = 0; uint8_t function = 0; std::mutex fwLock; }; } // namespace L0 firmware_util_imp_helper.cpp000066400000000000000000000035501422164147700352070ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/linux/firmware_util/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/non_copyable_or_moveable.h" #include "level_zero/core/source/device/device.h" #include "level_zero/tools/source/sysman/linux/firmware_util/firmware_util_imp.h" std::vector deviceSupportedFwTypes = {"GSC", "OptionROM"}; namespace L0 { ze_result_t 
FirmwareUtilImp::fwIfrApplied(bool &ifrStatus) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t FirmwareUtilImp::fwGetMemoryErrorCount(zes_ras_error_type_t type, uint32_t subDeviceCount, uint32_t subDeviceId, uint64_t &count) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t FirmwareUtilImp::fwSupportedDiagTests(std::vector &supportedDiagTests) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t FirmwareUtilImp::fwRunDiagTests(std::string &osDiagType, zes_diag_result_t *pDiagResult) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } void FirmwareUtilImp::getDeviceSupportedFwTypes(std::vector &fwTypes) { fwTypes = deviceSupportedFwTypes; } ze_result_t FirmwareUtilImp::getFwVersion(std::string fwType, std::string &firmwareVersion) { if (fwType == deviceSupportedFwTypes[0]) { //GSC return fwGetVersion(firmwareVersion); } if (fwType == deviceSupportedFwTypes[1]) { //OPROM return opromGetVersion(firmwareVersion); } return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t FirmwareUtilImp::flashFirmware(std::string fwType, void *pImage, uint32_t size) { if (fwType == deviceSupportedFwTypes[0]) { //GSC return fwFlashGSC(pImage, size); } if (fwType == deviceSupportedFwTypes[1]) { //OPROM return fwFlashOprom(pImage, size); } return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } bool FirmwareUtilImp::loadEntryPointsExt() { return true; } } // namespace L0 firmware_util_imp_stub.cpp000066400000000000000000000004401422164147700347000ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/linux/firmware_util/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/linux/firmware_util/firmware_util.h" namespace L0 { FirmwareUtil *FirmwareUtil::create(const std::string &pciBDF) { return nullptr; } } // namespace L0 
compute-runtime-22.14.22890/level_zero/tools/source/sysman/linux/fs_access.cpp000066400000000000000000000366311422164147700272770ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/linux/fs_access.h" #include #include #include #include #include #include #include #include namespace L0 { static ze_result_t getResult(int err) { if ((EPERM == err) || (EACCES == err)) { return ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS; } else if (ENOENT == err) { return ZE_RESULT_ERROR_NOT_AVAILABLE; } else { return ZE_RESULT_ERROR_UNKNOWN; } } // Generic Filesystem Access FsAccess::FsAccess() { } FsAccess *FsAccess::create() { return new FsAccess(); } ze_result_t FsAccess::read(const std::string file, uint64_t &val) { // Read a single line from text file without trailing newline std::ifstream fs; fs.open(file.c_str()); if (fs.fail()) { return getResult(errno); } fs >> val; if (fs.fail()) { fs.close(); return getResult(errno); } fs.close(); return ZE_RESULT_SUCCESS; } ze_result_t FsAccess::read(const std::string file, double &val) { // Read a single line from text file without trailing newline std::ifstream fs; fs.open(file.c_str()); if (fs.fail()) { return getResult(errno); } fs >> val; if (fs.fail()) { fs.close(); return getResult(errno); } fs.close(); return ZE_RESULT_SUCCESS; } ze_result_t FsAccess::read(const std::string file, int32_t &val) { // Read a single line from text file without trailing newline std::ifstream fs; fs.open(file.c_str()); if (fs.fail()) { return getResult(errno); } fs >> val; if (fs.fail()) { fs.close(); return getResult(errno); } fs.close(); return ZE_RESULT_SUCCESS; } ze_result_t FsAccess::read(const std::string file, uint32_t &val) { // Read a single line from text file without trailing newline std::ifstream fs; fs.open(file.c_str()); if (fs.fail()) { return getResult(errno); } fs >> val; if (fs.fail()) { fs.close(); return getResult(errno); } fs.close(); 
return ZE_RESULT_SUCCESS; } ze_result_t FsAccess::read(const std::string file, std::string &val) { // Read a single line from text file without trailing newline std::ifstream fs; val.clear(); fs.open(file.c_str()); if (fs.fail()) { return getResult(errno); } fs >> val; if (fs.fail()) { fs.close(); return getResult(errno); } fs.close(); // Strip trailing newline if (val.back() == '\n') { val.pop_back(); } return ZE_RESULT_SUCCESS; } ze_result_t FsAccess::read(const std::string file, std::vector &val) { // Read a entire text file, one line per vector entry std::string line; std::ifstream fs; val.clear(); fs.open(file.c_str()); if (fs.fail()) { return getResult(errno); } while (std::getline(fs, line)) { if (fs.fail()) { fs.close(); return getResult(errno); } if (line.back() == '\n') { line.pop_back(); } val.push_back(line); } fs.close(); return ZE_RESULT_SUCCESS; } ze_result_t FsAccess::write(const std::string file, const std::string val) { std::ofstream sysfs; sysfs.open(file.c_str()); if (sysfs.fail()) { return getResult(errno); } sysfs << val << std::endl; if (sysfs.fail()) { sysfs.close(); return getResult(errno); } sysfs.close(); return ZE_RESULT_SUCCESS; } ze_result_t FsAccess::canRead(const std::string file) { struct stat sb; if (statSyscall(file.c_str(), &sb) != 0) { return ZE_RESULT_ERROR_UNKNOWN; } if (sb.st_mode & S_IRUSR) { return ZE_RESULT_SUCCESS; } return ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS; } ze_result_t FsAccess::canWrite(const std::string file) { struct stat sb; if (statSyscall(file.c_str(), &sb) != 0) { return ZE_RESULT_ERROR_UNKNOWN; } if (sb.st_mode & S_IWUSR) { return ZE_RESULT_SUCCESS; } return ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS; } bool FsAccess::fileExists(const std::string file) { if (access(file.c_str(), F_OK)) { return false; } return true; } ze_result_t FsAccess::getFileMode(const std::string file, ::mode_t &mode) { struct stat sb; if (0 != stat(file.c_str(), &sb)) { return getResult(errno); } mode = sb.st_mode; return 
ZE_RESULT_SUCCESS; } ze_result_t FsAccess::readSymLink(const std::string path, std::string &val) { // returns the value of symlink at path char buf[PATH_MAX]; ssize_t len = ::readlink(path.c_str(), buf, PATH_MAX - 1); if (len < 0) { return getResult(errno); } buf[len] = '\0'; val = std::string(buf); return ZE_RESULT_SUCCESS; } ze_result_t FsAccess::getRealPath(const std::string path, std::string &val) { // returns the real file path after resolving all symlinks in path char buf[PATH_MAX]; char *realPath = ::realpath(path.c_str(), buf); if (!realPath) { return getResult(errno); } val = std::string(buf); return ZE_RESULT_SUCCESS; } ze_result_t FsAccess::listDirectory(const std::string path, std::vector &list) { list.clear(); ::DIR *procDir = ::opendir(path.c_str()); if (!procDir) { return getResult(errno); } struct ::dirent *ent; int err = 0; // readdir doesn't clear errno, so make sure it is clear errno = 0; while (NULL != (ent = ::readdir(procDir))) { // Ignore . and .. std::string name = std::string(ent->d_name); if (!name.compare(".") || !name.compare("..")) { errno = 0; continue; } list.push_back(std::string(ent->d_name)); errno = 0; } err = errno; ::closedir(procDir); // Check if in above while loop, readdir encountered any error. 
if ((err != 0) && (err != ENOENT)) { list.clear(); return getResult(err); } return ZE_RESULT_SUCCESS; } std::string FsAccess::getBaseName(const std::string path) { size_t pos = path.rfind("/"); if (std::string::npos == pos) { return path; } return path.substr(pos + 1, std::string::npos); } std::string FsAccess::getDirName(const std::string path) { size_t pos = path.rfind("/"); if (std::string::npos == pos) { return std::string(""); } // Include trailing slash return path.substr(0, pos); } bool FsAccess::isRootUser() { return (geteuid() == 0); } bool FsAccess::directoryExists(const std::string path) { if (accessSyscall(path.c_str(), F_OK)) { return false; } return true; } // Procfs Access const std::string ProcfsAccess::procDir = "/proc/"; const std::string ProcfsAccess::fdDir = "/fd/"; std::string ProcfsAccess::fullPath(const ::pid_t pid) { // Returns the full path for proc entry for process pid return std::string(procDir + std::to_string(pid)); } std::string ProcfsAccess::fdDirPath(const ::pid_t pid) { // Returns the full path to file descritpor directory // for process pid return std::string(fullPath(pid) + fdDir); } std::string ProcfsAccess::fullFdPath(const ::pid_t pid, const int fd) { // Returns the full path for filedescriptor fd // for process pid return std::string(fdDirPath(pid) + std::to_string(fd)); } ProcfsAccess *ProcfsAccess::create() { return new ProcfsAccess(); } ze_result_t ProcfsAccess::listProcesses(std::vector<::pid_t> &list) { // Returns a vector with all the active process ids in the system list.clear(); std::vector dir; ze_result_t result = FsAccess::listDirectory(procDir, dir); if (ZE_RESULT_SUCCESS != result) { return result; } for (auto &&file : dir) { ::pid_t pid; std::istringstream stream(file); stream >> pid; if (stream.fail()) { // Non numeric filename, not a process, skip continue; } list.push_back(pid); } return ZE_RESULT_SUCCESS; } ze_result_t ProcfsAccess::getFileDescriptors(const ::pid_t pid, std::vector &list) { // Returns a 
vector with all the filedescriptor numbers opened by a pid list.clear(); std::vector dir; ze_result_t result = FsAccess::listDirectory(fdDirPath(pid), dir); if (ZE_RESULT_SUCCESS != result) { return result; } for (auto &&file : dir) { int fd; std::istringstream stream(file); stream >> fd; if (stream.fail()) { // Non numeric filename, not a file descriptor continue; } list.push_back(fd); } return ZE_RESULT_SUCCESS; } ze_result_t ProcfsAccess::getFileName(const ::pid_t pid, const int fd, std::string &val) { // Given a process id and a file descriptor number // return full name of the open file. // NOTE: For sockets, the name will be of the format "socket:[nnnnnnn]" return FsAccess::readSymLink(fullFdPath(pid, fd), val); } bool ProcfsAccess::isAlive(const ::pid_t pid) { return FsAccess::fileExists(fullPath(pid)); } void ProcfsAccess::kill(const ::pid_t pid) { ::kill(pid, SIGKILL); } ::pid_t ProcfsAccess::myProcessId() { return ::getpid(); } // Sysfs Access const std::string SysfsAccess::drmPath = "/sys/class/drm/"; const std::string SysfsAccess::devicesPath = "device/drm/"; const std::string SysfsAccess::primaryDevName = "card"; const std::string SysfsAccess::drmDriverDevNodeDir = "/dev/dri/"; const std::string SysfsAccess::intelGpuBindEntry = "/sys/bus/pci/drivers/i915/bind"; const std::string SysfsAccess::intelGpuUnbindEntry = "/sys/bus/pci/drivers/i915/unbind"; std::string SysfsAccess::fullPath(const std::string file) { // Prepend sysfs directory path for this device return std::string(dirname + file); } SysfsAccess::SysfsAccess(const std::string dev) { // dev could be either /dev/dri/cardX or /dev/dri/renderDX std::string fileName = FsAccess::getBaseName(dev); std::string devicesDir = drmPath + fileName + std::string("/") + devicesPath; FsAccess::listDirectory(devicesDir, deviceNames); for (auto &&next : deviceNames) { if (!next.compare(0, primaryDevName.length(), primaryDevName)) { dirname = drmPath + next + std::string("/"); break; } } } SysfsAccess 
*SysfsAccess::create(const std::string dev) { return new SysfsAccess(dev); } ze_result_t SysfsAccess::canRead(const std::string file) { // Prepend sysfs directory path and call the base canRead return FsAccess::canRead(fullPath(file)); } ze_result_t SysfsAccess::canWrite(const std::string file) { // Prepend sysfs directory path and call the base canWrite return FsAccess::canWrite(fullPath(file)); } ze_result_t SysfsAccess::getFileMode(const std::string file, ::mode_t &mode) { // Prepend sysfs directory path and call the base getFileMode return FsAccess::getFileMode(fullPath(file), mode); } ze_result_t SysfsAccess::read(const std::string file, std::string &val) { // Prepend sysfs directory path and call the base read return FsAccess::read(fullPath(file).c_str(), val); } ze_result_t SysfsAccess::read(const std::string file, int32_t &val) { std::string str; ze_result_t result; result = FsAccess::read(fullPath(file), str); if (ZE_RESULT_SUCCESS != result) { return result; } std::istringstream stream(str); stream >> val; if (stream.fail()) { return ZE_RESULT_ERROR_UNKNOWN; } return ZE_RESULT_SUCCESS; } ze_result_t SysfsAccess::read(const std::string file, uint32_t &val) { std::string str; ze_result_t result; result = FsAccess::read(fullPath(file), str); if (ZE_RESULT_SUCCESS != result) { return result; } std::istringstream stream(str); stream >> val; if (stream.fail()) { return ZE_RESULT_ERROR_UNKNOWN; } return ZE_RESULT_SUCCESS; } ze_result_t SysfsAccess::read(const std::string file, double &val) { std::string str; ze_result_t result; result = FsAccess::read(fullPath(file), str); if (ZE_RESULT_SUCCESS != result) { return result; } std::istringstream stream(str); stream >> val; if (stream.fail()) { return ZE_RESULT_ERROR_UNKNOWN; } return ZE_RESULT_SUCCESS; } ze_result_t SysfsAccess::read(const std::string file, uint64_t &val) { std::string str; ze_result_t result; result = FsAccess::read(fullPath(file), str); if (ZE_RESULT_SUCCESS != result) { return result; } 
std::istringstream stream(str); stream >> val; if (stream.fail()) { return ZE_RESULT_ERROR_UNKNOWN; } return ZE_RESULT_SUCCESS; } ze_result_t SysfsAccess::read(const std::string file, std::vector &val) { // Prepend sysfs directory path and call the base read return FsAccess::read(fullPath(file), val); } ze_result_t SysfsAccess::write(const std::string file, const std::string val) { // Prepend sysfs directory path and call the base write return FsAccess::write(fullPath(file).c_str(), val); } ze_result_t SysfsAccess::write(const std::string file, const int val) { std::ostringstream stream; stream << val; if (stream.fail()) { return ZE_RESULT_ERROR_UNKNOWN; } return FsAccess::write(fullPath(file), stream.str()); } ze_result_t SysfsAccess::write(const std::string file, const double val) { std::ostringstream stream; stream << val; if (stream.fail()) { return ZE_RESULT_ERROR_UNKNOWN; } return FsAccess::write(fullPath(file), stream.str()); } ze_result_t SysfsAccess::write(const std::string file, const uint64_t val) { std::ostringstream stream; stream << val; if (stream.fail()) { return ZE_RESULT_ERROR_UNKNOWN; } return FsAccess::write(fullPath(file), stream.str()); } ze_result_t SysfsAccess::scanDirEntries(const std::string path, std::vector &list) { list.clear(); return FsAccess::listDirectory(fullPath(path).c_str(), list); } ze_result_t SysfsAccess::readSymLink(const std::string path, std::string &val) { // Prepend sysfs directory path and call the base readSymLink return FsAccess::readSymLink(fullPath(path).c_str(), val); } ze_result_t SysfsAccess::getRealPath(const std::string path, std::string &val) { // Prepend sysfs directory path and call the base getRealPath return FsAccess::getRealPath(fullPath(path).c_str(), val); } ze_result_t SysfsAccess::bindDevice(std::string device) { return FsAccess::write(intelGpuBindEntry, device); } ze_result_t SysfsAccess::unbindDevice(std::string device) { return FsAccess::write(intelGpuUnbindEntry, device); } bool 
SysfsAccess::fileExists(const std::string file) { // Prepend sysfs directory path and call the base fileExists return FsAccess::fileExists(fullPath(file).c_str()); } bool SysfsAccess::directoryExists(const std::string path) { return FsAccess::directoryExists(fullPath(path).c_str()); } bool SysfsAccess::isMyDeviceFile(const std::string dev) { // dev is a full pathname. if (getDirName(dev).compare(drmDriverDevNodeDir)) { for (auto &&next : deviceNames) { if (!getBaseName(dev).compare(next)) { return true; } } } return false; } bool SysfsAccess::isRootUser() { return FsAccess::isRootUser(); } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/linux/fs_access.h000066400000000000000000000117371422164147700267440ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/os_interface/linux/sys_calls.h" #include "level_zero/ze_api.h" #include "level_zero/zet_api.h" #include #include #include #include #include #include #include #include #include #include namespace L0 { class FsAccess { public: static FsAccess *create(); virtual ~FsAccess() = default; virtual ze_result_t canRead(const std::string file); virtual ze_result_t canWrite(const std::string file); virtual ze_result_t getFileMode(const std::string file, ::mode_t &mode); virtual ze_result_t read(const std::string file, uint64_t &val); virtual ze_result_t read(const std::string file, std::string &val); virtual ze_result_t read(const std::string file, std::vector &val); virtual ze_result_t read(const std::string file, double &val); virtual ze_result_t read(const std::string file, uint32_t &val); virtual ze_result_t read(const std::string file, int32_t &val); virtual ze_result_t write(const std::string file, const std::string val); virtual ze_result_t readSymLink(const std::string path, std::string &buf); virtual ze_result_t getRealPath(const std::string path, std::string &buf); virtual ze_result_t 
listDirectory(const std::string path, std::vector &list); virtual bool isRootUser(); std::string getBaseName(const std::string path); std::string getDirName(const std::string path); virtual bool fileExists(const std::string file); virtual bool directoryExists(const std::string path); protected: FsAccess(); decltype(&NEO::SysCalls::access) accessSyscall = NEO::SysCalls::access; decltype(&stat) statSyscall = stat; }; class ProcfsAccess : private FsAccess { public: static ProcfsAccess *create(); ~ProcfsAccess() override = default; MOCKABLE_VIRTUAL ze_result_t listProcesses(std::vector<::pid_t> &list); MOCKABLE_VIRTUAL ::pid_t myProcessId(); MOCKABLE_VIRTUAL ze_result_t getFileDescriptors(const ::pid_t pid, std::vector &list); MOCKABLE_VIRTUAL ze_result_t getFileName(const ::pid_t pid, const int fd, std::string &val); MOCKABLE_VIRTUAL bool isAlive(const ::pid_t pid); MOCKABLE_VIRTUAL void kill(const ::pid_t pid); protected: ProcfsAccess() = default; private: std::string fullPath(const ::pid_t pid); std::string fdDirPath(const ::pid_t pid); std::string fullFdPath(const ::pid_t pid, const int fd); static const std::string procDir; static const std::string fdDir; }; class SysfsAccess : protected FsAccess { public: static SysfsAccess *create(const std::string file); SysfsAccess() = default; ~SysfsAccess() override = default; ze_result_t canRead(const std::string file) override; ze_result_t canWrite(const std::string file) override; ze_result_t getFileMode(const std::string file, ::mode_t &mode) override; ze_result_t read(const std::string file, std::string &val) override; ze_result_t read(const std::string file, int32_t &val) override; ze_result_t read(const std::string file, uint32_t &val) override; ze_result_t read(const std::string file, uint64_t &val) override; ze_result_t read(const std::string file, double &val) override; ze_result_t read(const std::string file, std::vector &val) override; ze_result_t write(const std::string file, const std::string val) override; 
MOCKABLE_VIRTUAL ze_result_t write(const std::string file, const int val); MOCKABLE_VIRTUAL ze_result_t write(const std::string file, const uint64_t val); MOCKABLE_VIRTUAL ze_result_t write(const std::string file, const double val); ze_result_t write(const std::string file, std::vector val); MOCKABLE_VIRTUAL ze_result_t scanDirEntries(const std::string path, std::vector &list); MOCKABLE_VIRTUAL ze_result_t readSymLink(const std::string path, std::string &buf) override; ze_result_t getRealPath(const std::string path, std::string &buf) override; MOCKABLE_VIRTUAL ze_result_t bindDevice(const std::string device); MOCKABLE_VIRTUAL ze_result_t unbindDevice(const std::string device); MOCKABLE_VIRTUAL bool fileExists(const std::string file) override; MOCKABLE_VIRTUAL bool isMyDeviceFile(const std::string dev); MOCKABLE_VIRTUAL bool directoryExists(const std::string path) override; MOCKABLE_VIRTUAL bool isRootUser() override; private: SysfsAccess(const std::string file); std::string fullPath(const std::string file); std::vector deviceNames; std::string dirname; static const std::string drmPath; static const std::string devicesPath; static const std::string primaryDevName; static const std::string drmDriverDevNodeDir; static const std::string intelGpuBindEntry; static const std::string intelGpuUnbindEntry; }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/linux/nl_api/000077500000000000000000000000001422164147700260735ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/linux/nl_api/CMakeLists.txt000066400000000000000000000010071422164147700306310ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(LIBGENL_FOUND) set(L0_SRCS_TOOLS_SYSMAN_LINUX_NL_API ${CMAKE_CURRENT_SOURCE_DIR}/nl_api.h ${CMAKE_CURRENT_SOURCE_DIR}/nl_api.cpp ) endif() if(UNIX) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_SYSMAN_LINUX_NL_API} ) endif() # Make our source files 
visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_SYSMAN_LINUX_NL_API ${L0_SRCS_TOOLS_SYSMAN_LINUX_NL_API}) compute-runtime-22.14.22890/level_zero/tools/source/sysman/linux/nl_api/nl_api.cpp000066400000000000000000000233201422164147700300410ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "nl_api.h" namespace L0 { static constexpr std::string_view libgenlFile = "libnl-genl-3.so.200"; static constexpr std::string_view genlConnectRoutine = "genl_connect"; static constexpr std::string_view genlCtrlResolveRoutine = "genl_ctrl_resolve"; static constexpr std::string_view genlHandleMsgRoutine = "genl_handle_msg"; static constexpr std::string_view genlmsgPutRoutine = "genlmsg_put"; static constexpr std::string_view genlOpsResolveRoutine = "genl_ops_resolve"; static constexpr std::string_view genlRegisterFamilyRoutine = "genl_register_family"; static constexpr std::string_view genlUnregisterFamilyRoutine = "genl_unregister_family"; static constexpr std::string_view nlRecvmsgsDefaultRoutine = "nl_recvmsgs_default"; static constexpr std::string_view nlSendAutoRoutine = "nl_send_auto"; static constexpr std::string_view nlSocketAllocRoutine = "nl_socket_alloc"; static constexpr std::string_view nlSocketDisableSeqCheckRoutine = "nl_socket_disable_seq_check"; static constexpr std::string_view nlSocketFreeRoutine = "nl_socket_free"; static constexpr std::string_view nlSocketModifyCbRoutine = "nl_socket_modify_cb"; static constexpr std::string_view nlaDataRoutine = "nla_data"; static constexpr std::string_view nlaGetU32Routine = "nla_get_u32"; static constexpr std::string_view nlaGetU64Routine = "nla_get_u64"; static constexpr std::string_view nlaGetU8Routine = "nla_get_u8"; static constexpr std::string_view nlaIsNestedRoutine = "nla_is_nested"; static constexpr std::string_view nlaLenRoutine = "nla_len"; static constexpr std::string_view nlaNextRoutine = "nla_next"; static constexpr std::string_view 
nlaOkRoutine = "nla_ok"; static constexpr std::string_view nlaPutU16Routine = "nla_put_u16"; static constexpr std::string_view nlaPutU32Routine = "nla_put_u32"; static constexpr std::string_view nlaPutU64Routine = "nla_put_u64"; static constexpr std::string_view nlaPutU8Routine = "nla_put_u8"; static constexpr std::string_view nlaTypeRoutine = "nla_type"; static constexpr std::string_view nlmsgAllocRoutine = "nlmsg_alloc"; static constexpr std::string_view nlmsgAttrdataRoutine = "nlmsg_attrdata"; static constexpr std::string_view nlmsgAttrlenRoutine = "nlmsg_attrlen"; static constexpr std::string_view nlmsgFreeRoutine = "nlmsg_free"; static constexpr std::string_view nlmsgHdrRoutine = "nlmsg_hdr"; template bool NlApi::getSymbolAddr(const std::string_view &name, T &sym) { sym = reinterpret_cast(genlLibraryHandle->getProcAddress(std::string(name))); return nullptr != sym; } bool NlApi::loadEntryPoints() { if (!isAvailable()) return false; bool ok = true; ok = getSymbolAddr(genlConnectRoutine, genlConnectEntry); ok = ok && getSymbolAddr(genlCtrlResolveRoutine, genlCtrlResolveEntry); ok = ok && getSymbolAddr(genlHandleMsgRoutine, genlHandleMsgEntry); ok = ok && getSymbolAddr(genlmsgPutRoutine, genlmsgPutEntry); ok = ok && getSymbolAddr(genlOpsResolveRoutine, genlOpsResolveEntry); ok = ok && getSymbolAddr(genlRegisterFamilyRoutine, genlRegisterFamilyEntry); ok = ok && getSymbolAddr(genlUnregisterFamilyRoutine, genlUnregisterFamilyEntry); ok = ok && getSymbolAddr(nlRecvmsgsDefaultRoutine, nlRecvmsgsDefaultEntry); ok = ok && getSymbolAddr(nlSendAutoRoutine, nlSendAutoEntry); ok = ok && getSymbolAddr(nlSocketAllocRoutine, nlSocketAllocEntry); ok = ok && getSymbolAddr(nlSocketDisableSeqCheckRoutine, nlSocketDisableSeqCheckEntry); ok = ok && getSymbolAddr(nlSocketFreeRoutine, nlSocketFreeEntry); ok = ok && getSymbolAddr(nlSocketModifyCbRoutine, nlSocketModifyCbEntry); ok = ok && getSymbolAddr(nlaDataRoutine, nlaDataEntry); ok = ok && getSymbolAddr(nlaGetU32Routine, 
nlaGetU32Entry); ok = ok && getSymbolAddr(nlaGetU64Routine, nlaGetU64Entry); ok = ok && getSymbolAddr(nlaGetU8Routine, nlaGetU8Entry); ok = ok && getSymbolAddr(nlaIsNestedRoutine, nlaIsNestedEntry); ok = ok && getSymbolAddr(nlaLenRoutine, nlaLenEntry); ok = ok && getSymbolAddr(nlaNextRoutine, nlaNextEntry); ok = ok && getSymbolAddr(nlaOkRoutine, nlaOkEntry); ok = ok && getSymbolAddr(nlaPutU16Routine, nlaPutU16Entry); ok = ok && getSymbolAddr(nlaPutU32Routine, nlaPutU32Entry); ok = ok && getSymbolAddr(nlaPutU64Routine, nlaPutU64Entry); ok = ok && getSymbolAddr(nlaPutU8Routine, nlaPutU8Entry); ok = ok && getSymbolAddr(nlaTypeRoutine, nlaTypeEntry); ok = ok && getSymbolAddr(nlmsgAllocRoutine, nlmsgAllocEntry); ok = ok && getSymbolAddr(nlmsgAttrdataRoutine, nlmsgAttrdataEntry); ok = ok && getSymbolAddr(nlmsgAttrlenRoutine, nlmsgAttrlenEntry); ok = ok && getSymbolAddr(nlmsgFreeRoutine, nlmsgFreeEntry); ok = ok && getSymbolAddr(nlmsgHdrRoutine, nlmsgHdrEntry); return ok; } int NlApi::genlConnect(struct nl_sock *sock) { UNRECOVERABLE_IF(nullptr == genlConnectEntry); return (*genlConnectEntry)(sock); } int NlApi::genlCtrlResolve(struct nl_sock *sock, const char *name) { UNRECOVERABLE_IF(nullptr == genlCtrlResolveEntry); return (*genlCtrlResolveEntry)(sock, name); } int NlApi::genlHandleMsg(struct nl_msg *msg, void *arg) { UNRECOVERABLE_IF(nullptr == genlHandleMsgEntry); return (*genlHandleMsgEntry)(msg, arg); } void *NlApi::genlmsgPut(struct nl_msg *msg, uint32_t port, uint32_t seq, int family, int hdrlen, int flags, uint8_t cmd, uint8_t version) { UNRECOVERABLE_IF(nullptr == genlmsgPutEntry); return (*genlmsgPutEntry)(msg, port, seq, family, hdrlen, flags, cmd, version); } int NlApi::genlOpsResolve(struct nl_sock *sock, struct genl_ops *ops) { UNRECOVERABLE_IF(nullptr == genlOpsResolveEntry); return (*genlOpsResolveEntry)(sock, ops); } int NlApi::genlRegisterFamily(struct genl_ops *ops) { UNRECOVERABLE_IF(nullptr == genlRegisterFamilyEntry); return 
(*genlRegisterFamilyEntry)(ops); } int NlApi::genlUnregisterFamily(struct genl_ops *ops) { UNRECOVERABLE_IF(nullptr == genlUnregisterFamilyEntry); return (*genlUnregisterFamilyEntry)(ops); } int NlApi::nlRecvmsgsDefault(struct nl_sock *sock) { UNRECOVERABLE_IF(nullptr == nlRecvmsgsDefaultEntry); return (*nlRecvmsgsDefaultEntry)(sock); } int NlApi::nlSendAuto(struct nl_sock *sock, struct nl_msg *msg) { UNRECOVERABLE_IF(nullptr == nlSendAutoEntry); return (*nlSendAutoEntry)(sock, msg); } struct nl_sock *NlApi::nlSocketAlloc() { UNRECOVERABLE_IF(nullptr == nlSocketAllocEntry); return (*nlSocketAllocEntry)(); } void NlApi::nlSocketDisableSeqCheck(struct nl_sock *sock) { UNRECOVERABLE_IF(nullptr == nlSocketDisableSeqCheckEntry); (*nlSocketDisableSeqCheckEntry)(sock); return; } void NlApi::nlSocketFree(struct nl_sock *sock) { UNRECOVERABLE_IF(nullptr == nlSocketFreeEntry); (*nlSocketFreeEntry)(sock); return; } int NlApi::nlSocketModifyCb(struct nl_sock *sock, enum nl_cb_type type, enum nl_cb_kind kind, nl_recvmsg_msg_cb_t cb, void *arg) { UNRECOVERABLE_IF(nullptr == nlSocketModifyCbEntry); return (*nlSocketModifyCbEntry)(sock, type, kind, cb, arg); } void *NlApi::nlaData(const struct nlattr *attr) { UNRECOVERABLE_IF(nullptr == nlaDataEntry); return (*nlaDataEntry)(attr); } uint32_t NlApi::nlaGetU32(const struct nlattr *attr) { UNRECOVERABLE_IF(nullptr == nlaGetU32Entry); return (*nlaGetU32Entry)(attr); } uint64_t NlApi::nlaGetU64(const struct nlattr *attr) { UNRECOVERABLE_IF(nullptr == nlaGetU64Entry); return (*nlaGetU64Entry)(attr); } uint8_t NlApi::nlaGetU8(const struct nlattr *attr) { UNRECOVERABLE_IF(nullptr == nlaGetU8Entry); return (*nlaGetU8Entry)(attr); } int NlApi::nlaIsNested(const struct nlattr *attr) { UNRECOVERABLE_IF(nullptr == nlaIsNestedEntry); return (*nlaIsNestedEntry)(attr); } int NlApi::nlaLen(const struct nlattr *attr) { UNRECOVERABLE_IF(nullptr == nlaLenEntry); return (*nlaLenEntry)(attr); } struct nlattr *NlApi::nlaNext(const struct nlattr *attr, 
int *remaining) { UNRECOVERABLE_IF(nullptr == nlaNextEntry); return (*nlaNextEntry)(attr, remaining); } int NlApi::nlaOk(const struct nlattr *attr, int remaining) { UNRECOVERABLE_IF(nullptr == nlaOkEntry); return (*nlaOkEntry)(attr, remaining); } int NlApi::nlaPutU16(struct nl_msg *msg, int id, uint16_t data) { UNRECOVERABLE_IF(nullptr == nlaPutU16Entry); return (*nlaPutU16Entry)(msg, id, data); } int NlApi::nlaPutU32(struct nl_msg *msg, int id, uint32_t data) { UNRECOVERABLE_IF(nullptr == nlaPutU32Entry); return (*nlaPutU32Entry)(msg, id, data); } int NlApi::nlaPutU64(struct nl_msg *msg, int id, uint64_t data) { UNRECOVERABLE_IF(nullptr == nlaPutU64Entry); return (*nlaPutU64Entry)(msg, id, data); } int NlApi::nlaPutU8(struct nl_msg *msg, int id, uint8_t data) { UNRECOVERABLE_IF(nullptr == nlaPutU8Entry); return (*nlaPutU8Entry)(msg, id, data); } int NlApi::nlaType(const struct nlattr *attr) { UNRECOVERABLE_IF(nullptr == nlaTypeEntry); return (*nlaTypeEntry)(attr); } struct nl_msg *NlApi::nlmsgAlloc() { UNRECOVERABLE_IF(nullptr == nlmsgAllocEntry); return (*nlmsgAllocEntry)(); } struct nlattr *NlApi::nlmsgAttrdata(const struct nlmsghdr *hdr, int attr) { UNRECOVERABLE_IF(nullptr == nlmsgAttrdataEntry); return (*nlmsgAttrdataEntry)(hdr, attr); } int NlApi::nlmsgAttrlen(const struct nlmsghdr *hdr, int attr) { UNRECOVERABLE_IF(nullptr == nlmsgAttrlenEntry); return (*nlmsgAttrlenEntry)(hdr, attr); } void NlApi::nlmsgFree(struct nl_msg *msg) { UNRECOVERABLE_IF(nullptr == nlmsgFreeEntry); (*nlmsgFreeEntry)(msg); return; } struct nlmsghdr *NlApi::nlmsgHdr(struct nl_msg *msg) { UNRECOVERABLE_IF(nullptr == nlmsgHdrEntry); return (*nlmsgHdrEntry)(msg); } NlApi::NlApi() { genlLibraryHandle.reset(NEO::OsLibrary::load(std::string(libgenlFile))); } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/linux/nl_api/nl_api.h000066400000000000000000000144201422164147700275070ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * 
SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/non_copyable_or_moveable.h" #include "level_zero/core/source/device/device.h" #include #include #include #include #include #include #include #include namespace L0 { typedef int (*pGenlConnect)(struct nl_sock *); typedef int (*pGenlCtrlResolve)(struct nl_sock *, const char *); typedef int (*pGenlHandleMsg)(struct nl_msg *, void *); typedef int (*pGenlOpsResolve)(struct nl_sock *, struct genl_ops *); typedef int (*pGenlRegisterFamily)(struct genl_ops *); typedef int (*pGenlUnregisterFamily)(struct genl_ops *); typedef void *(*pGenlmsgPut)(struct nl_msg *, uint32_t, uint32_t, int, int, int, uint8_t, uint8_t); typedef int (*pNlRecvmsgsDefault)(struct nl_sock *); typedef int (*pNlSendAuto)(struct nl_sock *, struct nl_msg *); typedef struct nl_sock *(*pNlSocketAlloc)(); typedef void (*pNlSocketDisableSeqCheck)(struct nl_sock *); typedef void (*pNlSocketFree)(struct nl_sock *); typedef int (*pNlSocketModifyCb)(struct nl_sock *, enum nl_cb_type, enum nl_cb_kind, nl_recvmsg_msg_cb_t, void *); typedef void *(*pNlaData)(const struct nlattr *); typedef uint32_t (*pNlaGetU32)(const struct nlattr *); typedef uint64_t (*pNlaGetU64)(const struct nlattr *); typedef uint8_t (*pNlaGetU8)(const struct nlattr *); typedef int (*pNlaIsNested)(const struct nlattr *); typedef int (*pNlaLen)(const struct nlattr *); typedef struct nlattr *(*pNlaNext)(const struct nlattr *, int *); typedef int (*pNlaOk)(const struct nlattr *, int); typedef int (*pNlaPutU16)(struct nl_msg *, int, uint16_t); typedef int (*pNlaPutU32)(struct nl_msg *, int, uint32_t); typedef int (*pNlaPutU64)(struct nl_msg *, int, uint64_t); typedef int (*pNlaPutU8)(struct nl_msg *, int, uint8_t); typedef int (*pNlaType)(const struct nlattr *); typedef struct nl_msg *(*pNlmsgAlloc)(); typedef struct nlattr *(*pNlmsgAttrdata)(const struct nlmsghdr *, int); typedef int (*pNlmsgAttrlen)(const struct nlmsghdr *, int); typedef void (*pNlmsgFree)(struct 
nl_msg *); typedef struct nlmsghdr *(*pNlmsgHdr)(struct nl_msg *); class NlApi : public NEO::NonCopyableOrMovableClass { public: MOCKABLE_VIRTUAL int genlConnect(struct nl_sock *sock); MOCKABLE_VIRTUAL int genlCtrlResolve(struct nl_sock *sock, const char *name); MOCKABLE_VIRTUAL int genlHandleMsg(struct nl_msg *msg, void *arg); MOCKABLE_VIRTUAL int genlOpsResolve(struct nl_sock *sock, struct genl_ops *ops); MOCKABLE_VIRTUAL int genlRegisterFamily(struct genl_ops *ops); MOCKABLE_VIRTUAL int genlUnregisterFamily(struct genl_ops *ops); MOCKABLE_VIRTUAL void *genlmsgPut(struct nl_msg *msg, uint32_t port, uint32_t seq, int family, int hdrlen, int flags, uint8_t cmd, uint8_t version); MOCKABLE_VIRTUAL int nlRecvmsgsDefault(struct nl_sock *sock); MOCKABLE_VIRTUAL int nlSendAuto(struct nl_sock *sock, struct nl_msg *msg); MOCKABLE_VIRTUAL struct nl_sock *nlSocketAlloc(); MOCKABLE_VIRTUAL void nlSocketDisableSeqCheck(struct nl_sock *sock); MOCKABLE_VIRTUAL void nlSocketFree(struct nl_sock *sock); MOCKABLE_VIRTUAL int nlSocketModifyCb(struct nl_sock *sock, enum nl_cb_type type, enum nl_cb_kind kind, nl_recvmsg_msg_cb_t cb, void *arg); MOCKABLE_VIRTUAL void *nlaData(const struct nlattr *attr); MOCKABLE_VIRTUAL uint32_t nlaGetU32(const struct nlattr *attr); MOCKABLE_VIRTUAL uint64_t nlaGetU64(const struct nlattr *attr); MOCKABLE_VIRTUAL uint8_t nlaGetU8(const struct nlattr *attr); MOCKABLE_VIRTUAL int nlaIsNested(const struct nlattr *attr); MOCKABLE_VIRTUAL int nlaLen(const struct nlattr *attr); MOCKABLE_VIRTUAL struct nlattr *nlaNext(const struct nlattr *attr, int *remaining); MOCKABLE_VIRTUAL int nlaOk(const struct nlattr *attr, int remaining); MOCKABLE_VIRTUAL int nlaPutU16(struct nl_msg *msg, int id, uint16_t data); MOCKABLE_VIRTUAL int nlaPutU32(struct nl_msg *msg, int id, uint32_t data); MOCKABLE_VIRTUAL int nlaPutU64(struct nl_msg *msg, int id, uint64_t data); MOCKABLE_VIRTUAL int nlaPutU8(struct nl_msg *msg, int id, uint8_t data); MOCKABLE_VIRTUAL int nlaType(const 
struct nlattr *attr); MOCKABLE_VIRTUAL struct nl_msg *nlmsgAlloc(); MOCKABLE_VIRTUAL struct nlattr *nlmsgAttrdata(const struct nlmsghdr *hdr, int attr); MOCKABLE_VIRTUAL int nlmsgAttrlen(const struct nlmsghdr *hdr, int attr); MOCKABLE_VIRTUAL void nlmsgFree(struct nl_msg *msg); MOCKABLE_VIRTUAL struct nlmsghdr *nlmsgHdr(struct nl_msg *msg); bool isAvailable() { return nullptr != genlLibraryHandle.get(); } MOCKABLE_VIRTUAL bool loadEntryPoints(); NlApi(); MOCKABLE_VIRTUAL ~NlApi() = default; protected: template bool getSymbolAddr(const std::string_view &name, T &sym); std::unique_ptr genlLibraryHandle; pGenlConnect genlConnectEntry = nullptr; pGenlCtrlResolve genlCtrlResolveEntry = nullptr; pGenlHandleMsg genlHandleMsgEntry = nullptr; pGenlOpsResolve genlOpsResolveEntry = nullptr; pGenlRegisterFamily genlRegisterFamilyEntry = nullptr; pGenlUnregisterFamily genlUnregisterFamilyEntry = nullptr; pGenlmsgPut genlmsgPutEntry = nullptr; pNlRecvmsgsDefault nlRecvmsgsDefaultEntry = nullptr; pNlSendAuto nlSendAutoEntry = nullptr; pNlSocketAlloc nlSocketAllocEntry = nullptr; pNlSocketDisableSeqCheck nlSocketDisableSeqCheckEntry = nullptr; pNlSocketFree nlSocketFreeEntry = nullptr; pNlSocketModifyCb nlSocketModifyCbEntry = nullptr; pNlaData nlaDataEntry = nullptr; pNlaGetU32 nlaGetU32Entry = nullptr; pNlaGetU64 nlaGetU64Entry = nullptr; pNlaGetU8 nlaGetU8Entry = nullptr; pNlaIsNested nlaIsNestedEntry = nullptr; pNlaLen nlaLenEntry = nullptr; pNlaNext nlaNextEntry = nullptr; pNlaOk nlaOkEntry = nullptr; pNlaPutU16 nlaPutU16Entry = nullptr; pNlaPutU32 nlaPutU32Entry = nullptr; pNlaPutU64 nlaPutU64Entry = nullptr; pNlaPutU8 nlaPutU8Entry = nullptr; pNlaType nlaTypeEntry = nullptr; pNlmsgAlloc nlmsgAllocEntry = nullptr; pNlmsgAttrdata nlmsgAttrdataEntry = nullptr; pNlmsgAttrlen nlmsgAttrlenEntry = nullptr; pNlmsgFree nlmsgFreeEntry = nullptr; pNlmsgHdr nlmsgHdrEntry = nullptr; }; } // namespace L0 
compute-runtime-22.14.22890/level_zero/tools/source/sysman/linux/os_sysman_imp.cpp000066400000000000000000000371151422164147700302240ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/linux/os_sysman_imp.h" #include "shared/source/os_interface/device_factory.h" #include "level_zero/core/source/device/device_imp.h" #include "level_zero/tools/source/sysman/linux/fs_access.h" #include "sysman/linux/firmware_util/firmware_util.h" #include namespace L0 { const std::string LinuxSysmanImp::deviceDir("device"); ze_result_t LinuxSysmanImp::init() { pFsAccess = FsAccess::create(); DEBUG_BREAK_IF(nullptr == pFsAccess); if (pProcfsAccess == nullptr) { pProcfsAccess = ProcfsAccess::create(); } DEBUG_BREAK_IF(nullptr == pProcfsAccess); auto result = initLocalDeviceAndDrmHandles(); if (ZE_RESULT_SUCCESS != result) { return result; } int myDeviceFd = pDrm->getFileDescriptor(); std::string myDeviceName; result = pProcfsAccess->getFileName(pProcfsAccess->myProcessId(), myDeviceFd, myDeviceName); if (ZE_RESULT_SUCCESS != result) { return result; } if (pSysfsAccess == nullptr) { pSysfsAccess = SysfsAccess::create(myDeviceName); } DEBUG_BREAK_IF(nullptr == pSysfsAccess); pPmuInterface = PmuInterface::create(this); DEBUG_BREAK_IF(nullptr == pPmuInterface); return createPmtHandles(); } void LinuxSysmanImp::createFwUtilInterface() { std::string realRootPath; auto result = pSysfsAccess->getRealPath("device", realRootPath); if (ZE_RESULT_SUCCESS != result) { return; } auto rootPciPathOfGpuDevice = getPciRootPortDirectoryPath(realRootPath); auto loc = realRootPath.find_last_of('/'); pFwUtilInterface = FirmwareUtil::create(realRootPath.substr(loc + 1, std::string::npos)); } ze_result_t LinuxSysmanImp::createPmtHandles() { std::string realRootPath; auto result = pSysfsAccess->getRealPath("device", realRootPath); if (ZE_RESULT_SUCCESS != result) { return result; } auto rootPciPathOfGpuDevice 
= getPciRootPortDirectoryPath(realRootPath); PlatformMonitoringTech::create(pParentSysmanDeviceImp->deviceHandles, pFsAccess, rootPciPathOfGpuDevice, mapOfSubDeviceIdToPmtObject); return result; } PmuInterface *LinuxSysmanImp::getPmuInterface() { return pPmuInterface; } FirmwareUtil *LinuxSysmanImp::getFwUtilInterface() { if (pFwUtilInterface == nullptr) { createFwUtilInterface(); } return pFwUtilInterface; } PRODUCT_FAMILY LinuxSysmanImp::getProductFamily() { return pDevice->getNEODevice()->getHardwareInfo().platform.eProductFamily; } FsAccess &LinuxSysmanImp::getFsAccess() { UNRECOVERABLE_IF(nullptr == pFsAccess); return *pFsAccess; } ProcfsAccess &LinuxSysmanImp::getProcfsAccess() { UNRECOVERABLE_IF(nullptr == pProcfsAccess); return *pProcfsAccess; } SysfsAccess &LinuxSysmanImp::getSysfsAccess() { UNRECOVERABLE_IF(nullptr == pSysfsAccess); return *pSysfsAccess; } ze_result_t LinuxSysmanImp::initLocalDeviceAndDrmHandles() { pDevice = Device::fromHandle(pParentSysmanDeviceImp->hCoreDevice); DEBUG_BREAK_IF(nullptr == pDevice); NEO::OSInterface &OsInterface = pDevice->getOsInterface(); if (OsInterface.getDriverModel()->getDriverModelType() != NEO::DriverModelType::DRM) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } pDrm = OsInterface.getDriverModel()->as(); return ZE_RESULT_SUCCESS; } NEO::Drm &LinuxSysmanImp::getDrm() { if (pDrm == nullptr) { initLocalDeviceAndDrmHandles(); } UNRECOVERABLE_IF(nullptr == pDrm); return *pDrm; } void LinuxSysmanImp::releaseLocalDrmHandle() { pDrm = nullptr; } Device *LinuxSysmanImp::getDeviceHandle() { return pDevice; } SysmanDeviceImp *LinuxSysmanImp::getSysmanDeviceImp() { return pParentSysmanDeviceImp; } std::string LinuxSysmanImp::getPciRootPortDirectoryPath(std::string realPciPath) { size_t loc; // we need to change the absolute path to two levels up to get // the Discrete card's root port. 
// the root port is always at a fixed distance as defined in HW uint8_t nLevel = 2; while (nLevel > 0) { loc = realPciPath.find_last_of('/'); if (loc == std::string::npos) { break; } realPciPath = realPciPath.substr(0, loc); nLevel--; } return realPciPath; } static std::string modifyPathOnLevel(std::string path, uint8_t level) { size_t loc = 0; size_t count = 0; std::string modifiedPath(path); uint8_t nLevel = level; do { loc = path.find_first_of('/'); count = count + loc; if (loc == std::string::npos) { break; } path = path.substr(loc + 1, path.size()); nLevel--; } while (nLevel > 0); if (nLevel == 0) { modifiedPath = modifiedPath.substr(0, (count + level - 1)); // need to adjust for last '/' that the code encounters } return modifiedPath; } std::string LinuxSysmanImp::getPciRootPortDirectoryPathForReset(std::string realPciPath) { // the rootport is always the first pci folder after the pcie slot. // /sys/devices/pci0000:89/0000:89:02.0/0000:8a:00.0/0000:8b:01.0/0000:8c:00.0 // '/sys/devices/pci0000:89/0000:89:02.0/' will always be the same distance. return modifyPathOnLevel(realPciPath, 5); } std::string LinuxSysmanImp::getPciCardBusDirectoryPath(std::string realPciPath) { // the cardbus is always the second pci folder after the pcie slot. // /sys/devices/pci0000:89/0000:89:02.0/0000:8a:00.0/0000:8b:01.0/0000:8c:00.0 // '/sys/devices/pci0000:89/0000:89:02.0/0000:8a:00.0/' will always be the same distance. 
return modifyPathOnLevel(realPciPath, 6); } PlatformMonitoringTech *LinuxSysmanImp::getPlatformMonitoringTechAccess(uint32_t subDeviceId) { auto subDeviceIdToPmtEntry = mapOfSubDeviceIdToPmtObject.find(subDeviceId); if (subDeviceIdToPmtEntry == mapOfSubDeviceIdToPmtObject.end()) { return nullptr; } return subDeviceIdToPmtEntry->second; } LinuxSysmanImp::LinuxSysmanImp(SysmanDeviceImp *pParentSysmanDeviceImp) { this->pParentSysmanDeviceImp = pParentSysmanDeviceImp; } void LinuxSysmanImp::releasePmtObject() { for (auto &subDeviceIdToPmtEntry : mapOfSubDeviceIdToPmtObject) { if (subDeviceIdToPmtEntry.second) { delete subDeviceIdToPmtEntry.second; subDeviceIdToPmtEntry.second = nullptr; } } mapOfSubDeviceIdToPmtObject.clear(); } void LinuxSysmanImp::releaseFwUtilInterface() { if (nullptr != pFwUtilInterface) { delete pFwUtilInterface; pFwUtilInterface = nullptr; } } LinuxSysmanImp::~LinuxSysmanImp() { if (nullptr != pSysfsAccess) { delete pSysfsAccess; pSysfsAccess = nullptr; } if (nullptr != pProcfsAccess) { delete pProcfsAccess; pProcfsAccess = nullptr; } if (nullptr != pFsAccess) { delete pFsAccess; pFsAccess = nullptr; } if (nullptr != pPmuInterface) { delete pPmuInterface; pPmuInterface = nullptr; } releaseFwUtilInterface(); releasePmtObject(); } void LinuxSysmanImp::getPidFdsForOpenDevice(ProcfsAccess *pProcfsAccess, SysfsAccess *pSysfsAccess, const ::pid_t pid, std::vector &deviceFds) { // Return a list of all the file descriptors of this process that point to this device std::vector fds; deviceFds.clear(); if (ZE_RESULT_SUCCESS != pProcfsAccess->getFileDescriptors(pid, fds)) { // Process exited. Not an error. Just ignore. return; } for (auto &&fd : fds) { std::string file; if (pProcfsAccess->getFileName(pid, fd, file) != ZE_RESULT_SUCCESS) { // Process closed this file. Not an error. Just ignore. 
continue; } if (pSysfsAccess->isMyDeviceFile(file)) { deviceFds.push_back(fd); } } } void LinuxSysmanImp::releaseSysmanDeviceResources() { getSysmanDeviceImp()->pEngineHandleContext->releaseEngines(); getSysmanDeviceImp()->pRasHandleContext->releaseRasHandles(); if (!diagnosticsReset) { getSysmanDeviceImp()->pDiagnosticsHandleContext->releaseDiagnosticsHandles(); } getSysmanDeviceImp()->pFirmwareHandleContext->releaseFwHandles(); releasePmtObject(); if (!diagnosticsReset) { releaseFwUtilInterface(); } releaseLocalDrmHandle(); } void LinuxSysmanImp::releaseDeviceResources() { releaseSysmanDeviceResources(); auto device = static_cast(getDeviceHandle()); executionEnvironment = device->getNEODevice()->getExecutionEnvironment(); device->releaseResources(); executionEnvironment->memoryManager->releaseDeviceSpecificMemResources(rootDeviceIndex); executionEnvironment->releaseRootDeviceEnvironmentResources(executionEnvironment->rootDeviceEnvironments[rootDeviceIndex].get()); executionEnvironment->rootDeviceEnvironments[rootDeviceIndex].reset(); } void LinuxSysmanImp::reInitSysmanDeviceResources() { getSysmanDeviceImp()->updateSubDeviceHandlesLocally(); createPmtHandles(); createFwUtilInterface(); getSysmanDeviceImp()->pRasHandleContext->init(getSysmanDeviceImp()->deviceHandles); getSysmanDeviceImp()->pEngineHandleContext->init(); if (!diagnosticsReset) { getSysmanDeviceImp()->pDiagnosticsHandleContext->init(getSysmanDeviceImp()->deviceHandles); } getSysmanDeviceImp()->pFirmwareHandleContext->init(); } ze_result_t LinuxSysmanImp::initDevice() { ze_result_t result = ZE_RESULT_SUCCESS; auto device = static_cast(getDeviceHandle()); auto neoDevice = NEO::DeviceFactory::createDevice(*executionEnvironment, devicePciBdf, rootDeviceIndex); if (neoDevice == nullptr) { return ZE_RESULT_ERROR_DEVICE_LOST; } static_cast(device->getDriverHandle())->updateRootDeviceBitFields(neoDevice); static_cast(device->getDriverHandle())->enableRootDeviceDebugger(neoDevice); 
Device::deviceReinit(device->getDriverHandle(), device, neoDevice, &result); reInitSysmanDeviceResources(); return ZE_RESULT_SUCCESS; } // A 'warm reset' is a conventional reset that is triggered across a PCI express link. // A warm reset is triggered either when a link is forced into electrical idle or // by sending TS1 and TS2 ordered sets with the hot reset bit set. // Software can initiate a warm reset by setting and then clearing the secondary bus reset bit // in the bridge control register in the PCI configuration space of the bridge port upstream of the device. ze_result_t LinuxSysmanImp::osWarmReset() { std::string rootPortPath; std::string realRootPath; ze_result_t result = pSysfsAccess->getRealPath(deviceDir, realRootPath); if (ZE_RESULT_SUCCESS != result) { return result; } auto device = static_cast(pDevice); executionEnvironment = device->getNEODevice()->getExecutionEnvironment(); devicePciBdf = device->getNEODevice()->getRootDeviceEnvironment().osInterface->getDriverModel()->as()->getPciPath(); rootDeviceIndex = device->getNEODevice()->getRootDeviceIndex(); ExecutionEnvironmentRefCountRestore restorer(executionEnvironment); releaseDeviceResources(); rootPortPath = getPciRootPortDirectoryPathForReset(realRootPath); int fd, ret = 0; unsigned int offset = PCI_BRIDGE_CONTROL; // Bridge control offset in Header of PCI config space unsigned int value = 0x00; unsigned int resetValue = 0x00; std::string configFilePath = rootPortPath + '/' + "config"; fd = this->openFunction(configFilePath.c_str(), O_RDWR); if (fd < 0) { return ZE_RESULT_ERROR_UNKNOWN; } this->preadFunction(fd, &value, 0x01, offset); resetValue = value | PCI_BRIDGE_CTL_BUS_RESET; this->pwriteFunction(fd, &resetValue, 0x01, offset); std::this_thread::sleep_for(std::chrono::milliseconds(100)); // Sleep for 100 milliseconds just to make sure the change is propagated. 
this->pwriteFunction(fd, &value, 0x01, offset); std::this_thread::sleep_for(std::chrono::milliseconds(500)); // Sleep for 500 milliseconds ret = this->closeFunction(fd); if (ret < 0) { return ZE_RESULT_ERROR_UNKNOWN; } std::string cardBusPath; cardBusPath = getPciCardBusDirectoryPath(realRootPath); // write 1 to remove result = pFsAccess->write(cardBusPath + '/' + "remove", "1"); if (ZE_RESULT_SUCCESS != result) { return result; } result = pFsAccess->write(realRootPath + '/' + "rescan", "1"); if (ZE_RESULT_SUCCESS != result) { return result; } return initDevice(); } std::string LinuxSysmanImp::getAddressFromPath(std::string &rootPortPath) { size_t loc; loc = rootPortPath.find_last_of('/'); // we get the pci address of the root port from rootPortPath return rootPortPath.substr(loc + 1, std::string::npos); } ze_result_t LinuxSysmanImp::osColdReset() { const std::string slotPath("/sys/bus/pci/slots/"); // holds the directories matching to the number of slots in the PC std::string cardBusPath; // will hold the PCIe Root port directory path (the address of the PCIe slot). 
std::string realRootPath; // will hold the absolute real path (not symlink) to the selected Device ze_result_t result = pSysfsAccess->getRealPath(deviceDir, realRootPath); // e.g realRootPath=/sys/devices/pci0000:89/0000:89:02.0/0000:8a:00.0/0000:8b:01.0/0000:8c:00.0 if (ZE_RESULT_SUCCESS != result) { return result; } auto device = static_cast(pDevice); executionEnvironment = device->getNEODevice()->getExecutionEnvironment(); devicePciBdf = device->getNEODevice()->getRootDeviceEnvironment().osInterface->getDriverModel()->as()->getPciPath(); rootDeviceIndex = device->getNEODevice()->getRootDeviceIndex(); ExecutionEnvironmentRefCountRestore restorer(executionEnvironment); releaseDeviceResources(); cardBusPath = getPciCardBusDirectoryPath(realRootPath); // e.g cardBusPath=/sys/devices/pci0000:89/0000:89:02.0/ std::string rootAddress = getAddressFromPath(cardBusPath); // e.g rootAddress = 0000:8a:00.0 std::vector dir; result = pFsAccess->listDirectory(slotPath, dir); // get list of slot directories from /sys/bus/pci/slots/ if (ZE_RESULT_SUCCESS != result) { return result; } for (auto &slot : dir) { std::string slotAddress; result = pFsAccess->read((slotPath + slot + "/address"), slotAddress); // extract slot address from the slot directory /sys/bus/pci/slots//address if (ZE_RESULT_SUCCESS != result) { return result; } if (slotAddress.compare(rootAddress) == 0) { // compare slot address to root port address result = pFsAccess->write((slotPath + slot + "/power"), "0"); // turn off power if (ZE_RESULT_SUCCESS != result) { return result; } std::this_thread::sleep_for(std::chrono::milliseconds(100)); // Sleep for 100 milliseconds just to make sure, 1 ms is defined as part of spec result = pFsAccess->write((slotPath + slot + "/power"), "1"); // turn on power if (ZE_RESULT_SUCCESS != result) { return result; } return initDevice(); } } return ZE_RESULT_ERROR_DEVICE_LOST; // incase the reset fails inform upper layers. 
} OsSysman *OsSysman::create(SysmanDeviceImp *pParentSysmanDeviceImp) { LinuxSysmanImp *pLinuxSysmanImp = new LinuxSysmanImp(pParentSysmanDeviceImp); return static_cast(pLinuxSysmanImp); } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/linux/os_sysman_imp.h000066400000000000000000000067651422164147700277000ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/non_copyable_or_moveable.h" #include "shared/source/os_interface/linux/drm_neo.h" #include "shared/source/os_interface/os_interface.h" #include "level_zero/core/source/device/device.h" #include "level_zero/tools/source/sysman/linux/firmware_util/firmware_util.h" #include "level_zero/tools/source/sysman/linux/fs_access.h" #include "level_zero/tools/source/sysman/linux/pmt/pmt.h" #include "level_zero/tools/source/sysman/linux/pmu/pmu_imp.h" #include "level_zero/tools/source/sysman/sysman_imp.h" #include namespace L0 { class PmuInterface; class LinuxSysmanImp : public OsSysman, NEO::NonCopyableOrMovableClass { public: LinuxSysmanImp(SysmanDeviceImp *pParentSysmanDeviceImp); ~LinuxSysmanImp() override; ze_result_t init() override; PmuInterface *getPmuInterface(); FirmwareUtil *getFwUtilInterface(); FsAccess &getFsAccess(); ProcfsAccess &getProcfsAccess(); SysfsAccess &getSysfsAccess(); NEO::Drm &getDrm(); PlatformMonitoringTech *getPlatformMonitoringTechAccess(uint32_t subDeviceId); Device *getDeviceHandle(); SysmanDeviceImp *getSysmanDeviceImp(); std::string getPciRootPortDirectoryPath(std::string realPciPath); std::string getPciRootPortDirectoryPathForReset(std::string realPciPath); std::string getPciCardBusDirectoryPath(std::string realPciPath); void releasePmtObject(); ze_result_t createPmtHandles(); void createFwUtilInterface(); void releaseFwUtilInterface(); void releaseLocalDrmHandle(); PRODUCT_FAMILY getProductFamily(); void releaseSysmanDeviceResources(); void 
releaseDeviceResources(); ze_result_t initDevice(); void reInitSysmanDeviceResources(); void getPidFdsForOpenDevice(ProcfsAccess *, SysfsAccess *, const ::pid_t, std::vector &); ze_result_t osWarmReset(); ze_result_t osColdReset(); std::string getAddressFromPath(std::string &rootPortPath); decltype(&NEO::SysCalls::open) openFunction = NEO::SysCalls::open; decltype(&NEO::SysCalls::close) closeFunction = NEO::SysCalls::close; decltype(&NEO::SysCalls::pread) preadFunction = NEO::SysCalls::pread; decltype(&NEO::SysCalls::pwrite) pwriteFunction = NEO::SysCalls::pwrite; std::string devicePciBdf = ""; uint32_t rootDeviceIndex = 0u; NEO::ExecutionEnvironment *executionEnvironment = nullptr; bool diagnosticsReset = false; protected: FsAccess *pFsAccess = nullptr; ProcfsAccess *pProcfsAccess = nullptr; SysfsAccess *pSysfsAccess = nullptr; Device *pDevice = nullptr; NEO::Drm *pDrm = nullptr; PmuInterface *pPmuInterface = nullptr; FirmwareUtil *pFwUtilInterface = nullptr; std::map mapOfSubDeviceIdToPmtObject; ze_result_t initLocalDeviceAndDrmHandles(); private: LinuxSysmanImp() = delete; SysmanDeviceImp *pParentSysmanDeviceImp = nullptr; static const std::string deviceDir; }; class ExecutionEnvironmentRefCountRestore { public: ExecutionEnvironmentRefCountRestore() = delete; ExecutionEnvironmentRefCountRestore(NEO::ExecutionEnvironment *executionEnvironmentRecevied) { executionEnvironment = executionEnvironmentRecevied; executionEnvironment->incRefInternal(); } ~ExecutionEnvironmentRefCountRestore() { executionEnvironment->decRefInternal(); } NEO::ExecutionEnvironment *executionEnvironment = nullptr; }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/linux/pmt/000077500000000000000000000000001422164147700254315ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/linux/pmt/CMakeLists.txt000066400000000000000000000010231422164147700301650ustar00rootroot00000000000000# # Copyright (C) 2021 Intel Corporation # # 
SPDX-License-Identifier: MIT # set(L0_SRCS_TOOLS_SYSMAN_LINUX_PMT ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}pmt_helper.cpp ${CMAKE_CURRENT_SOURCE_DIR}/pmt.cpp ${CMAKE_CURRENT_SOURCE_DIR}/pmt.h ) if(UNIX) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_SYSMAN_LINUX_PMT} ) endif() # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_SYSMAN_PMT_LINUX ${L0_SRCS_TOOLS_SYSMAN_PMT_LINUX}) compute-runtime-22.14.22890/level_zero/tools/source/sysman/linux/pmt/pmt.cpp000066400000000000000000000213711422164147700267410ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/linux/pmt/pmt.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "level_zero/tools/source/sysman/sysman_imp.h" #include #include #include #include namespace L0 { const std::string PlatformMonitoringTech::baseTelemSysFS("/sys/class/intel_pmt"); const std::string PlatformMonitoringTech::telem("telem"); uint32_t PlatformMonitoringTech::rootDeviceTelemNodeIndex = 0; ze_result_t PlatformMonitoringTech::readValue(const std::string key, uint32_t &value) { auto offset = keyOffsetMap.find(key); if (offset == keyOffsetMap.end()) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } int fd = this->openFunction(telemetryDeviceEntry.c_str(), O_RDONLY); if (fd == -1) { return ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE; } ze_result_t res = ZE_RESULT_SUCCESS; if (this->preadFunction(fd, &value, sizeof(uint32_t), baseOffset + offset->second) != sizeof(uint32_t)) { res = ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE; } if (this->closeFunction(fd) < 0) { return ZE_RESULT_ERROR_UNKNOWN; } return res; } ze_result_t PlatformMonitoringTech::readValue(const std::string key, uint64_t &value) { auto offset = keyOffsetMap.find(key); if (offset == keyOffsetMap.end()) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } int fd = this->openFunction(telemetryDeviceEntry.c_str(), 
O_RDONLY); if (fd == -1) { return ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE; } ze_result_t res = ZE_RESULT_SUCCESS; if (this->preadFunction(fd, &value, sizeof(uint64_t), baseOffset + offset->second) != sizeof(uint64_t)) { res = ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE; } if (this->closeFunction(fd) < 0) { return ZE_RESULT_ERROR_UNKNOWN; } return res; } bool compareTelemNodes(std::string &telemNode1, std::string &telemNode2) { std::string telem = "telem"; auto indexString1 = telemNode1.substr(telem.size(), telemNode1.size()); auto indexForTelemNode1 = stoi(indexString1); auto indexString2 = telemNode2.substr(telem.size(), telemNode2.size()); auto indexForTelemNode2 = stoi(indexString2); return indexForTelemNode1 < indexForTelemNode2; } // Check if Telemetry node(say /sys/class/intel_pmt/telem1) and rootPciPathOfGpuDevice share same PCI Root port static bool isValidTelemNode(FsAccess *pFsAccess, const std::string &rootPciPathOfGpuDevice, const std::string sysfsTelemNode) { std::string realPathOfTelemNode; auto result = pFsAccess->getRealPath(sysfsTelemNode, realPathOfTelemNode); if (result != ZE_RESULT_SUCCESS) { return false; } // Example: If // rootPciPathOfGpuDevice = "/sys/devices/pci0000:89/0000:89:02.0/0000:8a:00.0"; // realPathOfTelemNode = "/sys/devices/pci0000:89/0000:89:02.0/0000:8a:00.0/0000:8b:02.0/0000:8e:00.1/pmt_telemetry.1.auto/intel_pmt/telem1"; // As rootPciPathOfGpuDevice is a substring og realPathOfTelemNode , hence both sysfs telemNode and GPU device share same PCI Root. // Hence this telem node entry is valid for GPU device. 
return (realPathOfTelemNode.compare(0, rootPciPathOfGpuDevice.size(), rootPciPathOfGpuDevice) == 0); } ze_result_t PlatformMonitoringTech::enumerateRootTelemIndex(FsAccess *pFsAccess, std::string &rootPciPathOfGpuDevice) { std::vector listOfTelemNodes; auto result = pFsAccess->listDirectory(baseTelemSysFS, listOfTelemNodes); if (ZE_RESULT_SUCCESS != result) { return result; } // listOfTelemNodes vector could contain non "telem" entries which are not interested to us. // Lets refactor listOfTelemNodes vector as below for (auto iterator = listOfTelemNodes.begin(); iterator != listOfTelemNodes.end(); iterator++) { if (iterator->compare(0, telem.size(), telem) != 0) { listOfTelemNodes.erase(iterator--); // Remove entry if its suffix is not "telem" } } // Exmaple: For below directory // # /sys/class/intel_pmt$ ls // telem1 telem2 telem3 // Then listOfTelemNodes would contain telem1, telem2, telem3 std::sort(listOfTelemNodes.begin(), listOfTelemNodes.end(), compareTelemNodes); // sort listOfTelemNodes, to arange telem nodes in ascending order for (const auto &telemNode : listOfTelemNodes) { if (isValidTelemNode(pFsAccess, rootPciPathOfGpuDevice, baseTelemSysFS + "/" + telemNode)) { auto indexString = telemNode.substr(telem.size(), telemNode.size()); rootDeviceTelemNodeIndex = stoi(indexString); // if telemNode is telemN, then rootDeviceTelemNodeIndex = N return ZE_RESULT_SUCCESS; } } return ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE; } ze_result_t PlatformMonitoringTech::init(FsAccess *pFsAccess, const std::string &rootPciPathOfGpuDevice) { std::string telemNode = telem + std::to_string(rootDeviceTelemNodeIndex); if (isSubdevice) { uint32_t telemNodeIndex = 0; // If rootDeviceTelemNode is telem1, then rootDeviceTelemNodeIndex = 1 // And thus for subdevice0 --> telem node will be telem2, // for subdevice1 --> telem node will be telem3 etc telemNodeIndex = rootDeviceTelemNodeIndex + subdeviceId + 1; telemNode = telem + std::to_string(telemNodeIndex); } std::string 
baseTelemSysFSNode = baseTelemSysFS + "/" + telemNode; if (!isValidTelemNode(pFsAccess, rootPciPathOfGpuDevice, baseTelemSysFSNode)) { return ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE; } telemetryDeviceEntry = baseTelemSysFSNode + "/" + telem; if (!pFsAccess->fileExists(telemetryDeviceEntry)) { NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Telemetry support not available. No file %s\n", telemetryDeviceEntry.c_str()); return ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE; } std::string guid; std::string guidPath = baseTelemSysFSNode + std::string("/guid"); ze_result_t result = pFsAccess->read(guidPath, guid); if (ZE_RESULT_SUCCESS != result) { NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Telemetry sysfs entry not available %s\n", guidPath.c_str()); return result; } result = getKeyOffsetMap(guid, keyOffsetMap); if (ZE_RESULT_SUCCESS != result) { // We didnt have any entry for this guid in guidToKeyOffsetMap return result; } std::string offsetPath = baseTelemSysFSNode + std::string("/offset"); result = pFsAccess->read(offsetPath, baseOffset); if (ZE_RESULT_SUCCESS != result) { NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Telemetry sysfs entry not available %s\n", offsetPath.c_str()); return result; } return ZE_RESULT_SUCCESS; } PlatformMonitoringTech::PlatformMonitoringTech(FsAccess *pFsAccess, ze_bool_t onSubdevice, uint32_t subdeviceId) : subdeviceId(subdeviceId), isSubdevice(onSubdevice) { } void PlatformMonitoringTech::doInitPmtObject(FsAccess *pFsAccess, uint32_t subdeviceId, PlatformMonitoringTech *pPmt, const std::string &rootPciPathOfGpuDevice, std::map &mapOfSubDeviceIdToPmtObject) { if (pPmt->init(pFsAccess, rootPciPathOfGpuDevice) == ZE_RESULT_SUCCESS) { mapOfSubDeviceIdToPmtObject.emplace(subdeviceId, pPmt); return; } delete pPmt; // We are here as pPmt->init failed and thus this pPmt object is not useful. Let's delete that. 
} void PlatformMonitoringTech::create(const std::vector &deviceHandles, FsAccess *pFsAccess, std::string &rootPciPathOfGpuDevice, std::map &mapOfSubDeviceIdToPmtObject) { if (ZE_RESULT_SUCCESS == PlatformMonitoringTech::enumerateRootTelemIndex(pFsAccess, rootPciPathOfGpuDevice)) { for (const auto &deviceHandle : deviceHandles) { uint32_t subdeviceId = 0; ze_bool_t onSubdevice = false; SysmanDeviceImp::getSysmanDeviceInfo(deviceHandle, subdeviceId, onSubdevice); auto pPmt = new PlatformMonitoringTech(pFsAccess, onSubdevice, subdeviceId); UNRECOVERABLE_IF(nullptr == pPmt); PlatformMonitoringTech::doInitPmtObject(pFsAccess, subdeviceId, pPmt, rootPciPathOfGpuDevice, mapOfSubDeviceIdToPmtObject); } } } PlatformMonitoringTech::~PlatformMonitoringTech() { } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/linux/pmt/pmt.h000066400000000000000000000042421422164147700264040ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/non_copyable_or_moveable.h" #include "shared/source/os_interface/linux/sys_calls.h" #include "level_zero/core/source/device/device.h" #include "level_zero/tools/source/sysman/linux/fs_access.h" #include #include #include #include namespace L0 { class PlatformMonitoringTech : NEO::NonCopyableOrMovableClass { public: PlatformMonitoringTech() = delete; PlatformMonitoringTech(FsAccess *pFsAccess, ze_bool_t onSubdevice, uint32_t subdeviceId); virtual ~PlatformMonitoringTech(); virtual ze_result_t readValue(const std::string key, uint32_t &value); virtual ze_result_t readValue(const std::string key, uint64_t &value); static ze_result_t enumerateRootTelemIndex(FsAccess *pFsAccess, std::string &rootPciPathOfGpuDevice); static void create(const std::vector &deviceHandles, FsAccess *pFsAccess, std::string &rootPciPathOfGpuDevice, std::map &mapOfSubDeviceIdToPmtObject); protected: static uint32_t rootDeviceTelemNodeIndex; 
std::string telemetryDeviceEntry{}; std::map keyOffsetMap; ze_result_t getKeyOffsetMap(std::string guid, std::map &keyOffsetMap); ze_result_t init(FsAccess *pFsAccess, const std::string &rootPciPathOfGpuDevice); static void doInitPmtObject(FsAccess *pFsAccess, uint32_t subdeviceId, PlatformMonitoringTech *pPmt, const std::string &rootPciPathOfGpuDevice, std::map &mapOfSubDeviceIdToPmtObject); decltype(&NEO::SysCalls::open) openFunction = NEO::SysCalls::open; decltype(&NEO::SysCalls::close) closeFunction = NEO::SysCalls::close; decltype(&NEO::SysCalls::pread) preadFunction = NEO::SysCalls::pread; private: static const std::string baseTelemSysFS; static const std::string telem; uint64_t baseOffset = 0; uint32_t subdeviceId = 0; ze_bool_t isSubdevice = 0; }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/linux/pmt/pmt_helper.cpp000066400000000000000000000014041422164147700302730ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/linux/pmt/pmt.h" #include "level_zero/tools/source/sysman/linux/pmt/pmt_xml_offsets.h" namespace L0 { ze_result_t PlatformMonitoringTech::getKeyOffsetMap(std::string guid, std::map &keyOffsetMap) { ze_result_t retVal = ZE_RESULT_ERROR_UNKNOWN; auto keyOffsetMapEntry = guidToKeyOffsetMap.find(guid); if (keyOffsetMapEntry == guidToKeyOffsetMap.end()) { // We didnt have any entry for this guid in guidToKeyOffsetMap retVal = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; return retVal; } keyOffsetMap = keyOffsetMapEntry->second; return ZE_RESULT_SUCCESS; } } // namespace L0compute-runtime-22.14.22890/level_zero/tools/source/sysman/linux/pmt/pmt_xml_offsets.h000077500000000000000000000066761422164147700310350ustar00rootroot00000000000000/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include #include namespace L0 { // Each entry of this map corresponds to one particular graphics card type for 
example, DG1 A or B step. // or XeHP_SDV. GUID string will help in identify the card type const std::map> guidToKeyOffsetMap = { {"0x490e01", // DG1 B stepping {{"PACKAGE_ENERGY", 0x420}, {"COMPUTE_TEMPERATURES", 0x68}, {"SOC_TEMPERATURES", 0x60}, {"CORE_TEMPERATURES", 0x6c}}}, {"0x490e", // DG1 A stepping {{"PACKAGE_ENERGY", 0x400}, {"COMPUTE_TEMPERATURES", 0x68}, {"SOC_TEMPERATURES", 0x60}, {"CORE_TEMPERATURES", 0x6c}}}, {"0x4f95", // For DG2 device {{"PACKAGE_ENERGY", 1032}, {"SOC_TEMPERATURES", 56}}}, // SOC_TEMPERATURE contains GT_TEMP, DRAM_TEMP, SA_TEMP, DE_TEMP, PCIE_TEMP, TYPEC_TEMP {"0x4f9301", // For ATSM device {{"PACKAGE_ENERGY", 1032}, {"SOC_TEMPERATURES", 56}}}, // SOC_TEMPERATURE contains GT_TEMP, DRAM_TEMP, SA_TEMP, DE_TEMP, PCIE_TEMP, TYPEC_TEMP {"0xfdc76194", // For XeHP_SDV device {{"HBM0MaxDeviceTemperature", 28}, {"HBM1MaxDeviceTemperature", 36}, {"TileMinTemperature", 40}, {"TileMaxTemperature", 44}, {"GTMinTemperature", 48}, {"GTMaxTemperature", 52}, {"VF0_VFID", 88}, {"VF0_HBM0_READ", 92}, {"VF0_HBM0_WRITE", 96}, {"VF0_HBM1_READ", 104}, {"VF0_HBM1_WRITE", 108}, {"VF0_TIMESTAMP_L", 168}, {"VF0_TIMESTAMP_H", 172}, {"VF1_VFID", 176}, {"VF1_HBM0_READ", 180}, {"VF1_HBM0_WRITE", 184}, {"VF1_HBM1_READ", 192}, {"VF1_HBM1_WRITE", 196}, {"VF1_TIMESTAMP_L", 256}, {"VF1_TIMESTAMP_H", 260}}}, {"0xfdc76196", // For XeHP_SDV B0 device {{"HBM0MaxDeviceTemperature", 28}, {"HBM1MaxDeviceTemperature", 36}, {"TileMinTemperature", 40}, {"TileMaxTemperature", 44}, {"GTMinTemperature", 48}, {"GTMaxTemperature", 52}, {"VF0_VFID", 88}, {"VF0_HBM0_READ", 92}, {"VF0_HBM0_WRITE", 96}, {"VF0_HBM1_READ", 104}, {"VF0_HBM1_WRITE", 108}, {"VF0_TIMESTAMP_L", 168}, {"VF0_TIMESTAMP_H", 172}, {"VF1_VFID", 176}, {"VF1_HBM0_READ", 180}, {"VF1_HBM0_WRITE", 184}, {"VF1_HBM1_READ", 192}, {"VF1_HBM1_WRITE", 196}, {"VF1_TIMESTAMP_L", 256}, {"VF1_TIMESTAMP_H", 260}}}, {"0xb15a0edc", // For PVC device {{"HBM0MaxDeviceTemperature", 28}, {"HBM1MaxDeviceTemperature", 36}, 
{"TileMinTemperature", 40}, {"TileMaxTemperature", 44}, {"GTMinTemperature", 48}, {"GTMaxTemperature", 52}, {"VF0_VFID", 88}, {"VF0_HBM0_READ", 92}, {"VF0_HBM0_WRITE", 96}, {"VF0_HBM1_READ", 104}, {"VF0_HBM1_WRITE", 108}, {"VF0_TIMESTAMP_L", 168}, {"VF0_TIMESTAMP_H", 172}, {"VF1_VFID", 176}, {"VF1_HBM0_READ", 180}, {"VF1_HBM0_WRITE", 184}, {"VF1_HBM1_READ", 192}, {"VF1_HBM1_WRITE", 196}, {"VF1_TIMESTAMP_L", 256}, {"VF1_TIMESTAMP_H", 260}, {"HBM2MaxDeviceTemperature", 300}, {"HBM3MaxDeviceTemperature", 308}, {"VF0_HBM2_READ", 312}, {"VF0_HBM2_WRITE", 316}, {"VF0_HBM3_READ", 328}, {"VF0_HBM3_WRITE", 332}, {"VF1_HBM2_READ", 344}, {"VF1_HBM2_WRITE", 348}, {"VF1_HBM3_READ", 360}, {"VF1_HBM3_WRITE", 364}}}}; } // namespace L0compute-runtime-22.14.22890/level_zero/tools/source/sysman/linux/pmu/000077500000000000000000000000001422164147700254325ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/linux/pmu/CMakeLists.txt000066400000000000000000000007771422164147700302050ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_TOOLS_SYSMAN_LINUX_PMU ${CMAKE_CURRENT_SOURCE_DIR}/pmu_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/pmu_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/pmu.h ) if(UNIX) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_SYSMAN_LINUX_PMU} ) endif() # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_SYSMAN_PMU_LINUX ${L0_SRCS_TOOLS_SYSMAN_PMU_LINUX}) compute-runtime-22.14.22890/level_zero/tools/source/sysman/linux/pmu/pmu.h000066400000000000000000000007361422164147700264120ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include namespace L0 { class LinuxSysmanImp; class PmuInterface { public: virtual ~PmuInterface() = default; virtual int64_t pmuInterfaceOpen(uint64_t config, int group, uint32_t format) = 0; virtual int pmuRead(int fd, uint64_t *data, ssize_t 
sizeOfdata) = 0; static PmuInterface *create(LinuxSysmanImp *pLinuxSysmanImp); }; } // namespace L0compute-runtime-22.14.22890/level_zero/tools/source/sysman/linux/pmu/pmu_imp.cpp000066400000000000000000000061711422164147700276110ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/linux/pmu/pmu_imp.h" namespace L0 { const std::string PmuInterfaceImp::deviceDir("device"); const std::string PmuInterfaceImp::sysDevicesDir("/sys/devices/"); static constexpr int64_t perfEventOpenSyscallNumber = 298; // Get event id uint32_t PmuInterfaceImp::getEventType() { std::string i915DirName("i915"); bool isLmemSupported = pDevice->getDriverHandle()->getMemoryManager()->isLocalMemorySupported(pDevice->getRootDeviceIndex()); if (isLmemSupported) { std::string bdfDir; // ID or type of Pmu driver for discrete graphics is obtained by reading sysfs node as explained below: // For instance DG1 in PCI slot 0000:03:00.0: // $ cat /sys/devices/i915_0000_03_00.0/type // 23 ze_result_t result = pSysfsAccess->readSymLink(deviceDir, bdfDir); if (ZE_RESULT_SUCCESS != result) { return 0; } const auto loc = bdfDir.find_last_of('/'); auto bdf = bdfDir.substr(loc + 1); std::replace(bdf.begin(), bdf.end(), ':', '_'); i915DirName = "i915_" + bdf; } // For integrated graphics type of PMU driver is obtained by reading /sys/devices/i915/type node // # cat /sys/devices/i915/type // 18 const std::string eventTypeSysfsNode = sysDevicesDir + i915DirName + "/" + "type"; auto eventTypeVal = 0u; if (ZE_RESULT_SUCCESS != pFsAccess->read(eventTypeSysfsNode, eventTypeVal)) { return 0; } return eventTypeVal; } int PmuInterfaceImp::getErrorNo() { return errno; } inline int64_t PmuInterfaceImp::perfEventOpen(perf_event_attr *attr, pid_t pid, int cpu, int groupFd, uint64_t flags) { attr->size = sizeof(*attr); return this->syscallFunction(perfEventOpenSyscallNumber, attr, pid, cpu, groupFd, flags); } int64_t 
PmuInterfaceImp::pmuInterfaceOpen(uint64_t config, int group, uint32_t format) { struct perf_event_attr attr = {}; int nrCpus = get_nprocs_conf(); int cpu = 0; int64_t ret = 0; attr.type = getEventType(); if (attr.type == 0) { return -ENOENT; } if (group >= 0) { format &= ~PERF_FORMAT_GROUP; } attr.read_format = static_cast(format); attr.config = config; do { ret = perfEventOpen(&attr, -1, cpu++, group, 0); } while ((ret < 0 && getErrorNo() == EINVAL) && (cpu < nrCpus)); return ret; } int PmuInterfaceImp::pmuRead(int fd, uint64_t *data, ssize_t sizeOfdata) { ssize_t len; len = this->readFunction(fd, data, sizeOfdata); if (len != sizeOfdata) { return -1; } return 0; } PmuInterfaceImp::PmuInterfaceImp(LinuxSysmanImp *pLinuxSysmanImp) { pSysfsAccess = &pLinuxSysmanImp->getSysfsAccess(); pFsAccess = &pLinuxSysmanImp->getFsAccess(); pDevice = pLinuxSysmanImp->getDeviceHandle(); } PmuInterface *PmuInterface::create(LinuxSysmanImp *pLinuxSysmanImp) { PmuInterfaceImp *pPmuInterfaceImp = new PmuInterfaceImp(pLinuxSysmanImp); UNRECOVERABLE_IF(nullptr == pPmuInterfaceImp); return pPmuInterfaceImp; } } // namespace L0compute-runtime-22.14.22890/level_zero/tools/source/sysman/linux/pmu/pmu_imp.h000066400000000000000000000023021422164147700272460ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/tools/source/sysman/linux/os_sysman_imp.h" #include "level_zero/tools/source/sysman/linux/pmu/pmu.h" #include #include #include namespace L0 { class PmuInterfaceImp : public PmuInterface, NEO::NonCopyableOrMovableClass { public: PmuInterfaceImp() = delete; PmuInterfaceImp(LinuxSysmanImp *pLinuxSysmanImp); ~PmuInterfaceImp() override = default; int64_t pmuInterfaceOpen(uint64_t config, int group, uint32_t format) override; MOCKABLE_VIRTUAL int pmuRead(int fd, uint64_t *data, ssize_t sizeOfdata) override; protected: MOCKABLE_VIRTUAL int getErrorNo(); MOCKABLE_VIRTUAL int64_t 
perfEventOpen(perf_event_attr *attr, pid_t pid, int cpu, int groupFd, uint64_t flags);
    // read() and syscall() kept as data members so the calls made through them can be replaced.
    decltype(&read) readFunction = read;
    decltype(&syscall) syscallFunction = syscall;

  private:
    uint32_t getEventType();
    FsAccess *pFsAccess = nullptr;
    SysfsAccess *pSysfsAccess = nullptr;
    Device *pDevice = nullptr;
    static const std::string deviceDir;
    static const std::string sysDevicesDir;
};

} // namespace L0
compute-runtime-22.14.22890/level_zero/tools/source/sysman/linux/udev/000077500000000000000000000000001422164147700255745ustar00rootroot00000000000000
compute-runtime-22.14.22890/level_zero/tools/source/sysman/linux/udev/99-drm_ze_intel_gpu.rules000066400000000000000000000027361422164147700324450ustar00rootroot00000000000000
#
# Copyright (C) 2020-2021 Intel Corporation
#
# SPDX-License-Identifier: MIT
#

# drm "change" event with RESET_FAILED=1 for RESET_UNIT=gt: write 1 to wedged_file.
ACTION=="change", \
SUBSYSTEM=="drm", \
ENV{RESET_FAILED}=="1", \
ENV{RESET_UNIT}=="gt", \
RUN+="/bin/sh -c 'echo 1 > /var/lib/libze_intel_gpu/wedged_file'"

# drm "unbind" event: write 0 to pci_bind_status_file.
ACTION=="unbind", \
SUBSYSTEM=="drm", \
RUN+="/bin/sh -c 'echo 0 > /var/lib/libze_intel_gpu/pci_bind_status_file'"

# drm "bind" event: write 1 to pci_bind_status_file and reset wedged_file to 0.
ACTION=="bind", \
SUBSYSTEM=="drm", \
RUN+="/bin/sh -c 'echo 1 > /var/lib/libze_intel_gpu/pci_bind_status_file'", \
RUN+="/bin/sh -c 'echo 0 > /var/lib/libze_intel_gpu/wedged_file'"

# /dev/dri/card* removed: create remove-<ID_PATH_TAG> containing 1 and delete add-<ID_PATH_TAG>.
ACTION=="remove", \
SUBSYSTEM=="drm", \
ENV{DEVNAME}=="/dev/dri/card*", \
ENV{ID_PATH_TAG}=="pci-*", \
RUN+="/bin/sh -c 'touch /var/lib/libze_intel_gpu/remove-$env{ID_PATH_TAG}; echo 1 > /var/lib/libze_intel_gpu/remove-$env{ID_PATH_TAG}; rm /var/lib/libze_intel_gpu/add-$env{ID_PATH_TAG};'"

# /dev/dri/card* added: create add-<ID_PATH_TAG> containing 1, delete remove-<ID_PATH_TAG> and
# reset wedged_file to 0.
ACTION=="add", \
SUBSYSTEM=="drm", \
ENV{DEVNAME}=="/dev/dri/card*", \
ENV{ID_PATH_TAG}=="pci-*", \
RUN+="/bin/sh -c 'touch /var/lib/libze_intel_gpu/add-$env{ID_PATH_TAG}; echo 1 > /var/lib/libze_intel_gpu/add-$env{ID_PATH_TAG}; rm /var/lib/libze_intel_gpu/remove-$env{ID_PATH_TAG}; echo 0 > /var/lib/libze_intel_gpu/wedged_file;'"

# PORT_CHANGE event from the "iaf" platform driver: increment the counter kept in the
# fabric-<tag> file, where <tag> is derived from ID_PATH_TAG by the shell expansion below.
# NOTE(review): the ${env{ID_PATH_TAG}::16} form mixes udev and shell substitution syntax —
# confirm it expands as intended.
ACTION=="change", \
SUBSYSTEM=="platform", \
ENV{DEVTYPE}=="mfd_device", \
ENV{DRIVER}=="iaf", \
ENV{TYPE}=="PORT_CHANGE", \
ENV{ID_PATH_TAG}=="pci-*", \
RUN+="/bin/sh -c 'pci_id_path_tag=${env{ID_PATH_TAG}::16}; filename=/var/lib/libze_intel_gpu/fabric-${pci_id_path_tag}; touch ${filename}; echo $(($(cat ${filename}) + 1)) > ${filename};'"
compute-runtime-22.14.22890/level_zero/tools/source/sysman/linux/udev/pci_bind_status_file000066400000000000000000000000001422164147700316560ustar00rootroot00000000000000
compute-runtime-22.14.22890/level_zero/tools/source/sysman/linux/udev/wedged_file000066400000000000000000000000001422164147700277430ustar00rootroot00000000000000
compute-runtime-22.14.22890/level_zero/tools/source/sysman/memory/000077500000000000000000000000001422164147700250025ustar00rootroot00000000000000
compute-runtime-22.14.22890/level_zero/tools/source/sysman/memory/CMakeLists.txt000077500000000000000000000012141422164147700275430ustar00rootroot00000000000000
#
# Copyright (C) 2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
#

# OS-independent sources of the sysman memory module.
set(L0_SRCS_TOOLS_SYSMAN_MEMORY
    ${CMAKE_CURRENT_SOURCE_DIR}/memory.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/memory.h
    ${CMAKE_CURRENT_SOURCE_DIR}/memory_imp.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/memory_imp.h
    ${CMAKE_CURRENT_SOURCE_DIR}/os_memory.h
)

target_sources(${L0_STATIC_LIB_NAME}
               PRIVATE
               ${L0_SRCS_TOOLS_SYSMAN_MEMORY}
               ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
)

add_subdirectories()

# Make our source files visible to parent
set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_SYSMAN_MEMORY ${L0_SRCS_TOOLS_SYSMAN_MEMORY})
compute-runtime-22.14.22890/level_zero/tools/source/sysman/memory/linux/000077500000000000000000000000001422164147700261415ustar00rootroot00000000000000
compute-runtime-22.14.22890/level_zero/tools/source/sysman/memory/linux/CMakeLists.txt000077500000000000000000000020251422164147700307030ustar00rootroot00000000000000
#
# Copyright (C) 2020-2022 Intel Corporation
#
# SPDX-License-Identifier: MIT
#

set(L0_SRCS_TOOLS_SYSMAN_MEMORY_LINUX
    ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
)

# Exactly one of the three LinuxMemoryImp implementations is added to the list below.
if(NEO_ENABLE_i915_PRELIM_DETECTION)
  list(APPEND
L0_SRCS_TOOLS_SYSMAN_MEMORY_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/os_memory_imp_prelim.cpp ${CMAKE_CURRENT_SOURCE_DIR}/os_memory_imp_prelim.h ) elseif(SUPPORT_DG1 AND "${BRANCH_TYPE}" STREQUAL "") list(APPEND L0_SRCS_TOOLS_SYSMAN_MEMORY_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/os_memory_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/os_memory_imp_dg1.cpp ) else() list(APPEND L0_SRCS_TOOLS_SYSMAN_MEMORY_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/os_memory_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/os_memory_imp.cpp ) endif() if(UNIX) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_SYSMAN_MEMORY_LINUX} ) endif() # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_SYSMAN_MEMORY_LINUX ${L0_SRCS_TOOLS_SYSMAN_MEMORY_LINUX}) compute-runtime-22.14.22890/level_zero/tools/source/sysman/memory/linux/os_memory_imp.cpp000066400000000000000000000030401422164147700315200ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/memory/linux/os_memory_imp.h" #include "sysman/linux/os_sysman_imp.h" namespace L0 { LinuxMemoryImp::LinuxMemoryImp(OsSysman *pOsSysman, ze_bool_t onSubdevice, uint32_t subdeviceId) : isSubdevice(onSubdevice), subdeviceId(subdeviceId) { LinuxSysmanImp *pLinuxSysmanImp = static_cast(pOsSysman); pDevice = pLinuxSysmanImp->getDeviceHandle(); } bool LinuxMemoryImp::isMemoryModuleSupported() { return pDevice->getDriverHandle()->getMemoryManager()->isLocalMemorySupported(pDevice->getRootDeviceIndex()); } ze_result_t LinuxMemoryImp::getProperties(zes_mem_properties_t *pProperties) { pProperties->type = ZES_MEM_TYPE_DDR; pProperties->location = ZES_MEM_LOC_DEVICE; pProperties->onSubdevice = isSubdevice; pProperties->subdeviceId = subdeviceId; pProperties->busWidth = -1; pProperties->numChannels = -1; pProperties->physicalSize = 0; return ZE_RESULT_SUCCESS; } ze_result_t LinuxMemoryImp::getBandwidth(zes_mem_bandwidth_t *pBandwidth) { return 
ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t LinuxMemoryImp::getState(zes_mem_state_t *pState) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } OsMemory *OsMemory::create(OsSysman *pOsSysman, ze_bool_t onSubdevice, uint32_t subdeviceId) { LinuxMemoryImp *pLinuxMemoryImp = new LinuxMemoryImp(pOsSysman, onSubdevice, subdeviceId); return static_cast(pLinuxMemoryImp); } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/memory/linux/os_memory_imp.h000066400000000000000000000017131422164147700311720ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/non_copyable_or_moveable.h" #include "shared/source/os_interface/linux/drm_neo.h" #include "sysman/memory/os_memory.h" namespace L0 { class SysfsAccess; struct Device; class LinuxMemoryImp : public OsMemory, NEO::NonCopyableOrMovableClass { public: ze_result_t getProperties(zes_mem_properties_t *pProperties) override; ze_result_t getBandwidth(zes_mem_bandwidth_t *pBandwidth) override; ze_result_t getState(zes_mem_state_t *pState) override; bool isMemoryModuleSupported() override; LinuxMemoryImp(OsSysman *pOsSysman, ze_bool_t onSubdevice, uint32_t subdeviceId); LinuxMemoryImp() = default; ~LinuxMemoryImp() override = default; protected: NEO::Drm *pDrm = nullptr; Device *pDevice = nullptr; private: bool isSubdevice = false; uint32_t subdeviceId = 0; }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/memory/linux/os_memory_imp_dg1.cpp000066400000000000000000000043471422164147700322660ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/linux/memory_info.h" #include "level_zero/tools/source/sysman/memory/linux/os_memory_imp.h" #include "sysman/linux/os_sysman_imp.h" namespace L0 { LinuxMemoryImp::LinuxMemoryImp(OsSysman *pOsSysman, ze_bool_t onSubdevice, uint32_t 
subdeviceId) : isSubdevice(onSubdevice), subdeviceId(subdeviceId) { LinuxSysmanImp *pLinuxSysmanImp = static_cast(pOsSysman); pDrm = &pLinuxSysmanImp->getDrm(); pDevice = pLinuxSysmanImp->getDeviceHandle(); } bool LinuxMemoryImp::isMemoryModuleSupported() { return pDevice->getDriverHandle()->getMemoryManager()->isLocalMemorySupported(pDevice->getRootDeviceIndex()); } ze_result_t LinuxMemoryImp::getProperties(zes_mem_properties_t *pProperties) { pProperties->location = ZES_MEM_LOC_DEVICE; pProperties->type = ZES_MEM_TYPE_DDR; pProperties->onSubdevice = isSubdevice; pProperties->subdeviceId = subdeviceId; pProperties->busWidth = -1; pProperties->numChannels = -1; pProperties->physicalSize = 0; return ZE_RESULT_SUCCESS; } ze_result_t LinuxMemoryImp::getBandwidth(zes_mem_bandwidth_t *pBandwidth) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t LinuxMemoryImp::getState(zes_mem_state_t *pState) { std::vector deviceRegions; if (pDrm->queryMemoryInfo() == false) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } auto memoryInfo = pDrm->getMemoryInfo(); if (!memoryInfo) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } for (auto region : memoryInfo->getDrmRegionInfos()) { if (region.region.memoryClass == I915_MEMORY_CLASS_DEVICE) { deviceRegions.push_back(region); } } pState->free = deviceRegions[subdeviceId].unallocatedSize; pState->size = deviceRegions[subdeviceId].probedSize; pState->health = ZES_MEM_HEALTH_OK; return ZE_RESULT_SUCCESS; } OsMemory *OsMemory::create(OsSysman *pOsSysman, ze_bool_t onSubdevice, uint32_t subdeviceId) { LinuxMemoryImp *pLinuxMemoryImp = new LinuxMemoryImp(pOsSysman, onSubdevice, subdeviceId); return static_cast(pLinuxMemoryImp); } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/memory/linux/os_memory_imp_prelim.cpp000066400000000000000000000216131422164147700330760ustar00rootroot00000000000000/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include 
"level_zero/tools/source/sysman/memory/linux/os_memory_imp_prelim.h" #include "shared/source/os_interface/linux/system_info.h" #include "level_zero/tools/source/sysman/sysman_const.h" #include "drm/intel_hwconfig_types.h" #include "igfxfmid.h" #include "sysman/linux/os_sysman_imp.h" namespace L0 { const std::string LinuxMemoryImp::deviceMemoryHealth("device_memory_health"); void LinuxMemoryImp::init() { if (isSubdevice) { const std::string baseDir = "gt/gt" + std::to_string(subdeviceId) + "/"; physicalSizeFile = baseDir + "addr_range"; } } LinuxMemoryImp::LinuxMemoryImp(OsSysman *pOsSysman, ze_bool_t onSubdevice, uint32_t subdeviceId) : isSubdevice(onSubdevice), subdeviceId(subdeviceId) { LinuxSysmanImp *pLinuxSysmanImp = static_cast(pOsSysman); pDrm = &pLinuxSysmanImp->getDrm(); pDevice = pLinuxSysmanImp->getDeviceHandle(); pSysfsAccess = &pLinuxSysmanImp->getSysfsAccess(); pPmt = pLinuxSysmanImp->getPlatformMonitoringTechAccess(subdeviceId); init(); } bool LinuxMemoryImp::isMemoryModuleSupported() { return pDevice->getDriverHandle()->getMemoryManager()->isLocalMemorySupported(pDevice->getRootDeviceIndex()); } ze_result_t LinuxMemoryImp::getProperties(zes_mem_properties_t *pProperties) { pProperties->type = ZES_MEM_TYPE_DDR; pProperties->numChannels = -1; if (pDrm->querySystemInfo()) { auto memSystemInfo = pDrm->getSystemInfo(); if (memSystemInfo != nullptr) { pProperties->numChannels = memSystemInfo->getMaxMemoryChannels(); auto memType = memSystemInfo->getMemoryType(); switch (memType) { case INTEL_HWCONFIG_MEMORY_TYPE_HBM2e: case INTEL_HWCONFIG_MEMORY_TYPE_HBM2: pProperties->type = ZES_MEM_TYPE_HBM; break; case INTEL_HWCONFIG_MEMORY_TYPE_LPDDR4: pProperties->type = ZES_MEM_TYPE_LPDDR4; break; case INTEL_HWCONFIG_MEMORY_TYPE_LPDDR5: pProperties->type = ZES_MEM_TYPE_LPDDR5; break; default: pProperties->type = ZES_MEM_TYPE_DDR; break; } } } pProperties->location = ZES_MEM_LOC_DEVICE; pProperties->onSubdevice = isSubdevice; pProperties->subdeviceId = subdeviceId; 
pProperties->busWidth = memoryBusWidth; // Hardcode pProperties->physicalSize = 0; if (isSubdevice) { std::string memval; ze_result_t result = pSysfsAccess->read(physicalSizeFile, memval); uint64_t intval = strtoull(memval.c_str(), nullptr, 16); if (ZE_RESULT_SUCCESS != result) { pProperties->physicalSize = 0u; } else { pProperties->physicalSize = intval; } } return ZE_RESULT_SUCCESS; } ze_result_t LinuxMemoryImp::getVFIDString(std::string &vfID) { uint32_t vf0VfIdVal = 0; std::string key = "VF0_VFID"; auto result = pPmt->readValue(key, vf0VfIdVal); if (result != ZE_RESULT_SUCCESS) { return result; } uint32_t vf1VfIdVal = 0; key = "VF1_VFID"; result = pPmt->readValue(key, vf1VfIdVal); if (result != ZE_RESULT_SUCCESS) { return result; } // At any point of time only one VF(virtual function) could be active and thus would // read greater than zero val. If both VF0 and VF1 are reading 0 or both are reading // greater than 0, then we would be confused in taking the decision of correct VF. // Lets assume and report this as a error condition if (((vf0VfIdVal == 0) && (vf1VfIdVal == 0)) || ((vf0VfIdVal > 0) && (vf1VfIdVal > 0))) { return ZE_RESULT_ERROR_UNKNOWN; } if (vf0VfIdVal > 0) { vfID = "VF0"; } if (vf1VfIdVal > 0) { vfID = "VF1"; } return result; } void LinuxMemoryImp::getHbmFrequency(PRODUCT_FAMILY productFamily, unsigned short stepping, uint64_t &hbmFrequency) { hbmFrequency = 0; if (productFamily == IGFX_XE_HP_SDV) { // For IGFX_XE_HP HBM frequency would be 2.8 GT/s = 2.8 * 1000 * 1000 * 1000 T/s = 2800000000 T/s hbmFrequency = 2.8 * gigaUnitTransferToUnitTransfer; } else if (productFamily == IGFX_PVC) { if (stepping == REVISION_B) { const std::string baseDir = "gt/gt" + std::to_string(subdeviceId) + "/"; // Calculating bandwidth based on HBM max frequency const std::string hbmRP0FreqFile = baseDir + "hbm_RP0_freq_mhz"; uint64_t hbmFreqValue = 0; ze_result_t result = pSysfsAccess->read(hbmRP0FreqFile, hbmFreqValue); if (ZE_RESULT_SUCCESS == result) { hbmFrequency 
= hbmFreqValue * 1000 * 1000; // Converting MHz value to Hz return; } } else if (stepping == REVISION_A0) { // For IGFX_PVC REV A0 HBM frequency would be 3.2 GT/s = 3.2 * 1000 * 1000 * 1000 T/s = 3200000000 T/s hbmFrequency = 3.2 * gigaUnitTransferToUnitTransfer; } } } ze_result_t LinuxMemoryImp::getBandwidth(zes_mem_bandwidth_t *pBandwidth) { if (pPmt == nullptr) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } std::string vfId = ""; auto result = getVFIDString(vfId); if (result != ZE_RESULT_SUCCESS) { return result; } uint32_t numHbmModules = 0u; auto &hwInfo = pDevice->getNEODevice()->getHardwareInfo(); auto productFamily = hwInfo.platform.eProductFamily; auto stepping = NEO::HwInfoConfig::get(productFamily)->getSteppingFromHwRevId(hwInfo); if (productFamily == IGFX_XE_HP_SDV) { numHbmModules = 2u; } else if (productFamily == IGFX_PVC) { numHbmModules = 4u; } pBandwidth->readCounter = 0; pBandwidth->writeCounter = 0; pBandwidth->timestamp = 0; pBandwidth->maxBandwidth = 0; for (auto hbmModuleIndex = 0u; hbmModuleIndex < numHbmModules; hbmModuleIndex++) { uint32_t counterValue = 0; // To read counters from VFID 0 and HBM module 0, key would be: VF0_HBM0_READ std::string readCounterKey = vfId + "_HBM" + std::to_string(hbmModuleIndex) + "_READ"; result = pPmt->readValue(readCounterKey, counterValue); if (result != ZE_RESULT_SUCCESS) { return result; } pBandwidth->readCounter += counterValue; counterValue = 0; // To write counters to VFID 0 and HBM module 0, key would be: VF0_HBM0_Write std::string writeCounterKey = vfId + "_HBM" + std::to_string(hbmModuleIndex) + "_WRITE"; result = pPmt->readValue(writeCounterKey, counterValue); if (result != ZE_RESULT_SUCCESS) { return result; } pBandwidth->writeCounter += counterValue; } uint32_t timeStampL = 0; std::string timeStamp = vfId + "_TIMESTAMP_L"; result = pPmt->readValue(timeStamp, timeStampL); if (result != ZE_RESULT_SUCCESS) { return result; } uint32_t timeStampH = 0; timeStamp = vfId + "_TIMESTAMP_H"; result = 
pPmt->readValue(timeStamp, timeStampH); if (result != ZE_RESULT_SUCCESS) { return result; } pBandwidth->timestamp |= timeStampH; pBandwidth->timestamp = (pBandwidth->timestamp << 32) | timeStampL; uint64_t hbmFrequency = 0; getHbmFrequency(productFamily, stepping, hbmFrequency); pBandwidth->maxBandwidth = memoryBusWidth * hbmFrequency * numHbmModules; pBandwidth->maxBandwidth /= 8; // Divide by 8 to get bandwidth in bytes/sec return result; } ze_result_t LinuxMemoryImp::getState(zes_mem_state_t *pState) { std::string memHealth; ze_result_t result = pSysfsAccess->read(deviceMemoryHealth, memHealth); if (ZE_RESULT_SUCCESS != result) { pState->health = ZES_MEM_HEALTH_UNKNOWN; } else { auto health = i915ToL0MemHealth.find(memHealth); if (health == i915ToL0MemHealth.end()) { pState->health = ZES_MEM_HEALTH_UNKNOWN; } else { pState->health = i915ToL0MemHealth.at(memHealth); } } std::vector deviceRegions; auto memRegions = pDrm->getMemoryRegions(); if (memRegions.empty()) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } auto regions = pDrm->getIoctlHelper()->translateToMemoryRegions(memRegions); for (auto region : regions) { if (region.region.memoryClass == I915_MEMORY_CLASS_DEVICE) { deviceRegions.push_back(region); } } UNRECOVERABLE_IF(deviceRegions.size() <= subdeviceId); pState->free = deviceRegions[subdeviceId].unallocatedSize; pState->size = deviceRegions[subdeviceId].probedSize; return ZE_RESULT_SUCCESS; } OsMemory *OsMemory::create(OsSysman *pOsSysman, ze_bool_t onSubdevice, uint32_t subdeviceId) { LinuxMemoryImp *pLinuxMemoryImp = new LinuxMemoryImp(pOsSysman, onSubdevice, subdeviceId); return static_cast(pLinuxMemoryImp); } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/memory/linux/os_memory_imp_prelim.h000066400000000000000000000043121422164147700325400ustar00rootroot00000000000000/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/non_copyable_or_moveable.h" 
#include "shared/source/os_interface/linux/drm_neo.h" #include "sysman/memory/os_memory.h" #include namespace L0 { class SysfsAccess; struct Device; class PlatformMonitoringTech; class LinuxMemoryImp : public OsMemory, NEO::NonCopyableOrMovableClass { public: ze_result_t getProperties(zes_mem_properties_t *pProperties) override; ze_result_t getBandwidth(zes_mem_bandwidth_t *pBandwidth) override; ze_result_t getState(zes_mem_state_t *pState) override; bool isMemoryModuleSupported() override; LinuxMemoryImp(OsSysman *pOsSysman, ze_bool_t onSubdevice, uint32_t subdeviceId); LinuxMemoryImp() = default; ~LinuxMemoryImp() override = default; protected: SysfsAccess *pSysfsAccess = nullptr; NEO::Drm *pDrm = nullptr; Device *pDevice = nullptr; PlatformMonitoringTech *pPmt = nullptr; void getHbmFrequency(PRODUCT_FAMILY productFamily, unsigned short stepping, uint64_t &hbmFrequency); private: ze_result_t getVFIDString(std::string &vfID); static const std::string deviceMemoryHealth; bool isSubdevice = false; uint32_t subdeviceId = 0; std::string physicalSizeFile; void init(); }; // Mapping of i915 memory health to L0 health params // OK status for No error, no HBM memory sparing, default value // REBOOT_ALARM status for Hardware warning interrupt received and uevent has been sent, and system should be rebooted ASAP // EC_FAILED sysfs status for Error correction failed: user did not reboot, and the uncorrectable errors happened // DEGRADED sysfs status for System has been rebooted and memory sparing is in action, detectable at boot time // DEGRADED_FAILED sysfs status for Upon receival of the final interrupt that uncorrectable errors happened when memory was already in sparing mode const std::map i915ToL0MemHealth{ {"OK", ZES_MEM_HEALTH_OK}, {"REBOOT_ALARM", ZES_MEM_HEALTH_DEGRADED}, {"DEGRADED", ZES_MEM_HEALTH_CRITICAL}, {"DEGRADED_FAILED", ZES_MEM_HEALTH_REPLACE}, {"EC_FAILED", ZES_MEM_HEALTH_REPLACE}}; } // namespace L0 
compute-runtime-22.14.22890/level_zero/tools/source/sysman/memory/memory.cpp000066400000000000000000000026311422164147700270200ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/basic_math.h" #include "shared/source/memory_manager/memory_manager.h" #include "level_zero/core/source/device/device.h" #include "level_zero/tools/source/sysman/memory/memory_imp.h" namespace L0 { MemoryHandleContext::~MemoryHandleContext() { for (Memory *pMemory : handleList) { delete pMemory; } } void MemoryHandleContext::createHandle(ze_device_handle_t deviceHandle) { Memory *pMemory = new MemoryImp(pOsSysman, deviceHandle); if (pMemory->initSuccess == true) { handleList.push_back(pMemory); } else { delete pMemory; } } ze_result_t MemoryHandleContext::init(std::vector &deviceHandles) { for (const auto &deviceHandle : deviceHandles) { createHandle(deviceHandle); } return ZE_RESULT_SUCCESS; } ze_result_t MemoryHandleContext::memoryGet(uint32_t *pCount, zes_mem_handle_t *phMemory) { uint32_t handleListSize = static_cast(handleList.size()); uint32_t numToCopy = std::min(*pCount, handleListSize); if (0 == *pCount || *pCount > handleListSize) { *pCount = handleListSize; } if (nullptr != phMemory) { for (uint32_t i = 0; i < numToCopy; i++) { phMemory[i] = handleList[i]->toHandle(); } } return ZE_RESULT_SUCCESS; } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/memory/memory.h000066400000000000000000000023611422164147700264650ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/core/source/device/device.h" #include #include struct _zes_mem_handle_t { virtual ~_zes_mem_handle_t() = default; }; namespace L0 { struct OsSysman; class Memory : _zes_mem_handle_t { public: virtual ze_result_t memoryGetProperties(zes_mem_properties_t *pProperties) = 0; virtual ze_result_t 
memoryGetBandwidth(zes_mem_bandwidth_t *pBandwidth) = 0; virtual ze_result_t memoryGetState(zes_mem_state_t *pState) = 0; static Memory *fromHandle(zes_mem_handle_t handle) { return static_cast(handle); } inline zes_mem_handle_t toHandle() { return this; } bool initSuccess = false; }; struct MemoryHandleContext { MemoryHandleContext(OsSysman *pOsSysman) : pOsSysman(pOsSysman){}; ~MemoryHandleContext(); ze_result_t init(std::vector &deviceHandles); ze_result_t memoryGet(uint32_t *pCount, zes_mem_handle_t *phMemory); OsSysman *pOsSysman = nullptr; bool isLmemSupported = false; std::vector handleList = {}; private: void createHandle(ze_device_handle_t deviceHandle); }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/memory/memory_imp.cpp000066400000000000000000000022671422164147700276720ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/memory/memory_imp.h" #include "level_zero/tools/source/sysman/sysman_imp.h" namespace L0 { ze_result_t MemoryImp::memoryGetBandwidth(zes_mem_bandwidth_t *pBandwidth) { return pOsMemory->getBandwidth(pBandwidth); } ze_result_t MemoryImp::memoryGetState(zes_mem_state_t *pState) { return pOsMemory->getState(pState); } ze_result_t MemoryImp::memoryGetProperties(zes_mem_properties_t *pProperties) { *pProperties = memoryProperties; return ZE_RESULT_SUCCESS; } void MemoryImp::init() { this->initSuccess = pOsMemory->isMemoryModuleSupported(); if (this->initSuccess == true) { pOsMemory->getProperties(&memoryProperties); } } MemoryImp::MemoryImp(OsSysman *pOsSysman, ze_device_handle_t handle) { uint32_t subdeviceId = 0; ze_bool_t onSubdevice = false; SysmanDeviceImp::getSysmanDeviceInfo(handle, subdeviceId, onSubdevice); pOsMemory = OsMemory::create(pOsSysman, onSubdevice, subdeviceId); init(); } MemoryImp::~MemoryImp() { if (nullptr != pOsMemory) { delete pOsMemory; } } } // namespace L0 
compute-runtime-22.14.22890/level_zero/tools/source/sysman/memory/memory_imp.h000066400000000000000000000015671422164147700273410ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/non_copyable_or_moveable.h" #include "level_zero/tools/source/sysman/memory/memory.h" #include "level_zero/tools/source/sysman/memory/os_memory.h" #include namespace L0 { class MemoryImp : public Memory, NEO::NonCopyableOrMovableClass { public: ze_result_t memoryGetProperties(zes_mem_properties_t *pProperties) override; ze_result_t memoryGetBandwidth(zes_mem_bandwidth_t *pBandwidth) override; ze_result_t memoryGetState(zes_mem_state_t *pState) override; MemoryImp(OsSysman *pOsSysman, ze_device_handle_t handle); ~MemoryImp() override; MemoryImp() = default; void init(); OsMemory *pOsMemory = nullptr; private: zes_mem_properties_t memoryProperties = {}; }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/memory/os_memory.h000066400000000000000000000011311422164147700271600ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include namespace L0 { struct OsSysman; class OsMemory { public: virtual ze_result_t getProperties(zes_mem_properties_t *pProperties) = 0; virtual ze_result_t getBandwidth(zes_mem_bandwidth_t *pBandwidth) = 0; virtual ze_result_t getState(zes_mem_state_t *pState) = 0; virtual bool isMemoryModuleSupported() = 0; static OsMemory *create(OsSysman *pOsSysman, ze_bool_t onSubdevice, uint32_t subdeviceId); virtual ~OsMemory() {} }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/memory/windows/000077500000000000000000000000001422164147700264745ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/memory/windows/CMakeLists.txt000077500000000000000000000007741422164147700312470ustar00rootroot00000000000000# # 
Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_TOOLS_SYSMAN_MEMORY_WINDOWS ${CMAKE_CURRENT_SOURCE_DIR}/os_memory_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/os_memory_imp.cpp ) if(WIN32) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_SYSMAN_MEMORY_WINDOWS} ) endif() # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_SYSMAN_MEMORY_WINDOWS ${L0_SRCS_TOOLS_SYSMAN_MEMORY_WINDOWS}) compute-runtime-22.14.22890/level_zero/tools/source/sysman/memory/windows/os_memory_imp.cpp000066400000000000000000000250061422164147700320610ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "sysman/memory/windows/os_memory_imp.h" template std::string intToHex(I w, size_t hexLength = sizeof(I) << 1) { static const char *digits = "0123456789ABCDEF"; std::string retString(hexLength, '0'); constexpr uint32_t intSize = sizeof(uint32_t); for (size_t i = 0, j = (hexLength - 1) * intSize; i < hexLength; ++i, j -= intSize) retString[i] = digits[(w >> j) & 0x0f]; return (std::string("0x") + retString); } std::wstring toWString(std::string str) { std::wstring wsTmp(str.begin(), str.end()); return wsTmp; } std::wstring constructCounterStr(std::wstring object, std::wstring counter, LUID luid, uint32_t index) { std::wstring fstr = L"\\"; fstr += object; fstr += L"(luid_"; fstr += toWString(intToHex((long)luid.HighPart)); fstr += L"_"; fstr += toWString(intToHex((unsigned long)luid.LowPart)); fstr += L"_phys_"; fstr += std::to_wstring(index); fstr += L")\\"; fstr += counter; return fstr; } namespace L0 { bool WddmMemoryImp::isMemoryModuleSupported() { uint32_t value = 0; KmdSysman::RequestProperty request; KmdSysman::ResponseProperty response; request.commandId = KmdSysman::Command::Get; request.componentId = KmdSysman::Component::MemoryComponent; request.requestId = KmdSysman::Requests::Memory::NumMemoryDomains; if (pKmdSysManager->requestSingle(request, 
response) != ZE_RESULT_SUCCESS) { return false; } memcpy_s(&value, sizeof(uint32_t), response.dataBuffer, sizeof(uint32_t)); return (value > 0); } ze_result_t WddmMemoryImp::getProperties(zes_mem_properties_t *pProperties) { uint32_t valueSmall = 0; uint64_t valueLarge = 0; std::vector vRequests = {}; std::vector vResponses = {}; KmdSysman::RequestProperty request = {}; pProperties->onSubdevice = isSubdevice; pProperties->subdeviceId = subdeviceId; request.commandId = KmdSysman::Command::Get; request.componentId = KmdSysman::Component::MemoryComponent; request.requestId = KmdSysman::Requests::Memory::MemoryType; vRequests.push_back(request); request.requestId = KmdSysman::Requests::Memory::PhysicalSize; vRequests.push_back(request); request.requestId = KmdSysman::Requests::Memory::NumChannels; vRequests.push_back(request); request.requestId = KmdSysman::Requests::Memory::MemoryLocation; vRequests.push_back(request); request.requestId = KmdSysman::Requests::Memory::MemoryWidth; vRequests.push_back(request); ze_result_t status = pKmdSysManager->requestMultiple(vRequests, vResponses); if ((status != ZE_RESULT_SUCCESS) || (vResponses.size() != vRequests.size())) { return status; } pProperties->type = ZES_MEM_TYPE_FORCE_UINT32; if (vResponses[0].returnCode == KmdSysman::Success) { memcpy_s(&valueSmall, sizeof(uint32_t), vResponses[0].dataBuffer, sizeof(uint32_t)); switch (valueSmall) { case KmdSysman::MemoryType::DDR4: { pProperties->type = ZES_MEM_TYPE_DDR4; } break; case KmdSysman::MemoryType::DDR5: { pProperties->type = ZES_MEM_TYPE_DDR5; } break; case KmdSysman::MemoryType::LPDDR5: { pProperties->type = ZES_MEM_TYPE_LPDDR5; } break; case KmdSysman::MemoryType::LPDDR4: { pProperties->type = ZES_MEM_TYPE_LPDDR4; } break; case KmdSysman::MemoryType::DDR3: { pProperties->type = ZES_MEM_TYPE_DDR3; } break; case KmdSysman::MemoryType::LPDDR3: { pProperties->type = ZES_MEM_TYPE_LPDDR3; } break; case KmdSysman::MemoryType::GDDR4: { pProperties->type = ZES_MEM_TYPE_GDDR4; } 
break; case KmdSysman::MemoryType::GDDR5: { pProperties->type = ZES_MEM_TYPE_GDDR5; } break; case KmdSysman::MemoryType::GDDR5X: { pProperties->type = ZES_MEM_TYPE_GDDR5X; } break; case KmdSysman::MemoryType::GDDR6: { pProperties->type = ZES_MEM_TYPE_GDDR6; } break; case KmdSysman::MemoryType::GDDR6X: { pProperties->type = ZES_MEM_TYPE_GDDR6X; } break; case KmdSysman::MemoryType::GDDR7: { pProperties->type = ZES_MEM_TYPE_GDDR7; } break; default: { pProperties->type = ZES_MEM_TYPE_FORCE_UINT32; } break; } } pProperties->physicalSize = 0; if (vResponses[1].returnCode == KmdSysman::Success) { memcpy_s(&valueLarge, sizeof(uint64_t), vResponses[1].dataBuffer, sizeof(uint64_t)); pProperties->physicalSize = valueLarge; } pProperties->numChannels = -1; if (vResponses[2].returnCode == KmdSysman::Success) { memcpy_s(&valueSmall, sizeof(uint32_t), vResponses[2].dataBuffer, sizeof(uint32_t)); pProperties->numChannels = valueSmall; } pProperties->location = ZES_MEM_LOC_FORCE_UINT32; if (vResponses[3].returnCode == KmdSysman::Success) { memcpy_s(&valueSmall, sizeof(uint32_t), vResponses[3].dataBuffer, sizeof(uint32_t)); pProperties->location = static_cast(valueSmall); } pProperties->busWidth = -1; if (vResponses[4].returnCode == KmdSysman::Success) { memcpy_s(&valueSmall, sizeof(uint32_t), vResponses[4].dataBuffer, sizeof(uint32_t)); pProperties->busWidth = valueSmall; } pProperties->subdeviceId = 0; pProperties->onSubdevice = false; return ZE_RESULT_SUCCESS; } ze_result_t WddmMemoryImp::getBandwidth(zes_mem_bandwidth_t *pBandwidth) { uint32_t valueSmall = 0; uint64_t valueLarge = 0; std::vector vRequests = {}; std::vector vResponses = {}; KmdSysman::RequestProperty request = {}; request.commandId = KmdSysman::Command::Get; request.componentId = KmdSysman::Component::MemoryComponent; request.requestId = KmdSysman::Requests::Memory::MaxBandwidth; vRequests.push_back(request); request.requestId = KmdSysman::Requests::Memory::CurrentBandwidthRead; vRequests.push_back(request); 
request.requestId = KmdSysman::Requests::Memory::CurrentBandwidthWrite; vRequests.push_back(request); ze_result_t status = pKmdSysManager->requestMultiple(vRequests, vResponses); if ((status != ZE_RESULT_SUCCESS) || (vResponses.size() != vRequests.size())) { return status; } pBandwidth->maxBandwidth = 0; if (vResponses[0].returnCode == KmdSysman::Success) { memcpy_s(&valueSmall, sizeof(uint32_t), vResponses[0].dataBuffer, sizeof(uint32_t)); pBandwidth->maxBandwidth = static_cast(valueSmall) * static_cast(MbpsToBytesPerSecond); } pBandwidth->readCounter = 0; if (vResponses[1].returnCode == KmdSysman::Success) { memcpy_s(&valueLarge, sizeof(uint64_t), vResponses[1].dataBuffer, sizeof(uint64_t)); pBandwidth->readCounter = valueLarge; } pBandwidth->writeCounter = 0; if (vResponses[2].returnCode == KmdSysman::Success) { memcpy_s(&valueLarge, sizeof(uint64_t), vResponses[2].dataBuffer, sizeof(uint64_t)); pBandwidth->writeCounter = valueLarge; } std::chrono::time_point ts = std::chrono::steady_clock::now(); pBandwidth->timestamp = std::chrono::duration_cast(ts.time_since_epoch()).count(); return ZE_RESULT_SUCCESS; } ze_result_t WddmMemoryImp::getState(zes_mem_state_t *pState) { ze_result_t status = ZE_RESULT_SUCCESS; uint64_t valueLarge = 0; KmdSysman::RequestProperty request; KmdSysman::ResponseProperty response; pState->health = ZES_MEM_HEALTH_OK; request.commandId = KmdSysman::Command::Get; request.componentId = KmdSysman::Component::MemoryComponent; request.requestId = KmdSysman::Requests::Memory::PhysicalSize; status = pKmdSysManager->requestSingle(request, response); if (status != ZE_RESULT_SUCCESS) { return status; } memcpy_s(&valueLarge, sizeof(uint64_t), response.dataBuffer, sizeof(uint64_t)); pState->size = valueLarge; if (!pdhInitialized) { if (pdhOpenQuery && pdhOpenQuery(NULL, NULL, &gpuQuery) == ERROR_SUCCESS) { pdhInitialized = true; } } if (!pdhCounterAdded && pdhAddEnglishCounterW && pKmdSysManager->GetWddmAccess()) { std::wstring counterStr = 
constructCounterStr(L"GPU Adapter Memory", L"Dedicated Usage", pKmdSysManager->GetWddmAccess()->getAdapterLuid(), 0); pdhCounterAdded = (pdhAddEnglishCounterW(gpuQuery, counterStr.c_str(), NULL, &dedicatedUsage) == ERROR_SUCCESS); } if (pdhCounterAdded && pdhCollectQueryData && pdhGetFormattedCounterValue) { PDH_FMT_COUNTERVALUE counterVal; pdhCollectQueryData(gpuQuery); pdhGetFormattedCounterValue(dedicatedUsage, PDH_FMT_LARGE, NULL, &counterVal); valueLarge = counterVal.largeValue; pState->free = pState->size - valueLarge; } return ZE_RESULT_SUCCESS; } WddmMemoryImp::WddmMemoryImp(OsSysman *pOsSysman, ze_bool_t onSubdevice, uint32_t subdeviceId) : isSubdevice(onSubdevice), subdeviceId(subdeviceId) { WddmSysmanImp *pWddmSysmanImp = static_cast(pOsSysman); pKmdSysManager = &pWddmSysmanImp->getKmdSysManager(); pDevice = pWddmSysmanImp->getDeviceHandle(); hGetProcPDH = LoadLibrary(L"C:\\Windows\\System32\\pdh.dll"); if (hGetProcPDH) { pdhOpenQuery = reinterpret_cast(GetProcAddress(hGetProcPDH, "PdhOpenQueryW")); pdhAddEnglishCounterW = reinterpret_cast(GetProcAddress(hGetProcPDH, "PdhAddEnglishCounterW")); pdhCollectQueryData = reinterpret_cast(GetProcAddress(hGetProcPDH, "PdhCollectQueryData")); pdhGetFormattedCounterValue = reinterpret_cast(GetProcAddress(hGetProcPDH, "PdhGetFormattedCounterValue")); pdhCloseQuery = reinterpret_cast(GetProcAddress(hGetProcPDH, "PdhCloseQuery")); } } WddmMemoryImp::~WddmMemoryImp() { if (pdhInitialized && pdhCloseQuery) { pdhCloseQuery(gpuQuery); } if (hGetProcPDH) { FreeLibrary(hGetProcPDH); } } OsMemory *OsMemory::create(OsSysman *pOsSysman, ze_bool_t onSubdevice, uint32_t subdeviceId) { WddmMemoryImp *pWddmMemoryImp = new WddmMemoryImp(pOsSysman, onSubdevice, subdeviceId); return static_cast(pWddmMemoryImp); } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/memory/windows/os_memory_imp.h000066400000000000000000000040461422164147700315270ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel 
Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/non_copyable_or_moveable.h" #include "shared/source/os_interface/windows/wddm/wddm.h" #include "level_zero/tools/source/sysman/sysman_const.h" #include "sysman/memory/os_memory.h" #include "sysman/windows/os_sysman_imp.h" #include typedef PDH_STATUS(__stdcall *fn_PdhOpenQueryW)(LPCWSTR szDataSource, DWORD_PTR dwUserData, PDH_HQUERY *phQuery); typedef PDH_STATUS(__stdcall *fn_PdhAddEnglishCounterW)(PDH_HQUERY hQuery, LPCWSTR szFullCounterPath, DWORD_PTR dwUserData, PDH_HCOUNTER *phCounter); typedef PDH_STATUS(__stdcall *fn_PdhCollectQueryData)(PDH_HQUERY hQuery); typedef PDH_STATUS(__stdcall *fn_PdhGetFormattedCounterValue)(PDH_HCOUNTER hCounter, DWORD dwFormat, LPDWORD lpdwType, PPDH_FMT_COUNTERVALUE pValue); typedef PDH_STATUS(__stdcall *fn_PdhCloseQuery)(PDH_HQUERY hQuery); namespace L0 { class KmdSysManager; class WddmMemoryImp : public OsMemory, NEO::NonCopyableOrMovableClass { public: ze_result_t getProperties(zes_mem_properties_t *pProperties) override; ze_result_t getBandwidth(zes_mem_bandwidth_t *pBandwidth) override; ze_result_t getState(zes_mem_state_t *pState) override; bool isMemoryModuleSupported() override; WddmMemoryImp(OsSysman *pOsSysman, ze_bool_t onSubdevice, uint32_t subdeviceId); WddmMemoryImp() = default; ~WddmMemoryImp(); protected: KmdSysManager *pKmdSysManager = nullptr; Device *pDevice = nullptr; bool isSubdevice = false; uint32_t subdeviceId = 0; fn_PdhOpenQueryW pdhOpenQuery = nullptr; fn_PdhAddEnglishCounterW pdhAddEnglishCounterW = nullptr; fn_PdhCollectQueryData pdhCollectQueryData = nullptr; fn_PdhGetFormattedCounterValue pdhGetFormattedCounterValue = nullptr; fn_PdhCloseQuery pdhCloseQuery = nullptr; bool pdhInitialized = false; bool pdhCounterAdded = false; PDH_HQUERY gpuQuery = nullptr; PDH_HCOUNTER dedicatedUsage = nullptr; HINSTANCE hGetProcPDH = nullptr; }; } // namespace L0 
compute-runtime-22.14.22890/level_zero/tools/source/sysman/os_sysman.h000066400000000000000000000005231422164147700256560ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include namespace L0 { struct SysmanDeviceImp; struct OsSysman { virtual ~OsSysman(){}; virtual ze_result_t init() = 0; static OsSysman *create(SysmanDeviceImp *pSysmanImp); }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/pci/000077500000000000000000000000001422164147700242455ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/pci/CMakeLists.txt000066400000000000000000000012071422164147700270050ustar00rootroot00000000000000# # Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_TOOLS_SYSMAN_PCI ${CMAKE_CURRENT_SOURCE_DIR}/pci.h ${CMAKE_CURRENT_SOURCE_DIR}/pci_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/pci_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/os_pci.h ) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_SYSMAN_PCI} ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) if(UNIX) add_subdirectory(linux) else() add_subdirectory(windows) endif() # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_SYSMAN_PCI ${L0_SRCS_TOOLS_SYSMAN_PCI}) compute-runtime-22.14.22890/level_zero/tools/source/sysman/pci/linux/000077500000000000000000000000001422164147700254045ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/pci/linux/CMakeLists.txt000066400000000000000000000010251422164147700301420ustar00rootroot00000000000000# # Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_TOOLS_SYSMAN_PCI_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/os_pci_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/os_pci_imp.h ) if(UNIX) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_SYSMAN_PCI_LINUX} ) endif() # Make our source 
files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_SYSMAN_PCI_LINUX ${L0_SRCS_TOOLS_SYSMAN_PCI_LINUX}) compute-runtime-22.14.22890/level_zero/tools/source/sysman/pci/linux/os_pci_imp.cpp000066400000000000000000000260671422164147700302440ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/pci/linux/os_pci_imp.h" #include "shared/source/utilities/directory.h" #include "level_zero/tools/source/sysman/linux/fs_access.h" #include "level_zero/tools/source/sysman/sysman_const.h" #include "sysman/pci/pci_imp.h" #include namespace L0 { const std::string LinuxPciImp::deviceDir("device"); const std::string LinuxPciImp::resourceFile("device/resource"); const std::string LinuxPciImp::maxLinkSpeedFile("device/max_link_speed"); const std::string LinuxPciImp::maxLinkWidthFile("device/max_link_width"); ze_result_t LinuxPciImp::getProperties(zes_pci_properties_t *properties) { properties->haveBandwidthCounters = false; properties->havePacketCounters = false; properties->haveReplayCounters = false; return ZE_RESULT_SUCCESS; } ze_result_t LinuxPciImp::getPciBdf(zes_pci_properties_t &pciProperties) { std::string bdfDir; ze_result_t result = pSysfsAccess->readSymLink(deviceDir, bdfDir); if (ZE_RESULT_SUCCESS != result) { return result; } const auto loc = bdfDir.find_last_of('/'); std::string bdf = bdfDir.substr(loc + 1); uint16_t domain = 0; uint8_t bus = 0, device = 0, function = 0; NEO::parseBdfString(bdf.c_str(), domain, bus, device, function); pciProperties.address.domain = static_cast(domain); pciProperties.address.bus = static_cast(bus); pciProperties.address.device = static_cast(device); pciProperties.address.function = static_cast(function); return ZE_RESULT_SUCCESS; } ze_result_t LinuxPciImp::getMaxLinkSpeed(double &maxLinkSpeed) { ze_result_t result; if (isLmemSupported) { std::string rootPortPath; std::string realRootPath; result = 
pSysfsAccess->getRealPath(deviceDir, realRootPath); if (ZE_RESULT_SUCCESS != result) { maxLinkSpeed = 0; return result; } // we need to get actual values of speed and width at the Discrete card's root port. rootPortPath = pLinuxSysmanImp->getPciRootPortDirectoryPath(realRootPath); result = pfsAccess->read(rootPortPath + '/' + "max_link_speed", maxLinkSpeed); if (ZE_RESULT_SUCCESS != result) { maxLinkSpeed = 0; return result; } } else { result = pSysfsAccess->read(maxLinkSpeedFile, maxLinkSpeed); if (ZE_RESULT_SUCCESS != result) { maxLinkSpeed = 0; return result; } } return ZE_RESULT_SUCCESS; } ze_result_t LinuxPciImp::getMaxLinkWidth(int32_t &maxLinkwidth) { ze_result_t result; if (isLmemSupported) { std::string rootPortPath; std::string realRootPath; result = pSysfsAccess->getRealPath(deviceDir, realRootPath); if (ZE_RESULT_SUCCESS != result) { maxLinkwidth = -1; return result; } // we need to get actual values of speed and width at the Discrete card's root port. rootPortPath = pLinuxSysmanImp->getPciRootPortDirectoryPath(realRootPath); result = pfsAccess->read(rootPortPath + '/' + "max_link_width", maxLinkwidth); if (ZE_RESULT_SUCCESS != result) { maxLinkwidth = -1; return result; } if (maxLinkwidth == static_cast(unknownPcieLinkWidth)) { maxLinkwidth = -1; } } else { result = pSysfsAccess->read(maxLinkWidthFile, maxLinkwidth); if (ZE_RESULT_SUCCESS != result) { return result; } if (maxLinkwidth == static_cast(unknownPcieLinkWidth)) { maxLinkwidth = -1; } } return ZE_RESULT_SUCCESS; } void getBarBaseAndSize(std::string readBytes, uint64_t &baseAddr, uint64_t &barSize, uint64_t &barFlags) { unsigned long long start, end, flags; std::stringstream sStreamReadBytes; sStreamReadBytes << readBytes; sStreamReadBytes >> std::hex >> start; sStreamReadBytes >> end; sStreamReadBytes >> flags; flags &= 0xf; barFlags = flags; baseAddr = start; barSize = end - start + 1; } ze_result_t LinuxPciImp::initializeBarProperties(std::vector &pBarProperties) { std::vector ReadBytes; 
ze_result_t result = pSysfsAccess->read(resourceFile, ReadBytes); if (result != ZE_RESULT_SUCCESS) { return result; } for (uint32_t i = 0; i <= maxPciBars; i++) { uint64_t baseAddr, barSize, barFlags; getBarBaseAndSize(ReadBytes[i], baseAddr, barSize, barFlags); if (baseAddr && !(barFlags & 0x1)) { // we do not update for I/O ports zes_pci_bar_properties_t *pBarProp = new zes_pci_bar_properties_t; memset(pBarProp, 0, sizeof(zes_pci_bar_properties_t)); pBarProp->index = i; pBarProp->base = baseAddr; pBarProp->size = barSize; // Bar Flags Desc. // Bit-0 - Value 0x0 -> MMIO type BAR // Bit-0 - Value 0x1 -> I/O type BAR if (i == 0) { // GRaphics MMIO is at BAR0, and is a 64-bit pBarProp->type = ZES_PCI_BAR_TYPE_MMIO; } if (i == 2) { pBarProp->type = ZES_PCI_BAR_TYPE_MEM; // device memory is always at BAR2 } if (i == 6) { // the 7th entry of resource file is expected to be ROM BAR pBarProp->type = ZES_PCI_BAR_TYPE_ROM; } pBarProperties.push_back(pBarProp); } } if (pBarProperties.size() == 0) { result = ZE_RESULT_ERROR_UNKNOWN; } return result; } uint32_t LinuxPciImp::getRebarCapabilityPos() { uint32_t pos = PCI_CFG_SPACE_SIZE; uint32_t header = 0; if (!configMemory) { return 0; } // Minimum 8 bytes per capability. Hence maximum capabilities that // could be present in PCI extended configuration space are // represented by loopCount. 
auto loopCount = (PCI_CFG_SPACE_EXP_SIZE - PCI_CFG_SPACE_SIZE) / 8; header = getDwordFromConfig(pos); if (!header) { return 0; } while (loopCount-- > 0) { if (PCI_EXT_CAP_ID(header) == PCI_EXT_CAP_ID_REBAR) { return pos; } pos = PCI_EXT_CAP_NEXT(header); if (pos < PCI_CFG_SPACE_SIZE) { return 0; } header = getDwordFromConfig(pos); } return 0; } // Parse PCIe configuration space to see if resizable Bar is supported bool LinuxPciImp::resizableBarSupported() { return (getRebarCapabilityPos() > 0); } bool LinuxPciImp::resizableBarEnabled(uint32_t barIndex) { bool isBarResizable = false; uint32_t capabilityRegister = 0, controlRegister = 0; uint32_t nBars = 1; auto rebarCapabilityPos = getRebarCapabilityPos(); // If resizable Bar is not supported then return false. if (!rebarCapabilityPos) { return false; } // As per PCI spec, resizable BAR's capability structure's 52 byte length could be represented as: // -------------------------------------------------------------- // | byte offset | Description of register | // -------------------------------------------------------------| // | +000h | PCI Express Extended Capability Header | // -------------------------------------------------------------| // | +004h | Resizable BAR Capability Register (0) | // -------------------------------------------------------------| // | +008h | Resizable BAR Control Register (0) | // -------------------------------------------------------------| // | +00Ch | Resizable BAR Capability Register (1) | // -------------------------------------------------------------| // | +010h | Resizable BAR Control Register (1) | // -------------------------------------------------------------| // | +014h | --- | // -------------------------------------------------------------| // Only first Control register(at offset 008h, as shown above), could tell about number of resizable Bars controlRegister = getDwordFromConfig(rebarCapabilityPos + PCI_REBAR_CTRL); nBars = BITS(controlRegister, 5, 3); // control 
register's bits 5,6 and 7 contain number of resizable bars information for (auto barNumber = 0u; barNumber < nBars; barNumber++) { uint32_t barId = 0; controlRegister = getDwordFromConfig(rebarCapabilityPos + PCI_REBAR_CTRL); barId = BITS(controlRegister, 0, 3); // Control register's bit 0,1,2 tells the index of bar if (barId == barIndex) { isBarResizable = true; break; } rebarCapabilityPos += 8; } if (isBarResizable == false) { return false; } capabilityRegister = getDwordFromConfig(rebarCapabilityPos + PCI_REBAR_CAP); // Capability register's bit 4 to 31 indicates supported Bar sizes. // In possibleBarSizes, position of each set bit indicates supported bar size. Example, if set bit // position of possibleBarSizes is from 0 to n, then this indicates BAR size from 2^0 MB to 2^n MB auto possibleBarSizes = (capabilityRegister & PCI_REBAR_CAP_SIZES) >> 4; // First 4 bits are reserved uint32_t largestPossibleBarSize = 0; while (possibleBarSizes >>= 1) { // most significant set bit position of possibleBarSizes would tell larget possible bar size largestPossibleBarSize++; } // Control register's bit 8 to 13 indicates current BAR size in encoded form. // Example, real value of current size could be 2^currentSize MB auto currentSize = BITS(controlRegister, 8, 6); // If current size is equal to larget possible BAR size, it indicates resizable BAR is enabled. 
return (currentSize == largestPossibleBarSize); } ze_result_t LinuxPciImp::getState(zes_pci_state_t *state) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } void LinuxPciImp::pciExtendedConfigRead() { std::string pciConfigNode; pSysfsAccess->getRealPath("device/config", pciConfigNode); int fdConfig = -1; fdConfig = this->openFunction(pciConfigNode.c_str(), O_RDONLY); if (fdConfig < 0) { return; } configMemory = std::make_unique(PCI_CFG_SPACE_EXP_SIZE); memset(configMemory.get(), 0, PCI_CFG_SPACE_EXP_SIZE); this->preadFunction(fdConfig, configMemory.get(), PCI_CFG_SPACE_EXP_SIZE, 0); this->closeFunction(fdConfig); } LinuxPciImp::LinuxPciImp(OsSysman *pOsSysman) { pLinuxSysmanImp = static_cast(pOsSysman); pSysfsAccess = &pLinuxSysmanImp->getSysfsAccess(); pfsAccess = &pLinuxSysmanImp->getFsAccess(); Device *pDevice = pLinuxSysmanImp->getDeviceHandle(); isLmemSupported = pDevice->getDriverHandle()->getMemoryManager()->isLocalMemorySupported(pDevice->getRootDeviceIndex()); if (pSysfsAccess->isRootUser()) { pciExtendedConfigRead(); } } OsPci *OsPci::create(OsSysman *pOsSysman) { LinuxPciImp *pLinuxPciImp = new LinuxPciImp(pOsSysman); return static_cast(pLinuxPciImp); } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/pci/linux/os_pci_imp.h000066400000000000000000000037031422164147700277010ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/non_copyable_or_moveable.h" #include "shared/source/os_interface/linux/sys_calls.h" #include "sysman/linux/os_sysman_imp.h" #include "sysman/pci/os_pci.h" namespace L0 { class SysfsAccess; class FsAccess; class LinuxPciImp : public OsPci, NEO::NonCopyableOrMovableClass { public: ze_result_t getPciBdf(zes_pci_properties_t &pciProperties) override; ze_result_t getMaxLinkSpeed(double &maxLinkSpeed) override; ze_result_t getMaxLinkWidth(int32_t &maxLinkwidth) override; ze_result_t getState(zes_pci_state_t 
*state) override; ze_result_t getProperties(zes_pci_properties_t *properties) override; bool resizableBarSupported() override; bool resizableBarEnabled(uint32_t barIndex) override; ze_result_t initializeBarProperties(std::vector &pBarProperties) override; LinuxPciImp() = default; LinuxPciImp(OsSysman *pOsSysman); ~LinuxPciImp() override = default; protected: SysfsAccess *pSysfsAccess = nullptr; FsAccess *pfsAccess = nullptr; LinuxSysmanImp *pLinuxSysmanImp = nullptr; std::unique_ptr configMemory; void pciExtendedConfigRead(); decltype(&NEO::SysCalls::open) openFunction = NEO::SysCalls::open; decltype(&NEO::SysCalls::close) closeFunction = NEO::SysCalls::close; decltype(&NEO::SysCalls::pread) preadFunction = NEO::SysCalls::pread; private: static const std::string deviceDir; static const std::string resourceFile; static const std::string maxLinkSpeedFile; static const std::string maxLinkWidthFile; bool isLmemSupported = false; uint32_t getDwordFromConfig(uint32_t pos) { return configMemory[pos] | (configMemory[pos + 1] << 8) | (configMemory[pos + 2] << 16) | (configMemory[pos + 3] << 24); } uint32_t getRebarCapabilityPos(); }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/pci/os_pci.h000066400000000000000000000021051422164147700256700ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/tools/source/sysman/os_sysman.h" #include #include #include namespace L0 { int64_t convertPcieSpeedFromGTsToBs(double maxLinkSpeedInGt); int32_t convertLinkSpeedToPciGen(double speed); double convertPciGenToLinkSpeed(uint32_t gen); class OsPci { public: virtual ze_result_t getPciBdf(zes_pci_properties_t &pciProperties) = 0; virtual ze_result_t getMaxLinkSpeed(double &maxLinkSpeed) = 0; virtual ze_result_t getMaxLinkWidth(int32_t &maxLinkWidth) = 0; virtual ze_result_t getState(zes_pci_state_t *state) = 0; virtual ze_result_t getProperties(zes_pci_properties_t 
*properties) = 0; virtual bool resizableBarSupported() = 0; virtual bool resizableBarEnabled(uint32_t barIndex) = 0; virtual ze_result_t initializeBarProperties(std::vector &pBarProperties) = 0; static OsPci *create(OsSysman *pOsSysman); virtual ~OsPci() = default; }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/pci/pci.h000066400000000000000000000010631422164147700251710ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/tools/source/sysman/sysman_const.h" #include namespace L0 { class Pci { public: virtual ~Pci(){}; virtual ze_result_t pciStaticProperties(zes_pci_properties_t *pProperties) = 0; virtual ze_result_t pciGetInitializedBars(uint32_t *pCount, zes_pci_bar_properties_t *pProperties) = 0; virtual ze_result_t pciGetState(zes_pci_state_t *pState) = 0; virtual void init() = 0; }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/pci/pci_imp.cpp000066400000000000000000000141071422164147700263740ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "pci_imp.h" #include "shared/source/helpers/basic_math.h" #include "shared/source/helpers/debug_helpers.h" #include "shared/source/helpers/string.h" #include "shared/source/utilities/directory.h" #include namespace L0 { // // While computing the PCIe bandwidth, also consider that due to 8b/10b encoding // in PCIe gen1 and gen2 real bandwidth will be reduced by 20%, // And in case of gen3 and above due to 128b/130b encoding real bandwidth is // reduced by approx 1.54% as compared to theoretical bandwidth. 
// In below method, get real PCIe speed in pcieSpeedWithEnc in Mega bits per second // pcieSpeedWithEnc = maxLinkSpeedInGt * (Gigabit to Megabit) * Encoding = // maxLinkSpeedInGt * 1000 * Encoding // int64_t convertPcieSpeedFromGTsToBs(double maxLinkSpeedInGt) { double pcieSpeedWithEnc; if ((maxLinkSpeedInGt == PciLinkSpeeds::Pci32_0GigatransfersPerSecond) || (maxLinkSpeedInGt == PciLinkSpeeds::Pci16_0GigatransfersPerSecond) || (maxLinkSpeedInGt == PciLinkSpeeds::Pci8_0GigatransfersPerSecond)) { pcieSpeedWithEnc = maxLinkSpeedInGt * 1000 * 128 / 130; } else if ((maxLinkSpeedInGt == PciLinkSpeeds::Pci5_0GigatransfersPerSecond) || (maxLinkSpeedInGt == PciLinkSpeeds::Pci2_5GigatransfersPerSecond)) { pcieSpeedWithEnc = maxLinkSpeedInGt * 1000 * 8 / 10; } else { pcieSpeedWithEnc = 0; } // // PCIE speed we got above is in Mega bits per second // Convert that speed in bytes/second. // Now, because 1Mb/s = (1000*1000)/8 bytes/second = 125000 bytes/second // pcieSpeedWithEnc = pcieSpeedWithEnc * 125000; return static_cast(pcieSpeedWithEnc); } double convertPciGenToLinkSpeed(uint32_t gen) { switch (gen) { case PciGenerations::PciGen1: { return PciLinkSpeeds::Pci2_5GigatransfersPerSecond; } break; case PciGenerations::PciGen2: { return PciLinkSpeeds::Pci5_0GigatransfersPerSecond; } break; case PciGenerations::PciGen3: { return PciLinkSpeeds::Pci8_0GigatransfersPerSecond; } break; case PciGenerations::PciGen4: { return PciLinkSpeeds::Pci16_0GigatransfersPerSecond; } break; case PciGenerations::PciGen5: { return PciLinkSpeeds::Pci32_0GigatransfersPerSecond; } break; default: { return 0.0; } break; } } int32_t convertLinkSpeedToPciGen(double speed) { if (speed == PciLinkSpeeds::Pci2_5GigatransfersPerSecond) { return PciGenerations::PciGen1; } else if (speed == PciLinkSpeeds::Pci5_0GigatransfersPerSecond) { return PciGenerations::PciGen2; } else if (speed == PciLinkSpeeds::Pci8_0GigatransfersPerSecond) { return PciGenerations::PciGen3; } else if (speed == 
// Copies the cached PCI properties (filled in by pciGetStaticFields())
// into *pProperties. pProperties is dereferenced without a null check.
// Always returns ZE_RESULT_SUCCESS.
ze_result_t PciImp::pciStaticProperties(zes_pci_properties_t *pProperties) {
    *pProperties = pciProperties;
    return ZE_RESULT_SUCCESS;
}
pBarPropsExt->base = pciBarProperties[i]->base; pBarPropsExt->index = pciBarProperties[i]->index; pBarPropsExt->size = pciBarProperties[i]->size; pBarPropsExt->type = pciBarProperties[i]->type; pBarPropsExt->resizableBarSupported = static_cast(resizableBarSupported); pBarPropsExt->resizableBarEnabled = static_cast(pOsPci->resizableBarEnabled(pBarPropsExt->index)); } } } } return ZE_RESULT_SUCCESS; } ze_result_t PciImp::pciGetState(zes_pci_state_t *pState) { return pOsPci->getState(pState); } void PciImp::pciGetStaticFields() { pOsPci->getProperties(&pciProperties); resizableBarSupported = pOsPci->resizableBarSupported(); std::string bdf; pOsPci->getPciBdf(pciProperties); int32_t maxLinkWidth = -1; int64_t maxBandWidth = -1; double maxLinkSpeed = 0; pOsPci->getMaxLinkSpeed(maxLinkSpeed); pOsPci->getMaxLinkWidth(maxLinkWidth); maxBandWidth = maxLinkWidth * convertPcieSpeedFromGTsToBs(maxLinkSpeed); if (maxBandWidth == 0) { pciProperties.maxSpeed.maxBandwidth = -1; } else { pciProperties.maxSpeed.maxBandwidth = maxBandWidth; } pciProperties.maxSpeed.width = maxLinkWidth; pciProperties.maxSpeed.gen = convertLinkSpeedToPciGen(maxLinkSpeed); pOsPci->initializeBarProperties(pciBarProperties); } void PciImp::init() { if (pOsPci == nullptr) { pOsPci = OsPci::create(pOsSysman); } UNRECOVERABLE_IF(nullptr == pOsPci); pciGetStaticFields(); } PciImp::~PciImp() { for (zes_pci_bar_properties_t *pProperties : pciBarProperties) { delete pProperties; pProperties = nullptr; } if (nullptr != pOsPci) { delete pOsPci; pOsPci = nullptr; } } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/pci/pci_imp.h000066400000000000000000000021271422164147700260400ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/non_copyable_or_moveable.h" #include "shared/source/memory_manager/memory_manager.h" #include "level_zero/core/source/device/device.h" #include #include 
"os_pci.h" #include "pci.h" #include namespace L0 { class PciImp : public Pci, NEO::NonCopyableOrMovableClass { public: void init() override; ze_result_t pciStaticProperties(zes_pci_properties_t *pProperties) override; ze_result_t pciGetInitializedBars(uint32_t *pCount, zes_pci_bar_properties_t *pProperties) override; ze_result_t pciGetState(zes_pci_state_t *pState) override; void pciGetStaticFields(); PciImp() = default; PciImp(OsSysman *pOsSysman) : pOsSysman(pOsSysman){}; ~PciImp() override; OsPci *pOsPci = nullptr; private: OsSysman *pOsSysman = nullptr; bool resizableBarSupported = false; zes_pci_properties_t pciProperties = {}; std::vector pciBarProperties = {}; }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/pci/windows/000077500000000000000000000000001422164147700257375ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/pci/windows/CMakeLists.txt000066400000000000000000000010361422164147700304770ustar00rootroot00000000000000# # Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_TOOLS_SYSMAN_PCI_WINDOWS ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/os_pci_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/os_pci_imp.h ) if(WIN32) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_SYSMAN_PCI_WINDOWS} ) endif() # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_SYSMAN_PCI_WINDOWS ${L0_SRCS_TOOLS_SYSMAN_PCI_WINDOWS}) compute-runtime-22.14.22890/level_zero/tools/source/sysman/pci/windows/os_pci_imp.cpp000066400000000000000000000172401422164147700305700ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "sysman/pci/windows/os_pci_imp.h" namespace L0 { ze_result_t WddmPciImp::getProperties(zes_pci_properties_t *properties) { properties->haveBandwidthCounters = false; properties->havePacketCounters = false; properties->haveReplayCounters 
// Retrieves the PCI address (domain, bus, device, function) through the KMD
// sysman interface and stores it in pciProperties.address. The root port is
// queried when local memory is supported, otherwise the current device.
// Returns the request status on failure, ZE_RESULT_ERROR_UNKNOWN when the
// number of responses does not match the number of requests, and
// ZE_RESULT_SUCCESS otherwise. A component whose individual response is not
// successful is reported as 0.
ze_result_t WddmPciImp::getPciBdf(zes_pci_properties_t &pciProperties) {
    KmdSysman::RequestProperty request = {};
    request.commandId = KmdSysman::Command::Get;
    request.componentId = KmdSysman::Component::PciComponent;
    request.paramInfo = (isLmemSupported) ? KmdSysman::PciDomainsType::PciRootPort : KmdSysman::PciDomainsType::PciCurrentDevice;

    // The order of the requests defines the order in which responses are decoded below.
    std::vector<KmdSysman::RequestProperty> vRequests = {};
    request.requestId = KmdSysman::Requests::Pci::Bus;
    vRequests.push_back(request);
    request.requestId = KmdSysman::Requests::Pci::Domain;
    vRequests.push_back(request);
    request.requestId = KmdSysman::Requests::Pci::Device;
    vRequests.push_back(request);
    request.requestId = KmdSysman::Requests::Pci::Function;
    vRequests.push_back(request);

    std::vector<KmdSysman::ResponseProperty> vResponses = {};
    const ze_result_t status = pKmdSysManager->requestMultiple(vRequests, vResponses);
    if (status != ZE_RESULT_SUCCESS) {
        return status;
    }
    if (vResponses.size() != vRequests.size()) {
        // A successful call with a mismatched response count must not be
        // reported as success; the responses cannot be decoded by index.
        return ZE_RESULT_ERROR_UNKNOWN;
    }

    // Decodes the 32-bit payload of one response; 0 when that request failed.
    auto readResponse = [&vResponses](size_t index) {
        uint32_t value = 0;
        if (vResponses[index].returnCode == KmdSysman::Success) {
            memcpy_s(&value, sizeof(uint32_t), vResponses[index].dataBuffer, sizeof(uint32_t));
        }
        return value;
    };

    pciProperties.address.bus = readResponse(0);
    pciProperties.address.domain = readResponse(1);
    pciProperties.address.device = readResponse(2);
    pciProperties.address.function = readResponse(3);

    return ZE_RESULT_SUCCESS;
}
// Retrieves the current PCI link generation and width through the KMD sysman
// interface. qualityIssues, stabilityIssues and status are set to their
// FORCE_UINT32 values before the request and are not updated afterwards;
// speed.maxBandwidth is not written by this function.
// Returns the request status on failure, ZE_RESULT_ERROR_UNKNOWN when the
// number of responses does not match the number of requests, and
// ZE_RESULT_SUCCESS otherwise. A field whose individual response is not
// successful is left untouched.
ze_result_t WddmPciImp::getState(zes_pci_state_t *state) {
    state->qualityIssues = ZES_PCI_LINK_QUAL_ISSUE_FLAG_FORCE_UINT32;
    state->stabilityIssues = ZES_PCI_LINK_STAB_ISSUE_FLAG_FORCE_UINT32;
    state->status = ZES_PCI_LINK_STATUS_FORCE_UINT32;

    KmdSysman::RequestProperty request = {};
    request.commandId = KmdSysman::Command::Get;
    request.componentId = KmdSysman::Component::PciComponent;
    request.paramInfo = (isLmemSupported) ? KmdSysman::PciDomainsType::PciRootPort : KmdSysman::PciDomainsType::PciCurrentDevice;

    // Response 0 carries the link speed, response 1 the link width.
    std::vector<KmdSysman::RequestProperty> vRequests = {};
    request.requestId = KmdSysman::Requests::Pci::CurrentLinkSpeed;
    vRequests.push_back(request);
    request.requestId = KmdSysman::Requests::Pci::CurrentLinkWidth;
    vRequests.push_back(request);

    std::vector<KmdSysman::ResponseProperty> vResponses = {};
    const ze_result_t status = pKmdSysManager->requestMultiple(vRequests, vResponses);
    if (status != ZE_RESULT_SUCCESS) {
        return status;
    }
    if (vResponses.size() != vRequests.size()) {
        // Do not report success when the responses cannot be decoded by index.
        return ZE_RESULT_ERROR_UNKNOWN;
    }

    uint32_t valueSmall = 0;
    if (vResponses[0].returnCode == KmdSysman::Success) {
        memcpy_s(&valueSmall, sizeof(uint32_t), vResponses[0].dataBuffer, sizeof(uint32_t));
        state->speed.gen = static_cast<int32_t>(valueSmall);
    }

    if (vResponses[1].returnCode == KmdSysman::Success) {
        memcpy_s(&valueSmall, sizeof(uint32_t), vResponses[1].dataBuffer, sizeof(uint32_t));
        state->speed.width = static_cast<int32_t>(valueSmall);
    }

    return ZE_RESULT_SUCCESS;
}
response.dataBuffer, sizeof(uint32_t)); enabled = static_cast(valueSmall); } return enabled; } ze_result_t WddmPciImp::initializeBarProperties(std::vector &pBarProperties) { zes_pci_bar_properties_t *pBarProp = new zes_pci_bar_properties_t; memset(pBarProp, 0, sizeof(zes_pci_bar_properties_t)); pBarProperties.push_back(pBarProp); return ZE_RESULT_SUCCESS; } WddmPciImp::WddmPciImp(OsSysman *pOsSysman) { WddmSysmanImp *pWddmSysmanImp = static_cast(pOsSysman); pKmdSysManager = &pWddmSysmanImp->getKmdSysManager(); Device *pDevice = pWddmSysmanImp->getDeviceHandle(); isLmemSupported = pDevice->getDriverHandle()->getMemoryManager()->isLocalMemorySupported(pDevice->getRootDeviceIndex()); } OsPci *OsPci::create(OsSysman *pOsSysman) { WddmPciImp *pWddmPciImp = new WddmPciImp(pOsSysman); return static_cast(pWddmPciImp); } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/pci/windows/os_pci_imp.h000066400000000000000000000021441422164147700302320ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/non_copyable_or_moveable.h" #include "sysman/pci/os_pci.h" #include "sysman/windows/os_sysman_imp.h" namespace L0 { class KmdSysManager; class WddmPciImp : public OsPci, NEO::NonCopyableOrMovableClass { public: ze_result_t getPciBdf(zes_pci_properties_t &pciProperties) override; ze_result_t getMaxLinkSpeed(double &maxLinkSpeed) override; ze_result_t getMaxLinkWidth(int32_t &maxLinkwidth) override; ze_result_t getState(zes_pci_state_t *state) override; ze_result_t getProperties(zes_pci_properties_t *properties) override; bool resizableBarSupported() override; bool resizableBarEnabled(uint32_t barIndex) override; ze_result_t initializeBarProperties(std::vector &pBarProperties) override; WddmPciImp(OsSysman *pOsSysman); WddmPciImp() = default; ~WddmPciImp() override = default; protected: KmdSysManager *pKmdSysManager = nullptr; private: bool 
# OS-independent sources of the sysman performance module.
set(L0_SRCS_TOOLS_SYSMAN_PERFORMANCE
    ${CMAKE_CURRENT_SOURCE_DIR}/performance.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/performance.h
    ${CMAKE_CURRENT_SOURCE_DIR}/performance_imp.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/performance_imp.h
    ${CMAKE_CURRENT_SOURCE_DIR}/os_performance.h
)

# Add the sources above, plus this CMakeLists.txt, to the Level Zero static library target.
target_sources(${L0_STATIC_LIB_NAME}
               PRIVATE
               ${L0_SRCS_TOOLS_SYSMAN_PERFORMANCE}
               ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
)

# Pull in the OS-specific implementation: linux on UNIX, windows otherwise.
if(UNIX)
  add_subdirectory(linux)
else()
  add_subdirectory(windows)
endif()

# Make our source files visible to parent
set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_SYSMAN_PERFORMANCE ${L0_SRCS_TOOLS_SYSMAN_PERFORMANCE})
${L0_SRCS_TOOLS_SYSMAN_PERFORMANCE_LINUX} ) endif() # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_SYSMAN_PERFORMANCE_LINUX ${L0_SRCS_TOOLS_SYSMAN_PERFORMANCE_LINUX}) compute-runtime-22.14.22890/level_zero/tools/source/sysman/performance/linux/os_performance_imp.cpp000066400000000000000000000017501422164147700335100ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/performance/linux/os_performance_imp.h" namespace L0 { ze_result_t LinuxPerformanceImp::osPerformanceGetProperties(zes_perf_properties_t &pProperties) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t LinuxPerformanceImp::osPerformanceGetConfig(double *pFactor) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t LinuxPerformanceImp::osPerformanceSetConfig(double pFactor) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } bool LinuxPerformanceImp::isPerformanceSupported(void) { return false; } OsPerformance *OsPerformance::create(OsSysman *pOsSysman, ze_bool_t onSubdevice, uint32_t subdeviceId, zes_engine_type_flag_t domain) { LinuxPerformanceImp *pLinuxPerformanceImp = new LinuxPerformanceImp(pOsSysman, onSubdevice, subdeviceId, domain); return static_cast(pLinuxPerformanceImp); } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/performance/linux/os_performance_imp.h000066400000000000000000000016401422164147700331530ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/non_copyable_or_moveable.h" #include "sysman/linux/os_sysman_imp.h" #include "sysman/performance/os_performance.h" #include "sysman/performance/performance_imp.h" namespace L0 { class LinuxPerformanceImp : public OsPerformance, NEO::NonCopyableOrMovableClass { public: ze_result_t osPerformanceGetProperties(zes_perf_properties_t &pProperties) override; ze_result_t 
// Reads the base frequency scale factor from the sysfs node named by
// baseScale into baseScaleReading. Returns ZE_RESULT_SUCCESS when the read
// succeeds, otherwise the read status mapped through getErrorCode().
ze_result_t LinuxPerformanceImp::getBaseScaleFactor() {
    const ze_result_t readStatus = pSysfsAccess->read(baseScale, baseScaleReading);
    return (readStatus == ZE_RESULT_SUCCESS) ? ZE_RESULT_SUCCESS : getErrorCode(readStatus);
}
(ZE_RESULT_SUCCESS != result) { return getErrorCode(result); } if (sysPwrBalanceReading >= 0 && sysPwrBalanceReading <= 16.0) { *pFactor = 50.0 + std::round((16.0 - sysPwrBalanceReading) * 50.0 / 16.0); } else if (sysPwrBalanceReading > 16.0 && sysPwrBalanceReading <= 63.0) { *pFactor = std::round((63.0 - sysPwrBalanceReading) * 50.0 / (63.0 - 16.0)); } else { result = ZE_RESULT_ERROR_UNKNOWN; } break; case ZES_ENGINE_TYPE_FLAG_MEDIA: result = pSysfsAccess->read(mediaFreqFactor, mediaFactorReading); if (ZE_RESULT_SUCCESS != result) { return getErrorCode(result); } multiplier = (mediaFactorReading * mediaScaleReading); // Value retrieved from media_freq_factor file is in U(fixed point decimal) format convert it into decimal by multiplication with scale factor if (multiplier == 1) { *pFactor = maxPerformanceFactor; } else if (multiplier == 0.5) { *pFactor = minPerformanceFactor; } else { result = ZE_RESULT_ERROR_UNKNOWN; } break; case ZES_ENGINE_TYPE_FLAG_COMPUTE: result = pSysfsAccess->read(baseFreqFactor, baseFactorReading); if (ZE_RESULT_SUCCESS != result) { return getErrorCode(result); } multiplier = (baseFactorReading * baseScaleReading); // Value retrieved from base_freq_factor file is in U(fixed point decimal) format convert it into decimal by multiplication with scale factor if (multiplier >= 0.5 && multiplier <= 1) { *pFactor = (1 - multiplier) * 100 + 50; } else if (multiplier > 1 && multiplier <= 2) { *pFactor = (2 - multiplier) * 50; } else { result = ZE_RESULT_ERROR_UNKNOWN; } break; default: result = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; break; } return result; } ze_result_t LinuxPerformanceImp::osPerformanceSetConfig(double pFactor) { double multiplier = 0; ze_result_t result = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; if (pFactor < minPerformanceFactor || pFactor > maxPerformanceFactor) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } switch (domain) { case ZES_ENGINE_TYPE_FLAG_OTHER: if (pFactor <= halfOfMaxPerformanceFactor) { multiplier = 63.0 - 
std::round(pFactor * (47.0) / 50.0); // multiplier = 63 - ROUND(pFactor * (63.0 - 16.0) / 50.0) } else { multiplier = 16.0 - std::round((pFactor - 50.0) * 16.0 / 50.0); } result = pSysfsAccess->write(sysPwrBalance, multiplier); break; case ZES_ENGINE_TYPE_FLAG_MEDIA: if (pFactor < halfOfMaxPerformanceFactor) { multiplier = 0.5; } else { multiplier = 1; } multiplier = multiplier / mediaScaleReading; // Divide by scale factor and then round off to convert from decimal to U format multiplier = std::round(multiplier); result = pSysfsAccess->write(mediaFreqFactor, multiplier); break; case ZES_ENGINE_TYPE_FLAG_COMPUTE: if (pFactor < halfOfMaxPerformanceFactor) { multiplier = 2 - (pFactor / 50.0); } else { multiplier = 1 - ((pFactor - 50) / 100.0); } multiplier = multiplier / baseScaleReading; // Divide by scale factor and then round off to convert from decimal to U format multiplier = std::round(multiplier); result = pSysfsAccess->write(baseFreqFactor, multiplier); break; default: result = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; break; } return result; } bool LinuxPerformanceImp::isPerformanceSupported(void) { switch (domain) { case ZES_ENGINE_TYPE_FLAG_OTHER: if (pSysfsAccess->canRead(sysPwrBalance) != ZE_RESULT_SUCCESS) { return false; } break; case ZES_ENGINE_TYPE_FLAG_MEDIA: if (pSysfsAccess->canRead(mediaFreqFactor) != ZE_RESULT_SUCCESS) { return false; } if (getMediaScaleFactor() != ZE_RESULT_SUCCESS) { return false; } break; case ZES_ENGINE_TYPE_FLAG_COMPUTE: if (pSysfsAccess->canRead(baseFreqFactor) != ZE_RESULT_SUCCESS) { return false; } if (getBaseScaleFactor() != ZE_RESULT_SUCCESS) { return false; } break; default: return false; break; } return true; } void LinuxPerformanceImp::init() { const std::string baseDir = "gt/gt" + std::to_string(subdeviceId) + "/"; baseFreqFactor = baseDir + "base_freq_factor"; mediaFreqFactor = baseDir + "media_freq_factor"; baseScale = baseDir + "base_freq_factor.scale"; mediaScale = baseDir + "media_freq_factor.scale"; } 
// Stores the engine domain and sub-device information of this handle,
// obtains the sysfs accessor from the Linux sysman implementation and builds
// the sysfs node names through init().
LinuxPerformanceImp::LinuxPerformanceImp(OsSysman *pOsSysman, ze_bool_t onSubdevice, uint32_t subdeviceId, zes_engine_type_flag_t domain)
    : domain(domain), subdeviceId(subdeviceId), isSubdevice(onSubdevice) {
    pSysfsAccess = &static_cast<LinuxSysmanImp *>(pOsSysman)->getSysfsAccess();
    init();
}
= 0; double mediaScaleReading = 0; ze_result_t getMediaFreqFactor(); ze_result_t getMediaScaleFactor(); ze_result_t getBaseFreqFactor(); ze_result_t getBaseScaleFactor(); void init(); void getMultiplierVal(double rp0Reading, double rpnReading, double pFactor, double &multiplier); void getPerformanceFactor(double rp0Reading, double rpnReading, double multiplierReading, double *pFactor); ze_result_t getErrorCode(ze_result_t result) { if (result == ZE_RESULT_ERROR_NOT_AVAILABLE) { result = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } return result; } }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/performance/os_performance.h000066400000000000000000000012761422164147700311540ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/tools/source/sysman/os_sysman.h" #include namespace L0 { class OsPerformance { public: virtual ze_result_t osPerformanceGetProperties(zes_perf_properties_t &pProperties) = 0; virtual ze_result_t osPerformanceGetConfig(double *pFactor) = 0; virtual ze_result_t osPerformanceSetConfig(double pFactor) = 0; virtual bool isPerformanceSupported(void) = 0; static OsPerformance *create(OsSysman *pOsSysman, ze_bool_t onSubdevice, uint32_t subdeviceId, zes_engine_type_flag_t domain); virtual ~OsPerformance() {} }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/performance/performance.cpp000066400000000000000000000032521422164147700310020ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "performance.h" #include "performance_imp.h" namespace L0 { PerformanceHandleContext::~PerformanceHandleContext() { for (auto &pPerformance : handleList) { if (pPerformance) { delete pPerformance; pPerformance = nullptr; } handleList.pop_back(); } } void PerformanceHandleContext::createHandle(ze_device_handle_t deviceHandle, zes_engine_type_flag_t domain) { 
Performance *pPerformance = new PerformanceImp(pOsSysman, deviceHandle, domain); if (pPerformance->isPerformanceEnabled == true) { handleList.push_back(pPerformance); } else { delete pPerformance; } } ze_result_t PerformanceHandleContext::init(std::vector &deviceHandles, ze_device_handle_t coreDevice) { for (const auto &deviceHandle : deviceHandles) { createHandle(deviceHandle, ZES_ENGINE_TYPE_FLAG_MEDIA); createHandle(deviceHandle, ZES_ENGINE_TYPE_FLAG_COMPUTE); } createHandle(coreDevice, ZES_ENGINE_TYPE_FLAG_OTHER); return ZE_RESULT_SUCCESS; } ze_result_t PerformanceHandleContext::performanceGet(uint32_t *pCount, zes_perf_handle_t *phPerformance) { uint32_t handleListSize = static_cast(handleList.size()); uint32_t numToCopy = std::min(*pCount, handleListSize); if (0 == *pCount || *pCount > handleListSize) { *pCount = handleListSize; } if (nullptr != phPerformance) { for (uint32_t i = 0; i < numToCopy; i++) { phPerformance[i] = handleList[i]->toPerformanceHandle(); } } return ZE_RESULT_SUCCESS; } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/performance/performance.h000066400000000000000000000025501422164147700304470ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/core/source/device/device.h" #include #include struct _zes_perf_handle_t { virtual ~_zes_perf_handle_t() = default; }; namespace L0 { struct OsSysman; class Performance : _zes_perf_handle_t { public: virtual ~Performance() {} virtual ze_result_t performanceGetProperties(zes_perf_properties_t *pProperties) = 0; virtual ze_result_t performanceGetConfig(double *pFactor) = 0; virtual ze_result_t performanceSetConfig(double pFactor) = 0; inline zes_perf_handle_t toPerformanceHandle() { return this; } static Performance *fromHandle(zes_perf_handle_t handle) { return static_cast(handle); } bool isPerformanceEnabled = false; }; struct PerformanceHandleContext { 
PerformanceHandleContext(OsSysman *pOsSysman) : pOsSysman(pOsSysman){}; ~PerformanceHandleContext(); ze_result_t init(std::vector &deviceHandles, ze_device_handle_t coreDevice); ze_result_t performanceGet(uint32_t *pCount, zes_perf_handle_t *phPerformance); OsSysman *pOsSysman = nullptr; std::vector handleList = {}; private: void createHandle(ze_device_handle_t deviceHandle, zes_engine_type_flag_t domain); }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/performance/performance_imp.cpp000066400000000000000000000027641422164147700316560ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "performance_imp.h" #include "shared/source/helpers/debug_helpers.h" namespace L0 { ze_result_t PerformanceImp::performanceGetProperties(zes_perf_properties_t *pProperties) { *pProperties = performanceProperties; return ZE_RESULT_SUCCESS; } ze_result_t PerformanceImp::performanceGetConfig(double *pFactor) { return pOsPerformance->osPerformanceGetConfig(pFactor); } ze_result_t PerformanceImp::performanceSetConfig(double pFactor) { return pOsPerformance->osPerformanceSetConfig(pFactor); } void PerformanceImp::init() { this->isPerformanceEnabled = pOsPerformance->isPerformanceSupported(); if (this->isPerformanceEnabled) { pOsPerformance->osPerformanceGetProperties(performanceProperties); } } PerformanceImp::PerformanceImp(OsSysman *pOsSysman, ze_device_handle_t handle, zes_engine_type_flag_t domain) { ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES}; Device::fromHandle(handle)->getProperties(&deviceProperties); pOsPerformance = OsPerformance::create(pOsSysman, deviceProperties.flags & ZE_DEVICE_PROPERTY_FLAG_SUBDEVICE, deviceProperties.subdeviceId, domain); UNRECOVERABLE_IF(nullptr == pOsPerformance); init(); } PerformanceImp::~PerformanceImp() { if (pOsPerformance != nullptr) { delete pOsPerformance; pOsPerformance = nullptr; } } } // namespace L0 
compute-runtime-22.14.22890/level_zero/tools/source/sysman/performance/performance_imp.h000066400000000000000000000015551422164147700313200ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/non_copyable_or_moveable.h" #include #include "os_performance.h" #include "performance.h" namespace L0 { class PerformanceImp : public Performance, NEO::NonCopyableOrMovableClass { public: ze_result_t performanceGetProperties(zes_perf_properties_t *pProperties) override; ze_result_t performanceGetConfig(double *pFactor) override; ze_result_t performanceSetConfig(double pFactor) override; PerformanceImp() = delete; PerformanceImp(OsSysman *pOsSysman, ze_device_handle_t handle, zes_engine_type_flag_t domain); ~PerformanceImp() override; OsPerformance *pOsPerformance = nullptr; void init(); private: zes_perf_properties_t performanceProperties = {}; }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/performance/windows/000077500000000000000000000000001422164147700274655ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/performance/windows/CMakeLists.txt000066400000000000000000000010241422164147700322220ustar00rootroot00000000000000# # Copyright (C) 2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_TOOLS_SYSMAN_PERFORMANCE_WINDOWS ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/os_performance_imp.cpp ) if(WIN32) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_SYSMAN_PERFORMANCE_WINDOWS} ) endif() # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_SYSMAN_PERFORMANCE_WINDOWS ${L0_SRCS_TOOLS_SYSMAN_PERFORMANCE_WINDOWS}) os_performance_imp.cpp000066400000000000000000000024331422164147700337630ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/performance/windows/* * Copyright (C) 2021 Intel 
Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/performance/os_performance.h" namespace L0 { class WddmPerformanceImp : public OsPerformance { public: ze_result_t osPerformanceGetProperties(zes_perf_properties_t &pProperties) override; ze_result_t osPerformanceGetConfig(double *pFactor) override; ze_result_t osPerformanceSetConfig(double pFactor) override; bool isPerformanceSupported(void) override; }; ze_result_t WddmPerformanceImp::osPerformanceGetConfig(double *pFactor) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t WddmPerformanceImp::osPerformanceSetConfig(double pFactor) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t WddmPerformanceImp::osPerformanceGetProperties(zes_perf_properties_t &properties) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } bool WddmPerformanceImp::isPerformanceSupported(void) { return false; } OsPerformance *OsPerformance::create(OsSysman *pOsSysman, ze_bool_t onSubdevice, uint32_t subdeviceId, zes_engine_type_flag_t domain) { WddmPerformanceImp *pWddmPerformanceImp = new WddmPerformanceImp(); return static_cast(pWddmPerformanceImp); } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/power/000077500000000000000000000000001422164147700246265ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/power/CMakeLists.txt000066400000000000000000000012031422164147700273620ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_TOOLS_SYSMAN_POWER ${CMAKE_CURRENT_SOURCE_DIR}/power.cpp ${CMAKE_CURRENT_SOURCE_DIR}/power.h ${CMAKE_CURRENT_SOURCE_DIR}/power_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/power_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/os_power.h ) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_SYSMAN_POWER} ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) add_subdirectories() # Make our source files visible to parent set_property(GLOBAL PROPERTY 
L0_SRCS_TOOLS_SYSMAN_POWER ${L0_SRCS_TOOLS_SYSMAN_POWER}) compute-runtime-22.14.22890/level_zero/tools/source/sysman/power/linux/000077500000000000000000000000001422164147700257655ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/power/linux/CMakeLists.txt000066400000000000000000000014611422164147700305270ustar00rootroot00000000000000# # Copyright (C) 2020-2022 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_TOOLS_SYSMAN_POWER_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/os_power_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/os_power_imp.h ) if(NEO_ENABLE_i915_PRELIM_DETECTION) list(APPEND L0_SRCS_TOOLS_SYSMAN_POWER_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/os_power_helper_prelim.cpp ) else() list(APPEND L0_SRCS_TOOLS_SYSMAN_POWER_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/os_power_helper.cpp ) endif() if(UNIX) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_SYSMAN_POWER_LINUX} ) endif() # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_SYSMAN_POWER_LINUX ${L0_SRCS_TOOLS_SYSMAN_POWER_LINUX}) compute-runtime-22.14.22890/level_zero/tools/source/sysman/power/linux/os_power_helper.cpp000066400000000000000000000005311422164147700316640ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/power/linux/os_power_imp.h" namespace L0 { bool LinuxPowerImp::isEnergyHwmonDir(std::string name) { if (isSubdevice == false && (name == i915)) { return true; } return false; } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/power/linux/os_power_helper_prelim.cpp000066400000000000000000000017521422164147700332420ustar00rootroot00000000000000/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/power/linux/os_power_imp.h" #include "level_zero/tools/source/sysman/sysman_const.h" #include 
"sysman/linux/os_sysman_imp.h" namespace L0 { // The top-level hwmon(hwmon1 in example) contains all the power related information and device level // energy counters. The other hwmon directories contain per tile energy counters. // ex:- device/hwmon/hwmon1/energy1_input name = "i915" (Top level hwmon) // device/hwmon/hwmon2/energy1_input name = "i915_gt0" (Tile 0) // device/hwmon/hwmon3/energy1_input name = "i915_gt1" (Tile 1) bool LinuxPowerImp::isEnergyHwmonDir(std::string name) { if (isSubdevice == true) { if (name == i915 + "_gt" + std::to_string(subdeviceId)) { return true; } } else if (name == i915) { return true; } return false; } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/power/linux/os_power_imp.cpp000066400000000000000000000224601422164147700311770ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/power/linux/os_power_imp.h" #include "level_zero/tools/source/sysman/linux/pmt/pmt.h" #include "level_zero/tools/source/sysman/sysman_const.h" #include "sysman/linux/os_sysman_imp.h" namespace L0 { const std::string LinuxPowerImp::hwmonDir("device/hwmon"); const std::string LinuxPowerImp::i915("i915"); const std::string LinuxPowerImp::sustainedPowerLimitEnabled("power1_max_enable"); const std::string LinuxPowerImp::sustainedPowerLimit("power1_max"); const std::string LinuxPowerImp::sustainedPowerLimitInterval("power1_max_interval"); const std::string LinuxPowerImp::burstPowerLimitEnabled("power1_cap_enable"); const std::string LinuxPowerImp::burstPowerLimit("power1_cap"); const std::string LinuxPowerImp::energyCounterNode("energy1_input"); const std::string LinuxPowerImp::defaultPowerLimit("power_default_limit"); const std::string LinuxPowerImp::minPowerLimit("power_min_limit"); const std::string LinuxPowerImp::maxPowerLimit("power_max_limit"); void powerGetTimestamp(uint64_t ×tamp) { std::chrono::time_point ts = 
std::chrono::steady_clock::now(); timestamp = std::chrono::duration_cast(ts.time_since_epoch()).count(); } ze_result_t LinuxPowerImp::getProperties(zes_power_properties_t *pProperties) { pProperties->onSubdevice = isSubdevice; pProperties->subdeviceId = subdeviceId; pProperties->canControl = canControl; pProperties->isEnergyThresholdSupported = false; pProperties->defaultLimit = -1; pProperties->minLimit = -1; pProperties->maxLimit = -1; uint32_t val = 0; auto result = pSysfsAccess->read(i915HwmonDir + "/" + defaultPowerLimit, val); if (ZE_RESULT_SUCCESS == result) { pProperties->defaultLimit = static_cast(val / milliFactor); // need to convert from microwatt to milliwatt } result = pSysfsAccess->read(i915HwmonDir + "/" + minPowerLimit, val); if (ZE_RESULT_SUCCESS == result && val != 0) { pProperties->minLimit = static_cast(val / milliFactor); // need to convert from microwatt to milliwatt } result = pSysfsAccess->read(i915HwmonDir + "/" + maxPowerLimit, val); if (ZE_RESULT_SUCCESS == result && val != std::numeric_limits::max()) { pProperties->maxLimit = static_cast(val / milliFactor); // need to convert from microwatt to milliwatt } return ZE_RESULT_SUCCESS; } ze_result_t LinuxPowerImp::getPmtEnergyCounter(zes_power_energy_counter_t *pEnergy) { const std::string key("PACKAGE_ENERGY"); uint64_t energy = 0; ze_result_t result = pPmt->readValue(key, energy); // PMT will return energy counter in Q20 format(fixed point representation) where first 20 bits(from LSB) represent decimal part and remaining integral part which is converted into joule by division with 1048576(2^20) and then converted into microjoules pEnergy->energy = (energy / 1048576) * convertJouleToMicroJoule; return result; } ze_result_t LinuxPowerImp::getEnergyCounter(zes_power_energy_counter_t *pEnergy) { powerGetTimestamp(pEnergy->timestamp); ze_result_t result = pSysfsAccess->read(energyHwmonDir + "/" + energyCounterNode, pEnergy->energy); if (result != ZE_RESULT_SUCCESS) { if (pPmt != nullptr) { 
return getPmtEnergyCounter(pEnergy); } } if (result != ZE_RESULT_SUCCESS) { return getErrorCode(result); } return result; } ze_result_t LinuxPowerImp::getLimits(zes_power_sustained_limit_t *pSustained, zes_power_burst_limit_t *pBurst, zes_power_peak_limit_t *pPeak) { ze_result_t result = ZE_RESULT_ERROR_UNKNOWN; uint64_t val = 0; if (pSustained != nullptr) { result = pSysfsAccess->read(i915HwmonDir + "/" + sustainedPowerLimitEnabled, val); if (ZE_RESULT_SUCCESS != result) { return getErrorCode(result); } pSustained->enabled = static_cast(val); if (pSustained->enabled) { val = 0; result = pSysfsAccess->read(i915HwmonDir + "/" + sustainedPowerLimit, val); if (ZE_RESULT_SUCCESS != result) { return getErrorCode(result); } val /= milliFactor; // Convert microWatts to milliwatts pSustained->power = static_cast(val); val = 0; result = pSysfsAccess->read(i915HwmonDir + "/" + sustainedPowerLimitInterval, val); if (ZE_RESULT_SUCCESS != result) { return getErrorCode(result); } pSustained->interval = static_cast(val); } } if (pBurst != nullptr) { result = pSysfsAccess->read(i915HwmonDir + "/" + burstPowerLimitEnabled, val); if (ZE_RESULT_SUCCESS != result) { return getErrorCode(result); } pBurst->enabled = static_cast(val); if (pBurst->enabled) { result = pSysfsAccess->read(i915HwmonDir + "/" + burstPowerLimit, val); if (ZE_RESULT_SUCCESS != result) { return getErrorCode(result); } val /= milliFactor; // Convert microWatts to milliwatts pBurst->power = static_cast(val); } } if (pPeak != nullptr) { pPeak->powerAC = -1; pPeak->powerDC = -1; result = ZE_RESULT_SUCCESS; } return result; } ze_result_t LinuxPowerImp::setLimits(const zes_power_sustained_limit_t *pSustained, const zes_power_burst_limit_t *pBurst, const zes_power_peak_limit_t *pPeak) { ze_result_t result = ZE_RESULT_ERROR_UNKNOWN; int32_t val = 0; if (pSustained != nullptr) { uint64_t isSustainedPowerLimitEnabled = 0; result = pSysfsAccess->read(i915HwmonDir + "/" + sustainedPowerLimitEnabled, 
isSustainedPowerLimitEnabled); if (ZE_RESULT_SUCCESS != result) { return getErrorCode(result); } if (isSustainedPowerLimitEnabled != static_cast(pSustained->enabled)) { result = pSysfsAccess->write(i915HwmonDir + "/" + sustainedPowerLimitEnabled, static_cast(pSustained->enabled)); if (ZE_RESULT_SUCCESS != result) { return getErrorCode(result); } isSustainedPowerLimitEnabled = static_cast(pSustained->enabled); } if (isSustainedPowerLimitEnabled) { val = static_cast(pSustained->power) * milliFactor; // Convert milliWatts to microwatts result = pSysfsAccess->write(i915HwmonDir + "/" + sustainedPowerLimit, val); if (ZE_RESULT_SUCCESS != result) { return getErrorCode(result); } result = pSysfsAccess->write(i915HwmonDir + "/" + sustainedPowerLimitInterval, pSustained->interval); if (ZE_RESULT_SUCCESS != result) { return getErrorCode(result); } } result = ZE_RESULT_SUCCESS; } if (pBurst != nullptr) { result = pSysfsAccess->write(i915HwmonDir + "/" + burstPowerLimitEnabled, static_cast(pBurst->enabled)); if (ZE_RESULT_SUCCESS != result) { return getErrorCode(result); } if (pBurst->enabled) { val = static_cast(pBurst->power) * milliFactor; // Convert milliWatts to microwatts result = pSysfsAccess->write(i915HwmonDir + "/" + burstPowerLimit, val); if (ZE_RESULT_SUCCESS != result) { return getErrorCode(result); } } } return result; } ze_result_t LinuxPowerImp::getEnergyThreshold(zes_energy_threshold_t *pThreshold) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t LinuxPowerImp::setEnergyThreshold(double threshold) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } bool LinuxPowerImp::isPowerModuleSupported() { std::vector listOfAllHwmonDirs = {}; bool hwmonDirExists = false; if (ZE_RESULT_SUCCESS != pSysfsAccess->scanDirEntries(hwmonDir, listOfAllHwmonDirs)) { hwmonDirExists = false; } for (const auto &tempHwmonDirEntry : listOfAllHwmonDirs) { const std::string i915NameFile = hwmonDir + "/" + tempHwmonDirEntry + "/" + "name"; std::string name; if (ZE_RESULT_SUCCESS != 
pSysfsAccess->read(i915NameFile, name)) { continue; } if (name == i915) { i915HwmonDir = hwmonDir + "/" + tempHwmonDirEntry; hwmonDirExists = true; canControl = true; } if (isEnergyHwmonDir(name) == true) { energyHwmonDir = hwmonDir + "/" + tempHwmonDirEntry; } } if (hwmonDirExists == false) { return (pPmt != nullptr); } return true; } LinuxPowerImp::LinuxPowerImp(OsSysman *pOsSysman, ze_bool_t onSubdevice, uint32_t subdeviceId) : isSubdevice(onSubdevice), subdeviceId(subdeviceId) { LinuxSysmanImp *pLinuxSysmanImp = static_cast(pOsSysman); pPmt = pLinuxSysmanImp->getPlatformMonitoringTechAccess(subdeviceId); pSysfsAccess = &pLinuxSysmanImp->getSysfsAccess(); } OsPower *OsPower::create(OsSysman *pOsSysman, ze_bool_t onSubdevice, uint32_t subdeviceId) { LinuxPowerImp *pLinuxPowerImp = new LinuxPowerImp(pOsSysman, onSubdevice, subdeviceId); return static_cast(pLinuxPowerImp); } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/power/linux/os_power_imp.h000066400000000000000000000044431422164147700306450ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/non_copyable_or_moveable.h" #include "level_zero/tools/source/sysman/power/os_power.h" #include #include namespace L0 { class SysfsAccess; class PlatformMonitoringTech; class LinuxPowerImp : public OsPower, NEO::NonCopyableOrMovableClass { public: ze_result_t getProperties(zes_power_properties_t *pProperties) override; ze_result_t getEnergyCounter(zes_power_energy_counter_t *pEnergy) override; ze_result_t getLimits(zes_power_sustained_limit_t *pSustained, zes_power_burst_limit_t *pBurst, zes_power_peak_limit_t *pPeak) override; ze_result_t setLimits(const zes_power_sustained_limit_t *pSustained, const zes_power_burst_limit_t *pBurst, const zes_power_peak_limit_t *pPeak) override; ze_result_t getEnergyThreshold(zes_energy_threshold_t *pThreshold) override; ze_result_t 
setEnergyThreshold(double threshold) override; bool isPowerModuleSupported() override; bool isEnergyHwmonDir(std::string name); ze_result_t getPmtEnergyCounter(zes_power_energy_counter_t *pEnergy); LinuxPowerImp(OsSysman *pOsSysman, ze_bool_t onSubdevice, uint32_t subdeviceId); LinuxPowerImp() = default; ~LinuxPowerImp() override = default; protected: PlatformMonitoringTech *pPmt = nullptr; SysfsAccess *pSysfsAccess = nullptr; private: std::string i915HwmonDir; std::string energyHwmonDir; static const std::string hwmonDir; static const std::string i915; static const std::string sustainedPowerLimitEnabled; static const std::string sustainedPowerLimit; static const std::string sustainedPowerLimitInterval; static const std::string burstPowerLimitEnabled; static const std::string burstPowerLimit; static const std::string energyCounterNode; static const std::string defaultPowerLimit; static const std::string minPowerLimit; static const std::string maxPowerLimit; bool canControl = false; bool isSubdevice = false; uint32_t subdeviceId = 0; ze_result_t getErrorCode(ze_result_t result) { if (result == ZE_RESULT_ERROR_NOT_AVAILABLE) { result = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } return result; } }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/power/os_power.h000066400000000000000000000017661422164147700266460ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include namespace L0 { struct OsSysman; class OsPower { public: virtual ze_result_t getProperties(zes_power_properties_t *pProperties) = 0; virtual ze_result_t getEnergyCounter(zes_power_energy_counter_t *pEnergy) = 0; virtual ze_result_t getLimits(zes_power_sustained_limit_t *pSustained, zes_power_burst_limit_t *pBurst, zes_power_peak_limit_t *pPeak) = 0; virtual ze_result_t setLimits(const zes_power_sustained_limit_t *pSustained, const zes_power_burst_limit_t *pBurst, const zes_power_peak_limit_t *pPeak) = 0; 
virtual ze_result_t getEnergyThreshold(zes_energy_threshold_t *pThreshold) = 0; virtual ze_result_t setEnergyThreshold(double threshold) = 0; virtual bool isPowerModuleSupported() = 0; static OsPower *create(OsSysman *pOsSysman, ze_bool_t onSubdevice, uint32_t subdeviceId); virtual ~OsPower() = default; }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/power/power.cpp000066400000000000000000000027571422164147700265010ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/power/power.h" #include "shared/source/helpers/basic_math.h" #include "level_zero/tools/source/sysman/power/power_imp.h" namespace L0 { PowerHandleContext::~PowerHandleContext() { for (Power *pPower : handleList) { delete pPower; } } void PowerHandleContext::createHandle(ze_device_handle_t deviceHandle) { Power *pPower = new PowerImp(pOsSysman, deviceHandle); if (pPower->initSuccess == true) { handleList.push_back(pPower); } else { delete pPower; } } ze_result_t PowerHandleContext::init(std::vector &deviceHandles, ze_device_handle_t coreDevice) { // Create Handle for device level power if (deviceHandles.size() > 1) { createHandle(coreDevice); } for (const auto &deviceHandle : deviceHandles) { createHandle(deviceHandle); } return ZE_RESULT_SUCCESS; } ze_result_t PowerHandleContext::powerGet(uint32_t *pCount, zes_pwr_handle_t *phPower) { uint32_t handleListSize = static_cast(handleList.size()); uint32_t numToCopy = std::min(*pCount, handleListSize); if (0 == *pCount || *pCount > handleListSize) { *pCount = handleListSize; } if (nullptr != phPower) { for (uint32_t i = 0; i < numToCopy; i++) { phPower[i] = handleList[i]->toHandle(); } } return ZE_RESULT_SUCCESS; } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/power/power.h000066400000000000000000000034511422164147700261360ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel 
Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/core/source/device/device.h" #include #include struct _zet_sysman_pwr_handle_t { virtual ~_zet_sysman_pwr_handle_t() = default; }; struct _zes_pwr_handle_t { virtual ~_zes_pwr_handle_t() = default; }; namespace L0 { struct OsSysman; class Power : _zet_sysman_pwr_handle_t, _zes_pwr_handle_t { public: virtual ze_result_t powerGetProperties(zes_power_properties_t *pProperties) = 0; virtual ze_result_t powerGetEnergyCounter(zes_power_energy_counter_t *pEnergy) = 0; virtual ze_result_t powerGetLimits(zes_power_sustained_limit_t *pSustained, zes_power_burst_limit_t *pBurst, zes_power_peak_limit_t *pPeak) = 0; virtual ze_result_t powerSetLimits(const zes_power_sustained_limit_t *pSustained, const zes_power_burst_limit_t *pBurst, const zes_power_peak_limit_t *pPeak) = 0; virtual ze_result_t powerGetEnergyThreshold(zes_energy_threshold_t *pThreshold) = 0; virtual ze_result_t powerSetEnergyThreshold(double threshold) = 0; static Power *fromHandle(zes_pwr_handle_t handle) { return static_cast(handle); } inline zes_pwr_handle_t toHandle() { return this; } bool initSuccess = false; zes_power_properties_t powerProperties = {}; }; struct PowerHandleContext { PowerHandleContext(OsSysman *pOsSysman) : pOsSysman(pOsSysman){}; ~PowerHandleContext(); ze_result_t init(std::vector &deviceHandles, ze_device_handle_t coreDevice); ze_result_t powerGet(uint32_t *pCount, zes_pwr_handle_t *phPower); OsSysman *pOsSysman = nullptr; std::vector handleList = {}; private: void createHandle(ze_device_handle_t deviceHandle); }; } // namespace L0compute-runtime-22.14.22890/level_zero/tools/source/sysman/power/power_imp.cpp000066400000000000000000000035471422164147700273440ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/power/power_imp.h" #include "shared/source/helpers/debug_helpers.h" namespace L0 { ze_result_t 
PowerImp::powerGetProperties(zes_power_properties_t *pProperties) { *pProperties = powerProperties; return ZE_RESULT_SUCCESS; } ze_result_t PowerImp::powerGetEnergyCounter(zes_power_energy_counter_t *pEnergy) { return pOsPower->getEnergyCounter(pEnergy); } ze_result_t PowerImp::powerGetLimits(zes_power_sustained_limit_t *pSustained, zes_power_burst_limit_t *pBurst, zes_power_peak_limit_t *pPeak) { return pOsPower->getLimits(pSustained, pBurst, pPeak); } ze_result_t PowerImp::powerSetLimits(const zes_power_sustained_limit_t *pSustained, const zes_power_burst_limit_t *pBurst, const zes_power_peak_limit_t *pPeak) { return pOsPower->setLimits(pSustained, pBurst, pPeak); } ze_result_t PowerImp::powerGetEnergyThreshold(zes_energy_threshold_t *pThreshold) { return pOsPower->getEnergyThreshold(pThreshold); } ze_result_t PowerImp::powerSetEnergyThreshold(double threshold) { return pOsPower->setEnergyThreshold(threshold); } PowerImp::PowerImp(OsSysman *pOsSysman, ze_device_handle_t handle) : deviceHandle(handle) { ze_device_properties_t deviceProperties = {}; Device::fromHandle(deviceHandle)->getProperties(&deviceProperties); pOsPower = OsPower::create(pOsSysman, deviceProperties.flags & ZE_DEVICE_PROPERTY_FLAG_SUBDEVICE, deviceProperties.subdeviceId); UNRECOVERABLE_IF(nullptr == pOsPower); init(); } void PowerImp::init() { if (pOsPower->isPowerModuleSupported()) { pOsPower->getProperties(&powerProperties); this->initSuccess = true; } } PowerImp::~PowerImp() { if (nullptr != pOsPower) { delete pOsPower; pOsPower = nullptr; } } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/power/power_imp.h000066400000000000000000000023721422164147700270040ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/non_copyable_or_moveable.h" #include "level_zero/tools/source/sysman/power/os_power.h" #include "level_zero/tools/source/sysman/power/power.h" #include 
namespace L0 { class PowerImp : public Power, NEO::NonCopyableOrMovableClass { public: ze_result_t powerGetProperties(zes_power_properties_t *pProperties) override; ze_result_t powerGetEnergyCounter(zes_power_energy_counter_t *pEnergy) override; ze_result_t powerGetLimits(zes_power_sustained_limit_t *pSustained, zes_power_burst_limit_t *pBurst, zes_power_peak_limit_t *pPeak) override; ze_result_t powerSetLimits(const zes_power_sustained_limit_t *pSustained, const zes_power_burst_limit_t *pBurst, const zes_power_peak_limit_t *pPeak) override; ze_result_t powerGetEnergyThreshold(zes_energy_threshold_t *pThreshold) override; ze_result_t powerSetEnergyThreshold(double threshold) override; PowerImp() = default; PowerImp(OsSysman *pOsSysman, ze_device_handle_t device); ~PowerImp() override; OsPower *pOsPower = nullptr; void init(); private: ze_device_handle_t deviceHandle = {}; }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/power/windows/000077500000000000000000000000001422164147700263205ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/power/windows/CMakeLists.txt000066400000000000000000000010451422164147700310600ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_TOOLS_SYSMAN_POWER_WINDOWS ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/os_power_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/os_power_imp.cpp ) if(WIN32) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_SYSMAN_POWER_WINDOWS} ) endif() # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_SYSMAN_POWER_WINDOWS ${L0_SRCS_TOOLS_SYSMAN_POWER_WINDOWS}) compute-runtime-22.14.22890/level_zero/tools/source/sysman/power/windows/os_power_imp.cpp000066400000000000000000000272021422164147700315310ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include 
"sysman/power/windows/os_power_imp.h" namespace L0 { ze_result_t WddmPowerImp::getProperties(zes_power_properties_t *pProperties) { pProperties->onSubdevice = false; pProperties->subdeviceId = 0; std::vector vRequests = {}; std::vector vResponses = {}; KmdSysman::RequestProperty request = {}; request.commandId = KmdSysman::Command::Get; request.componentId = KmdSysman::Component::PowerComponent; request.requestId = KmdSysman::Requests::Power::EnergyThresholdSupported; vRequests.push_back(request); request.requestId = KmdSysman::Requests::Power::TdpDefault; vRequests.push_back(request); request.requestId = KmdSysman::Requests::Power::MinPowerLimitDefault; vRequests.push_back(request); request.requestId = KmdSysman::Requests::Power::MaxPowerLimitDefault; vRequests.push_back(request); ze_result_t status = pKmdSysManager->requestMultiple(vRequests, vResponses); if ((status != ZE_RESULT_SUCCESS) || (vResponses.size() != vRequests.size())) { return status; } if (vResponses[0].returnCode == KmdSysman::Success) { memcpy_s(&pProperties->canControl, sizeof(ze_bool_t), vResponses[0].dataBuffer, sizeof(ze_bool_t)); memcpy_s(&pProperties->isEnergyThresholdSupported, sizeof(ze_bool_t), vResponses[0].dataBuffer, sizeof(ze_bool_t)); } pProperties->defaultLimit = -1; if (vResponses[1].returnCode == KmdSysman::Success) { memcpy_s(&pProperties->defaultLimit, sizeof(uint32_t), vResponses[1].dataBuffer, sizeof(uint32_t)); } pProperties->minLimit = -1; if (vResponses[2].returnCode == KmdSysman::Success) { memcpy_s(&pProperties->minLimit, sizeof(uint32_t), vResponses[2].dataBuffer, sizeof(uint32_t)); } pProperties->maxLimit = -1; if (vResponses[3].returnCode == KmdSysman::Success) { memcpy_s(&pProperties->maxLimit, sizeof(uint32_t), vResponses[3].dataBuffer, sizeof(uint32_t)); } return ZE_RESULT_SUCCESS; } ze_result_t WddmPowerImp::getEnergyCounter(zes_power_energy_counter_t *pEnergy) { uint32_t energyUnits = 0; uint32_t timestampFrequency = 0; std::vector vRequests = {}; std::vector 
vResponses = {}; KmdSysman::RequestProperty request = {}; request.commandId = KmdSysman::Command::Get; request.componentId = KmdSysman::Component::PowerComponent; request.requestId = KmdSysman::Requests::Power::EnergyCounterUnits; vRequests.push_back(request); request.requestId = KmdSysman::Requests::Power::CurrentEnergyCounter; vRequests.push_back(request); request.commandId = KmdSysman::Command::Get; request.componentId = KmdSysman::Component::ActivityComponent; request.requestId = KmdSysman::Requests::Activity::TimestampFrequency; vRequests.push_back(request); ze_result_t status = pKmdSysManager->requestMultiple(vRequests, vResponses); if ((status != ZE_RESULT_SUCCESS) || (vResponses.size() != vRequests.size())) { return status; } if (vResponses[0].returnCode == KmdSysman::Success) { memcpy_s(&energyUnits, sizeof(uint32_t), vResponses[0].dataBuffer, sizeof(uint32_t)); } uint32_t valueCounter = 0; uint64_t valueTimeStamp = 0; if (vResponses[1].returnCode == KmdSysman::Success) { memcpy_s(&valueCounter, sizeof(uint32_t), vResponses[1].dataBuffer, sizeof(uint32_t)); uint32_t conversionUnit = (1 << energyUnits); double valueConverted = static_cast(valueCounter) / static_cast(conversionUnit); valueConverted *= static_cast(convertJouleToMicroJoule); pEnergy->energy = static_cast(valueConverted); memcpy_s(&valueTimeStamp, sizeof(uint64_t), (vResponses[1].dataBuffer + sizeof(uint32_t)), sizeof(uint64_t)); } if (vResponses[2].returnCode == KmdSysman::Success) { memcpy_s(×tampFrequency, sizeof(uint32_t), vResponses[2].dataBuffer, sizeof(uint32_t)); double timeFactor = 1.0 / static_cast(timestampFrequency); timeFactor = static_cast(valueTimeStamp) * timeFactor; timeFactor *= static_cast(microFacor); pEnergy->timestamp = static_cast(timeFactor); } return ZE_RESULT_SUCCESS; } ze_result_t WddmPowerImp::getLimits(zes_power_sustained_limit_t *pSustained, zes_power_burst_limit_t *pBurst, zes_power_peak_limit_t *pPeak) { ze_result_t status = ZE_RESULT_SUCCESS; 
KmdSysman::RequestProperty request; KmdSysman::ResponseProperty response; request.commandId = KmdSysman::Command::Get; request.componentId = KmdSysman::Component::PowerComponent; if (pSustained) { memset(pSustained, 0, sizeof(zes_power_sustained_limit_t)); request.requestId = KmdSysman::Requests::Power::PowerLimit1Enabled; status = pKmdSysManager->requestSingle(request, response); if (status != ZE_RESULT_SUCCESS) { return status; } memcpy_s(&pSustained->enabled, sizeof(ze_bool_t), response.dataBuffer, sizeof(ze_bool_t)); request.requestId = KmdSysman::Requests::Power::CurrentPowerLimit1; status = pKmdSysManager->requestSingle(request, response); if (status != ZE_RESULT_SUCCESS) { return status; } memcpy_s(&pSustained->power, sizeof(uint32_t), response.dataBuffer, sizeof(uint32_t)); request.requestId = KmdSysman::Requests::Power::CurrentPowerLimit1Tau; status = pKmdSysManager->requestSingle(request, response); if (status != ZE_RESULT_SUCCESS) { return status; } memcpy_s(&pSustained->interval, sizeof(uint32_t), response.dataBuffer, sizeof(uint32_t)); } if (pBurst) { memset(pBurst, 0, sizeof(zes_power_burst_limit_t)); request.requestId = KmdSysman::Requests::Power::PowerLimit2Enabled; status = pKmdSysManager->requestSingle(request, response); if (status != ZE_RESULT_SUCCESS) { return status; } memcpy_s(&pBurst->enabled, sizeof(ze_bool_t), response.dataBuffer, sizeof(ze_bool_t)); request.requestId = KmdSysman::Requests::Power::CurrentPowerLimit2; status = pKmdSysManager->requestSingle(request, response); if (status != ZE_RESULT_SUCCESS) { return status; } memcpy_s(&pBurst->power, sizeof(uint32_t), response.dataBuffer, sizeof(uint32_t)); } if (pPeak) { memset(pPeak, 0, sizeof(zes_power_peak_limit_t)); request.requestId = KmdSysman::Requests::Power::CurrentPowerLimit4Ac; status = pKmdSysManager->requestSingle(request, response); if (status != ZE_RESULT_SUCCESS) { return status; } memcpy_s(&pPeak->powerAC, sizeof(uint32_t), response.dataBuffer, sizeof(uint32_t)); 
request.requestId = KmdSysman::Requests::Power::CurrentPowerLimit4Dc; status = pKmdSysManager->requestSingle(request, response); if (status != ZE_RESULT_SUCCESS) { return status; } memcpy_s(&pPeak->powerDC, sizeof(uint32_t), response.dataBuffer, sizeof(uint32_t)); } return status; } ze_result_t WddmPowerImp::setLimits(const zes_power_sustained_limit_t *pSustained, const zes_power_burst_limit_t *pBurst, const zes_power_peak_limit_t *pPeak) { ze_result_t status = ZE_RESULT_SUCCESS; KmdSysman::RequestProperty request; KmdSysman::ResponseProperty response; request.commandId = KmdSysman::Command::Set; request.componentId = KmdSysman::Component::PowerComponent; request.dataSize = sizeof(uint32_t); if (pSustained) { request.requestId = KmdSysman::Requests::Power::CurrentPowerLimit1; memcpy_s(request.dataBuffer, sizeof(uint32_t), &pSustained->power, sizeof(uint32_t)); status = pKmdSysManager->requestSingle(request, response); if (status != ZE_RESULT_SUCCESS) { return status; } request.requestId = KmdSysman::Requests::Power::CurrentPowerLimit1Tau; memcpy_s(request.dataBuffer, sizeof(uint32_t), &pSustained->interval, sizeof(uint32_t)); status = pKmdSysManager->requestSingle(request, response); if (status != ZE_RESULT_SUCCESS) { return status; } } if (pBurst) { request.requestId = KmdSysman::Requests::Power::CurrentPowerLimit2; memcpy_s(request.dataBuffer, sizeof(uint32_t), &pBurst->power, sizeof(uint32_t)); status = pKmdSysManager->requestSingle(request, response); if (status != ZE_RESULT_SUCCESS) { return status; } } if (pPeak) { request.requestId = KmdSysman::Requests::Power::CurrentPowerLimit4Ac; memcpy_s(request.dataBuffer, sizeof(uint32_t), &pPeak->powerAC, sizeof(uint32_t)); status = pKmdSysManager->requestSingle(request, response); if (status != ZE_RESULT_SUCCESS) { return status; } request.requestId = KmdSysman::Requests::Power::CurrentPowerLimit4Dc; memcpy_s(request.dataBuffer, sizeof(uint32_t), &pPeak->powerDC, sizeof(uint32_t)); status = 
pKmdSysManager->requestSingle(request, response); } return status; } ze_result_t WddmPowerImp::getEnergyThreshold(zes_energy_threshold_t *pThreshold) { KmdSysman::RequestProperty request; KmdSysman::ResponseProperty response; pThreshold->processId = 0; request.commandId = KmdSysman::Command::Get; request.componentId = KmdSysman::Component::PowerComponent; request.requestId = KmdSysman::Requests::Power::CurrentEnergyThreshold; ze_result_t status = pKmdSysManager->requestSingle(request, response); if (status != ZE_RESULT_SUCCESS) { return status; } memset(pThreshold, 0, sizeof(zes_energy_threshold_t)); uint32_t value = 0; memcpy_s(&value, sizeof(uint32_t), response.dataBuffer, sizeof(uint32_t)); pThreshold->threshold = static_cast(value); pThreshold->enable = true; return status; } ze_result_t WddmPowerImp::setEnergyThreshold(double threshold) { KmdSysman::RequestProperty request; KmdSysman::ResponseProperty response; request.commandId = KmdSysman::Command::Set; request.componentId = KmdSysman::Component::PowerComponent; request.requestId = KmdSysman::Requests::Power::CurrentEnergyThreshold; request.dataSize = sizeof(uint32_t); uint32_t value = static_cast(threshold); memcpy_s(request.dataBuffer, sizeof(uint32_t), &value, sizeof(uint32_t)); return pKmdSysManager->requestSingle(request, response); } bool WddmPowerImp::isPowerModuleSupported() { KmdSysman::RequestProperty request; KmdSysman::ResponseProperty response; request.commandId = KmdSysman::Command::Get; request.componentId = KmdSysman::Component::PowerComponent; request.requestId = KmdSysman::Requests::Power::PowerLimit1Enabled; ze_result_t status = pKmdSysManager->requestSingle(request, response); uint32_t enabled = 0; memcpy_s(&enabled, sizeof(uint32_t), response.dataBuffer, sizeof(uint32_t)); return ((status == ZE_RESULT_SUCCESS) && (enabled)); } WddmPowerImp::WddmPowerImp(OsSysman *pOsSysman, ze_bool_t onSubdevice, uint32_t subdeviceId) { WddmSysmanImp *pWddmSysmanImp = static_cast(pOsSysman); 
pKmdSysManager = &pWddmSysmanImp->getKmdSysManager(); } OsPower *OsPower::create(OsSysman *pOsSysman, ze_bool_t onSubdevice, uint32_t subdeviceId) { WddmPowerImp *pWddmPowerImp = new WddmPowerImp(pOsSysman, onSubdevice, subdeviceId); return static_cast(pWddmPowerImp); } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/power/windows/os_power_imp.h000066400000000000000000000023301422164147700311710ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/non_copyable_or_moveable.h" #include "sysman/power/os_power.h" #include "sysman/windows/os_sysman_imp.h" namespace L0 { class KmdSysManager; class WddmPowerImp : public OsPower, NEO::NonCopyableOrMovableClass { public: ze_result_t getProperties(zes_power_properties_t *pProperties) override; ze_result_t getEnergyCounter(zes_power_energy_counter_t *pEnergy) override; ze_result_t getLimits(zes_power_sustained_limit_t *pSustained, zes_power_burst_limit_t *pBurst, zes_power_peak_limit_t *pPeak) override; ze_result_t setLimits(const zes_power_sustained_limit_t *pSustained, const zes_power_burst_limit_t *pBurst, const zes_power_peak_limit_t *pPeak) override; ze_result_t getEnergyThreshold(zes_energy_threshold_t *pThreshold) override; ze_result_t setEnergyThreshold(double threshold) override; bool isPowerModuleSupported() override; WddmPowerImp(OsSysman *pOsSysman, ze_bool_t onSubdevice, uint32_t subdeviceId); WddmPowerImp() = default; ~WddmPowerImp() override = default; protected: KmdSysManager *pKmdSysManager = nullptr; }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/ras/000077500000000000000000000000001422164147700242575ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/ras/CMakeLists.txt000066400000000000000000000011611422164147700270160ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # 
SPDX-License-Identifier: MIT # set(L0_SRCS_TOOLS_SYSMAN_RAS ${CMAKE_CURRENT_SOURCE_DIR}/ras.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ras.h ${CMAKE_CURRENT_SOURCE_DIR}/ras_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ras_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/os_ras.h ) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_SYSMAN_RAS} ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) add_subdirectories() # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_SYSMAN_RAS ${L0_SRCS_TOOLS_SYSMAN_RAS}) compute-runtime-22.14.22890/level_zero/tools/source/sysman/ras/linux/000077500000000000000000000000001422164147700254165ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/ras/linux/CMakeLists.txt000066400000000000000000000017071422164147700301630ustar00rootroot00000000000000# # Copyright (C) 2020-2022 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_TOOLS_SYSMAN_RAS_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) if(NEO_ENABLE_i915_PRELIM_DETECTION) list(APPEND L0_SRCS_TOOLS_SYSMAN_RAS_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/os_ras_imp_prelim.cpp ${CMAKE_CURRENT_SOURCE_DIR}/os_ras_imp_prelim.h ${CMAKE_CURRENT_SOURCE_DIR}/os_ras_imp_gt.cpp ${CMAKE_CURRENT_SOURCE_DIR}/os_ras_imp_fabric.cpp ${CMAKE_CURRENT_SOURCE_DIR}/os_ras_imp_hbm.cpp ) else() list(APPEND L0_SRCS_TOOLS_SYSMAN_RAS_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/os_ras_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/os_ras_imp.h ) endif() if(UNIX) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_SYSMAN_RAS_LINUX} ) endif() # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_SYSMAN_RAS_LINUX ${L0_SRCS_TOOLS_SYSMAN_RAS_LINUX}) compute-runtime-22.14.22890/level_zero/tools/source/sysman/ras/linux/os_ras_imp.cpp000066400000000000000000000036711422164147700302640ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/ras/linux/os_ras_imp.h" 
#include "sysman/linux/os_sysman_imp.h" namespace L0 { void OsRas::getSupportedRasErrorTypes(std::set &errorType, OsSysman *pOsSysman, ze_device_handle_t deviceHandle) {} ze_result_t LinuxRasImp::osRasGetState(zes_ras_state_t &state, ze_bool_t clear) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t LinuxRasImp::osRasGetConfig(zes_ras_config_t *config) { config->totalThreshold = totalThreshold; memcpy(config->detailedThresholds.category, categoryThreshold, sizeof(config->detailedThresholds.category)); return ZE_RESULT_SUCCESS; } ze_result_t LinuxRasImp::osRasSetConfig(const zes_ras_config_t *config) { if (pFsAccess->isRootUser() == true) { totalThreshold = config->totalThreshold; memcpy(categoryThreshold, config->detailedThresholds.category, sizeof(config->detailedThresholds.category)); return ZE_RESULT_SUCCESS; } return ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS; } ze_result_t LinuxRasImp::osRasGetProperties(zes_ras_properties_t &properties) { properties.pNext = nullptr; properties.type = osRasErrorType; properties.onSubdevice = isSubdevice; properties.subdeviceId = subdeviceId; return ZE_RESULT_SUCCESS; } LinuxRasImp::LinuxRasImp(OsSysman *pOsSysman, zes_ras_error_type_t type, ze_bool_t onSubdevice, uint32_t subdeviceId) : osRasErrorType(type), isSubdevice(onSubdevice), subdeviceId(subdeviceId) { pLinuxSysmanImp = static_cast(pOsSysman); pFsAccess = &pLinuxSysmanImp->getFsAccess(); } OsRas *OsRas::create(OsSysman *pOsSysman, zes_ras_error_type_t type, ze_bool_t onSubdevice, uint32_t subdeviceId) { LinuxRasImp *pLinuxRasImp = new LinuxRasImp(pOsSysman, type, onSubdevice, subdeviceId); return static_cast(pLinuxRasImp); } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/ras/linux/os_ras_imp.h000066400000000000000000000022221422164147700277200ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/non_copyable_or_moveable.h" 
#include "level_zero/tools/source/sysman/ras/os_ras.h" namespace L0 { class FsAccess; class LinuxSysmanImp; class LinuxRasImp : public OsRas, NEO::NonCopyableOrMovableClass { public: ze_result_t osRasGetProperties(zes_ras_properties_t &properties) override; ze_result_t osRasGetState(zes_ras_state_t &state, ze_bool_t clear) override; ze_result_t osRasGetConfig(zes_ras_config_t *config) override; ze_result_t osRasSetConfig(const zes_ras_config_t *config) override; LinuxRasImp(OsSysman *pOsSysman, zes_ras_error_type_t type, ze_bool_t onSubdevice, uint32_t subdeviceId); LinuxRasImp() = default; ~LinuxRasImp() override = default; protected: zes_ras_error_type_t osRasErrorType = {}; FsAccess *pFsAccess = nullptr; LinuxSysmanImp *pLinuxSysmanImp = nullptr; private: bool isSubdevice = false; uint32_t subdeviceId = 0; uint64_t totalThreshold = 0; uint64_t categoryThreshold[ZES_MAX_RAS_ERROR_CATEGORY_COUNT] = {0}; }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/ras/linux/os_ras_imp_fabric.cpp000066400000000000000000000101751422164147700315670ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/ras/linux/os_ras_imp_prelim.h" #include "sysman/linux/fs_access.h" #include "sysman/linux/os_sysman_imp.h" #include namespace L0 { void LinuxRasSourceFabric::getNodes(std::vector &nodes, uint32_t subdeviceId, FsAccess *fsAccess, const zes_ras_error_type_t &type) { const uint32_t minBoardStrappedNumber = 0; const uint32_t maxBoardStrappedNumber = 31; const uint32_t minPortId = 1; const uint32_t maxPortId = 8; nodes.clear(); for (auto boardStrappedNumber = minBoardStrappedNumber; boardStrappedNumber <= maxBoardStrappedNumber; boardStrappedNumber++) { const auto iafPathString("/sys/module/iaf/drivers/platform:iaf/iaf."); const auto boardStrappedString(iafPathString + std::to_string(boardStrappedNumber)); if (!fsAccess->directoryExists(boardStrappedString)) 
{ continue; } const auto subDeviceString(boardStrappedString + "/sd." + std::to_string(subdeviceId)); std::vector subDeviceErrorNodes; if (type == ZES_RAS_ERROR_TYPE_CORRECTABLE) { subDeviceErrorNodes.push_back(subDeviceString + "/fw_comm_errors"); for (auto portId = minPortId; portId <= maxPortId; portId++) { subDeviceErrorNodes.push_back(subDeviceString + "/port." + std::to_string(portId) + "/link_degrades"); } } else { subDeviceErrorNodes.push_back(subDeviceString + "/sd_failure"); subDeviceErrorNodes.push_back(subDeviceString + "/fw_error"); for (auto portId = minPortId; portId <= maxPortId; portId++) { subDeviceErrorNodes.push_back(subDeviceString + "/port." + std::to_string(portId) + "/link_failures"); } } for (auto &subDeviceErrorNode : subDeviceErrorNodes) { if (ZE_RESULT_SUCCESS == fsAccess->canRead(subDeviceErrorNode)) { nodes.push_back(subDeviceErrorNode); } } } } ze_result_t LinuxRasSourceFabric::getSupportedRasErrorTypes(std::set &errorType, OsSysman *pOsSysman, ze_device_handle_t deviceHandle) { LinuxSysmanImp *pLinuxSysmanImp = static_cast(pOsSysman); NEO::Device *neoDevice = static_cast(deviceHandle)->getNEODevice(); uint32_t subDeviceIndex = neoDevice->isSubDevice() ? 
static_cast(neoDevice)->getSubDeviceIndex() : 0; std::vector nodes; getNodes(nodes, subDeviceIndex, &pLinuxSysmanImp->getFsAccess(), ZES_RAS_ERROR_TYPE_UNCORRECTABLE); if (nodes.size()) { errorType.insert(ZES_RAS_ERROR_TYPE_UNCORRECTABLE); } getNodes(nodes, subDeviceIndex, &pLinuxSysmanImp->getFsAccess(), ZES_RAS_ERROR_TYPE_CORRECTABLE); if (nodes.size()) { errorType.insert(ZES_RAS_ERROR_TYPE_CORRECTABLE); } return ZE_RESULT_SUCCESS; } LinuxRasSourceFabric::LinuxRasSourceFabric(OsSysman *pOsSysman, zes_ras_error_type_t type, uint32_t subDeviceId) { fsAccess = &static_cast(pOsSysman)->getFsAccess(); getNodes(errorNodes, subDeviceId, fsAccess, type); } uint64_t LinuxRasSourceFabric::getComputeErrorCount() { uint64_t currentErrorCount = 0; for (const auto &node : errorNodes) { uint64_t errorCount = 0; fsAccess->read(node, errorCount); currentErrorCount += errorCount; } return currentErrorCount; } ze_result_t LinuxRasSourceFabric::osRasGetState(zes_ras_state_t &state, ze_bool_t clear) { if (errorNodes.size() == 0) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } std::memset(state.category, 0, sizeof(zes_ras_state_t::category)); uint64_t currentComputeErrorCount = getComputeErrorCount(); if (clear) { baseComputeErrorCount = currentComputeErrorCount; currentComputeErrorCount = getComputeErrorCount(); } state.category[ZES_RAS_ERROR_CAT_COMPUTE_ERRORS] = currentComputeErrorCount - baseComputeErrorCount; return ZE_RESULT_SUCCESS; } } // namespace L0compute-runtime-22.14.22890/level_zero/tools/source/sysman/ras/linux/os_ras_imp_gt.cpp000066400000000000000000000430621422164147700307540ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/ras/linux/os_ras_imp_prelim.h" #include "sysman/linux/os_sysman_imp.h" #include namespace L0 { static const std::map> categoryToListOfEventsUncorrectable = { {ZES_RAS_ERROR_CAT_CACHE_ERRORS, {"fatal-array-bist", "fatal-eu-grf", "fatal-eu-ic", 
"fatal-guc", "fatal-idi-parity", "fatal-l3-double", "fatal-l3-ecc-checker", "fatal-sampler", "fatal-slm", "fatal-sqidi", "fatal-tlb"}}, {ZES_RAS_ERROR_CAT_RESET, {"engine-reset"}}, {ZES_RAS_ERROR_CAT_PROGRAMMING_ERRORS, {"eu-attention"}}, {ZES_RAS_ERROR_CAT_NON_COMPUTE_ERRORS, {"soc-fatal-fabric-ss0-0", "soc-fatal-fabric-ss0-1", "soc-fatal-fabric-ss0-2", "soc-fatal-fabric-ss0-3", "soc-fatal-fabric-ss1-0", "soc-fatal-fabric-ss1-1", "soc-fatal-fabric-ss1-2", "soc-fatal-fabric-ss1-3", "soc-fatal-fabric-ss1-4", "soc-fatal-hbm-ss0-0", "soc-fatal-hbm-ss0-1", "soc-fatal-hbm-ss0-2", "soc-fatal-hbm-ss0-3", "soc-fatal-hbm-ss0-4", "soc-fatal-hbm-ss0-5", "soc-fatal-hbm-ss0-6", "soc-fatal-hbm-ss0-7", "soc-fatal-hbm-ss0-8", "soc-fatal-hbm-ss0-9", "soc-fatal-hbm-ss0-10", "soc-fatal-hbm-ss0-11", "soc-fatal-hbm-ss0-12", "soc-fatal-hbm-ss0-13", "soc-fatal-hbm-ss0-14", "soc-fatal-hbm-ss0-15", "soc-fatal-hbm-ss1-0", "soc-fatal-hbm-ss1-1", "soc-fatal-hbm-ss1-2", "soc-fatal-hbm-ss1-3", "soc-fatal-hbm-ss1-4", "soc-fatal-hbm-ss1-5", "soc-fatal-hbm-ss1-6", "soc-fatal-hbm-ss1-7", "soc-fatal-hbm-ss1-8", "soc-fatal-hbm-ss1-9", "soc-fatal-hbm-ss1-10", "soc-fatal-hbm-ss1-11", "soc-fatal-hbm-ss1-12", "soc-fatal-hbm-ss1-13", "soc-fatal-hbm-ss1-14", "soc-fatal-hbm-ss1-15", "soc-fatal-mdfi-east", "soc-fatal-mdfi-south", "soc-fatal-mdfi-west", "soc-fatal-psf-csc-0", "soc-fatal-psf-csc-1", "soc-fatal-psf-csc-2", "soc-fatal-punit", "sgunit-fatal", "soc-nonfatal-fabric-ss0-0", "soc-nonfatal-fabric-ss0-1", "soc-nonfatal-fabric-ss0-2", "soc-nonfatal-fabric-ss0-3", "soc-nonfatal-fabric-ss1-0", "soc-nonfatal-fabric-ss1-1", "soc-nonfatal-fabric-ss1-2", "soc-nonfatal-fabric-ss1-3", "soc-nonfatal-fabric-ss1-4", "soc-nonfatal-hbm-ss0-0", "soc-nonfatal-hbm-ss0-1", "soc-nonfatal-hbm-ss0-2", "soc-nonfatal-hbm-ss0-3", "soc-nonfatal-hbm-ss0-4", "soc-nonfatal-hbm-ss0-5", "soc-nonfatal-hbm-ss0-6", "soc-nonfatal-hbm-ss0-7", "soc-nonfatal-hbm-ss0-8", "soc-nonfatal-hbm-ss0-9", "soc-nonfatal-hbm-ss0-10", 
"soc-nonfatal-hbm-ss0-11", "soc-nonfatal-hbm-ss0-12", "soc-nonfatal-hbm-ss0-13", "soc-nonfatal-hbm-ss0-14", "soc-nonfatal-hbm-ss0-15", "soc-nonfatal-hbm-ss1-0", "soc-nonfatal-hbm-ss1-1", "soc-nonfatal-hbm-ss1-2", "soc-nonfatal-hbm-ss1-3", "soc-nonfatal-hbm-ss1-4", "soc-nonfatal-hbm-ss1-5", "soc-nonfatal-hbm-ss1-6", "soc-nonfatal-hbm-ss1-7", "soc-nonfatal-hbm-ss1-8", "soc-nonfatal-hbm-ss1-9", "soc-nonfatal-hbm-ss1-10", "soc-nonfatal-hbm-ss1-11", "soc-nonfatal-hbm-ss1-12", "soc-nonfatal-hbm-ss1-13", "soc-nonfatal-hbm-ss1-14", "soc-nonfatal-hbm-ss1-15", "soc-nonfatal-mdfi-east", "soc-nonfatal-mdfi-south", "soc-nonfatal-mdfi-west", "soc-nonfatal-psf-csc-0", "soc-nonfatal-psf-csc-1", "soc-nonfatal-psf-csc-2", "soc-nonfatal-punit", "sgunit-nonfatal"}}, {ZES_RAS_ERROR_CAT_COMPUTE_ERRORS, {"fatal-fpu", "fatal-l3-fabric"}}, {ZES_RAS_ERROR_CAT_DRIVER_ERRORS, {"driver-object-migration", "driver-engine-other", "driver-ggtt", "driver-gt-interrupt", "driver-gt-other", "driver-guc-communication", "driver-rps"}}}; static const std::map> categoryToListOfEventsCorrectable = { {ZES_RAS_ERROR_CAT_CACHE_ERRORS, {"correctable-eu-grf", "correctable-eu-ic", "correctable-guc", "correctable-l3-sng", "correctable-sampler", "correctable-slm"}}, {ZES_RAS_ERROR_CAT_NON_COMPUTE_ERRORS, {"soc-correctable-fabric-ss0-0", "soc-correctable-fabric-ss0-1", "soc-correctable-fabric-ss0-2", "soc-correctable-fabric-ss0-3", "soc-correctable-fabric-ss1-0", "soc-correctable-fabric-ss1-1", "soc-correctable-fabric-ss1-2", "soc-correctable-fabric-ss1-3", "soc-correctable-fabric-ss1-4", "soc-correctable-hbm-ss0-0", "soc-correctable-hbm-ss0-1", "soc-correctable-hbm-ss0-2", "soc-correctable-hbm-ss0-3", "soc-correctable-hbm-ss0-4", "soc-correctable-hbm-ss0-5", "soc-correctable-hbm-ss0-6", "soc-correctable-hbm-ss0-7", "soc-correctable-hbm-ss0-8", "soc-correctable-hbm-ss0-9", "soc-correctable-hbm-ss0-10", "soc-correctable-hbm-ss0-11", "soc-correctable-hbm-ss0-12", "soc-correctable-hbm-ss0-13", 
"soc-correctable-hbm-ss0-14", "soc-correctable-hbm-ss0-15", "soc-correctable-hbm-ss1-0", "soc-correctable-hbm-ss1-1", "soc-correctable-hbm-ss1-2", "soc-correctable-hbm-ss1-3", "soc-correctable-hbm-ss1-4", "soc-correctable-hbm-ss1-5", "soc-correctable-hbm-ss1-6", "soc-correctable-hbm-ss1-7", "soc-correctable-hbm-ss1-8", "soc-correctable-hbm-ss1-9", "soc-correctable-hbm-ss1-10", "soc-correctable-hbm-ss1-11", "soc-correctable-hbm-ss1-12", "soc-correctable-hbm-ss1-13", "soc-correctable-hbm-ss1-14", "soc-correctable-hbm-ss1-15", "soc-correctable-mdfi-east", "soc-correctable-mdfi-south", "soc-correctable-mdfi-west", "soc-correctable-psf-csc-0", "soc-correctable-psf-csc-1", "soc-correctable-punit", "sgunit-correctable"}}}; static void closeFd(int64_t &fd) { if (fd != -1) { close(static_cast(fd)); fd = -1; } } static ze_result_t readI915EventsDirectory(LinuxSysmanImp *pLinuxSysmanImp, std::vector &listOfEvents, std::string *eventDirectory) { // To know how many errors are supported on a platform scan // /sys/devices/i915_0000_01_00.0/events/ // all events are enumerated in sysfs at /sys/devices/i915_0000_01_00.0/events/ // For above example device is in PCI slot 0000:01:00.0: SysfsAccess *pSysfsAccess = &pLinuxSysmanImp->getSysfsAccess(); const std::string deviceDir("device"); const std::string sysDevicesDir("/sys/devices/"); std::string bdfDir; ze_result_t result = pSysfsAccess->readSymLink(deviceDir, bdfDir); if (ZE_RESULT_SUCCESS != result) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } const auto loc = bdfDir.find_last_of('/'); auto bdf = bdfDir.substr(loc + 1); std::replace(bdf.begin(), bdf.end(), ':', '_'); std::string i915DirName = "i915_" + bdf; std::string sysfsNode = sysDevicesDir + i915DirName + "/" + "events"; if (eventDirectory != nullptr) { *eventDirectory = sysfsNode; } FsAccess *pFsAccess = &pLinuxSysmanImp->getFsAccess(); result = pFsAccess->listDirectory(sysfsNode, listOfEvents); if (ZE_RESULT_SUCCESS != result) { return 
ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } return ZE_RESULT_SUCCESS; } static uint64_t convertHexToUint64(std::string strVal) { auto loc = strVal.find('='); std::stringstream ss; ss << std::hex << strVal.substr(loc + 1); uint64_t config = 0; ss >> config; return config; } static bool isErrorTypeSupported(std::string pattern, std::vector &eventList) { std::regex pPattern(pattern); for (const auto &entry : eventList) { if (regex_match(entry, pPattern) == true) { return true; } } return false; } static bool getErrorType(std::vector errorPattern, std::vector &eventList) { for (auto &pattern : errorPattern) { if (isErrorTypeSupported(pattern, eventList) == true) { return true; } } return false; } void LinuxRasSourceGt::closeFds() { for (auto &memberFd : memberFds) { closeFd(memberFd); } memberFds.clear(); closeFd(groupFd); } LinuxRasSourceGt::~LinuxRasSourceGt() { closeFds(); } void LinuxRasSourceGt::getSupportedRasErrorTypes(std::set &errorType, OsSysman *pOsSysman, ze_device_handle_t deviceHandle) { LinuxSysmanImp *pLinuxSysmanImp = static_cast(pOsSysman); std::vector listOfEvents = {}; ze_result_t result = readI915EventsDirectory(pLinuxSysmanImp, listOfEvents, nullptr); if (result != ZE_RESULT_SUCCESS) { return; } ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES}; Device::fromHandle(deviceHandle)->getProperties(&deviceProperties); bool onSubDevice = deviceProperties.flags & ZE_DEVICE_PROPERTY_FLAG_SUBDEVICE; uint32_t subDeviceId = deviceProperties.subdeviceId; std::vector uncorrectablePattern; std::vector correctablePattern; // For device with no subDevice error entries are of form error-- // and for device having subDevice error entries are of form error-gt-- uncorrectablePattern.push_back("^error--driver.*"); if (onSubDevice == false) { correctablePattern.push_back("^error--correctable.*"); correctablePattern.push_back("^error--soc-correctable.*"); uncorrectablePattern.push_back("^error--engine-reset.*"); 
uncorrectablePattern.push_back("^error--eu-attention.*"); uncorrectablePattern.push_back("^error--fatal.*"); uncorrectablePattern.push_back("^error--soc-fatal.*"); uncorrectablePattern.push_back("^error--soc-nonfatal.*"); } else { correctablePattern.push_back("^error-gt" + std::to_string(subDeviceId) + "--correctable.*"); correctablePattern.push_back("^error-gt" + std::to_string(subDeviceId) + "--soc-correctable.*"); uncorrectablePattern.push_back("^error-gt" + std::to_string(subDeviceId) + "--driver.*"); uncorrectablePattern.push_back("^error-gt" + std::to_string(subDeviceId) + "--fatal.*"); uncorrectablePattern.push_back("^error-gt" + std::to_string(subDeviceId) + "--soc-fatal.*"); uncorrectablePattern.push_back("^error-gt" + std::to_string(subDeviceId) + "--soc-nonfatal.*"); uncorrectablePattern.push_back("^error-gt" + std::to_string(subDeviceId) + "--eu-attention.*"); uncorrectablePattern.push_back("^error-gt" + std::to_string(subDeviceId) + "--engine-reset.*"); } if (getErrorType(correctablePattern, listOfEvents) == true) { errorType.insert(ZES_RAS_ERROR_TYPE_CORRECTABLE); } if (getErrorType(uncorrectablePattern, listOfEvents) == true) { errorType.insert(ZES_RAS_ERROR_TYPE_UNCORRECTABLE); } } ze_result_t LinuxRasSourceGt::osRasGetState(zes_ras_state_t &state, ze_bool_t clear) { if (clear == true) { closeFds(); totalEventCount = 0; memset(state.category, 0, sizeof(state.category)); memset(initialErrorCount, 0, sizeof(initialErrorCount)); } initRasErrors(clear); // Iterate over all the file descriptor values present in vector which is mapped to given ras error category // Use the file descriptors to read pmu counters and add all the errors corresponding to the ras error category if (groupFd < 0) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } std::map> categoryToEvent; if (osRasErrorType == ZES_RAS_ERROR_TYPE_CORRECTABLE) { categoryToEvent = categoryToListOfEventsCorrectable; } if (osRasErrorType == ZES_RAS_ERROR_TYPE_UNCORRECTABLE) { categoryToEvent = 
categoryToListOfEventsUncorrectable; } std::vector data(2 + totalEventCount, 0); // In data[], event count starts from second index, first value gives number of events and second value is for timestamp if (pPmuInterface->pmuRead(static_cast(groupFd), data.data(), sizeof(uint64_t) * data.size()) < 0) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } /* The data buffer retrieved after reading pmu counters is parsed to get the error count for each suberror category */ uint64_t initialIndex = 2; // Initial index in the buffer from which the data be parsed begins for (auto errorCat = errorCategoryToEventCount.begin(); errorCat != errorCategoryToEventCount.end(); errorCat++) { uint64_t errorCount = 0; uint64_t j = 0; for (; j < errorCat->second; j++) { errorCount += data[initialIndex + j]; } state.category[errorCat->first] = errorCount + initialErrorCount[errorCat->first]; initialIndex += j; } return ZE_RESULT_SUCCESS; } ze_result_t LinuxRasSourceGt::getPmuConfig( const std::string &eventDirectory, const std::vector &listOfEvents, const std::string &errorFileToGetConfig, std::string &pmuConfig) { auto findErrorInList = std::find(listOfEvents.begin(), listOfEvents.end(), errorFileToGetConfig); if (findErrorInList == listOfEvents.end()) { return ZE_RESULT_ERROR_UNKNOWN; } return pFsAccess->read(eventDirectory + "/" + errorFileToGetConfig, pmuConfig); } ze_result_t LinuxRasSourceGt::getBootUpErrorCountFromSysfs( std::string nameOfError, const std::string &errorCounterDir, uint64_t &errorVal) { std::replace(nameOfError.begin(), nameOfError.end(), '-', '_'); // replace - with _ to convert name of pmu config node to name of sysfs node return pSysfsAccess->read(errorCounterDir + "/" + nameOfError, errorVal); } void LinuxRasSourceGt::initRasErrors(ze_bool_t clear) { // if already initialized if (groupFd >= 0) { return; } std::string eventDirectory; std::vector listOfEvents = {}; ze_result_t result = readI915EventsDirectory(pLinuxSysmanImp, listOfEvents, &eventDirectory); if 
(result != ZE_RESULT_SUCCESS) { return; } std::map> categoryToListOfEvents; if (osRasErrorType == ZES_RAS_ERROR_TYPE_CORRECTABLE) { categoryToListOfEvents = categoryToListOfEventsCorrectable; } if (osRasErrorType == ZES_RAS_ERROR_TYPE_UNCORRECTABLE) { categoryToListOfEvents = categoryToListOfEventsUncorrectable; } std::string errorPrefix = "error--"; // prefix string of the file containing config value for pmu counters std::string errorCounterDir = "gt/gt0/error_counter"; // Directory containing the sysfs nodes which in turn contains initial value of error count if (isSubdevice == true) { errorPrefix = "error-gt" + std::to_string(subdeviceId) + "--"; errorCounterDir = "gt/gt" + std::to_string(subdeviceId) + "/error_counter"; } // Following loop retrieves initial count of errors from sysfs and pmu config values for each ras error // PMU: error-- Ex:- error--engine-reset (config with no subdevice) // PMU: error-gt-- Ex:- error-gt0--engine-reset (config with subdevices) // PMU: error-- Ex:- error--driver-object-migration (config for device level errors) // Sysfs: card0/gt/gt0/error_counter/ Ex:- gt/gt0/error_counter/engine_reset (sysfs with no subdevice) // Sysfs: card0/gt/gt/error_counter/ Ex:- gt/gt1/error_counter/engine_reset (sysfs with dubdevices) // Sysfs: error_counter/ Ex:- error_counter/driver_object_migration (sysfs for error which occur at device level) for (auto const &rasErrorCatToListOfEvents : categoryToListOfEvents) { uint64_t eventCount = 0; uint64_t errorCount = 0; for (auto const &nameOfError : rasErrorCatToListOfEvents.second) { std::string errorPrefixLocal = errorPrefix; std::string errorCounterDirLocal = errorCounterDir; if (nameOfError == "driver-object-migration") { // check for errors which occur at device level errorCounterDirLocal = "error_counter"; errorPrefixLocal = "error--"; } uint64_t initialErrorVal = 0; if (clear == false) { result = getBootUpErrorCountFromSysfs(nameOfError, errorCounterDirLocal, initialErrorVal); if (result != 
ZE_RESULT_SUCCESS) { continue; } } std::string pmuConfig; result = getPmuConfig(eventDirectory, listOfEvents, errorPrefixLocal + nameOfError, pmuConfig); if (result != ZE_RESULT_SUCCESS) { continue; } uint64_t config = convertHexToUint64(pmuConfig); if (groupFd == -1) { groupFd = pPmuInterface->pmuInterfaceOpen(config, -1, PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_GROUP); // To get file descriptor of the group leader if (groupFd < 0) { return; } } else { // The rest of the group members are created with subsequent calls with groupFd being set to the file descriptor of the group leader memberFds.push_back(pPmuInterface->pmuInterfaceOpen(config, static_cast(groupFd), PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_GROUP)); } eventCount++; errorCount += initialErrorVal; } initialErrorCount[rasErrorCatToListOfEvents.first] = errorCount; errorCategoryToEventCount[rasErrorCatToListOfEvents.first] = eventCount; totalEventCount += eventCount; } } LinuxRasSourceGt::LinuxRasSourceGt(LinuxSysmanImp *pLinuxSysmanImp, zes_ras_error_type_t type, ze_bool_t onSubdevice, uint32_t subdeviceId) : pLinuxSysmanImp(pLinuxSysmanImp), osRasErrorType(type), isSubdevice(onSubdevice), subdeviceId(subdeviceId) { pPmuInterface = pLinuxSysmanImp->getPmuInterface(); pFsAccess = &pLinuxSysmanImp->getFsAccess(); pSysfsAccess = &pLinuxSysmanImp->getSysfsAccess(); } } // namespace L0compute-runtime-22.14.22890/level_zero/tools/source/sysman/ras/linux/os_ras_imp_hbm.cpp000066400000000000000000000036641422164147700311140ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/ras/linux/os_ras_imp_prelim.h" #include "sysman/linux/os_sysman_imp.h" namespace L0 { void LinuxRasSourceHbm::getSupportedRasErrorTypes(std::set &errorType, OsSysman *pOsSysman, ze_device_handle_t deviceHandle) { LinuxSysmanImp *pLinuxSysmanImp = static_cast(pOsSysman); FirmwareUtil *pFwInterface = 
pLinuxSysmanImp->getFwUtilInterface(); if (pFwInterface != nullptr) { errorType.insert(ZES_RAS_ERROR_TYPE_CORRECTABLE); errorType.insert(ZES_RAS_ERROR_TYPE_UNCORRECTABLE); } } ze_result_t LinuxRasSourceHbm::osRasGetState(zes_ras_state_t &state, ze_bool_t clear) { if (pFwInterface == nullptr) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } uint32_t subDeviceCount = 0; pDevice->getSubDevices(&subDeviceCount, nullptr); if (clear == true) { uint64_t errorCount = 0; ze_result_t result = pFwInterface->fwGetMemoryErrorCount(osRasErrorType, subDeviceCount, subdeviceId, errorCount); if (result != ZE_RESULT_SUCCESS) { return result; } errorBaseline = errorCount; // during clear update the error baseline value } uint64_t errorCount = 0; ze_result_t result = pFwInterface->fwGetMemoryErrorCount(osRasErrorType, subDeviceCount, subdeviceId, errorCount); if (result != ZE_RESULT_SUCCESS) { return result; } state.category[ZES_RAS_ERROR_CAT_NON_COMPUTE_ERRORS] = errorCount - errorBaseline; return ZE_RESULT_SUCCESS; } LinuxRasSourceHbm::LinuxRasSourceHbm(LinuxSysmanImp *pLinuxSysmanImp, zes_ras_error_type_t type, uint32_t subdeviceId) : pLinuxSysmanImp(pLinuxSysmanImp), osRasErrorType(type), subdeviceId(subdeviceId) { pFwInterface = pLinuxSysmanImp->getFwUtilInterface(); pDevice = pLinuxSysmanImp->getDeviceHandle(); } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/ras/linux/os_ras_imp_prelim.cpp000066400000000000000000000065321422164147700316330ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/ras/linux/os_ras_imp_prelim.h" #include "sysman/linux/os_sysman_imp.h" namespace L0 { void OsRas::getSupportedRasErrorTypes(std::set &errorType, OsSysman *pOsSysman, ze_device_handle_t deviceHandle) { constexpr auto maxErrorTypes = 2; LinuxRasSourceGt::getSupportedRasErrorTypes(errorType, pOsSysman, deviceHandle); if (errorType.size() < maxErrorTypes) { 
LinuxRasSourceFabric::getSupportedRasErrorTypes(errorType, pOsSysman, deviceHandle); if (errorType.size() < maxErrorTypes) { LinuxRasSourceHbm::getSupportedRasErrorTypes(errorType, pOsSysman, deviceHandle); } } } ze_result_t LinuxRasImp::osRasGetConfig(zes_ras_config_t *config) { config->totalThreshold = totalThreshold; memcpy(config->detailedThresholds.category, categoryThreshold, sizeof(config->detailedThresholds.category)); return ZE_RESULT_SUCCESS; } ze_result_t LinuxRasImp::osRasSetConfig(const zes_ras_config_t *config) { if (pFsAccess->isRootUser() == true) { totalThreshold = config->totalThreshold; memcpy(categoryThreshold, config->detailedThresholds.category, sizeof(config->detailedThresholds.category)); return ZE_RESULT_SUCCESS; } return ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS; } ze_result_t LinuxRasImp::osRasGetProperties(zes_ras_properties_t &properties) { properties.pNext = nullptr; properties.type = osRasErrorType; properties.onSubdevice = isSubdevice; properties.subdeviceId = subdeviceId; return ZE_RESULT_SUCCESS; } ze_result_t LinuxRasImp::osRasGetState(zes_ras_state_t &state, ze_bool_t clear) { if (clear == true) { if (pFsAccess->isRootUser() == false) { return ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS; } } ze_result_t result = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; for (auto &rasSource : rasSources) { zes_ras_state_t localState = {}; ze_result_t localResult = rasSource->osRasGetState(localState, clear); if (localResult != ZE_RESULT_SUCCESS) { continue; } for (int i = 0; i < ZES_MAX_RAS_ERROR_CATEGORY_COUNT; i++) { state.category[i] += localState.category[i]; } result = ZE_RESULT_SUCCESS; } return result; } void LinuxRasImp::initSources() { rasSources.push_back(std::make_unique(pLinuxSysmanImp, osRasErrorType, isSubdevice, subdeviceId)); rasSources.push_back(std::make_unique(pLinuxSysmanImp, osRasErrorType, subdeviceId)); rasSources.push_back(std::make_unique(pLinuxSysmanImp, osRasErrorType, subdeviceId)); } LinuxRasImp::LinuxRasImp(OsSysman 
*pOsSysman, zes_ras_error_type_t type, ze_bool_t onSubdevice, uint32_t subdeviceId) : osRasErrorType(type), isSubdevice(onSubdevice), subdeviceId(subdeviceId) { pLinuxSysmanImp = static_cast(pOsSysman); pFsAccess = &pLinuxSysmanImp->getFsAccess(); initSources(); } OsRas *OsRas::create(OsSysman *pOsSysman, zes_ras_error_type_t type, ze_bool_t onSubdevice, uint32_t subdeviceId) { LinuxRasImp *pLinuxRasImp = new LinuxRasImp(pOsSysman, type, onSubdevice, subdeviceId); return static_cast(pLinuxRasImp); } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/ras/linux/os_ras_imp_prelim.h000066400000000000000000000107071422164147700312770ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/non_copyable_or_moveable.h" #include "level_zero/tools/source/sysman/ras/os_ras.h" #include #include #include #include namespace L0 { class FsAccess; class SysfsAccess; class PmuInterface; class LinuxSysmanImp; class LinuxRasSources; class FirmwareUtil; struct Device; class LinuxRasImp : public OsRas, NEO::NonCopyableOrMovableClass { public: ze_result_t osRasGetProperties(zes_ras_properties_t &properties) override; ze_result_t osRasGetState(zes_ras_state_t &state, ze_bool_t clear) override; ze_result_t osRasGetConfig(zes_ras_config_t *config) override; ze_result_t osRasSetConfig(const zes_ras_config_t *config) override; LinuxRasImp(OsSysman *pOsSysman, zes_ras_error_type_t type, ze_bool_t onSubdevice, uint32_t subdeviceId); LinuxRasImp() = default; ~LinuxRasImp() override = default; protected: zes_ras_error_type_t osRasErrorType = {}; FsAccess *pFsAccess = nullptr; LinuxSysmanImp *pLinuxSysmanImp = nullptr; std::vector> rasSources = {}; private: void initSources(); bool isSubdevice = false; uint32_t subdeviceId = 0; uint64_t totalThreshold = 0; uint64_t categoryThreshold[ZES_MAX_RAS_ERROR_CATEGORY_COUNT] = {0}; }; class LinuxRasSources : 
NEO::NonCopyableOrMovableClass { public: virtual ze_result_t osRasGetState(zes_ras_state_t &state, ze_bool_t clear) = 0; virtual ~LinuxRasSources() = default; }; class LinuxRasSourceGt : public LinuxRasSources { public: virtual ze_result_t osRasGetState(zes_ras_state_t &state, ze_bool_t clear) override; static void getSupportedRasErrorTypes(std::set &errorType, OsSysman *pOsSysman, ze_device_handle_t deviceHandle); LinuxRasSourceGt(LinuxSysmanImp *pLinuxSysmanImp, zes_ras_error_type_t type, ze_bool_t onSubdevice, uint32_t subdeviceId); LinuxRasSourceGt() = default; virtual ~LinuxRasSourceGt(); protected: LinuxSysmanImp *pLinuxSysmanImp = nullptr; zes_ras_error_type_t osRasErrorType = {}; PmuInterface *pPmuInterface = nullptr; FsAccess *pFsAccess = nullptr; SysfsAccess *pSysfsAccess = nullptr; private: void initRasErrors(ze_bool_t clear); ze_result_t getPmuConfig( const std::string &eventDirectory, const std::vector &listOfEvents, const std::string &errorFileToGetConfig, std::string &pmuConfig); ze_result_t getBootUpErrorCountFromSysfs( std::string nameOfError, const std::string &errorCounterDir, uint64_t &errorVal); void closeFds(); int64_t groupFd = -1; std::vector memberFds = {}; uint64_t initialErrorCount[ZES_MAX_RAS_ERROR_CATEGORY_COUNT] = {0}; std::map errorCategoryToEventCount; uint64_t totalEventCount = 0; bool isSubdevice = false; uint32_t subdeviceId = 0; }; class LinuxRasSourceFabric : public LinuxRasSources { public: static ze_result_t getSupportedRasErrorTypes(std::set &errorType, OsSysman *pOsSysman, ze_device_handle_t deviceHandle); LinuxRasSourceFabric(OsSysman *pOsSysman, zes_ras_error_type_t type, uint32_t subDeviceId); ~LinuxRasSourceFabric() = default; ze_result_t osRasGetState(zes_ras_state_t &state, ze_bool_t clear) override; private: FsAccess *fsAccess = nullptr; std::vector errorNodes = {}; uint64_t baseComputeErrorCount = 0; uint64_t getComputeErrorCount(); static void getNodes(std::vector &nodes, uint32_t subdeviceId, FsAccess *fsAccess, 
const zes_ras_error_type_t &type); }; class LinuxRasSourceHbm : public LinuxRasSources { public: virtual ze_result_t osRasGetState(zes_ras_state_t &state, ze_bool_t clear) override; static void getSupportedRasErrorTypes(std::set &errorType, OsSysman *pOsSysman, ze_device_handle_t deviceHandle); LinuxRasSourceHbm(LinuxSysmanImp *pLinuxSysmanImp, zes_ras_error_type_t type, uint32_t subdeviceId); LinuxRasSourceHbm() = default; virtual ~LinuxRasSourceHbm() override{}; protected: LinuxSysmanImp *pLinuxSysmanImp = nullptr; zes_ras_error_type_t osRasErrorType = {}; FirmwareUtil *pFwInterface = nullptr; Device *pDevice = nullptr; private: uint64_t errorBaseline = 0; uint32_t subdeviceId = 0; }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/ras/linux/udev/000077500000000000000000000000001422164147700263615ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/ras/linux/udev/99-drm_ze_intel_gpu.rules000066400000000000000000000004521422164147700332230ustar00rootroot00000000000000# # Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT # ACTION=="change", \ SUBSYSTEM=="drm", \ ENV{RESET}=="1", \ RUN+="/bin/sh -c 'val=`cat /var/lib/libze_intel_gpu/ras_reset_count` && newval=`expr $val + 1` && echo $newval > /var/lib/libze_intel_gpu/ras_reset_count'" compute-runtime-22.14.22890/level_zero/tools/source/sysman/ras/linux/udev/counter_init000066400000000000000000000000021422164147700307760ustar00rootroot000000000000000 compute-runtime-22.14.22890/level_zero/tools/source/sysman/ras/os_ras.h000066400000000000000000000015041422164147700257160ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include #include namespace L0 { struct OsSysman; class OsRas { public: virtual ze_result_t osRasGetProperties(zes_ras_properties_t &properties) = 0; virtual ze_result_t osRasGetState(zes_ras_state_t &state, ze_bool_t clear) = 0; virtual 
ze_result_t osRasGetConfig(zes_ras_config_t *config) = 0; virtual ze_result_t osRasSetConfig(const zes_ras_config_t *config) = 0; static OsRas *create(OsSysman *pOsSysman, zes_ras_error_type_t type, ze_bool_t onSubdevice, uint32_t subdeviceId); static void getSupportedRasErrorTypes(std::set &errorType, OsSysman *pOsSysman, ze_device_handle_t deviceHandle); virtual ~OsRas() = default; }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/ras/ras.cpp000066400000000000000000000027551422164147700255610ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/basic_math.h" #include "level_zero/tools/source/sysman/ras/ras_imp.h" namespace L0 { void RasHandleContext::releaseRasHandles() { for (Ras *pRas : handleList) { delete pRas; } handleList.clear(); } RasHandleContext::~RasHandleContext() { releaseRasHandles(); } void RasHandleContext::createHandle(zes_ras_error_type_t type, ze_device_handle_t deviceHandle) { Ras *pRas = new RasImp(pOsSysman, type, deviceHandle); handleList.push_back(pRas); } void RasHandleContext::init(std::vector &deviceHandles) { for (const auto &deviceHandle : deviceHandles) { std::set errorType = {}; OsRas::getSupportedRasErrorTypes(errorType, pOsSysman, deviceHandle); for (const auto &type : errorType) { createHandle(type, deviceHandle); } } } ze_result_t RasHandleContext::rasGet(uint32_t *pCount, zes_ras_handle_t *phRas) { uint32_t handleListSize = static_cast(handleList.size()); uint32_t numToCopy = std::min(*pCount, handleListSize); if (0 == *pCount || *pCount > handleListSize) { *pCount = handleListSize; } if (nullptr != phRas) { for (uint32_t i = 0; i < numToCopy; i++) { phRas[i] = handleList[i]->toHandle(); } } return ZE_RESULT_SUCCESS; } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/ras/ras.h000066400000000000000000000026101422164147700252140ustar00rootroot00000000000000/* * Copyright (C) 
2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/core/source/device/device.h" #include #include struct _zes_ras_handle_t { virtual ~_zes_ras_handle_t() = default; }; namespace L0 { struct OsSysman; class Ras : _zes_ras_handle_t { public: virtual ze_result_t rasGetProperties(zes_ras_properties_t *pProperties) = 0; virtual ze_result_t rasGetConfig(zes_ras_config_t *pConfig) = 0; virtual ze_result_t rasSetConfig(const zes_ras_config_t *pConfig) = 0; virtual ze_result_t rasGetState(zes_ras_state_t *pState, ze_bool_t clear) = 0; static Ras *fromHandle(zes_ras_handle_t handle) { return static_cast(handle); } inline zes_ras_handle_t toHandle() { return this; } bool isRasErrorSupported = false; zes_ras_error_type_t rasErrorType{}; }; struct RasHandleContext { RasHandleContext(OsSysman *pOsSysman) : pOsSysman(pOsSysman){}; MOCKABLE_VIRTUAL ~RasHandleContext(); MOCKABLE_VIRTUAL void init(std::vector &deviceHandles); void releaseRasHandles(); ze_result_t rasGet(uint32_t *pCount, zes_ras_handle_t *phRas); OsSysman *pOsSysman = nullptr; std::vector handleList = {}; private: void createHandle(zes_ras_error_type_t type, ze_device_handle_t deviceHandle); }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/ras/ras_imp.cpp000066400000000000000000000026161422164147700264220ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/ras/ras_imp.h" #include "shared/source/helpers/debug_helpers.h" #include "shared/source/helpers/string.h" #include namespace L0 { ze_result_t RasImp::rasGetProperties(zes_ras_properties_t *pProperties) { *pProperties = rasProperties; return ZE_RESULT_SUCCESS; } ze_result_t RasImp::rasGetConfig(zes_ras_config_t *pConfig) { return pOsRas->osRasGetConfig(pConfig); } ze_result_t RasImp::rasSetConfig(const zes_ras_config_t *pConfig) { return pOsRas->osRasSetConfig(pConfig); } ze_result_t 
RasImp::rasGetState(zes_ras_state_t *pState, ze_bool_t clear) { memset(pState->category, 0, sizeof(pState->category)); return pOsRas->osRasGetState(*pState, clear); } void RasImp::init() { pOsRas->osRasGetProperties(rasProperties); } RasImp::RasImp(OsSysman *pOsSysman, zes_ras_error_type_t type, ze_device_handle_t handle) : deviceHandle(handle) { ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES}; Device::fromHandle(deviceHandle)->getProperties(&deviceProperties); pOsRas = OsRas::create(pOsSysman, type, deviceProperties.flags & ZE_DEVICE_PROPERTY_FLAG_SUBDEVICE, deviceProperties.subdeviceId); init(); } RasImp::~RasImp() { if (nullptr != pOsRas) { delete pOsRas; } } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/ras/ras_imp.h000066400000000000000000000017071422164147700260670ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/non_copyable_or_moveable.h" #include "level_zero/tools/source/sysman/ras/os_ras.h" #include "level_zero/tools/source/sysman/ras/ras.h" namespace L0 { class RasImp : public Ras, NEO::NonCopyableOrMovableClass { public: ze_result_t rasGetProperties(zes_ras_properties_t *pProperties) override; ze_result_t rasGetConfig(zes_ras_config_t *pConfig) override; ze_result_t rasSetConfig(const zes_ras_config_t *pConfig) override; ze_result_t rasGetState(zes_ras_state_t *pConfig, ze_bool_t clear) override; RasImp() = default; RasImp(OsSysman *pOsSysman, zes_ras_error_type_t type, ze_device_handle_t deviceHandle); ~RasImp() override; OsRas *pOsRas = nullptr; void init(); private: zes_ras_properties_t rasProperties = {}; ze_device_handle_t deviceHandle = {}; }; } // namespace L0 
compute-runtime-22.14.22890/level_zero/tools/source/sysman/ras/windows/000077500000000000000000000000001422164147700257515ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/ras/windows/CMakeLists.txt000077500000000000000000000006751422164147700305240ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_TOOLS_SYSMAN_RAS_WINDOWS ${CMAKE_CURRENT_SOURCE_DIR}/os_ras_imp.cpp ) if(WIN32) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_SYSMAN_RAS_WINDOWS} ) endif() # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_SYSMAN_RAS_WINDOWS ${L0_SRCS_TOOLS_SYSMAN_RAS_WINDOWS}) compute-runtime-22.14.22890/level_zero/tools/source/sysman/ras/windows/os_ras_imp.cpp000066400000000000000000000025071422164147700306140ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "sysman/ras/os_ras.h" namespace L0 { class WddmRasImp : public OsRas { ze_result_t osRasGetProperties(zes_ras_properties_t &properties) override; ze_result_t osRasGetState(zes_ras_state_t &state, ze_bool_t clear) override; ze_result_t osRasGetConfig(zes_ras_config_t *config) override; ze_result_t osRasSetConfig(const zes_ras_config_t *config) override; }; void OsRas::getSupportedRasErrorTypes(std::set &errorType, OsSysman *pOsSysman, ze_device_handle_t deviceHandle) {} ze_result_t WddmRasImp::osRasGetProperties(zes_ras_properties_t &properties) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t WddmRasImp::osRasGetConfig(zes_ras_config_t *config) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t WddmRasImp::osRasSetConfig(const zes_ras_config_t *config) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t WddmRasImp::osRasGetState(zes_ras_state_t &state, ze_bool_t clear) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } OsRas *OsRas::create(OsSysman *pOsSysman, zes_ras_error_type_t 
type, ze_bool_t onSubdevice, uint32_t subdeviceId) { WddmRasImp *pWddmRasImp = new WddmRasImp(); return static_cast(pWddmRasImp); } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/scheduler/000077500000000000000000000000001422164147700254505ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/scheduler/CMakeLists.txt000066400000000000000000000012471422164147700302140ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_TOOLS_SYSMAN_SCHEDULER ${CMAKE_CURRENT_SOURCE_DIR}/scheduler.h ${CMAKE_CURRENT_SOURCE_DIR}/scheduler.cpp ${CMAKE_CURRENT_SOURCE_DIR}/scheduler_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/scheduler_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/os_scheduler.h ) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_SYSMAN_SCHEDULER} ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) add_subdirectories() # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_SYSMAN_SCHEDULER ${L0_SRCS_TOOLS_SYSMAN_SCHEDULER}) compute-runtime-22.14.22890/level_zero/tools/source/sysman/scheduler/linux/000077500000000000000000000000001422164147700266075ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/scheduler/linux/CMakeLists.txt000066400000000000000000000014361422164147700313530ustar00rootroot00000000000000# # Copyright (C) 2020-2022 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_TOOLS_SYSMAN_SCHEDULER_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/os_scheduler_imp.h ) if(NEO_ENABLE_i915_PRELIM_DETECTION) list(APPEND L0_SRCS_TOOLS_SYSMAN_SCHEDULER_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/os_scheduler_imp_prelim.cpp ) else() list(APPEND L0_SRCS_TOOLS_SYSMAN_SCHEDULER_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/os_scheduler_imp.cpp ) endif() if(UNIX) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_SYSMAN_SCHEDULER_LINUX} ) endif() # Make our source files 
visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_SYSMAN_SCHEDULER_LINUX ${L0_SRCS_TOOLS_SYSMAN_SCHEDULER_LINUX}) compute-runtime-22.14.22890/level_zero/tools/source/sysman/scheduler/linux/os_scheduler_imp.cpp000066400000000000000000000233471422164147700326500ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/scheduler/linux/os_scheduler_imp.h" #include "sysman/linux/os_sysman_imp.h" namespace L0 { const std::string LinuxSchedulerImp::preemptTimeoutMilliSecs("preempt_timeout_ms"); const std::string LinuxSchedulerImp::defaultPreemptTimeouttMilliSecs(".defaults/preempt_timeout_ms"); const std::string LinuxSchedulerImp::timesliceDurationMilliSecs("timeslice_duration_ms"); const std::string LinuxSchedulerImp::defaultTimesliceDurationMilliSecs(".defaults/timeslice_duration_ms"); const std::string LinuxSchedulerImp::heartbeatIntervalMilliSecs("heartbeat_interval_ms"); const std::string LinuxSchedulerImp::defaultHeartbeatIntervalMilliSecs(".defaults/heartbeat_interval_ms"); const std::string LinuxSchedulerImp::engineDir("engine"); ze_result_t LinuxSchedulerImp::getProperties(zes_sched_properties_t &schedProperties) { schedProperties.onSubdevice = onSubdevice; schedProperties.subdeviceId = subdeviceId; schedProperties.canControl = canControlScheduler(); schedProperties.engines = this->engineType; schedProperties.supportedModes = (1 << ZES_SCHED_MODE_TIMEOUT) | (1 << ZES_SCHED_MODE_TIMESLICE) | (1 << ZES_SCHED_MODE_EXCLUSIVE); return ZE_RESULT_SUCCESS; } ze_result_t LinuxSchedulerImp::getPreemptTimeout(uint64_t &timeout, ze_bool_t getDefault) { ze_result_t result = ZE_RESULT_ERROR_UNKNOWN; uint32_t i = 0; std::vector timeoutVec = {}; timeoutVec.resize(listOfEngines.size()); for (const auto &engineName : listOfEngines) { if (getDefault) { result = pSysfsAccess->read(engineDir + "/" + engineName + "/" + defaultPreemptTimeouttMilliSecs, timeout); } else { result = 
pSysfsAccess->read(engineDir + "/" + engineName + "/" + preemptTimeoutMilliSecs, timeout); } if (result == ZE_RESULT_SUCCESS) { timeout = timeout * milliSecsToMicroSecs; timeoutVec[i] = timeout; i++; } else { if (result == ZE_RESULT_ERROR_NOT_AVAILABLE) { result = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } return result; } } // check if all engines of the same type have the same scheduling param values if (std::adjacent_find(timeoutVec.begin(), timeoutVec.end(), std::not_equal_to<>()) == timeoutVec.end()) { timeout = timeoutVec[0]; return result; } else { return ZE_RESULT_ERROR_UNKNOWN; } } ze_result_t LinuxSchedulerImp::getTimesliceDuration(uint64_t ×lice, ze_bool_t getDefault) { ze_result_t result = ZE_RESULT_ERROR_UNKNOWN; uint32_t i = 0; std::vector timesliceVec = {}; timesliceVec.resize(listOfEngines.size()); for (const auto &engineName : listOfEngines) { if (getDefault) { result = pSysfsAccess->read(engineDir + "/" + engineName + "/" + defaultTimesliceDurationMilliSecs, timeslice); } else { result = pSysfsAccess->read(engineDir + "/" + engineName + "/" + timesliceDurationMilliSecs, timeslice); } if (result == ZE_RESULT_SUCCESS) { timeslice = timeslice * milliSecsToMicroSecs; timesliceVec[i] = timeslice; i++; } else { if (result == ZE_RESULT_ERROR_NOT_AVAILABLE) { result = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } return result; } } // check if all engines of the same type have the same scheduling param values if (std::adjacent_find(timesliceVec.begin(), timesliceVec.end(), std::not_equal_to<>()) == timesliceVec.end()) { timeslice = timesliceVec[0]; return result; } else { return ZE_RESULT_ERROR_UNKNOWN; } } ze_result_t LinuxSchedulerImp::getHeartbeatInterval(uint64_t &heartbeat, ze_bool_t getDefault) { ze_result_t result = ZE_RESULT_ERROR_UNKNOWN; uint32_t i = 0; std::vector heartbeatVec = {}; heartbeatVec.resize(listOfEngines.size()); for (const auto &engineName : listOfEngines) { if (getDefault) { result = pSysfsAccess->read(engineDir + "/" + engineName + "/" + 
defaultHeartbeatIntervalMilliSecs, heartbeat); } else { result = pSysfsAccess->read(engineDir + "/" + engineName + "/" + heartbeatIntervalMilliSecs, heartbeat); } if (result == ZE_RESULT_SUCCESS) { heartbeat = heartbeat * milliSecsToMicroSecs; heartbeatVec[i] = heartbeat; i++; } else { if (result == ZE_RESULT_ERROR_NOT_AVAILABLE) { result = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } return result; } } // check if all engines of the same type have the same scheduling param values if (std::adjacent_find(heartbeatVec.begin(), heartbeatVec.end(), std::not_equal_to<>()) == heartbeatVec.end()) { heartbeat = heartbeatVec[0]; return result; } else { return ZE_RESULT_ERROR_UNKNOWN; } } ze_result_t LinuxSchedulerImp::setPreemptTimeout(uint64_t timeout) { timeout = timeout / milliSecsToMicroSecs; ze_result_t result = ZE_RESULT_ERROR_UNKNOWN; for (const auto &engineName : listOfEngines) { result = pSysfsAccess->write(engineDir + "/" + engineName + "/" + preemptTimeoutMilliSecs, timeout); if (result != ZE_RESULT_SUCCESS) { if (result == ZE_RESULT_ERROR_NOT_AVAILABLE) { result = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } return result; } } return result; } ze_result_t LinuxSchedulerImp::setTimesliceDuration(uint64_t timeslice) { timeslice = timeslice / milliSecsToMicroSecs; ze_result_t result = ZE_RESULT_ERROR_UNKNOWN; for (const auto &engineName : listOfEngines) { result = pSysfsAccess->write(engineDir + "/" + engineName + "/" + timesliceDurationMilliSecs, timeslice); if (result != ZE_RESULT_SUCCESS) { if (result == ZE_RESULT_ERROR_NOT_AVAILABLE) { result = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } return result; } } return result; } ze_result_t LinuxSchedulerImp::setHeartbeatInterval(uint64_t heartbeat) { heartbeat = heartbeat / milliSecsToMicroSecs; ze_result_t result = ZE_RESULT_ERROR_UNKNOWN; for (const auto &engineName : listOfEngines) { result = pSysfsAccess->write(engineDir + "/" + engineName + "/" + heartbeatIntervalMilliSecs, heartbeat); if (result != ZE_RESULT_SUCCESS) { if 
(result == ZE_RESULT_ERROR_NOT_AVAILABLE) { result = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } return result; } } return result; } ze_bool_t LinuxSchedulerImp::canControlScheduler() { return 1; } static const std::multimap level0EngineTypeToSysfsEngineMap = { {ZES_ENGINE_TYPE_FLAG_RENDER, "rcs"}, {ZES_ENGINE_TYPE_FLAG_DMA, "bcs"}, {ZES_ENGINE_TYPE_FLAG_MEDIA, "vcs"}, {ZES_ENGINE_TYPE_FLAG_OTHER, "vecs"}}; static ze_result_t getNumEngineTypeAndInstancesForDevice(std::map> &mapOfEngines, SysfsAccess *pSysfsAccess) { std::vector localListOfAllEngines = {}; auto result = pSysfsAccess->scanDirEntries(LinuxSchedulerImp::engineDir, localListOfAllEngines); if (ZE_RESULT_SUCCESS != result) { if (result == ZE_RESULT_ERROR_NOT_AVAILABLE) { result = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } return result; } for_each(localListOfAllEngines.begin(), localListOfAllEngines.end(), [&](std::string &mappedEngine) { for (auto itr = level0EngineTypeToSysfsEngineMap.begin(); itr != level0EngineTypeToSysfsEngineMap.end(); itr++) { char digits[] = "0123456789"; auto mappedEngineName = mappedEngine.substr(0, mappedEngine.find_first_of(digits, 0)); if (0 == mappedEngineName.compare(itr->second.c_str())) { auto ret = mapOfEngines.find(itr->first); if (ret != mapOfEngines.end()) { ret->second.push_back(mappedEngine); } else { std::vector engineVec = {}; engineVec.push_back(mappedEngine); mapOfEngines.emplace(itr->first, engineVec); } } } }); return result; } ze_result_t OsScheduler::getNumEngineTypeAndInstances( std::map> &mapOfEngines, OsSysman *pOsSysman, ze_device_handle_t subdeviceHandle) { LinuxSysmanImp *pLinuxSysmanImp = static_cast(pOsSysman); auto pSysfsAccess = &pLinuxSysmanImp->getSysfsAccess(); return getNumEngineTypeAndInstancesForDevice(mapOfEngines, pSysfsAccess); } LinuxSchedulerImp::LinuxSchedulerImp( OsSysman *pOsSysman, zes_engine_type_flag_t type, std::vector &listOfEngines, ze_bool_t isSubdevice, uint32_t subdeviceId) : engineType(type), onSubdevice(isSubdevice), 
subdeviceId(subdeviceId) { LinuxSysmanImp *pLinuxSysmanImp = static_cast(pOsSysman); pSysfsAccess = &pLinuxSysmanImp->getSysfsAccess(); this->listOfEngines = listOfEngines; } OsScheduler *OsScheduler::create( OsSysman *pOsSysman, zes_engine_type_flag_t type, std::vector &listOfEngines, ze_bool_t isSubdevice, uint32_t subdeviceId) { LinuxSchedulerImp *pLinuxSchedulerImp = new LinuxSchedulerImp(pOsSysman, type, listOfEngines, isSubdevice, subdeviceId); return static_cast(pLinuxSchedulerImp); } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/scheduler/linux/os_scheduler_imp.h000066400000000000000000000042051422164147700323050ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/os_interface/linux/drm_neo.h" #include "sysman/scheduler/scheduler_imp.h" #include namespace L0 { class SysfsAccess; struct Device; // Following below mappings of scheduler properties with sysfs nodes // zes_sched_timeslice_properties_t.interval = timeslice_duration_ms // zes_sched_timeslice_properties_t.yieldTimeout = preempt_timeout_ms // zes_sched_timeout_properties_t. 
watchdogTimeout = heartbeat_interval_ms class LinuxSchedulerImp : public OsScheduler, NEO::NonCopyableOrMovableClass { public: ze_result_t getPreemptTimeout(uint64_t &timeout, ze_bool_t getDefault) override; ze_result_t getTimesliceDuration(uint64_t ×lice, ze_bool_t getDefault) override; ze_result_t getHeartbeatInterval(uint64_t &heartbeat, ze_bool_t getDefault) override; ze_result_t setPreemptTimeout(uint64_t timeout) override; ze_result_t setTimesliceDuration(uint64_t timeslice) override; ze_result_t setHeartbeatInterval(uint64_t heartbeat) override; ze_bool_t canControlScheduler() override; ze_result_t getProperties(zes_sched_properties_t &properties) override; LinuxSchedulerImp() = default; LinuxSchedulerImp(OsSysman *pOsSysman, zes_engine_type_flag_t type, std::vector &listOfEngines, ze_bool_t isSubdevice, uint32_t subdeviceId); ~LinuxSchedulerImp() override = default; static const std::string engineDir; protected: SysfsAccess *pSysfsAccess = nullptr; Device *pDevice = nullptr; zes_engine_type_flag_t engineType = ZES_ENGINE_TYPE_FLAG_OTHER; ze_bool_t onSubdevice = 0; uint32_t subdeviceId = 0; private: static const std::string preemptTimeoutMilliSecs; static const std::string defaultPreemptTimeouttMilliSecs; static const std::string timesliceDurationMilliSecs; static const std::string defaultTimesliceDurationMilliSecs; static const std::string heartbeatIntervalMilliSecs; static const std::string defaultHeartbeatIntervalMilliSecs; std::vector listOfEngines = {}; }; } // namespace L0 os_scheduler_imp_prelim.cpp000066400000000000000000000311611422164147700341320ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/scheduler/linux/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/linux/engine_info.h" #include "level_zero/tools/source/sysman/scheduler/linux/os_scheduler_imp.h" #include "sysman/linux/os_sysman_imp.h" namespace L0 { const std::string 
LinuxSchedulerImp::preemptTimeoutMilliSecs("preempt_timeout_ms"); const std::string LinuxSchedulerImp::defaultPreemptTimeouttMilliSecs(".defaults/preempt_timeout_ms"); const std::string LinuxSchedulerImp::timesliceDurationMilliSecs("timeslice_duration_ms"); const std::string LinuxSchedulerImp::defaultTimesliceDurationMilliSecs(".defaults/timeslice_duration_ms"); const std::string LinuxSchedulerImp::heartbeatIntervalMilliSecs("heartbeat_interval_ms"); const std::string LinuxSchedulerImp::defaultHeartbeatIntervalMilliSecs(".defaults/heartbeat_interval_ms"); const std::string LinuxSchedulerImp::engineDir("engine"); constexpr uint16_t milliSecsToMicroSecs = 1000; static const std::map<__u16, std::string> i915EngineClassToSysfsEngineMap = { {I915_ENGINE_CLASS_RENDER, "rcs"}, {static_cast<__u16>(I915_ENGINE_CLASS_COMPUTE), "ccs"}, {I915_ENGINE_CLASS_COPY, "bcs"}, {I915_ENGINE_CLASS_VIDEO, "vcs"}, {I915_ENGINE_CLASS_VIDEO_ENHANCE, "vecs"}}; static const std::map sysfsEngineMapToLevel0EngineType = { {"rcs", ZES_ENGINE_TYPE_FLAG_RENDER}, {"ccs", ZES_ENGINE_TYPE_FLAG_COMPUTE}, {"bcs", ZES_ENGINE_TYPE_FLAG_DMA}, {"vcs", ZES_ENGINE_TYPE_FLAG_MEDIA}, {"vecs", ZES_ENGINE_TYPE_FLAG_OTHER}}; static const std::multimap level0EngineTypeToSysfsEngineMap = { {ZES_ENGINE_TYPE_FLAG_RENDER, "rcs"}, {ZES_ENGINE_TYPE_FLAG_COMPUTE, "ccs"}, {ZES_ENGINE_TYPE_FLAG_DMA, "bcs"}, {ZES_ENGINE_TYPE_FLAG_MEDIA, "vcs"}, {ZES_ENGINE_TYPE_FLAG_OTHER, "vecs"}}; static const std::map sysfsEngineMapToi915EngineClass = { {"rcs", I915_ENGINE_CLASS_RENDER}, {"ccs", static_cast<__u16>(I915_ENGINE_CLASS_COMPUTE)}, {"bcs", I915_ENGINE_CLASS_COPY}, {"vcs", I915_ENGINE_CLASS_VIDEO}, {"vecs", I915_ENGINE_CLASS_VIDEO_ENHANCE}}; ze_result_t LinuxSchedulerImp::getProperties(zes_sched_properties_t &schedProperties) { schedProperties.onSubdevice = onSubdevice; schedProperties.subdeviceId = subdeviceId; schedProperties.canControl = canControlScheduler(); schedProperties.engines = this->engineType; 
schedProperties.supportedModes = (1 << ZES_SCHED_MODE_TIMEOUT) | (1 << ZES_SCHED_MODE_TIMESLICE) | (1 << ZES_SCHED_MODE_EXCLUSIVE); return ZE_RESULT_SUCCESS; } ze_result_t LinuxSchedulerImp::getPreemptTimeout(uint64_t &timeout, ze_bool_t getDefault) { ze_result_t result = ZE_RESULT_ERROR_UNKNOWN; uint32_t i = 0; std::vector timeoutVec = {}; timeoutVec.resize(listOfEngines.size()); for (const auto &engineName : listOfEngines) { if (getDefault) { result = pSysfsAccess->read(engineDir + "/" + engineName + "/" + defaultPreemptTimeouttMilliSecs, timeout); } else { result = pSysfsAccess->read(engineDir + "/" + engineName + "/" + preemptTimeoutMilliSecs, timeout); } if (result == ZE_RESULT_SUCCESS) { timeout = timeout * milliSecsToMicroSecs; timeoutVec[i] = timeout; i++; } else { if (result == ZE_RESULT_ERROR_NOT_AVAILABLE) { result = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } return result; } } if (engineType == ZES_ENGINE_TYPE_FLAG_COMPUTE) { timeout = *std::max_element(timeoutVec.begin(), timeoutVec.end()); return result; } // check if all engines of the same type have the same scheduling param values if (std::adjacent_find(timeoutVec.begin(), timeoutVec.end(), std::not_equal_to<>()) == timeoutVec.end()) { timeout = timeoutVec[0]; return result; } else { return ZE_RESULT_ERROR_UNKNOWN; } } ze_result_t LinuxSchedulerImp::getTimesliceDuration(uint64_t ×lice, ze_bool_t getDefault) { ze_result_t result = ZE_RESULT_ERROR_UNKNOWN; uint32_t i = 0; std::vector timesliceVec = {}; timesliceVec.resize(listOfEngines.size()); for (const auto &engineName : listOfEngines) { if (getDefault) { result = pSysfsAccess->read(engineDir + "/" + engineName + "/" + defaultTimesliceDurationMilliSecs, timeslice); } else { result = pSysfsAccess->read(engineDir + "/" + engineName + "/" + timesliceDurationMilliSecs, timeslice); } if (result == ZE_RESULT_SUCCESS) { timeslice = timeslice * milliSecsToMicroSecs; timesliceVec[i] = timeslice; i++; } else { if (result == ZE_RESULT_ERROR_NOT_AVAILABLE) { 
result = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } return result; } } // check if all engines of the same type have the same scheduling param values if (std::adjacent_find(timesliceVec.begin(), timesliceVec.end(), std::not_equal_to<>()) == timesliceVec.end()) { timeslice = timesliceVec[0]; return result; } else { return ZE_RESULT_ERROR_UNKNOWN; } } ze_result_t LinuxSchedulerImp::getHeartbeatInterval(uint64_t &heartbeat, ze_bool_t getDefault) { ze_result_t result = ZE_RESULT_ERROR_UNKNOWN; uint32_t i = 0; std::vector heartbeatVec = {}; heartbeatVec.resize(listOfEngines.size()); for (const auto &engineName : listOfEngines) { if (getDefault) { result = pSysfsAccess->read(engineDir + "/" + engineName + "/" + defaultHeartbeatIntervalMilliSecs, heartbeat); } else { result = pSysfsAccess->read(engineDir + "/" + engineName + "/" + heartbeatIntervalMilliSecs, heartbeat); } if (result == ZE_RESULT_SUCCESS) { heartbeat = heartbeat * milliSecsToMicroSecs; heartbeatVec[i] = heartbeat; i++; } else { if (result == ZE_RESULT_ERROR_NOT_AVAILABLE) { result = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } return result; } } // check if all engines of the same type have the same scheduling param values if (std::adjacent_find(heartbeatVec.begin(), heartbeatVec.end(), std::not_equal_to<>()) == heartbeatVec.end()) { heartbeat = heartbeatVec[0]; return result; } else { return ZE_RESULT_ERROR_UNKNOWN; } } ze_result_t LinuxSchedulerImp::setPreemptTimeout(uint64_t timeout) { timeout = timeout / milliSecsToMicroSecs; ze_result_t result = ZE_RESULT_ERROR_UNKNOWN; for (const auto &engineName : listOfEngines) { result = pSysfsAccess->write(engineDir + "/" + engineName + "/" + preemptTimeoutMilliSecs, timeout); if (result != ZE_RESULT_SUCCESS) { if (result == ZE_RESULT_ERROR_NOT_AVAILABLE) { result = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } return result; } } return result; } ze_result_t LinuxSchedulerImp::setTimesliceDuration(uint64_t timeslice) { timeslice = timeslice / milliSecsToMicroSecs; ze_result_t 
result = ZE_RESULT_ERROR_UNKNOWN; for (const auto &engineName : listOfEngines) { result = pSysfsAccess->write(engineDir + "/" + engineName + "/" + timesliceDurationMilliSecs, timeslice); if (result != ZE_RESULT_SUCCESS) { if (result == ZE_RESULT_ERROR_NOT_AVAILABLE) { result = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } return result; } } return result; } ze_result_t LinuxSchedulerImp::setHeartbeatInterval(uint64_t heartbeat) { heartbeat = heartbeat / milliSecsToMicroSecs; ze_result_t result = ZE_RESULT_ERROR_UNKNOWN; for (const auto &engineName : listOfEngines) { result = pSysfsAccess->write(engineDir + "/" + engineName + "/" + heartbeatIntervalMilliSecs, heartbeat); if (result != ZE_RESULT_SUCCESS) { if (result == ZE_RESULT_ERROR_NOT_AVAILABLE) { result = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } return result; } } return result; } ze_bool_t LinuxSchedulerImp::canControlScheduler() { return 1; } static ze_result_t getNumEngineTypeAndInstancesForSubDevices(std::map> &mapOfEngines, NEO::Drm *pDrm, uint32_t subdeviceId) { auto engineInfo = pDrm->getEngineInfo(); if (engineInfo == nullptr) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } std::vector listOfEngines; engineInfo->getListOfEnginesOnATile(subdeviceId, listOfEngines); for (const auto &engine : listOfEngines) { auto sysfEngineString = i915EngineClassToSysfsEngineMap.find(static_cast(engine.engineClass)); if (sysfEngineString == i915EngineClassToSysfsEngineMap.end()) { continue; } std::string sysfsEngineDirNode = sysfEngineString->second + std::to_string(engine.engineInstance); auto level0EngineType = sysfsEngineMapToLevel0EngineType.find(sysfEngineString->second); auto ret = mapOfEngines.find(level0EngineType->second); if (ret != mapOfEngines.end()) { ret->second.push_back(sysfsEngineDirNode); } else { std::vector engineVec = {}; engineVec.push_back(sysfsEngineDirNode); mapOfEngines.emplace(level0EngineType->second, engineVec); } } return ZE_RESULT_SUCCESS; } static ze_result_t 
getNumEngineTypeAndInstancesForDevice(std::map> &mapOfEngines, SysfsAccess *pSysfsAccess) { std::vector localListOfAllEngines = {}; auto result = pSysfsAccess->scanDirEntries(LinuxSchedulerImp::engineDir, localListOfAllEngines); if (ZE_RESULT_SUCCESS != result) { if (result == ZE_RESULT_ERROR_NOT_AVAILABLE) { result = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } return result; } for_each(localListOfAllEngines.begin(), localListOfAllEngines.end(), [&](std::string &mappedEngine) { for (auto itr = level0EngineTypeToSysfsEngineMap.begin(); itr != level0EngineTypeToSysfsEngineMap.end(); itr++) { char digits[] = "0123456789"; auto mappedEngineName = mappedEngine.substr(0, mappedEngine.find_first_of(digits, 0)); if (0 == mappedEngineName.compare(itr->second.c_str())) { auto ret = mapOfEngines.find(itr->first); if (ret != mapOfEngines.end()) { ret->second.push_back(mappedEngine); } else { std::vector engineVec = {}; engineVec.push_back(mappedEngine); mapOfEngines.emplace(itr->first, engineVec); } } } }); return result; } ze_result_t OsScheduler::getNumEngineTypeAndInstances( std::map> &mapOfEngines, OsSysman *pOsSysman, ze_device_handle_t subdeviceHandle) { LinuxSysmanImp *pLinuxSysmanImp = static_cast(pOsSysman); auto pDrm = &pLinuxSysmanImp->getDrm(); auto pSysfsAccess = &pLinuxSysmanImp->getSysfsAccess(); ze_device_properties_t deviceProperties = {}; Device::fromHandle(subdeviceHandle)->getProperties(&deviceProperties); if (deviceProperties.flags & ZE_DEVICE_PROPERTY_FLAG_SUBDEVICE) { return getNumEngineTypeAndInstancesForSubDevices(mapOfEngines, pDrm, deviceProperties.subdeviceId); } return getNumEngineTypeAndInstancesForDevice(mapOfEngines, pSysfsAccess); } LinuxSchedulerImp::LinuxSchedulerImp( OsSysman *pOsSysman, zes_engine_type_flag_t type, std::vector &listOfEngines, ze_bool_t isSubdevice, uint32_t subdeviceId) : engineType(type), onSubdevice(isSubdevice), subdeviceId(subdeviceId) { LinuxSysmanImp *pLinuxSysmanImp = static_cast(pOsSysman); pSysfsAccess = 
&pLinuxSysmanImp->getSysfsAccess(); this->listOfEngines = listOfEngines; } OsScheduler *OsScheduler::create( OsSysman *pOsSysman, zes_engine_type_flag_t type, std::vector &listOfEngines, ze_bool_t isSubdevice, uint32_t subdeviceId) { LinuxSchedulerImp *pLinuxSchedulerImp = new LinuxSchedulerImp(pOsSysman, type, listOfEngines, isSubdevice, subdeviceId); return static_cast(pLinuxSchedulerImp); } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/scheduler/os_scheduler.h000066400000000000000000000025411422164147700303020ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include #include #include #include namespace L0 { struct OsSysman; using namespace std; class OsScheduler { public: virtual ze_result_t getPreemptTimeout(uint64_t &timeout, ze_bool_t getDefault) = 0; virtual ze_result_t getTimesliceDuration(uint64_t ×lice, ze_bool_t getDefault) = 0; virtual ze_result_t getHeartbeatInterval(uint64_t &heartbeat, ze_bool_t getDefault) = 0; virtual ze_result_t setPreemptTimeout(uint64_t timeout) = 0; virtual ze_result_t setTimesliceDuration(uint64_t timeslice) = 0; virtual ze_result_t setHeartbeatInterval(uint64_t heartbeat) = 0; virtual ze_bool_t canControlScheduler() = 0; virtual ze_result_t getProperties(zes_sched_properties_t &properties) = 0; static OsScheduler *create(OsSysman *pOsSysman, zes_engine_type_flag_t engineType, std::vector &listOfEngines, ze_bool_t isSubdevice, uint32_t subdeviceId); static ze_result_t getNumEngineTypeAndInstances(std::map> &listOfEngines, OsSysman *pOsSysman, ze_device_handle_t subdeviceHandle); virtual ~OsScheduler() = default; }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/scheduler/scheduler.cpp000066400000000000000000000035271422164147700301410ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/basic_math.h" #include 
"shared/source/helpers/debug_helpers.h" #include "level_zero/tools/source/sysman/scheduler/scheduler_imp.h" class OsScheduler; namespace L0 { SchedulerHandleContext::SchedulerHandleContext(OsSysman *pOsSysman) { this->pOsSysman = pOsSysman; } SchedulerHandleContext::~SchedulerHandleContext() { for (Scheduler *pScheduler : handleList) { delete pScheduler; } handleList.clear(); } void SchedulerHandleContext::createHandle(zes_engine_type_flag_t engineType, std::vector &listOfEngines, ze_device_handle_t deviceHandle) { Scheduler *pScheduler = new SchedulerImp(pOsSysman, engineType, listOfEngines, deviceHandle); handleList.push_back(pScheduler); } void SchedulerHandleContext::init(std::vector &deviceHandles) { for (const auto &deviceHandle : deviceHandles) { std::map> engineTypeInstance = {}; OsScheduler::getNumEngineTypeAndInstances(engineTypeInstance, pOsSysman, deviceHandle); for (auto itr = engineTypeInstance.begin(); itr != engineTypeInstance.end(); ++itr) { createHandle(itr->first, itr->second, deviceHandle); } } } ze_result_t SchedulerHandleContext::schedulerGet(uint32_t *pCount, zes_sched_handle_t *phScheduler) { uint32_t handleListSize = static_cast(handleList.size()); uint32_t numToCopy = std::min(*pCount, handleListSize); if (0 == *pCount || *pCount > handleListSize) { *pCount = handleListSize; } if (nullptr != phScheduler) { for (uint32_t i = 0; i < numToCopy; i++) { phScheduler[i] = handleList[i]->toHandle(); } } return ZE_RESULT_SUCCESS; } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/scheduler/scheduler.h000066400000000000000000000037501422164147700276040ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/non_copyable_or_moveable.h" #include "level_zero/core/source/device/device.h" #include #include #include #include struct _zes_sched_handle_t { virtual ~_zes_sched_handle_t() = default; }; namespace L0 { struct OsSysman; 
class Scheduler : _zes_sched_handle_t { public: virtual ze_result_t schedulerGetProperties(zes_sched_properties_t *pProperties) = 0; virtual ze_result_t getCurrentMode(zes_sched_mode_t *pMode) = 0; virtual ze_result_t getTimeoutModeProperties(ze_bool_t getDefaults, zes_sched_timeout_properties_t *pConfig) = 0; virtual ze_result_t getTimesliceModeProperties(ze_bool_t getDefaults, zes_sched_timeslice_properties_t *pConfig) = 0; virtual ze_result_t setTimeoutMode(zes_sched_timeout_properties_t *pProperties, ze_bool_t *pNeedReload) = 0; virtual ze_result_t setTimesliceMode(zes_sched_timeslice_properties_t *pProperties, ze_bool_t *pNeedReload) = 0; virtual ze_result_t setExclusiveMode(ze_bool_t *pNeedReload) = 0; virtual ze_result_t setComputeUnitDebugMode(ze_bool_t *pNeedReload) = 0; static Scheduler *fromHandle(zes_sched_handle_t handle) { return static_cast(handle); } inline zes_sched_handle_t toHandle() { return this; } bool initSuccess = false; }; struct SchedulerHandleContext : NEO::NonCopyableOrMovableClass { SchedulerHandleContext(OsSysman *pOsSysman); ~SchedulerHandleContext(); void init(std::vector &deviceHandles); ze_result_t schedulerGet(uint32_t *pCount, zes_sched_handle_t *phScheduler); OsSysman *pOsSysman = nullptr; std::vector handleList = {}; ze_device_handle_t hCoreDevice = nullptr; private: void createHandle(zes_engine_type_flag_t engineType, std::vector &listOfEngines, ze_device_handle_t deviceHandle); }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/scheduler/scheduler_imp.cpp000066400000000000000000000123601422164147700310010ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "scheduler_imp.h" #include "shared/source/helpers/debug_helpers.h" #include "level_zero/tools/source/sysman/sysman_const.h" namespace L0 { ze_result_t SchedulerImp::setExclusiveMode(ze_bool_t *pNeedReload) { uint64_t timeslice = 0, timeout = 0, heartbeat = 0; *pNeedReload = 
false; ze_result_t result = pOsScheduler->setPreemptTimeout(timeout); if (result != ZE_RESULT_SUCCESS) { return result; } result = pOsScheduler->setTimesliceDuration(timeslice); if (result != ZE_RESULT_SUCCESS) { return result; } result = pOsScheduler->setHeartbeatInterval(heartbeat); return result; } ze_result_t SchedulerImp::setComputeUnitDebugMode(ze_bool_t *pNeedReload) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t SchedulerImp::getCurrentMode(zes_sched_mode_t *pMode) { uint64_t timeout = 0; uint64_t timeslice = 0; ze_result_t result = pOsScheduler->getPreemptTimeout(timeout, false); if (result != ZE_RESULT_SUCCESS) { return result; } result = pOsScheduler->getTimesliceDuration(timeslice, false); if (result != ZE_RESULT_SUCCESS) { return result; } if (timeslice > 0) { *pMode = ZES_SCHED_MODE_TIMESLICE; } else { if (timeout > 0) { *pMode = ZES_SCHED_MODE_TIMEOUT; } else { *pMode = ZES_SCHED_MODE_EXCLUSIVE; } } return result; } ze_result_t SchedulerImp::getTimeoutModeProperties(ze_bool_t getDefaults, zes_sched_timeout_properties_t *pConfig) { uint64_t heartbeat = 0; ze_result_t result = pOsScheduler->getHeartbeatInterval(heartbeat, getDefaults); if (result != ZE_RESULT_SUCCESS) { return result; } pConfig->watchdogTimeout = heartbeat; return result; } ze_result_t SchedulerImp::getTimesliceModeProperties(ze_bool_t getDefaults, zes_sched_timeslice_properties_t *pConfig) { uint64_t timeout = 0, timeslice = 0; ze_result_t result = pOsScheduler->getPreemptTimeout(timeout, getDefaults); if (result != ZE_RESULT_SUCCESS) { return result; } result = pOsScheduler->getTimesliceDuration(timeslice, getDefaults); if (result != ZE_RESULT_SUCCESS) { return result; } pConfig->interval = timeslice; pConfig->yieldTimeout = timeout; return result; } ze_result_t SchedulerImp::setTimeoutMode(zes_sched_timeout_properties_t *pProperties, ze_bool_t *pNeedReload) { zes_sched_mode_t currMode; ze_result_t result = getCurrentMode(&currMode); if (result != ZE_RESULT_SUCCESS) { 
return result; } if (pProperties->watchdogTimeout < minTimeoutModeHeartbeat) { // watchdogTimeout(in usec) less than 5000 would be computed to // 0 milli seconds preempt timeout, and then after returning from // this method, we would end up in EXCLUSIVE mode return ZE_RESULT_ERROR_INVALID_ARGUMENT; } *pNeedReload = false; result = pOsScheduler->setHeartbeatInterval(pProperties->watchdogTimeout); if ((currMode == ZES_SCHED_MODE_TIMEOUT) || (result != ZE_RESULT_SUCCESS)) { return result; } uint64_t timeout = (pProperties->watchdogTimeout) / 5; result = pOsScheduler->setPreemptTimeout(timeout); if (result != ZE_RESULT_SUCCESS) { return result; } uint64_t timeslice = 0; result = pOsScheduler->setTimesliceDuration(timeslice); if (result != ZE_RESULT_SUCCESS) { return result; } return result; } ze_result_t SchedulerImp::setTimesliceMode(zes_sched_timeslice_properties_t *pProperties, ze_bool_t *pNeedReload) { if (pProperties->interval < minTimeoutInMicroSeconds) { // interval(in usec) less than 1000 would be computed to // 0 milli seconds interval. 
return ZE_RESULT_ERROR_INVALID_ARGUMENT; } *pNeedReload = false; ze_result_t result = pOsScheduler->setPreemptTimeout(pProperties->yieldTimeout); if (result != ZE_RESULT_SUCCESS) { return result; } result = pOsScheduler->setTimesliceDuration(pProperties->interval); if (result != ZE_RESULT_SUCCESS) { return result; } uint64_t heartbeat = 2500 * (pProperties->interval); result = pOsScheduler->setHeartbeatInterval(heartbeat); return result; } ze_result_t SchedulerImp::schedulerGetProperties(zes_sched_properties_t *pProperties) { *pProperties = properties; return ZE_RESULT_SUCCESS; } void SchedulerImp::init() { pOsScheduler->getProperties(this->properties); } SchedulerImp::SchedulerImp(OsSysman *pOsSysman, zes_engine_type_flag_t engineType, std::vector &listOfEngines, ze_device_handle_t deviceHandle) { ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES}; Device::fromHandle(deviceHandle)->getProperties(&deviceProperties); pOsScheduler = OsScheduler::create(pOsSysman, engineType, listOfEngines, deviceProperties.flags & ZE_DEVICE_PROPERTY_FLAG_SUBDEVICE, deviceProperties.subdeviceId); UNRECOVERABLE_IF(nullptr == pOsScheduler); init(); }; SchedulerImp::~SchedulerImp() { if (nullptr != pOsScheduler) { delete pOsScheduler; pOsScheduler = nullptr; } } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/scheduler/scheduler_imp.h000066400000000000000000000026271422164147700304530ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/non_copyable_or_moveable.h" #include #include "os_scheduler.h" #include "scheduler.h" namespace L0 { class SchedulerImp : public Scheduler, NEO::NonCopyableOrMovableClass { public: void init(); ze_result_t schedulerGetProperties(zes_sched_properties_t *pProperties) override; ze_result_t getCurrentMode(zes_sched_mode_t *pMode) override; ze_result_t getTimeoutModeProperties(ze_bool_t getDefaults, 
zes_sched_timeout_properties_t *pConfig) override; ze_result_t getTimesliceModeProperties(ze_bool_t getDefaults, zes_sched_timeslice_properties_t *pConfig) override; ze_result_t setTimeoutMode(zes_sched_timeout_properties_t *pProperties, ze_bool_t *pNeedReload) override; ze_result_t setTimesliceMode(zes_sched_timeslice_properties_t *pProperties, ze_bool_t *pNeedReload) override; ze_result_t setExclusiveMode(ze_bool_t *pNeedReload) override; ze_result_t setComputeUnitDebugMode(ze_bool_t *pNeedReload) override; SchedulerImp() = default; OsScheduler *pOsScheduler = nullptr; SchedulerImp(OsSysman *pOsSysman, zes_engine_type_flag_t type, std::vector &listOfEngines, ze_device_handle_t deviceHandle); ~SchedulerImp() override; private: zes_sched_properties_t properties = {}; }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/scheduler/windows/000077500000000000000000000000001422164147700271425ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/scheduler/windows/CMakeLists.txt000066400000000000000000000010751422164147700317050ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_TOOLS_SYSMAN_SCHEDULER_WINDOWS ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/os_scheduler_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/os_scheduler_imp.cpp ) if(WIN32) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_SYSMAN_SCHEDULER_WINDOWS} ) endif() # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_SYSMAN_SCHEDULER_WINDOWS ${L0_SRCS_TOOLS_SYSMAN_SCHEDULER_WINDOWS}) compute-runtime-22.14.22890/level_zero/tools/source/sysman/scheduler/windows/os_scheduler_imp.cpp000066400000000000000000000034001422164147700331670ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/scheduler/windows/os_scheduler_imp.h" #include 
"sysman/scheduler/scheduler_imp.h" namespace L0 { ze_result_t WddmSchedulerImp::getPreemptTimeout(uint64_t &timeout, ze_bool_t getDefault) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t WddmSchedulerImp::getTimesliceDuration(uint64_t ×lice, ze_bool_t getDefault) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t WddmSchedulerImp::getHeartbeatInterval(uint64_t &heartbeat, ze_bool_t getDefault) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t WddmSchedulerImp::setPreemptTimeout(uint64_t timeout) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t WddmSchedulerImp::setTimesliceDuration(uint64_t timeslice) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t WddmSchedulerImp::setHeartbeatInterval(uint64_t heartbeat) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_bool_t WddmSchedulerImp::canControlScheduler() { return 0; } ze_result_t WddmSchedulerImp::getProperties(zes_sched_properties_t &properties) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t OsScheduler::getNumEngineTypeAndInstances(std::map> &listOfEngines, OsSysman *pOsSysman, ze_device_handle_t subdeviceHandle) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } OsScheduler *OsScheduler::create( OsSysman *pOsSysman, zes_engine_type_flag_t type, std::vector &listOfEngines, ze_bool_t isSubdevice, uint32_t subdeviceId) { WddmSchedulerImp *pWddmSchedulerImp = new WddmSchedulerImp(); return static_cast(pWddmSchedulerImp); } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/scheduler/windows/os_scheduler_imp.h000066400000000000000000000014501422164147700326370ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "sysman/scheduler/scheduler_imp.h" namespace L0 { class WddmSchedulerImp : public OsScheduler { public: ze_result_t getPreemptTimeout(uint64_t &timeout, ze_bool_t getDefault) override; ze_result_t getTimesliceDuration(uint64_t ×lice, ze_bool_t getDefault) 
override; ze_result_t getHeartbeatInterval(uint64_t &heartbeat, ze_bool_t getDefault) override; ze_result_t setPreemptTimeout(uint64_t timeout) override; ze_result_t setTimesliceDuration(uint64_t timeslice) override; ze_result_t setHeartbeatInterval(uint64_t heartbeat) override; ze_bool_t canControlScheduler() override; ze_result_t getProperties(zes_sched_properties_t &properties) override; }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/standby/000077500000000000000000000000001422164147700251365ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/standby/CMakeLists.txt000066400000000000000000000013231422164147700276750ustar00rootroot00000000000000# # Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_TOOLS_SYSMAN_STANDBY ${CMAKE_CURRENT_SOURCE_DIR}/standby.cpp ${CMAKE_CURRENT_SOURCE_DIR}/standby.h ${CMAKE_CURRENT_SOURCE_DIR}/standby_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/standby_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/os_standby.h ) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_SYSMAN_STANDBY} ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) if(UNIX) add_subdirectory(linux) else() add_subdirectory(windows) endif() # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_SYSMAN_STANDBY ${L0_SRCS_TOOLS_SYSMAN_STANDBY}) compute-runtime-22.14.22890/level_zero/tools/source/sysman/standby/linux/000077500000000000000000000000001422164147700262755ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/standby/linux/CMakeLists.txt000066400000000000000000000010551422164147700310360ustar00rootroot00000000000000# # Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_TOOLS_SYSMAN_STANDBY_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/os_standby_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/os_standby_imp.h ) if(UNIX) target_sources(${L0_STATIC_LIB_NAME} 
PRIVATE ${L0_SRCS_TOOLS_SYSMAN_STANDBY_LINUX} ) endif() # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_SYSMAN_STANDBY_LINUX ${L0_SRCS_TOOLS_SYSMAN_STANDBY_LINUX}) compute-runtime-22.14.22890/level_zero/tools/source/sysman/standby/linux/os_standby_imp.cpp000066400000000000000000000050621422164147700320160ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/standby/linux/os_standby_imp.h" namespace L0 { ze_result_t LinuxStandbyImp::osStandbyGetProperties(zes_standby_properties_t &properties) { properties.pNext = nullptr; properties.type = ZES_STANDBY_TYPE_GLOBAL; properties.onSubdevice = isSubdevice; properties.subdeviceId = subdeviceId; return ZE_RESULT_SUCCESS; } bool LinuxStandbyImp::isStandbySupported(void) { if (ZE_RESULT_SUCCESS == pSysfsAccess->canRead(standbyModeFile)) { return true; } else { return false; } } ze_result_t LinuxStandbyImp::getMode(zes_standby_promo_mode_t &mode) { int currentMode = -1; ze_result_t result = pSysfsAccess->read(standbyModeFile, currentMode); if (ZE_RESULT_SUCCESS != result) { if (result == ZE_RESULT_ERROR_NOT_AVAILABLE) { result = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } return result; } if (standbyModeDefault == currentMode) { mode = ZES_STANDBY_PROMO_MODE_DEFAULT; } else if (standbyModeNever == currentMode) { mode = ZES_STANDBY_PROMO_MODE_NEVER; } else { result = ZE_RESULT_ERROR_UNKNOWN; } return result; } ze_result_t LinuxStandbyImp::setMode(zes_standby_promo_mode_t mode) { ze_result_t result = ZE_RESULT_ERROR_UNKNOWN; if (ZES_STANDBY_PROMO_MODE_DEFAULT == mode) { result = pSysfsAccess->write(standbyModeFile, standbyModeDefault); } else { result = pSysfsAccess->write(standbyModeFile, standbyModeNever); } if (ZE_RESULT_ERROR_NOT_AVAILABLE == result) { result = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } return result; } void LinuxStandbyImp::init() { const std::string baseDir = "gt/gt" + 
std::to_string(subdeviceId) + "/"; if (pSysfsAccess->directoryExists(baseDir)) { standbyModeFile = baseDir + "rc6_enable"; } else { standbyModeFile = "power/rc6_enable"; } } LinuxStandbyImp::LinuxStandbyImp(OsSysman *pOsSysman, ze_bool_t onSubdevice, uint32_t subdeviceId) : isSubdevice(onSubdevice), subdeviceId(subdeviceId) { LinuxSysmanImp *pLinuxSysmanImp = static_cast(pOsSysman); pSysfsAccess = &pLinuxSysmanImp->getSysfsAccess(); init(); } OsStandby *OsStandby::create(OsSysman *pOsSysman, ze_bool_t onSubdevice, uint32_t subdeviceId) { LinuxStandbyImp *pLinuxStandbyImp = new LinuxStandbyImp(pOsSysman, onSubdevice, subdeviceId); return static_cast(pLinuxStandbyImp); } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/standby/linux/os_standby_imp.h000066400000000000000000000021141422164147700314560ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/non_copyable_or_moveable.h" #include "sysman/linux/os_sysman_imp.h" #include "sysman/standby/os_standby.h" #include "sysman/standby/standby_imp.h" namespace L0 { class LinuxStandbyImp : public OsStandby, NEO::NonCopyableOrMovableClass { public: ze_result_t getMode(zes_standby_promo_mode_t &mode) override; ze_result_t setMode(zes_standby_promo_mode_t mode) override; ze_result_t osStandbyGetProperties(zes_standby_properties_t &properties) override; bool isStandbySupported(void) override; LinuxStandbyImp() = default; LinuxStandbyImp(OsSysman *pOsSysman, ze_bool_t onSubdevice, uint32_t subdeviceId); ~LinuxStandbyImp() override = default; protected: SysfsAccess *pSysfsAccess = nullptr; private: std::string standbyModeFile; static const int standbyModeDefault = 1; static const int standbyModeNever = 0; bool isSubdevice = false; uint32_t subdeviceId = 0; void init(); }; } // namespace L0 
compute-runtime-22.14.22890/level_zero/tools/source/sysman/standby/os_standby.h000066400000000000000000000012221422164147700274510ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/tools/source/sysman/os_sysman.h" #include namespace L0 { class OsStandby { public: virtual ze_result_t getMode(zes_standby_promo_mode_t &mode) = 0; virtual ze_result_t setMode(zes_standby_promo_mode_t mode) = 0; virtual ze_result_t osStandbyGetProperties(zes_standby_properties_t &properties) = 0; virtual bool isStandbySupported(void) = 0; static OsStandby *create(OsSysman *pOsSysman, ze_bool_t onSubdevice, uint32_t subdeviceId); virtual ~OsStandby() {} }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/standby/standby.cpp000066400000000000000000000025051422164147700273100ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "standby.h" #include "shared/source/helpers/basic_math.h" #include "standby_imp.h" namespace L0 { StandbyHandleContext::~StandbyHandleContext() { for (Standby *pStandby : handleList) { delete pStandby; } } void StandbyHandleContext::createHandle(ze_device_handle_t deviceHandle) { Standby *pStandby = new StandbyImp(pOsSysman, deviceHandle); if (pStandby->isStandbyEnabled == true) { handleList.push_back(pStandby); } else { delete pStandby; } } ze_result_t StandbyHandleContext::init(std::vector &deviceHandles) { for (const auto &deviceHandle : deviceHandles) { createHandle(deviceHandle); } return ZE_RESULT_SUCCESS; } ze_result_t StandbyHandleContext::standbyGet(uint32_t *pCount, zes_standby_handle_t *phStandby) { uint32_t handleListSize = static_cast(handleList.size()); uint32_t numToCopy = std::min(*pCount, handleListSize); if (0 == *pCount || *pCount > handleListSize) { *pCount = handleListSize; } if (nullptr != phStandby) { for (uint32_t i = 0; i < numToCopy; i++) { 
phStandby[i] = handleList[i]->toStandbyHandle(); } } return ZE_RESULT_SUCCESS; } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/standby/standby.h000066400000000000000000000024371422164147700267610ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/core/source/device/device.h" #include #include struct _zes_standby_handle_t { virtual ~_zes_standby_handle_t() = default; }; namespace L0 { struct OsSysman; class Standby : _zes_standby_handle_t { public: virtual ~Standby() {} virtual ze_result_t standbyGetProperties(zes_standby_properties_t *pProperties) = 0; virtual ze_result_t standbyGetMode(zes_standby_promo_mode_t *pMode) = 0; virtual ze_result_t standbySetMode(const zes_standby_promo_mode_t mode) = 0; inline zes_standby_handle_t toStandbyHandle() { return this; } static Standby *fromHandle(zes_standby_handle_t handle) { return static_cast(handle); } bool isStandbyEnabled = false; }; struct StandbyHandleContext { StandbyHandleContext(OsSysman *pOsSysman) : pOsSysman(pOsSysman){}; ~StandbyHandleContext(); ze_result_t init(std::vector &deviceHandles); ze_result_t standbyGet(uint32_t *pCount, zes_standby_handle_t *phStandby); OsSysman *pOsSysman; std::vector handleList = {}; private: void createHandle(ze_device_handle_t deviceHandle); }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/standby/standby_imp.cpp000066400000000000000000000023631422164147700301570ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "standby_imp.h" #include "shared/source/helpers/debug_helpers.h" #include namespace L0 { ze_result_t StandbyImp::standbyGetProperties(zes_standby_properties_t *pProperties) { *pProperties = standbyProperties; return ZE_RESULT_SUCCESS; } ze_result_t StandbyImp::standbyGetMode(zes_standby_promo_mode_t *pMode) { return pOsStandby->getMode(*pMode); 
} ze_result_t StandbyImp::standbySetMode(const zes_standby_promo_mode_t mode) { return pOsStandby->setMode(mode); } void StandbyImp::init() { pOsStandby->osStandbyGetProperties(standbyProperties); this->isStandbyEnabled = pOsStandby->isStandbySupported(); } StandbyImp::StandbyImp(OsSysman *pOsSysman, ze_device_handle_t handle) : deviceHandle(handle) { ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES}; Device::fromHandle(deviceHandle)->getProperties(&deviceProperties); pOsStandby = OsStandby::create(pOsSysman, deviceProperties.flags & ZE_DEVICE_PROPERTY_FLAG_SUBDEVICE, deviceProperties.subdeviceId); UNRECOVERABLE_IF(nullptr == pOsStandby); init(); } StandbyImp::~StandbyImp() { delete pOsStandby; } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/standby/standby_imp.h000066400000000000000000000015661422164147700276300ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/non_copyable_or_moveable.h" #include #include "os_standby.h" #include "standby.h" namespace L0 { class StandbyImp : public Standby, NEO::NonCopyableOrMovableClass { public: ze_result_t standbyGetProperties(zes_standby_properties_t *pProperties) override; ze_result_t standbyGetMode(zes_standby_promo_mode_t *pMode) override; ze_result_t standbySetMode(const zes_standby_promo_mode_t mode) override; StandbyImp() = default; StandbyImp(OsSysman *pOsSysman, ze_device_handle_t handle); ~StandbyImp() override; OsStandby *pOsStandby = nullptr; void init(); private: zes_standby_properties_t standbyProperties = {}; ze_device_handle_t deviceHandle = nullptr; }; } // namespace L0 
compute-runtime-22.14.22890/level_zero/tools/source/sysman/standby/windows/000077500000000000000000000000001422164147700266305ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/standby/windows/CMakeLists.txt000066400000000000000000000010051422164147700313640ustar00rootroot00000000000000# # Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_TOOLS_SYSMAN_STANDBY_WINDOWS ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/os_standby_imp.cpp ) if(WIN32) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_SYSMAN_STANDBY_WINDOWS} ) endif() # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_SYSMAN_STANDBY_WINDOWS ${L0_SRCS_TOOLS_SYSMAN_STANDBY_WINDOWS}) compute-runtime-22.14.22890/level_zero/tools/source/sysman/standby/windows/os_standby_imp.cpp000066400000000000000000000022251422164147700323470ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/standby/os_standby.h" namespace L0 { class WddmStandbyImp : public OsStandby { public: ze_result_t getMode(zes_standby_promo_mode_t &mode) override; ze_result_t setMode(zes_standby_promo_mode_t mode) override; ze_result_t osStandbyGetProperties(zes_standby_properties_t &properties) override; bool isStandbySupported(void) override; }; ze_result_t WddmStandbyImp::setMode(zes_standby_promo_mode_t mode) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t WddmStandbyImp::getMode(zes_standby_promo_mode_t &mode) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t WddmStandbyImp::osStandbyGetProperties(zes_standby_properties_t &properties) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } bool WddmStandbyImp::isStandbySupported(void) { return false; } OsStandby *OsStandby::create(OsSysman *pOsSysman, ze_bool_t onSubdevice, uint32_t subdeviceId) { WddmStandbyImp *pWddmStandbyImp = new 
WddmStandbyImp(); return static_cast(pWddmStandbyImp); } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/sysman.cpp000066400000000000000000000246641422164147700255240ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/sysman.h" #include "level_zero/core/source/device/device_imp.h" #include "level_zero/core/source/driver/driver.h" #include "level_zero/core/source/driver/driver_handle_imp.h" #include "level_zero/tools/source/sysman/sysman_imp.h" #include namespace L0 { void DeviceImp::createSysmanHandle(bool isSubDevice) { if (static_cast(driverHandle)->enableSysman && !isSubDevice) { if (this->getSysmanHandle() == nullptr) { // Sysman handles are created only during zeInit time device creation. And destroyed during L0::device destroy. this->setSysmanHandle(L0::SysmanDeviceHandleContext::init(this->toHandle())); } } } SysmanDevice *SysmanDeviceHandleContext::init(ze_device_handle_t coreDevice) { SysmanDeviceImp *sysmanDevice = new SysmanDeviceImp(coreDevice); DEBUG_BREAK_IF(!sysmanDevice); if (ZE_RESULT_SUCCESS != sysmanDevice->init()) { delete sysmanDevice; sysmanDevice = nullptr; } L0::DeviceImp *device = static_cast(Device::fromHandle(coreDevice)); for (auto &subDevice : device->subDevices) { static_cast(subDevice)->setSysmanHandle(sysmanDevice); } return sysmanDevice; } void DeviceImp::setSysmanHandle(SysmanDevice *pSysmanDev) { pSysmanDevice = pSysmanDev; } SysmanDevice *DeviceImp::getSysmanHandle() { return pSysmanDevice; } ze_result_t DriverHandleImp::sysmanEventsListen( uint32_t timeout, uint32_t count, zes_device_handle_t *phDevices, uint32_t *pNumDeviceEvents, zes_event_type_flags_t *pEvents) { bool gotSysmanEvent = false; memset(pEvents, 0, count * sizeof(zes_event_type_flags_t)); auto timeToExitLoop = L0::steadyClock::now() + std::chrono::milliseconds(timeout); do { for (uint32_t devIndex = 0; devIndex < count; devIndex++) { 
gotSysmanEvent = L0::SysmanDevice::fromHandle(phDevices[devIndex])->deviceEventListen(pEvents[devIndex], timeout); if (gotSysmanEvent) { *pNumDeviceEvents = 1; break; } } if (gotSysmanEvent) { break; } std::this_thread::sleep_for(std::chrono::milliseconds(10)); // Sleep for 10 milliseconds before next check of events } while ((L0::steadyClock::now() <= timeToExitLoop)); return ZE_RESULT_SUCCESS; } ze_result_t DriverHandleImp::sysmanEventsListenEx( uint64_t timeout, uint32_t count, zes_device_handle_t *phDevices, uint32_t *pNumDeviceEvents, zes_event_type_flags_t *pEvents) { bool gotSysmanEvent = false; memset(pEvents, 0, count * sizeof(zes_event_type_flags_t)); auto timeToExitLoop = L0::steadyClock::now() + std::chrono::duration(timeout); do { for (uint32_t devIndex = 0; devIndex < count; devIndex++) { gotSysmanEvent = L0::SysmanDevice::fromHandle(phDevices[devIndex])->deviceEventListen(pEvents[devIndex], timeout); if (gotSysmanEvent) { *pNumDeviceEvents = 1; break; } } if (gotSysmanEvent) { break; } std::this_thread::sleep_for(std::chrono::milliseconds(10)); // Sleep for 10 milliseconds before next check of events } while ((L0::steadyClock::now() <= timeToExitLoop)); return ZE_RESULT_SUCCESS; } ze_result_t SysmanDevice::performanceGet(zes_device_handle_t hDevice, uint32_t *pCount, zes_perf_handle_t *phPerformance) { auto pSysmanDevice = L0::SysmanDevice::fromHandle(hDevice); if (pSysmanDevice == nullptr) { return ZE_RESULT_ERROR_UNINITIALIZED; } return pSysmanDevice->performanceGet(pCount, phPerformance); } ze_result_t SysmanDevice::powerGet(zes_device_handle_t hDevice, uint32_t *pCount, zes_pwr_handle_t *phPower) { auto pSysmanDevice = L0::SysmanDevice::fromHandle(hDevice); if (pSysmanDevice == nullptr) { return ZE_RESULT_ERROR_UNINITIALIZED; } return pSysmanDevice->powerGet(pCount, phPower); } ze_result_t SysmanDevice::frequencyGet(zes_device_handle_t hDevice, uint32_t *pCount, zes_freq_handle_t *phFrequency) { auto pSysmanDevice = 
L0::SysmanDevice::fromHandle(hDevice); if (pSysmanDevice == nullptr) { return ZE_RESULT_ERROR_UNINITIALIZED; } return pSysmanDevice->frequencyGet(pCount, phFrequency); } ze_result_t SysmanDevice::fabricPortGet(zes_device_handle_t hDevice, uint32_t *pCount, zes_fabric_port_handle_t *phPort) { auto pSysmanDevice = L0::SysmanDevice::fromHandle(hDevice); if (pSysmanDevice == nullptr) { return ZE_RESULT_ERROR_UNINITIALIZED; } return pSysmanDevice->fabricPortGet(pCount, phPort); } ze_result_t SysmanDevice::temperatureGet(zes_device_handle_t hDevice, uint32_t *pCount, zes_temp_handle_t *phTemperature) { auto pSysmanDevice = L0::SysmanDevice::fromHandle(hDevice); if (pSysmanDevice == nullptr) { return ZE_RESULT_ERROR_UNINITIALIZED; } return pSysmanDevice->temperatureGet(pCount, phTemperature); } ze_result_t SysmanDevice::standbyGet(zes_device_handle_t hDevice, uint32_t *pCount, zes_standby_handle_t *phStandby) { auto pSysmanDevice = L0::SysmanDevice::fromHandle(hDevice); if (pSysmanDevice == nullptr) { return ZE_RESULT_ERROR_UNINITIALIZED; } return pSysmanDevice->standbyGet(pCount, phStandby); } ze_result_t SysmanDevice::deviceGetProperties(zes_device_handle_t hDevice, zes_device_properties_t *pProperties) { auto pSysmanDevice = L0::SysmanDevice::fromHandle(hDevice); if (pSysmanDevice == nullptr) { return ZE_RESULT_ERROR_UNINITIALIZED; } return pSysmanDevice->deviceGetProperties(pProperties); } ze_result_t SysmanDevice::processesGetState(zes_device_handle_t hDevice, uint32_t *pCount, zes_process_state_t *pProcesses) { auto pSysmanDevice = L0::SysmanDevice::fromHandle(hDevice); if (pSysmanDevice == nullptr) { return ZE_RESULT_ERROR_UNINITIALIZED; } return pSysmanDevice->processesGetState(pCount, pProcesses); } ze_result_t SysmanDevice::deviceReset(zes_device_handle_t hDevice, ze_bool_t force) { auto pSysmanDevice = L0::SysmanDevice::fromHandle(hDevice); if (pSysmanDevice == nullptr) { return ZE_RESULT_ERROR_UNINITIALIZED; } return pSysmanDevice->deviceReset(force); } 
ze_result_t SysmanDevice::deviceGetState(zes_device_handle_t hDevice, zes_device_state_t *pState) { auto pSysmanDevice = L0::SysmanDevice::fromHandle(hDevice); if (pSysmanDevice == nullptr) { return ZE_RESULT_ERROR_UNINITIALIZED; } return pSysmanDevice->deviceGetState(pState); } ze_result_t SysmanDevice::engineGet(zes_device_handle_t hDevice, uint32_t *pCount, zes_engine_handle_t *phEngine) { auto pSysmanDevice = L0::SysmanDevice::fromHandle(hDevice); if (pSysmanDevice == nullptr) { return ZE_RESULT_ERROR_UNINITIALIZED; } return pSysmanDevice->engineGet(pCount, phEngine); } ze_result_t SysmanDevice::pciGetProperties(zes_device_handle_t hDevice, zes_pci_properties_t *pProperties) { auto pSysmanDevice = L0::SysmanDevice::fromHandle(hDevice); if (pSysmanDevice == nullptr) { return ZE_RESULT_ERROR_UNINITIALIZED; } return pSysmanDevice->pciGetProperties(pProperties); } ze_result_t SysmanDevice::pciGetState(zes_device_handle_t hDevice, zes_pci_state_t *pState) { auto pSysmanDevice = L0::SysmanDevice::fromHandle(hDevice); if (pSysmanDevice == nullptr) { return ZE_RESULT_ERROR_UNINITIALIZED; } return pSysmanDevice->pciGetState(pState); } ze_result_t SysmanDevice::pciGetBars(zes_device_handle_t hDevice, uint32_t *pCount, zes_pci_bar_properties_t *pProperties) { auto pSysmanDevice = L0::SysmanDevice::fromHandle(hDevice); if (pSysmanDevice == nullptr) { return ZE_RESULT_ERROR_UNINITIALIZED; } return pSysmanDevice->pciGetBars(pCount, pProperties); } ze_result_t SysmanDevice::pciGetStats(zes_device_handle_t hDevice, zes_pci_stats_t *pStats) { auto pSysmanDevice = L0::SysmanDevice::fromHandle(hDevice); if (pSysmanDevice == nullptr) { return ZE_RESULT_ERROR_UNINITIALIZED; } return pSysmanDevice->pciGetStats(pStats); } ze_result_t SysmanDevice::schedulerGet(zes_device_handle_t hDevice, uint32_t *pCount, zes_sched_handle_t *phScheduler) { auto pSysmanDevice = L0::SysmanDevice::fromHandle(hDevice); if (pSysmanDevice == nullptr) { return ZE_RESULT_ERROR_UNINITIALIZED; } return 
pSysmanDevice->schedulerGet(pCount, phScheduler); } ze_result_t SysmanDevice::rasGet(zes_device_handle_t hDevice, uint32_t *pCount, zes_ras_handle_t *phRas) { auto pSysmanDevice = L0::SysmanDevice::fromHandle(hDevice); if (pSysmanDevice == nullptr) { return ZE_RESULT_ERROR_UNINITIALIZED; } return pSysmanDevice->rasGet(pCount, phRas); } ze_result_t SysmanDevice::memoryGet(zes_device_handle_t hDevice, uint32_t *pCount, zes_mem_handle_t *phMemory) { auto pSysmanDevice = L0::SysmanDevice::fromHandle(hDevice); if (pSysmanDevice == nullptr) { return ZE_RESULT_ERROR_UNINITIALIZED; } return pSysmanDevice->memoryGet(pCount, phMemory); } ze_result_t SysmanDevice::fanGet(zes_device_handle_t hDevice, uint32_t *pCount, zes_fan_handle_t *phFan) { auto pSysmanDevice = L0::SysmanDevice::fromHandle(hDevice); if (pSysmanDevice == nullptr) { return ZE_RESULT_ERROR_UNINITIALIZED; } return pSysmanDevice->fanGet(pCount, phFan); } ze_result_t SysmanDevice::diagnosticsGet(zes_device_handle_t hDevice, uint32_t *pCount, zes_diag_handle_t *phDiagnostics) { auto pSysmanDevice = L0::SysmanDevice::fromHandle(hDevice); if (pSysmanDevice == nullptr) { return ZE_RESULT_ERROR_UNINITIALIZED; } return pSysmanDevice->diagnosticsGet(pCount, phDiagnostics); } ze_result_t SysmanDevice::firmwareGet(zes_device_handle_t hDevice, uint32_t *pCount, zes_firmware_handle_t *phFirmware) { auto pSysmanDevice = L0::SysmanDevice::fromHandle(hDevice); if (pSysmanDevice == nullptr) { return ZE_RESULT_ERROR_UNINITIALIZED; } return pSysmanDevice->firmwareGet(pCount, phFirmware); } ze_result_t SysmanDevice::deviceEventRegister(zes_device_handle_t hDevice, zes_event_type_flags_t events) { auto pSysmanDevice = L0::SysmanDevice::fromHandle(hDevice); if (pSysmanDevice == nullptr) { return ZE_RESULT_ERROR_UNINITIALIZED; } return pSysmanDevice->deviceEventRegister(events); } } // namespace L0 
compute-runtime-22.14.22890/level_zero/tools/source/sysman/sysman.h000066400000000000000000000136311422164147700251610ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/tools/source/sysman/diagnostics/diagnostics.h" #include "level_zero/tools/source/sysman/engine/engine.h" #include "level_zero/tools/source/sysman/events/events.h" #include "level_zero/tools/source/sysman/fabric_port/fabric_port.h" #include "level_zero/tools/source/sysman/fan/fan.h" #include "level_zero/tools/source/sysman/firmware/firmware.h" #include "level_zero/tools/source/sysman/frequency/frequency.h" #include "level_zero/tools/source/sysman/global_operations/global_operations.h" #include "level_zero/tools/source/sysman/memory/memory.h" #include "level_zero/tools/source/sysman/pci/pci.h" #include "level_zero/tools/source/sysman/performance/performance.h" #include "level_zero/tools/source/sysman/power/power.h" #include "level_zero/tools/source/sysman/ras/ras.h" #include "level_zero/tools/source/sysman/scheduler/scheduler.h" #include "level_zero/tools/source/sysman/standby/standby.h" #include "level_zero/tools/source/sysman/temperature/temperature.h" #include struct _zet_sysman_handle_t {}; namespace L0 { struct Device; struct SysmanDevice : _ze_device_handle_t { static SysmanDevice *fromHandle(zes_device_handle_t handle) { return Device::fromHandle(handle)->getSysmanHandle(); } static ze_result_t performanceGet(zes_device_handle_t hDevice, uint32_t *pCount, zes_perf_handle_t *phPerformance); static ze_result_t powerGet(zes_device_handle_t hDevice, uint32_t *pCount, zes_pwr_handle_t *phPower); static ze_result_t frequencyGet(zes_device_handle_t hDevice, uint32_t *pCount, zes_freq_handle_t *phFrequency); static ze_result_t fabricPortGet(zes_device_handle_t hDevice, uint32_t *pCount, zes_fabric_port_handle_t *phPort); static ze_result_t temperatureGet(zes_device_handle_t hDevice, uint32_t *pCount, 
zes_temp_handle_t *phTemperature); static ze_result_t standbyGet(zes_device_handle_t hDevice, uint32_t *pCount, zes_standby_handle_t *phStandby); static ze_result_t deviceGetProperties(zes_device_handle_t hDevice, zes_device_properties_t *pProperties); static ze_result_t processesGetState(zes_device_handle_t hDevice, uint32_t *pCount, zes_process_state_t *pProcesses); static ze_result_t deviceReset(zes_device_handle_t hDevice, ze_bool_t force); static ze_result_t deviceGetState(zes_device_handle_t hDevice, zes_device_state_t *pState); static ze_result_t engineGet(zes_device_handle_t hDevice, uint32_t *pCount, zes_engine_handle_t *phEngine); static ze_result_t pciGetProperties(zes_device_handle_t hDevice, zes_pci_properties_t *pProperties); static ze_result_t pciGetState(zes_device_handle_t hDevice, zes_pci_state_t *pState); static ze_result_t pciGetBars(zes_device_handle_t hDevice, uint32_t *pCount, zes_pci_bar_properties_t *pProperties); static ze_result_t pciGetStats(zes_device_handle_t hDevice, zes_pci_stats_t *pStats); static ze_result_t schedulerGet(zes_device_handle_t hDevice, uint32_t *pCount, zes_sched_handle_t *phScheduler); static ze_result_t rasGet(zes_device_handle_t hDevice, uint32_t *pCount, zes_ras_handle_t *phRas); static ze_result_t memoryGet(zes_device_handle_t hDevice, uint32_t *pCount, zes_mem_handle_t *phMemory); static ze_result_t fanGet(zes_device_handle_t hDevice, uint32_t *pCount, zes_fan_handle_t *phFan); static ze_result_t diagnosticsGet(zes_device_handle_t hDevice, uint32_t *pCount, zes_diag_handle_t *phDiagnostics); static ze_result_t firmwareGet(zes_device_handle_t hDevice, uint32_t *pCount, zes_firmware_handle_t *phFirmware); static ze_result_t deviceEventRegister(zes_device_handle_t hDevice, zes_event_type_flags_t events); static bool deviceEventListen(zes_device_handle_t hDevice, zes_event_type_flags_t &pEvent, uint64_t timeout); virtual ze_result_t performanceGet(uint32_t *pCount, zes_perf_handle_t *phPerformance) = 0; virtual 
ze_result_t powerGet(uint32_t *pCount, zes_pwr_handle_t *phPower) = 0; virtual ze_result_t frequencyGet(uint32_t *pCount, zes_freq_handle_t *phFrequency) = 0; virtual ze_result_t fabricPortGet(uint32_t *pCount, zes_fabric_port_handle_t *phPort) = 0; virtual ze_result_t temperatureGet(uint32_t *pCount, zes_temp_handle_t *phTemperature) = 0; virtual ze_result_t standbyGet(uint32_t *pCount, zes_standby_handle_t *phStandby) = 0; virtual ze_result_t deviceGetProperties(zes_device_properties_t *pProperties) = 0; virtual ze_result_t processesGetState(uint32_t *pCount, zes_process_state_t *pProcesses) = 0; virtual ze_result_t deviceReset(ze_bool_t force) = 0; virtual ze_result_t deviceGetState(zes_device_state_t *pState) = 0; virtual ze_result_t engineGet(uint32_t *pCount, zes_engine_handle_t *phEngine) = 0; virtual ze_result_t pciGetProperties(zes_pci_properties_t *pProperties) = 0; virtual ze_result_t pciGetState(zes_pci_state_t *pState) = 0; virtual ze_result_t pciGetBars(uint32_t *pCount, zes_pci_bar_properties_t *pProperties) = 0; virtual ze_result_t pciGetStats(zes_pci_stats_t *pStats) = 0; virtual ze_result_t schedulerGet(uint32_t *pCount, zes_sched_handle_t *phScheduler) = 0; virtual ze_result_t rasGet(uint32_t *pCount, zes_ras_handle_t *phRas) = 0; virtual ze_result_t memoryGet(uint32_t *pCount, zes_mem_handle_t *phMemory) = 0; virtual ze_result_t fanGet(uint32_t *pCount, zes_fan_handle_t *phFan) = 0; virtual ze_result_t diagnosticsGet(uint32_t *pCount, zes_diag_handle_t *phDiagnostics) = 0; virtual ze_result_t firmwareGet(uint32_t *pCount, zes_firmware_handle_t *phFirmware) = 0; virtual ze_result_t deviceEventRegister(zes_event_type_flags_t events) = 0; virtual bool deviceEventListen(zes_event_type_flags_t &pEvent, uint64_t timeout) = 0; virtual ~SysmanDevice() = default; }; class SysmanDeviceHandleContext { public: SysmanDeviceHandleContext() = delete; static SysmanDevice *init(ze_device_handle_t device); }; } // namespace L0 
compute-runtime-22.14.22890/level_zero/tools/source/sysman/sysman_const.h000066400000000000000000000040131422164147700263610ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include #include const std::string vendorIntel("Intel(R) Corporation"); const std::string unknown("unknown"); const std::string intelPciId("0x8086"); constexpr uint32_t MbpsToBytesPerSecond = 125000; constexpr double milliVoltsFactor = 1000.0; namespace L0 { struct steadyClock { typedef std::chrono::duration duration; typedef duration::rep rep; typedef duration::period period; typedef std::chrono::time_point time_point; static constexpr bool is_steady = true; static time_point now() noexcept { static auto epoch = std::chrono::steady_clock::now(); return time_point(std::chrono::duration_cast(std::chrono::steady_clock::now() - epoch)); } }; } // namespace L0 namespace PciLinkSpeeds { constexpr double Pci2_5GigatransfersPerSecond = 2.5; constexpr double Pci5_0GigatransfersPerSecond = 5.0; constexpr double Pci8_0GigatransfersPerSecond = 8.0; constexpr double Pci16_0GigatransfersPerSecond = 16.0; constexpr double Pci32_0GigatransfersPerSecond = 32.0; } // namespace PciLinkSpeeds enum PciGenerations { PciGen1 = 1, PciGen2, PciGen3, PciGen4, PciGen5, }; constexpr uint8_t maxPciBars = 6; // Linux kernel would report 255 link width, as an indication of unknown. 
constexpr uint32_t unknownPcieLinkWidth = 255u; constexpr uint32_t microSecondsToNanoSeconds = 1000u; constexpr uint64_t convertJouleToMicroJoule = 1000000u; constexpr uint64_t minTimeoutModeHeartbeat = 5000u; constexpr uint64_t minTimeoutInMicroSeconds = 1000u; constexpr uint16_t milliSecsToMicroSecs = 1000; constexpr uint32_t milliFactor = 1000u; constexpr uint32_t microFacor = milliFactor * milliFactor; constexpr uint64_t gigaUnitTransferToUnitTransfer = 1000 * 1000 * 1000; constexpr int32_t memoryBusWidth = 128; // bus width in bits constexpr int32_t numMemoryChannels = 8; #define BITS(x, at, width) (((x) >> (at)) & ((1 << (width)) - 1))compute-runtime-22.14.22890/level_zero/tools/source/sysman/sysman_imp.cpp000066400000000000000000000212321422164147700263550ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/sysman_imp.h" #include "level_zero/core/source/driver/driver.h" #include "level_zero/core/source/driver/driver_handle_imp.h" #include "level_zero/tools/source/sysman/events/events_imp.h" #include "level_zero/tools/source/sysman/global_operations/global_operations_imp.h" #include "level_zero/tools/source/sysman/pci/pci_imp.h" #include "level_zero/tools/source/sysman/sysman.h" #include namespace L0 { SysmanDeviceImp::SysmanDeviceImp(ze_device_handle_t hDevice) { hCoreDevice = hDevice; pOsSysman = OsSysman::create(this); UNRECOVERABLE_IF(nullptr == pOsSysman); pPci = new PciImp(pOsSysman); pPowerHandleContext = new PowerHandleContext(pOsSysman); pFrequencyHandleContext = new FrequencyHandleContext(pOsSysman); pFabricPortHandleContext = new FabricPortHandleContext(pOsSysman); pTempHandleContext = new TemperatureHandleContext(pOsSysman); pStandbyHandleContext = new StandbyHandleContext(pOsSysman); pEngineHandleContext = new EngineHandleContext(pOsSysman); pSchedulerHandleContext = new SchedulerHandleContext(pOsSysman); pRasHandleContext = new 
RasHandleContext(pOsSysman); pMemoryHandleContext = new MemoryHandleContext(pOsSysman); pGlobalOperations = new GlobalOperationsImp(pOsSysman); pEvents = new EventsImp(pOsSysman); pFanHandleContext = new FanHandleContext(pOsSysman); pFirmwareHandleContext = new FirmwareHandleContext(pOsSysman); pDiagnosticsHandleContext = new DiagnosticsHandleContext(pOsSysman); pPerformanceHandleContext = new PerformanceHandleContext(pOsSysman); } SysmanDeviceImp::~SysmanDeviceImp() { freeResource(pPerformanceHandleContext); freeResource(pDiagnosticsHandleContext); freeResource(pFirmwareHandleContext); freeResource(pFanHandleContext); freeResource(pEvents); freeResource(pGlobalOperations); freeResource(pMemoryHandleContext); freeResource(pRasHandleContext); freeResource(pSchedulerHandleContext); freeResource(pEngineHandleContext); freeResource(pStandbyHandleContext); freeResource(pTempHandleContext); freeResource(pFabricPortHandleContext); freeResource(pPci); freeResource(pFrequencyHandleContext); freeResource(pPowerHandleContext); freeResource(pOsSysman); } void SysmanDeviceImp::updateSubDeviceHandlesLocally() { uint32_t subDeviceCount = 0; deviceHandles.clear(); Device::fromHandle(hCoreDevice)->getSubDevices(&subDeviceCount, nullptr); if (subDeviceCount == 0) { deviceHandles.resize(1, hCoreDevice); } else { deviceHandles.resize(subDeviceCount, nullptr); Device::fromHandle(hCoreDevice)->getSubDevices(&subDeviceCount, deviceHandles.data()); } } void SysmanDeviceImp::getSysmanDeviceInfo(zes_device_handle_t hDevice, uint32_t &subdeviceId, ze_bool_t &onSubdevice) { NEO::Device *neoDevice = Device::fromHandle(hDevice)->getNEODevice(); onSubdevice = static_cast(false); if (NEO::HwHelper::getSubDevicesCount(&neoDevice->getHardwareInfo()) > 1) { onSubdevice = static_cast(true); } if (!neoDevice->isSubDevice()) { // To get physical device or subdeviceIndex Index in case when the device does not support tile architecture is single tile device 
UNRECOVERABLE_IF(neoDevice->getDeviceBitfield().count() != 1) // or the device is single tile device or AFFINITY_MASK only exposes single tile subdeviceId = Math::log2(static_cast(neoDevice->getDeviceBitfield().to_ulong())); } else { subdeviceId = static_cast(neoDevice)->getSubDeviceIndex(); } } ze_result_t SysmanDeviceImp::init() { // We received a device handle. Check for subdevices in this device updateSubDeviceHandlesLocally(); auto result = pOsSysman->init(); if (ZE_RESULT_SUCCESS != result) { return result; } if (pPowerHandleContext) { pPowerHandleContext->init(deviceHandles, hCoreDevice); } if (pFrequencyHandleContext) { pFrequencyHandleContext->init(deviceHandles); } if (pFabricPortHandleContext) { pFabricPortHandleContext->init(); } if (pTempHandleContext) { pTempHandleContext->init(deviceHandles); } if (pPci) { pPci->init(); } if (pStandbyHandleContext) { pStandbyHandleContext->init(deviceHandles); } if (pEngineHandleContext) { pEngineHandleContext->init(); } if (pSchedulerHandleContext) { pSchedulerHandleContext->init(deviceHandles); } if (pRasHandleContext) { pRasHandleContext->init(deviceHandles); } if (pMemoryHandleContext) { pMemoryHandleContext->init(deviceHandles); } if (pGlobalOperations) { pGlobalOperations->init(); } if (pEvents) { pEvents->init(); } if (pFanHandleContext) { pFanHandleContext->init(); } if (pFirmwareHandleContext) { pFirmwareHandleContext->init(); } if (pDiagnosticsHandleContext) { pDiagnosticsHandleContext->init(deviceHandles); } if (pPerformanceHandleContext) { pPerformanceHandleContext->init(deviceHandles, hCoreDevice); } return result; } ze_result_t SysmanDeviceImp::frequencyGet(uint32_t *pCount, zes_freq_handle_t *phFrequency) { return pFrequencyHandleContext->frequencyGet(pCount, phFrequency); } ze_result_t SysmanDeviceImp::deviceGetProperties(zes_device_properties_t *pProperties) { return pGlobalOperations->deviceGetProperties(pProperties); } ze_result_t SysmanDeviceImp::processesGetState(uint32_t *pCount, 
zes_process_state_t *pProcesses) { return pGlobalOperations->processesGetState(pCount, pProcesses); } ze_result_t SysmanDeviceImp::deviceReset(ze_bool_t force) { return pGlobalOperations->reset(force); } ze_result_t SysmanDeviceImp::deviceEventRegister(zes_event_type_flags_t events) { return pEvents->eventRegister(events); } bool SysmanDeviceImp::deviceEventListen(zes_event_type_flags_t &pEvent, uint64_t timeout) { return pEvents->eventListen(pEvent, timeout); } ze_result_t SysmanDeviceImp::deviceGetState(zes_device_state_t *pState) { return pGlobalOperations->deviceGetState(pState); } ze_result_t SysmanDeviceImp::pciGetProperties(zes_pci_properties_t *pProperties) { return pPci->pciStaticProperties(pProperties); } ze_result_t SysmanDeviceImp::pciGetState(zes_pci_state_t *pState) { return pPci->pciGetState(pState); } ze_result_t SysmanDeviceImp::pciGetBars(uint32_t *pCount, zes_pci_bar_properties_t *pProperties) { return pPci->pciGetInitializedBars(pCount, pProperties); } ze_result_t SysmanDeviceImp::pciGetStats(zes_pci_stats_t *pStats) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t SysmanDeviceImp::powerGet(uint32_t *pCount, zes_pwr_handle_t *phPower) { return pPowerHandleContext->powerGet(pCount, phPower); } ze_result_t SysmanDeviceImp::engineGet(uint32_t *pCount, zes_engine_handle_t *phEngine) { return pEngineHandleContext->engineGet(pCount, phEngine); } ze_result_t SysmanDeviceImp::standbyGet(uint32_t *pCount, zes_standby_handle_t *phStandby) { return pStandbyHandleContext->standbyGet(pCount, phStandby); } ze_result_t SysmanDeviceImp::fabricPortGet(uint32_t *pCount, zes_fabric_port_handle_t *phPort) { return pFabricPortHandleContext->fabricPortGet(pCount, phPort); } ze_result_t SysmanDeviceImp::temperatureGet(uint32_t *pCount, zes_temp_handle_t *phTemperature) { return pTempHandleContext->temperatureGet(pCount, phTemperature); } ze_result_t SysmanDeviceImp::schedulerGet(uint32_t *pCount, zes_sched_handle_t *phScheduler) { return 
pSchedulerHandleContext->schedulerGet(pCount, phScheduler); } ze_result_t SysmanDeviceImp::rasGet(uint32_t *pCount, zes_ras_handle_t *phRas) { return pRasHandleContext->rasGet(pCount, phRas); } ze_result_t SysmanDeviceImp::firmwareGet(uint32_t *pCount, zes_firmware_handle_t *phFirmware) { return pFirmwareHandleContext->firmwareGet(pCount, phFirmware); } ze_result_t SysmanDeviceImp::diagnosticsGet(uint32_t *pCount, zes_diag_handle_t *phDiagnostics) { return pDiagnosticsHandleContext->diagnosticsGet(pCount, phDiagnostics); } ze_result_t SysmanDeviceImp::memoryGet(uint32_t *pCount, zes_mem_handle_t *phMemory) { return pMemoryHandleContext->memoryGet(pCount, phMemory); } ze_result_t SysmanDeviceImp::fanGet(uint32_t *pCount, zes_fan_handle_t *phFan) { return pFanHandleContext->fanGet(pCount, phFan); } ze_result_t SysmanDeviceImp::performanceGet(uint32_t *pCount, zes_perf_handle_t *phPerformance) { return pPerformanceHandleContext->performanceGet(pCount, phPerformance); } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/sysman_imp.h000066400000000000000000000074171422164147700260330ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/non_copyable_or_moveable.h" #include "level_zero/tools/source/sysman/os_sysman.h" #include "level_zero/tools/source/sysman/sysman.h" #include #include namespace L0 { struct SysmanDeviceImp : SysmanDevice, NEO::NonCopyableOrMovableClass { SysmanDeviceImp(ze_device_handle_t hDevice); ~SysmanDeviceImp() override; SysmanDeviceImp() = delete; ze_result_t init(); ze_device_handle_t hCoreDevice = nullptr; std::vector deviceHandles; // handles of all subdevices OsSysman *pOsSysman = nullptr; Pci *pPci = nullptr; GlobalOperations *pGlobalOperations = nullptr; Events *pEvents = nullptr; PowerHandleContext *pPowerHandleContext = nullptr; FrequencyHandleContext *pFrequencyHandleContext = nullptr; FabricPortHandleContext 
*pFabricPortHandleContext = nullptr; TemperatureHandleContext *pTempHandleContext = nullptr; StandbyHandleContext *pStandbyHandleContext = nullptr; EngineHandleContext *pEngineHandleContext = nullptr; SchedulerHandleContext *pSchedulerHandleContext = nullptr; RasHandleContext *pRasHandleContext = nullptr; MemoryHandleContext *pMemoryHandleContext = nullptr; FanHandleContext *pFanHandleContext = nullptr; FirmwareHandleContext *pFirmwareHandleContext = nullptr; DiagnosticsHandleContext *pDiagnosticsHandleContext = nullptr; PerformanceHandleContext *pPerformanceHandleContext = nullptr; ze_result_t performanceGet(uint32_t *pCount, zes_perf_handle_t *phPerformance) override; ze_result_t powerGet(uint32_t *pCount, zes_pwr_handle_t *phPower) override; ze_result_t frequencyGet(uint32_t *pCount, zes_freq_handle_t *phFrequency) override; ze_result_t fabricPortGet(uint32_t *pCount, zes_fabric_port_handle_t *phPort) override; ze_result_t temperatureGet(uint32_t *pCount, zes_temp_handle_t *phTemperature) override; ze_result_t standbyGet(uint32_t *pCount, zes_standby_handle_t *phStandby) override; ze_result_t deviceGetProperties(zes_device_properties_t *pProperties) override; ze_result_t processesGetState(uint32_t *pCount, zes_process_state_t *pProcesses) override; ze_result_t deviceReset(ze_bool_t force) override; ze_result_t deviceGetState(zes_device_state_t *pState) override; ze_result_t engineGet(uint32_t *pCount, zes_engine_handle_t *phEngine) override; ze_result_t pciGetProperties(zes_pci_properties_t *pProperties) override; ze_result_t pciGetState(zes_pci_state_t *pState) override; ze_result_t pciGetBars(uint32_t *pCount, zes_pci_bar_properties_t *pProperties) override; ze_result_t pciGetStats(zes_pci_stats_t *pStats) override; ze_result_t schedulerGet(uint32_t *pCount, zes_sched_handle_t *phScheduler) override; ze_result_t rasGet(uint32_t *pCount, zes_ras_handle_t *phRas) override; ze_result_t memoryGet(uint32_t *pCount, zes_mem_handle_t *phMemory) override; ze_result_t 
fanGet(uint32_t *pCount, zes_fan_handle_t *phFan) override; ze_result_t diagnosticsGet(uint32_t *pCount, zes_diag_handle_t *phFirmware) override; ze_result_t firmwareGet(uint32_t *pCount, zes_firmware_handle_t *phFirmware) override; ze_result_t deviceEventRegister(zes_event_type_flags_t events) override; bool deviceEventListen(zes_event_type_flags_t &pEvent, uint64_t timeout) override; static void getSysmanDeviceInfo(zes_device_handle_t hDevice, uint32_t &subdeviceId, ze_bool_t &onSubdevice); void updateSubDeviceHandlesLocally(); private: template void inline freeResource(T *&resource) { if (resource) { delete resource; resource = nullptr; } } }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/temperature/000077500000000000000000000000001422164147700260275ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/temperature/CMakeLists.txt000066400000000000000000000012711422164147700305700ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_TOOLS_SYSMAN_TEMPERATURE ${CMAKE_CURRENT_SOURCE_DIR}/temperature.cpp ${CMAKE_CURRENT_SOURCE_DIR}/temperature.h ${CMAKE_CURRENT_SOURCE_DIR}/temperature_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/temperature_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/os_temperature.h ) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_SYSMAN_TEMPERATURE} ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) add_subdirectories() # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_SYSMAN_TEMPERATURE ${L0_SRCS_TOOLS_SYSMAN_TEMPERATURE}) compute-runtime-22.14.22890/level_zero/tools/source/sysman/temperature/linux/000077500000000000000000000000001422164147700271665ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/temperature/linux/CMakeLists.txt000066400000000000000000000014601422164147700317270ustar00rootroot00000000000000# # Copyright (C) 2020-2022 Intel Corporation # # 
SPDX-License-Identifier: MIT # set(L0_SRCS_TOOLS_SYSMAN_TEMPERATURE_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/os_temperature_imp.h ) if(NEO_ENABLE_i915_PRELIM_DETECTION) list(APPEND L0_SRCS_TOOLS_SYSMAN_TEMPERATURE_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/os_temperature_imp_prelim.cpp ) else() list(APPEND L0_SRCS_TOOLS_SYSMAN_TEMPERATURE_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/os_temperature_imp.cpp ) endif() if(UNIX) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_SYSMAN_TEMPERATURE_LINUX} ) endif() # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_SYSMAN_TEMPERATURE_LINUX ${L0_SRCS_TOOLS_SYSMAN_TEMPERATURE_LINUX}) compute-runtime-22.14.22890/level_zero/tools/source/sysman/temperature/linux/os_temperature_imp.cpp000066400000000000000000000137761422164147700336130ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/temperature/linux/os_temperature_imp.h" #include "level_zero/tools/source/sysman/linux/pmt/pmt.h" #include "sysman/linux/os_sysman_imp.h" namespace L0 { constexpr uint32_t numSocTemperatureEntries = 7; // entries would be PCH, DRAM, SA, PSF, DE, PCIE, TYPEC constexpr uint32_t numCoreTemperatureEntries = 4; // entries would be CORE0, CORE1, CORE2, CORE3 constexpr uint32_t numComputeTemperatureEntries = 3; // entries would be IA, GT and LLC constexpr uint32_t invalidMaxTemperature = 125; constexpr uint32_t invalidMinTemperature = 10; ze_result_t LinuxTemperatureImp::getProperties(zes_temp_properties_t *pProperties) { pProperties->type = type; pProperties->onSubdevice = 0; pProperties->subdeviceId = 0; if (isSubdevice) { pProperties->onSubdevice = isSubdevice; pProperties->subdeviceId = subdeviceId; } return ZE_RESULT_SUCCESS; } ze_result_t LinuxTemperatureImp::getGlobalMaxTemperatureNoSubDevice(double *pTemperature) { auto isValidTemperature = [](auto temperature) { if ((temperature > 
invalidMaxTemperature) || (temperature < invalidMinTemperature)) { return false; } return true; }; auto getMaxTemperature = [&](auto temperature, auto numTemperatureEntries) { uint32_t maxTemperature = 0; for (uint32_t count = 0; count < numTemperatureEntries; count++) { uint32_t localTemperatureVal = (temperature >> (8 * count)) & 0xff; if (isValidTemperature(localTemperatureVal)) { if (localTemperatureVal > maxTemperature) { maxTemperature = localTemperatureVal; } } } return maxTemperature; }; uint32_t computeTemperature = 0; std::string key("COMPUTE_TEMPERATURES"); ze_result_t result = pPmt->readValue(key, computeTemperature); if (result != ZE_RESULT_SUCCESS) { return result; } // Check max temperature among IA, GT and LLC sensors across COMPUTE_TEMPERATURES uint32_t maxComputeTemperature = getMaxTemperature(computeTemperature, numComputeTemperatureEntries); uint32_t coreTemperature = 0; key = "CORE_TEMPERATURES"; result = pPmt->readValue(key, coreTemperature); if (result != ZE_RESULT_SUCCESS) { return result; } // Check max temperature among CORE0, CORE1, CORE2, CORE3 sensors across CORE_TEMPERATURES uint32_t maxCoreTemperature = getMaxTemperature(coreTemperature, numCoreTemperatureEntries); uint64_t socTemperature = 0; key = "SOC_TEMPERATURES"; result = pPmt->readValue(key, socTemperature); if (result != ZE_RESULT_SUCCESS) { return result; } // Check max temperature among PCH, DRAM, SA, PSF, DE, PCIE, TYPEC sensors across SOC_TEMPERATURES uint32_t maxSocTemperature = getMaxTemperature(socTemperature, numSocTemperatureEntries); *pTemperature = static_cast(std::max({maxComputeTemperature, maxCoreTemperature, maxSocTemperature})); return result; } ze_result_t LinuxTemperatureImp::getGlobalMaxTemperature(double *pTemperature) { if (!isSubdevice) { return getGlobalMaxTemperatureNoSubDevice(pTemperature); } return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t LinuxTemperatureImp::getGpuMaxTemperatureNoSubDevice(double *pTemperature) { uint32_t 
computeTemperature = 0; std::string key("COMPUTE_TEMPERATURES"); ze_result_t result = pPmt->readValue(key, computeTemperature); if (result != ZE_RESULT_SUCCESS) { return result; } // GT temperature could be read via 8th to 15th bit in the value read in temperature computeTemperature = (computeTemperature >> 8) & 0xff; *pTemperature = static_cast(computeTemperature); return ZE_RESULT_SUCCESS; } ze_result_t LinuxTemperatureImp::getGpuMaxTemperature(double *pTemperature) { if (!isSubdevice) { return getGpuMaxTemperatureNoSubDevice(pTemperature); } return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t LinuxTemperatureImp::getMemoryMaxTemperature(double *pTemperature) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t LinuxTemperatureImp::getSensorTemperature(double *pTemperature) { ze_result_t result = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; switch (type) { case ZES_TEMP_SENSORS_GLOBAL: result = getGlobalMaxTemperature(pTemperature); if (result != ZE_RESULT_SUCCESS) { return result; } break; case ZES_TEMP_SENSORS_GPU: result = getGpuMaxTemperature(pTemperature); if (result != ZE_RESULT_SUCCESS) { return result; } break; case ZES_TEMP_SENSORS_MEMORY: result = getMemoryMaxTemperature(pTemperature); if (result != ZE_RESULT_SUCCESS) { return result; } break; default: *pTemperature = 0; break; } return result; } bool LinuxTemperatureImp::isTempModuleSupported() { if (!isSubdevice) { if (type == ZES_TEMP_SENSORS_MEMORY) { return false; } } return (pPmt != nullptr); } void LinuxTemperatureImp::setSensorType(zes_temp_sensors_t sensorType) { type = sensorType; } LinuxTemperatureImp::LinuxTemperatureImp(OsSysman *pOsSysman, ze_bool_t onSubdevice, uint32_t subdeviceId) : subdeviceId(subdeviceId), isSubdevice(onSubdevice) { LinuxSysmanImp *pLinuxSysmanImp = static_cast(pOsSysman); pPmt = pLinuxSysmanImp->getPlatformMonitoringTechAccess(subdeviceId); } std::unique_ptr OsTemperature::create(OsSysman *pOsSysman, ze_bool_t onSubdevice, uint32_t subdeviceId, 
zes_temp_sensors_t sensorType) { std::unique_ptr pLinuxTemperatureImp = std::make_unique(pOsSysman, onSubdevice, subdeviceId); pLinuxTemperatureImp->setSensorType(sensorType); return pLinuxTemperatureImp; } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/temperature/linux/os_temperature_imp.h000066400000000000000000000030031422164147700332360ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/non_copyable_or_moveable.h" #include "level_zero/tools/source/sysman/temperature/os_temperature.h" #include namespace L0 { class SysfsAccess; class PlatformMonitoringTech; struct Device; class LinuxTemperatureImp : public OsTemperature, NEO::NonCopyableOrMovableClass { public: ze_result_t getProperties(zes_temp_properties_t *pProperties) override; ze_result_t getSensorTemperature(double *pTemperature) override; bool isTempModuleSupported() override; void setSensorType(zes_temp_sensors_t sensorType); LinuxTemperatureImp(OsSysman *pOsSysman, ze_bool_t onSubdevice, uint32_t subdeviceId); LinuxTemperatureImp() = default; ~LinuxTemperatureImp() override = default; protected: PlatformMonitoringTech *pPmt = nullptr; Device *pDevice = nullptr; zes_temp_sensors_t type = ZES_TEMP_SENSORS_GLOBAL; private: ze_result_t getGlobalMaxTemperature(double *pTemperature); ze_result_t getGlobalMinTemperature(double *pTemperature); ze_result_t getGpuMaxTemperature(double *pTemperature); ze_result_t getGpuMinTemperature(double *pTemperature); ze_result_t getMemoryMaxTemperature(double *pTemperature); ze_result_t getGlobalMaxTemperatureNoSubDevice(double *pTemperature); ze_result_t getGpuMaxTemperatureNoSubDevice(double *pTemperature); uint32_t subdeviceId = 0; ze_bool_t isSubdevice = 0; }; } // namespace L0 
os_temperature_imp_prelim.cpp000066400000000000000000000213201422164147700350640ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/temperature/linux/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/linux/pmt/pmt.h" #include "level_zero/tools/source/sysman/temperature/linux/os_temperature_imp.h" #include "sysman/linux/os_sysman_imp.h" namespace L0 { constexpr uint32_t numSocTemperatureEntries = 7; // entries would be PCH or GT_TEMP, DRAM, SA, PSF, DE, PCIE, TYPEC constexpr uint32_t numCoreTemperatureEntries = 4; // entries would be CORE0, CORE1, CORE2, CORE3 constexpr uint32_t numComputeTemperatureEntries = 3; // entries would be IA, GT and LLC constexpr uint32_t invalidMaxTemperature = 125; constexpr uint32_t invalidMinTemperature = 10; ze_result_t LinuxTemperatureImp::getProperties(zes_temp_properties_t *pProperties) { pProperties->type = type; pProperties->onSubdevice = 0; pProperties->subdeviceId = 0; if (isSubdevice) { pProperties->onSubdevice = isSubdevice; pProperties->subdeviceId = subdeviceId; } return ZE_RESULT_SUCCESS; } ze_result_t LinuxTemperatureImp::getGlobalMaxTemperatureNoSubDevice(double *pTemperature) { auto isValidTemperature = [](auto temperature) { if ((temperature > invalidMaxTemperature) || (temperature < invalidMinTemperature)) { return false; } return true; }; auto getMaxTemperature = [&](auto temperature, auto numTemperatureEntries) { uint32_t maxTemperature = 0; for (uint32_t count = 0; count < numTemperatureEntries; count++) { uint32_t localTemperatureVal = (temperature >> (8 * count)) & 0xff; if (isValidTemperature(localTemperatureVal)) { if (localTemperatureVal > maxTemperature) { maxTemperature = localTemperatureVal; } } } return maxTemperature; }; ze_result_t result = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; uint32_t maxComputeTemperature = 0; uint32_t maxCoreTemperature = 0; std::string key; auto productFamily = 
pDevice->getNEODevice()->getHardwareInfo().platform.eProductFamily; if (productFamily == IGFX_DG1) { uint32_t computeTemperature = 0; key = "COMPUTE_TEMPERATURES"; result = pPmt->readValue(key, computeTemperature); if (result != ZE_RESULT_SUCCESS) { return result; } // Check max temperature among IA, GT and LLC sensors across COMPUTE_TEMPERATURES maxComputeTemperature = getMaxTemperature(computeTemperature, numComputeTemperatureEntries); uint32_t coreTemperature = 0; key = "CORE_TEMPERATURES"; result = pPmt->readValue(key, coreTemperature); if (result != ZE_RESULT_SUCCESS) { return result; } // Check max temperature among CORE0, CORE1, CORE2, CORE3 sensors across CORE_TEMPERATURES maxCoreTemperature = getMaxTemperature(coreTemperature, numCoreTemperatureEntries); } // SOC_TEMPERATURES is present in all product families uint64_t socTemperature = 0; key = "SOC_TEMPERATURES"; result = pPmt->readValue(key, socTemperature); if (result != ZE_RESULT_SUCCESS) { return result; } // Check max temperature among possible sensors like PCH or GT_TEMP, DRAM, SA, PSF, DE, PCIE, TYPEC across SOC_TEMPERATURES uint32_t maxSocTemperature = getMaxTemperature(socTemperature, numSocTemperatureEntries); *pTemperature = static_cast(std::max({maxComputeTemperature, maxCoreTemperature, maxSocTemperature})); return result; } ze_result_t LinuxTemperatureImp::getGlobalMaxTemperature(double *pTemperature) { if (!isSubdevice) { return getGlobalMaxTemperatureNoSubDevice(pTemperature); } uint32_t globalMaxTemperature = 0; std::string key("TileMaxTemperature"); ze_result_t result = pPmt->readValue(key, globalMaxTemperature); if (result != ZE_RESULT_SUCCESS) { return result; } *pTemperature = static_cast(globalMaxTemperature); return result; } ze_result_t LinuxTemperatureImp::getGpuMaxTemperatureNoSubDevice(double *pTemperature) { double gpuMaxTemperature = 0; uint64_t socTemperature = 0; // Gpu temperature is obtained from GT_TEMP in SOC_TEMPERATURE's bit 0 to 7. 
std::string key = "SOC_TEMPERATURES"; auto result = pPmt->readValue(key, socTemperature); if (result != ZE_RESULT_SUCCESS) { return result; } gpuMaxTemperature = static_cast(socTemperature & 0xff); auto productFamily = pDevice->getNEODevice()->getHardwareInfo().platform.eProductFamily; if (productFamily == IGFX_DG1) { // In DG1 platform, Gpu Max Temperature is obtained from COMPUTE_TEMPERATURE only uint32_t computeTemperature = 0; std::string key("COMPUTE_TEMPERATURES"); ze_result_t result = pPmt->readValue(key, computeTemperature); if (result != ZE_RESULT_SUCCESS) { return result; } // GT temperature could be read via 8th to 15th bit in the value read in temperature computeTemperature = (computeTemperature >> 8) & 0xff; gpuMaxTemperature = static_cast(computeTemperature); } *pTemperature = gpuMaxTemperature; return ZE_RESULT_SUCCESS; } ze_result_t LinuxTemperatureImp::getGpuMaxTemperature(double *pTemperature) { if (!isSubdevice) { return getGpuMaxTemperatureNoSubDevice(pTemperature); } uint32_t gpuMaxTemperature = 0; std::string key("GTMaxTemperature"); ze_result_t result = pPmt->readValue(key, gpuMaxTemperature); if (result != ZE_RESULT_SUCCESS) { return result; } *pTemperature = static_cast(gpuMaxTemperature); return result; } ze_result_t LinuxTemperatureImp::getMemoryMaxTemperature(double *pTemperature) { ze_result_t result = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; uint32_t numHbmModules = 0u; auto productFamily = pDevice->getNEODevice()->getHardwareInfo().platform.eProductFamily; if (productFamily == IGFX_XE_HP_SDV) { numHbmModules = 2u; } else if (productFamily == IGFX_PVC) { numHbmModules = 4u; } else { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } std::vector maxDeviceTemperatureList; for (auto hbmModuleIndex = 0u; hbmModuleIndex < numHbmModules; hbmModuleIndex++) { uint32_t maxDeviceTemperature = 0; // To read HBM 0's max device temperature key would be HBM0MaxDeviceTemperature std::string key = "HBM" + std::to_string(hbmModuleIndex) + 
"MaxDeviceTemperature"; result = pPmt->readValue(key, maxDeviceTemperature); if (result != ZE_RESULT_SUCCESS) { return result; } maxDeviceTemperatureList.push_back(maxDeviceTemperature); } *pTemperature = static_cast(*std::max_element(maxDeviceTemperatureList.begin(), maxDeviceTemperatureList.end())); return result; } ze_result_t LinuxTemperatureImp::getSensorTemperature(double *pTemperature) { ze_result_t result = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; switch (type) { case ZES_TEMP_SENSORS_GLOBAL: result = getGlobalMaxTemperature(pTemperature); if (result != ZE_RESULT_SUCCESS) { return result; } break; case ZES_TEMP_SENSORS_GPU: result = getGpuMaxTemperature(pTemperature); if (result != ZE_RESULT_SUCCESS) { return result; } break; case ZES_TEMP_SENSORS_MEMORY: result = getMemoryMaxTemperature(pTemperature); if (result != ZE_RESULT_SUCCESS) { return result; } break; default: *pTemperature = 0; result = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; break; } return result; } bool LinuxTemperatureImp::isTempModuleSupported() { if (!isSubdevice) { if (type == ZES_TEMP_SENSORS_MEMORY) { return false; } } return (pPmt != nullptr); } void LinuxTemperatureImp::setSensorType(zes_temp_sensors_t sensorType) { type = sensorType; } LinuxTemperatureImp::LinuxTemperatureImp(OsSysman *pOsSysman, ze_bool_t onSubdevice, uint32_t subdeviceId) : subdeviceId(subdeviceId), isSubdevice(onSubdevice) { LinuxSysmanImp *pLinuxSysmanImp = static_cast(pOsSysman); pPmt = pLinuxSysmanImp->getPlatformMonitoringTechAccess(subdeviceId); pDevice = pLinuxSysmanImp->getDeviceHandle(); } std::unique_ptr OsTemperature::create(OsSysman *pOsSysman, ze_bool_t onSubdevice, uint32_t subdeviceId, zes_temp_sensors_t sensorType) { std::unique_ptr pLinuxTemperatureImp = std::make_unique(pOsSysman, onSubdevice, subdeviceId); pLinuxTemperatureImp->setSensorType(sensorType); return pLinuxTemperatureImp; } } // namespace 
L0compute-runtime-22.14.22890/level_zero/tools/source/sysman/temperature/os_temperature.h000066400000000000000000000011631422164147700312370ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include #include namespace L0 { struct OsSysman; class OsTemperature { public: virtual ze_result_t getProperties(zes_temp_properties_t *pProperties) = 0; virtual ze_result_t getSensorTemperature(double *pTemperature) = 0; virtual bool isTempModuleSupported() = 0; static std::unique_ptr create(OsSysman *pOsSysman, ze_bool_t onSubdevice, uint32_t subdeviceId, zes_temp_sensors_t sensorType); virtual ~OsTemperature() = default; }; } // namespace L0compute-runtime-22.14.22890/level_zero/tools/source/sysman/temperature/temperature.cpp000066400000000000000000000030511422164147700310670ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/basic_math.h" #include "level_zero/tools/source/sysman/temperature/temperature_imp.h" namespace L0 { TemperatureHandleContext::~TemperatureHandleContext() { for (Temperature *pTemperature : handleList) { delete pTemperature; } } void TemperatureHandleContext::createHandle(const ze_device_handle_t &deviceHandle, zes_temp_sensors_t type) { Temperature *pTemperature = new TemperatureImp(deviceHandle, pOsSysman, type); if (pTemperature->initSuccess == true) { handleList.push_back(pTemperature); } else { delete pTemperature; } } void TemperatureHandleContext::init(std::vector &deviceHandles) { for (const auto &deviceHandle : deviceHandles) { createHandle(deviceHandle, ZES_TEMP_SENSORS_GLOBAL); createHandle(deviceHandle, ZES_TEMP_SENSORS_GPU); createHandle(deviceHandle, ZES_TEMP_SENSORS_MEMORY); } } ze_result_t TemperatureHandleContext::temperatureGet(uint32_t *pCount, zes_temp_handle_t *phTemperature) { uint32_t handleListSize = static_cast(handleList.size()); uint32_t numToCopy = 
std::min(*pCount, handleListSize); if (0 == *pCount || *pCount > handleListSize) { *pCount = handleListSize; } if (nullptr != phTemperature) { for (uint32_t i = 0; i < numToCopy; i++) { phTemperature[i] = handleList[i]->toHandle(); } } return ZE_RESULT_SUCCESS; } } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/temperature/temperature.h000066400000000000000000000025641422164147700305440ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include #include struct _zes_temp_handle_t { virtual ~_zes_temp_handle_t() = default; }; namespace L0 { struct OsSysman; class Temperature : _zes_temp_handle_t { public: virtual ze_result_t temperatureGetProperties(zes_temp_properties_t *pProperties) = 0; virtual ze_result_t temperatureGetConfig(zes_temp_config_t *pConfig) = 0; virtual ze_result_t temperatureSetConfig(const zes_temp_config_t *pConfig) = 0; virtual ze_result_t temperatureGetState(double *pTemperature) = 0; static Temperature *fromHandle(zes_temp_handle_t handle) { return static_cast(handle); } inline zes_temp_handle_t toHandle() { return this; } bool initSuccess = false; zes_temp_properties_t tempProperties = {}; }; struct TemperatureHandleContext { TemperatureHandleContext(OsSysman *pOsSysman) : pOsSysman(pOsSysman){}; ~TemperatureHandleContext(); void init(std::vector &deviceHandles); ze_result_t temperatureGet(uint32_t *pCount, zes_temp_handle_t *phTemperature); OsSysman *pOsSysman = nullptr; std::vector handleList = {}; private: void createHandle(const ze_device_handle_t &deviceHandle, zes_temp_sensors_t type); }; } // namespace L0compute-runtime-22.14.22890/level_zero/tools/source/sysman/temperature/temperature_imp.cpp000066400000000000000000000027531422164147700317440ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/temperature/temperature_imp.h" #include 
"level_zero/core/source/device/device.h" namespace L0 { ze_result_t TemperatureImp::temperatureGetProperties(zes_temp_properties_t *pProperties) { *pProperties = tempProperties; return ZE_RESULT_SUCCESS; } ze_result_t TemperatureImp::temperatureGetConfig(zes_temp_config_t *pConfig) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t TemperatureImp::temperatureSetConfig(const zes_temp_config_t *pConfig) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t TemperatureImp::temperatureGetState(double *pTemperature) { return pOsTemperature->getSensorTemperature(pTemperature); } void TemperatureImp::init() { if (pOsTemperature->isTempModuleSupported()) { pOsTemperature->getProperties(&tempProperties); this->initSuccess = true; } } TemperatureImp::TemperatureImp(const ze_device_handle_t &deviceHandle, OsSysman *pOsSysman, zes_temp_sensors_t type) { ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES}; Device::fromHandle(deviceHandle)->getProperties(&deviceProperties); pOsTemperature = OsTemperature::create(pOsSysman, deviceProperties.flags & ZE_DEVICE_PROPERTY_FLAG_SUBDEVICE, deviceProperties.subdeviceId, type); init(); } TemperatureImp::~TemperatureImp() { } } // namespace L0compute-runtime-22.14.22890/level_zero/tools/source/sysman/temperature/temperature_imp.h000066400000000000000000000017341422164147700314070ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/non_copyable_or_moveable.h" #include "level_zero/tools/source/sysman/temperature/os_temperature.h" #include "level_zero/tools/source/sysman/temperature/temperature.h" namespace L0 { class TemperatureImp : public Temperature, NEO::NonCopyableOrMovableClass { public: ze_result_t temperatureGetProperties(zes_temp_properties_t *pProperties) override; ze_result_t temperatureGetConfig(zes_temp_config_t *pConfig) override; ze_result_t temperatureSetConfig(const 
zes_temp_config_t *pConfig) override; ze_result_t temperatureGetState(double *pTemperature) override; TemperatureImp() = default; TemperatureImp(const ze_device_handle_t &deviceHandle, OsSysman *pOsSysman, zes_temp_sensors_t type); ~TemperatureImp() override; std::unique_ptr pOsTemperature = nullptr; void init(); }; } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/temperature/windows/000077500000000000000000000000001422164147700275215ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/temperature/windows/CMakeLists.txt000066400000000000000000000011111422164147700322530ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_TOOLS_SYSMAN_TEMPERATURE_WINDOWS ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/os_temperature_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/os_temperature_imp.cpp ) if(WIN32) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_SYSMAN_TEMPERATURE_WINDOWS} ) endif() # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_SYSMAN_TEMPERATURE_WINDOWS ${L0_SRCS_TOOLS_SYSMAN_TEMPERATURE_WINDOWS}) os_temperature_imp.cpp000066400000000000000000000120771422164147700340600ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/temperature/windows/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "sysman/temperature/windows/os_temperature_imp.h" namespace L0 { ze_result_t WddmTemperatureImp::getProperties(zes_temp_properties_t *pProperties) { uint32_t value = 0; std::vector vRequests = {}; std::vector vResponses = {}; KmdSysman::RequestProperty request = {}; pProperties->type = this->type; pProperties->onSubdevice = false; pProperties->subdeviceId = 0; request.commandId = KmdSysman::Command::Get; request.componentId = KmdSysman::Component::TemperatureComponent; switch (this->type) { case ZES_TEMP_SENSORS_GLOBAL: 
request.paramInfo = KmdSysman::TemperatureDomainsType::TemperatureDomainPackage; break; case ZES_TEMP_SENSORS_GPU: request.paramInfo = KmdSysman::TemperatureDomainsType::TemperatureDomainDGPU; break; case ZES_TEMP_SENSORS_MEMORY: request.paramInfo = KmdSysman::TemperatureDomainsType::TemperatureDomainPackage; break; default: return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; break; } request.requestId = KmdSysman::Requests::Temperature::TempCriticalEventSupported; vRequests.push_back(request); request.requestId = KmdSysman::Requests::Temperature::TempThreshold1EventSupported; vRequests.push_back(request); request.requestId = KmdSysman::Requests::Temperature::TempThreshold2EventSupported; vRequests.push_back(request); request.requestId = KmdSysman::Requests::Temperature::MaxTempSupported; vRequests.push_back(request); ze_result_t status = pKmdSysManager->requestMultiple(vRequests, vResponses); if ((status != ZE_RESULT_SUCCESS) || (vResponses.size() != vRequests.size())) { return status; } if (vResponses[0].returnCode == KmdSysman::Success) { memcpy_s(&pProperties->isCriticalTempSupported, sizeof(ze_bool_t), vResponses[0].dataBuffer, sizeof(ze_bool_t)); } if (vResponses[1].returnCode == KmdSysman::Success) { memcpy_s(&pProperties->isThreshold1Supported, sizeof(ze_bool_t), vResponses[1].dataBuffer, sizeof(ze_bool_t)); } if (vResponses[2].returnCode == KmdSysman::Success) { memcpy_s(&pProperties->isThreshold2Supported, sizeof(ze_bool_t), vResponses[2].dataBuffer, sizeof(ze_bool_t)); } if (vResponses[3].returnCode == KmdSysman::Success) { memcpy_s(&value, sizeof(uint32_t), vResponses[3].dataBuffer, sizeof(uint32_t)); pProperties->maxTemperature = static_cast(value); } return ZE_RESULT_SUCCESS; } ze_result_t WddmTemperatureImp::getSensorTemperature(double *pTemperature) { ze_result_t status = ZE_RESULT_SUCCESS; KmdSysman::RequestProperty request; KmdSysman::ResponseProperty response; request.commandId = KmdSysman::Command::Get; request.componentId = 
KmdSysman::Component::TemperatureComponent; request.requestId = KmdSysman::Requests::Temperature::CurrentTemperature; switch (type) { case ZES_TEMP_SENSORS_GLOBAL: request.paramInfo = KmdSysman::TemperatureDomainsType::TemperatureDomainPackage; break; case ZES_TEMP_SENSORS_GPU: request.paramInfo = KmdSysman::TemperatureDomainsType::TemperatureDomainDGPU; break; case ZES_TEMP_SENSORS_MEMORY: request.paramInfo = KmdSysman::TemperatureDomainsType::TemperatureDomainHBM; break; default: *pTemperature = 0; return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; break; } status = pKmdSysManager->requestSingle(request, response); if (status != ZE_RESULT_SUCCESS) { return status; } uint32_t value = 0; memcpy_s(&value, sizeof(uint32_t), response.dataBuffer, sizeof(uint32_t)); *pTemperature = static_cast(value); return status; } bool WddmTemperatureImp::isTempModuleSupported() { if ((type == ZES_TEMP_SENSORS_GLOBAL_MIN) || (type == ZES_TEMP_SENSORS_GPU_MIN)) { return false; } KmdSysman::RequestProperty request; KmdSysman::ResponseProperty response; request.paramInfo = static_cast(type); request.commandId = KmdSysman::Command::Get; request.componentId = KmdSysman::Component::TemperatureComponent; request.requestId = KmdSysman::Requests::Temperature::CurrentTemperature; return (pKmdSysManager->requestSingle(request, response) == ZE_RESULT_SUCCESS); } void WddmTemperatureImp::setSensorType(zes_temp_sensors_t sensorType) { type = sensorType; } WddmTemperatureImp::WddmTemperatureImp(OsSysman *pOsSysman) { WddmSysmanImp *pWddmSysmanImp = static_cast(pOsSysman); pKmdSysManager = &pWddmSysmanImp->getKmdSysManager(); } std::unique_ptr OsTemperature::create(OsSysman *pOsSysman, ze_bool_t onSubdevice, uint32_t subdeviceId, zes_temp_sensors_t sensorType) { std::unique_ptr pWddmTemperatureImp = std::make_unique(pOsSysman); pWddmTemperatureImp->setSensorType(sensorType); return std::move(pWddmTemperatureImp); } } // namespace 
L0compute-runtime-22.14.22890/level_zero/tools/source/sysman/temperature/windows/os_temperature_imp.h000066400000000000000000000015641422164147700336030ustar00rootroot00000000000000/*
 * Copyright (C) 2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#pragma once
#include "shared/source/helpers/non_copyable_or_moveable.h"

#include "sysman/temperature/os_temperature.h"
#include "sysman/windows/os_sysman_imp.h"

namespace L0 {

class KmdSysManager;

// Windows (WDDM) implementation of OsTemperature. Temperature data is obtained
// through the KmdSysManager pointer held in pKmdSysManager.
class WddmTemperatureImp : public OsTemperature, NEO::NonCopyableOrMovableClass {
  public:
    // OsTemperature interface.
    ze_result_t getProperties(zes_temp_properties_t *pProperties) override;
    ze_result_t getSensorTemperature(double *pTemperature) override;
    bool isTempModuleSupported() override;

    // Selects the sensor that the queries above refer to.
    void setSensorType(zes_temp_sensors_t sensorType);

    WddmTemperatureImp(OsSysman *pOsSysman);
    WddmTemperatureImp() = default;
    ~WddmTemperatureImp() override = default;

  protected:
    // Raw pointer; the defaulted destructor does not delete it.
    KmdSysManager *pKmdSysManager = nullptr;
    // Sensor queried by this object; defaults to the global sensor.
    zes_temp_sensors_t type = ZES_TEMP_SENSORS_GLOBAL;
};

} // namespace L0compute-runtime-22.14.22890/level_zero/tools/source/sysman/windows/000077500000000000000000000000001422164147700251645ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/source/sysman/windows/CMakeLists.txt000066400000000000000000000012441422164147700277250ustar00rootroot00000000000000# # Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_TOOLS_SYSMAN_WINDOWS ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/os_sysman_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/kmd_sys.h ${CMAKE_CURRENT_SOURCE_DIR}/kmd_sys_manager.h ${CMAKE_CURRENT_SOURCE_DIR}/kmd_sys_manager.cpp ${CMAKE_CURRENT_SOURCE_DIR}/os_sysman_imp.cpp ) if(WIN32) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_SYSMAN_WINDOWS} ) endif() # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_SYSMAN_WINDOWS ${L0_SRCS_TOOLS_SYSMAN_WINDOWS})
compute-runtime-22.14.22890/level_zero/tools/source/sysman/windows/kmd_sys.h000066400000000000000000000274141422164147700270160ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include namespace L0 { namespace KmdSysman { constexpr uint32_t KmdMaxBufferSize = 2048; constexpr uint32_t MaxPropertyBufferSize = 128; constexpr uint32_t PcEscapeOperation = 35; constexpr uint32_t KmdSysmanSuccess = 0; constexpr uint32_t KmdSysmanFail = 1; constexpr uint32_t KmdMajorVersion = 1; constexpr uint32_t KmdMinorVersion = 0; constexpr uint32_t KmdPatchNumber = 0; struct GfxSysmanMainHeaderIn { uint32_t inVersion; uint32_t inNumElements; uint32_t inTotalsize; uint8_t inBuffer[KmdMaxBufferSize]; }; struct GfxSysmanMainHeaderOut { uint32_t outStatus; uint32_t outNumElements; uint32_t outTotalSize; uint8_t outBuffer[KmdMaxBufferSize]; }; struct GfxSysmanReqHeaderIn { uint32_t inRequestId; uint32_t inCommand; uint32_t inComponent; uint32_t inCommandParam; uint32_t inDataSize; }; struct GfxSysmanReqHeaderOut { uint32_t outRequestId; uint32_t outComponent; uint32_t outReturnCode; uint32_t outDataSize; }; enum Command { Get = 0, Set, RegisterEvent, UnregisterEvent, MaxCommands, }; enum Events { EnergyThresholdCrossed = 0, EnterD0, EnterD3, EnterTDR, ExitTDR, FrequencyThrottled, CriticalTemperature, TemperatureThreshold1, TemperatureThreshold2, ResetDeviceRequired, MaxEvents, }; enum Component { InterfaceProperties = 0, PowerComponent, FrequencyComponent, ActivityComponent, FanComponent, TemperatureComponent, FpsComponent, SchedulerComponent, MemoryComponent, PciComponent, GlobalOperationsComponent, MaxComponents, }; namespace Requests { enum Interface { InterfaceVersion = 0, MaxInterfaceRequests, }; enum Power { NumPowerDomains = 0, // support / enabled EnergyThresholdSupported, EnergyThresholdEnabled, PowerLimit1Enabled, PowerLimit2Enabled, // default fused values PowerLimit1Default, PowerLimit2Default, 
PowerLimit1TauDefault, PowerLimit4AcDefault, PowerLimit4DcDefault, EnergyThresholdDefault, TdpDefault, MinPowerLimitDefault, MaxPowerLimitDefault, EnergyCounterUnits, // current runtime values CurrentPowerLimit1, CurrentPowerLimit2, CurrentPowerLimit1Tau, CurrentPowerLimit4Ac, CurrentPowerLimit4Dc, CurrentEnergyThreshold, DisableEnergyThreshold, CurrentEnergyCounter, MaxPowerRequests, }; enum Activity { NumActivityDomains = 0, // default fused values ActivityCounterNumberOfBits, ActivityCounterFrequency, TimestampFrequency, // current runtime values CurrentActivityCounter, MaxActivityRequests, }; enum Temperature { NumTemperatureDomains = 0, // support / enabled TempCriticalEventSupported, TempThreshold1EventSupported, TempThreshold2EventSupported, TempCriticalEventEnabled, TempThreshold1EventEnabled, TempThreshold2EventEnabled, // default fused values MaxTempSupported, // current runtime values CurrentTemperature, MaxTemperatureRequests, }; enum Frequency { NumFrequencyDomains = 0, // support / enabled FrequencyOcSupported, VoltageOverrideSupported, VoltageOffsetSupported, HighVoltageModeSupported, ExtendedOcSupported, FixedModeSupported, HighVoltageEnabled, CanControlFrequency, FrequencyThrottledEventSupported, // default fused values TjMaxDefault, IccMaxDefault, MaxOcFrequencyDefault, MaxNonOcFrequencyDefault, MaxOcVoltageDefault, MaxNonOcVoltageDefault, FrequencyRangeMinDefault, FrequencyRangeMaxDefault, // current runtime values CurrentFrequencyTarget, CurrentVoltageTarget, CurrentVoltageOffset, CurrentVoltageMode, CurrentFixedMode, CurrentTjMax, CurrentIccMax, CurrentVoltage, CurrentRequestedFrequency, CurrentTdpFrequency, CurrentEfficientFrequency, CurrentResolvedFrequency, CurrentThrottleReasons, CurrentThrottleTime, CurrentFrequencyRange, MaxFrequencyRequests, }; enum Fans { NumFanDomains = 0, // default fused values MaxFanControlPointsSupported, MaxFanSpeedSupported, // current runtime values CurrentNumOfControlPoints, CurrentFanPoint, CurrentFanSpeed, 
MaxFanRequests, }; enum Fps { NumFpsDomains = 0, IsDisplayAttached, InstRenderTime, TimeToFlip, AvgFps, AvgRenderTime, AvgInstFps, MaxFpsRequests, }; enum Scheduler { NumSchedulerDomains = 0, MaxSchedulerRequests, }; enum Memory { NumMemoryDomains = 0, // default fused values MemoryType, MemoryLocation, PhysicalSize, StolenSize, SystemSize, DedicatedSize, MemoryWidth, NumChannels, MaxBandwidth, // current runtime values CurrentBandwidthRead, CurrentBandwidthWrite, CurrentFreeMemorySize, CurrentTotalAllocableMem, MaxMemoryRequests }; enum Pci { NumPciDomains = 0, // support / enabled BandwidthCountersSupported, PacketCountersSupported, ReplayCountersSupported, // default fused values DeviceId, VendorId, Domain, Bus, Device, Function, Gen, DevType, MaxLinkWidth, MaxLinkSpeed, BusInterface, BusWidth, BarType, BarIndex, BarBase, BarSize, // current runtime values CurrentLinkWidth, CurrentLinkSpeed, CurrentLinkStatus, CurrentLinkQualityFlags, CurrentLinkStabilityFlags, CurrentLinkReplayCounter, CurrentLinkPacketCounter, CurrentLinkRxCounter, CurrentLinkTxCounter, // resizable bar ResizableBarSupported, ResizableBarEnabled, MaxPciRequests, }; enum GlobalOperation { NumGlobalOperationDomains = 0, TriggerDeviceLevelReset }; } // namespace Requests enum FlipType { MMIOFlip = 0, MMIOAsyncFlip, DMAFlip, DMAAsyncFlip, MaxFlipTypes, }; enum GeneralDomainsType { GeneralDomainDGPU = 0, GeneralDomainHBM, GeneralDomainMaxTypes, }; enum TemperatureDomainsType { TemperatureDomainPackage = 0, TemperatureDomainDGPU, TemperatureDomainHBM, TempetatureMaxDomainTypes, }; enum ActivityDomainsType { ActitvityDomainGT = 0, ActivityDomainRenderCompute, ActivityDomainMedia, ActivityDomainMaxTypes, }; enum PciDomainsType { PciCurrentDevice = 0, PciParentDevice, PciRootPort, PciDomainMaxTypes, }; enum MemoryType { DDR4 = 0, DDR5, LPDDR5, LPDDR4, DDR3, LPDDR3, GDDR4, GDDR5, GDDR5X, GDDR6, GDDR6X, GDDR7, UknownMemType, MaxMemoryTypes, }; enum MemoryWidthType { MemWidth8x = 0, MemWidth16x, 
MemWidth32x, UnknownMemWidth, MaxMemoryWidthTypes, }; enum MemoryLocationsType { SystemMemory = 0, DeviceMemory, UnknownMemoryLocation, MaxMemoryLocationTypes, }; enum PciGensType { PciGen1_1 = 0, PciGen2_0, PciGen3_0, PciGen4_0, UnknownPciGen, MaxPciGenTypes, }; enum PciLinkSpeedType { UnknownPciLinkSpeed = 0, PciLinkSpeed2_5 = 1, PciLinkSpeed5_0, PciLinkSpeed8_0, PciLinkSpeed16_0, MaxPciLinkSpeedTypes, }; enum ReturnCodes { Success = 0, PcuError, IllegalCommand, TimeOut, IllegalData, IllegalSubCommand, OverclockingLocked, DomainServiceNotSupported, FrequencyExceedsMax, VoltageExceedsMax, OverclockingNotSupported, InvalidVr, InvalidIccMax, VoltageOverrideDisabled, ServiceNotAvailable, InvalidRequestType, InvalidComponent, BufferNotLargeEnough, GetNotSupported, SetNotSupported, MissingProperties, InvalidEvent, CreateEventError, ErrorVersion, ErrorSize, ErrorNoElements, ErrorBufferCorrupted, VTNotSupported, NotInitialized, PropertyNotSet, InvalidFlipType, }; enum PciLinkWidthType { PciLinkWidth1x = 0, PciLinkWidth2x, PciLinkWidth4x, PciLinkWidth8x, PciLinkWidth12x, PciLinkWidth16x, PciLinkWidth32x, UnknownPciLinkWidth, MaxPciLinkWidthTypes, }; struct KmdSysmanVersion { KmdSysmanVersion() : data(0) {} union { struct { uint32_t reservedBits : 8; uint32_t majorVersion : 8; uint32_t minorVersion : 8; uint32_t patchNumber : 8; }; uint32_t data; }; }; struct RequestProperty { RequestProperty() : requestId(0), commandId(0), componentId(0), paramInfo(0), dataSize(0) {} RequestProperty(const RequestProperty &other) { requestId = other.requestId; commandId = other.commandId; componentId = other.componentId; paramInfo = other.paramInfo; dataSize = other.dataSize; if (other.dataSize > 0 && other.dataSize < MaxPropertyBufferSize && other.dataBuffer) { memcpy_s(dataBuffer, other.dataSize, other.dataBuffer, other.dataSize); } } RequestProperty(uint32_t _requestId, uint32_t _commandId, uint32_t _componentId, uint32_t _paramInfo, uint32_t _dataSize, uint8_t *_dataBuffer) { requestId 
= _requestId; commandId = _commandId; componentId = _componentId; paramInfo = _paramInfo; dataSize = _dataSize; if (dataSize > 0 && dataSize < MaxPropertyBufferSize && _dataBuffer) { memcpy_s(dataBuffer, dataSize, _dataBuffer, dataSize); } } RequestProperty &operator=(const RequestProperty &other) { requestId = other.requestId; commandId = other.commandId; componentId = other.componentId; paramInfo = other.paramInfo; dataSize = other.dataSize; if (other.dataSize > 0 && other.dataSize < MaxPropertyBufferSize && other.dataBuffer) { memcpy_s(dataBuffer, other.dataSize, other.dataBuffer, other.dataSize); } return *this; } uint32_t requestId; uint32_t commandId; uint32_t componentId; uint32_t paramInfo; uint32_t dataSize; uint8_t dataBuffer[MaxPropertyBufferSize] = {0}; }; struct ResponseProperty { ResponseProperty() : requestId(0), returnCode(0), componentId(0), dataSize(0) {} ResponseProperty(const ResponseProperty &other) { requestId = other.requestId; returnCode = other.returnCode; componentId = other.componentId; dataSize = other.dataSize; if (other.dataSize > 0 && other.dataSize < MaxPropertyBufferSize && other.dataBuffer) { memcpy_s(dataBuffer, other.dataSize, other.dataBuffer, other.dataSize); } } ResponseProperty(uint32_t _requestId, uint32_t _returnCode, uint32_t _componentId, uint32_t _dataSize, uint8_t *_dataBuffer) { requestId = _requestId; returnCode = _returnCode; componentId = _componentId; dataSize = _dataSize; if (dataSize > 0 && dataSize < MaxPropertyBufferSize && _dataBuffer) { memcpy_s(dataBuffer, dataSize, _dataBuffer, dataSize); } } ResponseProperty &operator=(const ResponseProperty &other) { this->requestId = other.requestId; this->returnCode = other.returnCode; this->componentId = other.componentId; this->dataSize = other.dataSize; if (other.dataSize > 0 && other.dataSize < MaxPropertyBufferSize && other.dataBuffer) { memcpy_s(this->dataBuffer, other.dataSize, other.dataBuffer, other.dataSize); } return *this; } uint32_t requestId; uint32_t 
returnCode; uint32_t componentId; uint32_t dataSize; uint8_t dataBuffer[MaxPropertyBufferSize] = {0}; }; } // namespace KmdSysman } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/source/sysman/windows/kmd_sys_manager.cpp000066400000000000000000000212611422164147700310350ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/windows/kmd_sys_manager.h" #include "gfxEscape.h" namespace L0 { struct PcEscapeInfo { GFX_ESCAPE_HEADER_T headerGfx; uint32_t escapeOpInput; uint32_t escapeErrorType; uint32_t dataInSize; uint64_t pDataIn; uint32_t dataOutSize; uint64_t pDataOut; }; uint32_t sumOfBufferData(void *pBuffer, uint32_t bufferSize) { uint32_t index; uint32_t ulCheckSum; uint32_t numOfUnsignedLongs = bufferSize / sizeof(uint32_t); uint32_t *pElement = static_cast(pBuffer); ulCheckSum = 0; for (index = 0; index < numOfUnsignedLongs; index++) { ulCheckSum += *pElement; pElement++; } return ulCheckSum; } KmdSysManager::KmdSysManager(NEO::Wddm *pWddm) { pWddmAccess = pWddm; } KmdSysManager *KmdSysManager::create(NEO::Wddm *pWddm) { return new KmdSysManager(pWddm); } ze_result_t KmdSysManager::requestSingle(KmdSysman::RequestProperty &inputRequest, KmdSysman::ResponseProperty &outputResponse) { KmdSysman::GfxSysmanMainHeaderIn inMainHeader; KmdSysman::GfxSysmanMainHeaderOut outMainHeader; memset(&inMainHeader, 0, sizeof(KmdSysman::GfxSysmanMainHeaderIn)); memset(&outMainHeader, 0, sizeof(KmdSysman::GfxSysmanMainHeaderOut)); std::vector vectorInput; vectorInput.push_back(inputRequest); if (!parseBufferIn(&inMainHeader, vectorInput)) { return ZE_RESULT_ERROR_INVALID_SIZE; } KmdSysman::KmdSysmanVersion versionKmd; versionKmd.data = 0; versionKmd.majorVersion = KmdSysman::KmdMajorVersion; versionKmd.minorVersion = KmdSysman::KmdMinorVersion; versionKmd.patchNumber = KmdSysman::KmdPatchNumber; inMainHeader.inVersion = versionKmd.data; uint64_t inPointerToLongInt = 
reinterpret_cast(&inMainHeader); uint64_t outPointerToLongInt = reinterpret_cast(&outMainHeader); auto status = escape(KmdSysman::PcEscapeOperation, inPointerToLongInt, sizeof(KmdSysman::GfxSysmanMainHeaderIn), outPointerToLongInt, sizeof(KmdSysman::GfxSysmanMainHeaderOut)); if (status) { std::vector vecOutput; if (!parseBufferOut(&outMainHeader, vecOutput)) { return ZE_RESULT_ERROR_INVALID_SIZE; } if (vecOutput.size() > 0) { outputResponse = vecOutput[0]; } else { return ZE_RESULT_ERROR_INVALID_SIZE; } return (outputResponse.returnCode == KmdSysman::KmdSysmanSuccess) ? ZE_RESULT_SUCCESS : ZE_RESULT_ERROR_NOT_AVAILABLE; } return (status) ? ZE_RESULT_SUCCESS : ZE_RESULT_ERROR_INVALID_ARGUMENT; } ze_result_t KmdSysManager::requestMultiple(std::vector &inputRequest, std::vector &outputResponse) { KmdSysman::GfxSysmanMainHeaderIn inMainHeader; KmdSysman::GfxSysmanMainHeaderOut outMainHeader; if (inputRequest.size() == 0) { return ZE_RESULT_ERROR_INVALID_NULL_HANDLE; } memset(&inMainHeader, 0, sizeof(KmdSysman::GfxSysmanMainHeaderIn)); memset(&outMainHeader, 0, sizeof(KmdSysman::GfxSysmanMainHeaderOut)); if (!parseBufferIn(&inMainHeader, inputRequest)) { return ZE_RESULT_ERROR_INVALID_SIZE; } KmdSysman::KmdSysmanVersion versionKmd; versionKmd.data = 0; versionKmd.majorVersion = 1; versionKmd.minorVersion = 0; versionKmd.patchNumber = 0; inMainHeader.inVersion = versionKmd.data; uint64_t inPointerToLongInt = reinterpret_cast(&inMainHeader); uint64_t outPointerToLongInt = reinterpret_cast(&outMainHeader); auto status = escape(KmdSysman::PcEscapeOperation, inPointerToLongInt, sizeof(KmdSysman::GfxSysmanMainHeaderIn), outPointerToLongInt, sizeof(KmdSysman::GfxSysmanMainHeaderOut)); if (status) { if (!parseBufferOut(&outMainHeader, outputResponse)) { return ZE_RESULT_ERROR_INVALID_SIZE; } if (outputResponse.size() == 0) { return ZE_RESULT_ERROR_INVALID_SIZE; } } return (status) ? 
ZE_RESULT_SUCCESS : ZE_RESULT_ERROR_INVALID_ARGUMENT; } bool KmdSysManager::escape(uint32_t escapeOp, uint64_t pDataIn, uint32_t dataInSize, uint64_t pDataOut, uint32_t dataOutSize) { if (pWddmAccess) { D3DKMT_ESCAPE escapeCommand = {0}; PcEscapeInfo pcEscape = {0}; escapeCommand.Flags.HardwareAccess = 0; escapeCommand.Flags.Reserved = 0; escapeCommand.hAdapter = (D3DKMT_HANDLE)0; escapeCommand.hContext = (D3DKMT_HANDLE)0; escapeCommand.hDevice = (D3DKMT_HANDLE)pWddmAccess->getDeviceHandle(); escapeCommand.pPrivateDriverData = &pcEscape; escapeCommand.PrivateDriverDataSize = sizeof(pcEscape); escapeCommand.Type = D3DKMT_ESCAPE_DRIVERPRIVATE; pcEscape.headerGfx.EscapeCode = GFX_ESCAPE_PWRCONS_CONTROL; pcEscape.escapeErrorType = 0; pcEscape.escapeOpInput = escapeOp; pcEscape.headerGfx.Size = sizeof(pcEscape) - sizeof(pcEscape.headerGfx); pcEscape.pDataIn = pDataIn; pcEscape.pDataOut = pDataOut; pcEscape.dataInSize = dataInSize; pcEscape.dataOutSize = dataOutSize; void *pBuffer = &pcEscape; pBuffer = reinterpret_cast(pBuffer) + sizeof(pcEscape.headerGfx); pcEscape.headerGfx.CheckSum = sumOfBufferData(pBuffer, pcEscape.headerGfx.Size); auto status = pWddmAccess->escape(escapeCommand); if (status == STATUS_SUCCESS) { return true; } } return false; } bool KmdSysManager::parseBufferIn(KmdSysman::GfxSysmanMainHeaderIn *pInMainHeader, std::vector &vectorInput) { memset(pInMainHeader, 0, sizeof(KmdSysman::GfxSysmanMainHeaderIn)); for (uint32_t i = 0; i < vectorInput.size(); i++) { KmdSysman::GfxSysmanReqHeaderIn headerIn; headerIn.inRequestId = vectorInput[i].requestId; headerIn.inCommand = vectorInput[i].commandId; headerIn.inComponent = vectorInput[i].componentId; headerIn.inCommandParam = vectorInput[i].paramInfo; headerIn.inDataSize = vectorInput[i].dataSize; if ((pInMainHeader->inTotalsize + sizeof(KmdSysman::GfxSysmanReqHeaderIn)) >= KmdSysman::KmdMaxBufferSize) { memset(pInMainHeader, 0, sizeof(KmdSysman::GfxSysmanMainHeaderIn)); return false; } if 
((pInMainHeader->inTotalsize + sizeof(KmdSysman::GfxSysmanReqHeaderIn) + headerIn.inDataSize) >= KmdSysman::KmdMaxBufferSize) { memset(pInMainHeader, 0, sizeof(KmdSysman::GfxSysmanMainHeaderIn)); return false; } uint8_t *pBuff = reinterpret_cast(&pInMainHeader->inBuffer[pInMainHeader->inTotalsize]); memcpy_s(pBuff, sizeof(KmdSysman::GfxSysmanReqHeaderIn), &headerIn, sizeof(KmdSysman::GfxSysmanReqHeaderIn)); pBuff += sizeof(KmdSysman::GfxSysmanReqHeaderIn); pInMainHeader->inTotalsize += sizeof(KmdSysman::GfxSysmanReqHeaderIn); if (headerIn.inDataSize > 0) { memcpy_s(pBuff, headerIn.inDataSize, vectorInput[i].dataBuffer, headerIn.inDataSize); pInMainHeader->inTotalsize += headerIn.inDataSize; } pInMainHeader->inNumElements++; } return true; } bool KmdSysManager::parseBufferOut(KmdSysman::GfxSysmanMainHeaderOut *pOutMainHeader, std::vector &vectorOutput) { uint8_t *pBuff = reinterpret_cast(pOutMainHeader->outBuffer); uint32_t totalSize = 0; vectorOutput.clear(); for (uint32_t i = 0; i < pOutMainHeader->outNumElements; i++) { KmdSysman::ResponseProperty propertyResponse; KmdSysman::GfxSysmanReqHeaderOut headerOut; memcpy_s(&headerOut, sizeof(KmdSysman::GfxSysmanReqHeaderOut), pBuff, sizeof(KmdSysman::GfxSysmanReqHeaderOut)); propertyResponse.requestId = headerOut.outRequestId; propertyResponse.returnCode = headerOut.outReturnCode; propertyResponse.componentId = headerOut.outComponent; propertyResponse.dataSize = headerOut.outDataSize; pBuff += sizeof(KmdSysman::GfxSysmanReqHeaderOut); if (headerOut.outDataSize > 0) { memcpy_s(propertyResponse.dataBuffer, headerOut.outDataSize, pBuff, headerOut.outDataSize); pBuff += headerOut.outDataSize; } vectorOutput.push_back(propertyResponse); totalSize += sizeof(KmdSysman::GfxSysmanReqHeaderOut); totalSize += headerOut.outDataSize; } if (totalSize != pOutMainHeader->outTotalSize) { vectorOutput.clear(); return false; } return true; } } // namespace 
L0compute-runtime-22.14.22890/level_zero/tools/source/sysman/windows/kmd_sys_manager.h000066400000000000000000000024271422164147700305050ustar00rootroot00000000000000/*
 * Copyright (C) 2020-2021 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#pragma once
#include "shared/source/os_interface/os_interface.h"
#include "shared/source/os_interface/windows/wddm/wddm.h"

#include "level_zero/tools/source/sysman/windows/kmd_sys.h"
#include "level_zero/ze_api.h"
#include "level_zero/zet_api.h"

// NOTE(review): the two include targets below and the std::vector element
// types in this class appear to have been lost in extraction — restore them
// from the upstream file.
#include
#include

namespace L0 {
// Packs Sysman requests, sends them to the KMD through an escape call on the
// held NEO::Wddm object, and unpacks the answers.
class KmdSysManager {
  public:
    // Heap-allocates a manager bound to pWddm.
    static KmdSysManager *create(NEO::Wddm *pWddm);
    KmdSysManager() = default;
    ~KmdSysManager() = default;

    // Single request / single response round trip.
    MOCKABLE_VIRTUAL ze_result_t requestSingle(KmdSysman::RequestProperty &In, KmdSysman::ResponseProperty &Out);
    // Batched variant: all requests travel in one escape call.
    ze_result_t requestMultiple(std::vector &vIn, std::vector &vOut);
    NEO::Wddm *GetWddmAccess() { return pWddmAccess; }

  private:
    // Issues the escape call; buffers are passed as 64-bit integers together
    // with their sizes.
    MOCKABLE_VIRTUAL bool escape(uint32_t escapeOp, uint64_t pDataIn, uint32_t dataInSize, uint64_t pDataOut, uint32_t dataOutSize);

    // Serialize requests into / deserialize responses out of the main headers.
    bool parseBufferIn(KmdSysman::GfxSysmanMainHeaderIn *pIn, std::vector &vIn);
    bool parseBufferOut(KmdSysman::GfxSysmanMainHeaderOut *pOut, std::vector &vOut);

    KmdSysManager(NEO::Wddm *pWddm);

    // Raw pointer; the defaulted destructor does not delete it.
    NEO::Wddm *pWddmAccess = nullptr;
};
} // namespace L0
compute-runtime-22.14.22890/level_zero/tools/source/sysman/windows/os_sysman_imp.cpp000066400000000000000000000027421422164147700305550ustar00rootroot00000000000000/*
 * Copyright (C) 2020-2021 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "level_zero/tools/source/sysman/windows/os_sysman_imp.h"

#include "shared/source/os_interface/windows/wddm/wddm.h"

namespace L0 {

// Resolves the core device from the parent Sysman device, looks up the Wddm
// driver model and creates the KmdSysManager on top of it.
// Aborts via UNRECOVERABLE_IF when the device or the manager is null.
ze_result_t WddmSysmanImp::init() {
    pDevice = Device::fromHandle(pParentSysmanDeviceImp->hCoreDevice);
    UNRECOVERABLE_IF(nullptr == pDevice);

    NEO::OSInterface &OsInterface = pDevice->getOsInterface();
    auto driverModel = OsInterface.getDriverModel();

    // pWddm stays nullptr when no driver model is available; the manager is
    // still created in that case.
    if (driverModel) {
        // NOTE(review): the template argument of as<...>() appears to have
        // been lost in extraction — confirm against upstream.
        pWddm = driverModel->as();
    }
    pKmdSysManager = KmdSysManager::create(pWddm);
    UNRECOVERABLE_IF(nullptr == pKmdSysManager);

    return ZE_RESULT_SUCCESS;
}

// Returns the core device resolved by init(); nullptr before init() has run.
Device *WddmSysmanImp::getDeviceHandle() {
    return pDevice;
}

// Returns the Wddm object; aborts via UNRECOVERABLE_IF when none was found.
NEO::Wddm &WddmSysmanImp::getWddm() {
    UNRECOVERABLE_IF(nullptr == pWddm);
    return *pWddm;
}

// Returns the KMD manager; aborts via UNRECOVERABLE_IF when it was not created.
KmdSysManager &WddmSysmanImp::getKmdSysManager() {
    UNRECOVERABLE_IF(nullptr == pKmdSysManager);
    return *pKmdSysManager;
}

WddmSysmanImp::WddmSysmanImp(SysmanDeviceImp *pParentSysmanDeviceImp) {
    this->pParentSysmanDeviceImp = pParentSysmanDeviceImp;
}

// Releases the KmdSysManager created in init().
WddmSysmanImp::~WddmSysmanImp() {
    if (nullptr != pKmdSysManager) {
        delete pKmdSysManager;
        pKmdSysManager = nullptr;
    }
}

// Factory for the Windows backend; the caller owns the returned object.
OsSysman *OsSysman::create(SysmanDeviceImp *pParentSysmanDeviceImp) {
    WddmSysmanImp *pWddmSysmanImp = new WddmSysmanImp(pParentSysmanDeviceImp);
    // NOTE(review): the target type of this static_cast appears to have been
    // lost in extraction — confirm against upstream.
    return static_cast(pWddmSysmanImp);
}

} // namespace L0
compute-runtime-22.14.22890/level_zero/tools/source/sysman/windows/os_sysman_imp.h000066400000000000000000000020211422164147700302100ustar00rootroot00000000000000/*
 * Copyright (C) 2020-2021 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#pragma once
#include "shared/source/helpers/non_copyable_or_moveable.h"
#include "shared/source/os_interface/os_interface.h"
#include "shared/source/os_interface/windows/wddm/wddm.h"

#include "level_zero/core/source/device/device.h"
#include "level_zero/tools/source/sysman/sysman_imp.h"
#include "level_zero/tools/source/sysman/windows/kmd_sys.h"
#include "level_zero/tools/source/sysman/windows/kmd_sys_manager.h"

namespace L0 {

// Windows (WDDM) implementation of OsSysman. Holds the KmdSysManager that is
// created in init() and deleted in the destructor.
class WddmSysmanImp : public OsSysman, NEO::NonCopyableOrMovableClass {
  public:
    WddmSysmanImp(SysmanDeviceImp *pParentSysmanDeviceImp);
    ~WddmSysmanImp() override;

    ze_result_t init() override;

    // Accessors; the reference-returning ones abort when the object is missing.
    KmdSysManager &getKmdSysManager();
    NEO::Wddm &getWddm();
    Device *getDeviceHandle();

  protected:
    // Created in init(), deleted in the destructor.
    KmdSysManager *pKmdSysManager = nullptr;
    Device *pDevice = nullptr;

  private:
    SysmanDeviceImp *pParentSysmanDeviceImp = nullptr;
    NEO::Wddm *pWddm = nullptr;
};

} // namespace L0
compute-runtime-22.14.22890/level_zero/tools/test/000077500000000000000000000000001422164147700216375ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/.clang-tidy000066400000000000000000000035161422164147700237000ustar00rootroot00000000000000--- Checks: 'clang-diagnostic-*,clang-analyzer-*,google-default-arguments,modernize-use-override,modernize-use-default-member-init,-clang-analyzer-alpha*,readability-identifier-naming,-clang-analyzer-optin.performance.Padding,-clang-analyzer-security.insecureAPI.strcpy,-clang-analyzer-cplusplus.NewDeleteLeaks,-clang-analyzer-core.CallAndMessage,-clang-analyzer-unix.MismatchedDeallocator,-clang-analyzer-core.NullDereference,-clang-analyzer-cplusplus.NewDelete,-clang-analyzer-optin.cplusplus.VirtualCall' # WarningsAsErrors: '.*' HeaderFilterRegex: '^((?!^third_party\/).+)\.(h|hpp|inl)$' AnalyzeTemporaryDtors: false CheckOptions: - key: google-readability-braces-around-statements.ShortStatementLines value: '1' - key: google-readability-function-size.StatementThreshold value: '800' - key: google-readability-namespace-comments.ShortNamespaceLines value: '10' - key: google-readability-namespace-comments.SpacesBeforeComments value: '2' - key: readability-identifier-naming.ParameterCase value: camelBack - key: readability-identifier-naming.StructMemberCase value: camelBack - key: modernize-loop-convert.MaxCopySize value: '16' - key: modernize-loop-convert.MinConfidence value: reasonable - key: modernize-loop-convert.NamingStyle value: CamelCase - key: modernize-pass-by-value.IncludeStyle value: llvm - key: modernize-replace-auto-ptr.IncludeStyle value: llvm - key: modernize-use-nullptr.NullMacros value: 'NULL' - key: modernize-use-default-member-init.UseAssignment value: '1' ... 
compute-runtime-22.14.22890/level_zero/tools/test/CMakeLists.txt000066400000000000000000000003511422164147700243760ustar00rootroot00000000000000# # Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT # if(NOT NEO_SKIP_L0_UNIT_TESTS) add_subdirectory(unit_tests) endif() if(NOT NEO_SKIP_L0_BLACK_BOX_TESTS) add_subdirectory(black_box_tests) endif() compute-runtime-22.14.22890/level_zero/tools/test/black_box_tests/000077500000000000000000000000001422164147700250055ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/black_box_tests/CMakeLists.txt000066400000000000000000000017211422164147700275460ustar00rootroot00000000000000# # Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT # if(UNIX) set(L0_BLACK_BOX_TEST_PROJECT_FOLDER "ze_intel_gpu/black_box_tests") set(TEST_TARGETS zello_sysman zello_metrics ) foreach(TEST_NAME ${TEST_TARGETS}) add_executable(${TEST_NAME} ${TEST_NAME}.cpp) set_target_properties(${TEST_NAME} PROPERTIES VS_DEBUGGER_COMMAND "$(TargetPath)" VS_DEBUGGER_COMMAND_ARGUMENTS "" VS_DEBUGGER_WORKING_DIRECTORY "$(OutputPath)" ) if(BUILD_LEVEL_ZERO_LOADER) add_dependencies(${TEST_NAME} ze_loader) target_link_libraries(${TEST_NAME} ${NEO_BINARY_DIR}/lib/libze_loader.so) else() target_link_libraries(${TEST_NAME} PUBLIC ${TARGET_NAME_L0}) endif() set_target_properties(${TEST_NAME} PROPERTIES FOLDER ${L0_BLACK_BOX_TEST_PROJECT_FOLDER}) endforeach() endif() add_subdirectories() compute-runtime-22.14.22890/level_zero/tools/test/black_box_tests/zello_metrics.cpp000066400000000000000000001232371422164147700303740ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include #include #include #include #include #include #include #include #include #include #include #include #define ROOT_DEVICE 0xFFFFFFFF #define VALIDATECALL(myZeCall) \ do { \ if ((myZeCall) != ZE_RESULT_SUCCESS) { \ std::cout << "Error at " \ << #myZeCall << ": " \ << 
__FUNCTION__ << ": " \ << __LINE__ << std::endl; \ std::terminate(); \ } \ } while (0); /////////////////////////// /// Device /////////////////////////// struct Device { uint32_t deviceIndex; uint32_t subDeviceIndex; Device(uint32_t inDeviceIndex, uint32_t inSubDeviceIndex) { deviceIndex = inDeviceIndex; subDeviceIndex = inSubDeviceIndex; } }; /////////////////////////// /// Sample /////////////////////////// struct Sample { private: /////////////////////////// /// L0 core api objects /////////////////////////// ze_driver_handle_t driverHandle = {}; ze_context_handle_t contextHandle = {}; ze_device_handle_t deviceHandle = {}; ze_command_queue_handle_t commandQueue = {}; ze_command_queue_desc_t queueDescription = {}; ze_command_list_handle_t commandList = {}; /////////////////////////// /// Metrics groups /////////////////////////// zet_metric_group_handle_t metricGroup = nullptr; zet_metric_group_properties_t metricGroupProperties = {}; /////////////////////////// /// Notification events /////////////////////////// ze_event_pool_handle_t eventPool = {}; ze_event_handle_t notificationEvent = {}; /////////////////////////// /// Metric streamer /////////////////////////// zet_metric_streamer_handle_t metricStreamer = {}; const uint32_t notifyReportCount = 10; const uint32_t samplingPeriod = 40000; uint32_t metricStreamerMarker = 0; /////////////////////////// /// Metric query /////////////////////////// const uint32_t queryPoolCount = 1000; const uint32_t querySlotIndex = 0; zet_metric_query_pool_handle_t queryPoolHandle = {}; zet_metric_query_handle_t queryHandle = {}; zet_metric_query_pool_desc_t queryPoolDesc = {}; /////////////////////////// /// Metrics raw output data /////////////////////////// const uint32_t maxRawReportCount = 5; std::vector rawData = {}; size_t rawDataSize = 0; /////////////////////////// /// Workload /////////////////////////// uint32_t allocationSize = 4096; void *sourceBuffer = nullptr; void *destinationBuffer = nullptr; 
/////////////////////////// /// Options /////////////////////////// bool verbose = false; Device device = {0, 0}; public: /////////////////////////// /// Sample constructor /////////////////////////// Sample(Device inDevice, bool useMetrics, int32_t argc, char *argv[]) : device(inDevice) { wait(argc, argv); enableMetrics(useMetrics); create(); } /////////////////////////// /// Sample destructor /////////////////////////// ~Sample() { destroy(); } /////////////////////////// /// isArgumentEnabled /////////////////////////// static bool isArgumentEnabled(int argc, char *argv[], const char *shortName, const char *longName) { char **arg = &argv[1]; char **argE = &argv[argc]; for (; arg != argE; ++arg) { if ((0 == strcmp(*arg, shortName)) || (0 == strcmp(*arg, longName))) { return true; } } return false; } /////////////////////////// /// getArgumentValue /////////////////////////// static uint32_t getArgumentValue(int argc, char *argv[], const char *shortName, const char *longName) { char **arg = &argv[1]; char **argE = &argv[argc]; for (; arg != argE; ++arg) { if ((0 == strcmp(*arg, shortName)) || (0 == strcmp(*arg, longName))) { ++arg; VALIDATECALL(arg != argE ? ZE_RESULT_SUCCESS : ZE_RESULT_ERROR_INVALID_ARGUMENT); return atoi(*arg); } } return false; } /////////////////////////// /// activateMetrics /////////////////////////// void activateMetrics(const char *metricsName, zet_metric_group_sampling_type_flag_t metricsType) { // Find time based metric group. findMetricGroup(metricsName, metricsType); // Metric Streamer begin. VALIDATECALL(zetMetricGroupGetProperties(metricGroup, &metricGroupProperties)); // Activate metric group. 
VALIDATECALL(zetContextActivateMetricGroups(contextHandle, deviceHandle, 1, &metricGroup)); std::cout << std::endl << "MetricGroup activated" << std::endl; } /////////////////////////// /// deactivateMetrics /////////////////////////// void deactivateMetrics() { VALIDATECALL(zetContextActivateMetricGroups(contextHandle, deviceHandle, 0, nullptr)); std::cout << "MetricGroup deactivated" << std::endl; } /////////////////////////// /// executeOnlyWorkload /////////////////////////// void executeOnlyWorkload() { // Execute a test workload that will be measured by metric stream. executeWorkload(); // Execute command list. exectuteCommandList(); // Validate output data. validateResults(); } /////////////////////////// /// executeStreamWorkload /////////////////////////// void executeStreamWorkload() { // Create metric stream instance. createMetricStream(); // Append metric Streamer marker #1. // It allows to correlate time based reports from metric Streamer with a given workload (dispatch, copy, etc.). VALIDATECALL(zetCommandListAppendMetricStreamerMarker(commandList, metricStreamer, ++metricStreamerMarker)); // Execute a test workload that will be measured by metric stream. executeWorkload(); // Append metric Streamer marker #2. VALIDATECALL(zetCommandListAppendMetricStreamerMarker(commandList, metricStreamer, ++metricStreamerMarker)); // Copying of data must finish before running the user function. VALIDATECALL(zeCommandListAppendBarrier(commandList, nullptr, 0, nullptr)); // Append metric Streamer marker #3. VALIDATECALL(zetCommandListAppendMetricStreamerMarker(commandList, metricStreamer, ++metricStreamerMarker)); // Execute command list. exectuteCommandList(); // Validate output data. validateResults(); // Obtain raw stream metrics. obtainRawStreamMetrics(); // Obtain final stream metrics. obtainCalculatedMetrics(); } /////////////////////////// /// executeQueryWorkload /////////////////////////// void executeQueryWorkload() { // Create metric query instance. 
createMetricQuery(); // Metric query begin. VALIDATECALL(zetCommandListAppendMetricQueryBegin(reinterpret_cast(commandList), queryHandle)); executeWorkload(); // Metric query end. VALIDATECALL(zetCommandListAppendMetricQueryEnd(reinterpret_cast(commandList), queryHandle, notificationEvent, 0, nullptr)); // An optional memory barrier to flush gpu caches. VALIDATECALL(zetCommandListAppendMetricMemoryBarrier(reinterpret_cast(commandList))); // Execute command list. exectuteCommandList(); // Validate output data. validateResults(); // Obtain raw stream metrics. obtainRawQueryMetrics(); // Obtain final stream metrics. obtainCalculatedMetrics(); } private: /////////////////////////// /// enableMetrics /////////////////////////// void enableMetrics(bool enable) { if (enable) { #if defined(_WIN32) _putenv(const_cast("ZET_ENABLE_METRICS=1")); #else putenv(const_cast("ZET_ENABLE_METRICS=1")); #endif } } /////////////////////////// /// create /////////////////////////// void create() { createL0(); createDevice(); createSubDevice(); createCommandQueue(); createCommandList(); createEvent(); createResources(); } /////////////////////////// /// destroy /////////////////////////// void destroy() { // Close metric streamer. if (metricStreamer) { VALIDATECALL(zetMetricStreamerClose(metricStreamer)); std::cout << "MetricStreamer closed" << std::endl; } // Destroy metric query pool. if (queryHandle) { VALIDATECALL(zetMetricQueryDestroy(queryHandle)); } if (queryPoolHandle) { VALIDATECALL(zetMetricQueryPoolDestroy(queryPoolHandle)); } // Destroy notification event. 
VALIDATECALL(zeMemFree(contextHandle, sourceBuffer)); VALIDATECALL(zeMemFree(contextHandle, destinationBuffer)); VALIDATECALL(zeEventDestroy(notificationEvent)); VALIDATECALL(zeEventPoolDestroy(eventPool)); VALIDATECALL(zeCommandListDestroy(commandList)); VALIDATECALL(zeCommandQueueDestroy(commandQueue)); VALIDATECALL(zeContextDestroy(contextHandle)); } ////////////////////////// /// createDevice /////////////////////////// void createL0() { static bool enableOnce = true; if (enableOnce) { enableOnce = false; } else { return; } VALIDATECALL(zeInit(ZE_INIT_FLAG_GPU_ONLY)); } /////////////////////////// /// createDevice /////////////////////////// void createDevice() { uint32_t driverCount = 0; uint32_t deviceCount = 0; uint32_t driverVersion = 0; ze_api_version_t apiVersion = {}; ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES}; std::vector devices; // Obtain driver. VALIDATECALL(zeDriverGet(&driverCount, nullptr)); VALIDATECALL(zeDriverGet(&driverCount, &driverHandle)); // Driver properties. if (verbose) { ze_driver_properties_t driverProperties = {}; VALIDATECALL(zeDriverGetProperties(driverHandle, &driverProperties)); driverVersion = driverProperties.driverVersion; const uint32_t driverMajorVersion = ZE_MAJOR_VERSION(driverVersion); const uint32_t driverMinorVersion = ZE_MINOR_VERSION(driverVersion); std::cout << "Driver version: " << driverMajorVersion << "." << driverMinorVersion << "\n"; VALIDATECALL(zeDriverGetApiVersion(driverHandle, &apiVersion)); const uint32_t apiMajorVersion = ZE_MAJOR_VERSION(apiVersion); const uint32_t apiMinorVersion = ZE_MINOR_VERSION(apiVersion); std::cout << "API version: " << apiMajorVersion << "." << apiMinorVersion << "\n"; } // Obtain context. ze_context_desc_t contextDesc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC, nullptr, 0}; VALIDATECALL(zeContextCreate(driverHandle, &contextDesc, &contextHandle)); // Obtain all devices. 
VALIDATECALL(zeDeviceGet(driverHandle, &deviceCount, nullptr)); devices.resize(deviceCount); VALIDATECALL(zeDeviceGet(driverHandle, &deviceCount, devices.data())); printf("\nDevices count : %u", deviceCount); printf("\nDevice index : %u", device.deviceIndex); // Obtain selected device. VALIDATECALL(device.deviceIndex < deviceCount ? ZE_RESULT_SUCCESS : ZE_RESULT_ERROR_INVALID_ARGUMENT); deviceHandle = devices[device.deviceIndex]; // Obtain device properties. VALIDATECALL(zeDeviceGetProperties(deviceHandle, &deviceProperties)); } /////////////////////////// /// createSubDevice /////////////////////////// void createSubDevice() { uint32_t subDevicesCount = 0; std::vector subDevices; // Sub devices count. VALIDATECALL(zeDeviceGetSubDevices(deviceHandle, &subDevicesCount, nullptr)); printf("\nSub devices count : %u", subDevicesCount); printf("\nSub device index : %u\n", device.subDeviceIndex); printf("\nIs root device : %u\n", device.subDeviceIndex == ROOT_DEVICE); const bool subDevicesAvailable = subDevicesCount > 0; const bool useRootDevice = device.subDeviceIndex == ROOT_DEVICE; if (subDevicesAvailable && !useRootDevice) { VALIDATECALL((device.subDeviceIndex < subDevicesCount ? ZE_RESULT_SUCCESS : ZE_RESULT_ERROR_DEVICE_LOST)); // Obtain sub devices. 
if (subDevicesCount > 1) { subDevices.resize(subDevicesCount); VALIDATECALL(zeDeviceGetSubDevices(deviceHandle, &subDevicesCount, subDevices.data())); deviceHandle = subDevices[device.subDeviceIndex]; } else { printf("\nUsing root device."); } } } /////////////////////////// /// createCommandQueue /////////////////////////// void createCommandQueue() { uint32_t queueGroupsCount = 0; VALIDATECALL(zeDeviceGetCommandQueueGroupProperties(deviceHandle, &queueGroupsCount, nullptr)); if (queueGroupsCount == 0) { std::cout << "No queue groups found!\n"; std::terminate(); } std::vector queueProperties(queueGroupsCount); VALIDATECALL(zeDeviceGetCommandQueueGroupProperties(deviceHandle, &queueGroupsCount, queueProperties.data())); for (uint32_t i = 0; i < queueGroupsCount; ++i) { if (queueProperties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE) { queueDescription.ordinal = i; } } queueDescription.index = 0; queueDescription.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; VALIDATECALL(zeCommandQueueCreate(contextHandle, deviceHandle, &queueDescription, &commandQueue)); } /////////////////////////// /// createCommandList /////////////////////////// void createCommandList() { ze_command_list_desc_t commandListDesc = {}; commandListDesc.commandQueueGroupOrdinal = queueDescription.ordinal; // Create command list. 
VALIDATECALL(zeCommandListCreate(contextHandle, deviceHandle, &commandListDesc, &commandList)); } /////////////////////////// /// createResources /////////////////////////// void createResources() { ze_device_mem_alloc_desc_t deviceDesc = {ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC}; deviceDesc.flags = ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_UNCACHED; deviceDesc.ordinal = 0; ze_host_mem_alloc_desc_t hostDesc = {ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC}; hostDesc.flags = ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_UNCACHED; VALIDATECALL(zeMemAllocShared(contextHandle, &deviceDesc, &hostDesc, allocationSize, 1, deviceHandle, &sourceBuffer)); VALIDATECALL(zeMemAllocShared(contextHandle, &deviceDesc, &hostDesc, allocationSize, 1, deviceHandle, &destinationBuffer)); // Initialize memory memset(sourceBuffer, 55, allocationSize); memset(destinationBuffer, 0, allocationSize); } /////////////////////////// /// createEvent /////////////////////////// void createEvent() { ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; eventPoolDesc.flags = ze_event_pool_flag_t::ZE_EVENT_POOL_FLAG_HOST_VISIBLE; ze_event_desc_t notificationEventDesc = {}; notificationEventDesc.index = 0; notificationEventDesc.wait = ze_event_scope_flag_t::ZE_EVENT_SCOPE_FLAG_HOST; notificationEventDesc.signal = ze_event_scope_flag_t::ZE_EVENT_SCOPE_FLAG_DEVICE; // Optional notification event to know if Streamer reports are ready to read. 
VALIDATECALL(zeEventPoolCreate(contextHandle, &eventPoolDesc, 1, &deviceHandle, &eventPool)); VALIDATECALL(zeEventCreate(eventPool, ¬ificationEventDesc, ¬ificationEvent)); } /////////////////////////// /// executeWorkload /////////////////////////// void executeWorkload() { std::ifstream file("copy_buffer_to_buffer.spv", std::ios::binary); if (file.is_open()) { file.seekg(0, file.end); auto length = file.tellg(); file.seekg(0, file.beg); std::cout << "Using copy_buffer_to_buffer.spv" << std::endl; std::unique_ptr spirvInput(new char[length]); file.read(spirvInput.get(), length); ze_module_desc_t moduleDesc = {}; ze_module_build_log_handle_t buildlog; moduleDesc.format = ZE_MODULE_FORMAT_IL_SPIRV; moduleDesc.pInputModule = reinterpret_cast(spirvInput.get()); moduleDesc.inputSize = length; moduleDesc.pBuildFlags = ""; ze_module_handle_t module = nullptr; ze_kernel_handle_t kernel = nullptr; if (zeModuleCreate(contextHandle, deviceHandle, &moduleDesc, &module, &buildlog) != ZE_RESULT_SUCCESS) { size_t szLog = 0; zeModuleBuildLogGetString(buildlog, &szLog, nullptr); char *strLog = (char *)malloc(szLog); zeModuleBuildLogGetString(buildlog, &szLog, strLog); std::cout << "Build log:" << strLog << std::endl; free(strLog); } VALIDATECALL(zeModuleBuildLogDestroy(buildlog)); ze_kernel_desc_t kernelDesc = {}; kernelDesc.pKernelName = "CopyBufferToBufferBytes"; VALIDATECALL(zeKernelCreate(module, &kernelDesc, &kernel)); uint32_t groupSizeX = 32u; uint32_t groupSizeY = 1u; uint32_t groupSizeZ = 1u; VALIDATECALL(zeKernelSuggestGroupSize(kernel, allocationSize, 1U, 1U, &groupSizeX, &groupSizeY, &groupSizeZ)); VALIDATECALL(zeKernelSetGroupSize(kernel, groupSizeX, groupSizeY, groupSizeZ)); uint32_t offset = 0; VALIDATECALL(zeKernelSetArgumentValue(kernel, 1, sizeof(destinationBuffer), &destinationBuffer)); VALIDATECALL(zeKernelSetArgumentValue(kernel, 0, sizeof(sourceBuffer), &sourceBuffer)); VALIDATECALL(zeKernelSetArgumentValue(kernel, 2, sizeof(uint32_t), &offset)); 
VALIDATECALL(zeKernelSetArgumentValue(kernel, 3, sizeof(uint32_t), &offset)); VALIDATECALL(zeKernelSetArgumentValue(kernel, 4, sizeof(uint32_t), &offset)); ze_group_count_t dispatchTraits; dispatchTraits.groupCountX = allocationSize / groupSizeX; dispatchTraits.groupCountY = 1u; dispatchTraits.groupCountZ = 1u; VALIDATECALL(zeCommandListAppendLaunchKernel(commandList, kernel, &dispatchTraits, nullptr, 0, nullptr)); file.close(); } else { std::cout << "Using zeCommandListAppendMemoryCopy" << std::endl; VALIDATECALL(zeCommandListAppendMemoryCopy(commandList, destinationBuffer, sourceBuffer, allocationSize, nullptr, 0, nullptr)); } } ////////////////////////// /// exectuteCommandList /////////////////////////// void exectuteCommandList() { // Close command list. VALIDATECALL(zeCommandListClose(commandList)); // Execute workload. VALIDATECALL(zeCommandQueueExecuteCommandLists(commandQueue, 1, &commandList, nullptr)); // If using async command queue, explicit sync must be used for correctness. VALIDATECALL(zeCommandQueueSynchronize(commandQueue, std::numeric_limits::max())); // Check if notification event meets zet_metric_group_properties_t::notifyEveryNReports requirments. const bool notificationOccured = zeEventQueryStatus(notificationEvent) == ZE_RESULT_SUCCESS; std::cout << "Requested report count ready to read: " << (notificationOccured ? "true" : "false") << std::endl; } ////////////////////////// /// validateResults /////////////////////////// void validateResults() { // Validate. 
const bool outputValidationSuccessful = (memcmp(destinationBuffer, sourceBuffer, allocationSize) == 0); if (!outputValidationSuccessful) { // Validate uint8_t *srcCharBuffer = static_cast(sourceBuffer); uint8_t *dstCharBuffer = static_cast(destinationBuffer); for (size_t i = 0; i < allocationSize; i++) { if (srcCharBuffer[i] != dstCharBuffer[i]) { std::cout << "srcBuffer[" << i << "] = " << static_cast(srcCharBuffer[i]) << " not equal to " << "dstBuffer[" << i << "] = " << static_cast(dstCharBuffer[i]) << "\n"; break; } } } std::cout << "\nResults validation " << (outputValidationSuccessful ? "PASSED" : "FAILED") << std::endl; VALIDATECALL(outputValidationSuccessful ? ZE_RESULT_SUCCESS : ZE_RESULT_ERROR_UNKNOWN) } /////////////////////////// /// createMetricStream /////////////////////////// void createMetricStream() { zet_metric_streamer_desc_t streamerProperties = {}; streamerProperties.notifyEveryNReports = notifyReportCount; streamerProperties.samplingPeriod = samplingPeriod; VALIDATECALL(zetMetricStreamerOpen(contextHandle, deviceHandle, metricGroup, &streamerProperties, notificationEvent, &metricStreamer)); std::cout << "Metric Streamer opened" << " StreamerProp.notifyEveryNReports: " << streamerProperties.notifyEveryNReports << " StreamerProp.samplingPeriod: " << streamerProperties.samplingPeriod << std::endl; std::this_thread::sleep_for(std::chrono::milliseconds(100)); } /////////////////////////// /// createMetricQuery /////////////////////////// void createMetricQuery() { queryPoolDesc.count = queryPoolCount; queryPoolDesc.type = ZET_METRIC_QUERY_POOL_TYPE_PERFORMANCE; // Create metric query pool. VALIDATECALL(zetMetricQueryPoolCreate(contextHandle, deviceHandle, metricGroup, &queryPoolDesc, &queryPoolHandle)); // Obtain metric query from pool. 
VALIDATECALL(zetMetricQueryCreate(queryPoolHandle, querySlotIndex, &queryHandle)); } /////////////////////////// /// obtainRawStreamMetrics /////////////////////////// void obtainRawStreamMetrics() { // Read raw buffer size. VALIDATECALL(zetMetricStreamerReadData(metricStreamer, maxRawReportCount, &rawDataSize, nullptr)); std::cout << "Streamer read requires: " << rawDataSize << " bytes buffer" << std::endl; // Read raw data. rawData.resize(rawDataSize, 0); VALIDATECALL(zetMetricStreamerReadData(metricStreamer, maxRawReportCount, &rawDataSize, rawData.data())); std::cout << "Streamer read raw bytes: " << rawDataSize << std::endl; } /////////////////////////// /// obtainRawQueryMetrics /////////////////////////// void obtainRawQueryMetrics() { // Obtain metric query report size. VALIDATECALL(zetMetricQueryGetData(queryHandle, &rawDataSize, nullptr)); // Obtain report. rawData.resize(rawDataSize); VALIDATECALL(zetMetricQueryGetData(queryHandle, &rawDataSize, rawData.data())); } /////////////////////////// /// obtainCalculatedMetrics /////////////////////////// void obtainCalculatedMetrics() { uint32_t setCount = 0; uint32_t totalCalculatedMetricCount = 0; zet_metric_group_properties_t properties = {}; std::vector metricCounts = {}; std::vector results = {}; std::vector metrics = {}; ze_result_t result = ZE_RESULT_SUCCESS; // Obtain maximum space for calculated metrics. result = zetMetricGroupCalculateMetricValues( metricGroup, ZET_METRIC_GROUP_CALCULATION_TYPE_METRIC_VALUES, rawDataSize, rawData.data(), &totalCalculatedMetricCount, nullptr); if (result == ZE_RESULT_ERROR_UNKNOWN) { // Try to use calculate for multiple metric values. VALIDATECALL(zetMetricGroupCalculateMultipleMetricValuesExp( metricGroup, ZET_METRIC_GROUP_CALCULATION_TYPE_METRIC_VALUES, rawDataSize, rawData.data(), &setCount, &totalCalculatedMetricCount, nullptr, nullptr)); // Allocate space for calculated reports. 
metricCounts.resize(setCount); results.resize(totalCalculatedMetricCount); // Obtain calculated metrics and their count. VALIDATECALL(zetMetricGroupCalculateMultipleMetricValuesExp( metricGroup, ZET_METRIC_GROUP_CALCULATION_TYPE_METRIC_VALUES, rawDataSize, rawData.data(), &setCount, &totalCalculatedMetricCount, metricCounts.data(), results.data())); } else { // Allocate space for calculated reports. setCount = 1; metricCounts.resize(setCount); results.resize(totalCalculatedMetricCount); metricCounts[0] = totalCalculatedMetricCount; // Obtain calculated metrics and their count. VALIDATECALL(zetMetricGroupCalculateMetricValues( metricGroup, ZET_METRIC_GROUP_CALCULATION_TYPE_METRIC_VALUES, rawDataSize, rawData.data(), &totalCalculatedMetricCount, results.data())); } // Obtain metric group properties to show each metric. VALIDATECALL(zetMetricGroupGetProperties(metricGroup, &properties)); // Allocate space for all metrics from a given metric group. metrics.resize(properties.metricCount); // Obtain metrics from a given metric group. VALIDATECALL(zetMetricGet(metricGroup, &properties.metricCount, metrics.data())); for (uint32_t i = 0; i < setCount; ++i) { std::cout << "\r\nSet " << i; const uint32_t metricCount = properties.metricCount; const uint32_t metricCountForSet = metricCounts[i]; for (uint32_t j = 0; j < metricCountForSet; j++) { const uint32_t resultIndex = j + metricCount * i; const uint32_t metricIndex = j % metricCount; zet_metric_properties_t metricProperties = {}; VALIDATECALL((resultIndex < totalCalculatedMetricCount) ? ZE_RESULT_SUCCESS : ZE_RESULT_ERROR_UNKNOWN) // Obtain single metric properties to learn output value type. VALIDATECALL(zetMetricGetProperties(metrics[metricIndex], &metricProperties)); VALIDATECALL((results[resultIndex].type == metricProperties.resultType) ? 
ZE_RESULT_SUCCESS : ZE_RESULT_ERROR_UNKNOWN) if (metricIndex == 0) { std::cout << "\r\n"; } std::cout << "\r\n"; std::cout << std::setw(25) << metricProperties.name << ": "; switch (results[resultIndex].type) { case zet_value_type_t::ZET_VALUE_TYPE_BOOL8: std::cout << std::setw(12); std::cout << (results[resultIndex].value.b8 ? "true" : "false"); break; case zet_value_type_t::ZET_VALUE_TYPE_FLOAT32: std::cout << std::setw(12); std::cout << results[resultIndex].value.fp32; break; case zet_value_type_t::ZET_VALUE_TYPE_FLOAT64: std::cout << std::setw(12); std::cout << results[resultIndex].value.fp64; break; case zet_value_type_t::ZET_VALUE_TYPE_UINT32: std::cout << std::setw(12); std::cout << results[resultIndex].value.ui32; break; case zet_value_type_t::ZET_VALUE_TYPE_UINT64: std::cout << std::setw(12); std::cout << results[resultIndex].value.ui64; break; default: break; } } std::cout << "\r\n"; } } /////////////////////////// /// findMetricGroup /////////////////////////// void findMetricGroup(const char *groupName, const zet_metric_group_sampling_type_flag_t samplingType) { uint32_t metricGroupCount = 0; std::vector metricGroups = {}; // Obtain metric group count for a given device. VALIDATECALL(zetMetricGroupGet(deviceHandle, &metricGroupCount, nullptr)); // Obtain all metric groups. metricGroups.resize(metricGroupCount); VALIDATECALL(zetMetricGroupGet(deviceHandle, &metricGroupCount, metricGroups.data())); // Enumerate metric groups to find a particular one with a given group name // and sampling type requested by the user. for (uint32_t i = 0; i < metricGroupCount; ++i) { const zet_metric_group_handle_t metricGroupHandle = metricGroups[i]; zet_metric_group_properties_t metricGroupProperties = {}; // Obtain metric group properties to check the group name and sampling type. 
VALIDATECALL(zetMetricGroupGetProperties(metricGroupHandle, &metricGroupProperties)); printMetricGroupProperties(metricGroupProperties); const bool validGroupName = strcmp(metricGroupProperties.name, groupName) == 0; const bool validSamplingType = (metricGroupProperties.samplingType & samplingType); // Validating the name and sampling type. if (validSamplingType) { // Print metrics from metric group. uint32_t metricCount = 0; std::vector metrics = {}; // Obtain metrics count for verbose purpose. VALIDATECALL(zetMetricGet(metricGroupHandle, &metricCount, nullptr)); // Obtain metrics for verbose purpose. metrics.resize(metricCount); VALIDATECALL(zetMetricGet(metricGroupHandle, &metricCount, metrics.data())); // Enumerate metric group metrics for verbose purpose. for (uint32_t j = 0; j < metricCount; ++j) { const zet_metric_handle_t metric = metrics[j]; zet_metric_properties_t metricProperties = {}; VALIDATECALL(zetMetricGetProperties(metric, &metricProperties)); printMetricProperties(metricProperties); } // Obtain metric group handle. if (validGroupName) { metricGroup = metricGroupHandle; return; } } } // Unable to find metric group. 
VALIDATECALL(ZE_RESULT_ERROR_UNKNOWN); } /////////////////////////// /// printMetricGroupProperties /////////////////////////// void printMetricGroupProperties(const zet_metric_group_properties_t &properties) { if (verbose) { std::cout << "METRIC GROUP: " << "name: " << properties.name << ", " << "desc: " << properties.description << ", " << "samplingType: " << properties.samplingType << ", " << "domain: " << properties.domain << ", " << "metricCount: " << properties.metricCount << std::endl; } } /////////////////////////// /// printMetricProperties /////////////////////////// void printMetricProperties(const zet_metric_properties_t &properties) { if (verbose) { std::cout << "\tMETRIC: " << "name: " << properties.name << ", " << "desc: " << properties.description << ", " << "component: " << properties.component << ", " << "tier: " << properties.tierNumber << ", " << "metricType: " << properties.metricType << ", " << "resultType: " << properties.resultType << ", " << "units: " << properties.resultUnits << std::endl; } } /////////////////////////// /// wait /////////////////////////// void wait(int argc, char *argv[]) { static bool waitEnabled = isArgumentEnabled(argc, argv, "-w", "--wait"); static uint32_t waitTime = getArgumentValue(argc, argv, "-w", "--wait"); if (waitEnabled) { for (uint32_t i = 0; i < waitTime; ++i) { std::this_thread::sleep_for(std::chrono::seconds(1)); printf("\nwait %u", i); } } } }; /////////////////////////// /// noMetric /////////////////////////// bool sample(int argc, char *argv[]) { std::cout << std::endl << "-==== No metric: device " << 0 << " ====-" << std::endl; Sample sample({0, 0}, false, argc, argv); sample.executeOnlyWorkload(); return true; } ///////////////////////////// ///// query ///////////////////////////// bool query(int argc, char *argv[], std::vector devices, std::vector sets) { std::vector samples; // Create samples for each device. 
for (uint32_t i = 0; i < devices.size(); ++i) { samples.push_back(new Sample(devices[i], true, argc, argv)); } // Activate metric sets. for (uint32_t i = 0; i < devices.size(); ++i) { samples[i]->activateMetrics(sets[i].c_str(), ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_EVENT_BASED); } // Activate metric sets. for (uint32_t i = 0; i < devices.size(); ++i) { samples[i]->executeQueryWorkload(); } // Deactivate metric sets. for (uint32_t i = 0; i < devices.size(); ++i) { samples[i]->deactivateMetrics(); } // Remove samples. for (uint32_t i = 0; i < devices.size(); ++i) { delete samples[i]; } return true; } /////////////////////////// /// query_device_0 /////////////////////////// bool query_device_0(int argc, char *argv[]) { std::cout << std::endl << "-==== Metric query: device 0 ====-" << std::endl; query(argc, argv, {Device(0, 0)}, {"TestOa"}); return true; } /////////////////////////// /// query_device_root /////////////////////////// bool query_device_root(int argc, char *argv[]) { std::cout << std::endl << "-==== Metric query: device 0 ====-" << std::endl; query(argc, argv, {Device(0, ROOT_DEVICE)}, {"TestOa"}); return true; } /////////////////////////// /// query_device_0_sub_device_1 /////////////////////////// bool query_device_0_sub_device_1(int argc, char *argv[]) { std::cout << std::endl << "-==== Metric query: device 0 / sub_device 1 ====-" << std::endl; query(argc, argv, {Device(0, 1)}, {"TestOa"}); return true; } /////////////////////////// /// query_device_0_sub_device_0_1 /////////////////////////// bool query_device_0_sub_device_0_1(int argc, char *argv[]) { std::cout << std::endl << "-==== Metric query: device 0 / sub_device 0 : 1 ====-" << std::endl; query(argc, argv, {Device(0, 0), Device(0, 1)}, {"TestOa", "ComputeBasic"}); return true; } /////////////////////////// /// query_device_1_sub_device_1 /////////////////////////// bool query_device_1_sub_device_1(int argc, char *argv[]) { std::cout << std::endl << "-==== Metric query: device 1 / sub_device 1 
====-" << std::endl; query(argc, argv, {Device(1, 1)}, {"TestOa"}); return true; } /////////////////////////// /// query_device_1 /////////////////////////// bool query_device_1(int argc, char *argv[]) { std::cout << std::endl << "-==== Metric query: device 1 ====-" << std::endl; query(argc, argv, {Device(1, 0)}, {"TestOa"}); return true; } /////////////////////////// /// query_device_0_1 /////////////////////////// bool query_device_0_1(int argc, char *argv[]) { std::cout << std::endl << "-==== Metric query: device 0 / 1 ====-" << std::endl; query(argc, argv, {Device(0, 0), Device(1, 0)}, {"TestOa", "ComputeBasic"}); return true; } /////////////////////////// /// stream /////////////////////////// bool stream(int argc, char *argv[], std::vector devices, std::vector sets) { std::vector samples; // Create samples for each device. for (uint32_t i = 0; i < devices.size(); ++i) { samples.push_back(new Sample(devices[i], true, argc, argv)); } // Activate metric sets. for (uint32_t i = 0; i < devices.size(); ++i) { samples[i]->activateMetrics(sets[i].c_str(), ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_TIME_BASED); } // Execute workload. for (uint32_t i = 0; i < devices.size(); ++i) { samples[i]->executeStreamWorkload(); } // Deactivate metric sets. for (uint32_t i = 0; i < devices.size(); ++i) { samples[i]->deactivateMetrics(); } // Destroy samples. 
for (uint32_t i = 0; i < devices.size(); ++i) { delete samples[i]; } return true; } /////////////////////////// /// stream_device_root /////////////////////////// bool stream_device_root(int argc, char *argv[]) { std::cout << std::endl << "-==== Metric stream: device 0 ====-" << std::endl; stream(argc, argv, {Device(0, ROOT_DEVICE)}, {"TestOa"}); return true; } /////////////////////////// /// stream_device_0 /////////////////////////// bool stream_device_0(int argc, char *argv[]) { std::cout << std::endl << "-==== Metric stream: device 0 ====-" << std::endl; stream(argc, argv, {Device(0, 0)}, {"TestOa"}); return true; } /////////////////////////// /// stream_device_0_sub_device_1 /////////////////////////// bool stream_device_0_sub_device_1(int argc, char *argv[]) { std::cout << std::endl << "-==== Metric stream: device 0 / sub_device 1 ====-" << std::endl; stream(argc, argv, {Device(0, 1)}, {"TestOa"}); return true; } /////////////////////////// /// stream_device_0_sub_device_0_1 /////////////////////////// bool stream_device_0_sub_device_0_1(int argc, char *argv[]) { std::cout << std::endl << "-==== Metric stream: device 0 / sub_device 0 : 1 ====-" << std::endl; stream(argc, argv, {Device(0, 0), Device(0, 1)}, {"TestOa", "ComputeBasic"}); return true; } /////////////////////////// /// stream_device_1 /////////////////////////// bool stream_device_1(int argc, char *argv[]) { std::cout << std::endl << "-==== Metric stream: device 1 ====-" << std::endl; stream(argc, argv, {Device(1, 0)}, {"TestOa"}); return true; } /////////////////////////// /// stream_device_1_sub_device_1 /////////////////////////// bool stream_device_1_sub_device_1(int argc, char *argv[]) { std::cout << std::endl << "-==== Metric stream: device 1 / sub_device 1 ====-" << std::endl; stream(argc, argv, {Device(1, 1)}, {"TestOa"}); return true; } /////////////////////////// /// stream_device_0_1 /////////////////////////// bool stream_device_0_1(int argc, char *argv[]) { std::cout << std::endl << 
"-==== Metric stream: device 0 / 1 ====-" << std::endl; stream(argc, argv, {Device(0, 0), Device(1, 0)}, {"TestOa", "ComputeBasic"}); return true; } ///////////////////////////// ///// main ///////////////////////////// int main(int argc, char *argv[]) { printf("Zello metrics\n"); fflush(stdout); std::map> tests; tests["sample"] = sample; tests["query_device_root"] = query_device_root; tests["query_device_0"] = query_device_0; tests["query_device_0_sub_device_1"] = query_device_0_sub_device_1; tests["query_device_0_sub_device_0_1"] = query_device_0_sub_device_0_1; tests["query_device_1"] = query_device_1; tests["query_device_1_sub_device_1"] = query_device_1_sub_device_1; tests["query_device_0_1"] = query_device_0_1; tests["stream_device_root"] = stream_device_root; tests["stream_device_0"] = stream_device_0; tests["stream_device_0_sub_device_1"] = stream_device_0_sub_device_1; tests["stream_device_0_sub_device_0_1"] = stream_device_0_sub_device_0_1; tests["stream_device_1"] = stream_device_1; tests["stream_device_1_sub_device_1"] = stream_device_1_sub_device_1; tests["stream_device_0_1"] = stream_device_0_1; // Run test. for (auto &test : tests) { if (Sample::isArgumentEnabled(argc, argv, "", test.first.c_str())) { return test.second(argc, argv); } } // Print available tests. 
for (auto &test : tests) { std::cout << test.first.c_str() << std::endl; } return 0; } compute-runtime-22.14.22890/level_zero/tools/test/black_box_tests/zello_sysman.cpp000066400000000000000000001714551422164147700302450ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include #include #include #include #include #include #include #include #include #include bool verbose = true; std::string getErrorString(ze_result_t error) { static const std::map mgetErrorString{ {ZE_RESULT_NOT_READY, "ZE_RESULT_NOT_READY"}, {ZE_RESULT_ERROR_DEVICE_LOST, "ZE_RESULT_ERROR_DEVICE_LOST"}, {ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY, "ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY"}, {ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY, "ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY"}, {ZE_RESULT_ERROR_MODULE_BUILD_FAILURE, "ZE_RESULT_ERROR_MODULE_BUILD_FAILURE"}, {ZE_RESULT_ERROR_MODULE_LINK_FAILURE, "ZE_RESULT_ERROR_MODULE_LINK_FAILURE"}, {ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS, "ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS"}, {ZE_RESULT_ERROR_NOT_AVAILABLE, "ZE_RESULT_ERROR_NOT_AVAILABLE"}, {ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE, "ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE"}, {ZE_RESULT_ERROR_UNINITIALIZED, "ZE_RESULT_ERROR_UNINITIALIZED"}, {ZE_RESULT_ERROR_UNSUPPORTED_VERSION, "ZE_RESULT_ERROR_UNSUPPORTED_VERSION"}, {ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, "ZE_RESULT_ERROR_UNSUPPORTED_FEATURE"}, {ZE_RESULT_ERROR_INVALID_ARGUMENT, "ZE_RESULT_ERROR_INVALID_ARGUMENT"}, {ZE_RESULT_ERROR_INVALID_NULL_HANDLE, "ZE_RESULT_ERROR_INVALID_NULL_HANDLE"}, {ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE, "ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE"}, {ZE_RESULT_ERROR_INVALID_NULL_POINTER, "ZE_RESULT_ERROR_INVALID_NULL_POINTER"}, {ZE_RESULT_ERROR_INVALID_SIZE, "ZE_RESULT_ERROR_INVALID_SIZE"}, {ZE_RESULT_ERROR_UNSUPPORTED_SIZE, "ZE_RESULT_ERROR_UNSUPPORTED_SIZE"}, {ZE_RESULT_ERROR_UNSUPPORTED_ALIGNMENT, "ZE_RESULT_ERROR_UNSUPPORTED_ALIGNMENT"}, {ZE_RESULT_ERROR_INVALID_SYNCHRONIZATION_OBJECT, 
"ZE_RESULT_ERROR_INVALID_SYNCHRONIZATION_OBJECT"}, {ZE_RESULT_ERROR_INVALID_ENUMERATION, "ZE_RESULT_ERROR_INVALID_ENUMERATION"}, {ZE_RESULT_ERROR_UNSUPPORTED_ENUMERATION, "ZE_RESULT_ERROR_UNSUPPORTED_ENUMERATION"}, {ZE_RESULT_ERROR_UNSUPPORTED_IMAGE_FORMAT, "ZE_RESULT_ERROR_UNSUPPORTED_IMAGE_FORMAT"}, {ZE_RESULT_ERROR_INVALID_NATIVE_BINARY, "ZE_RESULT_ERROR_INVALID_NATIVE_BINARY"}, {ZE_RESULT_ERROR_INVALID_GLOBAL_NAME, "ZE_RESULT_ERROR_INVALID_GLOBAL_NAME"}, {ZE_RESULT_ERROR_INVALID_KERNEL_NAME, "ZE_RESULT_ERROR_INVALID_KERNEL_NAME"}, {ZE_RESULT_ERROR_INVALID_FUNCTION_NAME, "ZE_RESULT_ERROR_INVALID_FUNCTION_NAME"}, {ZE_RESULT_ERROR_INVALID_GROUP_SIZE_DIMENSION, "ZE_RESULT_ERROR_INVALID_GROUP_SIZE_DIMENSION"}, {ZE_RESULT_ERROR_INVALID_GLOBAL_WIDTH_DIMENSION, "ZE_RESULT_ERROR_INVALID_GLOBAL_WIDTH_DIMENSION"}, {ZE_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX, "ZE_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX"}, {ZE_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE, "ZE_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE"}, {ZE_RESULT_ERROR_INVALID_KERNEL_ATTRIBUTE_VALUE, "ZE_RESULT_ERROR_INVALID_KERNEL_ATTRIBUTE_VALUE"}, {ZE_RESULT_ERROR_INVALID_MODULE_UNLINKED, "ZE_RESULT_ERROR_INVALID_MODULE_UNLINKED"}, {ZE_RESULT_ERROR_INVALID_COMMAND_LIST_TYPE, "ZE_RESULT_ERROR_INVALID_COMMAND_LIST_TYPE"}, {ZE_RESULT_ERROR_OVERLAPPING_REGIONS, "ZE_RESULT_ERROR_OVERLAPPING_REGIONS"}, {ZE_RESULT_ERROR_UNKNOWN, "ZE_RESULT_ERROR_UNKNOWN"}}; auto i = mgetErrorString.find(error); if (i == mgetErrorString.end()) return "ZE_RESULT_ERROR_UNKNOWN"; else return mgetErrorString.at(error); } #define VALIDATECALL(myZeCall) \ do { \ ze_result_t r = myZeCall; \ if (r != ZE_RESULT_SUCCESS) { \ std::cout << getErrorString(r) \ << " returned by " \ << #myZeCall << ": " \ << __FUNCTION__ << ": " \ << __LINE__ << "\n"; \ } \ } while (0); void usage() { std::cout << "\n set Env variable ZES_ENABLE_SYSMAN=1" "\n" "\n zello_sysman [OPTIONS]" "\n" "\n OPTIONS:" "\n -p, --pci selectively run pci black box test" "\n -f, 
--frequency selectively run frequency black box test" "\n -s, --standby selectively run standby black box test" "\n -e, --engine selectively run engine black box test" "\n -c, --scheduler selectively run scheduler black box test" "\n -t, --temperature selectively run temperature black box test" "\n -o, --power selectively run power black box test" "\n -m, --memory selectively run memory black box test" "\n -g, --global selectively run device/global operations black box test" "\n -R, --ras selectively run ras black box test" "\n -E, --event set and listen to events black box test" "\n -r, --reset force|noforce selectively run device reset test" "\n -i, --firmware selectively run device firmware test is the firmware binary needed to flash" "\n -F, --fabricport selectively run fabricport black box test" "\n -d, --diagnostics selectively run diagnostics black box test" "\n -P, --performance selectively run performance-factor black box test" "\n -h, --help display help message" "\n" "\n All L0 Syman APIs that set values require root privileged execution" "\n" "\n"; } void getDeviceHandles(ze_driver_handle_t &driverHandle, std::vector &devices, int argc, char *argv[]) { VALIDATECALL(zeInit(ZE_INIT_FLAG_GPU_ONLY)); uint32_t driverCount = 0; VALIDATECALL(zeDriverGet(&driverCount, nullptr)); if (driverCount == 0) { std::cout << "Error could not retrieve driver" << std::endl; std::terminate(); } VALIDATECALL(zeDriverGet(&driverCount, &driverHandle)); uint32_t deviceCount = 0; VALIDATECALL(zeDeviceGet(driverHandle, &deviceCount, nullptr)); if (deviceCount == 0) { std::cout << "Error could not retrieve device" << std::endl; std::terminate(); } devices.resize(deviceCount); VALIDATECALL(zeDeviceGet(driverHandle, &deviceCount, devices.data())); ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES}; for (const auto &device : devices) { VALIDATECALL(zeDeviceGetProperties(device, &deviceProperties)); if (verbose) { std::cout << "Device Name = " << 
deviceProperties.name << std::endl; } } } void testSysmanPower(ze_device_handle_t &device) { std::cout << std::endl << " ---- Power tests ---- " << std::endl; bool iamroot = (geteuid() == 0); uint32_t count = 0; VALIDATECALL(zesDeviceEnumPowerDomains(device, &count, nullptr)); if (count == 0) { std::cout << "Could not retrieve Power domains" << std::endl; return; } std::vector handles(count, nullptr); VALIDATECALL(zesDeviceEnumPowerDomains(device, &count, handles.data())); for (const auto &handle : handles) { zes_power_properties_t properties; VALIDATECALL(zesPowerGetProperties(handle, &properties)); if (verbose) { std::cout << "properties.canControl = " << properties.canControl << std::endl; std::cout << "properties.isEnergyThresholdSupported= " << properties.isEnergyThresholdSupported << std::endl; std::cout << "properties.defaultLimit= " << properties.defaultLimit << std::endl; std::cout << "properties.maxLimit =" << properties.maxLimit << std::endl; std::cout << "properties.minLimit =" << properties.minLimit << std::endl; } zes_power_energy_counter_t energyCounter; VALIDATECALL(zesPowerGetEnergyCounter(handle, &energyCounter)); if (verbose) { std::cout << "energyCounter.energy = " << energyCounter.energy << std::endl; std::cout << "energyCounter.timestamp = " << energyCounter.timestamp << std::endl; } zes_power_sustained_limit_t sustainedGetDefault = {}; zes_power_burst_limit_t burstGetDefault = {}; VALIDATECALL(zesPowerGetLimits(handle, &sustainedGetDefault, &burstGetDefault, nullptr)); if (verbose) { std::cout << "sustainedGetDefault.enabled = " << sustainedGetDefault.enabled << std::endl; if (sustainedGetDefault.enabled) { std::cout << "sustainedGetDefault.power = " << sustainedGetDefault.power << std::endl; std::cout << "sustainedGetDefault.interval = " << sustainedGetDefault.interval << std::endl; } std::cout << "burstGetDefault.enabled = " << burstGetDefault.enabled << std::endl; if (burstGetDefault.enabled) { std::cout << "burstGetDefault.power = " << 
burstGetDefault.power << std::endl; } } if (iamroot) { zes_power_sustained_limit_t sustainedSet = {}; sustainedSet.power = sustainedGetDefault.power - sustainedGetDefault.power / 10; //Randomly try to reduce power sustainedSet.interval = sustainedGetDefault.interval - sustainedGetDefault.interval / 10; zes_power_burst_limit_t burstSet = {}; if (burstGetDefault.enabled) { burstSet.enabled = 0; } VALIDATECALL(zesPowerSetLimits(handle, &sustainedSet, &burstSet, nullptr)); if (verbose) { std::cout << "zesPowerSetLimits success" << std::endl; std::cout << "Now restore the power values to default ones" << std::endl; } VALIDATECALL(zesPowerSetLimits(handle, &sustainedGetDefault, &burstGetDefault, nullptr)); } } } std::string getEngineFlagType(zes_engine_type_flags_t engineFlag) { static const std::map mgetEngineType{ {ZES_ENGINE_TYPE_FLAG_OTHER, "ZES_ENGINE_TYPE_FLAG_OTHER"}, {ZES_ENGINE_TYPE_FLAG_COMPUTE, "ZES_ENGINE_TYPE_FLAG_COMPUTE"}, {ZES_ENGINE_TYPE_FLAG_3D, "ZES_ENGINE_TYPE_FLAG_3D"}, {ZES_ENGINE_TYPE_FLAG_MEDIA, "ZES_ENGINE_TYPE_FLAG_MEDIA"}, {ZES_ENGINE_TYPE_FLAG_DMA, "ZES_ENGINE_TYPE_FLAG_DMA"}, {ZES_ENGINE_TYPE_FLAG_RENDER, "ZES_ENGINE_TYPE_FLAG_RENDER"}}; auto i = mgetEngineType.find(engineFlag); if (i == mgetEngineType.end()) return "NOT SUPPORTED MODE Engine avalialbe"; else return mgetEngineType.at(engineFlag); } void testSysmanPerformance(ze_device_handle_t &device) { std::cout << std::endl << " ---- Performance-factor tests ---- " << std::endl; uint32_t count = 0; VALIDATECALL(zesDeviceEnumPerformanceFactorDomains(device, &count, nullptr)); if (count == 0) { std::cout << "Could not retrieve Performance factor domains" << std::endl; return; } std::vector handles(count, nullptr); VALIDATECALL(zesDeviceEnumPerformanceFactorDomains(device, &count, handles.data())); for (const auto &handle : handles) { zes_perf_properties_t properties; VALIDATECALL(zesPerformanceFactorGetProperties(handle, &properties)); if (verbose) { std::cout << "properties.onSubdevice = " 
<< properties.onSubdevice << std::endl; std::cout << "properties.subdeviceId = " << properties.subdeviceId << std::endl; std::cout << "properties.engines = " << getEngineFlagType(properties.engines) << std::endl; } double originalFactor = 0; VALIDATECALL(zesPerformanceFactorGetConfig(handle, &originalFactor)); if (verbose) { std::cout << "current Performance Factor = " << originalFactor << std::endl; } std::cout << std::endl; } } std::string getTemperatureSensorType(zes_temp_sensors_t type) { static const std::map mgetSensorType{ {ZES_TEMP_SENSORS_GLOBAL, "ZES_TEMP_SENSORS_GLOBAL"}, {ZES_TEMP_SENSORS_GPU, "ZES_TEMP_SENSORS_GPU"}, {ZES_TEMP_SENSORS_MEMORY, "ZES_TEMP_SENSORS_MEMORY"}, {ZES_TEMP_SENSORS_GLOBAL_MIN, "ZES_TEMP_SENSORS_GLOBAL_MIN"}, {ZES_TEMP_SENSORS_GPU_MIN, "ZES_TEMP_SENSORS_GPU_MIN"}, {ZES_TEMP_SENSORS_MEMORY_MIN, "ZES_TEMP_SENSORS_MEMORY_MIN"}}; auto i = mgetSensorType.find(type); if (i == mgetSensorType.end()) return "NOT SUPPORTED MODE Engine avalialbe"; else return mgetSensorType.at(type); } void testSysmanTemperature(ze_device_handle_t &device) { std::cout << std::endl << " ---- Temperature tests ---- " << std::endl; uint32_t count = 0; VALIDATECALL(zesDeviceEnumTemperatureSensors(device, &count, nullptr)); if (count == 0) { std::cout << "Could not retrieve Temperature domains" << std::endl; return; } std::vector handles(count, nullptr); VALIDATECALL(zesDeviceEnumTemperatureSensors(device, &count, handles.data())); for (const auto &handle : handles) { zes_temp_properties_t properties = {}; VALIDATECALL(zesTemperatureGetProperties(handle, &properties)); double temperature; VALIDATECALL(zesTemperatureGetState(handle, &temperature)); if (verbose) { std::cout << "For subDevice " << properties.subdeviceId << " temperature current state for " << getTemperatureSensorType(properties.type) << " is: " << temperature << std::endl; } } } void testSysmanPci(ze_device_handle_t &device) { std::cout << std::endl << " ---- PCI tests ---- " << std::endl; 
zes_pci_properties_t properties = {}; VALIDATECALL(zesDevicePciGetProperties(device, &properties)); if (verbose) { std::cout << "properties.address.domain = " << properties.address.domain << std::endl; std::cout << "properties.address.bus = " << properties.address.bus << std::endl; std::cout << "properties.address.device = " << properties.address.device << std::endl; std::cout << "properties.address.function = " << properties.address.function << std::endl; std::cout << "properties.maxSpeed.gen = " << properties.maxSpeed.gen << std::endl; std::cout << "properties.maxSpeed.width = " << properties.maxSpeed.width << std::endl; std::cout << "properties.maxSpeed.maxBandwidth = " << properties.maxSpeed.maxBandwidth << std::endl; } uint32_t count = 0; VALIDATECALL(zesDevicePciGetBars(device, &count, nullptr)); if (verbose) { std::cout << "Bar count = " << count << std::endl; } std::vector pciBarProps(count); std::vector pciBarExtProps(count); for (uint32_t i = 0; i < count; i++) { pciBarExtProps[i].stype = ZES_STRUCTURE_TYPE_PCI_BAR_PROPERTIES_1_2; pciBarExtProps[i].pNext = nullptr; pciBarProps[i].stype = ZES_STRUCTURE_TYPE_PCI_BAR_PROPERTIES; pciBarProps[i].pNext = static_cast(&pciBarExtProps[i]); } VALIDATECALL(zesDevicePciGetBars(device, &count, pciBarProps.data())); if (verbose) { for (uint32_t i = 0; i < count; i++) { std::cout << "pciBarProps.type = " << std::hex << pciBarProps[i].type << std::endl; std::cout << "pciBarProps.index = " << std::hex << pciBarProps[i].index << std::endl; std::cout << "pciBarProps.base = " << std::hex << pciBarProps[i].base << std::endl; std::cout << "pciBarProps.size = " << std::hex << pciBarProps[i].size << std::endl; std::cout << "pci_bar_properties_1_2_t.resizableBarSupported = " << static_cast(pciBarExtProps[i].resizableBarSupported) << std::endl; std::cout << "pci_bar_properties_1_2_t.resizableBarEnabled = " << static_cast(pciBarExtProps[i].resizableBarEnabled) << std::endl; } } } void testSysmanFrequency(ze_device_handle_t &device) 
{ std::cout << std::endl << " ---- Frequency tests ---- " << std::endl; bool iamroot = (geteuid() == 0); uint32_t count = 0; VALIDATECALL(zesDeviceEnumFrequencyDomains(device, &count, nullptr)); if (count == 0) { std::cout << "Could not retrieve frequency domains" << std::endl; return; } std::vector handles(count, nullptr); VALIDATECALL(zesDeviceEnumFrequencyDomains(device, &count, handles.data())); for (const auto &handle : handles) { zes_freq_properties_t freqProperties = {}; zes_freq_range_t freqRange = {}; zes_freq_range_t testFreqRange = {}; zes_freq_state_t freqState = {}; VALIDATECALL(zesFrequencyGetProperties(handle, &freqProperties)); if (verbose) { std::cout << "freqProperties.type = " << freqProperties.type << std::endl; std::cout << "freqProperties.canControl = " << freqProperties.canControl << std::endl; std::cout << "freqProperties.isThrottleEventSupported = " << freqProperties.isThrottleEventSupported << std::endl; std::cout << "freqProperties.min = " << freqProperties.min << std::endl; std::cout << "freqProperties.max = " << freqProperties.max << std::endl; if (freqProperties.onSubdevice) { std::cout << "freqProperties.subdeviceId = " << freqProperties.subdeviceId << std::endl; } } VALIDATECALL(zesFrequencyGetState(handle, &freqState)); if (verbose) { std::cout << "freqState.currentVoltage = " << freqState.currentVoltage << std::endl; std::cout << "freqState.request = " << freqState.request << std::endl; std::cout << "freqState.tdp = " << freqState.tdp << std::endl; std::cout << "freqState.efficient = " << freqState.efficient << std::endl; std::cout << "freqState.actual = " << freqState.actual << std::endl; std::cout << "freqState.throttleReasons = " << freqState.throttleReasons << std::endl; } VALIDATECALL(zesFrequencyGetRange(handle, &freqRange)); if (verbose) { std::cout << "freqRange.min = " << freqRange.min << std::endl; std::cout << "freqRange.max = " << freqRange.max << std::endl; } count = 0; 
VALIDATECALL(zesFrequencyGetAvailableClocks(handle, &count, nullptr)); std::vector frequency(count); VALIDATECALL(zesFrequencyGetAvailableClocks(handle, &count, frequency.data())); if (verbose) { for (auto freq : frequency) { std::cout << " frequency = " << freq << std::endl; } } if (iamroot) { // Test setting min and max frequency the same, then restore originals testFreqRange.min = freqRange.min; testFreqRange.max = freqRange.min; if (verbose) { std::cout << "Setting Frequency Range . min " << testFreqRange.min << std::endl; std::cout << "Setting Frequency Range . max " << testFreqRange.max << std::endl; } VALIDATECALL(zesFrequencySetRange(handle, &testFreqRange)); VALIDATECALL(zesFrequencyGetRange(handle, &testFreqRange)); if (verbose) { std::cout << "After Setting Getting Frequency Range . min " << testFreqRange.min << std::endl; std::cout << "After Setting Getting Frequency Range . max " << testFreqRange.max << std::endl; } testFreqRange.min = freqRange.min; testFreqRange.max = freqRange.max; if (verbose) { std::cout << "Setting Frequency Range . min " << testFreqRange.min << std::endl; std::cout << "Setting Frequency Range . max " << testFreqRange.max << std::endl; } VALIDATECALL(zesFrequencySetRange(handle, &testFreqRange)); VALIDATECALL(zesFrequencyGetRange(handle, &testFreqRange)); if (verbose) { std::cout << "After Setting Getting Frequency Range . min " << testFreqRange.min << std::endl; std::cout << "After Setting Getting Frequency Range . max " << testFreqRange.max << std::endl; } } else { std::cout << "Not running as Root. Skipping zetSysmanFrequencySetRange test." 
<< std::endl; } } } void testSysmanRas(ze_device_handle_t &device) { std::cout << std::endl << " ---- Ras tests ---- " << std::endl; uint32_t count = 0; bool iamroot = (geteuid() == 0); VALIDATECALL(zesDeviceEnumRasErrorSets(device, &count, nullptr)); if (count == 0) { std::cout << "Could not retrieve Ras Error Sets" << std::endl; return; } std::vector handles(count, nullptr); VALIDATECALL(zesDeviceEnumRasErrorSets(device, &count, handles.data())); for (const auto &handle : handles) { zes_ras_properties_t rasProperties = {}; zes_ras_state_t rasState = {}; VALIDATECALL(zesRasGetProperties(handle, &rasProperties)); if (verbose) { std::cout << "rasProperties.type = " << rasProperties.type << std::endl; if (rasProperties.onSubdevice) { std::cout << "rasProperties.subdeviceId = " << rasProperties.subdeviceId << std::endl; } } ze_bool_t clear = 0; VALIDATECALL(zesRasGetState(handle, clear, &rasState)); if (verbose) { if (rasProperties.type == ZES_RAS_ERROR_TYPE_UNCORRECTABLE) { std::cout << "Number of fatal accelerator engine resets attempted by the driver = " << rasState.category[ZES_RAS_ERROR_CAT_RESET] << std::endl; std::cout << "Number of fatal errors that have occurred in caches = " << rasState.category[ZES_RAS_ERROR_CAT_CACHE_ERRORS] << std::endl; std::cout << "Number of fatal programming errors that have occurred = " << rasState.category[ZES_RAS_ERROR_CAT_PROGRAMMING_ERRORS] << std::endl; std::cout << "Number of fatal driver errors that have occurred = " << rasState.category[ZES_RAS_ERROR_CAT_DRIVER_ERRORS] << std::endl; std::cout << "Number of fatal compute errors that have occurred = " << rasState.category[ZES_RAS_ERROR_CAT_COMPUTE_ERRORS] << std::endl; std::cout << "Number of fatal non compute errors that have occurred = " << rasState.category[ZES_RAS_ERROR_CAT_NON_COMPUTE_ERRORS] << std::endl; std::cout << "Number of fatal display errors that have occurred = " << rasState.category[ZES_RAS_ERROR_CAT_DISPLAY_ERRORS] << std::endl; } else { std::cout << "Number of 
correctable accelerator engine resets attempted by the driver = " << rasState.category[ZES_RAS_ERROR_CAT_RESET] << std::endl; std::cout << "Number of correctable errors that have occurred in caches = " << rasState.category[ZES_RAS_ERROR_CAT_CACHE_ERRORS] << std::endl; std::cout << "Number of correctable programming errors that have occurred = " << rasState.category[ZES_RAS_ERROR_CAT_PROGRAMMING_ERRORS] << std::endl; std::cout << "Number of correctable driver errors that have occurred = " << rasState.category[ZES_RAS_ERROR_CAT_DRIVER_ERRORS] << std::endl; std::cout << "Number of correctable compute errors that have occurred = " << rasState.category[ZES_RAS_ERROR_CAT_COMPUTE_ERRORS] << std::endl; std::cout << "Number of correctable non compute errors that have occurred = " << rasState.category[ZES_RAS_ERROR_CAT_NON_COMPUTE_ERRORS] << std::endl; std::cout << "Number of correctable display errors that have occurred = " << rasState.category[ZES_RAS_ERROR_CAT_DISPLAY_ERRORS] << std::endl; } } if (iamroot) { zes_ras_config_t getConfig = {}; zes_ras_config_t setConfig = {}; setConfig.totalThreshold = 14; memset(setConfig.detailedThresholds.category, 0, sizeof(setConfig.detailedThresholds.category)); VALIDATECALL(zesRasSetConfig(handle, &setConfig)); if (verbose) { std::cout << "Setting Total threshold = " << setConfig.totalThreshold << std::endl; std::cout << "Setting Threshold for Engine Resets = " << setConfig.detailedThresholds.category[0] << std::endl; std::cout << "Setting Threshold for Programming Errors = " << setConfig.detailedThresholds.category[1] << std::endl; std::cout << "Setting Threshold for Driver Errors = " << setConfig.detailedThresholds.category[2] << std::endl; std::cout << "Setting Threshold for Compute Errors = " << setConfig.detailedThresholds.category[3] << std::endl; std::cout << "Setting Threshold for Non Compute Errors = " << setConfig.detailedThresholds.category[4] << std::endl; std::cout << "Setting Threshold for Cache Errors = " << 
setConfig.detailedThresholds.category[5] << std::endl; std::cout << "Setting Threshold for Display Errors = " << setConfig.detailedThresholds.category[6] << std::endl; } VALIDATECALL(zesRasGetConfig(handle, &getConfig)); if (verbose) { std::cout << "Getting Total threshold = " << getConfig.totalThreshold << std::endl; std::cout << "Getting Threshold for Engine Resets = " << getConfig.detailedThresholds.category[0] << std::endl; std::cout << "Getting Threshold for Programming Errors = " << getConfig.detailedThresholds.category[1] << std::endl; std::cout << "Getting Threshold for Driver Errors = " << getConfig.detailedThresholds.category[2] << std::endl; std::cout << "Getting Threshold for Compute Errors = " << getConfig.detailedThresholds.category[3] << std::endl; std::cout << "Getting Threshold for Non Compute Errors = " << getConfig.detailedThresholds.category[4] << std::endl; std::cout << "Getting Threshold for Cache Errors = " << getConfig.detailedThresholds.category[5] << std::endl; std::cout << "Getting Threshold for Display Errors = " << getConfig.detailedThresholds.category[6] << std::endl; } } } } std::string getStandbyType(zes_standby_type_t standbyType) { if (standbyType == ZES_STANDBY_TYPE_GLOBAL) return "ZES_STANDBY_TYPE_GLOBAL"; else return "NOT SUPPORTED Standby Type "; } std::string getStandbyMode(zes_standby_promo_mode_t standbyMode) { if (standbyMode == ZES_STANDBY_PROMO_MODE_DEFAULT) return "ZES_STANDBY_PROMO_MODE_DEFAULT"; else if (standbyMode == ZES_STANDBY_PROMO_MODE_NEVER) return "ZES_STANDBY_PROMO_MODE_NEVER"; else return "NOT SUPPORTED Standby Type "; } void testSysmanStandby(ze_device_handle_t &device) { std::cout << std::endl << " ---- Standby tests ---- " << std::endl; bool iamroot = (geteuid() == 0); uint32_t count = 0; VALIDATECALL(zesDeviceEnumStandbyDomains(device, &count, nullptr)); if (count == 0) { std::cout << "Could not retrieve Standby domains" << std::endl; return; } std::vector handles(count, nullptr); 
VALIDATECALL(zesDeviceEnumStandbyDomains(device, &count, handles.data())); for (const auto &handle : handles) { zes_standby_properties_t standbyProperties = {}; zes_standby_promo_mode_t standbyMode = ZES_STANDBY_PROMO_MODE_FORCE_UINT32; VALIDATECALL(zesStandbyGetProperties(handle, &standbyProperties)); if (verbose) { std::cout << "standbyProperties.type = " << getStandbyType(standbyProperties.type) << std::endl; if (standbyProperties.onSubdevice) { std::cout << "standbyProperties.subdeviceId = " << standbyProperties.subdeviceId << std::endl; } } VALIDATECALL(zesStandbyGetMode(handle, &standbyMode)); if (verbose) { std::cout << "standbyMode.type = " << getStandbyMode(standbyMode) << std::endl; } if (iamroot) { std::cout << "Setting Standby Mode Default" << std::endl; VALIDATECALL(zesStandbySetMode(handle, ZES_STANDBY_PROMO_MODE_DEFAULT)); std::cout << "Setting Standby Mode Never" << std::endl; VALIDATECALL(zesStandbySetMode(handle, ZES_STANDBY_PROMO_MODE_NEVER)); // Restore the original mode after the test. std::cout << "Restore Standby Mode" << std::endl; VALIDATECALL(zesStandbyGetMode(handle, &standbyMode)); } else { std::cout << "Not running as Root. Skipping zetSysmanStandbySetMode test." 
<< std::endl; } } } std::string getEngineType(zes_engine_group_t engineGroup) { static const std::map mgetEngineType{ {ZES_ENGINE_GROUP_COMPUTE_SINGLE, "ZES_ENGINE_GROUP_COMPUTE_SINGLE"}, {ZES_ENGINE_GROUP_RENDER_SINGLE, "ZES_ENGINE_GROUP_RENDER_SINGLE"}, {ZES_ENGINE_GROUP_MEDIA_DECODE_SINGLE, "ZES_ENGINE_GROUP_MEDIA_DECODE_SINGLE"}, {ZES_ENGINE_GROUP_MEDIA_ENCODE_SINGLE, "ZES_ENGINE_GROUP_MEDIA_ENCODE_SINGLE"}, {ZES_ENGINE_GROUP_COPY_SINGLE, "ZES_ENGINE_GROUP_COPY_SINGLE"}, {ZES_ENGINE_GROUP_ALL, "ZES_ENGINE_GROUP_ALL"}, {ZES_ENGINE_GROUP_COMPUTE_ALL, "ZES_ENGINE_GROUP_COMPUTE_ALL"}, {ZES_ENGINE_GROUP_COPY_ALL, "ZES_ENGINE_GROUP_COPY_ALL"}, {ZES_ENGINE_GROUP_RENDER_ALL, "ZES_ENGINE_GROUP_RENDER_ALL"}, {ZES_ENGINE_GROUP_MEDIA_ALL, "ZES_ENGINE_GROUP_MEDIA_ALL"}, {ZES_ENGINE_GROUP_MEDIA_ENHANCEMENT_SINGLE, "ZES_ENGINE_GROUP_MEDIA_ENHANCEMENT_SINGLE"}}; auto i = mgetEngineType.find(engineGroup); if (i == mgetEngineType.end()) return "NOT SUPPORTED MODE Engine avalialbe"; else return mgetEngineType.at(engineGroup); } void testSysmanEngine(ze_device_handle_t &device) { std::cout << std::endl << " ---- Engine tests ---- " << std::endl; uint32_t count = 0; VALIDATECALL(zesDeviceEnumEngineGroups(device, &count, nullptr)); if (count == 0) { std::cout << "Could not retrieve Engine domains" << std::endl; return; } std::vector handles(count, nullptr); VALIDATECALL(zesDeviceEnumEngineGroups(device, &count, handles.data())); for (const auto &handle : handles) { zes_engine_properties_t engineProperties = {}; zes_engine_stats_t engineStats = {}; VALIDATECALL(zesEngineGetProperties(handle, &engineProperties)); if (verbose) { std::cout << "Engine Type = " << getEngineType(engineProperties.type) << std::endl; if (engineProperties.onSubdevice) { std::cout << "Subdevice Id = " << engineProperties.subdeviceId << std::endl; } } VALIDATECALL(zesEngineGetActivity(handle, &engineStats)); if (verbose) { std::cout << "Active Time = " << engineStats.activeTime << std::endl; std::cout << 
"Timestamp = " << engineStats.timestamp << std::endl; } } } std::string getSchedulerModeName(zes_sched_mode_t mode) { static const std::map mgetSchedulerModeName{ {ZES_SCHED_MODE_TIMEOUT, "ZES_SCHED_MODE_TIMEOUT"}, {ZES_SCHED_MODE_TIMESLICE, "ZES_SCHED_MODE_TIMESLICE"}, {ZES_SCHED_MODE_EXCLUSIVE, "ZES_SCHED_MODE_EXCLUSIVE"}, {ZES_SCHED_MODE_COMPUTE_UNIT_DEBUG, "ZES_SCHED_MODE_COMPUTE_UNIT_DEBUG"}}; auto i = mgetSchedulerModeName.find(mode); if (i == mgetSchedulerModeName.end()) return "NOT SUPPORTED MODE SET"; else return mgetSchedulerModeName.at(mode); } void testSysmanScheduler(ze_device_handle_t &device) { std::cout << std::endl << " ---- Scheduler tests ---- " << std::endl; bool iamroot = (geteuid() == 0); uint32_t count = 0; VALIDATECALL(zesDeviceEnumSchedulers(device, &count, nullptr)); if (count == 0) { std::cout << "Could not retrieve scheduler domains" << std::endl; return; } std::vector handles(count, nullptr); VALIDATECALL(zesDeviceEnumSchedulers(device, &count, handles.data())); for (const auto &handle : handles) { zes_sched_mode_t currentMode = {}; VALIDATECALL(zesSchedulerGetCurrentMode(handle, ¤tMode)); if (verbose) { std::cout << "Current Mode = " << getSchedulerModeName(currentMode) << std::endl; } zes_sched_timeout_properties_t timeoutProperties = {}; zes_sched_timeslice_properties_t timesliceProperties = {}; VALIDATECALL(zesSchedulerGetTimeoutModeProperties(handle, false, &timeoutProperties)); if (verbose) { std::cout << "Timeout Mode Watchdog Timeout = " << timeoutProperties.watchdogTimeout << std::endl; } VALIDATECALL(zesSchedulerGetTimesliceModeProperties(handle, false, ×liceProperties)); if (verbose) { std::cout << "Timeslice Mode Interval = " << timesliceProperties.interval << std::endl; std::cout << "Timeslice Mode Yield Timeout = " << timesliceProperties.yieldTimeout << std::endl; } ze_bool_t needReload = 0; if (iamroot) { std::cout << "Setting Scheduler Timeout Mode" << std::endl; VALIDATECALL(zesSchedulerSetTimeoutMode(handle, 
&timeoutProperties, &needReload)); std::cout << "Setting Scheduler TimeSlice Mode" << std::endl; VALIDATECALL(zesSchedulerSetTimesliceMode(handle, ×liceProperties, &needReload)); std::cout << "Setting Scheduler Exclusive Mode" << std::endl; VALIDATECALL(zesSchedulerSetExclusiveMode(handle, &needReload)); std::cout << "Restoring Scheduler Mode" << std::endl; // Restore the original mode after the test. if (currentMode == ZES_SCHED_MODE_TIMEOUT) { VALIDATECALL(zesSchedulerSetTimeoutMode(handle, &timeoutProperties, &needReload)); } else if (currentMode == ZES_SCHED_MODE_TIMESLICE) { VALIDATECALL(zesSchedulerSetTimesliceMode(handle, ×liceProperties, &needReload)); } else if (currentMode == ZES_SCHED_MODE_EXCLUSIVE) { VALIDATECALL(zesSchedulerSetExclusiveMode(handle, &needReload)); } } else { std::cout << "Not running as Root. Skipping zetSysmanSchedulerSetTimeoutMode test." << std::endl; std::cout << "Not running as Root. Skipping zetSysmanSchedulerSetTimesliceMode test." << std::endl; std::cout << "Not running as Root. Skipping zetSysmanSchedulerSetExclusiveMode test." 
<< std::endl; } } } std::string getMemoryType(zes_mem_type_t memType) { static const std::map mgetMemoryType{ {ZES_MEM_TYPE_HBM, "ZES_MEM_TYPE_HBM"}, {ZES_MEM_TYPE_DDR, "ZES_MEM_TYPE_DDR"}, {ZES_MEM_TYPE_DDR3, "ZES_MEM_TYPE_DDR3"}, {ZES_MEM_TYPE_DDR4, "ZES_MEM_TYPE_DDR4"}, {ZES_MEM_TYPE_DDR5, "ZES_MEM_TYPE_DDR5"}, {ZES_MEM_TYPE_LPDDR, "ZES_MEM_TYPE_LPDDR"}, {ZES_MEM_TYPE_LPDDR3, "ZES_MEM_TYPE_LPDDR3"}, {ZES_MEM_TYPE_LPDDR4, "ZES_MEM_TYPE_LPDDR4"}, {ZES_MEM_TYPE_LPDDR5, "ZES_MEM_TYPE_LPDDR5"}, {ZES_MEM_TYPE_SRAM, "ZES_MEM_TYPE_SRAM"}, {ZES_MEM_TYPE_L1, "ZES_MEM_TYPE_L1"}, {ZES_MEM_TYPE_L3, "ZES_MEM_TYPE_L3"}, {ZES_MEM_TYPE_GRF, "ZES_MEM_TYPE_GRF"}, {ZES_MEM_TYPE_SLM, "ZES_MEM_TYPE_SLM"}}; auto i = mgetMemoryType.find(memType); if (i == mgetMemoryType.end()) return "NOT SUPPORTED MEMORY TYPE SET"; else return mgetMemoryType.at(memType); } std::string getMemoryHealth(zes_mem_health_t memHealth) { static const std::map mgetMemoryHealth{ {ZES_MEM_HEALTH_UNKNOWN, "ZES_MEM_HEALTH_UNKNOWN"}, {ZES_MEM_HEALTH_OK, "ZES_MEM_HEALTH_OK"}, {ZES_MEM_HEALTH_DEGRADED, "ZES_MEM_HEALTH_DEGRADED"}, {ZES_MEM_HEALTH_CRITICAL, "ZES_MEM_HEALTH_CRITICAL"}, {ZES_MEM_HEALTH_REPLACE, "ZES_MEM_HEALTH_REPLACE"}}; auto i = mgetMemoryHealth.find(memHealth); if (i == mgetMemoryHealth.end()) return "NOT SUPPORTED MEMORY HEALTH SET"; else return mgetMemoryHealth.at(memHealth); } void testSysmanMemory(ze_device_handle_t &device) { std::cout << std::endl << " ---- Memory tests ---- " << std::endl; uint32_t count = 0; VALIDATECALL(zesDeviceEnumMemoryModules(device, &count, nullptr)); if (count == 0) { std::cout << "Could not retrieve Memory domains" << std::endl; return; } std::vector handles(count, nullptr); VALIDATECALL(zesDeviceEnumMemoryModules(device, &count, handles.data())); for (const auto &handle : handles) { zes_mem_properties_t memoryProperties = {}; zes_mem_state_t memoryState = {}; zes_mem_bandwidth_t memoryBandwidth = {}; VALIDATECALL(zesMemoryGetProperties(handle, &memoryProperties)); if 
(verbose) { std::cout << "Memory Type = " << getMemoryType(memoryProperties.type) << std::endl; std::cout << "On Subdevice = " << memoryProperties.onSubdevice << std::endl; std::cout << "Subdevice Id = " << memoryProperties.subdeviceId << std::endl; std::cout << "Memory Size = " << memoryProperties.physicalSize << std::endl; std::cout << "Number of channels = " << memoryProperties.numChannels << std::endl; } VALIDATECALL(zesMemoryGetState(handle, &memoryState)); if (verbose) { std::cout << "Memory Health = " << getMemoryHealth(memoryState.health) << std::endl; std::cout << "The total allocatable memory in bytes = " << memoryState.size << std::endl; std::cout << "The free memory in bytes = " << memoryState.free << std::endl; } VALIDATECALL(zesMemoryGetBandwidth(handle, &memoryBandwidth)); if (verbose) { std::cout << "Memory Read Counter = " << memoryBandwidth.readCounter << std::endl; std::cout << "Memory Write Counter = " << memoryBandwidth.writeCounter << std::endl; std::cout << "Memory Maximum Bandwidth = " << memoryBandwidth.maxBandwidth << std::endl; std::cout << "Memory Timestamp = " << memoryBandwidth.timestamp << std::endl; } } }

// Prints the properties of one firmware domain when `verbose` is set.
static void printFirmwareProperties(const zes_firmware_properties_t &fwProperties) {
    if (!verbose) {
        return;
    }
    std::cout << "firmware name = " << fwProperties.name << std::endl;
    std::cout << "On Subdevice = " << fwProperties.onSubdevice << std::endl;
    std::cout << "Subdevice Id = " << fwProperties.subdeviceId << std::endl;
    std::cout << "firmware version = " << fwProperties.version << std::endl;
}

// Black-box test for the Sysman firmware APIs.
//
// Prints the properties of every firmware domain of `device`. When
// `imagePath` names a readable, non-empty file, that image is flashed to
// each domain and the properties are printed again afterwards.
//
// device    - device whose firmware domains are enumerated.
// imagePath - path of the firmware binary to flash; empty means "do not flash".
void testSysmanFirmware(ze_device_handle_t &device, std::string imagePath) {
    std::cout << std::endl
              << " ---- firmware tests ---- " << std::endl;

    // Load the image once, into heap storage. Reading inside the handle loop
    // would hit end-of-file for every handle after the first, and a stack
    // array sized by the file is not standard C++.
    std::vector<char> image;
    if (imagePath.size() != 0) {
        struct stat statBuf;
        if (stat(imagePath.c_str(), &statBuf) == 0 && statBuf.st_size > 0) {
            std::ifstream imageFile(imagePath.c_str(), std::ios::binary);
            image.resize(static_cast<size_t>(statBuf.st_size));
            if (!imageFile.read(image.data(), static_cast<std::streamsize>(image.size()))) {
                std::cout << "Could not read firmware image " << imagePath << std::endl;
                image.clear(); // never flash a partially filled buffer
            }
        }
    }

    uint32_t count = 0;
    VALIDATECALL(zesDeviceEnumFirmwares(device, &count, nullptr));
    if (count == 0) {
        std::cout << "Could not retrieve Firmware domains" << std::endl;
        return;
    }
    std::vector<zes_firmware_handle_t> handles(count, nullptr);
    VALIDATECALL(zesDeviceEnumFirmwares(device, &count, handles.data()));

    for (auto handle : handles) {
        zes_firmware_properties_t fwProperties = {};
        VALIDATECALL(zesFirmwareGetProperties(handle, &fwProperties));
        printFirmwareProperties(fwProperties);

        if (!image.empty()) {
            VALIDATECALL(zesFirmwareFlash(handle, image.data(), static_cast<uint32_t>(image.size())));
            // Re-read so the version reported after flashing is visible.
            VALIDATECALL(zesFirmwareGetProperties(handle, &fwProperties));
            printFirmwareProperties(fwProperties);
        }
    }
}

// Requests a device reset through zesDeviceReset, passing `force` through.
void testSysmanReset(ze_device_handle_t &device, bool force) {
    std::cout << std::endl
              << " ---- Reset test (force = " << (force ? "true" : "false") << ") ---- " << std::endl;
    VALIDATECALL(zesDeviceReset(device, force));
}

// Prints one line per recognised event flag set in pEvents[0..deviceCount).
static void printDeviceEvents(const zes_event_type_flags_t *pEvents, size_t deviceCount) {
    for (size_t index = 0; index < deviceCount; index++) {
        if (pEvents[index] & ZES_EVENT_TYPE_FLAG_DEVICE_RESET_REQUIRED) {
            std::cout << "Device " << index << " got reset required event" << std::endl;
        }
        if (pEvents[index] & ZES_EVENT_TYPE_FLAG_DEVICE_DETACH) {
            std::cout << "Device " << index << " got DEVICE_DETACH event" << std::endl;
        }
        if (pEvents[index] & ZES_EVENT_TYPE_FLAG_DEVICE_ATTACH) {
            std::cout << "Device " << index << " got DEVICE_ATTACH event" << std::endl;
        }
        if (pEvents[index] & ZES_EVENT_TYPE_FLAG_RAS_UNCORRECTABLE_ERRORS) {
            std::cout << "Device " << index << " got RAS UNCORRECTABLE event" << std::endl;
        }
        if (pEvents[index] & ZES_EVENT_TYPE_FLAG_RAS_CORRECTABLE_ERRORS) {
            std::cout << "Device " << index << " got RAS CORRECTABLE event" << std::endl;
        }
    }
}

// Calls zesDriverEventListen for `devices` with a timeout value of 10000 and,
// when `verbose` is set and at least one device reported an event, prints the
// events received per device.
//
// `events` is accepted for interface compatibility but is not read here.
void testSysmanListenEvents(ze_driver_handle_t driver, std::vector<ze_device_handle_t> &devices, zes_event_type_flags_t events) {
    (void)events;
    uint32_t numDeviceEvents = 0;
    // Vector-owned buffer: released on every exit path (the previous new[]
    // allocation was never freed).
    std::vector<zes_event_type_flags_t> deviceEvents(devices.size(), 0);
    uint32_t timeout = 10000u;
    uint32_t numDevices = static_cast<uint32_t>(devices.size());
    VALIDATECALL(zesDriverEventListen(driver, timeout, numDevices, devices.data(), &numDeviceEvents, deviceEvents.data()));
    if (verbose && numDeviceEvents) {
        printDeviceEvents(deviceEvents.data(), deviceEvents.size());
    }
}

// Same as testSysmanListenEvents, but uses zesDriverEventListenEx, whose
// timeout argument is 64 bits wide.
//
// `events` is accepted for interface compatibility but is not read here.
void testSysmanListenEventsEx(ze_driver_handle_t driver, std::vector<ze_device_handle_t> &devices, zes_event_type_flags_t events) {
    (void)events;
    uint32_t numDeviceEvents = 0;
    // Vector-owned buffer: released on every exit path (the previous new[]
    // allocation was never freed).
    std::vector<zes_event_type_flags_t> deviceEvents(devices.size(), 0);
    uint64_t timeout = 10000u;
    uint32_t numDevices = static_cast<uint32_t>(devices.size());
    VALIDATECALL(zesDriverEventListenEx(driver, timeout, numDevices, devices.data(), &numDeviceEvents, deviceEvents.data()));
    if (verbose && numDeviceEvents) {
        printDeviceEvents(deviceEvents.data(), deviceEvents.size());
    }
}

std::string getFabricPortStatus(zes_fabric_port_status_t status) { static const std::map fabricPortStatus{ {ZES_FABRIC_PORT_STATUS_UNKNOWN, "ZES_FABRIC_PORT_STATUS_UNKNOWN"}, {ZES_FABRIC_PORT_STATUS_HEALTHY, "ZES_FABRIC_PORT_STATUS_HEALTHY"}, {ZES_FABRIC_PORT_STATUS_DEGRADED, "ZES_FABRIC_PORT_STATUS_DEGRADED"}, {ZES_FABRIC_PORT_STATUS_FAILED, "ZES_FABRIC_PORT_STATUS_FAILED"}, {ZES_FABRIC_PORT_STATUS_DISABLED, "ZES_FABRIC_PORT_STATUS_DISABLED"}}; auto i = fabricPortStatus.find(status); if (i 
== fabricPortStatus.end()) return "UNEXPECTED STATUS"; else return fabricPortStatus.at(status); } std::string getFabricPortQualityIssues(zes_fabric_port_qual_issue_flags_t qualityIssues) { std::string returnValue; returnValue.clear(); if (qualityIssues & ZES_FABRIC_PORT_QUAL_ISSUE_FLAG_LINK_ERRORS) { returnValue.append("ZES_FABRIC_PORT_QUAL_ISSUE_FLAG_LINK_ERRORS "); } if (qualityIssues & ZES_FABRIC_PORT_QUAL_ISSUE_FLAG_SPEED) { returnValue.append("ZES_FABRIC_PORT_QUAL_ISSUE_FLAG_SPEED"); } return returnValue; } std::string getFabricPortFailureReasons(zes_fabric_port_failure_flags_t failureReasons) { std::string returnValue; returnValue.clear(); if (failureReasons & ZES_FABRIC_PORT_FAILURE_FLAG_FAILED) { returnValue.append("ZES_FABRIC_PORT_FAILURE_FLAG_FAILED "); } if (failureReasons & ZES_FABRIC_PORT_FAILURE_FLAG_TRAINING_TIMEOUT) { returnValue.append("ZES_FABRIC_PORT_FAILURE_FLAG_TRAINING_TIMEOUT "); } if (failureReasons & ZES_FABRIC_PORT_FAILURE_FLAG_FLAPPING) { returnValue.append("ZES_FABRIC_PORT_FAILURE_FLAG_FLAPPING "); } return returnValue; } void testSysmanFabricPort(ze_device_handle_t &device) { std::cout << std::endl << " ---- FabricPort tests ---- " << std::endl; uint32_t count = 0; VALIDATECALL(zesDeviceEnumFabricPorts(device, &count, nullptr)); if (count == 0) { std::cout << "Could not retrieve FabricPorts" << std::endl; return; } std::vector handles(count, nullptr); VALIDATECALL(zesDeviceEnumFabricPorts(device, &count, handles.data())); for (auto handle : handles) { zes_fabric_port_properties_t fabricPortProperties = {}; zes_fabric_link_type_t fabricPortLinkType = {}; zes_fabric_port_config_t fabricPortConfig = {}; zes_fabric_port_state_t fabricPortState = {}; zes_fabric_port_throughput_t fabricPortThroughput = {}; VALIDATECALL(zesFabricPortGetProperties(handle, &fabricPortProperties)); if (verbose) { std::cout << "Model = \"" << fabricPortProperties.model << "\"" << std::endl; std::cout << "On Subdevice = " << 
static_cast(fabricPortProperties.onSubdevice) << std::endl; std::cout << "Subdevice Id = " << fabricPortProperties.subdeviceId << std::endl; std::cout << "Port ID = [" << fabricPortProperties.portId.fabricId << ":" << fabricPortProperties.portId.attachId << ":" << static_cast(fabricPortProperties.portId.portNumber) << "]" << std::endl; std::cout << "Max Rx Speed = " << fabricPortProperties.maxRxSpeed.bitRate << " pbs, " << fabricPortProperties.maxRxSpeed.width << " lanes" << std::endl; std::cout << "Max Tx Speed = " << fabricPortProperties.maxTxSpeed.bitRate << " pbs, " << fabricPortProperties.maxTxSpeed.width << " lanes" << std::endl; } VALIDATECALL(zesFabricPortGetLinkType(handle, &fabricPortLinkType)); if (verbose) { std::cout << "Link Type = \"" << fabricPortLinkType.desc << "\"" << std::endl; } VALIDATECALL(zesFabricPortGetConfig(handle, &fabricPortConfig)); if (verbose) { std::cout << "Enabled = " << static_cast(fabricPortConfig.enabled) << std::endl; std::cout << "Beaconing = " << static_cast(fabricPortConfig.beaconing) << std::endl; } VALIDATECALL(zesFabricPortGetState(handle, &fabricPortState)); if (verbose) { std::cout << "Status = " << getFabricPortStatus(fabricPortState.status) << std::endl; std::cout << "Quality Issues = " << getFabricPortQualityIssues(fabricPortState.qualityIssues) << std::hex << fabricPortState.qualityIssues << std::endl; std::cout << "Failure Reasons = " << getFabricPortFailureReasons(fabricPortState.failureReasons) << std::hex << fabricPortState.failureReasons << std::endl; std::cout << "Remote Port ID = [" << fabricPortState.remotePortId.fabricId << ":" << fabricPortState.remotePortId.attachId << ":" << static_cast(fabricPortState.remotePortId.portNumber) << "]" << std::endl; std::cout << "Rx Speed = " << fabricPortState.rxSpeed.bitRate << " pbs, " << fabricPortState.rxSpeed.width << " lanes" << std::endl; std::cout << "Tx Speed = " << fabricPortState.txSpeed.bitRate << " pbs, " << fabricPortState.txSpeed.width << " lanes" << 
std::endl; } VALIDATECALL(zesFabricPortGetThroughput(handle, &fabricPortThroughput)); if (verbose) { std::cout << "Timestamp = " << fabricPortThroughput.timestamp << std::endl; std::cout << "RX Counter = " << fabricPortThroughput.rxCounter << std::endl; std::cout << "TX Counter = " << fabricPortThroughput.txCounter << std::endl; } } } void testSysmanGlobalOperations(ze_device_handle_t &device) { std::cout << std::endl << " ---- Global Operations tests ---- " << std::endl; zes_device_properties_t properties = {}; VALIDATECALL(zesDeviceGetProperties(device, &properties)); if (verbose) { std::cout << "properties.numSubdevices = " << properties.numSubdevices << std::endl; std::cout << "properties.serialNumber = " << properties.serialNumber << std::endl; std::cout << "properties.boardNumber = " << properties.boardNumber << std::endl; std::cout << "properties.brandName = " << properties.brandName << std::endl; std::cout << "properties.modelName = " << properties.modelName << std::endl; std::cout << "properties.vendorName = " << properties.vendorName << std::endl; std::cout << "properties.driverVersion= " << properties.driverVersion << std::endl; } uint32_t count = 0; VALIDATECALL(zesDeviceProcessesGetState(device, &count, nullptr)); std::vector processes(count); VALIDATECALL(zesDeviceProcessesGetState(device, &count, processes.data())); if (verbose) { for (const auto &process : processes) { std::cout << "processes.processId = " << process.processId << std::endl; std::cout << "processes.memSize = " << process.memSize << std::endl; std::cout << "processes.sharedSize = " << process.sharedSize << std::endl; std::cout << "processes.engines = " << process.engines << std::endl; } } zes_device_state_t deviceState = {}; VALIDATECALL(zesDeviceGetState(device, &deviceState)); if (verbose) { std::cout << "reset status: " << deviceState.reset << std::endl; std::cout << "repair" << deviceState.repaired << std::endl; if (deviceState.reset & ZES_RESET_REASON_FLAG_WEDGED) { std::cout << 
"state reset wedged = " << deviceState.reset << std::endl; } if (deviceState.reset & ZES_RESET_REASON_FLAG_REPAIR) { std::cout << "state reset repair = " << deviceState.reset << std::endl; std::cout << "repair state = " << deviceState.repaired << std::endl; } } } void testSysmanDiagnostics(ze_device_handle_t &device) { std::cout << std::endl << " ---- diagnostics tests ---- " << std::endl; uint32_t count = 0; uint32_t subTestCount = 0; zes_diag_test_t tests = {}; zes_diag_result_t results; uint32_t start = 0, end = 0; VALIDATECALL(zesDeviceEnumDiagnosticTestSuites(device, &count, nullptr)); if (count == 0) { std::cout << "Could not retrieve diagnostics domains" << std::endl; return; } std::vector handles(count, nullptr); VALIDATECALL(zesDeviceEnumDiagnosticTestSuites(device, &count, handles.data())); for (auto handle : handles) { zes_diag_properties_t diagProperties = {}; VALIDATECALL(zesDiagnosticsGetProperties(handle, &diagProperties)); if (verbose) { std::cout << "diagnostics name = " << diagProperties.name << std::endl; std::cout << "On Subdevice = " << diagProperties.onSubdevice << std::endl; std::cout << "Subdevice Id = " << diagProperties.subdeviceId << std::endl; std::cout << "diagnostics have sub tests = " << diagProperties.haveTests << std::endl; } if (diagProperties.haveTests != 0) { VALIDATECALL(zesDiagnosticsGetTests(handle, &subTestCount, &tests)); if (verbose) { std::cout << "diagnostics subTestCount = " << subTestCount << "for " << diagProperties.name << std::endl; for (uint32_t i = 0; i < subTestCount; i++) { std::cout << "subTest#" << tests.index << " = " << tests.name << std::endl; } } end = subTestCount - 1; } VALIDATECALL(zesDiagnosticsRunTests(handle, start, end, &results)); if (verbose) { switch (results) { case ZES_DIAG_RESULT_NO_ERRORS: std::cout << "No errors have occurred" << std::endl; break; case ZES_DIAG_RESULT_REBOOT_FOR_REPAIR: std::cout << "diagnostics successful and repair applied, reboot needed" << std::endl; break; case 
ZES_DIAG_RESULT_FAIL_CANT_REPAIR: std::cout << "diagnostics run, unable to fix" << std::endl; break; case ZES_DIAG_RESULT_ABORT: std::cout << "diagnostics run fialed, unknown error" << std::endl; break; case ZES_DIAG_RESULT_FORCE_UINT32: default: std::cout << "undefined error" << std::endl; } } } } bool validateGetenv(const char *name) { const char *env = getenv(name); if ((nullptr == env) || (0 == strcmp("0", env))) return false; return (0 == strcmp("1", env)); } int main(int argc, char *argv[]) { std::vector devices; ze_driver_handle_t driver; if (!validateGetenv("ZES_ENABLE_SYSMAN")) { std::cout << "Must set environment variable ZES_ENABLE_SYSMAN=1" << std::endl; exit(0); } getDeviceHandles(driver, devices, argc, argv); int opt; static struct option long_opts[] = { {"help", no_argument, nullptr, 'h'}, {"pci", no_argument, nullptr, 'p'}, {"frequency", no_argument, nullptr, 'f'}, {"standby", no_argument, nullptr, 's'}, {"engine", no_argument, nullptr, 'e'}, {"scheduler", no_argument, nullptr, 'c'}, {"temperature", no_argument, nullptr, 't'}, {"power", no_argument, nullptr, 'o'}, {"global", no_argument, nullptr, 'g'}, {"ras", no_argument, nullptr, 'R'}, {"memory", no_argument, nullptr, 'm'}, {"event", no_argument, nullptr, 'E'}, {"reset", required_argument, nullptr, 'r'}, {"fabricport", no_argument, nullptr, 'F'}, {"firmware", optional_argument, nullptr, 'i'}, {"diagnostics", no_argument, nullptr, 'd'}, {"performance", no_argument, nullptr, 'P'}, {0, 0, 0, 0}, }; bool force = false; while ((opt = getopt_long(argc, argv, "hdpPfsectogmrFEi:", long_opts, nullptr)) != -1) { switch (opt) { case 'h': usage(); exit(0); break; case 'p': std::for_each(devices.begin(), devices.end(), [&](auto device) { testSysmanPci(device); }); break; case 'P': std::for_each(devices.begin(), devices.end(), [&](auto device) { testSysmanPerformance(device); }); break; case 'f': std::for_each(devices.begin(), devices.end(), [&](auto device) { testSysmanFrequency(device); }); break; case 's': 
std::for_each(devices.begin(), devices.end(), [&](auto device) { testSysmanStandby(device); }); break; case 'e': std::for_each(devices.begin(), devices.end(), [&](auto device) { testSysmanEngine(device); }); break; case 'c': std::for_each(devices.begin(), devices.end(), [&](auto device) { testSysmanScheduler(device); }); break; case 't': std::for_each(devices.begin(), devices.end(), [&](auto device) { testSysmanTemperature(device); }); break; case 'o': std::for_each(devices.begin(), devices.end(), [&](auto device) { testSysmanPower(device); }); break; case 'g': std::for_each(devices.begin(), devices.end(), [&](auto device) { testSysmanGlobalOperations(device); }); break; case 'm': std::for_each(devices.begin(), devices.end(), [&](auto device) { testSysmanMemory(device); }); break; case 'R': std::for_each(devices.begin(), devices.end(), [&](auto device) { testSysmanRas(device); }); break; case 'i': { std::string filePathFirmware; if (optarg != nullptr) { filePathFirmware = optarg; } std::for_each(devices.begin(), devices.end(), [&](auto device) { testSysmanFirmware(device, filePathFirmware); }); break; } case 'r': if (!strcmp(optarg, "force")) { force = true; } else if (!strcmp(optarg, "noforce")) { force = false; } else { usage(); exit(0); } std::for_each(devices.begin(), devices.end(), [&](auto device) { testSysmanReset(device, force); }); break; case 'E': std::for_each(devices.begin(), devices.end(), [&](auto device) { zesDeviceEventRegister(device, ZES_EVENT_TYPE_FLAG_DEVICE_RESET_REQUIRED | ZES_EVENT_TYPE_FLAG_DEVICE_DETACH | ZES_EVENT_TYPE_FLAG_DEVICE_ATTACH | ZES_EVENT_TYPE_FLAG_RAS_CORRECTABLE_ERRORS | ZES_EVENT_TYPE_FLAG_RAS_UNCORRECTABLE_ERRORS | ZES_EVENT_TYPE_FLAG_FABRIC_PORT_HEALTH); }); testSysmanListenEvents(driver, devices, ZES_EVENT_TYPE_FLAG_DEVICE_RESET_REQUIRED | ZES_EVENT_TYPE_FLAG_DEVICE_DETACH | ZES_EVENT_TYPE_FLAG_DEVICE_ATTACH | ZES_EVENT_TYPE_FLAG_RAS_CORRECTABLE_ERRORS | ZES_EVENT_TYPE_FLAG_RAS_UNCORRECTABLE_ERRORS | 
ZES_EVENT_TYPE_FLAG_FABRIC_PORT_HEALTH);
            // The same event mask is registered on every device a second time
            // before the listen call that uses the Ex variant.
            std::for_each(devices.begin(), devices.end(), [&](auto device) {
                zesDeviceEventRegister(device,
                                       ZES_EVENT_TYPE_FLAG_DEVICE_RESET_REQUIRED | ZES_EVENT_TYPE_FLAG_DEVICE_DETACH | ZES_EVENT_TYPE_FLAG_DEVICE_ATTACH | ZES_EVENT_TYPE_FLAG_RAS_CORRECTABLE_ERRORS | ZES_EVENT_TYPE_FLAG_RAS_UNCORRECTABLE_ERRORS | ZES_EVENT_TYPE_FLAG_FABRIC_PORT_HEALTH);
            });
            testSysmanListenEventsEx(driver, devices,
                                     ZES_EVENT_TYPE_FLAG_DEVICE_RESET_REQUIRED | ZES_EVENT_TYPE_FLAG_DEVICE_DETACH | ZES_EVENT_TYPE_FLAG_DEVICE_ATTACH | ZES_EVENT_TYPE_FLAG_RAS_CORRECTABLE_ERRORS | ZES_EVENT_TYPE_FLAG_RAS_UNCORRECTABLE_ERRORS | ZES_EVENT_TYPE_FLAG_FABRIC_PORT_HEALTH);
            break;
        case 'F':
            std::for_each(devices.begin(), devices.end(), [&](auto device) {
                testSysmanFabricPort(device);
            });
            break;
        case 'd':
            std::for_each(devices.begin(), devices.end(), [&](auto device) {
                testSysmanDiagnostics(device);
            });
            break;
        // Any option not handled above prints the usage text and exits with status 0.
        default:
            usage();
            exit(0);
        }
    }

    return 0;
}
compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/000077500000000000000000000000001422164147700240405ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/CMakeLists.txt000066400000000000000000000072231422164147700266040ustar00rootroot00000000000000#
# Copyright (C) 2020-2021 Intel Corporation
#
# SPDX-License-Identifier: MIT
#

link_libraries(${ASAN_LIBS} ${TSAN_LIBS})

set(TARGET_NAME ${TARGET_NAME_L0}_tools_tests)

include(${NEO_SOURCE_DIR}/cmake/setup_ult_global_flags.cmake)

function(ADD_SUPPORTED_TEST_PRODUCT_FAMILIES_DEFINITION)
  set(L0_TESTED_PRODUCT_FAMILIES ${ALL_TESTED_PRODUCT_FAMILY})
  string(REPLACE ";" "," L0_TESTED_PRODUCT_FAMILIES "${L0_TESTED_PRODUCT_FAMILIES}")
  add_definitions(-DSUPPORTED_TEST_PRODUCT_FAMILIES=${L0_TESTED_PRODUCT_FAMILIES})
endfunction()

ADD_SUPPORTED_TEST_PRODUCT_FAMILIES_DEFINITION()

add_executable(${TARGET_NAME}
               ${NEO_SOURCE_DIR}/level_zero/core/source/dll/disallow_deferred_deleter.cpp
)

target_sources(${TARGET_NAME} PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
               ${NEO_SOURCE_DIR}/level_zero/core/test/unit_tests/main.cpp
               ${NEO_SOURCE_DIR}/level_zero/core/test/unit_tests/mock.h
               ${NEO_SOURCE_DIR}/level_zero/core/test/unit_tests/white_box.h
               ${NEO_SOURCE_DIR}/level_zero/core/source/dll/create_builtin_functions_lib.cpp
               ${NEO_SOURCE_DIR}/level_zero/tools/test/unit_tests/sources/debug/debug_session_helper.cpp
               ${NEO_SHARED_TEST_DIRECTORY}/unit_test/tests_configuration.h
)

# NOTE(review): every bare `$` below looks like a generator expression whose
# `<...>` part was lost in this copy of the file - confirm against upstream.
target_sources(${TARGET_NAME} PRIVATE
               $
               $
               $
               $
               $
               $
)

set_target_properties(${TARGET_NAME} PROPERTIES FOLDER ${TARGET_NAME_L0})

add_subdirectoriesL0(${CMAKE_CURRENT_SOURCE_DIR} "*")

target_compile_definitions(${TARGET_NAME} PRIVATE $)

# BEFORE puts these directories ahead of the target's existing include paths.
target_include_directories(${TARGET_NAME}
                           BEFORE
                           PRIVATE $/tools
                           ${NEO_SHARED_TEST_DIRECTORY}/common/test_macros/header${BRANCH_DIR_SUFFIX}
                           ${NEO_SHARED_TEST_DIRECTORY}/common/test_configuration/unit_tests
)

# Windows-only link library and build dependency.
if(WIN32)
  target_link_libraries(${TARGET_NAME} dbghelp)
  add_dependencies(${TARGET_NAME} mock_gdi)
endif()

target_link_libraries(${TARGET_NAME}
                      ${NEO_SHARED_MOCKABLE_LIB_NAME}
                      ${HW_LIBS_ULT}
                      gmock-gtest
                      ${NEO_EXTRA_LIBS}
)

target_sources(${TARGET_NAME} PRIVATE
               $
               $
               $
               $
               $
)

# Extra sources are added only when the SPIR-V builtins library target exists.
if(TARGET ${BUILTINS_SPIRV_LIB_NAME})
  target_sources(${TARGET_NAME} PRIVATE
                 $
  )
endif()

# L0_ULT_VERBOSE (default OFF) selects which listener flag is stored in
# L0_TESTS_LISTENER_OPTION: OFF -> --disable_default_listener,
# ON -> --enable_default_listener.
option(L0_ULT_VERBOSE "Use the default/verbose test output" OFF)
if(NOT L0_ULT_VERBOSE)
  set(L0_TESTS_LISTENER_OPTION "--disable_default_listener")
else()
  set(L0_TESTS_LISTENER_OPTION "--enable_default_listener")
endif()

# With MSVC, set the Visual Studio debugger command arguments and working directory.
if(MSVC)
  set_target_properties(${TARGET_NAME} PROPERTIES
                        VS_DEBUGGER_COMMAND_ARGUMENTS "${L0_TESTS_FILTER_OPTION} --gtest_catch_exceptions=0 ${L0_TESTS_LISTENER_OPTION}"
                        VS_DEBUGGER_WORKING_DIRECTORY "$(OutDir)"
  )
endif()

# Building `unit_tests` also builds this target.
add_dependencies(unit_tests ${TARGET_NAME})

create_source_tree(${TARGET_NAME} ${L0_ROOT_DIR}/..)
compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/000077500000000000000000000000001422164147700255235ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/CMakeLists.txt000066400000000000000000000003051422164147700302610ustar00rootroot00000000000000#
# Copyright (C) 2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
#

target_sources(${TARGET_NAME} PRIVATE
               ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
)

add_subdirectories()
compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/debug/000077500000000000000000000000001422164147700266115ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/debug/CMakeLists.txt000066400000000000000000000007301422164147700313510ustar00rootroot00000000000000#
# Copyright (C) 2021 Intel Corporation
#
# SPDX-License-Identifier: MIT
#

target_sources(${TARGET_NAME} PRIVATE
               ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
               ${CMAKE_CURRENT_SOURCE_DIR}/debug_session_tests.cpp
               ${CMAKE_CURRENT_SOURCE_DIR}/eu_thread_tests.cpp
               ${CMAKE_CURRENT_SOURCE_DIR}/mock_debug_session.h
               ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}test_debug_api.cpp
)

add_subdirectories()
compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/debug/debug_session_helper.cpp000066400000000000000000000006121422164147700335040ustar00rootroot00000000000000/*
 * Copyright (C) 2021 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "level_zero/tools/test/unit_tests/sources/debug/mock_debug_session.h"

// NOTE(review): the header name of the next #include appears to have been lost
// in this copy of the file - confirm against upstream.
#include

namespace L0 {

// Factory used by the unit-test build: returns a newly allocated
// L0::ult::DebugSessionMock constructed from `config` and `device`.
// `debugFd` is not used. The object is returned as a raw pointer created
// with `new`.
DebugSession *createDebugSessionHelper(const zet_debug_config_t &config, Device *device, int debugFd) {
    return new L0::ult::DebugSessionMock(config, device);
}

} // namespace L0compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/debug/debug_session_tests.cpp000066400000000000000000000566721422164147700334100ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier:
MIT
 *
 */

#include "shared/test/common/helpers/default_hw_info.h"
#include "shared/test/common/mocks/mock_device.h"
#include "shared/test/common/mocks/ult_device_factory.h"
#include "shared/test/common/test_macros/test.h"

#include "level_zero/core/source/device/device_imp.h"
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
#include "level_zero/core/test/unit_tests/mock.h"
#include "level_zero/core/test/unit_tests/mocks/mock_device.h"
#include "level_zero/tools/test/unit_tests/sources/debug/mock_debug_session.h"

namespace L0 {
namespace ult {

// isThreadAll() is expected to be true only when all four members of the
// thread descriptor are UINT32_MAX; setting any single member to 0 makes it
// false.
TEST(DebugSession, givenThreadWhenIsThreadAllCalledThenTrueReturnedOnlyForAllValuesEqualMax) {
    ze_device_thread_t thread = {UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX};
    EXPECT_TRUE(DebugSession::isThreadAll(thread));
    thread = {0, UINT32_MAX, UINT32_MAX, UINT32_MAX};
    EXPECT_FALSE(DebugSession::isThreadAll(thread));
    thread = {UINT32_MAX, 0, UINT32_MAX, UINT32_MAX};
    EXPECT_FALSE(DebugSession::isThreadAll(thread));
    thread = {UINT32_MAX, UINT32_MAX, 0, UINT32_MAX};
    EXPECT_FALSE(DebugSession::isThreadAll(thread));
    thread = {UINT32_MAX, UINT32_MAX, UINT32_MAX, 0};
    EXPECT_FALSE(DebugSession::isThreadAll(thread));
}

// isSingleThread() is expected to be false for every descriptor checked here
// that still contains a UINT32_MAX member.
TEST(DebugSession, givenThreadWhenIsSingleThreadCalledThenTrueReturnedOnlyForNonMaxValues) {
    ze_device_thread_t thread = {UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX};
    EXPECT_FALSE(DebugSession::isSingleThread(thread));
    thread = {0, UINT32_MAX, UINT32_MAX, UINT32_MAX};
    EXPECT_FALSE(DebugSession::isSingleThread(thread));
    thread = {UINT32_MAX, 0, UINT32_MAX, UINT32_MAX};
    EXPECT_FALSE(DebugSession::isSingleThread(thread));
    thread = {UINT32_MAX, UINT32_MAX, 0, UINT32_MAX};
    EXPECT_FALSE(DebugSession::isSingleThread(thread));
    thread = {UINT32_MAX, UINT32_MAX, UINT32_MAX, 0};
    EXPECT_FALSE(DebugSession::isSingleThread(thread));
    thread = {UINT32_MAX, UINT32_MAX, 0, 0};
    EXPECT_FALSE(DebugSession::isSingleThread(thread));
    thread = {UINT32_MAX, 0, 0, 0};
    EXPECT_FALSE(DebugSession::isSingleThread(thread));
thread = {UINT32_MAX, 0, UINT32_MAX, 0};
    EXPECT_FALSE(DebugSession::isSingleThread(thread));
    thread = {0, UINT32_MAX, 0, UINT32_MAX};
    EXPECT_FALSE(DebugSession::isSingleThread(thread));
    thread = {UINT32_MAX, 0, 0, UINT32_MAX};
    EXPECT_FALSE(DebugSession::isSingleThread(thread));
    thread = {0, UINT32_MAX, UINT32_MAX, 0};
    EXPECT_FALSE(DebugSession::isSingleThread(thread));
    thread = {0, 0, UINT32_MAX, 0};
    EXPECT_FALSE(DebugSession::isSingleThread(thread));
    thread = {0, 0, 0, UINT32_MAX};
    EXPECT_FALSE(DebugSession::isSingleThread(thread));
    // Only a descriptor without any UINT32_MAX member is a single thread.
    thread = {1, 2, 3, 0};
    EXPECT_TRUE(DebugSession::isSingleThread(thread));
}

// areThreadsEqual() is expected to be true only when both descriptors hold the
// same four values; a difference in any one member makes it false.
TEST(DebugSession, givenThreadsWhenAreThreadsEqualCalledThenTrueReturnedForEqualThreads) {
    ze_device_thread_t thread = {UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX};
    ze_device_thread_t thread2 = {UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX};
    EXPECT_TRUE(DebugSession::areThreadsEqual(thread, thread2));
    thread = {0, UINT32_MAX, UINT32_MAX, UINT32_MAX};
    EXPECT_FALSE(DebugSession::areThreadsEqual(thread, thread2));
    thread2 = {UINT32_MAX, 0, UINT32_MAX, UINT32_MAX};
    EXPECT_FALSE(DebugSession::areThreadsEqual(thread, thread2));
    thread = {1, 0, 0, 0};
    thread2 = {1, 0, 0, UINT32_MAX};
    EXPECT_FALSE(DebugSession::areThreadsEqual(thread, thread2));
    thread2 = {1, 0, 1, 0};
    EXPECT_FALSE(DebugSession::areThreadsEqual(thread, thread2));
    thread2 = {1, 1, 0, 0};
    EXPECT_FALSE(DebugSession::areThreadsEqual(thread, thread2));
    thread2 = {0, 0, 0, 0};
    EXPECT_FALSE(DebugSession::areThreadsEqual(thread, thread2));
    {
        // Inner scope: these declarations shadow the outer thread/thread2.
        ze_device_thread_t thread = {1, 1, 1, 1};
        ze_device_thread_t thread2 = {1, 1, 1, 1};
        EXPECT_TRUE(DebugSession::areThreadsEqual(thread, thread2));
    }
}

// checkSingleThreadWithinDeviceThread(thread, thread2): the expectations in
// this test treat UINT32_MAX members of thread2 as matching any value of the
// corresponding member of thread.
TEST(DebugSession, givenThreadWhenCheckSingleThreadWithinDeviceThreadCalledThenTrueReturnedForMatchingThread) {
    ze_device_thread_t thread = {0, 1, 2, 3};
    ze_device_thread_t thread2 = {UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX};
    EXPECT_TRUE(DebugSession::checkSingleThreadWithinDeviceThread(thread, thread2));
    thread
= {0, 1, 2, 3};
    thread2 = {0, UINT32_MAX, UINT32_MAX, UINT32_MAX};
    EXPECT_TRUE(DebugSession::checkSingleThreadWithinDeviceThread(thread, thread2));
    thread = {0, 1, 2, 3};
    thread2 = {0, 1, 2, 3};
    EXPECT_TRUE(DebugSession::checkSingleThreadWithinDeviceThread(thread, thread2));
    // From here on at least one concrete member of thread2 differs from thread.
    thread = {0, 1, 2, 3};
    thread2 = {0, UINT32_MAX, UINT32_MAX, 4};
    EXPECT_FALSE(DebugSession::checkSingleThreadWithinDeviceThread(thread, thread2));
    thread = {0, 1, 2, 3};
    thread2 = {0, 1, 2, 4};
    EXPECT_FALSE(DebugSession::checkSingleThreadWithinDeviceThread(thread, thread2));
    thread = {0, 1, 2, 3};
    thread2 = {1, 1, 2, 3};
    EXPECT_FALSE(DebugSession::checkSingleThreadWithinDeviceThread(thread, thread2));
    thread = {0, 1, 2, 3};
    thread2 = {0, 2, 2, 3};
    EXPECT_FALSE(DebugSession::checkSingleThreadWithinDeviceThread(thread, thread2));
    thread = {0, 1, 3, 3};
    thread2 = {0, 1, 2, 3};
    EXPECT_FALSE(DebugSession::checkSingleThreadWithinDeviceThread(thread, thread2));
    thread = {0, 1, 3, 3};
    thread2 = {UINT32_MAX, 0, 2, 3};
    EXPECT_FALSE(DebugSession::checkSingleThreadWithinDeviceThread(thread, thread2));
}

// A fully specified physical thread is expected to expand to exactly one
// entry carrying the same slice/subslice/eu/thread values and tile index 0.
// NOTE(review): template arguments (e.g. on Mock and std::make_unique) appear
// to have been lost in this copy of the file - confirm against upstream.
TEST(DebugSession, givenSingleThreadWhenGettingSingleThreadsThenCorrectThreadIsReturned) {
    zet_debug_config_t config = {};
    config.pid = 0x1234;
    auto hwInfo = *NEO::defaultHwInfo.get();
    NEO::Device *neoDevice(NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0));
    Mock deviceImp(neoDevice, neoDevice->getExecutionEnvironment());
    auto debugSession = std::make_unique(config, &deviceImp);
    // Index of the last subslice within one slice.
    auto subslice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported - 1;
    ze_device_thread_t physicalThread = {0, subslice, 2, 3};
    auto threads = debugSession->getSingleThreadsForDevice(0, physicalThread, hwInfo);
    EXPECT_EQ(1u, threads.size());
    EXPECT_EQ(0u, threads[0].tileIndex);
    EXPECT_EQ(0u, threads[0].slice);
    EXPECT_EQ(subslice, threads[0].subslice);
    EXPECT_EQ(2u, threads[0].eu);
    EXPECT_EQ(3u, threads[0].thread);
}

TEST(DebugSession,
givenAllThreadsWhenGettingSingleThreadsThenCorrectThreadsAreReturned) {
    // thread == UINT32_MAX: one entry per thread of the selected EU is
    // expected, in increasing thread order.
    zet_debug_config_t config = {};
    config.pid = 0x1234;
    auto hwInfo = *NEO::defaultHwInfo.get();
    NEO::Device *neoDevice(NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0));
    Mock deviceImp(neoDevice, neoDevice->getExecutionEnvironment());
    auto debugSession = std::make_unique(config, &deviceImp);
    auto subslice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported - 1;
    ze_device_thread_t physicalThread = {0, subslice, 2, UINT32_MAX};
    const uint32_t numThreadsPerEu = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount);
    auto threads = debugSession->getSingleThreadsForDevice(0, physicalThread, hwInfo);
    EXPECT_EQ(numThreadsPerEu, threads.size());
    for (uint32_t i = 0; i < numThreadsPerEu; i++) {
        EXPECT_EQ(0u, threads[i].tileIndex);
        EXPECT_EQ(0u, threads[i].slice);
        EXPECT_EQ(subslice, threads[i].subslice);
        EXPECT_EQ(2u, threads[i].eu);
        EXPECT_EQ(i, threads[i].thread);
    }
}

// eu == UINT32_MAX: one entry per EU of the selected subslice is expected
// (MaxEuPerSubSlice entries), in increasing EU order, all for thread 0.
TEST(DebugSession, givenAllEUsWhenGettingSingleThreadsThenCorrectThreadsAreReturned) {
    zet_debug_config_t config = {};
    config.pid = 0x1234;
    auto hwInfo = *NEO::defaultHwInfo.get();
    NEO::Device *neoDevice(NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0));
    Mock deviceImp(neoDevice, neoDevice->getExecutionEnvironment());
    auto debugSession = std::make_unique(config, &deviceImp);
    auto subslice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported - 1;
    ze_device_thread_t physicalThread = {0, subslice, UINT32_MAX, 0};
    const uint32_t numEuPerSubslice = hwInfo.gtSystemInfo.MaxEuPerSubSlice;
    auto threads = debugSession->getSingleThreadsForDevice(0, physicalThread, hwInfo);
    EXPECT_EQ(numEuPerSubslice, threads.size());
    for (uint32_t i = 0; i < numEuPerSubslice; i++) {
        EXPECT_EQ(0u, threads[i].slice);
        EXPECT_EQ(subslice, threads[i].subslice);
        EXPECT_EQ(i, threads[i].eu);
        EXPECT_EQ(0u, threads[i].thread);
    }
}

TEST(DebugSession,
givenAllSubslicesWhenGettingSingleThreadsThenCorrectThreadsAreReturned) {
    // subslice == UINT32_MAX: one entry per subslice of slice 0 is expected,
    // in increasing subslice order, all for eu 0 / thread 0.
    zet_debug_config_t config = {};
    config.pid = 0x1234;
    auto hwInfo = *NEO::defaultHwInfo.get();
    NEO::Device *neoDevice(NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0));
    Mock deviceImp(neoDevice, neoDevice->getExecutionEnvironment());
    auto debugSession = std::make_unique(config, &deviceImp);
    ze_device_thread_t physicalThread = {0, UINT32_MAX, 0, 0};
    const uint32_t numSubslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported;
    auto threads = debugSession->getSingleThreadsForDevice(0, physicalThread, hwInfo);
    EXPECT_EQ(numSubslicesPerSlice, threads.size());
    for (uint32_t i = 0; i < numSubslicesPerSlice; i++) {
        EXPECT_EQ(0u, threads[i].slice);
        EXPECT_EQ(i, threads[i].subslice);
        EXPECT_EQ(0u, threads[i].eu);
        EXPECT_EQ(0u, threads[i].thread);
    }
}

// slice == UINT32_MAX: one entry per slice is expected (MaxSlicesSupported
// entries), in increasing slice order, all for subslice 0 / eu 0 / thread 0.
TEST(DebugSession, givenAllSlicesWhenGettingSingleThreadsThenCorrectThreadsAreReturned) {
    zet_debug_config_t config = {};
    config.pid = 0x1234;
    auto hwInfo = *NEO::defaultHwInfo.get();
    NEO::Device *neoDevice(NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0));
    Mock deviceImp(neoDevice, neoDevice->getExecutionEnvironment());
    auto debugSession = std::make_unique(config, &deviceImp);
    ze_device_thread_t physicalThread = {UINT32_MAX, 0, 0, 0};
    const uint32_t numSlices = hwInfo.gtSystemInfo.MaxSlicesSupported;
    auto threads = debugSession->getSingleThreadsForDevice(0, physicalThread, hwInfo);
    EXPECT_EQ(numSlices, threads.size());
    for (uint32_t i = 0; i < numSlices; i++) {
        EXPECT_EQ(0u, threads[i].tileIndex);
        EXPECT_EQ(i, threads[i].slice);
        EXPECT_EQ(0u, threads[i].subslice);
        EXPECT_EQ(0u, threads[i].eu);
        EXPECT_EQ(0u, threads[i].thread);
    }
}

// With debugArea.reserved1 set to 1, isBindlessSystemRoutine() is expected to
// return true.
TEST(DebugSession, givenBindlessSystemRoutineWhenQueryingIsBindlessThenTrueReturned) {
    zet_debug_config_t config = {};
    config.pid = 0x1234;
    auto hwInfo = *NEO::defaultHwInfo.get();
    NEO::Device
*neoDevice(NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0));
    Mock deviceImp(neoDevice, neoDevice->getExecutionEnvironment());
    auto debugSession = std::make_unique(config, &deviceImp);
    debugSession->debugArea.reserved1 = 1u;
    EXPECT_TRUE(debugSession->isBindlessSystemRoutine());
}

// With debugArea.reserved1 set to 0, isBindlessSystemRoutine() is expected to
// return false.
TEST(DebugSession, givenBindfulSystemRoutineWhenQueryingIsBindlessThenFalseReturned) {
    zet_debug_config_t config = {};
    config.pid = 0x1234;
    auto hwInfo = *NEO::defaultHwInfo.get();
    NEO::Device *neoDevice(NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0));
    Mock deviceImp(neoDevice, neoDevice->getExecutionEnvironment());
    auto debugSession = std::make_unique(config, &deviceImp);
    debugSession->debugArea.reserved1 = 0u;
    EXPECT_FALSE(debugSession->isBindlessSystemRoutine());
}

// On this device convertToPhysical() is expected to return the thread
// unchanged and to leave deviceIndex at 0, both for a concrete slice and for
// slice == UINT32_MAX.
TEST(DebugSession, givenApiThreadAndSingleTileWhenConvertingThenCorrectValuesReturned) {
    auto hwInfo = *NEO::defaultHwInfo.get();
    NEO::Device *neoDevice(NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0));
    Mock deviceImp(neoDevice, neoDevice->getExecutionEnvironment());
    auto debugSession = std::make_unique(zet_debug_config_t{0x1234}, &deviceImp);
    ze_device_thread_t thread = {hwInfo.gtSystemInfo.SliceCount - 1, hwInfo.gtSystemInfo.SubSliceCount - 1, 0, 0};
    uint32_t deviceIndex = 0;
    auto convertedThread = debugSession->convertToPhysical(thread, deviceIndex);
    EXPECT_EQ(0u, deviceIndex);
    EXPECT_EQ(convertedThread.slice, thread.slice);
    EXPECT_EQ(convertedThread.subslice, thread.subslice);
    EXPECT_EQ(convertedThread.eu, thread.eu);
    EXPECT_EQ(convertedThread.thread, thread.thread);
    thread.slice = UINT32_MAX;
    convertedThread = debugSession->convertToPhysical(thread, deviceIndex);
    EXPECT_EQ(0u, deviceIndex);
    EXPECT_EQ(convertedThread.slice, thread.slice);
}

// After every thread of EU (0, 0, 0, 0) has been stopped,
// areRequestedThreadsStopped() for "all threads of that EU" is expected to be
// true.
TEST(DebugSession, givenAllStoppedThreadsWhenAreRequestedThreadsStoppedCalledThenTrueReturned) {
    zet_debug_config_t config = {};
    config.pid = 0x1234;
    auto hwInfo = *NEO::defaultHwInfo.get();
    NEO::MockDevice
*neoDevice(NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0));
    Mock deviceImp(neoDevice, neoDevice->getExecutionEnvironment());
    auto sessionMock = std::make_unique(config, &deviceImp);
    // Stop each thread i of the EU addressed by ThreadId(0, 0, 0, 0, i).
    for (uint32_t i = 0; i < hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount; i++) {
        EuThread::ThreadId thread(0, 0, 0, 0, i);
        sessionMock->allThreads[thread]->stopThread(1u);
    }
    ze_device_thread_t apiThread = {0, 0, 0, UINT32_MAX};
    EXPECT_TRUE(sessionMock->areRequestedThreadsStopped(apiThread));
}

// When only the odd-numbered threads of the EU are stopped,
// areRequestedThreadsStopped() for all threads of that EU is expected to be
// false.
TEST(DebugSession, givenSomeStoppedThreadsWhenAreRequestedThreadsStoppedCalledThenFalseReturned) {
    zet_debug_config_t config = {};
    config.pid = 0x1234;
    auto hwInfo = *NEO::defaultHwInfo.get();
    NEO::MockDevice *neoDevice(NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0));
    Mock deviceImp(neoDevice, neoDevice->getExecutionEnvironment());
    auto sessionMock = std::make_unique(config, &deviceImp);
    for (uint32_t i = 0; i < hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount; i++) {
        EuThread::ThreadId thread(0, 0, 0, 0, i);
        if (i % 2) {
            sessionMock->allThreads[thread]->stopThread(1u);
        }
    }
    ze_device_thread_t apiThread = {0, 0, 0, UINT32_MAX};
    EXPECT_FALSE(sessionMock->areRequestedThreadsStopped(apiThread));
}

// fillDevicesFromThread() is expected to set entry 0 of the one-element
// `devices` vector to 1.
TEST(DebugSession, givenApiThreadAndSingleTileWhenFillingDevicesThenVectorEntryIsSet) {
    auto hwInfo = *NEO::defaultHwInfo.get();
    NEO::Device *neoDevice(NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0));
    Mock deviceImp(neoDevice, neoDevice->getExecutionEnvironment());
    auto debugSession = std::make_unique(zet_debug_config_t{0x1234}, &deviceImp);
    ze_device_thread_t thread = {hwInfo.gtSystemInfo.SliceCount - 1, hwInfo.gtSystemInfo.SubSliceCount - 1, 0, 0};
    std::vector devices(1);
    debugSession->fillDevicesFromThread(thread, devices);
    EXPECT_EQ(1u, devices[0]);
}

// Walks sanityMemAccessThreadCheck() through several combinations of thread
// descriptor and memory-space type and checks the result code of each.
TEST(DebugSession, givenDifferentCombinationsOfThreadsAndMemoryTypeCheckExpectedMemoryAccess) {
    zet_debug_config_t config = {};
    config.pid = 0x1234;
    auto hwInfo =
*NEO::defaultHwInfo.get(); NEO::MockDevice *neoDevice(NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); Mock deviceImp(neoDevice, neoDevice->getExecutionEnvironment()); auto sessionMock = std::make_unique(config, &deviceImp); ze_device_thread_t thread = {UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX}; zet_debug_memory_space_desc_t desc; desc.address = 0x1000; desc.type = ZET_DEBUG_MEMORY_SPACE_TYPE_DEFAULT; ze_result_t retVal = sessionMock->sanityMemAccessThreadCheck(thread, &desc); EXPECT_EQ(ZE_RESULT_SUCCESS, retVal); desc.type = ZET_DEBUG_MEMORY_SPACE_TYPE_SLM; retVal = sessionMock->sanityMemAccessThreadCheck(thread, &desc); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, retVal); thread = {1, 1, UINT32_MAX, UINT32_MAX}; retVal = sessionMock->sanityMemAccessThreadCheck(thread, &desc); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, retVal); thread = {0, 0, 0, 1}; retVal = sessionMock->sanityMemAccessThreadCheck(thread, &desc); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, retVal); desc.type = ZET_DEBUG_MEMORY_SPACE_TYPE_DEFAULT; retVal = sessionMock->sanityMemAccessThreadCheck(thread, &desc); EXPECT_EQ(ZE_RESULT_ERROR_NOT_AVAILABLE, retVal); for (uint32_t i = 0; i < hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount; i++) { EuThread::ThreadId thread(0, 0, 0, 0, i); sessionMock->allThreads[thread]->stopThread(1u); } retVal = sessionMock->sanityMemAccessThreadCheck(thread, &desc); EXPECT_EQ(ZE_RESULT_SUCCESS, retVal); } TEST(DebugSession, givenDifferentThreadsWhenGettingPerThreadScratchOffsetThenCorrectOffsetReturned) { auto hwInfo = *NEO::defaultHwInfo.get(); NEO::Device *neoDevice(NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); Mock deviceImp(neoDevice, neoDevice->getExecutionEnvironment()); auto debugSession = std::make_unique(zet_debug_config_t{0x1234}, &deviceImp); const uint32_t numThreadsPerEu = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount); EuThread::ThreadId thread0Eu0 = {0, 0, 0, 0, 0}; 
EuThread::ThreadId thread0Eu1 = {0, 0, 0, 1, 0}; EuThread::ThreadId thread2Subslice1 = {0, 0, 1, 0, 2}; const uint32_t ptss = 128; auto size = debugSession->getPerThreadScratchOffset(ptss, thread0Eu0); EXPECT_EQ(0u, size); size = debugSession->getPerThreadScratchOffset(ptss, thread0Eu1); EXPECT_EQ(ptss * numThreadsPerEu, size); size = debugSession->getPerThreadScratchOffset(ptss, thread2Subslice1); EXPECT_EQ(2 * ptss + ptss * hwInfo.gtSystemInfo.MaxEuPerSubSlice * numThreadsPerEu, size); } TEST(DebugSession, GivenLogsEnabledWhenPrintBitmaskCalledThenBitmaskIsPrinted) { DebugManagerStateRestore restorer; NEO::DebugManager.flags.DebuggerLogBitmask.set(255); ::testing::internal::CaptureStdout(); uint64_t bitmask[2] = {0x404080808080, 0x1111ffff1111ffff}; DebugSession::printBitmask(reinterpret_cast(bitmask), sizeof(bitmask)); auto output = ::testing::internal::GetCapturedStdout(); EXPECT_THAT(output, testing::HasSubstr(std::string("\nINFO: Bitmask: "))); EXPECT_THAT(output, testing::HasSubstr(std::string("[0] = 0x0000404080808080"))); EXPECT_THAT(output, testing::HasSubstr(std::string("[1] = 0x1111ffff1111ffff"))); } TEST(DebugSession, GivenLogsDisabledWhenPrintBitmaskCalledThenBitmaskIsNotPrinted) { DebugManagerStateRestore restorer; NEO::DebugManager.flags.DebuggerLogBitmask.set(0); ::testing::internal::CaptureStdout(); uint64_t bitmask[2] = {0x404080808080, 0x1111ffff1111ffff}; DebugSession::printBitmask(reinterpret_cast(bitmask), sizeof(bitmask)); auto output = ::testing::internal::GetCapturedStdout(); EXPECT_EQ(0u, output.size()); } using DebugSessionMultiTile = Test; TEST_F(DebugSessionMultiTile, givenApiThreadAndMultipleTilesWhenConvertingToPhysicalThenCorrectValueReturned) { L0::Device *device = driverHandle->devices[0]; auto debugSession = std::make_unique(zet_debug_config_t{0x1234}, device); ze_device_thread_t thread = {sliceCount * 2 - 1, 0, 0, 0}; uint32_t deviceIndex = 0; auto convertedThread = debugSession->convertToPhysical(thread, deviceIndex); 
EXPECT_EQ(1u, deviceIndex); EXPECT_EQ(sliceCount - 1, convertedThread.slice); EXPECT_EQ(thread.subslice, convertedThread.subslice); EXPECT_EQ(thread.eu, convertedThread.eu); EXPECT_EQ(thread.thread, convertedThread.thread); thread = {sliceCount - 1, 0, 0, 0}; convertedThread = debugSession->convertToPhysical(thread, deviceIndex); EXPECT_EQ(0u, deviceIndex); EXPECT_EQ(sliceCount - 1, convertedThread.slice); EXPECT_EQ(thread.subslice, convertedThread.subslice); EXPECT_EQ(thread.eu, convertedThread.eu); EXPECT_EQ(thread.thread, convertedThread.thread); thread.slice = UINT32_MAX; convertedThread = debugSession->convertToPhysical(thread, deviceIndex); EXPECT_EQ(0u, deviceIndex); EXPECT_EQ(convertedThread.slice, thread.slice); L0::DeviceImp *deviceImp = static_cast(device); debugSession = std::make_unique(zet_debug_config_t{0x1234}, deviceImp->subDevices[1]); thread = {sliceCount - 1, 0, 0, 0}; deviceIndex = 10; convertedThread = debugSession->convertToPhysical(thread, deviceIndex); EXPECT_EQ(1u, deviceIndex); EXPECT_EQ(sliceCount - 1, convertedThread.slice); EXPECT_EQ(thread.subslice, convertedThread.subslice); EXPECT_EQ(thread.eu, convertedThread.eu); EXPECT_EQ(thread.thread, convertedThread.thread); } TEST_F(DebugSessionMultiTile, WhenConvertingToThreadIdAndBackThenCorrectThreadIdsAreReturned) { L0::Device *device = driverHandle->devices[0]; auto debugSession = std::make_unique(zet_debug_config_t{0x1234}, device); ze_device_thread_t thread = {sliceCount * 2 - 1, 0, 0, 0}; auto threadID = debugSession->convertToThreadId(thread); EXPECT_EQ(1u, threadID.tileIndex); EXPECT_EQ(sliceCount - 1, threadID.slice); EXPECT_EQ(thread.subslice, threadID.subslice); EXPECT_EQ(thread.eu, threadID.eu); EXPECT_EQ(thread.thread, threadID.thread); auto apiThread = debugSession->convertToApi(threadID); EXPECT_EQ(thread.slice, apiThread.slice); EXPECT_EQ(thread.subslice, apiThread.subslice); EXPECT_EQ(thread.eu, apiThread.eu); EXPECT_EQ(thread.thread, apiThread.thread); L0::DeviceImp 
*deviceImp = static_cast(device); debugSession = std::make_unique(zet_debug_config_t{0x1234}, deviceImp->subDevices[1]); thread = {sliceCount - 1, 0, 0, 0}; threadID = debugSession->convertToThreadId(thread); EXPECT_EQ(1u, threadID.tileIndex); EXPECT_EQ(sliceCount - 1, threadID.slice); EXPECT_EQ(thread.subslice, threadID.subslice); EXPECT_EQ(thread.eu, threadID.eu); EXPECT_EQ(thread.thread, threadID.thread); apiThread = debugSession->convertToApi(threadID); EXPECT_EQ(thread.slice, apiThread.slice); EXPECT_EQ(thread.subslice, apiThread.subslice); EXPECT_EQ(thread.eu, apiThread.eu); EXPECT_EQ(thread.thread, apiThread.thread); } TEST_F(DebugSessionMultiTile, givenApiThreadAndMultiTileWhenFillingDevicesThenVectorEntriesAreSet) { L0::Device *device = driverHandle->devices[0]; auto debugSession = std::make_unique(zet_debug_config_t{0x1234}, device); ze_device_thread_t thread = {UINT32_MAX, 0, 0, 0}; std::vector devices(numSubDevices); debugSession->fillDevicesFromThread(thread, devices); EXPECT_EQ(1u, devices[0]); EXPECT_EQ(1u, devices[1]); } TEST_F(DebugSessionMultiTile, givenApiThreadAndSingleTileWhenFillingDevicesThenVectorEntryIsSet) { L0::Device *device = driverHandle->devices[0]; auto debugSession = std::make_unique(zet_debug_config_t{0x1234}, device); ze_device_thread_t thread = {sliceCount * numSubDevices - 1, 0, 0, 0}; std::vector devices(numSubDevices); debugSession->fillDevicesFromThread(thread, devices); EXPECT_EQ(0u, devices[0]); EXPECT_EQ(1u, devices[1]); } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/debug/eu_thread_tests.cpp000066400000000000000000000200671422164147700325040ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/tools/source/debug/eu_thread.h" namespace L0 { namespace ult { 
TEST(EuThread, WhenConstructingEuThreadThenCorrectIdsAreSet) { ze_device_thread_t devThread = {3, 4, 5, 6}; EuThread::ThreadId threadId(0, devThread); EXPECT_EQ(0u, threadId.tileIndex); EXPECT_EQ(3u, threadId.slice); EXPECT_EQ(4u, threadId.subslice); EXPECT_EQ(5u, threadId.eu); EXPECT_EQ(6u, threadId.thread); EuThread::ThreadId threadId2(3, 1, 2, 3, 4); EXPECT_EQ(3u, threadId2.tileIndex); EXPECT_EQ(1u, threadId2.slice); EXPECT_EQ(2u, threadId2.subslice); EXPECT_EQ(3u, threadId2.eu); EXPECT_EQ(4u, threadId2.thread); auto castValue = static_cast(threadId2); EXPECT_EQ(threadId2.packed, castValue); } TEST(EuThread, GivenEuThreadWhenGettingThreadIdThenValidIdReturned) { ze_device_thread_t devThread = {3, 4, 5, 6}; EuThread::ThreadId threadId(0, devThread); EuThread euThread(threadId); auto id = euThread.getThreadId(); EXPECT_EQ(threadId.packed, id.packed); } TEST(EuThread, GivenEuThreadWhenChangingAndQueryingStatesThenStateIsChanged) { ze_device_thread_t devThread = {3, 4, 5, 6}; EuThread::ThreadId threadId(0, devThread); EuThread euThread(threadId); EXPECT_FALSE(euThread.isStopped()); EXPECT_TRUE(euThread.isRunning()); bool result = euThread.stopThread(0x1234); EXPECT_TRUE(result); EXPECT_TRUE(euThread.isStopped()); EXPECT_FALSE(euThread.isRunning()); EXPECT_EQ(0x1234u, euThread.getMemoryHandle()); result = euThread.stopThread(0x5678); EXPECT_FALSE(result); EXPECT_TRUE(euThread.isStopped()); EXPECT_FALSE(euThread.isRunning()); EXPECT_EQ(0x5678u, euThread.getMemoryHandle()); result = euThread.resumeThread(); EXPECT_EQ(EuThread::invalidHandle, euThread.getMemoryHandle()); EXPECT_TRUE(result); EXPECT_FALSE(euThread.isStopped()); EXPECT_TRUE(euThread.isRunning()); result = euThread.resumeThread(); EXPECT_FALSE(result); EXPECT_FALSE(euThread.isStopped()); EXPECT_TRUE(euThread.isRunning()); EXPECT_EQ(EuThread::invalidHandle, euThread.getMemoryHandle()); } TEST(EuThread, GivenEuThreadWhenToStringCalledThenCorrectStringReturned) { ze_device_thread_t devThread = {3, 4, 5, 6}; 
EuThread::ThreadId threadId(0, devThread); EuThread euThread(threadId); auto threadString = euThread.toString(); EXPECT_EQ("device index = 0 slice = 3 subslice = 4 eu = 5 thread = 6", threadString); } TEST(EuThread, GivenThreadStateRunningWhenVerifyingStopWithOddCounterThenTrueReturnedAndStateStopped) { ze_device_thread_t devThread = {3, 4, 5, 6}; EuThread::ThreadId threadId(0, devThread); EuThread euThread(threadId); EXPECT_TRUE(euThread.isRunning()); EXPECT_TRUE(euThread.verifyStopped(1)); EXPECT_TRUE(euThread.isStopped()); } TEST(EuThread, GivenThreadStateStoppedWhenVerifyingStopWithOddCounterThenTrueReturnedAndStateStopped) { ze_device_thread_t devThread = {3, 4, 5, 6}; EuThread::ThreadId threadId(0, devThread); EuThread euThread(threadId); euThread.verifyStopped(1); euThread.stopThread(1u); EXPECT_TRUE(euThread.verifyStopped(1)); EXPECT_TRUE(euThread.isStopped()); } TEST(EuThread, GivenThreadStateStoppedWhenVerifyingStopWithEvenCounterThenFalseReturnedAndStateRunning) { ze_device_thread_t devThread = {3, 4, 5, 6}; EuThread::ThreadId threadId(0, devThread); EuThread euThread(threadId); euThread.verifyStopped(1); euThread.stopThread(1u); EXPECT_FALSE(euThread.verifyStopped(2)); EXPECT_TRUE(euThread.isRunning()); } TEST(EuThread, GivenEnabledErrorLogsWhenThreadStateStoppedAndVerifyingStopWithEvenCounterThenErrorMessageIsPrinted) { DebugManagerStateRestore restorer; NEO::DebugManager.flags.DebuggerLogBitmask.set(NEO::DebugVariables::DEBUGGER_LOG_BITMASK::LOG_ERROR); ze_device_thread_t devThread = {0, 0, 0, 0}; EuThread::ThreadId threadId(0, devThread); EuThread euThread(threadId); euThread.verifyStopped(1); euThread.stopThread(1u); ::testing::internal::CaptureStderr(); EXPECT_FALSE(euThread.verifyStopped(2)); EXPECT_TRUE(euThread.isRunning()); auto message = ::testing::internal::GetCapturedStderr(); EXPECT_STREQ("\nERROR: Thread: device index = 0 slice = 0 subslice = 0 eu = 0 thread = 0 state STOPPED when thread is running. 
Switching to RUNNING", message.c_str()); } TEST(EuThread, GivenThreadStateRunningWhenVerifyingStopWithOddCounterForSecondStopThenTrueIsReturnedAndStateStopped) { ze_device_thread_t devThread = {3, 4, 5, 6}; EuThread::ThreadId threadId(0, devThread); EuThread euThread(threadId); euThread.verifyStopped(1); euThread.resumeThread(); EXPECT_TRUE(euThread.verifyStopped(3)); EXPECT_TRUE(euThread.isStopped()); } TEST(EuThread, GivenThreadStateRunningWhenVerifyingStopWithEvenCounteThenFalseIsReturnedAndStateRunning) { ze_device_thread_t devThread = {3, 4, 5, 6}; EuThread::ThreadId threadId(0, devThread); EuThread euThread(threadId); euThread.verifyStopped(1); euThread.resumeThread(); EXPECT_FALSE(euThread.verifyStopped(2)); EXPECT_TRUE(euThread.isRunning()); } TEST(EuThread, GivenThreadStateStoppedWhenVerifyingStopWithOddCounterBiggerByMoreThanTwoThenTrueIsReturnedAndStateStopped) { ze_device_thread_t devThread = {3, 4, 5, 6}; EuThread::ThreadId threadId(0, devThread); EuThread euThread(threadId); euThread.verifyStopped(1); EXPECT_TRUE(euThread.verifyStopped(7)); EXPECT_TRUE(euThread.isStopped()); } TEST(EuThread, GivenEnabledErrorLogsWhenThreadStateStoppedAndVerifyingStopWithOddCounterBiggerByMoreThanTwoThenErrorMessageIsPrinted) { DebugManagerStateRestore restorer; NEO::DebugManager.flags.DebuggerLogBitmask.set(NEO::DebugVariables::DEBUGGER_LOG_BITMASK::LOG_ERROR); ze_device_thread_t devThread = {0, 0, 0, 0}; EuThread::ThreadId threadId(0, devThread); EuThread euThread(threadId); euThread.verifyStopped(1); ::testing::internal::CaptureStderr(); EXPECT_TRUE(euThread.verifyStopped(7)); EXPECT_TRUE(euThread.isStopped()); auto message = ::testing::internal::GetCapturedStderr(); EXPECT_STREQ("\nERROR: Thread: device index = 0 slice = 0 subslice = 0 eu = 0 thread = 0 state out of sync.", message.c_str()); } TEST(EuThread, GivenEnabledErrorLogsWhenThreadStateRunningAndVerifyingStopWithOddCounterEqualToPreviousThenErrorMessageIsPrinted) { DebugManagerStateRestore restorer; 
NEO::DebugManager.flags.DebuggerLogBitmask.set(NEO::DebugVariables::DEBUGGER_LOG_BITMASK::LOG_ERROR); ze_device_thread_t devThread = {0, 0, 0, 0}; EuThread::ThreadId threadId(0, devThread); EuThread euThread(threadId); euThread.verifyStopped(1); euThread.resumeThread(); ::testing::internal::CaptureStderr(); EXPECT_TRUE(euThread.verifyStopped(1)); EXPECT_TRUE(euThread.isStopped()); auto message = ::testing::internal::GetCapturedStderr(); EXPECT_STREQ("\nERROR: Thread: device index = 0 slice = 0 subslice = 0 eu = 0 thread = 0 state RUNNING when thread is stopped. Switching to STOPPED", message.c_str()); } TEST(EuThread, GivenThreadStateStoppedWhenVerifyingStopWithEvenCounterBiggerByMoreThanTwoThenFalseIsReturnedAndStateRunning) { ze_device_thread_t devThread = {3, 4, 5, 6}; EuThread::ThreadId threadId(0, devThread); EuThread euThread(threadId); euThread.verifyStopped(1); EXPECT_FALSE(euThread.verifyStopped(8)); EXPECT_TRUE(euThread.isRunning()); } TEST(EuThread, GivenEuThreadWhenGettingLastCounterThenCorrectValueIsReturned) { ze_device_thread_t devThread = {3, 4, 5, 6}; EuThread::ThreadId threadId(0, devThread); EuThread euThread(threadId); EXPECT_EQ(0u, euThread.getLastCounter()); euThread.verifyStopped(1); EXPECT_EQ(1u, euThread.getLastCounter()); euThread.verifyStopped(9); EXPECT_EQ(9u, euThread.getLastCounter()); } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/debug/mock_debug_session.h000066400000000000000000000055401422164147700326300ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/os_interface/os_interface.h" #include "level_zero/tools/source/debug/debug_session.h" namespace L0 { namespace ult { class OsInterfaceWithDebugAttach : public NEO::OSInterface { public: OsInterfaceWithDebugAttach() : OSInterface() {} bool isDebugAttachAvailable() const override { return debugAttachAvailable; } bool 
debugAttachAvailable = true; }; struct DebugSessionMock : public L0::DebugSession { using L0::DebugSession::allThreads; using L0::DebugSession::debugArea; using L0::DebugSession::fillDevicesFromThread; using L0::DebugSession::getPerThreadScratchOffset; using L0::DebugSession::getSingleThreadsForDevice; using L0::DebugSession::isBindlessSystemRoutine; DebugSessionMock(const zet_debug_config_t &config, L0::Device *device) : DebugSession(config, device), config(config){}; bool closeConnection() override { return true; } ze_result_t initialize() override { if (config.pid == 0) { return ZE_RESULT_ERROR_UNKNOWN; } return ZE_RESULT_SUCCESS; } ze_result_t readEvent(uint64_t timeout, zet_debug_event_t *event) override { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t interrupt(ze_device_thread_t thread) override { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t resume(ze_device_thread_t thread) override { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t readMemory(ze_device_thread_t thread, const zet_debug_memory_space_desc_t *desc, size_t size, void *buffer) override { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t writeMemory(ze_device_thread_t thread, const zet_debug_memory_space_desc_t *desc, size_t size, const void *buffer) override { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t acknowledgeEvent(const zet_debug_event_t *event) override { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t readRegisters(ze_device_thread_t thread, uint32_t type, uint32_t start, uint32_t count, void *pRegisterValues) override { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t writeRegisters(ze_device_thread_t thread, uint32_t type, uint32_t start, uint32_t count, void *pRegisterValues) override { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t readSbaBuffer(EuThread::ThreadId threadId, SbaTrackedAddresses &sbaBuffer) override { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } void startAsyncThread() override { 
asyncThreadStarted = true; } bool readModuleDebugArea() override { return true; } zet_debug_config_t config; bool asyncThreadStarted = false; }; } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/debug/test_debug_api.cpp000066400000000000000000000074151422164147700323020ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/test/unit_tests/sources/debug/test_debug_api.inl" #include "shared/test/common/test_macros/test.h" #include "level_zero/tools/source/debug/debug_handlers.h" namespace L0 { namespace ult { TEST_F(DebugApiTest, givenDeviceWhenDebugAttachIsCalledThenNullptrSessionHandleAndErrorAreReturned) { zet_debug_config_t config = {}; config.pid = 0x1234; Mock deviceImp(neoDevice, neoDevice->getExecutionEnvironment()); zet_debug_session_handle_t debugSession = nullptr; auto result = zetDebugAttach(deviceImp.toHandle(), &config, &debugSession); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, result); EXPECT_EQ(nullptr, debugSession); } TEST_F(DebugApiTest, givenDebugSessionSetWhenGettingDebugSessionThenCorrectObjectIsReturned) { zet_debug_config_t config = {}; config.pid = 0x1234; Mock deviceImp(neoDevice, neoDevice->getExecutionEnvironment()); deviceImp.debugSession = std::make_unique(config, &deviceImp); EXPECT_NE(nullptr, deviceImp.getDebugSession(config)); } TEST_F(DebugApiTest, givenNoDebugSessionWhenGettingDebugSessionThenNullptrIsReturned) { zet_debug_config_t config = {}; config.pid = 0x1234; Mock deviceImp(neoDevice, neoDevice->getExecutionEnvironment()); deviceImp.debugSession.release(); EXPECT_EQ(nullptr, deviceImp.getDebugSession(config)); } TEST_F(DebugApiTest, givenSubdeviceWhenGettingDebugSessionThenNullptrIsReturned) { zet_debug_config_t config = {}; config.pid = 0x1234; Mock deviceImp(neoDevice, neoDevice->getExecutionEnvironment()); deviceImp.isSubdevice = true; EXPECT_EQ(nullptr, 
deviceImp.getDebugSession(config)); } TEST(DebugSessionTest, WhenDebugSessionCreateIsCalledThenNullptrReturned) { ze_result_t result; zet_debug_config_t config = {}; config.pid = 0x1234; L0::DebugSession *session = L0::DebugSession::create(config, nullptr, result); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, result); EXPECT_EQ(nullptr, session); } TEST(DebugSessionTest, WhenUnsupportedFunctionCalledThenErrorIsReturned) { zet_debug_session_handle_t session = {}; auto result = L0::DebugApiHandlers::debugDetach(session); EXPECT_EQ(result, ZE_RESULT_ERROR_UNSUPPORTED_FEATURE); result = L0::DebugApiHandlers::debugReadEvent(session, 0, nullptr); EXPECT_EQ(result, ZE_RESULT_ERROR_UNSUPPORTED_FEATURE); ze_device_thread_t thread = {}; result = L0::DebugApiHandlers::debugInterrupt(session, thread); EXPECT_EQ(result, ZE_RESULT_ERROR_UNSUPPORTED_FEATURE); result = L0::DebugApiHandlers::debugResume(session, thread); EXPECT_EQ(result, ZE_RESULT_ERROR_UNSUPPORTED_FEATURE); result = L0::DebugApiHandlers::debugReadMemory(session, thread, nullptr, 0, nullptr); EXPECT_EQ(result, ZE_RESULT_ERROR_UNSUPPORTED_FEATURE); result = L0::DebugApiHandlers::debugWriteMemory(session, thread, nullptr, 0, nullptr); EXPECT_EQ(result, ZE_RESULT_ERROR_UNSUPPORTED_FEATURE); result = L0::DebugApiHandlers::debugAcknowledgeEvent(session, nullptr); EXPECT_EQ(result, ZE_RESULT_ERROR_UNSUPPORTED_FEATURE); zet_device_handle_t hDevice = {}; result = L0::DebugApiHandlers::debugGetRegisterSetProperties(hDevice, nullptr, nullptr); EXPECT_EQ(result, ZE_RESULT_ERROR_UNSUPPORTED_FEATURE); uint32_t type = {0}; result = L0::DebugApiHandlers::debugReadRegisters(session, thread, type, 0, 0, nullptr); EXPECT_EQ(result, ZE_RESULT_ERROR_UNSUPPORTED_FEATURE); result = L0::DebugApiHandlers::debugWriteRegisters(session, thread, type, 0, 0, nullptr); EXPECT_EQ(result, ZE_RESULT_ERROR_UNSUPPORTED_FEATURE); } } // namespace ult } // namespace L0 
compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/debug/test_debug_api.inl000066400000000000000000000160471422164147700323030ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_built_ins.h" #include "level_zero/tools/test/unit_tests/sources/debug/mock_debug_session.h" namespace L0 { namespace ult { struct DebugApiFixture : public DeviceFixture { void SetUp() { DeviceFixture::SetUp(); neoDevice->executionEnvironment->rootDeviceEnvironments[0]->osInterface.reset(new NEO::OSInterface); } void TearDown() { DeviceFixture::TearDown(); } }; using DebugApiTest = Test; TEST_F(DebugApiTest, givenDeviceWhenGettingDebugPropertiesThenNoFlagIsSet) { zet_device_debug_properties_t debugProperties = {}; debugProperties.flags = ZET_DEVICE_DEBUG_PROPERTY_FLAG_FORCE_UINT32; auto result = zetDeviceGetDebugProperties(device->toHandle(), &debugProperties); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(0u, debugProperties.flags); } TEST_F(DebugApiTest, givenDeviceWhenCallingDebugAttachThenErrorIsReturned) { zet_debug_config_t config = {}; config.pid = 0x1234; zet_debug_session_handle_t debugSession = nullptr; auto result = zetDebugAttach(device->toHandle(), &config, &debugSession); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, result); EXPECT_EQ(nullptr, debugSession); } TEST_F(DebugApiTest, givenSubDeviceWhenCallingDebugAttachThenErrorIsReturned) { zet_debug_config_t config = {}; config.pid = 0x1234; zet_debug_session_handle_t debugSession = nullptr; Mock deviceImp(neoDevice, neoDevice->getExecutionEnvironment()); deviceImp.isSubdevice = true; auto result = zetDebugAttach(deviceImp.toHandle(), &config, &debugSession); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, result); EXPECT_EQ(nullptr, 
debugSession); } TEST_F(DebugApiTest, givenDeviceWhenDebugAttachIsAvaialbleThenGetPropertiesReturnsCorrectFlag) { zet_device_debug_properties_t debugProperties = {}; debugProperties.flags = ZET_DEVICE_DEBUG_PROPERTY_FLAG_FORCE_UINT32; neoDevice->executionEnvironment->rootDeviceEnvironments[0]->osInterface.reset(new OsInterfaceWithDebugAttach); auto result = zetDeviceGetDebugProperties(device->toHandle(), &debugProperties); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(ZET_DEVICE_DEBUG_PROPERTY_FLAG_ATTACH, debugProperties.flags); } TEST_F(DebugApiTest, givenStateSaveAreaHeaderUnavailableWhenGettingDebugPropertiesThenAttachFlagIsNotReturned) { zet_device_debug_properties_t debugProperties = {}; debugProperties.flags = ZET_DEVICE_DEBUG_PROPERTY_FLAG_FORCE_UINT32; neoDevice->executionEnvironment->rootDeviceEnvironments[0]->osInterface.reset(new OsInterfaceWithDebugAttach); mockBuiltIns->stateSaveAreaHeader.clear(); auto result = zetDeviceGetDebugProperties(device->toHandle(), &debugProperties); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(0u, debugProperties.flags); } TEST_F(DebugApiTest, givenSubDeviceWhenDebugAttachIsAvaialbleThenGetPropertiesReturnsNoFlag) { zet_device_debug_properties_t debugProperties = {}; debugProperties.flags = ZET_DEVICE_DEBUG_PROPERTY_FLAG_FORCE_UINT32; neoDevice->executionEnvironment->rootDeviceEnvironments[0]->osInterface.reset(new OsInterfaceWithDebugAttach); Mock deviceImp(neoDevice, neoDevice->getExecutionEnvironment()); deviceImp.isSubdevice = true; auto result = zetDeviceGetDebugProperties(deviceImp.toHandle(), &debugProperties); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(0u, debugProperties.flags); } TEST(DebugSessionTest, givenDebugSessionWhenConvertingToAndFromHandleCorrectHandleAndPointerIsReturned) { zet_debug_config_t config = {}; config.pid = 0x1234; NEO::Device *neoDevice(NEO::MockDevice::createWithNewExecutionEnvironment(NEO::defaultHwInfo.get(), 0)); Mock deviceImp(neoDevice, neoDevice->getExecutionEnvironment()); 
auto debugSession = std::make_unique(config, &deviceImp); L0::DebugSession *session = debugSession.get(); zet_debug_session_handle_t debugSessionHandle = debugSession->toHandle(); auto sessionFromHandle = L0::DebugSession::fromHandle(session); EXPECT_NE(nullptr, debugSessionHandle); EXPECT_EQ(session, sessionFromHandle); } TEST(DebugSessionTest, givenDebugSessionWhenGettingConnectedDeviceThenCorrectDeviceIsReturned) { zet_debug_config_t config = {}; config.pid = 0x1234; NEO::Device *neoDevice(NEO::MockDevice::createWithNewExecutionEnvironment(NEO::defaultHwInfo.get(), 0)); Mock deviceImp(neoDevice, neoDevice->getExecutionEnvironment()); auto debugSession = std::make_unique(config, &deviceImp); L0::DebugSession *session = debugSession.get(); auto device = session->getConnectedDevice(); EXPECT_EQ(&deviceImp, device); } TEST(DebugSessionTest, givenDeviceWithDebugSessionWhenRemoveCalledThenSessionIsNotDeleted) { zet_debug_config_t config = {}; config.pid = 0x1234; NEO::Device *neoDevice(NEO::MockDevice::createWithNewExecutionEnvironment(NEO::defaultHwInfo.get(), 0)); Mock deviceImp(neoDevice, neoDevice->getExecutionEnvironment()); auto debugSession = std::make_unique(config, &deviceImp); L0::DebugSession *session = debugSession.get(); deviceImp.debugSession.reset(session); deviceImp.removeDebugSession(); EXPECT_EQ(nullptr, deviceImp.debugSession.get()); } TEST(DebugSessionTest, givenSubDeviceWhenCreateingSessionThenNullptrReturned) { zet_debug_config_t config = {}; config.pid = 0x1234; NEO::Device *neoDevice(NEO::MockDevice::createWithNewExecutionEnvironment(NEO::defaultHwInfo.get(), 0)); Mock deviceImp(neoDevice, neoDevice->getExecutionEnvironment()); deviceImp.isSubdevice = true; ze_result_t result = ZE_RESULT_ERROR_DEVICE_LOST; auto session = deviceImp.createDebugSession(config, result); EXPECT_EQ(nullptr, session); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, result); } TEST(DebugSessionTest, givenRootDeviceWhenCreateingSessionThenResultReturnedIsCorrect) { 
zet_debug_config_t config = {}; config.pid = 0x1234; NEO::Device *neoDevice(NEO::MockDevice::createWithNewExecutionEnvironment(NEO::defaultHwInfo.get(), 0)); auto osInterface = new OsInterfaceWithDebugAttach; osInterface->debugAttachAvailable = false; neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[0]->osInterface.reset(osInterface); Mock deviceImp(neoDevice, neoDevice->getExecutionEnvironment()); deviceImp.isSubdevice = false; ze_result_t result = ZE_RESULT_ERROR_DEVICE_LOST; auto session = deviceImp.createDebugSession(config, result); EXPECT_EQ(nullptr, session); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, result); } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/loader/000077500000000000000000000000001422164147700267715ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/loader/CMakeLists.txt000066400000000000000000000003521422164147700315310ustar00rootroot00000000000000# # Copyright (C) 2021 Intel Corporation # # SPDX-License-Identifier: MIT # target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/test_loader.cpp ) compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/loader/test_loader.cpp000066400000000000000000000306351422164147700320110ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include #include #include "ze_ddi_tables.h" namespace L0 { namespace ult { TEST(LoaderTest, whenCallingzesGetDriverProcAddrTableWithCorrectMajorVersionThenSuccessIsReturnedAndMinorVersionIsIgnored) { ze_api_version_t version = static_cast(ZE_MAKE_VERSION(1u, 64u)); zes_driver_dditable_t pDdiTable = {}; ze_result_t result = zesGetDriverProcAddrTable(version, &pDdiTable); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST(LoaderTest, 
whenCallingzesGetDiagnosticsProcAddrTableWithCorrectMajorVersionThenSuccessIsReturnedAndMinorVersionIsIgnored) { ze_api_version_t version = static_cast(ZE_MAKE_VERSION(1u, 64u)); zes_diagnostics_dditable_t pDdiTable = {}; ze_result_t result = zesGetDiagnosticsProcAddrTable(version, &pDdiTable); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST(LoaderTest, whenCallingzesGetEngineProcAddrTableWithCorrectMajorVersionThenSuccessIsReturnedAndMinorVersionIsIgnored) { ze_api_version_t version = static_cast(ZE_MAKE_VERSION(1u, 64u)); zes_engine_dditable_t pDdiTable = {}; ze_result_t result = zesGetEngineProcAddrTable(version, &pDdiTable); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST(LoaderTest, whenCallingzesGetFabricPortProcAddrTableWithCorrectMajorVersionThenSuccessIsReturnedAndMinorVersionIsIgnored) { ze_api_version_t version = static_cast(ZE_MAKE_VERSION(1u, 64u)); zes_fabric_port_dditable_t pDdiTable = {}; ze_result_t result = zesGetFabricPortProcAddrTable(version, &pDdiTable); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST(LoaderTest, whenCallingzesGetFanProcAddrTableWithCorrectMajorVersionThenSuccessIsReturnedAndMinorVersionIsIgnored) { ze_api_version_t version = static_cast(ZE_MAKE_VERSION(1u, 64u)); zes_fan_dditable_t pDdiTable = {}; ze_result_t result = zesGetFanProcAddrTable(version, &pDdiTable); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST(LoaderTest, whenCallingzesGetDeviceProcAddrTableWithCorrectMajorVersionThenSuccessIsReturnedAndMinorVersionIsIgnored) { ze_api_version_t version = static_cast(ZE_MAKE_VERSION(1u, 64u)); zes_device_dditable_t pDdiTable = {}; ze_result_t result = zesGetDeviceProcAddrTable(version, &pDdiTable); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST(LoaderTest, whenCallingzesGetFirmwareProcAddrTableWithCorrectMajorVersionThenSuccessIsReturnedAndMinorVersionIsIgnored) { ze_api_version_t version = static_cast(ZE_MAKE_VERSION(1u, 64u)); zes_firmware_dditable_t pDdiTable = {}; ze_result_t result = zesGetFirmwareProcAddrTable(version, &pDdiTable); 
EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST(LoaderTest, whenCallingzesGetFrequencyProcAddrTableWithCorrectMajorVersionThenSuccessIsReturnedAndMinorVersionIsIgnored) { ze_api_version_t version = static_cast(ZE_MAKE_VERSION(1u, 64u)); zes_frequency_dditable_t pDdiTable = {}; ze_result_t result = zesGetFrequencyProcAddrTable(version, &pDdiTable); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST(LoaderTest, whenCallingzesGetLedProcAddrTableWithCorrectMajorVersionThenSuccessIsReturnedAndMinorVersionIsIgnored) { ze_api_version_t version = static_cast(ZE_MAKE_VERSION(1u, 64u)); zes_led_dditable_t pDdiTable = {}; ze_result_t result = zesGetLedProcAddrTable(version, &pDdiTable); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST(LoaderTest, whenCallingzesGetMemoryProcAddrTableWithCorrectMajorVersionThenSuccessIsReturnedAndMinorVersionIsIgnored) { ze_api_version_t version = static_cast(ZE_MAKE_VERSION(1u, 64u)); zes_memory_dditable_t pDdiTable = {}; ze_result_t result = zesGetMemoryProcAddrTable(version, &pDdiTable); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST(LoaderTest, whenCallingzesGetPerformanceFactorProcAddrTableWithCorrectMajorVersionThenSuccessIsReturnedAndMinorVersionIsIgnored) { ze_api_version_t version = static_cast(ZE_MAKE_VERSION(1u, 64u)); zes_performance_factor_dditable_t pDdiTable = {}; ze_result_t result = zesGetPerformanceFactorProcAddrTable(version, &pDdiTable); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST(LoaderTest, whenCallingzesGetPowerProcAddrTableWithCorrectMajorVersionThenSuccessIsReturnedAndMinorVersionIsIgnored) { ze_api_version_t version = static_cast(ZE_MAKE_VERSION(1u, 64u)); zes_power_dditable_t pDdiTable = {}; ze_result_t result = zesGetPowerProcAddrTable(version, &pDdiTable); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST(LoaderTest, whenCallingzesGetPsuProcAddrTableWithCorrectMajorVersionThenSuccessIsReturnedAndMinorVersionIsIgnored) { ze_api_version_t version = static_cast(ZE_MAKE_VERSION(1u, 64u)); zes_psu_dditable_t pDdiTable = {}; ze_result_t 
result = zesGetPsuProcAddrTable(version, &pDdiTable);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
}

// RAS table: a request built with ZE_MAKE_VERSION(1, 64) is expected to succeed.
TEST(LoaderTest, whenCallingzesGetRasProcAddrTableWithCorrectMajorVersionThenSuccessIsReturnedAndMinorVersionIsIgnored) {
    zes_ras_dditable_t rasTable = {};
    const ze_api_version_t requestedVersion = static_cast(ZE_MAKE_VERSION(1u, 64u));

    const ze_result_t status = zesGetRasProcAddrTable(requestedVersion, &rasTable);

    EXPECT_EQ(ZE_RESULT_SUCCESS, status);
}

// Scheduler table: a request built with ZE_MAKE_VERSION(1, 64) is expected to succeed.
TEST(LoaderTest, whenCallingzesGetSchedulerProcAddrTableWithCorrectMajorVersionThenSuccessIsReturnedAndMinorVersionIsIgnored) {
    zes_scheduler_dditable_t schedulerTable = {};
    const ze_api_version_t requestedVersion = static_cast(ZE_MAKE_VERSION(1u, 64u));

    const ze_result_t status = zesGetSchedulerProcAddrTable(requestedVersion, &schedulerTable);

    EXPECT_EQ(ZE_RESULT_SUCCESS, status);
}

// Standby table: a request built with ZE_MAKE_VERSION(1, 64) is expected to succeed.
TEST(LoaderTest, whenCallingzesGetStandbyProcAddrTableWithCorrectMajorVersionThenSuccessIsReturnedAndMinorVersionIsIgnored) {
    zes_standby_dditable_t standbyTable = {};
    const ze_api_version_t requestedVersion = static_cast(ZE_MAKE_VERSION(1u, 64u));

    const ze_result_t status = zesGetStandbyProcAddrTable(requestedVersion, &standbyTable);

    EXPECT_EQ(ZE_RESULT_SUCCESS, status);
}

// Driver table: a request built with ZE_MAKE_VERSION(64, 0) must be answered with
// ZE_RESULT_ERROR_UNSUPPORTED_VERSION (the test name says "Unitialized", the assertion does not).
TEST(LoaderTest, whenCallingzesGetDriverProcAddrTableWithGreaterThanAllowedMajorVersionThenUnitializedIsReturned) {
    zes_driver_dditable_t driverTable = {};
    const ze_api_version_t requestedVersion = static_cast(ZE_MAKE_VERSION(64u, 0u));

    const ze_result_t status = zesGetDriverProcAddrTable(requestedVersion, &driverTable);

    EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_VERSION, status);
}

// Diagnostics table: a request built with ZE_MAKE_VERSION(64, 0) must be answered with
// ZE_RESULT_ERROR_UNSUPPORTED_VERSION.
TEST(LoaderTest, whenCallingzesGetDiagnosticsProcAddrTableWithGreaterThanAllowedMajorVersionThenUnitializedIsReturned) {
    zes_diagnostics_dditable_t diagnosticsTable = {};
    const ze_api_version_t requestedVersion = static_cast(ZE_MAKE_VERSION(64u, 0u));

    const ze_result_t status = zesGetDiagnosticsProcAddrTable(requestedVersion, &diagnosticsTable);

    EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_VERSION, status);
}

TEST(LoaderTest, whenCallingzesGetEngineProcAddrTableWithGreaterThanAllowedMajorVersionThenUnitializedIsReturned) {
    ze_api_version_t version = static_cast(ZE_MAKE_VERSION(64u,
0u)); zes_engine_dditable_t pDdiTable = {}; ze_result_t result = zesGetEngineProcAddrTable(version, &pDdiTable); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_VERSION, result); } TEST(LoaderTest, whenCallingzesGetFabricPortProcAddrTableWithGreaterThanAllowedMajorVersionThenUnitializedIsReturned) { ze_api_version_t version = static_cast(ZE_MAKE_VERSION(64u, 0u)); zes_fabric_port_dditable_t pDdiTable = {}; ze_result_t result = zesGetFabricPortProcAddrTable(version, &pDdiTable); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_VERSION, result); } TEST(LoaderTest, whenCallingzesGetFanProcAddrTableWithGreaterThanAllowedMajorVersionThenUnitializedIsReturned) { ze_api_version_t version = static_cast(ZE_MAKE_VERSION(64u, 0u)); zes_fan_dditable_t pDdiTable = {}; ze_result_t result = zesGetFanProcAddrTable(version, &pDdiTable); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_VERSION, result); } TEST(LoaderTest, whenCallingzesGetDeviceProcAddrTableWithGreaterThanAllowedMajorVersionThenUnitializedIsReturned) { ze_api_version_t version = static_cast(ZE_MAKE_VERSION(64u, 0u)); zes_device_dditable_t pDdiTable = {}; ze_result_t result = zesGetDeviceProcAddrTable(version, &pDdiTable); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_VERSION, result); } TEST(LoaderTest, whenCallingzesGetFirmwareProcAddrTableWithGreaterThanAllowedMajorVersionThenUnitializedIsReturned) { ze_api_version_t version = static_cast(ZE_MAKE_VERSION(64u, 0u)); zes_firmware_dditable_t pDdiTable = {}; ze_result_t result = zesGetFirmwareProcAddrTable(version, &pDdiTable); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_VERSION, result); } TEST(LoaderTest, whenCallingzesGetFrequencyProcAddrTableWithGreaterThanAllowedMajorVersionThenUnitializedIsReturned) { ze_api_version_t version = static_cast(ZE_MAKE_VERSION(64u, 0u)); zes_frequency_dditable_t pDdiTable = {}; ze_result_t result = zesGetFrequencyProcAddrTable(version, &pDdiTable); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_VERSION, result); } TEST(LoaderTest, 
whenCallingzesGetLedProcAddrTableWithGreaterThanAllowedMajorVersionThenUnitializedIsReturned) {
    ze_api_version_t version = static_cast(ZE_MAKE_VERSION(64u, 0u));
    zes_led_dditable_t pDdiTable = {};
    ze_result_t result = zesGetLedProcAddrTable(version, &pDdiTable);
    EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_VERSION, result);
}

// Memory table: a request built with ZE_MAKE_VERSION(64, 0) must be answered with
// ZE_RESULT_ERROR_UNSUPPORTED_VERSION.
TEST(LoaderTest, whenCallingzesGetMemoryProcAddrTableWithGreaterThanAllowedMajorVersionThenUnitializedIsReturned) {
    zes_memory_dditable_t memoryTable = {};
    const ze_api_version_t requestedVersion = static_cast(ZE_MAKE_VERSION(64u, 0u));

    const ze_result_t status = zesGetMemoryProcAddrTable(requestedVersion, &memoryTable);

    EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_VERSION, status);
}

// Performance-factor table: a request built with ZE_MAKE_VERSION(64, 0) must be answered with
// ZE_RESULT_ERROR_UNSUPPORTED_VERSION.
TEST(LoaderTest, whenCallingzesGetPerformanceFactorProcAddrTableWithGreaterThanAllowedMajorVersionThenUnitializedIsReturned) {
    zes_performance_factor_dditable_t performanceFactorTable = {};
    const ze_api_version_t requestedVersion = static_cast(ZE_MAKE_VERSION(64u, 0u));

    const ze_result_t status = zesGetPerformanceFactorProcAddrTable(requestedVersion, &performanceFactorTable);

    EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_VERSION, status);
}

// Power table: a request built with ZE_MAKE_VERSION(64, 0) must be answered with
// ZE_RESULT_ERROR_UNSUPPORTED_VERSION.
TEST(LoaderTest, whenCallingzesGetPowerProcAddrTableWithGreaterThanAllowedMajorVersionThenUnitializedIsReturned) {
    zes_power_dditable_t powerTable = {};
    const ze_api_version_t requestedVersion = static_cast(ZE_MAKE_VERSION(64u, 0u));

    const ze_result_t status = zesGetPowerProcAddrTable(requestedVersion, &powerTable);

    EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_VERSION, status);
}

// PSU table: a request built with ZE_MAKE_VERSION(64, 0) must be answered with
// ZE_RESULT_ERROR_UNSUPPORTED_VERSION.
TEST(LoaderTest, whenCallingzesGetPsuProcAddrTableWithGreaterThanAllowedMajorVersionThenUnitializedIsReturned) {
    zes_psu_dditable_t psuTable = {};
    const ze_api_version_t requestedVersion = static_cast(ZE_MAKE_VERSION(64u, 0u));

    const ze_result_t status = zesGetPsuProcAddrTable(requestedVersion, &psuTable);

    EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_VERSION, status);
}

TEST(LoaderTest, whenCallingzesGetRasProcAddrTableWithGreaterThanAllowedMajorVersionThenUnitializedIsReturned) {
    ze_api_version_t version = static_cast(ZE_MAKE_VERSION(64u, 0u));
    zes_ras_dditable_t pDdiTable = {};
    ze_result_t result = zesGetRasProcAddrTable(version,
&pDdiTable); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_VERSION, result); } /* Scheduler table: a request built with ZE_MAKE_VERSION(64, 0) is expected to be answered with ZE_RESULT_ERROR_UNSUPPORTED_VERSION (the test name says "Unitialized", the assertion does not). */ TEST(LoaderTest, whenCallingzesGetSchedulerProcAddrTableWithGreaterThanAllowedMajorVersionThenUnitializedIsReturned) { ze_api_version_t version = static_cast(ZE_MAKE_VERSION(64u, 0u)); zes_scheduler_dditable_t pDdiTable = {}; ze_result_t result = zesGetSchedulerProcAddrTable(version, &pDdiTable); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_VERSION, result); } /* Standby table: same check, a ZE_MAKE_VERSION(64, 0) request must yield ZE_RESULT_ERROR_UNSUPPORTED_VERSION. */ TEST(LoaderTest, whenCallingzesGetStandbyProcAddrTableWithGreaterThanAllowedMajorVersionThenUnitializedIsReturned) { ze_api_version_t version = static_cast(ZE_MAKE_VERSION(64u, 0u)); zes_standby_dditable_t pDdiTable = {}; ze_result_t result = zesGetStandbyProcAddrTable(version, &pDdiTable); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_VERSION, result); } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/metrics/000077500000000000000000000000001422164147700271715ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/metrics/CMakeLists.txt000066400000000000000000000026551422164147700317410ustar00rootroot00000000000000# # Copyright (C) 2020-2022 Intel Corporation # # SPDX-License-Identifier: MIT # target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/mock_metric_oa.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_metric_oa.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_metric_oa_enumeration.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_metric_oa_enumeration.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_metric_oa_enumeration_1.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_metric_oa_enumeration_2.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_metric_oa_query_pool_1.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_metric_oa_query_pool_2.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_metric_oa_query_pool_3.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_metric_oa_streamer_1.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_metric_oa_streamer_2.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_metric_oa_streamer_3.cpp 
${CMAKE_CURRENT_SOURCE_DIR}/test_metric_oa_initialization.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_metric_ip_sampling_enumeration.cpp ${CMAKE_CURRENT_SOURCE_DIR}/metric_ip_sampling_fixture.cpp ${CMAKE_CURRENT_SOURCE_DIR}/metric_ip_sampling_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/test_metric_ip_sampling_streamer.cpp ) add_subdirectories() compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/metrics/linux/000077500000000000000000000000001422164147700303305ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/metrics/linux/CMakeLists.txt000066400000000000000000000014631422164147700330740ustar00rootroot00000000000000# # Copyright (C) 2020-2022 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_TOOLS_METRICS_LINUX_TESTS_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/test_metric_oa_query_pool_linux.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_metric_ip_sampling_linux.cpp ) if(TESTS_PVC) if(NEO_ENABLE_i915_PRELIM_DETECTION) list(APPEND L0_TOOLS_METRICS_LINUX_TESTS_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/test_metric_ip_sampling_linux_pvc_prelim.cpp ) endif() if("${BRANCH_TYPE}" STREQUAL "") list(APPEND L0_TOOLS_METRICS_LINUX_TESTS_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/test_metric_ip_sampling_linux_pvc_upstream.cpp ) endif() endif() if(UNIX) target_sources(${TARGET_NAME} PRIVATE ${L0_TOOLS_METRICS_LINUX_TESTS_LINUX} ) endif() test_metric_ip_sampling_linux.cpp000066400000000000000000000022121422164147700370750ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/metrics/linux/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/tools/source/metrics/os_metric_ip_sampling.h" namespace L0 { namespace ult { class MetricIpSamplingLinuxTest : public DeviceFixture, public ::testing::Test { public: void 
SetUp() override { DeviceFixture::SetUp(); /* Create the OS-specific IP sampling interface for the fixture's device. */ metricIpSamplingOsInterface = MetricIpSamplingOsInterface::create(static_cast(*device)); } void TearDown() override { DeviceFixture::TearDown(); } std::unique_ptr metricIpSamplingOsInterface = nullptr; }; /* Overwrites the hardware info's product family with the test's productFamily and expects isDependencyAvailable() to return false; the IsNotXeHpcCore matcher presumably limits this to non-XeHPC products (confirm against the matcher definition). */ HWTEST2_F(MetricIpSamplingLinuxTest, GivenUnsupportedProductFamilyIsUsedWhenIsDependencyAvailableIsCalledThenReturnFailure, IsNotXeHpcCore) { auto hwInfo = neoDevice->getRootDeviceEnvironment().getMutableHardwareInfo(); hwInfo->platform.eProductFamily = productFamily; EXPECT_FALSE(metricIpSamplingOsInterface->isDependencyAvailable()); } } // namespace ult } // namespace L0 test_metric_ip_sampling_linux_pvc_prelim.cpp000066400000000000000000000474361422164147700413360ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/metrics/linux/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/linux/drm_neo.h" #include "shared/source/os_interface/linux/ioctl_helper.h" #include "shared/source/os_interface/linux/sys_calls.h" #include "shared/test/common/libult/linux/drm_mock.h" #include "shared/test/common/mocks/ult_device_factory.h" #include "shared/test/common/os_interface/linux/sys_calls_linux_ult.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/tools/source/metrics/os_metric_ip_sampling.h" #include "device_ids_configs.h" #include "hw_cmds.h" namespace NEO { namespace SysCalls { extern int closeFuncRetVal; } // namespace SysCalls } // namespace NEO namespace L0 { namespace ult { /* Ioctl helper whose getEuStallProperties() override always returns false, ignoring every argument; the tests install it to exercise the failure path. */ class IoctlHelperPrelim20Mock : public NEO::IoctlHelperPrelim20 { bool getEuStallProperties(std::array &properties, uint64_t dssBufferSize, uint64_t samplingRate, uint64_t pollPeriod, uint64_t engineInstance) override { return false; } }; /* DrmMock specialisation used by the prelim IP sampling tests; the single-argument constructor delegates with defaultHwInfo. */ class DrmPrelimMock : public DrmMock { public: DrmPrelimMock(RootDeviceEnvironment &rootDeviceEnvironment) : 
DrmPrelimMock(rootDeviceEnvironment, defaultHwInfo.get()) {} DrmPrelimMock(RootDeviceEnvironment &rootDeviceEnvironment, HardwareInfo *inputHwInfo, bool invokeQueryEngineInfo = true) : DrmMock(rootDeviceEnvironment) { customHwInfo = std::make_unique(&inputHwInfo->platform, &inputHwInfo->featureTable, &inputHwInfo->workaroundTable, &inputHwInfo->gtSystemInfo, inputHwInfo->capabilityTable); customHwInfo->gtSystemInfo.MaxDualSubSlicesSupported = 64; rootDeviceEnvironment.setHwInfo(customHwInfo.get()); setupIoctlHelper(rootDeviceEnvironment.getHardwareInfo()->platform.eProductFamily); if (invokeQueryEngineInfo) { queryEngineInfo(); } } bool queryEngineInfo() override { uint16_t computeEngineClass = getIoctlHelper()->getComputeEngineClass(); std::vector engines(4); engines[0].engine = {computeEngineClass, 0}; engines[0].capabilities = 0; engines[1].engine = {computeEngineClass, 1}; engines[1].capabilities = 0; engines[2].engine = {computeEngineClass, 2}; engines[2].capabilities = 0; engines[3].engine = {computeEngineClass, 3}; engines[3].capabilities = 0; std::vector distances(4); distances[0].engine = engines[0].engine; distances[0].region = {I915_MEMORY_CLASS_DEVICE, 0}; distances[1].engine = engines[1].engine; distances[1].region = {I915_MEMORY_CLASS_DEVICE, 1}; distances[2].engine = engines[2].engine; distances[2].region = {I915_MEMORY_CLASS_DEVICE, 2}; distances[3].engine = engines[3].engine; distances[3].region = {I915_MEMORY_CLASS_DEVICE, 3}; std::vector queryItems{distances.size()}; for (auto i = 0u; i < distances.size(); i++) { queryItems[i].length = sizeof(drm_i915_query_engine_info); } engineInfo = std::make_unique(this, customHwInfo.get(), 4, distances, queryItems, engines); return true; } bool queryEngineInfo1SubDevice() { uint16_t computeEngineClass = getIoctlHelper()->getComputeEngineClass(); std::vector engines(1); engines[0].engine = {computeEngineClass, 0}; engines[0].capabilities = 0; std::vector distances(1); distances[0].engine = engines[0].engine; 
distances[0].region = {I915_MEMORY_CLASS_DEVICE, 0}; std::vector queryItems{distances.size()}; for (auto i = 0u; i < distances.size(); i++) { queryItems[i].length = sizeof(drm_i915_query_engine_info); } engineInfo = std::make_unique(this, customHwInfo.get(), 1, distances, queryItems, engines); return true; } void getPrelimVersion(std::string &prelimVersion) override { prelimVersion = "2.0"; } void setIoctlHelperPrelim20Mock() { backUpIoctlHelper = std::move(ioctlHelper); ioctlHelper = static_cast>(std::make_unique()); } void restoreIoctlHelperPrelim20() { ioctlHelper = std::move(backUpIoctlHelper); } std::unique_ptr customHwInfo; std::unique_ptr backUpIoctlHelper; }; class MetricIpSamplingLinuxTestPrelim : public DeviceFixture, public ::testing::Test { public: void SetUp() override { DeviceFixture::SetUp(); neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]->osInterface = std::make_unique(); auto &osInterface = device->getOsInterface(); osInterface.setDriverModel(std::make_unique(const_cast(neoDevice->getRootDeviceEnvironment()))); metricIpSamplingOsInterface = MetricIpSamplingOsInterface::create(static_cast(*device)); } void TearDown() override { DeviceFixture::TearDown(); } std::unique_ptr metricIpSamplingOsInterface = nullptr; }; HWTEST2_F(MetricIpSamplingLinuxTestPrelim, givenCorrectArgumentsWhenStartMeasurementIsCalledThenReturnSuccess, IsPVC) { constexpr uint32_t samplingGranularity = 251; constexpr uint32_t gpuClockPeriodNs = 1000000; constexpr uint32_t samplingUnit = 3; uint32_t notifyEveryNReports = 0, samplingPeriodNs = samplingGranularity * samplingUnit * gpuClockPeriodNs; EXPECT_EQ(metricIpSamplingOsInterface->startMeasurement(notifyEveryNReports, samplingPeriodNs), ZE_RESULT_SUCCESS); EXPECT_EQ(samplingPeriodNs, samplingGranularity * samplingUnit * gpuClockPeriodNs); } HWTEST2_F(MetricIpSamplingLinuxTestPrelim, givenGetTimestampFrequencyFailsWhenStartMeasurementIsCalledThenReturnFailure, IsPVC) { auto drm = 
static_cast(device->getOsInterface().getDriverModel()->as()); VariableBackup backupCsTimeStampFrequency(&drm->storedCsTimestampFrequency, 0); VariableBackup backupStoredRetVal(&drm->storedRetVal, -1); uint32_t notifyEveryNReports = 0, samplingPeriodNs = 10000; EXPECT_EQ(metricIpSamplingOsInterface->startMeasurement(notifyEveryNReports, samplingPeriodNs), ZE_RESULT_ERROR_UNKNOWN); } HWTEST2_F(MetricIpSamplingLinuxTestPrelim, givenIoctlI915PerfOpenFailsWhenStartMeasurementIsCalledThenReturnFailure, IsPVC) { uint32_t notifyEveryNReports = 0, samplingPeriodNs = 10000; VariableBackup mockIoctl(&SysCalls::sysCallsIoctl, [](int fileDescriptor, unsigned long int request, void *arg) -> int { if (request == DRM_IOCTL_I915_PERF_OPEN) { return -1; } return 0; }); EXPECT_EQ(metricIpSamplingOsInterface->startMeasurement(notifyEveryNReports, samplingPeriodNs), ZE_RESULT_ERROR_UNKNOWN); } HWTEST2_F(MetricIpSamplingLinuxTestPrelim, givenI915PerfIoctlEnableFailsWhenStartMeasurementIsCalledThenReturnFailure, IsPVC) { uint32_t notifyEveryNReports = 0, samplingPeriodNs = 10000; VariableBackup mockIoctl(&SysCalls::sysCallsIoctl, [](int fileDescriptor, unsigned long int request, void *arg) -> int { if (request == I915_PERF_IOCTL_ENABLE) { return -1; } return 0; }); EXPECT_EQ(metricIpSamplingOsInterface->startMeasurement(notifyEveryNReports, samplingPeriodNs), ZE_RESULT_ERROR_UNKNOWN); } HWTEST2_F(MetricIpSamplingLinuxTestPrelim, givenCloseSucceedsWhenStopMeasurementIsCalledThenReturnSuccess, IsPVC) { VariableBackup backupCloseFuncRetval(&NEO::SysCalls::closeFuncRetVal, 0); EXPECT_EQ(metricIpSamplingOsInterface->stopMeasurement(), ZE_RESULT_SUCCESS); } HWTEST2_F(MetricIpSamplingLinuxTestPrelim, givenCloseFailsWhenStopMeasurementIsCalledThenReturnFailure, IsPVC) { VariableBackup backupCloseFuncRetval(&NEO::SysCalls::closeFuncRetVal, -1); EXPECT_EQ(metricIpSamplingOsInterface->stopMeasurement(), ZE_RESULT_ERROR_UNKNOWN); } HWTEST2_F(MetricIpSamplingLinuxTestPrelim, 
givenI915PerfIoctlDisableFailsWhenStartMeasurementIsCalledThenReturnFailure, IsPVC) {
    // NOTE: despite "StartMeasurement" in the test name, this exercises stopMeasurement():
    // it must fail when the I915_PERF_IOCTL_DISABLE ioctl returns -1.
    VariableBackup mockIoctl(&SysCalls::sysCallsIoctl, [](int fileDescriptor, unsigned long int request, void *arg) -> int {
        if (request == I915_PERF_IOCTL_DISABLE) {
            return -1;
        }
        return 0;
    });
    EXPECT_EQ(metricIpSamplingOsInterface->stopMeasurement(), ZE_RESULT_ERROR_UNKNOWN);
}

// readData() succeeds when the mocked read() reports a positive byte count.
HWTEST2_F(MetricIpSamplingLinuxTestPrelim, givenReadSucceedsWhenReadDataIsCalledThenReturnSuccess, IsPVC) {
    VariableBackup mockRead(&SysCalls::sysCallsRead, [](int fd, void *buf, size_t count) -> ssize_t {
        return 1;
    });
    uint8_t pRawData = 0u;
    size_t pRawDataSize = 0;
    EXPECT_EQ(metricIpSamplingOsInterface->readData(&pRawData, &pRawDataSize), ZE_RESULT_SUCCESS);
}

// readData() must report ZE_RESULT_ERROR_UNKNOWN when read() fails with errno == EBADF.
HWTEST2_F(MetricIpSamplingLinuxTestPrelim, givenReadFailsWhenReadDataIsCalledThenReturnFailure, IsPVC) {
    VariableBackup mockRead(&SysCalls::sysCallsRead, [](int fd, void *buf, size_t count) -> ssize_t {
        // errno has to be set before returning. It used to sit after the return statement and
        // was never executed, so the outcome depended on whatever errno an earlier call left
        // behind (EINTR/EBUSY/EAGAIN are expected to yield success, see the next test).
        errno = EBADF;
        return -1;
    });
    uint8_t pRawData = 0u;
    size_t pRawDataSize = 0;
    EXPECT_EQ(metricIpSamplingOsInterface->readData(&pRawData, &pRawDataSize), ZE_RESULT_ERROR_UNKNOWN);
}

// A failing read() is still expected to yield ZE_RESULT_SUCCESS when errno is one of
// EINTR, EBUSY or EAGAIN; the mock leaves errno alone, so the test sets it before each call.
HWTEST2_F(MetricIpSamplingLinuxTestPrelim, givenReadFailsWithRetryErrorNumberWhenReadDataIsCalledThenReturnSuccess, IsPVC) {
    VariableBackup mockRead(&SysCalls::sysCallsRead, [](int fd, void *buf, size_t count) -> ssize_t {
        return -1;
    });
    uint8_t pRawData = 0u;
    size_t pRawDataSize = 0;

    errno = EINTR;
    EXPECT_EQ(metricIpSamplingOsInterface->readData(&pRawData, &pRawDataSize), ZE_RESULT_SUCCESS);
    errno = EBUSY;
    EXPECT_EQ(metricIpSamplingOsInterface->readData(&pRawData, &pRawDataSize), ZE_RESULT_SUCCESS);
    errno = EAGAIN;
    EXPECT_EQ(metricIpSamplingOsInterface->readData(&pRawData, &pRawDataSize), ZE_RESULT_SUCCESS);
}

// getRequiredBufferSize(10) is expected to equal ten reports of 64 bytes each.
HWTEST2_F(MetricIpSamplingLinuxTestPrelim, WhenGetRequiredBufferSizeIsCalledThenCorrectSizeIsReturned, IsPVC) {
    constexpr uint32_t unitReportSize = 64;
    EXPECT_EQ(metricIpSamplingOsInterface->getRequiredBufferSize(10), unitReportSize * 10);
}
// isNReportsAvailable() is true when the mocked poll() returns 1.
HWTEST2_F(MetricIpSamplingLinuxTestPrelim, GivenPollIsSuccessfulWhenisNReportsAvailableIsCalledThenReturnSuccess, IsPVC) {
    VariableBackup mockPoll(&SysCalls::sysCallsPoll, [](struct pollfd *fds, unsigned long int fdCount, int timeout) -> int {
        return 1;
    });

    EXPECT_TRUE(metricIpSamplingOsInterface->isNReportsAvailable());
}

// isNReportsAvailable() is false when the mocked poll() returns -1.
HWTEST2_F(MetricIpSamplingLinuxTestPrelim, GivenPollIsFailureWhenisNReportsAvailableIsCalledThenReturnFailure, IsPVC) {
    VariableBackup mockPoll(&SysCalls::sysCallsPoll, [](struct pollfd *fds, unsigned long int fdCount, int timeout) -> int {
        return -1;
    });

    EXPECT_FALSE(metricIpSamplingOsInterface->isNReportsAvailable());
}

// With the first PVC_XL device id the dependency must be reported as unavailable.
HWTEST2_F(MetricIpSamplingLinuxTestPrelim, GivenSupportedProductFamilyAndUnsupportedDeviceIdIsUsedWhenIsDependencyAvailableIsCalledThenReturnFailure, IsPVC) {
    auto &platform = neoDevice->getRootDeviceEnvironment().getMutableHardwareInfo()->platform;
    platform.eProductFamily = productFamily;
    platform.usDeviceID = NEO::PVC_XL_IDS.front();

    EXPECT_FALSE(metricIpSamplingOsInterface->isDependencyAvailable());
}

// NOTE: the test name ends in "ReturnFailure", but the assertion expects the dependency to
// be AVAILABLE for every PVC_XT device id.
HWTEST2_F(MetricIpSamplingLinuxTestPrelim, GivenSupportedProductFamilyAndSupportedDeviceIdIsUsedWhenIsDependencyAvailableIsCalledThenReturnFailure, IsPVC) {
    auto &platform = neoDevice->getRootDeviceEnvironment().getMutableHardwareInfo()->platform;
    platform.eProductFamily = productFamily;

    for (auto supportedId : NEO::PVC_XT_IDS) {
        platform.usDeviceID = supportedId;
        EXPECT_TRUE(metricIpSamplingOsInterface->isDependencyAvailable());
    }
}

HWTEST2_F(MetricIpSamplingLinuxTestPrelim, GivenDriverOpenFailsWhenIsDependencyAvailableIsCalledThenReturnFailure, IsPVC) {
    auto hwInfo = neoDevice->getRootDeviceEnvironment().getMutableHardwareInfo();
    hwInfo->platform.eProductFamily = productFamily;
    hwInfo->platform.usDeviceID = NEO::PVC_XT_IDS.front();
    auto drm = static_cast(device->getOsInterface().getDriverModel()->as());
    VariableBackup backupCsTimeStampFrequency(&drm->storedCsTimestampFrequency, 0);
    VariableBackup
backupStoredRetVal(&drm->storedRetVal, -1); EXPECT_FALSE(metricIpSamplingOsInterface->isDependencyAvailable()); } HWTEST2_F(MetricIpSamplingLinuxTestPrelim, GivenIoctlHelperFailsWhenIsDependencyAvailableIsCalledThenReturnFailure, IsPVC) { auto hwInfo = neoDevice->getRootDeviceEnvironment().getMutableHardwareInfo(); hwInfo->platform.eProductFamily = productFamily; hwInfo->platform.usDeviceID = NEO::PVC_XT_IDS.front(); auto drm = static_cast(device->getOsInterface().getDriverModel()->as()); drm->setIoctlHelperPrelim20Mock(); EXPECT_FALSE(metricIpSamplingOsInterface->isDependencyAvailable()); drm->restoreIoctlHelperPrelim20(); } struct MetricIpSamplingLinuxMultiDeviceTest : public ::testing::Test { std::unique_ptr createDevices(uint32_t numSubDevices) { DebugManager.flags.CreateMultipleSubDevices.set(numSubDevices); DebugManager.flags.UseDrmVirtualEnginesForCcs.set(0); NEO::ExecutionEnvironment *executionEnvironment = new MockExecutionEnvironment(defaultHwInfo.get(), false, 1); executionEnvironment->parseAffinityMask(); executionEnvironment->rootDeviceEnvironments[0]->osInterface.reset(new OSInterface); executionEnvironment->rootDeviceEnvironments[0]->osInterface->setDriverModel( std::make_unique(const_cast(*executionEnvironment->rootDeviceEnvironments[0]), defaultHwInfo.get(), false)); return std::make_unique(1, numSubDevices, *executionEnvironment); } DebugManagerStateRestore restorer; }; HWTEST2_F(MetricIpSamplingLinuxMultiDeviceTest, GivenCombinationOfAffinityMaskWhenStartMeasurementIsCalledForRootDeviceThenInstanceIdIsCorrect, IsPVC) { DebugManager.flags.ZE_AFFINITY_MASK.set("0.1,0.2,0.3"); auto deviceFactory = createDevices(4); auto driverHandle = std::make_unique(); ze_result_t returnValue = ZE_RESULT_SUCCESS; auto rootDevice = std::unique_ptr(L0::Device::create(driverHandle.get(), deviceFactory->rootDevices[0], false, &returnValue)); auto metricIpSamplingOsInterface = MetricIpSamplingOsInterface::create(static_cast(*rootDevice)); uint32_t notifyEveryNReports = 
0, samplingPeriodNs = 10000; auto drm = static_cast(rootDevice->getOsInterface().getDriverModel()->as()); drm->queryEngineInfo(); VariableBackup mockIoctl(&SysCalls::sysCallsIoctl, [](int fileDescriptor, unsigned long int request, void *arg) -> int { if (request == DRM_IOCTL_I915_PERF_OPEN) { drm_i915_perf_open_param *param = reinterpret_cast(arg); uint64_t *values = reinterpret_cast(param->properties_ptr); EXPECT_EQ(values[9], 1ull); } return 0; }); EXPECT_EQ(metricIpSamplingOsInterface->startMeasurement(notifyEveryNReports, samplingPeriodNs), ZE_RESULT_SUCCESS); } HWTEST2_F(MetricIpSamplingLinuxMultiDeviceTest, GivenCombinationOfAffinityMaskWhenStartMeasurementIsCalledForSubDeviceThenInstanceIdIsCorrect, IsPVC) { DebugManager.flags.ZE_AFFINITY_MASK.set("0.2,0.3"); auto deviceFactory = createDevices(4); auto driverHandle = std::make_unique(); ze_result_t returnValue = ZE_RESULT_SUCCESS; auto rootDevice = std::unique_ptr(L0::Device::create(driverHandle.get(), deviceFactory->rootDevices[0], false, &returnValue)); uint32_t subDeviceCount = 2; ze_device_handle_t subDevices[2] = {}; rootDevice->getSubDevices(&subDeviceCount, subDevices); auto metricIpSamplingOsInterface = MetricIpSamplingOsInterface::create(static_cast(*subDevices[0])); uint32_t notifyEveryNReports = 0, samplingPeriodNs = 10000; auto drm = static_cast(rootDevice->getOsInterface().getDriverModel()->as()); drm->queryEngineInfo(); { VariableBackup mockIoctl(&SysCalls::sysCallsIoctl, [](int fileDescriptor, unsigned long int request, void *arg) -> int { if (request == DRM_IOCTL_I915_PERF_OPEN) { drm_i915_perf_open_param *param = reinterpret_cast(arg); uint64_t *values = reinterpret_cast(param->properties_ptr); EXPECT_EQ(values[9], 2ull); } return 0; }); EXPECT_EQ(metricIpSamplingOsInterface->startMeasurement(notifyEveryNReports, samplingPeriodNs), ZE_RESULT_SUCCESS); } { metricIpSamplingOsInterface = MetricIpSamplingOsInterface::create(static_cast(*subDevices[1])); VariableBackup 
mockIoctl(&SysCalls::sysCallsIoctl, [](int fileDescriptor, unsigned long int request, void *arg) -> int {
            // Second exposed sub-device: slot 9 of the perf-open properties must be 3.
            if (request != DRM_IOCTL_I915_PERF_OPEN) {
                return 0;
            }
            drm_i915_perf_open_param *openParam = reinterpret_cast(arg);
            uint64_t *properties = reinterpret_cast(openParam->properties_ptr);
            EXPECT_EQ(properties[9], 3ull);
            return 0;
        });
        EXPECT_EQ(metricIpSamplingOsInterface->startMeasurement(notifyEveryNReports, samplingPeriodNs), ZE_RESULT_SUCCESS);
    }
}

// Four sub-devices are created but the mock is then re-queried with engine info for a single
// engine only (queryEngineInfo1SubDevice); startMeasurement() must fail under mask "0.1".
HWTEST2_F(MetricIpSamplingLinuxMultiDeviceTest, GivenEngineInfoIsNullWhenStartMeasurementIsCalledForRootDeviceThenErrorIsReturned, IsPVC) {
    DebugManager.flags.ZE_AFFINITY_MASK.set("0.1");

    auto deviceFactory = createDevices(4);
    auto driverHandle = std::make_unique();
    ze_result_t createStatus = ZE_RESULT_SUCCESS;
    auto rootDevice = std::unique_ptr(L0::Device::create(driverHandle.get(), deviceFactory->rootDevices[0], false, &createStatus));
    auto metricIpSamplingOsInterface = MetricIpSamplingOsInterface::create(static_cast(*rootDevice));

    static_cast(rootDevice->getOsInterface().getDriverModel()->as())->queryEngineInfo1SubDevice();

    uint32_t notifyEveryNReports = 0;
    uint32_t samplingPeriodNs = 10000;
    EXPECT_EQ(metricIpSamplingOsInterface->startMeasurement(notifyEveryNReports, samplingPeriodNs), ZE_RESULT_ERROR_UNKNOWN);
}

// Same setup, but engine info is never (re)queried by the test; startMeasurement() must fail.
HWTEST2_F(MetricIpSamplingLinuxMultiDeviceTest, GivenEngineInstanceIsNullWhenStartMeasurementIsCalledForRootDeviceThenErrorIsReturned, IsPVC) {
    DebugManager.flags.ZE_AFFINITY_MASK.set("0.1");

    auto deviceFactory = createDevices(4);
    auto driverHandle = std::make_unique();
    ze_result_t createStatus = ZE_RESULT_SUCCESS;
    auto rootDevice = std::unique_ptr(L0::Device::create(driverHandle.get(), deviceFactory->rootDevices[0], false, &createStatus));
    auto metricIpSamplingOsInterface = MetricIpSamplingOsInterface::create(static_cast(*rootDevice));

    uint32_t notifyEveryNReports = 0;
    uint32_t samplingPeriodNs = 10000;
    EXPECT_EQ(metricIpSamplingOsInterface->startMeasurement(notifyEveryNReports, samplingPeriodNs), ZE_RESULT_ERROR_UNKNOWN);
}

} // 
namespace ult } // namespace L0 test_metric_ip_sampling_linux_pvc_upstream.cpp000066400000000000000000000044111422164147700416700ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/metrics/linux/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/libult/linux/drm_mock.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/tools/source/metrics/os_metric_ip_sampling.h" #include "level_zero/tools/test/unit_tests/sources/metrics/mock_metric_oa.h" #include "hw_cmds.h" namespace L0 { namespace ult { class DrmTipMock : public DrmMock { public: DrmTipMock(RootDeviceEnvironment &rootDeviceEnvironment) : DrmTipMock(rootDeviceEnvironment, defaultHwInfo.get()) {} DrmTipMock(RootDeviceEnvironment &rootDeviceEnvironment, HardwareInfo *inputHwInfo) : DrmMock(rootDeviceEnvironment) { rootDeviceEnvironment.setHwInfo(inputHwInfo); setupIoctlHelper(rootDeviceEnvironment.getHardwareInfo()->platform.eProductFamily); } void getPrelimVersion(std::string &prelimVersion) override { prelimVersion = ""; } }; class MetricIpSamplingLinuxTestUpstream : public MetricContextFixture, public ::testing::Test { public: void SetUp() override { MetricContextFixture::SetUp(); neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]->osInterface = std::make_unique(); auto &osInterface = device->getOsInterface(); osInterface.setDriverModel(std::make_unique(const_cast(neoDevice->getRootDeviceEnvironment()))); metricIpSamplingOsInterface = MetricIpSamplingOsInterface::create(static_cast(*device)); } void TearDown() override { MetricContextFixture::TearDown(); } std::unique_ptr metricIpSamplingOsInterface = nullptr; }; HWTEST2_F(MetricIpSamplingLinuxTestUpstream, GivenSupportedProductFamilyAndSupportedDeviceIdIsUsedForUpstreamWhenIsDependencyAvailableIsCalledThenReturnFailure, IsPVC) { auto hwInfo = 
neoDevice->getRootDeviceEnvironment().getMutableHardwareInfo(); hwInfo->platform.eProductFamily = productFamily; for (auto deviceId : NEO::PVC_XT_IDS) { hwInfo->platform.usDeviceID = deviceId; EXPECT_FALSE(metricIpSamplingOsInterface->isDependencyAvailable()); } } } // namespace ult } // namespace L0 test_metric_oa_query_pool_linux.cpp000066400000000000000000000502371422164147700374620ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/metrics/linux/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/device_factory.h" #include "shared/source/os_interface/linux/sys_calls.h" #include "shared/source/os_interface/os_interface.h" #include "shared/test/common/libult/linux/drm_mock.h" #include "shared/test/common/mocks/mock_io_functions.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/tools/test/unit_tests/sources/metrics/mock_metric_oa.h" #include "gmock/gmock.h" #include "gtest/gtest.h" using ::testing::_; using ::testing::Return; namespace NEO { namespace SysCalls { extern int fstatFuncRetVal; } // namespace SysCalls } // namespace NEO namespace L0 { namespace ult { class MetricQueryPoolLinuxTest : public MetricContextFixture, public ::testing::Test { public: void SetUp() override { MetricContextFixture::SetUp(); neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]->osInterface = std::make_unique(); auto &osInterface = device->getOsInterface(); osInterface.setDriverModel(std::make_unique(const_cast(neoDevice->getRootDeviceEnvironment()))); } void TearDown() override { MetricContextFixture::TearDown(); } }; TEST_F(MetricQueryPoolLinuxTest, givenCorrectArgumentsWhenGetContextDataIsCalledThenReturnsSuccess) { ClientData_1_0 clientData = {}; ContextCreateData_1_0 contextData = {}; ClientDataLinuxAdapter_1_0 adapter = {}; adapter.Type = LinuxAdapterType::Last; adapter.DrmFileDescriptor = -1; 
clientData.Linux.Adapter = &adapter; contextData.ClientData = &clientData; contextData.ClientData->Linux.Adapter = &adapter; EXPECT_EQ(mockMetricsLibrary->metricsLibraryGetContextData(*device, contextData), true); auto &osInterface = device->getOsInterface(); EXPECT_EQ(contextData.ClientData->Linux.Adapter->DrmFileDescriptor, osInterface.getDriverModel()->as()->getFileDescriptor()); EXPECT_EQ(contextData.ClientData->Linux.Adapter->Type, LinuxAdapterType::DrmFileDescriptor); } TEST_F(MetricQueryPoolLinuxTest, givenCorrectArgumentsWhenActivateConfigurationIsCalledThenReturnsSuccess) { ConfigurationHandle_1_0 dummyConfigurationHandle; dummyConfigurationHandle.data = &dummyConfigurationHandle; mockMetricsLibrary->initializationState = ZE_RESULT_SUCCESS; EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockConfigurationActivate(_, _)) .WillOnce(Return(StatusCode::Success)); EXPECT_TRUE(mockMetricsLibrary->activateConfiguration(dummyConfigurationHandle)); } TEST_F(MetricQueryPoolLinuxTest, givenCorrectArgumentsWhenActivateConfigurationIsCalledAndMetricLibraryActivateFailsThenReturnsFail) { ConfigurationHandle_1_0 dummyConfigurationHandle; dummyConfigurationHandle.data = &dummyConfigurationHandle; mockMetricsLibrary->initializationState = ZE_RESULT_SUCCESS; EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockConfigurationActivate(_, _)) .WillOnce(Return(StatusCode::Failed)); EXPECT_FALSE(mockMetricsLibrary->activateConfiguration(dummyConfigurationHandle)); } TEST_F(MetricQueryPoolLinuxTest, givenInCorrectConfigurationWhenActivateConfigurationIsCalledThenReturnsFail) { ConfigurationHandle_1_0 dummyConfigurationHandle; dummyConfigurationHandle.data = nullptr; mockMetricsLibrary->initializationState = ZE_RESULT_SUCCESS; ON_CALL(*mockMetricsLibrary->g_mockApi, MockConfigurationActivate(_, _)) .WillByDefault(Return(StatusCode::Success)); EXPECT_FALSE(mockMetricsLibrary->activateConfiguration(dummyConfigurationHandle)); } TEST_F(MetricQueryPoolLinuxTest, 
givenMetricLibraryIsInIncorrectInitializedStateWhenActivateConfigurationIsCalledThenReturnsFail) { ConfigurationHandle_1_0 dummyConfigurationHandle; dummyConfigurationHandle.data = &dummyConfigurationHandle; mockMetricsLibrary->initializationState = ZE_RESULT_ERROR_UNKNOWN; ON_CALL(*mockMetricsLibrary->g_mockApi, MockConfigurationActivate(_, _)) .WillByDefault(Return(StatusCode::Success)); EXPECT_FALSE(mockMetricsLibrary->activateConfiguration(dummyConfigurationHandle)); } TEST_F(MetricQueryPoolLinuxTest, givenCorrectArgumentsWhenDeActivateConfigurationIsCalledThenReturnsSuccess) { ConfigurationHandle_1_0 dummyConfigurationHandle; dummyConfigurationHandle.data = &dummyConfigurationHandle; mockMetricsLibrary->initializationState = ZE_RESULT_SUCCESS; EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockConfigurationDeactivate(_)) .WillOnce(Return(StatusCode::Success)); EXPECT_TRUE(mockMetricsLibrary->deactivateConfiguration(dummyConfigurationHandle)); } TEST_F(MetricQueryPoolLinuxTest, givenCorrectArgumentsWhenDeActivateConfigurationIsCalledAndMetricLibraryDeActivateFailsThenReturnsFail) { ConfigurationHandle_1_0 dummyConfigurationHandle; dummyConfigurationHandle.data = &dummyConfigurationHandle; mockMetricsLibrary->initializationState = ZE_RESULT_SUCCESS; EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockConfigurationDeactivate(_)) .WillOnce(Return(StatusCode::Failed)); EXPECT_FALSE(mockMetricsLibrary->deactivateConfiguration(dummyConfigurationHandle)); } TEST_F(MetricQueryPoolLinuxTest, givenInCorrectConfigurationWhenDeActivateConfigurationIsCalledThenReturnsFail) { ConfigurationHandle_1_0 dummyConfigurationHandle; dummyConfigurationHandle.data = nullptr; mockMetricsLibrary->initializationState = ZE_RESULT_SUCCESS; ON_CALL(*mockMetricsLibrary->g_mockApi, MockConfigurationDeactivate(_)) .WillByDefault(Return(StatusCode::Success)); EXPECT_FALSE(mockMetricsLibrary->deactivateConfiguration(dummyConfigurationHandle)); } TEST_F(MetricQueryPoolLinuxTest, 
givenMetricLibraryIsInIncorrectInitializedStateWhenDeActivateConfigurationIsCalledThenReturnsFail) { ConfigurationHandle_1_0 dummyConfigurationHandle; dummyConfigurationHandle.data = &dummyConfigurationHandle; mockMetricsLibrary->initializationState = ZE_RESULT_ERROR_UNKNOWN; ON_CALL(*mockMetricsLibrary->g_mockApi, MockConfigurationDeactivate(_)) .WillByDefault(Return(StatusCode::Success)); EXPECT_FALSE(mockMetricsLibrary->deactivateConfiguration(dummyConfigurationHandle)); } TEST_F(MetricQueryPoolLinuxTest, givenCorrectArgumentsWhenCacheConfigurationIsCalledThenCacheingIsSuccessfull) { metricsDeviceParams.ConcurrentGroupsCount = 1; Mock metricsConcurrentGroup0; TConcurrentGroupParams_1_0 metricsConcurrentGroupParams = {}; metricsConcurrentGroupParams.SymbolName = "OA"; metricsConcurrentGroupParams.MetricSetsCount = 1; Mock metricsSet; MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams = {}; metricsSetParams.ApiMask = MetricsDiscovery::API_TYPE_OCL; openMetricsAdapter(); EXPECT_CALL(metricsDevice, GetParams()) .WillRepeatedly(Return(&metricsDeviceParams)); EXPECT_CALL(metricsDevice, GetConcurrentGroup(_)) .WillOnce(Return(&metricsConcurrentGroup0)); EXPECT_CALL(metricsConcurrentGroup0, GetParams()) .WillRepeatedly(Return(&metricsConcurrentGroupParams)); EXPECT_CALL(metricsConcurrentGroup0, GetMetricSet(_)) .WillRepeatedly(Return(&metricsSet)); EXPECT_CALL(metricsSet, GetParams()) .WillRepeatedly(Return(&metricsSetParams)); EXPECT_CALL(metricsSet, SetApiFiltering(_)) .WillRepeatedly(Return(TCompletionCode::CC_OK)); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockConfigurationDelete(_)) .WillOnce(Return(StatusCode::Success)); uint32_t metricGroupCount = 0; EXPECT_EQ(zetMetricGroupGet(device->toHandle(), &metricGroupCount, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); std::vector metricGroups; metricGroups.resize(metricGroupCount); EXPECT_EQ(zetMetricGroupGet(device->toHandle(), &metricGroupCount, metricGroups.data()), ZE_RESULT_SUCCESS); 
EXPECT_EQ(metricGroupCount, 1u); ConfigurationHandle_1_0 dummyConfigurationHandle; dummyConfigurationHandle.data = &dummyConfigurationHandle; mockMetricsLibrary->deleteAllConfigurations(); mockMetricsLibrary->cacheConfiguration(metricGroups[0], dummyConfigurationHandle); EXPECT_EQ(mockMetricsLibrary->getConfiguration(metricGroups[0]).data, dummyConfigurationHandle.data); } TEST_F(MetricQueryPoolLinuxTest, WhenMetricLibraryGetFileNameIsCalledThenCorrectFilenameIsReturned) { EXPECT_STREQ(MetricsLibrary::getFilename(), "libigdml.so.1"); } class MetricEnumerationTestLinux : public MetricContextFixture, public ::testing::Test { public: void SetUp() override { MetricContextFixture::SetUp(); neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]->osInterface = std::make_unique(); auto &osInterface = device->getOsInterface(); osInterface.setDriverModel(std::make_unique(const_cast(neoDevice->getRootDeviceEnvironment()))); } void TearDown() override { MetricContextFixture::TearDown(); } }; TEST_F(MetricEnumerationTestLinux, givenCorrectLinuxDrmAdapterWhenGetMetricsAdapterThenReturnSuccess) { auto adapterGroupParams = TAdapterGroupParams_1_6{}; auto adapterParams = TAdapterParams_1_9{}; adapterGroupParams.AdapterCount = 1; adapterParams.SystemId.Type = MetricsDiscovery::TAdapterIdType::ADAPTER_ID_TYPE_MAJOR_MINOR; adapterParams.SystemId.MajorMinor.Major = 0; adapterParams.SystemId.MajorMinor.Minor = 0; openMetricsAdapterGroup(); EXPECT_CALL(adapterGroup, GetParams()) .Times(1) .WillOnce(Return(&adapterGroupParams)); EXPECT_CALL(adapterGroup, GetAdapter(_)) .WillRepeatedly(Return(&adapter)); EXPECT_CALL(adapter, GetParams()) .WillRepeatedly(Return(&adapterParams)); EXPECT_CALL(*mockMetricEnumeration, getAdapterId(_, _)) .Times(1) .WillOnce(DoAll(::testing::SetArgReferee<0>(adapterParams.SystemId.MajorMinor.Major), ::testing::SetArgReferee<1>(adapterParams.SystemId.MajorMinor.Minor), Return(true))); EXPECT_CALL(*mockMetricEnumeration, 
getMetricsAdapter()) .Times(1) .WillOnce([&]() { return mockMetricEnumeration->baseGetMetricsAdapter(); }); EXPECT_EQ(mockMetricEnumeration->openMetricsDiscovery(), ZE_RESULT_SUCCESS); } TEST_F(MetricEnumerationTestLinux, givenCorrectLinuxMinorPrimaryNodeDrmAdapterWhenGetMetricsAdapterThenReturnSuccess) { const int32_t drmNodePrimary = 0; // From xf86drm.h const int32_t drmMaxDevices = 64; // From drm_drv.c#110 auto adapterGroupParams = TAdapterGroupParams_1_6{}; auto adapterParams = TAdapterParams_1_9{}; adapterGroupParams.AdapterCount = 1; adapterParams.SystemId.Type = MetricsDiscovery::TAdapterIdType::ADAPTER_ID_TYPE_MAJOR_MINOR; adapterParams.SystemId.MajorMinor.Major = 0; adapterParams.SystemId.MajorMinor.Minor = 1000 - (drmNodePrimary * drmMaxDevices); uint32_t drmMajor = 0; uint32_t drmMinor = 1000; openMetricsAdapterGroup(); EXPECT_CALL(adapterGroup, GetParams()) .WillRepeatedly(Return(&adapterGroupParams)); EXPECT_CALL(adapterGroup, GetAdapter(_)) .WillRepeatedly(Return(&adapter)); EXPECT_CALL(adapter, GetParams()) .WillRepeatedly(Return(&adapterParams)); EXPECT_CALL(*mockMetricEnumeration, getAdapterId(_, _)) .Times(1) .WillOnce(DoAll(::testing::SetArgReferee<0>(drmMajor), ::testing::SetArgReferee<1>(drmMinor), Return(true))); EXPECT_CALL(*mockMetricEnumeration, getMetricsAdapter()) .Times(1) .WillOnce([&]() { return mockMetricEnumeration->baseGetMetricsAdapter(); }); EXPECT_EQ(mockMetricEnumeration->openMetricsDiscovery(), ZE_RESULT_SUCCESS); } TEST_F(MetricEnumerationTestLinux, givenCorrectLinuxMinorRenderNodeDrmAdapterWhenGetMetricsAdapterThenReturnSuccess) { const int32_t drmNodeRender = 2; // From xf86drm.h const int32_t drmMaxDevices = 64; // From drm_drv.c#110 auto adapterGroupParams = TAdapterGroupParams_1_6{}; auto adapterParams = TAdapterParams_1_9{}; adapterGroupParams.AdapterCount = 1; adapterParams.SystemId.Type = MetricsDiscovery::TAdapterIdType::ADAPTER_ID_TYPE_MAJOR_MINOR; adapterParams.SystemId.MajorMinor.Major = 0; 
adapterParams.SystemId.MajorMinor.Minor = 1000 - (drmNodeRender * drmMaxDevices); uint32_t drmMajor = 0; uint32_t drmMinor = 1000; openMetricsAdapterGroup(); EXPECT_CALL(adapterGroup, GetParams()) .WillRepeatedly(Return(&adapterGroupParams)); EXPECT_CALL(adapterGroup, GetAdapter(_)) .WillRepeatedly(Return(&adapter)); EXPECT_CALL(adapter, GetParams()) .WillRepeatedly(Return(&adapterParams)); EXPECT_CALL(*mockMetricEnumeration, getAdapterId(_, _)) .Times(1) .WillOnce(DoAll(::testing::SetArgReferee<0>(drmMajor), ::testing::SetArgReferee<1>(drmMinor), Return(true))); EXPECT_CALL(*mockMetricEnumeration, getMetricsAdapter()) .Times(1) .WillOnce([&]() { return mockMetricEnumeration->baseGetMetricsAdapter(); }); EXPECT_EQ(mockMetricEnumeration->openMetricsDiscovery(), ZE_RESULT_SUCCESS); } TEST_F(MetricEnumerationTestLinux, givenIcorrectMetricDiscoveryAdapterTypeWhenGetMetricsAdapterThenReturnFail) { auto adapterGroupParams = TAdapterGroupParams_1_6{}; auto adapterParams = TAdapterParams_1_9{}; adapterGroupParams.AdapterCount = 1; adapterParams.SystemId.Type = MetricsDiscovery::TAdapterIdType::ADAPTER_ID_TYPE_LUID; adapterParams.SystemId.MajorMinor.Major = 0; adapterParams.SystemId.MajorMinor.Minor = 0; EXPECT_CALL(*mockMetricEnumeration->g_mockApi, MockOpenAdapterGroup(_)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<0>(&adapterGroup), Return(TCompletionCode::CC_OK))); EXPECT_CALL(adapterGroup, GetParams()) .Times(1) .WillOnce(Return(&adapterGroupParams)); EXPECT_CALL(adapterGroup, GetAdapter(_)) .WillRepeatedly(Return(&adapter)); EXPECT_CALL(adapter, GetParams()) .WillRepeatedly(Return(&adapterParams)); EXPECT_CALL(*mockMetricEnumeration, getAdapterId(_, _)) .Times(1) .WillOnce(DoAll(::testing::SetArgReferee<0>(adapterParams.SystemId.MajorMinor.Major), ::testing::SetArgReferee<1>(adapterParams.SystemId.MajorMinor.Minor), Return(true))); EXPECT_CALL(*mockMetricEnumeration, getMetricsAdapter()) .Times(1) .WillOnce([&]() { return 
mockMetricEnumeration->baseGetMetricsAdapter(); }); EXPECT_CALL(adapterGroup, Close()) .Times(1) .WillOnce(Return(TCompletionCode::CC_OK)); EXPECT_NE(mockMetricEnumeration->openMetricsDiscovery(), ZE_RESULT_SUCCESS); } TEST_F(MetricEnumerationTestLinux, givenIcorrectMetricDiscoveryAdapterMajorWhenGetMetricsAdapterThenReturnFail) { auto adapterGroupParams = TAdapterGroupParams_1_6{}; auto adapterParams = TAdapterParams_1_9{}; adapterGroupParams.AdapterCount = 1; adapterParams.SystemId.Type = MetricsDiscovery::TAdapterIdType::ADAPTER_ID_TYPE_MAJOR_MINOR; adapterParams.SystemId.MajorMinor.Major = 0; adapterParams.SystemId.MajorMinor.Minor = 0; uint32_t incorrectMajor = 1; EXPECT_CALL(*mockMetricEnumeration->g_mockApi, MockOpenAdapterGroup(_)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<0>(&adapterGroup), Return(TCompletionCode::CC_OK))); EXPECT_CALL(adapterGroup, GetParams()) .Times(1) .WillOnce(Return(&adapterGroupParams)); EXPECT_CALL(adapterGroup, GetAdapter(_)) .WillRepeatedly(Return(&adapter)); EXPECT_CALL(adapter, GetParams()) .WillRepeatedly(Return(&adapterParams)); EXPECT_CALL(*mockMetricEnumeration, getAdapterId(_, _)) .Times(1) .WillOnce(DoAll(::testing::SetArgReferee<0>(incorrectMajor), ::testing::SetArgReferee<1>(adapterParams.SystemId.MajorMinor.Minor), Return(true))); EXPECT_CALL(*mockMetricEnumeration, getMetricsAdapter()) .Times(1) .WillOnce([&]() { return mockMetricEnumeration->baseGetMetricsAdapter(); }); EXPECT_CALL(adapterGroup, Close()) .Times(1) .WillOnce(Return(TCompletionCode::CC_OK)); EXPECT_NE(mockMetricEnumeration->openMetricsDiscovery(), ZE_RESULT_SUCCESS); } TEST_F(MetricEnumerationTestLinux, givenIcorrectMetricDiscoveryAdapterMinorWhenGetMetricsAdapterThenReturnFail) { auto adapterGroupParams = TAdapterGroupParams_1_6{}; auto adapterParams = TAdapterParams_1_9{}; adapterGroupParams.AdapterCount = 1; adapterParams.SystemId.Type = MetricsDiscovery::TAdapterIdType::ADAPTER_ID_TYPE_MAJOR_MINOR; adapterParams.SystemId.MajorMinor.Major = 
0; adapterParams.SystemId.MajorMinor.Minor = 0; uint32_t incorrectMinor = 1; EXPECT_CALL(*mockMetricEnumeration->g_mockApi, MockOpenAdapterGroup(_)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<0>(&adapterGroup), Return(TCompletionCode::CC_OK))); EXPECT_CALL(adapterGroup, GetParams()) .Times(1) .WillOnce(Return(&adapterGroupParams)); EXPECT_CALL(adapterGroup, GetAdapter(_)) .WillRepeatedly(Return(&adapter)); EXPECT_CALL(adapter, GetParams()) .WillRepeatedly(Return(&adapterParams)); EXPECT_CALL(*mockMetricEnumeration, getAdapterId(_, _)) .Times(1) .WillOnce(DoAll(::testing::SetArgReferee<0>(adapterParams.SystemId.MajorMinor.Major), ::testing::SetArgReferee<1>(incorrectMinor), Return(true))); EXPECT_CALL(*mockMetricEnumeration, getMetricsAdapter()) .Times(1) .WillOnce([&]() { return mockMetricEnumeration->baseGetMetricsAdapter(); }); EXPECT_CALL(adapterGroup, Close()) .Times(1) .WillOnce(Return(TCompletionCode::CC_OK)); EXPECT_NE(mockMetricEnumeration->openMetricsDiscovery(), ZE_RESULT_SUCCESS); } TEST_F(MetricEnumerationTestLinux, givenIcorrectOpenMetricDeviceOnAdapterWhenGetMetricsAdapterThenReturnFail) { auto adapterGroupParams = TAdapterGroupParams_1_6{}; auto adapterParams = TAdapterParams_1_9{}; adapterGroupParams.AdapterCount = 1; adapterParams.SystemId.Type = MetricsDiscovery::TAdapterIdType::ADAPTER_ID_TYPE_MAJOR_MINOR; adapterParams.SystemId.MajorMinor.Major = 0; adapterParams.SystemId.MajorMinor.Minor = 0; EXPECT_CALL(*mockMetricEnumeration->g_mockApi, MockOpenAdapterGroup(_)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<0>(&adapterGroup), Return(TCompletionCode::CC_OK))); EXPECT_CALL(adapterGroup, GetParams()) .Times(1) .WillOnce(Return(&adapterGroupParams)); EXPECT_CALL(adapterGroup, GetAdapter(_)) .WillRepeatedly(Return(&adapter)); EXPECT_CALL(adapter, GetParams()) .WillRepeatedly(Return(&adapterParams)); EXPECT_CALL(*mockMetricEnumeration, getAdapterId(_, _)) .Times(1) 
.WillOnce(DoAll(::testing::SetArgReferee<0>(adapterParams.SystemId.MajorMinor.Major), ::testing::SetArgReferee<1>(adapterParams.SystemId.MajorMinor.Minor), Return(true))); EXPECT_CALL(*mockMetricEnumeration, getMetricsAdapter()) .Times(1) .WillOnce([&]() { return mockMetricEnumeration->baseGetMetricsAdapter(); }); EXPECT_CALL(adapter, OpenMetricsDevice(_)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<0>(nullptr), Return(TCompletionCode::CC_ERROR_GENERAL))); EXPECT_CALL(adapterGroup, Close()) .Times(1) .WillOnce(Return(TCompletionCode::CC_OK)); EXPECT_NE(mockMetricEnumeration->openMetricsDiscovery(), ZE_RESULT_SUCCESS); } TEST_F(MetricEnumerationTestLinux, givenCorrectDrmFileForFstatWhenGetMetricsAdapterThenReturnSuccess) { uint32_t drmMajor = 0; uint32_t drmMinor = 0; VariableBackup fstatBackup(&NEO::SysCalls::fstatFuncRetVal); NEO::SysCalls::fstatFuncRetVal = 0; EXPECT_EQ(mockMetricEnumeration->baseGetAdapterId(drmMajor, drmMinor), true); } TEST_F(MetricEnumerationTestLinux, givenIncorrectDrmFileForFstatWhenGetMetricsAdapterThenReturnFail) { uint32_t drmMajor = 0; uint32_t drmMinor = 0; VariableBackup fstatBackup(&NEO::SysCalls::fstatFuncRetVal); NEO::SysCalls::fstatFuncRetVal = -1; EXPECT_EQ(mockMetricEnumeration->baseGetAdapterId(drmMajor, drmMinor), false); } } // namespace ult } // namespace L0 metric_ip_sampling_fixture.cpp000066400000000000000000000043471422164147700352410ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/metrics/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/test/unit_tests/sources/metrics/metric_ip_sampling_fixture.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/tools/source/metrics/metric_ip_sampling_source.h" #include "level_zero/tools/source/metrics/metric_oa_source.h" #include "level_zero/tools/test/unit_tests/sources/metrics/mock_metric_ip_sampling.h" #include namespace L0 { 
extern _ze_driver_handle_t *GlobalDriverHandle; namespace ult { void MetricIpSamplingFixture::SetUp() { MultiDeviceFixture::numRootDevices = 1; MultiDeviceFixture::numSubDevices = 2; MultiDeviceFixture::SetUp(); testDevices.reserve(MultiDeviceFixture::numRootDevices + (MultiDeviceFixture::numRootDevices * MultiDeviceFixture::numSubDevices)); for (auto device : driverHandle->devices) { testDevices.push_back(device); auto &deviceImp = *static_cast(device); const uint32_t subDeviceCount = static_cast(deviceImp.subDevices.size()); for (uint32_t i = 0; i < subDeviceCount; i++) { testDevices.push_back(deviceImp.subDevices[i]); } } osInterfaceVector.reserve(testDevices.size()); for (auto device : testDevices) { auto mockMetricIpSamplingOsInterface = new MockMetricIpSamplingOsInterface(); osInterfaceVector.push_back(mockMetricIpSamplingOsInterface); std::unique_ptr metricIpSamplingOsInterface = std::unique_ptr(mockMetricIpSamplingOsInterface); auto &metricSource = device->getMetricDeviceContext().getMetricSource(); metricSource.setMetricOsInterface(metricIpSamplingOsInterface); auto &metricOaSource = device->getMetricDeviceContext().getMetricSource(); metricOaSource.setInitializationState(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE); } GlobalDriverHandle = static_cast<_ze_driver_handle_t *>(driverHandle.get()); } void MetricIpSamplingFixture::TearDown() { MultiDeviceFixture::TearDown(); } } // namespace ult } // namespace L0 metric_ip_sampling_fixture.h000066400000000000000000000011531422164147700346760ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/metrics/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include namespace L0 { namespace ult { class MockMetricIpSamplingOsInterface; class MetricIpSamplingFixture : public MultiDeviceFixture, public ::testing::Test { public: void SetUp() override; void TearDown() override; 
std::vector osInterfaceVector = {}; std::vector testDevices = {}; }; } // namespace ult } // namespace L0 mock_metric_ip_sampling.h000066400000000000000000000030041422164147700341360ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/metrics/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/test/common/test_macros/test.h" #include "level_zero/tools/source/metrics/os_metric_ip_sampling.h" namespace L0 { namespace ult { class MockMetricIpSamplingOsInterface : public MetricIpSamplingOsInterface { public: ze_result_t startMeasurementReturn = ZE_RESULT_SUCCESS; ze_result_t stopMeasurementReturn = ZE_RESULT_SUCCESS; ze_result_t readDataReturn = ZE_RESULT_SUCCESS; uint32_t getRequiredBufferSizeReturn = 100; uint32_t getUnitReportSizeReturn = 64; bool isNReportsAvailableReturn = true; bool isDependencyAvailableReturn = true; ~MockMetricIpSamplingOsInterface() override = default; ze_result_t startMeasurement(uint32_t ¬ifyEveryNReports, uint32_t &samplingPeriodNs) override { return startMeasurementReturn; } ze_result_t stopMeasurement() override { return stopMeasurementReturn; } ze_result_t readData(uint8_t *pRawData, size_t *pRawDataSize) override { return readDataReturn; } uint32_t getRequiredBufferSize(const uint32_t maxReportCount) override { return getRequiredBufferSizeReturn; } uint32_t getUnitReportSize() override { return getUnitReportSizeReturn; } bool isNReportsAvailable() override { return isNReportsAvailableReturn; } bool isDependencyAvailable() override { return isDependencyAvailableReturn; } }; } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/metrics/mock_metric_oa.cpp000066400000000000000000000412661422164147700326610ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include 
"level_zero/tools/test/unit_tests/sources/metrics/mock_metric_oa.h" #include "shared/source/command_container/implicit_scaling.h" #include "shared/test/common/mocks/mock_os_library.h" #include "level_zero/tools/source/metrics/metric_ip_sampling_source.h" #include "level_zero/tools/source/metrics/metric_oa_source.h" #include "level_zero/tools/source/metrics/metric_oa_streamer_imp.h" #include "level_zero/tools/source/metrics/os_metric_ip_sampling.h" using namespace MetricsLibraryApi; using ::testing::_; using ::testing::Return; namespace L0 { namespace ult { class MockIpSamplingOsInterface : public MetricIpSamplingOsInterface { public: ~MockIpSamplingOsInterface() override = default; ze_result_t startMeasurement(uint32_t ¬ifyEveryNReports, uint32_t &samplingPeriodNs) override { return ZE_RESULT_ERROR_UNKNOWN; } ze_result_t stopMeasurement() override { return ZE_RESULT_ERROR_UNKNOWN; } ze_result_t readData(uint8_t *pRawData, size_t *pRawDataSize) override { return ZE_RESULT_ERROR_UNKNOWN; } uint32_t getRequiredBufferSize(const uint32_t maxReportCount) override { return 0; } uint32_t getUnitReportSize() override { return 0; } bool isNReportsAvailable() override { return false; } bool isDependencyAvailable() override { return false; } }; void MetricContextFixture::SetUp() { // Call base class. ContextFixture::SetUp(); // Initialize metric api. auto &metricSource = device->getMetricDeviceContext().getMetricSource(); metricSource.setInitializationState(ZE_RESULT_SUCCESS); std::unique_ptr metricIpSamplingOsInterface = std::unique_ptr(new MockIpSamplingOsInterface()); auto &ipMetricSource = device->getMetricDeviceContext().getMetricSource(); ipMetricSource.setMetricOsInterface(metricIpSamplingOsInterface); // Mock metrics library. mockMetricsLibrary = std::unique_ptr>(new (std::nothrow) Mock(metricSource)); mockMetricsLibrary->setMockedApi(&mockMetricsLibraryApi); mockMetricsLibrary->handle = new MockOsLibrary(); // Mock metric enumeration. 
mockMetricEnumeration = std::unique_ptr>(new (std::nothrow) Mock(metricSource)); mockMetricEnumeration->setMockedApi(&mockMetricsDiscoveryApi); mockMetricEnumeration->hMetricsDiscovery = std::make_unique(); // Metrics Discovery device common settings. metricsDeviceParams.Version.MajorNumber = MetricEnumeration::requiredMetricsDiscoveryMajorVersion; metricsDeviceParams.Version.MinorNumber = MetricEnumeration::requiredMetricsDiscoveryMinorVersion; } void MetricContextFixture::TearDown() { // Restore original metrics library delete mockMetricsLibrary->handle; mockMetricsLibrary->setMockedApi(nullptr); mockMetricsLibrary.reset(); // Restore original metric enumeration. mockMetricEnumeration->setMockedApi(nullptr); mockMetricEnumeration.reset(); // Call base class. ContextFixture::TearDown(); } void MetricContextFixture::openMetricsAdapter() { EXPECT_CALL(*mockMetricEnumeration, loadMetricsDiscovery()) .Times(0); EXPECT_CALL(*mockMetricEnumeration->g_mockApi, MockOpenAdapterGroup(_)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<0>(&adapterGroup), Return(TCompletionCode::CC_OK))); EXPECT_CALL(adapter, OpenMetricsDevice(_)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<0>(&metricsDevice), Return(TCompletionCode::CC_OK))); EXPECT_CALL(adapter, CloseMetricsDevice(_)) .Times(1) .WillOnce(Return(TCompletionCode::CC_OK)); EXPECT_CALL(adapterGroup, GetAdapter(_)) .Times(0); EXPECT_CALL(*mockMetricEnumeration, getMetricsAdapter()) .Times(1) .WillOnce(Return(&adapter)); EXPECT_CALL(adapterGroup, Close()) .Times(1) .WillOnce(Return(TCompletionCode::CC_OK)); } void MetricContextFixture::openMetricsAdapterGroup() { EXPECT_CALL(*mockMetricEnumeration, loadMetricsDiscovery()) .Times(0); EXPECT_CALL(*mockMetricEnumeration->g_mockApi, MockOpenAdapterGroup(_)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<0>(&adapterGroup), Return(TCompletionCode::CC_OK))); EXPECT_CALL(adapter, OpenMetricsDevice(_)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<0>(&metricsDevice), 
Return(TCompletionCode::CC_OK))); EXPECT_CALL(adapter, CloseMetricsDevice(_)) .Times(1) .WillOnce(Return(TCompletionCode::CC_OK)); EXPECT_CALL(adapterGroup, Close()) .Times(1) .WillOnce(Return(TCompletionCode::CC_OK)); } void MetricMultiDeviceFixture::SetUp() { DebugManager.flags.EnableImplicitScaling.set(1); MultiDeviceFixture::SetUp(); devices.resize(driverHandle->devices.size()); for (uint32_t i = 0; i < driverHandle->devices.size(); i++) { devices[i] = driverHandle->devices[i]; } // Initialize metric api. auto &metricSource = devices[0]->getMetricDeviceContext().getMetricSource(); metricSource.setInitializationState(ZE_RESULT_SUCCESS); // Mock metrics library. mockMetricsLibrary = std::unique_ptr>(new (std::nothrow) Mock(metricSource)); mockMetricsLibrary->setMockedApi(&mockMetricsLibraryApi); mockMetricsLibrary->handle = new MockOsLibrary(); // Mock metric enumeration. mockMetricEnumeration = std::unique_ptr>(new (std::nothrow) Mock(metricSource)); mockMetricEnumeration->setMockedApi(&mockMetricsDiscoveryApi); mockMetricEnumeration->hMetricsDiscovery = std::make_unique(); auto &deviceImp = *static_cast(devices[0]); const uint32_t subDeviceCount = static_cast(deviceImp.subDevices.size()); mockMetricEnumerationSubDevices.resize(subDeviceCount); mockMetricsLibrarySubDevices.resize(subDeviceCount); for (uint32_t i = 0; i < subDeviceCount; i++) { auto &metricsSubDeviceContext = deviceImp.subDevices[i]->getMetricDeviceContext().getMetricSource(); mockMetricEnumerationSubDevices[i] = std::unique_ptr>(new (std::nothrow) Mock(metricsSubDeviceContext)); mockMetricEnumerationSubDevices[i]->setMockedApi(&mockMetricsDiscoveryApi); mockMetricEnumerationSubDevices[i]->hMetricsDiscovery = std::make_unique(); mockMetricsLibrarySubDevices[i] = std::unique_ptr>(new (std::nothrow) Mock(metricsSubDeviceContext)); mockMetricsLibrarySubDevices[i]->setMockedApi(&mockMetricsLibraryApi); mockMetricsLibrarySubDevices[i]->handle = new MockOsLibrary(); 
metricsSubDeviceContext.setInitializationState(ZE_RESULT_SUCCESS); } // Metrics Discovery device common settings. metricsDeviceParams.Version.MajorNumber = MetricEnumeration::requiredMetricsDiscoveryMajorVersion; metricsDeviceParams.Version.MinorNumber = MetricEnumeration::requiredMetricsDiscoveryMinorVersion; } void MetricMultiDeviceFixture::TearDown() { auto &deviceImp = *static_cast(devices[0]); const uint32_t subDeviceCount = static_cast(deviceImp.subDevices.size()); for (uint32_t i = 0; i < subDeviceCount; i++) { mockMetricEnumerationSubDevices[i]->setMockedApi(nullptr); mockMetricEnumerationSubDevices[i].reset(); delete mockMetricsLibrarySubDevices[i]->handle; mockMetricsLibrarySubDevices[i]->setMockedApi(nullptr); mockMetricsLibrarySubDevices[i].reset(); } mockMetricEnumerationSubDevices.clear(); mockMetricsLibrarySubDevices.clear(); // Restore original metrics library delete mockMetricsLibrary->handle; mockMetricsLibrary->setMockedApi(nullptr); mockMetricsLibrary.reset(); // Restore original metric enumeration. 
mockMetricEnumeration->setMockedApi(nullptr); mockMetricEnumeration.reset(); MultiDeviceFixture::TearDown(); } void MetricMultiDeviceFixture::openMetricsAdapter() { EXPECT_CALL(*mockMetricEnumeration, loadMetricsDiscovery()) .Times(0); EXPECT_CALL(*mockMetricEnumeration->g_mockApi, MockOpenAdapterGroup(_)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<0>(&adapterGroup), Return(TCompletionCode::CC_OK))); EXPECT_CALL(adapter, OpenMetricsSubDevice(_, _)) .Times(2) .WillRepeatedly(DoAll(::testing::SetArgPointee<1>(&metricsDevice), Return(TCompletionCode::CC_OK))); EXPECT_CALL(adapter, CloseMetricsDevice(_)) .Times(2) .WillRepeatedly(Return(TCompletionCode::CC_OK)); EXPECT_CALL(adapterGroup, GetAdapter(_)) .Times(0); EXPECT_CALL(*mockMetricEnumeration, getMetricsAdapter()) .Times(1) .WillOnce(Return(&adapter)); EXPECT_CALL(adapterGroup, Close()) .Times(1) .WillOnce(Return(TCompletionCode::CC_OK)); } void MetricMultiDeviceFixture::openMetricsAdapterSubDevice(uint32_t subDeviceIndex) { EXPECT_CALL(*mockMetricEnumerationSubDevices[subDeviceIndex], loadMetricsDiscovery()) .Times(0); EXPECT_CALL(*mockMetricEnumerationSubDevices[subDeviceIndex]->g_mockApi, MockOpenAdapterGroup(_)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<0>(&adapterGroup), Return(TCompletionCode::CC_OK))); EXPECT_CALL(adapter, OpenMetricsDevice(_)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<0>(&metricsDevice), Return(TCompletionCode::CC_OK))); EXPECT_CALL(adapter, CloseMetricsDevice(_)) .Times(1) .WillRepeatedly(Return(TCompletionCode::CC_OK)); EXPECT_CALL(adapterGroup, GetAdapter(_)) .Times(0); EXPECT_CALL(*mockMetricEnumerationSubDevices[subDeviceIndex], getMetricsAdapter()) .Times(1) .WillOnce(Return(&adapter)); EXPECT_CALL(adapterGroup, Close()) .Times(1) .WillOnce(Return(TCompletionCode::CC_OK)); } void MetricMultiDeviceFixture::openMetricsAdapterGroup() { EXPECT_CALL(*mockMetricEnumeration, loadMetricsDiscovery()) .Times(0); EXPECT_CALL(*mockMetricEnumeration->g_mockApi, 
MockOpenAdapterGroup(_)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<0>(&adapterGroup), Return(TCompletionCode::CC_OK))); EXPECT_CALL(adapter, OpenMetricsDevice(_)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<0>(&metricsDevice), Return(TCompletionCode::CC_OK))); EXPECT_CALL(adapter, CloseMetricsDevice(_)) .Times(1) .WillOnce(Return(TCompletionCode::CC_OK)); EXPECT_CALL(adapterGroup, Close()) .Times(1) .WillOnce(Return(TCompletionCode::CC_OK)); } void MetricStreamerMultiDeviceFixture::cleanup(zet_device_handle_t &hDevice, zet_metric_streamer_handle_t &hStreamer) { OaMetricStreamerImp *pStreamerImp = static_cast(MetricStreamer::fromHandle(hStreamer)); auto &deviceImp = *static_cast(devices[0]); for (size_t index = 0; index < deviceImp.subDevices.size(); index++) { zet_metric_streamer_handle_t metricStreamerSubDeviceHandle = pStreamerImp->getMetricStreamers()[index]; OaMetricStreamerImp *pStreamerSubDevImp = static_cast(MetricStreamer::fromHandle(metricStreamerSubDeviceHandle)); auto device = deviceImp.subDevices[index]; auto &metricSource = device->getMetricDeviceContext().getMetricSource(); auto &metricsLibrary = metricSource.getMetricsLibrary(); metricSource.setMetricStreamer(nullptr); metricsLibrary.release(); delete pStreamerSubDevImp; } auto &metricSource = devices[0]->getMetricDeviceContext().getMetricSource(); metricSource.setMetricStreamer(nullptr); delete pStreamerImp; } Mock::Mock(::L0::OaMetricSourceImp &metricSource) : MetricsLibrary(metricSource) { } Mock::~Mock() { } MockMetricsLibraryApi *Mock::g_mockApi = nullptr; void Mock::setMockedApi(MockMetricsLibraryApi *mockedApi) { if (mockedApi) { // Mock class used to communicate with metrics library. metricsLibrary = &metricSource.getMetricsLibrary(); metricSource.setMetricsLibrary(*this); // Mock metrics library api functions. 
contextCreateFunction = mockedApi->ContextCreate; contextDeleteFunction = mockedApi->ContextDelete; api.GetParameter = mockedApi->GetParameter; api.CommandBufferGet = mockedApi->CommandBufferGet; api.CommandBufferGetSize = mockedApi->CommandBufferGetSize; api.QueryCreate = mockedApi->QueryCreate; api.QueryDelete = mockedApi->QueryDelete; api.OverrideCreate = mockedApi->OverrideCreate; api.OverrideDelete = mockedApi->OverrideDelete; api.MarkerCreate = mockedApi->MarkerCreate; api.MarkerDelete = mockedApi->MarkerDelete; api.ConfigurationCreate = mockedApi->ConfigurationCreate; api.ConfigurationActivate = mockedApi->ConfigurationActivate; api.ConfigurationDeactivate = mockedApi->ConfigurationDeactivate; api.ConfigurationDelete = mockedApi->ConfigurationDelete; api.GetData = mockedApi->GetData; // Mock metrics library api. Mock::g_mockApi = mockedApi; } else { // Restore an original class used to communicate with metrics library. metricSource.setMetricsLibrary(*metricsLibrary); } } StatusCode MockMetricsLibraryApi::ContextCreate(ClientType_1_0 clientType, ContextCreateData_1_0 *createData, ContextHandle_1_0 *handle) { return Mock::g_mockApi->MockContextCreate(clientType, createData, handle); } StatusCode MockMetricsLibraryApi::ContextDelete(const ContextHandle_1_0 handle) { return Mock::g_mockApi->MockContextDelete(handle); } StatusCode MockMetricsLibraryApi::GetParameter(const ParameterType parameter, ValueType *type, TypedValue_1_0 *value) { return Mock::g_mockApi->MockGetParameter(parameter, type, value); } StatusCode MockMetricsLibraryApi::CommandBufferGet(const CommandBufferData_1_0 *data) { return Mock::g_mockApi->MockCommandBufferGet(data); } StatusCode MockMetricsLibraryApi::CommandBufferGetSize(const CommandBufferData_1_0 *data, CommandBufferSize_1_0 *size) { return Mock::g_mockApi->MockCommandBufferGetSize(data, size); } StatusCode MockMetricsLibraryApi::QueryCreate(const QueryCreateData_1_0 *createData, QueryHandle_1_0 *handle) { return 
Mock::g_mockApi->MockQueryCreate(createData, handle); } StatusCode MockMetricsLibraryApi::QueryDelete(const QueryHandle_1_0 handle) { return Mock::g_mockApi->MockQueryDelete(handle); } StatusCode MockMetricsLibraryApi::OverrideCreate(const OverrideCreateData_1_0 *createData, OverrideHandle_1_0 *handle) { return Mock::g_mockApi->MockOverrideCreate(createData, handle); } StatusCode MockMetricsLibraryApi::OverrideDelete(const OverrideHandle_1_0 handle) { return Mock::g_mockApi->MockOverrideDelete(handle); } StatusCode MockMetricsLibraryApi::MarkerCreate(const MarkerCreateData_1_0 *createData, MarkerHandle_1_0 *handle) { return Mock::g_mockApi->MockMarkerCreate(createData, handle); } StatusCode MockMetricsLibraryApi::MarkerDelete(const MarkerHandle_1_0 handle) { return Mock::g_mockApi->MockMarkerDelete(handle); } StatusCode MockMetricsLibraryApi::ConfigurationCreate(const ConfigurationCreateData_1_0 *createData, ConfigurationHandle_1_0 *handle) { return Mock::g_mockApi->MockConfigurationCreate(createData, handle); } StatusCode MockMetricsLibraryApi::ConfigurationActivate(const ConfigurationHandle_1_0 handle, const ConfigurationActivateData_1_0 *activateData) { return Mock::g_mockApi->MockConfigurationActivate(handle, activateData); } StatusCode MockMetricsLibraryApi::ConfigurationDeactivate(const ConfigurationHandle_1_0 handle) { return Mock::g_mockApi->MockConfigurationDeactivate(handle); } StatusCode MockMetricsLibraryApi::ConfigurationDelete(const ConfigurationHandle_1_0 handle) { return Mock::g_mockApi->MockConfigurationDelete(handle); } StatusCode MockMetricsLibraryApi::GetData(GetReportData_1_0 *data) { return Mock::g_mockApi->MockGetData(data); } Mock::Mock() {} Mock::~Mock() {} Mock::Mock() {} Mock::~Mock() {} } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/metrics/mock_metric_oa.h000066400000000000000000000217271422164147700323260ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel 
Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/test/common/test_macros/test.h" #include "level_zero/core/source/device/device.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/mock.h" #include "level_zero/core/test/unit_tests/white_box.h" #include "level_zero/tools/source/metrics/metric.h" #include "level_zero/tools/source/metrics/metric_oa_enumeration_imp.h" #include "level_zero/tools/source/metrics/metric_oa_query_imp.h" #include "level_zero/tools/test/unit_tests/sources/metrics/mock_metric_oa_enumeration.h" namespace L0 { namespace ult { template <> struct WhiteBox<::L0::MetricQuery> : public ::L0::MetricQuery { using BaseClass = ::L0::MetricQuery; }; template <> struct WhiteBox<::L0::MetricQueryPool> : public ::L0::MetricQueryPool { using BaseClass = ::L0::MetricQuery; }; using MetricQuery = WhiteBox<::L0::MetricQuery>; using MetricQueryPool = WhiteBox<::L0::MetricQueryPool>; using MetricsLibraryApi::ClientData_1_0; using MetricsLibraryApi::ClientDataLinuxAdapter_1_0; using MetricsLibraryApi::ClientType_1_0; using MetricsLibraryApi::CommandBufferData_1_0; using MetricsLibraryApi::CommandBufferSize_1_0; using MetricsLibraryApi::ConfigurationActivateData_1_0; using MetricsLibraryApi::ConfigurationCreateData_1_0; using MetricsLibraryApi::ConfigurationHandle_1_0; using MetricsLibraryApi::ContextCreateData_1_0; using MetricsLibraryApi::ContextHandle_1_0; using MetricsLibraryApi::GetReportData_1_0; using MetricsLibraryApi::LinuxAdapterType; using MetricsLibraryApi::MarkerCreateData_1_0; using MetricsLibraryApi::MarkerHandle_1_0; using MetricsLibraryApi::OverrideCreateData_1_0; using MetricsLibraryApi::OverrideHandle_1_0; using MetricsLibraryApi::ParameterType; using MetricsLibraryApi::QueryCreateData_1_0; using MetricsLibraryApi::QueryHandle_1_0; using MetricsLibraryApi::StatusCode; using MetricsLibraryApi::TypedValue_1_0; using MetricsLibraryApi::ValueType; struct 
MockMetricsLibraryApi { // Original api functions. static StatusCode ML_STDCALL ContextCreate(ClientType_1_0 clientType, ContextCreateData_1_0 *createData, ContextHandle_1_0 *handle); static StatusCode ML_STDCALL ContextDelete(const ContextHandle_1_0 handle); static StatusCode ML_STDCALL GetParameter(const ParameterType parameter, ValueType *type, TypedValue_1_0 *value); static StatusCode ML_STDCALL CommandBufferGet(const CommandBufferData_1_0 *data); static StatusCode ML_STDCALL CommandBufferGetSize(const CommandBufferData_1_0 *data, CommandBufferSize_1_0 *size); static StatusCode ML_STDCALL QueryCreate(const QueryCreateData_1_0 *createData, QueryHandle_1_0 *handle); static StatusCode ML_STDCALL QueryDelete(const QueryHandle_1_0 handle); static StatusCode ML_STDCALL OverrideCreate(const OverrideCreateData_1_0 *createData, OverrideHandle_1_0 *handle); static StatusCode ML_STDCALL OverrideDelete(const OverrideHandle_1_0 handle); static StatusCode ML_STDCALL MarkerCreate(const MarkerCreateData_1_0 *createData, MarkerHandle_1_0 *handle); static StatusCode ML_STDCALL MarkerDelete(const MarkerHandle_1_0 handle); static StatusCode ML_STDCALL ConfigurationCreate(const ConfigurationCreateData_1_0 *createData, ConfigurationHandle_1_0 *handle); static StatusCode ML_STDCALL ConfigurationActivate(const ConfigurationHandle_1_0 handle, const ConfigurationActivateData_1_0 *activateData); static StatusCode ML_STDCALL ConfigurationDeactivate(const ConfigurationHandle_1_0 handle); static StatusCode ML_STDCALL ConfigurationDelete(const ConfigurationHandle_1_0 handle); static StatusCode ML_STDCALL GetData(GetReportData_1_0 *data); // Mocked api functions. 
MOCK_METHOD(StatusCode, MockContextCreate, (ClientType_1_0 clientType, ContextCreateData_1_0 *createData, ContextHandle_1_0 *handle)); MOCK_METHOD(StatusCode, MockContextDelete, (const ContextHandle_1_0 handle)); MOCK_METHOD(StatusCode, MockGetParameter, (const ParameterType parameter, ValueType *type, TypedValue_1_0 *value)); MOCK_METHOD(StatusCode, MockCommandBufferGet, (const CommandBufferData_1_0 *data)); MOCK_METHOD(StatusCode, MockCommandBufferGetSize, (const CommandBufferData_1_0 *data, CommandBufferSize_1_0 *size)); MOCK_METHOD(StatusCode, MockQueryCreate, (const QueryCreateData_1_0 *createData, QueryHandle_1_0 *handle)); MOCK_METHOD(StatusCode, MockQueryDelete, (const QueryHandle_1_0 handle)); MOCK_METHOD(StatusCode, MockOverrideCreate, (const OverrideCreateData_1_0 *createData, OverrideHandle_1_0 *handle)); MOCK_METHOD(StatusCode, MockOverrideDelete, (const OverrideHandle_1_0 handle)); MOCK_METHOD(StatusCode, MockMarkerCreate, (const MarkerCreateData_1_0 *createData, MarkerHandle_1_0 *handle)); MOCK_METHOD(StatusCode, MockMarkerDelete, (const MarkerHandle_1_0 handle)); MOCK_METHOD(StatusCode, MockConfigurationCreate, (const ConfigurationCreateData_1_0 *createData, ConfigurationHandle_1_0 *handle)); MOCK_METHOD(StatusCode, MockConfigurationActivate, (const ConfigurationHandle_1_0 handle, const ConfigurationActivateData_1_0 *activateData)); MOCK_METHOD(StatusCode, MockConfigurationDeactivate, (const ConfigurationHandle_1_0 handle)); MOCK_METHOD(StatusCode, MockConfigurationDelete, (const ConfigurationHandle_1_0 handle)); MOCK_METHOD(StatusCode, MockGetData, (GetReportData_1_0 * data)); }; template <> struct Mock : public MetricsLibrary { public: Mock(::L0::OaMetricSourceImp &metricSource); ~Mock() override; using MetricsLibrary::handle; using MetricsLibrary::initializationState; // Api mock enable/disable. void setMockedApi(MockMetricsLibraryApi *mockedApi); // Mocked metrics library functions. 
MOCK_METHOD(bool, load, (), (override)); MOCK_METHOD(bool, getContextData, (::L0::Device &, ContextCreateData_1_0 &), (override)); MOCK_METHOD(bool, getMetricQueryReportSize, (size_t & rawDataSize), (override)); // Not mocked metrics library functions. bool metricsLibraryGetContextData(::L0::Device &device, ContextCreateData_1_0 &contextData) { return MetricsLibrary::getContextData(device, contextData); } // Original metrics library implementation used by metric context. ::L0::MetricsLibrary *metricsLibrary = nullptr; // Mocked metrics library api version. // We cannot use a static instance here since the gtest validates memory usage, // and mocked functions will stay in memory longer than the test. static MockMetricsLibraryApi *g_mockApi; }; template <> struct Mock : public MetricQueryPool { Mock(); ~Mock() override; MOCK_METHOD(ze_result_t, metricQueryCreate, (uint32_t, zet_metric_query_handle_t *), (override)); MOCK_METHOD(ze_result_t, destroy, (), (override)); }; template <> struct Mock : public MetricQuery { Mock(); ~Mock() override; MOCK_METHOD(ze_result_t, getData, (size_t *, uint8_t *), (override)); MOCK_METHOD(ze_result_t, reset, (), (override)); MOCK_METHOD(ze_result_t, destroy, (), (override)); }; class MetricContextFixture : public ContextFixture { protected: void SetUp(); void TearDown(); void openMetricsAdapter(); void openMetricsAdapterGroup(); public: // Mocked objects. std::unique_ptr> mockMetricEnumeration = nullptr; std::unique_ptr> mockMetricsLibrary = nullptr; // Mocked metrics library/discovery APIs. 
MockMetricsLibraryApi mockMetricsLibraryApi = {}; MockMetricsDiscoveryApi mockMetricsDiscoveryApi = {}; // Metrics discovery device Mock adapterGroup; Mock adapter; Mock metricsDevice; MetricsDiscovery::TMetricsDeviceParams_1_2 metricsDeviceParams = {}; }; class MetricMultiDeviceFixture : public MultiDeviceFixture { protected: void SetUp(); void TearDown(); void openMetricsAdapter(); void openMetricsAdapterSubDevice(uint32_t subDeviceIndex); void openMetricsAdapterGroup(); public: std::vector devices; // Mocked objects. std::unique_ptr> mockMetricEnumeration = nullptr; std::unique_ptr> mockMetricsLibrary = nullptr; std::vector>> mockMetricEnumerationSubDevices; std::vector>> mockMetricsLibrarySubDevices; // Mocked metrics library/discovery APIs. MockMetricsLibraryApi mockMetricsLibraryApi = {}; MockMetricsDiscoveryApi mockMetricsDiscoveryApi = {}; // Metrics discovery device Mock adapterGroup; Mock adapter; Mock metricsDevice; MetricsDiscovery::TMetricsDeviceParams_1_2 metricsDeviceParams = {}; }; class MetricStreamerMultiDeviceFixture : public MetricMultiDeviceFixture { public: void cleanup(zet_device_handle_t &hDevice, zet_metric_streamer_handle_t &hStreamer); }; } // namespace ult } // namespace L0 mock_metric_oa_enumeration.cpp000066400000000000000000000250541422164147700352050ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/metrics/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/metrics/metric_oa_enumeration_imp.h" #include "level_zero/tools/source/metrics/metric_oa_source.h" #include "level_zero/tools/test/unit_tests/sources/metrics/mock_metric_oa.h" namespace L0 { namespace ult { Mock::Mock(::L0::OaMetricSourceImp &metricSource) : MetricEnumeration(metricSource) { } Mock::~Mock() { } MockMetricsDiscoveryApi *Mock::g_mockApi = nullptr; TCompletionCode MockMetricsDiscoveryApi::OpenAdapterGroup(IAdapterGroupLatest **group) { return 
Mock::g_mockApi->MockOpenAdapterGroup((IAdapterGroup_1_9 **)group); } TCompletionCode MockMetricsDiscoveryApi::OpenMetricsDeviceFromFile(const char *fileName, void *openParams, IMetricsDeviceLatest **device) { return Mock::g_mockApi->MockOpenMetricsDeviceFromFile(fileName, openParams, (IMetricsDevice_1_5 **)device); } TCompletionCode MockMetricsDiscoveryApi::CloseMetricsDevice(IMetricsDeviceLatest *device) { return Mock::g_mockApi->MockCloseMetricsDevice((IMetricsDevice_1_5 *)device); } TCompletionCode MockMetricsDiscoveryApi::SaveMetricsDeviceToFile(const char *fileName, void *saveParams, IMetricsDeviceLatest *device) { return Mock::g_mockApi->MockSaveMetricsDeviceToFile(fileName, saveParams, (IMetricsDevice_1_5 *)device); } void Mock::setMockedApi(MockMetricsDiscoveryApi *mockedApi) { if (mockedApi) { // Mock class used to communicate with metrics library. metricEnumeration = &metricSource.getMetricEnumeration(); metricSource.setMetricEnumeration(*this); // Mock metrics library api functions. openAdapterGroup = mockedApi->OpenAdapterGroup; // Mock metrics library api. Mock::g_mockApi = mockedApi; } else { // Restore an original class used to communicate with metrics library. 
metricSource.setMetricEnumeration(*metricEnumeration); } } } // namespace ult } // namespace L0 namespace MetricsDiscovery { IMetricsDevice_1_0::~IMetricsDevice_1_0() {} TMetricsDeviceParams_1_0 *IMetricsDevice_1_0::GetParams(void) { UNRECOVERABLE_IF(true); return nullptr; } IConcurrentGroup_1_0 *IMetricsDevice_1_0::GetConcurrentGroup(uint32_t) { UNRECOVERABLE_IF(true); return nullptr; } TGlobalSymbol_1_0 *IMetricsDevice_1_0::GetGlobalSymbol(uint32_t) { UNRECOVERABLE_IF(true); return nullptr; } TTypedValue_1_0 *IMetricsDevice_1_0::GetGlobalSymbolValueByName(const char *name) { UNRECOVERABLE_IF(true); return nullptr; } TCompletionCode IMetricsDevice_1_0::GetLastError(void) { UNRECOVERABLE_IF(true); return TCompletionCode::CC_ERROR_NOT_SUPPORTED; } TCompletionCode IMetricsDevice_1_0::GetGpuCpuTimestamps(uint64_t *, uint64_t *, uint32_t *) { UNRECOVERABLE_IF(true); return TCompletionCode::CC_ERROR_NOT_SUPPORTED; } IConcurrentGroup_1_1 *IMetricsDevice_1_1::GetConcurrentGroup(uint32_t) { UNRECOVERABLE_IF(true); return nullptr; } TMetricsDeviceParams_1_2 *MetricsDiscovery::IMetricsDevice_1_2::GetParams(void) { UNRECOVERABLE_IF(true); return nullptr; } IOverride_1_2 *IMetricsDevice_1_2::GetOverride(unsigned int) { UNRECOVERABLE_IF(true); return nullptr; } IOverride_1_2 *IMetricsDevice_1_2::GetOverrideByName(char const *) { UNRECOVERABLE_IF(true); return nullptr; } IConcurrentGroup_1_5 *IMetricsDevice_1_5::GetConcurrentGroup(uint32_t) { UNRECOVERABLE_IF(true); return nullptr; } IConcurrentGroup_1_0::~IConcurrentGroup_1_0() {} TConcurrentGroupParams_1_0 *IConcurrentGroup_1_0::GetParams(void) { UNRECOVERABLE_IF(true); return nullptr; } IMetricSet_1_0 *IConcurrentGroup_1_0::GetMetricSet(uint32_t index) { UNRECOVERABLE_IF(true); return nullptr; } TCompletionCode IConcurrentGroup_1_0::OpenIoStream(IMetricSet_1_0 *metricSet, uint32_t processId, uint32_t *nsTimerPeriod, uint32_t *oaBufferSize) { UNRECOVERABLE_IF(true); return CC_ERROR_NOT_SUPPORTED; } TCompletionCode 
IConcurrentGroup_1_0::ReadIoStream(uint32_t *reportsCount, char *reportData, uint32_t readFlags) { UNRECOVERABLE_IF(true); return CC_ERROR_NOT_SUPPORTED; } TCompletionCode IConcurrentGroup_1_0::CloseIoStream(void) { UNRECOVERABLE_IF(true); return CC_ERROR_NOT_SUPPORTED; } TCompletionCode IConcurrentGroup_1_0::WaitForReports(uint32_t milliseconds) { UNRECOVERABLE_IF(true); return CC_ERROR_NOT_SUPPORTED; } IInformation_1_0 *IConcurrentGroup_1_0::GetIoMeasurementInformation(uint32_t index) { UNRECOVERABLE_IF(true); return nullptr; } IInformation_1_0 *IConcurrentGroup_1_0::GetIoGpuContextInformation(uint32_t index) { UNRECOVERABLE_IF(true); return nullptr; } IMetricSet_1_1 *IConcurrentGroup_1_1::GetMetricSet(uint32_t index) { UNRECOVERABLE_IF(true); return nullptr; } TCompletionCode IConcurrentGroup_1_3::SetIoStreamSamplingType(TSamplingType type) { UNRECOVERABLE_IF(true); return CC_ERROR_NOT_SUPPORTED; } IMetricSet_1_5 *IConcurrentGroup_1_5::GetMetricSet(uint32_t index) { UNRECOVERABLE_IF(true); return nullptr; } IMetricSet_1_0::~IMetricSet_1_0() {} TMetricSetParams_1_0 *IMetricSet_1_0::GetParams(void) { UNRECOVERABLE_IF(true); return nullptr; } IMetric_1_0 *IMetricSet_1_0::GetMetric(uint32_t index) { UNRECOVERABLE_IF(true); return nullptr; } IInformation_1_0 *IMetricSet_1_0::GetInformation(uint32_t index) { UNRECOVERABLE_IF(true); return nullptr; } IMetricSet_1_0 *IMetricSet_1_0::GetComplementaryMetricSet(uint32_t index) { UNRECOVERABLE_IF(true); return nullptr; } TCompletionCode IMetricSet_1_0::Activate(void) { UNRECOVERABLE_IF(true); return CC_ERROR_NOT_SUPPORTED; } TCompletionCode IMetricSet_1_0::Deactivate(void) { UNRECOVERABLE_IF(true); return CC_ERROR_NOT_SUPPORTED; } IMetric_1_0 *IMetricSet_1_0::AddCustomMetric( const char *symbolName, const char *shortName, const char *groupName, const char *longName, const char *dxToOglAlias, uint32_t usageFlagsMask, uint32_t apiMask, TMetricResultType resultType, const char *resultUnits, TMetricType metricType, int64_t 
loWatermark, int64_t hiWatermark, THwUnitType hwType, const char *ioReadEquation, const char *deltaFunction, const char *queryReadEquation, const char *normalizationEquation, const char *maxValueEquation, const char *signalName) { UNRECOVERABLE_IF(true); return nullptr; } IMetricSet_1_1 ::~IMetricSet_1_1() {} TCompletionCode IMetricSet_1_1::SetApiFiltering(uint32_t apiMask) { UNRECOVERABLE_IF(true); return CC_ERROR_NOT_SUPPORTED; } TCompletionCode IMetricSet_1_1::CalculateMetrics(const unsigned char *rawData, uint32_t rawDataSize, TTypedValue_1_0 *out, uint32_t outSize, uint32_t *outReportCount, bool enableContextFiltering) { UNRECOVERABLE_IF(true); return CC_ERROR_NOT_SUPPORTED; } TCompletionCode IMetricSet_1_1::CalculateIoMeasurementInformation(TTypedValue_1_0 *out, uint32_t outSize) { UNRECOVERABLE_IF(true); return CC_ERROR_NOT_SUPPORTED; } IMetricSet_1_4 ::~IMetricSet_1_4() {} TMetricSetParams_1_4 *IMetricSet_1_4::GetParams(void) { UNRECOVERABLE_IF(true); return nullptr; } IMetricSet_1_5 *IMetricSet_1_5::GetComplementaryMetricSet(uint32_t index) { UNRECOVERABLE_IF(true); return nullptr; } TCompletionCode IMetricSet_1_5::CalculateMetrics(const unsigned char *rawData, uint32_t rawDataSize, TTypedValue_1_0 *out, uint32_t outSize, uint32_t *outReportCount, TTypedValue_1_0 *outMaxValues, uint32_t outMaxValuesSize) { UNRECOVERABLE_IF(true); return CC_ERROR_NOT_SUPPORTED; } IMetric_1_0 ::~IMetric_1_0() {} TMetricParams_1_0 *IMetric_1_0::GetParams() { UNRECOVERABLE_IF(true); return nullptr; } IInformation_1_0 ::~IInformation_1_0() {} TInformationParams_1_0 *IInformation_1_0::GetParams() { UNRECOVERABLE_IF(true); return nullptr; } IAdapter_1_8 *IAdapterGroup_1_8::GetAdapter(uint32_t index) { UNRECOVERABLE_IF(true); return nullptr; } IAdapter_1_6 *IAdapterGroup_1_6::GetAdapter(uint32_t index) { UNRECOVERABLE_IF(true); return nullptr; } IAdapterGroup_1_6 ::~IAdapterGroup_1_6() { } const TAdapterGroupParams_1_6 *IAdapterGroup_1_6::GetParams(void) const { 
UNRECOVERABLE_IF(true); return nullptr; } IAdapter_1_9 *IAdapterGroup_1_9::GetAdapter(uint32_t index) { UNRECOVERABLE_IF(true); return nullptr; } TCompletionCode IAdapterGroup_1_6::Close() { UNRECOVERABLE_IF(true); return CC_ERROR_NOT_SUPPORTED; } IAdapter_1_6 ::~IAdapter_1_6() {} const TAdapterParams_1_6 *IAdapter_1_6 ::GetParams(void) const { UNRECOVERABLE_IF(true); return nullptr; } const TAdapterParams_1_8 *IAdapter_1_8::GetParams(void) const { UNRECOVERABLE_IF(true); return nullptr; } const TAdapterParams_1_9 *IAdapter_1_9::GetParams(void) const { UNRECOVERABLE_IF(true); return nullptr; } TCompletionCode IAdapter_1_6 ::Reset() { UNRECOVERABLE_IF(true); return CC_ERROR_NOT_SUPPORTED; } TCompletionCode IAdapter_1_6 ::OpenMetricsDevice(IMetricsDevice_1_5 **metricsDevice) { UNRECOVERABLE_IF(true); return CC_ERROR_NOT_SUPPORTED; } TCompletionCode IAdapter_1_6 ::OpenMetricsDeviceFromFile(const char *fileName, void *openParams, IMetricsDevice_1_5 **metricsDevice) { UNRECOVERABLE_IF(true); return CC_ERROR_NOT_SUPPORTED; } TCompletionCode IAdapter_1_6 ::CloseMetricsDevice(IMetricsDevice_1_5 *metricsDevice) { UNRECOVERABLE_IF(true); return CC_ERROR_NOT_SUPPORTED; } TCompletionCode IAdapter_1_6 ::SaveMetricsDeviceToFile(const char *fileName, void *saveParams, IMetricsDevice_1_5 *metricsDevice) { UNRECOVERABLE_IF(true); return CC_ERROR_NOT_SUPPORTED; } const TSubDeviceParams_1_9 *IAdapter_1_9::GetSubDeviceParams(const uint32_t subDeviceIndex) { UNRECOVERABLE_IF(true); return nullptr; } const TEngineParams_1_9 *IAdapter_1_9::GetEngineParams(const uint32_t subDeviceIndex, const uint32_t engineIndex) { UNRECOVERABLE_IF(true); return nullptr; } TCompletionCode IAdapter_1_9::OpenMetricsSubDevice(const uint32_t subDeviceIndex, IMetricsDevice_1_5 **metricsDevice) { UNRECOVERABLE_IF(true); return CC_ERROR_NOT_SUPPORTED; } TCompletionCode IAdapter_1_9::OpenMetricsSubDeviceFromFile(const uint32_t subDeviceIndex, const char *fileName, void *openParams, IMetricsDevice_1_5 
**metricsDevice) { UNRECOVERABLE_IF(true); return CC_ERROR_NOT_SUPPORTED; } } // namespace MetricsDiscovery mock_metric_oa_enumeration.h000066400000000000000000000245761422164147700346620ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/metrics/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/core/source/device/device.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/mock.h" #include "level_zero/core/test/unit_tests/white_box.h" #include "level_zero/tools/source/metrics/metric.h" #include "level_zero/tools/source/metrics/metric_oa_enumeration_imp.h" #include "level_zero/tools/source/metrics/metric_oa_query_imp.h" namespace L0 { namespace ult { template <> struct WhiteBox<::L0::MetricGroup> : public ::L0::MetricGroup { using BaseClass = ::L0::MetricGroup; }; using MetricGroup = WhiteBox<::L0::MetricGroup>; using MetricsDiscovery::IAdapter_1_6; using MetricsDiscovery::IAdapter_1_8; using MetricsDiscovery::IAdapter_1_9; using MetricsDiscovery::IAdapterGroup_1_6; using MetricsDiscovery::IAdapterGroup_1_8; using MetricsDiscovery::IAdapterGroup_1_9; using MetricsDiscovery::IAdapterGroupLatest; using MetricsDiscovery::IConcurrentGroup_1_5; using MetricsDiscovery::IInformation_1_0; using MetricsDiscovery::IMetric_1_0; using MetricsDiscovery::IMetricsDevice_1_5; using MetricsDiscovery::IMetricsDeviceLatest; using MetricsDiscovery::IMetricSet_1_0; using MetricsDiscovery::IMetricSet_1_5; using MetricsDiscovery::IOverride_1_2; using MetricsDiscovery::TAdapterGroupParams_1_6; using MetricsDiscovery::TAdapterParams_1_6; using MetricsDiscovery::TAdapterParams_1_8; using MetricsDiscovery::TAdapterParams_1_9; using MetricsDiscovery::TCompletionCode; using MetricsDiscovery::TConcurrentGroupParams_1_0; using MetricsDiscovery::TEngineParams_1_9; using MetricsDiscovery::TGlobalSymbol_1_0; using 
MetricsDiscovery::TMetricParams_1_0; using MetricsDiscovery::TMetricsDeviceParams_1_2; using MetricsDiscovery::TMetricSetParams_1_4; using MetricsDiscovery::TSamplingType; using MetricsDiscovery::TSubDeviceParams_1_9; using MetricsDiscovery::TTypedValue_1_0; struct MockMetricsDiscoveryApi { // Original api functions. static TCompletionCode MD_STDCALL OpenMetricsDeviceFromFile(const char *fileName, void *openParams, IMetricsDeviceLatest **device); static TCompletionCode MD_STDCALL CloseMetricsDevice(IMetricsDeviceLatest *device); static TCompletionCode MD_STDCALL SaveMetricsDeviceToFile(const char *fileName, void *saveParams, IMetricsDeviceLatest *device); static TCompletionCode MD_STDCALL OpenAdapterGroup(IAdapterGroupLatest **adapterGroup); // Mocked api functions. MOCK_METHOD(TCompletionCode, MockOpenMetricsDeviceFromFile, (const char *, void *, IMetricsDevice_1_5 **)); MOCK_METHOD(TCompletionCode, MockCloseMetricsDevice, (IMetricsDevice_1_5 *)); MOCK_METHOD(TCompletionCode, MockSaveMetricsDeviceToFile, (const char *, void *, IMetricsDevice_1_5 *)); MOCK_METHOD(TCompletionCode, MockOpenAdapterGroup, (IAdapterGroup_1_9 **)); }; template <> class Mock : public IAdapterGroup_1_9 { public: Mock(){}; MOCK_METHOD(IAdapter_1_9 *, GetAdapter, (uint32_t), (override)); MOCK_METHOD(const TAdapterGroupParams_1_6 *, GetParams, (), (const, override)); MOCK_METHOD(TCompletionCode, Close, (), (override)); }; template <> class Mock : public IAdapter_1_9 { public: Mock(){}; // 1.9 MOCK_METHOD(const TSubDeviceParams_1_9 *, GetSubDeviceParams, (const uint32_t subDeviceIndex), (override)); MOCK_METHOD(const TEngineParams_1_9 *, GetEngineParams, (const uint32_t subDeviceIndex, const uint32_t engineIndex), (override)); MOCK_METHOD(TCompletionCode, OpenMetricsSubDevice, (const uint32_t subDeviceIndex, IMetricsDevice_1_5 **metricsDevice), (override)); MOCK_METHOD(TCompletionCode, OpenMetricsSubDeviceFromFile, (const uint32_t subDeviceIndex, const char *fileName, void *openParams, 
IMetricsDevice_1_5 **metricsDevice), (override)); MOCK_METHOD(const TAdapterParams_1_9 *, GetParams, (), (const, override)); MOCK_METHOD(TCompletionCode, Reset, (), (override)); MOCK_METHOD(TCompletionCode, OpenMetricsDevice, (IMetricsDevice_1_5 **), (override)); MOCK_METHOD(TCompletionCode, OpenMetricsDeviceFromFile, (const char *, void *, IMetricsDevice_1_5 **), (override)); MOCK_METHOD(TCompletionCode, CloseMetricsDevice, (IMetricsDevice_1_5 *), (override)); MOCK_METHOD(TCompletionCode, SaveMetricsDeviceToFile, (const char *, void *, IMetricsDevice_1_5 *), (override)); }; template <> class Mock : public IMetricsDevice_1_5 { public: Mock(){}; MOCK_METHOD(TMetricsDeviceParams_1_2 *, GetParams, (), (override)); MOCK_METHOD(IOverride_1_2 *, GetOverride, (uint32_t index), (override)); MOCK_METHOD(IOverride_1_2 *, GetOverrideByName, (const char *symbolName), (override)); MOCK_METHOD(IConcurrentGroup_1_5 *, GetConcurrentGroup, (uint32_t index), (override)); MOCK_METHOD(TGlobalSymbol_1_0 *, GetGlobalSymbol, (uint32_t index), (override)); MOCK_METHOD(TTypedValue_1_0 *, GetGlobalSymbolValueByName, (const char *name), (override)); MOCK_METHOD(TCompletionCode, GetLastError, (), (override)); MOCK_METHOD(TCompletionCode, GetGpuCpuTimestamps, (uint64_t * gpuTimestampNs, uint64_t *cpuTimestampNs, uint32_t *cpuId), (override)); }; template <> class Mock : public IConcurrentGroup_1_5 { public: Mock(){}; MOCK_METHOD(IMetricSet_1_5 *, GetMetricSet, (uint32_t index), (override)); MOCK_METHOD(TConcurrentGroupParams_1_0 *, GetParams, (), (override)); MOCK_METHOD(TCompletionCode, OpenIoStream, (IMetricSet_1_0 * metricSet, uint32_t processId, uint32_t *nsTimerPeriod, uint32_t *oaBufferSize), (override)); MOCK_METHOD(TCompletionCode, ReadIoStream, (uint32_t * reportsCount, char *reportData, uint32_t readFlags), (override)); MOCK_METHOD(TCompletionCode, CloseIoStream, (), (override)); MOCK_METHOD(TCompletionCode, WaitForReports, (uint32_t milliseconds), (override)); 
MOCK_METHOD(TCompletionCode, SetIoStreamSamplingType, (TSamplingType type), (override)); MOCK_METHOD(IInformation_1_0 *, GetIoMeasurementInformation, (uint32_t index), (override)); MOCK_METHOD(IInformation_1_0 *, GetIoGpuContextInformation, (uint32_t index), (override)); }; template <> class Mock : public IMetricSet_1_5 { public: Mock(){}; MOCK_METHOD(TMetricSetParams_1_4 *, GetParams, (), (override)); MOCK_METHOD(IMetric_1_0 *, GetMetric, (uint32_t index), (override)); MOCK_METHOD(IInformation_1_0 *, GetInformation, (uint32_t index), (override)); MOCK_METHOD(TCompletionCode, Activate, (), (override)); MOCK_METHOD(TCompletionCode, Deactivate, (), (override)); MOCK_METHOD(TCompletionCode, SetApiFiltering, (uint32_t apiMask), (override)); MOCK_METHOD(TCompletionCode, CalculateMetrics, (const unsigned char *rawData, uint32_t rawDataSize, TTypedValue_1_0 *out, uint32_t outSize, uint32_t *outReportCount, bool enableContextFiltering), (override)); MOCK_METHOD(TCompletionCode, CalculateIoMeasurementInformation, (TTypedValue_1_0 * out, uint32_t outSize), (override)); MOCK_METHOD(IMetricSet_1_5 *, GetComplementaryMetricSet, (uint32_t index), (override)); MOCK_METHOD(TCompletionCode, CalculateMetrics, (const unsigned char *rawData, uint32_t rawDataSize, TTypedValue_1_0 *out, uint32_t outSize, uint32_t *outReportCount, TTypedValue_1_0 *outMaxValues, uint32_t outMaxValuesSize), (override)); }; template <> class Mock : public IMetric_1_0 { public: Mock(){}; MOCK_METHOD(TMetricParams_1_0 *, GetParams, (), (override)); }; template <> class Mock : public IInformation_1_0 { public: Mock(){}; MOCK_METHOD(MetricsDiscovery::TInformationParams_1_0 *, GetParams, (), (override)); }; template <> struct Mock : public MetricEnumeration { Mock(::L0::OaMetricSourceImp &metricSource); ~Mock() override; using MetricEnumeration::cleanupMetricsDiscovery; using MetricEnumeration::hMetricsDiscovery; using MetricEnumeration::initializationState; using MetricEnumeration::openAdapterGroup; using 
MetricEnumeration::openMetricsDiscovery; // Api mock enable/disable. void setMockedApi(MockMetricsDiscoveryApi *mockedApi); // Mock metric enumeration functions. MOCK_METHOD(bool, isInitialized, (), (override)); MOCK_METHOD(ze_result_t, loadMetricsDiscovery, (), (override)); MOCK_METHOD(MetricsDiscovery::IAdapter_1_9 *, getMetricsAdapter, (), (override)); MOCK_METHOD(bool, getAdapterId, (uint32_t & drmMajor, uint32_t &drmMinor), (override)); // Not mocked metrics enumeration functions. bool baseIsInitialized() { return MetricEnumeration::isInitialized(); } IAdapter_1_9 *baseGetMetricsAdapter() { return MetricEnumeration::getMetricsAdapter(); } bool baseGetAdapterId(uint32_t &adapterMajor, uint32_t &adapterMinor) { return MetricEnumeration::getAdapterId(adapterMajor, adapterMinor); } ze_result_t baseLoadMetricsDiscovery() { return MetricEnumeration::loadMetricsDiscovery(); } // Mock metrics discovery api. static MockMetricsDiscoveryApi *g_mockApi; // Original metric enumeration obtained from metric context. 
::L0::MetricEnumeration *metricEnumeration = nullptr; }; template <> struct Mock : public OaMetricGroupImp { Mock() {} MOCK_METHOD(ze_result_t, metricGet, (uint32_t *, zet_metric_handle_t *), (override)); MOCK_METHOD(ze_result_t, calculateMetricValues, (const zet_metric_group_calculation_type_t, size_t, const uint8_t *, uint32_t *, zet_typed_value_t *), (override)); MOCK_METHOD(ze_result_t, calculateMetricValuesExp, (const zet_metric_group_calculation_type_t, size_t, const uint8_t *, uint32_t *, uint32_t *, uint32_t *, zet_typed_value_t *), (override)); MOCK_METHOD(ze_result_t, getProperties, (zet_metric_group_properties_t * properties), (override)); MOCK_METHOD(bool, activate, (), (override)); MOCK_METHOD(bool, deactivate, (), (override)); zet_metric_group_handle_t getMetricGroupForSubDevice(const uint32_t subDeviceIndex) override { return nullptr; } MOCK_METHOD(ze_result_t, waitForReports, (const uint32_t)); MOCK_METHOD(ze_result_t, openIoStream, (uint32_t &, uint32_t &)); MOCK_METHOD(ze_result_t, readIoStream, (uint32_t &, uint8_t &)); MOCK_METHOD(ze_result_t, closeIoStream, ()); }; struct MetricGroupImpTest : public OaMetricGroupImp { using OaMetricGroupImp::copyValue; using OaMetricGroupImp::pReferenceConcurrentGroup; using OaMetricGroupImp::pReferenceMetricSet; }; } // namespace ult } // namespace L0 test_metric_ip_sampling_enumeration.cpp000066400000000000000000000340771422164147700371430ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/metrics/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/tools/source/metrics/metric_ip_sampling_source.h" #include "level_zero/tools/source/metrics/metric_oa_source.h" #include "level_zero/tools/source/metrics/os_metric_ip_sampling.h" #include "level_zero/tools/test/unit_tests/sources/metrics/metric_ip_sampling_fixture.h" #include 
"level_zero/tools/test/unit_tests/sources/metrics/mock_metric_ip_sampling.h" #include namespace L0 { extern _ze_driver_handle_t *GlobalDriverHandle; namespace ult { using MetricIpSamplingEnumerationTest = MetricIpSamplingFixture; TEST_F(MetricIpSamplingEnumerationTest, GivenDependenciesAvailableWhenInititializingThenSuccessIsReturned) { EXPECT_EQ(ZE_RESULT_SUCCESS, testDevices[0]->getMetricDeviceContext().enableMetricApi()); for (auto device : testDevices) { auto &metricSource = device->getMetricDeviceContext().getMetricSource(); EXPECT_TRUE(metricSource.isAvailable()); } } TEST_F(MetricIpSamplingEnumerationTest, GivenDependenciesUnAvailableForRootDeviceWhenInititializingThenFailureIsReturned) { osInterfaceVector[0]->isDependencyAvailableReturn = false; EXPECT_EQ(ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE, testDevices[0]->getMetricDeviceContext().enableMetricApi()); } TEST_F(MetricIpSamplingEnumerationTest, GivenDependenciesUnAvailableForSubDeviceWhenInititializingThenFailureIsReturned) { osInterfaceVector[1]->isDependencyAvailableReturn = false; EXPECT_EQ(ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE, testDevices[0]->getMetricDeviceContext().enableMetricApi()); auto &metricSource = testDevices[0]->getMetricDeviceContext().getMetricSource(); EXPECT_TRUE(metricSource.isAvailable()); auto &metricSource0 = testDevices[1]->getMetricDeviceContext().getMetricSource(); EXPECT_FALSE(metricSource0.isAvailable()); auto &metricSource1 = testDevices[2]->getMetricDeviceContext().getMetricSource(); EXPECT_TRUE(metricSource1.isAvailable()); } TEST_F(MetricIpSamplingEnumerationTest, GivenDependenciesAvailableWhenMetricGroupGetIsCalledThenValidMetricGroupIsReturned) { EXPECT_EQ(ZE_RESULT_SUCCESS, testDevices[0]->getMetricDeviceContext().enableMetricApi()); for (auto device : testDevices) { uint32_t metricGroupCount = 0; EXPECT_EQ(zetMetricGroupGet(device->toHandle(), &metricGroupCount, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); std::vector metricGroups; 
metricGroups.resize(metricGroupCount); EXPECT_EQ(zetMetricGroupGet(device->toHandle(), &metricGroupCount, metricGroups.data()), ZE_RESULT_SUCCESS); EXPECT_NE(metricGroups[0], nullptr); } } TEST_F(MetricIpSamplingEnumerationTest, GivenDependenciesAvailableWhenMetricGroupGetIsCalledMultipleTimesThenValidMetricGroupIsReturned) { EXPECT_EQ(ZE_RESULT_SUCCESS, testDevices[0]->getMetricDeviceContext().enableMetricApi()); for (auto device : testDevices) { uint32_t metricGroupCount = 0; EXPECT_EQ(zetMetricGroupGet(device->toHandle(), &metricGroupCount, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); std::vector metricGroups; metricGroups.resize(metricGroupCount); EXPECT_EQ(zetMetricGroupGet(device->toHandle(), &metricGroupCount, metricGroups.data()), ZE_RESULT_SUCCESS); EXPECT_NE(metricGroups[0], nullptr); EXPECT_EQ(zetMetricGroupGet(device->toHandle(), &metricGroupCount, metricGroups.data()), ZE_RESULT_SUCCESS); EXPECT_NE(metricGroups[0], nullptr); } } TEST_F(MetricIpSamplingEnumerationTest, GivenDependenciesAvailableWhenMetricGroupGetIsCalledThenMetricGroupWithCorrectPropertiesIsReturned) { EXPECT_EQ(ZE_RESULT_SUCCESS, testDevices[0]->getMetricDeviceContext().enableMetricApi()); for (auto device : testDevices) { uint32_t metricGroupCount = 0; zetMetricGroupGet(device->toHandle(), &metricGroupCount, nullptr); EXPECT_EQ(metricGroupCount, 1u); std::vector metricGroups; metricGroups.resize(metricGroupCount); ASSERT_EQ(zetMetricGroupGet(device->toHandle(), &metricGroupCount, metricGroups.data()), ZE_RESULT_SUCCESS); ASSERT_NE(metricGroups[0], nullptr); zet_metric_group_properties_t metricGroupProperties; EXPECT_EQ(zetMetricGroupGetProperties(metricGroups[0], &metricGroupProperties), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupProperties.domain, 100u); EXPECT_EQ(metricGroupProperties.samplingType, ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_TIME_BASED); EXPECT_EQ(metricGroupProperties.metricCount, 10u); EXPECT_EQ(strcmp(metricGroupProperties.description, "EU stall 
sampling"), 0); EXPECT_EQ(strcmp(metricGroupProperties.name, "EuStallSampling"), 0); } } TEST_F(MetricIpSamplingEnumerationTest, GivenDependenciesAvailableWhenMetricGroupGetIsCalledThenCorrectMetricsAreReturned) { struct MetricProperties { const char *name; const char *description; const char *component; uint32_t tierNumber; zet_metric_type_t metricType; zet_value_type_t resultType; const char *resultUnits; }; std::vector expectedProperties = { {"IP", "IP address", "XVE", 4, ZET_METRIC_TYPE_IP_EXP, ZET_VALUE_TYPE_UINT64, "Address"}, {"Active", "Active cycles", "XVE", 4, ZET_METRIC_TYPE_EVENT, ZET_VALUE_TYPE_UINT64, "Events"}, {"ControlStall", "Stall on control", "XVE", 4, ZET_METRIC_TYPE_EVENT, ZET_VALUE_TYPE_UINT64, "Events"}, {"PipeStall", "Stall on pipe", "XVE", 4, ZET_METRIC_TYPE_EVENT, ZET_VALUE_TYPE_UINT64, "Events"}, {"SendStall", "Stall on send", "XVE", 4, ZET_METRIC_TYPE_EVENT, ZET_VALUE_TYPE_UINT64, "Events"}, {"DistStall", "Stall on distance", "XVE", 4, ZET_METRIC_TYPE_EVENT, ZET_VALUE_TYPE_UINT64, "Events"}, {"SbidStall", "Stall on scoreboard", "XVE", 4, ZET_METRIC_TYPE_EVENT, ZET_VALUE_TYPE_UINT64, "Events"}, {"SyncStall", "Stall on sync", "XVE", 4, ZET_METRIC_TYPE_EVENT, ZET_VALUE_TYPE_UINT64, "Events"}, {"InstrFetchStall", "Stall on instruction fetch", "XVE", 4, ZET_METRIC_TYPE_EVENT, ZET_VALUE_TYPE_UINT64, "Events"}, {"OtherStall", "Stall on other condition", "XVE", 4, ZET_METRIC_TYPE_EVENT, ZET_VALUE_TYPE_UINT64, "Events"}, }; EXPECT_EQ(ZE_RESULT_SUCCESS, testDevices[0]->getMetricDeviceContext().enableMetricApi()); for (auto device : testDevices) { uint32_t metricGroupCount = 0; zetMetricGroupGet(device->toHandle(), &metricGroupCount, nullptr); EXPECT_EQ(metricGroupCount, 1u); std::vector metricGroups; metricGroups.resize(metricGroupCount); zetMetricGroupGet(device->toHandle(), &metricGroupCount, metricGroups.data()); ASSERT_NE(metricGroups[0], nullptr); zet_metric_group_properties_t metricGroupProperties; 
zetMetricGroupGetProperties(metricGroups[0], &metricGroupProperties); uint32_t metricCount = 0; std::vector metricHandles = {}; metricHandles.resize(metricGroupProperties.metricCount); EXPECT_EQ(zetMetricGet(metricGroups[0], &metricCount, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(metricCount, metricGroupProperties.metricCount); EXPECT_EQ(zetMetricGet(metricGroups[0], &metricCount, metricHandles.data()), ZE_RESULT_SUCCESS); std::vector::iterator propertiesIter = expectedProperties.begin(); zet_metric_properties_t ipSamplingMetricProperties = {}; for (auto &metricHandle : metricHandles) { EXPECT_EQ(zetMetricGetProperties(metricHandle, &ipSamplingMetricProperties), ZE_RESULT_SUCCESS); EXPECT_EQ(strcmp(ipSamplingMetricProperties.name, propertiesIter->name), 0); EXPECT_EQ(strcmp(ipSamplingMetricProperties.description, propertiesIter->description), 0); EXPECT_EQ(strcmp(ipSamplingMetricProperties.component, propertiesIter->component), 0); EXPECT_EQ(ipSamplingMetricProperties.tierNumber, propertiesIter->tierNumber); EXPECT_EQ(ipSamplingMetricProperties.metricType, propertiesIter->metricType); EXPECT_EQ(ipSamplingMetricProperties.resultType, propertiesIter->resultType); EXPECT_EQ(strcmp(ipSamplingMetricProperties.resultUnits, propertiesIter->resultUnits), 0); propertiesIter++; } } } TEST_F(MetricIpSamplingEnumerationTest, GivenEnumerationIsSuccessfulThenDummyActivationAndDeActivationHappens) { EXPECT_EQ(ZE_RESULT_SUCCESS, testDevices[0]->getMetricDeviceContext().enableMetricApi()); for (auto device : testDevices) { uint32_t metricGroupCount = 0; zetMetricGroupGet(device->toHandle(), &metricGroupCount, nullptr); std::vector metricGroups; metricGroups.resize(metricGroupCount); ASSERT_EQ(zetMetricGroupGet(device->toHandle(), &metricGroupCount, metricGroups.data()), ZE_RESULT_SUCCESS); ASSERT_NE(metricGroups[0], nullptr); zet_metric_group_properties_t metricGroupProperties; EXPECT_EQ(zetMetricGroupGetProperties(metricGroups[0], &metricGroupProperties), ZE_RESULT_SUCCESS); 
EXPECT_EQ(strcmp(metricGroupProperties.name, "EuStallSampling"), 0); EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), device->toHandle(), 1, &metricGroups[0]), ZE_RESULT_SUCCESS); static_cast(device)->activateMetricGroups(); EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), device->toHandle(), 0, nullptr), ZE_RESULT_SUCCESS); } } TEST_F(MetricIpSamplingEnumerationTest, GivenEnumerationIsSuccessfulWhenCalculateMultipleMetricValuesExpIsCalledThenUnsupportedFeatureIsReturned) { EXPECT_EQ(ZE_RESULT_SUCCESS, testDevices[0]->getMetricDeviceContext().enableMetricApi()); for (auto device : testDevices) { uint32_t metricGroupCount = 0; zetMetricGroupGet(device->toHandle(), &metricGroupCount, nullptr); std::vector metricGroups; metricGroups.resize(metricGroupCount); ASSERT_EQ(zetMetricGroupGet(device->toHandle(), &metricGroupCount, metricGroups.data()), ZE_RESULT_SUCCESS); ASSERT_NE(metricGroups[0], nullptr); zet_metric_group_properties_t metricGroupProperties; EXPECT_EQ(zetMetricGroupGetProperties(metricGroups[0], &metricGroupProperties), ZE_RESULT_SUCCESS); EXPECT_EQ(strcmp(metricGroupProperties.name, "EuStallSampling"), 0); EXPECT_EQ(zetMetricGroupCalculateMultipleMetricValuesExp(metricGroups[0], ZET_METRIC_GROUP_CALCULATION_TYPE_METRIC_VALUES, 0x0, nullptr, nullptr, nullptr, nullptr, nullptr), ZE_RESULT_ERROR_UNSUPPORTED_FEATURE); } } TEST_F(MetricIpSamplingEnumerationTest, GivenEnumerationIsSuccessfulWhenCalculateMetricValuesIsCalledThenUnsupportedFeatureIsReturned) { EXPECT_EQ(ZE_RESULT_SUCCESS, testDevices[0]->getMetricDeviceContext().enableMetricApi()); for (auto device : testDevices) { uint32_t metricGroupCount = 0; zetMetricGroupGet(device->toHandle(), &metricGroupCount, nullptr); std::vector metricGroups; metricGroups.resize(metricGroupCount); ASSERT_EQ(zetMetricGroupGet(device->toHandle(), &metricGroupCount, metricGroups.data()), ZE_RESULT_SUCCESS); ASSERT_NE(metricGroups[0], nullptr); zet_metric_group_properties_t metricGroupProperties; 
EXPECT_EQ(zetMetricGroupGetProperties(metricGroups[0], &metricGroupProperties), ZE_RESULT_SUCCESS); EXPECT_EQ(strcmp(metricGroupProperties.name, "EuStallSampling"), 0); EXPECT_EQ(zetMetricGroupCalculateMetricValues(metricGroups[0], ZET_METRIC_GROUP_CALCULATION_TYPE_METRIC_VALUES, 0, nullptr, nullptr, nullptr), ZE_RESULT_ERROR_UNSUPPORTED_FEATURE); } } TEST_F(MetricIpSamplingEnumerationTest, GivenEnumerationIsSuccessfulWhenQueryPoolCreateIsCalledThenUnsupportedFeatureIsReturned) { EXPECT_EQ(ZE_RESULT_SUCCESS, testDevices[0]->getMetricDeviceContext().enableMetricApi()); for (auto device : testDevices) { uint32_t metricGroupCount = 0; zetMetricGroupGet(device->toHandle(), &metricGroupCount, nullptr); std::vector metricGroups; metricGroups.resize(metricGroupCount); ASSERT_EQ(zetMetricGroupGet(device->toHandle(), &metricGroupCount, metricGroups.data()), ZE_RESULT_SUCCESS); ASSERT_NE(metricGroups[0], nullptr); zet_metric_group_properties_t metricGroupProperties; EXPECT_EQ(zetMetricGroupGetProperties(metricGroups[0], &metricGroupProperties), ZE_RESULT_SUCCESS); EXPECT_EQ(strcmp(metricGroupProperties.name, "EuStallSampling"), 0); zet_metric_query_pool_desc_t poolDesc = {}; poolDesc.stype = ZET_STRUCTURE_TYPE_METRIC_QUERY_POOL_DESC; poolDesc.count = 1; poolDesc.type = ZET_METRIC_QUERY_POOL_TYPE_PERFORMANCE; EXPECT_EQ(zetMetricQueryPoolCreate(context->toHandle(), device, metricGroups[0], &poolDesc, nullptr), ZE_RESULT_ERROR_UNSUPPORTED_FEATURE); } } TEST_F(MetricIpSamplingEnumerationTest, GivenEnumerationIsSuccessfulWhenAppendMetricMemoryBarrierIsCalledThenUnsupportedFeatureIsReturned) { EXPECT_EQ(ZE_RESULT_SUCCESS, testDevices[0]->getMetricDeviceContext().enableMetricApi()); auto &device = testDevices[0]; ze_result_t returnValue; std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)); zet_command_list_handle_t commandListHandle = commandList->toHandle(); 
EXPECT_EQ(zetCommandListAppendMetricMemoryBarrier(commandListHandle), ZE_RESULT_ERROR_UNSUPPORTED_FEATURE); } } // namespace ult } // namespace L0 test_metric_ip_sampling_streamer.cpp000066400000000000000000000412301422164147700364240ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/metrics/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/tools/source/metrics/metric_ip_sampling_source.h" #include "level_zero/tools/source/metrics/metric_oa_source.h" #include "level_zero/tools/source/metrics/os_metric_ip_sampling.h" #include "level_zero/tools/test/unit_tests/sources/metrics/metric_ip_sampling_fixture.h" #include "level_zero/tools/test/unit_tests/sources/metrics/mock_metric_ip_sampling.h" #include namespace L0 { extern _ze_driver_handle_t *GlobalDriverHandle; namespace ult { class MetricIpSamplingStreamerTest : public MetricIpSamplingFixture { public: zet_metric_group_handle_t getMetricGroup(L0::Device *device) { uint32_t metricGroupCount = 0; zetMetricGroupGet(device->toHandle(), &metricGroupCount, nullptr); EXPECT_EQ(metricGroupCount, 1u); std::vector metricGroups; metricGroups.resize(metricGroupCount); zetMetricGroupGet(device->toHandle(), &metricGroupCount, metricGroups.data()); EXPECT_NE(metricGroups[0], nullptr); return metricGroups[0]; } }; TEST_F(MetricIpSamplingStreamerTest, GivenAllInputsAreCorrectWhenStreamerOpenAndCloseAreCalledThenSuccessIsReturned) { EXPECT_EQ(ZE_RESULT_SUCCESS, testDevices[0]->getMetricDeviceContext().enableMetricApi()); for (auto device : testDevices) { if (!device->getNEODevice()->isSubDevice()) { continue; } zet_metric_group_handle_t metricGroupHandle = MetricIpSamplingStreamerTest::getMetricGroup(device); EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), device, 1, &metricGroupHandle), ZE_RESULT_SUCCESS); ze_event_handle_t eventHandle = {}; 
zet_metric_streamer_handle_t streamerHandle = {}; zet_metric_streamer_desc_t streamerDesc = {}; streamerDesc.stype = ZET_STRUCTURE_TYPE_METRIC_STREAMER_DESC; streamerDesc.notifyEveryNReports = 32768; streamerDesc.samplingPeriod = 1000; EXPECT_EQ(zetMetricStreamerOpen(context->toHandle(), device, metricGroupHandle, &streamerDesc, eventHandle, &streamerHandle), ZE_RESULT_SUCCESS); EXPECT_NE(streamerHandle, nullptr); EXPECT_EQ(zetMetricStreamerClose(streamerHandle), ZE_RESULT_SUCCESS); } } TEST_F(MetricIpSamplingStreamerTest, GivenEventHandleIsNullWhenStreamerOpenAndCloseAreCalledThenSuccessIsReturned) { EXPECT_EQ(ZE_RESULT_SUCCESS, testDevices[0]->getMetricDeviceContext().enableMetricApi()); for (auto device : testDevices) { if (!device->getNEODevice()->isSubDevice()) { continue; } zet_metric_group_handle_t metricGroupHandle = MetricIpSamplingStreamerTest::getMetricGroup(device); EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), device, 1, &metricGroupHandle), ZE_RESULT_SUCCESS); zet_metric_streamer_handle_t streamerHandle = {}; zet_metric_streamer_desc_t streamerDesc = {}; streamerDesc.stype = ZET_STRUCTURE_TYPE_METRIC_STREAMER_DESC; streamerDesc.notifyEveryNReports = 32768; streamerDesc.samplingPeriod = 1000; EXPECT_EQ(zetMetricStreamerOpen(context->toHandle(), device, metricGroupHandle, &streamerDesc, nullptr, &streamerHandle), ZE_RESULT_SUCCESS); EXPECT_NE(streamerHandle, nullptr); EXPECT_EQ(zetMetricStreamerClose(streamerHandle), ZE_RESULT_SUCCESS); } } TEST_F(MetricIpSamplingStreamerTest, GivenMetricGroupIsNotActivatedWhenStreamerOpenIsCalledThenErrorIsReturned) { EXPECT_EQ(ZE_RESULT_SUCCESS, testDevices[0]->getMetricDeviceContext().enableMetricApi()); for (auto device : testDevices) { if (!device->getNEODevice()->isSubDevice()) { continue; } zet_metric_group_handle_t metricGroupHandle = MetricIpSamplingStreamerTest::getMetricGroup(device); ze_event_handle_t eventHandle = {}; zet_metric_streamer_handle_t streamerHandle = {}; 
zet_metric_streamer_desc_t streamerDesc = {}; streamerDesc.stype = ZET_STRUCTURE_TYPE_METRIC_STREAMER_DESC; streamerDesc.notifyEveryNReports = 32768; streamerDesc.samplingPeriod = 1000; EXPECT_EQ( zetMetricStreamerOpen(context->toHandle(), device, metricGroupHandle, &streamerDesc, eventHandle, &streamerHandle), ZE_RESULT_NOT_READY); EXPECT_EQ(streamerHandle, nullptr); } } TEST_F(MetricIpSamplingStreamerTest, GivenStreamerIsAlreadyOpenWhenStreamerOpenIsCalledThenErrorIsReturned) { EXPECT_EQ(ZE_RESULT_SUCCESS, testDevices[0]->getMetricDeviceContext().enableMetricApi()); for (auto device : testDevices) { if (!device->getNEODevice()->isSubDevice()) { continue; } zet_metric_group_handle_t metricGroupHandle = MetricIpSamplingStreamerTest::getMetricGroup(device); EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), device, 1, &metricGroupHandle), ZE_RESULT_SUCCESS); ze_event_handle_t eventHandle = {}; zet_metric_streamer_handle_t streamerHandle0 = {}, streamerHandle1 = {}; zet_metric_streamer_desc_t streamerDesc = {}; streamerDesc.stype = ZET_STRUCTURE_TYPE_METRIC_STREAMER_DESC; streamerDesc.notifyEveryNReports = 32768; streamerDesc.samplingPeriod = 1000; EXPECT_EQ(zetMetricStreamerOpen(context->toHandle(), device, metricGroupHandle, &streamerDesc, eventHandle, &streamerHandle0), ZE_RESULT_SUCCESS); EXPECT_NE(streamerHandle0, nullptr); EXPECT_EQ( zetMetricStreamerOpen(context->toHandle(), device, metricGroupHandle, &streamerDesc, eventHandle, &streamerHandle1), ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE); EXPECT_EQ(streamerHandle1, nullptr); EXPECT_EQ(zetMetricStreamerClose(streamerHandle0), ZE_RESULT_SUCCESS); } } TEST_F(MetricIpSamplingStreamerTest, GivenStartMeasurementFailsWhenStreamerOpenIsCalledThenErrorIsReturned) { EXPECT_EQ(ZE_RESULT_SUCCESS, testDevices[0]->getMetricDeviceContext().enableMetricApi()); for (std::size_t index = 0; index < testDevices.size(); index++) { auto device = testDevices[index]; osInterfaceVector[index]->startMeasurementReturn = 
ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; if (!device->getNEODevice()->isSubDevice()) { continue; } zet_metric_group_handle_t metricGroupHandle = MetricIpSamplingStreamerTest::getMetricGroup(device); EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), device, 1, &metricGroupHandle), ZE_RESULT_SUCCESS); ze_event_handle_t eventHandle = {}; zet_metric_streamer_handle_t streamerHandle = {}; zet_metric_streamer_desc_t streamerDesc = {}; streamerDesc.stype = ZET_STRUCTURE_TYPE_METRIC_STREAMER_DESC; streamerDesc.notifyEveryNReports = 32768; streamerDesc.samplingPeriod = 1000; EXPECT_EQ( zetMetricStreamerOpen(context->toHandle(), device, metricGroupHandle, &streamerDesc, eventHandle, &streamerHandle), osInterfaceVector[index]->startMeasurementReturn); EXPECT_EQ(streamerHandle, nullptr); } } TEST_F(MetricIpSamplingStreamerTest, GivenAllInputsAreCorrectWhenReadDataIsCalledThenSuccessIsReturned) { EXPECT_EQ(ZE_RESULT_SUCCESS, testDevices[0]->getMetricDeviceContext().enableMetricApi()); for (std::size_t index = 0; index < testDevices.size(); index++) { auto device = testDevices[index]; osInterfaceVector[index]->getRequiredBufferSizeReturn = 100; if (!device->getNEODevice()->isSubDevice()) { continue; } zet_metric_group_handle_t metricGroupHandle = MetricIpSamplingStreamerTest::getMetricGroup(device); EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), device, 1, &metricGroupHandle), ZE_RESULT_SUCCESS); ze_event_handle_t eventHandle = {}; zet_metric_streamer_handle_t streamerHandle = {}; zet_metric_streamer_desc_t streamerDesc = {}; streamerDesc.stype = ZET_STRUCTURE_TYPE_METRIC_STREAMER_DESC; streamerDesc.notifyEveryNReports = 32768; streamerDesc.samplingPeriod = 1000; EXPECT_EQ(zetMetricStreamerOpen(context->toHandle(), device, metricGroupHandle, &streamerDesc, eventHandle, &streamerHandle), ZE_RESULT_SUCCESS); EXPECT_NE(streamerHandle, nullptr); size_t rawSize = 0; EXPECT_EQ(zetMetricStreamerReadData(streamerHandle, 100, &rawSize, nullptr), ZE_RESULT_SUCCESS); 
EXPECT_NE(rawSize, 0u); uint8_t rawData = 0; rawSize = 50; EXPECT_EQ(zetMetricStreamerReadData(streamerHandle, 50, &rawSize, &rawData), ZE_RESULT_SUCCESS); EXPECT_EQ(rawSize, 50u); EXPECT_EQ(zetMetricStreamerClose(streamerHandle), ZE_RESULT_SUCCESS); } } TEST_F(MetricIpSamplingStreamerTest, GivenAllInputsAreCorrectWhenReadDataIsCalledWithMaxReportCountUint32MaxThenSuccessIsReturned) { EXPECT_EQ(ZE_RESULT_SUCCESS, testDevices[0]->getMetricDeviceContext().enableMetricApi()); for (std::size_t index = 0; index < testDevices.size(); index++) { auto device = testDevices[index]; osInterfaceVector[index]->getRequiredBufferSizeReturn = 100; if (!device->getNEODevice()->isSubDevice()) { continue; } zet_metric_group_handle_t metricGroupHandle = MetricIpSamplingStreamerTest::getMetricGroup(device); EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), device, 1, &metricGroupHandle), ZE_RESULT_SUCCESS); ze_event_handle_t eventHandle = {}; zet_metric_streamer_handle_t streamerHandle = {}; zet_metric_streamer_desc_t streamerDesc = {}; streamerDesc.stype = ZET_STRUCTURE_TYPE_METRIC_STREAMER_DESC; streamerDesc.notifyEveryNReports = 32768; streamerDesc.samplingPeriod = 1000; EXPECT_EQ(zetMetricStreamerOpen(context->toHandle(), device, metricGroupHandle, &streamerDesc, eventHandle, &streamerHandle), ZE_RESULT_SUCCESS); EXPECT_NE(streamerHandle, nullptr); size_t rawSize = 0; EXPECT_EQ(zetMetricStreamerReadData(streamerHandle, UINT32_MAX, &rawSize, nullptr), ZE_RESULT_SUCCESS); EXPECT_NE(rawSize, 0u); uint8_t rawData = 0; rawSize = UINT32_MAX; osInterfaceVector[index]->getRequiredBufferSizeReturn = 1; EXPECT_EQ(zetMetricStreamerReadData(streamerHandle, UINT32_MAX, &rawSize, &rawData), ZE_RESULT_SUCCESS); EXPECT_EQ(zetMetricStreamerClose(streamerHandle), ZE_RESULT_SUCCESS); } } TEST_F(MetricIpSamplingStreamerTest, GivenStreamerOpenIsSuccessfullWhenStreamerAppendMarkerIsCalledThenErrorIsReturned) { EXPECT_EQ(ZE_RESULT_SUCCESS, 
testDevices[0]->getMetricDeviceContext().enableMetricApi()); for (auto device : testDevices) { if (!device->getNEODevice()->isSubDevice()) { continue; } zet_metric_group_handle_t metricGroupHandle = MetricIpSamplingStreamerTest::getMetricGroup(device); EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), device, 1, &metricGroupHandle), ZE_RESULT_SUCCESS); ze_event_handle_t eventHandle = {}; zet_metric_streamer_handle_t streamerHandle = {}; zet_metric_streamer_desc_t streamerDesc = {}; streamerDesc.stype = ZET_STRUCTURE_TYPE_METRIC_STREAMER_DESC; streamerDesc.notifyEveryNReports = 32768; streamerDesc.samplingPeriod = 1000; EXPECT_EQ(zetMetricStreamerOpen(context->toHandle(), device, metricGroupHandle, &streamerDesc, eventHandle, &streamerHandle), ZE_RESULT_SUCCESS); EXPECT_NE(streamerHandle, nullptr); ze_result_t returnValue = ZE_RESULT_SUCCESS; std::unique_ptr commandList( CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)); EXPECT_EQ(zetCommandListAppendMetricStreamerMarker(commandList->toHandle(), streamerHandle, 0), ZE_RESULT_ERROR_UNSUPPORTED_FEATURE); EXPECT_EQ(zetMetricStreamerClose(streamerHandle), ZE_RESULT_SUCCESS); } } TEST_F(MetricIpSamplingStreamerTest, GivenStreamerIsOpenAndDataIsAvailableToReadWhenEventQueryStatusIsCalledThenEventIsSignalled) { EXPECT_EQ(ZE_RESULT_SUCCESS, testDevices[0]->getMetricDeviceContext().enableMetricApi()); for (std::size_t index = 0; index < testDevices.size(); index++) { auto device = testDevices[index]; osInterfaceVector[index]->isNReportsAvailableReturn = true; if (!device->getNEODevice()->isSubDevice()) { continue; } zet_metric_group_handle_t metricGroupHandle = MetricIpSamplingStreamerTest::getMetricGroup(device); EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), device, 1, &metricGroupHandle), ZE_RESULT_SUCCESS); ze_event_handle_t eventHandle = {}; zet_metric_streamer_handle_t streamerHandle = {}; zet_metric_streamer_desc_t streamerDesc = {}; 
streamerDesc.stype = ZET_STRUCTURE_TYPE_METRIC_STREAMER_DESC; streamerDesc.notifyEveryNReports = 32768; streamerDesc.samplingPeriod = 1000; ze_event_pool_handle_t eventPoolHandle = {}; ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; eventPoolDesc.flags = 0; eventPoolDesc.stype = ZE_STRUCTURE_TYPE_EVENT_POOL_DESC; ze_device_handle_t hDevice = device->toHandle(); EXPECT_EQ(zeEventPoolCreate(context->toHandle(), &eventPoolDesc, 1, &hDevice, &eventPoolHandle), ZE_RESULT_SUCCESS); EXPECT_NE(eventPoolHandle, nullptr); ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.stype = ZE_STRUCTURE_TYPE_EVENT_DESC; eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; eventDesc.signal = ZE_EVENT_SCOPE_FLAG_DEVICE; EXPECT_EQ(zeEventCreate(eventPoolHandle, &eventDesc, &eventHandle), ZE_RESULT_SUCCESS); EXPECT_NE(eventHandle, nullptr); EXPECT_EQ(zetMetricStreamerOpen(context->toHandle(), device, metricGroupHandle, &streamerDesc, eventHandle, &streamerHandle), ZE_RESULT_SUCCESS); EXPECT_NE(streamerHandle, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, zeEventQueryStatus(eventHandle)); EXPECT_EQ(zetMetricStreamerClose(streamerHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(zeEventDestroy(eventHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(zeEventPoolDestroy(eventPoolHandle), ZE_RESULT_SUCCESS); } } TEST_F(MetricIpSamplingStreamerTest, GivenStreamerIsOpenAndDataIsNotAvailableToReadWhenEventQueryStatusIsCalledThenEventIsNotSignalled) { EXPECT_EQ(ZE_RESULT_SUCCESS, testDevices[0]->getMetricDeviceContext().enableMetricApi()); for (std::size_t index = 0; index < testDevices.size(); index++) { auto device = testDevices[index]; osInterfaceVector[index]->isNReportsAvailableReturn = false; if (!device->getNEODevice()->isSubDevice()) { continue; } zet_metric_group_handle_t metricGroupHandle = MetricIpSamplingStreamerTest::getMetricGroup(device); EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), device, 1, &metricGroupHandle), ZE_RESULT_SUCCESS); ze_event_handle_t eventHandle = {}; 
zet_metric_streamer_handle_t streamerHandle = {}; zet_metric_streamer_desc_t streamerDesc = {}; streamerDesc.stype = ZET_STRUCTURE_TYPE_METRIC_STREAMER_DESC; streamerDesc.notifyEveryNReports = 32768; streamerDesc.samplingPeriod = 1000; ze_event_pool_handle_t eventPoolHandle = {}; ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; eventPoolDesc.flags = 0; eventPoolDesc.stype = ZE_STRUCTURE_TYPE_EVENT_POOL_DESC; ze_device_handle_t hDevice = device->toHandle(); EXPECT_EQ(zeEventPoolCreate(context->toHandle(), &eventPoolDesc, 1, &hDevice, &eventPoolHandle), ZE_RESULT_SUCCESS); EXPECT_NE(eventPoolHandle, nullptr); ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.stype = ZE_STRUCTURE_TYPE_EVENT_DESC; eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; eventDesc.signal = ZE_EVENT_SCOPE_FLAG_DEVICE; EXPECT_EQ(zeEventCreate(eventPoolHandle, &eventDesc, &eventHandle), ZE_RESULT_SUCCESS); EXPECT_NE(eventHandle, nullptr); EXPECT_EQ(zetMetricStreamerOpen(context->toHandle(), device, metricGroupHandle, &streamerDesc, eventHandle, &streamerHandle), ZE_RESULT_SUCCESS); EXPECT_NE(streamerHandle, nullptr); EXPECT_NE(ZE_RESULT_SUCCESS, zeEventQueryStatus(eventHandle)); EXPECT_EQ(zetMetricStreamerClose(streamerHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(zeEventDestroy(eventHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(zeEventPoolDestroy(eventPoolHandle), ZE_RESULT_SUCCESS); } } } // namespace ult } // namespace L0 test_metric_oa_enumeration_1.cpp000066400000000000000000003565621422164147700354660ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/metrics/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "level_zero/core/source/device/device_imp.h" #include "level_zero/core/test/unit_tests/mocks/mock_driver.h" #include "level_zero/tools/source/metrics/metric_oa_source.h" #include 
"level_zero/tools/test/unit_tests/sources/metrics/mock_metric_oa.h" #include "gmock/gmock.h" #include "gtest/gtest.h" #include using ::testing::_; using ::testing::Return; namespace L0 { namespace ult { using MetricEnumerationTest = Test; TEST_F(MetricEnumerationTest, givenIncorrectMetricsDiscoveryDeviceWhenZetGetMetricGroupIsCalledThenNoMetricGroupsAreReturned) { EXPECT_CALL(*mockMetricEnumeration, loadMetricsDiscovery()) .Times(0); EXPECT_CALL(*mockMetricEnumeration->g_mockApi, MockOpenAdapterGroup(_)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<0>(nullptr), Return(TCompletionCode::CC_ERROR_GENERAL))); uint32_t metricGroupCount = 0; EXPECT_EQ(zetMetricGroupGet(device->toHandle(), &metricGroupCount, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 0u); } TEST_F(MetricEnumerationTest, givenCorrectMetricDiscoveryWhenLoadMetricsDiscoveryIsCalledThenReturnsSuccess) { EXPECT_CALL(*mockMetricEnumeration, loadMetricsDiscovery()) .Times(1); EXPECT_EQ(mockMetricEnumeration->loadMetricsDiscovery(), ZE_RESULT_SUCCESS); } TEST_F(MetricEnumerationTest, givenIncorrectMetricDiscoveryWhenLoadMetricsDiscoveryIsCalledThenReturnsFail) { mockMetricEnumeration->hMetricsDiscovery = nullptr; mockMetricEnumeration->openAdapterGroup = nullptr; EXPECT_EQ(mockMetricEnumeration->baseLoadMetricsDiscovery(), ZE_RESULT_ERROR_NOT_AVAILABLE); } TEST_F(MetricEnumerationTest, givenIncorrectMetricDiscoveryWhenMetricGroupGetIsCalledThenNoMetricGroupsAreReturned) { mockMetricEnumeration->hMetricsDiscovery = nullptr; mockMetricEnumeration->openAdapterGroup = nullptr; uint32_t metricGroupCount = 0; EXPECT_EQ(zetMetricGroupGet(device->toHandle(), &metricGroupCount, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 0u); } TEST_F(MetricEnumerationTest, givenIncorrectMetricsDiscoveryInterfaceVersionWhenZetGetMetricGroupIsCalledThenNoMetricGroupsAreReturned) { metricsDeviceParams.Version.MajorNumber = 0; metricsDeviceParams.Version.MinorNumber = 1; openMetricsAdapter(); 
EXPECT_CALL(metricsDevice, GetParams()) .Times(1) .WillOnce(Return(&metricsDeviceParams)); uint32_t metricGroupCount = 0; EXPECT_EQ(zetMetricGroupGet(device->toHandle(), &metricGroupCount, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 0u); } TEST_F(MetricEnumerationTest, givenNoConcurrentMetricGroupsWhenZetGetMetricGroupIsCalledThenNoMetricGroupsAreReturned) { openMetricsAdapter(); EXPECT_CALL(metricsDevice, GetParams()) .Times(1) .WillOnce(Return(&metricsDeviceParams)); uint32_t metricGroupCount = 0; EXPECT_EQ(zetMetricGroupGet(device->toHandle(), &metricGroupCount, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 0u); } TEST_F(MetricEnumerationTest, givenTwoConcurrentMetricGroupsWhenZetGetMetricGroupIsCalledThenReturnsTwoMetricsGroups) { // Metrics Discovery device. metricsDeviceParams.ConcurrentGroupsCount = 2; // Metrics Discovery concurrent group. Mock metricsConcurrentGroup0; Mock metricsConcurrentGroup1; TConcurrentGroupParams_1_0 metricsConcurrentGroupParams = {}; metricsConcurrentGroupParams.SymbolName = "OA"; metricsConcurrentGroupParams.MetricSetsCount = 1; // Metrics Discovery:: metric set. 
Mock metricsSet; MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams = {}; metricsSetParams.ApiMask = MetricsDiscovery::API_TYPE_OCL; openMetricsAdapter(); EXPECT_CALL(metricsDevice, GetParams()) .WillRepeatedly(Return(&metricsDeviceParams)); EXPECT_CALL(metricsDevice, GetConcurrentGroup(_)) .Times(2) .WillOnce(Return(&metricsConcurrentGroup0)) .WillOnce(Return(&metricsConcurrentGroup1)); EXPECT_CALL(metricsConcurrentGroup0, GetParams()) .WillRepeatedly(Return(&metricsConcurrentGroupParams)); EXPECT_CALL(metricsConcurrentGroup1, GetParams()) .WillRepeatedly(Return(&metricsConcurrentGroupParams)); EXPECT_CALL(metricsConcurrentGroup0, GetMetricSet(_)) .WillRepeatedly(Return(&metricsSet)); EXPECT_CALL(metricsConcurrentGroup1, GetMetricSet(_)) .WillRepeatedly(Return(&metricsSet)); EXPECT_CALL(metricsSet, GetParams()) .WillRepeatedly(Return(&metricsSetParams)); EXPECT_CALL(metricsSet, SetApiFiltering(_)) .WillRepeatedly(Return(TCompletionCode::CC_OK)); uint32_t metricGroupCount = 0; EXPECT_EQ(zetMetricGroupGet(device->toHandle(), &metricGroupCount, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 2u); std::vector metricGroups; metricGroups.resize(metricGroupCount); EXPECT_EQ(zetMetricGroupGet(device->toHandle(), &metricGroupCount, metricGroups.data()), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 2u); EXPECT_NE(metricGroups[0], nullptr); EXPECT_NE(metricGroups[1], nullptr); } TEST_F(MetricEnumerationTest, givenInvalidArgumentsWhenZetGetMetricGroupPropertiesIsCalledThenReturnsFail) { // Metrics Discovery device. metricsDeviceParams.ConcurrentGroupsCount = 1; // Metrics Discovery concurrent group. Mock metricsConcurrentGroup; TConcurrentGroupParams_1_0 metricsConcurrentGroupParams = {}; metricsConcurrentGroupParams.SymbolName = "OA"; metricsConcurrentGroupParams.MetricSetsCount = 1; // Metrics Discovery:: metric set. 
Mock metricsSet; MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams = {}; metricsSetParams.ApiMask = MetricsDiscovery::API_TYPE_OCL; // One api: metric group handle. zet_metric_group_handle_t metricGroupHandle = {}; openMetricsAdapter(); EXPECT_CALL(metricsDevice, GetParams()) .WillRepeatedly(Return(&metricsDeviceParams)); EXPECT_CALL(metricsDevice, GetConcurrentGroup(_)) .Times(1) .WillOnce(Return(&metricsConcurrentGroup)); EXPECT_CALL(metricsConcurrentGroup, GetParams()) .Times(1) .WillRepeatedly(Return(&metricsConcurrentGroupParams)); EXPECT_CALL(metricsConcurrentGroup, GetMetricSet(_)) .WillRepeatedly(Return(&metricsSet)); EXPECT_CALL(metricsSet, GetParams()) .WillRepeatedly(Return(&metricsSetParams)); EXPECT_CALL(metricsSet, SetApiFiltering(_)) .WillRepeatedly(Return(TCompletionCode::CC_OK)); // Metric group count. uint32_t metricGroupCount = 0; EXPECT_EQ(zetMetricGroupGet(device->toHandle(), &metricGroupCount, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); // Metric group handle. EXPECT_EQ(zetMetricGroupGet(device->toHandle(), &metricGroupCount, &metricGroupHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); EXPECT_NE(metricGroupHandle, nullptr); } TEST_F(MetricEnumerationTest, givenValidArgumentsWhenZetGetMetricGroupPropertiesIsCalledThenReturnsSuccess) { // Metrics Discovery device. metricsDeviceParams.ConcurrentGroupsCount = 1; // Metrics Discovery concurrent group. Mock metricsConcurrentGroup; TConcurrentGroupParams_1_0 metricsConcurrentGroupParams = {}; metricsConcurrentGroupParams.MetricSetsCount = 1; metricsConcurrentGroupParams.SymbolName = "OA"; metricsConcurrentGroupParams.Description = "OA description"; // Metrics Discovery:: metric set. 
Mock metricsSet; MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams = {}; metricsSetParams.ApiMask = MetricsDiscovery::API_TYPE_OCL; metricsSetParams.MetricsCount = 0; metricsSetParams.SymbolName = "Metric set name"; metricsSetParams.ShortName = "Metric set description"; // One api: metric group handle. zet_metric_group_handle_t metricGroupHandle = {}; zet_metric_group_properties_t metricGroupProperties = {}; openMetricsAdapter(); EXPECT_CALL(metricsDevice, GetParams()) .WillRepeatedly(Return(&metricsDeviceParams)); EXPECT_CALL(metricsDevice, GetConcurrentGroup(_)) .Times(1) .WillOnce(Return(&metricsConcurrentGroup)); EXPECT_CALL(metricsConcurrentGroup, GetParams()) .Times(1) .WillRepeatedly(Return(&metricsConcurrentGroupParams)); EXPECT_CALL(metricsConcurrentGroup, GetMetricSet(_)) .WillRepeatedly(Return(&metricsSet)); EXPECT_CALL(metricsSet, GetParams()) .WillRepeatedly(Return(&metricsSetParams)); EXPECT_CALL(metricsSet, SetApiFiltering(_)) .WillRepeatedly(Return(TCompletionCode::CC_OK)); // Metric group count. uint32_t metricGroupCount = 0; EXPECT_EQ(zetMetricGroupGet(device->toHandle(), &metricGroupCount, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); // Metric group handle. EXPECT_EQ(zetMetricGroupGet(device->toHandle(), &metricGroupCount, &metricGroupHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); EXPECT_NE(metricGroupHandle, nullptr); // Metric group properties. 
EXPECT_EQ(zetMetricGroupGetProperties(metricGroupHandle, &metricGroupProperties), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupProperties.domain, 0u); EXPECT_EQ(metricGroupProperties.samplingType, ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_EVENT_BASED); EXPECT_EQ(metricGroupProperties.metricCount, metricsSetParams.MetricsCount); EXPECT_EQ(strcmp(metricGroupProperties.description, metricsSetParams.ShortName), 0); EXPECT_EQ(strcmp(metricGroupProperties.name, metricsSetParams.SymbolName), 0); } TEST_F(MetricEnumerationTest, givenInvalidArgumentsWhenZetMetricGetIsCalledThenReturnsFail) { // Metrics Discovery device. metricsDeviceParams.ConcurrentGroupsCount = 1; // Metrics Discovery concurrent group. Mock metricsConcurrentGroup; TConcurrentGroupParams_1_0 metricsConcurrentGroupParams = {}; metricsConcurrentGroupParams.MetricSetsCount = 1; metricsConcurrentGroupParams.SymbolName = "OA"; metricsConcurrentGroupParams.Description = "OA description"; // Metrics Discovery:: metric set. Mock metricsSet; MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams = {}; metricsSetParams.ApiMask = MetricsDiscovery::API_TYPE_OCL; metricsSetParams.MetricsCount = 0; metricsSetParams.SymbolName = "Metric set name"; metricsSetParams.ShortName = "Metric set description"; // One api: metric group handle. zet_metric_group_handle_t metricGroupHandle = {}; openMetricsAdapter(); EXPECT_CALL(metricsDevice, GetParams()) .WillRepeatedly(Return(&metricsDeviceParams)); EXPECT_CALL(metricsDevice, GetConcurrentGroup(_)) .Times(1) .WillOnce(Return(&metricsConcurrentGroup)); EXPECT_CALL(metricsConcurrentGroup, GetParams()) .Times(1) .WillRepeatedly(Return(&metricsConcurrentGroupParams)); EXPECT_CALL(metricsConcurrentGroup, GetMetricSet(_)) .WillRepeatedly(Return(&metricsSet)); EXPECT_CALL(metricsSet, GetParams()) .WillRepeatedly(Return(&metricsSetParams)); EXPECT_CALL(metricsSet, SetApiFiltering(_)) .WillRepeatedly(Return(TCompletionCode::CC_OK)); // Metric group count. 
uint32_t metricGroupCount = 0; EXPECT_EQ(zetMetricGroupGet(device->toHandle(), &metricGroupCount, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); // Metric group handle. EXPECT_EQ(zetMetricGroupGet(device->toHandle(), &metricGroupCount, &metricGroupHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); EXPECT_NE(metricGroupHandle, nullptr); } TEST_F(MetricEnumerationTest, givenValidArgumentsWhenZetMetricGetIsCalledThenReturnsCorrectMetricCount) { // Metrics Discovery device. metricsDeviceParams.ConcurrentGroupsCount = 1; // Metrics Discovery concurrent group. Mock metricsConcurrentGroup; TConcurrentGroupParams_1_0 metricsConcurrentGroupParams = {}; metricsConcurrentGroupParams.MetricSetsCount = 1; metricsConcurrentGroupParams.SymbolName = "OA"; metricsConcurrentGroupParams.Description = "OA description"; // Metrics Discovery:: metric set. Mock metricsSet; MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams = {}; metricsSetParams.ApiMask = MetricsDiscovery::API_TYPE_OCL; metricsSetParams.MetricsCount = 0; metricsSetParams.SymbolName = "Metric set name"; metricsSetParams.ShortName = "Metric set description"; metricsSetParams.MetricsCount = 1; // Metrics Discovery:: metric. Mock metric; TMetricParams_1_0 metricParams = {}; // One api: metric group handle. 
zet_metric_group_handle_t metricGroupHandle = {}; openMetricsAdapter(); EXPECT_CALL(metricsDevice, GetParams()) .WillRepeatedly(Return(&metricsDeviceParams)); EXPECT_CALL(metricsDevice, GetConcurrentGroup(_)) .Times(1) .WillOnce(Return(&metricsConcurrentGroup)); EXPECT_CALL(metricsConcurrentGroup, GetParams()) .Times(1) .WillRepeatedly(Return(&metricsConcurrentGroupParams)); EXPECT_CALL(metricsConcurrentGroup, GetMetricSet(_)) .WillRepeatedly(Return(&metricsSet)); EXPECT_CALL(metricsSet, GetParams()) .WillRepeatedly(Return(&metricsSetParams)); EXPECT_CALL(metricsSet, GetMetric(_)) .Times(1) .WillOnce(Return(&metric)); EXPECT_CALL(metric, GetParams()) .Times(1) .WillOnce(Return(&metricParams)); EXPECT_CALL(metricsSet, SetApiFiltering(_)) .WillRepeatedly(Return(TCompletionCode::CC_OK)); // Metric group count. uint32_t metricGroupCount = 0; EXPECT_EQ(zetMetricGroupGet(device->toHandle(), &metricGroupCount, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); // Metric group handle. EXPECT_EQ(zetMetricGroupGet(device->toHandle(), &metricGroupCount, &metricGroupHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); EXPECT_NE(metricGroupHandle, nullptr); // Obtain metric count. uint32_t metricCount = 0; EXPECT_EQ(zetMetricGet(metricGroupHandle, &metricCount, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(metricCount, 1u); } TEST_F(MetricEnumerationTest, givenValidArgumentsWhenZetMetricGetIsCalledThenReturnsCorrectMetric) { // Metrics Discovery device. metricsDeviceParams.ConcurrentGroupsCount = 1; // Metrics Discovery concurrent group. Mock metricsConcurrentGroup; TConcurrentGroupParams_1_0 metricsConcurrentGroupParams = {}; metricsConcurrentGroupParams.MetricSetsCount = 1; metricsConcurrentGroupParams.SymbolName = "OA"; metricsConcurrentGroupParams.Description = "OA description"; // Metrics Discovery:: metric set. 
Mock metricsSet; MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams = {}; metricsSetParams.ApiMask = MetricsDiscovery::API_TYPE_OCL; metricsSetParams.MetricsCount = 0; metricsSetParams.SymbolName = "Metric set name"; metricsSetParams.ShortName = "Metric set description"; metricsSetParams.MetricsCount = 1; // Metrics Discovery:: metric. Mock metric; TMetricParams_1_0 metricParams = {}; // One api: metric group handle. zet_metric_group_handle_t metricGroupHandle = {}; openMetricsAdapter(); EXPECT_CALL(metricsDevice, GetParams()) .WillRepeatedly(Return(&metricsDeviceParams)); EXPECT_CALL(metricsDevice, GetConcurrentGroup(_)) .Times(1) .WillOnce(Return(&metricsConcurrentGroup)); EXPECT_CALL(metricsConcurrentGroup, GetParams()) .Times(1) .WillRepeatedly(Return(&metricsConcurrentGroupParams)); EXPECT_CALL(metricsConcurrentGroup, GetMetricSet(_)) .WillRepeatedly(Return(&metricsSet)); EXPECT_CALL(metricsSet, GetParams()) .WillRepeatedly(Return(&metricsSetParams)); EXPECT_CALL(metricsSet, GetMetric(_)) .Times(1) .WillOnce(Return(&metric)); EXPECT_CALL(metric, GetParams()) .Times(1) .WillOnce(Return(&metricParams)); EXPECT_CALL(metricsSet, SetApiFiltering(_)) .WillRepeatedly(Return(TCompletionCode::CC_OK)); // Metric group count. uint32_t metricGroupCount = 0; EXPECT_EQ(zetMetricGroupGet(device->toHandle(), &metricGroupCount, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); // Metric group handle. EXPECT_EQ(zetMetricGroupGet(device->toHandle(), &metricGroupCount, &metricGroupHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); EXPECT_NE(metricGroupHandle, nullptr); // Obtain metric. 
uint32_t metricCount = 0; zet_metric_handle_t metricHandle = {}; EXPECT_EQ(zetMetricGet(metricGroupHandle, &metricCount, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(metricCount, 1u); EXPECT_EQ(zetMetricGet(metricGroupHandle, &metricCount, &metricHandle), ZE_RESULT_SUCCESS); EXPECT_NE(metricHandle, nullptr); } TEST_F(MetricEnumerationTest, givenInvalidArgumentsWhenZetMetricGetPropertiestIsCalledThenReturnsFail) { // Metrics Discovery device. metricsDeviceParams.ConcurrentGroupsCount = 1; // Metrics Discovery concurrent group. Mock metricsConcurrentGroup; TConcurrentGroupParams_1_0 metricsConcurrentGroupParams = {}; metricsConcurrentGroupParams.MetricSetsCount = 1; metricsConcurrentGroupParams.SymbolName = "OA"; metricsConcurrentGroupParams.Description = "OA description"; // Metrics Discovery:: metric set. Mock metricsSet; MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams = {}; metricsSetParams.ApiMask = MetricsDiscovery::API_TYPE_OCL; metricsSetParams.MetricsCount = 0; metricsSetParams.SymbolName = "Metric set name"; metricsSetParams.ShortName = "Metric set description"; metricsSetParams.MetricsCount = 1; // Metrics Discovery:: metric. Mock metric; TMetricParams_1_0 metricParams = {}; // One api: metric group handle. 
zet_metric_group_handle_t metricGroupHandle = {}; openMetricsAdapter(); EXPECT_CALL(metricsDevice, GetParams()) .WillRepeatedly(Return(&metricsDeviceParams)); EXPECT_CALL(metricsDevice, GetConcurrentGroup(_)) .Times(1) .WillOnce(Return(&metricsConcurrentGroup)); EXPECT_CALL(metricsConcurrentGroup, GetParams()) .Times(1) .WillRepeatedly(Return(&metricsConcurrentGroupParams)); EXPECT_CALL(metricsConcurrentGroup, GetMetricSet(_)) .WillRepeatedly(Return(&metricsSet)); EXPECT_CALL(metricsSet, GetParams()) .WillRepeatedly(Return(&metricsSetParams)); EXPECT_CALL(metricsSet, GetMetric(_)) .Times(1) .WillOnce(Return(&metric)); EXPECT_CALL(metric, GetParams()) .Times(1) .WillOnce(Return(&metricParams)); EXPECT_CALL(metricsSet, SetApiFiltering(_)) .WillRepeatedly(Return(TCompletionCode::CC_OK)); // Metric group count. uint32_t metricGroupCount = 0; EXPECT_EQ(zetMetricGroupGet(device->toHandle(), &metricGroupCount, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); // Metric group handle. EXPECT_EQ(zetMetricGroupGet(device->toHandle(), &metricGroupCount, &metricGroupHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); EXPECT_NE(metricGroupHandle, nullptr); // Obtain metric. uint32_t metricCount = 0; zet_metric_handle_t metricHandle = {}; EXPECT_EQ(zetMetricGet(metricGroupHandle, &metricCount, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(metricCount, 1u); EXPECT_EQ(zetMetricGet(metricGroupHandle, &metricCount, &metricHandle), ZE_RESULT_SUCCESS); EXPECT_NE(metricHandle, nullptr); } TEST_F(MetricEnumerationTest, givenValidArgumentsWhenZetMetricGetPropertiestIsCalledThenReturnSuccess) { // Metrics Discovery device. metricsDeviceParams.ConcurrentGroupsCount = 1; // Metrics Discovery concurrent group. 
Mock metricsConcurrentGroup; TConcurrentGroupParams_1_0 metricsConcurrentGroupParams = {}; metricsConcurrentGroupParams.MetricSetsCount = 1; metricsConcurrentGroupParams.SymbolName = "OA"; metricsConcurrentGroupParams.Description = "OA description"; // Metrics Discovery:: metric set. Mock metricsSet; MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams = {}; metricsSetParams.ApiMask = MetricsDiscovery::API_TYPE_OCL; metricsSetParams.MetricsCount = 0; metricsSetParams.SymbolName = "Metric set name"; metricsSetParams.ShortName = "Metric set description"; metricsSetParams.MetricsCount = 1; // Metrics Discovery:: metric. Mock metric; TMetricParams_1_0 metricParams = {}; metricParams.SymbolName = "Metric symbol name"; metricParams.ShortName = "Metric short name"; metricParams.LongName = "Metric long name"; metricParams.ResultType = MetricsDiscovery::TMetricResultType::RESULT_UINT64; metricParams.MetricType = MetricsDiscovery::TMetricType::METRIC_TYPE_RATIO; zet_metric_properties_t metricProperties = {}; // One api: metric group handle. zet_metric_group_handle_t metricGroupHandle = {}; openMetricsAdapter(); EXPECT_CALL(metricsDevice, GetParams()) .WillRepeatedly(Return(&metricsDeviceParams)); EXPECT_CALL(metricsDevice, GetConcurrentGroup(_)) .Times(1) .WillOnce(Return(&metricsConcurrentGroup)); EXPECT_CALL(metricsConcurrentGroup, GetParams()) .Times(1) .WillRepeatedly(Return(&metricsConcurrentGroupParams)); EXPECT_CALL(metricsConcurrentGroup, GetMetricSet(_)) .WillRepeatedly(Return(&metricsSet)); EXPECT_CALL(metricsSet, GetParams()) .WillRepeatedly(Return(&metricsSetParams)); EXPECT_CALL(metricsSet, GetMetric(_)) .Times(1) .WillOnce(Return(&metric)); EXPECT_CALL(metric, GetParams()) .Times(1) .WillOnce(Return(&metricParams)); EXPECT_CALL(metricsSet, SetApiFiltering(_)) .WillRepeatedly(Return(TCompletionCode::CC_OK)); // Metric group count. 
uint32_t metricGroupCount = 0; EXPECT_EQ(zetMetricGroupGet(device->toHandle(), &metricGroupCount, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); // Metric group handle. EXPECT_EQ(zetMetricGroupGet(device->toHandle(), &metricGroupCount, &metricGroupHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); EXPECT_NE(metricGroupHandle, nullptr); // Obtain metric. uint32_t metricCount = 0; zet_metric_handle_t metricHandle = {}; EXPECT_EQ(zetMetricGet(metricGroupHandle, &metricCount, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(metricCount, 1u); EXPECT_EQ(zetMetricGet(metricGroupHandle, &metricCount, &metricHandle), ZE_RESULT_SUCCESS); EXPECT_NE(metricHandle, nullptr); // Obtain metric params. EXPECT_EQ(zetMetricGetProperties(metricHandle, &metricProperties), ZE_RESULT_SUCCESS); EXPECT_EQ(strcmp(metricProperties.name, metricParams.SymbolName), 0); EXPECT_EQ(strcmp(metricProperties.description, metricParams.LongName), 0); EXPECT_EQ(metricProperties.metricType, ZET_METRIC_TYPE_RATIO); EXPECT_EQ(metricProperties.resultType, ZET_VALUE_TYPE_UINT64); } TEST_F(MetricEnumerationTest, givenValidArgumentsWhenZetMetricGetPropertiestIsCalledThenReturnSuccessExt) { // Metrics Discovery device. metricsDeviceParams.ConcurrentGroupsCount = 1; // Metrics Discovery concurrent group. Mock metricsConcurrentGroup; TConcurrentGroupParams_1_0 metricsConcurrentGroupParams = {}; metricsConcurrentGroupParams.MetricSetsCount = 1; metricsConcurrentGroupParams.SymbolName = "OA"; metricsConcurrentGroupParams.Description = "OA description"; // Metrics Discovery:: metric set. Mock metricsSet; MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams = {}; metricsSetParams.ApiMask = MetricsDiscovery::API_TYPE_OCL; metricsSetParams.MetricsCount = 0; metricsSetParams.SymbolName = "Metric set name"; metricsSetParams.ShortName = "Metric set description"; metricsSetParams.MetricsCount = 1; // Metrics Discovery:: metric. 
Mock metric; TMetricParams_1_0 metricParams = {}; metricParams.SymbolName = "Metric symbol name"; metricParams.ShortName = "Metric short name"; metricParams.LongName = "Metric long name"; metricParams.ResultType = MetricsDiscovery::TMetricResultType::RESULT_UINT64; metricParams.MetricType = MetricsDiscovery::TMetricType::METRIC_TYPE_RATIO; zet_metric_properties_t metricProperties = {}; // One api: metric group handle. zet_metric_group_handle_t metricGroupHandle = {}; openMetricsAdapter(); EXPECT_CALL(metricsDevice, GetParams()) .WillRepeatedly(Return(&metricsDeviceParams)); EXPECT_CALL(metricsDevice, GetConcurrentGroup(_)) .Times(1) .WillOnce(Return(&metricsConcurrentGroup)); EXPECT_CALL(metricsConcurrentGroup, GetParams()) .Times(1) .WillRepeatedly(Return(&metricsConcurrentGroupParams)); EXPECT_CALL(metricsConcurrentGroup, GetMetricSet(_)) .WillRepeatedly(Return(&metricsSet)); EXPECT_CALL(metricsSet, GetParams()) .WillRepeatedly(Return(&metricsSetParams)); EXPECT_CALL(metricsSet, GetMetric(_)) .Times(1) .WillOnce(Return(&metric)); EXPECT_CALL(metric, GetParams()) .Times(1) .WillOnce(Return(&metricParams)); EXPECT_CALL(metricsSet, SetApiFiltering(_)) .WillRepeatedly(Return(TCompletionCode::CC_OK)); // Metric group count. uint32_t metricGroupCount = 0; EXPECT_EQ(zetMetricGroupGet(device->toHandle(), &metricGroupCount, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); // Metric group handle. EXPECT_EQ(zetMetricGroupGet(device->toHandle(), &metricGroupCount, &metricGroupHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); EXPECT_NE(metricGroupHandle, nullptr); // Obtain metric. uint32_t metricCount = 0; zet_metric_handle_t metricHandle = {}; EXPECT_EQ(zetMetricGet(metricGroupHandle, &metricCount, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(metricCount, 1u); EXPECT_EQ(zetMetricGet(metricGroupHandle, &metricCount, &metricHandle), ZE_RESULT_SUCCESS); EXPECT_NE(metricHandle, nullptr); // Obtain metric params. 
EXPECT_EQ(zetMetricGetProperties(metricHandle, &metricProperties), ZE_RESULT_SUCCESS); EXPECT_EQ(strcmp(metricProperties.name, metricParams.SymbolName), 0); EXPECT_EQ(strcmp(metricProperties.description, metricParams.LongName), 0); EXPECT_EQ(metricProperties.metricType, ZET_METRIC_TYPE_RATIO); EXPECT_EQ(metricProperties.resultType, ZET_VALUE_TYPE_UINT64); } TEST_F(MetricEnumerationTest, givenInvalidArgumentsWhenzetContextActivateMetricGroupsIsCalledThenReturnsFail) { // Metrics Discovery device. metricsDeviceParams.ConcurrentGroupsCount = 1; // Metrics Discovery concurrent group. Mock metricsConcurrentGroup; TConcurrentGroupParams_1_0 metricsConcurrentGroupParams = {}; metricsConcurrentGroupParams.MetricSetsCount = 1; metricsConcurrentGroupParams.SymbolName = "OA"; metricsConcurrentGroupParams.Description = "OA description"; // Metrics Discovery:: metric set. Mock metricsSet; MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams = {}; metricsSetParams.ApiMask = MetricsDiscovery::API_TYPE_OCL; metricsSetParams.MetricsCount = 0; metricsSetParams.SymbolName = "Metric set name"; metricsSetParams.ShortName = "Metric set description"; metricsSetParams.MetricsCount = 1; // Metrics Discovery:: metric. Mock metric; TMetricParams_1_0 metricParams = {}; metricParams.SymbolName = "Metric symbol name"; metricParams.ShortName = "Metric short name"; metricParams.LongName = "Metric long name"; metricParams.ResultType = MetricsDiscovery::TMetricResultType::RESULT_UINT64; metricParams.MetricType = MetricsDiscovery::TMetricType::METRIC_TYPE_RATIO; // One api: metric group handle. 
zet_metric_group_handle_t metricGroupHandle = {}; openMetricsAdapter(); EXPECT_CALL(metricsDevice, GetParams()) .WillRepeatedly(Return(&metricsDeviceParams)); EXPECT_CALL(metricsDevice, GetConcurrentGroup(_)) .Times(1) .WillOnce(Return(&metricsConcurrentGroup)); EXPECT_CALL(metricsConcurrentGroup, GetParams()) .Times(1) .WillRepeatedly(Return(&metricsConcurrentGroupParams)); EXPECT_CALL(metricsConcurrentGroup, GetMetricSet(_)) .WillRepeatedly(Return(&metricsSet)); EXPECT_CALL(metricsSet, GetParams()) .WillRepeatedly(Return(&metricsSetParams)); EXPECT_CALL(metricsSet, GetMetric(_)) .Times(1) .WillOnce(Return(&metric)); EXPECT_CALL(metric, GetParams()) .Times(1) .WillOnce(Return(&metricParams)); EXPECT_CALL(metricsSet, SetApiFiltering(_)) .WillRepeatedly(Return(TCompletionCode::CC_OK)); // Metric group count. uint32_t metricGroupCount = 0; EXPECT_EQ(zetMetricGroupGet(device->toHandle(), &metricGroupCount, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); // Metric group handle. EXPECT_EQ(zetMetricGroupGet(device->toHandle(), &metricGroupCount, &metricGroupHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); EXPECT_NE(metricGroupHandle, nullptr); } TEST_F(MetricEnumerationTest, givenValidEventBasedMetricGroupWhenzetContextActivateMetricGroupsIsCalledThenReturnsSuccess) { // Metrics Discovery device. metricsDeviceParams.ConcurrentGroupsCount = 1; // Metrics Discovery concurrent group. Mock metricsConcurrentGroup; TConcurrentGroupParams_1_0 metricsConcurrentGroupParams = {}; metricsConcurrentGroupParams.MetricSetsCount = 1; metricsConcurrentGroupParams.SymbolName = "OA"; metricsConcurrentGroupParams.Description = "OA description"; // Metrics Discovery:: metric set. 
Mock metricsSet; MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams = {}; metricsSetParams.ApiMask = MetricsDiscovery::API_TYPE_OCL; metricsSetParams.MetricsCount = 0; metricsSetParams.SymbolName = "Metric set name"; metricsSetParams.ShortName = "Metric set description"; metricsSetParams.MetricsCount = 1; // Metrics Discovery:: metric. Mock metric; TMetricParams_1_0 metricParams = {}; metricParams.SymbolName = "Metric symbol name"; metricParams.ShortName = "Metric short name"; metricParams.LongName = "Metric long name"; metricParams.ResultType = MetricsDiscovery::TMetricResultType::RESULT_UINT64; metricParams.MetricType = MetricsDiscovery::TMetricType::METRIC_TYPE_RATIO; // One api: metric group handle. zet_metric_group_handle_t metricGroupHandle = {}; openMetricsAdapter(); EXPECT_CALL(metricsDevice, GetParams()) .WillRepeatedly(Return(&metricsDeviceParams)); EXPECT_CALL(metricsDevice, GetConcurrentGroup(_)) .Times(1) .WillOnce(Return(&metricsConcurrentGroup)); EXPECT_CALL(metricsConcurrentGroup, GetParams()) .Times(1) .WillRepeatedly(Return(&metricsConcurrentGroupParams)); EXPECT_CALL(metricsConcurrentGroup, GetMetricSet(_)) .WillRepeatedly(Return(&metricsSet)); EXPECT_CALL(metricsSet, GetParams()) .WillRepeatedly(Return(&metricsSetParams)); EXPECT_CALL(metricsSet, GetMetric(_)) .Times(1) .WillOnce(Return(&metric)); EXPECT_CALL(metric, GetParams()) .Times(1) .WillOnce(Return(&metricParams)); EXPECT_CALL(metricsSet, SetApiFiltering(_)) .WillRepeatedly(Return(TCompletionCode::CC_OK)); // Metric group count. uint32_t metricGroupCount = 0; EXPECT_EQ(zetMetricGroupGet(device->toHandle(), &metricGroupCount, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); // Metric group handle. EXPECT_EQ(zetMetricGroupGet(device->toHandle(), &metricGroupCount, &metricGroupHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); EXPECT_NE(metricGroupHandle, nullptr); // Activate metric group. 
EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), device->toHandle(), 1, &metricGroupHandle), ZE_RESULT_SUCCESS); } using MultiDeviceMetricEnumerationTest = Test; TEST_F(MultiDeviceMetricEnumerationTest, givenMultipleDevicesAndValidEventBasedMetricGroupWhenzetContextActivateMetricGroupsIsCalledThenReturnsSuccess) { auto &deviceImp = *static_cast(devices[0]); const uint32_t subDeviceCount = static_cast(deviceImp.subDevices.size()); // Metrics Discovery device. metricsDeviceParams.ConcurrentGroupsCount = 1; // Metrics Discovery concurrent group. Mock metricsConcurrentGroup; TConcurrentGroupParams_1_0 metricsConcurrentGroupParams = {}; metricsConcurrentGroupParams.MetricSetsCount = 1; metricsConcurrentGroupParams.SymbolName = "OA"; metricsConcurrentGroupParams.Description = "OA description"; // Metrics Discovery:: metric set. Mock metricsSet; MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams = {}; metricsSetParams.ApiMask = MetricsDiscovery::API_TYPE_OCL; metricsSetParams.MetricsCount = 0; metricsSetParams.SymbolName = "Metric set name"; metricsSetParams.ShortName = "Metric set description"; metricsSetParams.MetricsCount = 1; // Metrics Discovery:: metric. Mock metric; TMetricParams_1_0 metricParams = {}; metricParams.SymbolName = "Metric symbol name"; metricParams.ShortName = "Metric short name"; metricParams.LongName = "Metric long name"; metricParams.ResultType = MetricsDiscovery::TMetricResultType::RESULT_UINT64; metricParams.MetricType = MetricsDiscovery::TMetricType::METRIC_TYPE_RATIO; // One api: metric group handle. 
zet_metric_group_handle_t metricGroupHandle = {}; openMetricsAdapter(); EXPECT_CALL(metricsDevice, GetParams()) .WillRepeatedly(Return(&metricsDeviceParams)); EXPECT_CALL(metricsDevice, GetConcurrentGroup(_)) .Times(subDeviceCount) .WillRepeatedly(Return(&metricsConcurrentGroup)); EXPECT_CALL(metricsConcurrentGroup, GetParams()) .Times(subDeviceCount) .WillRepeatedly(Return(&metricsConcurrentGroupParams)); EXPECT_CALL(metricsConcurrentGroup, GetMetricSet(_)) .WillRepeatedly(Return(&metricsSet)); EXPECT_CALL(metricsSet, GetParams()) .WillRepeatedly(Return(&metricsSetParams)); EXPECT_CALL(metricsSet, GetMetric(_)) .Times(subDeviceCount) .WillRepeatedly(Return(&metric)); EXPECT_CALL(metric, GetParams()) .Times(subDeviceCount) .WillRepeatedly(Return(&metricParams)); EXPECT_CALL(metricsSet, SetApiFiltering(_)) .WillRepeatedly(Return(TCompletionCode::CC_OK)); // Metric group count. uint32_t metricGroupCount = 0; EXPECT_EQ(zetMetricGroupGet(devices[0]->toHandle(), &metricGroupCount, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); // Metric group handle. EXPECT_EQ(zetMetricGroupGet(devices[0]->toHandle(), &metricGroupCount, &metricGroupHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); EXPECT_NE(metricGroupHandle, nullptr); // Activate metric group. EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), devices[0]->toHandle(), 1, &metricGroupHandle), ZE_RESULT_SUCCESS); } TEST_F(MultiDeviceMetricEnumerationTest, givenMultipleDevicesAndTwoMetricGroupsWithTheSameDomainsWhenzetContextActivateMetricGroupsIsCalledThenReturnsFail) { auto &deviceImp = *static_cast(devices[0]); const uint32_t subDeviceCount = static_cast(deviceImp.subDevices.size()); // Metrics Discovery device. metricsDeviceParams.ConcurrentGroupsCount = 1; // Metrics Discovery concurrent group. 
Mock metricsConcurrentGroup0; TConcurrentGroupParams_1_0 metricsConcurrentGroupParams0 = {}; metricsConcurrentGroupParams0.MetricSetsCount = 2; metricsConcurrentGroupParams0.SymbolName = "OA"; metricsConcurrentGroupParams0.Description = "OA description"; // Metrics Discovery:: metric set. Mock metricsSet0; Mock metricsSet1; MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams0 = {}; MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams1 = {}; metricsSetParams0.ApiMask = MetricsDiscovery::API_TYPE_OCL; metricsSetParams0.SymbolName = "Metric set ZERO"; metricsSetParams0.ShortName = "Metric set ZERO description"; metricsSetParams0.MetricsCount = 1; metricsSetParams1.ApiMask = MetricsDiscovery::API_TYPE_IOSTREAM; metricsSetParams1.SymbolName = "Metric set ONE name"; metricsSetParams1.ShortName = "Metric set ONE description"; metricsSetParams1.MetricsCount = 1; // Metrics Discovery:: metric. Mock metric0; TMetricParams_1_0 metricParams0 = {}; metricParams0.SymbolName = "Metric ZERO symbol name"; metricParams0.ShortName = "Metric ZERO short name"; metricParams0.LongName = "Metric ZERO long name"; metricParams0.ResultType = MetricsDiscovery::TMetricResultType::RESULT_UINT64; metricParams0.MetricType = MetricsDiscovery::TMetricType::METRIC_TYPE_RATIO; Mock metric1; TMetricParams_1_0 metricParams1 = {}; metricParams1.SymbolName = "Metric ONE symbol name"; metricParams1.ShortName = "Metric ONE short name"; metricParams1.LongName = "Metric ONE long name"; metricParams1.ResultType = MetricsDiscovery::TMetricResultType::RESULT_UINT64; metricParams1.MetricType = MetricsDiscovery::TMetricType::METRIC_TYPE_RATIO; openMetricsAdapter(); EXPECT_CALL(metricsDevice, GetParams()) .WillRepeatedly(Return(&metricsDeviceParams)); EXPECT_CALL(metricsDevice, GetConcurrentGroup(_)) .Times(subDeviceCount) .WillRepeatedly(Return(&metricsConcurrentGroup0)); EXPECT_CALL(metricsConcurrentGroup0, GetParams()) .Times(subDeviceCount) .WillRepeatedly(Return(&metricsConcurrentGroupParams0)); 
EXPECT_CALL(metricsConcurrentGroup0, GetMetricSet(_)) .Times(subDeviceCount * 2) .WillOnce(Return(&metricsSet0)) .WillOnce(Return(&metricsSet1)) .WillOnce(Return(&metricsSet0)) .WillOnce(Return(&metricsSet1)); EXPECT_CALL(metricsSet0, GetParams()) .WillRepeatedly(Return(&metricsSetParams0)); EXPECT_CALL(metricsSet1, GetParams()) .WillRepeatedly(Return(&metricsSetParams1)); EXPECT_CALL(metricsSet0, GetMetric(_)) .Times(subDeviceCount) .WillRepeatedly(Return(&metric0)); EXPECT_CALL(metricsSet1, GetMetric(_)) .Times(subDeviceCount) .WillRepeatedly(Return(&metric1)); EXPECT_CALL(metricsSet0, SetApiFiltering(_)) .WillRepeatedly(Return(TCompletionCode::CC_OK)); EXPECT_CALL(metricsSet1, SetApiFiltering(_)) .WillRepeatedly(Return(TCompletionCode::CC_OK)); EXPECT_CALL(metric0, GetParams()) .Times(subDeviceCount) .WillRepeatedly(Return(&metricParams0)); EXPECT_CALL(metric1, GetParams()) .Times(subDeviceCount) .WillRepeatedly(Return(&metricParams1)); // Metric group count. uint32_t metricGroupCount = 0; EXPECT_EQ(zetMetricGroupGet(devices[0]->toHandle(), &metricGroupCount, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 2u); // Metric group handle. std::vector metricGroupHandles; metricGroupHandles.resize(metricGroupCount); EXPECT_EQ(zetMetricGroupGet(devices[0]->toHandle(), &metricGroupCount, metricGroupHandles.data()), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 2u); EXPECT_NE(metricGroupHandles[0], nullptr); EXPECT_NE(metricGroupHandles[1], nullptr); zet_metric_group_properties_t properties0; EXPECT_EQ(ZE_RESULT_SUCCESS, zetMetricGroupGetProperties(metricGroupHandles[0], &properties0)); zet_metric_group_properties_t properties1; EXPECT_EQ(ZE_RESULT_SUCCESS, zetMetricGroupGetProperties(metricGroupHandles[1], &properties1)); // Activate metric groups. 
EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), devices[0]->toHandle(), 4, metricGroupHandles.data()), ZE_RESULT_ERROR_UNKNOWN); } TEST_F(MetricEnumerationTest, givenValidTimeBasedMetricGroupWhenzetContextActivateMetricGroupsIsCalledThenReturnsSuccess) { // Metrics Discovery device. metricsDeviceParams.ConcurrentGroupsCount = 1; // Metrics Discovery concurrent group. Mock metricsConcurrentGroup; TConcurrentGroupParams_1_0 metricsConcurrentGroupParams = {}; metricsConcurrentGroupParams.MetricSetsCount = 1; metricsConcurrentGroupParams.SymbolName = "OA"; metricsConcurrentGroupParams.Description = "OA description"; // Metrics Discovery:: metric set. Mock metricsSet; MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams = {}; metricsSetParams.ApiMask = MetricsDiscovery::API_TYPE_IOSTREAM; metricsSetParams.MetricsCount = 0; metricsSetParams.SymbolName = "Metric set name"; metricsSetParams.ShortName = "Metric set description"; metricsSetParams.MetricsCount = 1; // Metrics Discovery:: metric. Mock metric; TMetricParams_1_0 metricParams = {}; metricParams.SymbolName = "Metric symbol name"; metricParams.ShortName = "Metric short name"; metricParams.LongName = "Metric long name"; metricParams.ResultType = MetricsDiscovery::TMetricResultType::RESULT_UINT64; metricParams.MetricType = MetricsDiscovery::TMetricType::METRIC_TYPE_RATIO; // One api: metric group handle. 
zet_metric_group_handle_t metricGroupHandle = {}; openMetricsAdapter(); EXPECT_CALL(metricsDevice, GetParams()) .WillRepeatedly(Return(&metricsDeviceParams)); EXPECT_CALL(metricsDevice, GetConcurrentGroup(_)) .Times(1) .WillOnce(Return(&metricsConcurrentGroup)); EXPECT_CALL(metricsConcurrentGroup, GetParams()) .Times(1) .WillRepeatedly(Return(&metricsConcurrentGroupParams)); EXPECT_CALL(metricsConcurrentGroup, GetMetricSet(_)) .WillRepeatedly(Return(&metricsSet)); EXPECT_CALL(metricsSet, GetParams()) .WillRepeatedly(Return(&metricsSetParams)); EXPECT_CALL(metricsSet, GetMetric(_)) .Times(1) .WillOnce(Return(&metric)); EXPECT_CALL(metric, GetParams()) .Times(1) .WillOnce(Return(&metricParams)); EXPECT_CALL(metricsSet, SetApiFiltering(_)) .WillRepeatedly(Return(TCompletionCode::CC_OK)); // Metric group count. uint32_t metricGroupCount = 0; EXPECT_EQ(zetMetricGroupGet(device->toHandle(), &metricGroupCount, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); // Metric group handle. EXPECT_EQ(zetMetricGroupGet(device->toHandle(), &metricGroupCount, &metricGroupHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); EXPECT_NE(metricGroupHandle, nullptr); // Activate metric group. EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), device->toHandle(), 1, &metricGroupHandle), ZE_RESULT_SUCCESS); } TEST_F(MetricEnumerationTest, givenActivateTheSameMetricGroupTwiceWhenzetContextActivateMetricGroupsIsCalledThenReturnsSuccess) { // Metrics Discovery device. metricsDeviceParams.ConcurrentGroupsCount = 1; // Metrics Discovery concurrent group. Mock metricsConcurrentGroup; TConcurrentGroupParams_1_0 metricsConcurrentGroupParams = {}; metricsConcurrentGroupParams.MetricSetsCount = 1; metricsConcurrentGroupParams.SymbolName = "OA"; metricsConcurrentGroupParams.Description = "OA description"; // Metrics Discovery:: metric set. 
Mock metricsSet; MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams = {}; metricsSetParams.ApiMask = MetricsDiscovery::API_TYPE_IOSTREAM; metricsSetParams.MetricsCount = 0; metricsSetParams.SymbolName = "Metric set name"; metricsSetParams.ShortName = "Metric set description"; metricsSetParams.MetricsCount = 1; // Metrics Discovery:: metric. Mock metric; TMetricParams_1_0 metricParams = {}; metricParams.SymbolName = "Metric symbol name"; metricParams.ShortName = "Metric short name"; metricParams.LongName = "Metric long name"; metricParams.ResultType = MetricsDiscovery::TMetricResultType::RESULT_UINT64; metricParams.MetricType = MetricsDiscovery::TMetricType::METRIC_TYPE_RATIO; // One api: metric group handle. zet_metric_group_handle_t metricGroupHandle = {}; openMetricsAdapter(); EXPECT_CALL(metricsDevice, GetParams()) .WillRepeatedly(Return(&metricsDeviceParams)); EXPECT_CALL(metricsDevice, GetConcurrentGroup(_)) .Times(1) .WillOnce(Return(&metricsConcurrentGroup)); EXPECT_CALL(metricsConcurrentGroup, GetParams()) .Times(1) .WillRepeatedly(Return(&metricsConcurrentGroupParams)); EXPECT_CALL(metricsConcurrentGroup, GetMetricSet(_)) .WillRepeatedly(Return(&metricsSet)); EXPECT_CALL(metricsSet, GetParams()) .WillRepeatedly(Return(&metricsSetParams)); EXPECT_CALL(metricsSet, GetMetric(_)) .Times(1) .WillOnce(Return(&metric)); EXPECT_CALL(metric, GetParams()) .Times(1) .WillOnce(Return(&metricParams)); EXPECT_CALL(metricsSet, SetApiFiltering(_)) .WillRepeatedly(Return(TCompletionCode::CC_OK)); // Metric group count. uint32_t metricGroupCount = 0; EXPECT_EQ(zetMetricGroupGet(device->toHandle(), &metricGroupCount, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); // Metric group handle. EXPECT_EQ(zetMetricGroupGet(device->toHandle(), &metricGroupCount, &metricGroupHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); EXPECT_NE(metricGroupHandle, nullptr); // Activate metric group. 
EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), device->toHandle(), 1, &metricGroupHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), device->toHandle(), 1, &metricGroupHandle), ZE_RESULT_SUCCESS); } TEST_F(MetricEnumerationTest, givenActivateTwoMetricGroupsWithDifferentDomainsWhenzetContextActivateMetricGroupsIsCalledThenReturnsSuccess) { // Metrics Discovery device. metricsDeviceParams.ConcurrentGroupsCount = 2; // Metrics Discovery concurrent group. Mock metricsConcurrentGroup0; Mock metricsConcurrentGroup1; TConcurrentGroupParams_1_0 metricsConcurrentGroupParams0 = {}; TConcurrentGroupParams_1_0 metricsConcurrentGroupParams1 = {}; metricsConcurrentGroupParams0.MetricSetsCount = 1; metricsConcurrentGroupParams1.MetricSetsCount = 1; metricsConcurrentGroupParams0.SymbolName = "OA"; metricsConcurrentGroupParams1.SymbolName = "OA"; // Metrics Discovery: metric set Mock metricsSet; MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams = {}; metricsSetParams.ApiMask = MetricsDiscovery::API_TYPE_OCL; // One api: metric group. 
zet_metric_group_handle_t metricGroupHandle[2] = {}; zet_metric_group_properties_t metricGroupProperties[2] = {}; openMetricsAdapter(); EXPECT_CALL(metricsDevice, GetParams()) .WillRepeatedly(Return(&metricsDeviceParams)); EXPECT_CALL(metricsDevice, GetConcurrentGroup(_)) .Times(2) .WillOnce(Return(&metricsConcurrentGroup0)) .WillOnce(Return(&metricsConcurrentGroup1)); EXPECT_CALL(metricsConcurrentGroup0, GetParams()) .Times(1) .WillOnce(Return(&metricsConcurrentGroupParams0)); EXPECT_CALL(metricsConcurrentGroup1, GetParams()) .Times(1) .WillOnce(Return(&metricsConcurrentGroupParams1)); EXPECT_CALL(metricsConcurrentGroup0, GetMetricSet(_)) .WillRepeatedly(Return(&metricsSet)); EXPECT_CALL(metricsConcurrentGroup1, GetMetricSet(_)) .WillRepeatedly(Return(&metricsSet)); EXPECT_CALL(metricsSet, GetParams()) .WillRepeatedly(Return(&metricsSetParams)); EXPECT_CALL(metricsSet, SetApiFiltering(_)) .WillRepeatedly(Return(TCompletionCode::CC_OK)); // Metric group handles. uint32_t metricGroupCount = 2; EXPECT_EQ(zetMetricGroupGet(device->toHandle(), &metricGroupCount, metricGroupHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 2u); EXPECT_NE(metricGroupHandle[0], nullptr); EXPECT_NE(metricGroupHandle[1], nullptr); // Metric group properties. EXPECT_EQ(zetMetricGroupGetProperties(metricGroupHandle[0], &metricGroupProperties[0]), ZE_RESULT_SUCCESS); EXPECT_EQ(zetMetricGroupGetProperties(metricGroupHandle[1], &metricGroupProperties[1]), ZE_RESULT_SUCCESS); // Activate two metric groups with a different domains. EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), device->toHandle(), 1, &metricGroupHandle[0]), ZE_RESULT_SUCCESS); EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), device->toHandle(), 1, &metricGroupHandle[1]), ZE_RESULT_SUCCESS); } TEST_F(MetricEnumerationTest, givenActivateTwoMetricGroupsWithDifferentDomainsAtOnceWhenzetContextActivateMetricGroupsIsCalledThenReturnsSuccess) { // Metrics Discovery device. 
metricsDeviceParams.ConcurrentGroupsCount = 2; // Metrics Discovery concurrent group. Mock metricsConcurrentGroup0; Mock metricsConcurrentGroup1; TConcurrentGroupParams_1_0 metricsConcurrentGroupParams0 = {}; TConcurrentGroupParams_1_0 metricsConcurrentGroupParams1 = {}; metricsConcurrentGroupParams0.MetricSetsCount = 1; metricsConcurrentGroupParams1.MetricSetsCount = 1; metricsConcurrentGroupParams0.SymbolName = "OA"; metricsConcurrentGroupParams1.SymbolName = "OA"; // Metrics Discovery: metric set Mock metricsSet; MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams = {}; metricsSetParams.ApiMask = MetricsDiscovery::API_TYPE_OCL; // One api: metric group. zet_metric_group_handle_t metricGroupHandle[2] = {}; zet_metric_group_properties_t metricGroupProperties[2] = {}; openMetricsAdapter(); EXPECT_CALL(metricsDevice, GetParams()) .WillRepeatedly(Return(&metricsDeviceParams)); EXPECT_CALL(metricsDevice, GetConcurrentGroup(_)) .Times(2) .WillOnce(Return(&metricsConcurrentGroup0)) .WillOnce(Return(&metricsConcurrentGroup1)); EXPECT_CALL(metricsConcurrentGroup0, GetParams()) .Times(1) .WillOnce(Return(&metricsConcurrentGroupParams0)); EXPECT_CALL(metricsConcurrentGroup1, GetParams()) .Times(1) .WillOnce(Return(&metricsConcurrentGroupParams1)); EXPECT_CALL(metricsConcurrentGroup0, GetMetricSet(_)) .WillRepeatedly(Return(&metricsSet)); EXPECT_CALL(metricsConcurrentGroup1, GetMetricSet(_)) .WillRepeatedly(Return(&metricsSet)); EXPECT_CALL(metricsSet, GetParams()) .WillRepeatedly(Return(&metricsSetParams)); EXPECT_CALL(metricsSet, SetApiFiltering(_)) .WillRepeatedly(Return(TCompletionCode::CC_OK)); // Metric group handles. uint32_t metricGroupCount = 2; EXPECT_EQ(zetMetricGroupGet(device->toHandle(), &metricGroupCount, metricGroupHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 2u); EXPECT_NE(metricGroupHandle[0], nullptr); EXPECT_NE(metricGroupHandle[1], nullptr); // Metric group properties. 
EXPECT_EQ(zetMetricGroupGetProperties(metricGroupHandle[0], &metricGroupProperties[0]), ZE_RESULT_SUCCESS); EXPECT_EQ(zetMetricGroupGetProperties(metricGroupHandle[1], &metricGroupProperties[1]), ZE_RESULT_SUCCESS); // Activate two metric groups with a different domains. EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), device->toHandle(), 2, metricGroupHandle), ZE_RESULT_SUCCESS); } TEST_F(MetricEnumerationTest, givenActivateTwoMetricGroupsWithTheSameDomainsWhenzetContextActivateMetricGroupsIsCalledThenReturnsFail) { // Metrics Discovery device. metricsDeviceParams.ConcurrentGroupsCount = 1; // Metrics Discovery concurrent group. Mock metricsConcurrentGroup; TConcurrentGroupParams_1_0 metricsConcurrentGroupParams = {}; metricsConcurrentGroupParams.MetricSetsCount = 2; metricsConcurrentGroupParams.SymbolName = "OA"; // Metrics Discovery: metric set Mock metricsSet0; Mock metricsSet1; MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams0 = {}; MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams1 = {}; metricsSetParams0.ApiMask = MetricsDiscovery::API_TYPE_OCL; metricsSetParams1.ApiMask = MetricsDiscovery::API_TYPE_IOSTREAM; // One api: metric group. 
zet_metric_group_handle_t metricGroupHandle[2] = {}; zet_metric_group_properties_t metricGroupProperties[2] = {}; openMetricsAdapter(); EXPECT_CALL(metricsDevice, GetParams()) .WillRepeatedly(Return(&metricsDeviceParams)); EXPECT_CALL(metricsDevice, GetConcurrentGroup(_)) .Times(1) .WillOnce(Return(&metricsConcurrentGroup)); EXPECT_CALL(metricsConcurrentGroup, GetParams()) .Times(1) .WillOnce(Return(&metricsConcurrentGroupParams)); EXPECT_CALL(metricsConcurrentGroup, GetMetricSet(_)) .Times(2) .WillOnce(Return(&metricsSet0)) .WillOnce(Return(&metricsSet1)); EXPECT_CALL(metricsSet0, GetParams()) .WillRepeatedly(Return(&metricsSetParams0)); EXPECT_CALL(metricsSet1, GetParams()) .WillRepeatedly(Return(&metricsSetParams1)); EXPECT_CALL(metricsSet0, SetApiFiltering(_)) .WillRepeatedly(Return(TCompletionCode::CC_OK)); EXPECT_CALL(metricsSet1, SetApiFiltering(_)) .WillRepeatedly(Return(TCompletionCode::CC_OK)); // Metric group handles. uint32_t metricGroupCount = 2; EXPECT_EQ(zetMetricGroupGet(device->toHandle(), &metricGroupCount, metricGroupHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 2u); EXPECT_NE(metricGroupHandle[0], nullptr); EXPECT_NE(metricGroupHandle[1], nullptr); // Metric group properties. EXPECT_EQ(zetMetricGroupGetProperties(metricGroupHandle[0], &metricGroupProperties[0]), ZE_RESULT_SUCCESS); EXPECT_EQ(zetMetricGroupGetProperties(metricGroupHandle[1], &metricGroupProperties[1]), ZE_RESULT_SUCCESS); // Activate two metric groups with a different domains. 
EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), device->toHandle(), 1, &metricGroupHandle[0]), ZE_RESULT_SUCCESS);
    EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), device->toHandle(), 1, &metricGroupHandle[1]), ZE_RESULT_ERROR_UNKNOWN);
    EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), device->toHandle(), 2, metricGroupHandle), ZE_RESULT_ERROR_UNKNOWN);
}

// Walks through a sequence of activations and deactivations of two metric groups
// that both come from the single "OA" concurrent group, checking the result of
// every zetContextActivateMetricGroups call along the way.
TEST_F(MetricEnumerationTest, givenDeactivateTestsWhenzetContextActivateMetricGroupsIsCalledThenReturnsApropriateResults) {

    // The fixture's device reports one concurrent group.
    metricsDeviceParams.ConcurrentGroupsCount = 1;

    // Concurrent group "OA" with two metric sets.
    Mock oaGroup;
    TConcurrentGroupParams_1_0 oaGroupParams = {};
    oaGroupParams.SymbolName = "OA";
    oaGroupParams.MetricSetsCount = 2;

    // Both metric sets use API_TYPE_IOSTREAM.
    Mock firstSet;
    Mock secondSet;
    MetricsDiscovery::TMetricSetParams_1_4 firstSetParams = {};
    MetricsDiscovery::TMetricSetParams_1_4 secondSetParams = {};
    firstSetParams.ApiMask = MetricsDiscovery::API_TYPE_IOSTREAM;
    secondSetParams.ApiMask = MetricsDiscovery::API_TYPE_IOSTREAM;

    // Outputs of the enumeration.
    zet_metric_group_handle_t groupHandles[2] = {};
    zet_metric_group_properties_t groupProperties[2] = {};

    openMetricsAdapter();

    // Device level.
    EXPECT_CALL(metricsDevice, GetParams())
        .WillRepeatedly(Return(&metricsDeviceParams));
    EXPECT_CALL(metricsDevice, GetConcurrentGroup(_))
        .WillOnce(Return(&oaGroup));

    // Concurrent group level: the two sets are returned in order, once each.
    EXPECT_CALL(oaGroup, GetParams())
        .WillOnce(Return(&oaGroupParams));
    EXPECT_CALL(oaGroup, GetMetricSet(_))
        .WillOnce(Return(&firstSet))
        .WillOnce(Return(&secondSet));

    // Metric set level.
    EXPECT_CALL(firstSet, GetParams())
        .WillRepeatedly(Return(&firstSetParams));
    EXPECT_CALL(secondSet, GetParams())
        .WillRepeatedly(Return(&secondSetParams));
    EXPECT_CALL(firstSet, SetApiFiltering(_))
        .WillRepeatedly(Return(TCompletionCode::CC_OK));
    EXPECT_CALL(secondSet, SetApiFiltering(_))
        .WillRepeatedly(Return(TCompletionCode::CC_OK));

    // Fetch both handles in one call.
    uint32_t groupCount = 2;
    EXPECT_EQ(zetMetricGroupGet(device->toHandle(), &groupCount, groupHandles), ZE_RESULT_SUCCESS);
    EXPECT_EQ(groupCount, 2u);
    EXPECT_NE(groupHandles[0], nullptr);
    EXPECT_NE(groupHandles[1], nullptr);

    // Read the properties of each group.
    EXPECT_EQ(zetMetricGroupGetProperties(groupHandles[0], &groupProperties[0]), ZE_RESULT_SUCCESS);
    EXPECT_EQ(zetMetricGroupGetProperties(groupHandles[1], &groupProperties[1]), ZE_RESULT_SUCCESS);

    // Shorthands for the call under test; a zero count with a null list deactivates.
    const auto activate = [&](uint32_t count, zet_metric_group_handle_t *handles) {
        return zetContextActivateMetricGroups(context->toHandle(), device->toHandle(), count, handles);
    };
    const auto deactivateAll = [&]() {
        return zetContextActivateMetricGroups(context->toHandle(), device->toHandle(), 0, nullptr);
    };

    // The first group activates; adding the second one, alone or together with
    // the first, is rejected.
    EXPECT_EQ(activate(1, &groupHandles[0]), ZE_RESULT_SUCCESS);
    EXPECT_EQ(activate(1, &groupHandles[1]), ZE_RESULT_ERROR_UNKNOWN);
    EXPECT_EQ(activate(2, groupHandles), ZE_RESULT_ERROR_UNKNOWN);

    // Deactivate everything.
    EXPECT_EQ(deactivateAll(), ZE_RESULT_SUCCESS);

    // Both groups at once are still rejected after the deactivation.
    EXPECT_EQ(activate(2, groupHandles), ZE_RESULT_ERROR_UNKNOWN);

    // Deactivate everything.
    EXPECT_EQ(deactivateAll(), ZE_RESULT_SUCCESS);

    // A single group activates again.
    EXPECT_EQ(activate(1, groupHandles), ZE_RESULT_SUCCESS);

    // Deactivate everything.
    EXPECT_EQ(deactivateAll(), ZE_RESULT_SUCCESS);
}

TEST_F(MetricEnumerationTest, givenInvalidArgumentsWhenZetMetricGroupCalculateMetricValuesIsCalledThenReturnsFail) {

    // Metrics Discovery device.
    metricsDeviceParams.ConcurrentGroupsCount = 1;

    // Metrics Discovery concurrent group.
    Mock metricsConcurrentGroup;
    TConcurrentGroupParams_1_0 metricsConcurrentGroupParams = {};
    metricsConcurrentGroupParams.MetricSetsCount = 1;
    metricsConcurrentGroupParams.SymbolName = "OA";

    // Metrics Discovery: metric set
    Mock metricsSet;
    MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams = {};
    metricsSetParams.ApiMask = MetricsDiscovery::API_TYPE_OCL;

    // One api: metric group.
    zet_metric_group_handle_t metricGroupHandle = {};

    openMetricsAdapter();

    EXPECT_CALL(metricsDevice, GetParams())
        .WillRepeatedly(Return(&metricsDeviceParams));

    EXPECT_CALL(metricsDevice, GetConcurrentGroup(_))
        .Times(1)
        .WillOnce(Return(&metricsConcurrentGroup));

    EXPECT_CALL(metricsConcurrentGroup, GetParams())
        .Times(1)
        .WillOnce(Return(&metricsConcurrentGroupParams));

    EXPECT_CALL(metricsConcurrentGroup, GetMetricSet(_))
        .Times(1)
        .WillOnce(Return(&metricsSet));

    EXPECT_CALL(metricsSet, GetParams())
        .WillRepeatedly(Return(&metricsSetParams));

    EXPECT_CALL(metricsSet, SetApiFiltering(_))
        .WillRepeatedly(Return(TCompletionCode::CC_OK));

    // Metric group handles.
uint32_t metricGroupCount = 1;
    EXPECT_EQ(zetMetricGroupGet(device->toHandle(), &metricGroupCount, &metricGroupHandle), ZE_RESULT_SUCCESS);
    EXPECT_EQ(metricGroupCount, 1u);
    EXPECT_NE(metricGroupHandle, nullptr);
}

// Calls zetMetricGroupCalculateMetricValues with a 300-byte raw buffer while the
// metric set reports a QueryReportSize of 256 and then of 0; both calls are
// expected to return ZE_RESULT_ERROR_UNKNOWN.
TEST_F(MetricEnumerationTest, givenIncorrectRawReportSizeWhenZetMetricGroupCalculateMetricValuesIsCalledThenReturnsFail) {

    // The fixture's device reports one concurrent group.
    metricsDeviceParams.ConcurrentGroupsCount = 1;

    // Concurrent group "OA" with a single metric set.
    Mock oaGroup;
    TConcurrentGroupParams_1_0 oaGroupParams = {};
    oaGroupParams.SymbolName = "OA";
    oaGroupParams.MetricSetsCount = 1;

    // The metric set initially reports a 256-byte query report.
    Mock oaSet;
    MetricsDiscovery::TMetricSetParams_1_4 oaSetParams = {};
    oaSetParams.ApiMask = MetricsDiscovery::API_TYPE_OCL;
    oaSetParams.QueryReportSize = 256;

    // Receives the enumerated metric group.
    zet_metric_group_handle_t groupHandle = {};

    openMetricsAdapter();

    // Device level.
    EXPECT_CALL(metricsDevice, GetParams())
        .WillRepeatedly(Return(&metricsDeviceParams));
    EXPECT_CALL(metricsDevice, GetConcurrentGroup(_))
        .WillOnce(Return(&oaGroup));

    // Concurrent group level.
    EXPECT_CALL(oaGroup, GetParams())
        .WillOnce(Return(&oaGroupParams));
    EXPECT_CALL(oaGroup, GetMetricSet(_))
        .WillOnce(Return(&oaSet));

    // Metric set level. GetParams returns a pointer to oaSetParams, so the later
    // change of QueryReportSize is visible through the mock.
    EXPECT_CALL(oaSet, GetParams())
        .WillRepeatedly(Return(&oaSetParams));
    EXPECT_CALL(oaSet, SetApiFiltering(_))
        .WillRepeatedly(Return(TCompletionCode::CC_OK));

    // Fetch the metric group handle.
    uint32_t groupCount = 1;
    EXPECT_EQ(zetMetricGroupGet(device->toHandle(), &groupCount, &groupHandle), ZE_RESULT_SUCCESS);
    EXPECT_EQ(groupCount, 1u);
    EXPECT_NE(groupHandle, nullptr);

    // Raw input buffer whose size differs from either report size used below.
    constexpr size_t rawBufferSize = 300;
    uint8_t rawBuffer[rawBufferSize] = {};

    uint32_t calculatedCount = 0;
    const auto calculate = [&]() {
        return zetMetricGroupCalculateMetricValues(groupHandle, ZET_METRIC_GROUP_CALCULATION_TYPE_METRIC_VALUES, rawBufferSize, rawBuffer, &calculatedCount, nullptr);
    };

    // Case 1: the user buffer size does not match the 256-byte report size.
    EXPECT_NE(oaSetParams.QueryReportSize, rawBufferSize);
    EXPECT_EQ(calculate(), ZE_RESULT_ERROR_UNKNOWN);

    // Case 2: the driver-side report size is zero.
    oaSetParams.QueryReportSize = 0;
    EXPECT_NE(oaSetParams.QueryReportSize, rawBufferSize);
    EXPECT_EQ(calculate(), ZE_RESULT_ERROR_UNKNOWN);
}

TEST_F(MetricEnumerationTest, givenIncorrectRawReportSizeWhenZetMetricGroupCalculateMetricValuesExpIsCalledThenReturnsFail) {

    // Metrics Discovery device.
    metricsDeviceParams.ConcurrentGroupsCount = 1;

    // Metrics Discovery concurrent group.
    Mock metricsConcurrentGroup;
    TConcurrentGroupParams_1_0 metricsConcurrentGroupParams = {};
    metricsConcurrentGroupParams.MetricSetsCount = 1;
    metricsConcurrentGroupParams.SymbolName = "OA";

    // Metrics Discovery: metric set
    Mock metricsSet;
    MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams = {};
    metricsSetParams.ApiMask = MetricsDiscovery::API_TYPE_OCL;
    metricsSetParams.QueryReportSize = 0;

    // One api: metric group.
    zet_metric_group_handle_t metricGroupHandle = {};

    openMetricsAdapter();

    EXPECT_CALL(metricsDevice, GetParams())
        .WillRepeatedly(Return(&metricsDeviceParams));

    EXPECT_CALL(metricsDevice, GetConcurrentGroup(_))
        .Times(1)
        .WillOnce(Return(&metricsConcurrentGroup));

    EXPECT_CALL(metricsConcurrentGroup, GetParams())
        .Times(1)
        .WillOnce(Return(&metricsConcurrentGroupParams));

    EXPECT_CALL(metricsConcurrentGroup, GetMetricSet(_))
        .Times(1)
        .WillOnce(Return(&metricsSet));

    EXPECT_CALL(metricsSet, GetParams())
        .WillRepeatedly(Return(&metricsSetParams));

    EXPECT_CALL(metricsSet, SetApiFiltering(_))
        .WillRepeatedly(Return(TCompletionCode::CC_OK));

    // Metric group handles.
uint32_t metricGroupCount = 1; EXPECT_EQ(zetMetricGroupGet(device->toHandle(), &metricGroupCount, &metricGroupHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); EXPECT_NE(metricGroupHandle, nullptr); // Raw results. const size_t rawResultsSize = 300; uint8_t rawResults[rawResultsSize] = {}; // Invalid raw buffer size provided by the user. uint32_t dataCount = 0; uint32_t totalMetricCount = 0; EXPECT_NE(metricsSetParams.QueryReportSize, rawResultsSize); EXPECT_EQ(zetMetricGroupCalculateMultipleMetricValuesExp(metricGroupHandle, ZET_METRIC_GROUP_CALCULATION_TYPE_METRIC_VALUES, rawResultsSize, rawResults, &dataCount, &totalMetricCount, nullptr, nullptr), ZE_RESULT_ERROR_UNKNOWN); EXPECT_EQ(dataCount, 0u); EXPECT_EQ(totalMetricCount, 0u); // Invalid raw buffer size provided by the driver. EXPECT_NE(metricsSetParams.QueryReportSize, rawResultsSize); EXPECT_EQ(zetMetricGroupCalculateMultipleMetricValuesExp(metricGroupHandle, ZET_METRIC_GROUP_CALCULATION_TYPE_METRIC_VALUES, rawResultsSize, rawResults, &dataCount, &totalMetricCount, nullptr, nullptr), ZE_RESULT_ERROR_UNKNOWN); EXPECT_EQ(dataCount, 0u); EXPECT_EQ(totalMetricCount, 0u); } TEST_F(MetricEnumerationTest, givenCorrectRawReportSizeWhenZetMetricGroupCalculateMetricValuesExpIsCalledThenReturnsSuccess) { metricsDeviceParams.ConcurrentGroupsCount = 1; Mock metricsConcurrentGroup; TConcurrentGroupParams_1_0 metricsConcurrentGroupParams = {}; metricsConcurrentGroupParams.MetricSetsCount = 1; metricsConcurrentGroupParams.SymbolName = "OA"; Mock metricsSet; MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams = {}; metricsSetParams.ApiMask = MetricsDiscovery::API_TYPE_OCL; metricsSetParams.QueryReportSize = 256; metricsSetParams.MetricsCount = 11; Mock metric; MetricsDiscovery::TMetricParams_1_0 metricParams = {}; zet_metric_group_handle_t metricGroupHandle = {}; uint32_t returnedMetricCount = 1; openMetricsAdapter(); EXPECT_CALL(metricsDevice, GetParams()) .WillRepeatedly(Return(&metricsDeviceParams)); 
EXPECT_CALL(metricsDevice, GetConcurrentGroup(_)) .Times(1) .WillOnce(Return(&metricsConcurrentGroup)); EXPECT_CALL(metricsConcurrentGroup, GetParams()) .Times(1) .WillOnce(Return(&metricsConcurrentGroupParams)); EXPECT_CALL(metricsConcurrentGroup, GetMetricSet(_)) .Times(1) .WillOnce(Return(&metricsSet)); EXPECT_CALL(metricsSet, GetParams()) .WillRepeatedly(Return(&metricsSetParams)); EXPECT_CALL(metricsSet, GetMetric(_)) .Times(metricsSetParams.MetricsCount) .WillRepeatedly(Return(&metric)); EXPECT_CALL(metricsSet, SetApiFiltering(_)) .WillRepeatedly(Return(TCompletionCode::CC_OK)); EXPECT_CALL(metric, GetParams()) .WillRepeatedly(Return(&metricParams)); EXPECT_CALL(metricsSet, CalculateMetrics(_, _, _, _, _, _, _)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<4>(returnedMetricCount), Return(TCompletionCode::CC_OK))); uint32_t metricGroupCount = 1; EXPECT_EQ(zetMetricGroupGet(device->toHandle(), &metricGroupCount, &metricGroupHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); EXPECT_NE(metricGroupHandle, nullptr); // Raw results. const size_t rawResultsSize = 256; uint8_t rawResults[rawResultsSize] = {}; // Valid raw buffer size provided by the user. uint32_t dataCount = 0; uint32_t totalMetricCount = 0; EXPECT_EQ(metricsSetParams.QueryReportSize, rawResultsSize); EXPECT_EQ(zetMetricGroupCalculateMultipleMetricValuesExp(metricGroupHandle, ZET_METRIC_GROUP_CALCULATION_TYPE_METRIC_VALUES, rawResultsSize, rawResults, &dataCount, &totalMetricCount, nullptr, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(dataCount, 1u); EXPECT_EQ(totalMetricCount, metricsSetParams.MetricsCount); // Copy calculated metrics. 
std::vector metricCounts(dataCount); std::vector caculatedRawResults(totalMetricCount); EXPECT_EQ(zetMetricGroupCalculateMultipleMetricValuesExp(metricGroupHandle, ZET_METRIC_GROUP_CALCULATION_TYPE_METRIC_VALUES, rawResultsSize, rawResults, &dataCount, &totalMetricCount, metricCounts.data(), caculatedRawResults.data()), ZE_RESULT_SUCCESS); EXPECT_EQ(metricCounts[0], totalMetricCount); } TEST_F(MetricEnumerationTest, givenFailedCalculateMetricsWhenZetMetricGroupCalculateMetricValuesExpIsCalledThenReturnsFail) { metricsDeviceParams.ConcurrentGroupsCount = 1; Mock metricsConcurrentGroup; TConcurrentGroupParams_1_0 metricsConcurrentGroupParams = {}; metricsConcurrentGroupParams.MetricSetsCount = 1; metricsConcurrentGroupParams.SymbolName = "OA"; Mock metricsSet; MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams = {}; metricsSetParams.ApiMask = MetricsDiscovery::API_TYPE_OCL; metricsSetParams.QueryReportSize = 256; metricsSetParams.MetricsCount = 11; Mock metric; MetricsDiscovery::TMetricParams_1_0 metricParams = {}; zet_metric_group_handle_t metricGroupHandle = {}; openMetricsAdapter(); EXPECT_CALL(metricsDevice, GetParams()) .WillRepeatedly(Return(&metricsDeviceParams)); EXPECT_CALL(metricsDevice, GetConcurrentGroup(_)) .Times(1) .WillOnce(Return(&metricsConcurrentGroup)); EXPECT_CALL(metricsConcurrentGroup, GetParams()) .Times(1) .WillOnce(Return(&metricsConcurrentGroupParams)); EXPECT_CALL(metricsConcurrentGroup, GetMetricSet(_)) .Times(1) .WillOnce(Return(&metricsSet)); EXPECT_CALL(metricsSet, GetParams()) .WillRepeatedly(Return(&metricsSetParams)); EXPECT_CALL(metricsSet, GetMetric(_)) .Times(metricsSetParams.MetricsCount) .WillRepeatedly(Return(&metric)); EXPECT_CALL(metricsSet, SetApiFiltering(_)) .WillRepeatedly(Return(TCompletionCode::CC_OK)); EXPECT_CALL(metric, GetParams()) .WillRepeatedly(Return(&metricParams)); EXPECT_CALL(metricsSet, CalculateMetrics(_, _, _, _, _, _, _)) .Times(1) .WillOnce(Return(TCompletionCode::CC_ERROR_GENERAL)); uint32_t 
metricGroupCount = 1; EXPECT_EQ(zetMetricGroupGet(device->toHandle(), &metricGroupCount, &metricGroupHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); EXPECT_NE(metricGroupHandle, nullptr); // Raw results. const size_t rawResultsSize = 256; uint8_t rawResults[rawResultsSize] = {}; // Valid raw buffer size provided by the user. uint32_t dataCount = 0; uint32_t totalMetricCount = 0; EXPECT_EQ(metricsSetParams.QueryReportSize, rawResultsSize); EXPECT_EQ(zetMetricGroupCalculateMultipleMetricValuesExp(metricGroupHandle, ZET_METRIC_GROUP_CALCULATION_TYPE_METRIC_VALUES, rawResultsSize, rawResults, &dataCount, &totalMetricCount, nullptr, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(dataCount, 1u); EXPECT_EQ(totalMetricCount, metricsSetParams.MetricsCount); // Copy calculated metrics. std::vector metricCounts(dataCount); std::vector caculatedRawResults(totalMetricCount); EXPECT_EQ(zetMetricGroupCalculateMultipleMetricValuesExp(metricGroupHandle, ZET_METRIC_GROUP_CALCULATION_TYPE_METRIC_VALUES, rawResultsSize, rawResults, &dataCount, &totalMetricCount, metricCounts.data(), caculatedRawResults.data()), ZE_RESULT_ERROR_UNKNOWN); EXPECT_EQ(metricCounts[0], 0u); } TEST_F(MetricEnumerationTest, givenInvalidQueryReportSizeWhenZetMetricGroupCalculateMultipleMetricValuesExpIsCalledTwiceThenReturnsFail) { metricsDeviceParams.ConcurrentGroupsCount = 1; Mock metricsConcurrentGroup; TConcurrentGroupParams_1_0 metricsConcurrentGroupParams = {}; metricsConcurrentGroupParams.MetricSetsCount = 1; metricsConcurrentGroupParams.SymbolName = "OA"; Mock metricsSet; MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams = {}; metricsSetParams.ApiMask = MetricsDiscovery::API_TYPE_OCL; metricsSetParams.QueryReportSize = 0; metricsSetParams.MetricsCount = 11; Mock metric; MetricsDiscovery::TMetricParams_1_0 metricParams = {}; zet_metric_group_handle_t metricGroupHandle = {}; openMetricsAdapter(); EXPECT_CALL(metricsDevice, GetParams()) .WillRepeatedly(Return(&metricsDeviceParams)); 
EXPECT_CALL(metricsDevice, GetConcurrentGroup(_)) .Times(1) .WillOnce(Return(&metricsConcurrentGroup)); EXPECT_CALL(metricsConcurrentGroup, GetParams()) .Times(1) .WillOnce(Return(&metricsConcurrentGroupParams)); EXPECT_CALL(metricsConcurrentGroup, GetMetricSet(_)) .Times(1) .WillOnce(Return(&metricsSet)); EXPECT_CALL(metricsSet, GetParams()) .WillRepeatedly(Return(&metricsSetParams)); EXPECT_CALL(metricsSet, GetMetric(_)) .Times(metricsSetParams.MetricsCount) .WillRepeatedly(Return(&metric)); EXPECT_CALL(metricsSet, SetApiFiltering(_)) .WillRepeatedly(Return(TCompletionCode::CC_OK)); EXPECT_CALL(metric, GetParams()) .WillRepeatedly(Return(&metricParams)); // Metric group handles. uint32_t metricGroupCount = 1; EXPECT_EQ(zetMetricGroupGet(device->toHandle(), &metricGroupCount, &metricGroupHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); EXPECT_NE(metricGroupHandle, nullptr); // Raw results. const size_t rawResultsSize = 284; uint8_t rawResults[rawResultsSize] = {}; MetricGroupCalculateHeader *pRawHeader = reinterpret_cast(rawResults); pRawHeader->magic = MetricGroupCalculateHeader::magicValue; pRawHeader->dataCount = 1; pRawHeader->rawDataOffsets = sizeof(MetricGroupCalculateHeader); pRawHeader->rawDataSizes = pRawHeader->rawDataOffsets + sizeof(uint32_t) * pRawHeader->dataCount; pRawHeader->rawDataOffset = pRawHeader->rawDataSizes + sizeof(uint32_t) * pRawHeader->dataCount; uint32_t *pRawDataOffsets = reinterpret_cast(rawResults + pRawHeader->rawDataOffsets); uint32_t *pRawDataSizes = reinterpret_cast(rawResults + pRawHeader->rawDataSizes); pRawDataOffsets[0] = 0; pRawDataSizes[0] = static_cast(rawResultsSize - pRawHeader->rawDataOffset); // Invalid raw buffer size provided by the driver. 
uint32_t dataCount = 0;
    uint32_t totalMetricCount = 0;
    EXPECT_EQ(zetMetricGroupCalculateMultipleMetricValuesExp(metricGroupHandle, ZET_METRIC_GROUP_CALCULATION_TYPE_METRIC_VALUES, rawResultsSize, rawResults, &dataCount, &totalMetricCount, nullptr, nullptr), ZE_RESULT_ERROR_UNKNOWN);
    EXPECT_EQ(dataCount, 0u);
    EXPECT_EQ(totalMetricCount, 0u);
}

// Feeds a raw buffer that starts with a MetricGroupCalculateHeader describing one data set.
// The size query must report one data set with MetricsCount values, and the second call
// must succeed with the mocked CalculateMetrics reporting returnedMetricCount.
TEST_F(MetricEnumerationTest, givenCorrectRawDataHeaderWhenZetMetricGroupCalculateMultipleMetricValuesExpIsCalledTwiceThenReturnsSuccess) {

    metricsDeviceParams.ConcurrentGroupsCount = 1;

    // NOTE(review): every bare `Mock` in this file seems to have lost its template
    // argument list in this copy; restore it before building.
    Mock metricsConcurrentGroup;
    TConcurrentGroupParams_1_0 metricsConcurrentGroupParams = {};
    metricsConcurrentGroupParams.MetricSetsCount = 1;
    metricsConcurrentGroupParams.SymbolName = "OA";

    Mock metricsSet;
    MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams = {};
    metricsSetParams.ApiMask = MetricsDiscovery::API_TYPE_OCL;
    metricsSetParams.QueryReportSize = 256;
    metricsSetParams.MetricsCount = 11;

    Mock metric;
    MetricsDiscovery::TMetricParams_1_0 metricParams = {};

    zet_metric_group_handle_t metricGroupHandle = {};

    uint32_t returnedMetricCount = 1;

    openMetricsAdapter();

    EXPECT_CALL(metricsDevice, GetParams())
        .WillRepeatedly(Return(&metricsDeviceParams));

    EXPECT_CALL(metricsDevice, GetConcurrentGroup(_))
        .Times(1)
        .WillOnce(Return(&metricsConcurrentGroup));

    EXPECT_CALL(metricsConcurrentGroup, GetParams())
        .Times(1)
        .WillOnce(Return(&metricsConcurrentGroupParams));

    EXPECT_CALL(metricsConcurrentGroup, GetMetricSet(_))
        .Times(1)
        .WillOnce(Return(&metricsSet));

    EXPECT_CALL(metricsSet, GetParams())
        .WillRepeatedly(Return(&metricsSetParams));

    EXPECT_CALL(metricsSet, GetMetric(_))
        .Times(metricsSetParams.MetricsCount)
        .WillRepeatedly(Return(&metric));

    EXPECT_CALL(metricsSet, SetApiFiltering(_))
        .WillRepeatedly(Return(TCompletionCode::CC_OK));

    EXPECT_CALL(metric, GetParams())
        .WillRepeatedly(Return(&metricParams));

    EXPECT_CALL(metricsSet, CalculateMetrics(_, _, _, _, _, _, _))
        .Times(1)
        .WillOnce(DoAll(::testing::SetArgPointee<4>(returnedMetricCount), Return(TCompletionCode::CC_OK)));

    // Metric group handles.
    uint32_t metricGroupCount = 1;
    EXPECT_EQ(zetMetricGroupGet(device->toHandle(), &metricGroupCount, &metricGroupHandle), ZE_RESULT_SUCCESS);
    EXPECT_EQ(metricGroupCount, 1u);
    EXPECT_NE(metricGroupHandle, nullptr);

    // Raw results: header, one uint32_t offset entry, one uint32_t size entry, raw data.
    const size_t rawResultsSize = 284;
    uint8_t rawResults[rawResultsSize] = {};
    MetricGroupCalculateHeader *pRawHeader = reinterpret_cast<MetricGroupCalculateHeader *>(rawResults);
    pRawHeader->magic = MetricGroupCalculateHeader::magicValue;
    pRawHeader->dataCount = 1;
    pRawHeader->rawDataOffsets = sizeof(MetricGroupCalculateHeader);
    pRawHeader->rawDataSizes = pRawHeader->rawDataOffsets + sizeof(uint32_t) * pRawHeader->dataCount;
    pRawHeader->rawDataOffset = pRawHeader->rawDataSizes + sizeof(uint32_t) * pRawHeader->dataCount;

    uint32_t *pRawDataOffsets = reinterpret_cast<uint32_t *>(rawResults + pRawHeader->rawDataOffsets);
    uint32_t *pRawDataSizes = reinterpret_cast<uint32_t *>(rawResults + pRawHeader->rawDataSizes);
    pRawDataOffsets[0] = 0;
    pRawDataSizes[0] = static_cast<uint32_t>(rawResultsSize - pRawHeader->rawDataOffset);

    // Valid raw buffer.
    uint32_t dataCount = 0;
    uint32_t totalMetricCount = 0;
    // The buffer is larger than one query report because it also carries the header.
    EXPECT_NE(metricsSetParams.QueryReportSize, rawResultsSize);
    EXPECT_EQ(zetMetricGroupCalculateMultipleMetricValuesExp(metricGroupHandle, ZET_METRIC_GROUP_CALCULATION_TYPE_METRIC_VALUES, rawResultsSize, rawResults, &dataCount, &totalMetricCount, nullptr, nullptr), ZE_RESULT_SUCCESS);
    EXPECT_EQ(dataCount, 1u);
    EXPECT_EQ(totalMetricCount, metricsSetParams.MetricsCount);

    // Copy calculated metrics.
std::vector metricCounts(dataCount); std::vector caculatedRawResults(totalMetricCount); EXPECT_EQ(zetMetricGroupCalculateMultipleMetricValuesExp(metricGroupHandle, ZET_METRIC_GROUP_CALCULATION_TYPE_METRIC_VALUES, rawResultsSize, rawResults, &dataCount, &totalMetricCount, metricCounts.data(), caculatedRawResults.data()), ZE_RESULT_SUCCESS); EXPECT_EQ(metricCounts[0], totalMetricCount); } TEST_F(MetricEnumerationTest, givenCorrectRawReportSizeWhenZetMetricGroupCalculateMetricValuesIsCalledThenReturnsCorrectCalculatedReportCount) { // Metrics Discovery device. metricsDeviceParams.ConcurrentGroupsCount = 1; // Metrics Discovery concurrent group. Mock metricsConcurrentGroup; TConcurrentGroupParams_1_0 metricsConcurrentGroupParams = {}; metricsConcurrentGroupParams.MetricSetsCount = 1; metricsConcurrentGroupParams.SymbolName = "OA"; // Metrics Discovery: metric set Mock metricsSet; MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams = {}; metricsSetParams.ApiMask = MetricsDiscovery::API_TYPE_OCL; metricsSetParams.QueryReportSize = 256; metricsSetParams.MetricsCount = 11; // Metrics Discovery: metric Mock metric; MetricsDiscovery::TMetricParams_1_0 metricParams = {}; // One api: metric group. 
zet_metric_group_handle_t metricGroupHandle = {}; openMetricsAdapter(); EXPECT_CALL(metricsDevice, GetParams()) .WillRepeatedly(Return(&metricsDeviceParams)); EXPECT_CALL(metricsDevice, GetConcurrentGroup(_)) .Times(1) .WillOnce(Return(&metricsConcurrentGroup)); EXPECT_CALL(metricsConcurrentGroup, GetParams()) .Times(1) .WillOnce(Return(&metricsConcurrentGroupParams)); EXPECT_CALL(metricsConcurrentGroup, GetMetricSet(_)) .WillRepeatedly(Return(&metricsSet)); EXPECT_CALL(metricsSet, GetParams()) .WillRepeatedly(Return(&metricsSetParams)); EXPECT_CALL(metricsSet, GetMetric(_)) .Times(11) .WillRepeatedly(Return(&metric)); EXPECT_CALL(metricsSet, SetApiFiltering(_)) .WillRepeatedly(Return(TCompletionCode::CC_OK)); EXPECT_CALL(metric, GetParams()) .WillRepeatedly(Return(&metricParams)); // Metric group handles. uint32_t metricGroupCount = 1; EXPECT_EQ(zetMetricGroupGet(device->toHandle(), &metricGroupCount, &metricGroupHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); EXPECT_NE(metricGroupHandle, nullptr); // Return correct calculated report count for single raw report. { size_t rawResultsSize = metricsSetParams.QueryReportSize; std::vector rawResults(rawResultsSize); uint32_t calculatedResults = 0; EXPECT_EQ(zetMetricGroupCalculateMetricValues(metricGroupHandle, ZET_METRIC_GROUP_CALCULATION_TYPE_METRIC_VALUES, rawResultsSize, rawResults.data(), &calculatedResults, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(calculatedResults, metricsSetParams.MetricsCount); } // Return correct calculated report count for two raw report. 
{
        size_t rawResultsSize = 2 * metricsSetParams.QueryReportSize;
        std::vector<uint8_t> rawResults(rawResultsSize);
        uint32_t calculatedResults = 0;

        EXPECT_EQ(zetMetricGroupCalculateMetricValues(metricGroupHandle, ZET_METRIC_GROUP_CALCULATION_TYPE_METRIC_VALUES, rawResultsSize, rawResults.data(), &calculatedResults, nullptr), ZE_RESULT_SUCCESS);
        EXPECT_EQ(calculatedResults, 2 * metricsSetParams.MetricsCount);
    }
}

// After the size query reports MetricsCount values, the caller passes a smaller count (2)
// together with a matching buffer; the call is still expected to succeed.
TEST_F(MetricEnumerationTest, givenCorrectRawReportSizeAndLowerProvidedCalculatedReportCountThanObtainedFromApiWhenZetMetricGroupCalculateMetricValuesIsCalledThenReturnsCorrectCalculatedReportCount) {

    // Metrics Discovery device.
    metricsDeviceParams.ConcurrentGroupsCount = 1;

    // Metrics Discovery concurrent group.
    // NOTE(review): every bare `Mock` in this file seems to have lost its template
    // argument list in this copy; restore it before building.
    Mock metricsConcurrentGroup;
    TConcurrentGroupParams_1_0 metricsConcurrentGroupParams = {};
    metricsConcurrentGroupParams.MetricSetsCount = 1;
    metricsConcurrentGroupParams.SymbolName = "OA";

    // Metrics Discovery: metric set
    Mock metricsSet;
    MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams = {};
    metricsSetParams.ApiMask = MetricsDiscovery::API_TYPE_OCL;
    metricsSetParams.QueryReportSize = 256;
    metricsSetParams.MetricsCount = 11;

    // Metrics Discovery: metric
    Mock metric;
    MetricsDiscovery::TMetricParams_1_0 metricParams = {};

    // One api: metric group.
zet_metric_group_handle_t metricGroupHandle = {}; openMetricsAdapter(); EXPECT_CALL(metricsDevice, GetParams()) .WillRepeatedly(Return(&metricsDeviceParams)); EXPECT_CALL(metricsDevice, GetConcurrentGroup(_)) .Times(1) .WillOnce(Return(&metricsConcurrentGroup)); EXPECT_CALL(metricsConcurrentGroup, GetParams()) .Times(1) .WillOnce(Return(&metricsConcurrentGroupParams)); EXPECT_CALL(metricsConcurrentGroup, GetMetricSet(_)) .WillRepeatedly(Return(&metricsSet)); EXPECT_CALL(metricsSet, GetParams()) .WillRepeatedly(Return(&metricsSetParams)); EXPECT_CALL(metricsSet, GetMetric(_)) .Times(11) .WillRepeatedly(Return(&metric)); EXPECT_CALL(metricsSet, SetApiFiltering(_)) .WillRepeatedly(Return(TCompletionCode::CC_OK)); EXPECT_CALL(metric, GetParams()) .WillRepeatedly(Return(&metricParams)); uint32_t returnedMetricCount = 2; EXPECT_CALL(metricsSet, CalculateMetrics(_, _, _, _, _, _, _)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<4>(returnedMetricCount), Return(TCompletionCode::CC_OK))); // Metric group handles. uint32_t metricGroupCount = 1; EXPECT_EQ(zetMetricGroupGet(device->toHandle(), &metricGroupCount, &metricGroupHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); EXPECT_NE(metricGroupHandle, nullptr); // Return correct calculated report count for single raw report. size_t rawResultsSize = metricsSetParams.QueryReportSize; std::vector rawResults(rawResultsSize); uint32_t calculatedResultsCount = 0; EXPECT_EQ(zetMetricGroupCalculateMetricValues(metricGroupHandle, ZET_METRIC_GROUP_CALCULATION_TYPE_METRIC_VALUES, rawResultsSize, rawResults.data(), &calculatedResultsCount, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(calculatedResultsCount, metricsSetParams.MetricsCount); // Provide lower calculated report count than returned earlier from api. 
calculatedResultsCount = 2; std::vector caculatedrawResults(calculatedResultsCount); EXPECT_EQ(zetMetricGroupCalculateMetricValues(metricGroupHandle, ZET_METRIC_GROUP_CALCULATION_TYPE_METRIC_VALUES, rawResultsSize, rawResults.data(), &calculatedResultsCount, caculatedrawResults.data()), ZE_RESULT_SUCCESS); } TEST_F(MetricEnumerationTest, givenCorrectRawReportSizeAndCorrectCalculatedReportCountWhenZetMetricGroupCalculateMetricValuesIsCalledThenReturnsSuccess) { // Metrics Discovery device. metricsDeviceParams.ConcurrentGroupsCount = 1; // Metrics Discovery concurrent group. Mock metricsConcurrentGroup; TConcurrentGroupParams_1_0 metricsConcurrentGroupParams = {}; metricsConcurrentGroupParams.MetricSetsCount = 1; metricsConcurrentGroupParams.SymbolName = "OA"; // Metrics Discovery: metric set Mock metricsSet; MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams = {}; metricsSetParams.ApiMask = MetricsDiscovery::API_TYPE_OCL; metricsSetParams.QueryReportSize = 256; metricsSetParams.MetricsCount = 11; // Metrics Discovery: metric Mock metric; MetricsDiscovery::TMetricParams_1_0 metricParams = {}; // One api: metric group. 
zet_metric_group_handle_t metricGroupHandle = {}; openMetricsAdapter(); EXPECT_CALL(metricsDevice, GetParams()) .WillRepeatedly(Return(&metricsDeviceParams)); EXPECT_CALL(metricsDevice, GetConcurrentGroup(_)) .Times(1) .WillOnce(Return(&metricsConcurrentGroup)); EXPECT_CALL(metricsConcurrentGroup, GetParams()) .Times(1) .WillOnce(Return(&metricsConcurrentGroupParams)); EXPECT_CALL(metricsConcurrentGroup, GetMetricSet(_)) .WillRepeatedly(Return(&metricsSet)); EXPECT_CALL(metricsSet, GetParams()) .WillRepeatedly(Return(&metricsSetParams)); EXPECT_CALL(metricsSet, GetMetric(_)) .Times(11) .WillRepeatedly(Return(&metric)); EXPECT_CALL(metricsSet, SetApiFiltering(_)) .WillRepeatedly(Return(TCompletionCode::CC_OK)); EXPECT_CALL(metricsSet, CalculateMetrics(_, _, _, _, _, _, _)) .Times(1) .WillOnce(Return(TCompletionCode::CC_OK)); EXPECT_CALL(metric, GetParams()) .WillRepeatedly(Return(&metricParams)); // Metric group handles. uint32_t metricGroupCount = 1; EXPECT_EQ(zetMetricGroupGet(device->toHandle(), &metricGroupCount, &metricGroupHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); EXPECT_NE(metricGroupHandle, nullptr); // Return correct calculated report count for single raw report. size_t rawResultsSize = metricsSetParams.QueryReportSize; std::vector rawResults(rawResultsSize); uint32_t calculatedResultsCount = 0; EXPECT_EQ(zetMetricGroupCalculateMetricValues(metricGroupHandle, ZET_METRIC_GROUP_CALCULATION_TYPE_METRIC_VALUES, rawResultsSize, rawResults.data(), &calculatedResultsCount, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(calculatedResultsCount, metricsSetParams.MetricsCount); // Provide incorrect calculated report buffer. 
std::vector caculatedrawResults(calculatedResultsCount); EXPECT_EQ(zetMetricGroupCalculateMetricValues(metricGroupHandle, ZET_METRIC_GROUP_CALCULATION_TYPE_METRIC_VALUES, rawResultsSize, rawResults.data(), &calculatedResultsCount, caculatedrawResults.data()), ZE_RESULT_SUCCESS); } TEST_F(MetricEnumerationTest, givenCorrectRawReportSizeAndCorrectCalculatedReportCountWhenZetMetricGroupCalculateMetricValuesIsCalledThenMaxValuesAreReturned) { // Metrics Discovery device. metricsDeviceParams.ConcurrentGroupsCount = 1; // Metrics Discovery concurrent group. Mock metricsConcurrentGroup; TConcurrentGroupParams_1_0 metricsConcurrentGroupParams = {}; metricsConcurrentGroupParams.MetricSetsCount = 1; metricsConcurrentGroupParams.SymbolName = "OA"; // Metrics Discovery: metric set Mock metricsSet; MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams = {}; metricsSetParams.ApiMask = MetricsDiscovery::API_TYPE_OCL; metricsSetParams.QueryReportSize = 256; metricsSetParams.MetricsCount = 11; // Metrics Discovery: metric Mock metric; MetricsDiscovery::TMetricParams_1_0 metricParams = {}; // One api: metric group. 
zet_metric_group_handle_t metricGroupHandle = {}; openMetricsAdapter(); EXPECT_CALL(metricsDevice, GetParams()) .WillRepeatedly(Return(&metricsDeviceParams)); EXPECT_CALL(metricsDevice, GetConcurrentGroup(_)) .Times(1) .WillOnce(Return(&metricsConcurrentGroup)); EXPECT_CALL(metricsConcurrentGroup, GetParams()) .Times(1) .WillOnce(Return(&metricsConcurrentGroupParams)); EXPECT_CALL(metricsConcurrentGroup, GetMetricSet(_)) .WillRepeatedly(Return(&metricsSet)); EXPECT_CALL(metricsSet, GetParams()) .WillRepeatedly(Return(&metricsSetParams)); EXPECT_CALL(metricsSet, GetMetric(_)) .Times(11) .WillRepeatedly(Return(&metric)); EXPECT_CALL(metricsSet, SetApiFiltering(_)) .WillRepeatedly(Return(TCompletionCode::CC_OK)); EXPECT_CALL(metricsSet, CalculateMetrics(_, _, _, _, _, _, _)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<4>(metricsSetParams.MetricsCount), Return(TCompletionCode::CC_OK))); EXPECT_CALL(metric, GetParams()) .WillRepeatedly(Return(&metricParams)); // Metric group handles. uint32_t metricGroupCount = 1; EXPECT_EQ(zetMetricGroupGet(device->toHandle(), &metricGroupCount, &metricGroupHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); EXPECT_NE(metricGroupHandle, nullptr); // Return correct calculated report count for single raw report. size_t rawResultsSize = metricsSetParams.QueryReportSize; std::vector rawResults(rawResultsSize); uint32_t calculatedResultsCount = 0; EXPECT_EQ(zetMetricGroupCalculateMetricValues(metricGroupHandle, ZET_METRIC_GROUP_CALCULATION_TYPE_MAX_METRIC_VALUES, rawResultsSize, rawResults.data(), &calculatedResultsCount, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(calculatedResultsCount, metricsSetParams.MetricsCount); // Provide incorrect calculated report buffer. 
std::vector caculatedrawResults(calculatedResultsCount); EXPECT_EQ(zetMetricGroupCalculateMetricValues(metricGroupHandle, ZET_METRIC_GROUP_CALCULATION_TYPE_MAX_METRIC_VALUES, rawResultsSize, rawResults.data(), &calculatedResultsCount, caculatedrawResults.data()), ZE_RESULT_SUCCESS); } TEST_F(MetricEnumerationTest, givenIncorrectCalculationTypeWhenZetMetricGroupCalculateMetricValuesIsCalledThenMaxValuesAreReturned) { // Metrics Discovery device. metricsDeviceParams.ConcurrentGroupsCount = 1; // Metrics Discovery concurrent group. Mock metricsConcurrentGroup; TConcurrentGroupParams_1_0 metricsConcurrentGroupParams = {}; metricsConcurrentGroupParams.MetricSetsCount = 1; metricsConcurrentGroupParams.SymbolName = "OA"; // Metrics Discovery: metric set Mock metricsSet; MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams = {}; metricsSetParams.ApiMask = MetricsDiscovery::API_TYPE_OCL; metricsSetParams.QueryReportSize = 256; metricsSetParams.MetricsCount = 11; // Metrics Discovery: metric Mock metric; MetricsDiscovery::TMetricParams_1_0 metricParams = {}; // One api: metric group. zet_metric_group_handle_t metricGroupHandle = {}; openMetricsAdapter(); EXPECT_CALL(metricsDevice, GetParams()) .WillRepeatedly(Return(&metricsDeviceParams)); EXPECT_CALL(metricsDevice, GetConcurrentGroup(_)) .Times(1) .WillOnce(Return(&metricsConcurrentGroup)); EXPECT_CALL(metricsConcurrentGroup, GetParams()) .Times(1) .WillOnce(Return(&metricsConcurrentGroupParams)); EXPECT_CALL(metricsConcurrentGroup, GetMetricSet(_)) .WillRepeatedly(Return(&metricsSet)); EXPECT_CALL(metricsSet, GetParams()) .WillRepeatedly(Return(&metricsSetParams)); EXPECT_CALL(metricsSet, GetMetric(_)) .Times(11) .WillRepeatedly(Return(&metric)); EXPECT_CALL(metricsSet, SetApiFiltering(_)) .WillRepeatedly(Return(TCompletionCode::CC_OK)); EXPECT_CALL(metric, GetParams()) .WillRepeatedly(Return(&metricParams)); // Metric group handles. 
uint32_t metricGroupCount = 1; EXPECT_EQ(zetMetricGroupGet(device->toHandle(), &metricGroupCount, &metricGroupHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); EXPECT_NE(metricGroupHandle, nullptr); // Return correct calculated report count for single raw report. size_t rawResultsSize = metricsSetParams.QueryReportSize; std::vector rawResults(rawResultsSize); uint32_t calculatedResultsCount = 0; EXPECT_EQ(zetMetricGroupCalculateMetricValues(metricGroupHandle, ZET_METRIC_GROUP_CALCULATION_TYPE_METRIC_VALUES, rawResultsSize, rawResults.data(), &calculatedResultsCount, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(calculatedResultsCount, metricsSetParams.MetricsCount); } TEST_F(MetricEnumerationTest, givenInitializedMetricEnumerationWhenIsInitializedIsCalledThenMetricEnumerationWillNotBeInitializedAgain) { mockMetricEnumeration->initializationState = ZE_RESULT_SUCCESS; EXPECT_EQ(mockMetricEnumeration->baseIsInitialized(), true); } TEST_F(MetricEnumerationTest, givenNotInitializedMetricEnumerationWhenIsInitializedIsCalledThenMetricEnumerationWillBeInitialized) { // Metrics Discovery device. metricsDeviceParams.ConcurrentGroupsCount = 1; // Metrics Discovery concurrent group. 
Mock metricsConcurrentGroup; TConcurrentGroupParams_1_0 metricsConcurrentGroupParams = {}; metricsConcurrentGroupParams.MetricSetsCount = 1; metricsConcurrentGroupParams.SymbolName = "OA"; // Metrics Discovery: metric set Mock metricsSet; MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams = {}; metricsSetParams.ApiMask = MetricsDiscovery::API_TYPE_OCL; metricsSetParams.QueryReportSize = 256; metricsSetParams.MetricsCount = 11; // Metrics Discovery: metric Mock metric; MetricsDiscovery::TMetricParams_1_0 metricParams = {}; openMetricsAdapter(); EXPECT_CALL(metricsDevice, GetParams()) .WillRepeatedly(Return(&metricsDeviceParams)); EXPECT_CALL(metricsDevice, GetConcurrentGroup(_)) .Times(1) .WillOnce(Return(&metricsConcurrentGroup)); EXPECT_CALL(metricsConcurrentGroup, GetParams()) .Times(1) .WillOnce(Return(&metricsConcurrentGroupParams)); EXPECT_CALL(metricsConcurrentGroup, GetMetricSet(_)) .WillRepeatedly(Return(&metricsSet)); EXPECT_CALL(metricsSet, GetParams()) .WillRepeatedly(Return(&metricsSetParams)); EXPECT_CALL(metricsSet, GetMetric(_)) .Times(11) .WillRepeatedly(Return(&metric)); EXPECT_CALL(metricsSet, SetApiFiltering(_)) .WillRepeatedly(Return(TCompletionCode::CC_OK)); EXPECT_CALL(metric, GetParams()) .WillRepeatedly(Return(&metricParams)); mockMetricEnumeration->initializationState = ZE_RESULT_ERROR_UNINITIALIZED; EXPECT_EQ(mockMetricEnumeration->baseIsInitialized(), true); } TEST_F(MetricEnumerationTest, givenLoadedMetricsLibraryAndDiscoveryAndMetricsLibraryInitializedWhenLoadDependenciesThenReturnSuccess) { EXPECT_CALL(*mockMetricEnumeration, loadMetricsDiscovery()) .Times(1) .WillOnce(Return(ZE_RESULT_SUCCESS)); EXPECT_CALL(*mockMetricsLibrary, load()) .Times(1) .WillOnce(Return(true)); mockMetricsLibrary->initializationState = ZE_RESULT_SUCCESS; auto &metricSource = device->getMetricDeviceContext().getMetricSource(); EXPECT_EQ(metricSource.loadDependencies(), true); EXPECT_EQ(metricSource.isInitialized(), true); } TEST_F(MetricEnumerationTest, 
givenNotLoadedMetricsLibraryAndDiscoveryWhenLoadDependenciesThenReturnFail) { EXPECT_CALL(*mockMetricEnumeration, loadMetricsDiscovery()) .Times(1) .WillOnce(Return(ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE)); auto &metricSource = device->getMetricDeviceContext().getMetricSource(); EXPECT_EQ(metricSource.loadDependencies(), false); EXPECT_EQ(metricSource.isInitialized(), false); } TEST_F(MetricEnumerationTest, givenRootDeviceWhenLoadDependenciesIsCalledThenLegacyOpenMetricsDeviceWillBeCalled) { auto &metricSource = device->getMetricDeviceContext().getMetricSource(); Mock mockAdapterGroup; Mock mockAdapter; Mock mockDevice; EXPECT_CALL(*mockMetricsLibrary, load()) .Times(1) .WillOnce(Return(true)); EXPECT_CALL(*mockMetricEnumeration, loadMetricsDiscovery()) .Times(1) .WillOnce(Return(ZE_RESULT_SUCCESS)); EXPECT_CALL(*Mock::g_mockApi, MockOpenAdapterGroup(_)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<0>(&mockAdapterGroup), Return(TCompletionCode::CC_OK))); EXPECT_CALL(*mockMetricEnumeration, getMetricsAdapter()) .Times(1) .WillOnce(Return(&mockAdapter)); EXPECT_CALL(mockAdapter, OpenMetricsDevice(_)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<0>(&mockDevice), Return(TCompletionCode::CC_OK))); EXPECT_CALL(mockAdapter, CloseMetricsDevice(_)) .Times(1) .WillOnce(Return(TCompletionCode::CC_OK)); EXPECT_CALL(mockDevice, GetParams()) .Times(1) .WillOnce(Return(&metricsDeviceParams)); EXPECT_CALL(mockAdapterGroup, Close()) .Times(1) .WillOnce(Return(TCompletionCode::CC_OK)); // Use first sub device. 
device->getMetricDeviceContext().setSubDeviceIndex(0); mockMetricsLibrary->initializationState = ZE_RESULT_SUCCESS; EXPECT_EQ(metricSource.loadDependencies(), true); EXPECT_EQ(metricSource.isInitialized(), true); EXPECT_EQ(mockMetricEnumeration->baseIsInitialized(), true); EXPECT_EQ(mockMetricEnumeration->cleanupMetricsDiscovery(), ZE_RESULT_SUCCESS); } TEST_F(MetricEnumerationTest, givenSubDeviceWhenLoadDependenciesIsCalledThenOpenMetricsSubDeviceWillBeCalled) { auto &metricSource = device->getMetricDeviceContext().getMetricSource(); Mock mockAdapterGroup; Mock mockAdapter; Mock mockDevice; EXPECT_CALL(*mockMetricsLibrary, load()) .Times(1) .WillOnce(Return(true)); EXPECT_CALL(*mockMetricEnumeration, loadMetricsDiscovery()) .Times(1) .WillOnce(Return(ZE_RESULT_SUCCESS)); EXPECT_CALL(*Mock::g_mockApi, MockOpenAdapterGroup(_)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<0>(&mockAdapterGroup), Return(TCompletionCode::CC_OK))); EXPECT_CALL(*mockMetricEnumeration, getMetricsAdapter()) .Times(1) .WillOnce(Return(&mockAdapter)); EXPECT_CALL(mockAdapter, OpenMetricsSubDevice(_, _)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<1>(&mockDevice), Return(TCompletionCode::CC_OK))); EXPECT_CALL(mockAdapter, CloseMetricsDevice(_)) .Times(1) .WillOnce(Return(TCompletionCode::CC_OK)); EXPECT_CALL(mockDevice, GetParams()) .Times(1) .WillOnce(Return(&metricsDeviceParams)); EXPECT_CALL(mockAdapterGroup, Close()) .Times(1) .WillOnce(Return(TCompletionCode::CC_OK)); // Use second sub device. 
device->getMetricDeviceContext().setSubDeviceIndex(1); mockMetricsLibrary->initializationState = ZE_RESULT_SUCCESS; EXPECT_EQ(metricSource.loadDependencies(), true); EXPECT_EQ(metricSource.isInitialized(), true); EXPECT_EQ(mockMetricEnumeration->baseIsInitialized(), true); EXPECT_EQ(mockMetricEnumeration->cleanupMetricsDiscovery(), ZE_RESULT_SUCCESS); } TEST_F(MetricEnumerationTest, givenSubDeviceWhenLoadDependenciesIsCalledThenOpenMetricsSubDeviceWillBeCalledWithoutSuccess) { auto &metricSource = device->getMetricDeviceContext().getMetricSource(); Mock mockAdapterGroup; Mock mockAdapter; Mock mockDevice; EXPECT_CALL(*mockMetricsLibrary, load()) .Times(1) .WillOnce(Return(true)); EXPECT_CALL(*mockMetricEnumeration, loadMetricsDiscovery()) .Times(1) .WillOnce(Return(ZE_RESULT_SUCCESS)); EXPECT_CALL(*Mock::g_mockApi, MockOpenAdapterGroup(_)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<0>(&mockAdapterGroup), Return(TCompletionCode::CC_OK))); EXPECT_CALL(*mockMetricEnumeration, getMetricsAdapter()) .Times(1) .WillOnce(Return(&mockAdapter)); EXPECT_CALL(mockAdapter, OpenMetricsSubDevice(_, _)) .Times(1) .WillOnce(Return(TCompletionCode::CC_OK)); EXPECT_CALL(mockAdapter, CloseMetricsDevice(_)) .Times(0); EXPECT_CALL(mockDevice, GetParams()) .Times(0); EXPECT_CALL(mockAdapterGroup, Close()) .Times(1) .WillOnce(Return(TCompletionCode::CC_OK)); // Use second sub device. 
device->getMetricDeviceContext().setSubDeviceIndex(1); mockMetricsLibrary->initializationState = ZE_RESULT_SUCCESS; EXPECT_EQ(metricSource.loadDependencies(), true); EXPECT_EQ(metricSource.isInitialized(), true); EXPECT_EQ(mockMetricEnumeration->baseIsInitialized(), false); } class MetricEnumerationTestMetricTypes : public MetricEnumerationTest, public ::testing::WithParamInterface { public: MetricsDiscovery::TMetricType metricType; MetricEnumerationTestMetricTypes() { metricType = GetParam(); } ~MetricEnumerationTestMetricTypes() override {} }; TEST_P(MetricEnumerationTestMetricTypes, givenValidMetricTypesWhenSetAndGetIsSameThenReturnSuccess) { // Metrics Discovery device. metricsDeviceParams.ConcurrentGroupsCount = 1; // Metrics Discovery concurrent group. Mock metricsConcurrentGroup; TConcurrentGroupParams_1_0 metricsConcurrentGroupParams = {}; metricsConcurrentGroupParams.MetricSetsCount = 1; metricsConcurrentGroupParams.SymbolName = "OA"; metricsConcurrentGroupParams.Description = "OA description"; // Metrics Discovery:: metric set. Mock metricsSet; MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams = {}; metricsSetParams.ApiMask = MetricsDiscovery::API_TYPE_OCL; metricsSetParams.MetricsCount = 0; metricsSetParams.SymbolName = "Metric set name"; metricsSetParams.ShortName = "Metric set description"; metricsSetParams.MetricsCount = 1; // Metrics Discovery:: metric. Mock metric; TMetricParams_1_0 metricParams = {}; metricParams.SymbolName = "Metric symbol name"; metricParams.ShortName = "Metric short name"; metricParams.LongName = "Metric long name"; metricParams.ResultType = MetricsDiscovery::TMetricResultType::RESULT_UINT64; metricParams.MetricType = metricType; zet_metric_properties_t metricProperties = {}; // One api: metric group handle. 
zet_metric_group_handle_t metricGroupHandle = {}; openMetricsAdapter(); EXPECT_CALL(metricsDevice, GetParams()) .WillRepeatedly(Return(&metricsDeviceParams)); EXPECT_CALL(metricsDevice, GetConcurrentGroup(_)) .Times(1) .WillOnce(Return(&metricsConcurrentGroup)); EXPECT_CALL(metricsConcurrentGroup, GetParams()) .Times(1) .WillRepeatedly(Return(&metricsConcurrentGroupParams)); EXPECT_CALL(metricsConcurrentGroup, GetMetricSet(_)) .WillRepeatedly(Return(&metricsSet)); EXPECT_CALL(metricsSet, GetParams()) .WillRepeatedly(Return(&metricsSetParams)); EXPECT_CALL(metricsSet, GetMetric(_)) .Times(1) .WillOnce(Return(&metric)); EXPECT_CALL(metric, GetParams()) .Times(1) .WillOnce(Return(&metricParams)); EXPECT_CALL(metricsSet, SetApiFiltering(_)) .WillRepeatedly(Return(TCompletionCode::CC_OK)); // Metric group count. uint32_t metricGroupCount = 0; EXPECT_EQ(zetMetricGroupGet(device->toHandle(), &metricGroupCount, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); // Metric group handle. EXPECT_EQ(zetMetricGroupGet(device->toHandle(), &metricGroupCount, &metricGroupHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); EXPECT_NE(metricGroupHandle, nullptr); // Obtain metric. uint32_t metricCount = 0; zet_metric_handle_t metricHandle = {}; EXPECT_EQ(zetMetricGet(metricGroupHandle, &metricCount, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(metricCount, 1u); EXPECT_EQ(zetMetricGet(metricGroupHandle, &metricCount, &metricHandle), ZE_RESULT_SUCCESS); EXPECT_NE(metricHandle, nullptr); // Obtain metric params. 
EXPECT_EQ(zetMetricGetProperties(metricHandle, &metricProperties), ZE_RESULT_SUCCESS); EXPECT_EQ(strcmp(metricProperties.name, metricParams.SymbolName), 0); EXPECT_EQ(strcmp(metricProperties.description, metricParams.LongName), 0); EXPECT_EQ(metricProperties.metricType, static_cast(metricType)); EXPECT_EQ(metricProperties.resultType, ZET_VALUE_TYPE_UINT64); } std::vector getListOfMetricTypes() { std::vector metricTypes = {}; for (int type = MetricsDiscovery::TMetricType::METRIC_TYPE_DURATION; type < MetricsDiscovery::TMetricType::METRIC_TYPE_LAST; type++) { metricTypes.push_back(static_cast(type)); } return metricTypes; } INSTANTIATE_TEST_CASE_P(parameterizedMetricEnumerationTestMetricTypes, MetricEnumerationTestMetricTypes, ::testing::ValuesIn(getListOfMetricTypes())); class MetricEnumerationTestInformationTypes : public MetricEnumerationTest, public ::testing::WithParamInterface { public: MetricsDiscovery::TInformationType infoType; std::unordered_map validate; MetricEnumerationTestInformationTypes() { infoType = GetParam(); validate[MetricsDiscovery::TInformationType::INFORMATION_TYPE_REPORT_REASON] = ZET_METRIC_TYPE_EVENT; validate[MetricsDiscovery::TInformationType::INFORMATION_TYPE_VALUE] = ZET_METRIC_TYPE_RAW; validate[MetricsDiscovery::TInformationType::INFORMATION_TYPE_FLAG] = ZET_METRIC_TYPE_FLAG; validate[MetricsDiscovery::TInformationType::INFORMATION_TYPE_TIMESTAMP] = ZET_METRIC_TYPE_TIMESTAMP; validate[MetricsDiscovery::TInformationType::INFORMATION_TYPE_CONTEXT_ID_TAG] = ZET_METRIC_TYPE_RAW; validate[MetricsDiscovery::TInformationType::INFORMATION_TYPE_SAMPLE_PHASE] = ZET_METRIC_TYPE_RAW; validate[MetricsDiscovery::TInformationType::INFORMATION_TYPE_GPU_NODE] = ZET_METRIC_TYPE_RAW; } ~MetricEnumerationTestInformationTypes() override {} }; TEST_P(MetricEnumerationTestInformationTypes, givenValidInformationTypesWhenSetAndGetIsSameThenReturnSuccess) { // Metrics Discovery device. 
metricsDeviceParams.ConcurrentGroupsCount = 1; // Metrics Discovery concurrent group. Mock metricsConcurrentGroup; TConcurrentGroupParams_1_0 metricsConcurrentGroupParams = {}; metricsConcurrentGroupParams.MetricSetsCount = 1; metricsConcurrentGroupParams.SymbolName = "OA"; metricsConcurrentGroupParams.Description = "OA description"; // Metrics Discovery:: metric set. Mock metricsSet; MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams = {}; metricsSetParams.ApiMask = MetricsDiscovery::API_TYPE_OCL; metricsSetParams.MetricsCount = 0; metricsSetParams.InformationCount = 1; metricsSetParams.SymbolName = "Metric set name"; metricsSetParams.ShortName = "Metric set description"; // Metrics Discovery:: information. Mock information; MetricsDiscovery::TInformationParams_1_0 sourceInformationParams = {}; sourceInformationParams.SymbolName = "Info symbol name"; sourceInformationParams.LongName = "Info long name"; sourceInformationParams.GroupName = "Info group name"; sourceInformationParams.InfoUnits = "Info Units"; sourceInformationParams.InfoType = infoType; zet_metric_properties_t metricProperties = {}; // One api: metric group handle. zet_metric_group_handle_t metricGroupHandle = {}; openMetricsAdapter(); EXPECT_CALL(metricsDevice, GetParams()) .WillRepeatedly(Return(&metricsDeviceParams)); EXPECT_CALL(metricsDevice, GetConcurrentGroup(_)) .Times(1) .WillOnce(Return(&metricsConcurrentGroup)); EXPECT_CALL(metricsConcurrentGroup, GetParams()) .Times(1) .WillRepeatedly(Return(&metricsConcurrentGroupParams)); EXPECT_CALL(metricsConcurrentGroup, GetMetricSet(_)) .WillRepeatedly(Return(&metricsSet)); EXPECT_CALL(metricsSet, GetParams()) .WillRepeatedly(Return(&metricsSetParams)); EXPECT_CALL(metricsSet, GetInformation(_)) .Times(1) .WillOnce(Return(&information)); EXPECT_CALL(information, GetParams()) .Times(1) .WillOnce(Return(&sourceInformationParams)); EXPECT_CALL(metricsSet, SetApiFiltering(_)) .WillRepeatedly(Return(TCompletionCode::CC_OK)); // Metric group count. 
uint32_t metricGroupCount = 0;
    EXPECT_EQ(zetMetricGroupGet(device->toHandle(), &metricGroupCount, nullptr), ZE_RESULT_SUCCESS);
    EXPECT_EQ(metricGroupCount, 1u);

    // Metric group handle.
    EXPECT_EQ(zetMetricGroupGet(device->toHandle(), &metricGroupCount, &metricGroupHandle), ZE_RESULT_SUCCESS);
    EXPECT_EQ(metricGroupCount, 1u);
    EXPECT_NE(metricGroupHandle, nullptr);

    // Obtain information.
    // First call (null handle array) queries the count, second call fetches the handle.
    uint32_t metricCount = 0;
    zet_metric_handle_t infoHandle = {};
    EXPECT_EQ(zetMetricGet(metricGroupHandle, &metricCount, nullptr), ZE_RESULT_SUCCESS);
    EXPECT_EQ(metricCount, 1u);
    EXPECT_EQ(zetMetricGet(metricGroupHandle, &metricCount, &infoHandle), ZE_RESULT_SUCCESS);
    EXPECT_NE(infoHandle, nullptr);

    // Obtain information params.
    EXPECT_EQ(zetMetricGetProperties(infoHandle, &metricProperties), ZE_RESULT_SUCCESS);
    EXPECT_EQ(strcmp(metricProperties.name, sourceInformationParams.SymbolName), 0);
    EXPECT_EQ(strcmp(metricProperties.description, sourceInformationParams.LongName), 0);
    EXPECT_EQ(strcmp(metricProperties.component, sourceInformationParams.GroupName), 0);
    EXPECT_EQ(strcmp(metricProperties.resultUnits, sourceInformationParams.InfoUnits), 0);
    // The reported metric type must match the fixture's expectation table for this information type.
    EXPECT_EQ(metricProperties.metricType, validate[infoType]);
}

// Builds the value list used to instantiate MetricEnumerationTestInformationTypes:
// one entry per integer in [INFORMATION_TYPE_REPORT_REASON, INFORMATION_TYPE_LAST).
// NOTE(review): the std::vector element type and the static_cast target type are
// missing here (stripped template arguments) - restore them before compiling.
std::vector getListOfInfoTypes() {
    std::vector infoTypes = {};
    for (int type = MetricsDiscovery::TInformationType::INFORMATION_TYPE_REPORT_REASON; type < MetricsDiscovery::TInformationType::INFORMATION_TYPE_LAST; type++) {
        infoTypes.push_back(static_cast(type));
    }
    return infoTypes;
}

// Instantiates the MetricEnumerationTestInformationTypes suite with the values returned by getListOfInfoTypes().
INSTANTIATE_TEST_CASE_P(parameterizedMetricEnumerationTestInformationTypes,
                        MetricEnumerationTestInformationTypes,
                        ::testing::ValuesIn(getListOfInfoTypes()));

// activateMetricSet() is expected to return true when the metric set's Activate() returns CC_OK.
TEST_F(MetricEnumerationTest, givenMetricSetWhenActivateIsCalledActivateReturnsTrue) {

    Mock metricsSet;
    MetricGroupImpTest metricGroup;

    metricGroup.pReferenceMetricSet = &metricsSet;

    EXPECT_CALL(metricsSet, Activate())
        .WillRepeatedly(Return(MetricsDiscovery::CC_OK));

    EXPECT_EQ(metricGroup.activateMetricSet(), true);
}

TEST_F(MetricEnumerationTest,
givenMetricSetWhenActivateIsCalledActivateReturnsFalse) {
    // Activate() fails with CC_ERROR_GENERAL, so activateMetricSet() is expected to return false.
    // NOTE(review): the Mock declarations in these tests have no template argument;
    // it appears to have been stripped from this copy of the file.

    Mock metricsSet;
    MetricGroupImpTest metricGroup;

    metricGroup.pReferenceMetricSet = &metricsSet;

    EXPECT_CALL(metricsSet, Activate())
        .WillRepeatedly(Return(MetricsDiscovery::CC_ERROR_GENERAL));

    EXPECT_EQ(metricGroup.activateMetricSet(), false);
}

// deactivateMetricSet() is expected to return true when Deactivate() returns CC_OK.
TEST_F(MetricEnumerationTest, givenMetricSetWhenDeactivateIsCalledDeactivateReturnsTrue) {

    Mock metricsSet;
    MetricGroupImpTest metricGroup;

    metricGroup.pReferenceMetricSet = &metricsSet;

    EXPECT_CALL(metricsSet, Deactivate())
        .WillRepeatedly(Return(MetricsDiscovery::CC_OK));

    EXPECT_EQ(metricGroup.deactivateMetricSet(), true);
}

// deactivateMetricSet() is expected to return false when Deactivate() returns CC_ERROR_GENERAL.
TEST_F(MetricEnumerationTest, givenMetricSetWhenDeactivateIsCalledDeactivateReturnsFalse) {

    Mock metricsSet;
    MetricGroupImpTest metricGroup;

    metricGroup.pReferenceMetricSet = &metricsSet;

    EXPECT_CALL(metricsSet, Deactivate())
        .WillRepeatedly(Return(MetricsDiscovery::CC_ERROR_GENERAL));

    EXPECT_EQ(metricGroup.deactivateMetricSet(), false);
}

// waitForReports() is expected to return ZE_RESULT_SUCCESS when WaitForReports() returns CC_OK.
TEST_F(MetricEnumerationTest, givenMetricSetWhenWaitForReportsIsCalledWaitForReportsReturnsSuccess) {

    Mock concurrentGroup;
    MetricGroupImpTest metricGroup;

    metricGroup.pReferenceConcurrentGroup = &concurrentGroup;

    EXPECT_CALL(concurrentGroup, WaitForReports(_))
        .WillRepeatedly(Return(MetricsDiscovery::TCompletionCode::CC_OK));

    uint32_t timeout = 1;
    EXPECT_EQ(metricGroup.waitForReports(timeout), ZE_RESULT_SUCCESS);
}

// waitForReports() is expected to return ZE_RESULT_NOT_READY when WaitForReports() returns CC_ERROR_GENERAL.
TEST_F(MetricEnumerationTest, givenMetricSetWhenWaitForReportsIsCalledWaitForReportsReturnsNotReady) {

    Mock concurrentGroup;
    MetricGroupImpTest metricGroup;

    metricGroup.pReferenceConcurrentGroup = &concurrentGroup;

    EXPECT_CALL(concurrentGroup, WaitForReports(_))
        .WillRepeatedly(Return(MetricsDiscovery::TCompletionCode::CC_ERROR_GENERAL));

    uint32_t timeout = 1;
    EXPECT_EQ(metricGroup.waitForReports(timeout), ZE_RESULT_NOT_READY);
}

// openIoStream() is expected to return ZE_RESULT_ERROR_UNKNOWN when OpenIoStream() returns CC_ERROR_GENERAL.
TEST_F(MetricEnumerationTest, givenTimeAndBufferSizeWhenOpenIoStreamReturnsErrorThenTheMetricGroupOpenIoStreamReturnsErrorUnknown) {

    Mock concurrentGroup;
MetricGroupImpTest metricGroup;

    metricGroup.pReferenceConcurrentGroup = &concurrentGroup;

    EXPECT_CALL(concurrentGroup, OpenIoStream(_, _, _, _))
        .WillRepeatedly(Return(MetricsDiscovery::CC_ERROR_GENERAL));

    uint32_t timerPeriodNs = 1;
    uint32_t oaBufferSize = 100;

    EXPECT_EQ(metricGroup.openIoStream(timerPeriodNs, oaBufferSize), ZE_RESULT_ERROR_UNKNOWN);
}

// readIoStream() is expected to return ZE_RESULT_SUCCESS when ReadIoStream() returns CC_OK.
// NOTE(review): the Mock declarations in these tests have no template argument;
// it appears to have been stripped from this copy of the file.
TEST_F(MetricEnumerationTest, givenReportCountAndReportDataWhenReadIoStreamReturnsOkTheMetricGroupReadIoStreamReturnsSuccess) {

    Mock concurrentGroup;
    MetricGroupImpTest metricGroup;

    metricGroup.pReferenceConcurrentGroup = &concurrentGroup;

    EXPECT_CALL(concurrentGroup, ReadIoStream(_, _, _))
        .WillOnce(Return(MetricsDiscovery::CC_OK));

    uint32_t reportCount = 1;
    uint8_t reportData = 0;

    EXPECT_EQ(metricGroup.readIoStream(reportCount, reportData), ZE_RESULT_SUCCESS);
}

// CC_READ_PENDING from ReadIoStream() is also expected to map to ZE_RESULT_SUCCESS.
TEST_F(MetricEnumerationTest, givenReportCountAndReportDataWhenReadIoStreamReturnsPendingTheMetricGroupReadIoStreamReturnsSuccess) {

    Mock concurrentGroup;
    MetricGroupImpTest metricGroup;

    metricGroup.pReferenceConcurrentGroup = &concurrentGroup;

    EXPECT_CALL(concurrentGroup, ReadIoStream(_, _, _))
        .WillOnce(Return(MetricsDiscovery::CC_READ_PENDING));

    uint32_t reportCount = 1;
    uint8_t reportData = 0;

    EXPECT_EQ(metricGroup.readIoStream(reportCount, reportData), ZE_RESULT_SUCCESS);
}

// readIoStream() is expected to return ZE_RESULT_ERROR_UNKNOWN when ReadIoStream() returns CC_ERROR_GENERAL.
TEST_F(MetricEnumerationTest, givenReportCountAndReportDataWhenReadIoStreamReturnsErrorThenMetrigGroupReadIoStreamReturnsError) {

    Mock concurrentGroup;
    MetricGroupImpTest metricGroup;

    metricGroup.pReferenceConcurrentGroup = &concurrentGroup;

    EXPECT_CALL(concurrentGroup, ReadIoStream(_, _, _))
        .WillOnce(Return(MetricsDiscovery::CC_ERROR_GENERAL));

    uint32_t reportCount = 1;
    uint8_t reportData = 0;

    EXPECT_EQ(metricGroup.readIoStream(reportCount, reportData), ZE_RESULT_ERROR_UNKNOWN);
}

// closeIoStream() is expected to return ZE_RESULT_ERROR_UNKNOWN when CloseIoStream() returns CC_ERROR_GENERAL.
TEST_F(MetricEnumerationTest, givenTimeAndBufferSizeWhenCloseIoStreamIsCalledCloseAndFailThenIoStreamReturnsErrorUnknown) {

    Mock concurrentGroup;
    MetricGroupImpTest metricGroup;
metricGroup.pReferenceConcurrentGroup = &concurrentGroup;

    EXPECT_CALL(concurrentGroup, CloseIoStream())
        .WillRepeatedly(Return(MetricsDiscovery::CC_ERROR_GENERAL));

    EXPECT_EQ(metricGroup.closeIoStream(), ZE_RESULT_ERROR_UNKNOWN);
}

// Feeds copyValue() one source value per integer in [VALUE_TYPE_UINT32, VALUE_TYPE_LAST)
// and checks the type tag and payload written to the zet_typed_value_t destination.
// NOTE(review): static_cast / const_cast below have no target type; the template
// arguments appear to have been stripped from this copy of the file.
TEST_F(MetricEnumerationTest, givenTTypedValueWhenCopyValueIsCalledReturnsFilledZetTypedValue) {

    MetricsDiscovery::TTypedValue_1_0 source = {};
    zet_typed_value_t destination = {};
    MetricGroupImpTest metricGroup = {};

    for (int vType = MetricsDiscovery::VALUE_TYPE_UINT32;
         vType < MetricsDiscovery::VALUE_TYPE_LAST; vType++) {
        source.ValueType = static_cast(vType);
        // Every non-bool type is seeded through the 64-bit member; bool is seeded with true.
        if (vType != MetricsDiscovery::VALUE_TYPE_BOOL)
            source.ValueUInt64 = 0xFF;
        else
            source.ValueBool = true;

        metricGroup.copyValue(const_cast(source), destination);
        switch (vType) {
        case MetricsDiscovery::VALUE_TYPE_UINT32:
            EXPECT_EQ(destination.type, ZET_VALUE_TYPE_UINT32);
            EXPECT_EQ(destination.value.ui32, source.ValueUInt32);
            break;

        case MetricsDiscovery::VALUE_TYPE_UINT64:
            EXPECT_EQ(destination.type, ZET_VALUE_TYPE_UINT64);
            EXPECT_EQ(destination.value.ui64, source.ValueUInt64);
            break;

        case MetricsDiscovery::VALUE_TYPE_FLOAT:
            EXPECT_EQ(destination.type, ZET_VALUE_TYPE_FLOAT32);
            EXPECT_EQ(destination.value.fp32, source.ValueFloat);
            break;

        case MetricsDiscovery::VALUE_TYPE_BOOL:
            EXPECT_EQ(destination.type, ZET_VALUE_TYPE_BOOL8);
            EXPECT_EQ(destination.value.b8, source.ValueBool);
            break;

        default:
            // Any other source type is expected to come out as a UINT64 holding zero.
            EXPECT_EQ(destination.type, ZET_VALUE_TYPE_UINT64);
            EXPECT_EQ(destination.value.ui64, static_cast(0));
            break;
        }
    }
}

} // namespace ult
} // namespace L0
// NOTE(review): the next line is not C++. It is a tar (ustar) member header for
// test_metric_oa_enumeration_2.cpp fused with that file's licence comment; this text is
// an archive dump holding several source files back to back.
test_metric_oa_enumeration_2.cpp000066400000000000000000000723101422164147700354510ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/metrics/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */

#include "shared/test/common/test_macros/test.h"

#include "level_zero/core/source/device/device_imp.h"
#include "level_zero/core/test/unit_tests/mocks/mock_driver.h"
#include
"level_zero/tools/source/metrics/metric_oa_source.h"
#include "level_zero/tools/test/unit_tests/sources/metrics/mock_metric_oa.h"

#include "gmock/gmock.h"
#include "gtest/gtest.h"

using ::testing::_;
using ::testing::Return;

namespace L0 {
namespace ult {

// NOTE(review): `Test` has no fixture template argument here, and the Mock / static_cast
// uses below have none either; they appear to have been stripped from this copy of the file.
using MetricEnumerationMultiDeviceTest = Test;

// loadDependencies() on a root device: every dependency loads successfully and
// OpenMetricsSubDevice is expected once per sub-device, after which the metric
// source and the enumeration report themselves as initialized.
TEST_F(MetricEnumerationMultiDeviceTest, givenRootDeviceWhenLoadDependenciesIsCalledThenOpenMetricsSubDeviceWillBeCalled) {

    // Use first root device.
    auto &metricSource = devices[0]->getMetricDeviceContext().getMetricSource();
    auto &deviceImp = *static_cast(devices[0]);
    const uint32_t subDeviceCount = static_cast(deviceImp.subDevices.size());

    Mock mockAdapterGroup;
    Mock mockAdapter;
    Mock mockDevice;

    EXPECT_CALL(*mockMetricsLibrary, load())
        .Times(1)
        .WillOnce(Return(true));

    EXPECT_CALL(*mockMetricEnumeration, loadMetricsDiscovery())
        .Times(1)
        .WillOnce(Return(ZE_RESULT_SUCCESS));

    // Hands the mock adapter group back through the first output argument.
    EXPECT_CALL(*Mock::g_mockApi, MockOpenAdapterGroup(_))
        .Times(1)
        .WillOnce(DoAll(::testing::SetArgPointee<0>(&mockAdapterGroup), Return(TCompletionCode::CC_OK)));

    EXPECT_CALL(*mockMetricEnumeration, getMetricsAdapter())
        .Times(1)
        .WillOnce(Return(&mockAdapter));

    // One open / close / GetParams round per sub-device.
    EXPECT_CALL(mockAdapter, OpenMetricsSubDevice(_, _))
        .Times(subDeviceCount)
        .WillRepeatedly(DoAll(::testing::SetArgPointee<1>(&mockDevice), Return(TCompletionCode::CC_OK)));

    EXPECT_CALL(mockAdapter, CloseMetricsDevice(_))
        .Times(subDeviceCount)
        .WillRepeatedly(Return(TCompletionCode::CC_OK));

    EXPECT_CALL(mockDevice, GetParams())
        .Times(subDeviceCount)
        .WillRepeatedly(Return(&metricsDeviceParams));

    EXPECT_CALL(mockAdapterGroup, Close())
        .Times(1)
        .WillOnce(Return(TCompletionCode::CC_OK));

    // Use root device.
devices[0]->getMetricDeviceContext().setSubDeviceIndex(0);
    mockMetricsLibrary->initializationState = ZE_RESULT_SUCCESS;

    EXPECT_EQ(metricSource.loadDependencies(), true);
    EXPECT_EQ(metricSource.isInitialized(), true);

    EXPECT_EQ(mockMetricEnumeration->baseIsInitialized(), true);
    EXPECT_EQ(mockMetricEnumeration->cleanupMetricsDiscovery(), ZE_RESULT_SUCCESS);
}

// Failure path: the first OpenMetricsSubDevice call returns CC_ERROR_GENERAL, so no
// device is closed or queried, and the enumeration must not report itself as initialized.
// NOTE(review): the Mock declarations below have no template argument; it appears to
// have been stripped from this copy of the file.
TEST_F(MetricEnumerationMultiDeviceTest, givenRootDeviceWhenLoadDependenciesIsCalledThenOpenMetricsSubDeviceWillBeCalledWithoutSuccess) {

    // Use first root device.
    auto &metricSource = devices[0]->getMetricDeviceContext().getMetricSource();

    Mock mockAdapterGroup;
    Mock mockAdapter;
    Mock mockDevice;

    EXPECT_CALL(*mockMetricsLibrary, load())
        .Times(1)
        .WillOnce(Return(true));

    EXPECT_CALL(*mockMetricEnumeration, loadMetricsDiscovery())
        .Times(1)
        .WillOnce(Return(ZE_RESULT_SUCCESS));

    EXPECT_CALL(*Mock::g_mockApi, MockOpenAdapterGroup(_))
        .Times(1)
        .WillOnce(DoAll(::testing::SetArgPointee<0>(&mockAdapterGroup), Return(TCompletionCode::CC_OK)));

    EXPECT_CALL(*mockMetricEnumeration, getMetricsAdapter())
        .Times(1)
        .WillOnce(Return(&mockAdapter));

    // Only one open attempt is expected; it fails.
    EXPECT_CALL(mockAdapter, OpenMetricsSubDevice(_, _))
        .Times(1)
        .WillOnce(Return(TCompletionCode::CC_ERROR_GENERAL));

    EXPECT_CALL(mockAdapter, CloseMetricsDevice(_))
        .Times(0);

    EXPECT_CALL(mockDevice, GetParams())
        .Times(0);

    // The adapter group is still expected to be closed once.
    EXPECT_CALL(mockAdapterGroup, Close())
        .Times(1)
        .WillOnce(Return(TCompletionCode::CC_OK));

    // Use root device.
devices[0]->getMetricDeviceContext().setSubDeviceIndex(0); mockMetricsLibrary->initializationState = ZE_RESULT_SUCCESS; EXPECT_EQ(metricSource.loadDependencies(), true); EXPECT_EQ(metricSource.isInitialized(), true); EXPECT_EQ(mockMetricEnumeration->baseIsInitialized(), false); } TEST_F(MetricEnumerationMultiDeviceTest, givenIncorrectMetricsDiscoveryInterfaceVersionWhenZetGetMetricGroupIsCalledThenReturnsFail) { metricsDeviceParams.Version.MajorNumber = 0; metricsDeviceParams.Version.MinorNumber = 1; openMetricsAdapter(); EXPECT_CALL(metricsDevice, GetParams()) .Times(1) .WillOnce(Return(&metricsDeviceParams)); uint32_t metricGroupCount = 0; EXPECT_EQ(zetMetricGroupGet(devices[0]->toHandle(), &metricGroupCount, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 0u); } TEST_F(MetricEnumerationMultiDeviceTest, givenValidArgumentsWhenZetMetricGetPropertiestIsCalledThenReturnSuccess) { // Use first root device. auto &deviceImp = *static_cast(devices[0]); const uint32_t subDeviceCount = static_cast(deviceImp.subDevices.size()); // Metrics Discovery device. metricsDeviceParams.ConcurrentGroupsCount = 1; // Metrics Discovery concurrent group. Mock metricsConcurrentGroup; TConcurrentGroupParams_1_0 metricsConcurrentGroupParams = {}; metricsConcurrentGroupParams.MetricSetsCount = 1; metricsConcurrentGroupParams.SymbolName = "OA"; metricsConcurrentGroupParams.Description = "OA description"; // Metrics Discovery:: metric set. Mock metricsSet; MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams = {}; metricsSetParams.ApiMask = MetricsDiscovery::API_TYPE_OCL; metricsSetParams.MetricsCount = 0; metricsSetParams.SymbolName = "Metric set name"; metricsSetParams.ShortName = "Metric set description"; metricsSetParams.MetricsCount = 1; // Metrics Discovery:: metric. 
Mock metric;
    TMetricParams_1_0 metricParams = {};
    metricParams.SymbolName = "Metric symbol name";
    metricParams.ShortName = "Metric short name";
    metricParams.LongName = "Metric long name";
    metricParams.ResultType = MetricsDiscovery::TMetricResultType::RESULT_UINT64;
    metricParams.MetricType = MetricsDiscovery::TMetricType::METRIC_TYPE_RATIO;

    zet_metric_properties_t metricProperties = {};

    // One api: metric group handle.
    zet_metric_group_handle_t metricGroupHandle = {};
    zet_metric_group_properties_t metricGroupProperties = {};

    openMetricsAdapter();

    // Mock wiring: device -> concurrent group -> metric set -> metric.
    // Calls carrying .Times(subDeviceCount) are expected once per sub-device.
    EXPECT_CALL(metricsDevice, GetParams())
        .WillRepeatedly(Return(&metricsDeviceParams));

    EXPECT_CALL(metricsDevice, GetConcurrentGroup(_))
        .Times(subDeviceCount)
        .WillRepeatedly(Return(&metricsConcurrentGroup));

    EXPECT_CALL(metricsConcurrentGroup, GetParams())
        .Times(subDeviceCount)
        .WillRepeatedly(Return(&metricsConcurrentGroupParams));

    EXPECT_CALL(metricsConcurrentGroup, GetMetricSet(_))
        .WillRepeatedly(Return(&metricsSet));

    EXPECT_CALL(metricsSet, GetParams())
        .WillRepeatedly(Return(&metricsSetParams));

    EXPECT_CALL(metricsSet, GetMetric(_))
        .Times(subDeviceCount)
        .WillRepeatedly(Return(&metric));

    EXPECT_CALL(metric, GetParams())
        .Times(subDeviceCount)
        .WillRepeatedly(Return(&metricParams));

    EXPECT_CALL(metricsSet, SetApiFiltering(_))
        .WillRepeatedly(Return(TCompletionCode::CC_OK));

    // Metric group count.
    uint32_t metricGroupCount = 0;
    EXPECT_EQ(zetMetricGroupGet(devices[0]->toHandle(), &metricGroupCount, nullptr), ZE_RESULT_SUCCESS);
    EXPECT_EQ(metricGroupCount, 1u);

    // Metric group handle.
    EXPECT_EQ(zetMetricGroupGet(devices[0]->toHandle(), &metricGroupCount, &metricGroupHandle), ZE_RESULT_SUCCESS);
    EXPECT_EQ(metricGroupCount, 1u);
    EXPECT_NE(metricGroupHandle, nullptr);

    // Metric group properties.
EXPECT_EQ(zetMetricGroupGetProperties(metricGroupHandle, &metricGroupProperties), ZE_RESULT_SUCCESS);
    EXPECT_EQ(metricGroupProperties.domain, 0u);
    EXPECT_EQ(metricGroupProperties.samplingType, ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_EVENT_BASED);
    EXPECT_EQ(metricGroupProperties.metricCount, metricsSetParams.MetricsCount);
    // The group description is compared with the set's ShortName, the group name with its SymbolName.
    EXPECT_EQ(strcmp(metricGroupProperties.description, metricsSetParams.ShortName), 0);
    EXPECT_EQ(strcmp(metricGroupProperties.name, metricsSetParams.SymbolName), 0);

    // Obtain metric.
    uint32_t metricCount = 0;
    zet_metric_handle_t metricHandle = {};
    EXPECT_EQ(zetMetricGet(metricGroupHandle, &metricCount, nullptr), ZE_RESULT_SUCCESS);
    EXPECT_EQ(metricCount, 1u);
    EXPECT_EQ(zetMetricGet(metricGroupHandle, &metricCount, &metricHandle), ZE_RESULT_SUCCESS);
    EXPECT_NE(metricHandle, nullptr);

    // Obtain metric params.
    EXPECT_EQ(zetMetricGetProperties(metricHandle, &metricProperties), ZE_RESULT_SUCCESS);
    EXPECT_EQ(strcmp(metricProperties.name, metricParams.SymbolName), 0);
    EXPECT_EQ(strcmp(metricProperties.description, metricParams.LongName), 0);
    EXPECT_EQ(metricProperties.metricType, ZET_METRIC_TYPE_RATIO);
    EXPECT_EQ(metricProperties.resultType, ZET_VALUE_TYPE_UINT64);
}

// Calls zetMetricGroupCalculateMultipleMetricValuesExp twice on a well-formed raw
// buffer: first to query the counts, then to copy the calculated values out.
// NOTE(review): the Mock declarations and static_cast uses below have no template
// argument; they appear to have been stripped from this copy of the file.
TEST_F(MetricEnumerationMultiDeviceTest, givenCorrectRawDataHeaderWhenZetMetricGroupCalculateMetricValuesExpIsCalledTwiceThenReturnsSuccess) {

    auto &deviceImp = *static_cast(devices[0]);
    const uint32_t subDeviceCount = static_cast(deviceImp.subDevices.size());

    metricsDeviceParams.ConcurrentGroupsCount = 1;

    Mock metricsConcurrentGroup;
    TConcurrentGroupParams_1_0 metricsConcurrentGroupParams = {};
    metricsConcurrentGroupParams.MetricSetsCount = 1;
    metricsConcurrentGroupParams.SymbolName = "OA";

    // One metric set with 11 metrics and a 256-byte query report.
    Mock metricsSet;
    MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams = {};
    metricsSetParams.ApiMask = MetricsDiscovery::API_TYPE_OCL;
    metricsSetParams.QueryReportSize = 256;
    metricsSetParams.MetricsCount = 11;

    Mock metric;
    MetricsDiscovery::TMetricParams_1_0 metricParams = {};

    zet_metric_group_handle_t
metricGroupHandle = {};
    // Value the CalculateMetrics mock writes through its output argument at index 4.
    uint32_t returnedMetricCount = 1;

    openMetricsAdapter();

    // Mock wiring: device -> concurrent group -> metric set -> metric.
    // Calls carrying .Times(subDeviceCount) are expected once per sub-device.
    EXPECT_CALL(metricsDevice, GetParams())
        .WillRepeatedly(Return(&metricsDeviceParams));

    EXPECT_CALL(metricsDevice, GetConcurrentGroup(_))
        .Times(subDeviceCount)
        .WillRepeatedly(Return(&metricsConcurrentGroup));

    EXPECT_CALL(metricsConcurrentGroup, GetParams())
        .Times(subDeviceCount)
        .WillRepeatedly(Return(&metricsConcurrentGroupParams));

    EXPECT_CALL(metricsConcurrentGroup, GetMetricSet(_))
        .Times(subDeviceCount)
        .WillRepeatedly(Return(&metricsSet));

    EXPECT_CALL(metricsSet, GetParams())
        .WillRepeatedly(Return(&metricsSetParams));

    // Every metric of the set is expected to be fetched once per sub-device.
    EXPECT_CALL(metricsSet, GetMetric(_))
        .Times(metricsSetParams.MetricsCount * subDeviceCount)
        .WillRepeatedly(Return(&metric));

    EXPECT_CALL(metricsSet, SetApiFiltering(_))
        .WillRepeatedly(Return(TCompletionCode::CC_OK));

    EXPECT_CALL(metric, GetParams())
        .WillRepeatedly(Return(&metricParams));

    // One successful calculation is expected per sub-device.
    EXPECT_CALL(metricsSet, CalculateMetrics(_, _, _, _, _, _, _))
        .Times(subDeviceCount)
        .WillRepeatedly(DoAll(::testing::SetArgPointee<4>(returnedMetricCount), Return(TCompletionCode::CC_OK)));

    // Metric group handles.
    uint32_t metricGroupCount = 1;
    EXPECT_EQ(zetMetricGroupGet(devices[0]->toHandle(), &metricGroupCount, &metricGroupHandle), ZE_RESULT_SUCCESS);
    EXPECT_EQ(metricGroupCount, 1u);
    EXPECT_NE(metricGroupHandle, nullptr);

    // Raw results.
const size_t rawResultsSize = 560;
    uint8_t rawResults[rawResultsSize] = {};
    // Buffer layout: header, then dataCount offsets, then dataCount sizes, then the report data.
    // NOTE(review): the reinterpret_cast uses below have no target type; the template
    // arguments appear to have been stripped from this copy of the file.
    MetricGroupCalculateHeader *pRawHeader = reinterpret_cast(rawResults);
    pRawHeader->magic = MetricGroupCalculateHeader::magicValue;
    pRawHeader->dataCount = subDeviceCount;
    pRawHeader->rawDataOffsets = sizeof(MetricGroupCalculateHeader);
    pRawHeader->rawDataSizes = pRawHeader->rawDataOffsets + sizeof(uint32_t) * pRawHeader->dataCount;
    pRawHeader->rawDataOffset = pRawHeader->rawDataSizes + sizeof(uint32_t) * pRawHeader->dataCount;

    uint32_t *pRawDataOffsets = reinterpret_cast(rawResults + pRawHeader->rawDataOffsets);
    uint32_t *pRawDataSizes = reinterpret_cast(rawResults + pRawHeader->rawDataSizes);
    // NOTE(review): entries [0] and [1] are written unconditionally while dataCount is
    // subDeviceCount - this presumably relies on the fixture exposing two sub-devices; confirm.
    pRawDataOffsets[0] = 0;
    pRawDataOffsets[1] = metricsSetParams.QueryReportSize;
    pRawDataSizes[0] = metricsSetParams.QueryReportSize;
    pRawDataSizes[1] = metricsSetParams.QueryReportSize;

    // Valid raw buffer.
    // First call: null output arrays, so only dataCount and totalMetricCount are filled in.
    uint32_t dataCount = 0;
    uint32_t totalMetricCount = 0;
    EXPECT_NE(metricsSetParams.QueryReportSize, rawResultsSize);
    EXPECT_EQ(zetMetricGroupCalculateMultipleMetricValuesExp(metricGroupHandle, ZET_METRIC_GROUP_CALCULATION_TYPE_METRIC_VALUES, rawResultsSize, rawResults, &dataCount, &totalMetricCount, nullptr, nullptr), ZE_RESULT_SUCCESS);
    EXPECT_EQ(dataCount, subDeviceCount);
    EXPECT_EQ(totalMetricCount, subDeviceCount * metricsSetParams.MetricsCount);

    // Copy calculated metrics.
std::vector metricCounts(dataCount);
    std::vector caculatedRawResults(totalMetricCount);
    // Second call: output arrays sized from the counts returned by the first call.
    // NOTE(review): both std::vector declarations above have no element type; the
    // template arguments appear to have been stripped from this copy of the file.
    EXPECT_EQ(zetMetricGroupCalculateMultipleMetricValuesExp(metricGroupHandle, ZET_METRIC_GROUP_CALCULATION_TYPE_METRIC_VALUES, rawResultsSize, rawResults, &dataCount, &totalMetricCount, metricCounts.data(), caculatedRawResults.data()), ZE_RESULT_SUCCESS);
    EXPECT_EQ(metricCounts[0], metricsSetParams.MetricsCount);
    EXPECT_EQ(metricCounts[1], metricsSetParams.MetricsCount);
}

// Passes deliberately wrong dataCount / totalMetricCount values with null output arrays
// and expects the call to overwrite them with the correct counts.
// NOTE(review): the Mock declarations and static_cast uses below have no template
// argument; they appear to have been stripped from this copy of the file.
TEST_F(MetricEnumerationMultiDeviceTest, givenInvalidDataCountAndTotalMetricCountWhenZetMetricGroupCalculateMetricValuesExpIsCalledThenReturnsCorrectDataCountAndTotalMetricCount) {

    auto &deviceImp = *static_cast(devices[0]);
    const uint32_t subDeviceCount = static_cast(deviceImp.subDevices.size());

    metricsDeviceParams.ConcurrentGroupsCount = 1;

    Mock metricsConcurrentGroup;
    TConcurrentGroupParams_1_0 metricsConcurrentGroupParams = {};
    metricsConcurrentGroupParams.MetricSetsCount = 1;
    metricsConcurrentGroupParams.SymbolName = "OA";

    // One metric set with 11 metrics and a 256-byte query report.
    Mock metricsSet;
    MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams = {};
    metricsSetParams.ApiMask = MetricsDiscovery::API_TYPE_OCL;
    metricsSetParams.QueryReportSize = 256;
    metricsSetParams.MetricsCount = 11;

    Mock metric;
    MetricsDiscovery::TMetricParams_1_0 metricParams = {};

    zet_metric_group_handle_t metricGroupHandle = {};

    openMetricsAdapter();

    // Mock wiring: device -> concurrent group -> metric set -> metric.
    EXPECT_CALL(metricsDevice, GetParams())
        .WillRepeatedly(Return(&metricsDeviceParams));

    EXPECT_CALL(metricsDevice, GetConcurrentGroup(_))
        .Times(subDeviceCount)
        .WillRepeatedly(Return(&metricsConcurrentGroup));

    EXPECT_CALL(metricsConcurrentGroup, GetParams())
        .Times(subDeviceCount)
        .WillRepeatedly(Return(&metricsConcurrentGroupParams));

    EXPECT_CALL(metricsConcurrentGroup, GetMetricSet(_))
        .Times(subDeviceCount)
        .WillRepeatedly(Return(&metricsSet));

    EXPECT_CALL(metricsSet, GetParams())
        .WillRepeatedly(Return(&metricsSetParams));

    EXPECT_CALL(metricsSet, GetMetric(_))
        .Times(metricsSetParams.MetricsCount * subDeviceCount)
.WillRepeatedly(Return(&metric));

    EXPECT_CALL(metricsSet, SetApiFiltering(_))
        .WillRepeatedly(Return(TCompletionCode::CC_OK));

    EXPECT_CALL(metric, GetParams())
        .WillRepeatedly(Return(&metricParams));

    // Metric group handles.
    uint32_t metricGroupCount = 1;
    EXPECT_EQ(zetMetricGroupGet(devices[0]->toHandle(), &metricGroupCount, &metricGroupHandle), ZE_RESULT_SUCCESS);
    EXPECT_EQ(metricGroupCount, 1u);
    EXPECT_NE(metricGroupHandle, nullptr);

    // Raw results.
    // Buffer layout: header, then dataCount offsets, then dataCount sizes, then the report data.
    // NOTE(review): the reinterpret_cast uses below have no target type; the template
    // arguments appear to have been stripped from this copy of the file.
    const size_t rawResultsSize = 560;
    uint8_t rawResults[rawResultsSize] = {};
    MetricGroupCalculateHeader *pRawHeader = reinterpret_cast(rawResults);
    pRawHeader->magic = MetricGroupCalculateHeader::magicValue;
    pRawHeader->dataCount = subDeviceCount;
    pRawHeader->rawDataOffsets = sizeof(MetricGroupCalculateHeader);
    pRawHeader->rawDataSizes = pRawHeader->rawDataOffsets + sizeof(uint32_t) * pRawHeader->dataCount;
    pRawHeader->rawDataOffset = pRawHeader->rawDataSizes + sizeof(uint32_t) * pRawHeader->dataCount;

    uint32_t *pRawDataOffsets = reinterpret_cast(rawResults + pRawHeader->rawDataOffsets);
    uint32_t *pRawDataSizes = reinterpret_cast(rawResults + pRawHeader->rawDataSizes);
    // NOTE(review): entries [0] and [1] are written unconditionally while dataCount is
    // subDeviceCount - this presumably relies on the fixture exposing two sub-devices; confirm.
    pRawDataOffsets[0] = 0;
    pRawDataOffsets[1] = metricsSetParams.QueryReportSize;
    pRawDataSizes[0] = metricsSetParams.QueryReportSize;
    pRawDataSizes[1] = metricsSetParams.QueryReportSize;

    // Valid raw buffer. Invalid data count.
    // dataCount starts at a bogus 1000 and is expected to come back as subDeviceCount.
    uint32_t dataCount = 1000;
    uint32_t totalMetricCount = 0;
    EXPECT_NE(metricsSetParams.QueryReportSize, rawResultsSize);
    EXPECT_EQ(zetMetricGroupCalculateMultipleMetricValuesExp(metricGroupHandle, ZET_METRIC_GROUP_CALCULATION_TYPE_METRIC_VALUES, rawResultsSize, rawResults, &dataCount, &totalMetricCount, nullptr, nullptr), ZE_RESULT_SUCCESS);
    EXPECT_EQ(subDeviceCount, dataCount);
    EXPECT_EQ(subDeviceCount * metricsSetParams.MetricsCount, totalMetricCount);

    // Valid raw buffer. Invalid total metric count.
dataCount = 0;
    // totalMetricCount starts at a bogus 1000 and is expected to come back corrected.
    totalMetricCount = 1000;
    EXPECT_NE(metricsSetParams.QueryReportSize, rawResultsSize);
    EXPECT_EQ(zetMetricGroupCalculateMultipleMetricValuesExp(metricGroupHandle, ZET_METRIC_GROUP_CALCULATION_TYPE_METRIC_VALUES, rawResultsSize, rawResults, &dataCount, &totalMetricCount, nullptr, nullptr), ZE_RESULT_SUCCESS);
    EXPECT_EQ(subDeviceCount, dataCount);
    EXPECT_EQ(subDeviceCount * metricsSetParams.MetricsCount, totalMetricCount);
}

// With QueryReportSize set to 0 in the metric set parameters, the count query is
// expected to fail with ZE_RESULT_ERROR_UNKNOWN and leave both counts at zero.
// NOTE(review): the Mock declarations and static_cast uses below have no template
// argument; they appear to have been stripped from this copy of the file.
TEST_F(MetricEnumerationMultiDeviceTest, givenInvalidQueryReportSizeWhenZetMetricGroupCalculateMetricValuesExpIsCalledThenReturnsFail) {

    auto &deviceImp = *static_cast(devices[0]);
    const uint32_t subDeviceCount = static_cast(deviceImp.subDevices.size());

    metricsDeviceParams.ConcurrentGroupsCount = 1;

    Mock metricsConcurrentGroup;
    TConcurrentGroupParams_1_0 metricsConcurrentGroupParams = {};
    metricsConcurrentGroupParams.MetricSetsCount = 1;
    metricsConcurrentGroupParams.SymbolName = "OA";

    // The zero report size is the invalid input this test exercises.
    Mock metricsSet;
    MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams = {};
    metricsSetParams.ApiMask = MetricsDiscovery::API_TYPE_OCL;
    metricsSetParams.QueryReportSize = 0;
    metricsSetParams.MetricsCount = 11;

    Mock metric;
    MetricsDiscovery::TMetricParams_1_0 metricParams = {};

    zet_metric_group_handle_t metricGroupHandle = {};

    openMetricsAdapter();

    // Mock wiring: device -> concurrent group -> metric set -> metric.
    EXPECT_CALL(metricsDevice, GetParams())
        .WillRepeatedly(Return(&metricsDeviceParams));

    EXPECT_CALL(metricsDevice, GetConcurrentGroup(_))
        .Times(subDeviceCount)
        .WillRepeatedly(Return(&metricsConcurrentGroup));

    EXPECT_CALL(metricsConcurrentGroup, GetParams())
        .Times(subDeviceCount)
        .WillRepeatedly(Return(&metricsConcurrentGroupParams));

    EXPECT_CALL(metricsConcurrentGroup, GetMetricSet(_))
        .Times(subDeviceCount)
        .WillRepeatedly(Return(&metricsSet));

    EXPECT_CALL(metricsSet, GetParams())
        .WillRepeatedly(Return(&metricsSetParams));

    EXPECT_CALL(metricsSet, GetMetric(_))
        .Times(metricsSetParams.MetricsCount * subDeviceCount)
        .WillRepeatedly(Return(&metric));

    EXPECT_CALL(metricsSet, SetApiFiltering(_))
.WillRepeatedly(Return(TCompletionCode::CC_OK));

    EXPECT_CALL(metric, GetParams())
        .WillRepeatedly(Return(&metricParams));

    // Metric group handles.
    uint32_t metricGroupCount = 1;
    EXPECT_EQ(zetMetricGroupGet(devices[0]->toHandle(), &metricGroupCount, &metricGroupHandle), ZE_RESULT_SUCCESS);
    EXPECT_EQ(metricGroupCount, 1u);
    EXPECT_NE(metricGroupHandle, nullptr);

    // Raw results.
    // Buffer layout: header, then dataCount offsets, then dataCount sizes, then the report data.
    // NOTE(review): the reinterpret_cast uses below have no target type; the template
    // arguments appear to have been stripped from this copy of the file.
    const size_t rawResultsSize = 560;
    uint8_t rawResults[rawResultsSize] = {};
    MetricGroupCalculateHeader *pRawHeader = reinterpret_cast(rawResults);
    pRawHeader->magic = MetricGroupCalculateHeader::magicValue;
    pRawHeader->dataCount = subDeviceCount;
    pRawHeader->rawDataOffsets = sizeof(MetricGroupCalculateHeader);
    pRawHeader->rawDataSizes = pRawHeader->rawDataOffsets + sizeof(uint32_t) * pRawHeader->dataCount;
    pRawHeader->rawDataOffset = pRawHeader->rawDataSizes + sizeof(uint32_t) * pRawHeader->dataCount;

    uint32_t *pRawDataOffsets = reinterpret_cast(rawResults + pRawHeader->rawDataOffsets);
    uint32_t *pRawDataSizes = reinterpret_cast(rawResults + pRawHeader->rawDataSizes);
    // QueryReportSize is 0 in this test, so every offset and size written here is 0.
    // NOTE(review): entries [0] and [1] are written unconditionally while dataCount is
    // subDeviceCount - this presumably relies on the fixture exposing two sub-devices; confirm.
    pRawDataOffsets[0] = 0;
    pRawDataOffsets[1] = metricsSetParams.QueryReportSize;
    pRawDataSizes[0] = metricsSetParams.QueryReportSize;
    pRawDataSizes[1] = metricsSetParams.QueryReportSize;

    // Valid raw buffer. Invalid query report size.
uint32_t dataCount = 0;
    uint32_t totalMetricCount = 0;
    EXPECT_NE(metricsSetParams.QueryReportSize, rawResultsSize);
    // The call is expected to fail and leave both counts untouched at zero.
    EXPECT_EQ(zetMetricGroupCalculateMultipleMetricValuesExp(metricGroupHandle, ZET_METRIC_GROUP_CALCULATION_TYPE_METRIC_VALUES, rawResultsSize, rawResults, &dataCount, &totalMetricCount, nullptr, nullptr), ZE_RESULT_ERROR_UNKNOWN);
    EXPECT_EQ(dataCount, 0u);
    EXPECT_EQ(totalMetricCount, 0u);
}

// The count query succeeds, but CalculateMetrics returns CC_ERROR_GENERAL on its first
// call, so the copy-out call is expected to fail and leave the per-data counts at zero.
// NOTE(review): the Mock declarations and static_cast uses below have no template
// argument; they appear to have been stripped from this copy of the file.
TEST_F(MetricEnumerationMultiDeviceTest, givenErrorGeneralOnCalculateMetricsWhenZetMetricGroupCalculateMetricValuesExpIsCalledThenReturnsFail) {

    auto &deviceImp = *static_cast(devices[0]);
    const uint32_t subDeviceCount = static_cast(deviceImp.subDevices.size());

    metricsDeviceParams.ConcurrentGroupsCount = 1;

    Mock metricsConcurrentGroup;
    TConcurrentGroupParams_1_0 metricsConcurrentGroupParams = {};
    metricsConcurrentGroupParams.MetricSetsCount = 1;
    metricsConcurrentGroupParams.SymbolName = "OA";

    // One metric set with 11 metrics and a 256-byte query report.
    Mock metricsSet;
    MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams = {};
    metricsSetParams.ApiMask = MetricsDiscovery::API_TYPE_OCL;
    metricsSetParams.QueryReportSize = 256;
    metricsSetParams.MetricsCount = 11;

    Mock metric;
    MetricsDiscovery::TMetricParams_1_0 metricParams = {};

    zet_metric_group_handle_t metricGroupHandle = {};

    openMetricsAdapter();

    // Mock wiring: device -> concurrent group -> metric set -> metric.
    EXPECT_CALL(metricsDevice, GetParams())
        .WillRepeatedly(Return(&metricsDeviceParams));

    EXPECT_CALL(metricsDevice, GetConcurrentGroup(_))
        .Times(subDeviceCount)
        .WillRepeatedly(Return(&metricsConcurrentGroup));

    EXPECT_CALL(metricsConcurrentGroup, GetParams())
        .Times(subDeviceCount)
        .WillRepeatedly(Return(&metricsConcurrentGroupParams));

    EXPECT_CALL(metricsConcurrentGroup, GetMetricSet(_))
        .Times(subDeviceCount)
        .WillRepeatedly(Return(&metricsSet));

    EXPECT_CALL(metricsSet, GetParams())
        .WillRepeatedly(Return(&metricsSetParams));

    EXPECT_CALL(metricsSet, GetMetric(_))
        .Times(metricsSetParams.MetricsCount * subDeviceCount)
        .WillRepeatedly(Return(&metric));

    EXPECT_CALL(metricsSet, SetApiFiltering(_))
.WillRepeatedly(Return(TCompletionCode::CC_OK));

    EXPECT_CALL(metric, GetParams())
        .WillRepeatedly(Return(&metricParams));

    // Exactly one calculation attempt is expected, and it fails.
    EXPECT_CALL(metricsSet, CalculateMetrics(_, _, _, _, _, _, _))
        .Times(1)
        .WillOnce(Return(TCompletionCode::CC_ERROR_GENERAL));

    // Metric group handles.
    uint32_t metricGroupCount = 1;
    EXPECT_EQ(zetMetricGroupGet(devices[0]->toHandle(), &metricGroupCount, &metricGroupHandle), ZE_RESULT_SUCCESS);
    EXPECT_EQ(metricGroupCount, 1u);
    EXPECT_NE(metricGroupHandle, nullptr);

    // Raw results.
    // Buffer layout: header, then dataCount offsets, then dataCount sizes, then the report data.
    // NOTE(review): the reinterpret_cast uses below have no target type; the template
    // arguments appear to have been stripped from this copy of the file.
    const size_t rawResultsSize = 560;
    uint8_t rawResults[rawResultsSize] = {};
    MetricGroupCalculateHeader *pRawHeader = reinterpret_cast(rawResults);
    pRawHeader->magic = MetricGroupCalculateHeader::magicValue;
    pRawHeader->dataCount = subDeviceCount;
    pRawHeader->rawDataOffsets = sizeof(MetricGroupCalculateHeader);
    pRawHeader->rawDataSizes = pRawHeader->rawDataOffsets + sizeof(uint32_t) * pRawHeader->dataCount;
    pRawHeader->rawDataOffset = pRawHeader->rawDataSizes + sizeof(uint32_t) * pRawHeader->dataCount;

    uint32_t *pRawDataOffsets = reinterpret_cast(rawResults + pRawHeader->rawDataOffsets);
    uint32_t *pRawDataSizes = reinterpret_cast(rawResults + pRawHeader->rawDataSizes);
    // NOTE(review): entries [0] and [1] are written unconditionally while dataCount is
    // subDeviceCount - this presumably relies on the fixture exposing two sub-devices; confirm.
    pRawDataOffsets[0] = 0;
    pRawDataOffsets[1] = metricsSetParams.QueryReportSize;
    pRawDataSizes[0] = metricsSetParams.QueryReportSize;
    pRawDataSizes[1] = metricsSetParams.QueryReportSize;

    // Valid raw buffer.
    // Count query with null output arrays; this step is expected to succeed.
    uint32_t dataCount = 0;
    uint32_t totalMetricCount = 0;
    EXPECT_NE(metricsSetParams.QueryReportSize, rawResultsSize);
    EXPECT_EQ(zetMetricGroupCalculateMultipleMetricValuesExp(metricGroupHandle, ZET_METRIC_GROUP_CALCULATION_TYPE_METRIC_VALUES, rawResultsSize, rawResults, &dataCount, &totalMetricCount, nullptr, nullptr), ZE_RESULT_SUCCESS);
    EXPECT_EQ(dataCount, subDeviceCount);
    EXPECT_EQ(totalMetricCount, subDeviceCount * metricsSetParams.MetricsCount);

    // Copy calculated metrics. CalculateMetrics returns CC_ERROR_GENERAL for first sub device.
std::vector metricCounts(dataCount);
    std::vector caculatedRawResults(totalMetricCount);
    // The copy-out call is expected to fail; both per-data counts must stay at zero.
    // NOTE(review): both std::vector declarations above have no element type; the
    // template arguments appear to have been stripped from this copy of the file.
    EXPECT_EQ(zetMetricGroupCalculateMultipleMetricValuesExp(metricGroupHandle, ZET_METRIC_GROUP_CALCULATION_TYPE_METRIC_VALUES, rawResultsSize, rawResults, &dataCount, &totalMetricCount, metricCounts.data(), caculatedRawResults.data()), ZE_RESULT_ERROR_UNKNOWN);
    EXPECT_EQ(metricCounts[0], 0u);
    EXPECT_EQ(metricCounts[1], 0u);
}

// Passes a buffer carrying the multi-data header to the non-Exp entry point
// zetMetricGroupCalculateMetricValues and expects ZE_RESULT_ERROR_UNKNOWN.
// NOTE(review): the Mock declarations and static_cast uses below have no template
// argument; they appear to have been stripped from this copy of the file.
TEST_F(MetricEnumerationMultiDeviceTest, givenCorrectRawDataHeaderWhenZetMetricGroupCalculateMetricValuesIsCalledThenReturnsFail) {

    auto &deviceImp = *static_cast(devices[0]);
    const uint32_t subDeviceCount = static_cast(deviceImp.subDevices.size());

    metricsDeviceParams.ConcurrentGroupsCount = 1;

    Mock metricsConcurrentGroup;
    TConcurrentGroupParams_1_0 metricsConcurrentGroupParams = {};
    metricsConcurrentGroupParams.MetricSetsCount = 1;
    metricsConcurrentGroupParams.SymbolName = "OA";

    // One metric set with 11 metrics and a 256-byte query report.
    Mock metricsSet;
    MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams = {};
    metricsSetParams.ApiMask = MetricsDiscovery::API_TYPE_OCL;
    metricsSetParams.QueryReportSize = 256;
    metricsSetParams.MetricsCount = 11;

    Mock metric;
    MetricsDiscovery::TMetricParams_1_0 metricParams = {};

    zet_metric_group_handle_t metricGroupHandle = {};

    openMetricsAdapter();

    // Mock wiring: device -> concurrent group -> metric set -> metric.
    EXPECT_CALL(metricsDevice, GetParams())
        .WillRepeatedly(Return(&metricsDeviceParams));

    EXPECT_CALL(metricsDevice, GetConcurrentGroup(_))
        .Times(subDeviceCount)
        .WillRepeatedly(Return(&metricsConcurrentGroup));

    EXPECT_CALL(metricsConcurrentGroup, GetParams())
        .Times(subDeviceCount)
        .WillRepeatedly(Return(&metricsConcurrentGroupParams));

    EXPECT_CALL(metricsConcurrentGroup, GetMetricSet(_))
        .Times(subDeviceCount)
        .WillRepeatedly(Return(&metricsSet));

    EXPECT_CALL(metricsSet, GetParams())
        .WillRepeatedly(Return(&metricsSetParams));

    EXPECT_CALL(metricsSet, GetMetric(_))
        .Times(metricsSetParams.MetricsCount * subDeviceCount)
        .WillRepeatedly(Return(&metric));

    EXPECT_CALL(metricsSet, SetApiFiltering(_))
.WillRepeatedly(Return(TCompletionCode::CC_OK)); EXPECT_CALL(metric, GetParams()) .WillRepeatedly(Return(&metricParams)); // Metric group handles. uint32_t metricGroupCount = 1; EXPECT_EQ(zetMetricGroupGet(devices[0]->toHandle(), &metricGroupCount, &metricGroupHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); EXPECT_NE(metricGroupHandle, nullptr); // Raw results. const size_t rawResultsSize = 560; uint8_t rawResults[rawResultsSize] = {}; MetricGroupCalculateHeader *pRawHeader = reinterpret_cast(rawResults); pRawHeader->magic = MetricGroupCalculateHeader::magicValue; pRawHeader->dataCount = subDeviceCount; pRawHeader->rawDataOffsets = sizeof(MetricGroupCalculateHeader); pRawHeader->rawDataSizes = pRawHeader->rawDataOffsets + sizeof(uint32_t) * pRawHeader->dataCount; pRawHeader->rawDataOffset = pRawHeader->rawDataSizes + sizeof(uint32_t) * pRawHeader->dataCount; uint32_t *pRawDataOffsets = reinterpret_cast(rawResults + pRawHeader->rawDataOffsets); uint32_t *pRawDataSizes = reinterpret_cast(rawResults + pRawHeader->rawDataSizes); pRawDataOffsets[0] = 0; pRawDataOffsets[1] = metricsSetParams.QueryReportSize; pRawDataSizes[0] = metricsSetParams.QueryReportSize; pRawDataSizes[1] = metricsSetParams.QueryReportSize; // Valid raw buffer for zetMetricGroupCalculateMultipleMetricValuesExp. 
uint32_t metricCount = 0; EXPECT_NE(metricsSetParams.QueryReportSize, rawResultsSize); EXPECT_EQ(zetMetricGroupCalculateMetricValues(metricGroupHandle, ZET_METRIC_GROUP_CALCULATION_TYPE_METRIC_VALUES, rawResultsSize, rawResults, &metricCount, nullptr), ZE_RESULT_ERROR_UNKNOWN); EXPECT_EQ(metricCount, 0u); } } // namespace ult } // namespace L0 test_metric_oa_initialization.cpp000066400000000000000000000033151422164147700357300ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/metrics/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/os_library.h" #include "level_zero/core/test/unit_tests/mocks/mock_driver.h" #include "level_zero/tools/source/metrics/metric_oa_source.h" #include "level_zero/tools/test/unit_tests/sources/metrics/mock_metric_oa.h" #include "gmock/gmock.h" #include "gtest/gtest.h" using ::testing::Return; namespace L0 { extern _ze_driver_handle_t *GlobalDriverHandle; namespace ult { class MockOsLibrary : public NEO::OsLibrary { public: MockOsLibrary(const std::string &name, std::string *errorValue) { } void *getProcAddress(const std::string &procName) override { return nullptr; } bool isLoaded() override { return false; } static OsLibrary *load(const std::string &name) { auto ptr = new (std::nothrow) MockOsLibrary(name, nullptr); if (ptr == nullptr) { return nullptr; } return ptr; } }; using MetricInitializationTest = Test; TEST_F(MetricInitializationTest, GivenOaDependenciesAreAvailableThenMetricInitializationIsSuccess) { GlobalDriverHandle = static_cast<_ze_driver_handle_t *>(driverHandle.get()); EXPECT_CALL(*mockMetricEnumeration, loadMetricsDiscovery()) .Times(1) .WillOnce(Return(ZE_RESULT_SUCCESS)); EXPECT_CALL(*mockMetricsLibrary, load()) .Times(1) .WillOnce(Return(true)); OaMetricSourceImp::osLibraryLoadFunction = MockOsLibrary::load; EXPECT_EQ(device->getMetricDeviceContext().enableMetricApi(), ZE_RESULT_SUCCESS); 
OaMetricSourceImp::osLibraryLoadFunction = NEO::OsLibrary::load; } } // namespace ult } // namespace L0 test_metric_oa_query_pool_1.cpp000066400000000000000000001762371422164147700353350ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/metrics/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/device_factory.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/source/driver/driver_imp.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h" #include "level_zero/core/test/unit_tests/mocks/mock_device.h" #include "level_zero/core/test/unit_tests/mocks/mock_driver_handle.h" #include "level_zero/tools/source/metrics/metric_oa_source.h" #include "level_zero/tools/test/unit_tests/sources/metrics/mock_metric_oa.h" #include "gmock/gmock.h" #include "gtest/gtest.h" using ::testing::_; using ::testing::Return; namespace L0 { namespace ult { class MetricQueryPoolTest : public MetricContextFixture, public ::testing::Test { public: void SetUp() override { ze_result_t returnValue = ZE_RESULT_SUCCESS; MetricContextFixture::SetUp(); auto executionEnvironment = new NEO::ExecutionEnvironment(); driverHandle.reset(DriverHandle::create(NEO::DeviceFactory::createDevices(*executionEnvironment), L0EnvVariables{}, &returnValue)); } void TearDown() override { MetricContextFixture::TearDown(); driverHandle.reset(); GlobalDriver = nullptr; } std::unique_ptr driverHandle; }; TEST_F(MetricQueryPoolTest, givenCorrectArgumentsWhenZetMetricQueryPoolCreateIsCalledThenQueryPoolIsObtained) { zet_device_handle_t metricDevice = device->toHandle(); Mock metricGroup; zet_metric_group_properties_t metricGroupProperties = {}; metricGroupProperties.samplingType = ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_EVENT_BASED; zet_metric_query_pool_handle_t poolHandle = {}; zet_metric_query_pool_desc_t poolDesc = {}; poolDesc.stype = 
ZET_STRUCTURE_TYPE_METRIC_QUERY_POOL_DESC; poolDesc.count = 1; poolDesc.type = ZET_METRIC_QUERY_POOL_TYPE_PERFORMANCE; TypedValue_1_0 value = {}; value.Type = ValueType::Uint32; value.ValueUInt32 = 64; QueryHandle_1_0 queryHandle = {&value}; ContextHandle_1_0 contextHandle = {&value}; EXPECT_CALL(*mockMetricEnumeration, isInitialized()) .Times(1) .WillOnce(Return(true)); EXPECT_CALL(*mockMetricsLibrary, getContextData(_, _)) .Times(1) .WillOnce(Return(true)); EXPECT_CALL(*mockMetricsLibrary, load()) .Times(0); EXPECT_CALL(metricGroup, getProperties(_)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<0>(metricGroupProperties), Return(ZE_RESULT_SUCCESS))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockQueryCreate(_, _)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<1>(queryHandle), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockQueryDelete(_)) .Times(1) .WillOnce(Return(StatusCode::Success)); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockGetParameter(_, _, _)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<2>(value), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockContextCreate(_, _, _)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<2>(contextHandle), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockContextDelete(_)) .Times(1) .WillOnce(Return(StatusCode::Success)); EXPECT_EQ(zetMetricQueryPoolCreate(context->toHandle(), metricDevice, metricGroup.toHandle(), &poolDesc, &poolHandle), ZE_RESULT_SUCCESS); EXPECT_NE(poolHandle, nullptr); EXPECT_EQ(zetMetricQueryPoolDestroy(poolHandle), ZE_RESULT_SUCCESS); } TEST_F(MetricQueryPoolTest, givenIncorrectMetricGroupTypeWhenZetMetricQueryPoolCreateIsCalledThenReturnsFail) { zet_device_handle_t metricDevice = device->toHandle(); Mock metricGroup; zet_metric_group_properties_t metricGroupProperties = {}; metricGroupProperties.samplingType = ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_TIME_BASED; zet_metric_query_pool_handle_t 
poolHandle = {}; zet_metric_query_pool_desc_t poolDesc = {}; poolDesc.stype = ZET_STRUCTURE_TYPE_METRIC_QUERY_POOL_DESC; poolDesc.count = 1; poolDesc.type = ZET_METRIC_QUERY_POOL_TYPE_PERFORMANCE; TypedValue_1_0 value = {}; value.Type = ValueType::Uint32; value.ValueUInt32 = 64; ContextHandle_1_0 contextHandle = {&value}; EXPECT_CALL(*mockMetricEnumeration, isInitialized()) .Times(0); EXPECT_CALL(*mockMetricsLibrary, getContextData(_, _)) .Times(0); EXPECT_CALL(*mockMetricsLibrary, load()) .Times(0); EXPECT_CALL(metricGroup, getProperties(_)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<0>(metricGroupProperties), Return(ZE_RESULT_SUCCESS))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockQueryCreate(_, _)) .Times(0); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockQueryDelete(_)) .Times(0); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockGetParameter(_, _, _)) .Times(0); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockContextCreate(_, _, _)) .Times(0); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockContextDelete(_)) .Times(0); EXPECT_EQ(zetMetricQueryPoolCreate(context->toHandle(), metricDevice, metricGroup.toHandle(), &poolDesc, &poolHandle), ZE_RESULT_ERROR_INVALID_ARGUMENT); EXPECT_EQ(poolHandle, nullptr); } TEST_F(MetricQueryPoolTest, givenIncorrectParameterWhenZetMetricQueryPoolCreateIsCalledThenReturnsFail) { zet_device_handle_t metricDevice = device->toHandle(); Mock metricGroup; zet_metric_group_properties_t metricGroupProperties = {}; metricGroupProperties.samplingType = ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_EVENT_BASED; zet_metric_query_pool_handle_t poolHandle = {}; zet_metric_query_pool_desc_t poolDesc = {}; poolDesc.stype = ZET_STRUCTURE_TYPE_METRIC_QUERY_POOL_DESC; poolDesc.count = 1; poolDesc.type = ZET_METRIC_QUERY_POOL_TYPE_PERFORMANCE; TypedValue_1_0 value = {}; value.Type = ValueType::Uint32; value.ValueUInt32 = 64; QueryHandle_1_0 queryHandle = {&value}; ContextHandle_1_0 contextHandle = {&value}; EXPECT_CALL(*mockMetricEnumeration, 
isInitialized()) .Times(1) .WillOnce(Return(true)); EXPECT_CALL(*mockMetricsLibrary, getContextData(_, _)) .Times(1) .WillOnce(Return(true)); EXPECT_CALL(*mockMetricsLibrary, load()) .Times(0); EXPECT_CALL(metricGroup, getProperties(_)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<0>(metricGroupProperties), Return(ZE_RESULT_SUCCESS))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockQueryCreate(_, _)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<1>(queryHandle), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockQueryDelete(_)) .Times(1) .WillOnce(Return(StatusCode::Success)); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockGetParameter(_, _, _)) .Times(1) .WillOnce(Return(StatusCode::Failed)); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockContextCreate(_, _, _)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<2>(contextHandle), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockContextDelete(_)) .Times(1) .WillOnce(Return(StatusCode::Success)); EXPECT_EQ(zetMetricQueryPoolCreate(context->toHandle(), metricDevice, metricGroup.toHandle(), &poolDesc, &poolHandle), ZE_RESULT_ERROR_INVALID_ARGUMENT); EXPECT_EQ(poolHandle, nullptr); } TEST_F(MetricQueryPoolTest, givenIncorrectContextWhenZetMetricQueryPoolCreateIsCalledThenReturnsFail) { zet_device_handle_t metricDevice = device->toHandle(); Mock metricGroup; zet_metric_group_properties_t metricGroupProperties = {}; metricGroupProperties.samplingType = ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_EVENT_BASED; zet_metric_query_pool_handle_t poolHandle = {}; zet_metric_query_pool_desc_t poolDesc = {}; poolDesc.stype = ZET_STRUCTURE_TYPE_METRIC_QUERY_POOL_DESC; poolDesc.count = 1; poolDesc.type = ZET_METRIC_QUERY_POOL_TYPE_PERFORMANCE; EXPECT_CALL(*mockMetricEnumeration, isInitialized()) .Times(1) .WillOnce(Return(true)); EXPECT_CALL(*mockMetricsLibrary, getContextData(_, _)) .Times(1) .WillOnce(Return(true)); EXPECT_CALL(*mockMetricsLibrary, load()) .Times(0); 
EXPECT_CALL(metricGroup, getProperties(_)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<0>(metricGroupProperties), Return(ZE_RESULT_SUCCESS))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockContextCreate(_, _, _)) .Times(1) .WillOnce(Return(StatusCode::Failed)); EXPECT_EQ(zetMetricQueryPoolCreate(context->toHandle(), metricDevice, metricGroup.toHandle(), &poolDesc, &poolHandle), ZE_RESULT_ERROR_INVALID_ARGUMENT); EXPECT_EQ(poolHandle, nullptr); } TEST_F(MetricQueryPoolTest, givenIncorrectContextDataWhenZetMetricQueryPoolCreateIsCalledThenReturnsFail) { zet_device_handle_t metricDevice = device->toHandle(); Mock metricGroup; zet_metric_group_properties_t metricGroupProperties = {}; metricGroupProperties.samplingType = ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_EVENT_BASED; zet_metric_query_pool_handle_t poolHandle = {}; zet_metric_query_pool_desc_t poolDesc = {}; poolDesc.stype = ZET_STRUCTURE_TYPE_METRIC_QUERY_POOL_DESC; poolDesc.count = 1; poolDesc.type = ZET_METRIC_QUERY_POOL_TYPE_PERFORMANCE; EXPECT_CALL(*mockMetricEnumeration, isInitialized()) .Times(1) .WillOnce(Return(true)); EXPECT_CALL(*mockMetricsLibrary, getContextData(_, _)) .Times(1) .WillOnce(Return(false)); EXPECT_CALL(*mockMetricsLibrary, load()) .Times(0); EXPECT_CALL(metricGroup, getProperties(_)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<0>(metricGroupProperties), Return(ZE_RESULT_SUCCESS))); EXPECT_EQ(zetMetricQueryPoolCreate(context->toHandle(), metricDevice, metricGroup.toHandle(), &poolDesc, &poolHandle), ZE_RESULT_ERROR_INVALID_ARGUMENT); EXPECT_EQ(poolHandle, nullptr); } TEST_F(MetricQueryPoolTest, givenIncorrectGpuReportSizeWhenZetMetricQueryPoolCreateIsCalledThenReturnsFail) { zet_device_handle_t metricDevice = device->toHandle(); Mock metricGroup; zet_metric_group_properties_t metricGroupProperties = {}; metricGroupProperties.samplingType = ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_EVENT_BASED; zet_metric_query_pool_handle_t poolHandle = {}; zet_metric_query_pool_desc_t poolDesc = {}; 
poolDesc.stype = ZET_STRUCTURE_TYPE_METRIC_QUERY_POOL_DESC; poolDesc.count = 1; poolDesc.type = ZET_METRIC_QUERY_POOL_TYPE_PERFORMANCE; TypedValue_1_0 value = {}; value.Type = ValueType::Uint32; value.ValueUInt32 = 0; QueryHandle_1_0 queryHandle = {&value}; ContextHandle_1_0 contextHandle = {&value}; EXPECT_CALL(*mockMetricEnumeration, isInitialized()) .Times(1) .WillOnce(Return(true)); EXPECT_CALL(*mockMetricsLibrary, getContextData(_, _)) .Times(1) .WillOnce(Return(true)); EXPECT_CALL(*mockMetricsLibrary, load()) .Times(0); EXPECT_CALL(metricGroup, getProperties(_)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<0>(metricGroupProperties), Return(ZE_RESULT_SUCCESS))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockQueryCreate(_, _)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<1>(queryHandle), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockQueryDelete(_)) .Times(1) .WillOnce(Return(StatusCode::Success)); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockGetParameter(_, _, _)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<2>(value), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockContextCreate(_, _, _)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<2>(contextHandle), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockContextDelete(_)) .Times(1) .WillOnce(Return(StatusCode::Success)); EXPECT_EQ(zetMetricQueryPoolCreate(context->toHandle(), metricDevice, metricGroup.toHandle(), &poolDesc, &poolHandle), ZE_RESULT_ERROR_INVALID_ARGUMENT); EXPECT_EQ(poolHandle, nullptr); } TEST_F(MetricQueryPoolTest, givenCorrectArgumentsWhenZetMetricQueryCreateIsCalledThenMetricQueryIsObtained) { zet_device_handle_t metricDevice = device->toHandle(); Mock metricGroup; zet_metric_group_properties_t metricGroupProperties = {}; metricGroupProperties.samplingType = ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_EVENT_BASED; zet_metric_query_handle_t queryHandle = {}; 
zet_metric_query_pool_handle_t poolHandle = {}; zet_metric_query_pool_desc_t poolDesc = {}; poolDesc.stype = ZET_STRUCTURE_TYPE_METRIC_QUERY_POOL_DESC; poolDesc.count = 1; poolDesc.type = ZET_METRIC_QUERY_POOL_TYPE_PERFORMANCE; TypedValue_1_0 value = {}; value.Type = ValueType::Uint32; value.ValueUInt32 = 64; QueryHandle_1_0 metricsLibraryQueryHandle = {&value}; ContextHandle_1_0 metricsLibraryContextHandle = {&value}; EXPECT_CALL(*mockMetricEnumeration, isInitialized()) .Times(1) .WillOnce(Return(true)); EXPECT_CALL(*mockMetricsLibrary, getContextData(_, _)) .Times(1) .WillOnce(Return(true)); EXPECT_CALL(*mockMetricsLibrary, load()) .Times(0); EXPECT_CALL(metricGroup, getProperties(_)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<0>(metricGroupProperties), Return(ZE_RESULT_SUCCESS))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockQueryCreate(_, _)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<1>(metricsLibraryQueryHandle), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockQueryDelete(_)) .Times(1) .WillOnce(Return(StatusCode::Success)); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockGetParameter(_, _, _)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<2>(value), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockContextCreate(_, _, _)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<2>(metricsLibraryContextHandle), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockContextDelete(_)) .Times(1) .WillOnce(Return(StatusCode::Success)); // Create metric query pool. EXPECT_EQ(zetMetricQueryPoolCreate(context->toHandle(), metricDevice, metricGroup.toHandle(), &poolDesc, &poolHandle), ZE_RESULT_SUCCESS); EXPECT_NE(poolHandle, nullptr); // Create metric query. EXPECT_EQ(zetMetricQueryCreate(poolHandle, 0, &queryHandle), ZE_RESULT_SUCCESS); EXPECT_NE(queryHandle, nullptr); // Destroy query and its pool. 
EXPECT_EQ(zetMetricQueryDestroy(queryHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(zetMetricQueryPoolDestroy(poolHandle), ZE_RESULT_SUCCESS); } TEST_F(MetricQueryPoolTest, givenIncorrectSlotIndexWhenZetMetricQueryCreateIsCalledThenReturnsFail) { zet_device_handle_t metricDevice = device->toHandle(); Mock metricGroup; zet_metric_group_properties_t metricGroupProperties = {}; metricGroupProperties.samplingType = ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_EVENT_BASED; zet_metric_query_handle_t queryHandle = {}; zet_metric_query_pool_handle_t poolHandle = {}; zet_metric_query_pool_desc_t poolDesc = {}; poolDesc.stype = ZET_STRUCTURE_TYPE_METRIC_QUERY_POOL_DESC; poolDesc.count = 1; poolDesc.type = ZET_METRIC_QUERY_POOL_TYPE_PERFORMANCE; TypedValue_1_0 value = {}; value.Type = ValueType::Uint32; value.ValueUInt32 = 64; QueryHandle_1_0 metricsLibraryQueryHandle = {&value}; ContextHandle_1_0 metricsLibraryContextHandle = {&value}; EXPECT_CALL(*mockMetricEnumeration, isInitialized()) .Times(1) .WillOnce(Return(true)); EXPECT_CALL(*mockMetricsLibrary, getContextData(_, _)) .Times(1) .WillOnce(Return(true)); EXPECT_CALL(*mockMetricsLibrary, load()) .Times(0); EXPECT_CALL(metricGroup, getProperties(_)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<0>(metricGroupProperties), Return(ZE_RESULT_SUCCESS))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockQueryCreate(_, _)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<1>(metricsLibraryQueryHandle), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockQueryDelete(_)) .Times(1) .WillOnce(Return(StatusCode::Success)); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockGetParameter(_, _, _)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<2>(value), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockContextCreate(_, _, _)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<2>(metricsLibraryContextHandle), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, 
MockContextDelete(_)) .Times(1) .WillOnce(Return(StatusCode::Success)); // Create metric query pool. EXPECT_EQ(zetMetricQueryPoolCreate(context->toHandle(), metricDevice, metricGroup.toHandle(), &poolDesc, &poolHandle), ZE_RESULT_SUCCESS); EXPECT_NE(poolHandle, nullptr); // Create metric query. EXPECT_EQ(zetMetricQueryCreate(poolHandle, 1, &queryHandle), ZE_RESULT_ERROR_INVALID_ARGUMENT); EXPECT_EQ(queryHandle, nullptr); // Destroy metric query pool. EXPECT_EQ(zetMetricQueryPoolDestroy(poolHandle), ZE_RESULT_SUCCESS); } TEST_F(MetricQueryPoolTest, givenCorrectArgumentsWhenZetMetricQueryResetIsCalledThenReturnsSuccess) { zet_device_handle_t metricDevice = device->toHandle(); Mock metricGroup; zet_metric_group_properties_t metricGroupProperties = {}; metricGroupProperties.samplingType = ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_EVENT_BASED; zet_metric_query_handle_t queryHandle = {}; zet_metric_query_pool_handle_t poolHandle = {}; zet_metric_query_pool_desc_t poolDesc = {}; poolDesc.stype = ZET_STRUCTURE_TYPE_METRIC_QUERY_POOL_DESC; poolDesc.count = 1; poolDesc.type = ZET_METRIC_QUERY_POOL_TYPE_PERFORMANCE; TypedValue_1_0 value = {}; value.Type = ValueType::Uint32; value.ValueUInt32 = 64; QueryHandle_1_0 metricsLibraryQueryHandle = {&value}; ContextHandle_1_0 metricsLibraryContextHandle = {&value}; EXPECT_CALL(*mockMetricEnumeration, isInitialized()) .Times(1) .WillOnce(Return(true)); EXPECT_CALL(*mockMetricsLibrary, getContextData(_, _)) .Times(1) .WillOnce(Return(true)); EXPECT_CALL(*mockMetricsLibrary, load()) .Times(0); EXPECT_CALL(metricGroup, getProperties(_)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<0>(metricGroupProperties), Return(ZE_RESULT_SUCCESS))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockQueryCreate(_, _)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<1>(metricsLibraryQueryHandle), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockQueryDelete(_)) .Times(1) .WillOnce(Return(StatusCode::Success)); 
EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockGetParameter(_, _, _)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<2>(value), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockContextCreate(_, _, _)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<2>(metricsLibraryContextHandle), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockContextDelete(_)) .Times(1) .WillOnce(Return(StatusCode::Success)); // Create metric query pool. EXPECT_EQ(zetMetricQueryPoolCreate(context->toHandle(), metricDevice, metricGroup.toHandle(), &poolDesc, &poolHandle), ZE_RESULT_SUCCESS); EXPECT_NE(poolHandle, nullptr); // Create metric query. EXPECT_EQ(zetMetricQueryCreate(poolHandle, 0, &queryHandle), ZE_RESULT_SUCCESS); EXPECT_NE(queryHandle, nullptr); // Reset metric query. EXPECT_EQ(zetMetricQueryReset(queryHandle), ZE_RESULT_SUCCESS); // Destroy query and its pool. EXPECT_EQ(zetMetricQueryDestroy(queryHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(zetMetricQueryPoolDestroy(poolHandle), ZE_RESULT_SUCCESS); } TEST_F(MetricQueryPoolTest, givenIncorrectArgumentsWhenZetCommandListAppendMetricQueryBeginIsCalledThenReturnsFail) { zet_device_handle_t metricDevice = device->toHandle(); Mock metricGroup; zet_metric_group_properties_t metricGroupProperties = {}; metricGroupProperties.samplingType = ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_EVENT_BASED; zet_metric_query_handle_t queryHandle = {}; zet_metric_query_pool_handle_t poolHandle = {}; zet_metric_query_pool_desc_t poolDesc = {}; poolDesc.stype = ZET_STRUCTURE_TYPE_METRIC_QUERY_POOL_DESC; poolDesc.count = 1; poolDesc.type = ZET_METRIC_QUERY_POOL_TYPE_PERFORMANCE; TypedValue_1_0 value = {}; value.Type = ValueType::Uint32; value.ValueUInt32 = 64; QueryHandle_1_0 metricsLibraryQueryHandle = {&value}; ContextHandle_1_0 metricsLibraryContextHandle = {&value}; EXPECT_CALL(*mockMetricEnumeration, isInitialized()) .Times(1) .WillOnce(Return(true)); EXPECT_CALL(*mockMetricsLibrary, 
getContextData(_, _)) .Times(1) .WillOnce(Return(true)); EXPECT_CALL(*mockMetricsLibrary, load()) .Times(0); EXPECT_CALL(metricGroup, getProperties(_)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<0>(metricGroupProperties), Return(ZE_RESULT_SUCCESS))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockQueryCreate(_, _)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<1>(metricsLibraryQueryHandle), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockQueryDelete(_)) .Times(1) .WillOnce(Return(StatusCode::Success)); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockGetParameter(_, _, _)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<2>(value), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockContextCreate(_, _, _)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<2>(metricsLibraryContextHandle), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockContextDelete(_)) .Times(1) .WillOnce(Return(StatusCode::Success)); // Create metric query pool. EXPECT_EQ(zetMetricQueryPoolCreate(context->toHandle(), metricDevice, metricGroup.toHandle(), &poolDesc, &poolHandle), ZE_RESULT_SUCCESS); EXPECT_NE(poolHandle, nullptr); // Create metric query. EXPECT_EQ(zetMetricQueryCreate(poolHandle, 0, &queryHandle), ZE_RESULT_SUCCESS); EXPECT_NE(queryHandle, nullptr); // Reset metric query. EXPECT_EQ(zetMetricQueryReset(queryHandle), ZE_RESULT_SUCCESS); // Destroy query and its pool. 
EXPECT_EQ(zetMetricQueryDestroy(queryHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(zetMetricQueryPoolDestroy(poolHandle), ZE_RESULT_SUCCESS); } TEST_F(MetricQueryPoolTest, givenCorrectArgumentsWhenZetCommandListAppendMetricQueryBeginIsCalledThenReturnsSuccess) { zet_device_handle_t metricDevice = device->toHandle(); ze_result_t returnValue; std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)); zet_command_list_handle_t commandListHandle = commandList->toHandle(); Mock metricGroup; zet_metric_group_properties_t metricGroupProperties = {}; metricGroupProperties.samplingType = ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_EVENT_BASED; zet_metric_query_handle_t queryHandle = {}; zet_metric_query_pool_handle_t poolHandle = {}; zet_metric_query_pool_desc_t poolDesc = {}; poolDesc.stype = ZET_STRUCTURE_TYPE_METRIC_QUERY_POOL_DESC; poolDesc.count = 1; poolDesc.type = ZET_METRIC_QUERY_POOL_TYPE_PERFORMANCE; TypedValue_1_0 value = {}; value.Type = ValueType::Uint32; value.ValueUInt32 = 64; QueryHandle_1_0 metricsLibraryQueryHandle = {&value}; ContextHandle_1_0 metricsLibraryContextHandle = {&value}; CommandBufferSize_1_0 commandBufferSize = {}; commandBufferSize.GpuMemorySize = 100; EXPECT_CALL(*mockMetricEnumeration, isInitialized()) .Times(1) .WillOnce(Return(true)); EXPECT_CALL(*mockMetricsLibrary, getContextData(_, _)) .Times(1) .WillOnce(Return(true)); EXPECT_CALL(*mockMetricsLibrary, load()) .Times(0); EXPECT_CALL(metricGroup, getProperties(_)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<0>(metricGroupProperties), Return(ZE_RESULT_SUCCESS))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockQueryCreate(_, _)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<1>(metricsLibraryQueryHandle), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockQueryDelete(_)) .Times(1) .WillOnce(Return(StatusCode::Success)); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockGetParameter(_, _, _)) 
.Times(1) .WillOnce(DoAll(::testing::SetArgPointee<2>(value), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockContextCreate(_, _, _)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<2>(metricsLibraryContextHandle), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockCommandBufferGetSize(_, _)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<1>(::testing::ByRef(commandBufferSize)), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockCommandBufferGet(_)) .Times(1) .WillOnce(Return(StatusCode::Success)); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockContextDelete(_)) .Times(1) .WillOnce(Return(StatusCode::Success)); // Create metric query pool. EXPECT_EQ(zetMetricQueryPoolCreate(context->toHandle(), metricDevice, metricGroup.toHandle(), &poolDesc, &poolHandle), ZE_RESULT_SUCCESS); EXPECT_NE(poolHandle, nullptr); // Create metric query. EXPECT_EQ(zetMetricQueryCreate(poolHandle, 0, &queryHandle), ZE_RESULT_SUCCESS); EXPECT_NE(queryHandle, nullptr); // Write BEGIN metric query to command list. EXPECT_EQ(zetCommandListAppendMetricQueryBegin(commandListHandle, queryHandle), ZE_RESULT_SUCCESS); // Destroy query and its pool. EXPECT_EQ(zetMetricQueryDestroy(queryHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(zetMetricQueryPoolDestroy(poolHandle), ZE_RESULT_SUCCESS); } TEST_F(MetricQueryPoolTest, givenCorrectArgumentsWhenZeEventPoolCreateIsCalledThenReturnsSuccess) { zet_device_handle_t metricDevice = device->toHandle(); ze_event_pool_handle_t eventPoolHandle = {}; ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; eventPoolDesc.flags = 0; eventPoolDesc.stype = ZE_STRUCTURE_TYPE_EVENT_POOL_DESC; // Create event pool. EXPECT_EQ(zeEventPoolCreate(context->toHandle(), &eventPoolDesc, 1, &metricDevice, &eventPoolHandle), ZE_RESULT_SUCCESS); EXPECT_NE(eventPoolHandle, nullptr); // Destroy event pool. 
EXPECT_EQ(zeEventPoolDestroy(eventPoolHandle), ZE_RESULT_SUCCESS);
}

// Creates an event pool for the fixture device and destroys it again.
// NOTE(review): despite the test name, zeEventCreate is never called here and
// no failure is asserted. Confirm whether a failing zeEventCreate call was
// intended.
TEST_F(MetricQueryPoolTest, givenIncorrectArgumentsWhenZeEventCreateIsCalledThenReturnsFail) {

    zet_device_handle_t deviceHandle = device->toHandle();

    // Descriptor for a pool with room for one event and no flags set.
    ze_event_pool_desc_t poolDescriptor = {};
    poolDescriptor.stype = ZE_STRUCTURE_TYPE_EVENT_POOL_DESC;
    poolDescriptor.flags = 0;
    poolDescriptor.count = 1;
    ze_event_pool_handle_t pool = {};

    // Create the event pool for the single device.
    EXPECT_EQ(zeEventPoolCreate(context->toHandle(), &poolDescriptor, 1, &deviceHandle, &pool), ZE_RESULT_SUCCESS);
    EXPECT_NE(pool, nullptr);

    // Destroy the event pool; no event was created from it.
    EXPECT_EQ(zeEventPoolDestroy(pool), ZE_RESULT_SUCCESS);
}

TEST_F(MetricQueryPoolTest, givenCorrectArgumentsWhenZeEventCreateIsCalledThenReturnsSuccess) {

    zet_device_handle_t metricDevice = device->toHandle();

    ze_event_pool_handle_t eventPoolHandle = {};
    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.count = 1;
    eventPoolDesc.flags = 0;
    eventPoolDesc.stype = ZE_STRUCTURE_TYPE_EVENT_POOL_DESC;

    ze_event_handle_t eventHandle = {};
    ze_event_desc_t eventDesc = {};
    eventDesc.index = 0;
    eventDesc.stype = ZE_STRUCTURE_TYPE_EVENT_DESC;
    eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST;
    eventDesc.signal = ZE_EVENT_SCOPE_FLAG_DEVICE;

    // Create event pool.
    EXPECT_EQ(zeEventPoolCreate(context->toHandle(), &eventPoolDesc, 1, &metricDevice, &eventPoolHandle), ZE_RESULT_SUCCESS);
    EXPECT_NE(eventPoolHandle, nullptr);

    // Create event.
    EXPECT_EQ(zeEventCreate(eventPoolHandle, &eventDesc, &eventHandle), ZE_RESULT_SUCCESS);
    EXPECT_NE(eventHandle, nullptr);

    // Destroy event and its pool.
EXPECT_EQ(zeEventDestroy(eventHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(zeEventPoolDestroy(eventPoolHandle), ZE_RESULT_SUCCESS); } TEST_F(MetricQueryPoolTest, givenIncorrectArgumentsWhenZetCommandListAppendMetricQueryEndIsCalledThenReturnsFail) { zet_device_handle_t metricDevice = device->toHandle(); ze_result_t returnValue; std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)); zet_command_list_handle_t commandListHandle = commandList->toHandle(); ze_event_pool_handle_t eventPoolHandle = {}; ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; eventPoolDesc.flags = 0; eventPoolDesc.stype = ZE_STRUCTURE_TYPE_EVENT_POOL_DESC; ze_event_handle_t eventHandle = {}; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.stype = ZE_STRUCTURE_TYPE_EVENT_DESC; eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; eventDesc.signal = ZE_EVENT_SCOPE_FLAG_DEVICE; Mock metricGroup; zet_metric_group_properties_t metricGroupProperties = {}; metricGroupProperties.samplingType = ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_EVENT_BASED; zet_metric_query_handle_t queryHandle = {}; zet_metric_query_pool_handle_t poolHandle = {}; zet_metric_query_pool_desc_t poolDesc = {}; poolDesc.stype = ZET_STRUCTURE_TYPE_METRIC_QUERY_POOL_DESC; poolDesc.count = 1; poolDesc.type = ZET_METRIC_QUERY_POOL_TYPE_PERFORMANCE; TypedValue_1_0 value = {}; value.Type = ValueType::Uint32; value.ValueUInt32 = 64; QueryHandle_1_0 metricsLibraryQueryHandle = {&value}; ContextHandle_1_0 metricsLibraryContextHandle = {&value}; CommandBufferSize_1_0 commandBufferSize = {}; commandBufferSize.GpuMemorySize = 100; EXPECT_CALL(*mockMetricEnumeration, isInitialized()) .Times(1) .WillOnce(Return(true)); EXPECT_CALL(*mockMetricsLibrary, getContextData(_, _)) .Times(1) .WillOnce(Return(true)); EXPECT_CALL(*mockMetricsLibrary, load()) .Times(0); EXPECT_CALL(metricGroup, getProperties(_)) .Times(1) 
.WillOnce(DoAll(::testing::SetArgPointee<0>(metricGroupProperties), Return(ZE_RESULT_SUCCESS))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockQueryCreate(_, _)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<1>(metricsLibraryQueryHandle), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockQueryDelete(_)) .Times(1) .WillOnce(Return(StatusCode::Success)); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockGetParameter(_, _, _)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<2>(value), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockContextCreate(_, _, _)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<2>(metricsLibraryContextHandle), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockCommandBufferGetSize(_, _)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<1>(::testing::ByRef(commandBufferSize)), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockCommandBufferGet(_)) .Times(1) .WillOnce(Return(StatusCode::Success)); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockContextDelete(_)) .Times(1) .WillOnce(Return(StatusCode::Success)); // Create metric query pool. EXPECT_EQ(zetMetricQueryPoolCreate(context->toHandle(), metricDevice, metricGroup.toHandle(), &poolDesc, &poolHandle), ZE_RESULT_SUCCESS); EXPECT_NE(poolHandle, nullptr); // Create metric query. EXPECT_EQ(zetMetricQueryCreate(poolHandle, 0, &queryHandle), ZE_RESULT_SUCCESS); EXPECT_NE(queryHandle, nullptr); // Create event pool. EXPECT_EQ(zeEventPoolCreate(context->toHandle(), &eventPoolDesc, 1, &metricDevice, &eventPoolHandle), ZE_RESULT_SUCCESS); // Create event. EXPECT_EQ(zeEventCreate(eventPoolHandle, &eventDesc, &eventHandle), ZE_RESULT_SUCCESS); // Write BEGIN metric query to command list. EXPECT_EQ(zetCommandListAppendMetricQueryBegin(commandListHandle, queryHandle), ZE_RESULT_SUCCESS); // Destroy event and its pool. 
EXPECT_EQ(zeEventDestroy(eventHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(zeEventPoolDestroy(eventPoolHandle), ZE_RESULT_SUCCESS); // Destroy query and its pool. EXPECT_EQ(zetMetricQueryDestroy(queryHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(zetMetricQueryPoolDestroy(poolHandle), ZE_RESULT_SUCCESS); } TEST_F(MetricQueryPoolTest, givenCorrectArgumentsWhenZetCommandListAppendMetricQueryEndIsCalledThenReturnsSuccess) { zet_device_handle_t metricDevice = device->toHandle(); ze_result_t returnValue; std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)); zet_command_list_handle_t commandListHandle = commandList->toHandle(); ze_event_pool_handle_t eventPoolHandle = {}; ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; eventPoolDesc.flags = 0; eventPoolDesc.stype = ZE_STRUCTURE_TYPE_EVENT_POOL_DESC; ze_event_handle_t eventHandle = {}; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.stype = ZE_STRUCTURE_TYPE_EVENT_DESC; eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; eventDesc.signal = ZE_EVENT_SCOPE_FLAG_DEVICE; Mock metricGroup; zet_metric_group_properties_t metricGroupProperties = {}; metricGroupProperties.samplingType = ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_EVENT_BASED; zet_metric_query_handle_t queryHandle = {}; zet_metric_query_pool_handle_t poolHandle = {}; zet_metric_query_pool_desc_t poolDesc = {}; poolDesc.stype = ZET_STRUCTURE_TYPE_METRIC_QUERY_POOL_DESC; poolDesc.count = 1; poolDesc.type = ZET_METRIC_QUERY_POOL_TYPE_PERFORMANCE; TypedValue_1_0 value = {}; value.Type = ValueType::Uint32; value.ValueUInt32 = 64; QueryHandle_1_0 metricsLibraryQueryHandle = {&value}; ContextHandle_1_0 metricsLibraryContextHandle = {&value}; CommandBufferSize_1_0 commandBufferSize = {}; commandBufferSize.GpuMemorySize = 100; EXPECT_CALL(*mockMetricEnumeration, isInitialized()) .Times(1) .WillOnce(Return(true)); EXPECT_CALL(*mockMetricsLibrary, getContextData(_, _)) .Times(1) 
.WillOnce(Return(true)); EXPECT_CALL(*mockMetricsLibrary, load()) .Times(0); EXPECT_CALL(metricGroup, getProperties(_)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<0>(metricGroupProperties), Return(ZE_RESULT_SUCCESS))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockQueryCreate(_, _)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<1>(metricsLibraryQueryHandle), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockQueryDelete(_)) .Times(1) .WillOnce(Return(StatusCode::Success)); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockGetParameter(_, _, _)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<2>(value), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockContextCreate(_, _, _)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<2>(metricsLibraryContextHandle), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockCommandBufferGetSize(_, _)) .Times(2) .WillRepeatedly(DoAll(::testing::SetArgPointee<1>(::testing::ByRef(commandBufferSize)), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockCommandBufferGet(_)) .Times(2) .WillRepeatedly(Return(StatusCode::Success)); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockContextDelete(_)) .Times(1) .WillOnce(Return(StatusCode::Success)); // Create metric query pool. EXPECT_EQ(zetMetricQueryPoolCreate(context->toHandle(), metricDevice, metricGroup.toHandle(), &poolDesc, &poolHandle), ZE_RESULT_SUCCESS); EXPECT_NE(poolHandle, nullptr); // Create metric query. EXPECT_EQ(zetMetricQueryCreate(poolHandle, 0, &queryHandle), ZE_RESULT_SUCCESS); EXPECT_NE(queryHandle, nullptr); // Create event pool. EXPECT_EQ(zeEventPoolCreate(context->toHandle(), &eventPoolDesc, 1, &metricDevice, &eventPoolHandle), ZE_RESULT_SUCCESS); EXPECT_NE(eventPoolHandle, nullptr); // Create event. 
EXPECT_EQ(zeEventCreate(eventPoolHandle, &eventDesc, &eventHandle), ZE_RESULT_SUCCESS); EXPECT_NE(eventHandle, nullptr); // Write BEGIN metric query to command list. EXPECT_EQ(zetCommandListAppendMetricQueryBegin(commandListHandle, queryHandle), ZE_RESULT_SUCCESS); // Write END metric query to command list, use an event to determine if the data is available. EXPECT_EQ(zetCommandListAppendMetricQueryEnd(commandListHandle, queryHandle, eventHandle, 0, nullptr), ZE_RESULT_SUCCESS); // Destroy event and its pool. EXPECT_EQ(zeEventDestroy(eventHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(zeEventPoolDestroy(eventPoolHandle), ZE_RESULT_SUCCESS); // Destroy query and its pool. EXPECT_EQ(zetMetricQueryDestroy(queryHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(zetMetricQueryPoolDestroy(poolHandle), ZE_RESULT_SUCCESS); } TEST_F(MetricQueryPoolTest, givenIncorrectArgumentsWhenZetMetricQueryGetDataIsCalledThenReturnsFail) { zet_device_handle_t metricDevice = device->toHandle(); ze_result_t returnValue; std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)); zet_command_list_handle_t commandListHandle = commandList->toHandle(); ze_event_pool_handle_t eventPoolHandle = {}; ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; eventPoolDesc.flags = 0; eventPoolDesc.stype = ZE_STRUCTURE_TYPE_EVENT_POOL_DESC; ze_event_handle_t eventHandle = {}; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.stype = ZE_STRUCTURE_TYPE_EVENT_DESC; eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; eventDesc.signal = ZE_EVENT_SCOPE_FLAG_DEVICE; Mock metricGroup; zet_metric_group_properties_t metricGroupProperties = {}; metricGroupProperties.samplingType = ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_EVENT_BASED; zet_metric_query_handle_t queryHandle = {}; zet_metric_query_pool_handle_t poolHandle = {}; zet_metric_query_pool_desc_t poolDesc = {}; poolDesc.stype = ZET_STRUCTURE_TYPE_METRIC_QUERY_POOL_DESC; poolDesc.count = 1; 
poolDesc.type = ZET_METRIC_QUERY_POOL_TYPE_PERFORMANCE; TypedValue_1_0 value = {}; value.Type = ValueType::Uint32; value.ValueUInt32 = 64; QueryHandle_1_0 metricsLibraryQueryHandle = {&value}; ContextHandle_1_0 metricsLibraryContextHandle = {&value}; CommandBufferSize_1_0 commandBufferSize = {}; commandBufferSize.GpuMemorySize = 100; EXPECT_CALL(*mockMetricEnumeration, isInitialized()) .Times(1) .WillOnce(Return(true)); EXPECT_CALL(*mockMetricsLibrary, getContextData(_, _)) .Times(1) .WillOnce(Return(true)); EXPECT_CALL(*mockMetricsLibrary, load()) .Times(0); EXPECT_CALL(metricGroup, getProperties(_)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<0>(metricGroupProperties), Return(ZE_RESULT_SUCCESS))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockQueryCreate(_, _)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<1>(metricsLibraryQueryHandle), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockQueryDelete(_)) .Times(1) .WillOnce(Return(StatusCode::Success)); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockGetParameter(_, _, _)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<2>(value), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockContextCreate(_, _, _)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<2>(metricsLibraryContextHandle), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockCommandBufferGetSize(_, _)) .Times(2) .WillRepeatedly(DoAll(::testing::SetArgPointee<1>(::testing::ByRef(commandBufferSize)), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockCommandBufferGet(_)) .Times(2) .WillRepeatedly(Return(StatusCode::Success)); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockContextDelete(_)) .Times(1) .WillOnce(Return(StatusCode::Success)); // Create metric query pool. 
EXPECT_EQ(zetMetricQueryPoolCreate(context->toHandle(), metricDevice, metricGroup.toHandle(), &poolDesc, &poolHandle), ZE_RESULT_SUCCESS); EXPECT_NE(poolHandle, nullptr); // Create metric query. EXPECT_EQ(zetMetricQueryCreate(poolHandle, 0, &queryHandle), ZE_RESULT_SUCCESS); EXPECT_NE(queryHandle, nullptr); // Create event pool. EXPECT_EQ(zeEventPoolCreate(context->toHandle(), &eventPoolDesc, 1, &metricDevice, &eventPoolHandle), ZE_RESULT_SUCCESS); EXPECT_NE(eventPoolHandle, nullptr); // Create event. EXPECT_EQ(zeEventCreate(eventPoolHandle, &eventDesc, &eventHandle), ZE_RESULT_SUCCESS); EXPECT_NE(eventHandle, nullptr); // Write BEGIN metric query to command list. EXPECT_EQ(zetCommandListAppendMetricQueryBegin(commandListHandle, queryHandle), ZE_RESULT_SUCCESS); // Write END metric query to command list, use an event to determine if the data is available. EXPECT_EQ(zetCommandListAppendMetricQueryEnd(commandListHandle, queryHandle, eventHandle, 0, nullptr), ZE_RESULT_SUCCESS); // Destroy event and its pool. EXPECT_EQ(zeEventDestroy(eventHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(zeEventPoolDestroy(eventPoolHandle), ZE_RESULT_SUCCESS); // Destroy query and its pool. 
EXPECT_EQ(zetMetricQueryDestroy(queryHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(zetMetricQueryPoolDestroy(poolHandle), ZE_RESULT_SUCCESS); } TEST_F(MetricQueryPoolTest, givenCorrectArgumentsWhenZetMetricQueryGetDataIsCalledThenReturnsSuccess) { zet_device_handle_t metricDevice = device->toHandle(); ze_result_t returnValue; std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)); zet_command_list_handle_t commandListHandle = commandList->toHandle(); ze_event_pool_handle_t eventPoolHandle = {}; ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; eventPoolDesc.flags = 0; eventPoolDesc.stype = ZE_STRUCTURE_TYPE_EVENT_POOL_DESC; ze_event_handle_t eventHandle = {}; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.stype = ZE_STRUCTURE_TYPE_EVENT_DESC; eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; eventDesc.signal = ZE_EVENT_SCOPE_FLAG_DEVICE; Mock metricGroup; zet_metric_group_properties_t metricGroupProperties = {}; metricGroupProperties.samplingType = ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_EVENT_BASED; zet_metric_query_handle_t queryHandle = {}; zet_metric_query_pool_handle_t poolHandle = {}; zet_metric_query_pool_desc_t poolDesc = {}; poolDesc.stype = ZET_STRUCTURE_TYPE_METRIC_QUERY_POOL_DESC; poolDesc.count = 1; poolDesc.type = ZET_METRIC_QUERY_POOL_TYPE_PERFORMANCE; TypedValue_1_0 value = {}; value.Type = ValueType::Uint32; value.ValueUInt32 = 64; size_t reportSize = 256; QueryHandle_1_0 metricsLibraryQueryHandle = {&value}; ContextHandle_1_0 metricsLibraryContextHandle = {&value}; CommandBufferSize_1_0 commandBufferSize = {}; commandBufferSize.GpuMemorySize = 100; EXPECT_CALL(*mockMetricEnumeration, isInitialized()) .Times(1) .WillOnce(Return(true)); EXPECT_CALL(*mockMetricsLibrary, getContextData(_, _)) .Times(1) .WillOnce(Return(true)); EXPECT_CALL(*mockMetricsLibrary, load()) .Times(0); EXPECT_CALL(metricGroup, getProperties(_)) .Times(1) 
.WillOnce(DoAll(::testing::SetArgPointee<0>(metricGroupProperties), Return(ZE_RESULT_SUCCESS))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockQueryCreate(_, _)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<1>(metricsLibraryQueryHandle), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockQueryDelete(_)) .Times(1) .WillOnce(Return(StatusCode::Success)); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockGetParameter(_, _, _)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<2>(value), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockContextCreate(_, _, _)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<2>(metricsLibraryContextHandle), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockCommandBufferGetSize(_, _)) .Times(2) .WillRepeatedly(DoAll(::testing::SetArgPointee<1>(::testing::ByRef(commandBufferSize)), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockCommandBufferGet(_)) .Times(2) .WillRepeatedly(Return(StatusCode::Success)); EXPECT_CALL(*mockMetricsLibrary, getMetricQueryReportSize(_)) .Times(1) .WillOnce(DoAll(::testing::SetArgReferee<0>(reportSize), Return(true))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockGetData(_)) .Times(1) .WillOnce(Return(StatusCode::Success)); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockContextDelete(_)) .Times(1) .WillOnce(Return(StatusCode::Success)); // Create metric query pool. EXPECT_EQ(zetMetricQueryPoolCreate(context->toHandle(), metricDevice, metricGroup.toHandle(), &poolDesc, &poolHandle), ZE_RESULT_SUCCESS); EXPECT_NE(poolHandle, nullptr); // Create metric query. EXPECT_EQ(zetMetricQueryCreate(poolHandle, 0, &queryHandle), ZE_RESULT_SUCCESS); EXPECT_NE(queryHandle, nullptr); // Create event pool. EXPECT_EQ(zeEventPoolCreate(context->toHandle(), &eventPoolDesc, 1, &metricDevice, &eventPoolHandle), ZE_RESULT_SUCCESS); EXPECT_NE(eventPoolHandle, nullptr); // Create event. 
EXPECT_EQ(zeEventCreate(eventPoolHandle, &eventDesc, &eventHandle), ZE_RESULT_SUCCESS); EXPECT_NE(eventHandle, nullptr); // Write BEGIN metric query to command list. EXPECT_EQ(zetCommandListAppendMetricQueryBegin(commandListHandle, queryHandle), ZE_RESULT_SUCCESS); // Write END metric query to command list, use an event to determine if the data is available. EXPECT_EQ(zetCommandListAppendMetricQueryEnd(commandListHandle, queryHandle, eventHandle, 0, nullptr), ZE_RESULT_SUCCESS); // Get desired raw data size. size_t rawSize = 0; EXPECT_EQ(zetMetricQueryGetData(queryHandle, &rawSize, nullptr), ZE_RESULT_SUCCESS); // Get data. std::vector rawData; rawData.resize(rawSize); EXPECT_EQ(zetMetricQueryGetData(queryHandle, &rawSize, rawData.data()), ZE_RESULT_SUCCESS); // Destroy event and its pool. EXPECT_EQ(zeEventDestroy(eventHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(zeEventPoolDestroy(eventPoolHandle), ZE_RESULT_SUCCESS); // Destroy query and its pool. EXPECT_EQ(zetMetricQueryDestroy(queryHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(zetMetricQueryPoolDestroy(poolHandle), ZE_RESULT_SUCCESS); } MATCHER_P(reportDataAreEqual, reportData, "") { return (arg->Query.Slot == reportData->Query.Slot) && (arg->Query.SlotsCount == reportData->Query.SlotsCount) && (arg->Query.Handle.data == reportData->Query.Handle.data) && (arg->Query.Data == reportData->Query.Data) && (arg->Query.DataSize == reportData->Query.DataSize) && (arg->Type == reportData->Type); } TEST_F(MetricQueryPoolTest, givenCorrectArgumentsWhenZetMetricQueryGetDataIsCalledThenReturnsSuccessWithProperFilledStructure) { zet_device_handle_t metricDevice = device->toHandle(); ze_result_t returnValue; std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)); zet_command_list_handle_t commandListHandle = commandList->toHandle(); ze_event_pool_handle_t eventPoolHandle = {}; ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; eventPoolDesc.flags = 0; 
eventPoolDesc.stype = ZE_STRUCTURE_TYPE_EVENT_POOL_DESC; ze_event_handle_t eventHandle = {}; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.stype = ZE_STRUCTURE_TYPE_EVENT_DESC; eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; eventDesc.signal = ZE_EVENT_SCOPE_FLAG_DEVICE; Mock metricGroup; zet_metric_group_properties_t metricGroupProperties = {}; metricGroupProperties.samplingType = ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_EVENT_BASED; uint32_t queriesCount = 10; zet_metric_query_handle_t queryHandle = {}; zet_metric_query_pool_handle_t poolHandle = {}; zet_metric_query_pool_desc_t poolDesc = {}; poolDesc.stype = ZET_STRUCTURE_TYPE_METRIC_QUERY_POOL_DESC; poolDesc.count = queriesCount; poolDesc.type = ZET_METRIC_QUERY_POOL_TYPE_PERFORMANCE; TypedValue_1_0 value = {}; value.Type = ValueType::Uint32; value.ValueUInt32 = 64; size_t reportSize = 256; QueryHandle_1_0 metricsLibraryQueryHandle = {&value}; ContextHandle_1_0 metricsLibraryContextHandle = {&value}; CommandBufferSize_1_0 commandBufferSize = {}; commandBufferSize.GpuMemorySize = 100; GetReportData_1_0 reportData = {}; reportData.Type = ObjectType::QueryHwCounters; reportData.Query.Handle = metricsLibraryQueryHandle; reportData.Query.Slot = queriesCount - 1; reportData.Query.SlotsCount = 1; EXPECT_CALL(*mockMetricEnumeration, isInitialized()) .Times(1) .WillOnce(Return(true)); EXPECT_CALL(*mockMetricsLibrary, getContextData(_, _)) .Times(1) .WillOnce(Return(true)); EXPECT_CALL(*mockMetricsLibrary, load()) .Times(0); EXPECT_CALL(metricGroup, getProperties(_)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<0>(metricGroupProperties), Return(ZE_RESULT_SUCCESS))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockQueryCreate(_, _)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<1>(metricsLibraryQueryHandle), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockQueryDelete(_)) .Times(1) .WillOnce(Return(StatusCode::Success)); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, 
MockGetParameter(_, _, _)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<2>(value), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockContextCreate(_, _, _)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<2>(metricsLibraryContextHandle), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockCommandBufferGetSize(_, _)) .Times(2) .WillRepeatedly(DoAll(::testing::SetArgPointee<1>(::testing::ByRef(commandBufferSize)), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockCommandBufferGet(_)) .Times(2) .WillRepeatedly(Return(StatusCode::Success)); EXPECT_CALL(*mockMetricsLibrary, getMetricQueryReportSize(_)) .Times(1) .WillOnce(DoAll(::testing::SetArgReferee<0>(reportSize), Return(true))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockGetData(reportDataAreEqual(&reportData))) .Times(1) .WillOnce(Return(StatusCode::Success)); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockContextDelete(_)) .Times(1) .WillOnce(Return(StatusCode::Success)); // Create metric query pool. EXPECT_EQ(zetMetricQueryPoolCreate(context->toHandle(), metricDevice, metricGroup.toHandle(), &poolDesc, &poolHandle), ZE_RESULT_SUCCESS); EXPECT_NE(poolHandle, nullptr); // Create metric query. EXPECT_EQ(zetMetricQueryCreate(poolHandle, queriesCount - 1, &queryHandle), ZE_RESULT_SUCCESS); EXPECT_NE(queryHandle, nullptr); // Create event pool. EXPECT_EQ(zeEventPoolCreate(context->toHandle(), &eventPoolDesc, 1, &metricDevice, &eventPoolHandle), ZE_RESULT_SUCCESS); EXPECT_NE(eventPoolHandle, nullptr); // Create event. EXPECT_EQ(zeEventCreate(eventPoolHandle, &eventDesc, &eventHandle), ZE_RESULT_SUCCESS); EXPECT_NE(eventHandle, nullptr); // Write BEGIN metric query to command list. EXPECT_EQ(zetCommandListAppendMetricQueryBegin(commandListHandle, queryHandle), ZE_RESULT_SUCCESS); // Write END metric query to command list, use an event to determine if the data is available. 
EXPECT_EQ(zetCommandListAppendMetricQueryEnd(commandListHandle, queryHandle, eventHandle, 0, nullptr), ZE_RESULT_SUCCESS); // Get desired raw data size. size_t rawSize = 0; EXPECT_EQ(zetMetricQueryGetData(queryHandle, &rawSize, nullptr), ZE_RESULT_SUCCESS); // Get data. std::vector rawData; rawData.resize(rawSize); reportData.Query.Data = rawData.data(); reportData.Query.DataSize = static_cast(rawSize); EXPECT_EQ(zetMetricQueryGetData(queryHandle, &rawSize, rawData.data()), ZE_RESULT_SUCCESS); // Destroy event and its pool. EXPECT_EQ(zeEventDestroy(eventHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(zeEventPoolDestroy(eventPoolHandle), ZE_RESULT_SUCCESS); // Destroy query and its pool. EXPECT_EQ(zetMetricQueryDestroy(queryHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(zetMetricQueryPoolDestroy(poolHandle), ZE_RESULT_SUCCESS); } TEST_F(MetricQueryPoolTest, givenMetricQueryIsActiveWhenMetricQueryPoolDestroyIsCalledThenMetricLibraryIsNotReleased) { zet_device_handle_t metricDevice = device->toHandle(); Mock metricGroup; zet_metric_group_properties_t metricGroupProperties = {}; metricGroupProperties.samplingType = ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_EVENT_BASED; zet_metric_query_handle_t queryHandle[2] = {}; zet_metric_query_pool_handle_t poolHandle[2] = {}; zet_metric_query_pool_desc_t poolDesc = {}; poolDesc.stype = ZET_STRUCTURE_TYPE_METRIC_QUERY_POOL_DESC; poolDesc.count = 1; poolDesc.type = ZET_METRIC_QUERY_POOL_TYPE_PERFORMANCE; TypedValue_1_0 value = {}; value.Type = ValueType::Uint32; value.ValueUInt32 = 64; QueryHandle_1_0 metricsLibraryQueryHandle = {&value}; ContextHandle_1_0 metricsLibraryContextHandle = {&value}; CommandBufferSize_1_0 commandBufferSize = {}; commandBufferSize.GpuMemorySize = 100; EXPECT_CALL(*mockMetricEnumeration, isInitialized()) .Times(1) .WillOnce(Return(true)); EXPECT_CALL(*mockMetricsLibrary, getContextData(_, _)) .Times(1) .WillOnce(Return(true)); EXPECT_CALL(*mockMetricsLibrary, load()) .Times(0); EXPECT_CALL(metricGroup, getProperties(_)) 
.Times(2) .WillRepeatedly(DoAll(::testing::SetArgPointee<0>(metricGroupProperties), Return(ZE_RESULT_SUCCESS))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockQueryCreate(_, _)) .Times(2) .WillRepeatedly(DoAll(::testing::SetArgPointee<1>(metricsLibraryQueryHandle), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockQueryDelete(_)) .Times(2) .WillRepeatedly(Return(StatusCode::Success)); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockGetParameter(_, _, _)) .Times(2) .WillRepeatedly(DoAll(::testing::SetArgPointee<2>(value), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockContextCreate(_, _, _)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<2>(metricsLibraryContextHandle), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockContextDelete(_)) .Times(1) .WillOnce(Return(StatusCode::Success)); EXPECT_EQ(zetMetricQueryPoolCreate(context->toHandle(), metricDevice, metricGroup.toHandle(), &poolDesc, &poolHandle[0]), ZE_RESULT_SUCCESS); EXPECT_NE(poolHandle[0], nullptr); EXPECT_EQ(zetMetricQueryPoolCreate(context->toHandle(), metricDevice, metricGroup.toHandle(), &poolDesc, &poolHandle[1]), ZE_RESULT_SUCCESS); EXPECT_NE(poolHandle[1], nullptr); EXPECT_EQ(zetMetricQueryCreate(poolHandle[0], 0, &queryHandle[0]), ZE_RESULT_SUCCESS); EXPECT_NE(queryHandle[0], nullptr); EXPECT_EQ(zetMetricQueryCreate(poolHandle[1], 0, &queryHandle[1]), ZE_RESULT_SUCCESS); EXPECT_NE(queryHandle[1], nullptr); EXPECT_EQ(zetMetricQueryDestroy(queryHandle[0]), ZE_RESULT_SUCCESS); EXPECT_EQ(zetMetricQueryPoolDestroy(poolHandle[0]), ZE_RESULT_SUCCESS); EXPECT_EQ(mockMetricsLibrary->getInitializationState(), ZE_RESULT_SUCCESS); EXPECT_EQ(zetMetricQueryDestroy(queryHandle[1]), ZE_RESULT_SUCCESS); EXPECT_EQ(zetMetricQueryPoolDestroy(poolHandle[1]), ZE_RESULT_SUCCESS); EXPECT_NE(mockMetricsLibrary->getInitializationState(), ZE_RESULT_SUCCESS); } TEST_F(MetricQueryPoolTest, 
givenMetricQueryIsActiveWhenMetricGroupDeactivateIsCalledThenMetricLibraryIsNotReleased) { zet_device_handle_t metricDevice = device->toHandle(); metricsDeviceParams.ConcurrentGroupsCount = 1; Mock metricsConcurrentGroup; TConcurrentGroupParams_1_0 metricsConcurrentGroupParams = {}; metricsConcurrentGroupParams.MetricSetsCount = 1; metricsConcurrentGroupParams.SymbolName = "OA"; metricsConcurrentGroupParams.Description = "OA description"; Mock metricsSet; MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams = {}; metricsSetParams.ApiMask = MetricsDiscovery::API_TYPE_OCL; metricsSetParams.MetricsCount = 0; metricsSetParams.SymbolName = "Metric set name"; metricsSetParams.ShortName = "Metric set description"; metricsSetParams.MetricsCount = 1; Mock metric; TMetricParams_1_0 metricParams = {}; metricParams.SymbolName = "Metric symbol name"; metricParams.ShortName = "Metric short name"; metricParams.LongName = "Metric long name"; metricParams.ResultType = MetricsDiscovery::TMetricResultType::RESULT_UINT64; metricParams.MetricType = MetricsDiscovery::TMetricType::METRIC_TYPE_RATIO; zet_metric_group_handle_t metricGroupHandle = {}; zet_metric_group_properties_t metricGroupProperties = {}; metricGroupProperties.samplingType = ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_EVENT_BASED; zet_metric_query_handle_t queryHandle = {}; zet_metric_query_pool_handle_t poolHandle = {}; zet_metric_query_pool_desc_t poolDesc = {}; poolDesc.stype = ZET_STRUCTURE_TYPE_METRIC_QUERY_POOL_DESC; poolDesc.count = 1; poolDesc.type = ZET_METRIC_QUERY_POOL_TYPE_PERFORMANCE; TypedValue_1_0 value = {}; value.Type = ValueType::Uint32; value.ValueUInt32 = 64; QueryHandle_1_0 metricsLibraryQueryHandle = {&value}; ContextHandle_1_0 metricsLibraryContextHandle = {&value}; ConfigurationHandle_1_0 metricsLibraryConfigurationHandle = {&value}; openMetricsAdapter(); EXPECT_CALL(metricsDevice, GetParams()) .WillRepeatedly(Return(&metricsDeviceParams)); EXPECT_CALL(metricsDevice, GetConcurrentGroup(_)) .Times(1) 
.WillRepeatedly(Return(&metricsConcurrentGroup)); EXPECT_CALL(metricsConcurrentGroup, GetParams()) .Times(1) .WillRepeatedly(Return(&metricsConcurrentGroupParams)); EXPECT_CALL(metricsConcurrentGroup, GetMetricSet(_)) .WillRepeatedly(Return(&metricsSet)); EXPECT_CALL(metricsSet, GetParams()) .WillRepeatedly(Return(&metricsSetParams)); EXPECT_CALL(metricsSet, GetMetric(_)) .Times(1) .WillRepeatedly(Return(&metric)); EXPECT_CALL(metric, GetParams()) .Times(1) .WillRepeatedly(Return(&metricParams)); EXPECT_CALL(metricsSet, SetApiFiltering(_)) .WillRepeatedly(Return(TCompletionCode::CC_OK)); EXPECT_CALL(*mockMetricEnumeration, isInitialized()) .Times(1) .WillOnce(Return(true)); EXPECT_CALL(*mockMetricsLibrary, getContextData(_, _)) .Times(1) .WillOnce(Return(true)); EXPECT_CALL(*mockMetricsLibrary, load()) .Times(0); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockQueryCreate(_, _)) .Times(1) .WillRepeatedly(DoAll(::testing::SetArgPointee<1>(metricsLibraryQueryHandle), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockQueryDelete(_)) .Times(1) .WillRepeatedly(Return(StatusCode::Success)); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockGetParameter(_, _, _)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<2>(value), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockContextCreate(_, _, _)) .Times(1) .WillRepeatedly(DoAll(::testing::SetArgPointee<2>(metricsLibraryContextHandle), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockContextDelete(_)) .Times(1) .WillRepeatedly(Return(StatusCode::Success)); uint32_t metricGroupCount = 0; EXPECT_EQ(zetMetricGroupGet(metricDevice, &metricGroupCount, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); EXPECT_EQ(zetMetricGroupGet(metricDevice, &metricGroupCount, &metricGroupHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); EXPECT_NE(metricGroupHandle, nullptr); 
EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), metricDevice, 1, &metricGroupHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(zetMetricQueryPoolCreate(context->toHandle(), metricDevice, metricGroupHandle, &poolDesc, &poolHandle), ZE_RESULT_SUCCESS); EXPECT_NE(poolHandle, nullptr); EXPECT_EQ(zetMetricQueryCreate(poolHandle, 0, &queryHandle), ZE_RESULT_SUCCESS); EXPECT_NE(queryHandle, nullptr); EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), metricDevice, 0, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(mockMetricsLibrary->getInitializationState(), ZE_RESULT_SUCCESS); EXPECT_EQ(zetMetricQueryDestroy(queryHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(zetMetricQueryPoolDestroy(poolHandle), ZE_RESULT_SUCCESS); EXPECT_NE(mockMetricsLibrary->getInitializationState(), ZE_RESULT_SUCCESS); } } // namespace ult } // namespace L0 test_metric_oa_query_pool_2.cpp000066400000000000000000000544711422164147700353310ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/metrics/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/device_factory.h" #include "shared/test/common/mocks/ult_device_factory.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/source/driver/driver_imp.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h" #include "level_zero/core/test/unit_tests/mocks/mock_device.h" #include "level_zero/core/test/unit_tests/mocks/mock_driver_handle.h" #include "level_zero/tools/source/metrics/metric_oa_source.h" #include "level_zero/tools/test/unit_tests/sources/metrics/mock_metric_oa.h" #include "gmock/gmock.h" #include "gtest/gtest.h" using ::testing::_; using ::testing::Return; namespace L0 { namespace ult { class MetricQueryPoolTest : public MetricContextFixture, public ::testing::Test { public: void SetUp() override { ze_result_t returnValue = ZE_RESULT_SUCCESS; MetricContextFixture::SetUp(); auto executionEnvironment 
= new NEO::ExecutionEnvironment(); driverHandle.reset(DriverHandle::create(NEO::DeviceFactory::createDevices(*executionEnvironment), L0EnvVariables{}, &returnValue)); } void TearDown() override { MetricContextFixture::TearDown(); driverHandle.reset(); GlobalDriver = nullptr; } std::unique_ptr driverHandle; }; TEST_F(MetricQueryPoolTest, givenCorrectArgumentsWhenStreamerIsOpenThenQueryPoolIsNotAvailable) { zet_device_handle_t metricDevice = device->toHandle(); ze_event_handle_t eventHandle = {}; zet_metric_streamer_handle_t streamerHandle = {}; zet_metric_streamer_desc_t streamerDesc = {}; streamerDesc.stype = ZET_STRUCTURE_TYPE_METRIC_STREAMER_DESC; streamerDesc.notifyEveryNReports = 32768; streamerDesc.samplingPeriod = 1000; Mock metricGroup; zet_metric_group_handle_t metricGroupHandle = metricGroup.toHandle(); zet_metric_group_properties_t metricGroupProperties = {}; metricsDeviceParams.ConcurrentGroupsCount = 1; Mock metricsConcurrentGroup; TConcurrentGroupParams_1_0 metricsConcurrentGroupParams = {}; metricsConcurrentGroupParams.MetricSetsCount = 1; metricsConcurrentGroupParams.SymbolName = "OA"; metricsConcurrentGroupParams.Description = "OA description"; Mock metricsSet; MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams = {}; metricsSetParams.ApiMask = MetricsDiscovery::API_TYPE_IOSTREAM; metricsSetParams.MetricsCount = 0; metricsSetParams.SymbolName = "Metric set name"; metricsSetParams.ShortName = "Metric set description"; metricsSetParams.RawReportSize = 256; zet_metric_query_pool_handle_t poolHandle = {}; zet_metric_query_pool_desc_t poolDesc = {}; poolDesc.stype = ZET_STRUCTURE_TYPE_METRIC_QUERY_POOL_DESC; poolDesc.count = 1; poolDesc.type = ZET_METRIC_QUERY_POOL_TYPE_PERFORMANCE; TypedValue_1_0 value = {}; value.Type = ValueType::Uint32; value.ValueUInt32 = 64; QueryHandle_1_0 queryHandle = {&value}; ContextHandle_1_0 contextHandle = {&value}; EXPECT_CALL(*mockMetricsLibrary, load()) .Times(0); openMetricsAdapter(); EXPECT_CALL(metricsDevice, 
GetParams()) .WillRepeatedly(Return(&metricsDeviceParams)); EXPECT_CALL(metricsDevice, GetConcurrentGroup(_)) .Times(1) .WillOnce(Return(&metricsConcurrentGroup)); EXPECT_CALL(metricsConcurrentGroup, GetParams()) .Times(1) .WillRepeatedly(Return(&metricsConcurrentGroupParams)); EXPECT_CALL(metricsConcurrentGroup, GetMetricSet(_)) .WillRepeatedly(Return(&metricsSet)); EXPECT_CALL(metricsSet, GetParams()) .WillRepeatedly(Return(&metricsSetParams)); EXPECT_CALL(metricsSet, SetApiFiltering(_)) .WillRepeatedly(Return(TCompletionCode::CC_OK)); EXPECT_CALL(metricsConcurrentGroup, OpenIoStream(_, _, _, _)) .Times(1) .WillOnce(Return(TCompletionCode::CC_OK)); EXPECT_CALL(metricsConcurrentGroup, CloseIoStream()) .Times(1) .WillOnce(Return(TCompletionCode::CC_OK)); uint32_t metricGroupCount = 0; EXPECT_EQ(zetMetricGroupGet(metricDevice, &metricGroupCount, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); EXPECT_EQ(zetMetricGroupGet(metricDevice, &metricGroupCount, &metricGroupHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); EXPECT_NE(metricGroupHandle, nullptr); EXPECT_EQ(zetMetricGroupGetProperties(metricGroupHandle, &metricGroupProperties), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupProperties.domain, 0u); EXPECT_EQ(metricGroupProperties.samplingType, ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_TIME_BASED); EXPECT_EQ(metricGroupProperties.metricCount, metricsSetParams.MetricsCount); EXPECT_EQ(strcmp(metricGroupProperties.description, metricsSetParams.ShortName), 0); EXPECT_EQ(strcmp(metricGroupProperties.name, metricsSetParams.SymbolName), 0); EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), metricDevice, 1, &metricGroupHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(zetMetricStreamerOpen(context->toHandle(), metricDevice, metricGroupHandle, &streamerDesc, eventHandle, &streamerHandle), ZE_RESULT_SUCCESS); EXPECT_NE(streamerHandle, nullptr); EXPECT_EQ(zetMetricQueryPoolCreate(context->toHandle(), metricDevice, metricGroupHandle, &poolDesc, &poolHandle), 
ZE_RESULT_ERROR_NOT_AVAILABLE); EXPECT_EQ(zetMetricStreamerClose(streamerHandle), ZE_RESULT_SUCCESS); EXPECT_NE(streamerHandle, nullptr); } TEST_F(MetricQueryPoolTest, givenExecutionQueryTypeWhenZetMetricQueryPoolCreateIsCalledThenQueryPoolIsObtained) { zet_device_handle_t metricDevice = device->toHandle(); zet_metric_query_pool_handle_t poolHandle = {}; zet_metric_query_pool_desc_t poolDesc = {}; poolDesc.stype = ZET_STRUCTURE_TYPE_METRIC_QUERY_POOL_DESC; poolDesc.count = 1; poolDesc.type = ZET_METRIC_QUERY_POOL_TYPE_EXECUTION; EXPECT_CALL(*mockMetricsLibrary, load()) .Times(0); EXPECT_EQ(zetMetricQueryPoolCreate(context->toHandle(), metricDevice, nullptr, &poolDesc, &poolHandle), ZE_RESULT_SUCCESS); EXPECT_NE(poolHandle, nullptr); EXPECT_EQ(zetMetricQueryPoolDestroy(poolHandle), ZE_RESULT_SUCCESS); } TEST_F(MetricQueryPoolTest, givenExecutionQueryTypeWhenAppendMetricQueryBeginAndEndIsCalledThenReturnSuccess) { zet_device_handle_t metricDevice = device->toHandle(); zet_metric_query_pool_handle_t poolHandle = {}; zet_metric_query_handle_t queryHandle = {}; zet_metric_query_pool_desc_t poolDesc = {}; poolDesc.stype = ZET_STRUCTURE_TYPE_METRIC_QUERY_POOL_DESC; poolDesc.count = 1; poolDesc.type = ZET_METRIC_QUERY_POOL_TYPE_EXECUTION; ze_result_t returnValue; std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)); zet_command_list_handle_t commandListHandle = commandList->toHandle(); TypedValue_1_0 value = {}; value.Type = ValueType::Uint32; value.ValueUInt32 = 64; QueryHandle_1_0 metricsLibraryQueryHandle = {&value}; ContextHandle_1_0 metricsLibraryContextHandle = {&value}; CommandBufferSize_1_0 commandBufferSize = {}; commandBufferSize.GpuMemorySize = 100; EXPECT_CALL(*mockMetricEnumeration, isInitialized()) .Times(1) .WillOnce(Return(true)); EXPECT_CALL(*mockMetricsLibrary, getContextData(_, _)) .Times(1) .WillOnce(Return(true)); EXPECT_CALL(*mockMetricsLibrary, load()) .Times(0); 
EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockContextCreate(_, _, _)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<2>(metricsLibraryContextHandle), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockContextDelete(_)) .Times(1) .WillOnce(Return(StatusCode::Success)); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockCommandBufferGetSize(_, _)) .Times(3) .WillRepeatedly(DoAll(::testing::SetArgPointee<1>(::testing::ByRef(commandBufferSize)), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockCommandBufferGet(_)) .Times(3) .WillRepeatedly(Return(StatusCode::Success)); EXPECT_EQ(zetMetricQueryPoolCreate(context->toHandle(), metricDevice, nullptr, &poolDesc, &poolHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(zetMetricQueryCreate(poolHandle, 0, &queryHandle), ZE_RESULT_SUCCESS); EXPECT_NE(queryHandle, nullptr); EXPECT_EQ(zetCommandListAppendMetricQueryBegin(commandListHandle, queryHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(zetCommandListAppendMetricQueryEnd(commandListHandle, queryHandle, nullptr, 0, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(zetCommandListAppendMetricMemoryBarrier(commandListHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(zetMetricQueryDestroy(queryHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(zetMetricQueryPoolDestroy(poolHandle), ZE_RESULT_SUCCESS); } TEST_F(MetricQueryPoolTest, givenExecutionQueryTypeAndCompletionEventWhenAppendMetricQueryBeginAndEndIsCalledThenReturnSuccess) { zet_device_handle_t metricDevice = device->toHandle(); zet_metric_query_pool_handle_t poolHandle = {}; zet_metric_query_handle_t queryHandle = {}; zet_metric_query_pool_desc_t poolDesc = {}; poolDesc.stype = ZET_STRUCTURE_TYPE_METRIC_QUERY_POOL_DESC; poolDesc.count = 1; poolDesc.type = ZET_METRIC_QUERY_POOL_TYPE_EXECUTION; ze_result_t returnValue; std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)); zet_command_list_handle_t commandListHandle = commandList->toHandle(); 
TypedValue_1_0 value = {}; value.Type = ValueType::Uint32; value.ValueUInt32 = 64; QueryHandle_1_0 metricsLibraryQueryHandle = {&value}; ContextHandle_1_0 metricsLibraryContextHandle = {&value}; CommandBufferSize_1_0 commandBufferSize = {}; commandBufferSize.GpuMemorySize = 100; EXPECT_CALL(*mockMetricEnumeration, isInitialized()) .Times(1) .WillOnce(Return(true)); EXPECT_CALL(*mockMetricsLibrary, getContextData(_, _)) .Times(1) .WillOnce(Return(true)); EXPECT_CALL(*mockMetricsLibrary, load()) .Times(0); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockContextCreate(_, _, _)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<2>(metricsLibraryContextHandle), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockContextDelete(_)) .Times(1) .WillOnce(Return(StatusCode::Success)); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockCommandBufferGetSize(_, _)) .Times(2) .WillRepeatedly(DoAll(::testing::SetArgPointee<1>(::testing::ByRef(commandBufferSize)), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockCommandBufferGet(_)) .Times(2) .WillRepeatedly(Return(StatusCode::Success)); ze_event_pool_handle_t eventPoolHandle = {}; ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; eventPoolDesc.flags = 0; eventPoolDesc.stype = ZE_STRUCTURE_TYPE_EVENT_POOL_DESC; ze_event_handle_t eventHandle = {}; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.stype = ZE_STRUCTURE_TYPE_EVENT_DESC; eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; eventDesc.signal = ZE_EVENT_SCOPE_FLAG_DEVICE; EXPECT_EQ(zeEventPoolCreate(context->toHandle(), &eventPoolDesc, 1, &metricDevice, &eventPoolHandle), ZE_RESULT_SUCCESS); EXPECT_NE(eventPoolHandle, nullptr); EXPECT_EQ(zeEventCreate(eventPoolHandle, &eventDesc, &eventHandle), ZE_RESULT_SUCCESS); EXPECT_NE(eventHandle, nullptr); EXPECT_EQ(zetMetricQueryPoolCreate(context->toHandle(), metricDevice, nullptr, &poolDesc, &poolHandle), ZE_RESULT_SUCCESS); 
EXPECT_EQ(zetMetricQueryCreate(poolHandle, 0, &queryHandle), ZE_RESULT_SUCCESS); EXPECT_NE(queryHandle, nullptr); EXPECT_EQ(zetCommandListAppendMetricQueryBegin(commandListHandle, queryHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(zetCommandListAppendMetricQueryEnd(commandListHandle, queryHandle, eventHandle, 0, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(zetMetricQueryDestroy(queryHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(zetMetricQueryPoolDestroy(poolHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(zeEventDestroy(eventHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(zeEventPoolDestroy(eventPoolHandle), ZE_RESULT_SUCCESS); } TEST_F(MetricQueryPoolTest, givenExecutionQueryTypeAndMetricsLibraryWillFailWhenAppendMetricQueryBeginAndEndIsCalledThenReturnFail) { zet_device_handle_t metricDevice = device->toHandle(); zet_metric_query_pool_handle_t poolHandle = {}; zet_metric_query_handle_t queryHandle = {}; zet_metric_query_pool_desc_t poolDesc = {}; poolDesc.stype = ZET_STRUCTURE_TYPE_METRIC_QUERY_POOL_DESC; poolDesc.count = 1; poolDesc.type = ZET_METRIC_QUERY_POOL_TYPE_EXECUTION; ze_result_t returnValue; std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)); zet_command_list_handle_t commandListHandle = commandList->toHandle(); TypedValue_1_0 value = {}; value.Type = ValueType::Uint32; value.ValueUInt32 = 64; QueryHandle_1_0 metricsLibraryQueryHandle = {&value}; ContextHandle_1_0 metricsLibraryContextHandle = {&value}; CommandBufferSize_1_0 commandBufferSize = {}; commandBufferSize.GpuMemorySize = 100; EXPECT_CALL(*mockMetricEnumeration, isInitialized()) .Times(1) .WillOnce(Return(true)); EXPECT_CALL(*mockMetricsLibrary, getContextData(_, _)) .Times(1) .WillOnce(Return(true)); EXPECT_CALL(*mockMetricsLibrary, load()) .Times(0); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockContextCreate(_, _, _)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<2>(metricsLibraryContextHandle), Return(StatusCode::Success))); 
EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockContextDelete(_)) .Times(1) .WillOnce(Return(StatusCode::Success)); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockCommandBufferGetSize(_, _)) .Times(2) .WillRepeatedly(DoAll(::testing::SetArgPointee<1>(::testing::ByRef(commandBufferSize)), Return(StatusCode::Failed))); ze_event_pool_handle_t eventPoolHandle = {}; ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; eventPoolDesc.flags = 0; eventPoolDesc.stype = ZE_STRUCTURE_TYPE_EVENT_POOL_DESC; ze_event_handle_t eventHandle = {}; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.stype = ZE_STRUCTURE_TYPE_EVENT_DESC; eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; eventDesc.signal = ZE_EVENT_SCOPE_FLAG_DEVICE; EXPECT_EQ(zeEventPoolCreate(context->toHandle(), &eventPoolDesc, 1, &metricDevice, &eventPoolHandle), ZE_RESULT_SUCCESS); EXPECT_NE(eventPoolHandle, nullptr); EXPECT_EQ(zeEventCreate(eventPoolHandle, &eventDesc, &eventHandle), ZE_RESULT_SUCCESS); EXPECT_NE(eventHandle, nullptr); EXPECT_EQ(zetMetricQueryPoolCreate(context->toHandle(), metricDevice, nullptr, &poolDesc, &poolHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(zetMetricQueryCreate(poolHandle, 0, &queryHandle), ZE_RESULT_SUCCESS); EXPECT_NE(queryHandle, nullptr); EXPECT_NE(zetCommandListAppendMetricQueryBegin(commandListHandle, queryHandle), ZE_RESULT_SUCCESS); EXPECT_NE(zetCommandListAppendMetricQueryEnd(commandListHandle, queryHandle, eventHandle, 0, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(zetMetricQueryDestroy(queryHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(zetMetricQueryPoolDestroy(poolHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(zeEventDestroy(eventHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(zeEventPoolDestroy(eventPoolHandle), ZE_RESULT_SUCCESS); } TEST_F(MetricQueryPoolTest, givenRootDeviceWhenGetSubDeviceClientOptionsIsCalledThenReturnRootDeviceProperties) { auto &metricSource = device->getMetricDeviceContext().getMetricSource(); auto &metricsLibrary = metricSource.getMetricsLibrary(); auto 
subDevice = ClientOptionsData_1_0{}; auto subDeviceIndex = ClientOptionsData_1_0{}; auto subDeviceCount = ClientOptionsData_1_0{}; auto workloadPartition = ClientOptionsData_1_0{}; metricsLibrary.getSubDeviceClientOptions(subDevice, subDeviceIndex, subDeviceCount, workloadPartition); // Root device EXPECT_EQ(subDevice.Type, MetricsLibraryApi::ClientOptionsType::SubDevice); EXPECT_EQ(subDevice.SubDevice.Enabled, false); EXPECT_EQ(subDeviceIndex.Type, MetricsLibraryApi::ClientOptionsType::SubDeviceIndex); EXPECT_EQ(subDeviceIndex.SubDeviceIndex.Index, 0u); EXPECT_EQ(subDeviceCount.Type, MetricsLibraryApi::ClientOptionsType::SubDeviceCount); EXPECT_EQ(subDeviceCount.SubDeviceCount.Count, std::max(device->getNEODevice()->getNumSubDevices(), 1u)); EXPECT_EQ(workloadPartition.Type, MetricsLibraryApi::ClientOptionsType::WorkloadPartition); EXPECT_EQ(workloadPartition.WorkloadPartition.Enabled, false); } TEST_F(MetricQueryPoolTest, givenUninitializedMetricEnumerationWhenGetQueryReportGpuSizeIsCalledThenReturnInvalidSize) { auto &metricSource = device->getMetricDeviceContext().getMetricSource(); auto &metricsLibrary = metricSource.getMetricsLibrary(); EXPECT_CALL(*mockMetricEnumeration, isInitialized()) .Times(1) .WillOnce(Return(false)); const uint32_t invalidSize = metricsLibrary.getQueryReportGpuSize(); EXPECT_EQ(invalidSize, 0u); } TEST_F(MetricQueryPoolTest, givenCorrectArgumentsWhenActivateMetricGroupsIsCalledThenReturnsSuccess) { zet_device_handle_t metricDevice = device->toHandle(); metricsDeviceParams.ConcurrentGroupsCount = 1; Mock metricsConcurrentGroup; TConcurrentGroupParams_1_0 metricsConcurrentGroupParams = {}; metricsConcurrentGroupParams.MetricSetsCount = 1; metricsConcurrentGroupParams.SymbolName = "OA"; metricsConcurrentGroupParams.Description = "OA description"; Mock metricsSet; MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams = {}; metricsSetParams.ApiMask = MetricsDiscovery::API_TYPE_OCL; metricsSetParams.MetricsCount = 0; 
metricsSetParams.SymbolName = "Metric set name"; metricsSetParams.ShortName = "Metric set description"; metricsSetParams.MetricsCount = 1; Mock metric; TMetricParams_1_0 metricParams = {}; metricParams.SymbolName = "Metric symbol name"; metricParams.ShortName = "Metric short name"; metricParams.LongName = "Metric long name"; metricParams.ResultType = MetricsDiscovery::TMetricResultType::RESULT_UINT64; metricParams.MetricType = MetricsDiscovery::TMetricType::METRIC_TYPE_RATIO; zet_metric_group_handle_t metricGroupHandle = {}; zet_metric_group_properties_t metricGroupProperties = {}; metricGroupProperties.samplingType = ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_EVENT_BASED; TypedValue_1_0 value = {}; value.Type = ValueType::Uint32; value.ValueUInt32 = 64; ConfigurationHandle_1_0 metricsLibraryConfigurationHandle = {&value}; ContextHandle_1_0 metricsLibraryContextHandle = {&value}; openMetricsAdapter(); EXPECT_CALL(metricsDevice, GetParams()) .WillRepeatedly(Return(&metricsDeviceParams)); EXPECT_CALL(metricsDevice, GetConcurrentGroup(_)) .Times(1) .WillOnce(Return(&metricsConcurrentGroup)); EXPECT_CALL(metricsConcurrentGroup, GetParams()) .Times(1) .WillOnce(Return(&metricsConcurrentGroupParams)); EXPECT_CALL(metricsConcurrentGroup, GetMetricSet(_)) .WillRepeatedly(Return(&metricsSet)); EXPECT_CALL(metricsSet, GetParams()) .WillRepeatedly(Return(&metricsSetParams)); EXPECT_CALL(metricsSet, GetMetric(_)) .Times(1) .WillOnce(Return(&metric)); EXPECT_CALL(metricsSet, Activate()) .Times(1) .WillOnce(Return(MetricsDiscovery::CC_OK)); EXPECT_CALL(metricsSet, Deactivate()) .Times(1) .WillOnce(Return(MetricsDiscovery::CC_OK)); EXPECT_CALL(metric, GetParams()) .Times(1) .WillOnce(Return(&metricParams)); EXPECT_CALL(metricsSet, SetApiFiltering(_)) .WillRepeatedly(Return(TCompletionCode::CC_OK)); EXPECT_CALL(*mockMetricEnumeration, isInitialized()) .Times(1) .WillOnce(Return(true)); EXPECT_CALL(*mockMetricsLibrary, getContextData(_, _)) .Times(1) .WillOnce(Return(true)); 
EXPECT_CALL(*mockMetricsLibrary, load()) .Times(0); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockContextCreate(_, _, _)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<2>(metricsLibraryContextHandle), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockContextDelete(_)) .Times(1) .WillOnce(Return(StatusCode::Success)); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockConfigurationCreate(_, _)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<1>(metricsLibraryConfigurationHandle), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockConfigurationActivate(_, _)) .Times(1) .WillOnce(Return(StatusCode::Success)); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockConfigurationDeactivate(_)) .Times(1) .WillOnce(Return(StatusCode::Success)); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockConfigurationDelete(_)) .Times(1) .WillOnce(Return(StatusCode::Success)); // Metric group count. uint32_t metricGroupCount = 0; EXPECT_EQ(zetMetricGroupGet(device->toHandle(), &metricGroupCount, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); // Metric group handle. EXPECT_EQ(zetMetricGroupGet(device->toHandle(), &metricGroupCount, &metricGroupHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); EXPECT_NE(metricGroupHandle, nullptr); // Activate metric group. EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), device->toHandle(), 1, &metricGroupHandle), ZE_RESULT_SUCCESS); // Activate metric groups. 
device->activateMetricGroups(); EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), metricDevice, 0, nullptr), ZE_RESULT_SUCCESS); } } // namespace ult } // namespace L0 test_metric_oa_query_pool_3.cpp000066400000000000000000001742701422164147700353320ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/metrics/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/device_factory.h" #include "shared/test/common/mocks/ult_device_factory.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/source/driver/driver_imp.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h" #include "level_zero/core/test/unit_tests/mocks/mock_device.h" #include "level_zero/core/test/unit_tests/mocks/mock_driver_handle.h" #include "level_zero/tools/source/metrics/metric_oa_source.h" #include "level_zero/tools/test/unit_tests/sources/metrics/mock_metric_oa.h" #include "gmock/gmock.h" #include "gtest/gtest.h" using ::testing::_; using ::testing::Return; namespace L0 { namespace ult { class MultiDeviceMetricQueryPoolTest : public MetricMultiDeviceFixture, public ::testing::Test { public: void SetUp() override { ze_result_t returnValue = ZE_RESULT_SUCCESS; MetricMultiDeviceFixture::SetUp(); auto executionEnvironment = new NEO::ExecutionEnvironment(); driverHandle.reset(DriverHandle::create(NEO::DeviceFactory::createDevices(*executionEnvironment), L0EnvVariables{}, &returnValue)); } void TearDown() override { MetricMultiDeviceFixture::TearDown(); driverHandle.reset(); GlobalDriver = nullptr; } std::unique_ptr driverHandle; }; TEST_F(MultiDeviceMetricQueryPoolTest, givenSubDeviceWhenGetSubDeviceClientOptionsIsCalledThenReturnSubDeviceProperties) { auto &deviceImp = *static_cast(devices[0]); auto subDevice = ClientOptionsData_1_0{}; auto subDeviceIndex = ClientOptionsData_1_0{}; auto subDeviceCount = ClientOptionsData_1_0{}; auto 
workloadPartition = ClientOptionsData_1_0{}; // Sub devices for (uint32_t i = 0, count = deviceImp.numSubDevices; i < count; ++i) { auto &metricSource = deviceImp.subDevices[i]->getMetricDeviceContext().getMetricSource(); auto &metricsLibrary = metricSource.getMetricsLibrary(); metricsLibrary.getSubDeviceClientOptions(subDevice, subDeviceIndex, subDeviceCount, workloadPartition); EXPECT_EQ(subDevice.Type, MetricsLibraryApi::ClientOptionsType::SubDevice); EXPECT_EQ(subDevice.SubDevice.Enabled, true); EXPECT_EQ(subDeviceIndex.Type, MetricsLibraryApi::ClientOptionsType::SubDeviceIndex); EXPECT_EQ(subDeviceIndex.SubDeviceIndex.Index, i); EXPECT_EQ(subDeviceCount.Type, MetricsLibraryApi::ClientOptionsType::SubDeviceCount); EXPECT_EQ(subDeviceCount.SubDeviceCount.Count, std::max(deviceImp.numSubDevices, 1u)); EXPECT_EQ(workloadPartition.Type, MetricsLibraryApi::ClientOptionsType::WorkloadPartition); EXPECT_EQ(workloadPartition.WorkloadPartition.Enabled, false); } } TEST_F(MultiDeviceMetricQueryPoolTest, givenSubDeviceWithWorkloadPartitionWhenGetSubDeviceClientOptionsIsCalledThenReturnSubDeviceProperties) { auto &deviceImp = *static_cast(devices[0]); auto subDevice = ClientOptionsData_1_0{}; auto subDeviceIndex = ClientOptionsData_1_0{}; auto subDeviceCount = ClientOptionsData_1_0{}; auto workloadPartition = ClientOptionsData_1_0{}; // Sub devices for (uint32_t i = 0, count = deviceImp.numSubDevices; i < count; ++i) { auto &metricSource = deviceImp.subDevices[i]->getMetricDeviceContext().getMetricSource(); auto &metricsLibrary = metricSource.getMetricsLibrary(); metricsLibrary.enableWorkloadPartition(); metricsLibrary.getSubDeviceClientOptions(subDevice, subDeviceIndex, subDeviceCount, workloadPartition); EXPECT_EQ(subDevice.Type, MetricsLibraryApi::ClientOptionsType::SubDevice); EXPECT_EQ(subDevice.SubDevice.Enabled, true); EXPECT_EQ(subDeviceIndex.Type, MetricsLibraryApi::ClientOptionsType::SubDeviceIndex); EXPECT_EQ(subDeviceIndex.SubDeviceIndex.Index, i); 
EXPECT_EQ(subDeviceCount.Type, MetricsLibraryApi::ClientOptionsType::SubDeviceCount); EXPECT_EQ(subDeviceCount.SubDeviceCount.Count, std::max(deviceImp.numSubDevices, 1u)); EXPECT_EQ(workloadPartition.Type, MetricsLibraryApi::ClientOptionsType::WorkloadPartition); EXPECT_EQ(workloadPartition.WorkloadPartition.Enabled, true); } } TEST_F(MultiDeviceMetricQueryPoolTest, givenCorrectArgumentsWhenZetMetricQueryPoolCreateIsCalledThenReturnsSuccess) { zet_device_handle_t metricDevice = devices[0]->toHandle(); auto &deviceImp = *static_cast(devices[0]); const uint32_t subDeviceCount = static_cast(deviceImp.subDevices.size()); metricsDeviceParams.ConcurrentGroupsCount = 1; Mock metricsConcurrentGroup; TConcurrentGroupParams_1_0 metricsConcurrentGroupParams = {}; metricsConcurrentGroupParams.MetricSetsCount = 1; metricsConcurrentGroupParams.SymbolName = "OA"; metricsConcurrentGroupParams.Description = "OA description"; Mock metricsSet; MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams = {}; metricsSetParams.ApiMask = MetricsDiscovery::API_TYPE_OCL; metricsSetParams.MetricsCount = 0; metricsSetParams.SymbolName = "Metric set name"; metricsSetParams.ShortName = "Metric set description"; metricsSetParams.MetricsCount = 1; Mock metric; TMetricParams_1_0 metricParams = {}; metricParams.SymbolName = "Metric symbol name"; metricParams.ShortName = "Metric short name"; metricParams.LongName = "Metric long name"; metricParams.ResultType = MetricsDiscovery::TMetricResultType::RESULT_UINT64; metricParams.MetricType = MetricsDiscovery::TMetricType::METRIC_TYPE_RATIO; zet_metric_group_handle_t metricGroupHandle = {}; zet_metric_group_properties_t metricGroupProperties = {}; metricGroupProperties.samplingType = ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_EVENT_BASED; zet_metric_query_handle_t queryHandle = {}; zet_metric_query_pool_handle_t poolHandle = {}; zet_metric_query_pool_desc_t poolDesc = {}; poolDesc.stype = ZET_STRUCTURE_TYPE_METRIC_QUERY_POOL_DESC; poolDesc.count = 1; 
poolDesc.type = ZET_METRIC_QUERY_POOL_TYPE_PERFORMANCE; TypedValue_1_0 value = {}; value.Type = ValueType::Uint32; value.ValueUInt32 = 64; QueryHandle_1_0 metricsLibraryQueryHandle = {&value}; ContextHandle_1_0 metricsLibraryContextHandle = {&value}; openMetricsAdapter(); EXPECT_CALL(metricsDevice, GetParams()) .WillRepeatedly(Return(&metricsDeviceParams)); EXPECT_CALL(metricsDevice, GetConcurrentGroup(_)) .Times(subDeviceCount) .WillRepeatedly(Return(&metricsConcurrentGroup)); EXPECT_CALL(metricsConcurrentGroup, GetParams()) .Times(subDeviceCount) .WillRepeatedly(Return(&metricsConcurrentGroupParams)); EXPECT_CALL(metricsConcurrentGroup, GetMetricSet(_)) .WillRepeatedly(Return(&metricsSet)); EXPECT_CALL(metricsSet, GetParams()) .WillRepeatedly(Return(&metricsSetParams)); EXPECT_CALL(metricsSet, GetMetric(_)) .Times(subDeviceCount) .WillRepeatedly(Return(&metric)); EXPECT_CALL(metric, GetParams()) .Times(subDeviceCount) .WillRepeatedly(Return(&metricParams)); EXPECT_CALL(metricsSet, SetApiFiltering(_)) .WillRepeatedly(Return(TCompletionCode::CC_OK)); for (uint32_t i = 0; i < subDeviceCount; ++i) { EXPECT_CALL(*mockMetricEnumerationSubDevices[i], isInitialized()) .Times(1) .WillOnce(Return(true)); EXPECT_CALL(*mockMetricsLibrarySubDevices[i], getContextData(_, _)) .Times(1) .WillOnce(Return(true)); } EXPECT_CALL(*mockMetricsLibrary, load()) .Times(0); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockQueryCreate(_, _)) .Times(subDeviceCount) .WillRepeatedly(DoAll(::testing::SetArgPointee<1>(metricsLibraryQueryHandle), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockQueryDelete(_)) .Times(subDeviceCount) .WillRepeatedly(Return(StatusCode::Success)); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockGetParameter(_, _, _)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<2>(value), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockContextCreate(_, _, _)) .Times(subDeviceCount) 
.WillRepeatedly(DoAll(::testing::SetArgPointee<2>(metricsLibraryContextHandle), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockContextDelete(_)) .Times(subDeviceCount) .WillRepeatedly(Return(StatusCode::Success)); // Metric group count. uint32_t metricGroupCount = 0; EXPECT_EQ(zetMetricGroupGet(devices[0]->toHandle(), &metricGroupCount, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); // Metric group handle. EXPECT_EQ(zetMetricGroupGet(devices[0]->toHandle(), &metricGroupCount, &metricGroupHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); EXPECT_NE(metricGroupHandle, nullptr); // Activate metric group. EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), devices[0]->toHandle(), 1, &metricGroupHandle), ZE_RESULT_SUCCESS); // Create metric query pool. EXPECT_EQ(zetMetricQueryPoolCreate(context->toHandle(), metricDevice, metricGroupHandle, &poolDesc, &poolHandle), ZE_RESULT_SUCCESS); EXPECT_NE(poolHandle, nullptr); // Create metric query. EXPECT_EQ(zetMetricQueryCreate(poolHandle, 0, &queryHandle), ZE_RESULT_SUCCESS); EXPECT_NE(queryHandle, nullptr); // Destroy query and its pool. 
EXPECT_EQ(zetMetricQueryDestroy(queryHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(zetMetricQueryPoolDestroy(poolHandle), ZE_RESULT_SUCCESS); } TEST_F(MultiDeviceMetricQueryPoolTest, givenEnableWalkerPartitionIsOnWhenZetCommandListAppendMetricQueryBeginEndIsCalledForSubDeviceThenReturnsSuccess) { DebugManagerStateRestore restorer; DebugManager.flags.EnableWalkerPartition.set(1); auto &deviceImp = *static_cast(devices[0]); zet_device_handle_t metricDeviceHandle = deviceImp.subDevices[0]->toHandle(); metricsDeviceParams.ConcurrentGroupsCount = 1; Mock metricsConcurrentGroup; TConcurrentGroupParams_1_0 metricsConcurrentGroupParams = {}; metricsConcurrentGroupParams.MetricSetsCount = 1; metricsConcurrentGroupParams.SymbolName = "OA"; metricsConcurrentGroupParams.Description = "OA description"; Mock metricsSet; MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams = {}; metricsSetParams.ApiMask = MetricsDiscovery::API_TYPE_OCL; metricsSetParams.MetricsCount = 0; metricsSetParams.SymbolName = "Metric set name"; metricsSetParams.ShortName = "Metric set description"; metricsSetParams.MetricsCount = 1; Mock metric; TMetricParams_1_0 metricParams = {}; metricParams.SymbolName = "Metric symbol name"; metricParams.ShortName = "Metric short name"; metricParams.LongName = "Metric long name"; metricParams.ResultType = MetricsDiscovery::TMetricResultType::RESULT_UINT64; metricParams.MetricType = MetricsDiscovery::TMetricType::METRIC_TYPE_RATIO; zet_metric_group_handle_t metricGroupHandle = {}; zet_metric_group_properties_t metricGroupProperties = {}; metricGroupProperties.samplingType = ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_EVENT_BASED; zet_metric_query_handle_t queryHandle = {}; zet_metric_query_pool_handle_t poolHandle = {}; zet_metric_query_pool_desc_t poolDesc = {}; poolDesc.stype = ZET_STRUCTURE_TYPE_METRIC_QUERY_POOL_DESC; poolDesc.count = 1; poolDesc.type = ZET_METRIC_QUERY_POOL_TYPE_PERFORMANCE; TypedValue_1_0 value = {}; value.Type = ValueType::Uint32; value.ValueUInt32 = 64; 
QueryHandle_1_0 metricsLibraryQueryHandle = {&value}; ContextHandle_1_0 metricsLibraryContextHandle = {&value}; openMetricsAdapterSubDevice(0); EXPECT_CALL(metricsDevice, GetParams()) .WillRepeatedly(Return(&metricsDeviceParams)); EXPECT_CALL(metricsDevice, GetConcurrentGroup(_)) .Times(1) .WillRepeatedly(Return(&metricsConcurrentGroup)); EXPECT_CALL(metricsConcurrentGroup, GetParams()) .Times(1) .WillRepeatedly(Return(&metricsConcurrentGroupParams)); EXPECT_CALL(metricsConcurrentGroup, GetMetricSet(_)) .WillRepeatedly(Return(&metricsSet)); EXPECT_CALL(metricsSet, GetParams()) .WillRepeatedly(Return(&metricsSetParams)); EXPECT_CALL(metricsSet, GetMetric(_)) .Times(1) .WillRepeatedly(Return(&metric)); EXPECT_CALL(metric, GetParams()) .Times(1) .WillRepeatedly(Return(&metricParams)); EXPECT_CALL(metricsSet, SetApiFiltering(_)) .WillRepeatedly(Return(TCompletionCode::CC_OK)); EXPECT_CALL(*mockMetricEnumerationSubDevices[0], isInitialized()) .Times(1) .WillOnce(Return(true)); EXPECT_CALL(*mockMetricsLibrarySubDevices[0], getContextData(_, _)) .Times(1) .WillOnce(Return(true)); EXPECT_CALL(*mockMetricsLibrary, load()) .Times(0); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockQueryCreate(_, _)) .Times(1) .WillRepeatedly(DoAll(::testing::SetArgPointee<1>(metricsLibraryQueryHandle), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockQueryDelete(_)) .Times(1) .WillRepeatedly(Return(StatusCode::Success)); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockGetParameter(_, _, _)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<2>(value), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockContextCreate(_, _, _)) .Times(1) .WillRepeatedly(DoAll(::testing::SetArgPointee<2>(metricsLibraryContextHandle), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockContextDelete(_)) .Times(1) .WillRepeatedly(Return(StatusCode::Success)); uint32_t metricGroupCount = 0; 
// Remainder of the preceding test (it begins above this span): fetch the single
// metric group, activate it, create a query pool and a query, call activation
// again with an empty group list, then destroy the query and the pool.
    EXPECT_EQ(zetMetricGroupGet(metricDeviceHandle, &metricGroupCount, nullptr), ZE_RESULT_SUCCESS);
    EXPECT_EQ(metricGroupCount, 1u);
    EXPECT_EQ(zetMetricGroupGet(metricDeviceHandle, &metricGroupCount, &metricGroupHandle), ZE_RESULT_SUCCESS);
    EXPECT_EQ(metricGroupCount, 1u);
    EXPECT_NE(metricGroupHandle, nullptr);
    EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), metricDeviceHandle, 1, &metricGroupHandle), ZE_RESULT_SUCCESS);
    EXPECT_EQ(zetMetricQueryPoolCreate(context->toHandle(), metricDeviceHandle, metricGroupHandle, &poolDesc, &poolHandle), ZE_RESULT_SUCCESS);
    EXPECT_NE(poolHandle, nullptr);
    EXPECT_EQ(zetMetricQueryCreate(poolHandle, 0, &queryHandle), ZE_RESULT_SUCCESS);
    EXPECT_NE(queryHandle, nullptr);
    EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), metricDeviceHandle, 0, nullptr), ZE_RESULT_SUCCESS);
    EXPECT_EQ(zetMetricQueryDestroy(queryHandle), ZE_RESULT_SUCCESS);
    EXPECT_EQ(zetMetricQueryPoolDestroy(poolHandle), ZE_RESULT_SUCCESS);
}

// Checks that zetCommandListAppendMetricMemoryBarrier() returns
// ZE_RESULT_SUCCESS for a command list created on sub-device 0 of devices[0]
// while the EnableWalkerPartition debug flag is set to 1.
//
// NOTE(review): `static_cast(devices[0])` and `std::unique_ptr commandList`
// carry no template argument list in this text, so the test is not valid C++
// as written. The arguments appear to have been lost; restore them from the
// original source before building.
TEST_F(MultiDeviceMetricQueryPoolTest, givenEnableWalkerPartitionIsOnWhenZetCommandListAppendMetricMemoryBarrierIsCalledForSubDeviceThenReturnsSuccess) {

    // Declared before the flag is changed; presumably puts the debug flags back
    // when it goes out of scope - confirm against DebugManagerStateRestore.
    DebugManagerStateRestore restorer;
    DebugManager.flags.EnableWalkerPartition.set(1);

    // The command list is created on the first sub-device, not on the root device.
    auto &deviceImp = *static_cast(devices[0]);
    // returnValue receives the creation status but is never inspected in this test.
    ze_result_t returnValue;
    std::unique_ptr commandList(CommandList::create(productFamily, deviceImp.subDevices[0], NEO::EngineGroupType::RenderCompute, 0u, returnValue));
    zet_command_list_handle_t commandListHandle = commandList->toHandle();

    // Both mock handles wrap the address of `value`.
    TypedValue_1_0 value = {};
    value.Type = ValueType::Uint32;
    value.ValueUInt32 = 64;

    // metricsLibraryQueryHandle is not referenced again in this test.
    QueryHandle_1_0 metricsLibraryQueryHandle = {&value};
    ContextHandle_1_0 metricsLibraryContextHandle = {&value};

    // Size handed back through MockCommandBufferGetSize below.
    CommandBufferSize_1_0 commandBufferSize = {};
    commandBufferSize.GpuMemorySize = 100;

    // Sub-device 0 mocks: each queried exactly once, both answer true.
    EXPECT_CALL(*mockMetricEnumerationSubDevices[0], isInitialized())
        .Times(1)
        .WillOnce(Return(true));

    EXPECT_CALL(*mockMetricsLibrarySubDevices[0], getContextData(_, _))
        .Times(1)
        .WillOnce(Return(true));

    // load() must never be invoked on the metrics library mock.
    EXPECT_CALL(*mockMetricsLibrary, load())
        .Times(0);

    // One size query (filled from commandBufferSize by reference) and one
    // command-buffer fetch are expected, both reporting success.
    EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockCommandBufferGetSize(_, _))
        .Times(1)
        .WillOnce(DoAll(::testing::SetArgPointee<1>(::testing::ByRef(commandBufferSize)), Return(StatusCode::Success)));

    EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockCommandBufferGet(_))
        .Times(1)
        .WillOnce(Return(StatusCode::Success));

    // Exactly one library context is created and deleted.
    EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockContextCreate(_, _, _))
        .Times(1)
        .WillRepeatedly(DoAll(::testing::SetArgPointee<2>(metricsLibraryContextHandle), Return(StatusCode::Success)));

    EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockContextDelete(_))
        .Times(1)
        .WillRepeatedly(Return(StatusCode::Success));

    // Call under test.
    EXPECT_EQ(zetCommandListAppendMetricMemoryBarrier(commandListHandle), ZE_RESULT_SUCCESS);
}

// Opening of the following test; its body continues below this span.
TEST_F(MultiDeviceMetricQueryPoolTest, givenFailedMetricsLibraryContextWhenZetMetricQueryPoolCreateIsCalledThenReturnFail) {
    zet_device_handle_t metricDevice = devices[0]->toHandle();
    auto &deviceImp = *static_cast(devices[0]);
    const uint32_t subDeviceCount = static_cast(deviceImp.subDevices.size());
    metricsDeviceParams.ConcurrentGroupsCount = 1;
    Mock metricsConcurrentGroup;
    TConcurrentGroupParams_1_0 metricsConcurrentGroupParams = {};
    metricsConcurrentGroupParams.MetricSetsCount = 1;
    metricsConcurrentGroupParams.SymbolName = "OA";
    metricsConcurrentGroupParams.Description = "OA description";
    Mock metricsSet;
    MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams = {};
    metricsSetParams.ApiMask = MetricsDiscovery::API_TYPE_OCL;
    metricsSetParams.MetricsCount = 0;
    metricsSetParams.SymbolName = "Metric set name";
    metricsSetParams.ShortName = "Metric set description";
    metricsSetParams.MetricsCount = 1;
    Mock metric;
    TMetricParams_1_0 metricParams = {};
    metricParams.SymbolName = "Metric symbol name";
    metricParams.ShortName = "Metric short name";
    metricParams.LongName = "Metric long name";
    metricParams.ResultType = MetricsDiscovery::TMetricResultType::RESULT_UINT64;
    metricParams.MetricType =
MetricsDiscovery::TMetricType::METRIC_TYPE_RATIO; zet_metric_group_handle_t metricGroupHandle = {}; zet_metric_group_properties_t metricGroupProperties = {}; metricGroupProperties.samplingType = ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_EVENT_BASED; zet_metric_query_pool_handle_t poolHandle = {}; zet_metric_query_pool_desc_t poolDesc = {}; poolDesc.stype = ZET_STRUCTURE_TYPE_METRIC_QUERY_POOL_DESC; poolDesc.count = 1; poolDesc.type = ZET_METRIC_QUERY_POOL_TYPE_PERFORMANCE; TypedValue_1_0 value = {}; value.Type = ValueType::Uint32; value.ValueUInt32 = 64; QueryHandle_1_0 metricsLibraryQueryHandle = {&value}; ContextHandle_1_0 metricsLibraryContextHandle = {&value}; openMetricsAdapter(); EXPECT_CALL(metricsDevice, GetParams()) .WillRepeatedly(Return(&metricsDeviceParams)); EXPECT_CALL(metricsDevice, GetConcurrentGroup(_)) .Times(subDeviceCount) .WillRepeatedly(Return(&metricsConcurrentGroup)); EXPECT_CALL(metricsConcurrentGroup, GetParams()) .Times(subDeviceCount) .WillRepeatedly(Return(&metricsConcurrentGroupParams)); EXPECT_CALL(metricsConcurrentGroup, GetMetricSet(_)) .WillRepeatedly(Return(&metricsSet)); EXPECT_CALL(metricsSet, GetParams()) .WillRepeatedly(Return(&metricsSetParams)); EXPECT_CALL(metricsSet, GetMetric(_)) .Times(subDeviceCount) .WillRepeatedly(Return(&metric)); EXPECT_CALL(metric, GetParams()) .Times(subDeviceCount) .WillRepeatedly(Return(&metricParams)); EXPECT_CALL(metricsSet, SetApiFiltering(_)) .WillRepeatedly(Return(TCompletionCode::CC_OK)); for (uint32_t i = 0; i < subDeviceCount; ++i) { EXPECT_CALL(*mockMetricEnumerationSubDevices[i], isInitialized()) .Times(1) .WillOnce(Return(true)); EXPECT_CALL(*mockMetricsLibrarySubDevices[i], getContextData(_, _)) .Times(1) .WillOnce(Return(true)); } EXPECT_CALL(*mockMetricsLibrary, load()) .Times(0); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockQueryCreate(_, _)) .Times(subDeviceCount) .WillOnce(DoAll(::testing::SetArgPointee<1>(metricsLibraryQueryHandle), Return(StatusCode::Success))) 
.WillOnce(Return(StatusCode::Failed)); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockQueryDelete(_)) .Times(1) .WillOnce(Return(StatusCode::Success)); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockGetParameter(_, _, _)) .Times(0); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockContextCreate(_, _, _)) .Times(subDeviceCount) .WillRepeatedly(DoAll(::testing::SetArgPointee<2>(metricsLibraryContextHandle), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockContextDelete(_)) .Times(subDeviceCount) .WillRepeatedly(Return(StatusCode::Success)); // Metric group count. uint32_t metricGroupCount = 0; EXPECT_EQ(zetMetricGroupGet(devices[0]->toHandle(), &metricGroupCount, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); // Metric group handle. EXPECT_EQ(zetMetricGroupGet(devices[0]->toHandle(), &metricGroupCount, &metricGroupHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); EXPECT_NE(metricGroupHandle, nullptr); // Activate metric group. EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), devices[0]->toHandle(), 1, &metricGroupHandle), ZE_RESULT_SUCCESS); // Create metric query pool. 
// Final statement of the preceding test (it begins above this span); unchanged.
    EXPECT_EQ(zetMetricQueryPoolCreate(context->toHandle(), metricDevice, metricGroupHandle, &poolDesc, &poolHandle), ZE_RESULT_ERROR_INVALID_ARGUMENT);
}

// An execution-type query pool is requested with a null metric group handle.
// Creation must succeed and yield a non-null pool, destruction must succeed,
// and the metrics library mock must never be asked to load().
TEST_F(MultiDeviceMetricQueryPoolTest, givenExecutionQueryTypeWhenZetMetricQueryPoolCreateIsCalledThenQueryPoolIsObtained) {

    // No library load is allowed at any point in this test.
    EXPECT_CALL(*mockMetricsLibrary, load())
        .Times(0);

    // Describe a one-slot pool of the execution type.
    zet_metric_query_pool_desc_t executionPoolDesc = {};
    executionPoolDesc.type = ZET_METRIC_QUERY_POOL_TYPE_EXECUTION;
    executionPoolDesc.count = 1;
    executionPoolDesc.stype = ZET_STRUCTURE_TYPE_METRIC_QUERY_POOL_DESC;

    zet_device_handle_t rootDeviceHandle = devices[0]->toHandle();
    zet_metric_query_pool_handle_t executionPool = {};

    // Create: the metric group argument is deliberately nullptr.
    const auto createStatus = zetMetricQueryPoolCreate(context->toHandle(), rootDeviceHandle, nullptr, &executionPoolDesc, &executionPool);
    EXPECT_EQ(createStatus, ZE_RESULT_SUCCESS);
    EXPECT_NE(executionPool, nullptr);

    // Destroy.
    const auto destroyStatus = zetMetricQueryPoolDestroy(executionPool);
    EXPECT_EQ(destroyStatus, ZE_RESULT_SUCCESS);
}

// Opening of the following test; its body continues below this span. Unchanged.
TEST_F(MultiDeviceMetricQueryPoolTest, givenFailedGetDataWhenZetMetricQueryGetDataIsCalledThenReturnsFail) {
    zet_device_handle_t metricDevice = devices[0]->toHandle();
    auto &deviceImp = *static_cast(devices[0]);
    const uint32_t subDeviceCount = static_cast(deviceImp.subDevices.size());
    metricsDeviceParams.ConcurrentGroupsCount = 1;
    Mock metricsConcurrentGroup;
    TConcurrentGroupParams_1_0 metricsConcurrentGroupParams = {};
    metricsConcurrentGroupParams.MetricSetsCount = 1;
    metricsConcurrentGroupParams.SymbolName = "OA";
    metricsConcurrentGroupParams.Description = "OA description";
    Mock metricsSet;
    MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams = {};
    metricsSetParams.ApiMask = MetricsDiscovery::API_TYPE_OCL;
    metricsSetParams.MetricsCount = 0;
    metricsSetParams.SymbolName = "Metric set name";
    metricsSetParams.ShortName = "Metric set description";
    metricsSetParams.MetricsCount = 1;
    Mock metric;
    TMetricParams_1_0 metricParams = {};
    metricParams.SymbolName = "Metric symbol name";
    metricParams.ShortName = "Metric short name";
    metricParams.LongName = "Metric long name";
    metricParams.ResultType =
MetricsDiscovery::TMetricResultType::RESULT_UINT64; metricParams.MetricType = MetricsDiscovery::TMetricType::METRIC_TYPE_RATIO; zet_metric_group_handle_t metricGroupHandle = {}; zet_metric_group_properties_t metricGroupProperties = {}; metricGroupProperties.samplingType = ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_EVENT_BASED; zet_metric_query_handle_t queryHandle = {}; zet_metric_query_pool_handle_t poolHandle = {}; zet_metric_query_pool_desc_t poolDesc = {}; poolDesc.stype = ZET_STRUCTURE_TYPE_METRIC_QUERY_POOL_DESC; poolDesc.count = 1; poolDesc.type = ZET_METRIC_QUERY_POOL_TYPE_PERFORMANCE; TypedValue_1_0 value = {}; value.Type = ValueType::Uint32; value.ValueUInt32 = 64; size_t reportSize = 256; QueryHandle_1_0 metricsLibraryQueryHandle = {&value}; ContextHandle_1_0 metricsLibraryContextHandle = {&value}; CommandBufferSize_1_0 commandBufferSize = {}; commandBufferSize.GpuMemorySize = 100; openMetricsAdapter(); EXPECT_CALL(metricsDevice, GetParams()) .WillRepeatedly(Return(&metricsDeviceParams)); EXPECT_CALL(metricsDevice, GetConcurrentGroup(_)) .Times(subDeviceCount) .WillRepeatedly(Return(&metricsConcurrentGroup)); EXPECT_CALL(metricsConcurrentGroup, GetParams()) .Times(subDeviceCount) .WillRepeatedly(Return(&metricsConcurrentGroupParams)); EXPECT_CALL(metricsConcurrentGroup, GetMetricSet(_)) .WillRepeatedly(Return(&metricsSet)); EXPECT_CALL(metricsSet, GetParams()) .WillRepeatedly(Return(&metricsSetParams)); EXPECT_CALL(metricsSet, GetMetric(_)) .Times(subDeviceCount) .WillRepeatedly(Return(&metric)); EXPECT_CALL(metric, GetParams()) .Times(subDeviceCount) .WillRepeatedly(Return(&metricParams)); EXPECT_CALL(metricsSet, SetApiFiltering(_)) .WillRepeatedly(Return(TCompletionCode::CC_OK)); for (uint32_t i = 0; i < subDeviceCount; ++i) { EXPECT_CALL(*mockMetricEnumerationSubDevices[i], isInitialized()) .Times(1) .WillOnce(Return(true)); EXPECT_CALL(*mockMetricsLibrarySubDevices[i], getContextData(_, _)) .Times(1) .WillOnce(Return(true)); } 
EXPECT_CALL(*mockMetricsLibrarySubDevices[0], getMetricQueryReportSize(_)) .Times(1) .WillOnce(DoAll(::testing::SetArgReferee<0>(reportSize), Return(true))); EXPECT_CALL(*mockMetricsLibrary, load()) .Times(0); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockQueryCreate(_, _)) .Times(subDeviceCount) .WillRepeatedly(DoAll(::testing::SetArgPointee<1>(metricsLibraryQueryHandle), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockQueryDelete(_)) .Times(subDeviceCount) .WillRepeatedly(Return(StatusCode::Success)); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockGetParameter(_, _, _)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<2>(value), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockContextCreate(_, _, _)) .Times(subDeviceCount) .WillRepeatedly(DoAll(::testing::SetArgPointee<2>(metricsLibraryContextHandle), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockGetData(_)) .Times(subDeviceCount) .WillOnce(Return(StatusCode::Success)) .WillOnce(Return(StatusCode::Failed)); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockContextDelete(_)) .Times(subDeviceCount) .WillRepeatedly(Return(StatusCode::Success)); // Metric group count. uint32_t metricGroupCount = 0; EXPECT_EQ(zetMetricGroupGet(devices[0]->toHandle(), &metricGroupCount, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); // Metric group handle. EXPECT_EQ(zetMetricGroupGet(devices[0]->toHandle(), &metricGroupCount, &metricGroupHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); EXPECT_NE(metricGroupHandle, nullptr); // Create metric query pool. EXPECT_EQ(zetMetricQueryPoolCreate(context->toHandle(), metricDevice, metricGroupHandle, &poolDesc, &poolHandle), ZE_RESULT_SUCCESS); EXPECT_NE(poolHandle, nullptr); // Create metric query. EXPECT_EQ(zetMetricQueryCreate(poolHandle, 0, &queryHandle), ZE_RESULT_SUCCESS); EXPECT_NE(queryHandle, nullptr); // Get desired raw data size. 
// Remainder of the preceding test (it begins above this span): the size query
// succeeds and matches the expected layout, the data fetch is expected to
// return ZE_RESULT_ERROR_UNKNOWN, and the query and pool are destroyed.
    size_t rawSize = 0;
    EXPECT_EQ(zetMetricQueryGetData(queryHandle, &rawSize, nullptr), ZE_RESULT_SUCCESS);
    const size_t expectedRawSize = (reportSize * subDeviceCount) + sizeof(MetricGroupCalculateHeader) + (2 * sizeof(uint32_t) * subDeviceCount);
    EXPECT_EQ(rawSize, expectedRawSize);

    // Get data.
    std::vector rawData;
    rawData.resize(rawSize);
    EXPECT_EQ(zetMetricQueryGetData(queryHandle, &rawSize, rawData.data()), ZE_RESULT_ERROR_UNKNOWN);

    // Destroy query and its pool.
    EXPECT_EQ(zetMetricQueryDestroy(queryHandle), ZE_RESULT_SUCCESS);
    EXPECT_EQ(zetMetricQueryPoolDestroy(poolHandle), ZE_RESULT_SUCCESS);
}

// Walks the complete query flow on a command list created for devices[0]:
// metric group lookup and activation, query pool and query creation, event
// pool and event creation, then appending query BEGIN, query END (signalling
// the event) and a metric memory barrier. Every API call is expected to
// return ZE_RESULT_SUCCESS, and everything created is destroyed at the end.
//
// NOTE(review): `static_cast(...)`, `std::unique_ptr commandList` and the three
// `Mock` declarations carry no template argument list in this text, so the
// test is not valid C++ as written. The arguments appear to have been lost;
// restore them from the original source before building.
TEST_F(MultiDeviceMetricQueryPoolTest, givenCorrectArgumentsWhenZetCommandListAppendMetricQueryBeginEndIsCalledThenReturnsSuccess) {

    zet_device_handle_t metricDevice = devices[0]->toHandle();
    auto &deviceImp = *static_cast(devices[0]);
    // Number of sub-devices of devices[0]; it drives the expected mock call counts below.
    const uint32_t subDeviceCount = static_cast(deviceImp.subDevices.size());

    // returnValue receives the creation status but is never inspected in this test.
    ze_result_t returnValue;
    std::unique_ptr commandList(CommandList::create(productFamily, devices[0], NEO::EngineGroupType::RenderCompute, 0u, returnValue));
    zet_command_list_handle_t commandListHandle = commandList->toHandle();

    // Mocked metrics discovery hierarchy: one concurrent group, one metric set, one metric.
    metricsDeviceParams.ConcurrentGroupsCount = 1;

    Mock metricsConcurrentGroup;
    TConcurrentGroupParams_1_0 metricsConcurrentGroupParams = {};
    metricsConcurrentGroupParams.MetricSetsCount = 1;
    metricsConcurrentGroupParams.SymbolName = "OA";
    metricsConcurrentGroupParams.Description = "OA description";

    Mock metricsSet;
    MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams = {};
    metricsSetParams.ApiMask = MetricsDiscovery::API_TYPE_OCL;
    // MetricsCount is written twice; the later assignment (1) is the value in effect.
    metricsSetParams.MetricsCount = 0;
    metricsSetParams.SymbolName = "Metric set name";
    metricsSetParams.ShortName = "Metric set description";
    metricsSetParams.MetricsCount = 1;

    Mock metric;
    TMetricParams_1_0 metricParams = {};
    metricParams.SymbolName = "Metric symbol name";
    metricParams.ShortName = "Metric short name";
    metricParams.LongName = "Metric long name";
    metricParams.ResultType = MetricsDiscovery::TMetricResultType::RESULT_UINT64;
    metricParams.MetricType = MetricsDiscovery::TMetricType::METRIC_TYPE_RATIO;

    zet_metric_group_handle_t metricGroupHandle = {};

    // metricGroupProperties is filled in here but not referenced again in this test.
    zet_metric_group_properties_t metricGroupProperties = {};
    metricGroupProperties.samplingType = ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_EVENT_BASED;

    // Event pool with a single slot; the event at index 0 is passed to the END append.
    ze_event_pool_handle_t eventPoolHandle = {};
    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.count = 1;
    eventPoolDesc.flags = 0;
    eventPoolDesc.stype = ZE_STRUCTURE_TYPE_EVENT_POOL_DESC;

    ze_event_handle_t eventHandle = {};
    ze_event_desc_t eventDesc = {};
    eventDesc.index = 0;
    eventDesc.stype = ZE_STRUCTURE_TYPE_EVENT_DESC;
    eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST;
    eventDesc.signal = ZE_EVENT_SCOPE_FLAG_DEVICE;

    // Performance-type query pool holding one query.
    zet_metric_query_handle_t queryHandle = {};
    zet_metric_query_pool_handle_t poolHandle = {};
    zet_metric_query_pool_desc_t poolDesc = {};
    poolDesc.stype = ZET_STRUCTURE_TYPE_METRIC_QUERY_POOL_DESC;
    poolDesc.count = 1;
    poolDesc.type = ZET_METRIC_QUERY_POOL_TYPE_PERFORMANCE;

    // `value` is returned by MockGetParameter and also backs both mock handles.
    TypedValue_1_0 value = {};
    value.Type = ValueType::Uint32;
    value.ValueUInt32 = 64;

    QueryHandle_1_0 metricsLibraryQueryHandle = {&value};
    ContextHandle_1_0 metricsLibraryContextHandle = {&value};

    // Size handed back (by reference) through MockCommandBufferGetSize.
    CommandBufferSize_1_0 commandBufferSize = {};
    commandBufferSize.GpuMemorySize = 100;

    openMetricsAdapter();

    // Metrics discovery expectations. The calls with an explicit Times() are
    // expected once per sub-device; the others may be called any number of times.
    EXPECT_CALL(metricsDevice, GetParams())
        .WillRepeatedly(Return(&metricsDeviceParams));

    EXPECT_CALL(metricsDevice, GetConcurrentGroup(_))
        .Times(subDeviceCount)
        .WillRepeatedly(Return(&metricsConcurrentGroup));

    EXPECT_CALL(metricsConcurrentGroup, GetParams())
        .Times(subDeviceCount)
        .WillRepeatedly(Return(&metricsConcurrentGroupParams));

    EXPECT_CALL(metricsConcurrentGroup, GetMetricSet(_))
        .WillRepeatedly(Return(&metricsSet));

    EXPECT_CALL(metricsSet, GetParams())
        .WillRepeatedly(Return(&metricsSetParams));

    EXPECT_CALL(metricsSet, GetMetric(_))
        .Times(subDeviceCount)
        .WillRepeatedly(Return(&metric));

    EXPECT_CALL(metric, GetParams())
        .Times(subDeviceCount)
        .WillRepeatedly(Return(&metricParams));

    EXPECT_CALL(metricsSet, SetApiFiltering(_))
        .WillRepeatedly(Return(TCompletionCode::CC_OK));

    // Each sub-device's enumeration and library mocks are queried exactly once and answer true.
    for (uint32_t i = 0; i < subDeviceCount; ++i) {
        EXPECT_CALL(*mockMetricEnumerationSubDevices[i], isInitialized())
            .Times(1)
            .WillOnce(Return(true));

        EXPECT_CALL(*mockMetricsLibrarySubDevices[i], getContextData(_, _))
            .Times(1)
            .WillOnce(Return(true));
    }

    // load() must never be invoked on the metrics library mock.
    EXPECT_CALL(*mockMetricsLibrary, load())
        .Times(0);

    // Library query and context objects are created and deleted once per sub-device.
    EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockQueryCreate(_, _))
        .Times(subDeviceCount)
        .WillRepeatedly(DoAll(::testing::SetArgPointee<1>(metricsLibraryQueryHandle), Return(StatusCode::Success)));

    EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockQueryDelete(_))
        .Times(subDeviceCount)
        .WillRepeatedly(Return(StatusCode::Success));

    EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockGetParameter(_, _, _))
        .Times(1)
        .WillOnce(DoAll(::testing::SetArgPointee<2>(value), Return(StatusCode::Success)));

    EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockContextCreate(_, _, _))
        .Times(subDeviceCount)
        .WillRepeatedly(DoAll(::testing::SetArgPointee<2>(metricsLibraryContextHandle), Return(StatusCode::Success)));

    EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockContextDelete(_))
        .Times(subDeviceCount)
        .WillRepeatedly(Return(StatusCode::Success));

    // 2 * subDeviceCount + 1 command buffer requests are expected in total;
    // presumably BEGIN and END per sub-device plus one for the memory barrier - confirm.
    EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockCommandBufferGetSize(_, _))
        .Times(2 * subDeviceCount + 1)
        .WillRepeatedly(DoAll(::testing::SetArgPointee<1>(::testing::ByRef(commandBufferSize)), Return(StatusCode::Success)));

    EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockCommandBufferGet(_))
        .Times(2 * subDeviceCount + 1)
        .WillRepeatedly(Return(StatusCode::Success));

    // Metric group count.
    uint32_t metricGroupCount = 0;
    EXPECT_EQ(zetMetricGroupGet(devices[0]->toHandle(), &metricGroupCount, nullptr), ZE_RESULT_SUCCESS);
    EXPECT_EQ(metricGroupCount, 1u);

    // Metric group handle.
    EXPECT_EQ(zetMetricGroupGet(devices[0]->toHandle(), &metricGroupCount, &metricGroupHandle), ZE_RESULT_SUCCESS);
    EXPECT_EQ(metricGroupCount, 1u);
    EXPECT_NE(metricGroupHandle, nullptr);

    // Activate metric group.
    EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), devices[0]->toHandle(), 1, &metricGroupHandle), ZE_RESULT_SUCCESS);

    // Create metric query pool.
    EXPECT_EQ(zetMetricQueryPoolCreate(context->toHandle(), metricDevice, metricGroupHandle, &poolDesc, &poolHandle), ZE_RESULT_SUCCESS);
    EXPECT_NE(poolHandle, nullptr);

    // Create metric query.
    EXPECT_EQ(zetMetricQueryCreate(poolHandle, 0, &queryHandle), ZE_RESULT_SUCCESS);
    EXPECT_NE(queryHandle, nullptr);

    // Create event pool.
    EXPECT_EQ(zeEventPoolCreate(context->toHandle(), &eventPoolDesc, 1, &metricDevice, &eventPoolHandle), ZE_RESULT_SUCCESS);
    EXPECT_NE(eventPoolHandle, nullptr);

    // Create event.
    EXPECT_EQ(zeEventCreate(eventPoolHandle, &eventDesc, &eventHandle), ZE_RESULT_SUCCESS);
    EXPECT_NE(eventHandle, nullptr);

    // Write BEGIN metric query to command list.
    EXPECT_EQ(zetCommandListAppendMetricQueryBegin(commandListHandle, queryHandle), ZE_RESULT_SUCCESS);

    // Write END metric query to command list, use an event to determine if the data is available.
    EXPECT_EQ(zetCommandListAppendMetricQueryEnd(commandListHandle, queryHandle, eventHandle, 0, nullptr), ZE_RESULT_SUCCESS);

    // Write memory barrier to command list.
    EXPECT_EQ(zetCommandListAppendMetricMemoryBarrier(commandListHandle), ZE_RESULT_SUCCESS);

    // Destroy event and its pool.
    EXPECT_EQ(zeEventDestroy(eventHandle), ZE_RESULT_SUCCESS);
    EXPECT_EQ(zeEventPoolDestroy(eventPoolHandle), ZE_RESULT_SUCCESS);

    // Destroy query and its pool.
    EXPECT_EQ(zetMetricQueryDestroy(queryHandle), ZE_RESULT_SUCCESS);
    EXPECT_EQ(zetMetricQueryPoolDestroy(poolHandle), ZE_RESULT_SUCCESS);
}

// Opening of the following test; its body continues below this span.
TEST_F(MultiDeviceMetricQueryPoolTest, givenInvalidCommandBufferGetSizeWhenZetCommandListAppendMetricQueryBeginIsCalledThenReturnsFail) {
    zet_device_handle_t metricDevice = devices[0]->toHandle();
    auto &deviceImp = *static_cast(devices[0]);
    const uint32_t subDeviceCount = static_cast(deviceImp.subDevices.size());
    ze_result_t returnValue;
    std::unique_ptr commandList(CommandList::create(productFamily, devices[0], NEO::EngineGroupType::RenderCompute, 0u, returnValue));
    zet_command_list_handle_t commandListHandle = commandList->toHandle();
    metricsDeviceParams.ConcurrentGroupsCount = 1;
    Mock metricsConcurrentGroup;
    TConcurrentGroupParams_1_0 metricsConcurrentGroupParams = {};
    metricsConcurrentGroupParams.MetricSetsCount = 1;
    metricsConcurrentGroupParams.SymbolName = "OA";
    metricsConcurrentGroupParams.Description = "OA description";
    Mock metricsSet;
    MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams = {};
    metricsSetParams.ApiMask = MetricsDiscovery::API_TYPE_OCL;
    metricsSetParams.MetricsCount = 0;
    metricsSetParams.SymbolName = "Metric set name";
    metricsSetParams.ShortName = "Metric set description";
    metricsSetParams.MetricsCount = 1;
    Mock metric;
    TMetricParams_1_0 metricParams = {};
    metricParams.SymbolName = "Metric symbol name";
    metricParams.ShortName = "Metric short name";
    metricParams.LongName = "Metric long name";
    metricParams.ResultType = MetricsDiscovery::TMetricResultType::RESULT_UINT64;
    metricParams.MetricType = MetricsDiscovery::TMetricType::METRIC_TYPE_RATIO;
    zet_metric_group_handle_t metricGroupHandle = {};
    zet_metric_group_properties_t metricGroupProperties = {};
    metricGroupProperties.samplingType = ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_EVENT_BASED;
    zet_metric_query_handle_t queryHandle = {};
    zet_metric_query_pool_handle_t poolHandle = {};
    zet_metric_query_pool_desc_t poolDesc = {};
    poolDesc.stype =
ZET_STRUCTURE_TYPE_METRIC_QUERY_POOL_DESC; poolDesc.count = 1; poolDesc.type = ZET_METRIC_QUERY_POOL_TYPE_PERFORMANCE; TypedValue_1_0 value = {}; value.Type = ValueType::Uint32; value.ValueUInt32 = 64; QueryHandle_1_0 metricsLibraryQueryHandle = {&value}; ContextHandle_1_0 metricsLibraryContextHandle = {&value}; CommandBufferSize_1_0 commandBufferSize = {}; commandBufferSize.GpuMemorySize = 0; openMetricsAdapter(); EXPECT_CALL(metricsDevice, GetParams()) .WillRepeatedly(Return(&metricsDeviceParams)); EXPECT_CALL(metricsDevice, GetConcurrentGroup(_)) .Times(subDeviceCount) .WillRepeatedly(Return(&metricsConcurrentGroup)); EXPECT_CALL(metricsConcurrentGroup, GetParams()) .Times(subDeviceCount) .WillRepeatedly(Return(&metricsConcurrentGroupParams)); EXPECT_CALL(metricsConcurrentGroup, GetMetricSet(_)) .WillRepeatedly(Return(&metricsSet)); EXPECT_CALL(metricsSet, GetParams()) .WillRepeatedly(Return(&metricsSetParams)); EXPECT_CALL(metricsSet, GetMetric(_)) .Times(subDeviceCount) .WillRepeatedly(Return(&metric)); EXPECT_CALL(metric, GetParams()) .Times(subDeviceCount) .WillRepeatedly(Return(&metricParams)); EXPECT_CALL(metricsSet, SetApiFiltering(_)) .WillRepeatedly(Return(TCompletionCode::CC_OK)); for (uint32_t i = 0; i < subDeviceCount; ++i) { EXPECT_CALL(*mockMetricEnumerationSubDevices[i], isInitialized()) .Times(1) .WillOnce(Return(true)); EXPECT_CALL(*mockMetricsLibrarySubDevices[i], getContextData(_, _)) .Times(1) .WillOnce(Return(true)); } EXPECT_CALL(*mockMetricsLibrary, load()) .Times(0); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockQueryCreate(_, _)) .Times(subDeviceCount) .WillRepeatedly(DoAll(::testing::SetArgPointee<1>(metricsLibraryQueryHandle), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockQueryDelete(_)) .Times(subDeviceCount) .WillRepeatedly(Return(StatusCode::Success)); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockGetParameter(_, _, _)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<2>(value), 
Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockContextCreate(_, _, _)) .Times(subDeviceCount) .WillRepeatedly(DoAll(::testing::SetArgPointee<2>(metricsLibraryContextHandle), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockContextDelete(_)) .Times(subDeviceCount) .WillRepeatedly(Return(StatusCode::Success)); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockCommandBufferGetSize(_, _)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<1>(::testing::ByRef(commandBufferSize)), Return(StatusCode::Success))); // Metric group count. uint32_t metricGroupCount = 0; EXPECT_EQ(zetMetricGroupGet(devices[0]->toHandle(), &metricGroupCount, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); // Metric group handle. EXPECT_EQ(zetMetricGroupGet(devices[0]->toHandle(), &metricGroupCount, &metricGroupHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); EXPECT_NE(metricGroupHandle, nullptr); // Create metric query pool. EXPECT_EQ(zetMetricQueryPoolCreate(context->toHandle(), metricDevice, metricGroupHandle, &poolDesc, &poolHandle), ZE_RESULT_SUCCESS); EXPECT_NE(poolHandle, nullptr); // Create metric query. EXPECT_EQ(zetMetricQueryCreate(poolHandle, 0, &queryHandle), ZE_RESULT_SUCCESS); EXPECT_NE(queryHandle, nullptr); // Write BEGIN metric query to command list. EXPECT_EQ(zetCommandListAppendMetricQueryBegin(commandListHandle, queryHandle), ZE_RESULT_ERROR_UNKNOWN); // Destroy query and its pool. 
EXPECT_EQ(zetMetricQueryDestroy(queryHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(zetMetricQueryPoolDestroy(poolHandle), ZE_RESULT_SUCCESS); } TEST_F(MultiDeviceMetricQueryPoolTest, givenInvalidCommandBufferGetWhenZetCommandListAppendMetricQueryBeginIsCalledThenReturnsFail) { zet_device_handle_t metricDevice = devices[0]->toHandle(); auto &deviceImp = *static_cast(devices[0]); const uint32_t subDeviceCount = static_cast(deviceImp.subDevices.size()); ze_result_t returnValue; std::unique_ptr commandList(CommandList::create(productFamily, devices[0], NEO::EngineGroupType::RenderCompute, 0u, returnValue)); zet_command_list_handle_t commandListHandle = commandList->toHandle(); metricsDeviceParams.ConcurrentGroupsCount = 1; Mock metricsConcurrentGroup; TConcurrentGroupParams_1_0 metricsConcurrentGroupParams = {}; metricsConcurrentGroupParams.MetricSetsCount = 1; metricsConcurrentGroupParams.SymbolName = "OA"; metricsConcurrentGroupParams.Description = "OA description"; Mock metricsSet; MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams = {}; metricsSetParams.ApiMask = MetricsDiscovery::API_TYPE_OCL; metricsSetParams.MetricsCount = 0; metricsSetParams.SymbolName = "Metric set name"; metricsSetParams.ShortName = "Metric set description"; metricsSetParams.MetricsCount = 1; Mock metric; TMetricParams_1_0 metricParams = {}; metricParams.SymbolName = "Metric symbol name"; metricParams.ShortName = "Metric short name"; metricParams.LongName = "Metric long name"; metricParams.ResultType = MetricsDiscovery::TMetricResultType::RESULT_UINT64; metricParams.MetricType = MetricsDiscovery::TMetricType::METRIC_TYPE_RATIO; zet_metric_group_handle_t metricGroupHandle = {}; zet_metric_group_properties_t metricGroupProperties = {}; metricGroupProperties.samplingType = ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_EVENT_BASED; zet_metric_query_handle_t queryHandle = {}; zet_metric_query_pool_handle_t poolHandle = {}; zet_metric_query_pool_desc_t poolDesc = {}; poolDesc.stype = 
ZET_STRUCTURE_TYPE_METRIC_QUERY_POOL_DESC; poolDesc.count = 1; poolDesc.type = ZET_METRIC_QUERY_POOL_TYPE_PERFORMANCE; TypedValue_1_0 value = {}; value.Type = ValueType::Uint32; value.ValueUInt32 = 64; QueryHandle_1_0 metricsLibraryQueryHandle = {&value}; ContextHandle_1_0 metricsLibraryContextHandle = {&value}; CommandBufferSize_1_0 commandBufferSize = {}; commandBufferSize.GpuMemorySize = 100; openMetricsAdapter(); EXPECT_CALL(metricsDevice, GetParams()) .WillRepeatedly(Return(&metricsDeviceParams)); EXPECT_CALL(metricsDevice, GetConcurrentGroup(_)) .Times(subDeviceCount) .WillRepeatedly(Return(&metricsConcurrentGroup)); EXPECT_CALL(metricsConcurrentGroup, GetParams()) .Times(subDeviceCount) .WillRepeatedly(Return(&metricsConcurrentGroupParams)); EXPECT_CALL(metricsConcurrentGroup, GetMetricSet(_)) .WillRepeatedly(Return(&metricsSet)); EXPECT_CALL(metricsSet, GetParams()) .WillRepeatedly(Return(&metricsSetParams)); EXPECT_CALL(metricsSet, GetMetric(_)) .Times(subDeviceCount) .WillRepeatedly(Return(&metric)); EXPECT_CALL(metric, GetParams()) .Times(subDeviceCount) .WillRepeatedly(Return(&metricParams)); EXPECT_CALL(metricsSet, SetApiFiltering(_)) .WillRepeatedly(Return(TCompletionCode::CC_OK)); for (uint32_t i = 0; i < subDeviceCount; ++i) { EXPECT_CALL(*mockMetricEnumerationSubDevices[i], isInitialized()) .Times(1) .WillOnce(Return(true)); EXPECT_CALL(*mockMetricsLibrarySubDevices[i], getContextData(_, _)) .Times(1) .WillOnce(Return(true)); } EXPECT_CALL(*mockMetricsLibrary, load()) .Times(0); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockQueryCreate(_, _)) .Times(subDeviceCount) .WillRepeatedly(DoAll(::testing::SetArgPointee<1>(metricsLibraryQueryHandle), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockQueryDelete(_)) .Times(subDeviceCount) .WillRepeatedly(Return(StatusCode::Success)); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockGetParameter(_, _, _)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<2>(value), 
Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockContextCreate(_, _, _)) .Times(subDeviceCount) .WillRepeatedly(DoAll(::testing::SetArgPointee<2>(metricsLibraryContextHandle), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockContextDelete(_)) .Times(subDeviceCount) .WillRepeatedly(Return(StatusCode::Success)); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockCommandBufferGetSize(_, _)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<1>(::testing::ByRef(commandBufferSize)), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockCommandBufferGet(_)) .Times(1) .WillRepeatedly(Return(StatusCode::Failed)); // Metric group count. uint32_t metricGroupCount = 0; EXPECT_EQ(zetMetricGroupGet(devices[0]->toHandle(), &metricGroupCount, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); // Metric group handle. EXPECT_EQ(zetMetricGroupGet(devices[0]->toHandle(), &metricGroupCount, &metricGroupHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); EXPECT_NE(metricGroupHandle, nullptr); // Create metric query pool. EXPECT_EQ(zetMetricQueryPoolCreate(context->toHandle(), metricDevice, metricGroupHandle, &poolDesc, &poolHandle), ZE_RESULT_SUCCESS); EXPECT_NE(poolHandle, nullptr); // Create metric query. EXPECT_EQ(zetMetricQueryCreate(poolHandle, 0, &queryHandle), ZE_RESULT_SUCCESS); EXPECT_NE(queryHandle, nullptr); // Write BEGIN metric query to command list. EXPECT_EQ(zetCommandListAppendMetricQueryBegin(commandListHandle, queryHandle), ZE_RESULT_ERROR_UNKNOWN); // Destroy query and its pool. 
// Closing statements of the preceding test (it begins above this span); unchanged.
    EXPECT_EQ(zetMetricQueryDestroy(queryHandle), ZE_RESULT_SUCCESS);
    EXPECT_EQ(zetMetricQueryPoolDestroy(poolHandle), ZE_RESULT_SUCCESS);
}

// getGpuCommands() on the metrics library of devices[0] must return false when
// the metric enumeration mock reports that it is not initialized.
//
// NOTE(review): other template argument lists in this file appear to have been
// lost (see `static_cast(devices[0])` in the next test). If getMetricSource()
// is a function template in the project, its argument needs restoring - confirm.
TEST_F(MultiDeviceMetricQueryPoolTest, givenUninitializedMetricsLibraryWhenGetGpuCommandsIsCalledThenReturnsFail) {

    auto &metricSource = devices[0]->getMetricDeviceContext().getMetricSource();
    auto &metricsLibrary = metricSource.getMetricsLibrary();

    // Zero-initialised command buffer descriptor passed to the call under test.
    CommandBufferData_1_0 commandBuffer = {};

    // Exactly one initialization check is expected, and it answers "not initialized".
    EXPECT_CALL(*mockMetricEnumeration, isInitialized())
        .Times(1)
        .WillOnce(Return(false));

    const bool result = metricsLibrary.getGpuCommands(commandBuffer);

    // Boolean outcome: use the dedicated boolean assertion instead of comparing with `false`.
    EXPECT_FALSE(result);
}

// Opening of the following test; its body continues below this span. Unchanged.
TEST_F(MultiDeviceMetricQueryPoolTest, givenValidArgumentsWhenZetMetricGroupCalculateMetricValuesExpThenReturnsSuccess) {
    zet_device_handle_t metricDevice = devices[0]->toHandle();
    auto &deviceImp = *static_cast(devices[0]);
    const uint32_t subDeviceCount = static_cast(deviceImp.subDevices.size());
    metricsDeviceParams.ConcurrentGroupsCount = 1;
    Mock metricsConcurrentGroup;
    TConcurrentGroupParams_1_0 metricsConcurrentGroupParams = {};
    metricsConcurrentGroupParams.MetricSetsCount = 1;
    metricsConcurrentGroupParams.SymbolName = "OA";
    metricsConcurrentGroupParams.Description = "OA description";
    Mock metricsSet;
    MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams = {};
    metricsSetParams.ApiMask = MetricsDiscovery::API_TYPE_OCL;
    metricsSetParams.MetricsCount = 0;
    metricsSetParams.SymbolName = "Metric set name";
    metricsSetParams.ShortName = "Metric set description";
    metricsSetParams.QueryReportSize = 256;
    metricsSetParams.MetricsCount = 1;
    Mock metric;
    TMetricParams_1_0 metricParams = {};
    metricParams.SymbolName = "Metric symbol name";
    metricParams.ShortName = "Metric short name";
    metricParams.LongName = "Metric long name";
    metricParams.ResultType = MetricsDiscovery::TMetricResultType::RESULT_UINT64;
    metricParams.MetricType = MetricsDiscovery::TMetricType::METRIC_TYPE_RATIO;
    zet_metric_group_handle_t metricGroupHandle = {};
    zet_metric_group_properties_t
metricGroupProperties = {}; metricGroupProperties.samplingType = ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_EVENT_BASED; zet_metric_query_handle_t queryHandle = {}; zet_metric_query_pool_handle_t poolHandle = {}; zet_metric_query_pool_desc_t poolDesc = {}; poolDesc.stype = ZET_STRUCTURE_TYPE_METRIC_QUERY_POOL_DESC; poolDesc.count = 1; poolDesc.type = ZET_METRIC_QUERY_POOL_TYPE_PERFORMANCE; TypedValue_1_0 value = {}; value.Type = ValueType::Uint32; value.ValueUInt32 = 64; QueryHandle_1_0 metricsLibraryQueryHandle = {&value}; ContextHandle_1_0 metricsLibraryContextHandle = {&value}; CommandBufferSize_1_0 commandBufferSize = {}; commandBufferSize.GpuMemorySize = 100; uint32_t returnedMetricCount = 1; openMetricsAdapter(); EXPECT_CALL(metricsDevice, GetParams()) .WillRepeatedly(Return(&metricsDeviceParams)); EXPECT_CALL(metricsDevice, GetConcurrentGroup(_)) .Times(subDeviceCount) .WillRepeatedly(Return(&metricsConcurrentGroup)); EXPECT_CALL(metricsConcurrentGroup, GetParams()) .Times(subDeviceCount) .WillRepeatedly(Return(&metricsConcurrentGroupParams)); EXPECT_CALL(metricsConcurrentGroup, GetMetricSet(_)) .WillRepeatedly(Return(&metricsSet)); EXPECT_CALL(metricsSet, GetParams()) .WillRepeatedly(Return(&metricsSetParams)); EXPECT_CALL(metricsSet, GetMetric(_)) .Times(subDeviceCount) .WillRepeatedly(Return(&metric)); EXPECT_CALL(metric, GetParams()) .Times(subDeviceCount) .WillRepeatedly(Return(&metricParams)); EXPECT_CALL(metricsSet, SetApiFiltering(_)) .WillRepeatedly(Return(TCompletionCode::CC_OK)); for (uint32_t i = 0; i < subDeviceCount; ++i) { EXPECT_CALL(*mockMetricEnumerationSubDevices[i], isInitialized()) .Times(1) .WillOnce(Return(true)); EXPECT_CALL(*mockMetricsLibrarySubDevices[i], getContextData(_, _)) .Times(1) .WillOnce(Return(true)); } EXPECT_CALL(*mockMetricsLibrarySubDevices[0], getMetricQueryReportSize(_)) .Times(1) .WillOnce(DoAll(::testing::SetArgReferee<0>(metricsSetParams.QueryReportSize), Return(true))); EXPECT_CALL(*mockMetricsLibrary, load()) 
.Times(0); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockQueryCreate(_, _)) .Times(subDeviceCount) .WillRepeatedly(DoAll(::testing::SetArgPointee<1>(metricsLibraryQueryHandle), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockQueryDelete(_)) .Times(subDeviceCount) .WillRepeatedly(Return(StatusCode::Success)); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockGetParameter(_, _, _)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<2>(value), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockContextCreate(_, _, _)) .Times(subDeviceCount) .WillRepeatedly(DoAll(::testing::SetArgPointee<2>(metricsLibraryContextHandle), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockGetData(_)) .Times(subDeviceCount) .WillRepeatedly(Return(StatusCode::Success)); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockContextDelete(_)) .Times(subDeviceCount) .WillRepeatedly(Return(StatusCode::Success)); EXPECT_CALL(metricsSet, CalculateMetrics(_, _, _, _, _, _, _)) .Times(subDeviceCount) .WillRepeatedly(DoAll(::testing::SetArgPointee<4>(returnedMetricCount), Return(TCompletionCode::CC_OK))); // Metric group count. uint32_t metricGroupCount = 0; EXPECT_EQ(zetMetricGroupGet(devices[0]->toHandle(), &metricGroupCount, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); // Metric group handle. EXPECT_EQ(zetMetricGroupGet(devices[0]->toHandle(), &metricGroupCount, &metricGroupHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); EXPECT_NE(metricGroupHandle, nullptr); // Create metric query pool. EXPECT_EQ(zetMetricQueryPoolCreate(context->toHandle(), metricDevice, metricGroupHandle, &poolDesc, &poolHandle), ZE_RESULT_SUCCESS); EXPECT_NE(poolHandle, nullptr); // Create metric query. EXPECT_EQ(zetMetricQueryCreate(poolHandle, 0, &queryHandle), ZE_RESULT_SUCCESS); EXPECT_NE(queryHandle, nullptr); // Get desired raw data size. 
size_t rawSize = 0; EXPECT_EQ(zetMetricQueryGetData(queryHandle, &rawSize, nullptr), ZE_RESULT_SUCCESS); const size_t expectedRawSize = (metricsSetParams.QueryReportSize * subDeviceCount) + sizeof(MetricGroupCalculateHeader) + (2 * sizeof(uint32_t) * subDeviceCount); EXPECT_EQ(rawSize, expectedRawSize); // Get data. std::vector rawData; rawData.resize(rawSize); EXPECT_EQ(zetMetricQueryGetData(queryHandle, &rawSize, rawData.data()), ZE_RESULT_SUCCESS); uint32_t dataCount = 0; uint32_t totalMetricCount = 0; EXPECT_EQ(zetMetricGroupCalculateMultipleMetricValuesExp(metricGroupHandle, ZET_METRIC_GROUP_CALCULATION_TYPE_METRIC_VALUES, rawSize, rawData.data(), &dataCount, &totalMetricCount, nullptr, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(dataCount, subDeviceCount); EXPECT_EQ(totalMetricCount, subDeviceCount * metricsSetParams.MetricsCount); std::vector metricCounts(dataCount); std::vector caculatedRawResults(totalMetricCount); EXPECT_EQ(zetMetricGroupCalculateMultipleMetricValuesExp(metricGroupHandle, ZET_METRIC_GROUP_CALCULATION_TYPE_METRIC_VALUES, rawSize, rawData.data(), &dataCount, &totalMetricCount, metricCounts.data(), caculatedRawResults.data()), ZE_RESULT_SUCCESS); EXPECT_EQ(metricCounts[0], metricsSetParams.MetricsCount); EXPECT_EQ(metricCounts[1], metricsSetParams.MetricsCount); // Destroy query and its pool. 
EXPECT_EQ(zetMetricQueryDestroy(queryHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(zetMetricQueryPoolDestroy(poolHandle), ZE_RESULT_SUCCESS); } TEST_F(MultiDeviceMetricQueryPoolTest, givenCorrectArgumentsWhenActivateMetricGroupsIsCalledThenReturnsSuccess) { zet_device_handle_t metricDevice = devices[0]->toHandle(); auto &deviceImp = *static_cast(devices[0]); const uint32_t subDeviceCount = static_cast(deviceImp.subDevices.size()); metricsDeviceParams.ConcurrentGroupsCount = 1; Mock metricsConcurrentGroup; TConcurrentGroupParams_1_0 metricsConcurrentGroupParams = {}; metricsConcurrentGroupParams.MetricSetsCount = 1; metricsConcurrentGroupParams.SymbolName = "OA"; metricsConcurrentGroupParams.Description = "OA description"; Mock metricsSet; MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams = {}; metricsSetParams.ApiMask = MetricsDiscovery::API_TYPE_OCL; metricsSetParams.MetricsCount = 0; metricsSetParams.SymbolName = "Metric set name"; metricsSetParams.ShortName = "Metric set description"; metricsSetParams.MetricsCount = 1; Mock metric; TMetricParams_1_0 metricParams = {}; metricParams.SymbolName = "Metric symbol name"; metricParams.ShortName = "Metric short name"; metricParams.LongName = "Metric long name"; metricParams.ResultType = MetricsDiscovery::TMetricResultType::RESULT_UINT64; metricParams.MetricType = MetricsDiscovery::TMetricType::METRIC_TYPE_RATIO; zet_metric_group_handle_t metricGroupHandle = {}; zet_metric_group_properties_t metricGroupProperties = {}; metricGroupProperties.samplingType = ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_EVENT_BASED; TypedValue_1_0 value = {}; value.Type = ValueType::Uint32; value.ValueUInt32 = 64; ConfigurationHandle_1_0 metricsLibraryConfigurationHandle = {&value}; ContextHandle_1_0 metricsLibraryContextHandle = {&value}; openMetricsAdapter(); EXPECT_CALL(metricsDevice, GetParams()) .WillRepeatedly(Return(&metricsDeviceParams)); EXPECT_CALL(metricsDevice, GetConcurrentGroup(_)) .Times(subDeviceCount) 
.WillRepeatedly(Return(&metricsConcurrentGroup)); EXPECT_CALL(metricsConcurrentGroup, GetParams()) .Times(subDeviceCount) .WillRepeatedly(Return(&metricsConcurrentGroupParams)); EXPECT_CALL(metricsConcurrentGroup, GetMetricSet(_)) .WillRepeatedly(Return(&metricsSet)); EXPECT_CALL(metricsSet, GetParams()) .WillRepeatedly(Return(&metricsSetParams)); EXPECT_CALL(metricsSet, GetMetric(_)) .Times(subDeviceCount) .WillRepeatedly(Return(&metric)); EXPECT_CALL(metricsSet, Activate()) .Times(subDeviceCount) .WillRepeatedly(Return(MetricsDiscovery::CC_OK)); EXPECT_CALL(metricsSet, Deactivate()) .Times(subDeviceCount) .WillRepeatedly(Return(MetricsDiscovery::CC_OK)); EXPECT_CALL(metric, GetParams()) .Times(subDeviceCount) .WillRepeatedly(Return(&metricParams)); EXPECT_CALL(metricsSet, SetApiFiltering(_)) .WillRepeatedly(Return(TCompletionCode::CC_OK)); for (uint32_t i = 0; i < subDeviceCount; ++i) { EXPECT_CALL(*mockMetricEnumerationSubDevices[i], isInitialized()) .Times(1) .WillOnce(Return(true)); EXPECT_CALL(*mockMetricsLibrarySubDevices[i], getContextData(_, _)) .Times(1) .WillOnce(Return(true)); } EXPECT_CALL(*mockMetricsLibrary, load()) .Times(0); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockContextCreate(_, _, _)) .Times(subDeviceCount) .WillRepeatedly(DoAll(::testing::SetArgPointee<2>(metricsLibraryContextHandle), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockContextDelete(_)) .Times(subDeviceCount) .WillRepeatedly(Return(StatusCode::Success)); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockConfigurationCreate(_, _)) .Times(subDeviceCount) .WillRepeatedly(DoAll(::testing::SetArgPointee<1>(metricsLibraryConfigurationHandle), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockConfigurationActivate(_, _)) .Times(subDeviceCount) .WillRepeatedly(Return(StatusCode::Success)); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockConfigurationDeactivate(_)) .Times(subDeviceCount) 
.WillRepeatedly(Return(StatusCode::Success)); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockConfigurationDelete(_)) .Times(subDeviceCount) .WillRepeatedly(Return(StatusCode::Success)); // Metric group count. uint32_t metricGroupCount = 0; EXPECT_EQ(zetMetricGroupGet(devices[0]->toHandle(), &metricGroupCount, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); // Metric group handle. EXPECT_EQ(zetMetricGroupGet(devices[0]->toHandle(), &metricGroupCount, &metricGroupHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); EXPECT_NE(metricGroupHandle, nullptr); // Activate metric group (deferred). EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), devices[0]->toHandle(), 1, &metricGroupHandle), ZE_RESULT_SUCCESS); // Activate metric groups. devices[0]->activateMetricGroups(); // Deactivate metric groups. EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), metricDevice, 0, nullptr), ZE_RESULT_SUCCESS); } } // namespace ult } // namespace L0 test_metric_oa_streamer_1.cpp000066400000000000000000001373031422164147700347500ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/metrics/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h" #include "level_zero/core/test/unit_tests/mocks/mock_driver.h" #include "level_zero/tools/source/metrics/metric_oa_source.h" #include "level_zero/tools/test/unit_tests/sources/metrics/mock_metric_oa.h" #include "gmock/gmock.h" #include "gtest/gtest.h" using ::testing::_; using ::testing::Return; namespace L0 { namespace ult { using MetricStreamerTest = Test; TEST_F(MetricStreamerTest, givenInvalidMetricGroupTypeWhenZetMetricStreamerOpenIsCalledThenReturnsFail) { // One api: device handle. zet_device_handle_t metricDeviceHandle = device->toHandle(); // One api: event handle. 
ze_event_handle_t eventHandle = {}; // One api: streamer handle. zet_metric_streamer_handle_t streamerHandle = {}; zet_metric_streamer_desc_t streamerDesc = {}; // One api: metric group handle. Mock metricGroup; zet_metric_group_handle_t metricGroupHandle = metricGroup.toHandle(); zet_metric_group_properties_t metricGroupProperties = {}; metricGroupProperties.samplingType = ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_EVENT_BASED; EXPECT_CALL(metricGroup, getProperties(_)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<0>(metricGroupProperties), Return(ZE_RESULT_SUCCESS))); // Metric streamer open. EXPECT_EQ(zetMetricStreamerOpen(context->toHandle(), metricDeviceHandle, metricGroupHandle, &streamerDesc, eventHandle, &streamerHandle), ZE_RESULT_ERROR_INVALID_ARGUMENT); EXPECT_EQ(streamerHandle, nullptr); } TEST_F(MetricStreamerTest, givenValidArgumentsWhenZetMetricStreamerOpenIsCalledThenReturnsSuccess) { zet_device_handle_t metricDeviceHandle = device->toHandle(); ze_event_handle_t eventHandle = {}; zet_metric_streamer_handle_t streamerHandle = {}; zet_metric_streamer_desc_t streamerDesc = {}; streamerDesc.stype = ZET_STRUCTURE_TYPE_METRIC_STREAMER_DESC; streamerDesc.notifyEveryNReports = 32768; streamerDesc.samplingPeriod = 1000; Mock metricGroup; zet_metric_group_handle_t metricGroupHandle = metricGroup.toHandle(); zet_metric_group_properties_t metricGroupProperties = {}; metricsDeviceParams.ConcurrentGroupsCount = 1; Mock metricsConcurrentGroup; TConcurrentGroupParams_1_0 metricsConcurrentGroupParams = {}; metricsConcurrentGroupParams.MetricSetsCount = 1; metricsConcurrentGroupParams.SymbolName = "OA"; metricsConcurrentGroupParams.Description = "OA description"; Mock metricsSet; MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams = {}; metricsSetParams.ApiMask = MetricsDiscovery::API_TYPE_IOSTREAM; metricsSetParams.MetricsCount = 0; metricsSetParams.SymbolName = "Metric set name"; metricsSetParams.ShortName = "Metric set description"; 
metricsSetParams.RawReportSize = 256; openMetricsAdapter(); EXPECT_CALL(*mockMetricEnumeration, loadMetricsDiscovery()) .Times(1) .WillOnce(Return(ZE_RESULT_SUCCESS)); EXPECT_CALL(*mockMetricsLibrary, load()) .Times(1) .WillOnce(Return(true)); EXPECT_CALL(metricsDevice, GetParams()) .WillRepeatedly(Return(&metricsDeviceParams)); EXPECT_CALL(metricsDevice, GetConcurrentGroup(_)) .Times(1) .WillOnce(Return(&metricsConcurrentGroup)); EXPECT_CALL(metricsConcurrentGroup, GetParams()) .Times(1) .WillRepeatedly(Return(&metricsConcurrentGroupParams)); EXPECT_CALL(metricsConcurrentGroup, GetMetricSet(_)) .WillRepeatedly(Return(&metricsSet)); EXPECT_CALL(metricsSet, GetParams()) .WillRepeatedly(Return(&metricsSetParams)); EXPECT_CALL(metricsSet, SetApiFiltering(_)) .WillRepeatedly(Return(TCompletionCode::CC_OK)); EXPECT_CALL(metricsConcurrentGroup, OpenIoStream(_, _, _, _)) .Times(1) .WillOnce(Return(TCompletionCode::CC_OK)); EXPECT_CALL(metricsConcurrentGroup, CloseIoStream()) .Times(1) .WillOnce(Return(TCompletionCode::CC_OK)); mockMetricsLibrary->initializationState = ZE_RESULT_SUCCESS; auto &metricSource = device->getMetricDeviceContext().getMetricSource(); EXPECT_TRUE(metricSource.loadDependencies()); EXPECT_TRUE(metricSource.isInitialized()); uint32_t metricGroupCount = 0; EXPECT_EQ(zetMetricGroupGet(metricDeviceHandle, &metricGroupCount, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); EXPECT_EQ(zetMetricGroupGet(metricDeviceHandle, &metricGroupCount, &metricGroupHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); EXPECT_NE(metricGroupHandle, nullptr); EXPECT_EQ(zetMetricGroupGetProperties(metricGroupHandle, &metricGroupProperties), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupProperties.domain, 0u); EXPECT_EQ(metricGroupProperties.samplingType, ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_TIME_BASED); EXPECT_EQ(metricGroupProperties.metricCount, metricsSetParams.MetricsCount); EXPECT_EQ(strcmp(metricGroupProperties.description, metricsSetParams.ShortName), 
0); EXPECT_EQ(strcmp(metricGroupProperties.name, metricsSetParams.SymbolName), 0); EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), metricDeviceHandle, 1, &metricGroupHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(zetMetricStreamerOpen(context->toHandle(), metricDeviceHandle, metricGroupHandle, &streamerDesc, eventHandle, &streamerHandle), ZE_RESULT_SUCCESS); EXPECT_NE(streamerHandle, nullptr); EXPECT_EQ(zetMetricStreamerClose(streamerHandle), ZE_RESULT_SUCCESS); EXPECT_NE(streamerHandle, nullptr); } TEST_F(MetricStreamerTest, givenRawReportSizeAsZeroWhenZetMetricStreamerOpenIsCalledThenReturnsSuccess) { zet_device_handle_t metricDeviceHandle = device->toHandle(); ze_event_handle_t eventHandle = {}; zet_metric_streamer_handle_t streamerHandle = {}; zet_metric_streamer_desc_t streamerDesc = {}; streamerDesc.stype = ZET_STRUCTURE_TYPE_METRIC_STREAMER_DESC; streamerDesc.notifyEveryNReports = 32768; streamerDesc.samplingPeriod = 1000; Mock metricGroup; zet_metric_group_handle_t metricGroupHandle = metricGroup.toHandle(); zet_metric_group_properties_t metricGroupProperties = {}; metricsDeviceParams.ConcurrentGroupsCount = 1; Mock metricsConcurrentGroup; TConcurrentGroupParams_1_0 metricsConcurrentGroupParams = {}; metricsConcurrentGroupParams.MetricSetsCount = 1; metricsConcurrentGroupParams.SymbolName = "OA"; metricsConcurrentGroupParams.Description = "OA description"; Mock metricsSet; MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams = {}; metricsSetParams.ApiMask = MetricsDiscovery::API_TYPE_IOSTREAM; metricsSetParams.MetricsCount = 0; metricsSetParams.SymbolName = "Metric set name"; metricsSetParams.ShortName = "Metric set description"; metricsSetParams.RawReportSize = 0; openMetricsAdapter(); EXPECT_CALL(*mockMetricEnumeration, loadMetricsDiscovery()) .Times(1) .WillOnce(Return(ZE_RESULT_SUCCESS)); EXPECT_CALL(*mockMetricsLibrary, load()) .Times(1) .WillOnce(Return(true)); EXPECT_CALL(metricsDevice, GetParams()) 
.WillRepeatedly(Return(&metricsDeviceParams)); EXPECT_CALL(metricsDevice, GetConcurrentGroup(_)) .Times(1) .WillOnce(Return(&metricsConcurrentGroup)); EXPECT_CALL(metricsConcurrentGroup, GetParams()) .Times(1) .WillRepeatedly(Return(&metricsConcurrentGroupParams)); EXPECT_CALL(metricsConcurrentGroup, GetMetricSet(_)) .WillRepeatedly(Return(&metricsSet)); EXPECT_CALL(metricsSet, GetParams()) .WillRepeatedly(Return(&metricsSetParams)); EXPECT_CALL(metricsSet, SetApiFiltering(_)) .WillRepeatedly(Return(TCompletionCode::CC_OK)); EXPECT_CALL(metricsConcurrentGroup, OpenIoStream(_, _, _, _)) .Times(1) .WillOnce(Return(TCompletionCode::CC_OK)); EXPECT_CALL(metricsConcurrentGroup, CloseIoStream()) .Times(1) .WillOnce(Return(TCompletionCode::CC_OK)); mockMetricsLibrary->initializationState = ZE_RESULT_SUCCESS; auto &metricSource = device->getMetricDeviceContext().getMetricSource(); EXPECT_TRUE(metricSource.loadDependencies()); EXPECT_TRUE(metricSource.isInitialized()); uint32_t metricGroupCount = 0; EXPECT_EQ(zetMetricGroupGet(metricDeviceHandle, &metricGroupCount, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); EXPECT_EQ(zetMetricGroupGet(metricDeviceHandle, &metricGroupCount, &metricGroupHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); EXPECT_NE(metricGroupHandle, nullptr); EXPECT_EQ(zetMetricGroupGetProperties(metricGroupHandle, &metricGroupProperties), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupProperties.domain, 0u); EXPECT_EQ(metricGroupProperties.samplingType, ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_TIME_BASED); EXPECT_EQ(metricGroupProperties.metricCount, metricsSetParams.MetricsCount); EXPECT_EQ(strcmp(metricGroupProperties.description, metricsSetParams.ShortName), 0); EXPECT_EQ(strcmp(metricGroupProperties.name, metricsSetParams.SymbolName), 0); EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), metricDeviceHandle, 1, &metricGroupHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(zetMetricStreamerOpen(context->toHandle(), metricDeviceHandle, 
metricGroupHandle, &streamerDesc, eventHandle, &streamerHandle), ZE_RESULT_SUCCESS); EXPECT_NE(streamerHandle, nullptr); EXPECT_EQ(zetMetricStreamerClose(streamerHandle), ZE_RESULT_SUCCESS); EXPECT_NE(streamerHandle, nullptr); } TEST_F(MetricStreamerTest, givenValidArgumentsAndMetricGroupsIsNotActivatedWhenZetMetricStreamerOpenIsCalledThenReturnsNotReady) { zet_device_handle_t metricDeviceHandle = device->toHandle(); ze_event_handle_t eventHandle = {}; zet_metric_streamer_handle_t streamerHandle = {}; zet_metric_streamer_desc_t streamerDesc = {}; streamerDesc.stype = ZET_STRUCTURE_TYPE_METRIC_STREAMER_DESC; streamerDesc.notifyEveryNReports = 32768; streamerDesc.samplingPeriod = 1000; Mock metricGroup; zet_metric_group_handle_t metricGroupHandle = metricGroup.toHandle(); metricsDeviceParams.ConcurrentGroupsCount = 1; Mock metricsConcurrentGroup; TConcurrentGroupParams_1_0 metricsConcurrentGroupParams = {}; metricsConcurrentGroupParams.MetricSetsCount = 1; metricsConcurrentGroupParams.SymbolName = "OA"; metricsConcurrentGroupParams.Description = "OA description"; Mock metricsSet; MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams = {}; metricsSetParams.ApiMask = MetricsDiscovery::API_TYPE_IOSTREAM; metricsSetParams.MetricsCount = 0; metricsSetParams.SymbolName = "Metric set name"; metricsSetParams.ShortName = "Metric set description"; metricsSetParams.RawReportSize = 256; openMetricsAdapter(); EXPECT_CALL(metricsDevice, GetParams()) .WillRepeatedly(Return(&metricsDeviceParams)); EXPECT_CALL(metricsDevice, GetConcurrentGroup(_)) .Times(1) .WillOnce(Return(&metricsConcurrentGroup)); EXPECT_CALL(metricsConcurrentGroup, GetParams()) .Times(1) .WillRepeatedly(Return(&metricsConcurrentGroupParams)); EXPECT_CALL(metricsConcurrentGroup, GetMetricSet(_)) .WillRepeatedly(Return(&metricsSet)); EXPECT_CALL(metricsSet, GetParams()) .WillRepeatedly(Return(&metricsSetParams)); EXPECT_CALL(metricsSet, SetApiFiltering(_)) .WillRepeatedly(Return(TCompletionCode::CC_OK)); uint32_t 
metricGroupCount = 0; EXPECT_EQ(zetMetricGroupGet(metricDeviceHandle, &metricGroupCount, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(zetMetricGroupGet(metricDeviceHandle, &metricGroupCount, &metricGroupHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(zetMetricStreamerOpen(context->toHandle(), metricDeviceHandle, metricGroupHandle, &streamerDesc, eventHandle, &streamerHandle), ZE_RESULT_NOT_READY); EXPECT_EQ(streamerHandle, nullptr); } TEST_F(MetricStreamerTest, givenValidArgumentsWhenZetMetricStreamerOpenIsCalledTwiceThenReturnsObjectInUse) { // One api: device handle. zet_device_handle_t metricDeviceHandle = device->toHandle(); // One api: event handle. ze_event_handle_t eventHandle = {}; // One api: streamer handle. zet_metric_streamer_handle_t firstStreamerHandle = {}; zet_metric_streamer_handle_t secondStreamerHandle = {}; zet_metric_streamer_desc_t streamerDesc = {}; streamerDesc.stype = ZET_STRUCTURE_TYPE_METRIC_STREAMER_DESC; streamerDesc.notifyEveryNReports = 32768; streamerDesc.samplingPeriod = 1000; // One api: metric group handle. Mock metricGroup; zet_metric_group_handle_t metricGroupHandle = metricGroup.toHandle(); zet_metric_group_properties_t metricGroupProperties = {}; // Metrics Discovery device. metricsDeviceParams.ConcurrentGroupsCount = 1; // Metrics Discovery concurrent group. Mock metricsConcurrentGroup; TConcurrentGroupParams_1_0 metricsConcurrentGroupParams = {}; metricsConcurrentGroupParams.MetricSetsCount = 1; metricsConcurrentGroupParams.SymbolName = "OA"; metricsConcurrentGroupParams.Description = "OA description"; // Metrics Discovery metric set. 
Mock metricsSet; MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams = {}; metricsSetParams.ApiMask = MetricsDiscovery::API_TYPE_IOSTREAM; metricsSetParams.MetricsCount = 0; metricsSetParams.SymbolName = "Metric set name"; metricsSetParams.ShortName = "Metric set description"; metricsSetParams.RawReportSize = 256; openMetricsAdapter(); EXPECT_CALL(metricsDevice, GetParams()) .WillRepeatedly(Return(&metricsDeviceParams)); EXPECT_CALL(metricsDevice, GetConcurrentGroup(_)) .Times(1) .WillOnce(Return(&metricsConcurrentGroup)); EXPECT_CALL(metricsConcurrentGroup, GetParams()) .Times(1) .WillRepeatedly(Return(&metricsConcurrentGroupParams)); EXPECT_CALL(metricsConcurrentGroup, GetMetricSet(_)) .WillRepeatedly(Return(&metricsSet)); EXPECT_CALL(metricsSet, GetParams()) .WillRepeatedly(Return(&metricsSetParams)); EXPECT_CALL(metricsSet, SetApiFiltering(_)) .WillRepeatedly(Return(TCompletionCode::CC_OK)); EXPECT_CALL(metricsConcurrentGroup, OpenIoStream(_, _, _, _)) .Times(1) .WillOnce(Return(TCompletionCode::CC_OK)); EXPECT_CALL(metricsConcurrentGroup, CloseIoStream()) .Times(1) .WillOnce(Return(TCompletionCode::CC_OK)); // Metric group count. uint32_t metricGroupCount = 0; EXPECT_EQ(zetMetricGroupGet(metricDeviceHandle, &metricGroupCount, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); // Metric group handle. EXPECT_EQ(zetMetricGroupGet(metricDeviceHandle, &metricGroupCount, &metricGroupHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); EXPECT_NE(metricGroupHandle, nullptr); // Metric group properties. 
EXPECT_EQ(zetMetricGroupGetProperties(metricGroupHandle, &metricGroupProperties), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupProperties.domain, 0u); EXPECT_EQ(metricGroupProperties.samplingType, ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_TIME_BASED); EXPECT_EQ(metricGroupProperties.metricCount, metricsSetParams.MetricsCount); EXPECT_EQ(strcmp(metricGroupProperties.description, metricsSetParams.ShortName), 0); EXPECT_EQ(strcmp(metricGroupProperties.name, metricsSetParams.SymbolName), 0); // Metric group activation. EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), metricDeviceHandle, 1, &metricGroupHandle), ZE_RESULT_SUCCESS); // Metric streamer open. EXPECT_EQ(zetMetricStreamerOpen(context->toHandle(), metricDeviceHandle, metricGroupHandle, &streamerDesc, eventHandle, &firstStreamerHandle), ZE_RESULT_SUCCESS); EXPECT_NE(firstStreamerHandle, nullptr); EXPECT_EQ(zetMetricStreamerOpen(context->toHandle(), metricDeviceHandle, metricGroupHandle, &streamerDesc, eventHandle, &secondStreamerHandle), ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE); EXPECT_EQ(secondStreamerHandle, nullptr); // Metric streamer close. EXPECT_EQ(zetMetricStreamerClose(firstStreamerHandle), ZE_RESULT_SUCCESS); EXPECT_NE(firstStreamerHandle, nullptr); } TEST_F(MetricStreamerTest, givenCorrectArgumentsWhenZetMetricQueryPoolCreateExtIsCalledThenMetricStreamerIsNotAvailable) { // One api: device handle. zet_device_handle_t metricDevice = device->toHandle(); // One api: event handle. ze_event_handle_t eventHandle = {}; // One api: streamer handle. zet_metric_streamer_handle_t streamerHandle = {}; zet_metric_streamer_desc_t streamerDesc = {}; streamerDesc.stype = ZET_STRUCTURE_TYPE_METRIC_STREAMER_DESC; streamerDesc.notifyEveryNReports = 32768; streamerDesc.samplingPeriod = 1000; // One api: metric group handle. 
Mock metricGroup; zet_metric_group_handle_t metricGroupHandle = metricGroup.toHandle(); zet_metric_group_properties_t metricGroupProperties = {}; metricGroupProperties.samplingType = ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_EVENT_BASED; // One api: query pool handle. zet_metric_query_pool_handle_t poolHandle = {}; zet_metric_query_pool_desc_t poolDesc = {}; poolDesc.stype = ZET_STRUCTURE_TYPE_METRIC_QUERY_POOL_DESC; poolDesc.count = 1; poolDesc.type = ZET_METRIC_QUERY_POOL_TYPE_PERFORMANCE; TypedValue_1_0 value = {}; value.Type = ValueType::Uint32; value.ValueUInt32 = 64; QueryHandle_1_0 queryHandle = {&value}; ContextHandle_1_0 contextHandle = {&value}; EXPECT_CALL(*mockMetricEnumeration, isInitialized()) .Times(1) .WillOnce(Return(true)); EXPECT_CALL(*mockMetricsLibrary, getContextData(_, _)) .Times(1) .WillOnce(Return(true)); EXPECT_CALL(*mockMetricsLibrary, load()) .Times(0); EXPECT_CALL(metricGroup, getProperties(_)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<0>(metricGroupProperties), Return(ZE_RESULT_SUCCESS))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockQueryCreate(_, _)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<1>(queryHandle), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockQueryDelete(_)) .Times(1) .WillOnce(Return(StatusCode::Success)); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockGetParameter(_, _, _)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<2>(value), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockContextCreate(_, _, _)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<2>(contextHandle), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockContextDelete(_)) .Times(1) .WillOnce(Return(StatusCode::Success)); // Metric query pool create. EXPECT_EQ(zetMetricQueryPoolCreate(context->toHandle(), metricDevice, metricGroup.toHandle(), &poolDesc, &poolHandle), ZE_RESULT_SUCCESS); EXPECT_NE(poolHandle, nullptr); // Metric streamer open. 
EXPECT_EQ(zetMetricStreamerOpen(context->toHandle(), metricDevice, metricGroupHandle, &streamerDesc, eventHandle, &streamerHandle), ZE_RESULT_ERROR_NOT_AVAILABLE); EXPECT_EQ(streamerHandle, nullptr); // Metric query pool destroy. EXPECT_EQ(zetMetricQueryPoolDestroy(poolHandle), ZE_RESULT_SUCCESS); } TEST_F(MetricStreamerTest, givenInvalidArgumentsWhenZetMetricStreamerReadDataIsCalledThenReturnsFail) { // One api: device handle. zet_device_handle_t metricDeviceHandle = device->toHandle(); // One api: event handle. ze_event_handle_t eventHandle = {}; // One api: streamer handle. zet_metric_streamer_handle_t streamerHandle = {}; zet_metric_streamer_desc_t streamerDesc = {}; streamerDesc.stype = ZET_STRUCTURE_TYPE_METRIC_STREAMER_DESC; streamerDesc.notifyEveryNReports = 32768; streamerDesc.samplingPeriod = 1000; // One api: metric group handle. Mock metricGroup; zet_metric_group_handle_t metricGroupHandle = metricGroup.toHandle(); zet_metric_group_properties_t metricGroupProperties = {}; // Metrics Discovery device. metricsDeviceParams.ConcurrentGroupsCount = 1; // Metrics Discovery concurrent group. Mock metricsConcurrentGroup; TConcurrentGroupParams_1_0 metricsConcurrentGroupParams = {}; metricsConcurrentGroupParams.MetricSetsCount = 1; metricsConcurrentGroupParams.SymbolName = "OA"; metricsConcurrentGroupParams.Description = "OA description"; // Metrics Discovery metric set. 
Mock metricsSet; MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams = {}; metricsSetParams.ApiMask = MetricsDiscovery::API_TYPE_IOSTREAM; metricsSetParams.MetricsCount = 0; metricsSetParams.SymbolName = "Metric set name"; metricsSetParams.ShortName = "Metric set description"; metricsSetParams.RawReportSize = 256; openMetricsAdapter(); EXPECT_CALL(metricsDevice, GetParams()) .WillRepeatedly(Return(&metricsDeviceParams)); EXPECT_CALL(metricsDevice, GetConcurrentGroup(_)) .Times(1) .WillOnce(Return(&metricsConcurrentGroup)); EXPECT_CALL(metricsConcurrentGroup, GetParams()) .Times(1) .WillRepeatedly(Return(&metricsConcurrentGroupParams)); EXPECT_CALL(metricsConcurrentGroup, GetMetricSet(_)) .WillRepeatedly(Return(&metricsSet)); EXPECT_CALL(metricsSet, GetParams()) .WillRepeatedly(Return(&metricsSetParams)); EXPECT_CALL(metricsSet, SetApiFiltering(_)) .WillRepeatedly(Return(TCompletionCode::CC_OK)); EXPECT_CALL(metricsConcurrentGroup, OpenIoStream(_, _, _, _)) .Times(1) .WillOnce(Return(TCompletionCode::CC_OK)); EXPECT_CALL(metricsConcurrentGroup, CloseIoStream()) .Times(1) .WillOnce(Return(TCompletionCode::CC_OK)); // Metric group count. uint32_t metricGroupCount = 0; EXPECT_EQ(zetMetricGroupGet(metricDeviceHandle, &metricGroupCount, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); // Metric group handle. EXPECT_EQ(zetMetricGroupGet(metricDeviceHandle, &metricGroupCount, &metricGroupHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); EXPECT_NE(metricGroupHandle, nullptr); // Metric group properties. 
EXPECT_EQ(zetMetricGroupGetProperties(metricGroupHandle, &metricGroupProperties), ZE_RESULT_SUCCESS);
    EXPECT_EQ(metricGroupProperties.domain, 0u);
    EXPECT_EQ(metricGroupProperties.samplingType, ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_TIME_BASED);
    EXPECT_EQ(metricGroupProperties.metricCount, metricsSetParams.MetricsCount);
    EXPECT_EQ(strcmp(metricGroupProperties.description, metricsSetParams.ShortName), 0);
    EXPECT_EQ(strcmp(metricGroupProperties.name, metricsSetParams.SymbolName), 0);

    // Metric group activation.
    EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), metricDeviceHandle, 1, &metricGroupHandle), ZE_RESULT_SUCCESS);

    // Metric streamer open.
    EXPECT_EQ(zetMetricStreamerOpen(context->toHandle(), metricDeviceHandle, metricGroupHandle, &streamerDesc, eventHandle, &streamerHandle), ZE_RESULT_SUCCESS);
    EXPECT_NE(streamerHandle, nullptr);

    // Metric streamer close.
    EXPECT_EQ(zetMetricStreamerClose(streamerHandle), ZE_RESULT_SUCCESS);
}

// Opens a streamer on the single mocked metric group (reported as time-based) and calls
// zetMetricStreamerReadData twice: first with a null buffer to obtain the raw size, then
// with a buffer resized to that size. Both reads and the final close must return
// ZE_RESULT_SUCCESS.
TEST_F(MetricStreamerTest, givenValidArgumentsWhenZetMetricStreamerReadDataIsCalledThenReturnsSuccess) {

    // One api: device handle.
    zet_device_handle_t metricDeviceHandle = device->toHandle();

    // One api: event handle.
    ze_event_handle_t eventHandle = {};

    // One api: streamer handle.
    zet_metric_streamer_handle_t streamerHandle = {};
    zet_metric_streamer_desc_t streamerDesc = {};

    streamerDesc.stype = ZET_STRUCTURE_TYPE_METRIC_STREAMER_DESC;
    streamerDesc.notifyEveryNReports = 32768;
    streamerDesc.samplingPeriod = 1000;

    // One api: metric group handle.
    // NOTE(review): `Mock` is declared three times in this test for objects with different
    // interfaces, so its template arguments appear to be missing from this copy — confirm
    // against upstream.
    Mock metricGroup;
    zet_metric_group_handle_t metricGroupHandle = metricGroup.toHandle();
    zet_metric_group_properties_t metricGroupProperties = {};

    // Metrics Discovery device.
    metricsDeviceParams.ConcurrentGroupsCount = 1;

    // Metrics Discovery concurrent group.
    Mock metricsConcurrentGroup;
    TConcurrentGroupParams_1_0 metricsConcurrentGroupParams = {};
    metricsConcurrentGroupParams.MetricSetsCount = 1;
    metricsConcurrentGroupParams.SymbolName = "OA";
    metricsConcurrentGroupParams.Description = "OA description";

    // Metrics Discovery metric set.
    Mock metricsSet;
    MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams = {};
    metricsSetParams.ApiMask = MetricsDiscovery::API_TYPE_IOSTREAM;
    metricsSetParams.MetricsCount = 0;
    metricsSetParams.SymbolName = "Metric set name";
    metricsSetParams.ShortName = "Metric set description";
    metricsSetParams.RawReportSize = 256;

    openMetricsAdapter();

    EXPECT_CALL(metricsDevice, GetParams())
        .WillRepeatedly(Return(&metricsDeviceParams));

    EXPECT_CALL(metricsDevice, GetConcurrentGroup(_))
        .Times(1)
        .WillOnce(Return(&metricsConcurrentGroup));

    EXPECT_CALL(metricsConcurrentGroup, GetParams())
        .Times(1)
        .WillRepeatedly(Return(&metricsConcurrentGroupParams));

    EXPECT_CALL(metricsConcurrentGroup, GetMetricSet(_))
        .WillRepeatedly(Return(&metricsSet));

    EXPECT_CALL(metricsSet, GetParams())
        .WillRepeatedly(Return(&metricsSetParams));

    EXPECT_CALL(metricsSet, SetApiFiltering(_))
        .WillRepeatedly(Return(TCompletionCode::CC_OK));

    EXPECT_CALL(metricsConcurrentGroup, OpenIoStream(_, _, _, _))
        .Times(1)
        .WillOnce(Return(TCompletionCode::CC_OK));

    // ReadIoStream is expected exactly once even though zetMetricStreamerReadData is called
    // twice below, so only one of those two calls may reach the IO stream.
    EXPECT_CALL(metricsConcurrentGroup, ReadIoStream(_, _, _))
        .Times(1)
        .WillOnce(Return(TCompletionCode::CC_OK));

    EXPECT_CALL(metricsConcurrentGroup, CloseIoStream())
        .Times(1)
        .WillOnce(Return(TCompletionCode::CC_OK));

    // Metric group count.
    uint32_t metricGroupCount = 0;
    EXPECT_EQ(zetMetricGroupGet(metricDeviceHandle, &metricGroupCount, nullptr), ZE_RESULT_SUCCESS);
    EXPECT_EQ(metricGroupCount, 1u);

    // Metric group handle.
    EXPECT_EQ(zetMetricGroupGet(metricDeviceHandle, &metricGroupCount, &metricGroupHandle), ZE_RESULT_SUCCESS);
    EXPECT_EQ(metricGroupCount, 1u);
    EXPECT_NE(metricGroupHandle, nullptr);

    // Metric group properties.
    EXPECT_EQ(zetMetricGroupGetProperties(metricGroupHandle, &metricGroupProperties), ZE_RESULT_SUCCESS);
    EXPECT_EQ(metricGroupProperties.domain, 0u);
    EXPECT_EQ(metricGroupProperties.samplingType, ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_TIME_BASED);
    EXPECT_EQ(metricGroupProperties.metricCount, metricsSetParams.MetricsCount);
    EXPECT_EQ(strcmp(metricGroupProperties.description, metricsSetParams.ShortName), 0);
    EXPECT_EQ(strcmp(metricGroupProperties.name, metricsSetParams.SymbolName), 0);

    // Metric group activation.
    EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), metricDeviceHandle, 1, &metricGroupHandle), ZE_RESULT_SUCCESS);

    // Metric streamer open.
    EXPECT_EQ(zetMetricStreamerOpen(context->toHandle(), metricDeviceHandle, metricGroupHandle, &streamerDesc, eventHandle, &streamerHandle), ZE_RESULT_SUCCESS);
    EXPECT_NE(streamerHandle, nullptr);

    // Metric streamer: get desired raw data size.
    size_t rawSize = 0;
    uint32_t reportCount = 256;
    EXPECT_EQ(zetMetricStreamerReadData(streamerHandle, reportCount, &rawSize, nullptr), ZE_RESULT_SUCCESS);

    // Metric streamer: read the data.
    // The buffer is sized with the rawSize value written by the query call above.
    std::vector rawData;
    rawData.resize(rawSize);
    EXPECT_EQ(zetMetricStreamerReadData(streamerHandle, reportCount, &rawSize, rawData.data()), ZE_RESULT_SUCCESS);

    // Metric streamer close.
EXPECT_EQ(zetMetricStreamerClose(streamerHandle), ZE_RESULT_SUCCESS);
}

// Same open / read / close flow as the plain read-data test, but asks for 65537 reports.
// Per the test name this is above the supported maximum report count (the limit itself
// is not visible in this test); the size query and the data read must both still return
// ZE_RESULT_SUCCESS. This test additionally deactivates all metric groups at the end.
TEST_F(MetricStreamerTest, givenValidArgumentsWhenZetMetricStreamerReadDataIsCalledWithMaxReportCountOverTheSupportedThenReturnsSuccess) {

    zet_device_handle_t metricDeviceHandle = device->toHandle();

    ze_event_handle_t eventHandle = {};

    zet_metric_streamer_handle_t streamerHandle = {};
    zet_metric_streamer_desc_t streamerDesc = {};

    streamerDesc.stype = ZET_STRUCTURE_TYPE_METRIC_STREAMER_DESC;
    streamerDesc.notifyEveryNReports = 32768;
    streamerDesc.samplingPeriod = 1000;

    Mock metricGroup;
    zet_metric_group_handle_t metricGroupHandle = metricGroup.toHandle();
    zet_metric_group_properties_t metricGroupProperties = {};

    // Mocked Metrics Discovery hierarchy: one concurrent group ("OA") holding one
    // IO-stream metric set with 256-byte raw reports.
    metricsDeviceParams.ConcurrentGroupsCount = 1;

    Mock metricsConcurrentGroup;
    TConcurrentGroupParams_1_0 metricsConcurrentGroupParams = {};
    metricsConcurrentGroupParams.MetricSetsCount = 1;
    metricsConcurrentGroupParams.SymbolName = "OA";
    metricsConcurrentGroupParams.Description = "OA description";

    Mock metricsSet;
    MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams = {};
    metricsSetParams.ApiMask = MetricsDiscovery::API_TYPE_IOSTREAM;
    metricsSetParams.MetricsCount = 0;
    metricsSetParams.SymbolName = "Metric set name";
    metricsSetParams.ShortName = "Metric set description";
    metricsSetParams.RawReportSize = 256;

    openMetricsAdapter();

    EXPECT_CALL(metricsDevice, GetParams())
        .WillRepeatedly(Return(&metricsDeviceParams));

    EXPECT_CALL(metricsDevice, GetConcurrentGroup(_))
        .Times(1)
        .WillOnce(Return(&metricsConcurrentGroup));

    EXPECT_CALL(metricsConcurrentGroup, GetParams())
        .Times(1)
        .WillRepeatedly(Return(&metricsConcurrentGroupParams));

    EXPECT_CALL(metricsConcurrentGroup, GetMetricSet(_))
        .WillRepeatedly(Return(&metricsSet));

    EXPECT_CALL(metricsSet, GetParams())
        .WillRepeatedly(Return(&metricsSetParams));

    EXPECT_CALL(metricsSet, SetApiFiltering(_))
        .WillRepeatedly(Return(TCompletionCode::CC_OK));

    EXPECT_CALL(metricsConcurrentGroup, OpenIoStream(_, _, _, _))
        .Times(1)
        .WillOnce(Return(TCompletionCode::CC_OK));

    // Exactly one ReadIoStream call is allowed for the two zetMetricStreamerReadData calls below.
    EXPECT_CALL(metricsConcurrentGroup, ReadIoStream(_, _, _))
        .Times(1)
        .WillOnce(Return(TCompletionCode::CC_OK));

    EXPECT_CALL(metricsConcurrentGroup, CloseIoStream())
        .Times(1)
        .WillOnce(Return(TCompletionCode::CC_OK));

    // Enumerate: first call queries the count, second call fetches the handle.
    uint32_t metricGroupCount = 0;
    EXPECT_EQ(zetMetricGroupGet(metricDeviceHandle, &metricGroupCount, nullptr), ZE_RESULT_SUCCESS);
    EXPECT_EQ(metricGroupCount, 1u);

    EXPECT_EQ(zetMetricGroupGet(metricDeviceHandle, &metricGroupCount, &metricGroupHandle), ZE_RESULT_SUCCESS);
    EXPECT_EQ(metricGroupCount, 1u);
    EXPECT_NE(metricGroupHandle, nullptr);

    EXPECT_EQ(zetMetricGroupGetProperties(metricGroupHandle, &metricGroupProperties), ZE_RESULT_SUCCESS);
    EXPECT_EQ(metricGroupProperties.domain, 0u);
    EXPECT_EQ(metricGroupProperties.samplingType, ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_TIME_BASED);
    EXPECT_EQ(metricGroupProperties.metricCount, metricsSetParams.MetricsCount);
    EXPECT_EQ(strcmp(metricGroupProperties.description, metricsSetParams.ShortName), 0);
    EXPECT_EQ(strcmp(metricGroupProperties.name, metricsSetParams.SymbolName), 0);

    EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), metricDeviceHandle, 1, &metricGroupHandle), ZE_RESULT_SUCCESS);

    EXPECT_EQ(zetMetricStreamerOpen(context->toHandle(), metricDeviceHandle, metricGroupHandle, &streamerDesc, eventHandle, &streamerHandle), ZE_RESULT_SUCCESS);
    EXPECT_NE(streamerHandle, nullptr);

    // Size query with the oversized report count, then the actual read into a buffer of
    // the size that query wrote to rawSize.
    size_t rawSize = 0;
    uint32_t reportCount = 65537;
    EXPECT_EQ(zetMetricStreamerReadData(streamerHandle, reportCount, &rawSize, nullptr), ZE_RESULT_SUCCESS);

    std::vector rawData;
    rawData.resize(rawSize);
    EXPECT_EQ(zetMetricStreamerReadData(streamerHandle, reportCount, &rawSize, rawData.data()), ZE_RESULT_SUCCESS);

    EXPECT_EQ(zetMetricStreamerClose(streamerHandle), ZE_RESULT_SUCCESS);

    // Deactivate: zero groups with a null handle array.
    EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), metricDeviceHandle, 0, nullptr), ZE_RESULT_SUCCESS);
}

TEST_F(MetricStreamerTest,
givenInvalidArgumentsWhenZetCommandListAppendMetricStreamerMarkerIsCalledThenReturnsFail) {
    // NOTE(review): despite its name, this test never calls
    // zetCommandListAppendMetricStreamerMarker and asserts no failure. It only enumerates
    // the metric group, activates it, and opens and closes a streamer, all expecting
    // ZE_RESULT_SUCCESS. Confirm whether the invalid-argument marker call was dropped.

    // One api: device handle.
    zet_device_handle_t metricDeviceHandle = device->toHandle();

    // One api: event handle.
    ze_event_handle_t eventHandle = {};

    // One api: streamer handle.
    zet_metric_streamer_handle_t streamerHandle = {};
    zet_metric_streamer_desc_t streamerDesc = {};

    streamerDesc.stype = ZET_STRUCTURE_TYPE_METRIC_STREAMER_DESC;
    streamerDesc.notifyEveryNReports = 32768;
    streamerDesc.samplingPeriod = 1000;

    // One api: metric group handle.
    Mock metricGroup;
    zet_metric_group_handle_t metricGroupHandle = metricGroup.toHandle();
    zet_metric_group_properties_t metricGroupProperties = {};

    // Metrics Discovery device.
    metricsDeviceParams.ConcurrentGroupsCount = 1;

    // Metrics Discovery concurrent group.
    Mock metricsConcurrentGroup;
    TConcurrentGroupParams_1_0 metricsConcurrentGroupParams = {};
    metricsConcurrentGroupParams.MetricSetsCount = 1;
    metricsConcurrentGroupParams.SymbolName = "OA";
    metricsConcurrentGroupParams.Description = "OA description";

    // Metrics Discovery metric set.
    Mock metricsSet;
    MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams = {};
    metricsSetParams.ApiMask = MetricsDiscovery::API_TYPE_IOSTREAM;
    metricsSetParams.MetricsCount = 0;
    metricsSetParams.SymbolName = "Metric set name";
    metricsSetParams.ShortName = "Metric set description";
    metricsSetParams.RawReportSize = 256;

    openMetricsAdapter();

    EXPECT_CALL(metricsDevice, GetParams())
        .WillRepeatedly(Return(&metricsDeviceParams));

    EXPECT_CALL(metricsDevice, GetConcurrentGroup(_))
        .Times(1)
        .WillOnce(Return(&metricsConcurrentGroup));

    EXPECT_CALL(metricsConcurrentGroup, GetParams())
        .Times(1)
        .WillRepeatedly(Return(&metricsConcurrentGroupParams));

    EXPECT_CALL(metricsConcurrentGroup, GetMetricSet(_))
        .WillRepeatedly(Return(&metricsSet));

    EXPECT_CALL(metricsSet, GetParams())
        .WillRepeatedly(Return(&metricsSetParams));

    EXPECT_CALL(metricsSet, SetApiFiltering(_))
        .WillRepeatedly(Return(TCompletionCode::CC_OK));

    EXPECT_CALL(metricsConcurrentGroup, OpenIoStream(_, _, _, _))
        .Times(1)
        .WillOnce(Return(TCompletionCode::CC_OK));

    EXPECT_CALL(metricsConcurrentGroup, CloseIoStream())
        .Times(1)
        .WillOnce(Return(TCompletionCode::CC_OK));

    // Metric group count.
    uint32_t metricGroupCount = 0;
    EXPECT_EQ(zetMetricGroupGet(metricDeviceHandle, &metricGroupCount, nullptr), ZE_RESULT_SUCCESS);
    EXPECT_EQ(metricGroupCount, 1u);

    // Metric group handle.
    EXPECT_EQ(zetMetricGroupGet(metricDeviceHandle, &metricGroupCount, &metricGroupHandle), ZE_RESULT_SUCCESS);
    EXPECT_EQ(metricGroupCount, 1u);
    EXPECT_NE(metricGroupHandle, nullptr);

    // Metric group properties.
    EXPECT_EQ(zetMetricGroupGetProperties(metricGroupHandle, &metricGroupProperties), ZE_RESULT_SUCCESS);
    EXPECT_EQ(metricGroupProperties.domain, 0u);
    EXPECT_EQ(metricGroupProperties.samplingType, ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_TIME_BASED);
    EXPECT_EQ(metricGroupProperties.metricCount, metricsSetParams.MetricsCount);
    EXPECT_EQ(strcmp(metricGroupProperties.description, metricsSetParams.ShortName), 0);
    EXPECT_EQ(strcmp(metricGroupProperties.name, metricsSetParams.SymbolName), 0);

    // Metric group activation.
    EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), metricDeviceHandle, 1, &metricGroupHandle), ZE_RESULT_SUCCESS);

    // Metric streamer open.
    EXPECT_EQ(zetMetricStreamerOpen(context->toHandle(), metricDeviceHandle, metricGroupHandle, &streamerDesc, eventHandle, &streamerHandle), ZE_RESULT_SUCCESS);
    EXPECT_NE(streamerHandle, nullptr);

    // Metric streamer close.
    EXPECT_EQ(zetMetricStreamerClose(streamerHandle), ZE_RESULT_SUCCESS);
}

// gMock matcher for the pointer argument of MockCommandBufferGet. It matches when the
// pointed-to data has CommandsType == ObjectType::MarkerStreamUser, its
// MarkerStreamUser.Value equals the low 25 bits of `marker` (mask 0x1FFFFFF), and its
// MarkerStreamUser.Reserved equals the remaining high bits (marker >> 25).
MATCHER_P(streamerMarkerDataAreEqual, marker, "") {
    const uint32_t streamerMarkerLowBitsMask = 0x1FFFFFF;
    const uint32_t streamerMarkerHighBitsShift = 25;

    return (arg->CommandsType == ObjectType::MarkerStreamUser) && (arg->MarkerStreamUser.Value == (marker & streamerMarkerLowBitsMask)) && (arg->MarkerStreamUser.Reserved == (marker >> streamerMarkerHighBitsShift));
}

// Opens a streamer and appends one marker (0x12345678) to a command list. The metrics
// library mock must be asked for the command buffer size once and for the command buffer
// once, the latter with data matching streamerMarkerDataAreEqual(markerValue). The append
// and the close must return ZE_RESULT_SUCCESS.
TEST_F(MetricStreamerTest, givenValidArgumentsWhenZetCommandListAppendMetricStreamerMarkerIsCalledThenReturnsSuccess) {

    // One api: device handle.
    zet_device_handle_t metricDeviceHandle = device->toHandle();

    // One api: event handle.
    ze_event_handle_t eventHandle = {};

    // One api: command list handle.
    ze_result_t returnValue;
    std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));

    // One api: streamer handle.
zet_metric_streamer_handle_t streamerHandle = {};
    zet_metric_streamer_desc_t streamerDesc = {};

    streamerDesc.stype = ZET_STRUCTURE_TYPE_METRIC_STREAMER_DESC;
    streamerDesc.notifyEveryNReports = 32768;
    streamerDesc.samplingPeriod = 1000;

    // One api: metric group handle.
    Mock metricGroup;
    zet_metric_group_handle_t metricGroupHandle = metricGroup.toHandle();
    zet_metric_group_properties_t metricGroupProperties = {};

    // Metrics Discovery device.
    metricsDeviceParams.ConcurrentGroupsCount = 1;

    // Metrics Discovery concurrent group.
    Mock metricsConcurrentGroup;
    TConcurrentGroupParams_1_0 metricsConcurrentGroupParams = {};
    metricsConcurrentGroupParams.MetricSetsCount = 1;
    metricsConcurrentGroupParams.SymbolName = "OA";
    metricsConcurrentGroupParams.Description = "OA description";

    // Metrics Discovery metric set.
    Mock metricsSet;
    MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams = {};
    metricsSetParams.ApiMask = MetricsDiscovery::API_TYPE_IOSTREAM;
    metricsSetParams.MetricsCount = 0;
    metricsSetParams.SymbolName = "Metric set name";
    metricsSetParams.ShortName = "Metric set description";
    metricsSetParams.RawReportSize = 256;

    // Metrics library context handle handed out by the mocked MockContextCreate below;
    // it wraps the address of `value`.
    TypedValue_1_0 value = {};
    value.Type = ValueType::Uint32;
    value.ValueUInt32 = 64;

    ContextHandle_1_0 contextHandle = {&value};

    // Size reported by the mocked MockCommandBufferGetSize below.
    CommandBufferSize_1_0 commandBufferSize = {};
    commandBufferSize.GpuMemorySize = 100;

    // Wider than 25 bits, so both the Value and the Reserved part checked by
    // streamerMarkerDataAreEqual are non-zero.
    uint32_t markerValue = 0x12345678;

    openMetricsAdapter();

    EXPECT_CALL(*mockMetricEnumeration, isInitialized())
        .Times(1)
        .WillOnce(Return(true));

    EXPECT_CALL(*mockMetricsLibrary, getContextData(_, _))
        .Times(1)
        .WillOnce(Return(true));

    EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockContextCreate(_, _, _))
        .Times(1)
        .WillOnce(DoAll(::testing::SetArgPointee<2>(contextHandle), Return(StatusCode::Success)));

    EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockContextDelete(_))
        .Times(1)
        .WillOnce(Return(StatusCode::Success));

    EXPECT_CALL(metricsDevice, GetParams())
        .WillRepeatedly(Return(&metricsDeviceParams));

    EXPECT_CALL(metricsDevice, GetConcurrentGroup(_))
        .Times(1)
        .WillOnce(Return(&metricsConcurrentGroup));

    EXPECT_CALL(metricsConcurrentGroup, GetParams())
        .Times(1)
        .WillRepeatedly(Return(&metricsConcurrentGroupParams));

    EXPECT_CALL(metricsConcurrentGroup, GetMetricSet(_))
        .WillRepeatedly(Return(&metricsSet));

    EXPECT_CALL(metricsSet, GetParams())
        .WillRepeatedly(Return(&metricsSetParams));

    EXPECT_CALL(metricsSet, SetApiFiltering(_))
        .WillRepeatedly(Return(TCompletionCode::CC_OK));

    EXPECT_CALL(metricsConcurrentGroup, OpenIoStream(_, _, _, _))
        .Times(1)
        .WillOnce(Return(TCompletionCode::CC_OK));

    // ByRef: the action stores a reference to commandBufferSize rather than a copy taken
    // when the expectation is set.
    EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockCommandBufferGetSize(_, _))
        .Times(1)
        .WillOnce(DoAll(::testing::SetArgPointee<1>(::testing::ByRef(commandBufferSize)), Return(StatusCode::Success)));

    // The single marker must arrive with data that matches markerValue.
    EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockCommandBufferGet(streamerMarkerDataAreEqual(markerValue)))
        .Times(1)
        .WillOnce(Return(StatusCode::Success));

    EXPECT_CALL(metricsConcurrentGroup, CloseIoStream())
        .Times(1)
        .WillOnce(Return(TCompletionCode::CC_OK));

    // Metric group count.
    uint32_t metricGroupCount = 0;
    EXPECT_EQ(zetMetricGroupGet(metricDeviceHandle, &metricGroupCount, nullptr), ZE_RESULT_SUCCESS);
    EXPECT_EQ(metricGroupCount, 1u);

    // Metric group handle.
    EXPECT_EQ(zetMetricGroupGet(metricDeviceHandle, &metricGroupCount, &metricGroupHandle), ZE_RESULT_SUCCESS);
    EXPECT_EQ(metricGroupCount, 1u);
    EXPECT_NE(metricGroupHandle, nullptr);

    // Metric group properties.
    EXPECT_EQ(zetMetricGroupGetProperties(metricGroupHandle, &metricGroupProperties), ZE_RESULT_SUCCESS);
    EXPECT_EQ(metricGroupProperties.domain, 0u);
    EXPECT_EQ(metricGroupProperties.samplingType, ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_TIME_BASED);
    EXPECT_EQ(metricGroupProperties.metricCount, metricsSetParams.MetricsCount);
    EXPECT_EQ(strcmp(metricGroupProperties.description, metricsSetParams.ShortName), 0);
    EXPECT_EQ(strcmp(metricGroupProperties.name, metricsSetParams.SymbolName), 0);

    // Metric group activation.
    EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), metricDeviceHandle, 1, &metricGroupHandle), ZE_RESULT_SUCCESS);

    // Metric streamer open.
    EXPECT_EQ(zetMetricStreamerOpen(context->toHandle(), metricDeviceHandle, metricGroupHandle, &streamerDesc, eventHandle, &streamerHandle), ZE_RESULT_SUCCESS);
    EXPECT_NE(streamerHandle, nullptr);

    // Metric streamer marker.
    EXPECT_EQ(zetCommandListAppendMetricStreamerMarker(commandList->toHandle(), streamerHandle, markerValue), ZE_RESULT_SUCCESS);

    // Metric streamer close.
    EXPECT_EQ(zetMetricStreamerClose(streamerHandle), ZE_RESULT_SUCCESS);
}

// Opens a streamer and appends ten markers in a row to the same command list. Every
// append must return ZE_RESULT_SUCCESS, and the metrics library mock must see exactly
// ten command-buffer size queries and ten command-buffer requests.
TEST_F(MetricStreamerTest, givenMultipleMarkerInsertionsWhenZetCommandListAppendMetricStreamerMarkerIsCalledThenReturnsSuccess) {

    // One api: device handle.
    zet_device_handle_t metricDeviceHandle = device->toHandle();

    // One api: event handle.
    ze_event_handle_t eventHandle = {};

    // One api: command list handle.
    ze_result_t returnValue;
    std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));

    // One api: streamer handle.
    zet_metric_streamer_handle_t streamerHandle = {};
    zet_metric_streamer_desc_t streamerDesc = {};

    streamerDesc.stype = ZET_STRUCTURE_TYPE_METRIC_STREAMER_DESC;
    streamerDesc.notifyEveryNReports = 32768;
    streamerDesc.samplingPeriod = 1000;

    // One api: metric group handle.
    Mock metricGroup;
    zet_metric_group_handle_t metricGroupHandle = metricGroup.toHandle();
    zet_metric_group_properties_t metricGroupProperties = {};

    // Metrics Discovery device.
    metricsDeviceParams.ConcurrentGroupsCount = 1;

    // Metrics Discovery concurrent group.
    Mock metricsConcurrentGroup;
    TConcurrentGroupParams_1_0 metricsConcurrentGroupParams = {};
    metricsConcurrentGroupParams.MetricSetsCount = 1;
    metricsConcurrentGroupParams.SymbolName = "OA";
    metricsConcurrentGroupParams.Description = "OA description";

    // Metrics Discovery metric set.
Mock metricsSet;
    MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams = {};
    metricsSetParams.ApiMask = MetricsDiscovery::API_TYPE_IOSTREAM;
    metricsSetParams.MetricsCount = 0;
    metricsSetParams.SymbolName = "Metric set name";
    metricsSetParams.ShortName = "Metric set description";
    metricsSetParams.RawReportSize = 256;

    // Metrics library context handle handed out by the mocked MockContextCreate below;
    // it wraps the address of `value`.
    TypedValue_1_0 value = {};
    value.Type = ValueType::Uint32;
    value.ValueUInt32 = 64;

    ContextHandle_1_0 contextHandle = {&value};

    // Size reported by the mocked MockCommandBufferGetSize below.
    CommandBufferSize_1_0 commandBufferSize = {};
    commandBufferSize.GpuMemorySize = 100;

    openMetricsAdapter();

    // The library context is expected to be created once and deleted once, regardless of
    // how many markers are appended.
    EXPECT_CALL(*mockMetricEnumeration, isInitialized())
        .Times(1)
        .WillOnce(Return(true));

    EXPECT_CALL(*mockMetricsLibrary, getContextData(_, _))
        .Times(1)
        .WillOnce(Return(true));

    EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockContextCreate(_, _, _))
        .Times(1)
        .WillOnce(DoAll(::testing::SetArgPointee<2>(contextHandle), Return(StatusCode::Success)));

    EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockContextDelete(_))
        .Times(1)
        .WillOnce(Return(StatusCode::Success));

    EXPECT_CALL(metricsDevice, GetParams())
        .WillRepeatedly(Return(&metricsDeviceParams));

    EXPECT_CALL(metricsDevice, GetConcurrentGroup(_))
        .Times(1)
        .WillOnce(Return(&metricsConcurrentGroup));

    EXPECT_CALL(metricsConcurrentGroup, GetParams())
        .Times(1)
        .WillRepeatedly(Return(&metricsConcurrentGroupParams));

    EXPECT_CALL(metricsConcurrentGroup, GetMetricSet(_))
        .WillRepeatedly(Return(&metricsSet));

    EXPECT_CALL(metricsSet, GetParams())
        .WillRepeatedly(Return(&metricsSetParams));

    EXPECT_CALL(metricsSet, SetApiFiltering(_))
        .WillRepeatedly(Return(TCompletionCode::CC_OK));

    EXPECT_CALL(metricsConcurrentGroup, OpenIoStream(_, _, _, _))
        .Times(1)
        .WillOnce(Return(TCompletionCode::CC_OK));

    // Ten of each, matching the ten entries of markerValues below. The command buffer
    // contents are not inspected here (wildcard matcher).
    EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockCommandBufferGetSize(_, _))
        .Times(10)
        .WillRepeatedly(DoAll(::testing::SetArgPointee<1>(::testing::ByRef(commandBufferSize)), Return(StatusCode::Success)));

    EXPECT_CALL(*mockMetricsLibrary->g_mockApi, MockCommandBufferGet(_))
        .Times(10)
        .WillRepeatedly(Return(StatusCode::Success));

    EXPECT_CALL(metricsConcurrentGroup, CloseIoStream())
        .Times(1)
        .WillOnce(Return(TCompletionCode::CC_OK));

    // Metric group count.
    uint32_t metricGroupCount = 0;
    EXPECT_EQ(zetMetricGroupGet(metricDeviceHandle, &metricGroupCount, nullptr), ZE_RESULT_SUCCESS);
    EXPECT_EQ(metricGroupCount, 1u);

    // Metric group handle.
    EXPECT_EQ(zetMetricGroupGet(metricDeviceHandle, &metricGroupCount, &metricGroupHandle), ZE_RESULT_SUCCESS);
    EXPECT_EQ(metricGroupCount, 1u);
    EXPECT_NE(metricGroupHandle, nullptr);

    // Metric group properties.
    EXPECT_EQ(zetMetricGroupGetProperties(metricGroupHandle, &metricGroupProperties), ZE_RESULT_SUCCESS);
    EXPECT_EQ(metricGroupProperties.domain, 0u);
    EXPECT_EQ(metricGroupProperties.samplingType, ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_TIME_BASED);
    EXPECT_EQ(metricGroupProperties.metricCount, metricsSetParams.MetricsCount);
    EXPECT_EQ(strcmp(metricGroupProperties.description, metricsSetParams.ShortName), 0);
    EXPECT_EQ(strcmp(metricGroupProperties.name, metricsSetParams.SymbolName), 0);

    // Metric group activation.
    EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), metricDeviceHandle, 1, &metricGroupHandle), ZE_RESULT_SUCCESS);

    // Metric streamer open.
    EXPECT_EQ(zetMetricStreamerOpen(context->toHandle(), metricDeviceHandle, metricGroupHandle, &streamerDesc, eventHandle, &streamerHandle), ZE_RESULT_SUCCESS);
    EXPECT_NE(streamerHandle, nullptr);

    // Metric streamer marker.
    // Ten markers, including the value 0, appended one after another.
    std::array markerValues = {1, 2, 3, 4, 5, 6, 7, 8, 9, 0};
    for (auto &markerValue : markerValues) {
        EXPECT_EQ(zetCommandListAppendMetricStreamerMarker(commandList->toHandle(), streamerHandle, markerValue), ZE_RESULT_SUCCESS);
    }

    // Metric streamer close.
EXPECT_EQ(zetMetricStreamerClose(streamerHandle), ZE_RESULT_SUCCESS); } } // namespace ult } // namespace L0 test_metric_oa_streamer_2.cpp000066400000000000000000001122461422164147700347500ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/metrics/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "level_zero/tools/source/metrics/metric_oa_source.h" #include "level_zero/tools/test/unit_tests/sources/metrics/mock_metric_oa.h" using ::testing::_; using ::testing::Return; namespace L0 { namespace ult { using MetricStreamerMultiDeviceTest = Test; TEST_F(MetricStreamerMultiDeviceTest, givenInvalidMetricGroupTypeWhenZetMetricStreamerOpenIsCalledThenReturnsFail) { zet_device_handle_t metricDeviceHandle = devices[0]->toHandle(); auto &deviceImp = *static_cast(devices[0]); const uint32_t subDeviceCount = static_cast(deviceImp.subDevices.size()); ze_event_handle_t eventHandle = {}; zet_metric_streamer_handle_t streamerHandle = {}; zet_metric_streamer_desc_t streamerDesc = {}; streamerDesc.stype = ZET_STRUCTURE_TYPE_METRIC_STREAMER_DESC; streamerDesc.notifyEveryNReports = 32768; streamerDesc.samplingPeriod = 1000; Mock metricGroup; zet_metric_group_handle_t metricGroupHandle = metricGroup.toHandle(); zet_metric_group_properties_t metricGroupProperties = {}; metricsDeviceParams.ConcurrentGroupsCount = 1; Mock metricsConcurrentGroup; TConcurrentGroupParams_1_0 metricsConcurrentGroupParams = {}; metricsConcurrentGroupParams.MetricSetsCount = 1; metricsConcurrentGroupParams.SymbolName = "OA"; metricsConcurrentGroupParams.Description = "OA description"; Mock metricsSet; MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams = {}; metricsSetParams.ApiMask = MetricsDiscovery::API_TYPE_OCL; metricsSetParams.MetricsCount = 0; metricsSetParams.SymbolName = "Metric set name"; metricsSetParams.ShortName = "Metric set description"; 
metricsSetParams.RawReportSize = 256;

    openMetricsAdapter();

    EXPECT_CALL(metricsDevice, GetParams())
        .WillRepeatedly(Return(&metricsDeviceParams));

    EXPECT_CALL(metricsDevice, GetConcurrentGroup(_))
        .Times(subDeviceCount)
        .WillRepeatedly(Return(&metricsConcurrentGroup));

    EXPECT_CALL(metricsConcurrentGroup, GetParams())
        .Times(subDeviceCount)
        .WillRepeatedly(Return(&metricsConcurrentGroupParams));

    EXPECT_CALL(metricsConcurrentGroup, GetMetricSet(_))
        .WillRepeatedly(Return(&metricsSet));

    EXPECT_CALL(metricsSet, GetParams())
        .WillRepeatedly(Return(&metricsSetParams));

    EXPECT_CALL(metricsSet, SetApiFiltering(_))
        .WillRepeatedly(Return(TCompletionCode::CC_OK));

    uint32_t metricGroupCount = 0;
    EXPECT_EQ(zetMetricGroupGet(metricDeviceHandle, &metricGroupCount, nullptr), ZE_RESULT_SUCCESS);
    EXPECT_EQ(metricGroupCount, 1u);

    EXPECT_EQ(zetMetricGroupGet(metricDeviceHandle, &metricGroupCount, &metricGroupHandle), ZE_RESULT_SUCCESS);
    EXPECT_EQ(metricGroupCount, 1u);
    EXPECT_NE(metricGroupHandle, nullptr);

    // The group is reported as event-based here, unlike the time-based groups the other
    // streamer tests in this file use.
    EXPECT_EQ(zetMetricGroupGetProperties(metricGroupHandle, &metricGroupProperties), ZE_RESULT_SUCCESS);
    EXPECT_EQ(metricGroupProperties.domain, 0u);
    EXPECT_EQ(metricGroupProperties.samplingType, ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_EVENT_BASED);
    EXPECT_EQ(metricGroupProperties.metricCount, metricsSetParams.MetricsCount);
    EXPECT_EQ(strcmp(metricGroupProperties.description, metricsSetParams.ShortName), 0);
    EXPECT_EQ(strcmp(metricGroupProperties.name, metricsSetParams.SymbolName), 0);

    EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), metricDeviceHandle, 1, &metricGroupHandle), ZE_RESULT_SUCCESS);

    // Opening a streamer on this group must be rejected and leave the handle null.
    EXPECT_EQ(zetMetricStreamerOpen(context->toHandle(), metricDeviceHandle, metricGroupHandle, &streamerDesc, eventHandle, &streamerHandle), ZE_RESULT_ERROR_INVALID_ARGUMENT);
    EXPECT_EQ(streamerHandle, nullptr);
}

// Multi-device variant of the basic open/close test, run against devices[0]. The
// Metrics Discovery calls that the single-device tests expect once are expected
// subDeviceCount times here (GetConcurrentGroup, GetParams on the concurrent group,
// OpenIoStream, CloseIoStream). Open and close must return ZE_RESULT_SUCCESS.
TEST_F(MetricStreamerMultiDeviceTest, givenValidArgumentsWhenZetMetricStreamerOpenIsCalledThenReturnsSuccess) {

    zet_device_handle_t metricDeviceHandle = devices[0]->toHandle();
    // NOTE(review): both static_cast expressions below have no target type; the template
    // arguments appear to be missing from this copy — confirm against upstream.
    auto &deviceImp = *static_cast(devices[0]);
    const uint32_t subDeviceCount = static_cast(deviceImp.subDevices.size());

    ze_event_handle_t eventHandle = {};

    zet_metric_streamer_handle_t streamerHandle = {};
    zet_metric_streamer_desc_t streamerDesc = {};

    streamerDesc.stype = ZET_STRUCTURE_TYPE_METRIC_STREAMER_DESC;
    streamerDesc.notifyEveryNReports = 32768;
    streamerDesc.samplingPeriod = 1000;

    Mock metricGroup;
    zet_metric_group_handle_t metricGroupHandle = metricGroup.toHandle();
    zet_metric_group_properties_t metricGroupProperties = {};

    metricsDeviceParams.ConcurrentGroupsCount = 1;

    Mock metricsConcurrentGroup;
    TConcurrentGroupParams_1_0 metricsConcurrentGroupParams = {};
    metricsConcurrentGroupParams.MetricSetsCount = 1;
    metricsConcurrentGroupParams.SymbolName = "OA";
    metricsConcurrentGroupParams.Description = "OA description";

    Mock metricsSet;
    MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams = {};
    metricsSetParams.ApiMask = MetricsDiscovery::API_TYPE_IOSTREAM;
    metricsSetParams.MetricsCount = 0;
    metricsSetParams.SymbolName = "Metric set name";
    metricsSetParams.ShortName = "Metric set description";
    metricsSetParams.RawReportSize = 256;

    openMetricsAdapter();

    EXPECT_CALL(metricsDevice, GetParams())
        .WillRepeatedly(Return(&metricsDeviceParams));

    EXPECT_CALL(metricsDevice, GetConcurrentGroup(_))
        .Times(subDeviceCount)
        .WillRepeatedly(Return(&metricsConcurrentGroup));

    EXPECT_CALL(metricsConcurrentGroup, GetParams())
        .Times(subDeviceCount)
        .WillRepeatedly(Return(&metricsConcurrentGroupParams));

    EXPECT_CALL(metricsConcurrentGroup, GetMetricSet(_))
        .WillRepeatedly(Return(&metricsSet));

    EXPECT_CALL(metricsSet, GetParams())
        .WillRepeatedly(Return(&metricsSetParams));

    EXPECT_CALL(metricsSet, SetApiFiltering(_))
        .WillRepeatedly(Return(TCompletionCode::CC_OK));

    EXPECT_CALL(metricsConcurrentGroup, OpenIoStream(_, _, _, _))
        .Times(subDeviceCount)
        .WillRepeatedly(Return(TCompletionCode::CC_OK));

    EXPECT_CALL(metricsConcurrentGroup, CloseIoStream())
        .Times(subDeviceCount)
        .WillRepeatedly(Return(TCompletionCode::CC_OK));

    // Still a single metric group at the API level, whatever subDeviceCount is.
    uint32_t metricGroupCount = 0;
    EXPECT_EQ(zetMetricGroupGet(metricDeviceHandle, &metricGroupCount, nullptr), ZE_RESULT_SUCCESS);
    EXPECT_EQ(metricGroupCount, 1u);

    EXPECT_EQ(zetMetricGroupGet(metricDeviceHandle, &metricGroupCount, &metricGroupHandle), ZE_RESULT_SUCCESS);
    EXPECT_EQ(metricGroupCount, 1u);
    EXPECT_NE(metricGroupHandle, nullptr);

    EXPECT_EQ(zetMetricGroupGetProperties(metricGroupHandle, &metricGroupProperties), ZE_RESULT_SUCCESS);
    EXPECT_EQ(metricGroupProperties.domain, 0u);
    EXPECT_EQ(metricGroupProperties.samplingType, ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_TIME_BASED);
    EXPECT_EQ(metricGroupProperties.metricCount, metricsSetParams.MetricsCount);
    EXPECT_EQ(strcmp(metricGroupProperties.description, metricsSetParams.ShortName), 0);
    EXPECT_EQ(strcmp(metricGroupProperties.name, metricsSetParams.SymbolName), 0);

    EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), metricDeviceHandle, 1, &metricGroupHandle), ZE_RESULT_SUCCESS);

    EXPECT_EQ(zetMetricStreamerOpen(context->toHandle(), metricDeviceHandle, metricGroupHandle, &streamerDesc, eventHandle, &streamerHandle), ZE_RESULT_SUCCESS);
    EXPECT_NE(streamerHandle, nullptr);

    EXPECT_EQ(zetMetricStreamerClose(streamerHandle), ZE_RESULT_SUCCESS);
}

// Opens and closes a streamer on sub-device 0 with the EnableWalkerPartition debug flag
// set to 1. The adapter expectations are written out by hand in this test instead of
// calling openMetricsAdapter(), and every Metrics Discovery call is expected exactly once.
TEST_F(MetricStreamerMultiDeviceTest, givenEnableWalkerPartitionIsOnWhenZetMetricStreamerOpenIsCalledThenReturnsSuccess) {

    // NOTE(review): presumably `restorer` puts the debug flags back when it goes out of
    // scope — confirm in DebugManagerStateRestore.
    DebugManagerStateRestore restorer;
    DebugManager.flags.EnableWalkerPartition.set(1);

    auto &deviceImp = *static_cast(devices[0]);
    zet_device_handle_t metricDeviceHandle = deviceImp.subDevices[0]->toHandle();

    ze_event_handle_t eventHandle = {};

    zet_metric_streamer_handle_t streamerHandle = {};
    zet_metric_streamer_desc_t streamerDesc = {};

    streamerDesc.stype = ZET_STRUCTURE_TYPE_METRIC_STREAMER_DESC;
    streamerDesc.notifyEveryNReports = 32768;
    streamerDesc.samplingPeriod = 1000;

    Mock metricGroup;
    zet_metric_group_handle_t metricGroupHandle = metricGroup.toHandle();
zet_metric_group_properties_t metricGroupProperties = {};

    metricsDeviceParams.ConcurrentGroupsCount = 1;

    Mock metricsConcurrentGroup;
    TConcurrentGroupParams_1_0 metricsConcurrentGroupParams = {};
    metricsConcurrentGroupParams.MetricSetsCount = 1;
    metricsConcurrentGroupParams.SymbolName = "OA";
    metricsConcurrentGroupParams.Description = "OA description";

    Mock metricsSet;
    MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams = {};
    metricsSetParams.ApiMask = MetricsDiscovery::API_TYPE_IOSTREAM;
    metricsSetParams.MetricsCount = 0;
    metricsSetParams.SymbolName = "Metric set name";
    metricsSetParams.ShortName = "Metric set description";
    metricsSetParams.RawReportSize = 256;

    // Adapter setup for sub-device 0. The Metrics Discovery library must not be loaded
    // again, and the adapter must come from the enumeration mock's getMetricsAdapter()
    // rather than from adapterGroup.GetAdapter().
    EXPECT_CALL(*mockMetricEnumerationSubDevices[0], loadMetricsDiscovery())
        .Times(0);

    EXPECT_CALL(*mockMetricEnumerationSubDevices[0]->g_mockApi, MockOpenAdapterGroup(_))
        .Times(1)
        .WillOnce(DoAll(::testing::SetArgPointee<0>(&adapterGroup), Return(TCompletionCode::CC_OK)));

    EXPECT_CALL(adapter, OpenMetricsDevice(_))
        .Times(1)
        .WillOnce(DoAll(::testing::SetArgPointee<0>(&metricsDevice), Return(TCompletionCode::CC_OK)));

    EXPECT_CALL(adapter, CloseMetricsDevice(_))
        .Times(1)
        .WillRepeatedly(Return(TCompletionCode::CC_OK));

    EXPECT_CALL(adapterGroup, GetAdapter(_))
        .Times(0);

    EXPECT_CALL(*mockMetricEnumerationSubDevices[0], getMetricsAdapter())
        .Times(1)
        .WillOnce(Return(&adapter));

    EXPECT_CALL(adapterGroup, Close())
        .Times(1)
        .WillOnce(Return(TCompletionCode::CC_OK));

    EXPECT_CALL(metricsDevice, GetParams())
        .WillRepeatedly(Return(&metricsDeviceParams));

    // Cardinality is 1 here, not the sub-device count: the test targets one sub-device.
    EXPECT_CALL(metricsDevice, GetConcurrentGroup(_))
        .Times(1)
        .WillRepeatedly(Return(&metricsConcurrentGroup));

    EXPECT_CALL(metricsConcurrentGroup, GetParams())
        .Times(1)
        .WillRepeatedly(Return(&metricsConcurrentGroupParams));

    EXPECT_CALL(metricsConcurrentGroup, GetMetricSet(_))
        .WillRepeatedly(Return(&metricsSet));

    EXPECT_CALL(metricsSet, GetParams())
        .WillRepeatedly(Return(&metricsSetParams));

    EXPECT_CALL(metricsSet, SetApiFiltering(_))
        .WillRepeatedly(Return(TCompletionCode::CC_OK));

    EXPECT_CALL(metricsConcurrentGroup, OpenIoStream(_, _, _, _))
        .Times(1)
        .WillRepeatedly(Return(TCompletionCode::CC_OK));

    EXPECT_CALL(metricsConcurrentGroup, CloseIoStream())
        .Times(1)
        .WillRepeatedly(Return(TCompletionCode::CC_OK));

    uint32_t metricGroupCount = 0;
    EXPECT_EQ(zetMetricGroupGet(metricDeviceHandle, &metricGroupCount, nullptr), ZE_RESULT_SUCCESS);
    EXPECT_EQ(metricGroupCount, 1u);

    EXPECT_EQ(zetMetricGroupGet(metricDeviceHandle, &metricGroupCount, &metricGroupHandle), ZE_RESULT_SUCCESS);
    EXPECT_EQ(metricGroupCount, 1u);
    EXPECT_NE(metricGroupHandle, nullptr);

    EXPECT_EQ(zetMetricGroupGetProperties(metricGroupHandle, &metricGroupProperties), ZE_RESULT_SUCCESS);
    EXPECT_EQ(metricGroupProperties.domain, 0u);
    EXPECT_EQ(metricGroupProperties.samplingType, ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_TIME_BASED);
    EXPECT_EQ(metricGroupProperties.metricCount, metricsSetParams.MetricsCount);
    EXPECT_EQ(strcmp(metricGroupProperties.description, metricsSetParams.ShortName), 0);
    EXPECT_EQ(strcmp(metricGroupProperties.name, metricsSetParams.SymbolName), 0);

    EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), metricDeviceHandle, 1, &metricGroupHandle), ZE_RESULT_SUCCESS);

    EXPECT_EQ(zetMetricStreamerOpen(context->toHandle(), metricDeviceHandle, metricGroupHandle, &streamerDesc, eventHandle, &streamerHandle), ZE_RESULT_SUCCESS);
    EXPECT_NE(streamerHandle, nullptr);

    EXPECT_EQ(zetMetricStreamerClose(streamerHandle), ZE_RESULT_SUCCESS);
}

// Multi-device open where the mocked OpenIoStream succeeds on its first call and fails
// with CC_ERROR_GENERAL on every later call. zetMetricStreamerOpen must return
// ZE_RESULT_ERROR_UNKNOWN and leave the streamer handle null.
TEST_F(MetricStreamerMultiDeviceTest, givenValidArgumentsWhenZetMetricStreamerOpenIsCalledAndOpenIoStreamFailsThenReturnsFail) {

    zet_device_handle_t metricDeviceHandle = devices[0]->toHandle();
    auto &deviceImp = *static_cast(devices[0]);
    const uint32_t subDeviceCount = static_cast(deviceImp.subDevices.size());

    ze_event_handle_t eventHandle = {};

    zet_metric_streamer_handle_t streamerHandle = {};
    zet_metric_streamer_desc_t streamerDesc = {};

    streamerDesc.stype =
ZET_STRUCTURE_TYPE_METRIC_STREAMER_DESC; streamerDesc.notifyEveryNReports = 32768; streamerDesc.samplingPeriod = 1000; Mock metricGroup; zet_metric_group_handle_t metricGroupHandle = metricGroup.toHandle(); metricsDeviceParams.ConcurrentGroupsCount = 1; Mock metricsConcurrentGroup; TConcurrentGroupParams_1_0 metricsConcurrentGroupParams = {}; metricsConcurrentGroupParams.MetricSetsCount = 1; metricsConcurrentGroupParams.SymbolName = "OA"; metricsConcurrentGroupParams.Description = "OA description"; Mock metricsSet; MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams = {}; metricsSetParams.ApiMask = MetricsDiscovery::API_TYPE_IOSTREAM; metricsSetParams.MetricsCount = 0; metricsSetParams.SymbolName = "Metric set name"; metricsSetParams.ShortName = "Metric set description"; metricsSetParams.RawReportSize = 256; openMetricsAdapter(); EXPECT_CALL(metricsDevice, GetParams()) .WillRepeatedly(Return(&metricsDeviceParams)); EXPECT_CALL(metricsDevice, GetConcurrentGroup(_)) .Times(subDeviceCount) .WillRepeatedly(Return(&metricsConcurrentGroup)); EXPECT_CALL(metricsConcurrentGroup, GetParams()) .Times(subDeviceCount) .WillRepeatedly(Return(&metricsConcurrentGroupParams)); EXPECT_CALL(metricsConcurrentGroup, GetMetricSet(_)) .WillRepeatedly(Return(&metricsSet)); EXPECT_CALL(metricsSet, GetParams()) .WillRepeatedly(Return(&metricsSetParams)); EXPECT_CALL(metricsSet, SetApiFiltering(_)) .WillRepeatedly(Return(TCompletionCode::CC_OK)); EXPECT_CALL(metricsConcurrentGroup, OpenIoStream(_, _, _, _)) .Times(subDeviceCount) .WillOnce(Return(TCompletionCode::CC_OK)) .WillRepeatedly(Return(TCompletionCode::CC_ERROR_GENERAL)); uint32_t metricGroupCount = 0; EXPECT_EQ(zetMetricGroupGet(metricDeviceHandle, &metricGroupCount, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(zetMetricGroupGet(metricDeviceHandle, &metricGroupCount, &metricGroupHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), metricDeviceHandle, 1, &metricGroupHandle), ZE_RESULT_SUCCESS); 
EXPECT_EQ(zetMetricStreamerOpen(context->toHandle(), metricDeviceHandle, metricGroupHandle, &streamerDesc, eventHandle, &streamerHandle), ZE_RESULT_ERROR_UNKNOWN); EXPECT_EQ(streamerHandle, nullptr); } /* Streamer-close failure path on devices[0]: the streamer opens successfully, every mocked CloseIoStream call returns CC_ERROR_GENERAL, and zetMetricStreamerClose is expected to return ZE_RESULT_ERROR_UNKNOWN. */ TEST_F(MetricStreamerMultiDeviceTest, givenValidArgumentsAndCloseIoStreamFailsWhenzetMetricStreamerCloseIsCalledThenReturnsFail) { zet_device_handle_t metricDeviceHandle = devices[0]->toHandle(); auto &deviceImp = *static_cast(devices[0]); const uint32_t subDeviceCount = static_cast(deviceImp.subDevices.size()); ze_event_handle_t eventHandle = {}; zet_metric_streamer_handle_t streamerHandle = {}; zet_metric_streamer_desc_t streamerDesc = {}; streamerDesc.stype = ZET_STRUCTURE_TYPE_METRIC_STREAMER_DESC; streamerDesc.notifyEveryNReports = 32768; streamerDesc.samplingPeriod = 1000; Mock metricGroup; zet_metric_group_handle_t metricGroupHandle = metricGroup.toHandle(); zet_metric_group_properties_t metricGroupProperties = {}; metricsDeviceParams.ConcurrentGroupsCount = 1; Mock metricsConcurrentGroup; TConcurrentGroupParams_1_0 metricsConcurrentGroupParams = {}; metricsConcurrentGroupParams.MetricSetsCount = 1; metricsConcurrentGroupParams.SymbolName = "OA"; metricsConcurrentGroupParams.Description = "OA description"; Mock metricsSet; MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams = {}; metricsSetParams.ApiMask = MetricsDiscovery::API_TYPE_IOSTREAM; metricsSetParams.MetricsCount = 0; metricsSetParams.SymbolName = "Metric set name"; metricsSetParams.ShortName = "Metric set description"; metricsSetParams.RawReportSize = 256; openMetricsAdapter(); EXPECT_CALL(metricsDevice, GetParams()) .WillRepeatedly(Return(&metricsDeviceParams)); EXPECT_CALL(metricsDevice, GetConcurrentGroup(_)) .Times(subDeviceCount) .WillRepeatedly(Return(&metricsConcurrentGroup)); EXPECT_CALL(metricsConcurrentGroup, GetParams()) .Times(subDeviceCount) .WillRepeatedly(Return(&metricsConcurrentGroupParams)); EXPECT_CALL(metricsConcurrentGroup, GetMetricSet(_)) 
.WillRepeatedly(Return(&metricsSet)); EXPECT_CALL(metricsSet, GetParams()) .WillRepeatedly(Return(&metricsSetParams)); EXPECT_CALL(metricsSet, SetApiFiltering(_)) .WillRepeatedly(Return(TCompletionCode::CC_OK)); EXPECT_CALL(metricsConcurrentGroup, OpenIoStream(_, _, _, _)) .Times(subDeviceCount) .WillRepeatedly(Return(TCompletionCode::CC_OK)); /* Every CloseIoStream call fails; this is what drives the ZE_RESULT_ERROR_UNKNOWN expectation on zetMetricStreamerClose below. */ EXPECT_CALL(metricsConcurrentGroup, CloseIoStream()) .Times(subDeviceCount) .WillRepeatedly(Return(TCompletionCode::CC_ERROR_GENERAL)); uint32_t metricGroupCount = 0; EXPECT_EQ(zetMetricGroupGet(metricDeviceHandle, &metricGroupCount, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); EXPECT_EQ(zetMetricGroupGet(metricDeviceHandle, &metricGroupCount, &metricGroupHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); EXPECT_NE(metricGroupHandle, nullptr); EXPECT_EQ(zetMetricGroupGetProperties(metricGroupHandle, &metricGroupProperties), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupProperties.domain, 0u); EXPECT_EQ(metricGroupProperties.samplingType, ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_TIME_BASED); EXPECT_EQ(metricGroupProperties.metricCount, metricsSetParams.MetricsCount); EXPECT_EQ(strcmp(metricGroupProperties.description, metricsSetParams.ShortName), 0); EXPECT_EQ(strcmp(metricGroupProperties.name, metricsSetParams.SymbolName), 0); EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), metricDeviceHandle, 1, &metricGroupHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(zetMetricStreamerOpen(context->toHandle(), metricDeviceHandle, metricGroupHandle, &streamerDesc, eventHandle, &streamerHandle), ZE_RESULT_SUCCESS); EXPECT_NE(streamerHandle, nullptr); EXPECT_EQ(zetMetricStreamerClose(streamerHandle), ZE_RESULT_ERROR_UNKNOWN); /* NOTE(review): cleanup() is a fixture helper not visible here; presumably it releases what the failed close left behind - confirm against the fixture. */ cleanup(metricDeviceHandle, streamerHandle); } /* Opens a streamer with an event created from a one-entry event pool; the first mocked WaitForReports call returns CC_ERROR_GENERAL and later calls CC_OK, and zeEventQueryStatus on that event is expected to return ZE_RESULT_SUCCESS. */ TEST_F(MetricStreamerMultiDeviceTest, givenValidArgumentsWhenZetMetricStreamerOpenIsCalledThenVerifyEventQueryStatusIsSuccess) { zet_device_handle_t metricDeviceHandle = devices[0]->toHandle(); auto &deviceImp = *static_cast(devices[0]); const 
uint32_t subDeviceCount = static_cast(deviceImp.subDevices.size()); /* Descriptors for a pool holding a single event; the event sits at index 0 with host wait scope and device signal scope. */ ze_event_pool_handle_t eventPoolHandle = {}; ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; eventPoolDesc.flags = 0; eventPoolDesc.stype = ZE_STRUCTURE_TYPE_EVENT_POOL_DESC; ze_event_handle_t eventHandle = {}; ze_event_desc_t eventDesc = {}; eventDesc.index = 0; eventDesc.stype = ZE_STRUCTURE_TYPE_EVENT_DESC; eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; eventDesc.signal = ZE_EVENT_SCOPE_FLAG_DEVICE; zet_metric_streamer_handle_t streamerHandle = {}; zet_metric_streamer_desc_t streamerDesc = {}; streamerDesc.stype = ZET_STRUCTURE_TYPE_METRIC_STREAMER_DESC; streamerDesc.notifyEveryNReports = 32768; streamerDesc.samplingPeriod = 1000; Mock metricGroup; zet_metric_group_handle_t metricGroupHandle = metricGroup.toHandle(); metricsDeviceParams.ConcurrentGroupsCount = 1; Mock metricsConcurrentGroup; TConcurrentGroupParams_1_0 metricsConcurrentGroupParams = {}; metricsConcurrentGroupParams.MetricSetsCount = 1; metricsConcurrentGroupParams.SymbolName = "OA"; metricsConcurrentGroupParams.Description = "OA description"; Mock metricsSet; MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams = {}; metricsSetParams.ApiMask = MetricsDiscovery::API_TYPE_IOSTREAM; metricsSetParams.MetricsCount = 0; metricsSetParams.SymbolName = "Metric set name"; metricsSetParams.ShortName = "Metric set description"; metricsSetParams.RawReportSize = 256; openMetricsAdapter(); EXPECT_CALL(metricsDevice, GetParams()) .WillRepeatedly(Return(&metricsDeviceParams)); EXPECT_CALL(metricsDevice, GetConcurrentGroup(_)) .Times(subDeviceCount) .WillRepeatedly(Return(&metricsConcurrentGroup)); EXPECT_CALL(metricsConcurrentGroup, GetParams()) .Times(subDeviceCount) .WillRepeatedly(Return(&metricsConcurrentGroupParams)); EXPECT_CALL(metricsConcurrentGroup, GetMetricSet(_)) .WillRepeatedly(Return(&metricsSet)); EXPECT_CALL(metricsSet, GetParams()) .WillRepeatedly(Return(&metricsSetParams)); EXPECT_CALL(metricsSet, 
SetApiFiltering(_)) .WillRepeatedly(Return(TCompletionCode::CC_OK)); EXPECT_CALL(metricsConcurrentGroup, OpenIoStream(_, _, _, _)) .Times(subDeviceCount) .WillRepeatedly(Return(TCompletionCode::CC_OK)); EXPECT_CALL(metricsConcurrentGroup, CloseIoStream()) .Times(subDeviceCount) .WillRepeatedly(Return(TCompletionCode::CC_OK)); /* WaitForReports is expected subDeviceCount times: the first call fails with CC_ERROR_GENERAL, the remaining calls return CC_OK. */ EXPECT_CALL(metricsConcurrentGroup, WaitForReports(_)) .Times(subDeviceCount) .WillOnce(Return(TCompletionCode::CC_ERROR_GENERAL)) .WillRepeatedly(Return(TCompletionCode::CC_OK)); uint32_t metricGroupCount = 0; EXPECT_EQ(zetMetricGroupGet(metricDeviceHandle, &metricGroupCount, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); EXPECT_EQ(zetMetricGroupGet(metricDeviceHandle, &metricGroupCount, &metricGroupHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); EXPECT_NE(metricGroupHandle, nullptr); EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), metricDeviceHandle, 1, &metricGroupHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(zeEventPoolCreate(context->toHandle(), &eventPoolDesc, 1, &metricDeviceHandle, &eventPoolHandle), ZE_RESULT_SUCCESS); EXPECT_NE(eventPoolHandle, nullptr); EXPECT_EQ(zeEventCreate(eventPoolHandle, &eventDesc, &eventHandle), ZE_RESULT_SUCCESS); EXPECT_NE(eventHandle, nullptr); EXPECT_EQ(zetMetricStreamerOpen(context->toHandle(), metricDeviceHandle, metricGroupHandle, &streamerDesc, eventHandle, &streamerHandle), ZE_RESULT_SUCCESS); EXPECT_NE(streamerHandle, nullptr); EXPECT_EQ(zeEventQueryStatus(eventHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(zetMetricStreamerClose(streamerHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(zeEventDestroy(eventHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(zeEventPoolDestroy(eventPoolHandle), ZE_RESULT_SUCCESS); } /* Reads streamer data on devices[0]: the two mocked ReadIoStream calls report 10 and 20 reports, and the test checks the returned raw size and the per-sub-device offset/size tables located through MetricGroupCalculateHeader. */ TEST_F(MetricStreamerMultiDeviceTest, givenValidArgumentsWhenZetMetricStreamerReadDataIsCalledThenReturnsSuccess) { zet_device_handle_t metricDeviceHandle = devices[0]->toHandle(); auto &deviceImp = *static_cast(devices[0]); const uint32_t subDeviceCount = 
static_cast(deviceImp.subDevices.size()); ze_event_handle_t eventHandle = {}; zet_metric_streamer_handle_t streamerHandle = {}; zet_metric_streamer_desc_t streamerDesc = {}; streamerDesc.stype = ZET_STRUCTURE_TYPE_METRIC_STREAMER_DESC; streamerDesc.notifyEveryNReports = 32768; streamerDesc.samplingPeriod = 1000; Mock metricGroup; zet_metric_group_handle_t metricGroupHandle = metricGroup.toHandle(); metricsDeviceParams.ConcurrentGroupsCount = 1; Mock metricsConcurrentGroup; TConcurrentGroupParams_1_0 metricsConcurrentGroupParams = {}; metricsConcurrentGroupParams.MetricSetsCount = 1; metricsConcurrentGroupParams.SymbolName = "OA"; metricsConcurrentGroupParams.Description = "OA description"; Mock metricsSet; MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams = {}; metricsSetParams.ApiMask = MetricsDiscovery::API_TYPE_IOSTREAM; metricsSetParams.MetricsCount = 0; metricsSetParams.SymbolName = "Metric set name"; metricsSetParams.ShortName = "Metric set description"; metricsSetParams.RawReportSize = 256; openMetricsAdapter(); EXPECT_CALL(metricsDevice, GetParams()) .WillRepeatedly(Return(&metricsDeviceParams)); EXPECT_CALL(metricsDevice, GetConcurrentGroup(_)) .Times(subDeviceCount) .WillRepeatedly(Return(&metricsConcurrentGroup)); EXPECT_CALL(metricsConcurrentGroup, GetParams()) .Times(subDeviceCount) .WillRepeatedly(Return(&metricsConcurrentGroupParams)); EXPECT_CALL(metricsConcurrentGroup, GetMetricSet(_)) .WillRepeatedly(Return(&metricsSet)); EXPECT_CALL(metricsSet, GetParams()) .WillRepeatedly(Return(&metricsSetParams)); EXPECT_CALL(metricsSet, SetApiFiltering(_)) .WillRepeatedly(Return(TCompletionCode::CC_OK)); EXPECT_CALL(metricsConcurrentGroup, OpenIoStream(_, _, _, _)) .Times(subDeviceCount) .WillRepeatedly(Return(TCompletionCode::CC_OK)); /* ReadIoStream writes the report count through its first argument: 10 on the first call, 20 on the second. Only two actions are supplied, so the Times(subDeviceCount) expectation presumably assumes two sub-devices - confirm against the fixture. */ EXPECT_CALL(metricsConcurrentGroup, ReadIoStream(_, _, _)) .Times(subDeviceCount) .WillOnce(DoAll(::testing::SetArgPointee<0>(10), Return(TCompletionCode::CC_OK))) .WillOnce(DoAll(::testing::SetArgPointee<0>(20), 
Return(TCompletionCode::CC_OK))); EXPECT_CALL(metricsConcurrentGroup, CloseIoStream()) .Times(subDeviceCount) .WillRepeatedly(Return(TCompletionCode::CC_OK)); uint32_t metricGroupCount = 0; EXPECT_EQ(zetMetricGroupGet(metricDeviceHandle, &metricGroupCount, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); EXPECT_EQ(zetMetricGroupGet(metricDeviceHandle, &metricGroupCount, &metricGroupHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); EXPECT_NE(metricGroupHandle, nullptr); EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), metricDeviceHandle, 1, &metricGroupHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(zetMetricStreamerOpen(context->toHandle(), metricDeviceHandle, metricGroupHandle, &streamerDesc, eventHandle, &streamerHandle), ZE_RESULT_SUCCESS); EXPECT_NE(streamerHandle, nullptr); size_t rawSize = 0; uint32_t reportCount = 256; EXPECT_EQ(zetMetricStreamerReadData(streamerHandle, reportCount, &rawSize, nullptr), ZE_RESULT_SUCCESS); /* The size query (null buffer) is expected to cover reportCount raw reports per sub-device plus one MetricGroupCalculateHeader and two uint32_t table entries per sub-device. */ const size_t expectedRawSize = (metricsSetParams.RawReportSize * reportCount * subDeviceCount) + sizeof(MetricGroupCalculateHeader) + (2 * sizeof(uint32_t) * subDeviceCount); EXPECT_EQ(rawSize, expectedRawSize); std::vector rawData; rawData.resize(rawSize); EXPECT_EQ(zetMetricStreamerReadData(streamerHandle, reportCount, &rawSize, rawData.data()), ZE_RESULT_SUCCESS); /* 30 = 10 + 20, the report counts handed back by the two mocked ReadIoStream calls. */ EXPECT_EQ(rawSize, (metricsSetParams.RawReportSize * 30) + sizeof(MetricGroupCalculateHeader) + (2 * sizeof(uint32_t) * subDeviceCount)); MetricGroupCalculateHeader *rawDataHeader = reinterpret_cast(rawData.data()); EXPECT_NE(rawDataHeader, nullptr); /* The header's rawDataOffsets / rawDataSizes fields are used as byte offsets from the start of rawData to arrays indexed by sub-device. */ for (uint32_t i = 0; i < subDeviceCount; ++i) { uint32_t rawDataOffset = (reinterpret_cast(rawData.data() + rawDataHeader->rawDataOffsets))[i]; uint32_t rawDataSize = (reinterpret_cast(rawData.data() + rawDataHeader->rawDataSizes))[i]; if (i == 0) { EXPECT_EQ(rawDataOffset, 0u); EXPECT_EQ(rawDataSize, metricsSetParams.RawReportSize * 10); } else if (i == 1) { EXPECT_EQ(rawDataOffset, 
metricsSetParams.RawReportSize * 10); EXPECT_EQ(rawDataSize, metricsSetParams.RawReportSize * 20); } } EXPECT_EQ(zetMetricStreamerClose(streamerHandle), ZE_RESULT_SUCCESS); } /* Read failure path on devices[0]: the single expected ReadIoStream call returns CC_ERROR_GENERAL, so the size query still succeeds but zetMetricStreamerReadData with a real buffer is expected to return ZE_RESULT_ERROR_UNKNOWN. */ TEST_F(MetricStreamerMultiDeviceTest, givenValidArgumentsWhenZetMetricStreamerReadDataIsCalledAndReadIoStreamFailsThenReturnsFailure) { zet_device_handle_t metricDeviceHandle = devices[0]->toHandle(); auto &deviceImp = *static_cast(devices[0]); const uint32_t subDeviceCount = static_cast(deviceImp.subDevices.size()); ze_event_handle_t eventHandle = {}; zet_metric_streamer_handle_t streamerHandle = {}; zet_metric_streamer_desc_t streamerDesc = {}; streamerDesc.stype = ZET_STRUCTURE_TYPE_METRIC_STREAMER_DESC; streamerDesc.notifyEveryNReports = 32768; streamerDesc.samplingPeriod = 1000; Mock metricGroup; zet_metric_group_handle_t metricGroupHandle = metricGroup.toHandle(); metricsDeviceParams.ConcurrentGroupsCount = 1; Mock metricsConcurrentGroup; TConcurrentGroupParams_1_0 metricsConcurrentGroupParams = {}; metricsConcurrentGroupParams.MetricSetsCount = 1; metricsConcurrentGroupParams.SymbolName = "OA"; metricsConcurrentGroupParams.Description = "OA description"; Mock metricsSet; MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams = {}; metricsSetParams.ApiMask = MetricsDiscovery::API_TYPE_IOSTREAM; metricsSetParams.MetricsCount = 0; metricsSetParams.SymbolName = "Metric set name"; metricsSetParams.ShortName = "Metric set description"; metricsSetParams.RawReportSize = 256; openMetricsAdapter(); EXPECT_CALL(metricsDevice, GetParams()) .WillRepeatedly(Return(&metricsDeviceParams)); EXPECT_CALL(metricsDevice, GetConcurrentGroup(_)) .Times(subDeviceCount) .WillRepeatedly(Return(&metricsConcurrentGroup)); EXPECT_CALL(metricsConcurrentGroup, GetParams()) .Times(subDeviceCount) .WillRepeatedly(Return(&metricsConcurrentGroupParams)); EXPECT_CALL(metricsConcurrentGroup, GetMetricSet(_)) .WillRepeatedly(Return(&metricsSet)); EXPECT_CALL(metricsSet, GetParams()) 
.WillRepeatedly(Return(&metricsSetParams)); EXPECT_CALL(metricsSet, SetApiFiltering(_)) .WillRepeatedly(Return(TCompletionCode::CC_OK)); EXPECT_CALL(metricsConcurrentGroup, OpenIoStream(_, _, _, _)) .Times(subDeviceCount) .WillRepeatedly(Return(TCompletionCode::CC_OK)); /* Exactly one ReadIoStream call is expected (not one per sub-device) and it fails with CC_ERROR_GENERAL. */ EXPECT_CALL(metricsConcurrentGroup, ReadIoStream(_, _, _)) .Times(1) .WillRepeatedly(Return(TCompletionCode::CC_ERROR_GENERAL)); EXPECT_CALL(metricsConcurrentGroup, CloseIoStream()) .Times(subDeviceCount) .WillRepeatedly(Return(TCompletionCode::CC_OK)); uint32_t metricGroupCount = 0; EXPECT_EQ(zetMetricGroupGet(metricDeviceHandle, &metricGroupCount, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); EXPECT_EQ(zetMetricGroupGet(metricDeviceHandle, &metricGroupCount, &metricGroupHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); EXPECT_NE(metricGroupHandle, nullptr); EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), metricDeviceHandle, 1, &metricGroupHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(zetMetricStreamerOpen(context->toHandle(), metricDeviceHandle, metricGroupHandle, &streamerDesc, eventHandle, &streamerHandle), ZE_RESULT_SUCCESS); EXPECT_NE(streamerHandle, nullptr); size_t rawSize = 0; uint32_t reportCount = 256; EXPECT_EQ(zetMetricStreamerReadData(streamerHandle, reportCount, &rawSize, nullptr), ZE_RESULT_SUCCESS); const size_t expectedRawSize = (metricsSetParams.RawReportSize * reportCount * subDeviceCount) + sizeof(MetricGroupCalculateHeader) + (2 * sizeof(uint32_t) * subDeviceCount); EXPECT_EQ(rawSize, expectedRawSize); std::vector rawData; rawData.resize(rawSize); EXPECT_EQ(zetMetricStreamerReadData(streamerHandle, reportCount, &rawSize, rawData.data()), ZE_RESULT_ERROR_UNKNOWN); EXPECT_EQ(zetMetricStreamerClose(streamerHandle), ZE_RESULT_SUCCESS); } /* Appends ten streamer markers to a RenderCompute command list created for devices[0] while a streamer is open; every zetCommandListAppendMetricStreamerMarker call is expected to return ZE_RESULT_SUCCESS. */ TEST_F(MetricStreamerMultiDeviceTest, givenMultipleMarkerInsertionsWhenZetCommandListAppendMetricStreamerMarkerIsCalledThenReturnsSuccess) { zet_device_handle_t metricDeviceHandle = devices[0]->toHandle(); auto 
&deviceImp = *static_cast(devices[0]); const uint32_t subDeviceCount = static_cast(deviceImp.subDevices.size()); ze_event_handle_t eventHandle = {}; ze_result_t returnValue; std::unique_ptr commandList(CommandList::create(productFamily, devices[0], NEO::EngineGroupType::RenderCompute, 0u, returnValue)); zet_metric_streamer_handle_t streamerHandle = {}; zet_metric_streamer_desc_t streamerDesc = {}; streamerDesc.stype = ZET_STRUCTURE_TYPE_METRIC_STREAMER_DESC; streamerDesc.notifyEveryNReports = 32768; streamerDesc.samplingPeriod = 1000; Mock metricGroup; zet_metric_group_handle_t metricGroupHandle = metricGroup.toHandle(); zet_metric_group_properties_t metricGroupProperties = {}; metricsDeviceParams.ConcurrentGroupsCount = 1; Mock metricsConcurrentGroup; TConcurrentGroupParams_1_0 metricsConcurrentGroupParams = {}; metricsConcurrentGroupParams.MetricSetsCount = 1; metricsConcurrentGroupParams.SymbolName = "OA"; metricsConcurrentGroupParams.Description = "OA description"; Mock metricsSet; MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams = {}; metricsSetParams.ApiMask = MetricsDiscovery::API_TYPE_IOSTREAM; metricsSetParams.MetricsCount = 0; metricsSetParams.SymbolName = "Metric set name"; metricsSetParams.ShortName = "Metric set description"; metricsSetParams.RawReportSize = 256; /* contextHandle and commandBufferSize are the values the mocked metrics-library calls (MockContextCreate, MockCommandBufferGetSize) write back to their callers. */ TypedValue_1_0 value = {}; value.Type = ValueType::Uint32; value.ValueUInt32 = 64; ContextHandle_1_0 contextHandle = {&value}; CommandBufferSize_1_0 commandBufferSize = {}; commandBufferSize.GpuMemorySize = 100; openMetricsAdapter(); EXPECT_CALL(*mockMetricEnumerationSubDevices[0], isInitialized()) .Times(1) .WillOnce(Return(true)); /* Times(10) matches the ten marker values appended at the end of this test. */ EXPECT_CALL(*mockMetricsLibrarySubDevices[0]->g_mockApi, MockCommandBufferGetSize(_, _)) .Times(10) .WillRepeatedly(DoAll(::testing::SetArgPointee<1>(::testing::ByRef(commandBufferSize)), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrarySubDevices[0]->g_mockApi, MockCommandBufferGet(_)) .Times(10) .WillRepeatedly(Return(StatusCode::Success)); 
EXPECT_CALL(*mockMetricsLibrarySubDevices[0], getContextData(_, _)) .Times(1) .WillOnce(Return(true)); /* MockContextCreate stores contextHandle through its third argument; exactly one matching MockContextDelete is expected. */ EXPECT_CALL(*mockMetricsLibrarySubDevices[0]->g_mockApi, MockContextCreate(_, _, _)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<2>(contextHandle), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrarySubDevices[0]->g_mockApi, MockContextDelete(_)) .Times(1) .WillOnce(Return(StatusCode::Success)); EXPECT_CALL(metricsDevice, GetParams()) .WillRepeatedly(Return(&metricsDeviceParams)); EXPECT_CALL(metricsDevice, GetConcurrentGroup(_)) .Times(subDeviceCount) .WillRepeatedly(Return(&metricsConcurrentGroup)); EXPECT_CALL(metricsConcurrentGroup, GetParams()) .Times(subDeviceCount) .WillRepeatedly(Return(&metricsConcurrentGroupParams)); EXPECT_CALL(metricsConcurrentGroup, GetMetricSet(_)) .WillRepeatedly(Return(&metricsSet)); EXPECT_CALL(metricsSet, GetParams()) .WillRepeatedly(Return(&metricsSetParams)); EXPECT_CALL(metricsSet, SetApiFiltering(_)) .WillRepeatedly(Return(TCompletionCode::CC_OK)); EXPECT_CALL(metricsConcurrentGroup, OpenIoStream(_, _, _, _)) .Times(subDeviceCount) .WillRepeatedly(Return(TCompletionCode::CC_OK)); EXPECT_CALL(metricsConcurrentGroup, CloseIoStream()) .Times(subDeviceCount) .WillRepeatedly(Return(TCompletionCode::CC_OK)); uint32_t metricGroupCount = 0; EXPECT_EQ(zetMetricGroupGet(metricDeviceHandle, &metricGroupCount, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); EXPECT_EQ(zetMetricGroupGet(metricDeviceHandle, &metricGroupCount, &metricGroupHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); EXPECT_NE(metricGroupHandle, nullptr); EXPECT_EQ(zetMetricGroupGetProperties(metricGroupHandle, &metricGroupProperties), ZE_RESULT_SUCCESS); EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), metricDeviceHandle, 1, &metricGroupHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(zetMetricStreamerOpen(context->toHandle(), metricDeviceHandle, metricGroupHandle, &streamerDesc, eventHandle, &streamerHandle), 
ZE_RESULT_SUCCESS); EXPECT_NE(streamerHandle, nullptr); /* Ten marker values; each one is appended to the command list in turn and must succeed. */ std::array markerValues = {1, 2, 3, 4, 5, 6, 7, 8, 9, 0}; for (auto &markerValue : markerValues) { EXPECT_EQ(zetCommandListAppendMetricStreamerMarker(commandList->toHandle(), streamerHandle, markerValue), ZE_RESULT_SUCCESS); } EXPECT_EQ(zetMetricStreamerClose(streamerHandle), ZE_RESULT_SUCCESS); } } // namespace ult } // namespace L0 test_metric_oa_streamer_3.cpp000066400000000000000000000252141422164147700347470ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/metrics/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "level_zero/tools/test/unit_tests/sources/metrics/mock_metric_oa.h" using ::testing::_; using ::testing::Return; namespace L0 { namespace ult { using MetricStreamerMultiDeviceTest = Test; /* With DebugManager.flags.EnableWalkerPartition set to 1, opens a streamer on deviceImp.subDevices[0] and expects ten marker appends on a command list created for that sub-device to return ZE_RESULT_SUCCESS. */ TEST_F(MetricStreamerMultiDeviceTest, givenEnableWalkerPartitionIsOnWhenZetCommandListAppendMetricStreamerMarkerIsCalledForSubDeviceThenReturnsSuccess) { DebugManagerStateRestore restorer; DebugManager.flags.EnableWalkerPartition.set(1); auto &deviceImp = *static_cast(devices[0]); zet_device_handle_t metricDeviceHandle = deviceImp.subDevices[0]->toHandle(); ze_event_handle_t eventHandle = {}; ze_result_t returnValue; std::unique_ptr commandList(CommandList::create(productFamily, deviceImp.subDevices[0], NEO::EngineGroupType::RenderCompute, 0u, returnValue)); zet_metric_streamer_handle_t streamerHandle = {}; zet_metric_streamer_desc_t streamerDesc = {}; streamerDesc.stype = ZET_STRUCTURE_TYPE_METRIC_STREAMER_DESC; streamerDesc.notifyEveryNReports = 32768; streamerDesc.samplingPeriod = 1000; Mock metricGroup; zet_metric_group_handle_t metricGroupHandle = metricGroup.toHandle(); zet_metric_group_properties_t metricGroupProperties = {}; metricsDeviceParams.ConcurrentGroupsCount = 1; Mock metricsConcurrentGroup; TConcurrentGroupParams_1_0 metricsConcurrentGroupParams = {}; 
metricsConcurrentGroupParams.MetricSetsCount = 1; metricsConcurrentGroupParams.SymbolName = "OA"; metricsConcurrentGroupParams.Description = "OA description"; Mock metricsSet; MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams = {}; metricsSetParams.ApiMask = MetricsDiscovery::API_TYPE_IOSTREAM; metricsSetParams.MetricsCount = 0; metricsSetParams.SymbolName = "Metric set name"; metricsSetParams.ShortName = "Metric set description"; metricsSetParams.RawReportSize = 256; TypedValue_1_0 value = {}; value.Type = ValueType::Uint32; value.ValueUInt32 = 64; ContextHandle_1_0 contextHandle = {&value}; CommandBufferSize_1_0 commandBufferSize = {}; commandBufferSize.GpuMemorySize = 100; /* Only sub-device 0 is opened here, so the metrics-discovery expectations below use Times(1) rather than a per-sub-device count. */ openMetricsAdapterSubDevice(0); EXPECT_CALL(*mockMetricEnumerationSubDevices[0], isInitialized()) .Times(1) .WillOnce(Return(true)); EXPECT_CALL(*mockMetricsLibrarySubDevices[0]->g_mockApi, MockCommandBufferGetSize(_, _)) .Times(10) .WillRepeatedly(DoAll(::testing::SetArgPointee<1>(::testing::ByRef(commandBufferSize)), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrarySubDevices[0]->g_mockApi, MockCommandBufferGet(_)) .Times(10) .WillRepeatedly(Return(StatusCode::Success)); EXPECT_CALL(*mockMetricsLibrarySubDevices[0], getContextData(_, _)) .Times(1) .WillOnce(Return(true)); EXPECT_CALL(*mockMetricsLibrarySubDevices[0]->g_mockApi, MockContextCreate(_, _, _)) .Times(1) .WillOnce(DoAll(::testing::SetArgPointee<2>(contextHandle), Return(StatusCode::Success))); EXPECT_CALL(*mockMetricsLibrarySubDevices[0]->g_mockApi, MockContextDelete(_)) .Times(1) .WillOnce(Return(StatusCode::Success)); EXPECT_CALL(metricsDevice, GetParams()) .WillRepeatedly(Return(&metricsDeviceParams)); EXPECT_CALL(metricsDevice, GetConcurrentGroup(_)) .Times(1) .WillRepeatedly(Return(&metricsConcurrentGroup)); EXPECT_CALL(metricsConcurrentGroup, GetParams()) .Times(1) .WillRepeatedly(Return(&metricsConcurrentGroupParams)); EXPECT_CALL(metricsConcurrentGroup, GetMetricSet(_)) 
.WillRepeatedly(Return(&metricsSet)); EXPECT_CALL(metricsSet, GetParams()) .WillRepeatedly(Return(&metricsSetParams)); EXPECT_CALL(metricsSet, SetApiFiltering(_)) .WillRepeatedly(Return(TCompletionCode::CC_OK)); EXPECT_CALL(metricsConcurrentGroup, OpenIoStream(_, _, _, _)) .Times(1) .WillRepeatedly(Return(TCompletionCode::CC_OK)); EXPECT_CALL(metricsConcurrentGroup, CloseIoStream()) .Times(1) .WillRepeatedly(Return(TCompletionCode::CC_OK)); uint32_t metricGroupCount = 0; EXPECT_EQ(zetMetricGroupGet(metricDeviceHandle, &metricGroupCount, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); EXPECT_EQ(zetMetricGroupGet(metricDeviceHandle, &metricGroupCount, &metricGroupHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); EXPECT_NE(metricGroupHandle, nullptr); EXPECT_EQ(zetMetricGroupGetProperties(metricGroupHandle, &metricGroupProperties), ZE_RESULT_SUCCESS); EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), metricDeviceHandle, 1, &metricGroupHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(zetMetricStreamerOpen(context->toHandle(), metricDeviceHandle, metricGroupHandle, &streamerDesc, eventHandle, &streamerHandle), ZE_RESULT_SUCCESS); EXPECT_NE(streamerHandle, nullptr); std::array markerValues = {1, 2, 3, 4, 5, 6, 7, 8, 9, 0}; for (auto &markerValue : markerValues) { EXPECT_EQ(zetCommandListAppendMetricStreamerMarker(commandList->toHandle(), streamerHandle, markerValue), ZE_RESULT_SUCCESS); } EXPECT_EQ(zetMetricStreamerClose(streamerHandle), ZE_RESULT_SUCCESS); } /* Reads raw streamer data from devices[0] and passes it to zetMetricGroupCalculateMultipleMetricValuesExp twice - once with null output arrays to obtain the counts, once with sized arrays to fetch the results - then checks the per-data-set metric counts. */ TEST_F(MetricStreamerMultiDeviceTest, givenValidArgumentsWhenZetMetricGroupCalculateMetricValuesExpThenReturnsSuccess) { zet_device_handle_t metricDeviceHandle = devices[0]->toHandle(); auto &deviceImp = *static_cast(devices[0]); const uint32_t subDeviceCount = static_cast(deviceImp.subDevices.size()); ze_event_handle_t eventHandle = {}; zet_metric_streamer_handle_t streamerHandle = {}; zet_metric_streamer_desc_t streamerDesc = {}; streamerDesc.stype = 
ZET_STRUCTURE_TYPE_METRIC_STREAMER_DESC; streamerDesc.notifyEveryNReports = 32768; streamerDesc.samplingPeriod = 1000; Mock metricGroup; zet_metric_group_handle_t metricGroupHandle = metricGroup.toHandle(); metricsDeviceParams.ConcurrentGroupsCount = 1; Mock metricsConcurrentGroup; TConcurrentGroupParams_1_0 metricsConcurrentGroupParams = {}; metricsConcurrentGroupParams.MetricSetsCount = 1; metricsConcurrentGroupParams.SymbolName = "OA"; metricsConcurrentGroupParams.Description = "OA description"; Mock metricsSet; MetricsDiscovery::TMetricSetParams_1_4 metricsSetParams = {}; metricsSetParams.ApiMask = MetricsDiscovery::API_TYPE_IOSTREAM; metricsSetParams.SymbolName = "Metric set name"; metricsSetParams.ShortName = "Metric set description"; metricsSetParams.RawReportSize = 256; metricsSetParams.MetricsCount = 11; Mock metric; MetricsDiscovery::TMetricParams_1_0 metricParams = {}; uint32_t returnedMetricCount = 1; openMetricsAdapter(); EXPECT_CALL(metricsDevice, GetParams()) .WillRepeatedly(Return(&metricsDeviceParams)); EXPECT_CALL(metricsDevice, GetConcurrentGroup(_)) .Times(subDeviceCount) .WillRepeatedly(Return(&metricsConcurrentGroup)); EXPECT_CALL(metricsConcurrentGroup, GetParams()) .Times(subDeviceCount) .WillRepeatedly(Return(&metricsConcurrentGroupParams)); EXPECT_CALL(metricsConcurrentGroup, GetMetricSet(_)) .WillRepeatedly(Return(&metricsSet)); EXPECT_CALL(metricsSet, GetParams()) .WillRepeatedly(Return(&metricsSetParams)); /* GetMetric is expected MetricsCount (11) times for each sub-device. */ EXPECT_CALL(metricsSet, GetMetric(_)) .Times(metricsSetParams.MetricsCount * subDeviceCount) .WillRepeatedly(Return(&metric)); EXPECT_CALL(metricsSet, SetApiFiltering(_)) .WillRepeatedly(Return(TCompletionCode::CC_OK)); EXPECT_CALL(metric, GetParams()) .WillRepeatedly(Return(&metricParams)); EXPECT_CALL(metricsConcurrentGroup, OpenIoStream(_, _, _, _)) .Times(subDeviceCount) .WillRepeatedly(Return(TCompletionCode::CC_OK)); EXPECT_CALL(metricsConcurrentGroup, ReadIoStream(_, _, _)) .Times(subDeviceCount) 
.WillRepeatedly(Return(TCompletionCode::CC_OK)); /* Each mocked CalculateMetrics call writes returnedMetricCount through its fifth argument and returns CC_OK. */ EXPECT_CALL(metricsSet, CalculateMetrics(_, _, _, _, _, _, _)) .Times(subDeviceCount) .WillRepeatedly(DoAll(::testing::SetArgPointee<4>(returnedMetricCount), Return(TCompletionCode::CC_OK))); EXPECT_CALL(metricsConcurrentGroup, CloseIoStream()) .Times(subDeviceCount) .WillRepeatedly(Return(TCompletionCode::CC_OK)); uint32_t metricGroupCount = 0; EXPECT_EQ(zetMetricGroupGet(metricDeviceHandle, &metricGroupCount, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); EXPECT_EQ(zetMetricGroupGet(metricDeviceHandle, &metricGroupCount, &metricGroupHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(metricGroupCount, 1u); EXPECT_NE(metricGroupHandle, nullptr); EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), metricDeviceHandle, 1, &metricGroupHandle), ZE_RESULT_SUCCESS); EXPECT_EQ(zetMetricStreamerOpen(context->toHandle(), metricDeviceHandle, metricGroupHandle, &streamerDesc, eventHandle, &streamerHandle), ZE_RESULT_SUCCESS); EXPECT_NE(streamerHandle, nullptr); size_t rawSize = 0; uint32_t reportCount = 256; EXPECT_EQ(zetMetricStreamerReadData(streamerHandle, reportCount, &rawSize, nullptr), ZE_RESULT_SUCCESS); std::vector rawData; rawData.resize(rawSize); size_t rawRequestSize = rawSize; EXPECT_EQ(zetMetricStreamerReadData(streamerHandle, reportCount, &rawSize, rawData.data()), ZE_RESULT_SUCCESS); EXPECT_EQ(rawSize, rawRequestSize); /* First calculate call passes null output arrays and only retrieves dataCount and totalMetricCount; the result vectors are then sized from those values. */ uint32_t dataCount = 0; uint32_t totalMetricCount = 0; EXPECT_EQ(zetMetricGroupCalculateMultipleMetricValuesExp(metricGroupHandle, ZET_METRIC_GROUP_CALCULATION_TYPE_METRIC_VALUES, rawSize, rawData.data(), &dataCount, &totalMetricCount, nullptr, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(totalMetricCount, subDeviceCount * metricsSetParams.MetricsCount * reportCount); std::vector metricCounts(dataCount); std::vector caculatedRawResults(totalMetricCount); EXPECT_EQ(zetMetricGroupCalculateMultipleMetricValuesExp(metricGroupHandle, ZET_METRIC_GROUP_CALCULATION_TYPE_METRIC_VALUES, rawSize, 
rawData.data(), &dataCount, &totalMetricCount, metricCounts.data(), caculatedRawResults.data()), ZE_RESULT_SUCCESS); /* metricCounts was sized from the dataCount reported by the API; stop the test here instead of indexing out of bounds if fewer than two data sets came back. */ ASSERT_GE(metricCounts.size(), 2u); EXPECT_EQ(metricCounts[0], metricsSetParams.MetricsCount); EXPECT_EQ(metricCounts[1], metricsSetParams.MetricsCount); EXPECT_EQ(zetMetricStreamerClose(streamerHandle), ZE_RESULT_SUCCESS); } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/metrics/windows/000077500000000000000000000000001422164147700306635ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/metrics/windows/CMakeLists.txt000066400000000000000000000003441422164147700334240ustar00rootroot00000000000000# # Copyright (C) 2021-2022 Intel Corporation # # SPDX-License-Identifier: MIT # if(WIN32) target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/test_metric_oa_enumeration_windows.cpp ) endif() test_metric_ip_sampling_windows.cpp000066400000000000000000000030501422164147700377640ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/metrics/windows/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/tools/source/metrics/os_metric_ip_sampling.h" using ::testing::_; using ::testing::Return; namespace L0 { namespace ult { /* Fixture: SetUp runs DeviceFixture::SetUp() and then creates a MetricIpSamplingOsInterface for the fixture's device; TearDown only forwards to DeviceFixture::TearDown(). */ class MetricIpSamplingWindowsTest : public DeviceFixture, public ::testing::Test { public: void SetUp() override { DeviceFixture::SetUp(); metricIpSamplingOsInterface = MetricIpSamplingOsInterface::create(static_cast(*device)); } void TearDown() override { DeviceFixture::TearDown(); } std::unique_ptr metricIpSamplingOsInterface = nullptr; }; /* Every query on the interface is expected to report "not available" / zero size. */ TEST_F(MetricIpSamplingWindowsTest, WhenIpSamplingOsInterfaceIsUsedReturnUnsupported) { EXPECT_FALSE(metricIpSamplingOsInterface->isDependencyAvailable()); EXPECT_FALSE(metricIpSamplingOsInterface->isNReportsAvailable()); 
EXPECT_EQ(metricIpSamplingOsInterface->getRequiredBufferSize(0), 0); EXPECT_EQ(metricIpSamplingOsInterface->getUnitReportSize(), 0); EXPECT_EQ(metricIpSamplingOsInterface->readData(nullptr, nullptr), ZE_RESULT_ERROR_UNSUPPORTED_FEATURE); /* Placeholder passed for both startMeasurement arguments; initialized so the call never receives an indeterminate value. */ uint32_t dummy = 0; EXPECT_EQ(metricIpSamplingOsInterface->startMeasurement(dummy, dummy), ZE_RESULT_ERROR_UNSUPPORTED_FEATURE); EXPECT_EQ(metricIpSamplingOsInterface->stopMeasurement(), ZE_RESULT_ERROR_UNSUPPORTED_FEATURE); } } // namespace ult } // namespace L0 test_metric_oa_enumeration_windows.cpp000066400000000000000000000041321422164147700404710ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/metrics/windows/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/os_interface.h" #include "shared/test/common/mocks/mock_wddm.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/tools/test/unit_tests/sources/metrics/mock_metric_oa.h" #include "gmock/gmock.h" #include "gtest/gtest.h" using ::testing::_; using ::testing::Return; namespace L0 { namespace ult { using MetricEnumerationTestWindows = Test; /* Installs a WddmMock as the root device's driver model, describes one adapter identified by LUID {0, 0}, and expects openMetricsDiscovery() to return ZE_RESULT_SUCCESS when getAdapterId reports the same LUID. */ TEST_F(MetricEnumerationTestWindows, givenCorrectWindowsAdapterWhenGetMetricsAdapterThenReturnSuccess) { auto &rootDevice = neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]; auto &osInterface = rootDevice->osInterface; auto wddm = new WddmMock(*rootDevice); auto adapterGroupParams = TAdapterGroupParams_1_6{}; auto adapterParams = TAdapterParams_1_9{}; osInterface = std::make_unique(); osInterface->setDriverModel(std::unique_ptr(wddm)); adapterGroupParams.AdapterCount = 1; adapterParams.SystemId.Type = MetricsDiscovery::TAdapterIdType::ADAPTER_ID_TYPE_LUID; adapterParams.SystemId.Luid.HighPart = 0; adapterParams.SystemId.Luid.LowPart = 0; openMetricsAdapterGroup(); EXPECT_CALL(adapterGroup, GetParams()) .Times(1) .WillOnce(Return(&adapterGroupParams)); 
EXPECT_CALL(adapterGroup, GetAdapter(_)) .WillRepeatedly(Return(&adapter)); EXPECT_CALL(adapter, GetParams()) .WillRepeatedly(Return(&adapterParams)); EXPECT_CALL(*mockMetricEnumeration, getAdapterId(_, _)) .Times(1) .WillOnce(DoAll(::testing::SetArgReferee<0>(adapterParams.SystemId.Luid.HighPart), ::testing::SetArgReferee<1>(adapterParams.SystemId.Luid.LowPart), Return(true))); EXPECT_CALL(*mockMetricEnumeration, getMetricsAdapter()) .Times(1) .WillOnce([&]() { return mockMetricEnumeration->baseGetMetricsAdapter(); }); EXPECT_EQ(mockMetricEnumeration->openMetricsDiscovery(), ZE_RESULT_SUCCESS); } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/000077500000000000000000000000001422164147700270355ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/CMakeLists.txt000066400000000000000000000003051422164147700315730ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) add_subdirectories() compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/diagnostics/000077500000000000000000000000001422164147700313445ustar00rootroot00000000000000CMakeLists.txt000066400000000000000000000003051422164147700340230ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/diagnostics# # Copyright (C) 2021 Intel Corporation # # SPDX-License-Identifier: MIT # target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) add_subdirectories() 
compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/diagnostics/linux/000077500000000000000000000000001422164147700325035ustar00rootroot00000000000000CMakeLists.txt000066400000000000000000000012401422164147700351610ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/diagnostics/linux# # Copyright (C) 2021-2022 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_TESTS_TOOLS_SYSMAN_DIAGNOSTICS_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}test_zes_sysman_diagnostics.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_zes_sysman_diagnostics.h ) if((NEO_ENABLE_i915_PRELIM_DETECTION) AND ("${BRANCH_TYPE}" STREQUAL "")) list(REMOVE_ITEM L0_TESTS_TOOLS_SYSMAN_DIAGNOSTICS_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/test_zes_sysman_diagnostics.cpp ) endif() if(UNIX) target_sources(${TARGET_NAME} PRIVATE ${L0_TESTS_TOOLS_SYSMAN_DIAGNOSTICS_LINUX} ) endif() mock_zes_sysman_diagnostics.h000066400000000000000000000247071422164147700404020ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/diagnostics/linux/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/core/test/unit_tests/mock.h" #include "level_zero/tools/source/sysman/diagnostics/linux/os_diagnostics_imp.h" #include "level_zero/tools/test/unit_tests/sources/sysman/linux/mock_sysman_fixture.h" namespace L0 { namespace ult { uint32_t mockDiagHandleCount = 2; const std::string mockQuiescentGpuFile("quiesce_gpu"); const std::string mockinvalidateLmemFile("invalidate_lmem_mmaps"); const std::vector mockSupportedDiagTypes = {"MOCKSUITE1", "MOCKSUITE2"}; const std::string deviceDirDiag("device"); const std::string mockdeviceDirDiag("/sys/devices/pci0000:89/0000:89:02.0/0000:8a:00.0/0000:8b:01.0/0000:8c:00.0"); const std::string mockRemove("remove"); const std::string mockRescan("rescan"); const 
std::string mockRealPathConfig("/sys/devices/pci0000:89/0000:89:02.0/0000:8a:00.0/0000:8b:01.0/config"); const std::string mockRootAddress("0000:8a:00.0"); const std::string mockRootWrongAddress("0000:7a:00.0"); const std::string mockSlotPath1("/sys/bus/pci/slots/1/"); const std::string mockDeviceName("/MOCK_DEVICE_NAME"); const std::string mockSlotPath("/sys/bus/pci/slots/"); class DiagnosticsFwInterface : public FirmwareUtil {}; template <> struct Mock : public FirmwareUtil { ze_result_t mockFwDeviceInit(void) { return ZE_RESULT_SUCCESS; } ze_result_t mockFwDeviceInitFail(void) { return ZE_RESULT_ERROR_UNKNOWN; } ze_result_t mockGetFirstDevice(igsc_device_info *info) { return ZE_RESULT_SUCCESS; } ze_result_t mockFwSupportedDiagTests(std::vector &supportedDiagTests) { supportedDiagTests.push_back(mockSupportedDiagTypes[0]); supportedDiagTests.push_back(mockSupportedDiagTypes[1]); return ZE_RESULT_SUCCESS; } ze_result_t mockFwRunDiagTestsReturnSuccess(std::string &osDiagType, zes_diag_result_t *pResult) { *pResult = ZES_DIAG_RESULT_NO_ERRORS; return ZE_RESULT_SUCCESS; } ze_result_t mockFwRunDiagTestsReturnSuccessWithResultRepair(std::string &osDiagType, zes_diag_result_t *pResult) { *pResult = ZES_DIAG_RESULT_REBOOT_FOR_REPAIR; return ZE_RESULT_SUCCESS; } Mock() = default; MOCK_METHOD(ze_result_t, fwDeviceInit, (), (override)); MOCK_METHOD(ze_result_t, getFirstDevice, (igsc_device_info * info), (override)); ADDMETHOD_NOBASE(getFwVersion, ze_result_t, ZE_RESULT_SUCCESS, (std::string fwType, std::string &firmwareVersion)); ADDMETHOD_NOBASE(flashFirmware, ze_result_t, ZE_RESULT_SUCCESS, (std::string fwType, void *pImage, uint32_t size)); ADDMETHOD_NOBASE(fwIfrApplied, ze_result_t, ZE_RESULT_SUCCESS, (bool &ifrStatus)); MOCK_METHOD(ze_result_t, fwSupportedDiagTests, (std::vector & supportedDiagTests), (override)); MOCK_METHOD(ze_result_t, fwRunDiagTests, (std::string & osDiagType, zes_diag_result_t *pResult), (override)); ADDMETHOD_NOBASE(fwGetMemoryErrorCount, 
ze_result_t, ZE_RESULT_SUCCESS, (zes_ras_error_type_t category, uint32_t subDeviceCount, uint32_t subDeviceId, uint64_t &count)); ADDMETHOD_NOBASE_VOIDRETURN(getDeviceSupportedFwTypes, (std::vector & fwTypes)); }; struct MockGlobalOperationsEngineHandleContext : public EngineHandleContext { MockGlobalOperationsEngineHandleContext(OsSysman *pOsSysman) : EngineHandleContext(pOsSysman) {} void init() override {} }; class DiagSysfsAccess : public SysfsAccess {}; template <> struct Mock : public DiagSysfsAccess { ze_result_t getRealPathVal(const std::string file, std::string &val) { if (file.compare(deviceDirDiag) == 0) { val = mockdeviceDirDiag; } else { return ZE_RESULT_ERROR_NOT_AVAILABLE; } return ZE_RESULT_SUCCESS; } ze_result_t mockwrite(const std::string file, const int val) { if (std::string::npos != file.find(mockQuiescentGpuFile)) { return ZE_RESULT_SUCCESS; } else if (std::string::npos != file.find(mockinvalidateLmemFile)) { return ZE_RESULT_SUCCESS; } else { return ZE_RESULT_ERROR_NOT_AVAILABLE; } } ze_result_t mockwriteFails(const std::string file, const int val) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } bool mockIsMyDeviceFile(const std::string dev) { if (dev.compare(mockDeviceName) == 0) { return true; } return false; } Mock() = default; MOCK_METHOD(ze_result_t, write, (const std::string file, const int val), (override)); MOCK_METHOD(ze_result_t, getRealPath, (const std::string path, std::string &val), (override)); MOCK_METHOD(bool, isMyDeviceFile, (const std::string dev), (override)); }; class DiagFsAccess : public FsAccess {}; template <> struct Mock : public DiagFsAccess { ze_result_t mockFsWrite(const std::string file, std::string val) { if (std::string::npos != file.find(mockRemove)) { return ZE_RESULT_SUCCESS; } else if (std::string::npos != file.find(mockRescan)) { return ZE_RESULT_SUCCESS; } else { return ZE_RESULT_ERROR_NOT_AVAILABLE; } } ze_result_t mockFsReadAddress(const std::string file, std::string &val) { if 
(file.compare(mockSlotPath1)) { val = mockRootAddress; return ZE_RESULT_SUCCESS; } else { return ZE_RESULT_ERROR_NOT_AVAILABLE; } } ze_result_t mockFsReadWrongAddress(const std::string file, std::string &val) { if (file.compare(mockSlotPath1)) { val = mockRootWrongAddress; return ZE_RESULT_SUCCESS; } else { return ZE_RESULT_ERROR_NOT_AVAILABLE; } } ze_result_t listDirectorySuccess(const std::string directory, std::vector &listOfslots) { if (directory.compare(mockSlotPath) == 0) { listOfslots.push_back("1"); return ZE_RESULT_SUCCESS; } return ZE_RESULT_ERROR_NOT_AVAILABLE; } ze_result_t listDirectoryFailure(const std::string directory, std::vector &events) { return ZE_RESULT_ERROR_NOT_AVAILABLE; } ze_result_t getRealPathVal(const std::string file, std::string &val) { if (file.compare(deviceDirDiag) == 0) { val = mockdeviceDirDiag; } else { return ZE_RESULT_ERROR_NOT_AVAILABLE; } return ZE_RESULT_SUCCESS; } Mock() = default; MOCK_METHOD(ze_result_t, write, (const std::string file, const std::string val), (override)); MOCK_METHOD(ze_result_t, read, (const std::string file, std::string &val), (override)); MOCK_METHOD(ze_result_t, getRealPath, (const std::string path, std::string &val), (override)); MOCK_METHOD(ze_result_t, listDirectory, (const std::string path, std::vector &list), (override)); }; class DiagProcfsAccess : public ProcfsAccess {}; template <> struct Mock : public DiagProcfsAccess { const ::pid_t extraPid = 4; const int extraFd = 5; std::vector<::pid_t> pidList = {1, 2, 3}; std::vector fdList = {0, 1, 2}; ::pid_t ourDevicePid = 0; int ourDeviceFd = 0; ze_result_t mockProcessListDeviceUnused(std::vector<::pid_t> &list) { list = pidList; return ZE_RESULT_SUCCESS; } ze_result_t mockProcessListDeviceInUse(std::vector<::pid_t> &list) { list = pidList; if (ourDevicePid) { list.push_back(ourDevicePid); } return ZE_RESULT_SUCCESS; } ::pid_t getMockMyProcessId() { return ::getpid(); } ze_result_t getMockFileDescriptors(const ::pid_t pid, std::vector &list) { // 
Give every process 3 file descriptors // Except the device that MOCK has the device open. Give it one extra. list.clear(); list = fdList; if (ourDevicePid == pid) { list.push_back(ourDeviceFd); } return ZE_RESULT_SUCCESS; } ze_result_t getMockFileDescriptorsFailure(const ::pid_t pid, std::vector &list) { //return failure to verify the error condition check list.clear(); return ZE_RESULT_ERROR_UNKNOWN; } ze_result_t getMockFileName(const ::pid_t pid, const int fd, std::string &val) { if (pid == ourDevicePid && fd == ourDeviceFd) { val = mockDeviceName; } else { // return fake filenames for other file descriptors val = std::string("/FILENAME") + std::to_string(fd); } return ZE_RESULT_SUCCESS; } bool mockIsAlive(const ::pid_t pid) { if (pid == ourDevicePid) { return true; } return false; } void mockKill(const ::pid_t pid) { ourDevicePid = 0; } ze_result_t mockFsWrite(const std::string file, std::string val) { if (std::string::npos != file.find(mockRemove)) { return ZE_RESULT_SUCCESS; } else if (std::string::npos != file.find(mockRescan)) { return ZE_RESULT_SUCCESS; } else { return ZE_RESULT_ERROR_NOT_AVAILABLE; } } Mock() = default; MOCK_METHOD(ze_result_t, listProcesses, (std::vector<::pid_t> & list), (override)); MOCK_METHOD(::pid_t, myProcessId, (), (override)); MOCK_METHOD(ze_result_t, getFileDescriptors, (const ::pid_t pid, std::vector &list), (override)); MOCK_METHOD(ze_result_t, getFileName, (const ::pid_t pid, const int fd, std::string &val), (override)); MOCK_METHOD(bool, isAlive, (const ::pid_t pid), (override)); MOCK_METHOD(void, kill, (const ::pid_t pid), (override)); MOCK_METHOD(ze_result_t, listDirectory, (const std::string path, std::vector &list), (override)); MOCK_METHOD(ze_result_t, write, (const std::string file, const std::string val), (override)); }; class PublicLinuxDiagnosticsImp : public L0::LinuxDiagnosticsImp { public: using LinuxDiagnosticsImp::closeFunction; using LinuxDiagnosticsImp::openFunction; using LinuxDiagnosticsImp::pDevice; using 
LinuxDiagnosticsImp::pFsAccess; using LinuxDiagnosticsImp::pFwInterface; using LinuxDiagnosticsImp::pLinuxSysmanImp; using LinuxDiagnosticsImp::pProcfsAccess; using LinuxDiagnosticsImp::preadFunction; using LinuxDiagnosticsImp::pSysfsAccess; using LinuxDiagnosticsImp::pwriteFunction; }; } // namespace ult } // namespace L0 test_zes_sysman_diagnostics.cpp000066400000000000000000000243441422164147700407600ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/diagnostics/linux/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/test/unit_tests/sources/sysman/diagnostics/linux/mock_zes_sysman_diagnostics.h" extern bool sysmanUltsEnable; using ::testing::_; namespace L0 { namespace ult { class ZesDiagnosticsFixture : public SysmanDeviceFixture { protected: zes_diag_handle_t hSysmanDiagnostics = {}; std::unique_ptr> pMockFwInterface; FirmwareUtil *pFwUtilInterfaceOld = nullptr; void SetUp() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } SysmanDeviceFixture::SetUp(); pFwUtilInterfaceOld = pLinuxSysmanImp->pFwUtilInterface; pMockFwInterface = std::make_unique>>(); pLinuxSysmanImp->pFwUtilInterface = pMockFwInterface.get(); ON_CALL(*pMockFwInterface.get(), fwDeviceInit()) .WillByDefault(::testing::Invoke(pMockFwInterface.get(), &Mock::mockFwDeviceInit)); ON_CALL(*pMockFwInterface.get(), getFirstDevice(_)) .WillByDefault(::testing::Invoke(pMockFwInterface.get(), &Mock::mockGetFirstDevice)); ON_CALL(*pMockFwInterface.get(), fwSupportedDiagTests(_)) .WillByDefault(::testing::Invoke(pMockFwInterface.get(), &Mock::mockFwSupportedDiagTests)); for (const auto &handle : pSysmanDeviceImp->pDiagnosticsHandleContext->handleList) { delete handle; } pSysmanDeviceImp->pDiagnosticsHandleContext->handleList.clear(); uint32_t subDeviceCount = 0; std::vector deviceHandles; // We received a device handle. 
Check for subdevices in this device Device::fromHandle(device->toHandle())->getSubDevices(&subDeviceCount, nullptr); if (subDeviceCount == 0) { deviceHandles.resize(1, device->toHandle()); } else { deviceHandles.resize(subDeviceCount, nullptr); Device::fromHandle(device->toHandle())->getSubDevices(&subDeviceCount, deviceHandles.data()); } pSysmanDeviceImp->pDiagnosticsHandleContext->init(deviceHandles); } void TearDown() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } SysmanDeviceFixture::TearDown(); pLinuxSysmanImp->pFwUtilInterface = pFwUtilInterfaceOld; } std::vector get_diagnostics_handles(uint32_t &count) { std::vector handles(count, nullptr); EXPECT_EQ(zesDeviceEnumDiagnosticTestSuites(device->toHandle(), &count, handles.data()), ZE_RESULT_SUCCESS); return handles; } void clear_and_reinit_handles(std::vector &deviceHandles) { for (const auto &handle : pSysmanDeviceImp->pDiagnosticsHandleContext->handleList) { delete handle; } pSysmanDeviceImp->pDiagnosticsHandleContext->handleList.clear(); pSysmanDeviceImp->pDiagnosticsHandleContext->supportedDiagTests.clear(); uint32_t subDeviceCount = 0; // We received a device handle. 
Check for subdevices in this device Device::fromHandle(device->toHandle())->getSubDevices(&subDeviceCount, nullptr); if (subDeviceCount == 0) { deviceHandles.resize(1, device->toHandle()); } else { deviceHandles.resize(subDeviceCount, nullptr); Device::fromHandle(device->toHandle())->getSubDevices(&subDeviceCount, deviceHandles.data()); } } }; TEST_F(ZesDiagnosticsFixture, GivenComponentCountZeroWhenCallingzesDeviceEnumDiagnosticTestSuitesThenZeroCountIsReturnedAndVerifyzesDeviceEnumDiagnosticTestSuitesCallSucceeds) { std::vector diagnosticsHandle{}; uint32_t count = 0; ze_result_t result = zesDeviceEnumDiagnosticTestSuites(device->toHandle(), &count, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(count, 0u); uint32_t testCount = count + 1; result = zesDeviceEnumDiagnosticTestSuites(device->toHandle(), &testCount, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(testCount, count); diagnosticsHandle.resize(count); result = zesDeviceEnumDiagnosticTestSuites(device->toHandle(), &count, diagnosticsHandle.data()); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(count, 0u); uint32_t subDeviceCount = 0; std::vector deviceHandles; // We received a device handle. 
Check for subdevices in this device Device::fromHandle(device->toHandle())->getSubDevices(&subDeviceCount, nullptr); if (subDeviceCount == 0) { deviceHandles.resize(1, device->toHandle()); } else { deviceHandles.resize(subDeviceCount, nullptr); Device::fromHandle(device->toHandle())->getSubDevices(&subDeviceCount, deviceHandles.data()); } DiagnosticsImp *ptestDiagnosticsImp = new DiagnosticsImp(pSysmanDeviceImp->pDiagnosticsHandleContext->pOsSysman, mockSupportedDiagTypes[0], deviceHandles[0]); pSysmanDeviceImp->pDiagnosticsHandleContext->handleList.push_back(ptestDiagnosticsImp); result = zesDeviceEnumDiagnosticTestSuites(device->toHandle(), &count, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(count, 1u); testCount = count; diagnosticsHandle.resize(testCount); result = zesDeviceEnumDiagnosticTestSuites(device->toHandle(), &testCount, diagnosticsHandle.data()); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, diagnosticsHandle.data()); EXPECT_EQ(testCount, 1u); pSysmanDeviceImp->pDiagnosticsHandleContext->handleList.pop_back(); delete ptestDiagnosticsImp; } TEST_F(ZesDiagnosticsFixture, GivenFwInterfaceAsNullWhenCallingzesDeviceEnumDiagnosticTestSuitesThenZeroCountIsReturnedAndVerifyzesDeviceEnumDiagnosticTestSuitesCallSucceeds) { auto tempFwInterface = pLinuxSysmanImp->pFwUtilInterface; pLinuxSysmanImp->pFwUtilInterface = nullptr; std::vector diagnosticsHandle{}; uint32_t count = 0; ze_result_t result = zesDeviceEnumDiagnosticTestSuites(device->toHandle(), &count, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(count, 0u); uint32_t testCount = count + 1; result = zesDeviceEnumDiagnosticTestSuites(device->toHandle(), &testCount, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(testCount, count); pLinuxSysmanImp->pFwUtilInterface = tempFwInterface; } TEST_F(ZesDiagnosticsFixture, GivenFailedFirmwareInitializationWhenInitializingDiagnosticsContextThenexpectNoHandles) { std::vector deviceHandles; 
clear_and_reinit_handles(deviceHandles); ON_CALL(*pMockFwInterface.get(), fwDeviceInit()) .WillByDefault(::testing::Invoke(pMockFwInterface.get(), &Mock::mockFwDeviceInitFail)); pSysmanDeviceImp->pDiagnosticsHandleContext->init(deviceHandles); EXPECT_EQ(0u, pSysmanDeviceImp->pDiagnosticsHandleContext->handleList.size()); } TEST_F(ZesDiagnosticsFixture, GivenSupportedTestsWhenInitializingDiagnosticsContextThenExpectHandles) { std::vector deviceHandles; clear_and_reinit_handles(deviceHandles); pSysmanDeviceImp->pDiagnosticsHandleContext->supportedDiagTests.push_back(mockSupportedDiagTypes[0]); pSysmanDeviceImp->pDiagnosticsHandleContext->init(deviceHandles); EXPECT_EQ(1u, pSysmanDeviceImp->pDiagnosticsHandleContext->handleList.size()); } TEST_F(ZesDiagnosticsFixture, GivenFirmwareInitializationFailureThenCreateHandleMustFail) { std::vector deviceHandles; clear_and_reinit_handles(deviceHandles); ON_CALL(*pMockFwInterface.get(), fwDeviceInit()) .WillByDefault(::testing::Invoke(pMockFwInterface.get(), &Mock::mockFwDeviceInitFail)); pSysmanDeviceImp->pDiagnosticsHandleContext->init(deviceHandles); EXPECT_EQ(0u, pSysmanDeviceImp->pDiagnosticsHandleContext->handleList.size()); } TEST_F(ZesDiagnosticsFixture, GivenValidDiagnosticsHandleWhenGettingDiagnosticsPropertiesThenCallSucceeds) { std::vector deviceHandles; clear_and_reinit_handles(deviceHandles); DiagnosticsImp *ptestDiagnosticsImp = new DiagnosticsImp(pSysmanDeviceImp->pDiagnosticsHandleContext->pOsSysman, mockSupportedDiagTypes[0], deviceHandles[0]); pSysmanDeviceImp->pDiagnosticsHandleContext->handleList.push_back(ptestDiagnosticsImp); auto handle = pSysmanDeviceImp->pDiagnosticsHandleContext->handleList[0]->toHandle(); zes_diag_properties_t properties = {}; EXPECT_EQ(ZE_RESULT_SUCCESS, zesDiagnosticsGetProperties(handle, &properties)); } TEST_F(ZesDiagnosticsFixture, GivenValidDiagnosticsHandleWhenGettingDiagnosticsTestThenCallSucceeds) { std::vector deviceHandles; clear_and_reinit_handles(deviceHandles); 
DiagnosticsImp *ptestDiagnosticsImp = new DiagnosticsImp(pSysmanDeviceImp->pDiagnosticsHandleContext->pOsSysman, mockSupportedDiagTypes[0], deviceHandles[0]); pSysmanDeviceImp->pDiagnosticsHandleContext->handleList.push_back(ptestDiagnosticsImp); auto handle = pSysmanDeviceImp->pDiagnosticsHandleContext->handleList[0]->toHandle(); zes_diag_test_t tests = {}; uint32_t count = 0; EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, zesDiagnosticsGetTests(handle, &count, &tests)); pSysmanDeviceImp->pDiagnosticsHandleContext->handleList.pop_back(); delete ptestDiagnosticsImp; } TEST_F(ZesDiagnosticsFixture, GivenValidDiagnosticsHandleWhenRunningDiagnosticsTestThenCallSucceeds) { std::vector deviceHandles; clear_and_reinit_handles(deviceHandles); DiagnosticsImp *ptestDiagnosticsImp = new DiagnosticsImp(pSysmanDeviceImp->pDiagnosticsHandleContext->pOsSysman, mockSupportedDiagTypes[0], deviceHandles[0]); pSysmanDeviceImp->pDiagnosticsHandleContext->handleList.push_back(ptestDiagnosticsImp); auto handle = pSysmanDeviceImp->pDiagnosticsHandleContext->handleList[0]->toHandle(); zes_diag_result_t results = ZES_DIAG_RESULT_FORCE_UINT32; uint32_t start = 0, end = 0; EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, zesDiagnosticsRunTests(handle, start, end, &results)); pSysmanDeviceImp->pDiagnosticsHandleContext->handleList.pop_back(); delete ptestDiagnosticsImp; } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/engine/000077500000000000000000000000001422164147700303025ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/engine/CMakeLists.txt000066400000000000000000000003051422164147700330400ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) add_subdirectories() 
compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/engine/linux/000077500000000000000000000000001422164147700314415ustar00rootroot00000000000000CMakeLists.txt000066400000000000000000000012551422164147700341250ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/engine/linux# # Copyright (C) 2020-2022 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_TESTS_TOOLS_SYSMAN_ENGINE_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}test_zes_engine.cpp ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}mock_engine.h ) if((NEO_ENABLE_i915_PRELIM_DETECTION) AND ("${BRANCH_TYPE}" STREQUAL "")) list(REMOVE_ITEM L0_TESTS_TOOLS_SYSMAN_ENGINE_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/test_zes_engine.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_engine.h ) endif() if(UNIX) target_sources(${TARGET_NAME} PRIVATE ${L0_TESTS_TOOLS_SYSMAN_ENGINE_LINUX} ) endif() mock_engine.h000066400000000000000000000125621422164147700340170ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/engine/linux/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/os_interface/linux/engine_info.h" #include "shared/test/common/libult/linux/drm_mock.h" #include "level_zero/core/test/unit_tests/mock.h" #include "level_zero/core/test/unit_tests/mocks/mock_memory_manager.h" #include "level_zero/tools/source/sysman/engine/linux/os_engine_imp.h" #include "level_zero/tools/source/sysman/linux/pmu/pmu_imp.h" #include "sysman/engine/engine_imp.h" #include "sysman/linux/os_sysman_imp.h" using namespace NEO; namespace L0 { namespace ult { constexpr int64_t mockPmuFd = 10; constexpr uint64_t mockTimestamp = 87654321; constexpr uint64_t mockActiveTime = 987654321; const uint32_t microSecondsToNanoSeconds = 1000u; constexpr uint16_t I915_INVALID_ENGINE_CLASS = UINT16_MAX; const std::string 
deviceDir("device"); struct MockMemoryManagerInEngineSysman : public MemoryManagerMock { MockMemoryManagerInEngineSysman(NEO::ExecutionEnvironment &executionEnvironment) : MemoryManagerMock(const_cast(executionEnvironment)) {} }; class EngineNeoDrm : public Drm { public: using Drm::getEngineInfo; using Drm::setupIoctlHelper; const int mockFd = 0; EngineNeoDrm(RootDeviceEnvironment &rootDeviceEnvironment) : Drm(std::make_unique(mockFd, ""), rootDeviceEnvironment) {} }; template <> struct Mock : public EngineNeoDrm { Mock(RootDeviceEnvironment &rootDeviceEnvironment) : EngineNeoDrm(rootDeviceEnvironment) {} bool queryEngineInfoMockPositiveTest() { std::vector i915engineInfo(6); i915engineInfo[0].engine.engineClass = I915_ENGINE_CLASS_RENDER; i915engineInfo[0].engine.engineInstance = 0; i915engineInfo[1].engine.engineClass = I915_ENGINE_CLASS_RENDER; i915engineInfo[1].engine.engineInstance = 1; i915engineInfo[2].engine.engineClass = I915_ENGINE_CLASS_VIDEO; i915engineInfo[2].engine.engineInstance = 1; i915engineInfo[3].engine.engineClass = I915_ENGINE_CLASS_COPY; i915engineInfo[3].engine.engineInstance = 0; i915engineInfo[4].engine.engineClass = I915_ENGINE_CLASS_VIDEO_ENHANCE; i915engineInfo[4].engine.engineInstance = 0; i915engineInfo[5].engine.engineClass = I915_INVALID_ENGINE_CLASS; i915engineInfo[5].engine.engineInstance = 0; NEO::HardwareInfo hwInfo = *rootDeviceEnvironment.getHardwareInfo(); this->engineInfo.reset(new EngineInfo(this, &hwInfo, i915engineInfo)); return true; } bool queryEngineInfoMockReturnFalse() { return false; } MOCK_METHOD(bool, sysmanQueryEngineInfo, (), (override)); }; class MockPmuInterfaceImp : public PmuInterfaceImp { public: using PmuInterfaceImp::perfEventOpen; MockPmuInterfaceImp(LinuxSysmanImp *pLinuxSysmanImp) : PmuInterfaceImp(pLinuxSysmanImp) {} }; template <> struct Mock : public MockPmuInterfaceImp { Mock(LinuxSysmanImp *pLinuxSysmanImp) : MockPmuInterfaceImp(pLinuxSysmanImp) {} int64_t 
mockedPerfEventOpenAndSuccessReturn(perf_event_attr *attr, pid_t pid, int cpu, int groupFd, uint64_t flags) { return mockPmuFd; } int64_t mockedPerfEventOpenAndFailureReturn(perf_event_attr *attr, pid_t pid, int cpu, int groupFd, uint64_t flags) { return -1; } int mockedPmuReadAndSuccessReturn(int fd, uint64_t *data, ssize_t sizeOfdata) { data[0] = mockActiveTime; data[1] = mockTimestamp; return 0; } int mockedPmuReadAndFailureReturn(int fd, uint64_t *data, ssize_t sizeOfdata) { return -1; } MOCK_METHOD(int64_t, perfEventOpen, (perf_event_attr * attr, pid_t pid, int cpu, int groupFd, uint64_t flags), (override)); MOCK_METHOD(int, pmuRead, (int fd, uint64_t *data, ssize_t sizeOfdata), (override)); }; class EngineSysfsAccess : public SysfsAccess {}; class EngineFsAccess : public FsAccess {}; template <> struct Mock : public EngineFsAccess { MOCK_METHOD(ze_result_t, read, (const std::string file, uint32_t &val), (override)); ze_result_t readValSuccess(const std::string file, uint32_t &val) { val = 23; return ZE_RESULT_SUCCESS; } ze_result_t readValFailure(const std::string file, uint32_t &val) { val = 0; return ZE_RESULT_ERROR_NOT_AVAILABLE; } }; template <> struct Mock : public EngineSysfsAccess { MOCK_METHOD(ze_result_t, readSymLink, (const std::string file, std::string &buf), (override)); ze_result_t getValStringSymLinkSuccess(const std::string file, std::string &val) { if (file.compare(deviceDir) == 0) { val = "/sys/devices/pci0000:00/0000:00:01.0/0000:01:00.0/0000:02:01.0/0000:03:00.0"; return ZE_RESULT_SUCCESS; } return ZE_RESULT_ERROR_NOT_AVAILABLE; } ze_result_t getValStringSymLinkFailure(const std::string file, std::string &val) { return ZE_RESULT_ERROR_NOT_AVAILABLE; } Mock() = default; }; class DrmMockEngineInfoFailing : public DrmMock { public: using DrmMock::DrmMock; DrmMockEngineInfoFailing(RootDeviceEnvironment &rootDeviceEnvironment) : DrmMock(rootDeviceEnvironment) {} int handleRemainingRequests(unsigned long request, void *arg) override { return -1; 
} }; } // namespace ult } // namespace L0 test_zes_engine.cpp000066400000000000000000000305471422164147700352640ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/engine/linux/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/test/unit_tests/sources/sysman/linux/mock_sysman_fixture.h" #include "mock_engine.h" extern bool sysmanUltsEnable; using ::testing::Matcher; using ::testing::Return; class OsEngine; namespace L0 { namespace ult { constexpr uint32_t handleComponentCount = 6u; class ZesEngineFixture : public SysmanDeviceFixture { protected: std::unique_ptr> pDrm; std::unique_ptr> pPmuInterface; Drm *pOriginalDrm = nullptr; PmuInterface *pOriginalPmuInterface = nullptr; MemoryManager *pMemoryManagerOriginal = nullptr; std::unique_ptr pMemoryManager; std::unique_ptr> pSysfsAccess; SysfsAccess *pSysfsAccessOriginal = nullptr; std::unique_ptr> pFsAccess; FsAccess *pFsAccessOriginal = nullptr; void SetUp() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } SysmanDeviceFixture::SetUp(); pMemoryManagerOriginal = device->getDriverHandle()->getMemoryManager(); pMemoryManager = std::make_unique<::testing::NiceMock>(*neoDevice->getExecutionEnvironment()); pMemoryManager->localMemorySupported[0] = false; device->getDriverHandle()->setMemoryManager(pMemoryManager.get()); pSysfsAccessOriginal = pLinuxSysmanImp->pSysfsAccess; pSysfsAccess = std::make_unique>>(); pLinuxSysmanImp->pSysfsAccess = pSysfsAccess.get(); pFsAccessOriginal = pLinuxSysmanImp->pFsAccess; pFsAccess = std::make_unique>>(); pLinuxSysmanImp->pFsAccess = pFsAccess.get(); EngineHandleContext *pEngineHandleContext = pSysmanDeviceImp->pEngineHandleContext; pDrm = std::make_unique>>(const_cast(neoDevice->getRootDeviceEnvironment())); pDrm->setupIoctlHelper(neoDevice->getRootDeviceEnvironment().getHardwareInfo()->platform.eProductFamily); pPmuInterface = std::make_unique>>(pLinuxSysmanImp); 
pOriginalDrm = pLinuxSysmanImp->pDrm; pOriginalPmuInterface = pLinuxSysmanImp->pPmuInterface; pLinuxSysmanImp->pDrm = pDrm.get(); pLinuxSysmanImp->pPmuInterface = pPmuInterface.get(); ON_CALL(*pDrm.get(), sysmanQueryEngineInfo()) .WillByDefault(::testing::Invoke(pDrm.get(), &Mock::queryEngineInfoMockPositiveTest)); ON_CALL(*pPmuInterface.get(), perfEventOpen(_, _, _, _, _)) .WillByDefault(::testing::Invoke(pPmuInterface.get(), &Mock::mockedPerfEventOpenAndSuccessReturn)); ON_CALL(*pPmuInterface.get(), pmuRead(_, _, _)) .WillByDefault(::testing::Invoke(pPmuInterface.get(), &Mock::mockedPmuReadAndSuccessReturn)); ON_CALL(*pSysfsAccess.get(), readSymLink(_, _)) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::getValStringSymLinkSuccess)); ON_CALL(*pFsAccess.get(), read(_, _)) .WillByDefault(::testing::Invoke(pFsAccess.get(), &Mock::readValSuccess)); pEngineHandleContext->init(); } void TearDown() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } SysmanDeviceFixture::TearDown(); device->getDriverHandle()->setMemoryManager(pMemoryManagerOriginal); pLinuxSysmanImp->pDrm = pOriginalDrm; pLinuxSysmanImp->pPmuInterface = pOriginalPmuInterface; pLinuxSysmanImp->pSysfsAccess = pSysfsAccessOriginal; pLinuxSysmanImp->pFsAccess = pFsAccessOriginal; } std::vector getEngineHandles(uint32_t count) { std::vector handles(count, nullptr); EXPECT_EQ(zesDeviceEnumEngineGroups(device->toHandle(), &count, handles.data()), ZE_RESULT_SUCCESS); return handles; } }; TEST_F(ZesEngineFixture, GivenComponentCountZeroWhenCallingzesDeviceEnumEngineGroupsThenNonZeroCountIsReturnedAndVerifyCallSucceeds) { uint32_t count = 0; EXPECT_EQ(ZE_RESULT_SUCCESS, zesDeviceEnumEngineGroups(device->toHandle(), &count, NULL)); EXPECT_EQ(count, handleComponentCount); uint32_t testcount = count + 1; EXPECT_EQ(ZE_RESULT_SUCCESS, zesDeviceEnumEngineGroups(device->toHandle(), &testcount, NULL)); EXPECT_EQ(testcount, count); count = 0; std::vector handles(count, nullptr); 
EXPECT_EQ(zesDeviceEnumEngineGroups(device->toHandle(), &count, handles.data()), ZE_RESULT_SUCCESS); EXPECT_EQ(count, handleComponentCount); } TEST_F(ZesEngineFixture, GivenValidEngineHandlesWhenCallingZesEngineGetPropertiesThenVerifyCallSucceeds) { zes_engine_properties_t properties; auto handle = getEngineHandles(handleComponentCount); EXPECT_EQ(ZE_RESULT_SUCCESS, zesEngineGetProperties(handle[0], &properties)); EXPECT_EQ(ZES_ENGINE_GROUP_RENDER_SINGLE, properties.type); EXPECT_FALSE(properties.onSubdevice); EXPECT_EQ(ZE_RESULT_SUCCESS, zesEngineGetProperties(handle[1], &properties)); EXPECT_EQ(ZES_ENGINE_GROUP_RENDER_SINGLE, properties.type); EXPECT_FALSE(properties.onSubdevice); EXPECT_EQ(ZE_RESULT_SUCCESS, zesEngineGetProperties(handle[2], &properties)); EXPECT_EQ(ZES_ENGINE_GROUP_MEDIA_DECODE_SINGLE, properties.type); EXPECT_FALSE(properties.onSubdevice); EXPECT_EQ(ZE_RESULT_SUCCESS, zesEngineGetProperties(handle[3], &properties)); EXPECT_EQ(ZES_ENGINE_GROUP_MEDIA_ENCODE_SINGLE, properties.type); EXPECT_FALSE(properties.onSubdevice); EXPECT_EQ(ZE_RESULT_SUCCESS, zesEngineGetProperties(handle[4], &properties)); EXPECT_EQ(ZES_ENGINE_GROUP_COPY_SINGLE, properties.type); EXPECT_FALSE(properties.onSubdevice); EXPECT_EQ(ZE_RESULT_SUCCESS, zesEngineGetProperties(handle[5], &properties)); EXPECT_EQ(ZES_ENGINE_GROUP_MEDIA_ENHANCEMENT_SINGLE, properties.type); EXPECT_FALSE(properties.onSubdevice); } TEST_F(ZesEngineFixture, GivenValidEngineHandleAndIntegratedDeviceWhenCallingZesEngineGetActivityThenVerifyCallReturnsSuccess) { zes_engine_stats_t stats = {}; auto handles = getEngineHandles(handleComponentCount); EXPECT_EQ(handleComponentCount, handles.size()); for (auto handle : handles) { EXPECT_EQ(ZE_RESULT_SUCCESS, zesEngineGetActivity(handle, &stats)); EXPECT_EQ(mockActiveTime / microSecondsToNanoSeconds, stats.activeTime); EXPECT_EQ(mockTimestamp / microSecondsToNanoSeconds, stats.timestamp); } } TEST_F(ZesEngineFixture, 
GivenValidEngineHandleAndDiscreteDeviceWhenCallingZesEngineGetActivityThenVerifyCallReturnsSuccess) { auto pMemoryManagerTest = std::make_unique<::testing::NiceMock>(*neoDevice->getExecutionEnvironment()); pMemoryManagerTest->localMemorySupported[0] = true; device->getDriverHandle()->setMemoryManager(pMemoryManagerTest.get()); zes_engine_stats_t stats = {}; auto handles = getEngineHandles(handleComponentCount); EXPECT_EQ(handleComponentCount, handles.size()); for (auto handle : handles) { EXPECT_EQ(ZE_RESULT_SUCCESS, zesEngineGetActivity(handle, &stats)); EXPECT_EQ(mockActiveTime / microSecondsToNanoSeconds, stats.activeTime); EXPECT_EQ(mockTimestamp / microSecondsToNanoSeconds, stats.timestamp); } } TEST_F(ZesEngineFixture, GivenTestDiscreteDevicesAndValidEngineHandleWhenCallingZesEngineGetActivityAndPMUGetEventTypeFailsThenVerifyEngineGetActivityReturnsFailure) { auto pMemoryManagerTest = std::make_unique<::testing::NiceMock>(*neoDevice->getExecutionEnvironment()); pMemoryManagerTest->localMemorySupported[0] = true; device->getDriverHandle()->setMemoryManager(pMemoryManagerTest.get()); ON_CALL(*pSysfsAccess.get(), readSymLink(_, _)) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::getValStringSymLinkFailure)); auto pOsEngineTest1 = OsEngine::create(pOsSysman, ZES_ENGINE_GROUP_RENDER_SINGLE, 0u, 0u); zes_engine_stats_t stats = {}; EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, pOsEngineTest1->getActivity(&stats)); ON_CALL(*pSysfsAccess.get(), readSymLink(_, _)) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::getValStringSymLinkSuccess)); ON_CALL(*pFsAccess.get(), read(_, _)) .WillByDefault(::testing::Invoke(pFsAccess.get(), &Mock::readValFailure)); auto pOsEngineTest2 = OsEngine::create(pOsSysman, ZES_ENGINE_GROUP_RENDER_SINGLE, 0u, 0u); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, pOsEngineTest2->getActivity(&stats)); delete pOsEngineTest1; delete pOsEngineTest2; } TEST_F(ZesEngineFixture, 
GivenTestIntegratedDevicesAndValidEngineHandleWhenCallingZesEngineGetActivityAndPMUGetEventTypeFailsThenVerifyEngineGetActivityReturnsFailure) { zes_engine_stats_t stats = {}; ON_CALL(*pFsAccess.get(), read(_, _)) .WillByDefault(::testing::Invoke(pFsAccess.get(), &Mock::readValFailure)); auto pOsEngineTest1 = OsEngine::create(pOsSysman, ZES_ENGINE_GROUP_RENDER_SINGLE, 0u, 0u); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, pOsEngineTest1->getActivity(&stats)); delete pOsEngineTest1; } TEST_F(ZesEngineFixture, GivenValidEngineHandleWhenCallingZesEngineGetActivityAndPmuReadFailsThenVerifyEngineGetActivityReturnsFailure) { ON_CALL(*pPmuInterface.get(), pmuRead(_, _, _)) .WillByDefault(::testing::Invoke(pPmuInterface.get(), &Mock::mockedPmuReadAndFailureReturn)); zes_engine_stats_t stats = {}; auto handles = getEngineHandles(handleComponentCount); EXPECT_EQ(handleComponentCount, handles.size()); for (auto handle : handles) { EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, zesEngineGetActivity(handle, &stats)); } } TEST_F(ZesEngineFixture, GivenValidEngineHandleWhenCallingZesEngineGetActivityAndperfEventOpenFailsThenVerifyEngineGetActivityReturnsFailure) { ON_CALL(*pPmuInterface.get(), perfEventOpen(_, _, _, _, _)) .WillByDefault(::testing::Invoke(pPmuInterface.get(), &Mock::mockedPerfEventOpenAndFailureReturn)); MockPmuInterfaceImp pPmuInterfaceImp(pLinuxSysmanImp); EXPECT_EQ(-1, pPmuInterface->pmuInterfaceOpen(0, -1, 0)); } TEST_F(ZesEngineFixture, GivenValidOsSysmanPointerWhenRetrievingEngineTypeAndInstancesAndIfEngineInfoQueryFailsThenErrorIsReturned) { std::set> engineGroupInstance; ON_CALL(*pDrm.get(), sysmanQueryEngineInfo()) .WillByDefault(::testing::Invoke(pDrm.get(), &Mock::queryEngineInfoMockReturnFalse)); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, OsEngine::getNumEngineTypeAndInstances(engineGroupInstance, pOsSysman)); } TEST_F(ZesEngineFixture, givenEngineInfoQuerySupportedWhenQueryingEngineInfoThenEngineInfoIsCreatedWithEngines) { auto drm = 
std::make_unique((const_cast(neoDevice->getRootDeviceEnvironment()))); ASSERT_NE(nullptr, drm); std::vector memRegions{ {{I915_MEMORY_CLASS_SYSTEM, 0}, 1024, 0}}; drm->memoryInfo.reset(new MemoryInfo(memRegions)); drm->sysmanQueryEngineInfo(); auto engineInfo = drm->getEngineInfo(); ASSERT_NE(nullptr, engineInfo); EXPECT_EQ(2u, engineInfo->engines.size()); } TEST_F(ZesEngineFixture, GivenEngineInfoWithVideoQuerySupportedWhenQueryingEngineInfoWithVideoThenEngineInfoIsCreatedWithEngines) { auto drm = std::make_unique((const_cast(neoDevice->getRootDeviceEnvironment()))); ASSERT_NE(nullptr, drm); std::vector memRegions{ {{I915_MEMORY_CLASS_SYSTEM, 0}, 1024, 0}}; drm->memoryInfo.reset(new MemoryInfo(memRegions)); drm->sysmanQueryEngineInfo(); auto engineInfo = drm->getEngineInfo(); ASSERT_NE(nullptr, engineInfo); EXPECT_EQ(2u, engineInfo->engines.size()); } TEST_F(ZesEngineFixture, GivenEngineInfoWithVideoQueryFailsThenFailureIsReturned) { auto drm = std::make_unique((const_cast(neoDevice->getRootDeviceEnvironment()))); ASSERT_NE(nullptr, drm); EXPECT_FALSE(drm->sysmanQueryEngineInfo()); } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/engine/windows/000077500000000000000000000000001422164147700317745ustar00rootroot00000000000000CMakeLists.txt000066400000000000000000000005241422164147700344560ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/engine/windows# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(WIN32) target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/test_zes_engine.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_engine.h ) endif() mock_engine.h000066400000000000000000000072421422164147700343510ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/engine/windows/* * Copyright (C) 2020-2021 Intel Corporation * * 
SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/core/test/unit_tests/mock.h" #include "level_zero/tools/test/unit_tests/sources/sysman/windows/mock_kmd_sys_manager.h" namespace L0 { namespace ult { class EngineKmdSysManager : public Mock {}; template <> struct Mock : public EngineKmdSysManager { KmdSysman::ActivityDomainsType mockEngineTypes[3] = {KmdSysman::ActivityDomainsType::ActitvityDomainGT, KmdSysman::ActivityDomainsType::ActivityDomainRenderCompute, KmdSysman::ActivityDomainsType::ActivityDomainMedia}; uint64_t mockActivityCounters[3] = {652411, 222115, 451115}; uint64_t mockActivityTimeStamps[3] = {4465421, 2566851, 1226621}; uint32_t mockNumSupportedEngineGroups = 3; uint32_t mockFrequencyTimeStamp = 38400000; uint32_t mockFrequencyActivity = 1200000; void getActivityProperty(KmdSysman::GfxSysmanReqHeaderIn *pRequest, KmdSysman::GfxSysmanReqHeaderOut *pResponse) { uint8_t *pBuffer = reinterpret_cast(pResponse); pBuffer += sizeof(KmdSysman::GfxSysmanReqHeaderOut); KmdSysman::ActivityDomainsType domain = static_cast(pRequest->inCommandParam); if (domain < KmdSysman::ActivityDomainsType::ActitvityDomainGT || domain >= KmdSysman::ActivityDomainsType::ActivityDomainMaxTypes) { pResponse->outDataSize = 0; pResponse->outReturnCode = KmdSysman::KmdSysmanFail; return; } switch (pRequest->inRequestId) { case KmdSysman::Requests::Activity::NumActivityDomains: { uint32_t *pValueCounter = reinterpret_cast(pBuffer); *pValueCounter = mockNumSupportedEngineGroups; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; pResponse->outDataSize = sizeof(uint32_t); } break; case KmdSysman::Requests::Activity::CurrentActivityCounter: { uint64_t *pValueCounter = reinterpret_cast(pBuffer); uint64_t *pValueTimeStamp = reinterpret_cast(pBuffer + sizeof(uint64_t)); *pValueCounter = mockActivityCounters[domain]; *pValueTimeStamp = mockActivityTimeStamps[domain]; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; pResponse->outDataSize = 2 * 
sizeof(uint64_t); } break; case KmdSysman::Requests::Activity::TimestampFrequency: { uint32_t *pValueFrequency = reinterpret_cast(pBuffer); *pValueFrequency = mockFrequencyTimeStamp; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; pResponse->outDataSize = sizeof(uint32_t); } break; case KmdSysman::Requests::Activity::ActivityCounterFrequency: { uint32_t *pValueFrequency = reinterpret_cast(pBuffer); *pValueFrequency = mockFrequencyActivity; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; pResponse->outDataSize = sizeof(uint32_t); } break; default: { pResponse->outDataSize = 0; pResponse->outReturnCode = KmdSysman::KmdSysmanFail; } break; } } void setActivityProperty(KmdSysman::GfxSysmanReqHeaderIn *pRequest, KmdSysman::GfxSysmanReqHeaderOut *pResponse) override { uint8_t *pBuffer = reinterpret_cast(pRequest); pBuffer += sizeof(KmdSysman::GfxSysmanReqHeaderIn); pResponse->outDataSize = 0; pResponse->outReturnCode = KmdSysman::KmdSysmanFail; } Mock() = default; ~Mock() = default; }; } // namespace ult } // namespace L0 test_zes_engine.cpp000066400000000000000000000107531422164147700356140ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/engine/windows/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/engine/windows/os_engine_imp.h" #include "level_zero/tools/test/unit_tests/sources/sysman/engine/windows/mock_engine.h" #include "level_zero/tools/test/unit_tests/sources/sysman/windows/mock_sysman_fixture.h" extern bool sysmanUltsEnable; namespace L0 { namespace ult { constexpr uint32_t engineHandleComponentCount = 3u; class SysmanDeviceEngineFixture : public SysmanDeviceFixture { protected: Mock *pKmdSysManager = nullptr; KmdSysManager *pOriginalKmdSysManager = nullptr; void SetUp() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } SysmanDeviceFixture::SetUp(); pKmdSysManager = new Mock; EXPECT_CALL(*pKmdSysManager, 
escape(_, _, _, _, _)) .WillRepeatedly(::testing::Invoke(pKmdSysManager, &Mock::mock_escape)); pOriginalKmdSysManager = pWddmSysmanImp->pKmdSysManager; pWddmSysmanImp->pKmdSysManager = pKmdSysManager; for (auto handle : pSysmanDeviceImp->pEngineHandleContext->handleList) { delete handle; } pSysmanDeviceImp->pEngineHandleContext->handleList.clear(); pSysmanDeviceImp->pEngineHandleContext->init(); } void TearDown() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } SysmanDeviceFixture::TearDown(); pWddmSysmanImp->pKmdSysManager = pOriginalKmdSysManager; if (pKmdSysManager != nullptr) { delete pKmdSysManager; pKmdSysManager = nullptr; } } std::vector get_engine_handles(uint32_t count) { std::vector handles(count, nullptr); EXPECT_EQ(zesDeviceEnumEngineGroups(device->toHandle(), &count, handles.data()), ZE_RESULT_SUCCESS); return handles; } }; TEST_F(SysmanDeviceEngineFixture, GivenComponentCountZeroWhenEnumeratingEngineGroupsThenValidCountIsReturnedAndVerifySysmanPowerGetCallSucceeds) { uint32_t count = 0; EXPECT_EQ(zesDeviceEnumEngineGroups(device->toHandle(), &count, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(count, engineHandleComponentCount); } TEST_F(SysmanDeviceEngineFixture, GivenInvalidComponentCountWhenEnumeratingEngineGroupsThenValidCountIsReturnedAndVerifySysmanPowerGetCallSucceeds) { uint32_t count = 0; EXPECT_EQ(zesDeviceEnumEngineGroups(device->toHandle(), &count, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(count, engineHandleComponentCount); count = count + 1; EXPECT_EQ(zesDeviceEnumEngineGroups(device->toHandle(), &count, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(count, engineHandleComponentCount); } TEST_F(SysmanDeviceEngineFixture, GivenComponentCountZeroWhenEnumeratingEngineGroupsThenValidPowerHandlesIsReturned) { uint32_t count = 0; EXPECT_EQ(zesDeviceEnumEngineGroups(device->toHandle(), &count, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(count, engineHandleComponentCount); std::vector handles(count, nullptr); 
EXPECT_EQ(zesDeviceEnumEngineGroups(device->toHandle(), &count, handles.data()), ZE_RESULT_SUCCESS); for (auto handle : handles) { EXPECT_NE(handle, nullptr); } } TEST_F(SysmanDeviceEngineFixture, GivenValidHandleGetPropertiesThenCorrectEngineGroupIsReturned) { auto handles = get_engine_handles(engineHandleComponentCount); uint32_t engineGroupIndex = 0; for (auto handle : handles) { zes_engine_properties_t properties = {}; ze_result_t result = zesEngineGetProperties(handle, &properties); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_FALSE(properties.onSubdevice); EXPECT_EQ(properties.subdeviceId, 0); EXPECT_EQ(properties.type, pKmdSysManager->mockEngineTypes[engineGroupIndex++]); } } TEST_F(SysmanDeviceEngineFixture, GivenValidHandleGetAvtivityThenCorrectValuesAreReturned) { auto handles = get_engine_handles(engineHandleComponentCount); uint32_t engineGroupIndex = 0; for (auto handle : handles) { zes_engine_stats_t stats; ze_result_t result = zesEngineGetActivity(handle, &stats); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(stats.activeTime, pKmdSysManager->mockActivityCounters[engineGroupIndex]); EXPECT_EQ(stats.timestamp, pKmdSysManager->mockActivityTimeStamps[engineGroupIndex]); engineGroupIndex++; } } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/events/000077500000000000000000000000001422164147700303415ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/events/CMakeLists.txt000066400000000000000000000003041422164147700330760ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) add_subdirectories() 
compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/events/linux/000077500000000000000000000000001422164147700315005ustar00rootroot00000000000000CMakeLists.txt000066400000000000000000000012521422164147700341610ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/events/linux# # Copyright (C) 2020-2022 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_TESTS_TOOLS_SYSMAN_EVENTS_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}test_zes_events.cpp ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}mock_events.h ) if(NEO_ENABLE_i915_PRELIM_DETECTION AND("${BRANCH_TYPE}" STREQUAL "")) list(REMOVE_ITEM L0_TESTS_TOOLS_SYSMAN_EVENTS_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/test_zes_events.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_events.h ) endif() if(UNIX) target_sources(${TARGET_NAME} PRIVATE ${L0_TESTS_TOOLS_SYSMAN_EVENTS_LINUX} ) endif() mock_events.h000066400000000000000000000057711422164147700341210ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/events/linux/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/tools/source/sysman/events/events_imp.h" #include "level_zero/tools/source/sysman/events/linux/os_events_imp.h" namespace L0 { namespace ult { const std::string ueventWedgedFile("/var/lib/libze_intel_gpu/wedged_file"); const std::string ueventDetachFile("/var/lib/libze_intel_gpu/remove-pci-0000_03_00_0"); const std::string ueventAttachFile("/var/lib/libze_intel_gpu/add-pci-0000_03_00_0"); const std::string deviceDir("device"); class EventsFsAccess : public FsAccess {}; template <> struct Mock : public EventsFsAccess { ze_result_t getValReturnValAsOne(const std::string file, uint32_t &val) { if (file.compare(ueventWedgedFile) == 0) { val = 1; } else if (file.compare(ueventDetachFile) == 0) { val = 1; } else if 
(file.compare(ueventAttachFile) == 0) { val = 1; } else { return ZE_RESULT_ERROR_NOT_AVAILABLE; } return ZE_RESULT_SUCCESS; } ze_result_t getValReturnValAsZero(const std::string file, uint32_t &val) { if (file.compare(ueventWedgedFile) == 0) { val = 0; } else if (file.compare(ueventDetachFile) == 0) { val = 0; } else if (file.compare(ueventAttachFile) == 0) { val = 0; } else { return ZE_RESULT_ERROR_NOT_AVAILABLE; } return ZE_RESULT_SUCCESS; } ze_result_t getValFileNotFound(const std::string file, uint32_t &val) { return ZE_RESULT_ERROR_NOT_AVAILABLE; } ze_result_t getValFileInsufficientPermissions(const std::string file, uint32_t &val) { return ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS; } Mock() = default; MOCK_METHOD(ze_result_t, read, (const std::string file, uint32_t &val), (override)); MOCK_METHOD(ze_result_t, canWrite, (const std::string file), (override)); }; class EventsSysfsAccess : public SysfsAccess {}; template <> struct Mock : public EventsSysfsAccess { MOCK_METHOD(ze_result_t, readSymLink, (const std::string file, std::string &buf), (override)); ze_result_t getValStringSymLinkSuccess(const std::string file, std::string &val) { if (file.compare(deviceDir) == 0) { val = "/sys/devices/pci0000:00/0000:00:01.0/0000:01:00.0/0000:02:01.0/0000:03:00.0"; return ZE_RESULT_SUCCESS; } return ZE_RESULT_ERROR_NOT_AVAILABLE; } ze_result_t getValStringSymLinkFailure(const std::string file, std::string &val) { return ZE_RESULT_ERROR_NOT_AVAILABLE; } Mock() = default; }; class PublicLinuxEventsImp : public L0::LinuxEventsImp { public: PublicLinuxEventsImp(OsSysman *pOsSysman) : LinuxEventsImp(pOsSysman) {} using LinuxEventsImp::getPciIdPathTag; using LinuxEventsImp::memHealthAtEventRegister; using LinuxEventsImp::pciIdPathTag; }; } // namespace ult } // namespace L0 test_zes_events.cpp000066400000000000000000000363261422164147700353630ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/events/linux/* * Copyright (C) 
2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/global_operations/global_operations_imp.h" #include "level_zero/tools/test/unit_tests/sources/sysman/linux/mock_sysman_fixture.h" #include "mock_events.h" extern bool sysmanUltsEnable; using ::testing::Matcher; namespace L0 { namespace ult { class SysmanEventsFixture : public SysmanDeviceFixture { protected: std::unique_ptr> pFsAccess; FsAccess *pFsAccessOriginal = nullptr; OsEvents *pOsEventsPrev = nullptr; L0::EventsImp *pEventsImp; GlobalOperations *pGlobalOperationsOriginal = nullptr; std::unique_ptr pGlobalOperations; std::unique_ptr> pSysfsAccess; SysfsAccess *pSysfsAccessOriginal = nullptr; void SetUp() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } SysmanDeviceFixture::SetUp(); pFsAccessOriginal = pLinuxSysmanImp->pFsAccess; pFsAccess = std::make_unique>>(); pLinuxSysmanImp->pFsAccess = pFsAccess.get(); pEventsImp = static_cast(pSysmanDeviceImp->pEvents); pOsEventsPrev = pEventsImp->pOsEvents; pEventsImp->pOsEvents = nullptr; pGlobalOperations = std::make_unique(pLinuxSysmanImp); pGlobalOperationsOriginal = pSysmanDeviceImp->pGlobalOperations; pSysmanDeviceImp->pGlobalOperations = pGlobalOperations.get(); pSysmanDeviceImp->pGlobalOperations->init(); pSysfsAccessOriginal = pLinuxSysmanImp->pSysfsAccess; pSysfsAccess = std::make_unique>>(); pLinuxSysmanImp->pSysfsAccess = pSysfsAccess.get(); ON_CALL(*pSysfsAccess.get(), readSymLink(_, _)) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::getValStringSymLinkSuccess)); pEventsImp->init(); } void TearDown() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } if (nullptr != pEventsImp->pOsEvents) { delete pEventsImp->pOsEvents; } pEventsImp->pOsEvents = pOsEventsPrev; pEventsImp = nullptr; pLinuxSysmanImp->pSysfsAccess = pSysfsAccessOriginal; pLinuxSysmanImp->pFsAccess = pFsAccessOriginal; pSysmanDeviceImp->pGlobalOperations = pGlobalOperationsOriginal; SysmanDeviceFixture::TearDown(); } }; 
TEST_F(SysmanEventsFixture, GivenValidDeviceHandleWhenListeningForResetRequiredEventsThenEventListenAPIReturnsAfterReceivingEventWithinTimeout) { EXPECT_EQ(ZE_RESULT_SUCCESS, zesDeviceEventRegister(device->toHandle(), ZES_EVENT_TYPE_FLAG_DEVICE_RESET_REQUIRED)); ON_CALL(*pFsAccess.get(), read(_, Matcher(_))) .WillByDefault(::testing::Invoke(pFsAccess.get(), &Mock::getValReturnValAsOne)); zes_device_handle_t *phDevices = new zes_device_handle_t[1]; phDevices[0] = device->toHandle(); uint32_t numDeviceEvents = 0; zes_event_type_flags_t *pDeviceEvents = new zes_event_type_flags_t[1]; EXPECT_EQ(ZE_RESULT_SUCCESS, zesDriverEventListen(driverHandle->toHandle(), 1u, 1u, phDevices, &numDeviceEvents, pDeviceEvents)); EXPECT_EQ(1u, numDeviceEvents); EXPECT_EQ(ZES_EVENT_TYPE_FLAG_DEVICE_RESET_REQUIRED, pDeviceEvents[0]); delete[] phDevices; delete[] pDeviceEvents; } TEST_F(SysmanEventsFixture, GivenValidDeviceHandleWhenListeningForResetRequiredEventsThenEventListenAPIWaitForTimeoutIfEventNotReceived) { EXPECT_EQ(ZE_RESULT_SUCCESS, zesDeviceEventRegister(device->toHandle(), ZES_EVENT_TYPE_FLAG_DEVICE_RESET_REQUIRED)); ON_CALL(*pFsAccess.get(), read(_, Matcher(_))) .WillByDefault(::testing::Invoke(pFsAccess.get(), &Mock::getValReturnValAsZero)); zes_device_handle_t *phDevices = new zes_device_handle_t[1]; phDevices[0] = device->toHandle(); uint32_t numDeviceEvents = 0; zes_event_type_flags_t *pDeviceEvents = new zes_event_type_flags_t[1]; EXPECT_EQ(ZE_RESULT_SUCCESS, zesDriverEventListen(driverHandle->toHandle(), 1u, 1u, phDevices, &numDeviceEvents, pDeviceEvents)); EXPECT_EQ(0u, numDeviceEvents); ON_CALL(*pFsAccess.get(), read(_, Matcher(_))) .WillByDefault(::testing::Invoke(pFsAccess.get(), &Mock::getValFileNotFound)); EXPECT_EQ(ZE_RESULT_SUCCESS, zesDriverEventListen(driverHandle->toHandle(), 1u, 1u, phDevices, &numDeviceEvents, pDeviceEvents)); EXPECT_EQ(0u, numDeviceEvents); delete[] phDevices; delete[] pDeviceEvents; } TEST_F(SysmanEventsFixture, 
GivenValidDeviceHandleWhenListeningForCurrentlyUnsupportedEventsThenEventListenAPIWaitForTimeoutIfEventNotReceived) { EXPECT_EQ(ZE_RESULT_SUCCESS, zesDeviceEventRegister(device->toHandle(), ZES_EVENT_TYPE_FLAG_TEMP_THRESHOLD2)); ON_CALL(*pFsAccess.get(), read(_, Matcher(_))) .WillByDefault(::testing::Invoke(pFsAccess.get(), &Mock::getValReturnValAsOne)); zes_device_handle_t *phDevices = new zes_device_handle_t[1]; phDevices[0] = device->toHandle(); uint32_t numDeviceEvents = 0; zes_event_type_flags_t *pDeviceEvents = new zes_event_type_flags_t[1]; EXPECT_EQ(ZE_RESULT_SUCCESS, zesDriverEventListen(driverHandle->toHandle(), 1u, 1u, phDevices, &numDeviceEvents, pDeviceEvents)); EXPECT_EQ(0u, numDeviceEvents); delete[] phDevices; delete[] pDeviceEvents; } TEST_F(SysmanEventsFixture, GivenReadSymLinkCallFailsWhenGettingPCIBDFThenEmptyPciIdPathTagReceived) { ON_CALL(*pSysfsAccess.get(), readSymLink(_, _)) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::getValStringSymLinkFailure)); PublicLinuxEventsImp linuxEventImp(pOsSysman); EXPECT_TRUE(linuxEventImp.pciIdPathTag.empty()); } TEST_F(SysmanEventsFixture, GivenValidDeviceHandleWhenListeningForDeviceDetachEventsThenEventListenAPIReturnsAfterReceivingEventWithinTimeout) { EXPECT_EQ(ZE_RESULT_SUCCESS, zesDeviceEventRegister(device->toHandle(), ZES_EVENT_TYPE_FLAG_DEVICE_DETACH)); ON_CALL(*pFsAccess.get(), read(_, Matcher(_))) .WillByDefault(::testing::Invoke(pFsAccess.get(), &Mock::getValReturnValAsOne)); zes_device_handle_t *phDevices = new zes_device_handle_t[1]; phDevices[0] = device->toHandle(); uint32_t numDeviceEvents = 0; zes_event_type_flags_t *pDeviceEvents = new zes_event_type_flags_t[1]; EXPECT_EQ(ZE_RESULT_SUCCESS, zesDriverEventListen(driverHandle->toHandle(), 1u, 1u, phDevices, &numDeviceEvents, pDeviceEvents)); EXPECT_EQ(1u, numDeviceEvents); EXPECT_EQ(ZES_EVENT_TYPE_FLAG_DEVICE_DETACH, pDeviceEvents[0]); delete[] phDevices; delete[] pDeviceEvents; } TEST_F(SysmanEventsFixture, 
GivenValidDeviceHandleWhenListeningForDeviceDetachEventsThenAfterReceivingEventRegisterEventAgainToReceiveEvent) { EXPECT_EQ(ZE_RESULT_SUCCESS, zesDeviceEventRegister(device->toHandle(), ZES_EVENT_TYPE_FLAG_DEVICE_DETACH)); ON_CALL(*pFsAccess.get(), read(_, Matcher(_))) .WillByDefault(::testing::Invoke(pFsAccess.get(), &Mock::getValReturnValAsOne)); zes_device_handle_t *phDevices = new zes_device_handle_t[1]; phDevices[0] = device->toHandle(); uint32_t numDeviceEvents = 0; zes_event_type_flags_t *pDeviceEvents = new zes_event_type_flags_t[1]; EXPECT_EQ(ZE_RESULT_SUCCESS, zesDriverEventListen(driverHandle->toHandle(), 1u, 1u, phDevices, &numDeviceEvents, pDeviceEvents)); EXPECT_EQ(1u, numDeviceEvents); EXPECT_EQ(ZES_EVENT_TYPE_FLAG_DEVICE_DETACH, pDeviceEvents[0]); numDeviceEvents = 0; EXPECT_EQ(ZE_RESULT_SUCCESS, zesDriverEventListen(driverHandle->toHandle(), 1u, 1u, phDevices, &numDeviceEvents, pDeviceEvents)); EXPECT_EQ(0u, numDeviceEvents); EXPECT_EQ(ZE_RESULT_SUCCESS, zesDeviceEventRegister(device->toHandle(), ZES_EVENT_TYPE_FLAG_DEVICE_DETACH)); EXPECT_EQ(ZE_RESULT_SUCCESS, zesDriverEventListen(driverHandle->toHandle(), 1u, 1u, phDevices, &numDeviceEvents, pDeviceEvents)); EXPECT_EQ(1u, numDeviceEvents); EXPECT_EQ(ZES_EVENT_TYPE_FLAG_DEVICE_DETACH, pDeviceEvents[0]); delete[] phDevices; delete[] pDeviceEvents; } TEST_F(SysmanEventsFixture, GivenValidDeviceHandleWhenListeningForDeviceDetachEventsThenEventListenAPIWaitForTimeoutIfEventNotReceived) { EXPECT_EQ(ZE_RESULT_SUCCESS, zesDeviceEventRegister(device->toHandle(), ZES_EVENT_TYPE_FLAG_DEVICE_DETACH)); ON_CALL(*pFsAccess.get(), read(_, Matcher(_))) .WillByDefault(::testing::Invoke(pFsAccess.get(), &Mock::getValReturnValAsZero)); zes_device_handle_t *phDevices = new zes_device_handle_t[1]; phDevices[0] = device->toHandle(); uint32_t numDeviceEvents = 0; zes_event_type_flags_t *pDeviceEvents = new zes_event_type_flags_t[1]; EXPECT_EQ(ZE_RESULT_SUCCESS, zesDriverEventListen(driverHandle->toHandle(), 1u, 1u, 
phDevices, &numDeviceEvents, pDeviceEvents)); EXPECT_EQ(0u, numDeviceEvents); ON_CALL(*pFsAccess.get(), read(_, Matcher(_))) .WillByDefault(::testing::Invoke(pFsAccess.get(), &Mock::getValFileNotFound)); EXPECT_EQ(ZE_RESULT_SUCCESS, zesDriverEventListen(driverHandle->toHandle(), 1u, 1u, phDevices, &numDeviceEvents, pDeviceEvents)); EXPECT_EQ(0u, numDeviceEvents); delete[] phDevices; delete[] pDeviceEvents; } TEST_F(SysmanEventsFixture, GivenValidDeviceHandleWhenListeningForDeviceAttachEventsThenEventListenAPIReturnsAfterReceivingEventWithinTimeout) { EXPECT_EQ(ZE_RESULT_SUCCESS, zesDeviceEventRegister(device->toHandle(), ZES_EVENT_TYPE_FLAG_DEVICE_ATTACH)); ON_CALL(*pFsAccess.get(), read(_, Matcher(_))) .WillByDefault(::testing::Invoke(pFsAccess.get(), &Mock::getValReturnValAsOne)); zes_device_handle_t *phDevices = new zes_device_handle_t[1]; phDevices[0] = device->toHandle(); uint32_t numDeviceEvents = 0; zes_event_type_flags_t *pDeviceEvents = new zes_event_type_flags_t[1]; EXPECT_EQ(ZE_RESULT_SUCCESS, zesDriverEventListen(driverHandle->toHandle(), 1u, 1u, phDevices, &numDeviceEvents, pDeviceEvents)); EXPECT_EQ(1u, numDeviceEvents); EXPECT_EQ(ZES_EVENT_TYPE_FLAG_DEVICE_ATTACH, pDeviceEvents[0]); delete[] phDevices; delete[] pDeviceEvents; } TEST_F(SysmanEventsFixture, GivenValidDeviceHandleWhenListeningForDeviceAttachEventsThenEventListenAPIWaitForTimeoutIfEventNotReceived) { EXPECT_EQ(ZE_RESULT_SUCCESS, zesDeviceEventRegister(device->toHandle(), ZES_EVENT_TYPE_FLAG_DEVICE_ATTACH)); ON_CALL(*pFsAccess.get(), read(_, Matcher(_))) .WillByDefault(::testing::Invoke(pFsAccess.get(), &Mock::getValReturnValAsZero)); zes_device_handle_t *phDevices = new zes_device_handle_t[1]; phDevices[0] = device->toHandle(); uint32_t numDeviceEvents = 0; zes_event_type_flags_t *pDeviceEvents = new zes_event_type_flags_t[1]; EXPECT_EQ(ZE_RESULT_SUCCESS, zesDriverEventListen(driverHandle->toHandle(), 1u, 1u, phDevices, &numDeviceEvents, pDeviceEvents)); EXPECT_EQ(0u, numDeviceEvents); 
ON_CALL(*pFsAccess.get(), read(_, Matcher(_))) .WillByDefault(::testing::Invoke(pFsAccess.get(), &Mock::getValFileNotFound)); EXPECT_EQ(ZE_RESULT_SUCCESS, zesDriverEventListen(driverHandle->toHandle(), 1u, 1u, phDevices, &numDeviceEvents, pDeviceEvents)); EXPECT_EQ(0u, numDeviceEvents); delete[] phDevices; delete[] pDeviceEvents; } TEST_F(SysmanEventsFixture, GivenValidDeviceHandleWhenListeningForMemHealthEventsThenEventListenAPIReturnsAfterReceivingEventWithinTimeout) { PublicLinuxEventsImp *pLinuxEventsImp = new PublicLinuxEventsImp(pOsSysman); pLinuxEventsImp->eventRegister(ZES_EVENT_TYPE_FLAG_MEM_HEALTH); pLinuxEventsImp->memHealthAtEventRegister = ZES_MEM_HEALTH_OK; zes_event_type_flags_t events = 0; uint32_t timeout = 1u; EXPECT_TRUE(pLinuxEventsImp->eventListen(events, timeout)); EXPECT_EQ(events, ZES_EVENT_TYPE_FLAG_MEM_HEALTH); delete pLinuxEventsImp; } TEST_F(SysmanEventsFixture, GivenValidDeviceHandleWhenListeningForMemHealthEventsAndMemHealthDidntOccurThenEventListenAPIReturnsWithinTimeout) { EXPECT_EQ(ZE_RESULT_SUCCESS, zesDeviceEventRegister(device->toHandle(), ZES_EVENT_TYPE_FLAG_MEM_HEALTH)); zes_device_handle_t *phDevices = new zes_device_handle_t[1]; phDevices[0] = device->toHandle(); uint32_t numDeviceEvents = 0; zes_event_type_flags_t *pDeviceEvents = new zes_event_type_flags_t[1]; EXPECT_EQ(ZE_RESULT_SUCCESS, zesDriverEventListen(driverHandle->toHandle(), 1u, 1u, phDevices, &numDeviceEvents, pDeviceEvents)); EXPECT_EQ(0u, numDeviceEvents); delete[] phDevices; delete[] pDeviceEvents; } TEST_F(SysmanEventsFixture, GivenValidDeviceHandleWhenListeningForAListOfEventsThenEventRegisterAPIReturnsProperErrorCodeInCaseEventsAreInvalid) { zes_event_type_flags_t events1 = 0x7ffe; EXPECT_EQ(ZE_RESULT_SUCCESS, zesDeviceEventRegister(device->toHandle(), events1)); zes_event_type_flags_t events2 = 0x1e240; EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ENUMERATION, zesDeviceEventRegister(device->toHandle(), events2)); } TEST_F(SysmanEventsFixture, 
GivenValidDeviceHandleWhenListeningForResetRequiredEventsThenEventListenExAPIReturnsAfterReceivingEventWithinTimeout) { EXPECT_EQ(ZE_RESULT_SUCCESS, zesDeviceEventRegister(device->toHandle(), ZES_EVENT_TYPE_FLAG_DEVICE_RESET_REQUIRED)); ON_CALL(*pFsAccess.get(), read(_, Matcher(_))) .WillByDefault(::testing::Invoke(pFsAccess.get(), &Mock::getValReturnValAsOne)); zes_device_handle_t *phDevices = new zes_device_handle_t[1]; phDevices[0] = device->toHandle(); uint32_t numDeviceEvents = 0; zes_event_type_flags_t *pDeviceEvents = new zes_event_type_flags_t[1]; EXPECT_EQ(ZE_RESULT_SUCCESS, zesDriverEventListenEx(driverHandle->toHandle(), 1u, 1u, phDevices, &numDeviceEvents, pDeviceEvents)); EXPECT_EQ(1u, numDeviceEvents); EXPECT_EQ(ZES_EVENT_TYPE_FLAG_DEVICE_RESET_REQUIRED, pDeviceEvents[0]); delete[] phDevices; delete[] pDeviceEvents; } TEST_F(SysmanEventsFixture, GivenValidDeviceHandleWhenListeningForResetRequiredEventsThenEventListenExAPIWaitForTimeoutIfEventNotReceived) { EXPECT_EQ(ZE_RESULT_SUCCESS, zesDeviceEventRegister(device->toHandle(), ZES_EVENT_TYPE_FLAG_DEVICE_RESET_REQUIRED)); ON_CALL(*pFsAccess.get(), read(_, Matcher(_))) .WillByDefault(::testing::Invoke(pFsAccess.get(), &Mock::getValReturnValAsZero)); zes_device_handle_t *phDevices = new zes_device_handle_t[1]; phDevices[0] = device->toHandle(); uint32_t numDeviceEvents = 0; zes_event_type_flags_t *pDeviceEvents = new zes_event_type_flags_t[1]; EXPECT_EQ(ZE_RESULT_SUCCESS, zesDriverEventListenEx(driverHandle->toHandle(), 1u, 1u, phDevices, &numDeviceEvents, pDeviceEvents)); EXPECT_EQ(0u, numDeviceEvents); ON_CALL(*pFsAccess.get(), read(_, Matcher(_))) .WillByDefault(::testing::Invoke(pFsAccess.get(), &Mock::getValFileNotFound)); EXPECT_EQ(ZE_RESULT_SUCCESS, zesDriverEventListenEx(driverHandle->toHandle(), 1u, 1u, phDevices, &numDeviceEvents, pDeviceEvents)); EXPECT_EQ(0u, numDeviceEvents); delete[] phDevices; delete[] pDeviceEvents; } } // namespace ult } // namespace L0 
compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/events/windows/000077500000000000000000000000001422164147700320335ustar00rootroot00000000000000CMakeLists.txt000066400000000000000000000004311422164147700345120ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/events/windows# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(WIN32) target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/test_zes_events.cpp ) endif() test_zes_events.cpp000066400000000000000000000117101422164147700357040ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/events/windows/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/events/events_imp.h" #include "level_zero/tools/source/sysman/events/windows/os_events_imp.h" #include "level_zero/tools/source/sysman/global_operations/global_operations_imp.h" #include "level_zero/tools/test/unit_tests/sources/sysman/windows/mock_kmd_sys_manager.h" #include "level_zero/tools/test/unit_tests/sources/sysman/windows/mock_sysman_fixture.h" extern bool sysmanUltsEnable; using ::testing::Matcher; namespace L0 { namespace ult { class SysmanEventsFixture : public SysmanDeviceFixture { protected: std::unique_ptr> pKmdSysManager; std::unique_ptr pGlobalOperations; OsEvents *pOsEventsPrev = nullptr; L0::EventsImp *pEventsImp; KmdSysManager *pOriginalKmdSysManager = nullptr; GlobalOperations *pGlobalOperationsOld = nullptr; void SetUp() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } SysmanDeviceFixture::SetUp(); } void init(bool allowSetCalls) { pKmdSysManager.reset(new Mock); pKmdSysManager->allowSetCalls = allowSetCalls; EXPECT_CALL(*pKmdSysManager, escape(_, _, _, _, _)) .WillRepeatedly(::testing::Invoke(pKmdSysManager.get(), &Mock::mock_escape)); pOriginalKmdSysManager = 
pWddmSysmanImp->pKmdSysManager; pWddmSysmanImp->pKmdSysManager = pKmdSysManager.get(); pEventsImp = static_cast(pSysmanDeviceImp->pEvents); pOsEventsPrev = pEventsImp->pOsEvents; pEventsImp->pOsEvents = nullptr; pGlobalOperations = std::make_unique(pWddmSysmanImp); pGlobalOperationsOld = pSysmanDeviceImp->pGlobalOperations; pSysmanDeviceImp->pGlobalOperations = pGlobalOperations.get(); pSysmanDeviceImp->pGlobalOperations->init(); pEventsImp->init(); } void TearDown() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } if (nullptr != pEventsImp->pOsEvents) { delete pEventsImp->pOsEvents; } pEventsImp->pOsEvents = pOsEventsPrev; pEventsImp = nullptr; pSysmanDeviceImp->pGlobalOperations = pGlobalOperationsOld; pWddmSysmanImp->pKmdSysManager = pOriginalKmdSysManager; SysmanDeviceFixture::TearDown(); } }; TEST_F(SysmanEventsFixture, GivenValidDeviceHandleWhenListeningForResetRequiredEventsThenEventListenAPIReturnsAfterTimingOutWithNoEvent) { init(true); EXPECT_EQ(ZE_RESULT_SUCCESS, zesDeviceEventRegister(device->toHandle(), ZES_EVENT_TYPE_FLAG_DEVICE_DETACH)); zes_device_handle_t *phDevices = new zes_device_handle_t[1]; phDevices[0] = device->toHandle(); uint32_t numDeviceEvents = 0; zes_event_type_flags_t *pDeviceEvents = new zes_event_type_flags_t[1]; EXPECT_EQ(ZE_RESULT_SUCCESS, zesDriverEventListen(driverHandle->toHandle(), 100u, 1u, phDevices, &numDeviceEvents, pDeviceEvents)); EXPECT_EQ(1u, numDeviceEvents); delete[] phDevices; delete[] pDeviceEvents; } TEST_F(SysmanEventsFixture, GivenValidDeviceHandleWhenListeningForResetRequiredEventsThenEventListenAPIReturnsAfterReceivingEventOnInfiniteWait) { init(true); EXPECT_EQ(ZE_RESULT_SUCCESS, zesDeviceEventRegister(device->toHandle(), ZES_EVENT_TYPE_FLAG_DEVICE_DETACH)); zes_device_handle_t *phDevices = new zes_device_handle_t[1]; phDevices[0] = device->toHandle(); uint32_t numDeviceEvents = 0; zes_event_type_flags_t *pDeviceEvents = new zes_event_type_flags_t[1]; 
pKmdSysManager->signalEvent(ZES_EVENT_TYPE_FLAG_DEVICE_DETACH); EXPECT_EQ(ZE_RESULT_SUCCESS, zesDriverEventListen(driverHandle->toHandle(), INFINITE, 1u, phDevices, &numDeviceEvents, pDeviceEvents)); EXPECT_EQ(1u, numDeviceEvents); delete[] phDevices; delete[] pDeviceEvents; } TEST_F(SysmanEventsFixture, GivenValidDeviceHandleWhenListeningForResetRequiredEventsThenEventListenAPIReturnsAfterReceivingEventOnInfiniteWaitMultipleTimes) { init(true); EXPECT_EQ(ZE_RESULT_SUCCESS, zesDeviceEventRegister(device->toHandle(), ZES_EVENT_TYPE_FLAG_DEVICE_DETACH)); zes_device_handle_t *phDevices = new zes_device_handle_t[1]; phDevices[0] = device->toHandle(); uint32_t numDeviceEvents = 0; zes_event_type_flags_t *pDeviceEvents = new zes_event_type_flags_t[1]; pKmdSysManager->signalEvent(ZES_EVENT_TYPE_FLAG_DEVICE_DETACH); EXPECT_EQ(ZE_RESULT_SUCCESS, zesDriverEventListen(driverHandle->toHandle(), INFINITE, 1u, phDevices, &numDeviceEvents, pDeviceEvents)); EXPECT_EQ(1u, numDeviceEvents); pKmdSysManager->signalEvent(ZES_EVENT_TYPE_FLAG_DEVICE_DETACH); EXPECT_EQ(ZE_RESULT_SUCCESS, zesDriverEventListen(driverHandle->toHandle(), INFINITE, 1u, phDevices, &numDeviceEvents, pDeviceEvents)); EXPECT_EQ(1u, numDeviceEvents); delete[] phDevices; delete[] pDeviceEvents; } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/fabric_port/000077500000000000000000000000001422164147700313275ustar00rootroot00000000000000CMakeLists.txt000066400000000000000000000003051422164147700340060ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/fabric_port# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) add_subdirectories() 
compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/fabric_port/linux/000077500000000000000000000000001422164147700324665ustar00rootroot00000000000000CMakeLists.txt000066400000000000000000000013471422164147700351540ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/fabric_port/linux# # Copyright (C) 2020-2022 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_TESTS_TOOLS_SYSMAN_FABRICPORT_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}test_zes_fabric_port.cpp ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}mock_fabric_device.h ) if(NEO_ENABLE_i915_PRELIM_DETECTION AND("${BRANCH_TYPE}" STREQUAL "")) list(REMOVE_ITEM L0_TESTS_TOOLS_SYSMAN_FABRICPORT_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/test_zes_fabric_port.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_fabric_device.h ) endif() if(UNIX) target_sources(${TARGET_NAME} PRIVATE ${L0_TESTS_TOOLS_SYSMAN_FABRICPORT_LINUX} ) add_subdirectories() endif() mock_fabric_device.h000066400000000000000000000010221422164147700363310ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/fabric_port/linux/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "sysman/fabric_port/fabric_port.h" namespace L0 { namespace ult { template <> struct Mock : public FabricDevice { MOCK_METHOD(uint32_t, getNumPorts, (), (override)); MOCK_METHOD(OsFabricDevice *, getOsFabricDevice, (), (override)); Mock() = default; ~Mock() override = default; }; } // namespace ult } // namespace L0 test_zes_fabric_port.cpp000066400000000000000000000223731422164147700373340ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/fabric_port/linux/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ 
#include "level_zero/tools/test/unit_tests/sources/sysman/fabric_port/linux/mock_fabric_device.h" #include "level_zero/tools/test/unit_tests/sources/sysman/linux/mock_sysman_fixture.h" #include "gmock/gmock.h" #include "gtest/gtest.h" #include extern bool sysmanUltsEnable; using ::testing::Return; namespace L0 { namespace ult { class ZesFabricPortFixture : public SysmanDeviceFixture { protected: static uint32_t numPorts; void SetUp() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } SysmanDeviceFixture::SetUp(); FabricPortHandleContext *pFabricPortHandleContext = pSysmanDeviceImp->pFabricPortHandleContext; if (nullptr != pFabricPortHandleContext->pFabricDevice) { for (FabricPort *pFabricPort : pFabricPortHandleContext->handleList) { delete pFabricPort; } pFabricPortHandleContext->handleList.clear(); delete pFabricPortHandleContext->pFabricDevice; pFabricPortHandleContext->pFabricDevice = nullptr; } Mock *mockFabricDevice = new NiceMock>; ON_CALL(*mockFabricDevice, getNumPorts()) .WillByDefault(Return(numPorts)); pFabricPortHandleContext->pFabricDevice = mockFabricDevice; pFabricPortHandleContext->init(); } void TearDown() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } SysmanDeviceFixture::TearDown(); } }; uint32_t ZesFabricPortFixture::numPorts = 2U; TEST_F(ZesFabricPortFixture, GivenPortCountZeroWhenCallingZesFabricPortGetThenCountIsReturnedAndVerifyZesFabricPortGetCallSucceeds) { uint32_t count = 0U; ze_result_t result = zesDeviceEnumFabricPorts(device, &count, NULL); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(count, ZesFabricPortFixture::numPorts); } TEST_F(ZesFabricPortFixture, GivenPortCountZeroAndValidHandlePtrWhenCallingZesFabricPortGetThenCountIsReturnedAndNoHandlesReturnedAndVerifyZesFabricPortGetCallSucceeds) { uint32_t count = 0U; zes_fabric_port_handle_t handle = static_cast(0UL); ze_result_t result = zesDeviceEnumFabricPorts(device, &count, &handle); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(count, ZesFabricPortFixture::numPorts); 
EXPECT_EQ(handle, static_cast(0UL)); } TEST_F(ZesFabricPortFixture, GivenPortCountCorrectWhenCallingZesFabricPortGetThenCountHandlesAreReturnedAndAndVerifyZesFabricPortGetCallSucceeds) { uint32_t count = ZesFabricPortFixture::numPorts; zes_fabric_port_handle_t hPorts[ZesFabricPortFixture::numPorts]; ze_result_t result = zesDeviceEnumFabricPorts(device, &count, hPorts); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(count, ZesFabricPortFixture::numPorts); } TEST_F(ZesFabricPortFixture, GivenPortCountGreaterThanPortsWhenCallingZesFabricPortGetThenCorrectCountisReturnedAndAndVerifyZesFabricPortGetCallSucceeds) { uint32_t count = ZesFabricPortFixture::numPorts + 1U; zes_fabric_port_handle_t hPorts[ZesFabricPortFixture::numPorts + 1U]; ze_result_t result = zesDeviceEnumFabricPorts(device, &count, hPorts); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(count, ZesFabricPortFixture::numPorts); } TEST_F(ZesFabricPortFixture, GivenPortCounLessThanPortsWhenCallingZesFabricPortGetThenCountLessTanPortsHandlesAreReturned) { uint32_t count = ZesFabricPortFixture::numPorts - 1U; zes_fabric_port_handle_t hPorts[ZesFabricPortFixture::numPorts - 1U]; ze_result_t result = zesDeviceEnumFabricPorts(device, &count, hPorts); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(count, ZesFabricPortFixture::numPorts - 1U); } TEST_F(ZesFabricPortFixture, GivenValidFabricPortHandleWhenCallingZesFabricPortGetPropertiesThenZesFabricPortGetPropertiesCallSucceeds) { uint32_t count = 1U; zes_fabric_port_handle_t hPorts[1U]; ze_result_t result = zesDeviceEnumFabricPorts(device, &count, hPorts); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(count, 1U); zes_fabric_port_properties_t properties; // Initialize values properties.onSubdevice = true; properties.subdeviceId = std::numeric_limits::max(); std::memset(properties.model, std::numeric_limits::max(), ZES_MAX_FABRIC_PORT_MODEL_SIZE); properties.portId.fabricId = std::numeric_limits::max(); properties.portId.attachId = std::numeric_limits::max(); 
properties.portId.portNumber = std::numeric_limits::max(); properties.maxRxSpeed.bitRate = std::numeric_limits::max(); properties.maxRxSpeed.width = std::numeric_limits::max(); properties.maxTxSpeed.bitRate = std::numeric_limits::max(); properties.maxTxSpeed.width = std::numeric_limits::max(); result = zesFabricPortGetProperties(hPorts[0], &properties); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_FALSE(properties.onSubdevice); EXPECT_EQ(0L, properties.subdeviceId); EXPECT_STREQ("EXAMPLE", properties.model); EXPECT_EQ(0U, properties.portId.fabricId); EXPECT_EQ(0U, properties.portId.attachId); EXPECT_EQ(0U, properties.portId.portNumber); EXPECT_EQ(0L, properties.maxRxSpeed.bitRate); EXPECT_EQ(0, properties.maxRxSpeed.width); EXPECT_EQ(0L, properties.maxTxSpeed.bitRate); EXPECT_EQ(0, properties.maxTxSpeed.width); } TEST_F(ZesFabricPortFixture, GivenValidFabricPortHandleWhenCallingZesFabricPortGetLinkTypeThenZesFabricPortGetLinkTypeCallSucceeds) { uint32_t count = 1U; zes_fabric_port_handle_t hPorts[1U]; ze_result_t result = zesDeviceEnumFabricPorts(device, &count, hPorts); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(count, 1U); zes_fabric_link_type_t linkType; result = zesFabricPortGetLinkType(hPorts[0], &linkType); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_STREQ("SAMPLE LINK, VERBOSE", linkType.desc); } TEST_F(ZesFabricPortFixture, GivenValidFabricPortHandleWhenCallingZesFabricPortGetConfigThenZesFabricPortGetConfigCallSucceeds) { uint32_t count = 1U; zes_fabric_port_handle_t hPorts[1U]; ze_result_t result = zesDeviceEnumFabricPorts(device, &count, hPorts); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(count, 1U); zes_fabric_port_config_t getConfig = {.stype = ZES_STRUCTURE_TYPE_FABRIC_PORT_PROPERTIES, .pNext = nullptr, .enabled = true, .beaconing = true}; result = zesFabricPortGetConfig(hPorts[0], &getConfig); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_FALSE(getConfig.enabled); EXPECT_FALSE(getConfig.beaconing); } TEST_F(ZesFabricPortFixture, 
GivenValidFabricPortHandleWhenCallingZesFabricPortSetConfigThenZesFabricPortGetConfigCallSucceeds) { uint32_t count = 1U; zes_fabric_port_handle_t hPorts[1U]; ze_result_t result = zesDeviceEnumFabricPorts(device, &count, hPorts); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(count, 1U); zes_fabric_port_config_t setConfig = {.stype = ZES_STRUCTURE_TYPE_FABRIC_PORT_PROPERTIES, .pNext = nullptr, .enabled = true, .beaconing = false}; result = zesFabricPortSetConfig(hPorts[0U], &setConfig); EXPECT_EQ(ZE_RESULT_SUCCESS, result); zes_fabric_port_config_t getConfig = {.stype = ZES_STRUCTURE_TYPE_FABRIC_PORT_PROPERTIES, .pNext = nullptr, .enabled = false, .beaconing = true}; result = zesFabricPortGetConfig(hPorts[0], &getConfig); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_TRUE(getConfig.enabled); EXPECT_FALSE(getConfig.beaconing); } TEST_F(ZesFabricPortFixture, GivenValidFabricPortHandleWhenCallingZesFabricPortGetStateThenZesFabricPortGetStateCallSucceeds) { uint32_t count = 1U; zes_fabric_port_handle_t hPorts[1U]; ze_result_t result = zesDeviceEnumFabricPorts(device, &count, hPorts); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(count, 1U); zes_fabric_port_state_t state; result = zesFabricPortGetState(hPorts[0], &state); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(0U, state.status); EXPECT_EQ(0U, state.qualityIssues); EXPECT_EQ(0U, state.failureReasons); EXPECT_EQ(0U, state.remotePortId.fabricId); EXPECT_EQ(0U, state.remotePortId.attachId); EXPECT_EQ(0U, state.remotePortId.portNumber); EXPECT_EQ(0L, state.rxSpeed.bitRate); EXPECT_EQ(0, state.rxSpeed.width); EXPECT_EQ(0L, state.txSpeed.bitRate); EXPECT_EQ(0, state.txSpeed.width); } TEST_F(ZesFabricPortFixture, GivenValidFabricPortHandleWhenCallingZesFabricPortGetThroughputThenZesFabricPortGetThroughputCallSucceeds) { uint32_t count = 1U; zes_fabric_port_handle_t hPorts[1U]; ze_result_t result = zesDeviceEnumFabricPorts(device, &count, hPorts); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(count, 1U); 
zes_fabric_port_throughput_t throughput; // Initialize values throughput.timestamp = 0LU; throughput.rxCounter = std::numeric_limits::max(); throughput.txCounter = std::numeric_limits::max(); result = zesFabricPortGetThroughput(hPorts[0], &throughput); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(0LU, throughput.timestamp); EXPECT_EQ(0LU, throughput.rxCounter); EXPECT_EQ(0LU, throughput.txCounter); } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/fan/000077500000000000000000000000001422164147700276015ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/fan/CMakeLists.txt000066400000000000000000000003051422164147700323370ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) add_subdirectories() compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/fan/linux/000077500000000000000000000000001422164147700307405ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/fan/linux/CMakeLists.txt000066400000000000000000000004251422164147700335010ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(UNIX) target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/test_zes_fan.cpp ) endif() test_zes_fan.cpp000066400000000000000000000114671422164147700340620ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/fan/linux/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/fan/linux/os_fan_imp.h" #include "level_zero/tools/test/unit_tests/sources/sysman/linux/mock_sysman_fixture.h" #include "sysman/fan/fan_imp.h" extern bool 
sysmanUltsEnable; namespace L0 { namespace ult { constexpr uint32_t fanHandleComponentCount = 0u; class SysmanDeviceFanFixture : public SysmanDeviceFixture { protected: void SetUp() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } SysmanDeviceFixture::SetUp(); pSysmanDeviceImp->pFanHandleContext->init(); } void TearDown() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } SysmanDeviceFixture::TearDown(); } std::vector get_fan_handles(uint32_t count) { std::vector handles(count, nullptr); EXPECT_EQ(zesDeviceEnumFans(device->toHandle(), &count, handles.data()), ZE_RESULT_SUCCESS); return handles; } }; TEST_F(SysmanDeviceFanFixture, GivenComponentCountZeroWhenEnumeratingFanDomainsThenValidCountIsReturnedAndVerifySysmanFanGetCallSucceeds) { uint32_t count = 0; EXPECT_EQ(zesDeviceEnumFans(device->toHandle(), &count, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(count, fanHandleComponentCount); } TEST_F(SysmanDeviceFanFixture, GivenInvalidComponentCountWhenEnumeratingFanDomainsThenValidCountIsReturnedAndVerifySysmanFanGetCallSucceeds) { uint32_t count = 0; EXPECT_EQ(zesDeviceEnumFans(device->toHandle(), &count, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(count, fanHandleComponentCount); count = count + 1; EXPECT_EQ(zesDeviceEnumFans(device->toHandle(), &count, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(count, fanHandleComponentCount); } TEST_F(SysmanDeviceFanFixture, GivenComponentCountZeroWhenEnumeratingFanDomainsThenValidFanHandlesIsReturned) { uint32_t count = 0; EXPECT_EQ(zesDeviceEnumFans(device->toHandle(), &count, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(count, fanHandleComponentCount); std::vector handles(count, nullptr); EXPECT_EQ(zesDeviceEnumFans(device->toHandle(), &count, handles.data()), ZE_RESULT_SUCCESS); for (auto handle : handles) { EXPECT_NE(handle, nullptr); } } TEST_F(SysmanDeviceFanFixture, GivenValidFanHandleWhenGettingFanPropertiesThenCallSucceeds) { auto handles = get_fan_handles(fanHandleComponentCount); for (auto handle : handles) { zes_fan_properties_t 
properties; EXPECT_EQ(ZE_RESULT_SUCCESS, zesFanGetProperties(handle, &properties)); } } TEST_F(SysmanDeviceFanFixture, GivenValidFanHandleWhenGettingFanConfigThenUnsupportedIsReturned) { auto handles = get_fan_handles(fanHandleComponentCount); for (auto handle : handles) { zes_fan_config_t fanConfig; EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, zesFanGetConfig(handle, &fanConfig)); } } TEST_F(SysmanDeviceFanFixture, GivenValidFanHandleWhenSettingDefaultModeThenUnsupportedIsReturned) { auto handles = get_fan_handles(fanHandleComponentCount); for (auto handle : handles) { EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, zesFanSetDefaultMode(handle)); } } TEST_F(SysmanDeviceFanFixture, GivenValidFanHandleWhenSettingFixedSpeedModeThenUnsupportedIsReturned) { auto handles = get_fan_handles(fanHandleComponentCount); for (auto handle : handles) { zes_fan_speed_t fanSpeed = {0}; EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, zesFanSetFixedSpeedMode(handle, &fanSpeed)); } } TEST_F(SysmanDeviceFanFixture, GivenValidFanHandleWhenSettingTheSpeedTableModeThenUnsupportedIsReturned) { auto handles = get_fan_handles(fanHandleComponentCount); for (auto handle : handles) { zes_fan_speed_table_t fanSpeedTable = {0}; EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, zesFanSetSpeedTableMode(handle, &fanSpeedTable)); } } TEST_F(SysmanDeviceFanFixture, GivenValidFanHandleWhenGettingFanSpeedWithRPMUnitThenValidFanSpeedReadingsRetrieved) { auto handles = get_fan_handles(fanHandleComponentCount); for (auto handle : handles) { zes_fan_speed_units_t unit = zes_fan_speed_units_t::ZES_FAN_SPEED_UNITS_RPM; int32_t fanSpeed = 0; EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, zesFanGetState(handle, unit, &fanSpeed)); } } TEST_F(SysmanDeviceFanFixture, GivenValidFanHandleWhenGettingFanSpeedWithPercentUnitThenUnsupportedIsReturned) { auto handles = get_fan_handles(fanHandleComponentCount); for (auto handle : handles) { zes_fan_speed_units_t unit = zes_fan_speed_units_t::ZES_FAN_SPEED_UNITS_PERCENT; 
int32_t fanSpeed = 0; EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, zesFanGetState(handle, unit, &fanSpeed)); } } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/fan/windows/000077500000000000000000000000001422164147700312735ustar00rootroot00000000000000CMakeLists.txt000066400000000000000000000005251422164147700337560ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/fan/windows# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(WIN32) target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/test_zes_sysman_fan.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_fan.h ) endif() compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/fan/windows/mock_fan.h000066400000000000000000000052051422164147700332230ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/core/test/unit_tests/mock.h" #include "level_zero/tools/test/unit_tests/sources/sysman/windows/mock_kmd_sys_manager.h" #include "sysman/fan/fan_imp.h" namespace L0 { namespace ult { class FanKmdSysManager : public Mock {}; template <> struct Mock : public FanKmdSysManager { uint32_t mockFanMaxPoints = 10; uint32_t mockFanCurrentPulses = 523436; uint32_t mockFanCurrentFanPoints = 0; void getFanProperty(KmdSysman::GfxSysmanReqHeaderIn *pRequest, KmdSysman::GfxSysmanReqHeaderOut *pResponse) override { uint8_t *pBuffer = reinterpret_cast(pResponse); pBuffer += sizeof(KmdSysman::GfxSysmanReqHeaderOut); switch (pRequest->inRequestId) { case KmdSysman::Requests::Fans::MaxFanControlPointsSupported: { uint32_t *pValue = reinterpret_cast(pBuffer); *pValue = mockFanMaxPoints; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; pResponse->outDataSize = sizeof(uint32_t); } break; case KmdSysman::Requests::Fans::CurrentFanSpeed: 
{ if (fanSupported) { uint32_t *pValue = reinterpret_cast(pBuffer); *pValue = mockFanCurrentPulses; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; pResponse->outDataSize = sizeof(uint32_t); } else { pResponse->outDataSize = 0; pResponse->outReturnCode = KmdSysman::KmdSysmanFail; } } break; default: { pResponse->outDataSize = 0; pResponse->outReturnCode = KmdSysman::KmdSysmanFail; } break; } } void setFanProperty(KmdSysman::GfxSysmanReqHeaderIn *pRequest, KmdSysman::GfxSysmanReqHeaderOut *pResponse) override { uint8_t *pBuffer = reinterpret_cast(pRequest); pBuffer += sizeof(KmdSysman::GfxSysmanReqHeaderIn); switch (pRequest->inRequestId) { case KmdSysman::Requests::Fans::CurrentNumOfControlPoints: { uint32_t *pValue = reinterpret_cast(pBuffer); mockFanCurrentFanPoints = *pValue; pResponse->outDataSize = 0; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; } break; default: { pResponse->outDataSize = 0; pResponse->outReturnCode = KmdSysman::KmdSysmanFail; } break; } } Mock() = default; ~Mock() = default; }; } // namespace ult } // namespace L0 test_zes_sysman_fan.cpp000066400000000000000000000203771422164147700360070ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/fan/windows/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/fan/windows/os_fan_imp.h" #include "level_zero/tools/test/unit_tests/sources/sysman/fan/windows/mock_fan.h" #include "level_zero/tools/test/unit_tests/sources/sysman/windows/mock_sysman_fixture.h" extern bool sysmanUltsEnable; namespace L0 { namespace ult { constexpr uint32_t fanHandleComponentCount = 1u; class SysmanDeviceFanFixture : public SysmanDeviceFixture { protected: std::unique_ptr> pKmdSysManager; KmdSysManager *pOriginalKmdSysManager = nullptr; void SetUp() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } SysmanDeviceFixture::SetUp(); } void init(bool allowSetCalls, bool fanSupported) { 
pKmdSysManager.reset(new Mock); pKmdSysManager->allowSetCalls = allowSetCalls; pKmdSysManager->fanSupported = fanSupported; EXPECT_CALL(*pKmdSysManager, escape(_, _, _, _, _)) .WillRepeatedly(::testing::Invoke(pKmdSysManager.get(), &Mock::mock_escape)); pOriginalKmdSysManager = pWddmSysmanImp->pKmdSysManager; pWddmSysmanImp->pKmdSysManager = pKmdSysManager.get(); for (auto handle : pSysmanDeviceImp->pFanHandleContext->handleList) { delete handle; } pSysmanDeviceImp->pFanHandleContext->handleList.clear(); pSysmanDeviceImp->pFanHandleContext->init(); } void TearDown() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } SysmanDeviceFixture::TearDown(); pWddmSysmanImp->pKmdSysManager = pOriginalKmdSysManager; } std::vector get_fan_handles() { uint32_t count = 0; EXPECT_EQ(zesDeviceEnumFans(device->toHandle(), &count, nullptr), ZE_RESULT_SUCCESS); std::vector handles(count, nullptr); EXPECT_EQ(zesDeviceEnumFans(device->toHandle(), &count, handles.data()), ZE_RESULT_SUCCESS); return handles; } }; TEST_F(SysmanDeviceFanFixture, GivenComponentCountZeroWhenEnumeratingFansThenValidCountIsReturnedAndVerifySysmanPowerGetCallSucceeds) { init(true, true); uint32_t count = 0; EXPECT_EQ(zesDeviceEnumFans(device->toHandle(), &count, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(count, fanHandleComponentCount); } TEST_F(SysmanDeviceFanFixture, GivenInvalidComponentCountWhenEnumeratingFansThenValidCountIsReturnedAndVerifySysmanPowerGetCallSucceeds) { init(true, true); uint32_t count = 0; EXPECT_EQ(zesDeviceEnumFans(device->toHandle(), &count, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(count, fanHandleComponentCount); count = count + 1; EXPECT_EQ(zesDeviceEnumFans(device->toHandle(), &count, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(count, fanHandleComponentCount); } TEST_F(SysmanDeviceFanFixture, GivenComponentCountZeroWhenEnumeratingFansThenValidFanHandlesIsReturned) { init(true, true); uint32_t count = 0; EXPECT_EQ(zesDeviceEnumFans(device->toHandle(), &count, nullptr), ZE_RESULT_SUCCESS); 
EXPECT_EQ(count, fanHandleComponentCount); std::vector handles(count, nullptr); EXPECT_EQ(zesDeviceEnumFans(device->toHandle(), &count, handles.data()), ZE_RESULT_SUCCESS); for (auto handle : handles) { EXPECT_NE(handle, nullptr); } } TEST_F(SysmanDeviceFanFixture, GivenValidFanHandleWhenGettingFanPropertiesAllowSetToTrueThenCallSucceeds) { // Setting allow set calls or not init(true, true); auto handles = get_fan_handles(); for (auto handle : handles) { zes_fan_properties_t properties; ze_result_t result = zesFanGetProperties(handle, &properties); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_FALSE(properties.onSubdevice); EXPECT_EQ(properties.subdeviceId, 0); EXPECT_TRUE(properties.canControl); EXPECT_EQ(properties.maxPoints, pKmdSysManager->mockFanMaxPoints); EXPECT_EQ(properties.maxRPM, -1); EXPECT_EQ(properties.supportedModes, zes_fan_speed_mode_t::ZES_FAN_SPEED_MODE_TABLE); EXPECT_EQ(properties.supportedUnits, zes_fan_speed_units_t::ZES_FAN_SPEED_UNITS_PERCENT); } } TEST_F(SysmanDeviceFanFixture, GivenValidFanHandleWhenGettingFanPropertiesAllowSetToFalseThenControlToFalse) { // Setting allow set calls or not init(false, true); auto handles = get_fan_handles(); for (auto handle : handles) { zes_fan_properties_t properties = {}; ze_result_t result = zesFanGetProperties(handle, &properties); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_FALSE(properties.canControl); } } TEST_F(SysmanDeviceFanFixture, GivenValidNoSupportForFanCheckFanHandleCountIsZero) { // Setting allow set calls or not init(false, false); uint32_t count = 0; EXPECT_EQ(zesDeviceEnumFans(device->toHandle(), &count, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(count, 0u); } TEST_F(SysmanDeviceFanFixture, GivenValidFanHandleWhenGettingFanPropertiesAllowSetToFalseThenCallSucceeds) { // Setting allow set calls or not init(true, true); auto handles = get_fan_handles(); for (auto handle : handles) { zes_fan_properties_t properties; ze_result_t result = zesFanGetProperties(handle, &properties); 
EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_FALSE(properties.onSubdevice); EXPECT_EQ(properties.subdeviceId, 0); EXPECT_TRUE(properties.canControl); EXPECT_EQ(properties.maxPoints, pKmdSysManager->mockFanMaxPoints); EXPECT_EQ(properties.maxRPM, -1); EXPECT_EQ(properties.supportedModes, zes_fan_speed_mode_t::ZES_FAN_SPEED_MODE_TABLE); EXPECT_EQ(properties.supportedUnits, zes_fan_speed_units_t::ZES_FAN_SPEED_UNITS_PERCENT); } } TEST_F(SysmanDeviceFanFixture, GivenValidFanHandleWhenGettingFanConfigThenUnsupportedIsReturned) { // Setting allow set calls or not init(true, true); auto handles = get_fan_handles(); for (auto handle : handles) { zes_fan_config_t fanConfig; EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, zesFanGetConfig(handle, &fanConfig)); } } TEST_F(SysmanDeviceFanFixture, GivenValidFanHandleWhenSettingDefaultModeThenUnsupportedIsReturned) { // Setting allow set calls or not init(true, true); auto handles = get_fan_handles(); for (auto handle : handles) { EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, zesFanSetDefaultMode(handle)); } } TEST_F(SysmanDeviceFanFixture, GivenValidFanHandleWhenSettingFixedSpeedModeThenUnsupportedIsReturned) { // Setting allow set calls or not init(true, true); auto handles = get_fan_handles(); for (auto handle : handles) { zes_fan_speed_t fanSpeed = {0}; EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, zesFanSetFixedSpeedMode(handle, &fanSpeed)); } } TEST_F(SysmanDeviceFanFixture, GivenValidFanHandleWhenSettingTheSpeedTableModeThenUnsupportedIsReturned) { // Setting allow set calls or not init(true, true); auto handles = get_fan_handles(); for (auto handle : handles) { zes_fan_speed_table_t fanSpeedTable = {0}; EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, zesFanSetSpeedTableMode(handle, &fanSpeedTable)); } } TEST_F(SysmanDeviceFanFixture, GivenValidFanHandleWhenGettingFanSpeedWithRPMUnitThenValidFanSpeedReadingsRetrieved) { // Setting allow set calls or not init(true, true); auto handles = get_fan_handles(); for (auto handle : 
handles) { zes_fan_speed_units_t unit = zes_fan_speed_units_t::ZES_FAN_SPEED_UNITS_RPM; int32_t fanSpeed = 0; ze_result_t result = zesFanGetState(handle, unit, &fanSpeed); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_GT(fanSpeed, 0); } } TEST_F(SysmanDeviceFanFixture, GivenValidFanHandleWhenGettingFanSpeedWithPercentUnitThenUnsupportedIsReturned) { // Setting allow set calls or not init(true, true); auto handles = get_fan_handles(); for (auto handle : handles) { zes_fan_speed_units_t unit = zes_fan_speed_units_t::ZES_FAN_SPEED_UNITS_PERCENT; int32_t fanSpeed = 0; EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, zesFanGetState(handle, unit, &fanSpeed)); } } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/firmware/000077500000000000000000000000001422164147700306515ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/firmware/CMakeLists.txt000066400000000000000000000003051422164147700334070ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) add_subdirectories() compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/firmware/linux/000077500000000000000000000000001422164147700320105ustar00rootroot00000000000000CMakeLists.txt000066400000000000000000000013371422164147700344750ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/firmware/linux# # Copyright (C) 2020-2022 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_TESTS_TOOLS_SYSMAN_FIRMWARE_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}mock_zes_sysman_firmware.h ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}test_zes_sysman_firmware.cpp ) if((NEO_ENABLE_i915_PRELIM_DETECTION) AND ("${BRANCH_TYPE}" STREQUAL "")) list(REMOVE_ITEM 
L0_TESTS_TOOLS_SYSMAN_FIRMWARE_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/test_zes_sysman_firmware.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_zes_sysman_firmware.h ) endif() if(UNIX) target_sources(${TARGET_NAME} PRIVATE ${L0_TESTS_TOOLS_SYSMAN_FIRMWARE_LINUX} ) endif() mock_zes_sysman_firmware.h000066400000000000000000000064031422164147700372050ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/firmware/linux/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/core/test/unit_tests/mock.h" #include "level_zero/tools/source/sysman/firmware/linux/os_firmware_imp.h" #include "level_zero/tools/test/unit_tests/sources/sysman/linux/mock_sysman_fixture.h" namespace L0 { namespace ult { constexpr uint32_t mockHandleCount = 2; const std::string mockFwVersion("DG01->0->2026"); const std::string mockOpromVersion("OPROM CODE VERSION:123_OPROM DATA VERSION:456"); std::vector mockSupportedFwTypes = {"GSC", "OptionROM"}; std::vector mockUnsupportedFwTypes = {"unknown"}; std::string mockEmpty = {}; class FirmwareInterface : public FirmwareUtil {}; class FirmwareFsAccess : public FsAccess {}; template <> struct Mock : public FirmwareFsAccess { MOCK_METHOD(ze_result_t, read, (const std::string file, std::vector &val), (override)); ze_result_t readValSuccess(const std::string file, std::vector &val) { val.push_back("mtd3: 005ef000 00001000 \"i915-spi.42.auto.GSC\""); val.push_back("mtd5: 00200000 00001000 \"i915-spi.42.auto.OptionROM\""); return ZE_RESULT_SUCCESS; } ze_result_t readMtdValSuccess(const std::string file, std::vector &val) { val.push_back("mtd3: 005ef000 00001000 \"i915-spi.42.auto.GSC\""); val.push_back("mtd3: 005ef000 00001000 \"i915-spi.42.auto.GSC\""); return ZE_RESULT_SUCCESS; } }; template <> struct Mock : public FirmwareUtil { ze_result_t mockFwGetVersion(std::string &fwVersion) { fwVersion = mockFwVersion; return ZE_RESULT_SUCCESS; } ze_result_t 
mockOpromGetVersion(std::string &fwVersion) { fwVersion = mockOpromVersion; return ZE_RESULT_SUCCESS; } ze_result_t mockGetFwVersion(std::string fwType, std::string &firmwareVersion) { if (fwType == "GSC") { firmwareVersion = mockFwVersion; } else if (fwType == "OptionROM") { firmwareVersion = mockOpromVersion; } return ZE_RESULT_SUCCESS; } Mock() = default; MOCK_METHOD(ze_result_t, fwDeviceInit, (), (override)); MOCK_METHOD(ze_result_t, getFirstDevice, (igsc_device_info * info), (override)); MOCK_METHOD(ze_result_t, getFwVersion, (std::string fwType, std::string &firmwareVersion), (override)); MOCK_METHOD(ze_result_t, flashFirmware, (std::string fwType, void *pImage, uint32_t size), (override)); MOCK_METHOD(ze_result_t, fwIfrApplied, (bool &ifrStatus), (override)); ADDMETHOD_NOBASE(fwSupportedDiagTests, ze_result_t, ZE_RESULT_SUCCESS, (std::vector & supportedDiagTests)); ADDMETHOD_NOBASE(fwRunDiagTests, ze_result_t, ZE_RESULT_SUCCESS, (std::string & osDiagType, zes_diag_result_t *pResult)); ADDMETHOD_NOBASE(fwGetMemoryErrorCount, ze_result_t, ZE_RESULT_SUCCESS, (zes_ras_error_type_t category, uint32_t subDeviceCount, uint32_t subDeviceId, uint64_t &count)); MOCK_METHOD(void, getDeviceSupportedFwTypes, (std::vector & fwTypes), (override)); }; class PublicLinuxFirmwareImp : public L0::LinuxFirmwareImp { public: using LinuxFirmwareImp::pFwInterface; }; } // namespace ult } // namespace L0 test_zes_sysman_firmware.cpp000066400000000000000000000267001422164147700375700ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/firmware/linux/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/test/unit_tests/sources/sysman/firmware/linux/mock_zes_sysman_firmware.h" extern bool sysmanUltsEnable; using ::testing::_; namespace L0 { namespace ult { class ZesFirmwareFixture : public SysmanDeviceFixture { protected: zes_firmware_handle_t hSysmanFirmware = {}; 
std::unique_ptr> pMockFwInterface; FirmwareUtil *pFwUtilInterfaceOld = nullptr; std::unique_ptr> pFsAccess; FsAccess *pFsAccessOriginal = nullptr; void SetUp() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } SysmanDeviceFixture::SetUp(); pFsAccessOriginal = pLinuxSysmanImp->pFsAccess; pFsAccess = std::make_unique>>(); pLinuxSysmanImp->pFsAccess = pFsAccess.get(); pFwUtilInterfaceOld = pLinuxSysmanImp->pFwUtilInterface; pMockFwInterface = std::make_unique>>(); pLinuxSysmanImp->pFwUtilInterface = pMockFwInterface.get(); ON_CALL(*pMockFwInterface.get(), fwDeviceInit()) .WillByDefault(::testing::Return(ZE_RESULT_SUCCESS)); ON_CALL(*pMockFwInterface.get(), getFirstDevice(_)) .WillByDefault(::testing::Return(ZE_RESULT_SUCCESS)); ON_CALL(*pMockFwInterface.get(), getDeviceSupportedFwTypes(_)) .WillByDefault(::testing::SetArgReferee<0>(mockSupportedFwTypes)); ON_CALL(*pFsAccess.get(), read(_, _)) .WillByDefault(::testing::Invoke(pFsAccess.get(), &Mock::readValSuccess)); for (const auto &handle : pSysmanDeviceImp->pFirmwareHandleContext->handleList) { delete handle; } pSysmanDeviceImp->pFirmwareHandleContext->handleList.clear(); pSysmanDeviceImp->pFirmwareHandleContext->init(); } void TearDown() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } SysmanDeviceFixture::TearDown(); pLinuxSysmanImp->pFwUtilInterface = pFwUtilInterfaceOld; pLinuxSysmanImp->pFsAccess = pFsAccessOriginal; } std::vector get_firmware_handles(uint32_t count) { std::vector handles(count, nullptr); EXPECT_EQ(zesDeviceEnumFirmwares(device->toHandle(), &count, handles.data()), ZE_RESULT_SUCCESS); return handles; } }; TEST_F(ZesFirmwareFixture, GivenComponentCountZeroWhenCallingzesFirmwareGetThenZeroCountIsReturnedAndVerifyzesFirmwareGetCallSucceeds) { std::vector firmwareHandle{}; uint32_t count = 0; ze_result_t result = zesDeviceEnumFirmwares(device->toHandle(), &count, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(count, mockHandleCount); uint32_t testCount = count + 1; result = 
zesDeviceEnumFirmwares(device->toHandle(), &testCount, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(testCount, count); firmwareHandle.resize(count); result = zesDeviceEnumFirmwares(device->toHandle(), &count, firmwareHandle.data()); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(count, mockHandleCount); FirmwareImp *ptestFirmwareImp = new FirmwareImp(pSysmanDeviceImp->pFirmwareHandleContext->pOsSysman, mockSupportedFwTypes[0]); pSysmanDeviceImp->pFirmwareHandleContext->handleList.push_back(ptestFirmwareImp); result = zesDeviceEnumFirmwares(device->toHandle(), &count, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(count, mockHandleCount); testCount = count; firmwareHandle.resize(testCount); result = zesDeviceEnumFirmwares(device->toHandle(), &testCount, firmwareHandle.data()); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, firmwareHandle.data()); EXPECT_EQ(testCount, mockHandleCount); pSysmanDeviceImp->pFirmwareHandleContext->handleList.pop_back(); delete ptestFirmwareImp; } TEST_F(ZesFirmwareFixture, GivenValidFirmwareHandleWhenGettingFirmwarePropertiesThenVersionIsReturned) { FirmwareImp *ptestFirmwareImp = new FirmwareImp(pSysmanDeviceImp->pFirmwareHandleContext->pOsSysman, mockSupportedFwTypes[0]); pSysmanDeviceImp->pFirmwareHandleContext->handleList.push_back(ptestFirmwareImp); ON_CALL(*pMockFwInterface.get(), getFwVersion(_, _)) .WillByDefault(::testing::Invoke(pMockFwInterface.get(), &Mock::mockGetFwVersion)); auto handles = get_firmware_handles(mockHandleCount); zes_firmware_properties_t properties = {}; EXPECT_EQ(ZE_RESULT_SUCCESS, zesFirmwareGetProperties(handles[0], &properties)); EXPECT_STREQ(mockSupportedFwTypes[0].c_str(), properties.name); EXPECT_STREQ(mockFwVersion.c_str(), properties.version); pSysmanDeviceImp->pFirmwareHandleContext->handleList.pop_back(); delete ptestFirmwareImp; } TEST_F(ZesFirmwareFixture, GivenValidFirmwareHandleWhenGettingOpromPropertiesThenVersionIsReturned) { FirmwareImp *ptestFirmwareImp = 
new FirmwareImp(pSysmanDeviceImp->pFirmwareHandleContext->pOsSysman, mockSupportedFwTypes[1]); pSysmanDeviceImp->pFirmwareHandleContext->handleList.push_back(ptestFirmwareImp); ON_CALL(*pMockFwInterface.get(), getFwVersion(_, _)) .WillByDefault(::testing::Invoke(pMockFwInterface.get(), &Mock::mockGetFwVersion)); auto handles = get_firmware_handles(mockHandleCount); zes_firmware_properties_t properties = {}; EXPECT_EQ(ZE_RESULT_SUCCESS, zesFirmwareGetProperties(handles[1], &properties)); EXPECT_STREQ(mockSupportedFwTypes[1].c_str(), properties.name); EXPECT_STREQ(mockOpromVersion.c_str(), properties.version); pSysmanDeviceImp->pFirmwareHandleContext->handleList.pop_back(); delete ptestFirmwareImp; } TEST_F(ZesFirmwareFixture, GivenFailedFirmwareInitializationWhenInitializingFirmwareContextThenexpectNoHandles) { for (const auto &handle : pSysmanDeviceImp->pFirmwareHandleContext->handleList) { delete handle; } pSysmanDeviceImp->pFirmwareHandleContext->handleList.clear(); ON_CALL(*pMockFwInterface.get(), fwDeviceInit()) .WillByDefault(::testing::Return(ZE_RESULT_ERROR_UNKNOWN)); pSysmanDeviceImp->pFirmwareHandleContext->init(); EXPECT_EQ(0u, pSysmanDeviceImp->pFirmwareHandleContext->handleList.size()); } TEST_F(ZesFirmwareFixture, GivenRepeatedFWTypesWhenInitializingFirmwareContextThenexpectNoHandles) { for (const auto &handle : pSysmanDeviceImp->pFirmwareHandleContext->handleList) { delete handle; } pSysmanDeviceImp->pFirmwareHandleContext->handleList.clear(); ON_CALL(*pFsAccess.get(), read(_, _)) .WillByDefault(::testing::Invoke(pFsAccess.get(), &Mock::readMtdValSuccess)); pSysmanDeviceImp->pFirmwareHandleContext->init(); EXPECT_EQ(1u, pSysmanDeviceImp->pFirmwareHandleContext->handleList.size()); } TEST_F(ZesFirmwareFixture, GivenValidFirmwareHandleWhenFlashingGscFirmwareThenSuccessIsReturned) { FirmwareImp *ptestFirmwareImp = new FirmwareImp(pSysmanDeviceImp->pFirmwareHandleContext->pOsSysman, mockSupportedFwTypes[0]); 
pSysmanDeviceImp->pFirmwareHandleContext->handleList.push_back(ptestFirmwareImp); ON_CALL(*pMockFwInterface.get(), flashFirmware(_, _, _)) .WillByDefault(::testing::Return(ZE_RESULT_SUCCESS)); auto handles = get_firmware_handles(mockHandleCount); uint8_t testImage[ZES_STRING_PROPERTY_SIZE] = {}; memset(testImage, 0xA, ZES_STRING_PROPERTY_SIZE); for (auto handle : handles) { EXPECT_EQ(ZE_RESULT_SUCCESS, zesFirmwareFlash(handle, (void *)testImage, ZES_STRING_PROPERTY_SIZE)); } pSysmanDeviceImp->pFirmwareHandleContext->handleList.pop_back(); delete ptestFirmwareImp; } TEST_F(ZesFirmwareFixture, GivenValidFirmwareHandleWhenFlashingUnkownFirmwareThenFailureIsReturned) { for (const auto &handle : pSysmanDeviceImp->pFirmwareHandleContext->handleList) { delete handle; } pSysmanDeviceImp->pFirmwareHandleContext->handleList.clear(); FirmwareImp *ptestFirmwareImp = new FirmwareImp(pSysmanDeviceImp->pFirmwareHandleContext->pOsSysman, mockUnsupportedFwTypes[0]); pSysmanDeviceImp->pFirmwareHandleContext->handleList.push_back(ptestFirmwareImp); ON_CALL(*pMockFwInterface.get(), flashFirmware(_, _, _)) .WillByDefault(::testing::Return(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE)); uint8_t testImage[ZES_STRING_PROPERTY_SIZE] = {}; memset(testImage, 0xA, ZES_STRING_PROPERTY_SIZE); auto handle = pSysmanDeviceImp->pFirmwareHandleContext->handleList[0]->toHandle(); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, zesFirmwareFlash(handle, (void *)testImage, ZES_STRING_PROPERTY_SIZE)); pSysmanDeviceImp->pFirmwareHandleContext->handleList.pop_back(); delete ptestFirmwareImp; } TEST_F(ZesFirmwareFixture, GivenFirmwareInitializationFailureThenCreateHandleMustFail) { for (const auto &handle : pSysmanDeviceImp->pFirmwareHandleContext->handleList) { delete handle; } pSysmanDeviceImp->pFirmwareHandleContext->handleList.clear(); ON_CALL(*pMockFwInterface.get(), fwDeviceInit()) .WillByDefault(::testing::Return(ZE_RESULT_ERROR_UNKNOWN)); pSysmanDeviceImp->pFirmwareHandleContext->init(); EXPECT_EQ(0u, 
pSysmanDeviceImp->pFirmwareHandleContext->handleList.size()); } TEST_F(ZesFirmwareFixture, GivenValidFirmwareHandleFirmwareLibraryCallFailureWhenGettingFirmwarePropertiesThenUnknownIsReturned) { for (const auto &handle : pSysmanDeviceImp->pFirmwareHandleContext->handleList) { delete handle; } pSysmanDeviceImp->pFirmwareHandleContext->handleList.clear(); ON_CALL(*pMockFwInterface.get(), getFwVersion(_, _)) .WillByDefault(::testing::Return(ZE_RESULT_ERROR_UNINITIALIZED)); pSysmanDeviceImp->pFirmwareHandleContext->init(); auto handles = get_firmware_handles(mockHandleCount); zes_firmware_properties_t properties = {}; EXPECT_EQ(ZE_RESULT_SUCCESS, zesFirmwareGetProperties(handles[0], &properties)); EXPECT_STREQ(mockSupportedFwTypes[0].c_str(), properties.name); EXPECT_STREQ("unknown", properties.version); EXPECT_EQ(ZE_RESULT_SUCCESS, zesFirmwareGetProperties(handles[1], &properties)); EXPECT_STREQ(mockSupportedFwTypes[1].c_str(), properties.name); EXPECT_STREQ("unknown", properties.version); } class ZesFirmwareUninitializedFixture : public SysmanDeviceFixture { protected: zes_firmware_handle_t hSysmanFirmware = {}; std::unique_ptr> pMockFwInterface; FirmwareUtil *pFwUtilInterfaceOld = nullptr; std::unique_ptr> pFsAccess; FsAccess *pFsAccessOriginal = nullptr; void SetUp() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } SysmanDeviceFixture::SetUp(); pFsAccessOriginal = pLinuxSysmanImp->pFsAccess; pFsAccess = std::make_unique>>(); pLinuxSysmanImp->pFsAccess = pFsAccess.get(); pFwUtilInterfaceOld = pLinuxSysmanImp->pFwUtilInterface; pLinuxSysmanImp->pFwUtilInterface = nullptr; ON_CALL(*pFsAccess.get(), read(_, _)) .WillByDefault(::testing::Invoke(pFsAccess.get(), &Mock::readValSuccess)); } void TearDown() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } SysmanDeviceFixture::TearDown(); pLinuxSysmanImp->pFwUtilInterface = pFwUtilInterfaceOld; pLinuxSysmanImp->pFsAccess = pFsAccessOriginal; } }; } // namespace ult } // namespace L0 
compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/frequency/000077500000000000000000000000001422164147700310365ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/frequency/CMakeLists.txt000066400000000000000000000003051422164147700335740ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) add_subdirectories() compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/frequency/linux/000077500000000000000000000000001422164147700321755ustar00rootroot00000000000000CMakeLists.txt000066400000000000000000000013141422164147700346550ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/frequency/linux# # Copyright (C) 2020-2022 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_TESTS_TOOLS_SYSMAN_FREQUENCY_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}test_zes_frequency.cpp ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}mock_sysfs_frequency.h ) if(NEO_ENABLE_i915_PRELIM_DETECTION AND("${BRANCH_TYPE}" STREQUAL "")) list(REMOVE_ITEM L0_TESTS_TOOLS_SYSMAN_FREQUENCY_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/test_zes_frequency.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_sysfs_frequency.h ) endif() if(UNIX) target_sources(${TARGET_NAME} PRIVATE ${L0_TESTS_TOOLS_SYSMAN_FREQUENCY_LINUX} ) endif() mock_sysfs_frequency.h000066400000000000000000000231451422164147700365350ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/frequency/linux/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/tools/source/sysman/frequency/linux/os_frequency_imp.h" namespace L0 { namespace ult { const std::string minFreqFile("gt/gt0/rps_min_freq_mhz"); const 
std::string maxFreqFile("gt/gt0/rps_max_freq_mhz"); const std::string requestFreqFile("gt/gt0/punit_req_freq_mhz"); const std::string tdpFreqFile("gt/gt0/rapl_PL1_freq_mhz"); const std::string actualFreqFile("gt/gt0/rps_act_freq_mhz"); const std::string efficientFreqFile("gt/gt0/rps_RP1_freq_mhz"); const std::string maxValFreqFile("gt/gt0/rps_RP0_freq_mhz"); const std::string minValFreqFile("gt/gt0/rps_RPn_freq_mhz"); const std::string minFreqFileLegacy("gt_min_freq_mhz"); const std::string maxFreqFileLegacy("gt_max_freq_mhz"); const std::string requestFreqFileLegacy("gt_cur_freq_mhz"); const std::string tdpFreqFileLegacy("rapl_PL1_freq_mhz"); const std::string actualFreqFileLegacy("gt_act_freq_mhz"); const std::string efficientFreqFileLegacy("gt_RP1_freq_mhz"); const std::string maxValFreqFileLegacy("gt_RP0_freq_mhz"); const std::string minValFreqFileLegacy("gt_RPn_freq_mhz"); class FrequencySysfsAccess : public SysfsAccess {}; template <> struct Mock : public FrequencySysfsAccess { double mockMin = 0; double mockMax = 0; double mockRequest = 0; double mockTdp = 0; double mockActual = 0; double mockEfficient = 0; double mockMaxVal = 0; double mockMinVal = 0; MOCK_METHOD(ze_result_t, read, (const std::string file, double &val), (override)); MOCK_METHOD(ze_result_t, write, (const std::string file, const double val), (override)); MOCK_METHOD(bool, directoryExists, (const std::string path), (override)); bool mockDirectoryExistsSuccess(const std::string path) { return true; } bool mockDirectoryExistsFailure(const std::string path) { return false; } ze_result_t getMaxValReturnErrorNotAvailable(const std::string file, double &val) { if (file.compare(maxValFreqFile) == 0) { return ZE_RESULT_ERROR_NOT_AVAILABLE; } return ZE_RESULT_SUCCESS; } ze_result_t getMaxValReturnErrorUnknown(const std::string file, double &val) { if (file.compare(maxValFreqFile) == 0) { return ZE_RESULT_ERROR_UNKNOWN; } return ZE_RESULT_SUCCESS; } ze_result_t getMinValReturnErrorNotAvailable(const 
std::string file, double &val) { if (file.compare(minValFreqFile) == 0) { return ZE_RESULT_ERROR_NOT_AVAILABLE; } return ZE_RESULT_SUCCESS; } ze_result_t getMinValReturnErrorUnknown(const std::string file, double &val) { if (file.compare(minValFreqFile) == 0) { return ZE_RESULT_ERROR_UNKNOWN; } return ZE_RESULT_SUCCESS; } ze_result_t getValReturnErrorNotAvailable(const std::string file, double &val) { return ZE_RESULT_ERROR_NOT_AVAILABLE; } ze_result_t getValActualReturnErrorNotAvailable(const std::string file, double &val) { if (file.compare(actualFreqFile) == 0) { return ZE_RESULT_ERROR_NOT_AVAILABLE; } return ZE_RESULT_SUCCESS; } ze_result_t getValEfficientReturnErrorNotAvailable(const std::string file, double &val) { if (file.compare(efficientFreqFile) == 0) { return ZE_RESULT_ERROR_NOT_AVAILABLE; } return ZE_RESULT_SUCCESS; } ze_result_t getValTdpReturnErrorNotAvailable(const std::string file, double &val) { if (file.compare(tdpFreqFile) == 0) { return ZE_RESULT_ERROR_NOT_AVAILABLE; } return ZE_RESULT_SUCCESS; } ze_result_t getValRequestReturnErrorNotAvailable(const std::string file, double &val) { if (file.compare(requestFreqFile) == 0) { return ZE_RESULT_ERROR_NOT_AVAILABLE; } return ZE_RESULT_SUCCESS; } ze_result_t setValMinReturnErrorNotAvailable(const std::string file, const double val) { if (file.compare(minFreqFile) == 0) { return ZE_RESULT_ERROR_NOT_AVAILABLE; } return ZE_RESULT_SUCCESS; } ze_result_t setValMaxReturnErrorNotAvailable(const std::string file, const double val) { if (file.compare(maxFreqFile) == 0) { return ZE_RESULT_ERROR_NOT_AVAILABLE; } return ZE_RESULT_SUCCESS; } ze_result_t getValReturnErrorUnknown(const std::string file, double &val) { return ZE_RESULT_ERROR_UNKNOWN; } ze_result_t getValActualReturnErrorUnknown(const std::string file, double &val) { if (file.compare(actualFreqFile) == 0) { return ZE_RESULT_ERROR_UNKNOWN; } return ZE_RESULT_SUCCESS; } ze_result_t getValEfficientReturnErrorUnknown(const std::string file, double &val) 
{ if (file.compare(efficientFreqFile) == 0) { return ZE_RESULT_ERROR_UNKNOWN; } return ZE_RESULT_SUCCESS; } ze_result_t getValTdpReturnErrorUnknown(const std::string file, double &val) { if (file.compare(tdpFreqFile) == 0) { return ZE_RESULT_ERROR_UNKNOWN; } return ZE_RESULT_SUCCESS; } ze_result_t getValRequestReturnErrorUnknown(const std::string file, double &val) { if (file.compare(requestFreqFile) == 0) { return ZE_RESULT_ERROR_UNKNOWN; } return ZE_RESULT_SUCCESS; } ze_result_t setValMinReturnErrorUnknown(const std::string file, const double val) { if (file.compare(minFreqFile) == 0) { return ZE_RESULT_ERROR_UNKNOWN; } return ZE_RESULT_SUCCESS; } ze_result_t setValMaxReturnErrorUnknown(const std::string file, const double val) { if (file.compare(maxFreqFile) == 0) { return ZE_RESULT_ERROR_UNKNOWN; } return ZE_RESULT_SUCCESS; } ze_result_t getValLegacy(const std::string file, double &val) { if (file.compare(minFreqFileLegacy) == 0) { val = mockMin; } if (file.compare(maxFreqFileLegacy) == 0) { val = mockMax; } if (file.compare(requestFreqFileLegacy) == 0) { val = mockRequest; } if (file.compare(tdpFreqFileLegacy) == 0) { val = mockTdp; } if (file.compare(actualFreqFileLegacy) == 0) { val = mockActual; } if (file.compare(efficientFreqFileLegacy) == 0) { val = mockEfficient; } if (file.compare(maxValFreqFileLegacy) == 0) { val = mockMaxVal; } if (file.compare(minValFreqFileLegacy) == 0) { val = mockMinVal; } return ZE_RESULT_SUCCESS; } ze_result_t setValLegacy(const std::string file, const double val) { if (file.compare(minFreqFileLegacy) == 0) { mockMin = val; } if (file.compare(maxFreqFileLegacy) == 0) { mockMax = val; } if (file.compare(requestFreqFileLegacy) == 0) { mockRequest = val; } if (file.compare(tdpFreqFileLegacy) == 0) { mockTdp = val; } if (file.compare(actualFreqFileLegacy) == 0) { mockActual = val; } if (file.compare(efficientFreqFileLegacy) == 0) { mockEfficient = val; } if (file.compare(maxValFreqFileLegacy) == 0) { mockMaxVal = val; } if 
(file.compare(minValFreqFileLegacy) == 0) { mockMinVal = val; } return ZE_RESULT_SUCCESS; } ze_result_t getVal(const std::string file, double &val) { if (file.compare(minFreqFile) == 0) { val = mockMin; } if (file.compare(maxFreqFile) == 0) { val = mockMax; } if (file.compare(requestFreqFile) == 0) { val = mockRequest; } if (file.compare(tdpFreqFile) == 0) { val = mockTdp; } if (file.compare(actualFreqFile) == 0) { val = mockActual; } if (file.compare(efficientFreqFile) == 0) { val = mockEfficient; } if (file.compare(maxValFreqFile) == 0) { val = mockMaxVal; } if (file.compare(minValFreqFile) == 0) { val = mockMinVal; } return ZE_RESULT_SUCCESS; } ze_result_t setVal(const std::string file, const double val) { if (file.compare(minFreqFile) == 0) { mockMin = val; } if (file.compare(maxFreqFile) == 0) { mockMax = val; } if (file.compare(requestFreqFile) == 0) { mockRequest = val; } if (file.compare(tdpFreqFile) == 0) { mockTdp = val; } if (file.compare(actualFreqFile) == 0) { mockActual = val; } if (file.compare(efficientFreqFile) == 0) { mockEfficient = val; } if (file.compare(maxValFreqFile) == 0) { mockMaxVal = val; } if (file.compare(minValFreqFile) == 0) { mockMinVal = val; } return ZE_RESULT_SUCCESS; } Mock() = default; ~Mock() override = default; }; class PublicLinuxFrequencyImp : public L0::LinuxFrequencyImp { public: PublicLinuxFrequencyImp(OsSysman *pOsSysman, ze_bool_t onSubdevice, uint32_t subdeviceId, zes_freq_domain_t type) : LinuxFrequencyImp(pOsSysman, onSubdevice, subdeviceId, type) {} using LinuxFrequencyImp::getMaxVal; using LinuxFrequencyImp::getMin; using LinuxFrequencyImp::getMinVal; using LinuxFrequencyImp::pSysfsAccess; }; } // namespace ult } // namespace L0 test_zes_frequency.cpp000066400000000000000000000755751422164147700365660ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/frequency/linux/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include 
"level_zero/tools/test/unit_tests/sources/sysman/linux/mock_sysman_fixture.h" #include "gmock/gmock.h" #include "gtest/gtest.h" #include "mock_sysfs_frequency.h" #include extern bool sysmanUltsEnable; using ::testing::Invoke; namespace L0 { namespace ult { constexpr double minFreq = 300.0; constexpr double maxFreq = 1100.0; constexpr double step = 100.0 / 6; constexpr double request = 300.0; constexpr double tdp = 1100.0; constexpr double actual = 300.0; constexpr double efficient = 300.0; constexpr double maxVal = 1100.0; constexpr double minVal = 300.0; constexpr uint32_t numClocks = static_cast((maxFreq - minFreq) / step) + 1; constexpr uint32_t handleComponentCount = 1u; class SysmanDeviceFrequencyFixture : public SysmanDeviceFixture { protected: std::unique_ptr> pSysfsAccess; SysfsAccess *pSysfsAccessOld = nullptr; std::vector deviceHandles; void SetUp() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } SysmanDeviceFixture::SetUp(); pSysfsAccessOld = pLinuxSysmanImp->pSysfsAccess; pSysfsAccess = std::make_unique>>(); pLinuxSysmanImp->pSysfsAccess = pSysfsAccess.get(); pSysfsAccess->setVal(minFreqFile, minFreq); pSysfsAccess->setVal(maxFreqFile, maxFreq); pSysfsAccess->setVal(requestFreqFile, request); pSysfsAccess->setVal(tdpFreqFile, tdp); pSysfsAccess->setVal(actualFreqFile, actual); pSysfsAccess->setVal(efficientFreqFile, efficient); pSysfsAccess->setVal(maxValFreqFile, maxVal); pSysfsAccess->setVal(minValFreqFile, minVal); ON_CALL(*pSysfsAccess.get(), read(_, _)) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::getVal)); ON_CALL(*pSysfsAccess.get(), write(_, _)) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::setVal)); ON_CALL(*pSysfsAccess.get(), directoryExists(_)) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::mockDirectoryExistsSuccess)); // delete handles created in initial SysmanDeviceHandleContext::init() call for (auto handle : pSysmanDeviceImp->pFrequencyHandleContext->handleList) { delete handle; } 
pSysmanDeviceImp->pFrequencyHandleContext->handleList.clear(); uint32_t subDeviceCount = 0; // We received a device handle. Check for subdevices in this device Device::fromHandle(device->toHandle())->getSubDevices(&subDeviceCount, nullptr); if (subDeviceCount == 0) { deviceHandles.resize(1, device->toHandle()); } else { deviceHandles.resize(subDeviceCount, nullptr); Device::fromHandle(device->toHandle())->getSubDevices(&subDeviceCount, deviceHandles.data()); } pSysmanDeviceImp->pFrequencyHandleContext->init(deviceHandles); } void TearDown() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } SysmanDeviceFixture::TearDown(); pLinuxSysmanImp->pSysfsAccess = pSysfsAccessOld; } double clockValue(const double calculatedClock) { // i915 specific. frequency step is a fraction // However, the i915 represents all clock // rates as integer values. So clocks are // rounded to the nearest integer. uint32_t actualClock = static_cast(calculatedClock + 0.5); return static_cast(actualClock); } std::vector get_freq_handles(uint32_t count) { std::vector handles(count, nullptr); EXPECT_EQ(zesDeviceEnumFrequencyDomains(device->toHandle(), &count, handles.data()), ZE_RESULT_SUCCESS); return handles; } }; TEST_F(SysmanDeviceFrequencyFixture, GivenComponentCountZeroWhenEnumeratingFrequencyHandlesThenNonZeroCountIsReturnedAndCallSucceds) { uint32_t count = 0U; EXPECT_EQ(ZE_RESULT_SUCCESS, zesDeviceEnumFrequencyDomains(device->toHandle(), &count, nullptr)); EXPECT_EQ(count, handleComponentCount); uint32_t testCount = count + 1; EXPECT_EQ(ZE_RESULT_SUCCESS, zesDeviceEnumFrequencyDomains(device->toHandle(), &testCount, nullptr)); EXPECT_EQ(count, testCount); auto handles = get_freq_handles(count); for (auto handle : handles) { EXPECT_NE(handle, nullptr); } } TEST_F(SysmanDeviceFrequencyFixture, GivenComponentCountZeroAndValidPtrWhenEnumeratingFrequencyHandlesThenNonZeroCountAndNoHandlesAreReturnedAndCallSucceds) { uint32_t count = 0U; zes_freq_handle_t handle = static_cast(0UL); 
EXPECT_EQ(ZE_RESULT_SUCCESS, zesDeviceEnumFrequencyDomains(device->toHandle(), &count, &handle)); EXPECT_EQ(count, handleComponentCount); EXPECT_EQ(handle, static_cast(0UL)); } TEST_F(SysmanDeviceFrequencyFixture, GivenActualComponentCountTwoWhenTryingToGetOneComponentOnlyThenOneComponentIsReturnedAndCountUpdated) { auto pFrequencyHandleContextTest = std::make_unique(pOsSysman); pFrequencyHandleContextTest->handleList.push_back(new FrequencyImp(pOsSysman, device->toHandle(), ZES_FREQ_DOMAIN_GPU)); pFrequencyHandleContextTest->handleList.push_back(new FrequencyImp(pOsSysman, device->toHandle(), ZES_FREQ_DOMAIN_GPU)); uint32_t count = 1; std::vector phFrequency(count, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, pFrequencyHandleContextTest->frequencyGet(&count, phFrequency.data())); EXPECT_EQ(count, 1u); } TEST_F(SysmanDeviceFrequencyFixture, GivenValidFrequencyHandleWhenCallingzesFrequencyGetPropertiesThenSuccessIsReturned) { auto handles = get_freq_handles(handleComponentCount); for (auto handle : handles) { EXPECT_NE(handle, nullptr); zes_freq_properties_t properties; EXPECT_EQ(ZE_RESULT_SUCCESS, zesFrequencyGetProperties(handle, &properties)); EXPECT_EQ(nullptr, properties.pNext); EXPECT_EQ(ZES_FREQ_DOMAIN_GPU, properties.type); EXPECT_FALSE(properties.onSubdevice); EXPECT_DOUBLE_EQ(maxFreq, properties.max); EXPECT_DOUBLE_EQ(minFreq, properties.min); EXPECT_TRUE(properties.canControl); } } TEST_F(SysmanDeviceFrequencyFixture, GivenValidFrequencyHandleAndZeroCountWhenCallingzesFrequencyGetAvailableClocksThenCallSucceeds) { auto handles = get_freq_handles(handleComponentCount); for (auto handle : handles) { EXPECT_NE(handle, nullptr); uint32_t count = 0; EXPECT_EQ(ZE_RESULT_SUCCESS, zesFrequencyGetAvailableClocks(handle, &count, nullptr)); EXPECT_EQ(numClocks, count); } } TEST_F(SysmanDeviceFrequencyFixture, GivenValidFrequencyHandleAndZeroCountWhenCountIsMoreThanNumClocksThenCallSucceeds) { auto handles = get_freq_handles(handleComponentCount); for (auto handle : 
handles) { EXPECT_NE(handle, nullptr); uint32_t count = 80; EXPECT_EQ(ZE_RESULT_SUCCESS, zesFrequencyGetAvailableClocks(handle, &count, nullptr)); EXPECT_EQ(numClocks, count); } } TEST_F(SysmanDeviceFrequencyFixture, GivenValidFrequencyHandleAndZeroCountWhenCountIsLessThanNumClocksThenCallSucceeds) { auto handles = get_freq_handles(handleComponentCount); for (auto handle : handles) { EXPECT_NE(handle, nullptr); uint32_t count = 20; EXPECT_EQ(ZE_RESULT_SUCCESS, zesFrequencyGetAvailableClocks(handle, &count, nullptr)); } } TEST_F(SysmanDeviceFrequencyFixture, GivenValidFrequencyHandleAndCorrectCountWhenCallingzesFrequencyGetAvailableClocksThenCallSucceeds) { auto handles = get_freq_handles(handleComponentCount); for (auto handle : handles) { uint32_t count = 0; EXPECT_EQ(ZE_RESULT_SUCCESS, zesFrequencyGetAvailableClocks(handle, &count, nullptr)); EXPECT_EQ(numClocks, count); double *clocks = new double[count]; EXPECT_EQ(ZE_RESULT_SUCCESS, zesFrequencyGetAvailableClocks(handle, &count, clocks)); EXPECT_EQ(numClocks, count); for (uint32_t i = 0; i < count; i++) { EXPECT_DOUBLE_EQ(clockValue(minFreq + (step * i)), clocks[i]); } delete[] clocks; } } TEST_F(SysmanDeviceFrequencyFixture, GivenValidateFrequencyGetRangeWhengetMaxAndgetMinFailsThenFrequencyGetRangeCallReturnsNegativeValuesForRange) { auto pFrequencyImp = std::make_unique(pOsSysman, device->toHandle(), ZES_FREQ_DOMAIN_GPU); zes_freq_range_t limit = {}; ON_CALL(*pSysfsAccess.get(), read(_, _)) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::getValReturnErrorNotAvailable)); EXPECT_EQ(ZE_RESULT_SUCCESS, pFrequencyImp->frequencyGetRange(&limit)); EXPECT_EQ(-1, limit.max); EXPECT_EQ(-1, limit.min); ON_CALL(*pSysfsAccess.get(), read(_, _)) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::getValReturnErrorUnknown)); EXPECT_EQ(ZE_RESULT_SUCCESS, pFrequencyImp->frequencyGetRange(&limit)); EXPECT_EQ(-1, limit.max); EXPECT_EQ(-1, limit.min); } TEST_F(SysmanDeviceFrequencyFixture, 
GivenValidFrequencyHandleWhenCallingzesFrequencyGetRangeThenVerifyzesFrequencyGetRangeTestCallSucceeds) { auto handles = get_freq_handles(handleComponentCount); for (auto handle : handles) { zes_freq_range_t limits; EXPECT_EQ(ZE_RESULT_SUCCESS, zesFrequencyGetRange(handle, &limits)); EXPECT_DOUBLE_EQ(minFreq, limits.min); EXPECT_DOUBLE_EQ(maxFreq, limits.max); } } TEST_F(SysmanDeviceFrequencyFixture, GivenValidFrequencyLimitsWhenCallingFrequencySetRangeForFailures1ThenAPIExitsGracefully) { auto pFrequencyImp = std::make_unique(pOsSysman, device->toHandle(), ZES_FREQ_DOMAIN_GPU); zes_freq_range_t limits = {}; // Verify that Max must be within range. limits.min = minFreq; limits.max = 600.0; ON_CALL(*pSysfsAccess.get(), write(_, _)) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::setValMinReturnErrorNotAvailable)); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, pFrequencyImp->frequencySetRange(&limits)); ON_CALL(*pSysfsAccess.get(), write(_, _)) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::setValMinReturnErrorUnknown)); EXPECT_EQ(ZE_RESULT_ERROR_UNKNOWN, pFrequencyImp->frequencySetRange(&limits)); } TEST_F(SysmanDeviceFrequencyFixture, GivenValidFrequencyLimitsWhenCallingFrequencySetRangeForFailures2ThenAPIExitsGracefully) { auto pFrequencyImp = std::make_unique(pOsSysman, device->toHandle(), ZES_FREQ_DOMAIN_GPU); zes_freq_range_t limits = {}; // Verify that Max must be within range. 
limits.min = 900.0; limits.max = maxFreq; ON_CALL(*pSysfsAccess.get(), write(_, _)) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::setValMaxReturnErrorNotAvailable)); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, pFrequencyImp->frequencySetRange(&limits)); ON_CALL(*pSysfsAccess.get(), write(_, _)) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::setValMaxReturnErrorUnknown)); EXPECT_EQ(ZE_RESULT_ERROR_UNKNOWN, pFrequencyImp->frequencySetRange(&limits)); } TEST_F(SysmanDeviceFrequencyFixture, GivenValidFrequencyHandleWhenCallingzesFrequencySetRangeThenVerifyzesFrequencySetRangeTest1CallSucceeds) { auto handles = get_freq_handles(handleComponentCount); for (auto handle : handles) { const double startingMin = 900.0; const double newMax = 600.0; zes_freq_range_t limits; pSysfsAccess->setVal(minFreqFile, startingMin); // If the new Max value is less than the old Min // value, the new Min must be set before the new Max limits.min = minFreq; limits.max = newMax; EXPECT_EQ(ZE_RESULT_SUCCESS, zesFrequencySetRange(handle, &limits)); EXPECT_EQ(ZE_RESULT_SUCCESS, zesFrequencyGetRange(handle, &limits)); EXPECT_DOUBLE_EQ(minFreq, limits.min); EXPECT_DOUBLE_EQ(newMax, limits.max); } } TEST_F(SysmanDeviceFrequencyFixture, GivenValidFrequencyHandleWhenCallingzesFrequencySetRangeThenVerifyzesFrequencySetRangeTest2CallSucceeds) { auto handles = get_freq_handles(handleComponentCount); for (auto handle : handles) { const double startingMax = 600.0; const double newMin = 900.0; zes_freq_range_t limits; pSysfsAccess->setVal(maxFreqFile, startingMax); // If the new Min value is greater than the old Max // value, the new Max must be set before the new Min limits.min = newMin; limits.max = maxFreq; EXPECT_EQ(ZE_RESULT_SUCCESS, zesFrequencySetRange(handle, &limits)); EXPECT_EQ(ZE_RESULT_SUCCESS, zesFrequencyGetRange(handle, &limits)); EXPECT_DOUBLE_EQ(newMin, limits.min); EXPECT_DOUBLE_EQ(maxFreq, limits.max); } } TEST_F(SysmanDeviceFrequencyFixture, 
GivenInvalidFrequencyLimitsWhenCallingFrequencySetRangeThenVerifyFrequencySetRangeTest4ReturnsError) { auto pFrequencyImp = std::make_unique(pOsSysman, device->toHandle(), ZES_FREQ_DOMAIN_GPU); zes_freq_range_t limits; // Verify that Max must be greater than min range. limits.min = clockValue(maxFreq + step); limits.max = minFreq; EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, pFrequencyImp->frequencySetRange(&limits)); } TEST_F(SysmanDeviceFrequencyFixture, GivenValidFrequencyHandleWhenCallingzesFrequencyGetStateThenVerifyzesFrequencyGetStateTestCallSucceeds) { auto handles = get_freq_handles(handleComponentCount); for (auto handle : handles) { const double testRequestValue = 450.0; const double testTdpValue = 1200.0; const double testEfficientValue = 400.0; const double testActualValue = 550.0; zes_freq_state_t state; pSysfsAccess->setVal(requestFreqFile, testRequestValue); pSysfsAccess->setVal(tdpFreqFile, testTdpValue); pSysfsAccess->setVal(actualFreqFile, testActualValue); pSysfsAccess->setVal(efficientFreqFile, testEfficientValue); EXPECT_EQ(ZE_RESULT_SUCCESS, zesFrequencyGetState(handle, &state)); EXPECT_DOUBLE_EQ(testRequestValue, state.request); EXPECT_DOUBLE_EQ(testTdpValue, state.tdp); EXPECT_DOUBLE_EQ(testEfficientValue, state.efficient); EXPECT_DOUBLE_EQ(testActualValue, state.actual); EXPECT_EQ(0u, state.throttleReasons); EXPECT_EQ(nullptr, state.pNext); EXPECT_LE(state.currentVoltage, 0); } } TEST_F(SysmanDeviceFrequencyFixture, GivenValidFrequencyHandleWhenCallingzesFrequencyGetStateWithLegacyPathThenVerifyzesFrequencyGetStateTestCallSucceeds) { ON_CALL(*pSysfsAccess.get(), read(_, _)) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::getValLegacy)); ON_CALL(*pSysfsAccess.get(), write(_, _)) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::setValLegacy)); ON_CALL(*pSysfsAccess.get(), directoryExists(_)) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::mockDirectoryExistsFailure)); for (auto handle : 
pSysmanDeviceImp->pFrequencyHandleContext->handleList) { delete handle; } pSysmanDeviceImp->pFrequencyHandleContext->handleList.clear(); pSysmanDeviceImp->pFrequencyHandleContext->init(deviceHandles); auto handles = get_freq_handles(handleComponentCount); for (auto handle : handles) { const double testRequestValue = 400.0; const double testTdpValue = 1100.0; const double testEfficientValue = 300.0; const double testActualValue = 550.0; zes_freq_state_t state; pSysfsAccess->setValLegacy(requestFreqFileLegacy, testRequestValue); pSysfsAccess->setValLegacy(tdpFreqFileLegacy, testTdpValue); pSysfsAccess->setValLegacy(actualFreqFileLegacy, testActualValue); pSysfsAccess->setValLegacy(efficientFreqFileLegacy, testEfficientValue); EXPECT_EQ(ZE_RESULT_SUCCESS, zesFrequencyGetState(handle, &state)); EXPECT_DOUBLE_EQ(testRequestValue, state.request); EXPECT_DOUBLE_EQ(testTdpValue, state.tdp); EXPECT_DOUBLE_EQ(testEfficientValue, state.efficient); EXPECT_DOUBLE_EQ(testActualValue, state.actual); EXPECT_EQ(0u, state.throttleReasons); EXPECT_EQ(nullptr, state.pNext); EXPECT_LE(state.currentVoltage, 0); } } TEST_F(SysmanDeviceFrequencyFixture, GivenValidFrequencyHandleWhenCallingzesFrequencyGetRangeWithLegacyPathThenVerifyzesFrequencyGetRangeTestCallSucceeds) { ON_CALL(*pSysfsAccess.get(), read(_, _)) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::getValLegacy)); ON_CALL(*pSysfsAccess.get(), write(_, _)) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::setValLegacy)); ON_CALL(*pSysfsAccess.get(), directoryExists(_)) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::mockDirectoryExistsFailure)); for (auto handle : pSysmanDeviceImp->pFrequencyHandleContext->handleList) { delete handle; } pSysmanDeviceImp->pFrequencyHandleContext->handleList.clear(); pSysmanDeviceImp->pFrequencyHandleContext->init(deviceHandles); auto handles = get_freq_handles(handleComponentCount); double minFreqLegacy = 400.0; double maxFreqLegacy = 1200.0; 
pSysfsAccess->setValLegacy(minFreqFileLegacy, minFreqLegacy); pSysfsAccess->setValLegacy(maxFreqFileLegacy, maxFreqLegacy); for (auto handle : handles) { zes_freq_range_t limits; EXPECT_EQ(ZE_RESULT_SUCCESS, zesFrequencyGetRange(handle, &limits)); EXPECT_DOUBLE_EQ(minFreqLegacy, limits.min); EXPECT_DOUBLE_EQ(maxFreqLegacy, limits.max); } } TEST_F(SysmanDeviceFrequencyFixture, GivenValidStatePointerWhenValidatingfrequencyGetStateWhenOneOfTheFrequencyStateThenNegativeValueIsReturned) { auto pFrequencyImp = std::make_unique(pOsSysman, device->toHandle(), ZES_FREQ_DOMAIN_GPU); zes_freq_state_t state = {}; ON_CALL(*pSysfsAccess.get(), read(_, _)) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::getValRequestReturnErrorNotAvailable)); EXPECT_EQ(ZE_RESULT_SUCCESS, pFrequencyImp->frequencyGetState(&state)); EXPECT_EQ(-1, state.request); ON_CALL(*pSysfsAccess.get(), read(_, _)) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::getValRequestReturnErrorUnknown)); EXPECT_EQ(ZE_RESULT_SUCCESS, pFrequencyImp->frequencyGetState(&state)); EXPECT_EQ(-1, state.request); ON_CALL(*pSysfsAccess.get(), read(_, _)) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::getValTdpReturnErrorNotAvailable)); EXPECT_EQ(ZE_RESULT_SUCCESS, pFrequencyImp->frequencyGetState(&state)); EXPECT_EQ(-1, state.tdp); ON_CALL(*pSysfsAccess.get(), read(_, _)) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::getValTdpReturnErrorUnknown)); EXPECT_EQ(ZE_RESULT_SUCCESS, pFrequencyImp->frequencyGetState(&state)); EXPECT_EQ(-1, state.tdp); ON_CALL(*pSysfsAccess.get(), read(_, _)) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::getValEfficientReturnErrorNotAvailable)); EXPECT_EQ(ZE_RESULT_SUCCESS, pFrequencyImp->frequencyGetState(&state)); EXPECT_EQ(-1, state.efficient); ON_CALL(*pSysfsAccess.get(), read(_, _)) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::getValEfficientReturnErrorUnknown)); EXPECT_EQ(ZE_RESULT_SUCCESS, 
pFrequencyImp->frequencyGetState(&state)); EXPECT_EQ(-1, state.efficient); ON_CALL(*pSysfsAccess.get(), read(_, _)) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::getValActualReturnErrorNotAvailable)); EXPECT_EQ(ZE_RESULT_SUCCESS, pFrequencyImp->frequencyGetState(&state)); EXPECT_EQ(-1, state.actual); ON_CALL(*pSysfsAccess.get(), read(_, _)) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::getValActualReturnErrorUnknown)); EXPECT_EQ(ZE_RESULT_SUCCESS, pFrequencyImp->frequencyGetState(&state)); EXPECT_EQ(-1, state.actual); } TEST_F(SysmanDeviceFrequencyFixture, GivenThrottleTimeStructPointerWhenCallingfrequencyGetThrottleTimeThenUnsupportedIsReturned) { auto pFrequencyImp = std::make_unique(pOsSysman, device->toHandle(), ZES_FREQ_DOMAIN_GPU); zes_freq_throttle_time_t throttleTime = {}; EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, pFrequencyImp->frequencyGetThrottleTime(&throttleTime)); } TEST_F(SysmanDeviceFrequencyFixture, GivengetMinFunctionReturnsErrorWhenValidatinggetMinFailuresThenAPIReturnsErrorAccordingly) { PublicLinuxFrequencyImp linuxFrequencyImp(pOsSysman, 0, 0, ZES_FREQ_DOMAIN_GPU); double min = 0; ON_CALL(*pSysfsAccess.get(), read(_, _)) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::getValReturnErrorNotAvailable)); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, linuxFrequencyImp.getMin(min)); ON_CALL(*pSysfsAccess.get(), read(_, _)) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::getValReturnErrorUnknown)); EXPECT_EQ(ZE_RESULT_ERROR_UNKNOWN, linuxFrequencyImp.getMin(min)); } TEST_F(SysmanDeviceFrequencyFixture, GivengetMinValFunctionReturnsErrorWhenValidatinggetMinValFailuresThenAPIReturnsErrorAccordingly) { PublicLinuxFrequencyImp linuxFrequencyImp(pOsSysman, 0, 0, ZES_FREQ_DOMAIN_GPU); double val = 0; ON_CALL(*pSysfsAccess.get(), read(_, _)) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::getMinValReturnErrorNotAvailable)); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, 
linuxFrequencyImp.getMinVal(val)); ON_CALL(*pSysfsAccess.get(), read(_, _)) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::getMinValReturnErrorUnknown)); EXPECT_EQ(ZE_RESULT_ERROR_UNKNOWN, linuxFrequencyImp.getMinVal(val)); } TEST_F(SysmanDeviceFrequencyFixture, GivengetMaxValFunctionReturnsErrorWhenValidatinggetMaxValFailuresThenAPIReturnsErrorAccordingly) { PublicLinuxFrequencyImp linuxFrequencyImp(pOsSysman, 0, 0, ZES_FREQ_DOMAIN_GPU); double val = 0; ON_CALL(*pSysfsAccess.get(), read(_, _)) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::getMaxValReturnErrorNotAvailable)); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, linuxFrequencyImp.getMaxVal(val)); ON_CALL(*pSysfsAccess.get(), read(_, _)) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::getMaxValReturnErrorUnknown)); EXPECT_EQ(ZE_RESULT_ERROR_UNKNOWN, linuxFrequencyImp.getMaxVal(val)); } TEST_F(SysmanDeviceFrequencyFixture, GivengetMaxValFunctionReturnsErrorWhenValidatingosFrequencyGetPropertiesThenAPIBehavesAsExpected) { zes_freq_properties_t properties = {}; PublicLinuxFrequencyImp linuxFrequencyImp(pOsSysman, 0, 0, ZES_FREQ_DOMAIN_GPU); ON_CALL(*pSysfsAccess.get(), read(_, _)) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::getMaxValReturnErrorNotAvailable)); EXPECT_EQ(ZE_RESULT_SUCCESS, linuxFrequencyImp.osFrequencyGetProperties(properties)); EXPECT_EQ(0, properties.canControl); } TEST_F(SysmanDeviceFrequencyFixture, GivengetMinValFunctionReturnsErrorWhenValidatingosFrequencyGetPropertiesThenAPIBehavesAsExpected) { zes_freq_properties_t properties = {}; PublicLinuxFrequencyImp linuxFrequencyImp(pOsSysman, 0, 0, ZES_FREQ_DOMAIN_GPU); ON_CALL(*pSysfsAccess.get(), read(_, _)) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::getMinValReturnErrorNotAvailable)); EXPECT_EQ(ZE_RESULT_SUCCESS, linuxFrequencyImp.osFrequencyGetProperties(properties)); EXPECT_EQ(0, properties.canControl); } TEST_F(SysmanDeviceFrequencyFixture, 
GivenOnSubdeviceSetWhenValidatingAnyFrequencyAPIThenSuccessIsReturned) { zes_freq_properties_t properties = {}; PublicLinuxFrequencyImp linuxFrequencyImp(pOsSysman, 1, 0, ZES_FREQ_DOMAIN_GPU); EXPECT_EQ(ZE_RESULT_SUCCESS, linuxFrequencyImp.osFrequencyGetProperties(properties)); EXPECT_EQ(1, properties.canControl); } TEST_F(SysmanDeviceFrequencyFixture, GivenValidFrequencyHandleWhenCallingzesFrequencySetRangeAndIfgetMaxFailsThenVerifyzesFrequencySetRangeTestCallFail) { auto handles = get_freq_handles(handleComponentCount); for (auto handle : handles) { const double startingMax = 600.0; const double newMin = 900.0; zes_freq_range_t limits; pSysfsAccess->setVal(maxFreqFile, startingMax); // If the new Min value is greater than the old Max // value, the new Max must be set before the new Min limits.min = newMin; limits.max = maxFreq; ON_CALL(*pSysfsAccess.get(), read(_, _)) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::setValMaxReturnErrorNotAvailable)); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, zesFrequencySetRange(handle, &limits)); } } TEST_F(SysmanDeviceFrequencyFixture, GivenValidFrequencyHandleWhenCallingzesFrequencySetRangeAndIfsetMaxFailsThenVerifyzesFrequencySetRangeTestCallFail) { auto handles = get_freq_handles(handleComponentCount); for (auto handle : handles) { const double startingMax = 600.0; const double newMin = 900.0; zes_freq_range_t limits; pSysfsAccess->setVal(maxFreqFile, startingMax); // If the new Min value is greater than the old Max // value, the new Max must be set before the new Min limits.min = newMin; limits.max = maxFreq; ON_CALL(*pSysfsAccess.get(), write(_, _)) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::setValMaxReturnErrorNotAvailable)); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, zesFrequencySetRange(handle, &limits)); } } TEST_F(SysmanDeviceFrequencyFixture, GivenValidFrequencyHandleWhenCallingzesFrequencyOcGetFrequencyTargetThenVerifyTestCallFail) { auto handles = 
get_freq_handles(handleComponentCount); for (auto handle : handles) { double freqTarget = 0.0; EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, zesFrequencyOcGetFrequencyTarget(handle, &freqTarget)); } } TEST_F(SysmanDeviceFrequencyFixture, GivenValidFrequencyHandleWhenCallingzesFrequencyOcSetFrequencyTargetThenVerifyTestCallFail) { auto handles = get_freq_handles(handleComponentCount); for (auto handle : handles) { double freqTarget = 0.0; EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, zesFrequencyOcSetFrequencyTarget(handle, freqTarget)); } } TEST_F(SysmanDeviceFrequencyFixture, GivenValidFrequencyHandleWhenCallingzesFrequencyOcGetVoltageTargetThenVerifyTestCallFail) { auto handles = get_freq_handles(handleComponentCount); for (auto handle : handles) { double voltTarget = 0.0, voltOffset = 0.0; EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, zesFrequencyOcGetVoltageTarget(handle, &voltTarget, &voltOffset)); } } TEST_F(SysmanDeviceFrequencyFixture, GivenValidFrequencyHandleWhenCallingzesFrequencyOcSetVoltageTargetThenVerifyTestCallFail) { auto handles = get_freq_handles(handleComponentCount); for (auto handle : handles) { double voltTarget = 0.0, voltOffset = 0.0; EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, zesFrequencyOcSetVoltageTarget(handle, voltTarget, voltOffset)); } } TEST_F(SysmanDeviceFrequencyFixture, GivenValidFrequencyHandleWhenCallingzesFrequencyOcSetModeThenVerifyTestCallFail) { auto handles = get_freq_handles(handleComponentCount); for (auto handle : handles) { zes_oc_mode_t mode = ZES_OC_MODE_OFF; EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, zesFrequencyOcSetMode(handle, mode)); } } TEST_F(SysmanDeviceFrequencyFixture, GivenValidFrequencyHandleWhenCallingzesFrequencyOcGetModeThenVerifyTestCallFail) { auto handles = get_freq_handles(handleComponentCount); for (auto handle : handles) { zes_oc_mode_t mode = ZES_OC_MODE_OFF; EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, zesFrequencyOcGetMode(handle, &mode)); } } TEST_F(SysmanDeviceFrequencyFixture, 
GivenValidFrequencyHandleWhenCallingzesFrequencyOcGetCapabilitiesThenVerifyTestCallFail) { auto handles = get_freq_handles(handleComponentCount); for (auto handle : handles) { zes_oc_capabilities_t caps = {}; EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, zesFrequencyOcGetCapabilities(handle, &caps)); } } TEST_F(SysmanDeviceFrequencyFixture, GivenValidFrequencyHandleWhenCallingzesFrequencyOcGetIccMaxThenVerifyTestCallFail) { auto handles = get_freq_handles(handleComponentCount); for (auto handle : handles) { double iccMax = 0.0; EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, zesFrequencyOcGetIccMax(handle, &iccMax)); } } TEST_F(SysmanDeviceFrequencyFixture, GivenValidFrequencyHandleWhenCallingzesFrequencyOcSetIccMaxThenVerifyTestCallFail) { auto handles = get_freq_handles(handleComponentCount); for (auto handle : handles) { double iccMax = 0.0; EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, zesFrequencyOcSetIccMax(handle, iccMax)); } } TEST_F(SysmanDeviceFrequencyFixture, GivenValidFrequencyHandleWhenCallingzesFrequencyOcGetTjMaxThenVerifyTestCallFail) { auto handles = get_freq_handles(handleComponentCount); for (auto handle : handles) { double tjMax = 0.0; EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, zesFrequencyOcGetTjMax(handle, &tjMax)); } } TEST_F(SysmanDeviceFrequencyFixture, GivenValidFrequencyHandleWhenCallingzesFrequencyOcSetTjMaxThenVerifyTestCallFail) { auto handles = get_freq_handles(handleComponentCount); for (auto handle : handles) { double tjMax = 0.0; EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, zesFrequencyOcSetTjMax(handle, tjMax)); } } TEST_F(SysmanMultiDeviceFixture, GivenValidDevicePointerWhenGettingFrequencyPropertiesThenValidSchedPropertiesRetrieved) { zes_freq_properties_t properties = {}; ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES}; Device::fromHandle(device)->getProperties(&deviceProperties); LinuxFrequencyImp *pLinuxFrequencyImp = new LinuxFrequencyImp(pOsSysman, deviceProperties.flags & 
ZE_DEVICE_PROPERTY_FLAG_SUBDEVICE, deviceProperties.subdeviceId, ZES_FREQ_DOMAIN_GPU); EXPECT_EQ(ZE_RESULT_SUCCESS, pLinuxFrequencyImp->osFrequencyGetProperties(properties)); EXPECT_EQ(properties.subdeviceId, deviceProperties.subdeviceId); EXPECT_EQ(properties.onSubdevice, deviceProperties.flags & ZE_DEVICE_PROPERTY_FLAG_SUBDEVICE); delete pLinuxFrequencyImp; } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/frequency/windows/000077500000000000000000000000001422164147700325305ustar00rootroot00000000000000CMakeLists.txt000066400000000000000000000005321422164147700352110ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/frequency/windows# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(WIN32) target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/test_zes_frequency.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_frequency.h ) endif() mock_frequency.h000066400000000000000000000423041422164147700356370ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/frequency/windows/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/core/test/unit_tests/mock.h" #include "level_zero/tools/test/unit_tests/sources/sysman/windows/mock_kmd_sys_manager.h" #include "sysman/frequency/windows/os_frequency_imp.h" namespace L0 { namespace ult { class FrequencyKmdSysManager : public Mock {}; template <> struct Mock : public FrequencyKmdSysManager { uint32_t mockNumberOfDomains = 2; uint32_t mockDomainType[2] = {ZES_FREQ_DOMAIN_GPU, ZES_FREQ_DOMAIN_MEMORY}; bool mockGPUCanControl[2] = {true, false}; bool mockGPUCannotControl[2] = {false, false}; uint32_t mockMinFrequencyRange = 400; uint32_t mockMaxFrequencyRange = 1200; uint32_t mockRpn[2] = {400, 0}; uint32_t mockRp0[2] = {1200, 0}; 
uint32_t mockRequestedFrequency = 600; uint32_t mockTdpFrequency = 0; uint32_t mockResolvedFrequency[2] = {600, 4200}; uint32_t mockEfficientFrequency = 400; uint32_t mockCurrentVoltage = 1100; uint32_t mockThrottleReasons = 0; uint32_t mockIccMax = 1025; uint32_t mockTjMax = 105; uint32_t mockIsExtendedModeSupported[2] = {0, 0}; uint32_t mockIsFixedModeSupported[2] = {0, 0}; uint32_t mockIsHighVoltModeCapable[2] = {0, 0}; uint32_t mockIsHighVoltModeEnabled[2] = {0, 0}; uint32_t mockIsIccMaxSupported = 1; uint32_t mockIsOcSupported[2] = {0, 0}; uint32_t mockIsTjMaxSupported = 1; uint32_t mockMaxFactoryDefaultFrequency[2] = {600, 4200}; uint32_t mockMaxFactoryDefaultVoltage[2] = {1200, 1300}; uint32_t mockMaxOcFrequency[2] = {1800, 4500}; uint32_t mockMaxOcVoltage[2] = {1300, 1400}; uint32_t mockFixedMode[2] = {0, 0}; uint32_t mockVoltageMode[2] = {0, 0}; uint32_t mockHighVoltageSupported[2] = {0, 0}; uint32_t mockHighVoltageEnabled[2] = {0, 0}; uint32_t mockFrequencyTarget[2] = {0, 0}; uint32_t mockVoltageTarget[2] = {0, 0}; uint32_t mockVoltageOffset[2] = {0, 0}; void getFrequencyProperty(KmdSysman::GfxSysmanReqHeaderIn *pRequest, KmdSysman::GfxSysmanReqHeaderOut *pResponse) override { uint8_t *pBuffer = reinterpret_cast(pResponse); pBuffer += sizeof(KmdSysman::GfxSysmanReqHeaderOut); KmdSysman::GeneralDomainsType domain = static_cast(pRequest->inCommandParam); if (domain < KmdSysman::GeneralDomainsType::GeneralDomainDGPU || domain >= KmdSysman::GeneralDomainsType::GeneralDomainMaxTypes) { pResponse->outDataSize = 0; pResponse->outReturnCode = KmdSysman::KmdSysmanFail; return; } switch (pRequest->inRequestId) { case KmdSysman::Requests::Frequency::NumFrequencyDomains: { uint32_t *pValue = reinterpret_cast(pBuffer); *pValue = mockNumberOfDomains; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; pResponse->outDataSize = sizeof(uint32_t); } break; case KmdSysman::Requests::Frequency::ExtendedOcSupported: { uint32_t *pValue = reinterpret_cast(pBuffer); *pValue 
= mockIsExtendedModeSupported[domain]; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; pResponse->outDataSize = sizeof(uint32_t); } break; case KmdSysman::Requests::Frequency::CanControlFrequency: { uint32_t *pValue = reinterpret_cast(pBuffer); *pValue = allowSetCalls ? mockGPUCanControl[domain] : mockGPUCannotControl[domain]; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; pResponse->outDataSize = sizeof(uint32_t); } break; case KmdSysman::Requests::Frequency::FixedModeSupported: { uint32_t *pValue = reinterpret_cast(pBuffer); *pValue = mockFixedMode[domain]; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; pResponse->outDataSize = sizeof(uint32_t); } break; case KmdSysman::Requests::Frequency::HighVoltageModeSupported: { uint32_t *pValue = reinterpret_cast(pBuffer); *pValue = mockHighVoltageSupported[domain]; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; pResponse->outDataSize = sizeof(uint32_t); } break; case KmdSysman::Requests::Frequency::HighVoltageEnabled: { uint32_t *pValue = reinterpret_cast(pBuffer); *pValue = mockHighVoltageEnabled[domain]; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; pResponse->outDataSize = sizeof(uint32_t); } break; case KmdSysman::Requests::Frequency::FrequencyOcSupported: { uint32_t *pValue = reinterpret_cast(pBuffer); *pValue = mockIsOcSupported[domain]; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; pResponse->outDataSize = sizeof(uint32_t); } break; case KmdSysman::Requests::Frequency::CurrentIccMax: { uint32_t *pValue = reinterpret_cast(pBuffer); *pValue = mockIccMax; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; pResponse->outDataSize = sizeof(uint32_t); } break; case KmdSysman::Requests::Frequency::CurrentTjMax: { uint32_t *pValue = reinterpret_cast(pBuffer); *pValue = mockTjMax; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; pResponse->outDataSize = sizeof(uint32_t); } break; case KmdSysman::Requests::Frequency::MaxNonOcFrequencyDefault: { uint32_t 
*pValue = reinterpret_cast(pBuffer); *pValue = mockMaxFactoryDefaultFrequency[domain]; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; pResponse->outDataSize = sizeof(uint32_t); } break; case KmdSysman::Requests::Frequency::MaxNonOcVoltageDefault: { uint32_t *pValue = reinterpret_cast(pBuffer); *pValue = mockMaxFactoryDefaultVoltage[domain]; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; pResponse->outDataSize = sizeof(uint32_t); } break; case KmdSysman::Requests::Frequency::MaxOcFrequencyDefault: { uint32_t *pValue = reinterpret_cast(pBuffer); *pValue = mockMaxOcFrequency[domain]; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; pResponse->outDataSize = sizeof(uint32_t); } break; case KmdSysman::Requests::Frequency::MaxOcVoltageDefault: { uint32_t *pValue = reinterpret_cast(pBuffer); *pValue = mockMaxOcVoltage[domain]; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; pResponse->outDataSize = sizeof(uint32_t); } break; case KmdSysman::Requests::Frequency::CurrentFixedMode: { uint32_t *pValue = reinterpret_cast(pBuffer); *pValue = mockFixedMode[domain]; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; pResponse->outDataSize = sizeof(uint32_t); } break; case KmdSysman::Requests::Frequency::CurrentFrequencyTarget: { uint32_t *pValue = reinterpret_cast(pBuffer); *pValue = mockFrequencyTarget[domain]; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; pResponse->outDataSize = sizeof(uint32_t); } break; case KmdSysman::Requests::Frequency::CurrentVoltageTarget: { uint32_t *pValue = reinterpret_cast(pBuffer); *pValue = mockVoltageTarget[domain]; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; pResponse->outDataSize = sizeof(uint32_t); } break; case KmdSysman::Requests::Frequency::CurrentVoltageOffset: { uint32_t *pValue = reinterpret_cast(pBuffer); *pValue = mockVoltageOffset[domain]; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; pResponse->outDataSize = sizeof(uint32_t); } break; case 
KmdSysman::Requests::Frequency::CurrentVoltageMode: { uint32_t *pValue = reinterpret_cast(pBuffer); *pValue = mockVoltageMode[domain]; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; pResponse->outDataSize = sizeof(uint32_t); } break; case KmdSysman::Requests::Frequency::FrequencyThrottledEventSupported: { uint32_t *pValue = reinterpret_cast(pBuffer); *pValue = 0; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; pResponse->outDataSize = sizeof(uint32_t); } break; case KmdSysman::Requests::Frequency::CurrentFrequencyRange: { if (domain == KmdSysman::GeneralDomainsType::GeneralDomainDGPU) { uint32_t *pMinFreq = reinterpret_cast(pBuffer); uint32_t *pMaxFreq = reinterpret_cast(pBuffer + sizeof(uint32_t)); *pMinFreq = mockMinFrequencyRange; *pMaxFreq = mockMaxFrequencyRange; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; pResponse->outDataSize = 2 * sizeof(uint32_t); } else if (domain == KmdSysman::GeneralDomainsType::GeneralDomainHBM) { pResponse->outDataSize = 0; pResponse->outReturnCode = KmdSysman::KmdSysmanFail; } } break; case KmdSysman::Requests::Frequency::CurrentRequestedFrequency: { if (domain == KmdSysman::GeneralDomainsType::GeneralDomainDGPU) { uint32_t *pValue = reinterpret_cast(pBuffer); *pValue = mockRequestedFrequency; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; pResponse->outDataSize = sizeof(uint32_t); } else if (domain == KmdSysman::GeneralDomainsType::GeneralDomainHBM) { pResponse->outDataSize = 0; pResponse->outReturnCode = KmdSysman::KmdSysmanFail; } } break; case KmdSysman::Requests::Frequency::CurrentTdpFrequency: { if (domain == KmdSysman::GeneralDomainsType::GeneralDomainDGPU) { uint32_t *pValue = reinterpret_cast(pBuffer); *pValue = mockTdpFrequency; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; pResponse->outDataSize = sizeof(uint32_t); } else if (domain == KmdSysman::GeneralDomainsType::GeneralDomainHBM) { pResponse->outDataSize = 0; pResponse->outReturnCode = KmdSysman::KmdSysmanFail; } } 
break; case KmdSysman::Requests::Frequency::CurrentResolvedFrequency: { uint32_t *pValue = reinterpret_cast(pBuffer); *pValue = mockResolvedFrequency[domain]; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; pResponse->outDataSize = sizeof(uint32_t); } break; case KmdSysman::Requests::Frequency::CurrentEfficientFrequency: { if (domain == KmdSysman::GeneralDomainsType::GeneralDomainDGPU) { uint32_t *pValue = reinterpret_cast(pBuffer); *pValue = mockEfficientFrequency; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; pResponse->outDataSize = sizeof(uint32_t); } else if (domain == KmdSysman::GeneralDomainsType::GeneralDomainHBM) { pResponse->outDataSize = 0; pResponse->outReturnCode = KmdSysman::KmdSysmanFail; } } break; case KmdSysman::Requests::Frequency::FrequencyRangeMaxDefault: { if (domain == KmdSysman::GeneralDomainsType::GeneralDomainDGPU) { uint32_t *pValue = reinterpret_cast(pBuffer); *pValue = mockRp0[domain]; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; pResponse->outDataSize = sizeof(uint32_t); } else if (domain == KmdSysman::GeneralDomainsType::GeneralDomainHBM) { pResponse->outDataSize = 0; pResponse->outReturnCode = KmdSysman::KmdSysmanFail; } } break; case KmdSysman::Requests::Frequency::FrequencyRangeMinDefault: { if (domain == KmdSysman::GeneralDomainsType::GeneralDomainDGPU) { uint32_t *pValue = reinterpret_cast(pBuffer); *pValue = mockRpn[domain]; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; pResponse->outDataSize = sizeof(uint32_t); } else if (domain == KmdSysman::GeneralDomainsType::GeneralDomainHBM) { pResponse->outDataSize = 0; pResponse->outReturnCode = KmdSysman::KmdSysmanFail; } } break; case KmdSysman::Requests::Frequency::CurrentVoltage: { if (domain == KmdSysman::GeneralDomainsType::GeneralDomainDGPU) { uint32_t *pValue = reinterpret_cast(pBuffer); *pValue = mockCurrentVoltage; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; pResponse->outDataSize = sizeof(uint32_t); } else if (domain == 
KmdSysman::GeneralDomainsType::GeneralDomainHBM) { pResponse->outDataSize = 0; pResponse->outReturnCode = KmdSysman::KmdSysmanFail; } } break; case KmdSysman::Requests::Frequency::CurrentThrottleReasons: { if (domain == KmdSysman::GeneralDomainsType::GeneralDomainDGPU) { uint32_t *pValue = reinterpret_cast(pBuffer); *pValue = mockThrottleReasons; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; pResponse->outDataSize = sizeof(uint32_t); } else if (domain == KmdSysman::GeneralDomainsType::GeneralDomainHBM) { pResponse->outDataSize = 0; pResponse->outReturnCode = KmdSysman::KmdSysmanFail; } } break; default: { pResponse->outDataSize = 0; pResponse->outReturnCode = KmdSysman::KmdSysmanFail; } break; } } void setFrequencyProperty(KmdSysman::GfxSysmanReqHeaderIn *pRequest, KmdSysman::GfxSysmanReqHeaderOut *pResponse) override { uint8_t *pBuffer = reinterpret_cast(pRequest); pBuffer += sizeof(KmdSysman::GfxSysmanReqHeaderIn); KmdSysman::GeneralDomainsType domain = static_cast(pRequest->inCommandParam); if (domain < KmdSysman::GeneralDomainsType::GeneralDomainDGPU || domain >= KmdSysman::GeneralDomainsType::GeneralDomainMaxTypes) { pResponse->outDataSize = 0; pResponse->outReturnCode = KmdSysman::KmdSysmanFail; return; } switch (pRequest->inRequestId) { case KmdSysman::Requests::Frequency::CurrentFrequencyRange: { uint32_t *pMinFreq = reinterpret_cast(pBuffer); uint32_t *pMaxFreq = reinterpret_cast(pBuffer + sizeof(uint32_t)); mockMinFrequencyRange = *pMinFreq; mockMaxFrequencyRange = *pMaxFreq; pResponse->outDataSize = 0; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; } break; case KmdSysman::Requests::Frequency::CurrentFixedMode: { uint32_t *pValue = reinterpret_cast(pBuffer); mockFixedMode[domain] = *pValue; pResponse->outDataSize = 0; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; } break; case KmdSysman::Requests::Frequency::CurrentVoltageMode: { uint32_t *pValue = reinterpret_cast(pBuffer); mockVoltageMode[domain] = *pValue; 
pResponse->outDataSize = 0; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; } break; case KmdSysman::Requests::Frequency::CurrentVoltageOffset: { uint32_t *pValue = reinterpret_cast(pBuffer); mockVoltageOffset[domain] = *pValue; pResponse->outDataSize = 0; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; } break; case KmdSysman::Requests::Frequency::CurrentVoltageTarget: { uint32_t *pValue = reinterpret_cast(pBuffer); mockVoltageTarget[domain] = *pValue; pResponse->outDataSize = 0; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; } break; case KmdSysman::Requests::Frequency::CurrentFrequencyTarget: { uint32_t *pValue = reinterpret_cast(pBuffer); mockFrequencyTarget[domain] = *pValue; pResponse->outDataSize = 0; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; } break; default: { pResponse->outDataSize = 0; pResponse->outReturnCode = KmdSysman::KmdSysmanFail; } break; } } Mock() = default; ~Mock() = default; }; } // namespace ult } // namespace L0 test_zes_frequency.cpp000066400000000000000000000524621422164147700371070ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/frequency/windows/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/frequency/windows/os_frequency_imp.h" #include "level_zero/tools/source/sysman/sysman_const.h" #include "level_zero/tools/test/unit_tests/sources/sysman/frequency/windows/mock_frequency.h" #include "level_zero/tools/test/unit_tests/sources/sysman/windows/mock_sysman_fixture.h" extern bool sysmanUltsEnable; namespace L0 { namespace ult { constexpr uint32_t frequencyHandleComponentCount = 2u; constexpr double minFreq = 400.0; constexpr double maxFreq = 1200.0; constexpr double step = 50.0 / 3; constexpr uint32_t numClocks = static_cast((maxFreq - minFreq) / step) + 1; class SysmanDeviceFrequencyFixture : public SysmanDeviceFixture { protected: Mock *pKmdSysManager = nullptr; 
KmdSysManager *pOriginalKmdSysManager = nullptr; void SetUp() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } SysmanDeviceFixture::SetUp(); } void init(bool allowSetCalls) { pKmdSysManager = new Mock; pKmdSysManager->allowSetCalls = allowSetCalls; EXPECT_CALL(*pKmdSysManager, escape(_, _, _, _, _)) .WillRepeatedly(::testing::Invoke(pKmdSysManager, &Mock::mock_escape)); pOriginalKmdSysManager = pWddmSysmanImp->pKmdSysManager; pWddmSysmanImp->pKmdSysManager = pKmdSysManager; // delete handles created in initial SysmanDeviceHandleContext::init() call for (auto handle : pSysmanDeviceImp->pFrequencyHandleContext->handleList) { delete handle; } pSysmanDeviceImp->pFrequencyHandleContext->handleList.clear(); uint32_t subDeviceCount = 0; std::vector deviceHandles; // We received a device handle. Check for subdevices in this device Device::fromHandle(device->toHandle())->getSubDevices(&subDeviceCount, nullptr); if (subDeviceCount == 0) { deviceHandles.resize(1, device->toHandle()); } else { deviceHandles.resize(subDeviceCount, nullptr); Device::fromHandle(device->toHandle())->getSubDevices(&subDeviceCount, deviceHandles.data()); } pSysmanDeviceImp->pFrequencyHandleContext->init(deviceHandles); } void TearDown() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } SysmanDeviceFixture::TearDown(); pWddmSysmanImp->pKmdSysManager = pOriginalKmdSysManager; if (pKmdSysManager != nullptr) { delete pKmdSysManager; pKmdSysManager = nullptr; } } double clockValue(const double calculatedClock) { // i915 specific. frequency step is a fraction // However, the Kmd represents all clock // rates as integer values. So clocks are // rounded to the nearest integer. 
uint32_t actualClock = static_cast(calculatedClock + 0.5); return static_cast(actualClock); } std::vector get_frequency_handles(uint32_t count) { std::vector handles(count, nullptr); EXPECT_EQ(zesDeviceEnumFrequencyDomains(device->toHandle(), &count, handles.data()), ZE_RESULT_SUCCESS); return handles; } }; TEST_F(SysmanDeviceFrequencyFixture, GivenComponentCountZeroWhenEnumeratingFrequencyDomainsThenValidCountIsReturnedAndVerifySysmanPowerGetCallSucceeds) { init(true); uint32_t count = 0; EXPECT_EQ(zesDeviceEnumFrequencyDomains(device->toHandle(), &count, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(count, frequencyHandleComponentCount); } TEST_F(SysmanDeviceFrequencyFixture, GivenInvalidComponentCountWhenEnumeratingFrequencyDomainsThenValidCountIsReturnedAndVerifySysmanPowerGetCallSucceeds) { init(true); uint32_t count = 0; EXPECT_EQ(zesDeviceEnumFrequencyDomains(device->toHandle(), &count, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(count, frequencyHandleComponentCount); count = count + 1; EXPECT_EQ(zesDeviceEnumFrequencyDomains(device->toHandle(), &count, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(count, frequencyHandleComponentCount); } TEST_F(SysmanDeviceFrequencyFixture, GivenComponentCountZeroWhenEnumeratingFrequencyDomainsThenValidPowerHandlesIsReturned) { init(true); uint32_t count = 0; EXPECT_EQ(zesDeviceEnumFrequencyDomains(device->toHandle(), &count, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(count, frequencyHandleComponentCount); std::vector handles(count, nullptr); EXPECT_EQ(zesDeviceEnumFrequencyDomains(device->toHandle(), &count, handles.data()), ZE_RESULT_SUCCESS); for (auto handle : handles) { EXPECT_NE(handle, nullptr); } } TEST_F(SysmanDeviceFrequencyFixture, GivenValidFrequencyHandleWhenCallingzesFrequencyGetPropertiesThenSuccessIsReturned) { init(true); auto handles = get_frequency_handles(frequencyHandleComponentCount); uint32_t domainIndex = 0; for (auto handle : handles) { EXPECT_NE(handle, nullptr); zes_freq_properties_t properties; 
EXPECT_EQ(ZE_RESULT_SUCCESS, zesFrequencyGetProperties(handle, &properties)); EXPECT_EQ(pKmdSysManager->mockDomainType[domainIndex], properties.type); EXPECT_FALSE(properties.onSubdevice); EXPECT_EQ(pKmdSysManager->mockGPUCanControl[domainIndex], properties.canControl); if (domainIndex == ZES_FREQ_DOMAIN_GPU) { EXPECT_DOUBLE_EQ(pKmdSysManager->mockRp0[domainIndex], properties.max); EXPECT_DOUBLE_EQ(pKmdSysManager->mockRpn[domainIndex], properties.min); } else if (domainIndex == ZES_FREQ_DOMAIN_MEMORY) { EXPECT_DOUBLE_EQ(-1, properties.max); EXPECT_DOUBLE_EQ(-1, properties.min); } domainIndex++; } } TEST_F(SysmanDeviceFrequencyFixture, GivenValidFrequencyHandleWhenCallingzesAllowSetCallsToFalseFrequencyGetPropertiesThenSuccessIsReturned) { init(false); auto handles = get_frequency_handles(frequencyHandleComponentCount); uint32_t domainIndex = 0; for (auto handle : handles) { EXPECT_NE(handle, nullptr); zes_freq_properties_t properties; EXPECT_EQ(ZE_RESULT_SUCCESS, zesFrequencyGetProperties(handle, &properties)); EXPECT_EQ(pKmdSysManager->mockDomainType[domainIndex], properties.type); EXPECT_FALSE(properties.onSubdevice); if (domainIndex == ZES_FREQ_DOMAIN_GPU) { EXPECT_DOUBLE_EQ(pKmdSysManager->mockRp0[domainIndex], properties.max); EXPECT_DOUBLE_EQ(pKmdSysManager->mockRpn[domainIndex], properties.min); } else if (domainIndex == ZES_FREQ_DOMAIN_MEMORY) { EXPECT_DOUBLE_EQ(-1.0, properties.max); EXPECT_DOUBLE_EQ(-1.0, properties.min); } EXPECT_EQ(pKmdSysManager->mockGPUCannotControl[domainIndex], properties.canControl); domainIndex++; } } TEST_F(SysmanDeviceFrequencyFixture, GivenValidFrequencyHandleAndZeroCountWhenCallingzesFrequencyGetAvailableClocksThenCallSucceeds) { init(true); auto handles = get_frequency_handles(frequencyHandleComponentCount); uint32_t domainIndex = 0; for (auto handle : handles) { EXPECT_NE(handle, nullptr); uint32_t count = 0; if (domainIndex == ZES_FREQ_DOMAIN_GPU) { EXPECT_EQ(ZE_RESULT_SUCCESS, zesFrequencyGetAvailableClocks(handle, &count, 
nullptr)); EXPECT_EQ(numClocks, count); } else if (domainIndex == ZES_FREQ_DOMAIN_MEMORY) { EXPECT_EQ(ZE_RESULT_SUCCESS, zesFrequencyGetAvailableClocks(handle, &count, nullptr)); EXPECT_EQ(1, count); } domainIndex++; } } TEST_F(SysmanDeviceFrequencyFixture, GivenValidFrequencyHandleAndCorrectCountWhenCallingzesFrequencyGetAvailableClocksThenCallSucceeds) { init(true); auto handles = get_frequency_handles(frequencyHandleComponentCount); uint32_t domainIndex = 0; for (auto handle : handles) { uint32_t count = 0; if (domainIndex == ZES_FREQ_DOMAIN_GPU) { EXPECT_EQ(ZE_RESULT_SUCCESS, zesFrequencyGetAvailableClocks(handle, &count, nullptr)); EXPECT_EQ(numClocks, count); double *clocks = new double[count]; EXPECT_EQ(ZE_RESULT_SUCCESS, zesFrequencyGetAvailableClocks(handle, &count, clocks)); EXPECT_EQ(numClocks, count); for (uint32_t i = 0; i < count; i++) { EXPECT_DOUBLE_EQ(clockValue(pKmdSysManager->mockRpn[domainIndex] + (step * i)), clocks[i]); } delete[] clocks; } domainIndex++; } } TEST_F(SysmanDeviceFrequencyFixture, GivenValidFrequencyHandleWhenCallingzesFrequencyGetRangeThenVerifyzesFrequencyGetRangeTestCallSucceeds) { init(true); auto handles = get_frequency_handles(frequencyHandleComponentCount); uint32_t domainIndex = 0; for (auto handle : handles) { zes_freq_range_t limits; if (domainIndex == ZES_FREQ_DOMAIN_GPU) { EXPECT_EQ(ZE_RESULT_SUCCESS, zesFrequencyGetRange(handle, &limits)); EXPECT_DOUBLE_EQ(pKmdSysManager->mockMinFrequencyRange, limits.min); EXPECT_DOUBLE_EQ(pKmdSysManager->mockMaxFrequencyRange, limits.max); } else if (domainIndex == ZES_FREQ_DOMAIN_MEMORY) { EXPECT_EQ(ZE_RESULT_ERROR_NOT_AVAILABLE, zesFrequencyGetRange(handle, &limits)); } domainIndex++; } } TEST_F(SysmanDeviceFrequencyFixture, GivenValidFrequencyHandleWhenCallingzesFrequencySetRangeThenVerifyzesFrequencySetRangeTest1CallSucceeds) { init(true); auto handles = get_frequency_handles(frequencyHandleComponentCount); uint32_t domainIndex = 0; for (auto handle : handles) { const double 
startingMin = 900.0; const double newMax = 600.0; if (domainIndex == ZES_FREQ_DOMAIN_GPU) { zes_freq_range_t limits; pKmdSysManager->mockMinFrequencyRange = static_cast(startingMin); // If the new Max value is less than the old Min // value, the new Min must be set before the new Max limits.min = minFreq; limits.max = newMax; EXPECT_EQ(ZE_RESULT_SUCCESS, zesFrequencySetRange(handle, &limits)); EXPECT_EQ(ZE_RESULT_SUCCESS, zesFrequencyGetRange(handle, &limits)); EXPECT_DOUBLE_EQ(minFreq, limits.min); EXPECT_DOUBLE_EQ(newMax, limits.max); } domainIndex++; } } TEST_F(SysmanDeviceFrequencyFixture, GivenValidFrequencyHandleWhenCallingzesFrequencySetRangeThenVerifyzesFrequencySetRangeTest2CallSucceeds) { init(true); auto handles = get_frequency_handles(frequencyHandleComponentCount); uint32_t domainIndex = 0; for (auto handle : handles) { const double startingMax = 600.0; const double newMin = 900.0; if (domainIndex == ZES_FREQ_DOMAIN_GPU) { zes_freq_range_t limits; pKmdSysManager->mockMaxFrequencyRange = static_cast(startingMax); // If the new Min value is greater than the old Max // value, the new Max must be set before the new Min limits.min = newMin; limits.max = maxFreq; EXPECT_EQ(ZE_RESULT_SUCCESS, zesFrequencySetRange(handle, &limits)); EXPECT_EQ(ZE_RESULT_SUCCESS, zesFrequencyGetRange(handle, &limits)); EXPECT_DOUBLE_EQ(newMin, limits.min); EXPECT_DOUBLE_EQ(maxFreq, limits.max); } domainIndex++; } } TEST_F(SysmanDeviceFrequencyFixture, GivenValidFrequencyHandleWhenCallingzesFrequencyGetStateThenVerifyCallSucceeds) { init(true); auto handles = get_frequency_handles(frequencyHandleComponentCount); uint32_t domainIndex = 0; for (auto handle : handles) { zes_freq_state_t state = {}; EXPECT_EQ(ZE_RESULT_SUCCESS, zesFrequencyGetState(handle, &state)); if (domainIndex == ZES_FREQ_DOMAIN_GPU) { EXPECT_DOUBLE_EQ(static_cast(pKmdSysManager->mockResolvedFrequency[domainIndex]), state.actual); EXPECT_DOUBLE_EQ(static_cast(pKmdSysManager->mockCurrentVoltage) / 
milliVoltsFactor, state.currentVoltage); EXPECT_DOUBLE_EQ(static_cast(pKmdSysManager->mockEfficientFrequency), state.efficient); EXPECT_DOUBLE_EQ(static_cast(pKmdSysManager->mockRequestedFrequency), state.request); EXPECT_DOUBLE_EQ(static_cast(pKmdSysManager->mockTdpFrequency), state.tdp); EXPECT_EQ(pKmdSysManager->mockThrottleReasons, state.throttleReasons); } else if (domainIndex == ZES_FREQ_DOMAIN_MEMORY) { EXPECT_DOUBLE_EQ(static_cast(pKmdSysManager->mockResolvedFrequency[domainIndex]), state.actual); EXPECT_DOUBLE_EQ(-1.0, state.currentVoltage); EXPECT_DOUBLE_EQ(-1.0, state.efficient); EXPECT_DOUBLE_EQ(-1.0, state.request); EXPECT_DOUBLE_EQ(-1.0, state.tdp); EXPECT_EQ(pKmdSysManager->mockThrottleReasons, state.throttleReasons); } domainIndex++; } } TEST_F(SysmanDeviceFrequencyFixture, GivenValidFrequencyHandleWhenCallingzesFrequencyGetThrottleTimeThenVerifyCallFails) { init(true); auto handles = get_frequency_handles(frequencyHandleComponentCount); for (auto handle : handles) { zes_freq_throttle_time_t throttletime = {}; EXPECT_NE(ZE_RESULT_SUCCESS, zesFrequencyGetThrottleTime(handle, &throttletime)); } } TEST_F(SysmanDeviceFrequencyFixture, GivenValidFrequencyHandleWhenCallingzesFrequencyOcGetCapabilitiesThenVerifyCallSucceeds) { init(false); uint32_t domainIndex = 0; auto handles = get_frequency_handles(frequencyHandleComponentCount); for (auto handle : handles) { zes_oc_capabilities_t ocCaps = {}; EXPECT_EQ(ZE_RESULT_SUCCESS, zesFrequencyOcGetCapabilities(handle, &ocCaps)); EXPECT_EQ(pKmdSysManager->mockIsExtendedModeSupported[domainIndex], ocCaps.isExtendedModeSupported); EXPECT_EQ(pKmdSysManager->mockIsFixedModeSupported[domainIndex], ocCaps.isFixedModeSupported); EXPECT_EQ(pKmdSysManager->mockHighVoltageSupported[domainIndex], ocCaps.isHighVoltModeCapable); EXPECT_EQ(pKmdSysManager->mockHighVoltageEnabled[domainIndex], ocCaps.isHighVoltModeEnabled); EXPECT_EQ(pKmdSysManager->mockIsIccMaxSupported, ocCaps.isIccMaxSupported); 
EXPECT_EQ(pKmdSysManager->mockIsOcSupported[domainIndex], ocCaps.isOcSupported); EXPECT_EQ(pKmdSysManager->mockIsTjMaxSupported, ocCaps.isTjMaxSupported); EXPECT_DOUBLE_EQ(static_cast(pKmdSysManager->mockMaxFactoryDefaultFrequency[domainIndex]), ocCaps.maxFactoryDefaultFrequency); EXPECT_DOUBLE_EQ(static_cast(pKmdSysManager->mockMaxFactoryDefaultVoltage[domainIndex]), ocCaps.maxFactoryDefaultVoltage); EXPECT_DOUBLE_EQ(static_cast(pKmdSysManager->mockMaxOcFrequency[domainIndex]), ocCaps.maxOcFrequency); EXPECT_DOUBLE_EQ(static_cast(pKmdSysManager->mockMaxOcVoltage[domainIndex]), ocCaps.maxOcVoltage); EXPECT_DOUBLE_EQ(static_cast(pKmdSysManager->mockVoltageOffset[domainIndex]), ocCaps.maxOcVoltageOffset); EXPECT_DOUBLE_EQ(static_cast(pKmdSysManager->mockVoltageOffset[domainIndex]), ocCaps.minOcVoltageOffset); domainIndex++; } } TEST_F(SysmanDeviceFrequencyFixture, GivenValidFrequencyHandleWhenCallingzesFrequencyOcGetFrequencyTargetThenVerifyCallSucceeds) { init(false); uint32_t domainIndex = 0; auto handles = get_frequency_handles(frequencyHandleComponentCount); for (auto handle : handles) { double freqTarget = 0.0; EXPECT_EQ(ZE_RESULT_SUCCESS, zesFrequencyOcGetFrequencyTarget(handle, &freqTarget)); EXPECT_DOUBLE_EQ(static_cast(pKmdSysManager->mockFrequencyTarget[domainIndex]), freqTarget); domainIndex++; } } TEST_F(SysmanDeviceFrequencyFixture, GivenValidFrequencyHandleWhenCallingzesFrequencyOcGetVoltageTargetThenVerifyCallSucceeds) { init(false); uint32_t domainIndex = 0; auto handles = get_frequency_handles(frequencyHandleComponentCount); for (auto handle : handles) { double voltageTarget = 0.0, voltageOffset = 0.0; EXPECT_EQ(ZE_RESULT_SUCCESS, zesFrequencyOcGetVoltageTarget(handle, &voltageTarget, &voltageOffset)); EXPECT_DOUBLE_EQ(static_cast(pKmdSysManager->mockVoltageTarget[domainIndex]), voltageTarget); EXPECT_DOUBLE_EQ(static_cast(pKmdSysManager->mockVoltageOffset[domainIndex]), voltageOffset); domainIndex++; } } TEST_F(SysmanDeviceFrequencyFixture, 
GivenValidFrequencyHandleWhenCallingzesFrequencyOcGetModeThenVerifyCallSucceeds) { init(false); uint32_t domainIndex = 0; auto handles = get_frequency_handles(frequencyHandleComponentCount); for (auto handle : handles) { zes_oc_mode_t mode = ZES_OC_MODE_OFF; EXPECT_EQ(ZE_RESULT_SUCCESS, zesFrequencyOcGetMode(handle, &mode)); EXPECT_DOUBLE_EQ(ZES_OC_MODE_INTERPOLATIVE, mode); domainIndex++; } } TEST_F(SysmanDeviceFrequencyFixture, GivenValidFrequencyHandleAllowSetCallsToFalseWhenCallingzesFrequencyOcSetFrequencyTargetThenVerifyCallFails) { init(false); uint32_t domainIndex = 0; auto handles = get_frequency_handles(frequencyHandleComponentCount); for (auto handle : handles) { double freqTarget = 1400.0; EXPECT_NE(ZE_RESULT_SUCCESS, zesFrequencyOcSetFrequencyTarget(handle, freqTarget)); domainIndex++; } } TEST_F(SysmanDeviceFrequencyFixture, GivenValidFrequencyHandleAllowSetCallsToFalseWhenCallingzesFrequencyOcSetVoltageTargetThenVerifyCallFails) { init(false); uint32_t domainIndex = 0; auto handles = get_frequency_handles(frequencyHandleComponentCount); for (auto handle : handles) { double voltageTarget = 1040.0, voltageOffset = 20.0; EXPECT_NE(ZE_RESULT_SUCCESS, zesFrequencyOcSetVoltageTarget(handle, voltageTarget, voltageOffset)); domainIndex++; } } TEST_F(SysmanDeviceFrequencyFixture, GivenValidFrequencyHandleAllowSetCallsToFalseWhenCallingzesFrequencyOcSetModeThenVerifyCallFails) { init(false); uint32_t domainIndex = 0; auto handles = get_frequency_handles(frequencyHandleComponentCount); for (auto handle : handles) { zes_oc_mode_t mode = ZES_OC_MODE_OVERRIDE; EXPECT_NE(ZE_RESULT_SUCCESS, zesFrequencyOcSetMode(handle, mode)); domainIndex++; } } TEST_F(SysmanDeviceFrequencyFixture, GivenValidFrequencyHandleAllowSetCallsToTrueWhenCallingzesFrequencyOcSetFrequencyTargetThenVerifyCallSucceed) { init(true); uint32_t domainIndex = 0; auto handles = get_frequency_handles(frequencyHandleComponentCount); for (auto handle : handles) { double freqTarget = 1400.0; 
EXPECT_EQ(ZE_RESULT_SUCCESS, zesFrequencyOcSetFrequencyTarget(handle, freqTarget)); double newFreqTarget = 0.0; EXPECT_EQ(ZE_RESULT_SUCCESS, zesFrequencyOcGetFrequencyTarget(handle, &newFreqTarget)); EXPECT_DOUBLE_EQ(newFreqTarget, freqTarget); domainIndex++; } } TEST_F(SysmanDeviceFrequencyFixture, GivenValidFrequencyHandleAllowSetCallsToTrueWhenCallingzesFrequencyOcSetVoltageTargetThenVerifyCallSucceed) { init(true); uint32_t domainIndex = 0; auto handles = get_frequency_handles(frequencyHandleComponentCount); for (auto handle : handles) { double voltageTarget = 1040.0, voltageOffset = 20.0; EXPECT_EQ(ZE_RESULT_SUCCESS, zesFrequencyOcSetVoltageTarget(handle, voltageTarget, voltageOffset)); double newVoltageTarget = 1040.0, newVoltageOffset = 20.0; EXPECT_EQ(ZE_RESULT_SUCCESS, zesFrequencyOcGetVoltageTarget(handle, &newVoltageTarget, &newVoltageOffset)); EXPECT_DOUBLE_EQ(voltageTarget, newVoltageTarget); EXPECT_DOUBLE_EQ(voltageOffset, newVoltageOffset); domainIndex++; } } TEST_F(SysmanDeviceFrequencyFixture, GivenValidFrequencyHandleAllowSetCallsToTrueWhenCallingzesFrequencyOcSetModeThenVerifyCallSucceed) { init(true); uint32_t domainIndex = 0; auto handles = get_frequency_handles(frequencyHandleComponentCount); for (auto handle : handles) { zes_oc_mode_t mode = ZES_OC_MODE_INTERPOLATIVE; zes_oc_mode_t newmode; EXPECT_EQ(ZE_RESULT_SUCCESS, zesFrequencyOcSetMode(handle, mode)); EXPECT_EQ(ZE_RESULT_SUCCESS, zesFrequencyOcGetMode(handle, &newmode)); EXPECT_EQ(newmode, ZES_OC_MODE_INTERPOLATIVE); mode = ZES_OC_MODE_OFF; EXPECT_EQ(ZE_RESULT_SUCCESS, zesFrequencyOcSetMode(handle, mode)); EXPECT_EQ(ZE_RESULT_SUCCESS, zesFrequencyOcGetMode(handle, &newmode)); EXPECT_EQ(newmode, ZES_OC_MODE_INTERPOLATIVE); mode = ZES_OC_MODE_FIXED; EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_ENUMERATION, zesFrequencyOcSetMode(handle, mode)); EXPECT_EQ(ZE_RESULT_SUCCESS, zesFrequencyOcGetMode(handle, &newmode)); EXPECT_EQ(newmode, ZES_OC_MODE_INTERPOLATIVE); mode = ZES_OC_MODE_OFF; 
EXPECT_EQ(ZE_RESULT_SUCCESS, zesFrequencyOcSetMode(handle, mode)); EXPECT_EQ(ZE_RESULT_SUCCESS, zesFrequencyOcGetMode(handle, &newmode)); EXPECT_EQ(newmode, ZES_OC_MODE_INTERPOLATIVE); mode = ZES_OC_MODE_OVERRIDE; EXPECT_EQ(ZE_RESULT_SUCCESS, zesFrequencyOcSetMode(handle, mode)); EXPECT_EQ(ZE_RESULT_SUCCESS, zesFrequencyOcGetMode(handle, &newmode)); EXPECT_EQ(newmode, ZES_OC_MODE_OVERRIDE); domainIndex++; } } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/global_operations/000077500000000000000000000000001422164147700325405ustar00rootroot00000000000000CMakeLists.txt000066400000000000000000000003041422164147700352160ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/global_operations# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) add_subdirectories() compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/global_operations/linux/000077500000000000000000000000001422164147700336775ustar00rootroot00000000000000CMakeLists.txt000066400000000000000000000013711422164147700363620ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/global_operations/linux# # Copyright (C) 2020-2022 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_TESTS_TOOLS_SYSMAN_GLOBAL_OPERATIONS_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/test_zes_global_operations.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_global_operations.h ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/test_zes_global_operations_helper.cpp ) if((NEO_ENABLE_i915_PRELIM_DETECTION) AND ("${BRANCH_TYPE}" STREQUAL "")) list(REMOVE_ITEM L0_TESTS_TOOLS_SYSMAN_GLOBAL_OPERATIONS_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/test_zes_global_operations_helper.cpp ) endif() if(UNIX) target_sources(${TARGET_NAME} PRIVATE 
${L0_TESTS_TOOLS_SYSMAN_GLOBAL_OPERATIONS_LINUX} ) endif() mock_global_operations.h000066400000000000000000000502421422164147700405100ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/global_operations/linux/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/core/test/unit_tests/mock.h" #include "level_zero/tools/source/sysman/global_operations/global_operations_imp.h" #include "level_zero/tools/source/sysman/global_operations/linux/os_global_operations_imp.h" namespace L0 { namespace ult { const std::string vendorIntel("Intel(R) Corporation"); const std::string unknown("unknown"); const std::string intelPciId("0x8086"); const std::string deviceDir("device"); const std::string subsystemVendorFile("device/subsystem_vendor"); const std::string driverFile("device/driver"); const std::string agamaVersionFile("/sys/module/i915/agama_version"); const std::string srcVersionFile("/sys/module/i915/srcversion"); const std::string functionLevelReset("device/reset"); const std::string clientsDir("clients"); constexpr uint64_t pid1 = 1711u; constexpr uint64_t pid2 = 1722u; constexpr uint64_t pid3 = 1723u; constexpr uint64_t pid4 = 1733u; constexpr uint64_t pid6 = 1744u; constexpr uint64_t pid7 = 1755u; const std::string bPid4 = "<1733>"; constexpr uint64_t engineTimeSpent = 123456u; const std::string clientId1("4"); const std::string clientId2("5"); const std::string clientId3("6"); const std::string clientId4("7"); const std::string clientId5("8"); const std::string clientId6("10"); const std::string clientId7("11"); const std::string clientId8("12"); const std::string clientId9("13"); const std::string engine0("0"); const std::string engine1("1"); const std::string engine2("2"); const std::string engine3("3"); const std::string engine6("6"); const std::string driverVersion("5.0.0-37-generic SMP mod_unload"); const std::string srcVersion("5.0.0-37"); const 
std::string ueventWedgedFile("/var/lib/libze_intel_gpu/wedged_file"); const std::string mockFunctionResetPath("/MOCK_FUNCTION_LEVEL_RESET_PATH"); const std::string mockDeviceDir("/MOCK_DEVICE_DIR"); const std::string mockDeviceName("/MOCK_DEVICE_NAME"); struct GlobalOperationsEngineHandleContext : public EngineHandleContext { GlobalOperationsEngineHandleContext(OsSysman *pOsSysman) : EngineHandleContext(pOsSysman) {} }; template <> struct Mock : public GlobalOperationsEngineHandleContext { void initMock() {} Mock(OsSysman *pOsSysman) : GlobalOperationsEngineHandleContext(pOsSysman) {} MOCK_METHOD(void, init, (), (override)); }; struct GlobalOperationsRasHandleContext : public RasHandleContext { GlobalOperationsRasHandleContext(OsSysman *pOsSysman) : RasHandleContext(pOsSysman) {} }; template <> struct Mock : public GlobalOperationsRasHandleContext { void initMock(std::vector &deviceHandles) {} Mock(OsSysman *pOsSysman) : GlobalOperationsRasHandleContext(pOsSysman) {} MOCK_METHOD(void, init, (std::vector & deviceHandles), (override)); }; struct GlobalOperationsDiagnosticsHandleContext : public DiagnosticsHandleContext { GlobalOperationsDiagnosticsHandleContext(OsSysman *pOsSysman) : DiagnosticsHandleContext(pOsSysman) {} }; template <> struct Mock : public GlobalOperationsDiagnosticsHandleContext { void initMock(std::vector &deviceHandles) {} Mock(OsSysman *pOsSysman) : GlobalOperationsDiagnosticsHandleContext(pOsSysman) {} MOCK_METHOD(void, init, (std::vector & deviceHandles), (override)); }; struct GlobalOperationsFirmwareHandleContext : public FirmwareHandleContext { GlobalOperationsFirmwareHandleContext(OsSysman *pOsSysman) : FirmwareHandleContext(pOsSysman) {} }; template <> struct Mock : public GlobalOperationsFirmwareHandleContext { void initMock() {} Mock(OsSysman *pOsSysman) : GlobalOperationsFirmwareHandleContext(pOsSysman) {} MOCK_METHOD(void, init, (), (override)); }; class GlobalOperationsSysfsAccess : public SysfsAccess {}; template <> struct Mock : 
public GlobalOperationsSysfsAccess { ze_result_t getRealPathVal(const std::string file, std::string &val) { if (file.compare(functionLevelReset) == 0) { val = mockFunctionResetPath; } else if (file.compare(deviceDir) == 0) { val = mockDeviceDir; } else { return ZE_RESULT_ERROR_NOT_AVAILABLE; } return ZE_RESULT_SUCCESS; } ze_result_t getValString(const std::string file, std::string &val) { if (file.compare(subsystemVendorFile) == 0) { val = "0x8086"; } else if (file.compare("clients/8/pid") == 0) { val = bPid4; } else { return ZE_RESULT_ERROR_NOT_AVAILABLE; } return ZE_RESULT_SUCCESS; } ze_result_t getFalseValString(const std::string file, std::string &val) { if (file.compare(subsystemVendorFile) == 0) { val = "0xa086"; } else { return ZE_RESULT_ERROR_NOT_AVAILABLE; } return ZE_RESULT_SUCCESS; } ze_result_t getValUnsignedLong(const std::string file, uint64_t &val) { if ((file.compare("clients/4/pid") == 0) || (file.compare("clients/5/pid") == 0)) { val = pid1; } else if (file.compare("clients/6/pid") == 0) { val = pid2; } else if (file.compare("clients/7/pid") == 0) { val = pid3; } else if (file.compare("clients/10/pid") == 0) { val = pid6; } else if (file.compare("clients/11/pid") == 0) { val = pid7; } else if (file.compare("clients/12/pid") == 0) { val = pid7; } else if (file.compare("clients/13/pid") == 0) { val = pid7; } else if ((file.compare("clients/4/busy/0") == 0) || (file.compare("clients/4/busy/3") == 0) || (file.compare("clients/5/busy/1") == 0) || (file.compare("clients/6/busy/0") == 0) || (file.compare("clients/8/busy/1") == 0) || (file.compare("clients/8/busy/0") == 0) || (file.compare("clients/13/busy/6") == 0)) { val = engineTimeSpent; } else if ((file.compare("clients/4/busy/1") == 0) || (file.compare("clients/4/busy/2") == 0) || (file.compare("clients/5/busy/0") == 0) || (file.compare("clients/5/busy/2") == 0) || (file.compare("clients/7/busy/0") == 0) || (file.compare("clients/7/busy/2") == 0) || (file.compare("clients/5/busy/3") == 0) || 
(file.compare("clients/6/busy/1") == 0) || (file.compare("clients/6/busy/2") == 0) || (file.compare("clients/6/busy/3") == 0) || (file.compare("clients/8/busy/2") == 0) || (file.compare("clients/8/busy/3") == 0)) { val = 0; } else if ((file.compare("clients/4/total_device_memory_buffer_objects/created_bytes") == 0) || (file.compare("clients/5/total_device_memory_buffer_objects/created_bytes") == 0) || (file.compare("clients/6/total_device_memory_buffer_objects/created_bytes") == 0) || (file.compare("clients/8/total_device_memory_buffer_objects/created_bytes") == 0) || (file.compare("clients/10/total_device_memory_buffer_objects/created_bytes") == 0)) { val = 1024; } else if ((file.compare("clients/4/total_device_memory_buffer_objects/imported_bytes") == 0) || (file.compare("clients/5/total_device_memory_buffer_objects/imported_bytes") == 0) || (file.compare("clients/6/total_device_memory_buffer_objects/imported_bytes") == 0) || (file.compare("clients/8/total_device_memory_buffer_objects/imported_bytes") == 0) || (file.compare("clients/10/total_device_memory_buffer_objects/imported_bytes") == 0)) { val = 512; } else if (file.compare("clients/7/total_device_memory_buffer_objects/created_bytes") == 0) { return ZE_RESULT_ERROR_UNKNOWN; } else if (file.compare("clients/7/total_device_memory_buffer_objects/imported_bytes") == 0) { return ZE_RESULT_ERROR_NOT_AVAILABLE; } else if (file.compare("clients/13/total_device_memory_buffer_objects/imported_bytes") == 0) { return ZE_RESULT_ERROR_UNKNOWN; } else { return ZE_RESULT_ERROR_NOT_AVAILABLE; } return ZE_RESULT_SUCCESS; } ze_result_t getValUnsignedLongCreatedBytesSuccess(const std::string file, uint64_t &val) { if ((file.compare("clients/4/pid") == 0) || (file.compare("clients/5/pid") == 0)) { val = pid1; } else if (file.compare("clients/6/pid") == 0) { val = pid2; } else if ((file.compare("clients/4/busy/0") == 0) || (file.compare("clients/4/busy/3") == 0) || (file.compare("clients/5/busy/1") == 0) || 
(file.compare("clients/6/busy/0") == 0) || (file.compare("clients/8/busy/1") == 0) || (file.compare("clients/8/busy/0") == 0)) { val = engineTimeSpent; } else if ((file.compare("clients/4/busy/1") == 0) || (file.compare("clients/4/busy/2") == 0) || (file.compare("clients/5/busy/0") == 0) || (file.compare("clients/5/busy/2") == 0) || (file.compare("clients/7/busy/0") == 0) || (file.compare("clients/7/busy/2") == 0) || (file.compare("clients/5/busy/3") == 0) || (file.compare("clients/6/busy/1") == 0) || (file.compare("clients/6/busy/2") == 0) || (file.compare("clients/6/busy/3") == 0) || (file.compare("clients/8/busy/2") == 0) || (file.compare("clients/8/busy/3") == 0)) { val = 0; } else if ((file.compare("clients/4/total_device_memory_buffer_objects/created_bytes") == 0) || (file.compare("clients/5/total_device_memory_buffer_objects/created_bytes") == 0) || (file.compare("clients/6/total_device_memory_buffer_objects/created_bytes") == 0) || (file.compare("clients/8/total_device_memory_buffer_objects/created_bytes") == 0) || (file.compare("clients/7/total_device_memory_buffer_objects/created_bytes") == 0)) { val = 1024; } else if ((file.compare("clients/4/total_device_memory_buffer_objects/imported_bytes") == 0) || (file.compare("clients/5/total_device_memory_buffer_objects/imported_bytes") == 0) || (file.compare("clients/6/total_device_memory_buffer_objects/imported_bytes") == 0) || (file.compare("clients/8/total_device_memory_buffer_objects/imported_bytes") == 0)) { val = 512; } else if (file.compare("clients/7/total_device_memory_buffer_objects/imported_bytes") == 0) { return ZE_RESULT_ERROR_NOT_AVAILABLE; } else { return ZE_RESULT_ERROR_NOT_AVAILABLE; } return ZE_RESULT_SUCCESS; } ze_result_t getScannedDir4Entries(const std::string path, std::vector &list) { if (path.compare(clientsDir) == 0) { list.push_back(clientId1); list.push_back(clientId2); list.push_back(clientId3); list.push_back(clientId4); list.push_back(clientId5); list.push_back(clientId6); } else if 
((path.compare("clients/4/busy") == 0) || (path.compare("clients/5/busy") == 0) || (path.compare("clients/6/busy") == 0) || (path.compare("clients/7/busy") == 0) || (path.compare("clients/8/busy") == 0)) { list.push_back(engine0); list.push_back(engine1); list.push_back(engine2); list.push_back(engine3); } else { return ZE_RESULT_ERROR_NOT_AVAILABLE; } return ZE_RESULT_SUCCESS; } ze_result_t getScannedDirEntries(const std::string path, std::vector &list) { if (path.compare(clientsDir) == 0) { list.push_back(clientId1); list.push_back(clientId2); list.push_back(clientId3); list.push_back(clientId5); list.push_back(clientId6); list.push_back(clientId7); } else if ((path.compare("clients/4/busy") == 0) || (path.compare("clients/5/busy") == 0) || (path.compare("clients/6/busy") == 0) || (path.compare("clients/8/busy") == 0)) { list.push_back(engine0); list.push_back(engine1); list.push_back(engine2); list.push_back(engine3); } else { return ZE_RESULT_ERROR_NOT_AVAILABLE; } return ZE_RESULT_SUCCESS; } ze_result_t getScannedDirPidEntires(const std::string path, std::vector &list) { if (path.compare(clientsDir) == 0) { list.push_back(clientId8); } else if (path.compare("clients/12/busy") == 0) { return ZE_RESULT_ERROR_UNKNOWN; } return ZE_RESULT_SUCCESS; } ze_result_t getScannedDirPidEntiresForClients(const std::string path, std::vector &list) { if (path.compare(clientsDir) == 0) { list.push_back(clientId9); } else if (path.compare("clients/13/busy") == 0) { list.push_back(engine6); } return ZE_RESULT_SUCCESS; } bool mockIsMyDeviceFile(const std::string dev) { if (dev.compare(mockDeviceName) == 0) { return true; } return false; } Mock() = default; MOCK_METHOD(ze_result_t, read, (const std::string file, std::string &val), (override)); MOCK_METHOD(ze_result_t, read, (const std::string file, uint64_t &val), (override)); MOCK_METHOD(ze_result_t, scanDirEntries, (const std::string path, std::vector &list), (override)); MOCK_METHOD(ze_result_t, getRealPath, (const std::string 
path, std::string &val), (override)); MOCK_METHOD(ze_result_t, bindDevice, (const std::string device), (override)); MOCK_METHOD(ze_result_t, unbindDevice, (const std::string device), (override)); MOCK_METHOD(bool, fileExists, (const std::string file), (override)); MOCK_METHOD(bool, isMyDeviceFile, (const std::string dev), (override)); ADDMETHOD_NOBASE(isRootUser, bool, true, ()); }; class GlobalOperationsProcfsAccess : public ProcfsAccess {}; template <> struct Mock : public GlobalOperationsProcfsAccess { const ::pid_t extraPid = 4; const int extraFd = 5; std::vector<::pid_t> pidList = {1, 2, 3}; std::vector fdList = {0, 1, 2}; ::pid_t ourDevicePid = 0; int ourDeviceFd = 0; ze_result_t mockProcessListDeviceUnused(std::vector<::pid_t> &list) { list = pidList; return ZE_RESULT_SUCCESS; } ze_result_t mockProcessListDeviceInUse(std::vector<::pid_t> &list) { list = pidList; if (ourDevicePid) { list.push_back(ourDevicePid); } return ZE_RESULT_SUCCESS; } ::pid_t getMockMyProcessId() { return ::getpid(); } ze_result_t getMockFileDescriptors(const ::pid_t pid, std::vector &list) { // Give every process 3 file descriptors // Except the device that MOCK has the device open. Give it one extra. 
list.clear(); list = fdList; if (ourDevicePid == pid) { list.push_back(ourDeviceFd); } return ZE_RESULT_SUCCESS; } ze_result_t getMockFileDescriptorsFailure(const ::pid_t pid, std::vector &list) { //return failure to verify the error condition check list.clear(); return ZE_RESULT_ERROR_UNKNOWN; } ze_result_t getMockFileName(const ::pid_t pid, const int fd, std::string &val) { if (pid == ourDevicePid && fd == ourDeviceFd) { val = mockDeviceName; } else { // return fake filenames for other file descriptors val = std::string("/FILENAME") + std::to_string(fd); } return ZE_RESULT_SUCCESS; } bool mockIsAlive(const ::pid_t pid) { if (pid == ourDevicePid) { return true; } return false; } void mockKill(const ::pid_t pid) { ourDevicePid = 0; } Mock() = default; MOCK_METHOD(ze_result_t, listProcesses, (std::vector<::pid_t> & list), (override)); MOCK_METHOD(::pid_t, myProcessId, (), (override)); MOCK_METHOD(ze_result_t, getFileDescriptors, (const ::pid_t pid, std::vector &list), (override)); MOCK_METHOD(ze_result_t, getFileName, (const ::pid_t pid, const int fd, std::string &val), (override)); MOCK_METHOD(bool, isAlive, (const ::pid_t pid), (override)); MOCK_METHOD(void, kill, (const ::pid_t pid), (override)); }; class GlobalOperationsFsAccess : public FsAccess {}; template <> struct Mock : public GlobalOperationsFsAccess { ze_result_t getValAgamaFile(const std::string file, std::string &val) { if (file.compare(agamaVersionFile) == 0) { val = driverVersion; } else { return ZE_RESULT_ERROR_NOT_AVAILABLE; } return ZE_RESULT_SUCCESS; } ze_result_t getValSrcFile(const std::string file, std::string &val) { if (file.compare(srcVersionFile) == 0) { val = srcVersion; } else { return ZE_RESULT_ERROR_NOT_AVAILABLE; } return ZE_RESULT_SUCCESS; } ze_result_t getValWedgedFileTrue(const std::string file, uint32_t &val) { if (file.compare(ueventWedgedFile) == 0) { val = 1; } else { return ZE_RESULT_ERROR_NOT_AVAILABLE; } return ZE_RESULT_SUCCESS; } ze_result_t getValWedgedFileFalse(const 
std::string file, uint32_t &val) { if (file.compare(ueventWedgedFile) == 0) { val = 0; } else { return ZE_RESULT_ERROR_NOT_AVAILABLE; } return ZE_RESULT_SUCCESS; } Mock() = default; MOCK_METHOD(ze_result_t, read, (const std::string file, std::string &val), (override)); MOCK_METHOD(ze_result_t, read, (const std::string file, uint32_t &val), (override)); MOCK_METHOD(ze_result_t, write, (const std::string file, const std::string val), (override)); MOCK_METHOD(ze_result_t, canWrite, (const std::string file), (override)); }; class FirmwareInterface : public FirmwareUtil {}; template <> struct Mock : public FirmwareUtil { ze_result_t mockFwDeviceInit(void) { return ZE_RESULT_SUCCESS; } ze_result_t mockFwDeviceInitFail(void) { return ZE_RESULT_ERROR_UNKNOWN; } ze_result_t mockIfrReturnTrue(bool &ifrStatus) { ifrStatus = true; return ZE_RESULT_SUCCESS; } ze_result_t mockIfrReturnFail(bool &ifrStatus) { return ZE_RESULT_ERROR_UNKNOWN; } ze_result_t mockIfrReturnFalse(bool &ifrStatus) { ifrStatus = false; return ZE_RESULT_SUCCESS; } Mock() = default; MOCK_METHOD(ze_result_t, fwDeviceInit, (), (override)); MOCK_METHOD(ze_result_t, getFirstDevice, (igsc_device_info * info), (override)); MOCK_METHOD(ze_result_t, getFwVersion, (std::string fwType, std::string &firmwareVersion), (override)); MOCK_METHOD(ze_result_t, flashFirmware, (std::string fwType, void *pImage, uint32_t size), (override)); MOCK_METHOD(ze_result_t, fwIfrApplied, (bool &ifrStatus), (override)); ADDMETHOD_NOBASE(fwSupportedDiagTests, ze_result_t, ZE_RESULT_SUCCESS, (std::vector & supportedDiagTests)); ADDMETHOD_NOBASE(fwRunDiagTests, ze_result_t, ZE_RESULT_SUCCESS, (std::string & osDiagType, zes_diag_result_t *pResult)); ADDMETHOD_NOBASE(fwGetMemoryErrorCount, ze_result_t, ZE_RESULT_SUCCESS, (zes_ras_error_type_t category, uint32_t subDeviceCount, uint32_t subDeviceId, uint64_t &count)); ADDMETHOD_NOBASE_VOIDRETURN(getDeviceSupportedFwTypes, (std::vector & fwTypes)); }; class PublicLinuxGlobalOperationsImp : 
public L0::LinuxGlobalOperationsImp { public: using LinuxGlobalOperationsImp::pLinuxSysmanImp; using LinuxGlobalOperationsImp::resetTimeout; }; } // namespace ult } // namespace L0 test_zes_global_operations.cpp000066400000000000000000001136511422164147700417560ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/global_operations/linux/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/helpers/ult_hw_config.h" #include "level_zero/tools/test/unit_tests/sources/sysman/linux/mock_sysman_fixture.h" #include "mock_global_operations.h" extern bool sysmanUltsEnable; using ::testing::Matcher; namespace L0 { namespace ult { constexpr uint64_t memSize1 = 2048; constexpr uint64_t memSize2 = 1024; constexpr uint64_t memSize4 = 1024; constexpr uint64_t memSize6 = 1024; constexpr uint64_t memSize7 = 0; constexpr uint64_t sharedMemSize1 = 1024; constexpr uint64_t sharedMemSize2 = 512; constexpr uint64_t sharedMemSize4 = 512; constexpr uint64_t sharedMemSize6 = 512; constexpr uint64_t sharedMemSize7 = 0; // In mock function getValUnsignedLong, we have set the engines used as 0, 3 and 1. // Hence, expecting 28 as engine field because 28 in binary would be 00011100 // This indicates bit number 2, 3 and 4 are set, thus this indicates, this process // used ZES_ENGINE_TYPE_FLAG_3D, ZES_ENGINE_TYPE_FLAG_MEDIA and ZES_ENGINE_TYPE_FLAG_DMA // Their corresponding mapping with i915 engine numbers are 0, 3 and 1 respectively. constexpr int64_t engines1 = 28u; // 4 in binary 0100, as 2nd bit is set, hence it indicates, process used ZES_ENGINE_TYPE_FLAG_3D // Corresponding i915 mapped value in mocked getValUnsignedLong() is 0. 
constexpr int64_t engines2 = 4u; constexpr int64_t engines4 = 20u; constexpr int64_t engines6 = 1u; constexpr int64_t engines7 = 1u; constexpr uint32_t totalProcessStates = 5u; // Three process States for three pids constexpr uint32_t totalProcessStatesForFaultyClients = 3u; class SysmanGlobalOperationsFixture : public SysmanDeviceFixture { protected: std::unique_ptr> pEngineHandleContext; std::unique_ptr> pDiagnosticsHandleContext; std::unique_ptr> pFirmwareHandleContext; std::unique_ptr> pRasHandleContext; std::unique_ptr> pSysfsAccess; std::unique_ptr> pProcfsAccess; std::unique_ptr> pFsAccess; EngineHandleContext *pEngineHandleContextOld = nullptr; DiagnosticsHandleContext *pDiagnosticsHandleContextOld = nullptr; FirmwareHandleContext *pFirmwareHandleContextOld = nullptr; RasHandleContext *pRasHandleContextOld = nullptr; SysfsAccess *pSysfsAccessOld = nullptr; ProcfsAccess *pProcfsAccessOld = nullptr; FsAccess *pFsAccessOld = nullptr; OsGlobalOperations *pOsGlobalOperationsPrev = nullptr; L0::GlobalOperations *pGlobalOperationsPrev = nullptr; L0::GlobalOperationsImp *pGlobalOperationsImp; std::string expectedModelName; void SetUp() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } SysmanDeviceFixture::SetUp(); pEngineHandleContextOld = pSysmanDeviceImp->pEngineHandleContext; pDiagnosticsHandleContextOld = pSysmanDeviceImp->pDiagnosticsHandleContext; pFirmwareHandleContextOld = pSysmanDeviceImp->pFirmwareHandleContext; pRasHandleContextOld = pSysmanDeviceImp->pRasHandleContext; pSysfsAccessOld = pLinuxSysmanImp->pSysfsAccess; pProcfsAccessOld = pLinuxSysmanImp->pProcfsAccess; pFsAccessOld = pLinuxSysmanImp->pFsAccess; pEngineHandleContext = std::make_unique>>(pOsSysman); pSysfsAccess = std::make_unique>>(); pProcfsAccess = std::make_unique>>(); pFsAccess = std::make_unique>>(); pDiagnosticsHandleContext = std::make_unique>>(pOsSysman); pFirmwareHandleContext = std::make_unique>>(pOsSysman); pRasHandleContext = std::make_unique>>(pOsSysman); 
pSysmanDeviceImp->pEngineHandleContext = pEngineHandleContext.get(); pLinuxSysmanImp->pSysfsAccess = pSysfsAccess.get(); pLinuxSysmanImp->pProcfsAccess = pProcfsAccess.get(); pLinuxSysmanImp->pFsAccess = pFsAccess.get(); pSysmanDeviceImp->pDiagnosticsHandleContext = pDiagnosticsHandleContext.get(); pSysmanDeviceImp->pFirmwareHandleContext = pFirmwareHandleContext.get(); pSysmanDeviceImp->pRasHandleContext = pRasHandleContext.get(); ON_CALL(*pRasHandleContext.get(), init(_)) .WillByDefault(::testing::Invoke(pRasHandleContext.get(), &Mock::initMock)); ON_CALL(*pEngineHandleContext.get(), init()) .WillByDefault(::testing::Invoke(pEngineHandleContext.get(), &Mock::initMock)); ON_CALL(*pDiagnosticsHandleContext.get(), init(_)) .WillByDefault(::testing::Invoke(pDiagnosticsHandleContext.get(), &Mock::initMock)); ON_CALL(*pFirmwareHandleContext.get(), init()) .WillByDefault(::testing::Invoke(pFirmwareHandleContext.get(), &Mock::initMock)); ON_CALL(*pSysfsAccess.get(), read(_, Matcher(_))) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::getValString)); ON_CALL(*pSysfsAccess.get(), read(_, Matcher(_))) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::getValUnsignedLong)); ON_CALL(*pSysfsAccess.get(), scanDirEntries(_, _)) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::getScannedDirEntries)); ON_CALL(*pFsAccess.get(), read(_, Matcher(_))) .WillByDefault(::testing::Invoke(pFsAccess.get(), &Mock::getValAgamaFile)); ON_CALL(*pSysfsAccess.get(), getRealPath(_, Matcher(_))) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::getRealPathVal)); ON_CALL(*pFsAccess.get(), canWrite(Matcher(mockFunctionResetPath))) .WillByDefault(::testing::Return(ZE_RESULT_SUCCESS)); ON_CALL(*pProcfsAccess.get(), myProcessId()) .WillByDefault(::testing::Invoke(pProcfsAccess.get(), &Mock::getMockMyProcessId)); ON_CALL(*pProcfsAccess.get(), isAlive(_)) .WillByDefault(::testing::Invoke(pProcfsAccess.get(), &Mock::mockIsAlive)); ON_CALL(*pSysfsAccess.get(), 
isMyDeviceFile(_)) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::mockIsMyDeviceFile)); ON_CALL(*pProcfsAccess.get(), getFileName(_, _, Matcher(_))) .WillByDefault(::testing::Invoke(pProcfsAccess.get(), &Mock::getMockFileName)); ON_CALL(*pProcfsAccess.get(), getFileDescriptors(_, Matcher &>(_))) .WillByDefault(::testing::Invoke(pProcfsAccess.get(), &Mock::getMockFileDescriptors)); ON_CALL(*pSysfsAccess.get(), unbindDevice(_)) .WillByDefault(::testing::Return(ZE_RESULT_SUCCESS)); ON_CALL(*pFsAccess.get(), write(mockFunctionResetPath, std::string("1"))) .WillByDefault(::testing::Return(ZE_RESULT_SUCCESS)); pGlobalOperationsImp = static_cast(pSysmanDeviceImp->pGlobalOperations); pOsGlobalOperationsPrev = pGlobalOperationsImp->pOsGlobalOperations; pGlobalOperationsImp->pOsGlobalOperations = nullptr; expectedModelName = neoDevice->getDeviceName(neoDevice->getHardwareInfo()); pGlobalOperationsImp->init(); } void TearDown() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } if (nullptr != pGlobalOperationsImp->pOsGlobalOperations) { delete pGlobalOperationsImp->pOsGlobalOperations; } pGlobalOperationsImp->pOsGlobalOperations = pOsGlobalOperationsPrev; pGlobalOperationsImp = nullptr; pSysmanDeviceImp->pEngineHandleContext = pEngineHandleContextOld; pSysmanDeviceImp->pDiagnosticsHandleContext = pDiagnosticsHandleContextOld; pSysmanDeviceImp->pFirmwareHandleContext = pFirmwareHandleContextOld; pSysmanDeviceImp->pRasHandleContext = pRasHandleContextOld; SysmanDeviceFixture::TearDown(); pLinuxSysmanImp->pSysfsAccess = pSysfsAccessOld; pLinuxSysmanImp->pProcfsAccess = pProcfsAccessOld; pLinuxSysmanImp->pFsAccess = pFsAccessOld; } }; class SysmanGlobalOperationsIntegratedFixture : public SysmanGlobalOperationsFixture { void SetUp() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } SysmanGlobalOperationsFixture::SetUp(); auto mockHardwareInfo = neoDevice->getHardwareInfo(); mockHardwareInfo.capabilityTable.isIntegratedDevice = true; 
neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]->setHwInfo(&mockHardwareInfo); } void TearDown() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } SysmanGlobalOperationsFixture::TearDown(); } }; TEST_F(SysmanGlobalOperationsFixture, GivenValidDeviceHandleWhenCallingzetGlobalOperationsGetPropertiesThenVerifyzetGlobalOperationsGetPropertiesCallSucceeds) { zes_device_properties_t properties; ze_result_t result = zesDeviceGetProperties(device, &properties); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(properties.numSubdevices, 0u); EXPECT_TRUE(0 == unknown.compare(properties.boardNumber)); EXPECT_TRUE(0 == vendorIntel.compare(properties.brandName)); EXPECT_TRUE(0 == driverVersion.compare(properties.driverVersion)); EXPECT_TRUE(0 == expectedModelName.compare(properties.modelName)); EXPECT_TRUE(0 == unknown.compare(properties.serialNumber)); EXPECT_TRUE(0 == vendorIntel.compare(properties.vendorName)); } TEST_F(SysmanGlobalOperationsFixture, GivenValidDeviceHandleWhenCallingzesDeviceGetPropertiesForCheckingDriverVersionWhenAgmaFileIsAbsentThenVerifyzesDeviceGetPropertiesCallSucceeds) { zes_device_properties_t properties; std::string test; test = srcVersion; ON_CALL(*pFsAccess.get(), read(_, Matcher(_))) .WillByDefault(::testing::Invoke(pFsAccess.get(), &Mock::getValSrcFile)); pGlobalOperationsImp->init(); ze_result_t result = zesDeviceGetProperties(device, &properties); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_TRUE(0 == test.compare(properties.driverVersion)); } TEST_F(SysmanGlobalOperationsFixture, GivenValidDeviceHandleWhenCallingzesDeviceGetPropertiesForCheckingDriverVersionWhenAgmaFileAndSrcFileIsAbsentThenVerifyzesDeviceGetPropertiesCallSucceeds) { zes_device_properties_t properties; ON_CALL(*pFsAccess.get(), read(_, Matcher(_))) .WillByDefault(::testing::Return(ZE_RESULT_ERROR_NOT_AVAILABLE)); pGlobalOperationsImp->init(); ze_result_t result = zesDeviceGetProperties(device, &properties); 
EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_TRUE(0 == unknown.compare(properties.driverVersion)); } TEST_F(SysmanGlobalOperationsFixture, GivenValidDeviceHandleWhenCallingzesDeviceGetPropertiesForCheckingDriverVersionWhenDriverVersionFileIsNotAvaliableThenVerifyzesDeviceGetPropertiesCallSucceeds) { zes_device_properties_t properties; ON_CALL(*pFsAccess.get(), read(_, Matcher(_))) .WillByDefault(::testing::Return(ZE_RESULT_ERROR_NOT_AVAILABLE)); pGlobalOperationsImp->init(); ze_result_t result = zesDeviceGetProperties(device, &properties); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_TRUE(0 == unknown.compare(properties.driverVersion)); } TEST_F(SysmanGlobalOperationsFixture, GivenValidDeviceHandleWhenCallingzesDeviceGetPropertiesForCheckingDriverVersionWhenDriverVersionFileReadFailsThenVerifyzesDeviceGetPropertiesCallSucceeds) { zes_device_properties_t properties; ON_CALL(*pFsAccess.get(), read(_, Matcher(_))) .WillByDefault(::testing::Return(ZE_RESULT_ERROR_UNKNOWN)); pGlobalOperationsImp->init(); ze_result_t result = zesDeviceGetProperties(device, &properties); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_TRUE(0 == unknown.compare(properties.driverVersion)); } TEST_F(SysmanGlobalOperationsFixture, GivenValidDeviceHandleWhenCallingzesDeviceGetPropertiesForCheckingDevicePropertiesWhenVendorIsUnKnownThenVerifyzesDeviceGetPropertiesCallSucceeds) { ON_CALL(*pSysfsAccess.get(), read(_, Matcher(_))) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::getFalseValString)); neoDevice->deviceInfo.vendorId = 1806; //Unknown Vendor id pGlobalOperationsImp->init(); zes_device_properties_t properties; ze_result_t result = zesDeviceGetProperties(device, &properties); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_TRUE(0 == unknown.compare(properties.vendorName)); EXPECT_TRUE(0 == unknown.compare(properties.brandName)); } TEST_F(SysmanGlobalOperationsFixture, 
GivenValidDeviceHandleWhenCallingzesDeviceGetPropertiesForCheckingDriverVersionWhenAccessingAgamaFileOrSrcFileGotPermissionDeniedThenVerifyzesDeviceGetPropertiesCallSucceeds) { zes_device_properties_t properties; ON_CALL(*pFsAccess.get(), read(_, Matcher(_))) .WillByDefault(::testing::Return(ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS)); pGlobalOperationsImp->init(); ze_result_t result = zesDeviceGetProperties(device, &properties); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_TRUE(0 == unknown.compare(properties.driverVersion)); } TEST_F(SysmanGlobalOperationsFixture, GivenValidDeviceHandleWhileRetrievingInformationAboutHostProcessesUsingDeviceThenSuccessIsReturned) { uint32_t count = 0; ASSERT_EQ(ZE_RESULT_SUCCESS, zesDeviceProcessesGetState(device, &count, nullptr)); EXPECT_EQ(count, totalProcessStates); std::vector processes(count); ASSERT_EQ(ZE_RESULT_SUCCESS, zesDeviceProcessesGetState(device, &count, processes.data())); EXPECT_EQ(processes[0].processId, pid1); EXPECT_EQ(processes[0].engines, engines1); EXPECT_EQ(processes[0].memSize, memSize1); EXPECT_EQ(processes[0].sharedSize, sharedMemSize1); EXPECT_EQ(processes[1].processId, pid2); EXPECT_EQ(processes[1].engines, engines2); EXPECT_EQ(processes[1].memSize, memSize2); EXPECT_EQ(processes[1].sharedSize, sharedMemSize2); EXPECT_EQ(processes[2].processId, pid4); EXPECT_EQ(processes[2].engines, engines4); EXPECT_EQ(processes[2].memSize, memSize4); EXPECT_EQ(processes[2].sharedSize, sharedMemSize4); EXPECT_EQ(processes[3].processId, pid6); EXPECT_EQ(processes[3].engines, engines6); EXPECT_EQ(processes[3].memSize, memSize6); EXPECT_EQ(processes[3].sharedSize, sharedMemSize6); EXPECT_EQ(processes[4].processId, pid7); EXPECT_EQ(processes[4].engines, engines7); EXPECT_EQ(processes[4].memSize, memSize7); EXPECT_EQ(processes[4].sharedSize, sharedMemSize7); } TEST_F(SysmanGlobalOperationsFixture, GivenValidDeviceHandleWhileRetrievingInformationAboutHostProcessesUsingDeviceThenSuccessIsReturnedEvenwithFaultyClient) { 
uint32_t count = 0; ON_CALL(*pSysfsAccess.get(), scanDirEntries(_, _)) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::getScannedDir4Entries)); ON_CALL(*pSysfsAccess.get(), read(_, Matcher(_))) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::getValUnsignedLongCreatedBytesSuccess)); ASSERT_EQ(ZE_RESULT_SUCCESS, zesDeviceProcessesGetState(device, &count, nullptr)); EXPECT_EQ(count, totalProcessStatesForFaultyClients); std::vector processes(count); ASSERT_EQ(ZE_RESULT_SUCCESS, zesDeviceProcessesGetState(device, &count, processes.data())); EXPECT_EQ(processes[0].processId, pid1); EXPECT_EQ(processes[0].engines, engines1); EXPECT_EQ(processes[0].memSize, memSize1); EXPECT_EQ(processes[0].sharedSize, sharedMemSize1); EXPECT_EQ(processes[1].processId, pid2); EXPECT_EQ(processes[1].engines, engines2); EXPECT_EQ(processes[1].memSize, memSize2); EXPECT_EQ(processes[1].sharedSize, sharedMemSize2); EXPECT_EQ(processes[2].processId, pid4); EXPECT_EQ(processes[2].engines, engines4); EXPECT_EQ(processes[2].memSize, memSize4); EXPECT_EQ(processes[2].sharedSize, sharedMemSize4); } TEST_F(SysmanGlobalOperationsFixture, GivenValidDeviceHandleWhileCountValueIsProvidedThenFailureIsReturned) { uint32_t count = 2; ASSERT_EQ(ZE_RESULT_ERROR_INVALID_SIZE, zesDeviceProcessesGetState(device, &count, nullptr)); } TEST_F(SysmanGlobalOperationsFixture, GivenValidDeviceHandleWhileRetrievingInformationAboutHostProcessesUsingFaultyClientFileThenFailureIsReturned) { uint32_t count = 0; ON_CALL(*pSysfsAccess.get(), scanDirEntries(_, _)) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::getScannedDir4Entries)); ASSERT_EQ(ZE_RESULT_ERROR_UNKNOWN, zesDeviceProcessesGetState(device, &count, nullptr)); } TEST_F(SysmanGlobalOperationsFixture, GivenValidDeviceHandleWhileRetrievingInformationAboutHostProcessesUsingNullDirThenFailureIsReturned) { uint32_t count = 0; EXPECT_CALL(*pSysfsAccess.get(), scanDirEntries(_, _)) 
.WillOnce(::testing::Return(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE)); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, zesDeviceProcessesGetState(device, &count, nullptr)); } TEST_F(SysmanGlobalOperationsFixture, GivenValidDeviceHandleWhileRetrievingInformationAboutHostProcessesUsingDeviceThenFailureIsReturnedEvenwithFaultyClient) { uint32_t count = 0; ON_CALL(*pSysfsAccess.get(), scanDirEntries(_, _)) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::getScannedDirPidEntires)); EXPECT_CALL(*pSysfsAccess.get(), read(_, Matcher(_))) .WillOnce(::testing::Return(ZE_RESULT_ERROR_UNKNOWN)); EXPECT_CALL(*pSysfsAccess.get(), read(_, Matcher(_))) .WillOnce(::testing::Return(ZE_RESULT_ERROR_UNKNOWN)); EXPECT_EQ(ZE_RESULT_ERROR_UNKNOWN, zesDeviceProcessesGetState(device, &count, nullptr)); } TEST_F(SysmanGlobalOperationsFixture, GivenValidDeviceHandleWhileRetrievingInformationAboutHostProcessesUsingBusyDirForEnginesReadThenFailureIsReturnedEvenwithFaultyClient) { uint32_t count = 0; ON_CALL(*pSysfsAccess.get(), scanDirEntries(_, _)) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::getScannedDirPidEntires)); EXPECT_EQ(ZE_RESULT_ERROR_UNKNOWN, zesDeviceProcessesGetState(device, &count, nullptr)); } TEST_F(SysmanGlobalOperationsFixture, GivenValidDeviceHandleWhileRetrievingInformationAboutHostProcessesUsingBusyDirForEnginesThenFailureIsReturnedEvenwithFaultyClient) { uint32_t count = 0; ON_CALL(*pSysfsAccess.get(), scanDirEntries(_, _)) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::getScannedDir4Entries)); EXPECT_CALL(*pSysfsAccess.get(), read(_, Matcher(_))) .WillOnce(::testing::Invoke(pSysfsAccess.get(), &Mock::getValUnsignedLongCreatedBytesSuccess)) .WillOnce(::testing::Return(ZE_RESULT_ERROR_UNKNOWN)); EXPECT_EQ(ZE_RESULT_ERROR_UNKNOWN, zesDeviceProcessesGetState(device, &count, nullptr)); } TEST_F(SysmanGlobalOperationsFixture, GivenValidDeviceHandleWhileReadingInvalidBufferObjectsThenErrorIsReturned) { uint32_t count = 0; 
ON_CALL(*pSysfsAccess.get(), scanDirEntries(_, _)) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::getScannedDirPidEntiresForClients)); EXPECT_EQ(ZE_RESULT_ERROR_UNKNOWN, zesDeviceProcessesGetState(device, &count, nullptr)); } TEST_F(SysmanGlobalOperationsFixture, GivenValidDeviceHandleWhileReadingExistingMemoryFileThenCorrectValueIsReturned) { uint64_t memSize = 0; EXPECT_EQ(ZE_RESULT_SUCCESS, pSysfsAccess->read("clients/6/total_device_memory_buffer_objects/created_bytes", memSize)); EXPECT_EQ(memSize2, memSize); } TEST_F(SysmanGlobalOperationsFixture, GivenValidDeviceHandleWhileReadingInvalidMemoryFileThenErrorIsReturned) { uint64_t memSize = 0; ON_CALL(*pSysfsAccess.get(), scanDirEntries(_, _)) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::getScannedDir4Entries)); EXPECT_EQ(ZE_RESULT_ERROR_NOT_AVAILABLE, pSysfsAccess->read("clients/7/total_device_memory_buffer_objects/imported_bytes", memSize)); } TEST_F(SysmanGlobalOperationsFixture, GivenValidDeviceHandleWhileReadingNonExistingFileThenErrorIsReturned) { std::vector engineEntries; EXPECT_EQ(ZE_RESULT_ERROR_NOT_AVAILABLE, pSysfsAccess->scanDirEntries("clients/7/busy", engineEntries)); } TEST_F(SysmanGlobalOperationsFixture, GivenDeviceIsWedgedWhenCallingGetDeviceStateThenZesResetReasonFlagWedgedIsReturned) { ON_CALL(*pFsAccess.get(), read(_, Matcher(_))) .WillByDefault(::testing::Invoke(pFsAccess.get(), &Mock::getValWedgedFileTrue)); zes_device_state_t deviceState; EXPECT_EQ(ZE_RESULT_SUCCESS, zesDeviceGetState(device, &deviceState)); EXPECT_EQ(ZES_RESET_REASON_FLAG_WEDGED, deviceState.reset); } TEST_F(SysmanGlobalOperationsFixture, GivenDeviceIsNotWedgedWhenCallingGetDeviceStateThenZeroIsReturned) { ON_CALL(*pFsAccess.get(), read(_, Matcher(_))) .WillByDefault(::testing::Invoke(pFsAccess.get(), &Mock::getValWedgedFileFalse)); zes_device_state_t deviceState; EXPECT_EQ(ZE_RESULT_SUCCESS, zesDeviceGetState(device, &deviceState)); EXPECT_EQ(0u, deviceState.reset); } 
// canWrite on the reset path is denied: reset reports INSUFFICIENT_PERMISSIONS.
TEST_F(SysmanGlobalOperationsIntegratedFixture, GivenPermissionDeniedWhenCallingGetDeviceStateThenZeResultErrorInsufficientPermissionsIsReturned) {
    ON_CALL(*pFsAccess.get(), canWrite(Matcher(mockFunctionResetPath)))
        .WillByDefault(::testing::Return(ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS));
    pGlobalOperationsImp->init();
    ze_result_t result = zesDeviceReset(device, true);
    EXPECT_EQ(ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS, result);
}

// The device is held by the extra pid and force is false: reset reports HANDLE_OBJECT_IN_USE.
TEST_F(SysmanGlobalOperationsIntegratedFixture, GivenDeviceInUseWhenCallingResetThenZeResultErrorHandleObjectInUseIsReturned) {
    pProcfsAccess->ourDevicePid = pProcfsAccess->extraPid;
    pProcfsAccess->ourDeviceFd = pProcfsAccess->extraFd;
    ON_CALL(*pProcfsAccess.get(), listProcesses(Matcher &>(_)))
        .WillByDefault(::testing::Invoke(pProcfsAccess.get(), &Mock::mockProcessListDeviceInUse));
    pGlobalOperationsImp->init();
    ze_result_t result = zesDeviceReset(device, false);
    EXPECT_EQ(ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE, result);
}

// Only this process has the device open: reset succeeds and the descriptor is closed afterwards.
TEST_F(SysmanGlobalOperationsIntegratedFixture, GivenDeviceNotInUseWhenCallingResetThenSuccessIsReturned) {
    // Pretend we have the device open
    pProcfsAccess->ourDevicePid = getpid();
    pProcfsAccess->ourDeviceFd = ::open("/dev/null", 0);
    // Without a valid descriptor the EBADF check at the end would pass vacuously.
    ASSERT_GE(pProcfsAccess->ourDeviceFd, 0);

    // The first time we get the process list, include our own process, that has the file open
    // Reset should close the file (we verify after reset). On subsequent calls, return
    // the process list without our process
    EXPECT_CALL(*pProcfsAccess.get(), listProcesses(Matcher &>(_)))
        .WillOnce(::testing::Invoke(pProcfsAccess.get(), &Mock::mockProcessListDeviceInUse))
        .WillRepeatedly(::testing::Invoke(pProcfsAccess.get(), &Mock::mockProcessListDeviceUnused));
    EXPECT_CALL(*pSysfsAccess.get(), bindDevice(_))
        .WillOnce(::testing::Return(ZE_RESULT_SUCCESS));
    pGlobalOperationsImp->init();
    ze_result_t result = zesDeviceReset(device, false);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    // Check that reset closed the device
    // If the device is already closed, then close will fail with errno of EBADF
    // The descriptor opened above is the one that has to be probed here, not the process id.
    EXPECT_NE(0, ::close(pProcfsAccess->ourDeviceFd));
    EXPECT_EQ(errno, EBADF);
}

// Another process holds the device and force is true: it is killed once and reset succeeds.
TEST_F(SysmanGlobalOperationsIntegratedFixture, GivenForceTrueAndDeviceInUseWhenCallingResetThenSuccessIsReturned) {
    // Pretend another process has the device open
    pProcfsAccess->ourDevicePid = getpid() + 1; // make sure it isn't our process id
    pProcfsAccess->ourDeviceFd = pProcfsAccess->extraFd;

    ON_CALL(*pProcfsAccess.get(), listProcesses(Matcher &>(_)))
        .WillByDefault(::testing::Invoke(pProcfsAccess.get(), &Mock::mockProcessListDeviceInUse));
    EXPECT_CALL(*pProcfsAccess.get(), kill(pProcfsAccess->ourDevicePid))
        .WillOnce(::testing::Invoke(pProcfsAccess.get(), &Mock::mockKill));
    EXPECT_CALL(*pSysfsAccess.get(), bindDevice(_))
        .WillOnce(::testing::Return(ZE_RESULT_SUCCESS));
    pGlobalOperationsImp->init();
    ze_result_t result = zesDeviceReset(device, true);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
}

// A process shows up on the second process listing: it is killed once and reset succeeds.
TEST_F(SysmanGlobalOperationsIntegratedFixture, GivenProcessStartsMidResetWhenCallingResetThenSuccessIsReturned) {
    // Pretend another process has the device open
    pProcfsAccess->ourDevicePid = getpid() + 1; // make sure it isn't our process id
    pProcfsAccess->ourDeviceFd = pProcfsAccess->extraFd;

    // Return process list without open fd on first call, but with open fd on subsequent calls
    EXPECT_CALL(*pProcfsAccess.get(), listProcesses(Matcher &>(_)))
        .WillOnce(::testing::Invoke(pProcfsAccess.get(), &Mock::mockProcessListDeviceUnused))
        .WillRepeatedly(::testing::Invoke(pProcfsAccess.get(), &Mock::mockProcessListDeviceInUse));
    EXPECT_CALL(*pProcfsAccess.get(), kill(pProcfsAccess->ourDevicePid))
        .WillOnce(::testing::Invoke(pProcfsAccess.get(), &Mock::mockKill));
    EXPECT_CALL(*pSysfsAccess.get(), bindDevice(_))
        .WillOnce(::testing::Return(ZE_RESULT_SUCCESS));
    pGlobalOperationsImp->init();
    ze_result_t result = zesDeviceReset(device, false);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
}

// Same sequence, but preparing the device environment is mocked to fail: reset reports DEVICE_LOST.
TEST_F(SysmanGlobalOperationsIntegratedFixture, GivenProcessStartsMidResetWhenCallingResetAndIfNeoDeviceCreateFailsThenErrorIsReturned) {
    // Pretend another process has the device open
    pProcfsAccess->ourDevicePid = getpid() + 1; // make sure it isn't our process id
    pProcfsAccess->ourDeviceFd = pProcfsAccess->extraFd;

    // Return process list without open fd on first call, but with open fd on subsequent calls
    EXPECT_CALL(*pProcfsAccess.get(), listProcesses(Matcher &>(_)))
        .WillOnce(::testing::Invoke(pProcfsAccess.get(), &Mock::mockProcessListDeviceUnused))
        .WillRepeatedly(::testing::Invoke(pProcfsAccess.get(), &Mock::mockProcessListDeviceInUse));
    EXPECT_CALL(*pProcfsAccess.get(), kill(pProcfsAccess->ourDevicePid))
        .WillOnce(::testing::Invoke(pProcfsAccess.get(), &Mock::mockKill));
    EXPECT_CALL(*pSysfsAccess.get(), bindDevice(_))
        .WillOnce(::testing::Return(ZE_RESULT_SUCCESS));
    pGlobalOperationsImp->init();

    // ultHwConfig is backed up for the scope of the test while the prepare result is forced to false.
    VariableBackup backup{&ultHwConfig};
    ultHwConfig.mockedPrepareDeviceEnvironmentsFuncResult = false;

    ze_result_t result = zesDeviceReset(device, false);
    EXPECT_EQ(ZE_RESULT_ERROR_DEVICE_LOST, result);
}

// Same sequence, but bindDevice fails: its error is returned from reset.
TEST_F(SysmanGlobalOperationsIntegratedFixture, GivenProcessStartsMidResetWhenCallingResetAndBindFailsThenFailureIsReturned) {
    // Pretend another process has the device open
    pProcfsAccess->ourDevicePid = getpid() + 1; // make sure it isn't our process id
    pProcfsAccess->ourDeviceFd = pProcfsAccess->extraFd;

    // Return process list without open fd on first call, but with open fd on subsequent calls
    EXPECT_CALL(*pProcfsAccess.get(), listProcesses(Matcher &>(_)))
        .WillOnce(::testing::Invoke(pProcfsAccess.get(), &Mock::mockProcessListDeviceUnused))
        .WillRepeatedly(::testing::Invoke(pProcfsAccess.get(), &Mock::mockProcessListDeviceInUse));
    EXPECT_CALL(*pProcfsAccess.get(), kill(pProcfsAccess->ourDevicePid))
        .WillOnce(::testing::Invoke(pProcfsAccess.get(), &Mock::mockKill));
    EXPECT_CALL(*pSysfsAccess.get(), bindDevice(_))
        .WillOnce(::testing::Return(ZE_RESULT_ERROR_UNKNOWN));
    pGlobalOperationsImp->init();
    ze_result_t result = zesDeviceReset(device, false);
    EXPECT_EQ(ZE_RESULT_ERROR_UNKNOWN, result);
}

// The very first process listing fails: its error is returned from reset.
TEST_F(SysmanGlobalOperationsIntegratedFixture, GivenDeviceInUseWhenCallingResetAndListProcessesFailsThenZeResultErrorIsReturned) {
    pProcfsAccess->ourDevicePid = pProcfsAccess->extraPid;
    pProcfsAccess->ourDeviceFd = pProcfsAccess->extraFd;
    EXPECT_CALL(*pProcfsAccess.get(), listProcesses(Matcher &>(_)))
        .WillOnce(::testing::Return(ZE_RESULT_ERROR_NOT_AVAILABLE));
    pGlobalOperationsImp->init();
    ze_result_t result = zesDeviceReset(device, false);
    EXPECT_EQ(ZE_RESULT_ERROR_NOT_AVAILABLE, result);
}

// The second process listing fails: its error is returned from reset.
TEST_F(SysmanGlobalOperationsIntegratedFixture, GivenProcessStartsMidResetWhenListProcessesFailsAfterUnbindThenFailureIsReturned) {
    // Pretend another process has the device open
    pProcfsAccess->ourDevicePid = getpid() + 1; // make sure it isn't our process id
    pProcfsAccess->ourDeviceFd = pProcfsAccess->extraFd;

    // Return process list without open fd on first call, then fail the second call
    EXPECT_CALL(*pProcfsAccess.get(), listProcesses(Matcher &>(_)))
        .WillOnce(::testing::Invoke(pProcfsAccess.get(), &Mock::mockProcessListDeviceUnused))
        .WillOnce(::testing::Return(ZE_RESULT_ERROR_NOT_AVAILABLE));
    ON_CALL(*pProcfsAccess.get(), kill(pProcfsAccess->ourDevicePid))
        .WillByDefault(::testing::Invoke(pProcfsAccess.get(), &Mock::mockKill));
    pGlobalOperationsImp->init();
    ze_result_t result = zesDeviceReset(device, false);
    EXPECT_EQ(ZE_RESULT_ERROR_NOT_AVAILABLE, result);
}

// Writing "1" to the reset path fails: its error is returned from reset.
TEST_F(SysmanGlobalOperationsIntegratedFixture, GivenProcessStartsMidResetWhenCallingResetAndWriteFailsAfterUnbindThenFailureIsReturned) {
    // Pretend another process has the device open
    pProcfsAccess->ourDevicePid = getpid() + 1; // make sure it isn't our process id
    pProcfsAccess->ourDeviceFd = pProcfsAccess->extraFd;

    // Return process list without open fd on first call, but with open fd on subsequent calls
    EXPECT_CALL(*pProcfsAccess.get(), listProcesses(Matcher &>(_)))
        .WillOnce(::testing::Invoke(pProcfsAccess.get(), &Mock::mockProcessListDeviceUnused))
        .WillRepeatedly(::testing::Invoke(pProcfsAccess.get(), &Mock::mockProcessListDeviceInUse));
    EXPECT_CALL(*pProcfsAccess.get(), kill(pProcfsAccess->ourDevicePid))
        .WillOnce(::testing::Invoke(pProcfsAccess.get(), &Mock::mockKill));
    EXPECT_CALL(*pFsAccess.get(), write(mockFunctionResetPath, std::string("1")))
        .WillOnce(::testing::Return(ZE_RESULT_ERROR_UNKNOWN));
    pGlobalOperationsImp->init();
    ze_result_t result = zesDeviceReset(device, false);
    EXPECT_EQ(ZE_RESULT_ERROR_UNKNOWN, result);
}

// unbindDevice fails: its error is returned from reset.
TEST_F(SysmanGlobalOperationsIntegratedFixture, GivenProcessStartsMidResetWhenCallingResetAndUnbindFailsThenFailureIsReturned) {
    // Pretend another process has the device open
    pProcfsAccess->ourDevicePid = getpid() + 1; // make sure it isn't our process id
    pProcfsAccess->ourDeviceFd = pProcfsAccess->extraFd;

    // Every process listing returns the list without the open fd
    ON_CALL(*pProcfsAccess.get(), listProcesses(Matcher &>(_)))
        .WillByDefault(::testing::Invoke(pProcfsAccess.get(), &Mock::mockProcessListDeviceUnused));
    ON_CALL(*pProcfsAccess.get(), kill(pProcfsAccess->ourDevicePid))
        .WillByDefault(::testing::Invoke(pProcfsAccess.get(), &Mock::mockKill));
    EXPECT_CALL(*pSysfsAccess.get(), unbindDevice(_))
        .WillOnce(::testing::Return(ZE_RESULT_ERROR_UNKNOWN));
    pGlobalOperationsImp->init();
    ze_result_t result = zesDeviceReset(device, false);
    EXPECT_EQ(ZE_RESULT_ERROR_UNKNOWN, result);
}
TEST_F(SysmanGlobalOperationsIntegratedFixture, GivenProcessStartsMidResetWhenCallingResetAndGetFileNameFailsThenSuccessIsReturned) { // Pretend another process has the device open pProcfsAccess->ourDevicePid = getpid() + 1; // make sure it isn't our process id pProcfsAccess->ourDeviceFd = pProcfsAccess->extraFd; // Return process list without open fd on first call, but with open fd on subsequent calls EXPECT_CALL(*pProcfsAccess.get(), listProcesses(Matcher &>(_))) .WillOnce(::testing::Invoke(pProcfsAccess.get(), &Mock::mockProcessListDeviceUnused)) .WillRepeatedly(::testing::Invoke(pProcfsAccess.get(), &Mock::mockProcessListDeviceInUse)); ON_CALL(*pProcfsAccess.get(), getFileName(_, _, Matcher(_))) .WillByDefault(::testing::Return(ZE_RESULT_ERROR_UNKNOWN)); ON_CALL(*pProcfsAccess.get(), kill(pProcfsAccess->ourDevicePid)) .WillByDefault(::testing::Invoke(pProcfsAccess.get(), &Mock::mockKill)); EXPECT_CALL(*pSysfsAccess.get(), bindDevice(_)) .WillOnce(::testing::Return(ZE_RESULT_SUCCESS)); pGlobalOperationsImp->init(); ze_result_t result = zesDeviceReset(device, false); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST_F(SysmanGlobalOperationsIntegratedFixture, GivenProcessWontDieWhenCallingResetThenZeResultErrorHandleObjectInUseErrorIsReturned) { // Pretend another process has the device open pProcfsAccess->ourDevicePid = getpid() + 1; // make sure it isn't our process id pProcfsAccess->ourDeviceFd = pProcfsAccess->extraFd; static_cast(pGlobalOperationsImp->pOsGlobalOperations)->resetTimeout = 0; // timeout immediate // Return process list without open fd on first call, but with open fd on subsequent calls EXPECT_CALL(*pProcfsAccess.get(), listProcesses(Matcher &>(_))) .WillOnce(::testing::Invoke(pProcfsAccess.get(), &Mock::mockProcessListDeviceUnused)) .WillRepeatedly(::testing::Invoke(pProcfsAccess.get(), &Mock::mockProcessListDeviceInUse)); EXPECT_CALL(*pProcfsAccess.get(), kill(pProcfsAccess->ourDevicePid)) .Times(1); EXPECT_CALL(*pFsAccess.get(), 
write(mockFunctionResetPath, std::string("1"))) .Times(0); EXPECT_CALL(*pSysfsAccess.get(), bindDevice(_)) .Times(0); pGlobalOperationsImp->init(); ze_result_t result = zesDeviceReset(device, false); EXPECT_EQ(ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE, result); } TEST_F(SysmanGlobalOperationsIntegratedFixture, GivenProcessStartsMidResetWhenCallingResetAndGetFileDescriptorsFailsThenSuccessIsReturned) { // Pretend another process has the device open pProcfsAccess->ourDevicePid = getpid() + 1; // make sure it isn't our process id pProcfsAccess->ourDeviceFd = pProcfsAccess->extraFd; // Return process list without open fd on first call, but with open fd on subsequent calls ON_CALL(*pProcfsAccess.get(), listProcesses(Matcher &>(_))) .WillByDefault(::testing::Invoke(pProcfsAccess.get(), &Mock::mockProcessListDeviceInUse)); EXPECT_CALL(*pProcfsAccess.get(), getFileDescriptors(_, Matcher &>(_))) .WillOnce(::testing::Invoke(pProcfsAccess.get(), &Mock::getMockFileDescriptorsFailure)) .WillRepeatedly(::testing::Invoke(pProcfsAccess.get(), &Mock::getMockFileDescriptors)); ON_CALL(*pProcfsAccess.get(), getFileName(_, _, Matcher(_))) .WillByDefault(::testing::Return(ZE_RESULT_ERROR_UNKNOWN)); ON_CALL(*pProcfsAccess.get(), kill(pProcfsAccess->ourDevicePid)) .WillByDefault(::testing::Invoke(pProcfsAccess.get(), &Mock::mockKill)); EXPECT_CALL(*pSysfsAccess.get(), bindDevice(_)) .WillOnce(::testing::Return(ZE_RESULT_SUCCESS)); pGlobalOperationsImp->init(); ze_result_t result = zesDeviceReset(device, false); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST(SysmanGlobalOperationsTest, GivenValidDevicePciPathWhenPreparingDeviceEnvironmentThenPrepareDeviceEnvironmentReturnsTrue) { auto device1 = std::unique_ptr{MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())}; std::string pciPath1 = "0000:00:02.0"; EXPECT_TRUE(DeviceFactory::prepareDeviceEnvironment(*device1->getExecutionEnvironment(), pciPath1, 0u)); } TEST(SysmanGlobalOperationsTest, 
GivenValidDevicePciPathWhoseFileDescriptorOpenFailedThenPrepareDeviceEnvironmentReturnsFalse) { auto device2 = std::unique_ptr{MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())}; std::string pciPath2 = "0000:00:03.0"; EXPECT_FALSE(DeviceFactory::prepareDeviceEnvironment(*device2->getExecutionEnvironment(), pciPath2, 0u)); } TEST(SysmanGlobalOperationsTest, GivenNotExisitingPciPathWhenPrepareDeviceEnvironmentIsCalledThenFalseIsReturned) { auto device3 = std::unique_ptr{MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())}; std::string pciPath3 = "0000:00:04.0"; EXPECT_FALSE(DeviceFactory::prepareDeviceEnvironment(*device3->getExecutionEnvironment(), pciPath3, 0u)); } } // namespace ult } // namespace L0 test_zes_global_operations_helper.cpp000066400000000000000000000001251422164147700433040ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/global_operations/linux/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ windows/000077500000000000000000000000001422164147700341535ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/global_operationsCMakeLists.txt000066400000000000000000000005521422164147700367150ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/global_operations/windows# # Copyright (C) 2021 Intel Corporation # # SPDX-License-Identifier: MIT # if(WIN32) target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/test_zes_global_operations.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_global_operations.h ) endif() mock_global_operations.h000066400000000000000000000025741422164147700410500ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/global_operations/windows/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma 
once #include "level_zero/core/test/unit_tests/mock.h" #include "level_zero/tools/source/sysman/global_operations/global_operations_imp.h" #include "level_zero/tools/source/sysman/global_operations/windows/os_global_operations_imp.h" #include "level_zero/tools/test/unit_tests/sources/sysman/windows/mock_kmd_sys_manager.h" namespace L0 { namespace ult { class GlobalOpsKmdSysManager : public Mock {}; template <> struct Mock : public GlobalOpsKmdSysManager { void setGlobalOperationsProperty(KmdSysman::GfxSysmanReqHeaderIn *pRequest, KmdSysman::GfxSysmanReqHeaderOut *pResponse) override { uint8_t *pBuffer = reinterpret_cast(pRequest); pBuffer += sizeof(KmdSysman::GfxSysmanReqHeaderIn); switch (pRequest->inRequestId) { case KmdSysman::Requests::GlobalOperation::TriggerDeviceLevelReset: { uint32_t *value = reinterpret_cast(pBuffer); *value = 0; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; } break; default: { pResponse->outDataSize = 0; pResponse->outReturnCode = KmdSysman::KmdSysmanFail; } break; } } Mock() = default; ~Mock() = default; }; } // namespace ult } // namespace L0 test_zes_global_operations.cpp000066400000000000000000000054331422164147700423070ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/global_operations/windows/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/global_operations/windows/os_global_operations_imp.h" #include "level_zero/tools/test/unit_tests/sources/sysman/global_operations/windows/mock_global_operations.h" #include "level_zero/tools/test/unit_tests/sources/sysman/windows/mock_sysman_fixture.h" extern bool sysmanUltsEnable; namespace L0 { namespace ult { class SysmanGlobalOperationsFixture : public SysmanDeviceFixture { protected: OsGlobalOperations *pOsGlobalOperationsPrev = nullptr; L0::GlobalOperations *pGlobalOperationsPrev = nullptr; L0::GlobalOperationsImp *pGlobalOperationsImp; Mock *pKmdSysManager 
= nullptr; KmdSysManager *pOriginalKmdSysManager = nullptr; void SetUp() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } SysmanDeviceFixture::SetUp(); } void init(bool allowSetCalls) { pKmdSysManager = new Mock; pKmdSysManager->allowSetCalls = allowSetCalls; EXPECT_CALL(*pKmdSysManager, escape(_, _, _, _, _)) .WillRepeatedly(::testing::Invoke(pKmdSysManager, &Mock::mock_escape)); pOriginalKmdSysManager = pWddmSysmanImp->pKmdSysManager; pWddmSysmanImp->pKmdSysManager = pKmdSysManager; pGlobalOperationsImp = static_cast(pSysmanDeviceImp->pGlobalOperations); pOsGlobalOperationsPrev = pGlobalOperationsImp->pOsGlobalOperations; pGlobalOperationsImp->pOsGlobalOperations = nullptr; pGlobalOperationsImp->init(); } void TearDown() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } if (nullptr != pGlobalOperationsImp->pOsGlobalOperations) { delete pGlobalOperationsImp->pOsGlobalOperations; } pGlobalOperationsImp->pOsGlobalOperations = pOsGlobalOperationsPrev; pGlobalOperationsImp = nullptr; SysmanDeviceFixture::TearDown(); pWddmSysmanImp->pKmdSysManager = pOriginalKmdSysManager; if (pKmdSysManager != nullptr) { delete pKmdSysManager; pKmdSysManager = nullptr; } } }; TEST_F(SysmanGlobalOperationsFixture, GivenForceTrueAndDeviceInUseWhenCallingResetThenSuccessIsReturned) { init(true); pGlobalOperationsImp->init(); ze_result_t result = zesDeviceReset(device, true); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST_F(SysmanGlobalOperationsFixture, GivenProcessStartsMidResetWhenCallingResetThenSuccessIsReturned) { init(false); pGlobalOperationsImp->init(); ze_result_t result = zesDeviceReset(device, true); EXPECT_EQ(ZE_RESULT_ERROR_NOT_AVAILABLE, result); } } // namespace ult } // namespace L0 
compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/linux/000077500000000000000000000000001422164147700301745ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/linux/CMakeLists.txt000066400000000000000000000012711422164147700327350ustar00rootroot00000000000000# # Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT # if(igsc_FOUND) set(L0_SRCS_TOOLS_SYSMAN_LINUX_FIRMWARE_UTIL_TEST ${CMAKE_CURRENT_SOURCE_DIR}/test_fw_util.cpp ) endif() if(UNIX) target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/test_sysman.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_sysman_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_procfs_access_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_sysfs_access_fixture.h ${L0_SRCS_TOOLS_SYSMAN_LINUX_FIRMWARE_UTIL_TEST} ) endif() add_subdirectories() mock_fw_util_fixture.h000066400000000000000000000036431422164147700345240ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/linux/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/mock_method_macros.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/tools/source/sysman/sysman.h" #include "sysman/linux/os_sysman_imp.h" using ::testing::_; using namespace NEO; namespace L0 { namespace ult { class LinuxFwUtilInterface : public FirmwareUtil {}; struct MockLinuxFwUtilInterface : public LinuxFwUtilInterface { MockLinuxFwUtilInterface() = default; ADDMETHOD_NOBASE(fwDeviceInit, ze_result_t, ZE_RESULT_SUCCESS, ()); ADDMETHOD_NOBASE(getFirstDevice, ze_result_t, ZE_RESULT_SUCCESS, (igsc_device_info * info)); ADDMETHOD_NOBASE(getFwVersion, ze_result_t, ZE_RESULT_SUCCESS, (std::string fwType, std::string &firmwareVersion)); ADDMETHOD_NOBASE(flashFirmware, ze_result_t, 
ZE_RESULT_SUCCESS, (std::string fwType, void *pImage, uint32_t size)); ADDMETHOD_NOBASE(fwIfrApplied, ze_result_t, ZE_RESULT_SUCCESS, (bool &ifrStatus)); ADDMETHOD_NOBASE(fwSupportedDiagTests, ze_result_t, ZE_RESULT_SUCCESS, (std::vector & supportedDiagTests)); ADDMETHOD_NOBASE(fwRunDiagTests, ze_result_t, ZE_RESULT_SUCCESS, (std::string & osDiagType, zes_diag_result_t *pResult)); ADDMETHOD_NOBASE(fwGetMemoryErrorCount, ze_result_t, ZE_RESULT_SUCCESS, (zes_ras_error_type_t category, uint32_t subDeviceCount, uint32_t subDeviceId, uint64_t &count)); ADDMETHOD_NOBASE_VOIDRETURN(getDeviceSupportedFwTypes, (std::vector & fwTypes)); }; class LinuxOsLibrary : public OsLibrary {}; struct MockOsLibrary : public LinuxOsLibrary { public: virtual ~MockOsLibrary() = default; void *getProcAddress(const std::string &procName) override { return nullptr; } bool isLoaded() override { return false; } }; } // namespace ult } // namespace L0 mock_procfs_access_fixture.h000066400000000000000000000022571422164147700356700ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/linux/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/tools/source/sysman/sysman.h" #include "sysman/linux/os_sysman_imp.h" using ::testing::_; using ::testing::NiceMock; using namespace NEO; namespace L0 { namespace ult { const std::string mockedDeviceName("/MOCK_DEVICE_NAME"); class LinuxProcfsAccess : public ProcfsAccess {}; template <> struct Mock : public LinuxProcfsAccess { ::pid_t ourDevicePid = 0; int ourDeviceFd = 0; ze_result_t getMockFileName(const ::pid_t pid, const int fd, std::string &val) { if (pid == ourDevicePid && fd == ourDeviceFd) { val = mockedDeviceName; } else { // return fake filenames for other file descriptors val = std::string("/FILENAME") + std::to_string(fd); } return 
ZE_RESULT_SUCCESS; } Mock() = default; MOCK_METHOD(ze_result_t, getFileName, (const ::pid_t pid, const int fd, std::string &val), (override)); }; } // namespace ult } // namespace L0 mock_sysfs_access_fixture.h000066400000000000000000000015151422164147700355370ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/linux/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/tools/source/sysman/sysman.h" #include "sysman/linux/os_sysman_imp.h" using ::testing::_; using ::testing::NiceMock; using namespace NEO; namespace L0 { namespace ult { class LinuxSysfsAccess : public SysfsAccess {}; template <> struct Mock : public LinuxSysfsAccess { ze_result_t getRealPathVal(const std::string file, std::string &val) { val = "/random/path"; return ZE_RESULT_SUCCESS; } Mock() = default; MOCK_METHOD(ze_result_t, getRealPath, (const std::string path, std::string &val), (override)); }; } // namespace ult } // namespace L0 mock_sysman_fixture.h000066400000000000000000000151751422164147700343700ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/linux/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/os_interface/device_factory.h" #include "shared/source/os_interface/linux/drm_neo.h" #include "shared/source/os_interface/os_interface.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/tools/source/sysman/sysman.h" #include "level_zero/tools/test/unit_tests/sources/sysman/linux/mock_fw_util_fixture.h" #include "level_zero/tools/test/unit_tests/sources/sysman/linux/mock_procfs_access_fixture.h" #include 
"level_zero/tools/test/unit_tests/sources/sysman/linux/mock_sysfs_access_fixture.h" #include "sysman/linux/os_sysman_imp.h" extern bool sysmanUltsEnable; using ::testing::_; using ::testing::Matcher; using ::testing::NiceMock; using namespace NEO; namespace L0 { namespace ult { constexpr int mockFd = 0; class SysmanMockDrm : public Drm { public: SysmanMockDrm(RootDeviceEnvironment &rootDeviceEnvironment) : Drm(std::make_unique(mockFd, ""), rootDeviceEnvironment) { setupIoctlHelper(rootDeviceEnvironment.getHardwareInfo()->platform.eProductFamily); } }; class PublicLinuxSysmanImp : public L0::LinuxSysmanImp { public: using LinuxSysmanImp::mapOfSubDeviceIdToPmtObject; using LinuxSysmanImp::pDrm; using LinuxSysmanImp::pFsAccess; using LinuxSysmanImp::pFwUtilInterface; using LinuxSysmanImp::pPmuInterface; using LinuxSysmanImp::pProcfsAccess; using LinuxSysmanImp::pSysfsAccess; }; class SysmanDeviceFixture : public DeviceFixture, public ::testing::Test { public: Mock *pSysfsAccess = nullptr; Mock *pProcfsAccess = nullptr; MockLinuxFwUtilInterface *pFwUtilInterface = nullptr; void SetUp() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } DeviceFixture::SetUp(); neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]->osInterface = std::make_unique(); auto &osInterface = device->getOsInterface(); osInterface.setDriverModel(std::make_unique(const_cast(neoDevice->getRootDeviceEnvironment()))); setenv("ZES_ENABLE_SYSMAN", "1", 1); device->setSysmanHandle(new SysmanDeviceImp(device->toHandle())); pSysmanDevice = device->getSysmanHandle(); pSysmanDeviceImp = static_cast(pSysmanDevice); pOsSysman = pSysmanDeviceImp->pOsSysman; pLinuxSysmanImp = static_cast(pOsSysman); pFwUtilInterface = new MockLinuxFwUtilInterface(); pSysfsAccess = new NiceMock>; pProcfsAccess = new NiceMock>; pLinuxSysmanImp->pFwUtilInterface = pFwUtilInterface; pLinuxSysmanImp->pSysfsAccess = pSysfsAccess; pLinuxSysmanImp->pProcfsAccess = pProcfsAccess; 
ON_CALL(*pSysfsAccess, getRealPath(_, Matcher(_))) .WillByDefault(::testing::Invoke(pSysfsAccess, &Mock::getRealPathVal)); ON_CALL(*pProcfsAccess, getFileName(_, _, Matcher(_))) .WillByDefault(::testing::Invoke(pProcfsAccess, &Mock::getMockFileName)); pSysmanDeviceImp->init(); } void TearDown() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } DeviceFixture::TearDown(); unsetenv("ZES_ENABLE_SYSMAN"); } SysmanDevice *pSysmanDevice = nullptr; SysmanDeviceImp *pSysmanDeviceImp = nullptr; OsSysman *pOsSysman = nullptr; PublicLinuxSysmanImp *pLinuxSysmanImp = nullptr; }; class SysmanMultiDeviceFixture : public MultiDeviceFixture, public ::testing::Test { public: Mock *pSysfsAccess = nullptr; Mock *pProcfsAccess = nullptr; MockLinuxFwUtilInterface *pFwUtilInterface = nullptr; void SetUp() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } MultiDeviceFixture::SetUp(); device = driverHandle->devices[0]; neoDevice = device->getNEODevice(); neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]->osInterface = std::make_unique(); auto &osInterface = device->getOsInterface(); osInterface.setDriverModel(std::make_unique(const_cast(neoDevice->getRootDeviceEnvironment()))); setenv("ZES_ENABLE_SYSMAN", "1", 1); device->setSysmanHandle(new SysmanDeviceImp(device->toHandle())); pSysmanDevice = device->getSysmanHandle(); for (auto &subDevice : static_cast(device)->subDevices) { static_cast(subDevice)->setSysmanHandle(pSysmanDevice); } pSysmanDeviceImp = static_cast(pSysmanDevice); pOsSysman = pSysmanDeviceImp->pOsSysman; pLinuxSysmanImp = static_cast(pOsSysman); pFwUtilInterface = new MockLinuxFwUtilInterface(); pSysfsAccess = new NiceMock>; pProcfsAccess = new NiceMock>; pLinuxSysmanImp->pFwUtilInterface = pFwUtilInterface; pLinuxSysmanImp->pSysfsAccess = pSysfsAccess; pLinuxSysmanImp->pProcfsAccess = pProcfsAccess; ON_CALL(*pSysfsAccess, getRealPath(_, Matcher(_))) .WillByDefault(::testing::Invoke(pSysfsAccess, &Mock::getRealPathVal)); 
ON_CALL(*pProcfsAccess, getFileName(_, _, Matcher(_))) .WillByDefault(::testing::Invoke(pProcfsAccess, &Mock::getMockFileName)); pSysmanDeviceImp->init(); subDeviceCount = numSubDevices; } void TearDown() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } unsetenv("ZES_ENABLE_SYSMAN"); MultiDeviceFixture::TearDown(); } SysmanDevice *pSysmanDevice = nullptr; SysmanDeviceImp *pSysmanDeviceImp = nullptr; OsSysman *pOsSysman = nullptr; PublicLinuxSysmanImp *pLinuxSysmanImp = nullptr; NEO::Device *neoDevice = nullptr; L0::Device *device = nullptr; uint32_t subDeviceCount = 0u; }; class PublicFsAccess : public L0::FsAccess { public: using FsAccess::accessSyscall; using FsAccess::statSyscall; }; class PublicSysfsAccess : public L0::SysfsAccess { public: using SysfsAccess::accessSyscall; }; } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/linux/nl_api/000077500000000000000000000000001422164147700314365ustar00rootroot00000000000000CMakeLists.txt000066400000000000000000000006021422164147700341150ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/linux/nl_api# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(UNIX) if(LIBGENL_FOUND) target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/test_sysman_nl_api.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_nl_api.cpp ) endif() endif() mock_nl_api.cpp000066400000000000000000000204201422164147700343340ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/linux/nl_api/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "mock_nl_api.h" namespace L0 { namespace ult { extern "C" { static int mockCallback(struct nl_msg *msg, void *arg) { return NL_OK; } } struct nl_sock MockNlDll::mockNlSock; struct nl_msg MockNlDll::mockNlMsg; struct nlmsghdr 
MockNlDll::mockNlmsghdr; struct nlattr MockNlDll::mockNlattr; struct nlattr MockNlDll::mockNextNlattr; struct genl_ops MockNlDll::mockGenlOps; nl_recvmsg_msg_cb_t MockNlDll::mockCb = mockCallback; extern "C" { int mockGenlConnect(struct nl_sock *sock) { EXPECT_EQ(&MockNlDll::mockNlSock, sock); return 0; } int mockGenlCtrlResolve(struct nl_sock *sock, const char *name) { EXPECT_EQ(&MockNlDll::mockNlSock, sock); EXPECT_FALSE(strcmp(MockNlDll::mockFamilyName, name)); return 0; } int mockGenlHandleMsg(struct nl_msg *msg, void *arg) { EXPECT_EQ(&MockNlDll::mockNlMsg, msg); EXPECT_EQ(MockNlDll::mockArgP, arg); return 0; } void *mockGenlmsgPut(struct nl_msg *msg, uint32_t port, uint32_t seq, int family, int hdrlen, int flags, uint8_t cmd, uint8_t version) { EXPECT_EQ(&MockNlDll::mockNlMsg, msg); EXPECT_EQ(MockNlDll::mockFamilyId, family); EXPECT_EQ(MockNlDll::mockPort, port); EXPECT_EQ(MockNlDll::mockSeq, seq); EXPECT_EQ(MockNlDll::mockCmd, cmd); EXPECT_EQ(MockNlDll::mockHdrlen, hdrlen); EXPECT_EQ(MockNlDll::mockFlags, flags); EXPECT_EQ(MockNlDll::mockVersion, version); return msg; } int mockGenlOpsResolve(struct nl_sock *sock, struct genl_ops *ops) { EXPECT_EQ(&MockNlDll::mockNlSock, sock); EXPECT_EQ(&MockNlDll::mockGenlOps, ops); return 0; } int mockGenlRegisterFamily(struct genl_ops *ops) { EXPECT_EQ(&MockNlDll::mockGenlOps, ops); return 0; } int mockGenlUnregisterFamily(struct genl_ops *ops) { EXPECT_EQ(&MockNlDll::mockGenlOps, ops); return 0; } struct nl_sock *mockNlSocketAlloc() { return &MockNlDll::mockNlSock; } void mockNlSocketDisableSeqCheck(struct nl_sock *sock) { EXPECT_EQ(&MockNlDll::mockNlSock, sock); return; } void mockNlSocketFree(struct nl_sock *sock) { EXPECT_EQ(&MockNlDll::mockNlSock, sock); return; } int mockNlSocketModifyCb(struct nl_sock *sock, enum nl_cb_type type, enum nl_cb_kind kind, nl_recvmsg_msg_cb_t cb, void *arg) { EXPECT_EQ(&MockNlDll::mockNlSock, sock); EXPECT_EQ(MockNlDll::mockCbType, type); EXPECT_EQ(MockNlDll::mockCbKind, kind); 
EXPECT_EQ(MockNlDll::mockCb, cb); EXPECT_EQ(MockNlDll::mockArgP, arg); return 0; } int mockNlRecvmsgsDefault(struct nl_sock *sock) { EXPECT_EQ(&MockNlDll::mockNlSock, sock); return 0; } int mockNlSendAuto(struct nl_sock *sock, struct nl_msg *msg) { EXPECT_EQ(&MockNlDll::mockNlSock, sock); EXPECT_EQ(&MockNlDll::mockNlMsg, msg); return 0; } void *mockNlaData(const struct nlattr *attr) { EXPECT_EQ(&MockNlDll::mockNlattr, attr); return &MockNlDll::mockNlattr; } uint32_t mockNlaGetU32(const struct nlattr *attr) { EXPECT_EQ(&MockNlDll::mockNlattr, attr); return MockNlDll::mockU32Val; } uint64_t mockNlaGetU64(const struct nlattr *attr) { EXPECT_EQ(&MockNlDll::mockNlattr, attr); return MockNlDll::mockU64Val; } uint8_t mockNlaGetU8(const struct nlattr *attr) { EXPECT_EQ(&MockNlDll::mockNlattr, attr); return MockNlDll::mockU8Val; } int mockNlaIsNested(const struct nlattr *attr) { EXPECT_EQ(&MockNlDll::mockNlattr, attr); return 0; } int mockNlaLen(const struct nlattr *attr) { EXPECT_EQ(&MockNlDll::mockNlattr, attr); return MockNlDll::mockAttrLen; } struct nlattr *mockNlaNext(const struct nlattr *attr, int *remaining) { EXPECT_EQ(&MockNlDll::mockNlattr, attr); EXPECT_EQ(MockNlDll::mockRemainBefore, *remaining); *remaining = MockNlDll::mockRemainAfter; return &MockNlDll::mockNextNlattr; } int mockNlaOk(const struct nlattr *attr, int remaining) { EXPECT_EQ(&MockNlDll::mockNlattr, attr); EXPECT_EQ(MockNlDll::mockRemainBefore, remaining); return 0; } int mockNlaPutU16(struct nl_msg *msg, int type, uint16_t data) { EXPECT_EQ(&MockNlDll::mockNlMsg, msg); EXPECT_EQ(MockNlDll::mockType, type); EXPECT_EQ(MockNlDll::mockU16Val, data); return 0; } int mockNlaPutU32(struct nl_msg *msg, int type, uint32_t data) { EXPECT_EQ(&MockNlDll::mockNlMsg, msg); EXPECT_EQ(MockNlDll::mockType, type); EXPECT_EQ(MockNlDll::mockU32Val, data); return 0; } int mockNlaPutU64(struct nl_msg *msg, int type, uint64_t data) { EXPECT_EQ(&MockNlDll::mockNlMsg, msg); EXPECT_EQ(MockNlDll::mockType, type); 
EXPECT_EQ(MockNlDll::mockU64Val, data); return 0; } int mockNlaPutU8(struct nl_msg *msg, int type, uint8_t data) { EXPECT_EQ(&MockNlDll::mockNlMsg, msg); EXPECT_EQ(MockNlDll::mockType, type); EXPECT_EQ(MockNlDll::mockU8Val, data); return 0; } int mockNlaType(const struct nlattr *attr) { EXPECT_EQ(&MockNlDll::mockNlattr, attr); return MockNlDll::mockType; } struct nl_msg *mockNlmsgAlloc() { return &MockNlDll::mockNlMsg; } void mockNlmsgFree(struct nl_msg *msg) { EXPECT_EQ(&MockNlDll::mockNlMsg, msg); return; } struct nlattr *mockNlmsgAttrdata(const struct nlmsghdr *hdr, int attr) { EXPECT_EQ(&MockNlDll::mockNlmsghdr, hdr); EXPECT_EQ(MockNlDll::mockAttr, attr); return &MockNlDll::mockNlattr; } int mockNlmsgAttrlen(const struct nlmsghdr *hdr, int attr) { EXPECT_EQ(&MockNlDll::mockNlmsghdr, hdr); EXPECT_EQ(MockNlDll::mockAttr, attr); return MockNlDll::mockAttrLen; } struct nlmsghdr *mockNlmsgHdr(struct nl_msg *msg) { EXPECT_EQ(&MockNlDll::mockNlMsg, msg); return &MockNlDll::mockNlmsghdr; } } MockNlDll::MockNlDll() { funcMap["genl_connect"] = reinterpret_cast(&mockGenlConnect); funcMap["genl_ctrl_resolve"] = reinterpret_cast(&mockGenlCtrlResolve); funcMap["genl_handle_msg"] = reinterpret_cast(&mockGenlHandleMsg); funcMap["genlmsg_put"] = reinterpret_cast(&mockGenlmsgPut); funcMap["genl_ops_resolve"] = reinterpret_cast(&mockGenlOpsResolve); funcMap["genl_register_family"] = reinterpret_cast(&mockGenlRegisterFamily); funcMap["genl_unregister_family"] = reinterpret_cast(&mockGenlUnregisterFamily); funcMap["nl_recvmsgs_default"] = reinterpret_cast(&mockNlRecvmsgsDefault); funcMap["nl_send_auto"] = reinterpret_cast(&mockNlSendAuto); funcMap["nl_socket_alloc"] = reinterpret_cast(&mockNlSocketAlloc); funcMap["nl_socket_disable_seq_check"] = reinterpret_cast(&mockNlSocketDisableSeqCheck); funcMap["nl_socket_free"] = reinterpret_cast(&mockNlSocketFree); funcMap["nl_socket_modify_cb"] = reinterpret_cast(&mockNlSocketModifyCb); funcMap["nla_data"] = reinterpret_cast(&mockNlaData); 
funcMap["nla_get_u32"] = reinterpret_cast(&mockNlaGetU32); funcMap["nla_get_u64"] = reinterpret_cast(&mockNlaGetU64); funcMap["nla_get_u8"] = reinterpret_cast(&mockNlaGetU8); funcMap["nla_is_nested"] = reinterpret_cast(&mockNlaIsNested); funcMap["nla_len"] = reinterpret_cast(&mockNlaLen); funcMap["nla_next"] = reinterpret_cast(&mockNlaNext); funcMap["nla_ok"] = reinterpret_cast(&mockNlaOk); funcMap["nla_put_u16"] = reinterpret_cast(&mockNlaPutU16); funcMap["nla_put_u32"] = reinterpret_cast(&mockNlaPutU32); funcMap["nla_put_u64"] = reinterpret_cast(&mockNlaPutU64); funcMap["nla_put_u8"] = reinterpret_cast(&mockNlaPutU8); funcMap["nla_type"] = reinterpret_cast(&mockNlaType); funcMap["nlmsg_alloc"] = reinterpret_cast(&mockNlmsgAlloc); funcMap["nlmsg_attrdata"] = reinterpret_cast(&mockNlmsgAttrdata); funcMap["nlmsg_attrlen"] = reinterpret_cast(&mockNlmsgAttrlen); funcMap["nlmsg_free"] = reinterpret_cast(&mockNlmsgFree); funcMap["nlmsg_hdr"] = reinterpret_cast(&mockNlmsgHdr); } void *MockNlDll::getProcAddress(const std::string &procName) { auto it = funcMap.find(procName); if (funcMap.end() == it) { return nullptr; } else { return it->second; } } void MockNlDll::deleteEntryPoint(const std::string &procName) { auto it = funcMap.find(procName); if (funcMap.end() != it) { funcMap.erase(it); } } } // namespace ult } // namespace L0 mock_nl_api.h000066400000000000000000000036631422164147700340130ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/linux/nl_api/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/tools/source/sysman/linux/nl_api/nl_api.h" // Define opaque types so variables can be allocated struct nl_sock { }; struct nl_msg { }; namespace L0 { namespace ult { class MockNlDll : public NEO::OsLibrary { public: MOCK_METHOD(bool, isLoaded, (), (override)); void *getProcAddress(const 
std::string &procName) override; void deleteEntryPoint(const std::string &procName); MockNlDll(); static struct nl_sock mockNlSock; static struct nl_msg mockNlMsg; static struct nlmsghdr mockNlmsghdr; static struct nlattr mockNlattr; static struct nlattr mockNextNlattr; static struct genl_ops mockGenlOps; static nl_recvmsg_msg_cb_t mockCb; constexpr static int mockFamilyId = 0x2020; constexpr static char mockFamilyName[] = "TestName"; constexpr static void *mockArgP = nullptr; constexpr static uint32_t mockPort = NL_AUTO_PID; constexpr static uint32_t mockSeq = NL_AUTO_SEQ; constexpr static int mockHdrlen = NLMSG_HDRLEN; constexpr static int mockFlags = 0; constexpr static uint8_t mockCmd = 1; constexpr static uint8_t mockVersion = 2; constexpr static int mockType = 3; constexpr static uint8_t mockU8Val = 0x7fU; constexpr static uint16_t mockU16Val = 0x7fffU; constexpr static uint32_t mockU32Val = 0x7fffffffU; constexpr static uint64_t mockU64Val = 0x7fffffffffffffffUL; constexpr static int mockAttr = 4; constexpr static int mockAttrLen = 8; constexpr static int mockRemainBefore = 20; constexpr static int mockRemainAfter = 16; constexpr static enum nl_cb_type mockCbType = NL_CB_VALID; constexpr static enum nl_cb_kind mockCbKind = NL_CB_CUSTOM; private: std::map funcMap; }; } // namespace ult } // namespace L0 test_sysman_nl_api.cpp000066400000000000000000000230261422164147700357610ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/linux/nl_api/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "mock_nl_api.h" extern bool sysmanUltsEnable; using ::testing::Invoke; using ::testing::Return; namespace L0 { namespace ult { class PublicNlApi : public NlApi { public: using NlApi::genlLibraryHandle; }; class SysmanNlApiFixture : public ::testing::Test { protected: PublicNlApi testNlApi; void SetUp() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } auto mockNlDll = 
std::make_unique(); testNlApi.genlLibraryHandle = std::move(mockNlDll); EXPECT_TRUE(testNlApi.loadEntryPoints()); } void TearDown() override { } bool testLoadEntryPointsWithMissingFunction(const std::string &procName) { PublicNlApi localNlApi; auto mockNlDll = std::make_unique(); mockNlDll->deleteEntryPoint(procName); localNlApi.genlLibraryHandle = std::move(mockNlDll); return localNlApi.loadEntryPoints(); } public: static const int testAttr; }; const int SysmanNlApiFixture::testAttr = 1; TEST_F(SysmanNlApiFixture, GivenNlApiWhenMissingDllEntryPointThenVerifyLoadEntryPointsFails) { EXPECT_FALSE(testLoadEntryPointsWithMissingFunction("genl_connect")); EXPECT_FALSE(testLoadEntryPointsWithMissingFunction("genl_ctrl_resolve")); EXPECT_FALSE(testLoadEntryPointsWithMissingFunction("genl_handle_msg")); EXPECT_FALSE(testLoadEntryPointsWithMissingFunction("genlmsg_put")); EXPECT_FALSE(testLoadEntryPointsWithMissingFunction("genl_ops_resolve")); EXPECT_FALSE(testLoadEntryPointsWithMissingFunction("genl_register_family")); EXPECT_FALSE(testLoadEntryPointsWithMissingFunction("genl_unregister_family")); EXPECT_FALSE(testLoadEntryPointsWithMissingFunction("nl_recvmsgs_default")); EXPECT_FALSE(testLoadEntryPointsWithMissingFunction("nl_send_auto")); EXPECT_FALSE(testLoadEntryPointsWithMissingFunction("nl_socket_alloc")); EXPECT_FALSE(testLoadEntryPointsWithMissingFunction("nl_socket_disable_seq_check")); EXPECT_FALSE(testLoadEntryPointsWithMissingFunction("nl_socket_free")); EXPECT_FALSE(testLoadEntryPointsWithMissingFunction("nl_socket_modify_cb")); EXPECT_FALSE(testLoadEntryPointsWithMissingFunction("nla_data")); EXPECT_FALSE(testLoadEntryPointsWithMissingFunction("nla_get_u32")); EXPECT_FALSE(testLoadEntryPointsWithMissingFunction("nla_get_u64")); EXPECT_FALSE(testLoadEntryPointsWithMissingFunction("nla_get_u8")); EXPECT_FALSE(testLoadEntryPointsWithMissingFunction("nla_is_nested")); EXPECT_FALSE(testLoadEntryPointsWithMissingFunction("nla_len")); 
EXPECT_FALSE(testLoadEntryPointsWithMissingFunction("nla_next")); EXPECT_FALSE(testLoadEntryPointsWithMissingFunction("nla_ok")); EXPECT_FALSE(testLoadEntryPointsWithMissingFunction("nla_put_u16")); EXPECT_FALSE(testLoadEntryPointsWithMissingFunction("nla_put_u32")); EXPECT_FALSE(testLoadEntryPointsWithMissingFunction("nla_put_u64")); EXPECT_FALSE(testLoadEntryPointsWithMissingFunction("nla_put_u8")); EXPECT_FALSE(testLoadEntryPointsWithMissingFunction("nla_type")); EXPECT_FALSE(testLoadEntryPointsWithMissingFunction("nlmsg_alloc")); EXPECT_FALSE(testLoadEntryPointsWithMissingFunction("nlmsg_attrdata")); EXPECT_FALSE(testLoadEntryPointsWithMissingFunction("nlmsg_attrlen")); EXPECT_FALSE(testLoadEntryPointsWithMissingFunction("nlmsg_free")); EXPECT_FALSE(testLoadEntryPointsWithMissingFunction("nlmsg_hdr")); } TEST_F(SysmanNlApiFixture, GivenNlApiWhenMissingDllHandleThenVerifyLoadEntryPointsFails) { testNlApi.genlLibraryHandle.reset(); EXPECT_FALSE(testNlApi.loadEntryPoints()); } TEST_F(SysmanNlApiFixture, GivenNlApiWhenCompleteMockNlDllThenVerifyGenlConnectReturnsZero) { EXPECT_EQ(0, testNlApi.genlConnect(&MockNlDll::mockNlSock)); } TEST_F(SysmanNlApiFixture, GivenNlApiWhenCompleteMockNlDllThenVerifyGenlCtrlReturnsZero) { EXPECT_EQ(0, testNlApi.genlCtrlResolve(&MockNlDll::mockNlSock, const_cast(MockNlDll::mockFamilyName))); } TEST_F(SysmanNlApiFixture, GivenNlApiWhenCompleteMockNlDllThenVerifyGenlHandleMsgReturnsZero) { EXPECT_EQ(0, testNlApi.genlHandleMsg(&MockNlDll::mockNlMsg, MockNlDll::mockArgP)); } TEST_F(SysmanNlApiFixture, GivenNlApiWhenCompleteMockNlDllThenVerifyGenlmsgPutReturnsValidPointer) { EXPECT_NE(nullptr, testNlApi.genlmsgPut(&MockNlDll::mockNlMsg, MockNlDll::mockPort, MockNlDll::mockSeq, MockNlDll::mockFamilyId, MockNlDll::mockHdrlen, MockNlDll::mockFlags, MockNlDll::mockCmd, MockNlDll::mockVersion)); } TEST_F(SysmanNlApiFixture, GivenNlApiWhenCompleteMockNlDllThenVerifyGenlOpsResolveReturnsZero) { EXPECT_EQ(0, 
testNlApi.genlOpsResolve(&MockNlDll::mockNlSock, &MockNlDll::mockGenlOps)); } TEST_F(SysmanNlApiFixture, GivenNlApiWhenCompleteMockNlDllThenVerifyGenlRegisterFamilyReturnsZero) { EXPECT_EQ(0, testNlApi.genlRegisterFamily(&MockNlDll::mockGenlOps)); } TEST_F(SysmanNlApiFixture, GivenNlApiWhenCompleteMockNlDllThenVerifyGenlUnregisterFamilyReturnsZero) { EXPECT_EQ(0, testNlApi.genlUnregisterFamily(&MockNlDll::mockGenlOps)); } TEST_F(SysmanNlApiFixture, GivenNlApiWhenCompleteMockNlDllThenVerifyNlSocketAllocReturnsValidPointer) { EXPECT_EQ(&MockNlDll::mockNlSock, testNlApi.nlSocketAlloc()); } TEST_F(SysmanNlApiFixture, GivenNlApiWhenCompleteMockNlDllThenVerifyNlSocketDisableSeqCheckCompletesSuccessfully) { testNlApi.nlSocketDisableSeqCheck(&MockNlDll::mockNlSock); } TEST_F(SysmanNlApiFixture, GivenNlApiWhenCompleteMockNlDllThenVerifyNlSocketFreeCompletesSuccessfully) { testNlApi.nlSocketFree(&MockNlDll::mockNlSock); } TEST_F(SysmanNlApiFixture, GivenValidNlSockWhenCallingNlSocketModifyCbThenVerifyNlSocketModifyCbReturnsZero) { EXPECT_EQ(0, testNlApi.nlSocketModifyCb(&MockNlDll::mockNlSock, MockNlDll::mockCbType, MockNlDll::mockCbKind, MockNlDll::mockCb, MockNlDll::mockArgP)); } TEST_F(SysmanNlApiFixture, GivenNlApiWhenCompleteMockNlDllThenVerifyNlRecvmsgsDefaultReturnsZero) { EXPECT_EQ(0, testNlApi.nlRecvmsgsDefault(&MockNlDll::mockNlSock)); } TEST_F(SysmanNlApiFixture, GivenNlApiWhenCompleteMockNlDllThenVerifyNlSendAutoReturnsZero) { EXPECT_EQ(0, testNlApi.nlSendAuto(&MockNlDll::mockNlSock, &MockNlDll::mockNlMsg)); } TEST_F(SysmanNlApiFixture, GivenNlApiWhenCompleteMockNlDllThenVerifyNlaDataReturnsValidPointer) { EXPECT_NE(nullptr, testNlApi.nlaData(&MockNlDll::mockNlattr)); } TEST_F(SysmanNlApiFixture, GivenNlApiWhenCompleteMockNlDllThenVerifyNlaGetU32ReturnsValue) { EXPECT_EQ(MockNlDll::mockU32Val, testNlApi.nlaGetU32(&MockNlDll::mockNlattr)); } TEST_F(SysmanNlApiFixture, GivenNlApiWhenCompleteMockNlDllThenVerifyNlaGetU64ReturnsValue) { 
EXPECT_EQ(MockNlDll::mockU64Val, testNlApi.nlaGetU64(&MockNlDll::mockNlattr)); } TEST_F(SysmanNlApiFixture, GivenNlApiWhenCompleteMockNlDllThenVerifyNlaGetU8ReturnsValue) { EXPECT_EQ(MockNlDll::mockU8Val, testNlApi.nlaGetU8(&MockNlDll::mockNlattr)); } TEST_F(SysmanNlApiFixture, GivenNlApiWhenCompleteMockNlDllThenVerifyNlaIsNestedReturnsZero) { EXPECT_EQ(0, testNlApi.nlaIsNested(&MockNlDll::mockNlattr)); } TEST_F(SysmanNlApiFixture, GivenNlApiWhenCompleteMockNlDllThenVerifyNlaLenReturnsValue) { EXPECT_EQ(MockNlDll::mockAttrLen, testNlApi.nlaLen(&MockNlDll::mockNlattr)); } TEST_F(SysmanNlApiFixture, GivenNlApiWhenCompleteMockNlDllThenVerifyNlaNextReturnsZero) { int remaining = MockNlDll::mockRemainBefore; EXPECT_EQ(&MockNlDll::mockNextNlattr, testNlApi.nlaNext(&MockNlDll::mockNlattr, &remaining)); EXPECT_EQ(MockNlDll::mockRemainAfter, remaining); } TEST_F(SysmanNlApiFixture, GivenNlApiWhenCompleteMockNlDllThenVerifyNlaOkReturnsZero) { EXPECT_EQ(0, testNlApi.nlaOk(&MockNlDll::mockNlattr, MockNlDll::mockRemainBefore)); } TEST_F(SysmanNlApiFixture, GivenNlApiWhenCompleteMockNlDllThenVerifyNlaPutU16ReturnsZero) { EXPECT_EQ(0, testNlApi.nlaPutU16(&MockNlDll::mockNlMsg, MockNlDll::mockType, MockNlDll::mockU16Val)); } TEST_F(SysmanNlApiFixture, GivenNlApiWhenCompleteMockNlDllThenVerifyNlaPutU32ReturnsZero) { EXPECT_EQ(0, testNlApi.nlaPutU32(&MockNlDll::mockNlMsg, MockNlDll::mockType, MockNlDll::mockU32Val)); } TEST_F(SysmanNlApiFixture, GivenNlApiWhenCompleteMockNlDllThenVerifyNlaPutU64ReturnsZero) { EXPECT_EQ(0, testNlApi.nlaPutU64(&MockNlDll::mockNlMsg, MockNlDll::mockType, MockNlDll::mockU64Val)); } TEST_F(SysmanNlApiFixture, GivenNlApiWhenCompleteMockNlDllThenVerifyNlaPutU8ReturnsZero) { EXPECT_EQ(0, testNlApi.nlaPutU8(&MockNlDll::mockNlMsg, MockNlDll::mockType, MockNlDll::mockU8Val)); } TEST_F(SysmanNlApiFixture, GivenNlApiWhenCompleteMockNlDllThenVerifyNlaTypeReturnsType) { EXPECT_EQ(MockNlDll::mockType, testNlApi.nlaType(&MockNlDll::mockNlattr)); } 
TEST_F(SysmanNlApiFixture, GivenNlApiWhenCompleteMockNlDllThenVerifyNlmsgAllocReturnsNlMsg) { EXPECT_EQ(&MockNlDll::mockNlMsg, testNlApi.nlmsgAlloc()); } TEST_F(SysmanNlApiFixture, GivenNlApiWhenCompleteMockNlDllThenVerifyNlmsgFreeReturnsSuccessfully) { testNlApi.nlmsgFree(&MockNlDll::mockNlMsg); } TEST_F(SysmanNlApiFixture, GivenNlApiWhenCompleteMockNlDllThenVerifyNlmsgAttrdataReturnsNlattr) { EXPECT_EQ(&MockNlDll::mockNlattr, testNlApi.nlmsgAttrdata(&MockNlDll::mockNlmsghdr, MockNlDll::mockAttr)); } TEST_F(SysmanNlApiFixture, GivenNlApiWhenCompleteMockNlDllThenVerifyNlmsgAttrlenReturnsLength) { EXPECT_EQ(MockNlDll::mockAttrLen, testNlApi.nlmsgAttrlen(&MockNlDll::mockNlmsghdr, MockNlDll::mockAttr)); } TEST_F(SysmanNlApiFixture, GivenNlApiWhenCompleteMockNlDllThenVerifyNlmsgHdrReturnsNlmsghdr) { EXPECT_EQ(&MockNlDll::mockNlmsghdr, testNlApi.nlmsgHdr(&MockNlDll::mockNlMsg)); } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/linux/pmt/000077500000000000000000000000001422164147700307745ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/linux/pmt/CMakeLists.txt000066400000000000000000000005111422164147700335310ustar00rootroot00000000000000# # Copyright (C) 2021 Intel Corporation # # SPDX-License-Identifier: MIT # if(UNIX) target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/test_pmt.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_pmt.h ) endif() compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/linux/pmt/mock_pmt.h000066400000000000000000000143521422164147700327630ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/core/test/unit_tests/mock.h" #include "sysman/linux/pmt/pmt.h" namespace L0 { namespace ult { const std::string baseTelemSysFS("/sys/class/intel_pmt"); const std::string 
telem("telem"); const std::string telemNodeForSubdevice0("telem2"); const std::string telemNodeForSubdevice1("telem3"); std::string rootPciPathOfGpuDeviceInPmt = "/sys/devices/pci0000:89/0000:89:02.0/0000:8a:00.0"; const std::string realPathTelem1 = "/sys/devices/pci0000:89/0000:89:02.0/0000:8a:00.0/0000:8b:02.0/0000:8e:00.1/pmt_telemetry.1.auto/intel_pmt/telem1"; const std::string realPathTelem2 = "/sys/devices/pci0000:89/0000:89:02.0/0000:8a:00.0/0000:8b:02.0/0000:8e:00.1/pmt_telemetry.1.auto/intel_pmt/telem2"; const std::string realPathTelem3 = "/sys/devices/pci0000:89/0000:89:02.0/0000:8a:00.0/0000:8b:02.0/0000:8e:00.1/pmt_telemetry.1.auto/intel_pmt/telem3"; const std::string realPathTelem4 = "/sys/devices/pci0000:89/0000:89:02.0/0000:8a:00.0/0000:8b:02.0/0000:8e:00.1/pmt_telemetry.1.auto/intel_pmt/telem4"; const std::string realPathTelem5 = "/sys/devices/pci0000:89/0000:89:02.0/0000:8a:00.0/0000:8b:02.0/0000:8e:00.1/pmt_telemetry.1.auto/intel_pmt/telem5"; const std::string sysfsPahTelem1 = "/sys/class/intel_pmt/telem1"; const std::string sysfsPahTelem2 = "/sys/class/intel_pmt/telem2"; const std::string sysfsPahTelem3 = "/sys/class/intel_pmt/telem3"; const std::string sysfsPahTelem4 = "/sys/class/intel_pmt/telem4"; const std::string sysfsPahTelem5 = "/sys/class/intel_pmt/telem5"; class PmtFsAccess : public FsAccess {}; template <> struct Mock : public PmtFsAccess { Mock() { baseTelemSysFSNodeForSubdevice0 = baseTelemSysFS + "/" + telemNodeForSubdevice0; baseTelemSysFSNodeForSubdevice1 = baseTelemSysFS + "/" + telemNodeForSubdevice1; telemetryDeviceEntryForSubdevice0 = baseTelemSysFSNodeForSubdevice0 + "/" + telem; telemetryDeviceEntryForSubdevice1 = baseTelemSysFSNodeForSubdevice1 + "/" + telem; } ze_result_t getValString(const std::string file, std::string &val) { std::string guidPathForSubdevice0 = baseTelemSysFSNodeForSubdevice0 + std::string("/guid"); std::string guidPathForSubdevice1 = baseTelemSysFSNodeForSubdevice1 + std::string("/guid"); if 
((file.compare(guidPathForSubdevice0) == 0) || (file.compare(guidPathForSubdevice1) == 0)) { val = "0xfdc76194"; return ZE_RESULT_SUCCESS; } return ZE_RESULT_ERROR_NOT_AVAILABLE; } ze_result_t getValUnsignedLong(const std::string file, uint64_t &val) { if ((file.compare(baseTelemSysFSNodeForSubdevice0 + std::string("/size")) == 0) || (file.compare(baseTelemSysFSNodeForSubdevice1 + std::string("/size")) == 0) || (file.compare(baseTelemSysFSNodeForSubdevice0 + std::string("/offset")) == 0) || (file.compare(baseTelemSysFSNodeForSubdevice1 + std::string("/offset")) == 0)) { val = 0; return ZE_RESULT_SUCCESS; } return ZE_RESULT_ERROR_NOT_AVAILABLE; } bool isFileExists(const std::string file) { if ((file.compare(telemetryDeviceEntryForSubdevice0) == 0) || (file.compare(telemetryDeviceEntryForSubdevice1) == 0)) { return true; } return false; } ze_result_t getRealPathSuccess(const std::string path, std::string &buf) { if (path.compare(sysfsPahTelem1) == 0) { buf = realPathTelem1; } else if (path.compare(sysfsPahTelem2) == 0) { buf = realPathTelem2; } else if (path.compare(sysfsPahTelem3) == 0) { buf = realPathTelem3; } else if (path.compare(sysfsPahTelem4) == 0) { buf = realPathTelem4; } else if (path.compare(sysfsPahTelem5) == 0) { buf = realPathTelem5; } else { return ZE_RESULT_ERROR_NOT_AVAILABLE; } return ZE_RESULT_SUCCESS; } ze_result_t listDirectorySuccess(const std::string directory, std::vector &listOfTelemNodes) { if (directory.compare(baseTelemSysFS) == 0) { listOfTelemNodes.push_back("crashlog2"); listOfTelemNodes.push_back("crashlog1"); listOfTelemNodes.push_back("telem3"); listOfTelemNodes.push_back("telem2"); listOfTelemNodes.push_back("telem1"); listOfTelemNodes.push_back("telem4"); listOfTelemNodes.push_back("telem5"); return ZE_RESULT_SUCCESS; } return ZE_RESULT_ERROR_NOT_AVAILABLE; } ze_result_t listDirectoryNoTelemNode(const std::string directory, std::vector &listOfTelemNodes) { if (directory.compare(baseTelemSysFS) == 0) { 
listOfTelemNodes.push_back("crashlog2"); listOfTelemNodes.push_back("crashlog1"); return ZE_RESULT_SUCCESS; } return ZE_RESULT_ERROR_NOT_AVAILABLE; } MOCK_METHOD(ze_result_t, read, (const std::string file, std::string &val), (override)); MOCK_METHOD(ze_result_t, read, (const std::string file, uint64_t &val), (override)); MOCK_METHOD(ze_result_t, getRealPath, (const std::string path, std::string &buf), (override)); MOCK_METHOD(ze_result_t, listDirectory, (const std::string path, std::vector &list), (override)); MOCK_METHOD(bool, fileExists, (const std::string file), (override)); std::string telemetryDeviceEntryForSubdevice0; std::string telemetryDeviceEntryForSubdevice1; std::string baseTelemSysFSNodeForSubdevice0; std::string baseTelemSysFSNodeForSubdevice1; }; class PublicPlatformMonitoringTech : public L0::PlatformMonitoringTech { public: PublicPlatformMonitoringTech(FsAccess *pFsAccess, ze_bool_t onSubdevice, uint32_t subdeviceId) : PlatformMonitoringTech(pFsAccess, onSubdevice, subdeviceId) {} using PlatformMonitoringTech::closeFunction; using PlatformMonitoringTech::doInitPmtObject; using PlatformMonitoringTech::init; using PlatformMonitoringTech::keyOffsetMap; using PlatformMonitoringTech::openFunction; using PlatformMonitoringTech::preadFunction; using PlatformMonitoringTech::telemetryDeviceEntry; }; } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/linux/pmt/test_pmt.cpp000066400000000000000000000407651422164147700333530ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/test/unit_tests/sources/sysman/linux/mock_sysman_fixture.h" #include "mock_pmt.h" extern bool sysmanUltsEnable; using ::testing::_; using ::testing::Matcher; using ::testing::Return; namespace L0 { namespace ult { static int fakeFileDescriptor = 123; const std::map dummyKeyOffsetMap = { {"DUMMY_KEY", 0x0}}; class ZesPmtFixtureMultiDevice : 
public SysmanMultiDeviceFixture { protected: std::vector deviceHandles; std::unique_ptr> pTestFsAccess; std::map mapOfSubDeviceIdToPmtObject; void SetUp() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } SysmanMultiDeviceFixture::SetUp(); uint32_t subDeviceCount = 0; Device::fromHandle(device->toHandle())->getSubDevices(&subDeviceCount, nullptr); if (subDeviceCount == 0) { deviceHandles.resize(1, device->toHandle()); } else { deviceHandles.resize(subDeviceCount, nullptr); Device::fromHandle(device->toHandle())->getSubDevices(&subDeviceCount, deviceHandles.data()); } pTestFsAccess = std::make_unique>>(); ON_CALL(*pTestFsAccess.get(), read(_, Matcher(_))) .WillByDefault(::testing::Invoke(pTestFsAccess.get(), &Mock::getValString)); ON_CALL(*pTestFsAccess.get(), read(_, Matcher(_))) .WillByDefault(::testing::Invoke(pTestFsAccess.get(), &Mock::getValUnsignedLong)); ON_CALL(*pTestFsAccess.get(), listDirectory(_, _)) .WillByDefault(::testing::Invoke(pTestFsAccess.get(), &Mock::listDirectorySuccess)); ON_CALL(*pTestFsAccess.get(), getRealPath(_, _)) .WillByDefault(::testing::Invoke(pTestFsAccess.get(), &Mock::getRealPathSuccess)); ON_CALL(*pTestFsAccess.get(), fileExists(_)) .WillByDefault(::testing::Invoke(pTestFsAccess.get(), &Mock::isFileExists)); PlatformMonitoringTech::create(deviceHandles, pTestFsAccess.get(), rootPciPathOfGpuDeviceInPmt, mapOfSubDeviceIdToPmtObject); } void TearDown() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } SysmanMultiDeviceFixture::TearDown(); for (auto &subDeviceIdToPmtEntry : mapOfSubDeviceIdToPmtObject) { delete subDeviceIdToPmtEntry.second; subDeviceIdToPmtEntry.second = nullptr; } } }; TEST_F(ZesPmtFixtureMultiDevice, GivenValidDeviceHandlesWhenCreatingPMTHandlesThenValidPmtHandlesForAllSubdevicesWillBeCreated) {} TEST_F(ZesPmtFixtureMultiDevice, GivenValidDeviceHandlesWhenenumerateRootTelemIndexThenCheckForErrorIflistDirectoryFails) { EXPECT_CALL(*pTestFsAccess.get(), listDirectory(_, _)) 
.WillOnce(Return(ZE_RESULT_ERROR_NOT_AVAILABLE)); EXPECT_EQ(ZE_RESULT_ERROR_NOT_AVAILABLE, PlatformMonitoringTech::enumerateRootTelemIndex(pTestFsAccess.get(), rootPciPathOfGpuDeviceInPmt)); } TEST_F(ZesPmtFixtureMultiDevice, GivenValidDeviceHandlesWhenenumerateRootTelemIndexThenCheckForErrorIfgetRealPathFails) { EXPECT_CALL(*pTestFsAccess.get(), getRealPath(_, _)) .WillRepeatedly(Return(ZE_RESULT_ERROR_NOT_AVAILABLE)); EXPECT_EQ(ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE, PlatformMonitoringTech::enumerateRootTelemIndex(pTestFsAccess.get(), rootPciPathOfGpuDeviceInPmt)); } TEST_F(ZesPmtFixtureMultiDevice, GivenWhenenumerateRootTelemIndexThenCheckForErrorIfgetRealPathSuccessButNoTelemetryNodeAndGPUDeviceShareRootPciPort) { EXPECT_CALL(*pTestFsAccess.get(), getRealPath(_, _)) .Times(5) .WillRepeatedly(::testing::DoAll(::testing::SetArgReferee<1>("/sys/devices/pci0000:89/0000:89:02.0/0000:8e:00.0/0000:8b:02.0/0000:8e:00.1/pmt_telemetry.1.auto/intel_pmt/telem1"), Return(ZE_RESULT_SUCCESS))); EXPECT_EQ(ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE, PlatformMonitoringTech::enumerateRootTelemIndex(pTestFsAccess.get(), rootPciPathOfGpuDeviceInPmt)); } TEST_F(ZesPmtFixtureMultiDevice, GivenTelemDirectoryContainNowTelemEntryWhenenumerateRootTelemIndexThenCheckForError) { ON_CALL(*pTestFsAccess.get(), listDirectory(_, _)) .WillByDefault(::testing::Invoke(pTestFsAccess.get(), &Mock::listDirectoryNoTelemNode)); EXPECT_EQ(ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE, PlatformMonitoringTech::enumerateRootTelemIndex(pTestFsAccess.get(), rootPciPathOfGpuDeviceInPmt)); } TEST_F(ZesPmtFixtureMultiDevice, GivenValidDeviceHandlesWhenCreatingPMTHandlesThenCheckForErrorThatCouldHappenDuringWhileValidatingTelemNode) { EXPECT_CALL(*pTestFsAccess.get(), getRealPath(_, _)) .WillRepeatedly(Return(ZE_RESULT_ERROR_NOT_AVAILABLE)); PlatformMonitoringTech::enumerateRootTelemIndex(pTestFsAccess.get(), rootPciPathOfGpuDeviceInPmt); auto pPmt = std::make_unique(pTestFsAccess.get(), 1, 0); 
EXPECT_EQ(pPmt->init(pTestFsAccess.get(), rootPciPathOfGpuDeviceInPmt), ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE); } TEST_F(ZesPmtFixtureMultiDevice, GivenValidDeviceHandlesWhenCreatingPMTHandlesThenCheckForErrorThatCouldHappenDuringGUIDRead) { EXPECT_CALL(*pTestFsAccess.get(), read(_, Matcher(_))) .WillOnce(Return(ZE_RESULT_ERROR_NOT_AVAILABLE)); PlatformMonitoringTech::enumerateRootTelemIndex(pTestFsAccess.get(), rootPciPathOfGpuDeviceInPmt); auto pPmt = std::make_unique(pTestFsAccess.get(), 1, 0); EXPECT_EQ(pPmt->init(pTestFsAccess.get(), rootPciPathOfGpuDeviceInPmt), ZE_RESULT_ERROR_NOT_AVAILABLE); } TEST_F(ZesPmtFixtureMultiDevice, GivenValidDeviceHandlesWhenCreatingPMTHandlesThenCheckForErrorIfGUIDReadValueIsNotSupported) { EXPECT_CALL(*pTestFsAccess.get(), read(_, Matcher(_))) .WillOnce(::testing::DoAll(::testing::SetArgReferee<1>(""), Return(ZE_RESULT_SUCCESS))); PlatformMonitoringTech::enumerateRootTelemIndex(pTestFsAccess.get(), rootPciPathOfGpuDeviceInPmt); auto pPmt = std::make_unique(pTestFsAccess.get(), 1, 0); EXPECT_EQ(pPmt->init(pTestFsAccess.get(), rootPciPathOfGpuDeviceInPmt), ZE_RESULT_ERROR_UNSUPPORTED_FEATURE); } TEST_F(ZesPmtFixtureMultiDevice, GivenSomeKeyWhenCallingreadValueWithUint64TypeThenCheckForErrorBranches) { auto pPmt = std::make_unique(pTestFsAccess.get(), 0, 0); uint64_t val = 0; EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, pPmt->readValue("SOMETHING", val)); } TEST_F(ZesPmtFixtureMultiDevice, GivenSomeKeyWhenCallingreadValueWithUint32TypeThenCheckForErrorBranches) { auto pPmt = std::make_unique(pTestFsAccess.get(), 0, 0); uint32_t val = 0; EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, pPmt->readValue("SOMETHING", val)); } TEST_F(ZesPmtFixtureMultiDevice, GivenValidDeviceHandlesWhenCreatingPMTHandlesThenCheckForErrorThatCouldHappenDuringbaseOffsetRead) { EXPECT_CALL(*pTestFsAccess.get(), read(_, Matcher(_))) .WillOnce(Return(ZE_RESULT_ERROR_NOT_AVAILABLE)); PlatformMonitoringTech::enumerateRootTelemIndex(pTestFsAccess.get(), 
rootPciPathOfGpuDeviceInPmt); auto pPmt = std::make_unique(pTestFsAccess.get(), 1, 0); EXPECT_EQ(pPmt->init(pTestFsAccess.get(), rootPciPathOfGpuDeviceInPmt), ZE_RESULT_ERROR_NOT_AVAILABLE); } inline static int openMock(const char *pathname, int flags) { if (strcmp(pathname, "/sys/class/intel_pmt/telem2/telem") == 0) { return fakeFileDescriptor; } if (strcmp(pathname, "/sys/class/intel_pmt/telem3/telem") == 0) { return fakeFileDescriptor; } return -1; } inline static int openMockReturnFailure(const char *pathname, int flags) { return -1; } inline static int closeMock(int fd) { if (fd == fakeFileDescriptor) { return 0; } return -1; } inline static int closeMockReturnFailure(int fd) { return -1; } ssize_t preadMockPmt(int fd, void *buf, size_t count, off_t offset) { return count; } ssize_t preadMockPmtFailure(int fd, void *buf, size_t count, off_t offset) { return -1; } TEST_F(ZesPmtFixtureMultiDevice, GivenValidSyscallsWhenCallingreadValueWithUint32TypeAndOpenSysCallFailsThenreadValueFails) { auto pPmt = std::make_unique(pTestFsAccess.get(), 1, 0); pPmt->openFunction = openMockReturnFailure; uint32_t val = 0; pPmt->keyOffsetMap = dummyKeyOffsetMap; EXPECT_EQ(ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE, pPmt->readValue("DUMMY_KEY", val)); } TEST_F(ZesPmtFixtureMultiDevice, GivenValidSyscallsWhenCallingreadValueWithUint32TypeAndCloseSysCallFailsThenreadValueFails) { auto pPmt = std::make_unique(pTestFsAccess.get(), 1, 0); pPmt->telemetryDeviceEntry = baseTelemSysFS + "/" + telemNodeForSubdevice0 + "/" + telem; pPmt->openFunction = openMock; pPmt->preadFunction = preadMockPmt; pPmt->closeFunction = closeMockReturnFailure; uint32_t val = 0; pPmt->keyOffsetMap = dummyKeyOffsetMap; EXPECT_EQ(ZE_RESULT_ERROR_UNKNOWN, pPmt->readValue("DUMMY_KEY", val)); } TEST_F(ZesPmtFixtureMultiDevice, GivenValidSyscallsWhenCallingreadValueWithUint64TypeAndOpenSysCallFailsThenreadValueFails) { auto pPmt = std::make_unique(pTestFsAccess.get(), 1, 0); pPmt->openFunction = openMockReturnFailure; 
uint64_t val = 0; pPmt->keyOffsetMap = dummyKeyOffsetMap; EXPECT_EQ(ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE, pPmt->readValue("DUMMY_KEY", val)); } TEST_F(ZesPmtFixtureMultiDevice, GivenValidSyscallsWhenCallingreadValueWithUint64TypeAndCloseSysCallFailsThenreadValueFails) { auto pPmt = std::make_unique(pTestFsAccess.get(), 1, 0); pPmt->telemetryDeviceEntry = baseTelemSysFS + "/" + telemNodeForSubdevice0 + "/" + telem; pPmt->openFunction = openMock; pPmt->preadFunction = preadMockPmt; pPmt->closeFunction = closeMockReturnFailure; uint64_t val = 0; pPmt->keyOffsetMap = dummyKeyOffsetMap; EXPECT_EQ(ZE_RESULT_ERROR_UNKNOWN, pPmt->readValue("DUMMY_KEY", val)); } TEST_F(ZesPmtFixtureMultiDevice, GivenValidSyscallsWhenCallingreadValueWithUint32TypeAndPreadSysCallFailsThenreadValueFails) { auto pPmt = std::make_unique(pTestFsAccess.get(), 1, 0); pPmt->telemetryDeviceEntry = baseTelemSysFS + "/" + telemNodeForSubdevice0 + "/" + telem; pPmt->openFunction = openMock; pPmt->preadFunction = preadMockPmtFailure; pPmt->closeFunction = closeMock; uint32_t val = 0; pPmt->keyOffsetMap = dummyKeyOffsetMap; EXPECT_EQ(ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE, pPmt->readValue("DUMMY_KEY", val)); } TEST_F(ZesPmtFixtureMultiDevice, GivenValidSyscallsWhenCallingreadValueWithUint64TypeAndPreadSysCallFailsThenreadValueFails) { auto pPmt = std::make_unique(pTestFsAccess.get(), 1, 0); pPmt->telemetryDeviceEntry = baseTelemSysFS + "/" + telemNodeForSubdevice0 + "/" + telem; pPmt->openFunction = openMock; pPmt->preadFunction = preadMockPmtFailure; pPmt->closeFunction = closeMock; uint64_t val = 0; pPmt->keyOffsetMap = dummyKeyOffsetMap; EXPECT_EQ(ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE, pPmt->readValue("DUMMY_KEY", val)); } TEST_F(ZesPmtFixtureMultiDevice, GivenValidSyscallsWhenDoingPMTInitThenPMTmapOfSubDeviceIdToPmtObjectWouldContainValidEntries) { std::map mapOfSubDeviceIdToPmtObject; for (const auto &deviceHandle : deviceHandles) { ze_device_properties_t deviceProperties = 
{ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES}; Device::fromHandle(deviceHandle)->getProperties(&deviceProperties); auto pPmt = new PublicPlatformMonitoringTech(pTestFsAccess.get(), deviceProperties.flags & ZE_DEVICE_PROPERTY_FLAG_SUBDEVICE, deviceProperties.subdeviceId); UNRECOVERABLE_IF(nullptr == pPmt); PublicPlatformMonitoringTech::doInitPmtObject(pTestFsAccess.get(), deviceProperties.subdeviceId, pPmt, rootPciPathOfGpuDeviceInPmt, mapOfSubDeviceIdToPmtObject); auto subDeviceIdToPmtEntry = mapOfSubDeviceIdToPmtObject.find(deviceProperties.subdeviceId); EXPECT_EQ(subDeviceIdToPmtEntry->second, pPmt); delete pPmt; } } TEST_F(ZesPmtFixtureMultiDevice, GivenBaseOffsetReadFailWhenDoingPMTInitThenPMTmapOfSubDeviceIdToPmtObjectWouldBeEmpty) { std::map mapOfSubDeviceIdToPmtObject; EXPECT_CALL(*pTestFsAccess.get(), read(_, Matcher(_))) .WillRepeatedly(Return(ZE_RESULT_ERROR_NOT_AVAILABLE)); for (const auto &deviceHandle : deviceHandles) { ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES}; Device::fromHandle(deviceHandle)->getProperties(&deviceProperties); auto pPmt = new PublicPlatformMonitoringTech(pTestFsAccess.get(), deviceProperties.flags & ZE_DEVICE_PROPERTY_FLAG_SUBDEVICE, deviceProperties.subdeviceId); UNRECOVERABLE_IF(nullptr == pPmt); PublicPlatformMonitoringTech::doInitPmtObject(pTestFsAccess.get(), deviceProperties.subdeviceId, pPmt, rootPciPathOfGpuDeviceInPmt, mapOfSubDeviceIdToPmtObject); EXPECT_TRUE(mapOfSubDeviceIdToPmtObject.empty()); } } TEST_F(ZesPmtFixtureMultiDevice, GivenNoPMTHandleInmapOfSubDeviceIdToPmtObjectWhenCallingreleasePmtObjectThenMapWouldGetEmpty) { auto mapOriginal = pLinuxSysmanImp->mapOfSubDeviceIdToPmtObject; pLinuxSysmanImp->mapOfSubDeviceIdToPmtObject.clear(); for (const auto &deviceHandle : deviceHandles) { ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES}; Device::fromHandle(deviceHandle)->getProperties(&deviceProperties); 
pLinuxSysmanImp->mapOfSubDeviceIdToPmtObject.emplace(deviceProperties.subdeviceId, nullptr); } pLinuxSysmanImp->releasePmtObject(); EXPECT_TRUE(pLinuxSysmanImp->mapOfSubDeviceIdToPmtObject.empty()); pLinuxSysmanImp->mapOfSubDeviceIdToPmtObject = mapOriginal; } class ZesPmtFixtureNoSubDevice : public SysmanDeviceFixture { protected: std::vector deviceHandles; std::unique_ptr> pTestFsAccess; std::map mapOfSubDeviceIdToPmtObject; void SetUp() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } SysmanDeviceFixture::SetUp(); uint32_t subDeviceCount = 0; Device::fromHandle(device->toHandle())->getSubDevices(&subDeviceCount, nullptr); if (subDeviceCount == 0) { deviceHandles.resize(1, device->toHandle()); } else { deviceHandles.resize(subDeviceCount, nullptr); Device::fromHandle(device->toHandle())->getSubDevices(&subDeviceCount, deviceHandles.data()); } pTestFsAccess = std::make_unique>>(); ON_CALL(*pTestFsAccess.get(), read(_, Matcher(_))) .WillByDefault(::testing::Invoke(pTestFsAccess.get(), &Mock::getValString)); ON_CALL(*pTestFsAccess.get(), read(_, Matcher(_))) .WillByDefault(::testing::Invoke(pTestFsAccess.get(), &Mock::getValUnsignedLong)); ON_CALL(*pTestFsAccess.get(), listDirectory(_, _)) .WillByDefault(::testing::Invoke(pTestFsAccess.get(), &Mock::listDirectorySuccess)); ON_CALL(*pTestFsAccess.get(), getRealPath(_, _)) .WillByDefault(::testing::Invoke(pTestFsAccess.get(), &Mock::getRealPathSuccess)); ON_CALL(*pTestFsAccess.get(), fileExists(_)) .WillByDefault(::testing::Invoke(pTestFsAccess.get(), &Mock::isFileExists)); PlatformMonitoringTech::create(deviceHandles, pTestFsAccess.get(), rootPciPathOfGpuDeviceInPmt, mapOfSubDeviceIdToPmtObject); } void TearDown() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } SysmanDeviceFixture::TearDown(); for (auto &subDeviceIdToPmtEntry : mapOfSubDeviceIdToPmtObject) { delete subDeviceIdToPmtEntry.second; subDeviceIdToPmtEntry.second = nullptr; } } }; TEST_F(ZesPmtFixtureNoSubDevice, 
GivenValidDeviceHandlesWhenCreatingPMTHandlesThenValidPmtHandlesForAllSubdevicesWillBeCreated) {} } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/linux/pmu/000077500000000000000000000000001422164147700307755ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/linux/pmu/CMakeLists.txt000066400000000000000000000005161422164147700335370ustar00rootroot00000000000000# # Copyright (C) 2021 Intel Corporation # # SPDX-License-Identifier: MIT # if(UNIX) target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/test_pmu.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_pmu.h ) endif() add_subdirectories() compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/linux/pmu/mock_pmu.h000066400000000000000000000044761422164147700327730ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/core/test/unit_tests/mock.h" #include "level_zero/tools/source/sysman/linux/pmu/pmu_imp.h" using namespace NEO; namespace L0 { namespace ult { constexpr uint64_t mockEventVal = 2u; constexpr uint64_t mockTimeStamp = 100u; constexpr int64_t mockPmuFd = 5; constexpr uint64_t mockEventCount = 2u; constexpr uint64_t mockEvent1Val = 100u; constexpr uint64_t mockEvent2Val = 150u; class MockPmuInterfaceImpForSysman : public PmuInterfaceImp { public: using PmuInterfaceImp::getErrorNo; using PmuInterfaceImp::perfEventOpen; using PmuInterfaceImp::readFunction; using PmuInterfaceImp::syscallFunction; MockPmuInterfaceImpForSysman(LinuxSysmanImp *pLinuxSysmanImp) : PmuInterfaceImp(pLinuxSysmanImp) {} }; template <> struct Mock : public MockPmuInterfaceImpForSysman { Mock(LinuxSysmanImp *pLinuxSysmanImp) : MockPmuInterfaceImpForSysman(pLinuxSysmanImp) {} int64_t mockedPerfEventOpenAndSuccessReturn(perf_event_attr *attr, pid_t pid, int cpu, int 
groupFd, uint64_t flags) { return mockPmuFd; } int64_t mockedPerfEventOpenAndFailureReturn(perf_event_attr *attr, pid_t pid, int cpu, int groupFd, uint64_t flags) { return -1; } int mockedReadCountersForGroupSuccess(int fd, uint64_t *data, ssize_t sizeOfdata) { data[0] = mockEventCount; data[1] = mockTimeStamp; data[2] = mockEvent1Val; data[3] = mockEvent2Val; return 0; } int mockGetErrorNoSuccess() { return EINVAL; } int mockGetErrorNoFailure() { return EBADF; } MOCK_METHOD(int, pmuRead, (int fd, uint64_t *data, ssize_t sizeOfdata), (override)); MOCK_METHOD(int64_t, perfEventOpen, (perf_event_attr * attr, pid_t pid, int cpu, int groupFd, uint64_t flags), (override)); MOCK_METHOD(int, getErrorNo, (), (override)); }; class PmuFsAccess : public FsAccess {}; template <> struct Mock : public PmuFsAccess { MOCK_METHOD(ze_result_t, read, (const std::string file, uint32_t &val), (override)); ze_result_t readValSuccess(const std::string file, uint32_t &val) { val = 18; return ZE_RESULT_SUCCESS; } }; } // namespace ult } // namespace L0compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/linux/pmu/test_pmu.cpp000066400000000000000000000146101422164147700333430ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/test/unit_tests/sources/sysman/linux/mock_sysman_fixture.h" #include "level_zero/tools/test/unit_tests/sources/sysman/linux/pmu/mock_pmu.h" extern bool sysmanUltsEnable; using ::testing::Matcher; using ::testing::Return; namespace L0 { namespace ult { struct SysmanPmuFixture : public SysmanDeviceFixture { protected: std::unique_ptr> pPmuInterface; PmuInterface *pOriginalPmuInterface = nullptr; std::unique_ptr> pFsAccess; FsAccess *pFsAccessOriginal = nullptr; void SetUp() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } SysmanDeviceFixture::SetUp(); pFsAccessOriginal = pLinuxSysmanImp->pFsAccess; pFsAccess = std::make_unique>>(); pLinuxSysmanImp->pFsAccess = 
pFsAccess.get(); pOriginalPmuInterface = pLinuxSysmanImp->pPmuInterface; pPmuInterface = std::make_unique>>(pLinuxSysmanImp); pLinuxSysmanImp->pPmuInterface = pPmuInterface.get(); ON_CALL(*pFsAccess.get(), read(_, _)) .WillByDefault(::testing::Invoke(pFsAccess.get(), &Mock::readValSuccess)); ON_CALL(*pPmuInterface.get(), perfEventOpen(_, _, _, _, _)) .WillByDefault(::testing::Invoke(pPmuInterface.get(), &Mock::mockedPerfEventOpenAndSuccessReturn)); ON_CALL(*pPmuInterface.get(), pmuRead(_, _, _)) .WillByDefault(::testing::Invoke(pPmuInterface.get(), &Mock::mockedReadCountersForGroupSuccess)); ON_CALL(*pPmuInterface.get(), getErrorNo()) .WillByDefault(::testing::Invoke(pPmuInterface.get(), &Mock::mockGetErrorNoSuccess)); } void TearDown() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } SysmanDeviceFixture::TearDown(); pLinuxSysmanImp->pPmuInterface = pOriginalPmuInterface; pLinuxSysmanImp->pFsAccess = pFsAccessOriginal; } }; inline static ssize_t openReadReturnSuccess(int fd, void *data, size_t sizeOfdata) { uint64_t dataVal[2] = {mockEventVal, mockTimeStamp}; memcpy_s(data, sizeOfdata, dataVal, sizeOfdata); return sizeOfdata; } inline static ssize_t openReadReturnFailure(int fd, void *data, size_t sizeOfdata) { return -1; } inline static long int syscallReturnSuccess(long int sysNo, ...) 
noexcept { return mockPmuFd; } TEST_F(SysmanPmuFixture, GivenValidPmuHandleWhenCallingPmuReadThenSuccessIsReturned) { MockPmuInterfaceImpForSysman *pmuInterface = new MockPmuInterfaceImpForSysman(pLinuxSysmanImp); pmuInterface->readFunction = openReadReturnSuccess; uint64_t data[2]; int validFd = 10; EXPECT_EQ(0, pmuInterface->pmuRead(validFd, data, sizeof(data))); EXPECT_EQ(mockEventVal, data[0]); EXPECT_EQ(mockTimeStamp, data[1]); delete pmuInterface; } TEST_F(SysmanPmuFixture, GivenValidPmuHandleWhenCallingPerEventOpenThenSuccessIsReturned) { MockPmuInterfaceImpForSysman *pmuInterface = new MockPmuInterfaceImpForSysman(pLinuxSysmanImp); pmuInterface->syscallFunction = syscallReturnSuccess; struct perf_event_attr attr = {}; int cpu = 0; attr.read_format = static_cast(PERF_FORMAT_TOTAL_TIME_ENABLED); attr.config = 11; EXPECT_EQ(mockPmuFd, pmuInterface->perfEventOpen(&attr, -1, cpu, -1, 0)); delete pmuInterface; } TEST_F(SysmanPmuFixture, GivenValidPmuHandleWhenCallingThenFailureIsReturned) { MockPmuInterfaceImpForSysman *pmuInterface = new MockPmuInterfaceImpForSysman(pLinuxSysmanImp); pmuInterface->readFunction = openReadReturnFailure; int validFd = 10; uint64_t data[2]; EXPECT_EQ(-1, pmuInterface->pmuRead(validFd, data, sizeof(data))); delete pmuInterface; } TEST_F(SysmanPmuFixture, GivenValidPmuHandleWhenCallingPmuInterfaceOpenAndPerfEventOpenSucceedsThenVaildFdIsReturned) { uint64_t config = 10; EXPECT_EQ(mockPmuFd, pLinuxSysmanImp->pPmuInterface->pmuInterfaceOpen(config, -1, PERF_FORMAT_TOTAL_TIME_ENABLED)); } TEST_F(SysmanPmuFixture, GivenValidPmuHandleWhenReadingGroupOfEventsUsingGroupFdThenSuccessIsReturned) { uint64_t configForEvent1 = 10; int64_t groupFd = pLinuxSysmanImp->pPmuInterface->pmuInterfaceOpen(configForEvent1, -1, PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_GROUP); // To get group leader uint64_t configForEvent2 = 15; pLinuxSysmanImp->pPmuInterface->pmuInterfaceOpen(configForEvent2, static_cast(groupFd), PERF_FORMAT_TOTAL_TIME_ENABLED | 
PERF_FORMAT_GROUP); uint64_t data[4]; EXPECT_EQ(0, pLinuxSysmanImp->pPmuInterface->pmuRead(static_cast(groupFd), data, sizeof(data))); EXPECT_EQ(mockEventCount, data[0]); EXPECT_EQ(mockTimeStamp, data[1]); EXPECT_EQ(mockEvent1Val, data[2]); EXPECT_EQ(mockEvent2Val, data[3]); } TEST_F(SysmanPmuFixture, GivenValidPmuHandleWhenCallingPmuInterfaceOpenAndPerfEventOpenFailsThenFailureIsReturned) { ON_CALL(*pPmuInterface.get(), perfEventOpen(_, _, _, _, _)) .WillByDefault(::testing::Invoke(pPmuInterface.get(), &Mock::mockedPerfEventOpenAndFailureReturn)); uint64_t config = 10; EXPECT_EQ(-1, pLinuxSysmanImp->pPmuInterface->pmuInterfaceOpen(config, -1, PERF_FORMAT_TOTAL_TIME_ENABLED)); } TEST_F(SysmanPmuFixture, GivenValidPmuHandleWhenCallingPmuInterfaceOpenAndPerfEventOpenFailsAndErrNoSetBySyscallIsNotInvalidArgumentThenFailureIsReturned) { ON_CALL(*pPmuInterface.get(), perfEventOpen(_, _, _, _, _)) .WillByDefault(::testing::Invoke(pPmuInterface.get(), &Mock::mockedPerfEventOpenAndFailureReturn)); ON_CALL(*pPmuInterface.get(), getErrorNo()) .WillByDefault(::testing::Invoke(pPmuInterface.get(), &Mock::mockGetErrorNoFailure)); uint64_t config = 10; EXPECT_EQ(-1, pLinuxSysmanImp->pPmuInterface->pmuInterfaceOpen(config, -1, PERF_FORMAT_TOTAL_TIME_ENABLED)); } TEST_F(SysmanPmuFixture, GivenValidPmuHandleWhenAndDomainErrorOccursThenDomainErrorIsReturnedBygetErrorNoFunction) { MockPmuInterfaceImpForSysman *pmuInterface = new MockPmuInterfaceImpForSysman(pLinuxSysmanImp); log(-1.0); //Domain error injected EXPECT_EQ(EDOM, pmuInterface->getErrorNo()); delete pmuInterface; } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/linux/test_fw_util.cpp000066400000000000000000000035231422164147700334130ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/helpers/variable_backup.h" #include "shared/test/common/test_macros/test.h" #include 
"level_zero/tools/source/sysman/linux/firmware_util/firmware_util_imp.h" #include "level_zero/tools/test/unit_tests/sources/sysman/linux/mock_sysman_fixture.h" extern bool sysmanUltsEnable; namespace L0 { namespace ult { static uint32_t mockFwUtilDeviceCloseCallCount = 0; TEST(LinuxFwUtilDeleteTest, GivenLibraryWasNotSetWhenFirmwareUtilInterfaceIsDeletedThenLibraryFunctionIsNotAccessed) { mockFwUtilDeviceCloseCallCount = 0; if (!sysmanUltsEnable) { GTEST_SKIP(); } VariableBackup mockDeviceClose(&deviceClose, [](struct igsc_device_handle *handle) -> int { mockFwUtilDeviceCloseCallCount++; return 0; }); std::string pciBdf("0000:00:00.0"); FirmwareUtilImp *pFwUtilImp = new FirmwareUtilImp(pciBdf); pFwUtilImp->libraryHandle = nullptr; delete pFwUtilImp; EXPECT_EQ(mockFwUtilDeviceCloseCallCount, 0u); } TEST(LinuxFwUtilDeleteTest, GivenLibraryWasSetWhenFirmwareUtilInterfaceIsDeletedThenLibraryFunctionIsAccessed) { mockFwUtilDeviceCloseCallCount = 0; if (!sysmanUltsEnable) { GTEST_SKIP(); } VariableBackup mockDeviceClose(&deviceClose, [](struct igsc_device_handle *handle) -> int { mockFwUtilDeviceCloseCallCount++; return 0; }); std::string pciBdf("0000:00:00.0"); FirmwareUtilImp *pFwUtilImp = new FirmwareUtilImp(pciBdf); // Prepare dummy OsLibrary for library, since no access is expected pFwUtilImp->libraryHandle = static_cast(new MockOsLibrary()); delete pFwUtilImp; EXPECT_EQ(mockFwUtilDeviceCloseCallCount, 1u); } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/linux/test_sysman.cpp000066400000000000000000000450231422164147700332550ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "level_zero/tools/test/unit_tests/sources/sysman/linux/mock_sysman_fixture.h" namespace L0 { namespace ult { inline static int mockAccessFailure(const char *pathname, int mode) { return -1; } inline static int 
mockAccessSuccess(const char *pathname, int mode) { return 0; } inline static int mockStatFailure(const char *pathname, struct stat *sb) noexcept { return -1; } inline static int mockStatSuccess(const char *pathname, struct stat *sb) noexcept { sb->st_mode = S_IWUSR | S_IRUSR; return 0; } inline static int mockStatNoPermissions(const char *pathname, struct stat *sb) noexcept { sb->st_mode = 0; return 0; } TEST_F(SysmanDeviceFixture, GivenValidDeviceHandleInSysmanImpCreationWhenAllSysmanInterfacesAreAssignedToNullThenExpectSysmanDeviceModuleContextsAreNull) { ze_device_handle_t hSysman = device->toHandle(); SysmanDeviceImp *sysmanImp = new SysmanDeviceImp(hSysman); delete (sysmanImp->pPowerHandleContext); delete (sysmanImp->pFrequencyHandleContext); delete (sysmanImp->pFabricPortHandleContext); delete (sysmanImp->pTempHandleContext); delete (sysmanImp->pPci); delete (sysmanImp->pStandbyHandleContext); delete (sysmanImp->pEngineHandleContext); delete (sysmanImp->pSchedulerHandleContext); delete (sysmanImp->pRasHandleContext); delete (sysmanImp->pMemoryHandleContext); delete (sysmanImp->pGlobalOperations); delete (sysmanImp->pEvents); delete (sysmanImp->pFanHandleContext); delete (sysmanImp->pFirmwareHandleContext); delete (sysmanImp->pDiagnosticsHandleContext); delete (sysmanImp->pPerformanceHandleContext); sysmanImp->pPowerHandleContext = nullptr; sysmanImp->pFrequencyHandleContext = nullptr; sysmanImp->pFabricPortHandleContext = nullptr; sysmanImp->pTempHandleContext = nullptr; sysmanImp->pPci = nullptr; sysmanImp->pStandbyHandleContext = nullptr; sysmanImp->pEngineHandleContext = nullptr; sysmanImp->pSchedulerHandleContext = nullptr; sysmanImp->pRasHandleContext = nullptr; sysmanImp->pMemoryHandleContext = nullptr; sysmanImp->pGlobalOperations = nullptr; sysmanImp->pEvents = nullptr; sysmanImp->pFanHandleContext = nullptr; sysmanImp->pFirmwareHandleContext = nullptr; sysmanImp->pDiagnosticsHandleContext = nullptr; sysmanImp->pPerformanceHandleContext = nullptr; 
auto pLinuxSysmanImpTemp = static_cast(sysmanImp->pOsSysman); pLinuxSysmanImpTemp->pSysfsAccess = pSysfsAccess; pLinuxSysmanImpTemp->pProcfsAccess = pProcfsAccess; sysmanImp->init(); // all sysman module contexts are null. Validating PowerHandleContext instead of all contexts EXPECT_EQ(sysmanImp->pPowerHandleContext, nullptr); pLinuxSysmanImpTemp->pSysfsAccess = nullptr; pLinuxSysmanImpTemp->pProcfsAccess = nullptr; delete sysmanImp; sysmanImp = nullptr; } TEST_F(SysmanDeviceFixture, GivenValidDeviceHandleAndIfSysmanDeviceInitFailsThenErrorReturnedWhileQueryingSysmanAPIs) { ze_device_handle_t hSysman = device->toHandle(); auto pSysmanDeviceOriginal = static_cast(device)->getSysmanHandle(); // L0::SysmanDeviceHandleContext::init() would return nullptr as: // L0::SysmanDeviceHandleContext::init() --> sysmanDevice->init() --> pOsSysman->init() --> pSysfsAccess->getRealPath() // pSysfsAccess->getRealPath() would fail because pSysfsAccess is not mocked in this test case. auto pSysmanDeviceLocal = L0::SysmanDeviceHandleContext::init(hSysman); EXPECT_EQ(pSysmanDeviceLocal, nullptr); static_cast(device)->setSysmanHandle(pSysmanDeviceLocal); uint32_t count = 0; EXPECT_EQ(ZE_RESULT_ERROR_UNINITIALIZED, zesDeviceEnumSchedulers(hSysman, &count, nullptr)); EXPECT_EQ(ZE_RESULT_ERROR_UNINITIALIZED, zesDeviceProcessesGetState(hSysman, &count, nullptr)); EXPECT_EQ(ZE_RESULT_ERROR_UNINITIALIZED, zesDevicePciGetBars(hSysman, &count, nullptr)); EXPECT_EQ(ZE_RESULT_ERROR_UNINITIALIZED, zesDeviceEnumPowerDomains(hSysman, &count, nullptr)); EXPECT_EQ(ZE_RESULT_ERROR_UNINITIALIZED, zesDeviceEnumFrequencyDomains(hSysman, &count, nullptr)); EXPECT_EQ(ZE_RESULT_ERROR_UNINITIALIZED, zesDeviceEnumEngineGroups(hSysman, &count, nullptr)); EXPECT_EQ(ZE_RESULT_ERROR_UNINITIALIZED, zesDeviceEnumStandbyDomains(hSysman, &count, nullptr)); EXPECT_EQ(ZE_RESULT_ERROR_UNINITIALIZED, zesDeviceEnumFirmwares(hSysman, &count, nullptr)); EXPECT_EQ(ZE_RESULT_ERROR_UNINITIALIZED, 
zesDeviceEnumMemoryModules(hSysman, &count, nullptr)); EXPECT_EQ(ZE_RESULT_ERROR_UNINITIALIZED, zesDeviceEnumFabricPorts(hSysman, &count, nullptr)); EXPECT_EQ(ZE_RESULT_ERROR_UNINITIALIZED, zesDeviceEnumTemperatureSensors(hSysman, &count, nullptr)); EXPECT_EQ(ZE_RESULT_ERROR_UNINITIALIZED, zesDeviceEnumRasErrorSets(hSysman, &count, nullptr)); EXPECT_EQ(ZE_RESULT_ERROR_UNINITIALIZED, zesDeviceEnumFans(hSysman, &count, nullptr)); EXPECT_EQ(ZE_RESULT_ERROR_UNINITIALIZED, zesDeviceEnumDiagnosticTestSuites(hSysman, &count, nullptr)); EXPECT_EQ(ZE_RESULT_ERROR_UNINITIALIZED, zesDeviceEnumPerformanceFactorDomains(hSysman, &count, nullptr)); zes_device_properties_t properties; EXPECT_EQ(ZE_RESULT_ERROR_UNINITIALIZED, zesDeviceGetProperties(hSysman, &properties)); zes_device_state_t state; EXPECT_EQ(ZE_RESULT_ERROR_UNINITIALIZED, zesDeviceGetState(hSysman, &state)); EXPECT_EQ(ZE_RESULT_ERROR_UNINITIALIZED, zesDeviceReset(hSysman, true)); zes_pci_properties_t pciProperties; EXPECT_EQ(ZE_RESULT_ERROR_UNINITIALIZED, zesDevicePciGetProperties(hSysman, &pciProperties)); zes_pci_state_t pciState; EXPECT_EQ(ZE_RESULT_ERROR_UNINITIALIZED, zesDevicePciGetState(hSysman, &pciState)); zes_pci_stats_t pciStats; EXPECT_EQ(ZE_RESULT_ERROR_UNINITIALIZED, zesDevicePciGetStats(hSysman, &pciStats)); zes_event_type_flags_t events = ZES_EVENT_TYPE_FLAG_DEVICE_DETACH; EXPECT_EQ(ZE_RESULT_ERROR_UNINITIALIZED, zesDeviceEventRegister(hSysman, events)); static_cast(device)->setSysmanHandle(pSysmanDeviceOriginal); } using MockDeviceSysmanGetTest = Test; TEST_F(MockDeviceSysmanGetTest, GivenValidSysmanHandleSetInDeviceStructWhenGetThisSysmanHandleThenHandlesShouldBeSimilar) { SysmanDeviceImp *sysman = new SysmanDeviceImp(device->toHandle()); device->setSysmanHandle(sysman); EXPECT_EQ(sysman, device->getSysmanHandle()); } TEST_F(SysmanDeviceFixture, GivenValidDeviceHandleButSysmanInitFailsThenValidNullptrReceived) { ze_device_handle_t hSysman = device->toHandle(); auto pSysmanDevice = 
L0::SysmanDeviceHandleContext::init(hSysman); EXPECT_EQ(pSysmanDevice, nullptr); } TEST_F(SysmanDeviceFixture, GivenSetValidDrmHandleForDeviceWhenDoingOsSysmanDeviceInitThenSameDrmHandleIsRetrieved) { EXPECT_EQ(&pLinuxSysmanImp->getDrm(), device->getOsInterface().getDriverModel()->as()); } TEST_F(SysmanDeviceFixture, GivenCreateFsAccessHandleWhenCallinggetFsAccessThenCreatedFsAccessHandleWillBeRetrieved) { if (pLinuxSysmanImp->pFsAccess != nullptr) { //delete previously allocated pFsAccess delete pLinuxSysmanImp->pFsAccess; pLinuxSysmanImp->pFsAccess = nullptr; } pLinuxSysmanImp->pFsAccess = FsAccess::create(); EXPECT_EQ(&pLinuxSysmanImp->getFsAccess(), pLinuxSysmanImp->pFsAccess); } TEST_F(SysmanDeviceFixture, GivenPublicFsAccessClassWhenCallingDirectoryExistsWithValidAndInvalidPathThenSuccessAndFailureAreReturnedRespectively) { PublicFsAccess *tempFsAccess = new PublicFsAccess(); tempFsAccess->accessSyscall = mockAccessSuccess; char cwd[PATH_MAX]; std::string path = getcwd(cwd, PATH_MAX); EXPECT_TRUE(tempFsAccess->directoryExists(path)); tempFsAccess->accessSyscall = mockAccessFailure; path = "invalidDiretory"; EXPECT_FALSE(tempFsAccess->directoryExists(path)); delete tempFsAccess; } TEST_F(SysmanDeviceFixture, GivenPublicSysfsAccessClassWhenCallingDirectoryExistsWithInvalidPathThenFalseIsRetured) { PublicFsAccess *tempSysfsAccess = new PublicFsAccess(); tempSysfsAccess->accessSyscall = mockAccessFailure; std::string path = "invalidDiretory"; EXPECT_FALSE(tempSysfsAccess->directoryExists(path)); delete tempSysfsAccess; } TEST_F(SysmanDeviceFixture, GivenPublicFsAccessClassWhenCallingCanWriteWithUserHavingWritePermissionsThenSuccessIsReturned) { PublicFsAccess *tempFsAccess = new PublicFsAccess(); tempFsAccess->statSyscall = mockStatSuccess; char cwd[PATH_MAX]; std::string path = getcwd(cwd, PATH_MAX); EXPECT_EQ(ZE_RESULT_SUCCESS, tempFsAccess->canWrite(path)); delete tempFsAccess; } TEST_F(SysmanDeviceFixture, 
GivenPublicFsAccessClassWhenCallingCanReadWithUserHavingReadPermissionsThenSuccessIsReturned) { PublicFsAccess *tempFsAccess = new PublicFsAccess(); tempFsAccess->statSyscall = mockStatSuccess; char cwd[PATH_MAX]; std::string path = getcwd(cwd, PATH_MAX); EXPECT_EQ(ZE_RESULT_SUCCESS, tempFsAccess->canRead(path)); delete tempFsAccess; } TEST_F(SysmanDeviceFixture, GivenPublicFsAccessClassWhenCallingCanWriteWithUserNotHavingWritePermissionsThenInsufficientIsReturned) { PublicFsAccess *tempFsAccess = new PublicFsAccess(); tempFsAccess->statSyscall = mockStatNoPermissions; char cwd[PATH_MAX]; std::string path = getcwd(cwd, PATH_MAX); EXPECT_EQ(ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS, tempFsAccess->canWrite(path)); delete tempFsAccess; } TEST_F(SysmanDeviceFixture, GivenPublicFsAccessClassWhenCallingCanReadWithUserNotHavingReadPermissionsThenInsufficientIsReturned) { PublicFsAccess *tempFsAccess = new PublicFsAccess(); tempFsAccess->statSyscall = mockStatNoPermissions; char cwd[PATH_MAX]; std::string path = getcwd(cwd, PATH_MAX); EXPECT_EQ(ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS, tempFsAccess->canRead(path)); delete tempFsAccess; } TEST_F(SysmanDeviceFixture, GivenPublicFsAccessClassWhenCallingCanReadWithInvalidPathThenErrorIsReturned) { PublicFsAccess *tempFsAccess = new PublicFsAccess(); tempFsAccess->statSyscall = mockStatFailure; std::string path = "invalidPath"; EXPECT_EQ(ZE_RESULT_ERROR_UNKNOWN, tempFsAccess->canRead(path)); delete tempFsAccess; } TEST_F(SysmanDeviceFixture, GivenPublicFsAccessClassWhenCallingCanWriteWithInvalidPathThenErrorIsReturned) { PublicFsAccess *tempFsAccess = new PublicFsAccess(); tempFsAccess->statSyscall = mockStatFailure; std::string path = "invalidPath"; EXPECT_EQ(ZE_RESULT_ERROR_UNKNOWN, tempFsAccess->canRead(path)); delete tempFsAccess; } TEST_F(SysmanDeviceFixture, GivenValidPathnameWhenCallingFsAccessExistsThenSuccessIsReturned) { auto FsAccess = pLinuxSysmanImp->getFsAccess(); char cwd[PATH_MAX]; std::string path = getcwd(cwd, 
PATH_MAX); EXPECT_TRUE(FsAccess.fileExists(path)); } TEST_F(SysmanDeviceFixture, GivenInvalidPathnameWhenCallingFsAccessExistsThenErrorIsReturned) { auto FsAccess = pLinuxSysmanImp->getFsAccess(); std::string path = "noSuchFileOrDirectory"; EXPECT_FALSE(FsAccess.fileExists(path)); } TEST_F(SysmanDeviceFixture, GivenCreateSysfsAccessHandleWhenCallinggetSysfsAccessThenCreatedSysfsAccessHandleHandleWillBeRetrieved) { if (pLinuxSysmanImp->pSysfsAccess != nullptr) { //delete previously allocated pSysfsAccess delete pLinuxSysmanImp->pSysfsAccess; pLinuxSysmanImp->pSysfsAccess = nullptr; } pLinuxSysmanImp->pSysfsAccess = SysfsAccess::create(""); EXPECT_EQ(&pLinuxSysmanImp->getSysfsAccess(), pLinuxSysmanImp->pSysfsAccess); } TEST_F(SysmanDeviceFixture, GivenCreateProcfsAccessHandleWhenCallinggetProcfsAccessThenCreatedProcfsAccessHandleWillBeRetrieved) { if (pLinuxSysmanImp->pProcfsAccess != nullptr) { //delete previously allocated pProcfsAccess delete pLinuxSysmanImp->pProcfsAccess; pLinuxSysmanImp->pProcfsAccess = nullptr; } pLinuxSysmanImp->pProcfsAccess = ProcfsAccess::create(); EXPECT_EQ(&pLinuxSysmanImp->getProcfsAccess(), pLinuxSysmanImp->pProcfsAccess); } TEST_F(SysmanDeviceFixture, GivenValidPidWhenCallingProcfsAccessIsAliveThenSuccessIsReturned) { auto ProcfsAccess = pLinuxSysmanImp->getProcfsAccess(); EXPECT_TRUE(ProcfsAccess.isAlive(getpid())); } TEST_F(SysmanDeviceFixture, GivenInvalidPidWhenCallingProcfsAccessIsAliveThenErrorIsReturned) { auto ProcfsAccess = pLinuxSysmanImp->getProcfsAccess(); EXPECT_FALSE(ProcfsAccess.isAlive(reinterpret_cast<::pid_t>(-1))); } TEST_F(SysmanDeviceFixture, GivenValidDeviceHandleThenSameHandleIsRetrievedFromOsSpecificCode) { EXPECT_EQ(pLinuxSysmanImp->getDeviceHandle(), device); } TEST_F(SysmanDeviceFixture, GivenPmuInterfaceHandleWhenCallinggetPmuInterfaceThenCreatedPmuInterfaceHandleWillBeRetrieved) { if (pLinuxSysmanImp->pPmuInterface != nullptr) { //delete previously allocated pPmuInterface delete 
pLinuxSysmanImp->pPmuInterface; pLinuxSysmanImp->pPmuInterface = nullptr; } pLinuxSysmanImp->pPmuInterface = PmuInterface::create(pLinuxSysmanImp); EXPECT_EQ(pLinuxSysmanImp->getPmuInterface(), pLinuxSysmanImp->pPmuInterface); } TEST_F(SysmanDeviceFixture, GivenValidPciPathWhileGettingRootPciPortThenReturnedPathIs2LevelUpThenTheCurrentPath) { const std::string mockBdf = "0000:00:02.0"; const std::string mockRealPath = "/sys/devices/pci0000:00/0000:00:01.0/0000:01:00.0/0000:02:01.0/" + mockBdf; const std::string mockRealPath2LevelsUp = "/sys/devices/pci0000:00/0000:00:01.0/0000:01:00.0"; std::string pciRootPort1 = pLinuxSysmanImp->getPciRootPortDirectoryPath(mockRealPath); EXPECT_EQ(pciRootPort1, mockRealPath2LevelsUp); std::string pciRootPort2 = pLinuxSysmanImp->getPciRootPortDirectoryPath("device"); EXPECT_EQ(pciRootPort2, "device"); } TEST_F(SysmanDeviceFixture, GivenValidPciPathWhileGettingRootPciPortThenReturnedPathIs1LevelAfterPCIePath) { const std::string mockBdf = "0000:00:02.0"; const std::string mockRealPath = "/sys/devices/pci0000:00/0000:00:01.0/0000:01:00.0/0000:02:01.0/" + mockBdf; const std::string mockRootPortPath = "/sys/devices/pci0000:00/0000:00:01.0"; std::string pciRootPort1 = pLinuxSysmanImp->getPciRootPortDirectoryPathForReset(mockRealPath); EXPECT_EQ(pciRootPort1, mockRootPortPath); std::string pciRootPort2 = pLinuxSysmanImp->getPciRootPortDirectoryPathForReset("device"); EXPECT_EQ(pciRootPort2, "device"); } TEST_F(SysmanDeviceFixture, GivenNullDrmHandleWhenGettingDrmHandleThenValidDrmHandleIsReturned) { pLinuxSysmanImp->releaseLocalDrmHandle(); EXPECT_NO_THROW(pLinuxSysmanImp->getDrm()); } TEST_F(SysmanDeviceFixture, GivenValidDeviceHandleWhenProductFamilyFromDeviceThenValidCorrectProductFamilyIsReturned) { auto productFamily = pLinuxSysmanImp->getDeviceHandle()->getNEODevice()->getHardwareInfo().platform.eProductFamily; EXPECT_EQ(productFamily, pLinuxSysmanImp->getProductFamily()); } TEST_F(SysmanMultiDeviceFixture, 
GivenValidDeviceHandleHavingSubdevicesWhenValidatingSysmanHandlesForSubdevicesThenSysmanHandleForSubdeviceWillBeSameAsSysmanHandleForDevice) { ze_device_handle_t hSysman = device->toHandle(); auto pSysmanDeviceOriginal = static_cast(device)->getSysmanHandle(); auto pSysmanDeviceLocal = L0::SysmanDeviceHandleContext::init(hSysman); EXPECT_EQ(pSysmanDeviceLocal, nullptr); static_cast(device)->setSysmanHandle(pSysmanDeviceLocal); uint32_t count = 0; EXPECT_EQ(ZE_RESULT_SUCCESS, device->getSubDevices(&count, nullptr)); std::vector subDeviceHandles(count, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, device->getSubDevices(&count, subDeviceHandles.data())); for (auto subDeviceHandle : subDeviceHandles) { L0::DeviceImp *subDeviceHandleImp = static_cast(Device::fromHandle(subDeviceHandle)); EXPECT_EQ(subDeviceHandleImp->getSysmanHandle(), device->getSysmanHandle()); } static_cast(device)->setSysmanHandle(pSysmanDeviceOriginal); } TEST_F(SysmanMultiDeviceFixture, GivenValidEffectiveUserIdCheckWhetherPermissionsReturnedByIsRootUserAreCorrect) { int euid = geteuid(); auto pFsAccess = pLinuxSysmanImp->getFsAccess(); if (euid == 0) { EXPECT_EQ(true, pFsAccess.isRootUser()); } else { EXPECT_EQ(false, pFsAccess.isRootUser()); } } TEST_F(SysmanMultiDeviceFixture, GivenSysmanEnvironmentVariableSetWhenCreateL0DeviceThenSysmanHandleCreateIsAttempted) { driverHandle->enableSysman = true; // In SetUp of SysmanMultiDeviceFixture, sysman handle for device is already created, so new sysman handle should not be created static_cast(device)->createSysmanHandle(true); EXPECT_EQ(device->getSysmanHandle(), pSysmanDevice); static_cast(device)->createSysmanHandle(false); EXPECT_EQ(device->getSysmanHandle(), pSysmanDevice); // delete previously allocated sysman handle and then attempt to create sysman handle again delete pSysmanDevice; device->setSysmanHandle(nullptr); static_cast(device)->createSysmanHandle(true); EXPECT_EQ(device->getSysmanHandle(), nullptr); 
static_cast(device)->createSysmanHandle(false); EXPECT_EQ(device->getSysmanHandle(), nullptr); } class UnknownDriverModel : public DriverModel { public: UnknownDriverModel() : DriverModel(DriverModelType::UNKNOWN) {} void setGmmInputArgs(void *args) override {} uint32_t getDeviceHandle() const override { return 0u; } PhysicalDevicePciBusInfo getPciBusInfo() const override { PhysicalDevicePciBusInfo pciBusInfo(PhysicalDevicePciBusInfo::InvalidValue, PhysicalDevicePciBusInfo::InvalidValue, PhysicalDevicePciBusInfo::InvalidValue, PhysicalDevicePciBusInfo::InvalidValue); return pciBusInfo; } PhyicalDevicePciSpeedInfo getPciSpeedInfo() const override { return {}; } bool isGpuHangDetected(OsContext &osContext) override { return false; } }; using SysmanUnknownDriverModelTest = Test; TEST_F(SysmanUnknownDriverModelTest, GivenDriverModelTypeIsNotDrmWhenExecutingSysmanOnLinuxThenErrorIsReturned) { neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]->osInterface = std::make_unique(); auto &osInterface = device->getOsInterface(); osInterface.setDriverModel(std::make_unique()); auto pSysmanDeviceImp = std::make_unique(device->toHandle()); auto pLinuxSysmanImp = static_cast(pSysmanDeviceImp->pOsSysman); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, pLinuxSysmanImp->init()); } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/memory/000077500000000000000000000000001422164147700303455ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/memory/CMakeLists.txt000066400000000000000000000003051422164147700331030ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) add_subdirectories() 
compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/memory/linux/000077500000000000000000000000001422164147700315045ustar00rootroot00000000000000CMakeLists.txt000066400000000000000000000022611422164147700341660ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/memory/linux# # Copyright (C) 2020-2022 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_TESTS_TOOLS_SYSMAN_MEMORY_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) if(SUPPORT_DG1 AND "${BRANCH_TYPE}" STREQUAL "") list(APPEND L0_TESTS_TOOLS_SYSMAN_MEMORY_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/mock_memory.h ${CMAKE_CURRENT_SOURCE_DIR}/test_sysman_memory_dg1.cpp ) else() list(APPEND L0_TESTS_TOOLS_SYSMAN_MEMORY_LINUX ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}mock_memory.h ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}test_sysman_memory.cpp ) endif() if((NEO_ENABLE_i915_PRELIM_DETECTION) AND ("${BRANCH_TYPE}" STREQUAL "")) if(SUPPORT_DG1) list(REMOVE_ITEM L0_TESTS_TOOLS_SYSMAN_MEMORY_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/test_sysman_memory_dg1.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_memory.h ) else() list(REMOVE_ITEM L0_TESTS_TOOLS_SYSMAN_MEMORY_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/test_sysman_memory.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_memory.h ) endif() endif() if(UNIX) target_sources(${TARGET_NAME} PRIVATE ${L0_TESTS_TOOLS_SYSMAN_MEMORY_LINUX} ) endif() mock_memory.h000066400000000000000000000045051422164147700341230ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/memory/linux/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/os_interface/linux/memory_info.h" #include "level_zero/core/test/unit_tests/mock.h" #include "level_zero/core/test/unit_tests/mocks/mock_memory_manager.h" #include "level_zero/tools/source/sysman/memory/linux/os_memory_imp.h" #include 
"level_zero/tools/source/sysman/memory/memory_imp.h" #include "sysman/linux/os_sysman_imp.h" using namespace NEO; constexpr uint64_t probedSizeRegionZero = 8 * GB; constexpr uint64_t probedSizeRegionOne = 16 * GB; constexpr uint64_t probedSizeRegionTwo = 4 * GB; constexpr uint64_t probedSizeRegionThree = 16 * GB; constexpr uint64_t unallocatedSizeRegionZero = 6 * GB; constexpr uint64_t unallocatedSizeRegionOne = 12 * GB; constexpr uint64_t unallocatedSizeRegionTwo = 25 * GB; constexpr uint64_t unallocatedSizeRegionThree = 3 * GB; namespace L0 { namespace ult { struct MockMemoryManagerSysman : public MemoryManagerMock { MockMemoryManagerSysman(NEO::ExecutionEnvironment &executionEnvironment) : MemoryManagerMock(const_cast(executionEnvironment)) {} }; class MemoryNeoDrm : public Drm { public: using Drm::memoryInfo; const int mockFd = 33; MemoryNeoDrm(RootDeviceEnvironment &rootDeviceEnvironment) : Drm(std::make_unique(mockFd, ""), rootDeviceEnvironment) {} }; template <> struct Mock : public MemoryNeoDrm { Mock(RootDeviceEnvironment &rootDeviceEnvironment) : MemoryNeoDrm(rootDeviceEnvironment) {} bool queryMemoryInfoMockPositiveTest() { std::vector regionInfo(2); regionInfo[0].region = {I915_MEMORY_CLASS_SYSTEM, 0}; regionInfo[0].probedSize = probedSizeRegionZero; regionInfo[0].unallocatedSize = unallocatedSizeRegionZero; regionInfo[1].region = {I915_MEMORY_CLASS_DEVICE, 0}; regionInfo[1].probedSize = probedSizeRegionOne; regionInfo[1].unallocatedSize = unallocatedSizeRegionOne; this->memoryInfo.reset(new MemoryInfo(regionInfo)); return true; } bool queryMemoryInfoMockReturnFalse() { return false; } bool queryMemoryInfoMockReturnFakeTrue() { return true; } MOCK_METHOD(bool, queryMemoryInfo, (), (override)); }; } // namespace ult } // namespace L0 test_sysman_memory.cpp000066400000000000000000000174031422164147700360770ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/memory/linux/* * Copyright (C) 2020-2021 Intel 
Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/memory/linux/os_memory_imp.h" #include "level_zero/tools/source/sysman/sysman_imp.h" #include "level_zero/tools/test/unit_tests/sources/sysman/linux/mock_sysman_fixture.h" #include "level_zero/tools/test/unit_tests/sources/sysman/memory/linux/mock_memory.h" #include "gtest/gtest.h" extern bool sysmanUltsEnable; namespace L0 { namespace ult { constexpr uint32_t memoryHandleComponentCount = 1u; class SysmanDeviceMemoryFixture : public SysmanDeviceFixture { protected: void SetUp() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } SysmanDeviceFixture::SetUp(); pMemoryManagerOld = device->getDriverHandle()->getMemoryManager(); pMemoryManager = new ::testing::NiceMock(*neoDevice->getExecutionEnvironment()); pMemoryManager->localMemorySupported[0] = false; device->getDriverHandle()->setMemoryManager(pMemoryManager); for (auto handle : pSysmanDeviceImp->pMemoryHandleContext->handleList) { delete handle; } pSysmanDeviceImp->pMemoryHandleContext->handleList.clear(); uint32_t subDeviceCount = 0; std::vector deviceHandles; // We received a device handle. 
Check for subdevices in this device Device::fromHandle(device->toHandle())->getSubDevices(&subDeviceCount, nullptr); if (subDeviceCount == 0) { deviceHandles.resize(1, device->toHandle()); } else { deviceHandles.resize(subDeviceCount, nullptr); Device::fromHandle(device->toHandle())->getSubDevices(&subDeviceCount, deviceHandles.data()); } pSysmanDeviceImp->pMemoryHandleContext->init(deviceHandles); } void TearDown() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } device->getDriverHandle()->setMemoryManager(pMemoryManagerOld); SysmanDeviceFixture::TearDown(); if (pMemoryManager != nullptr) { delete pMemoryManager; pMemoryManager = nullptr; } } void setLocalSupportedAndReinit(bool supported) { pMemoryManager->localMemorySupported[0] = supported; for (auto handle : pSysmanDeviceImp->pMemoryHandleContext->handleList) { delete handle; } pSysmanDeviceImp->pMemoryHandleContext->handleList.clear(); uint32_t subDeviceCount = 0; std::vector deviceHandles; // We received a device handle. Check for subdevices in this device Device::fromHandle(device->toHandle())->getSubDevices(&subDeviceCount, nullptr); if (subDeviceCount == 0) { deviceHandles.resize(1, device->toHandle()); } else { deviceHandles.resize(subDeviceCount, nullptr); Device::fromHandle(device->toHandle())->getSubDevices(&subDeviceCount, deviceHandles.data()); } pSysmanDeviceImp->pMemoryHandleContext->init(deviceHandles); } std::vector get_memory_handles(uint32_t count) { std::vector handles(count, nullptr); EXPECT_EQ(zesDeviceEnumMemoryModules(device->toHandle(), &count, handles.data()), ZE_RESULT_SUCCESS); return handles; } MockMemoryManagerSysman *pMemoryManager = nullptr; MemoryManager *pMemoryManagerOld; }; TEST_F(SysmanDeviceMemoryFixture, GivenComponentCountZeroWhenEnumeratingMemoryModulesWithLocalMemorySupportThenValidCountIsReturnedAndVerifySysmanPowerGetCallSucceeds) { setLocalSupportedAndReinit(false); uint32_t count = 0; EXPECT_EQ(zesDeviceEnumMemoryModules(device->toHandle(), &count, nullptr), 
ZE_RESULT_SUCCESS); EXPECT_EQ(count, 0u); } TEST_F(SysmanDeviceMemoryFixture, GivenInvalidComponentCountWhenEnumeratingMemoryModulesWithLocalMemorySupportThenValidCountIsReturnedAndVerifySysmanPowerGetCallSucceeds) { setLocalSupportedAndReinit(false); uint32_t count = 0; EXPECT_EQ(zesDeviceEnumMemoryModules(device->toHandle(), &count, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(count, 0u); count = count + 1; EXPECT_EQ(zesDeviceEnumMemoryModules(device->toHandle(), &count, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(count, 0u); } TEST_F(SysmanDeviceMemoryFixture, GivenComponentCountZeroWhenEnumeratingMemoryModulesWithNoLocalMemorySupportThenValidCountIsReturnedAndVerifySysmanPowerGetCallSucceeds) { setLocalSupportedAndReinit(true); uint32_t count = 0; EXPECT_EQ(zesDeviceEnumMemoryModules(device->toHandle(), &count, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(count, memoryHandleComponentCount); } TEST_F(SysmanDeviceMemoryFixture, GivenInvalidComponentCountWhenEnumeratingMemoryModulesWithNoLocalMemorySupportThenValidCountIsReturnedAndVerifySysmanPowerGetCallSucceeds) { setLocalSupportedAndReinit(true); uint32_t count = 0; EXPECT_EQ(zesDeviceEnumMemoryModules(device->toHandle(), &count, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(count, memoryHandleComponentCount); count = count + 1; EXPECT_EQ(zesDeviceEnumMemoryModules(device->toHandle(), &count, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(count, memoryHandleComponentCount); } TEST_F(SysmanDeviceMemoryFixture, GivenComponentCountZeroWhenEnumeratingMemoryModulesThenValidPowerHandlesIsReturned) { setLocalSupportedAndReinit(true); uint32_t count = 0; EXPECT_EQ(zesDeviceEnumMemoryModules(device->toHandle(), &count, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(count, memoryHandleComponentCount); std::vector handles(count, nullptr); EXPECT_EQ(zesDeviceEnumMemoryModules(device->toHandle(), &count, handles.data()), ZE_RESULT_SUCCESS); for (auto handle : handles) { EXPECT_NE(handle, nullptr); } } TEST_F(SysmanDeviceMemoryFixture, 
GivenValidMemoryHandleWhenCallingZetSysmanMemoryGetPropertiesThenVerifySysmanMemoryGetPropertiesCallReturnSuccess) { setLocalSupportedAndReinit(true); auto handles = get_memory_handles(memoryHandleComponentCount); for (auto handle : handles) { zes_mem_properties_t properties; EXPECT_EQ(zesMemoryGetProperties(handle, &properties), ZE_RESULT_SUCCESS); } } TEST_F(SysmanDeviceMemoryFixture, GivenValidMemoryHandleWhenCallingZetSysmanMemoryGetStateThenVerifySysmanMemoryGetStateCallReturnUnsupportedFeature) { setLocalSupportedAndReinit(true); auto handles = get_memory_handles(memoryHandleComponentCount); for (auto handle : handles) { zes_mem_state_t state; EXPECT_EQ(zesMemoryGetState(handle, &state), ZE_RESULT_ERROR_UNSUPPORTED_FEATURE); } } TEST_F(SysmanDeviceMemoryFixture, GivenValidMemoryHandleWhenCallingZetSysmanMemoryGetBandwidthThenVerifySysmanMemoryGetBandwidthCallReturnUnsupportedFeature) { setLocalSupportedAndReinit(true); auto handles = get_memory_handles(memoryHandleComponentCount); for (auto handle : handles) { zes_mem_bandwidth_t bandwidth; EXPECT_EQ(zesMemoryGetBandwidth(handle, &bandwidth), ZE_RESULT_ERROR_UNSUPPORTED_FEATURE); } } TEST_F(SysmanMultiDeviceFixture, GivenValidDevicePointerWhenGettingMemoryPropertiesThenValidMemoryPropertiesRetrieved) { zes_mem_properties_t properties = {}; ze_device_properties_t deviceProperties = {}; ze_bool_t isSubDevice = deviceProperties.flags & ZE_DEVICE_PROPERTY_FLAG_SUBDEVICE; Device::fromHandle(device)->getProperties(&deviceProperties); LinuxMemoryImp *pLinuxMemoryImp = new LinuxMemoryImp(pOsSysman, isSubDevice, deviceProperties.subdeviceId); EXPECT_EQ(ZE_RESULT_SUCCESS, pLinuxMemoryImp->getProperties(&properties)); EXPECT_EQ(properties.subdeviceId, deviceProperties.subdeviceId); EXPECT_EQ(properties.onSubdevice, isSubDevice); delete pLinuxMemoryImp; } } // namespace ult } // namespace L0 
test_sysman_memory_dg1.cpp000066400000000000000000000247441422164147700366400ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/memory/linux/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/sysman_imp.h" #include "level_zero/tools/test/unit_tests/sources/sysman/linux/mock_sysman_fixture.h" #include "mock_memory.h" extern bool sysmanUltsEnable; namespace L0 { namespace ult { constexpr uint32_t memoryHandleComponentCount = 1u; class SysmanDeviceMemoryFixture : public SysmanDeviceFixture { protected: Mock *pDrm = nullptr; Drm *pOriginalDrm = nullptr; void SetUp() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } SysmanDeviceFixture::SetUp(); pMemoryManagerOld = device->getDriverHandle()->getMemoryManager(); pMemoryManager = new ::testing::NiceMock(*neoDevice->getExecutionEnvironment()); pMemoryManager->localMemorySupported[0] = false; device->getDriverHandle()->setMemoryManager(pMemoryManager); pDrm = new NiceMock>(const_cast(neoDevice->getRootDeviceEnvironment())); pSysmanDevice = device->getSysmanHandle(); pSysmanDeviceImp = static_cast(pSysmanDevice); pOsSysman = pSysmanDeviceImp->pOsSysman; pLinuxSysmanImp = static_cast(pOsSysman); pLinuxSysmanImp->pDrm = pDrm; ON_CALL(*pDrm, queryMemoryInfo()) .WillByDefault(::testing::Invoke(pDrm, &Mock::queryMemoryInfoMockPositiveTest)); for (auto handle : pSysmanDeviceImp->pMemoryHandleContext->handleList) { delete handle; } pSysmanDeviceImp->pMemoryHandleContext->handleList.clear(); uint32_t subDeviceCount = 0; std::vector deviceHandles; // We received a device handle. 
Check for subdevices in this device Device::fromHandle(device->toHandle())->getSubDevices(&subDeviceCount, nullptr); if (subDeviceCount == 0) { deviceHandles.resize(1, device->toHandle()); } else { deviceHandles.resize(subDeviceCount, nullptr); Device::fromHandle(device->toHandle())->getSubDevices(&subDeviceCount, deviceHandles.data()); } pSysmanDeviceImp->pMemoryHandleContext->init(deviceHandles); } void TearDown() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } device->getDriverHandle()->setMemoryManager(pMemoryManagerOld); SysmanDeviceFixture::TearDown(); pLinuxSysmanImp->pDrm = pOriginalDrm; if (pDrm != nullptr) { delete pDrm; pDrm = nullptr; } if (pMemoryManager != nullptr) { delete pMemoryManager; pMemoryManager = nullptr; } } void setLocalSupportedAndReinit(bool supported) { pMemoryManager->localMemorySupported[0] = supported; for (auto handle : pSysmanDeviceImp->pMemoryHandleContext->handleList) { delete handle; } pSysmanDeviceImp->pMemoryHandleContext->handleList.clear(); uint32_t subDeviceCount = 0; std::vector deviceHandles; // We received a device handle. 
Check for subdevices in this device Device::fromHandle(device->toHandle())->getSubDevices(&subDeviceCount, nullptr); if (subDeviceCount == 0) { deviceHandles.resize(1, device->toHandle()); } else { deviceHandles.resize(subDeviceCount, nullptr); Device::fromHandle(device->toHandle())->getSubDevices(&subDeviceCount, deviceHandles.data()); } pSysmanDeviceImp->pMemoryHandleContext->init(deviceHandles); } std::vector get_memory_handles(uint32_t count) { std::vector handles(count, nullptr); EXPECT_EQ(zesDeviceEnumMemoryModules(device->toHandle(), &count, handles.data()), ZE_RESULT_SUCCESS); return handles; } MockMemoryManagerSysman *pMemoryManager = nullptr; MemoryManager *pMemoryManagerOld; }; TEST_F(SysmanDeviceMemoryFixture, GivenComponentCountZeroWhenEnumeratingMemoryModulesWithLocalMemorySupportThenValidCountIsReturnedAndVerifySysmanPowerGetCallSucceeds) { setLocalSupportedAndReinit(true); uint32_t count = 0; EXPECT_EQ(zesDeviceEnumMemoryModules(device->toHandle(), &count, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(count, memoryHandleComponentCount); } TEST_F(SysmanDeviceMemoryFixture, GivenInvalidComponentCountWhenEnumeratingMemoryModulesWithLocalMemorySupportThenValidCountIsReturnedAndVerifySysmanPowerGetCallSucceeds) { setLocalSupportedAndReinit(true); uint32_t count = 0; EXPECT_EQ(zesDeviceEnumMemoryModules(device->toHandle(), &count, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(count, memoryHandleComponentCount); count = count + 1; EXPECT_EQ(zesDeviceEnumMemoryModules(device->toHandle(), &count, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(count, memoryHandleComponentCount); } TEST_F(SysmanDeviceMemoryFixture, GivenComponentCountZeroWhenEnumeratingMemoryModulesWithLocalMemorySupportThenValidPowerHandlesIsReturned) { setLocalSupportedAndReinit(true); uint32_t count = 0; EXPECT_EQ(zesDeviceEnumMemoryModules(device->toHandle(), &count, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(count, memoryHandleComponentCount); std::vector handles(count, nullptr); 
EXPECT_EQ(zesDeviceEnumMemoryModules(device->toHandle(), &count, handles.data()), ZE_RESULT_SUCCESS); for (auto handle : handles) { EXPECT_NE(handle, nullptr); } } TEST_F(SysmanDeviceMemoryFixture, GivenComponentCountZeroWhenEnumeratingMemoryModulesWithNoLocalMemorySupportThenZeroCountIsReturnedAndVerifySysmanPowerGetCallSucceeds) { setLocalSupportedAndReinit(false); uint32_t count = 0; EXPECT_EQ(zesDeviceEnumMemoryModules(device->toHandle(), &count, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(count, 0u); } TEST_F(SysmanDeviceMemoryFixture, GivenInvalidComponentCountWhenEnumeratingMemoryModulesWithNoLocalMemorySupportThenZeroCountIsReturnedAndVerifySysmanPowerGetCallSucceeds) { setLocalSupportedAndReinit(false); uint32_t count = 0; EXPECT_EQ(zesDeviceEnumMemoryModules(device->toHandle(), &count, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(count, 0u); count = count + 1; EXPECT_EQ(zesDeviceEnumMemoryModules(device->toHandle(), &count, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(count, 0u); } TEST_F(SysmanDeviceMemoryFixture, GivenComponentCountZeroWhenEnumeratingMemoryModulesWithNoLocalMemorySupportThenValidPowerHandlesIsReturned) { setLocalSupportedAndReinit(false); uint32_t count = 0; EXPECT_EQ(zesDeviceEnumMemoryModules(device->toHandle(), &count, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(count, 0u); std::vector handles(count, nullptr); EXPECT_EQ(zesDeviceEnumMemoryModules(device->toHandle(), &count, handles.data()), ZE_RESULT_SUCCESS); for (auto handle : handles) { EXPECT_NE(handle, nullptr); } } TEST_F(SysmanDeviceMemoryFixture, GivenValidMemoryHandleWhenGettingPropertiesWithLocalMemoryThenCallSucceeds) { setLocalSupportedAndReinit(true); auto handles = get_memory_handles(memoryHandleComponentCount); for (auto handle : handles) { zes_mem_properties_t properties; ze_result_t result = zesMemoryGetProperties(handle, &properties); EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(properties.type, ZES_MEM_TYPE_DDR); EXPECT_EQ(properties.location, ZES_MEM_LOC_DEVICE); 
EXPECT_FALSE(properties.onSubdevice); EXPECT_EQ(properties.subdeviceId, 0u); EXPECT_EQ(properties.physicalSize, 0u); EXPECT_EQ(properties.numChannels, -1); EXPECT_EQ(properties.busWidth, -1); } } TEST_F(SysmanDeviceMemoryFixture, GivenValidMemoryHandleWhenGettingStateThenCallSucceeds) { setLocalSupportedAndReinit(true); auto handles = get_memory_handles(memoryHandleComponentCount); for (auto handle : handles) { zes_mem_state_t state; ze_result_t result = zesMemoryGetState(handle, &state); EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(state.health, ZES_MEM_HEALTH_OK); EXPECT_EQ(state.size, probedSizeRegionOne); EXPECT_EQ(state.free, unallocatedSizeRegionOne); } } TEST_F(SysmanDeviceMemoryFixture, GivenValidMemoryHandleAndIfQueryMemoryInfoFailsWhenGettingStateThenErrorIsReturned) { setLocalSupportedAndReinit(true); ON_CALL(*pDrm, queryMemoryInfo()) .WillByDefault(::testing::Invoke(pDrm, &Mock::queryMemoryInfoMockReturnFalse)); auto handles = get_memory_handles(memoryHandleComponentCount); for (auto handle : handles) { zes_mem_state_t state; EXPECT_EQ(zesMemoryGetState(handle, &state), ZE_RESULT_ERROR_UNSUPPORTED_FEATURE); } } TEST_F(SysmanDeviceMemoryFixture, GivenValidMemoryHandleAndIfQueryMemoryInfoAndIfMemoryInfoIsNotCorrectWhenGettingStateThenErrorIsReturned) { setLocalSupportedAndReinit(true); ON_CALL(*pDrm, queryMemoryInfo()) .WillByDefault(::testing::Invoke(pDrm, &Mock::queryMemoryInfoMockReturnFakeTrue)); auto handles = get_memory_handles(memoryHandleComponentCount); for (auto handle : handles) { zes_mem_state_t state; EXPECT_EQ(zesMemoryGetState(handle, &state), ZE_RESULT_ERROR_UNSUPPORTED_FEATURE); } } TEST_F(SysmanDeviceMemoryFixture, GivenValidMemoryHandleWhenGettingBandwidthThenZeResultErrorUnsupportedFeatureIsReturned) { setLocalSupportedAndReinit(true); auto handles = get_memory_handles(memoryHandleComponentCount); for (auto handle : handles) { zes_mem_bandwidth_t bandwidth; EXPECT_EQ(zesMemoryGetBandwidth(handle, &bandwidth), 
ZE_RESULT_ERROR_UNSUPPORTED_FEATURE); } } TEST_F(SysmanMultiDeviceFixture, GivenValidDevicePointerWhenGettingMemoryPropertiesThenValidMemoryPropertiesRetrieved) { zes_mem_properties_t properties = {}; ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES}; ze_bool_t isSubDevice = deviceProperties.flags & ZE_DEVICE_PROPERTY_FLAG_SUBDEVICE; Device::fromHandle(device)->getProperties(&deviceProperties); LinuxMemoryImp *pLinuxMemoryImp = new LinuxMemoryImp(pOsSysman, isSubDevice, deviceProperties.subdeviceId); EXPECT_EQ(ZE_RESULT_SUCCESS, pLinuxMemoryImp->getProperties(&properties)); EXPECT_EQ(properties.subdeviceId, deviceProperties.subdeviceId); EXPECT_EQ(properties.onSubdevice, isSubDevice); delete pLinuxMemoryImp; } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/memory/windows/000077500000000000000000000000001422164147700320375ustar00rootroot00000000000000CMakeLists.txt000066400000000000000000000005241422164147700345210ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/memory/windows# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(WIN32) target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/test_zes_memory.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_memory.h ) endif() mock_memory.h000066400000000000000000000144611422164147700344600ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/memory/windows/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/core/test/unit_tests/mock.h" #include "level_zero/core/test/unit_tests/mocks/mock_memory_manager.h" #include "level_zero/tools/test/unit_tests/sources/sysman/windows/mock_kmd_sys_manager.h" #include "sysman/memory/memory_imp.h" namespace L0 { namespace ult { struct 
MockMemoryManagerSysman : public MemoryManagerMock { MockMemoryManagerSysman(NEO::ExecutionEnvironment &executionEnvironment) : MemoryManagerMock(const_cast(executionEnvironment)) {} }; class MemoryKmdSysManager : public Mock {}; template <> struct Mock : public MemoryKmdSysManager { uint32_t mockMemoryType = KmdSysman::MemoryType::GDDR6; uint32_t mockMemoryLocation = KmdSysman::MemoryLocationsType::DeviceMemory; uint64_t mockMemoryPhysicalSize = 4294967296; uint64_t mockMemoryStolen = 0; uint64_t mockMemorySystem = 17179869184; uint64_t mockMemoryDedicated = 0; uint64_t mockMemoryFree = 4294813696; uint32_t mockMemoryBus = 256; uint32_t mockMemoryChannels = 2; uint32_t mockMemoryMaxBandwidth = 4256000000; uint32_t mockMemoryCurrentBandwidthRead = 561321; uint32_t mockMemoryCurrentBandwidthWrite = 664521; uint32_t mockMemoryDomains = 1; void getMemoryProperty(KmdSysman::GfxSysmanReqHeaderIn *pRequest, KmdSysman::GfxSysmanReqHeaderOut *pResponse) override { uint8_t *pBuffer = reinterpret_cast(pResponse); pBuffer += sizeof(KmdSysman::GfxSysmanReqHeaderOut); switch (pRequest->inRequestId) { case KmdSysman::Requests::Memory::NumMemoryDomains: { uint32_t *pValue = reinterpret_cast(pBuffer); *pValue = mockMemoryDomains; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; pResponse->outDataSize = sizeof(uint32_t); } break; case KmdSysman::Requests::Memory::MemoryType: { uint32_t *pValue = reinterpret_cast(pBuffer); *pValue = mockMemoryType; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; pResponse->outDataSize = sizeof(uint32_t); } break; case KmdSysman::Requests::Memory::MemoryLocation: { uint32_t *pValue = reinterpret_cast(pBuffer); *pValue = mockMemoryLocation; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; pResponse->outDataSize = sizeof(uint32_t); } break; case KmdSysman::Requests::Memory::PhysicalSize: { uint64_t *pValue = reinterpret_cast(pBuffer); *pValue = mockMemoryPhysicalSize; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; 
pResponse->outDataSize = sizeof(uint64_t); } break; case KmdSysman::Requests::Memory::StolenSize: { uint64_t *pValue = reinterpret_cast(pBuffer); *pValue = mockMemoryStolen; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; pResponse->outDataSize = sizeof(uint64_t); } break; case KmdSysman::Requests::Memory::SystemSize: { uint64_t *pValue = reinterpret_cast(pBuffer); *pValue = mockMemorySystem; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; pResponse->outDataSize = sizeof(uint64_t); } break; case KmdSysman::Requests::Memory::DedicatedSize: { uint64_t *pValue = reinterpret_cast(pBuffer); *pValue = mockMemoryDedicated; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; pResponse->outDataSize = sizeof(uint64_t); } break; case KmdSysman::Requests::Memory::CurrentFreeMemorySize: { uint64_t *pValue = reinterpret_cast(pBuffer); *pValue = mockMemoryFree; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; pResponse->outDataSize = sizeof(uint64_t); } break; case KmdSysman::Requests::Memory::MemoryWidth: { uint32_t *pValue = reinterpret_cast(pBuffer); *pValue = mockMemoryBus; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; pResponse->outDataSize = sizeof(uint32_t); } break; case KmdSysman::Requests::Memory::NumChannels: { uint32_t *pValue = reinterpret_cast(pBuffer); *pValue = mockMemoryChannels; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; pResponse->outDataSize = sizeof(uint32_t); } break; case KmdSysman::Requests::Memory::MaxBandwidth: { uint32_t *pValue = reinterpret_cast(pBuffer); *pValue = mockMemoryMaxBandwidth; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; pResponse->outDataSize = sizeof(uint32_t); } break; case KmdSysman::Requests::Memory::CurrentBandwidthRead: { uint32_t *pValue = reinterpret_cast(pBuffer); *pValue = mockMemoryCurrentBandwidthRead; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; pResponse->outDataSize = sizeof(uint32_t); } break; case KmdSysman::Requests::Memory::CurrentBandwidthWrite: { 
uint32_t *pValue = reinterpret_cast(pBuffer); *pValue = mockMemoryCurrentBandwidthWrite; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; pResponse->outDataSize = sizeof(uint32_t); } break; default: { pResponse->outDataSize = 0; pResponse->outReturnCode = KmdSysman::KmdSysmanFail; } break; } } void setMemoryProperty(KmdSysman::GfxSysmanReqHeaderIn *pRequest, KmdSysman::GfxSysmanReqHeaderOut *pResponse) override { uint8_t *pBuffer = reinterpret_cast(pRequest); pBuffer += sizeof(KmdSysman::GfxSysmanReqHeaderIn); pResponse->outDataSize = 0; pResponse->outReturnCode = KmdSysman::KmdSysmanFail; } Mock() = default; ~Mock() = default; }; class PublicWddmPowerImp : public L0::WddmMemoryImp { public: using WddmMemoryImp::pKmdSysManager; }; } // namespace ult } // namespace L0 test_zes_memory.cpp000066400000000000000000000200771422164147700357220ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/memory/windows/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/memory/windows/os_memory_imp.h" #include "level_zero/tools/test/unit_tests/sources/sysman/memory/windows/mock_memory.h" #include "level_zero/tools/test/unit_tests/sources/sysman/windows/mock_sysman_fixture.h" extern bool sysmanUltsEnable; namespace L0 { namespace ult { constexpr uint32_t memoryHandleComponentCount = 1u; class SysmanDeviceMemoryFixture : public SysmanDeviceFixture { protected: Mock *pKmdSysManager = nullptr; KmdSysManager *pOriginalKmdSysManager = nullptr; void SetUp() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } SysmanDeviceFixture::SetUp(); pMemoryManagerOld = device->getDriverHandle()->getMemoryManager(); pMemoryManager = new ::testing::NiceMock(*neoDevice->getExecutionEnvironment()); pMemoryManager->localMemorySupported[0] = false; device->getDriverHandle()->setMemoryManager(pMemoryManager); pKmdSysManager = new Mock; EXPECT_CALL(*pKmdSysManager, escape(_, _, 
_, _, _)) .WillRepeatedly(::testing::Invoke(pKmdSysManager, &Mock::mock_escape)); pOriginalKmdSysManager = pWddmSysmanImp->pKmdSysManager; pWddmSysmanImp->pKmdSysManager = pKmdSysManager; for (auto handle : pSysmanDeviceImp->pMemoryHandleContext->handleList) { delete handle; } pSysmanDeviceImp->pMemoryHandleContext->handleList.clear(); uint32_t subDeviceCount = 0; std::vector deviceHandles; // We received a device handle. Check for subdevices in this device Device::fromHandle(device->toHandle())->getSubDevices(&subDeviceCount, nullptr); if (subDeviceCount == 0) { deviceHandles.resize(1, device->toHandle()); } else { deviceHandles.resize(subDeviceCount, nullptr); Device::fromHandle(device->toHandle())->getSubDevices(&subDeviceCount, deviceHandles.data()); } pSysmanDeviceImp->pMemoryHandleContext->init(deviceHandles); } void TearDown() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } device->getDriverHandle()->setMemoryManager(pMemoryManagerOld); SysmanDeviceFixture::TearDown(); pWddmSysmanImp->pKmdSysManager = pOriginalKmdSysManager; if (pKmdSysManager != nullptr) { delete pKmdSysManager; pKmdSysManager = nullptr; } if (pMemoryManager != nullptr) { delete pMemoryManager; pMemoryManager = nullptr; } } void setLocalSupportedAndReinit(bool supported) { pMemoryManager->localMemorySupported[0] = supported; for (auto handle : pSysmanDeviceImp->pMemoryHandleContext->handleList) { delete handle; } pSysmanDeviceImp->pMemoryHandleContext->handleList.clear(); uint32_t subDeviceCount = 0; std::vector deviceHandles; // We received a device handle. 
Check for subdevices in this device Device::fromHandle(device->toHandle())->getSubDevices(&subDeviceCount, nullptr); if (subDeviceCount == 0) { deviceHandles.resize(1, device->toHandle()); } else { deviceHandles.resize(subDeviceCount, nullptr); Device::fromHandle(device->toHandle())->getSubDevices(&subDeviceCount, deviceHandles.data()); } pSysmanDeviceImp->pMemoryHandleContext->init(deviceHandles); } std::vector get_memory_handles(uint32_t count) { std::vector handles(count, nullptr); EXPECT_EQ(zesDeviceEnumMemoryModules(device->toHandle(), &count, handles.data()), ZE_RESULT_SUCCESS); return handles; } MockMemoryManagerSysman *pMemoryManager = nullptr; MemoryManager *pMemoryManagerOld; }; TEST_F(SysmanDeviceMemoryFixture, GivenComponentCountZeroWhenEnumeratingMemoryModulesWithLocalMemorySupportThenValidCountIsReturnedAndVerifySysmanPowerGetCallSucceeds) { setLocalSupportedAndReinit(true); uint32_t count = 0; EXPECT_EQ(zesDeviceEnumMemoryModules(device->toHandle(), &count, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(count, memoryHandleComponentCount); } TEST_F(SysmanDeviceMemoryFixture, GivenInvalidComponentCountWhenEnumeratingMemoryModulesWithLocalMemorySupportThenValidCountIsReturnedAndVerifySysmanPowerGetCallSucceeds) { setLocalSupportedAndReinit(true); uint32_t count = 0; EXPECT_EQ(zesDeviceEnumMemoryModules(device->toHandle(), &count, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(count, memoryHandleComponentCount); count = count + 1; EXPECT_EQ(zesDeviceEnumMemoryModules(device->toHandle(), &count, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(count, memoryHandleComponentCount); } TEST_F(SysmanDeviceMemoryFixture, GivenComponentCountZeroWhenEnumeratingMemoryModulesWithLocalMemorySupportThenValidPowerHandlesIsReturned) { setLocalSupportedAndReinit(true); uint32_t count = 0; EXPECT_EQ(zesDeviceEnumMemoryModules(device->toHandle(), &count, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(count, memoryHandleComponentCount); std::vector handles(count, nullptr); 
EXPECT_EQ(zesDeviceEnumMemoryModules(device->toHandle(), &count, handles.data()), ZE_RESULT_SUCCESS); for (auto handle : handles) { EXPECT_NE(handle, nullptr); } } TEST_F(SysmanDeviceMemoryFixture, GivenComponentCountZeroWhenEnumeratingMemoryModulesWithNoLocalMemorySupportThenZeroCountIsReturnedAndVerifySysmanPowerGetCallSucceeds) { setLocalSupportedAndReinit(false); uint32_t count = 0; EXPECT_EQ(zesDeviceEnumMemoryModules(device->toHandle(), &count, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(count, memoryHandleComponentCount); } TEST_F(SysmanDeviceMemoryFixture, GivenValidMemoryHandleWhenCallingGettingPropertiesWithLocalMemoryThenCallSucceeds) { pKmdSysManager->mockMemoryLocation = KmdSysman::MemoryLocationsType::DeviceMemory; setLocalSupportedAndReinit(true); auto handles = get_memory_handles(memoryHandleComponentCount); for (auto handle : handles) { zes_mem_properties_t properties; ze_result_t result = zesMemoryGetProperties(handle, &properties); EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(properties.type, ZES_MEM_TYPE_GDDR6); EXPECT_EQ(properties.location, ZES_MEM_LOC_DEVICE); EXPECT_FALSE(properties.onSubdevice); EXPECT_EQ(properties.subdeviceId, 0u); EXPECT_EQ(properties.physicalSize, pKmdSysManager->mockMemoryPhysicalSize); EXPECT_EQ(properties.numChannels, pKmdSysManager->mockMemoryChannels); EXPECT_EQ(properties.busWidth, pKmdSysManager->mockMemoryBus); } } TEST_F(SysmanDeviceMemoryFixture, DISABLED_GivenValidMemoryHandleWhenGettingStateThenCallSucceeds) { setLocalSupportedAndReinit(true); auto handles = get_memory_handles(memoryHandleComponentCount); for (auto handle : handles) { zes_mem_state_t state; ze_result_t result = zesMemoryGetState(handle, &state); EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(state.health, ZES_MEM_HEALTH_OK); EXPECT_GT(state.size, 0u); EXPECT_GT(state.free, 0u); } } TEST_F(SysmanDeviceMemoryFixture, GivenValidMemoryHandleWhenGettingBandwidthThenCallSucceeds) { setLocalSupportedAndReinit(true); auto handles = 
get_memory_handles(memoryHandleComponentCount); for (auto handle : handles) { zes_mem_bandwidth_t bandwidth; ze_result_t result = zesMemoryGetBandwidth(handle, &bandwidth); EXPECT_EQ(result, ZE_RESULT_SUCCESS); EXPECT_EQ(bandwidth.maxBandwidth, pKmdSysManager->mockMemoryMaxBandwidth * MbpsToBytesPerSecond); EXPECT_EQ(bandwidth.readCounter, pKmdSysManager->mockMemoryCurrentBandwidthRead); EXPECT_EQ(bandwidth.writeCounter, pKmdSysManager->mockMemoryCurrentBandwidthWrite); EXPECT_GT(bandwidth.timestamp, 0u); } } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/mocks/000077500000000000000000000000001422164147700301515ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/mocks/CMakeLists.txt000066400000000000000000000006211422164147700327100ustar00rootroot00000000000000# # Copyright (C) 2021-2022 Intel Corporation # # SPDX-License-Identifier: MIT # target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/mock_sysman_env_vars.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_sysman_device_info.h ${CMAKE_CURRENT_SOURCE_DIR}/test_sysman_device_info.cpp ) mock_sysman_device_info.h000066400000000000000000000032131422164147700351170ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/mocks/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/mocks/ult_device_factory.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/tools/source/sysman/sysman_imp.h" #include "mock_sysman_env_vars.h" using namespace NEO; using ::testing::_; namespace L0 { namespace ult { class SysmanMultiDeviceInfoFixture : public ::testing::Test { public: void SetUp() { if (!sysmanUltsEnable) { GTEST_SKIP(); } hwInfo = *NEO::defaultHwInfo.get(); 
hwInfo.gtSystemInfo.MultiTileArchInfo.IsValid = 1; hwInfo.gtSystemInfo.MultiTileArchInfo.TileCount = numSubDevices; hwInfo.gtSystemInfo.MultiTileArchInfo.Tile0 = 1; hwInfo.gtSystemInfo.MultiTileArchInfo.Tile1 = 1; auto executionEnvironment = MockDevice::prepareExecutionEnvironment(&hwInfo, 0u); neoDevice = NEO::MockDevice::createWithExecutionEnvironment(&hwInfo, executionEnvironment, 0u); NEO::DeviceVector devices; devices.push_back(std::unique_ptr(neoDevice)); driverHandle = std::make_unique>(); driverHandle->initialize(std::move(devices)); device = driverHandle->devices[0]; } void TearDown() {} NEO::MockDevice *neoDevice = nullptr; L0::Device *device = nullptr; std::unique_ptr> driverHandle; NEO::HardwareInfo hwInfo; const uint32_t numRootDevices = 1u; const uint32_t numSubDevices = 2u; }; } // namespace ult } // namespace L0mock_sysman_env_vars.h000066400000000000000000000020001422164147700344610ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/mocks/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/helpers/variable_backup.h" #include "shared/test/common/mocks/mock_io_functions.h" #include "shared/test/common/test_macros/test.h" extern bool sysmanUltsEnable; using namespace NEO; using envVariableMap = std::unordered_map; namespace L0 { namespace ult { class SysmanEnabledFixture : public ::testing::Test { public: void SetUp() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } mockableEnvValues = std::make_unique(); (*mockableEnvValues)["ZES_ENABLE_SYSMAN"] = "1"; mockableEnvValuesBackup = std::make_unique>(&IoFunctions::mockableEnvValues, mockableEnvValues.get()); } protected: std::unique_ptr> mockableEnvValuesBackup; std::unique_ptr mockableEnvValues; }; } // namespace ult } // namespace L0 
test_sysman_device_info.cpp000066400000000000000000000031441422164147700355030ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/mocks/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/test/unit_tests/sources/sysman/mocks/mock_sysman_device_info.h" namespace L0 { namespace ult { TEST_F(SysmanMultiDeviceInfoFixture, GivenDeviceWithMultipleTilesWhenOnlyTileOneIsEnabledThenGetSysmanDeviceInfoReturnsExpectedValues) { neoDevice->deviceBitfield.reset(); neoDevice->deviceBitfield.set(1); uint32_t subdeviceId = 0; ze_bool_t onSubdevice = false; SysmanDeviceImp::getSysmanDeviceInfo(device->toHandle(), subdeviceId, onSubdevice); EXPECT_EQ(subdeviceId, 1u); EXPECT_TRUE(onSubdevice); } TEST_F(SysmanMultiDeviceInfoFixture, GivenDeviceWithMultipleTilesEnabledThenGetSysmanDeviceInfoReturnsExpectedValues) { uint32_t subDeviceCount = 0; std::vector deviceHandles; Device::fromHandle(device->toHandle())->getSubDevices(&subDeviceCount, nullptr); if (subDeviceCount == 0) { deviceHandles.resize(1, device->toHandle()); } else { deviceHandles.resize(subDeviceCount, nullptr); Device::fromHandle(device->toHandle())->getSubDevices(&subDeviceCount, deviceHandles.data()); } for (auto &device : deviceHandles) { NEO::Device *neoDevice = Device::fromHandle(device)->getNEODevice(); uint32_t subdeviceId = 0; ze_bool_t onSubdevice = false; SysmanDeviceImp::getSysmanDeviceInfo(device, subdeviceId, onSubdevice); EXPECT_EQ(subdeviceId, static_cast(neoDevice)->getSubDeviceIndex()); EXPECT_TRUE(onSubdevice); } } } // namespace ult } // namespace L0compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/pci/000077500000000000000000000000001422164147700276105ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/pci/CMakeLists.txt000066400000000000000000000003051422164147700323460ustar00rootroot00000000000000# # Copyright (C) 
2020 Intel Corporation # # SPDX-License-Identifier: MIT # target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) add_subdirectories() compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/pci/linux/000077500000000000000000000000001422164147700307475ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/pci/linux/CMakeLists.txt000066400000000000000000000005231422164147700335070ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(UNIX) target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/mock_sysfs_pci.h ${CMAKE_CURRENT_SOURCE_DIR}/test_zes_pci.cpp ) endif() mock_sysfs_pci.h000066400000000000000000000140021422164147700340510ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/pci/linux/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/core/test/unit_tests/mock.h" #include "level_zero/tools/source/sysman/linux/fs_access.h" #include "level_zero/tools/source/sysman/pci/linux/os_pci_imp.h" #include "sysman/pci/pci_imp.h" namespace L0 { namespace ult { const std::string deviceDir("device"); const std::string resourceFile("device/resource"); const std::string maxLinkSpeedFile("device/max_link_speed"); const std::string maxLinkWidthFile("device/max_link_width"); const std::string mockBdf = "0000:00:02.0"; const std::string mockRealPath = "/sys/devices/pci0000:00/0000:00:01.0/0000:01:00.0/0000:02:01.0/" + mockBdf; const std::string mockRealPathConfig = mockRealPath + "/config"; const std::string mockRealPath2LevelsUp = "/sys/devices/pci0000:00/0000:00:01.0/0000:01:00.0"; constexpr double mockMaxLinkSpeed = 2.5; const std::vector mockReadBytes = { "0x00000000bf000000 0x00000000bfffffff 0x0000000000140204", "0x0000000000000000 0x0000000000000000 
0x0000000000000000", "0x0000000080000000 0x000000008fffffff 0x000000000014220c", "0x0000000000000000 0x0000000000000000 0x0000000000000000", "0x0000000000004000 0x000000000000403f 0x0000000000040101", "0x0000000000000000 0x0000000000000000 0x0000000000000000", "0x00000000000c0000 0x00000000000dffff 0x0000000000000212", "0x0000000000000000 0x0000000000000000 0x0000000000000000", "0x0000000000000000 0x0000000000000000 0x0000000000000000", "0x0000000000000000 0x0000000000000000 0x0000000000000000", "0x0000000000000000 0x0000000000000000 0x0000000000000000", "0x0000000000000000 0x0000000000000000 0x0000000000000000", "0x0000000000000000 0x0000000000000000 0x0000000000000000", }; class PciSysfsAccess : public SysfsAccess {}; class PcifsAccess : public FsAccess {}; template <> struct Mock : public PcifsAccess { int32_t mockMaxLinkWidth = 0; MOCK_METHOD(ze_result_t, read, (const std::string file, double &val), (override)); MOCK_METHOD(ze_result_t, read, (const std::string file, int32_t &val), (override)); ze_result_t getValDouble(const std::string file, double &val) { if (file.compare(mockRealPath2LevelsUp + '/' + "max_link_speed") == 0) { val = mockMaxLinkSpeed; return ZE_RESULT_SUCCESS; } return ZE_RESULT_ERROR_NOT_AVAILABLE; } ze_result_t getValInt(const std::string file, int32_t &val) { if (file.compare(mockRealPath2LevelsUp + '/' + "max_link_width") == 0) { val = mockMaxLinkWidth; return ZE_RESULT_SUCCESS; } return ZE_RESULT_ERROR_NOT_AVAILABLE; } ze_result_t setValInt(const std::string file, int32_t val) { if (file.compare(maxLinkWidthFile) == 0) { mockMaxLinkWidth = val; } return ZE_RESULT_SUCCESS; } }; template <> struct Mock : public PciSysfsAccess { int32_t mockMaxLinkWidth = 0; MOCK_METHOD(ze_result_t, read, (const std::string file, double &val), (override)); MOCK_METHOD(ze_result_t, read, (const std::string file, int32_t &val), (override)); MOCK_METHOD(ze_result_t, read, (const std::string file, std::vector &val), (override)); MOCK_METHOD(ze_result_t, 
readSymLink, (const std::string file, std::string &buf), (override)); MOCK_METHOD(ze_result_t, getRealPath, (const std::string file, std::string &buf), (override)); MOCK_METHOD(bool, isRootUser, (), (override)); bool checkRootUser() { return true; } ze_result_t getValDouble(const std::string file, double &val) { if (file.compare(maxLinkSpeedFile) == 0) { val = mockMaxLinkSpeed; return ZE_RESULT_SUCCESS; } return ZE_RESULT_ERROR_NOT_AVAILABLE; } ze_result_t setValInt(const std::string file, int32_t val) { if (file.compare(maxLinkWidthFile) == 0) { mockMaxLinkWidth = val; return ZE_RESULT_SUCCESS; } return ZE_RESULT_ERROR_NOT_AVAILABLE; } ze_result_t getValInt(const std::string file, int32_t &val) { if (file.compare(maxLinkWidthFile) == 0) { val = mockMaxLinkWidth; return ZE_RESULT_SUCCESS; } return ZE_RESULT_ERROR_NOT_AVAILABLE; } ze_result_t getValStringSymLinkEmpty(const std::string file, std::string &val) { if (file.compare(deviceDir) == 0) { val = "/sys/devices/pci0000:00/0000:00:01.0/0000:01:00.0/0000:02:01.0/"; return ZE_RESULT_SUCCESS; } return ZE_RESULT_ERROR_NOT_AVAILABLE; } ze_result_t getValStringSymLink(const std::string file, std::string &val) { if (file.compare(deviceDir) == 0) { val = mockBdf; return ZE_RESULT_SUCCESS; } return ZE_RESULT_ERROR_NOT_AVAILABLE; } ze_result_t getValStringRealPath(const std::string file, std::string &val) { if (file.compare(deviceDir) == 0) { val = mockRealPath; return ZE_RESULT_SUCCESS; } if (file.compare("device/config") == 0) { val = mockRealPathConfig; return ZE_RESULT_SUCCESS; } return ZE_RESULT_ERROR_NOT_AVAILABLE; } ze_result_t getValVector(const std::string file, std::vector &val) { if (file.compare(resourceFile) == 0) { val = mockReadBytes; return ZE_RESULT_SUCCESS; } return ZE_RESULT_ERROR_NOT_AVAILABLE; } Mock() = default; }; class PublicLinuxPciImp : public L0::LinuxPciImp { public: PublicLinuxPciImp(OsSysman *pOsSysman) : LinuxPciImp(pOsSysman) {} using LinuxPciImp::closeFunction; using 
LinuxPciImp::configMemory; using LinuxPciImp::openFunction; using LinuxPciImp::pciExtendedConfigRead; using LinuxPciImp::pfsAccess; using LinuxPciImp::preadFunction; using LinuxPciImp::pSysfsAccess; }; } // namespace ult } // namespace L0 test_zes_pci.cpp000066400000000000000000000567011422164147700341000ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/pci/linux/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/test/unit_tests/mocks/mock_memory_manager.h" #include "level_zero/tools/test/unit_tests/sources/sysman/linux/mock_sysman_fixture.h" #include "mock_sysfs_pci.h" #include extern bool sysmanUltsEnable; using ::testing::_; using ::testing::Invoke; using ::testing::Matcher; using ::testing::NiceMock; namespace L0 { namespace ult { constexpr int mockMaxLinkWidth = 1; constexpr int mockMaxLinkWidthInvalid = 255; constexpr uint32_t expectedBus = 0u; constexpr uint32_t expectedDevice = 2u; constexpr uint32_t expectedFunction = 0u; constexpr int32_t expectedWidth = 1u; constexpr int32_t expectedGen = 1u; // As mockMaxLinkSpeed = 2.5, hence expectedGen should be 1 // As mockMaxLinkSpeed = 2.5, hence, pcieSpeedWithEnc = mockMaxLinkWidth * (2.5 * 1000 * 8/10 * 125000) = 250000000 constexpr int64_t expectedBandwidth = 250000000u; constexpr int convertMegabitsPerSecondToBytesPerSecond = 125000; constexpr int convertGigabitToMegabit = 1000; constexpr double encodingGen1Gen2 = 0.8; constexpr double encodingGen3andAbove = 0.98461538461; constexpr int pciExtendedConfigSpaceSize = 4096; static int fakeFileDescriptor = 123; inline static int openMock(const char *pathname, int flags) { if (strcmp(pathname, mockRealPathConfig.c_str()) == 0) { return fakeFileDescriptor; } return -1; } inline static int openMockReturnFailure(const char *pathname, int flags) { return -1; } inline static int closeMock(int fd) { if (fd == fakeFileDescriptor) { return 0; } return -1; } 
ssize_t preadMock(int fd, void *buf, size_t count, off_t offset) { uint8_t *mockBuf = static_cast(buf); // Sample config values mockBuf[0x100] = 0x0e; mockBuf[0x102] = 0x01; mockBuf[0x103] = 0x42; mockBuf[0x420] = 0x15; mockBuf[0x422] = 0x01; mockBuf[0x423] = 0x22; mockBuf[0x425] = 0xf0; mockBuf[0x426] = 0x3f; mockBuf[0x428] = 0x22; mockBuf[0x429] = 0x11; mockBuf[0x220] = 0x24; mockBuf[0x222] = 0x01; mockBuf[0x223] = 0x32; mockBuf[0x320] = 0x10; mockBuf[0x322] = 0x01; mockBuf[0x323] = 0x40; mockBuf[0x400] = 0x18; mockBuf[0x402] = 0x01; return pciExtendedConfigSpaceSize; } ssize_t preadMockHeaderFailure(int fd, void *buf, size_t count, off_t offset) { return pciExtendedConfigSpaceSize; } ssize_t preadMockInvalidPos(int fd, void *buf, size_t count, off_t offset) { uint8_t *mockBuf = static_cast(buf); // Sample config values mockBuf[0x100] = 0x0e; mockBuf[0x102] = 0x01; mockBuf[0x420] = 0x15; mockBuf[0x422] = 0x01; mockBuf[0x423] = 0x22; mockBuf[0x220] = 0x24; mockBuf[0x222] = 0x01; mockBuf[0x223] = 0x32; mockBuf[0x320] = 0x10; mockBuf[0x322] = 0x01; mockBuf[0x323] = 0x40; mockBuf[0x400] = 0x18; mockBuf[0x402] = 0x01; return pciExtendedConfigSpaceSize; } ssize_t preadMockLoop(int fd, void *buf, size_t count, off_t offset) { uint8_t *mockBuf = static_cast(buf); // Sample config values mockBuf[0x100] = 0x0e; mockBuf[0x102] = 0x01; mockBuf[0x103] = 0x42; mockBuf[0x420] = 0x16; mockBuf[0x422] = 0x01; mockBuf[0x423] = 0x42; mockBuf[0x220] = 0x24; mockBuf[0x222] = 0x01; mockBuf[0x223] = 0x32; mockBuf[0x320] = 0x10; mockBuf[0x322] = 0x01; mockBuf[0x323] = 0x40; mockBuf[0x400] = 0x18; mockBuf[0x402] = 0x01; return pciExtendedConfigSpaceSize; } ssize_t preadMockFailure(int fd, void *buf, size_t count, off_t offset) { return -1; } struct MockMemoryManagerPci : public MemoryManagerMock { MockMemoryManagerPci(NEO::ExecutionEnvironment &executionEnvironment) : MemoryManagerMock(const_cast(executionEnvironment)) {} }; class ZesPciFixture : public SysmanDeviceFixture { protected: 
std::unique_ptr> pSysfsAccess; std::unique_ptr> pfsAccess; MockMemoryManagerPci *memoryManager = nullptr; SysfsAccess *pOriginalSysfsAccess = nullptr; FsAccess *pOriginalFsAccess = nullptr; L0::PciImp *pPciImp; OsPci *pOsPciPrev; std::unique_ptr> driverHandle; MemoryManager *pMemoryManagerOld; void SetUp() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } SysmanDeviceFixture::SetUp(); pMemoryManagerOld = device->getDriverHandle()->getMemoryManager(); memoryManager = new ::testing::NiceMock(*neoDevice->getExecutionEnvironment()); memoryManager->localMemorySupported[0] = false; device->getDriverHandle()->setMemoryManager(memoryManager); pSysfsAccess = std::make_unique>>(); pOriginalSysfsAccess = pLinuxSysmanImp->pSysfsAccess; pLinuxSysmanImp->pSysfsAccess = pSysfsAccess.get(); pfsAccess = std::make_unique>>(); pOriginalFsAccess = pLinuxSysmanImp->pFsAccess; pLinuxSysmanImp->pFsAccess = pfsAccess.get(); pSysfsAccess->setValInt(maxLinkWidthFile, mockMaxLinkWidth); pfsAccess->setValInt(maxLinkWidthFile, mockMaxLinkWidth); ON_CALL(*pSysfsAccess.get(), read(_, Matcher &>(_))) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::getValVector)); ON_CALL(*pSysfsAccess.get(), read(_, Matcher(_))) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::getValInt)); ON_CALL(*pSysfsAccess.get(), readSymLink(_, _)) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::getValStringSymLink)); ON_CALL(*pSysfsAccess.get(), getRealPath(_, _)) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::getValStringRealPath)); ON_CALL(*pSysfsAccess.get(), read(_, Matcher(_))) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::getValDouble)); ON_CALL(*pSysfsAccess.get(), isRootUser()) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::checkRootUser)); ON_CALL(*pfsAccess.get(), read(_, Matcher(_))) .WillByDefault(::testing::Invoke(pfsAccess.get(), &Mock::getValDouble)); ON_CALL(*pfsAccess.get(), read(_, Matcher(_))) 
.WillByDefault(::testing::Invoke(pfsAccess.get(), &Mock::getValInt)); pPciImp = static_cast(pSysmanDeviceImp->pPci); pOsPciPrev = pPciImp->pOsPci; pPciImp->pOsPci = nullptr; memoryManager->localMemorySupported[0] = 0; PublicLinuxPciImp *pLinuxPciImp = new PublicLinuxPciImp(pOsSysman); pLinuxPciImp->openFunction = openMock; pLinuxPciImp->closeFunction = closeMock; pLinuxPciImp->preadFunction = preadMock; pLinuxPciImp->pciExtendedConfigRead(); pPciImp->pOsPci = static_cast(pLinuxPciImp); pPciImp->pciGetStaticFields(); } void TearDown() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } device->getDriverHandle()->setMemoryManager(pMemoryManagerOld); SysmanDeviceFixture::TearDown(); if (nullptr != pPciImp->pOsPci) { delete pPciImp->pOsPci; } pPciImp->pOsPci = pOsPciPrev; pPciImp = nullptr; unsetenv("ZES_ENABLE_SYSMAN"); pLinuxSysmanImp->pSysfsAccess = pOriginalSysfsAccess; pLinuxSysmanImp->pFsAccess = pOriginalFsAccess; if (memoryManager != nullptr) { delete memoryManager; memoryManager = nullptr; } } }; TEST_F(ZesPciFixture, GivenValidSysmanHandleWhenCallingzetSysmanPciGetPropertiesThenVerifyzetSysmanPciGetPropertiesCallSucceeds) { zes_pci_properties_t properties, propertiesBefore; memset(&properties.address.bus, std::numeric_limits::max(), sizeof(properties.address.bus)); memset(&properties.address.device, std::numeric_limits::max(), sizeof(properties.address.device)); memset(&properties.address.function, std::numeric_limits::max(), sizeof(properties.address.function)); memset(&properties.maxSpeed.gen, std::numeric_limits::max(), sizeof(properties.maxSpeed.gen)); memset(&properties.maxSpeed.width, std::numeric_limits::max(), sizeof(properties.maxSpeed.width)); memset(&properties.maxSpeed.maxBandwidth, std::numeric_limits::max(), sizeof(properties.maxSpeed.maxBandwidth)); propertiesBefore = properties; ze_result_t result = zesDevicePciGetProperties(device, &properties); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(properties.address.bus, expectedBus); 
EXPECT_EQ(properties.address.device, expectedDevice); EXPECT_EQ(properties.address.function, expectedFunction); EXPECT_EQ(properties.maxSpeed.gen, expectedGen); EXPECT_EQ(properties.maxSpeed.width, expectedWidth); EXPECT_EQ(properties.maxSpeed.maxBandwidth, expectedBandwidth); EXPECT_NE(properties.address.bus, propertiesBefore.address.bus); EXPECT_NE(properties.address.device, propertiesBefore.address.device); EXPECT_NE(properties.address.function, propertiesBefore.address.function); EXPECT_NE(properties.maxSpeed.gen, propertiesBefore.maxSpeed.gen); EXPECT_NE(properties.maxSpeed.width, propertiesBefore.maxSpeed.width); EXPECT_NE(properties.maxSpeed.maxBandwidth, propertiesBefore.maxSpeed.maxBandwidth); } TEST_F(ZesPciFixture, GivenValidSysmanHandleWhenSettingLmemSupportAndCallingzetSysmanPciGetPropertiesThenVerifyApiCallSucceeds) { zes_pci_properties_t properties, propertiesBefore; memoryManager->localMemorySupported[0] = 1; pPciImp->init(); memset(&properties.address.bus, std::numeric_limits::max(), sizeof(properties.address.bus)); memset(&properties.address.device, std::numeric_limits::max(), sizeof(properties.address.device)); memset(&properties.address.function, std::numeric_limits::max(), sizeof(properties.address.function)); memset(&properties.maxSpeed.gen, std::numeric_limits::max(), sizeof(properties.maxSpeed.gen)); memset(&properties.maxSpeed.width, std::numeric_limits::max(), sizeof(properties.maxSpeed.width)); memset(&properties.maxSpeed.maxBandwidth, std::numeric_limits::max(), sizeof(properties.maxSpeed.maxBandwidth)); propertiesBefore = properties; ze_result_t result = zesDevicePciGetProperties(device, &properties); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(properties.address.bus, expectedBus); EXPECT_EQ(properties.address.device, expectedDevice); EXPECT_EQ(properties.address.function, expectedFunction); EXPECT_EQ(properties.maxSpeed.gen, expectedGen); EXPECT_EQ(properties.maxSpeed.width, expectedWidth); 
EXPECT_EQ(properties.maxSpeed.maxBandwidth, expectedBandwidth); EXPECT_NE(properties.address.bus, propertiesBefore.address.bus); EXPECT_NE(properties.address.device, propertiesBefore.address.device); EXPECT_NE(properties.address.function, propertiesBefore.address.function); EXPECT_NE(properties.maxSpeed.gen, propertiesBefore.maxSpeed.gen); EXPECT_NE(properties.maxSpeed.width, propertiesBefore.maxSpeed.width); EXPECT_NE(properties.maxSpeed.maxBandwidth, propertiesBefore.maxSpeed.maxBandwidth); } TEST_F(ZesPciFixture, GivenValidSysmanHandleWhenCallingzetSysmanPciGetPropertiesAndBdfStringIsEmptyThenVerifyApiCallSucceeds) { zes_pci_properties_t properties; ON_CALL(*pSysfsAccess.get(), readSymLink(_, _)) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::getValStringSymLinkEmpty)); pPciImp->init(); ze_result_t result = zesDevicePciGetProperties(device, &properties); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(properties.address.bus, 0u); EXPECT_EQ(properties.address.device, 0u); EXPECT_EQ(properties.address.function, 0u); } TEST_F(ZesPciFixture, GivenValidSysmanHandleWhenGettingPCIWidthThenZeroWidthIsReturnedIfSystemProvidesInvalidValue) { int32_t width = 0; pSysfsAccess->setValInt(maxLinkWidthFile, mockMaxLinkWidthInvalid); pfsAccess->setValInt(maxLinkWidthFile, mockMaxLinkWidthInvalid); ON_CALL(*pSysfsAccess.get(), read(_, Matcher(_))) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::getValInt)); ON_CALL(*pfsAccess.get(), read(_, Matcher(_))) .WillByDefault(::testing::Invoke(pfsAccess.get(), &Mock::getValInt)); EXPECT_EQ(ZE_RESULT_SUCCESS, pPciImp->pOsPci->getMaxLinkWidth(width)); EXPECT_EQ(width, -1); } TEST_F(ZesPciFixture, GivenValidSysmanHandleWhenCallingzetSysmanPciGetBarsThenVerifyzetSysmanPciGetBarsCallSucceeds) { uint32_t count = 0; ze_result_t result = zesDevicePciGetBars(device, &count, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_GT(count, 0u); uint32_t testCount = count + 1; EXPECT_EQ(ZE_RESULT_SUCCESS, 
zesDevicePciGetBars(device, &testCount, nullptr)); EXPECT_EQ(count, testCount); std::vector pciBarProps(count); result = zesDevicePciGetBars(device, &count, pciBarProps.data()); EXPECT_EQ(ZE_RESULT_SUCCESS, result); for (uint32_t i = 0; i < count; i++) { EXPECT_LE(pciBarProps[i].type, ZES_PCI_BAR_TYPE_MEM); EXPECT_NE(pciBarProps[i].base, 0u); EXPECT_NE(pciBarProps[i].size, 0u); } } TEST_F(ZesPciFixture, GivenValidSysmanHandleWhenInitializingPciAndPciConfigOpenFailsThenResizableBarSupportWillBeFalse) { OsPci *pOsPciOriginal = pPciImp->pOsPci; PublicLinuxPciImp *pLinuxPciImpTemp = new PublicLinuxPciImp(pOsSysman); pLinuxPciImpTemp->openFunction = openMockReturnFailure; pLinuxPciImpTemp->closeFunction = closeMock; pLinuxPciImpTemp->preadFunction = preadMock; pLinuxPciImpTemp->pciExtendedConfigRead(); pPciImp->pOsPci = static_cast(pLinuxPciImpTemp); pPciImp->pciGetStaticFields(); EXPECT_FALSE(pPciImp->pOsPci->resizableBarSupported()); uint32_t barIndex = 2u; EXPECT_FALSE(pPciImp->pOsPci->resizableBarEnabled(barIndex)); delete pLinuxPciImpTemp; pPciImp->pOsPci = pOsPciOriginal; } TEST_F(ZesPciFixture, GivenValidSysmanHandleWhenInitializingPciAndPciConfigReadFailsThenResizableBarSupportWillBeFalse) { OsPci *pOsPciOriginal = pPciImp->pOsPci; PublicLinuxPciImp *pLinuxPciImpTemp = new PublicLinuxPciImp(pOsSysman); pLinuxPciImpTemp->openFunction = openMock; pLinuxPciImpTemp->closeFunction = closeMock; pLinuxPciImpTemp->preadFunction = preadMockFailure; pLinuxPciImpTemp->pciExtendedConfigRead(); pPciImp->pOsPci = static_cast(pLinuxPciImpTemp); pPciImp->pciGetStaticFields(); EXPECT_FALSE(pPciImp->pOsPci->resizableBarSupported()); uint32_t barIndex = 2u; EXPECT_FALSE(pPciImp->pOsPci->resizableBarEnabled(barIndex)); delete pLinuxPciImpTemp; pPciImp->pOsPci = pOsPciOriginal; } TEST_F(ZesPciFixture, GivenSysmanHandleWhenCheckForResizableBarSupportAndHeaderFieldNotPresentThenResizableBarSupportFalseReturned) { OsPci *pOsPciOriginal = pPciImp->pOsPci; PublicLinuxPciImp 
*pLinuxPciImpTemp = new PublicLinuxPciImp(pOsSysman); pLinuxPciImpTemp->openFunction = openMock; pLinuxPciImpTemp->closeFunction = closeMock; pLinuxPciImpTemp->preadFunction = preadMockHeaderFailure; pLinuxPciImpTemp->pciExtendedConfigRead(); pPciImp->pOsPci = static_cast(pLinuxPciImpTemp); pPciImp->pciGetStaticFields(); EXPECT_FALSE(pPciImp->pOsPci->resizableBarSupported()); uint32_t barIndex = 2u; EXPECT_FALSE(pPciImp->pOsPci->resizableBarEnabled(barIndex)); delete pLinuxPciImpTemp; pPciImp->pOsPci = pOsPciOriginal; } TEST_F(ZesPciFixture, GivenSysmanHandleWhenCheckForResizableBarSupportAndCapabilityLinkListIsBrokenThenResizableBarSupportFalseReturned) { OsPci *pOsPciOriginal = pPciImp->pOsPci; PublicLinuxPciImp *pLinuxPciImpTemp = new PublicLinuxPciImp(pOsSysman); pLinuxPciImpTemp->openFunction = openMock; pLinuxPciImpTemp->closeFunction = closeMock; pLinuxPciImpTemp->preadFunction = preadMockInvalidPos; pLinuxPciImpTemp->pciExtendedConfigRead(); pPciImp->pOsPci = static_cast(pLinuxPciImpTemp); pPciImp->pciGetStaticFields(); EXPECT_FALSE(pPciImp->pOsPci->resizableBarSupported()); uint32_t barIndex = 2u; EXPECT_FALSE(pPciImp->pOsPci->resizableBarEnabled(barIndex)); delete pLinuxPciImpTemp; pPciImp->pOsPci = pOsPciOriginal; } TEST_F(ZesPciFixture, GivenSysmanHandleWhenCheckForResizableBarSupportAndIfRebarCapabilityNotPresentThenResizableBarSupportFalseReturned) { OsPci *pOsPciOriginal = pPciImp->pOsPci; PublicLinuxPciImp *pLinuxPciImpTemp = new PublicLinuxPciImp(pOsSysman); pLinuxPciImpTemp->openFunction = openMock; pLinuxPciImpTemp->closeFunction = closeMock; pLinuxPciImpTemp->preadFunction = preadMockLoop; pLinuxPciImpTemp->pciExtendedConfigRead(); pPciImp->pOsPci = static_cast(pLinuxPciImpTemp); pPciImp->pciGetStaticFields(); EXPECT_FALSE(pPciImp->pOsPci->resizableBarSupported()); uint32_t barIndex = 2u; EXPECT_FALSE(pPciImp->pOsPci->resizableBarEnabled(barIndex)); delete pLinuxPciImpTemp; pPciImp->pOsPci = pOsPciOriginal; } TEST_F(ZesPciFixture, 
GivenValidSysmanHandleWhenCallingzetSysmanPciGetBarsThenVerifyzetSysmanPciGetBarsCallSucceedsWith1_2Extension) { uint32_t count = 0; EXPECT_EQ(ZE_RESULT_SUCCESS, zesDevicePciGetBars(device, &count, nullptr)); EXPECT_NE(count, 0u); std::vector pBarProps(count); std::vector props1_2(count); for (uint32_t i = 0; i < count; i++) { props1_2[i].stype = ZES_STRUCTURE_TYPE_PCI_BAR_PROPERTIES_1_2; props1_2[i].pNext = nullptr; pBarProps[i].stype = ZES_STRUCTURE_TYPE_PCI_BAR_PROPERTIES; pBarProps[i].pNext = static_cast(&props1_2[i]); } EXPECT_EQ(ZE_RESULT_SUCCESS, zesDevicePciGetBars(device, &count, pBarProps.data())); for (uint32_t i = 0; i < count; i++) { EXPECT_EQ(pBarProps[i].stype, zes_structure_type_t::ZES_STRUCTURE_TYPE_PCI_BAR_PROPERTIES); EXPECT_LE(pBarProps[i].type, ZES_PCI_BAR_TYPE_MEM); EXPECT_NE(pBarProps[i].base, 0u); EXPECT_NE(pBarProps[i].size, 0u); EXPECT_EQ(props1_2[i].stype, zes_structure_type_t::ZES_STRUCTURE_TYPE_PCI_BAR_PROPERTIES_1_2); EXPECT_EQ(props1_2[i].resizableBarSupported, true); if (props1_2[i].index == 2) { EXPECT_EQ(props1_2[i].resizableBarEnabled, true); } else { EXPECT_EQ(props1_2[i].resizableBarEnabled, false); } EXPECT_LE(props1_2[i].type, ZES_PCI_BAR_TYPE_MEM); EXPECT_NE(props1_2[i].base, 0u); EXPECT_NE(props1_2[i].size, 0u); } } TEST_F(ZesPciFixture, GivenValidSysmanHandleWhenCallingzetSysmanPciGetBarsThenVerifyzetSysmanPciGetBarsCallSucceedsWith1_2ExtensionWrongType) { uint32_t count = 0; EXPECT_EQ(ZE_RESULT_SUCCESS, zesDevicePciGetBars(device, &count, nullptr)); EXPECT_NE(count, 0u); std::vector pBarProps(count); std::vector props1_2(count); for (uint32_t i = 0; i < count; i++) { props1_2[i].stype = ZES_STRUCTURE_TYPE_PCI_STATE; props1_2[i].pNext = nullptr; pBarProps[i].stype = ZES_STRUCTURE_TYPE_PCI_BAR_PROPERTIES; pBarProps[i].pNext = static_cast(&props1_2[i]); } EXPECT_EQ(ZE_RESULT_SUCCESS, zesDevicePciGetBars(device, &count, pBarProps.data())); for (uint32_t i = 0; i < count; i++) { EXPECT_EQ(pBarProps[i].stype, 
zes_structure_type_t::ZES_STRUCTURE_TYPE_PCI_BAR_PROPERTIES); EXPECT_LE(pBarProps[i].type, ZES_PCI_BAR_TYPE_MEM); EXPECT_NE(pBarProps[i].base, 0u); EXPECT_NE(pBarProps[i].size, 0u); EXPECT_EQ(props1_2[i].stype, zes_structure_type_t::ZES_STRUCTURE_TYPE_PCI_STATE); } } TEST_F(ZesPciFixture, GivenValidSysmanHandleWhenCallingzetSysmanPciGetBarsThenVerifyzetSysmanPciGetBarsCallSucceedsWith1_2ExtensionWithNullPtr) { uint32_t count = 0; EXPECT_EQ(ZE_RESULT_SUCCESS, zesDevicePciGetBars(device, &count, nullptr)); EXPECT_NE(count, 0u); zes_pci_bar_properties_t *pBarProps = new zes_pci_bar_properties_t[count]; for (uint32_t i = 0; i < count; i++) { pBarProps[i].pNext = nullptr; pBarProps[i].stype = zes_structure_type_t::ZES_STRUCTURE_TYPE_PCI_BAR_PROPERTIES; } EXPECT_EQ(ZE_RESULT_SUCCESS, zesDevicePciGetBars(device, &count, pBarProps)); for (uint32_t i = 0; i < count; i++) { EXPECT_LE(pBarProps[i].type, ZES_PCI_BAR_TYPE_MEM); EXPECT_NE(pBarProps[i].base, 0u); EXPECT_NE(pBarProps[i].size, 0u); } delete[] pBarProps; } TEST_F(ZesPciFixture, GivenValidSysmanHandleWhenCallingzetSysmanPciGetStateThenVerifyzetSysmanPciGetStateCallReturnNotSupported) { zes_pci_state_t state; EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, zesDevicePciGetState(device, &state)); } TEST_F(ZesPciFixture, GivenValidSysmanHandleWhenCallingzetSysmanPciGetStatsThenVerifyzetSysmanPciGetStatsCallReturnNotSupported) { zes_pci_stats_t stats; EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, zesDevicePciGetStats(device, &stats)); } TEST_F(ZesPciFixture, WhenConvertingLinkSpeedThenResultIsCorrect) { for (int32_t i = PciGenerations::PciGen1; i <= PciGenerations::PciGen5; i++) { double speed = convertPciGenToLinkSpeed(i); int32_t gen = convertLinkSpeedToPciGen(speed); EXPECT_EQ(i, gen); } EXPECT_EQ(-1, convertLinkSpeedToPciGen(0.0)); EXPECT_EQ(0.0, convertPciGenToLinkSpeed(0)); } // This test validates convertPcieSpeedFromGTsToBs method. 
// convertPcieSpeedFromGTsToBs(double maxLinkSpeedInGt) method will // return real PCIe speed in bytes per second as per below formula: // maxLinkSpeedInGt * (Gigabit to Megabit) * Encoding * (Mb/s to bytes/second) = // maxLinkSpeedInGt * convertGigabitToMegabit * Encoding * convertMegabitsPerSecondToBytesPerSecond; TEST_F(ZesPciFixture, WhenConvertingLinkSpeedFromGigatransfersPerSecondToBytesPerSecondThenResultIsCorrect) { int64_t speedPci320 = convertPcieSpeedFromGTsToBs(PciLinkSpeeds::Pci32_0GigatransfersPerSecond); EXPECT_EQ(speedPci320, static_cast(PciLinkSpeeds::Pci32_0GigatransfersPerSecond * convertMegabitsPerSecondToBytesPerSecond * convertGigabitToMegabit * encodingGen3andAbove)); int64_t speedPci160 = convertPcieSpeedFromGTsToBs(PciLinkSpeeds::Pci16_0GigatransfersPerSecond); EXPECT_EQ(speedPci160, static_cast(PciLinkSpeeds::Pci16_0GigatransfersPerSecond * convertMegabitsPerSecondToBytesPerSecond * convertGigabitToMegabit * encodingGen3andAbove)); int64_t speedPci80 = convertPcieSpeedFromGTsToBs(PciLinkSpeeds::Pci8_0GigatransfersPerSecond); EXPECT_EQ(speedPci80, static_cast(PciLinkSpeeds::Pci8_0GigatransfersPerSecond * convertMegabitsPerSecondToBytesPerSecond * convertGigabitToMegabit * encodingGen3andAbove)); int64_t speedPci50 = convertPcieSpeedFromGTsToBs(PciLinkSpeeds::Pci5_0GigatransfersPerSecond); EXPECT_EQ(speedPci50, static_cast(PciLinkSpeeds::Pci5_0GigatransfersPerSecond * convertMegabitsPerSecondToBytesPerSecond * convertGigabitToMegabit * encodingGen1Gen2)); int64_t speedPci25 = convertPcieSpeedFromGTsToBs(PciLinkSpeeds::Pci2_5GigatransfersPerSecond); EXPECT_EQ(speedPci25, static_cast(PciLinkSpeeds::Pci2_5GigatransfersPerSecond * convertMegabitsPerSecondToBytesPerSecond * convertGigabitToMegabit * encodingGen1Gen2)); EXPECT_EQ(0, convertPcieSpeedFromGTsToBs(0.0)); } } // namespace ult } // namespace L0 
compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/pci/windows/000077500000000000000000000000001422164147700313025ustar00rootroot00000000000000CMakeLists.txt000066400000000000000000000005161422164147700337650ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/pci/windows# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(WIN32) target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/test_zes_pci.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_pci.h ) endif() compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/pci/windows/mock_pci.h000066400000000000000000000124011422164147700332350ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/core/test/unit_tests/mock.h" #include "level_zero/tools/source/sysman/pci/windows/os_pci_imp.h" #include "level_zero/tools/test/unit_tests/sources/sysman/windows/mock_kmd_sys_manager.h" #include "sysman/pci/pci_imp.h" namespace L0 { namespace ult { struct MockMemoryManagerSysman : public MemoryManagerMock { MockMemoryManagerSysman(NEO::ExecutionEnvironment &executionEnvironment) : MemoryManagerMock(const_cast(executionEnvironment)) {} }; class PciKmdSysManager : public Mock {}; template <> struct Mock : public PciKmdSysManager { //PciCurrentDevice, PciParentDevice, PciRootPort uint32_t mockDomain[3] = {0, 0, 0}; uint32_t mockBus[3] = {0, 0, 3}; uint32_t mockDevice[3] = {2, 0, 0}; uint32_t mockFunction[3] = {0, 0, 0}; uint32_t mockMaxLinkSpeed[3] = {1, 0, 4}; uint32_t mockMaxLinkWidth[3] = {1, 0, 8}; uint32_t mockCurrentLinkSpeed[3] = {1, 0, 3}; uint32_t mockCurrentLinkWidth[3] = {1, 0, 1}; uint32_t mockResizableBarSupported[3] = {1, 1, 1}; uint32_t mockResizableBarEnabled[3] = {1, 1, 1}; void getPciProperty(KmdSysman::GfxSysmanReqHeaderIn *pRequest, 
KmdSysman::GfxSysmanReqHeaderOut *pResponse) override { uint8_t *pBuffer = reinterpret_cast(pResponse); pBuffer += sizeof(KmdSysman::GfxSysmanReqHeaderOut); KmdSysman::PciDomainsType domain = static_cast(pRequest->inCommandParam); switch (pRequest->inRequestId) { case KmdSysman::Requests::Pci::Domain: { uint32_t *pValue = reinterpret_cast(pBuffer); *pValue = mockDomain[domain]; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; pResponse->outDataSize = sizeof(uint32_t); } break; case KmdSysman::Requests::Pci::Bus: { uint32_t *pValue = reinterpret_cast(pBuffer); *pValue = mockBus[domain]; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; pResponse->outDataSize = sizeof(uint32_t); } break; case KmdSysman::Requests::Pci::Device: { uint32_t *pValue = reinterpret_cast(pBuffer); *pValue = mockDevice[domain]; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; pResponse->outDataSize = sizeof(uint32_t); } break; case KmdSysman::Requests::Pci::Function: { uint32_t *pValue = reinterpret_cast(pBuffer); *pValue = mockFunction[domain]; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; pResponse->outDataSize = sizeof(uint32_t); } break; case KmdSysman::Requests::Pci::MaxLinkSpeed: { uint32_t *pValue = reinterpret_cast(pBuffer); *pValue = mockMaxLinkSpeed[domain]; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; pResponse->outDataSize = sizeof(uint32_t); } break; case KmdSysman::Requests::Pci::MaxLinkWidth: { uint32_t *pValue = reinterpret_cast(pBuffer); *pValue = mockMaxLinkWidth[domain]; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; pResponse->outDataSize = sizeof(uint32_t); } break; case KmdSysman::Requests::Pci::CurrentLinkSpeed: { uint32_t *pValue = reinterpret_cast(pBuffer); *pValue = mockCurrentLinkSpeed[domain]; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; pResponse->outDataSize = sizeof(uint32_t); } break; case KmdSysman::Requests::Pci::CurrentLinkWidth: { uint32_t *pValue = reinterpret_cast(pBuffer); *pValue = 
mockCurrentLinkWidth[domain]; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; pResponse->outDataSize = sizeof(uint32_t); } break; case KmdSysman::Requests::Pci::ResizableBarSupported: { uint32_t *pValue = reinterpret_cast(pBuffer); *pValue = mockResizableBarSupported[domain]; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; pResponse->outDataSize = sizeof(uint32_t); } break; case KmdSysman::Requests::Pci::ResizableBarEnabled: { uint32_t *pValue = reinterpret_cast(pBuffer); *pValue = mockResizableBarEnabled[domain]; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; pResponse->outDataSize = sizeof(uint32_t); } break; default: { pResponse->outDataSize = 0; pResponse->outReturnCode = KmdSysman::KmdSysmanFail; } break; } } void setPciProperty(KmdSysman::GfxSysmanReqHeaderIn *pRequest, KmdSysman::GfxSysmanReqHeaderOut *pResponse) override { uint8_t *pBuffer = reinterpret_cast(pRequest); pBuffer += sizeof(KmdSysman::GfxSysmanReqHeaderIn); pResponse->outDataSize = 0; pResponse->outReturnCode = KmdSysman::KmdSysmanFail; } Mock() = default; ~Mock() = default; }; } // namespace ult } // namespace L0 test_zes_pci.cpp000066400000000000000000000217611422164147700344310ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/pci/windows/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/test/unit_tests/mocks/mock_memory_manager.h" #include "level_zero/tools/test/unit_tests/sources/sysman/windows/mock_sysman_fixture.h" #include "mock_pci.h" extern bool sysmanUltsEnable; namespace L0 { namespace ult { class SysmanDevicePciFixture : public SysmanDeviceFixture { protected: Mock *pKmdSysManager = nullptr; KmdSysManager *pOriginalKmdSysManager = nullptr; void SetUp() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } SysmanDeviceFixture::SetUp(); pMemoryManagerOld = device->getDriverHandle()->getMemoryManager(); pMemoryManager = new 
::testing::NiceMock(*neoDevice->getExecutionEnvironment()); pMemoryManager->localMemorySupported[0] = false; device->getDriverHandle()->setMemoryManager(pMemoryManager); pKmdSysManager = new Mock; EXPECT_CALL(*pKmdSysManager, escape(_, _, _, _, _)) .WillRepeatedly(::testing::Invoke(pKmdSysManager, &Mock::mock_escape)); pOriginalKmdSysManager = pWddmSysmanImp->pKmdSysManager; pWddmSysmanImp->pKmdSysManager = pKmdSysManager; delete pSysmanDeviceImp->pPci; pSysmanDeviceImp->pPci = new PciImp(pOsSysman); if (pSysmanDeviceImp->pPci) { pSysmanDeviceImp->pPci->init(); } } void TearDown() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } device->getDriverHandle()->setMemoryManager(pMemoryManagerOld); SysmanDeviceFixture::TearDown(); pWddmSysmanImp->pKmdSysManager = pOriginalKmdSysManager; if (pKmdSysManager != nullptr) { delete pKmdSysManager; pKmdSysManager = nullptr; } if (pMemoryManager != nullptr) { delete pMemoryManager; pMemoryManager = nullptr; } } void setLocalMemorySupportedAndReinit(bool supported) { pMemoryManager->localMemorySupported[0] = supported; delete pSysmanDeviceImp->pPci; pSysmanDeviceImp->pPci = new PciImp(pOsSysman); if (pSysmanDeviceImp->pPci) { pSysmanDeviceImp->pPci->init(); } } MockMemoryManagerSysman *pMemoryManager = nullptr; MemoryManager *pMemoryManagerOld; }; TEST_F(SysmanDevicePciFixture, GivenValidSysmanHandleWhenCallingzetSysmanPciGetPropertiesWithLocalMemoryThenVerifyzetSysmanPciGetPropertiesCallSucceeds) { setLocalMemorySupportedAndReinit(true); zes_pci_properties_t properties; ze_result_t result = zesDevicePciGetProperties(device, &properties); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(properties.address.domain, pKmdSysManager->mockDomain[KmdSysman::PciDomainsType::PciRootPort]); EXPECT_EQ(properties.address.bus, pKmdSysManager->mockBus[KmdSysman::PciDomainsType::PciRootPort]); EXPECT_EQ(properties.address.device, pKmdSysManager->mockDevice[KmdSysman::PciDomainsType::PciRootPort]); EXPECT_EQ(properties.address.function, 
pKmdSysManager->mockFunction[KmdSysman::PciDomainsType::PciRootPort]); EXPECT_EQ(properties.maxSpeed.gen, pKmdSysManager->mockMaxLinkSpeed[KmdSysman::PciDomainsType::PciRootPort]); EXPECT_EQ(properties.maxSpeed.width, pKmdSysManager->mockMaxLinkWidth[KmdSysman::PciDomainsType::PciRootPort]); } TEST_F(SysmanDevicePciFixture, GivenValidSysmanHandleWhenCallingzetSysmanPciGetPropertiesWithNoLocalMemoryThenVerifyzetSysmanPciGetPropertiesCallSucceeds) { setLocalMemorySupportedAndReinit(false); zes_pci_properties_t properties; ze_result_t result = zesDevicePciGetProperties(device, &properties); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(properties.address.domain, pKmdSysManager->mockDomain[KmdSysman::PciDomainsType::PciCurrentDevice]); EXPECT_EQ(properties.address.bus, pKmdSysManager->mockBus[KmdSysman::PciDomainsType::PciCurrentDevice]); EXPECT_EQ(properties.address.device, pKmdSysManager->mockDevice[KmdSysman::PciDomainsType::PciCurrentDevice]); EXPECT_EQ(properties.address.function, pKmdSysManager->mockFunction[KmdSysman::PciDomainsType::PciCurrentDevice]); EXPECT_EQ(properties.maxSpeed.gen, pKmdSysManager->mockMaxLinkSpeed[KmdSysman::PciDomainsType::PciCurrentDevice]); EXPECT_EQ(properties.maxSpeed.width, pKmdSysManager->mockMaxLinkWidth[KmdSysman::PciDomainsType::PciCurrentDevice]); } TEST_F(SysmanDevicePciFixture, GivenValidSysmanHandleWhenCallingzetSysmanPciGetBarsThenVerifyzetSysmanPciGetBarsCallSucceeds) { uint32_t count = 0; EXPECT_EQ(ZE_RESULT_SUCCESS, zesDevicePciGetBars(device, &count, nullptr)); } TEST_F(SysmanDevicePciFixture, GivenValidSysmanHandleWhenCallingzetSysmanPciGetBarsThenVerifyzetSysmanPciGetBarsCallSucceedsWith1_2Extension) { uint32_t count = 0; EXPECT_EQ(ZE_RESULT_SUCCESS, zesDevicePciGetBars(device, &count, nullptr)); EXPECT_NE(count, 0u); std::vector pBarProps(count); std::vector props1_2(count); for (uint32_t i = 0; i < count; i++) { props1_2[i].stype = ZES_STRUCTURE_TYPE_PCI_BAR_PROPERTIES_1_2; props1_2[i].pNext = nullptr; 
pBarProps[i].stype = ZES_STRUCTURE_TYPE_PCI_BAR_PROPERTIES; pBarProps[i].pNext = static_cast(&props1_2[i]); } EXPECT_EQ(ZE_RESULT_SUCCESS, zesDevicePciGetBars(device, &count, pBarProps.data())); for (uint32_t i = 0; i < count; i++) { EXPECT_EQ(pBarProps[i].stype, zes_structure_type_t::ZES_STRUCTURE_TYPE_PCI_BAR_PROPERTIES); EXPECT_EQ(props1_2[i].stype, zes_structure_type_t::ZES_STRUCTURE_TYPE_PCI_BAR_PROPERTIES_1_2); EXPECT_EQ(props1_2[i].resizableBarSupported, true); EXPECT_EQ(props1_2[i].resizableBarEnabled, true); } } TEST_F(SysmanDevicePciFixture, GivenValidSysmanHandleWhenCallingPciGetBarsThenVerifyAPICallSucceedsWith1_2ExtensionWithNullPtr) { uint32_t count = 0; EXPECT_EQ(ZE_RESULT_SUCCESS, zesDevicePciGetBars(device, &count, nullptr)); EXPECT_NE(count, 0u); zes_pci_bar_properties_t *pBarProps = new zes_pci_bar_properties_t[count]; for (uint32_t i = 0; i < count; i++) { pBarProps[i].pNext = nullptr; pBarProps[i].stype = zes_structure_type_t::ZES_STRUCTURE_TYPE_PCI_BAR_PROPERTIES; } EXPECT_EQ(ZE_RESULT_SUCCESS, zesDevicePciGetBars(device, &count, pBarProps)); delete[] pBarProps; pBarProps = nullptr; } TEST_F(SysmanDevicePciFixture, GivenValidSysmanHandleWhenCallingzetSysmanPciGetBarsThenVerifyzetSysmanPciGetBarsCallSucceedsWith1_2ExtensionWrongType) { uint32_t count = 0; EXPECT_EQ(ZE_RESULT_SUCCESS, zesDevicePciGetBars(device, &count, nullptr)); EXPECT_NE(count, 0u); std::vector pBarProps(count); std::vector props1_2(count); for (uint32_t i = 0; i < count; i++) { props1_2[i].stype = ZES_STRUCTURE_TYPE_PCI_STATE; props1_2[i].pNext = nullptr; pBarProps[i].stype = ZES_STRUCTURE_TYPE_PCI_BAR_PROPERTIES; pBarProps[i].pNext = static_cast(&props1_2[i]); } EXPECT_EQ(ZE_RESULT_SUCCESS, zesDevicePciGetBars(device, &count, pBarProps.data())); for (uint32_t i = 0; i < count; i++) { EXPECT_EQ(pBarProps[i].stype, zes_structure_type_t::ZES_STRUCTURE_TYPE_PCI_BAR_PROPERTIES); EXPECT_LE(pBarProps[i].type, ZES_PCI_BAR_TYPE_MEM); EXPECT_EQ(props1_2[i].stype, 
zes_structure_type_t::ZES_STRUCTURE_TYPE_PCI_STATE); } } TEST_F(SysmanDevicePciFixture, GivenValidSysmanHandleWhenCallingzetSysmanPciGetStatsWithLocalMemoryThenVerifyzetSysmanPciGetBarsCallSucceeds) { setLocalMemorySupportedAndReinit(true); zes_pci_state_t state; ze_result_t result = zesDevicePciGetState(device, &state); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(state.speed.gen, pKmdSysManager->mockCurrentLinkSpeed[KmdSysman::PciDomainsType::PciRootPort]); EXPECT_EQ(state.speed.width, pKmdSysManager->mockCurrentLinkWidth[KmdSysman::PciDomainsType::PciRootPort]); } TEST_F(SysmanDevicePciFixture, GivenValidSysmanHandleWhenCallingzetSysmanPciGetStatsWithNoLocalMemoryThenVerifyzetSysmanPciGetBarsCallSucceeds) { setLocalMemorySupportedAndReinit(false); zes_pci_state_t state; ze_result_t result = zesDevicePciGetState(device, &state); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(state.speed.gen, pKmdSysManager->mockCurrentLinkSpeed[KmdSysman::PciDomainsType::PciCurrentDevice]); EXPECT_EQ(state.speed.width, pKmdSysManager->mockCurrentLinkWidth[KmdSysman::PciDomainsType::PciCurrentDevice]); } TEST_F(SysmanDevicePciFixture, WhenConvertingLinkSpeedThenResultIsCorrect) { for (int32_t i = PciGenerations::PciGen1; i <= PciGenerations::PciGen5; i++) { double speed = convertPciGenToLinkSpeed(i); int32_t gen = convertLinkSpeedToPciGen(speed); EXPECT_EQ(i, gen); } EXPECT_EQ(-1, convertLinkSpeedToPciGen(0.0)); EXPECT_EQ(0.0, convertPciGenToLinkSpeed(0)); } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/performance/000077500000000000000000000000001422164147700313365ustar00rootroot00000000000000CMakeLists.txt000066400000000000000000000003051422164147700340150ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/performance# # Copyright (C) 2021 Intel Corporation # # SPDX-License-Identifier: MIT # target_sources(${TARGET_NAME} PRIVATE 
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) add_subdirectories() compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/performance/linux/000077500000000000000000000000001422164147700324755ustar00rootroot00000000000000CMakeLists.txt000066400000000000000000000013341422164147700351570ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/performance/linux# # Copyright (C) 2021-2022 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_TESTS_TOOLS_SYSMAN_PERFORMANCE_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}test_zes_performance.cpp ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}mock_sysfs_performance.h ) if((NEO_ENABLE_i915_PRELIM_DETECTION) AND ("${BRANCH_TYPE}" STREQUAL "")) list(REMOVE_ITEM L0_TESTS_TOOLS_SYSMAN_PERFORMANCE_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/test_zes_performance.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_sysfs_performance.h ) endif() if(UNIX) target_sources(${TARGET_NAME} PRIVATE ${L0_TESTS_TOOLS_SYSMAN_PERFORMANCE_LINUX} ) endif() mock_sysfs_performance.h000066400000000000000000000006541422164147700373350ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/performance/linux/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/core/test/unit_tests/mock.h" #include "level_zero/tools/source/sysman/performance/linux/os_performance_imp.h" #include "sysman/linux/fs_access.h" #include "sysman/performance/performance.h" #include "sysman/performance/performance_imp.h" namespace L0 { namespace ult { } // namespace ult } // namespace L0 test_zes_performance.cpp000066400000000000000000000122031422164147700373410ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/performance/linux/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include 
"level_zero/tools/test/unit_tests/sources/sysman/linux/mock_sysman_fixture.h" #include "mock_sysfs_performance.h" extern bool sysmanUltsEnable; using ::testing::_; using ::testing::Matcher; namespace L0 { namespace ult { constexpr uint32_t mockHandleCount = 0; class ZesPerformanceFixture : public SysmanMultiDeviceFixture { protected: std::vector deviceHandles; void SetUp() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } SysmanMultiDeviceFixture::SetUp(); pSysmanDeviceImp->pPerformanceHandleContext->handleList.clear(); uint32_t subDeviceCount = 0; Device::fromHandle(device->toHandle())->getSubDevices(&subDeviceCount, nullptr); if (subDeviceCount == 0) { deviceHandles.resize(1, device->toHandle()); } else { deviceHandles.resize(subDeviceCount, nullptr); Device::fromHandle(device->toHandle())->getSubDevices(&subDeviceCount, deviceHandles.data()); } pSysmanDeviceImp->pPerformanceHandleContext->init(deviceHandles, device); } void TearDown() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } SysmanMultiDeviceFixture::TearDown(); } std::vector get_perf_handles(uint32_t count) { std::vector handles(count, nullptr); EXPECT_EQ(zesDeviceEnumPerformanceFactorDomains(device->toHandle(), &count, handles.data()), ZE_RESULT_SUCCESS); return handles; } }; TEST_F(ZesPerformanceFixture, GivenValidSysmanHandleWhenRetrievingPerfThenZeroHandlesInReturn) { uint32_t count = 0; ze_result_t result = zesDeviceEnumPerformanceFactorDomains(device->toHandle(), &count, NULL); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(count, mockHandleCount); uint32_t testcount = count + 1; result = zesDeviceEnumPerformanceFactorDomains(device->toHandle(), &testcount, NULL); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(testcount, mockHandleCount); count = 0; std::vector handles(count, nullptr); EXPECT_EQ(zesDeviceEnumPerformanceFactorDomains(device->toHandle(), &count, handles.data()), ZE_RESULT_SUCCESS); EXPECT_EQ(count, mockHandleCount); } TEST_F(ZesPerformanceFixture, 
GivenValidOsSysmanPointerWhenCreatingOsPerformanceThenValidhandleForOsPerformanceIsRetrieved) { for (const auto &handle : deviceHandles) { Performance *pPerformance = new PerformanceImp(pOsSysman, handle, ZES_ENGINE_TYPE_FLAG_MEDIA); zes_perf_properties_t properties = {}; EXPECT_EQ(ZE_RESULT_SUCCESS, pPerformance->performanceGetProperties(&properties)); double factor = 0; EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, pPerformance->performanceGetConfig(&factor)); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, pPerformance->performanceSetConfig(factor)); EXPECT_FALSE(static_cast(pPerformance)->pOsPerformance->isPerformanceSupported()); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, static_cast(pPerformance)->pOsPerformance->osPerformanceGetProperties(properties)); zes_perf_handle_t perfHandle = pPerformance->toPerformanceHandle(); EXPECT_EQ(pPerformance, Performance::fromHandle(perfHandle)); delete pPerformance; } } TEST_F(ZesPerformanceFixture, GivenValidOfjectsOfClassPerformanceImpAndPerformanceHandleContextThenDuringObjectReleaseCheckDestructorBranches) { // Check destructors of PerformanceImp and PerformanceHandleContext std::unique_ptr pPerformanceHandleContext1 = std::make_unique(pOsSysman); for (const auto &deviceHandle : deviceHandles) { Performance *pPerformance1 = new PerformanceImp(pOsSysman, deviceHandle, ZES_ENGINE_TYPE_FLAG_MEDIA); pPerformanceHandleContext1->handleList.push_back(pPerformance1); Performance *pPerformance2 = new PerformanceImp(pOsSysman, deviceHandle, ZES_ENGINE_TYPE_FLAG_COMPUTE); pPerformanceHandleContext1->handleList.push_back(pPerformance2); } // Check branches of destructors of PerformanceImp and PerformanceHandleContext std::unique_ptr pPerformanceHandleContext2 = std::make_unique(pOsSysman); for (const auto &deviceHandle : deviceHandles) { Performance *pPerformance1 = new PerformanceImp(pOsSysman, deviceHandle, ZES_ENGINE_TYPE_FLAG_MEDIA); pPerformanceHandleContext2->handleList.push_back(pPerformance1); Performance *pPerformance2 
= new PerformanceImp(pOsSysman, deviceHandle, ZES_ENGINE_TYPE_FLAG_COMPUTE); pPerformanceHandleContext2->handleList.push_back(pPerformance2); } for (auto &handle : pPerformanceHandleContext2->handleList) { auto pPerformanceImp = static_cast(handle); delete pPerformanceImp->pOsPerformance; pPerformanceImp->pOsPerformance = nullptr; delete handle; handle = nullptr; } } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/power/000077500000000000000000000000001422164147700301715ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/power/CMakeLists.txt000066400000000000000000000003051422164147700327270ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) add_subdirectories() compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/power/linux/000077500000000000000000000000001422164147700313305ustar00rootroot00000000000000CMakeLists.txt000066400000000000000000000014661422164147700340200ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/power/linux# # Copyright (C) 2020-2022 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_TESTS_TOOLS_SYSMAN_POWER_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/test_zes_power.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_sysfs_power.h ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/mock_sysfs_power_helper.h ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/test_zes_power_helper.cpp ) if((NEO_ENABLE_i915_PRELIM_DETECTION) AND ("${BRANCH_TYPE}" STREQUAL "")) list(REMOVE_ITEM L0_TESTS_TOOLS_SYSMAN_POWER_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/mock_sysfs_power_helper.h ${CMAKE_CURRENT_SOURCE_DIR}/test_zes_power_helper.cpp ) endif() if(UNIX) target_sources(${TARGET_NAME} PRIVATE 
${L0_TESTS_TOOLS_SYSMAN_POWER_LINUX} ) endif() mock_sysfs_power.h000066400000000000000000000546261422164147700350330ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/power/linux/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/string.h" #include "level_zero/core/test/unit_tests/mock.h" #include "level_zero/tools/source/sysman/power/linux/os_power_imp.h" #include "level_zero/tools/test/unit_tests/sources/sysman/linux/mock_sysman_fixture.h" #include "sysman/linux/pmt/pmt.h" #include "sysman/power/power_imp.h" #include "sysman/sysman_imp.h" extern bool sysmanUltsEnable; using ::testing::DoDefault; using ::testing::Matcher; using ::testing::Return; namespace L0 { namespace ult { constexpr uint64_t setEnergyCounter = (83456u * 1048576u); constexpr uint64_t offset = 0x400; const std::string deviceName("device"); const std::string baseTelemSysFS("/sys/class/intel_pmt"); const std::string hwmonDir("device/hwmon"); const std::string i915HwmonDir("device/hwmon/hwmon2"); const std::string nonI915HwmonDir("device/hwmon/hwmon1"); const std::vector listOfMockedHwmonDirs = {"hwmon0", "hwmon1", "hwmon2", "hwmon3", "hwmon4"}; const std::string sustainedPowerLimitEnabled("power1_max_enable"); const std::string sustainedPowerLimit("power1_max"); const std::string sustainedPowerLimitInterval("power1_max_interval"); const std::string burstPowerLimitEnabled("power1_cap_enable"); const std::string burstPowerLimit("power1_cap"); const std::string energyCounterNode("energy1_input"); const std::string defaultPowerLimit("power_default_limit"); const std::string minPowerLimit("power_min_limit"); const std::string maxPowerLimit("power_max_limit"); constexpr uint64_t expectedEnergyCounter = 123456785u; constexpr uint32_t mockDefaultPowerLimitVal = 300000000; constexpr uint32_t mockMaxPowerLimitVal = 490000000; constexpr uint32_t mockMinPowerLimitVal = 10; const 
std::map deviceKeyOffsetMapPower = { {"PACKAGE_ENERGY", 0x400}, {"COMPUTE_TEMPERATURES", 0x68}, {"SOC_TEMPERATURES", 0x60}, {"CORE_TEMPERATURES", 0x6c}}; class PowerSysfsAccess : public SysfsAccess {}; template <> struct Mock : public PowerSysfsAccess { ze_result_t getValStringHelper(const std::string file, std::string &val); ze_result_t getValString(const std::string file, std::string &val) { ze_result_t result = ZE_RESULT_ERROR_UNKNOWN; if (file.compare(i915HwmonDir + "/" + "name") == 0) { val = "i915"; result = ZE_RESULT_SUCCESS; } else if (file.compare(nonI915HwmonDir + "/" + "name") == 0) { result = ZE_RESULT_ERROR_NOT_AVAILABLE; } else { val = "garbageI915"; result = ZE_RESULT_SUCCESS; } return result; } uint64_t sustainedPowerLimitEnabledVal = 1u; uint64_t sustainedPowerLimitVal = 0; uint64_t sustainedPowerLimitIntervalVal = 0; uint64_t burstPowerLimitEnabledVal = 0; uint64_t burstPowerLimitVal = 0; uint64_t energyCounterNodeVal = expectedEnergyCounter; ze_result_t getValUnsignedLongReturnErrorForBurstPowerLimit(const std::string file, uint64_t &val) { if (file.compare(i915HwmonDir + "/" + burstPowerLimitEnabled) == 0) { val = burstPowerLimitEnabledVal; } if (file.compare(i915HwmonDir + "/" + burstPowerLimit) == 0) { return ZE_RESULT_ERROR_NOT_AVAILABLE; } return ZE_RESULT_SUCCESS; } ze_result_t getValUnsignedLongReturnErrorForBurstPowerLimitEnabled(const std::string file, uint64_t &val) { if (file.compare(i915HwmonDir + "/" + burstPowerLimitEnabled) == 0) { return ZE_RESULT_ERROR_NOT_AVAILABLE; // mocking the condition when user passes nullptr for sustained and peak power in zesPowerGetLimit and burst power file is absent } return ZE_RESULT_SUCCESS; } ze_result_t getValUnsignedLongReturnErrorForSustainedPowerLimitEnabled(const std::string file, uint64_t &val) { if (file.compare(i915HwmonDir + "/" + sustainedPowerLimitEnabled) == 0) { return ZE_RESULT_ERROR_NOT_AVAILABLE; // mocking the condition when user passes nullptr for burst and peak power in 
zesPowerGetLimit and sustained power file is absent } return ZE_RESULT_SUCCESS; } ze_result_t getValUnsignedLongReturnsPowerLimitEnabledAsDisabled(const std::string file, uint64_t &val) { if (file.compare(i915HwmonDir + "/" + sustainedPowerLimitEnabled) == 0) { val = 0; return ZE_RESULT_SUCCESS; } else if (file.compare(i915HwmonDir + "/" + burstPowerLimitEnabled) == 0) { val = 0; return ZE_RESULT_SUCCESS; } return ZE_RESULT_SUCCESS; } ze_result_t getValUnsignedLongReturnErrorForSustainedPower(const std::string file, uint64_t &val) { if (file.compare(i915HwmonDir + "/" + sustainedPowerLimitEnabled) == 0) { val = 1; } else if (file.compare(i915HwmonDir + "/" + sustainedPowerLimit) == 0) { return ZE_RESULT_ERROR_NOT_AVAILABLE; } return ZE_RESULT_SUCCESS; } ze_result_t getValUnsignedLongReturnErrorForSustainedPowerInterval(const std::string file, uint64_t &val) { if (file.compare(i915HwmonDir + "/" + sustainedPowerLimitEnabled) == 0) { val = 1; } else if (file.compare(i915HwmonDir + "/" + sustainedPowerLimitInterval) == 0) { return ZE_RESULT_ERROR_NOT_AVAILABLE; } return ZE_RESULT_SUCCESS; } ze_result_t setValUnsignedLongReturnErrorForBurstPowerLimit(const std::string file, const int val) { if (file.compare(i915HwmonDir + "/" + burstPowerLimit) == 0) { return ZE_RESULT_ERROR_NOT_AVAILABLE; } return ZE_RESULT_SUCCESS; } ze_result_t setValUnsignedLongReturnErrorForBurstPowerLimitEnabled(const std::string file, const int val) { if (file.compare(i915HwmonDir + "/" + burstPowerLimitEnabled) == 0) { return ZE_RESULT_ERROR_NOT_AVAILABLE; } return ZE_RESULT_SUCCESS; } ze_result_t setValUnsignedLongReturnErrorForSustainedPowerLimitEnabled(const std::string file, const int val) { if (file.compare(i915HwmonDir + "/" + sustainedPowerLimitEnabled) == 0) { return ZE_RESULT_ERROR_NOT_AVAILABLE; } return ZE_RESULT_SUCCESS; } ze_result_t setValUnsignedLongReturnInsufficientForSustainedPowerLimitEnabled(const std::string file, const int val) { if (file.compare(i915HwmonDir + "/" + 
sustainedPowerLimitEnabled) == 0) { return ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS; } return ZE_RESULT_SUCCESS; } ze_result_t setValReturnErrorForSustainedPower(const std::string file, const int val) { if (file.compare(i915HwmonDir + "/" + sustainedPowerLimit) == 0) { return ZE_RESULT_ERROR_NOT_AVAILABLE; } return ZE_RESULT_SUCCESS; } ze_result_t setValReturnErrorForSustainedPowerInterval(const std::string file, const int val) { if (file.compare(i915HwmonDir + "/" + sustainedPowerLimitInterval) == 0) { return ZE_RESULT_ERROR_NOT_AVAILABLE; } return ZE_RESULT_SUCCESS; } ze_result_t getValUnsignedLongHelper(const std::string file, uint64_t &val); ze_result_t getValUnsignedLong(const std::string file, uint64_t &val) { ze_result_t result = ZE_RESULT_SUCCESS; if (file.compare(i915HwmonDir + "/" + sustainedPowerLimitEnabled) == 0) { val = sustainedPowerLimitEnabledVal; } else if (file.compare(i915HwmonDir + "/" + sustainedPowerLimit) == 0) { val = sustainedPowerLimitVal; } else if (file.compare(i915HwmonDir + "/" + sustainedPowerLimitInterval) == 0) { val = sustainedPowerLimitIntervalVal; } else if (file.compare(i915HwmonDir + "/" + burstPowerLimitEnabled) == 0) { val = burstPowerLimitEnabledVal; } else if (file.compare(i915HwmonDir + "/" + burstPowerLimit) == 0) { val = burstPowerLimitVal; } else if (file.compare(i915HwmonDir + "/" + energyCounterNode) == 0) { val = energyCounterNodeVal; } else { result = ZE_RESULT_ERROR_NOT_AVAILABLE; } return result; } ze_result_t getValUnsignedInt(const std::string file, uint32_t &val) { ze_result_t result = ZE_RESULT_SUCCESS; if (file.compare(i915HwmonDir + "/" + defaultPowerLimit) == 0) { val = mockDefaultPowerLimitVal; } else if (file.compare(i915HwmonDir + "/" + maxPowerLimit) == 0) { val = mockMaxPowerLimitVal; } else if (file.compare(i915HwmonDir + "/" + minPowerLimit) == 0) { val = mockMinPowerLimitVal; } else { result = ZE_RESULT_ERROR_NOT_AVAILABLE; } return result; } ze_result_t getValUnsignedIntMax(const std::string file, 
uint32_t &val) { ze_result_t result = ZE_RESULT_SUCCESS; if (file.compare(i915HwmonDir + "/" + maxPowerLimit) == 0) { val = std::numeric_limits::max(); } else { result = ZE_RESULT_ERROR_NOT_AVAILABLE; } return result; } ze_result_t setVal(const std::string file, const int val) { ze_result_t result = ZE_RESULT_SUCCESS; if (file.compare(i915HwmonDir + "/" + sustainedPowerLimitEnabled) == 0) { sustainedPowerLimitEnabledVal = static_cast(val); } else if (file.compare(i915HwmonDir + "/" + sustainedPowerLimit) == 0) { sustainedPowerLimitVal = static_cast(val); } else if (file.compare(i915HwmonDir + "/" + sustainedPowerLimitInterval) == 0) { sustainedPowerLimitIntervalVal = static_cast(val); } else if (file.compare(i915HwmonDir + "/" + burstPowerLimitEnabled) == 0) { burstPowerLimitEnabledVal = static_cast(val); } else if (file.compare(i915HwmonDir + "/" + burstPowerLimit) == 0) { burstPowerLimitVal = static_cast(val); } else { result = ZE_RESULT_ERROR_NOT_AVAILABLE; } return result; } ze_result_t getscanDirEntries(const std::string file, std::vector &listOfEntries) { if (file.compare(hwmonDir) == 0) { listOfEntries = listOfMockedHwmonDirs; return ZE_RESULT_SUCCESS; } return ZE_RESULT_ERROR_NOT_AVAILABLE; } Mock() = default; MOCK_METHOD(ze_result_t, read, (const std::string file, uint64_t &val), (override)); MOCK_METHOD(ze_result_t, read, (const std::string file, std::string &val), (override)); MOCK_METHOD(ze_result_t, read, (const std::string file, uint32_t &val), (override)); MOCK_METHOD(ze_result_t, write, (const std::string file, const int val), (override)); MOCK_METHOD(ze_result_t, scanDirEntries, (const std::string file, std::vector &listOfEntries), (override)); }; class PowerPmt : public PlatformMonitoringTech { public: PowerPmt(FsAccess *pFsAccess, ze_bool_t onSubdevice, uint32_t subdeviceId) : PlatformMonitoringTech(pFsAccess, onSubdevice, subdeviceId) {} using PlatformMonitoringTech::closeFunction; using PlatformMonitoringTech::keyOffsetMap; using 
PlatformMonitoringTech::openFunction; using PlatformMonitoringTech::preadFunction; using PlatformMonitoringTech::telemetryDeviceEntry; }; template <> struct Mock : public PowerPmt { ~Mock() override { rootDeviceTelemNodeIndex = 0; } Mock(FsAccess *pFsAccess, ze_bool_t onSubdevice, uint32_t subdeviceId) : PowerPmt(pFsAccess, onSubdevice, subdeviceId) {} void mockedInit(FsAccess *pFsAccess) { std::string rootPciPathOfGpuDevice = "/sys/devices/pci0000:89/0000:89:02.0/0000:8a:00.0"; if (ZE_RESULT_SUCCESS != PlatformMonitoringTech::enumerateRootTelemIndex(pFsAccess, rootPciPathOfGpuDevice)) { return; } telemetryDeviceEntry = "/sys/class/intel_pmt/telem2/telem"; } }; class PowerFsAccess : public FsAccess {}; template <> struct Mock : public PowerFsAccess { ze_result_t listDirectorySuccess(const std::string directory, std::vector &listOfTelemNodes) { if (directory.compare(baseTelemSysFS) == 0) { listOfTelemNodes.push_back("telem1"); listOfTelemNodes.push_back("telem2"); listOfTelemNodes.push_back("telem3"); listOfTelemNodes.push_back("telem4"); listOfTelemNodes.push_back("telem5"); return ZE_RESULT_SUCCESS; } return ZE_RESULT_ERROR_NOT_AVAILABLE; } ze_result_t listDirectoryFailure(const std::string directory, std::vector &events) { return ZE_RESULT_ERROR_NOT_AVAILABLE; } ze_result_t getRealPathSuccess(const std::string path, std::string &buf) { if (path.compare("/sys/class/intel_pmt/telem1") == 0) { buf = "/sys/devices/pci0000:89/0000:89:02.0/0000:86:00.0/0000:8b:02.0/0000:8e:00.1/pmt_telemetry.1.auto/intel_pmt/telem1"; } else if (path.compare("/sys/class/intel_pmt/telem2") == 0) { buf = "/sys/devices/pci0000:89/0000:89:02.0/0000:86:00.0/0000:8b:02.0/0000:8e:00.1/pmt_telemetry.1.auto/intel_pmt/telem2"; } else if (path.compare("/sys/class/intel_pmt/telem3") == 0) { buf = "/sys/devices/pci0000:89/0000:89:02.0/0000:8a:00.0/0000:8b:02.0/0000:8e:00.1/pmt_telemetry.1.auto/intel_pmt/telem3"; } else if (path.compare("/sys/class/intel_pmt/telem4") == 0) { buf = 
"/sys/devices/pci0000:89/0000:89:02.0/0000:8a:00.0/0000:8b:02.0/0000:8e:00.1/pmt_telemetry.1.auto/intel_pmt/telem4"; } else if (path.compare("/sys/class/intel_pmt/telem5") == 0) { buf = "/sys/devices/pci0000:89/0000:89:02.0/0000:8a:00.0/0000:8b:02.0/0000:8e:00.1/pmt_telemetry.1.auto/intel_pmt/telem5"; } else { return ZE_RESULT_ERROR_NOT_AVAILABLE; } return ZE_RESULT_SUCCESS; } ze_result_t getRealPathFailure(const std::string path, std::string &buf) { return ZE_RESULT_ERROR_NOT_AVAILABLE; } MOCK_METHOD(ze_result_t, listDirectory, (const std::string path, std::vector &list), (override)); MOCK_METHOD(ze_result_t, getRealPath, (const std::string path, std::string &buf), (override)); Mock() = default; }; class PublicLinuxPowerImp : public L0::LinuxPowerImp { public: PublicLinuxPowerImp(OsSysman *pOsSysman, ze_bool_t onSubdevice, uint32_t subdeviceId) : LinuxPowerImp(pOsSysman, onSubdevice, subdeviceId) {} using LinuxPowerImp::pPmt; }; class SysmanDevicePowerFixture : public SysmanDeviceFixture { protected: std::unique_ptr pPublicLinuxPowerImp; std::unique_ptr> pPmt; std::unique_ptr> pFsAccess; std::unique_ptr> pSysfsAccess; SysfsAccess *pSysfsAccessOld = nullptr; FsAccess *pFsAccessOriginal = nullptr; OsPower *pOsPowerOriginal = nullptr; std::vector deviceHandles; void SetUp() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } SysmanDeviceFixture::SetUp(); pFsAccess = std::make_unique>>(); pFsAccessOriginal = pLinuxSysmanImp->pFsAccess; pLinuxSysmanImp->pFsAccess = pFsAccess.get(); pSysfsAccessOld = pLinuxSysmanImp->pSysfsAccess; pSysfsAccess = std::make_unique>>(); pLinuxSysmanImp->pSysfsAccess = pSysfsAccess.get(); ON_CALL(*pFsAccess.get(), listDirectory(_, _)) .WillByDefault(::testing::Invoke(pFsAccess.get(), &Mock::listDirectorySuccess)); ON_CALL(*pFsAccess.get(), getRealPath(_, _)) .WillByDefault(::testing::Invoke(pFsAccess.get(), &Mock::getRealPathSuccess)); ON_CALL(*pSysfsAccess.get(), read(_, Matcher(_))) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), 
&Mock::getValString)); ON_CALL(*pSysfsAccess.get(), read(_, Matcher(_))) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::getValUnsignedLong)); ON_CALL(*pSysfsAccess.get(), read(_, Matcher(_))) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::getValUnsignedInt)); ON_CALL(*pSysfsAccess.get(), write(_, _)) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::setVal)); ON_CALL(*pSysfsAccess.get(), scanDirEntries(_, _)) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::getscanDirEntries)); uint32_t subDeviceCount = 0; Device::fromHandle(device->toHandle())->getSubDevices(&subDeviceCount, nullptr); if (subDeviceCount == 0) { deviceHandles.resize(1, device->toHandle()); } else { deviceHandles.resize(subDeviceCount, nullptr); Device::fromHandle(device->toHandle())->getSubDevices(&subDeviceCount, deviceHandles.data()); } for (auto &deviceHandle : deviceHandles) { ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES}; Device::fromHandle(deviceHandle)->getProperties(&deviceProperties); auto pPmt = new NiceMock>(pFsAccess.get(), deviceProperties.flags & ZE_DEVICE_PROPERTY_FLAG_SUBDEVICE, deviceProperties.subdeviceId); pPmt->mockedInit(pFsAccess.get()); pPmt->keyOffsetMap = deviceKeyOffsetMapPower; pLinuxSysmanImp->mapOfSubDeviceIdToPmtObject.emplace(deviceProperties.subdeviceId, pPmt); } pSysmanDeviceImp->pPowerHandleContext->init(deviceHandles, device->toHandle()); } void TearDown() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } SysmanDeviceFixture::TearDown(); pLinuxSysmanImp->pFsAccess = pFsAccessOriginal; pLinuxSysmanImp->pSysfsAccess = pSysfsAccessOld; } std::vector getPowerHandles(uint32_t count) { std::vector handles(count, nullptr); EXPECT_EQ(zesDeviceEnumPowerDomains(device->toHandle(), &count, handles.data()), ZE_RESULT_SUCCESS); return handles; } }; class SysmanDevicePowerMultiDeviceFixture : public SysmanMultiDeviceFixture { protected: std::unique_ptr pPublicLinuxPowerImp; std::unique_ptr> pPmt; 
std::unique_ptr> pFsAccess; std::unique_ptr> pSysfsAccess; SysfsAccess *pSysfsAccessOld = nullptr; FsAccess *pFsAccessOriginal = nullptr; OsPower *pOsPowerOriginal = nullptr; std::vector deviceHandles; std::map mapOriginal; void SetUp() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } SysmanMultiDeviceFixture::SetUp(); pFsAccess = std::make_unique>>(); pFsAccessOriginal = pLinuxSysmanImp->pFsAccess; pLinuxSysmanImp->pFsAccess = pFsAccess.get(); pSysfsAccessOld = pLinuxSysmanImp->pSysfsAccess; pSysfsAccess = std::make_unique>>(); pLinuxSysmanImp->pSysfsAccess = pSysfsAccess.get(); ON_CALL(*pFsAccess.get(), listDirectory(_, _)) .WillByDefault(::testing::Invoke(pFsAccess.get(), &Mock::listDirectorySuccess)); ON_CALL(*pFsAccess.get(), getRealPath(_, _)) .WillByDefault(::testing::Invoke(pFsAccess.get(), &Mock::getRealPathSuccess)); ON_CALL(*pSysfsAccess.get(), read(_, Matcher(_))) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::getValString)); ON_CALL(*pSysfsAccess.get(), read(_, Matcher(_))) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::getValUnsignedLong)); ON_CALL(*pSysfsAccess.get(), read(_, Matcher(_))) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::getValUnsignedInt)); ON_CALL(*pSysfsAccess.get(), write(_, _)) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::setVal)); ON_CALL(*pSysfsAccess.get(), scanDirEntries(_, _)) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::getscanDirEntries)); uint32_t subDeviceCount = 0; Device::fromHandle(device->toHandle())->getSubDevices(&subDeviceCount, nullptr); if (subDeviceCount == 0) { deviceHandles.resize(1, device->toHandle()); } else { deviceHandles.resize(subDeviceCount, nullptr); Device::fromHandle(device->toHandle())->getSubDevices(&subDeviceCount, deviceHandles.data()); } mapOriginal = pLinuxSysmanImp->mapOfSubDeviceIdToPmtObject; pLinuxSysmanImp->mapOfSubDeviceIdToPmtObject.clear(); for (auto &deviceHandle : deviceHandles) { ze_device_properties_t 
deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES}; Device::fromHandle(deviceHandle)->getProperties(&deviceProperties); auto pPmt = new NiceMock>(pFsAccess.get(), deviceProperties.flags & ZE_DEVICE_PROPERTY_FLAG_SUBDEVICE, deviceProperties.subdeviceId); pPmt->mockedInit(pFsAccess.get()); pPmt->keyOffsetMap = deviceKeyOffsetMapPower; pLinuxSysmanImp->mapOfSubDeviceIdToPmtObject.emplace(deviceProperties.subdeviceId, pPmt); } pSysmanDeviceImp->pPowerHandleContext->init(deviceHandles, device->toHandle()); } void TearDown() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } for (const auto &pmtMapElement : pLinuxSysmanImp->mapOfSubDeviceIdToPmtObject) { delete pmtMapElement.second; } SysmanMultiDeviceFixture::TearDown(); pLinuxSysmanImp->pFsAccess = pFsAccessOriginal; pLinuxSysmanImp->pSysfsAccess = pSysfsAccessOld; pLinuxSysmanImp->mapOfSubDeviceIdToPmtObject = mapOriginal; } std::vector getPowerHandles(uint32_t count) { std::vector handles(count, nullptr); EXPECT_EQ(zesDeviceEnumPowerDomains(device->toHandle(), &count, handles.data()), ZE_RESULT_SUCCESS); return handles; } }; } // namespace ult } // namespace L0 mock_sysfs_power_helper.h000066400000000000000000000001251422164147700363530ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/power/linux/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ test_zes_power.cpp000066400000000000000000000734121422164147700350400ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/power/linux/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/power/linux/os_power_imp.h" #include "level_zero/tools/test/unit_tests/sources/sysman/power/linux/mock_sysfs_power.h" namespace L0 { namespace ult { static int fakeFileDescriptor = 123; constexpr uint64_t convertJouleToMicroJoule = 1000000u; constexpr uint32_t 
powerHandleComponentCount = 1u; inline static int openMockPower(const char *pathname, int flags) { if (strcmp(pathname, "/sys/class/intel_pmt/telem2/telem") == 0) { return fakeFileDescriptor; } if (strcmp(pathname, "/sys/class/intel_pmt/telem3/telem") == 0) { return fakeFileDescriptor; } return -1; } inline static int closeMockPower(int fd) { if (fd == fakeFileDescriptor) { return 0; } return -1; } ssize_t preadMockPower(int fd, void *buf, size_t count, off_t offset) { uint64_t *mockBuf = static_cast(buf); *mockBuf = setEnergyCounter; return count; } TEST_F(SysmanDevicePowerFixture, GivenComponentCountZeroWhenEnumeratingPowerDomainsWhenhwmonInterfaceExistsThenValidCountIsReturnedAndVerifySysmanPowerGetCallSucceeds) { uint32_t count = 0; EXPECT_EQ(zesDeviceEnumPowerDomains(device->toHandle(), &count, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(count, powerHandleComponentCount); } TEST_F(SysmanDevicePowerFixture, GivenInvalidComponentCountWhenEnumeratingPowerDomainsWhenhwmonInterfaceExistsThenValidCountIsReturnedAndVerifySysmanPowerGetCallSucceeds) { uint32_t count = 0; EXPECT_EQ(zesDeviceEnumPowerDomains(device->toHandle(), &count, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(count, powerHandleComponentCount); count = count + 1; EXPECT_EQ(zesDeviceEnumPowerDomains(device->toHandle(), &count, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(count, powerHandleComponentCount); } TEST_F(SysmanDevicePowerFixture, GivenComponentCountZeroWhenEnumeratingPowerDomainsWhenhwmonInterfaceExistsThenValidPowerHandlesIsReturned) { uint32_t count = 0; EXPECT_EQ(zesDeviceEnumPowerDomains(device->toHandle(), &count, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(count, powerHandleComponentCount); std::vector handles(count, nullptr); EXPECT_EQ(zesDeviceEnumPowerDomains(device->toHandle(), &count, handles.data()), ZE_RESULT_SUCCESS); for (auto handle : handles) { EXPECT_NE(handle, nullptr); } } TEST_F(SysmanDevicePowerFixture, 
GivenValidPowerHandleWhenGettingPowerPropertiesWhenhwmonInterfaceExistsThenCallSucceeds) { auto handles = getPowerHandles(powerHandleComponentCount); for (auto handle : handles) { zes_power_properties_t properties; EXPECT_EQ(ZE_RESULT_SUCCESS, zesPowerGetProperties(handle, &properties)); EXPECT_FALSE(properties.onSubdevice); EXPECT_EQ(properties.subdeviceId, 0u); EXPECT_EQ(properties.canControl, true); EXPECT_EQ(properties.isEnergyThresholdSupported, false); EXPECT_EQ(properties.defaultLimit, (int32_t)(mockDefaultPowerLimitVal / milliFactor)); EXPECT_EQ(properties.maxLimit, (int32_t)(mockMaxPowerLimitVal / milliFactor)); EXPECT_EQ(properties.minLimit, (int32_t)(mockMinPowerLimitVal / milliFactor)); } } TEST_F(SysmanDevicePowerFixture, GivenValidPowerHandleWhenGettingPowerPropertiesWhenHwmonInterfaceExistThenLimitsReturnsUnkown) { EXPECT_CALL(*pSysfsAccess.get(), read(_, Matcher(_))) .WillRepeatedly(Return(ZE_RESULT_ERROR_NOT_AVAILABLE)); for (const auto &handle : pSysmanDeviceImp->pPowerHandleContext->handleList) { delete handle; } pSysmanDeviceImp->pPowerHandleContext->handleList.clear(); pSysmanDeviceImp->pPowerHandleContext->init(deviceHandles, device->toHandle()); auto handles = getPowerHandles(powerHandleComponentCount); for (auto handle : handles) { zes_power_properties_t properties; EXPECT_EQ(ZE_RESULT_SUCCESS, zesPowerGetProperties(handle, &properties)); EXPECT_FALSE(properties.onSubdevice); EXPECT_EQ(properties.subdeviceId, 0u); EXPECT_EQ(properties.defaultLimit, -1); EXPECT_EQ(properties.maxLimit, -1); EXPECT_EQ(properties.minLimit, -1); } } TEST_F(SysmanDevicePowerFixture, GivenValidPowerHandleWhenGettingPowerPropertiesWhenHwmonInterfaceExistThenMaxLimitIsUnsupported) { EXPECT_CALL(*pSysfsAccess.get(), read(_, Matcher(_))) .WillOnce(Return(ZE_RESULT_ERROR_NOT_AVAILABLE)) .WillOnce(Return(ZE_RESULT_ERROR_NOT_AVAILABLE)) .WillOnce(::testing::Invoke(pSysfsAccess.get(), &Mock::getValUnsignedIntMax)) .WillRepeatedly(DoDefault()); for (const auto &handle : 
pSysmanDeviceImp->pPowerHandleContext->handleList) {
        delete handle;
    }
    // Rebuild the handles so that they are initialised under the read expectation set above.
    pSysmanDeviceImp->pPowerHandleContext->handleList.clear();
    pSysmanDeviceImp->pPowerHandleContext->init(deviceHandles, device->toHandle());
    auto handles = getPowerHandles(powerHandleComponentCount);
    for (auto handle : handles) {
        zes_power_properties_t properties;
        EXPECT_EQ(ZE_RESULT_SUCCESS, zesPowerGetProperties(handle, &properties));
        EXPECT_FALSE(properties.onSubdevice);
        EXPECT_EQ(properties.subdeviceId, 0u);
        // All three limits are expected to be reported as -1 in this scenario.
        EXPECT_EQ(properties.defaultLimit, -1);
        EXPECT_EQ(properties.maxLimit, -1);
        EXPECT_EQ(properties.minLimit, -1);
    }
}

// First matching read fails, the second succeeds with the referenced output set to 0,
// the rest use the default action. Expected outcome: default and min limits are -1,
// max limit equals mockMaxPowerLimitVal / milliFactor.
// NOTE(review): template arguments of Matcher and of the static_cast below appear stripped.
TEST_F(SysmanDevicePowerFixture, GivenValidPowerHandleWhenGettingPowerPropertiesThenHwmonInterfaceExistAndMinLimitIsUnknown) {
    EXPECT_CALL(*pSysfsAccess.get(), read(_, Matcher(_)))
        .WillOnce(Return(ZE_RESULT_ERROR_NOT_AVAILABLE))
        .WillOnce(::testing::DoAll(::testing::SetArgReferee<1>(0), Return(ZE_RESULT_SUCCESS)))
        .WillRepeatedly(DoDefault());
    for (const auto &handle : pSysmanDeviceImp->pPowerHandleContext->handleList) {
        delete handle;
    }
    pSysmanDeviceImp->pPowerHandleContext->handleList.clear();
    pSysmanDeviceImp->pPowerHandleContext->init(deviceHandles, device->toHandle());
    auto handles = getPowerHandles(powerHandleComponentCount);
    for (auto handle : handles) {
        zes_power_properties_t properties;
        EXPECT_EQ(ZE_RESULT_SUCCESS, zesPowerGetProperties(handle, &properties));
        EXPECT_FALSE(properties.onSubdevice);
        EXPECT_EQ(properties.subdeviceId, 0u);
        EXPECT_EQ(properties.defaultLimit, -1);
        EXPECT_EQ(properties.maxLimit, static_cast(mockMaxPowerLimitVal / milliFactor));
        EXPECT_EQ(properties.minLimit, -1);
    }
}

// Installs the mock open/close/pread hooks on each device's PMT object, then makes
// sysfs reads fail; the energy counter is still expected to be returned successfully.
TEST_F(SysmanDevicePowerFixture, GivenValidPowerHandleWhenGettingPowerEnergyCounterFailedWhenHwmonInterfaceExistThenValidErrorCodeReturned) {
    auto handles = getPowerHandles(powerHandleComponentCount);
    for (auto &deviceHandle : deviceHandles) {
        ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES};
Device::fromHandle(deviceHandle)->getProperties(&deviceProperties);
        // NOTE(review): the cast's template argument is truncated in this text ("static_cast> *>");
        // restore the mock PMT type from the upstream source.
        auto pPmt = static_cast> *>(pLinuxSysmanImp->getPlatformMonitoringTechAccess(deviceProperties.subdeviceId));
        // Route the PMT object's file access through the mock functions.
        pPmt->openFunction = openMockPower;
        pPmt->closeFunction = closeMockPower;
        pPmt->preadFunction = preadMockPower;
    }
    // Every matching sysfs read is denied.
    EXPECT_CALL(*pSysfsAccess.get(), read(_, Matcher(_)))
        .WillRepeatedly(Return(ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS));
    for (auto handle : handles) {
        zes_power_energy_counter_t energyCounter = {};
        // Expected value is derived from setEnergyCounter, the value preadMockPower stores.
        uint64_t expectedEnergyCounter = convertJouleToMicroJoule * (setEnergyCounter / 1048576);
        ASSERT_EQ(ZE_RESULT_SUCCESS, zesPowerGetEnergyCounter(handle, &energyCounter));
        EXPECT_EQ(energyCounter.energy, expectedEnergyCounter);
    }
}

// Round trip: sustained and burst limits written through zesPowerSetLimits must be
// read back unchanged; a disabled burst limit reads back with power 0; peak limits read -1.
TEST_F(SysmanDevicePowerFixture, GivenSetPowerLimitsWhenGettingPowerLimitsWhenHwmonInterfaceExistThenLimitsSetEarlierAreRetrieved) {
    auto handles = getPowerHandles(powerHandleComponentCount);
    for (auto handle : handles) {
        zes_power_sustained_limit_t sustainedSet = {};
        zes_power_sustained_limit_t sustainedGet = {};
        sustainedSet.enabled = 1;
        sustainedSet.interval = 10;
        sustainedSet.power = 300000;
        EXPECT_EQ(ZE_RESULT_SUCCESS, zesPowerSetLimits(handle, &sustainedSet, nullptr, nullptr));
        EXPECT_EQ(ZE_RESULT_SUCCESS, zesPowerGetLimits(handle, &sustainedGet, nullptr, nullptr));
        EXPECT_EQ(sustainedGet.power, sustainedSet.power);
        EXPECT_EQ(sustainedGet.interval, sustainedSet.interval);

        zes_power_burst_limit_t burstSet = {};
        zes_power_burst_limit_t burstGet = {};
        burstSet.enabled = 1;
        burstSet.power = 375000;
        EXPECT_EQ(ZE_RESULT_SUCCESS, zesPowerSetLimits(handle, nullptr, &burstSet, nullptr));
        EXPECT_EQ(ZE_RESULT_SUCCESS, zesPowerGetLimits(handle, nullptr, &burstGet, nullptr));
        EXPECT_EQ(burstSet.enabled, burstGet.enabled);
        EXPECT_EQ(burstSet.power, burstGet.power);

        // Disable the burst limit and reset the output struct before reading it back.
        burstSet.enabled = 0;
        burstGet.enabled = 0;
        burstGet.power = 0;
        EXPECT_EQ(ZE_RESULT_SUCCESS, zesPowerSetLimits(handle, nullptr, &burstSet, nullptr));
        EXPECT_EQ(ZE_RESULT_SUCCESS, zesPowerGetLimits(handle,
nullptr, &burstGet, nullptr));
        EXPECT_EQ(burstSet.enabled, burstGet.enabled);
        EXPECT_EQ(burstGet.power, 0);

        // Peak limits are expected to be reported as -1 for both AC and DC.
        zes_power_peak_limit_t peakGet = {};
        EXPECT_EQ(ZE_RESULT_SUCCESS, zesPowerGetLimits(handle, nullptr, nullptr, &peakGet));
        EXPECT_EQ(peakGet.powerAC, -1);
        EXPECT_EQ(peakGet.powerDC, -1);
    }
}

// First matching read is served by getValUnsignedLongReturnErrorForBurstPowerLimit, the
// second fails with NOT_AVAILABLE; setting the burst limit succeeds but reading it back
// is expected to yield UNSUPPORTED_FEATURE.
// NOTE(review): template arguments of Matcher and Mock appear to have been stripped.
TEST_F(SysmanDevicePowerFixture, GivenGetPowerLimitsReturnErrorWhenGettingPowerLimitsWhenHwmonInterfaceExistForBurstPowerLimitThenProperErrorCodesReturned) {
    auto handles = getPowerHandles(powerHandleComponentCount);

    EXPECT_CALL(*pSysfsAccess.get(), read(_, Matcher(_)))
        .WillOnce(::testing::Invoke(pSysfsAccess.get(), &Mock::getValUnsignedLongReturnErrorForBurstPowerLimit))
        .WillOnce(Return(ZE_RESULT_ERROR_NOT_AVAILABLE));

    for (auto handle : handles) {
        zes_power_burst_limit_t burstSet = {};
        zes_power_burst_limit_t burstGet = {};
        burstSet.enabled = 1;
        burstSet.power = 375000;
        EXPECT_EQ(ZE_RESULT_SUCCESS, zesPowerSetLimits(handle, nullptr, &burstSet, nullptr));
        EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, zesPowerGetLimits(handle, nullptr, &burstGet, nullptr));
    }
}

// Same flow, but every matching read fails with ERROR_UNKNOWN, which is expected
// to be returned by zesPowerGetLimits.
TEST_F(SysmanDevicePowerFixture, GivenGetPowerLimitsReturnErrorWhenGettingPowerLimitsWhenHwmonInterfaceExistForBurstPowerLimitThenProperErrorCodesIsReturned) {
    auto handles = getPowerHandles(powerHandleComponentCount);

    EXPECT_CALL(*pSysfsAccess.get(), read(_, Matcher(_)))
        .WillRepeatedly(Return(ZE_RESULT_ERROR_UNKNOWN));

    for (auto handle : handles) {
        zes_power_burst_limit_t burstSet = {};
        zes_power_burst_limit_t burstGet = {};
        burstSet.enabled = 1;
        burstSet.power = 375000;
        EXPECT_EQ(ZE_RESULT_SUCCESS, zesPowerSetLimits(handle, nullptr, &burstSet, nullptr));
        EXPECT_EQ(ZE_RESULT_ERROR_UNKNOWN, zesPowerGetLimits(handle, nullptr, &burstGet, nullptr));
    }
}

// Sysfs writes are redirected to a mock action named for the burst-limit error path;
// setting the burst limit is expected to report UNSUPPORTED_FEATURE.
TEST_F(SysmanDevicePowerFixture, GivenSetPowerLimitsReturnErrorWhenSettingPowerLimitsWhenHwmonInterfaceExistForBurstPowerLimitThenProperErrorCodesReturned) {
    auto handles = getPowerHandles(powerHandleComponentCount);

    ON_CALL(*pSysfsAccess.get(), write(_, _))
.WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::setValUnsignedLongReturnErrorForBurstPowerLimit));

    for (auto handle : handles) {
        zes_power_burst_limit_t burstSet = {};
        burstSet.enabled = 1;
        EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, zesPowerSetLimits(handle, nullptr, &burstSet, nullptr));
    }
}

// Both the write and the read default actions are replaced by the
// "...ReturnErrorForBurstPowerLimitEnabled" mock actions; set and get of the burst
// limit are both expected to report UNSUPPORTED_FEATURE.
// NOTE(review): template arguments of Matcher and Mock appear to have been stripped.
TEST_F(SysmanDevicePowerFixture, GivenSetPowerLimitsReturnErrorWhenSettingPowerLimitsWhenHwmonInterfaceExistForBurstPowerLimitEnabledThenProperErrorCodesReturned) {
    auto handles = getPowerHandles(powerHandleComponentCount);

    ON_CALL(*pSysfsAccess.get(), write(_, _))
        .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::setValUnsignedLongReturnErrorForBurstPowerLimitEnabled));
    ON_CALL(*pSysfsAccess.get(), read(_, Matcher(_)))
        .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::getValUnsignedLongReturnErrorForBurstPowerLimitEnabled));

    for (auto handle : handles) {
        zes_power_burst_limit_t burstSet = {};
        zes_power_burst_limit_t burstGet = {};
        burstSet.enabled = 1;
        EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, zesPowerSetLimits(handle, nullptr, &burstSet, nullptr));
        EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, zesPowerGetLimits(handle, nullptr, &burstGet, nullptr));
    }
}

// The read default action is replaced by getValUnsignedLongReturnErrorForSustainedPowerLimitEnabled;
// set and get of the sustained limit are both expected to report UNSUPPORTED_FEATURE.
TEST_F(SysmanDevicePowerFixture, GivenReadingSustainedPowerLimitNodeReturnErrorWhenSetOrGetPowerLimitsWhenHwmonInterfaceExistForSustainedPowerLimitEnabledThenProperErrorCodesReturned) {
    auto handles = getPowerHandles(powerHandleComponentCount);

    ON_CALL(*pSysfsAccess.get(), read(_, Matcher(_)))
        .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::getValUnsignedLongReturnErrorForSustainedPowerLimitEnabled));

    for (auto handle : handles) {
        zes_power_sustained_limit_t sustainedSet = {};
        zes_power_sustained_limit_t sustainedGet = {};
        EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, zesPowerSetLimits(handle, &sustainedSet, nullptr, nullptr));
        EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, zesPowerGetLimits(handle, &sustainedGet, nullptr, nullptr));
    }
}

TEST_F(SysmanDevicePowerFixture,
GivenReadingSustainedPowerNodeReturnErrorWhenGetPowerLimitsForSustainedPowerWhenHwmonInterfaceExistThenProperErrorCodesReturned) {
    // Reads are redirected to getValUnsignedLongReturnErrorForSustainedPower; reading the
    // sustained limit is expected to report UNSUPPORTED_FEATURE.
    // NOTE(review): template arguments of Matcher and Mock appear to have been stripped.
    auto handles = getPowerHandles(powerHandleComponentCount);

    ON_CALL(*pSysfsAccess.get(), read(_, Matcher(_)))
        .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::getValUnsignedLongReturnErrorForSustainedPower));

    for (auto handle : handles) {
        zes_power_sustained_limit_t sustainedGet = {};
        EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, zesPowerGetLimits(handle, &sustainedGet, nullptr, nullptr));
    }
}

// Same expectation, with reads redirected to getValUnsignedLongReturnErrorForSustainedPowerInterval.
TEST_F(SysmanDevicePowerFixture, GivenReadingSustainedPowerIntervalNodeReturnErrorWhenGetPowerLimitsForSustainedPowerWhenHwmonInterfaceExistThenProperErrorCodesReturned) {
    auto handles = getPowerHandles(powerHandleComponentCount);

    ON_CALL(*pSysfsAccess.get(), read(_, Matcher(_)))
        .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::getValUnsignedLongReturnErrorForSustainedPowerInterval));

    for (auto handle : handles) {
        zes_power_sustained_limit_t sustainedGet = {};
        EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, zesPowerGetLimits(handle, &sustainedGet, nullptr, nullptr));
    }
}

// Writes are redirected to setValReturnErrorForSustainedPower; setting an enabled
// sustained limit on the first handle is expected to report UNSUPPORTED_FEATURE.
TEST_F(SysmanDevicePowerFixture, GivenwritingSustainedPowerNodeReturnErrorWhenSetPowerLimitsForSustainedPowerWhenHwmonInterfaceExistThenProperErrorCodesReturned) {
    auto handles = getPowerHandles(powerHandleComponentCount);

    ON_CALL(*pSysfsAccess.get(), write(_, _))
        .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::setValReturnErrorForSustainedPower));

    zes_power_sustained_limit_t sustainedSet = {};
    sustainedSet.enabled = 1;
    sustainedSet.interval = 10;
    sustainedSet.power = 300000;
    EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, zesPowerSetLimits(handles[0], &sustainedSet, nullptr, nullptr));
}

// Counterpart of the previous test for the sustained-power interval write path.
TEST_F(SysmanDevicePowerFixture, GivenwritingSustainedPowerIntervalNodeReturnErrorWhenSetPowerLimitsForSustainedPowerIntervalWhenHwmonInterfaceExistThenProperErrorCodesReturned) {
    auto handles = getPowerHandles(powerHandleComponentCount);

ON_CALL(*pSysfsAccess.get(), write(_, _))
        .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::setValReturnErrorForSustainedPowerInterval));

    // Setting an enabled sustained limit on the first handle is expected to report UNSUPPORTED_FEATURE.
    zes_power_sustained_limit_t sustainedSet = {};
    sustainedSet.enabled = 1;
    sustainedSet.interval = 10;
    sustainedSet.power = 300000;
    EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, zesPowerSetLimits(handles[0], &sustainedSet, nullptr, nullptr));
}

// Writes are redirected to setValUnsignedLongReturnInsufficientForSustainedPowerLimitEnabled;
// disabling the sustained limit is expected to report INSUFFICIENT_PERMISSIONS.
// NOTE(review): the template argument of Mock appears to have been stripped.
TEST_F(SysmanDevicePowerFixture, GivenValidPowerHandleWhenWritingToSustainedPowerEnableNodeWithoutPermissionsThenValidErrorIsReturned) {
    auto handles = getPowerHandles(powerHandleComponentCount);

    ON_CALL(*pSysfsAccess.get(), write(_, _))
        .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::setValUnsignedLongReturnInsufficientForSustainedPowerLimitEnabled));

    zes_power_sustained_limit_t sustainedSet = {};
    sustainedSet.enabled = 0;
    EXPECT_EQ(ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS, zesPowerSetLimits(handles[0], &sustainedSet, nullptr, nullptr));
}

// With default mocks: disable the sustained limit, re-enable it with explicit
// values, then read it back and compare every field.
TEST_F(SysmanDevicePowerFixture, GivenValidPowerHandleAndPermissionsThenFirstDisableSustainedPowerLimitAndThenEnableItAndCheckSuccesIsReturned) {
    auto handles = getPowerHandles(powerHandleComponentCount);
    zes_power_sustained_limit_t sustainedSet = {};
    zes_power_sustained_limit_t sustainedGet = {};
    sustainedSet.enabled = 0;
    EXPECT_EQ(ZE_RESULT_SUCCESS, zesPowerSetLimits(handles[0], &sustainedSet, nullptr, nullptr));
    sustainedSet.enabled = 1;
    sustainedSet.interval = 10;
    sustainedSet.power = 300000;
    EXPECT_EQ(ZE_RESULT_SUCCESS, zesPowerSetLimits(handles[0], &sustainedSet, nullptr, nullptr));
    EXPECT_EQ(ZE_RESULT_SUCCESS, zesPowerGetLimits(handles[0], &sustainedGet, nullptr, nullptr));
    EXPECT_EQ(sustainedGet.enabled, sustainedSet.enabled);
    EXPECT_EQ(sustainedGet.power, sustainedSet.power);
    EXPECT_EQ(sustainedGet.interval, sustainedSet.interval);
}

// Reads are redirected to a mock action that reports the limits as disabled
// (per its name); see the expectations that follow.
TEST_F(SysmanDevicePowerFixture, GivenGetPowerLimitsWhenPowerLimitsAreDisabledWhenHwmonInterfaceExistThenAllPowerValuesAreIgnored) {
    auto handles = getPowerHandles(powerHandleComponentCount);

ON_CALL(*pSysfsAccess.get(), read(_, Matcher(_)))
        .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::getValUnsignedLongReturnsPowerLimitEnabledAsDisabled));

    // Sustained and burst limits are expected to read back as disabled with zeroed values.
    zes_power_sustained_limit_t sustainedGet = {};
    zes_power_burst_limit_t burstGet = {};
    EXPECT_EQ(ZE_RESULT_SUCCESS, zesPowerGetLimits(handles[0], &sustainedGet, nullptr, nullptr));
    EXPECT_EQ(sustainedGet.interval, 0);
    EXPECT_EQ(sustainedGet.power, 0);
    EXPECT_EQ(sustainedGet.enabled, 0);
    EXPECT_EQ(ZE_RESULT_SUCCESS, zesPowerGetLimits(handles[0], nullptr, &burstGet, nullptr));
    EXPECT_EQ(burstGet.enabled, 0);
    EXPECT_EQ(burstGet.power, 0);

    // Writing the disabled limits back is expected to succeed.
    EXPECT_EQ(ZE_RESULT_SUCCESS, zesPowerSetLimits(handles[0], &sustainedGet, nullptr, nullptr));
    zes_power_burst_limit_t burstSet = {};
    burstSet.enabled = 0;
    EXPECT_EQ(ZE_RESULT_SUCCESS, zesPowerSetLimits(handles[0], nullptr, &burstSet, nullptr));
}

// Directory scanning fails, yet a freshly constructed PublicLinuxPowerImp is still
// expected to report the power module as supported.
// NOTE(review): unlike the neighbouring tests, init() is called here without first
// deleting and clearing the existing handleList; confirm that is intended.
TEST_F(SysmanDevicePowerFixture, GivenScanDiectoriesFailAndPmtIsNotNullPointerThenPowerModuleIsSupported) {

    EXPECT_CALL(*pSysfsAccess.get(), scanDirEntries(_, _))
        .WillRepeatedly(Return(ZE_RESULT_ERROR_NOT_AVAILABLE));
    pSysmanDeviceImp->pPowerHandleContext->init(deviceHandles, device->toHandle());
    ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES};
    Device::fromHandle(device)->getProperties(&deviceProperties);
    PublicLinuxPowerImp *pPowerImp = new PublicLinuxPowerImp(pOsSysman, deviceProperties.flags & ZE_DEVICE_PROPERTY_FLAG_SUBDEVICE, deviceProperties.subdeviceId);
    EXPECT_TRUE(pPowerImp->isPowerModuleSupported());
    delete pPowerImp;
}

// Enumeration count check repeated with directory scanning failing and the handles rebuilt.
TEST_F(SysmanDevicePowerFixture, GivenComponentCountZeroWhenEnumeratingPowerDomainsThenValidCountIsReturnedAndVerifySysmanPowerGetCallSucceeds) {
    EXPECT_CALL(*pSysfsAccess.get(), scanDirEntries(_, _))
        .WillRepeatedly(Return(ZE_RESULT_ERROR_NOT_AVAILABLE));
    for (const auto &handle : pSysmanDeviceImp->pPowerHandleContext->handleList) {
        delete handle;
    }
    pSysmanDeviceImp->pPowerHandleContext->handleList.clear();
    pSysmanDeviceImp->pPowerHandleContext->init(deviceHandles,
device->toHandle());
    uint32_t count = 0;
    EXPECT_EQ(zesDeviceEnumPowerDomains(device->toHandle(), &count, nullptr), ZE_RESULT_SUCCESS);
    EXPECT_EQ(count, powerHandleComponentCount);
}

// With directory scanning failing and handles rebuilt, an over-sized count must
// still be clamped to the real number of power domains.
TEST_F(SysmanDevicePowerFixture, GivenInvalidComponentCountWhenEnumeratingPowerDomainsThenValidCountIsReturnedAndVerifySysmanPowerGetCallSucceeds) {
    EXPECT_CALL(*pSysfsAccess.get(), scanDirEntries(_, _))
        .WillRepeatedly(Return(ZE_RESULT_ERROR_NOT_AVAILABLE));
    for (const auto &handle : pSysmanDeviceImp->pPowerHandleContext->handleList) {
        delete handle;
    }
    pSysmanDeviceImp->pPowerHandleContext->handleList.clear();
    pSysmanDeviceImp->pPowerHandleContext->init(deviceHandles, device->toHandle());
    uint32_t count = 0;
    EXPECT_EQ(zesDeviceEnumPowerDomains(device->toHandle(), &count, nullptr), ZE_RESULT_SUCCESS);
    EXPECT_EQ(count, powerHandleComponentCount);

    count = count + 1;
    EXPECT_EQ(zesDeviceEnumPowerDomains(device->toHandle(), &count, nullptr), ZE_RESULT_SUCCESS);
    EXPECT_EQ(count, powerHandleComponentCount);
}

// Same setup; the two-step enumeration must return non-null handles.
// NOTE(review): the element type of std::vector appears to have been stripped from this text.
TEST_F(SysmanDevicePowerFixture, GivenComponentCountZeroWhenEnumeratingPowerDomainsThenValidPowerHandlesIsReturned) {
    EXPECT_CALL(*pSysfsAccess.get(), scanDirEntries(_, _))
        .WillRepeatedly(Return(ZE_RESULT_ERROR_NOT_AVAILABLE));
    for (const auto &handle : pSysmanDeviceImp->pPowerHandleContext->handleList) {
        delete handle;
    }
    pSysmanDeviceImp->pPowerHandleContext->handleList.clear();
    pSysmanDeviceImp->pPowerHandleContext->init(deviceHandles, device->toHandle());
    uint32_t count = 0;
    EXPECT_EQ(zesDeviceEnumPowerDomains(device->toHandle(), &count, nullptr), ZE_RESULT_SUCCESS);
    EXPECT_EQ(count, powerHandleComponentCount);

    std::vector handles(count, nullptr);
    EXPECT_EQ(zesDeviceEnumPowerDomains(device->toHandle(), &count, handles.data()), ZE_RESULT_SUCCESS);
    for (auto handle : handles) {
        EXPECT_NE(handle, nullptr);
    }
}

// Properties query with directory scanning failing: only the sub-device fields are checked.
TEST_F(SysmanDevicePowerFixture, GivenValidPowerHandleWhenGettingPowerPropertiesThenCallSucceeds) {
    EXPECT_CALL(*pSysfsAccess.get(), scanDirEntries(_, _))
.WillRepeatedly(Return(ZE_RESULT_ERROR_NOT_AVAILABLE));
    // Rebuild the handles under the failing scanDirEntries expectation.
    for (const auto &handle : pSysmanDeviceImp->pPowerHandleContext->handleList) {
        delete handle;
    }
    pSysmanDeviceImp->pPowerHandleContext->handleList.clear();
    pSysmanDeviceImp->pPowerHandleContext->init(deviceHandles, device->toHandle());
    auto handles = getPowerHandles(powerHandleComponentCount);
    for (auto handle : handles) {
        zes_power_properties_t properties;
        EXPECT_EQ(ZE_RESULT_SUCCESS, zesPowerGetProperties(handle, &properties));
        EXPECT_FALSE(properties.onSubdevice);
        EXPECT_EQ(properties.subdeviceId, 0u);
    }
}

// Directory scanning fails; the PMT object's file hooks are replaced by the mock
// open/close/pread functions and the energy counter is expected to equal the value
// computed from setEnergyCounter.
TEST_F(SysmanDevicePowerFixture, GivenValidPowerHandleWhenGettingPowerEnergyCounterThenValidPowerReadingsRetrieved) {
    EXPECT_CALL(*pSysfsAccess.get(), scanDirEntries(_, _))
        .WillRepeatedly(Return(ZE_RESULT_ERROR_NOT_AVAILABLE));
    for (const auto &handle : pSysmanDeviceImp->pPowerHandleContext->handleList) {
        delete handle;
    }
    pSysmanDeviceImp->pPowerHandleContext->handleList.clear();
    pSysmanDeviceImp->pPowerHandleContext->init(deviceHandles, device->toHandle());
    auto handles = getPowerHandles(powerHandleComponentCount);

    for (auto &deviceHandle : deviceHandles) {
        ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES};
        Device::fromHandle(deviceHandle)->getProperties(&deviceProperties);
        // NOTE(review): the cast's template argument is truncated in this text ("static_cast> *>").
        auto pPmt = static_cast> *>(pLinuxSysmanImp->getPlatformMonitoringTechAccess(deviceProperties.subdeviceId));
        pPmt->openFunction = openMockPower;
        pPmt->closeFunction = closeMockPower;
        pPmt->preadFunction = preadMockPower;
    }

    for (auto handle : handles) {
        zes_power_energy_counter_t energyCounter;
        uint64_t expectedEnergyCounter = convertJouleToMicroJoule * (setEnergyCounter / 1048576);
        // ASSERT: without a successful call energyCounter holds no defined value to compare.
        ASSERT_EQ(ZE_RESULT_SUCCESS, zesPowerGetEnergyCounter(handle, &energyCounter));
        EXPECT_EQ(energyCounter.energy, expectedEnergyCounter);
    }
}

// Sysfs reads fail and every sub-device is mapped to a null PMT object (set up below).
TEST_F(SysmanDevicePowerFixture, GivenValidPowerHandleWhenGettingPowerEnergyCounterWhenEnergyHwmonFileReturnsErrorAndPmtFailsThenFailureIsReturned) {
    EXPECT_CALL(*pSysfsAccess.get(),
read(_, Matcher(_)))
        .WillRepeatedly(Return(ZE_RESULT_ERROR_NOT_AVAILABLE));
    // NOTE(review): the template argument of Matcher appears to have been stripped above.
    for (const auto &handle : pSysmanDeviceImp->pPowerHandleContext->handleList) {
        delete handle;
    }

    // Destroy the existing PMT objects and map every sub-device id to nullptr instead.
    for (auto &subDeviceIdToPmtEntry : pLinuxSysmanImp->mapOfSubDeviceIdToPmtObject) {
        delete subDeviceIdToPmtEntry.second;
    }
    pLinuxSysmanImp->mapOfSubDeviceIdToPmtObject.clear();
    for (auto &deviceHandle : deviceHandles) {
        ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES};
        Device::fromHandle(deviceHandle)->getProperties(&deviceProperties);
        pLinuxSysmanImp->mapOfSubDeviceIdToPmtObject.emplace(deviceProperties.subdeviceId, nullptr);
    }

    // Rebuild the power handles; with neither source available the energy counter
    // query is expected to report UNSUPPORTED_FEATURE.
    pSysmanDeviceImp->pPowerHandleContext->handleList.clear();
    pSysmanDeviceImp->pPowerHandleContext->init(deviceHandles, device->toHandle());
    auto handles = getPowerHandles(powerHandleComponentCount);

    for (auto handle : handles) {
        zes_power_energy_counter_t energyCounter = {};
        EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, zesPowerGetEnergyCounter(handle, &energyCounter));
    }
}

// Directory scanning fails; reading the energy threshold is expected to report UNSUPPORTED_FEATURE.
TEST_F(SysmanDevicePowerFixture, GivenValidPowerHandleWhenGettingPowerEnergyThresholdThenUnsupportedFeatureErrorIsReturned) {
    EXPECT_CALL(*pSysfsAccess.get(), scanDirEntries(_, _))
        .WillRepeatedly(Return(ZE_RESULT_ERROR_NOT_AVAILABLE));
    for (const auto &handle : pSysmanDeviceImp->pPowerHandleContext->handleList) {
        delete handle;
    }
    pSysmanDeviceImp->pPowerHandleContext->handleList.clear();
    pSysmanDeviceImp->pPowerHandleContext->init(deviceHandles, device->toHandle());
    zes_energy_threshold_t threshold;
    auto handles = getPowerHandles(powerHandleComponentCount);
    for (auto handle : handles) {
        EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, zesPowerGetEnergyThreshold(handle, &threshold));
    }
}

// Directory scanning fails; writing the energy threshold is expected to report UNSUPPORTED_FEATURE.
TEST_F(SysmanDevicePowerFixture, GivenValidPowerHandleWhenSettingPowerEnergyThresholdThenUnsupportedFeatureErrorIsReturned) {
    EXPECT_CALL(*pSysfsAccess.get(), scanDirEntries(_, _))
        .WillRepeatedly(Return(ZE_RESULT_ERROR_NOT_AVAILABLE));
    for (const auto &handle :
pSysmanDeviceImp->pPowerHandleContext->handleList) { delete handle; } pSysmanDeviceImp->pPowerHandleContext->handleList.clear(); pSysmanDeviceImp->pPowerHandleContext->init(deviceHandles, device->toHandle()); double threshold = 0; auto handles = getPowerHandles(powerHandleComponentCount); for (auto handle : handles) { EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, zesPowerSetEnergyThreshold(handle, threshold)); } } TEST_F(SysmanDevicePowerFixture, GivenValidPowerHandleWhenGettingPowerLimitsThenUnsupportedFeatureErrorIsReturned) { EXPECT_CALL(*pSysfsAccess.get(), scanDirEntries(_, _)) .WillRepeatedly(Return(ZE_RESULT_ERROR_NOT_AVAILABLE)); for (const auto &handle : pSysmanDeviceImp->pPowerHandleContext->handleList) { delete handle; } pSysmanDeviceImp->pPowerHandleContext->handleList.clear(); pSysmanDeviceImp->pPowerHandleContext->init(deviceHandles, device->toHandle()); auto handles = getPowerHandles(powerHandleComponentCount); for (auto handle : handles) { zes_power_sustained_limit_t sustained; zes_power_burst_limit_t burst; zes_power_peak_limit_t peak; EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, zesPowerGetLimits(handle, &sustained, &burst, &peak)); } } TEST_F(SysmanDevicePowerFixture, GivenValidPowerHandleWhenSettingPowerLimitsThenUnsupportedFeatureErrorIsReturned) { EXPECT_CALL(*pSysfsAccess.get(), scanDirEntries(_, _)) .WillRepeatedly(Return(ZE_RESULT_ERROR_NOT_AVAILABLE)); for (const auto &handle : pSysmanDeviceImp->pPowerHandleContext->handleList) { delete handle; } pSysmanDeviceImp->pPowerHandleContext->handleList.clear(); pSysmanDeviceImp->pPowerHandleContext->init(deviceHandles, device->toHandle()); auto handles = getPowerHandles(powerHandleComponentCount); for (auto handle : handles) { zes_power_sustained_limit_t sustained; zes_power_burst_limit_t burst; zes_power_peak_limit_t peak; EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, zesPowerSetLimits(handle, &sustained, &burst, &peak)); } } TEST_F(SysmanDevicePowerMultiDeviceFixture, 
GivenValidPowerHandleWhenGettingPowerEnergyCounterWhenEnergyHwmonFailsThenValidPowerReadingsRetrievedFromPmt) { EXPECT_CALL(*pSysfsAccess.get(), scanDirEntries(_, _)) .WillRepeatedly(Return(ZE_RESULT_ERROR_NOT_AVAILABLE)); for (const auto &handle : pSysmanDeviceImp->pPowerHandleContext->handleList) { delete handle; } pSysmanDeviceImp->pPowerHandleContext->handleList.clear(); pSysmanDeviceImp->pPowerHandleContext->init(deviceHandles, device->toHandle()); auto handles = getPowerHandles(powerHandleComponentCount); for (auto &deviceHandle : deviceHandles) { ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES}; Device::fromHandle(deviceHandle)->getProperties(&deviceProperties); auto pPmt = static_cast> *>(pLinuxSysmanImp->getPlatformMonitoringTechAccess(deviceProperties.subdeviceId)); pPmt->openFunction = openMockPower; pPmt->closeFunction = closeMockPower; pPmt->preadFunction = preadMockPower; } for (auto handle : handles) { zes_power_energy_counter_t energyCounter; uint64_t expectedEnergyCounter = convertJouleToMicroJoule * (setEnergyCounter / 1048576); ASSERT_EQ(ZE_RESULT_SUCCESS, zesPowerGetEnergyCounter(handle, &energyCounter)); EXPECT_EQ(energyCounter.energy, expectedEnergyCounter); } } } // namespace ult } // namespace L0 test_zes_power_helper.cpp000066400000000000000000000030141422164147700363660ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/power/linux/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/power/linux/os_power_imp.h" #include "level_zero/tools/test/unit_tests/sources/sysman/power/linux/mock_sysfs_power.h" namespace L0 { namespace ult { constexpr uint32_t powerHandleComponentCount = 1u; using SysmanDevicePowerFixtureHelper = SysmanDevicePowerFixture; TEST_F(SysmanDevicePowerFixtureHelper, GivenValidPowerHandleWhenGettingPowerEnergyCounterThenValidPowerReadingsRetrieved) { auto handles = 
getPowerHandles(powerHandleComponentCount); for (auto handle : handles) { zes_power_energy_counter_t energyCounter = {}; ASSERT_EQ(ZE_RESULT_SUCCESS, zesPowerGetEnergyCounter(handle, &energyCounter)); EXPECT_EQ(energyCounter.energy, expectedEnergyCounter); } } using SysmanDevicePowerMultiDeviceFixtureHelper = SysmanDevicePowerMultiDeviceFixture; TEST_F(SysmanDevicePowerMultiDeviceFixtureHelper, GivenValidDeviceHandlesAndHwmonInterfaceExistThenSuccessIsReturned) { for (auto &deviceHandle : deviceHandles) { ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES}; Device::fromHandle(deviceHandle)->getProperties(&deviceProperties); PublicLinuxPowerImp *pPowerImp = new PublicLinuxPowerImp(pOsSysman, deviceProperties.flags & ZE_DEVICE_PROPERTY_FLAG_SUBDEVICE, deviceProperties.subdeviceId); EXPECT_TRUE(pPowerImp->isPowerModuleSupported()); delete pPowerImp; } } } // namespace ult } // namespace L0compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/power/windows/000077500000000000000000000000001422164147700316635ustar00rootroot00000000000000CMakeLists.txt000066400000000000000000000005311422164147700343430ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/power/windows# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(WIN32) target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/test_zes_sysman_power.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_power.h ) endif() mock_power.h000066400000000000000000000217421422164147700341300ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/power/windows/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/core/test/unit_tests/mock.h" #include "level_zero/tools/test/unit_tests/sources/sysman/windows/mock_kmd_sys_manager.h" #include 
"sysman/power/power_imp.h" namespace L0 { namespace ult { class PowerKmdSysManager : public Mock {}; template <> struct Mock : public PowerKmdSysManager { uint32_t mockPowerLimit1Enabled = 1; uint32_t mockPowerLimit2Enabled = 1; int32_t mockPowerLimit1 = 25000; int32_t mockPowerLimit2 = 41000; int32_t mockTauPowerLimit1 = 20800; uint32_t mockTpdDefault = 34000; uint32_t mockMinPowerLimit = 1000; uint32_t mockMaxPowerLimit = 80000; int32_t mockAcPowerPeak = 0; int32_t mockDcPowerPeak = 0; uint32_t mockEnergyThreshold = 0; uint32_t mockEnergyCounter = 3231121; uint32_t mockTimeStamp = 1123412412; uint32_t mockEnergyUnit = 14; uint32_t mockFrequencyTimeStamp = 38400000; void getActivityProperty(KmdSysman::GfxSysmanReqHeaderIn *pRequest, KmdSysman::GfxSysmanReqHeaderOut *pResponse) { uint8_t *pBuffer = reinterpret_cast(pResponse); pBuffer += sizeof(KmdSysman::GfxSysmanReqHeaderOut); switch (pRequest->inRequestId) { case KmdSysman::Requests::Activity::TimestampFrequency: { uint32_t *pValueFrequency = reinterpret_cast(pBuffer); *pValueFrequency = mockFrequencyTimeStamp; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; pResponse->outDataSize = sizeof(uint32_t); } break; default: { pResponse->outDataSize = 0; pResponse->outReturnCode = KmdSysman::KmdSysmanFail; } break; } } void getPowerProperty(KmdSysman::GfxSysmanReqHeaderIn *pRequest, KmdSysman::GfxSysmanReqHeaderOut *pResponse) override { uint8_t *pBuffer = reinterpret_cast(pResponse); pBuffer += sizeof(KmdSysman::GfxSysmanReqHeaderOut); switch (pRequest->inRequestId) { case KmdSysman::Requests::Power::EnergyThresholdSupported: { uint32_t *pValue = reinterpret_cast(pBuffer); *pValue = static_cast(this->allowSetCalls); pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; pResponse->outDataSize = sizeof(uint32_t); } break; case KmdSysman::Requests::Power::TdpDefault: { uint32_t *pValue = reinterpret_cast(pBuffer); *pValue = mockTpdDefault; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; 
pResponse->outDataSize = sizeof(uint32_t);
        } break;
        // NOTE(review): every reinterpret_cast below has lost its template argument in
        // this text; the intended target is the pointer type declared on the same line.
        // Each case copies one mock value into the response payload and sets its size.
        case KmdSysman::Requests::Power::MinPowerLimitDefault: {
            uint32_t *pValue = reinterpret_cast(pBuffer);
            *pValue = mockMinPowerLimit;
            pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess;
            pResponse->outDataSize = sizeof(uint32_t);
        } break;
        case KmdSysman::Requests::Power::MaxPowerLimitDefault: {
            uint32_t *pValue = reinterpret_cast(pBuffer);
            *pValue = mockMaxPowerLimit;
            pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess;
            pResponse->outDataSize = sizeof(uint32_t);
        } break;
        case KmdSysman::Requests::Power::PowerLimit1Enabled: {
            uint32_t *pValue = reinterpret_cast(pBuffer);
            *pValue = mockPowerLimit1Enabled;
            pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess;
            pResponse->outDataSize = sizeof(uint32_t);
        } break;
        case KmdSysman::Requests::Power::PowerLimit2Enabled: {
            uint32_t *pValue = reinterpret_cast(pBuffer);
            *pValue = mockPowerLimit2Enabled;
            pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess;
            pResponse->outDataSize = sizeof(uint32_t);
        } break;
        // The current limits are signed 32-bit values.
        case KmdSysman::Requests::Power::CurrentPowerLimit1: {
            int32_t *pValue = reinterpret_cast(pBuffer);
            *pValue = mockPowerLimit1;
            pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess;
            pResponse->outDataSize = sizeof(int32_t);
        } break;
        case KmdSysman::Requests::Power::CurrentPowerLimit1Tau: {
            int32_t *pValue = reinterpret_cast(pBuffer);
            *pValue = mockTauPowerLimit1;
            pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess;
            pResponse->outDataSize = sizeof(int32_t);
        } break;
        case KmdSysman::Requests::Power::CurrentPowerLimit2: {
            int32_t *pValue = reinterpret_cast(pBuffer);
            *pValue = mockPowerLimit2;
            pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess;
            pResponse->outDataSize = sizeof(int32_t);
        } break;
        case KmdSysman::Requests::Power::CurrentPowerLimit4Ac: {
            int32_t *pValue = reinterpret_cast(pBuffer);
            *pValue = mockAcPowerPeak;
            pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess;
            pResponse->outDataSize = sizeof(int32_t);
        } break;
        case
KmdSysman::Requests::Power::CurrentPowerLimit4Dc: {
            int32_t *pValue = reinterpret_cast(pBuffer);
            *pValue = mockDcPowerPeak;
            pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess;
            // NOTE(review): the payload is an int32_t but the size uses sizeof(uint32_t);
            // same number of bytes, yet inconsistent with the sibling int32_t cases.
            pResponse->outDataSize = sizeof(uint32_t);
        } break;
        case KmdSysman::Requests::Power::CurrentEnergyThreshold: {
            uint32_t *pValue = reinterpret_cast(pBuffer);
            *pValue = mockEnergyThreshold;
            pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess;
            pResponse->outDataSize = sizeof(uint32_t);
        } break;
        case KmdSysman::Requests::Power::CurrentEnergyCounter: {
            // Payload layout: 32-bit counter immediately followed by a 64-bit timestamp.
            // NOTE(review): the 64-bit store lands at pBuffer + 4, which may not be
            // 8-byte aligned — confirm this is acceptable on the targeted platforms.
            uint32_t *pValueCounter = reinterpret_cast(pBuffer);
            uint64_t *pValueTS = reinterpret_cast(pBuffer + sizeof(uint32_t));
            *pValueCounter = mockEnergyCounter;
            *pValueTS = mockTimeStamp;
            pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess;
            pResponse->outDataSize = sizeof(uint32_t) + sizeof(uint64_t);
        } break;
        case KmdSysman::Requests::Power::EnergyCounterUnits: {
            uint32_t *pValue = reinterpret_cast(pBuffer);
            *pValue = mockEnergyUnit;
            pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess;
            pResponse->outDataSize = sizeof(uint32_t);
        } break;
        // Any request id not handled above fails with an empty payload.
        default: {
            pResponse->outDataSize = 0;
            pResponse->outReturnCode = KmdSysman::KmdSysmanFail;
        } break;
        }
    }

    // Handles Power "set" requests: the new value is read from the bytes that follow
    // the request header and stored in the matching mock field; no payload is returned.
    // NOTE(review): the reinterpret_cast template arguments are stripped in this text;
    // the intended target is the pointer type declared on the same line.
    void setPowerProperty(KmdSysman::GfxSysmanReqHeaderIn *pRequest, KmdSysman::GfxSysmanReqHeaderOut *pResponse) override {
        uint8_t *pBuffer = reinterpret_cast(pRequest);
        pBuffer += sizeof(KmdSysman::GfxSysmanReqHeaderIn);

        switch (pRequest->inRequestId) {
        case KmdSysman::Requests::Power::CurrentPowerLimit1: {
            int32_t *pValue = reinterpret_cast(pBuffer);
            mockPowerLimit1 = *pValue;
            pResponse->outDataSize = 0;
            pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess;
        } break;
        case KmdSysman::Requests::Power::CurrentPowerLimit1Tau: {
            int32_t *pValue = reinterpret_cast(pBuffer);
            mockTauPowerLimit1 = *pValue;
            pResponse->outDataSize = 0;
            pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess;
        } break;
        case KmdSysman::Requests::Power::CurrentPowerLimit2: {
            int32_t *pValue = reinterpret_cast(pBuffer);
            mockPowerLimit2 =
*pValue; pResponse->outDataSize = 0; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; } break; case KmdSysman::Requests::Power::CurrentPowerLimit4Ac: { int32_t *pValue = reinterpret_cast(pBuffer); mockAcPowerPeak = *pValue; pResponse->outDataSize = 0; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; } break; case KmdSysman::Requests::Power::CurrentPowerLimit4Dc: { int32_t *pValue = reinterpret_cast(pBuffer); mockDcPowerPeak = *pValue; pResponse->outDataSize = 0; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; } break; case KmdSysman::Requests::Power::CurrentEnergyThreshold: { uint32_t *pValue = reinterpret_cast(pBuffer); mockEnergyThreshold = *pValue; pResponse->outDataSize = 0; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; } break; default: { pResponse->outDataSize = 0; pResponse->outReturnCode = KmdSysman::KmdSysmanFail; } break; } } Mock() = default; ~Mock() = default; }; } // namespace ult } // namespace L0 test_zes_sysman_power.cpp000066400000000000000000000253341422164147700367650ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/power/windows/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/power/windows/os_power_imp.h" #include "level_zero/tools/test/unit_tests/sources/sysman/power/windows/mock_power.h" #include "level_zero/tools/test/unit_tests/sources/sysman/windows/mock_sysman_fixture.h" extern bool sysmanUltsEnable; namespace L0 { namespace ult { constexpr uint32_t powerHandleComponentCount = 1u; class SysmanDevicePowerFixture : public SysmanDeviceFixture { protected: std::unique_ptr> pKmdSysManager; KmdSysManager *pOriginalKmdSysManager = nullptr; void SetUp() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } SysmanDeviceFixture::SetUp(); } void init(bool allowSetCalls) { pKmdSysManager.reset(new Mock); pKmdSysManager->allowSetCalls = allowSetCalls; EXPECT_CALL(*pKmdSysManager, escape(_, _, 
_, _, _)) .WillRepeatedly(::testing::Invoke(pKmdSysManager.get(), &Mock::mock_escape)); pOriginalKmdSysManager = pWddmSysmanImp->pKmdSysManager; pWddmSysmanImp->pKmdSysManager = pKmdSysManager.get(); for (auto handle : pSysmanDeviceImp->pPowerHandleContext->handleList) { delete handle; } pSysmanDeviceImp->pPowerHandleContext->handleList.clear(); uint32_t subDeviceCount = 0; std::vector deviceHandles; Device::fromHandle(device->toHandle())->getSubDevices(&subDeviceCount, nullptr); if (subDeviceCount == 0) { deviceHandles.resize(1, device->toHandle()); } else { deviceHandles.resize(subDeviceCount, nullptr); Device::fromHandle(device->toHandle())->getSubDevices(&subDeviceCount, deviceHandles.data()); } pSysmanDeviceImp->pPowerHandleContext->init(deviceHandles, device->toHandle()); } void TearDown() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } SysmanDeviceFixture::TearDown(); pWddmSysmanImp->pKmdSysManager = pOriginalKmdSysManager; } std::vector get_power_handles(uint32_t count) { std::vector handles(count, nullptr); EXPECT_EQ(zesDeviceEnumPowerDomains(device->toHandle(), &count, handles.data()), ZE_RESULT_SUCCESS); return handles; } }; TEST_F(SysmanDevicePowerFixture, GivenComponentCountZeroWhenEnumeratingPowerDomainThenValidCountIsReturnedAndVerifySysmanPowerGetCallSucceeds) { init(true); uint32_t count = 0; EXPECT_EQ(zesDeviceEnumPowerDomains(device->toHandle(), &count, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(count, powerHandleComponentCount); } TEST_F(SysmanDevicePowerFixture, GivenInvalidComponentCountWhenEnumeratingPowerDomainThenValidCountIsReturnedAndVerifySysmanPowerGetCallSucceeds) { init(true); uint32_t count = 0; EXPECT_EQ(zesDeviceEnumPowerDomains(device->toHandle(), &count, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(count, powerHandleComponentCount); count = count + 1; EXPECT_EQ(zesDeviceEnumPowerDomains(device->toHandle(), &count, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(count, powerHandleComponentCount); } TEST_F(SysmanDevicePowerFixture, 
GivenComponentCountZeroWhenEnumeratingPowerDomainThenValidPowerHandlesIsReturned) { init(true); uint32_t count = 0; EXPECT_EQ(zesDeviceEnumPowerDomains(device->toHandle(), &count, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(count, powerHandleComponentCount); std::vector handles(count, nullptr); EXPECT_EQ(zesDeviceEnumPowerDomains(device->toHandle(), &count, handles.data()), ZE_RESULT_SUCCESS); for (auto handle : handles) { EXPECT_NE(handle, nullptr); } } TEST_F(SysmanDevicePowerFixture, GivenValidPowerHandleWhenGettingPowerPropertiesAllowSetToTrueThenCallSucceeds) { // Setting allow set calls or not init(true); auto handles = get_power_handles(powerHandleComponentCount); for (auto handle : handles) { zes_power_properties_t properties; ze_result_t result = zesPowerGetProperties(handle, &properties); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_FALSE(properties.onSubdevice); EXPECT_EQ(properties.subdeviceId, 0); EXPECT_TRUE(properties.canControl); EXPECT_TRUE(properties.isEnergyThresholdSupported); EXPECT_EQ(properties.maxLimit, pKmdSysManager->mockMaxPowerLimit); EXPECT_EQ(properties.minLimit, pKmdSysManager->mockMinPowerLimit); EXPECT_EQ(properties.defaultLimit, pKmdSysManager->mockTpdDefault); } } TEST_F(SysmanDevicePowerFixture, GivenValidPowerHandleWhenGettingPowerPropertiesAllowSetToFalseThenCallSucceeds) { // Setting allow set calls or not init(false); auto handles = get_power_handles(powerHandleComponentCount); for (auto handle : handles) { zes_power_properties_t properties; ze_result_t result = zesPowerGetProperties(handle, &properties); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_FALSE(properties.onSubdevice); EXPECT_EQ(properties.subdeviceId, 0); EXPECT_FALSE(properties.canControl); EXPECT_FALSE(properties.isEnergyThresholdSupported); EXPECT_EQ(properties.maxLimit, pKmdSysManager->mockMaxPowerLimit); EXPECT_EQ(properties.minLimit, pKmdSysManager->mockMinPowerLimit); EXPECT_EQ(properties.defaultLimit, pKmdSysManager->mockTpdDefault); } } 
TEST_F(SysmanDevicePowerFixture, GivenValidPowerHandleWhenGettingPowerEnergyCounterThenValidPowerReadingsRetrieved) { // Setting allow set calls or not init(true); auto handles = get_power_handles(powerHandleComponentCount); for (auto handle : handles) { zes_power_energy_counter_t energyCounter; ze_result_t result = zesPowerGetEnergyCounter(handle, &energyCounter); uint32_t conversionUnit = (1 << pKmdSysManager->mockEnergyUnit); double valueConverted = static_cast(pKmdSysManager->mockEnergyCounter) / static_cast(conversionUnit); valueConverted *= static_cast(convertJouleToMicroJoule); uint64_t mockEnergytoMicroJoules = static_cast(valueConverted); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(energyCounter.energy, mockEnergytoMicroJoules); EXPECT_EQ(energyCounter.timestamp, convertTStoMicroSec(pKmdSysManager->mockTimeStamp, pKmdSysManager->mockFrequencyTimeStamp)); } } TEST_F(SysmanDevicePowerFixture, GivenValidPowerHandleWhenGettingPowerLimitsAllowSetToFalseThenCallSucceedsWithValidPowerReadingsRetrieved) { // Setting allow set calls or not init(false); auto handles = get_power_handles(powerHandleComponentCount); for (auto handle : handles) { zes_power_sustained_limit_t sustained; zes_power_burst_limit_t burst; zes_power_peak_limit_t peak; ze_result_t result = zesPowerGetLimits(handle, &sustained, &burst, &peak); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_TRUE(sustained.enabled); EXPECT_EQ(sustained.power, pKmdSysManager->mockPowerLimit1); EXPECT_EQ(sustained.interval, pKmdSysManager->mockTauPowerLimit1); EXPECT_TRUE(burst.enabled); EXPECT_EQ(burst.power, pKmdSysManager->mockPowerLimit2); EXPECT_EQ(peak.powerAC, pKmdSysManager->mockAcPowerPeak); EXPECT_EQ(peak.powerDC, pKmdSysManager->mockDcPowerPeak); } } TEST_F(SysmanDevicePowerFixture, GivenValidPowerHandleWhenSettingPowerLimitsAllowSetToFalseThenCallFails) { // Setting allow set calls or not init(false); auto handles = get_power_handles(powerHandleComponentCount); for (auto handle : handles) { 
zes_power_sustained_limit_t sustained; zes_power_burst_limit_t burst; zes_power_peak_limit_t peak; ze_result_t result = zesPowerGetLimits(handle, &sustained, &burst, &peak); EXPECT_EQ(ZE_RESULT_SUCCESS, result); sustained.power += 1000; result = zesPowerSetLimits(handle, &sustained, &burst, &peak); EXPECT_NE(ZE_RESULT_SUCCESS, result); } } TEST_F(SysmanDevicePowerFixture, GivenValidPowerHandleWhenSettingEnergyThresholdAllowSetToFalseThenCallFails) { // Setting allow set calls or not init(false); auto handles = get_power_handles(powerHandleComponentCount); for (auto handle : handles) { double energyThreshold = 2000; ze_result_t result = zesPowerSetEnergyThreshold(handle, energyThreshold); EXPECT_NE(ZE_RESULT_SUCCESS, result); } } TEST_F(SysmanDevicePowerFixture, GivenValidPowerHandleWhenSettingEnergyThresholdAllowSetToTrueThenCallSucceeds) { // Setting allow set calls or not init(true); auto handles = get_power_handles(powerHandleComponentCount); for (auto handle : handles) { double energyThreshold = 2000; ze_result_t result = zesPowerSetEnergyThreshold(handle, energyThreshold); EXPECT_EQ(ZE_RESULT_SUCCESS, result); zes_energy_threshold_t newEnergyThreshold; result = zesPowerGetEnergyThreshold(handle, &newEnergyThreshold); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(newEnergyThreshold.threshold, energyThreshold); } } TEST_F(SysmanDevicePowerFixture, GivenValidPowerHandleWhenSettingPowerLimitsAllowSetToTrueThenCallSucceeds) { // Setting allow set calls or not init(true); auto handles = get_power_handles(powerHandleComponentCount); for (auto handle : handles) { zes_power_sustained_limit_t sustained; zes_power_burst_limit_t burst; zes_power_peak_limit_t peak; uint32_t powerIncrement = 1500; uint32_t timeIncrement = 12000; uint32_t AcPeakPower = 56000; uint32_t DcPeakPower = 44100; ze_result_t result = zesPowerGetLimits(handle, &sustained, &burst, &peak); EXPECT_EQ(ZE_RESULT_SUCCESS, result); sustained.power += powerIncrement; sustained.interval += timeIncrement; 
burst.power += powerIncrement; peak.powerAC = AcPeakPower; peak.powerDC = DcPeakPower; result = zesPowerSetLimits(handle, &sustained, &burst, &peak); EXPECT_EQ(ZE_RESULT_SUCCESS, result); zes_power_sustained_limit_t newSustained; zes_power_burst_limit_t newBurst; zes_power_peak_limit_t newPeak; result = zesPowerGetLimits(handle, &newSustained, &newBurst, &newPeak); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(newSustained.power, sustained.power); EXPECT_EQ(newSustained.interval, sustained.interval); EXPECT_EQ(newBurst.power, burst.power); EXPECT_EQ(newPeak.powerAC, peak.powerAC); EXPECT_EQ(newPeak.powerDC, peak.powerDC); } } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/ras/000077500000000000000000000000001422164147700276225ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/ras/CMakeLists.txt000066400000000000000000000003051422164147700323600ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) add_subdirectories() compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/ras/linux/000077500000000000000000000000001422164147700307615ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/ras/linux/CMakeLists.txt000066400000000000000000000012641422164147700335240ustar00rootroot00000000000000# # Copyright (C) 2020-2022 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_TESTS_TOOLS_SYSMAN_RAS_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}test_zes_ras.cpp ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}mock_fs_ras.h ) if((NEO_ENABLE_i915_PRELIM_DETECTION) AND ("${BRANCH_TYPE}" STREQUAL "")) list(REMOVE_ITEM L0_TESTS_TOOLS_SYSMAN_RAS_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/test_zes_ras.cpp 
${CMAKE_CURRENT_SOURCE_DIR}/mock_fs_ras.h ) endif() if(UNIX) target_sources(${TARGET_NAME} PRIVATE ${L0_TESTS_TOOLS_SYSMAN_RAS_LINUX} ) endif() add_subdirectories() compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/ras/linux/mock_fs_ras.h000066400000000000000000000012521422164147700334200ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/core/test/unit_tests/mock.h" #include "level_zero/tools/source/sysman/ras/linux/os_ras_imp.h" #include "sysman/linux/fs_access.h" #include "sysman/ras/ras.h" #include "sysman/ras/ras_imp.h" namespace L0 { namespace ult { class RasFsAccess : public FsAccess {}; template <> struct Mock : public RasFsAccess { MOCK_METHOD(bool, isRootUser, (), (override)); bool userIsRoot() { return true; } bool userIsNotRoot() { return false; } Mock() = default; }; } // namespace ult } // namespace L0 test_zes_ras.cpp000066400000000000000000000161611422164147700341200ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/ras/linux/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/test/unit_tests/sources/sysman/linux/mock_sysman_fixture.h" #include "mock_fs_ras.h" extern bool sysmanUltsEnable; using ::testing::_; using ::testing::Matcher; using ::testing::NiceMock; namespace L0 { namespace ult { constexpr uint32_t mockHandleCount = 0; struct SysmanRasFixture : public SysmanDeviceFixture { protected: std::unique_ptr> pFsAccess; std::vector deviceHandles; FsAccess *pFsAccessOriginal = nullptr; void SetUp() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } SysmanDeviceFixture::SetUp(); pFsAccess = std::make_unique>>(); pFsAccessOriginal = pLinuxSysmanImp->pFsAccess; pLinuxSysmanImp->pFsAccess = pFsAccess.get(); ON_CALL(*pFsAccess.get(), isRootUser()) .WillByDefault(::testing::Invoke(pFsAccess.get(), &Mock::userIsRoot)); 
pSysmanDeviceImp->pRasHandleContext->handleList.clear(); uint32_t subDeviceCount = 0; Device::fromHandle(device->toHandle())->getSubDevices(&subDeviceCount, nullptr); if (subDeviceCount == 0) { deviceHandles.resize(1, device->toHandle()); } else { deviceHandles.resize(subDeviceCount, nullptr); Device::fromHandle(device->toHandle())->getSubDevices(&subDeviceCount, deviceHandles.data()); } pSysmanDeviceImp->pRasHandleContext->init(deviceHandles); } void TearDown() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } SysmanDeviceFixture::TearDown(); pLinuxSysmanImp->pFsAccess = pFsAccessOriginal; } std::vector get_ras_handles(uint32_t count) { std::vector handles(count, nullptr); EXPECT_EQ(zesDeviceEnumRasErrorSets(device->toHandle(), &count, handles.data()), ZE_RESULT_SUCCESS); return handles; } }; TEST_F(SysmanRasFixture, GivenValidSysmanHandleWhenRasErrorSetsThenCorrectCountIsReported) { uint32_t count = 0; ze_result_t result = zesDeviceEnumRasErrorSets(device->toHandle(), &count, NULL); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(count, mockHandleCount); uint32_t testcount = count + 1; result = zesDeviceEnumRasErrorSets(device->toHandle(), &testcount, NULL); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(testcount, mockHandleCount); count = 0; std::vector handles(count, nullptr); EXPECT_EQ(zesDeviceEnumRasErrorSets(device->toHandle(), &count, handles.data()), ZE_RESULT_SUCCESS); EXPECT_EQ(count, mockHandleCount); RasImp *pTestRasImp = new RasImp(pSysmanDeviceImp->pRasHandleContext->pOsSysman, ZES_RAS_ERROR_TYPE_CORRECTABLE, device->toHandle()); pSysmanDeviceImp->pRasHandleContext->handleList.push_back(pTestRasImp); EXPECT_EQ(zesDeviceEnumRasErrorSets(device->toHandle(), &count, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(count, mockHandleCount + 1); testcount = count; handles.resize(testcount); EXPECT_EQ(zesDeviceEnumRasErrorSets(device->toHandle(), &testcount, handles.data()), ZE_RESULT_SUCCESS); EXPECT_EQ(testcount, mockHandleCount + 1); EXPECT_NE(nullptr, 
handles.data()); pSysmanDeviceImp->pRasHandleContext->handleList.pop_back(); delete pTestRasImp; } TEST_F(SysmanRasFixture, GivenValidRasHandleWhenGettingRasPropertiesThenSuccessIsReturned) { RasImp *pTestRasImp = new RasImp(pSysmanDeviceImp->pRasHandleContext->pOsSysman, ZES_RAS_ERROR_TYPE_CORRECTABLE, device->toHandle()); pSysmanDeviceImp->pRasHandleContext->handleList.push_back(pTestRasImp); auto handles = get_ras_handles(mockHandleCount + 1); for (auto handle : handles) { zes_ras_properties_t properties = {}; EXPECT_EQ(ZE_RESULT_SUCCESS, zesRasGetProperties(handle, &properties)); EXPECT_EQ(properties.pNext, nullptr); EXPECT_EQ(properties.onSubdevice, false); EXPECT_EQ(properties.subdeviceId, 0u); EXPECT_EQ(properties.type, ZES_RAS_ERROR_TYPE_CORRECTABLE); } pSysmanDeviceImp->pRasHandleContext->handleList.pop_back(); delete pTestRasImp; } TEST_F(SysmanRasFixture, GivenValidRasHandleWhileCallingZesRasGetStateThenFailureIsReturned) { RasImp *pTestRasImp = new RasImp(pSysmanDeviceImp->pRasHandleContext->pOsSysman, ZES_RAS_ERROR_TYPE_CORRECTABLE, device->toHandle()); pSysmanDeviceImp->pRasHandleContext->handleList.push_back(pTestRasImp); auto handles = get_ras_handles(mockHandleCount + 1); for (auto handle : handles) { zes_ras_state_t state = {}; EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, zesRasGetState(handle, 0, &state)); } pSysmanDeviceImp->pRasHandleContext->handleList.pop_back(); delete pTestRasImp; } TEST_F(SysmanRasFixture, GivenValidRasHandleWhenCallingzesRasGetConfigAfterzesRasSetConfigThenSuccessIsReturned) { RasImp *pTestRasImp = new RasImp(pSysmanDeviceImp->pRasHandleContext->pOsSysman, ZES_RAS_ERROR_TYPE_CORRECTABLE, device->toHandle()); pSysmanDeviceImp->pRasHandleContext->handleList.push_back(pTestRasImp); auto handles = get_ras_handles(mockHandleCount + 1); for (auto handle : handles) { zes_ras_config_t setConfig = {}; zes_ras_config_t getConfig = {}; setConfig.totalThreshold = 50; memset(setConfig.detailedThresholds.category, 1, 
sizeof(setConfig.detailedThresholds.category)); EXPECT_EQ(ZE_RESULT_SUCCESS, zesRasSetConfig(handle, &setConfig)); EXPECT_EQ(ZE_RESULT_SUCCESS, zesRasGetConfig(handle, &getConfig)); EXPECT_EQ(setConfig.totalThreshold, getConfig.totalThreshold); int compare = std::memcmp(setConfig.detailedThresholds.category, getConfig.detailedThresholds.category, sizeof(setConfig.detailedThresholds.category)); EXPECT_EQ(0, compare); } pSysmanDeviceImp->pRasHandleContext->handleList.pop_back(); delete pTestRasImp; } TEST_F(SysmanRasFixture, GivenValidRasHandleWhenCallingzesRasSetConfigWithoutPermissionThenFailureIsReturned) { ON_CALL(*pFsAccess.get(), isRootUser()) .WillByDefault(::testing::Invoke(pFsAccess.get(), &Mock::userIsNotRoot)); RasImp *pTestRasImp = new RasImp(pSysmanDeviceImp->pRasHandleContext->pOsSysman, ZES_RAS_ERROR_TYPE_CORRECTABLE, device->toHandle()); pSysmanDeviceImp->pRasHandleContext->handleList.push_back(pTestRasImp); auto handles = get_ras_handles(mockHandleCount + 1); for (auto handle : handles) { zes_ras_config_t setConfig = {}; setConfig.totalThreshold = 50; memset(setConfig.detailedThresholds.category, 1, sizeof(setConfig.detailedThresholds.category)); EXPECT_EQ(ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS, zesRasSetConfig(handle, &setConfig)); } pSysmanDeviceImp->pRasHandleContext->releaseRasHandles(); } TEST_F(SysmanRasFixture, GivenValidInstanceWhenOsRasImplementationIsNullThenDestructorIsCalledWithoutException) { RasImp *pTestRasImp = new RasImp(); pTestRasImp->pOsRas = nullptr; EXPECT_NO_THROW(delete pTestRasImp;); } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/scheduler/000077500000000000000000000000001422164147700310135ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/scheduler/CMakeLists.txt000066400000000000000000000003041422164147700335500ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # 
SPDX-License-Identifier: MIT # target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) add_subdirectories() compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/scheduler/linux/000077500000000000000000000000001422164147700321525ustar00rootroot00000000000000CMakeLists.txt000066400000000000000000000013171422164147700346350ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/scheduler/linux# # Copyright (C) 2020-2022 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_TESTS_TOOLS_SYSMAN_SCHEDULER_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}test_zes_scheduler.cpp ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}mock_sysfs_scheduler.h ) if((NEO_ENABLE_i915_PRELIM_DETECTION) AND ("${BRANCH_TYPE}" STREQUAL "")) list(REMOVE_ITEM L0_TESTS_TOOLS_SYSMAN_SCHEDULER_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/test_zes_scheduler.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_sysfs_scheduler.h ) endif() if(UNIX) target_sources(${TARGET_NAME} PRIVATE ${L0_TESTS_TOOLS_SYSMAN_SCHEDULER_LINUX} ) endif() mock_sysfs_scheduler.h000066400000000000000000000252651422164147700364740ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/scheduler/linux/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/tools/source/sysman/scheduler/linux/os_scheduler_imp.h" #include "sysman/linux/fs_access.h" namespace L0 { namespace ult { const std::string preemptTimeoutMilliSecs("preempt_timeout_ms"); const std::string defaultPreemptTimeoutMilliSecs(".defaults/preempt_timeout_ms"); const std::string timesliceDurationMilliSecs("timeslice_duration_ms"); const std::string defaultTimesliceDurationMilliSecs(".defaults/timeslice_duration_ms"); const std::string heartbeatIntervalMilliSecs("heartbeat_interval_ms"); const std::string 
defaultHeartbeatIntervalMilliSecs(".defaults/heartbeat_interval_ms"); const std::string engineDir("engine"); const std::vector listOfMockedEngines = {"rcs0", "bcs0", "vcs0", "vcs1", "vecs0"}; class SchedulerSysfsAccess : public SysfsAccess {}; typedef struct SchedulerConfigValues { uint64_t defaultVal; uint64_t actualVal; } SchedulerConfigValues_t; typedef struct SchedulerConfig { SchedulerConfigValues_t timeOut; SchedulerConfigValues_t timeSclice; SchedulerConfigValues_t heartBeat; } SchedulerConfig_t; class SchedulerFileProperties { bool isAvailable = false; ::mode_t mode = 0; public: SchedulerFileProperties() = default; SchedulerFileProperties(bool isAvailable, ::mode_t mode) : isAvailable(isAvailable), mode(mode) {} bool getAvailability() { return isAvailable; } bool hasMode(::mode_t mode) { return mode & this->mode; } }; template <> struct Mock : public SysfsAccess { std::map engineSchedMap; std::map engineSchedFilePropertiesMap; ze_result_t getValForError(const std::string file, uint64_t &val) { return ZE_RESULT_ERROR_NOT_AVAILABLE; } ze_result_t getValForErrorWhileWrite(const std::string file, const uint64_t val) { return ZE_RESULT_ERROR_NOT_AVAILABLE; } void cleanUpMap() { for (std::string mappedEngine : listOfMockedEngines) { auto it = engineSchedMap.find(mappedEngine); if (it != engineSchedMap.end()) { delete it->second; } } } ze_result_t getFileProperties(const std::string file, SchedulerFileProperties &fileProps) { auto iterator = engineSchedFilePropertiesMap.find(file); if (iterator != engineSchedFilePropertiesMap.end()) { fileProps = static_cast(iterator->second); return ZE_RESULT_SUCCESS; } return ZE_RESULT_ERROR_UNKNOWN; } ze_result_t setFileProperties(const std::string &engine, const std::string file, bool isAvailable, ::mode_t mode) { auto iterator = std::find(listOfMockedEngines.begin(), listOfMockedEngines.end(), engine); if (iterator != listOfMockedEngines.end()) { engineSchedFilePropertiesMap[engineDir + "/" + engine + "/" + file] = 
SchedulerFileProperties(isAvailable, mode); return ZE_RESULT_SUCCESS; } return ZE_RESULT_ERROR_UNKNOWN; } ze_result_t getVal(const std::string file, uint64_t &val) { SchedulerFileProperties fileProperties; ze_result_t result = getFileProperties(file, fileProperties); if (ZE_RESULT_SUCCESS == result) { if (!fileProperties.getAvailability()) { return ZE_RESULT_ERROR_NOT_AVAILABLE; } if (!fileProperties.hasMode(S_IRUSR)) { return ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS; } } else { return ZE_RESULT_ERROR_UNKNOWN; } for (std::string mappedEngine : listOfMockedEngines) { if (file.find(mappedEngine) == std::string::npos) { continue; } auto it = engineSchedMap.find(mappedEngine); if (it == engineSchedMap.end()) { return ZE_RESULT_ERROR_NOT_AVAILABLE; } if (file.compare((file.length() - preemptTimeoutMilliSecs.length()), preemptTimeoutMilliSecs.length(), preemptTimeoutMilliSecs) == 0) { if (file.find(".defaults") != std::string::npos) { val = it->second->timeOut.defaultVal; } else { val = it->second->timeOut.actualVal; } return ZE_RESULT_SUCCESS; } if (file.compare((file.length() - timesliceDurationMilliSecs.length()), timesliceDurationMilliSecs.length(), timesliceDurationMilliSecs) == 0) { if (file.find(".defaults") != std::string::npos) { val = it->second->timeSclice.defaultVal; } else { val = it->second->timeSclice.actualVal; } return ZE_RESULT_SUCCESS; } if (file.compare((file.length() - heartbeatIntervalMilliSecs.length()), heartbeatIntervalMilliSecs.length(), heartbeatIntervalMilliSecs) == 0) { if (file.find(".defaults") != std::string::npos) { val = it->second->heartBeat.defaultVal; } else { val = it->second->heartBeat.actualVal; } return ZE_RESULT_SUCCESS; } } return ZE_RESULT_ERROR_NOT_AVAILABLE; } ze_result_t setVal(const std::string file, const uint64_t val) { SchedulerFileProperties fileProperties; ze_result_t result = getFileProperties(file, fileProperties); if (ZE_RESULT_SUCCESS == result) { if (!fileProperties.getAvailability()) { return 
ZE_RESULT_ERROR_NOT_AVAILABLE; } if (!fileProperties.hasMode(S_IWUSR)) { return ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS; } } else { return ZE_RESULT_ERROR_UNKNOWN; } for (std::string mappedEngine : listOfMockedEngines) { if (file.find(mappedEngine) == std::string::npos) { continue; } SchedulerConfig_t *schedConfig = new SchedulerConfig_t(); if (file.compare((file.length() - preemptTimeoutMilliSecs.length()), preemptTimeoutMilliSecs.length(), preemptTimeoutMilliSecs) == 0) { if (file.find(".defaults") != std::string::npos) { schedConfig->timeOut.defaultVal = val; } else { schedConfig->timeOut.actualVal = val; } auto ret = engineSchedMap.emplace(mappedEngine, schedConfig); if (ret.second == false) { auto itr = engineSchedMap.find(mappedEngine); if (file.find(".defaults") != std::string::npos) { itr->second->timeOut.defaultVal = val; } else { itr->second->timeOut.actualVal = val; } delete schedConfig; } return ZE_RESULT_SUCCESS; } if (file.compare((file.length() - timesliceDurationMilliSecs.length()), timesliceDurationMilliSecs.length(), timesliceDurationMilliSecs) == 0) { if (file.find(".defaults") != std::string::npos) { schedConfig->timeSclice.defaultVal = val; } else { schedConfig->timeSclice.actualVal = val; } auto ret = engineSchedMap.emplace(mappedEngine, schedConfig); if (ret.second == false) { auto itr = engineSchedMap.find(mappedEngine); if (file.find(".defaults") != std::string::npos) { itr->second->timeSclice.defaultVal = val; } else { itr->second->timeSclice.actualVal = val; } delete schedConfig; } return ZE_RESULT_SUCCESS; } if (file.compare((file.length() - heartbeatIntervalMilliSecs.length()), heartbeatIntervalMilliSecs.length(), heartbeatIntervalMilliSecs) == 0) { if (file.find(".defaults") != std::string::npos) { schedConfig->heartBeat.defaultVal = val; } else { schedConfig->heartBeat.actualVal = val; } auto ret = engineSchedMap.emplace(mappedEngine, schedConfig); if (ret.second == false) { auto itr = engineSchedMap.find(mappedEngine); if 
(file.find(".defaults") != std::string::npos) { itr->second->heartBeat.defaultVal = val; } else { itr->second->heartBeat.actualVal = val; } delete schedConfig; } return ZE_RESULT_SUCCESS; } } return ZE_RESULT_ERROR_NOT_AVAILABLE; } ze_result_t getscanDirEntries(const std::string file, std::vector &listOfEntries) { if (!isDirectoryAccessible(engineDir)) { return ZE_RESULT_ERROR_NOT_AVAILABLE; } if (!(engineDirectoryPermissions & S_IRUSR)) { return ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS; } listOfEntries = listOfMockedEngines; return ZE_RESULT_SUCCESS; } ze_result_t getscanDirEntriesStatusReturnError(const std::string file, std::vector &listOfEntries) { return ZE_RESULT_ERROR_NOT_AVAILABLE; } void setEngineDirectoryPermission(::mode_t permission) { engineDirectoryPermissions = permission; } Mock() = default; MOCK_METHOD(ze_result_t, read, (const std::string file, uint64_t &val), (override)); MOCK_METHOD(ze_result_t, write, (const std::string file, const uint64_t val), (override)); MOCK_METHOD(ze_result_t, scanDirEntries, (const std::string file, std::vector &listOfEntries), (override)); private: ::mode_t engineDirectoryPermissions = S_IRUSR | S_IWUSR; bool isDirectoryAccessible(const std::string dir) { if (dir.compare(engineDir) == 0) { return true; } return false; } }; class PublicLinuxSchedulerImp : public L0::LinuxSchedulerImp { public: using LinuxSchedulerImp::pSysfsAccess; }; } // namespace ult } // namespace L0 test_zes_scheduler.cpp000066400000000000000000001055331422164147700365040ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/scheduler/linux/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/test/unit_tests/sources/sysman/linux/mock_sysman_fixture.h" #include "mock_sysfs_scheduler.h" extern bool sysmanUltsEnable; using ::testing::DoDefault; using ::testing::Return; namespace L0 { namespace ult { constexpr uint32_t handleComponentCount = 4u; 
constexpr uint64_t convertMilliToMicro = 1000u; constexpr uint64_t defaultTimeoutMilliSecs = 650u; constexpr uint64_t defaultTimesliceMilliSecs = 1u; constexpr uint64_t defaultHeartbeatMilliSecs = 3000u; constexpr uint64_t timeoutMilliSecs = 640u; constexpr uint64_t timesliceMilliSecs = 1u; constexpr uint64_t heartbeatMilliSecs = 2500u; constexpr uint64_t expectedDefaultHeartbeatTimeoutMicroSecs = defaultHeartbeatMilliSecs * convertMilliToMicro; constexpr uint64_t expectedDefaultTimeoutMicroSecs = defaultTimeoutMilliSecs * convertMilliToMicro; constexpr uint64_t expectedDefaultTimesliceMicroSecs = defaultTimesliceMilliSecs * convertMilliToMicro; constexpr uint64_t expectedHeartbeatTimeoutMicroSecs = heartbeatMilliSecs * convertMilliToMicro; constexpr uint64_t expectedTimeoutMicroSecs = timeoutMilliSecs * convertMilliToMicro; constexpr uint64_t expectedTimesliceMicroSecs = timesliceMilliSecs * convertMilliToMicro; class SysmanDeviceSchedulerFixture : public SysmanDeviceFixture { protected: std::unique_ptr> pSysfsAccess; SysfsAccess *pSysfsAccessOld = nullptr; std::vector deviceHandles; void SetUp() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } SysmanDeviceFixture::SetUp(); pSysfsAccessOld = pLinuxSysmanImp->pSysfsAccess; pSysfsAccess = std::make_unique>>(); pLinuxSysmanImp->pSysfsAccess = pSysfsAccess.get(); for_each(listOfMockedEngines.begin(), listOfMockedEngines.end(), [=](std::string engineName) { pSysfsAccess->setFileProperties(engineName, defaultPreemptTimeoutMilliSecs, true, S_IRUSR | S_IRGRP | S_IROTH | S_IWUSR); pSysfsAccess->setFileProperties(engineName, defaultTimesliceDurationMilliSecs, true, S_IRUSR | S_IRGRP | S_IROTH | S_IWUSR); pSysfsAccess->setFileProperties(engineName, defaultHeartbeatIntervalMilliSecs, true, S_IRUSR | S_IRGRP | S_IROTH | S_IWUSR); pSysfsAccess->setFileProperties(engineName, preemptTimeoutMilliSecs, true, S_IRUSR | S_IRGRP | S_IROTH | S_IWUSR); pSysfsAccess->setFileProperties(engineName, timesliceDurationMilliSecs, true, 
S_IRUSR | S_IRGRP | S_IROTH | S_IWUSR); pSysfsAccess->setFileProperties(engineName, heartbeatIntervalMilliSecs, true, S_IRUSR | S_IRGRP | S_IROTH | S_IWUSR); pSysfsAccess->setVal(engineDir + "/" + engineName + "/" + defaultPreemptTimeoutMilliSecs, defaultTimeoutMilliSecs); pSysfsAccess->setVal(engineDir + "/" + engineName + "/" + defaultTimesliceDurationMilliSecs, defaultTimesliceMilliSecs); pSysfsAccess->setVal(engineDir + "/" + engineName + "/" + defaultHeartbeatIntervalMilliSecs, defaultHeartbeatMilliSecs); pSysfsAccess->setVal(engineDir + "/" + engineName + "/" + preemptTimeoutMilliSecs, timeoutMilliSecs); pSysfsAccess->setVal(engineDir + "/" + engineName + "/" + timesliceDurationMilliSecs, timesliceMilliSecs); pSysfsAccess->setVal(engineDir + "/" + engineName + "/" + heartbeatIntervalMilliSecs, heartbeatMilliSecs); }); ON_CALL(*pSysfsAccess.get(), read(_, _)) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::getVal)); ON_CALL(*pSysfsAccess.get(), write(_, _)) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::setVal)); ON_CALL(*pSysfsAccess.get(), scanDirEntries(_, _)) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::getscanDirEntries)); // delete handles created in initial SysmanDeviceHandleContext::init() call for (auto handle : pSysmanDeviceImp->pSchedulerHandleContext->handleList) { delete handle; } pSysmanDeviceImp->pSchedulerHandleContext->handleList.clear(); uint32_t subDeviceCount = 0; // We received a device handle. 
Check for subdevices in this device Device::fromHandle(device->toHandle())->getSubDevices(&subDeviceCount, nullptr); if (subDeviceCount == 0) { deviceHandles.resize(1, device->toHandle()); } else { deviceHandles.resize(subDeviceCount, nullptr); Device::fromHandle(device->toHandle())->getSubDevices(&subDeviceCount, deviceHandles.data()); } pSysmanDeviceImp->pSchedulerHandleContext->init(deviceHandles); } void TearDown() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } pSysfsAccess->cleanUpMap(); SysmanDeviceFixture::TearDown(); pLinuxSysmanImp->pSysfsAccess = pSysfsAccessOld; } std::vector get_sched_handles(uint32_t count) { std::vector handles(count, nullptr); EXPECT_EQ(zesDeviceEnumSchedulers(device->toHandle(), &count, handles.data()), ZE_RESULT_SUCCESS); return handles; } zes_sched_mode_t fixtureGetCurrentMode(zes_sched_handle_t hScheduler) { zes_sched_mode_t mode; ze_result_t result = zesSchedulerGetCurrentMode(hScheduler, &mode); EXPECT_EQ(ZE_RESULT_SUCCESS, result); return mode; } zes_sched_timeout_properties_t fixtureGetTimeoutModeProperties(zes_sched_handle_t hScheduler, ze_bool_t getDefaults) { zes_sched_timeout_properties_t config; ze_result_t result = zesSchedulerGetTimeoutModeProperties(hScheduler, getDefaults, &config); EXPECT_EQ(ZE_RESULT_SUCCESS, result); return config; } zes_sched_timeslice_properties_t fixtureGetTimesliceModeProperties(zes_sched_handle_t hScheduler, ze_bool_t getDefaults) { zes_sched_timeslice_properties_t config; ze_result_t result = zesSchedulerGetTimesliceModeProperties(hScheduler, getDefaults, &config); EXPECT_EQ(ZE_RESULT_SUCCESS, result); return config; } }; TEST_F(SysmanDeviceSchedulerFixture, GivenComponentCountZeroWhenCallingzesDeviceEnumSchedulersAndSysfsCanReadReturnsErrorThenZeroCountIsReturned) { ON_CALL(*pSysfsAccess.get(), scanDirEntries(_, _)) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::getscanDirEntriesStatusReturnError)); auto pSchedulerHandleContextTest = std::make_unique(pOsSysman); 
pSchedulerHandleContextTest->init(deviceHandles); EXPECT_EQ(0u, static_cast(pSchedulerHandleContextTest->handleList.size())); } TEST_F(SysmanDeviceSchedulerFixture, GivenComponentCountZeroWhenCallingzesDeviceEnumSchedulersAndSysfsCanReadReturnsIncorrectPermissionThenZeroCountIsReturned) { pSysfsAccess->setEngineDirectoryPermission(0); auto pSchedulerHandleContextTest = std::make_unique(pOsSysman); pSchedulerHandleContextTest->init(deviceHandles); EXPECT_EQ(0u, static_cast(pSchedulerHandleContextTest->handleList.size())); } TEST_F(SysmanDeviceSchedulerFixture, GivenComponentCountZeroWhenCallingzesDeviceEnumSchedulersThenNonZeroCountIsReturnedAndVerifyCallSucceeds) { uint32_t count = 0; EXPECT_EQ(ZE_RESULT_SUCCESS, zesDeviceEnumSchedulers(device->toHandle(), &count, NULL)); EXPECT_EQ(count, handleComponentCount); uint32_t testcount = count + 1; EXPECT_EQ(ZE_RESULT_SUCCESS, zesDeviceEnumSchedulers(device->toHandle(), &testcount, NULL)); EXPECT_EQ(testcount, count); count = 0; std::vector handles(count, nullptr); EXPECT_EQ(zesDeviceEnumSchedulers(device->toHandle(), &count, handles.data()), ZE_RESULT_SUCCESS); EXPECT_EQ(count, handleComponentCount); } TEST_F(SysmanDeviceSchedulerFixture, GivenValidDeviceHandleWhenCallingzesSchedulerGetCurrentModeThenVerifyzesSchedulerGetCurrentModeCallSucceeds) { auto handles = get_sched_handles(handleComponentCount); for (auto handle : handles) { auto mode = fixtureGetCurrentMode(handle); EXPECT_EQ(mode, ZES_SCHED_MODE_TIMESLICE); } } TEST_F(SysmanDeviceSchedulerFixture, GivenValidDeviceHandleWhenCallingzesSchedulerGetTimeoutModePropertiesThenVerifyzesSchedulerGetTimeoutModePropertiesCallSucceeds) { auto handles = get_sched_handles(handleComponentCount); for (auto handle : handles) { auto config = fixtureGetTimeoutModeProperties(handle, false); EXPECT_EQ(config.watchdogTimeout, expectedHeartbeatTimeoutMicroSecs); } } TEST_F(SysmanDeviceSchedulerFixture, 
GivenValidDeviceHandleWhenCallingzesSchedulerGetTimeoutModePropertiesThenVerifyzesSchedulerGetTimeoutModePropertiesForDifferingValues) { auto handles = get_sched_handles(handleComponentCount); pSysfsAccess->setVal(engineDir + "/" + "vcs1" + "/" + heartbeatIntervalMilliSecs, (heartbeatMilliSecs + 5)); for (auto handle : handles) { zes_sched_properties_t properties = {}; zes_sched_timeout_properties_t config; ze_result_t result = zesSchedulerGetProperties(handle, &properties); EXPECT_EQ(ZE_RESULT_SUCCESS, result); if (properties.engines == ZES_ENGINE_TYPE_FLAG_MEDIA) { ze_result_t result = zesSchedulerGetTimeoutModeProperties(handle, false, &config); EXPECT_NE(ZE_RESULT_SUCCESS, result); } } } TEST_F(SysmanDeviceSchedulerFixture, GivenValidDeviceHandleWhenCallingzesSchedulerGetTimeoutModePropertiesWithDefaultsThenVerifyzesSchedulerGetTimeoutModePropertiesCallSucceeds) { auto handles = get_sched_handles(handleComponentCount); for (auto handle : handles) { auto config = fixtureGetTimeoutModeProperties(handle, true); EXPECT_EQ(config.watchdogTimeout, expectedDefaultHeartbeatTimeoutMicroSecs); } } TEST_F(SysmanDeviceSchedulerFixture, GivenValidDeviceHandleWhenCallingzesSchedulerGetTimesliceModePropertiesThenVerifyzesSchedulerGetTimesliceModePropertiesCallSucceeds) { auto handles = get_sched_handles(handleComponentCount); for (auto handle : handles) { auto config = fixtureGetTimesliceModeProperties(handle, false); EXPECT_EQ(config.interval, expectedTimesliceMicroSecs); EXPECT_EQ(config.yieldTimeout, expectedTimeoutMicroSecs); } } TEST_F(SysmanDeviceSchedulerFixture, GivenValidDeviceHandleWhenCallingzesSchedulerGetTimescliceModePropertiesThenVerifyzesSchedulerGetTimescliceModePropertiesForDifferingPreemptTimeoutValues) { auto handles = get_sched_handles(handleComponentCount); pSysfsAccess->setVal(engineDir + "/" + "vcs1" + "/" + preemptTimeoutMilliSecs, (timeoutMilliSecs + 5)); for (auto handle : handles) { zes_sched_properties_t properties = {}; 
zes_sched_timeslice_properties_t config; ze_result_t result = zesSchedulerGetProperties(handle, &properties); EXPECT_EQ(ZE_RESULT_SUCCESS, result); if (properties.engines == ZES_ENGINE_TYPE_FLAG_MEDIA) { ze_result_t result = zesSchedulerGetTimesliceModeProperties(handle, false, &config); EXPECT_NE(ZE_RESULT_SUCCESS, result); } } } TEST_F(SysmanDeviceSchedulerFixture, GivenValidDeviceHandleWhenCallingzesSchedulerGetTimescliceModePropertiesThenVerifyzesSchedulerGetTimescliceModePropertiesForDifferingTimesliceDurationValues) { auto handles = get_sched_handles(handleComponentCount); pSysfsAccess->setVal(engineDir + "/" + "vcs1" + "/" + timesliceDurationMilliSecs, (timesliceMilliSecs + 5)); for (auto handle : handles) { zes_sched_properties_t properties = {}; zes_sched_timeslice_properties_t config; ze_result_t result = zesSchedulerGetProperties(handle, &properties); EXPECT_EQ(ZE_RESULT_SUCCESS, result); if (properties.engines == ZES_ENGINE_TYPE_FLAG_MEDIA) { ze_result_t result = zesSchedulerGetTimesliceModeProperties(handle, false, &config); EXPECT_NE(ZE_RESULT_SUCCESS, result); } } } TEST_F(SysmanDeviceSchedulerFixture, GivenValidDeviceHandleWhenCallingzesSchedulerGetTimeoutModePropertiesThenVerifyzesSchedulerGetTimeoutModePropertiesForReadFileFailureFileUnavailable) { auto handles = get_sched_handles(handleComponentCount); for (auto handle : handles) { zes_sched_properties_t properties = {}; zes_sched_timeout_properties_t config; ze_result_t result = zesSchedulerGetProperties(handle, &properties); EXPECT_EQ(ZE_RESULT_SUCCESS, result); if (properties.engines == ZES_ENGINE_TYPE_FLAG_MEDIA) { EXPECT_CALL(*pSysfsAccess.get(), read(_, _)) .WillOnce(Return(ZE_RESULT_ERROR_NOT_AVAILABLE)) .WillRepeatedly(DoDefault()); ze_result_t result = zesSchedulerGetTimeoutModeProperties(handle, false, &config); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, result); } } } TEST_F(SysmanDeviceSchedulerFixture, 
GivenValidDeviceHandleWhenCallingzesSchedulerGetTimeoutModePropertiesThenVerifyzesSchedulerGetTimeoutModePropertiesForReadFileFailureInsufficientPermissions) { auto handles = get_sched_handles(handleComponentCount); for (auto handle : handles) { zes_sched_properties_t properties = {}; zes_sched_timeout_properties_t config; ze_result_t result = zesSchedulerGetProperties(handle, &properties); EXPECT_EQ(ZE_RESULT_SUCCESS, result); if (properties.engines == ZES_ENGINE_TYPE_FLAG_MEDIA) { EXPECT_CALL(*pSysfsAccess.get(), read(_, _)) .WillOnce(Return(ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS)) .WillRepeatedly(DoDefault()); ze_result_t result = zesSchedulerGetTimeoutModeProperties(handle, false, &config); EXPECT_EQ(ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS, result); } } } TEST_F(SysmanDeviceSchedulerFixture, GivenValidDeviceHandleWhenCallingzesSchedulerGetTimescliceModePropertiesThenVerifyzesSchedulerGetTimescliceModePropertiesForReadFileFailureDueToUnavailable) { auto handles = get_sched_handles(handleComponentCount); for (auto handle : handles) { zes_sched_timeslice_properties_t config; zes_sched_properties_t properties = {}; ze_result_t result = zesSchedulerGetProperties(handle, &properties); EXPECT_EQ(ZE_RESULT_SUCCESS, result); if (properties.engines == ZES_ENGINE_TYPE_FLAG_MEDIA) { EXPECT_CALL(*pSysfsAccess.get(), read(_, _)) .WillOnce(Return(ZE_RESULT_SUCCESS)) .WillOnce(Return(ZE_RESULT_SUCCESS)) .WillOnce(Return(ZE_RESULT_SUCCESS)) //3 reads to satisfy Prempt timeout reads .WillOnce(Return(ZE_RESULT_ERROR_NOT_AVAILABLE)) // failure in timesclice read. 
.WillRepeatedly(DoDefault()); result = zesSchedulerGetTimesliceModeProperties(handle, false, &config); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, result); } } } TEST_F(SysmanDeviceSchedulerFixture, GivenValidDeviceHandleWhenCallingzesSchedulerGetTimescliceModePropertiesThenVerifyzesSchedulerGetTimescliceModePropertiesForReadFileFailureDueToInsufficientPermissions) { auto handles = get_sched_handles(handleComponentCount); for (auto handle : handles) { zes_sched_timeslice_properties_t config; zes_sched_properties_t properties = {}; ze_result_t result = zesSchedulerGetProperties(handle, &properties); EXPECT_EQ(ZE_RESULT_SUCCESS, result); if (properties.engines == ZES_ENGINE_TYPE_FLAG_MEDIA) { EXPECT_CALL(*pSysfsAccess.get(), read(_, _)) .WillOnce(Return(ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS)) .WillRepeatedly(DoDefault()); result = zesSchedulerGetTimesliceModeProperties(handle, false, &config); EXPECT_EQ(ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS, result); } } } TEST_F(SysmanDeviceSchedulerFixture, GivenValidDeviceHandleWhenCallingzesSchedulerGetTimesliceModePropertiesWithDefaultsThenVerifyzesSchedulerGetTimesliceModePropertiesCallSucceeds) { auto handles = get_sched_handles(handleComponentCount); for (auto handle : handles) { auto config = fixtureGetTimesliceModeProperties(handle, true); EXPECT_EQ(config.interval, expectedDefaultTimesliceMicroSecs); EXPECT_EQ(config.yieldTimeout, expectedDefaultTimeoutMicroSecs); } } TEST_F(SysmanDeviceSchedulerFixture, GivenValidDeviceHandleWhenCallingzesSchedulerSetTimeoutModeThenVerifyzesSchedulerSetTimeoutModeCallSucceeds) { auto handles = get_sched_handles(handleComponentCount); for (auto handle : handles) { ze_bool_t needReboot; zes_sched_timeout_properties_t setConfig; setConfig.watchdogTimeout = 10000u; ze_result_t result = zesSchedulerSetTimeoutMode(handle, &setConfig, &needReboot); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_FALSE(needReboot); auto getConfig = fixtureGetTimeoutModeProperties(handle, false); 
EXPECT_EQ(getConfig.watchdogTimeout, setConfig.watchdogTimeout); auto mode = fixtureGetCurrentMode(handle); EXPECT_EQ(mode, ZES_SCHED_MODE_TIMEOUT); } } TEST_F(SysmanDeviceSchedulerFixture, GivenValidDeviceHandleWhenCallingzesSchedulerSetTimeoutModeWhenTimeoutLessThanMinimumThenVerifyzesSchedulerSetTimeoutModeCallFails) { auto handles = get_sched_handles(handleComponentCount); for (auto handle : handles) { ze_bool_t needReboot; zes_sched_timeout_properties_t setConfig; setConfig.watchdogTimeout = minTimeoutModeHeartbeat - 1; ze_result_t result = zesSchedulerSetTimeoutMode(handle, &setConfig, &needReboot); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, result); } } TEST_F(SysmanDeviceSchedulerFixture, GivenValidDeviceHandleWhenCallingzesSchedulerSetTimeoutModeWhenCurrentModeIsTimeoutModeThenVerifyzesSchedulerSetTimeoutModeCallSucceeds) { auto handles = get_sched_handles(handleComponentCount); for (auto handle : handles) { ze_bool_t needReboot; zes_sched_timeout_properties_t setTimeOutConfig; setTimeOutConfig.watchdogTimeout = 10000u; ze_result_t result = zesSchedulerSetTimeoutMode(handle, &setTimeOutConfig, &needReboot); EXPECT_EQ(ZE_RESULT_SUCCESS, result); result = zesSchedulerSetTimeoutMode(handle, &setTimeOutConfig, &needReboot); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } } TEST_F(SysmanDeviceSchedulerFixture, GivenValidDeviceHandleWhenCallingzesSchedulerSetTimeoutModeWhenHeartBeatSettingFailsThenVerifyzesSchedulerSetTimeoutModeCallFails) { for_each(listOfMockedEngines.begin(), listOfMockedEngines.end(), [=](std::string engineName) { pSysfsAccess->setFileProperties(engineName, heartbeatIntervalMilliSecs, false, S_IRUSR | S_IRGRP | S_IROTH | S_IWUSR); }); auto handles = get_sched_handles(handleComponentCount); for (auto handle : handles) { ze_bool_t needReboot; zes_sched_timeout_properties_t setTimeOutConfig; setTimeOutConfig.watchdogTimeout = 10000u; ze_result_t result = zesSchedulerSetTimeoutMode(handle, &setTimeOutConfig, &needReboot); 
EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, result); } } TEST_F(SysmanDeviceSchedulerFixture, GivenValidDeviceHandleWhenCallingzesSchedulerSetTimeoutModeWhenGetCurrentModeFailsThenVerifyzesSchedulerSetTimeoutModeCallFails) { for_each(listOfMockedEngines.begin(), listOfMockedEngines.end(), [=](std::string engineName) { pSysfsAccess->setFileProperties(engineName, preemptTimeoutMilliSecs, false, S_IRUSR | S_IRGRP | S_IROTH | S_IWUSR); }); auto handles = get_sched_handles(handleComponentCount); for (auto handle : handles) { ze_bool_t needReboot; zes_sched_timeout_properties_t setConfig; setConfig.watchdogTimeout = minTimeoutModeHeartbeat; ze_result_t result = zesSchedulerSetTimeoutMode(handle, &setConfig, &needReboot); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, result); } } TEST_F(SysmanDeviceSchedulerFixture, GivenValidDeviceHandleWhenCallingzesSchedulerSetTimeoutModeWhenPreEmptTimeoutNoPermissionThenVerifyzesSchedulerSetTimeoutModeCallFails) { for_each(listOfMockedEngines.begin(), listOfMockedEngines.end(), [=](std::string engineName) { pSysfsAccess->setFileProperties(engineName, preemptTimeoutMilliSecs, true, S_IRUSR | S_IRGRP | S_IROTH); }); auto handles = get_sched_handles(handleComponentCount); for (auto handle : handles) { ze_bool_t needReboot; zes_sched_timeout_properties_t setConfig; setConfig.watchdogTimeout = minTimeoutModeHeartbeat; ze_result_t result = zesSchedulerSetTimeoutMode(handle, &setConfig, &needReboot); EXPECT_EQ(ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS, result); } } TEST_F(SysmanDeviceSchedulerFixture, GivenValidDeviceHandleWhenCallingzesSchedulerSetTimeoutModeWhenTimeSliceDurationNoPermissionThenVerifyzesSchedulerSetTimeoutModeCallFails) { for_each(listOfMockedEngines.begin(), listOfMockedEngines.end(), [=](std::string engineName) { pSysfsAccess->setFileProperties(engineName, timesliceDurationMilliSecs, true, S_IRUSR | S_IRGRP | S_IROTH); }); auto handles = get_sched_handles(handleComponentCount); for (auto handle : handles) { ze_bool_t 
needReboot; zes_sched_timeout_properties_t setConfig; setConfig.watchdogTimeout = minTimeoutModeHeartbeat; ze_result_t result = zesSchedulerSetTimeoutMode(handle, &setConfig, &needReboot); EXPECT_EQ(ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS, result); } } TEST_F(SysmanDeviceSchedulerFixture, GivenValidDeviceHandleWhenCallingzesSchedulerSetTimesliceModeThenVerifyzesSchedulerSetTimesliceModeCallSucceeds) { auto handles = get_sched_handles(handleComponentCount); for (auto handle : handles) { ze_bool_t needReboot; zes_sched_timeslice_properties_t setConfig; setConfig.interval = 1000u; setConfig.yieldTimeout = 1000u; ze_result_t result = zesSchedulerSetTimesliceMode(handle, &setConfig, &needReboot); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_FALSE(needReboot); auto getConfig = fixtureGetTimesliceModeProperties(handle, false); EXPECT_EQ(getConfig.interval, setConfig.interval); EXPECT_EQ(getConfig.yieldTimeout, setConfig.yieldTimeout); auto mode = fixtureGetCurrentMode(handle); EXPECT_EQ(mode, ZES_SCHED_MODE_TIMESLICE); } } TEST_F(SysmanDeviceSchedulerFixture, GivenValidDeviceHandleWhenCallingzesSchedulerSetTimesliceModeWhenIntervalIsLessThanMinimumThenVerifyzesSchedulerSetTimesliceModeCallFails) { auto handles = get_sched_handles(handleComponentCount); for (auto handle : handles) { ze_bool_t needReboot; zes_sched_timeslice_properties_t setConfig; setConfig.interval = minTimeoutInMicroSeconds - 1; setConfig.yieldTimeout = 1000u; ze_result_t result = zesSchedulerSetTimesliceMode(handle, &setConfig, &needReboot); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, result); } } TEST_F(SysmanDeviceSchedulerFixture, GivenValidDeviceHandleWhenCallingzesSchedulerSetTimesliceModeWhenNoAccessToTimeSliceDurationThenVerifyzesSchedulerSetTimesliceModeCallFails) { for_each(listOfMockedEngines.begin(), listOfMockedEngines.end(), [=](std::string engineName) { pSysfsAccess->setFileProperties(engineName, timesliceDurationMilliSecs, true, S_IRUSR | S_IRGRP | S_IROTH); }); auto handles = 
get_sched_handles(handleComponentCount); for (auto handle : handles) { ze_bool_t needReboot; zes_sched_timeslice_properties_t setConfig; setConfig.interval = minTimeoutInMicroSeconds; setConfig.yieldTimeout = 1000u; ze_result_t result = zesSchedulerSetTimesliceMode(handle, &setConfig, &needReboot); EXPECT_EQ(ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS, result); } } TEST_F(SysmanDeviceSchedulerFixture, GivenValidDeviceHandleWhenCallingzesSchedulerSetTimesliceModeWhenNoAccessToHeartBeatIntervalThenVerifyzesSchedulerSetTimesliceModeCallFails) { for_each(listOfMockedEngines.begin(), listOfMockedEngines.end(), [=](std::string engineName) { pSysfsAccess->setFileProperties(engineName, heartbeatIntervalMilliSecs, true, S_IRUSR | S_IRGRP | S_IROTH); }); auto handles = get_sched_handles(handleComponentCount); for (auto handle : handles) { ze_bool_t needReboot; zes_sched_timeslice_properties_t setConfig; setConfig.interval = minTimeoutInMicroSeconds; setConfig.yieldTimeout = 1000u; ze_result_t result = zesSchedulerSetTimesliceMode(handle, &setConfig, &needReboot); EXPECT_EQ(ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS, result); } } TEST_F(SysmanDeviceSchedulerFixture, GivenValidDeviceHandleWhenCallingzesSchedulerSetExclusiveModeThenVerifyzesSchedulerSetExclusiveModeCallSucceeds) { auto handles = get_sched_handles(handleComponentCount); for (auto handle : handles) { ze_bool_t needReboot; ze_result_t result = zesSchedulerSetExclusiveMode(handle, &needReboot); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_FALSE(needReboot); auto mode = fixtureGetCurrentMode(handle); EXPECT_EQ(mode, ZES_SCHED_MODE_EXCLUSIVE); } } TEST_F(SysmanDeviceSchedulerFixture, GivenValidDeviceHandleWhenCallingzesSchedulerSetExclusiveModeWhenPreEmptTimeoutNotAvailableThenVerifyzesSchedulerSetExclusiveModeCallFails) { for_each(listOfMockedEngines.begin(), listOfMockedEngines.end(), [=](std::string engineName) { pSysfsAccess->setFileProperties(engineName, preemptTimeoutMilliSecs, false, S_IRUSR | S_IRGRP | S_IROTH | 
S_IWUSR); }); auto handles = get_sched_handles(handleComponentCount); for (auto handle : handles) { ze_bool_t needReboot; ze_result_t result = zesSchedulerSetExclusiveMode(handle, &needReboot); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, result); } } TEST_F(SysmanDeviceSchedulerFixture, GivenValidDeviceHandleWhenCallingzesSchedulerSetExclusiveModeWhenTimeSliceDurationNotAvailableThenVerifyzesSchedulerSetExclusiveModeCallFails) { for_each(listOfMockedEngines.begin(), listOfMockedEngines.end(), [=](std::string engineName) { pSysfsAccess->setFileProperties(engineName, timesliceDurationMilliSecs, false, S_IRUSR | S_IRGRP | S_IROTH | S_IWUSR); }); auto handles = get_sched_handles(handleComponentCount); for (auto handle : handles) { ze_bool_t needReboot; ze_result_t result = zesSchedulerSetExclusiveMode(handle, &needReboot); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, result); } } TEST_F(SysmanDeviceSchedulerFixture, GivenValidDeviceHandleWhenCallingzesSchedulerSetExclusiveModeWhenTimeSliceDurationHasNoPermissionsThenVerifyzesSchedulerSetExclusiveModeCallFails) { for_each(listOfMockedEngines.begin(), listOfMockedEngines.end(), [=](std::string engineName) { pSysfsAccess->setFileProperties(engineName, timesliceDurationMilliSecs, true, S_IRUSR | S_IRGRP | S_IROTH); }); auto handles = get_sched_handles(handleComponentCount); for (auto handle : handles) { ze_bool_t needReboot; ze_result_t result = zesSchedulerSetExclusiveMode(handle, &needReboot); EXPECT_EQ(ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS, result); } } TEST_F(SysmanDeviceSchedulerFixture, GivenValidDeviceHandleWhenCallingzesSchedulerGetCurrentModeWhenPreEmptTimeOutNotAvailableThenFailureIsReturned) { for_each(listOfMockedEngines.begin(), listOfMockedEngines.end(), [=](std::string engineName) { pSysfsAccess->setFileProperties(engineName, preemptTimeoutMilliSecs, false, S_IRUSR | S_IRGRP | S_IROTH | S_IWUSR); }); auto handles = get_sched_handles(handleComponentCount); for (auto handle : handles) { 
zes_sched_mode_t mode; ze_result_t result = zesSchedulerGetCurrentMode(handle, &mode); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, result); } } TEST_F(SysmanDeviceSchedulerFixture, GivenValidDeviceHandleWhenCallingzesSchedulerGetCurrentModeWhenTimeSliceDurationNotAvailableThenFailureIsReturned) { for_each(listOfMockedEngines.begin(), listOfMockedEngines.end(), [=](std::string engineName) { pSysfsAccess->setFileProperties(engineName, timesliceDurationMilliSecs, false, S_IRUSR | S_IRGRP | S_IROTH | S_IWUSR); }); auto handles = get_sched_handles(handleComponentCount); for (auto handle : handles) { zes_sched_mode_t mode; ze_result_t result = zesSchedulerGetCurrentMode(handle, &mode); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, result); } } TEST_F(SysmanDeviceSchedulerFixture, GivenValidDeviceHandleWhenCallingzesSchedulerGetCurrentModeWhenTimeSliceDurationHasNoPermissionThenFailureIsReturned) { for_each(listOfMockedEngines.begin(), listOfMockedEngines.end(), [=](std::string engineName) { pSysfsAccess->setFileProperties(engineName, timesliceDurationMilliSecs, true, S_IRGRP | S_IROTH | S_IWUSR); }); auto handles = get_sched_handles(handleComponentCount); for (auto handle : handles) { zes_sched_mode_t mode; ze_result_t result = zesSchedulerGetCurrentMode(handle, &mode); EXPECT_EQ(ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS, result); } } TEST_F(SysmanDeviceSchedulerFixture, GivenValidDeviceHandleWhenCallingzesSchedulerSetComputeUnitDebugModeThenUnsupportedFeatureIsReturned) { auto handles = get_sched_handles(handleComponentCount); for (auto handle : handles) { ze_bool_t needReload; ze_result_t result = zesSchedulerSetComputeUnitDebugMode(handle, &needReload); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, result); } } TEST_F(SysmanDeviceSchedulerFixture, GivenValidDeviceHandleWhenCallingzesSchedulerGetTimeoutModePropertiesWithDefaultsWhenSysfsNodeIsAbsentThenFailureIsReturned) { ON_CALL(*pSysfsAccess.get(), read(_, _)) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), 
&Mock::getValForError)); auto handles = get_sched_handles(handleComponentCount); for (auto handle : handles) { zes_sched_timeout_properties_t config; ze_result_t result = zesSchedulerGetTimeoutModeProperties(handle, true, &config); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, result); } } TEST_F(SysmanDeviceSchedulerFixture, GivenValidDeviceHandleWhenCallingzesSchedulerGetTimesliceModePropertiesWithDefaultsWhenSysfsNodeIsAbsentThenFailureIsReturned) { ON_CALL(*pSysfsAccess.get(), read(_, _)) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::getValForError)); auto handles = get_sched_handles(handleComponentCount); for (auto handle : handles) { zes_sched_timeslice_properties_t config; ze_result_t result = zesSchedulerGetTimesliceModeProperties(handle, true, &config); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, result); } } TEST_F(SysmanDeviceSchedulerFixture, GivenValidDeviceHandleWhenCallingzesSchedulerSetTimesliceModeWhenSysfsNodeIsAbsentThenFailureIsReturned) { ON_CALL(*pSysfsAccess.get(), write(_, _)) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::getValForErrorWhileWrite)); auto handles = get_sched_handles(handleComponentCount); for (auto handle : handles) { ze_bool_t needReboot; zes_sched_timeslice_properties_t setConfig; setConfig.interval = 1000u; setConfig.yieldTimeout = 1000u; ze_result_t result = zesSchedulerSetTimesliceMode(handle, &setConfig, &needReboot); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, result); } } TEST_F(SysmanDeviceSchedulerFixture, GivenValidDeviceHandleWhenCallingzesSchedulerSetTimesliceModeWhenSysfsNodeWithoutPermissionsThenFailureIsReturned) { ON_CALL(*pSysfsAccess.get(), write(_, _)) .WillByDefault(::testing::Invoke(pSysfsAccess.get(), &Mock::getValForErrorWhileWrite)); auto handles = get_sched_handles(handleComponentCount); for (auto handle : handles) { ze_bool_t needReboot; zes_sched_timeslice_properties_t setConfig; setConfig.interval = 1000u; setConfig.yieldTimeout = 1000u; 
EXPECT_CALL(*pSysfsAccess.get(), write(_, _)) .WillOnce(Return(ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS)) .WillRepeatedly(DoDefault()); ze_result_t result = zesSchedulerSetTimesliceMode(handle, &setConfig, &needReboot); EXPECT_EQ(ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS, result); } } TEST_F(SysmanDeviceSchedulerFixture, GivenValidDeviceHandleWhenCallingzesSchedulerGetPropertiesThenVerifyzesSchedulerGetPropertiesCallSucceeds) { auto handles = get_sched_handles(handleComponentCount); for (auto handle : handles) { zes_sched_properties_t properties = {}; ze_result_t result = zesSchedulerGetProperties(handle, &properties); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_TRUE(properties.canControl); EXPECT_LE(properties.engines, ZES_ENGINE_TYPE_FLAG_RENDER); EXPECT_EQ(properties.supportedModes, static_cast((1 << ZES_SCHED_MODE_TIMEOUT) | (1 << ZES_SCHED_MODE_TIMESLICE) | (1 << ZES_SCHED_MODE_EXCLUSIVE))); } } TEST_F(SysmanDeviceSchedulerFixture, GivenValidObjectsOfClassSchedulerImpAndSchedulerHandleContextThenDuringObjectReleaseCheckDestructorBranches) { for (auto &handle : pSysmanDeviceImp->pSchedulerHandleContext->handleList) { auto pSchedulerImp = static_cast(handle); delete pSchedulerImp->pOsScheduler; pSchedulerImp->pOsScheduler = nullptr; delete handle; handle = nullptr; } } TEST_F(SysmanMultiDeviceFixture, GivenValidDevicePointerWhenGettingSchedPropertiesThenValidSchedPropertiesRetrieved) { zes_sched_properties_t properties = {}; std::vector listOfEngines; ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES}; Device::fromHandle(device)->getProperties(&deviceProperties); LinuxSchedulerImp *pLinuxSchedulerImp = new LinuxSchedulerImp(pOsSysman, ZES_ENGINE_TYPE_FLAG_RENDER, listOfEngines, deviceProperties.flags & ZE_DEVICE_PROPERTY_FLAG_SUBDEVICE, deviceProperties.subdeviceId); EXPECT_EQ(ZE_RESULT_SUCCESS, pLinuxSchedulerImp->getProperties(properties)); EXPECT_EQ(properties.subdeviceId, deviceProperties.subdeviceId); 
EXPECT_EQ(properties.onSubdevice, deviceProperties.flags & ZE_DEVICE_PROPERTY_FLAG_SUBDEVICE); delete pLinuxSchedulerImp; } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/standby/000077500000000000000000000000001422164147700305015ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/standby/CMakeLists.txt000066400000000000000000000003051422164147700332370ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) add_subdirectories() compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/standby/linux/000077500000000000000000000000001422164147700316405ustar00rootroot00000000000000CMakeLists.txt000066400000000000000000000005501422164147700343210ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/standby/linux# # Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT # if(UNIX) target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/mock_sysfs_standby.h ${CMAKE_CURRENT_SOURCE_DIR}/test_zes_sysman_standby.cpp ) endif() mock_sysfs_standby.h000066400000000000000000000061771422164147700356510ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/standby/linux/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/test/common/test_macros/mock_method_macros.h" #include "level_zero/tools/source/sysman/standby/linux/os_standby_imp.h" namespace L0 { namespace ult { const std::string standbyModeFile("gt/gt0/rc6_enable"); const std::string standbyModeFile1("gt/gt1/rc6_enable"); const std::string standbyModeFileLegacy("power/rc6_enable"); class StandbySysfsAccess : public SysfsAccess {}; 
template <> struct Mock : public StandbySysfsAccess { int mockStandbyMode = -1; bool isStandbyModeFileAvailable = true; ::mode_t mockStandbyFileMode = S_IRUSR | S_IRGRP | S_IROTH | S_IWUSR; MOCK_METHOD(ze_result_t, read, (const std::string file, int &val), (override)); MOCK_METHOD(ze_result_t, write, (const std::string file, int val), (override)); MOCK_METHOD(ze_result_t, canRead, (const std::string file), (override)); ADDMETHOD_NOBASE(directoryExists, bool, true, (const std::string path)); ze_result_t getCanReadStatus(const std::string file) { if (isFileAccessible(file) == true) { return ZE_RESULT_SUCCESS; } return ZE_RESULT_ERROR_UNKNOWN; } ze_result_t getVal(const std::string file, int &val) { if ((isFileAccessible(file) == true) && (mockStandbyFileMode & S_IRUSR) != 0) { val = mockStandbyMode; return ZE_RESULT_SUCCESS; } if (isStandbyModeFileAvailable == false) { return ZE_RESULT_ERROR_NOT_AVAILABLE; } if ((mockStandbyFileMode & S_IRUSR) == 0) { return ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS; } return ZE_RESULT_ERROR_UNKNOWN; } ze_result_t setVal(const std::string file, const int val) { if ((isFileAccessible(file) == true) && (mockStandbyFileMode & S_IWUSR) != 0) { mockStandbyMode = val; return ZE_RESULT_SUCCESS; } if (isFileAccessible(file) == false) { return ZE_RESULT_ERROR_NOT_AVAILABLE; } if ((mockStandbyFileMode & S_IWUSR) == 0) { return ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS; } return ZE_RESULT_ERROR_UNKNOWN; } ze_result_t setValReturnError(const std::string file, const int val) { if (isFileAccessible(file) == true) { return ZE_RESULT_ERROR_NOT_AVAILABLE; } return ZE_RESULT_ERROR_NOT_AVAILABLE; } Mock() = default; ~Mock() override = default; private: bool isFileAccessible(const std::string file) { if (((file.compare(standbyModeFile) == 0) || (file.compare(standbyModeFile1) == 0) || (file.compare(standbyModeFileLegacy) == 0)) && (isStandbyModeFileAvailable == true)) { return true; } return false; } }; class PublicLinuxStandbyImp : public 
L0::LinuxStandbyImp { public: PublicLinuxStandbyImp(OsSysman *pOsSysman, ze_bool_t onSubdevice, uint32_t subdeviceId) : LinuxStandbyImp(pOsSysman, onSubdevice, subdeviceId) {} using LinuxStandbyImp::pSysfsAccess; }; } // namespace ult } // namespace L0 test_zes_sysman_standby.cpp000066400000000000000000000410651422164147700372510ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/standby/linux/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/test/unit_tests/sources/sysman/linux/mock_sysman_fixture.h" #include "mock_sysfs_standby.h" extern bool sysmanUltsEnable; using ::testing::_; using ::testing::Matcher; namespace L0 { namespace ult { constexpr int standbyModeDefault = 1; constexpr int standbyModeNever = 0; constexpr int standbyModeInvalid = 0xff; constexpr uint32_t mockHandleCount = 1u; uint32_t mockSubDeviceHandleCount = 0u; class ZesStandbyFixture : public SysmanDeviceFixture { protected: std::unique_ptr> ptestSysfsAccess; zes_standby_handle_t hSysmanStandby = {}; SysfsAccess *pOriginalSysfsAccess = nullptr; std::vector deviceHandles; void SetUp() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } SysmanDeviceFixture::SetUp(); ptestSysfsAccess = std::make_unique>>(); pOriginalSysfsAccess = pLinuxSysmanImp->pSysfsAccess; pLinuxSysmanImp->pSysfsAccess = ptestSysfsAccess.get(); ptestSysfsAccess->setVal(standbyModeFile, standbyModeDefault); ON_CALL(*ptestSysfsAccess.get(), read(_, Matcher(_))) .WillByDefault(::testing::Invoke(ptestSysfsAccess.get(), &Mock::getVal)); ON_CALL(*ptestSysfsAccess.get(), write(_, Matcher(_))) .WillByDefault(::testing::Invoke(ptestSysfsAccess.get(), &Mock::setVal)); ON_CALL(*ptestSysfsAccess.get(), canRead(_)) .WillByDefault(::testing::Invoke(ptestSysfsAccess.get(), &Mock::getCanReadStatus)); for (const auto &handle : pSysmanDeviceImp->pStandbyHandleContext->handleList) { delete handle; } 
pSysmanDeviceImp->pStandbyHandleContext->handleList.clear(); uint32_t subDeviceCount = 0; // We received a device handle. Check for subdevices in this device Device::fromHandle(device->toHandle())->getSubDevices(&subDeviceCount, nullptr); if (subDeviceCount == 0) { deviceHandles.resize(1, device->toHandle()); } else { deviceHandles.resize(subDeviceCount, nullptr); Device::fromHandle(device->toHandle())->getSubDevices(&subDeviceCount, deviceHandles.data()); } pSysmanDeviceImp->pStandbyHandleContext->init(deviceHandles); } void TearDown() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } pLinuxSysmanImp->pSysfsAccess = pOriginalSysfsAccess; SysmanDeviceFixture::TearDown(); } std::vector get_standby_handles(uint32_t count) { std::vector handles(count, nullptr); EXPECT_EQ(zesDeviceEnumStandbyDomains(device, &count, handles.data()), ZE_RESULT_SUCCESS); return handles; } }; TEST_F(ZesStandbyFixture, GivenComponentCountZeroWhenCallingzesStandbyGetThenNonZeroCountIsReturnedAndVerifyzesStandbyGetCallSucceeds) { std::vector standbyHandle = {}; uint32_t count = 0; ze_result_t result = zesDeviceEnumStandbyDomains(device, &count, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(count, mockHandleCount); uint32_t testCount = count + 1; result = zesDeviceEnumStandbyDomains(device, &testCount, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(testCount, count); standbyHandle.resize(count); result = zesDeviceEnumStandbyDomains(device, &count, standbyHandle.data()); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, standbyHandle.data()); EXPECT_EQ(count, mockHandleCount); StandbyImp *ptestStandbyImp = new StandbyImp(pSysmanDeviceImp->pStandbyHandleContext->pOsSysman, device->toHandle()); count = 0; pSysmanDeviceImp->pStandbyHandleContext->handleList.push_back(ptestStandbyImp); result = zesDeviceEnumStandbyDomains(device, &count, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(count, mockHandleCount + 1); testCount = count - 1; 
standbyHandle.resize(testCount); result = zesDeviceEnumStandbyDomains(device, &testCount, standbyHandle.data()); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, standbyHandle.data()); EXPECT_EQ(count, mockHandleCount + 1); pSysmanDeviceImp->pStandbyHandleContext->handleList.pop_back(); delete ptestStandbyImp; } TEST_F(ZesStandbyFixture, GivenValidStandbyHandleWhenCallingzesStandbyGetPropertiesThenVerifyzesStandbyGetPropertiesCallSucceeds) { zes_standby_properties_t properties = {}; auto handles = get_standby_handles(mockHandleCount); for (auto hSysmanStandby : handles) { EXPECT_EQ(ZE_RESULT_SUCCESS, zesStandbyGetProperties(hSysmanStandby, &properties)); EXPECT_EQ(nullptr, properties.pNext); EXPECT_EQ(ZES_STANDBY_TYPE_GLOBAL, properties.type); EXPECT_FALSE(properties.onSubdevice); } } TEST_F(ZesStandbyFixture, GivenValidStandbyHandleWhenCallingzesStandbyGetModeThenVerifyzesStandbyGetModeCallSucceedsForDefaultMode) { zes_standby_promo_mode_t mode = {}; auto handles = get_standby_handles(mockHandleCount); for (auto hSysmanStandby : handles) { EXPECT_EQ(ZE_RESULT_SUCCESS, zesStandbyGetMode(hSysmanStandby, &mode)); EXPECT_EQ(ZES_STANDBY_PROMO_MODE_DEFAULT, mode); } } TEST_F(ZesStandbyFixture, GivenValidStandbyHandleWhenCallingzesStandbyGetModeThenVerifyzesStandbyGetModeCallSucceedsForNeverMode) { zes_standby_promo_mode_t mode = {}; ptestSysfsAccess->setVal(standbyModeFile, standbyModeNever); auto handles = get_standby_handles(mockHandleCount); for (auto hSysmanStandby : handles) { EXPECT_EQ(ZE_RESULT_SUCCESS, zesStandbyGetMode(hSysmanStandby, &mode)); EXPECT_EQ(ZES_STANDBY_PROMO_MODE_NEVER, mode); } } TEST_F(ZesStandbyFixture, GivenInvalidStandbyFileWhenReadisCalledThenExpectFailure) { zes_standby_promo_mode_t mode = {}; ON_CALL(*ptestSysfsAccess.get(), read(_, Matcher(_))) .WillByDefault(::testing::Invoke(ptestSysfsAccess.get(), &Mock::setValReturnError)); auto handles = get_standby_handles(mockHandleCount); for (auto hSysmanStandby : handles) { 
EXPECT_NE(ZE_RESULT_SUCCESS, zesStandbyGetMode(hSysmanStandby, &mode)); } } TEST_F(ZesStandbyFixture, GivenValidStandbyHandleWhenCallingzesStandbyGetModeThenVerifyzesStandbyGetModeCallFailsForInvalidMode) { zes_standby_promo_mode_t mode = {}; ptestSysfsAccess->setVal(standbyModeFile, standbyModeInvalid); auto handles = get_standby_handles(mockHandleCount); for (auto hSysmanStandby : handles) { EXPECT_EQ(ZE_RESULT_ERROR_UNKNOWN, zesStandbyGetMode(hSysmanStandby, &mode)); } } TEST_F(ZesStandbyFixture, GivenValidStandbyHandleWhenCallingzesStandbyGetModeOnUnavailableFileThenVerifyzesStandbyGetModeCallFailsForUnsupportedFeature) { zes_standby_promo_mode_t mode = {}; ptestSysfsAccess->setVal(standbyModeFile, standbyModeInvalid); auto handles = get_standby_handles(mockHandleCount); ptestSysfsAccess->isStandbyModeFileAvailable = false; for (auto hSysmanStandby : handles) { EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, zesStandbyGetMode(hSysmanStandby, &mode)); } } TEST_F(ZesStandbyFixture, GivenValidStandbyHandleWhenCallingzesStandbyGetModeWithInsufficientPermissionsThenVerifyzesStandbyGetModeCallFailsForInsufficientPermissions) { zes_standby_promo_mode_t mode = {}; ptestSysfsAccess->setVal(standbyModeFile, standbyModeInvalid); auto handles = get_standby_handles(mockHandleCount); ptestSysfsAccess->mockStandbyFileMode &= ~S_IRUSR; for (auto hSysmanStandby : handles) { EXPECT_EQ(ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS, zesStandbyGetMode(hSysmanStandby, &mode)); } } TEST_F(ZesStandbyFixture, GivenValidStandbyHandleWhenCallingzesStandbySetModeThenwithUnwritableFileVerifySysmanzesySetModeCallFailedWithInsufficientPermissions) { auto handles = get_standby_handles(mockHandleCount); ptestSysfsAccess->mockStandbyFileMode &= ~S_IWUSR; for (auto hSysmanStandby : handles) { EXPECT_EQ(ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS, zesStandbySetMode(hSysmanStandby, ZES_STANDBY_PROMO_MODE_NEVER)); } } TEST_F(ZesStandbyFixture, 
GivenValidStandbyHandleWhenCallingzesStandbySetModeOnUnavailableFileThenVerifyzesStandbySetModeCallFailsForUnsupportedFeature) { auto handles = get_standby_handles(mockHandleCount); ptestSysfsAccess->isStandbyModeFileAvailable = false; for (auto hSysmanStandby : handles) { EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, zesStandbySetMode(hSysmanStandby, ZES_STANDBY_PROMO_MODE_NEVER)); } } TEST_F(ZesStandbyFixture, GivenValidStandbyHandleWhenCallingzesStandbySetModeNeverThenVerifySysmanzesySetModeCallSucceeds) { auto handles = get_standby_handles(mockHandleCount); for (auto hSysmanStandby : handles) { zes_standby_promo_mode_t mode; ptestSysfsAccess->setVal(standbyModeFile, standbyModeDefault); EXPECT_EQ(ZE_RESULT_SUCCESS, zesStandbyGetMode(hSysmanStandby, &mode)); EXPECT_EQ(ZES_STANDBY_PROMO_MODE_DEFAULT, mode); EXPECT_EQ(ZE_RESULT_SUCCESS, zesStandbySetMode(hSysmanStandby, ZES_STANDBY_PROMO_MODE_NEVER)); EXPECT_EQ(ZE_RESULT_SUCCESS, zesStandbyGetMode(hSysmanStandby, &mode)); EXPECT_EQ(ZES_STANDBY_PROMO_MODE_NEVER, mode); } } TEST_F(ZesStandbyFixture, GivenValidStandbyHandleWhenCallingzesStandbySetModeDefaultThenVerifySysmanzesySetModeCallSucceeds) { auto handles = get_standby_handles(mockHandleCount); for (auto hSysmanStandby : handles) { zes_standby_promo_mode_t mode; ptestSysfsAccess->setVal(standbyModeFile, standbyModeNever); EXPECT_EQ(ZE_RESULT_SUCCESS, zesStandbyGetMode(hSysmanStandby, &mode)); EXPECT_EQ(ZES_STANDBY_PROMO_MODE_NEVER, mode); EXPECT_EQ(ZE_RESULT_SUCCESS, zesStandbySetMode(hSysmanStandby, ZES_STANDBY_PROMO_MODE_DEFAULT)); EXPECT_EQ(ZE_RESULT_SUCCESS, zesStandbyGetMode(hSysmanStandby, &mode)); EXPECT_EQ(ZES_STANDBY_PROMO_MODE_DEFAULT, mode); } } TEST_F(ZesStandbyFixture, GivenOnSubdeviceNotSetWhenValidatingosStandbyGetPropertiesThenSuccessIsReturned) { zes_standby_properties_t properties = {}; ze_device_properties_t deviceProperties = {}; ze_bool_t isSubDevice = deviceProperties.flags & ZE_DEVICE_PROPERTY_FLAG_SUBDEVICE; 
Device::fromHandle(device)->getProperties(&deviceProperties); PublicLinuxStandbyImp *pLinuxStandbyImp = new PublicLinuxStandbyImp(pOsSysman, isSubDevice, deviceProperties.subdeviceId); EXPECT_EQ(ZE_RESULT_SUCCESS, pLinuxStandbyImp->osStandbyGetProperties(properties)); EXPECT_EQ(properties.subdeviceId, deviceProperties.subdeviceId); EXPECT_EQ(properties.onSubdevice, isSubDevice); delete pLinuxStandbyImp; } TEST_F(ZesStandbyFixture, GivenValidStandbyHandleWhenCallingzesStandbySetModeDefaultWithLegacyPathThenVerifySysmanzesySetModeCallSucceeds) { for (auto handle : pSysmanDeviceImp->pStandbyHandleContext->handleList) { delete handle; } pSysmanDeviceImp->pStandbyHandleContext->handleList.clear(); ptestSysfsAccess->directoryExistsResult = false; pSysmanDeviceImp->pStandbyHandleContext->init(deviceHandles); auto handles = get_standby_handles(mockHandleCount); for (auto hSysmanStandby : handles) { zes_standby_promo_mode_t mode; ptestSysfsAccess->setVal(standbyModeFile, standbyModeNever); EXPECT_EQ(ZE_RESULT_SUCCESS, zesStandbyGetMode(hSysmanStandby, &mode)); EXPECT_EQ(ZES_STANDBY_PROMO_MODE_NEVER, mode); EXPECT_EQ(ZE_RESULT_SUCCESS, zesStandbySetMode(hSysmanStandby, ZES_STANDBY_PROMO_MODE_DEFAULT)); EXPECT_EQ(ZE_RESULT_SUCCESS, zesStandbyGetMode(hSysmanStandby, &mode)); EXPECT_EQ(ZES_STANDBY_PROMO_MODE_DEFAULT, mode); } } TEST_F(ZesStandbyFixture, GivenValidStandbyHandleWhenCallingzesStandbySetModeNeverWithLegacyPathThenVerifySysmanzesySetModeCallSucceeds) { for (auto handle : pSysmanDeviceImp->pStandbyHandleContext->handleList) { delete handle; } pSysmanDeviceImp->pStandbyHandleContext->handleList.clear(); ptestSysfsAccess->directoryExistsResult = false; pSysmanDeviceImp->pStandbyHandleContext->init(deviceHandles); auto handles = get_standby_handles(mockHandleCount); for (auto hSysmanStandby : handles) { zes_standby_promo_mode_t mode; ptestSysfsAccess->setVal(standbyModeFile, standbyModeDefault); EXPECT_EQ(ZE_RESULT_SUCCESS, zesStandbyGetMode(hSysmanStandby, &mode)); 
EXPECT_EQ(ZES_STANDBY_PROMO_MODE_DEFAULT, mode); EXPECT_EQ(ZE_RESULT_SUCCESS, zesStandbySetMode(hSysmanStandby, ZES_STANDBY_PROMO_MODE_NEVER)); EXPECT_EQ(ZE_RESULT_SUCCESS, zesStandbyGetMode(hSysmanStandby, &mode)); EXPECT_EQ(ZES_STANDBY_PROMO_MODE_NEVER, mode); } } class ZesStandbyMultiDeviceFixture : public SysmanMultiDeviceFixture { std::unique_ptr> ptestSysfsAccess; SysfsAccess *pOriginalSysfsAccess = nullptr; protected: void SetUp() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } SysmanMultiDeviceFixture::SetUp(); mockSubDeviceHandleCount = subDeviceCount; ptestSysfsAccess = std::make_unique>>(); pOriginalSysfsAccess = pLinuxSysmanImp->pSysfsAccess; pLinuxSysmanImp->pSysfsAccess = ptestSysfsAccess.get(); ptestSysfsAccess->setVal(standbyModeFile, standbyModeDefault); ON_CALL(*ptestSysfsAccess.get(), read(_, Matcher(_))) .WillByDefault(::testing::Invoke(ptestSysfsAccess.get(), &Mock::getVal)); ON_CALL(*ptestSysfsAccess.get(), canRead(_)) .WillByDefault(::testing::Invoke(ptestSysfsAccess.get(), &Mock::getCanReadStatus)); for (const auto &handle : pSysmanDeviceImp->pStandbyHandleContext->handleList) { delete handle; } pSysmanDeviceImp->pStandbyHandleContext->handleList.clear(); uint32_t subDeviceCount = 0; std::vector deviceHandles; Device::fromHandle(device->toHandle())->getSubDevices(&subDeviceCount, nullptr); if (subDeviceCount == 0) { deviceHandles.resize(1, device->toHandle()); } else { deviceHandles.resize(subDeviceCount, nullptr); Device::fromHandle(device->toHandle())->getSubDevices(&subDeviceCount, deviceHandles.data()); } pSysmanDeviceImp->pStandbyHandleContext->init(deviceHandles); } void TearDown() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } pLinuxSysmanImp->pSysfsAccess = pOriginalSysfsAccess; SysmanMultiDeviceFixture::TearDown(); } std::vector get_standby_handles(uint32_t count) { std::vector handles(count, nullptr); EXPECT_EQ(zesDeviceEnumStandbyDomains(device, &count, handles.data()), ZE_RESULT_SUCCESS); return handles; } }; 
TEST_F(ZesStandbyMultiDeviceFixture, GivenComponentCountZeroWhenCallingzesStandbyGetThenNonZeroCountIsReturnedAndVerifyzesStandbyGetCallSucceeds) { std::vector standbyHandle = {}; uint32_t count = 0; ze_result_t result = zesDeviceEnumStandbyDomains(device, &count, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(count, mockSubDeviceHandleCount); uint32_t testCount = count + 1; result = zesDeviceEnumStandbyDomains(device, &testCount, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(testCount, count); standbyHandle.resize(count); result = zesDeviceEnumStandbyDomains(device, &count, standbyHandle.data()); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, standbyHandle.data()); EXPECT_EQ(count, mockSubDeviceHandleCount); } TEST_F(ZesStandbyMultiDeviceFixture, GivenOnSubdeviceNotSetWhenValidatingosStandbyGetPropertiesThenSuccessIsReturned) { zes_standby_properties_t properties = {}; ze_device_properties_t deviceProperties = {}; ze_bool_t isSubDevice = deviceProperties.flags & ZE_DEVICE_PROPERTY_FLAG_SUBDEVICE; Device::fromHandle(device)->getProperties(&deviceProperties); PublicLinuxStandbyImp *pLinuxStandbyImp = new PublicLinuxStandbyImp(pOsSysman, isSubDevice, deviceProperties.subdeviceId); EXPECT_EQ(ZE_RESULT_SUCCESS, pLinuxStandbyImp->osStandbyGetProperties(properties)); EXPECT_EQ(properties.subdeviceId, deviceProperties.subdeviceId); EXPECT_EQ(properties.onSubdevice, isSubDevice); delete pLinuxStandbyImp; } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/temperature/000077500000000000000000000000001422164147700313725ustar00rootroot00000000000000CMakeLists.txt000066400000000000000000000003041422164147700340500ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/temperature# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) 
add_subdirectories() compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/temperature/linux/000077500000000000000000000000001422164147700325315ustar00rootroot00000000000000CMakeLists.txt000066400000000000000000000012151422164147700352110ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/temperature/linux# # Copyright (C) 2020-2022 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_TESTS_TOOLS_SYSMAN_TEMPERATURE_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}test_zes_temperature.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_sysfs_temperature.h ) if((NEO_ENABLE_i915_PRELIM_DETECTION) AND ("${BRANCH_TYPE}" STREQUAL "")) list(REMOVE_ITEM L0_TESTS_TOOLS_SYSMAN_TEMPERATURE_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/test_zes_temperature.cpp ) endif() if(UNIX) target_sources(${TARGET_NAME} PRIVATE ${L0_TESTS_TOOLS_SYSMAN_TEMPERATURE_LINUX} ) endif() mock_sysfs_temperature.h000066400000000000000000000131721422164147700374240ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/temperature/linux/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/tools/source/sysman/temperature/linux/os_temperature_imp.h" #include "sysman/linux/pmt/pmt.h" #include "sysman/temperature/temperature_imp.h" namespace L0 { namespace ult { constexpr uint8_t memory0MaxTemperature = 0x12; constexpr uint8_t memory1MaxTemperature = 0x45; constexpr uint8_t memory2MaxTemperature = 0x32; constexpr uint8_t memory3MaxTemperature = 0x36; constexpr uint8_t tempArrForSubDevices[28] = {memory0MaxTemperature, 0, 0, 0, 0, 0, 0, 0, memory1MaxTemperature, 0, 0, 0, 0x6f, 0, 0, 0, 0x34, 0, 0, 0, 0x16, 0, 0, 0, 0x1d, 0, 0, 0}; constexpr uint64_t offsetForSubDevices = 28; constexpr uint16_t memory0MaxTempIndex = 0; constexpr uint16_t memory1MaxTempIndex = 8; constexpr uint16_t 
memory2MaxTempIndex = 300; constexpr uint16_t memory3MaxTempIndex = 308; constexpr uint8_t subDeviceMaxTempIndex = 16; constexpr uint8_t gtMaxTempIndex = 24; constexpr uint8_t tempArrForNoSubDevices[19] = {0x12, 0x23, 0x43, 0xde, 0xa3, 0xce, 0x23, 0x11, 0x45, 0x32, 0x67, 0x47, 0xac, 0x21, 0x03, 0x90, 0, 0, 0}; constexpr uint64_t offsetForNoSubDevices = 0x60; constexpr uint8_t computeIndexForNoSubDevices = 9; constexpr uint8_t globalIndexForNoSubDevices = 3; const std::string baseTelemSysFS("/sys/class/intel_pmt"); std::string rootPciPathOfGpuDeviceInTemperature = "/sys/devices/pci0000:89/0000:89:02.0/0000:8a:00.0"; const std::string realPathTelem1 = "/sys/devices/pci0000:89/0000:89:02.0/0000:8a:00.0/0000:8b:02.0/0000:8e:00.1/pmt_telemetry.1.auto/intel_pmt/telem1"; const std::string realPathTelem2 = "/sys/devices/pci0000:89/0000:89:02.0/0000:8a:00.0/0000:8b:02.0/0000:8e:00.1/pmt_telemetry.1.auto/intel_pmt/telem2"; const std::string realPathTelem3 = "/sys/devices/pci0000:89/0000:89:02.0/0000:8a:00.0/0000:8b:02.0/0000:8e:00.1/pmt_telemetry.1.auto/intel_pmt/telem3"; const std::string realPathTelem4 = "/sys/devices/pci0000:89/0000:89:02.0/0000:8a:00.0/0000:8b:02.0/0000:8e:00.1/pmt_telemetry.1.auto/intel_pmt/telem4"; const std::string realPathTelem5 = "/sys/devices/pci0000:89/0000:89:02.0/0000:8a:00.0/0000:8b:02.0/0000:8e:00.1/pmt_telemetry.1.auto/intel_pmt/telem5"; const std::string sysfsPahTelem1 = "/sys/class/intel_pmt/telem1"; const std::string sysfsPahTelem2 = "/sys/class/intel_pmt/telem2"; const std::string sysfsPahTelem3 = "/sys/class/intel_pmt/telem3"; const std::string sysfsPahTelem4 = "/sys/class/intel_pmt/telem4"; const std::string sysfsPahTelem5 = "/sys/class/intel_pmt/telem5"; class TemperaturePmt : public PlatformMonitoringTech { public: TemperaturePmt(FsAccess *pFsAccess, ze_bool_t onSubdevice, uint32_t subdeviceId) : PlatformMonitoringTech(pFsAccess, onSubdevice, subdeviceId) {} using PlatformMonitoringTech::closeFunction; using 
PlatformMonitoringTech::keyOffsetMap; using PlatformMonitoringTech::openFunction; using PlatformMonitoringTech::preadFunction; using PlatformMonitoringTech::telemetryDeviceEntry; }; template <> struct Mock : public TemperaturePmt { Mock(FsAccess *pFsAccess, ze_bool_t onSubdevice, uint32_t subdeviceId) : TemperaturePmt(pFsAccess, onSubdevice, subdeviceId) {} ~Mock() override { rootDeviceTelemNodeIndex = 0; } void mockedInit(FsAccess *pFsAccess) { if (ZE_RESULT_SUCCESS != PlatformMonitoringTech::enumerateRootTelemIndex(pFsAccess, rootPciPathOfGpuDeviceInTemperature)) { return; } telemetryDeviceEntry = "/sys/class/intel_pmt/telem2/telem"; } }; class TemperatureFsAccess : public FsAccess {}; template <> struct Mock : public TemperatureFsAccess { ze_result_t listDirectorySuccess(const std::string directory, std::vector &listOfTelemNodes) { if (directory.compare(baseTelemSysFS) == 0) { listOfTelemNodes.push_back("telem1"); listOfTelemNodes.push_back("telem2"); listOfTelemNodes.push_back("telem3"); listOfTelemNodes.push_back("telem4"); listOfTelemNodes.push_back("telem5"); return ZE_RESULT_SUCCESS; } return ZE_RESULT_ERROR_NOT_AVAILABLE; } ze_result_t listDirectoryFailure(const std::string directory, std::vector &events) { return ZE_RESULT_ERROR_NOT_AVAILABLE; } ze_result_t getRealPathSuccess(const std::string path, std::string &buf) { if (path.compare(sysfsPahTelem1) == 0) { buf = realPathTelem1; } else if (path.compare(sysfsPahTelem2) == 0) { buf = realPathTelem2; } else if (path.compare(sysfsPahTelem3) == 0) { buf = realPathTelem3; } else if (path.compare(sysfsPahTelem4) == 0) { buf = realPathTelem4; } else if (path.compare(sysfsPahTelem5) == 0) { buf = realPathTelem5; } else { return ZE_RESULT_ERROR_NOT_AVAILABLE; } return ZE_RESULT_SUCCESS; } ze_result_t getRealPathFailure(const std::string path, std::string &buf) { return ZE_RESULT_ERROR_NOT_AVAILABLE; } MOCK_METHOD(ze_result_t, listDirectory, (const std::string path, std::vector &list), (override)); 
MOCK_METHOD(ze_result_t, getRealPath, (const std::string path, std::string &buf), (override)); Mock() = default; }; class PublicLinuxTemperatureImp : public L0::LinuxTemperatureImp { public: PublicLinuxTemperatureImp(OsSysman *pOsSysman, ze_bool_t onSubdevice, uint32_t subdeviceId) : LinuxTemperatureImp(pOsSysman, onSubdevice, subdeviceId) {} using LinuxTemperatureImp::pPmt; using LinuxTemperatureImp::type; }; } // namespace ult } // namespace L0test_zes_temperature.cpp000066400000000000000000000434511422164147700374420ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/temperature/linux/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/test/unit_tests/sources/sysman/linux/mock_sysman_fixture.h" #include "level_zero/tools/test/unit_tests/sources/sysman/temperature/linux/mock_sysfs_temperature.h" extern bool sysmanUltsEnable; namespace L0 { namespace ult { const static int fakeFileDescriptor = 123; std::string rootPciPathOfGpuDevice = "/sys/devices/pci0000:89/0000:89:02.0/0000:8a:00.0"; constexpr uint32_t handleComponentCountForSubDevices = 6u; constexpr uint32_t handleComponentCountForNoSubDevices = 2u; constexpr uint32_t invalidMaxTemperature = 125; constexpr uint32_t invalidMinTemperature = 10; const std::map deviceKeyOffsetMapTemperature = { {"PACKAGE_ENERGY", 0x420}, {"COMPUTE_TEMPERATURES", 0x68}, {"SOC_TEMPERATURES", 0x60}, {"CORE_TEMPERATURES", 0x6c}}; inline static int openMockTemp(const char *pathname, int flags) { if (strcmp(pathname, "/sys/class/intel_pmt/telem2/telem") == 0) { return fakeFileDescriptor; } if (strcmp(pathname, "/sys/class/intel_pmt/telem3/telem") == 0) { return fakeFileDescriptor; } return -1; } inline static int closeMockTemp(int fd) { if (fd == fakeFileDescriptor) { return 0; } return -1; } ssize_t preadMockTemp(int fd, void *buf, size_t count, off_t offset) { if (count == sizeof(uint32_t)) { uint32_t *mockBuf = 
static_cast(buf); if (offset == memory2MaxTempIndex) { *mockBuf = memory2MaxTemperature; } else if (offset == memory3MaxTempIndex) { *mockBuf = memory3MaxTemperature; } else { for (uint64_t i = 0; i < sizeof(uint32_t); i++) { *mockBuf |= (uint32_t)tempArrForSubDevices[(offset - offsetForSubDevices) + i] << (i * 8); } } } else { uint64_t *mockBuf = static_cast(buf); *mockBuf = 0; for (uint64_t i = 0; i < sizeof(uint64_t); i++) { *mockBuf |= (uint64_t)tempArrForSubDevices[(offset - offsetForSubDevices) + i] << (i * 8); } } return count; } ssize_t preadMockTempNoSubDevices(int fd, void *buf, size_t count, off_t offset) { if (count == sizeof(uint32_t)) { uint32_t *mockBuf = static_cast(buf); for (uint64_t i = 0; i < sizeof(uint32_t); i++) { *mockBuf |= (uint32_t)tempArrForNoSubDevices[(offset - offsetForNoSubDevices) + i] << (i * 8); } } else { uint64_t *mockBuf = static_cast(buf); *mockBuf = 0; for (uint64_t i = 0; i < sizeof(uint64_t); i++) { *mockBuf |= (uint64_t)tempArrForNoSubDevices[(offset - offsetForNoSubDevices) + i] << (i * 8); } } return count; } class SysmanMultiDeviceTemperatureFixture : public SysmanMultiDeviceFixture { protected: std::unique_ptr pPublicLinuxTemperatureImp; std::unique_ptr> pFsAccess; FsAccess *pFsAccessOriginal = nullptr; std::vector deviceHandles; void SetUp() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } SysmanMultiDeviceFixture::SetUp(); pFsAccess = std::make_unique>>(); pFsAccessOriginal = pLinuxSysmanImp->pFsAccess; pLinuxSysmanImp->pFsAccess = pFsAccess.get(); ON_CALL(*pFsAccess.get(), listDirectory(_, _)) .WillByDefault(::testing::Invoke(pFsAccess.get(), &Mock::listDirectorySuccess)); ON_CALL(*pFsAccess.get(), getRealPath(_, _)) .WillByDefault(::testing::Invoke(pFsAccess.get(), &Mock::getRealPathSuccess)); uint32_t subDeviceCount = 0; Device::fromHandle(device->toHandle())->getSubDevices(&subDeviceCount, nullptr); if (subDeviceCount == 0) { deviceHandles.resize(1, device->toHandle()); } else { 
deviceHandles.resize(subDeviceCount, nullptr); Device::fromHandle(device->toHandle())->getSubDevices(&subDeviceCount, deviceHandles.data()); } for (auto &deviceHandle : deviceHandles) { ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES}; Device::fromHandle(deviceHandle)->getProperties(&deviceProperties); auto pPmt = new NiceMock>(pFsAccess.get(), deviceProperties.flags & ZE_DEVICE_PROPERTY_FLAG_SUBDEVICE, deviceProperties.subdeviceId); pPmt->mockedInit(pFsAccess.get()); pPmt->keyOffsetMap = deviceKeyOffsetMapTemperature; pLinuxSysmanImp->mapOfSubDeviceIdToPmtObject.emplace(deviceProperties.subdeviceId, pPmt); } pSysmanDeviceImp->pTempHandleContext->init(deviceHandles); } void TearDown() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } SysmanMultiDeviceFixture::TearDown(); pLinuxSysmanImp->pFsAccess = pFsAccessOriginal; } std::vector get_temp_handles(uint32_t count) { std::vector handles(count, nullptr); EXPECT_EQ(zesDeviceEnumTemperatureSensors(device->toHandle(), &count, handles.data()), ZE_RESULT_SUCCESS); return handles; } }; TEST_F(SysmanMultiDeviceTemperatureFixture, GivenComponentCountZeroWhenCallingZetSysmanTemperatureGetThenZeroCountIsReturnedAndVerifySysmanTemperatureGetCallSucceeds) { uint32_t count = 0; ze_result_t result = zesDeviceEnumTemperatureSensors(device->toHandle(), &count, NULL); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(count, handleComponentCountForSubDevices); uint32_t testcount = count + 1; result = zesDeviceEnumTemperatureSensors(device->toHandle(), &testcount, NULL); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(testcount, handleComponentCountForSubDevices); count = 0; std::vector handles(count, nullptr); EXPECT_EQ(zesDeviceEnumTemperatureSensors(device->toHandle(), &count, handles.data()), ZE_RESULT_SUCCESS); EXPECT_EQ(count, handleComponentCountForSubDevices); } TEST_F(SysmanMultiDeviceTemperatureFixture, GivenValidTempHandleWhenGettingTemperatureThenValidTemperatureReadingsRetrieved) { auto 
handles = get_temp_handles(handleComponentCountForSubDevices); for (auto &deviceHandle : deviceHandles) { ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES}; Device::fromHandle(deviceHandle)->getProperties(&deviceProperties); auto pPmt = static_cast> *>(pLinuxSysmanImp->getPlatformMonitoringTechAccess(deviceProperties.subdeviceId)); pPmt->openFunction = openMockTemp; pPmt->closeFunction = closeMockTemp; pPmt->preadFunction = preadMockTemp; } for (auto handle : handles) { zes_temp_properties_t properties = {}; EXPECT_EQ(ZE_RESULT_SUCCESS, zesTemperatureGetProperties(handle, &properties)); double temperature; ASSERT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, zesTemperatureGetState(handle, &temperature)); } } TEST_F(SysmanMultiDeviceTemperatureFixture, GivenValidTempHandleWhenGettingTemperatureConfigThenUnsupportedIsReturned) { auto handles = get_temp_handles(handleComponentCountForSubDevices); for (auto handle : handles) { zes_temp_config_t config = {}; EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, zesTemperatureGetConfig(handle, &config)); } } TEST_F(SysmanMultiDeviceTemperatureFixture, GivenValidTempHandleWhenSettingTemperatureConfigThenUnsupportedIsReturned) { auto handles = get_temp_handles(handleComponentCountForSubDevices); for (auto handle : handles) { zes_temp_config_t config = {}; EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, zesTemperatureSetConfig(handle, &config)); } } TEST_F(SysmanMultiDeviceTemperatureFixture, GivenCreatePmtObjectsWhenRootTileIndexEnumeratesSuccessfulThenValidatePmtObjectsReceivedAndBranches) { std::map mapOfSubDeviceIdToPmtObject; PlatformMonitoringTech::create(deviceHandles, pFsAccess.get(), rootPciPathOfGpuDevice, mapOfSubDeviceIdToPmtObject); uint32_t deviceHandlesIndex = 0; for (auto &subDeviceIdToPmtEntry : mapOfSubDeviceIdToPmtObject) { ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES}; 
Device::fromHandle(deviceHandles[deviceHandlesIndex++])->getProperties(&deviceProperties); EXPECT_NE(subDeviceIdToPmtEntry.second, nullptr); EXPECT_EQ(subDeviceIdToPmtEntry.first, deviceProperties.subdeviceId); delete subDeviceIdToPmtEntry.second; // delete memory to avoid mem leak here, as we finished our test validation just above. } } class SysmanDeviceTemperatureFixture : public SysmanDeviceFixture { protected: std::unique_ptr pPublicLinuxTemperatureImp; std::unique_ptr> pFsAccess; FsAccess *pFsAccessOriginal = nullptr; std::vector deviceHandles; void SetUp() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } SysmanDeviceFixture::SetUp(); pFsAccess = std::make_unique>>(); pFsAccessOriginal = pLinuxSysmanImp->pFsAccess; pLinuxSysmanImp->pFsAccess = pFsAccess.get(); ON_CALL(*pFsAccess.get(), listDirectory(_, _)) .WillByDefault(::testing::Invoke(pFsAccess.get(), &Mock::listDirectorySuccess)); ON_CALL(*pFsAccess.get(), getRealPath(_, _)) .WillByDefault(::testing::Invoke(pFsAccess.get(), &Mock::getRealPathSuccess)); uint32_t subDeviceCount = 0; Device::fromHandle(device->toHandle())->getSubDevices(&subDeviceCount, nullptr); if (subDeviceCount == 0) { deviceHandles.resize(1, device->toHandle()); } else { deviceHandles.resize(subDeviceCount, nullptr); Device::fromHandle(device->toHandle())->getSubDevices(&subDeviceCount, deviceHandles.data()); } for (auto &deviceHandle : deviceHandles) { ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES}; Device::fromHandle(deviceHandle)->getProperties(&deviceProperties); auto pPmt = new NiceMock>(pFsAccess.get(), deviceProperties.flags & ZE_DEVICE_PROPERTY_FLAG_SUBDEVICE, deviceProperties.subdeviceId); pPmt->mockedInit(pFsAccess.get()); pPmt->keyOffsetMap = deviceKeyOffsetMapTemperature; pLinuxSysmanImp->mapOfSubDeviceIdToPmtObject.emplace(deviceProperties.subdeviceId, pPmt); } pSysmanDeviceImp->pTempHandleContext->init(deviceHandles); } void TearDown() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } 
SysmanDeviceFixture::TearDown(); pLinuxSysmanImp->pFsAccess = pFsAccessOriginal; } std::vector get_temp_handles(uint32_t count) { std::vector handles(count, nullptr); EXPECT_EQ(zesDeviceEnumTemperatureSensors(device->toHandle(), &count, handles.data()), ZE_RESULT_SUCCESS); return handles; } }; TEST_F(SysmanDeviceTemperatureFixture, GivenValidTempHandleWhenGettingGPUAndGlobalTemperatureThenValidTemperatureReadingsRetrieved) { auto handles = get_temp_handles(handleComponentCountForNoSubDevices); for (auto &deviceHandle : deviceHandles) { ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES}; Device::fromHandle(deviceHandle)->getProperties(&deviceProperties); auto pPmt = static_cast> *>(pLinuxSysmanImp->getPlatformMonitoringTechAccess(deviceProperties.subdeviceId)); pPmt->openFunction = openMockTemp; pPmt->closeFunction = closeMockTemp; pPmt->preadFunction = preadMockTempNoSubDevices; } for (auto &handle : handles) { zes_temp_properties_t properties = {}; EXPECT_EQ(ZE_RESULT_SUCCESS, zesTemperatureGetProperties(handle, &properties)); double temperature; ASSERT_EQ(ZE_RESULT_SUCCESS, zesTemperatureGetState(handle, &temperature)); if (properties.type == ZES_TEMP_SENSORS_GLOBAL) { uint8_t maxTemp = 0; for (uint64_t i = 0; i < sizeof(tempArrForNoSubDevices) / sizeof(uint8_t); i++) { if ((tempArrForNoSubDevices[i] > invalidMaxTemperature) || (tempArrForNoSubDevices[i] < invalidMinTemperature) || (maxTemp > tempArrForNoSubDevices[i])) { continue; } maxTemp = tempArrForNoSubDevices[i]; } EXPECT_EQ(temperature, static_cast(maxTemp)); } if (properties.type == ZES_TEMP_SENSORS_GPU) { EXPECT_EQ(temperature, static_cast(tempArrForNoSubDevices[computeIndexForNoSubDevices])); } } } TEST_F(SysmanDeviceTemperatureFixture, GivenValidTempHandleAndPmtReadValueFailsWhenGettingTemperatureThenFailureReturned) { // delete previously allocated pPmt objects for (auto &subDeviceIdToPmtEntry : pLinuxSysmanImp->mapOfSubDeviceIdToPmtObject) { delete 
subDeviceIdToPmtEntry.second; } pLinuxSysmanImp->mapOfSubDeviceIdToPmtObject.clear(); // delete previously created temp handles for (auto &handle : pSysmanDeviceImp->pTempHandleContext->handleList) { delete handle; handle = nullptr; pSysmanDeviceImp->pTempHandleContext->handleList.pop_back(); } for (auto &deviceHandle : deviceHandles) { ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES}; Device::fromHandle(deviceHandle)->getProperties(&deviceProperties); auto pPmt = new NiceMock>(pFsAccess.get(), deviceProperties.flags & ZE_DEVICE_PROPERTY_FLAG_SUBDEVICE, deviceProperties.subdeviceId); pPmt->mockedInit(pFsAccess.get()); pLinuxSysmanImp->mapOfSubDeviceIdToPmtObject.emplace(deviceProperties.subdeviceId, pPmt); } pSysmanDeviceImp->pTempHandleContext->init(deviceHandles); auto handles = get_temp_handles(handleComponentCountForNoSubDevices); for (auto &handle : handles) { double temperature; ASSERT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, zesTemperatureGetState(handle, &temperature)); } } TEST_F(SysmanDeviceTemperatureFixture, GivenValidTempHandleWhenGettingUnsupportedSensorsTemperatureThenUnsupportedReturned) { ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES}; Device::fromHandle(device->toHandle())->getProperties(&deviceProperties); auto pPublicLinuxTemperatureImp = std::make_unique(pOsSysman, deviceProperties.flags & ZE_DEVICE_PROPERTY_FLAG_SUBDEVICE, deviceProperties.subdeviceId); pPublicLinuxTemperatureImp->setSensorType(ZES_TEMP_SENSORS_MEMORY_MIN); double temperature; EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, pPublicLinuxTemperatureImp->getSensorTemperature(&temperature)); } TEST_F(SysmanDeviceTemperatureFixture, GivenValidateEnumerateRootTelemIndexWhengetRealPathFailsThenFailureReturned) { ON_CALL(*pFsAccess.get(), getRealPath(_, _)) .WillByDefault(::testing::Invoke(pFsAccess.get(), &Mock::getRealPathFailure)); EXPECT_EQ(ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE, 
PlatformMonitoringTech::enumerateRootTelemIndex(pFsAccess.get(), rootPciPathOfGpuDevice)); ON_CALL(*pFsAccess.get(), listDirectory(_, _)) .WillByDefault(::testing::Invoke(pFsAccess.get(), &Mock::listDirectoryFailure)); EXPECT_EQ(ZE_RESULT_ERROR_NOT_AVAILABLE, PlatformMonitoringTech::enumerateRootTelemIndex(pFsAccess.get(), rootPciPathOfGpuDevice)); std::map mapOfSubDeviceIdToPmtObject; PlatformMonitoringTech::create(deviceHandles, pFsAccess.get(), rootPciPathOfGpuDevice, mapOfSubDeviceIdToPmtObject); EXPECT_TRUE(mapOfSubDeviceIdToPmtObject.empty()); } TEST_F(SysmanDeviceTemperatureFixture, GivenValidatePmtReadValueWhenkeyOffsetMapIsNotThereThenFailureReturned) { auto pPmt = std::make_unique>>(pFsAccess.get(), 0, 0); pPmt->mockedInit(pFsAccess.get()); pPmt->keyOffsetMap = deviceKeyOffsetMapTemperature; uint32_t val = 0; EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, pPmt->readValue("SOMETHING", val)); } TEST_F(SysmanDeviceTemperatureFixture, GivenCreatePmtObjectsWhenRootTileIndexEnumeratesSuccessfulThenValidatePmtObjectsReceivedAndBranches) { std::map mapOfSubDeviceIdToPmtObject1; PlatformMonitoringTech::create(deviceHandles, pFsAccess.get(), rootPciPathOfGpuDevice, mapOfSubDeviceIdToPmtObject1); for (auto &subDeviceIdToPmtEntry : mapOfSubDeviceIdToPmtObject1) { EXPECT_NE(subDeviceIdToPmtEntry.second, nullptr); EXPECT_EQ(subDeviceIdToPmtEntry.first, 0u); // We know that subdeviceID is zero as core device didnt have any subdevices delete subDeviceIdToPmtEntry.second; // delete memory to avoid mem leak here, as we finished our test validation just above. 
} std::map mapOfSubDeviceIdToPmtObject2; std::vector testHandleVector; // If empty device handle vector is provided then empty map is retrieved PlatformMonitoringTech::create(testHandleVector, pFsAccess.get(), rootPciPathOfGpuDevice, mapOfSubDeviceIdToPmtObject2); EXPECT_TRUE(mapOfSubDeviceIdToPmtObject2.empty()); } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/temperature/windows/000077500000000000000000000000001422164147700330645ustar00rootroot00000000000000CMakeLists.txt000066400000000000000000000005361422164147700355510ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/temperature/windows# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(WIN32) target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/mock_temperature.h ${CMAKE_CURRENT_SOURCE_DIR}/test_zes_temperature.cpp ) endif() mock_temperature.h000066400000000000000000000062311422164147700365260ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/temperature/windows/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/core/test/unit_tests/mock.h" #include "level_zero/tools/test/unit_tests/sources/sysman/windows/mock_kmd_sys_manager.h" #include "sysman/temperature/temperature_imp.h" namespace L0 { namespace ult { class TemperatureKmdSysManager : public Mock {}; template <> struct Mock : public TemperatureKmdSysManager { uint32_t mockTempGlobal = 26; uint32_t mockTempGPU = 25; uint32_t mockTempMemory = 23; uint32_t mockMaxTemperature = 100; zes_temp_sensors_t mockSensorTypes[3] = {ZES_TEMP_SENSORS_GLOBAL, ZES_TEMP_SENSORS_GPU, ZES_TEMP_SENSORS_MEMORY}; void getTemperatureProperty(KmdSysman::GfxSysmanReqHeaderIn *pRequest, KmdSysman::GfxSysmanReqHeaderOut *pResponse) override { uint8_t *pBuffer = 
reinterpret_cast(pResponse); pBuffer += sizeof(KmdSysman::GfxSysmanReqHeaderOut); KmdSysman::TemperatureDomainsType domain = static_cast(pRequest->inCommandParam); if (domain < KmdSysman::TemperatureDomainsType::TemperatureDomainPackage || domain >= KmdSysman::TemperatureDomainsType::TempetatureMaxDomainTypes) { pResponse->outDataSize = 0; pResponse->outReturnCode = KmdSysman::KmdSysmanFail; return; } switch (pRequest->inRequestId) { case KmdSysman::Requests::Temperature::TempCriticalEventSupported: case KmdSysman::Requests::Temperature::TempThreshold1EventSupported: case KmdSysman::Requests::Temperature::TempThreshold2EventSupported: { uint32_t *pValue = reinterpret_cast(pBuffer); *pValue = 0; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; pResponse->outDataSize = sizeof(uint32_t); } break; case KmdSysman::Requests::Temperature::MaxTempSupported: { uint32_t *pValue = reinterpret_cast(pBuffer); *pValue = mockMaxTemperature; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; pResponse->outDataSize = sizeof(uint32_t); } break; case KmdSysman::Requests::Temperature::CurrentTemperature: { uint32_t *pValue = reinterpret_cast(pBuffer); switch (domain) { case KmdSysman::TemperatureDomainsType::TemperatureDomainPackage: { *pValue = mockTempGlobal; } break; case KmdSysman::TemperatureDomainsType::TemperatureDomainDGPU: { *pValue = mockTempGPU; } break; case KmdSysman::TemperatureDomainsType::TemperatureDomainHBM: { *pValue = mockTempMemory; } break; } pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; pResponse->outDataSize = sizeof(uint32_t); } break; default: { pResponse->outDataSize = 0; pResponse->outReturnCode = KmdSysman::KmdSysmanFail; } break; } } Mock() = default; ~Mock() = default; }; } // namespace ult } // namespace L0test_zes_temperature.cpp000066400000000000000000000163441422164147700377760ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/temperature/windows/* * Copyright (C) 2020-2021 
Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/temperature/windows/os_temperature_imp.h" #include "level_zero/tools/test/unit_tests/sources/sysman/temperature/windows/mock_temperature.h" #include "level_zero/tools/test/unit_tests/sources/sysman/windows/mock_sysman_fixture.h" extern bool sysmanUltsEnable; namespace L0 { namespace ult { constexpr uint32_t temperatureHandleComponentCount = 3u; class SysmanDeviceTemperatureFixture : public SysmanDeviceFixture { protected: Mock *pKmdSysManager = nullptr; KmdSysManager *pOriginalKmdSysManager = nullptr; std::vector deviceHandles; void SetUp() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } SysmanDeviceFixture::SetUp(); pKmdSysManager = new Mock; EXPECT_CALL(*pKmdSysManager, escape(_, _, _, _, _)) .WillRepeatedly(::testing::Invoke(pKmdSysManager, &Mock::mock_escape)); pOriginalKmdSysManager = pWddmSysmanImp->pKmdSysManager; pWddmSysmanImp->pKmdSysManager = pKmdSysManager; for (auto handle : pSysmanDeviceImp->pTempHandleContext->handleList) { delete handle; } pSysmanDeviceImp->pTempHandleContext->handleList.clear(); uint32_t subDeviceCount = 0; // We received a device handle. 
Check for subdevices in this device Device::fromHandle(device->toHandle())->getSubDevices(&subDeviceCount, nullptr); if (subDeviceCount == 0) { deviceHandles.resize(1, device->toHandle()); } else { deviceHandles.resize(subDeviceCount, nullptr); Device::fromHandle(device->toHandle())->getSubDevices(&subDeviceCount, deviceHandles.data()); } pSysmanDeviceImp->pTempHandleContext->init(deviceHandles); } void TearDown() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } SysmanDeviceFixture::TearDown(); pWddmSysmanImp->pKmdSysManager = pOriginalKmdSysManager; if (pKmdSysManager != nullptr) { delete pKmdSysManager; pKmdSysManager = nullptr; } } std::vector get_temp_handles(uint32_t count) { std::vector handles(count, nullptr); EXPECT_EQ(zesDeviceEnumTemperatureSensors(device->toHandle(), &count, handles.data()), ZE_RESULT_SUCCESS); return handles; } }; TEST_F(SysmanDeviceTemperatureFixture, GivenComponentCountZeroWhenEnumeratingTemperatureSensorsThenValidCountIsReturnedAndVerifySysmanPowerGetCallSucceeds) { uint32_t count = 0; EXPECT_EQ(zesDeviceEnumTemperatureSensors(device->toHandle(), &count, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(count, temperatureHandleComponentCount); } TEST_F(SysmanDeviceTemperatureFixture, GivenInvalidComponentCountWhenEnumeratingTemperatureSensorsThenValidCountIsReturnedAndVerifySysmanPowerGetCallSucceeds) { uint32_t count = 0; EXPECT_EQ(zesDeviceEnumTemperatureSensors(device->toHandle(), &count, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(count, temperatureHandleComponentCount); count = count + 1; EXPECT_EQ(zesDeviceEnumTemperatureSensors(device->toHandle(), &count, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(count, temperatureHandleComponentCount); } TEST_F(SysmanDeviceTemperatureFixture, GivenComponentCountZeroWhenEnumeratingTemperatureSensorsThenValidPowerHandlesIsReturned) { uint32_t count = 0; EXPECT_EQ(zesDeviceEnumTemperatureSensors(device->toHandle(), &count, nullptr), ZE_RESULT_SUCCESS); EXPECT_EQ(count, temperatureHandleComponentCount); 
std::vector handles(count, nullptr); EXPECT_EQ(zesDeviceEnumTemperatureSensors(device->toHandle(), &count, handles.data()), ZE_RESULT_SUCCESS); for (auto handle : handles) { EXPECT_NE(handle, nullptr); } } TEST_F(SysmanDeviceTemperatureFixture, GivenValidPowerHandleWhenGettingTemperaturePropertiesAllowSetToTrueThenCallSucceeds) { auto handles = get_temp_handles(temperatureHandleComponentCount); uint32_t sensorTypeIndex = 0; for (auto handle : handles) { zes_temp_properties_t properties; ze_result_t result = zesTemperatureGetProperties(handle, &properties); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_FALSE(properties.onSubdevice); EXPECT_EQ(properties.subdeviceId, 0); EXPECT_FALSE(properties.isCriticalTempSupported); EXPECT_FALSE(properties.isThreshold1Supported); EXPECT_FALSE(properties.isThreshold2Supported); EXPECT_EQ(properties.maxTemperature, pKmdSysManager->mockMaxTemperature); EXPECT_EQ(properties.type, pKmdSysManager->mockSensorTypes[sensorTypeIndex++]); } } TEST_F(SysmanDeviceTemperatureFixture, GivenValidTempHandleWhenGettingMemoryTemperatureThenValidTemperatureReadingsRetrieved) { auto handles = get_temp_handles(temperatureHandleComponentCount); double temperature; ASSERT_EQ(ZE_RESULT_SUCCESS, zesTemperatureGetState(handles[ZES_TEMP_SENSORS_MEMORY], &temperature)); EXPECT_EQ(temperature, static_cast(pKmdSysManager->mockTempMemory)); } TEST_F(SysmanDeviceTemperatureFixture, GivenValidTempHandleWhenGettingGPUTemperatureThenValidTemperatureReadingsRetrieved) { auto handles = get_temp_handles(temperatureHandleComponentCount); double temperature; ASSERT_EQ(ZE_RESULT_SUCCESS, zesTemperatureGetState(handles[ZES_TEMP_SENSORS_GPU], &temperature)); EXPECT_EQ(temperature, static_cast(pKmdSysManager->mockTempGPU)); } TEST_F(SysmanDeviceTemperatureFixture, GivenValidTempHandleWhenGettingGlobalTemperatureThenValidTemperatureReadingsRetrieved) { auto handles = get_temp_handles(temperatureHandleComponentCount); double temperature; ASSERT_EQ(ZE_RESULT_SUCCESS, 
zesTemperatureGetState(handles[ZES_TEMP_SENSORS_GLOBAL], &temperature)); EXPECT_EQ(temperature, static_cast(pKmdSysManager->mockTempGlobal)); } TEST_F(SysmanDeviceTemperatureFixture, GivenValidTempHandleWhenGettingUnsupportedSensorsTemperatureThenUnsupportedReturned) { auto pTemperatureImpMemory = std::make_unique(deviceHandles[0], pOsSysman, ZES_TEMP_SENSORS_GLOBAL_MIN); auto pWddmTemperatureImp = static_cast(pTemperatureImpMemory->pOsTemperature.get()); double pTemperature = 0; EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, pWddmTemperatureImp->getSensorTemperature(&pTemperature)); } TEST_F(SysmanDeviceTemperatureFixture, GivenValidTempHandleWhenGettingTemperatureConfigThenUnsupportedIsReturned) { auto handles = get_temp_handles(temperatureHandleComponentCount); for (auto handle : handles) { zes_temp_config_t config = {}; EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, zesTemperatureGetConfig(handle, &config)); } } TEST_F(SysmanDeviceTemperatureFixture, GivenValidTempHandleWhenSettingTemperatureConfigThenUnsupportedIsReturned) { auto handles = get_temp_handles(temperatureHandleComponentCount); for (auto handle : handles) { zes_temp_config_t config = {}; EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, zesTemperatureSetConfig(handle, &config)); } } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/windows/000077500000000000000000000000001422164147700305275ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/windows/CMakeLists.txt000066400000000000000000000007411422164147700332710ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(WIN32) target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/mock_kmd_sys_manager.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_sysman_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/test_sysman.cpp 
${CMAKE_CURRENT_SOURCE_DIR}/test_sysman_manager.cpp ) endif() mock_kmd_sys_manager.h000066400000000000000000000461741422164147700350110ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/windows/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/core/test/unit_tests/mock.h" #include "level_zero/tools/source/sysman/windows/kmd_sys_manager.h" #include "level_zero/tools/source/sysman/windows/os_sysman_imp.h" #include "gmock/gmock.h" namespace L0 { namespace ult { constexpr uint32_t mockKmdVersionMajor = 1; constexpr uint32_t mockKmdVersionMinor = 0; constexpr uint32_t mockKmdPatchNumber = 0; constexpr uint32_t mockKmdMaxHandlesPerEvent = 20; struct MockEventHandle { HANDLE eventHandle; bool inited = false; }; uint64_t convertTStoMicroSec(uint64_t TS, uint32_t freq); class MockKmdSysManager : public KmdSysManager {}; template <> struct Mock : public MockKmdSysManager { ze_bool_t allowSetCalls = false; ze_bool_t fanSupported = false; uint32_t mockPowerLimit1 = 2500; MockEventHandle handles[KmdSysman::Events::MaxEvents][mockKmdMaxHandlesPerEvent]; MOCK_METHOD(bool, escape, (uint32_t escapeOp, uint64_t pDataIn, uint32_t dataInSize, uint64_t pDataOut, uint32_t dataOutSize)); MOCKABLE_VIRTUAL void getInterfaceProperty(KmdSysman::GfxSysmanReqHeaderIn *pRequest, KmdSysman::GfxSysmanReqHeaderOut *pResponse) { pResponse->outDataSize = 0; pResponse->outReturnCode = KmdSysman::KmdSysmanFail; } MOCKABLE_VIRTUAL void setInterfaceProperty(KmdSysman::GfxSysmanReqHeaderIn *pRequest, KmdSysman::GfxSysmanReqHeaderOut *pResponse) { pResponse->outDataSize = 0; pResponse->outReturnCode = KmdSysman::KmdSysmanFail; } MOCKABLE_VIRTUAL void getPowerProperty(KmdSysman::GfxSysmanReqHeaderIn *pRequest, KmdSysman::GfxSysmanReqHeaderOut *pResponse) { uint8_t *pBuffer = reinterpret_cast(pResponse); pBuffer += sizeof(KmdSysman::GfxSysmanReqHeaderOut); if (pRequest->inRequestId == 
KmdSysman::Requests::Power::CurrentPowerLimit1) { uint32_t *pPl1 = reinterpret_cast(pBuffer); *pPl1 = mockPowerLimit1; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; pResponse->outDataSize = sizeof(uint32_t); } else { pResponse->outDataSize = 0; pResponse->outReturnCode = KmdSysman::KmdSysmanFail; } } MOCKABLE_VIRTUAL void setPowerProperty(KmdSysman::GfxSysmanReqHeaderIn *pRequest, KmdSysman::GfxSysmanReqHeaderOut *pResponse) { uint8_t *pBuffer = reinterpret_cast(pRequest); pBuffer += sizeof(KmdSysman::GfxSysmanReqHeaderIn); if (pRequest->inRequestId == KmdSysman::Requests::Power::CurrentPowerLimit1) { uint32_t *pPl1 = reinterpret_cast(pBuffer); mockPowerLimit1 = *pPl1; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; } else { pResponse->outDataSize = 0; pResponse->outReturnCode = KmdSysman::KmdSysmanFail; } } MOCKABLE_VIRTUAL void getFrequencyProperty(KmdSysman::GfxSysmanReqHeaderIn *pRequest, KmdSysman::GfxSysmanReqHeaderOut *pResponse) { pResponse->outDataSize = 0; pResponse->outReturnCode = KmdSysman::KmdSysmanFail; } MOCKABLE_VIRTUAL void setFrequencyProperty(KmdSysman::GfxSysmanReqHeaderIn *pRequest, KmdSysman::GfxSysmanReqHeaderOut *pResponse) { pResponse->outDataSize = 0; pResponse->outReturnCode = KmdSysman::KmdSysmanFail; } MOCKABLE_VIRTUAL void getActivityProperty(KmdSysman::GfxSysmanReqHeaderIn *pRequest, KmdSysman::GfxSysmanReqHeaderOut *pResponse) { pResponse->outDataSize = 0; pResponse->outReturnCode = KmdSysman::KmdSysmanFail; } MOCKABLE_VIRTUAL void setActivityProperty(KmdSysman::GfxSysmanReqHeaderIn *pRequest, KmdSysman::GfxSysmanReqHeaderOut *pResponse) { pResponse->outDataSize = 0; pResponse->outReturnCode = KmdSysman::KmdSysmanFail; } MOCKABLE_VIRTUAL void getFanProperty(KmdSysman::GfxSysmanReqHeaderIn *pRequest, KmdSysman::GfxSysmanReqHeaderOut *pResponse) { pResponse->outDataSize = 0; pResponse->outReturnCode = KmdSysman::KmdSysmanFail; } MOCKABLE_VIRTUAL void setFanProperty(KmdSysman::GfxSysmanReqHeaderIn *pRequest, 
KmdSysman::GfxSysmanReqHeaderOut *pResponse) { pResponse->outDataSize = 0; pResponse->outReturnCode = KmdSysman::KmdSysmanFail; } MOCKABLE_VIRTUAL void getTemperatureProperty(KmdSysman::GfxSysmanReqHeaderIn *pRequest, KmdSysman::GfxSysmanReqHeaderOut *pResponse) { pResponse->outDataSize = 0; pResponse->outReturnCode = KmdSysman::KmdSysmanFail; } MOCKABLE_VIRTUAL void setTemperatureProperty(KmdSysman::GfxSysmanReqHeaderIn *pRequest, KmdSysman::GfxSysmanReqHeaderOut *pResponse) { pResponse->outDataSize = 0; pResponse->outReturnCode = KmdSysman::KmdSysmanFail; } MOCKABLE_VIRTUAL void getFpsProperty(KmdSysman::GfxSysmanReqHeaderIn *pRequest, KmdSysman::GfxSysmanReqHeaderOut *pResponse) { pResponse->outDataSize = 0; pResponse->outReturnCode = KmdSysman::KmdSysmanFail; } MOCKABLE_VIRTUAL void setFpsProperty(KmdSysman::GfxSysmanReqHeaderIn *pRequest, KmdSysman::GfxSysmanReqHeaderOut *pResponse) { pResponse->outDataSize = 0; pResponse->outReturnCode = KmdSysman::KmdSysmanFail; } MOCKABLE_VIRTUAL void getSchedulerProperty(KmdSysman::GfxSysmanReqHeaderIn *pRequest, KmdSysman::GfxSysmanReqHeaderOut *pResponse) { pResponse->outDataSize = 0; pResponse->outReturnCode = KmdSysman::KmdSysmanFail; } MOCKABLE_VIRTUAL void setSchedulerProperty(KmdSysman::GfxSysmanReqHeaderIn *pRequest, KmdSysman::GfxSysmanReqHeaderOut *pResponse) { pResponse->outDataSize = 0; pResponse->outReturnCode = KmdSysman::KmdSysmanFail; } MOCKABLE_VIRTUAL void getMemoryProperty(KmdSysman::GfxSysmanReqHeaderIn *pRequest, KmdSysman::GfxSysmanReqHeaderOut *pResponse) { pResponse->outDataSize = 0; pResponse->outReturnCode = KmdSysman::KmdSysmanFail; } MOCKABLE_VIRTUAL void setMemoryProperty(KmdSysman::GfxSysmanReqHeaderIn *pRequest, KmdSysman::GfxSysmanReqHeaderOut *pResponse) { pResponse->outDataSize = 0; pResponse->outReturnCode = KmdSysman::KmdSysmanFail; } MOCKABLE_VIRTUAL void getPciProperty(KmdSysman::GfxSysmanReqHeaderIn *pRequest, KmdSysman::GfxSysmanReqHeaderOut *pResponse) { pResponse->outDataSize = 0; 
pResponse->outReturnCode = KmdSysman::KmdSysmanFail; } MOCKABLE_VIRTUAL void setPciProperty(KmdSysman::GfxSysmanReqHeaderIn *pRequest, KmdSysman::GfxSysmanReqHeaderOut *pResponse) { pResponse->outDataSize = 0; pResponse->outReturnCode = KmdSysman::KmdSysmanFail; } MOCKABLE_VIRTUAL void getGlobalOperationsProperty(KmdSysman::GfxSysmanReqHeaderIn *pRequest, KmdSysman::GfxSysmanReqHeaderOut *pResponse) { pResponse->outDataSize = 0; pResponse->outReturnCode = KmdSysman::KmdSysmanFail; } MOCKABLE_VIRTUAL void setGlobalOperationsProperty(KmdSysman::GfxSysmanReqHeaderIn *pRequest, KmdSysman::GfxSysmanReqHeaderOut *pResponse) { pResponse->outDataSize = 0; pResponse->outReturnCode = KmdSysman::KmdSysmanFail; } void retrieveCorrectVersion(KmdSysman::GfxSysmanMainHeaderOut *pHeaderOut) { pHeaderOut->outNumElements = 1; pHeaderOut->outTotalSize = 0; KmdSysman::GfxSysmanReqHeaderOut *pResponse = reinterpret_cast(pHeaderOut->outBuffer); uint8_t *pBuffer = nullptr; pResponse->outReturnCode = KmdSysman::KmdSysmanSuccess; pResponse->outDataSize = sizeof(KmdSysman::KmdSysmanVersion); pBuffer = reinterpret_cast(pResponse); pBuffer += sizeof(KmdSysman::GfxSysmanReqHeaderOut); pHeaderOut->outTotalSize += sizeof(KmdSysman::GfxSysmanReqHeaderOut); KmdSysman::KmdSysmanVersion *pCurrentVersion = reinterpret_cast(pBuffer); pCurrentVersion->majorVersion = mockKmdVersionMajor; pCurrentVersion->minorVersion = mockKmdVersionMinor; pCurrentVersion->patchNumber = mockKmdPatchNumber; pHeaderOut->outTotalSize += sizeof(KmdSysman::KmdSysmanVersion); } bool validateInputBuffer(KmdSysman::GfxSysmanMainHeaderIn *pHeaderIn) { uint32_t sizeCheck = pHeaderIn->inTotalsize; uint8_t *pBufferPtr = pHeaderIn->inBuffer; for (uint32_t i = 0; i < pHeaderIn->inNumElements; i++) { KmdSysman::GfxSysmanReqHeaderIn *pRequest = reinterpret_cast(pBufferPtr); if (pRequest->inCommand == KmdSysman::Command::Get || pRequest->inCommand == KmdSysman::Command::Set || pRequest->inCommand == KmdSysman::Command::RegisterEvent) { 
if (pRequest->inComponent >= KmdSysman::Component::InterfaceProperties && pRequest->inComponent < KmdSysman::Component::MaxComponents) { pBufferPtr += sizeof(KmdSysman::GfxSysmanReqHeaderIn); sizeCheck -= sizeof(KmdSysman::GfxSysmanReqHeaderIn); if (pRequest->inCommand == KmdSysman::Command::Set || pRequest->inCommand == KmdSysman::Command::RegisterEvent) { if (pRequest->inDataSize == 0) { return false; } pBufferPtr += pRequest->inDataSize; sizeCheck -= pRequest->inDataSize; } } else { return false; } } else { return false; } } if (sizeCheck != 0) { return false; } return true; } void registerEvent(KmdSysman::GfxSysmanReqHeaderIn *pRequest, KmdSysman::GfxSysmanReqHeaderOut *pResponse) { if (!allowSetCalls) { pResponse->outDataSize = 0; pResponse->outReturnCode = KmdSysman::KmdSysmanFail; return; } uint8_t *pBuffer = reinterpret_cast(pRequest); pBuffer += sizeof(KmdSysman::GfxSysmanReqHeaderIn); pResponse->outDataSize = 0; switch (pRequest->inRequestId) { case KmdSysman::Events::EnterD0: case KmdSysman::Events::EnterD3: case KmdSysman::Events::EnterTDR: case KmdSysman::Events::ExitTDR: case KmdSysman::Events::EnergyThresholdCrossed: { bool found = false; for (uint32_t i = 0; i < mockKmdMaxHandlesPerEvent; i++) { if (!handles[pRequest->inRequestId][i].inited) { handles[pRequest->inRequestId][i].inited = true; unsigned long long eventID = *(unsigned long long *)pBuffer; handles[pRequest->inRequestId][i].eventHandle = reinterpret_cast(eventID); found = true; break; } } pResponse->outReturnCode = found ? 
KmdSysman::KmdSysmanSuccess : KmdSysman::KmdSysmanFail; } break; default: pResponse->outDataSize = 0; pResponse->outReturnCode = KmdSysman::KmdSysmanFail; break; } } void signalEvent(uint32_t idEvent) { uint32_t arrayID = 0; if (idEvent & ZES_EVENT_TYPE_FLAG_ENERGY_THRESHOLD_CROSSED) { arrayID = KmdSysman::Events::EnergyThresholdCrossed; } if (idEvent & ZES_EVENT_TYPE_FLAG_DEVICE_SLEEP_STATE_ENTER) { arrayID = KmdSysman::Events::EnterD3; } if (idEvent & ZES_EVENT_TYPE_FLAG_DEVICE_SLEEP_STATE_EXIT) { arrayID = KmdSysman::Events::EnterD0; } if (idEvent & ZES_EVENT_TYPE_FLAG_DEVICE_DETACH) { arrayID = KmdSysman::Events::EnterTDR; } if (idEvent & ZES_EVENT_TYPE_FLAG_DEVICE_ATTACH) { arrayID = KmdSysman::Events::ExitTDR; } for (uint32_t i = 0; i < mockKmdMaxHandlesPerEvent; i++) { if (handles[arrayID][i].inited) { SetEvent(handles[arrayID][i].eventHandle); } } } void setProperty(KmdSysman::GfxSysmanReqHeaderIn *pRequest, KmdSysman::GfxSysmanReqHeaderOut *pResponse) { if (!allowSetCalls) { pResponse->outDataSize = 0; pResponse->outReturnCode = KmdSysman::KmdSysmanFail; return; } switch (pRequest->inComponent) { case KmdSysman::Component::InterfaceProperties: { setInterfaceProperty(pRequest, pResponse); } break; case KmdSysman::Component::PowerComponent: { setPowerProperty(pRequest, pResponse); } break; case KmdSysman::Component::FrequencyComponent: { setFrequencyProperty(pRequest, pResponse); } break; case KmdSysman::Component::ActivityComponent: { setActivityProperty(pRequest, pResponse); } break; case KmdSysman::Component::FanComponent: { setFanProperty(pRequest, pResponse); } break; case KmdSysman::Component::TemperatureComponent: { setTemperatureProperty(pRequest, pResponse); } break; case KmdSysman::Component::FpsComponent: { setFpsProperty(pRequest, pResponse); } break; case KmdSysman::Component::SchedulerComponent: { setSchedulerProperty(pRequest, pResponse); } break; case KmdSysman::Component::MemoryComponent: { setMemoryProperty(pRequest, pResponse); } break; 
case KmdSysman::Component::PciComponent: { setPciProperty(pRequest, pResponse); } break; case KmdSysman::Component::GlobalOperationsComponent: { setGlobalOperationsProperty(pRequest, pResponse); } break; default: { pResponse->outDataSize = 0; pResponse->outReturnCode = KmdSysman::KmdSysmanFail; } break; } } void getProperty(KmdSysman::GfxSysmanReqHeaderIn *pRequest, KmdSysman::GfxSysmanReqHeaderOut *pResponse) { switch (pRequest->inComponent) { case KmdSysman::Component::InterfaceProperties: { getInterfaceProperty(pRequest, pResponse); } break; case KmdSysman::Component::PowerComponent: { getPowerProperty(pRequest, pResponse); } break; case KmdSysman::Component::FrequencyComponent: { getFrequencyProperty(pRequest, pResponse); } break; case KmdSysman::Component::ActivityComponent: { getActivityProperty(pRequest, pResponse); } break; case KmdSysman::Component::FanComponent: { getFanProperty(pRequest, pResponse); } break; case KmdSysman::Component::TemperatureComponent: { getTemperatureProperty(pRequest, pResponse); } break; case KmdSysman::Component::FpsComponent: { getFpsProperty(pRequest, pResponse); } break; case KmdSysman::Component::SchedulerComponent: { getSchedulerProperty(pRequest, pResponse); } break; case KmdSysman::Component::MemoryComponent: { getMemoryProperty(pRequest, pResponse); } break; case KmdSysman::Component::PciComponent: { getPciProperty(pRequest, pResponse); } break; case KmdSysman::Component::GlobalOperationsComponent: { getGlobalOperationsProperty(pRequest, pResponse); } break; default: { pResponse->outDataSize = 0; pResponse->outReturnCode = KmdSysman::KmdSysmanFail; } break; } } bool mock_escape(uint32_t escapeOp, uint64_t pInPtr, uint32_t dataInSize, uint64_t pOutPtr, uint32_t dataOutSize) { void *pDataIn = reinterpret_cast(pInPtr); void *pDataOut = reinterpret_cast(pOutPtr); if (pDataIn == nullptr || pDataOut == nullptr) { return false; } if (dataInSize != sizeof(KmdSysman::GfxSysmanMainHeaderIn) || dataOutSize != 
sizeof(KmdSysman::GfxSysmanMainHeaderOut)) { return false; } if (escapeOp != KmdSysman::PcEscapeOperation) { return false; } KmdSysman::GfxSysmanMainHeaderIn *pSysmanMainHeaderIn = reinterpret_cast(pDataIn); KmdSysman::GfxSysmanMainHeaderOut *pSysmanMainHeaderOut = reinterpret_cast(pDataOut); KmdSysman::KmdSysmanVersion versionSysman; versionSysman.data = pSysmanMainHeaderIn->inVersion; if (versionSysman.majorVersion != KmdSysman::KmdMajorVersion) { if (versionSysman.majorVersion == 0) { retrieveCorrectVersion(pSysmanMainHeaderOut); return true; } return false; } if (pSysmanMainHeaderIn->inTotalsize == 0) { return false; } if (pSysmanMainHeaderIn->inNumElements == 0) { return false; } if (!validateInputBuffer(pSysmanMainHeaderIn)) { return false; } uint8_t *pBufferIn = pSysmanMainHeaderIn->inBuffer; uint8_t *pBufferOut = pSysmanMainHeaderOut->outBuffer; uint32_t requestOffset = 0; uint32_t responseOffset = 0; pSysmanMainHeaderOut->outTotalSize = 0; for (uint32_t i = 0; i < pSysmanMainHeaderIn->inNumElements; i++) { KmdSysman::GfxSysmanReqHeaderIn *pRequest = reinterpret_cast(pBufferIn); KmdSysman::GfxSysmanReqHeaderOut *pResponse = reinterpret_cast(pBufferOut); switch (pRequest->inCommand) { case KmdSysman::Command::Get: { getProperty(pRequest, pResponse); requestOffset = sizeof(KmdSysman::GfxSysmanReqHeaderIn); responseOffset = sizeof(KmdSysman::GfxSysmanReqHeaderOut); responseOffset += pResponse->outDataSize; } break; case KmdSysman::Command::Set: { setProperty(pRequest, pResponse); requestOffset = sizeof(KmdSysman::GfxSysmanReqHeaderIn); requestOffset += pRequest->inDataSize; responseOffset = sizeof(KmdSysman::GfxSysmanReqHeaderOut); } break; case KmdSysman::Command::RegisterEvent: { registerEvent(pRequest, pResponse); requestOffset = sizeof(KmdSysman::GfxSysmanReqHeaderIn); requestOffset += pRequest->inDataSize; responseOffset = sizeof(KmdSysman::GfxSysmanReqHeaderOut); } break; default: { return false; } break; } pResponse->outRequestId = 
pRequest->inRequestId; pResponse->outComponent = pRequest->inComponent; pBufferIn += requestOffset; pBufferOut += responseOffset; pSysmanMainHeaderOut->outTotalSize += responseOffset; } pSysmanMainHeaderOut->outNumElements = pSysmanMainHeaderIn->inNumElements; pSysmanMainHeaderOut->outStatus = KmdSysman::KmdSysmanSuccess; return true; } Mock() = default; ~Mock() = default; }; } // namespace ult } // namespace L0mock_sysman_fixture.h000066400000000000000000000036321422164147700347160ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/windows/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/os_interface/device_factory.h" #include "shared/source/os_interface/os_interface.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/tools/source/sysman/sysman.h" #include "level_zero/tools/test/unit_tests/sources/sysman/mocks/mock_sysman_env_vars.h" #include "sysman/windows/os_sysman_imp.h" extern bool sysmanUltsEnable; using ::testing::_; using namespace NEO; namespace L0 { namespace ult { class PublicWddmSysmanImp : public L0::WddmSysmanImp { public: using WddmSysmanImp::pKmdSysManager; }; class SysmanDeviceFixture : public DeviceFixture, public SysmanEnabledFixture { public: void SetUp() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } DeviceFixture::SetUp(); neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]->osInterface = std::make_unique(); SysmanEnabledFixture::SetUp(); device->setSysmanHandle(L0::SysmanDeviceHandleContext::init(device->toHandle())); pSysmanDevice = device->getSysmanHandle(); pSysmanDeviceImp = static_cast(pSysmanDevice); pOsSysman = pSysmanDeviceImp->pOsSysman; pWddmSysmanImp = static_cast(pOsSysman); } void TearDown() override { if 
(!sysmanUltsEnable) { GTEST_SKIP(); } SysmanEnabledFixture::TearDown(); DeviceFixture::TearDown(); } SysmanDevice *pSysmanDevice = nullptr; SysmanDeviceImp *pSysmanDeviceImp = nullptr; OsSysman *pOsSysman = nullptr; PublicWddmSysmanImp *pWddmSysmanImp = nullptr; }; } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/windows/test_sysman.cpp000066400000000000000000000024021422164147700336020ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/debug_env_reader.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/tools/test/unit_tests/sources/sysman/windows/mock_sysman_fixture.h" namespace L0 { namespace ult { using MockDeviceSysmanGetTest = Test; TEST_F(MockDeviceSysmanGetTest, GivenValidSysmanHandleSetInDeviceStructWhenGetThisSysmanHandleThenHandlesShouldBeSimilar) { SysmanDeviceImp *sysman = new SysmanDeviceImp(device->toHandle()); device->setSysmanHandle(sysman); EXPECT_EQ(sysman, device->getSysmanHandle()); } TEST_F(SysmanDeviceFixture, GivenValidDeviceHandleInSysmanInitThenValidSysmanHandleReceived) { ze_device_handle_t hSysman = device->toHandle(); auto pSysmanDevice = L0::SysmanDeviceHandleContext::init(hSysman); EXPECT_NE(pSysmanDevice, nullptr); delete pSysmanDevice; pSysmanDevice = nullptr; } TEST_F(SysmanDeviceFixture, GivenMockEnvValuesWhenGettingEnvValueThenCorrectValueIsReturned) { ASSERT_NE(IoFunctions::mockableEnvValues, nullptr); EnvironmentVariableReader envVarReader; EXPECT_EQ(envVarReader.getSetting("ZES_ENABLE_SYSMAN", false), true); } } // namespace ult } // namespace L0test_sysman_manager.cpp000066400000000000000000000163561422164147700352320ustar00rootroot00000000000000compute-runtime-22.14.22890/level_zero/tools/test/unit_tests/sources/sysman/windows/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include 
"level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/tools/source/sysman/sysman_imp.h" #include "level_zero/tools/source/sysman/windows/os_sysman_imp.h" #include "level_zero/tools/test/unit_tests/sources/sysman/windows/mock_kmd_sys_manager.h" #include "gmock/gmock.h" #include "gtest/gtest.h" extern bool sysmanUltsEnable; using ::testing::_; using ::testing::DoAll; using ::testing::InSequence; using ::testing::Invoke; using ::testing::NiceMock; using ::testing::Return; namespace L0 { namespace ult { uint64_t convertTStoMicroSec(uint64_t TS, uint32_t freq) { double timeFactor = 1.0 / static_cast(freq); timeFactor = static_cast(TS) * timeFactor; timeFactor *= static_cast(microFacor); return static_cast(timeFactor); } class SysmanKmdManagerFixture : public ::testing::Test { protected: Mock *pKmdSysManager = nullptr; void SetUp() { if (!sysmanUltsEnable) { GTEST_SKIP(); } pKmdSysManager = new Mock; EXPECT_CALL(*pKmdSysManager, escape(_, _, _, _, _)) .WillRepeatedly(::testing::Invoke(pKmdSysManager, &Mock::mock_escape)); } void TearDown() override { if (!sysmanUltsEnable) { GTEST_SKIP(); } if (pKmdSysManager != nullptr) { delete pKmdSysManager; pKmdSysManager = nullptr; } } }; TEST_F(SysmanKmdManagerFixture, GivenAllowSetCallsFalseWhenRequestingSingleThenPowerValueIsCorrect) { pKmdSysManager->allowSetCalls = false; ze_result_t result = ZE_RESULT_SUCCESS; KmdSysman::RequestProperty request; KmdSysman::ResponseProperty response; request.commandId = KmdSysman::Command::Get; request.componentId = KmdSysman::Component::PowerComponent; request.requestId = KmdSysman::Requests::Power::CurrentPowerLimit1; result = pKmdSysManager->requestSingle(request, response); EXPECT_EQ(ZE_RESULT_SUCCESS, result); uint32_t value = 0; memcpy_s(&value, sizeof(uint32_t), response.dataBuffer, sizeof(uint32_t)); value = static_cast(value); EXPECT_EQ(value, pKmdSysManager->mockPowerLimit1); } TEST_F(SysmanKmdManagerFixture, 
GivenAllowSetCallsTrueWhenRequestingSingleThenPowerValueIsCorrect) { pKmdSysManager->allowSetCalls = true; ze_result_t result = ZE_RESULT_SUCCESS; KmdSysman::RequestProperty request; KmdSysman::ResponseProperty response; constexpr uint32_t increase = 500; uint32_t iniitialPl1 = pKmdSysManager->mockPowerLimit1; request.commandId = KmdSysman::Command::Get; request.componentId = KmdSysman::Component::PowerComponent; request.requestId = KmdSysman::Requests::Power::CurrentPowerLimit1; request.dataSize = 0; result = pKmdSysManager->requestSingle(request, response); EXPECT_EQ(ZE_RESULT_SUCCESS, result); uint32_t value = 0; memcpy_s(&value, sizeof(uint32_t), response.dataBuffer, sizeof(uint32_t)); value = static_cast(value); EXPECT_EQ(value, iniitialPl1); value += increase; request.commandId = KmdSysman::Command::Set; request.componentId = KmdSysman::Component::PowerComponent; request.requestId = KmdSysman::Requests::Power::CurrentPowerLimit1; request.dataSize = sizeof(uint32_t); memcpy_s(request.dataBuffer, sizeof(uint32_t), &value, sizeof(uint32_t)); result = pKmdSysManager->requestSingle(request, response); EXPECT_EQ(ZE_RESULT_SUCCESS, result); request.commandId = KmdSysman::Command::Get; request.componentId = KmdSysman::Component::PowerComponent; request.requestId = KmdSysman::Requests::Power::CurrentPowerLimit1; request.dataSize = 0; result = pKmdSysManager->requestSingle(request, response); EXPECT_EQ(ZE_RESULT_SUCCESS, result); value = 0; memcpy_s(&value, sizeof(uint32_t), response.dataBuffer, sizeof(uint32_t)); value = static_cast(value); EXPECT_EQ(value, (iniitialPl1 + increase)); } TEST_F(SysmanKmdManagerFixture, GivenAllowSetCallsFalseAndCorruptedDataWhenRequestingSingleThenCallFails) { pKmdSysManager->allowSetCalls = false; ze_result_t result = ZE_RESULT_SUCCESS; KmdSysman::RequestProperty request; KmdSysman::ResponseProperty response; request.commandId = KmdSysman::Command::Get; request.componentId = KmdSysman::Component::PowerComponent; request.requestId = 
KmdSysman::Requests::Power::CurrentPowerLimit1; request.dataSize = sizeof(uint64_t); result = pKmdSysManager->requestSingle(request, response); EXPECT_NE(ZE_RESULT_SUCCESS, result); request.commandId = KmdSysman::Command::MaxCommands; request.componentId = KmdSysman::Component::PowerComponent; request.requestId = KmdSysman::Requests::Power::CurrentPowerLimit1; request.dataSize = 0; result = pKmdSysManager->requestSingle(request, response); EXPECT_NE(ZE_RESULT_SUCCESS, result); request.commandId = KmdSysman::Command::Get; request.componentId = KmdSysman::Component::MaxComponents; request.requestId = KmdSysman::Requests::Power::CurrentPowerLimit1; request.dataSize = 0; result = pKmdSysManager->requestSingle(request, response); EXPECT_NE(ZE_RESULT_SUCCESS, result); request.commandId = KmdSysman::Command::Get; request.componentId = KmdSysman::Component::PowerComponent; request.requestId = KmdSysman::Requests::Power::MaxPowerRequests; request.dataSize = 0; result = pKmdSysManager->requestSingle(request, response); EXPECT_NE(ZE_RESULT_SUCCESS, result); } TEST_F(SysmanKmdManagerFixture, GivenAllowSetCallsTrueAndCorruptedDataWhenRequestingSingleThenCallFails) { pKmdSysManager->allowSetCalls = true; ze_result_t result = ZE_RESULT_SUCCESS; KmdSysman::RequestProperty request; KmdSysman::ResponseProperty response; uint32_t value = 0; request.commandId = KmdSysman::Command::Set; request.componentId = KmdSysman::Component::PowerComponent; request.requestId = KmdSysman::Requests::Power::CurrentPowerLimit1; request.dataSize = 0; memcpy_s(request.dataBuffer, sizeof(uint32_t), &value, sizeof(uint32_t)); result = pKmdSysManager->requestSingle(request, response); EXPECT_NE(ZE_RESULT_SUCCESS, result); request.commandId = KmdSysman::Command::MaxCommands; request.componentId = KmdSysman::Component::PowerComponent; request.requestId = KmdSysman::Requests::Power::CurrentPowerLimit1; request.dataSize = sizeof(uint32_t); result = pKmdSysManager->requestSingle(request, response); 
EXPECT_NE(ZE_RESULT_SUCCESS, result); request.commandId = KmdSysman::Command::Get; request.componentId = KmdSysman::Component::MaxComponents; request.requestId = KmdSysman::Requests::Power::CurrentPowerLimit1; result = pKmdSysManager->requestSingle(request, response); EXPECT_NE(ZE_RESULT_SUCCESS, result); request.commandId = KmdSysman::Command::Get; request.componentId = KmdSysman::Component::PowerComponent; request.requestId = KmdSysman::Requests::Power::MaxPowerRequests; result = pKmdSysManager->requestSingle(request, response); EXPECT_NE(ZE_RESULT_SUCCESS, result); } } // namespace ult } // namespace L0 compute-runtime-22.14.22890/level_zero/ze_intel_gpu_version.h.in000066400000000000000000000006101422164147700245240ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #ifndef ZE_INTEL_GPU_VERSION_H #define ZE_INTEL_GPU_VERSION_H #define VER_FILEVERSION ${PROJECT_VERSION_MAJOR},${PROJECT_VERSION_MINOR},${PROJECT_VERSION_PATCH} #define VER_FILEVERSION_STR "${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}\0" #endif /* ZE_INTEL_GPU_VERSION_H */ compute-runtime-22.14.22890/lib_names.h.in000066400000000000000000000003241422164147700200600ustar00rootroot00000000000000/* * Copyright (C) 2018 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #cmakedefine SLD_LIBRARY_NAME "${CMAKE_SHARED_LIBRARY_PREFIX}${SLD_LIBRARY_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}" compute-runtime-22.14.22890/manifests/000077500000000000000000000000001422164147700173435ustar00rootroot00000000000000compute-runtime-22.14.22890/manifests/manifest.yml000066400000000000000000000024651422164147700217030ustar00rootroot00000000000000components: gmmlib: dest_dir: gmmlib type: git branch: gmmlib revision: intel-gmmlib-22.0.2 repository: https://github.com/intel/gmmlib.git igc: dest_dir: igc type: git branch: igc revision: 416863d5e7e5a1482f4b95ec2f0cd5a0821930fa repository: 
https://github.com/intel/intelgraphicscompiler igsc: dest_dir: igsc type: git branch: igsc revision: V0.2.4-5 repository: https://github.com/intel/igsc.git infra: branch: master dest_dir: infra revision: 504606da822eda6cb0cb8b94dde3315016717a08 type: git internal: branch: master dest_dir: internal revision: FETCH_HEAD type: git kmdaf: branch: kmdaf dest_dir: kmdaf revision: 0485b5dc951ddd09298c68dd7e520f51ba6db569 type: git level_zero: dest_dir: level_zero type: git branch: master revision: v1.7.15 repository: https://github.com/oneapi-src/level-zero libva: dest_dir: libva type: git branch: libva revision: c9bb65b repository: https://github.com/intel/libva.git wdk: dest_dir: wdk type: git branch: wdk revision: 5e00a0c1148dfea10b3a934521597896c8e7d32d-1885 wsl: branch: wsl dest_dir: wsl revision: 56430997dac34ca0e9e18c177636234cac26ad54 type: git converter: M-1885 version: '1' compute-runtime-22.14.22890/opencl/000077500000000000000000000000001422164147700166325ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/CMakeLists.txt000066400000000000000000000034301422164147700213720ustar00rootroot00000000000000# # Copyright (C) 2021-2022 Intel Corporation # # SPDX-License-Identifier: MIT # macro(generate_runtime_lib LIB_NAME MOCKABLE GENERATE_EXEC) set(NEO_STATIC_LIB_NAME ${LIB_NAME}) set(SHARINGS_ENABLE_LIB_NAME "${LIB_NAME}_sharings_enable") set(GENERATE_EXECUTABLE ${GENERATE_EXEC}) if(${MOCKABLE}) set(NEO_SHARED_LIB ${NEO_SHARED_MOCKABLE_LIB_NAME}) else() set(NEO_SHARED_LIB ${NEO_SHARED_RELEASE_LIB_NAME}) endif() if(NOT BUILD_WITHOUT_RUNTIME) add_subdirectory(source "${NEO_BUILD_DIR}/${LIB_NAME}") else() add_subdirectory(source "${NEO_BUILD_DIR}/${LIB_NAME}" EXCLUDE_FROM_ALL) endif() target_compile_definitions(${BUILTINS_SOURCES_LIB_NAME} PUBLIC MOCKABLE_VIRTUAL=) target_compile_definitions(${BUILTINS_VME_LIB_NAME} PUBLIC MOCKABLE_VIRTUAL=) if(${MOCKABLE}) target_compile_definitions(${LIB_NAME} PUBLIC MOCKABLE_VIRTUAL=virtual) 
target_compile_definitions(${SHARINGS_ENABLE_LIB_NAME} PUBLIC MOCKABLE_VIRTUAL=virtual) else() target_compile_definitions(${LIB_NAME} PUBLIC MOCKABLE_VIRTUAL=) target_compile_definitions(${SHARINGS_ENABLE_LIB_NAME} PUBLIC MOCKABLE_VIRTUAL=) endif() endmacro() set(NEO_DYNAMIC_LIB_NAME "igdrcl_dll") # single NEO dll set(NEO_DLL_NAME_BASE "igdrcl") if(DONT_CARE_OF_VIRTUALS) message(STATUS "All targets will use virtuals") set(NEO_RELEASE_LIB_NAME "igdrcl_lib") set(NEO_MOCKABLE_LIB_NAME ${NEO_RELEASE_LIB_NAME}) generate_runtime_lib(${NEO_RELEASE_LIB_NAME} TRUE TRUE) else() set(NEO_RELEASE_LIB_NAME "igdrcl_lib_release") # Used by dll/so generate_runtime_lib(${NEO_RELEASE_LIB_NAME} FALSE TRUE) if(NOT NEO_SKIP_OCL_UNIT_TESTS) set(NEO_MOCKABLE_LIB_NAME "igdrcl_lib_mockable") # Used by ULTS generate_runtime_lib(${NEO_MOCKABLE_LIB_NAME} TRUE FALSE) endif() endif() add_subdirectory(test) compute-runtime-22.14.22890/opencl/doc/000077500000000000000000000000001422164147700173775ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/doc/DISTRIBUTIONS.md000066400000000000000000000022751422164147700217710ustar00rootroot00000000000000 # NEO OpenCL in Linux distributions [![Packaging status](https://repology.org/badge/vertical-allrepos/intel-compute-runtime.svg)](https://repology.org/project/intel-compute-runtime/versions) ## Arch Linux* ``` pacman -S intel-compute-runtime ``` ## Exherbo Linux* ``` cave resolve --execute intel-compute-runtime ``` ## Gentoo*, Funtoo* ``` emerge intel-neo ``` ## NixOS ``` nix-channel --add https://nixos.org/channels/nixpkgs-unstable nix-channel --update nix-env -i intel-compute-runtime ``` ## PLD Linux* ``` ipoldek install intel-gmmlib intel-graphics-compiler intel-compute-runtime ``` ## Ubuntu* 20.04, 21.04 ``` apt install intel-opencl-icd ``` ## Conda (Linux glibc>=2.12) ``` conda config --add channels conda-forge conda install intel-compute-runtime ``` ## Building and installation * 
[Ubuntu*](https://github.com/intel/compute-runtime/blob/master/BUILD.md) * [Centos* 8](https://github.com/intel/compute-runtime/blob/master/BUILD.md) # NEO in other distributions ## FreeBSD*, DragonFly* ``` pkg install intel-compute-runtime ``` (*) Other names and brands may be claimed as property of others. compute-runtime-22.14.22890/opencl/doc/FAQ.md000066400000000000000000000144501422164147700203340ustar00rootroot00000000000000 # Frequently asked questions (OpenCL) For general questions, see the [main FAQ](https://github.com/intel/compute-runtime/blob/master/FAQ.md). ## OpenCL version ### Which version of OpenCL is supported? See [README.md](https://github.com/intel/compute-runtime/blob/master/README.md). ## Known Issues and Limitations OpenCL compliance of a driver built from open-source components should not be assumed by default. Intel will clearly designate / tag specific builds to indicate production quality including formal compliance. Other builds should be considered experimental. ### What is the functional delta to the "Beignet" driver? Intel's former open-source [Beignet driver](https://01.org/beignet) provided sharing capabilities with MESA OpenGL driver. Our intention is to provide these capabilities in NEO in the future. NEO supports platforms starting with Gen8 graphics (formerly Broadwell). For earlier platforms, please use Beignet driver. ## Feature: cl_intel_va_api_media_sharing extension ### Where can I learn more about this extension? See the enabling [guide](cl_intel_va_api_media_sharing.md). ## Feature: cl_cache ### What is cl_cache? This is a mechanism to cache binary representations of OpenCL kernels provided in text form by the application. By storing the binary representations, compiling is required only the first time, which improves performance. ### How can cl_cache be enabled? In the working directory, manually create *cl_cache* directory. The driver will use this directory to store the binary representations of the compiled kernels. 
Note: This will work on all supported OSes. ### Configuring cl_cache location Cached kernels can be stored in a different directory than the default one. This is useful when the application is installed into a directory for which the user doesn't have permissions. #### Linux configuration Set the environment variable named `cl_cache_dir` to new location of cl_cache directory. #### Example: If the application's directory is `/home/user/Document`, by default cl_cache will be stored in `/home/user/Document/cl_cache`. If the new path should be `/home/user/Desktop/cl_cache_place`, set environment variable `cl_cache_dir` to `/home/user/Desktop/cl_cache_place`. ```bash export cl_cache_dir=/home/user/Desktop/cl_cache_place ``` Subsequent application runs with passed source code and `cl_cache_dir` environment variable set will reuse previously cached kernel binaries instead of compiling kernels from source. #### Windows configuration To set the new location of cl_cache directory - in the registry `HKEY_LOCAL_MACHINE\SOFTWARE\Intel\IGFX\OCL`: 1. add key `cl_cache_dir` 1. add string value named to `cl_cache_dir` key 1. set data of added value to desired location of cl_cache #### Example: If application is located in `C:\Program Files\application\app.exe`, by default cl_cache will be stored in `C:\Program Files\application\cl_cache`. If the new path should be `C:\Users\USER\Documents\application\cl_cache`, to subkey `HKEY_LOCAL_MACHINE\SOFTWARE\Intel\IGFX\OCL\cl_cache_dir` add string value named `C:\Program Files\application\app.exe` with data `C:\Users\USER\Documents\application\cl_cache`. e.g. string value : `HKEY_LOCAL_MACHINE\SOFTWARE\Intel\IGFX\OCL\cl_cache_dir\C:\Program Files\application\app.exe` data : `C:\Users\USER\Documents\application\cl_cache` Neo will look for string value (REG_SZ) `C:\Program Files\application\app.exe` in key `HKEY_LOCAL_MACHINE\SOFTWARE\Intel\IGFX\OCL\cl_cache_dir`. 
Data of this string value will be used as new cl_cache dump directory for this specific application. ### What are the known limitations of cl_cache? 1. Not thread safe. (Workaround: Make sure your clBuildProgram calls are executed in thread safe fashion.) 1. Binary representation may not be compatible between various versions of NEO and IGC drivers. (Workaround: Manually empty *cl_cache* directory prior to update) 1. Cache is not automatically cleaned. (Workaround: Manually empty *cl_cache* directory) 1. Cache may exhaust disk space and cause further failures. (Workaround: Monitor and manually empty *cl_cache* directory) 1. Cache is not process safe. ## Feature: Out of order queues ### Implementation details of out of order queues implementation Current implementation of out of order queues allows multiple kernels to be run concurently. This allows for better device utilization in scenarios where single kernel doesn't fill whole device. More details can be found here: * [Sample applications](https://github.com/intel/compute-samples/tree/master/compute_samples/applications/commands_aggregation) * [IWOCL(*) presentation](https://www.iwocl.org/wp-content/uploads/iwocl-2019-michal-mrozek-intel-breaking-the-last-line-of-performance-border.pdf) ### Known issues and limitations 1. Turning on profiling on out of order command queue serializes kernel execution. 1. Blocking command queue with user events blocks all further submissions until event is unblocked. 1. Commands blocked by user events, when unblocked are serialized as well. ## Feature: Double-precision emulation (FP64) By default NEO driver enables double precision operations only on platforms with supporting hardware. This is signified by exposing the "cl_khr_fp64" extension in the extension string. For other platforms, this support can be emulated by the compiler (IGC). ### How do I enable emulation? FP64 emulation can only be enabled on Linux. There are two settings that have to be set. 
#### Runtime setting: There are two ways you can enable this feature in NEO: * Set an environment variable **OverrideDefaultFP64Settings** to **1**: `OverrideDefaultFP64Settings=1` * In **igdrcl.config** configuration file in the same directory as application binary (you may have to create this file) add a line as such: `OverrideDefaultFP64Settings = 1` #### Compiler setting: IGC reads flags only from environment, so set **IGC_EnableDPEmulation** to **1** as such: `IGC_EnableDPEmulation=1` After both settings have been set you can run the application normally. ### Known issues and limitations Intel does not claim full specification conformance when using emulated mode. We reserve the right to not fix issues that appear only in emulation mode. Performance degradation is to be expected and has not been measured by Intel.compute-runtime-22.14.22890/opencl/doc/VTUNE.md000066400000000000000000000033771422164147700206340ustar00rootroot00000000000000 # Using NEO runtime with VTune Amplifier You can use the Intel VTune Amplifier to identify GPU "hotspots". It will show GPGPU queue, GPU usage, memory throughputs, etc. Using this tool, you can compare how the application behaves under different configurations (LWS, GWS, driver versions, etc.) and identify bottlenecks. ## Requirements * [Intel(R) VTune(tm) Amplifier](https://software.intel.com/en-us/intel-vtune-amplifier-xe) * [Intel(R) SDK for OpenCL(tm) Applications](https://software.intel.com/en-us/intel-opencl/download) * [Intel(R) Metrics Discovery Application Programming Interface](https://github.com/intel/metrics-discovery) * Current Intel(R) OpenCL(tm) GPU driver ## Installation Note: This is an example. Actual filenames may differ 1. 
Install OpenCL SDK & VTune ``` cd tar xvf intel_sdk_for_opencl_2017_7.0.0.2568_x64.gz tar xvf vtune_amplifier_2018_update2.tar.gz sudo dpkg -i intel-opencl_18.26.10987_amd64.deb cd ~/intel_sdk_for_opencl_2017_7.0.0.2568_x64/; sudo ./install_GUI.sh cd ~/vtune_amplifier_2018_update2/; sudo ./install_GUI.sh #use offline activation with file ``` To verify that VTune was installed properly run: ``` lsmod | grep sep4 ``` This should return 2 lines. Otherwise follow sepdk installation in VTune documentation. 2. Compile and install MD API - see MD API [README](https://github.com/intel/metrics-discovery/blob/master/README.md) for instructions. ## Running VTune ``` /opt/intel/vtune_amplifier_2018/bin64/amplxe-gui ``` Note: If you built Metrics Discovery with libstdc++ > 3.4.20, please use the following workaround: ``` sudo sh -c 'LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libstdc++.so.6 /opt/intel/vtune_amplifier_2018/bin64/amplxe-gui' ```compute-runtime-22.14.22890/opencl/doc/cl_intel_va_api_media_sharing.md000066400000000000000000000024701422164147700257060ustar00rootroot00000000000000 # Intel(R) Graphics Compute Runtime for OpenCL(TM) ## Enabling [cl_intel_va_api_media_sharing](https://www.khronos.org/registry/OpenCL/extensions/intel/cl_intel_va_api_media_sharing.txt) extension To enable cl_intel_va_api_media_sharing extension Neo needs to be compiled on system with libva 2.x installed. This extension is supported by [iHD media driver](https://github.com/intel/media-driver). Before compilation additional packages have to be installed. 1. Download sources: * libdrm https://anongit.freedesktop.org/git/mesa/drm.git * libva https://github.com/intel/libva.git Example: ```shell git clone https://anongit.freedesktop.org/git/mesa/drm.git libdrm git clone https://github.com/intel/libva.git libva ``` 2. Compile and install libdrm Example: ```shell cd libdrm ./autogen.sh make -j `nproc` sudo make install ``` 3. 
Compile and install libva Example: ```shell cd libva export PKG_CONFIG_PATH=/usr/local/lib/pkgconfig ./autogen.sh make -j `nproc` sudo make install ``` 4. During Neo compilation verify libva was discovered ```shell -- Checking for module 'libva>=1.0.0' -- Found libva, version 1.1.0 -- Looking for vaGetLibFunc in va -- Looking for vaGetLibFunc in va - found -- Using libva ``` compute-runtime-22.14.22890/opencl/extensions/000077500000000000000000000000001422164147700210315ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/extensions/public/000077500000000000000000000000001422164147700223075ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/extensions/public/cl_ext_private.h000066400000000000000000000224231422164147700254730ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "CL/cl.h" /********************************** * Internal only queue properties * **********************************/ // Intel evaluation now. 
Remove it after approval for public release #define CL_DEVICE_DRIVER_VERSION_INTEL 0x10010 #define CL_DEVICE_DRIVER_VERSION_INTEL_NEO1 0x454E4831 // Driver version is ENH1 /********************************************* * Internal only kernel exec info properties * *********************************************/ #define CL_KERNEL_EXEC_INFO_KERNEL_TYPE_INTEL 0x1000C #define CL_KERNEL_EXEC_INFO_DEFAULT_TYPE_INTEL 0x1000D #define CL_KERNEL_EXEC_INFO_CONCURRENT_TYPE_INTEL 0x1000E /********************************* * cl_intel_debug_info extension * *********************************/ #define cl_intel_debug_info 1 // New queries for clGetProgramInfo: #define CL_PROGRAM_DEBUG_INFO_INTEL 0x4100 #define CL_PROGRAM_DEBUG_INFO_SIZES_INTEL 0x4101 // New queries for clGetKernelInfo: #define CL_KERNEL_BINARY_PROGRAM_INTEL 0x407D #define CL_KERNEL_BINARIES_INTEL 0x4102 #define CL_KERNEL_BINARY_SIZES_INTEL 0x4103 #define CL_KERNEL_BINARY_GPU_ADDRESS_INTEL 0x10010 /******************************************** * event properties for performance counter * ********************************************/ /* performance counter */ #define CL_PROFILING_COMMAND_PERFCOUNTERS_INTEL 0x407F /************************** * Internal only cl types * **************************/ using cl_execution_info_kernel_type_intel = cl_uint; using cl_mem_alloc_flags_intel = cl_bitfield; using cl_mem_properties_intel = cl_bitfield; using cl_mem_flags_intel = cl_mem_flags; using cl_mem_info_intel = cl_uint; using cl_mem_advice_intel = cl_uint; using cl_unified_shared_memory_type_intel = cl_uint; using cl_unified_shared_memory_capabilities_intel = cl_bitfield; /****************************** * Internal only cl_mem_flags * ******************************/ #define CL_MEM_FLAGS_INTEL 0x10001 #define CL_MEM_LOCALLY_UNCACHED_RESOURCE (1 << 18) #define CL_MEM_LOCALLY_UNCACHED_SURFACE_STATE_RESOURCE (1 << 25) #define CL_MEM_48BIT_RESOURCE_INTEL (1 << 26) // Used with clEnqueueVerifyMemory #define CL_MEM_COMPARE_EQUAL 0u 
#define CL_MEM_COMPARE_NOT_EQUAL 1u #define CL_MEM_FORCE_LINEAR_STORAGE_INTEL (1 << 19) #define CL_MEM_FORCE_HOST_MEMORY_INTEL (1 << 20) #define CL_MEM_ALLOCATION_HANDLE_INTEL 0x10050 #define CL_MEM_USES_COMPRESSION_INTEL 0x10051 //Used with createBuffer #define CL_MEM_ALLOW_UNRESTRICTED_SIZE_INTEL (1 << 23) /****************************** * UNIFIED MEMORY * *******************************/ /* cl_device_info */ #define CL_DEVICE_HOST_MEM_CAPABILITIES_INTEL 0x4190 #define CL_DEVICE_DEVICE_MEM_CAPABILITIES_INTEL 0x4191 #define CL_DEVICE_SINGLE_DEVICE_SHARED_MEM_CAPABILITIES_INTEL 0x4192 #define CL_DEVICE_CROSS_DEVICE_SHARED_MEM_CAPABILITIES_INTEL 0x4193 #define CL_DEVICE_SHARED_SYSTEM_MEM_CAPABILITIES_INTEL 0x4194 /* cl_unified_shared_memory_capabilities_intel - bitfield */ #define CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL (1 << 0) #define CL_UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL (1 << 1) #define CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS_INTEL (1 << 2) #define CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS_INTEL (1 << 3) /* cl_mem_properties_intel */ #define CL_MEM_ALLOC_FLAGS_INTEL 0x4195 #define CL_MEM_ALLOC_USE_HOST_PTR_INTEL 0x1000F /* cl_mem_alloc_flags_intel - bitfield */ #define CL_MEM_ALLOC_DEFAULT_INTEL 0 #define CL_MEM_ALLOC_WRITE_COMBINED_INTEL (1 << 0) #define CL_MEM_ALLOC_INITIAL_PLACEMENT_DEVICE_INTEL (1 << 1) #define CL_MEM_ALLOC_INITIAL_PLACEMENT_HOST_INTEL (1 << 2) /* cl_mem_alloc_info_intel */ #define CL_MEM_ALLOC_TYPE_INTEL 0x419A #define CL_MEM_ALLOC_BASE_PTR_INTEL 0x419B #define CL_MEM_ALLOC_SIZE_INTEL 0x419C #define CL_MEM_ALLOC_DEVICE_INTEL 0x419D /* cl_unified_shared_memory_type_intel */ #define CL_MEM_TYPE_UNKNOWN_INTEL 0x4196 #define CL_MEM_TYPE_HOST_INTEL 0x4197 #define CL_MEM_TYPE_DEVICE_INTEL 0x4198 #define CL_MEM_TYPE_SHARED_INTEL 0x4199 /* cl_command_type */ #define CL_COMMAND_MEMSET_INTEL 0x4204 #define CL_COMMAND_MEMFILL_INTEL 0x4204 #define CL_COMMAND_MEMCPY_INTEL 0x4205 #define CL_COMMAND_MIGRATEMEM_INTEL 0x4206 #define 
CL_COMMAND_MEMADVISE_INTEL 0x4207 /****************************** * THREAD ARBITRATION POLICY * *******************************/ /* cl_device_info */ #define CL_DEVICE_SUPPORTED_THREAD_ARBITRATION_POLICY_INTEL 0x4208 /* cl_kernel_exec_info */ #define CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL 0x4200 #define CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL 0x4201 #define CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL 0x4202 #define CL_KERNEL_EXEC_INFO_USM_PTRS_INTEL 0x4203 #define CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_OLDEST_FIRST_INTEL 0x10022 #define CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_ROUND_ROBIN_INTEL 0x10023 #define CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_AFTER_DEPENDENCY_ROUND_ROBIN_INTEL 0x10024 #define CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_INTEL 0x10025 #define CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_STALL_BASED_ROUND_ROBIN_INTEL 0x10026 /****************************** * SLICE COUNT SELECTING * *******************************/ /* cl_device_info */ #define CL_DEVICE_SLICE_COUNT_INTEL 0x10020 /* cl_queue_properties */ #define CL_QUEUE_SLICE_COUNT_INTEL 0x10021 /****************************** * QUEUE FAMILY SELECTING * *******************************/ /* cl_device_info */ #define CL_DEVICE_QUEUE_FAMILY_PROPERTIES_INTEL 0x418B /* cl_queue_properties */ #define CL_QUEUE_FAMILY_INTEL 0x418C #define CL_QUEUE_INDEX_INTEL 0x418D /* cl_command_queue_capabilities_intel */ #define CL_QUEUE_DEFAULT_CAPABILITIES_INTEL 0u #define CL_QUEUE_CAPABILITY_CREATE_SINGLE_QUEUE_EVENTS_INTEL (1 << 0) #define CL_QUEUE_CAPABILITY_CREATE_CROSS_QUEUE_EVENTS_INTEL (1 << 1) #define CL_QUEUE_CAPABILITY_SINGLE_QUEUE_EVENT_WAIT_LIST_INTEL (1 << 2) #define CL_QUEUE_CAPABILITY_CROSS_QUEUE_EVENT_WAIT_LIST_INTEL (1 << 3) #define CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_INTEL (1 << 8) #define CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_RECT_INTEL (1 << 9) #define CL_QUEUE_CAPABILITY_MAP_BUFFER_INTEL (1 << 10) #define CL_QUEUE_CAPABILITY_FILL_BUFFER_INTEL (1 << 11) 
#define CL_QUEUE_CAPABILITY_TRANSFER_IMAGE_INTEL (1 << 12) #define CL_QUEUE_CAPABILITY_MAP_IMAGE_INTEL (1 << 13) #define CL_QUEUE_CAPABILITY_FILL_IMAGE_INTEL (1 << 14) #define CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_IMAGE_INTEL (1 << 15) #define CL_QUEUE_CAPABILITY_TRANSFER_IMAGE_BUFFER_INTEL (1 << 16) #define CL_QUEUE_CAPABILITY_MARKER_INTEL (1 << 24) #define CL_QUEUE_CAPABILITY_BARRIER_INTEL (1 << 25) #define CL_QUEUE_CAPABILITY_KERNEL_INTEL (1 << 26) typedef cl_bitfield cl_command_queue_capabilities_intel; #define CL_QUEUE_FAMILY_MAX_NAME_SIZE_INTEL 64 typedef struct _cl_queue_family_properties_intel { cl_command_queue_properties properties; cl_command_queue_capabilities_intel capabilities; cl_uint count; char name[CL_QUEUE_FAMILY_MAX_NAME_SIZE_INTEL]; } cl_queue_family_properties_intel; /****************************** * DEVICE ATTRIBUTE QUERY * *******************************/ /* For GPU devices, version 1.0.0: */ #define CL_DEVICE_IP_VERSION_INTEL 0x4250 #define CL_DEVICE_ID_INTEL 0x4251 #define CL_DEVICE_NUM_SLICES_INTEL 0x4252 #define CL_DEVICE_NUM_SUB_SLICES_PER_SLICE_INTEL 0x4253 #define CL_DEVICE_NUM_EUS_PER_SUB_SLICE_INTEL 0x4254 #define CL_DEVICE_NUM_THREADS_PER_EU_INTEL 0x4255 #define CL_DEVICE_FEATURE_CAPABILITIES_INTEL 0x4256 typedef cl_bitfield cl_device_feature_capabilities_intel; /* For GPU devices, version 1.0.0: */ #define CL_DEVICE_FEATURE_FLAG_DP4A_INTEL (1 << 0) ////// RESOURCE BARRIER EXT #define CL_COMMAND_RESOURCE_BARRIER 0x10010 typedef cl_uint cl_resource_barrier_type; #define CL_RESOURCE_BARRIER_TYPE_ACQUIRE 0x1 // FLUSH+EVICT #define CL_RESOURCE_BARRIER_TYPE_RELEASE 0x2 // FLUSH #define CL_RESOURCE_BARRIER_TYPE_DISCARD 0x3 // DISCARD typedef cl_uint cl_resource_memory_scope; #define CL_MEMORY_SCOPE_DEVICE 0x0 // INCLUDES CROSS-TILE #define CL_MEMORY_SCOPE_ALL_SVM_DEVICES 0x1 // CL_MEMORY_SCOPE_DEVICE + CROSS-DEVICE #pragma pack(push, 1) typedef struct _cl_resource_barrier_descriptor_intel { void *svm_allocation_pointer; cl_mem mem_object; 
cl_resource_barrier_type type; cl_resource_memory_scope scope; } cl_resource_barrier_descriptor_intel; #pragma pack(pop) /**************************************** * cl_khr_pci_bus_info extension * ***************************************/ #define cl_khr_pci_bus_info 1 // New queries for clGetDeviceInfo: #define CL_DEVICE_PCI_BUS_INFO_KHR 0x410F typedef struct _cl_device_pci_bus_info_khr { cl_uint pci_domain; cl_uint pci_bus; cl_uint pci_device; cl_uint pci_function; } cl_device_pci_bus_info_khr; /************************************************ * cl_intel_mem_compression_hints extension * *************************************************/ #define CL_MEM_COMPRESSED_HINT_INTEL (1u << 21) #define CL_MEM_UNCOMPRESSED_HINT_INTEL (1u << 22) // New query for clGetDeviceInfo: #define CL_MEM_COMPRESSED_INTEL 0x417D /* cl_queue_properties */ #define CL_QUEUE_MDAPI_PROPERTIES_INTEL 0x425E #define CL_QUEUE_MDAPI_CONFIGURATION_INTEL 0x425F typedef cl_bitfield cl_command_queue_mdapi_properties_intel; /* cl_command_queue_mdapi_properties_intel - bitfield */ #define CL_QUEUE_MDAPI_ENABLE_INTEL (1 << 0) compute-runtime-22.14.22890/opencl/extensions/public/cl_gl_private_intel.h000066400000000000000000000076631422164147700265010ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #ifndef OPENCL_SHARED_RESOURCE #define OPENCL_SHARED_RESOURCE #include "GL/gl.h" #include "GmmLib.h" #include "third_party/opencl_headers/CL/cl_gl.h" // Used for creating CL resources from GL resources typedef struct _tagCLGLResourceInfo { GLuint name; GLenum target; unsigned int globalShareHandle; GMM_RESOURCE_INFO *pGmmResInfo; /// Pointer to GMMResInfo from GL that will be copied in CL (GL) GLenum glFormat; GLint glInternalFormat; GLuint glHWFormat; GLboolean isAuxEnabled; GLuint borderWidth; GLint textureBufferWidth; GLint textureBufferSize; GLint textureBufferOffset; GLboolean oglSynchronized; GMM_STATUS status; unsigned int 
globalShareHandleMCS; GMM_RESOURCE_INFO *pGmmResInfoMCS; GLint numberOfSamples; // Number of samples as specified by API GLvoid *pReleaseData; } CL_GL_RESOURCE_INFO, *PCL_GL_RESOURCE_INFO; // Used for creating GL resources from CL resources typedef struct _tagGLCLResourceInfo { unsigned int globalShareHandle; unsigned int clChannelOrder; unsigned int clChannelDataType; size_t imageWidth; size_t imageHeight; size_t rowPitch; size_t slicePitch; unsigned int mipCount; bool isCreatedFromBuffer; unsigned int arraySize; unsigned int depth; } GL_CL_RESOURCE_INFO, *PGL_CL_RESOURCE_INFO; typedef struct _tagCLGLBufferInfo { GLenum bufferName; unsigned int globalShareHandle; GMM_RESOURCE_INFO *pGmmResInfo; /// Pointer to GMMResInfo from GL that will be copied in CL (GL) GLvoid *pSysMem; GLint bufferSize; GLint bufferOffset; GLboolean oglSynchronized; GMM_STATUS status; GLvoid *pReleaseData; } CL_GL_BUFFER_INFO, *PCL_GL_BUFFER_INFO; #ifdef _WIN32 // Used for creating GL sync objects from CL events typedef struct _tagCLGLSyncInfo { _tagCLGLSyncInfo() : eventName(NULL), event((HANDLE)0), submissionEventName(NULL), submissionEvent((HANDLE)0), clientSynchronizationObject((D3DKMT_HANDLE)0), serverSynchronizationObject((D3DKMT_HANDLE)0), submissionSynchronizationObject((D3DKMT_HANDLE)0), hContextToBlock((D3DKMT_HANDLE)0), waitCalled(false) { } char *eventName; HANDLE event; char *submissionEventName; HANDLE submissionEvent; D3DKMT_HANDLE clientSynchronizationObject; D3DKMT_HANDLE serverSynchronizationObject; D3DKMT_HANDLE submissionSynchronizationObject; D3DKMT_HANDLE hContextToBlock; bool waitCalled; } CL_GL_SYNC_INFO, *PCL_GL_SYNC_INFO; // Used for creating CL events from GL sync objects typedef struct _tagGLCLSyncInfo { __GLsync *syncName; GLvoid *pSync; } GL_CL_SYNC_INFO, *PGL_CL_SYNC_INFO; #endif typedef int(__stdcall *pfn_clRetainEvent)(struct _cl_event *event); typedef int(__stdcall *pfn_clReleaseEvent)(struct _cl_event *event); typedef int(__stdcall 
*INTELpfn_clGetCLObjectInfoINTEL)(struct _cl_mem *pMemObj, void *pResourceInfo); typedef int(__stdcall *INTELpfn_clEnqueueMarkerWithSyncObjectINTEL)( struct _cl_command_queue *pCommandQueue, struct _cl_event **pOclEvent, struct _cl_context **pOclContext); typedef struct _tagCLGLDispatch { pfn_clRetainEvent clRetainEvent; pfn_clReleaseEvent clReleaseEvent; INTELpfn_clGetCLObjectInfoINTEL clGetCLObjectInfoINTEL; INTELpfn_clEnqueueMarkerWithSyncObjectINTEL clEnqueueMarkerWithSyncObjectINTEL; } CL_GL_DISPATCH, *PCL_GL_DISPATCH; #ifdef _WIN32 typedef struct _tagCLGLContextInfo { D3DKMT_HANDLE DeviceHandle; D3DKMT_HANDLE ContextHandle; } CL_GL_CONTEXT_INFO, *PCL_GL_CONTEXT_INFO; typedef struct _tagCLGLEvent { struct { void *dispatch1; void *dispatch2; } dispatch; void *pObj; void *CLCmdQ; struct _cl_context *CLCtx; unsigned int IsUserEvent; PCL_GL_SYNC_INFO pSyncInfo; } CL_GL_EVENT, *PCL_GL_EVENT; #endif //_WIN32 #endif compute-runtime-22.14.22890/opencl/source/000077500000000000000000000000001422164147700201325ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/source/CMakeLists.txt000066400000000000000000000210011422164147700226640ustar00rootroot00000000000000# # Copyright (C) 2018-2022 Intel Corporation # # SPDX-License-Identifier: MIT # if(POLICY CMP0042) cmake_policy(SET CMP0042 NEW) endif() if(POLICY CMP0063) cmake_policy(SET CMP0063 NEW) endif() project(neo) set(MSVC_DEF_ADDITIONAL_EXPORTS "") set(OPENCL_RUNTIME_PROJECTS_FOLDER "opencl runtime") set(OPENCL_BUILTINS_PROJECTS_FOLDER "built_ins") hide_subdir(dll) add_library(${NEO_STATIC_LIB_NAME} STATIC EXCLUDE_FROM_ALL ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/enable_cores.cmake ) add_subdirectories() include(enable_cores.cmake) if(WIN32) if("${IGDRCL_OPTION__BITS}" STREQUAL "32") set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} /SAFESEH:NO") endif() endif() target_include_directories(${NEO_STATIC_LIB_NAME} PRIVATE ${CMAKE_CURRENT_BINARY_DIR} ${KMDAF_HEADERS_DIR} 
) target_include_directories(${NEO_STATIC_LIB_NAME} PUBLIC ${ENGINE_NODE_DIR} ${KHRONOS_HEADERS_DIR} ${KHRONOS_GL_HEADERS_DIR} ${CIF_BASE_DIR} ${IGC_OCL_ADAPTOR_DIR} ${VISA_INCLUDE_DIR} ${NEO__IGC_INCLUDE_DIR} ${THIRD_PARTY_DIR} ${NEO__GMM_INCLUDE_DIR} ) target_compile_definitions(${NEO_STATIC_LIB_NAME} PUBLIC "" ${NEO__IGC_COMPILE_DEFINITIONS} ) if(WIN32 OR NOT DISABLE_WDDM_LINUX) target_include_directories(${NEO_STATIC_LIB_NAME} PUBLIC ${WDK_INCLUDE_PATHS}) endif() if(WIN32) target_include_directories(${NEO_STATIC_LIB_NAME} PUBLIC ${NEO_SHARED_DIRECTORY}/os_interface/windows ${NEO_SOURCE_DIR}/opencl/source/os_interface/windows ) target_compile_definitions(${NEO_STATIC_LIB_NAME} PUBLIC OGL=1) else() target_include_directories(${NEO_STATIC_LIB_NAME} PUBLIC ${NEO_SHARED_DIRECTORY}/os_interface/linux ${NEO_SOURCE_DIR}/opencl/source/os_interface/linux ${I915_INCLUDES_DIR} ) endif() target_compile_definitions(${NEO_STATIC_LIB_NAME} PUBLIC GMM_LIB_DLL DEFAULT_PLATFORM=${DEFAULT_SUPPORTED_PLATFORM}) list(APPEND LIB_FLAGS_DEFINITIONS -DCIF_HEADERS_ONLY_BUILD ${SUPPORTED_CORE_FLAGS_DEFINITONS}) target_compile_definitions(${NEO_STATIC_LIB_NAME} PUBLIC ${LIB_FLAGS_DEFINITIONS}) set_target_properties(${NEO_STATIC_LIB_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON) set_property(TARGET ${NEO_STATIC_LIB_NAME} APPEND_STRING PROPERTY COMPILE_FLAGS ${ASAN_FLAGS} ${TSAN_FLAGS}) set_target_properties(${NEO_STATIC_LIB_NAME} PROPERTIES FOLDER ${OPENCL_RUNTIME_PROJECTS_FOLDER}) set(EXPORTS_FILENAME "") if(WIN32) set(EXPORTS_FILENAME "${CMAKE_CURRENT_BINARY_DIR}/OpenCLExports${IGDRCL_OPTION__BITS}.def") set(MSVC_DEF_LIB_NAME "igdrcl${IGDRCL_OPTION__BITS}") set(MSVC_DEF_HEADER "This file was generated during CMake project configuration - please don't edit") if("${CMAKE_BUILD_TYPE}" STREQUAL "Release") configure_file("${CMAKE_CURRENT_SOURCE_DIR}/dll/windows/OpenCLReleaseExports.def.in" "${EXPORTS_FILENAME}") else() 
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/dll/windows/OpenCLInternalExports.def.in" "${EXPORTS_FILENAME}") endif() elseif(UNIX) set(EXPORTS_FILENAME "${CMAKE_CURRENT_BINARY_DIR}/ocl.exports") if("${CMAKE_BUILD_TYPE}" STREQUAL "Release") configure_file("${CMAKE_CURRENT_SOURCE_DIR}/dll/linux/ocl_release.exports" "${EXPORTS_FILENAME}" COPYONLY) else() configure_file("${CMAKE_CURRENT_SOURCE_DIR}/dll/linux/ocl_internal.exports" "${EXPORTS_FILENAME}" COPYONLY) endif() endif() if(${GENERATE_EXECUTABLE}) list(APPEND NEO_DYNAMIC_LIB__TARGET_OBJECTS $ $ $ $ ) if(DEFINED AUB_STREAM_PROJECT_NAME) list(APPEND NEO_DYNAMIC_LIB__TARGET_OBJECTS $) endif() add_library(${NEO_DYNAMIC_LIB_NAME} SHARED ${NEO_DYNAMIC_LIB__TARGET_OBJECTS} ${NEO_SOURCE_DIR}/shared/source/aub/aub_stream_interface.cpp ) if(UNIX) if(NEO_BUILD_DEBUG_SYMBOLS_PACKAGE) get_filename_component(lib_file_name $ NAME_WE) set(symbols_file_name ${lib_file_name}.debug) set(debug_symbols_target_name "${STRIP_SYMBOLS_TARGET}_${NEO_DYNAMIC_LIB_NAME}") add_custom_target(${debug_symbols_target_name} COMMAND sh -c "objcopy --only-keep-debug ${lib_file_name} ${symbols_file_name}" COMMAND sh -c "strip -g ${lib_file_name}" COMMAND sh -c "objcopy --add-gnu-debuglink=${symbols_file_name} ${lib_file_name}" ) add_dependencies(${debug_symbols_target_name} ${NEO_DYNAMIC_LIB_NAME}) add_dependencies(${STRIP_SYMBOLS_TARGET} ${debug_symbols_target_name}) set_property(GLOBAL PROPERTY IGDRCL_SYMBOL_FILE "${symbols_file_name}") endif() install(FILES $ PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE DESTINATION ${CMAKE_INSTALL_LIBDIR}/intel-opencl COMPONENT opencl ) set_property(GLOBAL APPEND PROPERTY NEO_OCL_COMPONENTS_LIST "opencl") endif() if(NOT DISABLED_GTPIN_SUPPORT) macro(macro_for_each_core_type) foreach(BRANCH_DIR ${BRANCH_DIR_LIST}) if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR}${CORE_TYPE_LOWER}/gtpin_setup_${CORE_TYPE_LOWER}.cpp) target_sources(${NEO_DYNAMIC_LIB_NAME} PRIVATE 
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR}${CORE_TYPE_LOWER}/gtpin_setup_${CORE_TYPE_LOWER}.cpp) endif() endforeach() endmacro() apply_macro_for_each_core_type("SUPPORTED") endif() add_subdirectory(dll) target_link_libraries(${NEO_DYNAMIC_LIB_NAME} ${NEO_STATIC_LIB_NAME} ${NEO_SHARED_LIB} ${NEO_STATIC_LIB_NAME} ${NEO_SHARED_LIB} ${NEO_EXTRA_LIBS}) target_include_directories(${NEO_DYNAMIC_LIB_NAME} BEFORE PRIVATE ${CMAKE_CURRENT_BINARY_DIR} ) if(WIN32) target_link_libraries(${NEO_DYNAMIC_LIB_NAME} dxgi) add_dependencies(${NEO_DYNAMIC_LIB_NAME} ${GMM_TARGET_NAME}) target_sources(${NEO_DYNAMIC_LIB_NAME} PRIVATE ${NEO_SHARED_DIRECTORY}/os_interface/windows/gmm_interface_win.cpp ) else() target_link_libraries(${NEO_DYNAMIC_LIB_NAME} ${GMM_LINK_NAME}) target_include_directories(${NEO_DYNAMIC_LIB_NAME} PRIVATE ${NEO_SHARED_DIRECTORY}/dll/devices${BRANCH_DIR_SUFFIX} ) set_property(TARGET ${NEO_DYNAMIC_LIB_NAME} APPEND_STRING PROPERTY LINK_FLAGS " -Wl,--version-script=${EXPORTS_FILENAME}" ) set_property(TARGET ${NEO_DYNAMIC_LIB_NAME} APPEND_STRING PROPERTY LINK_FLAGS " -Wl,-Bsymbolic" ) set_property(TARGET ${NEO_DYNAMIC_LIB_NAME} APPEND_STRING PROPERTY COMPILE_FLAGS ${ASAN_FLAGS}) target_sources(${NEO_DYNAMIC_LIB_NAME} PRIVATE ${NEO_SHARED_DIRECTORY}/os_interface/linux/gmm_interface_linux.cpp ) endif() set_target_properties(${NEO_DYNAMIC_LIB_NAME} PROPERTIES DEBUG_OUTPUT_NAME "${NEO_DLL_NAME_BASE}${IGDRCL_NAME_POSTFIX}${IGDRCL_OPTION__BITS}" RELEASE_OUTPUT_NAME "${NEO_DLL_NAME_BASE}${IGDRCL_NAME_POSTFIX}${IGDRCL_OPTION__BITS}" RELEASEINTERNAL_OUTPUT_NAME "${NEO_DLL_NAME_BASE}${IGDRCL_NAME_POSTFIX}${IGDRCL_OPTION__BITS}" OUTPUT_NAME "${NEO_DLL_NAME_BASE}${IGDRCL_NAME_POSTFIX}${IGDRCL_OPTION__BITS}" ) set_target_properties(${NEO_DYNAMIC_LIB_NAME} PROPERTIES FOLDER ${OPENCL_RUNTIME_PROJECTS_FOLDER}) create_project_source_tree_with_exports(${NEO_DYNAMIC_LIB_NAME} "${EXPORTS_FILENAME}") endif() create_project_source_tree(${NEO_STATIC_LIB_NAME}) if(UNIX AND NOT (TARGET clang-tidy)) 
add_custom_target(clang-tidy) add_custom_command( TARGET clang-tidy POST_BUILD COMMAND echo clang-tidy... COMMAND find ${CMAKE_CURRENT_SOURCE_DIR} -name *.cpp | xargs --verbose -I{} -P`nproc` clang-tidy-8 -p ${NEO_BINARY_DIR} {} | tee ${NEO_BINARY_DIR}/clang-tidy.log WORKING_DIRECTORY ${NEO_SOURCE_DIR} ) endif() compute-runtime-22.14.22890/opencl/source/accelerators/000077500000000000000000000000001422164147700226015ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/source/accelerators/CMakeLists.txt000066400000000000000000000010431422164147700253370ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(RUNTIME_SRCS_ACCELERATORS ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/intel_accelerator.cpp ${CMAKE_CURRENT_SOURCE_DIR}/intel_accelerator.h ${CMAKE_CURRENT_SOURCE_DIR}/intel_motion_estimation.cpp ${CMAKE_CURRENT_SOURCE_DIR}/intel_motion_estimation.h ) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_ACCELERATORS}) set_property(GLOBAL PROPERTY RUNTIME_SRCS_ACCELERATORS ${RUNTIME_SRCS_ACCELERATORS}) compute-runtime-22.14.22890/opencl/source/accelerators/intel_accelerator.cpp000066400000000000000000000035311422164147700267660ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/accelerators/intel_accelerator.h" #include "shared/source/helpers/get_info.h" #include "shared/source/helpers/string.h" #include "opencl/source/context/context.h" #include "opencl/source/helpers/get_info_status_mapper.h" namespace NEO { cl_int IntelAccelerator::getInfo(cl_accelerator_info_intel paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) const { cl_int result = CL_SUCCESS; size_t ret = GetInfo::invalidSourceSize; auto getInfoStatus = GetInfoStatus::INVALID_VALUE; switch (paramName) { case CL_ACCELERATOR_DESCRIPTOR_INTEL: { ret = getDescriptorSize(); getInfoStatus = 
GetInfo::getInfo(paramValue, paramValueSize, getDescriptor(), ret); } break; case CL_ACCELERATOR_REFERENCE_COUNT_INTEL: { auto v = getReference(); ret = sizeof(cl_uint); getInfoStatus = GetInfo::getInfo(paramValue, paramValueSize, &v, ret); } break; case CL_ACCELERATOR_CONTEXT_INTEL: { ret = sizeof(cl_context); cl_context ctx = static_cast(pContext); getInfoStatus = GetInfo::getInfo(paramValue, paramValueSize, &ctx, ret); } break; case CL_ACCELERATOR_TYPE_INTEL: { auto v = getTypeId(); ret = sizeof(cl_accelerator_type_intel); getInfoStatus = GetInfo::getInfo(paramValue, paramValueSize, &v, ret); } break; default: getInfoStatus = GetInfoStatus::INVALID_VALUE; break; } result = changeGetInfoStatusToCLResultType(getInfoStatus); GetInfo::setParamValueReturnSize(paramValueSizeRet, ret, getInfoStatus); return result; } } // namespace NEO compute-runtime-22.14.22890/opencl/source/accelerators/intel_accelerator.h000066400000000000000000000035431422164147700264360ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/api/cl_types.h" #include "opencl/source/helpers/base_object.h" //------------------------------------------------------------------------------ // cl_intel_accelerator Class Stuff //------------------------------------------------------------------------------ namespace NEO { class Context; typedef struct TagAcceleratorObjParams { cl_uint AcceleratorType; cl_uint AcceleratorFlags; } OCLRT_ACCELERATOR_OBJECT_PARAMS, *POCLRT_ACCELERATOR_OBJECT_PARAMS; template <> struct OpenCLObjectMapper<_cl_accelerator_intel> { typedef class IntelAccelerator DerivedType; }; class IntelAccelerator : public BaseObject<_cl_accelerator_intel> { public: IntelAccelerator(Context *context, cl_accelerator_type_intel typeId, size_t descriptorSize, const void *descriptor) : pContext(context), typeId(typeId), descriptorSize(descriptorSize), pDescriptor(descriptor) {} IntelAccelerator() {} 
static const cl_ulong objectMagic = 0xC6D72FA2E81EA569ULL; cl_accelerator_type_intel getTypeId() const { return typeId; } size_t getDescriptorSize() const { return descriptorSize; } const void *getDescriptor() const { return pDescriptor; } cl_int getInfo(cl_accelerator_info_intel paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) const; protected: Context *pContext = nullptr; const cl_accelerator_type_intel typeId = -1; const size_t descriptorSize = 0; const void *pDescriptor = nullptr; private: }; } // namespace NEO compute-runtime-22.14.22890/opencl/source/accelerators/intel_motion_estimation.cpp000066400000000000000000000034531422164147700302460ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/accelerators/intel_motion_estimation.h" namespace NEO { cl_int VmeAccelerator::validateVmeArgs(Context *context, cl_accelerator_type_intel typeId, size_t descriptorSize, const void *descriptor) { const cl_motion_estimation_desc_intel *descObj = (const cl_motion_estimation_desc_intel *)descriptor; DEBUG_BREAK_IF(!context); DEBUG_BREAK_IF(typeId != CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL); if ((descriptorSize != sizeof(cl_motion_estimation_desc_intel)) || (descriptor == NULL)) { return CL_INVALID_ACCELERATOR_DESCRIPTOR_INTEL; } switch (descObj->mb_block_type) { case CL_ME_MB_TYPE_16x16_INTEL: case CL_ME_MB_TYPE_8x8_INTEL: case CL_ME_MB_TYPE_4x4_INTEL: break; default: return CL_INVALID_ACCELERATOR_DESCRIPTOR_INTEL; } switch (descObj->subpixel_mode) { case CL_ME_SUBPIXEL_MODE_INTEGER_INTEL: case CL_ME_SUBPIXEL_MODE_HPEL_INTEL: case CL_ME_SUBPIXEL_MODE_QPEL_INTEL: break; default: return CL_INVALID_ACCELERATOR_DESCRIPTOR_INTEL; } switch (descObj->sad_adjust_mode) { case CL_ME_SAD_ADJUST_MODE_NONE_INTEL: case CL_ME_SAD_ADJUST_MODE_HAAR_INTEL: break; default: return CL_INVALID_ACCELERATOR_DESCRIPTOR_INTEL; } switch (descObj->search_path_type) { case 
CL_ME_SEARCH_PATH_RADIUS_2_2_INTEL: case CL_ME_SEARCH_PATH_RADIUS_4_4_INTEL: case CL_ME_SEARCH_PATH_RADIUS_16_12_INTEL: break; default: return CL_INVALID_ACCELERATOR_DESCRIPTOR_INTEL; } return CL_SUCCESS; } } // namespace NEO compute-runtime-22.14.22890/opencl/source/accelerators/intel_motion_estimation.h000066400000000000000000000034011422164147700277040ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/accelerators/intel_accelerator.h" //------------------------------------------------------------------------------ // VmeAccelerator Class Stuff //------------------------------------------------------------------------------ namespace NEO { class Context; class VmeAccelerator : public IntelAccelerator { public: static VmeAccelerator *create(Context *context, cl_accelerator_type_intel typeId, size_t descriptorSize, const void *descriptor, cl_int &result) { result = validateVmeArgs(context, typeId, descriptorSize, descriptor); VmeAccelerator *acc = nullptr; if (result == CL_SUCCESS) { acc = new VmeAccelerator( context, typeId, descriptorSize, descriptor); } return acc; } protected: private: VmeAccelerator(Context *context, cl_accelerator_type_intel typeId, size_t descriptorSize, const void *descriptor) : IntelAccelerator(context, typeId, descriptorSize, descriptor) { } static cl_int validateVmeArgs(Context *context, cl_accelerator_type_intel typeId, size_t descriptorSize, const void *descriptor); }; } // namespace NEO compute-runtime-22.14.22890/opencl/source/api/000077500000000000000000000000001422164147700207035ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/source/api/CMakeLists.txt000066400000000000000000000012651422164147700234470ustar00rootroot00000000000000# # Copyright (C) 2018-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(RUNTIME_SRCS_API ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt 
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}additional_extensions.cpp ${CMAKE_CURRENT_SOURCE_DIR}/additional_extensions.h ${CMAKE_CURRENT_SOURCE_DIR}/api.cpp ${CMAKE_CURRENT_SOURCE_DIR}/api.h ${CMAKE_CURRENT_SOURCE_DIR}/api_enter.h ${CMAKE_CURRENT_SOURCE_DIR}/cl_types.h ${CMAKE_CURRENT_SOURCE_DIR}/dispatch.cpp ${CMAKE_CURRENT_SOURCE_DIR}/dispatch.h ) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_API}) set_property(GLOBAL PROPERTY RUNTIME_SRCS_API ${RUNTIME_SRCS_API}) add_subdirectories() compute-runtime-22.14.22890/opencl/source/api/additional_extensions.cpp000066400000000000000000000004331422164147700257760ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/api/additional_extensions.h" namespace NEO { void *CL_API_CALL getAdditionalExtensionFunctionAddress(const char *funcName) { return nullptr; } } // namespace NEO compute-runtime-22.14.22890/opencl/source/api/additional_extensions.h000066400000000000000000000003631422164147700254450ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "config.h" #include namespace NEO { void *CL_API_CALL getAdditionalExtensionFunctionAddress(const char *funcName); } compute-runtime-22.14.22890/opencl/source/api/api.cpp000066400000000000000000007354121422164147700221740ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "api.h" #include "shared/source/aub/aub_center.h" #include "shared/source/built_ins/built_ins.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/get_info.h" #include "shared/source/helpers/hw_info.h" #include 
"shared/source/helpers/kernel_helpers.h" #include "shared/source/memory_manager/unified_memory_manager.h" #include "shared/source/os_interface/device_factory.h" #include "shared/source/os_interface/os_context.h" #include "shared/source/utilities/api_intercept.h" #include "shared/source/utilities/stackvec.h" #include "opencl/source/accelerators/intel_motion_estimation.h" #include "opencl/source/api/additional_extensions.h" #include "opencl/source/built_ins/vme_builtin.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/context/context.h" #include "opencl/source/context/driver_diagnostics.h" #include "opencl/source/event/user_event.h" #include "opencl/source/execution_environment/cl_execution_environment.h" #include "opencl/source/gtpin/gtpin_notify.h" #include "opencl/source/helpers/cl_memory_properties_helpers.h" #include "opencl/source/helpers/cl_validators.h" #include "opencl/source/helpers/get_info_status_mapper.h" #include "opencl/source/helpers/queue_helpers.h" #include "opencl/source/kernel/kernel.h" #include "opencl/source/kernel/kernel_info_cl.h" #include "opencl/source/kernel/multi_device_kernel.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/mem_obj/mem_obj_helper.h" #include "opencl/source/mem_obj/pipe.h" #include "opencl/source/platform/platform.h" #include "opencl/source/program/program.h" #include "opencl/source/sampler/sampler.h" #include "opencl/source/sharings/sharing_factory.h" #include "opencl/source/tracing/tracing_api.h" #include "opencl/source/tracing/tracing_notify.h" #include "opencl/source/utilities/cl_logger.h" #include "CL/cl.h" #include "config.h" #include #include using namespace NEO; cl_int CL_API_CALL clGetPlatformIDs(cl_uint numEntries, cl_platform_id *platforms, cl_uint *numPlatforms) { TRACING_ENTER(clGetPlatformIDs, &numEntries, &platforms, &numPlatforms); cl_int retVal = CL_SUCCESS; 
API_ENTER(&retVal); DBG_LOG_INPUTS("numEntries", numEntries, "platforms", platforms, "numPlatforms", numPlatforms); do { // if platforms is nullptr, we must return the number of valid platforms we // support in the num_platforms variable (if it is non-nullptr) if ((platforms == nullptr) && (numPlatforms == nullptr)) { retVal = CL_INVALID_VALUE; break; } // platform != nullptr and num_entries == 0 is defined by spec as invalid if (numEntries == 0 && platforms != nullptr) { retVal = CL_INVALID_VALUE; break; } static std::mutex mutex; std::unique_lock lock(mutex); if (platformsImpl->empty()) { auto executionEnvironment = new ClExecutionEnvironment(); executionEnvironment->incRefInternal(); auto allDevices = DeviceFactory::createDevices(*executionEnvironment); executionEnvironment->decRefInternal(); if (allDevices.empty()) { retVal = CL_OUT_OF_HOST_MEMORY; break; } auto groupedDevices = Platform::groupDevices(std::move(allDevices)); for (auto &deviceVector : groupedDevices) { auto pPlatform = Platform::createFunc(*executionEnvironment); if (!pPlatform || !pPlatform->initialize(std::move(deviceVector))) { retVal = CL_OUT_OF_HOST_MEMORY; break; } platformsImpl->push_back(std::move(pPlatform)); } if (retVal != CL_SUCCESS) { break; } } cl_uint numPlatformsToExpose = std::min(numEntries, static_cast(platformsImpl->size())); if (numEntries == 0) { numPlatformsToExpose = static_cast(platformsImpl->size()); } if (platforms) { for (auto i = 0u; i < numPlatformsToExpose; i++) { platforms[i] = (*platformsImpl)[i].get(); } } if (numPlatforms) { *numPlatforms = numPlatformsToExpose; } } while (false); TRACING_EXIT(clGetPlatformIDs, &retVal); return retVal; } CL_API_ENTRY cl_int CL_API_CALL clIcdGetPlatformIDsKHR(cl_uint numEntries, cl_platform_id *platforms, cl_uint *numPlatforms) { cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("numEntries", numEntries, "platforms", platforms, "numPlatforms", numPlatforms); retVal = clGetPlatformIDs(numEntries, platforms, 
numPlatforms); return retVal; } cl_int CL_API_CALL clGetPlatformInfo(cl_platform_id platform, cl_platform_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) { TRACING_ENTER(clGetPlatformInfo, &platform, ¶mName, ¶mValueSize, ¶mValue, ¶mValueSizeRet); cl_int retVal = CL_INVALID_PLATFORM; API_ENTER(&retVal); DBG_LOG_INPUTS("platform", platform, "paramName", paramName, "paramValueSize", paramValueSize, "paramValue", NEO::FileLoggerInstance().infoPointerToString(paramValue, paramValueSize), "paramValueSizeRet", paramValueSizeRet); auto pPlatform = castToObject(platform); if (pPlatform) { retVal = pPlatform->getInfo(paramName, paramValueSize, paramValue, paramValueSizeRet); } TRACING_EXIT(clGetPlatformInfo, &retVal); return retVal; } cl_int CL_API_CALL clGetDeviceIDs(cl_platform_id platform, cl_device_type deviceType, cl_uint numEntries, cl_device_id *devices, cl_uint *numDevices) { TRACING_ENTER(clGetDeviceIDs, &platform, &deviceType, &numEntries, &devices, &numDevices); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("platform", platform, "deviceType", deviceType, "numEntries", numEntries, "devices", devices, "numDevices", numDevices); const cl_device_type validType = CL_DEVICE_TYPE_GPU | CL_DEVICE_TYPE_CPU | CL_DEVICE_TYPE_ACCELERATOR | CL_DEVICE_TYPE_DEFAULT | CL_DEVICE_TYPE_CUSTOM; Platform *pPlatform = nullptr; do { /* Check parameter consistency */ if (devices == nullptr && numDevices == nullptr) { retVal = CL_INVALID_VALUE; break; } if (devices && numEntries == 0) { retVal = CL_INVALID_VALUE; break; } if ((deviceType & validType) == 0) { retVal = CL_INVALID_DEVICE_TYPE; break; } if (platform != nullptr) { pPlatform = castToObject(platform); if (pPlatform == nullptr) { retVal = CL_INVALID_PLATFORM; break; } } else { cl_uint numPlatforms = 0u; retVal = clGetPlatformIDs(0, nullptr, &numPlatforms); if (numPlatforms == 0u) { retVal = CL_DEVICE_NOT_FOUND; break; } pPlatform = (*platformsImpl)[0].get(); } 
DEBUG_BREAK_IF(pPlatform->isInitialized() != true); cl_uint numDev = static_cast(pPlatform->getNumDevices()); if (numDev == 0) { retVal = CL_DEVICE_NOT_FOUND; break; } if (DebugManager.flags.LimitAmountOfReturnedDevices.get()) { numDev = std::min(static_cast(DebugManager.flags.LimitAmountOfReturnedDevices.get()), numDev); } if (deviceType == CL_DEVICE_TYPE_ALL) { /* According to Spec, set it to all except TYPE_CUSTOM. */ deviceType = CL_DEVICE_TYPE_GPU | CL_DEVICE_TYPE_CPU | CL_DEVICE_TYPE_ACCELERATOR | CL_DEVICE_TYPE_DEFAULT; } else if (deviceType == CL_DEVICE_TYPE_DEFAULT) { /* We just set it to GPU now. */ deviceType = CL_DEVICE_TYPE_GPU; } cl_uint retNum = 0; for (auto platformDeviceIndex = 0u; platformDeviceIndex < numDev; platformDeviceIndex++) { ClDevice *device = pPlatform->getClDevice(platformDeviceIndex); UNRECOVERABLE_IF(device == nullptr); if (deviceType & device->getDeviceInfo().deviceType) { if (devices) { if (retNum >= numEntries) { break; } devices[retNum] = device; } retNum++; } } if (numDevices) { *numDevices = retNum; } /* If no suitable device, set a error. 
*/ if (retNum == 0) retVal = CL_DEVICE_NOT_FOUND; } while (false); TRACING_EXIT(clGetDeviceIDs, &retVal); return retVal; } cl_int CL_API_CALL clGetDeviceInfo(cl_device_id device, cl_device_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) { TRACING_ENTER(clGetDeviceInfo, &device, ¶mName, ¶mValueSize, ¶mValue, ¶mValueSizeRet); cl_int retVal = CL_INVALID_DEVICE; API_ENTER(&retVal); DBG_LOG_INPUTS("clDevice", device, "paramName", paramName, "paramValueSize", paramValueSize, "paramValue", NEO::FileLoggerInstance().infoPointerToString(paramValue, paramValueSize), "paramValueSizeRet", paramValueSizeRet); ClDevice *pDevice = castToObject(device); if (pDevice != nullptr) { retVal = pDevice->getDeviceInfo(paramName, paramValueSize, paramValue, paramValueSizeRet); } TRACING_EXIT(clGetDeviceInfo, &retVal); return retVal; } cl_int CL_API_CALL clCreateSubDevices(cl_device_id inDevice, const cl_device_partition_property *properties, cl_uint numDevices, cl_device_id *outDevices, cl_uint *numDevicesRet) { ClDevice *pInDevice = castToObject(inDevice); if (pInDevice == nullptr) { return CL_INVALID_DEVICE; } auto subDevicesCount = pInDevice->getNumSubDevices(); if (subDevicesCount <= 1) { return CL_DEVICE_PARTITION_FAILED; } if ((properties == nullptr) || (properties[0] != CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN) || ((properties[1] != CL_DEVICE_AFFINITY_DOMAIN_NUMA) && (properties[1] != CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE)) || (properties[2] != 0)) { return CL_INVALID_VALUE; } if (numDevicesRet != nullptr) { *numDevicesRet = subDevicesCount; } if (outDevices == nullptr) { return CL_SUCCESS; } if (numDevices < subDevicesCount) { return CL_INVALID_VALUE; } for (uint32_t i = 0; i < subDevicesCount; i++) { auto pClDevice = pInDevice->getSubDevice(i); pClDevice->retainApi(); outDevices[i] = pClDevice; } return CL_SUCCESS; } cl_int CL_API_CALL clRetainDevice(cl_device_id device) { TRACING_ENTER(clRetainDevice, &device); cl_int retVal = 
CL_INVALID_DEVICE; API_ENTER(&retVal); DBG_LOG_INPUTS("device", device); auto pDevice = castToObject(device); if (pDevice) { pDevice->retainApi(); retVal = CL_SUCCESS; } TRACING_EXIT(clRetainDevice, &retVal); return retVal; } cl_int CL_API_CALL clReleaseDevice(cl_device_id device) { TRACING_ENTER(clReleaseDevice, &device); cl_int retVal = CL_INVALID_DEVICE; API_ENTER(&retVal); DBG_LOG_INPUTS("device", device); auto pDevice = castToObject(device); if (pDevice) { pDevice->releaseApi(); retVal = CL_SUCCESS; } TRACING_EXIT(clReleaseDevice, &retVal); return retVal; } cl_context CL_API_CALL clCreateContext(const cl_context_properties *properties, cl_uint numDevices, const cl_device_id *devices, void(CL_CALLBACK *funcNotify)(const char *, const void *, size_t, void *), void *userData, cl_int *errcodeRet) { TRACING_ENTER(clCreateContext, &properties, &numDevices, &devices, &funcNotify, &userData, &errcodeRet); cl_int retVal = CL_SUCCESS; cl_context context = nullptr; API_ENTER(&retVal); DBG_LOG_INPUTS("properties", properties, "numDevices", numDevices, "cl_device_id", devices, "funcNotify", funcNotify, "userData", userData); do { if (devices == nullptr) { /* Must have device. */ retVal = CL_INVALID_VALUE; break; } /* validateObjects make sure numDevices != 0. 
*/ retVal = validateObjects(DeviceList(numDevices, devices)); if (retVal != CL_SUCCESS) break; if (funcNotify == nullptr && userData != nullptr) { retVal = CL_INVALID_VALUE; break; } auto pPlatform = Context::getPlatformFromProperties(properties, retVal); if (CL_SUCCESS != retVal) { break; } ClDeviceVector allDevs(devices, numDevices); if (!pPlatform) { pPlatform = allDevs[0]->getPlatform(); } for (auto &pClDevice : allDevs) { if (pClDevice->getPlatform() != pPlatform) { retVal = CL_INVALID_DEVICE; break; } } if (CL_SUCCESS != retVal) { break; } context = Context::create(properties, allDevs, funcNotify, userData, retVal); } while (false); if (errcodeRet) { *errcodeRet = retVal; } TRACING_EXIT(clCreateContext, &context); return context; } cl_context CL_API_CALL clCreateContextFromType(const cl_context_properties *properties, cl_device_type deviceType, void(CL_CALLBACK *funcNotify)(const char *, const void *, size_t, void *), void *userData, cl_int *errcodeRet) { TRACING_ENTER(clCreateContextFromType, &properties, &deviceType, &funcNotify, &userData, &errcodeRet); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("properties", properties, "deviceType", deviceType, "funcNotify", funcNotify, "userData", userData); Context *pContext = nullptr; do { if (funcNotify == nullptr && userData != nullptr) { retVal = CL_INVALID_VALUE; break; } auto pPlatform = Context::getPlatformFromProperties(properties, retVal); if (CL_SUCCESS != retVal) { break; } cl_uint numDevices = 0; /* Query the number of device first. 
*/ retVal = clGetDeviceIDs(pPlatform, deviceType, 0, nullptr, &numDevices); if (retVal != CL_SUCCESS) { break; } DEBUG_BREAK_IF(numDevices <= 0); cl_device_id device = nullptr; retVal = clGetDeviceIDs(pPlatform, deviceType, 1, &device, nullptr); DEBUG_BREAK_IF(retVal != CL_SUCCESS); ClDeviceVector deviceVector(&device, 1); pContext = Context::create(properties, deviceVector, funcNotify, userData, retVal); } while (false); if (errcodeRet) { *errcodeRet = retVal; } TRACING_EXIT(clCreateContextFromType, (cl_context *)&pContext); return pContext; } cl_int CL_API_CALL clRetainContext(cl_context context) { TRACING_ENTER(clRetainContext, &context); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("context", context); Context *pContext = castToObject(context); if (pContext) { pContext->retain(); TRACING_EXIT(clRetainContext, &retVal); return retVal; } retVal = CL_INVALID_CONTEXT; TRACING_EXIT(clRetainContext, &retVal); return retVal; } cl_int CL_API_CALL clReleaseContext(cl_context context) { TRACING_ENTER(clReleaseContext, &context); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("context", context); Context *pContext = castToObject(context); if (pContext) { pContext->release(); TRACING_EXIT(clReleaseContext, &retVal); return retVal; } retVal = CL_INVALID_CONTEXT; TRACING_EXIT(clReleaseContext, &retVal); return retVal; } cl_int CL_API_CALL clGetContextInfo(cl_context context, cl_context_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) { TRACING_ENTER(clGetContextInfo, &context, ¶mName, ¶mValueSize, ¶mValue, ¶mValueSizeRet); auto retVal = CL_INVALID_CONTEXT; API_ENTER(&retVal); DBG_LOG_INPUTS("context", context, "paramName", paramName, "paramValueSize", paramValueSize, "paramValue", NEO::FileLoggerInstance().infoPointerToString(paramValue, paramValueSize), "paramValueSizeRet", paramValueSizeRet); auto pContext = castToObject(context); if (pContext) { retVal = pContext->getInfo(paramName, paramValueSize, 
paramValue, paramValueSizeRet); } TRACING_EXIT(clGetContextInfo, &retVal); return retVal; } cl_command_queue CL_API_CALL clCreateCommandQueue(cl_context context, cl_device_id device, const cl_command_queue_properties properties, cl_int *errcodeRet) { TRACING_ENTER(clCreateCommandQueue, &context, &device, (cl_command_queue_properties *)&properties, &errcodeRet); cl_command_queue commandQueue = nullptr; ErrorCodeHelper err(errcodeRet, CL_SUCCESS); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("context", context, "device", device, "properties", properties); do { if (properties & ~(CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_PROFILING_ENABLE)) { retVal = CL_INVALID_VALUE; break; } Context *pContext = nullptr; ClDevice *pDevice = nullptr; retVal = validateObjects( WithCastToInternal(context, &pContext), WithCastToInternal(device, &pDevice)); if (retVal != CL_SUCCESS) { break; } if (!pContext->isDeviceAssociated(*pDevice)) { retVal = CL_INVALID_DEVICE; break; } cl_queue_properties props[] = { CL_QUEUE_PROPERTIES, properties, 0}; commandQueue = CommandQueue::create(pContext, pDevice, props, false, retVal); if (pContext->isProvidingPerformanceHints()) { pContext->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, DRIVER_CALLS_INTERNAL_CL_FLUSH); if (castToObjectOrAbort(commandQueue)->isProfilingEnabled()) { pContext->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, PROFILING_ENABLED); if (pDevice->getDeviceInfo().preemptionSupported && pDevice->getHardwareInfo().platform.eProductFamily < IGFX_SKYLAKE) { pContext->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, PROFILING_ENABLED_WITH_DISABLED_PREEMPTION); } } } } while (false); err.set(retVal); DBG_LOG_INPUTS("commandQueue", commandQueue); TRACING_EXIT(clCreateCommandQueue, &commandQueue); return commandQueue; } cl_int CL_API_CALL clRetainCommandQueue(cl_command_queue commandQueue) { TRACING_ENTER(clRetainCommandQueue, &commandQueue); cl_int retVal 
= CL_INVALID_COMMAND_QUEUE; API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue); retainQueue(commandQueue, retVal); if (retVal == CL_SUCCESS) { TRACING_EXIT(clRetainCommandQueue, &retVal); return retVal; } TRACING_EXIT(clRetainCommandQueue, &retVal); return retVal; } cl_int CL_API_CALL clReleaseCommandQueue(cl_command_queue commandQueue) { TRACING_ENTER(clReleaseCommandQueue, &commandQueue); cl_int retVal = CL_INVALID_COMMAND_QUEUE; API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue); releaseQueue(commandQueue, retVal); if (retVal == CL_SUCCESS) { TRACING_EXIT(clReleaseCommandQueue, &retVal); return retVal; } TRACING_EXIT(clReleaseCommandQueue, &retVal); return retVal; } cl_int CL_API_CALL clGetCommandQueueInfo(cl_command_queue commandQueue, cl_command_queue_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) { TRACING_ENTER(clGetCommandQueueInfo, &commandQueue, ¶mName, ¶mValueSize, ¶mValue, ¶mValueSizeRet); cl_int retVal = CL_INVALID_COMMAND_QUEUE; API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue, "paramName", paramName, "paramValueSize", paramValueSize, "paramValue", NEO::FileLoggerInstance().infoPointerToString(paramValue, paramValueSize), "paramValueSizeRet", paramValueSizeRet); getQueueInfo(commandQueue, paramName, paramValueSize, paramValue, paramValueSizeRet, retVal); // if host queue not found - try to query device queue if (retVal == CL_SUCCESS) { TRACING_EXIT(clGetCommandQueueInfo, &retVal); return retVal; } TRACING_EXIT(clGetCommandQueueInfo, &retVal); return retVal; } // deprecated OpenCL 1.0 cl_int CL_API_CALL clSetCommandQueueProperty(cl_command_queue commandQueue, cl_command_queue_properties properties, cl_bool enable, cl_command_queue_properties *oldProperties) { TRACING_ENTER(clSetCommandQueueProperty, &commandQueue, &properties, &enable, &oldProperties); cl_int retVal = CL_INVALID_VALUE; API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue, "properties", 
properties, "enable", enable, "oldProperties", oldProperties); TRACING_EXIT(clSetCommandQueueProperty, &retVal); return retVal; } cl_mem CL_API_CALL clCreateBuffer(cl_context context, cl_mem_flags flags, size_t size, void *hostPtr, cl_int *errcodeRet) { if (DebugManager.flags.ForceExtendedBufferSize.get() >= 1) { size += (MemoryConstants::pageSize * DebugManager.flags.ForceExtendedBufferSize.get()); } TRACING_ENTER(clCreateBuffer, &context, &flags, &size, &hostPtr, &errcodeRet); DBG_LOG_INPUTS("cl_context", context, "cl_mem_flags", flags, "size", size, "hostPtr", NEO::FileLoggerInstance().infoPointerToString(hostPtr, size)); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); cl_mem_properties *properties = nullptr; cl_mem_flags_intel flagsIntel = 0; cl_mem buffer = BufferFunctions::validateInputAndCreateBuffer(context, properties, flags, flagsIntel, size, hostPtr, retVal); ErrorCodeHelper{errcodeRet, retVal}; DBG_LOG_INPUTS("buffer", buffer); TRACING_EXIT(clCreateBuffer, &buffer); return buffer; } cl_mem CL_API_CALL clCreateBufferWithProperties(cl_context context, const cl_mem_properties *properties, cl_mem_flags flags, size_t size, void *hostPtr, cl_int *errcodeRet) { if (DebugManager.flags.ForceExtendedBufferSize.get() >= 1) { size += (MemoryConstants::pageSize * DebugManager.flags.ForceExtendedBufferSize.get()); } DBG_LOG_INPUTS("cl_context", context, "cl_mem_properties", properties, "cl_mem_flags", flags, "size", size, "hostPtr", NEO::FileLoggerInstance().infoPointerToString(hostPtr, size)); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); cl_mem_flags_intel flagsIntel = 0; cl_mem buffer = BufferFunctions::validateInputAndCreateBuffer(context, properties, flags, flagsIntel, size, hostPtr, retVal); ErrorCodeHelper{errcodeRet, retVal}; DBG_LOG_INPUTS("buffer", buffer); return buffer; } cl_mem CL_API_CALL clCreateBufferWithPropertiesINTEL(cl_context context, const cl_mem_properties_intel *properties, cl_mem_flags flags, size_t size, void *hostPtr, cl_int *errcodeRet) 
{
    if (DebugManager.flags.ForceExtendedBufferSize.get() >= 1) {
        size += (MemoryConstants::pageSize * DebugManager.flags.ForceExtendedBufferSize.get());
    }

    DBG_LOG_INPUTS("cl_context", context,
                   "cl_mem_properties_intel", properties,
                   "cl_mem_flags", flags,
                   "size", size,
                   "hostPtr", NEO::FileLoggerInstance().infoPointerToString(hostPtr, size));

    cl_int retVal = CL_SUCCESS;
    API_ENTER(&retVal);

    cl_mem_flags_intel flagsIntel = 0;
    cl_mem buffer = BufferFunctions::validateInputAndCreateBuffer(context, properties, flags, flagsIntel, size, hostPtr, retVal);

    ErrorCodeHelper{errcodeRet, retVal};
    DBG_LOG_INPUTS("buffer", buffer);
    return buffer;
}

// Creates a sub-buffer that aliases a region of an existing buffer.
// Return codes produced here:
//   CL_INVALID_MEM_OBJECT           - buffer does not cast to a Buffer, or is itself a sub-buffer.
//   CL_INVALID_VALUE                - invalid or mutually exclusive flags, flags that conflict with
//                                     the parent's access flags, a create type other than
//                                     CL_BUFFER_CREATE_TYPE_REGION, a null bufferCreateInfo,
//                                     or a region that does not fit inside the parent.
//   CL_INVALID_BUFFER_SIZE          - region size is zero.
//   CL_MISALIGNED_SUB_BUFFER_OFFSET - the parent rejects the region origin.
// Access flags that the caller leaves unset are inherited from the parent, and the
// parent's host-pointer flags are always inherited.
// errcodeRet, when non-null, receives the final status.
cl_mem CL_API_CALL clCreateSubBuffer(cl_mem buffer,
                                     cl_mem_flags flags,
                                     cl_buffer_create_type bufferCreateType,
                                     const void *bufferCreateInfo,
                                     cl_int *errcodeRet) {
    TRACING_ENTER(clCreateSubBuffer, &buffer, &flags, &bufferCreateType, &bufferCreateInfo, &errcodeRet);
    cl_int retVal = CL_SUCCESS;
    API_ENTER(&retVal);
    DBG_LOG_INPUTS("buffer", buffer,
                   "flags", flags,
                   "bufferCreateType", bufferCreateType,
                   "bufferCreateInfo", bufferCreateInfo);
    cl_mem subBuffer = nullptr;
    Buffer *parentBuffer = castToObject<Buffer>(buffer);

    do {
        if (parentBuffer == nullptr) {
            retVal = CL_INVALID_MEM_OBJECT;
            break;
        }

        /* Are there some invalid flag bits? */
        if (!MemObjHelper::checkMemFlagsForSubBuffer(flags)) {
            retVal = CL_INVALID_VALUE;
            break;
        }

        cl_mem_flags parentFlags = parentBuffer->getFlags();
        cl_mem_flags_intel parentFlagsIntel = parentBuffer->getFlagsIntel();

        if (parentBuffer->isSubBuffer() == true) {
            retVal = CL_INVALID_MEM_OBJECT;
            break;
        }

        /* Check whether flag is valid. */
        if (((flags & CL_MEM_HOST_READ_ONLY) && (flags & CL_MEM_HOST_NO_ACCESS)) ||
            ((flags & CL_MEM_HOST_READ_ONLY) && (flags & CL_MEM_HOST_WRITE_ONLY)) ||
            ((flags & CL_MEM_HOST_WRITE_ONLY) && (flags & CL_MEM_HOST_NO_ACCESS))) {
            retVal = CL_INVALID_VALUE;
            break;
        }

        /* Check whether flag is valid and compatible with parent. */
        if (flags &&
            (((parentFlags & CL_MEM_WRITE_ONLY) && (flags & (CL_MEM_READ_WRITE | CL_MEM_READ_ONLY))) ||
             ((parentFlags & CL_MEM_READ_ONLY) && (flags & (CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY))) ||
             ((parentFlags & CL_MEM_HOST_WRITE_ONLY) && (flags & CL_MEM_HOST_READ_ONLY)) ||
             ((parentFlags & CL_MEM_HOST_READ_ONLY) && (flags & CL_MEM_HOST_WRITE_ONLY)) ||
             ((parentFlags & CL_MEM_HOST_NO_ACCESS) && (flags & (CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_READ_ONLY))))) {
            retVal = CL_INVALID_VALUE;
            break;
        }

        /* Inherit some flags if we do not set. */
        if ((flags & (CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_READ_WRITE)) == 0) {
            flags |= parentFlags & (CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_READ_WRITE);
        }
        if ((flags & (CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS)) == 0) {
            flags |= parentFlags & (CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS);
        }
        flags |= parentFlags & (CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR);

        if (bufferCreateType != CL_BUFFER_CREATE_TYPE_REGION) {
            retVal = CL_INVALID_VALUE;
            break;
        }

        if (bufferCreateInfo == nullptr) {
            retVal = CL_INVALID_VALUE;
            break;
        }

        /* Must have non-zero size. */
        const cl_buffer_region *region = reinterpret_cast<const cl_buffer_region *>(bufferCreateInfo);
        if (region->size == 0) {
            retVal = CL_INVALID_BUFFER_SIZE;
            break;
        }

        /* Out of range. */
        // Compare against the remaining space instead of computing origin + size:
        // the sum can wrap around for a huge caller-supplied size and would then
        // slip past the bounds check. The subtraction is safe because the first
        // operand of || already guarantees origin <= parentSize.
        const auto parentSize = parentBuffer->getSize();
        if (region->origin > parentSize ||
            region->size > parentSize - region->origin) {
            retVal = CL_INVALID_VALUE;
            break;
        }

        if (!parentBuffer->isValidSubBufferOffset(region->origin)) {
            retVal = CL_MISALIGNED_SUB_BUFFER_OFFSET;
            break;
        }

        subBuffer = parentBuffer->createSubBuffer(flags, parentFlagsIntel, region, retVal);
    } while (false);

    if (errcodeRet) {
        *errcodeRet = retVal;
    }

    TRACING_EXIT(clCreateSubBuffer, &subBuffer);
    return subBuffer;
}

// Creates an image object with no extra memory properties.
// Image::checkIfDeviceSupportsImages(context) is consulted first; only on success is
// ImageFunctions::validateAndCreateImage called. The status is written through
// errcodeRet via ErrorCodeHelper.
// NOTE(review): the input logging dereferences imageFormat and imageDesc before any
// validation runs - confirm DBG_LOG_INPUTS does not evaluate its arguments when
// logging is disabled, and that null descriptors cannot reach it when enabled.
cl_mem CL_API_CALL clCreateImage(cl_context context,
                                 cl_mem_flags flags,
                                 const cl_image_format *imageFormat,
                                 const cl_image_desc *imageDesc,
                                 void *hostPtr,
                                 cl_int *errcodeRet) {
    TRACING_ENTER(clCreateImage, &context, &flags, &imageFormat, &imageDesc, &hostPtr, &errcodeRet);

    DBG_LOG_INPUTS("cl_context", context,
                   "cl_mem_flags", flags,
                   "cl_image_format.channel_data_type", imageFormat->image_channel_data_type,
                   "cl_image_format.channel_order", imageFormat->image_channel_order,
                   "cl_image_desc.width", imageDesc->image_width,
                   "cl_image_desc.heigth", imageDesc->image_height,
                   "cl_image_desc.depth", imageDesc->image_depth,
                   "cl_image_desc.type", imageDesc->image_type,
                   "cl_image_desc.array_size", imageDesc->image_array_size,
                   "hostPtr", hostPtr);

    cl_int retVal = CL_SUCCESS;
    API_ENTER(&retVal);

    cl_mem_properties *properties = nullptr;
    cl_mem_flags_intel flagsIntel = 0;

    retVal = Image::checkIfDeviceSupportsImages(context);

    cl_mem image = nullptr;
    if (retVal == CL_SUCCESS) {
        image = ImageFunctions::validateAndCreateImage(context, properties, flags, flagsIntel, imageFormat, imageDesc, hostPtr, retVal);
    }

    ErrorCodeHelper{errcodeRet, retVal};
    DBG_LOG_INPUTS("image", image);
    TRACING_EXIT(clCreateImage, &image);
    return image;
}

// Same as clCreateImage, but forwards a caller-supplied cl_mem_properties list.
// No tracing hooks are invoked by this entry point.
cl_mem CL_API_CALL clCreateImageWithProperties(cl_context context,
                                               const cl_mem_properties *properties,
                                               cl_mem_flags flags,
                                               const cl_image_format *imageFormat,
                                               const cl_image_desc *imageDesc,
                                               void *hostPtr,
                                               cl_int *errcodeRet) {
    DBG_LOG_INPUTS("cl_context", context,
                   "cl_mem_properties", properties,
                   "cl_mem_flags", flags,
                   "cl_image_format.channel_data_type", imageFormat->image_channel_data_type,
                   "cl_image_format.channel_order", imageFormat->image_channel_order,
                   "cl_image_desc.width", imageDesc->image_width,
                   "cl_image_desc.heigth", imageDesc->image_height,
                   "cl_image_desc.depth", imageDesc->image_depth,
                   "cl_image_desc.type", imageDesc->image_type,
                   "cl_image_desc.array_size", imageDesc->image_array_size,
                   "hostPtr", hostPtr);

    cl_int retVal = CL_SUCCESS;
    API_ENTER(&retVal);

    cl_mem_flags_intel flagsIntel = 0;

    retVal = Image::checkIfDeviceSupportsImages(context);

    cl_mem image = nullptr;
    if (retVal == CL_SUCCESS) {
        image = ImageFunctions::validateAndCreateImage(context, properties, flags, flagsIntel, imageFormat, imageDesc, hostPtr, retVal);
    }

    ErrorCodeHelper{errcodeRet, retVal};
    DBG_LOG_INPUTS("image", image);
    return image;
}

// Intel-extension variant taking cl_mem_properties_intel.
// Unlike the sibling image entry points, this one goes straight to
// ImageFunctions::validateAndCreateImage without calling
// Image::checkIfDeviceSupportsImages first.
cl_mem CL_API_CALL clCreateImageWithPropertiesINTEL(cl_context context,
                                                    const cl_mem_properties_intel *properties,
                                                    cl_mem_flags flags,
                                                    const cl_image_format *imageFormat,
                                                    const cl_image_desc *imageDesc,
                                                    void *hostPtr,
                                                    cl_int *errcodeRet) {
    DBG_LOG_INPUTS("cl_context", context,
                   "cl_mem_properties_intel", properties,
                   "cl_mem_flags", flags,
                   "cl_image_format.channel_data_type", imageFormat->image_channel_data_type,
                   "cl_image_format.channel_order", imageFormat->image_channel_order,
                   "cl_image_desc.width", imageDesc->image_width,
                   "cl_image_desc.heigth", imageDesc->image_height,
                   "cl_image_desc.depth", imageDesc->image_depth,
                   "cl_image_desc.type", imageDesc->image_type,
                   "cl_image_desc.array_size", imageDesc->image_array_size,
                   "hostPtr", hostPtr);

    cl_int retVal = CL_SUCCESS;
    API_ENTER(&retVal);

    cl_mem_flags_intel flagsIntel = 0;
    cl_mem image = ImageFunctions::validateAndCreateImage(context, properties, flags, flagsIntel, imageFormat, imageDesc, hostPtr, retVal);

    ErrorCodeHelper{errcodeRet, retVal};
    DBG_LOG_INPUTS("image", image);
    return image;
}

// deprecated OpenCL 1.1
// Creates a 2D image from width/height/row-pitch arguments.
cl_mem CL_API_CALL clCreateImage2D(cl_context context,
                                   cl_mem_flags flags,
                                   const
cl_image_format *imageFormat, size_t imageWidth, size_t imageHeight, size_t imageRowPitch, void *hostPtr, cl_int *errcodeRet) { TRACING_ENTER(clCreateImage2D, &context, &flags, &imageFormat, &imageWidth, &imageHeight, &imageRowPitch, &hostPtr, &errcodeRet); DBG_LOG_INPUTS("context", context, "flags", flags, "imageFormat", imageFormat, "imageWidth", imageWidth, "imageHeight", imageHeight, "imageRowPitch", imageRowPitch, "hostPtr", hostPtr); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); cl_image_desc imageDesc; memset(&imageDesc, 0, sizeof(cl_image_desc)); imageDesc.image_height = imageHeight; imageDesc.image_width = imageWidth; imageDesc.image_row_pitch = imageRowPitch; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; cl_mem_properties *properties = nullptr; cl_mem_flags_intel flagsIntel = 0; retVal = Image::checkIfDeviceSupportsImages(context); cl_mem image2D = nullptr; if (retVal == CL_SUCCESS) { image2D = ImageFunctions::validateAndCreateImage(context, properties, flags, flagsIntel, imageFormat, &imageDesc, hostPtr, retVal); } ErrorCodeHelper{errcodeRet, retVal}; DBG_LOG_INPUTS("image 2D", image2D); TRACING_EXIT(clCreateImage2D, &image2D); return image2D; } // deprecated OpenCL 1.1 cl_mem CL_API_CALL clCreateImage3D(cl_context context, cl_mem_flags flags, const cl_image_format *imageFormat, size_t imageWidth, size_t imageHeight, size_t imageDepth, size_t imageRowPitch, size_t imageSlicePitch, void *hostPtr, cl_int *errcodeRet) { TRACING_ENTER(clCreateImage3D, &context, &flags, &imageFormat, &imageWidth, &imageHeight, &imageDepth, &imageRowPitch, &imageSlicePitch, &hostPtr, &errcodeRet); DBG_LOG_INPUTS("context", context, "flags", flags, "imageFormat", imageFormat, "imageWidth", imageWidth, "imageHeight", imageHeight, "imageDepth", imageDepth, "imageRowPitch", imageRowPitch, "imageSlicePitch", imageSlicePitch, "hostPtr", hostPtr); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); cl_image_desc imageDesc; memset(&imageDesc, 0, sizeof(cl_image_desc)); 
imageDesc.image_depth = imageDepth; imageDesc.image_height = imageHeight; imageDesc.image_width = imageWidth; imageDesc.image_row_pitch = imageRowPitch; imageDesc.image_slice_pitch = imageSlicePitch; imageDesc.image_type = CL_MEM_OBJECT_IMAGE3D; cl_mem_properties *properties = nullptr; cl_mem_flags_intel intelFlags = 0; retVal = Image::checkIfDeviceSupportsImages(context); cl_mem image3D = nullptr; if (retVal == CL_SUCCESS) { image3D = ImageFunctions::validateAndCreateImage(context, properties, flags, intelFlags, imageFormat, &imageDesc, hostPtr, retVal); } ErrorCodeHelper{errcodeRet, retVal}; DBG_LOG_INPUTS("image 3D", image3D); TRACING_EXIT(clCreateImage3D, &image3D); return image3D; } cl_int CL_API_CALL clRetainMemObject(cl_mem memobj) { TRACING_ENTER(clRetainMemObject, &memobj); cl_int retVal = CL_INVALID_MEM_OBJECT; API_ENTER(&retVal); DBG_LOG_INPUTS("memobj", memobj); auto pMemObj = castToObject(memobj); if (pMemObj) { pMemObj->retain(); retVal = CL_SUCCESS; TRACING_EXIT(clRetainMemObject, &retVal); return retVal; } TRACING_EXIT(clRetainMemObject, &retVal); return retVal; } cl_int CL_API_CALL clReleaseMemObject(cl_mem memobj) { TRACING_ENTER(clReleaseMemObject, &memobj); cl_int retVal = CL_INVALID_MEM_OBJECT; API_ENTER(&retVal); DBG_LOG_INPUTS("memobj", memobj); auto pMemObj = castToObject(memobj); if (pMemObj) { pMemObj->release(); retVal = CL_SUCCESS; TRACING_EXIT(clReleaseMemObject, &retVal); return retVal; } TRACING_EXIT(clReleaseMemObject, &retVal); return retVal; } cl_int CL_API_CALL clGetSupportedImageFormats(cl_context context, cl_mem_flags flags, cl_mem_object_type imageType, cl_uint numEntries, cl_image_format *imageFormats, cl_uint *numImageFormats) { TRACING_ENTER(clGetSupportedImageFormats, &context, &flags, &imageType, &numEntries, &imageFormats, &numImageFormats); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("context", context, "flags", flags, "imageType", imageType, "numEntries", numEntries, "imageFormats", imageFormats, 
"numImageFormats", numImageFormats); auto pContext = castToObject(context); if (pContext) { auto pClDevice = pContext->getDevice(0); if (pClDevice->getHardwareInfo().capabilityTable.supportsImages) { retVal = pContext->getSupportedImageFormats(&pClDevice->getDevice(), flags, imageType, numEntries, imageFormats, numImageFormats); } else { if (numImageFormats) { *numImageFormats = 0u; } retVal = CL_SUCCESS; } } else { retVal = CL_INVALID_CONTEXT; } TRACING_EXIT(clGetSupportedImageFormats, &retVal); return retVal; } cl_int CL_API_CALL clGetMemObjectInfo(cl_mem memobj, cl_mem_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) { TRACING_ENTER(clGetMemObjectInfo, &memobj, ¶mName, ¶mValueSize, ¶mValue, ¶mValueSizeRet); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("memobj", memobj, "paramName", paramName, "paramValueSize", paramValueSize, "paramValue", NEO::FileLoggerInstance().infoPointerToString(paramValue, paramValueSize), "paramValueSizeRet", paramValueSizeRet); MemObj *pMemObj = nullptr; retVal = validateObjects(WithCastToInternal(memobj, &pMemObj)); if (CL_SUCCESS != retVal) { TRACING_EXIT(clGetMemObjectInfo, &retVal); return retVal; } retVal = pMemObj->getMemObjectInfo(paramName, paramValueSize, paramValue, paramValueSizeRet); TRACING_EXIT(clGetMemObjectInfo, &retVal); return retVal; } cl_int CL_API_CALL clGetImageInfo(cl_mem image, cl_image_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) { TRACING_ENTER(clGetImageInfo, &image, ¶mName, ¶mValueSize, ¶mValue, ¶mValueSizeRet); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("image", image, "paramName", paramName, "paramValueSize", paramValueSize, "paramValue", NEO::FileLoggerInstance().infoPointerToString(paramValue, paramValueSize), "paramValueSizeRet", paramValueSizeRet); retVal = validateObjects(image); if (CL_SUCCESS != retVal) { TRACING_EXIT(clGetImageInfo, &retVal); return retVal; } auto pImgObj = 
castToObject(image); if (pImgObj == nullptr) { retVal = CL_INVALID_MEM_OBJECT; TRACING_EXIT(clGetImageInfo, &retVal); return retVal; } retVal = pImgObj->getImageInfo(paramName, paramValueSize, paramValue, paramValueSizeRet); TRACING_EXIT(clGetImageInfo, &retVal); return retVal; } cl_int CL_API_CALL clGetImageParamsINTEL(cl_context context, const cl_image_format *imageFormat, const cl_image_desc *imageDesc, size_t *imageRowPitch, size_t *imageSlicePitch) { cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("context", context, "imageFormat", imageFormat, "imageDesc", imageDesc, "imageRowPitch", imageRowPitch, "imageSlicePitch", imageSlicePitch); const ClSurfaceFormatInfo *surfaceFormat = nullptr; cl_mem_flags memFlags = CL_MEM_READ_ONLY; retVal = validateObjects(context); auto pContext = castToObject(context); if (CL_SUCCESS == retVal) { if ((imageFormat == nullptr) || (imageDesc == nullptr) || (imageRowPitch == nullptr) || (imageSlicePitch == nullptr)) { retVal = CL_INVALID_VALUE; } } if (CL_SUCCESS == retVal) { retVal = Image::validateImageFormat(imageFormat); } if (CL_SUCCESS == retVal) { auto pClDevice = pContext->getDevice(0); surfaceFormat = Image::getSurfaceFormatFromTable(memFlags, imageFormat, pClDevice->getHardwareInfo().capabilityTable.supportsOcl21Features); retVal = Image::validate(pContext, ClMemoryPropertiesHelper::createMemoryProperties(memFlags, 0, 0, &pClDevice->getDevice()), surfaceFormat, imageDesc, nullptr); } if (CL_SUCCESS == retVal) { retVal = Image::getImageParams(pContext, memFlags, surfaceFormat, imageDesc, imageRowPitch, imageSlicePitch); } return retVal; } cl_int CL_API_CALL clSetMemObjectDestructorCallback(cl_mem memobj, void(CL_CALLBACK *funcNotify)(cl_mem, void *), void *userData) { TRACING_ENTER(clSetMemObjectDestructorCallback, &memobj, &funcNotify, &userData); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("memobj", memobj, "funcNotify", funcNotify, "userData", userData); retVal = validateObjects(memobj, 
(void *)funcNotify); if (CL_SUCCESS != retVal) { TRACING_EXIT(clSetMemObjectDestructorCallback, &retVal); return retVal; } auto pMemObj = castToObject(memobj); retVal = pMemObj->setDestructorCallback(funcNotify, userData); TRACING_EXIT(clSetMemObjectDestructorCallback, &retVal); return retVal; } cl_sampler CL_API_CALL clCreateSampler(cl_context context, cl_bool normalizedCoords, cl_addressing_mode addressingMode, cl_filter_mode filterMode, cl_int *errcodeRet) { TRACING_ENTER(clCreateSampler, &context, &normalizedCoords, &addressingMode, &filterMode, &errcodeRet); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("context", context, "normalizedCoords", normalizedCoords, "addressingMode", addressingMode, "filterMode", filterMode); retVal = validateObjects(context); cl_sampler sampler = nullptr; if (retVal == CL_SUCCESS) { auto pContext = castToObject(context); sampler = Sampler::create( pContext, normalizedCoords, addressingMode, filterMode, CL_FILTER_NEAREST, 0.0f, std::numeric_limits::max(), retVal); } if (errcodeRet) { *errcodeRet = retVal; } TRACING_EXIT(clCreateSampler, &sampler); return sampler; } cl_int CL_API_CALL clRetainSampler(cl_sampler sampler) { TRACING_ENTER(clRetainSampler, &sampler); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("sampler", sampler); auto pSampler = castToObject(sampler); if (pSampler) { pSampler->retain(); TRACING_EXIT(clRetainSampler, &retVal); return retVal; } retVal = CL_INVALID_SAMPLER; TRACING_EXIT(clRetainSampler, &retVal); return retVal; } cl_int CL_API_CALL clReleaseSampler(cl_sampler sampler) { TRACING_ENTER(clReleaseSampler, &sampler); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("sampler", sampler); auto pSampler = castToObject(sampler); if (pSampler) { pSampler->release(); TRACING_EXIT(clReleaseSampler, &retVal); return retVal; } retVal = CL_INVALID_SAMPLER; TRACING_EXIT(clReleaseSampler, &retVal); return retVal; } cl_int CL_API_CALL clGetSamplerInfo(cl_sampler sampler, 
cl_sampler_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) { TRACING_ENTER(clGetSamplerInfo, &sampler, ¶mName, ¶mValueSize, ¶mValue, ¶mValueSizeRet); cl_int retVal = CL_INVALID_SAMPLER; API_ENTER(&retVal); DBG_LOG_INPUTS("sampler", sampler, "paramName", paramName, "paramValueSize", paramValueSize, "paramValue", NEO::FileLoggerInstance().infoPointerToString(paramValue, paramValueSize), "paramValueSizeRet", paramValueSizeRet); auto pSampler = castToObject(sampler); if (pSampler) { retVal = pSampler->getInfo(paramName, paramValueSize, paramValue, paramValueSizeRet); } TRACING_EXIT(clGetSamplerInfo, &retVal); return retVal; } cl_program CL_API_CALL clCreateProgramWithSource(cl_context context, cl_uint count, const char **strings, const size_t *lengths, cl_int *errcodeRet) { TRACING_ENTER(clCreateProgramWithSource, &context, &count, &strings, &lengths, &errcodeRet); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("context", context, "count", count, "strings", strings, "lengths", lengths); Context *pContext = nullptr; retVal = validateObjects(WithCastToInternal(context, &pContext), count, strings); cl_program program = nullptr; if (CL_SUCCESS == retVal) { program = Program::create( pContext, count, strings, lengths, retVal); } if (errcodeRet) { *errcodeRet = retVal; } TRACING_EXIT(clCreateProgramWithSource, &program); return program; } cl_program CL_API_CALL clCreateProgramWithBinary(cl_context context, cl_uint numDevices, const cl_device_id *deviceList, const size_t *lengths, const unsigned char **binaries, cl_int *binaryStatus, cl_int *errcodeRet) { TRACING_ENTER(clCreateProgramWithBinary, &context, &numDevices, &deviceList, &lengths, &binaries, &binaryStatus, &errcodeRet); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("context", context, "numDevices", numDevices, "deviceList", deviceList, "lengths", lengths, "binaries", binaries, "binaryStatus", binaryStatus); Context *pContext = nullptr; retVal = 
validateObjects(WithCastToInternal(context, &pContext), deviceList, numDevices, binaries, lengths); cl_program program = nullptr; ClDeviceVector deviceVector; if (retVal == CL_SUCCESS) { for (auto i = 0u; i < numDevices; i++) { auto device = castToObject(deviceList[i]); if (!device || !pContext->isDeviceAssociated(*device)) { retVal = CL_INVALID_DEVICE; break; } if (lengths[i] == 0 || binaries[i] == nullptr) { retVal = CL_INVALID_VALUE; break; } deviceVector.push_back(device); } } NEO::FileLoggerInstance().dumpBinaryProgram(numDevices, lengths, binaries); if (CL_SUCCESS == retVal) { program = Program::create( pContext, deviceVector, lengths, binaries, binaryStatus, retVal); } if (errcodeRet) { *errcodeRet = retVal; } TRACING_EXIT(clCreateProgramWithBinary, &program); return program; } cl_program CL_API_CALL clCreateProgramWithIL(cl_context context, const void *il, size_t length, cl_int *errcodeRet) { TRACING_ENTER(clCreateProgramWithIL, &context, &il, &length, &errcodeRet); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("context", context, "il", il, "length", length); cl_program program = nullptr; Context *pContext = nullptr; retVal = validateObjects(WithCastToInternal(context, &pContext), il); if (retVal == CL_SUCCESS) { program = ProgramFunctions::createFromIL( pContext, il, length, retVal); } if (errcodeRet != nullptr) { *errcodeRet = retVal; } TRACING_EXIT(clCreateProgramWithIL, &program); return program; } cl_program CL_API_CALL clCreateProgramWithBuiltInKernels(cl_context context, cl_uint numDevices, const cl_device_id *deviceList, const char *kernelNames, cl_int *errcodeRet) { TRACING_ENTER(clCreateProgramWithBuiltInKernels, &context, &numDevices, &deviceList, &kernelNames, &errcodeRet); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("context", context, "numDevices", numDevices, "deviceList", deviceList, "kernelNames", kernelNames); cl_program program = nullptr; Context *pContext = nullptr; retVal = 
validateObjects(WithCastToInternal(context, &pContext), numDevices, deviceList, kernelNames, errcodeRet); if (retVal == CL_SUCCESS) { ClDeviceVector deviceVector; for (auto i = 0u; i < numDevices; i++) { auto device = castToObject(deviceList[i]); if (!device || !pContext->isDeviceAssociated(*device)) { retVal = CL_INVALID_DEVICE; break; } deviceVector.push_back(device); } if (retVal == CL_SUCCESS) { program = Vme::createBuiltInProgram( *pContext, deviceVector, kernelNames, retVal); } } if (errcodeRet) { *errcodeRet = retVal; } TRACING_EXIT(clCreateProgramWithBuiltInKernels, &program); return program; } cl_int CL_API_CALL clRetainProgram(cl_program program) { TRACING_ENTER(clRetainProgram, &program); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("program", program); auto pProgram = castToObject(program); if (pProgram) { pProgram->retain(); TRACING_EXIT(clRetainProgram, &retVal); return retVal; } retVal = CL_INVALID_PROGRAM; TRACING_EXIT(clRetainProgram, &retVal); return retVal; } cl_int CL_API_CALL clReleaseProgram(cl_program program) { TRACING_ENTER(clReleaseProgram, &program); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("program", program); auto pProgram = castToObject(program); if (pProgram) { pProgram->release(); TRACING_EXIT(clReleaseProgram, &retVal); return retVal; } retVal = CL_INVALID_PROGRAM; TRACING_EXIT(clReleaseProgram, &retVal); return retVal; } cl_int CL_API_CALL clBuildProgram(cl_program program, cl_uint numDevices, const cl_device_id *deviceList, const char *options, void(CL_CALLBACK *funcNotify)(cl_program program, void *userData), void *userData) { TRACING_ENTER(clBuildProgram, &program, &numDevices, &deviceList, &options, &funcNotify, &userData); cl_int retVal = CL_INVALID_PROGRAM; API_ENTER(&retVal); DBG_LOG_INPUTS("clProgram", program, "numDevices", numDevices, "cl_device_id", deviceList, "options", (options != nullptr) ? 
options : "", "funcNotify", funcNotify, "userData", userData); Program *pProgram = nullptr; retVal = validateObjects(WithCastToInternal(program, &pProgram), Program::isValidCallback(funcNotify, userData)); if (CL_SUCCESS == retVal) { if (pProgram->isLocked()) { retVal = CL_INVALID_OPERATION; } } ClDeviceVector deviceVector; ClDeviceVector *deviceVectorPtr = &deviceVector; if (CL_SUCCESS == retVal) { retVal = Program::processInputDevices(deviceVectorPtr, numDevices, deviceList, pProgram->getDevices()); } if (CL_SUCCESS == retVal) { retVal = pProgram->build(*deviceVectorPtr, options, clCacheEnabled); pProgram->invokeCallback(funcNotify, userData); } TRACING_EXIT(clBuildProgram, &retVal); return retVal; } cl_int CL_API_CALL clCompileProgram(cl_program program, cl_uint numDevices, const cl_device_id *deviceList, const char *options, cl_uint numInputHeaders, const cl_program *inputHeaders, const char **headerIncludeNames, void(CL_CALLBACK *funcNotify)(cl_program program, void *userData), void *userData) { TRACING_ENTER(clCompileProgram, &program, &numDevices, &deviceList, &options, &numInputHeaders, &inputHeaders, &headerIncludeNames, &funcNotify, &userData); cl_int retVal = CL_INVALID_PROGRAM; API_ENTER(&retVal); DBG_LOG_INPUTS("clProgram", program, "numDevices", numDevices, "cl_device_id", deviceList, "options", (options != nullptr) ? 
options : "", "numInputHeaders", numInputHeaders); Program *pProgram = nullptr; retVal = validateObjects(WithCastToInternal(program, &pProgram), Program::isValidCallback(funcNotify, userData)); if (CL_SUCCESS == retVal) { if (pProgram->isLocked()) { retVal = CL_INVALID_OPERATION; } } ClDeviceVector deviceVector; ClDeviceVector *deviceVectorPtr = &deviceVector; if (CL_SUCCESS == retVal) { retVal = Program::processInputDevices(deviceVectorPtr, numDevices, deviceList, pProgram->getDevices()); } if (CL_SUCCESS == retVal) { retVal = pProgram->compile(*deviceVectorPtr, options, numInputHeaders, inputHeaders, headerIncludeNames); pProgram->invokeCallback(funcNotify, userData); } TRACING_EXIT(clCompileProgram, &retVal); return retVal; } cl_program CL_API_CALL clLinkProgram(cl_context context, cl_uint numDevices, const cl_device_id *deviceList, const char *options, cl_uint numInputPrograms, const cl_program *inputPrograms, void(CL_CALLBACK *funcNotify)(cl_program program, void *userData), void *userData, cl_int *errcodeRet) { TRACING_ENTER(clLinkProgram, &context, &numDevices, &deviceList, &options, &numInputPrograms, &inputPrograms, &funcNotify, &userData, &errcodeRet); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("cl_context", context, "numDevices", numDevices, "cl_device_id", deviceList, "options", (options != nullptr) ? 
options : "", "numInputPrograms", numInputPrograms); ErrorCodeHelper err(errcodeRet, CL_SUCCESS); Context *pContext = nullptr; Program *pProgram = nullptr; retVal = validateObjects(WithCastToInternal(context, &pContext), Program::isValidCallback(funcNotify, userData)); ClDeviceVector deviceVector; ClDeviceVector *deviceVectorPtr = &deviceVector; if (CL_SUCCESS == retVal) { retVal = Program::processInputDevices(deviceVectorPtr, numDevices, deviceList, pContext->getDevices()); } if (CL_SUCCESS == retVal) { pProgram = new Program(pContext, false, *deviceVectorPtr); retVal = pProgram->link(*deviceVectorPtr, options, numInputPrograms, inputPrograms); pProgram->invokeCallback(funcNotify, userData); } err.set(retVal); TRACING_EXIT(clLinkProgram, (cl_program *)&pProgram); return pProgram; } cl_int CL_API_CALL clUnloadPlatformCompiler(cl_platform_id platform) { TRACING_ENTER(clUnloadPlatformCompiler, &platform); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("platform", platform); retVal = validateObject(platform); TRACING_EXIT(clUnloadPlatformCompiler, &retVal); return retVal; } cl_int CL_API_CALL clGetProgramInfo(cl_program program, cl_program_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) { TRACING_ENTER(clGetProgramInfo, &program, ¶mName, ¶mValueSize, ¶mValue, ¶mValueSizeRet); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("clProgram", program, "paramName", paramName, "paramValueSize", paramValueSize, "paramValue", NEO::FileLoggerInstance().infoPointerToString(paramValue, paramValueSize), "paramValueSizeRet", paramValueSizeRet); retVal = validateObjects(program); if (CL_SUCCESS == retVal) { Program *pProgram = (Program *)(program); retVal = pProgram->getInfo( paramName, paramValueSize, paramValue, paramValueSizeRet); } TRACING_EXIT(clGetProgramInfo, &retVal); return retVal; } cl_int CL_API_CALL clGetProgramBuildInfo(cl_program program, cl_device_id device, cl_program_build_info paramName, size_t 
paramValueSize, void *paramValue, size_t *paramValueSizeRet) { TRACING_ENTER(clGetProgramBuildInfo, &program, &device, ¶mName, ¶mValueSize, ¶mValue, ¶mValueSizeRet); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("clProgram", program, "cl_device_id", device, "paramName", NEO::FileLoggerInstance().infoPointerToString(paramValue, paramValueSize), "paramValueSize", paramValueSize, "paramValue", paramValue, "paramValueSizeRet", paramValueSizeRet); Program *pProgram = nullptr; ClDevice *pClDevice = nullptr; retVal = validateObjects(WithCastToInternal(program, &pProgram), WithCastToInternal(device, &pClDevice)); if (CL_SUCCESS == retVal) { if (!pProgram->isDeviceAssociated(*pClDevice)) { retVal = CL_INVALID_DEVICE; } } if (CL_SUCCESS == retVal) { retVal = pProgram->getBuildInfo( pClDevice, paramName, paramValueSize, paramValue, paramValueSizeRet); } TRACING_EXIT(clGetProgramBuildInfo, &retVal); return retVal; } cl_kernel CL_API_CALL clCreateKernel(cl_program clProgram, const char *kernelName, cl_int *errcodeRet) { TRACING_ENTER(clCreateKernel, &clProgram, &kernelName, &errcodeRet); API_ENTER(errcodeRet); Program *pProgram = nullptr; cl_kernel kernel = nullptr; cl_int retVal = CL_SUCCESS; DBG_LOG_INPUTS("clProgram", clProgram, "kernelName", kernelName); do { if (!isValidObject(clProgram) || !(pProgram = castToObject(clProgram))) { retVal = CL_INVALID_PROGRAM; break; } if (kernelName == nullptr) { retVal = CL_INVALID_VALUE; break; } if (!pProgram->isBuilt()) { retVal = CL_INVALID_PROGRAM_EXECUTABLE; break; } bool kernelFound = false; KernelInfoContainer kernelInfos; kernelInfos.resize(pProgram->getMaxRootDeviceIndex() + 1); for (const auto &pClDevice : pProgram->getDevicesInProgram()) { auto rootDeviceIndex = pClDevice->getRootDeviceIndex(); auto pKernelInfo = pProgram->getKernelInfo(kernelName, rootDeviceIndex); if (pKernelInfo) { kernelFound = true; kernelInfos[rootDeviceIndex] = pKernelInfo; } } if (!kernelFound) { retVal = CL_INVALID_KERNEL_NAME; break; 
} kernel = MultiDeviceKernel::create( pProgram, kernelInfos, &retVal); DBG_LOG_INPUTS("kernel", kernel); } while (false); if (errcodeRet) { *errcodeRet = retVal; } gtpinNotifyKernelCreate(kernel); TRACING_EXIT(clCreateKernel, &kernel); return kernel; } cl_int CL_API_CALL clCreateKernelsInProgram(cl_program clProgram, cl_uint numKernels, cl_kernel *kernels, cl_uint *numKernelsRet) { TRACING_ENTER(clCreateKernelsInProgram, &clProgram, &numKernels, &kernels, &numKernelsRet); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("clProgram", clProgram, "numKernels", numKernels, "kernels", kernels, "numKernelsRet", numKernelsRet); auto pProgram = castToObject(clProgram); if (pProgram) { auto numKernelsInProgram = pProgram->getNumKernels(); if (kernels) { if (numKernels < numKernelsInProgram) { retVal = CL_INVALID_VALUE; TRACING_EXIT(clCreateKernelsInProgram, &retVal); return retVal; } for (unsigned int i = 0; i < numKernelsInProgram; ++i) { KernelInfoContainer kernelInfos; kernelInfos.resize(pProgram->getMaxRootDeviceIndex() + 1); for (const auto &pClDevice : pProgram->getDevicesInProgram()) { auto rootDeviceIndex = pClDevice->getRootDeviceIndex(); auto kernelInfo = pProgram->getKernelInfo(i, rootDeviceIndex); DEBUG_BREAK_IF(kernelInfo == nullptr); kernelInfos[rootDeviceIndex] = kernelInfo; } kernels[i] = MultiDeviceKernel::create( pProgram, kernelInfos, nullptr); gtpinNotifyKernelCreate(kernels[i]); } } if (numKernelsRet) { *numKernelsRet = static_cast(numKernelsInProgram); } TRACING_EXIT(clCreateKernelsInProgram, &retVal); return retVal; } retVal = CL_INVALID_PROGRAM; TRACING_EXIT(clCreateKernelsInProgram, &retVal); return retVal; } cl_int CL_API_CALL clRetainKernel(cl_kernel kernel) { TRACING_ENTER(clRetainKernel, &kernel); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("kernel", kernel); auto pMultiDeviceKernel = castToObject(kernel); if (pMultiDeviceKernel) { pMultiDeviceKernel->retain(); TRACING_EXIT(clRetainKernel, &retVal); return 
retVal; } retVal = CL_INVALID_KERNEL; TRACING_EXIT(clRetainKernel, &retVal); return retVal; } cl_int CL_API_CALL clReleaseKernel(cl_kernel kernel) { TRACING_ENTER(clReleaseKernel, &kernel); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("kernel", kernel); auto pMultiDeviceKernel = castToObject(kernel); if (pMultiDeviceKernel) { pMultiDeviceKernel->release(); TRACING_EXIT(clReleaseKernel, &retVal); return retVal; } retVal = CL_INVALID_KERNEL; TRACING_EXIT(clReleaseKernel, &retVal); return retVal; } cl_int CL_API_CALL clSetKernelArg(cl_kernel kernel, cl_uint argIndex, size_t argSize, const void *argValue) { TRACING_ENTER(clSetKernelArg, &kernel, &argIndex, &argSize, &argValue); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); MultiDeviceKernel *pMultiDeviceKernel = nullptr; retVal = validateObject(WithCastToInternal(kernel, &pMultiDeviceKernel)); DBG_LOG_INPUTS("kernel", kernel, "argIndex", argIndex, "argSize", argSize, "argValue", NEO::FileLoggerInstance().infoPointerToString(argValue, argSize)); do { if (retVal != CL_SUCCESS) { break; } if (pMultiDeviceKernel->getKernelArguments().size() <= argIndex) { retVal = CL_INVALID_ARG_INDEX; break; } retVal = pMultiDeviceKernel->checkCorrectImageAccessQualifier(argIndex, argSize, argValue); if (retVal != CL_SUCCESS) { pMultiDeviceKernel->unsetArg(argIndex); break; } retVal = pMultiDeviceKernel->setArg( argIndex, argSize, argValue); break; } while (false); TRACING_EXIT(clSetKernelArg, &retVal); return retVal; } cl_int CL_API_CALL clGetKernelInfo(cl_kernel kernel, cl_kernel_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) { TRACING_ENTER(clGetKernelInfo, &kernel, ¶mName, ¶mValueSize, ¶mValue, ¶mValueSizeRet); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("kernel", kernel, "paramName", paramName, "paramValueSize", paramValueSize, "paramValue", NEO::FileLoggerInstance().infoPointerToString(paramValue, paramValueSize), "paramValueSizeRet", paramValueSizeRet); 
MultiDeviceKernel *pMultiDeviceKernel = nullptr; retVal = validateObject(WithCastToInternal(kernel, &pMultiDeviceKernel)); if (retVal == CL_SUCCESS) { retVal = pMultiDeviceKernel->getInfo( paramName, paramValueSize, paramValue, paramValueSizeRet); } TRACING_EXIT(clGetKernelInfo, &retVal); return retVal; } cl_int CL_API_CALL clGetKernelArgInfo(cl_kernel kernel, cl_uint argIndx, cl_kernel_arg_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) { TRACING_ENTER(clGetKernelArgInfo, &kernel, &argIndx, ¶mName, ¶mValueSize, ¶mValue, ¶mValueSizeRet); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("kernel", kernel, "argIndx", argIndx, "paramName", paramName, "paramValueSize", paramValueSize, "paramValue", NEO::FileLoggerInstance().infoPointerToString(paramValue, paramValueSize), "paramValueSizeRet", paramValueSizeRet); auto pMultiDeviceKernel = castToObject(kernel); retVal = pMultiDeviceKernel ? pMultiDeviceKernel->getArgInfo( argIndx, paramName, paramValueSize, paramValue, paramValueSizeRet) : CL_INVALID_KERNEL; TRACING_EXIT(clGetKernelArgInfo, &retVal); return retVal; } cl_int CL_API_CALL clGetKernelWorkGroupInfo(cl_kernel kernel, cl_device_id device, cl_kernel_work_group_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) { TRACING_ENTER(clGetKernelWorkGroupInfo, &kernel, &device, ¶mName, ¶mValueSize, ¶mValue, ¶mValueSizeRet); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("kernel", kernel, "device", device, "paramName", paramName, "paramValueSize", paramValueSize, "paramValue", NEO::FileLoggerInstance().infoPointerToString(paramValue, paramValueSize), "paramValueSizeRet", paramValueSizeRet); MultiDeviceKernel *pMultiDeviceKernel = nullptr; retVal = validateObjects(WithCastToInternal(kernel, &pMultiDeviceKernel)); ClDevice *pClDevice = nullptr; if (CL_SUCCESS == retVal) { if (pMultiDeviceKernel->getDevices().size() == 1u && !device) { pClDevice = 
pMultiDeviceKernel->getDevices()[0]; } else { retVal = validateObjects(WithCastToInternal(device, &pClDevice)); } } if (CL_SUCCESS == retVal) { auto pKernel = pMultiDeviceKernel->getKernel(pClDevice->getRootDeviceIndex()); retVal = pKernel->getWorkGroupInfo( paramName, paramValueSize, paramValue, paramValueSizeRet); } TRACING_EXIT(clGetKernelWorkGroupInfo, &retVal); return retVal; } cl_int CL_API_CALL clWaitForEvents(cl_uint numEvents, const cl_event *eventList) { TRACING_ENTER(clWaitForEvents, &numEvents, &eventList); auto retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("eventList", getClFileLogger().getEvents(reinterpret_cast(eventList), numEvents)); for (unsigned int i = 0; i < numEvents && retVal == CL_SUCCESS; i++) retVal = validateObjects(eventList[i]); if (retVal != CL_SUCCESS) { TRACING_EXIT(clWaitForEvents, &retVal); return retVal; } retVal = Event::waitForEvents(numEvents, eventList); TRACING_EXIT(clWaitForEvents, &retVal); return retVal; } cl_int CL_API_CALL clGetEventInfo(cl_event event, cl_event_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) { TRACING_ENTER(clGetEventInfo, &event, ¶mName, ¶mValueSize, ¶mValue, ¶mValueSizeRet); auto retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("event", event, "paramName", paramName, "paramValueSize", paramValueSize, "paramValue", NEO::FileLoggerInstance().infoPointerToString(paramValue, paramValueSize), "paramValueSizeRet", paramValueSizeRet); Event *neoEvent = castToObject(event); if (neoEvent == nullptr) { retVal = CL_INVALID_EVENT; TRACING_EXIT(clGetEventInfo, &retVal); return retVal; } GetInfoHelper info(paramValue, paramValueSize, paramValueSizeRet); auto flushEvents = true; switch (paramName) { default: { retVal = CL_INVALID_VALUE; TRACING_EXIT(clGetEventInfo, &retVal); return retVal; } // From OCL spec : // "Return the command-queue associated with event. For user event objects," // a nullptr value is returned." 
case CL_EVENT_COMMAND_QUEUE: { if (neoEvent->isUserEvent()) { retVal = changeGetInfoStatusToCLResultType(info.set(nullptr)); TRACING_EXIT(clGetEventInfo, &retVal); return retVal; } retVal = changeGetInfoStatusToCLResultType(info.set(neoEvent->getCommandQueue())); TRACING_EXIT(clGetEventInfo, &retVal); return retVal; } case CL_EVENT_CONTEXT: retVal = changeGetInfoStatusToCLResultType(info.set(neoEvent->getContext())); TRACING_EXIT(clGetEventInfo, &retVal); return retVal; case CL_EVENT_COMMAND_TYPE: retVal = changeGetInfoStatusToCLResultType(info.set(neoEvent->getCommandType())); TRACING_EXIT(clGetEventInfo, &retVal); return retVal; case CL_EVENT_COMMAND_EXECUTION_STATUS: if (DebugManager.flags.SkipFlushingEventsOnGetStatusCalls.get()) { flushEvents = false; } if (flushEvents) { neoEvent->tryFlushEvent(); } if (neoEvent->isUserEvent()) { auto executionStatus = neoEvent->peekExecutionStatus(); //Spec requires initial state to be queued //our current design relies heavily on SUBMITTED status which directly corresponds //to command being able to be submitted, to overcome this we set initial status to queued //and we override the value stored with the value required by the spec. 
if (executionStatus == CL_QUEUED) { executionStatus = CL_SUBMITTED; } retVal = changeGetInfoStatusToCLResultType(info.set(executionStatus)); TRACING_EXIT(clGetEventInfo, &retVal); return retVal; } retVal = changeGetInfoStatusToCLResultType(info.set(neoEvent->updateEventAndReturnCurrentStatus())); TRACING_EXIT(clGetEventInfo, &retVal); return retVal; case CL_EVENT_REFERENCE_COUNT: retVal = changeGetInfoStatusToCLResultType(info.set(neoEvent->getReference())); TRACING_EXIT(clGetEventInfo, &retVal); return retVal; } } cl_event CL_API_CALL clCreateUserEvent(cl_context context, cl_int *errcodeRet) { TRACING_ENTER(clCreateUserEvent, &context, &errcodeRet); API_ENTER(errcodeRet); DBG_LOG_INPUTS("context", context); ErrorCodeHelper err(errcodeRet, CL_SUCCESS); Context *ctx = castToObject(context); if (ctx == nullptr) { err.set(CL_INVALID_CONTEXT); cl_event retVal = nullptr; TRACING_EXIT(clCreateUserEvent, &retVal); return retVal; } Event *userEvent = new UserEvent(ctx); cl_event userClEvent = userEvent; DBG_LOG_INPUTS("cl_event", userClEvent, "UserEvent", userEvent); TRACING_EXIT(clCreateUserEvent, &userClEvent); return userClEvent; } cl_int CL_API_CALL clRetainEvent(cl_event event) { TRACING_ENTER(clRetainEvent, &event); auto retVal = CL_SUCCESS; API_ENTER(&retVal); auto pEvent = castToObject(event); DBG_LOG_INPUTS("cl_event", event, "Event", pEvent); if (pEvent) { pEvent->retain(); TRACING_EXIT(clRetainEvent, &retVal); return retVal; } retVal = CL_INVALID_EVENT; TRACING_EXIT(clRetainEvent, &retVal); return retVal; } cl_int CL_API_CALL clReleaseEvent(cl_event event) { TRACING_ENTER(clReleaseEvent, &event); auto retVal = CL_SUCCESS; API_ENTER(&retVal); auto pEvent = castToObject(event); DBG_LOG_INPUTS("cl_event", event, "Event", pEvent); if (pEvent) { pEvent->release(); TRACING_EXIT(clReleaseEvent, &retVal); return retVal; } retVal = CL_INVALID_EVENT; TRACING_EXIT(clReleaseEvent, &retVal); return retVal; } cl_int CL_API_CALL clSetUserEventStatus(cl_event event, cl_int 
executionStatus) { TRACING_ENTER(clSetUserEventStatus, &event, &executionStatus); auto retVal = CL_SUCCESS; API_ENTER(&retVal); auto userEvent = castToObject(event); DBG_LOG_INPUTS("cl_event", event, "executionStatus", executionStatus, "UserEvent", userEvent); if (userEvent == nullptr) { retVal = CL_INVALID_EVENT; TRACING_EXIT(clSetUserEventStatus, &retVal); return retVal; } if (executionStatus > CL_COMPLETE) { retVal = CL_INVALID_VALUE; TRACING_EXIT(clSetUserEventStatus, &retVal); return retVal; } if (!userEvent->isInitialEventStatus()) { retVal = CL_INVALID_OPERATION; TRACING_EXIT(clSetUserEventStatus, &retVal); return retVal; } userEvent->setStatus(executionStatus); TRACING_EXIT(clSetUserEventStatus, &retVal); return retVal; } cl_int CL_API_CALL clSetEventCallback(cl_event event, cl_int commandExecCallbackType, void(CL_CALLBACK *funcNotify)(cl_event, cl_int, void *), void *userData) { TRACING_ENTER(clSetEventCallback, &event, &commandExecCallbackType, &funcNotify, &userData); auto retVal = CL_SUCCESS; API_ENTER(&retVal); auto eventObject = castToObject(event); DBG_LOG_INPUTS("cl_event", event, "commandExecCallbackType", commandExecCallbackType, "Event", eventObject); if (eventObject == nullptr) { retVal = CL_INVALID_EVENT; TRACING_EXIT(clSetEventCallback, &retVal); return retVal; } switch (commandExecCallbackType) { case CL_COMPLETE: case CL_SUBMITTED: case CL_RUNNING: break; default: { retVal = CL_INVALID_VALUE; TRACING_EXIT(clSetEventCallback, &retVal); return retVal; } } if (funcNotify == nullptr) { retVal = CL_INVALID_VALUE; TRACING_EXIT(clSetEventCallback, &retVal); return retVal; } eventObject->tryFlushEvent(); eventObject->addCallback(funcNotify, commandExecCallbackType, userData); TRACING_EXIT(clSetEventCallback, &retVal); return retVal; } cl_int CL_API_CALL clGetEventProfilingInfo(cl_event event, cl_profiling_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) { TRACING_ENTER(clGetEventProfilingInfo, &event, ¶mName, 
¶mValueSize, ¶mValue, ¶mValueSizeRet); auto retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("event", event, "paramName", paramName, "paramValueSize", paramValueSize, "paramValue", NEO::FileLoggerInstance().infoPointerToString(paramValue, paramValueSize), "paramValueSizeRet", paramValueSizeRet); auto eventObject = castToObject(event); if (eventObject == nullptr) { retVal = CL_INVALID_EVENT; TRACING_EXIT(clGetEventProfilingInfo, &retVal); return retVal; } retVal = eventObject->getEventProfilingInfo(paramName, paramValueSize, paramValue, paramValueSizeRet); TRACING_EXIT(clGetEventProfilingInfo, &retVal); return retVal; } cl_int CL_API_CALL clFlush(cl_command_queue commandQueue) { TRACING_ENTER(clFlush, &commandQueue); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue); auto pCommandQueue = castToObject(commandQueue); retVal = pCommandQueue ? pCommandQueue->flush() : CL_INVALID_COMMAND_QUEUE; TRACING_EXIT(clFlush, &retVal); return retVal; } cl_int CL_API_CALL clFinish(cl_command_queue commandQueue) { TRACING_ENTER(clFinish, &commandQueue); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue); auto pCommandQueue = castToObject(commandQueue); retVal = pCommandQueue ? 
pCommandQueue->finish() : CL_INVALID_COMMAND_QUEUE; TRACING_EXIT(clFinish, &retVal); return retVal; } cl_int CL_API_CALL clEnqueueReadBuffer(cl_command_queue commandQueue, cl_mem buffer, cl_bool blockingRead, size_t offset, size_t cb, void *ptr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { TRACING_ENTER(clEnqueueReadBuffer, &commandQueue, &buffer, &blockingRead, &offset, &cb, &ptr, &numEventsInWaitList, &eventWaitList, &event); CommandQueue *pCommandQueue = nullptr; Buffer *pBuffer = nullptr; auto retVal = validateObjects( WithCastToInternal(commandQueue, &pCommandQueue), WithCastToInternal(buffer, &pBuffer), ptr); API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue, "buffer", buffer, "blockingRead", blockingRead, "offset", offset, "cb", cb, "ptr", ptr, "numEventsInWaitList", numEventsInWaitList, "eventWaitList", getClFileLogger().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList), "event", getClFileLogger().getEvents(reinterpret_cast(event), 1)); if (CL_SUCCESS == retVal) { if (pBuffer->readMemObjFlagsInvalid()) { retVal = CL_INVALID_OPERATION; TRACING_EXIT(clEnqueueReadBuffer, &retVal); return retVal; } if (!pCommandQueue->validateCapabilityForOperation(CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_INTEL, numEventsInWaitList, eventWaitList, event)) { retVal = CL_INVALID_OPERATION; TRACING_EXIT(clEnqueueReadBuffer, &retVal); return retVal; } retVal = pCommandQueue->enqueueReadBuffer( pBuffer, blockingRead, offset, cb, ptr, nullptr, numEventsInWaitList, eventWaitList, event); } DBG_LOG_INPUTS("event", getClFileLogger().getEvents(reinterpret_cast(event), 1u)); TRACING_EXIT(clEnqueueReadBuffer, &retVal); return retVal; } cl_int CL_API_CALL clEnqueueReadBufferRect(cl_command_queue commandQueue, cl_mem buffer, cl_bool blockingRead, const size_t *bufferOrigin, const size_t *hostOrigin, const size_t *region, size_t bufferRowPitch, size_t bufferSlicePitch, size_t hostRowPitch, size_t hostSlicePitch, void *ptr, cl_uint 
numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { TRACING_ENTER(clEnqueueReadBufferRect, &commandQueue, &buffer, &blockingRead, &bufferOrigin, &hostOrigin, ®ion, &bufferRowPitch, &bufferSlicePitch, &hostRowPitch, &hostSlicePitch, &ptr, &numEventsInWaitList, &eventWaitList, &event); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue, "buffer", buffer, "blockingRead", blockingRead, "bufferOrigin[0]", NEO::FileLoggerInstance().getInput(bufferOrigin, 0), "bufferOrigin[1]", NEO::FileLoggerInstance().getInput(bufferOrigin, 1), "bufferOrigin[2]", NEO::FileLoggerInstance().getInput(bufferOrigin, 2), "hostOrigin[0]", NEO::FileLoggerInstance().getInput(hostOrigin, 0), "hostOrigin[1]", NEO::FileLoggerInstance().getInput(hostOrigin, 1), "hostOrigin[2]", NEO::FileLoggerInstance().getInput(hostOrigin, 2), "region[0]", NEO::FileLoggerInstance().getInput(region, 0), "region[1]", NEO::FileLoggerInstance().getInput(region, 1), "region[2]", NEO::FileLoggerInstance().getInput(region, 2), "bufferRowPitch", bufferRowPitch, "bufferSlicePitch", bufferSlicePitch, "hostRowPitch", hostRowPitch, "hostSlicePitch", hostSlicePitch, "ptr", ptr, "numEventsInWaitList", numEventsInWaitList, "eventWaitList", getClFileLogger().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList), "event", getClFileLogger().getEvents(reinterpret_cast(event), 1)); CommandQueue *pCommandQueue = nullptr; Buffer *pBuffer = nullptr; retVal = validateObjects( WithCastToInternal(commandQueue, &pCommandQueue), WithCastToInternal(buffer, &pBuffer), ptr); if (CL_SUCCESS != retVal) { TRACING_EXIT(clEnqueueReadBufferRect, &retVal); return retVal; } if (pBuffer->readMemObjFlagsInvalid()) { retVal = CL_INVALID_OPERATION; TRACING_EXIT(clEnqueueReadBufferRect, &retVal); return retVal; } if (pBuffer->bufferRectPitchSet(bufferOrigin, region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, true) == false) { retVal = CL_INVALID_VALUE; 
TRACING_EXIT(clEnqueueReadBufferRect, &retVal); return retVal; } if (!pCommandQueue->validateCapabilityForOperation(CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_RECT_INTEL, numEventsInWaitList, eventWaitList, event)) { retVal = CL_INVALID_OPERATION; TRACING_EXIT(clEnqueueReadBufferRect, &retVal); return retVal; } retVal = pCommandQueue->enqueueReadBufferRect( pBuffer, blockingRead, bufferOrigin, hostOrigin, region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, ptr, numEventsInWaitList, eventWaitList, event); TRACING_EXIT(clEnqueueReadBufferRect, &retVal); return retVal; } cl_int CL_API_CALL clEnqueueWriteBuffer(cl_command_queue commandQueue, cl_mem buffer, cl_bool blockingWrite, size_t offset, size_t cb, const void *ptr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { TRACING_ENTER(clEnqueueWriteBuffer, &commandQueue, &buffer, &blockingWrite, &offset, &cb, &ptr, &numEventsInWaitList, &eventWaitList, &event); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue, "buffer", buffer, "blockingWrite", blockingWrite, "offset", offset, "cb", cb, "ptr", ptr, "numEventsInWaitList", numEventsInWaitList, "eventWaitList", getClFileLogger().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList), "event", getClFileLogger().getEvents(reinterpret_cast(event), 1)); CommandQueue *pCommandQueue = nullptr; Buffer *pBuffer = nullptr; retVal = validateObjects( WithCastToInternal(commandQueue, &pCommandQueue), WithCastToInternal(buffer, &pBuffer), ptr); if (CL_SUCCESS == retVal) { if (pBuffer->writeMemObjFlagsInvalid()) { retVal = CL_INVALID_OPERATION; TRACING_EXIT(clEnqueueWriteBuffer, &retVal); return retVal; } if (!pCommandQueue->validateCapabilityForOperation(CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_INTEL, numEventsInWaitList, eventWaitList, event)) { retVal = CL_INVALID_OPERATION; TRACING_EXIT(clEnqueueWriteBuffer, &retVal); return retVal; } retVal = pCommandQueue->enqueueWriteBuffer( pBuffer, 
blockingWrite, offset, cb, ptr, nullptr, numEventsInWaitList, eventWaitList, event); } DBG_LOG_INPUTS("event", getClFileLogger().getEvents(reinterpret_cast(event), 1u)); TRACING_EXIT(clEnqueueWriteBuffer, &retVal); return retVal; } cl_int CL_API_CALL clEnqueueWriteBufferRect(cl_command_queue commandQueue, cl_mem buffer, cl_bool blockingWrite, const size_t *bufferOrigin, const size_t *hostOrigin, const size_t *region, size_t bufferRowPitch, size_t bufferSlicePitch, size_t hostRowPitch, size_t hostSlicePitch, const void *ptr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { TRACING_ENTER(clEnqueueWriteBufferRect, &commandQueue, &buffer, &blockingWrite, &bufferOrigin, &hostOrigin, ®ion, &bufferRowPitch, &bufferSlicePitch, &hostRowPitch, &hostSlicePitch, &ptr, &numEventsInWaitList, &eventWaitList, &event); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue, "buffer", buffer, "blockingWrite", blockingWrite, "bufferOrigin[0]", NEO::FileLoggerInstance().getInput(bufferOrigin, 0), "bufferOrigin[1]", NEO::FileLoggerInstance().getInput(bufferOrigin, 1), "bufferOrigin[2]", NEO::FileLoggerInstance().getInput(bufferOrigin, 2), "hostOrigin[0]", NEO::FileLoggerInstance().getInput(hostOrigin, 0), "hostOrigin[1]", NEO::FileLoggerInstance().getInput(hostOrigin, 1), "hostOrigin[2]", NEO::FileLoggerInstance().getInput(hostOrigin, 2), "region[0]", NEO::FileLoggerInstance().getInput(region, 0), "region[1]", NEO::FileLoggerInstance().getInput(region, 1), "region[2]", NEO::FileLoggerInstance().getInput(region, 2), "bufferRowPitch", bufferRowPitch, "bufferSlicePitch", bufferSlicePitch, "hostRowPitch", hostRowPitch, "hostSlicePitch", hostSlicePitch, "ptr", ptr, "numEventsInWaitList", numEventsInWaitList, "eventWaitList", getClFileLogger().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList), "event", getClFileLogger().getEvents(reinterpret_cast(event), 1)); CommandQueue *pCommandQueue = nullptr; Buffer 
*pBuffer = nullptr; retVal = validateObjects( WithCastToInternal(commandQueue, &pCommandQueue), WithCastToInternal(buffer, &pBuffer), ptr); if (CL_SUCCESS != retVal) { TRACING_EXIT(clEnqueueWriteBufferRect, &retVal); return retVal; } if (pBuffer->writeMemObjFlagsInvalid()) { retVal = CL_INVALID_OPERATION; TRACING_EXIT(clEnqueueWriteBufferRect, &retVal); return retVal; } if (pBuffer->bufferRectPitchSet(bufferOrigin, region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, true) == false) { retVal = CL_INVALID_VALUE; TRACING_EXIT(clEnqueueWriteBufferRect, &retVal); return retVal; } if (!pCommandQueue->validateCapabilityForOperation(CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_RECT_INTEL, numEventsInWaitList, eventWaitList, event)) { retVal = CL_INVALID_OPERATION; TRACING_EXIT(clEnqueueWriteBufferRect, &retVal); return retVal; } retVal = pCommandQueue->enqueueWriteBufferRect( pBuffer, blockingWrite, bufferOrigin, hostOrigin, region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, ptr, numEventsInWaitList, eventWaitList, event); TRACING_EXIT(clEnqueueWriteBufferRect, &retVal); return retVal; } cl_int CL_API_CALL clEnqueueFillBuffer(cl_command_queue commandQueue, cl_mem buffer, const void *pattern, size_t patternSize, size_t offset, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { TRACING_ENTER(clEnqueueFillBuffer, &commandQueue, &buffer, &pattern, &patternSize, &offset, &size, &numEventsInWaitList, &eventWaitList, &event); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue, "buffer", buffer, "pattern", NEO::FileLoggerInstance().infoPointerToString(pattern, patternSize), "patternSize", patternSize, "offset", offset, "size", size, "numEventsInWaitList", numEventsInWaitList, "eventWaitList", getClFileLogger().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList), "event", getClFileLogger().getEvents(reinterpret_cast(event), 1)); CommandQueue *pCommandQueue = 
nullptr; Buffer *pBuffer = nullptr; retVal = validateObjects( WithCastToInternal(commandQueue, &pCommandQueue), WithCastToInternal(buffer, &pBuffer), pattern, (PatternSize)patternSize, EventWaitList(numEventsInWaitList, eventWaitList)); if (CL_SUCCESS == retVal) { if (!pCommandQueue->validateCapabilityForOperation(CL_QUEUE_CAPABILITY_FILL_BUFFER_INTEL, numEventsInWaitList, eventWaitList, event)) { retVal = CL_INVALID_OPERATION; TRACING_EXIT(clEnqueueFillBuffer, &retVal); return retVal; } retVal = pCommandQueue->enqueueFillBuffer( pBuffer, pattern, patternSize, offset, size, numEventsInWaitList, eventWaitList, event); } TRACING_EXIT(clEnqueueFillBuffer, &retVal); return retVal; } cl_int CL_API_CALL clEnqueueCopyBuffer(cl_command_queue commandQueue, cl_mem srcBuffer, cl_mem dstBuffer, size_t srcOffset, size_t dstOffset, size_t cb, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { TRACING_ENTER(clEnqueueCopyBuffer, &commandQueue, &srcBuffer, &dstBuffer, &srcOffset, &dstOffset, &cb, &numEventsInWaitList, &eventWaitList, &event); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue, "srcBuffer", srcBuffer, "dstBuffer", dstBuffer, "srcOffset", srcOffset, "dstOffset", dstOffset, "cb", cb, "numEventsInWaitList", numEventsInWaitList, "eventWaitList", getClFileLogger().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList), "event", getClFileLogger().getEvents(reinterpret_cast(event), 1)); CommandQueue *pCommandQueue = nullptr; Buffer *pSrcBuffer = nullptr; Buffer *pDstBuffer = nullptr; retVal = validateObjects( WithCastToInternal(commandQueue, &pCommandQueue), WithCastToInternal(srcBuffer, &pSrcBuffer), WithCastToInternal(dstBuffer, &pDstBuffer)); if (CL_SUCCESS == retVal) { size_t srcSize = pSrcBuffer->getSize(); size_t dstSize = pDstBuffer->getSize(); if (srcOffset + cb > srcSize || dstOffset + cb > dstSize) { retVal = CL_INVALID_VALUE; TRACING_EXIT(clEnqueueCopyBuffer, &retVal); return retVal; } 
if (!pCommandQueue->validateCapabilityForOperation(CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_INTEL, numEventsInWaitList, eventWaitList, event)) { retVal = CL_INVALID_OPERATION; TRACING_EXIT(clEnqueueCopyBuffer, &retVal); return retVal; } retVal = pCommandQueue->enqueueCopyBuffer( pSrcBuffer, pDstBuffer, srcOffset, dstOffset, cb, numEventsInWaitList, eventWaitList, event); } DBG_LOG_INPUTS("event", getClFileLogger().getEvents(reinterpret_cast(event), 1u)); TRACING_EXIT(clEnqueueCopyBuffer, &retVal); return retVal; } cl_int CL_API_CALL clEnqueueCopyBufferRect(cl_command_queue commandQueue, cl_mem srcBuffer, cl_mem dstBuffer, const size_t *srcOrigin, const size_t *dstOrigin, const size_t *region, size_t srcRowPitch, size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { TRACING_ENTER(clEnqueueCopyBufferRect, &commandQueue, &srcBuffer, &dstBuffer, &srcOrigin, &dstOrigin, ®ion, &srcRowPitch, &srcSlicePitch, &dstRowPitch, &dstSlicePitch, &numEventsInWaitList, &eventWaitList, &event); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue, "srcBuffer", srcBuffer, "dstBuffer", dstBuffer, "srcOrigin[0]", NEO::FileLoggerInstance().getInput(srcOrigin, 0), "srcOrigin[1]", NEO::FileLoggerInstance().getInput(srcOrigin, 1), "srcOrigin[2]", NEO::FileLoggerInstance().getInput(srcOrigin, 2), "dstOrigin[0]", NEO::FileLoggerInstance().getInput(dstOrigin, 0), "dstOrigin[1]", NEO::FileLoggerInstance().getInput(dstOrigin, 1), "dstOrigin[2]", NEO::FileLoggerInstance().getInput(dstOrigin, 2), "region[0]", NEO::FileLoggerInstance().getInput(region, 0), "region[1]", NEO::FileLoggerInstance().getInput(region, 1), "region[2]", NEO::FileLoggerInstance().getInput(region, 2), "srcRowPitch", srcRowPitch, "srcSlicePitch", srcSlicePitch, "dstRowPitch", dstRowPitch, "dstSlicePitch", dstSlicePitch, "numEventsInWaitList", numEventsInWaitList, "eventWaitList", 
getClFileLogger().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList), "event", getClFileLogger().getEvents(reinterpret_cast(event), 1)); CommandQueue *pCommandQueue = nullptr; Buffer *pSrcBuffer = nullptr; Buffer *pDstBuffer = nullptr; retVal = validateObjects( WithCastToInternal(commandQueue, &pCommandQueue), WithCastToInternal(srcBuffer, &pSrcBuffer), WithCastToInternal(dstBuffer, &pDstBuffer)); if (CL_SUCCESS == retVal) { if (!pSrcBuffer->bufferRectPitchSet(srcOrigin, region, srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, true) || !pDstBuffer->bufferRectPitchSet(dstOrigin, region, srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, false)) { retVal = CL_INVALID_VALUE; TRACING_EXIT(clEnqueueCopyBufferRect, &retVal); return retVal; } if (!pCommandQueue->validateCapabilityForOperation(CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_RECT_INTEL, numEventsInWaitList, eventWaitList, event)) { retVal = CL_INVALID_OPERATION; TRACING_EXIT(clEnqueueCopyBufferRect, &retVal); return retVal; } retVal = pCommandQueue->enqueueCopyBufferRect( pSrcBuffer, pDstBuffer, srcOrigin, dstOrigin, region, srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, numEventsInWaitList, eventWaitList, event); } DBG_LOG_INPUTS("event", getClFileLogger().getEvents(reinterpret_cast(event), 1u)); TRACING_EXIT(clEnqueueCopyBufferRect, &retVal); return retVal; } cl_int CL_API_CALL clEnqueueReadImage(cl_command_queue commandQueue, cl_mem image, cl_bool blockingRead, const size_t *origin, const size_t *region, size_t rowPitch, size_t slicePitch, void *ptr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { TRACING_ENTER(clEnqueueReadImage, &commandQueue, &image, &blockingRead, &origin, ®ion, &rowPitch, &slicePitch, &ptr, &numEventsInWaitList, &eventWaitList, &event); CommandQueue *pCommandQueue = nullptr; Image *pImage = nullptr; auto retVal = validateObjects( WithCastToInternal(commandQueue, &pCommandQueue), WithCastToInternal(image, &pImage)); 
API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue, "image", image, "blockingRead", blockingRead, "origin[0]", NEO::FileLoggerInstance().getInput(origin, 0), "origin[1]", NEO::FileLoggerInstance().getInput(origin, 1), "origin[2]", NEO::FileLoggerInstance().getInput(origin, 2), "region[0]", NEO::FileLoggerInstance().getInput(region, 0), "region[1]", NEO::FileLoggerInstance().getInput(region, 1), "region[2]", NEO::FileLoggerInstance().getInput(region, 2), "rowPitch", rowPitch, "slicePitch", slicePitch, "ptr", ptr, "numEventsInWaitList", numEventsInWaitList, "eventWaitList", getClFileLogger().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList), "event", getClFileLogger().getEvents(reinterpret_cast(event), 1)); if (CL_SUCCESS == retVal) { if (pImage->readMemObjFlagsInvalid()) { retVal = CL_INVALID_OPERATION; TRACING_EXIT(clEnqueueReadImage, &retVal); return retVal; } if (isPackedYuvImage(&pImage->getImageFormat())) { retVal = validateYuvOperation(origin, region); if (retVal != CL_SUCCESS) { TRACING_EXIT(clEnqueueReadImage, &retVal); return retVal; } } retVal = Image::validateRegionAndOrigin(origin, region, pImage->getImageDesc()); if (retVal != CL_SUCCESS) { TRACING_EXIT(clEnqueueReadImage, &retVal); return retVal; } if (!pCommandQueue->validateCapabilityForOperation(CL_QUEUE_CAPABILITY_TRANSFER_IMAGE_INTEL, numEventsInWaitList, eventWaitList, event)) { retVal = CL_INVALID_OPERATION; TRACING_EXIT(clEnqueueReadImage, &retVal); return retVal; } retVal = pCommandQueue->enqueueReadImage( pImage, blockingRead, origin, region, rowPitch, slicePitch, ptr, nullptr, numEventsInWaitList, eventWaitList, event); } DBG_LOG_INPUTS("event", getClFileLogger().getEvents(reinterpret_cast(event), 1u)); TRACING_EXIT(clEnqueueReadImage, &retVal); return retVal; } cl_int CL_API_CALL clEnqueueWriteImage(cl_command_queue commandQueue, cl_mem image, cl_bool blockingWrite, const size_t *origin, const size_t *region, size_t inputRowPitch, size_t inputSlicePitch, const 
void *ptr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { TRACING_ENTER(clEnqueueWriteImage, &commandQueue, &image, &blockingWrite, &origin, ®ion, &inputRowPitch, &inputSlicePitch, &ptr, &numEventsInWaitList, &eventWaitList, &event); CommandQueue *pCommandQueue = nullptr; Image *pImage = nullptr; auto retVal = validateObjects( WithCastToInternal(commandQueue, &pCommandQueue), WithCastToInternal(image, &pImage)); API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue, "image", image, "blockingWrite", blockingWrite, "origin[0]", NEO::FileLoggerInstance().getInput(origin, 0), "origin[1]", NEO::FileLoggerInstance().getInput(origin, 1), "origin[2]", NEO::FileLoggerInstance().getInput(origin, 2), "region[0]", NEO::FileLoggerInstance().getInput(region, 0), "region[1]", NEO::FileLoggerInstance().getInput(region, 1), "region[2]", NEO::FileLoggerInstance().getInput(region, 2), "inputRowPitch", inputRowPitch, "inputSlicePitch", inputSlicePitch, "ptr", ptr, "numEventsInWaitList", numEventsInWaitList, "eventWaitList", getClFileLogger().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList), "event", getClFileLogger().getEvents(reinterpret_cast(event), 1)); if (CL_SUCCESS == retVal) { if (pImage->writeMemObjFlagsInvalid()) { retVal = CL_INVALID_OPERATION; TRACING_EXIT(clEnqueueWriteImage, &retVal); return retVal; } if (isPackedYuvImage(&pImage->getImageFormat())) { retVal = validateYuvOperation(origin, region); if (retVal != CL_SUCCESS) { TRACING_EXIT(clEnqueueWriteImage, &retVal); return retVal; } } retVal = Image::validateRegionAndOrigin(origin, region, pImage->getImageDesc()); if (retVal != CL_SUCCESS) { TRACING_EXIT(clEnqueueWriteImage, &retVal); return retVal; } if (!pCommandQueue->validateCapabilityForOperation(CL_QUEUE_CAPABILITY_TRANSFER_IMAGE_INTEL, numEventsInWaitList, eventWaitList, event)) { retVal = CL_INVALID_OPERATION; TRACING_EXIT(clEnqueueWriteImage, &retVal); return retVal; } retVal = 
pCommandQueue->enqueueWriteImage( pImage, blockingWrite, origin, region, inputRowPitch, inputSlicePitch, ptr, nullptr, numEventsInWaitList, eventWaitList, event); } DBG_LOG_INPUTS("event", getClFileLogger().getEvents(reinterpret_cast(event), 1u)); TRACING_EXIT(clEnqueueWriteImage, &retVal); return retVal; } cl_int CL_API_CALL clEnqueueFillImage(cl_command_queue commandQueue, cl_mem image, const void *fillColor, const size_t *origin, const size_t *region, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { TRACING_ENTER(clEnqueueFillImage, &commandQueue, &image, &fillColor, &origin, ®ion, &numEventsInWaitList, &eventWaitList, &event); CommandQueue *pCommandQueue = nullptr; Image *dstImage = nullptr; auto retVal = validateObjects( WithCastToInternal(commandQueue, &pCommandQueue), WithCastToInternal(image, &dstImage), fillColor, EventWaitList(numEventsInWaitList, eventWaitList)); API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue, "image", image, "fillColor", fillColor, "origin[0]", NEO::FileLoggerInstance().getInput(origin, 0), "origin[1]", NEO::FileLoggerInstance().getInput(origin, 1), "origin[2]", NEO::FileLoggerInstance().getInput(origin, 2), "region[0]", NEO::FileLoggerInstance().getInput(region, 0), "region[1]", NEO::FileLoggerInstance().getInput(region, 1), "region[2]", NEO::FileLoggerInstance().getInput(region, 2), "numEventsInWaitList", numEventsInWaitList, "eventWaitList", getClFileLogger().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList), "event", getClFileLogger().getEvents(reinterpret_cast(event), 1)); if (CL_SUCCESS == retVal) { retVal = Image::validateRegionAndOrigin(origin, region, dstImage->getImageDesc()); if (retVal != CL_SUCCESS) { TRACING_EXIT(clEnqueueFillImage, &retVal); return retVal; } if (!pCommandQueue->validateCapabilityForOperation(CL_QUEUE_CAPABILITY_FILL_IMAGE_INTEL, numEventsInWaitList, eventWaitList, event)) { retVal = CL_INVALID_OPERATION; TRACING_EXIT(clEnqueueFillImage, 
&retVal); return retVal; } retVal = pCommandQueue->enqueueFillImage( dstImage, fillColor, origin, region, numEventsInWaitList, eventWaitList, event); } DBG_LOG_INPUTS("event", getClFileLogger().getEvents(reinterpret_cast(event), 1u)); TRACING_EXIT(clEnqueueFillImage, &retVal); return retVal; } cl_int CL_API_CALL clEnqueueCopyImage(cl_command_queue commandQueue, cl_mem srcImage, cl_mem dstImage, const size_t *srcOrigin, const size_t *dstOrigin, const size_t *region, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { TRACING_ENTER(clEnqueueCopyImage, &commandQueue, &srcImage, &dstImage, &srcOrigin, &dstOrigin, ®ion, &numEventsInWaitList, &eventWaitList, &event); CommandQueue *pCommandQueue = nullptr; Image *pSrcImage = nullptr; Image *pDstImage = nullptr; auto retVal = validateObjects(WithCastToInternal(commandQueue, &pCommandQueue), WithCastToInternal(srcImage, &pSrcImage), WithCastToInternal(dstImage, &pDstImage)); API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue, "srcImage", srcImage, "dstImage", dstImage, "srcOrigin[0]", NEO::FileLoggerInstance().getInput(srcOrigin, 0), "srcOrigin[1]", NEO::FileLoggerInstance().getInput(srcOrigin, 1), "srcOrigin[2]", NEO::FileLoggerInstance().getInput(srcOrigin, 2), "dstOrigin[0]", NEO::FileLoggerInstance().getInput(dstOrigin, 0), "dstOrigin[1]", NEO::FileLoggerInstance().getInput(dstOrigin, 1), "dstOrigin[2]", NEO::FileLoggerInstance().getInput(dstOrigin, 2), "region[0]", region ? region[0] : 0, "region[1]", region ? region[1] : 0, "region[2]", region ? 
region[2] : 0, "numEventsInWaitList", numEventsInWaitList, "eventWaitList", getClFileLogger().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList), "event", getClFileLogger().getEvents(reinterpret_cast(event), 1)); if (CL_SUCCESS == retVal) { if (memcmp(&pSrcImage->getImageFormat(), &pDstImage->getImageFormat(), sizeof(cl_image_format))) { retVal = CL_IMAGE_FORMAT_MISMATCH; TRACING_EXIT(clEnqueueCopyImage, &retVal); return retVal; } if (isPackedYuvImage(&pSrcImage->getImageFormat())) { retVal = validateYuvOperation(srcOrigin, region); if (retVal != CL_SUCCESS) { TRACING_EXIT(clEnqueueCopyImage, &retVal); return retVal; } } if (isPackedYuvImage(&pDstImage->getImageFormat())) { retVal = validateYuvOperation(dstOrigin, region); if (retVal != CL_SUCCESS) { TRACING_EXIT(clEnqueueCopyImage, &retVal); return retVal; } if (pDstImage->getImageDesc().image_type == CL_MEM_OBJECT_IMAGE2D && dstOrigin[2] != 0) { retVal = CL_INVALID_VALUE; TRACING_EXIT(clEnqueueCopyImage, &retVal); return retVal; } } retVal = Image::validateRegionAndOrigin(srcOrigin, region, pSrcImage->getImageDesc()); if (retVal != CL_SUCCESS) { TRACING_EXIT(clEnqueueCopyImage, &retVal); return retVal; } retVal = Image::validateRegionAndOrigin(dstOrigin, region, pDstImage->getImageDesc()); if (retVal != CL_SUCCESS) { TRACING_EXIT(clEnqueueCopyImage, &retVal); return retVal; } if (!pCommandQueue->validateCapabilityForOperation(CL_QUEUE_CAPABILITY_TRANSFER_IMAGE_INTEL, numEventsInWaitList, eventWaitList, event)) { retVal = CL_INVALID_OPERATION; TRACING_EXIT(clEnqueueCopyImage, &retVal); return retVal; } retVal = pCommandQueue->enqueueCopyImage( pSrcImage, pDstImage, srcOrigin, dstOrigin, region, numEventsInWaitList, eventWaitList, event); } DBG_LOG_INPUTS("event", getClFileLogger().getEvents(reinterpret_cast(event), 1u)); TRACING_EXIT(clEnqueueCopyImage, &retVal); return retVal; } cl_int CL_API_CALL clEnqueueCopyImageToBuffer(cl_command_queue commandQueue, cl_mem srcImage, cl_mem dstBuffer, const 
size_t *srcOrigin, const size_t *region, const size_t dstOffset, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { TRACING_ENTER(clEnqueueCopyImageToBuffer, &commandQueue, &srcImage, &dstBuffer, &srcOrigin, ®ion, (size_t *)&dstOffset, &numEventsInWaitList, &eventWaitList, &event); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue, "srcImage", srcImage, "dstBuffer", dstBuffer, "srcOrigin[0]", NEO::FileLoggerInstance().getInput(srcOrigin, 0), "srcOrigin[1]", NEO::FileLoggerInstance().getInput(srcOrigin, 1), "srcOrigin[2]", NEO::FileLoggerInstance().getInput(srcOrigin, 2), "region[0]", NEO::FileLoggerInstance().getInput(region, 0), "region[1]", NEO::FileLoggerInstance().getInput(region, 1), "region[2]", NEO::FileLoggerInstance().getInput(region, 2), "dstOffset", dstOffset, "numEventsInWaitList", numEventsInWaitList, "eventWaitList", getClFileLogger().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList), "event", getClFileLogger().getEvents(reinterpret_cast(event), 1)); CommandQueue *pCommandQueue = nullptr; Image *pSrcImage = nullptr; Buffer *pDstBuffer = nullptr; retVal = validateObjects( WithCastToInternal(commandQueue, &pCommandQueue), WithCastToInternal(srcImage, &pSrcImage), WithCastToInternal(dstBuffer, &pDstBuffer)); if (CL_SUCCESS == retVal) { if (isPackedYuvImage(&pSrcImage->getImageFormat())) { retVal = validateYuvOperation(srcOrigin, region); if (retVal != CL_SUCCESS) { TRACING_EXIT(clEnqueueCopyImageToBuffer, &retVal); return retVal; } } retVal = Image::validateRegionAndOrigin(srcOrigin, region, pSrcImage->getImageDesc()); if (retVal != CL_SUCCESS) { TRACING_EXIT(clEnqueueCopyImageToBuffer, &retVal); return retVal; } if (!pCommandQueue->validateCapabilityForOperation(CL_QUEUE_CAPABILITY_TRANSFER_IMAGE_BUFFER_INTEL, numEventsInWaitList, eventWaitList, event)) { retVal = CL_INVALID_OPERATION; TRACING_EXIT(clEnqueueCopyImageToBuffer, &retVal); return retVal; } retVal = 
pCommandQueue->enqueueCopyImageToBuffer( pSrcImage, pDstBuffer, srcOrigin, region, dstOffset, numEventsInWaitList, eventWaitList, event); } DBG_LOG_INPUTS("event", getClFileLogger().getEvents(reinterpret_cast(event), 1u)); TRACING_EXIT(clEnqueueCopyImageToBuffer, &retVal); return retVal; } cl_int CL_API_CALL clEnqueueCopyBufferToImage(cl_command_queue commandQueue, cl_mem srcBuffer, cl_mem dstImage, size_t srcOffset, const size_t *dstOrigin, const size_t *region, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { TRACING_ENTER(clEnqueueCopyBufferToImage, &commandQueue, &srcBuffer, &dstImage, &srcOffset, &dstOrigin, ®ion, &numEventsInWaitList, &eventWaitList, &event); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue, "srcBuffer", srcBuffer, "dstImage", dstImage, "srcOffset", srcOffset, "dstOrigin[0]", NEO::FileLoggerInstance().getInput(dstOrigin, 0), "dstOrigin[1]", NEO::FileLoggerInstance().getInput(dstOrigin, 1), "dstOrigin[2]", NEO::FileLoggerInstance().getInput(dstOrigin, 2), "region[0]", NEO::FileLoggerInstance().getInput(region, 0), "region[1]", NEO::FileLoggerInstance().getInput(region, 1), "region[2]", NEO::FileLoggerInstance().getInput(region, 2), "numEventsInWaitList", numEventsInWaitList, "eventWaitList", getClFileLogger().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList), "event", getClFileLogger().getEvents(reinterpret_cast(event), 1)); CommandQueue *pCommandQueue = nullptr; Buffer *pSrcBuffer = nullptr; Image *pDstImage = nullptr; retVal = validateObjects( WithCastToInternal(commandQueue, &pCommandQueue), WithCastToInternal(srcBuffer, &pSrcBuffer), WithCastToInternal(dstImage, &pDstImage)); if (CL_SUCCESS == retVal) { if (isPackedYuvImage(&pDstImage->getImageFormat())) { retVal = validateYuvOperation(dstOrigin, region); if (retVal != CL_SUCCESS) { TRACING_EXIT(clEnqueueCopyBufferToImage, &retVal); return retVal; } } retVal = Image::validateRegionAndOrigin(dstOrigin, 
region, pDstImage->getImageDesc()); if (retVal != CL_SUCCESS) { TRACING_EXIT(clEnqueueCopyBufferToImage, &retVal); return retVal; } if (!pCommandQueue->validateCapabilityForOperation(CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_IMAGE_INTEL, numEventsInWaitList, eventWaitList, event)) { retVal = CL_INVALID_OPERATION; TRACING_EXIT(clEnqueueCopyBufferToImage, &retVal); return retVal; } retVal = pCommandQueue->enqueueCopyBufferToImage( pSrcBuffer, pDstImage, srcOffset, dstOrigin, region, numEventsInWaitList, eventWaitList, event); } DBG_LOG_INPUTS("event", getClFileLogger().getEvents(reinterpret_cast(event), 1u)); TRACING_EXIT(clEnqueueCopyBufferToImage, &retVal); return retVal; } void *CL_API_CALL clEnqueueMapBuffer(cl_command_queue commandQueue, cl_mem buffer, cl_bool blockingMap, cl_map_flags mapFlags, size_t offset, size_t cb, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, cl_int *errcodeRet) { TRACING_ENTER(clEnqueueMapBuffer, &commandQueue, &buffer, &blockingMap, &mapFlags, &offset, &cb, &numEventsInWaitList, &eventWaitList, &event, &errcodeRet); void *retPtr = nullptr; ErrorCodeHelper err(errcodeRet, CL_SUCCESS); cl_int retVal; API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue, "buffer", buffer, "blockingMap", blockingMap, "mapFlags", mapFlags, "offset", offset, "cb", cb, "numEventsInWaitList", numEventsInWaitList, "eventWaitList", getClFileLogger().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList), "event", getClFileLogger().getEvents(reinterpret_cast(event), 1)); do { auto pCommandQueue = castToObject(commandQueue); if (!pCommandQueue) { retVal = CL_INVALID_COMMAND_QUEUE; break; } auto pBuffer = castToObject(buffer); if (!pBuffer) { retVal = CL_INVALID_MEM_OBJECT; break; } if (pBuffer->mapMemObjFlagsInvalid(mapFlags)) { retVal = CL_INVALID_OPERATION; break; } if (!pCommandQueue->validateCapabilityForOperation(CL_QUEUE_CAPABILITY_MAP_BUFFER_INTEL, numEventsInWaitList, eventWaitList, event)) { retVal = 
CL_INVALID_OPERATION; break; } retPtr = pCommandQueue->enqueueMapBuffer( pBuffer, blockingMap, mapFlags, offset, cb, numEventsInWaitList, eventWaitList, event, retVal); } while (false); err.set(retVal); DBG_LOG_INPUTS("retPtr", retPtr, "event", getClFileLogger().getEvents(reinterpret_cast(event), 1u)); TRACING_EXIT(clEnqueueMapBuffer, &retPtr); return retPtr; } void *CL_API_CALL clEnqueueMapImage(cl_command_queue commandQueue, cl_mem image, cl_bool blockingMap, cl_map_flags mapFlags, const size_t *origin, const size_t *region, size_t *imageRowPitch, size_t *imageSlicePitch, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, cl_int *errcodeRet) { TRACING_ENTER(clEnqueueMapImage, &commandQueue, &image, &blockingMap, &mapFlags, &origin, ®ion, &imageRowPitch, &imageSlicePitch, &numEventsInWaitList, &eventWaitList, &event, &errcodeRet); void *retPtr = nullptr; ErrorCodeHelper err(errcodeRet, CL_SUCCESS); cl_int retVal; API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue, "image", image, "blockingMap", blockingMap, "mapFlags", mapFlags, "origin[0]", NEO::FileLoggerInstance().getInput(origin, 0), "origin[1]", NEO::FileLoggerInstance().getInput(origin, 1), "origin[2]", NEO::FileLoggerInstance().getInput(origin, 2), "region[0]", NEO::FileLoggerInstance().getInput(region, 0), "region[1]", NEO::FileLoggerInstance().getInput(region, 1), "region[2]", NEO::FileLoggerInstance().getInput(region, 2), "imageRowPitch", NEO::FileLoggerInstance().getInput(imageRowPitch, 0), "imageSlicePitch", NEO::FileLoggerInstance().getInput(imageSlicePitch, 0), "numEventsInWaitList", numEventsInWaitList, "eventWaitList", getClFileLogger().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList), "event", getClFileLogger().getEvents(reinterpret_cast(event), 1)); do { Image *pImage = nullptr; CommandQueue *pCommandQueue = nullptr; retVal = validateObjects( WithCastToInternal(commandQueue, &pCommandQueue), WithCastToInternal(image, &pImage)); if (retVal != 
CL_SUCCESS) { break; } if (pImage->mapMemObjFlagsInvalid(mapFlags)) { retVal = CL_INVALID_OPERATION; break; } if (isPackedYuvImage(&pImage->getImageFormat())) { retVal = validateYuvOperation(origin, region); if (retVal != CL_SUCCESS) { break; } } retVal = Image::validateRegionAndOrigin(origin, region, pImage->getImageDesc()); if (retVal != CL_SUCCESS) { break; } if (!pCommandQueue->validateCapabilityForOperation(CL_QUEUE_CAPABILITY_MAP_IMAGE_INTEL, numEventsInWaitList, eventWaitList, event)) { retVal = CL_INVALID_OPERATION; break; } retPtr = pCommandQueue->enqueueMapImage( pImage, blockingMap, mapFlags, origin, region, imageRowPitch, imageSlicePitch, numEventsInWaitList, eventWaitList, event, retVal); } while (false); err.set(retVal); DBG_LOG_INPUTS("retPtr", retPtr, "event", getClFileLogger().getEvents(reinterpret_cast(event), 1u)); TRACING_EXIT(clEnqueueMapImage, &retPtr); return retPtr; } cl_int CL_API_CALL clEnqueueUnmapMemObject(cl_command_queue commandQueue, cl_mem memObj, void *mappedPtr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { TRACING_ENTER(clEnqueueUnmapMemObject, &commandQueue, &memObj, &mappedPtr, &numEventsInWaitList, &eventWaitList, &event); CommandQueue *pCommandQueue = nullptr; MemObj *pMemObj = nullptr; cl_int retVal = validateObjects( WithCastToInternal(commandQueue, &pCommandQueue), WithCastToInternal(memObj, &pMemObj)); API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue, "memObj", memObj, "mappedPtr", mappedPtr, "numEventsInWaitList", numEventsInWaitList, "eventWaitList", getClFileLogger().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList), "event", getClFileLogger().getEvents(reinterpret_cast(event), 1)); if (retVal == CL_SUCCESS) { cl_command_queue_capabilities_intel requiredCapability = 0u; switch (pMemObj->peekClMemObjType()) { case CL_MEM_OBJECT_BUFFER: requiredCapability = CL_QUEUE_CAPABILITY_MAP_BUFFER_INTEL; break; case CL_MEM_OBJECT_IMAGE2D: case CL_MEM_OBJECT_IMAGE3D: case 
CL_MEM_OBJECT_IMAGE2D_ARRAY: case CL_MEM_OBJECT_IMAGE1D: case CL_MEM_OBJECT_IMAGE1D_ARRAY: case CL_MEM_OBJECT_IMAGE1D_BUFFER: requiredCapability = CL_QUEUE_CAPABILITY_MAP_IMAGE_INTEL; break; default: retVal = CL_INVALID_MEM_OBJECT; TRACING_EXIT(clEnqueueUnmapMemObject, &retVal); return retVal; } if (!pCommandQueue->validateCapabilityForOperation(requiredCapability, numEventsInWaitList, eventWaitList, event)) { retVal = CL_INVALID_OPERATION; TRACING_EXIT(clEnqueueUnmapMemObject, &retVal); return retVal; } retVal = pCommandQueue->enqueueUnmapMemObject(pMemObj, mappedPtr, numEventsInWaitList, eventWaitList, event); } DBG_LOG_INPUTS("event", getClFileLogger().getEvents(reinterpret_cast(event), 1u)); TRACING_EXIT(clEnqueueUnmapMemObject, &retVal); return retVal; } cl_int CL_API_CALL clEnqueueMigrateMemObjects(cl_command_queue commandQueue, cl_uint numMemObjects, const cl_mem *memObjects, cl_mem_migration_flags flags, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { TRACING_ENTER(clEnqueueMigrateMemObjects, &commandQueue, &numMemObjects, &memObjects, &flags, &numEventsInWaitList, &eventWaitList, &event); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue, "numMemObjects", numMemObjects, "memObjects", memObjects, "flags", flags, "numEventsInWaitList", numEventsInWaitList, "eventWaitList", getClFileLogger().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList), "event", getClFileLogger().getEvents(reinterpret_cast(event), 1)); CommandQueue *pCommandQueue = nullptr; retVal = validateObjects( WithCastToInternal(commandQueue, &pCommandQueue), EventWaitList(numEventsInWaitList, eventWaitList)); if (CL_SUCCESS != retVal) { TRACING_EXIT(clEnqueueMigrateMemObjects, &retVal); return retVal; } if (numMemObjects == 0 || memObjects == nullptr) { retVal = CL_INVALID_VALUE; TRACING_EXIT(clEnqueueMigrateMemObjects, &retVal); return retVal; } for (unsigned int object = 0; object < numMemObjects; object++) { 
auto memObject = castToObject(memObjects[object]); if (!memObject) { retVal = CL_INVALID_MEM_OBJECT; TRACING_EXIT(clEnqueueMigrateMemObjects, &retVal); return retVal; } } const cl_mem_migration_flags allValidFlags = CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED | CL_MIGRATE_MEM_OBJECT_HOST; if ((flags & (~allValidFlags)) != 0) { retVal = CL_INVALID_VALUE; TRACING_EXIT(clEnqueueMigrateMemObjects, &retVal); return retVal; } retVal = pCommandQueue->enqueueMigrateMemObjects(numMemObjects, memObjects, flags, numEventsInWaitList, eventWaitList, event); DBG_LOG_INPUTS("event", getClFileLogger().getEvents(reinterpret_cast(event), 1u)); TRACING_EXIT(clEnqueueMigrateMemObjects, &retVal); return retVal; } cl_int CL_API_CALL clEnqueueNDRangeKernel(cl_command_queue commandQueue, cl_kernel kernel, cl_uint workDim, const size_t *globalWorkOffset, const size_t *globalWorkSize, const size_t *localWorkSize, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { TRACING_ENTER(clEnqueueNDRangeKernel, &commandQueue, &kernel, &workDim, &globalWorkOffset, &globalWorkSize, &localWorkSize, &numEventsInWaitList, &eventWaitList, &event); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue, "cl_kernel", kernel, "globalWorkOffset[0]", NEO::FileLoggerInstance().getInput(globalWorkOffset, 0), "globalWorkOffset[1]", NEO::FileLoggerInstance().getInput(globalWorkOffset, 1), "globalWorkOffset[2]", NEO::FileLoggerInstance().getInput(globalWorkOffset, 2), "globalWorkSize", NEO::FileLoggerInstance().getSizes(globalWorkSize, workDim, false), "localWorkSize", NEO::FileLoggerInstance().getSizes(localWorkSize, workDim, true), "numEventsInWaitList", numEventsInWaitList, "eventWaitList", getClFileLogger().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList), "event", getClFileLogger().getEvents(reinterpret_cast(event), 1)); CommandQueue *pCommandQueue = nullptr; MultiDeviceKernel *pMultiDeviceKernel = nullptr; retVal = validateObjects( 
WithCastToInternal(commandQueue, &pCommandQueue), WithCastToInternal(kernel, &pMultiDeviceKernel), EventWaitList(numEventsInWaitList, eventWaitList)); if (CL_SUCCESS != retVal) { TRACING_EXIT(clEnqueueNDRangeKernel, &retVal); return retVal; } Kernel *pKernel = pMultiDeviceKernel->getKernel(pCommandQueue->getDevice().getRootDeviceIndex()); if ((pKernel->getExecutionType() != KernelExecutionType::Default) || pKernel->usesSyncBuffer()) { retVal = CL_INVALID_KERNEL; TRACING_EXIT(clEnqueueNDRangeKernel, &retVal); return retVal; } if (!pCommandQueue->validateCapabilityForOperation(CL_QUEUE_CAPABILITY_KERNEL_INTEL, numEventsInWaitList, eventWaitList, event)) { retVal = CL_INVALID_OPERATION; TRACING_EXIT(clEnqueueNDRangeKernel, &retVal); return retVal; } TakeOwnershipWrapper kernelOwnership(*pMultiDeviceKernel, gtpinIsGTPinInitialized()); if (gtpinIsGTPinInitialized()) { gtpinNotifyKernelSubmit(kernel, pCommandQueue); } retVal = pCommandQueue->enqueueKernel( pKernel, workDim, globalWorkOffset, globalWorkSize, localWorkSize, numEventsInWaitList, eventWaitList, event); DBG_LOG_INPUTS("event", getClFileLogger().getEvents(reinterpret_cast(event), 1u)); TRACING_EXIT(clEnqueueNDRangeKernel, &retVal); return retVal; } cl_int CL_API_CALL clEnqueueTask(cl_command_queue commandQueue, cl_kernel kernel, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { TRACING_ENTER(clEnqueueTask, &commandQueue, &kernel, &numEventsInWaitList, &eventWaitList, &event); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue, "kernel", kernel, "numEventsInWaitList", numEventsInWaitList, "eventWaitList", getClFileLogger().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList), "event", getClFileLogger().getEvents(reinterpret_cast(event), 1)); cl_uint workDim = 3; size_t *globalWorkOffset = nullptr; size_t globalWorkSize[3] = {1, 1, 1}; size_t localWorkSize[3] = {1, 1, 1}; retVal = (clEnqueueNDRangeKernel( commandQueue, kernel, 
workDim, globalWorkOffset, globalWorkSize, localWorkSize, numEventsInWaitList, eventWaitList, event)); TRACING_EXIT(clEnqueueTask, &retVal); return retVal; } cl_int CL_API_CALL clEnqueueNativeKernel(cl_command_queue commandQueue, void(CL_CALLBACK *userFunc)(void *), void *args, size_t cbArgs, cl_uint numMemObjects, const cl_mem *memList, const void **argsMemLoc, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { TRACING_ENTER(clEnqueueNativeKernel, &commandQueue, &userFunc, &args, &cbArgs, &numMemObjects, &memList, &argsMemLoc, &numEventsInWaitList, &eventWaitList, &event); cl_int retVal = CL_OUT_OF_HOST_MEMORY; API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue, "userFunc", userFunc, "args", args, "cbArgs", cbArgs, "numMemObjects", numMemObjects, "memList", memList, "argsMemLoc", argsMemLoc, "numEventsInWaitList", numEventsInWaitList, "eventWaitList", getClFileLogger().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList), "event", getClFileLogger().getEvents(reinterpret_cast(event), 1)); TRACING_EXIT(clEnqueueNativeKernel, &retVal); return retVal; } // deprecated OpenCL 1.1 cl_int CL_API_CALL clEnqueueMarker(cl_command_queue commandQueue, cl_event *event) { TRACING_ENTER(clEnqueueMarker, &commandQueue, &event); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue, "cl_event", event); auto pCommandQueue = castToObject(commandQueue); if (pCommandQueue) { if (!pCommandQueue->validateCapability(CL_QUEUE_CAPABILITY_MARKER_INTEL)) { retVal = CL_INVALID_OPERATION; TRACING_EXIT(clEnqueueMarker, &retVal); return retVal; } retVal = pCommandQueue->enqueueMarkerWithWaitList( 0, nullptr, event); TRACING_EXIT(clEnqueueMarker, &retVal); return retVal; } retVal = CL_INVALID_COMMAND_QUEUE; TRACING_EXIT(clEnqueueMarker, &retVal); return retVal; } // deprecated OpenCL 1.1 cl_int CL_API_CALL clEnqueueWaitForEvents(cl_command_queue commandQueue, cl_uint numEvents, const cl_event *eventList) { 
TRACING_ENTER(clEnqueueWaitForEvents, &commandQueue, &numEvents, &eventList); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue, "eventList", getClFileLogger().getEvents(reinterpret_cast(eventList), numEvents)); auto pCommandQueue = castToObject(commandQueue); if (!pCommandQueue) { retVal = CL_INVALID_COMMAND_QUEUE; TRACING_EXIT(clEnqueueWaitForEvents, &retVal); return retVal; } for (unsigned int i = 0; i < numEvents && retVal == CL_SUCCESS; i++) { retVal = validateObjects(eventList[i]); } if (retVal != CL_SUCCESS) { TRACING_EXIT(clEnqueueWaitForEvents, &retVal); return retVal; } if (!pCommandQueue->validateCapabilitiesForEventWaitList(numEvents, eventList)) { retVal = CL_INVALID_OPERATION; TRACING_EXIT(clEnqueueWaitForEvents, &retVal); return retVal; } retVal = Event::waitForEvents(numEvents, eventList); TRACING_EXIT(clEnqueueWaitForEvents, &retVal); return retVal; } // deprecated OpenCL 1.1 cl_int CL_API_CALL clEnqueueBarrier(cl_command_queue commandQueue) { TRACING_ENTER(clEnqueueBarrier, &commandQueue); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue); auto pCommandQueue = castToObject(commandQueue); if (pCommandQueue) { if (!pCommandQueue->validateCapability(CL_QUEUE_CAPABILITY_BARRIER_INTEL)) { retVal = CL_INVALID_OPERATION; TRACING_EXIT(clEnqueueBarrier, &retVal); return retVal; } retVal = pCommandQueue->enqueueBarrierWithWaitList( 0, nullptr, nullptr); TRACING_EXIT(clEnqueueBarrier, &retVal); return retVal; } retVal = CL_INVALID_COMMAND_QUEUE; TRACING_EXIT(clEnqueueBarrier, &retVal); return retVal; } cl_int CL_API_CALL clEnqueueMarkerWithWaitList(cl_command_queue commandQueue, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { TRACING_ENTER(clEnqueueMarkerWithWaitList, &commandQueue, &numEventsInWaitList, &eventWaitList, &event); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("cl_command_queue", commandQueue, 
"numEventsInWaitList", numEventsInWaitList, "eventWaitList", getClFileLogger().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList), "event", getClFileLogger().getEvents(reinterpret_cast(event), 1)); CommandQueue *pCommandQueue = nullptr; retVal = validateObjects( WithCastToInternal(commandQueue, &pCommandQueue), EventWaitList(numEventsInWaitList, eventWaitList)); if (CL_SUCCESS != retVal) { TRACING_EXIT(clEnqueueMarkerWithWaitList, &retVal); return retVal; } if (!pCommandQueue->validateCapabilityForOperation(CL_QUEUE_CAPABILITY_MARKER_INTEL, numEventsInWaitList, eventWaitList, event)) { retVal = CL_INVALID_OPERATION; TRACING_EXIT(clEnqueueMarkerWithWaitList, &retVal); return retVal; } retVal = pCommandQueue->enqueueMarkerWithWaitList( numEventsInWaitList, eventWaitList, event); TRACING_EXIT(clEnqueueMarkerWithWaitList, &retVal); return retVal; } cl_int CL_API_CALL clEnqueueBarrierWithWaitList(cl_command_queue commandQueue, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { TRACING_ENTER(clEnqueueBarrierWithWaitList, &commandQueue, &numEventsInWaitList, &eventWaitList, &event); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("cl_command_queue", commandQueue, "numEventsInWaitList", numEventsInWaitList, "eventWaitList", getClFileLogger().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList), "event", getClFileLogger().getEvents(reinterpret_cast(event), 1)); CommandQueue *pCommandQueue = nullptr; retVal = validateObjects( WithCastToInternal(commandQueue, &pCommandQueue), EventWaitList(numEventsInWaitList, eventWaitList)); if (CL_SUCCESS != retVal) { TRACING_EXIT(clEnqueueBarrierWithWaitList, &retVal); return retVal; } if (!pCommandQueue->validateCapabilityForOperation(CL_QUEUE_CAPABILITY_BARRIER_INTEL, numEventsInWaitList, eventWaitList, event)) { retVal = CL_INVALID_OPERATION; TRACING_EXIT(clEnqueueBarrierWithWaitList, &retVal); return retVal; } retVal = pCommandQueue->enqueueBarrierWithWaitList( 
numEventsInWaitList, eventWaitList, event);
    TRACING_EXIT(clEnqueueBarrierWithWaitList, &retVal);
    return retVal;
}

// Creates a command queue with performance counters enabled.
// Error codes written through errcodeRet on the rejection paths:
//   CL_INVALID_DEVICE            - device handle does not cast to a ClDevice, or
//                                  its capability table has instrumentation disabled
//   CL_INVALID_QUEUE_PROPERTIES  - CL_QUEUE_PROFILING_ENABLE missing, or either
//                                  CL_QUEUE_ON_DEVICE / CL_QUEUE_ON_DEVICE_DEFAULT set
//   CL_INVALID_OPERATION         - configuration is non-zero
//   CL_OUT_OF_RESOURCES          - queue was created but perf counters could not be
//                                  enabled (the queue is released again)
// Returns the new queue, or nullptr on any failure.
CL_API_ENTRY cl_command_queue CL_API_CALL clCreatePerfCountersCommandQueueINTEL(
    cl_context context,
    cl_device_id device,
    cl_command_queue_properties properties,
    cl_uint configuration,
    cl_int *errcodeRet) {
    API_ENTER(nullptr);

    DBG_LOG_INPUTS("context", context,
                   "device", device,
                   "properties", properties,
                   "configuration", configuration);

    ErrorCodeHelper err(errcodeRet, CL_SUCCESS);

    ClDevice *pDevice = nullptr;
    WithCastToInternal(device, &pDevice);
    if (pDevice == nullptr || !pDevice->getHardwareInfo().capabilityTable.instrumentationEnabled) {
        err.set(CL_INVALID_DEVICE);
        return nullptr;
    }

    const bool profilingRequested = (properties & CL_QUEUE_PROFILING_ENABLE) != 0;
    const bool onDeviceRequested = (properties & (CL_QUEUE_ON_DEVICE | CL_QUEUE_ON_DEVICE_DEFAULT)) != 0;
    if (!profilingRequested || onDeviceRequested) {
        err.set(CL_INVALID_QUEUE_PROPERTIES);
        return nullptr;
    }

    if (configuration != 0) {
        err.set(CL_INVALID_OPERATION);
        return nullptr;
    }

    cl_command_queue createdQueue = clCreateCommandQueue(context, device, properties, errcodeRet);
    if (createdQueue == nullptr) {
        return nullptr;
    }

    auto queueObject = castToObjectOrAbort(createdQueue);
    if (!queueObject->setPerfCountersEnabled()) {
        clReleaseCommandQueue(createdQueue);
        createdQueue = nullptr;
        err.set(CL_OUT_OF_RESOURCES);
    }
    return createdQueue;
}

CL_API_ENTRY cl_int CL_API_CALL clSetPerformanceConfigurationINTEL(
    cl_device_id device,
    cl_uint count,
    cl_uint *offsets,
    cl_uint *values) {
    // Not supported, covered by Metric Library DLL.
return CL_INVALID_OPERATION; } void *clHostMemAllocINTEL( cl_context context, const cl_mem_properties_intel *properties, size_t size, cl_uint alignment, cl_int *errcodeRet) { if (DebugManager.flags.ForceExtendedUSMBufferSize.get() >= 1) { size += (MemoryConstants::pageSize * DebugManager.flags.ForceExtendedUSMBufferSize.get()); } Context *neoContext = nullptr; ErrorCodeHelper err(errcodeRet, CL_SUCCESS); auto retVal = validateObjects(WithCastToInternal(context, &neoContext)); if (retVal != CL_SUCCESS) { err.set(retVal); return nullptr; } SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::HOST_UNIFIED_MEMORY, neoContext->getRootDeviceIndices(), neoContext->getDeviceBitfields()); cl_mem_flags flags = 0; cl_mem_flags_intel flagsIntel = 0; cl_mem_alloc_flags_intel allocflags = 0; if (!ClMemoryPropertiesHelper::parseMemoryProperties(properties, unifiedMemoryProperties.allocationFlags, flags, flagsIntel, allocflags, MemoryPropertiesHelper::ObjType::UNKNOWN, *neoContext)) { err.set(CL_INVALID_VALUE); return nullptr; } if (size > neoContext->getDevice(0u)->getSharedDeviceInfo().maxMemAllocSize && !unifiedMemoryProperties.allocationFlags.flags.allowUnrestrictedSize) { err.set(CL_INVALID_BUFFER_SIZE); return nullptr; } return neoContext->getSVMAllocsManager()->createHostUnifiedMemoryAllocation(size, unifiedMemoryProperties); } void *clDeviceMemAllocINTEL( cl_context context, cl_device_id device, const cl_mem_properties_intel *properties, size_t size, cl_uint alignment, cl_int *errcodeRet) { if (DebugManager.flags.ForceExtendedUSMBufferSize.get() >= 1) { size += (MemoryConstants::pageSize * DebugManager.flags.ForceExtendedUSMBufferSize.get()); } Context *neoContext = nullptr; ClDevice *neoDevice = nullptr; ErrorCodeHelper err(errcodeRet, CL_SUCCESS); auto retVal = validateObjects(WithCastToInternal(context, &neoContext), WithCastToInternal(device, &neoDevice)); if (retVal != CL_SUCCESS) { err.set(retVal); return nullptr; } auto 
subDeviceBitfields = neoContext->getDeviceBitfields(); subDeviceBitfields[neoDevice->getRootDeviceIndex()] = neoDevice->getDeviceBitfield(); SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::DEVICE_UNIFIED_MEMORY, neoContext->getRootDeviceIndices(), subDeviceBitfields); cl_mem_flags flags = 0; cl_mem_flags_intel flagsIntel = 0; cl_mem_alloc_flags_intel allocflags = 0; if (!ClMemoryPropertiesHelper::parseMemoryProperties(properties, unifiedMemoryProperties.allocationFlags, flags, flagsIntel, allocflags, MemoryPropertiesHelper::ObjType::UNKNOWN, *neoContext)) { err.set(CL_INVALID_VALUE); return nullptr; } if (size > neoDevice->getDevice().getDeviceInfo().maxMemAllocSize && !unifiedMemoryProperties.allocationFlags.flags.allowUnrestrictedSize) { err.set(CL_INVALID_BUFFER_SIZE); return nullptr; } unifiedMemoryProperties.device = &neoDevice->getDevice(); return neoContext->getSVMAllocsManager()->createUnifiedMemoryAllocation(size, unifiedMemoryProperties); } void *clSharedMemAllocINTEL( cl_context context, cl_device_id device, const cl_mem_properties_intel *properties, size_t size, cl_uint alignment, cl_int *errcodeRet) { if (DebugManager.flags.ForceExtendedUSMBufferSize.get() >= 1) { size += (MemoryConstants::pageSize * DebugManager.flags.ForceExtendedUSMBufferSize.get()); } Context *neoContext = nullptr; ErrorCodeHelper err(errcodeRet, CL_SUCCESS); auto retVal = validateObjects(WithCastToInternal(context, &neoContext)); if (retVal != CL_SUCCESS) { err.set(retVal); return nullptr; } cl_mem_flags flags = 0; cl_mem_flags_intel flagsIntel = 0; cl_mem_alloc_flags_intel allocflags = 0; ClDevice *neoDevice = castToObject(device); Device *unifiedMemoryPropertiesDevice = nullptr; auto subDeviceBitfields = neoContext->getDeviceBitfields(); if (neoDevice) { if (!neoContext->isDeviceAssociated(*neoDevice)) { err.set(CL_INVALID_DEVICE); return nullptr; } unifiedMemoryPropertiesDevice = &neoDevice->getDevice(); 
subDeviceBitfields[neoDevice->getRootDeviceIndex()] = neoDevice->getDeviceBitfield(); } else { neoDevice = neoContext->getDevice(0); } SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::SHARED_UNIFIED_MEMORY, neoContext->getRootDeviceIndices(), subDeviceBitfields); unifiedMemoryProperties.device = unifiedMemoryPropertiesDevice; if (!ClMemoryPropertiesHelper::parseMemoryProperties(properties, unifiedMemoryProperties.allocationFlags, flags, flagsIntel, allocflags, MemoryPropertiesHelper::ObjType::UNKNOWN, *neoContext)) { err.set(CL_INVALID_VALUE); return nullptr; } if (size > neoDevice->getSharedDeviceInfo().maxMemAllocSize && !unifiedMemoryProperties.allocationFlags.flags.allowUnrestrictedSize) { err.set(CL_INVALID_BUFFER_SIZE); return nullptr; } auto ptr = neoContext->getSVMAllocsManager()->createSharedUnifiedMemoryAllocation(size, unifiedMemoryProperties, neoContext->getSpecialQueue(neoDevice->getRootDeviceIndex())); if (!ptr) { err.set(CL_OUT_OF_RESOURCES); } return ptr; } cl_int clMemFreeCommon(cl_context context, const void *ptr, bool blocking) { Context *neoContext = nullptr; auto retVal = validateObjects(WithCastToInternal(context, &neoContext)); if (retVal != CL_SUCCESS) { return retVal; } if (ptr && !neoContext->getSVMAllocsManager()->freeSVMAlloc(const_cast(ptr), blocking)) { return CL_INVALID_VALUE; } if (neoContext->getSVMAllocsManager()->getSvmMapOperation(ptr)) { neoContext->getSVMAllocsManager()->removeSvmMapOperation(ptr); } return CL_SUCCESS; } cl_int clMemFreeINTEL( cl_context context, void *ptr) { return clMemFreeCommon(context, ptr, false); } cl_int clMemBlockingFreeINTEL( cl_context context, void *ptr) { return clMemFreeCommon(context, ptr, true); } cl_int clGetMemAllocInfoINTEL( cl_context context, const void *ptr, cl_mem_info_intel paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) { Context *pContext = nullptr; cl_int retVal = CL_SUCCESS; retVal = 
validateObject(WithCastToInternal(context, &pContext)); if (!pContext) { return retVal; } auto allocationsManager = pContext->getSVMAllocsManager(); if (!allocationsManager) { return CL_INVALID_VALUE; } GetInfoHelper info(paramValue, paramValueSize, paramValueSizeRet); auto unifiedMemoryAllocation = allocationsManager->getSVMAlloc(ptr); switch (paramName) { case CL_MEM_ALLOC_TYPE_INTEL: { if (!unifiedMemoryAllocation) { retVal = changeGetInfoStatusToCLResultType(info.set(CL_MEM_TYPE_UNKNOWN_INTEL)); return retVal; } else if (unifiedMemoryAllocation->memoryType == InternalMemoryType::HOST_UNIFIED_MEMORY) { retVal = changeGetInfoStatusToCLResultType(info.set(CL_MEM_TYPE_HOST_INTEL)); return retVal; } else if (unifiedMemoryAllocation->memoryType == InternalMemoryType::DEVICE_UNIFIED_MEMORY) { retVal = changeGetInfoStatusToCLResultType(info.set(CL_MEM_TYPE_DEVICE_INTEL)); return retVal; } else { retVal = changeGetInfoStatusToCLResultType(info.set(CL_MEM_TYPE_SHARED_INTEL)); return retVal; } break; } case CL_MEM_ALLOC_BASE_PTR_INTEL: { if (!unifiedMemoryAllocation) { return changeGetInfoStatusToCLResultType(info.set(nullptr)); } return changeGetInfoStatusToCLResultType(info.set(unifiedMemoryAllocation->gpuAllocations.getDefaultGraphicsAllocation()->getGpuAddress())); } case CL_MEM_ALLOC_SIZE_INTEL: { if (!unifiedMemoryAllocation) { return changeGetInfoStatusToCLResultType(info.set(0u)); } return changeGetInfoStatusToCLResultType(info.set(unifiedMemoryAllocation->size)); } case CL_MEM_ALLOC_FLAGS_INTEL: { if (!unifiedMemoryAllocation) { return changeGetInfoStatusToCLResultType(info.set(0u)); } return changeGetInfoStatusToCLResultType(info.set(unifiedMemoryAllocation->allocationFlagsProperty.allAllocFlags)); } case CL_MEM_ALLOC_DEVICE_INTEL: { if (!unifiedMemoryAllocation) { return changeGetInfoStatusToCLResultType(info.set(static_cast(nullptr))); } auto device = unifiedMemoryAllocation->device ? 
unifiedMemoryAllocation->device->getSpecializedDevice() : nullptr; return changeGetInfoStatusToCLResultType(info.set(device)); } default: { } } return CL_INVALID_VALUE; } cl_int clSetKernelArgMemPointerINTEL( cl_kernel kernel, cl_uint argIndex, const void *argValue) { return clSetKernelArgSVMPointer(kernel, argIndex, argValue); } cl_int clEnqueueMemsetINTEL( cl_command_queue commandQueue, void *dstPtr, cl_int value, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { auto retVal = clEnqueueSVMMemFill(commandQueue, dstPtr, &value, 1u, size, numEventsInWaitList, eventWaitList, event); if (retVal == CL_SUCCESS && event) { auto pEvent = castToObjectOrAbort(*event); pEvent->setCmdType(CL_COMMAND_MEMSET_INTEL); } return retVal; } cl_int clEnqueueMemFillINTEL( cl_command_queue commandQueue, void *dstPtr, const void *pattern, size_t patternSize, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { auto retVal = clEnqueueSVMMemFill(commandQueue, dstPtr, pattern, patternSize, size, numEventsInWaitList, eventWaitList, event); if (retVal == CL_SUCCESS && event) { auto pEvent = castToObjectOrAbort(*event); pEvent->setCmdType(CL_COMMAND_MEMFILL_INTEL); } return retVal; } cl_int clEnqueueMemcpyINTEL( cl_command_queue commandQueue, cl_bool blocking, void *dstPtr, const void *srcPtr, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { auto retVal = clEnqueueSVMMemcpy(commandQueue, blocking, dstPtr, srcPtr, size, numEventsInWaitList, eventWaitList, event); if (retVal == CL_SUCCESS && event) { auto pEvent = castToObjectOrAbort(*event); pEvent->setCmdType(CL_COMMAND_MEMCPY_INTEL); } return retVal; } cl_int clEnqueueMigrateMemINTEL( cl_command_queue commandQueue, const void *ptr, size_t size, cl_mem_migration_flags flags, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { cl_int retVal = CL_SUCCESS; CommandQueue *pCommandQueue = nullptr; 
retVal = validateObjects(WithCastToInternal(commandQueue, &pCommandQueue), ptr, EventWaitList(numEventsInWaitList, eventWaitList)); if (retVal == CL_SUCCESS) { pCommandQueue->enqueueMarkerWithWaitList(numEventsInWaitList, eventWaitList, event); if (event) { auto pEvent = castToObjectOrAbort(*event); pEvent->setCmdType(CL_COMMAND_MIGRATEMEM_INTEL); } } return retVal; } cl_int clEnqueueMemAdviseINTEL( cl_command_queue commandQueue, const void *ptr, size_t size, cl_mem_advice_intel advice, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { cl_int retVal = CL_SUCCESS; CommandQueue *pCommandQueue = nullptr; retVal = validateObjects(WithCastToInternal(commandQueue, &pCommandQueue), ptr, EventWaitList(numEventsInWaitList, eventWaitList)); if (retVal == CL_SUCCESS) { pCommandQueue->enqueueMarkerWithWaitList(numEventsInWaitList, eventWaitList, event); if (event) { auto pEvent = castToObjectOrAbort(*event); pEvent->setCmdType(CL_COMMAND_MEMADVISE_INTEL); } } return retVal; } cl_command_queue CL_API_CALL clCreateCommandQueueWithPropertiesKHR(cl_context context, cl_device_id device, const cl_queue_properties_khr *properties, cl_int *errcodeRet) { API_ENTER(errcodeRet); DBG_LOG_INPUTS("context", context, "device", device, "properties", properties); return clCreateCommandQueueWithProperties(context, device, properties, errcodeRet); } cl_accelerator_intel CL_API_CALL clCreateAcceleratorINTEL( cl_context context, cl_accelerator_type_intel acceleratorType, size_t descriptorSize, const void *descriptor, cl_int *errcodeRet) { cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("context", context, "acceleratorType", acceleratorType, "descriptorSize", descriptorSize, "descriptor", NEO::FileLoggerInstance().infoPointerToString(descriptor, descriptorSize)); cl_accelerator_intel accelerator = nullptr; do { retVal = validateObjects(context); if (retVal != CL_SUCCESS) { retVal = CL_INVALID_CONTEXT; break; } Context *pContext = castToObject(context); 
DEBUG_BREAK_IF(!pContext); switch (acceleratorType) { case CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL: accelerator = VmeAccelerator::create( pContext, acceleratorType, descriptorSize, descriptor, retVal); break; default: retVal = CL_INVALID_ACCELERATOR_TYPE_INTEL; } } while (false); if (errcodeRet) { *errcodeRet = retVal; } return accelerator; } cl_int CL_API_CALL clRetainAcceleratorINTEL( cl_accelerator_intel accelerator) { cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("accelerator", accelerator); IntelAccelerator *pAccelerator = nullptr; do { pAccelerator = castToObject(accelerator); if (!pAccelerator) { retVal = CL_INVALID_ACCELERATOR_INTEL; break; } pAccelerator->retain(); } while (false); return retVal; } cl_int CL_API_CALL clGetAcceleratorInfoINTEL( cl_accelerator_intel accelerator, cl_accelerator_info_intel paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) { cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("accelerator", accelerator, "paramName", paramName, "paramValueSize", paramValueSize, "paramValue", NEO::FileLoggerInstance().infoPointerToString(paramValue, paramValueSize), "paramValueSizeRet", paramValueSizeRet); IntelAccelerator *pAccelerator = nullptr; do { pAccelerator = castToObject(accelerator); if (!pAccelerator) { retVal = CL_INVALID_ACCELERATOR_INTEL; break; } retVal = pAccelerator->getInfo( paramName, paramValueSize, paramValue, paramValueSizeRet); } while (false); return retVal; } cl_int CL_API_CALL clReleaseAcceleratorINTEL( cl_accelerator_intel accelerator) { cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("accelerator", accelerator); IntelAccelerator *pAccelerator = nullptr; do { pAccelerator = castToObject(accelerator); if (!pAccelerator) { retVal = CL_INVALID_ACCELERATOR_INTEL; break; } pAccelerator->release(); } while (false); return retVal; } cl_program CL_API_CALL clCreateProgramWithILKHR(cl_context context, const void *il, size_t length, cl_int *errcodeRet) { 
cl_int retVal = CL_SUCCESS;
    API_ENTER(&retVal);
    DBG_LOG_INPUTS("context", context,
                   "il", NEO::FileLoggerInstance().infoPointerToString(il, length),
                   "length", length);

    cl_program program = nullptr;
    Context *pContext = nullptr;
    // Validates both the context handle and the il pointer before creating the program.
    retVal = validateObjects(WithCastToInternal(context, &pContext), il);
    if (retVal == CL_SUCCESS) {
        program = ProgramFunctions::createFromIL(
            pContext,
            il,
            length,
            retVal);
    }

    if (errcodeRet != nullptr) {
        *errcodeRet = retVal;
    }

    return program;
}

// KHR alias: forwards every argument unchanged to
// clGetKernelSuggestedLocalWorkSizeINTEL and returns its status.
cl_int CL_API_CALL clGetKernelSuggestedLocalWorkSizeKHR(cl_command_queue commandQueue,
                                                        cl_kernel kernel,
                                                        cl_uint workDim,
                                                        const size_t *globalWorkOffset,
                                                        const size_t *globalWorkSize,
                                                        size_t *suggestedLocalWorkSize) {
    return clGetKernelSuggestedLocalWorkSizeINTEL(commandQueue, kernel, workDim, globalWorkOffset, globalWorkSize, suggestedLocalWorkSize);
}

// Lookup helper for clGetExtensionFunctionAddress. Expands to a block that
// compares the enclosing function's `funcName` with the stringized `name`; on a
// match it casts `name` to void *, emits the clGetExtensionFunctionAddress
// tracing exit and returns the pointer from the enclosing function.
// Requires a `const char *funcName` to be in scope at the expansion site.
#define RETURN_FUNC_PTR_IF_EXIST(name)                                  \
    {                                                                   \
        if (!strcmp(funcName, #name)) {                                 \
            void *ret = ((void *)(name));                               \
            TRACING_EXIT(clGetExtensionFunctionAddress, (void **)&ret); \
            return ret;                                                 \
        }                                                               \
    }
void *CL_API_CALL clGetExtensionFunctionAddress(const char *funcName) {
    TRACING_ENTER(clGetExtensionFunctionAddress, &funcName);

    DBG_LOG_INPUTS("funcName", funcName);
    // Support an internal call by the ICD
    RETURN_FUNC_PTR_IF_EXIST(clIcdGetPlatformIDsKHR);

    //perf counters
    RETURN_FUNC_PTR_IF_EXIST(clCreatePerfCountersCommandQueueINTEL);
    RETURN_FUNC_PTR_IF_EXIST(clSetPerformanceConfigurationINTEL);
    // Support device extensions
    RETURN_FUNC_PTR_IF_EXIST(clCreateAcceleratorINTEL);
    RETURN_FUNC_PTR_IF_EXIST(clGetAcceleratorInfoINTEL);
    RETURN_FUNC_PTR_IF_EXIST(clRetainAcceleratorINTEL);
    RETURN_FUNC_PTR_IF_EXIST(clReleaseAcceleratorINTEL);
    RETURN_FUNC_PTR_IF_EXIST(clCreateBufferWithPropertiesINTEL);
    RETURN_FUNC_PTR_IF_EXIST(clCreateImageWithPropertiesINTEL);
    RETURN_FUNC_PTR_IF_EXIST(clAddCommentINTEL);
    RETURN_FUNC_PTR_IF_EXIST(clEnqueueVerifyMemoryINTEL);

    RETURN_FUNC_PTR_IF_EXIST(clCreateTracingHandleINTEL);
    RETURN_FUNC_PTR_IF_EXIST(clSetTracingPointINTEL);
RETURN_FUNC_PTR_IF_EXIST(clDestroyTracingHandleINTEL); RETURN_FUNC_PTR_IF_EXIST(clEnableTracingINTEL); RETURN_FUNC_PTR_IF_EXIST(clDisableTracingINTEL); RETURN_FUNC_PTR_IF_EXIST(clGetTracingStateINTEL); RETURN_FUNC_PTR_IF_EXIST(clHostMemAllocINTEL); RETURN_FUNC_PTR_IF_EXIST(clDeviceMemAllocINTEL); RETURN_FUNC_PTR_IF_EXIST(clSharedMemAllocINTEL); RETURN_FUNC_PTR_IF_EXIST(clMemFreeINTEL); RETURN_FUNC_PTR_IF_EXIST(clMemBlockingFreeINTEL); RETURN_FUNC_PTR_IF_EXIST(clGetMemAllocInfoINTEL); RETURN_FUNC_PTR_IF_EXIST(clSetKernelArgMemPointerINTEL); RETURN_FUNC_PTR_IF_EXIST(clEnqueueMemsetINTEL); RETURN_FUNC_PTR_IF_EXIST(clEnqueueMemFillINTEL); RETURN_FUNC_PTR_IF_EXIST(clEnqueueMemcpyINTEL); RETURN_FUNC_PTR_IF_EXIST(clEnqueueMigrateMemINTEL); RETURN_FUNC_PTR_IF_EXIST(clEnqueueMemAdviseINTEL); RETURN_FUNC_PTR_IF_EXIST(clGetDeviceFunctionPointerINTEL); RETURN_FUNC_PTR_IF_EXIST(clGetDeviceGlobalVariablePointerINTEL); RETURN_FUNC_PTR_IF_EXIST(clGetKernelMaxConcurrentWorkGroupCountINTEL); RETURN_FUNC_PTR_IF_EXIST(clGetKernelSuggestedLocalWorkSizeINTEL); RETURN_FUNC_PTR_IF_EXIST(clEnqueueNDCountKernelINTEL); void *ret = sharingFactory.getExtensionFunctionAddress(funcName); if (ret != nullptr) { TRACING_EXIT(clGetExtensionFunctionAddress, &ret); return ret; } // SPIR-V support through the cl_khr_il_program extension RETURN_FUNC_PTR_IF_EXIST(clCreateProgramWithILKHR); RETURN_FUNC_PTR_IF_EXIST(clCreateCommandQueueWithPropertiesKHR); RETURN_FUNC_PTR_IF_EXIST(clSetProgramSpecializationConstant); RETURN_FUNC_PTR_IF_EXIST(clGetKernelSuggestedLocalWorkSizeKHR); ret = getAdditionalExtensionFunctionAddress(funcName); TRACING_EXIT(clGetExtensionFunctionAddress, &ret); return ret; } // OpenCL 1.2 void *CL_API_CALL clGetExtensionFunctionAddressForPlatform(cl_platform_id platform, const char *funcName) { TRACING_ENTER(clGetExtensionFunctionAddressForPlatform, &platform, &funcName); DBG_LOG_INPUTS("platform", platform, "funcName", funcName); auto pPlatform = castToObject(platform); if (pPlatform 
== nullptr) { void *ret = nullptr; TRACING_EXIT(clGetExtensionFunctionAddressForPlatform, &ret); return ret; } void *ret = clGetExtensionFunctionAddress(funcName); TRACING_EXIT(clGetExtensionFunctionAddressForPlatform, &ret); return ret; } void *CL_API_CALL clSVMAlloc(cl_context context, cl_svm_mem_flags flags, size_t size, cl_uint alignment) { TRACING_ENTER(clSVMAlloc, &context, &flags, &size, &alignment); DBG_LOG_INPUTS("context", context, "flags", flags, "size", size, "alignment", alignment); void *pAlloc = nullptr; Context *pContext = nullptr; if (validateObjects(WithCastToInternal(context, &pContext)) != CL_SUCCESS) { TRACING_EXIT(clSVMAlloc, &pAlloc); return pAlloc; } { // allow CL_MEM_ALLOW_UNRESTRICTED_SIZE_INTEL with every combination cl_svm_mem_flags tempFlags = flags & (~CL_MEM_ALLOW_UNRESTRICTED_SIZE_INTEL); if (tempFlags == 0) { tempFlags = CL_MEM_READ_WRITE; } if (!((tempFlags == CL_MEM_READ_WRITE) || (tempFlags == CL_MEM_WRITE_ONLY) || (tempFlags == CL_MEM_READ_ONLY) || (tempFlags == CL_MEM_SVM_FINE_GRAIN_BUFFER) || (tempFlags == (CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS)) || (tempFlags == (CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER)) || (tempFlags == (CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS)) || (tempFlags == (CL_MEM_WRITE_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER)) || (tempFlags == (CL_MEM_WRITE_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS)) || (tempFlags == (CL_MEM_READ_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER)) || (tempFlags == (CL_MEM_READ_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS)))) { TRACING_EXIT(clSVMAlloc, &pAlloc); return pAlloc; } } auto pDevice = pContext->getDevice(0); bool allowUnrestrictedSize = (flags & CL_MEM_ALLOW_UNRESTRICTED_SIZE_INTEL) || DebugManager.flags.AllowUnrestrictedSize.get(); if ((size == 0) || (!allowUnrestrictedSize && (size > pDevice->getSharedDeviceInfo().maxMemAllocSize))) { TRACING_EXIT(clSVMAlloc, &pAlloc); return pAlloc; } if ((alignment && (alignment 
& (alignment - 1))) || (alignment > sizeof(cl_ulong16))) { TRACING_EXIT(clSVMAlloc, &pAlloc); return pAlloc; } const HardwareInfo &hwInfo = pDevice->getHardwareInfo(); if (!hwInfo.capabilityTable.ftrSvm) { TRACING_EXIT(clSVMAlloc, &pAlloc); return pAlloc; } if (flags & CL_MEM_SVM_FINE_GRAIN_BUFFER) { bool supportsFineGrained = hwInfo.capabilityTable.ftrSupportsCoherency; if (DebugManager.flags.ForceFineGrainedSVMSupport.get() != -1) { supportsFineGrained = !!DebugManager.flags.ForceFineGrainedSVMSupport.get(); } if (!supportsFineGrained) { TRACING_EXIT(clSVMAlloc, &pAlloc); return pAlloc; } } pAlloc = pContext->getSVMAllocsManager()->createSVMAlloc(size, MemObjHelper::getSvmAllocationProperties(flags), pContext->getRootDeviceIndices(), pContext->getDeviceBitfields()); if (pContext->isProvidingPerformanceHints()) { pContext->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_SVM_ALLOC_MEETS_ALIGNMENT_RESTRICTIONS, pAlloc, size); } TRACING_EXIT(clSVMAlloc, &pAlloc); return pAlloc; } void CL_API_CALL clSVMFree(cl_context context, void *svmPointer) { TRACING_ENTER(clSVMFree, &context, &svmPointer); DBG_LOG_INPUTS("context", context, "svmPointer", svmPointer); Context *pContext = nullptr; cl_int retVal = validateObjects( WithCastToInternal(context, &pContext)); if (retVal != CL_SUCCESS) { TRACING_EXIT(clSVMFree, nullptr); return; } auto pClDevice = pContext->getDevice(0); if (!pClDevice->getHardwareInfo().capabilityTable.ftrSvm) { TRACING_EXIT(clSVMFree, nullptr); return; } pContext->getSVMAllocsManager()->freeSVMAlloc(svmPointer); TRACING_EXIT(clSVMFree, nullptr); } cl_int CL_API_CALL clEnqueueSVMFree(cl_command_queue commandQueue, cl_uint numSvmPointers, void *svmPointers[], void(CL_CALLBACK *pfnFreeFunc)(cl_command_queue queue, cl_uint numSvmPointers, void *svmPointers[], void *userData), void *userData, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { TRACING_ENTER(clEnqueueSVMFree, &commandQueue, &numSvmPointers, 
&svmPointers, &pfnFreeFunc, &userData, &numEventsInWaitList, &eventWaitList, &event); CommandQueue *pCommandQueue = nullptr; cl_int retVal = validateObjects( WithCastToInternal(commandQueue, &pCommandQueue), EventWaitList(numEventsInWaitList, eventWaitList)); API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue, "numSvmPointers", numSvmPointers, "svmPointers", svmPointers, "pfnFreeFunc", pfnFreeFunc, "userData", userData, "numEventsInWaitList", numEventsInWaitList, "eventWaitList", getClFileLogger().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList), "event", getClFileLogger().getEvents(reinterpret_cast(event), 1)); if (retVal != CL_SUCCESS) { TRACING_EXIT(clEnqueueSVMFree, &retVal); return retVal; } auto &device = pCommandQueue->getDevice(); if (!device.getHardwareInfo().capabilityTable.ftrSvm) { retVal = CL_INVALID_OPERATION; TRACING_EXIT(clEnqueueSVMFree, &retVal); return retVal; } if (((svmPointers != nullptr) && (numSvmPointers == 0)) || ((svmPointers == nullptr) && (numSvmPointers != 0))) { retVal = CL_INVALID_VALUE; TRACING_EXIT(clEnqueueSVMFree, &retVal); return retVal; } retVal = pCommandQueue->enqueueSVMFree( numSvmPointers, svmPointers, pfnFreeFunc, userData, numEventsInWaitList, eventWaitList, event); TRACING_EXIT(clEnqueueSVMFree, &retVal); return retVal; } cl_int CL_API_CALL clEnqueueSVMMemcpy(cl_command_queue commandQueue, cl_bool blockingCopy, void *dstPtr, const void *srcPtr, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { TRACING_ENTER(clEnqueueSVMMemcpy, &commandQueue, &blockingCopy, &dstPtr, &srcPtr, &size, &numEventsInWaitList, &eventWaitList, &event); CommandQueue *pCommandQueue = nullptr; cl_int retVal = validateObjects( WithCastToInternal(commandQueue, &pCommandQueue), EventWaitList(numEventsInWaitList, eventWaitList)); API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue, "blockingCopy", blockingCopy, "dstPtr", dstPtr, "srcPtr", srcPtr, "size", size, 
"numEventsInWaitList", numEventsInWaitList, "eventWaitList", getClFileLogger().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList), "event", getClFileLogger().getEvents(reinterpret_cast(event), 1)); if (retVal != CL_SUCCESS) { TRACING_EXIT(clEnqueueSVMMemcpy, &retVal); return retVal; } auto &device = pCommandQueue->getDevice(); if (!device.getHardwareInfo().capabilityTable.ftrSvm) { retVal = CL_INVALID_OPERATION; TRACING_EXIT(clEnqueueSVMMemcpy, &retVal); return retVal; } if ((dstPtr == nullptr) || (srcPtr == nullptr)) { retVal = CL_INVALID_VALUE; TRACING_EXIT(clEnqueueSVMMemcpy, &retVal); return retVal; } if (!pCommandQueue->validateCapabilityForOperation(CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_INTEL, numEventsInWaitList, eventWaitList, event)) { retVal = CL_INVALID_OPERATION; TRACING_EXIT(clEnqueueSVMMemcpy, &retVal); return retVal; } retVal = pCommandQueue->enqueueSVMMemcpy( blockingCopy, dstPtr, srcPtr, size, numEventsInWaitList, eventWaitList, event); TRACING_EXIT(clEnqueueSVMMemcpy, &retVal); return retVal; } cl_int CL_API_CALL clEnqueueSVMMemFill(cl_command_queue commandQueue, void *svmPtr, const void *pattern, size_t patternSize, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { TRACING_ENTER(clEnqueueSVMMemFill, &commandQueue, &svmPtr, &pattern, &patternSize, &size, &numEventsInWaitList, &eventWaitList, &event); CommandQueue *pCommandQueue = nullptr; cl_int retVal = validateObjects( WithCastToInternal(commandQueue, &pCommandQueue), EventWaitList(numEventsInWaitList, eventWaitList)); API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue, "svmPtr", NEO::FileLoggerInstance().infoPointerToString(svmPtr, size), "pattern", NEO::FileLoggerInstance().infoPointerToString(pattern, patternSize), "patternSize", patternSize, "size", size, "numEventsInWaitList", numEventsInWaitList, "eventWaitList", getClFileLogger().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList), "event", 
getClFileLogger().getEvents(reinterpret_cast(event), 1)); if (retVal != CL_SUCCESS) { TRACING_EXIT(clEnqueueSVMMemFill, &retVal); return retVal; } auto &device = pCommandQueue->getDevice(); if (!device.getHardwareInfo().capabilityTable.ftrSvm) { retVal = CL_INVALID_OPERATION; TRACING_EXIT(clEnqueueSVMMemFill, &retVal); return retVal; } if ((svmPtr == nullptr) || (size == 0)) { retVal = CL_INVALID_VALUE; TRACING_EXIT(clEnqueueSVMMemFill, &retVal); return retVal; } if (!pCommandQueue->validateCapabilityForOperation(CL_QUEUE_CAPABILITY_FILL_BUFFER_INTEL, numEventsInWaitList, eventWaitList, event)) { retVal = CL_INVALID_OPERATION; TRACING_EXIT(clEnqueueSVMMemFill, &retVal); return retVal; } retVal = pCommandQueue->enqueueSVMMemFill( svmPtr, pattern, patternSize, size, numEventsInWaitList, eventWaitList, event); TRACING_EXIT(clEnqueueSVMMemFill, &retVal); return retVal; } cl_int CL_API_CALL clEnqueueSVMMap(cl_command_queue commandQueue, cl_bool blockingMap, cl_map_flags mapFlags, void *svmPtr, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { TRACING_ENTER(clEnqueueSVMMap, &commandQueue, &blockingMap, &mapFlags, &svmPtr, &size, &numEventsInWaitList, &eventWaitList, &event); CommandQueue *pCommandQueue = nullptr; cl_int retVal = validateObjects( WithCastToInternal(commandQueue, &pCommandQueue), EventWaitList(numEventsInWaitList, eventWaitList)); API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue, "blockingMap", blockingMap, "mapFlags", mapFlags, "svmPtr", NEO::FileLoggerInstance().infoPointerToString(svmPtr, size), "size", size, "numEventsInWaitList", numEventsInWaitList, "eventWaitList", getClFileLogger().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList), "event", getClFileLogger().getEvents(reinterpret_cast(event), 1)); if (CL_SUCCESS != retVal) { TRACING_EXIT(clEnqueueSVMMap, &retVal); return retVal; } auto &device = pCommandQueue->getDevice(); if (!device.getHardwareInfo().capabilityTable.ftrSvm) { 
retVal = CL_INVALID_OPERATION; TRACING_EXIT(clEnqueueSVMMap, &retVal); return retVal; } if ((svmPtr == nullptr) || (size == 0)) { retVal = CL_INVALID_VALUE; TRACING_EXIT(clEnqueueSVMMap, &retVal); return retVal; } if (!pCommandQueue->validateCapabilityForOperation(CL_QUEUE_CAPABILITY_MAP_BUFFER_INTEL, numEventsInWaitList, eventWaitList, event)) { retVal = CL_INVALID_OPERATION; TRACING_EXIT(clEnqueueSVMMap, &retVal); return retVal; } retVal = pCommandQueue->enqueueSVMMap( blockingMap, mapFlags, svmPtr, size, numEventsInWaitList, eventWaitList, event, true); TRACING_EXIT(clEnqueueSVMMap, &retVal); return retVal; } cl_int CL_API_CALL clEnqueueSVMUnmap(cl_command_queue commandQueue, void *svmPtr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { TRACING_ENTER(clEnqueueSVMUnmap, &commandQueue, &svmPtr, &numEventsInWaitList, &eventWaitList, &event); CommandQueue *pCommandQueue = nullptr; cl_int retVal = validateObjects( WithCastToInternal(commandQueue, &pCommandQueue), EventWaitList(numEventsInWaitList, eventWaitList), svmPtr); API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue, "svmPtr", svmPtr, "numEventsInWaitList", numEventsInWaitList, "eventWaitList", getClFileLogger().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList), "event", getClFileLogger().getEvents(reinterpret_cast(event), 1)); if (retVal != CL_SUCCESS) { TRACING_EXIT(clEnqueueSVMUnmap, &retVal); return retVal; } auto &device = pCommandQueue->getDevice(); if (!device.getHardwareInfo().capabilityTable.ftrSvm) { retVal = CL_INVALID_OPERATION; TRACING_EXIT(clEnqueueSVMUnmap, &retVal); return retVal; } if (!pCommandQueue->validateCapabilityForOperation(CL_QUEUE_CAPABILITY_MAP_BUFFER_INTEL, numEventsInWaitList, eventWaitList, event)) { retVal = CL_INVALID_OPERATION; TRACING_EXIT(clEnqueueSVMUnmap, &retVal); return retVal; } retVal = pCommandQueue->enqueueSVMUnmap( svmPtr, numEventsInWaitList, eventWaitList, event, true); TRACING_EXIT(clEnqueueSVMUnmap, 
&retVal); return retVal; } cl_int CL_API_CALL clSetKernelArgSVMPointer(cl_kernel kernel, cl_uint argIndex, const void *argValue) { TRACING_ENTER(clSetKernelArgSVMPointer, &kernel, &argIndex, &argValue); MultiDeviceKernel *pMultiDeviceKernel = nullptr; auto retVal = validateObjects(WithCastToInternal(kernel, &pMultiDeviceKernel)); API_ENTER(&retVal); if (CL_SUCCESS != retVal) { TRACING_EXIT(clSetKernelArgSVMPointer, &retVal); return retVal; } if (argIndex >= pMultiDeviceKernel->getKernelArgsNumber()) { retVal = CL_INVALID_ARG_INDEX; TRACING_EXIT(clSetKernelArgSVMPointer, &retVal); return retVal; } const auto svmManager = pMultiDeviceKernel->getContext().getSVMAllocsManager(); if (argValue != nullptr) { if (pMultiDeviceKernel->getKernelArguments()[argIndex].allocId > 0 && pMultiDeviceKernel->getKernelArguments()[argIndex].value == argValue) { bool reuseFromCache = false; const auto allocationsCounter = svmManager->allocationsCounter.load(); if (allocationsCounter > 0) { if (allocationsCounter == pMultiDeviceKernel->getKernelArguments()[argIndex].allocIdMemoryManagerCounter) { reuseFromCache = true; } else { const auto svmData = svmManager->getSVMAlloc(argValue); if (svmData && pMultiDeviceKernel->getKernelArguments()[argIndex].allocId == svmData->getAllocId()) { reuseFromCache = true; pMultiDeviceKernel->storeKernelArgAllocIdMemoryManagerCounter(argIndex, allocationsCounter); } } if (reuseFromCache) { TRACING_EXIT(clSetKernelArgSVMPointer, &retVal); return CL_SUCCESS; } } } } DBG_LOG_INPUTS("kernel", kernel, "argIndex", argIndex, "argValue", argValue); for (const auto &pDevice : pMultiDeviceKernel->getDevices()) { const HardwareInfo &hwInfo = pDevice->getHardwareInfo(); if (!hwInfo.capabilityTable.ftrSvm) { retVal = CL_INVALID_OPERATION; TRACING_EXIT(clSetKernelArgSVMPointer, &retVal); return retVal; } } for (const auto &pDevice : pMultiDeviceKernel->getDevices()) { auto pKernel = pMultiDeviceKernel->getKernel(pDevice->getRootDeviceIndex()); cl_int 
kernelArgAddressQualifier = asClKernelArgAddressQualifier(pKernel->getKernelInfo() .kernelDescriptor.payloadMappings.explicitArgs[argIndex] .getTraits() .getAddressQualifier()); if ((kernelArgAddressQualifier != CL_KERNEL_ARG_ADDRESS_GLOBAL) && (kernelArgAddressQualifier != CL_KERNEL_ARG_ADDRESS_CONSTANT)) { retVal = CL_INVALID_ARG_VALUE; TRACING_EXIT(clSetKernelArgSVMPointer, &retVal); return retVal; } } MultiGraphicsAllocation *pSvmAllocs = nullptr; uint32_t allocId = 0u; if (argValue != nullptr) { auto svmData = svmManager->getSVMAlloc(argValue); if (svmData == nullptr) { for (const auto &pDevice : pMultiDeviceKernel->getDevices()) { if (!pDevice->areSharedSystemAllocationsAllowed()) { retVal = CL_INVALID_ARG_VALUE; TRACING_EXIT(clSetKernelArgSVMPointer, &retVal); return retVal; } } } else { pSvmAllocs = &svmData->gpuAllocations; allocId = svmData->getAllocId(); } } retVal = pMultiDeviceKernel->setArgSvmAlloc(argIndex, const_cast(argValue), pSvmAllocs, allocId); TRACING_EXIT(clSetKernelArgSVMPointer, &retVal); return retVal; } cl_int CL_API_CALL clSetKernelExecInfo(cl_kernel kernel, cl_kernel_exec_info paramName, size_t paramValueSize, const void *paramValue) { TRACING_ENTER(clSetKernelExecInfo, &kernel, ¶mName, ¶mValueSize, ¶mValue); MultiDeviceKernel *pMultiDeviceKernel = nullptr; auto retVal = validateObjects(WithCastToInternal(kernel, &pMultiDeviceKernel)); API_ENTER(&retVal); DBG_LOG_INPUTS("kernel", kernel, "paramName", paramName, "paramValueSize", paramValueSize, "paramValue", NEO::FileLoggerInstance().infoPointerToString(paramValue, paramValueSize)); if (CL_SUCCESS != retVal) { TRACING_EXIT(clSetKernelExecInfo, &retVal); return retVal; } switch (paramName) { case CL_KERNEL_EXEC_INFO_SVM_PTRS: case CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM: for (const auto &pDevice : pMultiDeviceKernel->getDevices()) { const HardwareInfo &hwInfo = pDevice->getHardwareInfo(); if (!hwInfo.capabilityTable.ftrSvm) { retVal = CL_INVALID_OPERATION; 
TRACING_EXIT(clSetKernelExecInfo, &retVal); return retVal; } } } switch (paramName) { case CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL: case CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL: case CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL: { if (NEO::DebugManager.flags.DisableIndirectAccess.get() != 1 && pMultiDeviceKernel->getHasIndirectAccess() == true) { auto propertyValue = *reinterpret_cast(paramValue); pMultiDeviceKernel->setUnifiedMemoryProperty(paramName, propertyValue); } } break; case CL_KERNEL_EXEC_INFO_SVM_PTRS: case CL_KERNEL_EXEC_INFO_USM_PTRS_INTEL: { if ((paramValueSize == 0) || (paramValueSize % sizeof(void *)) || (paramValue == nullptr)) { retVal = CL_INVALID_VALUE; TRACING_EXIT(clSetKernelExecInfo, &retVal); return retVal; } size_t numPointers = paramValueSize / sizeof(void *); size_t *pSvmPtrList = (size_t *)paramValue; if (paramName == CL_KERNEL_EXEC_INFO_SVM_PTRS) { pMultiDeviceKernel->clearSvmKernelExecInfo(); } else { pMultiDeviceKernel->clearUnifiedMemoryExecInfo(); } for (uint32_t i = 0; i < numPointers; i++) { auto svmData = pMultiDeviceKernel->getContext().getSVMAllocsManager()->getSVMAlloc((const void *)pSvmPtrList[i]); if (svmData == nullptr) { retVal = CL_INVALID_VALUE; TRACING_EXIT(clSetKernelExecInfo, &retVal); return retVal; } auto &svmAllocs = svmData->gpuAllocations; if (paramName == CL_KERNEL_EXEC_INFO_SVM_PTRS) { pMultiDeviceKernel->setSvmKernelExecInfo(svmAllocs); } else { pMultiDeviceKernel->setUnifiedMemoryExecInfo(svmAllocs); } } break; } case CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_INTEL: { auto propertyValue = *static_cast(paramValue); retVal = pMultiDeviceKernel->setKernelThreadArbitrationPolicy(propertyValue); return retVal; } case CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM: { retVal = CL_INVALID_OPERATION; TRACING_EXIT(clSetKernelExecInfo, &retVal); return retVal; } case CL_KERNEL_EXEC_INFO_KERNEL_TYPE_INTEL: { if (paramValueSize != sizeof(cl_execution_info_kernel_type_intel) || paramValue == nullptr) { 
retVal = CL_INVALID_VALUE; TRACING_EXIT(clSetKernelExecInfo, &retVal); return retVal; } auto kernelType = *static_cast(paramValue); retVal = pMultiDeviceKernel->setKernelExecutionType(kernelType); TRACING_EXIT(clSetKernelExecInfo, &retVal); return retVal; } default: { retVal = pMultiDeviceKernel->setAdditionalKernelExecInfoWithParam(paramName, paramValueSize, paramValue); TRACING_EXIT(clSetKernelExecInfo, &retVal); return retVal; } } TRACING_EXIT(clSetKernelExecInfo, &retVal); return retVal; }; cl_mem CL_API_CALL clCreatePipe(cl_context context, cl_mem_flags flags, cl_uint pipePacketSize, cl_uint pipeMaxPackets, const cl_pipe_properties *properties, cl_int *errcodeRet) { TRACING_ENTER(clCreatePipe, &context, &flags, &pipePacketSize, &pipeMaxPackets, &properties, &errcodeRet); cl_mem pipe = nullptr; cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("cl_context", context, "cl_mem_flags", flags, "cl_uint", pipePacketSize, "cl_uint", pipeMaxPackets, "const cl_pipe_properties", properties, "cl_int", errcodeRet); Context *pContext = nullptr; const cl_mem_flags allValidFlags = CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS; do { if ((pipePacketSize == 0) || (pipeMaxPackets == 0)) { retVal = CL_INVALID_PIPE_SIZE; break; } /* Are there some invalid flag bits? 
*/ if ((flags & (~allValidFlags)) != 0) { retVal = CL_INVALID_VALUE; break; } if (properties != nullptr) { retVal = CL_INVALID_VALUE; break; } retVal = validateObjects(WithCastToInternal(context, &pContext)); if (retVal != CL_SUCCESS) { break; } auto pDevice = pContext->getDevice(0); if (pDevice->arePipesSupported() == false) { retVal = CL_INVALID_OPERATION; break; } if (pipePacketSize > pDevice->getDeviceInfo().pipeMaxPacketSize) { retVal = CL_INVALID_PIPE_SIZE; break; } // create the pipe pipe = Pipe::create(pContext, flags, pipePacketSize, pipeMaxPackets, properties, retVal); } while (false); if (errcodeRet) { *errcodeRet = retVal; } DBG_LOG_INPUTS("pipe", pipe); TRACING_EXIT(clCreatePipe, &pipe); return pipe; } cl_int CL_API_CALL clGetPipeInfo(cl_mem pipe, cl_pipe_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) { TRACING_ENTER(clGetPipeInfo, &pipe, ¶mName, ¶mValueSize, ¶mValue, ¶mValueSizeRet); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("cl_mem", pipe, "cl_pipe_info", paramName, "size_t", paramValueSize, "void *", NEO::FileLoggerInstance().infoPointerToString(paramValue, paramValueSize), "size_t*", paramValueSizeRet); retVal = validateObjects(pipe); if (CL_SUCCESS != retVal) { TRACING_EXIT(clGetPipeInfo, &retVal); return retVal; } auto pPipeObj = castToObject(pipe); if (pPipeObj == nullptr) { retVal = CL_INVALID_MEM_OBJECT; TRACING_EXIT(clGetPipeInfo, &retVal); return retVal; } retVal = pPipeObj->getPipeInfo(paramName, paramValueSize, paramValue, paramValueSizeRet); TRACING_EXIT(clGetPipeInfo, &retVal); return retVal; } cl_command_queue CL_API_CALL clCreateCommandQueueWithProperties(cl_context context, cl_device_id device, const cl_queue_properties *properties, cl_int *errcodeRet) { TRACING_ENTER(clCreateCommandQueueWithProperties, &context, &device, &properties, &errcodeRet); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("context", context, "device", device, "properties", properties); 
cl_command_queue commandQueue = nullptr; ErrorCodeHelper err(errcodeRet, CL_SUCCESS); Context *pContext = nullptr; ClDevice *pDevice = nullptr; retVal = validateObjects( WithCastToInternal(context, &pContext), WithCastToInternal(device, &pDevice)); if (CL_SUCCESS != retVal) { err.set(retVal); TRACING_EXIT(clCreateCommandQueueWithProperties, &commandQueue); return commandQueue; } if (!pContext->isDeviceAssociated(*pDevice)) { err.set(CL_INVALID_DEVICE); TRACING_EXIT(clCreateCommandQueueWithProperties, &commandQueue); return commandQueue; } auto tokenValue = properties ? *properties : 0; auto propertiesAddress = properties; while (tokenValue != 0) { switch (tokenValue) { case CL_QUEUE_PROPERTIES: case CL_QUEUE_SIZE: case CL_QUEUE_PRIORITY_KHR: case CL_QUEUE_THROTTLE_KHR: case CL_QUEUE_SLICE_COUNT_INTEL: case CL_QUEUE_FAMILY_INTEL: case CL_QUEUE_INDEX_INTEL: case CL_QUEUE_MDAPI_PROPERTIES_INTEL: case CL_QUEUE_MDAPI_CONFIGURATION_INTEL: break; default: err.set(CL_INVALID_VALUE); TRACING_EXIT(clCreateCommandQueueWithProperties, &commandQueue); return commandQueue; } propertiesAddress += 2; tokenValue = *propertiesAddress; } auto commandQueueProperties = getCmdQueueProperties(properties); uint32_t maxOnDeviceQueueSize = pDevice->getDeviceInfo().queueOnDeviceMaxSize; if (commandQueueProperties & static_cast(CL_QUEUE_ON_DEVICE)) { if (!(commandQueueProperties & static_cast(CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE))) { err.set(CL_INVALID_VALUE); TRACING_EXIT(clCreateCommandQueueWithProperties, &commandQueue); return commandQueue; } err.set(CL_INVALID_QUEUE_PROPERTIES); TRACING_EXIT(clCreateCommandQueueWithProperties, &commandQueue); return commandQueue; } if (commandQueueProperties & static_cast(CL_QUEUE_ON_DEVICE_DEFAULT)) { if (!(commandQueueProperties & static_cast(CL_QUEUE_ON_DEVICE))) { err.set(CL_INVALID_VALUE); TRACING_EXIT(clCreateCommandQueueWithProperties, &commandQueue); return commandQueue; } } if (getCmdQueueProperties(properties, CL_QUEUE_SIZE) > 
maxOnDeviceQueueSize) { err.set(CL_INVALID_QUEUE_PROPERTIES); TRACING_EXIT(clCreateCommandQueueWithProperties, &commandQueue); return commandQueue; } if (commandQueueProperties & static_cast(CL_QUEUE_ON_DEVICE)) { if (getCmdQueueProperties(properties, CL_QUEUE_PRIORITY_KHR)) { err.set(CL_INVALID_QUEUE_PROPERTIES); TRACING_EXIT(clCreateCommandQueueWithProperties, &commandQueue); return commandQueue; } } if (commandQueueProperties & static_cast(CL_QUEUE_ON_DEVICE)) { if (getCmdQueueProperties(properties, CL_QUEUE_THROTTLE_KHR)) { err.set(CL_INVALID_QUEUE_PROPERTIES); TRACING_EXIT(clCreateCommandQueueWithProperties, &commandQueue); return commandQueue; } } if (getCmdQueueProperties(properties, CL_QUEUE_SLICE_COUNT_INTEL) > pDevice->getDeviceInfo().maxSliceCount) { err.set(CL_INVALID_QUEUE_PROPERTIES); TRACING_EXIT(clCreateCommandQueueWithProperties, &commandQueue); return commandQueue; } bool queueFamilySelected = false; bool queueSelected = false; const auto queueFamilyIndex = getCmdQueueProperties(properties, CL_QUEUE_FAMILY_INTEL, &queueFamilySelected); const auto queueIndex = getCmdQueueProperties(properties, CL_QUEUE_INDEX_INTEL, &queueSelected); if (queueFamilySelected != queueSelected) { err.set(CL_INVALID_QUEUE_PROPERTIES); TRACING_EXIT(clCreateCommandQueueWithProperties, &commandQueue); return commandQueue; } if (queueFamilySelected && (queueFamilyIndex >= pDevice->getDeviceInfo().queueFamilyProperties.size() || queueIndex >= pDevice->getDeviceInfo().queueFamilyProperties[queueFamilyIndex].count)) { err.set(CL_INVALID_QUEUE_PROPERTIES); TRACING_EXIT(clCreateCommandQueueWithProperties, &commandQueue); return commandQueue; } bool mdapiPropertySet = false; bool mdapiConfigurationSet = false; cl_command_queue_mdapi_properties_intel mdapiProperties = getCmdQueueProperties(properties, CL_QUEUE_MDAPI_PROPERTIES_INTEL, &mdapiPropertySet); cl_uint mdapiConfiguration = getCmdQueueProperties(properties, CL_QUEUE_MDAPI_CONFIGURATION_INTEL, &mdapiConfigurationSet); if 
(mdapiConfigurationSet && mdapiConfiguration != 0) { err.set(CL_INVALID_OPERATION); TRACING_EXIT(clCreateCommandQueueWithProperties, &commandQueue); return commandQueue; } commandQueue = CommandQueue::create( pContext, pDevice, properties, false, retVal); if (mdapiPropertySet && (mdapiProperties & CL_QUEUE_MDAPI_ENABLE_INTEL)) { auto commandQueueObj = castToObjectOrAbort(commandQueue); if (!commandQueueObj->setPerfCountersEnabled()) { clReleaseCommandQueue(commandQueue); TRACING_EXIT(clCreateCommandQueueWithProperties, &commandQueue); err.set(CL_OUT_OF_RESOURCES); return nullptr; } } if (pContext->isProvidingPerformanceHints()) { pContext->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, DRIVER_CALLS_INTERNAL_CL_FLUSH); if (castToObjectOrAbort(commandQueue)->isProfilingEnabled()) { pContext->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, PROFILING_ENABLED); if (pDevice->getDeviceInfo().preemptionSupported && pDevice->getHardwareInfo().platform.eProductFamily < IGFX_SKYLAKE) { pContext->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, PROFILING_ENABLED_WITH_DISABLED_PREEMPTION); } } } if (!commandQueue) { retVal = CL_OUT_OF_HOST_MEMORY; } DBG_LOG_INPUTS("commandQueue", commandQueue, "properties", static_cast(getCmdQueueProperties(properties))); err.set(retVal); TRACING_EXIT(clCreateCommandQueueWithProperties, &commandQueue); return commandQueue; } cl_sampler CL_API_CALL clCreateSamplerWithProperties(cl_context context, const cl_sampler_properties *samplerProperties, cl_int *errcodeRet) { TRACING_ENTER(clCreateSamplerWithProperties, &context, &samplerProperties, &errcodeRet); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("context", context, "samplerProperties", samplerProperties); cl_sampler sampler = nullptr; retVal = validateObjects(context); if (CL_SUCCESS == retVal) { sampler = Sampler::create( castToObject(context), samplerProperties, retVal); } if (errcodeRet) { *errcodeRet = retVal; } 
TRACING_EXIT(clCreateSamplerWithProperties, &sampler); return sampler; }

// No work is done here: the call only emits the tracing enter/exit pair and
// reports CL_SUCCESS.
cl_int CL_API_CALL clUnloadCompiler() {
    TRACING_ENTER(clUnloadCompiler);
    cl_int retVal = CL_SUCCESS;
    API_ENTER(&retVal);
    TRACING_EXIT(clUnloadCompiler, &retVal);
    return retVal;
}

// KHR-suffixed sub-group query.
//
// The device may be omitted (null) only when the kernel has exactly one
// device; in every other case the device handle itself is validated. Only the
// three parameter names listed below are forwarded to Kernel::getSubGroupInfo;
// any other name yields CL_INVALID_VALUE.
cl_int CL_API_CALL clGetKernelSubGroupInfoKHR(cl_kernel kernel,
                                              cl_device_id device,
                                              cl_kernel_sub_group_info paramName,
                                              size_t inputValueSize,
                                              const void *inputValue,
                                              size_t paramValueSize,
                                              void *paramValue,
                                              size_t *paramValueSizeRet) {
    cl_int retVal = CL_SUCCESS;
    API_ENTER(&retVal);
    DBG_LOG_INPUTS("kernel", kernel,
                   "device", device,
                   "paramName", paramName,
                   "inputValueSize", inputValueSize,
                   "inputValue", NEO::FileLoggerInstance().infoPointerToString(inputValue, inputValueSize),
                   "paramValueSize", paramValueSize,
                   "paramValue", NEO::FileLoggerInstance().infoPointerToString(paramValue, paramValueSize),
                   "paramValueSizeRet", paramValueSizeRet);

    MultiDeviceKernel *pMultiKernel = nullptr;
    ClDevice *pTargetDevice = nullptr;

    retVal = validateObjects(WithCastToInternal(kernel, &pMultiKernel));
    if (CL_SUCCESS == retVal) {
        const bool implicitSingleDevice = (pMultiKernel->getDevices().size() == 1u) && !device;
        if (implicitSingleDevice) {
            pTargetDevice = pMultiKernel->getDevices()[0];
        } else {
            retVal = validateObjects(WithCastToInternal(device, &pTargetDevice));
        }
    }
    if (CL_SUCCESS != retVal) {
        return retVal;
    }

    const bool supportedQuery = (paramName == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE) ||
                                (paramName == CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE) ||
                                (paramName == CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL);
    if (!supportedQuery) {
        retVal = CL_INVALID_VALUE;
        return retVal;
    }

    // The kernel's status is returned directly, without being stored in retVal first.
    auto pDeviceKernel = pMultiKernel->getKernel(pTargetDevice->getRootDeviceIndex());
    return pDeviceKernel->getSubGroupInfo(paramName,
                                          inputValueSize, inputValue,
                                          paramValueSize, paramValue,
                                          paramValueSizeRet);
}

cl_int CL_API_CALL clGetDeviceAndHostTimer(cl_device_id device, cl_ulong *deviceTimestamp, cl_ulong *hostTimestamp) { TRACING_ENTER(clGetDeviceAndHostTimer, &device, &deviceTimestamp, &hostTimestamp); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal);
DBG_LOG_INPUTS("device", device, "deviceTimestamp", deviceTimestamp, "hostTimestamp", hostTimestamp); do { ClDevice *pDevice = castToObject(device); if (pDevice == nullptr) { retVal = CL_INVALID_DEVICE; break; } if (deviceTimestamp == nullptr || hostTimestamp == nullptr) { retVal = CL_INVALID_VALUE; break; } if (!pDevice->getDeviceAndHostTimer(static_cast(deviceTimestamp), static_cast(hostTimestamp))) { retVal = CL_OUT_OF_RESOURCES; break; } } while (false); TRACING_EXIT(clGetDeviceAndHostTimer, &retVal); return retVal; } cl_int CL_API_CALL clGetHostTimer(cl_device_id device, cl_ulong *hostTimestamp) { TRACING_ENTER(clGetHostTimer, &device, &hostTimestamp); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("device", device, "hostTimestamp", hostTimestamp); do { ClDevice *pDevice = castToObject(device); if (pDevice == nullptr) { retVal = CL_INVALID_DEVICE; break; } if (hostTimestamp == nullptr) { retVal = CL_INVALID_VALUE; break; } if (!pDevice->getHostTimer(static_cast(hostTimestamp))) { retVal = CL_OUT_OF_RESOURCES; break; } } while (false); TRACING_EXIT(clGetHostTimer, &retVal); return retVal; } cl_int CL_API_CALL clGetKernelSubGroupInfo(cl_kernel kernel, cl_device_id device, cl_kernel_sub_group_info paramName, size_t inputValueSize, const void *inputValue, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) { TRACING_ENTER(clGetKernelSubGroupInfo, &kernel, &device, ¶mName, &inputValueSize, &inputValue, ¶mValueSize, ¶mValue, ¶mValueSizeRet); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("kernel", kernel, "device", device, "paramName", paramName, "inputValueSize", inputValueSize, "inputValue", NEO::FileLoggerInstance().infoPointerToString(inputValue, inputValueSize), "paramValueSize", paramValueSize, "paramValue", NEO::FileLoggerInstance().infoPointerToString(paramValue, paramValueSize), "paramValueSizeRet", paramValueSizeRet); MultiDeviceKernel *pMultiDeviceKernel = nullptr; retVal = 
validateObjects(WithCastToInternal(kernel, &pMultiDeviceKernel)); ClDevice *pClDevice = nullptr; if (CL_SUCCESS == retVal) { if (pMultiDeviceKernel->getDevices().size() == 1u && !device) { pClDevice = pMultiDeviceKernel->getDevices()[0]; } else { retVal = validateObjects(WithCastToInternal(device, &pClDevice)); } } if (CL_SUCCESS != retVal) { TRACING_EXIT(clGetKernelSubGroupInfo, &retVal); return retVal; } auto pKernel = pMultiDeviceKernel->getKernel(pClDevice->getRootDeviceIndex()); retVal = pKernel->getSubGroupInfo(paramName, inputValueSize, inputValue, paramValueSize, paramValue, paramValueSizeRet); TRACING_EXIT(clGetKernelSubGroupInfo, &retVal); return retVal; } cl_int CL_API_CALL clSetDefaultDeviceCommandQueue(cl_context context, cl_device_id device, cl_command_queue commandQueue) { TRACING_ENTER(clSetDefaultDeviceCommandQueue, &context, &device, &commandQueue); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("context", context, "device", device, "commandQueue", commandQueue); Context *pContext = nullptr; ClDevice *pClDevice = nullptr; retVal = validateObjects(WithCastToInternal(context, &pContext), WithCastToInternal(device, &pClDevice)); if (CL_SUCCESS != retVal) { TRACING_EXIT(clSetDefaultDeviceCommandQueue, &retVal); return retVal; } retVal = CL_INVALID_OPERATION; TRACING_EXIT(clSetDefaultDeviceCommandQueue, &retVal); return retVal; } cl_int CL_API_CALL clEnqueueSVMMigrateMem(cl_command_queue commandQueue, cl_uint numSvmPointers, const void **svmPointers, const size_t *sizes, const cl_mem_migration_flags flags, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { TRACING_ENTER(clEnqueueSVMMigrateMem, &commandQueue, &numSvmPointers, &svmPointers, &sizes, &flags, &numEventsInWaitList, &eventWaitList, &event); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue, "numSvmPointers", numSvmPointers, "svmPointers", NEO::FileLoggerInstance().infoPointerToString(svmPointers ? 
svmPointers[0] : 0, NEO::FileLoggerInstance().getInput(sizes, 0)), "sizes", NEO::FileLoggerInstance().getInput(sizes, 0), "flags", flags, "numEventsInWaitList", numEventsInWaitList, "eventWaitList", getClFileLogger().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList), "event", getClFileLogger().getEvents(reinterpret_cast(event), 1)); CommandQueue *pCommandQueue = nullptr; retVal = validateObjects( WithCastToInternal(commandQueue, &pCommandQueue), EventWaitList(numEventsInWaitList, eventWaitList)); if (CL_SUCCESS != retVal) { TRACING_EXIT(clEnqueueSVMMigrateMem, &retVal); return retVal; } auto &device = pCommandQueue->getDevice(); if (!device.getHardwareInfo().capabilityTable.ftrSvm) { retVal = CL_INVALID_OPERATION; TRACING_EXIT(clEnqueueSVMMigrateMem, &retVal); return retVal; } if (numSvmPointers == 0 || svmPointers == nullptr) { retVal = CL_INVALID_VALUE; TRACING_EXIT(clEnqueueSVMMigrateMem, &retVal); return retVal; } const cl_mem_migration_flags allValidFlags = CL_MIGRATE_MEM_OBJECT_HOST | CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED; if ((flags & (~allValidFlags)) != 0) { retVal = CL_INVALID_VALUE; TRACING_EXIT(clEnqueueSVMMigrateMem, &retVal); return retVal; } auto pSvmAllocMgr = pCommandQueue->getContext().getSVMAllocsManager(); UNRECOVERABLE_IF(pSvmAllocMgr == nullptr); for (uint32_t i = 0; i < numSvmPointers; i++) { auto svmData = pSvmAllocMgr->getSVMAlloc(svmPointers[i]); if (svmData == nullptr) { retVal = CL_INVALID_VALUE; TRACING_EXIT(clEnqueueSVMMigrateMem, &retVal); return retVal; } if (sizes != nullptr && sizes[i] != 0) { svmData = pSvmAllocMgr->getSVMAlloc(reinterpret_cast((size_t)svmPointers[i] + sizes[i] - 1)); if (svmData == nullptr) { retVal = CL_INVALID_VALUE; TRACING_EXIT(clEnqueueSVMMigrateMem, &retVal); return retVal; } } } for (uint32_t i = 0; i < numEventsInWaitList; i++) { auto pEvent = castToObject(eventWaitList[i]); if (pEvent->getContext() != &pCommandQueue->getContext()) { retVal = CL_INVALID_CONTEXT; 
TRACING_EXIT(clEnqueueSVMMigrateMem, &retVal); return retVal; } } retVal = pCommandQueue->enqueueSVMMigrateMem(numSvmPointers, svmPointers, sizes, flags, numEventsInWaitList, eventWaitList, event); TRACING_EXIT(clEnqueueSVMMigrateMem, &retVal); return retVal; } cl_kernel CL_API_CALL clCloneKernel(cl_kernel sourceKernel, cl_int *errcodeRet) { TRACING_ENTER(clCloneKernel, &sourceKernel, &errcodeRet); MultiDeviceKernel *pSourceMultiDeviceKernel = nullptr; MultiDeviceKernel *pClonedMultiDeviceKernel = nullptr; auto retVal = validateObjects(WithCastToInternal(sourceKernel, &pSourceMultiDeviceKernel)); API_ENTER(&retVal); DBG_LOG_INPUTS("sourceKernel", sourceKernel); if (CL_SUCCESS == retVal) { pClonedMultiDeviceKernel = MultiDeviceKernel::create(pSourceMultiDeviceKernel->getProgram(), pSourceMultiDeviceKernel->getKernelInfos(), &retVal); UNRECOVERABLE_IF((pClonedMultiDeviceKernel == nullptr) || (retVal != CL_SUCCESS)); retVal = pClonedMultiDeviceKernel->cloneKernel(pSourceMultiDeviceKernel); } if (errcodeRet) { *errcodeRet = retVal; } if (pClonedMultiDeviceKernel != nullptr) { gtpinNotifyKernelCreate(pClonedMultiDeviceKernel); } TRACING_EXIT(clCloneKernel, (cl_kernel *)&pClonedMultiDeviceKernel); return pClonedMultiDeviceKernel; } CL_API_ENTRY cl_int CL_API_CALL clEnqueueVerifyMemoryINTEL(cl_command_queue commandQueue, const void *allocationPtr, const void *expectedData, size_t sizeOfComparison, cl_uint comparisonMode) { cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue, "allocationPtr", allocationPtr, "expectedData", expectedData, "sizeOfComparison", sizeOfComparison, "comparisonMode", comparisonMode); if (sizeOfComparison == 0 || expectedData == nullptr || allocationPtr == nullptr) { retVal = CL_INVALID_VALUE; return retVal; } CommandQueue *pCommandQueue = nullptr; retVal = validateObjects(WithCastToInternal(commandQueue, &pCommandQueue)); if (retVal != CL_SUCCESS) { return retVal; } auto &csr = 
pCommandQueue->getGpgpuCommandStreamReceiver(); auto status = csr.expectMemory(allocationPtr, expectedData, sizeOfComparison, comparisonMode); return status ? CL_SUCCESS : CL_INVALID_VALUE; } cl_int CL_API_CALL clAddCommentINTEL(cl_device_id device, const char *comment) { cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("device", device, "comment", comment); ClDevice *pDevice = nullptr; retVal = validateObjects(WithCastToInternal(device, &pDevice)); if (retVal != CL_SUCCESS) { return retVal; } auto aubCenter = pDevice->getRootDeviceEnvironment().aubCenter.get(); if (!comment || (aubCenter && !aubCenter->getAubManager())) { retVal = CL_INVALID_VALUE; } if (retVal == CL_SUCCESS && aubCenter) { aubCenter->getAubManager()->addComment(comment); } return retVal; } cl_int CL_API_CALL clGetDeviceGlobalVariablePointerINTEL( cl_device_id device, cl_program program, const char *globalVariableName, size_t *globalVariableSizeRet, void **globalVariablePointerRet) { cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("device", device, "program", program, "globalVariableName", globalVariableName, "globalVariablePointerRet", globalVariablePointerRet); Program *pProgram = nullptr; ClDevice *pDevice = nullptr; retVal = validateObjects(WithCastToInternal(program, &pProgram), WithCastToInternal(device, &pDevice)); if (globalVariablePointerRet == nullptr) { retVal = CL_INVALID_ARG_VALUE; } if (CL_SUCCESS == retVal) { const auto &symbols = pProgram->getSymbols(pDevice->getRootDeviceIndex()); auto symbolIt = symbols.find(globalVariableName); if ((symbolIt == symbols.end()) || (symbolIt->second.symbol.segment == NEO::SegmentType::Instructions)) { retVal = CL_INVALID_ARG_VALUE; } else { if (globalVariableSizeRet != nullptr) { *globalVariableSizeRet = symbolIt->second.symbol.size; } *globalVariablePointerRet = reinterpret_cast(symbolIt->second.gpuAddress); } } return retVal; } cl_int CL_API_CALL clGetDeviceFunctionPointerINTEL( cl_device_id device, cl_program 
program, const char *functionName, cl_ulong *functionPointerRet) { cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("device", device, "program", program, "functionName", functionName, "functionPointerRet", functionPointerRet); Program *pProgram = nullptr; ClDevice *pDevice = nullptr; retVal = validateObjects(WithCastToInternal(program, &pProgram), WithCastToInternal(device, &pDevice)); if ((CL_SUCCESS == retVal) && (functionPointerRet == nullptr)) { retVal = CL_INVALID_ARG_VALUE; } if (CL_SUCCESS == retVal) { const auto &symbols = pProgram->getSymbols(pDevice->getRootDeviceIndex()); auto symbolIt = symbols.find(functionName); if ((symbolIt == symbols.end()) || (symbolIt->second.symbol.segment != NEO::SegmentType::Instructions)) { retVal = CL_INVALID_ARG_VALUE; } else { *functionPointerRet = static_cast(symbolIt->second.gpuAddress); } } return retVal; }

// This entry point never succeeds: the program handle and the callback
// pointer are validated, and a successful validation is reported as
// CL_INVALID_OPERATION. userData is only logged.
cl_int CL_API_CALL clSetProgramReleaseCallback(cl_program program,
                                               void(CL_CALLBACK *pfnNotify)(cl_program /* program */, void * /* user_data */),
                                               void *userData) {
    DBG_LOG_INPUTS("program", program,
                   "pfnNotify", pfnNotify,
                   "userData", userData);

    cl_int retVal = CL_SUCCESS;
    API_ENTER(&retVal);

    Program *pProg = nullptr;
    retVal = validateObjects(WithCastToInternal(program, &pProg),
                             reinterpret_cast(pfnNotify));

    // Validation errors pass through; success becomes CL_INVALID_OPERATION.
    retVal = (CL_SUCCESS == retVal) ? CL_INVALID_OPERATION : retVal;

    return retVal;
}

// Sets the specialization constant specId on a program.
//
// The program handle and specValue are validated first; when that fails its
// status is returned, otherwise the status of
// Program::setProgramSpecializationConstant.
cl_int CL_API_CALL clSetProgramSpecializationConstant(cl_program program, cl_uint specId, size_t specSize, const void *specValue) {
    cl_int retVal = CL_SUCCESS;
    API_ENTER(&retVal);
    DBG_LOG_INPUTS("program", program,
                   "specId", specId,
                   "specSize", specSize,
                   "specValue", specValue);

    Program *pProg = nullptr;
    retVal = validateObjects(WithCastToInternal(program, &pProg), specValue);
    if (CL_SUCCESS != retVal) {
        return retVal;
    }

    retVal = pProg->setProgramSpecializationConstant(specId, specSize, specValue);
    return retVal;
}

cl_int CL_API_CALL clGetKernelSuggestedLocalWorkSizeINTEL(cl_command_queue commandQueue, cl_kernel kernel,
cl_uint workDim, const size_t *globalWorkOffset, const size_t *globalWorkSize, size_t *suggestedLocalWorkSize) { cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue, "cl_kernel", kernel, "globalWorkOffset[0]", NEO::FileLoggerInstance().getInput(globalWorkOffset, 0), "globalWorkOffset[1]", NEO::FileLoggerInstance().getInput(globalWorkOffset, 1), "globalWorkOffset[2]", NEO::FileLoggerInstance().getInput(globalWorkOffset, 2), "globalWorkSize", NEO::FileLoggerInstance().getSizes(globalWorkSize, workDim, true), "suggestedLocalWorkSize", suggestedLocalWorkSize); MultiDeviceKernel *pMultiDeviceKernel = nullptr; CommandQueue *pCommandQueue = nullptr; retVal = validateObjects(WithCastToInternal(commandQueue, &pCommandQueue), WithCastToInternal(kernel, &pMultiDeviceKernel)); if (CL_SUCCESS != retVal) { return retVal; } if ((workDim == 0) || (workDim > 3)) { retVal = CL_INVALID_WORK_DIMENSION; return retVal; } if (globalWorkSize == nullptr || globalWorkSize[0] == 0 || (workDim > 1 && globalWorkSize[1] == 0) || (workDim > 2 && globalWorkSize[2] == 0)) { retVal = CL_INVALID_GLOBAL_WORK_SIZE; return retVal; } auto pKernel = pMultiDeviceKernel->getKernel(pCommandQueue->getDevice().getRootDeviceIndex()); if (!pKernel->isPatched()) { retVal = CL_INVALID_KERNEL; return retVal; } if (suggestedLocalWorkSize == nullptr) { retVal = CL_INVALID_VALUE; return retVal; } pKernel->getSuggestedLocalWorkSize(workDim, globalWorkSize, globalWorkOffset, suggestedLocalWorkSize); return retVal; } cl_int CL_API_CALL clGetKernelMaxConcurrentWorkGroupCountINTEL(cl_command_queue commandQueue, cl_kernel kernel, cl_uint workDim, const size_t *globalWorkOffset, const size_t *localWorkSize, size_t *suggestedWorkGroupCount) { cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue, "cl_kernel", kernel, "globalWorkOffset[0]", NEO::FileLoggerInstance().getInput(globalWorkOffset, 0), "globalWorkOffset[1]", 
NEO::FileLoggerInstance().getInput(globalWorkOffset, 1), "globalWorkOffset[2]", NEO::FileLoggerInstance().getInput(globalWorkOffset, 2), "localWorkSize", NEO::FileLoggerInstance().getSizes(localWorkSize, workDim, true), "suggestedWorkGroupCount", suggestedWorkGroupCount); CommandQueue *pCommandQueue = nullptr; MultiDeviceKernel *pMultiDeviceKernel = nullptr; retVal = validateObjects(WithCastToInternal(commandQueue, &pCommandQueue), WithCastToInternal(kernel, &pMultiDeviceKernel)); if (CL_SUCCESS != retVal) { return retVal; } if ((workDim == 0) || (workDim > 3)) { retVal = CL_INVALID_WORK_DIMENSION; return retVal; } if (globalWorkOffset == nullptr) { retVal = CL_INVALID_GLOBAL_OFFSET; return retVal; } if (localWorkSize == nullptr) { retVal = CL_INVALID_WORK_GROUP_SIZE; return retVal; } auto pKernel = pMultiDeviceKernel->getKernel(pCommandQueue->getDevice().getRootDeviceIndex()); if (!pKernel->isPatched()) { retVal = CL_INVALID_KERNEL; return retVal; } if (suggestedWorkGroupCount == nullptr) { retVal = CL_INVALID_VALUE; return retVal; } WithCastToInternal(commandQueue, &pCommandQueue); *suggestedWorkGroupCount = pKernel->getMaxWorkGroupCount(workDim, localWorkSize, pCommandQueue); return retVal; }

// Enqueues a kernel whose launch size is given as a work-group count per
// dimension; the global size handed to the queue is
// workgroupCount[i] * localWorkSize[i].
//
// Status codes decided here, in order:
//   - the validateObjects() status for bad queue / kernel / wait-list input;
//   - CL_INVALID_WORK_DIMENSION when workDim is not 1, 2 or 3;
//   - CL_INVALID_GLOBAL_WORK_SIZE when workgroupCount is null;
//   - CL_INVALID_WORK_GROUP_SIZE when localWorkSize is null;
//   - CL_INVALID_KERNEL when the kernel uses the sync buffer but its execution
//     type is not Concurrent;
//   - CL_INVALID_COMMAND_QUEUE when such a kernel is submitted to an engine
//     group without cooperative-dispatch support;
//   - CL_INVALID_VALUE when a Concurrent kernel requests more work-groups than
//     getMaxWorkGroupCount() allows;
//   - CL_INVALID_OPERATION when the queue fails the
//     CL_QUEUE_CAPABILITY_KERNEL_INTEL capability check;
//   - otherwise the status of CommandQueue::enqueueKernel.
//
// NOTE(review): the error codes chosen for null workgroupCount/localWorkSize
// mirror the sibling INTEL work-size queries in this file - confirm against
// the extension specification.
cl_int CL_API_CALL clEnqueueNDCountKernelINTEL(cl_command_queue commandQueue,
                                               cl_kernel kernel,
                                               cl_uint workDim,
                                               const size_t *globalWorkOffset,
                                               const size_t *workgroupCount,
                                               const size_t *localWorkSize,
                                               cl_uint numEventsInWaitList,
                                               const cl_event *eventWaitList,
                                               cl_event *event) {
    cl_int retVal = CL_SUCCESS;
    API_ENTER(&retVal);
    DBG_LOG_INPUTS("commandQueue", commandQueue, "cl_kernel", kernel,
                   "globalWorkOffset[0]", NEO::FileLoggerInstance().getInput(globalWorkOffset, 0),
                   "globalWorkOffset[1]", NEO::FileLoggerInstance().getInput(globalWorkOffset, 1),
                   "globalWorkOffset[2]", NEO::FileLoggerInstance().getInput(globalWorkOffset, 2),
                   "workgroupCount", NEO::FileLoggerInstance().getSizes(workgroupCount, workDim, false),
                   "localWorkSize", NEO::FileLoggerInstance().getSizes(localWorkSize, workDim, true),
                   "numEventsInWaitList", numEventsInWaitList,
                   "eventWaitList", getClFileLogger().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList),
                   "event", getClFileLogger().getEvents(reinterpret_cast(event), 1));

    CommandQueue *pCommandQueue = nullptr;
    Kernel *pKernel = nullptr;
    MultiDeviceKernel *pMultiDeviceKernel = nullptr;

    retVal = validateObjects(
        WithCastToInternal(commandQueue, &pCommandQueue),
        WithCastToInternal(kernel, &pMultiDeviceKernel),
        EventWaitList(numEventsInWaitList, eventWaitList));

    if (CL_SUCCESS != retVal) {
        return retVal;
    }

    // workDim indexes the fixed three-element array below; anything outside
    // 1..3 would write past its end.
    if ((workDim == 0) || (workDim > 3)) {
        retVal = CL_INVALID_WORK_DIMENSION;
        return retVal;
    }

    // Both arrays are read unconditionally when the global size is computed.
    if (workgroupCount == nullptr) {
        retVal = CL_INVALID_GLOBAL_WORK_SIZE;
        return retVal;
    }

    if (localWorkSize == nullptr) {
        retVal = CL_INVALID_WORK_GROUP_SIZE;
        return retVal;
    }

    auto &device = pCommandQueue->getClDevice();
    auto rootDeviceIndex = device.getRootDeviceIndex();

    pKernel = pMultiDeviceKernel->getKernel(rootDeviceIndex);
    size_t globalWorkSize[3] = {};
    for (size_t i = 0; i < workDim; i++) {
        globalWorkSize[i] = workgroupCount[i] * localWorkSize[i];
    }

    if (pKernel->usesSyncBuffer()) {
        if (pKernel->getExecutionType() != KernelExecutionType::Concurrent) {
            retVal = CL_INVALID_KERNEL;
            return retVal;
        }

        auto &hardwareInfo = device.getHardwareInfo();
        auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);
        auto engineGroupType = hwHelper.getEngineGroupType(pCommandQueue->getGpgpuEngine().getEngineType(),
                                                           pCommandQueue->getGpgpuEngine().getEngineUsage(), hardwareInfo);
        if (!hwHelper.isCooperativeDispatchSupported(engineGroupType, hardwareInfo)) {
            retVal = CL_INVALID_COMMAND_QUEUE;
            return retVal;
        }
    }

    if (pKernel->getExecutionType() == KernelExecutionType::Concurrent) {
        size_t requestedNumberOfWorkgroups = 1;
        for (size_t i = 0; i < workDim; i++) {
            requestedNumberOfWorkgroups *= workgroupCount[i];
        }
        size_t maximalNumberOfWorkgroupsAllowed = pKernel->getMaxWorkGroupCount(workDim, localWorkSize, pCommandQueue);
        if (requestedNumberOfWorkgroups > maximalNumberOfWorkgroupsAllowed) {
            retVal = CL_INVALID_VALUE;
            return retVal;
        }
    }

    if (!pCommandQueue->validateCapabilityForOperation(CL_QUEUE_CAPABILITY_KERNEL_INTEL, numEventsInWaitList, eventWaitList, event)) {
        retVal = CL_INVALID_OPERATION;
        return retVal;
    }

    if (pKernel->usesSyncBuffer()) {
        device.getDevice().allocateSyncBufferHandler();
    }

    TakeOwnershipWrapper kernelOwnership(*pMultiDeviceKernel, gtpinIsGTPinInitialized());
    if (gtpinIsGTPinInitialized()) {
        gtpinNotifyKernelSubmit(kernel, pCommandQueue);
    }

    retVal = pCommandQueue->enqueueKernel(
        pKernel,
        workDim,
        globalWorkOffset,
        globalWorkSize,
        localWorkSize,
        numEventsInWaitList,
        eventWaitList,
        event);

    DBG_LOG_INPUTS("event", getClFileLogger().getEvents(reinterpret_cast(event), 1u));
    return retVal;
}

// Registers pfnNotify/userData as a destructor callback on the context.
//
// The context handle and the callback pointer are validated first; when that
// fails its status is returned, otherwise the status of
// Context::setDestructorCallback.
cl_int CL_API_CALL clSetContextDestructorCallback(cl_context context,
                                                  void(CL_CALLBACK *pfnNotify)(cl_context /* context */, void * /* user_data */),
                                                  void *userData) {
    // The first argument is a context, so it is logged under "context"
    // (the label used to read "program").
    DBG_LOG_INPUTS("context", context,
                   "pfnNotify", pfnNotify,
                   "userData", userData);

    cl_int retVal = CL_SUCCESS;
    API_ENTER(&retVal);

    Context *pContext = nullptr;
    retVal = validateObjects(WithCastToInternal(context, &pContext),
                             reinterpret_cast(pfnNotify));

    if (retVal == CL_SUCCESS) {
        retVal = pContext->setDestructorCallback(pfnNotify, userData);
    }

    return retVal;
}
compute-runtime-22.14.22890/opencl/source/api/api.h000066400000000000000000000715321422164147700216350ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/extensions/public/cl_ext_private.h" #include "opencl/source/api/api_enter.h" #include "opencl/source/api/dispatch.h" #include "CL/cl.h" #include "CL/cl_gl.h" #ifdef __cplusplus extern "C" { #endif cl_int CL_API_CALL clGetPlatformIDs( cl_uint numEntries, cl_platform_id *platforms, cl_uint *numPlatforms); cl_int CL_API_CALL clGetPlatformInfo( cl_platform_id platform, cl_platform_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet); cl_int CL_API_CALL clGetDeviceIDs( cl_platform_id platform, cl_device_type deviceType, cl_uint numEntries, cl_device_id *devices, cl_uint *numDevices); cl_int CL_API_CALL clGetDeviceInfo( 
cl_device_id device, cl_device_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet); cl_int CL_API_CALL clCreateSubDevices( cl_device_id inDevice, const cl_device_partition_property *properties, cl_uint numDevices, cl_device_id *outDevices, cl_uint *numDevicesRet); cl_int CL_API_CALL clRetainDevice( cl_device_id device); cl_int CL_API_CALL clReleaseDevice( cl_device_id device); cl_context CL_API_CALL clCreateContext( const cl_context_properties *properties, cl_uint numDevices, const cl_device_id *devices, void(CL_CALLBACK *funcNotify)(const char *, const void *, size_t, void *), void *userData, cl_int *errcodeRet); cl_context CL_API_CALL clCreateContextFromType( const cl_context_properties *properties, cl_device_type deviceType, void(CL_CALLBACK *funcNotify)(const char *, const void *, size_t, void *), void *userData, cl_int *errcodeRet); cl_int CL_API_CALL clRetainContext( cl_context context); cl_int CL_API_CALL clReleaseContext( cl_context context); cl_int CL_API_CALL clGetContextInfo( cl_context context, cl_context_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet); cl_int CL_API_CALL clGetGLContextInfoKHR( const cl_context_properties *properties, cl_gl_context_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet); cl_command_queue CL_API_CALL clCreateCommandQueue( cl_context context, cl_device_id device, cl_command_queue_properties properties, cl_int *errcodeRet); cl_int CL_API_CALL clRetainCommandQueue( cl_command_queue commandQueue); cl_int CL_API_CALL clReleaseCommandQueue( cl_command_queue commandQueue); cl_int CL_API_CALL clGetCommandQueueInfo( cl_command_queue commandQueue, cl_command_queue_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet); // deprecated OpenCL 1.0 cl_int CL_API_CALL clSetCommandQueueProperty( cl_command_queue commandQueue, cl_command_queue_properties properties, cl_bool enable, cl_command_queue_properties 
*oldProperties); cl_mem CL_API_CALL clCreateBuffer( cl_context context, cl_mem_flags flags, size_t size, void *hostPtr, cl_int *errcodeRet); cl_mem CL_API_CALL clCreateBufferWithPropertiesINTEL( cl_context context, const cl_mem_properties_intel *properties, cl_mem_flags flags, size_t size, void *hostPtr, cl_int *errcodeRet); cl_mem CL_API_CALL clCreateSubBuffer( cl_mem buffer, cl_mem_flags flags, cl_buffer_create_type bufferCreateType, const void *bufferCreateInfo, cl_int *errcodeRet); cl_mem CL_API_CALL clCreateImage( cl_context context, cl_mem_flags flags, const cl_image_format *imageFormat, const cl_image_desc *imageDesc, void *hostPtr, cl_int *errcodeRet); cl_mem CL_API_CALL clCreateImageWithPropertiesINTEL( cl_context context, const cl_mem_properties_intel *properties, cl_mem_flags flags, const cl_image_format *imageFormat, const cl_image_desc *imageDesc, void *hostPtr, cl_int *errcodeRet); // deprecated OpenCL 1.1 cl_mem CL_API_CALL clCreateImage2D( cl_context context, cl_mem_flags flags, const cl_image_format *imageFormat, size_t imageWidth, size_t imageHeight, size_t imageRowPitch, void *hostPtr, cl_int *errcodeRet); // deprecated OpenCL 1.1 cl_mem CL_API_CALL clCreateImage3D( cl_context context, cl_mem_flags flags, const cl_image_format *imageFormat, size_t imageWidth, size_t imageHeight, size_t imageDepth, size_t imageRowPitch, size_t imageSlicePitch, void *hostPtr, cl_int *errcodeRet); cl_int CL_API_CALL clRetainMemObject( cl_mem memobj); cl_int CL_API_CALL clReleaseMemObject( cl_mem memobj); cl_int CL_API_CALL clGetSupportedImageFormats( cl_context context, cl_mem_flags flags, cl_mem_object_type imageType, cl_uint numEntries, cl_image_format *imageFormats, cl_uint *numImageFormats); cl_int CL_API_CALL clGetMemObjectInfo( cl_mem memobj, cl_mem_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet); cl_int CL_API_CALL clGetImageInfo( cl_mem image, cl_image_info paramName, size_t paramValueSize, void *paramValue, size_t 
*paramValueSizeRet); cl_int CL_API_CALL clGetImageParamsINTEL( cl_context context, const cl_image_format *imageFormat, const cl_image_desc *imageDesc, size_t *imageRowPitch, size_t *imageSlicePitch); cl_int CL_API_CALL clSetMemObjectDestructorCallback( cl_mem memobj, void(CL_CALLBACK *funcNotify)(cl_mem, void *), void *userData); cl_sampler CL_API_CALL clCreateSampler( cl_context context, cl_bool normalizedCoords, cl_addressing_mode addressingMode, cl_filter_mode filterMode, cl_int *errcodeRet); cl_int CL_API_CALL clRetainSampler( cl_sampler sampler); cl_int CL_API_CALL clReleaseSampler( cl_sampler sampler); cl_int CL_API_CALL clGetSamplerInfo( cl_sampler sampler, cl_sampler_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet); cl_program CL_API_CALL clCreateProgramWithSource( cl_context context, cl_uint count, const char **strings, const size_t *lengths, cl_int *errcodeRet); cl_program CL_API_CALL clCreateProgramWithBinary( cl_context context, cl_uint numDevices, const cl_device_id *deviceList, const size_t *lengths, const unsigned char **binaries, cl_int *binaryStatus, cl_int *errcodeRet); cl_program CL_API_CALL clCreateProgramWithIL(cl_context context, const void *il, size_t length, cl_int *errcodeRet); cl_program CL_API_CALL clCreateProgramWithBuiltInKernels( cl_context context, cl_uint numDevices, const cl_device_id *deviceList, const char *kernelNames, cl_int *errcodeRet); cl_int CL_API_CALL clRetainProgram( cl_program program); cl_int CL_API_CALL clReleaseProgram( cl_program program); cl_int CL_API_CALL clBuildProgram( cl_program program, cl_uint numDevices, const cl_device_id *deviceList, const char *options, void(CL_CALLBACK *funcNotify)(cl_program program, void *userData), void *userData); cl_int CL_API_CALL clCompileProgram( cl_program program, cl_uint numDevices, const cl_device_id *deviceList, const char *options, cl_uint numInputHeaders, const cl_program *inputHeaders, const char **headerIncludeNames, void(CL_CALLBACK 
*funcNotify)(cl_program program, void *userData), void *userData); cl_program CL_API_CALL clLinkProgram( cl_context context, cl_uint numDevices, const cl_device_id *deviceList, const char *options, cl_uint numInputPrograms, const cl_program *inputPrograms, void(CL_CALLBACK *funcNotify)(cl_program program, void *userData), void *userData, cl_int *errcodeRet); cl_int CL_API_CALL clUnloadPlatformCompiler( cl_platform_id platform); // deprecated OpenCL 1.1 cl_int CL_API_CALL clUnloadCompiler(void); cl_int CL_API_CALL clGetProgramInfo( cl_program program, cl_program_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet); cl_int CL_API_CALL clGetProgramBuildInfo( cl_program program, cl_device_id device, cl_program_build_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet); cl_kernel CL_API_CALL clCreateKernel( cl_program program, const char *kernelName, cl_int *errcodeRet); cl_int CL_API_CALL clCreateKernelsInProgram( cl_program program, cl_uint numKernels, cl_kernel *kernels, cl_uint *numKernelsRet); cl_int CL_API_CALL clRetainKernel( cl_kernel kernel); cl_int CL_API_CALL clReleaseKernel( cl_kernel kernel); cl_int CL_API_CALL clSetKernelArg( cl_kernel kernel, cl_uint argIndex, size_t argSize, const void *argValue); cl_int CL_API_CALL clGetKernelInfo( cl_kernel kernel, cl_kernel_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet); cl_int CL_API_CALL clGetKernelArgInfo( cl_kernel kernel, cl_uint argIndx, cl_kernel_arg_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet); cl_int CL_API_CALL clGetKernelWorkGroupInfo( cl_kernel kernel, cl_device_id device, cl_kernel_work_group_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet); cl_int CL_API_CALL clWaitForEvents( cl_uint numEvents, const cl_event *eventList); cl_int CL_API_CALL clGetEventInfo( cl_event event, cl_event_info paramName, size_t paramValueSize, void 
*paramValue, size_t *paramValueSizeRet); cl_event CL_API_CALL clCreateUserEvent( cl_context context, cl_int *errcodeRet); cl_int CL_API_CALL clRetainEvent( cl_event event); cl_int CL_API_CALL clReleaseEvent( cl_event event); cl_int CL_API_CALL clSetUserEventStatus( cl_event event, cl_int executionStatus); cl_int CL_API_CALL clSetEventCallback( cl_event event, cl_int commandExecCallbackType, void(CL_CALLBACK *funcNotify)(cl_event, cl_int, void *), void *userData); cl_int CL_API_CALL clGetEventProfilingInfo( cl_event event, cl_profiling_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet); cl_int CL_API_CALL clFlush( cl_command_queue commandQueue); cl_int CL_API_CALL clFinish( cl_command_queue commandQueue); cl_int CL_API_CALL clEnqueueReadBuffer( cl_command_queue commandQueue, cl_mem buffer, cl_bool blockingRead, size_t offset, size_t cb, void *ptr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); cl_int CL_API_CALL clEnqueueReadBufferRect( cl_command_queue commandQueue, cl_mem buffer, cl_bool blockingRead, const size_t *bufferOrigin, const size_t *hostOrigin, const size_t *region, size_t bufferRowPitch, size_t bufferSlicePitch, size_t hostRowPitch, size_t hostSlicePitch, void *ptr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); cl_int CL_API_CALL clEnqueueWriteBuffer( cl_command_queue commandQueue, cl_mem buffer, cl_bool blockingWrite, size_t offset, size_t cb, const void *ptr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); cl_int CL_API_CALL clEnqueueWriteBufferRect( cl_command_queue commandQueue, cl_mem buffer, cl_bool blockingWrite, const size_t *bufferOrigin, const size_t *hostOrigin, const size_t *region, size_t bufferRowPitch, size_t bufferSlicePitch, size_t hostRowPitch, size_t hostSlicePitch, const void *ptr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); cl_int CL_API_CALL clEnqueueFillBuffer( 
cl_command_queue commandQueue, cl_mem buffer, const void *pattern, size_t patternSize, size_t offset, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); cl_int CL_API_CALL clEnqueueCopyBuffer( cl_command_queue commandQueue, cl_mem srcBuffer, cl_mem dstBuffer, size_t srcOffset, size_t dstOffset, size_t cb, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); cl_int CL_API_CALL clEnqueueCopyBufferRect( cl_command_queue commandQueue, cl_mem srcBuffer, cl_mem dstBuffer, const size_t *srcOrigin, const size_t *dstOrigin, const size_t *region, size_t srcRowPitch, size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); cl_int CL_API_CALL clEnqueueReadImage( cl_command_queue commandQueue, cl_mem image, cl_bool blockingRead, const size_t *origin, const size_t *region, size_t rowPitch, size_t slicePitch, void *ptr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); cl_int CL_API_CALL clEnqueueWriteImage( cl_command_queue commandQueue, cl_mem image, cl_bool blockingWrite, const size_t *origin, const size_t *region, size_t inputRowPitch, size_t inputSlicePitch, const void *ptr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); cl_int CL_API_CALL clEnqueueFillImage( cl_command_queue commandQueue, cl_mem image, const void *fillColor, const size_t *origin, const size_t *region, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); cl_int CL_API_CALL clEnqueueCopyImage( cl_command_queue commandQueue, cl_mem srcImage, cl_mem dstImage, const size_t *srcOrigin, const size_t *dstOrigin, const size_t *region, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); cl_int CL_API_CALL clEnqueueCopyImageToBuffer( cl_command_queue commandQueue, cl_mem srcImage, cl_mem dstBuffer, const size_t *srcOrigin, const size_t *region, size_t dstOffset, 
cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); cl_int CL_API_CALL clEnqueueCopyBufferToImage( cl_command_queue commandQueue, cl_mem srcBuffer, cl_mem dstImage, size_t srcOffset, const size_t *dstOrigin, const size_t *region, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); void *CL_API_CALL clEnqueueMapBuffer( cl_command_queue commandQueue, cl_mem buffer, cl_bool blockingMap, cl_map_flags mapFlags, size_t offset, size_t cb, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, cl_int *errcodeRet); void *CL_API_CALL clEnqueueMapImage( cl_command_queue commandQueue, cl_mem image, cl_bool blockingMap, cl_map_flags mapFlags, const size_t *origin, const size_t *region, size_t *imageRowPitch, size_t *imageSlicePitch, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, cl_int *errcodeRet); cl_int CL_API_CALL clEnqueueUnmapMemObject( cl_command_queue commandQueue, cl_mem memobj, void *mappedPtr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); cl_int CL_API_CALL clEnqueueMigrateMemObjects( cl_command_queue commandQueue, cl_uint numMemObjects, const cl_mem *memObjects, cl_mem_migration_flags flags, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); cl_int CL_API_CALL clEnqueueNDRangeKernel( cl_command_queue commandQueue, cl_kernel kernel, cl_uint workDim, const size_t *globalWorkOffset, const size_t *globalWorkSize, const size_t *localWorkSize, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); cl_int CL_API_CALL clEnqueueTask( cl_command_queue commandQueue, cl_kernel kernel, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); cl_int CL_API_CALL clEnqueueNativeKernel( cl_command_queue commandQueue, void(CL_CALLBACK *userFunc)(void *), void *args, size_t cbArgs, cl_uint numMemObjects, const cl_mem *memList, const void **argsMemLoc, cl_uint numEventsInWaitList, 
const cl_event *eventWaitList, cl_event *event); // deprecated OpenCL 1.1 cl_int CL_API_CALL clEnqueueMarker( cl_command_queue commandQueue, cl_event *event); // deprecated OpenCL 1.1 cl_int CL_API_CALL clEnqueueWaitForEvents( cl_command_queue commandQueue, cl_uint numEvents, const cl_event *eventList); // deprecated OpenCL 1.1 cl_int CL_API_CALL clEnqueueBarrier( cl_command_queue commandQueue); cl_int CL_API_CALL clEnqueueMarkerWithWaitList( cl_command_queue commandQueue, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); cl_int CL_API_CALL clEnqueueBarrierWithWaitList( cl_command_queue commandQueue, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); // deprecated OpenCL 1.1 void *CL_API_CALL clGetExtensionFunctionAddress( const char *funcName); void *CL_API_CALL clGetExtensionFunctionAddressForPlatform( cl_platform_id platform, const char *funcName); // CL-GL Sharing cl_mem CL_API_CALL clCreateFromGLBuffer( cl_context context, cl_mem_flags flags, cl_GLuint bufobj, int *errcodeRet); // OpenCL 1.2 cl_mem CL_API_CALL clCreateFromGLTexture( cl_context context, cl_mem_flags flags, cl_GLenum target, cl_GLint miplevel, cl_GLuint texture, cl_int *errcodeRet); // deprecated OpenCL 1.1 cl_mem CL_API_CALL clCreateFromGLTexture2D( cl_context context, cl_mem_flags flags, cl_GLenum target, cl_GLint miplevel, cl_GLuint texture, cl_int *errcodeRet); // deprecated OpenCL 1.1 cl_mem CL_API_CALL clCreateFromGLTexture3D( cl_context context, cl_mem_flags flags, cl_GLenum target, cl_GLint miplevel, cl_GLuint texture, cl_int *errcodeRet); cl_mem CL_API_CALL clCreateFromGLRenderbuffer( cl_context context, cl_mem_flags flags, cl_GLuint renderbuffer, cl_int *errcodeRet); cl_int CL_API_CALL clGetGLObjectInfo( cl_mem memobj, cl_gl_object_type *glObjectType, cl_GLuint *glObjectName); cl_int CL_API_CALL clGetGLTextureInfo( cl_mem memobj, cl_gl_texture_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet); 
cl_int CL_API_CALL clEnqueueAcquireGLObjects( cl_command_queue commandQueue, cl_uint numObjects, const cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); cl_int CL_API_CALL clEnqueueReleaseGLObjects( cl_command_queue commandQueue, cl_uint numObjects, const cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); // OpenCL 2.0 void *CL_API_CALL clSVMAlloc( cl_context context, cl_svm_mem_flags flags, size_t size, cl_uint alignment); void CL_API_CALL clSVMFree( cl_context context, void *svmPointer); cl_int CL_API_CALL clEnqueueSVMFree( cl_command_queue commandQueue, cl_uint numSvmPointers, void *svmPointers[], void(CL_CALLBACK *pfnFreeFunc)( cl_command_queue queue, cl_uint numSvmPointers, void *svmPointers[], void *userData), void *userData, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); cl_int CL_API_CALL clEnqueueSVMMemcpy( cl_command_queue commandQueue, cl_bool blockingCopy, void *dstPtr, const void *srcPtr, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); cl_int CL_API_CALL clEnqueueSVMMemFill( cl_command_queue commandQueue, void *svmPtr, const void *pattern, size_t patternSize, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); cl_int CL_API_CALL clEnqueueSVMMap( cl_command_queue commandQueue, cl_bool blockingMap, cl_map_flags mapFlags, void *svmPtr, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); cl_int CL_API_CALL clEnqueueSVMUnmap( cl_command_queue commandQueue, void *svmPtr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); cl_int CL_API_CALL clSetKernelArgSVMPointer( cl_kernel kernel, cl_uint argIndex, const void *argValue); cl_int CL_API_CALL clSetKernelExecInfo( cl_kernel kernel, cl_kernel_exec_info paramName, size_t paramValueSize, const void *paramValue); cl_mem CL_API_CALL clCreatePipe( 
cl_context context, cl_mem_flags flags, cl_uint pipePacketSize, cl_uint pipeMaxPackets, const cl_pipe_properties *properties, cl_int *errcodeRet); cl_int CL_API_CALL clGetPipeInfo( cl_mem pipe, cl_pipe_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet); cl_command_queue CL_API_CALL clCreateCommandQueueWithProperties( cl_context context, cl_device_id device, const cl_queue_properties *properties, cl_int *errcodeRet); cl_command_queue CL_API_CALL clCreateCommandQueueWithPropertiesKHR( cl_context context, cl_device_id device, const cl_queue_properties_khr *properties, cl_int *errcodeRet); cl_sampler CL_API_CALL clCreateSamplerWithProperties( cl_context context, const cl_sampler_properties *samplerProperties, cl_int *errcodeRet); cl_int CL_API_CALL clEnqueueVerifyMemoryINTEL( cl_command_queue commandQueue, const void *allocationPtr, const void *expectedData, size_t sizeOfComparison, cl_uint comparisonMode); cl_int CL_API_CALL clAddCommentINTEL(cl_device_id device, const char *comment); // OpenCL 2.1 cl_int CL_API_CALL clGetDeviceAndHostTimer(cl_device_id device, cl_ulong *deviceTimestamp, cl_ulong *hostTimestamp); cl_int CL_API_CALL clGetHostTimer(cl_device_id device, cl_ulong *hostTimestamp); cl_int CL_API_CALL clGetKernelSubGroupInfo(cl_kernel kernel, cl_device_id device, cl_kernel_sub_group_info paramName, size_t inputValueSize, const void *inputValue, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet); cl_int CL_API_CALL clSetDefaultDeviceCommandQueue(cl_context context, cl_device_id device, cl_command_queue commandQueue); cl_int CL_API_CALL clEnqueueSVMMigrateMem(cl_command_queue commandQueue, cl_uint numSvmPointers, const void **svmPointers, const size_t *sizes, const cl_mem_migration_flags flags, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); cl_kernel CL_API_CALL clCloneKernel(cl_kernel sourceKernel, cl_int *errcodeRet); extern CL_API_ENTRY cl_command_queue CL_API_CALL 
clCreatePerfCountersCommandQueueINTEL( cl_context context, cl_device_id device, cl_command_queue_properties properties, cl_uint configuration, cl_int *errcodeRet); extern CL_API_ENTRY cl_int CL_API_CALL clSetPerformanceConfigurationINTEL( cl_device_id device, cl_uint count, cl_uint *offsets, cl_uint *values); extern CL_API_ENTRY cl_event CL_API_CALL clCreateEventFromGLsyncKHR( cl_context context, cl_GLsync sync, cl_int *errcodeRet) CL_EXT_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithILKHR( cl_context context, const void *il, size_t length, cl_int *errcodeRet) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clGetKernelSuggestedLocalWorkSizeKHR( cl_command_queue command_queue, cl_kernel kernel, cl_uint work_dim, const size_t *global_work_offset, const size_t *global_work_size, size_t *suggested_local_work_size) CL_API_SUFFIX__VERSION_3_0; void *clHostMemAllocINTEL( cl_context context, const cl_mem_properties_intel *properties, size_t size, cl_uint alignment, cl_int *errcodeRet); void *clDeviceMemAllocINTEL( cl_context context, cl_device_id device, const cl_mem_properties_intel *properties, size_t size, cl_uint alignment, cl_int *errcodeRet); void *clSharedMemAllocINTEL( cl_context context, cl_device_id device, const cl_mem_properties_intel *properties, size_t size, cl_uint alignment, cl_int *errcodeRet); cl_int clMemFreeINTEL( cl_context context, void *ptr); cl_int clMemBlockingFreeINTEL( cl_context context, void *ptr); cl_int clGetMemAllocInfoINTEL( cl_context context, const void *ptr, cl_mem_info_intel paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet); cl_int clSetKernelArgMemPointerINTEL( cl_kernel kernel, cl_uint argIndex, const void *argValue); cl_int clEnqueueMemsetINTEL( cl_command_queue commandQueue, void *dstPtr, cl_int value, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); cl_int clEnqueueMemFillINTEL( cl_command_queue 
commandQueue, void *dstPtr, const void *pattern, size_t patternSize, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); cl_int clEnqueueMemcpyINTEL( cl_command_queue commandQueue, cl_bool blocking, void *dstPtr, const void *srcPtr, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); cl_int clEnqueueMigrateMemINTEL( cl_command_queue commandQueue, const void *ptr, size_t size, cl_mem_migration_flags flags, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); cl_int clEnqueueMemAdviseINTEL( cl_command_queue commandQueue, const void *ptr, size_t size, cl_mem_advice_intel advice, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); } cl_int CL_API_CALL clGetDeviceFunctionPointerINTEL( cl_device_id device, cl_program program, const char *functionName, cl_ulong *functionPointerRet); cl_int CL_API_CALL clGetDeviceGlobalVariablePointerINTEL( cl_device_id device, cl_program program, const char *globalVariableName, size_t *globalVariableSizeRet, void **globalVariablePointerRet); cl_int CL_API_CALL clGetKernelSuggestedLocalWorkSizeINTEL( cl_command_queue commandQueue, cl_kernel kernel, cl_uint workDim, const size_t *globalWorkOffset, const size_t *globalWorkSize, size_t *suggestedLocalWorkSize); cl_int CL_API_CALL clGetKernelMaxConcurrentWorkGroupCountINTEL( cl_command_queue commandQueue, cl_kernel kernel, cl_uint workDim, const size_t *globalWorkOffset, const size_t *localWorkSize, size_t *suggestedWorkGroupCount); cl_int CL_API_CALL clEnqueueNDCountKernelINTEL( cl_command_queue commandQueue, cl_kernel kernel, cl_uint workDim, const size_t *globalWorkOffset, const size_t *workgroupCount, const size_t *localWorkSize, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); // OpenCL 2.2 cl_int CL_API_CALL clSetProgramReleaseCallback( cl_program program, void(CL_CALLBACK *pfnNotify)(cl_program /* program */, void * /* user_data 
*/), void *userData); cl_int CL_API_CALL clSetProgramSpecializationConstant( cl_program program, cl_uint specId, size_t specSize, const void *specValue); // OpenCL 3.0 cl_mem CL_API_CALL clCreateBufferWithProperties( cl_context context, const cl_mem_properties *properties, cl_mem_flags flags, size_t size, void *hostPtr, cl_int *errcodeRet); cl_mem CL_API_CALL clCreateImageWithProperties( cl_context context, const cl_mem_properties *properties, cl_mem_flags flags, const cl_image_format *imageFormat, const cl_image_desc *imageDesc, void *hostPtr, cl_int *errcodeRet); cl_int CL_API_CALL clSetContextDestructorCallback( cl_context context, void(CL_CALLBACK *pfn_notify)(cl_context /* context */, void * /* user_data */), void *user_data); compute-runtime-22.14.22890/opencl/source/api/api_enter.h000066400000000000000000000010201422164147700230130ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/utilities/logger.h" #include "shared/source/utilities/perf_profiler.h" #define API_ENTER(retValPointer) \ LoggerApiEnterWrapper::enabled()> ApiWrapperForSingleCall(__FUNCTION__, retValPointer) #if KMD_PROFILING == 1 #undef API_ENTER #define API_ENTER(x) \ PerfProfilerApiWrapper globalPerfProfilersWrapperInstanceForSingleApiFunction(__FUNCTION__) #endif compute-runtime-22.14.22890/opencl/source/api/cl_types.h000066400000000000000000000016331422164147700227010ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/api/dispatch.h" #include struct ClDispatch { SEntryPointsTable dispatch; ClDispatch() : dispatch(globalDispatchTable) { } }; struct _cl_accelerator_intel : public ClDispatch { }; struct _cl_command_queue : public ClDispatch { }; struct _cl_context : public ClDispatch { bool isSharedContext = false; }; struct _cl_device_id : public ClDispatch { }; struct _cl_event : public ClDispatch { }; struct 
_cl_kernel : public ClDispatch {
};

struct _cl_mem : public ClDispatch {
};

struct _cl_platform_id : public ClDispatch {
};

struct _cl_program : public ClDispatch {
};

struct _cl_sampler : public ClDispatch {
};

// Returns true when `object` is non-null and its dispatch.icdDispatch member
// points at icdGlobalDispatchTable; false otherwise.
// NOTE(review): the template parameter list after `template` appears to have
// been lost in this copy of the file (Type is otherwise undeclared) - restore
// it from upstream.
template inline bool isValidObject(Type object) {
    return object && object->dispatch.icdDispatch == &icdGlobalDispatchTable;
}
compute-runtime-22.14.22890/opencl/source/api/dispatch.cpp000066400000000000000000000136301422164147700232110ustar00rootroot00000000000000/*
 * Copyright (C) 2018-2021 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "dispatch.h"

#include "api.h"

// Table of entry points whose address is stored as the first member of
// globalDispatchTable at the end of this file. The initializer is positional:
// each value fills the next slot of SDispatchTable, so entries must not be
// added, removed or reordered without changing the struct accordingly.
// nullptr marks a slot that is not given a function here; the trailing
// comments name the entry point the slot stands for.
SDispatchTable icdGlobalDispatchTable =
    {
        clGetPlatformIDs,
        clGetPlatformInfo,
        clGetDeviceIDs,
        clGetDeviceInfo,
        clCreateContext,
        clCreateContextFromType,
        clRetainContext,
        clReleaseContext,
        clGetContextInfo,
        clCreateCommandQueue,
        clRetainCommandQueue,
        clReleaseCommandQueue,
        clGetCommandQueueInfo,
        clSetCommandQueueProperty,
        clCreateBuffer,
        clCreateImage2D,
        clCreateImage3D,
        clRetainMemObject,
        clReleaseMemObject,
        clGetSupportedImageFormats,
        clGetMemObjectInfo,
        clGetImageInfo,
        clCreateSampler,
        clRetainSampler,
        clReleaseSampler,
        clGetSamplerInfo,
        clCreateProgramWithSource,
        clCreateProgramWithBinary,
        clRetainProgram,
        clReleaseProgram,
        clBuildProgram,
        clUnloadCompiler,
        clGetProgramInfo,
        clGetProgramBuildInfo,
        clCreateKernel,
        clCreateKernelsInProgram,
        clRetainKernel,
        clReleaseKernel,
        clSetKernelArg,
        clGetKernelInfo,
        clGetKernelWorkGroupInfo,
        clWaitForEvents,
        clGetEventInfo,
        clRetainEvent,
        clReleaseEvent,
        clGetEventProfilingInfo,
        clFlush,
        clFinish,
        clEnqueueReadBuffer,
        clEnqueueWriteBuffer,
        clEnqueueCopyBuffer,
        clEnqueueReadImage,
        clEnqueueWriteImage,
        clEnqueueCopyImage,
        clEnqueueCopyImageToBuffer,
        clEnqueueCopyBufferToImage,
        clEnqueueMapBuffer,
        clEnqueueMapImage,
        clEnqueueUnmapMemObject,
        clEnqueueNDRangeKernel,
        clEnqueueTask,
        clEnqueueNativeKernel,
        clEnqueueMarker,
        clEnqueueWaitForEvents,
        clEnqueueBarrier,
        clGetExtensionFunctionAddress,

        /* cl_khr_gl_sharing */
        // Nine slots, all left empty in this initializer.
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,

        /* cl_khr_d3d10_sharing */
        nullptr, // clGetDeviceIDsFromD3D10KHR,
        nullptr, // clCreateFromD3D10BufferKHR,
        nullptr, // clCreateFromD3D10Texture2DKHR,
        nullptr, // clCreateFromD3D10Texture3DKHR,
        nullptr, // clEnqueueAcquireD3D10ObjectsKHR,
        nullptr, // clEnqueueReleaseD3D10ObjectsKHR,

        /* OpenCL 1.1 */
        clSetEventCallback,
        clCreateSubBuffer,
        clSetMemObjectDestructorCallback,
        clCreateUserEvent,
        clSetUserEventStatus,
        clEnqueueReadBufferRect,
        clEnqueueWriteBufferRect,
        clEnqueueCopyBufferRect,

        /* cl_ext_device_fission */
        nullptr, //clCreateSubDevicesEXT,
        nullptr, //clRetainDeviceEXT,
        nullptr, //clReleaseDeviceEXT,

        /* cl_khr_gl_event */
        nullptr,

        /* OpenCL 1.2 */
        clCreateSubDevices,
        clRetainDevice,
        clReleaseDevice,
        clCreateImage,
        clCreateProgramWithBuiltInKernels,
        clCompileProgram,
        clLinkProgram,
        clUnloadPlatformCompiler,
        clGetKernelArgInfo,
        clEnqueueFillBuffer,
        clEnqueueFillImage,
        clEnqueueMigrateMemObjects,
        clEnqueueMarkerWithWaitList,
        clEnqueueBarrierWithWaitList,
        clGetExtensionFunctionAddressForPlatform,
        nullptr,

        /* cl_khr_d3d11_sharing */
        nullptr, // clGetDeviceIDsFromD3D11KHR,
        nullptr, // clCreateFromD3D11BufferKHR,
        nullptr, // clCreateFromD3D11Texture2DKHR,
        nullptr, // clCreateFromD3D11Texture3DKHR,
        nullptr, // clCreateFromDX9MediaSurfaceKHR,
        nullptr, // clEnqueueAcquireD3D11ObjectsKHR,
        nullptr, // clEnqueueReleaseD3D11ObjectsKHR,

        /* cl_khr_dx9_media_sharing */
        nullptr, // clGetDeviceIDsFromDX9MediaAdapterKHR,
        nullptr, // clEnqueueAcquireDX9MediaSurfacesKHR,
        nullptr, // clEnqueueReleaseDX9MediaSurfacesKHR,

        /* cl_khr_egl_image */
        nullptr, //clCreateFromEGLImageKHR,
        nullptr, //clEnqueueAcquireEGLObjectsKHR,
        nullptr, //clEnqueueReleaseEGLObjectsKHR,

        /* cl_khr_egl_event */
        nullptr, //clCreateEventFromEGLSyncKHR,

        /* OpenCL 2.0 */
        clCreateCommandQueueWithProperties,
        clCreatePipe,
        clGetPipeInfo,
        clSVMAlloc,
        clSVMFree,
        clEnqueueSVMFree,
        clEnqueueSVMMemcpy,
        clEnqueueSVMMemFill,
        clEnqueueSVMMap,
        clEnqueueSVMUnmap,
        clCreateSamplerWithProperties,
        clSetKernelArgSVMPointer,
        clSetKernelExecInfo,
        clGetKernelSubGroupInfoKHR,

        /* OpenCL 2.1 */
        clCloneKernel,
        clCreateProgramWithIL,
        clEnqueueSVMMigrateMem,
        clGetDeviceAndHostTimer,
        clGetHostTimer,
        clGetKernelSubGroupInfo,
        clSetDefaultDeviceCommandQueue,

        /* OpenCL 2.2 */
        clSetProgramReleaseCallback,
        clSetProgramSpecializationConstant,

        /* OpenCL 3.0 */
        clCreateBufferWithProperties,
        clCreateImageWithProperties,
        clSetContextDestructorCallback};

// Second entry-point table, also a positional initializer: the order of the
// values must follow the member order of SCRTDispatchTable.
SCRTDispatchTable crtGlobalDispatchTable = {
    clGetKernelArgInfo,

    nullptr, // clGetDeviceIDsFromDX9INTEL,
    nullptr, // clCreateFromDX9MediaSurfaceINTEL,
    nullptr, // clEnqueueAcquireDX9ObjectsINTEL,
    nullptr, // clEnqueueReleaseDX9ObjectsINTEL,
    clGetImageParamsINTEL,
    clCreatePerfCountersCommandQueueINTEL,

    clCreateAcceleratorINTEL,
    clGetAcceleratorInfoINTEL,
    clRetainAcceleratorINTEL,
    clReleaseAcceleratorINTEL,

    // Ten slots left empty in this initializer.
    nullptr,
    nullptr,
    nullptr,
    nullptr,
    nullptr,
    nullptr,
    nullptr,
    nullptr,
    nullptr,
    nullptr,

    clSetPerformanceConfigurationINTEL};

// Pairs the addresses of the two tables above; ClDispatch copies this object
// into every handle it constructs.
SEntryPointsTable globalDispatchTable = {&icdGlobalDispatchTable, &crtGlobalDispatchTable};
compute-runtime-22.14.22890/opencl/source/api/dispatch.h000066400000000000000000001362071422164147700226640ustar00rootroot00000000000000/*
 * Copyright (C) 2018-2021 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#pragma once

#include "CL/cl.h"
#include "CL/cl_ext.h"
#include "CL/cl_ext_intel.h"
#include "CL/cl_gl.h"
#include "CL/cl_gl_ext.h"

#if defined(_WIN32)
// NOTE(review): the header name of the following #include is missing from this
// copy of the file - restore it from upstream.
#include

#include "CL/cl_d3d10.h"
#include "CL/cl_d3d11.h"
#include "CL/cl_dx9_media_sharing.h"
#define CL_DX9_MEDIA_SHARING_INTEL_EXT
#include "shared/source/os_interface/windows/windows_wrapper.h"

#include "CL/cl_dx9_media_sharing_intel.h"
#else
// Outside Windows the D3D/DX9 headers above are not included, so the
// constants and typedefs used by the declarations are supplied here.
#define CL_CONTEXT_D3D10_DEVICE_KHR 0x4014
#define CL_CONTEXT_D3D10_PREFER_SHARED_RESOURCES_KHR 0x402C
#define CL_MEM_D3D10_RESOURCE_KHR 0x4015
typedef cl_uint cl_dx9_device_source_intel;
typedef cl_uint cl_dx9_device_set_intel;
typedef cl_uint cl_dx9_media_adapter_type_khr;
typedef cl_uint cl_dx9_media_adapter_set_khr; typedef cl_uint cl_d3d10_device_source_khr; typedef cl_uint cl_d3d10_device_set_khr; typedef void *IDirect3DSurface9; typedef void *ID3D10Buffer; typedef void *ID3D10Texture2D; typedef void *ID3D10Texture3D; typedef unsigned int UINT; typedef cl_uint cl_d3d11_device_source_khr; typedef cl_uint cl_d3d11_device_set_khr; typedef void *ID3D11Buffer; typedef void *ID3D11Texture2D; typedef void *ID3D11Texture3D; typedef void *HANDLE; #endif typedef cl_bitfield cl_queue_properties_khr; typedef void(CL_CALLBACK *ctxt_logging_fn)(const char *, const void *, size_t, void *); typedef void(CL_CALLBACK *prog_logging_fn)(cl_program, void *); typedef void(CL_CALLBACK *evnt_logging_fn)(cl_event, cl_int, void *); typedef void(CL_CALLBACK *memobj_logging_fn)(cl_mem, void *); typedef void(CL_CALLBACK *svmfree_logging_fn)(cl_command_queue, cl_uint, void *[], void *); /* * * function pointer typedefs * */ // Platform APIs typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clGetPlatformIDs)( cl_uint numEntries, cl_platform_id *platforms, cl_uint *numPlatforms) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clGetPlatformInfo)( cl_platform_id platform, cl_platform_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) CL_API_SUFFIX__VERSION_1_0; // Device APIs typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clGetDeviceIDs)( cl_platform_id platform, cl_device_type deviceType, cl_uint numEntries, cl_device_id *devices, cl_uint *numDevices) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clGetDeviceInfo)( cl_device_id device, cl_device_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) CL_API_SUFFIX__VERSION_1_0; // Context APIs typedef CL_API_ENTRY cl_context(CL_API_CALL *KHRpfn_clCreateContext)( const cl_context_properties *properties, cl_uint numDevices, const cl_device_id *devices, ctxt_logging_fn funcNotify, void *userData, 
cl_int *errcodeRet) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_context(CL_API_CALL *KHRpfn_clCreateContextFromType)( const cl_context_properties *properties, cl_device_type deviceType, ctxt_logging_fn funcNotify, void *userData, cl_int *errcodeRet) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clRetainContext)( cl_context context) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clReleaseContext)( cl_context context) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clGetContextInfo)( cl_context context, cl_context_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) CL_API_SUFFIX__VERSION_1_0; // Command Queue APIs typedef CL_API_ENTRY cl_command_queue(CL_API_CALL *KHRpfn_clCreateCommandQueue)( cl_context context, cl_device_id device, cl_command_queue_properties properties, cl_int *errcodeRet) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clRetainCommandQueue)( cl_command_queue commandQueue) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clReleaseCommandQueue)( cl_command_queue commandQueue) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clGetCommandQueueInfo)( cl_command_queue commandQueue, cl_command_queue_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clSetCommandQueueProperty)( cl_command_queue commandQueue, cl_command_queue_properties properties, cl_bool enable, cl_command_queue_properties *oldProperties) CL_API_SUFFIX__VERSION_1_0; // Memory Object APIs typedef CL_API_ENTRY cl_mem(CL_API_CALL *KHRpfn_clCreateBuffer)( cl_context context, cl_mem_flags flags, size_t size, void *hostPtr, cl_int *errcodeRet) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem(CL_API_CALL *KHRpfn_clCreateImage2D)( 
cl_context context, cl_mem_flags flags, const cl_image_format *imageFormat, size_t imageWidth, size_t imageHeight, size_t imageRowPitch, void *hostPtr, cl_int *errcodeRet) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; typedef CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem(CL_API_CALL *KHRpfn_clCreateImage3D)( cl_context context, cl_mem_flags flags, const cl_image_format *imageFormat, size_t imageWidth, size_t imageHeight, size_t imageDepth, size_t imageRowPitch, size_t imageSlicePitch, void *hostPtr, cl_int *errcodeRet) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clRetainMemObject)( cl_mem memobj) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clReleaseMemObject)( cl_mem memobj) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clGetSupportedImageFormats)( cl_context context, cl_mem_flags flags, cl_mem_object_type imageType, cl_uint numEntries, cl_image_format *imageFormats, cl_uint *numImageFormats) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clGetMemObjectInfo)( cl_mem memobj, cl_mem_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clGetImageInfo)( cl_mem image, cl_image_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) CL_API_SUFFIX__VERSION_1_0; // Sampler APIs typedef CL_API_ENTRY cl_sampler(CL_API_CALL *KHRpfn_clCreateSampler)( cl_context context, cl_bool normalizedCoords, cl_addressing_mode addressingMode, cl_filter_mode filterMode, cl_int *errcodeRet) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_sampler(CL_API_CALL *KHRpfn_clCreateSamplerWithProperties)( cl_context context, const cl_sampler_properties *samplerProperties, cl_int *errcodeRet) CL_API_SUFFIX__VERSION_2_0; typedef CL_API_ENTRY cl_mem(CL_API_CALL *KHRpfn_clCreatePipe)( cl_context context, cl_mem_flags flags, cl_uint 
pipePacketSize, cl_uint pipeMaxPackets, const cl_pipe_properties *properties, cl_int *errcodeRet) CL_API_SUFFIX__VERSION_2_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clGetPipeInfo)( cl_mem image, cl_pipe_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) CL_API_SUFFIX__VERSION_2_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clRetainSampler)( cl_sampler sampler) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clReleaseSampler)( cl_sampler sampler) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clGetSamplerInfo)( cl_sampler sampler, cl_sampler_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) CL_API_SUFFIX__VERSION_1_0; // Program Object APIs typedef CL_API_ENTRY cl_program(CL_API_CALL *KHRpfn_clCreateProgramWithSource)( cl_context context, cl_uint count, const char **strings, const size_t *lengths, cl_int *errcodeRet) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_program(CL_API_CALL *KHRpfn_clCreateProgramWithBinary)( cl_context context, cl_uint numDevices, const cl_device_id *deviceList, const size_t *lengths, const unsigned char **binaries, cl_int *binaryStatus, cl_int *errcodeRet) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clRetainProgram)( cl_program program) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clReleaseProgram)( cl_program program) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clBuildProgram)( cl_program program, cl_uint numDevices, const cl_device_id *deviceList, const char *options, prog_logging_fn funcNotify, void *userData) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int(CL_API_CALL *KHRpfn_clUnloadCompiler)() CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clGetProgramInfo)( cl_program program, cl_program_info paramName, size_t 
paramValueSize, void *paramValue, size_t *paramValueSizeRet) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clGetProgramBuildInfo)( cl_program program, cl_device_id device, cl_program_build_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) CL_API_SUFFIX__VERSION_1_0; // Kernel Object APIs typedef CL_API_ENTRY cl_kernel(CL_API_CALL *KHRpfn_clCreateKernel)( cl_program program, const char *kernelName, cl_int *errcodeRet) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clCreateKernelsInProgram)( cl_program program, cl_uint numKernels, cl_kernel *kernels, cl_uint *numKernelsRet) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clRetainKernel)( cl_kernel kernel) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clReleaseKernel)( cl_kernel kernel) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clSetKernelArg)( cl_kernel kernel, cl_uint argIndex, size_t argSize, const void *argValue) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clGetKernelInfo)( cl_kernel kernel, cl_kernel_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clGetKernelWorkGroupInfo)( cl_kernel kernel, cl_device_id device, cl_kernel_work_group_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) CL_API_SUFFIX__VERSION_1_0; // Event Object APIs typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clWaitForEvents)( cl_uint numEvents, const cl_event *eventList) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clGetEventInfo)( cl_event event, cl_event_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clRetainEvent)( cl_event event) 
CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clReleaseEvent)( cl_event event) CL_API_SUFFIX__VERSION_1_0; // Profiling APIs typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clGetEventProfilingInfo)( cl_event event, cl_profiling_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) CL_API_SUFFIX__VERSION_1_0; // Flush and Finish APIs typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clFlush)( cl_command_queue commandQueue) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clFinish)( cl_command_queue commandQueue) CL_API_SUFFIX__VERSION_1_0; // Enqueued Commands APIs typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clEnqueueReadBuffer)( cl_command_queue commandQueue, cl_mem buffer, cl_bool blockingRead, size_t offset, size_t cb, void *ptr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clEnqueueWriteBuffer)( cl_command_queue commandQueue, cl_mem buffer, cl_bool blockingWrite, size_t offset, size_t cb, const void *ptr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clEnqueueCopyBuffer)( cl_command_queue commandQueue, cl_mem srcBuffer, cl_mem dstBuffer, size_t srcOffset, size_t dstOffset, size_t cb, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clEnqueueReadImage)( cl_command_queue commandQueue, cl_mem image, cl_bool blockingRead, const size_t *origin, const size_t *region, size_t rowPitch, size_t slicePitch, void *ptr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clEnqueueWriteImage)( cl_command_queue commandQueue, cl_mem image, cl_bool blockingWrite, const size_t 
*origin, const size_t *region, size_t inputRowPitch, size_t inputSlicePitch, const void *ptr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clEnqueueCopyImage)( cl_command_queue commandQueue, cl_mem srcImage, cl_mem dstImage, const size_t *srcOrigin, const size_t *dstOrigin, const size_t *region, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clEnqueueCopyImageToBuffer)( cl_command_queue commandQueue, cl_mem srcImage, cl_mem dstBuffer, const size_t *srcOrigin, const size_t *region, size_t dstOffset, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clEnqueueCopyBufferToImage)( cl_command_queue commandQueue, cl_mem srcBuffer, cl_mem dstImage, size_t srcOffset, const size_t *dstOrigin, const size_t *region, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY void *(CL_API_CALL *KHRpfn_clEnqueueMapBuffer)( cl_command_queue commandQueue, cl_mem buffer, cl_bool blockingMap, cl_map_flags mapFlags, size_t offset, size_t cb, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, cl_int *errcodeRet)CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY void *(CL_API_CALL *KHRpfn_clEnqueueMapImage)( cl_command_queue commandQueue, cl_mem image, cl_bool blockingMap, cl_map_flags mapFlags, const size_t *origin, const size_t *region, size_t *imageRowPitch, size_t *imageSlicePitch, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, cl_int *errcodeRet)CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clEnqueueUnmapMemObject)( cl_command_queue commandQueue, cl_mem memobj, void *mappedPtr, cl_uint numEventsInWaitList, const cl_event 
*eventWaitList, cl_event *event) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clEnqueueNDRangeKernel)( cl_command_queue commandQueue, cl_kernel kernel, cl_uint workDim, const size_t *globalWorkOffset, const size_t *globalWorkSize, const size_t *localWorkSize, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clEnqueueTask)( cl_command_queue commandQueue, cl_kernel kernel, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clEnqueueNativeKernel)( cl_command_queue commandQueue, void(CL_CALLBACK *userFunc)(void *), void *args, size_t cbArgs, cl_uint numMemObjects, const cl_mem *memList, const void **argsMemLoc, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int(CL_API_CALL *KHRpfn_clEnqueueMarker)( cl_command_queue commandQueue, cl_event *event) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; typedef CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int(CL_API_CALL *KHRpfn_clEnqueueWaitForEvents)( cl_command_queue commandQueue, cl_uint numEvents, const cl_event *eventList) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; typedef CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int(CL_API_CALL *KHRpfn_clEnqueueBarrier)( cl_command_queue commandQueue) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; typedef CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED void *(CL_API_CALL *KHRpfn_clGetExtensionFunctionAddress)( const char *functionName)CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; typedef CL_API_ENTRY cl_mem(CL_API_CALL *KHRpfn_clCreateFromGLBuffer)( cl_context context, cl_mem_flags flags, cl_GLuint bufobj, cl_int *errcodeRet) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_mem(CL_API_CALL *KHRpfn_clCreateFromGLTexture)( cl_context 
context, cl_mem_flags flags, cl_GLenum textureTarget, cl_GLint miplevel, cl_GLuint texture, cl_int *errcodeRet) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem(CL_API_CALL *KHRpfn_clCreateFromGLTexture2D)( cl_context context, cl_mem_flags flags, cl_GLenum textureTarget, cl_GLint miplevel, cl_GLuint texture, cl_int *errcodeRet) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; typedef CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem(CL_API_CALL *KHRpfn_clCreateFromGLTexture3D)( cl_context context, cl_mem_flags flags, cl_GLenum textureTarget, cl_GLint miplevel, cl_GLuint texture, cl_int *errcodeRet) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; typedef CL_API_ENTRY cl_mem(CL_API_CALL *KHRpfn_clCreateFromGLRenderbuffer)( cl_context context, cl_mem_flags flags, cl_GLuint renderbuffer, cl_int *errcodeRet) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clGetGLObjectInfo)( cl_mem memobj, cl_gl_object_type *glObjectType, cl_GLuint *glObjectName) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clGetGLTextureInfo)( cl_mem memobj, cl_gl_texture_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clEnqueueAcquireGLObjects)( cl_command_queue commandQueue, cl_uint numObjects, const cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clEnqueueReleaseGLObjects)( cl_command_queue commandQueue, cl_uint numObjects, const cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) CL_API_SUFFIX__VERSION_1_0; /* cl_khr_gl_sharing */ typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clGetGLContextInfoKHR)( const cl_context_properties *properties, cl_gl_context_info paramName, size_t paramValueSize, void *paramValue, size_t 
*paramValueSizeRet) CL_API_SUFFIX__VERSION_1_0; #ifndef _WIN32 typedef void *clGetDeviceIDsFromDX9INTEL_fn; typedef void *clCreateFromDX9MediaSurfaceINTEL_fn; typedef void *clEnqueueAcquireDX9ObjectsINTEL_fn; typedef void *clEnqueueReleaseDX9ObjectsINTEL_fn; typedef void *clGetDeviceIDsFromDX9MediaAdapterKHR_fn; typedef void *clCreateFromDX9MediaSurfaceKHR_fn; typedef void *clEnqueueAcquireDX9MediaSurfacesKHR_fn; typedef void *clEnqueueReleaseDX9MediaSurfacesKHR_fn; typedef void *clGetDeviceIDsFromD3D10KHR_fn; typedef void *clCreateFromD3D10BufferKHR_fn; typedef void *clCreateFromD3D10Texture2DKHR_fn; typedef void *clCreateFromD3D10Texture3DKHR_fn; typedef void *clEnqueueAcquireD3D10ObjectsKHR_fn; typedef void *clEnqueueReleaseD3D10ObjectsKHR_fn; typedef void *clGetDeviceIDsFromD3D11KHR_fn; typedef void *clCreateFromD3D11BufferKHR_fn; typedef void *clCreateFromD3D11Texture2DKHR_fn; typedef void *clCreateFromD3D11Texture3DKHR_fn; typedef void *clEnqueueAcquireD3D11ObjectsKHR_fn; typedef void *clEnqueueReleaseD3D11ObjectsKHR_fn; #endif /* OpenCL 1.1 */ /* cl_kgr_gl_event */ typedef CL_API_ENTRY cl_event(CL_API_CALL *KHRpfn_clCreateEventFromGLsyncKHR)( cl_context context, cl_GLsync sync, cl_int *errcodeRet) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clSetEventCallback)( cl_event event, cl_int commandExecCallbackType, evnt_logging_fn pfnNotify, void *userData) CL_API_SUFFIX__VERSION_1_1; typedef CL_API_ENTRY cl_mem(CL_API_CALL *KHRpfn_clCreateSubBuffer)( cl_mem buffer, cl_mem_flags flags, cl_buffer_create_type bufferCreateType, const void *bufferCreateInfo, cl_int *errcodeRet) CL_API_SUFFIX__VERSION_1_1; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clSetMemObjectDestructorCallback)( cl_mem memobj, memobj_logging_fn pfnNotify, void *userData) CL_API_SUFFIX__VERSION_1_1; typedef CL_API_ENTRY cl_event(CL_API_CALL *KHRpfn_clCreateUserEvent)( cl_context context, cl_int *errcodeRet) CL_API_SUFFIX__VERSION_1_1; typedef CL_API_ENTRY 
cl_int(CL_API_CALL *KHRpfn_clSetUserEventStatus)( cl_event event, cl_int executionStatus) CL_API_SUFFIX__VERSION_1_1; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clEnqueueReadBufferRect)( cl_command_queue commandQueue, cl_mem buffer, cl_bool blockingRead, const size_t *bufferOffset, const size_t *hostOffset, const size_t *region, size_t bufferRowPitch, size_t bufferSlicePitch, size_t hostRowPitch, size_t hostSlicePitch, void *ptr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) CL_API_SUFFIX__VERSION_1_1; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clEnqueueWriteBufferRect)( cl_command_queue commandQueue, cl_mem buffer, cl_bool blockingRead, const size_t *bufferOffset, const size_t *hostOffset, const size_t *region, size_t bufferRowPitch, size_t bufferSlicePitch, size_t hostRowPitch, size_t hostSlicePitch, const void *ptr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) CL_API_SUFFIX__VERSION_1_1; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clEnqueueCopyBufferRect)( cl_command_queue commandQueue, cl_mem srcBuffer, cl_mem dstBuffer, const size_t *srcOrigin, const size_t *dstOrigin, const size_t *region, size_t srcRowPitch, size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) CL_API_SUFFIX__VERSION_1_1; /*OpenCL 1.2*/ typedef CL_API_ENTRY cl_mem(CL_API_CALL *KHRpfn_clCreateImage)( cl_context context, cl_mem_flags flags, const cl_image_format *imageFormat, const cl_image_desc *imageDesc, void *hostPtr, cl_int *errcodeRet) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clGetKernelArgInfo)( cl_kernel kernel, cl_uint argIndx, cl_kernel_arg_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clEnqueueFillBuffer)( cl_command_queue commandQueue, cl_mem buffer, const void *pattern, 
size_t patternSize, size_t offset, size_t cb, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clEnqueueFillImage)( cl_command_queue commandQueue, cl_mem image, const void *fillColor, const size_t *origin, const size_t *region, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clEnqueueMigrateMemObjects)( cl_command_queue commandQueue, cl_uint numMemObjects, const cl_mem *memObjects, cl_mem_migration_flags flags, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clEnqueueMarkerWithWaitList)( cl_command_queue commandQueue, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clEnqueueBarrierWithWaitList)( cl_command_queue commandQueue, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clCreateSubDevices)( cl_device_id inDevice, const cl_device_partition_property *properties, cl_uint numEntries, cl_device_id *outDevices, cl_uint *numDevices) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clRetainDevice)( cl_device_id device) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clReleaseDevice)( cl_device_id device) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_program(CL_API_CALL *KHRpfn_clCreateProgramWithBuiltInKernels)( cl_context context, cl_uint numDevices, const cl_device_id *deviceList, const char *kernelNames, cl_int *errcodeRet) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clCompileProgram)( cl_program program, cl_uint numDevices, const cl_device_id *deviceList, const 
char *options, cl_uint numInputHeaders, const cl_program *inputHeaders, const char **headerIncludeNames, void(CL_CALLBACK *pfnNotify)(cl_program program, void *userData), void *userData) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_program(CL_API_CALL *KHRpfn_clLinkProgram)( cl_context context, cl_uint numDevices, const cl_device_id *deviceList, const char *options, cl_uint numInputPrograms, const cl_program *inputPrograms, void(CL_CALLBACK *pfnNotify)(cl_program program, void *userData), void *userData, cl_int *errcodeRet) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clUnloadPlatformCompiler)( cl_platform_id platform) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY void *(CL_API_CALL *KHRpfn_clGetExtensionFunctionAddressForPlatform)( cl_platform_id platform, const char *funcName)CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clCreateSubDevicesEXT)( cl_device_id inDevice, const cl_device_partition_property_ext *partitionProperties, cl_uint numEntries, cl_device_id *outDevices, cl_uint *numDevices); typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clRetainDeviceEXT)( cl_device_id device) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clReleaseDeviceEXT)( cl_device_id device) CL_API_SUFFIX__VERSION_1_0; /*cl_khr_egl_image*/ typedef void *KHRpfn_clCreateFromEGLImageKHR; typedef void *KHRpfn_clEnqueueAcquireEGLObjectsKHR; typedef void *KHRpfn_clEnqueueReleaseEGLObjectsKHR; /*cl_khr_egl_event*/ typedef void *KHRpfn_clCreateEventFromEGLSyncKHR; /*OpenCL2.0*/ typedef CL_API_ENTRY cl_command_queue(CL_API_CALL *KHRpfn_clCreateCommandQueueWithProperties)( cl_context context, cl_device_id device, const cl_queue_properties *properties, cl_int *errcodeRet) CL_API_SUFFIX__VERSION_2_0; typedef CL_API_ENTRY void *(CL_API_CALL *KHRpfn_clSVMAlloc)( cl_context context, cl_svm_mem_flags flags, size_t size, cl_uint alignment)CL_API_SUFFIX__VERSION_2_0; typedef CL_API_ENTRY void(CL_API_CALL 
*KHRpfn_clSVMFree)( cl_context context, void *svmPointer) CL_API_SUFFIX__VERSION_2_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clEnqueueSVMFree)( cl_command_queue commandQueue, cl_uint numSvmPointers, void *svmPointers[], void(CL_CALLBACK *pfnFreeFunc)(cl_command_queue queue, cl_uint numSvmPointers, void *svmPointers[], void *userData), void *userData, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) CL_API_SUFFIX__VERSION_2_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clEnqueueSVMMemcpy)( cl_command_queue commandQueue, cl_bool blockingCopy, void *dstPtr, const void *srcPtr, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) CL_API_SUFFIX__VERSION_2_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clEnqueueSVMMemFill)( cl_command_queue commandQueue, void *svmPtr, const void *pattern, size_t patternSize, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) CL_API_SUFFIX__VERSION_2_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clEnqueueSVMMap)( cl_command_queue commandQueue, cl_bool blockingMap, cl_map_flags mapFlags, void *svmPtr, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) CL_API_SUFFIX__VERSION_2_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clEnqueueSVMUnmap)( cl_command_queue commandQueue, void *svmPtr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) CL_API_SUFFIX__VERSION_2_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clSetKernelArgSVMPointer)( cl_kernel kernel, cl_uint argIndex, const void *argValue) CL_API_SUFFIX__VERSION_2_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clSetKernelExecInfo)( cl_kernel kernel, cl_kernel_exec_info paramName, size_t paramValueSize, const void *paramValue) CL_API_SUFFIX__VERSION_2_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clGetKernelSubGroupInfoKHR)( cl_kernel kernel, cl_device_id device, 
cl_kernel_sub_group_info paramName, size_t inputValueSize, const void *inputValue, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) CL_API_SUFFIX__VERSION_2_0; /*OpenCL2.1*/ typedef CL_API_ENTRY cl_kernel(CL_API_CALL *KHRpfn_clCloneKernel)( cl_kernel sourceKernel, cl_int *errcodeRet) CL_API_SUFFIX__VERSION_2_1; typedef CL_API_ENTRY cl_program(CL_API_CALL *KHRpfn_clCreateProgramWithIL)( cl_context context, const void *il, size_t length, cl_int *errcodeRet) CL_API_SUFFIX__VERSION_2_1; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clEnqueueSVMMigrateMem)( cl_command_queue commandQueue, cl_uint numSvmPointers, const void **svmPointers, const size_t *sizes, const cl_mem_migration_flags flags, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) CL_API_SUFFIX__VERSION_2_1; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clGetDeviceAndHostTimer)( cl_device_id device, cl_ulong *deviceTimestamp, cl_ulong *hostTimestamp) CL_API_SUFFIX__VERSION_2_1; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clGetHostTimer)( cl_device_id device, cl_ulong *hostTimestamp) CL_API_SUFFIX__VERSION_2_1; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clGetKernelSubGroupInfo)( cl_kernel kernel, cl_device_id device, cl_kernel_sub_group_info paramName, size_t inputValueSize, const void *inputValue, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) CL_API_SUFFIX__VERSION_2_1; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clSetDefaultDeviceCommandQueue)( cl_context context, cl_device_id device, cl_command_queue commandQueue) CL_API_SUFFIX__VERSION_2_1; /*OpenCL2.2*/ typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clSetProgramReleaseCallback)( cl_program program, void(CL_CALLBACK *pfnNotify)(cl_program program, void *userData), void *userData) CL_API_SUFFIX__VERSION_2_2; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clSetProgramSpecializationConstant)( cl_program program, cl_uint specIdd, size_t specSize, const void *specValue) 
CL_API_SUFFIX__VERSION_2_2; /*OpenCL3.0*/ typedef CL_API_ENTRY cl_mem(CL_API_CALL *KHRpfn_clCreateBufferWithProperties)( cl_context context, const cl_mem_properties *properties, cl_mem_flags flags, size_t size, void *hostPtr, cl_int *errcodeRet) CL_API_SUFFIX__VERSION_3_0; typedef CL_API_ENTRY cl_mem(CL_API_CALL *KHRpfn_clCreateImageWithProperties)( cl_context context, const cl_mem_properties *properties, cl_mem_flags flags, const cl_image_format *imageFormat, const cl_image_desc *imageDesc, void *hostPtr, cl_int *errcodeRet) CL_API_SUFFIX__VERSION_3_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clSetContextDestructorCallback)( cl_context context, void(CL_CALLBACK *pfnNotify)(cl_context context, void *userData), void *userData) CL_API_SUFFIX__VERSION_3_0; /* clCreateImage */ typedef CL_API_ENTRY cl_int(CL_API_CALL *INTELpfn_clGetImageParamsINTEL)( cl_context context, const cl_image_format *imageFormat, const cl_image_desc *imageDesc, size_t *imageRowPitch, size_t *imageSlicePitch) CL_API_SUFFIX__VERSION_1_1; /* Performance Counter APIs */ typedef CL_API_ENTRY cl_command_queue(CL_API_CALL *INTELpfn_clCreatePerfCountersCommandQueueINTEL)( cl_context context, cl_device_id device, cl_command_queue_properties properties, cl_uint configuration, cl_int *errcodeRet); typedef CL_API_ENTRY cl_int(CL_API_CALL *INTELpfn_clSetPerformanceConfigurationINTEL)( cl_device_id device, cl_uint count, cl_uint *offsets, cl_uint *values); /* cl_intel_accelerator */ typedef CL_API_ENTRY cl_accelerator_intel(CL_API_CALL *INTELpfn_clCreateAcceleratorINTEL)( cl_context context, cl_accelerator_type_intel acceleratorType, size_t descriptorSize, const void *descriptor, cl_int *errcodeRet) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_int(CL_API_CALL *INTELpfn_clGetAcceleratorInfoINTEL)( cl_accelerator_intel accelerator, cl_accelerator_info_intel paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY 
cl_int(CL_API_CALL *INTELpfn_clRetainAcceleratorINTEL)( cl_accelerator_intel accelerator) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_int(CL_API_CALL *INTELpfn_clReleaseAcceleratorINTEL)( cl_accelerator_intel accelerator) CL_API_SUFFIX__VERSION_1_2; /* cl_intel_va_api_media_sharing: the real signatures are declared only when LIBVA is defined; otherwise the same four INTELpfn_*VA_API* names are declared as void (*)() stand-ins so they still exist as types */ #ifdef LIBVA #include "CL/cl_va_api_media_sharing_intel.h" typedef CL_API_ENTRY cl_mem(CL_API_CALL *INTELpfn_clCreateFromVA_APIMediaSurfaceINTEL)( cl_context context, cl_mem_flags flags, VASurfaceID *surface, cl_uint plane, cl_int *errcodeRet) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_int(CL_API_CALL *INTELpfn_clGetDeviceIDsFromVA_APIMediaAdapterINTEL)( cl_platform_id platform, cl_va_api_device_source_intel mediaAdapterType, void *mediaAdapter, cl_va_api_device_set_intel mediaAdapterSet, cl_uint numEntries, cl_device_id *devices, cl_uint *numDevices) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_int(CL_API_CALL *INTELpfn_clEnqueueAcquireVA_APIMediaSurfacesINTEL)( cl_command_queue commandQueue, cl_uint numObjects, const cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_int(CL_API_CALL *INTELpfn_clEnqueueReleaseVA_APIMediaSurfacesINTEL)( cl_command_queue commandQueue, cl_uint numObjects, const cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) CL_API_SUFFIX__VERSION_1_2; #else typedef void (*INTELpfn_clCreateFromVA_APIMediaSurfaceINTEL)(); typedef void (*INTELpfn_clGetDeviceIDsFromVA_APIMediaAdapterINTEL)(); typedef void (*INTELpfn_clEnqueueAcquireVA_APIMediaSurfacesINTEL)(); typedef void (*INTELpfn_clEnqueueReleaseVA_APIMediaSurfacesINTEL)(); #endif /* * * vendor dispatch table structure * * note that the types in the structure KHRicdVendorDispatch mirror the function * names listed in the string table khrIcdVendorDispatchFunctionNames * * NOTE(review): every member is a function pointer and the groups follow the * OpenCL version / extension order marked by the comments inside the struct; * presumably this order is a binary contract with the ICD loader - confirm * before reordering, inserting or removing members * */ struct SDispatchTable { KHRpfn_clGetPlatformIDs clGetPlatformIDs;
KHRpfn_clGetPlatformInfo clGetPlatformInfo; KHRpfn_clGetDeviceIDs clGetDeviceIDs; KHRpfn_clGetDeviceInfo clGetDeviceInfo; KHRpfn_clCreateContext clCreateContext; KHRpfn_clCreateContextFromType clCreateContextFromType; KHRpfn_clRetainContext clRetainContext; KHRpfn_clReleaseContext clReleaseContext; KHRpfn_clGetContextInfo clGetContextInfo; KHRpfn_clCreateCommandQueue clCreateCommandQueue; KHRpfn_clRetainCommandQueue clRetainCommandQueue; KHRpfn_clReleaseCommandQueue clReleaseCommandQueue; KHRpfn_clGetCommandQueueInfo clGetCommandQueueInfo; KHRpfn_clSetCommandQueueProperty clSetCommandQueueProperty; KHRpfn_clCreateBuffer clCreateBuffer; KHRpfn_clCreateImage2D clCreateImage2D; KHRpfn_clCreateImage3D clCreateImage3D; KHRpfn_clRetainMemObject clRetainMemObject; KHRpfn_clReleaseMemObject clReleaseMemObject; KHRpfn_clGetSupportedImageFormats clGetSupportedImageFormats; KHRpfn_clGetMemObjectInfo clGetMemObjectInfo; KHRpfn_clGetImageInfo clGetImageInfo; KHRpfn_clCreateSampler clCreateSampler; KHRpfn_clRetainSampler clRetainSampler; KHRpfn_clReleaseSampler clReleaseSampler; KHRpfn_clGetSamplerInfo clGetSamplerInfo; KHRpfn_clCreateProgramWithSource clCreateProgramWithSource; KHRpfn_clCreateProgramWithBinary clCreateProgramWithBinary; KHRpfn_clRetainProgram clRetainProgram; KHRpfn_clReleaseProgram clReleaseProgram; KHRpfn_clBuildProgram clBuildProgram; KHRpfn_clUnloadCompiler clUnloadCompiler; KHRpfn_clGetProgramInfo clGetProgramInfo; KHRpfn_clGetProgramBuildInfo clGetProgramBuildInfo; KHRpfn_clCreateKernel clCreateKernel; KHRpfn_clCreateKernelsInProgram clCreateKernelsInProgram; KHRpfn_clRetainKernel clRetainKernel; KHRpfn_clReleaseKernel clReleaseKernel; KHRpfn_clSetKernelArg clSetKernelArg; KHRpfn_clGetKernelInfo clGetKernelInfo; KHRpfn_clGetKernelWorkGroupInfo clGetKernelWorkGroupInfo; KHRpfn_clWaitForEvents clWaitForEvents; KHRpfn_clGetEventInfo clGetEventInfo; KHRpfn_clRetainEvent clRetainEvent; KHRpfn_clReleaseEvent clReleaseEvent; KHRpfn_clGetEventProfilingInfo
clGetEventProfilingInfo; KHRpfn_clFlush clFlush; KHRpfn_clFinish clFinish; KHRpfn_clEnqueueReadBuffer clEnqueueReadBuffer; KHRpfn_clEnqueueWriteBuffer clEnqueueWriteBuffer; KHRpfn_clEnqueueCopyBuffer clEnqueueCopyBuffer; KHRpfn_clEnqueueReadImage clEnqueueReadImage; KHRpfn_clEnqueueWriteImage clEnqueueWriteImage; KHRpfn_clEnqueueCopyImage clEnqueueCopyImage; KHRpfn_clEnqueueCopyImageToBuffer clEnqueueCopyImageToBuffer; KHRpfn_clEnqueueCopyBufferToImage clEnqueueCopyBufferToImage; KHRpfn_clEnqueueMapBuffer clEnqueueMapBuffer; KHRpfn_clEnqueueMapImage clEnqueueMapImage; KHRpfn_clEnqueueUnmapMemObject clEnqueueUnmapMemObject; KHRpfn_clEnqueueNDRangeKernel clEnqueueNDRangeKernel; KHRpfn_clEnqueueTask clEnqueueTask; KHRpfn_clEnqueueNativeKernel clEnqueueNativeKernel; KHRpfn_clEnqueueMarker clEnqueueMarker; KHRpfn_clEnqueueWaitForEvents clEnqueueWaitForEvents; KHRpfn_clEnqueueBarrier clEnqueueBarrier; KHRpfn_clGetExtensionFunctionAddress clGetExtensionFunctionAddress; KHRpfn_clCreateFromGLBuffer clCreateFromGLBuffer; KHRpfn_clCreateFromGLTexture2D clCreateFromGLTexture2D; KHRpfn_clCreateFromGLTexture3D clCreateFromGLTexture3D; KHRpfn_clCreateFromGLRenderbuffer clCreateFromGLRenderbuffer; KHRpfn_clGetGLObjectInfo clGetGLObjectInfo; KHRpfn_clGetGLTextureInfo clGetGLTextureInfo; KHRpfn_clEnqueueAcquireGLObjects clEnqueueAcquireGLObjects; KHRpfn_clEnqueueReleaseGLObjects clEnqueueReleaseGLObjects; KHRpfn_clGetGLContextInfoKHR clGetGLContextInfoKHR; /* cl_khr_d3d10_sharing */ clGetDeviceIDsFromD3D10KHR_fn clGetDeviceIDsFromD3D10KHR; clCreateFromD3D10BufferKHR_fn clCreateFromD3D10BufferKHR; clCreateFromD3D10Texture2DKHR_fn clCreateFromD3D10Texture2DKHR; clCreateFromD3D10Texture3DKHR_fn clCreateFromD3D10Texture3DKHR; clEnqueueAcquireD3D10ObjectsKHR_fn clEnqueueAcquireD3D10ObjectsKHR; clEnqueueReleaseD3D10ObjectsKHR_fn clEnqueueReleaseD3D10ObjectsKHR; /* OpenCL 1.1 */ KHRpfn_clSetEventCallback clSetEventCallback; KHRpfn_clCreateSubBuffer clCreateSubBuffer;
KHRpfn_clSetMemObjectDestructorCallback clSetMemObjectDestructorCallback; KHRpfn_clCreateUserEvent clCreateUserEvent; KHRpfn_clSetUserEventStatus clSetUserEventStatus; KHRpfn_clEnqueueReadBufferRect clEnqueueReadBufferRect; KHRpfn_clEnqueueWriteBufferRect clEnqueueWriteBufferRect; KHRpfn_clEnqueueCopyBufferRect clEnqueueCopyBufferRect; /* cl_ext_device_fission */ KHRpfn_clCreateSubDevicesEXT clCreateSubDevicesEXT; KHRpfn_clRetainDeviceEXT clRetainDeviceEXT; KHRpfn_clReleaseDeviceEXT clReleaseDeviceEXT; /* cl_khr_gl_event */ KHRpfn_clCreateEventFromGLsyncKHR clCreateEventFromGLsyncKHR; /* OpenCL 1.2 */ KHRpfn_clCreateSubDevices clCreateSubDevices; KHRpfn_clRetainDevice clRetainDevice; KHRpfn_clReleaseDevice clReleaseDevice; KHRpfn_clCreateImage clCreateImage; KHRpfn_clCreateProgramWithBuiltInKernels clCreateProgramWithBuiltInKernels; KHRpfn_clCompileProgram clCompileProgram; KHRpfn_clLinkProgram clLinkProgram; KHRpfn_clUnloadPlatformCompiler clUnloadPlatformCompiler; KHRpfn_clGetKernelArgInfo clGetKernelArgInfo; KHRpfn_clEnqueueFillBuffer clEnqueueFillBuffer; KHRpfn_clEnqueueFillImage clEnqueueFillImage; KHRpfn_clEnqueueMigrateMemObjects clEnqueueMigrateMemObjects; KHRpfn_clEnqueueMarkerWithWaitList clEnqueueMarkerWithWaitList; KHRpfn_clEnqueueBarrierWithWaitList clEnqueueBarrierWithWaitList; KHRpfn_clGetExtensionFunctionAddressForPlatform clGetExtensionFunctionAddressForPlatform; KHRpfn_clCreateFromGLTexture clCreateFromGLTexture; /* cl_khr_d3d11_sharing */ clGetDeviceIDsFromD3D11KHR_fn clGetDeviceIDsFromD3D11KHR; clCreateFromD3D11BufferKHR_fn clCreateFromD3D11BufferKHR; clCreateFromD3D11Texture2DKHR_fn clCreateFromD3D11Texture2DKHR; clCreateFromD3D11Texture3DKHR_fn clCreateFromD3D11Texture3DKHR; clCreateFromDX9MediaSurfaceKHR_fn clCreateFromDX9MediaSurfaceKHR; clEnqueueAcquireD3D11ObjectsKHR_fn clEnqueueAcquireD3D11ObjectsKHR; clEnqueueReleaseD3D11ObjectsKHR_fn clEnqueueReleaseD3D11ObjectsKHR; /* cl_khr_dx9_media_sharing */ clGetDeviceIDsFromDX9MediaAdapterKHR_fn
clGetDeviceIDsFromDX9MediaAdapterKHR; clEnqueueAcquireDX9MediaSurfacesKHR_fn clEnqueueAcquireDX9MediaSurfacesKHR; clEnqueueReleaseDX9MediaSurfacesKHR_fn clEnqueueReleaseDX9MediaSurfacesKHR; /* cl_khr_egl_image */ KHRpfn_clCreateFromEGLImageKHR clCreateFromEGLImageKHR; KHRpfn_clEnqueueAcquireEGLObjectsKHR clEnqueueAcquireEGLObjectsKHR; KHRpfn_clEnqueueReleaseEGLObjectsKHR clEnqueueReleaseEGLObjectsKHR; /* cl_khr_egl_event */ KHRpfn_clCreateEventFromEGLSyncKHR clCreateEventFromEGLSyncKHR; /* OpenCL 2.0 */ KHRpfn_clCreateCommandQueueWithProperties clCreateCommandQueueWithProperties; KHRpfn_clCreatePipe clCreatePipe; KHRpfn_clGetPipeInfo clGetPipeInfo; KHRpfn_clSVMAlloc clSVMAlloc; KHRpfn_clSVMFree clSVMFree; KHRpfn_clEnqueueSVMFree clEnqueueSVMFree; KHRpfn_clEnqueueSVMMemcpy clEnqueueSVMMemcpy; KHRpfn_clEnqueueSVMMemFill clEnqueueSVMMemFill; KHRpfn_clEnqueueSVMMap clEnqueueSVMMap; KHRpfn_clEnqueueSVMUnmap clEnqueueSVMUnmap; KHRpfn_clCreateSamplerWithProperties clCreateSamplerWithProperties; KHRpfn_clSetKernelArgSVMPointer clSetKernelArgSVMPointer; KHRpfn_clSetKernelExecInfo clSetKernelExecInfo; KHRpfn_clGetKernelSubGroupInfoKHR clGetKernelSubGroupInfoKHR; /* OpenCL 2.1 */ KHRpfn_clCloneKernel clCloneKernel; KHRpfn_clCreateProgramWithIL clCreateProgramWithIL; KHRpfn_clEnqueueSVMMigrateMem clEnqueueSVMMigrateMem; KHRpfn_clGetDeviceAndHostTimer clGetDeviceAndHostTimer; KHRpfn_clGetHostTimer clGetHostTimer; KHRpfn_clGetKernelSubGroupInfo clGetKernelSubGroupInfo; KHRpfn_clSetDefaultDeviceCommandQueue clSetDefaultDeviceCommandQueue; /* OpenCL 2.2 */ KHRpfn_clSetProgramReleaseCallback clSetProgramReleaseCallback; KHRpfn_clSetProgramSpecializationConstant clSetProgramSpecializationConstant; /* OpenCL 3.0 */ KHRpfn_clCreateBufferWithProperties clCreateBufferWithProperties; KHRpfn_clCreateImageWithProperties clCreateImageWithProperties; KHRpfn_clSetContextDestructorCallback clSetContextDestructorCallback; }; struct SCRTDispatchTable { // Support CRT entry point
KHRpfn_clGetKernelArgInfo clGetKernelArgInfo; clGetDeviceIDsFromDX9INTEL_fn clGetDeviceIDsFromDX9INTEL; clCreateFromDX9MediaSurfaceINTEL_fn clCreateFromDX9MediaSurfaceINTEL; clEnqueueAcquireDX9ObjectsINTEL_fn clEnqueueAcquireDX9ObjectsINTEL; clEnqueueReleaseDX9ObjectsINTEL_fn clEnqueueReleaseDX9ObjectsINTEL; INTELpfn_clGetImageParamsINTEL clGetImageParamsINTEL; // API to expose the Performance Counters to applications INTELpfn_clCreatePerfCountersCommandQueueINTEL clCreatePerfCountersCommandQueueINTEL; // Video Analytics Accelerator INTELpfn_clCreateAcceleratorINTEL clCreateAcceleratorINTEL; INTELpfn_clGetAcceleratorInfoINTEL clGetAcceleratorInfoINTEL; INTELpfn_clRetainAcceleratorINTEL clRetainAcceleratorINTEL; INTELpfn_clReleaseAcceleratorINTEL clReleaseAcceleratorINTEL; /* placeholderNN members are untyped void* slots with no entry point attached; NOTE(review): presumably they keep the following members at fixed positions - confirm before removing any */ void *placeholder12; void *placeholder13; // VAMedia sharing extension /* with LIBVA the next four members are the typed VA-API entry points; without it they are four void* placeholders (14-17), so the member count is the same in both configurations */ #ifdef LIBVA INTELpfn_clCreateFromVA_APIMediaSurfaceINTEL clCreateFromVA_APIMediaSurfaceINTEL; INTELpfn_clGetDeviceIDsFromVA_APIMediaAdapterINTEL clGetDeviceIDsFromVA_APIMediaAdapterINTEL; INTELpfn_clEnqueueReleaseVA_APIMediaSurfacesINTEL clEnqueueReleaseVA_APIMediaSurfacesINTEL; INTELpfn_clEnqueueAcquireVA_APIMediaSurfacesINTEL clEnqueueAcquireVA_APIMediaSurfacesINTEL; #else void *placeholder14; void *placeholder15; void *placeholder16; void *placeholder17; #endif void *placeholder18; void *placeholder19; void *placeholder20; void *placeholder21; // OCL Performance Counters configuration INTELpfn_clSetPerformanceConfigurationINTEL clSetPerformanceConfigurationINTEL; }; /* The global tables are only declared here (extern); their definitions are not part of this header */ extern SDispatchTable icdGlobalDispatchTable; extern SCRTDispatchTable crtGlobalDispatchTable; /* Refers to both dispatch tables by pointer */ struct SEntryPointsTable { SDispatchTable *icdDispatch; SCRTDispatchTable *crtDispatch; }; /* Holds both dispatch tables by value */ struct SEntryPointsTableData { SDispatchTable icdDispatch; SCRTDispatchTable crtDispatch; }; extern SEntryPointsTable globalDispatchTable;
compute-runtime-22.14.22890/opencl/source/built_ins/000077500000000000000000000000001422164147700221225ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/source/built_ins/CMakeLists.txt000066400000000000000000000026421422164147700246660ustar00rootroot00000000000000# # Copyright (C) 2018-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(RUNTIME_SRCS_BUILT_INS ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/aux_translation_builtin.h ${CMAKE_CURRENT_SOURCE_DIR}/builtins_dispatch_builder.cpp ${CMAKE_CURRENT_SOURCE_DIR}/builtins_dispatch_builder.h ${CMAKE_CURRENT_SOURCE_DIR}/built_in_ops_vme.h ${CMAKE_CURRENT_SOURCE_DIR}/built_ins.inl ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}unknown_built_in.cpp ${CMAKE_CURRENT_SOURCE_DIR}/vme_builtin.cpp ${CMAKE_CURRENT_SOURCE_DIR}/vme_builtin.h ${CMAKE_CURRENT_SOURCE_DIR}/vme_dispatch_builder.h ) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_BUILT_INS}) set_property(GLOBAL PROPERTY RUNTIME_SRCS_BUILT_INS ${RUNTIME_SRCS_BUILT_INS}) set(RUNTIME_SRCS_BUILT_IN_KERNELS ${CMAKE_CURRENT_SOURCE_DIR}/kernels/vme_block_advanced_motion_estimate_bidirectional_check_intel.builtin_kernel ${CMAKE_CURRENT_SOURCE_DIR}/kernels/vme_block_advanced_motion_estimate_check_intel.builtin_kernel ${CMAKE_CURRENT_SOURCE_DIR}/kernels/vme_block_motion_estimate_intel.builtin_kernel ) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_BUILT_IN_KERNELS}) hide_subdir(registry) hide_subdir(kernels) add_subdirectories() if(NOT (TARGET ${BUILTINS_VME_LIB_NAME})) add_subdirectory(registry) if(COMPILE_BUILT_INS) add_subdirectory(kernels) endif() endif() compute-runtime-22.14.22890/opencl/source/built_ins/aux_translation_builtin.h000066400000000000000000000116271422164147700272430ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/built_ins/built_ins.h" #include "shared/source/helpers/hw_helper.h"
#include "shared/source/helpers/pipe_control_args.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "opencl/source/built_ins/builtins_dispatch_builder.h" #include "opencl/source/helpers/dispatch_info_builder.h" #include "opencl/source/kernel/kernel_objects_for_aux_translation.h" #include "opencl/source/mem_obj/buffer.h" #include namespace NEO { /* Dispatch-info builder specialization for aux translation (its constructor, defined in built_ins.inl, populates EBuiltInOps::AuxTranslation). buildDispatchInfosForAuxTranslation creates one single-dispatch builder per entry of multiDispatchInfo.getKernelObjsForAuxTranslation(): it takes the per-instance kernel from convertToNonAuxKernel or convertToAuxKernel according to operationParams.auxTranslationDirection, binds the same buffer / SVM allocation as args 0 and 1, and uses a 1-D global size of alignUp(size, 512) / 16. The first instance registers a pipe control in dispatchInitCommands (dcFlush = true), the last one in dispatchEpilogueCommands (dcFlush = false). The method always returns true. */ template <> class BuiltInOp : public BuiltinDispatchInfoBuilder { public: BuiltInOp(BuiltIns &kernelsLib, ClDevice &device); template bool buildDispatchInfosForAuxTranslation(MultiDispatchInfo &multiDispatchInfo, const BuiltinOpParams &operationParams) const { size_t kernelInstanceNumber = 0; size_t numKernelObjectsToTranslate = multiDispatchInfo.getKernelObjsForAuxTranslation()->size(); resizeKernelInstances(numKernelObjectsToTranslate); multiDispatchInfo.setBuiltinOpParams(operationParams); for (auto &kernelObj : *multiDispatchInfo.getKernelObjsForAuxTranslation()) { DispatchInfoBuilder builder(clDevice); UNRECOVERABLE_IF(builder.getMaxNumDispatches() != 1); if (kernelInstanceNumber == 0) { // Before Kernel registerPipeControlProgramming(builder.getDispatchInfo(0).dispatchInitCommands, true); } if (kernelInstanceNumber == numKernelObjectsToTranslate - 1) { // After Kernel registerPipeControlProgramming(builder.getDispatchInfo(0).dispatchEpilogueCommands, false); } if (AuxTranslationDirection::AuxToNonAux == operationParams.auxTranslationDirection) { builder.setKernel(convertToNonAuxKernel[kernelInstanceNumber++].get()); } else { UNRECOVERABLE_IF(AuxTranslationDirection::NonAuxToAux != operationParams.auxTranslationDirection); builder.setKernel(convertToAuxKernel[kernelInstanceNumber++].get()); } size_t allocationSize = 0; if (kernelObj.type == KernelObjForAuxTranslation::Type::MEM_OBJ) { auto buffer = static_cast(kernelObj.object); builder.setArg(0, buffer); builder.setArg(1, buffer); allocationSize = alignUp(buffer->getSize(), 512); } else { DEBUG_BREAK_IF(kernelObj.type !=
KernelObjForAuxTranslation::Type::GFX_ALLOC); auto svmAlloc = static_cast(kernelObj.object); auto svmPtr = reinterpret_cast(svmAlloc->getGpuAddressToPatch()); builder.setArgSvmAlloc(0, svmPtr, svmAlloc); builder.setArgSvmAlloc(1, svmPtr, svmAlloc); allocationSize = alignUp(svmAlloc->getUnderlyingBufferSize(), 512); } size_t xGws = allocationSize / 16; builder.setDispatchGeometry(Vec3{xGws, 0, 0}, Vec3{0, 0, 0}, Vec3{0, 0, 0}); builder.bake(multiDispatchInfo); } return true; } protected: using RegisteredMethodDispatcherT = RegisteredMethodDispatcher; /* Emits one pipe control into linearStream; the TimestampPacketDependencies argument is unused */ template static void dispatchPipeControl(LinearStream &linearStream, TimestampPacketDependencies *, const HardwareInfo &hwInfo) { PipeControlArgs args; args.dcFlushEnable = MemorySynchronizationCommands::getDcFlushEnable(dcFlush, hwInfo); MemorySynchronizationCommands::addPipeControl(linearStream, args); } /* Size estimate matching dispatchPipeControl; all three parameters are ignored */ template static size_t getSizeForSinglePipeControl(size_t, const HardwareInfo &, bool) { return MemorySynchronizationCommands::getSizeForSinglePipeControl(); } /* Registers dispatchPipeControl (chosen by dcFlush) and the size estimator on the given dispatcher */ template void registerPipeControlProgramming(RegisteredMethodDispatcherT &dispatcher, bool dcFlush) const { if (dcFlush) { dispatcher.registerMethod(this->dispatchPipeControl); } else { dispatcher.registerMethod(this->dispatchPipeControl); } dispatcher.registerCommandsSizeEstimationMethod(this->getSizeForSinglePipeControl); } /* Declared const but extends the two kernel vectors below, which is why they are mutable */ void resizeKernelInstances(size_t size) const; MultiDeviceKernel *multiDeviceBaseKernel = nullptr; Kernel *baseKernel = nullptr; mutable std::vector> convertToNonAuxKernel; mutable std::vector> convertToAuxKernel; }; } // namespace NEO compute-runtime-22.14.22890/opencl/source/built_ins/built_in_ops_vme.h000066400000000000000000000007231422164147700256320ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "built_in_ops.h" /* VME built-in op identifiers, numbered consecutively after MaxCoreValue */ namespace NEO { namespace EBuiltInOps { using Type = uint32_t; constexpr Type VmeBlockMotionEstimateIntel{MaxCoreValue + 1}; constexpr
Type VmeBlockAdvancedMotionEstimateCheckIntel{MaxCoreValue + 2}; constexpr Type VmeBlockAdvancedMotionEstimateBidirectionalCheckIntel{MaxCoreValue + 3}; } // namespace EBuiltInOps } // namespace NEO compute-runtime-22.14.22890/opencl/source/built_ins/built_ins.inl000066400000000000000000000031611422164147700246170ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/built_ins/aux_translation_builtin.h" #include "opencl/source/built_ins/populate_built_ins.inl" #include "opencl/source/kernel/kernel.h" #include "opencl/source/mem_obj/buffer.h" /* Out-of-line members of the aux-translation BuiltInOp. The constructor populates EBuiltInOps::AuxTranslation with the "fullCopy" kernel, caches the kernel for clDevice's root device index in baseKernel, and pre-creates 5 kernel pairs. resizeKernelInstances(size) only ever grows: starting at the current convertToNonAuxKernel.size(), it appends one NonAuxToAux and one AuxToNonAux kernel per missing index, each created from baseKernel's program and kernel info and then cloned from baseKernel; a size not larger than the current count adds nothing. */ namespace NEO { BuiltInOp::BuiltInOp(BuiltIns &kernelsLib, ClDevice &device) : BuiltinDispatchInfoBuilder(kernelsLib, device) { BuiltinDispatchInfoBuilder::populate(EBuiltInOps::AuxTranslation, "", "fullCopy", multiDeviceBaseKernel); baseKernel = multiDeviceBaseKernel->getKernel(clDevice.getRootDeviceIndex()); resizeKernelInstances(5); } void BuiltInOp::resizeKernelInstances(size_t size) const { convertToNonAuxKernel.reserve(size); convertToAuxKernel.reserve(size); for (size_t i = convertToNonAuxKernel.size(); i < size; i++) { auto clonedNonAuxToAuxKernel = Kernel::create(baseKernel->getProgram(), baseKernel->getKernelInfo(), clDevice, nullptr); clonedNonAuxToAuxKernel->setAuxTranslationDirection(AuxTranslationDirection::NonAuxToAux); auto clonedAuxToNonAuxKernel = Kernel::create(baseKernel->getProgram(), baseKernel->getKernelInfo(), clDevice, nullptr); clonedAuxToNonAuxKernel->setAuxTranslationDirection(AuxTranslationDirection::AuxToNonAux); clonedNonAuxToAuxKernel->cloneKernel(baseKernel); clonedAuxToNonAuxKernel->cloneKernel(baseKernel); convertToAuxKernel.emplace_back(clonedNonAuxToAuxKernel); convertToNonAuxKernel.emplace_back(clonedAuxToNonAuxKernel); } } } // namespace NEO compute-runtime-22.14.22890/opencl/source/built_ins/builtins_dispatch_builder.cpp000066400000000000000000001174361422164147700300600ustar00rootroot00000000000000/* *
Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/built_ins/builtins_dispatch_builder.h" #include "shared/source/built_ins/built_ins.h" #include "shared/source/built_ins/sip.h" #include "shared/source/compiler_interface/compiler_interface.h" #include "shared/source/helpers/basic_math.h" #include "shared/source/helpers/debug_helpers.h" #include "opencl/source/built_ins/aux_translation_builtin.h" #include "opencl/source/built_ins/built_ins.inl" #include "opencl/source/built_ins/vme_dispatch_builder.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/execution_environment/cl_execution_environment.h" #include "opencl/source/helpers/convert_color.h" #include "opencl/source/helpers/dispatch_info_builder.h" #include "opencl/source/kernel/kernel.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/program/program.h" #include "compiler_options.h" #include #include namespace NEO { /* Builder for EBuiltInOps::CopyBufferToBuffer (see the populate() call in the protected constructor). buildDispatchInfosTyped splits the copy into three 1-D dispatches keyed on the destination address (dstPtr + dstOffset.x): a left leftover up to the next cache-line boundary, a middle part processed in elements of sizeof(uint32_t) * 4 bytes, and a right leftover; both leftovers are clamped to the requested size. When the source address of the middle part is not a multiple of sizeof(uint32_t), the "CopyBufferToBufferMiddleMisaligned" kernel is used instead and receives srcMisalignment * 8 as arg 4. Always returns true. */ template <> class BuiltInOp : public BuiltinDispatchInfoBuilder { public: BuiltInOp(BuiltIns &kernelsLib, ClDevice &device) : BuiltInOp(kernelsLib, device, true) {} template bool buildDispatchInfosTyped(MultiDispatchInfo &multiDispatchInfo) const { DispatchInfoBuilder kernelSplit1DBuilder(clDevice); auto &operationParams = multiDispatchInfo.peekBuiltinOpParams(); uintptr_t start = reinterpret_cast(operationParams.dstPtr) + operationParams.dstOffset.x; size_t middleAlignment = MemoryConstants::cacheLineSize; size_t middleElSize = sizeof(uint32_t) * 4; uintptr_t leftSize = start % middleAlignment; leftSize = (leftSize > 0) ?
(middleAlignment - leftSize) : 0; // calc left leftover size leftSize = std::min(leftSize, operationParams.size.x); // clamp left leftover size to requested size uintptr_t rightSize = (start + operationParams.size.x) % middleAlignment; // calc right leftover size rightSize = std::min(rightSize, operationParams.size.x - leftSize); // clamp uintptr_t middleSizeBytes = operationParams.size.x - leftSize - rightSize; // calc middle size // corner case - fully optimized kernel requires DWORD alignment. If we don't have it, run slower, misaligned kernel const auto srcMiddleStart = reinterpret_cast(operationParams.srcPtr) + operationParams.srcOffset.x + leftSize; const auto srcMisalignment = srcMiddleStart % sizeof(uint32_t); const auto isSrcMisaligned = srcMisalignment != 0u; auto middleSizeEls = middleSizeBytes / middleElSize; // num work items in middle walker // Set-up ISA kernelSplit1DBuilder.setKernel(SplitDispatch::RegionCoordX::Left, kernLeftLeftover->getKernel(clDevice.getRootDeviceIndex())); if (isSrcMisaligned) { kernelSplit1DBuilder.setKernel(SplitDispatch::RegionCoordX::Middle, kernMiddleMisaligned->getKernel(clDevice.getRootDeviceIndex())); } else { kernelSplit1DBuilder.setKernel(SplitDispatch::RegionCoordX::Middle, kernMiddle->getKernel(clDevice.getRootDeviceIndex())); } kernelSplit1DBuilder.setKernel(SplitDispatch::RegionCoordX::Right, kernRightLeftover->getKernel(clDevice.getRootDeviceIndex())); // Set-up common kernel args if (operationParams.srcSvmAlloc) { kernelSplit1DBuilder.setArgSvmAlloc(0, operationParams.srcPtr, operationParams.srcSvmAlloc); } else if (operationParams.srcMemObj) { kernelSplit1DBuilder.setArg(0, operationParams.srcMemObj); } else { kernelSplit1DBuilder.setArgSvm(0, operationParams.size.x + operationParams.srcOffset.x, operationParams.srcPtr, nullptr, CL_MEM_READ_ONLY); } if (operationParams.dstSvmAlloc) { kernelSplit1DBuilder.setArgSvmAlloc(1, operationParams.dstPtr, operationParams.dstSvmAlloc); } else if
(operationParams.dstMemObj) { kernelSplit1DBuilder.setArg(1, operationParams.dstMemObj); } else { kernelSplit1DBuilder.setArgSvm(1, operationParams.size.x + operationParams.dstOffset.x, operationParams.dstPtr, nullptr, 0u); } kernelSplit1DBuilder.setUnifiedMemorySyncRequirement(operationParams.unifiedMemoryArgsRequireMemSync); // Set-up srcOffset kernelSplit1DBuilder.setArg(SplitDispatch::RegionCoordX::Left, 2, static_cast(operationParams.srcOffset.x)); kernelSplit1DBuilder.setArg(SplitDispatch::RegionCoordX::Middle, 2, static_cast(operationParams.srcOffset.x + leftSize)); kernelSplit1DBuilder.setArg(SplitDispatch::RegionCoordX::Right, 2, static_cast(operationParams.srcOffset.x + leftSize + middleSizeBytes)); // Set-up dstOffset kernelSplit1DBuilder.setArg(SplitDispatch::RegionCoordX::Left, 3, static_cast(operationParams.dstOffset.x)); kernelSplit1DBuilder.setArg(SplitDispatch::RegionCoordX::Middle, 3, static_cast(operationParams.dstOffset.x + leftSize)); kernelSplit1DBuilder.setArg(SplitDispatch::RegionCoordX::Right, 3, static_cast(operationParams.dstOffset.x + leftSize + middleSizeBytes)); if (isSrcMisaligned) { kernelSplit1DBuilder.setArg(SplitDispatch::RegionCoordX::Middle, 4, static_cast(srcMisalignment * 8)); } // Set-up work sizes // Note for split walker, it would be just builder.setDispatchGeometry(GWS, ELWS, OFFSET) kernelSplit1DBuilder.setDispatchGeometry(SplitDispatch::RegionCoordX::Left, Vec3{leftSize, 0, 0}, Vec3{0, 0, 0}, Vec3{0, 0, 0}); kernelSplit1DBuilder.setDispatchGeometry(SplitDispatch::RegionCoordX::Middle, Vec3{middleSizeEls, 0, 0}, Vec3{0, 0, 0}, Vec3{0, 0, 0}); kernelSplit1DBuilder.setDispatchGeometry(SplitDispatch::RegionCoordX::Right, Vec3{rightSize, 0, 0}, Vec3{0, 0, 0}, Vec3{0, 0, 0}); kernelSplit1DBuilder.bake(multiDispatchInfo); return true; } bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo) const override { return buildDispatchInfosTyped(multiDispatchInfo); } protected: MultiDeviceKernel *kernLeftLeftover = nullptr;
MultiDeviceKernel *kernMiddle = nullptr; MultiDeviceKernel *kernMiddleMisaligned = nullptr; MultiDeviceKernel *kernRightLeftover = nullptr; /* populateKernels = false lets a derived builder populate the kernel pointers itself */ BuiltInOp(BuiltIns &kernelsLib, ClDevice &device, bool populateKernels) : BuiltinDispatchInfoBuilder(kernelsLib, device) { if (populateKernels) { populate(EBuiltInOps::CopyBufferToBuffer, "", "CopyBufferToBufferLeftLeftover", kernLeftLeftover, "CopyBufferToBufferMiddle", kernMiddle, "CopyBufferToBufferMiddleMisaligned", kernMiddleMisaligned, "CopyBufferToBufferRightLeftover", kernRightLeftover); } } }; /* Stateless variant: skips the base populate (populateKernels = false) and populates the same four kernel names from EBuiltInOps::CopyBufferToBufferStateless with CompilerOptions::greaterThan4gbBuffersRequired */ template <> class BuiltInOp : public BuiltInOp { public: BuiltInOp(BuiltIns &kernelsLib, ClDevice &device) : BuiltInOp(kernelsLib, device, false) { populate(EBuiltInOps::CopyBufferToBufferStateless, CompilerOptions::greaterThan4gbBuffersRequired, "CopyBufferToBufferLeftLeftover", kernLeftLeftover, "CopyBufferToBufferMiddle", kernMiddle, "CopyBufferToBufferMiddleMisaligned", kernMiddleMisaligned, "CopyBufferToBufferRightLeftover", kernRightLeftover); } bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo) const override { return buildDispatchInfosTyped(multiDispatchInfo); } }; /* Builder for EBuiltInOps::CopyBufferRect. buildDispatchInfosTyped issues a single unsplit dispatch over operationParams.size and selects the 3-D kernel when the region depth or a relevant z offset makes the copy three-dimensional, the 2-D kernel otherwise. For host pointers the argument size comes from Buffer::calculateHostPtrSize; in the 2-D case the pointer is additionally aligned down to 4 bytes and the remainder is added to the x origin. Always returns true. */ template <> class BuiltInOp : public BuiltinDispatchInfoBuilder { public: BuiltInOp(BuiltIns &kernelsLib, ClDevice &device) : BuiltInOp(kernelsLib, device, true) {} template bool buildDispatchInfosTyped(MultiDispatchInfo &multiDispatchInfo) const { DispatchInfoBuilder kernelNoSplit3DBuilder(clDevice); auto &operationParams = multiDispatchInfo.peekBuiltinOpParams(); size_t hostPtrSize = 0; bool is3D = false; if (operationParams.srcMemObj && operationParams.dstMemObj) { DEBUG_BREAK_IF(!((operationParams.srcPtr == nullptr) && (operationParams.dstPtr == nullptr))); is3D = (operationParams.size.z > 1) || (operationParams.srcOffset.z > 0) || (operationParams.dstOffset.z > 0); } else { if (operationParams.srcPtr) { size_t origin[] = {operationParams.srcOffset.x, operationParams.srcOffset.y, operationParams.srcOffset.z}; size_t region[] =
{operationParams.size.x, operationParams.size.y, operationParams.size.z}; hostPtrSize = Buffer::calculateHostPtrSize(origin, region, operationParams.srcRowPitch, operationParams.srcSlicePitch); is3D = (operationParams.size.z > 1) || (operationParams.dstOffset.z > 0); } else if (operationParams.dstPtr) { size_t origin[] = {operationParams.dstOffset.x, operationParams.dstOffset.y, operationParams.dstOffset.z}; size_t region[] = {operationParams.size.x, operationParams.size.y, operationParams.size.z}; hostPtrSize = Buffer::calculateHostPtrSize(origin, region, operationParams.dstRowPitch, operationParams.dstSlicePitch); is3D = (operationParams.size.z > 1) || (operationParams.srcOffset.z > 0); } else { /* neither mem objects nor a host pointer were supplied: unconditional debug break */ DEBUG_BREAK_IF(!false); } } // Set-up ISA /* selects kernelBytes[1] (2-D) or kernelBytes[2] (3-D); kernelBytes[0] is populated but never selected here */ int dimensions = is3D ? 3 : 2; kernelNoSplit3DBuilder.setKernel(kernelBytes[dimensions - 1]->getKernel(clDevice.getRootDeviceIndex())); size_t srcOffsetFromAlignedPtr = 0; size_t dstOffsetFromAlignedPtr = 0; // arg0 = src if (operationParams.srcMemObj) { kernelNoSplit3DBuilder.setArg(0, operationParams.srcMemObj); } else { void *srcPtrToSet = operationParams.srcPtr; if (!is3D) { auto srcPtr = ptrOffset(operationParams.srcPtr, operationParams.srcOffset.z * operationParams.srcSlicePitch); srcPtrToSet = alignDown(srcPtr, 4); srcOffsetFromAlignedPtr = ptrDiff(srcPtr, srcPtrToSet); } kernelNoSplit3DBuilder.setArgSvm(0, hostPtrSize, srcPtrToSet, nullptr, CL_MEM_READ_ONLY); } // arg1 = dst if (operationParams.dstMemObj) { kernelNoSplit3DBuilder.setArg(1, operationParams.dstMemObj); } else { void *dstPtrToSet = operationParams.dstPtr; if (!is3D) { auto dstPtr = ptrOffset(operationParams.dstPtr, operationParams.dstOffset.z * operationParams.dstSlicePitch); dstPtrToSet = alignDown(dstPtr, 4); dstOffsetFromAlignedPtr = ptrDiff(dstPtr, dstPtrToSet); } kernelNoSplit3DBuilder.setArgSvm(1, hostPtrSize, dstPtrToSet, nullptr, 0u); } // arg2 = srcOrigin OffsetType kSrcOrigin[4] = {static_cast(operationParams.srcOffset.x + srcOffsetFromAlignedPtr),
static_cast(operationParams.srcOffset.y), static_cast(operationParams.srcOffset.z), 0}; kernelNoSplit3DBuilder.setArg(2, sizeof(OffsetType) * 4, kSrcOrigin); // arg3 = dstOrigin OffsetType kDstOrigin[4] = {static_cast(operationParams.dstOffset.x + dstOffsetFromAlignedPtr), static_cast(operationParams.dstOffset.y), static_cast(operationParams.dstOffset.z), 0}; kernelNoSplit3DBuilder.setArg(3, sizeof(OffsetType) * 4, kDstOrigin); // arg4 = srcPitch OffsetType kSrcPitch[2] = {static_cast(operationParams.srcRowPitch), static_cast(operationParams.srcSlicePitch)}; kernelNoSplit3DBuilder.setArg(4, sizeof(OffsetType) * 2, kSrcPitch); // arg5 = dstPitch OffsetType kDstPitch[2] = {static_cast(operationParams.dstRowPitch), static_cast(operationParams.dstSlicePitch)}; kernelNoSplit3DBuilder.setArg(5, sizeof(OffsetType) * 2, kDstPitch); // Set-up work sizes kernelNoSplit3DBuilder.setDispatchGeometry(operationParams.size, Vec3{0, 0, 0}, Vec3{0, 0, 0}); kernelNoSplit3DBuilder.bake(multiDispatchInfo); return true; } bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo) const override { return buildDispatchInfosTyped(multiDispatchInfo); } protected: MultiDeviceKernel *kernelBytes[3]{}; /* populateKernels = false lets a derived builder fill kernelBytes itself; slots 0 and 1 both receive "CopyBufferRectBytes2d" */ BuiltInOp(BuiltIns &kernelsLib, ClDevice &device, bool populateKernels) : BuiltinDispatchInfoBuilder(kernelsLib, device) { if (populateKernels) { populate(EBuiltInOps::CopyBufferRect, "", "CopyBufferRectBytes2d", kernelBytes[0], "CopyBufferRectBytes2d", kernelBytes[1], "CopyBufferRectBytes3d", kernelBytes[2]); } } }; /* Stateless variant: skips the base populate (populateKernels = false) and fills kernelBytes from EBuiltInOps::CopyBufferRectStateless with CompilerOptions::greaterThan4gbBuffersRequired */ template <> class BuiltInOp : public BuiltInOp { public: BuiltInOp(BuiltIns &kernelsLib, ClDevice &device) : BuiltInOp(kernelsLib, device, false) { populate(EBuiltInOps::CopyBufferRectStateless, CompilerOptions::greaterThan4gbBuffersRequired, "CopyBufferRectBytes2d", kernelBytes[0], "CopyBufferRectBytes2d", kernelBytes[1], "CopyBufferRectBytes3d", kernelBytes[2]); } bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo) const override { return
buildDispatchInfosTyped(multiDispatchInfo); } }; /* Builder for EBuiltInOps::FillBuffer. buildDispatchInfosTyped uses the same left / middle / right split on the destination address as the buffer-to-buffer copy, but the middle part is counted in sizeof(uint32_t) elements. srcMemObj carries the fill pattern and is expected to be non-null with a zero srcOffset (debug break otherwise); the destination is either dstSvmAlloc or dstMemObj. Always returns true. */ template <> class BuiltInOp : public BuiltinDispatchInfoBuilder { public: BuiltInOp(BuiltIns &kernelsLib, ClDevice &device) : BuiltInOp(kernelsLib, device, true) {} template bool buildDispatchInfosTyped(MultiDispatchInfo &multiDispatchInfo) const { DispatchInfoBuilder kernelSplit1DBuilder(clDevice); auto &operationParams = multiDispatchInfo.peekBuiltinOpParams(); uintptr_t start = reinterpret_cast(operationParams.dstPtr) + operationParams.dstOffset.x; size_t middleAlignment = MemoryConstants::cacheLineSize; size_t middleElSize = sizeof(uint32_t); uintptr_t leftSize = start % middleAlignment; leftSize = (leftSize > 0) ? (middleAlignment - leftSize) : 0; // calc left leftover size leftSize = std::min(leftSize, operationParams.size.x); // clamp left leftover size to requested size uintptr_t rightSize = (start + operationParams.size.x) % middleAlignment; // calc right leftover size rightSize = std::min(rightSize, operationParams.size.x - leftSize); // clamp uintptr_t middleSizeBytes = operationParams.size.x - leftSize - rightSize; // calc middle size auto middleSizeEls = middleSizeBytes / middleElSize; // num work items in middle walker // Set-up ISA kernelSplit1DBuilder.setKernel(SplitDispatch::RegionCoordX::Left, kernLeftLeftover->getKernel(clDevice.getRootDeviceIndex())); kernelSplit1DBuilder.setKernel(SplitDispatch::RegionCoordX::Middle, kernMiddle->getKernel(clDevice.getRootDeviceIndex())); kernelSplit1DBuilder.setKernel(SplitDispatch::RegionCoordX::Right, kernRightLeftover->getKernel(clDevice.getRootDeviceIndex())); DEBUG_BREAK_IF((operationParams.srcMemObj == nullptr) || (operationParams.srcOffset != 0)); DEBUG_BREAK_IF((operationParams.dstMemObj == nullptr) && (operationParams.dstSvmAlloc == nullptr)); // Set-up dstMemObj with buffer if (operationParams.dstSvmAlloc) { kernelSplit1DBuilder.setArgSvmAlloc(0, operationParams.dstPtr, operationParams.dstSvmAlloc); } else { kernelSplit1DBuilder.setArg(0,
operationParams.dstMemObj); } // Set-up dstOffset kernelSplit1DBuilder.setArg(SplitDispatch::RegionCoordX::Left, 1, static_cast(operationParams.dstOffset.x)); kernelSplit1DBuilder.setArg(SplitDispatch::RegionCoordX::Middle, 1, static_cast(operationParams.dstOffset.x + leftSize)); kernelSplit1DBuilder.setArg(SplitDispatch::RegionCoordX::Right, 1, static_cast(operationParams.dstOffset.x + leftSize + middleSizeBytes)); // Set-up srcMemObj with pattern auto graphicsAllocation = operationParams.srcMemObj->getMultiGraphicsAllocation().getDefaultGraphicsAllocation(); kernelSplit1DBuilder.setArgSvm(2, operationParams.srcMemObj->getSize(), graphicsAllocation->getUnderlyingBuffer(), graphicsAllocation, CL_MEM_READ_ONLY); // Set-up patternSizeInEls kernelSplit1DBuilder.setArg(SplitDispatch::RegionCoordX::Left, 3, static_cast(operationParams.srcMemObj->getSize())); kernelSplit1DBuilder.setArg(SplitDispatch::RegionCoordX::Middle, 3, static_cast(operationParams.srcMemObj->getSize() / middleElSize)); kernelSplit1DBuilder.setArg(SplitDispatch::RegionCoordX::Right, 3, static_cast(operationParams.srcMemObj->getSize())); // Set-up work sizes // Note for split walker, it would be just builder.SetDipatchGeomtry(GWS, ELWS, OFFSET) kernelSplit1DBuilder.setDispatchGeometry(SplitDispatch::RegionCoordX::Left, Vec3{leftSize, 0, 0}, Vec3{0, 0, 0}, Vec3{0, 0, 0}); kernelSplit1DBuilder.setDispatchGeometry(SplitDispatch::RegionCoordX::Middle, Vec3{middleSizeEls, 0, 0}, Vec3{0, 0, 0}, Vec3{0, 0, 0}); kernelSplit1DBuilder.setDispatchGeometry(SplitDispatch::RegionCoordX::Right, Vec3{rightSize, 0, 0}, Vec3{0, 0, 0}, Vec3{0, 0, 0}); kernelSplit1DBuilder.bake(multiDispatchInfo); return true; } bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo) const override { return buildDispatchInfosTyped(multiDispatchInfo); } protected: MultiDeviceKernel *kernLeftLeftover = nullptr; MultiDeviceKernel *kernMiddle = nullptr; MultiDeviceKernel *kernRightLeftover = nullptr; BuiltInOp(BuiltIns &kernelsLib, 
              // Remainder of the protected fill-buffer constructor that starts before this range:
              // loads the three stateful kernels only when populateKernels is true.
              ClDevice &device, bool populateKernels)
        : BuiltinDispatchInfoBuilder(kernelsLib, device) {
        if (populateKernels) {
            populate(EBuiltInOps::FillBuffer,
                     "",
                     "FillBufferLeftLeftover", kernLeftLeftover,
                     "FillBufferMiddle", kernMiddle,
                     "FillBufferRightLeftover", kernRightLeftover);
        }
    }
};

// Stateless fill-buffer builder: constructs the base without kernels, then loads the same
// three kernel names from EBuiltInOps::FillBufferStateless, built with
// CompilerOptions::greaterThan4gbBuffersRequired.
// NOTE(review): template argument lists are absent in this view; tokens are kept as found.
template <>
class BuiltInOp : public BuiltInOp {
  public:
    BuiltInOp(BuiltIns &kernelsLib, ClDevice &device) : BuiltInOp(kernelsLib, device, false) {
        populate(EBuiltInOps::FillBufferStateless,
                 CompilerOptions::greaterThan4gbBuffersRequired,
                 "FillBufferLeftLeftover", kernLeftLeftover,
                 "FillBufferMiddle", kernMiddle,
                 "FillBufferRightLeftover", kernRightLeftover);
    }

    // Forwards to the typed implementation inherited from the base builder.
    bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfos) const override {
        return buildDispatchInfosTyped(multiDispatchInfos);
    }
};

// Dispatch builder that copies from a host pointer or buffer into an image.
// The kernel variant is chosen by the image's element size, indexed in kernelBytes
// by log2(bytes per pixel).
template <>
class BuiltInOp : public BuiltinDispatchInfoBuilder {
  public:
    // Public constructor: delegates to the protected one and asks it to load the kernels.
    BuiltInOp(BuiltIns &kernelsLib, ClDevice &device) : BuiltInOp(kernelsLib, device, true) {}

    // Virtual entry point; forwards to the typed implementation.
    bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo) const override {
        return buildDispatchInfosTyped(multiDispatchInfo);
    }

  protected:
    // One kernel per element size: index 0..4 <-> 1, 2, 4, 8, 16 bytes (see populate below).
    MultiDeviceKernel *kernelBytes[5] = {nullptr};

    // Loads the stateful kernels only when populateKernels is true.
    BuiltInOp(BuiltIns &kernelsLib, ClDevice &device, bool populateKernels)
        : BuiltinDispatchInfoBuilder(kernelsLib, device) {
        if (populateKernels) {
            populate(EBuiltInOps::CopyBufferToImage3d,
                     "",
                     "CopyBufferToImage3dBytes", kernelBytes[0],
                     "CopyBufferToImage3d2Bytes", kernelBytes[1],
                     "CopyBufferToImage3d4Bytes", kernelBytes[2],
                     "CopyBufferToImage3d8Bytes", kernelBytes[3],
                     "CopyBufferToImage3d16Bytes", kernelBytes[4]);
        }
    }

    // Builds a single 3D dispatch covering operationParams.size.
    // (The function continues past the end of this range.)
    template
    bool buildDispatchInfosTyped(MultiDispatchInfo &multiDispatchInfo) const {
        DispatchInfoBuilder kernelNoSplit3DBuilder(clDevice);
        auto &operationParams = multiDispatchInfo.peekBuiltinOpParams();
        // Debug-only check: a source (pointer or mem object) must be given and dstPtr must be null.
        DEBUG_BREAK_IF(!(((operationParams.srcPtr != nullptr) || (operationParams.srcMemObj != nullptr)) && (operationParams.dstPtr == nullptr)));

        auto dstImage = castToObjectOrAbort(operationParams.dstMemObj);

        // Redescribe image to be byte-copy
        auto dstImageRedescribed = dstImage->redescribe();
        multiDispatchInfo.pushRedescribedMemObj(std::unique_ptr(dstImageRedescribed)); // life range same as mdi's

        // Calculate srcRowPitch and srcSlicePitch (caller-provided values win; zero means "derive")
        auto bytesPerPixel = dstImage->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes;

        size_t region[] = {operationParams.size.x, operationParams.size.y, operationParams.size.z};

        auto srcRowPitch = operationParams.srcRowPitch ? operationParams.srcRowPitch : region[0] * bytesPerPixel;

        // For 1D image arrays a slice is a single row, otherwise region[1] rows.
        auto srcSlicePitch =
            operationParams.srcSlicePitch ? operationParams.srcSlicePitch : ((dstImage->getImageDesc().image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY ? 1 : region[1]) * srcRowPitch);

        // Determine size of host ptr surface for residency purposes
        size_t hostPtrSize = operationParams.srcPtr ? Image::calculateHostPtrSize(region, srcRowPitch, srcSlicePitch, bytesPerPixel, dstImage->getImageDesc().image_type) : 0;
        hostPtrSize += operationParams.srcOffset.x;

        // Set-up kernel
        auto bytesExponent = Math::log2(bytesPerPixel);
        DEBUG_BREAK_IF(bytesExponent >= 5); // kernelBytes has five entries
        kernelNoSplit3DBuilder.setKernel(kernelBytes[bytesExponent]->getKernel(clDevice.getRootDeviceIndex()));

        // Set-up source host ptr / buffer
        if (operationParams.srcPtr) {
            kernelNoSplit3DBuilder.setArgSvm(0, hostPtrSize, operationParams.srcPtr, nullptr, CL_MEM_READ_ONLY);
        } else {
            kernelNoSplit3DBuilder.setArg(0, operationParams.srcMemObj);
        }

        // Set-up destination image
        kernelNoSplit3DBuilder.setArg(1, dstImageRedescribed, operationParams.dstMipLevel);

        // Set-up srcOffset
        kernelNoSplit3DBuilder.setArg(2, static_cast(operationParams.srcOffset.x));

        // Set-up dstOrigin
        {
            uint32_t origin[] = {
                static_cast(operationParams.dstOffset.x),
                static_cast(operationParams.dstOffset.y),
                static_cast(operationParams.dstOffset.z),
                0};
            kernelNoSplit3DBuilder.setArg(3, sizeof(origin), origin);
        }

        // Set-up srcRowPitch (row pitch and slice pitch are passed together as one argument)
        {
            OffsetType pitch[] = {
                static_cast(srcRowPitch),
                static_cast(srcSlicePitch)};
            kernelNoSplit3DBuilder.setArg(4, sizeof(pitch),
                                          // Tail of the buffer-to-image buildDispatchInfosTyped that starts before this range.
                                          pitch);
        }

        // Set-up work sizes
        kernelNoSplit3DBuilder.setDispatchGeometry(operationParams.size, Vec3{0, 0, 0}, Vec3{0, 0, 0});
        kernelNoSplit3DBuilder.bake(multiDispatchInfo);

        return true;
    }
};

// Stateless buffer-to-image builder: constructs the base without kernels, then loads the
// same five kernel names from EBuiltInOps::CopyBufferToImage3dStateless, built with
// CompilerOptions::greaterThan4gbBuffersRequired.
// NOTE(review): template argument lists are absent in this view; tokens are kept as found.
template <>
class BuiltInOp : public BuiltInOp {
  public:
    BuiltInOp(BuiltIns &kernelsLib, ClDevice &device) : BuiltInOp(kernelsLib, device, false) {
        populate(EBuiltInOps::CopyBufferToImage3dStateless,
                 CompilerOptions::greaterThan4gbBuffersRequired,
                 "CopyBufferToImage3dBytes", kernelBytes[0],
                 "CopyBufferToImage3d2Bytes", kernelBytes[1],
                 "CopyBufferToImage3d4Bytes", kernelBytes[2],
                 "CopyBufferToImage3d8Bytes", kernelBytes[3],
                 "CopyBufferToImage3d16Bytes", kernelBytes[4]);
    }

    // Forwards to the typed implementation inherited from the base builder.
    bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo) const override {
        return buildDispatchInfosTyped(multiDispatchInfo);
    }
};

// Dispatch builder that copies from an image into a host pointer or buffer.
// The kernel variant is chosen by the image's element size, indexed in kernelBytes
// by log2(bytes per pixel).
template <>
class BuiltInOp : public BuiltinDispatchInfoBuilder {
  public:
    // Public constructor: delegates to the protected one and asks it to load the kernels.
    BuiltInOp(BuiltIns &kernelsLib, ClDevice &device) : BuiltInOp(kernelsLib, device, true) {}

    // Virtual entry point; forwards to the typed implementation.
    bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo) const override {
        return buildDispatchInfosTyped(multiDispatchInfo);
    }

  protected:
    // One kernel per element size: index 0..4 <-> 1, 2, 4, 8, 16 bytes (see populate below).
    MultiDeviceKernel *kernelBytes[5] = {nullptr};

    // Loads the stateful kernels only when populateKernels is true.
    BuiltInOp(BuiltIns &kernelsLib, ClDevice &device, bool populateKernels)
        : BuiltinDispatchInfoBuilder(kernelsLib, device) {
        if (populateKernels) {
            populate(EBuiltInOps::CopyImage3dToBuffer,
                     "",
                     "CopyImage3dToBufferBytes", kernelBytes[0],
                     "CopyImage3dToBuffer2Bytes", kernelBytes[1],
                     "CopyImage3dToBuffer4Bytes", kernelBytes[2],
                     "CopyImage3dToBuffer8Bytes", kernelBytes[3],
                     "CopyImage3dToBuffer16Bytes", kernelBytes[4]);
        }
    }

    // Builds a single 3D dispatch covering operationParams.size. Always returns true.
    template
    bool buildDispatchInfosTyped(MultiDispatchInfo &multiDispatchInfo) const {
        DispatchInfoBuilder kernelNoSplit3DBuilder(clDevice);
        auto &operationParams = multiDispatchInfo.peekBuiltinOpParams();
        // Debug-only check: srcPtr must be null and a destination (pointer or mem object) must be given.
        DEBUG_BREAK_IF(!((operationParams.srcPtr == nullptr) && ((operationParams.dstPtr != nullptr) || (operationParams.dstMemObj != nullptr))));

        auto srcImage = castToObjectOrAbort(operationParams.srcMemObj);

        // Redescribe image to be byte-copy
        auto srcImageRedescribed = srcImage->redescribe();
        multiDispatchInfo.pushRedescribedMemObj(std::unique_ptr(srcImageRedescribed)); // life range same as mdi's

        // Calculate dstRowPitch and dstSlicePitch (caller-provided values win; zero means "derive")
        auto bytesPerPixel = srcImage->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes;

        size_t region[] = {operationParams.size.x, operationParams.size.y, operationParams.size.z};

        auto dstRowPitch = operationParams.dstRowPitch ? operationParams.dstRowPitch : region[0] * bytesPerPixel;

        // For 1D image arrays a slice is a single row, otherwise region[1] rows.
        auto dstSlicePitch =
            operationParams.dstSlicePitch ? operationParams.dstSlicePitch : ((srcImage->getImageDesc().image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY ? 1 : region[1]) * dstRowPitch);

        // Determine size of host ptr surface for residency purposes
        size_t hostPtrSize = operationParams.dstPtr ? Image::calculateHostPtrSize(region, dstRowPitch, dstSlicePitch, bytesPerPixel, srcImage->getImageDesc().image_type) : 0;
        hostPtrSize += operationParams.dstOffset.x;

        // Set-up ISA
        auto bytesExponent = Math::log2(bytesPerPixel);
        DEBUG_BREAK_IF(bytesExponent >= 5); // kernelBytes has five entries
        kernelNoSplit3DBuilder.setKernel(kernelBytes[bytesExponent]->getKernel(clDevice.getRootDeviceIndex()));

        // Set-up source image
        kernelNoSplit3DBuilder.setArg(0, srcImageRedescribed, operationParams.srcMipLevel);

        // Set-up destination host ptr / buffer
        if (operationParams.dstPtr) {
            kernelNoSplit3DBuilder.setArgSvm(1, hostPtrSize, operationParams.dstPtr, nullptr, 0u);
        } else {
            kernelNoSplit3DBuilder.setArg(1, operationParams.dstMemObj);
        }

        // Set-up srcOrigin
        {
            uint32_t origin[] = {
                static_cast(operationParams.srcOffset.x),
                static_cast(operationParams.srcOffset.y),
                static_cast(operationParams.srcOffset.z),
                0};
            kernelNoSplit3DBuilder.setArg(2, sizeof(origin), origin);
        }

        // Set-up dstOffset
        kernelNoSplit3DBuilder.setArg(3, static_cast(operationParams.dstOffset.x));

        // Set-up dstRowPitch (row pitch and slice pitch are passed together as one argument)
        {
            OffsetType pitch[] = {
                static_cast(dstRowPitch),
                static_cast(dstSlicePitch)};
            kernelNoSplit3DBuilder.setArg(4, sizeof(pitch), pitch);
        }

        // Set-up work sizes
        kernelNoSplit3DBuilder.setDispatchGeometry(operationParams.size, Vec3{0, 0, 0}, Vec3{0, 0, 0});
        kernelNoSplit3DBuilder.bake(multiDispatchInfo);

        return true;
    }
};

// Stateless image-to-buffer builder: constructs the base without kernels, then loads the
// same five kernel names from EBuiltInOps::CopyImage3dToBufferStateless, built with
// CompilerOptions::greaterThan4gbBuffersRequired.
template <>
class BuiltInOp : public BuiltInOp {
  public:
    BuiltInOp(BuiltIns &kernelsLib, ClDevice &device) : BuiltInOp(kernelsLib, device, false) {
        populate(EBuiltInOps::CopyImage3dToBufferStateless,
                 CompilerOptions::greaterThan4gbBuffersRequired,
                 "CopyImage3dToBufferBytes", kernelBytes[0],
                 "CopyImage3dToBuffer2Bytes", kernelBytes[1],
                 "CopyImage3dToBuffer4Bytes", kernelBytes[2],
                 "CopyImage3dToBuffer8Bytes", kernelBytes[3],
                 "CopyImage3dToBuffer16Bytes", kernelBytes[4]);
    }

    // Forwards to the typed implementation inherited from the base builder.
    bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo) const override {
        return buildDispatchInfosTyped(multiDispatchInfo);
    }
};

// Dispatch builder that copies between two images using the single "CopyImageToImage3d" kernel.
template <>
class BuiltInOp : public BuiltinDispatchInfoBuilder {
  public:
    BuiltInOp(BuiltIns &kernelsLib, ClDevice &device)
        : BuiltinDispatchInfoBuilder(kernelsLib, device) {
        populate(EBuiltInOps::CopyImageToImage3d,
                 "",
                 "CopyImageToImage3d", kernel);
    }

    // Builds a single 3D dispatch covering operationParams.size.
    // (The function continues past the end of this range.)
    bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo) const override {
        DispatchInfoBuilder kernelNoSplit3DBuilder(clDevice);
        auto &operationParams = multiDispatchInfo.peekBuiltinOpParams();
        // Debug-only check: both endpoints are expected as mem objects, not raw pointers.
        DEBUG_BREAK_IF(!((operationParams.srcPtr == nullptr) && (operationParams.dstPtr == nullptr)));

        auto srcImage = castToObjectOrAbort(operationParams.srcMemObj);
        auto dstImage = castToObjectOrAbort(operationParams.dstMemObj);

        // Redescribe images to be byte-copies
        auto srcImageRedescribed = srcImage->redescribe();
        auto dstImageRedescribed = dstImage->redescribe();
        multiDispatchInfo.pushRedescribedMemObj(std::unique_ptr(srcImageRedescribed)); // life range same as mdi's
        multiDispatchInfo.pushRedescribedMemObj(std::unique_ptr(dstImageRedescribed)); // life range same as mdi's

        // Set-up kernel
        kernelNoSplit3DBuilder.setKernel(kernel->getKernel(clDevice.getRootDeviceIndex()));

        // Set-up source image
        // Remainder of the image-to-image buildDispatchInfos that starts before this range.
        kernelNoSplit3DBuilder.setArg(0, srcImageRedescribed, operationParams.srcMipLevel);

        // Set-up destination image
        kernelNoSplit3DBuilder.setArg(1, dstImageRedescribed, operationParams.dstMipLevel);

        // Set-up srcOrigin
        {
            uint32_t origin[] = {
                static_cast(operationParams.srcOffset.x),
                static_cast(operationParams.srcOffset.y),
                static_cast(operationParams.srcOffset.z),
                0};
            kernelNoSplit3DBuilder.setArg(2, sizeof(origin), origin);
        }

        // Set-up dstOrigin
        {
            uint32_t origin[] = {
                static_cast(operationParams.dstOffset.x),
                static_cast(operationParams.dstOffset.y),
                static_cast(operationParams.dstOffset.z),
                0};
            kernelNoSplit3DBuilder.setArg(3, sizeof(origin), origin);
        }

        // Set-up work sizes
        kernelNoSplit3DBuilder.setDispatchGeometry(operationParams.size, Vec3{0, 0, 0}, Vec3{0, 0, 0});
        kernelNoSplit3DBuilder.bake(multiDispatchInfo);

        return true;
    }

  protected:
    MultiDeviceKernel *kernel = nullptr;
};

// Dispatch builder that fills an image with a color using the single "FillImage3d" kernel.
// The fill color is read from operationParams.srcPtr and converted with convertFillColor
// from the image's format to the redescribed image's format.
// NOTE(review): template argument lists are absent in this view; tokens are kept as found.
template <>
class BuiltInOp : public BuiltinDispatchInfoBuilder {
  public:
    BuiltInOp(BuiltIns &kernelsLib, ClDevice &device)
        : BuiltinDispatchInfoBuilder(kernelsLib, device) {
        populate(EBuiltInOps::FillImage3d,
                 "",
                 "FillImage3d", kernel);
    }

    // Builds a single 3D dispatch covering operationParams.size. Always returns true.
    bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo) const override {
        DispatchInfoBuilder kernelNoSplit3DBuilder(clDevice);
        auto &operationParams = multiDispatchInfo.peekBuiltinOpParams();
        // Debug-only check: the color comes via srcPtr; no source mem object and no dstPtr are expected.
        DEBUG_BREAK_IF(!((operationParams.srcMemObj == nullptr) && (operationParams.srcPtr != nullptr) && (operationParams.dstPtr == nullptr)));

        auto image = castToObjectOrAbort(operationParams.dstMemObj);

        // Redescribe image to be byte-copy
        auto imageRedescribed = image->redescribeFillImage();
        multiDispatchInfo.pushRedescribedMemObj(std::unique_ptr(imageRedescribed));

        // Set-up kernel
        kernelNoSplit3DBuilder.setKernel(kernel->getKernel(clDevice.getRootDeviceIndex()));

        // Set-up destination image
        kernelNoSplit3DBuilder.setArg(0, imageRedescribed);

        // Set-up fill color
        int iFillColor[4] = {0};
        const void *fillColor = operationParams.srcPtr;
        convertFillColor(fillColor,
                         iFillColor,
                         image->getSurfaceFormatInfo().OCLImageFormat,
                         imageRedescribed->getSurfaceFormatInfo().OCLImageFormat);
        kernelNoSplit3DBuilder.setArg(1, 4 * sizeof(int32_t), iFillColor);

        // Set-up dstOffset
        {
            uint32_t offset[] = {
                static_cast(operationParams.dstOffset.x),
                static_cast(operationParams.dstOffset.y),
                static_cast(operationParams.dstOffset.z),
                0};
            kernelNoSplit3DBuilder.setArg(2, sizeof(offset), offset);
        }

        // Set-up work sizes
        kernelNoSplit3DBuilder.setDispatchGeometry(operationParams.size, Vec3{0, 0, 0}, Vec3{0, 0, 0});
        kernelNoSplit3DBuilder.bake(multiDispatchInfo);

        return true;
    }

  protected:
    MultiDeviceKernel *kernel = nullptr;
};

// Returns the dispatch-info builder for `operation` on `device`.
// Builders are stored per root device and per operation id in the CL execution
// environment; each entry pairs the builder pointer (first) with a once-flag (second),
// so std::call_once constructs every builder at most once.
// (The switch continues past the end of this range.)
BuiltinDispatchInfoBuilder &BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::Type operation, ClDevice &device) {
    uint32_t operationId = static_cast(operation);
    auto &builtins = *device.getDevice().getBuiltIns();
    auto clExecutionEnvironment = static_cast(device.getExecutionEnvironment());
    auto &operationBuilder = clExecutionEnvironment->peekBuilders(device.getRootDeviceIndex())[operationId];
    switch (operation) {
    case EBuiltInOps::CopyBufferToBuffer:
        std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique>(builtins, device); });
        break;
    case EBuiltInOps::CopyBufferToBufferStateless:
        std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique>(builtins, device); });
        break;
    case EBuiltInOps::CopyBufferRect:
        std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique>(builtins, device); });
        break;
    case EBuiltInOps::CopyBufferRectStateless:
        std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique>(builtins, device); });
        break;
    case EBuiltInOps::FillBuffer:
        std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique>(builtins, device); });
        break;
    case EBuiltInOps::FillBufferStateless:
        std::call_once(operationBuilder.second, [&] { operationBuilder.first =
                                                            // Remainder of the builder-lookup switch that starts before this range.
                                                            std::make_unique>(builtins, device); });
        break;
    case EBuiltInOps::CopyBufferToImage3d:
        std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique>(builtins, device); });
        break;
    case EBuiltInOps::CopyBufferToImage3dStateless:
        std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique>(builtins, device); });
        break;
    case EBuiltInOps::CopyImage3dToBuffer:
        std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique>(builtins, device); });
        break;
    case EBuiltInOps::CopyImage3dToBufferStateless:
        std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique>(builtins, device); });
        break;
    case EBuiltInOps::CopyImageToImage3d:
        std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique>(builtins, device); });
        break;
    case EBuiltInOps::FillImage3d:
        std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique>(builtins, device); });
        break;
    case EBuiltInOps::AuxTranslation:
        std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique>(builtins, device); });
        break;
    default:
        // Operations not handled here are delegated to the "unknown" lookup.
        return getUnknownDispatchInfoBuilder(operation, device);
    }
    return *operationBuilder.first;
}

// Takes ownership of the builder's program and kernels for the lifetime of this wrapper.
BuiltInOwnershipWrapper::BuiltInOwnershipWrapper(BuiltinDispatchInfoBuilder &inputBuilder, Context *context) {
    takeOwnership(inputBuilder, context);
}

// Releases what takeOwnership acquired, in reverse order: first every kernel, then the
// program (found via the first used kernel), whose context is reset to nullptr.
// Does nothing when no builder was ever attached.
BuiltInOwnershipWrapper::~BuiltInOwnershipWrapper() {
    if (builder) {
        for (auto &kernel : builder->peekUsedKernels()) {
            kernel->releaseOwnership();
        }
        if (!builder->peekUsedKernels().empty()) {
            builder->peekUsedKernels()[0]->getProgram()->setContext(nullptr);
            builder->peekUsedKernels()[0]->getProgram()->releaseOwnership();
        }
    }
}

// Attaches `inputBuilder` to this wrapper. Takes ownership of the program of the first
// used kernel and points it at `context`, then takes ownership of every used kernel.
// Calling this on a wrapper that already holds a builder trips UNRECOVERABLE_IF.
void BuiltInOwnershipWrapper::takeOwnership(BuiltinDispatchInfoBuilder &inputBuilder, Context *context) {
    UNRECOVERABLE_IF(builder);
    builder = &inputBuilder;
    if (!builder->peekUsedKernels().empty()) {
        builder->peekUsedKernels()[0]->getProgram()->takeOwnership();
        builder->peekUsedKernels()[0]->getProgram()->setContext(context);
    }
    for (auto &kernel : builder->peekUsedKernels()) {
        kernel->takeOwnership();
    }
}

// Creates a built-in program from `bc` for the given devices.
// Source and Intermediate code go through Program::createBuiltInFromSource, Binary goes
// through Program::createBuiltInFromGenBinary; any other code type yields an empty pointer.
// The error code written by the factory calls is not inspected here.
std::unique_ptr BuiltinDispatchInfoBuilder::createProgramFromCode(const BuiltinCode &bc, const ClDeviceVector &deviceVector) {
    std::unique_ptr ret;
    const char *data = bc.resource.data();
    size_t dataLen = bc.resource.size();
    cl_int err = 0;
    switch (bc.type) {
    default:
        break;
    case BuiltinCode::ECodeType::Source:
    case BuiltinCode::ECodeType::Intermediate:
        ret.reset(Program::createBuiltInFromSource(data, nullptr, deviceVector, &err));
        break;
    case BuiltinCode::ECodeType::Binary:
        ret.reset(Program::createBuiltInFromGenBinary(nullptr, deviceVector, data, dataLen, &err));
        break;
    }
    return ret;
}

} // namespace NEO
compute-runtime-22.14.22890/opencl/source/built_ins/builtins_dispatch_builder.h000066400000000000000000000106531422164147700275160ustar00rootroot00000000000000/*
 * Copyright (C) 2018-2021 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#pragma once
#include "shared/source/built_ins/built_ins.h"
#include "shared/source/helpers/vec.h"

#include "opencl/source/kernel/multi_device_kernel.h"

#include "CL/cl.h"
#include "built_in_ops.h"

// NOTE(review): the targets of the following include directives are absent in this view.
#include
#include
#include
#include
#include
#include
#include
#include

namespace NEO {
typedef std::vector BuiltinResourceT;

class ClDeviceVector;
class Context;
class Device;
class MemObj;
struct MultiDispatchInfo;
class Program;

// Parameters of one built-in operation, read by the dispatch builders through
// MultiDispatchInfo::peekBuiltinOpParams(). (The struct continues past this range.)
struct BuiltinOpParams {
    void *srcPtr = nullptr;
    void *dstPtr = nullptr;
    MemObj *srcMemObj = nullptr;
    MemObj *dstMemObj = nullptr;
    GraphicsAllocation *srcSvmAlloc = nullptr;
    GraphicsAllocation *dstSvmAlloc = nullptr;
    GraphicsAllocation *transferAllocation = nullptr; //mapAllocation or hostPtrAllocation
    AuxTranslationDirection auxTranslationDirection = AuxTranslationDirection::None;
    bool unifiedMemoryArgsRequireMemSync = true;
    Vec3 srcOffset = {0, 0, 0};
    Vec3 dstOffset = {0, 0, 0};
    Vec3 size = {0,
0, 0}; size_t srcRowPitch = 0; size_t dstRowPitch = 0; size_t srcSlicePitch = 0; size_t dstSlicePitch = 0; uint32_t srcMipLevel = 0; uint32_t dstMipLevel = 0; void *userPtrForPostOperationCpuCopy = nullptr; }; class BuiltinDispatchInfoBuilder { public: BuiltinDispatchInfoBuilder(BuiltIns &kernelLib, ClDevice &device) : kernelsLib(kernelLib), clDevice(device) {} virtual ~BuiltinDispatchInfoBuilder() = default; template void populate(EBuiltInOps::Type operation, ConstStringRef options, KernelsDescArgsT &&...desc); virtual bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo) const { return false; } virtual bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo, Kernel *kernel, const uint32_t dim, const Vec3 &gws, const Vec3 &elws, const Vec3 &offset) const { return false; } virtual cl_int validateDispatch(Kernel *kernel, uint32_t inworkDim, const Vec3 &gws, const Vec3 &elws, const Vec3 &offset) const { return CL_SUCCESS; } // returns true if argument should be updated in kernel exposed to user code virtual bool setExplicitArg(uint32_t argIndex, size_t argSize, const void *argVal, cl_int &err) const { err = 0; return true; } std::vector> &peekUsedKernels() { return usedKernels; } static std::unique_ptr createProgramFromCode(const BuiltinCode &bc, const ClDeviceVector &device); protected: template void grabKernels(KernelNameT &&kernelName, MultiDeviceKernel *&kernelDst, KernelsDescArgsT &&...kernelsDesc) { auto rootDeviceIndex = clDevice.getRootDeviceIndex(); const KernelInfo *kernelInfo = prog->getKernelInfo(kernelName, rootDeviceIndex); UNRECOVERABLE_IF(nullptr == kernelInfo); cl_int err = 0; KernelInfoContainer kernelInfos; kernelInfos.resize(rootDeviceIndex + 1); kernelInfos[rootDeviceIndex] = kernelInfo; kernelDst = MultiDeviceKernel::create(prog.get(), kernelInfos, &err); kernelDst->getKernel(rootDeviceIndex)->isBuiltIn = true; usedKernels.push_back(std::unique_ptr(kernelDst)); grabKernels(std::forward(kernelsDesc)...); } cl_int grabKernels() { 
return CL_SUCCESS; } std::unique_ptr prog; std::vector> usedKernels; BuiltIns &kernelsLib; ClDevice &clDevice; }; class BuiltInDispatchBuilderOp { public: static BuiltinDispatchInfoBuilder &getBuiltinDispatchInfoBuilder(EBuiltInOps::Type op, ClDevice &device); static BuiltinDispatchInfoBuilder &getUnknownDispatchInfoBuilder(EBuiltInOps::Type op, ClDevice &device); }; class BuiltInOwnershipWrapper : public NonCopyableOrMovableClass { public: BuiltInOwnershipWrapper() = default; BuiltInOwnershipWrapper(BuiltinDispatchInfoBuilder &inputBuilder, Context *context); ~BuiltInOwnershipWrapper(); void takeOwnership(BuiltinDispatchInfoBuilder &inputBuilder, Context *context); protected: BuiltinDispatchInfoBuilder *builder = nullptr; }; } // namespace NEO compute-runtime-22.14.22890/opencl/source/built_ins/kernels/000077500000000000000000000000001422164147700235655ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/source/built_ins/kernels/CMakeLists.txt000066400000000000000000000130541422164147700263300ustar00rootroot00000000000000# # Copyright (C) 2018-2022 Intel Corporation # # SPDX-License-Identifier: MIT # add_custom_target(builtins_vme_sources) set_target_properties(builtins_vme_sources PROPERTIES FOLDER "${OPENCL_RUNTIME_PROJECTS_FOLDER}/${OPENCL_BUILTINS_PROJECTS_FOLDER}") set(BUILTINS_OUTDIR_WITH_ARCH "${TargetDir}/built_ins/${NEO_ARCH}") add_dependencies(${BUILTINS_BINARIES_BINDFUL_LIB_NAME} builtins_vme_sources) add_subdirectories() set(GENERATED_BUILTINS ${GENERATED_BUILTINS} PARENT_SCOPE) set(GENERATED_BUILTINS_STATELESS ${GENERATED_BUILTINS_STATELESS} PARENT_SCOPE) set(BUILTIN_OPTIONS_STATELESS "-cl-intel-greater-than-4GB-buffer-required" ) if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug") list(APPEND __ocloc__options__ "-D DEBUG") endif() set(BUILTINS_INCLUDE_DIR ${TargetDir} PARENT_SCOPE) set(BUILTIN_CPP "") function(get_bits_for_stateless core_type platform_type) # Force 32bits compiling on gen9lp for stateless builtins if((${CORE_TYPE} STREQUAL "GEN9") 
AND (${PLATFORM_TYPE} STREQUAL "LP")) set(BITS "32" PARENT_SCOPE) else() set(BITS ${NEO_BITS} PARENT_SCOPE) endif() endfunction() function(get_builtin_options core_type neo_arch) if("${neo_arch}" STREQUAL "x32") set(BUILTIN_OPTIONS "-cl-intel-greater-than-4GB-buffer-required" PARENT_SCOPE) elseif("${core_type}" STREQUAL "XE_HPC_CORE") set(BUILTIN_OPTIONS "" PARENT_SCOPE) else() set(BUILTIN_OPTIONS "-force_stos_opt" PARENT_SCOPE) endif() endfunction() # Define function for compiling built-ins (with ocloc) function(compile_builtin core_type platform_type builtin bits builtin_options) string(TOLOWER ${core_type} core_type_lower) get_family_name_with_type(${core_type} ${platform_type}) set(OUTPUTDIR "${BUILTINS_OUTDIR_WITH_ARCH}/${core_type_lower}") # get filename set(FILENAME ${builtin}) # get name of the file w/o extension get_filename_component(BASENAME ${builtin} NAME_WE) get_filename_component(absolute_filepath ${builtin} ABSOLUTE) set(OUTPUTPATH_BASE "${OUTPUTDIR}/${BASENAME}_${family_name_with_type}") # function returns builtin cpp filename unset(BUILTIN_CPP) list(APPEND __ocloc__options__ "-cl-kernel-arg-info") if(NOT NEO_DISABLE_BUILTINS_COMPILATION) set(OUTPUT_FILES ${OUTPUTPATH_BASE}.spv ${OUTPUTPATH_BASE}.bin ${OUTPUTPATH_BASE}.cpp ${OUTPUTPATH_BASE}.gen ) add_custom_command( OUTPUT ${OUTPUT_FILES} COMMAND ${ocloc_cmd_prefix} -q -file ${absolute_filepath} -device ${DEFAULT_SUPPORTED_${core_type}_${platform_type}_PLATFORM} ${builtin_options} -${bits} -out_dir ${OUTPUTDIR} -options "$" WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} DEPENDS ${builtin} ocloc copy_compiler_files ) # set variable outside function set(BUILTIN_CPP built_ins/${NEO_ARCH}/${core_type_lower}/${BASENAME}_${family_name_with_type}.cpp PARENT_SCOPE) else() foreach(_file_name "spv" "bin" "gen") set(_file_prebuilt "${NEO_KERNELS_BIN_DIR}/built_ins/${NEO_ARCH}/${core_type_lower}/${BASENAME}_${family_name_with_type}.${_file_name}") if(EXISTS ${_file_prebuilt}) add_custom_command( OUTPUT 
${OUTPUTPATH_BASE}.${_file_name} COMMAND ${CMAKE_COMMAND} -E make_directory ${OUTPUTDIR} COMMAND ${CMAKE_COMMAND} -E copy_if_different ${_file_prebuilt} ${OUTPUTDIR} ) endif() endforeach() set(_file_prebuilt "${NEO_KERNELS_BIN_DIR}/built_ins/${NEO_ARCH}/${core_type_lower}/${BASENAME}_${family_name_with_type}.cpp") if(EXISTS ${_file_prebuilt}) add_custom_command( OUTPUT ${OUTPUTPATH_BASE}.cpp COMMAND ${CMAKE_COMMAND} -E make_directory ${OUTPUTDIR} COMMAND ${CMAKE_COMMAND} -E copy_if_different ${_file_prebuilt} ${OUTPUTDIR} ) # set variable outside function set(BUILTIN_CPP built_ins/${NEO_ARCH}/${core_type_lower}/${BASENAME}_${family_name_with_type}.cpp PARENT_SCOPE) endif() endif() endfunction() macro(macro_for_each_core_type) foreach(PLATFORM_TYPE ${PLATFORM_TYPES}) if(${CORE_TYPE}_HAS_${PLATFORM_TYPE}) get_family_name_with_type(${CORE_TYPE} ${PLATFORM_TYPE}) string(TOLOWER ${PLATFORM_TYPE} PLATFORM_TYPE_LOWER) unset(BUILTINS_COMMANDS) foreach(GENERATED_BUILTIN ${GENERATED_BUILTINS}) compile_builtin(${CORE_TYPE} ${PLATFORM_TYPE} ${GENERATED_BUILTIN}.builtin_kernel ${NEO_BITS} "${BUILTIN_OPTIONS}") if(NOT ${BUILTIN_CPP} STREQUAL "") list(APPEND BUILTINS_COMMANDS ${TargetDir}/${BUILTIN_CPP}) endif() endforeach() get_bits_for_stateless(${CORE_TYPE} ${PLATFORM_TYPE}) get_builtin_options(${CORE_TYPE} ${NEO_ARCH}) foreach(GENERATED_BUILTIN_STATELESS ${GENERATED_BUILTINS_STATELESS}) compile_builtin(${CORE_TYPE} ${PLATFORM_TYPE} ${GENERATED_BUILTIN_STATELESS}.builtin_kernel ${BITS} "${BUILTIN_OPTIONS_STATELESS}") if(NOT ${BUILTIN_CPP} STREQUAL "") list(APPEND BUILTINS_COMMANDS ${TargetDir}/${BUILTIN_CPP}) endif() endforeach() if(NOT "${BUILTINS_COMMANDS}" STREQUAL "") set(target_name builtins_${family_name_with_type}_vme) add_custom_target(${target_name} DEPENDS ${BUILTINS_COMMANDS}) add_dependencies(builtins ${target_name}) set_target_properties(${target_name} PROPERTIES FOLDER "${OPENCL_RUNTIME_PROJECTS_FOLDER}/built_ins/${family_name_with_type}") endif() endif() 
endforeach() endmacro() apply_macro_for_each_core_type("SUPPORTED") vme_block_advanced_motion_estimate_bidirectional_check_intel.builtin_kernel000066400000000000000000000361721422164147700427350ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/source/built_ins/kernels/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ R"===( __kernel __attribute__((reqd_work_group_size(16, 1, 1))) void block_advanced_motion_estimate_bidirectional_check_intel( sampler_t accelerator, __read_only image2d_t srcImg, __read_only image2d_t refImg, __read_only image2d_t src_check_image, __read_only image2d_t ref0_check_image, __read_only image2d_t ref1_check_image, uint flags, uint search_cost_penalty, uint search_cost_precision, short2 count_global, uchar bidir_weight, __global short2 *count_motion_vector_buffer, __global short2 *prediction_motion_vector_buffer, __global char *skip_input_mode_buffer, __global short2 *skip_motion_vector_buffer, __global short2 *search_motion_vector_buffer, __global char *intra_search_predictor_modes, __global ushort *search_residuals, __global ushort *skip_residuals, __global ushort *intra_residuals, __read_only image2d_t intraSrcImg, int height, int width, int stride) { __local uint dstSearch[64]; // 8 GRFs __local uint dstSkipIntra[32 + 24]; // 7 GRFs (4 for inter, 3 for intra) // distortion in the 6th GRF __local ushort *distSearch = (__local ushort *)&dstSearch[8 * 5]; // Initialize the MV cost table: // MV Cost in U4U4 format: // No cost : 0, 0, 0, 0, 0, 0, 0, 0 // Low Cost : 1, 4, 5, 9, 10, 12, 14, 15 // Normal Cost: 5, 26, 29, 43, 45, 47, 57, 57 // High Cost : 29, 61, 72, 78, 88, 89, 91, 92 uint2 MVCostTable; if (search_cost_penalty == 1) { MVCostTable.s0 = 0x09050401; MVCostTable.s1 = 0x0F0E0C0A; } else if (search_cost_penalty == 2) { MVCostTable.s0 = 0x2B1D1A05; MVCostTable.s1 = 0x39392F2D; } else if (search_cost_penalty == 3) { MVCostTable.s0 = 0x4E483D1D; MVCostTable.s1 = 0x5C5B5958; } else { 
MVCostTable.s0 = 0; MVCostTable.s1 = 0; } uint MVCostPrecision = ((uint)search_cost_precision) << 16; // Frame is divided into rows * columns of MBs. // One h/w thread per WG. // One WG processes "row" MBs - one row per iteration and one MB per row. // Number of WGs (or h/w threads) is number of columns MBs.Each iteration // processes the MB in a row - gid_0 is the MB id in a row and gid_1 is the // row offset. int sid_0 = stride * get_group_id(0); int gid_0 = sid_0 / height; int gid_1 = sid_0 % height; for (int sid = sid_0; sid < sid_0 + stride && gid_0 < width && gid_1 < height; sid++, gid_0 = sid / height, gid_1 = sid % height) { int2 srcCoord; srcCoord.x = gid_0 * 16 + get_global_offset(0); // 16 pixels wide MBs (globally scalar) srcCoord.y = gid_1 * 16 + get_global_offset(1); // 16 pixels tall MBs (globally scalar) uint curMB = gid_0 + gid_1 * width; // current MB id short2 count; // If either the search or skip vector counts are per-MB, then we need to // read in // the count motion vector buffer. if ((count_global.s0 == -1) | (count_global.s1 == -1)) { count = count_motion_vector_buffer[curMB]; } // If either the search or skip vector counts are per-frame, we need to use // those. if (count_global.s0 >= 0) { count.s0 = count_global.s0; } if (count_global.s1 >= 0) { count.s1 = count_global.s1; } int countPredMVs = count.x; if (countPredMVs != 0) { uint offset = curMB * 4; // 4 predictors per MB offset += get_local_id(0) % 4; // 16 work-items access 4 MVs for MB // one predictor for MB per SIMD channel // Reduce predictors from Q-pixel to integer precision. int2 predMV = 0; if (get_local_id(0) < countPredMVs) { // one MV per work-item if(prediction_motion_vector_buffer != NULL) { predMV = convert_int2(prediction_motion_vector_buffer[offset]); } // Predictors are input in QP resolution. Convert that to integer // resolution. 
predMV.x /= 4; predMV.y /= 4; predMV.y &= 0xFFFFFFFE; } // Do up to 4 IMEs, get the best MVs and their distortions, and optionally // a FBR of // the best MVs. Finally the results are written out to SLM. intel_work_group_vme_mb_multi_query_4( dstSearch, // best search MV and its distortions into SLM countPredMVs, // count of predictor MVs (globally scalar - value range // 1 to 4) MVCostPrecision, // MV cost precision MVCostTable, // MV cost table srcCoord, // MB 2-D offset (globally scalar) predMV, // predictor MVs (up to 4 distinct MVs for SIMD16 thread) srcImg, // source refImg, // reference accelerator); // vme object } int doIntra = ((flags & 0x2) != 0); int intraEdges = 0; if (doIntra) { // Enable all edges by default. intraEdges = 0x3C; // If this is a left-edge MB, then disable left edges. if ((gid_0 == 0) & (get_global_offset(0) == 0)) { intraEdges &= 0x18; } // If this is a right edge MB then disable right edges. if (gid_0 == width - 1) { intraEdges &= 0x34; } // If this is a top-edge MB, then disable top edges. if ((gid_1 == 0) & (get_global_offset(1) == 0)) { intraEdges &= 0x20; } // Set bit6=bit5. intraEdges |= ((intraEdges & 0x20) << 1); intraEdges <<= 8; } int skip_block_type_8x8 = flags & 0x4; int countSkipMVs = count.y; if (countSkipMVs != 0 || doIntra == true) { // one set of skip MV per SIMD channel // Do up to 4 skip checks and get the distortions for each of them. // Finally the results are written out to SLM. 
if ((skip_block_type_8x8 == 0) | ((doIntra) & (countSkipMVs == 0))) { // 16x16: uint offset = curMB * 4 * 2; // 4 sets of skip check MVs per MB int skipMV = 0; if (get_local_id(0) < countSkipMVs * 2) // need 2 values per MV { offset += (get_local_id(0)); // 16 work-items access 4 sets of MVs for MB if(skip_motion_vector_buffer != NULL){ __global int *skip1_motion_vector_buffer = (__global int *)skip_motion_vector_buffer; skipMV = skip1_motion_vector_buffer[offset]; // one MV per work-item } } uchar skipMode = 0; if (get_local_id(0) < countSkipMVs) { if(skip_input_mode_buffer != NULL) skipMode = skip_input_mode_buffer[curMB]; if (skipMode == 0) { skipMode = 1; } if (skipMode > 3) { skipMode = 3; } } intel_work_group_vme_mb_multi_bidir_check_16x16( dstSkipIntra, // distortions into SLM countSkipMVs, // count of skip check MVs (globally scalar - value // range 1 to 4) doIntra, // compute intra modes intraEdges, // intra edges to use srcCoord, // MB 2-D offset (globally scalar) bidir_weight, // bidirectional weight skipMode, // skip modes skipMV, // skip check MVs (up to 4 distinct sets of skip check MVs // for SIMD16 thread) src_check_image, // source ref0_check_image, // reference fwd ref1_check_image, // reference bwd intraSrcImg, // intra source accelerator); // vme object } else { // 8x8: uint offset = curMB * 4 * 8; // 4 sets of skip check MVs, 16 shorts (8 ints) each per MB int2 skipMVs = 0; if (get_local_id(0) < countSkipMVs * 8) // need 8 values per MV { offset += (get_local_id(0)); // 16 work-items access 4 sets of MVs for MB if(skip_motion_vector_buffer != NULL){ __global int *skip1_motion_vector_buffer = (__global int *)(skip_motion_vector_buffer); skipMVs.x = skip1_motion_vector_buffer[offset]; // four component MVs // per work-item skipMVs.y = skip1_motion_vector_buffer[offset + 16];} } uchar skipModes = 0; if (get_local_id(0) < countSkipMVs) { if(skip_input_mode_buffer != NULL) skipModes = skip_input_mode_buffer[curMB]; } 
intel_work_group_vme_mb_multi_bidir_check_8x8( dstSkipIntra, // distortions into SLM countSkipMVs, // count of skip check MVs per MB (globally scalar - // value range 1 to 4) doIntra, // compute intra modes intraEdges, // intra edges to use srcCoord, // MB 2-D offset (globally scalar) bidir_weight, // bidirectional weight skipModes, // skip modes skipMVs, // skip check MVs (up to 4 distinct sets of skip check MVs // for SIMD16 thread) src_check_image, // source ref0_check_image, // reference fwd ref1_check_image, // reference bwd intraSrcImg, // intra source accelerator); // vme object } } barrier(CLK_LOCAL_MEM_FENCE); // Write Out motion estimation result: // Result format // Hierarchical row-major layout // i.e. row-major of blocks MVs in MBs, and row-major of 4 sets of // MVs/distortion in blocks if (countPredMVs != 0) { // 4x4 if (intel_get_accelerator_mb_block_type(accelerator) == 0x2) { int index = (gid_0 * 16 + get_local_id(0)) + (gid_1 * 16 * width); // 1. 16 work-items enabled. // 2. Work-items gather fwd MVs in strided dword locations 0, 2, .., 30 // (interleaved // fwd/bdw MVs) with constant offset 8 (control data size) from SLM // into contiguous // short2 locations 0, 1, .., 15 of global buffer // search_motion_vector_buffer with // offset index. // 3. Work-items gather contiguous ushort locations 0, 1, .., 15 from // distSearch into // contiguous ushort locations 0, 1, .., 15 of search_residuals with // offset index. short2 val = as_short2(dstSearch[8 + get_local_id(0) * 2]); if(search_motion_vector_buffer != NULL) search_motion_vector_buffer[index] = val; if (search_residuals != NULL) { search_residuals[index] = distSearch[get_local_id(0)]; } } // 8x8 else if (intel_get_accelerator_mb_block_type(accelerator) == 0x1) { // Only 1st 4 work-item are needed. if (get_local_id(0) < 4) { int index = (gid_0 * 4 + get_local_id(0)) + (gid_1 * 4 * width); // 1. 4 work-items enabled. // 2. 
Work-items gather fw MVs in strided dword locations 0, 8, 16, 24 // (interleaved // fwd/bdw MVs) with constant offset 8 from SLM into contiguous // short2 locations // 0, 1, .., 15 of global buffer search_motion_vector_buffer with // offset index. // 3. Work-items gather strided ushort locations 0, 4, 8, 12 from // distSearch into // contiguous ushort locations 0, 1, .., 15 of search_residuals // with offset index. short2 val = as_short2(dstSearch[8 + get_local_id(0) * 4 * 2]); if(search_motion_vector_buffer != NULL) search_motion_vector_buffer[index] = val; if (search_residuals != NULL) { search_residuals[index] = distSearch[get_local_id(0) * 4]; } } } // 16x16 else if (intel_get_accelerator_mb_block_type(accelerator) == 0x0) { // One 1st work is needed. if (get_local_id(0) == 0) { int index = gid_0 + gid_1 * width; // 1. 1 work-item enabled. // 2. Work-item gathers fwd MV in dword location 0 with constant // offset 8 from // SLM into short2 locations 0 of global buffer // search_motion_vector_buffer. // 3. Work-item gathers ushort location 0 from distSearch into ushort // location 0 of search_residuals with offset index. short2 val = as_short2(dstSearch[8]); if(search_motion_vector_buffer != NULL) search_motion_vector_buffer[index] = val; if (search_residuals != NULL) { search_residuals[index] = distSearch[0]; } } } } // Write out motion skip check result: // Result format // Hierarchical row-major layout // i.e. row-major of blocks in MBs, and row-major of 8 sets of // distortions in blocks if (countSkipMVs != 0) { if (skip_block_type_8x8 == false) { // Copy out 4 (1 component) sets of distortion values. int index = (gid_0 * 4) + (get_local_id(0)) + (gid_1 * 4 * width); if (get_local_id(0) < countSkipMVs) { // 1. Up to 4 work-items are enabled. // 2. The work-item gathers distSkip locations 0, 16*1, .., 16*7 and // copies them to contiguous skip_residual locations 0, 1, 2, .., // 7. 
__local ushort *distSkip = (__local ushort *)&dstSkipIntra[0]; if(skip_residuals != NULL) skip_residuals[index] = distSkip[get_local_id(0) * 16]; } } else { // Copy out 4 (4 component) sets of distortion values. int index = (gid_0 * 4 * 4) + (get_local_id(0)) + (gid_1 * 4 * 4 * width); if (get_local_id(0) < countSkipMVs * 4) { // 1. Up to 16 work-items are enabled. // 2. The work-item gathers distSkip locations 0, 4*1, .., 4*15 and // copies them to contiguous skip_residual locations 0, 1, 2, .., // 15. __local ushort *distSkip = (__local ushort *)&dstSkipIntra[0]; if(skip_residuals != NULL) skip_residuals[index] = distSkip[get_local_id(0) * 4]; } } } // Write out intra search result: if (doIntra) { // Write out the 4x4 intra modes if (get_local_id(0) < 8) { __local char *dstIntra_4x4 = (__local char *)(&dstSkipIntra[32 + 16 + 4]); char value = dstIntra_4x4[get_local_id(0)]; char value_low = (value)&0xf; char value_high = (value >> 4) & 0xf; int index_low = (gid_0 * 22) + (get_local_id(0) * 2) + (gid_1 * 22 * width); int index_high = (gid_0 * 22) + (get_local_id(0) * 2) + 1 + (gid_1 * 22 * width); if(intra_search_predictor_modes != NULL) { intra_search_predictor_modes[index_low + 5] = value_low; intra_search_predictor_modes[index_high + 5] = value_high; } } // Write out the 8x8 intra modes if (get_local_id(0) < 4) { __local char *dstIntra_8x8 = (__local char *)(&dstSkipIntra[32 + 8 + 4]); char value = dstIntra_8x8[get_local_id(0) * 2]; char value_low = (value)&0xf; int index = (gid_0 * 22) + (get_local_id(0)) + (gid_1 * 22 * width); if(intra_search_predictor_modes != NULL) intra_search_predictor_modes[index + 1] = value_low; } // Write out the 16x16 intra modes if (get_local_id(0) < 1) { __local char *dstIntra_16x16 = (__local char *)(&dstSkipIntra[32 + 0 + 4]); char value = dstIntra_16x16[0]; char value_low = (value)&0xf; int index = (gid_0 * 22) + (gid_1 * 22 * width); if(intra_search_predictor_modes != NULL) intra_search_predictor_modes[index] = value_low; } // 
Get the intra residuals. if (intra_residuals != NULL) { int index = (gid_0 * 4) + (gid_1 * 4 * width); if (get_local_id(0) < 1) { __local ushort *distIntra_4x4 = (__local ushort *)(&dstSkipIntra[32 + 16 + 3]); __local ushort *distIntra_8x8 = (__local ushort *)(&dstSkipIntra[32 + 8 + 3]); __local ushort *distIntra_16x16 = (__local ushort *)(&dstSkipIntra[32 + 0 + 3]); intra_residuals[index + 2] = distIntra_4x4[0]; intra_residuals[index + 1] = distIntra_8x8[0]; intra_residuals[index + 0] = distIntra_16x16[0]; } } } } } )===" vme_block_advanced_motion_estimate_bidirectional_check_intel_frontend.builtin_kernel000066400000000000000000000017011422164147700446220ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/source/built_ins/kernels/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ R"===( __kernel __attribute__((reqd_work_group_size(16, 1, 1))) void block_advanced_motion_estimate_bidirectional_check_intel( sampler_t accelerator, __read_only image2d_t srcImg, __read_only image2d_t refImg, __read_only image2d_t src_check_image, __read_only image2d_t ref0_check_image, __read_only image2d_t ref1_check_image, uint flags, uint search_cost_penalty, uint search_cost_precision, short2 count_global, uchar bidir_weight, __global short2 *count_motion_vector_buffer, __global short2 *prediction_motion_vector_buffer, __global char *skip_input_mode_buffer, __global short2 *skip_motion_vector_buffer, __global short2 *search_motion_vector_buffer, __global char *intra_search_predictor_modes, __global ushort *search_residuals, __global ushort *skip_residuals, __global ushort *intra_residuals) { } )===" vme_block_advanced_motion_estimate_check_intel.builtin_kernel000066400000000000000000000341621422164147700400420ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/source/built_ins/kernels/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ R"===( __kernel __attribute__((reqd_work_group_size(16, 1, 
1))) void
block_advanced_motion_estimate_check_intel(
    sampler_t accelerator, __read_only image2d_t srcImg,
    __read_only image2d_t refImg, uint flags, uint skip_block_type,
    uint search_cost_penalty, uint search_cost_precision,
    __global short2 *count_motion_vector_buffer,
    __global short2 *predictors_buffer,
    __global short2 *skip_motion_vector_buffer,
    __global short2 *motion_vector_buffer,
    __global char *intra_search_predictor_modes, __global ushort *residuals,
    __global ushort *skip_residuals, __global ushort *intra_residuals,
    __read_only image2d_t intraSrcImg, int height, int width, int stride) {
  // Advanced motion estimation with skip/intra checks, one macro-block (MB)
  // at a time. For every MB handled by this work-group the kernel:
  //   1. runs up to 8 predictor-seeded searches when count.x != 0
  //      (intel_work_group_vme_mb_multi_query_8),
  //   2. runs skip checks and/or intra estimation when count.y != 0 or
  //      flags bit 0x2 is set (16x16 variant for skip_block_type == 0x0,
  //      8x8 variant for skip_block_type == 0x1),
  //   3. after a local-memory barrier, copies the results out of local
  //      memory into the global output buffers.
  //
  // flags                 - bit 0x2 enables intra estimation.
  // skip_block_type       - 0x0: 16x16 skip checks, 0x1: 8x8 skip checks.
  // search_cost_penalty   - 1, 2 or 3 selects an MV cost table; any other
  //                         value selects the all-zero table.
  // search_cost_precision - placed in bits 16 and up of the cost precision
  //                         word passed to the search.
  // width / height        - frame size in 16x16-pixel MBs.
  // stride                - number of consecutive MB slots per work-group.
  // Every __global pointer argument is NULL-checked before it is
  // dereferenced, so any of them may be NULL.

  __local uint dstSearch[64];         // 8 GRFs
  __local uint dstSkipIntra[64 + 24]; // 11 GRFs (8 for inter, 3 for intra)

  // Search distortions are read from the 6th GRF of the search result.
  __local ushort *distSearch = (__local ushort *)&dstSearch[8 * 5];

  // Initialize the MV cost table:
  // MV Cost in U4U4 format:
  // No cost    : 0,  0,  0,  0,  0,  0,  0,  0
  // Low Cost   : 1,  4,  5,  9,  10, 12, 14, 15
  // Normal Cost: 5,  26, 29, 43, 45, 47, 57, 57
  // High Cost  : 29, 61, 72, 78, 88, 89, 91, 92
  uint2 MVCostTable;
  if (search_cost_penalty == 1) {
    MVCostTable.s0 = 0x09050401;
    MVCostTable.s1 = 0x0F0E0C0A;
  } else if (search_cost_penalty == 2) {
    MVCostTable.s0 = 0x2B1D1A05;
    MVCostTable.s1 = 0x39392F2D;
  } else if (search_cost_penalty == 3) {
    MVCostTable.s0 = 0x4E483D1D;
    MVCostTable.s1 = 0x5C5B5958;
  } else {
    MVCostTable.s0 = 0;
    MVCostTable.s1 = 0;
  }

  uint MVCostPrecision = ((uint)search_cost_precision) << 16;

  // Work distribution: work-group g handles the MB slots
  // [stride * g, stride * g + stride). Slot sid maps to MB column
  // gid_0 = sid / height and MB row gid_1 = sid % height; the loop also stops
  // as soon as the slot falls outside the width x height MB grid.
  int sid_0 = stride * get_group_id(0);
  int gid_0 = sid_0 / height;
  int gid_1 = sid_0 % height;
  for (int sid = sid_0; sid < sid_0 + stride && gid_0 < width && gid_1 < height;
       sid++, gid_0 = sid / height, gid_1 = sid % height) {
    int2 srcCoord;

    srcCoord.x = gid_0 * 16 + get_global_offset(0); // 16 pixels wide MBs (globally scalar)
    srcCoord.y = gid_1 * 16 + get_global_offset(1); // 16 pixels tall MBs (globally scalar)

    uint curMB = gid_0 + gid_1 * width; // current MB id

    // count.x = number of predictor MVs, count.y = number of skip check MVs.
    // Both stay 0 when no count buffer is supplied.
    short2 count = 0;
    if(count_motion_vector_buffer != NULL)
      count = count_motion_vector_buffer[curMB];

    int countPredMVs = count.x;
    if (countPredMVs != 0) {
      uint offset = curMB * 8;       // 8 predictors per MB
      offset += get_local_id(0) % 8; // 16 work-items access 8 MVs for MB
      // one predictor for MB per SIMD channel

      // Reduce predictors from Q-pixel to integer precision.
      int2 predMV = 0;
      if (get_local_id(0) < countPredMVs) {
        if(predictors_buffer != NULL){
          predMV = convert_int2(predictors_buffer[offset]); // one MV per work-item
          predMV.x /= 4;
          predMV.y /= 4;
          // Clear only bit 0 so the vertical component becomes even. The mask
          // must be 32 bits wide: predMV.y is an int, and a 16-bit mask would
          // also wipe the sign bits of a negative predictor.
          predMV.y &= 0xFFFFFFFE;
        }
      }

      // Do up to 8 IMEs, get the best MVs and their distortions, and optionally
      // a FBR of the best MVs.
      // Finally the results are written out to SLM.
      intel_work_group_vme_mb_multi_query_8(
          dstSearch,       // best search MV and its distortions into SLM
          countPredMVs,    // count of predictor MVs (globally scalar - value range
                           // 1 to 8)
          MVCostPrecision, // MV cost precision
          MVCostTable,     // MV cost table
          srcCoord,        // MB 2-D offset (globally scalar)
          predMV,          // predictor MVs (up to 8 distinct MVs for SIMD16 thread)
          srcImg,          // source
          refImg,          // reference
          accelerator);    // vme object
    }

    int doIntra = (flags & 0x2) != 0;
    int intraEdges = 0;
    if (doIntra) {
      // Enable all edges by default.
      intraEdges = 0x3C;
      // If this is a left-edge MB, then disable left edges.
      if ((gid_0 == 0) & (get_global_offset(0) == 0)) {
        intraEdges &= 0x18;
      }
      // If this is a right edge MB then disable right edges.
      if (gid_0 == width - 1) {
        intraEdges &= 0x34;
      }
      // If this is a top-edge MB, then disable top edges.
      if ((gid_1 == 0) & (get_global_offset(1) == 0)) {
        intraEdges &= 0x20;
      }
      // Set bit6=bit5.
      intraEdges |= ((intraEdges & 0x20) << 1);
      intraEdges <<= 8;
    }

    int countSkipMVs = count.y;
    if (countSkipMVs != 0 || doIntra == true) {
      uint offset = curMB * 8;         // 8 sets of skip check MVs per MB
      offset += (get_local_id(0) % 8); // 16 work-items access 8 sets of MVs for MB
      // one set of skip MV per SIMD channel

      // Do up to 8 skip checks and get the distortions for each of them.
      // Finally the results are written out to SLM.
      // The 16x16 call is also the one used for an intra-only pass
      // (doIntra set, no skip MVs), whatever skip_block_type says.
      if ((skip_block_type == 0x0) | ((doIntra) & (countSkipMVs == 0))) {
        int skipMVs = 0;
        if (get_local_id(0) < countSkipMVs) {
          if(skip_motion_vector_buffer != NULL ) {
            // one packed MV (read as a single int) for one work-item
            __global int *skip1_motion_vector_buffer =
                (__global int *)skip_motion_vector_buffer;
            skipMVs = skip1_motion_vector_buffer[offset];
          }
        }
        intel_work_group_vme_mb_multi_check_16x16(
            dstSkipIntra, // distortions into SLM
            countSkipMVs, // count of skip check MVs (value range 0 to 8)
            doIntra,      // compute intra modes
            intraEdges,   // intra edges to use
            srcCoord,     // MB 2-D offset (globally scalar)
            skipMVs,      // skip check MVs (up to 8 sets of skip check MVs for
                          // SIMD16 thread)
            srcImg,       // source
            refImg,       // reference
            intraSrcImg,  // intra source
            accelerator);
      }

      if ((skip_block_type == 0x1) & (countSkipMVs > 0)) {
        int4 skipMVs = 0;
        if (get_local_id(0) < countSkipMVs) {
          if(skip_motion_vector_buffer != NULL){
            // four component MVs (read as one int4) per work-item
            __global int4 *skip4_motion_vector_buffer =
                (__global int4 *)(skip_motion_vector_buffer);
            skipMVs = skip4_motion_vector_buffer[offset];
          }
        }
        intel_work_group_vme_mb_multi_check_8x8(
            dstSkipIntra, // distortions into SLM
            countSkipMVs, // count of skip check MVs per MB (value range 0 to 8)
            doIntra,      // compute intra modes
            intraEdges,   // intra edges to use
            srcCoord,     // MB 2-D offset (globally scalar)
            skipMVs,      // skip check MVs (up to 8 sets of skip check MVs for SIMD16
                          // thread)
            srcImg,       // source
            refImg,       // reference
            intraSrcImg,  // intra source
            accelerator);
      }
    }

    // All results must be in local memory before any work-item copies them out.
    barrier(CLK_LOCAL_MEM_FENCE);

    // Write Out motion estimation result:
    // Result format
    //     Hierarchical row-major layout
    //     i.e. row-major of blocks MVs in MBs, and row-major of 8 sets of
    //     MVs/distortion in blocks
    if (countPredMVs != 0) {
      // 4x4
      if (intel_get_accelerator_mb_block_type(accelerator) == 0x2) {
        int index = (gid_0 * 16 + get_local_id(0)) + (gid_1 * 16 * width);

        // 1. 16 work-items enabled.
        // 2. Work-items gather fwd MVs in strided dword locations 0, 2, .., 30
        //    (interleaved fwd/bwd MVs) with constant offset 8 (control data
        //    size) from SLM into contiguous short2 locations 0, 1, .., 15 of
        //    global buffer motion_vector_buffer with offset index.
        // 3. Work-items gather contiguous ushort locations 0, 1, .., 15 from
        //    distSearch into contiguous ushort locations 0, 1, .., 15 of
        //    residuals with offset index.
        short2 val = as_short2(dstSearch[8 + get_local_id(0) * 2]);
        if(motion_vector_buffer != NULL)
          motion_vector_buffer[index] = val;

        if (residuals != NULL) {
          residuals[index] = distSearch[get_local_id(0)];
        }
      }
      // 8x8
      else if (intel_get_accelerator_mb_block_type(accelerator) == 0x1) {
        // Only the first 4 work-items are needed.
        if (get_local_id(0) < 4) {
          int index = (gid_0 * 4 + get_local_id(0)) + (gid_1 * 4 * width);

          // 1. 4 work-items enabled.
          // 2. Work-items gather fwd MVs in strided dword locations 0, 8, 16, 24
          //    (interleaved fwd/bwd MVs) with constant offset 8 from SLM into
          //    contiguous short2 locations 0, 1, 2, 3 of global buffer
          //    motion_vector_buffer with offset index.
          // 3. Work-items gather strided ushort locations 0, 4, 8, 12 from
          //    distSearch into contiguous ushort locations 0, 1, 2, 3 of
          //    residuals with offset index.
          short2 val = as_short2(dstSearch[8 + get_local_id(0) * 4 * 2]);
          if(motion_vector_buffer != NULL)
            motion_vector_buffer[index] = val;

          if (residuals != NULL) {
            residuals[index] = distSearch[get_local_id(0) * 4];
          }
        }
      }
      // 16x16
      else if (intel_get_accelerator_mb_block_type(accelerator) == 0x0) {
        // Only the first work-item is needed.
        if (get_local_id(0) == 0) {
          int index = gid_0 + gid_1 * width;

          // 1. 1 work-item enabled.
          // 2. Work-item gathers fwd MV in dword location 0 with constant
          //    offset 8 from SLM into short2 location 0 of global buffer
          //    motion_vector_buffer with offset index.
          // 3. Work-item gathers ushort location 0 from distSearch into ushort
          //    location 0 of residuals with offset index.
          short2 val = as_short2(dstSearch[8]);
          if(motion_vector_buffer != NULL)
            motion_vector_buffer[index] = val;

          if (residuals != NULL) {
            residuals[index] = distSearch[0];
          }
        }
      }
    }

    // Write out motion skip check result:
    // Result format
    //     Hierarchical row-major layout
    //     i.e. row-major of blocks in MBs, and row-major of 8 sets of
    //     distortions in blocks
    if (countSkipMVs != 0) {
      if (skip_block_type == 0x0) {
        // Copy out 8 (1 component) sets of distortion values.
        int index = (gid_0 * 8) + (get_local_id(0)) + (gid_1 * 8 * width);

        if (get_local_id(0) < countSkipMVs) {
          __local ushort *distSkip = (__local ushort *)&dstSkipIntra[0];

          // 1. Up to 8 work-items are enabled.
          // 2. The work-item gathers distSkip locations 0, 16*1, .., 16*7 and
          //    copies them to contiguous skip_residuals locations 0, 1, 2, ..,
          //    7.
          if(skip_residuals != NULL)
            skip_residuals[index] = distSkip[get_local_id(0) * 16];
        }
      } else {
        // Copy out 8 (4 component) sets of distortion values.
        int index =
            (gid_0 * 8 * 4) + (get_local_id(0)) + (gid_1 * 8 * 4 * width);

        __local ushort *distSkip = (__local ushort *)&dstSkipIntra[0];

        if (get_local_id(0) < countSkipMVs * 4) {
          // 1. Up to 16 work-items are enabled.
          // 2. Each work-item copies two values: distSkip locations
          //    0, 4*1, .., 4*31 go to contiguous skip_residuals locations
          //    0, 1, 2, .., 31 (second value at offset +16).
          if(skip_residuals != NULL){
            skip_residuals[index] = distSkip[get_local_id(0) * 4];
            skip_residuals[index + 16] = distSkip[(get_local_id(0) + 16) * 4];
          }
        }
      }
    }

    // Write out intra search result:
    if (doIntra) {
      // Each MB owns 22 entries of intra_search_predictor_modes:
      // entry 0 = 16x16 mode, entries 1..4 = 8x8 modes, entries 5..20 = 4x4
      // modes (as written below).
      int index_low =
          (gid_0 * 22) + (get_local_id(0) * 2) + (gid_1 * 22 * width);
      int index_high =
          (gid_0 * 22) + (get_local_id(0) * 2) + 1 + (gid_1 * 22 * width);

      // Write out the 4x4 intra modes
      if (get_local_id(0) < 8) {
        __local char *dstIntra_4x4 =
            (__local char *)(&dstSkipIntra[64 + 16 + 4]);
        // Each byte packs two 4-bit modes; split it into two outputs.
        char value = dstIntra_4x4[get_local_id(0)];
        char value_low = (value)&0xf;
        char value_high = (value >> 4) & 0xf;
        if(intra_search_predictor_modes != NULL){
          intra_search_predictor_modes[index_low + 5] = value_low;
          intra_search_predictor_modes[index_high + 5] = value_high;
        }
      }

      // Write out the 8x8 intra modes
      if (get_local_id(0) < 4) {
        __local char *dstIntra_8x8 =
            (__local char *)(&dstSkipIntra[64 + 8 + 4]);
        char value = dstIntra_8x8[get_local_id(0) * 2];
        char value_low = (value)&0xf;
        int index = (gid_0 * 22) + (get_local_id(0)) + (gid_1 * 22 * width);
        if(intra_search_predictor_modes != NULL)
          intra_search_predictor_modes[index + 1] = value_low;
      }

      // Write out the 16x16 intra modes
      if (get_local_id(0) < 1) {
        __local char *dstIntra_16x16 =
            (__local char *)(&dstSkipIntra[64 + 0 + 4]);
        char value = dstIntra_16x16[get_local_id(0)];
        char value_low = (value)&0xf;
        // Only work-item 0 gets here, so index_low is the MB's entry 0.
        if(intra_search_predictor_modes != NULL)
          intra_search_predictor_modes[index_low] = value_low;
      }

      // Get the intra residuals.
if (intra_residuals != NULL) { int index = (gid_0 * 4) + (gid_1 * 4 * width); if (get_local_id(0) < 1) { __local ushort *distIntra_4x4 = (__local ushort *)(&dstSkipIntra[64 + 16 + 3]); __local ushort *distIntra_8x8 = (__local ushort *)(&dstSkipIntra[64 + 8 + 3]); __local ushort *distIntra_16x16 = (__local ushort *)(&dstSkipIntra[64 + 0 + 3]); intra_residuals[index + 2] = distIntra_4x4[0]; intra_residuals[index + 1] = distIntra_8x8[0]; intra_residuals[index + 0] = distIntra_16x16[0]; } } } } } )===" vme_block_advanced_motion_estimate_check_intel_frontend.builtin_kernel000066400000000000000000000013251422164147700417340ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/source/built_ins/kernels/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ R"===( __kernel __attribute__((reqd_work_group_size(16, 1, 1))) void block_advanced_motion_estimate_check_intel( sampler_t accelerator, __read_only image2d_t srcImg, __read_only image2d_t refImg, uint flags, uint skip_block_type, uint search_cost_penalty, uint search_cost_precision, __global short2 *count_motion_vector_buffer, __global short2 *predictors_buffer, __global short2 *skip_motion_vector_buffer, __global short2 *motion_vector_buffer, __global char *intra_search_predictor_modes, __global ushort *residuals, __global ushort *skip_residuals, __global ushort *intra_residuals) { } )===" vme_block_motion_estimate_intel.builtin_kernel000066400000000000000000000054461422164147700350630ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/source/built_ins/kernels/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ R"===( __kernel __attribute__((reqd_work_group_size(16, 1, 1))) void block_motion_estimate_intel(sampler_t accelerator, __read_only image2d_t srcImg, __read_only image2d_t refImg, __global short2 *prediction_motion_vector_buffer, __global short2 *motion_vector_buffer, __global ushort *residuals, int height, int width, int stride) { 
__local uint dst[64]; __local ushort *dist = (__local ushort *)&dst[8 * 5]; int sid_0 = stride * get_group_id(0); int gid_0 = sid_0 / height; int gid_1 = sid_0 % height; for (int sid = sid_0; sid < sid_0 + stride && gid_0 < width && gid_1 < height; sid++, gid_0 = sid / height, gid_1 = sid % height) { int2 srcCoord = 0; int2 refCoord = 0; srcCoord.x = gid_0 * 16 + get_global_offset(0); srcCoord.y = gid_1 * 16 + get_global_offset(1); short2 predMV = 0; #ifndef HW_NULL_CHECK if (prediction_motion_vector_buffer != NULL) #endif { predMV = prediction_motion_vector_buffer[gid_0 + gid_1 * width]; refCoord.x = predMV.x / 4; refCoord.y = predMV.y / 4; refCoord.y = refCoord.y & 0xFFFE; } { intel_work_group_vme_mb_query(dst, srcCoord, refCoord, srcImg, refImg, accelerator); } barrier(CLK_LOCAL_MEM_FENCE); // Write Out Result // 4x4 if (intel_get_accelerator_mb_block_type(accelerator) == 0x2) { int x = get_local_id(0) % 4; int y = get_local_id(0) / 4; int index = (gid_0 * 4 + x) + (gid_1 * 4 + y) * width * 4; short2 val = as_short2(dst[8 + (y * 4 + x) * 2]); motion_vector_buffer[index] = val; #ifndef HW_NULL_CHECK if (residuals != NULL) #endif { residuals[index] = dist[y * 4 + x]; } } // 8x8 if (intel_get_accelerator_mb_block_type(accelerator) == 0x1) { if (get_local_id(0) < 4) { int x = get_local_id(0) % 2; int y = get_local_id(0) / 2; int index = (gid_0 * 2 + x) + (gid_1 * 2 + y) * width * 2; short2 val = as_short2(dst[8 + (y * 2 + x) * 8]); motion_vector_buffer[index] = val; #ifndef HW_NULL_CHECK if (residuals != NULL) #endif { residuals[index] = dist[(y * 2 + x) * 4]; } } } // 16x16 if (intel_get_accelerator_mb_block_type(accelerator) == 0x0) { if (get_local_id(0) == 0) { int index = gid_0 + gid_1 * width; short2 val = as_short2(dst[8]); motion_vector_buffer[index] = val; #ifndef HW_NULL_CHECK if (residuals != NULL) #endif { residuals[index] = dist[0]; } } } } } )===" 
vme_block_motion_estimate_intel_frontend.builtin_kernel000066400000000000000000000007761422164147700367630ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/source/built_ins/kernels/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ R"===( __kernel __attribute__((reqd_work_group_size(16, 1, 1))) void block_motion_estimate_intel(sampler_t accelerator, __read_only image2d_t srcImg, __read_only image2d_t refImg, __global short2 *prediction_motion_vector_buffer, __global short2 *motion_vector_buffer, __global ushort *residuals) { } )===" compute-runtime-22.14.22890/opencl/source/built_ins/populate_built_ins.inl000066400000000000000000000013611422164147700265300ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/cl_device/cl_device.h" namespace NEO {
// Loads the code of built-in `op` for this builder's device, wraps it in a
// program, builds that program with `options`, and then hands `desc` to
// grabKernels().
//
// op      - built-in operation whose code is fetched (any code type is
//           accepted: BuiltinCode::ECodeType::Any).
// options - build options; passed to build() via options.data().
// desc    - forwarded unchanged to grabKernels().
//
// The value returned by prog->build() is not inspected here.
// NOTE(review): the template parameter list and the std::forward template
// argument appear to be missing from this copy of the text - restore them
// from the original file before compiling.
template void BuiltinDispatchInfoBuilder::populate(EBuiltInOps::Type op, ConstStringRef options, KernelsDescArgsT &&...desc) {
    auto src = kernelsLib.getBuiltinsLib().getBuiltinCode(op, BuiltinCode::ECodeType::Any, clDevice.getDevice());
    // The program is created and built for this single device only.
    ClDeviceVector deviceVector;
    deviceVector.push_back(&clDevice);
    prog.reset(BuiltinDispatchInfoBuilder::createProgramFromCode(src, deviceVector).release());
    prog->build(deviceVector, options.data(), kernelsLib.isCacheingEnabled());
    grabKernels(std::forward(desc)...);
}
} // namespace NEO compute-runtime-22.14.22890/opencl/source/built_ins/registry/000077500000000000000000000000001422164147700237725ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/source/built_ins/registry/CMakeLists.txt000066400000000000000000000010461422164147700265330ustar00rootroot00000000000000# # Copyright (C) 2018-2021 Intel Corporation # # SPDX-License-Identifier: MIT # add_library(${BUILTINS_VME_LIB_NAME} OBJECT EXCLUDE_FROM_ALL CMakeLists.txt register_ext_vme_source.cpp ) set_target_properties(${BUILTINS_VME_LIB_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON)
set_target_properties(${BUILTINS_VME_LIB_NAME} PROPERTIES FOLDER "${OPENCL_RUNTIME_PROJECTS_FOLDER}/${OPENCL_BUILTINS_PROJECTS_FOLDER}") target_include_directories(${BUILTINS_VME_LIB_NAME} PRIVATE ${KHRONOS_HEADERS_DIR} ) compute-runtime-22.14.22890/opencl/source/built_ins/registry/register_ext_vme_source.cpp000066400000000000000000000025451422164147700314370ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/built_ins/registry/built_ins_registry.h" #include "opencl/source/built_ins/built_in_ops_vme.h" #include namespace NEO {

// Each static object below registers the source text of one VME built-in
// kernel as an embedded resource. The resource name is derived from the
// built-in id plus the extension used for source code; the payload is the
// corresponding .builtin_kernel file, pulled in with #include as the
// argument of std::string(...).
// NOTE(review): the bare `#include` ahead of `namespace NEO` has lost its
// header name in this copy of the text - restore it from the original file.

// Source of the basic block motion estimation kernel.
static RegisterEmbeddedResource registerVmeSrc(
    createBuiltinResourceName(
        EBuiltInOps::VmeBlockMotionEstimateIntel,
        BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
        .c_str(),
    std::string(
#include "opencl/source/built_ins/kernels/vme_block_motion_estimate_intel.builtin_kernel"
        ));

// Source of the advanced motion estimation (skip/intra check) kernel.
static RegisterEmbeddedResource registerVmeAdvancedSrc(
    createBuiltinResourceName(
        EBuiltInOps::VmeBlockAdvancedMotionEstimateCheckIntel,
        BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
        .c_str(),
    std::string(
#include "opencl/source/built_ins/kernels/vme_block_advanced_motion_estimate_check_intel.builtin_kernel"
        ));

// Source of the advanced bidirectional motion estimation kernel.
static RegisterEmbeddedResource registerVmeAdvancedBidirectionalSrc(
    createBuiltinResourceName(
        EBuiltInOps::VmeBlockAdvancedMotionEstimateBidirectionalCheckIntel,
        BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
        .c_str(),
    std::string(
#include "opencl/source/built_ins/kernels/vme_block_advanced_motion_estimate_bidirectional_check_intel.builtin_kernel"
        ));

} // namespace NEO compute-runtime-22.14.22890/opencl/source/built_ins/unknown_built_in.cpp000066400000000000000000000006741422164147700262210ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/built_ins/built_ins.h" #include "opencl/source/built_ins/builtins_dispatch_builder.h" namespace NEO {
// Always throws std::runtime_error and never returns a builder; neither
// parameter is read.
// Note that the exception text names getBuiltinDispatchInfoBuilder, not this
// function.
BuiltinDispatchInfoBuilder &BuiltInDispatchBuilderOp::getUnknownDispatchInfoBuilder(EBuiltInOps::Type operation, ClDevice &device) {
    throw std::runtime_error("getBuiltinDispatchInfoBuilder failed");
}
} // namespace NEO compute-runtime-22.14.22890/opencl/source/built_ins/vme_builtin.cpp000066400000000000000000000126351422164147700251520ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/built_ins/vme_builtin.h" #include "shared/source/built_ins/built_ins.h" #include "shared/source/device/device.h" #include "opencl/source/built_ins/built_in_ops_vme.h" #include "opencl/source/built_ins/builtins_dispatch_builder.h" #include "opencl/source/built_ins/populate_built_ins.inl" #include "opencl/source/built_ins/vme_dispatch_builder.h" #include "opencl/source/execution_environment/cl_execution_environment.h" #include "opencl/source/program/program.h" namespace NEO { static const char *blockMotionEstimateIntelSrc = { #include "kernels/vme_block_motion_estimate_intel_frontend.builtin_kernel" }; static const char *blockAdvancedMotionEstimateCheckIntelSrc = { #include "kernels/vme_block_advanced_motion_estimate_check_intel_frontend.builtin_kernel" }; static const char *blockAdvancedMotionEstimateBidirectionalCheckIntelSrc = { #include "kernels/vme_block_advanced_motion_estimate_bidirectional_check_intel_frontend.builtin_kernel" }; static const std::tuple mediaBuiltIns[] = { {"block_motion_estimate_intel", blockMotionEstimateIntelSrc}, {"block_advanced_motion_estimate_check_intel", blockAdvancedMotionEstimateCheckIntelSrc}, {"block_advanced_motion_estimate_bidirectional_check_intel", blockAdvancedMotionEstimateBidirectionalCheckIntelSrc}}; // Unlike other built-ins media kernels are not stored in BuiltIns object. // Pointer to program with built in kernels is returned to the user through API // call and user is responsible for releasing it by calling clReleaseProgram. 
// Creates and builds a program containing the requested media built-in
// kernels.
//
// context      - context the program is created in.
// deviceVector - devices the program is created and built for; element 0 is
//                also used to look up the dispatch builders.
// kernelNames  - ';'-separated list of kernel names; every name must match an
//                entry of mediaBuiltIns.
// errcodeRet   - always written: CL_INVALID_VALUE when a name is unknown, the
//                list yields no source, or program creation returns null;
//                otherwise the value returned by Program::build().
//
// Returns nullptr for an unknown name, for an empty source, or when program
// creation fails. When build() reports an error the program pointer is still
// returned, so callers have to inspect errcodeRet as well.
// NOTE(review): kernelNames is handed straight to std::istringstream -
// presumably callers guarantee it is non-null; verify.
Program *Vme::createBuiltInProgram(
    Context &context,
    const ClDeviceVector &deviceVector,
    const char *kernelNames,
    int &errcodeRet) {
    std::string programSourceStr = "";
    std::istringstream ss(kernelNames);
    std::string currentKernelName;

    // Concatenate the source of every requested kernel, in request order.
    while (std::getline(ss, currentKernelName, ';')) {
        bool found = false;
        for (auto &builtInTuple : mediaBuiltIns) {
            if (currentKernelName == std::get<0>(builtInTuple)) {
                programSourceStr += std::get<1>(builtInTuple);
                found = true;
                break;
            }
        }
        // A single unknown name rejects the whole request.
        if (!found) {
            errcodeRet = CL_INVALID_VALUE;
            return nullptr;
        }
    }
    if (programSourceStr.empty() == true) {
        errcodeRet = CL_INVALID_VALUE;
        return nullptr;
    }

    Program *pBuiltInProgram = nullptr;
    pBuiltInProgram = Program::createBuiltInFromSource(programSourceStr.c_str(), &context, deviceVector, nullptr);
    // Dereferenced before the null check below, so deviceVector must hold at
    // least one device.
    auto &device = *deviceVector[0];

    if (pBuiltInProgram) {
        // Builders for all three media kernels are handed to build(),
        // regardless of which kernels were actually requested.
        // NOTE(review): the template arguments of this map appear to be
        // missing from this copy of the text.
        std::unordered_map builtinsBuilders;
        builtinsBuilders["block_motion_estimate_intel"] = &Vme::getBuiltinDispatchInfoBuilder(EBuiltInOps::VmeBlockMotionEstimateIntel, device);
        builtinsBuilders["block_advanced_motion_estimate_check_intel"] = &Vme::getBuiltinDispatchInfoBuilder(EBuiltInOps::VmeBlockAdvancedMotionEstimateCheckIntel, device);
        builtinsBuilders["block_advanced_motion_estimate_bidirectional_check_intel"] = &Vme::getBuiltinDispatchInfoBuilder(EBuiltInOps::VmeBlockAdvancedMotionEstimateBidirectionalCheckIntel, device);

        errcodeRet = pBuiltInProgram->build(deviceVector, mediaKernelsBuildOptions, true, builtinsBuilders);
    } else {
        errcodeRet = CL_INVALID_VALUE;
    }
    return pBuiltInProgram;
}

// Maps a VME built-in id to its name string; returns nullptr for any other
// id.
// NOTE(review): the bidirectional entry has no ".builtin_kernel" suffix while
// the other two do. The string is left untouched here because it is runtime
// data that other code may depend on - confirm whether the difference is
// intentional.
const char *getAdditionalBuiltinAsString(EBuiltInOps::Type builtin) {
    switch (builtin) {
    default:
        return nullptr;
    case EBuiltInOps::VmeBlockMotionEstimateIntel:
        return "vme_block_motion_estimate_intel.builtin_kernel";
    case EBuiltInOps::VmeBlockAdvancedMotionEstimateCheckIntel:
        return "vme_block_advanced_motion_estimate_check_intel.builtin_kernel";
    case EBuiltInOps::VmeBlockAdvancedMotionEstimateBidirectionalCheckIntel:
        return "vme_block_advanced_motion_estimate_bidirectional_check_intel";
    }
}

// Returns the dispatch builder for `operation` on `device`.
//
// For the three VME operations the builder lives in a slot obtained from the
// device's execution environment (indexed by root device index, then by
// operation id) and is created on first use under std::call_once. Every other
// operation is forwarded to
// BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder().
// NOTE(review): the static_cast and std::make_unique template arguments
// appear to be missing from this copy of the text - restore them from the
// original file before compiling.
BuiltinDispatchInfoBuilder &Vme::getBuiltinDispatchInfoBuilder(EBuiltInOps::Type operation, ClDevice &device) {
    auto &builtins = *device.getDevice().getBuiltIns();
    uint32_t operationId = static_cast(operation);
    auto clExecutionEnvironment = static_cast(device.getExecutionEnvironment());
    // The slot is used as a pair: .first holds the builder, .second the
    // once-flag guarding its creation.
    auto &operationBuilder = clExecutionEnvironment->peekBuilders(device.getRootDeviceIndex())[operationId];
    switch (operation) {
    default:
        return BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(operation, device);
    case EBuiltInOps::VmeBlockMotionEstimateIntel:
        std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique>(builtins, device); });
        break;
    case EBuiltInOps::VmeBlockAdvancedMotionEstimateCheckIntel:
        std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique>(builtins, device); });
        break;
    case EBuiltInOps::VmeBlockAdvancedMotionEstimateBidirectionalCheckIntel:
        std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique>(builtins, device); });
        break;
    }
    return *operationBuilder.first;
}
} // namespace NEO compute-runtime-22.14.22890/opencl/source/built_ins/vme_builtin.h000066400000000000000000000011221422164147700246040ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/built_ins/built_in_ops_vme.h" namespace NEO { class Program; class ClDevice; class ClDeviceVector; class Context; class BuiltIns; class BuiltinDispatchInfoBuilder; namespace Vme { Program *createBuiltInProgram( Context &context, const ClDeviceVector &deviceVector, const char *kernelNames, int &errcodeRet); BuiltinDispatchInfoBuilder &getBuiltinDispatchInfoBuilder(EBuiltInOps::Type operation, ClDevice &device); } // namespace Vme } // namespace 
NEOcompute-runtime-22.14.22890/opencl/source/built_ins/vme_dispatch_builder.h000066400000000000000000000526341422164147700264610ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/built_ins/built_ins.h" #include "opencl/source/accelerators/intel_accelerator.h" #include "opencl/source/accelerators/intel_motion_estimation.h" #include "opencl/source/built_ins/built_in_ops_vme.h" #include "opencl/source/built_ins/builtins_dispatch_builder.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/helpers/dispatch_info_builder.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/mem_obj/image.h" namespace NEO { /* Dispatch-info builder shared by the VME built-in kernels: the constructor populates the kernel and resolves its argument indices by name; the methods validate arguments and turn an enqueue into a block-based dispatch. */ class VmeBuiltinDispatchInfoBuilder : public BuiltinDispatchInfoBuilder { public: VmeBuiltinDispatchInfoBuilder(BuiltIns &kernelsLib, ClDevice &device, EBuiltInOps::Type builtinOp, const char *kernelName) : BuiltinDispatchInfoBuilder(kernelsLib, device) { populate(builtinOp, mediaKernelsBuildOptions, kernelName, multiDeviceVmeKernel); auto rootDeviceIndex = device.getRootDeviceIndex(); vmeKernel = multiDeviceVmeKernel->getKernel(rootDeviceIndex); widthArgNum = vmeKernel->getKernelInfo().getArgNumByName("width"); heightArgNum = vmeKernel->getKernelInfo().getArgNumByName("height"); strideArgNum = vmeKernel->getKernelInfo().getArgNumByName("stride"); acceleratorArgNum = vmeKernel->getKernelInfo().getArgNumByName("accelerator"); srcImgArgNum = vmeKernel->getKernelInfo().getArgNumByName("srcImg"); refImgArgNum = vmeKernel->getKernelInfo().getArgNumByName("refImg"); motionVectorBufferArgNum = vmeKernel->getKernelInfo().getArgNumByName("motion_vector_buffer"); predictionMotionVectorBufferArgNum = vmeKernel->getKernelInfo().getArgNumByName("prediction_motion_vector_buffer"); residualsArgNum = vmeKernel->getKernelInfo().getArgNumByName("residuals"); } /* Converts inGws.x and inGws.y into counts of 16x16 blocks, rounding up, and returns them through the two output references. */ void getBlkTraits(const Vec3 &inGws, size_t &gwWidthInBlk, size_t &gwHeightInBlk) const {
const size_t vmeMacroBlockWidth = 16; const size_t vmeMacroBlockHeight = 16; gwWidthInBlk = Math::divideAndRoundUp(inGws.x, vmeMacroBlockWidth); gwHeightInBlk = Math::divideAndRoundUp(inGws.y, vmeMacroBlockHeight); } /* Sets the height, width and stride arguments from the block counts and bakes a dispatch whose global size is {blocks-in-x * max SIMD size, 1, 1}; returns false only when kern is nullptr. NOTE(review): inGws.x == 0 makes numThreadsX zero and it is then passed as the divisor to divideAndRoundUp - confirm callers reject an empty range. */ bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo, Kernel *kern, const uint32_t inDim, const Vec3 &inGws, const Vec3 &inLws, const Vec3 &inOffset) const override { if (kern == nullptr) { return false; } size_t gwWidthInBlk = 0; size_t gwHeightInBlk = 0; getBlkTraits(inGws, gwWidthInBlk, gwHeightInBlk); cl_int height = (cl_int)gwHeightInBlk; cl_int width = (cl_int)gwWidthInBlk; cl_int stride = height; size_t numThreadsX = gwWidthInBlk; const size_t simdWidth = vmeKernel->getKernelInfo().getMaxSimdSize(); stride = static_cast(Math::divideAndRoundUp(height * width, numThreadsX)); // update implicit args vmeKernel->setArg(heightArgNum, sizeof(height), &height); vmeKernel->setArg(widthArgNum, sizeof(width), &width); vmeKernel->setArg(strideArgNum, sizeof(stride), &stride); // Update global work size to force macro-block to HW thread execution model Vec3 gws = {numThreadsX * simdWidth, 1, 1}; Vec3 lws = {vmeKernel->getKernelInfo().kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0], 1, 1}; DispatchInfoBuilder builder(clDevice); builder.setDispatchGeometry(gws, lws, inOffset, gws, lws); builder.setKernel(vmeKernel); builder.bake(multiDispatchInfo); return true; } /* Stores an explicitly set argument on the internal VME kernel and reports the setArg result through err. Always returns false; a null accelerator argument instead reports CL_INVALID_ACCELERATOR_INTEL without touching the kernel. */ bool setExplicitArg(uint32_t argIndex, size_t argSize, const void *argVal, cl_int &err) const override { DEBUG_BREAK_IF(!((argIndex != widthArgNum) && (argIndex != heightArgNum) && (argIndex != strideArgNum))); if ((argIndex == acceleratorArgNum) && (argVal == nullptr)) { err = CL_INVALID_ACCELERATOR_INTEL; return false; } err = vmeKernel->setArg(argIndex, argSize, argVal); return false; } /* Validates an enqueue: requires a work dimension of 2 and a previously set accelerator, derives the block count and a per-block multiplier (4 for 8x8, 16 for 4x4, otherwise 1) from the accelerator descriptor, then defers to validateVmeDispatch. */ cl_int validateDispatch(Kernel *kernel, uint32_t inworkDim, const Vec3 &inGws, const Vec3 &inLws, const Vec3 &inOffset) const override { if (inworkDim != 2) { return CL_INVALID_WORK_DIMENSION; }
size_t gwWidthInBlk = 0; size_t gwHeightInBlk = 0; getBlkTraits(inGws, gwWidthInBlk, gwHeightInBlk); size_t BlkNum = gwWidthInBlk * gwHeightInBlk; size_t BlkMul = 1; IntelAccelerator *accelerator = castToObject((cl_accelerator_intel)vmeKernel->getKernelArg(acceleratorArgNum)); if (accelerator == nullptr) { return CL_INVALID_KERNEL_ARGS; // accelerator was not set } DEBUG_BREAK_IF(accelerator->getDescriptorSize() != sizeof(cl_motion_estimation_desc_intel)); const cl_motion_estimation_desc_intel *acceleratorDesc = reinterpret_cast(accelerator->getDescriptor()); switch (acceleratorDesc->mb_block_type) { case CL_ME_MB_TYPE_8x8_INTEL: BlkMul = 4; break; case CL_ME_MB_TYPE_4x4_INTEL: BlkMul = 16; break; default: break; } return validateVmeDispatch(inGws, inOffset, BlkNum, BlkMul); } // notes on corner cases : // * if arg not available in kernels - returns true // * if arg set to nullptr - returns true bool validateBufferSize(int32_t bufferArgNum, size_t minimumSizeExpected) const { if (bufferArgNum == -1) { return true; } auto buff = castToObject((cl_mem)vmeKernel->getKernelArg(bufferArgNum)); if (buff == nullptr) { return true; } size_t bufferSize = buff->getSize(); if (bufferSize < minimumSizeExpected) { return false; } return true; } /* Recursion terminator: reached when no expected value matched. */ template bool validateEnumVal(EnumBaseType val) const { return false; } /* Returns true when val equals expectedVal or any of the remaining expected values. */ template bool validateEnumVal(EnumBaseType val, ExpectedValType expectedVal, ExpectedValsTypes... expVals) const { return (val == static_cast(expectedVal)) || validateEnumVal(val, expVals...); } // notes on corner cases : // * if arg not available in kernels - returns true template bool validateEnumArg(int32_t argNum, ExpectedValsTypes...
expVals) const { if (argNum == -1) { return true; } EnumBaseType val = this->getKernelArgByValValue(static_cast(argNum)); return validateEnumVal(val, expVals...); } /* Reads a by-value kernel argument directly from the cross-thread data of the kernel, at the offset recorded for the first element of that argument. */ template RetType getKernelArgByValValue(uint32_t argNum) const { const auto &argAsVal = vmeKernel->getKernelInfo().kernelDescriptor.payloadMappings.explicitArgs[argNum].as(); DEBUG_BREAK_IF(argAsVal.elements.size() != 1); const auto &element = argAsVal.elements[0]; DEBUG_BREAK_IF(sizeof(RetType) > element.size); return *(RetType *)(vmeKernel->getCrossThreadData() + element.offset); } /* Checks the source and reference images: both must be set, use CL_R with CL_UNORM_INT8, and be tiled; inputRegion plus offset must fit inside the source image. */ cl_int validateImages(const Vec3 &inputRegion, const Vec3 &offset) const { Image *srcImg = castToObject((cl_mem)vmeKernel->getKernelArg(srcImgArgNum)); Image *refImg = castToObject((cl_mem)vmeKernel->getKernelArg(refImgArgNum)); if ((srcImg == nullptr) || (refImg == nullptr)) { return CL_INVALID_KERNEL_ARGS; } for (Image *img : {srcImg, refImg}) { const cl_image_format &imgFormat = img->getImageFormat(); if ((imgFormat.image_channel_order != CL_R) || (imgFormat.image_channel_data_type != CL_UNORM_INT8)) { return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; } if (false == img->isTiledAllocation()) { //VME only works with tiled images.
return CL_OUT_OF_RESOURCES; } } { const cl_image_desc &srcImgDesc = srcImg->getImageDesc(); size_t srcImageWidth = srcImgDesc.image_width; size_t srcImageHeight = srcImgDesc.image_height; if (((inputRegion.x + offset.x) > srcImageWidth) || ((inputRegion.y + offset.y) > srcImageHeight)) { return CL_INVALID_IMAGE_SIZE; } } return CL_SUCCESS; } /* Validates the images first, then checks that the motion vector, prediction motion vector and residuals buffers, when present, are at least as large as blkNum and blkMul require; returns CL_INVALID_BUFFER_SIZE otherwise. */ virtual cl_int validateVmeDispatch(const Vec3 &inputRegion, const Vec3 &offset, size_t blkNum, size_t blkMul) const { { cl_int imageValidationStatus = validateImages(inputRegion, offset); if (imageValidationStatus != CL_SUCCESS) { return imageValidationStatus; } } size_t numPredictors = 1; std::pair bufferRequirements[] = { std::make_pair(motionVectorBufferArgNum, (blkNum * blkMul * 2 * sizeof(cl_short))), std::make_pair(predictionMotionVectorBufferArgNum, (blkNum * numPredictors * 2 * sizeof(cl_short))), std::make_pair(residualsArgNum, (blkNum * blkMul * sizeof(cl_ushort)))}; for (const auto &req : bufferRequirements) { if (false == validateBufferSize(req.first, req.second)) { return CL_INVALID_BUFFER_SIZE; } } return CL_SUCCESS; } protected: /* Argument indices resolved by name in the constructor, followed by the populated kernel objects. */ uint32_t heightArgNum; uint32_t widthArgNum; uint32_t strideArgNum; uint32_t acceleratorArgNum; uint32_t srcImgArgNum; uint32_t refImgArgNum; int32_t motionVectorBufferArgNum; int32_t predictionMotionVectorBufferArgNum; int32_t residualsArgNum; MultiDeviceKernel *multiDeviceVmeKernel; Kernel *vmeKernel; }; /* Builder specialization bound to the block_motion_estimate_intel kernel. */ template <> class BuiltInOp : public VmeBuiltinDispatchInfoBuilder { public: BuiltInOp(BuiltIns &kernelsLib, ClDevice &device) : VmeBuiltinDispatchInfoBuilder(kernelsLib, device, EBuiltInOps::VmeBlockMotionEstimateIntel, "block_motion_estimate_intel") { } }; /* Extends the basic VME builder with the additional arguments (flags, skip and intra buffers, cost settings, bidirectional weight) that its constructor resolves by name. */ class AdvancedVmeBuiltinDispatchInfoBuilder : public VmeBuiltinDispatchInfoBuilder { public: AdvancedVmeBuiltinDispatchInfoBuilder(BuiltIns &kernelsLib, ClDevice &device, EBuiltInOps::Type builtinOp, const char *kernelName) : VmeBuiltinDispatchInfoBuilder(kernelsLib, device, builtinOp, kernelName) { flagsArgNum =
this->vmeKernel->getKernelInfo().getArgNumByName("flags"); intraSrcImgArgNum = this->vmeKernel->getKernelInfo().getArgNumByName("intraSrcImg"); skipBlockTypeArgNum = this->vmeKernel->getKernelInfo().getArgNumByName("skip_block_type"); searchCostPenaltyArgNum = this->vmeKernel->getKernelInfo().getArgNumByName("search_cost_penalty"); searchCostPrecisionArgNum = this->vmeKernel->getKernelInfo().getArgNumByName("search_cost_precision"); bidirWeightArgNum = this->vmeKernel->getKernelInfo().getArgNumByName("bidir_weight"); predictorsBufferArgNum = this->vmeKernel->getKernelInfo().getArgNumByName("predictors_buffer"); countMotionVectorBufferArgNum = this->vmeKernel->getKernelInfo().getArgNumByName("count_motion_vector_buffer"); skipMotionVectorBufferArgNum = this->vmeKernel->getKernelInfo().getArgNumByName("skip_motion_vector_buffer"); intraSearchPredictorModesArgNum = this->vmeKernel->getKernelInfo().getArgNumByName("intra_search_predictor_modes"); skipResidualsArgNum = this->vmeKernel->getKernelInfo().getArgNumByName("skip_residuals"); intraResidualsArgNum = this->vmeKernel->getKernelInfo().getArgNumByName("intra_residuals"); } /* Same as the base implementation, except that setting srcImg also binds the same value to the intraSrcImg argument. */ bool setExplicitArg(uint32_t argIndex, size_t argSize, const void *argVal, cl_int &err) const override { DEBUG_BREAK_IF(argIndex == intraSrcImgArgNum); if (argIndex == this->srcImgArgNum) { // rebind also as media block image this->vmeKernel->setArg(intraSrcImgArgNum, argSize, argVal); } return VmeBuiltinDispatchInfoBuilder::setExplicitArg(argIndex, argSize, argVal, err); } virtual bool isBidirKernel() const { return false; } /* Returns false when the flags argument enables chroma intra prediction; otherwise derives the skip block type from the flags and returns true. outSkipBlockType is left untouched when neither skip-block pattern matches. */ bool validateFlags(uint32_t &outSkipBlockType) const { uint32_t flagsVal = VmeBuiltinDispatchInfoBuilder::template getKernelArgByValValue(flagsArgNum); if ((flagsVal & CL_ME_CHROMA_INTRA_PREDICT_ENABLED_INTEL) == CL_ME_CHROMA_INTRA_PREDICT_ENABLED_INTEL) { return false; } if (flagsVal == CL_ME_SKIP_BLOCK_TYPE_16x16_INTEL) { outSkipBlockType = CL_ME_MB_TYPE_16x16_INTEL; } else if ((flagsVal & CL_ME_SKIP_BLOCK_TYPE_8x8_INTEL)
== CL_ME_SKIP_BLOCK_TYPE_8x8_INTEL) { outSkipBlockType = CL_ME_MB_TYPE_8x8_INTEL; } return true; } /* When the kernel has a skip_block_type argument, reads it into outSkipBlockType and accepts only the 16x16 and 8x8 block types; returns true when the argument does not exist. */ bool validateSkipBlockTypeArg(uint32_t &outSkipBlockType) const { if (skipBlockTypeArgNum == -1) { return true; } outSkipBlockType = VmeBuiltinDispatchInfoBuilder::template getKernelArgByValValue(static_cast(skipBlockTypeArgNum)); switch (outSkipBlockType) { case CL_ME_MB_TYPE_16x16_INTEL: break; case CL_ME_MB_TYPE_8x8_INTEL: break; default: return false; ; } return true; } size_t getIntraSearchPredictorModesBuffExpSize(size_t blkNum) const { // vector size is 22 - 1 (16x16 luma block) + 4 (8x8 luma block) + 16 (4x4 luma block) + 1 (8x8 chroma block) int VectorSize = 22; size_t intraSearchPredictorModesBuffExpSize = blkNum * VectorSize; return intraSearchPredictorModesBuffExpSize; } size_t getSkipMotionVectorBufferExpSize(uint32_t skipBlockType, size_t blkNum) const { // vector size is either 1 (16x16 block) or 4 (8x8 block) // 0 to 8 skip MVs per MB // may be null if all MBs in frame have 0 skip check MVs in which case VME skip checks are not performed // layout assumes 4 (for bidir) or 8 (otherwise) skip check MVs per MB // row-major block layout; all MVs for a block are contiguous // buffer size depends on the block and frame size . int vectorSize = (skipBlockType == CL_ME_MB_TYPE_16x16_INTEL) ? 1 : 4; int numChecks = (isBidirKernel() ?
4 : 8); size_t skipMotionVectorBufferExpSize = blkNum * numChecks * vectorSize * 2 * sizeof(cl_short); return skipMotionVectorBufferExpSize; } /* NOTE(review): the comment inside this function says the layout always assumes 8 skip check residuals per MB, but the code uses 4 for bidirectional kernels - confirm which is intended. */ size_t getSkipResidualsBuffExpSize(uint32_t skipBlockType, size_t blkNum) const { /* output buffer of vectors of unsigned short SAD adjusted values corresponding to the input skip check MVs may be null if skip_motion_vector_buffer is null vector size is either 1 (16x16 block) or 4 (8x8 block) 0 to 8 skip check residuals per MB layout always assumes 8 skip check residuals per MB row major block layout; all MVs for a block are contiguous buffer size depends on the block and frame size */ int vectorSize = 1; switch (skipBlockType) { case CL_ME_MB_TYPE_16x16_INTEL: vectorSize = 1; break; case CL_ME_MB_TYPE_8x8_INTEL: vectorSize = 4; break; default: break; }; int numChecks = (isBidirKernel() ? 4 : 8); size_t skipResidualsBuffExpSize = blkNum * vectorSize * numChecks * sizeof(cl_ushort); return skipResidualsBuffExpSize; } size_t getIntraResidualsBuffExpSize(size_t blkNum) const { /* output buffer of vectors of unsigned short SAD adjusted values may be null in which case the intra residuals corresponding not returned vector size is 4 - 1 (16x16 luma block) + 1 (8x8 luma block) + 1 (4x4 luma block) + 1 (8x8 chroma block) 1 vector per MB buffer size depends on the frame size */ int vectorSize = 4; size_t intraResidualsBuffExpSize = (blkNum * sizeof(cl_ushort) * vectorSize); return intraResidualsBuffExpSize; } size_t getPredictorsBufferExpSize(size_t blkNum) const { size_t numPredictors = 8; size_t predictorsBufferExpSize = (blkNum * numPredictors * 2 * sizeof(cl_short)); return predictorsBufferExpSize; } /* Runs the base validation, then validates the flags, the skip block type, the cost penalty, the cost precision and the bidirectional weight, and finally the minimum sizes of the additional buffers. */ cl_int validateVmeDispatch(const Vec3 &inputRegion, const Vec3 &offset, size_t blkNum, size_t blkMul) const override { cl_int basicVmeValidationStatus = VmeBuiltinDispatchInfoBuilder::validateVmeDispatch(inputRegion, offset, blkNum, blkMul); if (basicVmeValidationStatus != CL_SUCCESS) { return basicVmeValidationStatus; } uint32_t
skipBlockType = CL_ME_MB_TYPE_16x16_INTEL; if (false == validateFlags(skipBlockType)) { return CL_INVALID_KERNEL_ARGS; } if (false == validateSkipBlockTypeArg(skipBlockType)) { return CL_OUT_OF_RESOURCES; } if (false == VmeBuiltinDispatchInfoBuilder::template validateEnumArg(searchCostPenaltyArgNum, CL_ME_COST_PENALTY_NONE_INTEL, CL_ME_COST_PENALTY_LOW_INTEL, CL_ME_COST_PENALTY_NORMAL_INTEL, CL_ME_COST_PENALTY_HIGH_INTEL)) { return CL_OUT_OF_RESOURCES; } if (false == VmeBuiltinDispatchInfoBuilder::template validateEnumArg(searchCostPrecisionArgNum, CL_ME_COST_PRECISION_QPEL_INTEL, CL_ME_COST_PRECISION_HPEL_INTEL, CL_ME_COST_PRECISION_PEL_INTEL, CL_ME_COST_PRECISION_DPEL_INTEL)) { return CL_OUT_OF_RESOURCES; } if (false == VmeBuiltinDispatchInfoBuilder::template validateEnumArg(bidirWeightArgNum, 0, CL_ME_BIDIR_WEIGHT_QUARTER_INTEL, CL_ME_BIDIR_WEIGHT_THIRD_INTEL, CL_ME_BIDIR_WEIGHT_HALF_INTEL, CL_ME_BIDIR_WEIGHT_TWO_THIRD_INTEL, CL_ME_BIDIR_WEIGHT_THREE_QUARTER_INTEL)) { return CL_INVALID_KERNEL_ARGS; } std::pair bufferRequirements[] = { std::make_pair(countMotionVectorBufferArgNum, (blkNum * 2 * sizeof(cl_short))), std::make_pair(skipMotionVectorBufferArgNum, getSkipMotionVectorBufferExpSize(skipBlockType, blkNum)), std::make_pair(intraSearchPredictorModesArgNum, getIntraSearchPredictorModesBuffExpSize(blkNum)), std::make_pair(skipResidualsArgNum, getSkipResidualsBuffExpSize(skipBlockType, blkNum)), std::make_pair(intraResidualsArgNum, getIntraResidualsBuffExpSize(blkNum)), std::make_pair(predictorsBufferArgNum, getPredictorsBufferExpSize(blkNum))}; for (const auto &req : bufferRequirements) { if (false == this->validateBufferSize(req.first, req.second)) { return CL_INVALID_BUFFER_SIZE; } } return CL_SUCCESS; } protected: /* Argument indices resolved by name in the constructor. */ uint32_t flagsArgNum; int32_t skipBlockTypeArgNum; uint32_t searchCostPenaltyArgNum; uint32_t searchCostPrecisionArgNum; int32_t bidirWeightArgNum; int32_t predictorsBufferArgNum; uint32_t countMotionVectorBufferArgNum; uint32_t
skipMotionVectorBufferArgNum; uint32_t intraSearchPredictorModesArgNum; uint32_t skipResidualsArgNum; uint32_t intraResidualsArgNum; uint32_t intraSrcImgArgNum; }; /* Builder bound to the block_advanced_motion_estimate_check_intel kernel; on top of the advanced validation it requires the count motion vector argument to be a buffer. NOTE(review): the last two parameters of validateVmeDispatch are named gwWidthInBlk and gwHeightInBlk here but are forwarded into the blkNum and blkMul parameters of the base override. */ template <> class BuiltInOp : public AdvancedVmeBuiltinDispatchInfoBuilder { public: BuiltInOp(BuiltIns &kernelsLib, ClDevice &device) : AdvancedVmeBuiltinDispatchInfoBuilder(kernelsLib, device, EBuiltInOps::VmeBlockAdvancedMotionEstimateCheckIntel, "block_advanced_motion_estimate_check_intel") { } cl_int validateVmeDispatch(const Vec3 &inputRegion, const Vec3 &offset, size_t gwWidthInBlk, size_t gwHeightInBlk) const override { cl_int basicAdvVmeValidationStatus = AdvancedVmeBuiltinDispatchInfoBuilder::validateVmeDispatch(inputRegion, offset, gwWidthInBlk, gwHeightInBlk); if (basicAdvVmeValidationStatus != CL_SUCCESS) { return basicAdvVmeValidationStatus; } auto countMotionVectorBuff = castToObject((cl_mem)this->vmeKernel->getKernelArg(this->countMotionVectorBufferArgNum)); if (countMotionVectorBuff == nullptr) { return CL_INVALID_BUFFER_SIZE; } return CL_SUCCESS; } }; /* Builder bound to the block_advanced_motion_estimate_bidirectional_check_intel kernel; reports itself as the bidirectional kernel. */ template <> class BuiltInOp : public AdvancedVmeBuiltinDispatchInfoBuilder { public: BuiltInOp(BuiltIns &kernelsLib, ClDevice &device) : AdvancedVmeBuiltinDispatchInfoBuilder(kernelsLib, device, EBuiltInOps::VmeBlockAdvancedMotionEstimateBidirectionalCheckIntel, "block_advanced_motion_estimate_bidirectional_check_intel") { } bool isBidirKernel() const override { return true; } }; } // namespace NEO compute-runtime-22.14.22890/opencl/source/cl_device/000077500000000000000000000000001422164147700220475ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/source/cl_device/CMakeLists.txt000066400000000000000000000013331422164147700246070ustar00rootroot00000000000000# # Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(RUNTIME_SRCS_CL_DEVICE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/cl_device.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cl_device.h
${CMAKE_CURRENT_SOURCE_DIR}/cl_device_caps.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cl_device_get_cap.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_device_info.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cl_device_info.h ${CMAKE_CURRENT_SOURCE_DIR}/cl_device_info_map.h ${CMAKE_CURRENT_SOURCE_DIR}/cl_device_vector.h ) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_CL_DEVICE}) set_property(GLOBAL PROPERTY RUNTIME_SRCS_CL_DEVICE ${RUNTIME_SRCS_CL_DEVICE}) add_subdirectories() compute-runtime-22.14.22890/opencl/source/cl_device/cl_device.cpp000066400000000000000000000272761422164147700245060ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/cl_device/cl_device.h" #include "shared/source/compiler_interface/oclc_extensions.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/device/device.h" #include "shared/source/device/sub_device.h" #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/string.h" #include "shared/source/os_interface/driver_info.h" #include "shared/source/os_interface/os_interface.h" #include "shared/source/source_level_debugger/source_level_debugger.h" #include "opencl/source/helpers/cl_hw_helper.h" #include "opencl/source/platform/platform.h" namespace NEO { /* Wraps device: takes an internal reference on it, registers this object as its specialized device, initializes capabilities and the compiler extension strings, creates one ClDevice per non-null sub-device with this object recorded as its parent, and notifies the source level debugger when one is active. */ ClDevice::ClDevice(Device &device, ClDevice &rootClDevice, Platform *platform) : device(device), rootClDevice(rootClDevice), platformId(platform) { device.incRefInternal(); device.setSpecializedDevice(this); deviceExtensions.reserve(1000); name.reserve(100); auto osInterface = getRootDeviceEnvironment().osInterface.get(); driverInfo.reset(DriverInfo::create(&device.getHardwareInfo(), osInterface)); initializeCaps(); OpenClCFeaturesContainer emptyOpenClCFeatures; compilerExtensions = convertEnabledExtensionsToCompilerInternalOptions(deviceInfo.deviceExtensions, emptyOpenClCFeatures);
compilerExtensionsWithFeatures = convertEnabledExtensionsToCompilerInternalOptions(deviceInfo.deviceExtensions, deviceInfo.openclCFeatures); for (auto &subDevice : device.getSubDevices()) { if (!subDevice) { continue; } auto pClSubDevice = std::make_unique(*subDevice, rootClDevice, platform); pClSubDevice->incRefInternal(); pClSubDevice->decRefApi(); auto &deviceInfo = pClSubDevice->deviceInfo; deviceInfo.parentDevice = this; deviceInfo.partitionType[0] = CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN; deviceInfo.partitionType[1] = CL_DEVICE_AFFINITY_DOMAIN_NUMA; deviceInfo.partitionType[2] = 0; subDevices.push_back(std::move(pClSubDevice)); } if (getSharedDeviceInfo().debuggerActive && getSourceLevelDebugger()) { auto osInterface = device.getRootDeviceEnvironment().osInterface.get(); getSourceLevelDebugger()->notifyNewDevice(osInterface ? osInterface->getDriverModel()->getDeviceHandle() : 0); } } /* Root-device form: the object is its own rootClDevice. */ ClDevice::ClDevice(Device &device, Platform *platformId) : ClDevice(device, *this, platformId) { } /* Notifies the debugger when active, releases the sub-device objects, then drops the internal reference taken on the wrapped device. */ ClDevice::~ClDevice() { if (getSharedDeviceInfo().debuggerActive && getSourceLevelDebugger()) { getSourceLevelDebugger()->notifyDeviceDestruction(); } for (auto &subDevice : subDevices) { subDevice.reset(); } device.decRefInternal(); } /* Internal reference counting is forwarded to the parent device; only a device without a parent counts on itself. */ void ClDevice::incRefInternal() { if (deviceInfo.parentDevice == nullptr) { BaseObject<_cl_device_id>::incRefInternal(); return; } auto pParentDevice = static_cast(deviceInfo.parentDevice); pParentDevice->incRefInternal(); } unique_ptr_if_unused ClDevice::decRefInternal() { if (deviceInfo.parentDevice == nullptr) { return BaseObject<_cl_device_id>::decRefInternal(); } auto pParentDevice = static_cast(deviceInfo.parentDevice); return pParentDevice->decRefInternal(); } /* Only devices with a parent are API reference counted: retain adds an internal reference on the parent plus an API reference on this object, and does nothing for a device without a parent. */ void ClDevice::retainApi() { auto parentDeviceId = deviceInfo.parentDevice; if (parentDeviceId) { auto pParentClDevice = static_cast(parentDeviceId); pParentClDevice->incRefInternal(); this->incRefApi(); } } /* Counterpart of retainApi: for a device without a parent nothing is released and the returned holder does not own this object. */ unique_ptr_if_unused ClDevice::releaseApi() { auto parentDeviceId =
deviceInfo.parentDevice; if (!parentDeviceId) { return unique_ptr_if_unused(this, false); } auto pParentClDevice = static_cast(parentDeviceId); pParentClDevice->decRefInternal(); return this->decRefApi(); } const DeviceInfo &ClDevice::getSharedDeviceInfo() const { return device.getDeviceInfo(); } /* Returns the sub-device at index deviceId; an out-of-range index is treated as unrecoverable. */ ClDevice *ClDevice::getSubDevice(uint32_t deviceId) const { UNRECOVERABLE_IF(deviceId >= subDevices.size()); return subDevices[deviceId].get(); } /* Resolves the generic sub-device nearest to this object: engine-instanced devices ask the root device using the index derived from their device bitfield, devices without sub-devices or without a root CSR return themselves, and otherwise the sub-device at deviceId is returned. */ ClDevice *ClDevice::getNearestGenericSubDevice(uint32_t deviceId) { /* * EngineInstanced: Upper level * Generic SubDevice: 'this' * RootCsr Device: Next level SubDevice (generic) */ if (getDevice().isEngineInstanced()) { return rootClDevice.getNearestGenericSubDevice(Math::log2(static_cast(getDeviceBitfield().to_ulong()))); } if (subDevices.empty() || !getDevice().hasRootCsr()) { return this; } UNRECOVERABLE_IF(deviceId >= subDevices.size()); return subDevices[deviceId].get(); } bool ClDevice::getDeviceAndHostTimer(uint64_t *deviceTimestamp, uint64_t *hostTimestamp) const { return device.getDeviceAndHostTimer(deviceTimestamp, hostTimestamp); } bool ClDevice::getHostTimer(uint64_t *hostTimestamp) const { return device.getHostTimer(hostTimestamp); } const HardwareInfo &ClDevice::getHardwareInfo() const { return device.getHardwareInfo(); } EngineControl &ClDevice::getEngine(aub_stream::EngineType engineType, EngineUsage engineUsage) { return device.getEngine(engineType, engineUsage); } EngineControl &ClDevice::getDefaultEngine() { return device.getDefaultEngine(); } EngineControl &ClDevice::getInternalEngine() { return device.getInternalEngine(); } SelectorCopyEngine &ClDevice::getSelectorCopyEngine() { return device.getSelectorCopyEngine(); } MemoryManager *ClDevice::getMemoryManager() const { return device.getMemoryManager(); } GmmHelper *ClDevice::getGmmHelper() const { return device.getGmmHelper(); } GmmClientContext *ClDevice::getGmmClientContext() const { return device.getGmmClientContext(); } double
ClDevice::getPlatformHostTimerResolution() const { return device.getPlatformHostTimerResolution(); } GFXCORE_FAMILY ClDevice::getRenderCoreFamily() const { return device.getRenderCoreFamily(); } PerformanceCounters *ClDevice::getPerformanceCounters() { return device.getPerformanceCounters(); } PreemptionMode ClDevice::getPreemptionMode() const { return device.getPreemptionMode(); } bool ClDevice::isDebuggerActive() const { return device.isDebuggerActive(); } Debugger *ClDevice::getDebugger() { return device.getDebugger(); } SourceLevelDebugger *ClDevice::getSourceLevelDebugger() { return device.getSourceLevelDebugger(); } ExecutionEnvironment *ClDevice::getExecutionEnvironment() const { return device.getExecutionEnvironment(); } const RootDeviceEnvironment &ClDevice::getRootDeviceEnvironment() const { return device.getRootDeviceEnvironment(); } bool ClDevice::isFullRangeSvm() const { return device.isFullRangeSvm(); } bool ClDevice::areSharedSystemAllocationsAllowed() const { return device.areSharedSystemAllocationsAllowed(); } uint32_t ClDevice::getRootDeviceIndex() const { return device.getRootDeviceIndex(); } uint32_t ClDevice::getNumGenericSubDevices() const { return device.getNumGenericSubDevices(); } uint32_t ClDevice::getNumSubDevices() const { return static_cast(subDevices.size()); } /* Converts an array of API device handles into ClDevice pointers, one entry per handle. */ ClDeviceVector::ClDeviceVector(const cl_device_id *devices, cl_uint numDevices) { for (cl_uint i = 0; i < numDevices; i++) { auto pClDevice = castToObject(devices[i]); this->push_back(pClDevice); } } /* Resizes devIDs to the size of this vector and copies every device into it in order. */ void ClDeviceVector::toDeviceIDs(std::vector &devIDs) const { size_t i = 0; devIDs.resize(this->size()); for (auto &it : *this) { devIDs[i] = it; i++; } } const std::string &ClDevice::peekCompilerExtensions() const { return compilerExtensions; } const std::string &ClDevice::peekCompilerExtensionsWithFeatures() const { return compilerExtensionsWithFeatures; } DeviceBitfield ClDevice::getDeviceBitfield() const { return device.getDeviceBitfield(); } /* The ForcePipeSupport debug flag overrides the hardware capability table whenever it is not -1. */ bool ClDevice::arePipesSupported()
const { if (DebugManager.flags.ForcePipeSupport.get() != -1) { return DebugManager.flags.ForcePipeSupport.get(); } return device.getHardwareInfo().capabilityTable.supportsPipes; } cl_command_queue_capabilities_intel ClDevice::getQueueFamilyCapabilitiesAll() { return CL_QUEUE_CAPABILITY_CREATE_SINGLE_QUEUE_EVENTS_INTEL | CL_QUEUE_CAPABILITY_CREATE_CROSS_QUEUE_EVENTS_INTEL | CL_QUEUE_CAPABILITY_SINGLE_QUEUE_EVENT_WAIT_LIST_INTEL | CL_QUEUE_CAPABILITY_CROSS_QUEUE_EVENT_WAIT_LIST_INTEL | CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_INTEL | CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_RECT_INTEL | CL_QUEUE_CAPABILITY_MAP_BUFFER_INTEL | CL_QUEUE_CAPABILITY_FILL_BUFFER_INTEL | CL_QUEUE_CAPABILITY_TRANSFER_IMAGE_INTEL | CL_QUEUE_CAPABILITY_MAP_IMAGE_INTEL | CL_QUEUE_CAPABILITY_FILL_IMAGE_INTEL | CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_IMAGE_INTEL | CL_QUEUE_CAPABILITY_TRANSFER_IMAGE_BUFFER_INTEL | CL_QUEUE_CAPABILITY_MARKER_INTEL | CL_QUEUE_CAPABILITY_BARRIER_INTEL | CL_QUEUE_CAPABILITY_KERNEL_INTEL; } /* Returns CL_QUEUE_DEFAULT_CAPABILITIES_INTEL when nothing is disabled for the engine group; otherwise the full capability mask minus the disabled bits. Copy-only groups lose the kernel, fill and image transfer capabilities, and the hardware helper may disable more. */ cl_command_queue_capabilities_intel ClDevice::getQueueFamilyCapabilities(EngineGroupType type) { auto &clHwHelper = NEO::ClHwHelper::get(getHardwareInfo().platform.eRenderCoreFamily); cl_command_queue_capabilities_intel disabledProperties = 0u; if (EngineHelper::isCopyOnlyEngineType(type)) { disabledProperties |= static_cast(CL_QUEUE_CAPABILITY_KERNEL_INTEL); disabledProperties |= static_cast(CL_QUEUE_CAPABILITY_FILL_BUFFER_INTEL); // clEnqueueFillBuffer disabledProperties |= static_cast(CL_QUEUE_CAPABILITY_TRANSFER_IMAGE_INTEL); // clEnqueueCopyImage disabledProperties |= static_cast(CL_QUEUE_CAPABILITY_FILL_IMAGE_INTEL); // clEnqueueFillImage disabledProperties |= static_cast(CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_IMAGE_INTEL); // clEnqueueCopyBufferToImage disabledProperties |= static_cast(CL_QUEUE_CAPABILITY_TRANSFER_IMAGE_BUFFER_INTEL); // clEnqueueCopyImageToBuffer } disabledProperties |= clHwHelper.getAdditionalDisabledQueueFamilyCapabilities(type); if (disabledProperties != 0) { return
getQueueFamilyCapabilitiesAll() & ~disabledProperties; } return CL_QUEUE_DEFAULT_CAPABILITIES_INTEL; } /* Writes the queue family name into outputName: the hardware-specific name when the helper provides one, else rcs, ccs or bcs by engine group, else an empty string. A name that does not fit CL_QUEUE_FAMILY_MAX_NAME_SIZE_INTEL is treated as unrecoverable. */ void ClDevice::getQueueFamilyName(char *outputName, EngineGroupType type) { std::string name{}; const auto &clHwHelper = ClHwHelper::get(getHardwareInfo().platform.eRenderCoreFamily); const bool hasHwSpecificName = clHwHelper.getQueueFamilyName(name, type); if (!hasHwSpecificName) { switch (type) { case EngineGroupType::RenderCompute: name = "rcs"; break; case EngineGroupType::Compute: name = "ccs"; break; case EngineGroupType::Copy: name = "bcs"; break; default: name = ""; break; } } UNRECOVERABLE_IF(name.size() >= CL_QUEUE_FAMILY_MAX_NAME_SIZE_INTEL); strncpy_s(outputName, CL_QUEUE_FAMILY_MAX_NAME_SIZE_INTEL, name.c_str(), name.size()); } Platform *ClDevice::getPlatform() const { return castToObject(platformId); } /* True only when none of the four PCI bus fields holds PhysicalDevicePciBusInfo::InvalidValue. */ bool ClDevice::isPciBusInfoValid() const { return deviceInfo.pciBusInfo.pci_domain != PhysicalDevicePciBusInfo::InvalidValue && deviceInfo.pciBusInfo.pci_bus != PhysicalDevicePciBusInfo::InvalidValue && deviceInfo.pciBusInfo.pci_device != PhysicalDevicePciBusInfo::InvalidValue && deviceInfo.pciBusInfo.pci_function != PhysicalDevicePciBusInfo::InvalidValue; } } // namespace NEO compute-runtime-22.14.22890/opencl/source/cl_device/cl_device.h000066400000000000000000000123161422164147700241400ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/preemption_mode.h" #include "shared/source/helpers/common_types.h" #include "shared/source/helpers/engine_node_helper.h" #include "shared/source/utilities/reference_tracked_object.h" #include "opencl/source/api/cl_types.h" #include "opencl/source/cl_device/cl_device_info.h" #include "opencl/source/helpers/base_object.h" #include "engine_node.h" #include "igfxfmid.h" #include namespace NEO { class Debugger; class Device; class DriverInfo; class ExecutionEnvironment; class GmmHelper; class GmmClientContext;
class MemoryManager; class PerformanceCounters; class Platform; class SourceLevelDebugger; struct DeviceInfo; struct EngineControl; struct HardwareInfo; struct RootDeviceEnvironment; struct SelectorCopyEngine;
/* Specialization that names ClDevice as the DerivedType for the _cl_device_id handle type. */
template <> struct OpenCLObjectMapper<_cl_device_id> { typedef class ClDevice DerivedType; };
/*
 * OpenCL-level device object, derived from BaseObject<_cl_device_id>.
 *
 * - Copy construction and copy assignment are deleted.
 * - Two explicit constructors exist: one taking (Device &, Platform *) and one that
 *   additionally takes a ClDevice &rootClDevice.
 *   NOTE(review): presumably the second form is used for sub-devices - confirm in cl_device.cpp.
 * - Reference counting is exposed through incRefInternal/decRefInternal and retainApi/releaseApi.
 * - getDeviceInfo(paramName, paramValueSize, paramValue, paramValueSizeRet) is the API query entry
 *   point; the parameterless getDeviceInfo() overload returns the cached ClDeviceInfo by const reference.
 * - getCap/getStr are helper templates used by the getDeviceInfo query.
 * - The protected initialize* methods and setupFp64Flags populate the protected data members
 *   (deviceInfo, deviceExtensions, exposedBuiltinKernels, enabledClVersion, ...); only their
 *   declarations are visible in this header.
 */
class ClDevice : public BaseObject<_cl_device_id> { public: static const cl_ulong objectMagic = 0x8055832341AC8D08LL; ClDevice &operator=(const ClDevice &) = delete; ClDevice(const ClDevice &) = delete; explicit ClDevice(Device &device, Platform *platformId); explicit ClDevice(Device &device, ClDevice &rootClDevice, Platform *platformId); ~ClDevice() override; void incRefInternal(); unique_ptr_if_unused decRefInternal(); unsigned int getEnabledClVersion() const { return enabledClVersion; }; bool areOcl21FeaturesEnabled() const { return ocl21FeaturesEnabled; }; void retainApi(); unique_ptr_if_unused releaseApi(); bool getDeviceAndHostTimer(uint64_t *deviceTimestamp, uint64_t *hostTimestamp) const; bool getHostTimer(uint64_t *hostTimestamp) const; const HardwareInfo &getHardwareInfo() const; EngineControl &getEngine(aub_stream::EngineType engineType, EngineUsage engineUsage); EngineControl &getDefaultEngine(); EngineControl &getInternalEngine(); SelectorCopyEngine &getSelectorCopyEngine(); MemoryManager *getMemoryManager() const; GmmHelper *getGmmHelper() const; GmmClientContext *getGmmClientContext() const; double getPlatformHostTimerResolution() const; GFXCORE_FAMILY getRenderCoreFamily() const; PerformanceCounters *getPerformanceCounters(); PreemptionMode getPreemptionMode() const; bool isDebuggerActive() const; Debugger *getDebugger(); SourceLevelDebugger *getSourceLevelDebugger(); ExecutionEnvironment *getExecutionEnvironment() const; const RootDeviceEnvironment &getRootDeviceEnvironment() const; bool isFullRangeSvm() const; bool areSharedSystemAllocationsAllowed() const; uint32_t getRootDeviceIndex() const; uint32_t getNumGenericSubDevices() const; uint32_t 
getNumSubDevices() const; // API entry points cl_int getDeviceInfo(cl_device_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet); bool getDeviceInfoForImage(cl_device_info paramName, const void *&src, size_t &srcSize, size_t &retSize); // This helper template is meant to simplify getDeviceInfo template void getCap(const void *&src, size_t &size, size_t &retSize); template void getStr(const void *&src, size_t &size, size_t &retSize); Device &getDevice() const noexcept { return device; } const ClDeviceInfo &getDeviceInfo() const { return deviceInfo; } const DeviceInfo &getSharedDeviceInfo() const; ClDevice *getSubDevice(uint32_t deviceId) const; ClDevice *getNearestGenericSubDevice(uint32_t deviceId); const std::string &peekCompilerExtensions() const; const std::string &peekCompilerExtensionsWithFeatures() const; DeviceBitfield getDeviceBitfield() const; bool arePipesSupported() const; bool isPciBusInfoValid() const; static cl_command_queue_capabilities_intel getQueueFamilyCapabilitiesAll(); MOCKABLE_VIRTUAL cl_command_queue_capabilities_intel getQueueFamilyCapabilities(EngineGroupType type); void getQueueFamilyName(char *outputName, EngineGroupType type); Platform *getPlatform() const; protected: void initializeCaps(); void initializeExtensionsWithVersion(); void initializeOpenclCAllVersions(); void initializeOsSpecificCaps(); void setupFp64Flags(); const std::string getClDeviceName(const HardwareInfo &hwInfo) const; Device &device; ClDevice &rootClDevice; std::vector> subDevices; cl_platform_id platformId; std::string name; std::unique_ptr driverInfo; unsigned int enabledClVersion = 0u; bool ocl21FeaturesEnabled = false; std::string deviceExtensions; std::string exposedBuiltinKernels = ""; ClDeviceInfo deviceInfo = {}; std::once_flag initializeExtensionsWithVersionOnce; std::vector simultaneousInterops = {0}; std::string compilerExtensions; std::string compilerExtensionsWithFeatures; }; } // namespace NEO 
compute-runtime-22.14.22890/opencl/source/cl_device/cl_device_caps.cpp000066400000000000000000000467551422164147700255170ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/compiler_interface/oclc_extensions.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/device/device.h" #include "shared/source/device/device_info.h" #include "shared/source/helpers/basic_math.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/string.h" #include "shared/source/os_interface/driver_info.h" #include "shared/source/os_interface/hw_info_config.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/helpers/cl_hw_helper.h" #include "opencl/source/sharings/sharing_factory.h" #include "driver_version.h" #include namespace NEO { extern const char *familyName[]; static std::string vendor = "Intel(R) Corporation"; static std::string profile = "FULL_PROFILE"; static std::string spirVersions = "1.2 "; static std::string spirvName = "SPIR-V"; const char *latestConformanceVersionPassed = "v2021-06-16-00"; #define QTR(a) #a #define TOSTR(b) QTR(b) static std::string driverVersion = TOSTR(NEO_OCL_DRIVER_VERSION); static constexpr cl_device_fp_config defaultFpFlags = static_cast(CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO | CL_FP_ROUND_TO_INF | CL_FP_INF_NAN | CL_FP_DENORM | CL_FP_FMA); void ClDevice::setupFp64Flags() { auto &hwInfo = getHardwareInfo(); if (DebugManager.flags.OverrideDefaultFP64Settings.get() == 1) { deviceExtensions += "cl_khr_fp64 "; deviceInfo.singleFpConfig = static_cast(CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT); deviceInfo.doubleFpConfig = defaultFpFlags; } else if (DebugManager.flags.OverrideDefaultFP64Settings.get() == -1) { if (hwInfo.capabilityTable.ftrSupportsFP64) { deviceExtensions += "cl_khr_fp64 "; } deviceInfo.singleFpConfig = static_cast( hwInfo.capabilityTable.ftrSupports64BitMath ? 
CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT : 0); deviceInfo.doubleFpConfig = hwInfo.capabilityTable.ftrSupportsFP64 ? defaultFpFlags : 0; } } void ClDevice::initializeCaps() { auto &hwInfo = getHardwareInfo(); auto hwInfoConfig = HwInfoConfig::get(hwInfo.platform.eProductFamily); auto &sharedDeviceInfo = getSharedDeviceInfo(); deviceExtensions.clear(); deviceExtensions.append(deviceExtensionsList); driverVersion = TOSTR(NEO_OCL_DRIVER_VERSION); name = getClDeviceName(hwInfo); if (driverInfo) { name.assign(driverInfo.get()->getDeviceName(name).c_str()); driverVersion.assign(driverInfo.get()->getVersion(driverVersion).c_str()); sharingFactory.verifyExtensionSupport(driverInfo.get()); } auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); deviceInfo.name = name.c_str(); deviceInfo.driverVersion = driverVersion.c_str(); setupFp64Flags(); deviceInfo.vendor = vendor.c_str(); deviceInfo.profile = profile.c_str(); enabledClVersion = hwInfo.capabilityTable.clVersionSupport; ocl21FeaturesEnabled = hwInfo.capabilityTable.supportsOcl21Features; if (DebugManager.flags.ForceOCLVersion.get() != 0) { enabledClVersion = DebugManager.flags.ForceOCLVersion.get(); ocl21FeaturesEnabled = (enabledClVersion == 21); } if (DebugManager.flags.ForceOCL21FeaturesSupport.get() != -1) { ocl21FeaturesEnabled = DebugManager.flags.ForceOCL21FeaturesSupport.get(); } switch (enabledClVersion) { case 30: deviceInfo.clVersion = "OpenCL 3.0 NEO "; deviceInfo.clCVersion = "OpenCL C 1.2 "; deviceInfo.numericClVersion = CL_MAKE_VERSION(3, 0, 0); break; case 21: deviceInfo.clVersion = "OpenCL 2.1 NEO "; deviceInfo.clCVersion = "OpenCL C 2.0 "; deviceInfo.numericClVersion = CL_MAKE_VERSION(2, 1, 0); break; case 12: default: deviceInfo.clVersion = "OpenCL 1.2 NEO "; deviceInfo.clCVersion = "OpenCL C 1.2 "; deviceInfo.numericClVersion = CL_MAKE_VERSION(1, 2, 0); break; } deviceInfo.latestConformanceVersionPassed = latestConformanceVersionPassed; initializeOpenclCAllVersions(); deviceInfo.platformLP = 
(hwInfo.capabilityTable.supportsOcl21Features == false); deviceInfo.spirVersions = spirVersions.c_str(); deviceInfo.ilsWithVersion[0].version = CL_MAKE_VERSION(1, 2, 0); strcpy_s(deviceInfo.ilsWithVersion[0].name, CL_NAME_VERSION_MAX_NAME_SIZE, spirvName.c_str()); auto supportsVme = hwInfo.capabilityTable.supportsVme; auto supportsAdvancedVme = hwInfo.capabilityTable.supportsVme; deviceInfo.independentForwardProgress = hwInfo.capabilityTable.supportsIndependentForwardProgress; deviceInfo.maxNumOfSubGroups = 0; if (ocl21FeaturesEnabled) { auto simdSizeUsed = DebugManager.flags.UseMaxSimdSizeToDeduceMaxWorkgroupSize.get() ? CommonConstants::maximalSimdSize : hwHelper.getMinimalSIMDSize(); // calculate a maximum number of subgroups in a workgroup (for the required SIMD size) deviceInfo.maxNumOfSubGroups = static_cast(sharedDeviceInfo.maxWorkGroupSize / simdSizeUsed); if (deviceInfo.independentForwardProgress) { deviceExtensions += "cl_khr_subgroups "; } if (supportsVme) { deviceExtensions += "cl_intel_spirv_device_side_avc_motion_estimation "; } if (hwInfo.capabilityTable.supportsMediaBlock) { deviceExtensions += "cl_intel_spirv_media_block_io "; } deviceExtensions += "cl_intel_spirv_subgroups "; deviceExtensions += "cl_khr_spirv_no_integer_wrap_decoration "; deviceExtensions += "cl_intel_unified_shared_memory "; if (hwInfo.capabilityTable.supportsImages) { deviceExtensions += "cl_khr_mipmap_image cl_khr_mipmap_image_writes "; } } if (DebugManager.flags.EnableNV12.get() && hwInfo.capabilityTable.supportsImages) { deviceExtensions += "cl_intel_planar_yuv "; deviceInfo.nv12Extension = true; } if (DebugManager.flags.EnablePackedYuv.get() && hwInfo.capabilityTable.supportsImages) { deviceExtensions += "cl_intel_packed_yuv "; deviceInfo.packedYuvExtension = true; } if (DebugManager.flags.EnableIntelVme.get() != -1) { supportsVme = !!DebugManager.flags.EnableIntelVme.get(); } if (supportsVme) { deviceExtensions += "cl_intel_motion_estimation 
cl_intel_device_side_avc_motion_estimation "; deviceInfo.vmeExtension = true; } if (DebugManager.flags.EnableIntelAdvancedVme.get() != -1) { supportsAdvancedVme = !!DebugManager.flags.EnableIntelAdvancedVme.get(); } if (supportsAdvancedVme) { deviceExtensions += "cl_intel_advanced_motion_estimation "; } if (hwInfo.capabilityTable.ftrSupportsInteger64BitAtomics) { deviceExtensions += "cl_khr_int64_base_atomics "; deviceExtensions += "cl_khr_int64_extended_atomics "; } if (hwInfo.capabilityTable.supportsImages) { deviceExtensions += "cl_khr_image2d_from_buffer "; deviceExtensions += "cl_khr_depth_images "; deviceExtensions += "cl_khr_3d_image_writes "; } if (hwInfo.capabilityTable.supportsMediaBlock) { deviceExtensions += "cl_intel_media_block_io "; } auto sharingAllowed = (getNumGenericSubDevices() <= 1u); if (sharingAllowed) { deviceExtensions += sharingFactory.getExtensions(driverInfo.get()); } PhysicalDevicePciBusInfo pciBusInfo(PhysicalDevicePciBusInfo::InvalidValue, PhysicalDevicePciBusInfo::InvalidValue, PhysicalDevicePciBusInfo::InvalidValue, PhysicalDevicePciBusInfo::InvalidValue); if (driverInfo) { pciBusInfo = driverInfo->getPciBusInfo(); } deviceInfo.pciBusInfo.pci_domain = pciBusInfo.pciDomain; deviceInfo.pciBusInfo.pci_bus = pciBusInfo.pciBus; deviceInfo.pciBusInfo.pci_device = pciBusInfo.pciDevice; deviceInfo.pciBusInfo.pci_function = pciBusInfo.pciFunction; if (isPciBusInfoValid()) { deviceExtensions += "cl_khr_pci_bus_info "; } deviceExtensions += hwHelper.getExtensions(); deviceInfo.deviceExtensions = deviceExtensions.c_str(); std::vector exposedBuiltinKernelsVector; if (supportsVme) { exposedBuiltinKernelsVector.push_back("block_motion_estimate_intel"); } if (supportsAdvancedVme) { exposedBuiltinKernelsVector.push_back("block_advanced_motion_estimate_check_intel"); exposedBuiltinKernelsVector.push_back("block_advanced_motion_estimate_bidirectional_check_intel"); } for (auto builtInKernel : exposedBuiltinKernelsVector) { 
exposedBuiltinKernels.append(builtInKernel); exposedBuiltinKernels.append(";"); cl_name_version kernelNameVersion; kernelNameVersion.version = CL_MAKE_VERSION(1, 0, 0); strcpy_s(kernelNameVersion.name, CL_NAME_VERSION_MAX_NAME_SIZE, builtInKernel.c_str()); deviceInfo.builtInKernelsWithVersion.push_back(kernelNameVersion); } deviceInfo.builtInKernels = exposedBuiltinKernels.c_str(); deviceInfo.deviceType = CL_DEVICE_TYPE_GPU; deviceInfo.endianLittle = 1; deviceInfo.hostUnifiedMemory = (false == hwHelper.isLocalMemoryEnabled(hwInfo)); deviceInfo.deviceAvailable = CL_TRUE; deviceInfo.compilerAvailable = CL_TRUE; deviceInfo.parentDevice = nullptr; deviceInfo.partitionMaxSubDevices = device.getNumSubDevices(); if (deviceInfo.partitionMaxSubDevices > 0) { deviceInfo.partitionProperties[0] = CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN; deviceInfo.partitionProperties[1] = 0; deviceInfo.partitionAffinityDomain = CL_DEVICE_AFFINITY_DOMAIN_NUMA | CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE; } else { deviceInfo.partitionMaxSubDevices = 0; deviceInfo.partitionProperties[0] = 0; deviceInfo.partitionAffinityDomain = 0; } deviceInfo.partitionType[0] = 0; deviceInfo.preferredVectorWidthChar = 16; deviceInfo.preferredVectorWidthShort = 8; deviceInfo.preferredVectorWidthInt = 4; deviceInfo.preferredVectorWidthLong = 1; deviceInfo.preferredVectorWidthFloat = 1; deviceInfo.preferredVectorWidthDouble = 1; deviceInfo.preferredVectorWidthHalf = 8; deviceInfo.nativeVectorWidthChar = 16; deviceInfo.nativeVectorWidthShort = 8; deviceInfo.nativeVectorWidthInt = 4; deviceInfo.nativeVectorWidthLong = 1; deviceInfo.nativeVectorWidthFloat = 1; deviceInfo.nativeVectorWidthDouble = 1; deviceInfo.nativeVectorWidthHalf = 8; deviceInfo.maxReadWriteImageArgs = hwInfo.capabilityTable.supportsImages ? 
128 : 0; deviceInfo.executionCapabilities = CL_EXEC_KERNEL; //copy system info to prevent misaligned reads const auto systemInfo = hwInfo.gtSystemInfo; deviceInfo.globalMemCacheSize = systemInfo.L3BankCount * 128 * KB; deviceInfo.grfSize = hwInfo.capabilityTable.grfSize; deviceInfo.globalMemCacheType = CL_READ_WRITE_CACHE; deviceInfo.memBaseAddressAlign = 1024; deviceInfo.minDataTypeAlignSize = 128; deviceInfo.preferredInteropUserSync = 1u; // OpenCL 1.2 requires 128MB minimum deviceInfo.maxConstantBufferSize = sharedDeviceInfo.maxMemAllocSize; deviceInfo.maxWorkItemDimensions = 3; deviceInfo.maxComputUnits = systemInfo.EUCount * std::max(getNumGenericSubDevices(), 1u); if (device.isEngineInstanced()) { deviceInfo.maxComputUnits /= systemInfo.CCSInfo.NumberOfCCSEnabled; } deviceInfo.maxConstantArgs = 8; deviceInfo.maxSliceCount = systemInfo.SliceCount; deviceInfo.singleFpConfig |= defaultFpFlags; deviceInfo.halfFpConfig = defaultFpFlags; PRINT_DEBUG_STRING(DebugManager.flags.PrintDebugMessages.get(), stderr, "computeUnitsUsedForScratch: %d\n", sharedDeviceInfo.computeUnitsUsedForScratch); PRINT_DEBUG_STRING(DebugManager.flags.PrintDebugMessages.get(), stderr, "hwInfo: {%d, %d}: (%d, %d, %d)\n", systemInfo.EUCount, systemInfo.ThreadCount, systemInfo.MaxEuPerSubSlice, systemInfo.MaxSlicesSupported, systemInfo.MaxSubSlicesSupported); deviceInfo.localMemType = CL_LOCAL; deviceInfo.image3DMaxWidth = hwHelper.getMax3dImageWidthOrHeight(); deviceInfo.image3DMaxHeight = hwHelper.getMax3dImageWidthOrHeight(); // cl_khr_image2d_from_buffer deviceInfo.imagePitchAlignment = hwHelper.getPitchAlignmentForImage(&hwInfo); deviceInfo.imageBaseAddressAlignment = 4; deviceInfo.queueOnHostProperties = CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE; deviceInfo.pipeSupport = arePipesSupported(); if (arePipesSupported()) { deviceInfo.maxPipeArgs = 16; deviceInfo.pipeMaxPacketSize = 1024; deviceInfo.pipeMaxActiveReservations = 1; } else { deviceInfo.maxPipeArgs = 0; 
deviceInfo.pipeMaxPacketSize = 0; deviceInfo.pipeMaxActiveReservations = 0; } deviceInfo.atomicMemoryCapabilities = CL_DEVICE_ATOMIC_ORDER_RELAXED | CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP; if (ocl21FeaturesEnabled) { deviceInfo.atomicMemoryCapabilities |= CL_DEVICE_ATOMIC_ORDER_ACQ_REL | CL_DEVICE_ATOMIC_ORDER_SEQ_CST | CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES | CL_DEVICE_ATOMIC_SCOPE_DEVICE; } deviceInfo.atomicFenceCapabilities = CL_DEVICE_ATOMIC_ORDER_RELAXED | CL_DEVICE_ATOMIC_ORDER_ACQ_REL | CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP; if (ocl21FeaturesEnabled) { deviceInfo.atomicFenceCapabilities |= CL_DEVICE_ATOMIC_ORDER_SEQ_CST | CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES | CL_DEVICE_ATOMIC_SCOPE_DEVICE | CL_DEVICE_ATOMIC_SCOPE_WORK_ITEM; } deviceInfo.nonUniformWorkGroupSupport = true; deviceInfo.workGroupCollectiveFunctionsSupport = ocl21FeaturesEnabled; deviceInfo.genericAddressSpaceSupport = ocl21FeaturesEnabled; deviceInfo.linkerAvailable = true; deviceInfo.svmCapabilities = hwInfo.capabilityTable.ftrSvm * CL_DEVICE_SVM_COARSE_GRAIN_BUFFER; if (hwInfo.capabilityTable.ftrSvm) { auto reportFineGrained = hwInfo.capabilityTable.ftrSvm * hwInfo.capabilityTable.ftrSupportsCoherency; if (DebugManager.flags.ForceFineGrainedSVMSupport.get() != -1) { reportFineGrained = !!DebugManager.flags.ForceFineGrainedSVMSupport.get(); } if (reportFineGrained) { deviceInfo.svmCapabilities |= static_cast(CL_DEVICE_SVM_FINE_GRAIN_BUFFER | CL_DEVICE_SVM_ATOMICS); } } for (auto &engineGroup : this->getDevice().getRegularEngineGroups()) { cl_queue_family_properties_intel properties = {}; properties.capabilities = getQueueFamilyCapabilities(engineGroup.engineGroupType); properties.count = static_cast(engineGroup.engines.size()); properties.properties = deviceInfo.queueOnHostProperties; getQueueFamilyName(properties.name, engineGroup.engineGroupType); deviceInfo.queueFamilyProperties.push_back(properties); } auto &clHwHelper = NEO::ClHwHelper::get(hwInfo.platform.eRenderCoreFamily); const std::vector 
&supportedThreadArbitrationPolicies = clHwHelper.getSupportedThreadArbitrationPolicies(); deviceInfo.supportedThreadArbitrationPolicies.resize(supportedThreadArbitrationPolicies.size()); for (size_t policy = 0u; policy < supportedThreadArbitrationPolicies.size(); policy++) { deviceInfo.supportedThreadArbitrationPolicies[policy] = supportedThreadArbitrationPolicies[policy]; } deviceInfo.preemptionSupported = false; deviceInfo.maxGlobalVariableSize = ocl21FeaturesEnabled ? 64 * KB : 0; deviceInfo.globalVariablePreferredTotalSize = ocl21FeaturesEnabled ? static_cast(sharedDeviceInfo.maxMemAllocSize) : 0; deviceInfo.planarYuvMaxWidth = 16384; deviceInfo.planarYuvMaxHeight = hwHelper.getPlanarYuvMaxHeight(); deviceInfo.vmeAvcSupportsTextureSampler = hwInfo.capabilityTable.ftrSupportsVmeAvcTextureSampler; if (hwInfo.capabilityTable.supportsVme) { deviceInfo.vmeAvcVersion = CL_AVC_ME_VERSION_1_INTEL; deviceInfo.vmeVersion = CL_ME_VERSION_ADVANCED_VER_2_INTEL; } deviceInfo.platformHostTimerResolution = getPlatformHostTimerResolution(); deviceInfo.internalDriverVersion = CL_DEVICE_DRIVER_VERSION_INTEL_NEO1; deviceInfo.preferredGlobalAtomicAlignment = MemoryConstants::cacheLineSize; deviceInfo.preferredLocalAtomicAlignment = MemoryConstants::cacheLineSize; deviceInfo.preferredPlatformAtomicAlignment = MemoryConstants::cacheLineSize; deviceInfo.preferredWorkGroupSizeMultiple = hwHelper.isFusedEuDispatchEnabled(hwInfo, false) ? 
CommonConstants::maximalSimdSize * 2 : CommonConstants::maximalSimdSize; deviceInfo.hostMemCapabilities = hwInfoConfig->getHostMemCapabilities(&hwInfo); deviceInfo.deviceMemCapabilities = hwInfoConfig->getDeviceMemCapabilities(); deviceInfo.singleDeviceSharedMemCapabilities = hwInfoConfig->getSingleDeviceSharedMemCapabilities(); deviceInfo.crossDeviceSharedMemCapabilities = hwInfoConfig->getCrossDeviceSharedMemCapabilities(); deviceInfo.sharedSystemMemCapabilities = hwInfoConfig->getSharedSystemMemCapabilities(&hwInfo); initializeOsSpecificCaps(); getOpenclCFeaturesList(hwInfo, deviceInfo.openclCFeatures); } void ClDevice::initializeExtensionsWithVersion() { std::stringstream deviceExtensionsStringStream{deviceExtensions}; std::vector deviceExtensionsVector{ std::istream_iterator{deviceExtensionsStringStream}, std::istream_iterator{}}; deviceInfo.extensionsWithVersion.reserve(deviceExtensionsVector.size()); for (auto deviceExtension : deviceExtensionsVector) { cl_name_version deviceExtensionWithVersion; deviceExtensionWithVersion.version = CL_MAKE_VERSION(1, 0, 0); strcpy_s(deviceExtensionWithVersion.name, CL_NAME_VERSION_MAX_NAME_SIZE, deviceExtension.c_str()); deviceInfo.extensionsWithVersion.push_back(deviceExtensionWithVersion); } } void ClDevice::initializeOpenclCAllVersions() { cl_name_version openClCVersion; strcpy_s(openClCVersion.name, CL_NAME_VERSION_MAX_NAME_SIZE, "OpenCL C"); openClCVersion.version = CL_MAKE_VERSION(1, 0, 0); deviceInfo.openclCAllVersions.push_back(openClCVersion); openClCVersion.version = CL_MAKE_VERSION(1, 1, 0); deviceInfo.openclCAllVersions.push_back(openClCVersion); openClCVersion.version = CL_MAKE_VERSION(1, 2, 0); deviceInfo.openclCAllVersions.push_back(openClCVersion); if (enabledClVersion == 30) { openClCVersion.version = CL_MAKE_VERSION(3, 0, 0); deviceInfo.openclCAllVersions.push_back(openClCVersion); } } const std::string ClDevice::getClDeviceName(const HardwareInfo &hwInfo) const { return 
this->getDevice().getDeviceInfo().name; } } // namespace NEO compute-runtime-22.14.22890/opencl/source/cl_device/cl_device_get_cap.inl000066400000000000000000000010041422164147700261450ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/cl_device/cl_device_info_map.h" namespace NEO { template inline void ClDevice::getCap(const void *&src, size_t &size, size_t &retSize) { src = &ClDeviceInfoTable::Map::getValue(*this); retSize = size = ClDeviceInfoTable::Map::size; } } // namespace NEO compute-runtime-22.14.22890/opencl/source/cl_device/cl_device_info.cpp000066400000000000000000000642021422164147700255070ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/cl_device/cl_device_info.h" #include "shared/source/device/device.h" #include "shared/source/device/device_info.h" #include "shared/source/helpers/get_info.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/os_interface/os_time.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/cl_device/cl_device_get_cap.inl" #include "opencl/source/cl_device/cl_device_info_map.h" #include "opencl/source/cl_device/cl_device_vector.h" #include "opencl/source/helpers/cl_device_helpers.h" #include "opencl/source/helpers/cl_hw_helper.h" #include "opencl/source/helpers/get_info_status_mapper.h" #include "opencl/source/platform/platform.h" namespace NEO { using ClDeviceInfoTable::Map; template inline void ClDevice::getStr(const void *&src, size_t &size, size_t &retSize) { src = Map::getValue(*this); retSize = size = strlen(Map::getValue(*this)) + 1; } template <> inline void ClDevice::getCap(const void *&src, size_t &size, size_t &retSize) { src = getSharedDeviceInfo().maxWorkItemSizes; retSize = size = sizeof(getSharedDeviceInfo().maxWorkItemSizes); } template <> inline void 
ClDevice::getCap(const void *&src, size_t &size, size_t &retSize) { src = &platformId; retSize = size = sizeof(cl_platform_id); } template <> inline void ClDevice::getCap(const void *&src, size_t &size, size_t &retSize) { src = getSharedDeviceInfo().maxSubGroups.begin(); retSize = size = (getSharedDeviceInfo().maxSubGroups.size() * sizeof(size_t)); } cl_int ClDevice::getDeviceInfo(cl_device_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) { cl_int retVal = CL_INVALID_VALUE; size_t srcSize = GetInfo::invalidSourceSize; size_t retSize = 0; size_t value = 0u; ClDeviceInfoParam param{}; const void *src = nullptr; // clang-format off // please keep alphabetical order switch (paramName) { case CL_DEVICE_ADDRESS_BITS: getCap(src, srcSize, retSize); break; case CL_DEVICE_ATOMIC_FENCE_CAPABILITIES: getCap(src, srcSize, retSize); break; case CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES: getCap(src, srcSize, retSize); break; case CL_DEVICE_AVAILABLE: getCap(src, srcSize, retSize); break; case CL_DEVICE_AVC_ME_SUPPORTS_PREEMPTION_INTEL: getCap(src, srcSize, retSize); break; case CL_DEVICE_AVC_ME_SUPPORTS_TEXTURE_SAMPLER_USE_INTEL: getCap(src, srcSize, retSize); break; case CL_DEVICE_AVC_ME_VERSION_INTEL: getCap(src, srcSize, retSize); break; case CL_DEVICE_BUILT_IN_KERNELS: getStr(src, srcSize, retSize); break; case CL_DEVICE_COMPILER_AVAILABLE: getCap(src, srcSize, retSize); break; case CL_DEVICE_CROSS_DEVICE_SHARED_MEM_CAPABILITIES_INTEL: getCap(src, srcSize, retSize); break; case CL_DEVICE_DEVICE_MEM_CAPABILITIES_INTEL: getCap(src, srcSize, retSize); break; case CL_DEVICE_DOUBLE_FP_CONFIG: getCap(src, srcSize, retSize); break; case CL_DEVICE_DRIVER_VERSION_INTEL: getCap(src, srcSize, retSize); break; case CL_DEVICE_ENDIAN_LITTLE: getCap(src, srcSize, retSize); break; case CL_DEVICE_ERROR_CORRECTION_SUPPORT: getCap(src, srcSize, retSize); break; case CL_DEVICE_EXECUTION_CAPABILITIES: getCap(src, srcSize, retSize); break; case CL_DEVICE_EXTENSIONS: 
getStr(src, srcSize, retSize); break; case CL_DEVICE_GENERIC_ADDRESS_SPACE_SUPPORT: getCap(src, srcSize, retSize); break; case CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE: getCap(src, srcSize, retSize); break; case CL_DEVICE_GLOBAL_MEM_CACHE_SIZE: getCap(src, srcSize, retSize); break; case CL_DEVICE_GLOBAL_MEM_CACHE_TYPE: getCap(src, srcSize, retSize); break; case CL_DEVICE_GLOBAL_MEM_SIZE: getCap(src, srcSize, retSize); break; case CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE: getCap(src, srcSize, retSize); break; case CL_DEVICE_HALF_FP_CONFIG: getCap(src, srcSize, retSize); break; case CL_DEVICE_HOST_MEM_CAPABILITIES_INTEL: getCap(src, srcSize, retSize); break; case CL_DEVICE_HOST_UNIFIED_MEMORY: getCap(src, srcSize, retSize); break; case CL_DEVICE_ILS_WITH_VERSION: getCap(src, srcSize, retSize); break; case CL_DEVICE_IL_VERSION: getStr(src, srcSize, retSize); break; case CL_DEVICE_IMAGE_SUPPORT: getCap(src, srcSize, retSize); break; case CL_DEVICE_LATEST_CONFORMANCE_VERSION_PASSED: getStr(src, srcSize, retSize); break; case CL_DEVICE_LINKER_AVAILABLE: getCap(src, srcSize, retSize); break; case CL_DEVICE_LOCAL_MEM_SIZE: getCap(src, srcSize, retSize); break; case CL_DEVICE_LOCAL_MEM_TYPE: getCap(src, srcSize, retSize); break; case CL_DEVICE_MAX_CLOCK_FREQUENCY: getCap(src, srcSize, retSize); break; case CL_DEVICE_MAX_COMPUTE_UNITS: getCap(src, srcSize, retSize); break; case CL_DEVICE_MAX_CONSTANT_ARGS: getCap(src, srcSize, retSize); break; case CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE: getCap(src, srcSize, retSize); break; case CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE: getCap(src, srcSize, retSize); break; case CL_DEVICE_MAX_MEM_ALLOC_SIZE: getCap(src, srcSize, retSize); break; case CL_DEVICE_MAX_NUM_SUB_GROUPS: getCap(src, srcSize, retSize); break; case CL_DEVICE_MAX_ON_DEVICE_EVENTS: getCap(src, srcSize, retSize); break; case CL_DEVICE_MAX_ON_DEVICE_QUEUES: getCap(src, srcSize, retSize); break; case CL_DEVICE_MAX_PARAMETER_SIZE: getCap(src, srcSize, retSize); break; case 
CL_DEVICE_MAX_PIPE_ARGS: getCap(src, srcSize, retSize); break; case CL_DEVICE_MAX_SAMPLERS: getCap(src, srcSize, retSize); break; case CL_DEVICE_MAX_WORK_GROUP_SIZE: getCap(src, srcSize, retSize); break; case CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS: getCap(src, srcSize, retSize); break; case CL_DEVICE_MAX_WORK_ITEM_SIZES: getCap(src, srcSize, retSize); break; case CL_DEVICE_MEM_BASE_ADDR_ALIGN: getCap(src, srcSize, retSize); break; case CL_DEVICE_ME_VERSION_INTEL: getCap(src, srcSize, retSize); break; case CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE: getCap(src, srcSize, retSize); break; case CL_DEVICE_NAME: getStr(src, srcSize, retSize); break; case CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR: getCap(src, srcSize, retSize); break; case CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE: getCap(src, srcSize, retSize); break; case CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT: getCap(src, srcSize, retSize); break; case CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF: getCap(src, srcSize, retSize); break; case CL_DEVICE_NATIVE_VECTOR_WIDTH_INT: getCap(src, srcSize, retSize); break; case CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG: getCap(src, srcSize, retSize); break; case CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT: getCap(src, srcSize, retSize); break; case CL_DEVICE_NON_UNIFORM_WORK_GROUP_SUPPORT: getCap(src, srcSize, retSize); break; case CL_DEVICE_NUMERIC_VERSION: getCap(src, srcSize, retSize); break; case CL_DEVICE_OPENCL_C_VERSION: getStr(src, srcSize, retSize); break; case CL_DEVICE_PARENT_DEVICE: getCap(src, srcSize, retSize); break; case CL_DEVICE_PARTITION_AFFINITY_DOMAIN: getCap(src, srcSize, retSize); break; case CL_DEVICE_PARTITION_MAX_SUB_DEVICES: getCap(src, srcSize, retSize); break; case CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS: getCap(src, srcSize, retSize); break; case CL_DEVICE_PIPE_MAX_PACKET_SIZE: getCap(src, srcSize, retSize); break; case CL_DEVICE_PIPE_SUPPORT: getCap(src, srcSize, retSize); break; case CL_DEVICE_PLATFORM: getCap(src, srcSize, retSize); break; case CL_DEVICE_PREFERRED_GLOBAL_ATOMIC_ALIGNMENT: getCap(src, 
srcSize, retSize); break; case CL_DEVICE_PREFERRED_INTEROP_USER_SYNC: getCap(src, srcSize, retSize); break; case CL_DEVICE_PREFERRED_LOCAL_ATOMIC_ALIGNMENT: getCap(src, srcSize, retSize); break; case CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT: getCap(src, srcSize, retSize); break; case CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR: getCap(src, srcSize, retSize); break; case CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE: getCap(src, srcSize, retSize); break; case CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT: getCap(src, srcSize, retSize); break; case CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF: getCap(src, srcSize, retSize); break; case CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT: getCap(src, srcSize, retSize); break; case CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG: getCap(src, srcSize, retSize); break; case CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT: getCap(src, srcSize, retSize); break; case CL_DEVICE_PREFERRED_WORK_GROUP_SIZE_MULTIPLE: getCap(src, srcSize, retSize); break; case CL_DEVICE_PRINTF_BUFFER_SIZE: getCap(src, srcSize, retSize); break; case CL_DEVICE_PROFILE: getStr(src, srcSize, retSize); break; case CL_DEVICE_PROFILING_TIMER_RESOLUTION: getCap(src, srcSize, retSize); break; case CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE: getCap(src, srcSize, retSize); break; case CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE: getCap(src, srcSize, retSize); break; case CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES: getCap(src, srcSize, retSize); break; case CL_DEVICE_QUEUE_ON_HOST_PROPERTIES: getCap(src, srcSize, retSize); break; case CL_DEVICE_SHARED_SYSTEM_MEM_CAPABILITIES_INTEL: getCap(src, srcSize, retSize); break; case CL_DEVICE_SINGLE_DEVICE_SHARED_MEM_CAPABILITIES_INTEL: getCap(src, srcSize, retSize); break; case CL_DEVICE_SINGLE_FP_CONFIG: getCap(src, srcSize, retSize); break; case CL_DEVICE_SLICE_COUNT_INTEL: getCap(src, srcSize, retSize); break; case CL_DEVICE_SPIR_VERSIONS: getStr(src, srcSize, retSize); break; case CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS: getCap(src, srcSize, retSize); break; case 
CL_DEVICE_SUB_GROUP_SIZES_INTEL: getCap(src, srcSize, retSize); break; case CL_DEVICE_SVM_CAPABILITIES: getCap(src, srcSize, retSize); break; case CL_DEVICE_TYPE: getCap(src, srcSize, retSize); break; case CL_DEVICE_VENDOR: getStr(src, srcSize, retSize); break; case CL_DEVICE_VENDOR_ID: getCap(src, srcSize, retSize); break; case CL_DEVICE_VERSION: getStr(src, srcSize, retSize); break; case CL_DEVICE_WORK_GROUP_COLLECTIVE_FUNCTIONS_SUPPORT: getCap(src, srcSize, retSize); break; case CL_DRIVER_VERSION: getStr(src, srcSize, retSize); break; // clang-format on case CL_DEVICE_DEVICE_ENQUEUE_CAPABILITIES: if (paramValueSize == sizeof(cl_bool)) { srcSize = retSize = sizeof(cl_bool); param.boolean = CL_FALSE; src = ¶m.boolean; } else { getCap(src, srcSize, retSize); } break; case CL_DEVICE_NUM_SIMULTANEOUS_INTEROPS_INTEL: if (simultaneousInterops.size() > 1u) { srcSize = retSize = sizeof(cl_uint); param.uint = 1u; src = ¶m.uint; } break; case CL_DEVICE_SIMULTANEOUS_INTEROPS_INTEL: if (simultaneousInterops.size() > 1u) { srcSize = retSize = sizeof(cl_uint) * simultaneousInterops.size(); src = &simultaneousInterops[0]; } break; case CL_DEVICE_QUEUE_FAMILY_PROPERTIES_INTEL: src = deviceInfo.queueFamilyProperties.data(); retSize = srcSize = deviceInfo.queueFamilyProperties.size() * sizeof(cl_queue_family_properties_intel); break; case CL_DEVICE_REFERENCE_COUNT: { cl_int ref = this->getReference(); DEBUG_BREAK_IF(ref != 1 && !deviceInfo.parentDevice); param.uint = static_cast(ref); src = ¶m.uint; retSize = srcSize = sizeof(param.uint); break; } case CL_DEVICE_PARTITION_PROPERTIES: getCap(src, srcSize, retSize); if (deviceInfo.partitionProperties[0] == 0) { retSize = srcSize = sizeof(deviceInfo.partitionProperties[0]); } break; case CL_DEVICE_PARTITION_TYPE: getCap(src, srcSize, retSize); if (deviceInfo.partitionType[0] == 0) { retSize = srcSize = sizeof(deviceInfo.partitionType[0]); } break; case CL_DEVICE_OPENCL_C_FEATURES: src = deviceInfo.openclCFeatures.data(); retSize = 
srcSize = deviceInfo.openclCFeatures.size() * sizeof(cl_name_version); break; case CL_DEVICE_BUILT_IN_KERNELS_WITH_VERSION: src = deviceInfo.builtInKernelsWithVersion.data(); retSize = srcSize = deviceInfo.builtInKernelsWithVersion.size() * sizeof(cl_name_version); break; case CL_DEVICE_OPENCL_C_ALL_VERSIONS: src = deviceInfo.openclCAllVersions.data(); retSize = srcSize = deviceInfo.openclCAllVersions.size() * sizeof(cl_name_version); break; case CL_DEVICE_EXTENSIONS_WITH_VERSION: std::call_once(initializeExtensionsWithVersionOnce, [this]() { this->initializeExtensionsWithVersion(); }); src = deviceInfo.extensionsWithVersion.data(); retSize = srcSize = deviceInfo.extensionsWithVersion.size() * sizeof(cl_name_version); break; case CL_DEVICE_SUPPORTED_THREAD_ARBITRATION_POLICY_INTEL: src = deviceInfo.supportedThreadArbitrationPolicies.data(); retSize = srcSize = deviceInfo.supportedThreadArbitrationPolicies.size() * sizeof(cl_uint); break; case CL_DEVICE_IP_VERSION_INTEL: { auto &clHwHelper = ClHwHelper::get(getHardwareInfo().platform.eRenderCoreFamily); param.uint = clHwHelper.getDeviceIpVersion(getHardwareInfo()); src = ¶m.uint; retSize = srcSize = sizeof(cl_version); break; } case CL_DEVICE_ID_INTEL: param.uint = getHardwareInfo().platform.usDeviceID; src = ¶m.uint; retSize = srcSize = sizeof(cl_uint); break; case CL_DEVICE_NUM_SLICES_INTEL: param.uint = static_cast(getHardwareInfo().gtSystemInfo.SliceCount * std::max(device.getNumGenericSubDevices(), 1u)); src = ¶m.uint; retSize = srcSize = sizeof(cl_uint); break; case CL_DEVICE_NUM_SUB_SLICES_PER_SLICE_INTEL: { const auto >SysInfo = getHardwareInfo().gtSystemInfo; param.uint = gtSysInfo.SubSliceCount / gtSysInfo.SliceCount; src = ¶m.uint; retSize = srcSize = sizeof(cl_uint); break; } case CL_DEVICE_NUM_EUS_PER_SUB_SLICE_INTEL: param.uint = getHardwareInfo().gtSystemInfo.MaxEuPerSubSlice; src = ¶m.uint; retSize = srcSize = sizeof(cl_uint); break; case CL_DEVICE_NUM_THREADS_PER_EU_INTEL: { const auto >SysInfo = 
getHardwareInfo().gtSystemInfo; param.uint = gtSysInfo.ThreadCount / gtSysInfo.EUCount; src = ¶m.uint; retSize = srcSize = sizeof(cl_uint); break; } case CL_DEVICE_FEATURE_CAPABILITIES_INTEL: { auto &clHwHelper = ClHwHelper::get(getHardwareInfo().platform.eRenderCoreFamily); param.bitfield = clHwHelper.getSupportedDeviceFeatureCapabilities(); src = ¶m.bitfield; retSize = srcSize = sizeof(cl_device_feature_capabilities_intel); break; } case CL_DEVICE_PCI_BUS_INFO_KHR: if (isPciBusInfoValid()) { src = &deviceInfo.pciBusInfo; retSize = srcSize = sizeof(deviceInfo.pciBusInfo); } break; default: if (getDeviceInfoForImage(paramName, src, srcSize, retSize) && !getSharedDeviceInfo().imageSupport) { src = &value; break; } ClDeviceHelper::getExtraDeviceInfo(*this, paramName, param, src, srcSize, retSize); } auto getInfoStatus = GetInfo::getInfo(paramValue, paramValueSize, src, srcSize); retVal = changeGetInfoStatusToCLResultType(getInfoStatus); GetInfo::setParamValueReturnSize(paramValueSizeRet, retSize, getInfoStatus); return retVal; } bool ClDevice::getDeviceInfoForImage(cl_device_info paramName, const void *&src, size_t &srcSize, size_t &retSize) { bool retVal = true; switch (paramName) { case CL_DEVICE_MAX_READ_IMAGE_ARGS: getCap(src, srcSize, retSize); break; case CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS: getCap(src, srcSize, retSize); break; case CL_DEVICE_MAX_WRITE_IMAGE_ARGS: getCap(src, srcSize, retSize); break; case CL_DEVICE_IMAGE2D_MAX_HEIGHT: getCap(src, srcSize, retSize); break; case CL_DEVICE_IMAGE2D_MAX_WIDTH: getCap(src, srcSize, retSize); break; case CL_DEVICE_IMAGE3D_MAX_DEPTH: getCap(src, srcSize, retSize); break; case CL_DEVICE_IMAGE3D_MAX_HEIGHT: getCap(src, srcSize, retSize); break; case CL_DEVICE_IMAGE3D_MAX_WIDTH: getCap(src, srcSize, retSize); break; case CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT: getCap(src, srcSize, retSize); break; case CL_DEVICE_IMAGE_MAX_ARRAY_SIZE: getCap(src, srcSize, retSize); break; case CL_DEVICE_IMAGE_MAX_BUFFER_SIZE: getCap(src, 
srcSize, retSize); break; case CL_DEVICE_IMAGE_PITCH_ALIGNMENT: getCap(src, srcSize, retSize); break; case CL_DEVICE_PLANAR_YUV_MAX_WIDTH_INTEL: if (deviceInfo.nv12Extension) { getCap(src, srcSize, retSize); break; } retVal = false; break; case CL_DEVICE_PLANAR_YUV_MAX_HEIGHT_INTEL: if (deviceInfo.nv12Extension) { getCap(src, srcSize, retSize); break; } retVal = false; break; default: retVal = false; } return retVal; } } // namespace NEO compute-runtime-22.14.22890/opencl/source/cl_device/cl_device_info.h000066400000000000000000000274071422164147700251620ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/compiler_interface/oclc_extensions.h" #include "shared/source/helpers/constants.h" #include "opencl/extensions/public/cl_ext_private.h" #include namespace NEO { struct ClDeviceInfoParam { union { cl_bool boolean; cl_uint uint; cl_bitfield bitfield; }; }; // clang-format off struct ClDeviceInfo { cl_name_version ilsWithVersion[1]; StackVec builtInKernelsWithVersion; StackVec openclCAllVersions; OpenClCFeaturesContainer openclCFeatures; std::vector extensionsWithVersion; cl_device_type deviceType; size_t maxSliceCount; size_t image3DMaxWidth; size_t image3DMaxHeight; size_t maxBufferSize; size_t maxArraySize; cl_device_fp_config singleFpConfig; cl_device_fp_config halfFpConfig; cl_device_fp_config doubleFpConfig; cl_ulong globalMemCacheSize; cl_ulong maxConstantBufferSize; size_t maxGlobalVariableSize; size_t globalVariablePreferredTotalSize; size_t preferredWorkGroupSizeMultiple; cl_device_exec_capabilities executionCapabilities; cl_command_queue_properties queueOnHostProperties; cl_command_queue_properties queueOnDeviceProperties; const char *builtInKernels; cl_platform_id platform; const char *name; const char *vendor; const char *driverVersion; const char *profile; const char *clVersion; const char *clCVersion; const char *spirVersions; const char 
*deviceExtensions; const char *latestConformanceVersionPassed; cl_device_id parentDevice; cl_device_affinity_domain partitionAffinityDomain; cl_uint partitionMaxSubDevices; cl_device_partition_property partitionProperties[2]; cl_device_partition_property partitionType[3]; cl_device_svm_capabilities svmCapabilities; StackVec queueFamilyProperties; double platformHostTimerResolution; size_t planarYuvMaxWidth; size_t planarYuvMaxHeight; cl_version numericClVersion; cl_uint maxComputUnits; cl_uint maxWorkItemDimensions; cl_uint maxNumOfSubGroups; cl_bool independentForwardProgress; cl_device_atomic_capabilities atomicMemoryCapabilities; cl_device_atomic_capabilities atomicFenceCapabilities; cl_bool nonUniformWorkGroupSupport; cl_bool workGroupCollectiveFunctionsSupport; cl_bool genericAddressSpaceSupport; cl_device_device_enqueue_capabilities deviceEnqueueSupport; cl_bool pipeSupport; cl_uint preferredVectorWidthChar; cl_uint preferredVectorWidthShort; cl_uint preferredVectorWidthInt; cl_uint preferredVectorWidthLong; cl_uint preferredVectorWidthFloat; cl_uint preferredVectorWidthDouble; cl_uint preferredVectorWidthHalf; cl_uint nativeVectorWidthChar; cl_uint nativeVectorWidthShort; cl_uint nativeVectorWidthInt; cl_uint nativeVectorWidthLong; cl_uint nativeVectorWidthFloat; cl_uint nativeVectorWidthDouble; cl_uint nativeVectorWidthHalf; cl_uint maxReadWriteImageArgs; cl_uint imagePitchAlignment; cl_uint imageBaseAddressAlignment; cl_uint maxPipeArgs; cl_uint pipeMaxActiveReservations; cl_uint pipeMaxPacketSize; cl_uint memBaseAddressAlign; cl_uint minDataTypeAlignSize; cl_device_mem_cache_type globalMemCacheType; cl_uint maxConstantArgs; cl_device_local_mem_type localMemType; cl_bool endianLittle; cl_bool deviceAvailable; cl_bool compilerAvailable; cl_bool linkerAvailable; cl_uint queueOnDevicePreferredSize; cl_uint queueOnDeviceMaxSize; cl_uint maxOnDeviceQueues; cl_uint maxOnDeviceEvents; cl_bool preferredInteropUserSync; cl_uint referenceCount; cl_uint 
preferredPlatformAtomicAlignment; cl_uint preferredGlobalAtomicAlignment; cl_uint preferredLocalAtomicAlignment; cl_bool hostUnifiedMemory; cl_bool vmeAvcSupportsTextureSampler; cl_uint vmeAvcVersion; cl_uint vmeVersion; cl_uint internalDriverVersion; cl_uint grfSize; bool preemptionSupported; cl_device_pci_bus_info_khr pciBusInfo; /* Extensions supported */ bool nv12Extension; bool vmeExtension; bool platformLP; bool packedYuvExtension; /*Unified Shared Memory Capabilites*/ cl_unified_shared_memory_capabilities_intel hostMemCapabilities; cl_unified_shared_memory_capabilities_intel deviceMemCapabilities; cl_unified_shared_memory_capabilities_intel singleDeviceSharedMemCapabilities; cl_unified_shared_memory_capabilities_intel crossDeviceSharedMemCapabilities; cl_unified_shared_memory_capabilities_intel sharedSystemMemCapabilities; StackVec supportedThreadArbitrationPolicies; }; // clang-format on } // namespace NEO compute-runtime-22.14.22890/opencl/source/cl_device/cl_device_info_map.h000066400000000000000000000636771422164147700260300ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/device/device_info.h" #include "opencl/extensions/public/cl_ext_private.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/cl_device/cl_device_info.h" #include "CL/cl_ext_intel.h" #include #include namespace NEO { namespace ClDeviceInfoTable { template struct ClMapBase { enum { param = Param }; typedef _Type Type; enum { size = sizeof(Type) }; static const Type &getValue(const NEO::ClDevice &clDevice) { return clDevice.getDeviceInfo().*val; } }; template struct MapBase { enum { param = Param }; typedef _Type Type; enum { size = sizeof(Type) }; static const Type &getValue(const NEO::ClDevice &clDevice) { return clDevice.getSharedDeviceInfo().*val; } }; template struct Map {}; ////////////////////////////////////////////////////// // DeviceInfo mapping table // 
Map::param - i.e. CL_DEVICE_ADDRESS_BITS // Map::Type - i.e. cl_uint // Map::size - ie. sizeof( cl_uint ) // Map::getValue - ie. return deviceInfo.AddressBits ////////////////////////////////////////////////////// // clang-format off // please keep alphabetical order template<> struct Map : public MapBase {}; template<> struct Map : public MapBase {}; template<> struct Map : public MapBase {}; template<> struct Map : public MapBase {}; template<> struct Map : public MapBase {}; template<> struct Map : public MapBase {}; template<> struct Map : public MapBase {}; template<> struct Map : public MapBase {}; template<> struct Map : public MapBase {}; template<> struct Map : public MapBase {}; template<> struct Map : public MapBase {}; template<> struct Map : public MapBase {}; template<> struct Map : public MapBase {}; template<> struct Map : public MapBase {}; template<> struct Map : public MapBase {}; template<> struct Map : public MapBase {}; template<> struct Map : public MapBase {}; template<> struct Map : public MapBase {}; template<> struct Map : public MapBase {}; template<> struct Map : public MapBase {}; template<> struct Map : public MapBase {}; template<> struct Map : public MapBase {}; template<> struct Map : public MapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map 
: public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase 
{}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; // clang-format on } // namespace ClDeviceInfoTable } // namespace NEO compute-runtime-22.14.22890/opencl/source/cl_device/cl_device_vector.h000066400000000000000000000011501422164147700255140ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/utilities/stackvec.h" #include "opencl/source/api/cl_types.h" namespace NEO { class ClDevice; class ClDeviceVector : public StackVec { public: ClDeviceVector() = default; ClDeviceVector(const ClDeviceVector &) = 
default; ClDeviceVector &operator=(const ClDeviceVector &) = default; ClDeviceVector(const cl_device_id *devices, cl_uint numDevices); void toDeviceIDs(std::vector &devIDs) const; }; } // namespace NEO compute-runtime-22.14.22890/opencl/source/command_queue/000077500000000000000000000000001422164147700227545ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/source/command_queue/CMakeLists.txt000066400000000000000000000054021422164147700255150ustar00rootroot00000000000000# # Copyright (C) 2018-2022 Intel Corporation # # SPDX-License-Identifier: MIT # set(RUNTIME_SRCS_COMMAND_QUEUE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/cl_local_work_size.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cl_local_work_size.h ${CMAKE_CURRENT_SOURCE_DIR}/command_queue.cpp ${CMAKE_CURRENT_SOURCE_DIR}/command_queue.h ${CMAKE_CURRENT_SOURCE_DIR}/command_queue_hw.h ${CMAKE_CURRENT_SOURCE_DIR}/command_queue_hw_base.inl ${CMAKE_CURRENT_SOURCE_DIR}/command_queue_hw_bdw_and_later.inl ${CMAKE_CURRENT_SOURCE_DIR}/copy_engine_state.h ${CMAKE_CURRENT_SOURCE_DIR}/cpu_data_transfer_handler.cpp ${CMAKE_CURRENT_SOURCE_DIR}/csr_selection_args.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_barrier.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_common.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_copy_buffer.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_copy_buffer_rect.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_copy_buffer_to_image.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_copy_image.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_copy_image_to_buffer.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_fill_buffer.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_fill_image.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_kernel.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_marker.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_migrate_mem_objects.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_read_buffer.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_read_buffer_rect.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_read_image.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_resource_barrier.h 
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_svm.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_write_buffer.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_write_buffer_rect.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_write_image.h ${CMAKE_CURRENT_SOURCE_DIR}/finish.h ${CMAKE_CURRENT_SOURCE_DIR}/flush.h ${CMAKE_CURRENT_SOURCE_DIR}/gpgpu_walker.h ${CMAKE_CURRENT_SOURCE_DIR}/gpgpu_walker_base.inl ${CMAKE_CURRENT_SOURCE_DIR}/gpgpu_walker_bdw_and_later.inl ${CMAKE_CURRENT_SOURCE_DIR}/hardware_interface.h ${CMAKE_CURRENT_SOURCE_DIR}/hardware_interface_base.inl ${CMAKE_CURRENT_SOURCE_DIR}/hardware_interface_bdw_and_later.inl ${CMAKE_CURRENT_SOURCE_DIR}/resource_barrier.cpp ${CMAKE_CURRENT_SOURCE_DIR}/resource_barrier.h ) if(SUPPORT_XEHP_AND_LATER) list(APPEND RUNTIME_SRCS_COMMAND_QUEUE ${CMAKE_CURRENT_SOURCE_DIR}/command_queue_hw_xehp_and_later.inl ${CMAKE_CURRENT_SOURCE_DIR}/gpgpu_walker_xehp_and_later.inl ${CMAKE_CURRENT_SOURCE_DIR}/hardware_interface_xehp_and_later.inl ) endif() target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_COMMAND_QUEUE}) set_property(GLOBAL PROPERTY RUNTIME_SRCS_COMMAND_QUEUE ${RUNTIME_SRCS_COMMAND_QUEUE}) add_subdirectories() compute-runtime-22.14.22890/opencl/source/command_queue/cl_local_work_size.cpp000066400000000000000000000136221422164147700273300ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_queue/cl_local_work_size.h" #include "shared/source/device/device.h" #include "shared/source/helpers/array_count.h" #include "shared/source/helpers/basic_math.h" #include "shared/source/helpers/debug_helpers.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/local_work_size.h" #include "opencl/source/context/context.h" #include "opencl/source/helpers/dispatch_info.h" #include #include #include namespace NEO { Vec3 computeWorkgroupSize(const DispatchInfo &dispatchInfo) { size_t workGroupSize[3] = {}; auto kernel = dispatchInfo.getKernel(); if 
(kernel != nullptr) { auto &device = dispatchInfo.getClDevice(); const auto &hwInfo = device.getHardwareInfo(); auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); if (DebugManager.flags.EnableComputeWorkSizeND.get()) { WorkSizeInfo wsInfo = createWorkSizeInfoFromDispatchInfo(dispatchInfo); if (wsInfo.slmTotalSize == 0 && !wsInfo.hasBarriers && !wsInfo.imgUsed && hwHelper.preferSmallWorkgroupSizeForKernel(kernel->getKernelInfo().heapInfo.KernelUnpaddedSize, hwInfo) && ((dispatchInfo.getDim() == 1) && (dispatchInfo.getGWS().x % wsInfo.simdSize * 2 == 0))) { wsInfo.maxWorkGroupSize = wsInfo.simdSize * 2; } size_t workItems[3] = {dispatchInfo.getGWS().x, dispatchInfo.getGWS().y, dispatchInfo.getGWS().z}; computeWorkgroupSizeND(wsInfo, workGroupSize, workItems, dispatchInfo.getDim()); } else { auto maxWorkGroupSize = kernel->getMaxKernelWorkGroupSize(); auto simd = kernel->getKernelInfo().getMaxSimdSize(); size_t workItems[3] = {dispatchInfo.getGWS().x, dispatchInfo.getGWS().y, dispatchInfo.getGWS().z}; if (dispatchInfo.getDim() == 1) { computeWorkgroupSize1D(maxWorkGroupSize, workGroupSize, workItems, simd); } else if (DebugManager.flags.EnableComputeWorkSizeSquared.get() && dispatchInfo.getDim() == 2) { computeWorkgroupSizeSquared(maxWorkGroupSize, workGroupSize, workItems, simd, dispatchInfo.getDim()); } else { computeWorkgroupSize2D(maxWorkGroupSize, workGroupSize, workItems, simd); } } } DBG_LOG(PrintLWSSizes, "Input GWS enqueueBlocked", dispatchInfo.getGWS().x, dispatchInfo.getGWS().y, dispatchInfo.getGWS().z, " Driver deduced LWS", workGroupSize[0], workGroupSize[1], workGroupSize[2]); return {workGroupSize[0], workGroupSize[1], workGroupSize[2]}; } Vec3 generateWorkgroupSize(const DispatchInfo &dispatchInfo) { return (dispatchInfo.getEnqueuedWorkgroupSize().x == 0) ? 
computeWorkgroupSize(dispatchInfo) : dispatchInfo.getEnqueuedWorkgroupSize(); } Vec3 generateWorkgroupsNumber(const DispatchInfo &dispatchInfo) { return generateWorkgroupsNumber(dispatchInfo.getGWS(), dispatchInfo.getLocalWorkgroupSize()); } void provideLocalWorkGroupSizeHints(Context *context, const DispatchInfo &dispatchInfo) { if (context != nullptr && context->isProvidingPerformanceHints() && dispatchInfo.getDim() <= 3) { size_t preferredWorkGroupSize[3]; auto lws = computeWorkgroupSize(dispatchInfo); preferredWorkGroupSize[0] = lws.x; preferredWorkGroupSize[1] = lws.y; preferredWorkGroupSize[2] = lws.z; const auto &kernelInfo = dispatchInfo.getKernel()->getKernelInfo(); if (dispatchInfo.getEnqueuedWorkgroupSize().x == 0) { context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, NULL_LOCAL_WORKGROUP_SIZE, kernelInfo.kernelDescriptor.kernelMetadata.kernelName.c_str(), preferredWorkGroupSize[0], preferredWorkGroupSize[1], preferredWorkGroupSize[2]); } else { size_t localWorkSizesIn[3] = {dispatchInfo.getEnqueuedWorkgroupSize().x, dispatchInfo.getEnqueuedWorkgroupSize().y, dispatchInfo.getEnqueuedWorkgroupSize().z}; for (auto i = 0u; i < dispatchInfo.getDim(); i++) { if (localWorkSizesIn[i] != preferredWorkGroupSize[i]) { context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, BAD_LOCAL_WORKGROUP_SIZE, localWorkSizesIn[0], localWorkSizesIn[1], localWorkSizesIn[2], kernelInfo.kernelDescriptor.kernelMetadata.kernelName.c_str(), preferredWorkGroupSize[0], preferredWorkGroupSize[1], preferredWorkGroupSize[2]); break; } } } } } WorkSizeInfo createWorkSizeInfoFromDispatchInfo(const DispatchInfo &dispatchInfo) { auto &device = dispatchInfo.getClDevice(); const auto &kernelInfo = dispatchInfo.getKernel()->getKernelInfo(); auto numThreadsPerSubSlice = static_cast(device.getSharedDeviceInfo().maxNumEUsPerSubSlice) * device.getSharedDeviceInfo().numThreadsPerEU; WorkSizeInfo wsInfo(dispatchInfo.getKernel()->getMaxKernelWorkGroupSize(), 
kernelInfo.kernelDescriptor.kernelAttributes.usesBarriers(), static_cast(kernelInfo.getMaxSimdSize()), static_cast(dispatchInfo.getKernel()->getSlmTotalSize()), &device.getHardwareInfo(), numThreadsPerSubSlice, static_cast(device.getSharedDeviceInfo().localMemSize), false, false, kernelInfo.kernelDescriptor.kernelAttributes.flags.requiresDisabledEUFusion); wsInfo.setIfUseImg(kernelInfo); return wsInfo; } } // namespace NEO compute-runtime-22.14.22890/opencl/source/command_queue/cl_local_work_size.h000066400000000000000000000012251422164147700267710ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/vec.h" #include "shared/source/program/kernel_info.h" namespace NEO { class Context; class DispatchInfo; Vec3 computeWorkgroupSize( const DispatchInfo &dispatchInfo); Vec3 generateWorkgroupSize( const DispatchInfo &dispatchInfo); Vec3 generateWorkgroupsNumber( const DispatchInfo &dispatchInfo); void provideLocalWorkGroupSizeHints(Context *context, const DispatchInfo &dispatchInfo); WorkSizeInfo createWorkSizeInfoFromDispatchInfo(const DispatchInfo &dispatchInfo); } // namespace NEO compute-runtime-22.14.22890/opencl/source/command_queue/command_queue.cpp000066400000000000000000001407661422164147700263200ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_queue/command_queue.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/array_count.h" #include "shared/source/helpers/engine_node_helper.h" #include "shared/source/helpers/get_info.h" #include "shared/source/helpers/ptr_math.h" #include "shared/source/helpers/string.h" #include "shared/source/helpers/timestamp_packet.h" #include "shared/source/memory_manager/internal_allocation_storage.h" #include 
"shared/source/os_interface/hw_info_config.h" #include "shared/source/os_interface/os_context.h" #include "shared/source/utilities/api_intercept.h" #include "shared/source/utilities/tag_allocator.h" #include "opencl/source/built_ins/builtins_dispatch_builder.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/context/context.h" #include "opencl/source/event/event_builder.h" #include "opencl/source/event/user_event.h" #include "opencl/source/gtpin/gtpin_notify.h" #include "opencl/source/helpers/cl_hw_helper.h" #include "opencl/source/helpers/convert_color.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/helpers/mipmap.h" #include "opencl/source/helpers/queue_helpers.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/program/printf_handler.h" #include "CL/cl_ext.h" #include #include namespace NEO { // Global table of create functions CommandQueueCreateFunc commandQueueFactory[IGFX_MAX_CORE] = {}; CommandQueue *CommandQueue::create(Context *context, ClDevice *device, const cl_queue_properties *properties, bool internalUsage, cl_int &retVal) { retVal = CL_SUCCESS; auto funcCreate = commandQueueFactory[device->getRenderCoreFamily()]; DEBUG_BREAK_IF(nullptr == funcCreate); return funcCreate(context, device, properties, internalUsage); } CommandQueue::CommandQueue(Context *context, ClDevice *device, const cl_queue_properties *properties, bool internalUsage) : context(context), device(device) { if (context) { context->incRefInternal(); } commandQueueProperties = getCmdQueueProperties(properties); flushStamp.reset(new FlushStampTracker(true)); if (device) { auto &hwInfo = device->getHardwareInfo(); auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); auto hwInfoConfig = HwInfoConfig::get(hwInfo.platform.eProductFamily); gpgpuEngine = &device->getDefaultEngine(); UNRECOVERABLE_IF(gpgpuEngine->getEngineType() >= 
aub_stream::EngineType::NUM_ENGINES); bool bcsAllowed = hwInfoConfig->isBlitterFullySupported(hwInfo) && hwHelper.isSubDeviceEngineSupported(hwInfo, device->getDeviceBitfield(), aub_stream::EngineType::ENGINE_BCS); if (bcsAllowed || gpgpuEngine->commandStreamReceiver->peekTimestampPacketWriteEnabled()) { timestampPacketContainer = std::make_unique(); deferredTimestampPackets = std::make_unique(); } if (bcsAllowed) { auto &neoDevice = device->getNearestGenericSubDevice(0)->getDevice(); auto &selectorCopyEngine = neoDevice.getSelectorCopyEngine(); auto bcsEngineType = EngineHelpers::getBcsEngineType(hwInfo, device->getDeviceBitfield(), selectorCopyEngine, internalUsage); bcsEngines[EngineHelpers::getBcsIndex(bcsEngineType)] = neoDevice.tryGetEngine(bcsEngineType, EngineUsage::Regular); bcsEngineTypes.push_back(bcsEngineType); } } storeProperties(properties); processProperties(properties); } CommandQueue::~CommandQueue() { if (virtualEvent) { UNRECOVERABLE_IF(this->virtualEvent->getCommandQueue() != this && this->virtualEvent->getCommandQueue() != nullptr); virtualEvent->decRefInternal(); } if (device) { auto storageForAllocation = gpgpuEngine->commandStreamReceiver->getInternalAllocationStorage(); if (commandStream) { storageForAllocation->storeAllocation(std::unique_ptr(commandStream->getGraphicsAllocation()), REUSABLE_ALLOCATION); } delete commandStream; if (this->perfCountersEnabled) { device->getPerformanceCounters()->shutdown(); } if (auto mainBcs = bcsEngines[0]; mainBcs != nullptr) { auto &selectorCopyEngine = device->getNearestGenericSubDevice(0)->getSelectorCopyEngine(); EngineHelpers::releaseBcsEngineType(mainBcs->getEngineType(), selectorCopyEngine); } } timestampPacketContainer.reset(); //for normal queue, decrement ref count on context //special queue is owned by context so ref count doesn't have to be decremented if (context && !isSpecialCommandQueue) { context->decRefInternal(); } gtpinRemoveCommandQueue(this); } CommandStreamReceiver 
/**
 * Looks up the command stream receiver of one of this queue's copy engines.
 *
 * @param bcsEngineType copy-engine type; mapped to a slot of bcsEngines through
 *                      EngineHelpers::getBcsIndex.
 * @return the receiver of the engine stored in that slot, or nullptr when the
 *         slot is empty.
 */
CommandStreamReceiver *CommandQueue::getBcsCommandStreamReceiver(aub_stream::EngineType bcsEngineType) const {
    const auto slot = EngineHelpers::getBcsIndex(bcsEngineType);
    const EngineControl *bcsEngine = this->bcsEngines[slot];
    return (bcsEngine != nullptr) ? bcsEngine->commandStreamReceiver : nullptr;
}
true; if (DebugManager.flags.AssignBCSAtEnqueue.get() != -1) { assignBCS = DebugManager.flags.AssignBCSAtEnqueue.get(); } if (assignBCS) { selectedCsr = getBcsCommandStreamReceiver(preferredBcsEngineType); } if (selectedCsr == nullptr && !bcsEngineTypes.empty()) { selectedCsr = getBcsCommandStreamReceiver(bcsEngineTypes[0]); } } if (selectedCsr == nullptr) { selectedCsr = &getGpgpuCommandStreamReceiver(); } UNRECOVERABLE_IF(selectedCsr == nullptr); return *selectedCsr; } Device &CommandQueue::getDevice() const noexcept { return device->getDevice(); } uint32_t CommandQueue::getHwTag() const { uint32_t tag = *getHwTagAddress(); return tag; } volatile uint32_t *CommandQueue::getHwTagAddress() const { return getGpgpuCommandStreamReceiver().getTagAddress(); } bool CommandQueue::isCompleted(uint32_t gpgpuTaskCount, CopyEngineState bcsState) const { DEBUG_BREAK_IF(getHwTag() == CompletionStamp::notReady); if (getGpgpuCommandStreamReceiver().testTaskCountReady(getHwTagAddress(), gpgpuTaskCount)) { if (bcsState.isValid()) { return *getBcsCommandStreamReceiver(bcsState.engineType)->getTagAddress() >= peekBcsTaskCount(bcsState.engineType); } return true; } return false; } WaitStatus CommandQueue::waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList, bool skipWait) { WAIT_ENTER() WaitStatus waitStatus{WaitStatus::Ready}; DBG_LOG(LogTaskCounts, __FUNCTION__, "Waiting for taskCount:", gpgpuTaskCountToWait); DBG_LOG(LogTaskCounts, __FUNCTION__, "Line: ", __LINE__, "Current taskCount:", getHwTag()); if (!skipWait) { waitStatus = getGpgpuCommandStreamReceiver().waitForTaskCountWithKmdNotifyFallback(gpgpuTaskCountToWait, flushStampToWait, useQuickKmdSleep, this->getThrottle()); if (waitStatus == WaitStatus::GpuHang) { return WaitStatus::GpuHang; } DEBUG_BREAK_IF(getHwTag() < gpgpuTaskCountToWait); if (gtpinIsGTPinInitialized()) { gtpinNotifyTaskCompletion(gpgpuTaskCountToWait); 
} } for (const CopyEngineState ©Engine : copyEnginesToWait) { auto bcsCsr = getBcsCommandStreamReceiver(copyEngine.engineType); waitStatus = bcsCsr->waitForTaskCountWithKmdNotifyFallback(copyEngine.taskCount, 0, false, this->getThrottle()); if (waitStatus == WaitStatus::GpuHang) { return WaitStatus::GpuHang; } waitStatus = bcsCsr->waitForTaskCountAndCleanTemporaryAllocationList(copyEngine.taskCount); if (waitStatus == WaitStatus::GpuHang) { return WaitStatus::GpuHang; } } waitStatus = cleanTemporaryAllocationList ? getGpgpuCommandStreamReceiver().waitForTaskCountAndCleanTemporaryAllocationList(gpgpuTaskCountToWait) : getGpgpuCommandStreamReceiver().waitForTaskCount(gpgpuTaskCountToWait); WAIT_LEAVE() return waitStatus; } bool CommandQueue::isQueueBlocked() { TakeOwnershipWrapper takeOwnershipWrapper(*this); //check if we have user event and if so, if it is in blocked state. if (this->virtualEvent) { auto executionStatus = this->virtualEvent->peekExecutionStatus(); if (executionStatus <= CL_SUBMITTED) { UNRECOVERABLE_IF(this->virtualEvent == nullptr); if (this->virtualEvent->isStatusCompletedByTermination(executionStatus) == false) { taskCount = this->virtualEvent->peekTaskCount(); flushStamp->setStamp(this->virtualEvent->flushStamp->peekStamp()); taskLevel = this->virtualEvent->taskLevel; // If this isn't an OOQ, update the taskLevel for the queue if (!isOOQEnabled()) { taskLevel++; } } else { //at this point we may reset queue TaskCount, since all command previous to this were aborted taskCount = 0; flushStamp->setStamp(0); taskLevel = getGpgpuCommandStreamReceiver().peekTaskLevel(); } FileLoggerInstance().log(DebugManager.flags.EventsDebugEnable.get(), "isQueueBlocked taskLevel change from", taskLevel, "to new from virtualEvent", this->virtualEvent, "new tasklevel", this->virtualEvent->taskLevel.load()); //close the access to virtual event, driver added only 1 ref count. 
this->virtualEvent->decRefInternal(); this->virtualEvent = nullptr; return false; } return true; } return false; } cl_int CommandQueue::getCommandQueueInfo(cl_command_queue_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) { return getQueueInfo(this, paramName, paramValueSize, paramValue, paramValueSizeRet); } uint32_t CommandQueue::getTaskLevelFromWaitList(uint32_t taskLevel, cl_uint numEventsInWaitList, const cl_event *eventWaitList) { for (auto iEvent = 0u; iEvent < numEventsInWaitList; ++iEvent) { auto pEvent = (Event *)(eventWaitList[iEvent]); uint32_t eventTaskLevel = pEvent->taskLevel; taskLevel = std::max(taskLevel, eventTaskLevel); } return taskLevel; } LinearStream &CommandQueue::getCS(size_t minRequiredSize) { DEBUG_BREAK_IF(nullptr == device); if (!commandStream) { commandStream = new LinearStream(nullptr); } minRequiredSize += CSRequirements::minCommandQueueCommandStreamSize; constexpr static auto additionalAllocationSize = CSRequirements::minCommandQueueCommandStreamSize + CSRequirements::csOverfetchSize; getGpgpuCommandStreamReceiver().ensureCommandBufferAllocation(*commandStream, minRequiredSize, additionalAllocationSize); return *commandStream; } cl_int CommandQueue::enqueueAcquireSharedObjects(cl_uint numObjects, const cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *oclEvent, cl_uint cmdType) { if ((memObjects == nullptr && numObjects != 0) || (memObjects != nullptr && numObjects == 0)) { return CL_INVALID_VALUE; } for (unsigned int object = 0; object < numObjects; object++) { auto memObject = castToObject(memObjects[object]); if (memObject == nullptr || memObject->peekSharingHandler() == nullptr) { return CL_INVALID_MEM_OBJECT; } int result = memObject->peekSharingHandler()->acquire(memObject, getDevice().getRootDeviceIndex()); if (result != CL_SUCCESS) { return result; } memObject->acquireCount++; } auto status = enqueueMarkerWithWaitList( numEventsInWaitList, 
eventWaitList, oclEvent); if (oclEvent) { castToObjectOrAbort(*oclEvent)->setCmdType(cmdType); } return status; } cl_int CommandQueue::enqueueReleaseSharedObjects(cl_uint numObjects, const cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *oclEvent, cl_uint cmdType) { if ((memObjects == nullptr && numObjects != 0) || (memObjects != nullptr && numObjects == 0)) { return CL_INVALID_VALUE; } for (unsigned int object = 0; object < numObjects; object++) { auto memObject = castToObject(memObjects[object]); if (memObject == nullptr || memObject->peekSharingHandler() == nullptr) { return CL_INVALID_MEM_OBJECT; } memObject->peekSharingHandler()->release(memObject, getDevice().getRootDeviceIndex()); DEBUG_BREAK_IF(memObject->acquireCount <= 0); memObject->acquireCount--; } auto status = enqueueMarkerWithWaitList( numEventsInWaitList, eventWaitList, oclEvent); if (oclEvent) { castToObjectOrAbort(*oclEvent)->setCmdType(cmdType); } return status; } void CommandQueue::updateFromCompletionStamp(const CompletionStamp &completionStamp, Event *outEvent) { DEBUG_BREAK_IF(this->taskLevel > completionStamp.taskLevel); DEBUG_BREAK_IF(this->taskCount > completionStamp.taskCount); if (completionStamp.taskCount != CompletionStamp::notReady) { taskCount = completionStamp.taskCount; } flushStamp->setStamp(completionStamp.flushStamp); this->taskLevel = completionStamp.taskLevel; if (outEvent) { outEvent->updateCompletionStamp(completionStamp.taskCount, outEvent->peekBcsTaskCountFromCommandQueue(), completionStamp.taskLevel, completionStamp.flushStamp); FileLoggerInstance().log(DebugManager.flags.EventsDebugEnable.get(), "updateCompletionStamp Event", outEvent, "taskLevel", outEvent->taskLevel.load()); } } bool CommandQueue::setPerfCountersEnabled() { DEBUG_BREAK_IF(device == nullptr); auto perfCounters = device->getPerformanceCounters(); bool isCcsEngine = EngineHelpers::isCcs(getGpgpuEngine().osContext->getEngineType()); perfCountersEnabled = 
perfCounters->enable(isCcsEngine); if (!perfCountersEnabled) { perfCounters->shutdown(); } return perfCountersEnabled; } PerformanceCounters *CommandQueue::getPerfCounters() { return device->getPerformanceCounters(); } cl_int CommandQueue::enqueueWriteMemObjForUnmap(MemObj *memObj, void *mappedPtr, EventsRequest &eventsRequest) { cl_int retVal = CL_SUCCESS; MapInfo unmapInfo; if (!memObj->findMappedPtr(mappedPtr, unmapInfo)) { return CL_INVALID_VALUE; } if (!unmapInfo.readOnly) { memObj->getMapAllocation(getDevice().getRootDeviceIndex())->setAubWritable(true, GraphicsAllocation::defaultBank); memObj->getMapAllocation(getDevice().getRootDeviceIndex())->setTbxWritable(true, GraphicsAllocation::defaultBank); if (memObj->peekClMemObjType() == CL_MEM_OBJECT_BUFFER) { auto buffer = castToObject(memObj); retVal = enqueueWriteBuffer(buffer, CL_FALSE, unmapInfo.offset[0], unmapInfo.size[0], mappedPtr, memObj->getMapAllocation(getDevice().getRootDeviceIndex()), eventsRequest.numEventsInWaitList, eventsRequest.eventWaitList, eventsRequest.outEvent); } else { auto image = castToObjectOrAbort(memObj); size_t writeOrigin[4] = {unmapInfo.offset[0], unmapInfo.offset[1], unmapInfo.offset[2], 0}; auto mipIdx = getMipLevelOriginIdx(image->peekClMemObjType()); UNRECOVERABLE_IF(mipIdx >= 4); writeOrigin[mipIdx] = unmapInfo.mipLevel; retVal = enqueueWriteImage(image, CL_FALSE, writeOrigin, &unmapInfo.size[0], image->getHostPtrRowPitch(), image->getHostPtrSlicePitch(), mappedPtr, memObj->getMapAllocation(getDevice().getRootDeviceIndex()), eventsRequest.numEventsInWaitList, eventsRequest.eventWaitList, eventsRequest.outEvent); } } else { retVal = enqueueMarkerWithWaitList(eventsRequest.numEventsInWaitList, eventsRequest.eventWaitList, eventsRequest.outEvent); } if (retVal == CL_SUCCESS) { memObj->removeMappedPtr(mappedPtr); if (eventsRequest.outEvent) { auto event = castToObject(*eventsRequest.outEvent); event->setCmdType(CL_COMMAND_UNMAP_MEM_OBJECT); } } return retVal; } void 
*CommandQueue::enqueueReadMemObjForMap(TransferProperties &transferProperties, EventsRequest &eventsRequest, cl_int &errcodeRet) {
    // Map path that reads through an enqueue: computes the pointer handed back to the caller,
    // registers the mapping on the mem object and enqueues a buffer/image read into that pointer
    // (or only a marker for CL_MAP_WRITE_INVALIDATE_REGION). errcodeRet receives the status.
    void *basePtr = transferProperties.memObj->getBasePtrForMap(getDevice().getRootDeviceIndex());
    size_t mapPtrOffset = transferProperties.memObj->calculateOffsetForMapping(transferProperties.offset) + transferProperties.mipPtrOffset;
    // Buffers additionally add the offset reported by the mem object itself.
    if (transferProperties.memObj->peekClMemObjType() == CL_MEM_OBJECT_BUFFER) {
        mapPtrOffset += transferProperties.memObj->getOffset();
    }
    void *returnPtr = ptrOffset(basePtr, mapPtrOffset);

    // Register the mapping first; a rejected registration fails the whole map call.
    if (!transferProperties.memObj->addMappedPtr(returnPtr, transferProperties.memObj->calculateMappedPtrLength(transferProperties.size),
                                                 transferProperties.mapFlags, transferProperties.size, transferProperties.offset, transferProperties.mipLevel,
                                                 transferProperties.memObj->getMapAllocation(getDevice().getRootDeviceIndex()))) {
        errcodeRet = CL_INVALID_OPERATION;
        return nullptr;
    }

    if (transferProperties.mapFlags == CL_MAP_WRITE_INVALIDATE_REGION) {
        // No read is enqueued for this flag, only a marker carrying the wait list and output event.
        errcodeRet = enqueueMarkerWithWaitList(eventsRequest.numEventsInWaitList, eventsRequest.eventWaitList, eventsRequest.outEvent);
    } else {
        if (transferProperties.memObj->peekClMemObjType() == CL_MEM_OBJECT_BUFFER) {
            auto buffer = castToObject(transferProperties.memObj);
            errcodeRet = enqueueReadBuffer(buffer, transferProperties.blocking, transferProperties.offset[0], transferProperties.size[0],
                                           returnPtr, transferProperties.memObj->getMapAllocation(getDevice().getRootDeviceIndex()), eventsRequest.numEventsInWaitList,
                                           eventsRequest.eventWaitList, eventsRequest.outEvent);
        } else {
            auto image = castToObjectOrAbort(transferProperties.memObj);
            size_t readOrigin[4] = {transferProperties.offset[0], transferProperties.offset[1], transferProperties.offset[2], 0};
            // The mip level is stored in the origin slot selected by the image type.
            auto mipIdx = getMipLevelOriginIdx(image->peekClMemObjType());
            UNRECOVERABLE_IF(mipIdx >= 4);
            readOrigin[mipIdx] = transferProperties.mipLevel;
            errcodeRet = enqueueReadImage(image, transferProperties.blocking, readOrigin,
&transferProperties.size[0], image->getHostPtrRowPitch(), image->getHostPtrSlicePitch(), returnPtr, transferProperties.memObj->getMapAllocation(getDevice().getRootDeviceIndex()), eventsRequest.numEventsInWaitList, eventsRequest.eventWaitList, eventsRequest.outEvent); } } if (errcodeRet != CL_SUCCESS) { transferProperties.memObj->removeMappedPtr(returnPtr); return nullptr; } if (eventsRequest.outEvent) { auto event = castToObject(*eventsRequest.outEvent); event->setCmdType(transferProperties.cmdType); } return returnPtr; } void *CommandQueue::enqueueMapMemObject(TransferProperties &transferProperties, EventsRequest &eventsRequest, cl_int &errcodeRet) { if (transferProperties.memObj->mappingOnCpuAllowed()) { return cpuDataTransferHandler(transferProperties, eventsRequest, errcodeRet); } else { return enqueueReadMemObjForMap(transferProperties, eventsRequest, errcodeRet); } } cl_int CommandQueue::enqueueUnmapMemObject(TransferProperties &transferProperties, EventsRequest &eventsRequest) { cl_int retVal = CL_SUCCESS; if (transferProperties.memObj->mappingOnCpuAllowed()) { cpuDataTransferHandler(transferProperties, eventsRequest, retVal); } else { retVal = enqueueWriteMemObjForUnmap(transferProperties.memObj, transferProperties.ptr, eventsRequest); } return retVal; } void *CommandQueue::enqueueMapBuffer(Buffer *buffer, cl_bool blockingMap, cl_map_flags mapFlags, size_t offset, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, cl_int &errcodeRet) { TransferProperties transferProperties(buffer, CL_COMMAND_MAP_BUFFER, mapFlags, blockingMap != CL_FALSE, &offset, &size, nullptr, false, getDevice().getRootDeviceIndex()); EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, event); return enqueueMapMemObject(transferProperties, eventsRequest, errcodeRet); } void *CommandQueue::enqueueMapImage(Image *image, cl_bool blockingMap, cl_map_flags mapFlags, const size_t *origin, const size_t *region, size_t *imageRowPitch, size_t 
*imageSlicePitch, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, cl_int &errcodeRet) {
    // Maps an image: reports the row/slice pitch through the optional out-pointers, then maps
    // the image through enqueueMapMemObject.
    TransferProperties transferProperties(image, CL_COMMAND_MAP_IMAGE, mapFlags, blockingMap != CL_FALSE,
                                          const_cast(origin), const_cast(region), nullptr, false, getDevice().getRootDeviceIndex());
    EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, event);

    // Zero-copy images mapped on the CPU report the image descriptor pitches,
    // everything else reports the host pointer pitches.
    if (image->isMemObjZeroCopy() && image->mappingOnCpuAllowed()) {
        GetInfoHelper::set(imageSlicePitch, image->getImageDesc().image_slice_pitch);
        if (image->getImageDesc().image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
            // There are differences in qPitch programming between Gen8 vs Gen9+ devices.
            // For Gen8 qPitch is distance in rows while Gen9+ it is in pixels.
            // Minimum value of qPitch is 4 and this causes slicePitch = 4*rowPitch on Gen8.
            // To allow zero-copy we have to tell what is correct value rowPitch which should equal to slicePitch.
            GetInfoHelper::set(imageRowPitch, image->getImageDesc().image_slice_pitch);
        } else {
            GetInfoHelper::set(imageRowPitch, image->getImageDesc().image_row_pitch);
        }
    } else {
        GetInfoHelper::set(imageSlicePitch, image->getHostPtrSlicePitch());
        GetInfoHelper::set(imageRowPitch, image->getHostPtrRowPitch());
    }
    // Image types without slices always report a slice pitch of 0.
    if (Image::hasSlices(image->peekClMemObjType()) == false) {
        GetInfoHelper::set(imageSlicePitch, static_cast(0));
    }
    return enqueueMapMemObject(transferProperties, eventsRequest, errcodeRet);
}

// Packs the unmap arguments into TransferProperties / EventsRequest and forwards them
// to the TransferProperties-based enqueueUnmapMemObject overload.
cl_int CommandQueue::enqueueUnmapMemObject(MemObj *memObj, void *mappedPtr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) {
    TransferProperties transferProperties(memObj, CL_COMMAND_UNMAP_MEM_OBJECT, 0, false, nullptr, nullptr, mappedPtr, false, getDevice().getRootDeviceIndex());
    EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, event);

    return enqueueUnmapMemObject(transferProperties, eventsRequest);
}

void CommandQueue::enqueueBlockedMapUnmapOperation(const cl_event *eventWaitList,
                                                   size_t numEventsInWaitlist,
                                                   MapOperationType
opType, MemObj *memObj, MemObjSizeArray ©Size, MemObjOffsetArray ©Offset, bool readOnly, EventBuilder &externalEventBuilder) { EventBuilder internalEventBuilder; EventBuilder *eventBuilder; // check if event will be exposed externally if (externalEventBuilder.getEvent()) { externalEventBuilder.getEvent()->incRefInternal(); eventBuilder = &externalEventBuilder; } else { // it will be an internal event internalEventBuilder.create(this, context); eventBuilder = &internalEventBuilder; } //store task data in event auto cmd = std::unique_ptr(new CommandMapUnmap(opType, *memObj, copySize, copyOffset, readOnly, *this)); eventBuilder->getEvent()->setCommand(std::move(cmd)); //bind output event with input events eventBuilder->addParentEvents(ArrayRef(eventWaitList, numEventsInWaitlist)); eventBuilder->addParentEvent(this->virtualEvent); eventBuilder->finalize(); if (this->virtualEvent) { this->virtualEvent->decRefInternal(); } this->virtualEvent = eventBuilder->getEvent(); } bool CommandQueue::setupDebugSurface(Kernel *kernel) { auto debugSurface = getGpgpuCommandStreamReceiver().getDebugSurfaceAllocation(); auto surfaceState = ptrOffset(reinterpret_cast(kernel->getSurfaceStateHeap()), kernel->getKernelInfo().kernelDescriptor.payloadMappings.implicitArgs.systemThreadSurfaceAddress.bindful); void *addressToPatch = reinterpret_cast(debugSurface->getGpuAddress()); size_t sizeToPatch = debugSurface->getUnderlyingBufferSize(); Buffer::setSurfaceState(&device->getDevice(), surfaceState, false, false, sizeToPatch, addressToPatch, 0, debugSurface, 0, 0, kernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, kernel->areMultipleSubDevicesInContext()); return true; } bool CommandQueue::validateCapability(cl_command_queue_capabilities_intel capability) const { return this->queueCapabilities == CL_QUEUE_DEFAULT_CAPABILITIES_INTEL || isValueSet(this->queueCapabilities, capability); } bool CommandQueue::validateCapabilitiesForEventWaitList(cl_uint 
numEventsInWaitList, const cl_event *waitList) const { for (cl_uint eventIndex = 0u; eventIndex < numEventsInWaitList; eventIndex++) { const Event *event = castToObject(waitList[eventIndex]); if (event->isUserEvent()) { continue; } const CommandQueue *eventCommandQueue = event->getCommandQueue(); const bool crossQueue = this != eventCommandQueue; const cl_command_queue_capabilities_intel createCap = crossQueue ? CL_QUEUE_CAPABILITY_CREATE_CROSS_QUEUE_EVENTS_INTEL : CL_QUEUE_CAPABILITY_CREATE_SINGLE_QUEUE_EVENTS_INTEL; const cl_command_queue_capabilities_intel waitCap = crossQueue ? CL_QUEUE_CAPABILITY_CROSS_QUEUE_EVENT_WAIT_LIST_INTEL : CL_QUEUE_CAPABILITY_SINGLE_QUEUE_EVENT_WAIT_LIST_INTEL; if (!validateCapability(waitCap) || !eventCommandQueue->validateCapability(createCap)) { return false; } } return true; } bool CommandQueue::validateCapabilityForOperation(cl_command_queue_capabilities_intel capability, cl_uint numEventsInWaitList, const cl_event *waitList, const cl_event *outEvent) const { const bool operationValid = validateCapability(capability); const bool waitListValid = validateCapabilitiesForEventWaitList(numEventsInWaitList, waitList); const bool outEventValid = outEvent == nullptr || validateCapability(CL_QUEUE_CAPABILITY_CREATE_SINGLE_QUEUE_EVENTS_INTEL) || validateCapability(CL_QUEUE_CAPABILITY_CREATE_CROSS_QUEUE_EVENTS_INTEL); return operationValid && waitListValid && outEventValid; } cl_uint CommandQueue::getQueueFamilyIndex() const { if (isQueueFamilySelected()) { return queueFamilyIndex; } else { const auto &hwInfo = device->getHardwareInfo(); const auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); const auto engineGroupType = hwHelper.getEngineGroupType(gpgpuEngine->getEngineType(), gpgpuEngine->getEngineUsage(), hwInfo); const auto familyIndex = device->getDevice().getEngineGroupIndexFromEngineGroupType(engineGroupType); return static_cast(familyIndex); } } void CommandQueue::updateBcsTaskCount(aub_stream::EngineType 
bcsEngineType, uint32_t newBcsTaskCount) { CopyEngineState &state = bcsStates[EngineHelpers::getBcsIndex(bcsEngineType)]; state.engineType = bcsEngineType; state.taskCount = newBcsTaskCount; } uint32_t CommandQueue::peekBcsTaskCount(aub_stream::EngineType bcsEngineType) const { const CopyEngineState &state = bcsStates[EngineHelpers::getBcsIndex(bcsEngineType)]; return state.taskCount; } bool CommandQueue::isTextureCacheFlushNeeded(uint32_t commandType) const { return commandType == CL_COMMAND_COPY_IMAGE && getGpgpuCommandStreamReceiver().isDirectSubmissionEnabled(); } IndirectHeap &CommandQueue::getIndirectHeap(IndirectHeap::Type heapType, size_t minRequiredSize) { return getGpgpuCommandStreamReceiver().getIndirectHeap(heapType, minRequiredSize); } void CommandQueue::allocateHeapMemory(IndirectHeap::Type heapType, size_t minRequiredSize, IndirectHeap *&indirectHeap) { getGpgpuCommandStreamReceiver().allocateHeapMemory(heapType, minRequiredSize, indirectHeap); } void CommandQueue::releaseIndirectHeap(IndirectHeap::Type heapType) { getGpgpuCommandStreamReceiver().releaseIndirectHeap(heapType); } void CommandQueue::obtainNewTimestampPacketNodes(size_t numberOfNodes, TimestampPacketContainer &previousNodes, bool clearAllDependencies, CommandStreamReceiver &csr) { TagAllocatorBase *allocator = csr.getTimestampPacketAllocator(); previousNodes.swapNodes(*timestampPacketContainer); if (clearAllDependencies) { previousNodes.moveNodesToNewContainer(*deferredTimestampPackets); } DEBUG_BREAK_IF(timestampPacketContainer->peekNodes().size() > 0); for (size_t i = 0; i < numberOfNodes; i++) { timestampPacketContainer->add(allocator->getTag()); } } size_t CommandQueue::estimateTimestampPacketNodesCount(const MultiDispatchInfo &dispatchInfo) const { size_t nodesCount = dispatchInfo.size(); auto mainKernel = dispatchInfo.peekMainKernel(); if (obtainTimestampPacketForCacheFlush(mainKernel->requiresCacheFlushCommand(*this))) { nodesCount++; } return nodesCount; } bool 
CommandQueue::bufferCpuCopyAllowed(Buffer *buffer, cl_command_type commandType, cl_bool blocking, size_t size, void *ptr, cl_uint numEventsInWaitList, const cl_event *eventWaitList) { auto debugVariableSet = false; // Requested by debug variable or allowed by Buffer if (CL_COMMAND_READ_BUFFER == commandType && DebugManager.flags.DoCpuCopyOnReadBuffer.get() != -1) { if (DebugManager.flags.DoCpuCopyOnReadBuffer.get() == 0) { return false; } debugVariableSet = true; } if (CL_COMMAND_WRITE_BUFFER == commandType && DebugManager.flags.DoCpuCopyOnWriteBuffer.get() != -1) { if (DebugManager.flags.DoCpuCopyOnWriteBuffer.get() == 0) { return false; } debugVariableSet = true; } //if we are blocked by user events, we can't service the call on CPU if (Event::checkUserEventDependencies(numEventsInWaitList, eventWaitList)) { return false; } //check if buffer is compatible if (!buffer->isReadWriteOnCpuAllowed(device->getDevice())) { return false; } if (buffer->getMemoryManager() && buffer->getMemoryManager()->isCpuCopyRequired(ptr)) { return true; } if (debugVariableSet) { return true; } //non blocking transfers are not expected to be serviced by CPU //we do not want to artifically stall the pipeline to allow CPU access if (blocking == CL_FALSE) { return false; } //check if it is beneficial to do transfer on CPU if (!buffer->isReadWriteOnCpuPreferred(ptr, size, getDevice())) { return false; } //make sure that event wait list is empty if (numEventsInWaitList == 0) { return true; } return false; } bool CommandQueue::queueDependenciesClearRequired() const { return isOOQEnabled() || DebugManager.flags.OmitTimestampPacketDependencies.get(); } bool CommandQueue::blitEnqueueAllowed(const CsrSelectionArgs &args) const { bool blitEnqueueAllowed = getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled() || this->isCopyOnly; if (DebugManager.flags.EnableBlitterForEnqueueOperations.get() != -1) { blitEnqueueAllowed = DebugManager.flags.EnableBlitterForEnqueueOperations.get(); } if 
(!blitEnqueueAllowed) { return false; } switch (args.cmdType) { case CL_COMMAND_READ_BUFFER: case CL_COMMAND_WRITE_BUFFER: case CL_COMMAND_COPY_BUFFER: case CL_COMMAND_READ_BUFFER_RECT: case CL_COMMAND_WRITE_BUFFER_RECT: case CL_COMMAND_COPY_BUFFER_RECT: case CL_COMMAND_SVM_MEMCPY: case CL_COMMAND_SVM_MAP: case CL_COMMAND_SVM_UNMAP: return true; case CL_COMMAND_READ_IMAGE: return blitEnqueueImageAllowed(args.srcResource.imageOrigin, args.size, *args.srcResource.image); case CL_COMMAND_WRITE_IMAGE: return blitEnqueueImageAllowed(args.dstResource.imageOrigin, args.size, *args.dstResource.image); case CL_COMMAND_COPY_IMAGE: return blitEnqueueImageAllowed(args.srcResource.imageOrigin, args.size, *args.srcResource.image) && blitEnqueueImageAllowed(args.dstResource.imageOrigin, args.size, *args.dstResource.image); default: return false; } } bool CommandQueue::blitEnqueueImageAllowed(const size_t *origin, const size_t *region, const Image &image) const { const auto &hwInfo = device->getHardwareInfo(); const auto &hwInfoConfig = HwInfoConfig::get(hwInfo.platform.eProductFamily); auto blitEnqueueImageAllowed = hwInfoConfig->isBlitterForImagesSupported(); if (DebugManager.flags.EnableBlitterForEnqueueImageOperations.get() != -1) { blitEnqueueImageAllowed = DebugManager.flags.EnableBlitterForEnqueueImageOperations.get(); } blitEnqueueImageAllowed &= !isMipMapped(image.getImageDesc()); const auto &defaultGmm = image.getGraphicsAllocation(device->getRootDeviceIndex())->getDefaultGmm(); if (defaultGmm != nullptr) { auto isTile64 = defaultGmm->gmmResourceInfo->getResourceFlags()->Info.Tile64; auto imageType = image.getImageDesc().image_type; if (isTile64 && (imageType == CL_MEM_OBJECT_IMAGE3D)) { blitEnqueueImageAllowed &= hwInfoConfig->isTile64With3DSurfaceOnBCSSupported(hwInfo); } } return blitEnqueueImageAllowed; } bool CommandQueue::isBlockedCommandStreamRequired(uint32_t commandType, const EventsRequest &eventsRequest, bool blockedQueue, bool isMarkerWithProfiling) const { 
if (!blockedQueue) { return false; } if (isCacheFlushCommand(commandType) || !isCommandWithoutKernel(commandType) || isMarkerWithProfiling) { return true; } if (CL_COMMAND_BARRIER == commandType || CL_COMMAND_MARKER == commandType) { auto timestampPacketWriteEnabled = getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled(); if (timestampPacketWriteEnabled || context->getRootDeviceIndices().size() > 1) { for (size_t i = 0; i < eventsRequest.numEventsInWaitList; i++) { auto waitlistEvent = castToObjectOrAbort(eventsRequest.eventWaitList[i]); if (timestampPacketWriteEnabled && waitlistEvent->getTimestampPacketNodes()) { return true; } if (waitlistEvent->getCommandQueue() && waitlistEvent->getCommandQueue()->getDevice().getRootDeviceIndex() != this->getDevice().getRootDeviceIndex()) { return true; } } } } return false; } void CommandQueue::storeProperties(const cl_queue_properties *properties) { if (properties) { for (size_t i = 0; properties[i] != 0; i += 2) { propertiesVector.push_back(properties[i]); propertiesVector.push_back(properties[i + 1]); } propertiesVector.push_back(0); } } void CommandQueue::processProperties(const cl_queue_properties *properties) { if (properties != nullptr) { bool specificEngineSelected = false; cl_uint selectedQueueFamilyIndex = std::numeric_limits::max(); cl_uint selectedQueueIndex = std::numeric_limits::max(); for (auto currentProperties = properties; *currentProperties != 0; currentProperties += 2) { switch (*currentProperties) { case CL_QUEUE_FAMILY_INTEL: selectedQueueFamilyIndex = static_cast(*(currentProperties + 1)); specificEngineSelected = true; break; case CL_QUEUE_INDEX_INTEL: selectedQueueIndex = static_cast(*(currentProperties + 1)); specificEngineSelected = true; break; } } if (specificEngineSelected) { this->queueFamilySelected = true; if (!getDevice().hasRootCsr()) { const auto &engine = getDevice().getRegularEngineGroups()[selectedQueueFamilyIndex].engines[selectedQueueIndex]; auto engineType = 
engine.getEngineType(); auto engineUsage = engine.getEngineUsage(); if ((DebugManager.flags.EngineUsageHint.get() != -1) && (getDevice().tryGetEngine(engineType, static_cast(DebugManager.flags.EngineUsageHint.get())) != nullptr)) { engineUsage = static_cast(DebugManager.flags.EngineUsageHint.get()); } this->overrideEngine(engineType, engineUsage); this->queueCapabilities = getClDevice().getDeviceInfo().queueFamilyProperties[selectedQueueFamilyIndex].capabilities; this->queueFamilyIndex = selectedQueueFamilyIndex; this->queueIndexWithinFamily = selectedQueueIndex; } } } requiresCacheFlushAfterWalker = device && (device->getDeviceInfo().parentDevice != nullptr); } void CommandQueue::overrideEngine(aub_stream::EngineType engineType, EngineUsage engineUsage) { const HardwareInfo &hwInfo = getDevice().getHardwareInfo(); const HwHelper &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); const EngineGroupType engineGroupType = hwHelper.getEngineGroupType(engineType, engineUsage, hwInfo); const bool isEngineCopyOnly = EngineHelper::isCopyOnlyEngineType(engineGroupType); if (isEngineCopyOnly) { std::fill(bcsEngines.begin(), bcsEngines.end(), nullptr); bcsEngines[EngineHelpers::getBcsIndex(engineType)] = &device->getEngine(engineType, EngineUsage::Regular); bcsEngineTypes = {engineType}; timestampPacketContainer = std::make_unique(); deferredTimestampPackets = std::make_unique(); isCopyOnly = true; } else { gpgpuEngine = &device->getEngine(engineType, engineUsage); } } void CommandQueue::aubCaptureHook(bool &blocking, bool &clearAllDependencies, const MultiDispatchInfo &multiDispatchInfo) { if (DebugManager.flags.AUBDumpSubCaptureMode.get()) { auto status = getGpgpuCommandStreamReceiver().checkAndActivateAubSubCapture(multiDispatchInfo.empty() ? 
"" : multiDispatchInfo.peekMainKernel()->getDescriptor().kernelMetadata.kernelName); if (!status.isActive) { // make each enqueue blocking when subcapture is not active to split batch buffer blocking = true; } else if (!status.wasActiveInPreviousEnqueue) { // omit timestamp packet dependencies dependencies upon subcapture activation clearAllDependencies = true; } } if (getGpgpuCommandStreamReceiver().getType() > CommandStreamReceiverType::CSR_HW) { for (auto &dispatchInfo : multiDispatchInfo) { auto &kernelName = dispatchInfo.getKernel()->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName; getGpgpuCommandStreamReceiver().addAubComment(kernelName.c_str()); } } } void CommandQueue::assignDataToOverwrittenBcsNode(TagNodeBase *node) { std::array timestampData; timestampData.fill(std::numeric_limits::max()); if (node->refCountFetchSub(0) <= 2) { //One ref from deferred container and one from bcs barrier container it is going to be released from for (uint32_t i = 0; i < node->getPacketsUsed(); i++) { node->assignDataToAllTimestamps(i, timestampData.data()); } } } bool CommandQueue::isWaitForTimestampsEnabled() const { auto &hwHelper = HwHelper::get(getDevice().getHardwareInfo().platform.eRenderCoreFamily); auto enabled = CommandQueue::isTimestampWaitEnabled(); enabled &= hwHelper.isTimestampWaitSupported(); switch (DebugManager.flags.EnableTimestampWait.get()) { case 0: enabled = false; break; case 1: enabled = getGpgpuCommandStreamReceiver().isUpdateTagFromWaitEnabled(); break; case 2: enabled = getGpgpuCommandStreamReceiver().isDirectSubmissionEnabled(); break; case 3: enabled = getGpgpuCommandStreamReceiver().isAnyDirectSubmissionEnabled(); break; case 4: enabled = true; break; } return enabled; } WaitStatus CommandQueue::waitForAllEngines(bool blockedQueue, PrintfHandler *printfHandler, bool cleanTemporaryAllocationsList) { if (blockedQueue) { while (isQueueBlocked()) { } } auto waitedOnTimestamps = waitForTimestamps(taskCount); TimestampPacketContainer 
nodesToRelease; if (deferredTimestampPackets) { deferredTimestampPackets->swapNodes(nodesToRelease); } StackVec activeBcsStates{}; for (CopyEngineState &state : this->bcsStates) { if (state.isValid()) { activeBcsStates.push_back(state); } } const auto waitStatus = waitUntilComplete(taskCount, activeBcsStates, flushStamp->peekStamp(), false, cleanTemporaryAllocationsList, waitedOnTimestamps); if (printfHandler) { printfHandler->printEnqueueOutput(); } return waitStatus; } void CommandQueue::setupBarrierTimestampForBcsEngines(aub_stream::EngineType engineType, TimestampPacketDependencies ×tampPacketDependencies) { if (!getGpgpuCommandStreamReceiver().isStallingCommandsOnNextFlushRequired()) { return; } // Ensure we have exactly 1 barrier node. if (timestampPacketDependencies.barrierNodes.peekNodes().empty()) { timestampPacketDependencies.barrierNodes.add(getGpgpuCommandStreamReceiver().getTimestampPacketAllocator()->getTag()); } if (isOOQEnabled()) { // Barrier node will be signalled on gpgpuCsr. Save it for later use on blitters. for (auto currentBcsIndex = 0u; currentBcsIndex < bcsTimestampPacketContainers.size(); currentBcsIndex++) { const auto currentBcsEngineType = EngineHelpers::mapBcsIndexToEngineType(currentBcsIndex, true); if (currentBcsEngineType == engineType) { // Node is already added to barrierNodes for this engine, no need to save it. continue; } // Save latest timestamp (override previous, if any). 
if (!bcsTimestampPacketContainers[currentBcsIndex].lastBarrierToWaitFor.peekNodes().empty()) { for (auto &node : bcsTimestampPacketContainers[currentBcsIndex].lastBarrierToWaitFor.peekNodes()) { this->assignDataToOverwrittenBcsNode(node); } } TimestampPacketContainer newContainer{}; newContainer.assignAndIncrementNodesRefCounts(timestampPacketDependencies.barrierNodes); bcsTimestampPacketContainers[currentBcsIndex].lastBarrierToWaitFor.swapNodes(newContainer); } } } void CommandQueue::processBarrierTimestampForBcsEngine(aub_stream::EngineType bcsEngineType, TimestampPacketDependencies &blitDependencies) { BcsTimestampPacketContainers &bcsContainers = bcsTimestampPacketContainers[EngineHelpers::getBcsIndex(bcsEngineType)]; bcsContainers.lastBarrierToWaitFor.moveNodesToNewContainer(blitDependencies.barrierNodes); } void CommandQueue::setLastBcsPacket(aub_stream::EngineType bcsEngineType) { if (isOOQEnabled()) { TimestampPacketContainer dummyContainer{}; dummyContainer.assignAndIncrementNodesRefCounts(*this->timestampPacketContainer); BcsTimestampPacketContainers &bcsContainers = bcsTimestampPacketContainers[EngineHelpers::getBcsIndex(bcsEngineType)]; bcsContainers.lastSignalledPacket.swapNodes(dummyContainer); } } void CommandQueue::fillCsrDependenciesWithLastBcsPackets(CsrDependencies &csrDeps) { for (BcsTimestampPacketContainers &bcsContainers : bcsTimestampPacketContainers) { if (bcsContainers.lastSignalledPacket.peekNodes().empty()) { continue; } csrDeps.timestampPacketContainer.push_back(&bcsContainers.lastSignalledPacket); } } void CommandQueue::clearLastBcsPackets() { for (BcsTimestampPacketContainers &bcsContainers : bcsTimestampPacketContainers) { bcsContainers.lastSignalledPacket.moveNodesToNewContainer(*deferredTimestampPackets); } } } // namespace NEO compute-runtime-22.14.22890/opencl/source/command_queue/command_queue.h000066400000000000000000000542761422164147700257650ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * 
SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/engine_control.h" #include "shared/source/utilities/range.h" #include "opencl/source/command_queue/copy_engine_state.h" #include "opencl/source/command_queue/csr_selection_args.h" #include "opencl/source/event/event.h" #include "opencl/source/helpers/base_object.h" #include "opencl/source/helpers/dispatch_info.h" #include "opencl/source/helpers/enqueue_properties.h" #include "opencl/source/helpers/task_information.h" #include #include namespace NEO { class BarrierCommand; class Buffer; class LinearStream; class ClDevice; class Context; class Device; class Event; class EventBuilder; class FlushStampTracker; class Image; class IndirectHeap; class Kernel; class MemObj; class PerformanceCounters; struct CompletionStamp; struct MultiDispatchInfo; enum class QueuePriority { LOW, MEDIUM, HIGH }; template <> struct OpenCLObjectMapper<_cl_command_queue> { typedef class CommandQueue DerivedType; }; class CommandQueue : public BaseObject<_cl_command_queue> { public: static const cl_ulong objectMagic = 0x1234567890987654LL; static CommandQueue *create(Context *context, ClDevice *device, const cl_queue_properties *properties, bool internalUsage, cl_int &errcodeRet); CommandQueue() = delete; CommandQueue(Context *context, ClDevice *device, const cl_queue_properties *properties, bool internalUsage); CommandQueue &operator=(const CommandQueue &) = delete; CommandQueue(const CommandQueue &) = delete; ~CommandQueue() override; // API entry points virtual cl_int enqueueCopyImage(Image *srcImage, Image *dstImage, const size_t *srcOrigin, const size_t *dstOrigin, const size_t *region, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) = 0; virtual cl_int enqueueFillImage(Image *image, const void *fillColor, const size_t *origin, const size_t *region, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) = 0; virtual cl_int enqueueFillBuffer(Buffer *buffer, 
const void *pattern, size_t patternSize, size_t offset, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) = 0; virtual cl_int enqueueKernel(Kernel *kernel, cl_uint workDim, const size_t *globalWorkOffset, const size_t *globalWorkSize, const size_t *localWorkSize, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) = 0; virtual cl_int enqueueBarrierWithWaitList(cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) = 0; MOCKABLE_VIRTUAL void *enqueueMapBuffer(Buffer *buffer, cl_bool blockingMap, cl_map_flags mapFlags, size_t offset, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, cl_int &errcodeRet); MOCKABLE_VIRTUAL void *enqueueMapImage(Image *image, cl_bool blockingMap, cl_map_flags mapFlags, const size_t *origin, const size_t *region, size_t *imageRowPitch, size_t *imageSlicePitch, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, cl_int &errcodeRet); MOCKABLE_VIRTUAL cl_int enqueueUnmapMemObject(MemObj *memObj, void *mappedPtr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); virtual cl_int enqueueSVMMap(cl_bool blockingMap, cl_map_flags mapFlags, void *svmPtr, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool externalAppCall) = 0; virtual cl_int enqueueSVMUnmap(void *svmPtr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool externalAppCall) = 0; virtual cl_int enqueueSVMFree(cl_uint numSvmPointers, void *svmPointers[], void(CL_CALLBACK *pfnFreeFunc)(cl_command_queue queue, cl_uint numSvmPointers, void *svmPointers[], void *userData), void *userData, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) = 0; virtual cl_int enqueueSVMMemcpy(cl_bool blockingCopy, void *dstPtr, const void *srcPtr, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event 
*event) = 0; virtual cl_int enqueueSVMMemFill(void *svmPtr, const void *pattern, size_t patternSize, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) = 0; virtual cl_int enqueueMarkerWithWaitList(cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) = 0; virtual cl_int enqueueMigrateMemObjects(cl_uint numMemObjects, const cl_mem *memObjects, cl_mem_migration_flags flags, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) = 0; virtual cl_int enqueueSVMMigrateMem(cl_uint numSvmPointers, const void **svmPointers, const size_t *sizes, const cl_mem_migration_flags flags, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) = 0; virtual cl_int enqueueCopyBuffer(Buffer *srcBuffer, Buffer *dstBuffer, size_t srcOffset, size_t dstOffset, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) = 0; virtual cl_int enqueueReadBuffer(Buffer *buffer, cl_bool blockingRead, size_t offset, size_t size, void *ptr, GraphicsAllocation *mapAllocation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) = 0; virtual cl_int enqueueReadImage(Image *srcImage, cl_bool blockingRead, const size_t *origin, const size_t *region, size_t rowPitch, size_t slicePitch, void *ptr, GraphicsAllocation *mapAllocation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) = 0; virtual cl_int enqueueWriteBuffer(Buffer *buffer, cl_bool blockingWrite, size_t offset, size_t cb, const void *ptr, GraphicsAllocation *mapAllocation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) = 0; virtual cl_int enqueueWriteImage(Image *dstImage, cl_bool blockingWrite, const size_t *origin, const size_t *region, size_t inputRowPitch, size_t inputSlicePitch, const void *ptr, GraphicsAllocation *mapAllocation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) = 0; virtual cl_int 
enqueueCopyBufferRect(Buffer *srcBuffer, Buffer *dstBuffer, const size_t *srcOrigin, const size_t *dstOrigin, const size_t *region, size_t srcRowPitch, size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) = 0; virtual cl_int enqueueWriteBufferRect(Buffer *buffer, cl_bool blockingWrite, const size_t *bufferOrigin, const size_t *hostOrigin, const size_t *region, size_t bufferRowPitch, size_t bufferSlicePitch, size_t hostRowPitch, size_t hostSlicePitch, const void *ptr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) = 0; virtual cl_int enqueueReadBufferRect(Buffer *buffer, cl_bool blockingRead, const size_t *bufferOrigin, const size_t *hostOrigin, const size_t *region, size_t bufferRowPitch, size_t bufferSlicePitch, size_t hostRowPitch, size_t hostSlicePitch, void *ptr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) = 0; virtual cl_int enqueueCopyBufferToImage(Buffer *srcBuffer, Image *dstImage, size_t srcOffset, const size_t *dstOrigin, const size_t *region, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) = 0; virtual cl_int enqueueCopyImageToBuffer(Image *srcImage, Buffer *dstBuffer, const size_t *srcOrigin, const size_t *region, size_t dstOffset, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) = 0; cl_int enqueueAcquireSharedObjects(cl_uint numObjects, const cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *oclEvent, cl_uint cmdType); cl_int enqueueReleaseSharedObjects(cl_uint numObjects, const cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *oclEvent, cl_uint cmdType); MOCKABLE_VIRTUAL void *cpuDataTransferHandler(TransferProperties &transferProperties, EventsRequest &eventsRequest, cl_int &retVal); virtual cl_int enqueueResourceBarrier(BarrierCommand *resourceBarrier, cl_uint 
numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) = 0; virtual cl_int finish() = 0; virtual cl_int flush() = 0; void updateFromCompletionStamp(const CompletionStamp &completionStamp, Event *outEvent); virtual bool isCacheFlushCommand(uint32_t commandType) const { return false; } cl_int getCommandQueueInfo(cl_command_queue_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet); uint32_t getHwTag() const; volatile uint32_t *getHwTagAddress() const; bool isCompleted(uint32_t gpgpuTaskCount, CopyEngineState bcsState) const; bool isWaitForTimestampsEnabled() const; virtual bool waitForTimestamps(uint32_t taskCount) = 0; MOCKABLE_VIRTUAL bool isQueueBlocked(); MOCKABLE_VIRTUAL WaitStatus waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList, bool skipWait); MOCKABLE_VIRTUAL WaitStatus waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) { return this->waitUntilComplete(gpgpuTaskCountToWait, copyEnginesToWait, flushStampToWait, useQuickKmdSleep, true, false); } MOCKABLE_VIRTUAL WaitStatus waitForAllEngines(bool blockedQueue, PrintfHandler *printfHandler, bool cleanTemporaryAllocationsList); MOCKABLE_VIRTUAL WaitStatus waitForAllEngines(bool blockedQueue, PrintfHandler *printfHandler) { return this->waitForAllEngines(blockedQueue, printfHandler, true); } static uint32_t getTaskLevelFromWaitList(uint32_t taskLevel, cl_uint numEventsInWaitList, const cl_event *eventWaitList); MOCKABLE_VIRTUAL CommandStreamReceiver &getGpgpuCommandStreamReceiver() const; MOCKABLE_VIRTUAL CommandStreamReceiver *getBcsCommandStreamReceiver(aub_stream::EngineType bcsEngineType) const; CommandStreamReceiver *getBcsForAuxTranslation() const; MOCKABLE_VIRTUAL CommandStreamReceiver &selectCsrForBuiltinOperation(const CsrSelectionArgs &args) const; Device &getDevice() const 
noexcept; ClDevice &getClDevice() const { return *device; } Context &getContext() const { return *context; } Context *getContextPtr() const { return context; } EngineControl &getGpgpuEngine() const { return *gpgpuEngine; } MOCKABLE_VIRTUAL LinearStream &getCS(size_t minRequiredSize); IndirectHeap &getIndirectHeap(IndirectHeap::Type heapType, size_t minRequiredSize); void allocateHeapMemory(IndirectHeap::Type heapType, size_t minRequiredSize, IndirectHeap *&indirectHeap); static bool isAssignEngineRoundRobinEnabled(); static bool isTimestampWaitEnabled(); MOCKABLE_VIRTUAL void releaseIndirectHeap(IndirectHeap::Type heapType); void releaseVirtualEvent() { if (this->virtualEvent != nullptr) { this->virtualEvent->decRefInternal(); this->virtualEvent = nullptr; } } cl_command_queue_properties getCommandQueueProperties() const { return commandQueueProperties; } bool isProfilingEnabled() const { return !!(this->getCommandQueueProperties() & CL_QUEUE_PROFILING_ENABLE); } bool isOOQEnabled() const { return !!(this->getCommandQueueProperties() & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE); } bool isPerfCountersEnabled() const { return perfCountersEnabled; } PerformanceCounters *getPerfCounters(); bool setPerfCountersEnabled(); void setIsSpecialCommandQueue(bool newValue) { this->isSpecialCommandQueue = newValue; } bool isSpecial() { return this->isSpecialCommandQueue; } QueuePriority getPriority() const { return priority; } QueueThrottle getThrottle() const { return throttle; } const TimestampPacketContainer *getTimestampPacketContainer() const { return timestampPacketContainer.get(); } const std::vector &getPropertiesVector() const { return propertiesVector; } void enqueueBlockedMapUnmapOperation(const cl_event *eventWaitList, size_t numEventsInWaitlist, MapOperationType opType, MemObj *memObj, MemObjSizeArray ©Size, MemObjOffsetArray ©Offset, bool readOnly, EventBuilder &externalEventBuilder); MOCKABLE_VIRTUAL bool setupDebugSurface(Kernel *kernel); bool 
validateCapability(cl_command_queue_capabilities_intel capability) const; bool validateCapabilitiesForEventWaitList(cl_uint numEventsInWaitList, const cl_event *waitList) const; bool validateCapabilityForOperation(cl_command_queue_capabilities_intel capability, cl_uint numEventsInWaitList, const cl_event *waitList, const cl_event *outEvent) const; cl_uint getQueueFamilyIndex() const; cl_uint getQueueIndexWithinFamily() const { return queueIndexWithinFamily; } bool isQueueFamilySelected() const { return queueFamilySelected; } bool getRequiresCacheFlushAfterWalker() const { return requiresCacheFlushAfterWalker; } template static PtrType convertAddressWithOffsetToGpuVa(PtrType ptr, InternalMemoryType memoryType, GraphicsAllocation &allocation); void updateBcsTaskCount(aub_stream::EngineType bcsEngineType, uint32_t newBcsTaskCount); uint32_t peekBcsTaskCount(aub_stream::EngineType bcsEngineType) const; void updateLatestSentEnqueueType(EnqueueProperties::Operation newEnqueueType) { this->latestSentEnqueueType = newEnqueueType; } EnqueueProperties::Operation peekLatestSentEnqueueOperation() { return this->latestSentEnqueueType; } void setupBarrierTimestampForBcsEngines(aub_stream::EngineType engineType, TimestampPacketDependencies ×tampPacketDependencies); void processBarrierTimestampForBcsEngine(aub_stream::EngineType bcsEngineType, TimestampPacketDependencies &blitDependencies); void setLastBcsPacket(aub_stream::EngineType bcsEngineType); void fillCsrDependenciesWithLastBcsPackets(CsrDependencies &csrDeps); void clearLastBcsPackets(); // taskCount of last task uint32_t taskCount = 0; // current taskLevel. Used for determining if a PIPE_CONTROL is needed. 
uint32_t taskLevel = 0; std::unique_ptr flushStamp; // virtual event that holds last Enqueue information Event *virtualEvent = nullptr; size_t estimateTimestampPacketNodesCount(const MultiDispatchInfo &dispatchInfo) const; uint64_t getSliceCount() const { return sliceCount; } TimestampPacketContainer *getDeferredTimestampPackets() const { return deferredTimestampPackets.get(); } uint64_t dispatchHints = 0; bool isTextureCacheFlushNeeded(uint32_t commandType) const; protected: void *enqueueReadMemObjForMap(TransferProperties &transferProperties, EventsRequest &eventsRequest, cl_int &errcodeRet); cl_int enqueueWriteMemObjForUnmap(MemObj *memObj, void *mappedPtr, EventsRequest &eventsRequest); void *enqueueMapMemObject(TransferProperties &transferProperties, EventsRequest &eventsRequest, cl_int &errcodeRet); cl_int enqueueUnmapMemObject(TransferProperties &transferProperties, EventsRequest &eventsRequest); virtual void obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueueStatus, unsigned int commandType){}; bool isBlockedCommandStreamRequired(uint32_t commandType, const EventsRequest &eventsRequest, bool blockedQueue, bool isMarkerWithProfiling) const; MOCKABLE_VIRTUAL void obtainNewTimestampPacketNodes(size_t numberOfNodes, TimestampPacketContainer &previousNodes, bool clearAllDependencies, CommandStreamReceiver &csr); void storeProperties(const cl_queue_properties *properties); void processProperties(const cl_queue_properties *properties); void overrideEngine(aub_stream::EngineType engineType, EngineUsage engineUsage); bool bufferCpuCopyAllowed(Buffer *buffer, cl_command_type commandType, cl_bool blocking, size_t size, void *ptr, cl_uint numEventsInWaitList, const cl_event *eventWaitList); void providePerformanceHint(TransferProperties &transferProperties); bool queueDependenciesClearRequired() const; bool blitEnqueueAllowed(const CsrSelectionArgs &args) const; inline bool 
shouldFlushDC(uint32_t commandType, PrintfHandler *printfHandler) const { return (commandType == CL_COMMAND_READ_BUFFER || commandType == CL_COMMAND_READ_BUFFER_RECT || commandType == CL_COMMAND_READ_IMAGE || commandType == CL_COMMAND_SVM_MAP || printfHandler || isTextureCacheFlushNeeded(commandType)); } MOCKABLE_VIRTUAL bool blitEnqueueImageAllowed(const size_t *origin, const size_t *region, const Image &image) const; void aubCaptureHook(bool &blocking, bool &clearAllDependencies, const MultiDispatchInfo &multiDispatchInfo); virtual bool obtainTimestampPacketForCacheFlush(bool isCacheFlushRequired) const = 0; void assignDataToOverwrittenBcsNode(TagNodeBase *node); Context *context = nullptr; ClDevice *device = nullptr; EngineControl *gpgpuEngine = nullptr; std::array bcsEngines = {}; std::vector bcsEngineTypes = {}; cl_command_queue_properties commandQueueProperties = 0; std::vector propertiesVector; cl_command_queue_capabilities_intel queueCapabilities = CL_QUEUE_DEFAULT_CAPABILITIES_INTEL; cl_uint queueFamilyIndex = 0; cl_uint queueIndexWithinFamily = 0; bool queueFamilySelected = false; QueuePriority priority = QueuePriority::MEDIUM; QueueThrottle throttle = QueueThrottle::MEDIUM; EnqueueProperties::Operation latestSentEnqueueType = EnqueueProperties::Operation::None; uint64_t sliceCount = QueueSliceCount::defaultSliceCount; std::array bcsStates = {}; bool perfCountersEnabled = false; bool isCopyOnly = false; LinearStream *commandStream = nullptr; bool isSpecialCommandQueue = false; bool requiresCacheFlushAfterWalker = false; std::unique_ptr deferredTimestampPackets; std::unique_ptr timestampPacketContainer; struct BcsTimestampPacketContainers { TimestampPacketContainer lastBarrierToWaitFor; TimestampPacketContainer lastSignalledPacket; }; std::array bcsTimestampPacketContainers; }; template PtrType CommandQueue::convertAddressWithOffsetToGpuVa(PtrType ptr, InternalMemoryType memoryType, GraphicsAllocation &allocation) { // If this is device or shared USM 
pointer, it is already a gpuVA and we don't have to do anything. // Otherwise, we assume this is a cpuVA and we have to convert to gpuVA, while preserving offset from allocation start. const bool isCpuPtr = (memoryType != DEVICE_UNIFIED_MEMORY) && (memoryType != SHARED_UNIFIED_MEMORY); if (isCpuPtr) { size_t dstOffset = ptrDiff(ptr, allocation.getUnderlyingBuffer()); ptr = reinterpret_cast(allocation.getGpuAddress() + dstOffset); } return ptr; } using CommandQueueCreateFunc = CommandQueue *(*)(Context *context, ClDevice *device, const cl_queue_properties *properties, bool internalUsage); } // namespace NEO compute-runtime-22.14.22890/opencl/source/command_queue/command_queue_hw.h000066400000000000000000000701231422164147700264500ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/command_stream/preemption.h" #include "shared/source/helpers/engine_control.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/command_queue/gpgpu_walker.h" #include "opencl/source/helpers/dispatch_info.h" #include "opencl/source/helpers/queue_helpers.h" #include "opencl/source/mem_obj/mem_obj.h" #include "opencl/source/program/printf_handler.h" #include namespace NEO { class EventBuilder; struct EnqueueProperties; template class CommandQueueHw : public CommandQueue { using BaseClass = CommandQueue; public: CommandQueueHw(Context *context, ClDevice *device, const cl_queue_properties *properties, bool internalUsage) : BaseClass(context, device, properties, internalUsage) { auto clPriority = getCmdQueueProperties(properties, CL_QUEUE_PRIORITY_KHR); if (clPriority & static_cast(CL_QUEUE_PRIORITY_LOW_KHR)) { priority = QueuePriority::LOW; 
this->gpgpuEngine = &device->getNearestGenericSubDevice(0)->getEngine(getChosenEngineType(device->getHardwareInfo()), EngineUsage::LowPriority); } else if (clPriority & static_cast(CL_QUEUE_PRIORITY_MED_KHR)) { priority = QueuePriority::MEDIUM; } else if (clPriority & static_cast(CL_QUEUE_PRIORITY_HIGH_KHR)) { priority = QueuePriority::HIGH; } auto clThrottle = getCmdQueueProperties(properties, CL_QUEUE_THROTTLE_KHR); if (clThrottle & static_cast(CL_QUEUE_THROTTLE_LOW_KHR)) { throttle = QueueThrottle::LOW; } else if (clThrottle & static_cast(CL_QUEUE_THROTTLE_MED_KHR)) { throttle = QueueThrottle::MEDIUM; } else if (clThrottle & static_cast(CL_QUEUE_THROTTLE_HIGH_KHR)) { throttle = QueueThrottle::HIGH; } if (internalUsage) { this->gpgpuEngine = &device->getInternalEngine(); } auto &hwInfo = device->getDevice().getHardwareInfo(); auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily); auto assignEngineRoundRobin = !internalUsage && !this->queueFamilySelected && !(clPriority & static_cast(CL_QUEUE_PRIORITY_LOW_KHR)) && hwHelper.isAssignEngineRoundRobinSupported() && this->isAssignEngineRoundRobinEnabled(); if (assignEngineRoundRobin) { this->gpgpuEngine = &device->getDevice().getNextEngineForCommandQueue(); } if (getCmdQueueProperties(properties, CL_QUEUE_PROPERTIES) & static_cast(CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)) { getGpgpuCommandStreamReceiver().overrideDispatchPolicy(DispatchMode::BatchedDispatch); if (DebugManager.flags.CsrDispatchMode.get() != 0) { getGpgpuCommandStreamReceiver().overrideDispatchPolicy(static_cast(DebugManager.flags.CsrDispatchMode.get())); } getGpgpuCommandStreamReceiver().enableNTo1SubmissionModel(); } if (device->getDevice().getDebugger() && !getGpgpuCommandStreamReceiver().getDebugSurfaceAllocation()) { auto maxDbgSurfaceSize = hwHelper.getSipKernelMaxDbgSurfaceSize(hwInfo); auto debugSurface = getGpgpuCommandStreamReceiver().allocateDebugSurface(maxDbgSurfaceSize); memset(debugSurface->getUnderlyingBuffer(), 0, 
debugSurface->getUnderlyingBufferSize()); auto &stateSaveAreaHeader = SipKernel::getSipKernel(device->getDevice()).getStateSaveAreaHeader(); if (stateSaveAreaHeader.size() > 0) { NEO::MemoryTransferHelper::transferMemoryToAllocation(hwHelper.isBlitCopyRequiredForLocalMemory(hwInfo, *debugSurface), device->getDevice(), debugSurface, 0, stateSaveAreaHeader.data(), stateSaveAreaHeader.size()); } } uint64_t requestedSliceCount = getCmdQueueProperties(properties, CL_QUEUE_SLICE_COUNT_INTEL); if (requestedSliceCount > 0) { sliceCount = requestedSliceCount; } gpgpuEngine->osContext->ensureContextInitialized(); gpgpuEngine->commandStreamReceiver->initDirectSubmission(device->getDevice(), *gpgpuEngine->osContext); for (const EngineControl *engine : bcsEngines) { if (engine != nullptr) { engine->osContext->ensureContextInitialized(); engine->commandStreamReceiver->initDirectSubmission(device->getDevice(), *engine->osContext); } } } static CommandQueue *create(Context *context, ClDevice *device, const cl_queue_properties *properties, bool internalUsage) { return new CommandQueueHw(context, device, properties, internalUsage); } MOCKABLE_VIRTUAL void notifyEnqueueReadBuffer(Buffer *buffer, bool blockingRead, bool notifyBcsCsr); MOCKABLE_VIRTUAL void notifyEnqueueReadImage(Image *image, bool blockingRead, bool notifyBcsCsr); MOCKABLE_VIRTUAL void notifyEnqueueSVMMemcpy(GraphicsAllocation *gfxAllocation, bool blockingCopy, bool notifyBcsCsr); cl_int enqueueBarrierWithWaitList(cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override; cl_int enqueueCopyBuffer(Buffer *srcBuffer, Buffer *dstBuffer, size_t srcOffset, size_t dstOffset, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override; cl_int enqueueCopyBufferRect(Buffer *srcBuffer, Buffer *dstBuffer, const size_t *srcOrigin, const size_t *dstOrigin, const size_t *region, size_t srcRowPitch, size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch, 
cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override; cl_int enqueueCopyImage(Image *srcImage, Image *dstImage, const size_t *srcOrigin, const size_t *dstOrigin, const size_t *region, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override; cl_int enqueueFillBuffer(Buffer *buffer, const void *pattern, size_t patternSize, size_t offset, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override; cl_int enqueueFillImage(Image *image, const void *fillColor, const size_t *origin, const size_t *region, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override; cl_int enqueueKernel(Kernel *kernel, cl_uint workDim, const size_t *globalWorkOffset, const size_t *globalWorkSize, const size_t *localWorkSize, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override; cl_int enqueueSVMMap(cl_bool blockingMap, cl_map_flags mapFlags, void *svmPtr, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool externalAppCall) override; cl_int enqueueSVMUnmap(void *svmPtr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool externalAppCall) override; cl_int enqueueSVMFree(cl_uint numSvmPointers, void *svmPointers[], void(CL_CALLBACK *pfnFreeFunc)(cl_command_queue queue, cl_uint numSvmPointers, void *svmPointers[], void *userData), void *userData, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override; cl_int enqueueSVMMemcpy(cl_bool blockingCopy, void *dstPtr, const void *srcPtr, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override; cl_int enqueueSVMMemFill(void *svmPtr, const void *pattern, size_t patternSize, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override; cl_int enqueueMarkerWithWaitList(cl_uint numEventsInWaitList, const cl_event 
*eventWaitList, cl_event *event) override; cl_int enqueueMigrateMemObjects(cl_uint numMemObjects, const cl_mem *memObjects, cl_mem_migration_flags flags, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override; cl_int enqueueSVMMigrateMem(cl_uint numSvmPointers, const void **svmPointers, const size_t *sizes, const cl_mem_migration_flags flags, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override; cl_int enqueueReadBuffer(Buffer *buffer, cl_bool blockingRead, size_t offset, size_t size, void *ptr, GraphicsAllocation *mapAllocation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override; cl_int enqueueReadBufferRect(Buffer *buffer, cl_bool blockingRead, const size_t *bufferOrigin, const size_t *hostOrigin, const size_t *region, size_t bufferRowPitch, size_t bufferSlicePitch, size_t hostRowPitch, size_t hostSlicePitch, void *ptr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override; cl_int enqueueReadImage(Image *srcImage, cl_bool blockingRead, const size_t *origin, const size_t *region, size_t rowPitch, size_t slicePitch, void *ptr, GraphicsAllocation *mapAllocation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override; cl_int enqueueWriteBuffer(Buffer *buffer, cl_bool blockingWrite, size_t offset, size_t cb, const void *ptr, GraphicsAllocation *mapAllocation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override; cl_int enqueueWriteBufferRect(Buffer *buffer, cl_bool blockingWrite, const size_t *bufferOrigin, const size_t *hostOrigin, const size_t *region, size_t bufferRowPitch, size_t bufferSlicePitch, size_t hostRowPitch, size_t hostSlicePitch, const void *ptr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override; cl_int enqueueWriteImage(Image *dstImage, cl_bool blockingWrite, const size_t *origin, const size_t *region, size_t inputRowPitch, 
size_t inputSlicePitch, const void *ptr, GraphicsAllocation *mapAllocation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override; cl_int enqueueCopyBufferToImage(Buffer *srcBuffer, Image *dstImage, size_t srcOffset, const size_t *dstOrigin, const size_t *region, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override; cl_int enqueueCopyImageToBuffer(Image *srcImage, Buffer *dstBuffer, const size_t *srcOrigin, const size_t *region, size_t dstOffset, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override; cl_int enqueueResourceBarrier(BarrierCommand *resourceBarrier, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override; cl_int finish() override; cl_int flush() override; template cl_int enqueueHandler(Surface **surfacesForResidency, size_t numSurfaceForResidency, bool blocking, const MultiDispatchInfo &dispatchInfo, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); template cl_int enqueueHandler(Surface *(&surfacesForResidency)[size], bool blocking, const MultiDispatchInfo &dispatchInfo, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { return enqueueHandler(surfacesForResidency, size, blocking, dispatchInfo, numEventsInWaitList, eventWaitList, event); } template cl_int enqueueHandler(Surface *(&surfacesForResidency)[size], bool blocking, Kernel *kernel, cl_uint workDim, const size_t globalOffsets[3], const size_t workItems[3], const size_t *localWorkSizesIn, const size_t *enqueuedWorkSizes, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); template cl_int dispatchBcsOrGpgpuEnqueue(MultiDispatchInfo &dispatchInfo, Surface *(&surfaces)[surfaceCount], EBuiltInOps::Type builtInOperation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool blocking, CommandStreamReceiver &csr); template cl_int enqueueBlit(const MultiDispatchInfo 
&multiDispatchInfo, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool blocking, CommandStreamReceiver &bcsCsr); template CompletionStamp enqueueNonBlocked(Surface **surfacesForResidency, size_t surfaceCount, LinearStream &commandStream, size_t commandStreamStart, bool &blocking, bool clearDependenciesForSubCapture, const MultiDispatchInfo &multiDispatchInfo, const EnqueueProperties &enqueueProperties, TimestampPacketDependencies ×tampPacketDependencies, EventsRequest &eventsRequest, EventBuilder &eventBuilder, uint32_t taskLevel, PrintfHandler *printfHandler, CommandStreamReceiver *bcsCsr); void enqueueBlocked(uint32_t commandType, Surface **surfacesForResidency, size_t surfacesCount, const MultiDispatchInfo &multiDispatchInfo, TimestampPacketDependencies ×tampPacketDependencies, std::unique_ptr &blockedCommandsData, const EnqueueProperties &enqueueProperties, EventsRequest &eventsRequest, EventBuilder &externalEventBuilder, std::unique_ptr &&printfHandler, CommandStreamReceiver *bcsCsr); CompletionStamp enqueueCommandWithoutKernel(Surface **surfaces, size_t surfaceCount, LinearStream *commandStream, size_t commandStreamStart, bool &blocking, const EnqueueProperties &enqueueProperties, TimestampPacketDependencies ×tampPacketDependencies, EventsRequest &eventsRequest, EventBuilder &eventBuilder, uint32_t taskLevel, CsrDependencies &csrDeps, CommandStreamReceiver *bcsCsr); void processDispatchForCacheFlush(Surface **surfaces, size_t numSurfaces, LinearStream *commandStream, CsrDependencies &csrDeps); void processDispatchForMarker(CommandQueue &commandQueue, LinearStream *commandStream, EventsRequest &eventsRequest, CsrDependencies &csrDeps); void processDispatchForMarkerWithTimestampPacket(CommandQueue &commandQueue, LinearStream *commandStream, EventsRequest &eventsRequest, CsrDependencies &csrDeps); BlitProperties processDispatchForBlitEnqueue(CommandStreamReceiver &blitCommandStreamReceiver, const MultiDispatchInfo 
&multiDispatchInfo, TimestampPacketDependencies ×tampPacketDependencies, const EventsRequest &eventsRequest, LinearStream *commandStream, uint32_t commandType, bool queueBlocked); void submitCacheFlush(Surface **surfaces, size_t numSurfaces, LinearStream *commandStream, uint64_t postSyncAddress); bool isCacheFlushCommand(uint32_t commandType) const override; bool waitForTimestamps(uint32_t taskCount) override; MOCKABLE_VIRTUAL bool isCacheFlushForBcsRequired() const; protected: MOCKABLE_VIRTUAL void enqueueHandlerHook(const unsigned int commandType, const MultiDispatchInfo &dispatchInfo){}; size_t calculateHostPtrSizeForImage(const size_t *region, size_t rowPitch, size_t slicePitch, Image *image); cl_int enqueueReadWriteBufferOnCpuWithMemoryTransfer(cl_command_type commandType, Buffer *buffer, size_t offset, size_t size, void *ptr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); cl_int enqueueReadWriteBufferOnCpuWithoutMemoryTransfer(cl_command_type commandType, Buffer *buffer, size_t offset, size_t size, void *ptr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); cl_int enqueueMarkerForReadWriteOperation(MemObj *memObj, void *ptr, cl_command_type commandType, cl_bool blocking, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); MOCKABLE_VIRTUAL void dispatchAuxTranslationBuiltin(MultiDispatchInfo &multiDispatchInfo, AuxTranslationDirection auxTranslationDirection); void setupBlitAuxTranslation(MultiDispatchInfo &multiDispatchInfo); MOCKABLE_VIRTUAL bool forceStateless(size_t size); template LinearStream *obtainCommandStream(const CsrDependencies &csrDependencies, bool blitEnqueue, bool blockedQueue, const MultiDispatchInfo &multiDispatchInfo, const EventsRequest &eventsRequest, std::unique_ptr &blockedCommandsData, Surface **surfaces, size_t numSurfaces, bool isMarkerWithProfiling) { LinearStream *commandStream = nullptr; bool profilingRequired = (this->isProfilingEnabled() && 
eventsRequest.outEvent); bool perfCountersRequired = (this->isPerfCountersEnabled() && eventsRequest.outEvent); if (isBlockedCommandStreamRequired(commandType, eventsRequest, blockedQueue, isMarkerWithProfiling)) { constexpr size_t additionalAllocationSize = CSRequirements::csOverfetchSize; constexpr size_t allocationSize = MemoryConstants::pageSize64k - CSRequirements::csOverfetchSize; commandStream = new LinearStream(); auto &gpgpuCsr = getGpgpuCommandStreamReceiver(); gpgpuCsr.ensureCommandBufferAllocation(*commandStream, allocationSize, additionalAllocationSize); blockedCommandsData = std::make_unique(commandStream, *gpgpuCsr.getInternalAllocationStorage()); } else { commandStream = &getCommandStream(*this, csrDependencies, profilingRequired, perfCountersRequired, blitEnqueue, multiDispatchInfo, surfaces, numSurfaces, isMarkerWithProfiling, eventsRequest.numEventsInWaitList > 0); } return commandStream; } void processDispatchForBlitAuxTranslation(CommandStreamReceiver &bcsCsr, const MultiDispatchInfo &multiDispatchInfo, BlitPropertiesContainer &blitPropertiesContainer, TimestampPacketDependencies ×tampPacketDependencies, const EventsRequest &eventsRequest, bool queueBlocked); bool obtainTimestampPacketForCacheFlush(bool isCacheFlushRequired) const override; bool isTaskLevelUpdateRequired(const uint32_t &taskLevel, const cl_event *eventWaitList, const cl_uint &numEventsInWaitList, unsigned int commandType); void obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueueStatus, unsigned int commandType) override; static void computeOffsetsValueForRectCommands(size_t *bufferOffset, size_t *hostOffset, const size_t *bufferOrigin, const size_t *hostOrigin, const size_t *region, size_t bufferRowPitch, size_t bufferSlicePitch, size_t hostRowPitch, size_t hostSlicePitch); template void processDispatchForKernels(const MultiDispatchInfo &multiDispatchInfo, std::unique_ptr &printfHandler, Event 
*event, TagNodeBase *&hwTimeStamps, bool blockQueue, CsrDependencies &csrDeps, KernelOperation *blockedCommandsData, TimestampPacketDependencies ×tampPacketDependencies); bool isGpgpuSubmissionForBcsRequired(bool queueBlocked, TimestampPacketDependencies ×tampPacketDependencies) const; void setupEvent(EventBuilder &eventBuilder, cl_event *outEvent, uint32_t cmdType); bool isBlitAuxTranslationRequired(const MultiDispatchInfo &multiDispatchInfo); }; } // namespace NEO compute-runtime-22.14.22890/opencl/source/command_queue/command_queue_hw_base.inl000066400000000000000000000243751422164147700300050ustar00rootroot00000000000000/* * Copyright (C) 2019-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/blit_commands_helper.h" #include "shared/source/utilities/wait_util.h" #include "opencl/source/built_ins/aux_translation_builtin.h" #include "opencl/source/command_queue/enqueue_barrier.h" #include "opencl/source/command_queue/enqueue_copy_buffer.h" #include "opencl/source/command_queue/enqueue_copy_buffer_rect.h" #include "opencl/source/command_queue/enqueue_copy_buffer_to_image.h" #include "opencl/source/command_queue/enqueue_copy_image.h" #include "opencl/source/command_queue/enqueue_copy_image_to_buffer.h" #include "opencl/source/command_queue/enqueue_fill_buffer.h" #include "opencl/source/command_queue/enqueue_fill_image.h" #include "opencl/source/command_queue/enqueue_kernel.h" #include "opencl/source/command_queue/enqueue_marker.h" #include "opencl/source/command_queue/enqueue_migrate_mem_objects.h" #include "opencl/source/command_queue/enqueue_read_buffer.h" #include "opencl/source/command_queue/enqueue_read_buffer_rect.h" #include "opencl/source/command_queue/enqueue_read_image.h" #include "opencl/source/command_queue/enqueue_svm.h" #include "opencl/source/command_queue/enqueue_write_buffer.h" #include "opencl/source/command_queue/enqueue_write_buffer_rect.h" #include "opencl/source/command_queue/enqueue_write_image.h" 
#include "opencl/source/command_queue/finish.h" #include "opencl/source/command_queue/flush.h" #include "opencl/source/command_queue/gpgpu_walker.h" namespace NEO { template void CommandQueueHw::notifyEnqueueReadBuffer(Buffer *buffer, bool blockingRead, bool notifyBcsCsr) { if (DebugManager.flags.AUBDumpAllocsOnEnqueueReadOnly.get()) { buffer->getGraphicsAllocation(getDevice().getRootDeviceIndex())->setAllocDumpable(blockingRead, notifyBcsCsr); buffer->forceDisallowCPUCopy = blockingRead; } } template void CommandQueueHw::notifyEnqueueReadImage(Image *image, bool blockingRead, bool notifyBcsCsr) { if (DebugManager.flags.AUBDumpAllocsOnEnqueueReadOnly.get()) { image->getGraphicsAllocation(getDevice().getRootDeviceIndex())->setAllocDumpable(blockingRead, notifyBcsCsr); } } template void CommandQueueHw::notifyEnqueueSVMMemcpy(GraphicsAllocation *gfxAllocation, bool blockingCopy, bool notifyBcsCsr) { if (DebugManager.flags.AUBDumpAllocsOnEnqueueSVMMemcpyOnly.get()) { gfxAllocation->setAllocDumpable(blockingCopy, notifyBcsCsr); } } template cl_int CommandQueueHw::enqueueReadWriteBufferOnCpuWithMemoryTransfer(cl_command_type commandType, Buffer *buffer, size_t offset, size_t size, void *ptr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { cl_int retVal = CL_SUCCESS; EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, event); TransferProperties transferProperties(buffer, commandType, 0, true, &offset, &size, ptr, true, getDevice().getRootDeviceIndex()); cpuDataTransferHandler(transferProperties, eventsRequest, retVal); return retVal; } template cl_int CommandQueueHw::enqueueReadWriteBufferOnCpuWithoutMemoryTransfer(cl_command_type commandType, Buffer *buffer, size_t offset, size_t size, void *ptr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { cl_int retVal = CL_SUCCESS; EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, event); TransferProperties transferProperties(buffer, 
CL_COMMAND_MARKER, 0, true, &offset, &size, ptr, false, getDevice().getRootDeviceIndex()); cpuDataTransferHandler(transferProperties, eventsRequest, retVal); if (event) { auto pEvent = castToObjectOrAbort(*event); pEvent->setCmdType(commandType); } if (context->isProvidingPerformanceHints()) { context->providePerformanceHintForMemoryTransfer(commandType, false, static_cast(buffer), ptr); } return retVal; } template cl_int CommandQueueHw::enqueueMarkerForReadWriteOperation(MemObj *memObj, void *ptr, cl_command_type commandType, cl_bool blocking, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { MultiDispatchInfo multiDispatchInfo; NullSurface s; Surface *surfaces[] = {&s}; const auto enqueueResult = enqueueHandler( surfaces, blocking == CL_TRUE, multiDispatchInfo, numEventsInWaitList, eventWaitList, event); if (enqueueResult != CL_SUCCESS) { return enqueueResult; } if (event) { auto pEvent = castToObjectOrAbort(*event); pEvent->setCmdType(commandType); } if (context->isProvidingPerformanceHints()) { context->providePerformanceHintForMemoryTransfer(commandType, false, static_cast(memObj), ptr); } return CL_SUCCESS; } template void CommandQueueHw::dispatchAuxTranslationBuiltin(MultiDispatchInfo &multiDispatchInfo, AuxTranslationDirection auxTranslationDirection) { auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, getClDevice()); auto &auxTranslationBuilder = static_cast &>(builder); BuiltinOpParams dispatchParams; dispatchParams.auxTranslationDirection = auxTranslationDirection; auxTranslationBuilder.buildDispatchInfosForAuxTranslation(multiDispatchInfo, dispatchParams); } template bool CommandQueueHw::forceStateless(size_t size) { return size >= 4ull * MemoryConstants::gigaByte; } template bool CommandQueueHw::isCacheFlushForBcsRequired() const { if (DebugManager.flags.ForceCacheFlushForBcs.get() != -1) { return !!DebugManager.flags.ForceCacheFlushForBcs.get(); } return true; } template 
inline bool waitForTimestampsWithinContainer(TimestampPacketContainer *container, CommandStreamReceiver &csr) { bool waited = false; if (container) { for (const auto ×tamp : container->peekNodes()) { for (uint32_t i = 0; i < timestamp->getPacketsUsed(); i++) { while (timestamp->getContextEndValue(i) == 1) { csr.downloadAllocation(*timestamp->getBaseGraphicsAllocation()->getGraphicsAllocation(csr.getRootDeviceIndex())); WaitUtils::waitFunctionWithPredicate(static_cast(timestamp->getContextEndAddress(i)), 1u, std::not_equal_to()); } waited = true; } } } return waited; } template bool CommandQueueHw::waitForTimestamps(uint32_t taskCount) { using TSPacketType = typename Family::TimestampPacketType; bool waited = false; if (isWaitForTimestampsEnabled()) { waited = waitForTimestampsWithinContainer(timestampPacketContainer.get(), getGpgpuCommandStreamReceiver()); if (isOOQEnabled()) { waitForTimestampsWithinContainer(deferredTimestampPackets.get(), getGpgpuCommandStreamReceiver()); } } return waited; } template void CommandQueueHw::setupBlitAuxTranslation(MultiDispatchInfo &multiDispatchInfo) { multiDispatchInfo.begin()->dispatchInitCommands.registerMethod( TimestampPacketHelper::programSemaphoreForAuxTranslation); multiDispatchInfo.begin()->dispatchInitCommands.registerCommandsSizeEstimationMethod( TimestampPacketHelper::getRequiredCmdStreamSizeForAuxTranslationNodeDependency); multiDispatchInfo.rbegin()->dispatchEpilogueCommands.registerMethod( TimestampPacketHelper::programSemaphoreForAuxTranslation); multiDispatchInfo.rbegin()->dispatchEpilogueCommands.registerCommandsSizeEstimationMethod( TimestampPacketHelper::getRequiredCmdStreamSizeForAuxTranslationNodeDependency); } template bool CommandQueueHw::obtainTimestampPacketForCacheFlush(bool isCacheFlushRequired) const { return isCacheFlushRequired; } template bool CommandQueueHw::isGpgpuSubmissionForBcsRequired(bool queueBlocked, TimestampPacketDependencies ×tampPacketDependencies) const { if (queueBlocked || 
timestampPacketDependencies.barrierNodes.peekNodes().size() > 0u) { return true; } bool required = (latestSentEnqueueType != EnqueueProperties::Operation::Blit) && (latestSentEnqueueType != EnqueueProperties::Operation::None) && (isCacheFlushForBcsRequired() || !getGpgpuCommandStreamReceiver().isLatestTaskCountFlushed()); if (DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.get() == 1) { required = true; } return required; } template void CommandQueueHw::setupEvent(EventBuilder &eventBuilder, cl_event *outEvent, uint32_t cmdType) { if (outEvent) { eventBuilder.create(this, cmdType, CompletionStamp::notReady, 0); auto eventObj = eventBuilder.getEvent(); *outEvent = eventObj; if (eventObj->isProfilingEnabled()) { TimeStampData queueTimeStamp; getDevice().getOSTime()->getCpuGpuTime(&queueTimeStamp); eventObj->setQueueTimeStamp(&queueTimeStamp); if (isCommandWithoutKernel(cmdType) && cmdType != CL_COMMAND_MARKER) { eventObj->setCPUProfilingPath(true); eventObj->setQueueTimeStamp(); } } DBG_LOG(EventsDebugEnable, "enqueueHandler commandType", cmdType, "output Event", eventObj); } } } // namespace NEO compute-runtime-22.14.22890/opencl/source/command_queue/command_queue_hw_bdw_and_later.inl000066400000000000000000000012141422164147700316430ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_queue/command_queue_hw_base.inl" namespace NEO { template void CommandQueueHw::submitCacheFlush(Surface **surfaces, size_t numSurfaces, LinearStream *commandStream, uint64_t postSyncAddress) { } template bool CommandQueueHw::isCacheFlushCommand(uint32_t commandType) const { return false; } } // namespace NEO compute-runtime-22.14.22890/opencl/source/command_queue/command_queue_hw_xehp_and_later.inl000066400000000000000000000061601422164147700320400ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include 
"shared/source/helpers/cache_flush_xehp_and_later.inl" #include "opencl/extensions/public/cl_ext_private.h" #include "opencl/source/command_queue/command_queue_hw_base.inl" #include "opencl/source/memory_manager/resource_surface.h" namespace NEO { template <> void CommandQueueHw::submitCacheFlush(Surface **surfaces, size_t numSurfaces, LinearStream *commandStream, uint64_t postSyncAddress) { if constexpr (Family::isUsingL3Control) { StackVec subranges; for (auto surface : CreateRange(surfaces, numSurfaces)) { auto resource = reinterpret_cast(surface); auto alloc = resource->getGraphicsAllocation(); coverRangeExact(alloc->getGpuAddress(), alloc->getUnderlyingBufferSize(), subranges, resource->resourceType); } for (size_t subrangeNumber = 0; subrangeNumber < subranges.size(); subrangeNumber += maxFlushSubrangeCount) { size_t rangeCount = subranges.size() <= subrangeNumber + maxFlushSubrangeCount ? subranges.size() - subrangeNumber : maxFlushSubrangeCount; Range range = CreateRange(subranges.begin() + subrangeNumber, rangeCount); uint64_t postSyncAddressToFlush = 0; if (rangeCount < maxFlushSubrangeCount || subranges.size() - subrangeNumber - maxFlushSubrangeCount == 0) { postSyncAddressToFlush = postSyncAddress; } flushGpuCache(commandStream, range, postSyncAddressToFlush, device->getHardwareInfo()); } } } template <> bool CommandQueueHw::isCacheFlushCommand(uint32_t commandType) const { return commandType == CL_COMMAND_RESOURCE_BARRIER; } template <> LinearStream &getCommandStream(CommandQueue &commandQueue, const CsrDependencies &csrDeps, bool reserveProfilingCmdsSpace, bool reservePerfCounterCmdsSpace, bool blitEnqueue, const MultiDispatchInfo &multiDispatchInfo, Surface **surfaces, size_t numSurfaces, bool isMarkerWithProfiling, bool eventsInWaitList) { size_t expectedSizeCS = 0; bool usePostSync = false; if (commandQueue.getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) { expectedSizeCS += 
TimestampPacketHelper::getRequiredCmdStreamSize(csrDeps); usePostSync = true; } if constexpr (Family::isUsingL3Control) { StackVec subranges; for (auto surface : CreateRange(surfaces, numSurfaces)) { ResourceSurface *resource = reinterpret_cast(surface); auto alloc = resource->getGraphicsAllocation(); coverRangeExact(alloc->getGpuAddress(), alloc->getUnderlyingBufferSize(), subranges, resource->resourceType); } expectedSizeCS += getSizeNeededToFlushGpuCache(subranges, usePostSync); } return commandQueue.getCS(expectedSizeCS); } } // namespace NEO compute-runtime-22.14.22890/opencl/source/command_queue/copy_engine_state.h000066400000000000000000000006121422164147700266230ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "engine_node.h" namespace NEO { struct CopyEngineState { aub_stream::EngineType engineType = aub_stream::EngineType::NUM_ENGINES; uint32_t taskCount = 0; bool isValid() const { return engineType != aub_stream::EngineType::NUM_ENGINES; } }; } // namespace NEO compute-runtime-22.14.22890/opencl/source/command_queue/cpu_data_transfer_handler.cpp000066400000000000000000000214001422164147700306360ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/device/device.h" #include "shared/source/helpers/get_info.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/context/context.h" #include "opencl/source/event/event.h" #include "opencl/source/event/event_builder.h" #include "opencl/source/helpers/mipmap.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/mem_obj/image.h" namespace NEO { void *CommandQueue::cpuDataTransferHandler(TransferProperties &transferProperties, EventsRequest &eventsRequest, cl_int &retVal) { MapInfo unmapInfo; Event *outEventObj = nullptr; void *returnPtr = 
nullptr; EventBuilder eventBuilder; bool eventCompleted = false; bool mapOperation = transferProperties.cmdType == CL_COMMAND_MAP_BUFFER || transferProperties.cmdType == CL_COMMAND_MAP_IMAGE; ErrorCodeHelper err(&retVal, CL_SUCCESS); if (mapOperation) { returnPtr = ptrOffset(transferProperties.memObj->getCpuAddressForMapping(), transferProperties.memObj->calculateOffsetForMapping(transferProperties.offset) + transferProperties.mipPtrOffset); if (!transferProperties.memObj->addMappedPtr(returnPtr, transferProperties.memObj->calculateMappedPtrLength(transferProperties.size), transferProperties.mapFlags, transferProperties.size, transferProperties.offset, transferProperties.mipLevel, nullptr)) { err.set(CL_INVALID_OPERATION); return nullptr; } } else if (transferProperties.cmdType == CL_COMMAND_UNMAP_MEM_OBJECT) { if (!transferProperties.memObj->findMappedPtr(transferProperties.ptr, unmapInfo)) { err.set(CL_INVALID_VALUE); return nullptr; } transferProperties.memObj->removeMappedPtr(unmapInfo.ptr); } if (eventsRequest.outEvent) { eventBuilder.create(this, transferProperties.cmdType, CompletionStamp::notReady, CompletionStamp::notReady); outEventObj = eventBuilder.getEvent(); outEventObj->setQueueTimeStamp(); outEventObj->setCPUProfilingPath(true); *eventsRequest.outEvent = outEventObj; } TakeOwnershipWrapper queueOwnership(*this); auto commandStreamReceieverOwnership = getGpgpuCommandStreamReceiver().obtainUniqueOwnership(); auto blockQueue = false; auto taskLevel = 0u; obtainTaskLevelAndBlockedStatus(taskLevel, eventsRequest.numEventsInWaitList, eventsRequest.eventWaitList, blockQueue, transferProperties.cmdType); DBG_LOG(LogTaskCounts, __FUNCTION__, "taskLevel", taskLevel); if (outEventObj) { outEventObj->taskLevel = taskLevel; } if (blockQueue && (transferProperties.cmdType == CL_COMMAND_MAP_BUFFER || transferProperties.cmdType == CL_COMMAND_MAP_IMAGE || transferProperties.cmdType == CL_COMMAND_UNMAP_MEM_OBJECT)) { // Pass size and offset only. 
Unblocked command will call transferData(size, offset) method enqueueBlockedMapUnmapOperation(eventsRequest.eventWaitList, static_cast(eventsRequest.numEventsInWaitList), mapOperation ? MAP : UNMAP, transferProperties.memObj, mapOperation ? transferProperties.size : unmapInfo.size, mapOperation ? transferProperties.offset : unmapInfo.offset, mapOperation ? transferProperties.mapFlags == CL_MAP_READ : unmapInfo.readOnly, eventBuilder); } commandStreamReceieverOwnership.unlock(); queueOwnership.unlock(); // read/write buffers are always blocking if (!blockQueue || transferProperties.blocking) { err.set(Event::waitForEvents(eventsRequest.numEventsInWaitList, eventsRequest.eventWaitList)); bool modifySimulationFlags = false; if (outEventObj) { outEventObj->setSubmitTimeStamp(); } //wait for the completness of previous commands if (transferProperties.cmdType != CL_COMMAND_UNMAP_MEM_OBJECT) { if (!transferProperties.memObj->isMemObjZeroCopy() || transferProperties.blocking) { finish(); eventCompleted = true; } } if (outEventObj) { outEventObj->setStartTimeStamp(); } UNRECOVERABLE_IF((transferProperties.memObj->isMemObjZeroCopy() == false) && isMipMapped(transferProperties.memObj)); switch (transferProperties.cmdType) { case CL_COMMAND_MAP_BUFFER: if (!transferProperties.memObj->isMemObjZeroCopy()) { if (transferProperties.mapFlags != CL_MAP_WRITE_INVALIDATE_REGION) { transferProperties.memObj->transferDataToHostPtr(transferProperties.size, transferProperties.offset); } eventCompleted = true; } break; case CL_COMMAND_MAP_IMAGE: if (!transferProperties.memObj->isMemObjZeroCopy()) { if (transferProperties.mapFlags != CL_MAP_WRITE_INVALIDATE_REGION) { transferProperties.memObj->transferDataToHostPtr(transferProperties.size, transferProperties.offset); } eventCompleted = true; } break; case CL_COMMAND_UNMAP_MEM_OBJECT: if (!transferProperties.memObj->isMemObjZeroCopy()) { if (!unmapInfo.readOnly) { transferProperties.memObj->transferDataFromHostPtr(unmapInfo.size, 
unmapInfo.offset); } eventCompleted = true; } if (!unmapInfo.readOnly) { modifySimulationFlags = true; } break; case CL_COMMAND_READ_BUFFER: memcpy_s(transferProperties.ptr, transferProperties.size[0], transferProperties.getCpuPtrForReadWrite(), transferProperties.size[0]); eventCompleted = true; break; case CL_COMMAND_WRITE_BUFFER: memcpy_s(transferProperties.getCpuPtrForReadWrite(), transferProperties.size[0], transferProperties.ptr, transferProperties.size[0]); eventCompleted = true; modifySimulationFlags = true; break; case CL_COMMAND_MARKER: break; default: err.set(CL_INVALID_OPERATION); } if (outEventObj) { outEventObj->setEndTimeStamp(); outEventObj->updateTaskCount(this->taskCount, outEventObj->peekBcsTaskCountFromCommandQueue()); outEventObj->flushStamp->replaceStampObject(this->flushStamp->getStampReference()); if (eventCompleted) { outEventObj->setStatus(CL_COMPLETE); } else { outEventObj->updateExecutionStatus(); } } if (modifySimulationFlags) { auto graphicsAllocation = transferProperties.memObj->getGraphicsAllocation(getDevice().getRootDeviceIndex()); graphicsAllocation->setAubWritable(true, GraphicsAllocation::defaultBank); graphicsAllocation->setTbxWritable(true, GraphicsAllocation::defaultBank); } } if (context->isProvidingPerformanceHints()) { providePerformanceHint(transferProperties); } return returnPtr; // only map returns pointer } void CommandQueue::providePerformanceHint(TransferProperties &transferProperties) { switch (transferProperties.cmdType) { case CL_COMMAND_MAP_BUFFER: case CL_COMMAND_MAP_IMAGE: context->providePerformanceHintForMemoryTransfer(transferProperties.cmdType, !transferProperties.memObj->isMemObjZeroCopy(), static_cast(transferProperties.memObj)); break; case CL_COMMAND_UNMAP_MEM_OBJECT: if (!transferProperties.memObj->isMemObjZeroCopy()) { context->providePerformanceHintForMemoryTransfer(transferProperties.cmdType, true, transferProperties.ptr, static_cast(transferProperties.memObj)); break; } 
context->providePerformanceHintForMemoryTransfer(transferProperties.cmdType, false, transferProperties.ptr); break; case CL_COMMAND_READ_BUFFER: case CL_COMMAND_WRITE_BUFFER: context->providePerformanceHintForMemoryTransfer(transferProperties.cmdType, true, static_cast(transferProperties.memObj), transferProperties.ptr); break; } } } // namespace NEO compute-runtime-22.14.22890/opencl/source/command_queue/csr_selection_args.h000066400000000000000000000063741422164147700270070ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/memory_manager/multi_graphics_allocation.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/mem_obj/mem_obj.h" namespace NEO { enum class TransferDirection { HostToHost, HostToLocal, LocalToHost, LocalToLocal, }; struct CsrSelectionArgs { struct Resource { bool isLocal = false; const GraphicsAllocation *allocation = nullptr; const Image *image = nullptr; const size_t *imageOrigin = nullptr; }; cl_command_type cmdType; const size_t *size = nullptr; Resource srcResource; Resource dstResource; TransferDirection direction; CsrSelectionArgs(cl_command_type cmdType, const size_t *size) : cmdType(cmdType), size(size), direction(TransferDirection::HostToHost) {} template CsrSelectionArgs(cl_command_type cmdType, ResourceType *src, ResourceType *dst, uint32_t rootDeviceIndex, const size_t *size) : cmdType(cmdType), size(size) { if (src) { processResource(*src, rootDeviceIndex, this->srcResource); } if (dst) { processResource(*dst, rootDeviceIndex, this->dstResource); } this->direction = createTransferDirection(srcResource.isLocal, dstResource.isLocal); } CsrSelectionArgs(cl_command_type cmdType, Image *src, Image *dst, uint32_t rootDeviceIndex, const size_t *size, const size_t *srcOrigin, const size_t *dstOrigin) : CsrSelectionArgs(cmdType, src, dst, rootDeviceIndex, size) { if (src) { 
srcResource.imageOrigin = srcOrigin; } if (dst) { dstResource.imageOrigin = dstOrigin; } } static void processResource(const Image &image, uint32_t rootDeviceIndex, Resource &outResource) { processResource(image.getMultiGraphicsAllocation(), rootDeviceIndex, outResource); outResource.image = ℑ } static void processResource(const Buffer &buffer, uint32_t rootDeviceIndex, Resource &outResource) { processResource(buffer.getMultiGraphicsAllocation(), rootDeviceIndex, outResource); } static void processResource(const MultiGraphicsAllocation &multiGfxAlloc, uint32_t rootDeviceIndex, Resource &outResource) { processResource(*multiGfxAlloc.getGraphicsAllocation(rootDeviceIndex), rootDeviceIndex, outResource); } static void processResource(const GraphicsAllocation &gfxAlloc, uint32_t rootDeviceIndex, Resource &outResource) { outResource.allocation = &gfxAlloc; outResource.isLocal = gfxAlloc.isAllocatedInLocalMemoryPool(); } static inline TransferDirection createTransferDirection(bool srcLocal, bool dstLocal) { if (srcLocal) { if (dstLocal) { return TransferDirection::LocalToLocal; } else { return TransferDirection::LocalToHost; } } else { if (dstLocal) { return TransferDirection::HostToLocal; } else { return TransferDirection::HostToHost; } } } }; } // namespace NEO compute-runtime-22.14.22890/opencl/source/command_queue/enqueue_barrier.h000066400000000000000000000017571422164147700263140ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/device/device.h" #include "shared/source/memory_manager/surface.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/event/event.h" #include namespace NEO { template cl_int CommandQueueHw::enqueueBarrierWithWaitList( cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { NullSurface s; Surface *surfaces[] = {&s}; return 
enqueueHandler(surfaces, false, MultiDispatchInfo(), numEventsInWaitList, eventWaitList, event); } } // namespace NEO compute-runtime-22.14.22890/opencl/source/command_queue/enqueue_common.h000066400000000000000000001673471422164147700261660ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/built_ins/built_ins.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/command_stream/wait_status.h" #include "shared/source/helpers/array_count.h" #include "shared/source/helpers/engine_node_helper.h" #include "shared/source/helpers/local_work_size.h" #include "shared/source/helpers/pipe_control_args.h" #include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/memory_manager/surface.h" #include "shared/source/os_interface/os_context.h" #include "shared/source/program/sync_buffer_handler.h" #include "shared/source/program/sync_buffer_handler.inl" #include "shared/source/utilities/range.h" #include "shared/source/utilities/tag_allocator.h" #include "opencl/source/built_ins/builtins_dispatch_builder.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/command_queue/gpgpu_walker.h" #include "opencl/source/command_queue/hardware_interface.h" #include "opencl/source/event/event_builder.h" #include "opencl/source/event/user_event.h" #include "opencl/source/gtpin/gtpin_notify.h" #include "opencl/source/helpers/cl_blit_properties.h" #include "opencl/source/helpers/cl_hw_helper.h" #include "opencl/source/helpers/cl_preemption_helper.h" #include "opencl/source/helpers/dispatch_info_builder.h" #include "opencl/source/helpers/enqueue_properties.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/helpers/task_information.h" #include "opencl/source/mem_obj/buffer.h" #include 
"opencl/source/mem_obj/image.h" #include "opencl/source/memory_manager/migration_controller.h" #include "opencl/source/program/printf_handler.h" #include "opencl/source/utilities/cl_logger.h" #include #include namespace NEO { template template cl_int CommandQueueHw::enqueueHandler(Surface *(&surfaces)[surfaceCount], bool blocking, Kernel *kernel, cl_uint workDim, const size_t globalOffsets[3], const size_t workItems[3], const size_t *localWorkSizesIn, const size_t *enqueuedWorkSizes, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { BuiltInOwnershipWrapper builtInLock; KernelObjsForAuxTranslation kernelObjsForAuxTranslation; MultiDispatchInfo multiDispatchInfo(kernel); auto auxTranslationMode = AuxTranslationMode::None; kernel->updateAuxTranslationRequired(); if (kernel->isAuxTranslationRequired()) { kernel->fillWithKernelObjsForAuxTranslation(kernelObjsForAuxTranslation); multiDispatchInfo.setKernelObjsForAuxTranslation(kernelObjsForAuxTranslation); if (!kernelObjsForAuxTranslation.empty()) { auxTranslationMode = HwHelperHw::get().getAuxTranslationMode(device->getHardwareInfo()); } } if (AuxTranslationMode::Builtin == auxTranslationMode) { auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, getClDevice()); builtInLock.takeOwnership(builder, this->context); dispatchAuxTranslationBuiltin(multiDispatchInfo, AuxTranslationDirection::AuxToNonAux); } if (kernel->getKernelInfo().builtinDispatchBuilder == nullptr) { DispatchInfoBuilder builder(getClDevice()); builder.setDispatchGeometry(workDim, workItems, enqueuedWorkSizes, globalOffsets, Vec3{0, 0, 0}, localWorkSizesIn); builder.setKernel(kernel); builder.bake(multiDispatchInfo); } else { auto builder = kernel->getKernelInfo().builtinDispatchBuilder; builder->buildDispatchInfos(multiDispatchInfo, kernel, workDim, workItems, enqueuedWorkSizes, globalOffsets); if (multiDispatchInfo.size() == 0) { return CL_SUCCESS; } } if 
(AuxTranslationMode::Builtin == auxTranslationMode) { dispatchAuxTranslationBuiltin(multiDispatchInfo, AuxTranslationDirection::NonAuxToAux); } if (AuxTranslationMode::Blit == auxTranslationMode) { setupBlitAuxTranslation(multiDispatchInfo); } return enqueueHandler(surfaces, blocking, multiDispatchInfo, numEventsInWaitList, eventWaitList, event); } template template cl_int CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, size_t numSurfaceForResidency, bool blocking, const MultiDispatchInfo &multiDispatchInfo, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { if (multiDispatchInfo.empty() && !isCommandWithoutKernel(commandType)) { const auto enqueueResult = enqueueHandler(nullptr, 0, blocking, multiDispatchInfo, numEventsInWaitList, eventWaitList, event); if (enqueueResult != CL_SUCCESS) { return enqueueResult; } if (event) { castToObjectOrAbort(*event)->setCmdType(commandType); } return CL_SUCCESS; } TagNodeBase *hwTimeStamps = nullptr; CommandStreamReceiver &computeCommandStreamReceiver = getGpgpuCommandStreamReceiver(); EventBuilder eventBuilder; setupEvent(eventBuilder, event, commandType); bool isMarkerWithProfiling = (CL_COMMAND_MARKER == commandType) && (eventBuilder.getEvent() && eventBuilder.getEvent()->isProfilingEnabled()); std::unique_ptr blockedCommandsData; std::unique_ptr printfHandler; TakeOwnershipWrapper> queueOwnership(*this); auto commandStreamReceiverOwnership = computeCommandStreamReceiver.obtainUniqueOwnership(); auto blockQueue = false; auto taskLevel = 0u; obtainTaskLevelAndBlockedStatus(taskLevel, numEventsInWaitList, eventWaitList, blockQueue, commandType); enqueueHandlerHook(commandType, multiDispatchInfo); bool clearDependenciesForSubCapture = false; aubCaptureHook(blocking, clearDependenciesForSubCapture, multiDispatchInfo); bool clearAllDependencies = (queueDependenciesClearRequired() || clearDependenciesForSubCapture); if (DebugManager.flags.MakeEachEnqueueBlocking.get()) { blocking = true; } 
TimestampPacketDependencies timestampPacketDependencies; EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, event); CsrDependencies csrDeps; BlitPropertiesContainer blitPropertiesContainer; if (this->context->getRootDeviceIndices().size() > 1) { eventsRequest.fillCsrDependenciesForTaskCountContainer(csrDeps, computeCommandStreamReceiver); } bool enqueueWithBlitAuxTranslation = isBlitAuxTranslationRequired(multiDispatchInfo); if (computeCommandStreamReceiver.peekTimestampPacketWriteEnabled()) { if (!clearDependenciesForSubCapture) { eventsRequest.fillCsrDependenciesForTimestampPacketContainer(csrDeps, computeCommandStreamReceiver, CsrDependencies::DependenciesType::OnCsr); } auto allocator = computeCommandStreamReceiver.getTimestampPacketAllocator(); size_t nodesCount = 0u; if (isCacheFlushCommand(commandType) || isMarkerWithProfiling) { nodesCount = 1; } else if (!multiDispatchInfo.empty()) { nodesCount = estimateTimestampPacketNodesCount(multiDispatchInfo); } if (isCacheFlushForBcsRequired() && enqueueWithBlitAuxTranslation) { // Cache flush for aux translation is always required (if supported) timestampPacketDependencies.cacheFlushNodes.add(allocator->getTag()); } if (nodesCount > 0) { obtainNewTimestampPacketNodes(nodesCount, timestampPacketDependencies.previousEnqueueNodes, clearAllDependencies, computeCommandStreamReceiver); csrDeps.timestampPacketContainer.push_back(×tampPacketDependencies.previousEnqueueNodes); } } auto &commandStream = *obtainCommandStream(csrDeps, false, blockQueue, multiDispatchInfo, eventsRequest, blockedCommandsData, surfacesForResidency, numSurfaceForResidency, isMarkerWithProfiling); auto commandStreamStart = commandStream.getUsed(); if (this->context->getRootDeviceIndices().size() > 1) { TimestampPacketHelper::programCsrDependenciesForForTaskCountContainer(commandStream, csrDeps); } if (enqueueWithBlitAuxTranslation) { processDispatchForBlitAuxTranslation(*getBcsForAuxTranslation(), multiDispatchInfo, 
blitPropertiesContainer, timestampPacketDependencies, eventsRequest, blockQueue); } if (eventBuilder.getEvent() && computeCommandStreamReceiver.peekTimestampPacketWriteEnabled()) { eventBuilder.getEvent()->addTimestampPacketNodes(*timestampPacketContainer); eventBuilder.getEvent()->addTimestampPacketNodes(timestampPacketDependencies.nonAuxToAuxNodes); eventBuilder.getEvent()->addTimestampPacketNodes(timestampPacketDependencies.auxToNonAuxNodes); } bool flushDependenciesForNonKernelCommand = false; if (multiDispatchInfo.empty() == false) { processDispatchForKernels(multiDispatchInfo, printfHandler, eventBuilder.getEvent(), hwTimeStamps, blockQueue, csrDeps, blockedCommandsData.get(), timestampPacketDependencies); } else if (isCacheFlushCommand(commandType)) { processDispatchForCacheFlush(surfacesForResidency, numSurfaceForResidency, &commandStream, csrDeps); } else if (computeCommandStreamReceiver.peekTimestampPacketWriteEnabled()) { if (CL_COMMAND_BARRIER == commandType) { computeCommandStreamReceiver.requestStallingCommandsOnNextFlush(); } for (size_t i = 0; i < eventsRequest.numEventsInWaitList; i++) { auto waitlistEvent = castToObjectOrAbort(eventsRequest.eventWaitList[i]); if (waitlistEvent->getTimestampPacketNodes()) { flushDependenciesForNonKernelCommand = true; if (eventBuilder.getEvent()) { eventBuilder.getEvent()->addTimestampPacketNodes(*waitlistEvent->getTimestampPacketNodes()); } } } if (isMarkerWithProfiling) { flushDependenciesForNonKernelCommand = true; } if (flushDependenciesForNonKernelCommand) { TimestampPacketHelper::programCsrDependenciesForTimestampPacketContainer(commandStream, csrDeps); } if (isMarkerWithProfiling) { if (numEventsInWaitList == 0) { computeCommandStreamReceiver.programComputeBarrierCommand(commandStream); } processDispatchForMarkerWithTimestampPacket(*this, &commandStream, eventsRequest, csrDeps); } } else if (isMarkerWithProfiling) { processDispatchForMarker(*this, &commandStream, eventsRequest, csrDeps); } CompletionStamp 
completionStamp = {CompletionStamp::notReady, taskLevel, 0}; const EnqueueProperties enqueueProperties(false, !multiDispatchInfo.empty(), isCacheFlushCommand(commandType), flushDependenciesForNonKernelCommand, isMarkerWithProfiling, &blitPropertiesContainer); if (!blockQueue && isOOQEnabled()) { setupBarrierTimestampForBcsEngines(computeCommandStreamReceiver.getOsContext().getEngineType(), timestampPacketDependencies); } bool migratedMemory = false; if (!blockQueue && multiDispatchInfo.peekMainKernel() && multiDispatchInfo.peekMainKernel()->requiresMemoryMigration()) { for (auto &arg : multiDispatchInfo.peekMainKernel()->getMemObjectsToMigrate()) { MigrationController::handleMigration(*this->context, computeCommandStreamReceiver, arg.second); migratedMemory = true; } } if (!blockQueue) { if (enqueueProperties.operation == EnqueueProperties::Operation::GpuKernel) { csrDeps.makeResident(computeCommandStreamReceiver); completionStamp = enqueueNonBlocked( surfacesForResidency, numSurfaceForResidency, commandStream, commandStreamStart, blocking, clearDependenciesForSubCapture, multiDispatchInfo, enqueueProperties, timestampPacketDependencies, eventsRequest, eventBuilder, taskLevel, printfHandler.get(), getBcsForAuxTranslation()); } else if (enqueueProperties.isFlushWithoutKernelRequired()) { completionStamp = enqueueCommandWithoutKernel( surfacesForResidency, numSurfaceForResidency, &commandStream, commandStreamStart, blocking, enqueueProperties, timestampPacketDependencies, eventsRequest, eventBuilder, taskLevel, csrDeps, nullptr); } else { UNRECOVERABLE_IF(enqueueProperties.operation != EnqueueProperties::Operation::EnqueueWithoutSubmission); auto maxTaskCountCurrentRootDevice = this->taskCount; for (auto eventId = 0u; eventId < numEventsInWaitList; eventId++) { auto event = castToObject(eventWaitList[eventId]); if (event->getCommandQueue() && event->getCommandQueue()->getDevice().getRootDeviceIndex() == this->getDevice().getRootDeviceIndex()) { 
maxTaskCountCurrentRootDevice = std::max(maxTaskCountCurrentRootDevice, event->peekTaskCount()); } } //inherit data from event_wait_list and previous packets completionStamp.flushStamp = this->flushStamp->peekStamp(); completionStamp.taskCount = maxTaskCountCurrentRootDevice; completionStamp.taskLevel = taskLevel; if (eventBuilder.getEvent() && isProfilingEnabled()) { eventBuilder.getEvent()->setSubmitTimeStamp(); eventBuilder.getEvent()->setStartTimeStamp(); } //check if we have BCS associated, if so we need to make sure it is completed as well if (eventBuilder.getEvent() && this->bcsEngineTypes.size() > 0u) { eventBuilder.getEvent()->setupBcs(this->getBcsCommandStreamReceiver(this->bcsEngineTypes[0u])->getOsContext().getEngineType()); } } if (eventBuilder.getEvent()) { eventBuilder.getEvent()->flushStamp->replaceStampObject(this->flushStamp->getStampReference()); } this->latestSentEnqueueType = enqueueProperties.operation; } updateFromCompletionStamp(completionStamp, eventBuilder.getEvent()); if (blockQueue) { enqueueBlocked(commandType, surfacesForResidency, numSurfaceForResidency, multiDispatchInfo, timestampPacketDependencies, blockedCommandsData, enqueueProperties, eventsRequest, eventBuilder, std::move(printfHandler), nullptr); } if (deferredTimestampPackets.get()) { timestampPacketDependencies.moveNodesToNewContainer(*deferredTimestampPackets); } commandStreamReceiverOwnership.unlock(); queueOwnership.unlock(); if (blocking) { auto waitStatus = WaitStatus::Ready; auto &builtinOpParams = multiDispatchInfo.peekBuiltinOpParams(); if (builtinOpParams.userPtrForPostOperationCpuCopy) { waitStatus = waitForAllEngines(blockQueue, (blockQueue ? 
nullptr : printfHandler.get()), false); if (waitStatus == WaitStatus::GpuHang) { return CL_OUT_OF_RESOURCES; } auto hostPtrAlloc = builtinOpParams.transferAllocation; UNRECOVERABLE_IF(nullptr == hostPtrAlloc); auto size = hostPtrAlloc->getUnderlyingBufferSize(); [[maybe_unused]] int cpuCopyStatus = memcpy_s(builtinOpParams.userPtrForPostOperationCpuCopy, size, hostPtrAlloc->getUnderlyingBuffer(), size); DEBUG_BREAK_IF(cpuCopyStatus != 0); waitStatus = waitForAllEngines(blockQueue, (blockQueue ? nullptr : printfHandler.get()), true); } else { waitStatus = waitForAllEngines(blockQueue, (blockQueue ? nullptr : printfHandler.get()), true); } if (waitStatus == WaitStatus::GpuHang) { return CL_OUT_OF_RESOURCES; } } if (migratedMemory) { computeCommandStreamReceiver.flushBatchedSubmissions(); } return CL_SUCCESS; } template template void CommandQueueHw::processDispatchForKernels(const MultiDispatchInfo &multiDispatchInfo, std::unique_ptr &printfHandler, Event *event, TagNodeBase *&hwTimeStamps, bool blockQueue, CsrDependencies &csrDeps, KernelOperation *blockedCommandsData, TimestampPacketDependencies ×tampPacketDependencies) { TagNodeBase *hwPerfCounter = nullptr; getClFileLogger().dumpKernelArgs(&multiDispatchInfo); printfHandler.reset(PrintfHandler::create(multiDispatchInfo, *device)); if (printfHandler) { printfHandler->prepareDispatch(multiDispatchInfo); } if (multiDispatchInfo.peekMainKernel()->usesSyncBuffer()) { auto &gws = multiDispatchInfo.begin()->getGWS(); auto &lws = multiDispatchInfo.begin()->getLocalWorkgroupSize(); size_t workGroupsCount = (gws.x * gws.y * gws.z) / (lws.x * lws.y * lws.z); device->getDevice().syncBufferHandler->prepareForEnqueue(workGroupsCount, *multiDispatchInfo.peekMainKernel()); } if (commandType == CL_COMMAND_NDRANGE_KERNEL) { if (multiDispatchInfo.peekMainKernel()->isKernelDebugEnabled()) { setupDebugSurface(multiDispatchInfo.peekMainKernel()); } } if (event && this->isProfilingEnabled()) { // Get allocation for timestamps 
hwTimeStamps = event->getHwTimeStampNode(); } if (event && this->isPerfCountersEnabled()) { hwPerfCounter = event->getHwPerfCounterNode(); } HardwareInterface::dispatchWalker( *this, multiDispatchInfo, csrDeps, blockedCommandsData, hwTimeStamps, hwPerfCounter, ×tampPacketDependencies, timestampPacketContainer.get(), commandType); if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) { for (auto &dispatchInfo : multiDispatchInfo) { for (auto &patchInfoData : dispatchInfo.getKernel()->getPatchInfoDataList()) { getGpgpuCommandStreamReceiver().getFlatBatchBufferHelper().setPatchInfoData(patchInfoData); } } } getGpgpuCommandStreamReceiver().setRequiredScratchSizes(multiDispatchInfo.getRequiredScratchSize(), multiDispatchInfo.getRequiredPrivateScratchSize()); } template BlitProperties CommandQueueHw::processDispatchForBlitEnqueue(CommandStreamReceiver &blitCommandStreamReceiver, const MultiDispatchInfo &multiDispatchInfo, TimestampPacketDependencies ×tampPacketDependencies, const EventsRequest &eventsRequest, LinearStream *commandStream, uint32_t commandType, bool queueBlocked) { auto blitDirection = ClBlitProperties::obtainBlitDirection(commandType); auto blitProperties = ClBlitProperties::constructProperties(blitDirection, blitCommandStreamReceiver, multiDispatchInfo.peekBuiltinOpParams()); if (!queueBlocked) { eventsRequest.fillCsrDependenciesForTimestampPacketContainer(blitProperties.csrDependencies, blitCommandStreamReceiver, CsrDependencies::DependenciesType::All); blitProperties.csrDependencies.timestampPacketContainer.push_back(×tampPacketDependencies.cacheFlushNodes); blitProperties.csrDependencies.timestampPacketContainer.push_back(×tampPacketDependencies.previousEnqueueNodes); blitProperties.csrDependencies.timestampPacketContainer.push_back(×tampPacketDependencies.barrierNodes); } auto currentTimestampPacketNode = timestampPacketContainer->peekNodes().at(0); blitProperties.outputTimestampPacket = currentTimestampPacketNode; if (commandStream) { if 
(timestampPacketDependencies.cacheFlushNodes.peekNodes().size() > 0) { auto cacheFlushTimestampPacketGpuAddress = TimestampPacketHelper::getContextEndGpuAddress(*timestampPacketDependencies.cacheFlushNodes.peekNodes()[0]); const auto &hwInfo = device->getHardwareInfo(); PipeControlArgs args; args.dcFlushEnable = MemorySynchronizationCommands::getDcFlushEnable(true, hwInfo); MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( *commandStream, GfxFamily::PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, cacheFlushTimestampPacketGpuAddress, 0, hwInfo, args); } } return blitProperties; } template void CommandQueueHw::processDispatchForBlitAuxTranslation(CommandStreamReceiver &bcsCsr, const MultiDispatchInfo &multiDispatchInfo, BlitPropertiesContainer &blitPropertiesContainer, TimestampPacketDependencies ×tampPacketDependencies, const EventsRequest &eventsRequest, bool queueBlocked) { auto rootDeviceIndex = getDevice().getRootDeviceIndex(); auto nodesAllocator = getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(); auto numKernelObjs = multiDispatchInfo.getKernelObjsForAuxTranslation()->size(); blitPropertiesContainer.resize(numKernelObjs * 2); auto bufferIndex = 0; for (auto &kernelObj : *multiDispatchInfo.getKernelObjsForAuxTranslation()) { GraphicsAllocation *allocation = nullptr; if (kernelObj.type == KernelObjForAuxTranslation::Type::MEM_OBJ) { auto buffer = static_cast(kernelObj.object); allocation = buffer->getGraphicsAllocation(rootDeviceIndex); } else { DEBUG_BREAK_IF(kernelObj.type != KernelObjForAuxTranslation::Type::GFX_ALLOC); allocation = static_cast(kernelObj.object); } { // Aux to NonAux blitPropertiesContainer[bufferIndex] = BlitProperties::constructPropertiesForAuxTranslation( AuxTranslationDirection::AuxToNonAux, allocation, getGpgpuCommandStreamReceiver().getClearColorAllocation()); auto auxToNonAuxNode = nodesAllocator->getTag(); 
timestampPacketDependencies.auxToNonAuxNodes.add(auxToNonAuxNode); } { // NonAux to Aux blitPropertiesContainer[bufferIndex + numKernelObjs] = BlitProperties::constructPropertiesForAuxTranslation( AuxTranslationDirection::NonAuxToAux, allocation, getGpgpuCommandStreamReceiver().getClearColorAllocation()); auto nonAuxToAuxNode = nodesAllocator->getTag(); timestampPacketDependencies.nonAuxToAuxNodes.add(nonAuxToAuxNode); } bufferIndex++; } if (!queueBlocked) { CsrDependencies csrDeps; eventsRequest.fillCsrDependenciesForTimestampPacketContainer(csrDeps, bcsCsr, CsrDependencies::DependenciesType::All); BlitProperties::setupDependenciesForAuxTranslation(blitPropertiesContainer, timestampPacketDependencies, *this->timestampPacketContainer, csrDeps, getGpgpuCommandStreamReceiver(), bcsCsr); } eventsRequest.setupBcsCsrForOutputEvent(bcsCsr); } template void CommandQueueHw::processDispatchForCacheFlush(Surface **surfaces, size_t numSurfaces, LinearStream *commandStream, CsrDependencies &csrDeps) { TimestampPacketHelper::programCsrDependenciesForTimestampPacketContainer(*commandStream, csrDeps); uint64_t postSyncAddress = 0; if (getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) { auto timestampPacketNodeForPostSync = timestampPacketContainer->peekNodes().at(0); timestampPacketNodeForPostSync->setProfilingCapable(false); postSyncAddress = TimestampPacketHelper::getContextEndGpuAddress(*timestampPacketNodeForPostSync); } submitCacheFlush(surfaces, numSurfaces, commandStream, postSyncAddress); } template void CommandQueueHw::processDispatchForMarker(CommandQueue &commandQueue, LinearStream *commandStream, EventsRequest &eventsRequest, CsrDependencies &csrDeps) { auto event = castToObjectOrAbort(*eventsRequest.outEvent); TagNodeBase *hwTimeStamps = nullptr; TagNodeBase *hwPerfCounter = nullptr; hwTimeStamps = event->getHwTimeStampNode(); HardwareInterface::dispatchProfilingPerfStartCommands(hwTimeStamps, hwPerfCounter, commandStream, commandQueue); 
HardwareInterface::dispatchProfilingPerfEndCommands(hwTimeStamps, hwPerfCounter, commandStream, commandQueue); getGpgpuCommandStreamReceiver().makeResident(*hwTimeStamps->getBaseGraphicsAllocation()); } template void CommandQueueHw::processDispatchForMarkerWithTimestampPacket(CommandQueue &commandQueue, LinearStream *commandStream, EventsRequest &eventsRequest, CsrDependencies &csrDeps) { auto currentTimestampPacketNode = commandQueue.getTimestampPacketContainer()->peekNodes().at(0); auto timestampContextStartGpuAddress = TimestampPacketHelper::getContextStartGpuAddress(*currentTimestampPacketNode); auto timestampGlobalStartAddress = TimestampPacketHelper::getGlobalStartGpuAddress(*currentTimestampPacketNode); EncodeStoreMMIO::encode(*commandStream, GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, timestampContextStartGpuAddress); EncodeStoreMMIO::encode(*commandStream, REG_GLOBAL_TIMESTAMP_LDW, timestampGlobalStartAddress); auto timestampContextEndGpuAddress = TimestampPacketHelper::getContextEndGpuAddress(*currentTimestampPacketNode); auto timestampGlobalEndAddress = TimestampPacketHelper::getGlobalEndGpuAddress(*currentTimestampPacketNode); EncodeStoreMMIO::encode(*commandStream, GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, timestampContextEndGpuAddress); EncodeStoreMMIO::encode(*commandStream, REG_GLOBAL_TIMESTAMP_LDW, timestampGlobalEndAddress); } template void CommandQueueHw::obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueueStatus, unsigned int commandType) { auto isQueueBlockedStatus = isQueueBlocked(); taskLevel = getTaskLevelFromWaitList(this->taskLevel, numEventsInWaitList, eventWaitList); blockQueueStatus = (taskLevel == CompletionStamp::notReady) || isQueueBlockedStatus; auto taskLevelUpdateRequired = isTaskLevelUpdateRequired(taskLevel, eventWaitList, numEventsInWaitList, commandType); if (taskLevelUpdateRequired) { taskLevel++; this->taskLevel = taskLevel; } 
DBG_LOG(EventsDebugEnable, "blockQueue", blockQueueStatus, "virtualEvent", virtualEvent, "taskLevel", taskLevel); } template bool CommandQueueHw::isTaskLevelUpdateRequired(const uint32_t &taskLevel, const cl_event *eventWaitList, const cl_uint &numEventsInWaitList, unsigned int commandType) { bool updateTaskLevel = true; //if we are blocked by user event then no update if (taskLevel == CompletionStamp::notReady) { updateTaskLevel = false; } //if we are executing command without kernel then it will inherit state from //previous commands, barrier is exception if (isCommandWithoutKernel(commandType) && commandType != CL_COMMAND_BARRIER) { updateTaskLevel = false; } //ooq special cases starts here if (this->isOOQEnabled()) { //if no wait list and barrier , do not update task level if (eventWaitList == nullptr && commandType != CL_COMMAND_BARRIER) { updateTaskLevel = false; } //if we have waitlist then deduce task level from waitlist and check if it is higher then current task level of queue if (eventWaitList != nullptr) { auto taskLevelFromEvents = getTaskLevelFromWaitList(0, numEventsInWaitList, eventWaitList); taskLevelFromEvents++; if (taskLevelFromEvents <= this->taskLevel) { updateTaskLevel = false; } } } return updateTaskLevel; } template template CompletionStamp CommandQueueHw::enqueueNonBlocked( Surface **surfaces, size_t surfaceCount, LinearStream &commandStream, size_t commandStreamStart, bool &blocking, bool clearDependenciesForSubCapture, const MultiDispatchInfo &multiDispatchInfo, const EnqueueProperties &enqueueProperties, TimestampPacketDependencies ×tampPacketDependencies, EventsRequest &eventsRequest, EventBuilder &eventBuilder, uint32_t taskLevel, PrintfHandler *printfHandler, CommandStreamReceiver *bcsCsr) { UNRECOVERABLE_IF(multiDispatchInfo.empty()); auto implicitFlush = false; if (printfHandler) { blocking = true; printfHandler->makeResident(getGpgpuCommandStreamReceiver()); } if (multiDispatchInfo.peekMainKernel()->usesSyncBuffer()) { 
device->getDevice().syncBufferHandler->makeResident(getGpgpuCommandStreamReceiver()); } if (timestampPacketContainer) { timestampPacketContainer->makeResident(getGpgpuCommandStreamReceiver()); timestampPacketDependencies.previousEnqueueNodes.makeResident(getGpgpuCommandStreamReceiver()); timestampPacketDependencies.cacheFlushNodes.makeResident(getGpgpuCommandStreamReceiver()); } bool anyUncacheableArgs = false; auto requiresCoherency = false; for (auto surface : CreateRange(surfaces, surfaceCount)) { surface->makeResident(getGpgpuCommandStreamReceiver()); requiresCoherency |= surface->IsCoherent; if (!surface->allowsL3Caching()) { anyUncacheableArgs = true; } } auto mediaSamplerRequired = false; uint32_t numGrfRequired = GrfConfig::DefaultGrfNumber; auto specialPipelineSelectMode = false; Kernel *kernel = nullptr; bool auxTranslationRequired = false; bool useGlobalAtomics = false; for (auto &dispatchInfo : multiDispatchInfo) { if (kernel != dispatchInfo.getKernel()) { kernel = dispatchInfo.getKernel(); } else { continue; } kernel->makeResident(getGpgpuCommandStreamReceiver()); requiresCoherency |= kernel->requiresCoherency(); mediaSamplerRequired |= kernel->isVmeKernel(); auto numGrfRequiredByKernel = static_cast(kernel->getKernelInfo().kernelDescriptor.kernelAttributes.numGrfRequired); numGrfRequired = std::max(numGrfRequired, numGrfRequiredByKernel); specialPipelineSelectMode |= kernel->requiresSpecialPipelineSelectMode(); auxTranslationRequired |= kernel->isAuxTranslationRequired(); if (kernel->hasUncacheableStatelessArgs()) { anyUncacheableArgs = true; } if (kernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics) { useGlobalAtomics = true; } } if (mediaSamplerRequired) { DEBUG_BREAK_IF(device->getDeviceInfo().preemptionSupported != false); } if (isProfilingEnabled() && eventBuilder.getEvent()) { eventBuilder.getEvent()->setSubmitTimeStamp(); auto hwTimestampNode = eventBuilder.getEvent()->getHwTimeStampNode(); if (hwTimestampNode) { 
getGpgpuCommandStreamReceiver().makeResident(*hwTimestampNode->getBaseGraphicsAllocation()); } if (isPerfCountersEnabled()) { getGpgpuCommandStreamReceiver().makeResident(*eventBuilder.getEvent()->getHwPerfCounterNode()->getBaseGraphicsAllocation()); } } IndirectHeap *dsh = nullptr; IndirectHeap *ioh = nullptr; dsh = &getIndirectHeap(IndirectHeap::Type::DYNAMIC_STATE, 0u); ioh = &getIndirectHeap(IndirectHeap::Type::INDIRECT_OBJECT, 0u); auto allocNeedsFlushDC = false; if (!device->isFullRangeSvm()) { if (std::any_of(getGpgpuCommandStreamReceiver().getResidencyAllocations().begin(), getGpgpuCommandStreamReceiver().getResidencyAllocations().end(), [](const auto allocation) { return allocation->isFlushL3Required(); })) { allocNeedsFlushDC = true; } } auto memoryCompressionState = getGpgpuCommandStreamReceiver().getMemoryCompressionState(auxTranslationRequired, device->getHardwareInfo()); DispatchFlags dispatchFlags( {}, //csrDependencies ×tampPacketDependencies.barrierNodes, //barrierTimestampPacketNodes {}, //pipelineSelectArgs this->flushStamp->getStampReference(), //flushStampReference getThrottle(), //throttle ClPreemptionHelper::taskPreemptionMode(getDevice(), multiDispatchInfo), //preemptionMode numGrfRequired, //numGrfRequired L3CachingSettings::l3CacheOn, //l3CacheSettings kernel->getThreadArbitrationPolicy(), //threadArbitrationPolicy kernel->getAdditionalKernelExecInfo(), //additionalKernelExecInfo kernel->getExecutionType(), //kernelExecutionType memoryCompressionState, //memoryCompressionState getSliceCount(), //sliceCount blocking, //blocking shouldFlushDC(commandType, printfHandler) || allocNeedsFlushDC, //dcFlush multiDispatchInfo.usesSlm(), //useSLM !getGpgpuCommandStreamReceiver().isUpdateTagFromWaitEnabled(), //guardCommandBufferWithPipeControl commandType == CL_COMMAND_NDRANGE_KERNEL, //GSBA32BitRequired requiresCoherency, //requiresCoherency (QueuePriority::LOW == priority), //lowPriority implicitFlush, //implicitFlush !eventBuilder.getEvent() || 
getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled(), //outOfOrderExecutionAllowed false, //epilogueRequired false, //usePerDssBackedBuffer kernel->isSingleSubdevicePreferred(), //useSingleSubdevice useGlobalAtomics, //useGlobalAtomics kernel->areMultipleSubDevicesInContext(), //areMultipleSubDevicesInContext kernel->requiresMemoryMigration(), //memoryMigrationRequired isTextureCacheFlushNeeded(commandType)); //textureCacheFlush dispatchFlags.pipelineSelectArgs.mediaSamplerRequired = mediaSamplerRequired; dispatchFlags.pipelineSelectArgs.specialPipelineSelectMode = specialPipelineSelectMode; dispatchFlags.disableEUFusion = kernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.requiresDisabledEUFusion; const bool isHandlingBarrier = getGpgpuCommandStreamReceiver().isStallingCommandsOnNextFlushRequired(); if (getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled() && !clearDependenciesForSubCapture) { eventsRequest.fillCsrDependenciesForTimestampPacketContainer(dispatchFlags.csrDependencies, getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OutOfCsr); if (isHandlingBarrier) { fillCsrDependenciesWithLastBcsPackets(dispatchFlags.csrDependencies); } dispatchFlags.csrDependencies.makeResident(getGpgpuCommandStreamReceiver()); } DEBUG_BREAK_IF(taskLevel >= CompletionStamp::notReady); if (anyUncacheableArgs) { dispatchFlags.l3CacheSettings = L3CachingSettings::l3CacheOff; } else if (!kernel->areStatelessWritesUsed()) { dispatchFlags.l3CacheSettings = L3CachingSettings::l3AndL1On; } if (this->dispatchHints != 0) { dispatchFlags.engineHints = this->dispatchHints; dispatchFlags.epilogueRequired = true; } if (gtpinIsGTPinInitialized()) { gtpinNotifyPreFlushTask(this); } if (enqueueProperties.blitPropertiesContainer->size() > 0) { const auto newTaskCount = bcsCsr->flushBcsTask(*enqueueProperties.blitPropertiesContainer, false, this->isProfilingEnabled(), getDevice()); 
this->updateBcsTaskCount(bcsCsr->getOsContext().getEngineType(), newTaskCount); dispatchFlags.implicitFlush = true; } PRINT_DEBUG_STRING(DebugManager.flags.PrintDebugMessages.get(), stdout, "preemption = %d.\n", static_cast(dispatchFlags.preemptionMode)); CompletionStamp completionStamp = getGpgpuCommandStreamReceiver().flushTask( commandStream, commandStreamStart, dsh, ioh, &getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0u), taskLevel, dispatchFlags, getDevice()); if (isHandlingBarrier) { clearLastBcsPackets(); } if (gtpinIsGTPinInitialized()) { gtpinNotifyFlushTask(completionStamp.taskCount); } return completionStamp; } template void CommandQueueHw::enqueueBlocked( uint32_t commandType, Surface **surfaces, size_t surfaceCount, const MultiDispatchInfo &multiDispatchInfo, TimestampPacketDependencies ×tampPacketDependencies, std::unique_ptr &blockedCommandsData, const EnqueueProperties &enqueueProperties, EventsRequest &eventsRequest, EventBuilder &externalEventBuilder, std::unique_ptr &&printfHandler, CommandStreamReceiver *bcsCsr) { TakeOwnershipWrapper> queueOwnership(*this); //store previous virtual event as it will add dependecies to new virtual event if (this->virtualEvent) { DBG_LOG(EventsDebugEnable, "enqueueBlocked", "previousVirtualEvent", this->virtualEvent); } EventBuilder internalEventBuilder; EventBuilder *eventBuilder; // check if event will be exposed externally if (externalEventBuilder.getEvent()) { externalEventBuilder.getEvent()->incRefInternal(); eventBuilder = &externalEventBuilder; DBG_LOG(EventsDebugEnable, "enqueueBlocked", "output event as virtualEvent", virtualEvent); } else { // it will be an internal event internalEventBuilder.create(this, context); eventBuilder = &internalEventBuilder; DBG_LOG(EventsDebugEnable, "enqueueBlocked", "new virtualEvent", eventBuilder->getEvent()); } auto outEvent = eventBuilder->getEvent(); //update queue taskCount taskCount = outEvent->getCompletionStamp(); std::unique_ptr command; bool 
storeTimestampPackets = false; if (blockedCommandsData) { if (enqueueProperties.blitPropertiesContainer) { blockedCommandsData->blitPropertiesContainer = *enqueueProperties.blitPropertiesContainer; blockedCommandsData->bcsCsr = bcsCsr; blockedCommandsData->blitEnqueue = true; } storeTimestampPackets = (timestampPacketContainer != nullptr); } if (enqueueProperties.operation != EnqueueProperties::Operation::GpuKernel) { command = std::make_unique(*this, blockedCommandsData); } else { //store task data in event std::vector allSurfaces; Kernel *kernel = nullptr; for (auto &dispatchInfo : multiDispatchInfo) { if (kernel != dispatchInfo.getKernel()) { kernel = dispatchInfo.getKernel(); } else { continue; } kernel->getResidency(allSurfaces); } allSurfaces.reserve(allSurfaces.size() + surfaceCount); for (auto &surface : CreateRange(surfaces, surfaceCount)) { allSurfaces.push_back(surface->duplicate()); } PreemptionMode preemptionMode = ClPreemptionHelper::taskPreemptionMode(getDevice(), multiDispatchInfo); bool slmUsed = multiDispatchInfo.usesSlm(); command = std::make_unique(*this, blockedCommandsData, std::move(allSurfaces), shouldFlushDC(commandType, printfHandler.get()), slmUsed, commandType, std::move(printfHandler), preemptionMode, multiDispatchInfo.peekMainKernel(), (uint32_t)multiDispatchInfo.size()); } if (storeTimestampPackets) { command->setTimestampPacketNode(*timestampPacketContainer, std::move(timestampPacketDependencies)); command->setEventsRequest(eventsRequest); } else if (this->context->getRootDeviceIndices().size() > 1) { command->setEventsRequest(eventsRequest); } outEvent->setCommand(std::move(command)); eventBuilder->addParentEvents(ArrayRef(eventsRequest.eventWaitList, eventsRequest.numEventsInWaitList)); eventBuilder->addParentEvent(this->virtualEvent); eventBuilder->finalize(); if (this->virtualEvent) { this->virtualEvent->decRefInternal(); } this->virtualEvent = outEvent; } template CompletionStamp CommandQueueHw::enqueueCommandWithoutKernel( 
Surface **surfaces, size_t surfaceCount, LinearStream *commandStream, size_t commandStreamStart, bool &blocking, const EnqueueProperties &enqueueProperties, TimestampPacketDependencies ×tampPacketDependencies, EventsRequest &eventsRequest, EventBuilder &eventBuilder, uint32_t taskLevel, CsrDependencies &csrDeps, CommandStreamReceiver *bcsCsr) { CompletionStamp completionStamp = {this->taskCount, this->taskLevel, this->flushStamp->peekStamp()}; bool flushGpgpuCsr = true; if ((enqueueProperties.operation == EnqueueProperties::Operation::Blit) && commandStream == nullptr) { flushGpgpuCsr = false; } else { csrDeps.makeResident(getGpgpuCommandStreamReceiver()); } if (eventBuilder.getEvent() && isProfilingEnabled()) { eventBuilder.getEvent()->setSubmitTimeStamp(); eventBuilder.getEvent()->setStartTimeStamp(); } if (flushGpgpuCsr) { if (timestampPacketContainer) { timestampPacketContainer->makeResident(getGpgpuCommandStreamReceiver()); timestampPacketDependencies.previousEnqueueNodes.makeResident(getGpgpuCommandStreamReceiver()); timestampPacketDependencies.cacheFlushNodes.makeResident(getGpgpuCommandStreamReceiver()); } for (auto surface : CreateRange(surfaces, surfaceCount)) { surface->makeResident(getGpgpuCommandStreamReceiver()); } auto rootDeviceIndex = getDevice().getRootDeviceIndex(); DispatchFlags dispatchFlags( {}, //csrDependencies ×tampPacketDependencies.barrierNodes, //barrierTimestampPacketNodes {}, //pipelineSelectArgs flushStamp->getStampReference(), //flushStampReference getThrottle(), //throttle device->getPreemptionMode(), //preemptionMode GrfConfig::NotApplicable, //numGrfRequired L3CachingSettings::NotApplicable, //l3CacheSettings ThreadArbitrationPolicy::NotPresent, //threadArbitrationPolicy AdditionalKernelExecInfo::NotApplicable, //additionalKernelExecInfo KernelExecutionType::NotApplicable, //kernelExecutionType MemoryCompressionState::NotApplicable, //memoryCompressionState getSliceCount(), //sliceCount blocking, //blocking false, //dcFlush false, 
//useSLM !getGpgpuCommandStreamReceiver().isUpdateTagFromWaitEnabled(), //guardCommandBufferWithPipeControl false, //GSBA32BitRequired false, //requiresCoherency false, //lowPriority (enqueueProperties.operation == EnqueueProperties::Operation::Blit), //implicitFlush getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled(), //outOfOrderExecutionAllowed false, //epilogueRequired false, //usePerDssBackedBuffer false, //useSingleSubdevice false, //useGlobalAtomics context->containsMultipleSubDevices(rootDeviceIndex), //areMultipleSubDevicesInContext false, //memoryMigrationRequired false); //textureCacheFlush const bool isHandlingBarrier = getGpgpuCommandStreamReceiver().isStallingCommandsOnNextFlushRequired(); if (getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) { eventsRequest.fillCsrDependenciesForTimestampPacketContainer(dispatchFlags.csrDependencies, getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OutOfCsr); if (isHandlingBarrier) { fillCsrDependenciesWithLastBcsPackets(dispatchFlags.csrDependencies); } dispatchFlags.csrDependencies.makeResident(getGpgpuCommandStreamReceiver()); } completionStamp = getGpgpuCommandStreamReceiver().flushTask( *commandStream, commandStreamStart, &getIndirectHeap(IndirectHeap::Type::DYNAMIC_STATE, 0u), &getIndirectHeap(IndirectHeap::Type::INDIRECT_OBJECT, 0u), &getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0u), taskLevel, dispatchFlags, getDevice()); if (isHandlingBarrier) { clearLastBcsPackets(); } } if (enqueueProperties.operation == EnqueueProperties::Operation::Blit) { UNRECOVERABLE_IF(!enqueueProperties.blitPropertiesContainer); const auto newTaskCount = bcsCsr->flushBcsTask(*enqueueProperties.blitPropertiesContainer, false, this->isProfilingEnabled(), getDevice()); this->updateBcsTaskCount(bcsCsr->getOsContext().getEngineType(), newTaskCount); } return completionStamp; } template void CommandQueueHw::computeOffsetsValueForRectCommands(size_t *bufferOffset, size_t *hostOffset, 
const size_t *bufferOrigin, const size_t *hostOrigin, const size_t *region, size_t bufferRowPitch, size_t bufferSlicePitch, size_t hostRowPitch, size_t hostSlicePitch) { size_t computedBufferRowPitch = bufferRowPitch ? bufferRowPitch : region[0]; size_t computedBufferSlicePitch = bufferSlicePitch ? bufferSlicePitch : region[1] * computedBufferRowPitch; size_t computedHostRowPitch = hostRowPitch ? hostRowPitch : region[0]; size_t computedHostSlicePitch = hostSlicePitch ? hostSlicePitch : region[1] * computedHostRowPitch; *bufferOffset = bufferOrigin[2] * computedBufferSlicePitch + bufferOrigin[1] * computedBufferRowPitch + bufferOrigin[0]; *hostOffset = hostOrigin[2] * computedHostSlicePitch + hostOrigin[1] * computedHostRowPitch + hostOrigin[0]; } template size_t CommandQueueHw::calculateHostPtrSizeForImage(const size_t *region, size_t rowPitch, size_t slicePitch, Image *image) { auto bytesPerPixel = image->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes; auto dstRowPitch = rowPitch ? rowPitch : region[0] * bytesPerPixel; auto dstSlicePitch = slicePitch ? slicePitch : ((image->getImageDesc().image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY ? 
1 : region[1]) * dstRowPitch); return Image::calculateHostPtrSize(region, dstRowPitch, dstSlicePitch, bytesPerPixel, image->getImageDesc().image_type); } template template cl_int CommandQueueHw::enqueueBlit(const MultiDispatchInfo &multiDispatchInfo, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool blocking, CommandStreamReceiver &bcsCsr) { auto bcsCommandStreamReceiverOwnership = bcsCsr.obtainUniqueOwnership(); EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, event); EventBuilder eventBuilder; setupEvent(eventBuilder, eventsRequest.outEvent, cmdType); eventsRequest.setupBcsCsrForOutputEvent(bcsCsr); std::unique_ptr blockedCommandsData; TakeOwnershipWrapper> queueOwnership(*this); auto commandStreamReceiverOwnership = getGpgpuCommandStreamReceiver().obtainUniqueOwnership(); auto blockQueue = false; auto taskLevel = 0u; obtainTaskLevelAndBlockedStatus(taskLevel, eventsRequest.numEventsInWaitList, eventsRequest.eventWaitList, blockQueue, cmdType); auto clearAllDependencies = queueDependenciesClearRequired(); enqueueHandlerHook(cmdType, multiDispatchInfo); aubCaptureHook(blocking, clearAllDependencies, multiDispatchInfo); if (DebugManager.flags.MakeEachEnqueueBlocking.get()) { blocking = true; } TimestampPacketDependencies timestampPacketDependencies; BlitPropertiesContainer blitPropertiesContainer; CsrDependencies csrDeps; eventsRequest.fillCsrDependenciesForTimestampPacketContainer(csrDeps, bcsCsr, CsrDependencies::DependenciesType::All); auto allocator = bcsCsr.getTimestampPacketAllocator(); if (!blockQueue) { setupBarrierTimestampForBcsEngines(bcsCsr.getOsContext().getEngineType(), timestampPacketDependencies); if (isOOQEnabled()) { TimestampPacketContainer clearBarrierNodes; timestampPacketDependencies.barrierNodes.swapNodes(clearBarrierNodes); } } processBarrierTimestampForBcsEngine(bcsCsr.getOsContext().getEngineType(), timestampPacketDependencies); auto gpgpuSubmission = 
isGpgpuSubmissionForBcsRequired(blockQueue, timestampPacketDependencies); if (isCacheFlushForBcsRequired() && gpgpuSubmission) { timestampPacketDependencies.cacheFlushNodes.add(allocator->getTag()); } obtainNewTimestampPacketNodes(1, timestampPacketDependencies.previousEnqueueNodes, clearAllDependencies, bcsCsr); csrDeps.timestampPacketContainer.push_back(×tampPacketDependencies.previousEnqueueNodes); if (eventBuilder.getEvent()) { eventBuilder.getEvent()->addTimestampPacketNodes(*timestampPacketContainer); } CompletionStamp completionStamp = {CompletionStamp::notReady, taskLevel, 0}; const EnqueueProperties enqueueProperties(true, false, false, false, false, &blitPropertiesContainer); LinearStream *gpgpuCommandStream = {}; size_t gpgpuCommandStreamStart = {}; if (gpgpuSubmission) { gpgpuCommandStream = obtainCommandStream(csrDeps, true, blockQueue, multiDispatchInfo, eventsRequest, blockedCommandsData, nullptr, 0, false); gpgpuCommandStreamStart = gpgpuCommandStream->getUsed(); } blitPropertiesContainer.push_back(processDispatchForBlitEnqueue(bcsCsr, multiDispatchInfo, timestampPacketDependencies, eventsRequest, gpgpuCommandStream, cmdType, blockQueue)); if (!blockQueue) { completionStamp = enqueueCommandWithoutKernel(nullptr, 0, gpgpuCommandStream, gpgpuCommandStreamStart, blocking, enqueueProperties, timestampPacketDependencies, eventsRequest, eventBuilder, taskLevel, csrDeps, &bcsCsr); if (eventBuilder.getEvent()) { eventBuilder.getEvent()->flushStamp->replaceStampObject(this->flushStamp->getStampReference()); } this->latestSentEnqueueType = enqueueProperties.operation; setLastBcsPacket(bcsCsr.getOsContext().getEngineType()); } updateFromCompletionStamp(completionStamp, eventBuilder.getEvent()); if (blockQueue) { enqueueBlocked(cmdType, nullptr, 0, multiDispatchInfo, timestampPacketDependencies, blockedCommandsData, enqueueProperties, eventsRequest, eventBuilder, nullptr, &bcsCsr); } 
timestampPacketDependencies.moveNodesToNewContainer(*deferredTimestampPackets); commandStreamReceiverOwnership.unlock(); queueOwnership.unlock(); bcsCommandStreamReceiverOwnership.unlock(); if (blocking) { const auto waitStatus = waitForAllEngines(blockQueue, nullptr); if (waitStatus == WaitStatus::GpuHang) { return CL_OUT_OF_RESOURCES; } } return CL_SUCCESS; } template template cl_int CommandQueueHw::dispatchBcsOrGpgpuEnqueue(MultiDispatchInfo &dispatchInfo, Surface *(&surfaces)[surfaceCount], EBuiltInOps::Type builtInOperation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool blocking, CommandStreamReceiver &csr) { const bool blit = EngineHelpers::isBcs(csr.getOsContext().getEngineType()); if (blit) { return enqueueBlit(dispatchInfo, numEventsInWaitList, eventWaitList, event, blocking, csr); } else { auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(builtInOperation, this->getClDevice()); BuiltInOwnershipWrapper builtInLock(builder, this->context); builder.buildDispatchInfos(dispatchInfo); return enqueueHandler( surfaces, blocking, dispatchInfo, numEventsInWaitList, eventWaitList, event); } } template bool CommandQueueHw::isBlitAuxTranslationRequired(const MultiDispatchInfo &multiDispatchInfo) { return multiDispatchInfo.getKernelObjsForAuxTranslation() && (multiDispatchInfo.getKernelObjsForAuxTranslation()->size() > 0) && (HwHelperHw::get().getAuxTranslationMode(device->getHardwareInfo()) == AuxTranslationMode::Blit); } } // namespace NEO compute-runtime-22.14.22890/opencl/source/command_queue/enqueue_copy_buffer.h000066400000000000000000000033771422164147700271710ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/built_ins/built_ins.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "opencl/source/command_queue/command_queue_hw.h" #include 
"opencl/source/command_queue/enqueue_common.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/memory_manager/mem_obj_surface.h" #include namespace NEO { template cl_int CommandQueueHw::enqueueCopyBuffer( Buffer *srcBuffer, Buffer *dstBuffer, size_t srcOffset, size_t dstOffset, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { auto eBuiltInOpsType = EBuiltInOps::CopyBufferToBuffer; constexpr cl_command_type cmdType = CL_COMMAND_COPY_BUFFER; CsrSelectionArgs csrSelectionArgs{cmdType, srcBuffer, dstBuffer, device->getRootDeviceIndex(), &size}; CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs); if (forceStateless(std::max(srcBuffer->getSize(), dstBuffer->getSize()))) { eBuiltInOpsType = EBuiltInOps::CopyBufferToBufferStateless; } BuiltinOpParams dc; dc.srcMemObj = srcBuffer; dc.dstMemObj = dstBuffer; dc.srcOffset = {srcOffset, 0, 0}; dc.dstOffset = {dstOffset, 0, 0}; dc.size = {size, 0, 0}; MultiDispatchInfo dispatchInfo(dc); MemObjSurface s1(srcBuffer); MemObjSurface s2(dstBuffer); Surface *surfaces[] = {&s1, &s2}; return dispatchBcsOrGpgpuEnqueue(dispatchInfo, surfaces, eBuiltInOpsType, numEventsInWaitList, eventWaitList, event, false, csr); } } // namespace NEO compute-runtime-22.14.22890/opencl/source/command_queue/enqueue_copy_buffer_rect.h000066400000000000000000000037421422164147700302020ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/built_ins/built_ins.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/memory_manager/mem_obj_surface.h" #include namespace NEO { template cl_int CommandQueueHw::enqueueCopyBufferRect( 
Buffer *srcBuffer, Buffer *dstBuffer, const size_t *srcOrigin, const size_t *dstOrigin, const size_t *region, size_t srcRowPitch, size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { auto eBuiltInOps = EBuiltInOps::CopyBufferRect; constexpr cl_command_type cmdType = CL_COMMAND_COPY_BUFFER_RECT; CsrSelectionArgs csrSelectionArgs{cmdType, srcBuffer, dstBuffer, device->getRootDeviceIndex(), region}; CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs); if (forceStateless(std::max(srcBuffer->getSize(), dstBuffer->getSize()))) { eBuiltInOps = EBuiltInOps::CopyBufferRectStateless; } MemObjSurface srcBufferSurf(srcBuffer); MemObjSurface dstBufferSurf(dstBuffer); Surface *surfaces[] = {&srcBufferSurf, &dstBufferSurf}; BuiltinOpParams dc; dc.srcMemObj = srcBuffer; dc.dstMemObj = dstBuffer; dc.srcOffset = srcOrigin; dc.dstOffset = dstOrigin; dc.size = region; dc.srcRowPitch = srcRowPitch; dc.srcSlicePitch = srcSlicePitch; dc.dstRowPitch = dstRowPitch; dc.dstSlicePitch = dstSlicePitch; MultiDispatchInfo dispatchInfo(dc); return dispatchBcsOrGpgpuEnqueue(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, false, csr); } } // namespace NEO compute-runtime-22.14.22890/opencl/source/command_queue/enqueue_copy_buffer_to_image.h000066400000000000000000000040001422164147700310150ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/built_ins/built_ins.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/helpers/mipmap.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/memory_manager/mem_obj_surface.h" #include namespace NEO { template 
cl_int CommandQueueHw::enqueueCopyBufferToImage( Buffer *srcBuffer, Image *dstImage, size_t srcOffset, const size_t *dstOrigin, const size_t *region, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { auto eBuiltInOpsType = EBuiltInOps::CopyBufferToImage3d; if (forceStateless(srcBuffer->getSize())) { eBuiltInOpsType = EBuiltInOps::CopyBufferToImage3dStateless; } auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(eBuiltInOpsType, this->getClDevice()); BuiltInOwnershipWrapper builtInLock(builder, this->context); MemObjSurface srcBufferSurf(srcBuffer); MemObjSurface dstImgSurf(dstImage); Surface *surfaces[] = {&srcBufferSurf, &dstImgSurf}; BuiltinOpParams dc; dc.srcMemObj = srcBuffer; dc.dstMemObj = dstImage; dc.srcOffset = {srcOffset, 0, 0}; dc.dstOffset = dstOrigin; dc.size = region; if (isMipMapped(dstImage->getImageDesc())) { dc.dstMipLevel = findMipLevel(dstImage->getImageDesc().image_type, dstOrigin); } MultiDispatchInfo dispatchInfo(dc); builder.buildDispatchInfos(dispatchInfo); return enqueueHandler( surfaces, false, dispatchInfo, numEventsInWaitList, eventWaitList, event); } } // namespace NEO compute-runtime-22.14.22890/opencl/source/command_queue/enqueue_copy_image.h000066400000000000000000000036631422164147700270000ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/built_ins/built_ins.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/helpers/basic_math.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/helpers/mipmap.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/memory_manager/mem_obj_surface.h" #include #include namespace NEO { template cl_int CommandQueueHw::enqueueCopyImage( Image *srcImage, Image *dstImage, const size_t *srcOrigin, const size_t 
*dstOrigin, const size_t *region, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { constexpr cl_command_type cmdType = CL_COMMAND_COPY_IMAGE; CsrSelectionArgs csrSelectionArgs{cmdType, srcImage, dstImage, device->getRootDeviceIndex(), region, srcOrigin, dstOrigin}; CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs); MemObjSurface srcImgSurf(srcImage); MemObjSurface dstImgSurf(dstImage); Surface *surfaces[] = {&srcImgSurf, &dstImgSurf}; BuiltinOpParams dc; dc.srcMemObj = srcImage; dc.dstMemObj = dstImage; dc.srcOffset = srcOrigin; dc.dstOffset = dstOrigin; dc.size = region; if (isMipMapped(srcImage->getImageDesc())) { dc.srcMipLevel = findMipLevel(srcImage->getImageDesc().image_type, srcOrigin); } if (isMipMapped(dstImage->getImageDesc())) { dc.dstMipLevel = findMipLevel(dstImage->getImageDesc().image_type, dstOrigin); } MultiDispatchInfo dispatchInfo(dc); return dispatchBcsOrGpgpuEnqueue(dispatchInfo, surfaces, EBuiltInOps::CopyImageToImage3d, numEventsInWaitList, eventWaitList, event, false, csr); } } // namespace NEO compute-runtime-22.14.22890/opencl/source/command_queue/enqueue_copy_image_to_buffer.h000066400000000000000000000037771422164147700310410ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/built_ins/built_ins.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/helpers/mipmap.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/memory_manager/mem_obj_surface.h" #include namespace NEO { template cl_int CommandQueueHw::enqueueCopyImageToBuffer( Image *srcImage, Buffer *dstBuffer, const size_t *srcOrigin, const size_t *region, size_t dstOffset, cl_uint numEventsInWaitList, const cl_event 
*eventWaitList, cl_event *event) { auto eBuiltInOpsType = EBuiltInOps::CopyImage3dToBuffer; if (forceStateless(dstBuffer->getSize())) { eBuiltInOpsType = EBuiltInOps::CopyImage3dToBufferStateless; } auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(eBuiltInOpsType, this->getClDevice()); BuiltInOwnershipWrapper builtInLock(builder, this->context); MemObjSurface srcImgSurf(srcImage); MemObjSurface dstBufferSurf(dstBuffer); Surface *surfaces[] = {&srcImgSurf, &dstBufferSurf}; BuiltinOpParams dc; dc.srcMemObj = srcImage; dc.dstMemObj = dstBuffer; dc.srcOffset = srcOrigin; dc.dstOffset = {dstOffset, 0, 0}; dc.size = region; if (isMipMapped(srcImage->getImageDesc())) { dc.srcMipLevel = findMipLevel(srcImage->getImageDesc().image_type, srcOrigin); } MultiDispatchInfo dispatchInfo(dc); builder.buildDispatchInfos(dispatchInfo); return enqueueHandler( surfaces, false, dispatchInfo, numEventsInWaitList, eventWaitList, event); } } // namespace NEO compute-runtime-22.14.22890/opencl/source/command_queue/enqueue_fill_buffer.h000066400000000000000000000075251422164147700271440ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/built_ins/built_ins.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/source/memory_manager/memory_manager.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/memory_manager/mem_obj_surface.h" #include namespace NEO { template cl_int CommandQueueHw::enqueueFillBuffer( Buffer *buffer, const void *pattern, size_t patternSize, size_t offset, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { auto memoryManager = getDevice().getMemoryManager(); 
DEBUG_BREAK_IF(nullptr == memoryManager); auto commandStreamReceieverOwnership = getGpgpuCommandStreamReceiver().obtainUniqueOwnership(); auto storageWithAllocations = getGpgpuCommandStreamReceiver().getInternalAllocationStorage(); auto allocationType = AllocationType::FILL_PATTERN; auto patternAllocation = storageWithAllocations->obtainReusableAllocation(patternSize, allocationType).release(); commandStreamReceieverOwnership.unlock(); if (!patternAllocation) { patternAllocation = memoryManager->allocateGraphicsMemoryWithProperties({getDevice().getRootDeviceIndex(), alignUp(patternSize, MemoryConstants::cacheLineSize), AllocationType::FILL_PATTERN, getDevice().getDeviceBitfield()}); } if (patternSize == 1) { int patternInt = (uint32_t)((*(uint8_t *)pattern << 24) | (*(uint8_t *)pattern << 16) | (*(uint8_t *)pattern << 8) | *(uint8_t *)pattern); memcpy_s(patternAllocation->getUnderlyingBuffer(), sizeof(int), &patternInt, sizeof(int)); } else if (patternSize == 2) { int patternInt = (uint32_t)((*(uint16_t *)pattern << 16) | *(uint16_t *)pattern); memcpy_s(patternAllocation->getUnderlyingBuffer(), sizeof(int), &patternInt, sizeof(int)); } else { memcpy_s(patternAllocation->getUnderlyingBuffer(), patternSize, pattern, patternSize); } auto eBuiltInOps = EBuiltInOps::FillBuffer; if (forceStateless(buffer->getSize())) { eBuiltInOps = EBuiltInOps::FillBufferStateless; } auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(eBuiltInOps, this->getClDevice()); BuiltInOwnershipWrapper builtInLock(builder, this->context); BuiltinOpParams dc; auto multiGraphicsAllocation = MultiGraphicsAllocation(getDevice().getRootDeviceIndex()); multiGraphicsAllocation.addAllocation(patternAllocation); MemObj patternMemObj(this->context, 0, {}, 0, 0, alignUp(patternSize, 4), patternAllocation->getUnderlyingBuffer(), patternAllocation->getUnderlyingBuffer(), std::move(multiGraphicsAllocation), false, false, true); dc.srcMemObj = &patternMemObj; dc.dstMemObj = buffer; 
dc.dstOffset = {offset, 0, 0}; dc.size = {size, 0, 0}; MultiDispatchInfo dispatchInfo(dc); builder.buildDispatchInfos(dispatchInfo); MemObjSurface s1(buffer); GeneralSurface s2(patternAllocation); Surface *surfaces[] = {&s1, &s2}; const auto enqueueResult = enqueueHandler( surfaces, false, dispatchInfo, numEventsInWaitList, eventWaitList, event); auto storageForAllocation = getGpgpuCommandStreamReceiver().getInternalAllocationStorage(); storageForAllocation->storeAllocationWithTaskCount(std::unique_ptr(patternAllocation), REUSABLE_ALLOCATION, taskCount); return enqueueResult; } } // namespace NEO compute-runtime-22.14.22890/opencl/source/command_queue/enqueue_fill_image.h000066400000000000000000000030541422164147700267460ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/built_ins/built_ins.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/helpers/basic_math.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/memory_manager/mem_obj_surface.h" #include #include namespace NEO { template cl_int CommandQueueHw::enqueueFillImage( Image *image, const void *fillColor, const size_t *origin, const size_t *region, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::FillImage3d, this->getClDevice()); BuiltInOwnershipWrapper builtInLock(builder, this->context); MemObjSurface dstImgSurf(image); Surface *surfaces[] = {&dstImgSurf}; BuiltinOpParams dc; dc.srcPtr = const_cast(fillColor); dc.dstMemObj = image; dc.srcOffset = {0, 0, 0}; dc.dstOffset = origin; dc.size = region; MultiDispatchInfo di(dc); builder.buildDispatchInfos(di); return enqueueHandler( surfaces, false, di, numEventsInWaitList, 
eventWaitList, event); } } // namespace NEO compute-runtime-22.14.22890/opencl/source/command_queue/enqueue_kernel.h000066400000000000000000000121271422164147700261370ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/command_stream_receiver.h" #include "opencl/source/built_ins/builtins_dispatch_builder.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/command_queue/gpgpu_walker.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/helpers/task_information.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/memory_manager/mem_obj_surface.h" #include namespace NEO { template cl_int CommandQueueHw::enqueueKernel( Kernel *pKernel, cl_uint workDim, const size_t *globalWorkOffsetIn, const size_t *globalWorkSizeIn, const size_t *localWorkSizeIn, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { if (workDim > device->getDeviceInfo().maxWorkItemDimensions) { return CL_INVALID_WORK_DIMENSION; } size_t region[3] = {1, 1, 1}; size_t globalWorkOffset[3] = {0, 0, 0}; size_t workGroupSize[3] = {1, 1, 1}; size_t enqueuedLocalWorkSize[3] = {0, 0, 0}; auto &kernel = *pKernel; const auto &kernelInfo = kernel.getKernelInfo(); if (!kernel.isPatched()) { if (event) { *event = nullptr; } return CL_INVALID_KERNEL_ARGS; } if (kernel.isUsingSharedObjArgs()) { kernel.resetSharedObjectsPatchAddresses(); } bool haveRequiredWorkGroupSize = false; if (kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0] != 0) { haveRequiredWorkGroupSize = true; } size_t remainder = 0; size_t totalWorkItems = 1u; const size_t *localWkgSizeToPass = localWorkSizeIn ? workGroupSize : nullptr; size_t reqdWorkgroupSize[3] = {}; for (auto i = 0u; i < workDim; i++) { region[i] = globalWorkSizeIn ? globalWorkSizeIn[i] : 0; globalWorkOffset[i] = globalWorkOffsetIn ? 
globalWorkOffsetIn[i] : 0; if (localWorkSizeIn) { if (haveRequiredWorkGroupSize) { if (kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[i] != localWorkSizeIn[i]) { return CL_INVALID_WORK_GROUP_SIZE; } } if (localWorkSizeIn[i] == 0) { return CL_INVALID_WORK_GROUP_SIZE; } if (kernel.getAllowNonUniform()) { workGroupSize[i] = std::min(localWorkSizeIn[i], std::max(static_cast(1), globalWorkSizeIn[i])); } else { workGroupSize[i] = localWorkSizeIn[i]; } enqueuedLocalWorkSize[i] = localWorkSizeIn[i]; totalWorkItems *= localWorkSizeIn[i]; } remainder += region[i] % workGroupSize[i]; } if (remainder != 0 && !kernel.getAllowNonUniform()) { return CL_INVALID_WORK_GROUP_SIZE; } if (haveRequiredWorkGroupSize) { reqdWorkgroupSize[0] = kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0]; reqdWorkgroupSize[1] = kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1]; reqdWorkgroupSize[2] = kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2]; localWkgSizeToPass = reqdWorkgroupSize; } NullSurface s; Surface *surfaces[] = {&s}; if (context->isProvidingPerformanceHints()) { if (kernel.hasPrintfOutput()) { context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, PRINTF_DETECTED_IN_KERNEL, kernelInfo.kernelDescriptor.kernelMetadata.kernelName.c_str()); } if (kernel.requiresCoherency()) { context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, KERNEL_REQUIRES_COHERENCY, kernelInfo.kernelDescriptor.kernelMetadata.kernelName.c_str()); } } if (kernelInfo.builtinDispatchBuilder != nullptr) { cl_int err = kernelInfo.builtinDispatchBuilder->validateDispatch(&kernel, workDim, Vec3(region), Vec3(workGroupSize), Vec3(globalWorkOffset)); if (err != CL_SUCCESS) return err; } DBG_LOG(PrintDispatchParameters, "Kernel: ", kernelInfo.kernelDescriptor.kernelMetadata.kernelName, ",LWS:, ", localWorkSizeIn ? localWorkSizeIn[0] : 0, ",", localWorkSizeIn ? localWorkSizeIn[1] : 0, ",", localWorkSizeIn ? 
localWorkSizeIn[2] : 0, ",GWS:,", globalWorkSizeIn[0], ",", globalWorkSizeIn[1], ",", globalWorkSizeIn[2], ",SIMD:, ", kernelInfo.getMaxSimdSize()); if (totalWorkItems > kernel.getMaxKernelWorkGroupSize()) { return CL_INVALID_WORK_GROUP_SIZE; } return enqueueHandler( surfaces, false, &kernel, workDim, globalWorkOffset, region, localWkgSizeToPass, enqueuedLocalWorkSize, numEventsInWaitList, eventWaitList, event); } } // namespace NEO compute-runtime-22.14.22890/opencl/source/command_queue/enqueue_marker.h000066400000000000000000000020471422164147700261400ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/device/device.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/command_queue/gpgpu_walker.h" #include "opencl/source/event/event.h" #include "opencl/source/memory_manager/mem_obj_surface.h" #include namespace NEO { template cl_int CommandQueueHw::enqueueMarkerWithWaitList( cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { NullSurface s; Surface *surfaces[] = {&s}; return enqueueHandler(surfaces, false, MultiDispatchInfo(), numEventsInWaitList, eventWaitList, event); } } // namespace NEO compute-runtime-22.14.22890/opencl/source/command_queue/enqueue_migrate_mem_objects.h000066400000000000000000000027051422164147700306570ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/device/device.h" #include "shared/source/memory_manager/surface.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/command_queue/gpgpu_walker.h" #include "opencl/source/event/event.h" namespace NEO { template cl_int CommandQueueHw::enqueueMigrateMemObjects(cl_uint 
numMemObjects, const cl_mem *memObjects, cl_mem_migration_flags flags, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { NullSurface s; Surface *surfaces[] = {&s}; return enqueueHandler(surfaces, false, MultiDispatchInfo(), numEventsInWaitList, eventWaitList, event); } } // namespace NEO compute-runtime-22.14.22890/opencl/source/command_queue/enqueue_read_buffer.h000066400000000000000000000113561422164147700271260ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/built_ins/built_ins.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/helpers/cache_policy.h" #include "shared/source/helpers/engine_node_helper.h" #include "shared/source/memory_manager/unified_memory_manager.h" #include "shared/source/os_interface/os_context.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/command_queue/enqueue_common.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/memory_manager/mem_obj_surface.h" #include namespace NEO { template cl_int CommandQueueHw::enqueueReadBuffer( Buffer *buffer, cl_bool blockingRead, size_t offset, size_t size, void *ptr, GraphicsAllocation *mapAllocation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { const cl_command_type cmdType = CL_COMMAND_READ_BUFFER; CsrSelectionArgs csrSelectionArgs{cmdType, buffer, {}, device->getRootDeviceIndex(), &size}; CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs); if (nullptr == mapAllocation) { notifyEnqueueReadBuffer(buffer, !!blockingRead, EngineHelpers::isBcs(csr.getOsContext().getEngineType())); } auto rootDeviceIndex = getDevice().getRootDeviceIndex(); bool isMemTransferNeeded = buffer->isMemObjZeroCopy() ? 
buffer->checkIfMemoryTransferIsRequired(offset, 0, ptr, cmdType) : true; bool isCpuCopyAllowed = bufferCpuCopyAllowed(buffer, cmdType, blockingRead, size, ptr, numEventsInWaitList, eventWaitList); InternalMemoryType memoryType = InternalMemoryType::NOT_SPECIFIED; if (!mapAllocation) { cl_int retVal = getContext().tryGetExistingHostPtrAllocation(ptr, size, rootDeviceIndex, mapAllocation, memoryType, isCpuCopyAllowed); if (retVal != CL_SUCCESS) { return retVal; } } if (isCpuCopyAllowed) { if (isMemTransferNeeded) { return enqueueReadWriteBufferOnCpuWithMemoryTransfer(cmdType, buffer, offset, size, ptr, numEventsInWaitList, eventWaitList, event); } else { return enqueueReadWriteBufferOnCpuWithoutMemoryTransfer(cmdType, buffer, offset, size, ptr, numEventsInWaitList, eventWaitList, event); } } else if (!isMemTransferNeeded) { return enqueueMarkerForReadWriteOperation(buffer, ptr, cmdType, blockingRead, numEventsInWaitList, eventWaitList, event); } auto eBuiltInOps = EBuiltInOps::CopyBufferToBuffer; if (forceStateless(buffer->getSize())) { eBuiltInOps = EBuiltInOps::CopyBufferToBufferStateless; } void *dstPtr = ptr; MemObjSurface bufferSurf(buffer); HostPtrSurface hostPtrSurf(dstPtr, size); GeneralSurface mapSurface; Surface *surfaces[] = {&bufferSurf, nullptr}; if (mapAllocation) { surfaces[1] = &mapSurface; mapSurface.setGraphicsAllocation(mapAllocation); dstPtr = convertAddressWithOffsetToGpuVa(dstPtr, memoryType, *mapAllocation); } else { surfaces[1] = &hostPtrSurf; if (size != 0) { bool status = csr.createAllocationForHostSurface(hostPtrSurf, true); if (!status) { return CL_OUT_OF_RESOURCES; } dstPtr = reinterpret_cast(hostPtrSurf.getAllocation()->getGpuAddress()); } } void *alignedDstPtr = alignDown(dstPtr, 4); size_t dstPtrOffset = ptrDiff(dstPtr, alignedDstPtr); BuiltinOpParams dc; dc.dstPtr = alignedDstPtr; dc.dstOffset = {dstPtrOffset, 0, 0}; dc.srcMemObj = buffer; dc.srcOffset = {offset, 0, 0}; dc.size = {size, 0, 0}; dc.transferAllocation = mapAllocation ? 
mapAllocation : hostPtrSurf.getAllocation(); MultiDispatchInfo dispatchInfo(dc); if (context->isProvidingPerformanceHints()) { context->providePerformanceHintForMemoryTransfer(CL_COMMAND_READ_BUFFER, true, static_cast(buffer), ptr); if (!isL3Capable(ptr, size)) { context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_READ_BUFFER_DOESNT_MEET_ALIGNMENT_RESTRICTIONS, ptr, size, MemoryConstants::pageSize, MemoryConstants::pageSize); } } return dispatchBcsOrGpgpuEnqueue(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingRead, csr); } } // namespace NEO compute-runtime-22.14.22890/opencl/source/command_queue/enqueue_read_buffer_rect.h000066400000000000000000000110211422164147700301300ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/built_ins/built_ins.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/command_queue/enqueue_common.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/memory_manager/mem_obj_surface.h" #include namespace NEO { template cl_int CommandQueueHw::enqueueReadBufferRect( Buffer *buffer, cl_bool blockingRead, const size_t *bufferOrigin, const size_t *hostOrigin, const size_t *region, size_t bufferRowPitch, size_t bufferSlicePitch, size_t hostRowPitch, size_t hostSlicePitch, void *ptr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { const cl_command_type cmdType = CL_COMMAND_READ_BUFFER_RECT; CsrSelectionArgs csrSelectionArgs{cmdType, buffer, {}, device->getRootDeviceIndex(), region}; CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs); auto isMemTransferNeeded = true; if (buffer->isMemObjZeroCopy()) { size_t bufferOffset; size_t hostOffset; 
computeOffsetsValueForRectCommands(&bufferOffset, &hostOffset, bufferOrigin, hostOrigin, region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch); isMemTransferNeeded = buffer->checkIfMemoryTransferIsRequired(bufferOffset, hostOffset, ptr, cmdType); } if (!isMemTransferNeeded) { return enqueueMarkerForReadWriteOperation(buffer, ptr, cmdType, blockingRead, numEventsInWaitList, eventWaitList, event); } const size_t hostPtrSize = Buffer::calculateHostPtrSize(hostOrigin, region, hostRowPitch, hostSlicePitch); const uint32_t rootDeviceIndex = getDevice().getRootDeviceIndex(); InternalMemoryType memoryType = InternalMemoryType::NOT_SPECIFIED; GraphicsAllocation *mapAllocation = nullptr; bool isCpuCopyAllowed = false; getContext().tryGetExistingHostPtrAllocation(ptr, hostPtrSize, rootDeviceIndex, mapAllocation, memoryType, isCpuCopyAllowed); auto eBuiltInOps = EBuiltInOps::CopyBufferRect; if (forceStateless(buffer->getSize())) { eBuiltInOps = EBuiltInOps::CopyBufferRectStateless; } void *dstPtr = ptr; MemObjSurface srcBufferSurf(buffer); HostPtrSurface hostPtrSurf(dstPtr, hostPtrSize); GeneralSurface mapSurface; Surface *surfaces[] = {&srcBufferSurf, nullptr}; if (region[0] != 0 && region[1] != 0 && region[2] != 0) { if (mapAllocation) { surfaces[1] = &mapSurface; mapSurface.setGraphicsAllocation(mapAllocation); dstPtr = convertAddressWithOffsetToGpuVa(dstPtr, memoryType, *mapAllocation); } else { surfaces[1] = &hostPtrSurf; bool status = csr.createAllocationForHostSurface(hostPtrSurf, true); if (!status) { return CL_OUT_OF_RESOURCES; } dstPtr = reinterpret_cast(hostPtrSurf.getAllocation()->getGpuAddress()); } } void *alignedDstPtr = alignDown(dstPtr, 4); size_t dstPtrOffset = ptrDiff(dstPtr, alignedDstPtr); BuiltinOpParams dc; dc.srcMemObj = buffer; dc.dstPtr = alignedDstPtr; dc.srcOffset = bufferOrigin; dc.dstOffset = hostOrigin; dc.transferAllocation = hostPtrSurf.getAllocation(); dc.dstOffset.x += dstPtrOffset; dc.size = region; dc.srcRowPitch = 
bufferRowPitch; dc.srcSlicePitch = bufferSlicePitch; dc.dstRowPitch = hostRowPitch; dc.dstSlicePitch = hostSlicePitch; MultiDispatchInfo dispatchInfo(dc); const auto dispatchResult = dispatchBcsOrGpgpuEnqueue(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingRead, csr); if (dispatchResult != CL_SUCCESS) { return dispatchResult; } if (context->isProvidingPerformanceHints()) { context->providePerformanceHintForMemoryTransfer(CL_COMMAND_READ_BUFFER_RECT, true, static_cast(buffer), ptr); if (!isL3Capable(ptr, hostPtrSize)) { context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_READ_BUFFER_RECT_DOESNT_MEET_ALIGNMENT_RESTRICTIONS, ptr, hostPtrSize, MemoryConstants::pageSize, MemoryConstants::pageSize); } } return CL_SUCCESS; } } // namespace NEO compute-runtime-22.14.22890/opencl/source/command_queue/enqueue_read_image.h000066400000000000000000000123271422164147700267360ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/built_ins/built_ins.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/helpers/basic_math.h" #include "shared/source/helpers/cache_policy.h" #include "shared/source/helpers/engine_node_helper.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "shared/source/os_interface/os_context.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/context/context.h" #include "opencl/source/event/event.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/helpers/mipmap.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/memory_manager/mem_obj_surface.h" #include #include namespace NEO { template cl_int CommandQueueHw::enqueueReadImage( Image *srcImage, cl_bool blockingRead, const size_t *origin, const size_t *region, size_t inputRowPitch, size_t inputSlicePitch, 
void *ptr, GraphicsAllocation *mapAllocation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { constexpr cl_command_type cmdType = CL_COMMAND_READ_IMAGE; CsrSelectionArgs csrSelectionArgs{cmdType, srcImage, {}, device->getRootDeviceIndex(), region, origin, nullptr}; CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs); if (nullptr == mapAllocation) { notifyEnqueueReadImage(srcImage, static_cast(blockingRead), EngineHelpers::isBcs(csr.getOsContext().getEngineType())); } auto isMemTransferNeeded = true; if (srcImage->isMemObjZeroCopy()) { size_t hostOffset; Image::calculateHostPtrOffset(&hostOffset, origin, region, inputRowPitch, inputSlicePitch, srcImage->getImageDesc().image_type, srcImage->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes); isMemTransferNeeded = srcImage->checkIfMemoryTransferIsRequired(hostOffset, 0, ptr, cmdType); } if (!isMemTransferNeeded) { return enqueueMarkerForReadWriteOperation(srcImage, ptr, cmdType, blockingRead, numEventsInWaitList, eventWaitList, event); } size_t hostPtrSize = calculateHostPtrSizeForImage(region, inputRowPitch, inputSlicePitch, srcImage); void *dstPtr = ptr; MemObjSurface srcImgSurf(srcImage); HostPtrSurface hostPtrSurf(dstPtr, hostPtrSize); GeneralSurface mapSurface; Surface *surfaces[] = {&srcImgSurf, nullptr}; bool tempAllocFallback = false; if (mapAllocation) { surfaces[1] = &mapSurface; mapSurface.setGraphicsAllocation(mapAllocation); //get offset between base cpu ptr of map allocation and dst ptr size_t dstOffset = ptrDiff(dstPtr, mapAllocation->getUnderlyingBuffer()); dstPtr = reinterpret_cast(mapAllocation->getGpuAddress() + dstOffset); } else { surfaces[1] = &hostPtrSurf; if (region[0] != 0 && region[1] != 0 && region[2] != 0) { bool status = csr.createAllocationForHostSurface(hostPtrSurf, true); if (!status) { if (CL_TRUE == blockingRead) { hostPtrSurf.setIsPtrCopyAllowed(true); status = csr.createAllocationForHostSurface(hostPtrSurf, true); if 
(!status) { return CL_OUT_OF_RESOURCES; } tempAllocFallback = true; } else { return CL_OUT_OF_RESOURCES; } } dstPtr = reinterpret_cast(hostPtrSurf.getAllocation()->getGpuAddress()); } } void *alignedDstPtr = alignDown(dstPtr, 4); size_t dstPtrOffset = ptrDiff(dstPtr, alignedDstPtr); BuiltinOpParams dc; dc.srcMemObj = srcImage; dc.dstPtr = alignedDstPtr; dc.dstOffset.x = dstPtrOffset; dc.srcOffset = origin; dc.size = region; dc.dstRowPitch = (srcImage->getImageDesc().image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) ? inputSlicePitch : inputRowPitch; dc.dstSlicePitch = inputSlicePitch; if (isMipMapped(srcImage->getImageDesc())) { dc.srcMipLevel = findMipLevel(srcImage->getImageDesc().image_type, origin); } dc.transferAllocation = mapAllocation ? mapAllocation : hostPtrSurf.getAllocation(); if (tempAllocFallback) { dc.userPtrForPostOperationCpuCopy = ptr; } auto eBuiltInOps = EBuiltInOps::CopyImage3dToBuffer; MultiDispatchInfo dispatchInfo(dc); const auto dispatchResult = dispatchBcsOrGpgpuEnqueue(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingRead == CL_TRUE, csr); if (dispatchResult != CL_SUCCESS) { return dispatchResult; } if (context->isProvidingPerformanceHints()) { if (!isL3Capable(ptr, hostPtrSize)) { context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_READ_IMAGE_DOESNT_MEET_ALIGNMENT_RESTRICTIONS, ptr, hostPtrSize, MemoryConstants::pageSize, MemoryConstants::pageSize); } } return CL_SUCCESS; } } // namespace NEO compute-runtime-22.14.22890/opencl/source/command_queue/enqueue_resource_barrier.h000066400000000000000000000025451422164147700302170ustar00rootroot00000000000000/* * Copyright (C) 2019-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/device/device.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/event/event.h" #include 
"opencl/source/memory_manager/resource_surface.h" #include "resource_barrier.h" #include namespace NEO { template cl_int CommandQueueHw::enqueueResourceBarrier(BarrierCommand *resourceBarrier, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { MultiDispatchInfo multiDispatch; return enqueueHandler(resourceBarrier->surfacePtrs.begin(), resourceBarrier->numSurfaces, false, multiDispatch, numEventsInWaitList, eventWaitList, event); } } // namespace NEO compute-runtime-22.14.22890/opencl/source/command_queue/enqueue_svm.h000066400000000000000000000626031422164147700254700ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/built_ins/built_ins.h" #include "shared/source/memory_manager/surface.h" #include "shared/source/memory_manager/unified_memory_manager.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/command_queue/enqueue_common.h" #include "opencl/source/event/event.h" #include namespace NEO { using SvmFreeClbT = void(CL_CALLBACK *)(cl_command_queue queue, cl_uint numSvmPointers, void *svmPointers[], void *userData); struct SvmFreeUserData { cl_uint numSvmPointers; void **svmPointers; SvmFreeClbT clb; void *userData; bool ownsEventDeletion; SvmFreeUserData(cl_uint numSvmPointers, void **svmPointers, SvmFreeClbT clb, void *userData, bool ownsEventDeletion) : numSvmPointers(numSvmPointers), svmPointers(svmPointers), clb(clb), userData(userData), ownsEventDeletion(ownsEventDeletion){}; }; inline void CL_CALLBACK freeSvmEventClb(cl_event event, cl_int commandExecCallbackType, void *usrData) { auto freeDt = reinterpret_cast(usrData); auto eventObject = castToObjectOrAbort(event); if (freeDt->clb == nullptr) { auto ctx = eventObject->getContext(); for (cl_uint i = 0; i < freeDt->numSvmPointers; i++) { castToObjectOrAbort(ctx)->getSVMAllocsManager()->freeSVMAlloc(freeDt->svmPointers[i]); } } else { 
freeDt->clb(eventObject->getCommandQueue(), freeDt->numSvmPointers, freeDt->svmPointers, freeDt->userData); } if (freeDt->ownsEventDeletion) { castToObjectOrAbort(event)->release(); } delete freeDt; } template cl_int CommandQueueHw::enqueueSVMMap(cl_bool blockingMap, cl_map_flags mapFlags, void *svmPtr, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool externalAppCall) { auto svmData = context->getSVMAllocsManager()->getSVMAlloc(svmPtr); if (svmData == nullptr) { return CL_INVALID_VALUE; } bool blocking = blockingMap == CL_TRUE; if (svmData->gpuAllocations.getAllocationType() == AllocationType::SVM_ZERO_COPY) { NullSurface s; Surface *surfaces[] = {&s}; if (context->isProvidingPerformanceHints()) { context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_ENQUEUE_SVM_MAP_DOESNT_REQUIRE_COPY_DATA, svmPtr); } return enqueueHandler(surfaces, blocking, MultiDispatchInfo(), numEventsInWaitList, eventWaitList, event); } else { auto svmOperation = context->getSVMAllocsManager()->getSvmMapOperation(svmPtr); if (svmOperation) { NullSurface s; Surface *surfaces[] = {&s}; return enqueueHandler(surfaces, blocking, MultiDispatchInfo(), numEventsInWaitList, eventWaitList, event); } CsrSelectionArgs csrSelectionArgs{CL_COMMAND_READ_BUFFER, &svmData->gpuAllocations, {}, device->getRootDeviceIndex(), &size}; CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs); auto gpuAllocation = svmData->gpuAllocations.getGraphicsAllocation(getDevice().getRootDeviceIndex()); GeneralSurface dstSurface(svmData->cpuAllocation); GeneralSurface srcSurface(gpuAllocation); Surface *surfaces[] = {&dstSurface, &srcSurface}; void *svmBasePtr = svmData->cpuAllocation->getUnderlyingBuffer(); size_t svmOffset = ptrDiff(svmPtr, svmBasePtr); BuiltinOpParams dc; dc.dstPtr = reinterpret_cast(svmData->cpuAllocation->getGpuAddressToPatch()); dc.dstSvmAlloc = svmData->cpuAllocation; dc.dstOffset = {svmOffset, 0, 0}; dc.srcPtr = 
reinterpret_cast(gpuAllocation->getGpuAddressToPatch()); dc.srcSvmAlloc = gpuAllocation; dc.srcOffset = {svmOffset, 0, 0}; dc.size = {size, 0, 0}; dc.unifiedMemoryArgsRequireMemSync = externalAppCall; MultiDispatchInfo dispatchInfo(dc); const auto dispatchResult = dispatchBcsOrGpgpuEnqueue(dispatchInfo, surfaces, EBuiltInOps::CopyBufferToBuffer, numEventsInWaitList, eventWaitList, event, blocking, csr); if (dispatchResult != CL_SUCCESS) { return dispatchResult; } if (event) { castToObjectOrAbort(*event)->setCmdType(CL_COMMAND_SVM_MAP); } bool readOnlyMap = (mapFlags == CL_MAP_READ); context->getSVMAllocsManager()->insertSvmMapOperation(svmPtr, size, svmBasePtr, svmOffset, readOnlyMap); dispatchInfo.backupUnifiedMemorySyncRequirement(); return CL_SUCCESS; } } template cl_int CommandQueueHw::enqueueSVMUnmap(void *svmPtr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool externalAppCall) { auto svmData = context->getSVMAllocsManager()->getSVMAlloc(svmPtr); if (svmData == nullptr) { return CL_INVALID_VALUE; } if (svmData->gpuAllocations.getAllocationType() == AllocationType::SVM_ZERO_COPY) { NullSurface s; Surface *surfaces[] = {&s}; return enqueueHandler(surfaces, false, MultiDispatchInfo(), numEventsInWaitList, eventWaitList, event); } else { auto svmOperation = context->getSVMAllocsManager()->getSvmMapOperation(svmPtr); if (!svmOperation) { NullSurface s; Surface *surfaces[] = {&s}; return enqueueHandler(surfaces, false, MultiDispatchInfo(), numEventsInWaitList, eventWaitList, event); } if (svmOperation->readOnlyMap) { NullSurface s; Surface *surfaces[] = {&s}; const auto enqueueResult = enqueueHandler(surfaces, false, MultiDispatchInfo(), numEventsInWaitList, eventWaitList, event); context->getSVMAllocsManager()->removeSvmMapOperation(svmPtr); return enqueueResult; } CsrSelectionArgs csrSelectionArgs{CL_COMMAND_READ_BUFFER, {}, &svmData->gpuAllocations, device->getRootDeviceIndex(), &svmOperation->regionSize}; CommandStreamReceiver 
&csr = selectCsrForBuiltinOperation(csrSelectionArgs); auto gpuAllocation = svmData->gpuAllocations.getGraphicsAllocation(getDevice().getRootDeviceIndex()); gpuAllocation->setAubWritable(true, GraphicsAllocation::defaultBank); gpuAllocation->setTbxWritable(true, GraphicsAllocation::defaultBank); GeneralSurface dstSurface(gpuAllocation); GeneralSurface srcSurface(svmData->cpuAllocation); Surface *surfaces[] = {&dstSurface, &srcSurface}; BuiltinOpParams dc; dc.dstPtr = reinterpret_cast(gpuAllocation->getGpuAddressToPatch()); dc.dstSvmAlloc = gpuAllocation; dc.dstOffset = {svmOperation->offset, 0, 0}; dc.srcPtr = reinterpret_cast(svmData->cpuAllocation->getGpuAddressToPatch()); dc.srcSvmAlloc = svmData->cpuAllocation; dc.srcOffset = {svmOperation->offset, 0, 0}; dc.size = {svmOperation->regionSize, 0, 0}; dc.unifiedMemoryArgsRequireMemSync = externalAppCall; MultiDispatchInfo dispatchInfo(dc); const auto dispatchResult = dispatchBcsOrGpgpuEnqueue(dispatchInfo, surfaces, EBuiltInOps::CopyBufferToBuffer, numEventsInWaitList, eventWaitList, event, false, csr); if (dispatchResult != CL_SUCCESS) { return dispatchResult; } if (event) { castToObjectOrAbort(*event)->setCmdType(CL_COMMAND_SVM_UNMAP); } context->getSVMAllocsManager()->removeSvmMapOperation(svmPtr); dispatchInfo.backupUnifiedMemorySyncRequirement(); return CL_SUCCESS; } } template cl_int CommandQueueHw::enqueueSVMFree(cl_uint numSvmPointers, void *svmPointers[], SvmFreeClbT clb, void *userData, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *retEvent) { cl_event event = nullptr; bool ownsEventDeletion = false; if (retEvent == nullptr) { ownsEventDeletion = true; retEvent = &event; } SvmFreeUserData *pFreeData = new SvmFreeUserData(numSvmPointers, svmPointers, clb, userData, ownsEventDeletion); NullSurface s; Surface *surfaces[] = {&s}; const auto enqueueResult = enqueueHandler(surfaces, false, MultiDispatchInfo(), numEventsInWaitList, eventWaitList, retEvent); if (enqueueResult != 
CL_SUCCESS) { delete pFreeData; if (ownsEventDeletion) { castToObjectOrAbort(*retEvent)->release(); retEvent = nullptr; } return enqueueResult; } auto eventObject = castToObjectOrAbort(*retEvent); eventObject->addCallback(freeSvmEventClb, CL_COMPLETE, pFreeData); return CL_SUCCESS; } inline void setOperationParams(BuiltinOpParams &operationParams, size_t size, const void *srcPtr, GraphicsAllocation *srcSvmAlloc, void *dstPtr, GraphicsAllocation *dstSvmAlloc) { operationParams.size = {size, 0, 0}; operationParams.srcPtr = const_cast(alignDown(srcPtr, 4)); operationParams.srcSvmAlloc = srcSvmAlloc; operationParams.srcOffset = {ptrDiff(srcPtr, operationParams.srcPtr), 0, 0}; operationParams.dstPtr = alignDown(dstPtr, 4); operationParams.dstSvmAlloc = dstSvmAlloc; operationParams.dstOffset = {ptrDiff(dstPtr, operationParams.dstPtr), 0, 0}; } template inline std::tuple getExistingAlloc(Context *context, PtrType ptr, size_t size, uint32_t rootDeviceIndex) { SvmAllocationData *svmData = context->getSVMAllocsManager()->getSVMAlloc(ptr); GraphicsAllocation *allocation = nullptr; if (svmData) { allocation = svmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex); } else { context->tryGetExistingMapAllocation(ptr, size, allocation); if (allocation) { ptr = CommandQueue::convertAddressWithOffsetToGpuVa(ptr, InternalMemoryType::NOT_SPECIFIED, *allocation); } } return std::make_tuple(svmData, allocation, ptr); } template cl_int CommandQueueHw::enqueueSVMMemcpy(cl_bool blockingCopy, void *dstPtr, const void *srcPtr, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { if ((dstPtr == nullptr) || (srcPtr == nullptr)) { return CL_INVALID_VALUE; } auto rootDeviceIndex = getDevice().getRootDeviceIndex(); auto [dstSvmData, dstAllocation, dstGpuPtr] = getExistingAlloc(context, dstPtr, size, rootDeviceIndex); auto [srcSvmData, srcAllocation, srcGpuPtr] = getExistingAlloc(context, srcPtr, size, rootDeviceIndex); enum CopyType { HostToHost, 
SvmToHost, HostToSvm, SvmToSvm }; CopyType copyType = HostToHost; if ((srcAllocation != nullptr) && (dstAllocation != nullptr)) { copyType = SvmToSvm; } else if ((srcAllocation == nullptr) && (dstAllocation != nullptr)) { copyType = HostToSvm; } else if (srcAllocation != nullptr) { copyType = SvmToHost; } auto pageFaultManager = context->getMemoryManager()->getPageFaultManager(); if (dstSvmData && pageFaultManager) { pageFaultManager->moveAllocationToGpuDomain(reinterpret_cast(dstAllocation->getGpuAddress())); } if (srcSvmData && pageFaultManager) { pageFaultManager->moveAllocationToGpuDomain(reinterpret_cast(srcAllocation->getGpuAddress())); } auto isStatelessRequired = false; if (srcSvmData != nullptr) { isStatelessRequired = forceStateless(srcSvmData->size); } if (dstSvmData != nullptr) { isStatelessRequired |= forceStateless(dstSvmData->size); } auto builtInType = EBuiltInOps::CopyBufferToBuffer; if (isStatelessRequired) { builtInType = EBuiltInOps::CopyBufferToBufferStateless; } MultiDispatchInfo dispatchInfo; BuiltinOpParams operationParams; Surface *surfaces[2]; cl_command_type cmdType; cl_int dispatchResult = CL_SUCCESS; if (copyType == SvmToHost) { CsrSelectionArgs csrSelectionArgs{CL_COMMAND_SVM_MEMCPY, srcAllocation, {}, device->getRootDeviceIndex(), &size}; CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs); GeneralSurface srcSvmSurf(srcAllocation); HostPtrSurface dstHostPtrSurf(dstGpuPtr, size); if (size != 0) { bool status = csr.createAllocationForHostSurface(dstHostPtrSurf, true); if (!status) { return CL_OUT_OF_RESOURCES; } dstGpuPtr = reinterpret_cast(dstHostPtrSurf.getAllocation()->getGpuAddress()); notifyEnqueueSVMMemcpy(srcAllocation, !!blockingCopy, EngineHelpers::isBcs(csr.getOsContext().getEngineType())); } setOperationParams(operationParams, size, srcGpuPtr, srcAllocation, dstGpuPtr, dstHostPtrSurf.getAllocation()); surfaces[0] = &srcSvmSurf; surfaces[1] = &dstHostPtrSurf; 
dispatchInfo.setBuiltinOpParams(operationParams); dispatchResult = dispatchBcsOrGpgpuEnqueue(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, csr); } else if (copyType == HostToSvm) { CsrSelectionArgs csrSelectionArgs{CL_COMMAND_SVM_MEMCPY, {}, dstAllocation, device->getRootDeviceIndex(), &size}; CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs); HostPtrSurface srcHostPtrSurf(const_cast(srcGpuPtr), size, true); GeneralSurface dstSvmSurf(dstAllocation); cmdType = CL_COMMAND_WRITE_BUFFER; if (size != 0) { bool status = csr.createAllocationForHostSurface(srcHostPtrSurf, false); if (!status) { return CL_OUT_OF_RESOURCES; } srcGpuPtr = reinterpret_cast(srcHostPtrSurf.getAllocation()->getGpuAddress()); } setOperationParams(operationParams, size, srcGpuPtr, srcHostPtrSurf.getAllocation(), dstGpuPtr, dstAllocation); surfaces[0] = &dstSvmSurf; surfaces[1] = &srcHostPtrSurf; dispatchInfo.setBuiltinOpParams(operationParams); dispatchResult = dispatchBcsOrGpgpuEnqueue(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, csr); } else if (copyType == SvmToSvm) { CsrSelectionArgs csrSelectionArgs{CL_COMMAND_SVM_MEMCPY, srcAllocation, dstAllocation, device->getRootDeviceIndex(), &size}; CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs); GeneralSurface srcSvmSurf(srcAllocation); GeneralSurface dstSvmSurf(dstAllocation); setOperationParams(operationParams, size, srcGpuPtr, srcAllocation, dstGpuPtr, dstAllocation); surfaces[0] = &srcSvmSurf; surfaces[1] = &dstSvmSurf; dispatchInfo.setBuiltinOpParams(operationParams); dispatchResult = dispatchBcsOrGpgpuEnqueue(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, csr); } else { CsrSelectionArgs csrSelectionArgs{CL_COMMAND_SVM_MEMCPY, &size}; CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs); HostPtrSurface 
srcHostPtrSurf(const_cast(srcGpuPtr), size); HostPtrSurface dstHostPtrSurf(dstGpuPtr, size); cmdType = CL_COMMAND_WRITE_BUFFER; if (size != 0) { bool status = csr.createAllocationForHostSurface(srcHostPtrSurf, false); status &= csr.createAllocationForHostSurface(dstHostPtrSurf, true); if (!status) { return CL_OUT_OF_RESOURCES; } srcGpuPtr = reinterpret_cast(srcHostPtrSurf.getAllocation()->getGpuAddress()); dstGpuPtr = reinterpret_cast(dstHostPtrSurf.getAllocation()->getGpuAddress()); } setOperationParams(operationParams, size, srcGpuPtr, srcHostPtrSurf.getAllocation(), dstGpuPtr, dstHostPtrSurf.getAllocation()); surfaces[0] = &srcHostPtrSurf; surfaces[1] = &dstHostPtrSurf; dispatchInfo.setBuiltinOpParams(operationParams); dispatchResult = dispatchBcsOrGpgpuEnqueue(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, csr); } if (event) { auto pEvent = castToObjectOrAbort(*event); pEvent->setCmdType(CL_COMMAND_SVM_MEMCPY); } return dispatchResult; } template cl_int CommandQueueHw::enqueueSVMMemFill(void *svmPtr, const void *pattern, size_t patternSize, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { auto svmData = context->getSVMAllocsManager()->getSVMAlloc(svmPtr); if (svmData == nullptr) { return CL_INVALID_VALUE; } auto gpuAllocation = svmData->gpuAllocations.getGraphicsAllocation(getDevice().getRootDeviceIndex()); auto memoryManager = context->getMemoryManager(); DEBUG_BREAK_IF(nullptr == memoryManager); auto pageFaultManager = memoryManager->getPageFaultManager(); if (pageFaultManager) { pageFaultManager->moveAllocationToGpuDomain(reinterpret_cast(gpuAllocation->getGpuAddress())); } auto commandStreamReceieverOwnership = getGpgpuCommandStreamReceiver().obtainUniqueOwnership(); auto storageWithAllocations = getGpgpuCommandStreamReceiver().getInternalAllocationStorage(); auto allocationType = AllocationType::FILL_PATTERN; auto patternAllocation = 
storageWithAllocations->obtainReusableAllocation(patternSize, allocationType).release(); commandStreamReceieverOwnership.unlock(); if (!patternAllocation) { patternAllocation = memoryManager->allocateGraphicsMemoryWithProperties({getDevice().getRootDeviceIndex(), patternSize, allocationType, getDevice().getDeviceBitfield()}); } if (patternSize == 1) { int patternInt = (uint32_t)((*(uint8_t *)pattern << 24) | (*(uint8_t *)pattern << 16) | (*(uint8_t *)pattern << 8) | *(uint8_t *)pattern); memcpy_s(patternAllocation->getUnderlyingBuffer(), sizeof(int), &patternInt, sizeof(int)); } else if (patternSize == 2) { int patternInt = (uint32_t)((*(uint16_t *)pattern << 16) | *(uint16_t *)pattern); memcpy_s(patternAllocation->getUnderlyingBuffer(), sizeof(int), &patternInt, sizeof(int)); } else { memcpy_s(patternAllocation->getUnderlyingBuffer(), patternSize, pattern, patternSize); } auto builtInType = EBuiltInOps::FillBuffer; if (forceStateless(svmData->size)) { builtInType = EBuiltInOps::FillBufferStateless; } auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(builtInType, this->getClDevice()); BuiltInOwnershipWrapper builtInLock(builder, this->context); BuiltinOpParams operationParams; auto multiGraphicsAllocation = MultiGraphicsAllocation(getDevice().getRootDeviceIndex()); multiGraphicsAllocation.addAllocation(patternAllocation); MemObj patternMemObj(this->context, 0, {}, 0, 0, alignUp(patternSize, 4), patternAllocation->getUnderlyingBuffer(), patternAllocation->getUnderlyingBuffer(), std::move(multiGraphicsAllocation), false, false, true); void *alignedDstPtr = alignDown(svmPtr, 4); size_t dstPtrOffset = ptrDiff(svmPtr, alignedDstPtr); operationParams.srcMemObj = &patternMemObj; operationParams.dstPtr = alignedDstPtr; operationParams.dstSvmAlloc = gpuAllocation; operationParams.dstOffset = {dstPtrOffset, 0, 0}; operationParams.size = {size, 0, 0}; MultiDispatchInfo dispatchInfo(operationParams); builder.buildDispatchInfos(dispatchInfo); 
GeneralSurface s1(gpuAllocation); GeneralSurface s2(patternAllocation); Surface *surfaces[] = {&s1, &s2}; const auto enqueueResult = enqueueHandler( surfaces, false, dispatchInfo, numEventsInWaitList, eventWaitList, event); storageWithAllocations->storeAllocationWithTaskCount(std::unique_ptr(patternAllocation), REUSABLE_ALLOCATION, taskCount); return enqueueResult; } template cl_int CommandQueueHw::enqueueSVMMigrateMem(cl_uint numSvmPointers, const void **svmPointers, const size_t *sizes, const cl_mem_migration_flags flags, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { NullSurface s; Surface *surfaces[] = {&s}; return enqueueHandler(surfaces, false, MultiDispatchInfo(), numEventsInWaitList, eventWaitList, event); } } // namespace NEO compute-runtime-22.14.22890/opencl/source/command_queue/enqueue_write_buffer.h000066400000000000000000000106721422164147700273450ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/built_ins/built_ins.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/helpers/string.h" #include "shared/source/memory_manager/unified_memory_manager.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/memory_manager/mem_obj_surface.h" #include namespace NEO { template cl_int CommandQueueHw::enqueueWriteBuffer( Buffer *buffer, cl_bool blockingWrite, size_t offset, size_t size, const void *ptr, GraphicsAllocation *mapAllocation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { const cl_command_type cmdType = CL_COMMAND_WRITE_BUFFER; CsrSelectionArgs csrSelectionArgs{cmdType, {}, buffer, device->getRootDeviceIndex(), &size}; CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs); auto rootDeviceIndex = 
getDevice().getRootDeviceIndex(); auto isMemTransferNeeded = buffer->isMemObjZeroCopy() ? buffer->checkIfMemoryTransferIsRequired(offset, 0, ptr, cmdType) : true; bool isCpuCopyAllowed = bufferCpuCopyAllowed(buffer, cmdType, blockingWrite, size, const_cast(ptr), numEventsInWaitList, eventWaitList); InternalMemoryType memoryType = InternalMemoryType::NOT_SPECIFIED; if (!mapAllocation) { cl_int retVal = getContext().tryGetExistingHostPtrAllocation(ptr, size, rootDeviceIndex, mapAllocation, memoryType, isCpuCopyAllowed); if (retVal != CL_SUCCESS) { return retVal; } } if (isCpuCopyAllowed) { if (isMemTransferNeeded) { return enqueueReadWriteBufferOnCpuWithMemoryTransfer(cmdType, buffer, offset, size, const_cast(ptr), numEventsInWaitList, eventWaitList, event); } else { return enqueueReadWriteBufferOnCpuWithoutMemoryTransfer(cmdType, buffer, offset, size, const_cast(ptr), numEventsInWaitList, eventWaitList, event); } } else if (!isMemTransferNeeded) { return enqueueMarkerForReadWriteOperation(buffer, const_cast(ptr), cmdType, blockingWrite, numEventsInWaitList, eventWaitList, event); } auto eBuiltInOps = EBuiltInOps::CopyBufferToBuffer; if (forceStateless(buffer->getSize())) { eBuiltInOps = EBuiltInOps::CopyBufferToBufferStateless; } void *srcPtr = const_cast(ptr); HostPtrSurface hostPtrSurf(srcPtr, size, true); MemObjSurface bufferSurf(buffer); GeneralSurface mapSurface; Surface *surfaces[] = {&bufferSurf, nullptr}; if (mapAllocation) { surfaces[1] = &mapSurface; mapSurface.setGraphicsAllocation(mapAllocation); srcPtr = convertAddressWithOffsetToGpuVa(srcPtr, memoryType, *mapAllocation); } else { surfaces[1] = &hostPtrSurf; if (size != 0) { bool status = csr.createAllocationForHostSurface(hostPtrSurf, false); if (!status) { return CL_OUT_OF_RESOURCES; } srcPtr = reinterpret_cast(hostPtrSurf.getAllocation()->getGpuAddress()); } } void *alignedSrcPtr = alignDown(srcPtr, 4); size_t srcPtrOffset = ptrDiff(srcPtr, alignedSrcPtr); BuiltinOpParams dc; dc.srcPtr = 
alignedSrcPtr; dc.srcOffset = {srcPtrOffset, 0, 0}; dc.dstMemObj = buffer; dc.dstOffset = {offset, 0, 0}; dc.size = {size, 0, 0}; dc.transferAllocation = mapAllocation ? mapAllocation : hostPtrSurf.getAllocation(); MultiDispatchInfo dispatchInfo(dc); const auto dispatchResult = dispatchBcsOrGpgpuEnqueue(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingWrite, csr); if (dispatchResult != CL_SUCCESS) { return dispatchResult; } if (context->isProvidingPerformanceHints()) { context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, CL_ENQUEUE_WRITE_BUFFER_REQUIRES_COPY_DATA, static_cast(buffer)); } return CL_SUCCESS; } } // namespace NEO compute-runtime-22.14.22890/opencl/source/command_queue/enqueue_write_buffer_rect.h000066400000000000000000000104441422164147700303570ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/built_ins/built_ins.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/memory_manager/mem_obj_surface.h" #include namespace NEO { template cl_int CommandQueueHw::enqueueWriteBufferRect( Buffer *buffer, cl_bool blockingWrite, const size_t *bufferOrigin, const size_t *hostOrigin, const size_t *region, size_t bufferRowPitch, size_t bufferSlicePitch, size_t hostRowPitch, size_t hostSlicePitch, const void *ptr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { const cl_command_type cmdType = CL_COMMAND_WRITE_BUFFER_RECT; CsrSelectionArgs csrSelectionArgs{cmdType, buffer, {}, device->getRootDeviceIndex(), region}; CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs); auto isMemTransferNeeded = true; if (buffer->isMemObjZeroCopy()) { size_t bufferOffset; 
size_t hostOffset; computeOffsetsValueForRectCommands(&bufferOffset, &hostOffset, bufferOrigin, hostOrigin, region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch); isMemTransferNeeded = buffer->checkIfMemoryTransferIsRequired(bufferOffset, hostOffset, ptr, cmdType); } if (!isMemTransferNeeded) { return enqueueMarkerForReadWriteOperation(buffer, const_cast(ptr), cmdType, blockingWrite, numEventsInWaitList, eventWaitList, event); } const size_t hostPtrSize = Buffer::calculateHostPtrSize(hostOrigin, region, hostRowPitch, hostSlicePitch); const uint32_t rootDeviceIndex = getDevice().getRootDeviceIndex(); InternalMemoryType memoryType = InternalMemoryType::NOT_SPECIFIED; GraphicsAllocation *mapAllocation = nullptr; bool isCpuCopyAllowed = false; getContext().tryGetExistingHostPtrAllocation(ptr, hostPtrSize, rootDeviceIndex, mapAllocation, memoryType, isCpuCopyAllowed); auto eBuiltInOps = EBuiltInOps::CopyBufferRect; if (forceStateless(buffer->getSize())) { eBuiltInOps = EBuiltInOps::CopyBufferRectStateless; } void *srcPtr = const_cast(ptr); MemObjSurface dstBufferSurf(buffer); HostPtrSurface hostPtrSurf(srcPtr, hostPtrSize, true); GeneralSurface mapSurface; Surface *surfaces[] = {&dstBufferSurf, nullptr}; if (region[0] != 0 && region[1] != 0 && region[2] != 0) { if (mapAllocation) { surfaces[1] = &mapSurface; mapSurface.setGraphicsAllocation(mapAllocation); srcPtr = convertAddressWithOffsetToGpuVa(srcPtr, memoryType, *mapAllocation); } else { surfaces[1] = &hostPtrSurf; bool status = csr.createAllocationForHostSurface(hostPtrSurf, false); if (!status) { return CL_OUT_OF_RESOURCES; } srcPtr = reinterpret_cast(hostPtrSurf.getAllocation()->getGpuAddress()); } } void *alignedSrcPtr = alignDown(srcPtr, 4); size_t srcPtrOffset = ptrDiff(srcPtr, alignedSrcPtr); BuiltinOpParams dc; dc.srcPtr = alignedSrcPtr; dc.dstMemObj = buffer; dc.srcOffset = hostOrigin; dc.srcOffset.x += srcPtrOffset; dc.dstOffset = bufferOrigin; dc.transferAllocation = 
hostPtrSurf.getAllocation(); dc.size = region; dc.srcRowPitch = hostRowPitch; dc.srcSlicePitch = hostSlicePitch; dc.dstRowPitch = bufferRowPitch; dc.dstSlicePitch = bufferSlicePitch; MultiDispatchInfo dispatchInfo(dc); const auto dispatchResult = dispatchBcsOrGpgpuEnqueue(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingWrite, csr); if (dispatchResult != CL_SUCCESS) { return dispatchResult; } if (context->isProvidingPerformanceHints()) { context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, CL_ENQUEUE_WRITE_BUFFER_RECT_REQUIRES_COPY_DATA, static_cast(buffer)); } return CL_SUCCESS; } } // namespace NEO compute-runtime-22.14.22890/opencl/source/command_queue/enqueue_write_image.h000066400000000000000000000103711422164147700271520ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/built_ins/built_ins.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/helpers/basic_math.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/helpers/mipmap.h" #include "opencl/source/mem_obj/image.h" #include #include namespace NEO { template cl_int CommandQueueHw::enqueueWriteImage( Image *dstImage, cl_bool blockingWrite, const size_t *origin, const size_t *region, size_t inputRowPitch, size_t inputSlicePitch, const void *ptr, GraphicsAllocation *mapAllocation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { constexpr cl_command_type cmdType = CL_COMMAND_WRITE_IMAGE; CsrSelectionArgs csrSelectionArgs{cmdType, nullptr, dstImage, device->getRootDeviceIndex(), region, nullptr, origin}; CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs); auto isMemTransferNeeded = true; if 
(dstImage->isMemObjZeroCopy()) { size_t hostOffset; Image::calculateHostPtrOffset(&hostOffset, origin, region, inputRowPitch, inputSlicePitch, dstImage->getImageDesc().image_type, dstImage->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes); isMemTransferNeeded = dstImage->checkIfMemoryTransferIsRequired(hostOffset, 0, ptr, cmdType); } if (!isMemTransferNeeded) { return enqueueMarkerForReadWriteOperation(dstImage, const_cast(ptr), cmdType, blockingWrite, numEventsInWaitList, eventWaitList, event); } size_t hostPtrSize = calculateHostPtrSizeForImage(region, inputRowPitch, inputSlicePitch, dstImage); void *srcPtr = const_cast(ptr); MemObjSurface dstImgSurf(dstImage); HostPtrSurface hostPtrSurf(srcPtr, hostPtrSize, true); GeneralSurface mapSurface; Surface *surfaces[] = {&dstImgSurf, nullptr}; if (mapAllocation) { surfaces[1] = &mapSurface; mapSurface.setGraphicsAllocation(mapAllocation); //get offset between base cpu ptr of map allocation and dst ptr size_t srcOffset = ptrDiff(srcPtr, mapAllocation->getUnderlyingBuffer()); srcPtr = reinterpret_cast(mapAllocation->getGpuAddress() + srcOffset); } else { surfaces[1] = &hostPtrSurf; if (region[0] != 0 && region[1] != 0 && region[2] != 0) { bool status = csr.createAllocationForHostSurface(hostPtrSurf, false); if (!status) { return CL_OUT_OF_RESOURCES; } srcPtr = reinterpret_cast(hostPtrSurf.getAllocation()->getGpuAddress()); } } void *alignedSrcPtr = alignDown(srcPtr, 4); size_t srcPtrOffset = ptrDiff(srcPtr, alignedSrcPtr); BuiltinOpParams dc; dc.srcPtr = alignedSrcPtr; dc.srcOffset.x = srcPtrOffset; dc.dstMemObj = dstImage; dc.dstOffset = origin; dc.size = region; dc.srcRowPitch = ((dstImage->getImageDesc().image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) && (inputSlicePitch > inputRowPitch)) ? 
inputSlicePitch : inputRowPitch; dc.srcSlicePitch = inputSlicePitch; if (isMipMapped(dstImage->getImageDesc())) { dc.dstMipLevel = findMipLevel(dstImage->getImageDesc().image_type, origin); } dc.transferAllocation = mapAllocation ? mapAllocation : hostPtrSurf.getAllocation(); auto eBuiltInOps = EBuiltInOps::CopyBufferToImage3d; MultiDispatchInfo dispatchInfo(dc); const auto dispatchResult = dispatchBcsOrGpgpuEnqueue(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingWrite == CL_TRUE, csr); if (dispatchResult != CL_SUCCESS) { return dispatchResult; } if (context->isProvidingPerformanceHints()) { context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, CL_ENQUEUE_WRITE_IMAGE_REQUIRES_COPY_DATA, static_cast(dstImage)); } return CL_SUCCESS; } } // namespace NEO compute-runtime-22.14.22890/opencl/source/command_queue/finish.h000066400000000000000000000014001422164147700244000ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/command_stream/wait_status.h" #include "opencl/source/command_queue/command_queue_hw.h" namespace NEO { template cl_int CommandQueueHw::finish() { auto result = getGpgpuCommandStreamReceiver().flushBatchedSubmissions(); if (!result) { return CL_OUT_OF_RESOURCES; } // Stall until HW reaches taskCount on all its engines const auto waitStatus = waitForAllEngines(true, nullptr); if (waitStatus == WaitStatus::GpuHang) { return CL_OUT_OF_RESOURCES; } return CL_SUCCESS; } } // namespace NEO compute-runtime-22.14.22890/opencl/source/command_queue/flush.h000066400000000000000000000005011422164147700242420ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once namespace NEO { template cl_int CommandQueueHw::flush() { return 
getGpgpuCommandStreamReceiver().flushBatchedSubmissions() ? CL_SUCCESS : CL_OUT_OF_RESOURCES; } } // namespace NEO compute-runtime-22.14.22890/opencl/source/command_queue/gpgpu_walker.h000066400000000000000000000134501422164147700256170ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/built_ins/built_ins.h" #include "shared/source/command_stream/linear_stream.h" #include "shared/source/command_stream/preemption.h" #include "shared/source/helpers/register_offsets.h" #include "shared/source/helpers/timestamp_packet.h" #include "shared/source/indirect_heap/indirect_heap.h" #include "shared/source/utilities/hw_timestamps.h" #include "shared/source/utilities/perf_counter.h" #include "shared/source/utilities/tag_allocator.h" #include "opencl/source/command_queue/cl_local_work_size.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/context/context.h" #include "opencl/source/helpers/dispatch_info.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/helpers/task_information.h" #include "opencl/source/kernel/kernel.h" namespace NEO { struct RootDeviceEnvironment; template using MI_STORE_REG_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM_CMD; template class GpgpuWalkerHelper { using WALKER_TYPE = typename GfxFamily::WALKER_TYPE; public: static void applyWADisableLSQCROPERFforOCL(LinearStream *pCommandStream, const Kernel &kernel, bool disablePerfMode); static size_t getSizeForWADisableLSQCROPERFforOCL(const Kernel *pKernel); static size_t getSizeForWaDisableRccRhwoOptimization(const Kernel *pKernel); static size_t setGpgpuWalkerThreadData( WALKER_TYPE *walkerCmd, const KernelDescriptor &kernelDescriptor, const size_t globalOffsets[3], const size_t startWorkGroups[3], const size_t numWorkGroups[3], const size_t localWorkSizesIn[3], uint32_t simd, uint32_t workDim, bool localIdsGenerationByRuntime, bool 
inlineDataProgrammingRequired, uint32_t requiredWorkgroupOrder); static void dispatchProfilingCommandsStart( TagNodeBase &hwTimeStamps, LinearStream *commandStream, const HardwareInfo &hwInfo); static void dispatchProfilingCommandsEnd( TagNodeBase &hwTimeStamps, LinearStream *commandStream, const HardwareInfo &hwInfo); static void dispatchPerfCountersCommandsStart( CommandQueue &commandQueue, TagNodeBase &hwPerfCounter, LinearStream *commandStream); static void dispatchPerfCountersCommandsEnd( CommandQueue &commandQueue, TagNodeBase &hwPerfCounter, LinearStream *commandStream); static void setupTimestampPacket( LinearStream *cmdStream, WALKER_TYPE *walkerCmd, TagNodeBase *timestampPacketNode, const RootDeviceEnvironment &rootDeviceEnvironment); static void adjustMiStoreRegMemMode(MI_STORE_REG_MEM *storeCmd); private: using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; static void addAluReadModifyWriteRegister( LinearStream *pCommandStream, uint32_t aluRegister, AluRegisters operation, uint32_t mask); }; template struct EnqueueOperation { using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; static size_t getTotalSizeRequiredCS(uint32_t eventType, const CsrDependencies &csrDeps, bool reserveProfilingCmdsSpace, bool reservePerfCounters, bool blitEnqueue, CommandQueue &commandQueue, const MultiDispatchInfo &multiDispatchInfo, bool isMarkerWithProfiling, bool eventsInWaitList); static size_t getSizeRequiredCS(uint32_t cmdType, bool reserveProfilingCmdsSpace, bool reservePerfCounters, CommandQueue &commandQueue, const Kernel *pKernel, const DispatchInfo &dispatchInfo); static size_t getSizeRequiredForTimestampPacketWrite(); static size_t getSizeForCacheFlushAfterWalkerCommands(const Kernel &kernel, const CommandQueue &commandQueue); private: static size_t getSizeRequiredCSKernel(bool reserveProfilingCmdsSpace, bool reservePerfCounters, CommandQueue &commandQueue, const Kernel *pKernel, const DispatchInfo &dispatchInfo); static size_t getSizeRequiredCSNonKernel(bool 
reserveProfilingCmdsSpace, bool reservePerfCounters, CommandQueue &commandQueue); }; template LinearStream &getCommandStream(CommandQueue &commandQueue, const CsrDependencies &csrDeps, bool reserveProfilingCmdsSpace, bool reservePerfCounterCmdsSpace, bool blitEnqueue, const MultiDispatchInfo &multiDispatchInfo, Surface **surfaces, size_t numSurfaces, bool isMarkerWithProfiling, bool eventsInWaitList) { size_t expectedSizeCS = EnqueueOperation::getTotalSizeRequiredCS(eventType, csrDeps, reserveProfilingCmdsSpace, reservePerfCounterCmdsSpace, blitEnqueue, commandQueue, multiDispatchInfo, isMarkerWithProfiling, eventsInWaitList); return commandQueue.getCS(expectedSizeCS); } template IndirectHeap &getIndirectHeap(CommandQueue &commandQueue, const MultiDispatchInfo &multiDispatchInfo) { size_t expectedSize = 0; IndirectHeap *ih = nullptr; // clang-format off switch (heapType) { case IndirectHeap::Type::DYNAMIC_STATE: expectedSize = HardwareCommandsHelper::getTotalSizeRequiredDSH(multiDispatchInfo); break; case IndirectHeap::Type::INDIRECT_OBJECT: expectedSize = HardwareCommandsHelper::getTotalSizeRequiredIOH(multiDispatchInfo); break; case IndirectHeap::Type::SURFACE_STATE: expectedSize = HardwareCommandsHelper::getTotalSizeRequiredSSH(multiDispatchInfo); break; } // clang-format on if (ih == nullptr) ih = &commandQueue.getIndirectHeap(heapType, expectedSize); return *ih; } } // namespace NEO compute-runtime-22.14.22890/opencl/source/command_queue/gpgpu_walker_base.inl000066400000000000000000000270211422164147700271430ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_container/command_encoder.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/debug_helpers.h" #include "shared/source/helpers/engine_node_helper.h" #include "shared/source/helpers/hw_helper.h" #include 
"shared/source/helpers/local_id_gen.h" #include "shared/source/helpers/pipe_control_args.h" #include "shared/source/indirect_heap/indirect_heap.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "shared/source/os_interface/os_context.h" #include "shared/source/utilities/perf_counter.h" #include "shared/source/utilities/tag_allocator.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/command_queue/gpgpu_walker.h" #include "opencl/source/event/user_event.h" #include "opencl/source/helpers/cl_validators.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/helpers/queue_helpers.h" #include "opencl/source/mem_obj/mem_obj.h" #include #include namespace NEO { // Performs ReadModifyWrite operation on value of a register: Register = Register Operation Mask template void GpgpuWalkerHelper::addAluReadModifyWriteRegister( LinearStream *pCommandStream, uint32_t aluRegister, AluRegisters operation, uint32_t mask) { // Load "Register" value into CS_GPR_R0 using MI_LOAD_REGISTER_REG = typename GfxFamily::MI_LOAD_REGISTER_REG; using MI_MATH = typename GfxFamily::MI_MATH; using MI_MATH_ALU_INST_INLINE = typename GfxFamily::MI_MATH_ALU_INST_INLINE; auto pCmd = pCommandStream->getSpaceForCmd(); MI_LOAD_REGISTER_REG cmdReg = GfxFamily::cmdInitLoadRegisterReg; cmdReg.setSourceRegisterAddress(aluRegister); cmdReg.setDestinationRegisterAddress(CS_GPR_R0); *pCmd = cmdReg; // Load "Mask" into CS_GPR_R1 LriHelper::program(pCommandStream, CS_GPR_R1, mask, false); // Add instruction MI_MATH with 4 MI_MATH_ALU_INST_INLINE operands auto pCmd3 = reinterpret_cast(pCommandStream->getSpace(sizeof(MI_MATH) + NUM_ALU_INST_FOR_READ_MODIFY_WRITE * sizeof(MI_MATH_ALU_INST_INLINE))); MI_MATH mathCmd; mathCmd.DW0.Value = 0x0; mathCmd.DW0.BitField.InstructionType = MI_MATH::COMMAND_TYPE_MI_COMMAND; mathCmd.DW0.BitField.InstructionOpcode = 
MI_MATH::MI_COMMAND_OPCODE_MI_MATH; // 0x3 - 5 Dwords length cmd (-2): 1 for MI_MATH, 4 for MI_MATH_ALU_INST_INLINE mathCmd.DW0.BitField.DwordLength = NUM_ALU_INST_FOR_READ_MODIFY_WRITE - 1; *reinterpret_cast(pCmd3) = mathCmd; pCmd3++; MI_MATH_ALU_INST_INLINE *pAluParam = reinterpret_cast(pCmd3); MI_MATH_ALU_INST_INLINE cmdAluParam; cmdAluParam.DW0.Value = 0x0; // Setup first operand of MI_MATH - load CS_GPR_R0 into register A cmdAluParam.DW0.BitField.ALUOpcode = static_cast(AluRegisters::OPCODE_LOAD); cmdAluParam.DW0.BitField.Operand1 = static_cast(AluRegisters::R_SRCA); cmdAluParam.DW0.BitField.Operand2 = static_cast(AluRegisters::R_0); *pAluParam = cmdAluParam; pAluParam++; cmdAluParam.DW0.Value = 0x0; // Setup second operand of MI_MATH - load CS_GPR_R1 into register B cmdAluParam.DW0.BitField.ALUOpcode = static_cast(AluRegisters::OPCODE_LOAD); cmdAluParam.DW0.BitField.Operand1 = static_cast(AluRegisters::R_SRCB); cmdAluParam.DW0.BitField.Operand2 = static_cast(AluRegisters::R_1); *pAluParam = cmdAluParam; pAluParam++; cmdAluParam.DW0.Value = 0x0; // Setup third operand of MI_MATH - "Operation" on registers A and B cmdAluParam.DW0.BitField.ALUOpcode = static_cast(operation); cmdAluParam.DW0.BitField.Operand1 = 0; cmdAluParam.DW0.BitField.Operand2 = 0; *pAluParam = cmdAluParam; pAluParam++; cmdAluParam.DW0.Value = 0x0; // Setup fourth operand of MI_MATH - store result into CS_GPR_R0 cmdAluParam.DW0.BitField.ALUOpcode = static_cast(AluRegisters::OPCODE_STORE); cmdAluParam.DW0.BitField.Operand1 = static_cast(AluRegisters::R_0); cmdAluParam.DW0.BitField.Operand2 = static_cast(AluRegisters::R_ACCU); *pAluParam = cmdAluParam; // LOAD value of CS_GPR_R0 into "Register" auto pCmd4 = pCommandStream->getSpaceForCmd(); cmdReg = GfxFamily::cmdInitLoadRegisterReg; cmdReg.setSourceRegisterAddress(CS_GPR_R0); cmdReg.setDestinationRegisterAddress(aluRegister); *pCmd4 = cmdReg; // Add PIPE_CONTROL to flush caches auto pCmd5 = pCommandStream->getSpaceForCmd(); PIPE_CONTROL 
cmdPipeControl = GfxFamily::cmdInitPipeControl; cmdPipeControl.setCommandStreamerStallEnable(true); cmdPipeControl.setDcFlushEnable(true); cmdPipeControl.setTextureCacheInvalidationEnable(true); cmdPipeControl.setPipeControlFlushEnable(true); cmdPipeControl.setStateCacheInvalidationEnable(true); *pCmd5 = cmdPipeControl; } template void GpgpuWalkerHelper::dispatchPerfCountersCommandsStart( CommandQueue &commandQueue, TagNodeBase &hwPerfCounter, LinearStream *commandStream) { const auto pPerformanceCounters = commandQueue.getPerfCounters(); const auto commandBufferType = EngineHelpers::isCcs(commandQueue.getGpgpuEngine().osContext->getEngineType()) ? MetricsLibraryApi::GpuCommandBufferType::Compute : MetricsLibraryApi::GpuCommandBufferType::Render; const uint32_t size = pPerformanceCounters->getGpuCommandsSize(commandBufferType, true); void *pBuffer = commandStream->getSpace(size); pPerformanceCounters->getGpuCommands(commandBufferType, hwPerfCounter, true, size, pBuffer); } template void GpgpuWalkerHelper::dispatchPerfCountersCommandsEnd( CommandQueue &commandQueue, TagNodeBase &hwPerfCounter, LinearStream *commandStream) { const auto pPerformanceCounters = commandQueue.getPerfCounters(); const auto commandBufferType = EngineHelpers::isCcs(commandQueue.getGpgpuEngine().osContext->getEngineType()) ? 
MetricsLibraryApi::GpuCommandBufferType::Compute : MetricsLibraryApi::GpuCommandBufferType::Render; const uint32_t size = pPerformanceCounters->getGpuCommandsSize(commandBufferType, false); void *pBuffer = commandStream->getSpace(size); pPerformanceCounters->getGpuCommands(commandBufferType, hwPerfCounter, false, size, pBuffer); } template void GpgpuWalkerHelper::applyWADisableLSQCROPERFforOCL(NEO::LinearStream *pCommandStream, const Kernel &kernel, bool disablePerfMode) { } template size_t GpgpuWalkerHelper::getSizeForWADisableLSQCROPERFforOCL(const Kernel *pKernel) { return (size_t)0; } template size_t GpgpuWalkerHelper::getSizeForWaDisableRccRhwoOptimization(const Kernel *pKernel) { return 0u; } template size_t EnqueueOperation::getTotalSizeRequiredCS(uint32_t eventType, const CsrDependencies &csrDeps, bool reserveProfilingCmdsSpace, bool reservePerfCounters, bool blitEnqueue, CommandQueue &commandQueue, const MultiDispatchInfo &multiDispatchInfo, bool isMarkerWithProfiling, bool eventsInWaitlist) { size_t expectedSizeCS = 0; auto &hwInfo = commandQueue.getDevice().getHardwareInfo(); auto &commandQueueHw = static_cast &>(commandQueue); if (blitEnqueue) { size_t expectedSizeCS = TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependencyWithBlitEnqueue(); if (commandQueueHw.isCacheFlushForBcsRequired()) { expectedSizeCS += MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(hwInfo); } return expectedSizeCS; } for (auto &dispatchInfo : multiDispatchInfo) { expectedSizeCS += EnqueueOperation::getSizeRequiredCS(eventType, reserveProfilingCmdsSpace, reservePerfCounters, commandQueue, dispatchInfo.getKernel(), dispatchInfo); size_t kernelObjAuxCount = multiDispatchInfo.getKernelObjsForAuxTranslation() != nullptr ? 
multiDispatchInfo.getKernelObjsForAuxTranslation()->size() : 0; expectedSizeCS += dispatchInfo.dispatchInitCommands.estimateCommandsSize(kernelObjAuxCount, hwInfo, commandQueueHw.isCacheFlushForBcsRequired()); expectedSizeCS += dispatchInfo.dispatchEpilogueCommands.estimateCommandsSize(kernelObjAuxCount, hwInfo, commandQueueHw.isCacheFlushForBcsRequired()); } if (commandQueue.getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) { expectedSizeCS += TimestampPacketHelper::getRequiredCmdStreamSize(csrDeps); expectedSizeCS += EnqueueOperation::getSizeRequiredForTimestampPacketWrite(); if (isMarkerWithProfiling) { if (!eventsInWaitlist) { expectedSizeCS += commandQueue.getGpgpuCommandStreamReceiver().getCmdsSizeForComputeBarrierCommand(); } expectedSizeCS += 4 * EncodeStoreMMIO::size; } } else if (isMarkerWithProfiling) { expectedSizeCS += 2 * MemorySynchronizationCommands::getSizeForSinglePipeControl(); if (!HwHelper::get(hwInfo.platform.eRenderCoreFamily).useOnlyGlobalTimestamps()) { expectedSizeCS += 2 * EncodeStoreMMIO::size; } } if (multiDispatchInfo.peekMainKernel()) { expectedSizeCS += EnqueueOperation::getSizeForCacheFlushAfterWalkerCommands(*multiDispatchInfo.peekMainKernel(), commandQueue); } if (DebugManager.flags.PauseOnEnqueue.get() != -1) { expectedSizeCS += MemorySynchronizationCommands::getSizeForSinglePipeControl() * 2; expectedSizeCS += sizeof(typename GfxFamily::MI_SEMAPHORE_WAIT) * 2; } if (DebugManager.flags.GpuScratchRegWriteAfterWalker.get() != -1) { expectedSizeCS += sizeof(typename GfxFamily::MI_LOAD_REGISTER_IMM); } expectedSizeCS += TimestampPacketHelper::getRequiredCmdStreamSizeForTaskCountContainer(csrDeps); return expectedSizeCS; } template size_t EnqueueOperation::getSizeRequiredCS(uint32_t cmdType, bool reserveProfilingCmdsSpace, bool reservePerfCounters, CommandQueue &commandQueue, const Kernel *pKernel, const DispatchInfo &dispatchInfo) { if (isCommandWithoutKernel(cmdType)) { return 
EnqueueOperation::getSizeRequiredCSNonKernel(reserveProfilingCmdsSpace, reservePerfCounters, commandQueue); } else { return EnqueueOperation::getSizeRequiredCSKernel(reserveProfilingCmdsSpace, reservePerfCounters, commandQueue, pKernel, dispatchInfo); } } template size_t EnqueueOperation::getSizeRequiredCSNonKernel(bool reserveProfilingCmdsSpace, bool reservePerfCounters, CommandQueue &commandQueue) { size_t size = 0; if (reserveProfilingCmdsSpace) { size += 2 * sizeof(PIPE_CONTROL) + 4 * sizeof(typename GfxFamily::MI_STORE_REGISTER_MEM); } return size; } } // namespace NEO compute-runtime-22.14.22890/opencl/source/command_queue/gpgpu_walker_bdw_and_later.inl000066400000000000000000000161711422164147700310220ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/pipe_control_args.h" #include "shared/source/helpers/simd_helper.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/command_queue/gpgpu_walker_base.inl" namespace NEO { template inline size_t GpgpuWalkerHelper::setGpgpuWalkerThreadData( WALKER_TYPE *walkerCmd, const KernelDescriptor &kernelDescriptor, const size_t globalOffsets[3], const size_t startWorkGroups[3], const size_t numWorkGroups[3], const size_t localWorkSizesIn[3], uint32_t simd, uint32_t workDim, bool localIdsGenerationByRuntime, bool inlineDataProgrammingRequired, uint32_t requiredWorkgroupOrder) { auto localWorkSize = localWorkSizesIn[0] * localWorkSizesIn[1] * localWorkSizesIn[2]; auto threadsPerWorkGroup = getThreadsPerWG(simd, localWorkSize); walkerCmd->setThreadWidthCounterMaximum(static_cast(threadsPerWorkGroup)); walkerCmd->setThreadGroupIdXDimension(static_cast(numWorkGroups[0])); walkerCmd->setThreadGroupIdYDimension(static_cast(numWorkGroups[1])); 
walkerCmd->setThreadGroupIdZDimension(static_cast(numWorkGroups[2])); // compute executionMask - to tell which SIMD lines are active within thread auto remainderSimdLanes = localWorkSize & (simd - 1); uint64_t executionMask = maxNBitValue(remainderSimdLanes); if (!executionMask) executionMask = ~executionMask; using SIMD_SIZE = typename WALKER_TYPE::SIMD_SIZE; walkerCmd->setRightExecutionMask(static_cast(executionMask)); walkerCmd->setBottomExecutionMask(static_cast(0xffffffff)); walkerCmd->setSimdSize(getSimdConfig(simd)); walkerCmd->setThreadGroupIdStartingX(static_cast(startWorkGroups[0])); walkerCmd->setThreadGroupIdStartingY(static_cast(startWorkGroups[1])); walkerCmd->setThreadGroupIdStartingResumeZ(static_cast(startWorkGroups[2])); return localWorkSize; } template void GpgpuWalkerHelper::setupTimestampPacket( LinearStream *cmdStream, WALKER_TYPE *walkerCmd, TagNodeBase *timestampPacketNode, const RootDeviceEnvironment &rootDeviceEnvironment) { uint64_t address = TimestampPacketHelper::getContextEndGpuAddress(*timestampPacketNode); PipeControlArgs args; MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( *cmdStream, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, address, 0, *rootDeviceEnvironment.getHardwareInfo(), args); EncodeDispatchKernel::adjustTimestampPacket(*walkerCmd, *rootDeviceEnvironment.getHardwareInfo()); } template size_t EnqueueOperation::getSizeRequiredCSKernel(bool reserveProfilingCmdsSpace, bool reservePerfCounters, CommandQueue &commandQueue, const Kernel *pKernel, const DispatchInfo &dispatchInfo) { size_t size = sizeof(typename GfxFamily::GPGPU_WALKER) + HardwareCommandsHelper::getSizeRequiredCS() + sizeof(PIPE_CONTROL) * (MemorySynchronizationCommands::isPipeControlWArequired(commandQueue.getDevice().getHardwareInfo()) ? 
2 : 1); size += HardwareCommandsHelper::getSizeRequiredForCacheFlush(commandQueue, pKernel, 0U); size += PreemptionHelper::getPreemptionWaCsSize(commandQueue.getDevice()); if (reserveProfilingCmdsSpace) { size += 2 * sizeof(PIPE_CONTROL) + 2 * sizeof(typename GfxFamily::MI_STORE_REGISTER_MEM); } size += PerformanceCounters::getGpuCommandsSize(commandQueue.getPerfCounters(), commandQueue.getGpgpuEngine().osContext->getEngineType(), reservePerfCounters); size += GpgpuWalkerHelper::getSizeForWADisableLSQCROPERFforOCL(pKernel); size += GpgpuWalkerHelper::getSizeForWaDisableRccRhwoOptimization(pKernel); return size; } template size_t EnqueueOperation::getSizeRequiredForTimestampPacketWrite() { return sizeof(PIPE_CONTROL); } template void GpgpuWalkerHelper::adjustMiStoreRegMemMode(MI_STORE_REG_MEM *storeCmd) { } template void GpgpuWalkerHelper::dispatchProfilingCommandsStart( TagNodeBase &hwTimeStamps, LinearStream *commandStream, const HardwareInfo &hwInfo) { using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM; // PIPE_CONTROL for global timestamp uint64_t timeStampAddress = hwTimeStamps.getGpuAddress() + offsetof(HwTimeStamps, GlobalStartTS); PipeControlArgs args; MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( *commandStream, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, timeStampAddress, 0llu, hwInfo, args); if (!HwHelper::get(hwInfo.platform.eRenderCoreFamily).useOnlyGlobalTimestamps()) { //MI_STORE_REGISTER_MEM for context local timestamp timeStampAddress = hwTimeStamps.getGpuAddress() + offsetof(HwTimeStamps, ContextStartTS); //low part auto pMICmdLow = commandStream->getSpaceForCmd(); MI_STORE_REGISTER_MEM cmd = GfxFamily::cmdInitStoreRegisterMem; adjustMiStoreRegMemMode(&cmd); cmd.setRegisterAddress(GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW); cmd.setMemoryAddress(timeStampAddress); *pMICmdLow = cmd; } } template void GpgpuWalkerHelper::dispatchProfilingCommandsEnd( TagNodeBase &hwTimeStamps, LinearStream 
*commandStream, const HardwareInfo &hwInfo) { using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM; // PIPE_CONTROL for global timestamp uint64_t timeStampAddress = hwTimeStamps.getGpuAddress() + offsetof(HwTimeStamps, GlobalEndTS); PipeControlArgs args; MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( *commandStream, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, timeStampAddress, 0llu, hwInfo, args); if (!HwHelper::get(hwInfo.platform.eRenderCoreFamily).useOnlyGlobalTimestamps()) { //MI_STORE_REGISTER_MEM for context local timestamp uint64_t timeStampAddress = hwTimeStamps.getGpuAddress() + offsetof(HwTimeStamps, ContextEndTS); //low part auto pMICmdLow = commandStream->getSpaceForCmd(); MI_STORE_REGISTER_MEM cmd = GfxFamily::cmdInitStoreRegisterMem; adjustMiStoreRegMemMode(&cmd); cmd.setRegisterAddress(GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW); cmd.setMemoryAddress(timeStampAddress); *pMICmdLow = cmd; } } template size_t EnqueueOperation::getSizeForCacheFlushAfterWalkerCommands(const Kernel &kernel, const CommandQueue &commandQueue) { return 0; } } // namespace NEO compute-runtime-22.14.22890/opencl/source/command_queue/gpgpu_walker_xehp_and_later.inl000066400000000000000000000212641422164147700312110ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_container/implicit_scaling.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/cache_flush_xehp_and_later.inl" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/l3_range.h" #include "shared/source/helpers/simd_helper.h" #include "opencl/source/command_queue/gpgpu_walker_base.inl" #include "opencl/source/platform/platform.h" namespace NEO { template size_t 
GpgpuWalkerHelper::setGpgpuWalkerThreadData( WALKER_TYPE *walkerCmd, const KernelDescriptor &kernelDescriptor, const size_t globalOffsets[3], const size_t startWorkGroups[3], const size_t numWorkGroups[3], const size_t localWorkSizesIn[3], uint32_t simd, uint32_t workDim, bool localIdsGenerationByRuntime, bool inlineDataProgrammingRequired, uint32_t requiredWorkGroupOrder) { bool kernelUsesLocalIds = kernelDescriptor.kernelAttributes.numLocalIdChannels > 0; auto localWorkSize = localWorkSizesIn[0] * localWorkSizesIn[1] * localWorkSizesIn[2]; walkerCmd->setThreadGroupIdXDimension(static_cast(numWorkGroups[0])); walkerCmd->setThreadGroupIdYDimension(static_cast(numWorkGroups[1])); walkerCmd->setThreadGroupIdZDimension(static_cast(numWorkGroups[2])); // compute executionMask - to tell which SIMD lines are active within thread auto remainderSimdLanes = localWorkSize & (simd - 1); uint64_t executionMask = maxNBitValue(remainderSimdLanes); if (!executionMask) { executionMask = maxNBitValue((simd == 1) ? 
32 : simd); } walkerCmd->setExecutionMask(static_cast(executionMask)); walkerCmd->setSimdSize(getSimdConfig(simd)); walkerCmd->setMessageSimd(walkerCmd->getSimdSize()); if (DebugManager.flags.ForceSimdMessageSizeInWalker.get() != -1) { walkerCmd->setMessageSimd(DebugManager.flags.ForceSimdMessageSizeInWalker.get()); } walkerCmd->setThreadGroupIdStartingX(static_cast(startWorkGroups[0])); walkerCmd->setThreadGroupIdStartingY(static_cast(startWorkGroups[1])); walkerCmd->setThreadGroupIdStartingZ(static_cast(startWorkGroups[2])); //1) cross-thread inline data will be put into R1, but if kernel uses local ids, then cross-thread should be put further back //so whenever local ids are driver or hw generated, reserve space by setting right values for emitLocalIds //2) Auto-generation of local ids should be possible, when in fact local ids are used if (!localIdsGenerationByRuntime && kernelUsesLocalIds) { uint32_t emitLocalIdsForDim = 0; if (kernelDescriptor.kernelAttributes.localId[0]) { emitLocalIdsForDim |= (1 << 0); } if (kernelDescriptor.kernelAttributes.localId[1]) { emitLocalIdsForDim |= (1 << 1); } if (kernelDescriptor.kernelAttributes.localId[2]) { emitLocalIdsForDim |= (1 << 2); } walkerCmd->setEmitLocalId(emitLocalIdsForDim); } if (inlineDataProgrammingRequired == true) { walkerCmd->setEmitInlineParameter(1); } if ((!localIdsGenerationByRuntime) && kernelUsesLocalIds) { walkerCmd->setLocalXMaximum(static_cast(localWorkSizesIn[0] - 1)); walkerCmd->setLocalYMaximum(static_cast(localWorkSizesIn[1] - 1)); walkerCmd->setLocalZMaximum(static_cast(localWorkSizesIn[2] - 1)); walkerCmd->setGenerateLocalId(1); walkerCmd->setWalkOrder(requiredWorkGroupOrder); } return localWorkSize; } template void GpgpuWalkerHelper::setupTimestampPacket(LinearStream *cmdStream, WALKER_TYPE *walkerCmd, TagNodeBase *timestampPacketNode, const RootDeviceEnvironment &rootDeviceEnvironment) { using COMPUTE_WALKER = typename GfxFamily::COMPUTE_WALKER; const auto &hwInfo = 
*rootDeviceEnvironment.getHardwareInfo(); auto &postSyncData = walkerCmd->getPostSync(); postSyncData.setDataportPipelineFlush(true); EncodeDispatchKernel::setupPostSyncMocs(*walkerCmd, rootDeviceEnvironment); EncodeDispatchKernel::adjustTimestampPacket(*walkerCmd, hwInfo); if (DebugManager.flags.UseImmDataWriteModeOnPostSyncOperation.get()) { postSyncData.setOperation(GfxFamily::POSTSYNC_DATA::OPERATION::OPERATION_WRITE_IMMEDIATE_DATA); auto contextEndAddress = TimestampPacketHelper::getContextEndGpuAddress(*timestampPacketNode); postSyncData.setDestinationAddress(contextEndAddress); postSyncData.setImmediateData(0x2'0000'0002); } else { postSyncData.setOperation(GfxFamily::POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP); auto contextStartAddress = TimestampPacketHelper::getContextStartGpuAddress(*timestampPacketNode); postSyncData.setDestinationAddress(contextStartAddress); } if (DebugManager.flags.OverrideSystolicInComputeWalker.get() != -1) { walkerCmd->setSystolicModeEnable((DebugManager.flags.OverrideSystolicInComputeWalker.get())); } } template void GpgpuWalkerHelper::adjustMiStoreRegMemMode(MI_STORE_REG_MEM *storeCmd) { storeCmd->setMmioRemapEnable(true); } template size_t EnqueueOperation::getSizeRequiredCSKernel(bool reserveProfilingCmdsSpace, bool reservePerfCounters, CommandQueue &commandQueue, const Kernel *pKernel, const DispatchInfo &dispatchInfo) { size_t numPipeControls = MemorySynchronizationCommands::isPipeControlWArequired(commandQueue.getDevice().getHardwareInfo()) ? 
2 : 1; size_t size = sizeof(typename GfxFamily::COMPUTE_WALKER) + (sizeof(typename GfxFamily::PIPE_CONTROL) * numPipeControls) + HardwareCommandsHelper::getSizeRequiredCS() + EncodeMemoryPrefetch::getSizeForMemoryPrefetch(pKernel->getKernelInfo().heapInfo.KernelHeapSize); auto devices = commandQueue.getGpgpuCommandStreamReceiver().getOsContext().getDeviceBitfield(); auto partitionWalker = ImplicitScalingHelper::isImplicitScalingEnabled(devices, !pKernel->isSingleSubdevicePreferred()); if (partitionWalker) { Vec3 groupStart = dispatchInfo.getStartOfWorkgroups(); Vec3 groupCount = dispatchInfo.getNumberOfWorkgroups(); UNRECOVERABLE_IF(groupCount.x == 0); const bool staticPartitioning = commandQueue.getGpgpuCommandStreamReceiver().isStaticWorkPartitioningEnabled(); size += static_cast(ImplicitScalingDispatch::getSize(false, staticPartitioning, devices, groupStart, groupCount)); } size += PerformanceCounters::getGpuCommandsSize(commandQueue.getPerfCounters(), commandQueue.getGpgpuEngine().osContext->getEngineType(), reservePerfCounters); return size; } template size_t EnqueueOperation::getSizeRequiredForTimestampPacketWrite() { return 0; } template void GpgpuWalkerHelper::dispatchProfilingCommandsStart(TagNodeBase &hwTimeStamps, LinearStream *commandStream, const HardwareInfo &hwInfo) { } template void GpgpuWalkerHelper::dispatchProfilingCommandsEnd(TagNodeBase &hwTimeStamps, LinearStream *commandStream, const HardwareInfo &hwInfo) { } template size_t EnqueueOperation::getSizeForCacheFlushAfterWalkerCommands(const Kernel &kernel, const CommandQueue &commandQueue) { size_t size = 0; if (kernel.requiresCacheFlushCommand(commandQueue)) { size += sizeof(typename GfxFamily::PIPE_CONTROL); if constexpr (GfxFamily::isUsingL3Control) { StackVec allocationsForCacheFlush; kernel.getAllocationsForCacheFlush(allocationsForCacheFlush); StackVec subranges; for (auto &allocation : allocationsForCacheFlush) { coverRangeExact(allocation->getGpuAddress(), 
allocation->getUnderlyingBufferSize(), subranges, GfxFamily::L3_FLUSH_ADDRESS_RANGE::L3_FLUSH_EVICTION_POLICY_FLUSH_L3_WITH_EVICTION); } size += getSizeNeededToFlushGpuCache(subranges, true); } } return size; } } // namespace NEO compute-runtime-22.14.22890/opencl/source/command_queue/hardware_interface.h000066400000000000000000000073271422164147700267530ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/preemption_mode.h" #include "CL/cl.h" #include namespace NEO { class CommandQueue; class DispatchInfo; class IndirectHeap; class Kernel; class LinearStream; class HwPerfCounter; class HwTimeStamps; struct KernelOperation; struct MultiDispatchInfo; template class TagNode; template class HardwareInterface { public: using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA; using WALKER_TYPE = typename GfxFamily::WALKER_TYPE; static void dispatchWalker( CommandQueue &commandQueue, const MultiDispatchInfo &multiDispatchInfo, const CsrDependencies &csrDependencies, KernelOperation *blockedCommandsData, TagNodeBase *hwTimeStamps, TagNodeBase *hwPerfCounter, TimestampPacketDependencies *timestampPacketDependencies, TimestampPacketContainer *currentTimestampPacketNodes, uint32_t commandType); static void getDefaultDshSpace( const size_t &offsetInterfaceDescriptorTable, CommandQueue &commandQueue, const MultiDispatchInfo &multiDispatchInfo, size_t &totalInterfaceDescriptorTableSize, IndirectHeap *dsh, LinearStream *commandStream); static void dispatchWorkarounds( LinearStream *commandStream, CommandQueue &commandQueue, Kernel &kernel, const bool &enable); static void dispatchProfilingPerfStartCommands( TagNodeBase *hwTimeStamps, TagNodeBase *hwPerfCounter, LinearStream *commandStream, CommandQueue &commandQueue); static void dispatchProfilingPerfEndCommands( TagNodeBase *hwTimeStamps, TagNodeBase *hwPerfCounter, LinearStream 
*commandStream, CommandQueue &commandQueue); static void dispatchDebugPauseCommands( LinearStream *commandStream, CommandQueue &commandQueue, DebugPauseState confirmationTrigger, DebugPauseState waitCondition, const HardwareInfo &hwInfo); static void programWalker( LinearStream &commandStream, Kernel &kernel, CommandQueue &commandQueue, TimestampPacketContainer *currentTimestampPacketNodes, IndirectHeap &dsh, IndirectHeap &ioh, IndirectHeap &ssh, size_t globalWorkSizes[3], size_t localWorkSizes[3], PreemptionMode preemptionMode, size_t currentDispatchIndex, uint32_t &interfaceDescriptorIndex, const DispatchInfo &dispatchInfo, size_t offsetInterfaceDescriptorTable, const Vec3 &numberOfWorkgroups, const Vec3 &startOfWorkgroups); static WALKER_TYPE *allocateWalkerSpace(LinearStream &commandStream, const Kernel &kernel); static void obtainIndirectHeaps(CommandQueue &commandQueue, const MultiDispatchInfo &multiDispatchInfo, bool blockedQueue, IndirectHeap *&dsh, IndirectHeap *&ioh, IndirectHeap *&ssh); static void dispatchKernelCommands(CommandQueue &commandQueue, const DispatchInfo &dispatchInfo, uint32_t commandType, LinearStream &commandStream, bool isMainKernel, size_t currentDispatchIndex, TimestampPacketContainer *currentTimestampPacketNodes, PreemptionMode preemptionMode, uint32_t &interfaceDescriptorIndex, size_t offsetInterfaceDescriptorTable, IndirectHeap &dsh, IndirectHeap &ioh, IndirectHeap &ssh); }; } // namespace NEO compute-runtime-22.14.22890/opencl/source/command_queue/hardware_interface_base.inl000066400000000000000000000374041422164147700302770ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_container/command_encoder.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/pause_on_gpu_properties.h" #include "shared/source/helpers/pipe_control_args.h" #include "shared/source/memory_manager/internal_allocation_storage.h" 
#include "opencl/source/command_queue/gpgpu_walker.h" #include "opencl/source/command_queue/hardware_interface.h" #include "opencl/source/helpers/cl_preemption_helper.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/helpers/task_information.h" #include "opencl/source/mem_obj/buffer.h" namespace NEO { template inline typename GfxFamily::WALKER_TYPE *HardwareInterface::allocateWalkerSpace(LinearStream &commandStream, const Kernel &kernel) { auto walkerCmd = commandStream.getSpaceForCmd(); return walkerCmd; } template inline void HardwareInterface::dispatchProfilingPerfStartCommands( TagNodeBase *hwTimeStamps, TagNodeBase *hwPerfCounter, LinearStream *commandStream, CommandQueue &commandQueue) { // If hwTimeStampAlloc is passed (not nullptr), then we know that profiling is enabled if (hwTimeStamps != nullptr) { GpgpuWalkerHelper::dispatchProfilingCommandsStart(*hwTimeStamps, commandStream, commandQueue.getDevice().getHardwareInfo()); } if (hwPerfCounter != nullptr) { GpgpuWalkerHelper::dispatchPerfCountersCommandsStart(commandQueue, *hwPerfCounter, commandStream); } } template inline void HardwareInterface::dispatchProfilingPerfEndCommands( TagNodeBase *hwTimeStamps, TagNodeBase *hwPerfCounter, LinearStream *commandStream, CommandQueue &commandQueue) { // If hwTimeStamps is passed (not nullptr), then we know that profiling is enabled if (hwTimeStamps != nullptr) { GpgpuWalkerHelper::dispatchProfilingCommandsEnd(*hwTimeStamps, commandStream, commandQueue.getDevice().getHardwareInfo()); } if (hwPerfCounter != nullptr) { GpgpuWalkerHelper::dispatchPerfCountersCommandsEnd(commandQueue, *hwPerfCounter, commandStream); } } template void HardwareInterface::dispatchWalker( CommandQueue &commandQueue, const MultiDispatchInfo &multiDispatchInfo, const CsrDependencies &csrDependencies, KernelOperation *blockedCommandsData, TagNodeBase *hwTimeStamps, TagNodeBase *hwPerfCounter, TimestampPacketDependencies *timestampPacketDependencies, 
TimestampPacketContainer *currentTimestampPacketNodes, uint32_t commandType) { LinearStream *commandStream = nullptr; IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr; auto mainKernel = multiDispatchInfo.peekMainKernel(); auto preemptionMode = ClPreemptionHelper::taskPreemptionMode(commandQueue.getDevice(), multiDispatchInfo); for (auto &dispatchInfo : multiDispatchInfo) { // Compute local workgroup sizes if (dispatchInfo.getLocalWorkgroupSize().x == 0) { const auto lws = generateWorkgroupSize(dispatchInfo); const_cast(dispatchInfo).setLWS(lws); } } // Allocate command stream and indirect heaps bool blockedQueue = (blockedCommandsData != nullptr); obtainIndirectHeaps(commandQueue, multiDispatchInfo, blockedQueue, dsh, ioh, ssh); if (blockedQueue) { blockedCommandsData->setHeaps(dsh, ioh, ssh); commandStream = blockedCommandsData->commandStream.get(); } else { commandStream = &commandQueue.getCS(0); } if (commandQueue.getDevice().getDebugger()) { auto debugSurface = commandQueue.getGpgpuCommandStreamReceiver().getDebugSurfaceAllocation(); void *addressToPatch = reinterpret_cast(debugSurface->getGpuAddress()); size_t sizeToPatch = debugSurface->getUnderlyingBufferSize(); Buffer::setSurfaceState(&commandQueue.getDevice(), commandQueue.getDevice().getDebugger()->getDebugSurfaceReservedSurfaceState(*ssh), false, false, sizeToPatch, addressToPatch, 0, debugSurface, 0, 0, mainKernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, mainKernel->areMultipleSubDevicesInContext()); } bool programDependencies = true; if (DebugManager.flags.ResolveDependenciesViaPipeControls.get() == 1) { //only optimize kernel after kernel if (commandQueue.peekLatestSentEnqueueOperation() == EnqueueProperties::Operation::GpuKernel) { programDependencies = false; } } if (programDependencies) { TimestampPacketHelper::programCsrDependenciesForTimestampPacketContainer(*commandStream, csrDependencies); } dsh->align(EncodeStates::alignInterfaceDescriptorData); 
uint32_t interfaceDescriptorIndex = 0; const size_t offsetInterfaceDescriptorTable = dsh->getUsed(); size_t totalInterfaceDescriptorTableSize = sizeof(INTERFACE_DESCRIPTOR_DATA); getDefaultDshSpace(offsetInterfaceDescriptorTable, commandQueue, multiDispatchInfo, totalInterfaceDescriptorTableSize, dsh, commandStream); // Program media interface descriptor load HardwareCommandsHelper::sendMediaInterfaceDescriptorLoad( *commandStream, offsetInterfaceDescriptorTable, totalInterfaceDescriptorTableSize); DEBUG_BREAK_IF(offsetInterfaceDescriptorTable % 64 != 0); dispatchProfilingPerfStartCommands(hwTimeStamps, hwPerfCounter, commandStream, commandQueue); const auto &hwInfo = commandQueue.getDevice().getHardwareInfo(); if (PauseOnGpuProperties::pauseModeAllowed(DebugManager.flags.PauseOnEnqueue.get(), commandQueue.getGpgpuCommandStreamReceiver().peekTaskCount(), PauseOnGpuProperties::PauseMode::BeforeWorkload)) { dispatchDebugPauseCommands(commandStream, commandQueue, DebugPauseState::waitingForUserStartConfirmation, DebugPauseState::hasUserStartConfirmation, hwInfo); } mainKernel->performKernelTuning(commandQueue.getGpgpuCommandStreamReceiver(), multiDispatchInfo.begin()->getLocalWorkgroupSize(), multiDispatchInfo.begin()->getActualWorkgroupSize(), multiDispatchInfo.begin()->getOffset(), currentTimestampPacketNodes); size_t currentDispatchIndex = 0; for (auto &dispatchInfo : multiDispatchInfo) { dispatchInfo.dispatchInitCommands(*commandStream, timestampPacketDependencies, commandQueue.getDevice().getHardwareInfo()); bool isMainKernel = (dispatchInfo.getKernel() == mainKernel); dispatchKernelCommands(commandQueue, dispatchInfo, commandType, *commandStream, isMainKernel, currentDispatchIndex, currentTimestampPacketNodes, preemptionMode, interfaceDescriptorIndex, offsetInterfaceDescriptorTable, *dsh, *ioh, *ssh); currentDispatchIndex++; dispatchInfo.dispatchEpilogueCommands(*commandStream, timestampPacketDependencies, commandQueue.getDevice().getHardwareInfo()); } if 
(mainKernel->requiresCacheFlushCommand(commandQueue)) { uint64_t postSyncAddress = 0; if (commandQueue.getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) { auto timestampPacketNodeForPostSync = currentTimestampPacketNodes->peekNodes().at(currentDispatchIndex); timestampPacketNodeForPostSync->setProfilingCapable(false); postSyncAddress = TimestampPacketHelper::getContextEndGpuAddress(*timestampPacketNodeForPostSync); } HardwareCommandsHelper::programCacheFlushAfterWalkerCommand(commandStream, commandQueue, mainKernel, postSyncAddress); } if (PauseOnGpuProperties::GpuScratchRegWriteAllowed(DebugManager.flags.GpuScratchRegWriteAfterWalker.get(), commandQueue.getGpgpuCommandStreamReceiver().peekTaskCount())) { uint32_t registerOffset = DebugManager.flags.GpuScratchRegWriteRegisterOffset.get(); uint32_t registerData = DebugManager.flags.GpuScratchRegWriteRegisterData.get(); LriHelper::program(commandStream, registerOffset, registerData, EncodeSetMMIO::isRemapApplicable(registerOffset)); } if (PauseOnGpuProperties::pauseModeAllowed(DebugManager.flags.PauseOnEnqueue.get(), commandQueue.getGpgpuCommandStreamReceiver().peekTaskCount(), PauseOnGpuProperties::PauseMode::AfterWorkload)) { dispatchDebugPauseCommands(commandStream, commandQueue, DebugPauseState::waitingForUserEndConfirmation, DebugPauseState::hasUserEndConfirmation, hwInfo); } dispatchProfilingPerfEndCommands(hwTimeStamps, hwPerfCounter, commandStream, commandQueue); } template void HardwareInterface::dispatchKernelCommands(CommandQueue &commandQueue, const DispatchInfo &dispatchInfo, uint32_t commandType, LinearStream &commandStream, bool isMainKernel, size_t currentDispatchIndex, TimestampPacketContainer *currentTimestampPacketNodes, PreemptionMode preemptionMode, uint32_t &interfaceDescriptorIndex, size_t offsetInterfaceDescriptorTable, IndirectHeap &dsh, IndirectHeap &ioh, IndirectHeap &ssh) { auto &kernel = *dispatchInfo.getKernel(); DEBUG_BREAK_IF(!(dispatchInfo.getDim() >= 1 && 
dispatchInfo.getDim() <= 3)); DEBUG_BREAK_IF(!(dispatchInfo.getGWS().z == 1 || dispatchInfo.getDim() == 3)); DEBUG_BREAK_IF(!(dispatchInfo.getGWS().y == 1 || dispatchInfo.getDim() >= 2)); DEBUG_BREAK_IF(!(dispatchInfo.getOffset().z == 0 || dispatchInfo.getDim() == 3)); DEBUG_BREAK_IF(!(dispatchInfo.getOffset().y == 0 || dispatchInfo.getDim() >= 2)); // If we don't have a required WGS, compute one opportunistically if (commandType == CL_COMMAND_NDRANGE_KERNEL) { provideLocalWorkGroupSizeHints(commandQueue.getContextPtr(), dispatchInfo); } //Get dispatch geometry auto dim = dispatchInfo.getDim(); const auto &gws = dispatchInfo.getGWS(); const auto &offset = dispatchInfo.getOffset(); const auto &startOfWorkgroups = dispatchInfo.getStartOfWorkgroups(); // Compute local workgroup sizes const auto &lws = dispatchInfo.getLocalWorkgroupSize(); const auto &elws = (dispatchInfo.getEnqueuedWorkgroupSize().x > 0) ? dispatchInfo.getEnqueuedWorkgroupSize() : lws; // Compute number of work groups const auto &totalNumberOfWorkgroups = dispatchInfo.getTotalNumberOfWorkgroups(); const auto &numberOfWorkgroups = dispatchInfo.getNumberOfWorkgroups(); UNRECOVERABLE_IF(totalNumberOfWorkgroups.x == 0); UNRECOVERABLE_IF(numberOfWorkgroups.x == 0); size_t globalWorkSizes[3] = {gws.x, gws.y, gws.z}; // Patch our kernel constants kernel.setGlobalWorkOffsetValues(static_cast(offset.x), static_cast(offset.y), static_cast(offset.z)); kernel.setGlobalWorkSizeValues(static_cast(gws.x), static_cast(gws.y), static_cast(gws.z)); if (isMainKernel || (!kernel.isLocalWorkSize2Patchable())) { kernel.setLocalWorkSizeValues(static_cast(lws.x), static_cast(lws.y), static_cast(lws.z)); } kernel.setLocalWorkSize2Values(static_cast(lws.x), static_cast(lws.y), static_cast(lws.z)); kernel.setEnqueuedLocalWorkSizeValues(static_cast(elws.x), static_cast(elws.y), static_cast(elws.z)); if (isMainKernel) { kernel.setNumWorkGroupsValues(static_cast(totalNumberOfWorkgroups.x), static_cast(totalNumberOfWorkgroups.y), 
static_cast(totalNumberOfWorkgroups.z)); } kernel.setWorkDim(dim); // Send our indirect object data size_t localWorkSizes[3] = {lws.x, lws.y, lws.z}; dispatchWorkarounds(&commandStream, commandQueue, kernel, true); programWalker(commandStream, kernel, commandQueue, currentTimestampPacketNodes, dsh, ioh, ssh, globalWorkSizes, localWorkSizes, preemptionMode, currentDispatchIndex, interfaceDescriptorIndex, dispatchInfo, offsetInterfaceDescriptorTable, numberOfWorkgroups, startOfWorkgroups); dispatchWorkarounds(&commandStream, commandQueue, kernel, false); } template void HardwareInterface::obtainIndirectHeaps(CommandQueue &commandQueue, const MultiDispatchInfo &multiDispatchInfo, bool blockedQueue, IndirectHeap *&dsh, IndirectHeap *&ioh, IndirectHeap *&ssh) { if (blockedQueue) { size_t dshSize = 0; size_t colorCalcSize = 0; size_t sshSize = HardwareCommandsHelper::getTotalSizeRequiredSSH(multiDispatchInfo); bool iohEqualsDsh = false; dshSize = HardwareCommandsHelper::getTotalSizeRequiredDSH(multiDispatchInfo); commandQueue.allocateHeapMemory(IndirectHeap::Type::DYNAMIC_STATE, dshSize, dsh); dsh->getSpace(colorCalcSize); commandQueue.allocateHeapMemory(IndirectHeap::Type::SURFACE_STATE, sshSize, ssh); if (iohEqualsDsh) { ioh = dsh; } else { commandQueue.allocateHeapMemory(IndirectHeap::Type::INDIRECT_OBJECT, HardwareCommandsHelper::getTotalSizeRequiredIOH(multiDispatchInfo), ioh); } } else { dsh = &getIndirectHeap(commandQueue, multiDispatchInfo); ioh = &getIndirectHeap(commandQueue, multiDispatchInfo); ssh = &getIndirectHeap(commandQueue, multiDispatchInfo); } } template inline void HardwareInterface::dispatchDebugPauseCommands( LinearStream *commandStream, CommandQueue &commandQueue, DebugPauseState confirmationTrigger, DebugPauseState waitCondition, const HardwareInfo &hwInfo) { if (!commandQueue.isSpecial()) { auto address = commandQueue.getGpgpuCommandStreamReceiver().getDebugPauseStateGPUAddress(); { using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; using 
POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION; const auto &hwInfo = commandQueue.getDevice().getHardwareInfo(); PipeControlArgs args; args.dcFlushEnable = MemorySynchronizationCommands::getDcFlushEnable(true, hwInfo); MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( *commandStream, POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, address, static_cast(confirmationTrigger), hwInfo, args); } { using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT; using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION; EncodeSempahore::addMiSemaphoreWaitCommand(*commandStream, address, static_cast(waitCondition), COMPARE_OPERATION::COMPARE_OPERATION_SAD_EQUAL_SDD); } } } } // namespace NEO compute-runtime-22.14.22890/opencl/source/command_queue/hardware_interface_bdw_and_later.inl000066400000000000000000000104411422164147700321420ustar00rootroot00000000000000/* * Copyright (C) 2019-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/engine_node_helper.h" #include "shared/source/os_interface/os_context.h" #include "opencl/source/command_queue/hardware_interface_base.inl" namespace NEO { template inline void HardwareInterface::getDefaultDshSpace( const size_t &offsetInterfaceDescriptorTable, CommandQueue &commandQueue, const MultiDispatchInfo &multiDispatchInfo, size_t &totalInterfaceDescriptorTableSize, IndirectHeap *dsh, LinearStream *commandStream) { size_t numDispatches = multiDispatchInfo.size(); totalInterfaceDescriptorTableSize *= numDispatches; dsh->getSpace(totalInterfaceDescriptorTableSize); } template inline void HardwareInterface::dispatchWorkarounds( LinearStream *commandStream, CommandQueue &commandQueue, Kernel &kernel, const bool &enable) { if (enable) { PreemptionHelper::applyPreemptionWaCmdsBegin(commandStream, commandQueue.getDevice()); // Implement enabling special WA DisableLSQCROPERFforOCL if needed 
GpgpuWalkerHelper::applyWADisableLSQCROPERFforOCL(commandStream, kernel, enable); } else { // Implement disabling special WA DisableLSQCROPERFforOCL if needed GpgpuWalkerHelper::applyWADisableLSQCROPERFforOCL(commandStream, kernel, enable); PreemptionHelper::applyPreemptionWaCmdsEnd(commandStream, commandQueue.getDevice()); } } template inline void HardwareInterface::programWalker( LinearStream &commandStream, Kernel &kernel, CommandQueue &commandQueue, TimestampPacketContainer *currentTimestampPacketNodes, IndirectHeap &dsh, IndirectHeap &ioh, IndirectHeap &ssh, size_t globalWorkSizes[3], size_t localWorkSizes[3], PreemptionMode preemptionMode, size_t currentDispatchIndex, uint32_t &interfaceDescriptorIndex, const DispatchInfo &dispatchInfo, size_t offsetInterfaceDescriptorTable, const Vec3 &numberOfWorkgroups, const Vec3 &startOfWorkgroups) { auto walkerCmdBuf = allocateWalkerSpace(commandStream, kernel); WALKER_TYPE walkerCmd = GfxFamily::cmdInitGpgpuWalker; uint32_t dim = dispatchInfo.getDim(); uint32_t simd = kernel.getKernelInfo().getMaxSimdSize(); size_t globalOffsets[3] = {dispatchInfo.getOffset().x, dispatchInfo.getOffset().y, dispatchInfo.getOffset().z}; size_t startWorkGroups[3] = {startOfWorkgroups.x, startOfWorkgroups.y, startOfWorkgroups.z}; size_t numWorkGroups[3] = {numberOfWorkgroups.x, numberOfWorkgroups.y, numberOfWorkgroups.z}; if (currentTimestampPacketNodes && commandQueue.getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) { auto timestampPacketNode = currentTimestampPacketNodes->peekNodes().at(currentDispatchIndex); GpgpuWalkerHelper::setupTimestampPacket(&commandStream, &walkerCmd, timestampPacketNode, commandQueue.getDevice().getRootDeviceEnvironment()); } auto isCcsUsed = EngineHelpers::isCcs(commandQueue.getGpgpuEngine().osContext->getEngineType()); auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(kernel); HardwareCommandsHelper::sendIndirectState( commandStream, dsh, ioh, ssh, kernel, 
kernel.getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed), simd, localWorkSizes, offsetInterfaceDescriptorTable, interfaceDescriptorIndex, preemptionMode, &walkerCmd, nullptr, true, commandQueue.getDevice()); GpgpuWalkerHelper::setGpgpuWalkerThreadData(&walkerCmd, kernel.getKernelInfo().kernelDescriptor, globalOffsets, startWorkGroups, numWorkGroups, localWorkSizes, simd, dim, false, false, 0u); EncodeDispatchKernel::encodeAdditionalWalkerFields(commandQueue.getDevice().getHardwareInfo(), walkerCmd, kernel.getExecutionType()); *walkerCmdBuf = walkerCmd; } } // namespace NEO compute-runtime-22.14.22890/opencl/source/command_queue/hardware_interface_xehp_and_later.inl000066400000000000000000000150351422164147700323360ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_container/command_encoder.h" #include "shared/source/command_container/implicit_scaling.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/helpers/engine_node_helper.h" #include "shared/source/os_interface/os_context.h" #include "shared/source/os_interface/os_interface.h" #include "shared/source/utilities/tag_allocator.h" #include "opencl/source/command_queue/hardware_interface_base.inl" namespace NEO { template inline void HardwareInterface::getDefaultDshSpace( const size_t &offsetInterfaceDescriptorTable, CommandQueue &commandQueue, const MultiDispatchInfo &multiDispatchInfo, size_t &totalInterfaceDescriptorTableSize, IndirectHeap *dsh, LinearStream *commandStream) { } template inline void HardwareInterface::dispatchWorkarounds( LinearStream *commandStream, CommandQueue &commandQueue, Kernel &kernel, const bool &enable) { } template inline void HardwareInterface::programWalker( LinearStream &commandStream, Kernel &kernel, CommandQueue &commandQueue, TimestampPacketContainer *currentTimestampPacketNodes, IndirectHeap &dsh, IndirectHeap &ioh, 
IndirectHeap &ssh, size_t globalWorkSizes[3], size_t localWorkSizes[3], PreemptionMode preemptionMode, size_t currentDispatchIndex, uint32_t &interfaceDescriptorIndex, const DispatchInfo &dispatchInfo, size_t offsetInterfaceDescriptorTable, const Vec3 &numberOfWorkgroups, const Vec3 &startOfWorkgroups) { using COMPUTE_WALKER = typename GfxFamily::COMPUTE_WALKER; COMPUTE_WALKER walkerCmd = GfxFamily::cmdInitGpgpuWalker; auto &kernelInfo = kernel.getKernelInfo(); uint32_t dim = dispatchInfo.getDim(); uint32_t simd = kernelInfo.getMaxSimdSize(); auto numChannels = kernelInfo.kernelDescriptor.kernelAttributes.numLocalIdChannels; size_t globalOffsets[3] = {dispatchInfo.getOffset().x, dispatchInfo.getOffset().y, dispatchInfo.getOffset().z}; size_t startWorkGroups[3] = {startOfWorkgroups.x, startOfWorkgroups.y, startOfWorkgroups.z}; size_t numWorkGroups[3] = {numberOfWorkgroups.x, numberOfWorkgroups.y, numberOfWorkgroups.z}; uint32_t requiredWalkOrder = 0u; bool localIdsGenerationByRuntime = EncodeDispatchKernel::isRuntimeLocalIdsGenerationRequired( numChannels, localWorkSizes, std::array{{kernelInfo.kernelDescriptor.kernelAttributes.workgroupWalkOrder[0], kernelInfo.kernelDescriptor.kernelAttributes.workgroupWalkOrder[1], kernelInfo.kernelDescriptor.kernelAttributes.workgroupWalkOrder[2]}}, kernelInfo.kernelDescriptor.kernelAttributes.flags.requiresWorkgroupWalkOrder, requiredWalkOrder, simd); bool inlineDataProgrammingRequired = HardwareCommandsHelper::inlineDataProgrammingRequired(kernel); auto idd = &walkerCmd.getInterfaceDescriptor(); auto &queueCsr = commandQueue.getGpgpuCommandStreamReceiver(); if (currentTimestampPacketNodes && queueCsr.peekTimestampPacketWriteEnabled()) { auto timestampPacket = currentTimestampPacketNodes->peekNodes().at(currentDispatchIndex); GpgpuWalkerHelper::setupTimestampPacket(&commandStream, &walkerCmd, timestampPacket, commandQueue.getDevice().getRootDeviceEnvironment()); } auto isCcsUsed = 
EngineHelpers::isCcs(commandQueue.getGpgpuEngine().osContext->getEngineType()); auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(kernel); const auto &hwInfo = commandQueue.getDevice().getHardwareInfo(); if (auto kernelAllocation = kernelInfo.getGraphicsAllocation()) { EncodeMemoryPrefetch::programMemoryPrefetch(commandStream, *kernelAllocation, kernelInfo.heapInfo.KernelHeapSize, 0, hwInfo); } HardwareCommandsHelper::sendIndirectState( commandStream, dsh, ioh, ssh, kernel, kernel.getKernelStartOffset(localIdsGenerationByRuntime, kernelUsesLocalIds, isCcsUsed), simd, localWorkSizes, offsetInterfaceDescriptorTable, interfaceDescriptorIndex, preemptionMode, &walkerCmd, idd, localIdsGenerationByRuntime, commandQueue.getDevice()); GpgpuWalkerHelper::setGpgpuWalkerThreadData(&walkerCmd, kernelInfo.kernelDescriptor, globalOffsets, startWorkGroups, numWorkGroups, localWorkSizes, simd, dim, localIdsGenerationByRuntime, inlineDataProgrammingRequired, requiredWalkOrder); EncodeDispatchKernel::encodeAdditionalWalkerFields(hwInfo, walkerCmd, kernel.getExecutionType()); auto devices = queueCsr.getOsContext().getDeviceBitfield(); auto partitionWalker = ImplicitScalingHelper::isImplicitScalingEnabled(devices, !kernel.isSingleSubdevicePreferred()); if (partitionWalker) { const uint64_t workPartitionAllocationGpuVa = commandQueue.getDevice().getDefaultEngine().commandStreamReceiver->getWorkPartitionAllocationGpuAddress(); uint32_t partitionCount = 0u; ImplicitScalingDispatch::dispatchCommands(commandStream, walkerCmd, devices, partitionCount, false, false, kernel.usesImages(), workPartitionAllocationGpuVa, hwInfo); if (queueCsr.isStaticWorkPartitioningEnabled()) { queueCsr.setActivePartitions(std::max(queueCsr.getActivePartitions(), partitionCount)); } auto timestampPacket = currentTimestampPacketNodes->peekNodes().at(currentDispatchIndex); timestampPacket->setPacketsUsed(partitionCount); } else { auto computeWalkerOnStream = commandStream.getSpaceForCmd(); 
*computeWalkerOnStream = walkerCmd; } } } // namespace NEO compute-runtime-22.14.22890/opencl/source/command_queue/resource_barrier.cpp000066400000000000000000000027471422164147700270270ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_queue/resource_barrier.h" #include "shared/source/memory_manager/unified_memory_manager.h" #include "shared/source/utilities/range.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/context/context.h" #include "opencl/source/helpers/cl_validators.h" namespace NEO { BarrierCommand::BarrierCommand(CommandQueue *commandQueue, const cl_resource_barrier_descriptor_intel *descriptors, uint32_t numDescriptors) : numSurfaces(numDescriptors) { for (auto description : CreateRange(descriptors, numDescriptors)) { GraphicsAllocation *allocation; if (description.mem_object) { MemObj *memObj = nullptr; WithCastToInternal(description.mem_object, &memObj); allocation = memObj->getGraphicsAllocation(commandQueue->getDevice().getRootDeviceIndex()); } else { auto svmData = commandQueue->getContext().getSVMAllocsManager()->getSVMAlloc(description.svm_allocation_pointer); UNRECOVERABLE_IF(svmData == nullptr); allocation = svmData->gpuAllocations.getGraphicsAllocation(commandQueue->getDevice().getRootDeviceIndex()); } surfaces.push_back(ResourceSurface(allocation, description.type, description.scope)); } for (auto it = surfaces.begin(), end = surfaces.end(); it != end; it++) { surfacePtrs.push_back(it); } } } // namespace NEO compute-runtime-22.14.22890/opencl/source/command_queue/resource_barrier.h000066400000000000000000000011021422164147700264540ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/extensions/public/cl_ext_private.h" #include "opencl/source/memory_manager/resource_surface.h" namespace NEO { class CommandQueue; class 
BarrierCommand { public: BarrierCommand(CommandQueue *commandQueue, const cl_resource_barrier_descriptor_intel *descriptors, uint32_t numDescriptors); ~BarrierCommand() {} uint32_t numSurfaces = 0; StackVec surfaces; StackVec surfacePtrs; }; } // namespace NEO compute-runtime-22.14.22890/opencl/source/compiler_interface/000077500000000000000000000000001422164147700237645ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/source/compiler_interface/CMakeLists.txt000066400000000000000000000011011422164147700265150ustar00rootroot00000000000000# # Copyright (C) 2018-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(RUNTIME_SRCS_COMPILER_INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/default_cl_cache_config.cpp ) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_COMPILER_INTERFACE} ${NEO_SHARED_DIRECTORY}/compiler_interface/oclc_extensions.cpp ${NEO_SHARED_DIRECTORY}/compiler_interface/oclc_extensions.h ) set_property(GLOBAL PROPERTY RUNTIME_SRCS_COMPILER_INTERFACE ${RUNTIME_SRCS_COMPILER_INTERFACE}) compute-runtime-22.14.22890/opencl/source/compiler_interface/default_cache_config.cpp000066400000000000000000000005531422164147700305670ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/compiler_interface/default_cache_config.h" #include "opencl/source/compiler_interface/default_cl_cache_config.h" namespace NEO { CompilerCacheConfig getDefaultCompilerCacheConfig() { return getDefaultClCompilerCacheConfig(); } } // namespace NEO compute-runtime-22.14.22890/opencl/source/compiler_interface/default_cl_cache_config.cpp000066400000000000000000000014321422164147700312420ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "default_cl_cache_config.h" #include "shared/source/utilities/debug_settings_reader.h" #include "opencl/source/os_interface/ocl_reg_path.h" 
#include "config.h" #include "os_inc.h" #include namespace NEO { CompilerCacheConfig getDefaultClCompilerCacheConfig() { CompilerCacheConfig ret; std::string keyName = oclRegPath; keyName += "cl_cache_dir"; std::unique_ptr settingsReader(SettingsReader::createOsReader(false, keyName)); ret.cacheDir = settingsReader->getSetting(settingsReader->appSpecificLocation(keyName), static_cast(CL_CACHE_LOCATION)); ret.cacheFileExtension = ".cl_cache"; return ret; } } // namespace NEO compute-runtime-22.14.22890/opencl/source/compiler_interface/default_cl_cache_config.h000066400000000000000000000003611422164147700307070ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/compiler_interface/compiler_cache.h" namespace NEO { CompilerCacheConfig getDefaultClCompilerCacheConfig(); } compute-runtime-22.14.22890/opencl/source/context/000077500000000000000000000000001422164147700216165ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/source/context/CMakeLists.txt000066400000000000000000000011141422164147700243530ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(RUNTIME_SRCS_CONTEXT ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/context.cpp ${CMAKE_CURRENT_SOURCE_DIR}/context.h ${CMAKE_CURRENT_SOURCE_DIR}/context.inl ${CMAKE_CURRENT_SOURCE_DIR}/context_type.h ${CMAKE_CURRENT_SOURCE_DIR}/driver_diagnostics.cpp ${CMAKE_CURRENT_SOURCE_DIR}/driver_diagnostics.h ) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_CONTEXT}) set_property(GLOBAL PROPERTY RUNTIME_SRCS_CONTEXT ${RUNTIME_SRCS_CONTEXT}) compute-runtime-22.14.22890/opencl/source/context/context.cpp000066400000000000000000000415171422164147700240160ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/context/context.h" #include 
"shared/source/built_ins/built_ins.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/compiler_interface/compiler_interface.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/helpers/get_info.h" #include "shared/source/helpers/ptr_math.h" #include "shared/source/helpers/string.h" #include "shared/source/memory_manager/deferred_deleter.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/memory_manager/unified_memory_manager.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/execution_environment/cl_execution_environment.h" #include "opencl/source/gtpin/gtpin_notify.h" #include "opencl/source/helpers/get_info_status_mapper.h" #include "opencl/source/helpers/surface_formats.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/platform/platform.h" #include "opencl/source/sharings/sharing.h" #include "opencl/source/sharings/sharing_factory.h" #include "d3d_sharing_functions.h" #include #include namespace NEO { Context::Context( void(CL_CALLBACK *funcNotify)(const char *, const void *, size_t, void *), void *data) { contextCallback = funcNotify; userData = data; sharingFunctions.resize(SharingType::MAX_SHARING_VALUE); } Context::~Context() { delete[] properties; for (auto rootDeviceIndex = 0u; rootDeviceIndex < specialQueues.size(); rootDeviceIndex++) { if (specialQueues[rootDeviceIndex]) { delete specialQueues[rootDeviceIndex]; } } if (svmAllocsManager) { delete svmAllocsManager; } if (driverDiagnostics) { delete driverDiagnostics; } if (memoryManager && memoryManager->isAsyncDeleterEnabled()) { memoryManager->getDeferredDeleter()->removeClient(); } gtpinNotifyContextDestroy((cl_context)this); destructorCallbacks.invoke(this); for (auto &device : devices) { device->decRefInternal(); } } cl_int Context::setDestructorCallback(void(CL_CALLBACK *funcNotify)(cl_context, 
/**
 * Looks for an allocation that already backs the host range [ptr, ptr + size).
 *
 * The SVM lookup runs first. Its status is returned as-is when it reports an
 * error or when it produced an allocation; otherwise the map-operations lookup
 * is consulted and its status is returned.
 *
 * @param ptr              start of the host range
 * @param size             length of the host range in bytes
 * @param rootDeviceIndex  root device whose allocation is wanted
 * @param allocation       in/out: receives the allocation found, if any
 * @param memoryType       out: forwarded to the SVM lookup
 * @param isCpuCopyAllowed in/out: forwarded to the SVM lookup
 * @return status of the lookup that produced the result
 */
cl_int Context::tryGetExistingHostPtrAllocation(const void *ptr,
                                                size_t size,
                                                uint32_t rootDeviceIndex,
                                                GraphicsAllocation *&allocation,
                                                InternalMemoryType &memoryType,
                                                bool &isCpuCopyAllowed) {
    const cl_int svmStatus = tryGetExistingSvmAllocation(ptr, size, rootDeviceIndex, allocation, memoryType, isCpuCopyAllowed);

    const bool svmLookupFailed = (svmStatus != CL_SUCCESS);
    const bool alreadyResolved = (allocation != nullptr);
    if (svmLookupFailed || alreadyResolved) {
        return svmStatus;
    }

    return tryGetExistingMapAllocation(ptr, size, allocation);
}
svmEntry->cpuAllocation : svmEntry->gpuAllocations.getGraphicsAllocation(rootDeviceIndex); if (isCpuCopyAllowed) { if (svmEntry->memoryType == DEVICE_UNIFIED_MEMORY) { isCpuCopyAllowed = false; } } } } return CL_SUCCESS; } cl_int Context::tryGetExistingMapAllocation(const void *ptr, size_t size, GraphicsAllocation *&allocation) { if (MapInfo mapInfo = {}; mapOperationsStorage.getInfoForHostPtr(ptr, size, mapInfo)) { if (mapInfo.graphicsAllocation) { allocation = mapInfo.graphicsAllocation; } } return CL_SUCCESS; } const std::set &Context::getRootDeviceIndices() const { return rootDeviceIndices; } uint32_t Context::getMaxRootDeviceIndex() const { return maxRootDeviceIndex; } CommandQueue *Context::getSpecialQueue(uint32_t rootDeviceIndex) { return specialQueues[rootDeviceIndex]; } void Context::setSpecialQueue(CommandQueue *commandQueue, uint32_t rootDeviceIndex) { specialQueues[rootDeviceIndex] = commandQueue; } void Context::overrideSpecialQueueAndDecrementRefCount(CommandQueue *commandQueue, uint32_t rootDeviceIndex) { setSpecialQueue(commandQueue, rootDeviceIndex); commandQueue->setIsSpecialCommandQueue(true); //decrement ref count that special queue added this->decRefInternal(); }; bool Context::areMultiStorageAllocationsPreferred() { return this->contextType != ContextType::CONTEXT_TYPE_SPECIALIZED; } bool Context::createImpl(const cl_context_properties *properties, const ClDeviceVector &inputDevices, void(CL_CALLBACK *funcNotify)(const char *, const void *, size_t, void *), void *data, cl_int &errcodeRet) { auto propertiesCurrent = properties; bool interopUserSync = false; int32_t driverDiagnosticsUsed = -1; auto sharingBuilder = sharingFactory.build(); std::unique_ptr driverDiagnostics; while (propertiesCurrent && *propertiesCurrent) { errcodeRet = CL_SUCCESS; auto propertyType = propertiesCurrent[0]; auto propertyValue = propertiesCurrent[1]; propertiesCurrent += 2; switch (propertyType) { case CL_CONTEXT_PLATFORM: break; case 
CL_CONTEXT_SHOW_DIAGNOSTICS_INTEL: driverDiagnosticsUsed = static_cast(propertyValue); break; case CL_CONTEXT_INTEROP_USER_SYNC: interopUserSync = propertyValue > 0; break; default: if (!sharingBuilder->processProperties(propertyType, propertyValue)) { errcodeRet = CL_INVALID_PROPERTY; return false; } break; } } auto numProperties = ptrDiff(propertiesCurrent, properties) / sizeof(cl_context_properties); cl_context_properties *propertiesNew = nullptr; // copy the user properties if there are any if (numProperties) { propertiesNew = new cl_context_properties[numProperties + 1]; memcpy_s(propertiesNew, (numProperties + 1) * sizeof(cl_context_properties), properties, numProperties * sizeof(cl_context_properties)); propertiesNew[numProperties] = 0; numProperties++; } if (DebugManager.flags.PrintDriverDiagnostics.get() != -1) { driverDiagnosticsUsed = DebugManager.flags.PrintDriverDiagnostics.get(); } if (driverDiagnosticsUsed >= 0) { driverDiagnostics.reset(new DriverDiagnostics((cl_diagnostics_verbose_level)driverDiagnosticsUsed)); } this->numProperties = numProperties; this->properties = propertiesNew; this->setInteropUserSyncEnabled(interopUserSync); if (!sharingBuilder->finalizeProperties(*this, errcodeRet)) { return false; } bool containsDeviceWithSubdevices = false; for (const auto &device : inputDevices) { rootDeviceIndices.insert(device->getRootDeviceIndex()); containsDeviceWithSubdevices |= device->getNumGenericSubDevices() > 1; } this->driverDiagnostics = driverDiagnostics.release(); if (rootDeviceIndices.size() > 1 && containsDeviceWithSubdevices && !DebugManager.flags.EnableMultiRootDeviceContexts.get()) { DEBUG_BREAK_IF("No support for context with multiple devices with subdevices"); errcodeRet = CL_OUT_OF_HOST_MEMORY; return false; } devices = inputDevices; for (auto &rootDeviceIndex : rootDeviceIndices) { DeviceBitfield deviceBitfield{}; for (const auto &pDevice : devices) { if (pDevice->getRootDeviceIndex() == rootDeviceIndex) { deviceBitfield |= 
pDevice->getDeviceBitfield(); } } deviceBitfields.insert({rootDeviceIndex, deviceBitfield}); } if (devices.size() > 0) { maxRootDeviceIndex = *std::max_element(rootDeviceIndices.begin(), rootDeviceIndices.end(), std::less()); specialQueues.resize(maxRootDeviceIndex + 1u); auto device = this->getDevice(0); this->memoryManager = device->getMemoryManager(); if (memoryManager->isAsyncDeleterEnabled()) { memoryManager->getDeferredDeleter()->addClient(); } bool anySvmSupport = false; for (auto &device : devices) { device->incRefInternal(); anySvmSupport |= device->getHardwareInfo().capabilityTable.ftrSvm; } setupContextType(); if (anySvmSupport) { this->svmAllocsManager = new SVMAllocsManager(this->memoryManager, this->areMultiStorageAllocationsPreferred()); } } for (auto &device : devices) { if (!specialQueues[device->getRootDeviceIndex()]) { auto commandQueue = CommandQueue::create(this, device, nullptr, true, errcodeRet); // NOLINT DEBUG_BREAK_IF(commandQueue == nullptr); overrideSpecialQueueAndDecrementRefCount(commandQueue, device->getRootDeviceIndex()); } } return true; } cl_int Context::getInfo(cl_context_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) { cl_int retVal; size_t valueSize = GetInfo::invalidSourceSize; const void *pValue = nullptr; cl_uint numDevices; cl_uint refCount = 0; std::vector devIDs; auto callGetinfo = true; switch (paramName) { case CL_CONTEXT_DEVICES: valueSize = devices.size() * sizeof(cl_device_id); devices.toDeviceIDs(devIDs); pValue = devIDs.data(); break; case CL_CONTEXT_NUM_DEVICES: numDevices = (cl_uint)(devices.size()); valueSize = sizeof(numDevices); pValue = &numDevices; break; case CL_CONTEXT_PROPERTIES: valueSize = this->numProperties * sizeof(cl_context_properties); pValue = this->properties; if (valueSize == 0) { callGetinfo = false; } break; case CL_CONTEXT_REFERENCE_COUNT: refCount = static_cast(this->getReference()); valueSize = sizeof(refCount); pValue = &refCount; break; default: pValue 
= getOsContextInfo(paramName, &valueSize); break; } GetInfoStatus getInfoStatus = GetInfoStatus::SUCCESS; if (callGetinfo) { getInfoStatus = GetInfo::getInfo(paramValue, paramValueSize, pValue, valueSize); } retVal = changeGetInfoStatusToCLResultType(getInfoStatus); GetInfo::setParamValueReturnSize(paramValueSizeRet, valueSize, getInfoStatus); return retVal; } size_t Context::getNumDevices() const { return devices.size(); } bool Context::containsMultipleSubDevices(uint32_t rootDeviceIndex) const { return deviceBitfields.at(rootDeviceIndex).count() > 1; } ClDevice *Context::getDevice(size_t deviceOrdinal) const { return (ClDevice *)devices[deviceOrdinal]; } cl_int Context::getSupportedImageFormats( Device *device, cl_mem_flags flags, cl_mem_object_type imageType, cl_uint numEntries, cl_image_format *imageFormats, cl_uint *numImageFormatsReturned) { size_t numImageFormats = 0; const bool nv12ExtensionEnabled = device->getSpecializedDevice()->getDeviceInfo().nv12Extension; const bool packedYuvExtensionEnabled = device->getSpecializedDevice()->getDeviceInfo().packedYuvExtension; auto appendImageFormats = [&](ArrayRef formats) { if (imageFormats) { size_t offset = numImageFormats; for (size_t i = 0; i < formats.size() && offset < numEntries; ++i) { imageFormats[offset++] = formats[i].OCLImageFormat; } } numImageFormats += formats.size(); }; if (flags & CL_MEM_READ_ONLY) { if (this->getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features) { appendImageFormats(SurfaceFormats::readOnly20()); } else { appendImageFormats(SurfaceFormats::readOnly12()); } if (Image::isImage2d(imageType) && nv12ExtensionEnabled) { appendImageFormats(SurfaceFormats::planarYuv()); } if (Image::isImage2dOr2dArray(imageType)) { appendImageFormats(SurfaceFormats::readOnlyDepth()); } if (Image::isImage2d(imageType) && packedYuvExtensionEnabled) { appendImageFormats(SurfaceFormats::packedYuv()); } } else if (flags & CL_MEM_WRITE_ONLY) { appendImageFormats(SurfaceFormats::writeOnly()); if 
(Image::isImage2dOr2dArray(imageType)) { appendImageFormats(SurfaceFormats::readWriteDepth()); } } else if (nv12ExtensionEnabled && (flags & CL_MEM_NO_ACCESS_INTEL)) { if (this->getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features) { appendImageFormats(SurfaceFormats::readOnly20()); } else { appendImageFormats(SurfaceFormats::readOnly12()); } if (Image::isImage2d(imageType)) { appendImageFormats(SurfaceFormats::planarYuv()); } } else { appendImageFormats(SurfaceFormats::readWrite()); if (Image::isImage2dOr2dArray(imageType)) { appendImageFormats(SurfaceFormats::readWriteDepth()); } } if (numImageFormatsReturned) { *numImageFormatsReturned = static_cast(numImageFormats); } return CL_SUCCESS; } bool Context::isDeviceAssociated(const ClDevice &clDevice) const { for (const auto &pDevice : devices) { if (pDevice == &clDevice) { return true; } } return false; } ClDevice *Context::getSubDeviceByIndex(uint32_t subDeviceIndex) const { auto isExpectedSubDevice = [subDeviceIndex](ClDevice *pClDevice) -> bool { bool isSubDevice = (pClDevice->getDeviceInfo().parentDevice != nullptr); if (isSubDevice == false) { return false; } auto &subDevice = static_cast(pClDevice->getDevice()); return (subDevice.getSubDeviceIndex() == subDeviceIndex); }; auto foundDeviceIterator = std::find_if(devices.begin(), devices.end(), isExpectedSubDevice); return (foundDeviceIterator != devices.end() ? 
*foundDeviceIterator : nullptr); } AsyncEventsHandler &Context::getAsyncEventsHandler() const { return *static_cast(devices[0]->getExecutionEnvironment())->getAsyncEventsHandler(); } DeviceBitfield Context::getDeviceBitfieldForAllocation(uint32_t rootDeviceIndex) const { return deviceBitfields.at(rootDeviceIndex); } void Context::setupContextType() { if (contextType == ContextType::CONTEXT_TYPE_DEFAULT) { if (devices.size() > 1) { for (const auto &pDevice : devices) { if (!pDevice->getDeviceInfo().parentDevice) { contextType = ContextType::CONTEXT_TYPE_UNRESTRICTIVE; return; } } } if (devices[0]->getDeviceInfo().parentDevice) { contextType = ContextType::CONTEXT_TYPE_SPECIALIZED; } } } Platform *Context::getPlatformFromProperties(const cl_context_properties *properties, cl_int &errcode) { errcode = CL_SUCCESS; auto propertiesCurrent = properties; while (propertiesCurrent && *propertiesCurrent) { auto propertyType = propertiesCurrent[0]; auto propertyValue = propertiesCurrent[1]; propertiesCurrent += 2; if (CL_CONTEXT_PLATFORM == propertyType) { Platform *pPlatform = nullptr; errcode = validateObject(WithCastToInternal(reinterpret_cast(propertyValue), &pPlatform)); return pPlatform; } } return nullptr; } bool Context::isSingleDeviceContext() { return devices[0]->getNumGenericSubDevices() == 0 && getNumDevices() == 1; } } // namespace NEO compute-runtime-22.14.22890/opencl/source/context/context.h000066400000000000000000000206441422164147700234610ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/helpers/common_types.h" #include "shared/source/helpers/string.h" #include "shared/source/helpers/vec.h" #include "shared/source/unified_memory/unified_memory.h" #include "opencl/source/cl_device/cl_device_vector.h" #include "opencl/source/context/context_type.h" #include 
"opencl/source/context/driver_diagnostics.h" #include "opencl/source/gtpin/gtpin_notify.h" #include "opencl/source/helpers/base_object.h" #include "opencl/source/helpers/destructor_callbacks.h" #include "opencl/source/mem_obj/map_operations_handler.h" #include #include #include namespace NEO { class AsyncEventsHandler; struct BuiltInKernel; class CommandQueue; class Device; class MemObj; class MemoryManager; class SharingFunctions; class SVMAllocsManager; class Program; class Platform; template <> struct OpenCLObjectMapper<_cl_context> { typedef class Context DerivedType; }; class Context : public BaseObject<_cl_context> { public: static const cl_ulong objectMagic = 0xA4234321DC002130LL; bool createImpl(const cl_context_properties *properties, const ClDeviceVector &devices, void(CL_CALLBACK *pfnNotify)(const char *, const void *, size_t, void *), void *userData, cl_int &errcodeRet); template static T *create(const cl_context_properties *properties, const ClDeviceVector &devices, void(CL_CALLBACK *funcNotify)(const char *, const void *, size_t, void *), void *data, cl_int &errcodeRet) { auto pContext = new T(funcNotify, data); if (!pContext->createImpl(properties, devices, funcNotify, data, errcodeRet)) { delete pContext; pContext = nullptr; } gtpinNotifyContextCreate(pContext); return pContext; } Context &operator=(const Context &) = delete; Context(const Context &) = delete; ~Context() override; cl_int setDestructorCallback(void(CL_CALLBACK *funcNotify)(cl_context, void *), void *userData); cl_int getInfo(cl_context_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet); cl_int getSupportedImageFormats(Device *device, cl_mem_flags flags, cl_mem_object_type imageType, cl_uint numEntries, cl_image_format *imageFormats, cl_uint *numImageFormats); size_t getNumDevices() const; bool containsMultipleSubDevices(uint32_t rootDeviceIndex) const; ClDevice *getDevice(size_t deviceOrdinal) const; MemoryManager *getMemoryManager() const { return 
memoryManager; } SVMAllocsManager *getSVMAllocsManager() const { return svmAllocsManager; } auto &getMapOperationsStorage() { return mapOperationsStorage; } cl_int tryGetExistingHostPtrAllocation(const void *ptr, size_t size, uint32_t rootDeviceIndex, GraphicsAllocation *&allocation, InternalMemoryType &memoryType, bool &isCpuCopyAllowed); cl_int tryGetExistingSvmAllocation(const void *ptr, size_t size, uint32_t rootDeviceIndex, GraphicsAllocation *&allocation, InternalMemoryType &memoryType, bool &isCpuCopyAllowed); cl_int tryGetExistingMapAllocation(const void *ptr, size_t size, GraphicsAllocation *&allocation); const std::set &getRootDeviceIndices() const; uint32_t getMaxRootDeviceIndex() const; CommandQueue *getSpecialQueue(uint32_t rootDeviceIndex); void setSpecialQueue(CommandQueue *commandQueue, uint32_t rootDeviceIndex); void overrideSpecialQueueAndDecrementRefCount(CommandQueue *commandQueue, uint32_t rootDeviceIndex); template Sharing *getSharing(); template void registerSharing(Sharing *sharing); template void providePerformanceHint(cl_diagnostics_verbose_level flags, PerformanceHints performanceHint, Args &&...args) { DEBUG_BREAK_IF(contextCallback == nullptr); DEBUG_BREAK_IF(driverDiagnostics == nullptr); char hint[DriverDiagnostics::maxHintStringSize]; snprintf_s(hint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[performanceHint], std::forward(args)..., 0); if (driverDiagnostics->validFlags(flags)) { if (contextCallback) { contextCallback(hint, &flags, sizeof(flags), userData); } if (DebugManager.flags.PrintDriverDiagnostics.get() != -1) { printf("\n%s\n", hint); } } } template void providePerformanceHintForMemoryTransfer(cl_command_type commandType, bool transferRequired, Args &&...args) { cl_diagnostics_verbose_level verboseLevel = transferRequired ? 
CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL : CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL; PerformanceHints hint = driverDiagnostics->obtainHintForTransferOperation(commandType, transferRequired); providePerformanceHint(verboseLevel, hint, args...); } cl_bool isProvidingPerformanceHints() const { return driverDiagnostics != nullptr; } bool getInteropUserSyncEnabled() { return interopUserSync; } void setInteropUserSyncEnabled(bool enabled) { interopUserSync = enabled; } bool areMultiStorageAllocationsPreferred(); bool isSingleDeviceContext(); ContextType peekContextType() const { return contextType; } bool isDeviceAssociated(const ClDevice &clDevice) const; ClDevice *getSubDeviceByIndex(uint32_t subDeviceIndex) const; AsyncEventsHandler &getAsyncEventsHandler() const; DeviceBitfield getDeviceBitfieldForAllocation(uint32_t rootDeviceIndex) const; bool getResolvesRequiredInKernels() const { return resolvesRequiredInKernels; } void setResolvesRequiredInKernels(bool resolves) { resolvesRequiredInKernels = resolves; } const ClDeviceVector &getDevices() const { return devices; } const std::map &getDeviceBitfields() const { return deviceBitfields; }; static Platform *getPlatformFromProperties(const cl_context_properties *properties, cl_int &errcode); protected: struct BuiltInKernel { const char *pSource = nullptr; Program *pProgram = nullptr; std::once_flag programIsInitialized; // guard for creating+building the program Kernel *pKernel = nullptr; BuiltInKernel() { } }; Context(void(CL_CALLBACK *pfnNotify)(const char *, const void *, size_t, void *) = nullptr, void *userData = nullptr); // OS specific implementation void *getOsContextInfo(cl_context_info ¶mName, size_t *srcParamSize); void setupContextType(); std::set rootDeviceIndices = {}; std::map deviceBitfields; std::vector> sharingFunctions; ClDeviceVector devices; ContextDestructorCallbacks destructorCallbacks; const cl_context_properties *properties = nullptr; size_t numProperties = 0u; void(CL_CALLBACK 
*contextCallback)(const char *, const void *, size_t, void *) = nullptr; void *userData = nullptr; MemoryManager *memoryManager = nullptr; SVMAllocsManager *svmAllocsManager = nullptr; MapOperationsStorage mapOperationsStorage = {}; StackVec specialQueues; DriverDiagnostics *driverDiagnostics = nullptr; uint32_t maxRootDeviceIndex = std::numeric_limits::max(); cl_bool preferD3dSharedResources = 0u; ContextType contextType = ContextType::CONTEXT_TYPE_DEFAULT; bool interopUserSync = false; bool resolvesRequiredInKernels = false; }; } // namespace NEO compute-runtime-22.14.22890/opencl/source/context/context.inl000066400000000000000000000011061422164147700240040ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/context/context.h" namespace NEO { template void Context::registerSharing(Sharing *sharing) { UNRECOVERABLE_IF(!sharing); this->sharingFunctions[Sharing::sharingId].reset(sharing); } template Sharing *Context::getSharing() { if (Sharing::sharingId >= sharingFunctions.size()) { return nullptr; } return reinterpret_cast(sharingFunctions[Sharing::sharingId].get()); } } // namespace NEO compute-runtime-22.14.22890/opencl/source/context/context_type.h000066400000000000000000000004311422164147700245120ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include namespace NEO { enum ContextType : uint32_t { CONTEXT_TYPE_DEFAULT, CONTEXT_TYPE_SPECIALIZED, CONTEXT_TYPE_UNRESTRICTIVE }; } // namespace NEO compute-runtime-22.14.22890/opencl/source/context/driver_diagnostics.cpp000066400000000000000000000330751422164147700262140ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "driver_diagnostics.h" #include "shared/source/helpers/debug_helpers.h" namespace NEO { DriverDiagnostics::DriverDiagnostics(cl_diagnostics_verbose_level level) { 
/**
 * Tells whether a hint carrying `flags` should be reported.
 *
 * @param flags verbosity bits attached to the hint
 * @return true when at least one bit of `flags` is also set in the verbosity
 *         level this object was constructed with
 */
bool DriverDiagnostics::validFlags(cl_diagnostics_verbose_level flags) const {
    const auto enabledBits = verboseLevel & flags;
    return enabledBits != 0;
}
copy as the buffer shares the same physical memory with CPU.", //CL_ENQUEUE_READ_BUFFER_DOESNT_REQUIRE_COPY_DATA "Performance hint: Pointer %p and size %u passed to clEnqueueReadBuffer doesn't meet alignment restrictions. Size should be aligned to %u bytes and pointer should be aligned to %u. Driver needs to disable L3 caching.", //CL_ENQUEUE_READ_BUFFER_DOESNT_MEET_ALIGNMENT_RESTRICTIONS "Performance hint: clEnqueueReadBufferRect call on a buffer %p with pointer %p will require driver to copy the data.Consider using clEnqueueMapBuffer with buffer that shares the same physical memory with CPU.", //CL_ENQUEUE_READ_BUFFER_RECT_REQUIRES_COPY_DATA "Performance hint: clEnqueueReadBufferRect call on a buffer %p with pointer %p will not require any data copy as the buffer shares the same physical memory with CPU.", //CL_ENQUEUE_READ_BUFFER_RECT_DOESNT_REQUIRES_COPY_DATA "Performance hint: Pointer %p and size %u passed to clEnqueueReadBufferRect doesn't meet alignment restrictions. Size should be aligned to %u bytes and pointer should be aligned to %u. Driver needs to disable L3 caching.", //CL_ENQUEUE_READ_BUFFER_RECT_DOESNT_MEET_ALIGNMENT_RESTRICTIONS "Performance hint: clEnqueueWriteBuffer call on a buffer %p require driver to copy the data. Consider using clEnqueueMapBuffer with buffer that shares the same physical memory with CPU.", //CL_ENQUEUE_WRITE_BUFFER_REQUIRES_COPY_DATA "Performance hint: clEnqueueWriteBuffer call on a buffer %p with pointer %p will not require any data copy as the buffer shares the same physical memory with CPU.", //CL_ENQUEUE_WRITE_BUFFER_DOESNT_REQUIRE_COPY_DATA "Performance hint: clEnqueueWriteBufferRect call on a buffer %p require driver to copy the data. 
Consider using clEnqueueMapBuffer with buffer that shares the same physical memory with CPU.", //CL_ENQUEUE_WRITE_BUFFER_RECT_REQUIRES_COPY_DATA "Performance hint: clEnqueueWriteBufferRect call on a buffer %p will not require any data copy as the buffer shares the same physical memory with CPU.", //CL_ENQUEUE_WRITE_BUFFER_RECT_DOESNT_REQUIRE_COPY_DATA "Performance hint: Pointer %p and size %u passed to clEnqueueReadImage doesn't meet alignment restrictions. Size should be aligned to %u bytes and pointer should be aligned to %u. Driver needs to disable L3 caching.", //CL_ENQUEUE_READ_IMAGE_DOESNT_MEET_ALIGNMENT_RESTRICTIONS "Performance hint: clEnqueueReadImage call on an image %p will not require any data copy as the image shares the same physical memory with CPU.", //CL_ENQUEUE_READ_IMAGE_DOESNT_REQUIRES_COPY_DATA "Performance hint: clEnqueueWriteImage call on an image %p require driver to copy the data.", //CL_ENQUEUE_WRITE_IMAGE_REQUIRES_COPY_DATA "Performance hint: clEnqueueWriteImage call on an image %p will not require any data copy as the image shares the same physical memory with CPU.", //CL_ENQUEUE_WRITE_IMAGE_DOESNT_REQUIRES_COPY_DATA "Performance hint: clEnqueueMapBuffer call on a buffer %p will require driver to make a copy as buffer is not sharing the same physical memory with CPU.", //CL_ENQUEUE_MAP_BUFFER_REQUIRES_COPY_DATA "Performance hint: clEnqueueMapBuffer call on a buffer %p will not require any data copy as buffer shares the same physical memory with CPU.", //CL_ENQUEUE_MAP_BUFFER_DOESNT_REQUIRE_COPY_DATA "Performance hint: clEnqueueMapImage call on an image %p will require driver to make a copy, as image is not sharing the same physical memory with CPU.", //CL_ENQUEUE_MAP_IMAGE_REQUIRES_COPY_DATA "Performance hint: clEnqueueMapImage call on an image %p will not require any data copy as image shares the same physical memory with CPU.", //CL_ENQUEUE_MAP_IMAGE_DOESNT_REQUIRE_COPY_DATA "Performance hint: clEnqueueUnmapMemObject call with pointer 
%p will not require any data copy.", //CL_ENQUEUE_UNMAP_MEM_OBJ_DOESNT_REQUIRE_COPY_DATA "Performance hint: clEnqueueUnmapMemObject call with pointer %p will require driver to copy the data to memory object %p.", //CL_ENQUEUE_UNMAP_MEM_OBJ_REQUIRES_COPY_DATA "Performance hint: clEnqueueSVMMap call with pointer %p will not require any data copy.", //CL_ENQUEUE_SVM_MAP_DOESNT_REQUIRE_COPY_DATA "Performance hint: Printf detected in kernel %s, it may cause overhead.", //PRINTF_DETECTED_IN_KERNEL "Performance hint: Null local workgroup size detected ( kernel name: %s ); following sizes will be used for execution : { %u, %u, %u }.", //NULL_LOCAL_WORKGROUP_SIZE "Performance hint: Local workgroup sizes { %u, %u, %u } selected for this workload ( kernel name: %s ) may not be optimal, consider using following local workgroup size: { %u, %u, %u }.", //BAD_LOCAL_WORKGROUP_SIZE "Performance hint: Kernel %s register pressure is too high, spill fills will be generated, additional surface needs to be allocated of size %u, consider simplifying your kernel.", //REGISTER_PRESSURE_TOO_HIGH "Performance hint: Kernel %s private memory usage is too high and exhausts register space, additional surface needs to be allocated of size %u, consider reducing amount of private memory used, avoid using private memory arrays.", //PRIVATE_MEMORY_USAGE_TOO_HIGH "Performance hint: Kernel %s submission requires coherency with CPU; this will impact performance.", //KERNEL_REQUIRES_COHERENCY "Performance hint: Kernel %s requires aux translation on argument [%u] = \"%s\"", //KERNEL_ARGUMENT_AUX_TRANSLATION "Performance hint: Kernel %s requires aux translation for allocation with pointer %p and size %u", //KERNEL_ALLOCATION_AUX_TRANSLATION "Performance hint: Buffer %p will use compressed memory.", //BUFFER_IS_COMPRESSED "Performance hint: Buffer %p will not use compressed memory.", //BUFFER_IS_NOT_COMPRESSED "Performance hint: Image %p will use compressed memory.", //IMAGE_IS_COMPRESSED "Performance hint: 
Image %p will not use compressed memory."}; //IMAGE_IS_NOT_COMPRESSED PerformanceHints DriverDiagnostics::obtainHintForTransferOperation(cl_command_type commandType, bool transferRequired) { PerformanceHints hint; switch (commandType) { case CL_COMMAND_MAP_BUFFER: hint = transferRequired ? CL_ENQUEUE_MAP_BUFFER_REQUIRES_COPY_DATA : CL_ENQUEUE_MAP_BUFFER_DOESNT_REQUIRE_COPY_DATA; break; case CL_COMMAND_MAP_IMAGE: hint = transferRequired ? CL_ENQUEUE_MAP_IMAGE_REQUIRES_COPY_DATA : CL_ENQUEUE_MAP_IMAGE_DOESNT_REQUIRE_COPY_DATA; break; case CL_COMMAND_UNMAP_MEM_OBJECT: hint = transferRequired ? CL_ENQUEUE_UNMAP_MEM_OBJ_REQUIRES_COPY_DATA : CL_ENQUEUE_UNMAP_MEM_OBJ_DOESNT_REQUIRE_COPY_DATA; break; case CL_COMMAND_WRITE_BUFFER: hint = transferRequired ? CL_ENQUEUE_WRITE_BUFFER_REQUIRES_COPY_DATA : CL_ENQUEUE_WRITE_BUFFER_DOESNT_REQUIRE_COPY_DATA; break; case CL_COMMAND_READ_BUFFER: hint = transferRequired ? CL_ENQUEUE_READ_BUFFER_REQUIRES_COPY_DATA : CL_ENQUEUE_READ_BUFFER_DOESNT_REQUIRE_COPY_DATA; break; case CL_COMMAND_WRITE_BUFFER_RECT: hint = transferRequired ? CL_ENQUEUE_WRITE_BUFFER_RECT_REQUIRES_COPY_DATA : CL_ENQUEUE_WRITE_BUFFER_RECT_DOESNT_REQUIRE_COPY_DATA; break; case CL_COMMAND_READ_BUFFER_RECT: hint = transferRequired ? CL_ENQUEUE_READ_BUFFER_RECT_REQUIRES_COPY_DATA : CL_ENQUEUE_READ_BUFFER_RECT_DOESNT_REQUIRES_COPY_DATA; break; case CL_COMMAND_WRITE_IMAGE: hint = transferRequired ? 
CL_ENQUEUE_WRITE_IMAGE_REQUIRES_COPY_DATA : CL_ENQUEUE_WRITE_IMAGE_DOESNT_REQUIRES_COPY_DATA; break; case CL_COMMAND_READ_IMAGE: UNRECOVERABLE_IF(transferRequired) hint = CL_ENQUEUE_READ_IMAGE_DOESNT_REQUIRES_COPY_DATA; break; default: UNRECOVERABLE_IF(true); } return hint; } } // namespace NEO compute-runtime-22.14.22890/opencl/source/context/driver_diagnostics.h000066400000000000000000000047111422164147700256540ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "CL/cl_ext_intel.h" namespace NEO { enum PerformanceHints { CL_BUFFER_DOESNT_MEET_ALIGNMENT_RESTRICTIONS, CL_BUFFER_MEETS_ALIGNMENT_RESTRICTIONS, CL_BUFFER_NEEDS_ALLOCATE_MEMORY, CL_IMAGE_MEETS_ALIGNMENT_RESTRICTIONS, DRIVER_CALLS_INTERNAL_CL_FLUSH, PROFILING_ENABLED, PROFILING_ENABLED_WITH_DISABLED_PREEMPTION, SUBBUFFER_SHARES_MEMORY, CL_SVM_ALLOC_MEETS_ALIGNMENT_RESTRICTIONS, CL_ENQUEUE_READ_BUFFER_REQUIRES_COPY_DATA, CL_ENQUEUE_READ_BUFFER_DOESNT_REQUIRE_COPY_DATA, CL_ENQUEUE_READ_BUFFER_DOESNT_MEET_ALIGNMENT_RESTRICTIONS, CL_ENQUEUE_READ_BUFFER_RECT_REQUIRES_COPY_DATA, CL_ENQUEUE_READ_BUFFER_RECT_DOESNT_REQUIRES_COPY_DATA, CL_ENQUEUE_READ_BUFFER_RECT_DOESNT_MEET_ALIGNMENT_RESTRICTIONS, CL_ENQUEUE_WRITE_BUFFER_REQUIRES_COPY_DATA, CL_ENQUEUE_WRITE_BUFFER_DOESNT_REQUIRE_COPY_DATA, CL_ENQUEUE_WRITE_BUFFER_RECT_REQUIRES_COPY_DATA, CL_ENQUEUE_WRITE_BUFFER_RECT_DOESNT_REQUIRE_COPY_DATA, CL_ENQUEUE_READ_IMAGE_DOESNT_MEET_ALIGNMENT_RESTRICTIONS, CL_ENQUEUE_READ_IMAGE_DOESNT_REQUIRES_COPY_DATA, CL_ENQUEUE_WRITE_IMAGE_REQUIRES_COPY_DATA, CL_ENQUEUE_WRITE_IMAGE_DOESNT_REQUIRES_COPY_DATA, CL_ENQUEUE_MAP_BUFFER_REQUIRES_COPY_DATA, CL_ENQUEUE_MAP_BUFFER_DOESNT_REQUIRE_COPY_DATA, CL_ENQUEUE_MAP_IMAGE_REQUIRES_COPY_DATA, CL_ENQUEUE_MAP_IMAGE_DOESNT_REQUIRE_COPY_DATA, CL_ENQUEUE_UNMAP_MEM_OBJ_DOESNT_REQUIRE_COPY_DATA, CL_ENQUEUE_UNMAP_MEM_OBJ_REQUIRES_COPY_DATA, CL_ENQUEUE_SVM_MAP_DOESNT_REQUIRE_COPY_DATA, PRINTF_DETECTED_IN_KERNEL, 
NULL_LOCAL_WORKGROUP_SIZE, BAD_LOCAL_WORKGROUP_SIZE, REGISTER_PRESSURE_TOO_HIGH, PRIVATE_MEMORY_USAGE_TOO_HIGH, KERNEL_REQUIRES_COHERENCY, KERNEL_ARGUMENT_AUX_TRANSLATION, KERNEL_ALLOCATION_AUX_TRANSLATION, BUFFER_IS_COMPRESSED, BUFFER_IS_NOT_COMPRESSED, IMAGE_IS_COMPRESSED, IMAGE_IS_NOT_COMPRESSED }; class DriverDiagnostics { public: DriverDiagnostics(cl_diagnostics_verbose_level level); bool validFlags(cl_diagnostics_verbose_level flags) const; ~DriverDiagnostics() = default; static const char *const hintFormat[]; static const cl_int maxHintStringSize = 1024; PerformanceHints obtainHintForTransferOperation(cl_command_type commandType, bool transferRequired); protected: cl_diagnostics_verbose_level verboseLevel; }; } // namespace NEO compute-runtime-22.14.22890/opencl/source/dll/000077500000000000000000000000001422164147700207055ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/source/dll/CMakeLists.txt000066400000000000000000000070221422164147700234460ustar00rootroot00000000000000# # Copyright (C) 2018-2022 Intel Corporation # # SPDX-License-Identifier: MIT # if(NOT DISABLED_GTPIN_SUPPORT) set(GTPIN_INIT_FILE "${NEO_SOURCE_DIR}/opencl/source/gtpin/gtpin_init.cpp") else() set(GTPIN_INIT_FILE "") endif() set(RUNTIME_SRCS_DLL_BASE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/command_queue_dll.cpp ${NEO_SHARED_DIRECTORY}/dll/create_deferred_deleter.cpp ${NEO_SHARED_DIRECTORY}/dll/create_memory_manager_${DRIVER_MODEL}.cpp ${NEO_SHARED_DIRECTORY}/dll/create_tbx_sockets.cpp ${NEO_SHARED_DIRECTORY}/dll/direct_submission_controller_enabled.cpp ${NEO_SHARED_DIRECTORY}/dll/source_level_debugger_dll.cpp ${NEO_SHARED_DIRECTORY}/dll/get_devices.cpp ${NEO_SHARED_DIRECTORY}/built_ins/sip_init.cpp ${NEO_SHARED_DIRECTORY}/dll/create_command_stream.cpp ${NEO_SHARED_DIRECTORY}/dll/options_dll.cpp ${NEO_SHARED_DIRECTORY}/gmm_helper/resource_info.cpp ${NEO_SHARED_DIRECTORY}/gmm_helper/page_table_mngr.cpp ${NEO_SHARED_DIRECTORY}/helpers/abort.cpp 
${NEO_SHARED_DIRECTORY}/helpers/allow_deferred_deleter.cpp ${NEO_SHARED_DIRECTORY}/helpers/debug_helpers.cpp ${NEO_SHARED_DIRECTORY}/utilities/cpuintrinsics.cpp ${NEO_SHARED_DIRECTORY}/utilities/debug_settings_reader_creator.cpp ${NEO_SHARED_DIRECTORY}/utilities/io_functions.cpp ${NEO_SOURCE_DIR}/opencl/source/api/api.cpp ${NEO_SOURCE_DIR}/opencl/source/compiler_interface/default_cache_config.cpp ${NEO_SOURCE_DIR}/opencl/source/dll/debug_manager.cpp ${NEO_SOURCE_DIR}/opencl/source/helpers/api_specific_config_ocl.cpp ${GTPIN_INIT_FILE} ${HW_SRC_LINK} ${EXPORTS_FILENAME} ) append_sources_from_properties(RUNTIME_SRCS_DLL_BASE NEO_CORE_SRCS_LINK NEO_SRCS_ENABLE_CORE ) set(RUNTIME_SRCS_DLL_LINUX ${NEO_SHARED_DIRECTORY}/dll/linux/drm_neo_create.cpp ${NEO_SHARED_DIRECTORY}/dll/linux/options_linux.cpp ${NEO_SHARED_DIRECTORY}/dll/linux/os_interface.cpp ${NEO_SHARED_DIRECTORY}/dll/devices${BRANCH_DIR_SUFFIX}devices.inl ${NEO_SHARED_DIRECTORY}/dll/devices${BRANCH_DIR_SUFFIX}devices_additional.inl ${NEO_SHARED_DIRECTORY}/dll/devices/devices_base.inl ${NEO_SHARED_DIRECTORY}/os_interface/linux/sys_calls_linux.cpp ${NEO_SOURCE_DIR}/opencl/source/os_interface/linux/platform_teardown_linux.cpp ) set(RUNTIME_SRCS_DLL_WINDOWS ${NEO_SHARED_DIRECTORY}/dll/windows/options_windows.cpp ${NEO_SHARED_DIRECTORY}/dll/windows/os_interface.cpp ${NEO_SHARED_DIRECTORY}/dll/windows/environment_variables.cpp ${NEO_SHARED_DIRECTORY}/gmm_helper/windows/gmm_memory.cpp ${NEO_SHARED_DIRECTORY}/os_interface/windows/os_memory_virtual_alloc.cpp ${NEO_SHARED_DIRECTORY}/os_interface/windows/sys_calls.cpp ${NEO_SHARED_DIRECTORY}/os_interface/windows/wddm/wddm_calls.cpp ${NEO_SHARED_DIRECTORY}/os_interface/windows/wddm/wddm_create.cpp ${NEO_SOURCE_DIR}/opencl/source/os_interface/windows/platform_teardown_win.cpp ) if(NOT DISABLE_WDDM_LINUX) list(APPEND RUNTIME_SRCS_DLL_LINUX ${NEO_SHARED_DIRECTORY}/gmm_helper/windows/gmm_memory.cpp ${NEO_SHARED_DIRECTORY}/os_interface/windows/wddm/wddm_create.cpp ) endif() 
target_sources(${NEO_DYNAMIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_DLL_BASE}) if(WIN32) if(DEFINED NEO_DLL_RC_FILE) list(APPEND RUNTIME_SRCS_DLL_WINDOWS ${NEO_DLL_RC_FILE}) endif() target_sources(${NEO_DYNAMIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_DLL_WINDOWS}) else() target_sources(${NEO_DYNAMIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_DLL_LINUX}) endif() compute-runtime-22.14.22890/opencl/source/dll/command_queue_dll.cpp000066400000000000000000000010601422164147700250630ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_queue/command_queue.h" namespace NEO { bool CommandQueue::isAssignEngineRoundRobinEnabled() { auto assignEngineRoundRobin = false; if (DebugManager.flags.EnableCmdQRoundRobindEngineAssign.get() != -1) { assignEngineRoundRobin = DebugManager.flags.EnableCmdQRoundRobindEngineAssign.get(); } return assignEngineRoundRobin; } bool CommandQueue::isTimestampWaitEnabled() { return true; } } // namespace NEO compute-runtime-22.14.22890/opencl/source/dll/debug_manager.cpp000066400000000000000000000004631422164147700241740ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/debug_settings/debug_settings_manager.h" #include "opencl/source/os_interface/ocl_reg_path.h" namespace NEO { DebugSettingsManager DebugManager(oclRegPath); } compute-runtime-22.14.22890/opencl/source/dll/linux/000077500000000000000000000000001422164147700220445ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/source/dll/linux/ocl_internal.exports000066400000000000000000000067101422164147700261470ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ { global: clGetExtensionFunctionAddress; clIcdGetPlatformIDsKHR; clGetPlatformInfo; clGetDeviceIDs; clGetDeviceInfo; clCreateContext; clCreateContextFromType; clRetainContext; clReleaseContext; clGetContextInfo; 
clSetContextDestructorCallback; clCreateCommandQueue; clRetainCommandQueue; clReleaseCommandQueue; clGetCommandQueueInfo; clSetCommandQueueProperty; clCreateBuffer; clCreateImage2D; clCreateImage3D; clRetainMemObject; clReleaseMemObject; clGetSupportedImageFormats; clGetMemObjectInfo; clGetImageInfo; clCreateSampler; clRetainSampler; clReleaseSampler; clGetSamplerInfo; clCreateProgramWithSource; clCreateProgramWithBinary; clRetainProgram; clReleaseProgram; clBuildProgram; clUnloadCompiler; clGetProgramInfo; clGetProgramBuildInfo; clCreateKernel; clCreateKernelsInProgram; clRetainKernel; clReleaseKernel; clSetKernelArg; clGetKernelInfo; clGetKernelWorkGroupInfo; clWaitForEvents; clGetEventInfo; clRetainEvent; clReleaseEvent; clGetEventProfilingInfo; clFlush; clFinish; clEnqueueReadBuffer; clEnqueueWriteBuffer; clEnqueueCopyBuffer; clEnqueueReadImage; clEnqueueWriteImage; clEnqueueCopyImage; clEnqueueCopyImageToBuffer; clEnqueueCopyBufferToImage; clEnqueueMapBuffer; clEnqueueMapImage; clEnqueueUnmapMemObject; clEnqueueNDRangeKernel; clEnqueueTask; clEnqueueNativeKernel; clEnqueueMarker; clEnqueueWaitForEvents; clEnqueueBarrier; clSetEventCallback; clCreateSubBuffer; clSetMemObjectDestructorCallback; clCreateUserEvent; clSetUserEventStatus; clEnqueueReadBufferRect; clEnqueueWriteBufferRect; clEnqueueCopyBufferRect; clCreateSubDevices; clRetainDevice; clReleaseDevice; clCreateImage; clCreateProgramWithBuiltInKernels; clCompileProgram; clLinkProgram; clUnloadPlatformCompiler; clGetKernelArgInfo; clEnqueueFillBuffer; clEnqueueFillImage; clEnqueueMigrateMemObjects; clEnqueueMarkerWithWaitList; clEnqueueBarrierWithWaitList; clGetExtensionFunctionAddressForPlatform; clCreateCommandQueueWithProperties; clCreatePipe; clGetPipeInfo; clSVMAlloc; clSVMFree; clEnqueueSVMFree; clEnqueueSVMMemcpy; clEnqueueSVMMemFill; clEnqueueSVMMap; clEnqueueSVMUnmap; clCreateSamplerWithProperties; clSetKernelArgSVMPointer; clSetKernelExecInfo; clGetKernelSubGroupInfoKHR; clCloneKernel; 
clCreateProgramWithIL; clEnqueueSVMMigrateMem; clGetDeviceAndHostTimer; clGetHostTimer; clGetKernelSubGroupInfo; clSetDefaultDeviceCommandQueue; clSetProgramReleaseCallback; clSetProgramSpecializationConstant; clCreateBufferWithProperties; clCreateImageWithProperties; clGetImageParamsINTEL; clCreatePerfCountersCommandQueueINTEL; clCreateAcceleratorINTEL; clGetAcceleratorInfoINTEL; clRetainAcceleratorINTEL; clReleaseAcceleratorINTEL; clSetPerformanceConfigurationINTEL; GTPin_Init; local: *; }; compute-runtime-22.14.22890/opencl/source/dll/linux/ocl_release.exports000066400000000000000000000003441422164147700257500ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ { global: clGetExtensionFunctionAddress; clIcdGetPlatformIDsKHR; clGetPlatformInfo; GTPin_Init; local: *; }; compute-runtime-22.14.22890/opencl/source/dll/windows/000077500000000000000000000000001422164147700223775ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/source/dll/windows/OpenCLInternalExports.def.in000066400000000000000000000052321422164147700276700ustar00rootroot00000000000000; ; Copyright (C) 2017-2021 Intel Corporation ; ; SPDX-License-Identifier: MIT ; ; ${MSVC_DEF_HEADER} LIBRARY "${MSVC_DEF_LIB_NAME}" EXPORTS clGetPlatformIDs clGetPlatformInfo clGetDeviceIDs clGetDeviceInfo clCreateContext clCreateContextFromType clRetainContext clReleaseContext clGetContextInfo clSetContextDestructorCallback clCreateCommandQueue clRetainCommandQueue clReleaseCommandQueue clGetCommandQueueInfo clSetCommandQueueProperty clCreateBuffer clCreateImage2D clCreateImage3D clRetainMemObject clReleaseMemObject clGetSupportedImageFormats clGetMemObjectInfo clGetImageInfo clCreateSampler clRetainSampler clReleaseSampler clGetSamplerInfo clCreateProgramWithSource clCreateProgramWithBinary clRetainProgram clReleaseProgram clBuildProgram clUnloadCompiler clGetProgramInfo clGetProgramBuildInfo clCreateKernel clCreateKernelsInProgram 
clRetainKernel clReleaseKernel clSetKernelArg clGetKernelInfo clGetKernelWorkGroupInfo clWaitForEvents clGetEventInfo clRetainEvent clReleaseEvent clGetEventProfilingInfo clFlush clFinish clEnqueueReadBuffer clEnqueueWriteBuffer clEnqueueCopyBuffer clEnqueueReadImage clEnqueueWriteImage clEnqueueCopyImage clEnqueueCopyImageToBuffer clEnqueueCopyBufferToImage clEnqueueMapBuffer clEnqueueMapImage clEnqueueUnmapMemObject clEnqueueNDRangeKernel clEnqueueTask clEnqueueNativeKernel clEnqueueMarker clEnqueueWaitForEvents clEnqueueBarrier clGetExtensionFunctionAddress clSetEventCallback clCreateSubBuffer clSetMemObjectDestructorCallback clCreateUserEvent clSetUserEventStatus clEnqueueReadBufferRect clEnqueueWriteBufferRect clEnqueueCopyBufferRect clCreateSubDevices clRetainDevice clReleaseDevice clCreateImage clCreateProgramWithBuiltInKernels clCompileProgram clLinkProgram clUnloadPlatformCompiler clGetKernelArgInfo clEnqueueFillBuffer clEnqueueFillImage clEnqueueMigrateMemObjects clEnqueueMarkerWithWaitList clEnqueueBarrierWithWaitList clGetExtensionFunctionAddressForPlatform clCreateCommandQueueWithProperties clCreatePipe clGetPipeInfo clSVMAlloc clSVMFree clEnqueueSVMFree clEnqueueSVMMemcpy clEnqueueSVMMemFill clEnqueueSVMMap clEnqueueSVMUnmap clCreateSamplerWithProperties clSetKernelArgSVMPointer clSetKernelExecInfo clGetKernelSubGroupInfoKHR clCloneKernel clCreateProgramWithIL clEnqueueSVMMigrateMem clGetDeviceAndHostTimer clGetHostTimer clGetKernelSubGroupInfo clSetDefaultDeviceCommandQueue clSetProgramReleaseCallback clSetProgramSpecializationConstant clCreateBufferWithProperties clCreateImageWithProperties clGetImageParamsINTEL clCreatePerfCountersCommandQueueINTEL clCreateAcceleratorINTEL clGetAcceleratorInfoINTEL clRetainAcceleratorINTEL clReleaseAcceleratorINTEL clSetPerformanceConfigurationINTEL ${MSVC_DEF_ADDITIONAL_EXPORTS} 
compute-runtime-22.14.22890/opencl/source/dll/windows/OpenCLReleaseExports.def.in000066400000000000000000000003411422164147700274700ustar00rootroot00000000000000; ; Copyright (C) 2017-2021 Intel Corporation ; ; SPDX-License-Identifier: MIT ; ; ${MSVC_DEF_HEADER} LIBRARY "${MSVC_DEF_LIB_NAME}" EXPORTS clGetPlatformInfo clGetExtensionFunctionAddress ${MSVC_DEF_ADDITIONAL_EXPORTS} compute-runtime-22.14.22890/opencl/source/enable_cores.cmake000066400000000000000000000051001422164147700235510ustar00rootroot00000000000000# # Copyright (C) 2018-2022 Intel Corporation # # SPDX-License-Identifier: MIT # set(RUNTIME_SRCS_COREX_CPP_BASE buffer cl_hw_helper command_queue gpgpu_walker hardware_commands_helper image sampler ) macro(macro_for_each_core_type) foreach(BRANCH_DIR ${BRANCH_DIR_LIST}) set(COREX_PREFIX ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR}${CORE_TYPE_LOWER}) # Add default CORE files if(EXISTS "${COREX_PREFIX}/additional_files_${CORE_TYPE_LOWER}.cmake") include("${COREX_PREFIX}/additional_files_${CORE_TYPE_LOWER}.cmake") endif() foreach(SRC_IT ${RUNTIME_SRCS_COREX_CPP_BASE}) if(EXISTS ${COREX_PREFIX}/${SRC_IT}_${CORE_TYPE_LOWER}.cpp) list(APPEND RUNTIME_SRCS_${CORE_TYPE}_CPP_BASE ${COREX_PREFIX}/${SRC_IT}_${CORE_TYPE_LOWER}.cpp) endif() endforeach() if(EXISTS ${COREX_PREFIX}/enable_family_full_ocl_${CORE_TYPE_LOWER}.cpp) list(APPEND ${CORE_TYPE}_SRC_LINK_BASE ${COREX_PREFIX}/enable_family_full_ocl_${CORE_TYPE_LOWER}.cpp) endif() if(NOT DISABLED_GTPIN_SUPPORT) if(EXISTS ${COREX_PREFIX}/gtpin_setup_${CORE_TYPE_LOWER}.cpp) list(APPEND ${CORE_TYPE}_SRC_LINK_BASE ${COREX_PREFIX}/gtpin_setup_${CORE_TYPE_LOWER}.cpp) endif() endif() list(APPEND RUNTIME_SRCS_COREX_ALL_BASE ${RUNTIME_SRCS_${CORE_TYPE}_H_BASE}) list(APPEND RUNTIME_SRCS_COREX_ALL_BASE ${RUNTIME_SRCS_${CORE_TYPE}_CPP_BASE}) list(APPEND HW_SRC_LINK ${${CORE_TYPE}_SRC_LINK_BASE}) list(APPEND RUNTIME_SRCS_COREX_ALL_WINDOWS ${RUNTIME_SRCS_${CORE_TYPE}_CPP_WINDOWS}) list(APPEND RUNTIME_SRCS_COREX_ALL_LINUX 
${RUNTIME_SRCS_${CORE_TYPE}_CPP_LINUX}) if(UNIX) list(APPEND HW_SRC_LINK ${${CORE_TYPE}_SRC_LINK_LINUX}) endif() endforeach() endmacro() get_property(RUNTIME_SRCS_COREX_ALL_BASE GLOBAL PROPERTY RUNTIME_SRCS_COREX_ALL_BASE) get_property(RUNTIME_SRCS_COREX_ALL_LINUX GLOBAL PROPERTY RUNTIME_SRCS_COREX_ALL_LINUX) get_property(RUNTIME_SRCS_COREX_ALL_WINDOWS GLOBAL PROPERTY RUNTIME_SRCS_COREX_ALL_WINDOWS) apply_macro_for_each_core_type("SUPPORTED") target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_COREX_ALL_BASE}) if(WIN32) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_COREX_ALL_WINDOWS}) else() target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_COREX_ALL_LINUX}) endif() set_property(GLOBAL PROPERTY RUNTIME_SRCS_COREX_ALL_BASE ${RUNTIME_SRCS_COREX_ALL_BASE}) set_property(GLOBAL PROPERTY RUNTIME_SRCS_COREX_ALL_LINUX ${RUNTIME_SRCS_COREX_ALL_LINUX}) set_property(GLOBAL PROPERTY RUNTIME_SRCS_COREX_ALL_WINDOWS ${RUNTIME_SRCS_COREX_ALL_WINDOWS}) compute-runtime-22.14.22890/opencl/source/event/000077500000000000000000000000001422164147700212535ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/source/event/CMakeLists.txt000066400000000000000000000014101422164147700240070ustar00rootroot00000000000000# # Copyright (C) 2018-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(RUNTIME_SRCS_EVENT ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/async_events_handler.h ${CMAKE_CURRENT_SOURCE_DIR}/async_events_handler.cpp ${CMAKE_CURRENT_SOURCE_DIR}/event.cpp ${CMAKE_CURRENT_SOURCE_DIR}/event.h ${CMAKE_CURRENT_SOURCE_DIR}/event_builder.cpp ${CMAKE_CURRENT_SOURCE_DIR}/event_builder.h ${CMAKE_CURRENT_SOURCE_DIR}/event_tracker.cpp ${CMAKE_CURRENT_SOURCE_DIR}/event_tracker.h ${CMAKE_CURRENT_SOURCE_DIR}/user_event.cpp ${CMAKE_CURRENT_SOURCE_DIR}/user_event.h ) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_EVENT}) set_property(GLOBAL PROPERTY RUNTIME_SRCS_EVENT ${RUNTIME_SRCS_EVENT}) 
compute-runtime-22.14.22890/opencl/source/event/async_events_handler.cpp000066400000000000000000000064421422164147700261630ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/event/async_events_handler.h" #include "shared/source/command_stream/wait_status.h" #include "shared/source/helpers/timestamp_packet.h" #include "shared/source/os_interface/os_thread.h" #include "opencl/source/event/event.h" #include namespace NEO { AsyncEventsHandler::AsyncEventsHandler() { allowAsyncProcess = false; registerList.reserve(64); list.reserve(64); pendingList.reserve(64); } AsyncEventsHandler::~AsyncEventsHandler() { closeThread(); } void AsyncEventsHandler::registerEvent(Event *event) { std::unique_lock lock(asyncMtx); //Create on first use openThread(); event->incRefInternal(); registerList.push_back(event); asyncCond.notify_one(); } Event *AsyncEventsHandler::processList() { uint32_t lowestTaskCount = CompletionStamp::notReady; Event *sleepCandidate = nullptr; pendingList.clear(); for (auto event : list) { event->updateExecutionStatus(); if (event->peekHasCallbacks() || (event->isExternallySynchronized() && (event->peekExecutionStatus() > CL_COMPLETE))) { pendingList.push_back(event); if (event->peekTaskCount() < lowestTaskCount) { sleepCandidate = event; lowestTaskCount = event->peekTaskCount(); } } else { event->decRefInternal(); } } list.swap(pendingList); return sleepCandidate; } void *AsyncEventsHandler::asyncProcess(void *arg) { auto self = reinterpret_cast(arg); std::unique_lock lock(self->asyncMtx, std::defer_lock); Event *sleepCandidate = nullptr; WaitStatus waitStatus{}; while (true) { lock.lock(); self->transferRegisterList(); if (!self->allowAsyncProcess) { self->processList(); self->releaseEvents(); break; } if (self->list.empty()) { self->asyncCond.wait(lock); } lock.unlock(); sleepCandidate = self->processList(); if (sleepCandidate) { waitStatus = sleepCandidate->wait(true, 
true); if (waitStatus == WaitStatus::GpuHang) { sleepCandidate->abortExecutionDueToGpuHang(); } } std::this_thread::yield(); } return nullptr; } void AsyncEventsHandler::closeThread() { std::unique_lock lock(asyncMtx); if (allowAsyncProcess) { allowAsyncProcess = false; asyncCond.notify_one(); lock.unlock(); thread.get()->join(); thread.reset(nullptr); } } void AsyncEventsHandler::openThread() { if (!thread.get()) { DEBUG_BREAK_IF(allowAsyncProcess); allowAsyncProcess = true; thread = Thread::create(asyncProcess, reinterpret_cast(this)); } } void AsyncEventsHandler::transferRegisterList() { std::move(registerList.begin(), registerList.end(), std::back_inserter(list)); registerList.clear(); } void AsyncEventsHandler::releaseEvents() { for (auto event : list) { event->decRefInternal(); } list.clear(); UNRECOVERABLE_IF(!registerList.empty()) // transferred before release } } // namespace NEO compute-runtime-22.14.22890/opencl/source/event/async_events_handler.h000066400000000000000000000015601422164147700256240ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include #include #include #include #include namespace NEO { class Event; class Thread; class AsyncEventsHandler { public: AsyncEventsHandler(); virtual ~AsyncEventsHandler(); void registerEvent(Event *event); void closeThread(); protected: Event *processList(); static void *asyncProcess(void *arg); void releaseEvents(); MOCKABLE_VIRTUAL void openThread(); MOCKABLE_VIRTUAL void transferRegisterList(); std::vector registerList; std::vector list; std::vector pendingList; std::unique_ptr thread; std::mutex asyncMtx; std::condition_variable asyncCond; std::atomic allowAsyncProcess; }; } // namespace NEO compute-runtime-22.14.22890/opencl/source/event/event.cpp000066400000000000000000000766211422164147700231140ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include 
"opencl/source/event/event.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/device/device.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/get_info.h" #include "shared/source/helpers/timestamp_packet.h" #include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/source/utilities/range.h" #include "shared/source/utilities/stackvec.h" #include "shared/source/utilities/tag_allocator.h" #include "opencl/extensions/public/cl_ext_private.h" #include "opencl/source/api/cl_types.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/context/context.h" #include "opencl/source/event/async_events_handler.h" #include "opencl/source/event/event_tracker.h" #include "opencl/source/helpers/get_info_status_mapper.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/mem_obj/mem_obj.h" #include namespace NEO { Event::Event( Context *ctx, CommandQueue *cmdQueue, cl_command_type cmdType, uint32_t taskLevel, uint32_t taskCount) : taskLevel(taskLevel), currentCmdQVirtualEvent(false), cmdToSubmit(nullptr), submittedCmd(nullptr), ctx(ctx), cmdQueue(cmdQueue), cmdType(cmdType), dataCalculated(false), taskCount(taskCount) { if (NEO::DebugManager.flags.EventsTrackerEnable.get()) { EventsTracker::getEventsTracker().notifyCreation(this); } parentCount = 0; executionStatus = CL_QUEUED; flushStamp.reset(new FlushStampTracker(true)); DBG_LOG(EventsDebugEnable, "Event()", this); // Event can live longer than command queue that created it, // hence command queue refCount must be incremented // non-null command queue is only passed when Base Event object is created // any other Event types must increment refcount when setting command queue if (cmdQueue != nullptr) { cmdQueue->incRefInternal(); } if ((this->ctx == nullptr) && (cmdQueue != nullptr)) { this->ctx = &cmdQueue->getContext(); if (cmdQueue->getTimestampPacketContainer()) { 
timestampPacketContainer = std::make_unique(); } } if (this->ctx != nullptr) { this->ctx->incRefInternal(); } queueTimeStamp = {0, 0}; submitTimeStamp = {0, 0}; startTimeStamp = 0; endTimeStamp = 0; completeTimeStamp = 0; profilingEnabled = !isUserEvent() && (cmdQueue ? cmdQueue->getCommandQueueProperties() & CL_QUEUE_PROFILING_ENABLE : false); profilingCpuPath = ((cmdType == CL_COMMAND_MAP_BUFFER) || (cmdType == CL_COMMAND_MAP_IMAGE)) && profilingEnabled; perfCountersEnabled = cmdQueue ? cmdQueue->isPerfCountersEnabled() : false; } Event::Event( CommandQueue *cmdQueue, cl_command_type cmdType, uint32_t taskLevel, uint32_t taskCount) : Event(nullptr, cmdQueue, cmdType, taskLevel, taskCount) { } Event::~Event() { if (NEO::DebugManager.flags.EventsTrackerEnable.get()) { EventsTracker::getEventsTracker().notifyDestruction(this); } DBG_LOG(EventsDebugEnable, "~Event()", this); // no commands should be registred DEBUG_BREAK_IF(this->cmdToSubmit.load()); submitCommand(true); int32_t lastStatus = executionStatus; if (isStatusCompleted(lastStatus) == false) { transitionExecutionStatus(-1); DEBUG_BREAK_IF(peekHasCallbacks() || peekHasChildEvents()); } // Note from OCL spec: // "All callbacks registered for an event object must be called. // All enqueued callbacks shall be called before the event object is destroyed." 
if (peekHasCallbacks()) { executeCallbacks(lastStatus); } { // clean-up submitted command if needed std::unique_ptr submittedCommand(submittedCmd.exchange(nullptr)); } if (cmdQueue != nullptr) { if (timeStampNode != nullptr) { timeStampNode->returnTag(); } if (perfCounterNode != nullptr) { cmdQueue->getPerfCounters()->deleteQuery(perfCounterNode->getQueryHandleRef()); perfCounterNode->getQueryHandleRef() = {}; perfCounterNode->returnTag(); } cmdQueue->decRefInternal(); } if (ctx != nullptr) { ctx->decRefInternal(); } // in case event did not unblock child events before unblockEventsBlockedByThis(executionStatus); } cl_int Event::getEventProfilingInfo(cl_profiling_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) { cl_int retVal; const void *src = nullptr; size_t srcSize = GetInfo::invalidSourceSize; // CL_PROFILING_INFO_NOT_AVAILABLE if event refers to the clEnqueueSVMFree command if (isUserEvent() != CL_FALSE || // or is a user event object. !updateStatusAndCheckCompletion() || // if the execution status of the command identified by event is not CL_COMPLETE !profilingEnabled) // the CL_QUEUE_PROFILING_ENABLE flag is not set for the command-queue, { return CL_PROFILING_INFO_NOT_AVAILABLE; } uint64_t timestamp = 0u; // if paramValue is NULL, it is ignored switch (paramName) { case CL_PROFILING_COMMAND_QUEUED: timestamp = getTimeInNSFromTimestampData(queueTimeStamp); src = ×tamp; srcSize = sizeof(cl_ulong); break; case CL_PROFILING_COMMAND_SUBMIT: calculateSubmitTimestampData(); timestamp = getTimeInNSFromTimestampData(submitTimeStamp); src = ×tamp; srcSize = sizeof(cl_ulong); break; case CL_PROFILING_COMMAND_START: calcProfilingData(); src = &startTimeStamp; srcSize = sizeof(cl_ulong); break; case CL_PROFILING_COMMAND_END: calcProfilingData(); src = &endTimeStamp; srcSize = sizeof(cl_ulong); break; case CL_PROFILING_COMMAND_COMPLETE: calcProfilingData(); src = &completeTimeStamp; srcSize = sizeof(cl_ulong); break; case 
CL_PROFILING_COMMAND_PERFCOUNTERS_INTEL: if (!perfCountersEnabled) { return CL_INVALID_VALUE; } if (!cmdQueue->getPerfCounters()->getApiReport(perfCounterNode, paramValueSize, paramValue, paramValueSizeRet, updateStatusAndCheckCompletion())) { return CL_PROFILING_INFO_NOT_AVAILABLE; } return CL_SUCCESS; default: return CL_INVALID_VALUE; } auto getInfoStatus = GetInfo::getInfo(paramValue, paramValueSize, src, srcSize); retVal = changeGetInfoStatusToCLResultType(getInfoStatus); GetInfo::setParamValueReturnSize(paramValueSizeRet, srcSize, getInfoStatus); return retVal; } // namespace NEO void Event::setupBcs(aub_stream::EngineType bcsEngineType) { DEBUG_BREAK_IF(!EngineHelpers::isBcs(bcsEngineType)); this->bcsState.engineType = bcsEngineType; } uint32_t Event::peekBcsTaskCountFromCommandQueue() { if (bcsState.isValid()) { return this->cmdQueue->peekBcsTaskCount(bcsState.engineType); } else { return 0u; } } uint32_t Event::getCompletionStamp() const { return this->taskCount; } void Event::updateCompletionStamp(uint32_t gpgpuTaskCount, uint32_t bcsTaskCount, uint32_t tasklevel, FlushStamp flushStamp) { this->taskCount = gpgpuTaskCount; this->bcsState.taskCount = bcsTaskCount; this->taskLevel = tasklevel; this->flushStamp->setStamp(flushStamp); } cl_ulong Event::getDelta(cl_ulong startTime, cl_ulong endTime) { auto &hwInfo = cmdQueue->getDevice().getHardwareInfo(); cl_ulong Max = maxNBitValue(hwInfo.capabilityTable.kernelTimestampValidBits); cl_ulong Delta = 0; startTime &= Max; endTime &= Max; if (startTime > endTime) { Delta = Max - startTime; Delta += endTime; } else { Delta = endTime - startTime; } return Delta; } void Event::calculateSubmitTimestampData() { if (DebugManager.flags.EnableDeviceBasedTimestamps.get()) { auto &device = cmdQueue->getDevice(); auto &hwHelper = HwHelper::get(device.getHardwareInfo().platform.eRenderCoreFamily); double resolution = device.getDeviceInfo().profilingTimerResolution; int64_t timerDiff = queueTimeStamp.CPUTimeinNS - 
hwHelper.getGpuTimeStampInNS(queueTimeStamp.GPUTimeStamp, resolution); submitTimeStamp.GPUTimeStamp = static_cast((submitTimeStamp.CPUTimeinNS - timerDiff) / resolution); } } uint64_t Event::getTimeInNSFromTimestampData(const TimeStampData ×tamp) const { if (isCPUProfilingPath()) { return timestamp.CPUTimeinNS; } if (DebugManager.flags.ReturnRawGpuTimestamps.get()) { return timestamp.GPUTimeStamp; } if (cmdQueue && DebugManager.flags.EnableDeviceBasedTimestamps.get()) { auto &device = cmdQueue->getDevice(); auto &hwHelper = HwHelper::get(device.getHardwareInfo().platform.eRenderCoreFamily); double resolution = device.getDeviceInfo().profilingTimerResolution; return hwHelper.getGpuTimeStampInNS(timestamp.GPUTimeStamp, resolution); } return timestamp.CPUTimeinNS; } bool Event::calcProfilingData() { if (!dataCalculated && !profilingCpuPath) { if (timestampPacketContainer && timestampPacketContainer->peekNodes().size() > 0) { const auto timestamps = timestampPacketContainer->peekNodes(); if (DebugManager.flags.PrintTimestampPacketContents.get()) { for (auto i = 0u; i < timestamps.size(); i++) { std::cout << "Timestamp " << i << ", " << "cmd type: " << this->cmdType << ", "; for (auto j = 0u; j < timestamps[i]->getPacketsUsed(); j++) { std::cout << "packet " << j << ": " << "global start: " << timestamps[i]->getGlobalStartValue(j) << ", " << "global end: " << timestamps[i]->getGlobalEndValue(j) << ", " << "context start: " << timestamps[i]->getContextStartValue(j) << ", " << "context end: " << timestamps[i]->getContextEndValue(j) << ", " << "global delta: " << timestamps[i]->getGlobalEndValue(j) - timestamps[i]->getGlobalStartValue(j) << ", " << "context delta: " << timestamps[i]->getContextEndValue(j) - timestamps[i]->getContextStartValue(j) << std::endl; } } } uint64_t globalStartTS = 0u; uint64_t globalEndTS = 0u; Event::getBoundaryTimestampValues(timestampPacketContainer.get(), globalStartTS, globalEndTS); calculateProfilingDataInternal(globalStartTS, globalEndTS, 
&globalEndTS, globalStartTS); } else if (timeStampNode) { if (HwHelper::get(this->cmdQueue->getDevice().getHardwareInfo().platform.eRenderCoreFamily).useOnlyGlobalTimestamps()) { calculateProfilingDataInternal( timeStampNode->getGlobalStartValue(0), timeStampNode->getGlobalEndValue(0), &timeStampNode->getGlobalEndRef(), timeStampNode->getGlobalStartValue(0)); } else { calculateProfilingDataInternal( timeStampNode->getContextStartValue(0), timeStampNode->getContextEndValue(0), &timeStampNode->getContextCompleteRef(), timeStampNode->getGlobalStartValue(0)); } } } return dataCalculated; } void Event::calculateProfilingDataInternal(uint64_t contextStartTS, uint64_t contextEndTS, uint64_t *contextCompleteTS, uint64_t globalStartTS) { uint64_t gpuDuration = 0; uint64_t cpuDuration = 0; uint64_t gpuCompleteDuration = 0; uint64_t cpuCompleteDuration = 0; auto &device = this->cmdQueue->getDevice(); auto &hwHelper = HwHelper::get(device.getHardwareInfo().platform.eRenderCoreFamily); auto frequency = device.getDeviceInfo().profilingTimerResolution; auto gpuQueueTimeStamp = hwHelper.getGpuTimeStampInNS(queueTimeStamp.GPUTimeStamp, frequency); if (DebugManager.flags.EnableDeviceBasedTimestamps.get()) { startTimeStamp = static_cast(globalStartTS * frequency); if (startTimeStamp < gpuQueueTimeStamp) { startTimeStamp += static_cast((1ULL << hwHelper.getGlobalTimeStampBits()) * frequency); } } else { int64_t c0 = queueTimeStamp.CPUTimeinNS - gpuQueueTimeStamp; startTimeStamp = static_cast(globalStartTS * frequency) + c0; if (startTimeStamp < queueTimeStamp.CPUTimeinNS) { c0 += static_cast((1ULL << (hwHelper.getGlobalTimeStampBits())) * frequency); startTimeStamp = static_cast(globalStartTS * frequency) + c0; } } /* calculation based on equation CpuTime = GpuTime * scalar + const( == c0) scalar = DeltaCpu( == dCpu) / DeltaGpu( == dGpu) to determine the value of the const we can use one pair of values const = CpuTimeQueue - GpuTimeQueue * scalar */ // If device enqueue has not 
updated complete timestamp, assign end timestamp gpuDuration = getDelta(contextStartTS, contextEndTS); if (*contextCompleteTS == 0) { *contextCompleteTS = contextEndTS; gpuCompleteDuration = gpuDuration; } else { gpuCompleteDuration = getDelta(contextStartTS, *contextCompleteTS); } cpuDuration = static_cast(gpuDuration * frequency); cpuCompleteDuration = static_cast(gpuCompleteDuration * frequency); endTimeStamp = startTimeStamp + cpuDuration; completeTimeStamp = startTimeStamp + cpuCompleteDuration; if (DebugManager.flags.ReturnRawGpuTimestamps.get()) { startTimeStamp = contextStartTS; endTimeStamp = contextEndTS; completeTimeStamp = *contextCompleteTS; } dataCalculated = true; } void Event::getBoundaryTimestampValues(TimestampPacketContainer *timestampContainer, uint64_t &globalStartTS, uint64_t &globalEndTS) { const auto timestamps = timestampContainer->peekNodes(); globalStartTS = timestamps[0]->getGlobalStartValue(0); globalEndTS = timestamps[0]->getGlobalEndValue(0); for (const auto ×tamp : timestamps) { if (!timestamp->isProfilingCapable()) { continue; } for (auto i = 0u; i < timestamp->getPacketsUsed(); ++i) { if (globalStartTS > timestamp->getGlobalStartValue(i)) { globalStartTS = timestamp->getGlobalStartValue(i); } if (globalEndTS < timestamp->getGlobalEndValue(i)) { globalEndTS = timestamp->getGlobalEndValue(i); } } } } inline WaitStatus Event::wait(bool blocking, bool useQuickKmdSleep) { while (this->taskCount == CompletionStamp::notReady) { if (blocking == false) { return WaitStatus::NotReady; } } Range states{&bcsState, bcsState.isValid() ? 
1u : 0u}; const auto waitStatus = cmdQueue->waitUntilComplete(taskCount.load(), states, flushStamp->peekStamp(), useQuickKmdSleep); if (waitStatus == WaitStatus::GpuHang) { return WaitStatus::GpuHang; } updateExecutionStatus(); DEBUG_BREAK_IF(this->taskLevel == CompletionStamp::notReady && this->executionStatus >= 0); auto *allocationStorage = cmdQueue->getGpgpuCommandStreamReceiver().getInternalAllocationStorage(); allocationStorage->cleanAllocationList(this->taskCount, TEMPORARY_ALLOCATION); return WaitStatus::Ready; } void Event::updateExecutionStatus() { if (taskLevel == CompletionStamp::notReady) { return; } int32_t statusSnapshot = executionStatus; if (isStatusCompleted(statusSnapshot)) { executeCallbacks(statusSnapshot); return; } if (peekIsBlocked()) { transitionExecutionStatus(CL_QUEUED); executeCallbacks(CL_QUEUED); return; } if (statusSnapshot == CL_QUEUED) { bool abortBlockedTasks = isStatusCompletedByTermination(statusSnapshot); submitCommand(abortBlockedTasks); transitionExecutionStatus(CL_SUBMITTED); executeCallbacks(CL_SUBMITTED); unblockEventsBlockedByThis(CL_SUBMITTED); // Note : Intentional fallthrough (no return) to check for CL_COMPLETE } if ((cmdQueue != nullptr) && this->isCompleted()) { transitionExecutionStatus(CL_COMPLETE); executeCallbacks(CL_COMPLETE); unblockEventsBlockedByThis(CL_COMPLETE); auto *allocationStorage = cmdQueue->getGpgpuCommandStreamReceiver().getInternalAllocationStorage(); allocationStorage->cleanAllocationList(this->taskCount, TEMPORARY_ALLOCATION); return; } transitionExecutionStatus(CL_SUBMITTED); } void Event::addChild(Event &childEvent) { childEvent.parentCount++; childEvent.incRefInternal(); childEventsToNotify.pushRefFrontOne(childEvent); DBG_LOG(EventsDebugEnable, "addChild: Parent event:", this, "child:", &childEvent); if (DebugManager.flags.TrackParentEvents.get()) { childEvent.parentEvents.push_back(this); } if (executionStatus == CL_COMPLETE) { unblockEventsBlockedByThis(CL_COMPLETE); } } void 
Event::unblockEventsBlockedByThis(int32_t transitionStatus) { int32_t status = transitionStatus; (void)status; DEBUG_BREAK_IF(!(isStatusCompleted(status) || (peekIsSubmitted(status)))); uint32_t taskLevelToPropagate = CompletionStamp::notReady; if (isStatusCompletedByTermination(transitionStatus) == false) { // if we are event on top of the tree , obtain taskLevel from CSR if (taskLevel == CompletionStamp::notReady) { this->taskLevel = getTaskLevel(); // NOLINT(clang-analyzer-optin.cplusplus.VirtualCall) taskLevelToPropagate = this->taskLevel; } else { taskLevelToPropagate = taskLevel + 1; } } auto childEventRef = childEventsToNotify.detachNodes(); while (childEventRef != nullptr) { auto childEvent = childEventRef->ref; childEvent->unblockEventBy(*this, taskLevelToPropagate, transitionStatus); childEvent->decRefInternal(); auto next = childEventRef->next; delete childEventRef; childEventRef = next; } } bool Event::setStatus(cl_int status) { int32_t prevStatus = executionStatus; DBG_LOG(EventsDebugEnable, "setStatus event", this, " new status", status, "previousStatus", prevStatus); if (isStatusCompleted(prevStatus)) { return false; } if (status == prevStatus) { return false; } if (peekIsBlocked() && (isStatusCompletedByTermination(status) == false)) { return false; } if ((status == CL_SUBMITTED) || (isStatusCompleted(status))) { bool abortBlockedTasks = isStatusCompletedByTermination(status); submitCommand(abortBlockedTasks); } this->incRefInternal(); transitionExecutionStatus(status); if (isStatusCompleted(status) || (status == CL_SUBMITTED)) { unblockEventsBlockedByThis(status); } executeCallbacks(status); this->decRefInternal(); return true; } void Event::transitionExecutionStatus(int32_t newExecutionStatus) const { int32_t prevStatus = executionStatus; DBG_LOG(EventsDebugEnable, "transitionExecutionStatus event", this, " new status", newExecutionStatus, "previousStatus", prevStatus); while (prevStatus > newExecutionStatus) { 
executionStatus.compare_exchange_weak(prevStatus, newExecutionStatus); } if (NEO::DebugManager.flags.EventsTrackerEnable.get()) { EventsTracker::getEventsTracker().notifyTransitionedExecutionStatus(); } } void Event::submitCommand(bool abortTasks) { std::unique_ptr cmdToProcess(cmdToSubmit.exchange(nullptr)); if (cmdToProcess.get() != nullptr) { auto lockCSR = getCommandQueue()->getGpgpuCommandStreamReceiver().obtainUniqueOwnership(); if (this->isProfilingEnabled()) { if (timeStampNode) { this->cmdQueue->getGpgpuCommandStreamReceiver().makeResident(*timeStampNode->getBaseGraphicsAllocation()); cmdToProcess->timestamp = timeStampNode; } if (profilingCpuPath) { setSubmitTimeStamp(); setStartTimeStamp(); } else { this->cmdQueue->getDevice().getOSTime()->getCpuGpuTime(&submitTimeStamp); } if (perfCountersEnabled && perfCounterNode) { this->cmdQueue->getGpgpuCommandStreamReceiver().makeResident(*perfCounterNode->getBaseGraphicsAllocation()); } } auto &complStamp = cmdToProcess->submit(taskLevel, abortTasks); if (profilingCpuPath && this->isProfilingEnabled()) { setEndTimeStamp(); } if (complStamp.taskCount == CompletionStamp::gpuHang) { abortExecutionDueToGpuHang(); return; } updateTaskCount(complStamp.taskCount, peekBcsTaskCountFromCommandQueue()); flushStamp->setStamp(complStamp.flushStamp); submittedCmd.exchange(cmdToProcess.release()); } else if (profilingCpuPath && endTimeStamp == 0) { setEndTimeStamp(); } if (this->taskCount == CompletionStamp::notReady) { if (!this->isUserEvent() && this->eventWithoutCommand) { if (this->cmdQueue) { auto lockCSR = this->getCommandQueue()->getGpgpuCommandStreamReceiver().obtainUniqueOwnership(); updateTaskCount(this->cmdQueue->getGpgpuCommandStreamReceiver().peekTaskCount(), peekBcsTaskCountFromCommandQueue()); } } // make sure that task count is synchronized for events with kernels if (!this->eventWithoutCommand && !abortTasks) { this->synchronizeTaskCount(); } } } cl_int Event::waitForEvents(cl_uint numEvents, const cl_event 
*eventList) { if (numEvents == 0) { return CL_SUCCESS; } // flush all command queues for (const cl_event *it = eventList, *end = eventList + numEvents; it != end; ++it) { Event *event = castToObjectOrAbort(*it); if (event->cmdQueue) { if (event->taskLevel != CompletionStamp::notReady) { event->cmdQueue->flush(); } } } using WorkerListT = StackVec; WorkerListT workerList1(eventList, eventList + numEvents); WorkerListT workerList2; workerList2.reserve(numEvents); // pointers to workerLists - for fast swap operations WorkerListT *currentlyPendingEvents = &workerList1; WorkerListT *pendingEventsLeft = &workerList2; WaitStatus eventWaitStatus = WaitStatus::NotReady; while (currentlyPendingEvents->size() > 0) { for (auto current = currentlyPendingEvents->begin(), end = currentlyPendingEvents->end(); current != end; ++current) { Event *event = castToObjectOrAbort(*current); if (event->peekExecutionStatus() < CL_COMPLETE) { return CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST; } eventWaitStatus = event->wait(false, false); if (eventWaitStatus == WaitStatus::NotReady) { pendingEventsLeft->push_back(event); } else if (eventWaitStatus == WaitStatus::GpuHang) { setExecutionStatusToAbortedDueToGpuHang(pendingEventsLeft->begin(), pendingEventsLeft->end()); setExecutionStatusToAbortedDueToGpuHang(current, end); return CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST; } } std::swap(currentlyPendingEvents, pendingEventsLeft); pendingEventsLeft->clear(); } return CL_SUCCESS; } inline void Event::setExecutionStatusToAbortedDueToGpuHang(cl_event *first, cl_event *last) { std::for_each(first, last, [](cl_event &e) { Event *event = castToObjectOrAbort(e); event->abortExecutionDueToGpuHang(); }); } bool Event::isCompleted() { return cmdQueue->isCompleted(getCompletionStamp(), this->bcsState) || this->areTimestampsCompleted(); } bool Event::areTimestampsCompleted() { if (this->timestampPacketContainer.get()) { if (this->cmdQueue->isWaitForTimestampsEnabled()) { for (const auto ×tamp : 
this->timestampPacketContainer->peekNodes()) { for (uint32_t i = 0; i < timestamp->getPacketsUsed(); i++) { this->cmdQueue->getGpgpuCommandStreamReceiver().downloadAllocation(*timestamp->getBaseGraphicsAllocation()->getGraphicsAllocation(this->cmdQueue->getGpgpuCommandStreamReceiver().getRootDeviceIndex())); if (timestamp->getContextEndValue(i) == 1) { return false; } } } return true; } } return false; } uint32_t Event::getTaskLevel() { return taskLevel; } inline void Event::unblockEventBy(Event &event, uint32_t taskLevel, int32_t transitionStatus) { int32_t numEventsBlockingThis = --parentCount; DEBUG_BREAK_IF(numEventsBlockingThis < 0); int32_t blockerStatus = transitionStatus; DEBUG_BREAK_IF(!(isStatusCompleted(blockerStatus) || peekIsSubmitted(blockerStatus))); if ((numEventsBlockingThis > 0) && (isStatusCompletedByTermination(blockerStatus) == false)) { return; } DBG_LOG(EventsDebugEnable, "Event", this, "is unblocked by", &event); if (this->taskLevel == CompletionStamp::notReady) { this->taskLevel = std::max(cmdQueue->getGpgpuCommandStreamReceiver().peekTaskLevel(), taskLevel); } else { this->taskLevel = std::max(this->taskLevel.load(), taskLevel); } int32_t statusToPropagate = CL_SUBMITTED; if (isStatusCompletedByTermination(blockerStatus)) { statusToPropagate = blockerStatus; } setStatus(statusToPropagate); // event may be completed after this operation, transtition the state to not block others. this->updateExecutionStatus(); } bool Event::updateStatusAndCheckCompletion() { auto currentStatus = updateEventAndReturnCurrentStatus(); return isStatusCompleted(currentStatus); } bool Event::isReadyForSubmission() { return taskLevel != CompletionStamp::notReady ? 
true : false; } void Event::addCallback(Callback::ClbFuncT fn, cl_int type, void *data) { ECallbackTarget target = translateToCallbackTarget(type); if (target == ECallbackTarget::Invalid) { DEBUG_BREAK_IF(true); return; } incRefInternal(); // Note from spec : // "All callbacks registered for an event object must be called. // All enqueued callbacks shall be called before the event object is destroyed." // That's why each registered calback increments the internal refcount incRefInternal(); DBG_LOG(EventsDebugEnable, "event", this, "addCallback", "ECallbackTarget", (uint32_t)type); callbacks[(uint32_t)target].pushFrontOne(*new Callback(this, fn, type, data)); // Callback added after event reached its "completed" state if (updateStatusAndCheckCompletion()) { int32_t status = executionStatus; DBG_LOG(EventsDebugEnable, "event", this, "addCallback executing callbacks with status", status); executeCallbacks(status); } if (peekHasCallbacks() && !isUserEvent() && DebugManager.flags.EnableAsyncEventsHandler.get()) { ctx->getAsyncEventsHandler().registerEvent(this); } decRefInternal(); } void Event::executeCallbacks(int32_t executionStatusIn) { int32_t execStatus = executionStatusIn; bool terminated = isStatusCompletedByTermination(execStatus); ECallbackTarget target; if (terminated) { target = ECallbackTarget::Completed; } else { target = translateToCallbackTarget(execStatus); if (target == ECallbackTarget::Invalid) { DEBUG_BREAK_IF(true); return; } } // run through all needed callback targets and execute callbacks for (uint32_t i = 0; i <= (uint32_t)target; ++i) { auto cb = callbacks[i].detachNodes(); auto curr = cb; while (curr != nullptr) { auto next = curr->next; if (terminated) { curr->overrideCallbackExecutionStatusTarget(execStatus); } DBG_LOG(EventsDebugEnable, "event", this, "executing callback", "ECallbackTarget", (uint32_t)target); curr->execute(); decRefInternal(); delete curr; curr = next; } } } void Event::tryFlushEvent() { // only if event is not completed, 
completed event has already been flushed if (cmdQueue && updateStatusAndCheckCompletion() == false) { // flush the command queue only if it is not blocked event if (taskLevel != CompletionStamp::notReady) { cmdQueue->getGpgpuCommandStreamReceiver().flushBatchedSubmissions(); } } } void Event::setQueueTimeStamp() { if (this->profilingEnabled && (this->cmdQueue != nullptr)) { this->cmdQueue->getDevice().getOSTime()->getCpuTime(&queueTimeStamp.CPUTimeinNS); } } void Event::setSubmitTimeStamp() { if (this->profilingEnabled && (this->cmdQueue != nullptr)) { this->cmdQueue->getDevice().getOSTime()->getCpuTime(&submitTimeStamp.CPUTimeinNS); } } void Event::setStartTimeStamp() { if (this->profilingEnabled && (this->cmdQueue != nullptr)) { this->cmdQueue->getDevice().getOSTime()->getCpuTime(&startTimeStamp); } } void Event::setEndTimeStamp() { if (this->profilingEnabled && (this->cmdQueue != nullptr)) { this->cmdQueue->getDevice().getOSTime()->getCpuTime(&endTimeStamp); completeTimeStamp = endTimeStamp; } } TagNodeBase *Event::getHwTimeStampNode() { if (!cmdQueue->getTimestampPacketContainer() && !timeStampNode) { timeStampNode = cmdQueue->getGpgpuCommandStreamReceiver().getEventTsAllocator()->getTag(); } return timeStampNode; } TagNodeBase *Event::getHwPerfCounterNode() { if (!perfCounterNode && cmdQueue->getPerfCounters()) { const uint32_t gpuReportSize = HwPerfCounter::getSize(*(cmdQueue->getPerfCounters())); perfCounterNode = cmdQueue->getGpgpuCommandStreamReceiver().getEventPerfCountAllocator(gpuReportSize)->getTag(); } return perfCounterNode; } void Event::addTimestampPacketNodes(const TimestampPacketContainer &inputTimestampPacketContainer) { timestampPacketContainer->assignAndIncrementNodesRefCounts(inputTimestampPacketContainer); } TimestampPacketContainer *Event::getTimestampPacketNodes() const { return timestampPacketContainer.get(); } bool Event::checkUserEventDependencies(cl_uint numEventsInWaitList, const cl_event *eventWaitList) { bool userEventsDependencies 
= false; for (uint32_t i = 0; i < numEventsInWaitList; i++) { auto event = castToObjectOrAbort(eventWaitList[i]); if (!event->isReadyForSubmission()) { userEventsDependencies = true; break; } } return userEventsDependencies; } } // namespace NEO compute-runtime-22.14.22890/opencl/source/event/event.h000066400000000000000000000306571422164147700225600ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/wait_status.h" #include "shared/source/helpers/flush_stamp.h" #include "shared/source/os_interface/os_time.h" #include "shared/source/os_interface/performance_counters.h" #include "shared/source/utilities/arrayref.h" #include "shared/source/utilities/hw_timestamps.h" #include "shared/source/utilities/idlist.h" #include "shared/source/utilities/iflist.h" #include "opencl/source/api/cl_types.h" #include "opencl/source/command_queue/copy_engine_state.h" #include "opencl/source/helpers/base_object.h" #include "opencl/source/helpers/task_information.h" #include #include #include namespace NEO { template class TagNode; class CommandQueue; class Context; class Device; class TimestampPacketContainer; template <> struct OpenCLObjectMapper<_cl_event> { typedef class Event DerivedType; }; class Event : public BaseObject<_cl_event>, public IDNode { public: enum class ECallbackTarget : uint32_t { Queued = 0, Submitted, Running, Completed, MAX, Invalid }; struct Callback : public IFNode { typedef void(CL_CALLBACK *ClbFuncT)(cl_event, cl_int, void *); Callback(cl_event event, ClbFuncT clb, cl_int type, void *data) : event(event), callbackFunction(clb), callbackExecutionStatusTarget(type), userData(data) { } void execute() { callbackFunction(event, callbackExecutionStatusTarget, userData); } int32_t getCallbackExecutionStatusTarget() const { return callbackExecutionStatusTarget; } // From OCL spec : // "If the callback is called as the result of the command associated with 
// event being abnormally terminated, an appropriate error code for the error that caused // the termination will be passed to event_command_exec_status instead." // This function allows to override this value void overrideCallbackExecutionStatusTarget(int32_t newCallbackExecutionStatusTarget) { DEBUG_BREAK_IF(newCallbackExecutionStatusTarget >= 0); callbackExecutionStatusTarget = newCallbackExecutionStatusTarget; } private: cl_event event; ClbFuncT callbackFunction; int32_t callbackExecutionStatusTarget; // minimum event execution status that will triger this callback void *userData; }; static const cl_ulong objectMagic = 0x80134213A43C981ALL; static constexpr cl_int executionAbortedDueToGpuHang = -777; Event(CommandQueue *cmdQueue, cl_command_type cmdType, uint32_t taskLevel, uint32_t taskCount); Event(const Event &) = delete; Event &operator=(const Event &) = delete; ~Event() override; void setupBcs(aub_stream::EngineType bcsEngineType); uint32_t peekBcsTaskCountFromCommandQueue(); uint32_t getCompletionStamp() const; void updateCompletionStamp(uint32_t taskCount, uint32_t bcsTaskCount, uint32_t tasklevel, FlushStamp flushStamp); cl_ulong getDelta(cl_ulong startTime, cl_ulong endTime); void setCPUProfilingPath(bool isCPUPath) { this->profilingCpuPath = isCPUPath; } bool isCPUProfilingPath() const { return profilingCpuPath; } cl_int getEventProfilingInfo(cl_profiling_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet); bool isProfilingEnabled() const { return profilingEnabled; } void setProfilingEnabled(bool profilingEnabled) { this->profilingEnabled = profilingEnabled; } TagNodeBase *getHwTimeStampNode(); void addTimestampPacketNodes(const TimestampPacketContainer &inputTimestampPacketContainer); TimestampPacketContainer *getTimestampPacketNodes() const; bool isPerfCountersEnabled() const { return perfCountersEnabled; } void setPerfCountersEnabled(bool perfCountersEnabled) { this->perfCountersEnabled = perfCountersEnabled; } void 
abortExecutionDueToGpuHang() { this->transitionExecutionStatus(executionAbortedDueToGpuHang); } TagNodeBase *getHwPerfCounterNode(); std::unique_ptr flushStamp; std::atomic taskLevel; void addChild(Event &e); virtual bool setStatus(cl_int status); static cl_int waitForEvents(cl_uint numEvents, const cl_event *eventList); void setCommand(std::unique_ptr newCmd) { UNRECOVERABLE_IF(cmdToSubmit.load()); cmdToSubmit.exchange(newCmd.release()); eventWithoutCommand = false; } Command *peekCommand() { return cmdToSubmit; } IFNodeRef *peekChildEvents() { return childEventsToNotify.peekHead(); } bool peekHasChildEvents() { return (peekChildEvents() != nullptr); } bool peekHasCallbacks(ECallbackTarget target) { if (target >= ECallbackTarget::MAX) { DEBUG_BREAK_IF(true); return false; } return (callbacks[(uint32_t)target].peekHead() != nullptr); } bool peekHasCallbacks() { for (uint32_t i = 0; i < (uint32_t)ECallbackTarget::MAX; ++i) { if (peekHasCallbacks((ECallbackTarget)i)) { return true; } } return false; } // return the number of events that are blocking this event uint32_t peekNumEventsBlockingThis() const { return parentCount; } // returns true if event is completed (in terms of definition provided by OCL spec) // Note from OLC spec : // "A command is considered complete if its execution status // is CL_COMPLETE or a negative value." bool isStatusCompleted(const int32_t executionStatusSnapshot) { return executionStatusSnapshot <= CL_COMPLETE; } bool updateStatusAndCheckCompletion(); bool isCompleted(); // Note from OCL spec : // "A negative integer value causes all enqueued commands that wait on this user event // to be terminated." 
bool isStatusCompletedByTermination(const int32_t executionStatusSnapshot) const { return executionStatusSnapshot < 0; } bool peekIsSubmitted(const int32_t executionStatusSnapshot) const { return executionStatusSnapshot == CL_SUBMITTED; } bool peekIsCmdSubmitted() { return submittedCmd != nullptr; } //commands blocked by user event depencies bool isReadyForSubmission(); // adds a callback (execution state change listener) to this event's list of callbacks void addCallback(Callback::ClbFuncT fn, cl_int type, void *data); //if(blocking==false), will return with WaitStatus::NotReady instead of blocking while waiting for completion virtual WaitStatus wait(bool blocking, bool useQuickKmdSleep); bool isUserEvent() const { return (CL_COMMAND_USER == cmdType); } bool isEventWithoutCommand() const { return eventWithoutCommand; } Context *getContext() { return ctx; } CommandQueue *getCommandQueue() { return cmdQueue; } const CommandQueue *getCommandQueue() const { return cmdQueue; } cl_command_type getCommandType() { return cmdType; } virtual uint32_t getTaskLevel(); cl_int peekExecutionStatus() const { return executionStatus; } cl_int updateEventAndReturnCurrentStatus() { updateExecutionStatus(); return executionStatus; } bool peekIsBlocked() const { return (peekNumEventsBlockingThis() > 0); } virtual void unblockEventBy(Event &event, uint32_t taskLevel, int32_t transitionStatus); void updateTaskCount(uint32_t gpgpuTaskCount, uint32_t bcsTaskCount) { if (gpgpuTaskCount == CompletionStamp::notReady) { DEBUG_BREAK_IF(true); return; } this->bcsState.taskCount = bcsTaskCount; uint32_t prevTaskCount = this->taskCount.exchange(gpgpuTaskCount); if ((prevTaskCount != CompletionStamp::notReady) && (prevTaskCount > gpgpuTaskCount)) { this->taskCount = prevTaskCount; DEBUG_BREAK_IF(true); } } bool isCurrentCmdQVirtualEvent() { return currentCmdQVirtualEvent; } void setCurrentCmdQVirtualEvent(bool isCurrentVirtualEvent) { currentCmdQVirtualEvent = isCurrentVirtualEvent; } virtual void 
updateExecutionStatus(); void tryFlushEvent(); uint32_t peekTaskCount() const { return this->taskCount; } void setQueueTimeStamp(TimeStampData *queueTimeStamp) { this->queueTimeStamp = *queueTimeStamp; }; void setQueueTimeStamp(); void setSubmitTimeStamp(); void setStartTimeStamp(); void setEndTimeStamp(); void setCmdType(uint32_t cmdType) { this->cmdType = cmdType; } std::vector &getParentEvents() { return this->parentEvents; } virtual bool isExternallySynchronized() const { return false; } static bool checkUserEventDependencies(cl_uint numEventsInWaitList, const cl_event *eventWaitList); static void getBoundaryTimestampValues(TimestampPacketContainer *timestampContainer, uint64_t &globalStartTS, uint64_t &globalEndTS); protected: Event(Context *ctx, CommandQueue *cmdQueue, cl_command_type cmdType, uint32_t taskLevel, uint32_t taskCount); ECallbackTarget translateToCallbackTarget(cl_int execStatus) { switch (execStatus) { default: { DEBUG_BREAK_IF(true); return ECallbackTarget::Invalid; } case CL_QUEUED: return ECallbackTarget::Queued; case CL_SUBMITTED: return ECallbackTarget::Submitted; case CL_RUNNING: return ECallbackTarget::Running; case CL_COMPLETE: return ECallbackTarget::Completed; } } void calculateSubmitTimestampData(); uint64_t getTimeInNSFromTimestampData(const TimeStampData ×tamp) const; bool calcProfilingData(); MOCKABLE_VIRTUAL void calculateProfilingDataInternal(uint64_t contextStartTS, uint64_t contextEndTS, uint64_t *contextCompleteTS, uint64_t globalStartTS); MOCKABLE_VIRTUAL void synchronizeTaskCount() { while (this->taskCount == CompletionStamp::notReady) ; }; // executes all callbacks associated with this event void executeCallbacks(int32_t executionStatus); // transitions event to new execution state // guarantees that newStatus <= oldStatus void transitionExecutionStatus(int32_t newExecutionStatus) const; //vector storing events that needs to be notified when this event is ready to go IFRefList childEventsToNotify; void 
unblockEventsBlockedByThis(int32_t transitionStatus); void submitCommand(bool abortBlockedTasks); static void setExecutionStatusToAbortedDueToGpuHang(cl_event *first, cl_event *last); bool areTimestampsCompleted(); bool currentCmdQVirtualEvent; std::atomic cmdToSubmit; std::atomic submittedCmd; bool eventWithoutCommand = true; Context *ctx; CommandQueue *cmdQueue; cl_command_type cmdType; // callbacks to be executed when this event changes its execution state IFList callbacks[(uint32_t)ECallbackTarget::MAX]; // can be accessed only with transitionExecutionState // this is to ensure state consitency event when doning lock-free multithreading // e.g. CL_COMPLETE -> CL_SUBMITTED or CL_SUBMITTED -> CL_QUEUED becomes forbiden mutable std::atomic executionStatus; // Timestamps bool profilingEnabled; bool profilingCpuPath; bool dataCalculated; TimeStampData queueTimeStamp; TimeStampData submitTimeStamp; uint64_t startTimeStamp; uint64_t endTimeStamp; uint64_t completeTimeStamp; CopyEngineState bcsState{}; bool perfCountersEnabled; TagNodeBase *timeStampNode = nullptr; TagNodeBase *perfCounterNode = nullptr; std::unique_ptr timestampPacketContainer; //number of events this event depends on std::atomic parentCount; //event parents std::vector parentEvents; private: // can be accessed only with updateTaskCount std::atomic taskCount; }; } // namespace NEO compute-runtime-22.14.22890/opencl/source/event/event_builder.cpp000066400000000000000000000037171422164147700246160ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/event/event_builder.h" #include "shared/source/helpers/debug_helpers.h" #include "shared/source/helpers/timestamp_packet.h" #include "opencl/source/api/cl_types.h" #include "opencl/source/context/context.h" #include "opencl/source/event/user_event.h" namespace NEO { EventBuilder::~EventBuilder() { UNRECOVERABLE_IF((this->event == nullptr) && ((parentEvents.size() != 0U))); 
finalize(); } void EventBuilder::addParentEvent(Event &newParentEvent) { bool duplicate = false; for (Event *parent : parentEvents) { if (parent == &newParentEvent) { duplicate = true; break; } } if (!duplicate) { newParentEvent.incRefInternal(); parentEvents.push_back(&newParentEvent); } } void EventBuilder::addParentEvents(ArrayRef newParentEvents) { for (cl_event clEv : newParentEvents) { auto neoEv = castToObject(clEv); DEBUG_BREAK_IF(neoEv == nullptr); addParentEvent(neoEv); } } void EventBuilder::finalize() { if ((this->event == nullptr) || finalized) { clear(); return; } if (parentEvents.size() != 0) { UserEvent sentinel; sentinel.addChild(*this->event); for (Event *parent : parentEvents) { //do not add as child if: //parent has no parents and is not blocked if (!(parent->peekIsBlocked() == false && parent->taskLevel != CompletionStamp::notReady) || (!parent->isEventWithoutCommand() && !parent->peekIsCmdSubmitted())) { parent->addChild(*this->event); } } sentinel.setStatus(CL_COMPLETE); } clear(); finalized = true; } void EventBuilder::clear() { for (Event *parent : parentEvents) { parent->decRefInternal(); } parentEvents.clear(); } } // namespace NEO compute-runtime-22.14.22890/opencl/source/event/event_builder.h000066400000000000000000000026331422164147700242570ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/utilities/arrayref.h" #include "shared/source/utilities/stackvec.h" #include "CL/cl.h" #include namespace NEO { class Event; class EventBuilder { public: template void create(ArgsT &&...args) { event = new EventType(std::forward(args)...); } EventBuilder() = default; EventBuilder(const EventBuilder &) = delete; EventBuilder &operator=(const EventBuilder &) = delete; EventBuilder(EventBuilder &&) = delete; EventBuilder &operator=(EventBuilder &&) = delete; ~EventBuilder(); Event *getEvent() const { return event; } void addParentEvent(Event 
&newParentEvent); void addParentEvent(Event *newParentEvent) { if (newParentEvent != nullptr) { addParentEvent(*newParentEvent); } } void addParentEvents(ArrayRef newParentEvents); void finalize(); Event *finalizeAndRelease() { finalize(); Event *retEvent = this->event; this->event = nullptr; finalized = false; return retEvent; } protected: void clear(); Event *event = nullptr; bool finalized = false; StackVec parentEvents; bool doNotRegister = false; }; } // namespace NEO compute-runtime-22.14.22890/opencl/source/event/event_tracker.cpp000066400000000000000000000174761422164147700246320ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/event/event_tracker.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/helpers/cl_helper.h" namespace NEO { std::unique_ptr EventsTracker::globalEvTracker = nullptr; EventsTracker &EventsTracker::getEventsTracker() { static std::mutex initMutex; std::lock_guard autolock(initMutex); if (!EventsTracker::globalEvTracker) { EventsTracker::globalEvTracker = std::unique_ptr{new EventsTracker()}; } UNRECOVERABLE_IF(EventsTracker::globalEvTracker == nullptr); return *EventsTracker::globalEvTracker; } std::string EventsTracker::label(Event *node, const EventIdMap &eventsIdMapping) { std::string retLabel("e"); auto eventTag = eventsIdMapping.find(node); if (eventTag != eventsIdMapping.end()) { auto id = eventTag->second; retLabel += std::to_string(id); } return retLabel; } std::string EventsTracker::label(CommandQueue *cmdQ) { return "cq" + std::to_string(reinterpret_cast(cmdQ)); } void EventsTracker::dumpQueue(CommandQueue *cmdQ, std::ostream &out, CmdqSet &dumpedCmdQs) { if ((cmdQ == nullptr) || (dumpedCmdQs.find(cmdQ) != dumpedCmdQs.end())) { return; } out << label(cmdQ) << "[label=\"{------CmdQueue, ptr=" << cmdQ << "------|task count="; auto taskCount = cmdQ->taskCount; auto taskLevel = cmdQ->taskLevel; if (taskCount == 
CompletionStamp::notReady) { out << "NOT_READY"; } else { out << taskCount; } out << ", level="; if (taskLevel == CompletionStamp::notReady) { out << "NOT_READY"; } else { out << taskLevel; } out << "}\",color=blue];\n"; dumpedCmdQs.insert(cmdQ); } void EventsTracker::dumpNode(Event *node, std::ostream &out, const EventIdMap &eventsIdMapping) { if (node == nullptr) { out << "eNULL[label=\"{ptr=nullptr}\",color=red];\n"; return; } bool isUserEvent = node->isUserEvent(); uint32_t statusId = static_cast(node->peekExecutionStatus()); // clamp to aborted statusId = (statusId > CL_QUEUED) ? (CL_QUEUED + 1) : statusId; const char *color = ((statusId == CL_COMPLETE) || (statusId > CL_QUEUED)) ? "green" : (((statusId == CL_SUBMITTED) && (isUserEvent == false)) ? "yellow" : "red"); std::string eventType = isUserEvent ? "USER_EVENT" : (node->isCurrentCmdQVirtualEvent() ? "---V_EVENT " : "-----EVENT "); std::string commandType = ""; if (isUserEvent == false) { commandType = NEO::cmdTypetoString(node->getCommandType()); } static const char *status[] = { "CL_COMPLETE", "CL_RUNNING", "CL_SUBMITTED", "CL_QUEUED", "ABORTED"}; auto taskCount = node->peekTaskCount(); auto taskLevel = node->taskLevel.load(); out << label(node, eventsIdMapping) << "[label=\"{------" << eventType << " ptr=" << node << "------" "|" << commandType << "|" << status[statusId] << "|" "task count="; if (taskCount == CompletionStamp::notReady) { out << "NOT_READY"; } else { out << taskCount; } out << ", level="; if (taskLevel == CompletionStamp::notReady) { out << "NOT_READY"; } else { out << taskLevel; } out << "|CALLBACKS=" << (node->peekHasCallbacks() ? 
"TRUE" : "FALSE") << "}\",color=" << color << "];\n"; if (node->isCurrentCmdQVirtualEvent()) { out << label(node->getCommandQueue()) << "->" << label(node, eventsIdMapping); out << "[label=\"VIRTUAL_EVENT\"]"; out << ";\n"; } } void EventsTracker::dumpEdge(Event *leftNode, Event *rightNode, std::ostream &out, const EventIdMap &eventsIdMapping) { out << label(leftNode, eventsIdMapping) << "->" << label(rightNode, eventsIdMapping) << ";\n"; } // walk in DFS manner void EventsTracker::dumpGraph(Event *node, std::ostream &out, CmdqSet &dumpedCmdQs, std::set &dumpedEvents, const EventIdMap &eventsIdMapping) { if ((node == nullptr) || (dumpedEvents.find(node) != dumpedEvents.end())) { return; } dumpedEvents.insert(node); if (node->getCommandQueue() != nullptr) { dumpQueue(node->getCommandQueue(), out, dumpedCmdQs); } dumpNode(node, out, eventsIdMapping); auto *childNode = node->peekChildEvents(); while (childNode != nullptr) { dumpGraph(childNode->ref, out, dumpedCmdQs, dumpedEvents, eventsIdMapping); dumpEdge(node, childNode->ref, out, eventsIdMapping); childNode = childNode->next; } } TrackedEvent *EventsTracker::getNodes() { return trackedEvents.detachNodes(); } void EventsTracker::dump() { static std::mutex mutex; std::lock_guard lock(mutex); auto time = std::chrono::system_clock::now(); std::string dumpFileName = "eg_" "reg" + std::to_string(reinterpret_cast(this)) + "_" + std::to_string(time.time_since_epoch().count()) + ".gv"; auto out = createDumpStream(dumpFileName); *out << "digraph events_registry_" << this << " {\n"; *out << "node [shape=record]\n"; *out << "//pragma: somePragmaData" << "\n"; auto allNodes = getNodes(); EventIdMap deadNodeTags; auto curr = allNodes; TrackedEvent *prev = nullptr; EventIdMap eventsIdMapping; while (curr != nullptr) { auto next = curr->next; bool eraseNode = false; if (curr->eventId < 0) { auto prevTag = deadNodeTags.find(curr->ev); if (prevTag == deadNodeTags.end()) { deadNodeTags[curr->ev] = -curr->eventId; } eraseNode = true; 
} else if ((deadNodeTags.find(curr->ev) != deadNodeTags.end()) && (deadNodeTags[curr->ev] > curr->eventId)) { eraseNode = true; } if (eraseNode) { if (prev != nullptr) { prev->next = next; } if (allNodes == curr) { allNodes = nullptr; } delete curr; } else { if (allNodes == nullptr) { allNodes = curr; } prev = curr; eventsIdMapping[curr->ev] = curr->eventId; } curr = next; } auto node = allNodes; CmdqSet dumpedCmdQs; std::set dumpedEvents; while (node != nullptr) { if (node->ev->peekNumEventsBlockingThis() != 0) { node = node->next; continue; } dumpGraph(node->ev, *out, dumpedCmdQs, dumpedEvents, eventsIdMapping); node = node->next; } *out << "\n}\n"; if (allNodes == nullptr) { return; } if (trackedEvents.peekHead() != nullptr) { trackedEvents.peekHead()->getTail()->insertAllNext(*allNodes); } else { auto rest = allNodes->next; trackedEvents.pushFrontOne(*allNodes); if (rest != nullptr) { allNodes->insertAllNext(*rest); } } } void EventsTracker::notifyCreation(Event *eventToTrack) { dump(); auto trackedE = new TrackedEvent{eventToTrack, eventId++}; trackedEvents.pushFrontOne(*trackedE); } void EventsTracker::notifyDestruction(Event *eventToDestroy) { auto trackedE = new TrackedEvent{eventToDestroy, -(eventId++)}; trackedEvents.pushFrontOne(*trackedE); dump(); } void EventsTracker::notifyTransitionedExecutionStatus() { dump(); } std::unique_ptr EventsTracker::createDumpStream(const std::string &filename) { return std::make_unique(filename, std::ios::binary | std::ios::out); } } // namespace NEO compute-runtime-22.14.22890/opencl/source/event/event_tracker.h000066400000000000000000000034251422164147700242640ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/utilities/iflist.h" #include "opencl/source/event/event.h" #include #include namespace NEO { class CommandQueue; struct TrackedEvent : IFNode { TrackedEvent(Event *ev, int64_t eventId) : ev(ev), eventId(eventId) { } 
Event *ev = nullptr; int64_t eventId = 1; }; class EventsTracker { using EventIdMap = std::unordered_map; using CmdqSet = std::set; protected: std::atomic eventId{0}; static std::unique_ptr globalEvTracker; IFList trackedEvents; EventsTracker() = default; public: void dump(); void notifyCreation(Event *eventToTrack); void notifyDestruction(Event *eventToDestroy); void notifyTransitionedExecutionStatus(); MOCKABLE_VIRTUAL ~EventsTracker() = default; MOCKABLE_VIRTUAL TrackedEvent *getNodes(); MOCKABLE_VIRTUAL std::unique_ptr createDumpStream(const std::string &filename); static EventsTracker &getEventsTracker(); static std::string label(Event *node, const EventIdMap &eventsIdMapping); static std::string label(CommandQueue *cmdQ); static void dumpQueue(CommandQueue *cmdQ, std::ostream &out, CmdqSet &dumpedCmdQs); static void dumpEdge(Event *leftNode, Event *rightNode, std::ostream &out, const EventIdMap &eventsIdMapping); static void dumpNode(Event *node, std::ostream &out, const EventIdMap &eventsIdMapping); static void dumpGraph(Event *node, std::ostream &out, CmdqSet &dumpedCmdQs, std::set &dumpedEvents, const EventIdMap &eventsIdMapping); }; } // namespace NEO compute-runtime-22.14.22890/opencl/source/event/user_event.cpp000066400000000000000000000042611422164147700241410ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "user_event.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/device/device.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/context/context.h" namespace NEO { UserEvent::UserEvent(Context *ctx) : Event(ctx, nullptr, CL_COMMAND_USER, CompletionStamp::notReady, CompletionStamp::notReady) { transitionExecutionStatus(CL_QUEUED); } void UserEvent::updateExecutionStatus() { return; } WaitStatus UserEvent::wait(bool blocking, bool useQuickKmdSleep) { while (updateStatusAndCheckCompletion() == false) { if 
(blocking == false) { return WaitStatus::NotReady; } } return WaitStatus::Ready; } uint32_t UserEvent::getTaskLevel() { if (peekExecutionStatus() == CL_COMPLETE) { return 0; } return CompletionStamp::notReady; } bool UserEvent::isInitialEventStatus() const { return executionStatus == CL_QUEUED; } VirtualEvent::VirtualEvent(CommandQueue *cmdQ, Context *ctx) : Event(ctx, cmdQ, -1, CompletionStamp::notReady, CompletionStamp::notReady) { transitionExecutionStatus(CL_QUEUED); // internal object - no need for API refcount convertToInternalObject(); } void VirtualEvent::updateExecutionStatus() { } WaitStatus VirtualEvent::wait(bool blocking, bool useQuickKmdSleep) { while (updateStatusAndCheckCompletion() == false) { if (blocking == false) { return WaitStatus::NotReady; } } return WaitStatus::Ready; } uint32_t VirtualEvent::getTaskLevel() { uint32_t taskLevel = 0; if (cmdQueue != nullptr) { auto &csr = cmdQueue->getGpgpuCommandStreamReceiver(); taskLevel = csr.peekTaskLevel(); } return taskLevel; } bool VirtualEvent::setStatus(cl_int status) { // virtual events are just helper events and will have either // "waiting" (after construction) or "complete" (on change if not blocked) execution state if (isStatusCompletedByTermination(status) == false) { status = CL_COMPLETE; } return Event::setStatus(status); } } // namespace NEO compute-runtime-22.14.22890/opencl/source/event/user_event.h000066400000000000000000000015461422164147700236110ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "event.h" namespace NEO { class CommandQueue; class Context; class UserEvent : public Event { public: UserEvent(Context *ctx = nullptr); ~UserEvent() override = default; WaitStatus wait(bool blocking, bool useQuickKmdSleep) override; void updateExecutionStatus() override; uint32_t getTaskLevel() override; bool isInitialEventStatus() const; }; class VirtualEvent : public Event { public: 
VirtualEvent(CommandQueue *cmdQ = nullptr, Context *ctx = nullptr); ~VirtualEvent() override = default; WaitStatus wait(bool blocking, bool useQuickKmdSleep) override; bool setStatus(cl_int status) override; void updateExecutionStatus() override; uint32_t getTaskLevel() override; }; } // namespace NEO compute-runtime-22.14.22890/opencl/source/execution_environment/000077500000000000000000000000001422164147700245615ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/source/execution_environment/CMakeLists.txt000066400000000000000000000005631422164147700273250ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(RUNTIME_SRCS_EXECUTION_ENVIRONMENT ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/cl_execution_environment.h ${CMAKE_CURRENT_SOURCE_DIR}/cl_execution_environment.cpp ) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_EXECUTION_ENVIRONMENT}) compute-runtime-22.14.22890/opencl/source/execution_environment/cl_execution_environment.cpp000066400000000000000000000017621422164147700324000ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/execution_environment/cl_execution_environment.h" #include "opencl/source/built_ins/builtins_dispatch_builder.h" #include "opencl/source/event/async_events_handler.h" namespace NEO { ClExecutionEnvironment::ClExecutionEnvironment() : ExecutionEnvironment() { asyncEventsHandler.reset(new AsyncEventsHandler()); } AsyncEventsHandler *ClExecutionEnvironment::getAsyncEventsHandler() const { return asyncEventsHandler.get(); } ClExecutionEnvironment::~ClExecutionEnvironment() { asyncEventsHandler->closeThread(); }; void ClExecutionEnvironment::prepareRootDeviceEnvironments(uint32_t numRootDevices) { ExecutionEnvironment::prepareRootDeviceEnvironments(numRootDevices); builtinOpsBuilders.resize(numRootDevices); for (auto i = 0u; i < numRootDevices; i++) { 
builtinOpsBuilders[i] = std::make_unique(EBuiltInOps::COUNT); } } } // namespace NEO compute-runtime-22.14.22890/opencl/source/execution_environment/cl_execution_environment.h000066400000000000000000000017061422164147700320430ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/execution_environment/execution_environment.h" #include "built_in_ops.h" #include #include #include namespace NEO { class AsyncEventsHandler; class BuiltinDispatchInfoBuilder; class ClExecutionEnvironment : public ExecutionEnvironment { public: ClExecutionEnvironment(); AsyncEventsHandler *getAsyncEventsHandler() const; ~ClExecutionEnvironment() override; void prepareRootDeviceEnvironments(uint32_t numRootDevices) override; using BuilderT = std::pair, std::once_flag>; BuilderT *peekBuilders(uint32_t rootDeviceIndex) { return builtinOpsBuilders[rootDeviceIndex].get(); } protected: std::vector> builtinOpsBuilders; std::unique_ptr asyncEventsHandler; }; } // namespace NEOcompute-runtime-22.14.22890/opencl/source/gen11/000077500000000000000000000000001422164147700210455ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/source/gen11/CMakeLists.txt000066400000000000000000000002031422164147700236000ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(SUPPORT_GEN11) add_subdirectories() endif() compute-runtime-22.14.22890/opencl/source/gen11/buffer_gen11.cpp000066400000000000000000000005261422164147700240200ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen11/hw_cmds.h" #include "opencl/source/mem_obj/buffer_base.inl" namespace NEO { typedef ICLFamily Family; static auto gfxCore = IGFX_GEN11_CORE; #include "opencl/source/mem_obj/buffer_factory_init.inl" } // namespace NEO 
compute-runtime-22.14.22890/opencl/source/gen11/cl_hw_helper_gen11.cpp000066400000000000000000000013661422164147700252050ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/populate_factory.h" #include "opencl/source/helpers/cl_hw_helper_base.inl" #include "opencl/source/helpers/cl_hw_helper_bdw_and_later.inl" #include "hw_cmds.h" namespace NEO { using Family = ICLFamily; static auto gfxCore = IGFX_GEN11_CORE; template <> void populateFactoryTable>() { extern ClHwHelper *clHwHelperFactory[IGFX_MAX_CORE]; clHwHelperFactory[gfxCore] = &ClHwHelperHw::get(); } template <> cl_version ClHwHelperHw::getDeviceIpVersion(const HardwareInfo &hwInfo) const { return makeDeviceIpVersion(11, 0, 0); } template class ClHwHelperHw; } // namespace NEO compute-runtime-22.14.22890/opencl/source/gen11/command_queue_gen11.cpp000066400000000000000000000013631422164147700253710ustar00rootroot00000000000000/* * Copyright (C) 2019-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/populate_factory.h" #include "shared/source/memory_manager/unified_memory_manager.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/command_queue/command_queue_hw_bdw_and_later.inl" #include "opencl/source/command_queue/enqueue_resource_barrier.h" namespace NEO { typedef ICLFamily Family; static auto gfxCore = IGFX_GEN11_CORE; template class CommandQueueHw; template <> void populateFactoryTable>() { extern CommandQueueCreateFunc commandQueueFactory[IGFX_MAX_CORE]; commandQueueFactory[gfxCore] = CommandQueueHw::create; } } // namespace NEO compute-runtime-22.14.22890/opencl/source/gen11/enable_family_full_ocl_gen11.cpp000066400000000000000000000014231422164147700272120ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/populate_factory.h" #include 
"opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/helpers/cl_hw_helper.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/sampler/sampler.h" namespace NEO { using Family = ICLFamily; struct EnableOCLGen11 { EnableOCLGen11() { populateFactoryTable>(); populateFactoryTable>(); populateFactoryTable>(); populateFactoryTable>(); populateFactoryTable>(); } }; static EnableOCLGen11 enable; } // namespace NEO compute-runtime-22.14.22890/opencl/source/gen11/gpgpu_walker_gen11.cpp000066400000000000000000000007061422164147700252360ustar00rootroot00000000000000/* * Copyright (C) 2019-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen11/hw_info.h" #include "opencl/source/command_queue/gpgpu_walker_bdw_and_later.inl" #include "opencl/source/command_queue/hardware_interface_bdw_and_later.inl" namespace NEO { template class HardwareInterface; template class GpgpuWalkerHelper; template struct EnqueueOperation; } // namespace NEO compute-runtime-22.14.22890/opencl/source/gen11/gtpin_setup_gen11.cpp000066400000000000000000000014341422164147700251070ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/gtpin/gtpin_hw_helper.h" #include "opencl/source/gtpin/gtpin_hw_helper.inl" #include "opencl/source/gtpin/gtpin_hw_helper_bdw_and_later.inl" #include "ocl_igc_shared/gtpin/gtpin_ocl_interface.h" namespace NEO { extern GTPinHwHelper *gtpinHwHelperFactory[IGFX_MAX_CORE]; typedef ICLFamily Family; static const auto gfxFamily = IGFX_GEN11_CORE; template <> uint32_t GTPinHwHelperHw::getGenVersion() { return gtpin::GTPIN_GEN_11; } template class GTPinHwHelperHw; struct GTPinEnableGen11 { GTPinEnableGen11() { gtpinHwHelperFactory[gfxFamily] = >PinHwHelperHw::get(); } }; static GTPinEnableGen11 gtpinEnable; } // namespace NEO 
compute-runtime-22.14.22890/opencl/source/gen11/hardware_commands_helper_gen11.cpp000066400000000000000000000010101422164147700275510ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/gen11/hw_cmds.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/helpers/hardware_commands_helper_base.inl" #include "opencl/source/helpers/hardware_commands_helper_bdw_and_later.inl" namespace NEO { using FamilyType = ICLFamily; template struct HardwareCommandsHelper; } // namespace NEO compute-runtime-22.14.22890/opencl/source/gen11/image_gen11.cpp000066400000000000000000000012531422164147700236270ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen11/hw_cmds_base.h" #include "opencl/source/mem_obj/image.inl" #include namespace NEO { using Family = ICLFamily; static auto gfxCore = IGFX_GEN11_CORE; template <> void ImageHw::appendSurfaceStateParams(RENDER_SURFACE_STATE *surfaceState, uint32_t rootDeviceIndex, bool useGlobalAtomics) { if (hasAlphaChannel(&imageFormat)) { surfaceState->setSampleTapDiscardDisable(RENDER_SURFACE_STATE::SAMPLE_TAP_DISCARD_DISABLE_ENABLE); } } } // namespace NEO // factory initializer #include "opencl/source/mem_obj/image_factory_init.inl" compute-runtime-22.14.22890/opencl/source/gen11/sampler_gen11.cpp000066400000000000000000000005761422164147700242170ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen11/hw_cmds.h" #include "opencl/source/sampler/sampler.h" #include "opencl/source/sampler/sampler.inl" namespace NEO { typedef ICLFamily Family; static auto gfxCore = IGFX_GEN11_CORE; #include "opencl/source/sampler/sampler_factory_init.inl" } // namespace NEO 
compute-runtime-22.14.22890/opencl/source/gen12lp/000077500000000000000000000000001422164147700214025ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/source/gen12lp/CMakeLists.txt000066400000000000000000000002051422164147700241370ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(SUPPORT_GEN12LP) add_subdirectories() endif() compute-runtime-22.14.22890/opencl/source/gen12lp/additional_files_gen12lp.cmake000066400000000000000000000011661422164147700272320ustar00rootroot00000000000000# # Copyright (C) 2019-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(RUNTIME_SRCS_ADDITIONAL_FILES_GEN12LP ${CMAKE_CURRENT_SOURCE_DIR}/gen12lp/definitions${BRANCH_DIR_SUFFIX}command_queue_helpers_gen12lp.inl ${CMAKE_CURRENT_SOURCE_DIR}/gen12lp/definitions${BRANCH_DIR_SUFFIX}hardware_commands_helper_gen12lp.inl ) include_directories(${NEO_SOURCE_DIR}/opencl/source/gen12lp/definitions${BRANCH_DIR_SUFFIX}) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_ADDITIONAL_FILES_GEN12LP}) set_property(GLOBAL PROPERTY RUNTIME_SRCS_ADDITIONAL_FILES_GEN12LP ${RUNTIME_SRCS_ADDITIONAL_FILES_GEN12LP}) compute-runtime-22.14.22890/opencl/source/gen12lp/buffer_gen12lp.cpp000066400000000000000000000005341422164147700247110ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen12lp/hw_cmds.h" #include "opencl/source/mem_obj/buffer_base.inl" namespace NEO { typedef TGLLPFamily Family; static auto gfxCore = IGFX_GEN12LP_CORE; #include "opencl/source/mem_obj/buffer_factory_init.inl" } // namespace NEO compute-runtime-22.14.22890/opencl/source/gen12lp/cl_hw_helper_gen12lp.cpp000066400000000000000000000016731422164147700261000ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/populate_factory.h" #include 
"opencl/source/helpers/cl_hw_helper_base.inl" #include "opencl/source/helpers/cl_hw_helper_bdw_and_later.inl" #include "hw_cmds.h" namespace NEO { using Family = TGLLPFamily; static auto gfxCore = IGFX_GEN12LP_CORE; template <> void populateFactoryTable>() { extern ClHwHelper *clHwHelperFactory[IGFX_MAX_CORE]; clHwHelperFactory[gfxCore] = &ClHwHelperHw::get(); } template <> cl_device_feature_capabilities_intel ClHwHelperHw::getSupportedDeviceFeatureCapabilities() const { return CL_DEVICE_FEATURE_FLAG_DP4A_INTEL; } template <> cl_version ClHwHelperHw::getDeviceIpVersion(const HardwareInfo &hwInfo) const { return makeDeviceIpVersion(12, 0, makeDeviceRevision(hwInfo)); } template class ClHwHelperHw; } // namespace NEO compute-runtime-22.14.22890/opencl/source/gen12lp/command_queue_gen12lp.cpp000066400000000000000000000012431422164147700262600ustar00rootroot00000000000000/* * Copyright (C) 2019-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/populate_factory.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/command_queue/command_queue_hw_bdw_and_later.inl" #include "command_queue_helpers_gen12lp.inl" namespace NEO { typedef TGLLPFamily Family; static auto gfxCore = IGFX_GEN12LP_CORE; template <> void populateFactoryTable>() { extern CommandQueueCreateFunc commandQueueFactory[IGFX_MAX_CORE]; commandQueueFactory[gfxCore] = CommandQueueHw::create; } template class CommandQueueHw; } // namespace NEO compute-runtime-22.14.22890/opencl/source/gen12lp/definitions/000077500000000000000000000000001422164147700237155ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/source/gen12lp/definitions/command_queue_helpers_gen12lp.inl000066400000000000000000000002351422164147700323150ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_queue/enqueue_resource_barrier.h" 
compute-runtime-22.14.22890/opencl/source/gen12lp/definitions/hardware_commands_helper_gen12lp.inl000066400000000000000000000001321422164147700327620ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ compute-runtime-22.14.22890/opencl/source/gen12lp/enable_family_full_ocl_gen12lp.cpp000066400000000000000000000014331422164147700301050ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/populate_factory.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/helpers/cl_hw_helper.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/sampler/sampler.h" namespace NEO { using Family = TGLLPFamily; struct EnableOCLGen12LP { EnableOCLGen12LP() { populateFactoryTable>(); populateFactoryTable>(); populateFactoryTable>(); populateFactoryTable>(); populateFactoryTable>(); } }; static EnableOCLGen12LP enable; } // namespace NEO compute-runtime-22.14.22890/opencl/source/gen12lp/gpgpu_walker_gen12lp.cpp000066400000000000000000000037131422164147700261310ustar00rootroot00000000000000/* * Copyright (C) 2019-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen12lp/hw_info.h" #include "shared/source/gmm_helper/gmm.h" #include "shared/source/gmm_helper/resource_info.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "opencl/source/command_queue/gpgpu_walker_bdw_and_later.inl" #include "opencl/source/command_queue/hardware_interface_bdw_and_later.inl" namespace NEO { template <> void GpgpuWalkerHelper::adjustMiStoreRegMemMode(MI_STORE_REG_MEM *storeCmd) { storeCmd->setMmioRemapEnable(true); } template <> void HardwareInterface::dispatchWorkarounds( LinearStream *commandStream, CommandQueue &commandQueue, Kernel &kernel, const bool &enable) { using MI_LOAD_REGISTER_IMM = typename 
TGLLPFamily::MI_LOAD_REGISTER_IMM; using PIPE_CONTROL = typename TGLLPFamily::PIPE_CONTROL; if (kernel.requiresWaDisableRccRhwoOptimization()) { PIPE_CONTROL cmdPipeControl = TGLLPFamily::cmdInitPipeControl; cmdPipeControl.setCommandStreamerStallEnable(true); auto pCmdPipeControl = commandStream->getSpaceForCmd(); *pCmdPipeControl = cmdPipeControl; uint32_t value = enable ? 0x40004000 : 0x40000000; NEO::LriHelper::program(commandStream, 0x7010, value, false); } } template <> size_t GpgpuWalkerHelper::getSizeForWaDisableRccRhwoOptimization(const Kernel *pKernel) { if (pKernel->requiresWaDisableRccRhwoOptimization()) { return (2 * (sizeof(TGLLPFamily::PIPE_CONTROL) + sizeof(TGLLPFamily::MI_LOAD_REGISTER_IMM))); } return 0u; } template class HardwareInterface; template class GpgpuWalkerHelper; template struct EnqueueOperation; } // namespace NEO compute-runtime-22.14.22890/opencl/source/gen12lp/gtpin_setup_gen12lp.cpp000066400000000000000000000014501422164147700257770ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/gtpin/gtpin_hw_helper.h" #include "opencl/source/gtpin/gtpin_hw_helper.inl" #include "opencl/source/gtpin/gtpin_hw_helper_bdw_and_later.inl" #include "ocl_igc_shared/gtpin/gtpin_ocl_interface.h" namespace NEO { extern GTPinHwHelper *gtpinHwHelperFactory[IGFX_MAX_CORE]; typedef TGLLPFamily Family; static const auto gfxFamily = IGFX_GEN12LP_CORE; template <> uint32_t GTPinHwHelperHw::getGenVersion() { return gtpin::GTPIN_GEN_12_1; } template class GTPinHwHelperHw; struct GTPinEnableGen12LP { GTPinEnableGen12LP() { gtpinHwHelperFactory[gfxFamily] = >PinHwHelperHw::get(); } }; static GTPinEnableGen12LP gtpinEnable; } // namespace NEO compute-runtime-22.14.22890/opencl/source/gen12lp/hardware_commands_helper_gen12lp.cpp000066400000000000000000000014531422164147700304560ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: 
MIT * */ #include "hardware_commands_helper_gen12lp.inl" #include "shared/source/gen12lp/hw_cmds.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/helpers/hardware_commands_helper_base.inl" #include "opencl/source/helpers/hardware_commands_helper_bdw_and_later.inl" namespace NEO { using FamilyType = TGLLPFamily; template <> size_t HardwareCommandsHelper::getSizeRequiredCS() { size_t size = 2 * sizeof(typename FamilyType::MEDIA_STATE_FLUSH) + sizeof(typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD); return size; } template struct HardwareCommandsHelper; } // namespace NEO compute-runtime-22.14.22890/opencl/source/gen12lp/image_gen12lp.cpp000066400000000000000000000006541422164147700245250ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen12lp/hw_cmds_base.h" #include "opencl/source/mem_obj/image.inl" namespace NEO { using Family = TGLLPFamily; static auto gfxCore = IGFX_GEN12LP_CORE; } // namespace NEO #include "opencl/source/mem_obj/image_tgllp_and_later.inl" // factory initializer #include "opencl/source/mem_obj/image_factory_init.inl" compute-runtime-22.14.22890/opencl/source/gen12lp/sampler_gen12lp.cpp000066400000000000000000000004341422164147700251020ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen12lp/hw_cmds_base.h" using Family = NEO::TGLLPFamily; constexpr static auto gfxCore = IGFX_GEN12LP_CORE; #include "opencl/source/sampler/sampler_tgllp_and_later.inl" compute-runtime-22.14.22890/opencl/source/gen8/000077500000000000000000000000001422164147700207735ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/source/gen8/CMakeLists.txt000066400000000000000000000002021422164147700235250ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel Corporation # # 
SPDX-License-Identifier: MIT # if(SUPPORT_GEN8) add_subdirectories() endif() compute-runtime-22.14.22890/opencl/source/gen8/buffer_gen8.cpp000066400000000000000000000005241422164147700236720ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen8/hw_cmds.h" #include "opencl/source/mem_obj/buffer_base.inl" namespace NEO { typedef BDWFamily Family; static auto gfxCore = IGFX_GEN8_CORE; #include "opencl/source/mem_obj/buffer_factory_init.inl" } // namespace NEO compute-runtime-22.14.22890/opencl/source/gen8/cl_hw_helper_gen8.cpp000066400000000000000000000013641422164147700250570ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/populate_factory.h" #include "opencl/source/helpers/cl_hw_helper_base.inl" #include "opencl/source/helpers/cl_hw_helper_bdw_and_later.inl" #include "hw_cmds.h" namespace NEO { using Family = BDWFamily; static auto gfxCore = IGFX_GEN8_CORE; template <> void populateFactoryTable>() { extern ClHwHelper *clHwHelperFactory[IGFX_MAX_CORE]; clHwHelperFactory[gfxCore] = &ClHwHelperHw::get(); } template <> cl_version ClHwHelperHw::getDeviceIpVersion(const HardwareInfo &hwInfo) const { return makeDeviceIpVersion(8, 0, 0); } template class ClHwHelperHw; } // namespace NEO compute-runtime-22.14.22890/opencl/source/gen8/command_queue_gen8.cpp000066400000000000000000000013631422164147700252450ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/populate_factory.h" #include "shared/source/memory_manager/unified_memory_manager.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/command_queue/command_queue_hw_bdw_and_later.inl" #include "opencl/source/command_queue/enqueue_resource_barrier.h" namespace NEO { typedef BDWFamily Family; static auto gfxCore = 
IGFX_GEN8_CORE; template class CommandQueueHw; template <> void populateFactoryTable>() { extern CommandQueueCreateFunc commandQueueFactory[IGFX_MAX_CORE]; commandQueueFactory[gfxCore] = CommandQueueHw::create; } } // namespace NEO compute-runtime-22.14.22890/opencl/source/gen8/enable_family_full_ocl_gen8.cpp000066400000000000000000000014201422164147700270630ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/populate_factory.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/helpers/cl_hw_helper.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/sampler/sampler.h" namespace NEO { using Family = BDWFamily; struct EnableOCLGen8 { EnableOCLGen8() { populateFactoryTable>(); populateFactoryTable>(); populateFactoryTable>(); populateFactoryTable>(); populateFactoryTable>(); } }; static EnableOCLGen8 enable; } // namespace NEO compute-runtime-22.14.22890/opencl/source/gen8/gpgpu_walker_gen8.cpp000066400000000000000000000052241422164147700251120ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen8/hw_info.h" #include "opencl/source/command_queue/gpgpu_walker_bdw_and_later.inl" #include "opencl/source/command_queue/hardware_interface_bdw_and_later.inl" namespace NEO { template <> void GpgpuWalkerHelper::applyWADisableLSQCROPERFforOCL(NEO::LinearStream *pCommandStream, const Kernel &kernel, bool disablePerfMode) { if (disablePerfMode) { if (kernel.getKernelInfo().kernelDescriptor.kernelAttributes.flags.usesFencesForReadWriteImages) { // Set bit L3SQC_BIT_LQSC_RO_PERF_DIS in L3SQC_REG4 GpgpuWalkerHelper::addAluReadModifyWriteRegister(pCommandStream, L3SQC_REG4, AluRegisters::OPCODE_OR, L3SQC_BIT_LQSC_RO_PERF_DIS); } } else { if 
(kernel.getKernelInfo().kernelDescriptor.kernelAttributes.flags.usesFencesForReadWriteImages) { // Add PIPE_CONTROL with CS_Stall to wait till GPU finishes its work typedef typename BDWFamily::PIPE_CONTROL PIPE_CONTROL; auto pipeControlSpace = reinterpret_cast(pCommandStream->getSpace(sizeof(PIPE_CONTROL))); auto pipeControl = BDWFamily::cmdInitPipeControl; pipeControl.setCommandStreamerStallEnable(true); *pipeControlSpace = pipeControl; // Clear bit L3SQC_BIT_LQSC_RO_PERF_DIS in L3SQC_REG4 GpgpuWalkerHelper::addAluReadModifyWriteRegister(pCommandStream, L3SQC_REG4, AluRegisters::OPCODE_AND, ~L3SQC_BIT_LQSC_RO_PERF_DIS); } } } template <> size_t GpgpuWalkerHelper::getSizeForWADisableLSQCROPERFforOCL(const Kernel *pKernel) { typedef typename BDWFamily::MI_LOAD_REGISTER_REG MI_LOAD_REGISTER_REG; typedef typename BDWFamily::MI_LOAD_REGISTER_IMM MI_LOAD_REGISTER_IMM; typedef typename BDWFamily::PIPE_CONTROL PIPE_CONTROL; typedef typename BDWFamily::MI_MATH MI_MATH; typedef typename BDWFamily::MI_MATH_ALU_INST_INLINE MI_MATH_ALU_INST_INLINE; size_t n = 0; if (pKernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.usesFencesForReadWriteImages) { n += sizeof(PIPE_CONTROL) + (2 * sizeof(MI_LOAD_REGISTER_REG) + sizeof(MI_LOAD_REGISTER_IMM) + sizeof(PIPE_CONTROL) + sizeof(MI_MATH) + NUM_ALU_INST_FOR_READ_MODIFY_WRITE * sizeof(MI_MATH_ALU_INST_INLINE)) * 2; // For 2 WADisableLSQCROPERFforOCL WAs } return n; } template class HardwareInterface; template class GpgpuWalkerHelper; template struct EnqueueOperation; } // namespace NEO compute-runtime-22.14.22890/opencl/source/gen8/gtpin_setup_gen8.cpp000066400000000000000000000014271422164147700247650ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/gtpin/gtpin_hw_helper.h" #include "opencl/source/gtpin/gtpin_hw_helper.inl" #include "opencl/source/gtpin/gtpin_hw_helper_bdw_and_later.inl" #include 
"ocl_igc_shared/gtpin/gtpin_ocl_interface.h" namespace NEO { extern GTPinHwHelper *gtpinHwHelperFactory[IGFX_MAX_CORE]; typedef BDWFamily Family; static const auto gfxFamily = IGFX_GEN8_CORE; template <> uint32_t GTPinHwHelperHw::getGenVersion() { return gtpin::GTPIN_GEN_8; } template class GTPinHwHelperHw; struct GTPinEnableGen8 { GTPinEnableGen8() { gtpinHwHelperFactory[gfxFamily] = >PinHwHelperHw::get(); } }; static GTPinEnableGen8 gtpinEnable; } // namespace NEO compute-runtime-22.14.22890/opencl/source/gen8/hardware_commands_helper_gen8.cpp000066400000000000000000000007061422164147700274400ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen8/hw_cmds.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/helpers/hardware_commands_helper_base.inl" #include "opencl/source/helpers/hardware_commands_helper_bdw_and_later.inl" namespace NEO { using FamilyType = BDWFamily; template struct HardwareCommandsHelper; } // namespace NEO compute-runtime-22.14.22890/opencl/source/gen8/image_gen8.cpp000066400000000000000000000010331422164147700234770ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen8/hw_cmds_base.h" #include "opencl/source/mem_obj/image.inl" #include namespace NEO { using Family = BDWFamily; static auto gfxCore = IGFX_GEN8_CORE; template <> void ImageHw::setMediaSurfaceRotation(void *) {} template <> void ImageHw::setSurfaceMemoryObjectControlState(void *, uint32_t) {} } // namespace NEO // factory initializer #include "opencl/source/mem_obj/image_factory_init.inl" compute-runtime-22.14.22890/opencl/source/gen8/sampler_gen8.cpp000066400000000000000000000005741422164147700240710ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen8/hw_cmds.h" #include 
"opencl/source/sampler/sampler.h" #include "opencl/source/sampler/sampler.inl" namespace NEO { typedef BDWFamily Family; static auto gfxCore = IGFX_GEN8_CORE; #include "opencl/source/sampler/sampler_factory_init.inl" } // namespace NEO compute-runtime-22.14.22890/opencl/source/gen9/000077500000000000000000000000001422164147700207745ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/source/gen9/CMakeLists.txt000066400000000000000000000002021422164147700235260ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(SUPPORT_GEN9) add_subdirectories() endif() compute-runtime-22.14.22890/opencl/source/gen9/buffer_gen9.cpp000066400000000000000000000005241422164147700236740ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen9/hw_cmds.h" #include "opencl/source/mem_obj/buffer_base.inl" namespace NEO { typedef SKLFamily Family; static auto gfxCore = IGFX_GEN9_CORE; #include "opencl/source/mem_obj/buffer_factory_init.inl" } // namespace NEO compute-runtime-22.14.22890/opencl/source/gen9/cl_hw_helper_gen9.cpp000066400000000000000000000013641422164147700250610ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/populate_factory.h" #include "opencl/source/helpers/cl_hw_helper_base.inl" #include "opencl/source/helpers/cl_hw_helper_bdw_and_later.inl" #include "hw_cmds.h" namespace NEO { using Family = SKLFamily; static auto gfxCore = IGFX_GEN9_CORE; template <> void populateFactoryTable>() { extern ClHwHelper *clHwHelperFactory[IGFX_MAX_CORE]; clHwHelperFactory[gfxCore] = &ClHwHelperHw::get(); } template <> cl_version ClHwHelperHw::getDeviceIpVersion(const HardwareInfo &hwInfo) const { return makeDeviceIpVersion(9, 0, 0); } template class ClHwHelperHw; } // namespace NEO 
compute-runtime-22.14.22890/opencl/source/gen9/command_queue_gen9.cpp000066400000000000000000000013631422164147700252470ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/populate_factory.h" #include "shared/source/memory_manager/unified_memory_manager.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/command_queue/command_queue_hw_bdw_and_later.inl" #include "opencl/source/command_queue/enqueue_resource_barrier.h" namespace NEO { typedef SKLFamily Family; static auto gfxCore = IGFX_GEN9_CORE; template class CommandQueueHw; template <> void populateFactoryTable>() { extern CommandQueueCreateFunc commandQueueFactory[IGFX_MAX_CORE]; commandQueueFactory[gfxCore] = CommandQueueHw::create; } } // namespace NEO compute-runtime-22.14.22890/opencl/source/gen9/enable_family_full_ocl_gen9.cpp000066400000000000000000000014201422164147700270650ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/populate_factory.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/helpers/cl_hw_helper.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/sampler/sampler.h" namespace NEO { using Family = SKLFamily; struct EnableOCLGen9 { EnableOCLGen9() { populateFactoryTable>(); populateFactoryTable>(); populateFactoryTable>(); populateFactoryTable>(); populateFactoryTable>(); } }; static EnableOCLGen9 enable; } // namespace NEO compute-runtime-22.14.22890/opencl/source/gen9/gpgpu_walker_gen9.cpp000066400000000000000000000052321422164147700251130ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen9/hw_cmds_base.h" #include "opencl/source/command_queue/gpgpu_walker_bdw_and_later.inl" #include 
"opencl/source/command_queue/hardware_interface_bdw_and_later.inl" namespace NEO { template <> void GpgpuWalkerHelper::applyWADisableLSQCROPERFforOCL(NEO::LinearStream *pCommandStream, const Kernel &kernel, bool disablePerfMode) { if (disablePerfMode) { if (kernel.getKernelInfo().kernelDescriptor.kernelAttributes.flags.usesFencesForReadWriteImages) { // Set bit L3SQC_BIT_LQSC_RO_PERF_DIS in L3SQC_REG4 GpgpuWalkerHelper::addAluReadModifyWriteRegister(pCommandStream, L3SQC_REG4, AluRegisters::OPCODE_OR, L3SQC_BIT_LQSC_RO_PERF_DIS); } } else { if (kernel.getKernelInfo().kernelDescriptor.kernelAttributes.flags.usesFencesForReadWriteImages) { // Add PIPE_CONTROL with CS_Stall to wait till GPU finishes its work typedef typename SKLFamily::PIPE_CONTROL PIPE_CONTROL; auto pipeControlSpace = reinterpret_cast(pCommandStream->getSpace(sizeof(PIPE_CONTROL))); auto pipeControl = SKLFamily::cmdInitPipeControl; pipeControl.setCommandStreamerStallEnable(true); *pipeControlSpace = pipeControl; // Clear bit L3SQC_BIT_LQSC_RO_PERF_DIS in L3SQC_REG4 GpgpuWalkerHelper::addAluReadModifyWriteRegister(pCommandStream, L3SQC_REG4, AluRegisters::OPCODE_AND, ~L3SQC_BIT_LQSC_RO_PERF_DIS); } } } template <> size_t GpgpuWalkerHelper::getSizeForWADisableLSQCROPERFforOCL(const Kernel *pKernel) { typedef typename SKLFamily::MI_LOAD_REGISTER_REG MI_LOAD_REGISTER_REG; typedef typename SKLFamily::MI_LOAD_REGISTER_IMM MI_LOAD_REGISTER_IMM; typedef typename SKLFamily::PIPE_CONTROL PIPE_CONTROL; typedef typename SKLFamily::MI_MATH MI_MATH; typedef typename SKLFamily::MI_MATH_ALU_INST_INLINE MI_MATH_ALU_INST_INLINE; size_t n = 0; if (pKernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.usesFencesForReadWriteImages) { n += sizeof(PIPE_CONTROL) + (2 * sizeof(MI_LOAD_REGISTER_REG) + sizeof(MI_LOAD_REGISTER_IMM) + sizeof(PIPE_CONTROL) + sizeof(MI_MATH) + NUM_ALU_INST_FOR_READ_MODIFY_WRITE * sizeof(MI_MATH_ALU_INST_INLINE)) * 2; // For 2 WADisableLSQCROPERFforOCL WAs } return n; } template class 
HardwareInterface; template class GpgpuWalkerHelper; template struct EnqueueOperation; } // namespace NEO compute-runtime-22.14.22890/opencl/source/gen9/gtpin_setup_gen9.cpp000066400000000000000000000014271422164147700247670ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/gtpin/gtpin_hw_helper.h" #include "opencl/source/gtpin/gtpin_hw_helper.inl" #include "opencl/source/gtpin/gtpin_hw_helper_bdw_and_later.inl" #include "ocl_igc_shared/gtpin/gtpin_ocl_interface.h" namespace NEO { extern GTPinHwHelper *gtpinHwHelperFactory[IGFX_MAX_CORE]; typedef SKLFamily Family; static const auto gfxFamily = IGFX_GEN9_CORE; template <> uint32_t GTPinHwHelperHw::getGenVersion() { return gtpin::GTPIN_GEN_9; } template class GTPinHwHelperHw; struct GTPinEnableGen9 { GTPinEnableGen9() { gtpinHwHelperFactory[gfxFamily] = >PinHwHelperHw::get(); } }; static GTPinEnableGen9 gtpinEnable; } // namespace NEO compute-runtime-22.14.22890/opencl/source/gen9/hardware_commands_helper_gen9.cpp000066400000000000000000000007321422164147700274410ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen9/hw_cmds.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/helpers/hardware_commands_helper_base.inl" #include "opencl/source/helpers/hardware_commands_helper_bdw_and_later.inl" #include namespace NEO { using FamilyType = SKLFamily; template struct HardwareCommandsHelper; } // namespace NEO compute-runtime-22.14.22890/opencl/source/gen9/image_gen9.cpp000066400000000000000000000005511422164147700235050ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen9/hw_cmds_base.h" #include "opencl/source/mem_obj/image.inl" namespace NEO { using Family = SKLFamily; static auto gfxCore = IGFX_GEN9_CORE; } // namespace NEO 
// factory initializer #include "opencl/source/mem_obj/image_factory_init.inl" compute-runtime-22.14.22890/opencl/source/gen9/sampler_gen9.cpp000066400000000000000000000005741422164147700240730ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen9/hw_cmds.h" #include "opencl/source/sampler/sampler.h" #include "opencl/source/sampler/sampler.inl" namespace NEO { typedef SKLFamily Family; static auto gfxCore = IGFX_GEN9_CORE; #include "opencl/source/sampler/sampler_factory_init.inl" } // namespace NEO compute-runtime-22.14.22890/opencl/source/gtpin/000077500000000000000000000000001422164147700212535ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/source/gtpin/CMakeLists.txt000066400000000000000000000026751422164147700240250ustar00rootroot00000000000000# # Copyright (C) 2018-2021 Intel Corporation # # SPDX-License-Identifier: MIT # if(NOT DISABLED_GTPIN_SUPPORT) set(RUNTIME_SRCS_GTPIN ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/gtpin_callbacks.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gtpin_helpers.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gtpin_helpers.h ${CMAKE_CURRENT_SOURCE_DIR}/gtpin_hw_helper.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gtpin_hw_helper.h ${CMAKE_CURRENT_SOURCE_DIR}/gtpin_hw_helper.inl ${CMAKE_CURRENT_SOURCE_DIR}/gtpin_hw_helper_bdw_and_later.inl ${CMAKE_CURRENT_SOURCE_DIR}/gtpin_init.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gtpin_init.h ${CMAKE_CURRENT_SOURCE_DIR}/gtpin_notify.h ${CMAKE_CURRENT_SOURCE_DIR}/gtpin_defs.h ) if(SUPPORT_XEHP_AND_LATER) list(APPEND RUNTIME_SRCS_GTPIN ${CMAKE_CURRENT_SOURCE_DIR}/gtpin_hw_helper_xehp_and_later.inl ) endif() if(WIN32) set(MSVC_DEF_ADDITIONAL_EXPORTS "${MSVC_DEF_ADDITIONAL_EXPORTS}\nGTPin_Init") endif() else() set(RUNTIME_SRCS_GTPIN ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/gtpin_callback_stubs.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gtpin_notify.h ) endif() target_sources(${NEO_STATIC_LIB_NAME} 
PRIVATE ${RUNTIME_SRCS_GTPIN}) set_property(GLOBAL PROPERTY RUNTIME_SRCS_GTPIN ${RUNTIME_SRCS_GTPIN}) set(MSVC_DEF_ADDITIONAL_EXPORTS "${MSVC_DEF_ADDITIONAL_EXPORTS}" PARENT_SCOPE) add_subdirectories() compute-runtime-22.14.22890/opencl/source/gtpin/gtpin_callback_stubs.cpp000066400000000000000000000016661422164147700261450ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "CL/cl.h" #include namespace NEO { bool isGTPinInitialized = false; void gtpinNotifyContextCreate(cl_context context) { } void gtpinNotifyContextDestroy(cl_context context) { } void gtpinNotifyKernelCreate(cl_kernel kernel) { } void gtpinNotifyKernelSubmit(cl_kernel kernel, void *pCmdQueue) { } void gtpinNotifyPreFlushTask(void *pCmdQueue) { } void gtpinNotifyFlushTask(uint32_t flushedTaskCount) { } void gtpinNotifyTaskCompletion(uint32_t completedTaskCount) { } void gtpinNotifyMakeResident(void *pKernel, void *pCommandStreamReceiver) { } void gtpinNotifyUpdateResidencyList(void *pKernel, void *pResidencyVector) { } void gtpinNotifyPlatformShutdown() { } void *gtpinGetIgcInit() { return nullptr; } void setIgcInfo(const void *igcInfo) { } const void *gtpinGetIgcInfo() { return nullptr; } } // namespace NEO compute-runtime-22.14.22890/opencl/source/gtpin/gtpin_callbacks.cpp000066400000000000000000000312021422164147700250750ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/memory_manager/surface.h" #include "shared/source/memory_manager/unified_memory_manager.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/context/context.h" #include "opencl/source/gtpin/gtpin_defs.h" #include "opencl/source/gtpin/gtpin_hw_helper.h" #include "opencl/source/gtpin/gtpin_notify.h" #include 
"opencl/source/kernel/kernel.h" #include "opencl/source/kernel/multi_device_kernel.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/program/program.h" #include "CL/cl.h" #include "ocl_igc_shared/gtpin/gtpin_ocl_interface.h" #include #include using namespace gtpin; namespace NEO { using GTPinLockType = std::recursive_mutex; extern gtpin::ocl::gtpin_events_t GTPinCallbacks; igc_init_t *pIgcInit = nullptr; std::atomic sequenceCount(1); CommandQueue *pCmdQueueForFlushTask = nullptr; std::deque kernelExecQueue; GTPinLockType kernelExecQueueLock; void gtpinNotifyContextCreate(cl_context context) { if (isGTPinInitialized) { platform_info_t gtpinPlatformInfo; auto pContext = castToObjectOrAbort(context); auto pDevice = pContext->getDevice(0); UNRECOVERABLE_IF(pDevice == nullptr); GFXCORE_FAMILY genFamily = pDevice->getHardwareInfo().platform.eRenderCoreFamily; GTPinHwHelper >pinHelper = GTPinHwHelper::get(genFamily); gtpinPlatformInfo.gen_version = (gtpin::GTPIN_GEN_VERSION)gtpinHelper.getGenVersion(); gtpinPlatformInfo.device_id = static_cast(pDevice->getHardwareInfo().platform.usDeviceID); (*GTPinCallbacks.onContextCreate)((context_handle_t)context, >pinPlatformInfo, &pIgcInit); } } void gtpinNotifyContextDestroy(cl_context context) { if (isGTPinInitialized) { (*GTPinCallbacks.onContextDestroy)((context_handle_t)context); } } void gtpinNotifyKernelCreate(cl_kernel kernel) { if (nullptr == kernel) { return; } if (isGTPinInitialized) { auto pMultiDeviceKernel = castToObjectOrAbort(kernel); auto pKernel = pMultiDeviceKernel->getDefaultKernel(); auto &device = pMultiDeviceKernel->getDevices()[0]->getDevice(); size_t gtpinBTI = pKernel->getNumberOfBindingTableStates(); // Enlarge local copy of SSH by 1 SS GFXCORE_FAMILY genFamily = device.getHardwareInfo().platform.eRenderCoreFamily; GTPinHwHelper >pinHelper = GTPinHwHelper::get(genFamily); if (!gtpinHelper.addSurfaceState(pKernel)) { // Kernel with no SSH or Kernel EM, not supported return; } if 
(pKernel->isKernelHeapSubstituted()) { // ISA for this kernel was already substituted return; } // Notify GT-Pin that new kernel was created Context *pContext = &(pKernel->getContext()); cl_context context = pContext; auto &kernelInfo = pKernel->getKernelInfo(); instrument_params_in_t paramsIn = {}; paramsIn.kernel_type = GTPIN_KERNEL_TYPE_CS; paramsIn.simd = (GTPIN_SIMD_WIDTH)kernelInfo.getMaxSimdSize(); paramsIn.orig_kernel_binary = (uint8_t *)pKernel->getKernelHeap(); paramsIn.orig_kernel_size = static_cast(pKernel->getKernelHeapSize()); paramsIn.buffer_type = GTPIN_BUFFER_BINDFULL; paramsIn.buffer_desc.BTI = static_cast(gtpinBTI); paramsIn.igc_hash_id = kernelInfo.shaderHashCode; paramsIn.kernel_name = (char *)kernelInfo.kernelDescriptor.kernelMetadata.kernelName.c_str(); paramsIn.igc_info = kernelInfo.igcInfoForGtpin; if (kernelInfo.debugData.vIsa != nullptr) { paramsIn.debug_data = kernelInfo.debugData.vIsa; paramsIn.debug_data_size = static_cast(kernelInfo.debugData.vIsaSize); } else { paramsIn.debug_data = nullptr; paramsIn.debug_data_size = 0; } instrument_params_out_t paramsOut = {0}; (*GTPinCallbacks.onKernelCreate)((context_handle_t)(cl_context)context, ¶msIn, ¶msOut); // Substitute ISA of created kernel with instrumented code pKernel->substituteKernelHeap(paramsOut.inst_kernel_binary, paramsOut.inst_kernel_size); pKernel->setKernelId(paramsOut.kernel_id); } } void gtpinNotifyKernelSubmit(cl_kernel kernel, void *pCmdQueue) { if (isGTPinInitialized) { auto pCmdQ = reinterpret_cast(pCmdQueue); auto &device = pCmdQ->getDevice(); auto rootDeviceIndex = device.getRootDeviceIndex(); auto pMultiDeviceKernel = castToObjectOrAbort(kernel); auto pKernel = pMultiDeviceKernel->getKernel(rootDeviceIndex); if (pKernel->getSurfaceStateHeapSize() == 0) { // Kernel with no SSH, not supported return; } Context *pContext = &(pKernel->getContext()); cl_context context = (cl_context)pContext; uint64_t kernelId = pKernel->getKernelId(); command_buffer_handle_t commandBuffer 
= (command_buffer_handle_t)((uintptr_t)(sequenceCount++)); uint32_t kernelOffset = 0; resource_handle_t resource = 0; // Notify GT-Pin that abstract "command buffer" was created (*GTPinCallbacks.onCommandBufferCreate)((context_handle_t)context, commandBuffer); // Notify GT-Pin that kernel was submited for execution (*GTPinCallbacks.onKernelSubmit)(commandBuffer, kernelId, &kernelOffset, &resource); // Create new record in Kernel Execution Queue describing submited kernel pKernel->setStartOffset(kernelOffset); gtpinkexec_t kExec; kExec.pKernel = pKernel; kExec.gtpinResource = (cl_mem)resource; kExec.commandBuffer = commandBuffer; kExec.pCommandQueue = (CommandQueue *)pCmdQueue; std::unique_lock lock{kernelExecQueueLock}; kernelExecQueue.push_back(kExec); lock.unlock(); // Patch SSH[gtpinBTI] with GT-Pin resource if (!resource) { return; } GTPinHwHelper >pinHelper = GTPinHwHelper::get(device.getHardwareInfo().platform.eRenderCoreFamily); size_t gtpinBTI = pKernel->getNumberOfBindingTableStates() - 1; void *pSurfaceState = gtpinHelper.getSurfaceState(pKernel, gtpinBTI); if (gtpinHelper.canUseSharedAllocation(device.getHardwareInfo())) { auto allocData = reinterpret_cast(resource); auto gpuAllocation = allocData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex); size_t size = gpuAllocation->getUnderlyingBufferSize(); Buffer::setSurfaceState(&device, pSurfaceState, false, false, size, gpuAllocation->getUnderlyingBuffer(), 0, gpuAllocation, 0, 0, pKernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, pContext->getNumDevices()); device.getMemoryManager()->getPageFaultManager()->moveAllocationToGpuDomain(reinterpret_cast(gpuAllocation->getGpuAddress())); } else { cl_mem buffer = (cl_mem)resource; auto pBuffer = castToObjectOrAbort(buffer); pBuffer->setArgStateful(pSurfaceState, false, false, false, false, device, pKernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, pContext->getNumDevices()); } } } void 
gtpinNotifyPreFlushTask(void *pCmdQueue) { if (isGTPinInitialized) { pCmdQueueForFlushTask = (CommandQueue *)pCmdQueue; } } void gtpinNotifyFlushTask(uint32_t flushedTaskCount) { if (isGTPinInitialized) { std::unique_lock lock{kernelExecQueueLock}; size_t numElems = kernelExecQueue.size(); for (size_t n = 0; n < numElems; n++) { if ((kernelExecQueue[n].pCommandQueue == pCmdQueueForFlushTask) && !kernelExecQueue[n].isTaskCountValid) { // Update record in Kernel Execution Queue with kernel's TC kernelExecQueue[n].isTaskCountValid = true; kernelExecQueue[n].taskCount = flushedTaskCount; break; } } pCmdQueueForFlushTask = nullptr; } } void gtpinNotifyTaskCompletion(uint32_t completedTaskCount) { if (isGTPinInitialized) { std::unique_lock lock{kernelExecQueueLock}; size_t numElems = kernelExecQueue.size(); for (size_t n = 0; n < numElems;) { if (kernelExecQueue[n].isTaskCountValid && (kernelExecQueue[n].taskCount <= completedTaskCount)) { // Notify GT-Pin that execution of "command buffer" was completed (*GTPinCallbacks.onCommandBufferComplete)(kernelExecQueue[n].commandBuffer); // Remove kernel's record from Kernel Execution Queue kernelExecQueue.erase(kernelExecQueue.begin() + n); numElems--; } else { n++; } } } } void gtpinNotifyMakeResident(void *pKernel, void *pCSR) { if (isGTPinInitialized) { std::unique_lock lock{kernelExecQueueLock}; size_t numElems = kernelExecQueue.size(); for (size_t n = 0; n < numElems; n++) { if ((kernelExecQueue[n].pKernel == pKernel) && !kernelExecQueue[n].isResourceResident && kernelExecQueue[n].gtpinResource) { // It's time for kernel to make resident its GT-Pin resource CommandStreamReceiver *pCommandStreamReceiver = reinterpret_cast(pCSR); GraphicsAllocation *pGfxAlloc = nullptr; Context &context = static_cast(pKernel)->getContext(); GTPinHwHelper >pinHelper = GTPinHwHelper::get(context.getDevice(0)->getHardwareInfo().platform.eRenderCoreFamily); if (gtpinHelper.canUseSharedAllocation(context.getDevice(0)->getHardwareInfo())) { auto 
allocData = reinterpret_cast(kernelExecQueue[n].gtpinResource); pGfxAlloc = allocData->gpuAllocations.getGraphicsAllocation(pCommandStreamReceiver->getRootDeviceIndex()); } else { cl_mem gtpinBuffer = kernelExecQueue[n].gtpinResource; auto pBuffer = castToObjectOrAbort(gtpinBuffer); pGfxAlloc = pBuffer->getGraphicsAllocation(pCommandStreamReceiver->getRootDeviceIndex()); } pCommandStreamReceiver->makeResident(*pGfxAlloc); kernelExecQueue[n].isResourceResident = true; break; } } } } void gtpinNotifyUpdateResidencyList(void *pKernel, void *pResVec) { if (isGTPinInitialized) { std::unique_lock lock{kernelExecQueueLock}; size_t numElems = kernelExecQueue.size(); for (size_t n = 0; n < numElems; n++) { if ((kernelExecQueue[n].pKernel == pKernel) && !kernelExecQueue[n].isResourceResident && kernelExecQueue[n].gtpinResource) { // It's time for kernel to update its residency list with its GT-Pin resource std::vector *pResidencyVector = (std::vector *)pResVec; cl_mem gtpinBuffer = kernelExecQueue[n].gtpinResource; auto pBuffer = castToObjectOrAbort(gtpinBuffer); auto rootDeviceIndex = kernelExecQueue[n].pCommandQueue->getDevice().getRootDeviceIndex(); GraphicsAllocation *pGfxAlloc = pBuffer->getGraphicsAllocation(rootDeviceIndex); GeneralSurface *pSurface = new GeneralSurface(pGfxAlloc); pResidencyVector->push_back(pSurface); kernelExecQueue[n].isResourceResident = true; break; } } } } void gtpinNotifyPlatformShutdown() { if (isGTPinInitialized) { // Clear Kernel Execution Queue kernelExecQueue.clear(); } } void *gtpinGetIgcInit() { return pIgcInit; } void gtpinSetIgcInit(void *pIgcInitPtr) { pIgcInit = static_cast(pIgcInitPtr); } void gtpinRemoveCommandQueue(void *pCmdQueue) { if (isGTPinInitialized) { std::unique_lock lock{kernelExecQueueLock}; size_t n = 0; while (n < kernelExecQueue.size()) { if (kernelExecQueue[n].pCommandQueue == pCmdQueue) { kernelExecQueue.erase(kernelExecQueue.begin() + n); } else { n++; } } } } } // namespace NEO 
compute-runtime-22.14.22890/opencl/source/gtpin/gtpin_defs.h000066400000000000000000000015001422164147700235420ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/kernel/kernel.h" #include "CL/cl.h" #include "ocl_igc_shared/gtpin/gtpin_ocl_interface.h" namespace NEO { struct GTPinKernelExec { Kernel *pKernel; cl_mem gtpinResource; CommandQueue *pCommandQueue; gtpin::command_buffer_handle_t commandBuffer; uint32_t taskCount; bool isTaskCountValid; bool isResourceResident; GTPinKernelExec() { pKernel = nullptr; gtpinResource = nullptr; pCommandQueue = nullptr; commandBuffer = nullptr; taskCount = 0; isTaskCountValid = false; isResourceResident = false; } }; typedef struct GTPinKernelExec gtpinkexec_t; } // namespace NEO compute-runtime-22.14.22890/opencl/source/gtpin/gtpin_helpers.cpp000066400000000000000000000111511422164147700246210ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "gtpin_helpers.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/memory_manager/unified_memory_manager.h" #include "opencl/source/api/api.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/context/context.h" #include "opencl/source/gtpin/gtpin_hw_helper.h" #include "opencl/source/helpers/cl_validators.h" #include "opencl/source/mem_obj/buffer.h" #include "CL/cl.h" #include "ocl_igc_shared/gtpin/gtpin_ocl_interface.h" using namespace gtpin; namespace NEO { GTPIN_DI_STATUS GTPIN_DRIVER_CALLCONV gtpinCreateBuffer(context_handle_t context, uint32_t reqSize, resource_handle_t *pResource) { cl_int diag = CL_SUCCESS; Context *pContext = castToObject((cl_context)context); if ((pContext == nullptr) || (pResource == nullptr)) { return GTPIN_DI_ERROR_INVALID_ARGUMENT; } size_t size = alignUp(reqSize, MemoryConstants::cacheLineSize); 
GTPinHwHelper >pinHelper = GTPinHwHelper::get(pContext->getDevice(0)->getHardwareInfo().platform.eRenderCoreFamily); if (gtpinHelper.canUseSharedAllocation(pContext->getDevice(0)->getHardwareInfo())) { void *unifiedMemorySharedAllocation = clSharedMemAllocINTEL(pContext, pContext->getDevice(0), 0, size, 0, &diag); auto allocationsManager = pContext->getSVMAllocsManager(); auto graphicsAllocation = allocationsManager->getSVMAlloc(unifiedMemorySharedAllocation); *pResource = (resource_handle_t)graphicsAllocation; } else { void *hostPtr = pContext->getMemoryManager()->allocateSystemMemory(size, MemoryConstants::pageSize); if (hostPtr == nullptr) { return GTPIN_DI_ERROR_ALLOCATION_FAILED; } cl_mem buffer = Buffer::create(pContext, CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE | CL_MEM_FORCE_HOST_MEMORY_INTEL, size, hostPtr, diag); *pResource = (resource_handle_t)buffer; } return GTPIN_DI_SUCCESS; } GTPIN_DI_STATUS GTPIN_DRIVER_CALLCONV gtpinFreeBuffer(context_handle_t context, resource_handle_t resource) { Context *pContext = castToObject((cl_context)context); if ((pContext == nullptr) || (resource == nullptr)) { return GTPIN_DI_ERROR_INVALID_ARGUMENT; } GTPinHwHelper >pinHelper = GTPinHwHelper::get(pContext->getDevice(0)->getHardwareInfo().platform.eRenderCoreFamily); if (gtpinHelper.canUseSharedAllocation(pContext->getDevice(0)->getHardwareInfo())) { auto allocData = reinterpret_cast(resource); clMemFreeINTEL(pContext, allocData->cpuAllocation->getUnderlyingBuffer()); } else { auto pMemObj = castToObject(resource); if (pMemObj == nullptr) { return GTPIN_DI_ERROR_INVALID_ARGUMENT; } alignedFree(pMemObj->getHostPtr()); pMemObj->release(); } return GTPIN_DI_SUCCESS; } GTPIN_DI_STATUS GTPIN_DRIVER_CALLCONV gtpinMapBuffer(context_handle_t context, resource_handle_t resource, uint8_t **pAddress) { cl_mem buffer = (cl_mem)resource; Context *pContext = castToObject((cl_context)context); if ((pContext == nullptr) || (buffer == nullptr) || (pAddress == nullptr)) { return 
GTPIN_DI_ERROR_INVALID_ARGUMENT; } GTPinHwHelper >pinHelper = GTPinHwHelper::get(pContext->getDevice(0)->getHardwareInfo().platform.eRenderCoreFamily); if (gtpinHelper.canUseSharedAllocation(pContext->getDevice(0)->getHardwareInfo())) { auto allocData = reinterpret_cast(resource); *pAddress = reinterpret_cast(allocData->cpuAllocation->getUnderlyingBuffer()); } else { auto pMemObj = castToObject(buffer); if (pMemObj == nullptr) { return GTPIN_DI_ERROR_INVALID_ARGUMENT; } *pAddress = reinterpret_cast(pMemObj->getHostPtr()); } return GTPIN_DI_SUCCESS; } GTPIN_DI_STATUS GTPIN_DRIVER_CALLCONV gtpinUnmapBuffer(context_handle_t context, resource_handle_t resource) { Context *pContext = castToObject((cl_context)context); if ((pContext == nullptr) || (resource == nullptr)) { return GTPIN_DI_ERROR_INVALID_ARGUMENT; } GTPinHwHelper >pinHelper = GTPinHwHelper::get(pContext->getDevice(0)->getHardwareInfo().platform.eRenderCoreFamily); if (!gtpinHelper.canUseSharedAllocation(pContext->getDevice(0)->getHardwareInfo())) { auto pMemObj = castToObject(resource); if (pMemObj == nullptr) { return GTPIN_DI_ERROR_INVALID_ARGUMENT; } } return GTPIN_DI_SUCCESS; } } // namespace NEO compute-runtime-22.14.22890/opencl/source/gtpin/gtpin_helpers.h000066400000000000000000000013621422164147700242710ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "ocl_igc_shared/gtpin/gtpin_driver_common.h" namespace NEO { gtpin::GTPIN_DI_STATUS GTPIN_DRIVER_CALLCONV gtpinCreateBuffer(gtpin::context_handle_t context, uint32_t size, gtpin::resource_handle_t *pResource); gtpin::GTPIN_DI_STATUS GTPIN_DRIVER_CALLCONV gtpinFreeBuffer(gtpin::context_handle_t context, gtpin::resource_handle_t resource); gtpin::GTPIN_DI_STATUS GTPIN_DRIVER_CALLCONV gtpinMapBuffer(gtpin::context_handle_t context, gtpin::resource_handle_t resource, uint8_t **pAddress); gtpin::GTPIN_DI_STATUS GTPIN_DRIVER_CALLCONV 
gtpinUnmapBuffer(gtpin::context_handle_t context, gtpin::resource_handle_t resource); } // namespace NEO compute-runtime-22.14.22890/opencl/source/gtpin/gtpin_hw_helper.cpp000066400000000000000000000005231422164147700251350ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/gtpin/gtpin_hw_helper.h" namespace NEO { GTPinHwHelper *gtpinHwHelperFactory[IGFX_MAX_CORE] = {}; GTPinHwHelper >PinHwHelper::get(GFXCORE_FAMILY gfxCore) { return *gtpinHwHelperFactory[gfxCore]; } } // namespace NEO compute-runtime-22.14.22890/opencl/source/gtpin/gtpin_hw_helper.h000066400000000000000000000020141422164147700245770ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "hw_cmds.h" namespace NEO { class Kernel; class GTPinHwHelper { public: static GTPinHwHelper &get(GFXCORE_FAMILY gfxCore); virtual uint32_t getGenVersion() = 0; virtual bool addSurfaceState(Kernel *pKernel) = 0; virtual void *getSurfaceState(Kernel *pKernel, size_t bti) = 0; virtual bool canUseSharedAllocation(const HardwareInfo &hwInfo) const = 0; protected: GTPinHwHelper(){}; }; template class GTPinHwHelperHw : public GTPinHwHelper { public: static GTPinHwHelper &get() { static GTPinHwHelperHw gtpinHwHelper; return gtpinHwHelper; } uint32_t getGenVersion() override; bool addSurfaceState(Kernel *pKernel) override; void *getSurfaceState(Kernel *pKernel, size_t bti) override; bool canUseSharedAllocation(const HardwareInfo &hwInfo) const override; protected: GTPinHwHelperHw(){}; }; } // namespace NEO compute-runtime-22.14.22890/opencl/source/gtpin/gtpin_hw_helper.inl000066400000000000000000000050471422164147700251430ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/string.h" #include "opencl/source/gtpin/gtpin_hw_helper.h" #include 
"opencl/source/kernel/kernel.h" #include "hw_cmds.h" namespace NEO { template bool GTPinHwHelperHw::addSurfaceState(Kernel *pKernel) { using RENDER_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE; using BINDING_TABLE_STATE = typename GfxFamily::BINDING_TABLE_STATE; size_t sshSize = pKernel->getSurfaceStateHeapSize(); if (sshSize == 0) { // Kernels which do not use SSH or use Execution Model are not supported (yet) return false; } size_t ssSize = sizeof(RENDER_SURFACE_STATE); size_t btsSize = sizeof(BINDING_TABLE_STATE); size_t sizeToEnlarge = ssSize + btsSize; size_t currBTOffset = pKernel->getBindingTableOffset(); size_t currSurfaceStateSize = currBTOffset; char *pSsh = static_cast(pKernel->getSurfaceStateHeap()); char *pNewSsh = new char[sshSize + sizeToEnlarge]; memcpy_s(pNewSsh, sshSize + sizeToEnlarge, pSsh, currSurfaceStateSize); RENDER_SURFACE_STATE *pSS = reinterpret_cast(pNewSsh + currSurfaceStateSize); *pSS = GfxFamily::cmdInitRenderSurfaceState; size_t newSurfaceStateSize = currSurfaceStateSize + ssSize; size_t currBTCount = pKernel->getNumberOfBindingTableStates(); memcpy_s(pNewSsh + newSurfaceStateSize, sshSize + sizeToEnlarge - newSurfaceStateSize, pSsh + currBTOffset, currBTCount * btsSize); BINDING_TABLE_STATE *pNewBTS = reinterpret_cast(pNewSsh + newSurfaceStateSize + currBTCount * btsSize); *pNewBTS = GfxFamily::cmdInitBindingTableState; pNewBTS->setSurfaceStatePointer((uint64_t)currBTOffset); pKernel->resizeSurfaceStateHeap(pNewSsh, sshSize + sizeToEnlarge, currBTCount + 1, newSurfaceStateSize); return true; } template void *GTPinHwHelperHw::getSurfaceState(Kernel *pKernel, size_t bti) { using BINDING_TABLE_STATE = typename GfxFamily::BINDING_TABLE_STATE; if ((nullptr == pKernel->getSurfaceStateHeap()) || (bti >= pKernel->getNumberOfBindingTableStates())) { return nullptr; } auto *pBts = reinterpret_cast(ptrOffset(pKernel->getSurfaceStateHeap(), (pKernel->getBindingTableOffset() + bti * sizeof(BINDING_TABLE_STATE)))); auto pSurfaceState 
= ptrOffset(pKernel->getSurfaceStateHeap(), pBts->getSurfaceStatePointer()); return pSurfaceState; } } // namespace NEO compute-runtime-22.14.22890/opencl/source/gtpin/gtpin_hw_helper_bdw_and_later.inl000066400000000000000000000012451422164147700300040ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/debug_settings/debug_settings_manager.h" #include "opencl/source/gtpin/gtpin_hw_helper.h" namespace NEO { template bool GTPinHwHelperHw::canUseSharedAllocation(const HardwareInfo &hwInfo) const { bool canUseSharedAllocation = false; if (DebugManager.flags.GTPinAllocateBufferInSharedMemory.get() != -1) { canUseSharedAllocation = !!DebugManager.flags.GTPinAllocateBufferInSharedMemory.get(); } canUseSharedAllocation &= hwInfo.capabilityTable.ftrSvm; return canUseSharedAllocation; } } // namespace NEO compute-runtime-22.14.22890/opencl/source/gtpin/gtpin_hw_helper_xehp_and_later.inl000066400000000000000000000011471422164147700301750ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/debug_settings/debug_settings_manager.h" #include "opencl/source/gtpin/gtpin_hw_helper.h" namespace NEO { template bool GTPinHwHelperHw::canUseSharedAllocation(const HardwareInfo &hwInfo) const { bool canUseSharedAllocation = true; if (DebugManager.flags.GTPinAllocateBufferInSharedMemory.get() != -1) { canUseSharedAllocation = !!DebugManager.flags.GTPinAllocateBufferInSharedMemory.get(); } return canUseSharedAllocation; } } // namespace NEO compute-runtime-22.14.22890/opencl/source/gtpin/gtpin_init.cpp000066400000000000000000000052231422164147700241250ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "gtpin_init.h" #include "shared/source/device/device.h" #include "opencl/source/platform/platform.h" #include "CL/cl.h" #include "gtpin_helpers.h" using namespace 
gtpin; using namespace NEO; namespace NEO { bool isGTPinInitialized = false; gtpin::ocl::gtpin_events_t GTPinCallbacks = {0}; } // namespace NEO // Do not change this code, needed to avoid compiler optimization that breaks GTPin_Init // Beginning void passCreateBuffer(BufferAllocateFPTR src, BufferAllocateFPTR &dst) { dst = src; } void passFreeBuffer(BufferDeallocateFPTR src, BufferDeallocateFPTR &dst) { dst = src; } void passMapBuffer(BufferMapFPTR src, BufferMapFPTR &dst) { dst = src; } void passUnMapBuffer(BufferUnMapFPTR src, BufferUnMapFPTR &dst) { dst = src; } // End of WA GTPIN_DI_STATUS GTPin_Init(gtpin::ocl::gtpin_events_t *pGtpinEvents, driver_services_t *pDriverServices, interface_version_t *pDriverVersion) { if (isGTPinInitialized) { return GTPIN_DI_ERROR_INSTANCE_ALREADY_CREATED; } if (pDriverVersion != nullptr) { // GT-Pin is asking to obtain GT-Pin Interface version that is supported pDriverVersion->common = gtpin::GTPIN_COMMON_INTERFACE_VERSION; pDriverVersion->specific = gtpin::ocl::GTPIN_OCL_INTERFACE_VERSION; if ((pDriverServices == nullptr) || (pGtpinEvents == nullptr)) { return GTPIN_DI_SUCCESS; } } if ((pDriverServices == nullptr) || (pGtpinEvents == nullptr)) { return GTPIN_DI_ERROR_INVALID_ARGUMENT; } if ((pGtpinEvents->onContextCreate == nullptr) || (pGtpinEvents->onContextDestroy == nullptr) || (pGtpinEvents->onKernelCreate == nullptr) || (pGtpinEvents->onKernelSubmit == nullptr) || (pGtpinEvents->onCommandBufferCreate == nullptr) || (pGtpinEvents->onCommandBufferComplete == nullptr)) { return GTPIN_DI_ERROR_INVALID_ARGUMENT; } // Do not change this code, needed to avoid compiler optimization that breaks GTPin_Init // Beginning auto createBuffer = NEO::gtpinCreateBuffer; auto freeBuffer = NEO::gtpinFreeBuffer; auto mapBuffer = NEO::gtpinMapBuffer; auto unMapBuffer = NEO::gtpinUnmapBuffer; passCreateBuffer(createBuffer, pDriverServices->bufferAllocate); passFreeBuffer(freeBuffer, pDriverServices->bufferDeallocate); passMapBuffer(mapBuffer, 
pDriverServices->bufferMap); passUnMapBuffer(unMapBuffer, pDriverServices->bufferUnMap); // End of WA GTPinCallbacks = *pGtpinEvents; isGTPinInitialized = true; return GTPIN_DI_SUCCESS; } compute-runtime-22.14.22890/opencl/source/gtpin/gtpin_init.h000066400000000000000000000006111422164147700235660ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "ocl_igc_shared/gtpin/gtpin_ocl_interface.h" #ifdef __cplusplus extern "C" { #endif gtpin::GTPIN_DI_STATUS GTPin_Init(gtpin::ocl::gtpin_events_t *pGtpinEvents, gtpin::driver_services_t *pDriverServices, gtpin::interface_version_t *pDriverVersion); #ifdef __cplusplus } #endif compute-runtime-22.14.22890/opencl/source/gtpin/gtpin_notify.h000066400000000000000000000017001422164147700241330ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "CL/cl.h" #include namespace NEO { extern bool isGTPinInitialized; void gtpinNotifyContextCreate(cl_context context); void gtpinNotifyContextDestroy(cl_context context); void gtpinNotifyKernelCreate(cl_kernel kernel); void gtpinNotifyKernelSubmit(cl_kernel kernel, void *pCmdQueue); void gtpinNotifyPreFlushTask(void *pCmdQueue); void gtpinNotifyFlushTask(uint32_t flushedTaskCount); void gtpinNotifyTaskCompletion(uint32_t completedTaskCount); void gtpinNotifyMakeResident(void *pKernel, void *pCommandStreamReceiver); void gtpinNotifyUpdateResidencyList(void *pKernel, void *pResidencyVector); void gtpinNotifyPlatformShutdown(); inline bool gtpinIsGTPinInitialized() { return isGTPinInitialized; } void *gtpinGetIgcInit(); void gtpinSetIgcInit(void *pIgcInitPtr); void gtpinRemoveCommandQueue(void *pCmdQueue); } // namespace NEO 
compute-runtime-22.14.22890/opencl/source/guidelines/000077500000000000000000000000001422164147700222625ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/source/guidelines/CMakeLists.txt000066400000000000000000000004671422164147700250310ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(RUNTIME_CODING_GUIDELINES ${NEO_SOURCE_DIR}/GUIDELINES.md ) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_CODING_GUIDELINES}) set_property(GLOBAL PROPERTY RUNTIME_CODING_GUIDELINES ${RUNTIME_CODING_GUIDELINES}) compute-runtime-22.14.22890/opencl/source/helpers/000077500000000000000000000000001422164147700215745ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/source/helpers/CMakeLists.txt000066400000000000000000000062361422164147700243430ustar00rootroot00000000000000# # Copyright (C) 2018-2022 Intel Corporation # # SPDX-License-Identifier: MIT # set(RUNTIME_SRCS_HELPERS_BASE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/base_object.cpp ${CMAKE_CURRENT_SOURCE_DIR}/base_object.h ${CMAKE_CURRENT_SOURCE_DIR}/cl_blit_properties.h ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}cl_device_helpers.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cl_device_helpers.h ${CMAKE_CURRENT_SOURCE_DIR}/cl_helper.h ${CMAKE_CURRENT_SOURCE_DIR}/cl_hw_helper.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cl_hw_helper.h ${CMAKE_CURRENT_SOURCE_DIR}/cl_hw_helper_base.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_hw_helper_bdw_and_later.inl ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}cl_memory_properties_helpers.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cl_memory_properties_helpers.h ${CMAKE_CURRENT_SOURCE_DIR}/cl_memory_properties_helpers_base.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_preemption_helper.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cl_preemption_helper.h ${CMAKE_CURRENT_SOURCE_DIR}/cl_validators.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cl_validators.h ${CMAKE_CURRENT_SOURCE_DIR}/convert_color.h ${CMAKE_CURRENT_SOURCE_DIR}/destructor_callbacks.h 
${CMAKE_CURRENT_SOURCE_DIR}/dispatch_info.cpp ${CMAKE_CURRENT_SOURCE_DIR}/dispatch_info.h ${CMAKE_CURRENT_SOURCE_DIR}/dispatch_info_builder.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_properties.h ${CMAKE_CURRENT_SOURCE_DIR}/error_mappers.h ${CMAKE_CURRENT_SOURCE_DIR}/get_info_status_mapper.h ${CMAKE_CURRENT_SOURCE_DIR}/gmm_types_converter.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gmm_types_converter.h ${CMAKE_CURRENT_SOURCE_DIR}/hardware_commands_helper.h ${CMAKE_CURRENT_SOURCE_DIR}/hardware_commands_helper_base.inl ${CMAKE_CURRENT_SOURCE_DIR}/hardware_commands_helper_bdw_and_later.inl ${CMAKE_CURRENT_SOURCE_DIR}/helper_options.cpp ${CMAKE_CURRENT_SOURCE_DIR}/implicit_scaling_ocl.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mipmap.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mipmap.h ${CMAKE_CURRENT_SOURCE_DIR}/properties_helper.h ${CMAKE_CURRENT_SOURCE_DIR}/properties_helper.cpp ${CMAKE_CURRENT_SOURCE_DIR}/queue_helpers.h ${CMAKE_CURRENT_SOURCE_DIR}/sampler_helpers.h ${CMAKE_CURRENT_SOURCE_DIR}/surface_formats.cpp ${CMAKE_CURRENT_SOURCE_DIR}/surface_formats.h ${CMAKE_CURRENT_SOURCE_DIR}/task_information.cpp ${CMAKE_CURRENT_SOURCE_DIR}/task_information.h ${CMAKE_CURRENT_SOURCE_DIR}/task_information.inl ) if(SUPPORT_XEHP_AND_LATER) list(APPEND RUNTIME_SRCS_HELPERS_BASE ${CMAKE_CURRENT_SOURCE_DIR}/cl_hw_helper_xehp_and_later.inl ${CMAKE_CURRENT_SOURCE_DIR}/hardware_commands_helper_xehp_and_later.inl ) endif() if(SUPPORT_PVC_AND_LATER) list(APPEND RUNTIME_SRCS_HELPERS_BASE ${CMAKE_CURRENT_SOURCE_DIR}/cl_hw_helper_pvc_and_later.inl) endif() set(RUNTIME_SRCS_HELPERS_WINDOWS ${CMAKE_CURRENT_SOURCE_DIR}/windows/gl_helper.h ) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_HELPERS_BASE}) if(WIN32) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_HELPERS_WINDOWS}) endif() set_property(GLOBAL PROPERTY RUNTIME_SRCS_HELPERS_BASE ${RUNTIME_SRCS_HELPERS_BASE}) add_subdirectories() 
compute-runtime-22.14.22890/opencl/source/helpers/api_specific_config_ocl.cpp000066400000000000000000000017621422164147700271060ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/helpers/api_specific_config.h" #include "opencl/source/os_interface/ocl_reg_path.h" namespace NEO { bool ApiSpecificConfig::isStatelessCompressionSupported() { return true; } bool ApiSpecificConfig::getHeapConfiguration() { return false; } bool ApiSpecificConfig::getBindlessConfiguration() { if (DebugManager.flags.UseBindlessMode.get() != -1) { return DebugManager.flags.UseBindlessMode.get(); } else { return false; } } ApiSpecificConfig::ApiType ApiSpecificConfig::getApiType() { return ApiSpecificConfig::OCL; } std::string ApiSpecificConfig::getName() { return "ocl"; } uint64_t ApiSpecificConfig::getReducedMaxAllocSize(uint64_t maxAllocSize) { return maxAllocSize / 2; } const char *ApiSpecificConfig::getRegistryPath() { return oclRegPath; } } // namespace NEO compute-runtime-22.14.22890/opencl/source/helpers/base_object.cpp000066400000000000000000000003211422164147700245340ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/helpers/base_object.h" namespace NEO { std::thread::id invalidThreadID; } // namespace NEO compute-runtime-22.14.22890/opencl/source/helpers/base_object.h000066400000000000000000000133711422164147700242120ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/abort.h" #include "shared/source/helpers/debug_helpers.h" #include "shared/source/utilities/reference_tracked_object.h" #include "opencl/source/api/dispatch.h" #include "CL/cl.h" #include #include #include #include #include namespace NEO { #if defined(__clang__) #define 
NO_SANITIZE __attribute__((no_sanitize("undefined"))) #else #define NO_SANITIZE #endif template struct OpenCLObjectMapper { }; template using DerivedType_t = typename OpenCLObjectMapper::DerivedType; template NO_SANITIZE inline DerivedType *castToObject(typename DerivedType::BaseType *object) { if (object == nullptr) { return nullptr; } auto derivedObject = static_cast(object); if (((derivedObject->getMagic() & DerivedType::maskMagic) == DerivedType::objectMagic) && (derivedObject->dispatch.icdDispatch == &icdGlobalDispatchTable)) { return derivedObject; } return nullptr; } template inline DerivedType *castToObjectOrAbort(typename DerivedType::BaseType *object) { auto derivedObject = castToObject(object); if (derivedObject == nullptr) { abortExecution(); } else { return derivedObject; } } template inline const DerivedType *castToObject(const typename DerivedType::BaseType *object) { return castToObject(const_cast(object)); } template inline DerivedType *castToObject(const void *object) { cl_mem clMem = const_cast(static_cast(object)); return castToObject(clMem); } extern std::thread::id invalidThreadID; class ConditionVariableWithCounter { public: ConditionVariableWithCounter() { waitersCount = 0; } template void wait(Args &&...args) { ++waitersCount; cond.wait(std::forward(args)...); --waitersCount; } void notify_one() { // NOLINT cond.notify_one(); } uint32_t peekNumWaiters() { return waitersCount.load(); } private: std::atomic_uint waitersCount; std::condition_variable cond; }; template class TakeOwnershipWrapper { public: TakeOwnershipWrapper(T &obj) : obj(obj) { lock(); } TakeOwnershipWrapper(T &obj, bool lockImmediately) : obj(obj) { if (lockImmediately) { lock(); } } ~TakeOwnershipWrapper() { unlock(); } void unlock() { if (locked) { obj.releaseOwnership(); locked = false; } } void lock() { if (!locked) { obj.takeOwnership(); locked = true; } } private: T &obj; bool locked = false; }; // This class should act as a base class for all CL objects. 
It will handle the // MT safe and reference things for every CL object. template class BaseObject : public B, public ReferenceTrackedObject> { public: typedef BaseObject ThisType; typedef B BaseType; typedef DerivedType_t DerivedType; const static cl_ulong maskMagic = 0xFFFFFFFFFFFFFFFFLL; const static cl_ulong deadMagic = 0xFFFFFFFFFFFFFFFFLL; BaseObject(const BaseObject &) = delete; BaseObject &operator=(const BaseObject &) = delete; protected: cl_long magic; mutable std::mutex mtx; mutable ConditionVariableWithCounter cond; mutable std::thread::id owner; mutable uint32_t recursiveOwnageCounter = 0; BaseObject() : magic(DerivedType::objectMagic) { this->incRefApi(); } ~BaseObject() override { magic = deadMagic; } bool isValid() const { return (magic & DerivedType::maskMagic) == DerivedType::objectMagic; } void convertToInternalObject() { this->incRefInternal(); this->decRefApi(); } public: NO_SANITIZE cl_ulong getMagic() const { return this->magic; } virtual void retain() { DEBUG_BREAK_IF(!isValid()); this->incRefApi(); } virtual unique_ptr_if_unused release() { DEBUG_BREAK_IF(!isValid()); return this->decRefApi(); } cl_int getReference() const { DEBUG_BREAK_IF(!isValid()); return this->getRefApiCount(); } MOCKABLE_VIRTUAL void takeOwnership() const { DEBUG_BREAK_IF(!isValid()); std::unique_lock theLock(mtx); std::thread::id self = std::this_thread::get_id(); if (owner == invalidThreadID) { owner = self; return; } if (owner == self) { ++recursiveOwnageCounter; return; } cond.wait(theLock, [&] { return owner == invalidThreadID; }); owner = self; recursiveOwnageCounter = 0; } MOCKABLE_VIRTUAL void releaseOwnership() const { DEBUG_BREAK_IF(!isValid()); std::unique_lock theLock(mtx); if (hasOwnership() == false) { DEBUG_BREAK_IF(true); return; } if (recursiveOwnageCounter > 0) { --recursiveOwnageCounter; return; } owner = invalidThreadID; cond.notify_one(); } // checks whether current thread owns object mutex bool hasOwnership() const { DEBUG_BREAK_IF(!isValid()); 
return (owner == std::this_thread::get_id()); } ConditionVariableWithCounter &getCond() { return this->cond; } }; } // namespace NEO compute-runtime-22.14.22890/opencl/source/helpers/cache_policy.cpp000066400000000000000000000012011422164147700247140ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/cache_policy.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/memory_manager/graphics_allocation.h" namespace NEO { bool isL3Capable(void *ptr, size_t size) { return isAligned(ptr) && isAligned(size); } bool isL3Capable(const NEO::GraphicsAllocation &graphicsAllocation) { return isL3Capable(graphicsAllocation.getUnderlyingBuffer(), graphicsAllocation.getUnderlyingBufferSize()); } } // namespace NEO compute-runtime-22.14.22890/opencl/source/helpers/cl_blit_properties.h000066400000000000000000000267021422164147700256400ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/blit_commands_helper.h" #include "opencl/source/built_ins/builtins_dispatch_builder.h" #include "opencl/source/mem_obj/image.h" #include "CL/cl.h" namespace NEO { struct ClBlitProperties { static BlitProperties constructProperties(BlitterConstants::BlitDirection blitDirection, CommandStreamReceiver &commandStreamReceiver, const BuiltinOpParams &builtinOpParams) { auto rootDeviceIndex = commandStreamReceiver.getRootDeviceIndex(); auto clearColorAllocation = commandStreamReceiver.getClearColorAllocation(); BlitProperties blitProperties{}; if (BlitterConstants::BlitDirection::BufferToBuffer == blitDirection || BlitterConstants::BlitDirection::ImageToImage == blitDirection) { auto dstOffset = builtinOpParams.dstOffset.x; auto srcOffset = builtinOpParams.srcOffset.x; GraphicsAllocation *dstAllocation = nullptr; GraphicsAllocation *srcAllocation = nullptr; if 
(!builtinOpParams.dstSvmAlloc) { dstOffset += builtinOpParams.dstMemObj->getOffset(); srcOffset += builtinOpParams.srcMemObj->getOffset(); dstAllocation = builtinOpParams.dstMemObj->getGraphicsAllocation(rootDeviceIndex); srcAllocation = builtinOpParams.srcMemObj->getGraphicsAllocation(rootDeviceIndex); } else { dstAllocation = builtinOpParams.dstSvmAlloc; srcAllocation = builtinOpParams.srcSvmAlloc; dstOffset += ptrDiff(builtinOpParams.dstPtr, dstAllocation->getGpuAddress()); srcOffset += ptrDiff(builtinOpParams.srcPtr, srcAllocation->getGpuAddress()); } blitProperties = BlitProperties::constructPropertiesForCopy(dstAllocation, srcAllocation, {dstOffset, builtinOpParams.dstOffset.y, builtinOpParams.dstOffset.z}, {srcOffset, builtinOpParams.srcOffset.y, builtinOpParams.srcOffset.z}, builtinOpParams.size, builtinOpParams.srcRowPitch, builtinOpParams.srcSlicePitch, builtinOpParams.dstRowPitch, builtinOpParams.dstSlicePitch, clearColorAllocation); if (BlitterConstants::BlitDirection::ImageToImage == blitDirection) { blitProperties.blitDirection = blitDirection; setBlitPropertiesForImage(blitProperties, builtinOpParams); } return blitProperties; } GraphicsAllocation *gpuAllocation = nullptr; Vec3 copyOffset = 0; void *hostPtr = nullptr; Vec3 hostPtrOffset = 0; uint64_t memObjGpuVa = 0; uint64_t hostAllocGpuVa = 0; GraphicsAllocation *hostAllocation = builtinOpParams.transferAllocation; Vec3 copySize = 0; size_t hostRowPitch = 0; size_t hostSlicePitch = 0; size_t gpuRowPitch = 0; size_t gpuSlicePitch = 0; if (BlitterConstants::BlitDirection::HostPtrToBuffer == blitDirection || BlitterConstants::BlitDirection::HostPtrToImage == blitDirection) { // write buffer/image hostPtr = builtinOpParams.srcPtr; hostPtrOffset = builtinOpParams.srcOffset; copyOffset = builtinOpParams.dstOffset; memObjGpuVa = castToUint64(builtinOpParams.dstPtr); hostAllocGpuVa = castToUint64(builtinOpParams.srcPtr); if (builtinOpParams.dstSvmAlloc) { gpuAllocation = builtinOpParams.dstSvmAlloc; 
hostAllocation = builtinOpParams.srcSvmAlloc; } else { gpuAllocation = builtinOpParams.dstMemObj->getGraphicsAllocation(rootDeviceIndex); memObjGpuVa = (gpuAllocation->getGpuAddress() + builtinOpParams.dstMemObj->getOffset()); } hostRowPitch = builtinOpParams.srcRowPitch; hostSlicePitch = builtinOpParams.srcSlicePitch; gpuRowPitch = builtinOpParams.dstRowPitch; gpuSlicePitch = builtinOpParams.dstSlicePitch; copySize = builtinOpParams.size; } if (BlitterConstants::BlitDirection::BufferToHostPtr == blitDirection || BlitterConstants::BlitDirection::ImageToHostPtr == blitDirection) { // read buffer/image hostPtr = builtinOpParams.dstPtr; hostPtrOffset = builtinOpParams.dstOffset; copyOffset = builtinOpParams.srcOffset; memObjGpuVa = castToUint64(builtinOpParams.srcPtr); hostAllocGpuVa = castToUint64(builtinOpParams.dstPtr); if (builtinOpParams.srcSvmAlloc) { gpuAllocation = builtinOpParams.srcSvmAlloc; hostAllocation = builtinOpParams.dstSvmAlloc; } else { gpuAllocation = builtinOpParams.srcMemObj->getGraphicsAllocation(rootDeviceIndex); memObjGpuVa = (gpuAllocation->getGpuAddress() + builtinOpParams.srcMemObj->getOffset()); } hostRowPitch = builtinOpParams.dstRowPitch; hostSlicePitch = builtinOpParams.dstSlicePitch; gpuRowPitch = builtinOpParams.srcRowPitch; gpuSlicePitch = builtinOpParams.srcSlicePitch; copySize = builtinOpParams.size; } UNRECOVERABLE_IF(BlitterConstants::BlitDirection::HostPtrToBuffer != blitDirection && BlitterConstants::BlitDirection::BufferToHostPtr != blitDirection && BlitterConstants::BlitDirection::HostPtrToImage != blitDirection && BlitterConstants::BlitDirection::ImageToHostPtr != blitDirection); blitProperties = BlitProperties::constructPropertiesForReadWrite(blitDirection, commandStreamReceiver, gpuAllocation, hostAllocation, hostPtr, memObjGpuVa, hostAllocGpuVa, hostPtrOffset, copyOffset, copySize, hostRowPitch, hostSlicePitch, gpuRowPitch, gpuSlicePitch); if (BlitterConstants::BlitDirection::HostPtrToImage == blitDirection || 
BlitterConstants::BlitDirection::ImageToHostPtr == blitDirection) { setBlitPropertiesForImage(blitProperties, builtinOpParams); } return blitProperties; } static BlitterConstants::BlitDirection obtainBlitDirection(uint32_t commandType) { switch (commandType) { case CL_COMMAND_WRITE_BUFFER: case CL_COMMAND_WRITE_BUFFER_RECT: return BlitterConstants::BlitDirection::HostPtrToBuffer; case CL_COMMAND_READ_BUFFER: case CL_COMMAND_READ_BUFFER_RECT: return BlitterConstants::BlitDirection::BufferToHostPtr; case CL_COMMAND_COPY_BUFFER: case CL_COMMAND_COPY_BUFFER_RECT: case CL_COMMAND_SVM_MEMCPY: return BlitterConstants::BlitDirection::BufferToBuffer; case CL_COMMAND_WRITE_IMAGE: return BlitterConstants::BlitDirection::HostPtrToImage; case CL_COMMAND_READ_IMAGE: return BlitterConstants::BlitDirection::ImageToHostPtr; case CL_COMMAND_COPY_IMAGE: return BlitterConstants::BlitDirection::ImageToImage; default: UNRECOVERABLE_IF(true); } } static void adjustBlitPropertiesForImage(MemObj *memObj, BlitProperties &blitProperties, size_t &rowPitch, size_t &slicePitch, const bool isSource) { auto image = castToObject(memObj); const auto &imageDesc = image->getImageDesc(); auto image_width = imageDesc.image_width; auto image_height = imageDesc.image_height; auto image_depth = imageDesc.image_depth; if (imageDesc.image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) { image_depth = std::max(image_depth, imageDesc.image_array_size); } SurfaceOffsets surfaceOffsets; auto &gpuAddress = isSource ? blitProperties.srcGpuAddress : blitProperties.dstGpuAddress; auto &size = isSource ? blitProperties.srcSize : blitProperties.dstSize; auto ©Size = blitProperties.copySize; auto &bytesPerPixel = blitProperties.bytesPerPixel; auto &blitDirection = blitProperties.blitDirection; image->getSurfaceOffsets(surfaceOffsets); gpuAddress += surfaceOffsets.offset; size.x = image_width; size.y = image_height ? image_height : 1; size.z = image_depth ? 
image_depth : 1; bytesPerPixel = image->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes; rowPitch = imageDesc.image_row_pitch; slicePitch = imageDesc.image_slice_pitch; if (imageDesc.image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER) { if (blitDirection == BlitterConstants::BlitDirection::HostPtrToImage) { blitDirection = BlitterConstants::BlitDirection::HostPtrToBuffer; } if (blitDirection == BlitterConstants::BlitDirection::ImageToHostPtr) { blitDirection = BlitterConstants::BlitDirection::BufferToHostPtr; } if (blitDirection == BlitterConstants::BlitDirection::ImageToImage) { blitDirection = BlitterConstants::BlitDirection::BufferToBuffer; } size.x *= bytesPerPixel; copySize.x *= bytesPerPixel; bytesPerPixel = 1; } } static void setBlitPropertiesForImage(BlitProperties &blitProperties, const BuiltinOpParams &builtinOpParams) { size_t srcRowPitch = builtinOpParams.srcRowPitch; size_t dstRowPitch = builtinOpParams.dstRowPitch; size_t srcSlicePitch = builtinOpParams.srcSlicePitch; size_t dstSlicePitch = builtinOpParams.dstSlicePitch; if (blitProperties.blitDirection == BlitterConstants::BlitDirection::ImageToHostPtr || blitProperties.blitDirection == BlitterConstants::BlitDirection::ImageToImage) { adjustBlitPropertiesForImage(builtinOpParams.srcMemObj, blitProperties, srcRowPitch, srcSlicePitch, true); } if (blitProperties.blitDirection == BlitterConstants::BlitDirection::HostPtrToImage || blitProperties.blitDirection == BlitterConstants::BlitDirection::ImageToImage) { adjustBlitPropertiesForImage(builtinOpParams.dstMemObj, blitProperties, dstRowPitch, dstSlicePitch, false); } blitProperties.srcRowPitch = srcRowPitch ? srcRowPitch : blitProperties.srcSize.x * blitProperties.bytesPerPixel; blitProperties.dstRowPitch = dstRowPitch ? dstRowPitch : blitProperties.dstSize.x * blitProperties.bytesPerPixel; blitProperties.srcSlicePitch = srcSlicePitch ? 
srcSlicePitch : blitProperties.srcSize.y * blitProperties.srcRowPitch; blitProperties.dstSlicePitch = dstSlicePitch ? dstSlicePitch : blitProperties.dstSize.y * blitProperties.dstRowPitch; } }; } // namespace NEO compute-runtime-22.14.22890/opencl/source/helpers/cl_device_helpers.cpp000066400000000000000000000006701422164147700257420ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/helpers/cl_device_helpers.h" namespace NEO { void ClDeviceHelper::getExtraDeviceInfo(const ClDevice &clDevice, cl_device_info paramName, ClDeviceInfoParam ¶m, const void *&src, size_t &size, size_t &retSize) {} cl_device_feature_capabilities_intel ClDeviceHelper::getExtraCapabilities() { return 0; } } // namespace NEO compute-runtime-22.14.22890/opencl/source/helpers/cl_device_helpers.h000066400000000000000000000010421422164147700254010ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/extensions/public/cl_ext_private.h" #include "CL/cl.h" #include namespace NEO { class ClDevice; struct ClDeviceInfoParam; namespace ClDeviceHelper { void getExtraDeviceInfo(const ClDevice &clDevice, cl_device_info paramName, ClDeviceInfoParam ¶m, const void *&src, size_t &size, size_t &retSize); cl_device_feature_capabilities_intel getExtraCapabilities(); }; // namespace ClDeviceHelper } // namespace NEO compute-runtime-22.14.22890/opencl/source/helpers/cl_helper.h000066400000000000000000000054771422164147700237170ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "CL/cl.h" #include "CL/cl_gl_ext.h" #include namespace NEO { inline const std::string cmdTypetoString(cl_command_type cmd) { switch (cmd) { case CL_COMMAND_NDRANGE_KERNEL: return "CL_COMMAND_NDRANGE_KERNEL"; case CL_COMMAND_TASK: return "CL_COMMAND_TASK"; case CL_COMMAND_NATIVE_KERNEL: return 
"CL_COMMAND_NATIVE_KERNEL"; case CL_COMMAND_READ_BUFFER: return "CL_COMMAND_READ_BUFFER"; case CL_COMMAND_WRITE_BUFFER: return "CL_COMMAND_WRITE_BUFFER"; case CL_COMMAND_COPY_BUFFER: return "CL_COMMAND_COPY_BUFFER"; case CL_COMMAND_READ_IMAGE: return "CL_COMMAND_READ_IMAGE"; case CL_COMMAND_WRITE_IMAGE: return "CL_COMMAND_WRITE_IMAGE"; case CL_COMMAND_COPY_IMAGE: return "CL_COMMAND_COPY_IMAGE"; case CL_COMMAND_COPY_IMAGE_TO_BUFFER: return "CL_COMMAND_COPY_IMAGE_TO_BUFFER"; case CL_COMMAND_COPY_BUFFER_TO_IMAGE: return "CL_COMMAND_COPY_BUFFER_TO_IMAGE"; case CL_COMMAND_MAP_BUFFER: return "CL_COMMAND_MAP_BUFFER"; case CL_COMMAND_MAP_IMAGE: return "CL_COMMAND_MAP_IMAGE"; case CL_COMMAND_UNMAP_MEM_OBJECT: return "CL_COMMAND_UNMAP_MEM_OBJECT"; case CL_COMMAND_MARKER: return "CL_COMMAND_MARKER"; case CL_COMMAND_ACQUIRE_GL_OBJECTS: return "CL_COMMAND_ACQUIRE_GL_OBJECTS"; case CL_COMMAND_RELEASE_GL_OBJECTS: return "CL_COMMAND_RELEASE_GL_OBJECTS"; case CL_COMMAND_READ_BUFFER_RECT: return "CL_COMMAND_READ_BUFFER_RECT"; case CL_COMMAND_WRITE_BUFFER_RECT: return "CL_COMMAND_WRITE_BUFFER_RECT"; case CL_COMMAND_COPY_BUFFER_RECT: return "CL_COMMAND_COPY_BUFFER_RECT"; case CL_COMMAND_USER: return "CL_COMMAND_USER"; case CL_COMMAND_BARRIER: return "CL_COMMAND_BARRIER"; case CL_COMMAND_MIGRATE_MEM_OBJECTS: return "CL_COMMAND_MIGRATE_MEM_OBJECTS"; case CL_COMMAND_FILL_BUFFER: return "CL_COMMAND_FILL_BUFFER"; case CL_COMMAND_FILL_IMAGE: return "CL_COMMAND_FILL_IMAGE"; case CL_COMMAND_SVM_FREE: return "CL_COMMAND_SVM_FREE"; case CL_COMMAND_SVM_MEMCPY: return "CL_COMMAND_SVM_MEMCPY"; case CL_COMMAND_SVM_MEMFILL: return "CL_COMMAND_SVM_MEMFILL"; case CL_COMMAND_SVM_MAP: return "CL_COMMAND_SVM_MAP"; case CL_COMMAND_SVM_UNMAP: return "CL_COMMAND_SVM_UNMAP"; case CL_COMMAND_SVM_MIGRATE_MEM: return "CL_COMMAND_SVM_MIGRATE_MEM"; case CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR: return "CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR"; default: { std::string returnString("CMD_UNKNOWN:" + 
std::to_string((cl_command_type)cmd)); return returnString; } } } } // namespace NEO compute-runtime-22.14.22890/opencl/source/helpers/cl_hw_helper.cpp000066400000000000000000000012331422164147700247320ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/helpers/cl_hw_helper.h" #include "shared/source/helpers/hw_info.h" namespace NEO { ClHwHelper *clHwHelperFactory[IGFX_MAX_CORE] = {}; ClHwHelper &ClHwHelper::get(GFXCORE_FAMILY gfxCore) { return *clHwHelperFactory[gfxCore]; } uint8_t ClHwHelper::makeDeviceRevision(const HardwareInfo &hwInfo) { return static_cast(!hwInfo.capabilityTable.isIntegratedDevice); } cl_version ClHwHelper::makeDeviceIpVersion(uint16_t major, uint8_t minor, uint8_t revision) { return (major << 16) | (minor << 8) | revision; } } // namespace NEO compute-runtime-22.14.22890/opencl/source/helpers/cl_hw_helper.h000066400000000000000000000066551422164147700244140ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/definitions/engine_group_types.h" #include "opencl/extensions/public/cl_ext_private.h" #include "igfxfmid.h" #include #include namespace NEO { class Context; class ClDevice; struct ArgDescPointer; struct HardwareInfo; struct KernelInfo; struct MultiDispatchInfo; class ClHwHelper { public: static ClHwHelper &get(GFXCORE_FAMILY gfxCore); virtual bool requiresNonAuxMode(const ArgDescPointer &argAsPtr, const HardwareInfo &hwInfo) const = 0; virtual bool requiresAuxResolves(const KernelInfo &kernelInfo, const HardwareInfo &hwInfo) const = 0; virtual bool allowCompressionForContext(const ClDevice &clDevice, const Context &context) const = 0; virtual cl_command_queue_capabilities_intel getAdditionalDisabledQueueFamilyCapabilities(EngineGroupType type) const = 0; virtual bool getQueueFamilyName(std::string &name, EngineGroupType type) const = 0; virtual 
cl_ulong getKernelPrivateMemSize(const KernelInfo &kernelInfo) const = 0; virtual bool preferBlitterForLocalToLocalTransfers() const = 0; virtual bool isSupportedKernelThreadArbitrationPolicy() const = 0; virtual std::vector getSupportedThreadArbitrationPolicies() const = 0; virtual cl_version getDeviceIpVersion(const HardwareInfo &hwInfo) const = 0; virtual cl_device_feature_capabilities_intel getSupportedDeviceFeatureCapabilities() const = 0; virtual bool allowImageCompression(cl_image_format format) const = 0; virtual bool isFormatRedescribable(cl_image_format format) const = 0; protected: virtual bool hasStatelessAccessToBuffer(const KernelInfo &kernelInfo) const = 0; static uint8_t makeDeviceRevision(const HardwareInfo &hwInfo); static cl_version makeDeviceIpVersion(uint16_t major, uint8_t minor, uint8_t revision); ClHwHelper() = default; }; template class ClHwHelperHw : public ClHwHelper { public: static ClHwHelper &get() { static ClHwHelperHw clHwHelper; return clHwHelper; } bool requiresNonAuxMode(const ArgDescPointer &argAsPtr, const HardwareInfo &hwInfo) const override; bool requiresAuxResolves(const KernelInfo &kernelInfo, const HardwareInfo &hwInfo) const override; bool allowCompressionForContext(const ClDevice &clDevice, const Context &context) const override; cl_command_queue_capabilities_intel getAdditionalDisabledQueueFamilyCapabilities(EngineGroupType type) const override; bool getQueueFamilyName(std::string &name, EngineGroupType type) const override; cl_ulong getKernelPrivateMemSize(const KernelInfo &kernelInfo) const override; bool preferBlitterForLocalToLocalTransfers() const override; bool isSupportedKernelThreadArbitrationPolicy() const override; std::vector getSupportedThreadArbitrationPolicies() const override; cl_version getDeviceIpVersion(const HardwareInfo &hwInfo) const override; cl_device_feature_capabilities_intel getSupportedDeviceFeatureCapabilities() const override; bool allowImageCompression(cl_image_format format) const override; 
bool isFormatRedescribable(cl_image_format format) const override; protected: bool hasStatelessAccessToBuffer(const KernelInfo &kernelInfo) const override; ClHwHelperHw() = default; }; extern ClHwHelper *clHwHelperFactory[IGFX_MAX_CORE]; } // namespace NEO compute-runtime-22.14.22890/opencl/source/helpers/cl_hw_helper_base.inl000066400000000000000000000042271422164147700257320ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/hw_helper.h" #include "shared/source/program/kernel_info.h" #include "opencl/source/helpers/cl_hw_helper.h" #include "opencl/source/helpers/dispatch_info.h" namespace NEO { template inline bool ClHwHelperHw::requiresNonAuxMode(const ArgDescPointer &argAsPtr, const HardwareInfo &hwInfo) const { return !argAsPtr.isPureStateful(); } template inline bool ClHwHelperHw::requiresAuxResolves(const KernelInfo &kernelInfo, const HardwareInfo &hwInfo) const { return hasStatelessAccessToBuffer(kernelInfo); } template inline bool ClHwHelperHw::hasStatelessAccessToBuffer(const KernelInfo &kernelInfo) const { for (const auto &arg : kernelInfo.kernelDescriptor.payloadMappings.explicitArgs) { if (arg.is() && !arg.as().isPureStateful()) { return true; } } return false; } template inline bool ClHwHelperHw::allowCompressionForContext(const ClDevice &clDevice, const Context &context) const { return true; } template inline bool ClHwHelperHw::getQueueFamilyName(std::string &name, EngineGroupType type) const { return false; } template inline bool ClHwHelperHw::preferBlitterForLocalToLocalTransfers() const { return false; } template bool ClHwHelperHw::isSupportedKernelThreadArbitrationPolicy() const { return true; } template std::vector ClHwHelperHw::getSupportedThreadArbitrationPolicies() const { return std::vector{CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_OLDEST_FIRST_INTEL, CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_ROUND_ROBIN_INTEL, 
CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_AFTER_DEPENDENCY_ROUND_ROBIN_INTEL}; } template bool ClHwHelperHw::allowImageCompression(cl_image_format format) const { return true; } } // namespace NEO compute-runtime-22.14.22890/opencl/source/helpers/cl_hw_helper_bdw_and_later.inl000066400000000000000000000027711422164147700276070ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/helpers/cl_hw_helper.h" #include "opencl/source/helpers/surface_formats.h" namespace NEO { template inline cl_command_queue_capabilities_intel ClHwHelperHw::getAdditionalDisabledQueueFamilyCapabilities(EngineGroupType type) const { if (type == EngineGroupType::Copy) { return CL_QUEUE_CAPABILITY_CREATE_CROSS_QUEUE_EVENTS_INTEL; } return 0; } template cl_ulong ClHwHelperHw::getKernelPrivateMemSize(const KernelInfo &kernelInfo) const { return kernelInfo.kernelDescriptor.kernelAttributes.perHwThreadPrivateMemorySize; } template cl_device_feature_capabilities_intel ClHwHelperHw::getSupportedDeviceFeatureCapabilities() const { return 0; } static const std::vector redescribeFormats = { {CL_R, CL_UNSIGNED_INT8}, {CL_R, CL_UNSIGNED_INT16}, {CL_R, CL_UNSIGNED_INT32}, {CL_RG, CL_UNSIGNED_INT32}, {CL_RGBA, CL_UNSIGNED_INT32}}; template bool ClHwHelperHw::isFormatRedescribable(cl_image_format format) const { for (const auto &referenceFormat : redescribeFormats) { if (referenceFormat.image_channel_data_type == format.image_channel_data_type && referenceFormat.image_channel_order == format.image_channel_order) { return false; } } return true; } } // namespace NEO compute-runtime-22.14.22890/opencl/source/helpers/cl_hw_helper_pvc_and_later.inl000066400000000000000000000014431422164147700276160ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/extensions/public/cl_ext_private.h" template <> inline bool 
ClHwHelperHw::preferBlitterForLocalToLocalTransfers() const { return false; } template <> std::vector ClHwHelperHw::getSupportedThreadArbitrationPolicies() const { return std::vector{CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_OLDEST_FIRST_INTEL, CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_ROUND_ROBIN_INTEL, CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_AFTER_DEPENDENCY_ROUND_ROBIN_INTEL, CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_STALL_BASED_ROUND_ROBIN_INTEL}; } template <> bool ClHwHelperHw::allowImageCompression(cl_image_format format) const { return true; } compute-runtime-22.14.22890/opencl/source/helpers/cl_hw_helper_xehp_and_later.inl000066400000000000000000000021161422164147700277700ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/helpers/cl_device_helpers.h" #include "opencl/source/helpers/cl_hw_helper.h" namespace NEO { template inline cl_command_queue_capabilities_intel ClHwHelperHw::getAdditionalDisabledQueueFamilyCapabilities(EngineGroupType type) const { return 0; } template cl_ulong ClHwHelperHw::getKernelPrivateMemSize(const KernelInfo &kernelInfo) const { const auto &kernelAttributes = kernelInfo.kernelDescriptor.kernelAttributes; return (kernelAttributes.perThreadScratchSize[1] > 0) ? 
kernelAttributes.perThreadScratchSize[1] : kernelAttributes.perHwThreadPrivateMemorySize; } template cl_device_feature_capabilities_intel ClHwHelperHw::getSupportedDeviceFeatureCapabilities() const { return ClDeviceHelper::getExtraCapabilities(); } template bool ClHwHelperHw::isFormatRedescribable(cl_image_format format) const { return false; } } // namespace NEO compute-runtime-22.14.22890/opencl/source/helpers/cl_memory_properties_helpers.cpp000066400000000000000000000042611422164147700302670ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/context/context.h" #include "opencl/source/helpers/cl_memory_properties_helpers_base.inl" #include "opencl/source/mem_obj/mem_obj_helper.h" namespace NEO { bool ClMemoryPropertiesHelper::parseMemoryProperties(const cl_mem_properties_intel *properties, MemoryProperties &memoryProperties, cl_mem_flags &flags, cl_mem_flags_intel &flagsIntel, cl_mem_alloc_flags_intel &allocflags, MemoryPropertiesHelper::ObjType objectType, Context &context) { Device *pDevice = &context.getDevice(0)->getDevice(); uintptr_t hostptr = 0; if (properties != nullptr) { for (int i = 0; properties[i] != 0; i += 2) { switch (properties[i]) { case CL_MEM_FLAGS: flags |= static_cast(properties[i + 1]); break; case CL_MEM_FLAGS_INTEL: flagsIntel |= static_cast(properties[i + 1]); break; case CL_MEM_ALLOC_FLAGS_INTEL: allocflags |= static_cast(properties[i + 1]); break; case CL_MEM_ALLOC_USE_HOST_PTR_INTEL: hostptr = static_cast(properties[i + 1]); break; default: return false; } } } memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(flags, flagsIntel, allocflags, pDevice); memoryProperties.hostptr = hostptr; switch (objectType) { case MemoryPropertiesHelper::ObjType::BUFFER: return isFieldValid(flags, MemObjHelper::validFlagsForBuffer) && isFieldValid(flagsIntel, MemObjHelper::validFlagsForBufferIntel); case 
MemoryPropertiesHelper::ObjType::IMAGE: return isFieldValid(flags, MemObjHelper::validFlagsForImage) && isFieldValid(flagsIntel, MemObjHelper::validFlagsForImageIntel); default: break; } return true; } } // namespace NEO compute-runtime-22.14.22890/opencl/source/helpers/cl_memory_properties_helpers.h000066400000000000000000000015531422164147700277350ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/memory_properties_helpers.h" #include "opencl/extensions/public/cl_ext_private.h" namespace NEO { class Context; class ClMemoryPropertiesHelper { public: static MemoryProperties createMemoryProperties(cl_mem_flags flags, cl_mem_flags_intel flagsIntel, cl_mem_alloc_flags_intel allocflags, const Device *pDevice); static bool parseMemoryProperties(const cl_mem_properties_intel *properties, MemoryProperties &memoryProperties, cl_mem_flags &flags, cl_mem_flags_intel &flagsIntel, cl_mem_alloc_flags_intel &allocflags, MemoryPropertiesHelper::ObjType objectType, Context &context); }; } // namespace NEO compute-runtime-22.14.22890/opencl/source/helpers/cl_memory_properties_helpers_base.inl000066400000000000000000000073601422164147700312640ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/bit_helpers.h" #include "opencl/extensions/public/cl_ext_private.h" #include "opencl/source/helpers/cl_memory_properties_helpers.h" #include "CL/cl_ext_intel.h" namespace NEO { MemoryProperties ClMemoryPropertiesHelper::createMemoryProperties(cl_mem_flags flags, cl_mem_flags_intel flagsIntel, cl_mem_alloc_flags_intel allocflags, const Device *pDevice) { MemoryProperties memoryProperties; if (isValueSet(flags, CL_MEM_READ_WRITE)) { memoryProperties.flags.readWrite = true; } if (isValueSet(flags, CL_MEM_WRITE_ONLY)) { memoryProperties.flags.writeOnly = true; } if (isValueSet(flags, CL_MEM_READ_ONLY)) { 
memoryProperties.flags.readOnly = true; } if (isValueSet(flags, CL_MEM_USE_HOST_PTR)) { memoryProperties.flags.useHostPtr = true; } if (isValueSet(flags, CL_MEM_ALLOC_HOST_PTR)) { memoryProperties.flags.allocHostPtr = true; } if (isValueSet(flags, CL_MEM_COPY_HOST_PTR)) { memoryProperties.flags.copyHostPtr = true; } if (isValueSet(flags, CL_MEM_HOST_WRITE_ONLY)) { memoryProperties.flags.hostWriteOnly = true; } if (isValueSet(flags, CL_MEM_HOST_READ_ONLY)) { memoryProperties.flags.hostReadOnly = true; } if (isValueSet(flags, CL_MEM_HOST_NO_ACCESS)) { memoryProperties.flags.hostNoAccess = true; } if (isValueSet(flags, CL_MEM_KERNEL_READ_AND_WRITE)) { memoryProperties.flags.kernelReadAndWrite = true; } if (isValueSet(flags, CL_MEM_FORCE_LINEAR_STORAGE_INTEL) || isValueSet(flagsIntel, CL_MEM_FORCE_LINEAR_STORAGE_INTEL)) { memoryProperties.flags.forceLinearStorage = true; } if (isValueSet(flags, CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL)) { memoryProperties.flags.accessFlagsUnrestricted = true; } if (isValueSet(flags, CL_MEM_NO_ACCESS_INTEL)) { memoryProperties.flags.noAccess = true; } if (isValueSet(flags, CL_MEM_ALLOW_UNRESTRICTED_SIZE_INTEL) || isValueSet(flagsIntel, CL_MEM_ALLOW_UNRESTRICTED_SIZE_INTEL) || DebugManager.flags.AllowUnrestrictedSize.get()) { memoryProperties.flags.allowUnrestrictedSize = true; } if (isValueSet(flagsIntel, CL_MEM_LOCALLY_UNCACHED_RESOURCE)) { memoryProperties.flags.locallyUncachedResource = true; } if (isValueSet(flagsIntel, CL_MEM_LOCALLY_UNCACHED_SURFACE_STATE_RESOURCE)) { memoryProperties.flags.locallyUncachedInSurfaceState = true; } if (isValueSet(flags, CL_MEM_FORCE_HOST_MEMORY_INTEL)) { memoryProperties.flags.forceHostMemory = true; } if (isValueSet(allocflags, CL_MEM_ALLOC_WRITE_COMBINED_INTEL)) { memoryProperties.allocFlags.allocWriteCombined = true; } if (isValueSet(allocflags, CL_MEM_ALLOC_INITIAL_PLACEMENT_DEVICE_INTEL)) { memoryProperties.allocFlags.usmInitialPlacementGpu = true; } if (isValueSet(allocflags, 
CL_MEM_ALLOC_INITIAL_PLACEMENT_HOST_INTEL)) { memoryProperties.allocFlags.usmInitialPlacementCpu = true; } if (isValueSet(flagsIntel, CL_MEM_48BIT_RESOURCE_INTEL)) { memoryProperties.flags.resource48Bit = true; } if (isValueSet(flags, CL_MEM_COMPRESSED_HINT_INTEL) || isValueSet(flagsIntel, CL_MEM_COMPRESSED_HINT_INTEL)) { memoryProperties.flags.compressedHint = true; } if (isValueSet(flags, CL_MEM_UNCOMPRESSED_HINT_INTEL) || isValueSet(flagsIntel, CL_MEM_UNCOMPRESSED_HINT_INTEL)) { memoryProperties.flags.uncompressedHint = true; } memoryProperties.pDevice = pDevice; return memoryProperties; } } // namespace NEO compute-runtime-22.14.22890/opencl/source/helpers/cl_preemption_helper.cpp000066400000000000000000000022171422164147700265010ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/helpers/cl_preemption_helper.h" #include "opencl/source/helpers/dispatch_info.h" namespace NEO { PreemptionMode ClPreemptionHelper::taskPreemptionMode(Device &device, const MultiDispatchInfo &multiDispatchInfo) { PreemptionMode devMode = device.getPreemptionMode(); for (const auto &di : multiDispatchInfo) { auto kernel = di.getKernel(); const KernelDescriptor *kernelDescriptor = nullptr; if (kernel != nullptr) { kernelDescriptor = &kernel->getDescriptor(); } PreemptionFlags flags = PreemptionHelper::createPreemptionLevelFlags(device, kernelDescriptor); PreemptionMode taskMode = PreemptionHelper::taskPreemptionMode(devMode, flags); if (devMode > taskMode) { devMode = taskMode; } PRINT_DEBUG_STRING(DebugManager.flags.PrintDebugMessages.get(), stdout, "devMode = %d, taskMode = %d.\n", static_cast(device.getPreemptionMode()), static_cast(taskMode)); } return devMode; } } // namespace NEO compute-runtime-22.14.22890/opencl/source/helpers/cl_preemption_helper.h000066400000000000000000000006061422164147700261460ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * 
SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/preemption.h" namespace NEO { class Kernel; class Device; struct MultiDispatchInfo; class ClPreemptionHelper { public: static PreemptionMode taskPreemptionMode(Device &device, const MultiDispatchInfo &multiDispatchInfo); }; } // namespace NEO compute-runtime-22.14.22890/opencl/source/helpers/cl_validators.cpp000066400000000000000000000104711422164147700251310ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/helpers/cl_validators.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/context/context.h" #include "opencl/source/event/event.h" #include "opencl/source/helpers/base_object.h" #include "opencl/source/kernel/multi_device_kernel.h" #include "opencl/source/mem_obj/mem_obj.h" #include "opencl/source/platform/platform.h" #include "opencl/source/program/program.h" #include "opencl/source/sampler/sampler.h" namespace NEO { cl_int validateObject(void *ptr) { return ptr != nullptr ? CL_SUCCESS : CL_INVALID_VALUE; } cl_int validateObject(cl_context object) { return castToObject(object) != nullptr ? CL_SUCCESS : CL_INVALID_CONTEXT; } cl_int validateObject(cl_device_id object) { return castToObject(object) != nullptr ? CL_SUCCESS : CL_INVALID_DEVICE; } cl_int validateObject(cl_platform_id object) { return castToObject(object) != nullptr ? CL_SUCCESS : CL_INVALID_PLATFORM; } cl_int validateObject(cl_command_queue object) { return castToObject(object) != nullptr ? CL_SUCCESS : CL_INVALID_COMMAND_QUEUE; } cl_int validateObject(cl_event object) { return castToObject(object) != nullptr ? CL_SUCCESS : CL_INVALID_EVENT; } cl_int validateObject(cl_mem object) { return castToObject(object) != nullptr ? CL_SUCCESS : CL_INVALID_MEM_OBJECT; } cl_int validateObject(cl_sampler object) { return castToObject(object) != nullptr ? 
CL_SUCCESS : CL_INVALID_SAMPLER; } cl_int validateObject(cl_program object) { return castToObject(object) != nullptr ? CL_SUCCESS : CL_INVALID_PROGRAM; } cl_int validateObject(cl_kernel object) { return castToObject(object) != nullptr ? CL_SUCCESS : CL_INVALID_KERNEL; } cl_int validateObject(const EventWaitList &eventWaitList) { if ((!eventWaitList.first) != (!eventWaitList.second)) return CL_INVALID_EVENT_WAIT_LIST; for (cl_uint i = 0; i < eventWaitList.first; i++) { if (validateObject(eventWaitList.second[i]) != CL_SUCCESS) return CL_INVALID_EVENT_WAIT_LIST; } return CL_SUCCESS; } cl_int validateObject(const DeviceList &deviceList) { if ((!deviceList.first) != (!deviceList.second)) return CL_INVALID_VALUE; for (cl_uint i = 0; i < deviceList.first; i++) { if (validateObject(deviceList.second[i]) != CL_SUCCESS) return CL_INVALID_DEVICE; } return CL_SUCCESS; } cl_int validateObject(const MemObjList &memObjList) { if ((!memObjList.first) != (!memObjList.second)) return CL_INVALID_VALUE; for (cl_uint i = 0; i < memObjList.first; i++) { if (validateObject(memObjList.second[i]) != CL_SUCCESS) return CL_INVALID_MEM_OBJECT; } return CL_SUCCESS; } cl_int validateObject(const NonZeroBufferSize &nzbs) { return nzbs ? CL_SUCCESS : CL_INVALID_BUFFER_SIZE; } cl_int validateObject(const PatternSize &ps) { switch ((cl_int)ps) { case 128: case 64: case 32: case 16: case 8: case 4: case 2: case 1: return CL_SUCCESS; default: break; } return CL_INVALID_VALUE; } cl_int validateObject(bool isValid) { return isValid ? CL_SUCCESS : CL_INVALID_VALUE; } cl_int validateYuvOperation(const size_t *origin, const size_t *region) { if (!origin || !region) return CL_INVALID_VALUE; return ((origin[0] % 2 == 0) && (region[0] % 2 == 0)) ? 
CL_SUCCESS : CL_INVALID_VALUE; } bool isPackedYuvImage(const cl_image_format *imageFormat) { auto channelOrder = imageFormat->image_channel_order; return (channelOrder == CL_YUYV_INTEL) || (channelOrder == CL_UYVY_INTEL) || (channelOrder == CL_YVYU_INTEL) || (channelOrder == CL_VYUY_INTEL); } bool isNV12Image(const cl_image_format *imageFormat) { return imageFormat->image_channel_order == CL_NV12_INTEL; } } // namespace NEO compute-runtime-22.14.22890/opencl/source/helpers/cl_validators.h000066400000000000000000000052531422164147700246000ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/api/cl_types.h" #include "opencl/source/helpers/base_object.h" #include "opencl/source/helpers/error_mappers.h" #include namespace NEO { // Provide some aggregators... typedef std::pair EventWaitList; typedef std::pair DeviceList; typedef std::pair MemObjList; // Custom validators enum NonZeroBufferSize : size_t; enum PatternSize : size_t; template CLType WithCastToInternal(CLType clObject, InternalType **internalObject) { *internalObject = NEO::castToObject(clObject); return (*internalObject) ? clObject : nullptr; } // This is the default instance of validateObject. // It should be specialized for specific types. template inline cl_int validateObject(Type object) { return CL_SUCCESS; } // Example of specialization. 
cl_int validateObject(void *ptr); cl_int validateObject(cl_context context); cl_int validateObject(cl_device_id device); cl_int validateObject(cl_platform_id platform); cl_int validateObject(cl_command_queue commandQueue); cl_int validateObject(cl_event platform); cl_int validateObject(cl_mem mem); cl_int validateObject(cl_sampler sampler); cl_int validateObject(cl_program program); cl_int validateObject(cl_kernel kernel); cl_int validateObject(const EventWaitList &eventWaitList); cl_int validateObject(const DeviceList &deviceList); cl_int validateObject(const MemObjList &memObjList); cl_int validateObject(const NonZeroBufferSize &nzbs); cl_int validateObject(const PatternSize &ps); cl_int validateObject(bool isValid); // This is the sentinel for the follow variadic template definition. inline cl_int validateObjects() { return CL_SUCCESS; } // This provides variadic object validation. // It automatically checks for nullptrs and then passes // onto type specific validator. template inline cl_int validateObjects(const Type &object, Types... rest) { auto retVal = validateObject(object); return CL_SUCCESS != retVal ? retVal : validateObjects(rest...); } template inline cl_int validateObjects(Type *object, Types... rest) { auto retVal = object ? validateObject(object) : NullObjectErrorMapper::retVal; return CL_SUCCESS != retVal ? 
retVal : validateObjects(rest...); } cl_int validateYuvOperation(const size_t *origin, const size_t *region); bool isPackedYuvImage(const cl_image_format *imageFormat); bool isNV12Image(const cl_image_format *imageFormat); } // namespace NEO compute-runtime-22.14.22890/opencl/source/helpers/convert_color.h000066400000000000000000000067201422164147700246300ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/basic_math.h" #include "CL/cl.h" #include namespace NEO { inline int32_t selectNormalizingFactor(const cl_channel_type &channelType) { if (channelType == CL_UNORM_INT8) { return 0xFF; } if (channelType == CL_SNORM_INT8) { return 0x7F; } if (channelType == CL_UNORM_INT16) { return 0xFFFF; } if (channelType == CL_SNORM_INT16) { return 0x7fFF; } return 0; } inline void convertFillColor(const void *fillColor, int32_t *iFillColor, const cl_image_format &oldImageFormat, const cl_image_format &newImageFormat) { float fFillColor[4] = {0.0f}; for (auto i = 0; i < 4; i++) { iFillColor[i] = *((int32_t *)fillColor + i); fFillColor[i] = *((float *)fillColor + i); } if (oldImageFormat.image_channel_order == CL_A) { std::swap(iFillColor[0], iFillColor[3]); std::swap(fFillColor[0], fFillColor[3]); } else if (oldImageFormat.image_channel_order == CL_BGRA || oldImageFormat.image_channel_order == CL_sBGRA) { std::swap(iFillColor[0], iFillColor[2]); std::swap(fFillColor[0], fFillColor[2]); } if (oldImageFormat.image_channel_order == CL_sRGBA || oldImageFormat.image_channel_order == CL_sBGRA) { for (auto i = 0; i < 3; i++) { if (fFillColor[i] != fFillColor[i]) { fFillColor[i] = 0.0f; } if (fFillColor[i] > 1.0f) { fFillColor[i] = 1.0f; } else if (fFillColor[i] < 0.0f) { fFillColor[i] = 0.0f; } else if (fFillColor[i] < 0.0031308f) { fFillColor[i] = 12.92f * fFillColor[i]; } else { fFillColor[i] = 1.055f * pow(fFillColor[i], 1.0f / 2.4f) - 0.055f; } } } if 
(newImageFormat.image_channel_data_type == CL_UNSIGNED_INT8) { auto normalizingFactor = selectNormalizingFactor(oldImageFormat.image_channel_data_type); if (normalizingFactor > 0) { for (auto i = 0; i < 4; i++) { if ((oldImageFormat.image_channel_order == CL_sRGBA || oldImageFormat.image_channel_order == CL_sBGRA) && i < 3) { iFillColor[i] = static_cast(normalizingFactor * fFillColor[i] + 0.5f); } else { iFillColor[i] = static_cast(normalizingFactor * fFillColor[i]); } } } for (auto i = 0; i < 4; i++) { iFillColor[i] = iFillColor[i] & 0xFF; } } else if (newImageFormat.image_channel_data_type == CL_UNSIGNED_INT16) { auto normalizingFactor = selectNormalizingFactor(oldImageFormat.image_channel_data_type); if (normalizingFactor > 0) { for (auto i = 0; i < 4; i++) { iFillColor[i] = static_cast(normalizingFactor * fFillColor[i]); } } else if (oldImageFormat.image_channel_data_type == CL_HALF_FLOAT) { //float to half convert. for (auto i = 0; i < 4; i++) { uint16_t temp = Math::float2Half(fFillColor[i]); iFillColor[i] = temp; } } for (auto i = 0; i < 4; i++) { iFillColor[i] = iFillColor[i] & 0xFFFF; } } } } // namespace NEO compute-runtime-22.14.22890/opencl/source/helpers/csr_deps.cpp000066400000000000000000000007031422164147700241020ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/csr_deps.h" #include "opencl/source/helpers/timestamp_packet.h" namespace NEO { void CsrDependencies::makeResident(CommandStreamReceiver &commandStreamReceiver) const { for (auto ×tampPacketContainer : *this) { timestampPacketContainer->makeResident(commandStreamReceiver); } } } // namespace NEO compute-runtime-22.14.22890/opencl/source/helpers/destructor_callbacks.h000066400000000000000000000015021422164147700261400ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "CL/cl.h" namespace NEO { template class 
DestructorCallbacks { using CallbackType = void CL_CALLBACK(T, void *); public: inline void add(CallbackType *callback, void *userData) { callbacks.push_back({callback, userData}); } inline bool empty() { return callbacks.empty(); } inline void invoke(T object) { for (auto it = callbacks.rbegin(); it != callbacks.rend(); it++) { it->first(object, it->second); } } private: std::vector> callbacks; }; using ContextDestructorCallbacks = DestructorCallbacks; using MemObjDestructorCallbacks = DestructorCallbacks; } // namespace NEO compute-runtime-22.14.22890/opencl/source/helpers/dispatch_info.cpp000066400000000000000000000021231422164147700251100ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/helpers/dispatch_info.h" #include "opencl/source/kernel/kernel.h" namespace NEO { bool DispatchInfo::usesSlm() const { return (kernel == nullptr) ? false : kernel->getSlmTotalSize() > 0; } bool DispatchInfo::usesStatelessPrintfSurface() const { return (kernel == nullptr) ? false : kernel->hasPrintfOutput(); } uint32_t DispatchInfo::getRequiredScratchSize() const { return (kernel == nullptr) ? 0 : kernel->getScratchSize(); } uint32_t DispatchInfo::getRequiredPrivateScratchSize() const { return (kernel == nullptr) ? 0 : kernel->getPrivateScratchSize(); } Kernel *MultiDispatchInfo::peekMainKernel() const { if (dispatchInfos.size() == 0) { return nullptr; } return mainKernel ? 
mainKernel : dispatchInfos.begin()->getKernel(); } void MultiDispatchInfo::backupUnifiedMemorySyncRequirement() { for (const auto &dispatchInfo : dispatchInfos) { dispatchInfo.getKernel()->setUnifiedMemorySyncRequirement(true); } } } // namespace NEO compute-runtime-22.14.22890/opencl/source/helpers/dispatch_info.h000066400000000000000000000171441422164147700245660ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/registered_method_dispatcher.h" #include "shared/source/helpers/vec.h" #include "shared/source/memory_manager/surface.h" #include "shared/source/utilities/stackvec.h" #include "opencl/source/built_ins/builtins_dispatch_builder.h" #include "opencl/source/kernel/kernel_objects_for_aux_translation.h" #include "opencl/source/mem_obj/mem_obj.h" #include #include namespace NEO { class Kernel; class ClDevice; struct TimestampPacketDependencies; class DispatchInfo { public: using DispatchCommandMethodT = void(LinearStream &commandStream, TimestampPacketDependencies *timestampPacketDependencies, const HardwareInfo &); using EstimateCommandsMethodT = size_t(size_t, const HardwareInfo &, bool); DispatchInfo() = default; DispatchInfo(ClDevice *device, Kernel *kernel, uint32_t dim, const Vec3 &gws, const Vec3 &elws, const Vec3 &offset) : pClDevice(device), kernel(kernel), dim(dim), gws(gws), elws(elws), offset(offset) {} DispatchInfo(ClDevice *device, Kernel *kernel, uint32_t dim, const Vec3 &gws, const Vec3 &elws, const Vec3 &offset, const Vec3 &agws, const Vec3 &lws, const Vec3 &twgs, const Vec3 &nwgs, const Vec3 &swgs) : pClDevice(device), kernel(kernel), dim(dim), gws(gws), elws(elws), offset(offset), agws(agws), lws(lws), twgs(twgs), nwgs(nwgs), swgs(swgs) {} ClDevice &getClDevice() const { return *pClDevice; } void setClDevice(ClDevice *device) { pClDevice = device; } bool usesSlm() const; bool usesStatelessPrintfSurface() const; uint32_t 
getRequiredScratchSize() const; uint32_t getRequiredPrivateScratchSize() const; void setKernel(Kernel *kernel) { this->kernel = kernel; } Kernel *getKernel() const { return kernel; } uint32_t getDim() const { return dim; } void setDim(uint32_t dim) { this->dim = dim; } const Vec3 &getGWS() const { return gws; }; void setGWS(const Vec3 &gws) { this->gws = gws; } const Vec3 &getEnqueuedWorkgroupSize() const { return elws; }; void setEnqueuedWorkgroupSize(const Vec3 &elws) { this->elws = elws; } const Vec3 &getOffset() const { return offset; }; void setOffsets(const Vec3 &offset) { this->offset = offset; } const Vec3 &getActualWorkgroupSize() const { return agws; }; void setActualGlobalWorkgroupSize(const Vec3 &agws) { this->agws = agws; } const Vec3 &getLocalWorkgroupSize() const { return lws; }; void setLWS(const Vec3 &lws) { this->lws = lws; } const Vec3 &getTotalNumberOfWorkgroups() const { return twgs; }; void setTotalNumberOfWorkgroups(const Vec3 &twgs) { this->twgs = twgs; } const Vec3 &getNumberOfWorkgroups() const { return nwgs; }; void setNumberOfWorkgroups(const Vec3 &nwgs) { this->nwgs = nwgs; } const Vec3 &getStartOfWorkgroups() const { return swgs; }; void setStartOfWorkgroups(const Vec3 &swgs) { this->swgs = swgs; } bool peekCanBePartitioned() const { return canBePartitioned; } void setCanBePartitioned(bool canBePartitioned) { this->canBePartitioned = canBePartitioned; } RegisteredMethodDispatcher dispatchInitCommands; RegisteredMethodDispatcher dispatchEpilogueCommands; protected: ClDevice *pClDevice = nullptr; bool canBePartitioned = false; Kernel *kernel = nullptr; uint32_t dim = 0; Vec3 gws{0, 0, 0}; //global work size Vec3 elws{0, 0, 0}; //enqueued local work size Vec3 offset{0, 0, 0}; //global offset Vec3 agws{0, 0, 0}; //actual global work size Vec3 lws{0, 0, 0}; //local work size Vec3 twgs{0, 0, 0}; //total number of work groups Vec3 nwgs{0, 0, 0}; //number of work groups Vec3 swgs{0, 0, 0}; //start of work groups }; struct MultiDispatchInfo { 
~MultiDispatchInfo() { for (MemObj *redescribedSurface : redescribedSurfaces) { redescribedSurface->release(); } } explicit MultiDispatchInfo(Kernel *mainKernel) : mainKernel(mainKernel) {} explicit MultiDispatchInfo(const BuiltinOpParams &operationParams) : builtinOpParams(operationParams) {} MultiDispatchInfo() = default; MultiDispatchInfo &operator=(const MultiDispatchInfo &) = delete; MultiDispatchInfo(const MultiDispatchInfo &) = delete; bool empty() const { return dispatchInfos.size() == 0; } bool usesSlm() const { for (const auto &dispatchInfo : dispatchInfos) { if (dispatchInfo.usesSlm()) { return true; } } return false; } bool usesStatelessPrintfSurface() const { for (const auto &dispatchInfo : dispatchInfos) { if (dispatchInfo.usesStatelessPrintfSurface()) { return true; } } return false; } uint32_t getRequiredScratchSize() const { uint32_t ret = 0; for (const auto &dispatchInfo : dispatchInfos) { ret = std::max(ret, dispatchInfo.getRequiredScratchSize()); } return ret; } uint32_t getRequiredPrivateScratchSize() const { uint32_t ret = 0; for (const auto &dispatchInfo : dispatchInfos) { ret = std::max(ret, dispatchInfo.getRequiredPrivateScratchSize()); } return ret; } void backupUnifiedMemorySyncRequirement(); DispatchInfo *begin() { return dispatchInfos.begin(); } const DispatchInfo *begin() const { return dispatchInfos.begin(); } std::reverse_iterator rbegin() { return dispatchInfos.rbegin(); } std::reverse_iterator crbegin() const { return dispatchInfos.crbegin(); } DispatchInfo *end() { return dispatchInfos.end(); } const DispatchInfo *end() const { return dispatchInfos.end(); } std::reverse_iterator rend() { return dispatchInfos.rend(); } std::reverse_iterator crend() const { return dispatchInfos.crend(); } void push(const DispatchInfo &dispatchInfo) { dispatchInfos.push_back(dispatchInfo); } size_t size() const { return dispatchInfos.size(); } StackVec &getRedescribedSurfaces() { return redescribedSurfaces; } void 
pushRedescribedMemObj(std::unique_ptr memObj) { redescribedSurfaces.push_back(memObj.release()); } Kernel *peekMainKernel() const; void setBuiltinOpParams(const BuiltinOpParams &builtinOpParams) { this->builtinOpParams = builtinOpParams; } const BuiltinOpParams &peekBuiltinOpParams() const { return builtinOpParams; } void setKernelObjsForAuxTranslation(const KernelObjsForAuxTranslation &kernelObjsForAuxTranslation) { this->kernelObjsForAuxTranslation = &kernelObjsForAuxTranslation; } const KernelObjsForAuxTranslation *getKernelObjsForAuxTranslation() const { return kernelObjsForAuxTranslation; } protected: BuiltinOpParams builtinOpParams = {}; StackVec dispatchInfos; StackVec redescribedSurfaces; const KernelObjsForAuxTranslation *kernelObjsForAuxTranslation = nullptr; Kernel *mainKernel = nullptr; }; } // namespace NEO compute-runtime-22.14.22890/opencl/source/helpers/dispatch_info_builder.h000066400000000000000000000704151422164147700262740ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/local_work_size.h" #include "opencl/source/command_queue/cl_local_work_size.h" #include "opencl/source/helpers/dispatch_info.h" #include "opencl/source/kernel/kernel.h" namespace NEO { namespace SplitDispatch { enum class Dim : uint32_t { d1D = 0, d2D = 1, d3D = 2 }; enum class SplitMode : uint32_t { NoSplit = 0, WalkerSplit = 1, // 1 kernel and many GPGPU walkers (e.g. for non-uniform workgroup sizes) KernelSplit = 2 // many kernels and many GPGPU walkers (e.g. 
// Computes base raised to currExp at compile time.
//
// base    - value to be multiplied by itself.
// currExp - exponent; 0 is the terminating case and yields 1.
//
// Returns base^currExp in uint32_t arithmetic (wraps on overflow like any
// unsigned multiplication).
//
// The recursion terminates at exponent 0 rather than 1: with 1 as the only
// terminating case, an exponent of 0 would decrement past zero, wrap around
// and never reach the end of the recursion.
static constexpr uint32_t powConst(uint32_t base, uint32_t currExp) {
    return (currExp == 0) ? 1u : base * powConst(base, currExp - 1);
}
// Applies the same kernel argument to every dispatch slot that has a kernel attached.
//
// args - forwarded unchanged to Kernel::setArg of each attached kernel.
//
// Slots whose getKernel() is null are skipped. The loop stops at the first
// kernel whose setArg() returns something other than CL_SUCCESS and that code
// is returned; if every call succeeds (or no slot has a kernel) the result is
// CL_SUCCESS.
//
// NOTE(review): the argument pack is forwarded again on every iteration. If a
// caller ever passes an rvalue of a movable type, iterations after the first
// could observe a moved-from value - confirm callers only pass cheap,
// copy-like arguments.
template cl_int setArg(ArgsT &&...args) {
    cl_int result = CL_SUCCESS;
    for (auto &dispatchInfo : dispatchInfos) {
        // Not every slot necessarily has a kernel; only touch the ones that do.
        if (dispatchInfo.getKernel()) {
            result = dispatchInfo.getKernel()->setArg(std::forward(args)...);
            if (result != CL_SUCCESS) {
                // First failure wins; remaining kernels are left untouched.
                break;
            }
        }
    }
    return result;
}
// Fills in the geometry of the first dispatch slot using an explicit dimension count.
// Only participates in overload resolution when the split mode M is NoSplit or WalkerSplit.
//
// dim    - number of dimensions; a debug break fires if it exceeds the builder's Dim + 1.
// gws    - stored via setGWS().
// elws   - stored via setEnqueuedWorkgroupSize().
// offset - stored via setOffsets().
// agws   - stored via setActualGlobalWorkgroupSize(); defaults to {0, 0, 0}.
// lws    - stored via setLWS(); defaults to {0, 0, 0}.
// twgs   - stored via setTotalNumberOfWorkgroups(); defaults to {0, 0, 0}.
// nwgs   - stored via setNumberOfWorkgroups(); defaults to {0, 0, 0}.
// swgs   - stored via setStartOfWorkgroups(); defaults to {0, 0, 0}.
template typename std::enable_if<(M == SplitDispatch::SplitMode::NoSplit) || (M == SplitDispatch::SplitMode::WalkerSplit), void>::type setDispatchGeometry(const uint32_t dim, const Vec3 &gws, const Vec3 &elws, const Vec3 &offset, const Vec3 &agws = {0, 0, 0}, const Vec3 &lws = {0, 0, 0}, const Vec3 &twgs = {0, 0, 0}, const Vec3 &nwgs = {0, 0, 0}, const Vec3 &swgs = {0, 0, 0}) {
    // These modes always write slot 0.
    auto &dispatchInfo = dispatchInfos[0];
    DEBUG_BREAK_IF(dim > static_cast(Dim) + 1);
    dispatchInfo.setDim(dim);
    dispatchInfo.setGWS(gws);
    dispatchInfo.setEnqueuedWorkgroupSize(elws);
    dispatchInfo.setOffsets(offset);
    dispatchInfo.setActualGlobalWorkgroupSize(agws);
    dispatchInfo.setLWS(lws);
    dispatchInfo.setTotalNumberOfWorkgroups(twgs);
    dispatchInfo.setNumberOfWorkgroups(nwgs);
    dispatchInfo.setStartOfWorkgroups(swgs);
}
SplitDispatch::SplitMode::NoSplit), void>::type setDispatchGeometry(SplitDispatch::RegionCoordX x, const Vec3 &gws, const Vec3 &elws, const Vec3 &offset, const Vec3 &agws = {0, 0, 0}, const Vec3 &lws = {0, 0, 0}, const Vec3 &twgs = {0, 0, 0}, const Vec3 &nwgs = {0, 0, 0}, const Vec3 &swgs = {0, 0, 0}) { auto &dispatchInfo = dispatchInfos[getDispatchId(x)]; dispatchInfo.setDim(static_cast(Dim) + 1); dispatchInfo.setGWS(gws); dispatchInfo.setEnqueuedWorkgroupSize(elws); dispatchInfo.setOffsets(offset); dispatchInfo.setActualGlobalWorkgroupSize(agws); dispatchInfo.setLWS(lws); dispatchInfo.setTotalNumberOfWorkgroups(twgs); dispatchInfo.setNumberOfWorkgroups(nwgs); dispatchInfo.setStartOfWorkgroups(swgs); } template typename std::enable_if<(D == SplitDispatch::Dim::d2D) && (Mode != SplitDispatch::SplitMode::NoSplit), void>::type setDispatchGeometry(SplitDispatch::RegionCoordX x, SplitDispatch::RegionCoordY y, const Vec3 &gws, const Vec3 &elws, const Vec3 &offset, const Vec3 &agws = {0, 0, 0}, const Vec3 lws = {0, 0, 0}, const Vec3 &twgs = {0, 0, 0}, const Vec3 &nwgs = {0, 0, 0}, const Vec3 &swgs = {0, 0, 0}) { auto &dispatchInfo = dispatchInfos[getDispatchId(x, y)]; dispatchInfo.setDim(static_cast(Dim) + 1); dispatchInfo.setGWS(gws); dispatchInfo.setEnqueuedWorkgroupSize(elws); dispatchInfo.setOffsets(offset); dispatchInfo.setActualGlobalWorkgroupSize(agws); dispatchInfo.setLWS(lws); dispatchInfo.setTotalNumberOfWorkgroups(twgs); dispatchInfo.setNumberOfWorkgroups(nwgs); dispatchInfo.setStartOfWorkgroups(swgs); } template typename std::enable_if<(D == SplitDispatch::Dim::d3D) && (Mode != SplitDispatch::SplitMode::NoSplit), void>::type setDispatchGeometry(SplitDispatch::RegionCoordX x, SplitDispatch::RegionCoordY y, SplitDispatch::RegionCoordZ z, const Vec3 &gws, const Vec3 &elws, const Vec3 &offset, const Vec3 &agws = {0, 0, 0}, const Vec3 &lws = {0, 0, 0}, const Vec3 &twgs = {0, 0, 0}, const Vec3 &nwgs = {0, 0, 0}, const Vec3 &swgs = {0, 0, 0}) { auto &dispatchInfo = 
// Finalizes every dispatch slot held by this builder and appends the usable ones to `target`.
//
// For each slot, in order:
//  - the slot is skipped when its global work size is not valid for its dimension count;
//  - a dimension count of 0 is replaced by one derived from the global work size and offset;
//  - sizes are canonized, and values still at zero are filled in (actual global size from
//    the global size, local size and total work-group count from the generate* helpers,
//    work-group count from the total work-group count);
//  - the slot is skipped when the actual work size is not valid;
//  - the slot is either split into several dispatches (only when the mode supports it and
//    the global size is not a multiple of the local size) or pushed to `target` as is.
void bake(MultiDispatchInfo &target) {
    for (auto &dispatchInfo : dispatchInfos) {
        if (!isWorkSizeValid(dispatchInfo.getDim(), dispatchInfo.getGWS())) {
            continue;
        }
        // A dimension count of 0 means "not specified yet": derive it from the geometry.
        dispatchInfo.setDim(dispatchInfo.getDim() == 0 ? calculateDispatchDim(dispatchInfo.getGWS(), dispatchInfo.getOffset()) : dispatchInfo.getDim());
        dispatchInfo.setGWS(canonizeWorkgroup(dispatchInfo.getGWS()));
        // An all-zero actual size means the caller did not provide one; fall back to the global size.
        if (dispatchInfo.getActualWorkgroupSize() == Vec3({0, 0, 0})) {
            dispatchInfo.setActualGlobalWorkgroupSize(dispatchInfo.getGWS());
        }
        // Slots whose actual size is empty in a used dimension are dropped here
        // (this is what discards remainder regions that turned out to be empty).
        if (!isWorkSizeValid(dispatchInfo.getDim(), dispatchInfo.getActualWorkgroupSize())) {
            continue;
        }
        dispatchInfo.setEnqueuedWorkgroupSize(canonizeWorkgroup(dispatchInfo.getEnqueuedWorkgroupSize()));
        // Only x is inspected to decide whether a local size was supplied.
        if (dispatchInfo.getLocalWorkgroupSize().x == 0) {
            dispatchInfo.setLWS(generateWorkgroupSize(dispatchInfo));
        }
        dispatchInfo.setLWS(canonizeWorkgroup(dispatchInfo.getLocalWorkgroupSize()));
        if (dispatchInfo.getTotalNumberOfWorkgroups().x == 0) {
            dispatchInfo.setTotalNumberOfWorkgroups(generateWorkgroupsNumber(dispatchInfo));
        }
        dispatchInfo.setTotalNumberOfWorkgroups(canonizeWorkgroup(dispatchInfo.getTotalNumberOfWorkgroups()));
        if (dispatchInfo.getNumberOfWorkgroups().x == 0) {
            dispatchInfo.setNumberOfWorkgroups(dispatchInfo.getTotalNumberOfWorkgroups());
        }
        if (supportsSplit() && needsSplit(dispatchInfo)) {
            pushSplit(dispatchInfo, target);
        } else {
            target.push(dispatchInfo);
            PRINT_DEBUG_STRING(DebugManager.flags.PrintDebugMessages.get(), stdout,
                               "DIM:%u\tGWS:(%zu, %zu, %zu)\tELWS:(%zu, %zu, %zu)\tOffset:(%zu, %zu, %zu)\tAGWS:(%zu, %zu, %zu)\tLWS:(%zu, %zu, %zu)\tTWGS:(%zu, %zu, %zu)\tNWGS:(%zu, %zu, %zu)\tSWGS:(%zu, %zu, %zu)\n",
                               dispatchInfo.getDim(),
                               dispatchInfo.getGWS().x, dispatchInfo.getGWS().y, dispatchInfo.getGWS().z,
                               dispatchInfo.getEnqueuedWorkgroupSize().x, dispatchInfo.getEnqueuedWorkgroupSize().y, dispatchInfo.getEnqueuedWorkgroupSize().z,
                               dispatchInfo.getOffset().x, dispatchInfo.getOffset().y, dispatchInfo.getOffset().z,
                               dispatchInfo.getActualWorkgroupSize().x, dispatchInfo.getActualWorkgroupSize().y, dispatchInfo.getActualWorkgroupSize().z,
                               dispatchInfo.getLocalWorkgroupSize().x, dispatchInfo.getLocalWorkgroupSize().y, dispatchInfo.getLocalWorkgroupSize().z,
                               dispatchInfo.getTotalNumberOfWorkgroups().x, dispatchInfo.getTotalNumberOfWorkgroups().y, dispatchInfo.getTotalNumberOfWorkgroups().z,
                               dispatchInfo.getNumberOfWorkgroups().x, dispatchInfo.getNumberOfWorkgroups().y, dispatchInfo.getNumberOfWorkgroups().z,
                               dispatchInfo.getStartOfWorkgroups().x, dispatchInfo.getStartOfWorkgroups().y, dispatchInfo.getStartOfWorkgroups().z);
        }
    }
}

// Returns a mutable reference to the slot at `index`. No bounds check is performed.
DispatchInfo &getDispatchInfo(size_t index) { return dispatchInfos[index]; }

// Number of dispatch slots this builder instantiation holds.
static constexpr size_t getMaxNumDispatches() { return numDispatches; }

protected:
// True only for the WalkerSplit mode; other modes never go through pushSplit().
static bool supportsSplit() { return (Mode == SplitDispatch::SplitMode::WalkerSplit); }

// True when the global size is not a multiple of the local size in at least one axis.
// Implemented as "sum of the three remainders is non-zero"; every local size component
// must be non-zero when this is called (bake() canonizes the local size beforehand).
static bool needsSplit(const DispatchInfo &dispatchInfo) {
    return (dispatchInfo.getGWS().x % dispatchInfo.getLocalWorkgroupSize().x + dispatchInfo.getGWS().y % dispatchInfo.getLocalWorkgroupSize().y + dispatchInfo.getGWS().z % dispatchInfo.getLocalWorkgroupSize().z != 0);
}

// Breaks one dispatch whose global size is not a multiple of its local size into a
// "main" region plus remainder regions, and bakes them into `outMdi`.
//
// Per axis, the main region covers the global size rounded down to a multiple of the
// local size and keeps the full local size; the remainder region covers what is left
// (global % local) and uses that remainder as its local size. 1D yields up to 2 regions,
// 2D up to 4, 3D up to 8. Regions whose remainder is zero end up with an empty actual
// size and are dropped by the validity check in bake().
//
// For every region the variables named *GWS are passed as the `agws` argument of
// setDispatchGeometry(); the original global size, enqueued local size, offset and total
// work-group count are passed through unchanged.
//
// NOTE(review): for remainder regions, *NWGS is "main count + 1 if a remainder exists",
// paired with *SWGS = main count - it looks like an end position rather than a count;
// confirm against the consumer of getNumberOfWorkgroups()/getStartOfWorkgroups().
//
// A dimension count other than 1, 2 or 3 pushes nothing.
static void pushSplit(const DispatchInfo &dispatchInfo, MultiDispatchInfo &outMdi) {
    // Slot coordinates: the main region lives in the first slot of each axis,
    // the remainder region in the second.
    constexpr auto xMain = SplitDispatch::RegionCoordX::Left;
    constexpr auto xRight = SplitDispatch::RegionCoordX::Middle;
    constexpr auto yMain = SplitDispatch::RegionCoordY::Top;
    constexpr auto yBottom = SplitDispatch::RegionCoordY::Middle;
    constexpr auto zMain = SplitDispatch::RegionCoordZ::Front;
    constexpr auto zBack = SplitDispatch::RegionCoordZ::Middle;
    switch (dispatchInfo.getDim()) {
    default:
        break;
    case 1: {
        // Local sizes: full size for main, x remainder for right.
        Vec3 mainLWS = {dispatchInfo.getLocalWorkgroupSize().x, 1, 1};
        Vec3 rightLWS = {dispatchInfo.getGWS().x % dispatchInfo.getLocalWorkgroupSize().x, 1, 1};
        // Actual sizes covered by each region.
        Vec3 mainGWS = {alignDown(dispatchInfo.getGWS().x, mainLWS.x), 1, 1};
        Vec3 rightGWS = {dispatchInfo.getGWS().x % mainLWS.x, 1, 1};
        Vec3 mainNWGS = {mainGWS.x / mainLWS.x, 1, 1};
        Vec3 rightNWGS = {mainNWGS.x + isIndivisible(rightGWS.x, mainLWS.x), 1, 1};
        // Starting work-group of each region: the remainder starts where main ends.
        Vec3 mainSWGS = {0, 0, 0};
        Vec3 rightSWGS = {mainNWGS.x, 0, 0};
        DispatchInfoBuilder builder1D(dispatchInfo.getClDevice());
        builder1D.setKernel(dispatchInfo.getKernel());
        builder1D.setDispatchGeometry(xMain, dispatchInfo.getGWS(), dispatchInfo.getEnqueuedWorkgroupSize(), dispatchInfo.getOffset(), mainGWS, mainLWS, dispatchInfo.getTotalNumberOfWorkgroups(), mainNWGS, mainSWGS);
        builder1D.setDispatchGeometry(xRight, dispatchInfo.getGWS(), dispatchInfo.getEnqueuedWorkgroupSize(), dispatchInfo.getOffset(), rightGWS, rightLWS, dispatchInfo.getTotalNumberOfWorkgroups(), rightNWGS, rightSWGS);
        builder1D.bake(outMdi);
    } break;
    case 2: {
        // Local sizes: remainder in x for "right", in y for "bottom", in both for "rightbottom".
        Vec3 mainLWS = {dispatchInfo.getLocalWorkgroupSize().x, dispatchInfo.getLocalWorkgroupSize().y, 1};
        Vec3 rightLWS = {dispatchInfo.getGWS().x % dispatchInfo.getLocalWorkgroupSize().x, dispatchInfo.getLocalWorkgroupSize().y, 1};
        Vec3 bottomLWS = {dispatchInfo.getLocalWorkgroupSize().x, dispatchInfo.getGWS().y % dispatchInfo.getLocalWorkgroupSize().y, 1};
        Vec3 rightbottomLWS = {dispatchInfo.getGWS().x % dispatchInfo.getLocalWorkgroupSize().x, dispatchInfo.getGWS().y % dispatchInfo.getLocalWorkgroupSize().y, 1};
        // Actual sizes covered by each region.
        Vec3 mainGWS = {alignDown(dispatchInfo.getGWS().x, mainLWS.x), alignDown(dispatchInfo.getGWS().y, mainLWS.y), 1};
        Vec3 rightGWS = {dispatchInfo.getGWS().x % mainLWS.x, alignDown(dispatchInfo.getGWS().y, mainLWS.y), 1};
        Vec3 bottomGWS = {alignDown(dispatchInfo.getGWS().x, mainLWS.x), dispatchInfo.getGWS().y % mainLWS.y, 1};
        Vec3 rightbottomGWS = {dispatchInfo.getGWS().x % mainLWS.x, dispatchInfo.getGWS().y % mainLWS.y, 1};
        Vec3 mainNWGS = {mainGWS.x / mainLWS.x, mainGWS.y / mainLWS.y, 1};
        Vec3 rightNWGS = {mainNWGS.x + isIndivisible(rightGWS.x, mainLWS.x), mainGWS.y / mainLWS.y, 1};
        Vec3 bottomNWGS = {mainGWS.x / mainLWS.x, mainNWGS.y + isIndivisible(bottomGWS.y, mainLWS.y), 1};
        Vec3 rightbottomNWGS = {mainNWGS.x + isIndivisible(rightGWS.x, mainLWS.x), mainNWGS.y + isIndivisible(bottomGWS.y, mainLWS.y), 1};
        // Starting work-group of each region.
        Vec3 mainSWGS = {0, 0, 0};
        Vec3 rightSWGS = {mainNWGS.x, 0, 0};
        Vec3 bottomSWGS = {0, mainNWGS.y, 0};
        Vec3 rightbottomSWGS = {mainNWGS.x, mainNWGS.y, 0};
        DispatchInfoBuilder builder2D(dispatchInfo.getClDevice());
        builder2D.setKernel(dispatchInfo.getKernel());
        builder2D.setDispatchGeometry(xMain, yMain, dispatchInfo.getGWS(), dispatchInfo.getEnqueuedWorkgroupSize(), dispatchInfo.getOffset(), mainGWS, mainLWS, dispatchInfo.getTotalNumberOfWorkgroups(), mainNWGS, mainSWGS);
        builder2D.setDispatchGeometry(xRight, yMain, dispatchInfo.getGWS(), dispatchInfo.getEnqueuedWorkgroupSize(), dispatchInfo.getOffset(), rightGWS, rightLWS, dispatchInfo.getTotalNumberOfWorkgroups(), rightNWGS, rightSWGS);
        builder2D.setDispatchGeometry(xMain, yBottom, dispatchInfo.getGWS(), dispatchInfo.getEnqueuedWorkgroupSize(), dispatchInfo.getOffset(), bottomGWS, bottomLWS, dispatchInfo.getTotalNumberOfWorkgroups(), bottomNWGS, bottomSWGS);
        builder2D.setDispatchGeometry(xRight, yBottom, dispatchInfo.getGWS(), dispatchInfo.getEnqueuedWorkgroupSize(), dispatchInfo.getOffset(), rightbottomGWS, rightbottomLWS, dispatchInfo.getTotalNumberOfWorkgroups(), rightbottomNWGS, rightbottomSWGS);
        builder2D.bake(outMdi);
    } break;
    case 3: {
        // Local sizes: "right" = remainder in x, "bottom" = remainder in y, "back" = remainder in z.
        Vec3 mainLWS = dispatchInfo.getLocalWorkgroupSize();
        Vec3 rightLWS = {dispatchInfo.getGWS().x % dispatchInfo.getLocalWorkgroupSize().x, dispatchInfo.getLocalWorkgroupSize().y, dispatchInfo.getLocalWorkgroupSize().z};
        Vec3 bottomLWS = {dispatchInfo.getLocalWorkgroupSize().x, dispatchInfo.getGWS().y % dispatchInfo.getLocalWorkgroupSize().y, dispatchInfo.getLocalWorkgroupSize().z};
        Vec3 rightbottomLWS = {dispatchInfo.getGWS().x % dispatchInfo.getLocalWorkgroupSize().x, dispatchInfo.getGWS().y % dispatchInfo.getLocalWorkgroupSize().y, dispatchInfo.getLocalWorkgroupSize().z};
        Vec3 mainbackLWS = {dispatchInfo.getLocalWorkgroupSize().x, dispatchInfo.getLocalWorkgroupSize().y, dispatchInfo.getGWS().z % dispatchInfo.getLocalWorkgroupSize().z};
        Vec3 rightbackLWS = {dispatchInfo.getGWS().x % dispatchInfo.getLocalWorkgroupSize().x, dispatchInfo.getLocalWorkgroupSize().y, dispatchInfo.getGWS().z % dispatchInfo.getLocalWorkgroupSize().z};
        Vec3 bottombackLWS = {dispatchInfo.getLocalWorkgroupSize().x, dispatchInfo.getGWS().y % dispatchInfo.getLocalWorkgroupSize().y, dispatchInfo.getGWS().z % dispatchInfo.getLocalWorkgroupSize().z};
        Vec3 rightbottombackLWS = {dispatchInfo.getGWS().x % dispatchInfo.getLocalWorkgroupSize().x, dispatchInfo.getGWS().y % dispatchInfo.getLocalWorkgroupSize().y, dispatchInfo.getGWS().z % dispatchInfo.getLocalWorkgroupSize().z};
        // Actual sizes covered by each region.
        Vec3 mainGWS = {alignDown(dispatchInfo.getGWS().x, mainLWS.x), alignDown(dispatchInfo.getGWS().y, mainLWS.y), alignDown(dispatchInfo.getGWS().z, mainLWS.z)};
        Vec3 rightGWS = {dispatchInfo.getGWS().x % mainLWS.x, alignDown(dispatchInfo.getGWS().y, mainLWS.y), alignDown(dispatchInfo.getGWS().z, mainLWS.z)};
        Vec3 bottomGWS = {alignDown(dispatchInfo.getGWS().x, mainLWS.x), dispatchInfo.getGWS().y % mainLWS.y, alignDown(dispatchInfo.getGWS().z, mainLWS.z)};
        Vec3 rightbottomGWS = {dispatchInfo.getGWS().x % mainLWS.x, dispatchInfo.getGWS().y % mainLWS.y, alignDown(dispatchInfo.getGWS().z, mainLWS.z)};
        Vec3 mainbackGWS = {alignDown(dispatchInfo.getGWS().x, mainLWS.x), alignDown(dispatchInfo.getGWS().y, mainLWS.y), dispatchInfo.getGWS().z % mainLWS.z};
        Vec3 rightbackGWS = {dispatchInfo.getGWS().x % mainLWS.x, alignDown(dispatchInfo.getGWS().y, mainLWS.y), dispatchInfo.getGWS().z % mainLWS.z};
        Vec3 bottombackGWS = {alignDown(dispatchInfo.getGWS().x, mainLWS.x), dispatchInfo.getGWS().y % mainLWS.y, dispatchInfo.getGWS().z % mainLWS.z};
        Vec3 rightbottombackGWS = {dispatchInfo.getGWS().x % mainLWS.x, dispatchInfo.getGWS().y % mainLWS.y, dispatchInfo.getGWS().z % mainLWS.z};
        // mainGWS.z is already rounded down to a multiple of mainLWS.z, so the
        // isIndivisible() term in mainNWGS.z evaluates to 0.
        Vec3 mainNWGS = {mainGWS.x / mainLWS.x, mainGWS.y / mainLWS.y, mainGWS.z / mainLWS.z + isIndivisible(mainGWS.z, mainLWS.z)};
        Vec3 rightNWGS = {mainNWGS.x + isIndivisible(rightGWS.x, mainLWS.x), mainGWS.y / mainLWS.y, mainGWS.z / mainLWS.z};
        Vec3 bottomNWGS = {mainGWS.x / mainLWS.x, mainNWGS.y + isIndivisible(bottomGWS.y, mainLWS.y), mainGWS.z / mainLWS.z};
        Vec3 rightbottomNWGS = {mainNWGS.x + isIndivisible(rightGWS.x, mainLWS.x), mainNWGS.y + isIndivisible(bottomGWS.y, mainLWS.y), mainGWS.z / mainLWS.z};
        Vec3 mainbackNWGS = {mainGWS.x / mainLWS.x, mainGWS.y / mainLWS.y, mainNWGS.z + isIndivisible(mainbackGWS.z, mainLWS.z)};
        Vec3 rightbackNWGS = {mainNWGS.x + isIndivisible(rightGWS.x, mainLWS.x), mainGWS.y / mainLWS.y, mainNWGS.z + isIndivisible(rightbackGWS.z, mainLWS.z)};
        Vec3 bottombackNWGS = {mainGWS.x / mainLWS.x, mainNWGS.y + isIndivisible(bottomGWS.y, mainLWS.y), mainNWGS.z + isIndivisible(bottombackGWS.z, mainLWS.z)};
        Vec3 rightbottombackNWGS = {mainNWGS.x + isIndivisible(rightGWS.x, mainLWS.x), mainNWGS.y + isIndivisible(bottomGWS.y, mainLWS.y), mainNWGS.z + isIndivisible(rightbottombackGWS.z, mainLWS.z)};
        // Starting work-group of each region.
        Vec3 mainSWGS = {0, 0, 0};
        Vec3 rightSWGS = {mainNWGS.x, 0, 0};
        Vec3 bottomSWGS = {0, mainNWGS.y, 0};
        Vec3 rightbottomSWGS = {mainNWGS.x, mainNWGS.y, 0};
        Vec3 mainbackSWGS = {0, 0, mainNWGS.z};
        Vec3 rightbackSWGS = {mainNWGS.x, 0, mainNWGS.z};
        Vec3 bottombackSWGS = {0, mainNWGS.y, mainNWGS.z};
        Vec3 rightbottombackSWGS = {mainNWGS.x, mainNWGS.y, mainNWGS.z};
        DispatchInfoBuilder builder3D(dispatchInfo.getClDevice());
        builder3D.setKernel(dispatchInfo.getKernel());
        builder3D.setDispatchGeometry(xMain, yMain, zMain, dispatchInfo.getGWS(), dispatchInfo.getEnqueuedWorkgroupSize(), dispatchInfo.getOffset(), mainGWS, mainLWS, dispatchInfo.getTotalNumberOfWorkgroups(), mainNWGS, mainSWGS);
        builder3D.setDispatchGeometry(xRight, yMain, zMain, dispatchInfo.getGWS(), dispatchInfo.getEnqueuedWorkgroupSize(), dispatchInfo.getOffset(), rightGWS, rightLWS, dispatchInfo.getTotalNumberOfWorkgroups(), rightNWGS, rightSWGS);
        builder3D.setDispatchGeometry(xMain, yBottom, zMain, dispatchInfo.getGWS(), dispatchInfo.getEnqueuedWorkgroupSize(), dispatchInfo.getOffset(), bottomGWS, bottomLWS, dispatchInfo.getTotalNumberOfWorkgroups(), bottomNWGS, bottomSWGS);
        builder3D.setDispatchGeometry(xRight, yBottom, zMain, dispatchInfo.getGWS(), dispatchInfo.getEnqueuedWorkgroupSize(), dispatchInfo.getOffset(), rightbottomGWS, rightbottomLWS, dispatchInfo.getTotalNumberOfWorkgroups(), rightbottomNWGS, rightbottomSWGS);
        builder3D.setDispatchGeometry(xMain, yMain, zBack, dispatchInfo.getGWS(), dispatchInfo.getEnqueuedWorkgroupSize(), dispatchInfo.getOffset(), mainbackGWS, mainbackLWS, dispatchInfo.getTotalNumberOfWorkgroups(), mainbackNWGS, mainbackSWGS);
        builder3D.setDispatchGeometry(xRight, yMain, zBack, dispatchInfo.getGWS(), dispatchInfo.getEnqueuedWorkgroupSize(), dispatchInfo.getOffset(), rightbackGWS, rightbackLWS, dispatchInfo.getTotalNumberOfWorkgroups(), rightbackNWGS, rightbackSWGS);
        builder3D.setDispatchGeometry(xMain, yBottom, zBack, dispatchInfo.getGWS(), dispatchInfo.getEnqueuedWorkgroupSize(), dispatchInfo.getOffset(), bottombackGWS, bottombackLWS, dispatchInfo.getTotalNumberOfWorkgroups(), bottombackNWGS, bottombackSWGS);
        builder3D.setDispatchGeometry(xRight, yBottom, zBack, dispatchInfo.getGWS(), dispatchInfo.getEnqueuedWorkgroupSize(), dispatchInfo.getOffset(), rightbottombackGWS, rightbottombackLWS, dispatchInfo.getTotalNumberOfWorkgroups(), rightbottombackNWGS, rightbottombackSWGS);
        builder3D.bake(outMdi);
    } break;
    }
}
(static_cast(Mode) + 1); } static constexpr uint32_t getDispatchId(SplitDispatch::RegionCoordX x, SplitDispatch::RegionCoordY y) { return static_cast(x) + static_cast(y) * (static_cast(Mode) + 1); } static constexpr uint32_t getDispatchId(SplitDispatch::RegionCoordX x) { return static_cast(x); } static const size_t numDispatches = (Mode == SplitDispatch::SplitMode::WalkerSplit) ? 1 : powConst((static_cast(Mode) + 1), // 1 (middle) 2 (middle + right/bottom) or 3 (lef/top + middle + right/mottom) (static_cast(Dim) + 1)); // 1, 2 or 3 DispatchInfo dispatchInfos[numDispatches]; private: static size_t alignDown(size_t x, size_t y) { return x - x % y; } static size_t isIndivisible(size_t x, size_t y) { return x % y ? 1 : 0; } static bool isWorkSizeValid(uint32_t dim, const Vec3 &workSize) { switch (dim) { case 1: return workSize.x > 0; case 2: return workSize.x > 0 && workSize.y > 0; case 3: return workSize.x > 0 && workSize.y > 0 && workSize.z > 0; default: return true; } } }; } // namespace NEO compute-runtime-22.14.22890/opencl/source/helpers/enqueue_properties.h000066400000000000000000000034641422164147700256770ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/blit_commands_helper.h" namespace NEO { struct EnqueueProperties { enum class Operation { None, Blit, ExplicitCacheFlush, EnqueueWithoutSubmission, DependencyResolveOnGpu, GpuKernel, ProfilingOnly }; EnqueueProperties() = delete; EnqueueProperties(bool blitEnqueue, bool hasKernels, bool isCacheFlushCmd, bool flushDependenciesOnly, bool isMarkerWithEvent, const BlitPropertiesContainer *blitPropertiesContainer) { if (blitEnqueue) { operation = Operation::Blit; this->blitPropertiesContainer = blitPropertiesContainer; return; } if (hasKernels) { operation = Operation::GpuKernel; this->blitPropertiesContainer = blitPropertiesContainer; return; } if (isCacheFlushCmd) { operation = 
Operation::ExplicitCacheFlush; return; } if (flushDependenciesOnly) { operation = Operation::DependencyResolveOnGpu; return; } if (isMarkerWithEvent) { operation = Operation::ProfilingOnly; return; } operation = Operation::EnqueueWithoutSubmission; } bool isFlushWithoutKernelRequired() const { return (operation == Operation::Blit) || (operation == Operation::ExplicitCacheFlush) || (operation == Operation::DependencyResolveOnGpu) || (operation == EnqueueProperties::Operation::ProfilingOnly); } const BlitPropertiesContainer *blitPropertiesContainer = nullptr; Operation operation = Operation::EnqueueWithoutSubmission; }; } // namespace NEO compute-runtime-22.14.22890/opencl/source/helpers/error_mappers.h000066400000000000000000000054671422164147700246410ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once template struct NullObjectErrorMapper { static const cl_int retVal = CL_SUCCESS; }; // clang-format off template <> struct NullObjectErrorMapper { static const cl_int retVal = CL_INVALID_COMMAND_QUEUE; }; template <> struct NullObjectErrorMapper { static const cl_int retVal = CL_INVALID_CONTEXT; }; template <> struct NullObjectErrorMapper { static const cl_int retVal = CL_INVALID_DEVICE; }; template <> struct NullObjectErrorMapper { static const cl_int retVal = CL_INVALID_EVENT; }; template <> struct NullObjectErrorMapper { static const cl_int retVal = CL_INVALID_KERNEL; }; template <> struct NullObjectErrorMapper { static const cl_int retVal = CL_INVALID_MEM_OBJECT; }; template <> struct NullObjectErrorMapper { static const cl_int retVal = CL_INVALID_PLATFORM; }; template <> struct NullObjectErrorMapper { static const cl_int retVal = CL_INVALID_PROGRAM; }; template <> struct NullObjectErrorMapper { static const cl_int retVal = CL_INVALID_SAMPLER; }; template <> struct NullObjectErrorMapper { static const cl_int retVal = CL_INVALID_VALUE; }; template <> struct NullObjectErrorMapper { static 
// Translates a GetInfoStatus into the matching OpenCL result code.
// Any status without a dedicated mapping is reported as CL_INVALID_VALUE.
static inline cl_int changeGetInfoStatusToCLResultType(GetInfoStatus status) {
    cl_int clResult = CL_INVALID_VALUE;
    switch (status) {
    case GetInfoStatus::SUCCESS:
        clResult = CL_SUCCESS;
        break;
    case GetInfoStatus::INVALID_CONTEXT:
        clResult = CL_INVALID_CONTEXT;
        break;
    case GetInfoStatus::INVALID_VALUE:
        // Already the fallback value.
        break;
    }
    return clResult;
}
compute-runtime-22.14.22890/opencl/source/helpers/gmm_types_converter.cpp000066400000000000000000000042321422164147700263740ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/helpers/gmm_types_converter.h" #include "shared/source/helpers/get_info.h" #include "shared/source/helpers/surface_format_info.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "GL/gl.h" #include "GL/glext.h" using namespace NEO; void GmmTypesConverter::queryImgFromBufferParams(ImageInfo &imgInfo, GraphicsAllocation *gfxAlloc) { // 1D or 2D from buffer if (imgInfo.imgDesc.imageRowPitch > 0) { imgInfo.rowPitch = imgInfo.imgDesc.imageRowPitch; } else { imgInfo.rowPitch = getValidParam(imgInfo.imgDesc.imageWidth) * imgInfo.surfaceFormat->ImageElementSizeInBytes; } imgInfo.slicePitch = imgInfo.rowPitch * getValidParam(imgInfo.imgDesc.imageHeight); imgInfo.size = gfxAlloc->getUnderlyingBufferSize(); imgInfo.qPitch = 0; } uint32_t GmmTypesConverter::getRenderMultisamplesCount(uint32_t numSamples) { if (numSamples == 2) { return 1; } else if (numSamples == 4) { return 2; } else if (numSamples == 8) { return 3; } else if (numSamples == 16) { return 4; } return 0; } GMM_YUV_PLANE GmmTypesConverter::convertPlane(ImagePlane imagePlane) { if (imagePlane == ImagePlane::PLANE_Y) { return GMM_PLANE_Y; } else if (imagePlane == ImagePlane::PLANE_U || imagePlane == ImagePlane::PLANE_UV) { return GMM_PLANE_U; } else if (imagePlane == ImagePlane::PLANE_V) { return GMM_PLANE_V; } return GMM_NO_PLANE; } GMM_CUBE_FACE_ENUM GmmTypesConverter::getCubeFaceIndex(uint32_t target) { switch (target) { case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: return __GMM_CUBE_FACE_NEG_X; case GL_TEXTURE_CUBE_MAP_POSITIVE_X: return __GMM_CUBE_FACE_POS_X; case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: return __GMM_CUBE_FACE_NEG_Y; case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: return __GMM_CUBE_FACE_POS_Y; case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: return 
// Collection of static helpers, parameterized on the hardware family (GfxFamily), used when
// programming kernel dispatches. Only declarations appear in this header; the notes below
// describe what the signatures show and hedge anything beyond that.
template struct HardwareCommandsHelper : public PerThreadDataHelper {
    // Shorthands for the family-specific hardware command/state types.
    using WALKER_TYPE = typename GfxFamily::WALKER_TYPE;
    using BINDING_TABLE_STATE = typename GfxFamily::BINDING_TABLE_STATE;
    using RENDER_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE;
    using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA;
    using MI_ATOMIC = typename GfxFamily::MI_ATOMIC;
    using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION;

    // Returns a pointer to an interface descriptor, given a heap, an offset into it and an
    // optional inline descriptor.
    // NOTE(review): presumably the inline descriptor is preferred when non-null - confirm in the definition.
    static INTERFACE_DESCRIPTOR_DATA *getInterfaceDescriptor(
        const IndirectHeap &indirectHeap,
        uint64_t offsetInterfaceDescriptor,
        INTERFACE_DESCRIPTOR_DATA *inlineInterfaceDescriptor);

    // Extra dynamic-state-heap bytes; added into the total computed by getSizeRequiredDSH().
    inline static uint32_t additionalSizeRequiredDsh();

    // Programs one interface descriptor from the given offsets, sizes and kernel.
    // offsetSamplerState may legitimately go unused (marked [[maybe_unused]]).
    static size_t sendInterfaceDescriptorData(
        const IndirectHeap &indirectHeap,
        uint64_t offsetInterfaceDescriptor,
        uint64_t kernelStartOffset,
        size_t sizeCrossThreadData,
        size_t sizePerThreadData,
        size_t bindingTablePointer,
        [[maybe_unused]] size_t offsetSamplerState,
        uint32_t numSamplers,
        uint32_t numThreadsPerThreadGroup,
        const Kernel &kernel,
        uint32_t bindingTablePrefetchSize,
        PreemptionMode preemptionMode,
        INTERFACE_DESCRIPTOR_DATA *inlineInterfaceDescriptor,
        const Device &device);

    // Writes into the given command stream; takes the interface descriptor data offset.
    static void sendMediaStateFlush(
        LinearStream &commandStream,
        size_t offsetInterfaceDescriptorData);

    // Writes into the given command stream; takes the offset and size of the interface descriptor data.
    static void sendMediaInterfaceDescriptorLoad(
        LinearStream &commandStream,
        size_t offsetInterfaceDescriptorData,
        size_t sizeInterfaceDescriptorData);

    // sizeCrossThreadData is an in/out parameter (non-const reference).
    static size_t sendCrossThreadData(
        IndirectHeap &indirectHeap,
        Kernel &kernel,
        bool inlineDataProgrammingRequired,
        WALKER_TYPE *walkerCmd,
        uint32_t &sizeCrossThreadData);

    // Takes the three heaps (dsh, ioh, ssh) plus the command stream;
    // interfaceDescriptorIndex is an in/out parameter (non-const reference).
    static size_t sendIndirectState(
        LinearStream &commandStream,
        IndirectHeap &dsh,
        IndirectHeap &ioh,
        IndirectHeap &ssh,
        Kernel &kernel,
        uint64_t kernelStartOffset,
        uint32_t simd,
        const size_t localWorkSize[3],
        const uint64_t offsetInterfaceDescriptorTable,
        uint32_t &interfaceDescriptorIndex,
        PreemptionMode preemptionMode,
        WALKER_TYPE *walkerCmd,
        INTERFACE_DESCRIPTOR_DATA *inlineInterfaceDescriptor,
        bool localIdsGenerationByRuntime,
        const Device &device);

    // Most parameters are non-const references, i.e. the callee may update them.
    static void programPerThreadData(
        size_t &sizePerThreadData,
        const bool &localIdsGenerationByRuntime,
        LinearStream &ioh,
        uint32_t &simd,
        uint32_t &numChannels,
        const size_t localWorkSize[3],
        Kernel &kernel,
        size_t &sizePerThreadDataTotal,
        size_t &localWorkItems,
        uint32_t rootDeviceIndex);

    // All parameters are non-const references, i.e. the callee may update them.
    static void updatePerThreadDataTotal(
        size_t &sizePerThreadData,
        uint32_t &simd,
        uint32_t &numChannels,
        size_t &sizePerThreadDataTotal,
        size_t &localWorkItems);

    inline static bool resetBindingTablePrefetch();

    static size_t getSizeRequiredCS();
    static size_t getSizeRequiredForCacheFlush(const CommandQueue &commandQueue, const Kernel *kernel, uint64_t postSyncAddress);

    // Dynamic state heap bytes needed by one kernel, aligned up to the cache line size.
    static size_t getSizeRequiredDSH(
        const Kernel &kernel);
    // Indirect object heap bytes needed by one kernel; localWorkSize defaults to 256 when omitted.
    static size_t getSizeRequiredIOH(
        const Kernel &kernel,
        size_t localWorkSize = 256);
    // Surface state heap bytes needed by one kernel.
    static size_t getSizeRequiredSSH(
        const Kernel &kernel);

    // Heap size queries taking a whole MultiDispatchInfo instead of a single kernel.
    // NOTE(review): presumably these aggregate the per-kernel sizes - confirm in the definitions.
    static size_t getTotalSizeRequiredDSH(
        const MultiDispatchInfo &multiDispatchInfo);
    static size_t getTotalSizeRequiredIOH(
        const MultiDispatchInfo &multiDispatchInfo);
    static size_t getTotalSizeRequiredSSH(
        const MultiDispatchInfo &multiDispatchInfo);

    // interfaceDescriptorIndex is an in/out parameter (non-const reference).
    static void setInterfaceDescriptorOffset(
        WALKER_TYPE *walkerCmd,
        uint32_t &interfaceDescriptorIndex);

    static void programCacheFlushAfterWalkerCommand(LinearStream *commandStream, const CommandQueue &commandQueue, const Kernel *kernel, uint64_t postSyncAddress);

    // Per-kernel predicates.
    static bool inlineDataProgrammingRequired(const Kernel &kernel);
    static bool kernelUsesLocalIds(const Kernel &kernel);
};
"opencl/source/kernel/kernel.h" #include namespace NEO { template size_t HardwareCommandsHelper::getSizeRequiredDSH(const Kernel &kernel) { constexpr auto samplerStateSize = sizeof(typename GfxFamily::SAMPLER_STATE); constexpr auto maxIndirectSamplerStateSize = alignUp(sizeof(typename GfxFamily::SAMPLER_BORDER_COLOR_STATE), MemoryConstants::cacheLineSize); const auto numSamplers = kernel.getKernelInfo().kernelDescriptor.payloadMappings.samplerTable.numSamplers; if (numSamplers == 0U) { return alignUp(additionalSizeRequiredDsh(), MemoryConstants::cacheLineSize); } auto calculatedTotalSize = alignUp(maxIndirectSamplerStateSize + numSamplers * samplerStateSize + additionalSizeRequiredDsh(), MemoryConstants::cacheLineSize); DEBUG_BREAK_IF(calculatedTotalSize > kernel.getDynamicStateHeapSize()); return calculatedTotalSize; } template size_t HardwareCommandsHelper::getSizeRequiredIOH(const Kernel &kernel, size_t localWorkSize) { typedef typename GfxFamily::WALKER_TYPE WALKER_TYPE; const auto &kernelDescriptor = kernel.getDescriptor(); const auto &hwInfo = kernel.getHardwareInfo(); auto numChannels = kernelDescriptor.kernelAttributes.numLocalIdChannels; uint32_t grfSize = hwInfo.capabilityTable.grfSize; auto simdSize = kernelDescriptor.kernelAttributes.simdSize; auto size = kernel.getCrossThreadDataSize() + getPerThreadDataSizeTotal(simdSize, grfSize, numChannels, localWorkSize); auto pImplicitArgs = kernel.getImplicitArgs(); if (pImplicitArgs) { size += ImplicitArgsHelper::getSizeForImplicitArgsPatching(pImplicitArgs, kernelDescriptor, hwInfo); } return alignUp(size, WALKER_TYPE::INDIRECTDATASTARTADDRESS_ALIGN_SIZE); } template size_t HardwareCommandsHelper::getSizeRequiredSSH(const Kernel &kernel) { typedef typename GfxFamily::BINDING_TABLE_STATE BINDING_TABLE_STATE; auto sizeSSH = kernel.getSurfaceStateHeapSize(); sizeSSH += sizeSSH ? 
BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE : 0; return sizeSSH; } template size_t getSizeRequired(const MultiDispatchInfo &multiDispatchInfo, SizeGetterT &&getSize, ArgsT... args) { size_t totalSize = 0; auto it = multiDispatchInfo.begin(); for (auto e = multiDispatchInfo.end(); it != e; ++it) { totalSize = alignUp(totalSize, MemoryConstants::cacheLineSize); totalSize += getSize(*it, std::forward(args)...); } totalSize = alignUp(totalSize, MemoryConstants::pageSize); return totalSize; } template size_t HardwareCommandsHelper::getTotalSizeRequiredDSH( const MultiDispatchInfo &multiDispatchInfo) { return getSizeRequired(multiDispatchInfo, [](const DispatchInfo &dispatchInfo) { return getSizeRequiredDSH(*dispatchInfo.getKernel()); }); } template size_t HardwareCommandsHelper::getTotalSizeRequiredIOH( const MultiDispatchInfo &multiDispatchInfo) { return getSizeRequired(multiDispatchInfo, [](const DispatchInfo &dispatchInfo) { return getSizeRequiredIOH( *dispatchInfo.getKernel(), Math::computeTotalElementsCount(dispatchInfo.getLocalWorkgroupSize())); }); } template size_t HardwareCommandsHelper::getTotalSizeRequiredSSH( const MultiDispatchInfo &multiDispatchInfo) { return getSizeRequired(multiDispatchInfo, [](const DispatchInfo &dispatchInfo) { return getSizeRequiredSSH(*dispatchInfo.getKernel()); }); } template size_t HardwareCommandsHelper::sendInterfaceDescriptorData( const IndirectHeap &indirectHeap, uint64_t offsetInterfaceDescriptor, uint64_t kernelStartOffset, size_t sizeCrossThreadData, size_t sizePerThreadData, size_t bindingTablePointer, [[maybe_unused]] size_t offsetSamplerState, uint32_t numSamplers, uint32_t threadsPerThreadGroup, const Kernel &kernel, uint32_t bindingTablePrefetchSize, PreemptionMode preemptionMode, INTERFACE_DESCRIPTOR_DATA *inlineInterfaceDescriptor, const Device &device) { using SAMPLER_STATE = typename GfxFamily::SAMPLER_STATE; using SHARED_LOCAL_MEMORY_SIZE = typename INTERFACE_DESCRIPTOR_DATA::SHARED_LOCAL_MEMORY_SIZE; const 
auto &hardwareInfo = device.getHardwareInfo(); const auto &kernelDescriptor = kernel.getKernelInfo().kernelDescriptor; // Allocate some memory for the interface descriptor auto pInterfaceDescriptor = getInterfaceDescriptor(indirectHeap, offsetInterfaceDescriptor, inlineInterfaceDescriptor); auto interfaceDescriptor = GfxFamily::cmdInitInterfaceDescriptorData; // Program the kernel start pointer interfaceDescriptor.setKernelStartPointer(static_cast(kernelStartOffset & std::numeric_limits::max())); // # of threads in thread group should be based on LWS. interfaceDescriptor.setNumberOfThreadsInGpgpuThreadGroup(threadsPerThreadGroup); interfaceDescriptor.setDenormMode(INTERFACE_DESCRIPTOR_DATA::DENORM_MODE_SETBYKERNEL); auto slmTotalSize = kernel.getSlmTotalSize(); EncodeDispatchKernel::setGrfInfo(&interfaceDescriptor, kernelDescriptor.kernelAttributes.numGrfRequired, sizeCrossThreadData, sizePerThreadData); EncodeDispatchKernel::appendAdditionalIDDFields(&interfaceDescriptor, hardwareInfo, threadsPerThreadGroup, slmTotalSize, SlmPolicy::SlmPolicyNone); interfaceDescriptor.setBindingTablePointer(static_cast(bindingTablePointer)); if constexpr (GfxFamily::supportsSampler) { interfaceDescriptor.setSamplerStatePointer(static_cast(offsetSamplerState)); } EncodeDispatchKernel::adjustBindingTablePrefetch(interfaceDescriptor, numSamplers, bindingTablePrefetchSize); auto programmableIDSLMSize = static_cast(HwHelperHw::get().computeSlmValues(hardwareInfo, slmTotalSize)); if (DebugManager.flags.OverrideSlmAllocationSize.get() != -1) { programmableIDSLMSize = static_cast(DebugManager.flags.OverrideSlmAllocationSize.get()); } interfaceDescriptor.setSharedLocalMemorySize(programmableIDSLMSize); EncodeDispatchKernel::programBarrierEnable(interfaceDescriptor, kernelDescriptor.kernelAttributes.barrierCount, hardwareInfo); PreemptionHelper::programInterfaceDescriptorDataPreemption(&interfaceDescriptor, preemptionMode); 
EncodeDispatchKernel::adjustInterfaceDescriptorData(interfaceDescriptor, hardwareInfo); *pInterfaceDescriptor = interfaceDescriptor; return (size_t)offsetInterfaceDescriptor; } template size_t HardwareCommandsHelper::sendIndirectState( LinearStream &commandStream, IndirectHeap &dsh, IndirectHeap &ioh, IndirectHeap &ssh, Kernel &kernel, uint64_t kernelStartOffset, uint32_t simd, const size_t localWorkSize[3], const uint64_t offsetInterfaceDescriptorTable, uint32_t &interfaceDescriptorIndex, PreemptionMode preemptionMode, WALKER_TYPE *walkerCmd, INTERFACE_DESCRIPTOR_DATA *inlineInterfaceDescriptor, bool localIdsGenerationByRuntime, const Device &device) { using SAMPLER_STATE = typename GfxFamily::SAMPLER_STATE; auto rootDeviceIndex = device.getRootDeviceIndex(); DEBUG_BREAK_IF(simd != 1 && simd != 8 && simd != 16 && simd != 32); auto inlineDataProgrammingRequired = HardwareCommandsHelper::inlineDataProgrammingRequired(kernel); // Copy the kernel over to the ISH const auto &kernelInfo = kernel.getKernelInfo(); ssh.align(BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE); auto dstBindingTablePointer = EncodeSurfaceState::pushBindingTableAndSurfaceStates(ssh, kernelInfo.kernelDescriptor.payloadMappings.bindingTable.numEntries, kernel.getSurfaceStateHeap(), kernel.getSurfaceStateHeapSize(), kernel.getNumberOfBindingTableStates(), kernel.getBindingTableOffset()); // Copy our sampler state if it exists const auto &samplerTable = kernelInfo.kernelDescriptor.payloadMappings.samplerTable; uint32_t samplerCount = 0; uint32_t samplerStateOffset = 0; if (isValidOffset(samplerTable.tableOffset) && isValidOffset(samplerTable.borderColor)) { samplerCount = samplerTable.numSamplers; samplerStateOffset = EncodeStates::copySamplerState(&dsh, samplerTable.tableOffset, samplerCount, samplerTable.borderColor, kernel.getDynamicStateHeap(), device.getBindlessHeapsHelper(), device.getHardwareInfo()); } auto localWorkItems = localWorkSize[0] * localWorkSize[1] * localWorkSize[2]; auto 
threadsPerThreadGroup = static_cast(getThreadsPerWG(simd, localWorkItems)); auto numChannels = static_cast(kernelInfo.kernelDescriptor.kernelAttributes.numLocalIdChannels); uint32_t sizeCrossThreadData = kernel.getCrossThreadDataSize(); size_t offsetCrossThreadData = HardwareCommandsHelper::sendCrossThreadData( ioh, kernel, inlineDataProgrammingRequired, walkerCmd, sizeCrossThreadData); size_t sizePerThreadDataTotal = 0; size_t sizePerThreadData = 0; HardwareCommandsHelper::programPerThreadData( sizePerThreadData, localIdsGenerationByRuntime, ioh, simd, numChannels, localWorkSize, kernel, sizePerThreadDataTotal, localWorkItems, rootDeviceIndex); uint64_t offsetInterfaceDescriptor = offsetInterfaceDescriptorTable + interfaceDescriptorIndex * sizeof(INTERFACE_DESCRIPTOR_DATA); auto bindingTablePrefetchSize = std::min(31u, static_cast(kernel.getNumberOfBindingTableStates())); if (resetBindingTablePrefetch()) { bindingTablePrefetchSize = 0; } HardwareCommandsHelper::sendInterfaceDescriptorData( dsh, offsetInterfaceDescriptor, kernelStartOffset, sizeCrossThreadData, sizePerThreadData, dstBindingTablePointer, samplerStateOffset, samplerCount, threadsPerThreadGroup, kernel, bindingTablePrefetchSize, preemptionMode, inlineInterfaceDescriptor, device); if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) { PatchInfoData patchInfoData(kernelStartOffset, 0, PatchInfoAllocationType::InstructionHeap, dsh.getGraphicsAllocation()->getGpuAddress(), offsetInterfaceDescriptor, PatchInfoAllocationType::DynamicStateHeap); kernel.getPatchInfoDataList().push_back(patchInfoData); } // Program media state flush to set interface descriptor offset sendMediaStateFlush( commandStream, interfaceDescriptorIndex); DEBUG_BREAK_IF(offsetCrossThreadData % 64 != 0); walkerCmd->setIndirectDataStartAddress(static_cast(offsetCrossThreadData)); setInterfaceDescriptorOffset(walkerCmd, interfaceDescriptorIndex); auto indirectDataLength = alignUp(static_cast(sizeCrossThreadData + 
sizePerThreadDataTotal), WALKER_TYPE::INDIRECTDATASTARTADDRESS_ALIGN_SIZE); walkerCmd->setIndirectDataLength(indirectDataLength); return offsetCrossThreadData; } template void HardwareCommandsHelper::updatePerThreadDataTotal( size_t &sizePerThreadData, uint32_t &simd, uint32_t &numChannels, size_t &sizePerThreadDataTotal, size_t &localWorkItems) { uint32_t grfSize = sizeof(typename GfxFamily::GRF); sizePerThreadData = getPerThreadSizeLocalIDs(simd, grfSize, numChannels); uint32_t localIdSizePerThread = PerThreadDataHelper::getLocalIdSizePerThread(simd, grfSize, numChannels); localIdSizePerThread = std::max(localIdSizePerThread, grfSize); sizePerThreadDataTotal = getThreadsPerWG(simd, localWorkItems) * localIdSizePerThread; DEBUG_BREAK_IF(sizePerThreadDataTotal == 0); // Hardware requires at least 1 GRF of perThreadData for each thread in thread group } template bool HardwareCommandsHelper::inlineDataProgrammingRequired(const Kernel &kernel) { auto checkKernelForInlineData = true; if (DebugManager.flags.EnablePassInlineData.get() != -1) { checkKernelForInlineData = !!DebugManager.flags.EnablePassInlineData.get(); } if (checkKernelForInlineData) { return kernel.getKernelInfo().kernelDescriptor.kernelAttributes.flags.passInlineData; } return false; } template bool HardwareCommandsHelper::kernelUsesLocalIds(const Kernel &kernel) { return kernel.getKernelInfo().kernelDescriptor.kernelAttributes.numLocalIdChannels > 0; } } // namespace NEO compute-runtime-22.14.22890/opencl/source/helpers/hardware_commands_helper_bdw_and_later.inl000066400000000000000000000154201422164147700321640ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/pipe_control_args.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/kernel/kernel.h" namespace NEO { 
template typename HardwareCommandsHelper::INTERFACE_DESCRIPTOR_DATA *HardwareCommandsHelper::getInterfaceDescriptor( const IndirectHeap &indirectHeap, uint64_t offsetInterfaceDescriptor, HardwareCommandsHelper::INTERFACE_DESCRIPTOR_DATA *inlineInterfaceDescriptor) { return static_cast(ptrOffset(indirectHeap.getCpuBase(), (size_t)offsetInterfaceDescriptor)); } template uint32_t HardwareCommandsHelper::additionalSizeRequiredDsh() { return sizeof(INTERFACE_DESCRIPTOR_DATA); } template size_t HardwareCommandsHelper::getSizeRequiredCS() { size_t size = 2 * sizeof(typename GfxFamily::MEDIA_STATE_FLUSH) + sizeof(typename GfxFamily::MEDIA_INTERFACE_DESCRIPTOR_LOAD); return size; } template size_t HardwareCommandsHelper::getSizeRequiredForCacheFlush(const CommandQueue &commandQueue, const Kernel *kernel, uint64_t postSyncAddress) { return kernel->requiresCacheFlushCommand(commandQueue) ? sizeof(typename GfxFamily::PIPE_CONTROL) : 0; } template void HardwareCommandsHelper::sendMediaStateFlush( LinearStream &commandStream, size_t offsetInterfaceDescriptorData) { using MEDIA_STATE_FLUSH = typename GfxFamily::MEDIA_STATE_FLUSH; auto pCmd = commandStream.getSpaceForCmd(); MEDIA_STATE_FLUSH cmd = GfxFamily::cmdInitMediaStateFlush; cmd.setInterfaceDescriptorOffset(static_cast(offsetInterfaceDescriptorData)); *pCmd = cmd; } template void HardwareCommandsHelper::sendMediaInterfaceDescriptorLoad( LinearStream &commandStream, size_t offsetInterfaceDescriptorData, size_t sizeInterfaceDescriptorData) { { using MEDIA_STATE_FLUSH = typename GfxFamily::MEDIA_STATE_FLUSH; auto pCmd = commandStream.getSpaceForCmd(); *pCmd = GfxFamily::cmdInitMediaStateFlush; } { using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename GfxFamily::MEDIA_INTERFACE_DESCRIPTOR_LOAD; auto pCmd = commandStream.getSpaceForCmd(); MEDIA_INTERFACE_DESCRIPTOR_LOAD cmd = GfxFamily::cmdInitMediaInterfaceDescriptorLoad; cmd.setInterfaceDescriptorDataStartAddress(static_cast(offsetInterfaceDescriptorData)); 
cmd.setInterfaceDescriptorTotalLength(static_cast(sizeInterfaceDescriptorData)); *pCmd = cmd; } } template void HardwareCommandsHelper::programPerThreadData( size_t &sizePerThreadData, const bool &localIdsGenerationByRuntime, LinearStream &ioh, uint32_t &simd, uint32_t &numChannels, const size_t localWorkSize[3], Kernel &kernel, size_t &sizePerThreadDataTotal, size_t &localWorkItems, uint32_t rootDeviceIndex) { uint32_t grfSize = sizeof(typename GfxFamily::GRF); sendPerThreadData( ioh, simd, grfSize, numChannels, std::array{{static_cast(localWorkSize[0]), static_cast(localWorkSize[1]), static_cast(localWorkSize[2])}}, std::array{{kernel.getKernelInfo().kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[0], kernel.getKernelInfo().kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[1], kernel.getKernelInfo().kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[2]}}, kernel.usesOnlyImages()); updatePerThreadDataTotal(sizePerThreadData, simd, numChannels, sizePerThreadDataTotal, localWorkItems); } template size_t HardwareCommandsHelper::sendCrossThreadData( IndirectHeap &indirectHeap, Kernel &kernel, bool inlineDataProgrammingRequired, WALKER_TYPE *walkerCmd, uint32_t &sizeCrossThreadData) { indirectHeap.align(WALKER_TYPE::INDIRECTDATASTARTADDRESS_ALIGN_SIZE); auto pImplicitArgs = kernel.getImplicitArgs(); if (pImplicitArgs) { const auto &kernelDescriptor = kernel.getDescriptor(); const auto &hwInfo = kernel.getHardwareInfo(); auto sizeForImplicitArgsProgramming = ImplicitArgsHelper::getSizeForImplicitArgsPatching(pImplicitArgs, kernelDescriptor, hwInfo); auto implicitArgsGpuVA = indirectHeap.getGraphicsAllocation()->getGpuAddress() + indirectHeap.getUsed(); auto ptrToPatchImplicitArgs = indirectHeap.getSpace(sizeForImplicitArgsProgramming); ImplicitArgsHelper::patchImplicitArgs(ptrToPatchImplicitArgs, *pImplicitArgs, kernelDescriptor, hwInfo, {}); auto implicitArgsCrossThreadPtr = ptrOffset(reinterpret_cast(kernel.getCrossThreadData()), 
kernelDescriptor.payloadMappings.implicitArgs.implicitArgsBuffer); *implicitArgsCrossThreadPtr = implicitArgsGpuVA; } auto offsetCrossThreadData = indirectHeap.getUsed(); char *pDest = nullptr; pDest = static_cast(indirectHeap.getSpace(sizeCrossThreadData)); memcpy_s(pDest, sizeCrossThreadData, kernel.getCrossThreadData(), sizeCrossThreadData); if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) { FlatBatchBufferHelper::fixCrossThreadDataInfo(kernel.getPatchInfoDataList(), offsetCrossThreadData, indirectHeap.getGraphicsAllocation()->getGpuAddress()); } return offsetCrossThreadData + static_cast(indirectHeap.getHeapGpuStartOffset()); } template bool HardwareCommandsHelper::resetBindingTablePrefetch() { return !EncodeSurfaceState::doBindingTablePrefetch(); } template void HardwareCommandsHelper::setInterfaceDescriptorOffset( WALKER_TYPE *walkerCmd, uint32_t &interfaceDescriptorIndex) { walkerCmd->setInterfaceDescriptorOffset(interfaceDescriptorIndex++); } template void HardwareCommandsHelper::programCacheFlushAfterWalkerCommand(LinearStream *commandStream, const CommandQueue &commandQueue, const Kernel *kernel, uint64_t postSyncAddress) { const auto &hwInfo = commandQueue.getDevice().getHardwareInfo(); PipeControlArgs args; args.dcFlushEnable = MemorySynchronizationCommands::getDcFlushEnable(true, hwInfo); MemorySynchronizationCommands::addPipeControl(*commandStream, args); } } // namespace NEO compute-runtime-22.14.22890/opencl/source/helpers/hardware_commands_helper_xehp_and_later.inl000066400000000000000000000177321422164147700323640ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/flat_batch_buffer_helper.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/l3_range.h" #include "shared/source/helpers/pipe_control_args.h" #include "shared/source/helpers/string.h" #include "opencl/source/cl_device/cl_device.h" #include 
"opencl/source/command_queue/command_queue.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/kernel/kernel.h" namespace NEO { template typename HardwareCommandsHelper::INTERFACE_DESCRIPTOR_DATA *HardwareCommandsHelper::getInterfaceDescriptor( const IndirectHeap &indirectHeap, uint64_t offsetInterfaceDescriptor, INTERFACE_DESCRIPTOR_DATA *inlineInterfaceDescriptor) { return inlineInterfaceDescriptor; } template uint32_t HardwareCommandsHelper::additionalSizeRequiredDsh() { return 0u; } template size_t HardwareCommandsHelper::getSizeRequiredCS() { return 0; } template size_t HardwareCommandsHelper::getSizeRequiredForCacheFlush(const CommandQueue &commandQueue, const Kernel *kernel, uint64_t postSyncAddress) { UNRECOVERABLE_IF(true); return 0; } template void HardwareCommandsHelper::sendMediaStateFlush( LinearStream &commandStream, size_t offsetInterfaceDescriptorData) { } template void HardwareCommandsHelper::sendMediaInterfaceDescriptorLoad( LinearStream &commandStream, size_t offsetInterfaceDescriptorData, size_t sizeInterfaceDescriptorData) { } template void HardwareCommandsHelper::programPerThreadData( size_t &sizePerThreadData, const bool &localIdsGenerationByRuntime, LinearStream &ioh, uint32_t &simd, uint32_t &numChannels, const size_t localWorkSize[3], Kernel &kernel, size_t &sizePerThreadDataTotal, size_t &localWorkItems, uint32_t rootDeviceIndex) { if (localIdsGenerationByRuntime) { constexpr uint32_t grfSize = sizeof(typename GfxFamily::GRF); sendPerThreadData( ioh, simd, grfSize, numChannels, std::array{{static_cast(localWorkSize[0]), static_cast(localWorkSize[1]), static_cast(localWorkSize[2])}}, {{0u, 1u, 2u}}, kernel.usesOnlyImages()); updatePerThreadDataTotal(sizePerThreadData, simd, numChannels, sizePerThreadDataTotal, localWorkItems); } } template size_t HardwareCommandsHelper::sendCrossThreadData( IndirectHeap &indirectHeap, Kernel &kernel, bool inlineDataProgrammingRequired, WALKER_TYPE *walkerCmd, uint32_t 
&sizeCrossThreadData) { indirectHeap.align(WALKER_TYPE::INDIRECTDATASTARTADDRESS_ALIGN_SIZE); auto offsetCrossThreadData = indirectHeap.getUsed(); char *dest = nullptr; char *src = kernel.getCrossThreadData(); auto pImplicitArgs = kernel.getImplicitArgs(); if (pImplicitArgs) { pImplicitArgs->localIdTablePtr = indirectHeap.getGraphicsAllocation()->getGpuAddress() + offsetCrossThreadData; const auto &kernelDescriptor = kernel.getDescriptor(); const auto &hwInfo = kernel.getHardwareInfo(); auto sizeForImplicitArgsProgramming = ImplicitArgsHelper::getSizeForImplicitArgsPatching(pImplicitArgs, kernelDescriptor, hwInfo); auto sizeForLocalIdsProgramming = sizeForImplicitArgsProgramming - sizeof(ImplicitArgs); offsetCrossThreadData += sizeForLocalIdsProgramming; auto ptrToPatchImplicitArgs = indirectHeap.getSpace(sizeForImplicitArgsProgramming); const auto &kernelAttributes = kernelDescriptor.kernelAttributes; uint32_t requiredWalkOrder = 0u; size_t localWorkSize[3] = {pImplicitArgs->localSizeX, pImplicitArgs->localSizeY, pImplicitArgs->localSizeZ}; auto generationOfLocalIdsByRuntime = EncodeDispatchKernel::isRuntimeLocalIdsGenerationRequired( 3, localWorkSize, std::array{ {kernelAttributes.workgroupWalkOrder[0], kernelAttributes.workgroupWalkOrder[1], kernelAttributes.workgroupWalkOrder[2]}}, kernelAttributes.flags.requiresWorkgroupWalkOrder, requiredWalkOrder, kernelDescriptor.kernelAttributes.simdSize); ImplicitArgsHelper::patchImplicitArgs(ptrToPatchImplicitArgs, *pImplicitArgs, kernelDescriptor, hwInfo, std::make_pair(generationOfLocalIdsByRuntime, requiredWalkOrder)); } using InlineData = typename GfxFamily::INLINE_DATA; using GRF = typename GfxFamily::GRF; uint32_t inlineDataSize = sizeof(InlineData); uint32_t sizeToCopy = sizeCrossThreadData; if (inlineDataProgrammingRequired == true) { sizeToCopy = std::min(inlineDataSize, sizeCrossThreadData); dest = reinterpret_cast(walkerCmd->getInlineDataPointer()); memcpy_s(dest, sizeToCopy, kernel.getCrossThreadData(), 
sizeToCopy); auto offset = std::min(inlineDataSize, sizeCrossThreadData); sizeCrossThreadData -= offset; src += offset; } if (sizeCrossThreadData > 0) { dest = static_cast(indirectHeap.getSpace(sizeCrossThreadData)); memcpy_s(dest, sizeCrossThreadData, src, sizeCrossThreadData); } if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) { FlatBatchBufferHelper::fixCrossThreadDataInfo(kernel.getPatchInfoDataList(), offsetCrossThreadData, indirectHeap.getGraphicsAllocation()->getGpuAddress()); } return offsetCrossThreadData + static_cast(is64bit ? indirectHeap.getHeapGpuStartOffset() : indirectHeap.getHeapGpuBase()); } template bool HardwareCommandsHelper::resetBindingTablePrefetch() { return false; } template void HardwareCommandsHelper::setInterfaceDescriptorOffset( WALKER_TYPE *walkerCmd, uint32_t &interfaceDescriptorIndex) { } template void HardwareCommandsHelper::programCacheFlushAfterWalkerCommand(LinearStream *commandStream, const CommandQueue &commandQueue, const Kernel *kernel, [[maybe_unused]] uint64_t postSyncAddress) { // 1. make sure previous kernel finished PipeControlArgs args; auto &hardwareInfo = commandQueue.getDevice().getHardwareInfo(); args.unTypedDataPortCacheFlush = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily).unTypedDataPortCacheFlushRequired(); MemorySynchronizationCommands::addPipeControl(*commandStream, args); // 2. flush all affected L3 lines if constexpr (GfxFamily::isUsingL3Control) { StackVec allocationsForCacheFlush; kernel->getAllocationsForCacheFlush(allocationsForCacheFlush); StackVec subranges; for (GraphicsAllocation *alloc : allocationsForCacheFlush) { coverRangeExact(alloc->getGpuAddress(), alloc->getUnderlyingBufferSize(), subranges, GfxFamily::L3_FLUSH_ADDRESS_RANGE::L3_FLUSH_EVICTION_POLICY_FLUSH_L3_WITH_EVICTION); } for (size_t subrangeNumber = 0; subrangeNumber < subranges.size(); subrangeNumber += maxFlushSubrangeCount) { size_t rangeCount = subranges.size() <= subrangeNumber + maxFlushSubrangeCount ? 
subranges.size() - subrangeNumber : maxFlushSubrangeCount; Range range = CreateRange(subranges.begin() + subrangeNumber, rangeCount); uint64_t postSyncAddressToFlush = 0; if (rangeCount < maxFlushSubrangeCount || subranges.size() - subrangeNumber - maxFlushSubrangeCount == 0) { postSyncAddressToFlush = postSyncAddress; } flushGpuCache(commandStream, range, postSyncAddressToFlush, hardwareInfo); } } } } // namespace NEO compute-runtime-22.14.22890/opencl/source/helpers/helper_options.cpp000066400000000000000000000004761422164147700253410ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include #include namespace NEO { // AUB file folder location const char *folderAUB = "."; // Initial value for HW tag uint32_t initialHardwareTag = std::numeric_limits::max(); } // namespace NEO compute-runtime-22.14.22890/opencl/source/helpers/implicit_scaling_ocl.cpp000066400000000000000000000004131422164147700264450ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_container/implicit_scaling.h" namespace NEO { namespace ImplicitScaling { bool apiSupport = true; } // namespace ImplicitScaling } // namespace NEO compute-runtime-22.14.22890/opencl/source/helpers/mipmap.cpp000066400000000000000000000053621422164147700235710ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/helpers/mipmap.h" #include "shared/source/gmm_helper/gmm.h" #include "shared/source/gmm_helper/resource_info.h" #include "opencl/source/mem_obj/image.h" #include #include #include namespace NEO { uint32_t getMipLevelOriginIdx(cl_mem_object_type imageType) { switch (imageType) { case CL_MEM_OBJECT_IMAGE1D: return 1; case CL_MEM_OBJECT_IMAGE1D_ARRAY: case CL_MEM_OBJECT_IMAGE2D: return 2; case CL_MEM_OBJECT_IMAGE2D_ARRAY: case CL_MEM_OBJECT_IMAGE3D: return 3; case 
CL_MEM_OBJECT_IMAGE1D_BUFFER: return 0; default: DEBUG_BREAK_IF(true); return std::numeric_limits::max(); } } uint32_t findMipLevel(cl_mem_object_type imageType, const size_t *origin) { size_t mipLevel = 0; switch (imageType) { case CL_MEM_OBJECT_IMAGE1D: case CL_MEM_OBJECT_IMAGE1D_ARRAY: case CL_MEM_OBJECT_IMAGE2D: case CL_MEM_OBJECT_IMAGE2D_ARRAY: case CL_MEM_OBJECT_IMAGE3D: mipLevel = origin[getMipLevelOriginIdx(imageType)]; break; default: mipLevel = 0; break; } return static_cast(mipLevel); } bool isMipMapped(const MemObj *memObj) { auto image = castToObject(memObj); if (image == nullptr) { return false; } return isMipMapped(image->getImageDesc()); } uint32_t getMipOffset(Image *image, const size_t *origin) { if (isMipMapped(image) == false) { return 0; } UNRECOVERABLE_IF(origin == nullptr); auto bytesPerPixel = image->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes; size_t offset{}; auto imageType = image->getImageDesc().image_type; auto lod = findMipLevel(imageType, origin); auto baseWidth = image->getImageDesc().image_width; auto baseHeight = image->getImageDesc().image_height; if (lod) { size_t mipHeight = baseHeight; size_t mipWidth = baseWidth; bool translate = false; if (lod >= 2) { translate = true; mipWidth += std::max(baseWidth >> 2, 1); } for (size_t currentLod = 3; currentLod <= lod; currentLod++) { mipHeight += std::max(baseHeight >> currentLod, 1); mipWidth += std::max(baseWidth >> currentLod, 1); } if (imageType == CL_MEM_OBJECT_IMAGE1D) { offset = mipWidth; } else { offset = baseWidth * mipHeight; if (translate) { offset += std::max(baseWidth >> 1, 1); } } } return static_cast(bytesPerPixel * offset); } } // namespace NEO compute-runtime-22.14.22890/opencl/source/helpers/mipmap.h000066400000000000000000000010431422164147700232260ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "CL/cl.h" #include namespace NEO { class MemObj; class Image; uint32_t 
getMipLevelOriginIdx(cl_mem_object_type imageType); uint32_t findMipLevel(cl_mem_object_type imageType, const size_t *origin); inline bool isMipMapped(const cl_image_desc &imgDesc) { return (imgDesc.num_mip_levels > 1); } bool isMipMapped(const MemObj *memObj); uint32_t getMipOffset(Image *image, const size_t *origin); } // namespace NEO compute-runtime-22.14.22890/opencl/source/helpers/properties_helper.cpp000066400000000000000000000115221422164147700260340ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/helpers/properties_helper.h" #include "shared/source/helpers/timestamp_packet.h" #include "shared/source/memory_manager/memory_manager.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/helpers/mipmap.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/mem_obj/mem_obj.h" namespace NEO { void EventsRequest::fillCsrDependenciesForTimestampPacketContainer(CsrDependencies &csrDeps, CommandStreamReceiver ¤tCsr, CsrDependencies::DependenciesType depsType) const { for (cl_uint i = 0; i < this->numEventsInWaitList; i++) { auto event = castToObjectOrAbort(this->eventWaitList[i]); if (event->isUserEvent()) { continue; } auto timestampPacketContainer = event->getTimestampPacketNodes(); if (!timestampPacketContainer || timestampPacketContainer->peekNodes().empty()) { continue; } auto sameRootDevice = event->getCommandQueue()->getClDevice().getRootDeviceIndex() == currentCsr.getRootDeviceIndex(); if (!sameRootDevice) { continue; } auto sameCsr = (&event->getCommandQueue()->getGpgpuCommandStreamReceiver() == ¤tCsr); bool pushDependency = (CsrDependencies::DependenciesType::OnCsr == depsType && sameCsr) || (CsrDependencies::DependenciesType::OutOfCsr == depsType && !sameCsr) || (CsrDependencies::DependenciesType::All == depsType); if (pushDependency) { csrDeps.timestampPacketContainer.push_back(timestampPacketContainer); } } } void 
EventsRequest::fillCsrDependenciesForTaskCountContainer(CsrDependencies &csrDeps, CommandStreamReceiver ¤tCsr) const { for (cl_uint i = 0; i < this->numEventsInWaitList; i++) { auto event = castToObjectOrAbort(this->eventWaitList[i]); if (event->isUserEvent() || CompletionStamp::notReady == event->peekTaskCount()) { continue; } if (event->getCommandQueue() && event->getCommandQueue()->getDevice().getRootDeviceIndex() != currentCsr.getRootDeviceIndex()) { auto taskCountPreviousRootDevice = event->peekTaskCount(); auto tagAddressPreviousRootDevice = event->getCommandQueue()->getGpgpuCommandStreamReceiver().getTagAddress(); csrDeps.taskCountContainer.push_back({taskCountPreviousRootDevice, reinterpret_cast(tagAddressPreviousRootDevice)}); auto graphicsAllocation = event->getCommandQueue()->getGpgpuCommandStreamReceiver().getTagsMultiAllocation()->getGraphicsAllocation(currentCsr.getRootDeviceIndex()); currentCsr.getResidencyAllocations().push_back(graphicsAllocation); } } } void EventsRequest::setupBcsCsrForOutputEvent(CommandStreamReceiver &bcsCsr) const { if (outEvent) { auto event = castToObjectOrAbort(*outEvent); event->setupBcs(bcsCsr.getOsContext().getEngineType()); } } TransferProperties::TransferProperties(MemObj *memObj, cl_command_type cmdType, cl_map_flags mapFlags, bool blocking, size_t *offsetPtr, size_t *sizePtr, void *ptr, bool doTransferOnCpu, uint32_t rootDeviceIndex) : memObj(memObj), ptr(ptr), cmdType(cmdType), mapFlags(mapFlags), blocking(blocking), doTransferOnCpu(doTransferOnCpu) { // no size or offset passed for unmap operation if (cmdType != CL_COMMAND_UNMAP_MEM_OBJECT) { if (memObj->peekClMemObjType() == CL_MEM_OBJECT_BUFFER) { size[0] = *sizePtr; offset[0] = *offsetPtr; if (doTransferOnCpu && (false == MemoryPool::isSystemMemoryPool(memObj->getGraphicsAllocation(rootDeviceIndex)->getMemoryPool())) && (memObj->getMemoryManager() != nullptr)) { this->lockedPtr = 
memObj->getMemoryManager()->lockResource(memObj->getGraphicsAllocation(rootDeviceIndex)); } } else { size = {{sizePtr[0], sizePtr[1], sizePtr[2]}}; offset = {{offsetPtr[0], offsetPtr[1], offsetPtr[2]}}; if (isMipMapped(memObj)) { // decompose origin to coordinates and miplevel mipLevel = findMipLevel(memObj->peekClMemObjType(), offsetPtr); mipPtrOffset = getMipOffset(castToObjectOrAbort(memObj), offsetPtr); auto mipLevelIdx = getMipLevelOriginIdx(memObj->peekClMemObjType()); if (mipLevelIdx < offset.size()) { offset[mipLevelIdx] = 0; } } } } } void *TransferProperties::getCpuPtrForReadWrite() { return ptrOffset(lockedPtr ? ptrOffset(lockedPtr, memObj->getOffset()) : memObj->getCpuAddressForMemoryTransfer(), offset[0]); } } // namespace NEO compute-runtime-22.14.22890/opencl/source/helpers/properties_helper.h000066400000000000000000000044061422164147700255040ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/csr_deps.h" #include "shared/source/command_stream/queue_throttle.h" #include "opencl/source/api/cl_types.h" #include namespace NEO { class MemObj; class Buffer; class GraphicsAllocation; struct EventsRequest { EventsRequest() = delete; EventsRequest(cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *outEvent) : numEventsInWaitList(numEventsInWaitList), eventWaitList(eventWaitList), outEvent(outEvent) {} void fillCsrDependenciesForTimestampPacketContainer(CsrDependencies &csrDeps, CommandStreamReceiver ¤tCsr, CsrDependencies::DependenciesType depsType) const; void fillCsrDependenciesForTaskCountContainer(CsrDependencies &csrDeps, CommandStreamReceiver ¤tCsr) const; void setupBcsCsrForOutputEvent(CommandStreamReceiver &bcsCsr) const; cl_uint numEventsInWaitList; const cl_event *eventWaitList; cl_event *outEvent; }; using MemObjSizeArray = std::array; using MemObjOffsetArray = std::array; struct TransferProperties { 
TransferProperties() = delete; TransferProperties(MemObj *memObj, cl_command_type cmdType, cl_map_flags mapFlags, bool blocking, size_t *offsetPtr, size_t *sizePtr, void *ptr, bool doTransferOnCpu, uint32_t rootDeviceIndex); MemObjOffsetArray offset = {}; MemObjSizeArray size = {}; MemObj *memObj = nullptr; void *ptr = nullptr; void *lockedPtr = nullptr; cl_command_type cmdType = 0; cl_map_flags mapFlags = 0; uint32_t mipLevel = 0; uint32_t mipPtrOffset = 0; bool blocking = false; bool doTransferOnCpu = false; void *getCpuPtrForReadWrite(); }; struct MapInfo { MapInfo() = default; MapInfo(void *ptr, size_t ptrLength, MemObjSizeArray size, MemObjOffsetArray offset, uint32_t mipLevel) : size(size), offset(offset), ptrLength(ptrLength), ptr(ptr), mipLevel(mipLevel) { } MemObjSizeArray size = {}; MemObjOffsetArray offset = {}; size_t ptrLength = 0; void *ptr = nullptr; uint32_t mipLevel = 0; bool readOnly = false; GraphicsAllocation *graphicsAllocation = nullptr; }; } // namespace NEO compute-runtime-22.14.22890/opencl/source/helpers/queue_helpers.h000066400000000000000000000125221422164147700246150ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/device/device.h" #include "shared/source/helpers/get_info.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/context/context.h" #include "opencl/source/helpers/get_info_status_mapper.h" namespace NEO { inline void releaseVirtualEvent(CommandQueue &commandQueue) { if (commandQueue.getRefApiCount() == 1) { commandQueue.releaseVirtualEvent(); } } inline bool isCommandWithoutKernel(uint32_t commandType) { return ((commandType == CL_COMMAND_BARRIER) || (commandType == CL_COMMAND_MARKER) || (commandType == CL_COMMAND_MIGRATE_MEM_OBJECTS) || (commandType == CL_COMMAND_RESOURCE_BARRIER) || (commandType == CL_COMMAND_SVM_FREE) || (commandType == 
CL_COMMAND_SVM_MAP) || (commandType == CL_COMMAND_SVM_MIGRATE_MEM) || (commandType == CL_COMMAND_SVM_UNMAP)); } inline void retainQueue(cl_command_queue commandQueue, cl_int &retVal) { using BaseType = typename CommandQueue::BaseType; auto queue = castToObject(static_cast(commandQueue)); if (queue) { queue->retain(); retVal = CL_SUCCESS; } } inline void releaseQueue(cl_command_queue commandQueue, cl_int &retVal) { using BaseType = typename CommandQueue::BaseType; auto queue = castToObject(static_cast(commandQueue)); if (queue) { queue->flush(); releaseVirtualEvent(*queue); queue->release(); retVal = CL_SUCCESS; } } inline void getHostQueueInfo(CommandQueue *queue, cl_command_queue_info paramName, GetInfoHelper &getInfoHelper, cl_int &retVal) { switch (paramName) { case CL_QUEUE_FAMILY_INTEL: retVal = changeGetInfoStatusToCLResultType(getInfoHelper.set(queue->getQueueFamilyIndex())); break; case CL_QUEUE_INDEX_INTEL: retVal = changeGetInfoStatusToCLResultType(getInfoHelper.set(queue->getQueueIndexWithinFamily())); break; default: retVal = CL_INVALID_VALUE; break; } } inline cl_int getQueueInfo(CommandQueue *queue, cl_command_queue_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) { cl_int retVal = CL_SUCCESS; GetInfoHelper getInfoHelper(paramValue, paramValueSize, paramValueSizeRet); switch (paramName) { case CL_QUEUE_CONTEXT: retVal = changeGetInfoStatusToCLResultType(getInfoHelper.set(queue->getContextPtr())); break; case CL_QUEUE_DEVICE: { Device &device = queue->getDevice(); retVal = changeGetInfoStatusToCLResultType(getInfoHelper.set(device.getSpecializedDevice())); break; } case CL_QUEUE_REFERENCE_COUNT: retVal = changeGetInfoStatusToCLResultType(getInfoHelper.set(queue->getReference())); break; case CL_QUEUE_PROPERTIES: retVal = changeGetInfoStatusToCLResultType(getInfoHelper.set(queue->getCommandQueueProperties())); break; case CL_QUEUE_DEVICE_DEFAULT: retVal = changeGetInfoStatusToCLResultType(getInfoHelper.set(nullptr)); 
break; case CL_QUEUE_SIZE: retVal = CL_INVALID_COMMAND_QUEUE; break; case CL_QUEUE_PROPERTIES_ARRAY: { auto &propertiesVector = queue->getPropertiesVector(); auto source = propertiesVector.data(); auto sourceSize = propertiesVector.size() * sizeof(cl_queue_properties); auto getInfoStatus = GetInfo::getInfo(paramValue, paramValueSize, source, sourceSize); retVal = changeGetInfoStatusToCLResultType(getInfoStatus); GetInfo::setParamValueReturnSize(paramValueSizeRet, sourceSize, getInfoStatus); break; } default: getHostQueueInfo(queue, paramName, getInfoHelper, retVal); break; } return retVal; } inline void getQueueInfo(cl_command_queue commandQueue, cl_command_queue_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet, cl_int &retVal) { using BaseType = typename CommandQueue::BaseType; auto queue = castToObject(static_cast(commandQueue)); if (queue) { retVal = getQueueInfo(queue, paramName, paramValueSize, paramValue, paramValueSizeRet); } } template returnType getCmdQueueProperties(const cl_queue_properties *properties, cl_queue_properties propertyName = CL_QUEUE_PROPERTIES, bool *foundValue = nullptr) { if (properties != nullptr) { while (*properties != 0) { if (*properties == propertyName) { if (foundValue) { *foundValue = true; } return static_cast(*(properties + 1)); } properties += 2; } } if (foundValue) { *foundValue = false; } return 0; } } // namespace NEO compute-runtime-22.14.22890/opencl/source/helpers/sampler_helpers.h000066400000000000000000000026011422164147700251310ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "CL/cl.h" #include // It's max SSH size per kernel (MAX_BINDING_TABLE_INDEX * 64) const uint32_t SAMPLER_OBJECT_ID_SHIFT = 253 * 64; // Sampler Patch Token Enums enum SAMPLER_PATCH_ENUM { CLK_DEFAULT_SAMPLER = 0x00, CLK_ADDRESS_NONE = 0x00, CLK_ADDRESS_CLAMP = 0x01, CLK_ADDRESS_CLAMP_TO_EDGE = 0x02, CLK_ADDRESS_REPEAT = 0x03, 
CLK_ADDRESS_MIRRORED_REPEAT = 0x04, CLK_ADDRESS_MIRRORED_REPEAT_101 = 0x05, CLK_NORMALIZED_COORDS_FALSE = 0x00, CLK_NORMALIZED_COORDS_TRUE = 0x08, CLK_FILTER_NEAREST = 0x00, CLK_FILTER_LINEAR = 0x00, }; inline SAMPLER_PATCH_ENUM GetAddrModeEnum(cl_addressing_mode addressingMode) { switch (addressingMode) { case CL_ADDRESS_REPEAT: return CLK_ADDRESS_REPEAT; case CL_ADDRESS_CLAMP_TO_EDGE: return CLK_ADDRESS_CLAMP_TO_EDGE; case CL_ADDRESS_CLAMP: return CLK_ADDRESS_CLAMP; case CL_ADDRESS_NONE: return CLK_ADDRESS_NONE; case CL_ADDRESS_MIRRORED_REPEAT: return CLK_ADDRESS_MIRRORED_REPEAT; } return CLK_ADDRESS_NONE; } inline SAMPLER_PATCH_ENUM GetNormCoordsEnum(cl_bool normalizedCoords) { if (normalizedCoords == CL_TRUE) { return CLK_NORMALIZED_COORDS_TRUE; } else { return CLK_NORMALIZED_COORDS_FALSE; } } compute-runtime-22.14.22890/opencl/source/helpers/surface_formats.cpp000066400000000000000000000322041422164147700254640ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "surface_formats.h" #include "shared/source/gmm_helper/gmm_lib.h" #include "shared/source/helpers/array_count.h" #include "opencl/source/api/cl_types.h" #include "opencl/source/helpers/cl_validators.h" #include "opencl/source/mem_obj/image.h" namespace NEO { // clang-format off #define COMMONFORMATS \ {{CL_RGBA, CL_UNORM_INT8}, {GMM_FORMAT_R8G8B8A8_UNORM_TYPE, GFX3DSTATE_SURFACEFORMAT_R8G8B8A8_UNORM , 0, 4, 1, 4}}, \ {{CL_RGBA, CL_UNORM_INT16}, {GMM_FORMAT_R16G16B16A16_UNORM_TYPE, GFX3DSTATE_SURFACEFORMAT_R16G16B16A16_UNORM , 0, 4, 2, 8}}, \ {{CL_RGBA, CL_SIGNED_INT8}, {GMM_FORMAT_R8G8B8A8_SINT_TYPE, GFX3DSTATE_SURFACEFORMAT_R8G8B8A8_SINT , 0, 4, 1, 4}}, \ {{CL_RGBA, CL_SIGNED_INT16}, {GMM_FORMAT_R16G16B16A16_SINT_TYPE, GFX3DSTATE_SURFACEFORMAT_R16G16B16A16_SINT , 0, 4, 2, 8}}, \ {{CL_RGBA, CL_SIGNED_INT32}, {GMM_FORMAT_R32G32B32A32_SINT_TYPE, GFX3DSTATE_SURFACEFORMAT_R32G32B32A32_SINT , 0, 4, 4, 16}}, \ {{CL_RGBA, CL_UNSIGNED_INT8}, 
{GMM_FORMAT_R8G8B8A8_UINT_TYPE, GFX3DSTATE_SURFACEFORMAT_R8G8B8A8_UINT , 0, 4, 1, 4}}, \ {{CL_RGBA, CL_UNSIGNED_INT16}, {GMM_FORMAT_R16G16B16A16_UINT_TYPE, GFX3DSTATE_SURFACEFORMAT_R16G16B16A16_UINT , 0, 4, 2, 8}}, \ {{CL_RGBA, CL_UNSIGNED_INT32}, {GMM_FORMAT_R32G32B32A32_UINT_TYPE, GFX3DSTATE_SURFACEFORMAT_R32G32B32A32_UINT , 0, 4, 4, 16}}, \ {{CL_RGBA, CL_HALF_FLOAT}, {GMM_FORMAT_R16G16B16A16_FLOAT_TYPE, GFX3DSTATE_SURFACEFORMAT_R16G16B16A16_FLOAT , 0, 4, 2, 8}}, \ {{CL_RGBA, CL_FLOAT}, {GMM_FORMAT_R32G32B32A32_FLOAT_TYPE, GFX3DSTATE_SURFACEFORMAT_R32G32B32A32_FLOAT , 0, 4, 4, 16}}, \ {{CL_BGRA, CL_UNORM_INT8}, {GMM_FORMAT_B8G8R8A8_UNORM_TYPE, GFX3DSTATE_SURFACEFORMAT_B8G8R8A8_UNORM , 0, 4, 1, 4}}, \ {{CL_R, CL_FLOAT}, {GMM_FORMAT_R32_FLOAT_TYPE, GFX3DSTATE_SURFACEFORMAT_R32_FLOAT , 0, 1, 4, 4}}, \ {{CL_R, CL_UNORM_INT8}, {GMM_FORMAT_R8_UNORM_TYPE, GFX3DSTATE_SURFACEFORMAT_R8_UNORM , 0, 1, 1, 1}}, \ {{CL_R, CL_UNORM_INT16}, {GMM_FORMAT_R16_UNORM_TYPE, GFX3DSTATE_SURFACEFORMAT_R16_UNORM , 0, 1, 2, 2}}, \ {{CL_R, CL_SIGNED_INT8}, {GMM_FORMAT_R8_SINT_TYPE, GFX3DSTATE_SURFACEFORMAT_R8_SINT , 0, 1, 1, 1}}, \ {{CL_R, CL_SIGNED_INT16}, {GMM_FORMAT_R16_SINT_TYPE, GFX3DSTATE_SURFACEFORMAT_R16_SINT , 0, 1, 2, 2}}, \ {{CL_R, CL_SIGNED_INT32}, {GMM_FORMAT_R32_SINT_TYPE, GFX3DSTATE_SURFACEFORMAT_R32_SINT , 0, 1, 4, 4}}, \ {{CL_R, CL_UNSIGNED_INT8}, {GMM_FORMAT_R8_UINT_TYPE, GFX3DSTATE_SURFACEFORMAT_R8_UINT , 0, 1, 1, 1}}, \ {{CL_R, CL_UNSIGNED_INT16}, {GMM_FORMAT_R16_UINT_TYPE, GFX3DSTATE_SURFACEFORMAT_R16_UINT , 0, 1, 2, 2}}, \ {{CL_R, CL_UNSIGNED_INT32}, {GMM_FORMAT_R32_UINT_TYPE, GFX3DSTATE_SURFACEFORMAT_R32_UINT , 0, 1, 4, 4}}, \ {{CL_R, CL_HALF_FLOAT}, {GMM_FORMAT_R16_FLOAT_TYPE, GFX3DSTATE_SURFACEFORMAT_R16_FLOAT , 0, 1, 2, 2}}, \ {{CL_A, CL_UNORM_INT8}, {GMM_FORMAT_A8_UNORM_TYPE, GFX3DSTATE_SURFACEFORMAT_A8_UNORM , 0, 1, 1, 1}}, \ {{CL_RG, CL_UNORM_INT8}, {GMM_FORMAT_R8G8_UNORM_TYPE, GFX3DSTATE_SURFACEFORMAT_R8G8_UNORM , 0, 2, 1, 2}}, \ {{CL_RG, CL_UNORM_INT16}, 
{GMM_FORMAT_R16G16_UNORM_TYPE, GFX3DSTATE_SURFACEFORMAT_R16G16_UNORM , 0, 2, 2, 4}}, \ {{CL_RG, CL_SIGNED_INT8}, {GMM_FORMAT_R8G8_SINT_TYPE, GFX3DSTATE_SURFACEFORMAT_R8G8_SINT , 0, 2, 1, 2}}, \ {{CL_RG, CL_SIGNED_INT16}, {GMM_FORMAT_R16G16_SINT_TYPE, GFX3DSTATE_SURFACEFORMAT_R16G16_SINT , 0, 2, 2, 4}}, \ {{CL_RG, CL_SIGNED_INT32}, {GMM_FORMAT_R32G32_SINT_TYPE, GFX3DSTATE_SURFACEFORMAT_R32G32_SINT , 0, 2, 4, 8}}, \ {{CL_RG, CL_UNSIGNED_INT8}, {GMM_FORMAT_R8G8_UINT_TYPE, GFX3DSTATE_SURFACEFORMAT_R8G8_UINT , 0, 2, 1, 2}}, \ {{CL_RG, CL_UNSIGNED_INT16}, {GMM_FORMAT_R16G16_UINT_TYPE, GFX3DSTATE_SURFACEFORMAT_R16G16_UINT , 0, 2, 2, 4}}, \ {{CL_RG, CL_UNSIGNED_INT32}, {GMM_FORMAT_R32G32_UINT_TYPE, GFX3DSTATE_SURFACEFORMAT_R32G32_UINT , 0, 2, 4, 8}}, \ {{CL_RG, CL_HALF_FLOAT}, {GMM_FORMAT_R16G16_FLOAT_TYPE, GFX3DSTATE_SURFACEFORMAT_R16G16_FLOAT , 0, 2, 2, 4}}, \ {{CL_RG, CL_FLOAT}, {GMM_FORMAT_R32G32_FLOAT_TYPE, GFX3DSTATE_SURFACEFORMAT_R32G32_FLOAT , 0, 2, 4, 8}}, \ {{CL_LUMINANCE, CL_UNORM_INT8}, {GMM_FORMAT_GENERIC_8BIT, GFX3DSTATE_SURFACEFORMAT_R8_UNORM , 0, 1, 1, 1}}, \ {{CL_LUMINANCE, CL_UNORM_INT16}, {GMM_FORMAT_GENERIC_16BIT, GFX3DSTATE_SURFACEFORMAT_R16_UNORM , 0, 1, 2, 2}}, \ {{CL_LUMINANCE, CL_HALF_FLOAT}, {GMM_FORMAT_GENERIC_16BIT, GFX3DSTATE_SURFACEFORMAT_R16_FLOAT , 0, 1, 2, 2}}, \ {{CL_LUMINANCE, CL_FLOAT}, {GMM_FORMAT_GENERIC_32BIT, GFX3DSTATE_SURFACEFORMAT_R32_FLOAT , 0, 1, 4, 4}}, \ {{CL_R, CL_SNORM_INT8}, {GMM_FORMAT_R8_SNORM_TYPE, GFX3DSTATE_SURFACEFORMAT_R8_SNORM , 0, 1, 1, 1}}, \ {{CL_R, CL_SNORM_INT16}, {GMM_FORMAT_R16_SNORM_TYPE, GFX3DSTATE_SURFACEFORMAT_R16_SNORM , 0, 1, 2, 2}}, \ {{CL_RG, CL_SNORM_INT8}, {GMM_FORMAT_R8G8_SNORM_TYPE, GFX3DSTATE_SURFACEFORMAT_R8G8_SNORM , 0, 2, 1, 2}}, \ {{CL_RG, CL_SNORM_INT16}, {GMM_FORMAT_R16G16_SNORM_TYPE, GFX3DSTATE_SURFACEFORMAT_R16G16_SNORM , 0, 2, 2, 4}}, \ {{CL_RGBA, CL_SNORM_INT8}, {GMM_FORMAT_R8G8B8A8_SNORM_TYPE, GFX3DSTATE_SURFACEFORMAT_R8G8B8A8_SNORM , 0, 4, 1, 4}}, \ {{CL_RGBA, CL_SNORM_INT16}, 
{GMM_FORMAT_R16G16B16A16_SNORM_TYPE, GFX3DSTATE_SURFACEFORMAT_R16G16B16A16_SNORM , 0, 4, 2, 8}} #define READONLYFORMATS \ {{CL_INTENSITY, CL_UNORM_INT8}, {GMM_FORMAT_GENERIC_8BIT, GFX3DSTATE_SURFACEFORMAT_I8_UNORM , 0, 1, 1, 1}}, \ {{CL_INTENSITY, CL_UNORM_INT16}, {GMM_FORMAT_GENERIC_16BIT, GFX3DSTATE_SURFACEFORMAT_I16_UNORM , 0, 1, 2, 2}}, \ {{CL_INTENSITY, CL_HALF_FLOAT}, {GMM_FORMAT_GENERIC_16BIT, GFX3DSTATE_SURFACEFORMAT_I16_FLOAT , 0, 1, 2, 2}}, \ {{CL_INTENSITY, CL_FLOAT}, {GMM_FORMAT_GENERIC_32BIT, GFX3DSTATE_SURFACEFORMAT_I32_FLOAT , 0, 1, 4, 4}}, \ {{CL_A, CL_UNORM_INT16}, {GMM_FORMAT_GENERIC_16BIT, GFX3DSTATE_SURFACEFORMAT_A16_UNORM , 0, 1, 2, 2}}, \ {{CL_A, CL_HALF_FLOAT}, {GMM_FORMAT_GENERIC_16BIT, GFX3DSTATE_SURFACEFORMAT_A16_FLOAT , 0, 1, 2, 2}}, \ {{CL_A, CL_FLOAT}, {GMM_FORMAT_GENERIC_32BIT, GFX3DSTATE_SURFACEFORMAT_A32_FLOAT , 0, 1, 4, 4}} #define SRGBFORMATS \ {{CL_sRGBA, CL_UNORM_INT8}, {GMM_FORMAT_R8G8B8A8_UNORM_SRGB_TYPE, GFX3DSTATE_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB , 0, 4, 1, 4}}, \ {{CL_sBGRA, CL_UNORM_INT8}, {GMM_FORMAT_B8G8R8A8_UNORM_SRGB_TYPE, GFX3DSTATE_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB , 0, 4, 1, 4}} #define DEPTHFORMATS \ {{ CL_DEPTH, CL_FLOAT}, {GMM_FORMAT_R32_FLOAT_TYPE, GFX3DSTATE_SURFACEFORMAT_R32_FLOAT , 0, 1, 4, 4}}, \ {{ CL_DEPTH, CL_UNORM_INT16}, {GMM_FORMAT_R16_UNORM_TYPE, GFX3DSTATE_SURFACEFORMAT_R16_UNORM , 0, 1, 2, 2}} #define DEPTHSTENCILFORMATS \ {{ CL_DEPTH_STENCIL, CL_UNORM_INT24}, {GMM_FORMAT_GENERIC_32BIT, GFX3DSTATE_SURFACEFORMAT_R24_UNORM_X8_TYPELESS , 0, 1, 4, 4}}, \ {{ CL_DEPTH_STENCIL, CL_FLOAT}, {GMM_FORMAT_R32G32_FLOAT_TYPE, GFX3DSTATE_SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS, 0, 2, 4, 8}} //Initialize this with the required formats first. 
//Append the optional one later const ClSurfaceFormatInfo SurfaceFormats::readOnlySurfaceFormats12[] = { COMMONFORMATS, READONLYFORMATS }; const ClSurfaceFormatInfo SurfaceFormats::readOnlySurfaceFormats20[] = { COMMONFORMATS, READONLYFORMATS, SRGBFORMATS }; const ClSurfaceFormatInfo SurfaceFormats::writeOnlySurfaceFormats[] = { COMMONFORMATS }; const ClSurfaceFormatInfo SurfaceFormats::readWriteSurfaceFormats[] = { COMMONFORMATS }; const ClSurfaceFormatInfo SurfaceFormats::packedYuvSurfaceFormats[] = { {{CL_YUYV_INTEL, CL_UNORM_INT8}, {GMM_FORMAT_YUY2, GFX3DSTATE_SURFACEFORMAT_YCRCB_NORMAL , 0, 2, 1, 2}}, {{CL_UYVY_INTEL, CL_UNORM_INT8}, {GMM_FORMAT_UYVY, GFX3DSTATE_SURFACEFORMAT_YCRCB_SWAPY , 0, 2, 1, 2}}, {{CL_YVYU_INTEL, CL_UNORM_INT8}, {GMM_FORMAT_YVYU, GFX3DSTATE_SURFACEFORMAT_YCRCB_SWAPUV , 0, 2, 1, 2}}, {{CL_VYUY_INTEL, CL_UNORM_INT8}, {GMM_FORMAT_VYUY, GFX3DSTATE_SURFACEFORMAT_YCRCB_SWAPUVY , 0, 2, 1, 2}} }; const ClSurfaceFormatInfo SurfaceFormats::planarYuvSurfaceFormats[] = { {{CL_NV12_INTEL, CL_UNORM_INT8}, {GMM_FORMAT_NV12, GFX3DSTATE_SURFACEFORMAT_PLANAR_420_8 , 0, 1, 1, 1}} }; const ClSurfaceFormatInfo SurfaceFormats::packedSurfaceFormats[] = { {{CL_RGBA, CL_UNORM_INT16}, {GMM_FORMAT_Y210, GFX3DSTATE_SURFACEFORMAT_R16G16B16A16_UNORM , 0, 4, 2, 8}}, {{CL_RG, CL_UNORM_INT16}, {GMM_FORMAT_YUY2_2x1, GFX3DSTATE_SURFACEFORMAT_R10G10B10A2_UNORM , 0, 2, 2, 4}} }; const ClSurfaceFormatInfo SurfaceFormats::readOnlyDepthSurfaceFormats[] = { DEPTHFORMATS, DEPTHSTENCILFORMATS }; const ClSurfaceFormatInfo SurfaceFormats::readWriteDepthSurfaceFormats[] = { DEPTHFORMATS }; ArrayRef SurfaceFormats::readOnly12() noexcept { return ArrayRef(readOnlySurfaceFormats12); } ArrayRef SurfaceFormats::readOnly20() noexcept { return ArrayRef(readOnlySurfaceFormats20); } ArrayRef SurfaceFormats::writeOnly() noexcept { return ArrayRef(writeOnlySurfaceFormats); } ArrayRef SurfaceFormats::readWrite() noexcept { return ArrayRef(readWriteSurfaceFormats); } ArrayRef 
SurfaceFormats::packedYuv() noexcept { return ArrayRef(packedYuvSurfaceFormats); } ArrayRef SurfaceFormats::planarYuv() noexcept { return ArrayRef(planarYuvSurfaceFormats); } ArrayRef SurfaceFormats::packed() noexcept { return ArrayRef(packedSurfaceFormats); } ArrayRef SurfaceFormats::readOnlyDepth() noexcept { return ArrayRef(readOnlyDepthSurfaceFormats); } ArrayRef SurfaceFormats::readWriteDepth() noexcept { return ArrayRef(readWriteDepthSurfaceFormats); } ArrayRef SurfaceFormats::surfaceFormats(cl_mem_flags flags, bool supportsOcl20Features) noexcept { if (flags & CL_MEM_READ_ONLY) { if(supportsOcl20Features) { return readOnly20(); } else { return readOnly12(); } } else if (flags & CL_MEM_WRITE_ONLY) { return writeOnly(); } else { return readWrite(); } } ArrayRef SurfaceFormats::surfaceFormats(cl_mem_flags flags, const cl_image_format *imageFormat, bool supportsOcl20Features) noexcept { if (NEO::isNV12Image(imageFormat)) { return planarYuv(); } else if (isPackedYuvImage(imageFormat)) { return packedYuv(); } else if (Image::isDepthFormat(*imageFormat)) { if (flags & CL_MEM_READ_ONLY) { return readOnlyDepth(); } else { return readWriteDepth(); } } else if (flags & CL_MEM_READ_ONLY) { if(supportsOcl20Features) { return readOnly20(); } else { return readOnly12(); } } else if (flags & CL_MEM_WRITE_ONLY) { return writeOnly(); } else { return readWrite(); } } // clang-format on } // namespace NEO compute-runtime-22.14.22890/opencl/source/helpers/surface_formats.h000066400000000000000000000036421422164147700251350ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/gmm_helper/gmm_lib.h" #include "shared/source/helpers/surface_format_info.h" #include "shared/source/utilities/arrayref.h" #include "CL/cl.h" namespace NEO { struct ClSurfaceFormatInfo { cl_image_format OCLImageFormat; SurfaceFormatInfo surfaceFormat; }; class SurfaceFormats { private: static const 
ClSurfaceFormatInfo readOnlySurfaceFormats12[]; static const ClSurfaceFormatInfo readOnlySurfaceFormats20[]; static const ClSurfaceFormatInfo writeOnlySurfaceFormats[]; static const ClSurfaceFormatInfo readWriteSurfaceFormats[]; static const ClSurfaceFormatInfo readOnlyDepthSurfaceFormats[]; static const ClSurfaceFormatInfo readWriteDepthSurfaceFormats[]; static const ClSurfaceFormatInfo packedYuvSurfaceFormats[]; static const ClSurfaceFormatInfo planarYuvSurfaceFormats[]; static const ClSurfaceFormatInfo packedSurfaceFormats[]; public: static ArrayRef readOnly12() noexcept; static ArrayRef readOnly20() noexcept; static ArrayRef writeOnly() noexcept; static ArrayRef readWrite() noexcept; static ArrayRef packedYuv() noexcept; static ArrayRef planarYuv() noexcept; static ArrayRef packed() noexcept; static ArrayRef readOnlyDepth() noexcept; static ArrayRef readWriteDepth() noexcept; static ArrayRef surfaceFormats(cl_mem_flags flags, bool supportsOcl20Features) noexcept; static ArrayRef surfaceFormats(cl_mem_flags flags, const cl_image_format *imageFormat, bool supportsOcl20Features) noexcept; }; } // namespace NEO compute-runtime-22.14.22890/opencl/source/helpers/task_information.cpp000066400000000000000000000655741422164147700256700ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/helpers/task_information.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/command_stream/csr_deps.h" #include "shared/source/command_stream/linear_stream.h" #include "shared/source/command_stream/preemption.h" #include "shared/source/command_stream/wait_status.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/engine_node_helper.h" #include "shared/source/helpers/string.h" #include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/source/memory_manager/surface.h" #include 
"opencl/source/built_ins/builtins_dispatch_builder.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/command_queue/enqueue_common.h" #include "opencl/source/gtpin/gtpin_notify.h" #include "opencl/source/helpers/cl_preemption_helper.h" #include "opencl/source/helpers/enqueue_properties.h" #include "opencl/source/helpers/task_information.inl" #include "opencl/source/mem_obj/mem_obj.h" namespace NEO { template void KernelOperation::ResourceCleaner::operator()(LinearStream *); template void KernelOperation::ResourceCleaner::operator()(IndirectHeap *); CommandMapUnmap::CommandMapUnmap(MapOperationType operationType, MemObj &memObj, MemObjSizeArray ©Size, MemObjOffsetArray ©Offset, bool readOnly, CommandQueue &commandQueue) : Command(commandQueue), memObj(memObj), copySize(copySize), copyOffset(copyOffset), readOnly(readOnly), operationType(operationType) { memObj.incRefInternal(); } CompletionStamp &CommandMapUnmap::submit(uint32_t taskLevel, bool terminated) { DecRefInternalAtScopeEnd decRefInternalAtScopeEnd{memObj}; if (terminated) { this->terminated = true; return completionStamp; } auto &commandStreamReceiver = commandQueue.getGpgpuCommandStreamReceiver(); auto commandStreamReceiverOwnership = commandStreamReceiver.obtainUniqueOwnership(); auto &queueCommandStream = commandQueue.getCS(0); size_t offset = queueCommandStream.getUsed(); MultiDispatchInfo multiDispatch; Device &device = commandQueue.getDevice(); DispatchFlags dispatchFlags( {}, //csrDependencies nullptr, //barrierTimestampPacketNodes {}, //pipelineSelectArgs commandQueue.flushStamp->getStampReference(), //flushStampReference commandQueue.getThrottle(), //throttle ClPreemptionHelper::taskPreemptionMode(device, multiDispatch), //preemptionMode GrfConfig::NotApplicable, //numGrfRequired L3CachingSettings::NotApplicable, //l3CacheSettings ThreadArbitrationPolicy::NotPresent, //threadArbitrationPolicy 
AdditionalKernelExecInfo::NotApplicable, //additionalKernelExecInfo KernelExecutionType::NotApplicable, //kernelExecutionType MemoryCompressionState::NotApplicable, //memoryCompressionState commandQueue.getSliceCount(), //sliceCount true, //blocking true, //dcFlush false, //useSLM !commandQueue.getGpgpuCommandStreamReceiver().isUpdateTagFromWaitEnabled(), //guardCommandBufferWithPipeControl false, //GSBA32BitRequired false, //requiresCoherency commandQueue.getPriority() == QueuePriority::LOW, //lowPriority false, //implicitFlush commandQueue.getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled(), //outOfOrderExecutionAllowed false, //epilogueRequired false, //usePerDssBackedBuffer false, //useSingleSubdevice false, //useGlobalAtomics false, //areMultipleSubDevicesInContext false, //memoryMigrationRequired false); //textureCacheFlush DEBUG_BREAK_IF(taskLevel >= CompletionStamp::notReady); gtpinNotifyPreFlushTask(&commandQueue); completionStamp = commandStreamReceiver.flushTask(queueCommandStream, offset, &commandQueue.getIndirectHeap(IndirectHeap::Type::DYNAMIC_STATE, 0u), &commandQueue.getIndirectHeap(IndirectHeap::Type::INDIRECT_OBJECT, 0u), &commandQueue.getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0u), taskLevel, dispatchFlags, commandQueue.getDevice()); commandQueue.updateLatestSentEnqueueType(EnqueueProperties::Operation::DependencyResolveOnGpu); if (!memObj.isMemObjZeroCopy()) { const auto waitStatus = commandQueue.waitUntilComplete(completionStamp.taskCount, {}, completionStamp.flushStamp, false); if (waitStatus == WaitStatus::GpuHang) { completionStamp.taskCount = CompletionStamp::gpuHang; return completionStamp; } if (operationType == MAP) { memObj.transferDataToHostPtr(copySize, copyOffset); } else if (!readOnly) { DEBUG_BREAK_IF(operationType != UNMAP); memObj.transferDataFromHostPtr(copySize, copyOffset); } } return completionStamp; } CommandComputeKernel::CommandComputeKernel(CommandQueue &commandQueue, std::unique_ptr &kernelOperation, 
std::vector surfaces, bool flushDC, bool usesSLM, uint32_t commandType, std::unique_ptr &&printfHandler, PreemptionMode preemptionMode, Kernel *kernel, uint32_t kernelCount) : Command(commandQueue, kernelOperation), surfaces(std::move(surfaces)), flushDC(flushDC), slmUsed(usesSLM), commandType(commandType), printfHandler(std::move(printfHandler)), kernel(kernel), kernelCount(kernelCount), preemptionMode(preemptionMode) { UNRECOVERABLE_IF(nullptr == this->kernel); kernel->incRefInternal(); } CommandComputeKernel::~CommandComputeKernel() { kernel->decRefInternal(); } CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminated) { if (terminated) { this->terminated = true; for (auto surface : surfaces) { delete surface; } surfaces.clear(); return completionStamp; } auto &commandStreamReceiver = commandQueue.getGpgpuCommandStreamReceiver(); auto bcsCsrForAuxTranslation = commandQueue.getBcsForAuxTranslation(); auto commandStreamReceiverOwnership = commandStreamReceiver.obtainUniqueOwnership(); IndirectHeap *dsh = kernelOperation->dsh.get(); IndirectHeap *ioh = kernelOperation->ioh.get(); IndirectHeap *ssh = kernelOperation->ssh.get(); auto requiresCoherency = false; auto anyUncacheableArgs = false; for (auto &surface : surfaces) { DEBUG_BREAK_IF(!surface); surface->makeResident(commandStreamReceiver); requiresCoherency |= surface->IsCoherent; if (!surface->allowsL3Caching()) { anyUncacheableArgs = true; } } if (printfHandler) { printfHandler.get()->makeResident(commandStreamReceiver); } makeTimestampPacketsResident(commandStreamReceiver); if (kernelOperation->blitPropertiesContainer.size() > 0) { CsrDependencies csrDeps; eventsRequest.fillCsrDependenciesForTimestampPacketContainer(csrDeps, *bcsCsrForAuxTranslation, CsrDependencies::DependenciesType::All); BlitProperties::setupDependenciesForAuxTranslation(kernelOperation->blitPropertiesContainer, *timestampPacketDependencies, *currentTimestampPacketNodes, csrDeps, 
commandQueue.getGpgpuCommandStreamReceiver(), *bcsCsrForAuxTranslation); } if (timestampPacketDependencies && commandQueue.isOOQEnabled()) { commandQueue.setupBarrierTimestampForBcsEngines(commandQueue.getGpgpuCommandStreamReceiver().getOsContext().getEngineType(), *timestampPacketDependencies); } const auto &kernelDescriptor = kernel->getKernelInfo().kernelDescriptor; auto memoryCompressionState = commandStreamReceiver.getMemoryCompressionState(kernel->isAuxTranslationRequired(), commandQueue.getDevice().getHardwareInfo()); DispatchFlags dispatchFlags( {}, //csrDependencies nullptr, //barrierTimestampPacketNodes {false, kernel->isVmeKernel()}, //pipelineSelectArgs commandQueue.flushStamp->getStampReference(), //flushStampReference commandQueue.getThrottle(), //throttle preemptionMode, //preemptionMode kernelDescriptor.kernelAttributes.numGrfRequired, //numGrfRequired L3CachingSettings::l3CacheOn, //l3CacheSettings kernel->getThreadArbitrationPolicy(), //threadArbitrationPolicy kernel->getAdditionalKernelExecInfo(), //additionalKernelExecInfo kernel->getExecutionType(), //kernelExecutionType memoryCompressionState, //memoryCompressionState commandQueue.getSliceCount(), //sliceCount true, //blocking flushDC, //dcFlush slmUsed, //useSLM !commandQueue.getGpgpuCommandStreamReceiver().isUpdateTagFromWaitEnabled(), //guardCommandBufferWithPipeControl commandType == CL_COMMAND_NDRANGE_KERNEL, //GSBA32BitRequired requiresCoherency, //requiresCoherency commandQueue.getPriority() == QueuePriority::LOW, //lowPriority false, //implicitFlush commandQueue.getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled(), //outOfOrderExecutionAllowed false, //epilogueRequired false, //usePerDssBackedBuffer kernel->isSingleSubdevicePreferred(), //useSingleSubdevice kernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, //useGlobalAtomics kernel->areMultipleSubDevicesInContext(), //areMultipleSubDevicesInContext kernel->requiresMemoryMigration(), 
//memoryMigrationRequired commandQueue.isTextureCacheFlushNeeded(this->commandType)); //textureCacheFlush if (commandQueue.getContext().getRootDeviceIndices().size() > 1) { eventsRequest.fillCsrDependenciesForTaskCountContainer(dispatchFlags.csrDependencies, commandStreamReceiver); } const bool isHandlingBarrier = commandQueue.getGpgpuCommandStreamReceiver().isStallingCommandsOnNextFlushRequired(); if (timestampPacketDependencies) { eventsRequest.fillCsrDependenciesForTimestampPacketContainer(dispatchFlags.csrDependencies, commandStreamReceiver, CsrDependencies::DependenciesType::OutOfCsr); if (isHandlingBarrier) { commandQueue.fillCsrDependenciesWithLastBcsPackets(dispatchFlags.csrDependencies); } dispatchFlags.barrierTimestampPacketNodes = ×tampPacketDependencies->barrierNodes; } dispatchFlags.pipelineSelectArgs.specialPipelineSelectMode = kernel->requiresSpecialPipelineSelectMode(); if (anyUncacheableArgs) { dispatchFlags.l3CacheSettings = L3CachingSettings::l3CacheOff; } else if (!kernel->areStatelessWritesUsed()) { dispatchFlags.l3CacheSettings = L3CachingSettings::l3AndL1On; } if (commandQueue.dispatchHints != 0) { dispatchFlags.engineHints = commandQueue.dispatchHints; dispatchFlags.epilogueRequired = true; } DEBUG_BREAK_IF(taskLevel >= CompletionStamp::notReady); gtpinNotifyPreFlushTask(&commandQueue); if (kernel->requiresMemoryMigration()) { for (auto &arg : kernel->getMemObjectsToMigrate()) { MigrationController::handleMigration(commandQueue.getContext(), commandStreamReceiver, arg.second); } } completionStamp = commandStreamReceiver.flushTask(*kernelOperation->commandStream, 0, dsh, ioh, ssh, taskLevel, dispatchFlags, commandQueue.getDevice()); if (isHandlingBarrier) { commandQueue.clearLastBcsPackets(); } if (kernelOperation->blitPropertiesContainer.size() > 0) { const auto newTaskCount = bcsCsrForAuxTranslation->flushBcsTask(kernelOperation->blitPropertiesContainer, false, commandQueue.isProfilingEnabled(), commandQueue.getDevice()); 
commandQueue.updateBcsTaskCount(bcsCsrForAuxTranslation->getOsContext().getEngineType(), newTaskCount); } commandQueue.updateLatestSentEnqueueType(EnqueueProperties::Operation::GpuKernel); if (gtpinIsGTPinInitialized()) { gtpinNotifyFlushTask(completionStamp.taskCount); } if (printfHandler) { commandQueue.waitUntilComplete(completionStamp.taskCount, {}, completionStamp.flushStamp, false); printfHandler.get()->printEnqueueOutput(); } for (auto surface : surfaces) { delete surface; } surfaces.clear(); return completionStamp; } void CommandWithoutKernel::dispatchBlitOperation() { auto bcsCsr = kernelOperation->bcsCsr; UNRECOVERABLE_IF(bcsCsr == nullptr); UNRECOVERABLE_IF(kernelOperation->blitPropertiesContainer.size() != 1); auto &blitProperties = *kernelOperation->blitPropertiesContainer.begin(); eventsRequest.fillCsrDependenciesForTimestampPacketContainer(blitProperties.csrDependencies, *bcsCsr, CsrDependencies::DependenciesType::All); blitProperties.csrDependencies.timestampPacketContainer.push_back(×tampPacketDependencies->cacheFlushNodes); blitProperties.csrDependencies.timestampPacketContainer.push_back(×tampPacketDependencies->previousEnqueueNodes); blitProperties.csrDependencies.timestampPacketContainer.push_back(×tampPacketDependencies->barrierNodes); blitProperties.outputTimestampPacket = currentTimestampPacketNodes->peekNodes()[0]; if (commandQueue.getContext().getRootDeviceIndices().size() > 1) { eventsRequest.fillCsrDependenciesForTaskCountContainer(blitProperties.csrDependencies, *bcsCsr); } const auto newTaskCount = bcsCsr->flushBcsTask(kernelOperation->blitPropertiesContainer, false, commandQueue.isProfilingEnabled(), commandQueue.getDevice()); commandQueue.updateBcsTaskCount(bcsCsr->getOsContext().getEngineType(), newTaskCount); commandQueue.setLastBcsPacket(bcsCsr->getOsContext().getEngineType()); } CompletionStamp &CommandWithoutKernel::submit(uint32_t taskLevel, bool terminated) { if (terminated) { this->terminated = true; return completionStamp; } 
auto &commandStreamReceiver = commandQueue.getGpgpuCommandStreamReceiver(); if (!kernelOperation) { completionStamp.taskCount = commandStreamReceiver.peekTaskCount(); completionStamp.taskLevel = commandStreamReceiver.peekTaskLevel(); completionStamp.flushStamp = commandStreamReceiver.obtainCurrentFlushStamp(); return completionStamp; } auto barrierNodes = timestampPacketDependencies ? ×tampPacketDependencies->barrierNodes : nullptr; auto lockCSR = commandStreamReceiver.obtainUniqueOwnership(); auto enqueueOperationType = EnqueueProperties::Operation::DependencyResolveOnGpu; if (kernelOperation->blitEnqueue) { enqueueOperationType = EnqueueProperties::Operation::Blit; UNRECOVERABLE_IF(!barrierNodes); if (commandStreamReceiver.isStallingCommandsOnNextFlushRequired()) { barrierNodes->add(commandStreamReceiver.getTimestampPacketAllocator()->getTag()); } } if (timestampPacketDependencies && commandQueue.isOOQEnabled()) { commandQueue.setupBarrierTimestampForBcsEngines(commandQueue.getGpgpuCommandStreamReceiver().getOsContext().getEngineType(), *timestampPacketDependencies); } auto rootDeviceIndex = commandStreamReceiver.getRootDeviceIndex(); DispatchFlags dispatchFlags( {}, //csrDependencies barrierNodes, //barrierTimestampPacketNodes {}, //pipelineSelectArgs commandQueue.flushStamp->getStampReference(), //flushStampReference commandQueue.getThrottle(), //throttle commandQueue.getDevice().getPreemptionMode(), //preemptionMode GrfConfig::NotApplicable, //numGrfRequired L3CachingSettings::NotApplicable, //l3CacheSettings ThreadArbitrationPolicy::NotPresent, //threadArbitrationPolicy AdditionalKernelExecInfo::NotApplicable, //additionalKernelExecInfo KernelExecutionType::NotApplicable, //kernelExecutionType MemoryCompressionState::NotApplicable, //memoryCompressionState commandQueue.getSliceCount(), //sliceCount true, //blocking false, //dcFlush false, //useSLM !commandStreamReceiver.isUpdateTagFromWaitEnabled(), //guardCommandBufferWithPipeControl false, 
//GSBA32BitRequired false, //requiresCoherency commandQueue.getPriority() == QueuePriority::LOW, //lowPriority false, //implicitFlush commandStreamReceiver.isNTo1SubmissionModelEnabled(), //outOfOrderExecutionAllowed false, //epilogueRequired false, //usePerDssBackedBuffer false, //useSingleSubdevice false, //useGlobalAtomics commandQueue.getContext().containsMultipleSubDevices(rootDeviceIndex), //areMultipleSubDevicesInContext false, //memoryMigrationRequired false); //textureCacheFlush if (commandQueue.getContext().getRootDeviceIndices().size() > 1) { eventsRequest.fillCsrDependenciesForTaskCountContainer(dispatchFlags.csrDependencies, commandStreamReceiver); } const bool isHandlingBarrier = commandQueue.getGpgpuCommandStreamReceiver().isStallingCommandsOnNextFlushRequired(); if (commandStreamReceiver.peekTimestampPacketWriteEnabled()) { eventsRequest.fillCsrDependenciesForTimestampPacketContainer(dispatchFlags.csrDependencies, commandStreamReceiver, CsrDependencies::DependenciesType::OutOfCsr); if (isHandlingBarrier) { commandQueue.fillCsrDependenciesWithLastBcsPackets(dispatchFlags.csrDependencies); } makeTimestampPacketsResident(commandStreamReceiver); } gtpinNotifyPreFlushTask(&commandQueue); completionStamp = commandStreamReceiver.flushTask(*kernelOperation->commandStream, 0, &commandQueue.getIndirectHeap(IndirectHeap::Type::DYNAMIC_STATE, 0u), &commandQueue.getIndirectHeap(IndirectHeap::Type::INDIRECT_OBJECT, 0u), &commandQueue.getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0u), taskLevel, dispatchFlags, commandQueue.getDevice()); if (isHandlingBarrier) { commandQueue.clearLastBcsPackets(); } if (kernelOperation->blitEnqueue) { dispatchBlitOperation(); } commandQueue.updateLatestSentEnqueueType(enqueueOperationType); return completionStamp; } void Command::setEventsRequest(EventsRequest &eventsRequest) { this->eventsRequest = eventsRequest; if (eventsRequest.numEventsInWaitList > 0) { eventsWaitlist.resize(eventsRequest.numEventsInWaitList); auto size = 
eventsRequest.numEventsInWaitList * sizeof(cl_event); memcpy_s(&eventsWaitlist[0], size, eventsRequest.eventWaitList, size); this->eventsRequest.eventWaitList = &eventsWaitlist[0]; for (cl_uint i = 0; i < eventsRequest.numEventsInWaitList; i++) { auto event = castToObjectOrAbort(eventsRequest.eventWaitList[i]); event->incRefInternal(); } } } void Command::setTimestampPacketNode(TimestampPacketContainer ¤t, TimestampPacketDependencies &&dependencies) { currentTimestampPacketNodes = std::make_unique(); currentTimestampPacketNodes->assignAndIncrementNodesRefCounts(current); timestampPacketDependencies = std::make_unique(); *timestampPacketDependencies = std::move(dependencies); } Command::~Command() { if (terminated) { if (commandQueue.getTimestampPacketContainer()) { std::array timestampData; timestampData.fill(std::numeric_limits::max()); if (currentTimestampPacketNodes.get()) { for (auto &node : currentTimestampPacketNodes->peekNodes()) { for (const auto &cmdQueueNode : commandQueue.getTimestampPacketContainer()->peekNodes()) { if (node == cmdQueueNode) { for (uint32_t i = 0; i < node->getPacketsUsed(); i++) { node->assignDataToAllTimestamps(i, timestampData.data()); } } } } } } } else { if (commandQueue.getDeferredTimestampPackets() && timestampPacketDependencies.get()) { timestampPacketDependencies->moveNodesToNewContainer(*commandQueue.getDeferredTimestampPackets()); } } for (cl_event &eventFromWaitList : eventsWaitlist) { auto event = castToObjectOrAbort(eventFromWaitList); event->decRefInternal(); } } void Command::makeTimestampPacketsResident(CommandStreamReceiver &commandStreamReceiver) { if (commandStreamReceiver.peekTimestampPacketWriteEnabled()) { for (cl_event &eventFromWaitList : eventsWaitlist) { auto event = castToObjectOrAbort(eventFromWaitList); if (event->getTimestampPacketNodes() && event->getCommandQueue()->getClDevice().getRootDeviceIndex() == commandStreamReceiver.getRootDeviceIndex()) { 
event->getTimestampPacketNodes()->makeResident(commandStreamReceiver); } } } if (currentTimestampPacketNodes) { currentTimestampPacketNodes->makeResident(commandStreamReceiver); } if (timestampPacketDependencies) { timestampPacketDependencies->cacheFlushNodes.makeResident(commandStreamReceiver); timestampPacketDependencies->previousEnqueueNodes.makeResident(commandStreamReceiver); } } Command::Command(CommandQueue &commandQueue) : commandQueue(commandQueue) {} Command::Command(CommandQueue &commandQueue, std::unique_ptr &kernelOperation) : commandQueue(commandQueue), kernelOperation(std::move(kernelOperation)) {} } // namespace NEO compute-runtime-22.14.22890/opencl/source/helpers/task_information.h000066400000000000000000000125571422164147700253260ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/linear_stream.h" #include "shared/source/helpers/blit_commands_helper.h" #include "shared/source/helpers/completion_stamp.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/helpers/timestamp_packet.h" #include "shared/source/indirect_heap/indirect_heap.h" #include "shared/source/utilities/iflist.h" #include "opencl/source/helpers/properties_helper.h" #include #include namespace NEO { class CommandQueue; class CommandStreamReceiver; class InternalAllocationStorage; class Kernel; class MemObj; class Surface; class PrintfHandler; class HwTimeStamps; class TimestampPacketContainer; template class TagNode; enum MapOperationType { MAP, UNMAP }; struct KernelOperation { protected: struct ResourceCleaner { ResourceCleaner() = delete; ResourceCleaner(InternalAllocationStorage *storageForAllocations) : storageForAllocations(storageForAllocations){}; template void operator()(ObjectT *object); InternalAllocationStorage *storageForAllocations = nullptr; } resourceCleaner{nullptr}; using LinearStreamUniquePtrT = std::unique_ptr; using 
IndirectHeapUniquePtrT = std::unique_ptr; public: KernelOperation() = delete; KernelOperation(LinearStream *commandStream, InternalAllocationStorage &storageForAllocations) { resourceCleaner.storageForAllocations = &storageForAllocations; this->commandStream = LinearStreamUniquePtrT(commandStream, resourceCleaner); } void setHeaps(IndirectHeap *dsh, IndirectHeap *ioh, IndirectHeap *ssh) { this->dsh = IndirectHeapUniquePtrT(dsh, resourceCleaner); this->ioh = IndirectHeapUniquePtrT(ioh, resourceCleaner); this->ssh = IndirectHeapUniquePtrT(ssh, resourceCleaner); } ~KernelOperation() { if (ioh.get() == dsh.get()) { ioh.release(); } } LinearStreamUniquePtrT commandStream{nullptr, resourceCleaner}; IndirectHeapUniquePtrT dsh{nullptr, resourceCleaner}; IndirectHeapUniquePtrT ioh{nullptr, resourceCleaner}; IndirectHeapUniquePtrT ssh{nullptr, resourceCleaner}; CommandStreamReceiver *bcsCsr = nullptr; BlitPropertiesContainer blitPropertiesContainer; bool blitEnqueue = false; size_t surfaceStateHeapSizeEM = 0; }; class Command : public IFNode { public: // returns command's taskCount obtained from completion stamp // as acquired from command stream receiver virtual CompletionStamp &submit(uint32_t taskLevel, bool terminated) = 0; Command() = delete; Command(CommandQueue &commandQueue); Command(CommandQueue &commandQueue, std::unique_ptr &kernelOperation); virtual ~Command(); virtual LinearStream *getCommandStream() { return nullptr; } void setTimestampPacketNode(TimestampPacketContainer ¤t, TimestampPacketDependencies &&dependencies); void setEventsRequest(EventsRequest &eventsRequest); void makeTimestampPacketsResident(CommandStreamReceiver &commandStreamReceiver); TagNodeBase *timestamp = nullptr; CompletionStamp completionStamp = {}; protected: bool terminated = false; CommandQueue &commandQueue; std::unique_ptr kernelOperation; std::unique_ptr currentTimestampPacketNodes; std::unique_ptr timestampPacketDependencies; EventsRequest eventsRequest = {0, nullptr, nullptr}; 
std::vector eventsWaitlist; }; class CommandMapUnmap : public Command { public: CommandMapUnmap(MapOperationType operationType, MemObj &memObj, MemObjSizeArray ©Size, MemObjOffsetArray ©Offset, bool readOnly, CommandQueue &commandQueue); ~CommandMapUnmap() override = default; CompletionStamp &submit(uint32_t taskLevel, bool terminated) override; private: MemObj &memObj; MemObjSizeArray copySize; MemObjOffsetArray copyOffset; bool readOnly; MapOperationType operationType; }; class CommandComputeKernel : public Command { public: CommandComputeKernel(CommandQueue &commandQueue, std::unique_ptr &kernelOperation, std::vector surfaces, bool flushDC, bool usesSLM, uint32_t commandType, std::unique_ptr &&printfHandler, PreemptionMode preemptionMode, Kernel *kernel, uint32_t kernelCount); ~CommandComputeKernel() override; CompletionStamp &submit(uint32_t taskLevel, bool terminated) override; LinearStream *getCommandStream() override { return kernelOperation->commandStream.get(); } Kernel *peekKernel() const { return kernel; } PrintfHandler *peekPrintfHandler() const { return printfHandler.get(); } protected: std::vector surfaces; bool flushDC; bool slmUsed; uint32_t commandType; std::unique_ptr printfHandler; Kernel *kernel; uint32_t kernelCount; PreemptionMode preemptionMode; }; class CommandWithoutKernel : public Command { public: using Command::Command; CompletionStamp &submit(uint32_t taskLevel, bool terminated) override; void dispatchBlitOperation(); }; } // namespace NEO compute-runtime-22.14.22890/opencl/source/helpers/task_information.inl000066400000000000000000000007331422164147700256520ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/helpers/task_information.h" namespace NEO { template void KernelOperation::ResourceCleaner::operator()(ObjectT *object) { storageForAllocations->storeAllocation(std::unique_ptr(object->getGraphicsAllocation()), REUSABLE_ALLOCATION); delete object; 
} } // namespace NEO compute-runtime-22.14.22890/opencl/source/helpers/windows/000077500000000000000000000000001422164147700232665ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/source/helpers/windows/gl_helper.h000066400000000000000000000012521422164147700254000ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/os_library.h" #include "GL/gl.h" namespace Os { extern const char *openglDllName; } namespace NEO { class glFunctionHelper { public: glFunctionHelper::glFunctionHelper(OsLibrary *glLibrary, const std::string &functionName) { glFunctionPtr = (*glLibrary)[functionName]; } ConvertibleProcAddr operator[](const char *name) { return ConvertibleProcAddr{glFunctionPtr(name)}; } protected: // clang-format off PROC(__stdcall *glFunctionPtr)(LPCSTR Arg1) = nullptr; // clang-format on }; }; // namespace NEO compute-runtime-22.14.22890/opencl/source/kernel/000077500000000000000000000000001422164147700214125ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/source/kernel/CMakeLists.txt000066400000000000000000000015421422164147700241540ustar00rootroot00000000000000# # Copyright (C) 2018-2022 Intel Corporation # # SPDX-License-Identifier: MIT # set(RUNTIME_SRCS_KERNEL ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}get_additional_kernel_info.cpp ${CMAKE_CURRENT_SOURCE_DIR}/image_transformer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/image_transformer.h ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel.h ${CMAKE_CURRENT_SOURCE_DIR}/kernel_info_cl.h ${CMAKE_CURRENT_SOURCE_DIR}/kernel_objects_for_aux_translation.h ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}kernel_extra.cpp ${CMAKE_CURRENT_SOURCE_DIR}/multi_device_kernel.cpp ${CMAKE_CURRENT_SOURCE_DIR}/multi_device_kernel.h ) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_KERNEL}) set_property(GLOBAL PROPERTY RUNTIME_SRCS_KERNEL 
${RUNTIME_SRCS_KERNEL}) compute-runtime-22.14.22890/opencl/source/kernel/get_additional_kernel_info.cpp000066400000000000000000000006531422164147700274440ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/kernel/kernel.h" namespace NEO { void Kernel::getAdditionalInfo(cl_kernel_info paramName, const void *¶mValue, size_t ¶mValueSizeRet) const { } void Kernel::getAdditionalWorkGroupInfo(cl_kernel_work_group_info paramName, const void *¶mValue, size_t ¶mValueSizeRet) const { } } // namespace NEO compute-runtime-22.14.22890/opencl/source/kernel/image_transformer.cpp000066400000000000000000000036401422164147700256250ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/kernel/image_transformer.h" #include "shared/source/helpers/ptr_math.h" #include "shared/source/program/kernel_info.h" #include "opencl/source/mem_obj/image.h" namespace NEO { void ImageTransformer::registerImage3d(uint32_t argIndex) { if (std::find(argIndexes.begin(), argIndexes.end(), argIndex) == argIndexes.end()) { argIndexes.push_back(argIndex); } } void ImageTransformer::transformImagesTo2dArray(const KernelInfo &kernelInfo, const std::vector &kernelArguments, void *ssh) { for (auto const &argIndex : argIndexes) { const auto &arg = kernelInfo.kernelDescriptor.payloadMappings.explicitArgs[argIndex]; if (arg.getExtendedTypeInfo().isTransformable) { auto clMemObj = static_cast(kernelArguments.at(argIndex).object); auto image = castToObjectOrAbort(clMemObj); auto surfaceState = ptrOffset(ssh, arg.as().bindful); image->transformImage3dTo2dArray(surfaceState); } } transformed = true; } void ImageTransformer::transformImagesTo3d(const KernelInfo &kernelInfo, const std::vector &kernelArguments, void *ssh) { for (auto const &argIndex : argIndexes) { const auto &arg = kernelInfo.kernelDescriptor.payloadMappings.explicitArgs[argIndex]; auto 
clMemObj = static_cast(kernelArguments.at(argIndex).object); auto image = castToObjectOrAbort(clMemObj); auto surfaceState = ptrOffset(ssh, arg.as().bindful); image->transformImage2dArrayTo3d(surfaceState); } transformed = false; } bool ImageTransformer::didTransform() const { return transformed; } bool ImageTransformer::hasRegisteredImages3d() const { return !argIndexes.empty(); } } // namespace NEO compute-runtime-22.14.22890/opencl/source/kernel/image_transformer.h000066400000000000000000000012671422164147700252750ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/kernel/kernel.h" namespace NEO { class ImageTransformer { public: void registerImage3d(uint32_t argIndex); void transformImagesTo2dArray(const KernelInfo &kernelInfo, const std::vector &kernelArguments, void *ssh); void transformImagesTo3d(const KernelInfo &kernelInfo, const std::vector &kernelArguments, void *ssh); bool didTransform() const; bool hasRegisteredImages3d() const; protected: bool transformed = false; std::vector argIndexes; }; } // namespace NEO compute-runtime-22.14.22890/opencl/source/kernel/kernel.cpp000066400000000000000000003043761422164147700234130ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/kernel/kernel.h" #include "shared/source/built_ins/built_ins.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/device_binary_format/patchtokens_decoder.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/api_specific_config.h" #include "shared/source/helpers/basic_math.h" #include "shared/source/helpers/debug_helpers.h" #include "shared/source/helpers/get_info.h" #include "shared/source/helpers/hw_helper.h" #include 
"shared/source/helpers/kernel_helpers.h" #include "shared/source/helpers/per_thread_data.h" #include "shared/source/helpers/ptr_math.h" #include "shared/source/helpers/surface_format_info.h" #include "shared/source/kernel/kernel_arg_descriptor_extended_device_side_enqueue.h" #include "shared/source/kernel/kernel_arg_descriptor_extended_vme.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/memory_manager/unified_memory_manager.h" #include "shared/source/os_interface/hw_info_config.h" #include "shared/source/program/kernel_info.h" #include "opencl/source/accelerators/intel_accelerator.h" #include "opencl/source/accelerators/intel_motion_estimation.h" #include "opencl/source/built_ins/builtins_dispatch_builder.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/command_queue/cl_local_work_size.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/context/context.h" #include "opencl/source/gtpin/gtpin_notify.h" #include "opencl/source/helpers/cl_hw_helper.h" #include "opencl/source/helpers/dispatch_info.h" #include "opencl/source/helpers/get_info_status_mapper.h" #include "opencl/source/helpers/sampler_helpers.h" #include "opencl/source/kernel/image_transformer.h" #include "opencl/source/kernel/kernel_info_cl.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/mem_obj/pipe.h" #include "opencl/source/memory_manager/mem_obj_surface.h" #include "opencl/source/platform/platform.h" #include "opencl/source/sampler/sampler.h" #include "patch_list.h" #include #include #include using namespace iOpenCL; namespace NEO { class Surface; uint32_t Kernel::dummyPatchLocation = 0xbaddf00d; Kernel::Kernel(Program *programArg, const KernelInfo &kernelInfoArg, ClDevice &clDeviceArg) : executionEnvironment(programArg->getExecutionEnvironment()), program(programArg), clDevice(clDeviceArg), kernelInfo(kernelInfoArg) { program->retain(); 
program->retainForKernel(); imageTransformer.reset(new ImageTransformer); auto &deviceInfo = getDevice().getDevice().getDeviceInfo(); if (kernelInfoArg.kernelDescriptor.kernelAttributes.simdSize == 1u) { auto &hwInfoConfig = *HwInfoConfig::get(getHardwareInfo().platform.eProductFamily); maxKernelWorkGroupSize = hwInfoConfig.getMaxThreadsForWorkgroupInDSSOrSS(getHardwareInfo(), static_cast(deviceInfo.maxNumEUsPerSubSlice), static_cast(deviceInfo.maxNumEUsPerDualSubSlice)); } else { maxKernelWorkGroupSize = static_cast(deviceInfo.maxWorkGroupSize); } slmTotalSize = kernelInfoArg.kernelDescriptor.kernelAttributes.slmInlineSize; } Kernel::~Kernel() { delete[] crossThreadData; crossThreadData = nullptr; crossThreadDataSize = 0; if (privateSurface) { program->peekExecutionEnvironment().memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(privateSurface); privateSurface = nullptr; } for (uint32_t i = 0; i < patchedArgumentsNum; i++) { if (SAMPLER_OBJ == getKernelArguments()[i].type) { auto sampler = castToObject(kernelArguments.at(i).object); if (sampler) { sampler->decRefInternal(); } } } kernelArgHandlers.clear(); program->releaseForKernel(); program->release(); } // If dstOffsetBytes is not an invalid offset, then patches dst at dstOffsetBytes // with src casted to DstT type. 
template inline void patch(const SrcT &src, void *dst, CrossThreadDataOffset dstOffsetBytes) { if (isValidOffset(dstOffsetBytes)) { DstT *patchLocation = reinterpret_cast(ptrOffset(dst, dstOffsetBytes)); *patchLocation = static_cast(src); } } void Kernel::patchWithImplicitSurface(void *ptrToPatchInCrossThreadData, GraphicsAllocation &allocation, const ArgDescPointer &arg) { if ((nullptr != crossThreadData) && isValidOffset(arg.stateless)) { auto pp = ptrOffset(crossThreadData, arg.stateless); uintptr_t addressToPatch = reinterpret_cast(ptrToPatchInCrossThreadData); patchWithRequiredSize(pp, arg.pointerSize, addressToPatch); if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) { PatchInfoData patchInfoData(addressToPatch, 0u, PatchInfoAllocationType::KernelArg, reinterpret_cast(crossThreadData), arg.stateless, PatchInfoAllocationType::IndirectObjectHeap, arg.pointerSize); this->patchInfoDataList.push_back(patchInfoData); } } void *ssh = getSurfaceStateHeap(); if ((nullptr != ssh) && isValidOffset(arg.bindful)) { auto surfaceState = ptrOffset(ssh, arg.bindful); void *addressToPatch = reinterpret_cast(allocation.getGpuAddressToPatch()); size_t sizeToPatch = allocation.getUnderlyingBufferSize(); Buffer::setSurfaceState(&clDevice.getDevice(), surfaceState, false, false, sizeToPatch, addressToPatch, 0, &allocation, 0, 0, kernelInfo.kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, areMultipleSubDevicesInContext()); } } cl_int Kernel::initialize() { this->kernelHasIndirectAccess = false; auto pClDevice = &getDevice(); auto rootDeviceIndex = pClDevice->getRootDeviceIndex(); reconfigureKernel(); auto &hwInfo = pClDevice->getHardwareInfo(); auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); auto &kernelDescriptor = kernelInfo.kernelDescriptor; const auto &implicitArgs = kernelDescriptor.payloadMappings.implicitArgs; const auto &explicitArgs = kernelDescriptor.payloadMappings.explicitArgs; auto maxSimdSize = kernelInfo.getMaxSimdSize(); const 
auto &heapInfo = kernelInfo.heapInfo; if (maxSimdSize != 1 && maxSimdSize < hwHelper.getMinimalSIMDSize()) { return CL_INVALID_KERNEL; } if (kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs) { pImplicitArgs = std::make_unique(); *pImplicitArgs = {}; pImplicitArgs->structSize = sizeof(ImplicitArgs); pImplicitArgs->structVersion = 0; pImplicitArgs->simdWidth = maxSimdSize; } crossThreadDataSize = kernelDescriptor.kernelAttributes.crossThreadDataSize; // now allocate our own cross-thread data, if necessary if (crossThreadDataSize) { crossThreadData = new char[crossThreadDataSize]; if (kernelInfo.crossThreadData) { memcpy_s(crossThreadData, crossThreadDataSize, kernelInfo.crossThreadData, crossThreadDataSize); } else { memset(crossThreadData, 0x00, crossThreadDataSize); } auto crossThread = reinterpret_cast(crossThreadData); auto setArgsIfValidOffset = [&](uint32_t *&crossThreadData, NEO::CrossThreadDataOffset offset, uint32_t value) { if (isValidOffset(offset)) { crossThreadData = ptrOffset(crossThread, offset); *crossThreadData = value; } }; setArgsIfValidOffset(maxWorkGroupSizeForCrossThreadData, implicitArgs.maxWorkGroupSize, maxKernelWorkGroupSize); setArgsIfValidOffset(dataParameterSimdSize, implicitArgs.simdSize, maxSimdSize); setArgsIfValidOffset(preferredWkgMultipleOffset, implicitArgs.preferredWkgMultiple, maxSimdSize); setArgsIfValidOffset(parentEventOffset, implicitArgs.deviceSideEnqueueParentEvent, undefined); } // allocate our own SSH, if necessary sshLocalSize = heapInfo.SurfaceStateHeapSize; if (sshLocalSize) { pSshLocal = std::make_unique(sshLocalSize); // copy the ssh into our local copy memcpy_s(pSshLocal.get(), sshLocalSize, heapInfo.pSsh, heapInfo.SurfaceStateHeapSize); } numberOfBindingTableStates = kernelDescriptor.payloadMappings.bindingTable.numEntries; localBindingTableOffset = kernelDescriptor.payloadMappings.bindingTable.tableOffset; // patch crossthread data and ssh with inline surfaces, if necessary auto status = 
patchPrivateSurface(); if (CL_SUCCESS != status) { return status; } if (isValidOffset(kernelDescriptor.payloadMappings.implicitArgs.globalConstantsSurfaceAddress.stateless)) { DEBUG_BREAK_IF(program->getConstantSurface(rootDeviceIndex) == nullptr); uintptr_t constMemory = isBuiltIn ? (uintptr_t)program->getConstantSurface(rootDeviceIndex)->getUnderlyingBuffer() : (uintptr_t)program->getConstantSurface(rootDeviceIndex)->getGpuAddressToPatch(); const auto &arg = kernelDescriptor.payloadMappings.implicitArgs.globalConstantsSurfaceAddress; patchWithImplicitSurface(reinterpret_cast(constMemory), *program->getConstantSurface(rootDeviceIndex), arg); } if (isValidOffset(kernelDescriptor.payloadMappings.implicitArgs.globalVariablesSurfaceAddress.stateless)) { DEBUG_BREAK_IF(program->getGlobalSurface(rootDeviceIndex) == nullptr); uintptr_t globalMemory = isBuiltIn ? (uintptr_t)program->getGlobalSurface(rootDeviceIndex)->getUnderlyingBuffer() : (uintptr_t)program->getGlobalSurface(rootDeviceIndex)->getGpuAddressToPatch(); const auto &arg = kernelDescriptor.payloadMappings.implicitArgs.globalVariablesSurfaceAddress; patchWithImplicitSurface(reinterpret_cast(globalMemory), *program->getGlobalSurface(rootDeviceIndex), arg); } // Patch Surface State Heap bool useGlobalAtomics = kernelInfo.kernelDescriptor.kernelAttributes.flags.useGlobalAtomics; if (isValidOffset(kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueEventPoolSurfaceAddress.bindful)) { auto surfaceState = ptrOffset(reinterpret_cast(getSurfaceStateHeap()), kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueEventPoolSurfaceAddress.bindful); Buffer::setSurfaceState(&pClDevice->getDevice(), surfaceState, false, false, 0, nullptr, 0, nullptr, 0, 0, useGlobalAtomics, areMultipleSubDevicesInContext()); } if (isValidOffset(kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueDefaultQueueSurfaceAddress.bindful)) { auto surfaceState = ptrOffset(reinterpret_cast(getSurfaceStateHeap()), 
kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueDefaultQueueSurfaceAddress.bindful); Buffer::setSurfaceState(&pClDevice->getDevice(), surfaceState, false, false, 0, nullptr, 0, nullptr, 0, 0, useGlobalAtomics, areMultipleSubDevicesInContext()); } setThreadArbitrationPolicy(hwHelper.getDefaultThreadArbitrationPolicy()); if (false == kernelInfo.kernelDescriptor.kernelAttributes.flags.requiresSubgroupIndependentForwardProgress) { setThreadArbitrationPolicy(ThreadArbitrationPolicy::AgeBased); } auto &clHwHelper = ClHwHelper::get(hwInfo.platform.eRenderCoreFamily); auxTranslationRequired = !program->getIsBuiltIn() && HwHelper::compressedBuffersSupported(hwInfo) && clHwHelper.requiresAuxResolves(kernelInfo, hwInfo); if (DebugManager.flags.ForceAuxTranslationEnabled.get() != -1) { auxTranslationRequired &= !!DebugManager.flags.ForceAuxTranslationEnabled.get(); } if (auxTranslationRequired) { program->getContextPtr()->setResolvesRequiredInKernels(true); } if (program->isKernelDebugEnabled() && isValidOffset(kernelDescriptor.payloadMappings.implicitArgs.systemThreadSurfaceAddress.bindful)) { debugEnabled = true; } auto numArgs = explicitArgs.size(); slmSizes.resize(numArgs); this->kernelHasIndirectAccess |= kernelInfo.kernelDescriptor.kernelAttributes.hasNonKernelArgLoad || kernelInfo.kernelDescriptor.kernelAttributes.hasNonKernelArgStore || kernelInfo.kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic; provideInitializationHints(); // resolve the new kernel info to account for kernel handlers // I think by this time we have decoded the binary and know the number of args etc. 
// double check this assumption bool usingBuffers = false; kernelArguments.resize(numArgs); kernelArgHandlers.resize(numArgs); kernelArgRequiresCacheFlush.resize(numArgs); for (uint32_t i = 0; i < numArgs; ++i) { storeKernelArg(i, NONE_OBJ, nullptr, nullptr, 0); // set the argument handler const auto &arg = explicitArgs[i]; if (arg.is()) { if (arg.getTraits().addressQualifier == KernelArgMetadata::AddrLocal) { kernelArgHandlers[i] = &Kernel::setArgLocal; } else if (arg.getTraits().typeQualifiers.pipeQual) { kernelArgHandlers[i] = &Kernel::setArgPipe; kernelArguments[i].type = PIPE_OBJ; } else { kernelArgHandlers[i] = &Kernel::setArgBuffer; kernelArguments[i].type = BUFFER_OBJ; usingBuffers = true; allBufferArgsStateful &= static_cast(arg.as().isPureStateful()); } } else if (arg.is()) { kernelArgHandlers[i] = &Kernel::setArgImage; kernelArguments[i].type = IMAGE_OBJ; usingImages = true; } else if (arg.is()) { if (arg.getExtendedTypeInfo().isAccelerator) { kernelArgHandlers[i] = &Kernel::setArgAccelerator; } else { kernelArgHandlers[i] = &Kernel::setArgSampler; kernelArguments[i].type = SAMPLER_OBJ; } } else { kernelArgHandlers[i] = &Kernel::setArgImmediate; } } if (usingImages && !usingBuffers) { usingImagesOnly = true; } return CL_SUCCESS; } cl_int Kernel::patchPrivateSurface() { auto pClDevice = &getDevice(); auto rootDeviceIndex = pClDevice->getRootDeviceIndex(); auto &kernelDescriptor = kernelInfo.kernelDescriptor; auto perHwThreadPrivateMemorySize = kernelDescriptor.kernelAttributes.perHwThreadPrivateMemorySize; if (perHwThreadPrivateMemorySize) { if (!privateSurface) { privateSurfaceSize = KernelHelper::getPrivateSurfaceSize(perHwThreadPrivateMemorySize, pClDevice->getSharedDeviceInfo().computeUnitsUsedForScratch); DEBUG_BREAK_IF(privateSurfaceSize == 0); if (privateSurfaceSize > std::numeric_limits::max()) { return CL_OUT_OF_RESOURCES; } privateSurface = executionEnvironment.memoryManager->allocateGraphicsMemoryWithProperties( {rootDeviceIndex, 
static_cast(privateSurfaceSize), AllocationType::PRIVATE_SURFACE, pClDevice->getDeviceBitfield()}); if (privateSurface == nullptr) { return CL_OUT_OF_RESOURCES; } } const auto &privateMemoryAddress = kernelDescriptor.payloadMappings.implicitArgs.privateMemoryAddress; patchWithImplicitSurface(reinterpret_cast(privateSurface->getGpuAddressToPatch()), *privateSurface, privateMemoryAddress); } return CL_SUCCESS; } cl_int Kernel::cloneKernel(Kernel *pSourceKernel) { // copy cross thread data to store arguments set to source kernel with clSetKernelArg on immediate data (non-pointer types) memcpy_s(crossThreadData, crossThreadDataSize, pSourceKernel->crossThreadData, pSourceKernel->crossThreadDataSize); DEBUG_BREAK_IF(pSourceKernel->crossThreadDataSize != crossThreadDataSize); [[maybe_unused]] auto status = patchPrivateSurface(); DEBUG_BREAK_IF(status != CL_SUCCESS); // copy arguments set to source kernel with clSetKernelArg or clSetKernelArgSVMPointer for (uint32_t i = 0; i < pSourceKernel->kernelArguments.size(); i++) { if (0 == pSourceKernel->getKernelArgInfo(i).size) { // skip copying arguments that haven't been set to source kernel continue; } switch (pSourceKernel->kernelArguments[i].type) { case NONE_OBJ: // all arguments with immediate data (non-pointer types) have been copied in cross thread data storeKernelArg(i, NONE_OBJ, nullptr, nullptr, pSourceKernel->getKernelArgInfo(i).size); patchedArgumentsNum++; kernelArguments[i].isPatched = true; break; case SVM_OBJ: setArgSvm(i, pSourceKernel->getKernelArgInfo(i).size, const_cast(pSourceKernel->getKernelArgInfo(i).value), pSourceKernel->getKernelArgInfo(i).pSvmAlloc, pSourceKernel->getKernelArgInfo(i).svmFlags); break; case SVM_ALLOC_OBJ: setArgSvmAlloc(i, const_cast(pSourceKernel->getKernelArgInfo(i).value), (GraphicsAllocation *)pSourceKernel->getKernelArgInfo(i).object, pSourceKernel->getKernelArgInfo(i).allocId); break; default: setArg(i, pSourceKernel->getKernelArgInfo(i).size, 
pSourceKernel->getKernelArgInfo(i).value); break; } } // copy additional information other than argument values set to source kernel with clSetKernelExecInfo for (auto &gfxAlloc : pSourceKernel->kernelSvmGfxAllocations) { kernelSvmGfxAllocations.push_back(gfxAlloc); } for (auto &gfxAlloc : pSourceKernel->kernelUnifiedMemoryGfxAllocations) { kernelUnifiedMemoryGfxAllocations.push_back(gfxAlloc); } if (pImplicitArgs) { memcpy_s(pImplicitArgs.get(), sizeof(ImplicitArgs), pSourceKernel->getImplicitArgs(), sizeof(ImplicitArgs)); } this->isBuiltIn = pSourceKernel->isBuiltIn; return CL_SUCCESS; } cl_int Kernel::getInfo(cl_kernel_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) const { cl_int retVal; const void *pSrc = nullptr; size_t srcSize = GetInfo::invalidSourceSize; cl_uint numArgs = 0; const _cl_program *prog; const _cl_context *ctxt; cl_uint refCount = 0; uint64_t nonCannonizedGpuAddress = 0llu; switch (paramName) { case CL_KERNEL_FUNCTION_NAME: pSrc = kernelInfo.kernelDescriptor.kernelMetadata.kernelName.c_str(); srcSize = kernelInfo.kernelDescriptor.kernelMetadata.kernelName.length() + 1; break; case CL_KERNEL_NUM_ARGS: srcSize = sizeof(cl_uint); numArgs = static_cast(kernelInfo.kernelDescriptor.payloadMappings.explicitArgs.size()); pSrc = &numArgs; break; case CL_KERNEL_CONTEXT: ctxt = &program->getContext(); srcSize = sizeof(ctxt); pSrc = &ctxt; break; case CL_KERNEL_PROGRAM: prog = program; srcSize = sizeof(prog); pSrc = &prog; break; case CL_KERNEL_REFERENCE_COUNT: refCount = static_cast(pMultiDeviceKernel->getRefApiCount()); srcSize = sizeof(refCount); pSrc = &refCount; break; case CL_KERNEL_ATTRIBUTES: pSrc = kernelInfo.kernelDescriptor.kernelMetadata.kernelLanguageAttributes.c_str(); srcSize = kernelInfo.kernelDescriptor.kernelMetadata.kernelLanguageAttributes.length() + 1; break; case CL_KERNEL_BINARY_PROGRAM_INTEL: pSrc = getKernelHeap(); srcSize = getKernelHeapSize(); break; case CL_KERNEL_BINARY_GPU_ADDRESS_INTEL: 
nonCannonizedGpuAddress = GmmHelper::decanonize(kernelInfo.kernelAllocation->getGpuAddress()); pSrc = &nonCannonizedGpuAddress; srcSize = sizeof(nonCannonizedGpuAddress); break; default: getAdditionalInfo(paramName, pSrc, srcSize); break; } auto getInfoStatus = GetInfo::getInfo(paramValue, paramValueSize, pSrc, srcSize); retVal = changeGetInfoStatusToCLResultType(getInfoStatus); GetInfo::setParamValueReturnSize(paramValueSizeRet, srcSize, getInfoStatus); return retVal; } cl_int Kernel::getArgInfo(cl_uint argIndex, cl_kernel_arg_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) const { cl_int retVal; const void *pSrc = nullptr; size_t srcSize = GetInfo::invalidSourceSize; const auto &args = kernelInfo.kernelDescriptor.payloadMappings.explicitArgs; if (argIndex >= args.size()) { retVal = CL_INVALID_ARG_INDEX; return retVal; } const auto &argTraits = args[argIndex].getTraits(); const auto &argMetadata = kernelInfo.kernelDescriptor.explicitArgsExtendedMetadata[argIndex]; cl_kernel_arg_address_qualifier addressQualifier; cl_kernel_arg_access_qualifier accessQualifier; cl_kernel_arg_type_qualifier typeQualifier; switch (paramName) { case CL_KERNEL_ARG_ADDRESS_QUALIFIER: addressQualifier = asClKernelArgAddressQualifier(argTraits.getAddressQualifier()); srcSize = sizeof(addressQualifier); pSrc = &addressQualifier; break; case CL_KERNEL_ARG_ACCESS_QUALIFIER: accessQualifier = asClKernelArgAccessQualifier(argTraits.getAccessQualifier()); srcSize = sizeof(accessQualifier); pSrc = &accessQualifier; break; case CL_KERNEL_ARG_TYPE_QUALIFIER: typeQualifier = asClKernelArgTypeQualifier(argTraits.typeQualifiers); srcSize = sizeof(typeQualifier); pSrc = &typeQualifier; break; case CL_KERNEL_ARG_TYPE_NAME: srcSize = argMetadata.type.length() + 1; pSrc = argMetadata.type.c_str(); break; case CL_KERNEL_ARG_NAME: srcSize = argMetadata.argName.length() + 1; pSrc = argMetadata.argName.c_str(); break; default: break; } auto getInfoStatus = 
GetInfo::getInfo(paramValue, paramValueSize, pSrc, srcSize); retVal = changeGetInfoStatusToCLResultType(getInfoStatus); GetInfo::setParamValueReturnSize(paramValueSizeRet, srcSize, getInfoStatus); return retVal; } cl_int Kernel::getWorkGroupInfo(cl_kernel_work_group_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) const { cl_int retVal = CL_INVALID_VALUE; const void *pSrc = nullptr; size_t srcSize = GetInfo::invalidSourceSize; struct size_t3 { size_t val[3]; } requiredWorkGroupSize; cl_ulong localMemorySize; const auto &kernelDescriptor = kernelInfo.kernelDescriptor; size_t preferredWorkGroupSizeMultiple = 0; cl_ulong scratchSize; cl_ulong privateMemSize; size_t maxWorkgroupSize; const auto &hwInfo = clDevice.getHardwareInfo(); auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); auto &clHwHelper = ClHwHelper::get(hwInfo.platform.eRenderCoreFamily); GetInfoHelper info(paramValue, paramValueSize, paramValueSizeRet); switch (paramName) { case CL_KERNEL_WORK_GROUP_SIZE: maxWorkgroupSize = maxKernelWorkGroupSize; if (DebugManager.flags.UseMaxSimdSizeToDeduceMaxWorkgroupSize.get()) { auto divisionSize = CommonConstants::maximalSimdSize / kernelInfo.getMaxSimdSize(); maxWorkgroupSize /= divisionSize; } srcSize = sizeof(maxWorkgroupSize); pSrc = &maxWorkgroupSize; break; case CL_KERNEL_COMPILE_WORK_GROUP_SIZE: requiredWorkGroupSize.val[0] = kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0]; requiredWorkGroupSize.val[1] = kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1]; requiredWorkGroupSize.val[2] = kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2]; srcSize = sizeof(requiredWorkGroupSize); pSrc = &requiredWorkGroupSize; break; case CL_KERNEL_LOCAL_MEM_SIZE: localMemorySize = kernelInfo.kernelDescriptor.kernelAttributes.slmInlineSize; srcSize = sizeof(localMemorySize); pSrc = &localMemorySize; break; case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE: preferredWorkGroupSizeMultiple = 
kernelInfo.getMaxSimdSize(); if (hwHelper.isFusedEuDispatchEnabled(hwInfo, kernelDescriptor.kernelAttributes.flags.requiresDisabledEUFusion)) { preferredWorkGroupSizeMultiple *= 2; } srcSize = sizeof(preferredWorkGroupSizeMultiple); pSrc = &preferredWorkGroupSizeMultiple; break; case CL_KERNEL_SPILL_MEM_SIZE_INTEL: scratchSize = kernelDescriptor.kernelAttributes.perThreadScratchSize[0]; srcSize = sizeof(scratchSize); pSrc = &scratchSize; break; case CL_KERNEL_PRIVATE_MEM_SIZE: privateMemSize = clHwHelper.getKernelPrivateMemSize(kernelInfo); srcSize = sizeof(privateMemSize); pSrc = &privateMemSize; break; default: getAdditionalWorkGroupInfo(paramName, pSrc, srcSize); break; } auto getInfoStatus = GetInfo::getInfo(paramValue, paramValueSize, pSrc, srcSize); retVal = changeGetInfoStatusToCLResultType(getInfoStatus); GetInfo::setParamValueReturnSize(paramValueSizeRet, srcSize, getInfoStatus); return retVal; } cl_int Kernel::getSubGroupInfo(cl_kernel_sub_group_info paramName, size_t inputValueSize, const void *inputValue, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) const { size_t numDimensions = 0; size_t WGS = 1; auto maxSimdSize = static_cast(kernelInfo.getMaxSimdSize()); auto maxRequiredWorkGroupSize = static_cast(kernelInfo.getMaxRequiredWorkGroupSize(getMaxKernelWorkGroupSize())); auto largestCompiledSIMDSize = static_cast(kernelInfo.getMaxSimdSize()); GetInfoHelper info(paramValue, paramValueSize, paramValueSizeRet); if ((paramName == CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT) || (paramName == CL_KERNEL_MAX_NUM_SUB_GROUPS) || (paramName == CL_KERNEL_COMPILE_NUM_SUB_GROUPS)) { if (clDevice.areOcl21FeaturesEnabled() == false) { return CL_INVALID_OPERATION; } } if ((paramName == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR) || (paramName == CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR)) { if (!inputValue) { return CL_INVALID_VALUE; } if (inputValueSize % sizeof(size_t) != 0) { return CL_INVALID_VALUE; } numDimensions = inputValueSize / 
sizeof(size_t); if (numDimensions == 0 || numDimensions > static_cast(clDevice.getDeviceInfo().maxWorkItemDimensions)) { return CL_INVALID_VALUE; } } if (paramName == CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT) { if (!paramValue) { return CL_INVALID_VALUE; } if (paramValueSize % sizeof(size_t) != 0) { return CL_INVALID_VALUE; } numDimensions = paramValueSize / sizeof(size_t); if (numDimensions == 0 || numDimensions > static_cast(clDevice.getDeviceInfo().maxWorkItemDimensions)) { return CL_INVALID_VALUE; } } switch (paramName) { case CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR: { return changeGetInfoStatusToCLResultType(info.set(maxSimdSize)); } case CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR: { for (size_t i = 0; i < numDimensions; i++) { WGS *= ((size_t *)inputValue)[i]; } return changeGetInfoStatusToCLResultType( info.set((WGS / maxSimdSize) + std::min(static_cast(1), WGS % maxSimdSize))); // add 1 if WGS % maxSimdSize != 0 } case CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT: { auto subGroupsNum = *(size_t *)inputValue; auto workGroupSize = subGroupsNum * largestCompiledSIMDSize; // return workgroup size in first dimension, the rest shall be 1 in positive case if (workGroupSize > maxRequiredWorkGroupSize) { workGroupSize = 0; } // If no work group size can accommodate the requested number of subgroups, return 0 in each element of the returned array. switch (numDimensions) { case 1: return changeGetInfoStatusToCLResultType(info.set(workGroupSize)); case 2: struct size_t2 { size_t val[2]; } workGroupSize2; workGroupSize2.val[0] = workGroupSize; workGroupSize2.val[1] = (workGroupSize > 0) ? 1 : 0; return changeGetInfoStatusToCLResultType(info.set(workGroupSize2)); default: struct size_t3 { size_t val[3]; } workGroupSize3; workGroupSize3.val[0] = workGroupSize; workGroupSize3.val[1] = (workGroupSize > 0) ? 1 : 0; workGroupSize3.val[2] = (workGroupSize > 0) ? 
1 : 0; return changeGetInfoStatusToCLResultType(info.set(workGroupSize3)); } } case CL_KERNEL_MAX_NUM_SUB_GROUPS: { // round-up maximum number of subgroups return changeGetInfoStatusToCLResultType(info.set(Math::divideAndRoundUp(maxRequiredWorkGroupSize, largestCompiledSIMDSize))); } case CL_KERNEL_COMPILE_NUM_SUB_GROUPS: { return changeGetInfoStatusToCLResultType(info.set(static_cast(kernelInfo.kernelDescriptor.kernelMetadata.compiledSubGroupsNumber))); } case CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL: { return changeGetInfoStatusToCLResultType(info.set(kernelInfo.kernelDescriptor.kernelMetadata.requiredSubGroupSize)); } default: return CL_INVALID_VALUE; } } const void *Kernel::getKernelHeap() const { return kernelInfo.heapInfo.pKernelHeap; } size_t Kernel::getKernelHeapSize() const { return kernelInfo.heapInfo.KernelHeapSize; } void Kernel::substituteKernelHeap(void *newKernelHeap, size_t newKernelHeapSize) { KernelInfo *pKernelInfo = const_cast(&kernelInfo); void **pKernelHeap = const_cast(&pKernelInfo->heapInfo.pKernelHeap); *pKernelHeap = newKernelHeap; auto &heapInfo = pKernelInfo->heapInfo; heapInfo.KernelHeapSize = static_cast(newKernelHeapSize); pKernelInfo->isKernelHeapSubstituted = true; auto memoryManager = executionEnvironment.memoryManager.get(); auto currentAllocationSize = pKernelInfo->kernelAllocation->getUnderlyingBufferSize(); bool status = false; const auto &hwInfo = clDevice.getHardwareInfo(); auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); size_t isaPadding = hwHelper.getPaddingForISAAllocation(); if (currentAllocationSize >= newKernelHeapSize + isaPadding) { auto &hwInfo = clDevice.getDevice().getHardwareInfo(); auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); status = MemoryTransferHelper::transferMemoryToAllocation(hwHelper.isBlitCopyRequiredForLocalMemory(hwInfo, *pKernelInfo->getGraphicsAllocation()), clDevice.getDevice(), pKernelInfo->getGraphicsAllocation(), 0, newKernelHeap, 
static_cast(newKernelHeapSize)); } else { memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(pKernelInfo->kernelAllocation); pKernelInfo->kernelAllocation = nullptr; status = pKernelInfo->createKernelAllocation(clDevice.getDevice(), isBuiltIn); } UNRECOVERABLE_IF(!status); } bool Kernel::isKernelHeapSubstituted() const { return kernelInfo.isKernelHeapSubstituted; } uint64_t Kernel::getKernelId() const { return kernelInfo.kernelId; } void Kernel::setKernelId(uint64_t newKernelId) { KernelInfo *pKernelInfo = const_cast(&kernelInfo); pKernelInfo->kernelId = newKernelId; } uint32_t Kernel::getStartOffset() const { return this->startOffset; } void Kernel::setStartOffset(uint32_t offset) { this->startOffset = offset; } void *Kernel::getSurfaceStateHeap() const { return pSshLocal.get(); } size_t Kernel::getDynamicStateHeapSize() const { return kernelInfo.heapInfo.DynamicStateHeapSize; } const void *Kernel::getDynamicStateHeap() const { return kernelInfo.heapInfo.pDsh; } size_t Kernel::getSurfaceStateHeapSize() const { return sshLocalSize; } size_t Kernel::getNumberOfBindingTableStates() const { return numberOfBindingTableStates; } void Kernel::resizeSurfaceStateHeap(void *pNewSsh, size_t newSshSize, size_t newBindingTableCount, size_t newBindingTableOffset) { pSshLocal.reset(static_cast(pNewSsh)); sshLocalSize = static_cast(newSshSize); numberOfBindingTableStates = newBindingTableCount; localBindingTableOffset = newBindingTableOffset; } void Kernel::markArgPatchedAndResolveArgs(uint32_t argIndex) { if (!kernelArguments[argIndex].isPatched) { patchedArgumentsNum++; kernelArguments[argIndex].isPatched = true; } if (program->getContextPtr() && getContext().getRootDeviceIndices().size() > 1u && Kernel::isMemObj(kernelArguments[argIndex].type) && kernelArguments[argIndex].object) { auto argMemObj = castToObjectOrAbort(reinterpret_cast(kernelArguments[argIndex].object)); auto memObj = argMemObj->getHighestRootMemObj(); auto migrateRequiredForArg = 
memObj->getMultiGraphicsAllocation().requiresMigrations(); if (migratableArgsMap.find(argIndex) == migratableArgsMap.end() && migrateRequiredForArg) { migratableArgsMap.insert({argIndex, memObj}); } else if (migrateRequiredForArg) { migratableArgsMap[argIndex] = memObj; } else { migratableArgsMap.erase(argIndex); } } resolveArgs(); } cl_int Kernel::setArg(uint32_t argIndex, size_t argSize, const void *argVal) { cl_int retVal = CL_SUCCESS; bool updateExposedKernel = true; auto argWasUncacheable = false; if (kernelInfo.builtinDispatchBuilder != nullptr) { updateExposedKernel = kernelInfo.builtinDispatchBuilder->setExplicitArg(argIndex, argSize, argVal, retVal); } if (updateExposedKernel) { if (argIndex >= kernelArgHandlers.size()) { return CL_INVALID_ARG_INDEX; } argWasUncacheable = kernelArguments[argIndex].isStatelessUncacheable; auto argHandler = kernelArgHandlers[argIndex]; retVal = (this->*argHandler)(argIndex, argSize, argVal); } if (retVal == CL_SUCCESS) { auto argIsUncacheable = kernelArguments[argIndex].isStatelessUncacheable; statelessUncacheableArgsCount += (argIsUncacheable ? 1 : 0) - (argWasUncacheable ? 
1 : 0); markArgPatchedAndResolveArgs(argIndex); } return retVal; } cl_int Kernel::setArg(uint32_t argIndex, uint32_t argVal) { return setArg(argIndex, sizeof(argVal), &argVal); } cl_int Kernel::setArg(uint32_t argIndex, uint64_t argVal) { return setArg(argIndex, sizeof(argVal), &argVal); } cl_int Kernel::setArg(uint32_t argIndex, cl_mem argVal) { return setArg(argIndex, sizeof(argVal), &argVal); } cl_int Kernel::setArg(uint32_t argIndex, cl_mem argVal, uint32_t mipLevel) { auto retVal = setArgImageWithMipLevel(argIndex, sizeof(argVal), &argVal, mipLevel); if (retVal == CL_SUCCESS) { markArgPatchedAndResolveArgs(argIndex); } return retVal; } void *Kernel::patchBufferOffset(const ArgDescPointer &argAsPtr, void *svmPtr, GraphicsAllocation *svmAlloc) { if (isUndefinedOffset(argAsPtr.bufferOffset)) { return svmPtr; } void *ptrToPatch = svmPtr; if (svmAlloc != nullptr) { ptrToPatch = reinterpret_cast(svmAlloc->getGpuAddressToPatch()); } constexpr uint32_t minimumAlignment = 4; ptrToPatch = alignDown(ptrToPatch, minimumAlignment); DEBUG_BREAK_IF(ptrDiff(svmPtr, ptrToPatch) != static_cast(ptrDiff(svmPtr, ptrToPatch))); uint32_t offsetToPatch = static_cast(ptrDiff(svmPtr, ptrToPatch)); patch(offsetToPatch, getCrossThreadData(), argAsPtr.bufferOffset); return ptrToPatch; } cl_int Kernel::setArgSvm(uint32_t argIndex, size_t svmAllocSize, void *svmPtr, GraphicsAllocation *svmAlloc, cl_mem_flags svmFlags) { const auto &argAsPtr = getKernelInfo().kernelDescriptor.payloadMappings.explicitArgs[argIndex].as(); auto patchLocation = ptrOffset(getCrossThreadData(), argAsPtr.stateless); patchWithRequiredSize(patchLocation, argAsPtr.pointerSize, reinterpret_cast(svmPtr)); void *ptrToPatch = patchBufferOffset(argAsPtr, svmPtr, svmAlloc); if (isValidOffset(argAsPtr.bindful)) { auto surfaceState = ptrOffset(getSurfaceStateHeap(), argAsPtr.bindful); Buffer::setSurfaceState(&getDevice().getDevice(), surfaceState, false, false, svmAllocSize + ptrDiff(svmPtr, ptrToPatch), ptrToPatch, 0, 
svmAlloc, svmFlags, 0, kernelInfo.kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, areMultipleSubDevicesInContext()); } storeKernelArg(argIndex, SVM_OBJ, nullptr, svmPtr, sizeof(void *), svmAlloc, svmFlags); if (!kernelArguments[argIndex].isPatched) { patchedArgumentsNum++; kernelArguments[argIndex].isPatched = true; } addAllocationToCacheFlushVector(argIndex, svmAlloc); return CL_SUCCESS; } cl_int Kernel::setArgSvmAlloc(uint32_t argIndex, void *svmPtr, GraphicsAllocation *svmAlloc, uint32_t allocId) { DBG_LOG_INPUTS("setArgBuffer svm_alloc", svmAlloc); const auto &argAsPtr = getKernelInfo().kernelDescriptor.payloadMappings.explicitArgs[argIndex].as(); auto patchLocation = ptrOffset(getCrossThreadData(), argAsPtr.stateless); patchWithRequiredSize(patchLocation, argAsPtr.pointerSize, reinterpret_cast(svmPtr)); bool disableL3 = false; bool forceNonAuxMode = false; bool isAuxTranslationKernel = (AuxTranslationDirection::None != auxTranslationDirection); auto &hwInfo = getDevice().getHardwareInfo(); auto &clHwHelper = ClHwHelper::get(hwInfo.platform.eRenderCoreFamily); if (isAuxTranslationKernel) { if (((AuxTranslationDirection::AuxToNonAux == auxTranslationDirection) && argIndex == 1) || ((AuxTranslationDirection::NonAuxToAux == auxTranslationDirection) && argIndex == 0)) { forceNonAuxMode = true; } disableL3 = (argIndex == 0); } else if (svmAlloc && svmAlloc->isCompressionEnabled() && clHwHelper.requiresNonAuxMode(argAsPtr, hwInfo)) { forceNonAuxMode = true; } bool argWasUncacheable = kernelArguments[argIndex].isStatelessUncacheable; bool argIsUncacheable = svmAlloc ? svmAlloc->isUncacheable() : false; statelessUncacheableArgsCount += (argIsUncacheable ? 1 : 0) - (argWasUncacheable ? 
1 : 0); void *ptrToPatch = patchBufferOffset(argAsPtr, svmPtr, svmAlloc); if (isValidOffset(argAsPtr.bindful)) { auto surfaceState = ptrOffset(getSurfaceStateHeap(), argAsPtr.bindful); size_t allocSize = 0; size_t offset = 0; if (svmAlloc != nullptr) { allocSize = svmAlloc->getUnderlyingBufferSize(); offset = ptrDiff(ptrToPatch, svmAlloc->getGpuAddressToPatch()); allocSize -= offset; } Buffer::setSurfaceState(&getDevice().getDevice(), surfaceState, forceNonAuxMode, disableL3, allocSize, ptrToPatch, offset, svmAlloc, 0, 0, kernelInfo.kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, areMultipleSubDevicesInContext()); } storeKernelArg(argIndex, SVM_ALLOC_OBJ, svmAlloc, svmPtr, sizeof(uintptr_t)); kernelArguments[argIndex].allocId = allocId; kernelArguments[argIndex].allocIdMemoryManagerCounter = allocId ? this->getContext().getSVMAllocsManager()->allocationsCounter.load() : 0u; if (!kernelArguments[argIndex].isPatched) { patchedArgumentsNum++; kernelArguments[argIndex].isPatched = true; } addAllocationToCacheFlushVector(argIndex, svmAlloc); return CL_SUCCESS; } void Kernel::storeKernelArg(uint32_t argIndex, kernelArgType argType, void *argObject, const void *argValue, size_t argSize, GraphicsAllocation *argSvmAlloc, cl_mem_flags argSvmFlags) { kernelArguments[argIndex].type = argType; kernelArguments[argIndex].object = argObject; kernelArguments[argIndex].value = argValue; kernelArguments[argIndex].size = argSize; kernelArguments[argIndex].pSvmAlloc = argSvmAlloc; kernelArguments[argIndex].svmFlags = argSvmFlags; } void Kernel::storeKernelArgAllocIdMemoryManagerCounter(uint32_t argIndex, uint32_t allocIdMemoryManagerCounter) { kernelArguments[argIndex].allocIdMemoryManagerCounter = allocIdMemoryManagerCounter; } const void *Kernel::getKernelArg(uint32_t argIndex) const { return kernelArguments[argIndex].object; } const Kernel::SimpleKernelArgInfo &Kernel::getKernelArgInfo(uint32_t argIndex) const { return kernelArguments[argIndex]; } void 
Kernel::setSvmKernelExecInfo(GraphicsAllocation *argValue) { kernelSvmGfxAllocations.push_back(argValue); if (allocationForCacheFlush(argValue)) { svmAllocationsRequireCacheFlush = true; } } void Kernel::clearSvmKernelExecInfo() { kernelSvmGfxAllocations.clear(); svmAllocationsRequireCacheFlush = false; } void Kernel::setUnifiedMemoryProperty(cl_kernel_exec_info infoType, bool infoValue) { if (infoType == CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL) { this->unifiedMemoryControls.indirectDeviceAllocationsAllowed = infoValue; return; } if (infoType == CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL) { this->unifiedMemoryControls.indirectHostAllocationsAllowed = infoValue; return; } if (infoType == CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL) { this->unifiedMemoryControls.indirectSharedAllocationsAllowed = infoValue; return; } } void Kernel::setUnifiedMemoryExecInfo(GraphicsAllocation *unifiedMemoryAllocation) { kernelUnifiedMemoryGfxAllocations.push_back(unifiedMemoryAllocation); } void Kernel::clearUnifiedMemoryExecInfo() { kernelUnifiedMemoryGfxAllocations.clear(); } cl_int Kernel::setKernelExecutionType(cl_execution_info_kernel_type_intel executionType) { switch (executionType) { case CL_KERNEL_EXEC_INFO_DEFAULT_TYPE_INTEL: this->executionType = KernelExecutionType::Default; break; case CL_KERNEL_EXEC_INFO_CONCURRENT_TYPE_INTEL: this->executionType = KernelExecutionType::Concurrent; break; default: { return CL_INVALID_VALUE; } } return CL_SUCCESS; } void Kernel::getSuggestedLocalWorkSize(const cl_uint workDim, const size_t *globalWorkSize, const size_t *globalWorkOffset, size_t *localWorkSize) { UNRECOVERABLE_IF((workDim == 0) || (workDim > 3)); UNRECOVERABLE_IF(globalWorkSize == nullptr); Vec3 elws{0, 0, 0}; Vec3 gws{ globalWorkSize[0], (workDim > 1) ? globalWorkSize[1] : 1, (workDim > 2) ? 
globalWorkSize[2] : 1}; Vec3 offset{0, 0, 0}; if (globalWorkOffset) { offset.x = globalWorkOffset[0]; if (workDim > 1) { offset.y = globalWorkOffset[1]; if (workDim > 2) { offset.z = globalWorkOffset[2]; } } } Vec3 suggestedLws{0, 0, 0}; if (kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0] != 0) { suggestedLws.x = kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0]; suggestedLws.y = kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1]; suggestedLws.z = kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2]; } else { uint32_t dispatchWorkDim = std::max(1U, std::max(gws.getSimplifiedDim(), offset.getSimplifiedDim())); const DispatchInfo dispatchInfo{&clDevice, this, dispatchWorkDim, gws, elws, offset}; suggestedLws = computeWorkgroupSize(dispatchInfo); } localWorkSize[0] = suggestedLws.x; if (workDim > 1) localWorkSize[1] = suggestedLws.y; if (workDim > 2) localWorkSize[2] = suggestedLws.z; } uint32_t Kernel::getMaxWorkGroupCount(const cl_uint workDim, const size_t *localWorkSize, const CommandQueue *commandQueue) const { auto &hardwareInfo = getHardwareInfo(); auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); auto engineGroupType = hwHelper.getEngineGroupType(commandQueue->getGpgpuEngine().getEngineType(), commandQueue->getGpgpuEngine().getEngineUsage(), hardwareInfo); const auto &kernelDescriptor = kernelInfo.kernelDescriptor; auto dssCount = hardwareInfo.gtSystemInfo.DualSubSliceCount; if (dssCount == 0) { dssCount = hardwareInfo.gtSystemInfo.SubSliceCount; } auto availableThreadCount = hwHelper.calculateAvailableThreadCount( hardwareInfo.platform.eProductFamily, kernelDescriptor.kernelAttributes.numGrfRequired, hardwareInfo.gtSystemInfo.EUCount, hardwareInfo.gtSystemInfo.ThreadCount / hardwareInfo.gtSystemInfo.EUCount); auto barrierCount = kernelDescriptor.kernelAttributes.barrierCount; auto maxWorkGroupCount = 
KernelHelper::getMaxWorkGroupCount(kernelInfo.getMaxSimdSize(), availableThreadCount, dssCount, dssCount * KB * hardwareInfo.capabilityTable.slmSize, hwHelper.alignSlmSize(slmTotalSize), static_cast(hwHelper.getMaxBarrierRegisterPerSlice()), hwHelper.getBarriersCountFromHasBarriers(barrierCount), workDim, localWorkSize); auto isEngineInstanced = commandQueue->getGpgpuCommandStreamReceiver().getOsContext().isEngineInstanced(); maxWorkGroupCount = hwHelper.adjustMaxWorkGroupCount(maxWorkGroupCount, engineGroupType, hardwareInfo, isEngineInstanced); return maxWorkGroupCount; } inline void Kernel::makeArgsResident(CommandStreamReceiver &commandStreamReceiver) { auto numArgs = kernelInfo.kernelDescriptor.payloadMappings.explicitArgs.size(); for (decltype(numArgs) argIndex = 0; argIndex < numArgs; argIndex++) { if (kernelArguments[argIndex].object) { if (kernelArguments[argIndex].type == SVM_ALLOC_OBJ) { auto pSVMAlloc = (GraphicsAllocation *)kernelArguments[argIndex].object; auto pageFaultManager = executionEnvironment.memoryManager->getPageFaultManager(); if (pageFaultManager && this->isUnifiedMemorySyncRequired) { pageFaultManager->moveAllocationToGpuDomain(reinterpret_cast(pSVMAlloc->getGpuAddress())); } commandStreamReceiver.makeResident(*pSVMAlloc); } else if (Kernel::isMemObj(kernelArguments[argIndex].type)) { auto clMem = const_cast(static_cast(kernelArguments[argIndex].object)); auto memObj = castToObjectOrAbort(clMem); auto image = castToObject(clMem); if (image && image->isImageFromImage()) { commandStreamReceiver.setSamplerCacheFlushRequired(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushBefore); } commandStreamReceiver.makeResident(*memObj->getGraphicsAllocation(commandStreamReceiver.getRootDeviceIndex())); if (memObj->getMcsAllocation()) { commandStreamReceiver.makeResident(*memObj->getMcsAllocation()); } } } } } void Kernel::performKernelTuning(CommandStreamReceiver &commandStreamReceiver, const Vec3 &lws, const Vec3 &gws, const Vec3 
&offsets, TimestampPacketContainer *timestampContainer) { auto performTunning = TunningType::DISABLED; if (DebugManager.flags.EnableKernelTunning.get() != -1) { performTunning = static_cast(DebugManager.flags.EnableKernelTunning.get()); } if (performTunning == TunningType::SIMPLE) { this->singleSubdevicePreferredInCurrentEnqueue = !this->kernelInfo.kernelDescriptor.kernelAttributes.flags.useGlobalAtomics; } else if (performTunning == TunningType::FULL) { KernelConfig config{gws, lws, offsets}; auto submissionDataIt = this->kernelSubmissionMap.find(config); if (submissionDataIt == this->kernelSubmissionMap.end()) { KernelSubmissionData submissionData; submissionData.kernelStandardTimestamps = std::make_unique(); submissionData.kernelSubdeviceTimestamps = std::make_unique(); submissionData.status = TunningStatus::STANDARD_TUNNING_IN_PROGRESS; submissionData.kernelStandardTimestamps->assignAndIncrementNodesRefCounts(*timestampContainer); this->kernelSubmissionMap[config] = std::move(submissionData); this->singleSubdevicePreferredInCurrentEnqueue = false; return; } auto &submissionData = submissionDataIt->second; if (submissionData.status == TunningStatus::TUNNING_DONE) { this->singleSubdevicePreferredInCurrentEnqueue = submissionData.singleSubdevicePreferred; } if (submissionData.status == TunningStatus::SUBDEVICE_TUNNING_IN_PROGRESS) { if (this->hasTunningFinished(submissionData)) { submissionData.status = TunningStatus::TUNNING_DONE; submissionData.kernelStandardTimestamps.reset(); submissionData.kernelSubdeviceTimestamps.reset(); this->singleSubdevicePreferredInCurrentEnqueue = submissionData.singleSubdevicePreferred; } else { this->singleSubdevicePreferredInCurrentEnqueue = false; } } if (submissionData.status == TunningStatus::STANDARD_TUNNING_IN_PROGRESS) { submissionData.status = TunningStatus::SUBDEVICE_TUNNING_IN_PROGRESS; submissionData.kernelSubdeviceTimestamps->assignAndIncrementNodesRefCounts(*timestampContainer); 
this->singleSubdevicePreferredInCurrentEnqueue = true; } } } bool Kernel::hasTunningFinished(KernelSubmissionData &submissionData) { if (!this->hasRunFinished(submissionData.kernelStandardTimestamps.get()) || !this->hasRunFinished(submissionData.kernelSubdeviceTimestamps.get())) { return false; } uint64_t globalStartTS = 0u; uint64_t globalEndTS = 0u; Event::getBoundaryTimestampValues(submissionData.kernelStandardTimestamps.get(), globalStartTS, globalEndTS); auto standardTSDiff = globalEndTS - globalStartTS; Event::getBoundaryTimestampValues(submissionData.kernelSubdeviceTimestamps.get(), globalStartTS, globalEndTS); auto subdeviceTSDiff = globalEndTS - globalStartTS; submissionData.singleSubdevicePreferred = standardTSDiff > subdeviceTSDiff; return true; } bool Kernel::hasRunFinished(TimestampPacketContainer *timestampContainer) { for (const auto &node : timestampContainer->peekNodes()) { for (uint32_t i = 0; i < node->getPacketsUsed(); i++) { if (node->getContextEndValue(i) == 1) { return false; } } } return true; } bool Kernel::isSingleSubdevicePreferred() const { return this->singleSubdevicePreferredInCurrentEnqueue || this->usesSyncBuffer(); } void Kernel::makeResident(CommandStreamReceiver &commandStreamReceiver) { auto rootDeviceIndex = commandStreamReceiver.getRootDeviceIndex(); if (privateSurface) { commandStreamReceiver.makeResident(*privateSurface); } if (program->getConstantSurface(rootDeviceIndex)) { commandStreamReceiver.makeResident(*(program->getConstantSurface(rootDeviceIndex))); } if (program->getGlobalSurface(rootDeviceIndex)) { commandStreamReceiver.makeResident(*(program->getGlobalSurface(rootDeviceIndex))); } if (program->getExportedFunctionsSurface(rootDeviceIndex)) { commandStreamReceiver.makeResident(*(program->getExportedFunctionsSurface(rootDeviceIndex))); } for (auto gfxAlloc : kernelSvmGfxAllocations) { commandStreamReceiver.makeResident(*gfxAlloc); } auto pageFaultManager = 
program->peekExecutionEnvironment().memoryManager->getPageFaultManager();

    // Unified-memory allocations attached to the kernel: make resident and, when a page fault
    // manager exists, move each one to the GPU domain.
    for (auto gfxAlloc : kernelUnifiedMemoryGfxAllocations) {
        commandStreamReceiver.makeResident(*gfxAlloc);
        if (pageFaultManager) {
            pageFaultManager->moveAllocationToGpuDomain(reinterpret_cast(gfxAlloc->getGpuAddress()));
        }
    }

    // Indirect shared allocations allowed: move everything tracked by the SVM allocations manager.
    if (unifiedMemoryControls.indirectSharedAllocationsAllowed && pageFaultManager) {
        pageFaultManager->moveAllocationsWithinUMAllocsManagerToGpuDomain(this->getContext().getSVMAllocsManager());
    }
    makeArgsResident(commandStreamReceiver);

    auto kernelIsaAllocation = this->kernelInfo.kernelAllocation;
    if (kernelIsaAllocation) {
        commandStreamReceiver.makeResident(*kernelIsaAllocation);
    }

    gtpinNotifyMakeResident(this, &commandStreamReceiver);

    // Any kind of indirect access allowed: make the manager's internal allocations resident,
    // filtered by the mask generated from the unified memory controls.
    // NOTE(review): the SVM allocations manager is dereferenced without a null check here - confirm
    // it always exists when one of these flags is set.
    if (unifiedMemoryControls.indirectDeviceAllocationsAllowed ||
        unifiedMemoryControls.indirectHostAllocationsAllowed ||
        unifiedMemoryControls.indirectSharedAllocationsAllowed) {
        this->getContext().getSVMAllocsManager()->makeInternalAllocationsResident(commandStreamReceiver, unifiedMemoryControls.generateMask());
    }
}

// Appends surface wrappers for the kernel's allocations to dst.
// Each wrapper is created with raw `new`; presumably the caller takes ownership of the pointers
// pushed into dst - TODO confirm against callers.
void Kernel::getResidency(std::vector &dst) {
    if (privateSurface) {
        GeneralSurface *surface = new GeneralSurface(privateSurface);
        dst.push_back(surface);
    }

    auto rootDeviceIndex = getDevice().getRootDeviceIndex();
    if (program->getConstantSurface(rootDeviceIndex)) {
        GeneralSurface *surface = new GeneralSurface(program->getConstantSurface(rootDeviceIndex));
        dst.push_back(surface);
    }

    if (program->getGlobalSurface(rootDeviceIndex)) {
        GeneralSurface *surface = new GeneralSurface(program->getGlobalSurface(rootDeviceIndex));
        dst.push_back(surface);
    }

    if (program->getExportedFunctionsSurface(rootDeviceIndex)) {
        GeneralSurface *surface = new GeneralSurface(program->getExportedFunctionsSurface(rootDeviceIndex));
        dst.push_back(surface);
    }

    for (auto gfxAlloc : kernelSvmGfxAllocations) {
        GeneralSurface *surface = new GeneralSurface(gfxAlloc);
        dst.push_back(surface);
    }

    auto numArgs = kernelInfo.kernelDescriptor.payloadMappings.explicitArgs.size();
for (decltype(numArgs) argIndex = 0; argIndex < numArgs; argIndex++) { if (kernelArguments[argIndex].object) { if (kernelArguments[argIndex].type == SVM_ALLOC_OBJ) { auto pSVMAlloc = (GraphicsAllocation *)kernelArguments[argIndex].object; dst.push_back(new GeneralSurface(pSVMAlloc)); } else if (Kernel::isMemObj(kernelArguments[argIndex].type)) { auto clMem = const_cast(static_cast(kernelArguments[argIndex].object)); auto memObj = castToObject(clMem); DEBUG_BREAK_IF(memObj == nullptr); dst.push_back(new MemObjSurface(memObj)); } } } auto kernelIsaAllocation = this->kernelInfo.kernelAllocation; if (kernelIsaAllocation) { GeneralSurface *surface = new GeneralSurface(kernelIsaAllocation); dst.push_back(surface); } gtpinNotifyUpdateResidencyList(this, &dst); } bool Kernel::requiresCoherency() { auto numArgs = kernelInfo.kernelDescriptor.payloadMappings.explicitArgs.size(); for (decltype(numArgs) argIndex = 0; argIndex < numArgs; argIndex++) { if (kernelArguments[argIndex].object) { if (kernelArguments[argIndex].type == SVM_ALLOC_OBJ) { auto pSVMAlloc = (GraphicsAllocation *)kernelArguments[argIndex].object; if (pSVMAlloc->isCoherent()) { return true; } } if (Kernel::isMemObj(kernelArguments[argIndex].type)) { auto clMem = const_cast(static_cast(kernelArguments[argIndex].object)); auto memObj = castToObjectOrAbort(clMem); if (memObj->getMultiGraphicsAllocation().isCoherent()) { return true; } } } } return false; } cl_int Kernel::setArgLocal(uint32_t argIndexIn, size_t argSize, const void *argVal) { storeKernelArg(argIndexIn, SLM_OBJ, nullptr, argVal, argSize); uint32_t *crossThreadData = reinterpret_cast(this->crossThreadData); uint32_t argIndex = argIndexIn; const auto &args = kernelInfo.kernelDescriptor.payloadMappings.explicitArgs; const auto &currArg = args[argIndex]; UNRECOVERABLE_IF(currArg.getTraits().getAddressQualifier() != KernelArgMetadata::AddrLocal); slmSizes[argIndex] = static_cast(argSize); UNRECOVERABLE_IF(isUndefinedOffset(currArg.as().slmOffset)); auto 
slmOffset = *ptrOffset(crossThreadData, currArg.as().slmOffset);
    // Running SLM offset: starts at the value already patched for this argument, plus its new size.
    slmOffset += static_cast(argSize);

    // Re-patch the SLM offset of every following local-address argument; other arguments are skipped.
    ++argIndex;
    while (argIndex < slmSizes.size()) {
        if (args[argIndex].getTraits().getAddressQualifier() != KernelArgMetadata::AddrLocal) {
            ++argIndex;
            continue;
        }

        const auto &nextArg = args[argIndex].as();
        UNRECOVERABLE_IF(0 == nextArg.requiredSlmAlignment);

        // Align to the argument's required alignment, write the offset into cross-thread data,
        // then advance by the size previously stored for that argument.
        slmOffset = alignUp(slmOffset, nextArg.requiredSlmAlignment);

        auto patchLocation = ptrOffset(crossThreadData, nextArg.slmOffset);
        *patchLocation = slmOffset;

        slmOffset += static_cast(slmSizes[argIndex]);
        ++argIndex;
    }

    // Total SLM = inline SLM size + final running offset rounded up to a multiple of KB.
    slmTotalSize = kernelInfo.kernelDescriptor.kernelAttributes.slmInlineSize + alignUp(slmOffset, KB);

    return CL_SUCCESS;
}

// Sets a buffer (cl_mem) kernel argument.
// Returns CL_INVALID_ARG_SIZE when argSize != sizeof(cl_mem *), CL_INVALID_MEM_OBJECT when the
// handle does not cast to a buffer object, CL_SUCCESS otherwise (including the null-buffer path).
cl_int Kernel::setArgBuffer(uint32_t argIndex, size_t argSize, const void *argVal) {

    if (argSize != sizeof(cl_mem *)) {
        return CL_INVALID_ARG_SIZE;
    }

    auto clMem = reinterpret_cast(argVal);
    auto pClDevice = &getDevice();
    auto rootDeviceIndex = pClDevice->getRootDeviceIndex();

    const auto &arg = kernelInfo.kernelDescriptor.payloadMappings.explicitArgs[argIndex];
    const auto &argAsPtr = arg.as();

    if (clMem && *clMem) {
        auto clMemObj = *clMem;
        DBG_LOG_INPUTS("setArgBuffer cl_mem", clMemObj);

        // NOTE(review): the argument is stored before the handle is validated below.
        storeKernelArg(argIndex, BUFFER_OBJ, clMemObj, argVal, argSize);

        auto buffer = castToObject(clMemObj);
        if (!buffer)
            return CL_INVALID_MEM_OBJECT;

        if (buffer->peekSharingHandler()) {
            usingSharedObjArgs = true;
        }
        patchBufferOffset(argAsPtr, nullptr, nullptr);

        // Stateless access: patch the buffer address into cross-thread data.
        if (isValidOffset(argAsPtr.stateless)) {
            auto patchLocation = ptrOffset(crossThreadData, argAsPtr.stateless);
            uint64_t addressToPatch = buffer->setArgStateless(patchLocation, argAsPtr.pointerSize, rootDeviceIndex, !this->isBuiltIn);

            // Record patch info only when the AddPatchInfoCommentsForAUBDump debug flag is set.
            if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) {
                PatchInfoData patchInfoData(addressToPatch - buffer->getOffset(), static_cast(buffer->getOffset()),
                                            PatchInfoAllocationType::KernelArg, reinterpret_cast(crossThreadData),
                                            static_cast(argAsPtr.stateless),
                                            PatchInfoAllocationType::IndirectObjectHeap,
argAsPtr.pointerSize);
                this->patchInfoDataList.push_back(patchInfoData);
            }
        }

        bool disableL3 = false;
        bool forceNonAuxMode = false;
        bool isAuxTranslationKernel = (AuxTranslationDirection::None != auxTranslationDirection);
        auto graphicsAllocation = buffer->getGraphicsAllocation(rootDeviceIndex);
        auto &hwInfo = pClDevice->getHardwareInfo();
        auto &clHwHelper = ClHwHelper::get(hwInfo.platform.eRenderCoreFamily);

        if (isAuxTranslationKernel) {
            // Aux-translation kernel: non-aux mode is forced for argument 1 when translating
            // AuxToNonAux and for argument 0 when translating NonAuxToAux; L3 is disabled for argument 0.
            if (((AuxTranslationDirection::AuxToNonAux == auxTranslationDirection) && argIndex == 1) ||
                ((AuxTranslationDirection::NonAuxToAux == auxTranslationDirection) && argIndex == 0)) {
                forceNonAuxMode = true;
            }
            disableL3 = (argIndex == 0);
        } else if (graphicsAllocation->isCompressionEnabled() && clHwHelper.requiresNonAuxMode(argAsPtr, hwInfo)) {
            // Regular kernel: non-aux mode only when the allocation is compressed and the
            // hardware helper requires it for this argument.
            forceNonAuxMode = true;
        }

        // Program the surface state: bindful offset takes precedence, bindless is the fallback.
        if (isValidOffset(argAsPtr.bindful)) {
            buffer->setArgStateful(ptrOffset(getSurfaceStateHeap(), argAsPtr.bindful), forceNonAuxMode,
                                   disableL3, isAuxTranslationKernel, arg.isReadOnly(), pClDevice->getDevice(),
                                   kernelInfo.kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, areMultipleSubDevicesInContext());
        } else if (isValidOffset(argAsPtr.bindless)) {
            buffer->setArgStateful(patchBindlessSurfaceState(graphicsAllocation, argAsPtr.bindless), forceNonAuxMode,
                                   disableL3, isAuxTranslationKernel, arg.isReadOnly(), pClDevice->getDevice(),
                                   kernelInfo.kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, areMultipleSubDevicesInContext());
        }

        // A purely stateful argument is never counted as stateless-uncacheable.
        kernelArguments[argIndex].isStatelessUncacheable = argAsPtr.isPureStateful() ?
false : buffer->isMemObjUncacheable(); auto allocationForCacheFlush = graphicsAllocation; //if we make object uncacheable for surface state and there are not stateless accessess , then ther is no need to flush caches if (buffer->isMemObjUncacheableForSurfaceState() && argAsPtr.isPureStateful()) { allocationForCacheFlush = nullptr; } addAllocationToCacheFlushVector(argIndex, allocationForCacheFlush); return CL_SUCCESS; } else { storeKernelArg(argIndex, BUFFER_OBJ, nullptr, argVal, argSize); if (isValidOffset(argAsPtr.stateless)) { auto patchLocation = ptrOffset(getCrossThreadData(), argAsPtr.stateless); patchWithRequiredSize(patchLocation, argAsPtr.pointerSize, 0u); } if (isValidOffset(argAsPtr.bindful)) { auto surfaceState = ptrOffset(getSurfaceStateHeap(), argAsPtr.bindful); Buffer::setSurfaceState(&pClDevice->getDevice(), surfaceState, false, false, 0, nullptr, 0, nullptr, 0, 0, kernelInfo.kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, areMultipleSubDevicesInContext()); } return CL_SUCCESS; } } cl_int Kernel::setArgPipe(uint32_t argIndex, size_t argSize, const void *argVal) { if (argSize != sizeof(cl_mem *)) { return CL_INVALID_ARG_SIZE; } auto clMem = reinterpret_cast(argVal); if (clMem && *clMem) { auto clMemObj = *clMem; DBG_LOG_INPUTS("setArgPipe cl_mem", clMemObj); storeKernelArg(argIndex, PIPE_OBJ, clMemObj, argVal, argSize); auto memObj = castToObject(clMemObj); if (!memObj) { return CL_INVALID_MEM_OBJECT; } auto pipe = castToObject(clMemObj); if (!pipe) { return CL_INVALID_ARG_VALUE; } if (memObj->getContext() != &(this->getContext())) { return CL_INVALID_MEM_OBJECT; } auto rootDeviceIndex = getDevice().getRootDeviceIndex(); const auto &argAsPtr = kernelInfo.kernelDescriptor.payloadMappings.explicitArgs[argIndex].as(); auto patchLocation = ptrOffset(getCrossThreadData(), argAsPtr.stateless); pipe->setPipeArg(patchLocation, argAsPtr.pointerSize, rootDeviceIndex); if (isValidOffset(argAsPtr.bindful)) { auto graphicsAllocation = 
pipe->getGraphicsAllocation(rootDeviceIndex); auto surfaceState = ptrOffset(getSurfaceStateHeap(), argAsPtr.bindful); Buffer::setSurfaceState(&getDevice().getDevice(), surfaceState, false, false, pipe->getSize(), pipe->getCpuAddress(), 0, graphicsAllocation, 0, 0, kernelInfo.kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, areMultipleSubDevicesInContext()); } return CL_SUCCESS; } else { return CL_INVALID_MEM_OBJECT; } } cl_int Kernel::setArgImage(uint32_t argIndex, size_t argSize, const void *argVal) { return setArgImageWithMipLevel(argIndex, argSize, argVal, 0u); } cl_int Kernel::setArgImageWithMipLevel(uint32_t argIndex, size_t argSize, const void *argVal, uint32_t mipLevel) { auto retVal = CL_INVALID_ARG_VALUE; auto rootDeviceIndex = getDevice().getRootDeviceIndex(); const auto &arg = kernelInfo.kernelDescriptor.payloadMappings.explicitArgs[argIndex]; const auto &argAsImg = arg.as(); uint32_t *crossThreadData = reinterpret_cast(this->crossThreadData); auto clMemObj = *(static_cast(argVal)); auto pImage = castToObject(clMemObj); if (pImage && argSize == sizeof(cl_mem *)) { if (pImage->peekSharingHandler()) { usingSharedObjArgs = true; } DBG_LOG_INPUTS("setArgImage cl_mem", clMemObj); storeKernelArg(argIndex, IMAGE_OBJ, clMemObj, argVal, argSize); void *surfaceState = nullptr; if (isValidOffset(argAsImg.bindless)) { surfaceState = patchBindlessSurfaceState(pImage->getGraphicsAllocation(rootDeviceIndex), argAsImg.bindless); } else { DEBUG_BREAK_IF(isUndefinedOffset(argAsImg.bindful)); surfaceState = ptrOffset(getSurfaceStateHeap(), argAsImg.bindful); } // Sets SS structure if (arg.getExtendedTypeInfo().isMediaImage) { DEBUG_BREAK_IF(!kernelInfo.kernelDescriptor.kernelAttributes.flags.usesVme); pImage->setMediaImageArg(surfaceState, rootDeviceIndex); } else { pImage->setImageArg(surfaceState, arg.getExtendedTypeInfo().isMediaBlockImage, mipLevel, rootDeviceIndex, getKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics); } auto &imageDesc = 
pImage->getImageDesc();
        auto &imageFormat = pImage->getImageFormat();
        auto graphicsAllocation = pImage->getGraphicsAllocation(rootDeviceIndex);

        // 3D images are registered with the image transformer (used later by resolveArgs()).
        if (imageDesc.image_type == CL_MEM_OBJECT_IMAGE3D) {
            imageTransformer->registerImage3d(argIndex);
        }

        // Patch the image description and format fields into cross-thread data at the offsets
        // given by the argument's metadata payload.
        patch(imageDesc.num_samples, crossThreadData, argAsImg.metadataPayload.numSamples);
        patch(imageDesc.num_mip_levels, crossThreadData, argAsImg.metadataPayload.numMipLevels);
        patch(imageDesc.image_width, crossThreadData, argAsImg.metadataPayload.imgWidth);
        patch(imageDesc.image_height, crossThreadData, argAsImg.metadataPayload.imgHeight);
        patch(imageDesc.image_depth, crossThreadData, argAsImg.metadataPayload.imgDepth);
        patch(imageDesc.image_array_size, crossThreadData, argAsImg.metadataPayload.arraySize);
        patch(imageFormat.image_channel_data_type, crossThreadData, argAsImg.metadataPayload.channelDataType);
        patch(imageFormat.image_channel_order, crossThreadData, argAsImg.metadataPayload.channelOrder);

        // Device-side enqueue: the bindful offset is patched as the object id.
        if (arg.getExtendedTypeInfo().hasDeviceSideEnqueueExtendedDescriptor) {
            const auto &explicitArgsExtendedDescriptors = kernelInfo.kernelDescriptor.payloadMappings.explicitArgsExtendedDescriptors;
            UNRECOVERABLE_IF(argIndex >= explicitArgsExtendedDescriptors.size());
            auto deviceSideEnqueueDescriptor = static_cast(explicitArgsExtendedDescriptors[argIndex].get());
            patch(argAsImg.bindful, crossThreadData, deviceSideEnqueueDescriptor->objectId);
        }

        // "Flat" view of the image: base GPU address, width and height in bytes minus one,
        // and row pitch minus one.
        auto pixelSize = pImage->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes;
        patch(graphicsAllocation->getGpuAddress(), crossThreadData, argAsImg.metadataPayload.flatBaseOffset);
        patch((imageDesc.image_width * pixelSize) - 1, crossThreadData, argAsImg.metadataPayload.flatWidth);
        patch((imageDesc.image_height * pixelSize) - 1, crossThreadData, argAsImg.metadataPayload.flatHeight);
        patch(imageDesc.image_row_pitch - 1, crossThreadData, argAsImg.metadataPayload.flatPitch);

        addAllocationToCacheFlushVector(argIndex, graphicsAllocation);
        retVal = CL_SUCCESS;
    }

    return retVal;
}

cl_int
Kernel::setArgImmediate(uint32_t argIndex, size_t argSize, const void *argVal) {
    // Copies a by-value argument into cross-thread data, element by element.
    // Returns CL_INVALID_ARG_VALUE when argVal is null, CL_SUCCESS otherwise.
    auto retVal = CL_INVALID_ARG_VALUE;

    if (argVal) {
        storeKernelArg(argIndex, NONE_OBJ, nullptr, nullptr, argSize);

        [[maybe_unused]] auto crossThreadDataEnd = ptrOffset(crossThreadData, crossThreadDataSize);
        const auto &argAsVal = kernelInfo.kernelDescriptor.payloadMappings.explicitArgs[argIndex].as();
        for (const auto &element : argAsVal.elements) {
            DEBUG_BREAK_IF(element.size <= 0);

            auto pDst = ptrOffset(crossThreadData, element.offset);
            auto pSrc = ptrOffset(argVal, element.sourceOffset);

            DEBUG_BREAK_IF(!(ptrOffset(pDst, element.size) <= crossThreadDataEnd));

            // Copy at most the bytes the caller supplied past this element's source offset;
            // elements that start at or beyond argSize are skipped.
            if (element.sourceOffset < argSize) {
                size_t maxBytesToCopy = argSize - element.sourceOffset;
                size_t bytesToCopy = std::min(static_cast(element.size), maxBytesToCopy);
                memcpy_s(pDst, element.size, pSrc, bytesToCopy);
            }
        }

        retVal = CL_SUCCESS;
    }

    return retVal;
}

// Sets a sampler kernel argument: programs the sampler state in the dynamic state heap and
// patches the sampler metadata into cross-thread data.
// Returns CL_SUCCESS on success and CL_INVALID_SAMPLER when argVal is null, the handle is not a
// sampler, or argSize != sizeof(cl_sampler *).
// On failure no reference count is modified and the previously stored argument stays intact.
cl_int Kernel::setArgSampler(uint32_t argIndex, size_t argSize, const void *argVal) {
    auto retVal = CL_INVALID_SAMPLER;

    if (!argVal) {
        return retVal;
    }

    uint32_t *crossThreadData = reinterpret_cast(this->crossThreadData);
    auto clSamplerObj = *(static_cast(argVal));
    auto pSampler = castToObject(clSamplerObj);

    // Validate before touching any reference count. Previously the new sampler was retained and
    // the old one released even when the call failed, although the stored argument was left
    // unchanged - the old sampler lost a reference it still needed and a valid new sampler
    // passed with a wrong size kept an extra one.
    if (!pSampler || argSize != sizeof(cl_sampler *)) {
        return retVal;
    }

    // Retain the new sampler before releasing the old one so that re-setting the same sampler
    // never drops its count in between.
    pSampler->incRefInternal();

    if (kernelArguments.at(argIndex).object) {
        auto oldSampler = castToObject(kernelArguments.at(argIndex).object);
        UNRECOVERABLE_IF(!oldSampler);
        oldSampler->decRefInternal();
    }

    const auto &arg = kernelInfo.kernelDescriptor.payloadMappings.explicitArgs[argIndex];
    const auto &argAsSmp = arg.as();

    storeKernelArg(argIndex, SAMPLER_OBJ, clSamplerObj, argVal, argSize);

    auto dsh = getDynamicStateHeap();
    auto samplerState = ptrOffset(dsh, argAsSmp.bindful);

    pSampler->setArg(const_cast(samplerState), clDevice.getHardwareInfo());

    patch(pSampler->getSnapWaValue(), crossThreadData, argAsSmp.metadataPayload.samplerSnapWa);
    patch(GetAddrModeEnum(pSampler->addressingMode), crossThreadData, argAsSmp.metadataPayload.samplerAddressingMode);
    patch(GetNormCoordsEnum(pSampler->normalizedCoordinates), crossThreadData, argAsSmp.metadataPayload.samplerNormalizedCoords);

    // Device-side enqueue: patch the shifted bindful offset as the object id.
    if (arg.getExtendedTypeInfo().hasDeviceSideEnqueueExtendedDescriptor) {
        const auto &explicitArgsExtendedDescriptors = kernelInfo.kernelDescriptor.payloadMappings.explicitArgsExtendedDescriptors;
        UNRECOVERABLE_IF(argIndex >= explicitArgsExtendedDescriptors.size());
        auto deviceSideEnqueueDescriptor = static_cast(explicitArgsExtendedDescriptors[argIndex].get());
        patch(SAMPLER_OBJECT_ID_SHIFT + argAsSmp.bindful, crossThreadData, deviceSideEnqueueDescriptor->objectId);
    }

    retVal = CL_SUCCESS;
    return retVal;
}

cl_int Kernel::setArgAccelerator(uint32_t argIndex, size_t argSize, const void *argVal) {
    auto retVal = CL_INVALID_ARG_VALUE;

    if (argSize != sizeof(cl_accelerator_intel)) {
        return CL_INVALID_ARG_SIZE;
    }

    if (!argVal) {
        return retVal;
    }

    auto clAcceleratorObj = *(static_cast(argVal));
    DBG_LOG_INPUTS("setArgAccelerator cl_mem", clAcceleratorObj);

    const auto pAccelerator = castToObject(clAcceleratorObj);

    if (pAccelerator) {
        storeKernelArg(argIndex, ACCELERATOR_OBJ, clAcceleratorObj, argVal, argSize);

        const auto &arg = kernelInfo.kernelDescriptor.payloadMappings.explicitArgs[argIndex];
        const auto &argAsSmp = arg.as();

        if (argAsSmp.samplerType == iOpenCL::SAMPLER_OBJECT_VME) {

            const auto pVmeAccelerator = castToObjectOrAbort(pAccelerator);
            auto pDesc = static_cast(pVmeAccelerator->getDescriptor());
            DEBUG_BREAK_IF(!pDesc);

            if (arg.getExtendedTypeInfo().hasVmeExtendedDescriptor) {
                const auto &explicitArgsExtendedDescriptors = kernelInfo.kernelDescriptor.payloadMappings.explicitArgsExtendedDescriptors;
                UNRECOVERABLE_IF(argIndex >= explicitArgsExtendedDescriptors.size());
                auto vmeDescriptor = static_cast(explicitArgsExtendedDescriptors[argIndex].get());

                auto pVmeMbBlockTypeDst = reinterpret_cast(ptrOffset(crossThreadData, vmeDescriptor->mbBlockType));
                *pVmeMbBlockTypeDst = pDesc->mb_block_type;

                auto
pVmeSubpixelMode = reinterpret_cast(ptrOffset(crossThreadData, vmeDescriptor->subpixelMode)); *pVmeSubpixelMode = pDesc->subpixel_mode; auto pVmeSadAdjustMode = reinterpret_cast(ptrOffset(crossThreadData, vmeDescriptor->sadAdjustMode)); *pVmeSadAdjustMode = pDesc->sad_adjust_mode; auto pVmeSearchPathType = reinterpret_cast(ptrOffset(crossThreadData, vmeDescriptor->searchPathType)); *pVmeSearchPathType = pDesc->search_path_type; } retVal = CL_SUCCESS; } else if (argAsSmp.samplerType == iOpenCL::SAMPLER_OBJECT_VE) { retVal = CL_SUCCESS; } } return retVal; } void Kernel::setKernelArgHandler(uint32_t argIndex, KernelArgHandler handler) { if (kernelArgHandlers.size() <= argIndex) { kernelArgHandlers.resize(argIndex + 1); } kernelArgHandlers[argIndex] = handler; } void Kernel::unsetArg(uint32_t argIndex) { if (kernelArguments[argIndex].isPatched) { patchedArgumentsNum--; kernelArguments[argIndex].isPatched = false; if (kernelArguments[argIndex].isStatelessUncacheable) { statelessUncacheableArgsCount--; kernelArguments[argIndex].isStatelessUncacheable = false; } } } bool Kernel::hasPrintfOutput() const { return kernelInfo.kernelDescriptor.kernelAttributes.flags.usesPrintf; } void Kernel::resetSharedObjectsPatchAddresses() { for (size_t i = 0; i < getKernelArgsNumber(); i++) { auto clMem = (cl_mem)kernelArguments[i].object; auto memObj = castToObject(clMem); if (memObj && memObj->peekSharingHandler()) { setArg((uint32_t)i, sizeof(cl_mem), &clMem); } } } void Kernel::provideInitializationHints() { Context *context = program->getContextPtr(); if (context == nullptr || !context->isProvidingPerformanceHints()) return; auto pClDevice = &getDevice(); if (privateSurfaceSize) { context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, PRIVATE_MEMORY_USAGE_TOO_HIGH, kernelInfo.kernelDescriptor.kernelMetadata.kernelName.c_str(), privateSurfaceSize); } auto scratchSize = kernelInfo.kernelDescriptor.kernelAttributes.perThreadScratchSize[0] * 
pClDevice->getSharedDeviceInfo().computeUnitsUsedForScratch * kernelInfo.getMaxSimdSize(); if (scratchSize > 0) { context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, REGISTER_PRESSURE_TOO_HIGH, kernelInfo.kernelDescriptor.kernelMetadata.kernelName.c_str(), scratchSize); } } bool Kernel::usesSyncBuffer() const { return kernelInfo.kernelDescriptor.kernelAttributes.flags.usesSyncBuffer; } void Kernel::patchSyncBuffer(GraphicsAllocation *gfxAllocation, size_t bufferOffset) { const auto &syncBuffer = kernelInfo.kernelDescriptor.payloadMappings.implicitArgs.syncBufferAddress; auto bufferPatchAddress = ptrOffset(crossThreadData, syncBuffer.stateless); patchWithRequiredSize(bufferPatchAddress, syncBuffer.pointerSize, ptrOffset(gfxAllocation->getGpuAddressToPatch(), bufferOffset)); if (isValidOffset(syncBuffer.bindful)) { auto surfaceState = ptrOffset(reinterpret_cast(getSurfaceStateHeap()), syncBuffer.bindful); auto addressToPatch = gfxAllocation->getUnderlyingBuffer(); auto sizeToPatch = gfxAllocation->getUnderlyingBufferSize(); Buffer::setSurfaceState(&clDevice.getDevice(), surfaceState, false, false, sizeToPatch, addressToPatch, 0, gfxAllocation, 0, 0, kernelInfo.kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, areMultipleSubDevicesInContext()); } } bool Kernel::isPatched() const { return patchedArgumentsNum == kernelInfo.kernelDescriptor.kernelAttributes.numArgsToPatch; } cl_int Kernel::checkCorrectImageAccessQualifier(cl_uint argIndex, size_t argSize, const void *argValue) const { const auto &arg = kernelInfo.kernelDescriptor.payloadMappings.explicitArgs[argIndex]; if (arg.is()) { cl_mem mem = *(static_cast(argValue)); MemObj *pMemObj = nullptr; WithCastToInternal(mem, &pMemObj); if (pMemObj) { auto accessQualifier = arg.getTraits().accessQualifier; cl_mem_flags flags = pMemObj->getFlags(); if ((accessQualifier == KernelArgMetadata::AccessReadOnly && ((flags | CL_MEM_WRITE_ONLY) == flags)) || (accessQualifier == 
KernelArgMetadata::AccessWriteOnly && ((flags | CL_MEM_READ_ONLY) == flags))) { return CL_INVALID_ARG_VALUE; } } else { return CL_INVALID_ARG_VALUE; } } return CL_SUCCESS; } void Kernel::resolveArgs() { if (!Kernel::isPatched() || !imageTransformer->hasRegisteredImages3d() || !canTransformImages()) return; bool canTransformImageTo2dArray = true; const auto &args = kernelInfo.kernelDescriptor.payloadMappings.explicitArgs; for (uint32_t i = 0; i < patchedArgumentsNum; i++) { if (args[i].is()) { auto sampler = castToObject(kernelArguments.at(i).object); if (sampler->isTransformable()) { canTransformImageTo2dArray = true; } else { canTransformImageTo2dArray = false; break; } } } if (canTransformImageTo2dArray) { imageTransformer->transformImagesTo2dArray(kernelInfo, kernelArguments, getSurfaceStateHeap()); } else if (imageTransformer->didTransform()) { imageTransformer->transformImagesTo3d(kernelInfo, kernelArguments, getSurfaceStateHeap()); } } bool Kernel::canTransformImages() const { auto renderCoreFamily = clDevice.getHardwareInfo().platform.eRenderCoreFamily; return renderCoreFamily >= IGFX_GEN9_CORE && renderCoreFamily <= IGFX_GEN11LP_CORE && !isBuiltIn; } void Kernel::fillWithKernelObjsForAuxTranslation(KernelObjsForAuxTranslation &kernelObjsForAuxTranslation) { kernelObjsForAuxTranslation.reserve(getKernelArgsNumber()); for (uint32_t i = 0; i < getKernelArgsNumber(); i++) { const auto &arg = kernelInfo.kernelDescriptor.payloadMappings.explicitArgs[i]; if (BUFFER_OBJ == kernelArguments.at(i).type && !arg.as().isPureStateful()) { auto buffer = castToObject(getKernelArg(i)); if (buffer && buffer->getMultiGraphicsAllocation().getDefaultGraphicsAllocation()->isCompressionEnabled()) { kernelObjsForAuxTranslation.insert({KernelObjForAuxTranslation::Type::MEM_OBJ, buffer}); auto &context = this->program->getContext(); if (context.isProvidingPerformanceHints()) { const auto &argExtMeta = kernelInfo.kernelDescriptor.explicitArgsExtendedMetadata[i]; 
context.providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, KERNEL_ARGUMENT_AUX_TRANSLATION,
                                                   kernelInfo.kernelDescriptor.kernelMetadata.kernelName.c_str(), i, argExtMeta.argName.c_str());
                }
            }
        }
        // SVM allocation arguments that are not purely stateful: collect the compressed ones.
        if (SVM_ALLOC_OBJ == getKernelArguments().at(i).type && !arg.as().isPureStateful()) {
            auto svmAlloc = reinterpret_cast(const_cast(getKernelArg(i)));
            if (svmAlloc && svmAlloc->isCompressionEnabled()) {
                kernelObjsForAuxTranslation.insert({KernelObjForAuxTranslation::Type::GFX_ALLOC, svmAlloc});
                auto &context = this->program->getContext();
                if (context.isProvidingPerformanceHints()) {
                    const auto &argExtMeta = kernelInfo.kernelDescriptor.explicitArgsExtendedMetadata[i];
                    context.providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, KERNEL_ARGUMENT_AUX_TRANSLATION,
                                                   kernelInfo.kernelDescriptor.kernelMetadata.kernelName.c_str(), i, argExtMeta.argName.c_str());
                }
            }
        }
    }
    // When the product allows stateless compression, compressed allocations that are not
    // explicit arguments are collected as well.
    const auto &hwInfoConfig = *HwInfoConfig::get(getDevice().getHardwareInfo().platform.eProductFamily);
    if (hwInfoConfig.allowStatelessCompression(getDevice().getHardwareInfo())) {
        // Unified-memory allocations attached to the kernel.
        for (auto gfxAllocation : kernelUnifiedMemoryGfxAllocations) {
            if (gfxAllocation->isCompressionEnabled()) {
                kernelObjsForAuxTranslation.insert({KernelObjForAuxTranslation::Type::GFX_ALLOC, gfxAllocation});
                auto &context = this->program->getContext();
                if (context.isProvidingPerformanceHints()) {
                    context.providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, KERNEL_ALLOCATION_AUX_TRANSLATION,
                                                   kernelInfo.kernelDescriptor.kernelMetadata.kernelName.c_str(),
                                                   reinterpret_cast(gfxAllocation->getGpuAddress()), gfxAllocation->getUnderlyingBufferSize());
                }
            }
        }
        // Every allocation tracked by the context's SVM allocations manager, when one exists.
        if (getContext().getSVMAllocsManager()) {
            for (auto &allocation : getContext().getSVMAllocsManager()->getSVMAllocs()->allocations) {
                auto gfxAllocation = allocation.second.gpuAllocations.getDefaultGraphicsAllocation();
                if (gfxAllocation->isCompressionEnabled()) {
                    kernelObjsForAuxTranslation.insert({KernelObjForAuxTranslation::Type::GFX_ALLOC, gfxAllocation});
                    auto
&context = this->program->getContext(); if (context.isProvidingPerformanceHints()) { context.providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, KERNEL_ALLOCATION_AUX_TRANSLATION, kernelInfo.kernelDescriptor.kernelMetadata.kernelName.c_str(), reinterpret_cast(gfxAllocation->getGpuAddress()), gfxAllocation->getUnderlyingBufferSize()); } } } } } } bool Kernel::hasDirectStatelessAccessToSharedBuffer() const { for (uint32_t i = 0; i < getKernelArgsNumber(); i++) { const auto &arg = kernelInfo.kernelDescriptor.payloadMappings.explicitArgs[i]; if (BUFFER_OBJ == kernelArguments.at(i).type && !arg.as().isPureStateful()) { auto buffer = castToObject(getKernelArg(i)); if (buffer && buffer->getMultiGraphicsAllocation().getAllocationType() == AllocationType::SHARED_BUFFER) { return true; } } } return false; } bool Kernel::hasDirectStatelessAccessToHostMemory() const { for (uint32_t i = 0; i < getKernelArgsNumber(); i++) { const auto &arg = kernelInfo.kernelDescriptor.payloadMappings.explicitArgs[i]; if (BUFFER_OBJ == kernelArguments.at(i).type && !arg.as().isPureStateful()) { auto buffer = castToObject(getKernelArg(i)); if (buffer && buffer->getMultiGraphicsAllocation().getAllocationType() == AllocationType::BUFFER_HOST_MEMORY) { return true; } } if (SVM_ALLOC_OBJ == kernelArguments.at(i).type && !arg.as().isPureStateful()) { auto svmAlloc = reinterpret_cast(getKernelArg(i)); if (svmAlloc && svmAlloc->getAllocationType() == AllocationType::BUFFER_HOST_MEMORY) { return true; } } } return false; } bool Kernel::hasIndirectStatelessAccessToHostMemory() const { if (!kernelInfo.hasIndirectStatelessAccess) { return false; } for (auto gfxAllocation : kernelUnifiedMemoryGfxAllocations) { if (gfxAllocation->getAllocationType() == AllocationType::BUFFER_HOST_MEMORY) { return true; } } if (unifiedMemoryControls.indirectHostAllocationsAllowed) { return getContext().getSVMAllocsManager()->hasHostAllocations(); } return false; } void 
Kernel::getAllocationsForCacheFlush(CacheFlushAllocationsVec &out) const { if (false == HwHelper::cacheFlushAfterWalkerSupported(getHardwareInfo())) { return; } for (GraphicsAllocation *alloc : this->kernelArgRequiresCacheFlush) { if (nullptr == alloc) { continue; } out.push_back(alloc); } auto rootDeviceIndex = getDevice().getRootDeviceIndex(); auto global = getProgram()->getGlobalSurface(rootDeviceIndex); if (global != nullptr) { out.push_back(global); } if (svmAllocationsRequireCacheFlush) { for (GraphicsAllocation *alloc : kernelSvmGfxAllocations) { if (allocationForCacheFlush(alloc)) { out.push_back(alloc); } } } } bool Kernel::allocationForCacheFlush(GraphicsAllocation *argAllocation) const { return argAllocation->isFlushL3Required(); } void Kernel::addAllocationToCacheFlushVector(uint32_t argIndex, GraphicsAllocation *argAllocation) { if (argAllocation == nullptr) { kernelArgRequiresCacheFlush[argIndex] = nullptr; } else { if (allocationForCacheFlush(argAllocation)) { kernelArgRequiresCacheFlush[argIndex] = argAllocation; } else { kernelArgRequiresCacheFlush[argIndex] = nullptr; } } } uint64_t Kernel::getKernelStartOffset( const bool localIdsGenerationByRuntime, const bool kernelUsesLocalIds, const bool isCssUsed) const { uint64_t kernelStartOffset = 0; if (kernelInfo.getGraphicsAllocation()) { kernelStartOffset = kernelInfo.getGraphicsAllocation()->getGpuAddressToPatch(); if (localIdsGenerationByRuntime == false && kernelUsesLocalIds == true) { kernelStartOffset += kernelInfo.kernelDescriptor.entryPoints.skipPerThreadDataLoad; } } kernelStartOffset += getStartOffset(); auto &hardwareInfo = getHardwareInfo(); auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); if (isCssUsed && hwHelper.isOffsetToSkipSetFFIDGPWARequired(hardwareInfo)) { kernelStartOffset += kernelInfo.kernelDescriptor.entryPoints.skipSetFFIDGP; } return kernelStartOffset; } void *Kernel::patchBindlessSurfaceState(NEO::GraphicsAllocation *alloc, uint32_t bindless) { auto 
&hwHelper = HwHelper::get(getDevice().getHardwareInfo().platform.eRenderCoreFamily); auto surfaceStateSize = hwHelper.getRenderSurfaceStateSize(); NEO::BindlessHeapsHelper *bindlessHeapsHelper = getDevice().getDevice().getBindlessHeapsHelper(); auto ssInHeap = bindlessHeapsHelper->allocateSSInHeap(surfaceStateSize, alloc, NEO::BindlessHeapsHelper::GLOBAL_SSH); auto patchLocation = ptrOffset(getCrossThreadData(), bindless); auto patchValue = hwHelper.getBindlessSurfaceExtendedMessageDescriptorValue(static_cast(ssInHeap.surfaceStateOffset)); patchWithRequiredSize(patchLocation, sizeof(patchValue), patchValue); return ssInHeap.ssPtr; } void Kernel::setAdditionalKernelExecInfo(uint32_t additionalKernelExecInfo) { this->additionalKernelExecInfo = additionalKernelExecInfo; } uint32_t Kernel::getAdditionalKernelExecInfo() const { return this->additionalKernelExecInfo; } bool Kernel::requiresWaDisableRccRhwoOptimization() const { auto &hardwareInfo = getHardwareInfo(); auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); auto rootDeviceIndex = getDevice().getRootDeviceIndex(); if (hwHelper.isWaDisableRccRhwoOptimizationRequired() && isUsingSharedObjArgs()) { for (auto &arg : getKernelArguments()) { auto clMemObj = static_cast(arg.object); auto memObj = castToObject(clMemObj); if (memObj && memObj->peekSharingHandler()) { auto allocation = memObj->getGraphicsAllocation(rootDeviceIndex); for (uint32_t handleId = 0u; handleId < allocation->getNumGmms(); handleId++) { if (allocation->getGmm(handleId)->gmmResourceInfo->getResourceFlags()->Info.MediaCompressed) { return true; } } } } } return false; } const HardwareInfo &Kernel::getHardwareInfo() const { return getDevice().getHardwareInfo(); } void Kernel::setWorkDim(uint32_t workDim) { patchNonPointer(getCrossThreadDataRef(), getDescriptor().payloadMappings.dispatchTraits.workDim, workDim); if (pImplicitArgs) { pImplicitArgs->numWorkDim = workDim; } } void Kernel::setGlobalWorkOffsetValues(uint32_t 
globalWorkOffsetX, uint32_t globalWorkOffsetY, uint32_t globalWorkOffsetZ) { patchVecNonPointer(getCrossThreadDataRef(), getDescriptor().payloadMappings.dispatchTraits.globalWorkOffset, {globalWorkOffsetX, globalWorkOffsetY, globalWorkOffsetZ}); if (pImplicitArgs) { pImplicitArgs->globalOffsetX = globalWorkOffsetX; pImplicitArgs->globalOffsetY = globalWorkOffsetY; pImplicitArgs->globalOffsetZ = globalWorkOffsetZ; } } void Kernel::setGlobalWorkSizeValues(uint32_t globalWorkSizeX, uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ) { patchVecNonPointer(getCrossThreadDataRef(), getDescriptor().payloadMappings.dispatchTraits.globalWorkSize, {globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ}); if (pImplicitArgs) { pImplicitArgs->globalSizeX = globalWorkSizeX; pImplicitArgs->globalSizeY = globalWorkSizeY; pImplicitArgs->globalSizeZ = globalWorkSizeZ; } } void Kernel::setLocalWorkSizeValues(uint32_t localWorkSizeX, uint32_t localWorkSizeY, uint32_t localWorkSizeZ) { patchVecNonPointer(getCrossThreadDataRef(), getDescriptor().payloadMappings.dispatchTraits.localWorkSize, {localWorkSizeX, localWorkSizeY, localWorkSizeZ}); if (pImplicitArgs) { pImplicitArgs->localSizeX = localWorkSizeX; pImplicitArgs->localSizeY = localWorkSizeY; pImplicitArgs->localSizeZ = localWorkSizeZ; } } void Kernel::setLocalWorkSize2Values(uint32_t localWorkSizeX, uint32_t localWorkSizeY, uint32_t localWorkSizeZ) { patchVecNonPointer(getCrossThreadDataRef(), getDescriptor().payloadMappings.dispatchTraits.localWorkSize2, {localWorkSizeX, localWorkSizeY, localWorkSizeZ}); } void Kernel::setEnqueuedLocalWorkSizeValues(uint32_t localWorkSizeX, uint32_t localWorkSizeY, uint32_t localWorkSizeZ) { patchVecNonPointer(getCrossThreadDataRef(), getDescriptor().payloadMappings.dispatchTraits.enqueuedLocalWorkSize, {localWorkSizeX, localWorkSizeY, localWorkSizeZ}); } void Kernel::setNumWorkGroupsValues(uint32_t numWorkGroupsX, uint32_t numWorkGroupsY, uint32_t numWorkGroupsZ) { 
patchVecNonPointer(getCrossThreadDataRef(), getDescriptor().payloadMappings.dispatchTraits.numWorkGroups, {numWorkGroupsX, numWorkGroupsY, numWorkGroupsZ}); if (pImplicitArgs) { pImplicitArgs->groupCountX = numWorkGroupsX; pImplicitArgs->groupCountY = numWorkGroupsY; pImplicitArgs->groupCountZ = numWorkGroupsZ; } } bool Kernel::isLocalWorkSize2Patchable() { const auto &localWorkSize2 = getDescriptor().payloadMappings.dispatchTraits.localWorkSize2; return isValidOffset(localWorkSize2[0]) && isValidOffset(localWorkSize2[1]) && isValidOffset(localWorkSize2[2]); } uint32_t Kernel::getMaxKernelWorkGroupSize() const { return maxKernelWorkGroupSize; } uint32_t Kernel::getSlmTotalSize() const { return slmTotalSize; } bool Kernel::areMultipleSubDevicesInContext() const { auto context = program->getContextPtr(); return context ? context->containsMultipleSubDevices(clDevice.getRootDeviceIndex()) : false; } void Kernel::reconfigureKernel() { auto &kernelDescriptor = kernelInfo.kernelDescriptor; if (kernelDescriptor.kernelAttributes.numGrfRequired == GrfConfig::LargeGrfNumber && kernelDescriptor.kernelAttributes.simdSize != 32) { maxKernelWorkGroupSize >>= 1; } this->containsStatelessWrites = kernelDescriptor.kernelAttributes.flags.usesStatelessWrites; this->specialPipelineSelectMode = kernelDescriptor.kernelAttributes.flags.usesSpecialPipelineSelectMode; } bool Kernel::requiresCacheFlushCommand(const CommandQueue &commandQueue) const { if (false == HwHelper::cacheFlushAfterWalkerSupported(commandQueue.getDevice().getHardwareInfo())) { return false; } if (DebugManager.flags.EnableCacheFlushAfterWalkerForAllQueues.get() != -1) { return !!DebugManager.flags.EnableCacheFlushAfterWalkerForAllQueues.get(); } bool cmdQueueRequiresCacheFlush = commandQueue.getRequiresCacheFlushAfterWalker(); if (false == cmdQueueRequiresCacheFlush) { return false; } if (commandQueue.getGpgpuCommandStreamReceiver().isMultiOsContextCapable()) { return false; } bool isMultiDevice = 
commandQueue.getContext().containsMultipleSubDevices(commandQueue.getDevice().getRootDeviceIndex()); if (false == isMultiDevice) { return false; } bool isDefaultContext = (commandQueue.getContext().peekContextType() == ContextType::CONTEXT_TYPE_DEFAULT); if (true == isDefaultContext) { return false; } if (getProgram()->getGlobalSurface(commandQueue.getDevice().getRootDeviceIndex()) != nullptr) { return true; } if (svmAllocationsRequireCacheFlush) { return true; } size_t args = kernelArgRequiresCacheFlush.size(); for (size_t i = 0; i < args; i++) { if (kernelArgRequiresCacheFlush[i] != nullptr) { return true; } } return false; } void Kernel::updateAuxTranslationRequired() { const auto &hwInfoConfig = *HwInfoConfig::get(getDevice().getHardwareInfo().platform.eProductFamily); if (hwInfoConfig.allowStatelessCompression(getDevice().getHardwareInfo())) { if (hasDirectStatelessAccessToHostMemory() || hasIndirectStatelessAccessToHostMemory() || hasDirectStatelessAccessToSharedBuffer()) { setAuxTranslationRequired(true); } } } int Kernel::setKernelThreadArbitrationPolicy(uint32_t policy) { auto &hwInfo = clDevice.getHardwareInfo(); auto &hwHelper = NEO::ClHwHelper::get(hwInfo.platform.eRenderCoreFamily); if (!hwHelper.isSupportedKernelThreadArbitrationPolicy()) { this->threadArbitrationPolicy = ThreadArbitrationPolicy::NotPresent; return CL_INVALID_DEVICE; } else if (policy == CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_ROUND_ROBIN_INTEL) { this->threadArbitrationPolicy = ThreadArbitrationPolicy::RoundRobin; } else if (policy == CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_OLDEST_FIRST_INTEL) { this->threadArbitrationPolicy = ThreadArbitrationPolicy::AgeBased; } else if (policy == CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_AFTER_DEPENDENCY_ROUND_ROBIN_INTEL || policy == CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_STALL_BASED_ROUND_ROBIN_INTEL) { this->threadArbitrationPolicy = ThreadArbitrationPolicy::RoundRobinAfterDependency; } else { this->threadArbitrationPolicy 
= ThreadArbitrationPolicy::NotPresent; return CL_INVALID_VALUE; } return CL_SUCCESS; } } // namespace NEO compute-runtime-22.14.22890/opencl/source/kernel/kernel.h000066400000000000000000000531611422164147700230510ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/command_stream_receiver_hw.h" #include "shared/source/command_stream/csr_properties_flags.h" #include "shared/source/command_stream/thread_arbitration_policy.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/device/device.h" #include "shared/source/helpers/address_patch.h" #include "shared/source/helpers/preamble.h" #include "shared/source/helpers/timestamp_packet.h" #include "shared/source/kernel/implicit_args.h" #include "shared/source/kernel/kernel_execution_type.h" #include "shared/source/program/kernel_info.h" #include "shared/source/unified_memory/unified_memory.h" #include "shared/source/utilities/stackvec.h" #include "opencl/extensions/public/cl_ext_private.h" #include "opencl/source/api/cl_types.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/helpers/base_object.h" #include "opencl/source/helpers/properties_helper.h" #include "opencl/source/kernel/kernel_objects_for_aux_translation.h" #include "opencl/source/program/program.h" #include namespace NEO { struct CompletionStamp; class Buffer; class CommandQueue; class CommandStreamReceiver; class GraphicsAllocation; class ImageTransformer; class Surface; class PrintfHandler; class MultiDeviceKernel; class Kernel : public ReferenceTrackedObject { public: static const uint32_t kernelBinaryAlignment = 64; enum kernelArgType { NONE_OBJ, IMAGE_OBJ, BUFFER_OBJ, PIPE_OBJ, SVM_OBJ, SVM_ALLOC_OBJ, SAMPLER_OBJ, ACCELERATOR_OBJ, DEVICE_QUEUE_OBJ, SLM_OBJ }; struct SimpleKernelArgInfo { kernelArgType type; void *object; const void *value; size_t size; GraphicsAllocation *pSvmAlloc; 
cl_mem_flags svmFlags; bool isPatched = false; bool isStatelessUncacheable = false; uint32_t allocId; uint32_t allocIdMemoryManagerCounter; }; enum class TunningStatus { STANDARD_TUNNING_IN_PROGRESS, SUBDEVICE_TUNNING_IN_PROGRESS, TUNNING_DONE }; enum class TunningType { DISABLED, SIMPLE, FULL }; typedef int32_t (Kernel::*KernelArgHandler)(uint32_t argIndex, size_t argSize, const void *argVal); template static kernel_t *create(program_t *program, const KernelInfo &kernelInfo, ClDevice &clDevice, cl_int *errcodeRet) { cl_int retVal; kernel_t *pKernel = nullptr; pKernel = new kernel_t(program, kernelInfo, clDevice); retVal = pKernel->initialize(); if (retVal != CL_SUCCESS) { delete pKernel; pKernel = nullptr; } if (errcodeRet) { *errcodeRet = retVal; } if (FileLoggerInstance().enabled()) { std::string source; program->getSource(source); FileLoggerInstance().dumpKernel(kernelInfo.kernelDescriptor.kernelMetadata.kernelName, source); } return pKernel; } Kernel &operator=(const Kernel &) = delete; Kernel(const Kernel &) = delete; virtual ~Kernel(); static bool isMemObj(kernelArgType kernelArg) { return kernelArg == BUFFER_OBJ || kernelArg == IMAGE_OBJ || kernelArg == PIPE_OBJ; } bool isAuxTranslationRequired() const { return auxTranslationRequired; } void setAuxTranslationRequired(bool onOff) { auxTranslationRequired = onOff; } void updateAuxTranslationRequired(); ArrayRef getCrossThreadDataRef() { return ArrayRef(reinterpret_cast(crossThreadData), crossThreadDataSize); } char *getCrossThreadData() const { return crossThreadData; } uint32_t getCrossThreadDataSize() const { return crossThreadDataSize; } cl_int initialize(); MOCKABLE_VIRTUAL cl_int cloneKernel(Kernel *pSourceKernel); MOCKABLE_VIRTUAL bool canTransformImages() const; MOCKABLE_VIRTUAL bool isPatched() const; // API entry points cl_int setArgument(uint32_t argIndex, size_t argSize, const void *argVal) { return setArg(argIndex, argSize, argVal); } cl_int setArgSvm(uint32_t argIndex, size_t svmAllocSize, void 
*svmPtr, GraphicsAllocation *svmAlloc, cl_mem_flags svmFlags); MOCKABLE_VIRTUAL cl_int setArgSvmAlloc(uint32_t argIndex, void *svmPtr, GraphicsAllocation *svmAlloc, uint32_t allocId); void setSvmKernelExecInfo(GraphicsAllocation *argValue); void clearSvmKernelExecInfo(); cl_int getInfo(cl_kernel_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) const; void getAdditionalInfo(cl_kernel_info paramName, const void *¶mValue, size_t ¶mValueSizeRet) const; void getAdditionalWorkGroupInfo(cl_kernel_work_group_info paramName, const void *¶mValue, size_t ¶mValueSizeRet) const; cl_int getArgInfo(cl_uint argIndx, cl_kernel_arg_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) const; cl_int getWorkGroupInfo(cl_kernel_work_group_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) const; cl_int getSubGroupInfo(cl_kernel_sub_group_info paramName, size_t inputValueSize, const void *inputValue, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) const; const void *getKernelHeap() const; void *getSurfaceStateHeap() const; const void *getDynamicStateHeap() const; size_t getKernelHeapSize() const; size_t getSurfaceStateHeapSize() const; size_t getDynamicStateHeapSize() const; size_t getNumberOfBindingTableStates() const; size_t getBindingTableOffset() const { return localBindingTableOffset; } void resizeSurfaceStateHeap(void *pNewSsh, size_t newSshSize, size_t newBindingTableCount, size_t newBindingTableOffset); void substituteKernelHeap(void *newKernelHeap, size_t newKernelHeapSize); bool isKernelHeapSubstituted() const; uint64_t getKernelId() const; void setKernelId(uint64_t newKernelId); uint32_t getStartOffset() const; void setStartOffset(uint32_t offset); const std::vector &getKernelArguments() const { return kernelArguments; } size_t getKernelArgsNumber() const { return kernelArguments.size(); } bool usesBindfulAddressingForBuffers() const { return 
KernelDescriptor::BindfulAndStateless == kernelInfo.kernelDescriptor.kernelAttributes.bufferAddressingMode; } inline const KernelDescriptor &getDescriptor() const { return kernelInfo.kernelDescriptor; } inline const KernelInfo &getKernelInfo() const { return kernelInfo; } Context &getContext() const { return program->getContext(); } Program *getProgram() const { return program; } uint32_t getScratchSize() { return kernelInfo.kernelDescriptor.kernelAttributes.perThreadScratchSize[0]; } uint32_t getPrivateScratchSize() { return kernelInfo.kernelDescriptor.kernelAttributes.perThreadScratchSize[1]; } bool usesSyncBuffer() const; void patchSyncBuffer(GraphicsAllocation *gfxAllocation, size_t bufferOffset); void *patchBindlessSurfaceState(NEO::GraphicsAllocation *alloc, uint32_t bindless); // Helpers cl_int setArg(uint32_t argIndex, uint32_t argValue); cl_int setArg(uint32_t argIndex, uint64_t argValue); cl_int setArg(uint32_t argIndex, cl_mem argValue); cl_int setArg(uint32_t argIndex, cl_mem argValue, uint32_t mipLevel); cl_int setArg(uint32_t argIndex, size_t argSize, const void *argVal); // Handlers void setKernelArgHandler(uint32_t argIndex, KernelArgHandler handler); void unsetArg(uint32_t argIndex); cl_int setArgImmediate(uint32_t argIndex, size_t argSize, const void *argVal); cl_int setArgBuffer(uint32_t argIndex, size_t argSize, const void *argVal); cl_int setArgPipe(uint32_t argIndex, size_t argSize, const void *argVal); cl_int setArgImage(uint32_t argIndex, size_t argSize, const void *argVal); cl_int setArgImageWithMipLevel(uint32_t argIndex, size_t argSize, const void *argVal, uint32_t mipLevel); cl_int setArgLocal(uint32_t argIndex, size_t argSize, const void *argVal); cl_int setArgSampler(uint32_t argIndex, size_t argSize, const void *argVal); cl_int setArgAccelerator(uint32_t argIndex, size_t argSize, const void *argVal); void storeKernelArg(uint32_t argIndex, kernelArgType argType, void *argObject, const void *argValue, size_t argSize, GraphicsAllocation 
*argSvmAlloc = nullptr, cl_mem_flags argSvmFlags = 0); void storeKernelArgAllocIdMemoryManagerCounter(uint32_t argIndex, uint32_t allocIdMemoryManagerCounter); const void *getKernelArg(uint32_t argIndex) const; const SimpleKernelArgInfo &getKernelArgInfo(uint32_t argIndex) const; bool getAllowNonUniform() const { return program->getAllowNonUniform(); } bool isVmeKernel() const { return kernelInfo.kernelDescriptor.kernelAttributes.flags.usesVme; } bool requiresSpecialPipelineSelectMode() const { return specialPipelineSelectMode; } void performKernelTuning(CommandStreamReceiver &commandStreamReceiver, const Vec3 &lws, const Vec3 &gws, const Vec3 &offsets, TimestampPacketContainer *timestampContainer); MOCKABLE_VIRTUAL bool isSingleSubdevicePreferred() const; //residency for kernel surfaces MOCKABLE_VIRTUAL void makeResident(CommandStreamReceiver &commandStreamReceiver); MOCKABLE_VIRTUAL void getResidency(std::vector &dst); bool requiresCoherency(); void resetSharedObjectsPatchAddresses(); bool isUsingSharedObjArgs() const { return usingSharedObjArgs; } bool hasUncacheableStatelessArgs() const { return statelessUncacheableArgsCount > 0; } bool hasPrintfOutput() const; cl_int checkCorrectImageAccessQualifier(cl_uint argIndex, size_t argSize, const void *argValue) const; static uint32_t dummyPatchLocation; uint32_t allBufferArgsStateful = CL_TRUE; bool isBuiltIn = false; int32_t getThreadArbitrationPolicy() const { return threadArbitrationPolicy; } KernelExecutionType getExecutionType() const { return executionType; } bool is32Bit() const { return kernelInfo.kernelDescriptor.kernelAttributes.gpuPointerSize == 4; } size_t getPerThreadSystemThreadSurfaceSize() const { return kernelInfo.kernelDescriptor.kernelAttributes.perThreadSystemThreadSurfaceSize; } std::vector &getPatchInfoDataList() { return patchInfoDataList; }; bool usesImages() const { return usingImages; } bool usesOnlyImages() const { return usingImagesOnly; } void 
fillWithKernelObjsForAuxTranslation(KernelObjsForAuxTranslation &kernelObjsForAuxTranslation); MOCKABLE_VIRTUAL bool requiresCacheFlushCommand(const CommandQueue &commandQueue) const; using CacheFlushAllocationsVec = StackVec; void getAllocationsForCacheFlush(CacheFlushAllocationsVec &out) const; void setAuxTranslationDirection(AuxTranslationDirection auxTranslationDirection) { this->auxTranslationDirection = auxTranslationDirection; } void setUnifiedMemorySyncRequirement(bool isUnifiedMemorySyncRequired) { this->isUnifiedMemorySyncRequired = isUnifiedMemorySyncRequired; } void setUnifiedMemoryProperty(cl_kernel_exec_info infoType, bool infoValue); void setUnifiedMemoryExecInfo(GraphicsAllocation *argValue); void clearUnifiedMemoryExecInfo(); bool areStatelessWritesUsed() { return containsStatelessWrites; } int setKernelThreadArbitrationPolicy(uint32_t propertyValue); cl_int setKernelExecutionType(cl_execution_info_kernel_type_intel executionType); void setThreadArbitrationPolicy(int32_t policy) { this->threadArbitrationPolicy = policy; } void getSuggestedLocalWorkSize(const cl_uint workDim, const size_t *globalWorkSize, const size_t *globalWorkOffset, size_t *localWorkSize); uint32_t getMaxWorkGroupCount(const cl_uint workDim, const size_t *localWorkSize, const CommandQueue *commandQueue) const; uint64_t getKernelStartOffset( const bool localIdsGenerationByRuntime, const bool kernelUsesLocalIds, const bool isCssUsed) const; bool isKernelDebugEnabled() const { return debugEnabled; } int32_t setAdditionalKernelExecInfoWithParam(uint32_t paramName, size_t paramValueSize, const void *paramValue); void setAdditionalKernelExecInfo(uint32_t additionalKernelExecInfo); uint32_t getAdditionalKernelExecInfo() const; MOCKABLE_VIRTUAL bool requiresWaDisableRccRhwoOptimization() const; //dispatch traits void setGlobalWorkOffsetValues(uint32_t globalWorkOffsetX, uint32_t globalWorkOffsetY, uint32_t globalWorkOffsetZ); void setGlobalWorkSizeValues(uint32_t globalWorkSizeX, 
uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ); void setLocalWorkSizeValues(uint32_t localWorkSizeX, uint32_t localWorkSizeY, uint32_t localWorkSizeZ); void setLocalWorkSize2Values(uint32_t localWorkSizeX, uint32_t localWorkSizeY, uint32_t localWorkSizeZ); void setEnqueuedLocalWorkSizeValues(uint32_t localWorkSizeX, uint32_t localWorkSizeY, uint32_t localWorkSizeZ); void setNumWorkGroupsValues(uint32_t numWorkGroupsX, uint32_t numWorkGroupsY, uint32_t numWorkGroupsZ); void setWorkDim(uint32_t workDim); const uint32_t *getDispatchTrait(const CrossThreadDataOffset offset) const { return isValidOffset(offset) ? reinterpret_cast(getCrossThreadData() + offset) : &Kernel::dummyPatchLocation; } const uint32_t *getWorkDim() const { return getDispatchTrait(getDescriptor().payloadMappings.dispatchTraits.workDim); } std::array getDispatchTraitArray(const CrossThreadDataOffset dispatchTrait[3]) const { return {getDispatchTrait(dispatchTrait[0]), getDispatchTrait(dispatchTrait[1]), getDispatchTrait(dispatchTrait[2])}; } std::array getGlobalWorkOffsetValues() const { return getDispatchTraitArray(getDescriptor().payloadMappings.dispatchTraits.globalWorkOffset); } std::array getLocalWorkSizeValues() const { return getDispatchTraitArray(getDescriptor().payloadMappings.dispatchTraits.localWorkSize); } std::array getLocalWorkSize2Values() const { return getDispatchTraitArray(getDescriptor().payloadMappings.dispatchTraits.localWorkSize2); } std::array getEnqueuedLocalWorkSizeValues() const { return getDispatchTraitArray(getDescriptor().payloadMappings.dispatchTraits.enqueuedLocalWorkSize); } std::array getNumWorkGroupsValues() const { return getDispatchTraitArray(getDescriptor().payloadMappings.dispatchTraits.numWorkGroups); } bool isLocalWorkSize2Patchable(); uint32_t getMaxKernelWorkGroupSize() const; uint32_t getSlmTotalSize() const; bool getHasIndirectAccess() const { return this->kernelHasIndirectAccess; } MultiDeviceKernel *getMultiDeviceKernel() const { return 
pMultiDeviceKernel; } void setMultiDeviceKernel(MultiDeviceKernel *pMultiDeviceKernelToSet) { pMultiDeviceKernel = pMultiDeviceKernelToSet; } bool areMultipleSubDevicesInContext() const; bool requiresMemoryMigration() const { return migratableArgsMap.size() > 0; } const std::map &getMemObjectsToMigrate() const { return migratableArgsMap; } ImplicitArgs *getImplicitArgs() const { return pImplicitArgs.get(); } const HardwareInfo &getHardwareInfo() const; protected: void makeArgsResident(CommandStreamReceiver &commandStreamReceiver); void *patchBufferOffset(const ArgDescPointer &argAsPtr, void *svmPtr, GraphicsAllocation *svmAlloc); void patchWithImplicitSurface(void *ptrToPatchInCrossThreadData, GraphicsAllocation &allocation, const ArgDescPointer &arg); Kernel(Program *programArg, const KernelInfo &kernelInfo, ClDevice &clDevice); void provideInitializationHints(); void markArgPatchedAndResolveArgs(uint32_t argIndex); void resolveArgs(); void reconfigureKernel(); bool hasDirectStatelessAccessToSharedBuffer() const; bool hasDirectStatelessAccessToHostMemory() const; bool hasIndirectStatelessAccessToHostMemory() const; void addAllocationToCacheFlushVector(uint32_t argIndex, GraphicsAllocation *argAllocation); bool allocationForCacheFlush(GraphicsAllocation *argAllocation) const; const ClDevice &getDevice() const { return clDevice; } cl_int patchPrivateSurface(); bool containsStatelessWrites = true; const ExecutionEnvironment &executionEnvironment; Program *program; ClDevice &clDevice; const KernelInfo &kernelInfo; std::vector kernelArguments; std::vector kernelArgHandlers; std::vector kernelSvmGfxAllocations; std::vector kernelUnifiedMemoryGfxAllocations; AuxTranslationDirection auxTranslationDirection = AuxTranslationDirection::None; bool usingSharedObjArgs = false; bool usingImages = false; bool usingImagesOnly = false; bool auxTranslationRequired = false; uint32_t patchedArgumentsNum = 0; uint32_t startOffset = 0; uint32_t statelessUncacheableArgsCount = 0; int32_t 
threadArbitrationPolicy = ThreadArbitrationPolicy::NotPresent; KernelExecutionType executionType = KernelExecutionType::Default; std::vector patchInfoDataList; std::unique_ptr imageTransformer; std::map migratableArgsMap{}; bool specialPipelineSelectMode = false; bool svmAllocationsRequireCacheFlush = false; std::vector kernelArgRequiresCacheFlush; UnifiedMemoryControls unifiedMemoryControls{}; bool isUnifiedMemorySyncRequired = true; bool debugEnabled = false; uint32_t additionalKernelExecInfo = AdditionalKernelExecInfo::DisableOverdispatch; uint32_t *maxWorkGroupSizeForCrossThreadData = &Kernel::dummyPatchLocation; uint32_t maxKernelWorkGroupSize = 0; uint32_t *dataParameterSimdSize = &Kernel::dummyPatchLocation; uint32_t *parentEventOffset = &Kernel::dummyPatchLocation; uint32_t *preferredWkgMultipleOffset = &Kernel::dummyPatchLocation; size_t numberOfBindingTableStates = 0u; size_t localBindingTableOffset = 0u; std::vector slmSizes; uint32_t slmTotalSize = 0u; std::unique_ptr pSshLocal; uint32_t sshLocalSize = 0u; char *crossThreadData = nullptr; uint32_t crossThreadDataSize = 0u; GraphicsAllocation *privateSurface = nullptr; uint64_t privateSurfaceSize = 0u; struct KernelConfig { Vec3 gws; Vec3 lws; Vec3 offsets; bool operator==(const KernelConfig &other) const { return this->gws == other.gws && this->lws == other.lws && this->offsets == other.offsets; } }; struct KernelConfigHash { size_t operator()(KernelConfig const &config) const { auto hash = std::hash{}; size_t gwsHashX = hash(config.gws.x); size_t gwsHashY = hash(config.gws.y); size_t gwsHashZ = hash(config.gws.z); size_t gwsHash = hashCombine(gwsHashX, gwsHashY, gwsHashZ); size_t lwsHashX = hash(config.lws.x); size_t lwsHashY = hash(config.lws.y); size_t lwsHashZ = hash(config.lws.z); size_t lwsHash = hashCombine(lwsHashX, lwsHashY, lwsHashZ); size_t offsetsHashX = hash(config.offsets.x); size_t offsetsHashY = hash(config.offsets.y); size_t offsetsHashZ = hash(config.offsets.z); size_t offsetsHash = 
hashCombine(offsetsHashX, offsetsHashY, offsetsHashZ); return hashCombine(gwsHash, lwsHash, offsetsHash); } size_t hashCombine(size_t hash1, size_t hash2, size_t hash3) const { return (hash1 ^ (hash2 << 1u)) ^ (hash3 << 2u); } }; struct KernelSubmissionData { std::unique_ptr kernelStandardTimestamps; std::unique_ptr kernelSubdeviceTimestamps; TunningStatus status; bool singleSubdevicePreferred = false; }; bool hasTunningFinished(KernelSubmissionData &submissionData); bool hasRunFinished(TimestampPacketContainer *timestampContainer); std::unordered_map kernelSubmissionMap; bool singleSubdevicePreferredInCurrentEnqueue = false; bool kernelHasIndirectAccess = true; MultiDeviceKernel *pMultiDeviceKernel = nullptr; std::unique_ptr pImplicitArgs = nullptr; }; } // namespace NEO compute-runtime-22.14.22890/opencl/source/kernel/kernel_extra.cpp000066400000000000000000000006651422164147700246100ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/debug_settings/debug_settings_manager.h" #include "opencl/source/helpers/cl_hw_helper.h" #include "opencl/source/kernel/kernel.h" namespace NEO { int32_t Kernel::setAdditionalKernelExecInfoWithParam(uint32_t paramName, size_t paramValueSize, const void *paramValue) { return CL_INVALID_VALUE; } } // namespace NEO compute-runtime-22.14.22890/opencl/source/kernel/kernel_info_cl.h000066400000000000000000000033411422164147700245350ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/kernel/kernel_arg_descriptor.h" #include "CL/cl.h" namespace NEO { constexpr cl_kernel_arg_access_qualifier asClKernelArgAccessQualifier(KernelArgMetadata::AccessQualifier accessQualifier) { using namespace KernelArgMetadata; switch (accessQualifier) { default: return 0U; case AccessNone: return CL_KERNEL_ARG_ACCESS_NONE; case AccessReadOnly: return 
CL_KERNEL_ARG_ACCESS_READ_ONLY; case AccessWriteOnly: return CL_KERNEL_ARG_ACCESS_WRITE_ONLY; case AccessReadWrite: return CL_KERNEL_ARG_ACCESS_READ_WRITE; } } constexpr cl_kernel_arg_address_qualifier asClKernelArgAddressQualifier(KernelArgMetadata::AddressSpaceQualifier addressQualifier) { using namespace KernelArgMetadata; switch (addressQualifier) { default: return 0U; case AddrGlobal: return CL_KERNEL_ARG_ADDRESS_GLOBAL; case AddrLocal: return CL_KERNEL_ARG_ADDRESS_LOCAL; case AddrPrivate: return CL_KERNEL_ARG_ADDRESS_PRIVATE; case AddrConstant: return CL_KERNEL_ARG_ADDRESS_CONSTANT; } } constexpr cl_kernel_arg_type_qualifier asClKernelArgTypeQualifier(KernelArgMetadata::TypeQualifiers typeQualifiers) { using namespace KernelArgMetadata; cl_kernel_arg_type_qualifier ret = 0U; ret |= (typeQualifiers.constQual) ? CL_KERNEL_ARG_TYPE_CONST : 0U; ret |= (typeQualifiers.volatileQual) ? CL_KERNEL_ARG_TYPE_VOLATILE : 0U; ret |= (typeQualifiers.restrictQual) ? CL_KERNEL_ARG_TYPE_RESTRICT : 0U; ret |= (typeQualifiers.pipeQual) ? 
CL_KERNEL_ARG_TYPE_PIPE : 0U; return ret; } } // namespace NEO compute-runtime-22.14.22890/opencl/source/kernel/kernel_objects_for_aux_translation.h000066400000000000000000000014521422164147700307170ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include namespace NEO { struct KernelObjForAuxTranslation { enum class Type { MEM_OBJ, GFX_ALLOC }; KernelObjForAuxTranslation(Type type, void *object) : type(type), object(object) {} Type type; void *object; bool operator==(const KernelObjForAuxTranslation &t) const { return (this->object == t.object); } }; struct KernelObjForAuxTranslationHash { std::size_t operator()(const KernelObjForAuxTranslation &kernelObj) const { return reinterpret_cast(kernelObj.object); } }; using KernelObjsForAuxTranslation = std::unordered_set; } // namespace NEO compute-runtime-22.14.22890/opencl/source/kernel/multi_device_kernel.cpp000066400000000000000000000140361422164147700261330ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/kernel/multi_device_kernel.h" namespace NEO { MultiDeviceKernel::~MultiDeviceKernel() { for (auto &pKernel : kernels) { if (pKernel) { pKernel->decRefInternal(); } } } Kernel *MultiDeviceKernel::determineDefaultKernel(KernelVectorType &kernelVector) { for (auto &pKernel : kernelVector) { if (pKernel) { return pKernel; } } UNRECOVERABLE_IF(true); return nullptr; } MultiDeviceKernel::MultiDeviceKernel(KernelVectorType kernelVector, const KernelInfoContainer kernelInfosArg) : kernels(std::move(kernelVector)), defaultKernel(MultiDeviceKernel::determineDefaultKernel(kernels)), program(defaultKernel->getProgram()), kernelInfos(kernelInfosArg) { for (auto &pKernel : kernels) { if (pKernel) { pKernel->incRefInternal(); pKernel->setMultiDeviceKernel(this); } } }; const std::vector &MultiDeviceKernel::getKernelArguments() const { return 
defaultKernel->getKernelArguments(); } cl_int MultiDeviceKernel::getInfo(cl_kernel_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) const { return defaultKernel->getInfo(paramName, paramValueSize, paramValue, paramValueSizeRet); } cl_int MultiDeviceKernel::getArgInfo(cl_uint argIndx, cl_kernel_arg_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) const { return defaultKernel->getArgInfo(argIndx, paramName, paramValueSize, paramValue, paramValueSizeRet); } const ClDeviceVector &MultiDeviceKernel::getDevices() const { return program->getDevicesInProgram(); } size_t MultiDeviceKernel::getKernelArgsNumber() const { return defaultKernel->getKernelArgsNumber(); } Context &MultiDeviceKernel::getContext() const { return defaultKernel->getContext(); } bool MultiDeviceKernel::getHasIndirectAccess() const { return defaultKernel->getHasIndirectAccess(); } cl_int MultiDeviceKernel::checkCorrectImageAccessQualifier(cl_uint argIndex, size_t argSize, const void *argValue) const { return getResultFromEachKernel(&Kernel::checkCorrectImageAccessQualifier, argIndex, argSize, argValue); } void MultiDeviceKernel::unsetArg(uint32_t argIndex) { callOnEachKernel(&Kernel::unsetArg, argIndex); } cl_int MultiDeviceKernel::setArg(uint32_t argIndex, size_t argSize, const void *argVal) { return getResultFromEachKernel(&Kernel::setArgument, argIndex, argSize, argVal); } void MultiDeviceKernel::setUnifiedMemoryProperty(cl_kernel_exec_info infoType, bool infoValue) { callOnEachKernel(&Kernel::setUnifiedMemoryProperty, infoType, infoValue); } void MultiDeviceKernel::clearSvmKernelExecInfo() { callOnEachKernel(&Kernel::clearSvmKernelExecInfo); } void MultiDeviceKernel::clearUnifiedMemoryExecInfo() { callOnEachKernel(&Kernel::clearUnifiedMemoryExecInfo); } int MultiDeviceKernel::setKernelThreadArbitrationPolicy(uint32_t propertyValue) { return getResultFromEachKernel(&Kernel::setKernelThreadArbitrationPolicy, propertyValue); } cl_int 
MultiDeviceKernel::setKernelExecutionType(cl_execution_info_kernel_type_intel executionType) { return getResultFromEachKernel(&Kernel::setKernelExecutionType, executionType); } int32_t MultiDeviceKernel::setAdditionalKernelExecInfoWithParam(uint32_t paramName, size_t paramValueSize, const void *paramValue) { return getResultFromEachKernel(&Kernel::setAdditionalKernelExecInfoWithParam, paramName, paramValueSize, paramValue); } void MultiDeviceKernel::storeKernelArgAllocIdMemoryManagerCounter(uint32_t argIndex, uint32_t allocIdMemoryManagerCounter) { for (auto rootDeviceIndex = 0u; rootDeviceIndex < kernels.size(); rootDeviceIndex++) { auto pKernel = getKernel(rootDeviceIndex); if (pKernel) { pKernel->storeKernelArgAllocIdMemoryManagerCounter(argIndex, allocIdMemoryManagerCounter); } } } cl_int MultiDeviceKernel::cloneKernel(MultiDeviceKernel *pSourceMultiDeviceKernel) { for (auto rootDeviceIndex = 0u; rootDeviceIndex < kernels.size(); rootDeviceIndex++) { auto pSrcKernel = pSourceMultiDeviceKernel->getKernel(rootDeviceIndex); auto pDstKernel = getKernel(rootDeviceIndex); if (pSrcKernel) { pDstKernel->cloneKernel(pSrcKernel); } } return CL_SUCCESS; } cl_int MultiDeviceKernel::setArgSvmAlloc(uint32_t argIndex, void *svmPtr, MultiGraphicsAllocation *svmAllocs, uint32_t allocId) { for (auto rootDeviceIndex = 0u; rootDeviceIndex < kernels.size(); rootDeviceIndex++) { auto pKernel = getKernel(rootDeviceIndex); if (pKernel) { if (svmAllocs && (svmAllocs->getGraphicsAllocations().size() <= rootDeviceIndex || !svmAllocs->getGraphicsAllocation(rootDeviceIndex))) { continue; } auto svmAlloc = svmAllocs ? 
svmAllocs->getGraphicsAllocation(rootDeviceIndex) : nullptr; pKernel->setArgSvmAlloc(argIndex, svmPtr, svmAlloc, allocId); } } return CL_SUCCESS; } void MultiDeviceKernel::setSvmKernelExecInfo(const MultiGraphicsAllocation &argValue) { for (auto rootDeviceIndex = 0u; rootDeviceIndex < kernels.size(); rootDeviceIndex++) { auto pKernel = getKernel(rootDeviceIndex); if (pKernel) { pKernel->setSvmKernelExecInfo(argValue.getGraphicsAllocation(rootDeviceIndex)); } } } void MultiDeviceKernel::setUnifiedMemoryExecInfo(const MultiGraphicsAllocation &argValue) { for (auto rootDeviceIndex = 0u; rootDeviceIndex < kernels.size(); rootDeviceIndex++) { auto pKernel = getKernel(rootDeviceIndex); if (pKernel) { pKernel->setUnifiedMemoryExecInfo(argValue.getGraphicsAllocation(rootDeviceIndex)); } } } } // namespace NEO compute-runtime-22.14.22890/opencl/source/kernel/multi_device_kernel.h000066400000000000000000000105221422164147700255740ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/kernel/kernel.h" namespace NEO { template <> struct OpenCLObjectMapper<_cl_kernel> { typedef class MultiDeviceKernel DerivedType; }; using KernelVectorType = StackVec; using KernelInfoContainer = StackVec; class MultiDeviceKernel : public BaseObject<_cl_kernel> { public: static const cl_ulong objectMagic = 0x3284ADC8EA0AFE25LL; ~MultiDeviceKernel() override; MultiDeviceKernel(KernelVectorType kernelVector, const KernelInfoContainer kernelInfosArg); Kernel *getKernel(uint32_t rootDeviceIndex) const { return kernels[rootDeviceIndex]; } Kernel *getDefaultKernel() const { return defaultKernel; } template static multi_device_kernel_t *create(program_t *program, const KernelInfoContainer &kernelInfos, cl_int *errcodeRet) { KernelVectorType kernels{}; kernels.resize(program->getMaxRootDeviceIndex() + 1); for (auto &pDevice : program->getDevicesInProgram()) { auto rootDeviceIndex = 
pDevice->getRootDeviceIndex(); if (kernels[rootDeviceIndex]) { continue; } kernels[rootDeviceIndex] = Kernel::create(program, *kernelInfos[rootDeviceIndex], *pDevice, errcodeRet); if (!kernels[rootDeviceIndex]) { return nullptr; } } auto pMultiDeviceKernel = new multi_device_kernel_t(std::move(kernels), kernelInfos); return pMultiDeviceKernel; } cl_int cloneKernel(MultiDeviceKernel *pSourceMultiDeviceKernel); const std::vector &getKernelArguments() const; cl_int checkCorrectImageAccessQualifier(cl_uint argIndex, size_t argSize, const void *argValue) const; void unsetArg(uint32_t argIndex); cl_int setArg(uint32_t argIndex, size_t argSize, const void *argVal); cl_int getInfo(cl_kernel_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) const; cl_int getArgInfo(cl_uint argIndx, cl_kernel_arg_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) const; const ClDeviceVector &getDevices() const; size_t getKernelArgsNumber() const; Context &getContext() const; cl_int setArgSvmAlloc(uint32_t argIndex, void *svmPtr, MultiGraphicsAllocation *svmAllocs, uint32_t allocId); bool getHasIndirectAccess() const; void setUnifiedMemoryProperty(cl_kernel_exec_info infoType, bool infoValue); void setSvmKernelExecInfo(const MultiGraphicsAllocation &argValue); void clearSvmKernelExecInfo(); void setUnifiedMemoryExecInfo(const MultiGraphicsAllocation &argValue); void clearUnifiedMemoryExecInfo(); int setKernelThreadArbitrationPolicy(uint32_t propertyValue); cl_int setKernelExecutionType(cl_execution_info_kernel_type_intel executionType); int32_t setAdditionalKernelExecInfoWithParam(uint32_t paramName, size_t paramValueSize, const void *paramValue); void storeKernelArgAllocIdMemoryManagerCounter(uint32_t argIndex, uint32_t allocIdMemoryManagerCounter); Program *getProgram() const { return program; } const KernelInfoContainer &getKernelInfos() const { return kernelInfos; } protected: template cl_int 
getResultFromEachKernel(FuncType function, Args &&...args) const { cl_int retVal = CL_INVALID_VALUE; for (auto &pKernel : kernels) { if (pKernel) { retVal = (pKernel->*function)(std::forward(args)...); if (CL_SUCCESS != retVal) { break; } } } return retVal; } template void callOnEachKernel(FuncType function, Args &&...args) { for (auto &pKernel : kernels) { if (pKernel) { (pKernel->*function)(std::forward(args)...); } } } static Kernel *determineDefaultKernel(KernelVectorType &kernelVector); KernelVectorType kernels; Kernel *defaultKernel = nullptr; Program *program = nullptr; const KernelInfoContainer kernelInfos; }; } // namespace NEO compute-runtime-22.14.22890/opencl/source/mem_obj/000077500000000000000000000000001422164147700215425ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/source/mem_obj/CMakeLists.txt000066400000000000000000000024061422164147700243040ustar00rootroot00000000000000# # Copyright (C) 2018-2022 Intel Corporation # # SPDX-License-Identifier: MIT # set(RUNTIME_SRCS_MEM_OBJ ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/buffer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/buffer.h ${CMAKE_CURRENT_SOURCE_DIR}/buffer_base.inl ${CMAKE_CURRENT_SOURCE_DIR}/buffer_factory_init.inl ${CMAKE_CURRENT_SOURCE_DIR}/image.cpp ${CMAKE_CURRENT_SOURCE_DIR}/image.h ${CMAKE_CURRENT_SOURCE_DIR}/image.inl ${CMAKE_CURRENT_SOURCE_DIR}/image_tgllp_and_later.inl ${CMAKE_CURRENT_SOURCE_DIR}/image_factory_init.inl ${CMAKE_CURRENT_SOURCE_DIR}/map_operations_handler.cpp ${CMAKE_CURRENT_SOURCE_DIR}/map_operations_handler.h ${CMAKE_CURRENT_SOURCE_DIR}/mem_obj.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mem_obj.h ${CMAKE_CURRENT_SOURCE_DIR}/mem_obj_helper.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mem_obj_helper.h ${CMAKE_CURRENT_SOURCE_DIR}/pipe.cpp ${CMAKE_CURRENT_SOURCE_DIR}/pipe.h ${CMAKE_CURRENT_SOURCE_DIR}/definitions${BRANCH_DIR_SUFFIX}buffer_ext.cpp ${CMAKE_CURRENT_SOURCE_DIR}/definitions${BRANCH_DIR_SUFFIX}image_ext.inl ) target_sources(${NEO_STATIC_LIB_NAME} 
PRIVATE ${RUNTIME_SRCS_MEM_OBJ}) set_property(GLOBAL PROPERTY RUNTIME_SRCS_MEM_OBJ ${RUNTIME_SRCS_MEM_OBJ}) add_subdirectories() compute-runtime-22.14.22890/opencl/source/mem_obj/buffer.cpp000066400000000000000000001130531422164147700235220ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/mem_obj/buffer.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/gmm_helper/gmm.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/get_info.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/helpers/ptr_math.h" #include "shared/source/helpers/string.h" #include "shared/source/helpers/timestamp_packet.h" #include "shared/source/memory_manager/host_ptr_manager.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/memory_manager/memory_operations_handler.h" #include "shared/source/memory_manager/unified_memory_manager.h" #include "shared/source/os_interface/hw_info_config.h" #include "shared/source/utilities/debug_settings_reader_creator.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/context/context.h" #include "opencl/source/helpers/cl_memory_properties_helpers.h" #include "opencl/source/helpers/cl_validators.h" #include "opencl/source/mem_obj/mem_obj_helper.h" #include "opencl/source/os_interface/ocl_reg_path.h" namespace NEO { BufferFactoryFuncs bufferFactory[IGFX_MAX_CORE] = {}; namespace BufferFunctions { ValidateInputAndCreateBufferFunc validateInputAndCreateBuffer = Buffer::validateInputAndCreateBuffer; } // namespace BufferFunctions Buffer::Buffer(Context 
*context, MemoryProperties memoryProperties, cl_mem_flags flags, cl_mem_flags_intel flagsIntel, size_t size, void *memoryStorage, void *hostPtr, MultiGraphicsAllocation multiGraphicsAllocation, bool zeroCopy, bool isHostPtrSVM, bool isObjectRedescribed) : MemObj(context, CL_MEM_OBJECT_BUFFER, memoryProperties, flags, flagsIntel, size, memoryStorage, hostPtr, std::move(multiGraphicsAllocation), zeroCopy, isHostPtrSVM, isObjectRedescribed) { magic = objectMagic; setHostPtrMinSize(size); } Buffer::Buffer() : MemObj(nullptr, CL_MEM_OBJECT_BUFFER, {}, 0, 0, 0, nullptr, nullptr, 0, false, false, false) { } Buffer::~Buffer() = default; bool Buffer::isSubBuffer() { return this->associatedMemObject != nullptr; } bool Buffer::isValidSubBufferOffset(size_t offset) { if (multiGraphicsAllocation.getDefaultGraphicsAllocation()->isCompressionEnabled()) { // From spec: "origin value is aligned to the CL_DEVICE_MEM_BASE_ADDR_ALIGN value" if (!isAligned(offset, this->getContext()->getDevice(0)->getDeviceInfo().memBaseAddressAlign / 8u)) { return false; } } cl_uint address_align = 32; // 4 byte alignment if ((offset & (address_align / 8 - 1)) == 0) { return true; } return false; } cl_mem Buffer::validateInputAndCreateBuffer(cl_context context, const cl_mem_properties *properties, cl_mem_flags flags, cl_mem_flags_intel flagsIntel, size_t size, void *hostPtr, cl_int &retVal) { Context *pContext = nullptr; retVal = validateObjects(WithCastToInternal(context, &pContext)); if (retVal != CL_SUCCESS) { return nullptr; } MemoryProperties memoryProperties{}; cl_mem_alloc_flags_intel allocflags = 0; cl_mem_flags_intel emptyFlagsIntel = 0; if ((false == ClMemoryPropertiesHelper::parseMemoryProperties(nullptr, memoryProperties, flags, emptyFlagsIntel, allocflags, MemoryPropertiesHelper::ObjType::BUFFER, *pContext)) || (false == MemObjHelper::validateMemoryPropertiesForBuffer(memoryProperties, flags, emptyFlagsIntel, *pContext))) { retVal = CL_INVALID_VALUE; return nullptr; } if ((false == 
ClMemoryPropertiesHelper::parseMemoryProperties(properties, memoryProperties, flags, flagsIntel, allocflags, MemoryPropertiesHelper::ObjType::BUFFER, *pContext)) || (false == MemObjHelper::validateMemoryPropertiesForBuffer(memoryProperties, flags, flagsIntel, *pContext))) { retVal = CL_INVALID_PROPERTY; return nullptr; } auto pDevice = pContext->getDevice(0); bool allowCreateBuffersWithUnrestrictedSize = isValueSet(flags, CL_MEM_ALLOW_UNRESTRICTED_SIZE_INTEL) || isValueSet(flagsIntel, CL_MEM_ALLOW_UNRESTRICTED_SIZE_INTEL) || DebugManager.flags.AllowUnrestrictedSize.get(); if (size == 0 || (size > pDevice->getDevice().getDeviceInfo().maxMemAllocSize && !allowCreateBuffersWithUnrestrictedSize)) { retVal = CL_INVALID_BUFFER_SIZE; return nullptr; } /* Check the host ptr and data */ bool expectHostPtr = (flags & (CL_MEM_COPY_HOST_PTR | CL_MEM_USE_HOST_PTR)) != 0; if ((hostPtr == nullptr) == expectHostPtr) { retVal = CL_INVALID_HOST_PTR; return nullptr; } // create the buffer auto buffer = create(pContext, memoryProperties, flags, flagsIntel, size, hostPtr, retVal); if (retVal == CL_SUCCESS) { buffer->storeProperties(properties); } return buffer; } Buffer *Buffer::create(Context *context, cl_mem_flags flags, size_t size, void *hostPtr, cl_int &errcodeRet) { return create(context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context->getDevice(0)->getDevice()), flags, 0, size, hostPtr, errcodeRet); } Buffer *Buffer::create(Context *context, MemoryProperties memoryProperties, cl_mem_flags flags, cl_mem_flags_intel flagsIntel, size_t size, void *hostPtr, cl_int &errcodeRet) { Buffer *pBuffer = nullptr; errcodeRet = CL_SUCCESS; MemoryManager *memoryManager = context->getMemoryManager(); UNRECOVERABLE_IF(!memoryManager); auto maxRootDeviceIndex = context->getMaxRootDeviceIndex(); auto multiGraphicsAllocation = MultiGraphicsAllocation(maxRootDeviceIndex); AllocationInfoType allocationInfo; allocationInfo.resize(maxRootDeviceIndex + 1u); void *ptr = nullptr; 
bool forceCopyHostPtr = false; bool copyExecuted = false; for (auto &rootDeviceIndex : context->getRootDeviceIndices()) { allocationInfo[rootDeviceIndex] = {}; auto hwInfo = (&memoryManager->peekExecutionEnvironment())->rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo(); bool compressionEnabled = MemObjHelper::isSuitableForCompression(HwHelper::compressedBuffersSupported(*hwInfo), memoryProperties, *context, HwHelper::get(hwInfo->platform.eRenderCoreFamily).isBufferSizeSuitableForCompression(size, *hwInfo)); allocationInfo[rootDeviceIndex].allocationType = getGraphicsAllocationTypeAndCompressionPreference(memoryProperties, *context, compressionEnabled, memoryManager->isLocalMemorySupported(rootDeviceIndex)); if (ptr) { if (!memoryProperties.flags.useHostPtr) { if (!memoryProperties.flags.copyHostPtr) { forceCopyHostPtr = true; } } checkMemory(memoryProperties, size, ptr, errcodeRet, allocationInfo[rootDeviceIndex].alignementSatisfied, allocationInfo[rootDeviceIndex].copyMemoryFromHostPtr, memoryManager, rootDeviceIndex, forceCopyHostPtr); } else { checkMemory(memoryProperties, size, hostPtr, errcodeRet, allocationInfo[rootDeviceIndex].alignementSatisfied, allocationInfo[rootDeviceIndex].copyMemoryFromHostPtr, memoryManager, rootDeviceIndex, false); } if (errcodeRet != CL_SUCCESS) { cleanAllGraphicsAllocations(*context, *memoryManager, allocationInfo, false); return nullptr; } if (compressionEnabled) { allocationInfo[rootDeviceIndex].zeroCopyAllowed = false; allocationInfo[rootDeviceIndex].allocateMemory = true; } if (allocationInfo[rootDeviceIndex].allocationType == AllocationType::BUFFER_HOST_MEMORY) { if (memoryProperties.flags.useHostPtr) { if (allocationInfo[rootDeviceIndex].alignementSatisfied) { allocationInfo[rootDeviceIndex].allocateMemory = false; allocationInfo[rootDeviceIndex].zeroCopyAllowed = true; } else { allocationInfo[rootDeviceIndex].zeroCopyAllowed = false; allocationInfo[rootDeviceIndex].allocateMemory = true; } } } if 
(memoryProperties.flags.useHostPtr) { if (DebugManager.flags.DisableZeroCopyForUseHostPtr.get()) { allocationInfo[rootDeviceIndex].zeroCopyAllowed = false; allocationInfo[rootDeviceIndex].allocateMemory = true; } auto svmManager = context->getSVMAllocsManager(); if (svmManager) { auto svmData = svmManager->getSVMAlloc(hostPtr); if (svmData) { allocationInfo[rootDeviceIndex].memory = svmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex); allocationInfo[rootDeviceIndex].allocationType = allocationInfo[rootDeviceIndex].memory->getAllocationType(); allocationInfo[rootDeviceIndex].isHostPtrSVM = true; allocationInfo[rootDeviceIndex].zeroCopyAllowed = allocationInfo[rootDeviceIndex].memory->getAllocationType() == AllocationType::SVM_ZERO_COPY; allocationInfo[rootDeviceIndex].copyMemoryFromHostPtr = false; allocationInfo[rootDeviceIndex].allocateMemory = false; allocationInfo[rootDeviceIndex].mapAllocation = svmData->cpuAllocation; } } } if (context->isSharedContext) { allocationInfo[rootDeviceIndex].zeroCopyAllowed = true; allocationInfo[rootDeviceIndex].copyMemoryFromHostPtr = false; allocationInfo[rootDeviceIndex].allocateMemory = false; } if (hostPtr && context->isProvidingPerformanceHints()) { if (allocationInfo[rootDeviceIndex].zeroCopyAllowed) { context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_BUFFER_MEETS_ALIGNMENT_RESTRICTIONS, hostPtr, size); } else { context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_BUFFER_DOESNT_MEET_ALIGNMENT_RESTRICTIONS, hostPtr, size, MemoryConstants::pageSize, MemoryConstants::pageSize); } } if (DebugManager.flags.DisableZeroCopyForBuffers.get()) { allocationInfo[rootDeviceIndex].zeroCopyAllowed = false; } if (allocationInfo[rootDeviceIndex].allocateMemory && context->isProvidingPerformanceHints()) { context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_BUFFER_NEEDS_ALLOCATE_MEMORY); } if (!allocationInfo[rootDeviceIndex].memory) { if (ptr) { 
allocationInfo[rootDeviceIndex].allocateMemory = false; AllocationProperties allocProperties = MemoryPropertiesHelper::getAllocationProperties(rootDeviceIndex, memoryProperties, allocationInfo[rootDeviceIndex].allocateMemory, size, allocationInfo[rootDeviceIndex].allocationType, context->areMultiStorageAllocationsPreferred(), *hwInfo, context->getDeviceBitfieldForAllocation(rootDeviceIndex), context->isSingleDeviceContext()); allocProperties.flags.crossRootDeviceAccess = context->getRootDeviceIndices().size() > 1; allocProperties.flags.preferCompressed = compressionEnabled; allocationInfo[rootDeviceIndex].memory = memoryManager->createGraphicsAllocationFromExistingStorage(allocProperties, ptr, multiGraphicsAllocation); } else { AllocationProperties allocProperties = MemoryPropertiesHelper::getAllocationProperties(rootDeviceIndex, memoryProperties, allocationInfo[rootDeviceIndex].allocateMemory, size, allocationInfo[rootDeviceIndex].allocationType, context->areMultiStorageAllocationsPreferred(), *hwInfo, context->getDeviceBitfieldForAllocation(rootDeviceIndex), context->isSingleDeviceContext()); allocProperties.flags.crossRootDeviceAccess = context->getRootDeviceIndices().size() > 1; allocProperties.flags.preferCompressed = compressionEnabled; allocationInfo[rootDeviceIndex].memory = memoryManager->allocateGraphicsMemoryWithProperties(allocProperties, hostPtr); if (allocationInfo[rootDeviceIndex].memory) { ptr = reinterpret_cast(allocationInfo[rootDeviceIndex].memory->getUnderlyingBuffer()); } } } if (allocationInfo[rootDeviceIndex].allocateMemory && allocationInfo[rootDeviceIndex].memory && MemoryPool::isSystemMemoryPool(allocationInfo[rootDeviceIndex].memory->getMemoryPool())) { memoryManager->addAllocationToHostPtrManager(allocationInfo[rootDeviceIndex].memory); } //if allocation failed for CL_MEM_USE_HOST_PTR case retry with non zero copy path if (memoryProperties.flags.useHostPtr && !allocationInfo[rootDeviceIndex].memory && 
Buffer::isReadOnlyMemoryPermittedByFlags(memoryProperties)) { allocationInfo[rootDeviceIndex].allocationType = AllocationType::BUFFER_HOST_MEMORY; allocationInfo[rootDeviceIndex].zeroCopyAllowed = false; allocationInfo[rootDeviceIndex].copyMemoryFromHostPtr = true; AllocationProperties allocProperties = MemoryPropertiesHelper::getAllocationProperties(rootDeviceIndex, memoryProperties, true, // allocateMemory size, allocationInfo[rootDeviceIndex].allocationType, context->areMultiStorageAllocationsPreferred(), *hwInfo, context->getDeviceBitfieldForAllocation(rootDeviceIndex), context->isSingleDeviceContext()); allocProperties.flags.crossRootDeviceAccess = context->getRootDeviceIndices().size() > 1; allocationInfo[rootDeviceIndex].memory = memoryManager->allocateGraphicsMemoryWithProperties(allocProperties); } if (!allocationInfo[rootDeviceIndex].memory) { errcodeRet = CL_OUT_OF_HOST_MEMORY; cleanAllGraphicsAllocations(*context, *memoryManager, allocationInfo, false); return nullptr; } if (!MemoryPool::isSystemMemoryPool(allocationInfo[rootDeviceIndex].memory->getMemoryPool())) { allocationInfo[rootDeviceIndex].zeroCopyAllowed = false; if (hostPtr) { if (!allocationInfo[rootDeviceIndex].isHostPtrSVM) { allocationInfo[rootDeviceIndex].copyMemoryFromHostPtr = true; } } } else if (allocationInfo[rootDeviceIndex].allocationType == AllocationType::BUFFER && !compressionEnabled) { allocationInfo[rootDeviceIndex].allocationType = AllocationType::BUFFER_HOST_MEMORY; } allocationInfo[rootDeviceIndex].memory->setAllocationType(allocationInfo[rootDeviceIndex].allocationType); allocationInfo[rootDeviceIndex].memory->setMemObjectsAllocationWithWritableFlags(!(memoryProperties.flags.readOnly || memoryProperties.flags.hostReadOnly || memoryProperties.flags.hostNoAccess)); multiGraphicsAllocation.addAllocation(allocationInfo[rootDeviceIndex].memory); if (forceCopyHostPtr) { allocationInfo[rootDeviceIndex].copyMemoryFromHostPtr = false; } } auto rootDeviceIndex = 
context->getDevice(0u)->getRootDeviceIndex(); auto memoryStorage = multiGraphicsAllocation.getDefaultGraphicsAllocation()->getUnderlyingBuffer(); pBuffer = createBufferHw(context, memoryProperties, flags, flagsIntel, size, memoryStorage, (memoryProperties.flags.useHostPtr) ? hostPtr : nullptr, std::move(multiGraphicsAllocation), allocationInfo[rootDeviceIndex].zeroCopyAllowed, allocationInfo[rootDeviceIndex].isHostPtrSVM, false); if (!pBuffer) { errcodeRet = CL_OUT_OF_HOST_MEMORY; cleanAllGraphicsAllocations(*context, *memoryManager, allocationInfo, false); return nullptr; } DBG_LOG(LogMemoryObject, __FUNCTION__, "Created Buffer: Handle: ", pBuffer, ", hostPtr: ", hostPtr, ", size: ", size, ", memoryStorage: ", allocationInfo[rootDeviceIndex].memory->getUnderlyingBuffer(), ", GPU address: ", allocationInfo[rootDeviceIndex].memory->getGpuAddress(), ", memoryPool: ", allocationInfo[rootDeviceIndex].memory->getMemoryPool()); for (auto &rootDeviceIndex : context->getRootDeviceIndices()) { if (memoryProperties.flags.useHostPtr) { if (!allocationInfo[rootDeviceIndex].zeroCopyAllowed && !allocationInfo[rootDeviceIndex].isHostPtrSVM) { AllocationProperties properties{rootDeviceIndex, false, // allocateMemory size, AllocationType::MAP_ALLOCATION, false, // isMultiStorageAllocation context->getDeviceBitfieldForAllocation(rootDeviceIndex)}; properties.flags.flushL3RequiredForRead = properties.flags.flushL3RequiredForWrite = true; allocationInfo[rootDeviceIndex].mapAllocation = memoryManager->allocateGraphicsMemoryWithProperties(properties, hostPtr); } } Buffer::provideCompressionHint(allocationInfo[rootDeviceIndex].memory->isCompressionEnabled(), context, pBuffer); if (allocationInfo[rootDeviceIndex].mapAllocation) { pBuffer->mapAllocations.addAllocation(allocationInfo[rootDeviceIndex].mapAllocation); } pBuffer->setHostPtrMinSize(size); if (allocationInfo[rootDeviceIndex].copyMemoryFromHostPtr && !copyExecuted) { auto isLocalMemory = 
!MemoryPool::isSystemMemoryPool(allocationInfo[rootDeviceIndex].memory->getMemoryPool()); bool gpuCopyRequired = (allocationInfo[rootDeviceIndex].memory->isCompressionEnabled()) || isLocalMemory; if (gpuCopyRequired) { auto &device = pBuffer->getContext()->getDevice(0u)->getDevice(); auto &hwInfo = device.getHardwareInfo(); auto hwInfoConfig = HwInfoConfig::get(hwInfo.platform.eProductFamily); auto blitMemoryToAllocationResult = BlitOperationResult::Unsupported; if (hwInfoConfig->isBlitterFullySupported(hwInfo) && isLocalMemory) { blitMemoryToAllocationResult = BlitHelperFunctions::blitMemoryToAllocation(device, allocationInfo[rootDeviceIndex].memory, pBuffer->getOffset(), hostPtr, {size, 1, 1}); } if (blitMemoryToAllocationResult != BlitOperationResult::Success) { auto cmdQ = context->getSpecialQueue(rootDeviceIndex); if (CL_SUCCESS != cmdQ->enqueueWriteBuffer(pBuffer, CL_TRUE, 0, size, hostPtr, allocationInfo[rootDeviceIndex].mapAllocation, 0, nullptr, nullptr)) { errcodeRet = CL_OUT_OF_RESOURCES; } } copyExecuted = true; } else { memcpy_s(allocationInfo[rootDeviceIndex].memory->getUnderlyingBuffer(), size, hostPtr, size); copyExecuted = true; } } } if (errcodeRet != CL_SUCCESS) { pBuffer->release(); return nullptr; } if (DebugManager.flags.MakeAllBuffersResident.get()) { for (size_t deviceNum = 0u; deviceNum < context->getNumDevices(); deviceNum++) { auto device = context->getDevice(deviceNum); auto graphicsAllocation = pBuffer->getGraphicsAllocation(device->getRootDeviceIndex()); auto rootDeviceEnvironment = pBuffer->executionEnvironment->rootDeviceEnvironments[device->getRootDeviceIndex()].get(); rootDeviceEnvironment->memoryOperationsInterface->makeResident(&device->getDevice(), ArrayRef(&graphicsAllocation, 1)); } } return pBuffer; } Buffer *Buffer::createSharedBuffer(Context *context, cl_mem_flags flags, SharingHandler *sharingHandler, MultiGraphicsAllocation multiGraphicsAllocation) { auto rootDeviceIndex = context->getDevice(0)->getRootDeviceIndex(); auto 
size = multiGraphicsAllocation.getGraphicsAllocation(rootDeviceIndex)->getUnderlyingBufferSize(); auto sharedBuffer = createBufferHw( context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context->getDevice(0)->getDevice()), flags, 0, size, nullptr, nullptr, std::move(multiGraphicsAllocation), false, false, false); sharedBuffer->setSharingHandler(sharingHandler); return sharedBuffer; } void Buffer::checkMemory(MemoryProperties memoryProperties, size_t size, void *hostPtr, cl_int &errcodeRet, bool &alignementSatisfied, bool ©MemoryFromHostPtr, MemoryManager *memoryManager, uint32_t rootDeviceIndex, bool forceCopyHostPtr) { errcodeRet = CL_SUCCESS; alignementSatisfied = true; copyMemoryFromHostPtr = false; uintptr_t minAddress = 0; auto memRestrictions = memoryManager->getAlignedMallocRestrictions(); if (memRestrictions) { minAddress = memRestrictions->minAddress; } if (hostPtr) { if (!(memoryProperties.flags.useHostPtr || memoryProperties.flags.copyHostPtr || forceCopyHostPtr)) { errcodeRet = CL_INVALID_HOST_PTR; return; } } if (memoryProperties.flags.useHostPtr) { if (hostPtr) { auto fragment = memoryManager->getHostPtrManager()->getFragment({hostPtr, rootDeviceIndex}); if (fragment && fragment->driverAllocation) { errcodeRet = CL_INVALID_HOST_PTR; return; } if (alignUp(hostPtr, MemoryConstants::cacheLineSize) != hostPtr || alignUp(size, MemoryConstants::cacheLineSize) != size || minAddress > reinterpret_cast(hostPtr)) { alignementSatisfied = false; copyMemoryFromHostPtr = true; } } else { errcodeRet = CL_INVALID_HOST_PTR; } } if (memoryProperties.flags.copyHostPtr || forceCopyHostPtr) { if (hostPtr) { copyMemoryFromHostPtr = true; } else { errcodeRet = CL_INVALID_HOST_PTR; } } return; } AllocationType Buffer::getGraphicsAllocationTypeAndCompressionPreference(const MemoryProperties &properties, Context &context, bool &compressionEnabled, bool isLocalMemoryEnabled) { if (context.isSharedContext || properties.flags.forceHostMemory) { 
compressionEnabled = false; return AllocationType::BUFFER_HOST_MEMORY; } if (properties.flags.useHostPtr && !isLocalMemoryEnabled) { compressionEnabled = false; return AllocationType::BUFFER_HOST_MEMORY; } return AllocationType::BUFFER; } bool Buffer::isReadOnlyMemoryPermittedByFlags(const MemoryProperties &properties) { // Host won't access or will only read and kernel will only read return (properties.flags.hostNoAccess || properties.flags.hostReadOnly) && properties.flags.readOnly; } Buffer *Buffer::createSubBuffer(cl_mem_flags flags, cl_mem_flags_intel flagsIntel, const cl_buffer_region *region, cl_int &errcodeRet) { DEBUG_BREAK_IF(nullptr == createFunction); MemoryProperties memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(flags, flagsIntel, 0, &this->context->getDevice(0)->getDevice()); auto buffer = createFunction(this->context, memoryProperties, flags, 0, region->size, ptrOffset(this->memoryStorage, region->origin), this->hostPtr ? ptrOffset(this->hostPtr, region->origin) : nullptr, this->multiGraphicsAllocation, this->isZeroCopy, this->isHostPtrSVM, false); if (this->context->isProvidingPerformanceHints()) { this->context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, SUBBUFFER_SHARES_MEMORY, static_cast(this)); } buffer->associatedMemObject = this; buffer->offset = region->origin; buffer->setParentSharingHandler(this->getSharingHandler()); this->incRefInternal(); errcodeRet = CL_SUCCESS; return buffer; } uint64_t Buffer::setArgStateless(void *memory, uint32_t patchSize, uint32_t rootDeviceIndex, bool set32BitAddressing) { // Subbuffers have offset that graphicsAllocation is not aware of auto graphicsAllocation = multiGraphicsAllocation.getGraphicsAllocation(rootDeviceIndex); uintptr_t addressToPatch = ((set32BitAddressing) ? 
static_cast(graphicsAllocation->getGpuAddressToPatch()) : static_cast(graphicsAllocation->getGpuAddress())) + this->offset; DEBUG_BREAK_IF(!(graphicsAllocation->isLocked() || (addressToPatch != 0) || (graphicsAllocation->getGpuBaseAddress() != 0) || (this->getCpuAddress() == nullptr && graphicsAllocation->peekSharedHandle()))); patchWithRequiredSize(memory, patchSize, addressToPatch); return addressToPatch; } bool Buffer::bufferRectPitchSet(const size_t *bufferOrigin, const size_t *region, size_t &bufferRowPitch, size_t &bufferSlicePitch, size_t &hostRowPitch, size_t &hostSlicePitch, bool isSrcBuffer) { if (bufferRowPitch == 0) bufferRowPitch = region[0]; if (bufferSlicePitch == 0) bufferSlicePitch = region[1] * bufferRowPitch; if (hostRowPitch == 0) hostRowPitch = region[0]; if (hostSlicePitch == 0) hostSlicePitch = region[1] * hostRowPitch; if (region[0] == 0 || region[1] == 0 || region[2] == 0) { return false; } if (bufferRowPitch < region[0] || hostRowPitch < region[0]) { return false; } if ((bufferSlicePitch < region[1] * bufferRowPitch || bufferSlicePitch % bufferRowPitch != 0) || (hostSlicePitch < region[1] * hostRowPitch || hostSlicePitch % hostRowPitch != 0)) { return false; } auto slicePitch = isSrcBuffer ? bufferSlicePitch : hostSlicePitch; auto rowPitch = isSrcBuffer ? 
bufferRowPitch : hostRowPitch; if ((bufferOrigin[2] + region[2] - 1) * slicePitch + (bufferOrigin[1] + region[1] - 1) * rowPitch + bufferOrigin[0] + region[0] > this->getSize()) { return false; } return true; } void Buffer::transferData(void *dst, void *src, size_t copySize, size_t copyOffset) { DBG_LOG(LogMemoryObject, __FUNCTION__, " hostPtr: ", hostPtr, ", size: ", copySize, ", offset: ", copyOffset, ", memoryStorage: ", memoryStorage); auto dstPtr = ptrOffset(dst, copyOffset); auto srcPtr = ptrOffset(src, copyOffset); memcpy_s(dstPtr, copySize, srcPtr, copySize); } void Buffer::transferDataToHostPtr(MemObjSizeArray ©Size, MemObjOffsetArray ©Offset) { transferData(hostPtr, memoryStorage, copySize[0], copyOffset[0]); } void Buffer::transferDataFromHostPtr(MemObjSizeArray ©Size, MemObjOffsetArray ©Offset) { transferData(memoryStorage, hostPtr, copySize[0], copyOffset[0]); } size_t Buffer::calculateHostPtrSize(const size_t *origin, const size_t *region, size_t rowPitch, size_t slicePitch) { size_t hostPtrOffsetInBytes = origin[2] * slicePitch + origin[1] * rowPitch + origin[0]; size_t hostPtrRegionSizeInbytes = region[0] + rowPitch * (region[1] - 1) + slicePitch * (region[2] - 1); size_t hostPtrSize = hostPtrOffsetInBytes + hostPtrRegionSizeInbytes; return hostPtrSize; } bool Buffer::isReadWriteOnCpuAllowed(const Device &device) { if (forceDisallowCPUCopy) { return false; } auto rootDeviceIndex = device.getRootDeviceIndex(); if (this->isCompressed(rootDeviceIndex)) { return false; } auto graphicsAllocation = multiGraphicsAllocation.getGraphicsAllocation(rootDeviceIndex); if (graphicsAllocation->peekSharedHandle() != 0) { return false; } if (graphicsAllocation->isAllocatedInLocalMemoryPool()) { return false; } return true; } bool Buffer::isReadWriteOnCpuPreferred(void *ptr, size_t size, const Device &device) { auto graphicsAllocation = multiGraphicsAllocation.getGraphicsAllocation(device.getRootDeviceIndex()); if 
(MemoryPool::isSystemMemoryPool(graphicsAllocation->getMemoryPool())) { //if buffer is not zero copy and pointer is aligned it will be more beneficial to do the transfer on GPU if (!isMemObjZeroCopy() && (reinterpret_cast(ptr) & (MemoryConstants::cacheLineSize - 1)) == 0) { return false; } //on low power devices larger transfers are better on the GPU if (device.getSpecializedDevice()->getDeviceInfo().platformLP && size > maxBufferSizeForReadWriteOnCpu) { return false; } return true; } return false; } Buffer *Buffer::createBufferHw(Context *context, MemoryProperties memoryProperties, cl_mem_flags flags, cl_mem_flags_intel flagsIntel, size_t size, void *memoryStorage, void *hostPtr, MultiGraphicsAllocation multiGraphicsAllocation, bool zeroCopy, bool isHostPtrSVM, bool isImageRedescribed) { const auto device = context->getDevice(0); const auto &hwInfo = device->getHardwareInfo(); auto funcCreate = bufferFactory[hwInfo.platform.eRenderCoreFamily].createBufferFunction; DEBUG_BREAK_IF(nullptr == funcCreate); auto pBuffer = funcCreate(context, memoryProperties, flags, flagsIntel, size, memoryStorage, hostPtr, std::move(multiGraphicsAllocation), zeroCopy, isHostPtrSVM, isImageRedescribed); DEBUG_BREAK_IF(nullptr == pBuffer); if (pBuffer) { pBuffer->createFunction = funcCreate; } return pBuffer; } Buffer *Buffer::createBufferHwFromDevice(const Device *device, cl_mem_flags flags, cl_mem_flags_intel flagsIntel, size_t size, void *memoryStorage, void *hostPtr, MultiGraphicsAllocation multiGraphicsAllocation, size_t offset, bool zeroCopy, bool isHostPtrSVM, bool isImageRedescribed) { const auto &hwInfo = device->getHardwareInfo(); auto funcCreate = bufferFactory[hwInfo.platform.eRenderCoreFamily].createBufferFunction; DEBUG_BREAK_IF(nullptr == funcCreate); MemoryProperties memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(flags, flagsIntel, 0, device); auto pBuffer = funcCreate(nullptr, memoryProperties, flags, flagsIntel, size, memoryStorage, hostPtr, 
std::move(multiGraphicsAllocation), zeroCopy, isHostPtrSVM, isImageRedescribed); pBuffer->offset = offset; pBuffer->executionEnvironment = device->getExecutionEnvironment(); return pBuffer; } uint32_t Buffer::getMocsValue(bool disableL3Cache, bool isReadOnlyArgument, uint32_t rootDeviceIndex) const { uint64_t bufferAddress = 0; size_t bufferSize = 0; auto graphicsAllocation = multiGraphicsAllocation.getGraphicsAllocation(rootDeviceIndex); if (graphicsAllocation) { bufferAddress = graphicsAllocation->getGpuAddress(); bufferSize = graphicsAllocation->getUnderlyingBufferSize(); } else { bufferAddress = reinterpret_cast(getHostPtr()); bufferSize = getSize(); } bufferAddress += this->offset; bool readOnlyMemObj = isValueSet(getFlags(), CL_MEM_READ_ONLY) || isReadOnlyArgument; bool alignedMemObj = isAligned(bufferAddress) && isAligned(bufferSize); auto gmmHelper = executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->getGmmHelper(); if (!disableL3Cache && !isMemObjUncacheableForSurfaceState() && (alignedMemObj || readOnlyMemObj || !isMemObjZeroCopy())) { return gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); } else { return gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED); } } uint32_t Buffer::getSurfaceSize(bool alignSizeForAuxTranslation, uint32_t rootDeviceIndex) const { auto bufferAddress = getBufferAddress(rootDeviceIndex); auto bufferAddressAligned = alignDown(bufferAddress, 4); auto bufferOffset = ptrDiff(bufferAddress, bufferAddressAligned); uint32_t surfaceSize = static_cast(alignUp(getSize() + bufferOffset, alignSizeForAuxTranslation ? 512 : 4)); return surfaceSize; } uint64_t Buffer::getBufferAddress(uint32_t rootDeviceIndex) const { // The graphics allocation for Host Ptr surface will be created in makeResident call and GPU address is expected to be the same as CPU address auto graphicsAllocation = multiGraphicsAllocation.getGraphicsAllocation(rootDeviceIndex); auto bufferAddress = (graphicsAllocation != nullptr) ? 
graphicsAllocation->getGpuAddress() : castToUint64(getHostPtr()); bufferAddress += this->offset; return bufferAddress; } bool Buffer::isCompressed(uint32_t rootDeviceIndex) const { return multiGraphicsAllocation.getGraphicsAllocation(rootDeviceIndex)->isCompressionEnabled(); } void Buffer::setSurfaceState(const Device *device, void *surfaceState, bool forceNonAuxMode, bool disableL3, size_t svmSize, void *svmPtr, size_t offset, GraphicsAllocation *gfxAlloc, cl_mem_flags flags, cl_mem_flags_intel flagsIntel, bool useGlobalAtomics, bool areMultipleSubDevicesInContext) { auto multiGraphicsAllocation = MultiGraphicsAllocation(device->getRootDeviceIndex()); if (gfxAlloc) { multiGraphicsAllocation.addAllocation(gfxAlloc); } auto buffer = Buffer::createBufferHwFromDevice(device, flags, flagsIntel, svmSize, svmPtr, svmPtr, std::move(multiGraphicsAllocation), offset, true, false, false); buffer->setArgStateful(surfaceState, forceNonAuxMode, disableL3, false, false, *device, useGlobalAtomics, areMultipleSubDevicesInContext); delete buffer; } void Buffer::provideCompressionHint(bool compressionEnabled, Context *context, Buffer *buffer) { if (context->isProvidingPerformanceHints() && HwHelper::compressedBuffersSupported(context->getDevice(0)->getHardwareInfo())) { if (compressionEnabled) { context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, BUFFER_IS_COMPRESSED, buffer); } else { context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, BUFFER_IS_NOT_COMPRESSED, buffer); } } } } // namespace NEO compute-runtime-22.14.22890/opencl/source/mem_obj/buffer.h000066400000000000000000000261271422164147700231740ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/basic_math.h" #include "shared/source/helpers/constants.h" #include "opencl/extensions/public/cl_ext_private.h" #include "opencl/source/context/context_type.h" #include 
"opencl/source/mem_obj/mem_obj.h" #include "igfxfmid.h" #include "memory_properties_flags.h" #include namespace NEO { class Buffer; class ClDevice; class Device; class MemoryManager; struct EncodeSurfaceStateArgs; using BufferCreatFunc = Buffer *(*)(Context *context, MemoryProperties memoryProperties, cl_mem_flags flags, cl_mem_flags_intel flagsIntel, size_t size, void *memoryStorage, void *hostPtr, MultiGraphicsAllocation multiGraphicsAllocation, bool zeroCopy, bool isHostPtrSVM, bool isImageRedescribed); struct BufferFactoryFuncs { BufferCreatFunc createBufferFunction; }; extern BufferFactoryFuncs bufferFactory[IGFX_MAX_CORE]; namespace BufferFunctions { using ValidateInputAndCreateBufferFunc = std::function; extern ValidateInputAndCreateBufferFunc validateInputAndCreateBuffer; } // namespace BufferFunctions class Buffer : public MemObj { public: constexpr static size_t maxBufferSizeForReadWriteOnCpu = 10 * MB; constexpr static cl_ulong maskMagic = 0xFFFFFFFFFFFFFFFFLL; constexpr static cl_ulong objectMagic = MemObj::objectMagic | 0x02; bool forceDisallowCPUCopy = false; ~Buffer() override; static cl_mem validateInputAndCreateBuffer(cl_context context, const cl_mem_properties *properties, cl_mem_flags flags, cl_mem_flags_intel flagsIntel, size_t size, void *hostPtr, cl_int &retVal); static Buffer *create(Context *context, cl_mem_flags flags, size_t size, void *hostPtr, cl_int &errcodeRet); static Buffer *create(Context *context, MemoryProperties properties, cl_mem_flags flags, cl_mem_flags_intel flagsIntel, size_t size, void *hostPtr, cl_int &errcodeRet); static Buffer *createSharedBuffer(Context *context, cl_mem_flags flags, SharingHandler *sharingHandler, MultiGraphicsAllocation multiGraphicsAllocation); static Buffer *createBufferHw(Context *context, MemoryProperties memoryProperties, cl_mem_flags flags, cl_mem_flags_intel flagsIntel, size_t size, void *memoryStorage, void *hostPtr, MultiGraphicsAllocation multiGraphicsAllocation, bool zeroCopy, bool 
isHostPtrSVM, bool isImageRedescribed); static Buffer *createBufferHwFromDevice(const Device *device, cl_mem_flags flags, cl_mem_flags_intel flagsIntel, size_t size, void *memoryStorage, void *hostPtr, MultiGraphicsAllocation multiGraphicsAllocation, size_t offset, bool zeroCopy, bool isHostPtrSVM, bool isImageRedescribed); Buffer *createSubBuffer(cl_mem_flags flags, cl_mem_flags_intel flagsIntel, const cl_buffer_region *region, cl_int &errcodeRet); static void setSurfaceState(const Device *device, void *surfaceState, bool forceNonAuxMode, bool disableL3, size_t svmSize, void *svmPtr, size_t offset, GraphicsAllocation *gfxAlloc, cl_mem_flags flags, cl_mem_flags_intel flagsIntel, bool useGlobalAtomics, bool areMultipleSubDevicesInContext); static void provideCompressionHint(bool compressionEnabled, Context *context, Buffer *buffer); BufferCreatFunc createFunction = nullptr; bool isSubBuffer(); bool isValidSubBufferOffset(size_t offset); uint64_t setArgStateless(void *memory, uint32_t patchSize, uint32_t rootDeviceIndex, bool set32BitAddressing); virtual void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly, const Device &device, bool useGlobalAtomics, bool areMultipleSubDevicesInContext) = 0; bool bufferRectPitchSet(const size_t *bufferOrigin, const size_t *region, size_t &bufferRowPitch, size_t &bufferSlicePitch, size_t &hostRowPitch, size_t &hostSlicePitch, bool isSrcBuffer); static size_t calculateHostPtrSize(const size_t *origin, const size_t *region, size_t rowPitch, size_t slicePitch); void transferDataToHostPtr(MemObjSizeArray ©Size, MemObjOffsetArray ©Offset) override; void transferDataFromHostPtr(MemObjSizeArray ©Size, MemObjOffsetArray ©Offset) override; bool isReadWriteOnCpuAllowed(const Device &device); bool isReadWriteOnCpuPreferred(void *ptr, size_t size, const Device &device); uint32_t getMocsValue(bool disableL3Cache, bool isReadOnlyArgument, uint32_t rootDeviceIndex) const; uint32_t 
getSurfaceSize(bool alignSizeForAuxTranslation, uint32_t rootDeviceIndex) const; uint64_t getBufferAddress(uint32_t rootDeviceIndex) const; bool isCompressed(uint32_t rootDeviceIndex) const; protected: Buffer(Context *context, MemoryProperties memoryProperties, cl_mem_flags flags, cl_mem_flags_intel flagsIntel, size_t size, void *memoryStorage, void *hostPtr, MultiGraphicsAllocation multiGraphicsAllocation, bool zeroCopy, bool isHostPtrSVM, bool isObjectRedescribed); Buffer(); static void checkMemory(MemoryProperties memoryProperties, size_t size, void *hostPtr, cl_int &errcodeRet, bool &isZeroCopy, bool ©MemoryFromHostPtr, MemoryManager *memMngr, uint32_t rootDeviceIndex, bool forceCopyHostPtr); static AllocationType getGraphicsAllocationTypeAndCompressionPreference(const MemoryProperties &properties, Context &context, bool &compressionEnabled, bool localMemoryEnabled); static bool isReadOnlyMemoryPermittedByFlags(const MemoryProperties &properties); void transferData(void *dst, void *src, size_t copySize, size_t copyOffset); void appendSurfaceStateArgs(EncodeSurfaceStateArgs &args); }; template class BufferHw : public Buffer { public: BufferHw(Context *context, MemoryProperties memoryProperties, cl_mem_flags flags, cl_mem_flags_intel flagsIntel, size_t size, void *memoryStorage, void *hostPtr, MultiGraphicsAllocation multiGraphicsAllocation, bool zeroCopy, bool isHostPtrSVM, bool isObjectRedescribed) : Buffer(context, memoryProperties, flags, flagsIntel, size, memoryStorage, hostPtr, std::move(multiGraphicsAllocation), zeroCopy, isHostPtrSVM, isObjectRedescribed) {} void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnlyArgument, const Device &device, bool useGlobalAtomics, bool areMultipleSubDevicesInContext) override; static Buffer *create(Context *context, MemoryProperties memoryProperties, cl_mem_flags flags, cl_mem_flags_intel flagsIntel, size_t size, void *memoryStorage, void *hostPtr, 
MultiGraphicsAllocation multiGraphicsAllocation, bool zeroCopy, bool isHostPtrSVM, bool isObjectRedescribed) { auto buffer = new BufferHw(context, memoryProperties, flags, flagsIntel, size, memoryStorage, hostPtr, std::move(multiGraphicsAllocation), zeroCopy, isHostPtrSVM, isObjectRedescribed); buffer->surfaceType = SURFACE_STATE::SURFACE_TYPE_SURFTYPE_1D; return buffer; } typedef typename GfxFamily::RENDER_SURFACE_STATE SURFACE_STATE; typename SURFACE_STATE::SURFACE_TYPE surfaceType; }; } // namespace NEO compute-runtime-22.14.22890/opencl/source/mem_obj/buffer_base.inl000066400000000000000000000047041422164147700245160ustar00rootroot00000000000000/* * Copyright (C) 2019-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_container/command_encoder.h" #include "shared/source/command_container/implicit_scaling.h" #include "shared/source/device/device.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/gmm_helper/gmm.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/gmm_helper/resource_info.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/bit_helpers.h" #include "shared/source/helpers/populate_factory.h" #include "opencl/source/helpers/surface_formats.h" #include "opencl/source/mem_obj/buffer.h" #include "hw_cmds.h" namespace NEO { union SURFACE_STATE_BUFFER_LENGTH { uint32_t Length; struct SurfaceState { uint32_t Width : BITFIELD_RANGE(0, 6); uint32_t Height : BITFIELD_RANGE(7, 20); uint32_t Depth : BITFIELD_RANGE(21, 31); } SurfaceState; }; template void BufferHw::setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnlyArgument, const Device &device, bool useGlobalAtomics, bool areMultipleSubDevicesInContext) { auto rootDeviceIndex = device.getRootDeviceIndex(); auto graphicsAllocation = 
multiGraphicsAllocation.getGraphicsAllocation(rootDeviceIndex); const auto isReadOnly = isValueSet(getFlags(), CL_MEM_READ_ONLY) || isReadOnlyArgument; NEO::EncodeSurfaceStateArgs args; args.outMemory = memory; args.graphicsAddress = getBufferAddress(rootDeviceIndex); args.size = getSurfaceSize(alignSizeForAuxTranslation, rootDeviceIndex); args.mocs = getMocsValue(disableL3, isReadOnly, rootDeviceIndex); args.cpuCoherent = true; args.forceNonAuxMode = forceNonAuxMode; args.isReadOnly = isReadOnly; args.numAvailableDevices = device.getNumGenericSubDevices(); args.allocation = graphicsAllocation; args.gmmHelper = device.getGmmHelper(); args.useGlobalAtomics = useGlobalAtomics; args.areMultipleSubDevicesInContext = areMultipleSubDevicesInContext; args.implicitScaling = ImplicitScalingHelper::isImplicitScalingEnabled(device.getDeviceBitfield(), true); appendSurfaceStateArgs(args); EncodeSurfaceState::encodeBuffer(args); } } // namespace NEO compute-runtime-22.14.22890/opencl/source/mem_obj/buffer_factory_init.inl000066400000000000000000000004411422164147700262700ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ template <> void populateFactoryTable>() { extern BufferFactoryFuncs bufferFactory[IGFX_MAX_CORE]; bufferFactory[gfxCore].createBufferFunction = BufferHw::create; } compute-runtime-22.14.22890/opencl/source/mem_obj/definitions/000077500000000000000000000000001422164147700240555ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/source/mem_obj/definitions/buffer_ext.cpp000066400000000000000000000004311422164147700267100ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/mem_obj/buffer.h" #include "encode_surface_state_args.h" namespace NEO { void Buffer::appendSurfaceStateArgs(EncodeSurfaceStateArgs &args) { } } // namespace NEO 
compute-runtime-22.14.22890/opencl/source/mem_obj/definitions/image_ext.inl000066400000000000000000000004111422164147700265170ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/mem_obj/image.h" namespace NEO { template void ImageHw::appendSurfaceStateExt(void *memory) { } } // namespace NEO compute-runtime-22.14.22890/opencl/source/mem_obj/image.cpp000066400000000000000000002123241422164147700233340ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/mem_obj/image.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/gmm_helper/gmm.h" #include "shared/source/gmm_helper/resource_info.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/basic_math.h" #include "shared/source/helpers/get_info.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/helpers/ptr_math.h" #include "shared/source/helpers/string.h" #include "shared/source/memory_manager/memory_manager.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/cl_device/cl_device_get_cap.inl" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/context/context.h" #include "opencl/source/helpers/cl_hw_helper.h" #include "opencl/source/helpers/cl_memory_properties_helpers.h" #include "opencl/source/helpers/get_info_status_mapper.h" #include "opencl/source/helpers/gmm_types_converter.h" #include "opencl/source/helpers/mipmap.h" #include "opencl/source/helpers/surface_formats.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/mem_obj/mem_obj_helper.h" #include "opencl/source/platform/platform.h" #include "igfxfmid.h" #include namespace NEO { ImageFactoryFuncs imageFactory[IGFX_MAX_CORE] = {}; namespace ImageFunctions { ValidateAndCreateImageFunc 
validateAndCreateImage = Image::validateAndCreateImage; } // namespace ImageFunctions Image::Image(Context *context, const MemoryProperties &memoryProperties, cl_mem_flags flags, cl_mem_flags_intel flagsIntel, size_t size, void *memoryStorage, void *hostPtr, cl_image_format imageFormat, const cl_image_desc &imageDesc, bool zeroCopy, MultiGraphicsAllocation multiGraphicsAllocation, bool isObjectRedescribed, uint32_t baseMipLevel, uint32_t mipCount, const ClSurfaceFormatInfo &surfaceFormatInfo, const SurfaceOffsets *surfaceOffsets) : MemObj(context, imageDesc.image_type, memoryProperties, flags, flagsIntel, size, memoryStorage, hostPtr, std::move(multiGraphicsAllocation), zeroCopy, false, isObjectRedescribed), createFunction(nullptr), imageFormat(std::move(imageFormat)), imageDesc(imageDesc), surfaceFormatInfo(surfaceFormatInfo), cubeFaceIndex(__GMM_NO_CUBE_MAP), mediaPlaneType(0), baseMipLevel(baseMipLevel), mipCount(mipCount) { magic = objectMagic; if (surfaceOffsets) setSurfaceOffsets(surfaceOffsets->offset, surfaceOffsets->xOffset, surfaceOffsets->yOffset, surfaceOffsets->yOffsetForUVplane); else setSurfaceOffsets(0, 0, 0, 0); } void Image::transferData(void *dest, size_t destRowPitch, size_t destSlicePitch, void *src, size_t srcRowPitch, size_t srcSlicePitch, std::array copyRegion, std::array copyOrigin) { size_t pixelSize = surfaceFormatInfo.surfaceFormat.ImageElementSizeInBytes; size_t lineWidth = copyRegion[0] * pixelSize; DBG_LOG(LogMemoryObject, __FUNCTION__, "memcpy dest:", dest, "sizeRowToCopy:", lineWidth, "src:", src); if (imageDesc.image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) { // For 1DArray type, array region and origin are stored on 2nd position. For 2Darray its on 3rd position. 
std::swap(copyOrigin[1], copyOrigin[2]); std::swap(copyRegion[1], copyRegion[2]); } for (size_t slice = copyOrigin[2]; slice < (copyOrigin[2] + copyRegion[2]); slice++) { auto srcSliceOffset = ptrOffset(src, srcSlicePitch * slice); auto dstSliceOffset = ptrOffset(dest, destSlicePitch * slice); for (size_t height = copyOrigin[1]; height < (copyOrigin[1] + copyRegion[1]); height++) { auto srcRowOffset = ptrOffset(srcSliceOffset, srcRowPitch * height); auto dstRowOffset = ptrOffset(dstSliceOffset, destRowPitch * height); memcpy_s(ptrOffset(dstRowOffset, copyOrigin[0] * pixelSize), lineWidth, ptrOffset(srcRowOffset, copyOrigin[0] * pixelSize), lineWidth); } } } Image::~Image() = default; Image *Image::create(Context *context, const MemoryProperties &memoryProperties, cl_mem_flags flags, cl_mem_flags_intel flagsIntel, const ClSurfaceFormatInfo *surfaceFormat, const cl_image_desc *imageDesc, const void *hostPtr, cl_int &errcodeRet) { UNRECOVERABLE_IF(surfaceFormat == nullptr); Image *image = nullptr; MemoryManager *memoryManager = context->getMemoryManager(); Buffer *parentBuffer = castToObject(imageDesc->mem_object); Image *parentImage = castToObject(imageDesc->mem_object); auto &defaultHwHelper = HwHelper::get(context->getDevice(0)->getHardwareInfo().platform.eRenderCoreFamily); bool transferedMemory = false; do { size_t imageWidth = imageDesc->image_width; size_t imageHeight = 1; size_t imageDepth = 1; size_t imageCount = 1; size_t hostPtrMinSize = 0; cl_image_desc imageDescriptor = *imageDesc; ImageInfo imgInfo = {}; void *hostPtrToSet = nullptr; if (memoryProperties.flags.useHostPtr) { hostPtrToSet = const_cast(hostPtr); } imgInfo.imgDesc = Image::convertDescriptor(imageDescriptor); imgInfo.surfaceFormat = &surfaceFormat->surfaceFormat; imgInfo.mipCount = imageDesc->num_mip_levels; if (imageDesc->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY || imageDesc->image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) { imageCount = imageDesc->image_array_size; } switch 
(imageDesc->image_type) { case CL_MEM_OBJECT_IMAGE3D: imageDepth = imageDesc->image_depth; [[fallthrough]]; case CL_MEM_OBJECT_IMAGE2D: case CL_MEM_OBJECT_IMAGE2D_ARRAY: imageHeight = imageDesc->image_height; case CL_MEM_OBJECT_IMAGE1D: case CL_MEM_OBJECT_IMAGE1D_ARRAY: case CL_MEM_OBJECT_IMAGE1D_BUFFER: break; default: DEBUG_BREAK_IF("Unsupported cl_image_type"); break; } if (parentImage) { imageWidth = parentImage->getImageDesc().image_width; imageHeight = parentImage->getImageDesc().image_height; imageDepth = 1; if (isNV12Image(&parentImage->getImageFormat())) { if (imageDesc->image_depth == 1) { // UV Plane imageWidth /= 2; imageHeight /= 2; imgInfo.plane = GMM_PLANE_U; } else { imgInfo.plane = GMM_PLANE_Y; } } imgInfo.surfaceFormat = &parentImage->surfaceFormatInfo.surfaceFormat; imageDescriptor = parentImage->getImageDesc(); } auto hostPtrRowPitch = imageDesc->image_row_pitch ? imageDesc->image_row_pitch : imageWidth * surfaceFormat->surfaceFormat.ImageElementSizeInBytes; auto hostPtrSlicePitch = imageDesc->image_slice_pitch ? 
imageDesc->image_slice_pitch : hostPtrRowPitch * imageHeight; auto &clHwHelper = ClHwHelper::get(context->getDevice(0)->getHardwareInfo().platform.eRenderCoreFamily); imgInfo.linearStorage = !defaultHwHelper.tilingAllowed(context->isSharedContext, Image::isImage1d(*imageDesc), memoryProperties.flags.forceLinearStorage); bool preferCompression = MemObjHelper::isSuitableForCompression(!imgInfo.linearStorage, memoryProperties, *context, true); preferCompression &= clHwHelper.allowImageCompression(surfaceFormat->OCLImageFormat); preferCompression &= !clHwHelper.isFormatRedescribable(surfaceFormat->OCLImageFormat); if (!context->getDevice(0)->getSharedDeviceInfo().imageSupport && !imgInfo.linearStorage) { errcodeRet = CL_INVALID_OPERATION; return nullptr; } switch (imageDesc->image_type) { case CL_MEM_OBJECT_IMAGE3D: hostPtrMinSize = hostPtrSlicePitch * imageDepth; break; case CL_MEM_OBJECT_IMAGE2D: if (isNV12Image(&surfaceFormat->OCLImageFormat)) { hostPtrMinSize = hostPtrRowPitch * imageHeight + hostPtrRowPitch * imageHeight / 2; } else { hostPtrMinSize = hostPtrRowPitch * imageHeight; } hostPtrSlicePitch = 0; break; case CL_MEM_OBJECT_IMAGE1D_ARRAY: case CL_MEM_OBJECT_IMAGE2D_ARRAY: hostPtrMinSize = hostPtrSlicePitch * imageCount; break; case CL_MEM_OBJECT_IMAGE1D: case CL_MEM_OBJECT_IMAGE1D_BUFFER: hostPtrMinSize = hostPtrRowPitch; hostPtrSlicePitch = 0; break; default: DEBUG_BREAK_IF("Unsupported cl_image_type"); break; } auto maxRootDeviceIndex = context->getMaxRootDeviceIndex(); auto multiGraphicsAllocation = MultiGraphicsAllocation(maxRootDeviceIndex); AllocationInfoType allocationInfo; allocationInfo.resize(maxRootDeviceIndex + 1u); bool isParentObject = parentBuffer || parentImage; for (auto &rootDeviceIndex : context->getRootDeviceIndices()) { allocationInfo[rootDeviceIndex] = {}; allocationInfo[rootDeviceIndex].zeroCopyAllowed = false; Gmm *gmm = nullptr; auto &hwInfo = 
*memoryManager->peekExecutionEnvironment().rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo(); auto &hwHelper = HwHelper::get((&memoryManager->peekExecutionEnvironment())->rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo()->platform.eRenderCoreFamily); auto clientContext = (&memoryManager->peekExecutionEnvironment())->rootDeviceEnvironments[rootDeviceIndex]->getGmmClientContext(); if (((imageDesc->image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER) || (imageDesc->image_type == CL_MEM_OBJECT_IMAGE2D)) && (parentBuffer != nullptr)) { allocationInfo[rootDeviceIndex].memory = parentBuffer->getGraphicsAllocation(rootDeviceIndex); if (!hwHelper.checkResourceCompatibility(*allocationInfo[rootDeviceIndex].memory)) { cleanAllGraphicsAllocations(*context, *memoryManager, allocationInfo, isParentObject); errcodeRet = CL_INVALID_MEM_OBJECT; return nullptr; } // Image from buffer - we never allocate memory, we use what buffer provides allocationInfo[rootDeviceIndex].zeroCopyAllowed = true; hostPtr = parentBuffer->getHostPtr(); hostPtrToSet = const_cast(hostPtr); GmmTypesConverter::queryImgFromBufferParams(imgInfo, allocationInfo[rootDeviceIndex].memory); UNRECOVERABLE_IF(imgInfo.offset != 0); imgInfo.offset = parentBuffer->getOffset(); if (memoryManager->peekVirtualPaddingSupport() && (imageDesc->image_type == CL_MEM_OBJECT_IMAGE2D) && (allocationInfo[rootDeviceIndex].memory->getUnderlyingBuffer() != 0)) { // Retrieve sizes from GMM and apply virtual padding if buffer storage is not big enough auto queryGmmImgInfo(imgInfo); auto gmm = std::make_unique(clientContext, queryGmmImgInfo, StorageInfo{}, preferCompression); auto gmmAllocationSize = gmm->gmmResourceInfo->getSizeAllocation(); if (gmmAllocationSize > allocationInfo[rootDeviceIndex].memory->getUnderlyingBufferSize()) { allocationInfo[rootDeviceIndex].memory = memoryManager->createGraphicsAllocationWithPadding(allocationInfo[rootDeviceIndex].memory, gmmAllocationSize); } } } else if (parentImage != nullptr) { 
allocationInfo[rootDeviceIndex].memory = parentImage->getGraphicsAllocation(rootDeviceIndex); allocationInfo[rootDeviceIndex].memory->getDefaultGmm()->queryImageParams(imgInfo); } else { errcodeRet = CL_OUT_OF_HOST_MEMORY; if (memoryProperties.flags.useHostPtr) { if (!context->isSharedContext) { AllocationProperties allocProperties = MemObjHelper::getAllocationPropertiesWithImageInfo(rootDeviceIndex, imgInfo, false, // allocateMemory memoryProperties, hwInfo, context->getDeviceBitfieldForAllocation(rootDeviceIndex), context->isSingleDeviceContext()); allocProperties.flags.preferCompressed = preferCompression; allocationInfo[rootDeviceIndex].memory = memoryManager->allocateGraphicsMemoryWithProperties(allocProperties, hostPtr); if (allocationInfo[rootDeviceIndex].memory) { if (allocationInfo[rootDeviceIndex].memory->getUnderlyingBuffer() != hostPtr) { allocationInfo[rootDeviceIndex].zeroCopyAllowed = false; allocationInfo[rootDeviceIndex].transferNeeded = true; } else { allocationInfo[rootDeviceIndex].zeroCopyAllowed = true; } } } else { gmm = new Gmm(clientContext, imgInfo, StorageInfo{}, preferCompression); allocationInfo[rootDeviceIndex].memory = memoryManager->allocateGraphicsMemoryWithProperties({rootDeviceIndex, false, // allocateMemory imgInfo.size, AllocationType::SHARED_CONTEXT_IMAGE, false, // isMultiStorageAllocation context->getDeviceBitfieldForAllocation(rootDeviceIndex)}, hostPtr); allocationInfo[rootDeviceIndex].memory->setDefaultGmm(gmm); allocationInfo[rootDeviceIndex].zeroCopyAllowed = true; } if (!allocationInfo[rootDeviceIndex].zeroCopyAllowed) { if (allocationInfo[rootDeviceIndex].memory) { AllocationProperties properties{rootDeviceIndex, false, // allocateMemory hostPtrMinSize, AllocationType::MAP_ALLOCATION, false, // isMultiStorageAllocation context->getDeviceBitfieldForAllocation(rootDeviceIndex)}; properties.flags.flushL3RequiredForRead = properties.flags.flushL3RequiredForWrite = true; properties.flags.preferCompressed = preferCompression; 
allocationInfo[rootDeviceIndex].mapAllocation = memoryManager->allocateGraphicsMemoryWithProperties(properties, hostPtr); } } } else { AllocationProperties allocProperties = MemObjHelper::getAllocationPropertiesWithImageInfo(rootDeviceIndex, imgInfo, true, // allocateMemory memoryProperties, hwInfo, context->getDeviceBitfieldForAllocation(rootDeviceIndex), context->isSingleDeviceContext()); allocProperties.flags.preferCompressed = preferCompression; allocationInfo[rootDeviceIndex].memory = memoryManager->allocateGraphicsMemoryWithProperties(allocProperties); if (allocationInfo[rootDeviceIndex].memory && MemoryPool::isSystemMemoryPool(allocationInfo[rootDeviceIndex].memory->getMemoryPool())) { allocationInfo[rootDeviceIndex].zeroCopyAllowed = true; } } } allocationInfo[rootDeviceIndex].transferNeeded |= memoryProperties.flags.copyHostPtr; if (!allocationInfo[rootDeviceIndex].memory) { cleanAllGraphicsAllocations(*context, *memoryManager, allocationInfo, isParentObject); return image; } if (parentBuffer == nullptr) { allocationInfo[rootDeviceIndex].memory->setAllocationType(AllocationType::IMAGE); } allocationInfo[rootDeviceIndex].memory->setMemObjectsAllocationWithWritableFlags(!memoryProperties.flags.readOnly && !memoryProperties.flags.hostReadOnly && !memoryProperties.flags.hostNoAccess); DBG_LOG(LogMemoryObject, __FUNCTION__, "hostPtr:", hostPtr, "size:", allocationInfo[rootDeviceIndex].memory->getUnderlyingBufferSize(), "memoryStorage:", allocationInfo[rootDeviceIndex].memory->getUnderlyingBuffer(), "GPU address:", std::hex, allocationInfo[rootDeviceIndex].memory->getGpuAddress()); if (parentImage) { imageDescriptor.image_height = imageHeight; imageDescriptor.image_width = imageWidth; imageDescriptor.image_type = CL_MEM_OBJECT_IMAGE2D; imageDescriptor.image_depth = 1; imageDescriptor.image_array_size = 0; imageDescriptor.image_row_pitch = 0; imageDescriptor.image_slice_pitch = 0; imageDescriptor.mem_object = imageDesc->mem_object; parentImage->incRefInternal(); 
imgInfo.imgDesc = Image::convertDescriptor(imageDescriptor); } multiGraphicsAllocation.addAllocation(allocationInfo[rootDeviceIndex].memory); } auto defaultRootDeviceIndex = context->getDevice(0u)->getRootDeviceIndex(); multiGraphicsAllocation.setMultiStorage(context->getRootDeviceIndices().size() > 1); image = createImageHw(context, memoryProperties, flags, flagsIntel, imgInfo.size, hostPtrToSet, surfaceFormat->OCLImageFormat, imageDescriptor, allocationInfo[defaultRootDeviceIndex].zeroCopyAllowed, std::move(multiGraphicsAllocation), false, 0, 0, surfaceFormat); for (auto &rootDeviceIndex : context->getRootDeviceIndices()) { auto &hwInfo = *memoryManager->peekExecutionEnvironment().rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo(); if (context->isProvidingPerformanceHints() && HwHelper::compressedImagesSupported(hwInfo)) { if (allocationInfo[rootDeviceIndex].memory->isCompressionEnabled()) { context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, IMAGE_IS_COMPRESSED, image); } else { context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, IMAGE_IS_NOT_COMPRESSED, image); } } if (imageDesc->image_type != CL_MEM_OBJECT_IMAGE1D_ARRAY && imageDesc->image_type != CL_MEM_OBJECT_IMAGE2D_ARRAY) { image->imageDesc.image_array_size = 0; } if ((imageDesc->image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER) || ((imageDesc->image_type == CL_MEM_OBJECT_IMAGE2D) && (imageDesc->mem_object != nullptr))) { image->associatedMemObject = castToObject(imageDesc->mem_object); } // Driver needs to store rowPitch passed by the app in order to synchronize the host_ptr later on map call image->setHostPtrRowPitch(imageDesc->image_row_pitch ? 
imageDesc->image_row_pitch : hostPtrRowPitch); image->setHostPtrSlicePitch(hostPtrSlicePitch); image->setImageCount(imageCount); image->setHostPtrMinSize(hostPtrMinSize); image->setImageRowPitch(imgInfo.rowPitch); image->setImageSlicePitch(imgInfo.slicePitch); image->setQPitch(imgInfo.qPitch); image->setSurfaceOffsets(imgInfo.offset, imgInfo.xOffset, imgInfo.yOffset, imgInfo.yOffsetForUVPlane); image->setMipCount(imgInfo.mipCount); image->setPlane(imgInfo.plane); if (parentImage) { image->setMediaPlaneType(static_cast(imageDesc->image_depth)); image->setParentSharingHandler(parentImage->getSharingHandler()); } if (parentBuffer) { image->setParentSharingHandler(parentBuffer->getSharingHandler()); } errcodeRet = CL_SUCCESS; if (context->isProvidingPerformanceHints() && image->isMemObjZeroCopy()) { context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_IMAGE_MEETS_ALIGNMENT_RESTRICTIONS, static_cast(image)); } if (allocationInfo[rootDeviceIndex].transferNeeded && !transferedMemory) { std::array copyOrigin = {{0, 0, 0}}; std::array copyRegion = {{imageWidth, imageHeight, std::max(imageDepth, imageCount)}}; if (imageDesc->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) { copyRegion = {{imageWidth, imageCount, 1}}; } else { copyRegion = {{imageWidth, imageHeight, std::max(imageDepth, imageCount)}}; } bool isCpuTransferPreferrred = imgInfo.linearStorage && (MemoryPool::isSystemMemoryPool(allocationInfo[rootDeviceIndex].memory->getMemoryPool()) || defaultHwHelper.isCpuImageTransferPreferred(hwInfo)); if (!isCpuTransferPreferrred) { auto cmdQ = context->getSpecialQueue(rootDeviceIndex); if (isNV12Image(&image->getImageFormat())) { errcodeRet = image->writeNV12Planes(hostPtr, hostPtrRowPitch, rootDeviceIndex); } else { errcodeRet = cmdQ->enqueueWriteImage(image, CL_TRUE, ©Origin[0], ©Region[0], hostPtrRowPitch, hostPtrSlicePitch, hostPtr, allocationInfo[rootDeviceIndex].mapAllocation, 0, nullptr, nullptr); } } else { void *pDestinationAddress = 
allocationInfo[rootDeviceIndex].memory->getUnderlyingBuffer(); auto isNotInSystemMemory = !MemoryPool::isSystemMemoryPool(allocationInfo[rootDeviceIndex].memory->getMemoryPool()); if (isNotInSystemMemory) { pDestinationAddress = context->getMemoryManager()->lockResource(allocationInfo[rootDeviceIndex].memory); } image->transferData(pDestinationAddress, imgInfo.rowPitch, imgInfo.slicePitch, const_cast(hostPtr), hostPtrRowPitch, hostPtrSlicePitch, copyRegion, copyOrigin); if (isNotInSystemMemory) { context->getMemoryManager()->unlockResource(allocationInfo[rootDeviceIndex].memory); } } transferedMemory = true; } if (allocationInfo[rootDeviceIndex].mapAllocation) { image->mapAllocations.addAllocation(allocationInfo[rootDeviceIndex].mapAllocation); } } if (((imageDesc->image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER) || (imageDesc->image_type == CL_MEM_OBJECT_IMAGE2D)) && (parentBuffer != nullptr)) { parentBuffer->incRefInternal(); } if (errcodeRet != CL_SUCCESS) { image->release(); image = nullptr; cleanAllGraphicsAllocations(*context, *memoryManager, allocationInfo, isParentObject); return image; } } while (false); return image; } Image *Image::createImageHw(Context *context, const MemoryProperties &memoryProperties, cl_mem_flags flags, cl_mem_flags_intel flagsIntel, size_t size, void *hostPtr, const cl_image_format &imageFormat, const cl_image_desc &imageDesc, bool zeroCopy, MultiGraphicsAllocation multiGraphicsAllocation, bool isObjectRedescribed, uint32_t baseMipLevel, uint32_t mipCount, const ClSurfaceFormatInfo *surfaceFormatInfo) { const auto device = context->getDevice(0); const auto &hwInfo = device->getHardwareInfo(); auto funcCreate = imageFactory[hwInfo.platform.eRenderCoreFamily].createImageFunction; DEBUG_BREAK_IF(nullptr == funcCreate); auto image = funcCreate(context, memoryProperties, flags, flagsIntel, size, hostPtr, imageFormat, imageDesc, zeroCopy, std::move(multiGraphicsAllocation), isObjectRedescribed, baseMipLevel, mipCount, surfaceFormatInfo, 
nullptr); DEBUG_BREAK_IF(nullptr == image); image->createFunction = funcCreate; return image; } Image *Image::createSharedImage(Context *context, SharingHandler *sharingHandler, const McsSurfaceInfo &mcsSurfaceInfo, MultiGraphicsAllocation multiGraphicsAllocation, GraphicsAllocation *mcsAllocation, cl_mem_flags flags, cl_mem_flags_intel flagsIntel, const ClSurfaceFormatInfo *surfaceFormat, ImageInfo &imgInfo, uint32_t cubeFaceIndex, uint32_t baseMipLevel, uint32_t mipCount) { auto rootDeviceIndex = context->getDevice(0)->getRootDeviceIndex(); auto size = multiGraphicsAllocation.getGraphicsAllocation(rootDeviceIndex)->getUnderlyingBufferSize(); auto sharedImage = createImageHw( context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context->getDevice(0)->getDevice()), flags, flagsIntel, size, nullptr, surfaceFormat->OCLImageFormat, Image::convertDescriptor(imgInfo.imgDesc), false, std::move(multiGraphicsAllocation), false, baseMipLevel, mipCount, surfaceFormat); sharedImage->setSharingHandler(sharingHandler); sharedImage->setMcsAllocation(mcsAllocation); sharedImage->setQPitch(imgInfo.qPitch); sharedImage->setHostPtrRowPitch(imgInfo.imgDesc.imageRowPitch); sharedImage->setHostPtrSlicePitch(imgInfo.imgDesc.imageSlicePitch); sharedImage->setCubeFaceIndex(cubeFaceIndex); sharedImage->setSurfaceOffsets(imgInfo.offset, imgInfo.xOffset, imgInfo.yOffset, imgInfo.yOffsetForUVPlane); sharedImage->setMcsSurfaceInfo(mcsSurfaceInfo); sharedImage->setPlane(imgInfo.plane); return sharedImage; } cl_int Image::validate(Context *context, const MemoryProperties &memoryProperties, const ClSurfaceFormatInfo *surfaceFormat, const cl_image_desc *imageDesc, const void *hostPtr) { auto pClDevice = context->getDevice(0); size_t srcSize = 0; size_t retSize = 0; const size_t *maxWidth = nullptr; const size_t *maxHeight = nullptr; const uint32_t *pitchAlignment = nullptr; const uint32_t *baseAddressAlignment = nullptr; if (!surfaceFormat) { return 
CL_IMAGE_FORMAT_NOT_SUPPORTED; } Image *parentImage = castToObject(imageDesc->mem_object); Buffer *parentBuffer = castToObject(imageDesc->mem_object); if (imageDesc->image_type == CL_MEM_OBJECT_IMAGE2D) { if ((imageDesc->mem_object != nullptr) && (pClDevice->getSharedDeviceInfo().imageSupport == false)) { return CL_INVALID_OPERATION; } pClDevice->getCap(reinterpret_cast(maxWidth), srcSize, retSize); pClDevice->getCap(reinterpret_cast(maxHeight), srcSize, retSize); if (imageDesc->image_width > *maxWidth || imageDesc->image_height > *maxHeight) { return CL_INVALID_IMAGE_SIZE; } if (parentBuffer) { // Image 2d from buffer pClDevice->getCap(reinterpret_cast(pitchAlignment), srcSize, retSize); pClDevice->getCap(reinterpret_cast(baseAddressAlignment), srcSize, retSize); const auto rowSize = imageDesc->image_row_pitch != 0 ? imageDesc->image_row_pitch : alignUp(imageDesc->image_width * surfaceFormat->surfaceFormat.NumChannels * surfaceFormat->surfaceFormat.PerChannelSizeInBytes, *pitchAlignment); const auto minimumBufferSize = imageDesc->image_height * rowSize; if ((imageDesc->image_row_pitch % (*pitchAlignment)) || ((parentBuffer->getFlags() & CL_MEM_USE_HOST_PTR) && (reinterpret_cast(parentBuffer->getHostPtr()) % (*baseAddressAlignment))) || (minimumBufferSize > parentBuffer->getSize())) { return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; } else if (memoryProperties.flags.useHostPtr || memoryProperties.flags.copyHostPtr) { return CL_INVALID_VALUE; } } if (parentImage && !isNV12Image(&parentImage->getImageFormat())) { // Image 2d from image 2d if (!parentImage->hasSameDescriptor(*imageDesc) || !parentImage->hasValidParentImageFormat(surfaceFormat->OCLImageFormat)) { return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; } } if (!(parentImage && isNV12Image(&parentImage->getImageFormat())) && (imageDesc->image_width == 0 || imageDesc->image_height == 0)) { return CL_INVALID_IMAGE_DESCRIPTOR; } } if (hostPtr == nullptr) { if (imageDesc->image_row_pitch != 0 && imageDesc->mem_object == 
nullptr) { return CL_INVALID_IMAGE_DESCRIPTOR; } } else { if (imageDesc->image_row_pitch != 0) { if (imageDesc->image_row_pitch % surfaceFormat->surfaceFormat.ImageElementSizeInBytes != 0 || imageDesc->image_row_pitch < imageDesc->image_width * surfaceFormat->surfaceFormat.ImageElementSizeInBytes) { return CL_INVALID_IMAGE_DESCRIPTOR; } } } if (parentBuffer && imageDesc->image_type != CL_MEM_OBJECT_IMAGE1D_BUFFER && imageDesc->image_type != CL_MEM_OBJECT_IMAGE2D) { return CL_INVALID_IMAGE_DESCRIPTOR; } if (parentImage && imageDesc->image_type != CL_MEM_OBJECT_IMAGE2D) { return CL_INVALID_IMAGE_DESCRIPTOR; } return validateImageTraits(context, memoryProperties, &surfaceFormat->OCLImageFormat, imageDesc, hostPtr); } cl_int Image::validateImageFormat(const cl_image_format *imageFormat) { if (!imageFormat) { return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; } bool isValidFormat = isValidSingleChannelFormat(imageFormat) || isValidIntensityFormat(imageFormat) || isValidLuminanceFormat(imageFormat) || isValidDepthFormat(imageFormat) || isValidDoubleChannelFormat(imageFormat) || isValidTripleChannelFormat(imageFormat) || isValidRGBAFormat(imageFormat) || isValidSRGBFormat(imageFormat) || isValidARGBFormat(imageFormat) || isValidDepthStencilFormat(imageFormat) || isValidYUVFormat(imageFormat); if (isValidFormat) { return CL_SUCCESS; } return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; } cl_int Image::validatePlanarYUV(Context *context, const MemoryProperties &memoryProperties, const cl_image_desc *imageDesc, const void *hostPtr) { cl_int errorCode = CL_SUCCESS; auto pClDevice = context->getDevice(0); const size_t *maxWidth = nullptr; const size_t *maxHeight = nullptr; size_t srcSize = 0; size_t retSize = 0; while (true) { Image *memObject = castToObject(imageDesc->mem_object); if (memObject != nullptr) { if (memObject->memObjectType == CL_MEM_OBJECT_IMAGE2D) { if (imageDesc->image_depth != 1 && imageDesc->image_depth != 0) { errorCode = CL_INVALID_IMAGE_DESCRIPTOR; } } break; } if 
(imageDesc->mem_object != nullptr) { errorCode = CL_INVALID_IMAGE_DESCRIPTOR; break; } if (!memoryProperties.flags.hostNoAccess) { errorCode = CL_INVALID_VALUE; break; } else { if (imageDesc->image_height % 4 || imageDesc->image_width % 4 || imageDesc->image_type != CL_MEM_OBJECT_IMAGE2D) { errorCode = CL_INVALID_IMAGE_DESCRIPTOR; break; } } pClDevice->getCap(reinterpret_cast(maxWidth), srcSize, retSize); pClDevice->getCap(reinterpret_cast(maxHeight), srcSize, retSize); if (imageDesc->image_width > *maxWidth || imageDesc->image_height > *maxHeight) { errorCode = CL_INVALID_IMAGE_SIZE; break; } break; } return errorCode; } cl_int Image::validatePackedYUV(const MemoryProperties &memoryProperties, const cl_image_desc *imageDesc) { cl_int errorCode = CL_SUCCESS; while (true) { if (!memoryProperties.flags.readOnly) { errorCode = CL_INVALID_VALUE; break; } else { if (imageDesc->image_width % 2 != 0 || imageDesc->image_type != CL_MEM_OBJECT_IMAGE2D) { errorCode = CL_INVALID_IMAGE_DESCRIPTOR; break; } } break; } return errorCode; } cl_int Image::validateImageTraits(Context *context, const MemoryProperties &memoryProperties, const cl_image_format *imageFormat, const cl_image_desc *imageDesc, const void *hostPtr) { if (isNV12Image(imageFormat)) return validatePlanarYUV(context, memoryProperties, imageDesc, hostPtr); else if (isPackedYuvImage(imageFormat)) return validatePackedYUV(memoryProperties, imageDesc); return CL_SUCCESS; } size_t Image::calculateHostPtrSize(const size_t *region, size_t rowPitch, size_t slicePitch, size_t pixelSize, uint32_t imageType) { DEBUG_BREAK_IF(!((rowPitch != 0) && (slicePitch != 0))); size_t sizeToReturn = 0u; switch (imageType) { case CL_MEM_OBJECT_IMAGE1D: case CL_MEM_OBJECT_IMAGE1D_BUFFER: sizeToReturn = region[0] * pixelSize; break; case CL_MEM_OBJECT_IMAGE2D: sizeToReturn = (region[1] - 1) * rowPitch + region[0] * pixelSize; break; case CL_MEM_OBJECT_IMAGE1D_ARRAY: sizeToReturn = (region[1] - 1) * slicePitch + region[0] * pixelSize; 
break; case CL_MEM_OBJECT_IMAGE3D: case CL_MEM_OBJECT_IMAGE2D_ARRAY: sizeToReturn = (region[2] - 1) * slicePitch + (region[1] - 1) * rowPitch + region[0] * pixelSize; break; default: DEBUG_BREAK_IF("Unsupported cl_image_type"); break; } DEBUG_BREAK_IF(sizeToReturn == 0); return sizeToReturn; } void Image::calculateHostPtrOffset(size_t *imageOffset, const size_t *origin, const size_t *region, size_t rowPitch, size_t slicePitch, uint32_t imageType, size_t bytesPerPixel) { size_t computedImageRowPitch = rowPitch ? rowPitch : region[0] * bytesPerPixel; size_t computedImageSlicePitch = slicePitch ? slicePitch : region[1] * computedImageRowPitch * bytesPerPixel; switch (imageType) { case CL_MEM_OBJECT_IMAGE1D: case CL_MEM_OBJECT_IMAGE1D_BUFFER: case CL_MEM_OBJECT_IMAGE2D: DEBUG_BREAK_IF(slicePitch != 0 && slicePitch < computedImageRowPitch * region[1]); [[fallthrough]]; case CL_MEM_OBJECT_IMAGE2D_ARRAY: case CL_MEM_OBJECT_IMAGE3D: *imageOffset = origin[2] * computedImageSlicePitch + origin[1] * computedImageRowPitch + origin[0] * bytesPerPixel; break; case CL_MEM_OBJECT_IMAGE1D_ARRAY: *imageOffset = origin[1] * computedImageSlicePitch + origin[0] * bytesPerPixel; break; default: DEBUG_BREAK_IF("Unsupported cl_image_type"); *imageOffset = 0; break; } } // Called by clGetImageParamsINTEL to obtain image row pitch and slice pitch // Assumption: all parameters are already validated be calling function cl_int Image::getImageParams(Context *context, cl_mem_flags memFlags, const ClSurfaceFormatInfo *surfaceFormat, const cl_image_desc *imageDesc, size_t *imageRowPitch, size_t *imageSlicePitch) { cl_int retVal = CL_SUCCESS; auto clientContext = context->getDevice(0)->getRootDeviceEnvironment().getGmmClientContext(); ImageInfo imgInfo = {}; cl_image_desc imageDescriptor = *imageDesc; imgInfo.imgDesc = Image::convertDescriptor(imageDescriptor); imgInfo.surfaceFormat = &surfaceFormat->surfaceFormat; auto gmm = std::make_unique(clientContext, imgInfo, StorageInfo{}, false); 
*imageRowPitch = imgInfo.rowPitch; *imageSlicePitch = imgInfo.slicePitch; return retVal; } const cl_image_desc &Image::getImageDesc() const { return imageDesc; } const cl_image_format &Image::getImageFormat() const { return imageFormat; } const ClSurfaceFormatInfo &Image::getSurfaceFormatInfo() const { return surfaceFormatInfo; } cl_mem_object_type Image::convertType(const ImageType type) { switch (type) { case ImageType::Image2D: return CL_MEM_OBJECT_IMAGE2D; case ImageType::Image3D: return CL_MEM_OBJECT_IMAGE3D; case ImageType::Image2DArray: return CL_MEM_OBJECT_IMAGE2D_ARRAY; case ImageType::Image1D: return CL_MEM_OBJECT_IMAGE1D; case ImageType::Image1DArray: return CL_MEM_OBJECT_IMAGE1D_ARRAY; case ImageType::Image1DBuffer: return CL_MEM_OBJECT_IMAGE1D_BUFFER; default: break; } return 0; } ImageType Image::convertType(const cl_mem_object_type type) { switch (type) { case CL_MEM_OBJECT_IMAGE2D: return ImageType::Image2D; case CL_MEM_OBJECT_IMAGE3D: return ImageType::Image3D; case CL_MEM_OBJECT_IMAGE2D_ARRAY: return ImageType::Image2DArray; case CL_MEM_OBJECT_IMAGE1D: return ImageType::Image1D; case CL_MEM_OBJECT_IMAGE1D_ARRAY: return ImageType::Image1DArray; case CL_MEM_OBJECT_IMAGE1D_BUFFER: return ImageType::Image1DBuffer; default: break; } return ImageType::Invalid; } ImageDescriptor Image::convertDescriptor(const cl_image_desc &imageDesc) { ImageDescriptor desc = {}; desc.fromParent = imageDesc.mem_object != nullptr; desc.imageArraySize = imageDesc.image_array_size; desc.imageDepth = imageDesc.image_depth; desc.imageHeight = imageDesc.image_height; desc.imageRowPitch = imageDesc.image_row_pitch; desc.imageSlicePitch = imageDesc.image_slice_pitch; desc.imageType = convertType(imageDesc.image_type); desc.imageWidth = imageDesc.image_width; desc.numMipLevels = imageDesc.num_mip_levels; desc.numSamples = imageDesc.num_samples; return desc; } cl_image_desc Image::convertDescriptor(const ImageDescriptor &imageDesc) { cl_image_desc desc = {}; desc.mem_object = 
nullptr; desc.image_array_size = imageDesc.imageArraySize; desc.image_depth = imageDesc.imageDepth; desc.image_height = imageDesc.imageHeight; desc.image_row_pitch = imageDesc.imageRowPitch; desc.image_slice_pitch = imageDesc.imageSlicePitch; desc.image_type = convertType(imageDesc.imageType); desc.image_width = imageDesc.imageWidth; desc.num_mip_levels = imageDesc.numMipLevels; desc.num_samples = imageDesc.numSamples; return desc; } cl_int Image::getImageInfo(cl_image_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) { cl_int retVal; size_t srcParamSize = GetInfo::invalidSourceSize; void *srcParam = nullptr; auto imageDesc = getImageDesc(); auto surfFmtInfo = getSurfaceFormatInfo(); size_t retParam; size_t array_size = imageDesc.image_array_size * (imageDesc.image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY || imageDesc.image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY); size_t SlicePitch = hostPtrSlicePitch * !(imageDesc.image_type == CL_MEM_OBJECT_IMAGE2D || imageDesc.image_type == CL_MEM_OBJECT_IMAGE1D || imageDesc.image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER); switch (paramName) { case CL_IMAGE_FORMAT: srcParamSize = sizeof(cl_image_format); srcParam = &(surfFmtInfo.OCLImageFormat); break; case CL_IMAGE_ELEMENT_SIZE: srcParamSize = sizeof(size_t); srcParam = &(surfFmtInfo.surfaceFormat.ImageElementSizeInBytes); break; case CL_IMAGE_ROW_PITCH: srcParamSize = sizeof(size_t); if (mcsSurfaceInfo.multisampleCount > 1) { retParam = imageDesc.image_width * surfFmtInfo.surfaceFormat.ImageElementSizeInBytes * imageDesc.num_samples; } else { retParam = hostPtrRowPitch; } srcParam = &retParam; break; case CL_IMAGE_SLICE_PITCH: srcParamSize = sizeof(size_t); srcParam = &SlicePitch; break; case CL_IMAGE_WIDTH: srcParamSize = sizeof(size_t); retParam = imageDesc.image_width; if (this->baseMipLevel) { retParam = imageDesc.image_width >> this->baseMipLevel; retParam = std::max(retParam, (size_t)1); } srcParam = &retParam; break; case CL_IMAGE_HEIGHT: 
srcParamSize = sizeof(size_t); retParam = imageDesc.image_height * !((imageDesc.image_type == CL_MEM_OBJECT_IMAGE1D) || (imageDesc.image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) || (imageDesc.image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER)); if ((retParam != 0) && (this->baseMipLevel > 0)) { retParam = retParam >> this->baseMipLevel; retParam = std::max(retParam, (size_t)1); } srcParam = &retParam; break; case CL_IMAGE_DEPTH: srcParamSize = sizeof(size_t); retParam = imageDesc.image_depth * (imageDesc.image_type == CL_MEM_OBJECT_IMAGE3D); if ((retParam != 0) && (this->baseMipLevel > 0)) { retParam = retParam >> this->baseMipLevel; retParam = std::max(retParam, (size_t)1); } srcParam = &retParam; break; case CL_IMAGE_ARRAY_SIZE: srcParamSize = sizeof(size_t); srcParam = &(array_size); break; case CL_IMAGE_BUFFER: srcParamSize = sizeof(cl_mem); srcParam = &(imageDesc.buffer); break; case CL_IMAGE_NUM_MIP_LEVELS: srcParamSize = sizeof(cl_uint); srcParam = &(imageDesc.num_mip_levels); break; case CL_IMAGE_NUM_SAMPLES: srcParamSize = sizeof(cl_uint); srcParam = &(imageDesc.num_samples); break; default: getOsSpecificImageInfo(paramName, &srcParamSize, &srcParam); break; } auto getInfoStatus = GetInfo::getInfo(paramValue, paramValueSize, srcParam, srcParamSize); retVal = changeGetInfoStatusToCLResultType(getInfoStatus); GetInfo::setParamValueReturnSize(paramValueSizeRet, srcParamSize, getInfoStatus); return retVal; } Image *Image::redescribeFillImage() { const uint32_t redescribeTable[3][3] = { {17, 27, 5}, // {CL_R, CL_UNSIGNED_INT8}, {CL_RG, CL_UNSIGNED_INT8}, {CL_RGBA, CL_UNSIGNED_INT8} {18, 28, 6}, // {CL_R, CL_UNSIGNED_INT16}, {CL_RG, CL_UNSIGNED_INT16}, {CL_RGBA, CL_UNSIGNED_INT16} {19, 29, 7} // {CL_R, CL_UNSIGNED_INT32}, {CL_RG, CL_UNSIGNED_INT32}, {CL_RGBA, CL_UNSIGNED_INT32} }; auto imageFormatNew = this->imageFormat; auto imageDescNew = this->imageDesc; const ClSurfaceFormatInfo *surfaceFormat = nullptr; uint32_t redescribeTableCol = 
this->surfaceFormatInfo.surfaceFormat.NumChannels / 2; uint32_t redescribeTableRow = this->surfaceFormatInfo.surfaceFormat.PerChannelSizeInBytes / 2; ArrayRef readWriteSurfaceFormats = SurfaceFormats::readWrite(); uint32_t surfaceFormatIdx = redescribeTable[redescribeTableRow][redescribeTableCol]; surfaceFormat = &readWriteSurfaceFormats[surfaceFormatIdx]; imageFormatNew.image_channel_order = surfaceFormat->OCLImageFormat.image_channel_order; imageFormatNew.image_channel_data_type = surfaceFormat->OCLImageFormat.image_channel_data_type; DEBUG_BREAK_IF(nullptr == createFunction); MemoryProperties memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(flags | CL_MEM_USE_HOST_PTR, flagsIntel, 0, &context->getDevice(0)->getDevice()); auto image = createFunction(context, memoryProperties, flags | CL_MEM_USE_HOST_PTR, flagsIntel, this->getSize(), this->getCpuAddress(), imageFormatNew, imageDescNew, this->isMemObjZeroCopy(), this->multiGraphicsAllocation, true, this->baseMipLevel, this->mipCount, surfaceFormat, &this->surfaceOffsets); image->setQPitch(this->getQPitch()); image->setCubeFaceIndex(this->getCubeFaceIndex()); image->associatedMemObject = this->associatedMemObject; return image; } static const uint32_t redescribeTableBytes[] = { 17, // {CL_R, CL_UNSIGNED_INT8} 1 byte 18, // {CL_R, CL_UNSIGNED_INT16} 2 byte 19, // {CL_R, CL_UNSIGNED_INT32} 4 byte 29, // {CL_RG, CL_UNSIGNED_INT32} 8 byte 7 // {CL_RGBA, CL_UNSIGNED_INT32} 16 byte }; Image *Image::redescribe() { const uint32_t bytesPerPixel = this->surfaceFormatInfo.surfaceFormat.NumChannels * surfaceFormatInfo.surfaceFormat.PerChannelSizeInBytes; const uint32_t exponent = Math::log2(bytesPerPixel); DEBUG_BREAK_IF(exponent >= 5u); const uint32_t surfaceFormatIdx = redescribeTableBytes[exponent % 5]; const ArrayRef readWriteSurfaceFormats = SurfaceFormats::readWrite(); const ClSurfaceFormatInfo *surfaceFormat = &readWriteSurfaceFormats[surfaceFormatIdx]; auto imageFormatNew = this->imageFormat; 
imageFormatNew.image_channel_order = surfaceFormat->OCLImageFormat.image_channel_order; imageFormatNew.image_channel_data_type = surfaceFormat->OCLImageFormat.image_channel_data_type; DEBUG_BREAK_IF(nullptr == createFunction); MemoryProperties memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(flags | CL_MEM_USE_HOST_PTR, flagsIntel, 0, &context->getDevice(0)->getDevice()); auto image = createFunction(context, memoryProperties, flags | CL_MEM_USE_HOST_PTR, flagsIntel, this->getSize(), this->getCpuAddress(), imageFormatNew, this->imageDesc, this->isMemObjZeroCopy(), this->multiGraphicsAllocation, true, this->baseMipLevel, this->mipCount, surfaceFormat, &this->surfaceOffsets); image->setQPitch(this->getQPitch()); image->setCubeFaceIndex(this->getCubeFaceIndex()); image->associatedMemObject = this->associatedMemObject; image->createFunction = createFunction; return image; } void Image::transferDataToHostPtr(MemObjSizeArray ©Size, MemObjOffsetArray ©Offset) { transferData(hostPtr, hostPtrRowPitch, hostPtrSlicePitch, memoryStorage, imageDesc.image_row_pitch, imageDesc.image_slice_pitch, copySize, copyOffset); } void Image::transferDataFromHostPtr(MemObjSizeArray ©Size, MemObjOffsetArray ©Offset) { transferData(memoryStorage, imageDesc.image_row_pitch, imageDesc.image_slice_pitch, hostPtr, hostPtrRowPitch, hostPtrSlicePitch, copySize, copyOffset); } cl_int Image::writeNV12Planes(const void *hostPtr, size_t hostPtrRowPitch, uint32_t rootDeviceIndex) { CommandQueue *cmdQ = context->getSpecialQueue(rootDeviceIndex); size_t origin[3] = {0, 0, 0}; size_t region[3] = {this->imageDesc.image_width, this->imageDesc.image_height, 1}; cl_int retVal = 0; cl_image_desc imageDesc = {0}; cl_image_format imageFormat = {0}; // Make NV12 planes readable and writable both on device and host cl_mem_flags flags = CL_MEM_READ_WRITE; // Plane Y imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_R; imageDesc.image_type = 
CL_MEM_OBJECT_IMAGE2D; // image_width & image_height are ignored for plane extraction imageDesc.image_width = 0; imageDesc.image_height = 0; // set mem_object to the full NV12 image imageDesc.mem_object = this; // get access to the Y plane (CL_R) imageDesc.image_depth = 0; const ClSurfaceFormatInfo *surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); // Create NV12 UV Plane image std::unique_ptr imageYPlane(Image::create( context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context->getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal)); retVal = cmdQ->enqueueWriteImage(imageYPlane.get(), CL_TRUE, origin, region, hostPtrRowPitch, 0, hostPtr, nullptr, 0, nullptr, nullptr); // UV Plane is two times smaller than Plane Y region[0] = region[0] / 2; region[1] = region[1] / 2; imageDesc.image_width = 0; imageDesc.image_height = 0; imageDesc.image_depth = 1; // UV plane imageFormat.image_channel_order = CL_RG; hostPtr = static_cast(static_cast(hostPtr) + (hostPtrRowPitch * this->imageDesc.image_height)); surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); // Create NV12 UV Plane image std::unique_ptr imageUVPlane(Image::create( context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context->getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal)); retVal = cmdQ->enqueueWriteImage(imageUVPlane.get(), CL_TRUE, origin, region, hostPtrRowPitch, 0, hostPtr, nullptr, 0, nullptr, nullptr); return retVal; } const ClSurfaceFormatInfo *Image::getSurfaceFormatFromTable(cl_mem_flags flags, const cl_image_format *imageFormat, bool supportsOcl20Features) { if (!imageFormat) { DEBUG_BREAK_IF("Invalid format"); return nullptr; } ArrayRef formats = SurfaceFormats::surfaceFormats(flags, imageFormat, 
supportsOcl20Features); for (auto &format : formats) { if (format.OCLImageFormat.image_channel_data_type == imageFormat->image_channel_data_type && format.OCLImageFormat.image_channel_order == imageFormat->image_channel_order) { return &format; } } DEBUG_BREAK_IF("Invalid format"); return nullptr; } bool Image::isImage1d(const cl_image_desc &imageDesc) { auto imageType = imageDesc.image_type; auto buffer = castToObject(imageDesc.buffer); return (imageType == CL_MEM_OBJECT_IMAGE1D || imageType == CL_MEM_OBJECT_IMAGE1D_ARRAY || imageType == CL_MEM_OBJECT_IMAGE1D_BUFFER || buffer); } bool Image::isImage2d(cl_mem_object_type imageType) { return imageType == CL_MEM_OBJECT_IMAGE2D; } bool Image::isImage2dOr2dArray(cl_mem_object_type imageType) { return imageType == CL_MEM_OBJECT_IMAGE2D || imageType == CL_MEM_OBJECT_IMAGE2D_ARRAY; } bool Image::isDepthFormat(const cl_image_format &imageFormat) { return imageFormat.image_channel_order == CL_DEPTH || imageFormat.image_channel_order == CL_DEPTH_STENCIL; } cl_mem Image::validateAndCreateImage(cl_context context, const cl_mem_properties *properties, cl_mem_flags flags, cl_mem_flags_intel flagsIntel, const cl_image_format *imageFormat, const cl_image_desc *imageDesc, const void *hostPtr, cl_int &errcodeRet) { Context *pContext = nullptr; errcodeRet = validateObjects(WithCastToInternal(context, &pContext)); if (errcodeRet != CL_SUCCESS) { return nullptr; } MemoryProperties memoryProperties{}; cl_mem_flags_intel emptyFlagsIntel = 0; cl_mem_alloc_flags_intel allocflags = 0; if ((false == ClMemoryPropertiesHelper::parseMemoryProperties(nullptr, memoryProperties, flags, emptyFlagsIntel, allocflags, MemoryPropertiesHelper::ObjType::IMAGE, *pContext)) || (false == MemObjHelper::validateMemoryPropertiesForImage(memoryProperties, flags, emptyFlagsIntel, imageDesc->mem_object, *pContext))) { errcodeRet = CL_INVALID_VALUE; return nullptr; } if ((false == ClMemoryPropertiesHelper::parseMemoryProperties(properties, memoryProperties, flags, 
flagsIntel, allocflags, MemoryPropertiesHelper::ObjType::IMAGE, *pContext)) || (false == MemObjHelper::validateMemoryPropertiesForImage(memoryProperties, flags, flagsIntel, imageDesc->mem_object, *pContext))) { errcodeRet = CL_INVALID_PROPERTY; return nullptr; } bool isHostPtrUsed = (hostPtr != nullptr); bool areHostPtrFlagsUsed = memoryProperties.flags.copyHostPtr || memoryProperties.flags.useHostPtr; if (isHostPtrUsed != areHostPtrFlagsUsed) { errcodeRet = CL_INVALID_HOST_PTR; return nullptr; } errcodeRet = Image::validateImageFormat(imageFormat); if (errcodeRet != CL_SUCCESS) { return nullptr; } const auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, imageFormat, pContext->getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); errcodeRet = Image::validate(pContext, memoryProperties, surfaceFormat, imageDesc, hostPtr); if (errcodeRet != CL_SUCCESS) { return nullptr; } auto image = Image::create(pContext, memoryProperties, flags, flagsIntel, surfaceFormat, imageDesc, hostPtr, errcodeRet); if (errcodeRet == CL_SUCCESS) { image->storeProperties(properties); } return image; } bool Image::isValidSingleChannelFormat(const cl_image_format *imageFormat) { auto channelOrder = imageFormat->image_channel_order; auto dataType = imageFormat->image_channel_data_type; bool isValidOrder = (channelOrder == CL_A) || (channelOrder == CL_R) || (channelOrder == CL_Rx); bool isValidDataType = (dataType == CL_UNORM_INT8) || (dataType == CL_UNORM_INT16) || (dataType == CL_SNORM_INT8) || (dataType == CL_SNORM_INT16) || (dataType == CL_HALF_FLOAT) || (dataType == CL_FLOAT) || (dataType == CL_SIGNED_INT8) || (dataType == CL_SIGNED_INT16) || (dataType == CL_SIGNED_INT32) || (dataType == CL_UNSIGNED_INT8) || (dataType == CL_UNSIGNED_INT16) || (dataType == CL_UNSIGNED_INT32); return isValidOrder && isValidDataType; } bool Image::isValidIntensityFormat(const cl_image_format *imageFormat) { if (imageFormat->image_channel_order != CL_INTENSITY) { return false; } auto 
dataType = imageFormat->image_channel_data_type; return (dataType == CL_UNORM_INT8) || (dataType == CL_UNORM_INT16) || (dataType == CL_SNORM_INT8) || (dataType == CL_SNORM_INT16) || (dataType == CL_HALF_FLOAT) || (dataType == CL_FLOAT); } bool Image::isValidLuminanceFormat(const cl_image_format *imageFormat) { if (imageFormat->image_channel_order != CL_LUMINANCE) { return false; } auto dataType = imageFormat->image_channel_data_type; return (dataType == CL_UNORM_INT8) || (dataType == CL_UNORM_INT16) || (dataType == CL_SNORM_INT8) || (dataType == CL_SNORM_INT16) || (dataType == CL_HALF_FLOAT) || (dataType == CL_FLOAT); } bool Image::isValidDepthFormat(const cl_image_format *imageFormat) { if (imageFormat->image_channel_order != CL_DEPTH) { return false; } auto dataType = imageFormat->image_channel_data_type; return (dataType == CL_UNORM_INT16) || (dataType == CL_FLOAT); } bool Image::isValidDoubleChannelFormat(const cl_image_format *imageFormat) { auto channelOrder = imageFormat->image_channel_order; auto dataType = imageFormat->image_channel_data_type; bool isValidOrder = (channelOrder == CL_RG) || (channelOrder == CL_RGx) || (channelOrder == CL_RA); bool isValidDataType = (dataType == CL_UNORM_INT8) || (dataType == CL_UNORM_INT16) || (dataType == CL_SNORM_INT8) || (dataType == CL_SNORM_INT16) || (dataType == CL_HALF_FLOAT) || (dataType == CL_FLOAT) || (dataType == CL_SIGNED_INT8) || (dataType == CL_SIGNED_INT16) || (dataType == CL_SIGNED_INT32) || (dataType == CL_UNSIGNED_INT8) || (dataType == CL_UNSIGNED_INT16) || (dataType == CL_UNSIGNED_INT32); return isValidOrder && isValidDataType; } bool Image::isValidTripleChannelFormat(const cl_image_format *imageFormat) { auto channelOrder = imageFormat->image_channel_order; auto dataType = imageFormat->image_channel_data_type; bool isValidOrder = (channelOrder == CL_RGB) || (channelOrder == CL_RGBx); bool isValidDataType = (dataType == CL_UNORM_SHORT_565) || (dataType == CL_UNORM_SHORT_555) || (dataType == 
CL_UNORM_INT_101010); return isValidOrder && isValidDataType; } bool Image::isValidRGBAFormat(const cl_image_format *imageFormat) { if (imageFormat->image_channel_order != CL_RGBA) { return false; } auto dataType = imageFormat->image_channel_data_type; return (dataType == CL_UNORM_INT8) || (dataType == CL_UNORM_INT16) || (dataType == CL_SNORM_INT8) || (dataType == CL_SNORM_INT16) || (dataType == CL_HALF_FLOAT) || (dataType == CL_FLOAT) || (dataType == CL_SIGNED_INT8) || (dataType == CL_SIGNED_INT16) || (dataType == CL_SIGNED_INT32) || (dataType == CL_UNSIGNED_INT8) || (dataType == CL_UNSIGNED_INT16) || (dataType == CL_UNSIGNED_INT32); } bool Image::isValidSRGBFormat(const cl_image_format *imageFormat) { auto channelOrder = imageFormat->image_channel_order; auto dataType = imageFormat->image_channel_data_type; bool isValidOrder = (channelOrder == CL_sRGB) || (channelOrder == CL_sRGBx) || (channelOrder == CL_sRGBA) || (channelOrder == CL_sBGRA); bool isValidDataType = (dataType == CL_UNORM_INT8); return isValidOrder && isValidDataType; } bool Image::isValidARGBFormat(const cl_image_format *imageFormat) { auto channelOrder = imageFormat->image_channel_order; auto dataType = imageFormat->image_channel_data_type; bool isValidOrder = (channelOrder == CL_ARGB) || (channelOrder == CL_BGRA) || (channelOrder == CL_ABGR); bool isValidDataType = (dataType == CL_UNORM_INT8) || (dataType == CL_SNORM_INT8) || (dataType == CL_SIGNED_INT8) || (dataType == CL_UNSIGNED_INT8); return isValidOrder && isValidDataType; } bool Image::isValidDepthStencilFormat(const cl_image_format *imageFormat) { if (imageFormat->image_channel_order != CL_DEPTH_STENCIL) { return false; } auto dataType = imageFormat->image_channel_data_type; return (dataType == CL_UNORM_INT24) || (dataType == CL_FLOAT); } bool Image::isValidYUVFormat(const cl_image_format *imageFormat) { auto dataType = imageFormat->image_channel_data_type; bool isValidOrder = isNV12Image(imageFormat) || isPackedYuvImage(imageFormat); bool 
isValidDataType = (dataType == CL_UNORM_INT8); return isValidOrder && isValidDataType; } bool Image::hasAlphaChannel(const cl_image_format *imageFormat) { auto channelOrder = imageFormat->image_channel_order; return (channelOrder == CL_A) || (channelOrder == CL_Rx) || (channelOrder == CL_RA) || (channelOrder == CL_RGx) || (channelOrder == CL_RGBx) || (channelOrder == CL_RGBA) || (channelOrder == CL_BGRA) || (channelOrder == CL_ARGB) || (channelOrder == CL_INTENSITY) || (channelOrder == CL_sRGBA) || (channelOrder == CL_sBGRA) || (channelOrder == CL_sRGBx) || (channelOrder == CL_ABGR); } size_t Image::calculateOffsetForMapping(const MemObjOffsetArray &origin) const { size_t rowPitch = mappingOnCpuAllowed() ? imageDesc.image_row_pitch : getHostPtrRowPitch(); size_t slicePitch = mappingOnCpuAllowed() ? imageDesc.image_slice_pitch : getHostPtrSlicePitch(); size_t offset = getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes * origin[0]; switch (imageDesc.image_type) { case CL_MEM_OBJECT_IMAGE1D_ARRAY: offset += slicePitch * origin[1]; break; case CL_MEM_OBJECT_IMAGE2D: offset += rowPitch * origin[1]; break; case CL_MEM_OBJECT_IMAGE2D_ARRAY: case CL_MEM_OBJECT_IMAGE3D: offset += rowPitch * origin[1] + slicePitch * origin[2]; break; default: break; } return offset; } cl_int Image::validateRegionAndOrigin(const size_t *origin, const size_t *region, const cl_image_desc &imgDesc) { if (region[0] == 0 || region[1] == 0 || region[2] == 0) { return CL_INVALID_VALUE; } if (origin[0] + region[0] > imgDesc.image_width) { return CL_INVALID_VALUE; } if (imgDesc.image_type == CL_MEM_OBJECT_IMAGE2D || imgDesc.image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY || imgDesc.image_type == CL_MEM_OBJECT_IMAGE3D) { if (origin[1] + region[1] > imgDesc.image_height) { return CL_INVALID_VALUE; } } if (imgDesc.image_type == CL_MEM_OBJECT_IMAGE3D) { if (origin[2] + region[2] > imgDesc.image_depth) { return CL_INVALID_VALUE; } } if (imgDesc.image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) { if (origin[1] 
+ region[1] > imgDesc.image_array_size) { return CL_INVALID_VALUE; } } if (imgDesc.image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) { if (origin[2] + region[2] > imgDesc.image_array_size) { return CL_INVALID_VALUE; } } bool notMipMapped = (false == isMipMapped(imgDesc)); if ((imgDesc.image_type == CL_MEM_OBJECT_IMAGE1D || imgDesc.image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER) && (((origin[1] > 0) && notMipMapped) || origin[2] > 0 || region[1] > 1 || region[2] > 1)) { return CL_INVALID_VALUE; } if ((imgDesc.image_type == CL_MEM_OBJECT_IMAGE2D || imgDesc.image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) && (((origin[2] > 0) && notMipMapped) || region[2] > 1)) { return CL_INVALID_VALUE; } if (notMipMapped) { return CL_SUCCESS; } uint32_t mipLevel = findMipLevel(imgDesc.image_type, origin); if (mipLevel < imgDesc.num_mip_levels) { return CL_SUCCESS; } else { return CL_INVALID_MIP_LEVEL; } } bool Image::hasSameDescriptor(const cl_image_desc &imageDesc) const { return this->imageDesc.image_type == imageDesc.image_type && this->imageDesc.image_width == imageDesc.image_width && this->imageDesc.image_height == imageDesc.image_height && this->imageDesc.image_depth == imageDesc.image_depth && this->imageDesc.image_array_size == imageDesc.image_array_size && this->hostPtrRowPitch == imageDesc.image_row_pitch && this->hostPtrSlicePitch == imageDesc.image_slice_pitch && this->imageDesc.num_mip_levels == imageDesc.num_mip_levels && this->imageDesc.num_samples == imageDesc.num_samples; } bool Image::hasValidParentImageFormat(const cl_image_format &imageFormat) const { if (this->imageFormat.image_channel_data_type != imageFormat.image_channel_data_type) { return false; } switch (this->imageFormat.image_channel_order) { case CL_BGRA: return imageFormat.image_channel_order == CL_sBGRA; case CL_sBGRA: return imageFormat.image_channel_order == CL_BGRA; case CL_RGBA: return imageFormat.image_channel_order == CL_sRGBA; case CL_sRGBA: return imageFormat.image_channel_order == CL_RGBA; case CL_RGB: return 
imageFormat.image_channel_order == CL_sRGB; case CL_sRGB: return imageFormat.image_channel_order == CL_RGB; case CL_RGBx: return imageFormat.image_channel_order == CL_sRGBx; case CL_sRGBx: return imageFormat.image_channel_order == CL_RGBx; case CL_R: return imageFormat.image_channel_order == CL_DEPTH; default: return false; } } cl_int Image::checkIfDeviceSupportsImages(cl_context context) { auto pContext = castToObject(context); if (pContext != nullptr) { auto capabilityTable = pContext->getDevice(0)->getHardwareInfo().capabilityTable; if (!capabilityTable.supportsImages) { return CL_INVALID_OPERATION; } return CL_SUCCESS; } return CL_INVALID_CONTEXT; } void Image::fillImageRegion(size_t *region) const { region[0] = imageDesc.image_width; if (imageDesc.image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) { region[1] = imageDesc.image_array_size; } else if (Image::isImage1d(imageDesc)) { region[1] = 1u; } else { region[1] = imageDesc.image_height; } if (imageDesc.image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) { region[2] = imageDesc.image_array_size; } else if (imageDesc.image_type == CL_MEM_OBJECT_IMAGE3D) { region[2] = imageDesc.image_depth; } else { region[2] = 1u; } } } // namespace NEO compute-runtime-22.14.22890/opencl/source/mem_obj/image.h000066400000000000000000000454211422164147700230030ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/string.h" #include "shared/source/image/image_surface_state.h" #include "opencl/source/helpers/cl_validators.h" #include "opencl/source/helpers/surface_formats.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/mem_obj/mem_obj.h" namespace NEO { class Image; struct KernelInfo; struct SurfaceFormatInfo; using ImageCreatFunc = Image *(*)(Context *context, const MemoryProperties &memoryProperties, uint64_t flags, uint64_t flagsIntel, size_t size, void *hostPtr, const cl_image_format &imageFormat, const cl_image_desc 
&imageDesc, bool zeroCopy, MultiGraphicsAllocation multiGraphicsAllocation, bool isImageRedescribed, uint32_t baseMipLevel, uint32_t mipCount, const ClSurfaceFormatInfo *surfaceFormatInfo, const SurfaceOffsets *surfaceOffsets); struct ImageFactoryFuncs { ImageCreatFunc createImageFunction; }; namespace ImageFunctions { using ValidateAndCreateImageFunc = std::function; extern ValidateAndCreateImageFunc validateAndCreateImage; } // namespace ImageFunctions class Image : public MemObj { public: const static cl_ulong maskMagic = 0xFFFFFFFFFFFFFFFFLL; static const cl_ulong objectMagic = MemObj::objectMagic | 0x01; ~Image() override; static Image *create(Context *context, const MemoryProperties &memoryProperties, cl_mem_flags flags, cl_mem_flags_intel flagsIntel, const ClSurfaceFormatInfo *surfaceFormat, const cl_image_desc *imageDesc, const void *hostPtr, cl_int &errcodeRet); static cl_mem validateAndCreateImage(cl_context context, const cl_mem_properties *properties, cl_mem_flags flags, cl_mem_flags_intel flagsIntel, const cl_image_format *imageFormat, const cl_image_desc *imageDesc, const void *hostPtr, cl_int &errcodeRet); static Image *createImageHw(Context *context, const MemoryProperties &memoryProperties, cl_mem_flags flags, cl_mem_flags_intel flagsIntel, size_t size, void *hostPtr, const cl_image_format &imageFormat, const cl_image_desc &imageDesc, bool zeroCopy, MultiGraphicsAllocation multiGraphicsAllocation, bool isObjectRedescribed, uint32_t baseMipLevel, uint32_t mipCount, const ClSurfaceFormatInfo *surfaceFormatInfo = nullptr); static Image *createSharedImage(Context *context, SharingHandler *sharingHandler, const McsSurfaceInfo &mcsSurfaceInfo, MultiGraphicsAllocation multiGraphicsAllocation, GraphicsAllocation *mcsAllocation, cl_mem_flags flags, cl_mem_flags_intel flagsIntel, const ClSurfaceFormatInfo *surfaceFormat, ImageInfo &imgInfo, uint32_t cubeFaceIndex, uint32_t baseMipLevel, uint32_t mipCount); static cl_int validate(Context *context, const 
MemoryProperties &memoryProperties, const ClSurfaceFormatInfo *surfaceFormat, const cl_image_desc *imageDesc, const void *hostPtr); static cl_int validateImageFormat(const cl_image_format *imageFormat); static int32_t validatePlanarYUV(Context *context, const MemoryProperties &memoryProperties, const cl_image_desc *imageDesc, const void *hostPtr); static int32_t validatePackedYUV(const MemoryProperties &memoryProperties, const cl_image_desc *imageDesc); static cl_int validateImageTraits(Context *context, const MemoryProperties &memoryProperties, const cl_image_format *imageFormat, const cl_image_desc *imageDesc, const void *hostPtr); static size_t calculateHostPtrSize(const size_t *region, size_t rowPitch, size_t slicePitch, size_t pixelSize, uint32_t imageType); static void calculateHostPtrOffset(size_t *imageOffset, const size_t *origin, const size_t *region, size_t rowPitch, size_t slicePitch, uint32_t imageType, size_t bytesPerPixel); static cl_int getImageParams(Context *context, cl_mem_flags flags, const ClSurfaceFormatInfo *surfaceFormat, const cl_image_desc *imageDesc, size_t *imageRowPitch, size_t *imageSlicePitch); static bool isImage1d(const cl_image_desc &imageDesc); static bool isImage2d(cl_mem_object_type imageType); static bool isImage2dOr2dArray(cl_mem_object_type imageType); static bool isDepthFormat(const cl_image_format &imageFormat); static bool hasSlices(cl_mem_object_type type) { return (type == CL_MEM_OBJECT_IMAGE3D) || (type == CL_MEM_OBJECT_IMAGE1D_ARRAY) || (type == CL_MEM_OBJECT_IMAGE2D_ARRAY); } static ImageType convertType(const cl_mem_object_type type); static cl_mem_object_type convertType(const ImageType type); static ImageDescriptor convertDescriptor(const cl_image_desc &imageDesc); static cl_image_desc convertDescriptor(const ImageDescriptor &imageDesc); cl_int getImageInfo(cl_image_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet); virtual void setImageArg(void *memory, bool isMediaBlockImage, 
uint32_t mipLevel, uint32_t rootDeviceIndex, bool useGlobalAtomics) = 0; virtual void setMediaImageArg(void *memory, uint32_t rootDeviceIndex) = 0; virtual void setMediaSurfaceRotation(void *memory) = 0; virtual void setSurfaceMemoryObjectControlState(void *memory, uint32_t value) = 0; const cl_image_desc &getImageDesc() const; const cl_image_format &getImageFormat() const; const ClSurfaceFormatInfo &getSurfaceFormatInfo() const; void transferDataToHostPtr(MemObjSizeArray ©Size, MemObjOffsetArray ©Offset) override; void transferDataFromHostPtr(MemObjSizeArray ©Size, MemObjOffsetArray ©Offset) override; Image *redescribe(); Image *redescribeFillImage(); ImageCreatFunc createFunction; uint32_t getQPitch() { return qPitch; } void setQPitch(uint32_t qPitch) { this->qPitch = qPitch; } size_t getHostPtrRowPitch() const { return hostPtrRowPitch; } void setHostPtrRowPitch(size_t pitch) { this->hostPtrRowPitch = pitch; } size_t getHostPtrSlicePitch() const { return hostPtrSlicePitch; } void setHostPtrSlicePitch(size_t pitch) { this->hostPtrSlicePitch = pitch; } size_t getImageCount() const { return imageCount; } void setImageCount(size_t imageCount) { this->imageCount = imageCount; } void setImageRowPitch(size_t rowPitch) { imageDesc.image_row_pitch = rowPitch; } void setImageSlicePitch(size_t slicePitch) { imageDesc.image_slice_pitch = slicePitch; } void setSurfaceOffsets(uint64_t offset, uint32_t xOffset, uint32_t yOffset, uint32_t yOffsetForUVPlane) { surfaceOffsets.offset = offset; surfaceOffsets.xOffset = xOffset; surfaceOffsets.yOffset = yOffset; surfaceOffsets.yOffsetForUVplane = yOffsetForUVPlane; } void getSurfaceOffsets(SurfaceOffsets &surfaceOffsetsOut) { surfaceOffsetsOut = this->surfaceOffsets; } void setCubeFaceIndex(uint32_t index) { cubeFaceIndex = index; } uint32_t getCubeFaceIndex() { return cubeFaceIndex; } void setMediaPlaneType(cl_uint type) { mediaPlaneType = type; } cl_uint getMediaPlaneType() const { return mediaPlaneType; } int peekBaseMipLevel() { 
return baseMipLevel; } void setBaseMipLevel(int level) { this->baseMipLevel = level; } uint32_t peekMipCount() { return mipCount; } void setMipCount(uint32_t mipCountNew) { this->mipCount = mipCountNew; } static const ClSurfaceFormatInfo *getSurfaceFormatFromTable(cl_mem_flags flags, const cl_image_format *imageFormat, bool supportsOcl20Features); static cl_int validateRegionAndOrigin(const size_t *origin, const size_t *region, const cl_image_desc &imgDesc); cl_int writeNV12Planes(const void *hostPtr, size_t hostPtrRowPitch, uint32_t rootDeviceIndex); void setMcsSurfaceInfo(const McsSurfaceInfo &info) { mcsSurfaceInfo = info; } const McsSurfaceInfo &getMcsSurfaceInfo() { return mcsSurfaceInfo; } void setPlane(const GMM_YUV_PLANE_ENUM plane) { this->plane = plane; } GMM_YUV_PLANE_ENUM getPlane() const { return this->plane; } size_t calculateOffsetForMapping(const MemObjOffsetArray &origin) const override; virtual void transformImage2dArrayTo3d(void *memory) = 0; virtual void transformImage3dTo2dArray(void *memory) = 0; bool hasSameDescriptor(const cl_image_desc &imageDesc) const; bool hasValidParentImageFormat(const cl_image_format &imageFormat) const; bool isImageFromBuffer() const { return castToObject(static_cast(associatedMemObject)) ? true : false; } bool isImageFromImage() const { return castToObject(static_cast(associatedMemObject)) ? 
true : false; } static cl_int checkIfDeviceSupportsImages(cl_context context); void fillImageRegion(size_t *region) const; protected: Image(Context *context, const MemoryProperties &memoryProperties, cl_mem_flags flags, cl_mem_flags_intel flagsIntel, size_t size, void *memoryStorage, void *hostPtr, cl_image_format imageFormat, const cl_image_desc &imageDesc, bool zeroCopy, MultiGraphicsAllocation multiGraphicsAllocation, bool isObjectRedescribed, uint32_t baseMipLevel, uint32_t mipCount, const ClSurfaceFormatInfo &surfaceFormatInfo, const SurfaceOffsets *surfaceOffsets = nullptr); void getOsSpecificImageInfo(const cl_mem_info ¶mName, size_t *srcParamSize, void **srcParam); MOCKABLE_VIRTUAL void transferData(void *dst, size_t dstRowPitch, size_t dstSlicePitch, void *src, size_t srcRowPitch, size_t srcSlicePitch, std::array copyRegion, std::array copyOrigin); cl_image_format imageFormat; cl_image_desc imageDesc; ClSurfaceFormatInfo surfaceFormatInfo; McsSurfaceInfo mcsSurfaceInfo = {}; uint32_t qPitch = 0; size_t hostPtrRowPitch = 0; size_t hostPtrSlicePitch = 0; size_t imageCount = 0; uint32_t cubeFaceIndex; cl_uint mediaPlaneType; SurfaceOffsets surfaceOffsets = {0}; uint32_t baseMipLevel = 0; uint32_t mipCount = 1; GMM_YUV_PLANE_ENUM plane = GMM_NO_PLANE; static bool isValidSingleChannelFormat(const cl_image_format *imageFormat); static bool isValidIntensityFormat(const cl_image_format *imageFormat); static bool isValidLuminanceFormat(const cl_image_format *imageFormat); static bool isValidDepthFormat(const cl_image_format *imageFormat); static bool isValidDoubleChannelFormat(const cl_image_format *imageFormat); static bool isValidTripleChannelFormat(const cl_image_format *imageFormat); static bool isValidRGBAFormat(const cl_image_format *imageFormat); static bool isValidSRGBFormat(const cl_image_format *imageFormat); static bool isValidARGBFormat(const cl_image_format *imageFormat); static bool isValidDepthStencilFormat(const cl_image_format *imageFormat); static 
bool isValidYUVFormat(const cl_image_format *imageFormat); static bool hasAlphaChannel(const cl_image_format *imageFormat); }; template class ImageHw : public Image { using RENDER_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE; using AUXILIARY_SURFACE_MODE = typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE; public: ImageHw(Context *context, const MemoryProperties &memoryProperties, cl_mem_flags flags, cl_mem_flags_intel flagsIntel, size_t size, void *memoryStorage, void *hostPtr, const cl_image_format &imageFormat, const cl_image_desc &imageDesc, bool zeroCopy, MultiGraphicsAllocation multiGraphicsAllocation, bool isObjectRedescribed, uint32_t baseMipLevel, uint32_t mipCount, const ClSurfaceFormatInfo &surfaceFormatInfo, const SurfaceOffsets *surfaceOffsets = nullptr) : Image(context, memoryProperties, flags, flagsIntel, size, memoryStorage, hostPtr, imageFormat, imageDesc, zeroCopy, std::move(multiGraphicsAllocation), isObjectRedescribed, baseMipLevel, mipCount, surfaceFormatInfo, surfaceOffsets) { if (getImageDesc().image_type == CL_MEM_OBJECT_IMAGE1D || getImageDesc().image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER || getImageDesc().image_type == CL_MEM_OBJECT_IMAGE2D || getImageDesc().image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY || getImageDesc().image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) { this->imageDesc.image_depth = 0; } switch (imageDesc.image_type) { case CL_MEM_OBJECT_IMAGE1D: case CL_MEM_OBJECT_IMAGE1D_BUFFER: case CL_MEM_OBJECT_IMAGE1D_ARRAY: surfaceType = RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_1D; break; default: case CL_MEM_OBJECT_IMAGE2D_ARRAY: case CL_MEM_OBJECT_IMAGE2D: surfaceType = RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_2D; break; case CL_MEM_OBJECT_IMAGE3D: surfaceType = RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_3D; break; } } void setImageArg(void *memory, bool setAsMediaBlockImage, uint32_t mipLevel, uint32_t rootDeviceIndex, bool useGlobalAtomics) override; void setAuxParamsForMultisamples(RENDER_SURFACE_STATE 
*surfaceState); void setMediaImageArg(void *memory, uint32_t rootDeviceIndex) override; void setMediaSurfaceRotation(void *memory) override; void setSurfaceMemoryObjectControlState(void *memory, uint32_t value) override; void appendSurfaceStateParams(RENDER_SURFACE_STATE *surfaceState, uint32_t rootDeviceIndex, bool useGlobalAtomics); void appendSurfaceStateDepthParams(RENDER_SURFACE_STATE *surfaceState, Gmm *gmm); void appendSurfaceStateExt(void *memory); void transformImage2dArrayTo3d(void *memory) override; void transformImage3dTo2dArray(void *memory) override; static Image *create(Context *context, const MemoryProperties &memoryProperties, cl_mem_flags flags, cl_mem_flags_intel flagsIntel, size_t size, void *hostPtr, const cl_image_format &imageFormat, const cl_image_desc &imageDesc, bool zeroCopy, MultiGraphicsAllocation multiGraphicsAllocation, bool isObjectRedescribed, uint32_t baseMipLevel, uint32_t mipCount, const ClSurfaceFormatInfo *surfaceFormatInfo, const SurfaceOffsets *surfaceOffsets) { UNRECOVERABLE_IF(surfaceFormatInfo == nullptr); auto memoryStorage = multiGraphicsAllocation.getDefaultGraphicsAllocation()->getUnderlyingBuffer(); return new ImageHw(context, memoryProperties, flags, flagsIntel, size, memoryStorage, hostPtr, imageFormat, imageDesc, zeroCopy, std::move(multiGraphicsAllocation), isObjectRedescribed, baseMipLevel, mipCount, *surfaceFormatInfo, surfaceOffsets); } static int getShaderChannelValue(int inputShaderChannel, cl_channel_order imageChannelOrder) { if (imageChannelOrder == CL_A) { if (inputShaderChannel == RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_RED || inputShaderChannel == RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_GREEN || inputShaderChannel == RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_BLUE) { return RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_ZERO; } } else if (imageChannelOrder == CL_R || imageChannelOrder == CL_RA || imageChannelOrder == CL_Rx) { if (inputShaderChannel == 
RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_GREEN || inputShaderChannel == RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_BLUE) { return RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_ZERO; } } else if (imageChannelOrder == CL_RG || imageChannelOrder == CL_RGx) { if (inputShaderChannel == RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_BLUE) { return RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_ZERO; } } return inputShaderChannel; } typename RENDER_SURFACE_STATE::SURFACE_TYPE surfaceType; }; } // namespace NEO compute-runtime-22.14.22890/opencl/source/mem_obj/image.inl000066400000000000000000000254761422164147700233460ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_container/command_encoder.h" #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/gmm_helper/gmm.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/gmm_helper/resource_info.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/populate_factory.h" #include "opencl/source/helpers/surface_formats.h" #include "opencl/source/mem_obj/image.h" #include "hw_cmds.h" #include "image_ext.inl" namespace NEO { union SURFACE_STATE_BUFFER_LENGTH { uint32_t Length; struct SurfaceState { uint32_t Width : BITFIELD_RANGE(0, 6); uint32_t Height : BITFIELD_RANGE(7, 20); uint32_t Depth : BITFIELD_RANGE(21, 31); } SurfaceState; }; template void ImageHw::setImageArg(void *memory, bool setAsMediaBlockImage, uint32_t mipLevel, uint32_t rootDeviceIndex, bool useGlobalAtomics) { using SURFACE_FORMAT = typename RENDER_SURFACE_STATE::SURFACE_FORMAT; auto surfaceState = reinterpret_cast(memory); auto graphicsAllocation = multiGraphicsAllocation.getGraphicsAllocation(rootDeviceIndex); auto gmm = graphicsAllocation->getDefaultGmm(); auto gmmHelper = executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->getGmmHelper(); auto imageDescriptor = 
Image::convertDescriptor(getImageDesc()); ImageInfo imgInfo; imgInfo.imgDesc = imageDescriptor; imgInfo.qPitch = qPitch; imgInfo.surfaceFormat = &getSurfaceFormatInfo().surfaceFormat; setImageSurfaceState(surfaceState, imgInfo, graphicsAllocation->getDefaultGmm(), *gmmHelper, cubeFaceIndex, graphicsAllocation->getGpuAddress(), surfaceOffsets, isNV12Image(&this->getImageFormat())); if (getImageDesc().image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER) { // image1d_buffer is image1d created from buffer. The length of buffer could be larger // than the maximal image width. Mock image1d_buffer with SURFACE_TYPE_SURFTYPE_BUFFER. SURFACE_STATE_BUFFER_LENGTH Length = {0}; Length.Length = static_cast(getImageDesc().image_width - 1); surfaceState->setWidth(static_cast(Length.SurfaceState.Width + 1)); surfaceState->setHeight(static_cast(Length.SurfaceState.Height + 1)); surfaceState->setDepth(static_cast(Length.SurfaceState.Depth + 1)); surfaceState->setSurfacePitch(static_cast(getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes)); surfaceState->setSurfaceType(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_BUFFER); } else { setImageSurfaceStateDimensions(surfaceState, imgInfo, cubeFaceIndex, surfaceType); if (setAsMediaBlockImage) { setWidthForMediaBlockSurfaceState(surfaceState, imgInfo); } } surfaceState->setSurfaceMinLod(this->baseMipLevel + mipLevel); surfaceState->setMipCountLod((this->mipCount > 0) ? 
(this->mipCount - 1) : 0); setMipTailStartLod(surfaceState, gmm); cl_channel_order imgChannelOrder = getSurfaceFormatInfo().OCLImageFormat.image_channel_order; int shaderChannelValue = ImageHw::getShaderChannelValue(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_RED, imgChannelOrder); surfaceState->setShaderChannelSelectRed(static_cast(shaderChannelValue)); if (imgChannelOrder == CL_LUMINANCE) { surfaceState->setShaderChannelSelectGreen(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_RED); surfaceState->setShaderChannelSelectBlue(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_RED); } else { shaderChannelValue = ImageHw::getShaderChannelValue(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_GREEN, imgChannelOrder); surfaceState->setShaderChannelSelectGreen(static_cast(shaderChannelValue)); shaderChannelValue = ImageHw::getShaderChannelValue(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_BLUE, imgChannelOrder); surfaceState->setShaderChannelSelectBlue(static_cast(shaderChannelValue)); } surfaceState->setNumberOfMultisamples((typename RENDER_SURFACE_STATE::NUMBER_OF_MULTISAMPLES)mcsSurfaceInfo.multisampleCount); if (imageDesc.num_samples > 1) { setAuxParamsForMultisamples(surfaceState); } else if (graphicsAllocation->isCompressionEnabled()) { EncodeSurfaceState::setImageAuxParamsForCCS(surfaceState, gmm); } else { EncodeSurfaceState::disableCompressionFlags(surfaceState); } appendSurfaceStateDepthParams(surfaceState, gmm); EncodeSurfaceState::appendImageCompressionParams(surfaceState, graphicsAllocation, gmmHelper, isImageFromBuffer(), this->plane); appendSurfaceStateParams(surfaceState, rootDeviceIndex, useGlobalAtomics); appendSurfaceStateExt(surfaceState); } template void ImageHw::setAuxParamsForMultisamples(RENDER_SURFACE_STATE *surfaceState) { using SURFACE_FORMAT = typename RENDER_SURFACE_STATE::SURFACE_FORMAT; if (getMcsAllocation()) { auto mcsGmm = getMcsAllocation()->getDefaultGmm(); if (mcsGmm->unifiedAuxTranslationCapable() && mcsGmm->hasMultisampleControlSurface()) { 
EncodeSurfaceState::setAuxParamsForMCSCCS(surfaceState); surfaceState->setAuxiliarySurfacePitch(mcsGmm->getUnifiedAuxPitchTiles()); surfaceState->setAuxiliarySurfaceQpitch(mcsGmm->getAuxQPitch()); EncodeSurfaceState::setClearColorParams(surfaceState, mcsGmm); setUnifiedAuxBaseAddress(surfaceState, mcsGmm); } else if (mcsGmm->unifiedAuxTranslationCapable()) { EncodeSurfaceState::setImageAuxParamsForCCS(surfaceState, mcsGmm); } else { surfaceState->setAuxiliarySurfaceMode((typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE)1); surfaceState->setAuxiliarySurfacePitch(mcsSurfaceInfo.pitch); surfaceState->setAuxiliarySurfaceQpitch(mcsSurfaceInfo.qPitch); surfaceState->setAuxiliarySurfaceBaseAddress(mcsAllocation->getGpuAddress()); } } else if (isDepthFormat(imageFormat) && surfaceState->getSurfaceFormat() != SURFACE_FORMAT::SURFACE_FORMAT_R32_FLOAT_X8X24_TYPELESS) { surfaceState->setMultisampledSurfaceStorageFormat(RENDER_SURFACE_STATE::MULTISAMPLED_SURFACE_STORAGE_FORMAT::MULTISAMPLED_SURFACE_STORAGE_FORMAT_DEPTH_STENCIL); } } template void ImageHw::appendSurfaceStateParams(RENDER_SURFACE_STATE *surfaceState, uint32_t rootDeviceIndex, bool useGlobalAtomics) { } template inline void ImageHw::appendSurfaceStateDepthParams(RENDER_SURFACE_STATE *surfaceState, Gmm *gmm) { } template void ImageHw::setMediaImageArg(void *memory, uint32_t rootDeviceIndex) { using MEDIA_SURFACE_STATE = typename GfxFamily::MEDIA_SURFACE_STATE; using SURFACE_FORMAT = typename MEDIA_SURFACE_STATE::SURFACE_FORMAT; SURFACE_FORMAT surfaceFormat = MEDIA_SURFACE_STATE::SURFACE_FORMAT_Y8_UNORM_VA; auto graphicsAllocation = multiGraphicsAllocation.getGraphicsAllocation(rootDeviceIndex); auto gmmHelper = executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->getGmmHelper(); auto surfaceState = reinterpret_cast(memory); MEDIA_SURFACE_STATE state = GfxFamily::cmdInitMediaSurfaceState; setMediaSurfaceRotation(reinterpret_cast(&state)); DEBUG_BREAK_IF(surfaceFormat == 
MEDIA_SURFACE_STATE::SURFACE_FORMAT_Y1_UNORM); state.setWidth(static_cast(getImageDesc().image_width)); state.setHeight(static_cast(getImageDesc().image_height)); state.setPictureStructure(MEDIA_SURFACE_STATE::PICTURE_STRUCTURE_FRAME_PICTURE); auto gmm = graphicsAllocation->getDefaultGmm(); auto tileMode = static_cast(gmm->gmmResourceInfo->getTileModeSurfaceState()); state.setTileMode(tileMode); state.setSurfacePitch(static_cast(getImageDesc().image_row_pitch)); state.setSurfaceFormat(surfaceFormat); state.setHalfPitchForChroma(false); state.setInterleaveChroma(false); state.setXOffsetForUCb(0); state.setYOffsetForUCb(0); state.setXOffsetForVCr(0); state.setYOffsetForVCr(0); setSurfaceMemoryObjectControlState( reinterpret_cast(&state), gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_IMAGE)); if (isNV12Image(&this->getImageFormat())) { state.setInterleaveChroma(true); state.setYOffsetForUCb(this->surfaceOffsets.yOffsetForUVplane); } state.setVerticalLineStride(0); state.setVerticalLineStrideOffset(0); state.setSurfaceBaseAddress(graphicsAllocation->getGpuAddress() + this->surfaceOffsets.offset); *surfaceState = state; } template void ImageHw::transformImage2dArrayTo3d(void *memory) { DEBUG_BREAK_IF(imageDesc.image_type != CL_MEM_OBJECT_IMAGE3D); using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE; auto surfaceState = reinterpret_cast(memory); surfaceState->setSurfaceType(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_3D); surfaceState->setSurfaceArray(false); } template void ImageHw::transformImage3dTo2dArray(void *memory) { DEBUG_BREAK_IF(imageDesc.image_type != CL_MEM_OBJECT_IMAGE3D); using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE; auto surfaceState = reinterpret_cast(memory); surfaceState->setSurfaceType(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_2D); surfaceState->setSurfaceArray(true); } template inline void ImageHw::setMediaSurfaceRotation(void *memory) { using MEDIA_SURFACE_STATE = typename GfxFamily::MEDIA_SURFACE_STATE; using SURFACE_FORMAT = 
typename MEDIA_SURFACE_STATE::SURFACE_FORMAT; auto surfaceState = reinterpret_cast(memory); surfaceState->setRotation(MEDIA_SURFACE_STATE::ROTATION_NO_ROTATION_OR_0_DEGREE); surfaceState->setXOffset(0); surfaceState->setYOffset(0); } template inline void ImageHw::setSurfaceMemoryObjectControlState(void *memory, uint32_t value) { using MEDIA_SURFACE_STATE = typename GfxFamily::MEDIA_SURFACE_STATE; using SURFACE_FORMAT = typename MEDIA_SURFACE_STATE::SURFACE_FORMAT; auto surfaceState = reinterpret_cast(memory); surfaceState->setSurfaceMemoryObjectControlState(value); } } // namespace NEO compute-runtime-22.14.22890/opencl/source/mem_obj/image_factory_init.inl000066400000000000000000000005371422164147700261070ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ namespace NEO { template class ImageHw; template <> void populateFactoryTable>() { extern ImageFactoryFuncs imageFactory[IGFX_MAX_CORE]; imageFactory[gfxCore].createImageFunction = ImageHw::create; } } // namespace NEO compute-runtime-22.14.22890/opencl/source/mem_obj/image_tgllp_and_later.inl000066400000000000000000000010471422164147700265450ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gmm_helper/gmm.h" #include "shared/source/gmm_helper/resource_info.h" #include "opencl/source/mem_obj/image.h" namespace NEO { template <> void ImageHw::appendSurfaceStateDepthParams(RENDER_SURFACE_STATE *surfaceState, Gmm *gmm) { if (gmm) { const bool isDepthResource = gmm->gmmResourceInfo->getResourceFlags()->Gpu.Depth; surfaceState->setDepthStencilResource(isDepthResource); } } } // namespace NEOcompute-runtime-22.14.22890/opencl/source/mem_obj/map_operations_handler.cpp000066400000000000000000000066551422164147700267770ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include 
"opencl/source/mem_obj/map_operations_handler.h" #include "shared/source/helpers/ptr_math.h" using namespace NEO; size_t MapOperationsHandler::size() const { std::lock_guard lock(mtx); return mappedPointers.size(); } bool MapOperationsHandler::add(void *ptr, size_t ptrLength, cl_map_flags &mapFlags, MemObjSizeArray &size, MemObjOffsetArray &offset, uint32_t mipLevel, GraphicsAllocation *graphicsAllocation) { std::lock_guard lock(mtx); MapInfo mapInfo(ptr, ptrLength, size, offset, mipLevel); mapInfo.readOnly = (mapFlags == CL_MAP_READ); mapInfo.graphicsAllocation = graphicsAllocation; if (isOverlapping(mapInfo)) { return false; } mappedPointers.push_back(mapInfo); return true; } bool MapOperationsHandler::isOverlapping(MapInfo &inputMapInfo) { if (inputMapInfo.readOnly) { return false; } auto inputStartPtr = inputMapInfo.ptr; auto inputEndPtr = ptrOffset(inputStartPtr, inputMapInfo.ptrLength); for (auto &mapInfo : mappedPointers) { auto mappedStartPtr = mapInfo.ptr; auto mappedEndPtr = ptrOffset(mappedStartPtr, mapInfo.ptrLength); // Requested ptr starts before or inside existing ptr range and overlapping end if (inputStartPtr < mappedEndPtr && inputEndPtr >= mappedStartPtr) { return true; } } return false; } bool MapOperationsHandler::find(void *mappedPtr, MapInfo &outMapInfo) { std::lock_guard lock(mtx); for (auto &mapInfo : mappedPointers) { if (mapInfo.ptr == mappedPtr) { outMapInfo = mapInfo; return true; } } return false; } bool NEO::MapOperationsHandler::findInfoForHostPtr(const void *ptr, size_t size, MapInfo &outMapInfo) { std::lock_guard lock(mtx); for (auto &mapInfo : mappedPointers) { void *ptrStart = mapInfo.ptr; void *ptrEnd = ptrOffset(mapInfo.ptr, mapInfo.ptrLength); if (ptrStart <= ptr && ptrOffset(ptr, size) <= ptrEnd) { outMapInfo = mapInfo; return true; } } return false; } void MapOperationsHandler::remove(void *mappedPtr) { std::lock_guard lock(mtx); auto endIter = mappedPointers.end(); for (auto it = mappedPointers.begin(); it != endIter; it++) 
{ if (it->ptr == mappedPtr) { std::iter_swap(it, mappedPointers.end() - 1); mappedPointers.pop_back(); break; } } } MapOperationsHandler &NEO::MapOperationsStorage::getHandler(cl_mem memObj) { std::lock_guard lock(mutex); return handlers[memObj]; } MapOperationsHandler *NEO::MapOperationsStorage::getHandlerIfExists(cl_mem memObj) { std::lock_guard lock(mutex); auto iterator = handlers.find(memObj); if (iterator == handlers.end()) { return nullptr; } return &iterator->second; } bool NEO::MapOperationsStorage::getInfoForHostPtr(const void *ptr, size_t size, MapInfo &outInfo) { for (auto &entry : handlers) { if (entry.second.findInfoForHostPtr(ptr, size, outInfo)) { return true; } } return false; } void NEO::MapOperationsStorage::removeHandler(cl_mem memObj) { std::lock_guard lock(mutex); auto iterator = handlers.find(memObj); handlers.erase(iterator); } compute-runtime-22.14.22890/opencl/source/mem_obj/map_operations_handler.h000066400000000000000000000023471422164147700264360ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/helpers/properties_helper.h" #include #include #include namespace NEO { class MapOperationsHandler { public: virtual ~MapOperationsHandler() = default; bool add(void *ptr, size_t ptrLength, cl_map_flags &mapFlags, MemObjSizeArray &size, MemObjOffsetArray &offset, uint32_t mipLevel, GraphicsAllocation *graphicsAllocation); void remove(void *mappedPtr); bool find(void *mappedPtr, MapInfo &outMapInfo); bool findInfoForHostPtr(const void *ptr, size_t size, MapInfo &outMapInfo); size_t size() const; protected: bool isOverlapping(MapInfo &inputMapInfo); std::vector mappedPointers; mutable std::mutex mtx; }; class MapOperationsStorage { public: using HandlersMap = std::unordered_map; MapOperationsHandler &getHandler(cl_mem memObj); MapOperationsHandler *getHandlerIfExists(cl_mem memObj); bool getInfoForHostPtr(const void *ptr, size_t size, MapInfo 
&outInfo); void removeHandler(cl_mem memObj); protected: std::mutex mutex; HandlersMap handlers{}; }; } // namespace NEO compute-runtime-22.14.22890/opencl/source/mem_obj/mem_obj.cpp000066400000000000000000000373661422164147700236750ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/mem_obj/mem_obj.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/gmm_helper/gmm.h" #include "shared/source/gmm_helper/resource_info.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/bit_helpers.h" #include "shared/source/helpers/get_info.h" #include "shared/source/memory_manager/deferred_deleter.h" #include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/os_interface/os_context.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/context/context.h" #include "opencl/source/helpers/get_info_status_mapper.h" #include namespace NEO { MemObj::MemObj(Context *context, cl_mem_object_type memObjectType, const MemoryProperties &memoryProperties, cl_mem_flags flags, cl_mem_flags_intel flagsIntel, size_t size, void *memoryStorage, void *hostPtr, MultiGraphicsAllocation multiGraphicsAllocation, bool zeroCopy, bool isHostPtrSVM, bool isObjectRedescribed) : context(context), memObjectType(memObjectType), memoryProperties(memoryProperties), flags(flags), flagsIntel(flagsIntel), size(size), memoryStorage(memoryStorage), hostPtr(hostPtr), isZeroCopy(zeroCopy), isHostPtrSVM(isHostPtrSVM), isObjectRedescribed(isObjectRedescribed), multiGraphicsAllocation(std::move(multiGraphicsAllocation)), mapAllocations(static_cast(this->multiGraphicsAllocation.getGraphicsAllocations().size() - 1)) { if (context) { context->incRefInternal(); memoryManager = context->getMemoryManager(); auto 
device = context->getDevice(0); executionEnvironment = device->getExecutionEnvironment(); } } MemObj::~MemObj() { if (!context) { return; } bool needWait = false; if (allocatedMapPtr != nullptr) { needWait = true; } if (auto mapOperationsHandler = getMapOperationsHandlerIfExists(); mapOperationsHandler != nullptr) { if (mapOperationsHandler->size() > 0 && !getCpuAddressForMapping()) { needWait = true; } context->getMapOperationsStorage().removeHandler(this); } if (!destructorCallbacks.empty()) { needWait = true; } if (!isObjectRedescribed) { if (peekSharingHandler()) { peekSharingHandler()->releaseReusedGraphicsAllocation(); } for (auto graphicsAllocation : multiGraphicsAllocation.getGraphicsAllocations()) { auto rootDeviceIndex = graphicsAllocation ? graphicsAllocation->getRootDeviceIndex() : 0; bool doAsyncDestructions = DebugManager.flags.EnableAsyncDestroyAllocations.get(); if (graphicsAllocation && !associatedMemObject && !isHostPtrSVM && graphicsAllocation->peekReuseCount() == 0) { memoryManager->removeAllocationFromHostPtrManager(graphicsAllocation); if (!doAsyncDestructions) { needWait = true; } if (needWait && graphicsAllocation->isUsed()) { memoryManager->waitForEnginesCompletion(*graphicsAllocation); } destroyGraphicsAllocation(graphicsAllocation, doAsyncDestructions); graphicsAllocation = nullptr; } if (!associatedMemObject) { releaseMapAllocation(rootDeviceIndex, doAsyncDestructions); } if (mcsAllocation) { destroyGraphicsAllocation(mcsAllocation, false); } if (graphicsAllocation && associatedMemObject) { if (associatedMemObject->getGraphicsAllocation(graphicsAllocation->getRootDeviceIndex()) != graphicsAllocation) { destroyGraphicsAllocation(graphicsAllocation, false); } } } if (associatedMemObject) { associatedMemObject->decRefInternal(); } if (!associatedMemObject) { releaseAllocatedMapPtr(); } } destructorCallbacks.invoke(this); context->decRefInternal(); } cl_int MemObj::getMemObjectInfo(cl_mem_info paramName, size_t paramValueSize, void 
*paramValue, size_t *paramValueSizeRet) { cl_int retVal; size_t srcParamSize = GetInfo::invalidSourceSize; void *srcParam = nullptr; cl_bool usesSVMPointer; cl_uint refCnt = 0; cl_uint mapCount = 0; cl_mem clAssociatedMemObject = static_cast(this->associatedMemObject); cl_context ctx = nullptr; uint64_t internalHandle = 0llu; auto allocation = getMultiGraphicsAllocation().getDefaultGraphicsAllocation(); cl_bool usesCompression; switch (paramName) { case CL_MEM_TYPE: srcParamSize = sizeof(memObjectType); srcParam = &memObjectType; break; case CL_MEM_FLAGS: srcParamSize = sizeof(flags); srcParam = &flags; break; case CL_MEM_SIZE: srcParamSize = sizeof(size); srcParam = &size; break; case CL_MEM_HOST_PTR: srcParamSize = sizeof(hostPtr); srcParam = &hostPtr; break; case CL_MEM_CONTEXT: srcParamSize = sizeof(context); ctx = context; srcParam = &ctx; break; case CL_MEM_USES_SVM_POINTER: usesSVMPointer = isHostPtrSVM && isValueSet(flags, CL_MEM_USE_HOST_PTR); srcParamSize = sizeof(cl_bool); srcParam = &usesSVMPointer; break; case CL_MEM_OFFSET: srcParamSize = sizeof(offset); srcParam = &offset; break; case CL_MEM_ASSOCIATED_MEMOBJECT: srcParamSize = sizeof(clAssociatedMemObject); srcParam = &clAssociatedMemObject; break; case CL_MEM_MAP_COUNT: srcParamSize = sizeof(mapCount); mapCount = static_cast(getMapOperationsHandler().size()); srcParam = &mapCount; break; case CL_MEM_REFERENCE_COUNT: refCnt = static_cast(this->getReference()); srcParamSize = sizeof(refCnt); srcParam = &refCnt; break; case CL_MEM_ALLOCATION_HANDLE_INTEL: internalHandle = multiGraphicsAllocation.getDefaultGraphicsAllocation()->peekInternalHandle(this->memoryManager); srcParamSize = sizeof(internalHandle); srcParam = &internalHandle; break; case CL_MEM_USES_COMPRESSION_INTEL: usesCompression = allocation->isCompressionEnabled(); srcParam = &usesCompression; srcParamSize = sizeof(cl_bool); break; case CL_MEM_PROPERTIES: srcParamSize = propertiesVector.size() * sizeof(cl_mem_properties); srcParam = 
propertiesVector.data(); break; default: getOsSpecificMemObjectInfo(paramName, &srcParamSize, &srcParam); break; } auto getInfoStatus = GetInfo::getInfo(paramValue, paramValueSize, srcParam, srcParamSize); retVal = changeGetInfoStatusToCLResultType(getInfoStatus); GetInfo::setParamValueReturnSize(paramValueSizeRet, srcParamSize, getInfoStatus); return retVal; } cl_int MemObj::setDestructorCallback(void(CL_CALLBACK *funcNotify)(cl_mem, void *), void *userData) { std::unique_lock theLock(mtx); destructorCallbacks.add(funcNotify, userData); return CL_SUCCESS; } void *MemObj::getCpuAddress() const { return memoryStorage; } void *MemObj::getHostPtr() const { return hostPtr; } size_t MemObj::getSize() const { return size; } void MemObj::setAllocatedMapPtr(void *allocatedMapPtr) { this->allocatedMapPtr = allocatedMapPtr; } bool MemObj::isMemObjZeroCopy() const { return isZeroCopy; } bool MemObj::isMemObjWithHostPtrSVM() const { return isHostPtrSVM; } bool MemObj::isMemObjUncacheable() const { return isValueSet(flagsIntel, CL_MEM_LOCALLY_UNCACHED_RESOURCE); } bool MemObj::isMemObjUncacheableForSurfaceState() const { return isAnyBitSet(flagsIntel, CL_MEM_LOCALLY_UNCACHED_SURFACE_STATE_RESOURCE | CL_MEM_LOCALLY_UNCACHED_RESOURCE); } GraphicsAllocation *MemObj::getGraphicsAllocation(uint32_t rootDeviceIndex) const { return multiGraphicsAllocation.getGraphicsAllocation(rootDeviceIndex); } void MemObj::checkUsageAndReleaseOldAllocation(uint32_t rootDeviceIndex) { auto graphicsAllocation = getGraphicsAllocation(rootDeviceIndex); if (graphicsAllocation != nullptr && (peekSharingHandler() == nullptr || graphicsAllocation->peekReuseCount() == 0)) { memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(graphicsAllocation); } } void MemObj::resetGraphicsAllocation(GraphicsAllocation *newGraphicsAllocation) { TakeOwnershipWrapper lock(*this); checkUsageAndReleaseOldAllocation(newGraphicsAllocation->getRootDeviceIndex()); 
multiGraphicsAllocation.addAllocation(newGraphicsAllocation); } void MemObj::removeGraphicsAllocation(uint32_t rootDeviceIndex) { TakeOwnershipWrapper lock(*this); checkUsageAndReleaseOldAllocation(rootDeviceIndex); multiGraphicsAllocation.removeAllocation(rootDeviceIndex); } bool MemObj::readMemObjFlagsInvalid() { return isValueSet(flags, CL_MEM_HOST_WRITE_ONLY) || isValueSet(flags, CL_MEM_HOST_NO_ACCESS); } bool MemObj::writeMemObjFlagsInvalid() { return isValueSet(flags, CL_MEM_HOST_READ_ONLY) || isValueSet(flags, CL_MEM_HOST_NO_ACCESS); } bool MemObj::mapMemObjFlagsInvalid(cl_map_flags mapFlags) { return (writeMemObjFlagsInvalid() && (mapFlags & CL_MAP_WRITE)) || (readMemObjFlagsInvalid() && (mapFlags & CL_MAP_READ)); } void MemObj::setHostPtrMinSize(size_t size) { hostPtrMinSize = size; } void *MemObj::getCpuAddressForMapping() { void *ptrToReturn = nullptr; if (isValueSet(flags, CL_MEM_USE_HOST_PTR)) { ptrToReturn = this->hostPtr; } else { ptrToReturn = this->memoryStorage; } return ptrToReturn; } void *MemObj::getCpuAddressForMemoryTransfer() { void *ptrToReturn = nullptr; if (isValueSet(flags, CL_MEM_USE_HOST_PTR) && this->isMemObjZeroCopy()) { ptrToReturn = this->hostPtr; } else { ptrToReturn = this->memoryStorage; } return ptrToReturn; } void MemObj::releaseAllocatedMapPtr() { if (allocatedMapPtr) { DEBUG_BREAK_IF(isValueSet(flags, CL_MEM_USE_HOST_PTR)); memoryManager->freeSystemMemory(allocatedMapPtr); } allocatedMapPtr = nullptr; } void MemObj::releaseMapAllocation(uint32_t rootDeviceIndex, bool asyncDestroy) { auto mapAllocation = mapAllocations.getGraphicsAllocation(rootDeviceIndex); if (mapAllocation && !isHostPtrSVM) { if (asyncDestroy && !isValueSet(flags, CL_MEM_USE_HOST_PTR)) { destroyGraphicsAllocation(mapAllocation, true); } else { if (mapAllocation->isUsed()) { memoryManager->waitForEnginesCompletion(*mapAllocation); } destroyGraphicsAllocation(mapAllocation, false); } } } void MemObj::destroyGraphicsAllocation(GraphicsAllocation *allocation, 
bool asyncDestroy) { if (asyncDestroy) { memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(allocation); } else { memoryManager->freeGraphicsMemory(allocation); } } bool MemObj::checkIfMemoryTransferIsRequired(size_t offsetInMemObject, size_t offsetInHostPtr, const void *hostPtr, cl_command_type cmdType) { auto bufferStorage = ptrOffset(this->getCpuAddressForMemoryTransfer(), offsetInMemObject); auto hostStorage = ptrOffset(hostPtr, offsetInHostPtr); auto isMemTransferNeeded = !((bufferStorage == hostStorage) && (cmdType == CL_COMMAND_WRITE_BUFFER || cmdType == CL_COMMAND_READ_BUFFER || cmdType == CL_COMMAND_WRITE_BUFFER_RECT || cmdType == CL_COMMAND_READ_BUFFER_RECT || cmdType == CL_COMMAND_WRITE_IMAGE || cmdType == CL_COMMAND_READ_IMAGE)); return isMemTransferNeeded; } void *MemObj::getBasePtrForMap(uint32_t rootDeviceIndex) { if (associatedMemObject) { return associatedMemObject->getBasePtrForMap(rootDeviceIndex); } if (getFlags() & CL_MEM_USE_HOST_PTR) { return getHostPtr(); } else { TakeOwnershipWrapper memObjOwnership(*this); if (getMapAllocation(rootDeviceIndex)) { return getMapAllocation(rootDeviceIndex)->getUnderlyingBuffer(); } else { auto memory = getAllocatedMapPtr(); if (!memory) { memory = memoryManager->allocateSystemMemory(getSize(), MemoryConstants::pageSize); setAllocatedMapPtr(memory); } AllocationProperties properties{rootDeviceIndex, false, // allocateMemory getSize(), AllocationType::MAP_ALLOCATION, false, //isMultiStorageAllocation context->getDeviceBitfieldForAllocation(rootDeviceIndex)}; auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(properties, memory); setMapAllocation(allocation); return getAllocatedMapPtr(); } } } MapOperationsHandler &MemObj::getMapOperationsHandler() { return context->getMapOperationsStorage().getHandler(this); } MapOperationsHandler *MemObj::getMapOperationsHandlerIfExists() { return context->getMapOperationsStorage().getHandlerIfExists(this); } bool MemObj::addMappedPtr(void *ptr, 
size_t ptrLength, cl_map_flags &mapFlags, MemObjSizeArray &size, MemObjOffsetArray &offset, uint32_t mipLevel, GraphicsAllocation *graphicsAllocation) { return getMapOperationsHandler().add(ptr, ptrLength, mapFlags, size, offset, mipLevel, graphicsAllocation); } bool MemObj::findMappedPtr(void *mappedPtr, MapInfo &outMapInfo) { return getMapOperationsHandler().find(mappedPtr, outMapInfo); } void MemObj::removeMappedPtr(void *mappedPtr) { getMapOperationsHandler().remove(mappedPtr); } bool MemObj::isTiledAllocation() const { auto graphicsAllocation = multiGraphicsAllocation.getDefaultGraphicsAllocation(); auto gmm = graphicsAllocation->getDefaultGmm(); return gmm && (gmm->gmmResourceInfo->getTileModeSurfaceState() != 0); } bool MemObj::mappingOnCpuAllowed() const { auto graphicsAllocation = multiGraphicsAllocation.getDefaultGraphicsAllocation(); return !isTiledAllocation() && !peekSharingHandler() && !isMipMapped(this) && !DebugManager.flags.DisableZeroCopyForBuffers.get() && !graphicsAllocation->isCompressionEnabled() && MemoryPool::isSystemMemoryPool(graphicsAllocation->getMemoryPool()); } void MemObj::storeProperties(const cl_mem_properties *properties) { if (properties) { for (size_t i = 0; properties[i] != 0; i += 2) { propertiesVector.push_back(properties[i]); propertiesVector.push_back(properties[i + 1]); } propertiesVector.push_back(0); } } void MemObj::cleanAllGraphicsAllocations(Context &context, MemoryManager &memoryManager, AllocationInfoType &allocationInfo, bool isParentObject) { if (!isParentObject) { for (auto &index : context.getRootDeviceIndices()) { if (allocationInfo[index].memory) { memoryManager.removeAllocationFromHostPtrManager(allocationInfo[index].memory); memoryManager.freeGraphicsMemory(allocationInfo[index].memory); } } } } } // namespace NEO compute-runtime-22.14.22890/opencl/source/mem_obj/mem_obj.h000066400000000000000000000166471422164147700233410ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * 
SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/memory_manager/multi_graphics_allocation.h" #include "opencl/extensions/public/cl_ext_private.h" #include "opencl/source/api/cl_types.h" #include "opencl/source/helpers/base_object.h" #include "opencl/source/helpers/destructor_callbacks.h" #include "opencl/source/helpers/mipmap.h" #include "opencl/source/mem_obj/map_operations_handler.h" #include "opencl/source/sharings/sharing.h" #include "memory_properties_flags.h" #include #include #include #include namespace NEO { class ExecutionEnvironment; class GraphicsAllocation; struct KernelInfo; class MemoryManager; class Context; template <> struct OpenCLObjectMapper<_cl_mem> { typedef class MemObj DerivedType; }; namespace CreateMemObj { struct AllocationInfo { GraphicsAllocation *mapAllocation = nullptr; GraphicsAllocation *memory = nullptr; AllocationType allocationType = AllocationType::UNKNOWN; bool zeroCopyAllowed = true; bool isHostPtrSVM = false; bool alignementSatisfied = true; bool allocateMemory = true; bool copyMemoryFromHostPtr = false; bool transferNeeded = false; }; } // namespace CreateMemObj using AllocationInfoType = StackVec; class MemObj : public BaseObject<_cl_mem> { public: constexpr static cl_ulong maskMagic = 0xFFFFFFFFFFFFFF00LL; constexpr static cl_ulong objectMagic = 0xAB2212340CACDD00LL; MemObj(Context *context, cl_mem_object_type memObjectType, const MemoryProperties &memoryProperties, cl_mem_flags flags, cl_mem_flags_intel flagsIntel, size_t size, void *memoryStorage, void *hostPtr, MultiGraphicsAllocation multiGraphicsAllocation, bool zeroCopy, bool isHostPtrSVM, bool isObjectRedescrbied); ~MemObj() override; cl_int getMemObjectInfo(cl_mem_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet); cl_int setDestructorCallback(void(CL_CALLBACK *funcNotify)(cl_mem, void *), void *userData); void *getCpuAddress() const; void 
*getHostPtr() const; bool getIsObjectRedescribed() const { return isObjectRedescribed; }; size_t getSize() const; MapOperationsHandler &getMapOperationsHandler(); MapOperationsHandler *getMapOperationsHandlerIfExists(); bool addMappedPtr(void *ptr, size_t ptrLength, cl_map_flags &mapFlags, MemObjSizeArray &size, MemObjOffsetArray &offset, uint32_t mipLevel, GraphicsAllocation *graphicsAllocation); bool findMappedPtr(void *mappedPtr, MapInfo &outMapInfo); void removeMappedPtr(void *mappedPtr); void *getBasePtrForMap(uint32_t rootDeviceIndex); MOCKABLE_VIRTUAL void setAllocatedMapPtr(void *allocatedMapPtr); void *getAllocatedMapPtr() const { return allocatedMapPtr; } void setHostPtrMinSize(size_t size); void releaseAllocatedMapPtr(); void releaseMapAllocation(uint32_t rootDeviceIndex, bool asyncDestroy); bool isMemObjZeroCopy() const; bool isMemObjWithHostPtrSVM() const; bool isMemObjUncacheable() const; bool isMemObjUncacheableForSurfaceState() const; virtual void transferDataToHostPtr(MemObjSizeArray ©Size, MemObjOffsetArray ©Offset) { UNRECOVERABLE_IF(true); }; virtual void transferDataFromHostPtr(MemObjSizeArray ©Size, MemObjOffsetArray ©Offset) { UNRECOVERABLE_IF(true); }; GraphicsAllocation *getGraphicsAllocation(uint32_t rootDeviceIndex) const; void resetGraphicsAllocation(GraphicsAllocation *newGraphicsAllocation); void removeGraphicsAllocation(uint32_t rootDeviceIndex); GraphicsAllocation *getMcsAllocation() { return mcsAllocation; } void setMcsAllocation(GraphicsAllocation *alloc) { mcsAllocation = alloc; } bool readMemObjFlagsInvalid(); bool writeMemObjFlagsInvalid(); bool mapMemObjFlagsInvalid(cl_map_flags mapFlags); MOCKABLE_VIRTUAL bool isTiledAllocation() const; void *getCpuAddressForMapping(); void *getCpuAddressForMemoryTransfer(); std::shared_ptr &getSharingHandler() { return sharingHandler; } SharingHandler *peekSharingHandler() const { return sharingHandler.get(); } void setSharingHandler(SharingHandler *sharingHandler) { 
this->sharingHandler.reset(sharingHandler); } void setParentSharingHandler(std::shared_ptr &handler) { sharingHandler = handler; } unsigned int acquireCount = 0; Context *getContext() const { return context; } void destroyGraphicsAllocation(GraphicsAllocation *allocation, bool asyncDestroy); bool checkIfMemoryTransferIsRequired(size_t offsetInMemObject, size_t offsetInHostPtr, const void *ptr, cl_command_type cmdType); bool mappingOnCpuAllowed() const; virtual size_t calculateOffsetForMapping(const MemObjOffsetArray &offset) const { return offset[0]; } size_t calculateMappedPtrLength(const MemObjSizeArray &size) const { return calculateOffsetForMapping(size); } cl_mem_object_type peekClMemObjType() const { return memObjectType; } size_t getOffset() const { return offset; } MemoryManager *getMemoryManager() const { return memoryManager; } void setMapAllocation(GraphicsAllocation *allocation) { mapAllocations.addAllocation(allocation); } GraphicsAllocation *getMapAllocation(uint32_t rootDeviceIndex) const { if (associatedMemObject) { return associatedMemObject->getMapAllocation(rootDeviceIndex); } return mapAllocations.getGraphicsAllocation(rootDeviceIndex); } const cl_mem_flags &getFlags() const { return flags; } const cl_mem_flags &getFlagsIntel() const { return flagsIntel; } const MultiGraphicsAllocation &getMultiGraphicsAllocation() const { return multiGraphicsAllocation; } static void cleanAllGraphicsAllocations(Context &context, MemoryManager &memoryManager, AllocationInfoType &allocationInfo, bool isParentObject); MemObj *getHighestRootMemObj() { if (!associatedMemObject) { return this; } return associatedMemObject->getHighestRootMemObj(); } protected: void getOsSpecificMemObjectInfo(const cl_mem_info ¶mName, size_t *srcParamSize, void **srcParam); void storeProperties(const cl_mem_properties *properties); void checkUsageAndReleaseOldAllocation(uint32_t rootDeviceIndex); Context *context; cl_mem_object_type memObjectType; MemoryProperties memoryProperties; 
cl_mem_flags flags = 0; cl_mem_flags_intel flagsIntel = 0; size_t size; size_t hostPtrMinSize = 0; void *memoryStorage; void *hostPtr; void *allocatedMapPtr = nullptr; size_t offset = 0; MemObj *associatedMemObject = nullptr; cl_uint refCount = 0; ExecutionEnvironment *executionEnvironment = nullptr; bool isZeroCopy; bool isHostPtrSVM; bool isObjectRedescribed; MemoryManager *memoryManager = nullptr; MultiGraphicsAllocation multiGraphicsAllocation; GraphicsAllocation *mcsAllocation = nullptr; MultiGraphicsAllocation mapAllocations; std::shared_ptr sharingHandler; std::vector propertiesVector; MemObjDestructorCallbacks destructorCallbacks; }; } // namespace NEO compute-runtime-22.14.22890/opencl/source/mem_obj/mem_obj_helper.cpp000066400000000000000000000226161422164147700252240ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/mem_obj/mem_obj_helper.h" #include "shared/source/helpers/memory_properties_helpers.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/context/context.h" #include "opencl/source/helpers/cl_hw_helper.h" namespace NEO { bool MemObjHelper::validateMemoryPropertiesForBuffer(const MemoryProperties &memoryProperties, cl_mem_flags flags, cl_mem_flags_intel flagsIntel, const Context &context) { /* Check all the invalid flags combination. 
*/ if ((isValueSet(flags, CL_MEM_READ_WRITE | CL_MEM_READ_ONLY)) || (isValueSet(flags, CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY)) || (isValueSet(flags, CL_MEM_READ_ONLY | CL_MEM_WRITE_ONLY)) || (isValueSet(flags, CL_MEM_ALLOC_HOST_PTR | CL_MEM_USE_HOST_PTR)) || (isValueSet(flags, CL_MEM_COPY_HOST_PTR | CL_MEM_USE_HOST_PTR)) || (isValueSet(flags, CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS)) || (isValueSet(flags, CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_WRITE_ONLY)) || (isValueSet(flags, CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS))) { return false; } return validateExtraMemoryProperties(memoryProperties, flags, flagsIntel, context); } bool MemObjHelper::validateMemoryPropertiesForImage(const MemoryProperties &memoryProperties, cl_mem_flags flags, cl_mem_flags_intel flagsIntel, cl_mem parent, const Context &context) { /* Check all the invalid flags combination. */ if ((!isValueSet(flags, CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL)) && (isValueSet(flags, CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY) || isValueSet(flags, CL_MEM_READ_WRITE | CL_MEM_READ_ONLY) || isValueSet(flags, CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY) || isValueSet(flags, CL_MEM_ALLOC_HOST_PTR | CL_MEM_USE_HOST_PTR) || isValueSet(flags, CL_MEM_COPY_HOST_PTR | CL_MEM_USE_HOST_PTR) || isValueSet(flags, CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_READ_ONLY) || isValueSet(flags, CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS) || isValueSet(flags, CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS) || isValueSet(flags, CL_MEM_NO_ACCESS_INTEL | CL_MEM_READ_WRITE) || isValueSet(flags, CL_MEM_NO_ACCESS_INTEL | CL_MEM_WRITE_ONLY) || isValueSet(flags, CL_MEM_NO_ACCESS_INTEL | CL_MEM_READ_ONLY))) { return false; } auto parentMemObj = castToObject(parent); if (parentMemObj != nullptr && flags) { auto parentFlags = parentMemObj->getFlags(); /* Check whether flags are compatible with parent. 
*/ if (isValueSet(flags, CL_MEM_ALLOC_HOST_PTR) || isValueSet(flags, CL_MEM_COPY_HOST_PTR) || isValueSet(flags, CL_MEM_USE_HOST_PTR) || ((!isValueSet(parentFlags, CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL)) && (!isValueSet(flags, CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL)) && ((isValueSet(parentFlags, CL_MEM_WRITE_ONLY) && isValueSet(flags, CL_MEM_READ_WRITE)) || (isValueSet(parentFlags, CL_MEM_WRITE_ONLY) && isValueSet(flags, CL_MEM_READ_ONLY)) || (isValueSet(parentFlags, CL_MEM_READ_ONLY) && isValueSet(flags, CL_MEM_READ_WRITE)) || (isValueSet(parentFlags, CL_MEM_READ_ONLY) && isValueSet(flags, CL_MEM_WRITE_ONLY)) || (isValueSet(parentFlags, CL_MEM_NO_ACCESS_INTEL) && isValueSet(flags, CL_MEM_READ_WRITE)) || (isValueSet(parentFlags, CL_MEM_NO_ACCESS_INTEL) && isValueSet(flags, CL_MEM_WRITE_ONLY)) || (isValueSet(parentFlags, CL_MEM_NO_ACCESS_INTEL) && isValueSet(flags, CL_MEM_READ_ONLY)) || (isValueSet(parentFlags, CL_MEM_HOST_NO_ACCESS) && isValueSet(flags, CL_MEM_HOST_WRITE_ONLY)) || (isValueSet(parentFlags, CL_MEM_HOST_NO_ACCESS) && isValueSet(flags, CL_MEM_HOST_READ_ONLY))))) { return false; } } return validateExtraMemoryProperties(memoryProperties, flags, flagsIntel, context); }

// Builds the AllocationProperties used to allocate an image on `rootDeviceIndex`.
// The caller's sub-device bitfield is first passed through
// MemoryPropertiesHelper::adjustDeviceBitfield, the allocation type is fixed to
// AllocationType::IMAGE, and the remaining policies are filled in by
// MemoryPropertiesHelper::fillPoliciesInProperties.
AllocationProperties MemObjHelper::getAllocationPropertiesWithImageInfo(
    uint32_t rootDeviceIndex, ImageInfo &imgInfo, bool allocateMemory, const MemoryProperties &memoryProperties,
    const HardwareInfo &hwInfo, DeviceBitfield subDevicesBitfieldParam, bool deviceOnlyVisibilty) {

    auto deviceBitfield = MemoryPropertiesHelper::adjustDeviceBitfield(rootDeviceIndex, memoryProperties, subDevicesBitfieldParam);
    AllocationProperties allocationProperties{rootDeviceIndex, allocateMemory, imgInfo, AllocationType::IMAGE, deviceBitfield};
    MemoryPropertiesHelper::fillPoliciesInProperties(allocationProperties, memoryProperties, hwInfo, deviceOnlyVisibilty);
    return allocationProperties;
}

// Checks sub-buffer creation flags against the device-access and host-access flags listed
// below; the result is whatever isFieldValid(flags, allValidFlags) reports.
// NOTE(review): presumably isFieldValid rejects any bit outside the mask - confirm in bit_helpers.h.
bool MemObjHelper::checkMemFlagsForSubBuffer(cl_mem_flags flags) {
    const cl_mem_flags allValidFlags =
        CL_MEM_READ_WRITE |
        CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS;

    return isFieldValid(flags, allValidFlags);
}

// Translates OpenCL memory flags into SVM allocation properties:
//   coherent        <- CL_MEM_SVM_FINE_GRAIN_BUFFER
//   hostPtrReadOnly <- CL_MEM_HOST_READ_ONLY or CL_MEM_HOST_NO_ACCESS
//   readOnly        <- CL_MEM_READ_ONLY
SVMAllocsManager::SvmAllocationProperties MemObjHelper::getSvmAllocationProperties(cl_mem_flags flags) {
    SVMAllocsManager::SvmAllocationProperties svmProperties;
    svmProperties.coherent = isValueSet(flags, CL_MEM_SVM_FINE_GRAIN_BUFFER);
    svmProperties.hostPtrReadOnly = isValueSet(flags, CL_MEM_HOST_READ_ONLY) || isValueSet(flags, CL_MEM_HOST_NO_ACCESS);
    svmProperties.readOnly = isValueSet(flags, CL_MEM_READ_ONLY);
    return svmProperties;
}

// Decides whether a memory object created in `context` may be compressed.
// Returns false when compression is unsupported, when the context spans more than one root
// device, or when any device of the context vetoes it (see the per-device checks below).
// Otherwise, with `preferCompression` set the explicit hints and the debug flag are consulted
// in order; without it only an explicit compressed hint enables compression.
bool MemObjHelper::isSuitableForCompression(bool compressionSupported, const MemoryProperties &properties, Context &context, bool preferCompression) {
    if (!compressionSupported) {
        return false;
    }
    if (context.getRootDeviceIndices().size() > 1u) {
        return false;
    }
    for (auto &pClDevice : context.getDevices()) {
        auto rootDeviceIndex = pClDevice->getRootDeviceIndex();
        auto &hwInfo = pClDevice->getHardwareInfo();
        auto &clHwHelper = ClHwHelper::get(hwInfo.platform.eRenderCoreFamily);
        if (!clHwHelper.allowCompressionForContext(*pClDevice, context)) {
            return false;
        }
        if (context.containsMultipleSubDevices(rootDeviceIndex)) {
            // Multi-sub-device compression is only considered when the debug flag is positive.
            if (DebugManager.flags.EnableMultiTileCompression.get() <= 0) {
                return false;
            }

            // For unrestrictive and default contexts, turn on compression only for read-only surfaces with no host access.
            bool isContextSpecialized = (context.peekContextType() == ContextType::CONTEXT_TYPE_SPECIALIZED);
            bool isReadOnlyAndHostNoAccess = (properties.flags.readOnly && properties.flags.hostNoAccess);
            if (!isContextSpecialized && !isReadOnlyAndHostNoAccess) {
                return false;
            }
        }
    }
    if (preferCompression) {
        // Explicit hints win; the uncompressed hint is checked first.
        if (properties.flags.uncompressedHint) {
            return false;
        }
        if (properties.flags.compressedHint) {
            return true;
        }
        // Any value other than -1 in the debug flag overrides the default: non-zero yields true, zero yields false.
        int32_t disableCompression = DebugManager.flags.ToggleHintKernelDisableCompression.get();
        if (disableCompression != -1) {
            return !!disableCompression;
        } else {
            if (context.getResolvesRequiredInKernels()) {
                return false;
            }
        }
        return true;
    }
    return properties.flags.compressedHint;
}

// Common tail of the buffer/image property validation.
// Fails when both the compressed and the uncompressed hint are requested (in either flag word),
// and otherwise succeeds only if the root device of memoryProperties.pDevice is one of the
// context's root devices.
bool MemObjHelper::validateExtraMemoryProperties(const MemoryProperties &memoryProperties, cl_mem_flags flags, cl_mem_flags_intel flagsIntel, const Context &context) {
    bool compressedFlagSet = isValueSet(flags, CL_MEM_COMPRESSED_HINT_INTEL) ||
                             isValueSet(flagsIntel, CL_MEM_COMPRESSED_HINT_INTEL);
    bool uncompressedFlagSet = isValueSet(flags, CL_MEM_UNCOMPRESSED_HINT_INTEL) ||
                               isValueSet(flagsIntel, CL_MEM_UNCOMPRESSED_HINT_INTEL);
    if (compressedFlagSet && uncompressedFlagSet) {
        return false;
    }

    // NOTE(review): getSpecializedDevice() looks like it lost a template argument during
    // extraction - confirm against the upstream file.
    auto pClDevice = memoryProperties.pDevice->getSpecializedDevice();
    auto &contextRootDeviceIndices = context.getRootDeviceIndices();
    bool isRootDeviceAssociated = (contextRootDeviceIndices.find(pClDevice->getRootDeviceIndex()) != contextRootDeviceIndices.end());

    return isRootDeviceAssociated;
}

// Flag masks accepted in cl_mem_flags for every memory object type.
const uint64_t MemObjHelper::commonFlags = CL_MEM_COMPRESSED_HINT_INTEL | CL_MEM_UNCOMPRESSED_HINT_INTEL | CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR | CL_MEM_USE_HOST_PTR | CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS;

// Flag masks accepted in cl_mem_flags_intel for every memory object type.
const uint64_t MemObjHelper::commonFlagsIntel = CL_MEM_COMPRESSED_HINT_INTEL | CL_MEM_UNCOMPRESSED_HINT_INTEL | CL_MEM_LOCALLY_UNCACHED_RESOURCE | CL_MEM_LOCALLY_UNCACHED_SURFACE_STATE_RESOURCE |
                                                CL_MEM_48BIT_RESOURCE_INTEL;

// Buffers extend the common masks with the unrestricted-size and force-host-memory flags.
const uint64_t MemObjHelper::validFlagsForBuffer = commonFlags | CL_MEM_ALLOW_UNRESTRICTED_SIZE_INTEL | CL_MEM_FORCE_HOST_MEMORY_INTEL;
const uint64_t MemObjHelper::validFlagsForBufferIntel = commonFlagsIntel | CL_MEM_ALLOW_UNRESTRICTED_SIZE_INTEL;

// Images extend the common mask with the Intel access flags and forced linear storage.
const uint64_t MemObjHelper::validFlagsForImage = commonFlags | CL_MEM_NO_ACCESS_INTEL | CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL | CL_MEM_FORCE_LINEAR_STORAGE_INTEL;
const uint64_t MemObjHelper::validFlagsForImageIntel = commonFlagsIntel;

} // namespace NEO compute-runtime-22.14.22890/opencl/source/mem_obj/mem_obj_helper.h000066400000000000000000000043671422164147700246740ustar00rootroot00000000000000
/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */
#pragma once
#include "shared/source/helpers/bit_helpers.h"
#include "shared/source/memory_manager/memory_manager.h"
#include "shared/source/memory_manager/unified_memory_manager.h"
#include "opencl/extensions/public/cl_ext_private.h"
#include "opencl/source/mem_obj/mem_obj.h"
#include "CL/cl.h"
#include "memory_properties_flags.h"

namespace NEO {

// Static helpers for validating cl_mem flags and deriving allocation properties for
// buffers, images and SVM allocations.
class MemObjHelper {
  public:
    static const uint64_t extraFlags;
    static const uint64_t extraFlagsIntel;
    static const uint64_t commonFlags;
    static const uint64_t commonFlagsIntel;
    static const uint64_t validFlagsForBuffer;
    static const uint64_t validFlagsForBufferIntel;
    static const uint64_t validFlagsForImage;
    static const uint64_t validFlagsForImageIntel;

    static bool validateMemoryPropertiesForBuffer(const MemoryProperties &memoryProperties, cl_mem_flags flags, cl_mem_flags_intel flagsIntel, const Context &context);
    static bool validateMemoryPropertiesForImage(const MemoryProperties &memoryProperties, cl_mem_flags flags, cl_mem_flags_intel flagsIntel, cl_mem parent, const Context &context);
    static AllocationProperties getAllocationPropertiesWithImageInfo(uint32_t rootDeviceIndex, ImageInfo &imgInfo, bool allocateMemory, const MemoryProperties &memoryProperties, const
HardwareInfo &hwInfo, DeviceBitfield subDevicesBitfieldParam, bool deviceOnlyVisibilty); static bool checkMemFlagsForSubBuffer(cl_mem_flags flags); static SVMAllocsManager::SvmAllocationProperties getSvmAllocationProperties(cl_mem_flags flags); static bool isSuitableForCompression(bool compressionSupported, const MemoryProperties &properties, Context &context, bool preferCompression); protected: static bool validateExtraMemoryProperties(const MemoryProperties &memoryProperties, cl_mem_flags flags, cl_mem_flags_intel flagsIntel, const Context &context); }; } // namespace NEO compute-runtime-22.14.22890/opencl/source/mem_obj/pipe.cpp000066400000000000000000000115661422164147700232140ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/mem_obj/pipe.h" #include "shared/source/helpers/get_info.h" #include "shared/source/memory_manager/memory_manager.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/context/context.h" #include "opencl/source/helpers/cl_memory_properties_helpers.h" #include "opencl/source/helpers/get_info_status_mapper.h" #include "opencl/source/mem_obj/mem_obj_helper.h" namespace NEO { Pipe::Pipe(Context *context, cl_mem_flags flags, cl_uint packetSize, cl_uint maxPackets, const cl_pipe_properties *properties, void *memoryStorage, MultiGraphicsAllocation multiGraphicsAllocation) : MemObj(context, CL_MEM_OBJECT_PIPE, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context->getDevice(0)->getDevice()), flags, 0, static_cast(packetSize * (maxPackets + 1) + intelPipeHeaderReservedSpace), memoryStorage, nullptr, std::move(multiGraphicsAllocation), false, false, false), pipePacketSize(packetSize), pipeMaxPackets(maxPackets) { magic = objectMagic; } Pipe *Pipe::create(Context *context, cl_mem_flags flags, cl_uint packetSize, cl_uint maxPackets, const cl_pipe_properties *properties, cl_int &errcodeRet) { Pipe *pPipe = nullptr; errcodeRet = 
CL_SUCCESS; MemoryManager *memoryManager = context->getMemoryManager(); DEBUG_BREAK_IF(!memoryManager); MemoryProperties memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context->getDevice(0)->getDevice()); while (true) { auto size = static_cast(packetSize * (maxPackets + 1) + intelPipeHeaderReservedSpace); auto rootDeviceIndex = context->getDevice(0)->getRootDeviceIndex(); AllocationProperties allocProperties = MemoryPropertiesHelper::getAllocationProperties(rootDeviceIndex, memoryProperties, true, // allocateMemory size, AllocationType::PIPE, false, // isMultiStorageAllocation context->getDevice(0)->getHardwareInfo(), context->getDeviceBitfieldForAllocation(rootDeviceIndex), context->isSingleDeviceContext()); GraphicsAllocation *memory = memoryManager->allocateGraphicsMemoryWithProperties(allocProperties); if (!memory) { errcodeRet = CL_OUT_OF_HOST_MEMORY; break; } auto multiGraphicsAllocation = MultiGraphicsAllocation(rootDeviceIndex); multiGraphicsAllocation.addAllocation(memory); pPipe = new (std::nothrow) Pipe(context, flags, packetSize, maxPackets, properties, memory->getUnderlyingBuffer(), std::move(multiGraphicsAllocation)); if (!pPipe) { memoryManager->freeGraphicsMemory(memory); memory = nullptr; errcodeRet = CL_OUT_OF_HOST_MEMORY; break; } // Initialize pipe_control_intel_t structure located at the beginning of the surface memset(memory->getUnderlyingBuffer(), 0, intelPipeHeaderReservedSpace); *reinterpret_cast(memory->getUnderlyingBuffer()) = maxPackets + 1; break; } return pPipe; } cl_int Pipe::getPipeInfo(cl_image_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) { cl_int retVal; size_t srcParamSize = GetInfo::invalidSourceSize; void *srcParam = nullptr; switch (paramName) { case CL_PIPE_PACKET_SIZE: srcParamSize = sizeof(cl_uint); srcParam = &(pipePacketSize); break; case CL_PIPE_MAX_PACKETS: srcParamSize = sizeof(cl_uint); srcParam = &(pipeMaxPackets); break; case 
CL_PIPE_PROPERTIES: srcParamSize = 0; break; default: break; } auto getInfoStatus = GetInfo::getInfo(paramValue, paramValueSize, srcParam, srcParamSize); retVal = changeGetInfoStatusToCLResultType(getInfoStatus); GetInfo::setParamValueReturnSize(paramValueSizeRet, srcParamSize, getInfoStatus); return retVal; } void Pipe::setPipeArg(void *memory, uint32_t patchSize, uint32_t rootDeviceIndex) { patchWithRequiredSize(memory, patchSize, static_cast(multiGraphicsAllocation.getGraphicsAllocation(rootDeviceIndex)->getGpuAddressToPatch())); } Pipe::~Pipe() = default; } // namespace NEO compute-runtime-22.14.22890/opencl/source/mem_obj/pipe.h000066400000000000000000000023231422164147700226500ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/mem_obj/buffer.h" namespace NEO { class Pipe : public MemObj { public: static const size_t intelPipeHeaderReservedSpace = 128; static const cl_ulong maskMagic = 0xFFFFFFFFFFFFFFFFLL; static const cl_ulong objectMagic = MemObj::objectMagic | 0x03; static Pipe *create( Context *context, cl_mem_flags flags, cl_uint packetSize, cl_uint maxPackets, const cl_pipe_properties *properties, cl_int &errcodeRet); ~Pipe() override; cl_int getPipeInfo(cl_image_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet); void setPipeArg(void *memory, uint32_t patchSize, uint32_t rootDeviceIndex); protected: Pipe(Context *context, cl_mem_flags flags, cl_uint packetSize, cl_uint maxPackets, const cl_pipe_properties *properties, void *memoryStorage, MultiGraphicsAllocation multiGraphicsAllocation); cl_uint pipePacketSize; cl_uint pipeMaxPackets; }; } // namespace NEO 
compute-runtime-22.14.22890/opencl/source/memory_manager/000077500000000000000000000000001422164147700231345ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/source/memory_manager/CMakeLists.txt000066400000000000000000000012711422164147700256750ustar00rootroot00000000000000# # Copyright (C) 2018-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(RUNTIME_SRCS_MEMORY_MANAGER ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/compression_selector_ocl.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cpu_page_fault_manager_memory_sync.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mem_obj_surface.h ${CMAKE_CURRENT_SOURCE_DIR}/migration_controller.cpp ${CMAKE_CURRENT_SOURCE_DIR}/migration_controller.h ${CMAKE_CURRENT_SOURCE_DIR}/resource_surface.h ) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_MEMORY_MANAGER}) set_property(GLOBAL PROPERTY RUNTIME_SRCS_MEMORY_MANAGER ${RUNTIME_SRCS_MEMORY_MANAGER}) add_subdirectories() compute-runtime-22.14.22890/opencl/source/memory_manager/compression_selector_ocl.cpp000066400000000000000000000014021422164147700307330ustar00rootroot00000000000000
/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */
#include "shared/source/memory_manager/compression_selector.h"
#include "shared/source/os_interface/hw_info_config.h"

namespace NEO {

// OpenCL flavour of the compression selector.
// Only global, constant, SVM-GPU and printf surfaces are candidates; for those the answer is
// delegated to HwInfoConfig::allowStatelessCompression of the product family. Every other
// allocation type yields false.
bool CompressionSelector::preferCompressedAllocation(const AllocationProperties &properties, const HardwareInfo &hwInfo) {
    const auto requestedType = properties.allocationType;
    const bool isCandidate = requestedType == AllocationType::GLOBAL_SURFACE ||
                             requestedType == AllocationType::CONSTANT_SURFACE ||
                             requestedType == AllocationType::SVM_GPU ||
                             requestedType == AllocationType::PRINTF_SURFACE;
    if (!isCandidate) {
        return false;
    }

    // The product helper is only looked up for candidate types, as before.
    const auto &productConfig = *HwInfoConfig::get(hwInfo.platform.eProductFamily);
    return productConfig.allowStatelessCompression(hwInfo);
}

} // namespace NEO
compute-runtime-22.14.22890/opencl/source/memory_manager/cpu_page_fault_manager_memory_sync.cpp000066400000000000000000000022771422164147700327440ustar00rootroot00000000000000
/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */
#include "shared/source/helpers/debug_helpers.h"
#include "shared/source/memory_manager/unified_memory_manager.h"
#include "shared/source/page_fault_manager/cpu_page_fault_manager.h"
#include "opencl/source/command_queue/command_queue.h"

namespace NEO {

// Makes `size` bytes at `ptr` available to the CPU by enqueueing an SVM map with CL_MAP_WRITE
// on the queue passed as `cmdQ`. Any non-zero result is treated as unrecoverable.
// NOTE(review): the static_cast below appears to have lost its target type during extraction;
// the calls that follow suggest a command queue pointer - confirm against the upstream file.
void PageFaultManager::transferToCpu(void *ptr, size_t size, void *cmdQ) {
    auto commandQueue = static_cast(cmdQ);
    auto retVal = commandQueue->enqueueSVMMap(true, CL_MAP_WRITE, ptr, size, 0, nullptr, nullptr, false);
    UNRECOVERABLE_IF(retVal);
}

// Hands the allocation at `ptr` back to the GPU: records an SVM map operation for it, enqueues
// the matching SVM unmap, waits for the queue to finish and then evicts the CPU allocation via
// evictMemoryAfterImplCopy. Any non-zero result from the queue is treated as unrecoverable.
// NOTE(review): same stripped static_cast as above; also, allocData is dereferenced without a
// null check - confirm getSVMAlloc cannot return nullptr here.
void PageFaultManager::transferToGpu(void *ptr, void *cmdQ) {
    auto commandQueue = static_cast(cmdQ);

    memoryData[ptr].unifiedMemoryManager->insertSvmMapOperation(ptr, memoryData[ptr].size, ptr, 0, false);
    auto retVal = commandQueue->enqueueSVMUnmap(ptr, 0, nullptr, nullptr, false);
    UNRECOVERABLE_IF(retVal);
    retVal = commandQueue->finish();
    UNRECOVERABLE_IF(retVal);

    auto allocData = memoryData[ptr].unifiedMemoryManager->getSVMAlloc(ptr);
    this->evictMemoryAfterImplCopy(allocData->cpuAllocation, &commandQueue->getDevice());
}

} // namespace NEO compute-runtime-22.14.22890/opencl/source/memory_manager/mem_obj_surface.h000066400000000000000000000015241422164147700264270ustar00rootroot00000000000000
/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */
#pragma once
#include "shared/source/memory_manager/surface.h"
#include "opencl/source/mem_obj/mem_obj.h"

namespace NEO {

// Surface backed by a MemObj. The surface holds an internal reference on the MemObj for its
// whole lifetime: taken in the constructor, dropped in the destructor.
class MemObjSurface : public Surface {
  public:
    // The coherency setting is taken from the MemObj's multi-graphics allocation.
    MemObjSurface(MemObj *memObj) : Surface(memObj->getMultiGraphicsAllocation().isCoherent()), memObj(memObj) {
        memObj->incRefInternal();
    }
    ~MemObjSurface() override {
        memObj->decRefInternal();
        memObj = nullptr;
    };

    // Makes the MemObj's allocation for the CSR's root device resident on that CSR.
    void makeResident(CommandStreamReceiver &csr) override {
        DEBUG_BREAK_IF(!memObj);
        csr.makeResident(*memObj->getGraphicsAllocation(csr.getRootDeviceIndex()));
    }

    // Returns a new heap-allocated surface referring to the same MemObj (which takes another
    // internal reference); the caller receives the raw pointer.
    Surface *duplicate() override {
        return new MemObjSurface(this->memObj);
    };

  protected:
    class MemObj *memObj;
};

} // namespace NEO compute-runtime-22.14.22890/opencl/source/memory_manager/migration_controller.cpp000066400000000000000000000100021422164147700300650ustar00rootroot00000000000000
/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */
#include "opencl/source/memory_manager/migration_controller.h"
#include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/memory_manager/memory_manager.h"
#include "shared/source/memory_manager/migration_sync_data.h"
#include "opencl/source/command_queue/command_queue.h"
#include "opencl/source/context/context.h"
#include "opencl/source/mem_obj/image.h"
#include "opencl/source/mem_obj/mem_obj.h"

namespace NEO {

// Prepares `memObj` for use on `targetCsr`:
//  1. if the migration sync data was last used through a different tag address, waits on the CPU;
//  2. if the data currently lives on another root device, migrates it to the CSR's root device;
//  3. records the upcoming usage with the CSR's tag address and its next task count.
void MigrationController::handleMigration(Context &context, CommandStreamReceiver &targetCsr, MemObj *memObj) {
    auto memoryManager = targetCsr.getMemoryManager();
    auto targetRootDeviceIndex = targetCsr.getRootDeviceIndex();
    auto migrationSyncData = memObj->getMultiGraphicsAllocation().getMigrationSyncData();
    if (!migrationSyncData->isUsedByTheSameContext(targetCsr.getTagAddress())) {
        migrationSyncData->waitOnCpu();
    }
    if (migrationSyncData->getCurrentLocation() != targetRootDeviceIndex) {
        migrateMemory(context, *memoryManager, memObj, targetRootDeviceIndex);
    }
    migrationSyncData->signalUsage(targetCsr.getTagAddress(), targetCsr.peekTaskCount() + 1);
}

// Copies the contents of `memObj` from its current root device to `targetRootDeviceIndex`,
// staging the data through the host pointer held by the migration sync data.
// Each side uses a direct lock + memcpy_s when its allocation is lockable, and otherwise a
// blocking read/write (image or buffer, by allocation type) on the context's special queue for
// that root device, followed by finish(). The return values of the enqueue and finish calls
// are not checked.
void MigrationController::migrateMemory(Context &context, MemoryManager &memoryManager, MemObj *memObj, uint32_t targetRootDeviceIndex) {
    auto &multiGraphicsAllocation = memObj->getMultiGraphicsAllocation();
    auto migrationSyncData = multiGraphicsAllocation.getMigrationSyncData();

    auto sourceRootDeviceIndex = migrationSyncData->getCurrentLocation();
    // A current location equal to the numeric-limits maximum is handled as "nothing to copy":
    // only the location is updated.
    // NOTE(review): std::numeric_limits appears to have lost its template argument during extraction.
    if (sourceRootDeviceIndex == std::numeric_limits::max()) {
        migrationSyncData->setCurrentLocation(targetRootDeviceIndex);
        return;
    }

    migrationSyncData->startMigration();

    auto srcMemory = multiGraphicsAllocation.getGraphicsAllocation(sourceRootDeviceIndex);
    auto dstMemory = multiGraphicsAllocation.getGraphicsAllocation(targetRootDeviceIndex);

    auto size = srcMemory->getUnderlyingBufferSize();
    auto hostPtr = migrationSyncData->getHostPtr();

    // Step 1: source device -> host staging memory.
    // NOTE(review): the static_cast(memObj) calls below appear to have lost their target types
    // during extraction - restore them from the upstream file.
    if (srcMemory->isAllocationLockable()) {
        auto srcLockPtr = memoryManager.lockResource(srcMemory);
        memcpy_s(hostPtr, size, srcLockPtr, size);
        memoryManager.unlockResource(srcMemory);
    } else {

        auto srcCmdQ = context.getSpecialQueue(sourceRootDeviceIndex);
        if (srcMemory->getAllocationType() == AllocationType::IMAGE) {
            auto pImage = static_cast(memObj);
            size_t origin[3] = {};
            size_t region[3] = {};
            pImage->fillImageRegion(region);

            srcCmdQ->enqueueReadImage(pImage, CL_TRUE, origin, region, pImage->getHostPtrRowPitch(), pImage->getHostPtrSlicePitch(), hostPtr, nullptr, 0, nullptr, nullptr);
        } else {
            auto pBuffer = static_cast(memObj);
            srcCmdQ->enqueueReadBuffer(pBuffer, CL_TRUE, 0u, pBuffer->getSize(), hostPtr, nullptr, 0, nullptr, nullptr);
        }
        srcCmdQ->finish();
    }

    // Step 2: host staging memory -> target device.
    if (dstMemory->isAllocationLockable()) {
        auto dstLockPtr = memoryManager.lockResource(dstMemory);
        memcpy_s(dstLockPtr, size, hostPtr, size);
        memoryManager.unlockResource(dstMemory);
    } else {

        auto dstCmdQ = context.getSpecialQueue(targetRootDeviceIndex);
        if (dstMemory->getAllocationType() == AllocationType::IMAGE) {
            auto pImage = static_cast(memObj);
            size_t origin[3] = {};
            size_t region[3] = {};
            pImage->fillImageRegion(region);

            dstCmdQ->enqueueWriteImage(pImage, CL_TRUE, origin, region, pImage->getHostPtrRowPitch(), pImage->getHostPtrSlicePitch(), hostPtr, nullptr, 0, nullptr, nullptr);
        } else {
            auto pBuffer = static_cast(memObj);
            dstCmdQ->enqueueWriteBuffer(pBuffer, CL_TRUE, 0u, pBuffer->getSize(), hostPtr, nullptr, 0, nullptr, nullptr);
        }
        dstCmdQ->finish();
    }
    migrationSyncData->setCurrentLocation(targetRootDeviceIndex);
}
} // namespace
NEOcompute-runtime-22.14.22890/opencl/source/memory_manager/migration_controller.h000066400000000000000000000010241422164147700275360ustar00rootroot00000000000000
/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */
#pragma once
// NOTE(review): the operand of this #include appears to have been stripped during extraction
// (probably an angle-bracket header) - restore it from the upstream file.
#include
namespace NEO {
class MemoryManager;
class CommandStreamReceiver;
class Context;
class MultiGraphicsAllocation;
class MemObj;

// Static entry points for moving a memory object's contents between root devices.
class MigrationController {
  public:
    // Entry point taking the target command stream receiver; see migration_controller.cpp.
    static void handleMigration(Context &context, CommandStreamReceiver &targetCsr, MemObj *memObj);
    // Entry point taking the target root device index directly; see migration_controller.cpp.
    static void migrateMemory(Context &context, MemoryManager &memoryManager, MemObj *memObj, uint32_t targetRootDeviceIndex);
};
} // namespace NEOcompute-runtime-22.14.22890/opencl/source/memory_manager/resource_surface.h000066400000000000000000000012761422164147700266520ustar00rootroot00000000000000
/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */
#pragma once
#include "shared/source/memory_manager/surface.h"
#include "opencl/extensions/public/cl_ext_private.h"

namespace NEO {

// GeneralSurface that additionally carries the barrier type and memory scope given at
// construction; both are stored as public members.
class ResourceSurface : public GeneralSurface {
  public:
    ResourceSurface(GraphicsAllocation *gfxAlloc, cl_resource_barrier_type type, cl_resource_memory_scope scope) : GeneralSurface(gfxAlloc), resourceType(type), resourceScope(scope) {}
    ~ResourceSurface() override = default;

    // Returns the gfxAllocation member (not declared in this class).
    GraphicsAllocation *getGraphicsAllocation() {
        return gfxAllocation;
    }

    cl_resource_barrier_type resourceType;
    cl_resource_memory_scope resourceScope;
};
} // namespace NEOcompute-runtime-22.14.22890/opencl/source/os_interface/000077500000000000000000000000001422164147700225735ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/source/os_interface/CMakeLists.txt000066400000000000000000000006331422164147700253350ustar00rootroot00000000000000# # Copyright (C) 2018-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(RUNTIME_SRCS_OS_INTERFACE_BASE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/ocl_reg_path.h )
target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_OS_INTERFACE_BASE}) set_property(GLOBAL PROPERTY RUNTIME_SRCS_OS_INTERFACE_BASE ${RUNTIME_SRCS_OS_INTERFACE_BASE}) add_subdirectories() compute-runtime-22.14.22890/opencl/source/os_interface/linux/000077500000000000000000000000001422164147700237325ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/source/os_interface/linux/CMakeLists.txt000066400000000000000000000011261422164147700264720ustar00rootroot00000000000000# # Copyright (C) 2018-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(RUNTIME_SRCS_OS_INTERFACE_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/api_linux.cpp ${CMAKE_CURRENT_SOURCE_DIR}/d3d_sharing_functions.h ${CMAKE_CURRENT_SOURCE_DIR}/device_caps_init_linux.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ocl_reg_path.cpp ) if(UNIX) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_OS_INTERFACE_LINUX}) endif() set_property(GLOBAL PROPERTY RUNTIME_SRCS_OS_INTERFACE_LINUX ${RUNTIME_SRCS_OS_INTERFACE_LINUX}) add_subdirectories() compute-runtime-22.14.22890/opencl/source/os_interface/linux/api_linux.cpp000066400000000000000000000021231422164147700264240ustar00rootroot00000000000000
/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */
#include "shared/source/helpers/get_info.h"
#include "opencl/source/api/api.h"
#include "opencl/source/api/dispatch.h"
#include "opencl/source/context/context.h"
#include "opencl/source/mem_obj/image.h"
#include "opencl/source/mem_obj/mem_obj.h"

// Linux handling of OS-specific cl_mem info queries.
// Only CL_MEM_VA_API_MEDIA_SURFACE_INTEL is handled, and only when built with LIBVA; it is
// forwarded to the sharing handler. For every other paramName the outputs are left untouched.
// The parameter was previously the mis-encoded "¶mName", which left `paramName` undeclared.
void NEO::MemObj::getOsSpecificMemObjectInfo(const cl_mem_info &paramName, size_t *srcParamSize, void **srcParam) {
    switch (paramName) {
#ifdef LIBVA
    case CL_MEM_VA_API_MEDIA_SURFACE_INTEL:
        peekSharingHandler()->getMemObjectInfo(*srcParamSize, *srcParam);
        break;
#endif
    default:
        break;
    }
}

// Linux handling of OS-specific cl_image info queries.
// Only CL_IMAGE_VA_API_PLANE_INTEL is handled, and only when built with LIBVA; it reports the
// address and size of mediaPlaneType. For every other paramName the outputs are left untouched.
void NEO::Image::getOsSpecificImageInfo(const cl_image_info &paramName, size_t *srcParamSize, void **srcParam) {
    switch (paramName) {
#ifdef LIBVA
    case CL_IMAGE_VA_API_PLANE_INTEL:
        *srcParamSize = sizeof(cl_uint);
        *srcParam = &mediaPlaneType;
        break;
#endif
    default:
        break;
    }
}

// There are no OS-specific context queries on Linux: always returns nullptr and leaves
// srcParamSize untouched.
void *NEO::Context::getOsContextInfo(cl_context_info &paramName, size_t *srcParamSize) {
    return nullptr;
}
compute-runtime-22.14.22890/opencl/source/os_interface/linux/d3d_sharing_functions.h000066400000000000000000000004631422164147700303630ustar00rootroot00000000000000
/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */
#pragma once

// Empty Linux stand-ins for the D3D sharing types.
namespace NEO {
namespace D3DTypesHelper {
struct D3D9 {
};
struct D3D10 {
};
struct D3D11 {
};
} // namespace D3DTypesHelper

// NOTE(review): the template parameter list appears to have been stripped during extraction -
// restore it from the upstream file.
template
class D3DSharingFunctions {
};
} // namespace NEO compute-runtime-22.14.22890/opencl/source/os_interface/linux/device_caps_init_linux.cpp000066400000000000000000000003301422164147700311410ustar00rootroot00000000000000
/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */
#include "opencl/source/cl_device/cl_device.h"

namespace NEO {
// Intentionally empty on Linux.
void ClDevice::initializeOsSpecificCaps() {
}
} // namespace NEO compute-runtime-22.14.22890/opencl/source/os_interface/linux/ocl_reg_path.cpp000066400000000000000000000003001422164147700270550ustar00rootroot00000000000000
/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */
#include "opencl/source/os_interface/ocl_reg_path.h"

namespace NEO {
// Defined as an empty string in this Linux build.
const char *oclRegPath = "";
}
compute-runtime-22.14.22890/opencl/source/os_interface/linux/platform_teardown_linux.cpp000066400000000000000000000006261422164147700314100ustar00rootroot00000000000000
/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */
#include "opencl/source/platform/platform.h"

namespace NEO {
// Allocates the global platform container; registered via the `constructor` attribute.
// NOTE(review): the element type of std::vector appears to have been stripped during
// extraction - restore it from the upstream file.
void __attribute__((constructor)) platformsConstructor() {
    platformsImpl = new std::vector>;
}
// Releases the global platform container; registered via the `destructor` attribute.
void __attribute__((destructor)) platformsDestructor() {
    delete platformsImpl;
    platformsImpl = nullptr;
}
} // namespace NEO
compute-runtime-22.14.22890/opencl/source/os_interface/ocl_reg_path.h000066400000000000000000000002521422164147700253710ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once namespace NEO { extern const char *oclRegPath; } // namespace NEO compute-runtime-22.14.22890/opencl/source/os_interface/windows/000077500000000000000000000000001422164147700242655ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/source/os_interface/windows/CMakeLists.txt000066400000000000000000000013521422164147700270260ustar00rootroot00000000000000# # Copyright (C) 2018-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(RUNTIME_SRCS_OS_INTERFACE_WINDOWS ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/api_win.cpp ${CMAKE_CURRENT_SOURCE_DIR}/d3d10_11_sharing_functions.cpp ${CMAKE_CURRENT_SOURCE_DIR}/d3d9_sharing_functions.cpp ${CMAKE_CURRENT_SOURCE_DIR}/d3d_sharing_functions.h ${CMAKE_CURRENT_SOURCE_DIR}/device_caps_init_win.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ocl_reg_path.cpp ) if(WIN32) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_OS_INTERFACE_WINDOWS} ) endif() add_subdirectories() set_property(GLOBAL PROPERTY RUNTIME_SRCS_OS_INTERFACE_WINDOWS ${RUNTIME_SRCS_OS_INTERFACE_WINDOWS}) compute-runtime-22.14.22890/opencl/source/os_interface/windows/api_win.cpp000066400000000000000000001001061422164147700264150ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/get_info.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/os_interface/os_interface.h" #include "shared/source/os_interface/windows/wddm/wddm.h" #include "shared/source/utilities/api_intercept.h" #include "opencl/source/api/api.h" #include "opencl/source/api/dispatch.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/command_queue/command_queue.h" #include 
"opencl/source/context/context.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/platform/platform.h" #include "opencl/source/sharings/d3d/d3d_buffer.h" #include "opencl/source/sharings/d3d/d3d_surface.h" #include "opencl/source/sharings/d3d/d3d_texture.h" #include "opencl/source/utilities/cl_logger.h" using namespace NEO; ClDevice *pickDeviceWithAdapterLuid(Platform *platform, LUID adapterLuid) { ClDevice *deviceToReturn = nullptr; for (auto i = 0u; i < platform->getNumDevices(); i++) { auto device = platform->getClDevice(i); if (device->getRootDeviceEnvironment().osInterface->getDriverModel()->as()->verifyAdapterLuid(adapterLuid)) { deviceToReturn = device; break; } } return deviceToReturn; } void NEO::MemObj::getOsSpecificMemObjectInfo(const cl_mem_info ¶mName, size_t *srcParamSize, void **srcParam) { switch (paramName) { case CL_MEM_D3D10_RESOURCE_KHR: *srcParamSize = sizeof(ID3D10Resource *); *srcParam = static_cast *>(peekSharingHandler())->getResourceHandler(); break; case CL_MEM_D3D11_RESOURCE_KHR: *srcParamSize = sizeof(ID3D11Resource *); *srcParam = static_cast *>(peekSharingHandler())->getResourceHandler(); break; case CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR: *srcParamSize = sizeof(cl_dx9_surface_info_khr); *srcParam = &static_cast(peekSharingHandler())->getSurfaceInfo(); break; case CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR: *srcParamSize = sizeof(cl_dx9_media_adapter_type_khr); *srcParam = &static_cast(peekSharingHandler())->getAdapterType(); break; case CL_MEM_DX9_RESOURCE_INTEL: *srcParamSize = sizeof(IDirect3DSurface9 *); *srcParam = &static_cast(peekSharingHandler())->getSurfaceInfo().resource; break; case CL_MEM_DX9_SHARED_HANDLE_INTEL: *srcParamSize = sizeof(HANDLE); *srcParam = &static_cast(peekSharingHandler())->getSurfaceInfo().shared_handle; break; } } void NEO::Image::getOsSpecificImageInfo(const cl_mem_info ¶mName, size_t *srcParamSize, void **srcParam) { switch (paramName) { case CL_IMAGE_D3D10_SUBRESOURCE_KHR: *srcParamSize = 
sizeof(unsigned int); *srcParam = &static_cast *>(peekSharingHandler())->getSubresource(); break; case CL_IMAGE_D3D11_SUBRESOURCE_KHR: *srcParamSize = sizeof(unsigned int); *srcParam = &static_cast *>(peekSharingHandler())->getSubresource(); break; case CL_IMAGE_DX9_MEDIA_PLANE_KHR: case CL_IMAGE_DX9_PLANE_INTEL: *srcParamSize = sizeof(cl_uint); *srcParam = &static_cast(peekSharingHandler())->getPlane(); break; } } void *NEO::Context::getOsContextInfo(cl_context_info ¶mName, size_t *srcParamSize) { switch (paramName) { case CL_CONTEXT_D3D10_PREFER_SHARED_RESOURCES_KHR: case CL_CONTEXT_D3D11_PREFER_SHARED_RESOURCES_KHR: *srcParamSize = sizeof(cl_bool); return &preferD3dSharedResources; default: break; } return nullptr; } cl_int CL_API_CALL clGetDeviceIDsFromDX9INTEL(cl_platform_id platform, cl_dx9_device_source_intel dx9DeviceSource, void *dx9Object, cl_dx9_device_set_intel dx9DeviceSet, cl_uint numEntries, cl_device_id *devices, cl_uint *numDevices) { Platform *platformInternal = nullptr; auto retVal = validateObjects(WithCastToInternal(platform, &platformInternal)); API_ENTER(&retVal); DBG_LOG_INPUTS("platform", platform, "dx9DeviceSource", dx9DeviceSource, "dx9Object", dx9Object, "dx9DeviceSet", dx9DeviceSet, "numEntries", numEntries, "devices", devices, "numDevices", numDevices); if (retVal != CL_SUCCESS) { return retVal; } cl_device_id device = platformInternal->getClDevice(0); GetInfoHelper::set(devices, device); GetInfoHelper::set(numDevices, 1u); retVal = CL_SUCCESS; return retVal; } cl_mem CL_API_CALL clCreateFromDX9MediaSurfaceINTEL(cl_context context, cl_mem_flags flags, IDirect3DSurface9 *resource, HANDLE sharedHandle, UINT plane, cl_int *errcodeRet) { API_ENTER(errcodeRet); DBG_LOG_INPUTS("context", context, "flags", flags, "resource", resource, "sharedHandle", sharedHandle, "plane", plane); ErrorCodeHelper err(errcodeRet, CL_SUCCESS); cl_mem_flags validFlags = CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY; if ((flags & (~validFlags)) != 0) { 
err.set(CL_INVALID_VALUE); return nullptr; } if (!resource) { err.set(CL_INVALID_DX9_RESOURCE_INTEL); return nullptr; } cl_dx9_surface_info_khr surfaceInfo = {resource, sharedHandle}; auto ctx = castToObject(context); if (ctx) { return D3DSurface::create(ctx, &surfaceInfo, flags, 0, plane, errcodeRet); } else { err.set(CL_INVALID_CONTEXT); return nullptr; } } cl_int CL_API_CALL clEnqueueAcquireDX9ObjectsINTEL(cl_command_queue commandQueue, cl_uint numObjects, const cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { CommandQueue *cmdQ = nullptr; auto retVal = validateObjects(WithCastToInternal(commandQueue, &cmdQ)); API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue, "numObjects", numObjects, "memObjects", memObjects, "numEventsInWaitList", numEventsInWaitList, "eventWaitList", getClFileLogger().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList), "event", getClFileLogger().getEvents(reinterpret_cast(event), 1)); if (retVal != CL_SUCCESS) { return retVal; } retVal = cmdQ->enqueueAcquireSharedObjects(numObjects, memObjects, numEventsInWaitList, eventWaitList, event, CL_COMMAND_ACQUIRE_DX9_OBJECTS_INTEL); return retVal; } cl_int CL_API_CALL clEnqueueReleaseDX9ObjectsINTEL(cl_command_queue commandQueue, cl_uint numObjects, cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { CommandQueue *cmdQ = nullptr; auto retVal = validateObjects(WithCastToInternal(commandQueue, &cmdQ)); API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue, "numObjects", numObjects, "memObjects", memObjects, "numEventsInWaitList", numEventsInWaitList, "eventWaitList", getClFileLogger().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList), "event", getClFileLogger().getEvents(reinterpret_cast(event), 1)); if (retVal != CL_SUCCESS) { return retVal; } for (unsigned int object = 0; object < numObjects; object++) { auto memObject = castToObject(memObjects[object]); 
if (!static_cast *>(memObject->peekSharingHandler())->isSharedResource()) { cmdQ->finish(); break; } } retVal = cmdQ->enqueueReleaseSharedObjects(numObjects, memObjects, numEventsInWaitList, eventWaitList, event, CL_COMMAND_RELEASE_DX9_OBJECTS_INTEL); if (!cmdQ->getContext().getInteropUserSyncEnabled()) { cmdQ->finish(); } return retVal; } cl_int CL_API_CALL clGetDeviceIDsFromDX9MediaAdapterKHR(cl_platform_id platform, cl_uint numMediaAdapters, cl_dx9_media_adapter_type_khr *mediaAdapterType, void *mediaAdapters, cl_dx9_media_adapter_set_khr mediaAdapterSet, cl_uint numEntries, cl_device_id *devices, cl_uint *numDevices) { Platform *platformInternal = nullptr; auto retVal = validateObjects(WithCastToInternal(platform, &platformInternal)); API_ENTER(&retVal); DBG_LOG_INPUTS("platform", platform, "numMediaAdapters", numMediaAdapters, "mediaAdapterType", mediaAdapterType, "mediaAdapters", mediaAdapters, "mediaAdapterSet", mediaAdapterSet, "numEntries", numEntries, "devices", devices, "numDevices", numDevices); if (retVal != CL_SUCCESS) { return retVal; } cl_device_id device = platformInternal->getClDevice(0); GetInfoHelper::set(devices, device); GetInfoHelper::set(numDevices, 1u); retVal = CL_SUCCESS; return retVal; } cl_mem CL_API_CALL clCreateFromDX9MediaSurfaceKHR(cl_context context, cl_mem_flags flags, cl_dx9_media_adapter_type_khr adapterType, void *surfaceInfo, cl_uint plane, cl_int *errcodeRet) { API_ENTER(errcodeRet); DBG_LOG_INPUTS("context", context, "flags", flags, "adapterType", adapterType, "surfaceInfo", surfaceInfo, "plane", plane); ErrorCodeHelper err(errcodeRet, CL_SUCCESS); auto localSurfaceInfo = (cl_dx9_surface_info_khr *)surfaceInfo; auto ctx = castToObject(context); return D3DSurface::create(ctx, localSurfaceInfo, flags, adapterType, plane, errcodeRet); } cl_int CL_API_CALL clEnqueueAcquireDX9MediaSurfacesKHR(cl_command_queue commandQueue, cl_uint numObjects, const cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, 
cl_event *event) { CommandQueue *cmdQ = nullptr; auto retVal = validateObjects(WithCastToInternal(commandQueue, &cmdQ)); API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue, "numObjects", numObjects, "memObjects", memObjects, "numEventsInWaitList", numEventsInWaitList, "eventWaitList", getClFileLogger().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList), "event", getClFileLogger().getEvents(reinterpret_cast(event), 1)); if (retVal != CL_SUCCESS) { return retVal; } retVal = cmdQ->enqueueAcquireSharedObjects(numObjects, memObjects, numEventsInWaitList, eventWaitList, event, CL_COMMAND_ACQUIRE_DX9_MEDIA_SURFACES_KHR); return retVal; } cl_int CL_API_CALL clEnqueueReleaseDX9MediaSurfacesKHR(cl_command_queue commandQueue, cl_uint numObjects, const cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { CommandQueue *cmdQ = nullptr; auto retVal = validateObjects(WithCastToInternal(commandQueue, &cmdQ)); API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue, "numObjects", numObjects, "memObjects", memObjects, "numEventsInWaitList", numEventsInWaitList, "eventWaitList", getClFileLogger().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList), "event", getClFileLogger().getEvents(reinterpret_cast(event), 1)); if (retVal != CL_SUCCESS) { return retVal; } for (unsigned int object = 0; object < numObjects; object++) { auto memObject = castToObject(memObjects[object]); if (memObject) { if (!static_cast *>(memObject->peekSharingHandler())->isSharedResource()) { cmdQ->finish(); break; } } else { retVal = CL_INVALID_MEM_OBJECT; return retVal; } } retVal = cmdQ->enqueueReleaseSharedObjects(numObjects, memObjects, numEventsInWaitList, eventWaitList, event, CL_COMMAND_RELEASE_DX9_MEDIA_SURFACES_KHR); if (!cmdQ->getContext().getInteropUserSyncEnabled()) { cmdQ->finish(); } return retVal; } cl_int CL_API_CALL clGetDeviceIDsFromD3D10KHR(cl_platform_id platform, cl_d3d10_device_source_khr 
d3dDeviceSource, void *d3dObject, cl_d3d10_device_set_khr d3dDeviceSet, cl_uint numEntries, cl_device_id *devices, cl_uint *numDevices) { DXGI_ADAPTER_DESC dxgiDesc = {{0}}; IDXGIAdapter *dxgiAdapter = nullptr; ID3D10Device *d3dDevice = nullptr; D3DSharingFunctions sharingFcns((ID3D10Device *)nullptr); cl_uint localNumDevices = 0; cl_int retCode = CL_SUCCESS; Platform *platformInternal = nullptr; ClDevice *device = nullptr; auto retVal = validateObjects(WithCastToInternal(platform, &platformInternal)); API_ENTER(&retVal); DBG_LOG_INPUTS("platform", platform, "d3dDeviceSource", d3dDeviceSource, "d3dObject", d3dObject, "d3dDeviceSet", d3dDeviceSet, "numEntries", numEntries, "devices", devices, "numDevices", numDevices); if (retVal != CL_SUCCESS) { return retVal; } if (DebugManager.injectFcn) { sharingFcns.getDxgiDescFcn = (D3DSharingFunctions::GetDxgiDescFcn)DebugManager.injectFcn; } switch (d3dDeviceSource) { case CL_D3D10_DEVICE_KHR: d3dDevice = (ID3D10Device *)d3dObject; break; case CL_D3D10_DXGI_ADAPTER_KHR: dxgiAdapter = (IDXGIAdapter *)d3dObject; break; default: GetInfoHelper::set(numDevices, localNumDevices); retVal = CL_INVALID_VALUE; return retVal; } sharingFcns.getDxgiDescFcn(&dxgiDesc, dxgiAdapter, d3dDevice); if (dxgiDesc.VendorId != INTEL_VENDOR_ID) { GetInfoHelper::set(numDevices, localNumDevices); retVal = CL_DEVICE_NOT_FOUND; return retVal; } switch (d3dDeviceSet) { case CL_PREFERRED_DEVICES_FOR_D3D10_KHR: case CL_ALL_DEVICES_FOR_D3D10_KHR: device = pickDeviceWithAdapterLuid(platformInternal, dxgiDesc.AdapterLuid); if (device) { GetInfoHelper::set(devices, static_cast(device)); localNumDevices = 1; } else { retCode = CL_DEVICE_NOT_FOUND; } break; default: retCode = CL_INVALID_VALUE; break; } GetInfoHelper::set(numDevices, localNumDevices); return retCode; } cl_mem CL_API_CALL clCreateFromD3D10BufferKHR(cl_context context, cl_mem_flags flags, ID3D10Buffer *resource, cl_int *errcodeRet) { API_ENTER(errcodeRet); DBG_LOG_INPUTS("context", context, 
"flags", flags, "resource", resource); ErrorCodeHelper err(errcodeRet, CL_SUCCESS); Context *ctx = nullptr; err.set(validateObjects(WithCastToInternal(context, &ctx))); if (err.localErrcode != CL_SUCCESS) { return nullptr; } if (ctx->getSharing>()->isTracked(resource, 0)) { err.set(CL_INVALID_D3D10_RESOURCE_KHR); return nullptr; } return D3DBuffer::create(ctx, resource, flags, errcodeRet); } cl_mem CL_API_CALL clCreateFromD3D10Texture2DKHR(cl_context context, cl_mem_flags flags, ID3D10Texture2D *resource, UINT subresource, cl_int *errcodeRet) { API_ENTER(errcodeRet); DBG_LOG_INPUTS("context", context, "flags", flags, "resource", resource, "subresource", subresource); ErrorCodeHelper err(errcodeRet, CL_SUCCESS); Context *ctx = nullptr; err.set(validateObjects(WithCastToInternal(context, &ctx))); if (err.localErrcode != CL_SUCCESS) { return nullptr; } if (ctx->getSharing>()->isTracked(resource, subresource)) { err.set(CL_INVALID_D3D10_RESOURCE_KHR); return nullptr; } return D3DTexture::create2d(ctx, resource, flags, subresource, errcodeRet); } cl_mem CL_API_CALL clCreateFromD3D10Texture3DKHR(cl_context context, cl_mem_flags flags, ID3D10Texture3D *resource, UINT subresource, cl_int *errcodeRet) { API_ENTER(errcodeRet); DBG_LOG_INPUTS("context", context, "flags", flags, "resource", resource, "subresource", subresource); ErrorCodeHelper err(errcodeRet, CL_SUCCESS); Context *ctx = nullptr; err.set(validateObjects(WithCastToInternal(context, &ctx))); if (err.localErrcode != CL_SUCCESS) { return nullptr; } if (ctx->getSharing>()->isTracked(resource, subresource)) { err.set(CL_INVALID_D3D10_RESOURCE_KHR); return nullptr; } return D3DTexture::create3d(ctx, resource, flags, subresource, errcodeRet); } cl_int CL_API_CALL clEnqueueAcquireD3D10ObjectsKHR(cl_command_queue commandQueue, cl_uint numObjects, const cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { CommandQueue *cmdQ = nullptr; auto retVal = 
validateObjects(WithCastToInternal(commandQueue, &cmdQ)); API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue, "numObjects", numObjects, "memObjects", memObjects, "numEventsInWaitList", numEventsInWaitList, "eventWaitList", getClFileLogger().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList), "event", getClFileLogger().getEvents(reinterpret_cast(event), 1)); if (retVal != CL_SUCCESS) { return retVal; } retVal = validateObjects(MemObjList(numObjects, memObjects)); if (retVal != CL_SUCCESS) { return retVal; } for (unsigned int object = 0; object < numObjects; object++) { auto memObj = castToObject(memObjects[object]); if (memObj->acquireCount >= 1) { retVal = CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR; return retVal; } } retVal = cmdQ->enqueueAcquireSharedObjects(numObjects, memObjects, numEventsInWaitList, eventWaitList, event, CL_COMMAND_ACQUIRE_D3D10_OBJECTS_KHR); return retVal; } cl_int CL_API_CALL clEnqueueReleaseD3D10ObjectsKHR(cl_command_queue commandQueue, cl_uint numObjects, const cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { CommandQueue *cmdQ = nullptr; auto retVal = validateObjects(WithCastToInternal(commandQueue, &cmdQ)); API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue, "numObjects", numObjects, "memObjects", memObjects, "numEventsInWaitList", numEventsInWaitList, "eventWaitList", getClFileLogger().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList), "event", getClFileLogger().getEvents(reinterpret_cast(event), 1)); if (retVal != CL_SUCCESS) { return retVal; } retVal = validateObjects(MemObjList(numObjects, memObjects)); if (retVal != CL_SUCCESS) { return retVal; } for (unsigned int object = 0; object < numObjects; object++) { auto memObject = castToObject(memObjects[object]); if (memObject->acquireCount == 0) { retVal = CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR; return retVal; } if (!static_cast *>(memObject->peekSharingHandler())->isSharedResource()) { 
cmdQ->finish(); break; } } retVal = cmdQ->enqueueReleaseSharedObjects(numObjects, memObjects, numEventsInWaitList, eventWaitList, event, CL_COMMAND_RELEASE_D3D10_OBJECTS_KHR); if (!cmdQ->getContext().getInteropUserSyncEnabled()) { cmdQ->finish(); } return retVal; } cl_int CL_API_CALL clGetDeviceIDsFromD3D11KHR(cl_platform_id platform, cl_d3d11_device_source_khr d3dDeviceSource, void *d3dObject, cl_d3d11_device_set_khr d3dDeviceSet, cl_uint numEntries, cl_device_id *devices, cl_uint *numDevices) { DXGI_ADAPTER_DESC dxgiDesc = {{0}}; IDXGIAdapter *dxgiAdapter = nullptr; ID3D11Device *d3dDevice = nullptr; D3DSharingFunctions sharingFcns((ID3D11Device *)nullptr); cl_uint localNumDevices = 0; Platform *platformInternal = nullptr; ClDevice *device = nullptr; auto retVal = validateObjects(WithCastToInternal(platform, &platformInternal)); API_ENTER(&retVal); DBG_LOG_INPUTS("platform", platform, "d3dDeviceSource", d3dDeviceSource, "d3dObject", d3dObject, "d3dDeviceSet", d3dDeviceSet, "numEntries", numEntries, "devices", devices, "numDevices", numDevices); if (retVal != CL_SUCCESS) { return retVal; } if (DebugManager.injectFcn) { sharingFcns.getDxgiDescFcn = (D3DSharingFunctions::GetDxgiDescFcn)DebugManager.injectFcn; } switch (d3dDeviceSource) { case CL_D3D11_DEVICE_KHR: d3dDevice = (ID3D11Device *)d3dObject; break; case CL_D3D11_DXGI_ADAPTER_KHR: dxgiAdapter = (IDXGIAdapter *)d3dObject; break; default: GetInfoHelper::set(numDevices, localNumDevices); retVal = CL_INVALID_VALUE; return retVal; break; } sharingFcns.getDxgiDescFcn(&dxgiDesc, dxgiAdapter, d3dDevice); if (dxgiDesc.VendorId != INTEL_VENDOR_ID) { GetInfoHelper::set(numDevices, localNumDevices); retVal = CL_DEVICE_NOT_FOUND; return retVal; } switch (d3dDeviceSet) { case CL_PREFERRED_DEVICES_FOR_D3D11_KHR: case CL_ALL_DEVICES_FOR_D3D11_KHR: device = pickDeviceWithAdapterLuid(platformInternal, dxgiDesc.AdapterLuid); if (device) { GetInfoHelper::set(devices, static_cast(device)); localNumDevices = 1; } else { retVal = 
CL_DEVICE_NOT_FOUND; } break; default: retVal = CL_INVALID_VALUE; break; } GetInfoHelper::set(numDevices, localNumDevices); return retVal; } cl_mem CL_API_CALL clCreateFromD3D11BufferKHR(cl_context context, cl_mem_flags flags, ID3D11Buffer *resource, cl_int *errcodeRet) { API_ENTER(errcodeRet); DBG_LOG_INPUTS("context", context, "flags", flags, "resource", resource); ErrorCodeHelper err(errcodeRet, CL_SUCCESS); Context *ctx = nullptr; err.set(validateObjects(WithCastToInternal(context, &ctx))); if (err.localErrcode != CL_SUCCESS) { return nullptr; } if (ctx->getSharing>()->isTracked(resource, 0)) { err.set(CL_INVALID_D3D11_RESOURCE_KHR); return nullptr; } return D3DBuffer::create(ctx, resource, flags, errcodeRet); } cl_mem CL_API_CALL clCreateFromD3D11Texture2DKHR(cl_context context, cl_mem_flags flags, ID3D11Texture2D *resource, UINT subresource, cl_int *errcodeRet) { API_ENTER(errcodeRet); DBG_LOG_INPUTS("context", context, "flags", flags, "resource", resource, "subresource", subresource); ErrorCodeHelper err(errcodeRet, CL_SUCCESS); Context *ctx = nullptr; err.set(validateObjects(WithCastToInternal(context, &ctx))); if (err.localErrcode != CL_SUCCESS) { return nullptr; } if (ctx->getSharing>()->isTracked(resource, subresource)) { err.set(CL_INVALID_D3D11_RESOURCE_KHR); return nullptr; } return D3DTexture::create2d(ctx, resource, flags, subresource, errcodeRet); } cl_mem CL_API_CALL clCreateFromD3D11Texture3DKHR(cl_context context, cl_mem_flags flags, ID3D11Texture3D *resource, UINT subresource, cl_int *errcodeRet) { API_ENTER(errcodeRet); DBG_LOG_INPUTS("context", context, "flags", flags, "resource", resource, "subresource", subresource); ErrorCodeHelper err(errcodeRet, CL_SUCCESS); Context *ctx = nullptr; err.set(validateObjects(WithCastToInternal(context, &ctx))); if (err.localErrcode != CL_SUCCESS) { return nullptr; } if (ctx->getSharing>()->isTracked(resource, subresource)) { err.set(CL_INVALID_D3D11_RESOURCE_KHR); return nullptr; } return 
D3DTexture::create3d(ctx, resource, flags, subresource, errcodeRet); } cl_int CL_API_CALL clEnqueueAcquireD3D11ObjectsKHR(cl_command_queue commandQueue, cl_uint numObjects, const cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { CommandQueue *cmdQ = nullptr; auto retVal = validateObjects(WithCastToInternal(commandQueue, &cmdQ)); API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue, "numObjects", numObjects, "memObjects", memObjects, "numEventsInWaitList", numEventsInWaitList, "eventWaitList", getClFileLogger().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList), "event", getClFileLogger().getEvents(reinterpret_cast(event), 1)); if (retVal != CL_SUCCESS) { return retVal; } retVal = validateObjects(MemObjList(numObjects, memObjects)); if (retVal != CL_SUCCESS) { return retVal; } for (unsigned int object = 0; object < numObjects; object++) { auto memObj = castToObject(memObjects[object]); if (memObj->acquireCount >= 1) { retVal = CL_D3D11_RESOURCE_ALREADY_ACQUIRED_KHR; return retVal; } } retVal = cmdQ->enqueueAcquireSharedObjects(numObjects, memObjects, numEventsInWaitList, eventWaitList, event, CL_COMMAND_ACQUIRE_D3D11_OBJECTS_KHR); return retVal; } cl_int CL_API_CALL clEnqueueReleaseD3D11ObjectsKHR(cl_command_queue commandQueue, cl_uint numObjects, const cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { CommandQueue *cmdQ = nullptr; auto retVal = validateObjects(WithCastToInternal(commandQueue, &cmdQ)); API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue, "numObjects", numObjects, "memObjects", memObjects, "numEventsInWaitList", numEventsInWaitList, "eventWaitList", getClFileLogger().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList), "event", getClFileLogger().getEvents(reinterpret_cast(event), 1)); if (retVal != CL_SUCCESS) { return retVal; } retVal = validateObjects(MemObjList(numObjects, memObjects)); if (retVal != 
CL_SUCCESS) { return retVal; } for (unsigned int object = 0; object < numObjects; object++) { auto memObject = castToObject(memObjects[object]); if (memObject->acquireCount == 0) { retVal = CL_D3D11_RESOURCE_NOT_ACQUIRED_KHR; return retVal; } if (!static_cast *>(memObject->peekSharingHandler())->isSharedResource()) { cmdQ->finish(); break; } } retVal = cmdQ->enqueueReleaseSharedObjects(numObjects, memObjects, numEventsInWaitList, eventWaitList, event, CL_COMMAND_RELEASE_D3D11_OBJECTS_KHR); if (!cmdQ->getContext().getInteropUserSyncEnabled()) { cmdQ->finish(); } return retVal; } cl_int CL_API_CALL clGetSupportedDX9MediaSurfaceFormatsINTEL(cl_context context, cl_mem_flags flags, cl_mem_object_type imageType, cl_uint plane, cl_uint numEntries, D3DFORMAT *dx9Formats, cl_uint *numImageFormats) { if (validateObject(context) != CL_SUCCESS) { return CL_INVALID_CONTEXT; } if ((imageType != CL_MEM_OBJECT_BUFFER) && (imageType != CL_MEM_OBJECT_IMAGE2D) && (imageType != CL_MEM_OBJECT_IMAGE3D)) { return CL_INVALID_VALUE; } if (((flags & CL_MEM_READ_WRITE) == 0) && ((flags & CL_MEM_WRITE_ONLY) == 0) && ((flags & CL_MEM_READ_ONLY) == 0)) { return CL_INVALID_VALUE; } cl_uint i = 0; switch (plane) { case 0: for (auto format : D3DSurface::D3DtoClFormatConversions) { if (i >= numEntries) { break; } dx9Formats[i++] = format.first; } *numImageFormats = static_cast(D3DSurface::D3DtoClFormatConversions.size()); break; case 1: for (auto format : D3DSurface::D3DPlane1Formats) { if (i >= numEntries) { break; } dx9Formats[i++] = format; } *numImageFormats = static_cast(D3DSurface::D3DPlane1Formats.size()); break; case 2: for (auto format : D3DSurface::D3DPlane2Formats) { if (i >= numEntries) { break; } dx9Formats[i++] = format; } *numImageFormats = static_cast(D3DSurface::D3DPlane2Formats.size()); break; default: *numImageFormats = 0; } return CL_SUCCESS; } cl_int CL_API_CALL clGetSupportedD3D10TextureFormatsINTEL(cl_context context, cl_mem_flags flags, cl_mem_object_type imageType, cl_uint 
numEntries, DXGI_FORMAT *formats, cl_uint *numTextureFormats) { return getSupportedDXTextureFormats(context, imageType, 0, numEntries, formats, numTextureFormats); } cl_int CL_API_CALL clGetSupportedD3D11TextureFormatsINTEL(cl_context context, cl_mem_flags flags, cl_mem_object_type imageType, cl_uint plane, cl_uint numEntries, DXGI_FORMAT *formats, cl_uint *numTextureFormats) { return getSupportedDXTextureFormats(context, imageType, plane, numEntries, formats, numTextureFormats); } compute-runtime-22.14.22890/opencl/source/os_interface/windows/d3d10_11_sharing_functions.cpp000066400000000000000000000331631422164147700317160ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/context/context.inl" #include "opencl/source/os_interface/windows/d3d_sharing_functions.h" #include "opencl/source/sharings/d3d/d3d_sharing.h" #include "opencl/source/sharings/sharing_factory.h" using namespace NEO; template class D3DSharingFunctions; template class D3DSharingFunctions; const uint32_t D3DSharingFunctions::sharingId = SharingType::D3D10_SHARING; const uint32_t D3DSharingFunctions::sharingId = SharingType::D3D11_SHARING; static const DXGI_FORMAT DXGIFormats[] = { DXGI_FORMAT_R32G32B32A32_TYPELESS, DXGI_FORMAT_R32G32B32A32_FLOAT, DXGI_FORMAT_R32G32B32A32_UINT, DXGI_FORMAT_R32G32B32A32_SINT, DXGI_FORMAT_R32G32B32_TYPELESS, DXGI_FORMAT_R32G32B32_FLOAT, DXGI_FORMAT_R32G32B32_UINT, DXGI_FORMAT_R32G32B32_SINT, DXGI_FORMAT_R16G16B16A16_TYPELESS, DXGI_FORMAT_R16G16B16A16_FLOAT, DXGI_FORMAT_R16G16B16A16_UNORM, DXGI_FORMAT_R16G16B16A16_UINT, DXGI_FORMAT_R16G16B16A16_SNORM, DXGI_FORMAT_R16G16B16A16_SINT, DXGI_FORMAT_R32G32_TYPELESS, DXGI_FORMAT_R32G32_FLOAT, DXGI_FORMAT_R32G32_UINT, DXGI_FORMAT_R32G32_SINT, DXGI_FORMAT_R32G8X24_TYPELESS, DXGI_FORMAT_D32_FLOAT_S8X24_UINT, DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS, DXGI_FORMAT_X32_TYPELESS_G8X24_UINT, DXGI_FORMAT_R10G10B10A2_TYPELESS, 
DXGI_FORMAT_R10G10B10A2_UNORM, DXGI_FORMAT_R10G10B10A2_UINT, DXGI_FORMAT_R11G11B10_FLOAT, DXGI_FORMAT_R8G8B8A8_TYPELESS, DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_R8G8B8A8_UNORM_SRGB, DXGI_FORMAT_R8G8B8A8_UINT, DXGI_FORMAT_R8G8B8A8_SNORM, DXGI_FORMAT_R8G8B8A8_SINT, DXGI_FORMAT_R16G16_TYPELESS, DXGI_FORMAT_R16G16_FLOAT, DXGI_FORMAT_R16G16_UNORM, DXGI_FORMAT_R16G16_UINT, DXGI_FORMAT_R16G16_SNORM, DXGI_FORMAT_R16G16_SINT, DXGI_FORMAT_R32_TYPELESS, DXGI_FORMAT_D32_FLOAT, DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_R32_UINT, DXGI_FORMAT_R32_SINT, DXGI_FORMAT_R24G8_TYPELESS, DXGI_FORMAT_D24_UNORM_S8_UINT, DXGI_FORMAT_R24_UNORM_X8_TYPELESS, DXGI_FORMAT_X24_TYPELESS_G8_UINT, DXGI_FORMAT_R8G8_TYPELESS, DXGI_FORMAT_R8G8_UNORM, DXGI_FORMAT_R8G8_UINT, DXGI_FORMAT_R8G8_SNORM, DXGI_FORMAT_R8G8_SINT, DXGI_FORMAT_R16_TYPELESS, DXGI_FORMAT_R16_FLOAT, DXGI_FORMAT_D16_UNORM, DXGI_FORMAT_R16_UNORM, DXGI_FORMAT_R16_UINT, DXGI_FORMAT_R16_SNORM, DXGI_FORMAT_R16_SINT, DXGI_FORMAT_R8_TYPELESS, DXGI_FORMAT_R8_UNORM, DXGI_FORMAT_R8_UINT, DXGI_FORMAT_R8_SNORM, DXGI_FORMAT_R8_SINT, DXGI_FORMAT_A8_UNORM, DXGI_FORMAT_R1_UNORM, DXGI_FORMAT_R9G9B9E5_SHAREDEXP, DXGI_FORMAT_R8G8_B8G8_UNORM, DXGI_FORMAT_G8R8_G8B8_UNORM, DXGI_FORMAT_BC1_TYPELESS, DXGI_FORMAT_BC1_UNORM, DXGI_FORMAT_BC1_UNORM_SRGB, DXGI_FORMAT_BC2_TYPELESS, DXGI_FORMAT_BC2_UNORM, DXGI_FORMAT_BC2_UNORM_SRGB, DXGI_FORMAT_BC3_TYPELESS, DXGI_FORMAT_BC3_UNORM, DXGI_FORMAT_BC3_UNORM_SRGB, DXGI_FORMAT_BC4_TYPELESS, DXGI_FORMAT_BC4_UNORM, DXGI_FORMAT_BC4_SNORM, DXGI_FORMAT_BC5_TYPELESS, DXGI_FORMAT_BC5_UNORM, DXGI_FORMAT_BC5_SNORM, DXGI_FORMAT_B5G6R5_UNORM, DXGI_FORMAT_B5G5R5A1_UNORM, DXGI_FORMAT_B8G8R8A8_UNORM, DXGI_FORMAT_B8G8R8X8_UNORM, DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM, DXGI_FORMAT_B8G8R8A8_TYPELESS, DXGI_FORMAT_B8G8R8A8_UNORM_SRGB, DXGI_FORMAT_B8G8R8X8_TYPELESS, DXGI_FORMAT_B8G8R8X8_UNORM_SRGB, DXGI_FORMAT_BC6H_TYPELESS, DXGI_FORMAT_BC6H_UF16, DXGI_FORMAT_BC6H_SF16, DXGI_FORMAT_BC7_TYPELESS, DXGI_FORMAT_BC7_UNORM, DXGI_FORMAT_BC7_UNORM_SRGB, 
DXGI_FORMAT_AYUV, DXGI_FORMAT_Y410, DXGI_FORMAT_Y416, DXGI_FORMAT_NV12, DXGI_FORMAT_P010, DXGI_FORMAT_P016, DXGI_FORMAT_YUY2, DXGI_FORMAT_Y210, DXGI_FORMAT_Y216, DXGI_FORMAT_AI44, DXGI_FORMAT_IA44, DXGI_FORMAT_P8, DXGI_FORMAT_A8P8, DXGI_FORMAT_B4G4R4A4_UNORM, DXGI_FORMAT_V208, DXGI_FORMAT_V408, DXGI_FORMAT_FORCE_UINT}; template void D3DSharingFunctions::createQuery(D3DQuery **query) { D3DQueryDesc desc = {}; d3dDevice->CreateQuery(&desc, query); } template void D3DSharingFunctions::updateDevice(D3DResource *resource) { resource->GetDevice(&d3dDevice); } template void D3DSharingFunctions::fillCreateBufferDesc(D3DBufferDesc &desc, unsigned int width) { desc.ByteWidth = width; desc.MiscFlags = D3DResourceFlags::MISC_SHARED; } template void D3DSharingFunctions::fillCreateTexture2dDesc(D3DTexture2dDesc &desc, D3DTexture2dDesc *srcDesc, cl_uint subresource) { desc.Width = srcDesc->Width; desc.Height = srcDesc->Height; desc.MipLevels = 1; desc.ArraySize = 1; desc.Format = srcDesc->Format; desc.MiscFlags = D3DResourceFlags::MISC_SHARED; desc.SampleDesc.Count = srcDesc->SampleDesc.Count; desc.SampleDesc.Quality = srcDesc->SampleDesc.Quality; for (uint32_t i = 0u; i < (subresource % srcDesc->MipLevels); i++) { desc.Width /= 2; desc.Height /= 2; } } template void D3DSharingFunctions::fillCreateTexture3dDesc(D3DTexture3dDesc &desc, D3DTexture3dDesc *srcDesc, cl_uint subresource) { desc.Width = srcDesc->Width; desc.Height = srcDesc->Height; desc.Depth = srcDesc->Depth; desc.MipLevels = 1; desc.Format = srcDesc->Format; desc.MiscFlags = D3DResourceFlags::MISC_SHARED; for (uint32_t i = 0u; i < (subresource % srcDesc->MipLevels); i++) { desc.Width /= 2; desc.Height /= 2; desc.Depth /= 2; } } template void D3DSharingFunctions::createBuffer(D3DBufferObj **buffer, unsigned int width) { D3DBufferDesc stagingDesc = {}; fillCreateBufferDesc(stagingDesc, width); d3dDevice->CreateBuffer(&stagingDesc, nullptr, buffer); } template void D3DSharingFunctions::createTexture2d(D3DTexture2d 
**texture, D3DTexture2dDesc *desc, cl_uint subresource) { D3DTexture2dDesc stagingDesc = {}; fillCreateTexture2dDesc(stagingDesc, desc, subresource); d3dDevice->CreateTexture2D(&stagingDesc, nullptr, texture); } template void D3DSharingFunctions::createTexture3d(D3DTexture3d **texture, D3DTexture3dDesc *desc, cl_uint subresource) { D3DTexture3dDesc stagingDesc = {}; fillCreateTexture3dDesc(stagingDesc, desc, subresource); d3dDevice->CreateTexture3D(&stagingDesc, nullptr, texture); } template bool D3DSharingFunctions::checkFormatSupport(DXGI_FORMAT format, UINT *pFormat) { auto errorCode = d3dDevice->CheckFormatSupport(format, pFormat); return errorCode == S_OK; } template cl_int D3DSharingFunctions::validateFormatSupport(DXGI_FORMAT format, cl_mem_object_type type) { auto &formats = retrieveTextureFormats(type, 0); auto iter = std::find(formats.begin(), formats.end(), format); if (iter != formats.end()) { return CL_SUCCESS; } return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; } template std::vector &D3DSharingFunctions::retrieveTextureFormats(cl_mem_object_type imageType, cl_uint plane) { auto cached = textureFormatCache.find(imageType); if (cached == textureFormatCache.end()) { bool success; std::tie(cached, success) = textureFormatCache.emplace(imageType, std::vector(0)); if (!success) { return DXGINoFormats; } std::vector &cached_formats = cached->second; std::vector planarFormats(0); cached_formats.reserve(arrayCount(DXGIFormats)); for (auto DXGIFormat : DXGIFormats) { UINT format = 0; if (checkFormatSupport(DXGIFormat, &format)) { if (memObjectFormatSupport(imageType, format)) { cached_formats.push_back(DXGIFormat); if (D3DSharing::isFormatWithPlane1(DXGIFormat)) { planarFormats.push_back(DXGIFormat); } } } } cached_formats.shrink_to_fit(); textureFormatPlane1Cache.emplace(imageType, planarFormats); } if (plane == 1) { return textureFormatPlane1Cache.find(imageType)->second; } return cached->second; } template <> bool 
D3DSharingFunctions::memObjectFormatSupport(cl_mem_object_type objectType, UINT format) { auto d3dformat = static_cast(format); return ((objectType & CL_MEM_OBJECT_BUFFER) && (d3dformat & D3D10_FORMAT_SUPPORT_BUFFER)) || ((objectType & CL_MEM_OBJECT_IMAGE2D) && (d3dformat & D3D10_FORMAT_SUPPORT_TEXTURE2D)) || ((objectType & CL_MEM_OBJECT_IMAGE3D) && (d3dformat & D3D10_FORMAT_SUPPORT_TEXTURE3D)); } template <> bool D3DSharingFunctions::memObjectFormatSupport(cl_mem_object_type objectType, UINT format) { auto d3dformat = static_cast(format); return ((objectType & CL_MEM_OBJECT_BUFFER) && (d3dformat & D3D11_FORMAT_SUPPORT_BUFFER)) || ((objectType & CL_MEM_OBJECT_IMAGE2D) && (d3dformat & D3D11_FORMAT_SUPPORT_TEXTURE2D)) || ((objectType & CL_MEM_OBJECT_IMAGE3D) && (d3dformat & D3D11_FORMAT_SUPPORT_TEXTURE3D)); } template void D3DSharingFunctions::getBufferDesc(D3DBufferDesc *bufferDesc, D3DBufferObj *buffer) { buffer->GetDesc(bufferDesc); } template void D3DSharingFunctions::getTexture2dDesc(D3DTexture2dDesc *textureDesc, D3DTexture2d *texture) { texture->GetDesc(textureDesc); } template void D3DSharingFunctions::getTexture3dDesc(D3DTexture3dDesc *textureDesc, D3DTexture3d *texture) { texture->GetDesc(textureDesc); } template void D3DSharingFunctions::getSharedHandle(D3DResource *resource, void **handle) { IDXGIResource *dxgiResource = nullptr; resource->QueryInterface(__uuidof(IDXGIResource), (void **)&dxgiResource); dxgiResource->GetSharedHandle(handle); dxgiResource->Release(); } template void D3DSharingFunctions::getSharedNTHandle(D3DResource *resource, void **handle) { IDXGIResource *dxgiResource = nullptr; IDXGIResource1 *dxgiResource1 = nullptr; resource->QueryInterface(__uuidof(IDXGIResource), (void **)&dxgiResource); dxgiResource->QueryInterface(__uuidof(IDXGIResource1), (void **)&dxgiResource1); dxgiResource1->CreateSharedHandle(nullptr, DXGI_SHARED_RESOURCE_READ | DXGI_SHARED_RESOURCE_WRITE, nullptr, handle); dxgiResource1->Release(); dxgiResource->Release(); 
} template void D3DSharingFunctions::addRef(D3DResource *resource) { resource->AddRef(); } template void D3DSharingFunctions::release(IUnknown *resource) { resource->Release(); } template void D3DSharingFunctions::lockRect(D3DTexture2d *resource, D3DLOCKED_RECT *lockedRect, uint32_t flags) { } template void D3DSharingFunctions::unlockRect(D3DTexture2d *resource) { } template void D3DSharingFunctions::updateSurface(D3DTexture2d *src, D3DTexture2d *dst) { } template void D3DSharingFunctions::getRenderTargetData(D3DTexture2d *renderTarget, D3DTexture2d *dstSurface) { } template <> void D3DSharingFunctions::copySubresourceRegion(D3DResource *dst, cl_uint dstSubresource, D3DResource *src, cl_uint srcSubresource) { d3dDevice->CopySubresourceRegion(dst, dstSubresource, 0, 0, 0, src, srcSubresource, nullptr); } template <> void D3DSharingFunctions::copySubresourceRegion(D3DResource *dst, cl_uint dstSubresource, D3DResource *src, cl_uint srcSubresource) { d3d11DeviceContext->CopySubresourceRegion(dst, dstSubresource, 0, 0, 0, src, srcSubresource, nullptr); } template <> void D3DSharingFunctions::flushAndWait(D3DQuery *query) { query->End(); d3dDevice->Flush(); while (query->GetData(nullptr, 0, 0) != S_OK) ; } template <> void D3DSharingFunctions::flushAndWait(D3DQuery *query) { d3d11DeviceContext->End(query); d3d11DeviceContext->Flush(); while (d3d11DeviceContext->GetData(query, nullptr, 0, 0) != S_OK) ; } template <> void D3DSharingFunctions::getDeviceContext(D3DQuery *query) { } template <> void D3DSharingFunctions::getDeviceContext(D3DQuery *query) { d3dDevice->GetImmediateContext(&d3d11DeviceContext); } template <> void D3DSharingFunctions::releaseDeviceContext(D3DQuery *query) { } template <> void D3DSharingFunctions::releaseDeviceContext(D3DQuery *query) { d3d11DeviceContext->Release(); d3d11DeviceContext = nullptr; } template void D3DSharingFunctions::getDxgiDesc(DXGI_ADAPTER_DESC *dxgiDesc, IDXGIAdapter *adapter, D3DDevice *device) { if (!adapter) { IDXGIDevice 
*dxgiDevice = nullptr; device->QueryInterface(__uuidof(IDXGIDevice), (void **)&dxgiDevice); dxgiDevice->GetAdapter(&adapter); dxgiDevice->Release(); } else { adapter->AddRef(); } adapter->GetDesc(dxgiDesc); adapter->Release(); } template D3DSharingFunctions *Context::getSharing>(); template D3DSharingFunctions *Context::getSharing>(); compute-runtime-22.14.22890/opencl/source/os_interface/windows/d3d9_sharing_functions.cpp000066400000000000000000000121731422164147700313430ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/context/context.inl" #include "opencl/source/os_interface/windows/d3d_sharing_functions.h" #include "opencl/source/sharings/sharing_factory.h" using namespace NEO; template class D3DSharingFunctions; const uint32_t D3DSharingFunctions::sharingId = SharingType::D3D9_SHARING; template <> void D3DSharingFunctions::createQuery(D3DQuery **query) { D3DQUERYTYPE queryType = D3DQUERYTYPE_EVENT; d3dDevice->CreateQuery(queryType, query); } template <> void D3DSharingFunctions::updateDevice(D3DResource *resource) { resource->GetDevice(&d3dDevice); } template <> void D3DSharingFunctions::fillCreateBufferDesc(D3DBufferDesc &desc, unsigned int width) { } template <> std::vector &D3DSharingFunctions::retrieveTextureFormats(cl_mem_object_type imageType, cl_uint plane) { return DXGINoFormats; } template <> void D3DSharingFunctions::fillCreateTexture2dDesc(D3DTexture2dDesc &desc, D3DTexture2dDesc *srcDesc, cl_uint subresource) { } template <> void D3DSharingFunctions::fillCreateTexture3dDesc(D3DTexture3dDesc &desc, D3DTexture3dDesc *srcDesc, cl_uint subresource) { } template <> void D3DSharingFunctions::createBuffer(D3DBufferObj **buffer, unsigned int width) { } template <> void D3DSharingFunctions::createTexture2d(D3DTexture2d **texture, D3DTexture2dDesc *desc, cl_uint subresource) { d3dDevice->CreateOffscreenPlainSurface(desc->Width, desc->Height, desc->Format, 
D3DPOOL_SYSTEMMEM, texture, nullptr); } template <> void D3DSharingFunctions::createTexture3d(D3DTexture3d **texture, D3DTexture3dDesc *desc, cl_uint subresource) { } template <> bool D3DSharingFunctions::checkFormatSupport(DXGI_FORMAT format, UINT *pFormat) { return false; } cl_int D3DSharingFunctions::validateFormatSupport(DXGI_FORMAT format, cl_mem_object_type type) { return CL_SUCCESS; } template <> bool D3DSharingFunctions::memObjectFormatSupport(cl_mem_object_type object, UINT format) { return false; } template <> void D3DSharingFunctions::getBufferDesc(D3DBufferDesc *bufferDesc, D3DBufferObj *buffer) { } template <> void D3DSharingFunctions::getTexture2dDesc(D3DTexture2dDesc *textureDesc, D3DTexture2d *texture) { texture->GetDesc(textureDesc); } template <> void D3DSharingFunctions::getTexture3dDesc(D3DTexture3dDesc *textureDesc, D3DTexture3d *texture) { } template <> void D3DSharingFunctions::getSharedHandle(D3DResource *resource, void **handle) { } template <> void D3DSharingFunctions::getSharedNTHandle(D3DResource *resource, void **handle) { } template <> void D3DSharingFunctions::addRef(D3DResource *resource) { resource->AddRef(); } template <> void D3DSharingFunctions::release(IUnknown *resource) { if (resource) { resource->Release(); } } template <> void D3DSharingFunctions::lockRect(D3DTexture2d *d3dresource, D3DLOCKED_RECT *lockedRect, uint32_t flags) { d3dresource->LockRect(lockedRect, nullptr, flags); } template <> void D3DSharingFunctions::unlockRect(D3DTexture2d *d3dresource) { d3dresource->UnlockRect(); } template <> void D3DSharingFunctions::getRenderTargetData(D3DTexture2d *renderTarget, D3DTexture2d *dstSurface) { d3dDevice->GetRenderTargetData(renderTarget, dstSurface); } template <> void D3DSharingFunctions::copySubresourceRegion(D3DResource *dst, cl_uint dstSubresource, D3DResource *src, cl_uint srcSubresource) { } template <> void D3DSharingFunctions::updateSurface(D3DTexture2d *src, D3DTexture2d *dst) { d3dDevice->UpdateSurface(src, 
nullptr, dst, nullptr); } template <> void D3DSharingFunctions::flushAndWait(D3DQuery *query) { query->Issue(D3DISSUE_END); while (query->GetData(nullptr, 0, D3DGETDATA_FLUSH) != S_OK) ; } template <> void D3DSharingFunctions::getDeviceContext(D3DQuery *query) { } template <> void D3DSharingFunctions::releaseDeviceContext(D3DQuery *query) { } template <> void D3DSharingFunctions::getDxgiDesc(DXGI_ADAPTER_DESC *dxgiDesc, IDXGIAdapter *adapter, D3DDevice *device) { if (!adapter) { IDXGIDevice *dxgiDevice = nullptr; device->QueryInterface(__uuidof(IDXGIDevice), (void **)&dxgiDevice); dxgiDevice->GetAdapter(&adapter); dxgiDevice->Release(); } else { adapter->AddRef(); } adapter->GetDesc(dxgiDesc); adapter->Release(); } template D3DSharingFunctions *Context::getSharing>(); compute-runtime-22.14.22890/opencl/source/os_interface/windows/d3d_sharing_functions.h000066400000000000000000000170021422164147700307130ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/array_count.h" #include "shared/source/helpers/debug_helpers.h" #include "opencl/source/api/dispatch.h" #include "opencl/source/sharings/sharing.h" #include "DXGI1_2.h" #include #include namespace NEO { namespace D3DTypesHelper { struct D3D9 { typedef IDirect3DDevice9 D3DDevice; typedef IDirect3DQuery9 D3DQuery; typedef D3DQUERYTYPE D3DQueryDesc; typedef IDirect3DResource9 D3DResource; typedef struct { } D3DBufferDesc; typedef void *D3DBufferObj; typedef D3DSURFACE_DESC D3DTexture2dDesc; typedef struct { } D3DTexture3dDesc; typedef IDirect3DSurface9 D3DTexture2d; typedef struct { } D3DTexture3d; }; struct D3D10 { typedef ID3D10Device D3DDevice; typedef ID3D10Query D3DQuery; typedef D3D10_QUERY_DESC D3DQueryDesc; typedef ID3D10Resource D3DResource; typedef D3D10_BUFFER_DESC D3DBufferDesc; typedef ID3D10Buffer D3DBufferObj; typedef D3D10_TEXTURE2D_DESC D3DTexture2dDesc; typedef D3D10_TEXTURE3D_DESC 
D3DTexture3dDesc; typedef ID3D10Texture2D D3DTexture2d; typedef ID3D10Texture3D D3DTexture3d; }; struct D3D11 { typedef ID3D11Device D3DDevice; typedef ID3D11Query D3DQuery; typedef D3D11_QUERY_DESC D3DQueryDesc; typedef ID3D11Resource D3DResource; typedef D3D11_BUFFER_DESC D3DBufferDesc; typedef ID3D11Buffer D3DBufferObj; typedef D3D11_TEXTURE2D_DESC D3DTexture2dDesc; typedef D3D11_TEXTURE3D_DESC D3DTexture3dDesc; typedef ID3D11Texture2D D3DTexture2d; typedef ID3D11Texture3D D3DTexture3d; }; } // namespace D3DTypesHelper enum D3DResourceFlags { USAGE_RENDERTARGET = 1, MISC_SHARED = 2, MISC_SHARED_KEYEDMUTEX = 256, MISC_SHARED_NTHANDLE = 2048 }; template class D3DSharingFunctions : public SharingFunctions { typedef typename D3D::D3DDevice D3DDevice; typedef typename D3D::D3DQuery D3DQuery; typedef typename D3D::D3DQueryDesc D3DQueryDesc; typedef typename D3D::D3DResource D3DResource; typedef typename D3D::D3DBufferDesc D3DBufferDesc; typedef typename D3D::D3DBufferObj D3DBufferObj; typedef typename D3D::D3DTexture2dDesc D3DTexture2dDesc; typedef typename D3D::D3DTexture3dDesc D3DTexture3dDesc; typedef typename D3D::D3DTexture2d D3DTexture2d; typedef typename D3D::D3DTexture3d D3DTexture3d; public: typedef void (*GetDxgiDescFcn)(DXGI_ADAPTER_DESC *dxgiDesc, IDXGIAdapter *adapter, D3DDevice *device); D3DSharingFunctions(D3DDevice *d3dDevice) : d3dDevice(d3dDevice) { trackedResources.reserve(128); getDxgiDescFcn = &this->getDxgiDesc; }; uint32_t getId() const override { return D3DSharingFunctions::sharingId; } D3DSharingFunctions() = delete; static const uint32_t sharingId; MOCKABLE_VIRTUAL void createQuery(D3DQuery **query); MOCKABLE_VIRTUAL void createBuffer(D3DBufferObj **buffer, unsigned int width); MOCKABLE_VIRTUAL void createTexture2d(D3DTexture2d **texture, D3DTexture2dDesc *desc, cl_uint subresource); MOCKABLE_VIRTUAL void createTexture3d(D3DTexture3d **texture, D3DTexture3dDesc *desc, cl_uint subresource); MOCKABLE_VIRTUAL void getBufferDesc(D3DBufferDesc 
*bufferDesc, D3DBufferObj *buffer); MOCKABLE_VIRTUAL void getTexture2dDesc(D3DTexture2dDesc *textureDesc, D3DTexture2d *texture); MOCKABLE_VIRTUAL void getTexture3dDesc(D3DTexture3dDesc *textureDesc, D3DTexture3d *texture); MOCKABLE_VIRTUAL void getSharedHandle(D3DResource *resource, void **handle); MOCKABLE_VIRTUAL void getSharedNTHandle(D3DResource *resource, void **handle); MOCKABLE_VIRTUAL void addRef(D3DResource *resource); MOCKABLE_VIRTUAL void release(IUnknown *resource); MOCKABLE_VIRTUAL void copySubresourceRegion(D3DResource *dst, cl_uint dstSubresource, D3DResource *src, cl_uint srcSubresource); MOCKABLE_VIRTUAL void flushAndWait(D3DQuery *query); MOCKABLE_VIRTUAL void getDeviceContext(D3DQuery *query); MOCKABLE_VIRTUAL void releaseDeviceContext(D3DQuery *query); MOCKABLE_VIRTUAL void lockRect(D3DTexture2d *d3dResource, D3DLOCKED_RECT *lockedRect, uint32_t flags); MOCKABLE_VIRTUAL void unlockRect(D3DTexture2d *d3dResource); MOCKABLE_VIRTUAL void getRenderTargetData(D3DTexture2d *renderTarget, D3DTexture2d *dstSurface); MOCKABLE_VIRTUAL void updateSurface(D3DTexture2d *src, D3DTexture2d *dst); MOCKABLE_VIRTUAL void updateDevice(D3DResource *resource); MOCKABLE_VIRTUAL bool checkFormatSupport(DXGI_FORMAT format, UINT *pFormat); MOCKABLE_VIRTUAL bool memObjectFormatSupport(cl_mem_object_type object, UINT format); MOCKABLE_VIRTUAL cl_int validateFormatSupport(DXGI_FORMAT format, cl_mem_object_type type); GetDxgiDescFcn getDxgiDescFcn = nullptr; bool isTracked(D3DResource *resource, cl_uint subresource) { return std::find(trackedResources.begin(), trackedResources.end(), std::make_pair(resource, subresource)) != trackedResources.end(); } void track(D3DResource *resource, cl_uint subresource) { trackedResources.push_back(std::make_pair(resource, subresource)); } void untrack(D3DResource *resource, cl_uint subresource) { auto element = std::find(trackedResources.begin(), trackedResources.end(), std::make_pair(resource, subresource)); DEBUG_BREAK_IF(element == 
trackedResources.end()); trackedResources.erase(element); } void setDevice(D3DDevice *d3dDevice) { this->d3dDevice = d3dDevice; } D3DDevice *getDevice() { return d3dDevice; } void fillCreateBufferDesc(D3DBufferDesc &desc, unsigned int width); void fillCreateTexture2dDesc(D3DTexture2dDesc &desc, D3DTexture2dDesc *srcDesc, cl_uint subresource); void fillCreateTexture3dDesc(D3DTexture3dDesc &desc, D3DTexture3dDesc *srcDesc, cl_uint subresource); std::vector &retrieveTextureFormats(cl_mem_object_type imageType, cl_uint plane); protected: D3DDevice *d3dDevice = nullptr; ID3D11DeviceContext *d3d11DeviceContext = nullptr; std::vector DXGINoFormats; std::vector> trackedResources; std::map> textureFormatCache; std::map> textureFormatPlane1Cache; static void getDxgiDesc(DXGI_ADAPTER_DESC *dxgiDesc, IDXGIAdapter *adapter, D3DDevice *device); }; template static inline cl_int getSupportedDXTextureFormats(cl_context context, cl_mem_object_type imageType, cl_uint plane, cl_uint numEntries, DXGI_FORMAT *formats, cl_uint *numImageFormats) { Context *pContext = castToObject(context); if (!pContext) { return CL_INVALID_CONTEXT; } auto pSharing = pContext->getSharing>(); if (!pSharing) { return CL_INVALID_CONTEXT; } size_t numberOfFormats = 0; if (plane <= 1) { auto supported_formats = pSharing->retrieveTextureFormats(imageType, plane); numberOfFormats = supported_formats.size(); if (formats != nullptr) { memcpy_s(formats, sizeof(DXGI_FORMAT) * numEntries, supported_formats.data(), sizeof(DXGI_FORMAT) * std::min(static_cast(numEntries), numberOfFormats)); } } if (numImageFormats) { *numImageFormats = static_cast(numberOfFormats); } return CL_SUCCESS; } } // namespace NEO compute-runtime-22.14.22890/opencl/source/os_interface/windows/device_caps_init_win.cpp000066400000000000000000000017331422164147700311420ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #ifdef _WIN32 #include "shared/source/device/device.h" #include 
"opencl/source/cl_device/cl_device.h" namespace NEO { void ClDevice::initializeOsSpecificCaps() { deviceExtensions += "cl_intel_simultaneous_sharing "; deviceInfo.deviceExtensions = deviceExtensions.c_str(); simultaneousInterops = {CL_GL_CONTEXT_KHR, CL_WGL_HDC_KHR, CL_CONTEXT_ADAPTER_D3D9_KHR, CL_CONTEXT_D3D9_DEVICE_INTEL, CL_CONTEXT_ADAPTER_D3D9EX_KHR, CL_CONTEXT_D3D9EX_DEVICE_INTEL, CL_CONTEXT_ADAPTER_DXVA_KHR, CL_CONTEXT_DXVA_DEVICE_INTEL, CL_CONTEXT_D3D10_DEVICE_KHR, CL_CONTEXT_D3D11_DEVICE_KHR, 0}; } } // namespace NEO #endif compute-runtime-22.14.22890/opencl/source/os_interface/windows/ocl_reg_path.cpp000066400000000000000000000003341422164147700274170ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/os_interface/ocl_reg_path.h" namespace NEO { const char *oclRegPath = "Software\\Intel\\IGFX\\OCL\\"; } compute-runtime-22.14.22890/opencl/source/os_interface/windows/platform_teardown_win.cpp000066400000000000000000000007021422164147700313740ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/platform/platform.h" using namespace NEO; BOOL APIENTRY DllMain(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) { if (fdwReason == DLL_PROCESS_DETACH) { delete platformsImpl; } if (fdwReason == DLL_PROCESS_ATTACH) { platformsImpl = new std::vector>; } return TRUE; } compute-runtime-22.14.22890/opencl/source/platform/000077500000000000000000000000001422164147700217565ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/source/platform/CMakeLists.txt000066400000000000000000000006721422164147700245230ustar00rootroot00000000000000# # Copyright (C) 2018-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(RUNTIME_SRCS_PLATFORM ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/platform.cpp ${CMAKE_CURRENT_SOURCE_DIR}/platform.h 
${CMAKE_CURRENT_SOURCE_DIR}/platform_info.h ) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_PLATFORM}) set_property(GLOBAL PROPERTY RUNTIME_SRCS_PLATFORM ${RUNTIME_SRCS_PLATFORM}) compute-runtime-22.14.22890/opencl/source/platform/platform.cpp000066400000000000000000000200431422164147700243050ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "platform.h" #include "shared/source/built_ins/sip.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/compiler_interface/compiler_interface.h" #include "shared/source/compiler_interface/oclc_extensions.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/device/root_device.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/debug_helpers.h" #include "shared/source/helpers/get_info.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/string.h" #include "shared/source/os_interface/device_factory.h" #include "shared/source/os_interface/os_interface.h" #include "shared/source/source_level_debugger/source_level_debugger.h" #include "opencl/source/api/api.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/gtpin/gtpin_notify.h" #include "opencl/source/helpers/get_info_status_mapper.h" #include "opencl/source/sharings/sharing_factory.h" #include "CL/cl_ext.h" #include #include namespace NEO { std::vector> *platformsImpl = nullptr; Platform::Platform(ExecutionEnvironment &executionEnvironmentIn) : executionEnvironment(executionEnvironmentIn) { clDevices.reserve(4); executionEnvironment.incRefInternal(); } Platform::~Platform() { for (auto clDevice : this->clDevices) { clDevice->decRefInternal(); } gtpinNotifyPlatformShutdown(); 
executionEnvironment.decRefInternal(); } cl_int Platform::getInfo(cl_platform_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) { auto retVal = CL_INVALID_VALUE; const std::string *param = nullptr; size_t paramSize = GetInfo::invalidSourceSize; auto getInfoStatus = GetInfoStatus::INVALID_VALUE; switch (paramName) { case CL_PLATFORM_HOST_TIMER_RESOLUTION: { auto pVal = static_cast(this->clDevices[0]->getPlatformHostTimerResolution()); paramSize = sizeof(uint64_t); getInfoStatus = GetInfo::getInfo(paramValue, paramValueSize, &pVal, paramSize); break; } case CL_PLATFORM_NUMERIC_VERSION: { auto pVal = platformInfo->numericVersion; paramSize = sizeof(pVal); getInfoStatus = GetInfo::getInfo(paramValue, paramValueSize, &pVal, paramSize); break; } case CL_PLATFORM_EXTENSIONS_WITH_VERSION: { std::call_once(initializeExtensionsWithVersionOnce, [this]() { this->clDevices[0]->getDeviceInfo(CL_DEVICE_EXTENSIONS_WITH_VERSION, 0, nullptr, nullptr); this->platformInfo->extensionsWithVersion = this->clDevices[0]->getDeviceInfo().extensionsWithVersion; }); auto pVal = platformInfo->extensionsWithVersion.data(); paramSize = platformInfo->extensionsWithVersion.size() * sizeof(cl_name_version); getInfoStatus = GetInfo::getInfo(paramValue, paramValueSize, pVal, paramSize); break; } case CL_PLATFORM_PROFILE: param = &platformInfo->profile; break; case CL_PLATFORM_VERSION: param = &platformInfo->version; break; case CL_PLATFORM_NAME: param = &platformInfo->name; break; case CL_PLATFORM_VENDOR: param = &platformInfo->vendor; break; case CL_PLATFORM_EXTENSIONS: param = &platformInfo->extensions; break; case CL_PLATFORM_ICD_SUFFIX_KHR: param = &platformInfo->icdSuffixKhr; break; default: break; } // Case for string parameters if (param) { paramSize = param->length() + 1; getInfoStatus = GetInfo::getInfo(paramValue, paramValueSize, param->c_str(), paramSize); } retVal = changeGetInfoStatusToCLResultType(getInfoStatus); 
GetInfo::setParamValueReturnSize(paramValueSizeRet, paramSize, getInfoStatus); return retVal; } bool Platform::initialize(std::vector> devices) { TakeOwnershipWrapper platformOwnership(*this); if (devices.empty()) { return false; } if (state == StateInited) { return true; } state = StateIniting; for (auto &inputDevice : devices) { ClDevice *pClDevice = nullptr; auto pDevice = inputDevice.release(); UNRECOVERABLE_IF(!pDevice); pClDevice = new ClDevice{*pDevice, this}; this->clDevices.push_back(pClDevice); if (pClDevice->getPreemptionMode() == PreemptionMode::MidThread || pClDevice->isDebuggerActive()) { bool ret = SipKernel::initSipKernel(SipKernel::getSipKernelType(*pDevice), *pDevice); UNRECOVERABLE_IF(!ret); } } DEBUG_BREAK_IF(this->platformInfo); this->platformInfo.reset(new PlatformInfo); this->platformInfo->extensions = this->clDevices[0]->getDeviceInfo().deviceExtensions; switch (this->clDevices[0]->getEnabledClVersion()) { case 30: this->platformInfo->version = "OpenCL 3.0 "; this->platformInfo->numericVersion = CL_MAKE_VERSION(3, 0, 0); break; case 21: this->platformInfo->version = "OpenCL 2.1 "; this->platformInfo->numericVersion = CL_MAKE_VERSION(2, 1, 0); break; default: this->platformInfo->version = "OpenCL 1.2 "; this->platformInfo->numericVersion = CL_MAKE_VERSION(1, 2, 0); break; } this->fillGlobalDispatchTable(); DEBUG_BREAK_IF(DebugManager.flags.CreateMultipleSubDevices.get() > 1 && !this->clDevices[0]->getDefaultEngine().commandStreamReceiver->peekTimestampPacketWriteEnabled()); state = StateInited; return true; } void Platform::fillGlobalDispatchTable() { sharingFactory.fillGlobalDispatchTable(); } bool Platform::isInitialized() { TakeOwnershipWrapper platformOwnership(*this); bool ret = (this->state == StateInited); return ret; } ClDevice *Platform::getClDevice(size_t deviceOrdinal) { TakeOwnershipWrapper platformOwnership(*this); if (this->state != StateInited || deviceOrdinal >= clDevices.size()) { return nullptr; } auto pClDevice = 
clDevices[deviceOrdinal]; DEBUG_BREAK_IF(pClDevice == nullptr); return pClDevice; } size_t Platform::getNumDevices() const { TakeOwnershipWrapper platformOwnership(*this); if (this->state != StateInited) { return 0; } return clDevices.size(); } ClDevice **Platform::getClDevices() { TakeOwnershipWrapper platformOwnership(*this); if (this->state != StateInited) { return nullptr; } return clDevices.data(); } const PlatformInfo &Platform::getPlatformInfo() const { DEBUG_BREAK_IF(!platformInfo); return *platformInfo; } std::unique_ptr (*Platform::createFunc)(ExecutionEnvironment &) = [](ExecutionEnvironment &executionEnvironment) -> std::unique_ptr { return std::make_unique(executionEnvironment); }; std::vector Platform::groupDevices(DeviceVector devices) { std::map platformsMap; std::vector outDevices; for (auto &device : devices) { auto productFamily = device->getHardwareInfo().platform.eProductFamily; auto result = platformsMap.find(productFamily); if (result == platformsMap.end()) { platformsMap.insert({productFamily, platformsMap.size()}); outDevices.push_back(DeviceVector{}); } auto platformId = platformsMap[productFamily]; outDevices[platformId].push_back(std::move(device)); } std::sort(outDevices.begin(), outDevices.end(), [](DeviceVector &lhs, DeviceVector &rhs) -> bool { return lhs[0]->getHardwareInfo().platform.eProductFamily > rhs[0]->getHardwareInfo().platform.eProductFamily; }); return outDevices; } } // namespace NEO compute-runtime-22.14.22890/opencl/source/platform/platform.h000066400000000000000000000040531422164147700237550ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/common_types.h" #include "opencl/source/api/cl_types.h" #include "opencl/source/cl_device/cl_device_vector.h" #include "opencl/source/helpers/base_object.h" #include "platform_info.h" #include #include #include namespace NEO { class CompilerInterface; class Device; class 
ExecutionEnvironment; class GmmHelper; class GmmClientContext; struct HardwareInfo; template <> struct OpenCLObjectMapper<_cl_platform_id> { typedef class Platform DerivedType; }; class Platform : public BaseObject<_cl_platform_id> { public: static const cl_ulong objectMagic = 0x8873ACDEF2342133LL; Platform(ExecutionEnvironment &executionEnvironment); ~Platform() override; Platform(const Platform &) = delete; Platform &operator=(Platform const &) = delete; cl_int getInfo(cl_platform_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet); MOCKABLE_VIRTUAL bool initialize(std::vector> devices); bool isInitialized(); size_t getNumDevices() const; ClDevice **getClDevices(); ClDevice *getClDevice(size_t deviceOrdinal); const PlatformInfo &getPlatformInfo() const; ExecutionEnvironment *peekExecutionEnvironment() const { return &executionEnvironment; } static std::unique_ptr (*createFunc)(ExecutionEnvironment &executionEnvironment); static std::vector groupDevices(DeviceVector devices); protected: enum { StateNone, StateIniting, StateInited, }; cl_uint state = StateNone; void fillGlobalDispatchTable(); std::unique_ptr platformInfo; ClDeviceVector clDevices; ExecutionEnvironment &executionEnvironment; std::once_flag initializeExtensionsWithVersionOnce; }; extern std::vector> *platformsImpl; } // namespace NEO compute-runtime-22.14.22890/opencl/source/platform/platform_info.h000066400000000000000000000010101422164147700247560ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "CL/cl.h" #include #include struct PlatformInfo { std::vector extensionsWithVersion; std::string profile = "FULL_PROFILE"; std::string version = ""; std::string name = "Intel(R) OpenCL HD Graphics"; std::string vendor = "Intel(R) Corporation"; std::string extensions; std::string icdSuffixKhr = "INTEL"; cl_version numericVersion = 0; }; 
compute-runtime-22.14.22890/opencl/source/program/000077500000000000000000000000001422164147700216015ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/source/program/CMakeLists.txt000066400000000000000000000017661422164147700243530ustar00rootroot00000000000000# # Copyright (C) 2018-2022 Intel Corporation # # SPDX-License-Identifier: MIT # set(RUNTIME_SRCS_PROGRAM ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}additional_options.cpp ${CMAKE_CURRENT_SOURCE_DIR}/build.cpp ${CMAKE_CURRENT_SOURCE_DIR}/compile.cpp ${CMAKE_CURRENT_SOURCE_DIR}/create.cpp ${CMAKE_CURRENT_SOURCE_DIR}/create.inl ${CMAKE_CURRENT_SOURCE_DIR}/get_info.cpp ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}internal_options.cpp ${CMAKE_CURRENT_SOURCE_DIR}/link.cpp ${CMAKE_CURRENT_SOURCE_DIR}/printf_handler.cpp ${CMAKE_CURRENT_SOURCE_DIR}/printf_handler.h ${CMAKE_CURRENT_SOURCE_DIR}/process_device_binary.cpp ${CMAKE_CURRENT_SOURCE_DIR}/process_intermediate_binary.cpp ${CMAKE_CURRENT_SOURCE_DIR}/program.cpp ${CMAKE_CURRENT_SOURCE_DIR}/program.h ) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_PROGRAM}) set_property(GLOBAL PROPERTY RUNTIME_SRCS_PROGRAM ${RUNTIME_SRCS_PROGRAM}) compute-runtime-22.14.22890/opencl/source/program/additional_options.cpp000066400000000000000000000003621422164147700261710ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/program/program.h" namespace NEO { void Program::applyAdditionalOptions(std::string &internalOptions) { } }; // namespace NEO compute-runtime-22.14.22890/opencl/source/program/build.cpp000066400000000000000000000260301422164147700234050ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/compiler_interface/compiler_interface.h" #include "shared/source/compiler_interface/compiler_warnings/compiler_warnings.h" #include 
"shared/source/device/device.h" #include "shared/source/device_binary_format/device_binary_formats.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/helpers/compiler_options_parser.h" #include "shared/source/program/kernel_info.h" #include "shared/source/source_level_debugger/source_level_debugger.h" #include "shared/source/utilities/logger.h" #include "shared/source/utilities/time_measure_wrapper.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/gtpin/gtpin_notify.h" #include "opencl/source/helpers/cl_validators.h" #include "opencl/source/platform/platform.h" #include "opencl/source/program/program.h" #include "compiler_options.h" #include #include #include namespace NEO { cl_int Program::build( const ClDeviceVector &deviceVector, const char *buildOptions, bool enableCaching) { cl_int retVal = CL_SUCCESS; auto internalOptions = getInternalOptions(); auto defaultClDevice = deviceVector[0]; UNRECOVERABLE_IF(defaultClDevice == nullptr); auto &defaultDevice = defaultClDevice->getDevice(); enum class BuildPhase { Init, SourceCodeNotification, BinaryCreation, BinaryProcessing, DebugDataNotification }; std::unordered_map phaseReached; for (const auto &clDevice : deviceVector) { phaseReached[clDevice->getRootDeviceIndex()] = BuildPhase::Init; } do { // check to see if a previous build request is in progress if (std::any_of(deviceVector.begin(), deviceVector.end(), [&](auto device) { return CL_BUILD_IN_PROGRESS == deviceBuildInfos[device].buildStatus; })) { retVal = CL_INVALID_OPERATION; break; } if (isCreatedFromBinary == false) { for (const auto &device : deviceVector) { deviceBuildInfos[device].buildStatus = CL_BUILD_IN_PROGRESS; } if (nullptr != buildOptions) { options = buildOptions; } else if (this->createdFrom != CreatedFrom::BINARY) { options = ""; } const bool shouldSuppressRebuildWarning{CompilerOptions::extract(CompilerOptions::noRecompiledFromIr, options)}; extractInternalOptions(options, 
internalOptions); applyAdditionalOptions(internalOptions); CompilerInterface *pCompilerInterface = defaultDevice.getCompilerInterface(); if (!pCompilerInterface) { retVal = CL_OUT_OF_HOST_MEMORY; break; } TranslationInput inputArgs = {IGC::CodeType::oclC, IGC::CodeType::oclGenBin}; if (createdFrom != CreatedFrom::SOURCE) { inputArgs.srcType = isSpirV ? IGC::CodeType::spirV : IGC::CodeType::llvmBc; inputArgs.src = ArrayRef(irBinary.get(), irBinarySize); } else { inputArgs.src = ArrayRef(sourceCode.c_str(), sourceCode.size()); } if (inputArgs.src.size() == 0) { retVal = CL_INVALID_PROGRAM; break; } if (isKernelDebugEnabled()) { std::string filename; for (const auto &clDevice : deviceVector) { if (BuildPhase::SourceCodeNotification == phaseReached[clDevice->getRootDeviceIndex()]) { continue; } appendKernelDebugOptions(*clDevice, internalOptions); notifyDebuggerWithSourceCode(*clDevice, filename); prependFilePathToOptions(filename); phaseReached[clDevice->getRootDeviceIndex()] = BuildPhase::SourceCodeNotification; } } std::string extensions = requiresOpenClCFeatures(options) ? 
defaultClDevice->peekCompilerExtensionsWithFeatures() : defaultClDevice->peekCompilerExtensions(); if (requiresAdditionalExtensions(options)) { extensions.erase(extensions.length() - 1); extensions += ",+cl_khr_3d_image_writes "; } CompilerOptions::concatenateAppend(internalOptions, extensions); if (!this->getIsBuiltIn() && DebugManager.flags.InjectInternalBuildOptions.get() != "unk") { NEO::CompilerOptions::concatenateAppend(internalOptions, NEO::DebugManager.flags.InjectInternalBuildOptions.get()); } inputArgs.apiOptions = ArrayRef(options.c_str(), options.length()); inputArgs.internalOptions = ArrayRef(internalOptions.c_str(), internalOptions.length()); inputArgs.GTPinInput = gtpinGetIgcInit(); inputArgs.specializedValues = this->specConstantsValues; DBG_LOG(LogApiCalls, "Build Options", inputArgs.apiOptions.begin(), "\nBuild Internal Options", inputArgs.internalOptions.begin()); inputArgs.allowCaching = enableCaching; NEO::TranslationOutput compilerOuput = {}; for (const auto &clDevice : deviceVector) { if (shouldWarnAboutRebuild && !shouldSuppressRebuildWarning) { this->updateBuildLog(clDevice->getRootDeviceIndex(), CompilerWarnings::recompiledFromIr.data(), CompilerWarnings::recompiledFromIr.length()); } auto compilerErr = pCompilerInterface->build(clDevice->getDevice(), inputArgs, compilerOuput); this->updateBuildLog(clDevice->getRootDeviceIndex(), compilerOuput.frontendCompilerLog.c_str(), compilerOuput.frontendCompilerLog.size()); this->updateBuildLog(clDevice->getRootDeviceIndex(), compilerOuput.backendCompilerLog.c_str(), compilerOuput.backendCompilerLog.size()); retVal = asClError(compilerErr); if (retVal != CL_SUCCESS) { break; } if (inputArgs.srcType == IGC::CodeType::oclC) { this->irBinary = std::move(compilerOuput.intermediateRepresentation.mem); this->irBinarySize = compilerOuput.intermediateRepresentation.size; this->isSpirV = compilerOuput.intermediateCodeType == IGC::CodeType::spirV; } this->buildInfos[clDevice->getRootDeviceIndex()].debugData = 
std::move(compilerOuput.debugData.mem); this->buildInfos[clDevice->getRootDeviceIndex()].debugDataSize = compilerOuput.debugData.size; if (BuildPhase::BinaryCreation == phaseReached[clDevice->getRootDeviceIndex()]) { continue; } this->replaceDeviceBinary(std::move(compilerOuput.deviceBinary.mem), compilerOuput.deviceBinary.size, clDevice->getRootDeviceIndex()); phaseReached[clDevice->getRootDeviceIndex()] = BuildPhase::BinaryCreation; } if (retVal != CL_SUCCESS) { break; } } updateNonUniformFlag(); for (auto &clDevice : deviceVector) { if (BuildPhase::BinaryProcessing == phaseReached[clDevice->getRootDeviceIndex()]) { continue; } if (DebugManager.flags.PrintProgramBinaryProcessingTime.get()) { retVal = TimeMeasureWrapper::functionExecution(*this, &Program::processGenBinary, *clDevice); } else { retVal = processGenBinary(*clDevice); } if (retVal != CL_SUCCESS) { break; } phaseReached[clDevice->getRootDeviceIndex()] = BuildPhase::BinaryProcessing; } if (retVal != CL_SUCCESS) { break; } if (isKernelDebugEnabled() || gtpinIsGTPinInitialized()) { for (auto &clDevice : deviceVector) { auto rootDeviceIndex = clDevice->getRootDeviceIndex(); if (BuildPhase::DebugDataNotification == phaseReached[rootDeviceIndex]) { continue; } notifyDebuggerWithDebugData(clDevice); phaseReached[rootDeviceIndex] = BuildPhase::DebugDataNotification; } } } while (false); if (retVal != CL_SUCCESS) { for (const auto &device : deviceVector) { deviceBuildInfos[device].buildStatus = CL_BUILD_ERROR; deviceBuildInfos[device].programBinaryType = CL_PROGRAM_BINARY_TYPE_NONE; } } else { setBuildStatusSuccess(deviceVector, CL_PROGRAM_BINARY_TYPE_EXECUTABLE); } return retVal; } bool Program::appendKernelDebugOptions(ClDevice &clDevice, std::string &internalOptions) { CompilerOptions::concatenateAppend(internalOptions, CompilerOptions::debugKernelEnable); CompilerOptions::concatenateAppend(options, CompilerOptions::generateDebugInfo); auto debugger = clDevice.getSourceLevelDebugger(); if (debugger && 
(NEO::SourceLevelDebugger::shouldAppendOptDisable(*debugger))) { CompilerOptions::concatenateAppend(options, CompilerOptions::optDisable); } return true; } void Program::notifyDebuggerWithSourceCode(ClDevice &clDevice, std::string &filename) { if (clDevice.getSourceLevelDebugger()) { clDevice.getSourceLevelDebugger()->notifySourceCode(sourceCode.c_str(), sourceCode.size(), filename); } } cl_int Program::build(const ClDeviceVector &deviceVector, const char *buildOptions, bool enableCaching, std::unordered_map &builtinsMap) { auto ret = this->build(deviceVector, buildOptions, enableCaching); if (ret != CL_SUCCESS) { return ret; } for (auto &ki : buildInfos[deviceVector[0]->getRootDeviceIndex()].kernelInfoArray) { auto fit = builtinsMap.find(ki->kernelDescriptor.kernelMetadata.kernelName); if (fit == builtinsMap.end()) { continue; } ki->builtinDispatchBuilder = fit->second; } return ret; } void Program::extractInternalOptions(const std::string &options, std::string &internalOptions) { auto tokenized = CompilerOptions::tokenize(options); for (auto &optionString : internalOptionsToExtract) { auto element = std::find(tokenized.begin(), tokenized.end(), optionString); if (element == tokenized.end()) { continue; } if (isFlagOption(optionString)) { CompilerOptions::concatenateAppend(internalOptions, optionString); } else if ((element + 1 != tokenized.end()) && isOptionValueValid(optionString, *(element + 1))) { CompilerOptions::concatenateAppend(internalOptions, optionString); CompilerOptions::concatenateAppend(internalOptions, *(element + 1)); } } } } // namespace NEO compute-runtime-22.14.22890/opencl/source/program/compile.cpp000066400000000000000000000174201422164147700237410ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/compiler_interface/compiler_interface.h" #include "shared/source/compiler_interface/compiler_warnings/compiler_warnings.h" #include 
"shared/source/device/device.h" #include "shared/source/device_binary_format/elf/elf.h" #include "shared/source/device_binary_format/elf/elf_encoder.h" #include "shared/source/device_binary_format/elf/ocl_elf.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/helpers/compiler_options_parser.h" #include "shared/source/source_level_debugger/source_level_debugger.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/helpers/cl_validators.h" #include "opencl/source/platform/platform.h" #include "compiler_options.h" #include "program.h" #include namespace NEO { cl_int Program::compile( const ClDeviceVector &deviceVector, const char *buildOptions, cl_uint numInputHeaders, const cl_program *inputHeaders, const char **headerIncludeNames) { cl_int retVal = CL_SUCCESS; auto defaultClDevice = deviceVector[0]; UNRECOVERABLE_IF(defaultClDevice == nullptr); auto &defaultDevice = defaultClDevice->getDevice(); auto internalOptions = getInternalOptions(); std::unordered_map sourceLevelDebuggerNotified; do { if (numInputHeaders == 0) { if ((headerIncludeNames != nullptr) || (inputHeaders != nullptr)) { retVal = CL_INVALID_VALUE; break; } } else { if ((headerIncludeNames == nullptr) || (inputHeaders == nullptr)) { retVal = CL_INVALID_VALUE; break; } } if (std::any_of(deviceVector.begin(), deviceVector.end(), [&](auto device) { return CL_BUILD_IN_PROGRESS == deviceBuildInfos[device].buildStatus; })) { retVal = CL_INVALID_OPERATION; break; } if ((createdFrom == CreatedFrom::IL) || std::all_of(deviceVector.begin(), deviceVector.end(), [&](auto device) { return CL_PROGRAM_BINARY_TYPE_INTERMEDIATE == deviceBuildInfos[device].programBinaryType; })) { retVal = CL_SUCCESS; break; } for (const auto &device : deviceVector) { sourceLevelDebuggerNotified[device->getRootDeviceIndex()] = false; deviceBuildInfos[device].buildStatus = CL_BUILD_IN_PROGRESS; } options = (buildOptions != nullptr) ? 
buildOptions : ""; const auto shouldSuppressRebuildWarning{CompilerOptions::extract(CompilerOptions::noRecompiledFromIr, options)}; for (const auto &optionString : {CompilerOptions::gtpinRera, CompilerOptions::greaterThan4gbBuffersRequired}) { const auto wasExtracted{CompilerOptions::extract(optionString, options)}; if (wasExtracted) { CompilerOptions::concatenateAppend(internalOptions, optionString); } } // create ELF writer to process all sources to be compiled NEO::Elf::ElfEncoder<> elfEncoder(true, true, 1U); elfEncoder.getElfFileHeader().type = NEO::Elf::ET_OPENCL_SOURCE; elfEncoder.appendSection(NEO::Elf::SHT_OPENCL_SOURCE, "CLMain", sourceCode); for (cl_uint i = 0; i < numInputHeaders; i++) { auto program = inputHeaders[i]; if (program == nullptr) { retVal = CL_INVALID_PROGRAM; break; } auto pHeaderProgObj = castToObject(program); if (pHeaderProgObj == nullptr) { retVal = CL_INVALID_PROGRAM; break; } std::string includeHeaderSource; retVal = pHeaderProgObj->getSource(includeHeaderSource); if (retVal != CL_SUCCESS) { break; } elfEncoder.appendSection(NEO::Elf::SHT_OPENCL_HEADER, ConstStringRef(headerIncludeNames[i], strlen(headerIncludeNames[i])), includeHeaderSource); } if (retVal != CL_SUCCESS) { break; } std::vector compileData = elfEncoder.encode(); CompilerInterface *pCompilerInterface = defaultDevice.getCompilerInterface(); if (!pCompilerInterface) { retVal = CL_OUT_OF_HOST_MEMORY; break; } TranslationInput inputArgs = {IGC::CodeType::elf, IGC::CodeType::undefined}; // set parameters for compilation std::string extensions = requiresOpenClCFeatures(options) ? 
defaultClDevice->peekCompilerExtensionsWithFeatures() : defaultClDevice->peekCompilerExtensions(); if (requiresAdditionalExtensions(options)) { extensions.erase(extensions.length() - 1); extensions += ",+cl_khr_3d_image_writes "; } CompilerOptions::concatenateAppend(internalOptions, extensions); if (isKernelDebugEnabled()) { for (const auto &device : deviceVector) { if (sourceLevelDebuggerNotified[device->getRootDeviceIndex()]) { continue; } std::string filename; appendKernelDebugOptions(*device, internalOptions); notifyDebuggerWithSourceCode(*device, filename); prependFilePathToOptions(filename); sourceLevelDebuggerNotified[device->getRootDeviceIndex()] = true; } } if (!this->getIsBuiltIn() && DebugManager.flags.InjectInternalBuildOptions.get() != "unk") { NEO::CompilerOptions::concatenateAppend(internalOptions, NEO::DebugManager.flags.InjectInternalBuildOptions.get()); } inputArgs.src = ArrayRef(reinterpret_cast(compileData.data()), compileData.size()); inputArgs.apiOptions = ArrayRef(options.c_str(), options.length()); inputArgs.internalOptions = ArrayRef(internalOptions.c_str(), internalOptions.length()); TranslationOutput compilerOuput; auto compilerErr = pCompilerInterface->compile(defaultDevice, inputArgs, compilerOuput); for (const auto &device : deviceVector) { if (shouldWarnAboutRebuild && !shouldSuppressRebuildWarning) { this->updateBuildLog(device->getRootDeviceIndex(), CompilerWarnings::recompiledFromIr.data(), CompilerWarnings::recompiledFromIr.length()); } this->updateBuildLog(device->getRootDeviceIndex(), compilerOuput.frontendCompilerLog.c_str(), compilerOuput.frontendCompilerLog.size()); this->updateBuildLog(device->getRootDeviceIndex(), compilerOuput.backendCompilerLog.c_str(), compilerOuput.backendCompilerLog.size()); } retVal = asClError(compilerErr); if (retVal != CL_SUCCESS) { break; } this->irBinary = std::move(compilerOuput.intermediateRepresentation.mem); this->irBinarySize = compilerOuput.intermediateRepresentation.size; this->isSpirV = 
compilerOuput.intermediateCodeType == IGC::CodeType::spirV; for (const auto &device : deviceVector) { this->buildInfos[device->getRootDeviceIndex()].debugData = std::move(compilerOuput.debugData.mem); this->buildInfos[device->getRootDeviceIndex()].debugDataSize = compilerOuput.debugData.size; } updateNonUniformFlag(); } while (false); if (retVal != CL_SUCCESS) { for (const auto &device : deviceVector) { deviceBuildInfos[device].buildStatus = CL_BUILD_ERROR; deviceBuildInfos[device].programBinaryType = CL_PROGRAM_BINARY_TYPE_NONE; } } else { setBuildStatusSuccess(deviceVector, CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT); } return retVal; } } // namespace NEO compute-runtime-22.14.22890/opencl/source/program/create.cpp000066400000000000000000000017051422164147700235530ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/program/create.inl" #include "opencl/source/program/program.h" namespace NEO { namespace ProgramFunctions { CreateFromILFunc createFromIL = Program::createFromIL; } // namespace ProgramFunctions template Program *Program::create(Context *, const ClDeviceVector &, const size_t *, const unsigned char **, cl_int *, cl_int &); template Program *Program::create(Context *, cl_uint, const char **, const size_t *, cl_int &); template Program *Program::createBuiltInFromSource(const char *, Context *, const ClDeviceVector &, cl_int *); template Program *Program::createFromIL(Context *, const void *, size_t length, cl_int &); template Program *Program::createBuiltInFromGenBinary(Context *context, const ClDeviceVector &, const void *binary, size_t size, cl_int *errcodeRet); } // namespace NEO compute-runtime-22.14.22890/opencl/source/program/create.inl000066400000000000000000000105661422164147700235600ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/debug_settings/debug_settings_manager.h" 
#include "shared/source/device/device.h"
#include "shared/source/helpers/constants.h"
#include "shared/source/helpers/string_helpers.h"

#include "opencl/source/cl_device/cl_device.h"
#include "opencl/source/context/context.h"
#include "opencl/source/platform/platform.h"
#include "opencl/source/program/program.h"

#include "compiler_options.h"

namespace NEO {

/**
 * Creates a program from one device binary per entry of deviceVector.
 *
 * binaries[i] / lengths[i] are loaded for deviceVector[i]; loading stops at the
 * first failure. On failure the program is destroyed and nullptr is returned;
 * errcodeRet always receives the final status.
 */
template <typename T>
T *Program::create(
    Context *pContext,
    const ClDeviceVector &deviceVector,
    const size_t *lengths,
    const unsigned char **binaries,
    cl_int *binaryStatus,
    cl_int &errcodeRet) {
    auto program = new T(pContext, false, deviceVector);

    // Stays CL_INVALID_PROGRAM when deviceVector is empty.
    cl_int retVal = CL_INVALID_PROGRAM;

    for (auto i = 0u; i < deviceVector.size(); i++) {
        auto device = deviceVector[i];
        retVal = program->createProgramFromBinary(binaries[i], lengths[i], *device);
        if (retVal != CL_SUCCESS) {
            break;
        }
    }

    program->createdFrom = CreatedFrom::BINARY;

    if (binaryStatus) {
        // NOTE(review): only the first element is written and it is always
        // CL_SUCCESS, even when loading failed — confirm this is intended.
        DEBUG_BREAK_IF(retVal != CL_SUCCESS);
        *binaryStatus = CL_SUCCESS;
    }

    if (retVal != CL_SUCCESS) {
        delete program;
        program = nullptr;
    }

    errcodeRet = retVal;
    return program;
}

/**
 * Creates a program from `count` source strings, concatenated into one source.
 *
 * Returns nullptr (with the error in errcodeRet) when the strings cannot be combined.
 */
template <typename T>
T *Program::create(
    Context *pContext,
    cl_uint count,
    const char **strings,
    const size_t *lengths,
    cl_int &errcodeRet) {
    std::string combinedString;
    size_t combinedStringSize = 0;
    T *program = nullptr;

    auto retVal = StringHelpers::createCombinedString(
        combinedString,
        combinedStringSize,
        count,
        strings,
        lengths);

    if (CL_SUCCESS == retVal) {
        program = new T(pContext, false, pContext->getDevices());
        program->sourceCode.swap(combinedString);
        program->createdFrom = CreatedFrom::SOURCE;
    }

    errcodeRet = retVal;
    return program;
}

/**
 * Creates a built-in program from a null-terminated source string.
 *
 * A null string yields CL_INVALID_VALUE and a nullptr result; errcodeRet is optional.
 */
template <typename T>
T *Program::createBuiltInFromSource(
    const char *nullTerminatedString,
    Context *context,
    const ClDeviceVector &deviceVector,
    cl_int *errcodeRet) {
    cl_int retVal = CL_SUCCESS;
    T *program = nullptr;

    if (nullTerminatedString == nullptr) {
        retVal = CL_INVALID_VALUE;
    }

    if (retVal == CL_SUCCESS) {
        program = new T(context, true, deviceVector);
        program->sourceCode = nullTerminatedString;
        program->createdFrom = CreatedFrom::SOURCE;
    }

    if (errcodeRet) {
        *errcodeRet = retVal;
    }

    return program;
}

/**
 * Creates a built-in program from an already generated device binary.
 *
 * The binary is copied once per root device index that has no packed binary yet,
 * and all devices are marked as successfully built executables. A null binary or
 * zero size yields CL_INVALID_VALUE and a nullptr result; errcodeRet is optional.
 */
template <typename T>
T *Program::createBuiltInFromGenBinary(
    Context *context,
    const ClDeviceVector &deviceVector,
    const void *binary,
    size_t size,
    cl_int *errcodeRet) {
    cl_int retVal = CL_SUCCESS;
    T *program = nullptr;

    if ((binary == nullptr) || (size == 0)) {
        retVal = CL_INVALID_VALUE;
    }

    if (CL_SUCCESS == retVal) {
        program = new T(context, true, deviceVector);
        for (const auto &device : deviceVector) {
            const auto rootDeviceIndex = device->getRootDeviceIndex();
            if (program->buildInfos[rootDeviceIndex].packedDeviceBinarySize == 0) {
                // makeCopy() already yields a temporary; no std::move needed.
                program->replaceDeviceBinary(makeCopy(binary, size), size, rootDeviceIndex);
            }
        }
        program->setBuildStatusSuccess(deviceVector, CL_PROGRAM_BINARY_TYPE_EXECUTABLE);
        program->isCreatedFromBinary = true;
        program->createdFrom = CreatedFrom::BINARY;
    }

    if (errcodeRet) {
        *errcodeRet = retVal;
    }

    return program;
}

/**
 * Creates a program from an intermediate-language blob for all devices of the context.
 *
 * A null pointer or zero length yields CL_INVALID_BINARY. The blob is loaded for
 * each device; on the first failure the program is destroyed and nullptr returned.
 */
template <typename T>
T *Program::createFromIL(Context *context,
                         const void *il,
                         size_t length,
                         cl_int &errcodeRet) {
    errcodeRet = CL_SUCCESS;

    if ((il == nullptr) || (length == 0)) {
        errcodeRet = CL_INVALID_BINARY;
        return nullptr;
    }

    // Bound by reference: the device list is only read here.
    const auto &deviceVector = context->getDevices();
    T *program = new T(context, false, deviceVector);
    for (const auto &device : deviceVector) {
        errcodeRet = program->createProgramFromBinary(il, length, *device);
        if (errcodeRet != CL_SUCCESS) {
            break;
        }
    }
    program->createdFrom = CreatedFrom::IL;

    if (errcodeRet != CL_SUCCESS) {
        delete program;
        program = nullptr;
    }

    return program;
}
} // namespace NEO
compute-runtime-22.14.22890/opencl/source/program/get_info.cpp000066400000000000000000000215641422164147700241070ustar00rootroot00000000000000
/*
 * Copyright (C) 2018-2022 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "shared/source/helpers/get_info.h"

#include "shared/source/device/device.h"
#include "shared/source/device_binary_format/device_binary_formats.h"
#include "shared/source/program/kernel_info.h"

#include
"opencl/source/cl_device/cl_device.h" #include "opencl/source/context/context.h" #include "opencl/source/helpers/base_object.h" #include "opencl/source/helpers/cl_validators.h" #include "opencl/source/helpers/get_info_status_mapper.h" #include "program.h" namespace NEO { cl_int Program::getInfo(cl_program_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) { cl_int retVal = CL_SUCCESS; const void *pSrc = nullptr; size_t srcSize = GetInfo::invalidSourceSize; size_t retSize = 0; std::string kernelNamesString; cl_uint refCount = 0; size_t numKernels; cl_context clContext = context; cl_uint clFalse = CL_FALSE; std::vector devicesToExpose; StackVec binarySizes; StackVec debugDataSizes; uint32_t numDevices = static_cast(clDevices.size()); switch (paramName) { case CL_PROGRAM_CONTEXT: pSrc = &clContext; retSize = srcSize = sizeof(clContext); break; case CL_PROGRAM_BINARIES: { auto requiredSize = clDevices.size() * sizeof(const unsigned char **); if (!paramValue) { retSize = requiredSize; srcSize = 0u; break; } if (paramValueSize < requiredSize) { retVal = CL_INVALID_VALUE; break; } auto outputBinaries = reinterpret_cast(paramValue); for (auto i = 0u; i < clDevices.size(); i++) { if (outputBinaries[i] == nullptr) { continue; } auto rootDeviceIndex = clDevices[i]->getRootDeviceIndex(); auto binarySize = buildInfos[rootDeviceIndex].packedDeviceBinarySize; memcpy_s(outputBinaries[i], binarySize, buildInfos[rootDeviceIndex].packedDeviceBinary.get(), binarySize); } GetInfo::setParamValueReturnSize(paramValueSizeRet, requiredSize, GetInfoStatus::SUCCESS); return CL_SUCCESS; } break; case CL_PROGRAM_BINARY_SIZES: for (auto i = 0u; i < clDevices.size(); i++) { auto rootDeviceIndex = clDevices[i]->getRootDeviceIndex(); packDeviceBinary(*clDevices[i]); binarySizes.push_back(buildInfos[rootDeviceIndex].packedDeviceBinarySize); } pSrc = binarySizes.data(); retSize = srcSize = binarySizes.size() * sizeof(cl_device_id); break; case 
CL_PROGRAM_KERNEL_NAMES: kernelNamesString = concatenateKernelNames(buildInfos[clDevices[0]->getRootDeviceIndex()].kernelInfoArray); pSrc = kernelNamesString.c_str(); retSize = srcSize = kernelNamesString.length() + 1; if (!isBuilt()) { retVal = CL_INVALID_PROGRAM_EXECUTABLE; } break; case CL_PROGRAM_NUM_KERNELS: numKernels = getNumKernels(); pSrc = &numKernels; retSize = srcSize = sizeof(numKernels); if (!isBuilt()) { retVal = CL_INVALID_PROGRAM_EXECUTABLE; } break; case CL_PROGRAM_NUM_DEVICES: pSrc = &numDevices; retSize = srcSize = sizeof(cl_uint); break; case CL_PROGRAM_DEVICES: clDevices.toDeviceIDs(devicesToExpose); pSrc = devicesToExpose.data(); retSize = srcSize = devicesToExpose.size() * sizeof(cl_device_id); break; case CL_PROGRAM_REFERENCE_COUNT: refCount = static_cast(this->getReference()); retSize = srcSize = sizeof(refCount); pSrc = &refCount; break; case CL_PROGRAM_SOURCE: if (createdFrom == CreatedFrom::SOURCE) { pSrc = sourceCode.c_str(); retSize = srcSize = strlen(sourceCode.c_str()) + 1; } else { if (paramValueSizeRet) { *paramValueSizeRet = 0; } return CL_SUCCESS; } break; case CL_PROGRAM_IL: if (createdFrom != CreatedFrom::IL) { if (paramValueSizeRet) { *paramValueSizeRet = 0; } return CL_SUCCESS; } pSrc = irBinary.get(); retSize = srcSize = irBinarySize; break; case CL_PROGRAM_DEBUG_INFO_SIZES_INTEL: for (auto i = 0u; i < clDevices.size(); i++) { auto rootDeviceIndex = clDevices[i]->getRootDeviceIndex(); if (nullptr == buildInfos[rootDeviceIndex].debugData) { auto refBin = ArrayRef(reinterpret_cast(buildInfos[rootDeviceIndex].unpackedDeviceBinary.get()), buildInfos[rootDeviceIndex].unpackedDeviceBinarySize); if (isDeviceBinaryFormat(refBin)) { createDebugZebin(rootDeviceIndex); } else continue; } debugDataSizes.push_back(buildInfos[rootDeviceIndex].debugDataSize); } pSrc = debugDataSizes.data(); retSize = srcSize = debugDataSizes.size() * sizeof(cl_device_id); break; case CL_PROGRAM_DEBUG_INFO_INTEL: { auto requiredSize = numDevices * 
sizeof(void **); if (paramValue == nullptr) { retSize = requiredSize; srcSize = 0u; break; } if (paramValueSize < requiredSize) { retVal = CL_INVALID_VALUE; break; } auto outputDebugData = reinterpret_cast(paramValue); for (auto i = 0u; i < clDevices.size(); i++) { auto rootDeviceIndex = clDevices[i]->getRootDeviceIndex(); if (nullptr == buildInfos[rootDeviceIndex].debugData) { auto refBin = ArrayRef(reinterpret_cast(buildInfos[rootDeviceIndex].unpackedDeviceBinary.get()), buildInfos[rootDeviceIndex].unpackedDeviceBinarySize); if (isDeviceBinaryFormat(refBin)) { createDebugZebin(rootDeviceIndex); } else continue; } auto dbgDataSize = buildInfos[rootDeviceIndex].debugDataSize; memcpy_s(outputDebugData[i], dbgDataSize, buildInfos[rootDeviceIndex].debugData.get(), dbgDataSize); } GetInfo::setParamValueReturnSize(paramValueSizeRet, requiredSize, GetInfoStatus::SUCCESS); return CL_SUCCESS; } break; case CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT: case CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT: retSize = srcSize = sizeof(clFalse); pSrc = &clFalse; break; default: retVal = CL_INVALID_VALUE; break; } auto getInfoStatus = GetInfoStatus::INVALID_VALUE; if (retVal == CL_SUCCESS) { getInfoStatus = GetInfo::getInfo(paramValue, paramValueSize, pSrc, srcSize); retVal = changeGetInfoStatusToCLResultType(getInfoStatus); } GetInfo::setParamValueReturnSize(paramValueSizeRet, retSize, getInfoStatus); return retVal; } cl_int Program::getBuildInfo(cl_device_id device, cl_program_build_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) const { cl_int retVal = CL_SUCCESS; const void *pSrc = nullptr; size_t srcSize = GetInfo::invalidSourceSize; size_t retSize = 0; auto pClDev = castToObject(device); auto rootDeviceIndex = pClDev->getRootDeviceIndex(); switch (paramName) { case CL_PROGRAM_BUILD_STATUS: srcSize = retSize = sizeof(cl_build_status); pSrc = &deviceBuildInfos.at(pClDev).buildStatus; break; case CL_PROGRAM_BUILD_OPTIONS: srcSize = retSize = 
strlen(options.c_str()) + 1; pSrc = options.c_str(); break; case CL_PROGRAM_BUILD_LOG: { const char *pBuildLog = getBuildLog(pClDev->getRootDeviceIndex()); pSrc = pBuildLog; srcSize = retSize = strlen(pBuildLog) + 1; } break; case CL_PROGRAM_BINARY_TYPE: srcSize = retSize = sizeof(cl_program_binary_type); pSrc = &deviceBuildInfos.at(pClDev).programBinaryType; break; case CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE: pSrc = &buildInfos[rootDeviceIndex].globalVarTotalSize; retSize = srcSize = sizeof(size_t); break; default: retVal = CL_INVALID_VALUE; break; } auto getInfoStatus = GetInfoStatus::INVALID_VALUE; if (retVal == CL_SUCCESS) { getInfoStatus = GetInfo::getInfo(paramValue, paramValueSize, pSrc, srcSize); retVal = changeGetInfoStatusToCLResultType(getInfoStatus); } GetInfo::setParamValueReturnSize(paramValueSizeRet, retSize, getInfoStatus); return retVal; } } // namespace NEO compute-runtime-22.14.22890/opencl/source/program/internal_options.cpp000066400000000000000000000011471422164147700256770ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/program/program.h" #include "compiler_options.h" #include namespace NEO { const std::vector Program::internalOptionsToExtract = {CompilerOptions::gtpinRera, CompilerOptions::greaterThan4gbBuffersRequired}; bool Program::isFlagOption(ConstStringRef option) { return true; } bool Program::isOptionValueValid(ConstStringRef option, ConstStringRef value) { return false; } }; // namespace NEO compute-runtime-22.14.22890/opencl/source/program/link.cpp000066400000000000000000000233611422164147700232470ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/compiler_interface/compiler_interface.h" #include "shared/source/device/device.h" #include "shared/source/device_binary_format/device_binary_formats.h" #include "shared/source/device_binary_format/elf/elf.h" 
#include "shared/source/device_binary_format/elf/elf_encoder.h"
#include "shared/source/device_binary_format/elf/ocl_elf.h"
#include "shared/source/execution_environment/execution_environment.h"
#include "shared/source/program/kernel_info.h"
#include "shared/source/utilities/stackvec.h"

#include "opencl/source/cl_device/cl_device.h"
#include "opencl/source/gtpin/gtpin_notify.h"
#include "opencl/source/helpers/cl_validators.h"
#include "opencl/source/platform/platform.h"
#include "opencl/source/program/program.h"

#include "compiler_options.h"

// NOTE(review): the header name of the single system include was missing here;
// the standard headers below are the ones this translation unit visibly uses.
#include <algorithm>
#include <string>
#include <unordered_map>

namespace NEO {

/**
 * Links the intermediate representations of the input programs into this program.
 *
 * Depending on the presence of the create-library option the result is either a
 * device executable per device (linked and processed for each device) or a single
 * library in intermediate form.
 *
 * @param deviceVector      devices to link for; element 0 supplies the compiler interface
 * @param buildOptions      API link options, may be nullptr (treated as "")
 * @param numInputPrograms  number of entries in inputPrograms, must be non-zero
 * @param inputPrograms     programs to link, must be non-null
 * @return CL_SUCCESS, or CL_INVALID_VALUE / CL_INVALID_OPERATION / CL_INVALID_PROGRAM /
 *         CL_OUT_OF_HOST_MEMORY / an error from the compiler or processGenBinary().
 *
 * On failure every device is marked CL_BUILD_ERROR with binary type NONE.
 */
cl_int Program::link(
    const ClDeviceVector &deviceVector,
    const char *buildOptions,
    cl_uint numInputPrograms,
    const cl_program *inputPrograms) {
    cl_int retVal = CL_SUCCESS;
    bool isCreateLibrary;
    auto defaultClDevice = deviceVector[0];
    UNRECOVERABLE_IF(defaultClDevice == nullptr);
    auto &defaultDevice = defaultClDevice->getDevice();
    std::unordered_map<uint32_t, bool> kernelDebugDataNotified;
    std::unordered_map<uint32_t, bool> debugOptionsAppended;
    auto internalOptions = getInternalOptions();
    cl_program_binary_type binaryType = CL_PROGRAM_BINARY_TYPE_NONE;

    do {
        if ((numInputPrograms == 0) || (inputPrograms == nullptr)) {
            retVal = CL_INVALID_VALUE;
            break;
        }

        if (std::any_of(deviceVector.begin(), deviceVector.end(), [&](auto device) { return CL_BUILD_IN_PROGRESS == deviceBuildInfos[device].buildStatus; })) {
            retVal = CL_INVALID_OPERATION;
            break;
        }

        for (const auto &device : deviceVector) {
            const auto rootDeviceIndex = device->getRootDeviceIndex();
            kernelDebugDataNotified[rootDeviceIndex] = false;
            debugOptionsAppended[rootDeviceIndex] = false;
            deviceBuildInfos[device].buildStatus = CL_BUILD_IN_PROGRESS;
        }

        options = (buildOptions != nullptr) ? buildOptions : "";

        // Options that belong to the internal option string are moved out of the API options.
        for (const auto &optionString : {CompilerOptions::gtpinRera, CompilerOptions::greaterThan4gbBuffersRequired}) {
            size_t pos = options.find(optionString.data());
            if (pos != std::string::npos) {
                options.erase(pos, optionString.length());
                CompilerOptions::concatenateAppend(internalOptions, optionString);
            }
        }

        if (isKernelDebugEnabled()) {
            for (auto &device : deviceVector) {
                const auto rootDeviceIndex = device->getRootDeviceIndex();
                // Debug options are appended once per root device index.
                if (debugOptionsAppended[rootDeviceIndex]) {
                    continue;
                }
                appendKernelDebugOptions(*device, internalOptions);
                debugOptionsAppended[rootDeviceIndex] = true;
            }
        }

        isCreateLibrary = CompilerOptions::contains(options, CompilerOptions::createLibrary);

        NEO::Elf::ElfEncoder<> elfEncoder(true, false, 1U);
        elfEncoder.getElfFileHeader().type = NEO::Elf::ET_OPENCL_OBJECTS;

        // NOTE(review): element types and inline capacities of the StackVecs were
        // missing here and are reconstructed from usage — confirm against the original.
        StackVec<const Program *, 16> inputProgramsInternal;
        StackVec<uint32_t, 64> specConstIds;
        StackVec<uint64_t, 64> specConstValues;
        for (cl_uint i = 0; i < numInputPrograms; i++) {
            auto program = inputPrograms[i];
            if (program == nullptr) {
                retVal = CL_INVALID_PROGRAM;
                break;
            }
            auto pInputProgObj = castToObject<Program>(program);
            if (pInputProgObj == nullptr) {
                retVal = CL_INVALID_PROGRAM;
                break;
            }
            inputProgramsInternal.push_back(pInputProgObj);
            // Only programs that carry an intermediate representation can be linked.
            if ((pInputProgObj->irBinary == nullptr) || (pInputProgObj->irBinarySize == 0)) {
                retVal = CL_INVALID_PROGRAM;
                break;
            }

            if (pInputProgObj->areSpecializationConstantsInitialized) {
                specConstIds.clear();
                specConstValues.clear();
                specConstIds.reserve(pInputProgObj->specConstantsValues.size());
                specConstValues.reserve(pInputProgObj->specConstantsValues.size());
                for (const auto &specConst : pInputProgObj->specConstantsValues) {
                    specConstIds.push_back(specConst.first);
                    specConstValues.push_back(specConst.second);
                }
                elfEncoder.appendSection(NEO::Elf::SHT_OPENCL_SPIRV_SC_IDS, NEO::Elf::SectionNamesOpenCl::spirvSpecConstIds,
                                         ArrayRef<const uint8_t>::fromAny(specConstIds.begin(), specConstIds.size()));
                elfEncoder.appendSection(NEO::Elf::SHT_OPENCL_SPIRV_SC_VALUES, NEO::Elf::SectionNamesOpenCl::spirvSpecConstValues,
                                         ArrayRef<const uint8_t>::fromAny(specConstValues.begin(), specConstValues.size()));
            }

            auto sectionType = pInputProgObj->getIsSpirV() ? NEO::Elf::SHT_OPENCL_SPIRV : NEO::Elf::SHT_OPENCL_LLVM_BINARY;
            ConstStringRef sectionName = pInputProgObj->getIsSpirV() ? NEO::Elf::SectionNamesOpenCl::spirvObject : NEO::Elf::SectionNamesOpenCl::llvmObject;
            elfEncoder.appendSection(sectionType, sectionName, ArrayRef<const uint8_t>(reinterpret_cast<const uint8_t *>(pInputProgObj->irBinary.get()), pInputProgObj->irBinarySize));
        }
        if (retVal != CL_SUCCESS) {
            break;
        }

        auto clLinkInput = elfEncoder.encode();

        CompilerInterface *pCompilerInterface = defaultDevice.getCompilerInterface();
        if (!pCompilerInterface) {
            retVal = CL_OUT_OF_HOST_MEMORY;
            break;
        }

        TranslationInput inputArgs = {IGC::CodeType::elf, IGC::CodeType::undefined};

        inputArgs.src = ArrayRef<const char>(reinterpret_cast<const char *>(clLinkInput.data()), clLinkInput.size());
        inputArgs.apiOptions = ArrayRef<const char>(options.c_str(), options.length());
        inputArgs.internalOptions = ArrayRef<const char>(internalOptions.c_str(), internalOptions.length());
        inputArgs.GTPinInput = gtpinGetIgcInit();

        if (!isCreateLibrary) {
            // Executable: link and process a device binary separately for each device.
            for (const auto &device : deviceVector) {
                auto rootDeviceIndex = device->getRootDeviceIndex();
                inputArgs.outType = IGC::CodeType::oclGenBin;
                NEO::TranslationOutput compilerOuput = {};
                auto compilerErr = pCompilerInterface->link(device->getDevice(), inputArgs, compilerOuput);

                this->updateBuildLog(rootDeviceIndex, compilerOuput.frontendCompilerLog.c_str(), compilerOuput.frontendCompilerLog.size());
                this->updateBuildLog(rootDeviceIndex, compilerOuput.backendCompilerLog.c_str(), compilerOuput.backendCompilerLog.size());
                retVal = asClError(compilerErr);
                if (retVal != CL_SUCCESS) {
                    break;
                }

                this->replaceDeviceBinary(std::move(compilerOuput.deviceBinary.mem), compilerOuput.deviceBinary.size, rootDeviceIndex);
                this->buildInfos[rootDeviceIndex].debugData = std::move(compilerOuput.debugData.mem);
                this->buildInfos[rootDeviceIndex].debugDataSize = compilerOuput.debugData.size;

                retVal = processGenBinary(*device);
                if (retVal != CL_SUCCESS) {
                    break;
                }
                binaryType = CL_PROGRAM_BINARY_TYPE_EXECUTABLE;

                if (isKernelDebugEnabled()) {
                    // The debugger is notified once per root device index.
                    if (kernelDebugDataNotified[rootDeviceIndex]) {
                        continue;
                    }
                    notifyDebuggerWithDebugData(device);
                    kernelDebugDataNotified[rootDeviceIndex] = true;
                }
            }
        } else {
            // Library: a single translation on the default device, shared by all devices.
            inputArgs.outType = IGC::CodeType::llvmBc;
            NEO::TranslationOutput compilerOuput = {};
            auto compilerErr = pCompilerInterface->createLibrary(defaultDevice, inputArgs, compilerOuput);
            for (const auto &device : deviceVector) {
                const auto rootDeviceIndex = device->getRootDeviceIndex();
                this->updateBuildLog(rootDeviceIndex, compilerOuput.frontendCompilerLog.c_str(), compilerOuput.frontendCompilerLog.size());
                this->updateBuildLog(rootDeviceIndex, compilerOuput.backendCompilerLog.c_str(), compilerOuput.backendCompilerLog.size());
            }
            retVal = asClError(compilerErr);
            if (retVal != CL_SUCCESS) {
                break;
            }

            this->irBinary = std::move(compilerOuput.intermediateRepresentation.mem);
            this->irBinarySize = compilerOuput.intermediateRepresentation.size;
            this->isSpirV = (compilerOuput.intermediateCodeType == IGC::CodeType::spirV);
            // NOTE(review): debugData.mem is moved on the first iteration, so later
            // iterations assign an empty pointer together with a non-zero size — confirm intent.
            for (const auto &device : deviceVector) {
                const auto rootDeviceIndex = device->getRootDeviceIndex();
                this->buildInfos[rootDeviceIndex].debugData = std::move(compilerOuput.debugData.mem);
                this->buildInfos[rootDeviceIndex].debugDataSize = compilerOuput.debugData.size;
            }

            binaryType = CL_PROGRAM_BINARY_TYPE_LIBRARY;
        }
        if (retVal != CL_SUCCESS) {
            break;
        }
        updateNonUniformFlag(&*inputProgramsInternal.begin(), inputProgramsInternal.size());
    } while (false);

    if (retVal != CL_SUCCESS) {
        for (const auto &device : deviceVector) {
            deviceBuildInfos[device].buildStatus = CL_BUILD_ERROR;
            deviceBuildInfos[device].programBinaryType = CL_PROGRAM_BINARY_TYPE_NONE;
        }
    } else {
        setBuildStatusSuccess(deviceVector, binaryType);
    }

    return retVal;
}
} // namespace NEO
compute-runtime-22.14.22890/opencl/source/program/printf_handler.cpp000066400000000000000000000134531422164147700253120ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "printf_handler.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/blit_commands_helper.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/ptr_math.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/os_interface/hw_info_config.h" #include "shared/source/program/print_formatter.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/context/context.h" #include "opencl/source/helpers/dispatch_info.h" #include "opencl/source/kernel/kernel.h" #include "opencl/source/mem_obj/buffer.h" namespace NEO { PrintfHandler::PrintfHandler(ClDevice &deviceArg) : device(deviceArg) { printfSurfaceInitialDataSizePtr = std::make_unique(); *printfSurfaceInitialDataSizePtr = sizeof(uint32_t); } PrintfHandler::~PrintfHandler() { device.getMemoryManager()->freeGraphicsMemory(printfSurface); } PrintfHandler *PrintfHandler::create(const MultiDispatchInfo &multiDispatchInfo, ClDevice &device) { if (multiDispatchInfo.usesStatelessPrintfSurface()) { return new PrintfHandler(device); } auto mainKernel = multiDispatchInfo.peekMainKernel(); if (mainKernel != nullptr) { if (mainKernel->getImplicitArgs()) { return new PrintfHandler(device); } } return nullptr; } void PrintfHandler::prepareDispatch(const MultiDispatchInfo &multiDispatchInfo) { auto printfSurfaceSize = device.getSharedDeviceInfo().printfBufferSize; if (printfSurfaceSize == 0) { return; } auto rootDeviceIndex = device.getRootDeviceIndex(); kernel = multiDispatchInfo.peekMainKernel(); printfSurface = device.getMemoryManager()->allocateGraphicsMemoryWithProperties({rootDeviceIndex, printfSurfaceSize, AllocationType::PRINTF_SURFACE, device.getDeviceBitfield()}); auto &hwInfo = 
device.getHardwareInfo(); auto &helper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); MemoryTransferHelper::transferMemoryToAllocation(helper.isBlitCopyRequiredForLocalMemory(hwInfo, *printfSurface), device.getDevice(), printfSurface, 0, printfSurfaceInitialDataSizePtr.get(), sizeof(*printfSurfaceInitialDataSizePtr.get())); if (kernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.usesPrintf) { const auto &printfSurfaceArg = kernel->getKernelInfo().kernelDescriptor.payloadMappings.implicitArgs.printfSurfaceAddress; auto printfPatchAddress = ptrOffset(reinterpret_cast(kernel->getCrossThreadData()), printfSurfaceArg.stateless); patchWithRequiredSize(printfPatchAddress, printfSurfaceArg.pointerSize, (uintptr_t)printfSurface->getGpuAddressToPatch()); if (isValidOffset(printfSurfaceArg.bindful)) { auto surfaceState = ptrOffset(reinterpret_cast(kernel->getSurfaceStateHeap()), printfSurfaceArg.bindful); void *addressToPatch = printfSurface->getUnderlyingBuffer(); size_t sizeToPatch = printfSurface->getUnderlyingBufferSize(); Buffer::setSurfaceState(&device.getDevice(), surfaceState, false, false, sizeToPatch, addressToPatch, 0, printfSurface, 0, 0, kernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, kernel->areMultipleSubDevicesInContext()); } } auto pImplicitArgs = kernel->getImplicitArgs(); if (pImplicitArgs) { pImplicitArgs->printfBufferPtr = printfSurface->getGpuAddress(); } } void PrintfHandler::makeResident(CommandStreamReceiver &commandStreamReceiver) { commandStreamReceiver.makeResident(*printfSurface); } void PrintfHandler::printEnqueueOutput() { auto &hwInfo = device.getHardwareInfo(); auto usesStringMap = kernel->getDescriptor().kernelAttributes.usesStringMap(); const auto &hwInfoConfig = *HwInfoConfig::get(hwInfo.platform.eProductFamily); auto printfOutputBuffer = reinterpret_cast(printfSurface->getUnderlyingBuffer()); auto printfOutputSize = static_cast(printfSurface->getUnderlyingBufferSize()); std::unique_ptr 
printfOutputDecompressed; auto &helper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); if (hwInfoConfig.allowStatelessCompression(hwInfo) || helper.isBlitCopyRequiredForLocalMemory(hwInfo, *printfSurface)) { printfOutputDecompressed = std::make_unique(printfOutputSize); printfOutputBuffer = printfOutputDecompressed.get(); auto &bcsEngine = device.getEngine(EngineHelpers::getBcsEngineType(hwInfo, device.getDeviceBitfield(), device.getSelectorCopyEngine(), true), EngineUsage::Regular); BlitPropertiesContainer blitPropertiesContainer; blitPropertiesContainer.push_back( BlitProperties::constructPropertiesForReadWrite(BlitterConstants::BlitDirection::BufferToHostPtr, *bcsEngine.commandStreamReceiver, printfSurface, nullptr, printfOutputDecompressed.get(), printfSurface->getGpuAddress(), 0, 0, 0, Vec3(printfOutputSize, 0, 0), 0, 0, 0, 0)); bcsEngine.commandStreamReceiver->flushBcsTask(blitPropertiesContainer, true, false, device.getDevice()); } PrintFormatter printFormatter(printfOutputBuffer, printfOutputSize, kernel->is32Bit(), usesStringMap ? 
&kernel->getDescriptor().kernelMetadata.printfStringsMap : nullptr); printFormatter.printKernelOutput(); } } // namespace NEO compute-runtime-22.14.22890/opencl/source/program/printf_handler.h000066400000000000000000000016211422164147700247510ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/command_stream_receiver.h" #include "opencl/source/kernel/kernel.h" namespace NEO { class ClDevice; struct MultiDispatchInfo; class PrintfHandler { public: static PrintfHandler *create(const MultiDispatchInfo &multiDispatchInfo, ClDevice &deviceArg); ~PrintfHandler(); void prepareDispatch(const MultiDispatchInfo &multiDispatchInfo); void makeResident(CommandStreamReceiver &commandStreamReceiver); void printEnqueueOutput(); GraphicsAllocation *getSurface() { return printfSurface; } protected: PrintfHandler(ClDevice &device); std::unique_ptr printfSurfaceInitialDataSizePtr; ClDevice &device; Kernel *kernel = nullptr; GraphicsAllocation *printfSurface = nullptr; }; } // namespace NEO compute-runtime-22.14.22890/opencl/source/program/process_device_binary.cpp000066400000000000000000000410451422164147700266520ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/device_binary_format/device_binary_formats.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/debug_helpers.h" #include "shared/source/helpers/ptr_math.h" #include "shared/source/helpers/string.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/memory_manager/unified_memory_manager.h" #include "shared/source/program/kernel_info.h" #include "shared/source/program/program_info.h" #include "shared/source/program/program_initialization.h" #include "shared/source/source_level_debugger/source_level_debugger.h" #include "opencl/source/cl_device/cl_device.h" #include 
"opencl/source/context/context.h" #include "opencl/source/gtpin/gtpin_notify.h" #include "opencl/source/program/program.h" #include "program_debug_data.h" #include using namespace iOpenCL; namespace NEO { extern bool familyEnabled[]; const KernelInfo *Program::getKernelInfo( const char *kernelName, uint32_t rootDeviceIndex) const { if (kernelName == nullptr) { return nullptr; } auto &kernelInfoArray = buildInfos[rootDeviceIndex].kernelInfoArray; auto it = std::find_if(kernelInfoArray.begin(), kernelInfoArray.end(), [=](const KernelInfo *kInfo) { return (0 == strcmp(kInfo->kernelDescriptor.kernelMetadata.kernelName.c_str(), kernelName)); }); return (it != kernelInfoArray.end()) ? *it : nullptr; } size_t Program::getNumKernels() const { return buildInfos[clDevices[0]->getRootDeviceIndex()].kernelInfoArray.size(); } const KernelInfo *Program::getKernelInfo(size_t ordinal, uint32_t rootDeviceIndex) const { auto &kernelInfoArray = buildInfos[rootDeviceIndex].kernelInfoArray; DEBUG_BREAK_IF(ordinal >= kernelInfoArray.size()); return kernelInfoArray[ordinal]; } cl_int Program::linkBinary(Device *pDevice, const void *constantsInitData, const void *variablesInitData, const ProgramInfo::GlobalSurfaceInfo &stringsInfo, std::vector &extFuncInfos) { auto linkerInput = getLinkerInput(pDevice->getRootDeviceIndex()); if (linkerInput == nullptr) { return CL_SUCCESS; } auto rootDeviceIndex = pDevice->getRootDeviceIndex(); auto &kernelInfoArray = buildInfos[rootDeviceIndex].kernelInfoArray; buildInfos[rootDeviceIndex].constStringSectionData = stringsInfo; Linker linker(*linkerInput); Linker::SegmentInfo globals; Linker::SegmentInfo constants; Linker::SegmentInfo exportedFunctions; Linker::SegmentInfo strings; GraphicsAllocation *globalsForPatching = getGlobalSurface(rootDeviceIndex); GraphicsAllocation *constantsForPatching = getConstantSurface(rootDeviceIndex); if (globalsForPatching != nullptr) { globals.gpuAddress = static_cast(globalsForPatching->getGpuAddress()); 
globals.segmentSize = globalsForPatching->getUnderlyingBufferSize(); } if (constantsForPatching != nullptr) { constants.gpuAddress = static_cast(constantsForPatching->getGpuAddress()); constants.segmentSize = constantsForPatching->getUnderlyingBufferSize(); } if (stringsInfo.initData != nullptr) { strings.gpuAddress = reinterpret_cast(stringsInfo.initData); strings.segmentSize = stringsInfo.size; } if (linkerInput->getExportedFunctionsSegmentId() >= 0) { // Exported functions reside in instruction heap of one of kernels auto exportedFunctionHeapId = linkerInput->getExportedFunctionsSegmentId(); buildInfos[rootDeviceIndex].exportedFunctionsSurface = kernelInfoArray[exportedFunctionHeapId]->getGraphicsAllocation(); exportedFunctions.gpuAddress = static_cast(buildInfos[rootDeviceIndex].exportedFunctionsSurface->getGpuAddressToPatch()); exportedFunctions.segmentSize = buildInfos[rootDeviceIndex].exportedFunctionsSurface->getUnderlyingBufferSize(); } Linker::PatchableSegments isaSegmentsForPatching; std::vector> patchedIsaTempStorage; Linker::KernelDescriptorsT kernelDescriptors; if (linkerInput->getTraits().requiresPatchingOfInstructionSegments) { patchedIsaTempStorage.reserve(kernelInfoArray.size()); kernelDescriptors.reserve(kernelInfoArray.size()); for (const auto &kernelInfo : kernelInfoArray) { auto &kernHeapInfo = kernelInfo->heapInfo; const char *originalIsa = reinterpret_cast(kernHeapInfo.pKernelHeap); patchedIsaTempStorage.push_back(std::vector(originalIsa, originalIsa + kernHeapInfo.KernelHeapSize)); isaSegmentsForPatching.push_back(Linker::PatchableSegment{patchedIsaTempStorage.rbegin()->data(), kernHeapInfo.KernelHeapSize}); kernelDescriptors.push_back(&kernelInfo->kernelDescriptor); } } Linker::UnresolvedExternals unresolvedExternalsInfo; bool linkSuccess = LinkingStatus::LinkedFully == linker.link(globals, constants, exportedFunctions, strings, globalsForPatching, constantsForPatching, isaSegmentsForPatching, unresolvedExternalsInfo, pDevice, 
constantsInitData, variablesInitData, kernelDescriptors, extFuncInfos); setSymbols(rootDeviceIndex, linker.extractRelocatedSymbols()); if (false == linkSuccess) { std::vector kernelNames; for (const auto &kernelInfo : kernelInfoArray) { kernelNames.push_back("kernel : " + kernelInfo->kernelDescriptor.kernelMetadata.kernelName); } auto error = constructLinkerErrorMessage(unresolvedExternalsInfo, kernelNames); updateBuildLog(pDevice->getRootDeviceIndex(), error.c_str(), error.size()); return CL_INVALID_BINARY; } else if (linkerInput->getTraits().requiresPatchingOfInstructionSegments) { for (auto kernelId = 0u; kernelId < kernelInfoArray.size(); kernelId++) { const auto &kernelInfo = kernelInfoArray[kernelId]; if (nullptr == kernelInfo->getGraphicsAllocation()) { continue; } auto &kernHeapInfo = kernelInfo->heapInfo; auto segmentId = &kernelInfo - &kernelInfoArray[0]; auto &hwInfo = pDevice->getHardwareInfo(); auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); MemoryTransferHelper::transferMemoryToAllocation(hwHelper.isBlitCopyRequiredForLocalMemory(hwInfo, *kernelInfo->getGraphicsAllocation()), *pDevice, kernelInfo->getGraphicsAllocation(), 0, isaSegmentsForPatching[segmentId].hostPointer, static_cast(kernHeapInfo.KernelHeapSize)); } } DBG_LOG(PrintRelocations, NEO::constructRelocationsDebugMessage(this->getSymbols(pDevice->getRootDeviceIndex()))); return CL_SUCCESS; } cl_int Program::processGenBinary(const ClDevice &clDevice) { auto rootDeviceIndex = clDevice.getRootDeviceIndex(); if (nullptr == this->buildInfos[rootDeviceIndex].unpackedDeviceBinary) { return CL_INVALID_BINARY; } cleanCurrentKernelInfo(rootDeviceIndex); auto &buildInfo = buildInfos[rootDeviceIndex]; if (buildInfo.constantSurface || buildInfo.globalSurface) { clDevice.getMemoryManager()->freeGraphicsMemory(buildInfo.constantSurface); clDevice.getMemoryManager()->freeGraphicsMemory(buildInfo.globalSurface); buildInfo.constantSurface = nullptr; buildInfo.globalSurface = nullptr; } 
ProgramInfo programInfo; auto blob = ArrayRef(reinterpret_cast(buildInfo.unpackedDeviceBinary.get()), buildInfo.unpackedDeviceBinarySize); SingleDeviceBinary binary = {}; binary.deviceBinary = blob; binary.targetDevice = NEO::targetDeviceFromHwInfo(clDevice.getDevice().getHardwareInfo()); std::string decodeErrors; std::string decodeWarnings; DecodeError decodeError; DeviceBinaryFormat singleDeviceBinaryFormat; std::tie(decodeError, singleDeviceBinaryFormat) = NEO::decodeSingleDeviceBinary(programInfo, binary, decodeErrors, decodeWarnings); if (isDeviceBinaryFormat(binary.deviceBinary)) { NEO::LinkerInput::SectionNameToSegmentIdMap nameToKernelId; uint32_t id = 0; for (auto &kernelInfo : programInfo.kernelInfos) { nameToKernelId[kernelInfo->kernelDescriptor.kernelMetadata.kernelName] = id; id++; } programInfo.prepareLinkerInputStorage(); programInfo.linkerInput->decodeElfSymbolTableAndRelocations(programInfo.decodedElf, nameToKernelId); } if (decodeWarnings.empty() == false) { PRINT_DEBUG_STRING(DebugManager.flags.PrintDebugMessages.get(), stderr, "%s\n", decodeWarnings.c_str()); } if (DecodeError::Success != decodeError) { PRINT_DEBUG_STRING(DebugManager.flags.PrintDebugMessages.get(), stderr, "%s\n", decodeErrors.c_str()); return CL_INVALID_BINARY; } return this->processProgramInfo(programInfo, clDevice); } cl_int Program::processProgramInfo(ProgramInfo &src, const ClDevice &clDevice) { auto rootDeviceIndex = clDevice.getRootDeviceIndex(); auto &kernelInfoArray = buildInfos[rootDeviceIndex].kernelInfoArray; size_t slmNeeded = getMaxInlineSlmNeeded(src); size_t slmAvailable = 0U; NEO::DeviceInfoKernelPayloadConstants deviceInfoConstants; LinkerInput *linkerInput = nullptr; slmAvailable = static_cast(clDevice.getSharedDeviceInfo().localMemSize); deviceInfoConstants.maxWorkGroupSize = static_cast(clDevice.getSharedDeviceInfo().maxWorkGroupSize); deviceInfoConstants.computeUnitsUsedForScratch = clDevice.getSharedDeviceInfo().computeUnitsUsedForScratch; 
deviceInfoConstants.slmWindowSize = static_cast(clDevice.getSharedDeviceInfo().localMemSize); if (requiresLocalMemoryWindowVA(src)) { deviceInfoConstants.slmWindow = this->executionEnvironment.memoryManager->getReservedMemory(MemoryConstants::slmWindowSize, MemoryConstants::slmWindowAlignment); } linkerInput = src.linkerInput.get(); setLinkerInput(rootDeviceIndex, std::move(src.linkerInput)); if (slmNeeded > slmAvailable) { return CL_OUT_OF_RESOURCES; } kernelInfoArray = std::move(src.kernelInfos); auto svmAllocsManager = context ? context->getSVMAllocsManager() : nullptr; if (src.globalConstants.size != 0) { buildInfos[rootDeviceIndex].constantSurface = allocateGlobalsSurface(svmAllocsManager, clDevice.getDevice(), src.globalConstants.size, true, linkerInput, src.globalConstants.initData); } buildInfos[rootDeviceIndex].globalVarTotalSize = src.globalVariables.size; if (src.globalVariables.size != 0) { buildInfos[rootDeviceIndex].globalSurface = allocateGlobalsSurface(svmAllocsManager, clDevice.getDevice(), src.globalVariables.size, false, linkerInput, src.globalVariables.initData); if (clDevice.areOcl21FeaturesEnabled() == false) { buildInfos[rootDeviceIndex].globalVarTotalSize = 0u; } } for (auto &kernelInfo : kernelInfoArray) { cl_int retVal = CL_SUCCESS; if (kernelInfo->heapInfo.KernelHeapSize) { retVal = kernelInfo->createKernelAllocation(clDevice.getDevice(), isBuiltIn) ? 
CL_SUCCESS : CL_OUT_OF_HOST_MEMORY; } if (retVal != CL_SUCCESS) { return retVal; } kernelInfo->apply(deviceInfoConstants); } return linkBinary(&clDevice.getDevice(), src.globalConstants.initData, src.globalVariables.initData, src.globalStrings, src.externalFunctions); } void Program::processDebugData(uint32_t rootDeviceIndex) { if (this->buildInfos[rootDeviceIndex].debugData != nullptr) { auto &kernelInfoArray = buildInfos[rootDeviceIndex].kernelInfoArray; SProgramDebugDataHeaderIGC *programDebugHeader = reinterpret_cast(this->buildInfos[rootDeviceIndex].debugData.get()); DEBUG_BREAK_IF(programDebugHeader->NumberOfKernels != kernelInfoArray.size()); const SKernelDebugDataHeaderIGC *kernelDebugHeader = reinterpret_cast(ptrOffset(programDebugHeader, sizeof(SProgramDebugDataHeaderIGC))); const char *kernelName = nullptr; const char *kernelDebugData = nullptr; for (uint32_t i = 0; i < programDebugHeader->NumberOfKernels; i++) { kernelName = reinterpret_cast(ptrOffset(kernelDebugHeader, sizeof(SKernelDebugDataHeaderIGC))); auto kernelInfo = kernelInfoArray[i]; UNRECOVERABLE_IF(kernelInfo->kernelDescriptor.kernelMetadata.kernelName.compare(0, kernelInfo->kernelDescriptor.kernelMetadata.kernelName.size(), kernelName) != 0); kernelDebugData = ptrOffset(kernelName, kernelDebugHeader->KernelNameSize); kernelInfo->debugData.vIsa = kernelDebugData; kernelInfo->debugData.genIsa = ptrOffset(kernelDebugData, kernelDebugHeader->SizeVisaDbgInBytes); kernelInfo->debugData.vIsaSize = kernelDebugHeader->SizeVisaDbgInBytes; kernelInfo->debugData.genIsaSize = kernelDebugHeader->SizeGenIsaDbgInBytes; kernelDebugData = ptrOffset(kernelDebugData, kernelDebugHeader->SizeVisaDbgInBytes + kernelDebugHeader->SizeGenIsaDbgInBytes); kernelDebugHeader = reinterpret_cast(kernelDebugData); } } } Debug::Segments Program::getZebinSegments(uint32_t rootDeviceIndex) { ArrayRef strings = {reinterpret_cast(buildInfos[rootDeviceIndex].constStringSectionData.initData), 
buildInfos[rootDeviceIndex].constStringSectionData.size}; std::vector> kernels; for (const auto &kernelInfo : buildInfos[rootDeviceIndex].kernelInfoArray) kernels.push_back({kernelInfo->kernelDescriptor.kernelMetadata.kernelName, kernelInfo->getGraphicsAllocation()}); return Debug::Segments(getGlobalSurface(rootDeviceIndex), getConstantSurface(rootDeviceIndex), strings, kernels); } void Program::createDebugZebin(uint32_t rootDeviceIndex) { if (this->buildInfos[rootDeviceIndex].debugDataSize != 0) { return; } auto &debugDataRef = this->buildInfos[rootDeviceIndex].debugData; auto &debugDataSizeRef = this->buildInfos[rootDeviceIndex].debugDataSize; auto refBin = ArrayRef(reinterpret_cast(buildInfos[rootDeviceIndex].unpackedDeviceBinary.get()), buildInfos[rootDeviceIndex].unpackedDeviceBinarySize); auto segments = getZebinSegments(rootDeviceIndex); auto debugZebin = Debug::createDebugZebin(refBin, segments); debugDataSizeRef = debugZebin.size(); debugDataRef.reset(new char[debugDataSizeRef]); memcpy_s(debugDataRef.get(), debugDataSizeRef, debugZebin.data(), debugZebin.size()); } void Program::notifyDebuggerWithDebugData(ClDevice *clDevice) { auto rootDeviceIndex = clDevice->getRootDeviceIndex(); auto &buildInfo = this->buildInfos[rootDeviceIndex]; auto refBin = ArrayRef(reinterpret_cast(buildInfo.unpackedDeviceBinary.get()), buildInfo.unpackedDeviceBinarySize); if (NEO::isDeviceBinaryFormat(refBin)) { createDebugZebin(rootDeviceIndex); if (clDevice->getSourceLevelDebugger()) { NEO::DebugData debugData; debugData.vIsa = reinterpret_cast(buildInfo.debugData.get()); debugData.vIsaSize = static_cast(buildInfo.debugDataSize); clDevice->getSourceLevelDebugger()->notifyKernelDebugData(&debugData, "debug_zebin", nullptr, 0); } } else { processDebugData(rootDeviceIndex); if (clDevice->getSourceLevelDebugger()) { for (auto &kernelInfo : buildInfo.kernelInfoArray) { clDevice->getSourceLevelDebugger()->notifyKernelDebugData(&kernelInfo->debugData, 
kernelInfo->kernelDescriptor.kernelMetadata.kernelName, kernelInfo->heapInfo.pKernelHeap, kernelInfo->heapInfo.KernelHeapSize); } } } } } // namespace NEO compute-runtime-22.14.22890/opencl/source/program/process_intermediate_binary.cpp000066400000000000000000000010741422164147700300630ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "program.h" namespace NEO { cl_int Program::processSpirBinary( const void *pBinary, size_t binarySize, bool isSpirV) { for (const auto &device : clDevices) { deviceBuildInfos[device].programBinaryType = CL_PROGRAM_BINARY_TYPE_INTERMEDIATE; } this->irBinary = makeCopy(pBinary, binarySize); this->irBinarySize = binarySize; setBuildStatus(CL_BUILD_NONE); this->isSpirV = isSpirV; return CL_SUCCESS; } } // namespace NEO compute-runtime-22.14.22890/opencl/source/program/program.cpp000066400000000000000000000561451422164147700237670ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "program.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/compiler_interface/compiler_interface.h" #include "shared/source/compiler_interface/intermediate_representations.h" #include "shared/source/compiler_interface/oclc_extensions.h" #include "shared/source/device_binary_format/device_binary_formats.h" #include "shared/source/device_binary_format/elf/elf_encoder.h" #include "shared/source/device_binary_format/elf/ocl_elf.h" #include "shared/source/device_binary_format/patchtokens_decoder.h" #include "shared/source/helpers/api_specific_config.h" #include "shared/source/helpers/compiler_hw_info_config.h" #include "shared/source/helpers/compiler_options_parser.h" #include "shared/source/helpers/debug_helpers.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/kernel_helpers.h" #include "shared/source/helpers/string.h" #include 
"shared/source/memory_manager/memory_manager.h" #include "shared/source/memory_manager/unified_memory_manager.h" #include "shared/source/os_interface/hw_info_config.h" #include "shared/source/os_interface/os_context.h" #include "shared/source/program/kernel_info.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/context/context.h" #include "opencl/source/platform/platform.h" #include "compiler_options.h" #include namespace NEO { Program::Program(Context *context, bool isBuiltIn, const ClDeviceVector &clDevicesIn) : executionEnvironment(*clDevicesIn[0]->getExecutionEnvironment()), context(context), clDevices(clDevicesIn), isBuiltIn(isBuiltIn) { if (this->context && !this->isBuiltIn) { this->context->incRefInternal(); } maxRootDeviceIndex = 0; for (const auto &device : clDevicesIn) { if (device->getRootDeviceIndex() > maxRootDeviceIndex) { maxRootDeviceIndex = device->getRootDeviceIndex(); } deviceBuildInfos[device] = {}; for (auto i = 0u; i < device->getNumSubDevices(); i++) { auto subDevice = device->getSubDevice(i); if (isDeviceAssociated(*subDevice)) { deviceBuildInfos[device].associatedSubDevices.push_back(subDevice); } } } buildInfos.resize(maxRootDeviceIndex + 1); kernelDebugEnabled = clDevices[0]->isDebuggerActive(); } std::string Program::getInternalOptions() const { auto pClDevice = clDevices[0]; auto force32BitAddressess = pClDevice->getSharedDeviceInfo().force32BitAddressess; auto internalOptions = getOclVersionCompilerInternalOption(pClDevice->getEnabledClVersion()); if (force32BitAddressess && !isBuiltIn) { CompilerOptions::concatenateAppend(internalOptions, CompilerOptions::arch32bit); } if ((isBuiltIn && is32bit) || pClDevice->areSharedSystemAllocationsAllowed() || DebugManager.flags.DisableStatelessToStatefulOptimization.get()) { CompilerOptions::concatenateAppend(internalOptions, CompilerOptions::greaterThan4gbBuffersRequired); } if (ApiSpecificConfig::getBindlessConfiguration()) { 
CompilerOptions::concatenateAppend(internalOptions, CompilerOptions::bindlessMode); } auto enableStatelessToStatefullWithOffset = HwHelper::get(pClDevice->getHardwareInfo().platform.eRenderCoreFamily).isStatelesToStatefullWithOffsetSupported(); if (DebugManager.flags.EnableStatelessToStatefulBufferOffsetOpt.get() != -1) { enableStatelessToStatefullWithOffset = DebugManager.flags.EnableStatelessToStatefulBufferOffsetOpt.get() != 0; } if (enableStatelessToStatefullWithOffset) { CompilerOptions::concatenateAppend(internalOptions, CompilerOptions::hasBufferOffsetArg); } auto &hwInfo = pClDevice->getHardwareInfo(); const auto &hwInfoConfig = *HwInfoConfig::get(hwInfo.platform.eProductFamily); if (hwInfoConfig.isForceEmuInt32DivRemSPWARequired(hwInfo)) { CompilerOptions::concatenateAppend(internalOptions, CompilerOptions::forceEmuInt32DivRemSP); } if (hwInfo.capabilityTable.supportsImages) { CompilerOptions::concatenateAppend(internalOptions, CompilerOptions::enableImageSupport); } CompilerOptions::concatenateAppend(internalOptions, CompilerOptions::preserveVec3Type); return internalOptions; } Program::~Program() { for (auto i = 0u; i < buildInfos.size(); i++) { cleanCurrentKernelInfo(i); } for (const auto &buildInfo : buildInfos) { if (buildInfo.constantSurface) { if ((nullptr != context) && (nullptr != context->getSVMAllocsManager()) && (context->getSVMAllocsManager()->getSVMAlloc(reinterpret_cast(buildInfo.constantSurface->getGpuAddress())))) { context->getSVMAllocsManager()->freeSVMAlloc(reinterpret_cast(buildInfo.constantSurface->getGpuAddress())); } else { this->executionEnvironment.memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(buildInfo.constantSurface); } } if (buildInfo.globalSurface) { if ((nullptr != context) && (nullptr != context->getSVMAllocsManager()) && (context->getSVMAllocsManager()->getSVMAlloc(reinterpret_cast(buildInfo.globalSurface->getGpuAddress())))) { 
context->getSVMAllocsManager()->freeSVMAlloc(reinterpret_cast(buildInfo.globalSurface->getGpuAddress())); } else { this->executionEnvironment.memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(buildInfo.globalSurface); } } } if (context && !isBuiltIn) { context->decRefInternal(); } } cl_int Program::createProgramFromBinary( const void *pBinary, size_t binarySize, ClDevice &clDevice) { auto rootDeviceIndex = clDevice.getRootDeviceIndex(); cl_int retVal = CL_INVALID_BINARY; this->irBinary.reset(); this->irBinarySize = 0U; this->isSpirV = false; this->buildInfos[rootDeviceIndex].unpackedDeviceBinary.reset(); this->buildInfos[rootDeviceIndex].unpackedDeviceBinarySize = 0U; this->buildInfos[rootDeviceIndex].packedDeviceBinary.reset(); this->buildInfos[rootDeviceIndex].packedDeviceBinarySize = 0U; this->createdFrom = CreatedFrom::BINARY; ArrayRef archive(reinterpret_cast(pBinary), binarySize); bool isSpirV = NEO::isSpirVBitcode(archive); if (isSpirV || NEO::isLlvmBitcode(archive)) { deviceBuildInfos[&clDevice].programBinaryType = CL_PROGRAM_BINARY_TYPE_INTERMEDIATE; retVal = processSpirBinary(archive.begin(), archive.size(), isSpirV); } else if (isAnyDeviceBinaryFormat(archive)) { deviceBuildInfos[&clDevice].programBinaryType = CL_PROGRAM_BINARY_TYPE_EXECUTABLE; this->isCreatedFromBinary = true; auto hwInfo = executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo(); auto productAbbreviation = hardwarePrefix[hwInfo->platform.eProductFamily]; auto copyHwInfo = *hwInfo; const auto &compilerHwInfoConfig = *CompilerHwInfoConfig::get(copyHwInfo.platform.eProductFamily); compilerHwInfoConfig.adjustHwInfoForIgc(copyHwInfo); TargetDevice targetDevice = targetDeviceFromHwInfo(*hwInfo); std::string decodeErrors; std::string decodeWarnings; auto singleDeviceBinary = unpackSingleDeviceBinary(archive, ConstStringRef(productAbbreviation, strlen(productAbbreviation)), targetDevice, decodeErrors, decodeWarnings); if (decodeWarnings.empty() == false) { 
PRINT_DEBUG_STRING(DebugManager.flags.PrintDebugMessages.get(), stderr, "%s\n", decodeWarnings.c_str()); } if (singleDeviceBinary.intermediateRepresentation.empty() && singleDeviceBinary.deviceBinary.empty()) { retVal = CL_INVALID_BINARY; PRINT_DEBUG_STRING(DebugManager.flags.PrintDebugMessages.get(), stderr, "%s\n", decodeErrors.c_str()); } else { retVal = CL_SUCCESS; this->irBinary = makeCopy(reinterpret_cast(singleDeviceBinary.intermediateRepresentation.begin()), singleDeviceBinary.intermediateRepresentation.size()); this->irBinarySize = singleDeviceBinary.intermediateRepresentation.size(); this->isSpirV = NEO::isSpirVBitcode(ArrayRef(reinterpret_cast(this->irBinary.get()), this->irBinarySize)); this->options = singleDeviceBinary.buildOptions.str(); if (false == singleDeviceBinary.debugData.empty()) { this->buildInfos[rootDeviceIndex].debugData = makeCopy(reinterpret_cast(singleDeviceBinary.debugData.begin()), singleDeviceBinary.debugData.size()); this->buildInfos[rootDeviceIndex].debugDataSize = singleDeviceBinary.debugData.size(); } bool forceRebuildBuiltInFromIr = isBuiltIn && DebugManager.flags.RebuildPrecompiledKernels.get(); if ((false == singleDeviceBinary.deviceBinary.empty()) && (false == forceRebuildBuiltInFromIr)) { this->buildInfos[rootDeviceIndex].unpackedDeviceBinary = makeCopy(reinterpret_cast(singleDeviceBinary.deviceBinary.begin()), singleDeviceBinary.deviceBinary.size()); this->buildInfos[rootDeviceIndex].unpackedDeviceBinarySize = singleDeviceBinary.deviceBinary.size(); this->buildInfos[rootDeviceIndex].packedDeviceBinary = makeCopy(reinterpret_cast(archive.begin()), archive.size()); this->buildInfos[rootDeviceIndex].packedDeviceBinarySize = archive.size(); } else { this->isCreatedFromBinary = false; this->shouldWarnAboutRebuild = true; } switch (singleDeviceBinary.format) { default: break; case DeviceBinaryFormat::OclLibrary: deviceBuildInfos[&clDevice].programBinaryType = CL_PROGRAM_BINARY_TYPE_LIBRARY; break; case 
DeviceBinaryFormat::OclCompiledObject: deviceBuildInfos[&clDevice].programBinaryType = CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT; break; } } } return retVal; } cl_int Program::setProgramSpecializationConstant(cl_uint specId, size_t specSize, const void *specValue) { if (!isSpirV) { return CL_INVALID_PROGRAM; } static std::mutex mutex; std::lock_guard lock(mutex); auto &device = clDevices[0]->getDevice(); if (!areSpecializationConstantsInitialized) { auto pCompilerInterface = device.getCompilerInterface(); if (nullptr == pCompilerInterface) { return CL_OUT_OF_HOST_MEMORY; } SpecConstantInfo specConstInfo; auto retVal = pCompilerInterface->getSpecConstantsInfo(device, ArrayRef(irBinary.get(), irBinarySize), specConstInfo); if (retVal != TranslationOutput::ErrorCode::Success) { return CL_INVALID_VALUE; } this->specConstantsIds.reset(specConstInfo.idsBuffer.release()); this->specConstantsSizes.reset(specConstInfo.sizesBuffer.release()); areSpecializationConstantsInitialized = true; } return updateSpecializationConstant(specId, specSize, specValue); } cl_int Program::updateSpecializationConstant(cl_uint specId, size_t specSize, const void *specValue) { for (uint32_t i = 0; i < specConstantsIds->GetSize(); i++) { if (specConstantsIds->GetMemory()[i] == specId) { if (specConstantsSizes->GetMemory()[i] == static_cast(specSize)) { uint64_t specConstValue = 0u; memcpy_s(&specConstValue, sizeof(uint64_t), specValue, specSize); specConstantsValues[specId] = specConstValue; return CL_SUCCESS; } else { return CL_INVALID_VALUE; } } } return CL_INVALID_SPEC_ID; } cl_int Program::getSource(std::string &binary) const { cl_int retVal = CL_INVALID_PROGRAM; binary = {}; if (!sourceCode.empty()) { binary = sourceCode; retVal = CL_SUCCESS; } return retVal; } void Program::updateBuildLog(uint32_t rootDeviceIndex, const char *pErrorString, size_t errorStringSize) { if ((pErrorString == nullptr) || (errorStringSize == 0) || (pErrorString[0] == '\0')) { return; } if 
(pErrorString[errorStringSize - 1] == '\0') { --errorStringSize; } auto ¤tLog = buildInfos[rootDeviceIndex].buildLog; if (currentLog.empty()) { currentLog.assign(pErrorString, pErrorString + errorStringSize); return; } currentLog.append("\n"); currentLog.append(pErrorString, pErrorString + errorStringSize); } const char *Program::getBuildLog(uint32_t rootDeviceIndex) const { auto ¤tLog = buildInfos[rootDeviceIndex].buildLog; return currentLog.c_str(); } void Program::cleanCurrentKernelInfo(uint32_t rootDeviceIndex) { auto &buildInfo = buildInfos[rootDeviceIndex]; for (auto &kernelInfo : buildInfo.kernelInfoArray) { if (kernelInfo->kernelAllocation) { //register cache flush in all csrs where kernel allocation was used for (auto &engine : this->executionEnvironment.memoryManager->getRegisteredEngines()) { auto contextId = engine.osContext->getContextId(); if (kernelInfo->kernelAllocation->isUsedByOsContext(contextId)) { engine.commandStreamReceiver->registerInstructionCacheFlush(); } } if (executionEnvironment.memoryManager->isKernelBinaryReuseEnabled()) { auto lock = executionEnvironment.memoryManager->lockKernelAllocationMap(); auto kernelName = kernelInfo->kernelDescriptor.kernelMetadata.kernelName; auto &storedBinaries = executionEnvironment.memoryManager->getKernelAllocationMap(); auto kernelAllocations = storedBinaries.find(kernelName); if (kernelAllocations != storedBinaries.end()) { kernelAllocations->second.reuseCounter--; if (kernelAllocations->second.reuseCounter == 0) { this->executionEnvironment.memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(kernelAllocations->second.kernelAllocation); storedBinaries.erase(kernelAllocations); } } } else { this->executionEnvironment.memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(kernelInfo->kernelAllocation); } } delete kernelInfo; } buildInfo.kernelInfoArray.clear(); } void Program::updateNonUniformFlag() { //Look for -cl-std=CL substring and extract value behind which can be 1.2 2.0 2.1 and convert 
to value auto pos = options.find(clStdOptionName); if (pos == std::string::npos) { programOptionVersion = 12u; //Default is 1.2 } else { std::stringstream ss{options.c_str() + pos + clStdOptionName.size()}; uint32_t majorV = 0u, minorV = 0u; char dot = 0u; ss >> majorV; ss >> dot; ss >> minorV; programOptionVersion = majorV * 10u + minorV; } if (programOptionVersion >= 20u && (false == CompilerOptions::contains(options, CompilerOptions::uniformWorkgroupSize))) { allowNonUniform = true; } } void Program::updateNonUniformFlag(const Program **inputPrograms, size_t numInputPrograms) { bool allowNonUniform = true; for (cl_uint i = 0; i < numInputPrograms; i++) { allowNonUniform = allowNonUniform && inputPrograms[i]->getAllowNonUniform(); } this->allowNonUniform = allowNonUniform; } void Program::replaceDeviceBinary(std::unique_ptr &&newBinary, size_t newBinarySize, uint32_t rootDeviceIndex) { if (isAnyPackedDeviceBinaryFormat(ArrayRef(reinterpret_cast(newBinary.get()), newBinarySize))) { this->buildInfos[rootDeviceIndex].packedDeviceBinary = std::move(newBinary); this->buildInfos[rootDeviceIndex].packedDeviceBinarySize = newBinarySize; this->buildInfos[rootDeviceIndex].unpackedDeviceBinary.reset(); this->buildInfos[rootDeviceIndex].unpackedDeviceBinarySize = 0U; if (isAnySingleDeviceBinaryFormat(ArrayRef(reinterpret_cast(this->buildInfos[rootDeviceIndex].packedDeviceBinary.get()), this->buildInfos[rootDeviceIndex].packedDeviceBinarySize))) { this->buildInfos[rootDeviceIndex].unpackedDeviceBinary = makeCopy(buildInfos[rootDeviceIndex].packedDeviceBinary.get(), buildInfos[rootDeviceIndex].packedDeviceBinarySize); this->buildInfos[rootDeviceIndex].unpackedDeviceBinarySize = buildInfos[rootDeviceIndex].packedDeviceBinarySize; } } else { this->buildInfos[rootDeviceIndex].packedDeviceBinary.reset(); this->buildInfos[rootDeviceIndex].packedDeviceBinarySize = 0U; this->buildInfos[rootDeviceIndex].unpackedDeviceBinary = std::move(newBinary); 
this->buildInfos[rootDeviceIndex].unpackedDeviceBinarySize = newBinarySize; } } cl_int Program::packDeviceBinary(ClDevice &clDevice) { auto rootDeviceIndex = clDevice.getRootDeviceIndex(); if (nullptr != buildInfos[rootDeviceIndex].packedDeviceBinary) { return CL_SUCCESS; } auto hwInfo = executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo(); if (nullptr != this->buildInfos[rootDeviceIndex].unpackedDeviceBinary.get()) { SingleDeviceBinary singleDeviceBinary = {}; singleDeviceBinary.targetDevice = NEO::targetDeviceFromHwInfo(*hwInfo); singleDeviceBinary.buildOptions = this->options; singleDeviceBinary.deviceBinary = ArrayRef(reinterpret_cast(this->buildInfos[rootDeviceIndex].unpackedDeviceBinary.get()), this->buildInfos[rootDeviceIndex].unpackedDeviceBinarySize); singleDeviceBinary.intermediateRepresentation = ArrayRef(reinterpret_cast(this->irBinary.get()), this->irBinarySize); singleDeviceBinary.debugData = ArrayRef(reinterpret_cast(this->buildInfos[rootDeviceIndex].debugData.get()), this->buildInfos[rootDeviceIndex].debugDataSize); std::string packWarnings; std::string packErrors; auto packedDeviceBinary = NEO::packDeviceBinary(singleDeviceBinary, packErrors, packWarnings); if (packedDeviceBinary.empty()) { DEBUG_BREAK_IF(true); return CL_OUT_OF_HOST_MEMORY; } this->buildInfos[rootDeviceIndex].packedDeviceBinary = makeCopy(packedDeviceBinary.data(), packedDeviceBinary.size()); this->buildInfos[rootDeviceIndex].packedDeviceBinarySize = packedDeviceBinary.size(); } else if (nullptr != this->irBinary.get()) { NEO::Elf::ElfEncoder<> elfEncoder(true, true, 1U); if (deviceBuildInfos[&clDevice].programBinaryType == CL_PROGRAM_BINARY_TYPE_LIBRARY) { elfEncoder.getElfFileHeader().type = NEO::Elf::ET_OPENCL_LIBRARY; } else { elfEncoder.getElfFileHeader().type = NEO::Elf::ET_OPENCL_OBJECTS; } elfEncoder.appendSection(NEO::Elf::SHT_OPENCL_SPIRV, NEO::Elf::SectionNamesOpenCl::spirvObject, ArrayRef::fromAny(this->irBinary.get(), this->irBinarySize)); 
elfEncoder.appendSection(NEO::Elf::SHT_OPENCL_OPTIONS, NEO::Elf::SectionNamesOpenCl::buildOptions, this->options); auto elfData = elfEncoder.encode(); this->buildInfos[rootDeviceIndex].packedDeviceBinary = makeCopy(elfData.data(), elfData.size()); this->buildInfos[rootDeviceIndex].packedDeviceBinarySize = elfData.size(); } else { return CL_INVALID_PROGRAM; } return CL_SUCCESS; } void Program::setBuildStatus(cl_build_status status) { for (auto &deviceBuildInfo : deviceBuildInfos) { deviceBuildInfo.second.buildStatus = status; } } void Program::setBuildStatusSuccess(const ClDeviceVector &deviceVector, cl_program_binary_type binaryType) { for (const auto &device : deviceVector) { deviceBuildInfos[device].buildStatus = CL_BUILD_SUCCESS; if (deviceBuildInfos[device].programBinaryType != binaryType) { std::unique_lock lock(lockMutex); clDevicesInProgram.push_back(device); } deviceBuildInfos[device].programBinaryType = binaryType; for (const auto &subDevice : deviceBuildInfos[device].associatedSubDevices) { deviceBuildInfos[subDevice].buildStatus = CL_BUILD_SUCCESS; if (deviceBuildInfos[subDevice].programBinaryType != binaryType) { std::unique_lock lock(lockMutex); clDevicesInProgram.push_back(subDevice); } deviceBuildInfos[subDevice].programBinaryType = binaryType; } } } bool Program::isValidCallback(void(CL_CALLBACK *funcNotify)(cl_program program, void *userData), void *userData) { return funcNotify != nullptr || userData == nullptr; } void Program::invokeCallback(void(CL_CALLBACK *funcNotify)(cl_program program, void *userData), void *userData) { if (funcNotify != nullptr) { (*funcNotify)(this, userData); } } bool Program::isDeviceAssociated(const ClDevice &clDevice) const { return std::any_of(clDevices.begin(), clDevices.end(), [&](auto programDevice) { return programDevice == &clDevice; }); } cl_int Program::processInputDevices(ClDeviceVector *&deviceVectorPtr, cl_uint numDevices, const cl_device_id *deviceList, const ClDeviceVector &allAvailableDevices) { if 
(deviceList == nullptr) { if (numDevices == 0) { deviceVectorPtr = const_cast(&allAvailableDevices); } else { return CL_INVALID_VALUE; } } else { if (numDevices == 0) { return CL_INVALID_VALUE; } else { for (auto i = 0u; i < numDevices; i++) { auto device = castToObject(deviceList[i]); if (!device || !std::any_of(allAvailableDevices.begin(), allAvailableDevices.end(), [&](auto validDevice) { return validDevice == device; })) { return CL_INVALID_DEVICE; } deviceVectorPtr->push_back(device); } } } return CL_SUCCESS; } void Program::prependFilePathToOptions(const std::string &filename) { ConstStringRef cmcOption = "-cmc"; if (!filename.empty() && options.compare(0, cmcOption.size(), cmcOption.data())) { // Add "-s" flag first so it will be ignored by clang in case the options already have this flag set. options = std::string("-s ") + filename + " " + options; } } const ClDeviceVector &Program::getDevicesInProgram() const { if (clDevicesInProgram.empty()) { return clDevices; } else { return clDevicesInProgram; } } } // namespace NEO compute-runtime-22.14.22890/opencl/source/program/program.h000066400000000000000000000316511422164147700234270ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/compiler_interface/compiler_interface.h" #include "shared/source/compiler_interface/linker.h" #include "shared/source/device_binary_format/debug_zebin.h" #include "shared/source/device_binary_format/elf/elf_encoder.h" #include "shared/source/helpers/non_copyable_or_moveable.h" #include "shared/source/program/program_info.h" #include "shared/source/utilities/const_stringref.h" #include "opencl/source/api/cl_types.h" #include "opencl/source/cl_device/cl_device_vector.h" #include "opencl/source/helpers/base_object.h" #include "cif/builtins/memory/buffer/buffer.h" #include "patch_list.h" #include #include #include namespace NEO { namespace PatchTokenBinary { struct ProgramFromPatchtokens; 
} class BuiltinDispatchInfoBuilder; class ClDevice; class Context; class CompilerInterface; class Device; class ExecutionEnvironment; class Program; struct KernelInfo; template <> struct OpenCLObjectMapper<_cl_program> { typedef class Program DerivedType; }; namespace ProgramFunctions { using CreateFromILFunc = std::function; extern CreateFromILFunc createFromIL; } // namespace ProgramFunctions constexpr cl_int asClError(TranslationOutput::ErrorCode err) { switch (err) { default: return CL_OUT_OF_HOST_MEMORY; case TranslationOutput::ErrorCode::Success: return CL_SUCCESS; case TranslationOutput::ErrorCode::CompilerNotAvailable: return CL_COMPILER_NOT_AVAILABLE; case TranslationOutput::ErrorCode::CompilationFailure: return CL_COMPILE_PROGRAM_FAILURE; case TranslationOutput::ErrorCode::BuildFailure: return CL_BUILD_PROGRAM_FAILURE; case TranslationOutput::ErrorCode::LinkFailure: return CL_LINK_PROGRAM_FAILURE; } } class Program : public BaseObject<_cl_program> { public: static const cl_ulong objectMagic = 0x5651C89100AAACFELL; enum class CreatedFrom { SOURCE, IL, BINARY, UNKNOWN }; // Create program from binary template static T *create( Context *pContext, const ClDeviceVector &deviceVector, const size_t *lengths, const unsigned char **binaries, cl_int *binaryStatus, cl_int &errcodeRet); // Create program from source template static T *create( Context *pContext, cl_uint count, const char **strings, const size_t *lengths, cl_int &errcodeRet); template static T *createBuiltInFromSource( const char *nullTerminatedString, Context *context, const ClDeviceVector &deviceVector, cl_int *errcodeRet); template static T *createBuiltInFromGenBinary( Context *context, const ClDeviceVector &deviceVector, const void *binary, size_t size, cl_int *errcodeRet); template static T *createFromIL(Context *context, const void *il, size_t length, cl_int &errcodeRet); Program(Context *context, bool isBuiltIn, const ClDeviceVector &clDevicesIn); ~Program() override; Program(const Program &) = 
delete; Program &operator=(const Program &) = delete; cl_int build(const ClDeviceVector &deviceVector, const char *buildOptions, bool enableCaching); cl_int build(const ClDeviceVector &deviceVector, const char *buildOptions, bool enableCaching, std::unordered_map &builtinsMap); MOCKABLE_VIRTUAL cl_int processGenBinary(const ClDevice &clDevice); MOCKABLE_VIRTUAL cl_int processProgramInfo(ProgramInfo &dst, const ClDevice &clDevice); cl_int compile(const ClDeviceVector &deviceVector, const char *buildOptions, cl_uint numInputHeaders, const cl_program *inputHeaders, const char **headerIncludeNames); cl_int link(const ClDeviceVector &deviceVector, const char *buildOptions, cl_uint numInputPrograms, const cl_program *inputPrograms); cl_int setProgramSpecializationConstant(cl_uint specId, size_t specSize, const void *specValue); MOCKABLE_VIRTUAL cl_int updateSpecializationConstant(cl_uint specId, size_t specSize, const void *specValue); size_t getNumKernels() const; const KernelInfo *getKernelInfo(const char *kernelName, uint32_t rootDeviceIndex) const; const KernelInfo *getKernelInfo(size_t ordinal, uint32_t rootDeviceIndex) const; cl_int getInfo(cl_program_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet); cl_int getBuildInfo(cl_device_id device, cl_program_build_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) const; bool isBuilt() const { return std::any_of(this->deviceBuildInfos.begin(), this->deviceBuildInfos.end(), [](auto deviceBuildInfo) { return deviceBuildInfo.second.buildStatus == CL_SUCCESS && deviceBuildInfo.second.programBinaryType == CL_PROGRAM_BINARY_TYPE_EXECUTABLE; }); } Context &getContext() const { return *context; } Context *getContextPtr() const { return context; } ExecutionEnvironment &peekExecutionEnvironment() const { return executionEnvironment; } cl_int processSpirBinary(const void *pBinary, size_t binarySize, bool isSpirV); cl_int getSource(std::string &binary) const; 
MOCKABLE_VIRTUAL void processDebugData(uint32_t rootDeviceIndex); void updateBuildLog(uint32_t rootDeviceIndex, const char *pErrorString, const size_t errorStringSize); const char *getBuildLog(uint32_t rootDeviceIndex) const; cl_uint getProgramBinaryType(ClDevice *clDevice) const { return deviceBuildInfos.at(clDevice).programBinaryType; } bool getIsSpirV() const { return isSpirV; } GraphicsAllocation *getConstantSurface(uint32_t rootDeviceIndex) const { return buildInfos[rootDeviceIndex].constantSurface; } GraphicsAllocation *getGlobalSurface(uint32_t rootDeviceIndex) const { return buildInfos[rootDeviceIndex].globalSurface; } GraphicsAllocation *getExportedFunctionsSurface(uint32_t rootDeviceIndex) const { return buildInfos[rootDeviceIndex].exportedFunctionsSurface; } void cleanCurrentKernelInfo(uint32_t rootDeviceIndex); const std::string &getOptions() const { return options; } bool getAllowNonUniform() const { return allowNonUniform; } bool getIsBuiltIn() const { return isBuiltIn; } uint32_t getProgramOptionVersion() const { return programOptionVersion; } void enableKernelDebug() { kernelDebugEnabled = true; } bool isKernelDebugEnabled() { return kernelDebugEnabled; } char *getDebugData(uint32_t rootDeviceIndex) { return buildInfos[rootDeviceIndex].debugData.get(); } size_t getDebugDataSize(uint32_t rootDeviceIndex) { return buildInfos[rootDeviceIndex].debugDataSize; } const Linker::RelocatedSymbolsMap &getSymbols(uint32_t rootDeviceIndex) const { return buildInfos[rootDeviceIndex].symbols; } void setSymbols(uint32_t rootDeviceIndex, Linker::RelocatedSymbolsMap &&symbols) { buildInfos[rootDeviceIndex].symbols = std::move(symbols); } LinkerInput *getLinkerInput(uint32_t rootDeviceIndex) const { return buildInfos[rootDeviceIndex].linkerInput.get(); } void setLinkerInput(uint32_t rootDeviceIndex, std::unique_ptr &&linkerInput) { buildInfos[rootDeviceIndex].linkerInput = std::move(linkerInput); } MOCKABLE_VIRTUAL void replaceDeviceBinary(std::unique_ptr &&newBinary, 
size_t newBinarySize, uint32_t rootDeviceIndex); static bool isValidCallback(void(CL_CALLBACK *funcNotify)(cl_program program, void *userData), void *userData); void invokeCallback(void(CL_CALLBACK *funcNotify)(cl_program program, void *userData), void *userData); const ClDeviceVector &getDevices() const { return clDevices; } const ClDeviceVector &getDevicesInProgram() const; bool isDeviceAssociated(const ClDevice &clDevice) const; static cl_int processInputDevices(ClDeviceVector *&deviceVectorPtr, cl_uint numDevices, const cl_device_id *deviceList, const ClDeviceVector &allAvailableDevices); MOCKABLE_VIRTUAL std::string getInternalOptions() const; uint32_t getMaxRootDeviceIndex() const { return maxRootDeviceIndex; } void retainForKernel() { std::unique_lock lock{lockMutex}; exposedKernels++; } void releaseForKernel() { std::unique_lock lock{lockMutex}; UNRECOVERABLE_IF(exposedKernels == 0); exposedKernels--; } bool isLocked() { std::unique_lock lock{lockMutex}; return 0 != exposedKernels; } const ExecutionEnvironment &getExecutionEnvironment() const { return executionEnvironment; } void setContext(Context *pContext) { this->context = pContext; } void notifyDebuggerWithDebugData(ClDevice *clDevice); MOCKABLE_VIRTUAL void createDebugZebin(uint32_t rootDeviceIndex); Debug::Segments getZebinSegments(uint32_t rootDeviceIndex); protected: MOCKABLE_VIRTUAL cl_int createProgramFromBinary(const void *pBinary, size_t binarySize, ClDevice &clDevice); cl_int packDeviceBinary(ClDevice &clDevice); MOCKABLE_VIRTUAL cl_int linkBinary(Device *pDevice, const void *constantsInitData, const void *variablesInitData, const ProgramInfo::GlobalSurfaceInfo &stringInfo, std::vector &extFuncInfos); void updateNonUniformFlag(); void updateNonUniformFlag(const Program **inputProgram, size_t numInputPrograms); void extractInternalOptions(const std::string &options, std::string &internalOptions); MOCKABLE_VIRTUAL bool isFlagOption(ConstStringRef option); MOCKABLE_VIRTUAL bool 
isOptionValueValid(ConstStringRef option, ConstStringRef value); MOCKABLE_VIRTUAL void applyAdditionalOptions(std::string &internalOptions); MOCKABLE_VIRTUAL bool appendKernelDebugOptions(ClDevice &clDevice, std::string &internalOptions); void notifyDebuggerWithSourceCode(ClDevice &clDevice, std::string &filename); void prependFilePathToOptions(const std::string &filename); void setBuildStatus(cl_build_status status); void setBuildStatusSuccess(const ClDeviceVector &deviceVector, cl_program_binary_type binaryType); bool isSpirV = false; std::unique_ptr irBinary; size_t irBinarySize = 0U; CreatedFrom createdFrom = CreatedFrom::UNKNOWN; struct DeviceBuildInfo { StackVec associatedSubDevices; cl_build_status buildStatus = CL_BUILD_NONE; cl_program_binary_type programBinaryType = CL_PROGRAM_BINARY_TYPE_NONE; }; std::unordered_map deviceBuildInfos; bool isCreatedFromBinary = false; bool shouldWarnAboutRebuild = false; std::string sourceCode; std::string options; static const std::vector internalOptionsToExtract; uint32_t programOptionVersion = 12U; bool allowNonUniform = false; struct BuildInfo : public NonCopyableClass { std::vector kernelInfoArray; GraphicsAllocation *constantSurface = nullptr; GraphicsAllocation *globalSurface = nullptr; GraphicsAllocation *exportedFunctionsSurface = nullptr; size_t globalVarTotalSize = 0U; std::unique_ptr linkerInput; Linker::RelocatedSymbolsMap symbols{}; std::string buildLog{}; std::unique_ptr unpackedDeviceBinary; size_t unpackedDeviceBinarySize = 0U; std::unique_ptr packedDeviceBinary; size_t packedDeviceBinarySize = 0U; ProgramInfo::GlobalSurfaceInfo constStringSectionData; std::unique_ptr debugData; size_t debugDataSize = 0U; }; std::vector buildInfos; bool areSpecializationConstantsInitialized = false; CIF::RAII::UPtr_t specConstantsIds; CIF::RAII::UPtr_t specConstantsSizes; specConstValuesMap specConstantsValues; ExecutionEnvironment &executionEnvironment; Context *context = nullptr; ClDeviceVector clDevices; ClDeviceVector 
clDevicesInProgram; bool isBuiltIn = false; bool kernelDebugEnabled = false; uint32_t maxRootDeviceIndex = std::numeric_limits::max(); std::mutex lockMutex; uint32_t exposedKernels = 0; }; } // namespace NEO compute-runtime-22.14.22890/opencl/source/sampler/000077500000000000000000000000001422164147700215755ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/source/sampler/CMakeLists.txt000066400000000000000000000010721422164147700243350ustar00rootroot00000000000000# # Copyright (C) 2018-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(RUNTIME_SRCS_SAMPLER ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/sampler.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sampler.h ${CMAKE_CURRENT_SOURCE_DIR}/sampler.inl ${CMAKE_CURRENT_SOURCE_DIR}/sampler_factory_init.inl ${CMAKE_CURRENT_SOURCE_DIR}/sampler_tgllp_and_later.inl ) add_subdirectories() target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_SAMPLER}) set_property(GLOBAL PROPERTY RUNTIME_SRCS_SAMPLER ${RUNTIME_SRCS_SAMPLER}) compute-runtime-22.14.22890/opencl/source/sampler/sampler.cpp000066400000000000000000000173421422164147700237530ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/sampler/sampler.h" #include "shared/source/helpers/get_info.h" #include "shared/source/helpers/hw_info.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/context/context.h" #include "opencl/source/helpers/get_info_status_mapper.h" #include "patch_list.h" #include namespace NEO { SamplerCreateFunc samplerFactory[IGFX_MAX_CORE] = {}; Sampler::Sampler(Context *context, cl_bool normalizedCoordinates, cl_addressing_mode addressingMode, cl_filter_mode filterMode, cl_filter_mode mipFilterMode, float lodMin, float lodMax) : context(context), normalizedCoordinates(normalizedCoordinates), addressingMode(addressingMode), filterMode(filterMode), mipFilterMode(mipFilterMode), lodMin(lodMin), 
lodMax(lodMax) { } Sampler::Sampler(Context *context, cl_bool normalizedCoordinates, cl_addressing_mode addressingMode, cl_filter_mode filterMode) : Sampler(context, normalizedCoordinates, addressingMode, filterMode, CL_FILTER_NEAREST, 0.0f, std::numeric_limits::max()) { } Sampler *Sampler::create(Context *context, cl_bool normalizedCoordinates, cl_addressing_mode addressingMode, cl_filter_mode filterMode, cl_filter_mode mipFilterMode, float lodMin, float lodMax, cl_int &errcodeRet) { errcodeRet = CL_SUCCESS; Sampler *sampler = nullptr; DEBUG_BREAK_IF(nullptr == context); const auto device = context->getDevice(0); const auto &hwInfo = device->getHardwareInfo(); auto funcCreate = samplerFactory[hwInfo.platform.eRenderCoreFamily]; DEBUG_BREAK_IF(nullptr == funcCreate); sampler = funcCreate(context, normalizedCoordinates, addressingMode, filterMode, mipFilterMode, lodMin, lodMax); if (sampler == nullptr) { errcodeRet = CL_OUT_OF_HOST_MEMORY; } return sampler; } template struct SetOnce { SetOnce(ParameterType defaultValue, ParameterType min, ParameterType max) : value(defaultValue), min(min), max(max) { } cl_int setValue(ParameterType property) { if (alreadySet) { return CL_INVALID_VALUE; } if ((property < min) || (property > max)) { return CL_INVALID_VALUE; } this->value = property; alreadySet = true; return CL_SUCCESS; } bool alreadySet = false; ParameterType value; ParameterType min; ParameterType max; }; Sampler *Sampler::create(Context *context, const cl_sampler_properties *samplerProperties, cl_int &errcodeRet) { SetOnce normalizedCoords(CL_TRUE, CL_FALSE, CL_TRUE); SetOnce filterMode(CL_FILTER_NEAREST, CL_FILTER_NEAREST, CL_FILTER_LINEAR); SetOnce addressingMode(CL_ADDRESS_CLAMP, CL_ADDRESS_NONE, CL_ADDRESS_MIRRORED_REPEAT); SetOnce mipFilterMode(CL_FILTER_NEAREST, CL_FILTER_NEAREST, CL_FILTER_LINEAR); SetOnce lodMin(0.0f, 0.0f, std::numeric_limits::max()); SetOnce lodMax(std::numeric_limits::max(), 0.0f, std::numeric_limits::max()); errcodeRet = CL_SUCCESS; 
auto samplerProperty = samplerProperties; if (samplerProperty) { cl_ulong samType; while ((samType = *samplerProperty) != 0) { ++samplerProperty; auto samValue = *samplerProperty; switch (samType) { case CL_SAMPLER_NORMALIZED_COORDS: errcodeRet = normalizedCoords.setValue(static_cast(samValue)); break; case CL_SAMPLER_ADDRESSING_MODE: errcodeRet = addressingMode.setValue(static_cast(samValue)); break; case CL_SAMPLER_FILTER_MODE: errcodeRet = filterMode.setValue(static_cast(samValue)); break; case CL_SAMPLER_MIP_FILTER_MODE: errcodeRet = mipFilterMode.setValue(static_cast(samValue)); break; case CL_SAMPLER_LOD_MIN: { SamplerLodProperty lodData; lodData.data = samValue; errcodeRet = lodMin.setValue(lodData.lod); break; } case CL_SAMPLER_LOD_MAX: { SamplerLodProperty lodData; lodData.data = samValue; errcodeRet = lodMax.setValue(lodData.lod); break; } default: errcodeRet = CL_INVALID_VALUE; break; } ++samplerProperty; } } Sampler *sampler = nullptr; if (errcodeRet == CL_SUCCESS) { sampler = create(context, normalizedCoords.value, addressingMode.value, filterMode.value, mipFilterMode.value, lodMin.value, lodMax.value, errcodeRet); } if (errcodeRet == CL_SUCCESS) { sampler->storeProperties(samplerProperties); } return sampler; } unsigned int Sampler::getSnapWaValue() const { if (filterMode == CL_FILTER_NEAREST && addressingMode == CL_ADDRESS_CLAMP) { return iOpenCL::CONSTANT_REGISTER_BOOLEAN_TRUE; } else { return iOpenCL::CONSTANT_REGISTER_BOOLEAN_FALSE; } } cl_int Sampler::getInfo(cl_sampler_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) { cl_int retVal; size_t valueSize = GetInfo::invalidSourceSize; const void *pValue = nullptr; cl_uint refCount = 0; switch (paramName) { case CL_SAMPLER_CONTEXT: valueSize = sizeof(cl_device_id); pValue = &this->context; break; case CL_SAMPLER_NORMALIZED_COORDS: valueSize = sizeof(cl_bool); pValue = &this->normalizedCoordinates; break; case CL_SAMPLER_ADDRESSING_MODE: valueSize = 
sizeof(cl_addressing_mode); pValue = &this->addressingMode; break; case CL_SAMPLER_FILTER_MODE: valueSize = sizeof(cl_filter_mode); pValue = &this->filterMode; break; case CL_SAMPLER_MIP_FILTER_MODE: valueSize = sizeof(cl_filter_mode); pValue = &this->mipFilterMode; break; case CL_SAMPLER_LOD_MIN: valueSize = sizeof(float); pValue = &this->lodMin; break; case CL_SAMPLER_LOD_MAX: valueSize = sizeof(float); pValue = &this->lodMax; break; case CL_SAMPLER_REFERENCE_COUNT: refCount = static_cast(this->getReference()); valueSize = sizeof(refCount); pValue = &refCount; break; case CL_SAMPLER_PROPERTIES: valueSize = propertiesVector.size() * sizeof(cl_sampler_properties); pValue = propertiesVector.data(); break; default: break; } auto getInfoStatus = GetInfo::getInfo(paramValue, paramValueSize, pValue, valueSize); retVal = changeGetInfoStatusToCLResultType(getInfoStatus); GetInfo::setParamValueReturnSize(paramValueSizeRet, valueSize, getInfoStatus); return retVal; } bool Sampler::isTransformable() const { return addressingMode == CL_ADDRESS_CLAMP_TO_EDGE && filterMode == CL_FILTER_NEAREST && normalizedCoordinates == CL_FALSE; } void Sampler::storeProperties(const cl_sampler_properties *properties) { if (properties) { for (size_t i = 0; properties[i] != 0; i += 2) { propertiesVector.push_back(properties[i]); propertiesVector.push_back(properties[i + 1]); } propertiesVector.push_back(0); } } } // namespace NEO compute-runtime-22.14.22890/opencl/source/sampler/sampler.h000066400000000000000000000114411422164147700234120ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/api/cl_types.h" #include "opencl/source/helpers/base_object.h" #include namespace NEO { class Context; struct HardwareInfo; template <> struct OpenCLObjectMapper<_cl_sampler> { typedef class Sampler DerivedType; }; union SamplerLodProperty { cl_sampler_properties data; float lod; }; class Sampler : public 
BaseObject<_cl_sampler> { public: static const cl_ulong objectMagic = 0x4684913AC213EF00LL; static const uint32_t samplerStateArrayAlignment = 64; static Sampler *create(Context *context, cl_bool normalizedCoordinates, cl_addressing_mode addressingMode, cl_filter_mode filterMode, cl_filter_mode mipFilterMode, float lodMin, float lodMax, cl_int &errcodeRet); static Sampler *create(Context *context, cl_bool normalizedCoordinates, cl_addressing_mode addressingMode, cl_filter_mode filterMode, cl_int &errcodeRet) { return Sampler::create(context, normalizedCoordinates, addressingMode, filterMode, CL_FILTER_NEAREST, 0.0f, std::numeric_limits::max(), errcodeRet); } static Sampler *create(Context *context, const cl_sampler_properties *samplerProperties, cl_int &errcodeRet); cl_int getInfo(cl_sampler_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet); virtual void setArg(void *memory, const HardwareInfo &hwInfo) = 0; bool isTransformable() const; Sampler(Context *context, cl_bool normalizedCoordinates, cl_addressing_mode addressingMode, cl_filter_mode filterMode, cl_filter_mode mipFilterMode, float lodMin, float lodMax); Sampler(Context *context, cl_bool normalizedCoordinates, cl_addressing_mode addressingMode, cl_filter_mode filterMode); unsigned int getSnapWaValue() const; cl_context context; cl_bool normalizedCoordinates; cl_addressing_mode addressingMode; cl_filter_mode filterMode; cl_filter_mode mipFilterMode; float lodMin; float lodMax; protected: void storeProperties(const cl_sampler_properties *properties); std::vector propertiesVector; }; template struct SamplerHw : public Sampler { void setArg(void *memory, const HardwareInfo &hwInfo) override; static constexpr float getGenSamplerMaxLod() { return 14.0f; } SamplerHw(Context *context, cl_bool normalizedCoordinates, cl_addressing_mode addressingMode, cl_filter_mode filterMode, cl_filter_mode mipFilterMode, float lodMin, float lodMax) : Sampler(context, normalizedCoordinates, 
addressingMode, filterMode, mipFilterMode, lodMin, lodMax) { } SamplerHw(Context *context, cl_bool normalizedCoordinates, cl_addressing_mode addressingMode, cl_filter_mode filterMode) : Sampler(context, normalizedCoordinates, addressingMode, filterMode) { } static Sampler *create(Context *context, cl_bool normalizedCoordinates, cl_addressing_mode addressingMode, cl_filter_mode filterMode, cl_filter_mode mipFilterMode, float lodMin, float lodMax) { return new SamplerHw(context, normalizedCoordinates, addressingMode, filterMode, mipFilterMode, lodMin, lodMax); } }; typedef Sampler *(*SamplerCreateFunc)(Context *context, cl_bool normalizedCoordinates, cl_addressing_mode addressingMode, cl_filter_mode filterMode, cl_filter_mode mipFilterMode, float lodMin, float lodMax); typedef size_t (*getSamplerStateSizeHwFunc)(); template <> inline Sampler *castToObject(const void *object) { auto clSamplerObj = reinterpret_cast(object); return castToObject(const_cast(clSamplerObj)); } } // namespace NEO compute-runtime-22.14.22890/opencl/source/sampler/sampler.inl000066400000000000000000000100561422164147700237460ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/populate_factory.h" #include "shared/source/os_interface/hw_info_config.h" #include "shared/source/utilities/numeric.h" #include namespace NEO { template void SamplerHw::setArg(void *memory, const HardwareInfo &hwInfo) { using SAMPLER_STATE = typename GfxFamily::SAMPLER_STATE; auto samplerState = reinterpret_cast(memory); samplerState->setNonNormalizedCoordinateEnable(!this->normalizedCoordinates); samplerState->setLodPreclampMode(SAMPLER_STATE::LOD_PRECLAMP_MODE::LOD_PRECLAMP_MODE_OGL); auto addressControlModeX = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_CLAMP; auto addressControlModeY = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_CLAMP; auto addressControlModeZ = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_CLAMP; switch 
(this->addressingMode) { case CL_ADDRESS_NONE: case CL_ADDRESS_CLAMP: addressControlModeX = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_CLAMP_BORDER; addressControlModeY = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_CLAMP_BORDER; addressControlModeZ = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_CLAMP_BORDER; break; case CL_ADDRESS_CLAMP_TO_EDGE: addressControlModeX = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_CLAMP; addressControlModeY = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_CLAMP; addressControlModeZ = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_CLAMP; break; case CL_ADDRESS_MIRRORED_REPEAT: addressControlModeX = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_MIRROR; addressControlModeY = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_MIRROR; addressControlModeZ = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_MIRROR; break; case CL_ADDRESS_REPEAT: addressControlModeX = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_WRAP; addressControlModeY = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_WRAP; addressControlModeZ = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_WRAP; break; } auto minMode = SAMPLER_STATE::MIN_MODE_FILTER_NEAREST; auto magMode = SAMPLER_STATE::MAG_MODE_FILTER_NEAREST; auto mipMode = SAMPLER_STATE::MIP_MODE_FILTER_NEAREST; if (CL_FILTER_LINEAR == filterMode) { minMode = SAMPLER_STATE::MIN_MODE_FILTER_LINEAR; magMode = SAMPLER_STATE::MAG_MODE_FILTER_LINEAR; } if (CL_FILTER_LINEAR == mipFilterMode) { mipMode = SAMPLER_STATE::MIP_MODE_FILTER_LINEAR; } samplerState->setMinModeFilter(minMode); samplerState->setMagModeFilter(magMode); samplerState->setMipModeFilter(mipMode); samplerState->setTcxAddressControlMode(addressControlModeX); samplerState->setTcyAddressControlMode(addressControlModeY); samplerState->setTczAddressControlMode(addressControlModeZ); if (CL_FILTER_NEAREST != filterMode) { samplerState->setRAddressMinFilterRoundingEnable(true); samplerState->setRAddressMagFilterRoundingEnable(true); samplerState->setVAddressMinFilterRoundingEnable(true); samplerState->setVAddressMagFilterRoundingEnable(true); 
samplerState->setUAddressMinFilterRoundingEnable(true); samplerState->setUAddressMagFilterRoundingEnable(true); } else { samplerState->setRAddressMinFilterRoundingEnable(false); samplerState->setRAddressMagFilterRoundingEnable(false); samplerState->setVAddressMinFilterRoundingEnable(false); samplerState->setVAddressMagFilterRoundingEnable(false); samplerState->setUAddressMinFilterRoundingEnable(false); samplerState->setUAddressMagFilterRoundingEnable(false); } FixedU4D8 minLodValue = FixedU4D8(std::min(getGenSamplerMaxLod(), this->lodMin)); FixedU4D8 maxLodValue = FixedU4D8(std::min(getGenSamplerMaxLod(), this->lodMax)); samplerState->setMinLod(minLodValue.getRawAccess()); samplerState->setMaxLod(maxLodValue.getRawAccess()); HwInfoConfig::get(hwInfo.platform.eProductFamily)->adjustSamplerState(samplerState, hwInfo); } } // namespace NEO compute-runtime-22.14.22890/opencl/source/sampler/sampler_factory_init.inl000066400000000000000000000007041422164147700265170ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ // Sampler factory table initialization. 
// Family, gfxCore came from outside, do not set them here unless you // really know what you are doing template struct SamplerHw; template <> void populateFactoryTable>() { extern SamplerCreateFunc samplerFactory[IGFX_MAX_CORE]; samplerFactory[gfxCore] = SamplerHw::create; } compute-runtime-22.14.22890/opencl/source/sampler/sampler_tgllp_and_later.inl000066400000000000000000000004211422164147700271540ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/sampler/sampler.h" #include "opencl/source/sampler/sampler.inl" namespace NEO { #include "opencl/source/sampler/sampler_factory_init.inl" } // namespace NEO compute-runtime-22.14.22890/opencl/source/sharings/000077500000000000000000000000001422164147700217505ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/source/sharings/CMakeLists.txt000066400000000000000000000030401422164147700245050ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # file(GLOB_RECURSE SHARING_ENABLE_CPPS enable*.cpp) add_library(${SHARINGS_ENABLE_LIB_NAME} OBJECT EXCLUDE_FROM_ALL ${SHARING_ENABLE_CPPS}) set_target_properties(${SHARINGS_ENABLE_LIB_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON) set_target_properties(${SHARINGS_ENABLE_LIB_NAME} PROPERTIES FOLDER ${OPENCL_RUNTIME_PROJECTS_FOLDER}) target_include_directories(${SHARINGS_ENABLE_LIB_NAME} PRIVATE ${KHRONOS_HEADERS_DIR} ${KHRONOS_GL_HEADERS_DIR} ${NEO__GMM_INCLUDE_DIR} ${NEO__IGC_INCLUDE_DIR} ${THIRD_PARTY_DIR} ) set(RUNTIME_SRCS_SHARINGS ${CMAKE_CURRENT_SOURCE_DIR}/sharing.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sharing.h ${CMAKE_CURRENT_SOURCE_DIR}/sharing_factory.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sharing_factory.h ${CMAKE_CURRENT_SOURCE_DIR}/sharing_factory.inl ) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_SHARINGS}) set_property(GLOBAL PROPERTY RUNTIME_SRCS_SHARINGS ${RUNTIME_SRCS_SHARINGS}) if(WIN32) 
target_include_directories(${SHARINGS_ENABLE_LIB_NAME} PRIVATE ${NEO_SOURCE_DIR}/opencl/source/os_interface/windows) else() target_include_directories(${SHARINGS_ENABLE_LIB_NAME} PRIVATE ${NEO_SOURCE_DIR}/opencl/source/os_interface/linux) endif() add_subdirectories() create_project_source_tree(${SHARINGS_ENABLE_LIB_NAME}) set(MSVC_DEF_ADDITIONAL_EXPORTS ${MSVC_DEF_ADDITIONAL_EXPORTS} PARENT_SCOPE) compute-runtime-22.14.22890/opencl/source/sharings/d3d/000077500000000000000000000000001422164147700224225ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/source/sharings/d3d/CMakeLists.txt000066400000000000000000000013401422164147700251600ustar00rootroot00000000000000# # Copyright (C) 2018-2021 Intel Corporation # # SPDX-License-Identifier: MIT # if(WIN32) set(RUNTIME_SRCS_SHARINGS_D3D ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/cl_d3d_api.h ${CMAKE_CURRENT_SOURCE_DIR}/d3d_buffer.h ${CMAKE_CURRENT_SOURCE_DIR}/d3d_sharing.cpp ${CMAKE_CURRENT_SOURCE_DIR}/d3d_sharing.h ${CMAKE_CURRENT_SOURCE_DIR}/d3d_surface.cpp ${CMAKE_CURRENT_SOURCE_DIR}/d3d_surface.h ${CMAKE_CURRENT_SOURCE_DIR}/d3d_texture.cpp ${CMAKE_CURRENT_SOURCE_DIR}/d3d_texture.h ) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_SHARINGS_D3D}) endif() set_property(GLOBAL PROPERTY RUNTIME_SRCS_SHARINGS_D3D ${RUNTIME_SRCS_SHARINGS_D3D}) compute-runtime-22.14.22890/opencl/source/sharings/d3d/cl_d3d_api.h000066400000000000000000000144031422164147700245560ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include #include "CL/cl.h" #include "CL/cl_d3d10.h" #include "CL/cl_d3d11.h" #include "CL/cl_dx9_media_sharing.h" #define CL_DX9_MEDIA_SHARING_INTEL_EXT #include "CL/cl_dx9_media_sharing_intel.h" extern CL_API_ENTRY cl_int CL_API_CALL clGetDeviceIDsFromDX9INTEL( cl_platform_id platform, cl_dx9_device_source_intel dx9DeviceSource, void *dx9Object, cl_dx9_device_set_intel dx9DeviceSet, 
cl_uint numEntries, cl_device_id *devices, cl_uint *numDevices); extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromDX9MediaSurfaceINTEL( cl_context context, cl_mem_flags flags, IDirect3DSurface9 *resource, HANDLE sharedHandle, UINT plane, cl_int *errcodeRet); extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueAcquireDX9ObjectsINTEL( cl_command_queue commandQueue, cl_uint numObjects, const cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueReleaseDX9ObjectsINTEL( cl_command_queue commandQueue, cl_uint numObjects, cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); extern CL_API_ENTRY cl_int CL_API_CALL clGetDeviceIDsFromDX9MediaAdapterKHR( cl_platform_id platform, cl_uint numMediaAdapters, cl_dx9_media_adapter_type_khr *mediaAdapterType, void *mediaAdapters, cl_dx9_media_adapter_set_khr mediaAdapterSet, cl_uint numEntries, cl_device_id *devices, cl_uint *numDevices) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromDX9MediaSurfaceKHR( cl_context context, cl_mem_flags flags, cl_dx9_media_adapter_type_khr adapterType, void *surfaceInfo, cl_uint plane, cl_int *errcodeRet) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueAcquireDX9MediaSurfacesKHR( cl_command_queue commandQueue, cl_uint numObjects, const cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueReleaseDX9MediaSurfacesKHR( cl_command_queue commandQueue, cl_uint numObjects, const cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clGetDeviceIDsFromD3D10KHR( cl_platform_id platform, cl_d3d10_device_source_khr d3dDeviceSource, void *d3dObject, cl_d3d10_device_set_khr d3dDeviceSet, cl_uint 
numEntries, cl_device_id *devices, cl_uint *numDevices) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromD3D10BufferKHR( cl_context context, cl_mem_flags flags, ID3D10Buffer *resource, cl_int *errcodeRet) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromD3D10Texture2DKHR( cl_context context, cl_mem_flags flags, ID3D10Texture2D *resource, UINT subresource, cl_int *errcodeRet) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromD3D10Texture3DKHR( cl_context context, cl_mem_flags flags, ID3D10Texture3D *resource, UINT subresource, cl_int *errcodeRet) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueAcquireD3D10ObjectsKHR( cl_command_queue commandQueue, cl_uint numObjects, const cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueReleaseD3D10ObjectsKHR( cl_command_queue commandQueue, cl_uint numObjects, const cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clGetDeviceIDsFromD3D11KHR( cl_platform_id platform, cl_d3d11_device_source_khr d3dDeviceSource, void *d3dObject, cl_d3d11_device_set_khr d3dDeviceSet, cl_uint numEntries, cl_device_id *devices, cl_uint *numDevices) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromD3D11BufferKHR( cl_context context, cl_mem_flags flags, ID3D11Buffer *resource, cl_int *errcodeRet) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromD3D11Texture2DKHR( cl_context context, cl_mem_flags flags, ID3D11Texture2D *resource, UINT subresource, cl_int *errcodeRet) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromD3D11Texture3DKHR( cl_context context, cl_mem_flags flags, ID3D11Texture3D *resource, UINT subresource, 
cl_int *errcodeRet) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueAcquireD3D11ObjectsKHR( cl_command_queue commandQueue, cl_uint numObjects, const cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueReleaseD3D11ObjectsKHR( cl_command_queue commandQueue, cl_uint numObjects, const cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clGetSupportedDX9MediaSurfaceFormatsINTEL( cl_context context, cl_mem_flags flags, cl_mem_object_type imageType, cl_uint plane, cl_uint numEntries, D3DFORMAT *dx9Formats, cl_uint *numImageFormats) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clGetSupportedD3D10TextureFormatsINTEL( cl_context context, cl_mem_flags flags, cl_mem_object_type imageType, cl_uint numEntries, DXGI_FORMAT *dx10Formats, cl_uint *numImageFormats) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clGetSupportedD3D11TextureFormatsINTEL( cl_context context, cl_mem_flags flags, cl_mem_object_type imageType, cl_uint plane, cl_uint numEntries, DXGI_FORMAT *dx11Formats, cl_uint *numImageFormats) CL_API_SUFFIX__VERSION_1_2; compute-runtime-22.14.22890/opencl/source/sharings/d3d/d3d_buffer.h000066400000000000000000000050121422164147700245740ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/memory_manager/memory_manager.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/sharings/d3d/d3d_sharing.h" namespace NEO { class Buffer; class Context; template class D3DBuffer : public D3DSharing { typedef typename D3D::D3DBufferObj D3DBufferObj; typedef typename D3D::D3DBufferDesc D3DBufferDesc; public: static Buffer *create(Context *context, D3DBufferObj *d3dBuffer, 
cl_mem_flags flags, cl_int *retCode) { ErrorCodeHelper err(retCode, CL_SUCCESS); auto sharingFcns = context->getSharing>(); void *sharedHandle = nullptr; D3DBufferDesc bufferDesc = {}; sharingFcns->getBufferDesc(&bufferDesc, d3dBuffer); bool sharedResource = false; D3DBufferObj *bufferStaging = nullptr; if (bufferDesc.MiscFlags & D3DResourceFlags::MISC_SHARED) { bufferStaging = d3dBuffer; sharedResource = true; } else { sharingFcns->createBuffer(&bufferStaging, bufferDesc.ByteWidth); } sharingFcns->getSharedHandle(bufferStaging, &sharedHandle); AllocationProperties properties = {context->getDevice(0)->getRootDeviceIndex(), false, // allocateMemory 0, // size AllocationType::SHARED_BUFFER, false, // isMultiStorageAllocation context->getDeviceBitfieldForAllocation(context->getDevice(0)->getRootDeviceIndex())}; auto alloc = context->getMemoryManager()->createGraphicsAllocationFromSharedHandle(toOsHandle(sharedHandle), properties, true, false); auto d3dBufferObj = new D3DBuffer(context, d3dBuffer, bufferStaging, sharedResource); auto rootDeviceIndex = alloc->getRootDeviceIndex(); auto multiGraphicsAllocation = MultiGraphicsAllocation(rootDeviceIndex); multiGraphicsAllocation.addAllocation(alloc); return Buffer::createSharedBuffer(context, flags, d3dBufferObj, std::move(multiGraphicsAllocation)); } ~D3DBuffer() override = default; protected: D3DBuffer(Context *context, D3DBufferObj *d3dBuffer, D3DBufferObj *bufferStaging, bool sharedResource) : D3DSharing(context, d3dBuffer, bufferStaging, 0, sharedResource){}; }; } // namespace NEO compute-runtime-22.14.22890/opencl/source/sharings/d3d/d3d_sharing.cpp000066400000000000000000000077141422164147700253240ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/sharings/d3d/d3d_sharing.h" #include "shared/source/gmm_helper/gmm.h" #include "opencl/source/context/context.h" #include "opencl/source/mem_obj/image.h" using namespace NEO; template 
class D3DSharing; template class D3DSharing; template class D3DSharing; template D3DSharing::D3DSharing(Context *context, D3DResource *resource, D3DResource *resourceStaging, unsigned int subresource, bool sharedResource) : sharedResource(sharedResource), subresource(subresource), resource(resource), resourceStaging(resourceStaging), context(context) { sharingFunctions = context->getSharing>(); if (sharingFunctions) { sharingFunctions->addRef(resource); sharingFunctions->createQuery(&this->d3dQuery); sharingFunctions->track(resource, subresource); } }; template D3DSharing::~D3DSharing() { if (sharingFunctions) { sharingFunctions->untrack(resource, subresource); if (!sharedResource) { sharingFunctions->release(resourceStaging); } sharingFunctions->release(resource); sharingFunctions->release(d3dQuery); } }; template void D3DSharing::synchronizeObject(UpdateData &updateData) { std::unique_lock lock(this->mtx); sharingFunctions->getDeviceContext(d3dQuery); if (!sharedResource) { sharingFunctions->copySubresourceRegion(resourceStaging, 0, resource, subresource); sharingFunctions->flushAndWait(d3dQuery); } else if (!context->getInteropUserSyncEnabled()) { sharingFunctions->flushAndWait(d3dQuery); } sharingFunctions->releaseDeviceContext(d3dQuery); updateData.synchronizationStatus = SynchronizeStatus::ACQUIRE_SUCCESFUL; } template void D3DSharing::releaseResource(MemObj *memObject, uint32_t rootDeviceIndex) { if (!sharedResource) { std::unique_lock lock(this->mtx); sharingFunctions->getDeviceContext(d3dQuery); sharingFunctions->copySubresourceRegion(resource, subresource, resourceStaging, 0); if (!context->getInteropUserSyncEnabled()) { sharingFunctions->flushAndWait(d3dQuery); } sharingFunctions->releaseDeviceContext(d3dQuery); } } template void D3DSharing::updateImgInfoAndDesc(Gmm *gmm, ImageInfo &imgInfo, ImagePlane imagePlane, cl_uint arrayIndex) { gmm->updateImgInfoAndDesc(imgInfo, arrayIndex); if (imagePlane == ImagePlane::PLANE_U || imagePlane == 
ImagePlane::PLANE_V || imagePlane == ImagePlane::PLANE_UV) { imgInfo.imgDesc.imageWidth /= 2; imgInfo.imgDesc.imageHeight /= 2; if (imagePlane != ImagePlane::PLANE_UV) { imgInfo.imgDesc.imageRowPitch /= 2; } } } template const ClSurfaceFormatInfo *D3DSharing::findSurfaceFormatInfo(GMM_RESOURCE_FORMAT_ENUM gmmFormat, cl_mem_flags flags, bool supportsOcl20Features, bool packedSupported) { ArrayRef formats = SurfaceFormats::surfaceFormats(flags, supportsOcl20Features); for (auto &format : formats) { if (gmmFormat == format.surfaceFormat.GMMSurfaceFormat) { return &format; } } if (packedSupported) { formats = SurfaceFormats::packed(); for (auto &format : formats) { if (gmmFormat == format.surfaceFormat.GMMSurfaceFormat) { return &format; } } } return nullptr; } template bool D3DSharing::isFormatWithPlane1(DXGI_FORMAT format) { switch (format) { case DXGI_FORMAT_NV12: case DXGI_FORMAT_P010: case DXGI_FORMAT_P016: case DXGI_FORMAT_420_OPAQUE: case DXGI_FORMAT_NV11: case DXGI_FORMAT_P208: return true; } return false; } compute-runtime-22.14.22890/opencl/source/sharings/d3d/d3d_sharing.h000066400000000000000000000032511422164147700247610ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "d3d_sharing_functions.h" enum GMM_RESOURCE_FORMAT_ENUM; namespace NEO { enum class ImagePlane; class Context; class Gmm; struct ClSurfaceFormatInfo; struct ImageInfo; template class D3DSharing : public SharingHandler { typedef typename D3D::D3DQuery D3DQuery; typedef typename D3D::D3DResource D3DResource; public: D3DSharing(Context *context, D3DResource *resource, D3DResource *resourceStaging, unsigned int subresource, bool sharedResource); ~D3DSharing() override; void synchronizeObject(UpdateData &updateData) override; void releaseResource(MemObj *memObject, uint32_t rootDeviceIndex) override; D3DResource **getResourceHandler() { return &resource; } void *getResourceStaging() { return resourceStaging; } 
unsigned int &getSubresource() { return subresource; } typename D3DQuery *getQuery() { return d3dQuery; } bool isSharedResource() { return sharedResource; } static const ClSurfaceFormatInfo *findSurfaceFormatInfo(GMM_RESOURCE_FORMAT_ENUM gmmFormat, cl_mem_flags flags, bool supportsOcl20Features, bool packedSupported); static bool isFormatWithPlane1(DXGI_FORMAT format); protected: static void updateImgInfoAndDesc(Gmm *gmm, ImageInfo &imgInfo, ImagePlane imagePlane, cl_uint arrayIndex); Context *context; D3DSharingFunctions *sharingFunctions = nullptr; D3DResource *resource = nullptr; D3DResource *resourceStaging = nullptr; D3DQuery *d3dQuery = nullptr; bool sharedResource = false; unsigned int subresource = 0; std::mutex mtx; }; } // namespace NEO compute-runtime-22.14.22890/opencl/source/sharings/d3d/d3d_surface.cpp000066400000000000000000000301331422164147700253100ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/sharings/d3d/d3d_surface.h" #include "shared/source/gmm_helper/gmm.h" #include "shared/source/helpers/get_info.h" #include "shared/source/memory_manager/memory_manager.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/context/context.h" #include "opencl/source/helpers/cl_memory_properties_helpers.h" #include "opencl/source/helpers/gmm_types_converter.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/mem_obj/mem_obj_helper.h" #include "mmsystem.h" using namespace NEO; D3DSurface::D3DSurface(Context *context, cl_dx9_surface_info_khr *surfaceInfo, D3D9Surface *surfaceStaging, cl_uint plane, ImagePlane imagePlane, cl_dx9_media_adapter_type_khr adapterType, bool sharedResource, bool lockable) : D3DSharing(context, surfaceInfo->resource, surfaceStaging, plane, sharedResource), adapterType(adapterType), surfaceInfo(*surfaceInfo), lockable(lockable), plane(plane), imagePlane(imagePlane), d3d9Surface(surfaceInfo->resource), 
d3d9SurfaceStaging(surfaceStaging) { if (sharingFunctions) { resourceDevice = sharingFunctions->getDevice(); } }; Image *D3DSurface::create(Context *context, cl_dx9_surface_info_khr *surfaceInfo, cl_mem_flags flags, cl_dx9_media_adapter_type_khr adapterType, cl_uint plane, cl_int *retCode) { ErrorCodeHelper err(retCode, CL_SUCCESS); D3D9Surface *surfaceStaging = nullptr; ImageInfo imgInfo = {}; cl_image_format imgFormat = {}; McsSurfaceInfo mcsSurfaceInfo = {}; ImagePlane imagePlane = ImagePlane::NO_PLANE; if (!context || !context->getSharing>() || !context->getSharing>()->getDevice()) { err.set(CL_INVALID_CONTEXT); return nullptr; } auto sharingFcns = context->getSharing>(); if (sharingFcns->isTracked(surfaceInfo->resource, plane)) { err.set(CL_INVALID_DX9_RESOURCE_INTEL); return nullptr; } sharingFcns->updateDevice(surfaceInfo->resource); imgInfo.imgDesc.imageType = ImageType::Image2D; D3D9SurfaceDesc surfaceDesc = {}; sharingFcns->getTexture2dDesc(&surfaceDesc, surfaceInfo->resource); imgInfo.imgDesc.imageWidth = surfaceDesc.Width; imgInfo.imgDesc.imageHeight = surfaceDesc.Height; if (surfaceDesc.Pool != D3DPOOL_DEFAULT) { err.set(CL_INVALID_DX9_RESOURCE_INTEL); return nullptr; } err.set(findImgFormat(surfaceDesc.Format, imgFormat, plane, imagePlane)); if (err.localErrcode != CL_SUCCESS) { return nullptr; } imgInfo.plane = GmmTypesConverter::convertPlane(imagePlane); auto *clSurfaceFormat = Image::getSurfaceFormatFromTable(flags, &imgFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); imgInfo.surfaceFormat = &clSurfaceFormat->surfaceFormat; bool isSharedResource = false; bool lockable = false; auto rootDeviceIndex = context->getDevice(0)->getRootDeviceIndex(); GraphicsAllocation *alloc = nullptr; if (surfaceInfo->shared_handle) { isSharedResource = true; AllocationProperties allocProperties(rootDeviceIndex, false, // allocateMemory 0u, // size AllocationType::SHARED_IMAGE, false, // isMultiStorageAllocation 
context->getDeviceBitfieldForAllocation(rootDeviceIndex)); alloc = context->getMemoryManager()->createGraphicsAllocationFromSharedHandle(toOsHandle(surfaceInfo->shared_handle), allocProperties, false, false); updateImgInfoAndDesc(alloc->getDefaultGmm(), imgInfo, imagePlane, 0u); } else { lockable = !(surfaceDesc.Usage & D3DResourceFlags::USAGE_RENDERTARGET) || imagePlane != ImagePlane::NO_PLANE; if (!lockable) { sharingFcns->createTexture2d(&surfaceStaging, &surfaceDesc, 0u); } if (imagePlane == ImagePlane::PLANE_U || imagePlane == ImagePlane::PLANE_V || imagePlane == ImagePlane::PLANE_UV) { imgInfo.imgDesc.imageWidth /= 2; imgInfo.imgDesc.imageHeight /= 2; } MemoryProperties memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context->getDevice(0)->getDevice()); AllocationProperties allocProperties = MemObjHelper::getAllocationPropertiesWithImageInfo(rootDeviceIndex, imgInfo, true, // allocateMemory memoryProperties, context->getDevice(0)->getHardwareInfo(), context->getDeviceBitfieldForAllocation(rootDeviceIndex), context->isSingleDeviceContext()); allocProperties.allocationType = AllocationType::SHARED_RESOURCE_COPY; alloc = context->getMemoryManager()->allocateGraphicsMemoryInPreferredPool(allocProperties, nullptr); imgInfo.imgDesc.imageRowPitch = imgInfo.rowPitch; imgInfo.imgDesc.imageSlicePitch = imgInfo.slicePitch; } DEBUG_BREAK_IF(!alloc); auto surface = new D3DSurface(context, surfaceInfo, surfaceStaging, plane, imagePlane, adapterType, isSharedResource, lockable); auto multiGraphicsAllocation = MultiGraphicsAllocation(rootDeviceIndex); multiGraphicsAllocation.addAllocation(alloc); return Image::createSharedImage(context, surface, mcsSurfaceInfo, std::move(multiGraphicsAllocation), nullptr, flags, 0, clSurfaceFormat, imgInfo, __GMM_NO_CUBE_MAP, 0, 0); } void D3DSurface::synchronizeObject(UpdateData &updateData) { D3DLOCKED_RECT lockedRect = {}; sharingFunctions->setDevice(resourceDevice); if (sharedResource && 
!context->getInteropUserSyncEnabled()) { sharingFunctions->flushAndWait(d3dQuery); } else if (!sharedResource) { if (lockable) { sharingFunctions->lockRect(d3d9Surface, &lockedRect, D3DLOCK_READONLY); } else { sharingFunctions->getRenderTargetData(d3d9Surface, d3d9SurfaceStaging); sharingFunctions->lockRect(d3d9SurfaceStaging, &lockedRect, D3DLOCK_READONLY); } auto image = castToObjectOrAbort(updateData.memObject); auto graphicsAllocation = image->getGraphicsAllocation(updateData.rootDeviceIndex); auto sys = lockedRect.pBits; auto gpu = context->getMemoryManager()->lockResource(graphicsAllocation); auto pitch = static_cast(lockedRect.Pitch); auto height = static_cast(image->getImageDesc().image_height); graphicsAllocation->getDefaultGmm()->resourceCopyBlt(sys, gpu, pitch, height, 1u, imagePlane); context->getMemoryManager()->unlockResource(graphicsAllocation); if (lockable) { sharingFunctions->unlockRect(d3d9Surface); } else { sharingFunctions->unlockRect(d3d9SurfaceStaging); } sharingFunctions->flushAndWait(d3dQuery); } updateData.synchronizationStatus = SynchronizeStatus::ACQUIRE_SUCCESFUL; } void D3DSurface::releaseResource(MemObj *memObject, uint32_t rootDeviceIndex) { D3DLOCKED_RECT lockedRect = {}; auto image = castToObject(memObject); if (!image) { return; } sharingFunctions->setDevice(resourceDevice); if (!sharedResource) { if (lockable) { sharingFunctions->lockRect(d3d9Surface, &lockedRect, 0); } else { sharingFunctions->lockRect(d3d9SurfaceStaging, &lockedRect, 0); } auto sys = lockedRect.pBits; auto graphicsAllocation = image->getGraphicsAllocation(rootDeviceIndex); auto gpu = context->getMemoryManager()->lockResource(graphicsAllocation); auto pitch = static_cast(lockedRect.Pitch); auto height = static_cast(image->getImageDesc().image_height); graphicsAllocation->getDefaultGmm()->resourceCopyBlt(sys, gpu, pitch, height, 0u, imagePlane); context->getMemoryManager()->unlockResource(graphicsAllocation); if (lockable) { 
sharingFunctions->unlockRect(d3d9Surface); } else { sharingFunctions->unlockRect(d3d9SurfaceStaging); sharingFunctions->updateSurface(d3d9SurfaceStaging, d3d9Surface); } } } const std::map D3DSurface::D3DtoClFormatConversions = { {D3DFMT_R32F, {CL_R, CL_FLOAT}}, {D3DFMT_R16F, {CL_R, CL_HALF_FLOAT}}, {D3DFMT_L16, {CL_R, CL_UNORM_INT16}}, {D3DFMT_A8, {CL_A, CL_UNORM_INT8}}, {D3DFMT_L8, {CL_R, CL_UNORM_INT8}}, {D3DFMT_G32R32F, {CL_RG, CL_FLOAT}}, {D3DFMT_G16R16F, {CL_RG, CL_HALF_FLOAT}}, {D3DFMT_G16R16, {CL_RG, CL_UNORM_INT16}}, {D3DFMT_A8L8, {CL_RG, CL_UNORM_INT8}}, {D3DFMT_A32B32G32R32F, {CL_RGBA, CL_FLOAT}}, {D3DFMT_A16B16G16R16F, {CL_RGBA, CL_HALF_FLOAT}}, {D3DFMT_A16B16G16R16, {CL_RGBA, CL_UNORM_INT16}}, {D3DFMT_X8B8G8R8, {CL_RGBA, CL_UNORM_INT8}}, {D3DFMT_A8B8G8R8, {CL_RGBA, CL_UNORM_INT8}}, {D3DFMT_A8R8G8B8, {CL_BGRA, CL_UNORM_INT8}}, {D3DFMT_X8R8G8B8, {CL_BGRA, CL_UNORM_INT8}}, {D3DFMT_YUY2, {CL_YUYV_INTEL, CL_UNORM_INT8}}, {D3DFMT_UYVY, {CL_UYVY_INTEL, CL_UNORM_INT8}}, // The specific channel_order for NV12 is selected in findImgFormat {static_cast(MAKEFOURCC('N', 'V', '1', '2')), {CL_R | CL_RG, CL_UNORM_INT8}}, {static_cast(MAKEFOURCC('Y', 'V', '1', '2')), {CL_R, CL_UNORM_INT8}}, {static_cast(MAKEFOURCC('Y', 'V', 'Y', 'U')), {CL_YVYU_INTEL, CL_UNORM_INT8}}, {static_cast(MAKEFOURCC('V', 'Y', 'U', 'Y')), {CL_VYUY_INTEL, CL_UNORM_INT8}}}; const std::vector D3DSurface::D3DPlane1Formats = { static_cast(MAKEFOURCC('N', 'V', '1', '2')), static_cast(MAKEFOURCC('Y', 'V', '1', '2'))}; const std::vector D3DSurface::D3DPlane2Formats = {static_cast(MAKEFOURCC('Y', 'V', '1', '2'))}; cl_int D3DSurface::findImgFormat(D3DFORMAT d3dFormat, cl_image_format &imgFormat, cl_uint plane, ImagePlane &imagePlane) { imagePlane = ImagePlane::NO_PLANE; static const cl_image_format unknown_format = {0, 0}; auto element = D3DtoClFormatConversions.find(d3dFormat); if (element == D3DtoClFormatConversions.end()) { imgFormat = unknown_format; return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; } 
imgFormat = element->second; switch (d3dFormat) { case static_cast(MAKEFOURCC('N', 'V', '1', '2')): switch (plane) { case 0: imgFormat.image_channel_order = CL_R; imagePlane = ImagePlane::PLANE_Y; return CL_SUCCESS; case 1: imgFormat.image_channel_order = CL_RG; imagePlane = ImagePlane::PLANE_UV; return CL_SUCCESS; default: imgFormat = unknown_format; return CL_INVALID_VALUE; } case static_cast(MAKEFOURCC('Y', 'V', '1', '2')): switch (plane) { case 0: imagePlane = ImagePlane::PLANE_Y; return CL_SUCCESS; case 1: imagePlane = ImagePlane::PLANE_V; return CL_SUCCESS; case 2: imagePlane = ImagePlane::PLANE_U; return CL_SUCCESS; default: imgFormat = unknown_format; return CL_INVALID_VALUE; } } if (plane > 0) { return CL_INVALID_VALUE; } return CL_SUCCESS; } int D3DSurface::validateUpdateData(UpdateData &updateData) { auto image = castToObject(updateData.memObject); if (!image) { return CL_INVALID_MEM_OBJECT; } return CL_SUCCESS; } compute-runtime-22.14.22890/opencl/source/sharings/d3d/d3d_surface.h000066400000000000000000000041531422164147700247600ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/sharings/d3d/d3d_sharing.h" #include struct ErrorCodeHelper; namespace NEO { enum class ImagePlane; class Image; class Context; class D3DSurface : public D3DSharing { typedef typename D3DTypesHelper::D3D9::D3DTexture2dDesc D3D9SurfaceDesc; typedef typename D3DTypesHelper::D3D9::D3DTexture2d D3D9Surface; typedef typename D3DTypesHelper::D3D9::D3DResource D3DResource; typedef typename D3DTypesHelper::D3D9::D3DDevice D3DDevice; public: static Image *create(Context *context, cl_dx9_surface_info_khr *surfaceInfo, cl_mem_flags flags, cl_dx9_media_adapter_type_khr adapterType, cl_uint plane, cl_int *retCode); static const std::map D3DtoClFormatConversions; static const std::vector D3DPlane1Formats; static const std::vector D3DPlane2Formats; static cl_int findImgFormat(D3DFORMAT 
d3dFormat, cl_image_format &imgFormat, cl_uint plane, ImagePlane &imagePlane); void synchronizeObject(UpdateData &updateData) override; void releaseResource(MemObj *memObject, uint32_t rootDeviceIndex) override; int validateUpdateData(UpdateData &updateData) override; cl_dx9_surface_info_khr &getSurfaceInfo() { return surfaceInfo; } cl_dx9_media_adapter_type_khr &getAdapterType() { return adapterType; } cl_uint &getPlane() { return plane; } ~D3DSurface() override = default; const bool lockable = false; protected: D3DSurface(Context *context, cl_dx9_surface_info_khr *surfaceInfo, D3D9Surface *surfaceStaging, cl_uint plane, ImagePlane imagePlane, cl_dx9_media_adapter_type_khr adapterType, bool sharedResource, bool lockable); cl_dx9_media_adapter_type_khr adapterType = 0u; cl_dx9_surface_info_khr surfaceInfo = {}; cl_uint plane = 0; ImagePlane imagePlane; D3D9Surface *d3d9Surface = nullptr; D3D9Surface *d3d9SurfaceStaging = nullptr; D3DDevice *resourceDevice = nullptr; }; } // namespace NEO compute-runtime-22.14.22890/opencl/source/sharings/d3d/d3d_texture.cpp000066400000000000000000000256051422164147700253700ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/sharings/d3d/d3d_texture.h" #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/gmm_helper/gmm.h" #include "shared/source/gmm_helper/resource_info.h" #include "shared/source/helpers/get_info.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/os_interface/hw_info_config.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/context/context.h" #include "opencl/source/helpers/gmm_types_converter.h" #include "opencl/source/mem_obj/image.h" using namespace NEO; template class D3DTexture; template class D3DTexture; template Image *D3DTexture::create2d(Context *context, D3DTexture2d *d3dTexture, 
cl_mem_flags flags, cl_uint subresource, cl_int *retCode) { ErrorCodeHelper err(retCode, CL_SUCCESS); auto sharingFcns = context->getSharing>(); ImagePlane imagePlane = ImagePlane::NO_PLANE; void *sharedHandle = nullptr; cl_uint arrayIndex = 0u; cl_image_format imgFormat = {}; McsSurfaceInfo mcsSurfaceInfo = {}; ImageInfo imgInfo = {}; imgInfo.imgDesc.imageType = ImageType::Image2D; D3DTexture2dDesc textureDesc = {}; sharingFcns->getTexture2dDesc(&textureDesc, d3dTexture); cl_int formatSupportError = sharingFcns->validateFormatSupport(textureDesc.Format, CL_MEM_OBJECT_IMAGE2D); if (formatSupportError != CL_SUCCESS) { err.set(formatSupportError); return nullptr; } if (D3DSharing::isFormatWithPlane1(textureDesc.Format)) { if ((subresource % 2) == 0) { imagePlane = ImagePlane::PLANE_Y; } else { imagePlane = ImagePlane::PLANE_UV; } imgInfo.plane = GmmTypesConverter::convertPlane(imagePlane); arrayIndex = subresource / 2u; } else if (subresource >= textureDesc.MipLevels * textureDesc.ArraySize) { err.set(CL_INVALID_VALUE); return nullptr; } bool sharedResource = false; D3DTexture2d *textureStaging = nullptr; if ((textureDesc.MiscFlags & D3DResourceFlags::MISC_SHARED || textureDesc.MiscFlags & D3DResourceFlags::MISC_SHARED_KEYEDMUTEX) && subresource % textureDesc.MipLevels == 0) { textureStaging = d3dTexture; sharedResource = true; } else { sharingFcns->createTexture2d(&textureStaging, &textureDesc, subresource); } GraphicsAllocation *alloc = nullptr; auto memoryManager = context->getMemoryManager(); auto rootDeviceIndex = context->getDevice(0)->getRootDeviceIndex(); if (textureDesc.MiscFlags & D3DResourceFlags::MISC_SHARED_NTHANDLE) { sharingFcns->getSharedNTHandle(textureStaging, &sharedHandle); if (memoryManager->verifyHandle(toOsHandle(sharedHandle), rootDeviceIndex, true)) { alloc = memoryManager->createGraphicsAllocationFromNTHandle(sharedHandle, rootDeviceIndex, AllocationType::SHARED_IMAGE); } else { err.set(CL_INVALID_D3D11_RESOURCE_KHR); return nullptr; } } 
else { sharingFcns->getSharedHandle(textureStaging, &sharedHandle); AllocationProperties allocProperties(rootDeviceIndex, false, // allocateMemory 0u, // size AllocationType::SHARED_IMAGE, false, // isMultiStorageAllocation context->getDeviceBitfieldForAllocation(rootDeviceIndex)); if (memoryManager->verifyHandle(toOsHandle(sharedHandle), rootDeviceIndex, false)) { alloc = memoryManager->createGraphicsAllocationFromSharedHandle(toOsHandle(sharedHandle), allocProperties, false, false); } else { err.set(CL_INVALID_D3D11_RESOURCE_KHR); return nullptr; } } if (alloc == nullptr) { err.set(CL_OUT_OF_HOST_MEMORY); return nullptr; } updateImgInfoAndDesc(alloc->getDefaultGmm(), imgInfo, imagePlane, arrayIndex); auto d3dTextureObj = new D3DTexture(context, d3dTexture, subresource, textureStaging, sharedResource); auto hwInfo = memoryManager->peekExecutionEnvironment().rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo(); auto &hwHelper = HwHelper::get(hwInfo->platform.eRenderCoreFamily); const ClSurfaceFormatInfo *clSurfaceFormat = nullptr; if ((textureDesc.Format == DXGI_FORMAT_NV12) || (textureDesc.Format == DXGI_FORMAT_P010) || (textureDesc.Format == DXGI_FORMAT_P016)) { clSurfaceFormat = findYuvSurfaceFormatInfo(textureDesc.Format, imagePlane, flags); imgInfo.surfaceFormat = &clSurfaceFormat->surfaceFormat; } else { clSurfaceFormat = findSurfaceFormatInfo(alloc->getDefaultGmm()->gmmResourceInfo->getResourceFormat(), flags, context->getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features, hwHelper.packedFormatsSupported()); imgInfo.surfaceFormat = &clSurfaceFormat->surfaceFormat; } if (alloc->getDefaultGmm()->unifiedAuxTranslationCapable()) { const auto &hwInfoConfig = *HwInfoConfig::get(hwInfo->platform.eProductFamily); alloc->getDefaultGmm()->isCompressionEnabled = hwInfoConfig.isPageTableManagerSupported(*hwInfo) ? 
memoryManager->mapAuxGpuVA(alloc) : true; } auto multiGraphicsAllocation = MultiGraphicsAllocation(rootDeviceIndex); multiGraphicsAllocation.addAllocation(alloc); return Image::createSharedImage(context, d3dTextureObj, mcsSurfaceInfo, std::move(multiGraphicsAllocation), nullptr, flags, 0, clSurfaceFormat, imgInfo, __GMM_NO_CUBE_MAP, 0, 0); } template Image *D3DTexture::create3d(Context *context, D3DTexture3d *d3dTexture, cl_mem_flags flags, cl_uint subresource, cl_int *retCode) { ErrorCodeHelper err(retCode, CL_SUCCESS); auto sharingFcns = context->getSharing>(); void *sharedHandle = nullptr; cl_image_format imgFormat = {}; McsSurfaceInfo mcsSurfaceInfo = {}; ImageInfo imgInfo = {}; imgInfo.imgDesc.imageType = ImageType::Image3D; D3DTexture3dDesc textureDesc = {}; sharingFcns->getTexture3dDesc(&textureDesc, d3dTexture); cl_int formatSupportError = sharingFcns->validateFormatSupport(textureDesc.Format, CL_MEM_OBJECT_IMAGE3D); if (formatSupportError != CL_SUCCESS) { err.set(formatSupportError); return nullptr; } if (subresource >= textureDesc.MipLevels) { err.set(CL_INVALID_VALUE); return nullptr; } bool sharedResource = false; D3DTexture3d *textureStaging = nullptr; if ((textureDesc.MiscFlags & D3DResourceFlags::MISC_SHARED || textureDesc.MiscFlags & D3DResourceFlags::MISC_SHARED_KEYEDMUTEX) && subresource == 0) { textureStaging = d3dTexture; sharedResource = true; } else { sharingFcns->createTexture3d(&textureStaging, &textureDesc, subresource); } GraphicsAllocation *alloc = nullptr; auto memoryManager = context->getMemoryManager(); auto rootDeviceIndex = context->getDevice(0)->getRootDeviceIndex(); if (textureDesc.MiscFlags & D3DResourceFlags::MISC_SHARED_NTHANDLE) { sharingFcns->getSharedNTHandle(textureStaging, &sharedHandle); if (memoryManager->verifyHandle(toOsHandle(sharedHandle), rootDeviceIndex, true)) { alloc = memoryManager->createGraphicsAllocationFromNTHandle(sharedHandle, rootDeviceIndex, AllocationType::SHARED_IMAGE); } else { 
err.set(CL_INVALID_D3D11_RESOURCE_KHR); return nullptr; } } else { sharingFcns->getSharedHandle(textureStaging, &sharedHandle); AllocationProperties allocProperties(rootDeviceIndex, false, // allocateMemory 0u, // size AllocationType::SHARED_IMAGE, false, // isMultiStorageAllocation context->getDeviceBitfieldForAllocation(rootDeviceIndex)); if (memoryManager->verifyHandle(toOsHandle(sharedHandle), rootDeviceIndex, false)) { alloc = memoryManager->createGraphicsAllocationFromSharedHandle(toOsHandle(sharedHandle), allocProperties, false, false); } else { err.set(CL_INVALID_D3D11_RESOURCE_KHR); return nullptr; } } if (alloc == nullptr) { err.set(CL_OUT_OF_HOST_MEMORY); return nullptr; } updateImgInfoAndDesc(alloc->getDefaultGmm(), imgInfo, ImagePlane::NO_PLANE, 0u); auto hwInfo = memoryManager->peekExecutionEnvironment().rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo(); auto &hwHelper = HwHelper::get(hwInfo->platform.eRenderCoreFamily); auto d3dTextureObj = new D3DTexture(context, d3dTexture, subresource, textureStaging, sharedResource); auto *clSurfaceFormat = findSurfaceFormatInfo(alloc->getDefaultGmm()->gmmResourceInfo->getResourceFormat(), flags, context->getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features, hwHelper.packedFormatsSupported()); imgInfo.qPitch = alloc->getDefaultGmm()->queryQPitch(GMM_RESOURCE_TYPE::RESOURCE_3D); imgInfo.surfaceFormat = &clSurfaceFormat->surfaceFormat; if (alloc->getDefaultGmm()->unifiedAuxTranslationCapable()) { const auto &hwInfoConfig = *HwInfoConfig::get(hwInfo->platform.eProductFamily); alloc->getDefaultGmm()->isCompressionEnabled = hwInfoConfig.isPageTableManagerSupported(*hwInfo) ? 
memoryManager->mapAuxGpuVA(alloc) : true; } auto multiGraphicsAllocation = MultiGraphicsAllocation(rootDeviceIndex); multiGraphicsAllocation.addAllocation(alloc); return Image::createSharedImage(context, d3dTextureObj, mcsSurfaceInfo, std::move(multiGraphicsAllocation), nullptr, flags, 0, clSurfaceFormat, imgInfo, __GMM_NO_CUBE_MAP, 0, 0); } template const ClSurfaceFormatInfo *D3DTexture::findYuvSurfaceFormatInfo(DXGI_FORMAT dxgiFormat, ImagePlane imagePlane, cl_mem_flags flags) { cl_image_format imgFormat = {}; if (imagePlane == ImagePlane::PLANE_Y) { imgFormat.image_channel_order = CL_R; } else { imgFormat.image_channel_order = CL_RG; } if ((dxgiFormat == DXGI_FORMAT_P010) || (dxgiFormat == DXGI_FORMAT_P016)) { imgFormat.image_channel_data_type = CL_UNORM_INT16; } else { imgFormat.image_channel_data_type = CL_UNORM_INT8; } return Image::getSurfaceFormatFromTable(flags, &imgFormat, false /* supportsOcl20Features */); } compute-runtime-22.14.22890/opencl/source/sharings/d3d/d3d_texture.h000066400000000000000000000023371422164147700250320ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/sharings/d3d/d3d_sharing.h" namespace NEO { class Context; class Image; template class D3DTexture : public D3DSharing { typedef typename D3D::D3DTexture2dDesc D3DTexture2dDesc; typedef typename D3D::D3DTexture3dDesc D3DTexture3dDesc; typedef typename D3D::D3DTexture2d D3DTexture2d; typedef typename D3D::D3DTexture3d D3DTexture3d; typedef typename D3D::D3DResource D3DResource; public: ~D3DTexture() override = default; static Image *create2d(Context *context, D3DTexture2d *d3dTexture, cl_mem_flags flags, cl_uint subresource, cl_int *retCode); static Image *create3d(Context *context, D3DTexture3d *d3dTexture, cl_mem_flags flags, cl_uint subresource, cl_int *retCode); static const ClSurfaceFormatInfo *findYuvSurfaceFormatInfo(DXGI_FORMAT dxgiFormat, ImagePlane imagePlane, cl_mem_flags 
flags); protected: D3DTexture(Context *context, D3DResource *d3dTexture, cl_uint subresource, D3DResource *textureStaging, bool sharedResource) : D3DSharing(context, d3dTexture, textureStaging, subresource, sharedResource){}; }; } // namespace NEO compute-runtime-22.14.22890/opencl/source/sharings/d3d/enable_d3d.cpp000066400000000000000000000172201422164147700251100ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #ifdef WIN32 #include "opencl/source/sharings/d3d/enable_d3d.h" #include "shared/source/os_interface/driver_info.h" #include "opencl/source/api/api.h" #include "opencl/source/context/context.h" #include "opencl/source/context/context.inl" #include "opencl/source/os_interface/windows/d3d_sharing_functions.h" #include "opencl/source/sharings/d3d/cl_d3d_api.h" #include "opencl/source/sharings/sharing_factory.h" #include "opencl/source/sharings/sharing_factory.inl" #include namespace NEO { bool D3DSharingContextBuilder::processProperties(cl_context_properties &propertyType, cl_context_properties &propertyValue) { if (contextData.get() == nullptr) { contextData = std::make_unique>(); } switch (propertyType) { case CL_CONTEXT_ADAPTER_D3D9_KHR: case CL_CONTEXT_ADAPTER_D3D9EX_KHR: case CL_CONTEXT_ADAPTER_DXVA_KHR: case CL_CONTEXT_D3D9_DEVICE_INTEL: case CL_CONTEXT_D3D9EX_DEVICE_INTEL: case CL_CONTEXT_DXVA_DEVICE_INTEL: contextData->pDevice = (D3DTypesHelper::D3D9::D3DDevice *)propertyValue; contextData->argumentsDefined = true; return true; } return false; } bool D3DSharingContextBuilder::processProperties(cl_context_properties &propertyType, cl_context_properties &propertyValue) { if (contextData.get() == nullptr) { contextData = std::make_unique>(); } switch (propertyType) { case CL_CONTEXT_D3D10_DEVICE_KHR: contextData->pDevice = (D3DTypesHelper::D3D10::D3DDevice *)propertyValue; contextData->argumentsDefined = true; return true; } return false; } bool 
D3DSharingContextBuilder::processProperties(cl_context_properties &propertyType, cl_context_properties &propertyValue) { if (contextData.get() == nullptr) { contextData = std::make_unique>(); } switch (propertyType) { case CL_CONTEXT_D3D11_DEVICE_KHR: contextData->pDevice = (D3DTypesHelper::D3D11::D3DDevice *)propertyValue; contextData->argumentsDefined = true; return true; } return false; } template <> void Context::registerSharing(D3DSharingFunctions *sharing) { this->sharingFunctions[D3DSharingFunctions::sharingId].reset(sharing); this->preferD3dSharedResources = 1u; } template <> void Context::registerSharing(D3DSharingFunctions *sharing) { this->sharingFunctions[D3DSharingFunctions::sharingId].reset(sharing); this->preferD3dSharedResources = 1u; } template bool D3DSharingContextBuilder::finalizeProperties(Context &context, int32_t &errcodeRet) { if (contextData.get() == nullptr) return true; if (contextData->argumentsDefined) { context.registerSharing(new D3DSharingFunctions(contextData->pDevice)); } return true; } template std::unique_ptr D3DSharingBuilderFactory::createContextBuilder() { return std::make_unique>(); }; std::string D3DSharingBuilderFactory::getExtensions(DriverInfo *driverInfo) { return extensionEnabled ? 
"cl_intel_dx9_media_sharing cl_khr_dx9_media_sharing " : ""; } std::string D3DSharingBuilderFactory::getExtensions(DriverInfo *driverInfo) { return "cl_khr_d3d10_sharing "; } std::string D3DSharingBuilderFactory::getExtensions(DriverInfo *driverInfo) { return "cl_khr_d3d11_sharing cl_intel_d3d11_nv12_media_sharing "; } void D3DSharingBuilderFactory::fillGlobalDispatchTable() { icdGlobalDispatchTable.clGetDeviceIDsFromDX9MediaAdapterKHR = clGetDeviceIDsFromDX9MediaAdapterKHR; icdGlobalDispatchTable.clCreateFromDX9MediaSurfaceKHR = clCreateFromDX9MediaSurfaceKHR; icdGlobalDispatchTable.clEnqueueReleaseDX9MediaSurfacesKHR = clEnqueueReleaseDX9MediaSurfacesKHR; icdGlobalDispatchTable.clEnqueueAcquireDX9MediaSurfacesKHR = clEnqueueAcquireDX9MediaSurfacesKHR; crtGlobalDispatchTable.clGetDeviceIDsFromDX9INTEL = clGetDeviceIDsFromDX9INTEL; crtGlobalDispatchTable.clCreateFromDX9MediaSurfaceINTEL = clCreateFromDX9MediaSurfaceINTEL; crtGlobalDispatchTable.clEnqueueAcquireDX9ObjectsINTEL = clEnqueueAcquireDX9ObjectsINTEL; crtGlobalDispatchTable.clEnqueueReleaseDX9ObjectsINTEL = clEnqueueReleaseDX9ObjectsINTEL; } void D3DSharingBuilderFactory::fillGlobalDispatchTable() { icdGlobalDispatchTable.clCreateFromD3D10BufferKHR = clCreateFromD3D10BufferKHR; icdGlobalDispatchTable.clCreateFromD3D10Texture2DKHR = clCreateFromD3D10Texture2DKHR; icdGlobalDispatchTable.clCreateFromD3D10Texture3DKHR = clCreateFromD3D10Texture3DKHR; icdGlobalDispatchTable.clEnqueueAcquireD3D10ObjectsKHR = clEnqueueAcquireD3D10ObjectsKHR; icdGlobalDispatchTable.clEnqueueReleaseD3D10ObjectsKHR = clEnqueueReleaseD3D10ObjectsKHR; icdGlobalDispatchTable.clGetDeviceIDsFromD3D10KHR = clGetDeviceIDsFromD3D10KHR; } void D3DSharingBuilderFactory::fillGlobalDispatchTable() { icdGlobalDispatchTable.clCreateFromD3D11BufferKHR = clCreateFromD3D11BufferKHR; icdGlobalDispatchTable.clCreateFromD3D11Texture2DKHR = clCreateFromD3D11Texture2DKHR; icdGlobalDispatchTable.clCreateFromD3D11Texture3DKHR = 
clCreateFromD3D11Texture3DKHR; icdGlobalDispatchTable.clEnqueueAcquireD3D11ObjectsKHR = clEnqueueAcquireD3D11ObjectsKHR; icdGlobalDispatchTable.clEnqueueReleaseD3D11ObjectsKHR = clEnqueueReleaseD3D11ObjectsKHR; icdGlobalDispatchTable.clGetDeviceIDsFromD3D11KHR = clGetDeviceIDsFromD3D11KHR; } void *D3DSharingBuilderFactory::getExtensionFunctionAddress(const std::string &functionName) { if (DebugManager.flags.EnableFormatQuery.get() && functionName == "clGetSupportedDX9MediaSurfaceFormatsINTEL") { return ((void *)(clGetSupportedDX9MediaSurfaceFormatsINTEL)); } return nullptr; } void *D3DSharingBuilderFactory::getExtensionFunctionAddress(const std::string &functionName) { if (DebugManager.flags.EnableFormatQuery.get() && functionName == "clGetSupportedD3D10TextureFormatsINTEL") { return ((void *)(clGetSupportedD3D10TextureFormatsINTEL)); } return nullptr; } void *D3DSharingBuilderFactory::getExtensionFunctionAddress(const std::string &functionName) { if (DebugManager.flags.EnableFormatQuery.get() && functionName == "clGetSupportedD3D11TextureFormatsINTEL") { return ((void *)(clGetSupportedD3D11TextureFormatsINTEL)); } return nullptr; } void D3DSharingBuilderFactory::setExtensionEnabled(DriverInfo *driverInfo) { extensionEnabled = driverInfo->getMediaSharingSupport(); } void D3DSharingBuilderFactory::setExtensionEnabled(DriverInfo *driverInfo) {} void D3DSharingBuilderFactory::setExtensionEnabled(DriverInfo *driverInfo) {} static SharingFactory::RegisterSharing, D3DSharingFunctions> D3D9Sharing; static SharingFactory::RegisterSharing, D3DSharingFunctions> D3D10Sharing; static SharingFactory::RegisterSharing, D3DSharingFunctions> D3D11Sharing; } // namespace NEO #endif compute-runtime-22.14.22890/opencl/source/sharings/d3d/enable_d3d.h000066400000000000000000000023141422164147700245530ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/sharings/sharing_factory.h" #include 
namespace NEO { class Context; class DriverInfo; template struct D3DCreateContextProperties { typename D3D::D3DDevice *pDevice = nullptr; bool argumentsDefined = false; }; template class D3DSharingContextBuilder : public SharingContextBuilder { protected: std::unique_ptr> contextData; public: bool processProperties(cl_context_properties &propertyType, cl_context_properties &propertyValue) override; bool finalizeProperties(Context &context, int32_t &errcodeRet) override; }; template class D3DSharingBuilderFactory : public SharingBuilderFactory { public: std::unique_ptr createContextBuilder() override; std::string getExtensions(DriverInfo *driverInfo) override; void fillGlobalDispatchTable() override; void *getExtensionFunctionAddress(const std::string &functionName) override; void setExtensionEnabled(DriverInfo *driverInfo) override; bool extensionEnabled = true; }; } // namespace NEO compute-runtime-22.14.22890/opencl/source/sharings/gl/000077500000000000000000000000001422164147700223525ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/source/sharings/gl/CMakeLists.txt000066400000000000000000000023041422164147700251110ustar00rootroot00000000000000# # Copyright (C) 2018-2021 Intel Corporation # # SPDX-License-Identifier: MIT # if(WIN32) set(RUNTIME_SRCS_SHARINGS_GL ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/cl_gl_api_intel.h ${CMAKE_CURRENT_SOURCE_DIR}/gl_arb_sync_event.h ${CMAKE_CURRENT_SOURCE_DIR}/gl_buffer.h ${CMAKE_CURRENT_SOURCE_DIR}/gl_cl_image_format.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gl_context_guard.h ${CMAKE_CURRENT_SOURCE_DIR}/gl_sharing.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gl_sharing.h ${CMAKE_CURRENT_SOURCE_DIR}/gl_sync_event.h ${CMAKE_CURRENT_SOURCE_DIR}/gl_texture.h ) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_SHARINGS_GL}) add_subdirectories() set(ADDITIONAL_EXPORTS "clEnqueueMarkerWithSyncObjectINTEL" "clGetCLObjectInfoINTEL" "clGetCLEventInfoINTEL" "clReleaseGlSharedEventINTEL" ) 
foreach(EXPORT_NAME ${ADDITIONAL_EXPORTS}) set(MSVC_DEF_ADDITIONAL_EXPORTS "${MSVC_DEF_ADDITIONAL_EXPORTS}\n${EXPORT_NAME}") endforeach() set(MSVC_DEF_ADDITIONAL_EXPORTS "${MSVC_DEF_ADDITIONAL_EXPORTS}" PARENT_SCOPE) endif() set_property(GLOBAL PROPERTY RUNTIME_SRCS_SHARINGS_GL ${RUNTIME_SRCS_SHARINGS_GL}) compute-runtime-22.14.22890/opencl/source/sharings/gl/cl_gl_api_intel.h000066400000000000000000000005431422164147700256310ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "CL/cl_gl.h" extern cl_int CL_API_CALL clGetSupportedGLTextureFormatsINTEL( cl_context context, cl_mem_flags flags, cl_mem_object_type imageType, cl_uint numEntries, cl_GLenum *glFormats, cl_uint *numTextureFormats); compute-runtime-22.14.22890/opencl/source/sharings/gl/gl_arb_sync_event.h000066400000000000000000000035611422164147700262130ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/event/event.h" struct _tagCLGLSyncInfo; typedef _tagCLGLSyncInfo CL_GL_SYNC_INFO; namespace NEO { class Context; class GLSharingFunctions; class OsInterface; class OsContext; char *createArbSyncEventName(); void destroyArbSyncEventName(char *name); void cleanupArbSyncObject(OSInterface &osInterface, CL_GL_SYNC_INFO *glSyncInfo); bool setupArbSyncObject(GLSharingFunctions &sharing, OSInterface &osInterface, CL_GL_SYNC_INFO &glSyncInfo); void signalArbSyncObject(OsContext &osContext, CL_GL_SYNC_INFO &glSyncInfo); void serverWaitForArbSyncObject(OSInterface &osInterface, CL_GL_SYNC_INFO &glSyncInfo); class GlArbSyncEvent : public Event { public: GlArbSyncEvent() = delete; ~GlArbSyncEvent() override; void unblockEventBy(Event &event, uint32_t taskLevel, int32_t transitionStatus) override; static GlArbSyncEvent *create(Event &baseEvent); CL_GL_SYNC_INFO *getSyncInfo() { return glSyncInfo.get(); } protected: GlArbSyncEvent(Context 
&context); MOCKABLE_VIRTUAL bool setBaseEvent(Event &ev); Event *baseEvent = nullptr; OSInterface *osInterface = nullptr; std::unique_ptr glSyncInfo; }; } // namespace NEO extern "C" CL_API_ENTRY cl_int CL_API_CALL clEnqueueMarkerWithSyncObjectINTEL(cl_command_queue commandQueue, cl_event *event, cl_context *context); extern "C" CL_API_ENTRY cl_int CL_API_CALL clGetCLObjectInfoINTEL(cl_mem memObj, void *pResourceInfo); extern "C" CL_API_ENTRY cl_int CL_API_CALL clGetCLEventInfoINTEL(cl_event event, CL_GL_SYNC_INFO **pSyncInfoHandleRet, cl_context *pClContextRet); extern "C" CL_API_ENTRY cl_int CL_API_CALL clReleaseGlSharedEventINTEL(cl_event event); compute-runtime-22.14.22890/opencl/source/sharings/gl/gl_buffer.h000066400000000000000000000021401422164147700244530ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/sharings/gl/gl_sharing.h" #include "CL/cl_gl.h" struct _tagCLGLBufferInfo; namespace NEO { class Buffer; class Context; class GlBuffer : public GlSharing { public: static Buffer *createSharedGlBuffer(Context *context, cl_mem_flags flags, unsigned int bufferId, cl_int *errcodeRet); void synchronizeObject(UpdateData &updateData) override; void releaseReusedGraphicsAllocation() override; protected: GlBuffer(GLSharingFunctions *sharingFunctions, unsigned int glObjectId) : GlSharing(sharingFunctions, CL_GL_OBJECT_BUFFER, glObjectId){}; void releaseResource(MemObj *memObject, uint32_t rootDeviceIndex) override; void resolveGraphicsAllocationChange(osHandle currentSharedHandle, UpdateData *updateData) override; void popGraphicsAllocationFromReuse(GraphicsAllocation *graphicsAllocation); static GraphicsAllocation *createGraphicsAllocation(Context *context, unsigned int bufferId, _tagCLGLBufferInfo &bufferInfo); }; } // namespace NEO 
compute-runtime-22.14.22890/opencl/source/sharings/gl/gl_cl_image_format.cpp000066400000000000000000000013641422164147700266540ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/extensions/public/cl_gl_private_intel.h" #include "opencl/source/sharings/gl/gl_sharing.h" #include "opencl/source/sharings/gl/gl_texture.h" #include "GL/gl.h" #include "config.h" namespace NEO { bool GlTexture::setClImageFormat(int glFormat, cl_image_format &clImgFormat) { auto clFormat = GlSharing::glToCLFormats.find(static_cast(glFormat)); if (clFormat != GlSharing::glToCLFormats.end()) { clImgFormat.image_channel_data_type = clFormat->second.image_channel_data_type; clImgFormat.image_channel_order = clFormat->second.image_channel_order; return true; } return false; } } // namespace NEO compute-runtime-22.14.22890/opencl/source/sharings/gl/gl_context_guard.h000066400000000000000000000007411422164147700260550ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/sharings/gl/gl_sharing.h" #include "gl_types.h" namespace NEO { class GLContextGuard { public: GLContextGuard() = delete; GLContextGuard(GLSharingFunctions &sharingFcns); ~GLContextGuard(); protected: GLSharingFunctions *sharingFunctions; GLContext currentContextHandle; GLDisplay currentDisplayHandle; }; } // namespace NEO compute-runtime-22.14.22890/opencl/source/sharings/gl/gl_sharing.cpp000066400000000000000000000111411422164147700251710ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/sharings/gl/gl_sharing.h" #include "shared/source/helpers/string.h" #include "shared/source/helpers/timestamp_packet.h" #include "opencl/source/context/context.inl" #include "opencl/source/sharings/gl/gl_context_guard.h" #include "opencl/source/sharings/sharing_factory.h" #include namespace 
NEO { const uint32_t GLSharingFunctions::sharingId = SharingType::CLGL_SHARING; const std::unordered_map GlSharing::glToCLFormats = { {GL_RGBA8, {CL_RGBA, CL_UNORM_INT8}}, {GL_RGBA8I, {CL_RGBA, CL_SIGNED_INT8}}, {GL_RGBA16, {CL_RGBA, CL_UNORM_INT16}}, {GL_RGBA16I, {CL_RGBA, CL_SIGNED_INT16}}, {GL_RGBA32I, {CL_RGBA, CL_SIGNED_INT32}}, {GL_RGBA8UI, {CL_RGBA, CL_UNSIGNED_INT8}}, {GL_RGBA16UI, {CL_RGBA, CL_UNSIGNED_INT16}}, {GL_RGBA32UI, {CL_RGBA, CL_UNSIGNED_INT32}}, {GL_RGBA16F, {CL_RGBA, CL_HALF_FLOAT}}, {GL_RGBA32F, {CL_RGBA, CL_FLOAT}}, {GL_RGBA, {CL_RGBA, CL_UNORM_INT8}}, {GL_RGBA8_SNORM, {CL_RGBA, CL_SNORM_INT8}}, {GL_RGBA16_SNORM, {CL_RGBA, CL_SNORM_INT16}}, {GL_BGRA, {CL_BGRA, CL_UNORM_INT8}}, {GL_R8, {CL_R, CL_UNORM_INT8}}, {GL_R8_SNORM, {CL_R, CL_SNORM_INT8}}, {GL_R16, {CL_R, CL_UNORM_INT16}}, {GL_R16_SNORM, {CL_R, CL_SNORM_INT16}}, {GL_R16F, {CL_R, CL_HALF_FLOAT}}, {GL_R32F, {CL_R, CL_FLOAT}}, {GL_R8I, {CL_R, CL_SIGNED_INT8}}, {GL_R16I, {CL_R, CL_SIGNED_INT16}}, {GL_R32I, {CL_R, CL_SIGNED_INT32}}, {GL_R8UI, {CL_R, CL_UNSIGNED_INT8}}, {GL_R16UI, {CL_R, CL_UNSIGNED_INT16}}, {GL_R32UI, {CL_R, CL_UNSIGNED_INT32}}, {GL_DEPTH_COMPONENT32F, {CL_DEPTH, CL_FLOAT}}, {GL_DEPTH_COMPONENT16, {CL_DEPTH, CL_UNORM_INT16}}, {GL_DEPTH24_STENCIL8, {CL_DEPTH_STENCIL, CL_UNORM_INT24}}, {GL_DEPTH32F_STENCIL8, {CL_DEPTH_STENCIL, CL_FLOAT}}, {GL_SRGB8_ALPHA8, {CL_sRGBA, CL_UNORM_INT8}}, {GL_RG8, {CL_RG, CL_UNORM_INT8}}, {GL_RG8_SNORM, {CL_RG, CL_SNORM_INT8}}, {GL_RG16, {CL_RG, CL_UNORM_INT16}}, {GL_RG16_SNORM, {CL_RG, CL_SNORM_INT16}}, {GL_RG16F, {CL_RG, CL_HALF_FLOAT}}, {GL_RG32F, {CL_RG, CL_FLOAT}}, {GL_RG8I, {CL_RG, CL_SIGNED_INT8}}, {GL_RG16I, {CL_RG, CL_SIGNED_INT16}}, {GL_RG32I, {CL_RG, CL_SIGNED_INT32}}, {GL_RG8UI, {CL_RG, CL_UNSIGNED_INT8}}, {GL_RG16UI, {CL_RG, CL_UNSIGNED_INT16}}, {GL_RG32UI, {CL_RG, CL_UNSIGNED_INT32}}, {GL_RGB10, {CL_RGBA, CL_UNORM_INT16}}}; cl_int GLSharingFunctions::getSupportedFormats(cl_mem_flags flags, cl_mem_object_type imageType, size_t 
numEntries, cl_GLenum *formats, uint32_t *numImageFormats) { if (flags != CL_MEM_READ_ONLY && flags != CL_MEM_WRITE_ONLY && flags != CL_MEM_READ_WRITE && flags != CL_MEM_KERNEL_READ_AND_WRITE) { return CL_INVALID_VALUE; } if (imageType != CL_MEM_OBJECT_IMAGE1D && imageType != CL_MEM_OBJECT_IMAGE2D && imageType != CL_MEM_OBJECT_IMAGE3D && imageType != CL_MEM_OBJECT_IMAGE1D_ARRAY && imageType != CL_MEM_OBJECT_IMAGE1D_BUFFER && imageType != CL_MEM_OBJECT_IMAGE2D_ARRAY) { return CL_INVALID_VALUE; } const auto formatsCount = GlSharing::glToCLFormats.size(); if (numImageFormats != nullptr) { *numImageFormats = static_cast(formatsCount); } if (formats != nullptr && formatsCount > 0) { const auto elementsToCopy = std::min(numEntries, formatsCount); uint32_t outputFormatsIndex = 0; for (const auto &formatMapping : GlSharing::glToCLFormats) { formats[outputFormatsIndex++] = formatMapping.first; if (outputFormatsIndex == elementsToCopy) { break; } } } return CL_SUCCESS; } int GlSharing::synchronizeHandler(UpdateData &updateData) { GLContextGuard guard(*sharingFunctions); synchronizeObject(updateData); return CL_SUCCESS; } char *createArbSyncEventName() { static std::atomic synchCounter{0}; uint32_t id = synchCounter++; constexpr int maxDigitsForId = std::numeric_limits::digits10; static const char prefix[] = "NEO_SYNC_"; constexpr int nameMaxLen = sizeof(prefix) + maxDigitsForId + 1; char *ret = new char[nameMaxLen]; snprintf(ret, nameMaxLen, "%s_%d", prefix, id); return ret; } void destroyArbSyncEventName(char *name) { delete[] name; } template GLSharingFunctions *Context::getSharing(); } // namespace NEO compute-runtime-22.14.22890/opencl/source/sharings/gl/gl_sharing.h000066400000000000000000000042461422164147700246460ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/os_interface/os_library.h" #include "opencl/source/sharings/sharing.h" #include "CL/cl.h" #include "GL/gl.h" 
#include "GL/glext.h" #include #include #include struct _tagCLGLSyncInfo; typedef struct _tagCLGLSyncInfo CL_GL_SYNC_INFO, *PCL_GL_SYNC_INFO; namespace NEO { class Event; class GlArbSyncEvent; class GLSharingFunctions; class OSInterface; class OsContext; typedef unsigned int OS_HANDLE; typedef struct CLGLContextInfo { OS_HANDLE DeviceHandle; OS_HANDLE ContextHandle; } ContextInfo; class GLSharingFunctions : public SharingFunctions { public: GLSharingFunctions() = default; uint32_t getId() const override { return GLSharingFunctions::sharingId; } static const uint32_t sharingId; static cl_int getSupportedFormats(cl_mem_flags flags, cl_mem_object_type imageType, size_t numEntries, cl_GLenum *formats, uint32_t *numImageFormats); virtual GLboolean initGLFunctions() = 0; virtual bool isOpenGlSharingSupported() = 0; }; class GlSharing : public SharingHandler { public: GlSharing(GLSharingFunctions *sharingFunctions, unsigned int glObjectType, unsigned int glObjectId) : sharingFunctions(sharingFunctions), clGlObjectType(glObjectType), clGlObjectId(glObjectId){}; GLSharingFunctions *peekFunctionsHandler() { return sharingFunctions; } void getGlObjectInfo(unsigned int *pClGlObjectType, unsigned int *pClGlObjectId) { if (pClGlObjectType) { *pClGlObjectType = clGlObjectType; } if (pClGlObjectId) { *pClGlObjectId = clGlObjectId; } } static const std::unordered_map glToCLFormats; protected: int synchronizeHandler(UpdateData &updateData) override; GLSharingFunctions *sharingFunctions = nullptr; unsigned int clGlObjectType = 0u; unsigned int clGlObjectId = 0u; }; } // namespace NEO compute-runtime-22.14.22890/opencl/source/sharings/gl/gl_sync_event.h000066400000000000000000000013511422164147700253620ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/event/event.h" #include struct _tagGLCLSyncInfo; typedef _tagGLCLSyncInfo GL_CL_SYNC_INFO; namespace NEO { class Context; class 
GlSyncEvent : public Event { public: GlSyncEvent() = delete; GlSyncEvent(Context &context, const GL_CL_SYNC_INFO &sync); ~GlSyncEvent() override; static GlSyncEvent *create(Context &context, cl_GLsync sync, cl_int *errCode); void updateExecutionStatus() override; uint32_t getTaskLevel() override; bool isExternallySynchronized() const override { return true; } protected: std::unique_ptr glSync; }; } // namespace NEO compute-runtime-22.14.22890/opencl/source/sharings/gl/gl_texture.h000066400000000000000000000033201422164147700247030ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/extensions/public/cl_gl_private_intel.h" #include "opencl/source/sharings/gl/gl_sharing.h" #include "CL/cl_gl.h" namespace NEO { class Context; class Image; class GlTexture : GlSharing { public: static Image *createSharedGlTexture(Context *context, cl_mem_flags flags, cl_GLenum target, cl_GLint miplevel, cl_GLuint texture, cl_int *errcodeRet); void synchronizeObject(UpdateData &updateData) override; cl_int getGlTextureInfo(cl_gl_texture_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) const; cl_GLint getMiplevel() const { return miplevel; } CL_GL_RESOURCE_INFO *getTextureInfo() { return &textureInfo; } cl_GLenum getTarget() const { return target; } static bool setClImageFormat(int glFormat, cl_image_format &clImgFormat); static cl_mem_object_type getClMemObjectType(cl_GLenum glType); static cl_gl_object_type getClGlObjectType(cl_GLenum glType); static cl_GLenum getBaseTargetType(cl_GLenum target); protected: GlTexture(GLSharingFunctions *sharingFunctions, unsigned int glObjectType, unsigned int glObjectId, CL_GL_RESOURCE_INFO texInfo, cl_GLenum target, cl_GLint miplevel) : GlSharing(sharingFunctions, glObjectType, glObjectId), target(target), miplevel(miplevel), textureInfo(texInfo){}; static uint32_t getClObjectType(cl_GLenum glType, bool returnClGlObjectType); 
void releaseResource(MemObj *memObject, uint32_t rootDeviceIndex) override; cl_GLenum target; cl_GLint miplevel; CL_GL_RESOURCE_INFO textureInfo; }; } // namespace NEO compute-runtime-22.14.22890/opencl/source/sharings/gl/windows/000077500000000000000000000000001422164147700240445ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/source/sharings/gl/windows/CMakeLists.txt000066400000000000000000000025731422164147700266130ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(WIN32) set(RUNTIME_SRCS_SHARINGS_GL_WINDOWS ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/cl_gl_api.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gl_arb_sync_event_windows.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gl_buffer_windows.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gl_context_guard_windows.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gl_library_windows.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gl_sharing_windows.h ${CMAKE_CURRENT_SOURCE_DIR}/gl_sharing_windows.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gl_sync_event_windows.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gl_texture_windows.cpp ${CMAKE_CURRENT_SOURCE_DIR}/include/gl_types.h ${CMAKE_CURRENT_SOURCE_DIR}/win_enable_gl.h ) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_SHARINGS_GL_WINDOWS}) set(RUNTIME_SRCS_SHARINGS_GL_ENABLE_WINDOWS ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/win_enable_gl.cpp ${CMAKE_CURRENT_SOURCE_DIR}/win_enable_gl.h ) target_sources(${SHARINGS_ENABLE_LIB_NAME} PRIVATE ${RUNTIME_SRCS_SHARINGS_GL_ENABLE_WINDOWS}) endif() set_property(GLOBAL PROPERTY RUNTIME_SRCS_SHARINGS_GL_WINDOWS ${RUNTIME_SRCS_SHARINGS_GL_WINDOWS}) set_property(GLOBAL PROPERTY RUNTIME_SRCS_SHARINGS_GL_ENABLE_WINDOWS ${RUNTIME_SRCS_SHARINGS_GL_ENABLE_WINDOWS}) compute-runtime-22.14.22890/opencl/source/sharings/gl/windows/cl_gl_api.cpp000066400000000000000000000377221422164147700264740ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT 
* */ #include "shared/source/helpers/get_info.h" #include "shared/source/os_interface/os_interface.h" #include "shared/source/os_interface/windows/wddm/wddm.h" #include "shared/source/utilities/api_intercept.h" #include "opencl/source/api/api.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/context/context.h" #include "opencl/source/event/async_events_handler.h" #include "opencl/source/helpers/base_object.h" #include "opencl/source/helpers/cl_validators.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/mem_obj/mem_obj.h" #include "opencl/source/platform/platform.h" #include "opencl/source/sharings/gl/gl_buffer.h" #include "opencl/source/sharings/gl/gl_sync_event.h" #include "opencl/source/sharings/gl/gl_texture.h" #include "opencl/source/sharings/gl/windows/gl_sharing_windows.h" #include "opencl/source/tracing/tracing_notify.h" #include "opencl/source/utilities/cl_logger.h" #include "CL/cl.h" #include "CL/cl_gl.h" #include "config.h" using namespace NEO; cl_mem CL_API_CALL clCreateFromGLBuffer(cl_context context, cl_mem_flags flags, cl_GLuint bufobj, cl_int *errcodeRet) { TRACING_ENTER(clCreateFromGLBuffer, &context, &flags, &bufobj, &errcodeRet); API_ENTER(errcodeRet); DBG_LOG_INPUTS("context", context, "flags", flags, "bufobj", bufobj); Context *pContext = nullptr; auto returnCode = validateObjects(WithCastToInternal(context, &pContext)); ErrorCodeHelper err(errcodeRet, returnCode); if (returnCode != CL_SUCCESS) { cl_mem buffer = nullptr; TRACING_EXIT(clCreateFromGLBuffer, &buffer); return buffer; } if (pContext->getSharing() == nullptr) { err.set(CL_INVALID_CONTEXT); cl_mem buffer = nullptr; TRACING_EXIT(clCreateFromGLBuffer, &buffer); return buffer; } cl_mem buffer = GlBuffer::createSharedGlBuffer(pContext, flags, bufobj, errcodeRet); TRACING_EXIT(clCreateFromGLBuffer, &buffer); return buffer; } cl_mem CL_API_CALL 
clCreateFromGLTexture(cl_context context, cl_mem_flags flags, cl_GLenum target, cl_GLint miplevel, cl_GLuint texture, cl_int *errcodeRet) { TRACING_ENTER(clCreateFromGLTexture, &context, &flags, &target, &miplevel, &texture, &errcodeRet); API_ENTER(errcodeRet); DBG_LOG_INPUTS("context", context, "flags", flags, "target", target, "miplevel", miplevel, "texture", texture); Context *pContext = nullptr; auto returnCode = validateObjects(WithCastToInternal(context, &pContext)); ErrorCodeHelper err(errcodeRet, returnCode); cl_mem image = nullptr; if (returnCode != CL_SUCCESS) { TRACING_EXIT(clCreateFromGLTexture, &image); return image; } if (pContext->getSharing() == nullptr) { err.set(CL_INVALID_CONTEXT); TRACING_EXIT(clCreateFromGLTexture, &image); return image; } image = GlTexture::createSharedGlTexture(pContext, flags, target, miplevel, texture, errcodeRet); DBG_LOG_INPUTS("image", image); TRACING_EXIT(clCreateFromGLTexture, &image); return image; } // deprecated OpenCL 1.1 cl_mem CL_API_CALL clCreateFromGLTexture2D(cl_context context, cl_mem_flags flags, cl_GLenum target, cl_GLint miplevel, cl_GLuint texture, cl_int *errcodeRet) { TRACING_ENTER(clCreateFromGLTexture2D, &context, &flags, &target, &miplevel, &texture, &errcodeRet); API_ENTER(errcodeRet); DBG_LOG_INPUTS("context", context, "flags", flags, "target", target, "miplevel", miplevel, "texture", texture); Context *pContext = nullptr; auto returnCode = validateObjects(WithCastToInternal(context, &pContext)); ErrorCodeHelper err(errcodeRet, returnCode); cl_mem image = nullptr; if (returnCode != CL_SUCCESS) { TRACING_EXIT(clCreateFromGLTexture2D, &image); return image; } if (pContext->getSharing() == nullptr) { err.set(CL_INVALID_CONTEXT); TRACING_EXIT(clCreateFromGLTexture2D, &image); return image; } image = GlTexture::createSharedGlTexture(pContext, flags, target, miplevel, texture, errcodeRet); DBG_LOG_INPUTS("image", image); TRACING_EXIT(clCreateFromGLTexture2D, &image); return image; } // deprecated OpenCL 
1.1 cl_mem CL_API_CALL clCreateFromGLTexture3D(cl_context context, cl_mem_flags flags, cl_GLenum target, cl_GLint miplevel, cl_GLuint texture, cl_int *errcodeRet) { TRACING_ENTER(clCreateFromGLTexture3D, &context, &flags, &target, &miplevel, &texture, &errcodeRet); API_ENTER(errcodeRet); DBG_LOG_INPUTS("context", context, "flags", flags, "target", target, "miplevel", miplevel, "texture", texture); Context *pContext = nullptr; auto returnCode = validateObjects(WithCastToInternal(context, &pContext)); ErrorCodeHelper err(errcodeRet, returnCode); cl_mem image = nullptr; if (returnCode != CL_SUCCESS) { TRACING_EXIT(clCreateFromGLTexture3D, &image); return image; } if (pContext->getSharing() == nullptr) { err.set(CL_INVALID_CONTEXT); TRACING_EXIT(clCreateFromGLTexture3D, &image); return image; } image = GlTexture::createSharedGlTexture(pContext, flags, target, miplevel, texture, errcodeRet); DBG_LOG_INPUTS("image", image); TRACING_EXIT(clCreateFromGLTexture3D, &image); return image; } cl_mem CL_API_CALL clCreateFromGLRenderbuffer(cl_context context, cl_mem_flags flags, cl_GLuint renderbuffer, cl_int *errcodeRet) { TRACING_ENTER(clCreateFromGLRenderbuffer, &context, &flags, &renderbuffer, &errcodeRet); API_ENTER(errcodeRet); DBG_LOG_INPUTS("context", context, "flags", flags, "renderbuffer", renderbuffer); Context *pContext = nullptr; auto returnCode = validateObjects(WithCastToInternal(context, &pContext)); ErrorCodeHelper err(errcodeRet, returnCode); if (returnCode != CL_SUCCESS) { cl_mem buffer = nullptr; TRACING_EXIT(clCreateFromGLRenderbuffer, &buffer); return buffer; } if (pContext->getSharing() == nullptr) { err.set(CL_INVALID_CONTEXT); cl_mem buffer = nullptr; TRACING_EXIT(clCreateFromGLRenderbuffer, &buffer); return buffer; } cl_mem buffer = GlTexture::createSharedGlTexture(pContext, flags, GL_RENDERBUFFER_EXT, 0, renderbuffer, errcodeRet); TRACING_EXIT(clCreateFromGLRenderbuffer, &buffer); return buffer; } cl_int CL_API_CALL clGetGLObjectInfo(cl_mem memobj, 
cl_gl_object_type *glObjectType, cl_GLuint *glObjectName) { TRACING_ENTER(clGetGLObjectInfo, &memobj, &glObjectType, &glObjectName); cl_int retValue = CL_SUCCESS; API_ENTER(&retValue); DBG_LOG_INPUTS("memobj", memobj, "glObjectType", glObjectType, "glObjectName", glObjectName); retValue = validateObjects(memobj); if (retValue == CL_SUCCESS) { auto pMemObj = castToObject(memobj); auto handler = (GlSharing *)pMemObj->peekSharingHandler(); if (handler != nullptr) { handler->getGlObjectInfo(glObjectType, glObjectName); } else { retValue = CL_INVALID_GL_OBJECT; TRACING_EXIT(clGetGLObjectInfo, &retValue); return retValue; } } TRACING_EXIT(clGetGLObjectInfo, &retValue); return retValue; } cl_int CL_API_CALL clGetGLTextureInfo(cl_mem memobj, cl_gl_texture_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) { TRACING_ENTER(clGetGLTextureInfo, &memobj, ¶mName, ¶mValueSize, ¶mValue, ¶mValueSizeRet); cl_int retValue = CL_SUCCESS; API_ENTER(&retValue); DBG_LOG_INPUTS("memobj", memobj, "paramName", paramName, "paramValueSize", paramValueSize, "paramValueSize", FileLoggerInstance().infoPointerToString(paramValue, paramValueSize), "paramValueSizeRet", FileLoggerInstance().getInput(paramValueSizeRet, 0)); retValue = validateObjects(memobj); if (retValue == CL_SUCCESS) { auto pMemObj = castToObject(memobj); auto glTexture = (GlTexture *)pMemObj->peekSharingHandler(); retValue = glTexture->getGlTextureInfo(paramName, paramValueSize, paramValue, paramValueSizeRet); } TRACING_EXIT(clGetGLTextureInfo, &retValue); return retValue; } cl_int CL_API_CALL clEnqueueAcquireGLObjects(cl_command_queue commandQueue, cl_uint numObjects, const cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { TRACING_ENTER(clEnqueueAcquireGLObjects, &commandQueue, &numObjects, &memObjects, &numEventsInWaitList, &eventWaitList, &event); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue, 
"numObjects", numObjects, "memObjects", memObjects, "numEventsInWaitList", numEventsInWaitList, "eventWaitList", getClFileLogger().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList), "event", getClFileLogger().getEvents(reinterpret_cast(event), 1)); CommandQueue *pCommandQueue = nullptr; retVal = validateObjects(WithCastToInternal(commandQueue, &pCommandQueue), EventWaitList(numEventsInWaitList, eventWaitList)); if (retVal == CL_SUCCESS) { if (pCommandQueue->getContext().getSharing() == nullptr) { retVal = CL_INVALID_CONTEXT; TRACING_EXIT(clEnqueueAcquireGLObjects, &retVal); return retVal; } for (auto id = 0u; id < numEventsInWaitList; id++) { auto event = castToObjectOrAbort(eventWaitList[id]); event->updateExecutionStatus(); if ((event->peekExecutionStatus() > CL_COMPLETE) && (event->isExternallySynchronized())) { if (DebugManager.flags.EnableAsyncEventsHandler.get()) { event->getContext()->getAsyncEventsHandler().registerEvent(event); } } } retVal = pCommandQueue->enqueueAcquireSharedObjects(numObjects, memObjects, numEventsInWaitList, eventWaitList, event, CL_COMMAND_ACQUIRE_GL_OBJECTS); } TRACING_EXIT(clEnqueueAcquireGLObjects, &retVal); return retVal; } cl_int CL_API_CALL clEnqueueReleaseGLObjects(cl_command_queue commandQueue, cl_uint numObjects, const cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { TRACING_ENTER(clEnqueueReleaseGLObjects, &commandQueue, &numObjects, &memObjects, &numEventsInWaitList, &eventWaitList, &event); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue, "numObjects", numObjects, "memObjects", memObjects, "numEventsInWaitList", numEventsInWaitList, "eventWaitList", getClFileLogger().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList), "event", getClFileLogger().getEvents(reinterpret_cast(event), 1)); CommandQueue *pCommandQueue = nullptr; retVal = validateObjects(WithCastToInternal(commandQueue, &pCommandQueue), 
EventWaitList(numEventsInWaitList, eventWaitList)); if (retVal == CL_SUCCESS) { if (pCommandQueue->getContext().getSharing() == nullptr) { retVal = CL_INVALID_CONTEXT; TRACING_EXIT(clEnqueueReleaseGLObjects, &retVal); return retVal; } pCommandQueue->finish(); retVal = pCommandQueue->enqueueReleaseSharedObjects(numObjects, memObjects, numEventsInWaitList, eventWaitList, event, CL_COMMAND_RELEASE_GL_OBJECTS); } TRACING_EXIT(clEnqueueReleaseGLObjects, &retVal); return retVal; } cl_event CL_API_CALL clCreateEventFromGLsyncKHR(cl_context context, cl_GLsync sync, cl_int *errcodeRet) { API_ENTER(errcodeRet); DBG_LOG_INPUTS("context", context, "sync", sync); Context *pContext = nullptr; auto returnCode = validateObjects(WithCastToInternal(context, &pContext)); ErrorCodeHelper err(errcodeRet, returnCode); if (returnCode != CL_SUCCESS) { return nullptr; } if (pContext->getSharing() == nullptr) { err.set(CL_INVALID_CONTEXT); return nullptr; } return GlSyncEvent::create(*pContext, sync, errcodeRet); } cl_int CL_API_CALL clGetGLContextInfoKHR(const cl_context_properties *properties, cl_gl_context_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) { cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("properties", properties, "paramName", paramName, "paramValueSize", paramValueSize, "paramValue", FileLoggerInstance().infoPointerToString(paramValue, paramValueSize), "paramValueSizeRet", FileLoggerInstance().getInput(paramValueSizeRet, 0)); GetInfoHelper info(paramValue, paramValueSize, paramValueSizeRet); uint32_t GLHGLRCHandle = 0; uint32_t GLHDCHandle = 0; uint32_t propertyType = 0; uint32_t propertyValue = 0; Platform *platform = nullptr; if (properties != nullptr) { while (*properties != 0) { propertyType = static_cast(properties[0]); propertyValue = static_cast(properties[1]); switch (propertyType) { case CL_CONTEXT_PLATFORM: { platform = castToObject(reinterpret_cast(properties[1])); } break; case CL_GL_CONTEXT_KHR: GLHGLRCHandle 
= propertyValue; break; case CL_WGL_HDC_KHR: GLHDCHandle = propertyValue; break; } properties += 2; } } if ((GLHDCHandle == 0) || (GLHGLRCHandle == 0)) { retVal = CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR; return retVal; } auto glSharing = std::make_unique(); glSharing->initGLFunctions(); if (glSharing->isOpenGlSharingSupported() == false) { retVal = CL_INVALID_CONTEXT; return retVal; } if (paramName == CL_DEVICES_FOR_GL_CONTEXT_KHR || paramName == CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR) { if (!platform) { platform = (*platformsImpl)[0].get(); } ClDevice *deviceToReturn = nullptr; for (auto i = 0u; i < platform->getNumDevices(); i++) { auto device = platform->getClDevice(i); if (device->getRootDeviceEnvironment().osInterface->getDriverModel()->as()->verifyAdapterLuid(glSharing->getAdapterLuid(reinterpret_cast(static_cast(GLHGLRCHandle))))) { deviceToReturn = device; break; } } if (!deviceToReturn) { retVal = CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR; return retVal; } info.set(deviceToReturn); return retVal; } retVal = CL_INVALID_VALUE; return retVal; } cl_int CL_API_CALL clGetSupportedGLTextureFormatsINTEL( cl_context context, cl_mem_flags flags, cl_mem_object_type imageType, cl_uint numEntries, cl_GLenum *glFormats, cl_uint *numTextureFormats) { if (numTextureFormats) { *numTextureFormats = 0; } Context *pContext = castToObjectOrAbort(context); auto pSharing = pContext->getSharing(); if (!pSharing) { return CL_INVALID_CONTEXT; } return pSharing->getSupportedFormats(flags, imageType, numEntries, glFormats, numTextureFormats); } compute-runtime-22.14.22890/opencl/source/sharings/gl/windows/gl_arb_sync_event_windows.cpp000066400000000000000000000252251422164147700320130ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/device/device.h" #include "shared/source/os_interface/os_interface.h" #include 
"shared/source/os_interface/windows/gdi_interface.h" #include "shared/source/os_interface/windows/os_context_win.h" #include "shared/source/os_interface/windows/sys_calls.h" #include "shared/source/os_interface/windows/wddm/wddm.h" #include "opencl/extensions/public/cl_gl_private_intel.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/context/context.h" #include "opencl/source/helpers/base_object.h" #include "opencl/source/sharings/gl/gl_arb_sync_event.h" #include "opencl/source/sharings/gl/windows/gl_sharing_windows.h" #include namespace NEO { void destroySync(Gdi &gdi, D3DKMT_HANDLE sync) { if (!sync) { return; } D3DKMT_DESTROYSYNCHRONIZATIONOBJECT destroySyncInfo = {}; destroySyncInfo.hSyncObject = sync; [[maybe_unused]] NTSTATUS status = gdi.destroySynchronizationObject(&destroySyncInfo); DEBUG_BREAK_IF(STATUS_SUCCESS != status); } void destroyEvent(OSInterface &osInterface, HANDLE event) { if (!event) { return; } [[maybe_unused]] auto ret = SysCalls::closeHandle(event); DEBUG_BREAK_IF(TRUE != ret); } void cleanupArbSyncObject(OSInterface &osInterface, CL_GL_SYNC_INFO *glSyncInfo) { if (nullptr == glSyncInfo) { return; } auto gdi = osInterface.getDriverModel()->as()->getGdi(); UNRECOVERABLE_IF(nullptr == gdi); destroySync(*gdi, glSyncInfo->serverSynchronizationObject); destroySync(*gdi, glSyncInfo->clientSynchronizationObject); destroySync(*gdi, glSyncInfo->submissionSynchronizationObject); destroyEvent(osInterface, glSyncInfo->event); destroyEvent(osInterface, glSyncInfo->submissionEvent); destroyArbSyncEventName(glSyncInfo->eventName); destroyArbSyncEventName(glSyncInfo->submissionEventName); } bool setupArbSyncObject(GLSharingFunctions &sharing, OSInterface &osInterface, CL_GL_SYNC_INFO &glSyncInfo) { auto &sharingFunctions = static_cast(sharing); glSyncInfo.hContextToBlock = static_cast(sharingFunctions.getGLContextHandle()); auto glDevice = static_cast(sharingFunctions.getGLDeviceHandle()); auto wddm = 
osInterface.getDriverModel()->as(); D3DKMT_CREATESYNCHRONIZATIONOBJECT serverSyncInitInfo = {}; serverSyncInitInfo.hDevice = glDevice; serverSyncInitInfo.Info.Type = D3DDDI_SEMAPHORE; serverSyncInitInfo.Info.Semaphore.MaxCount = 32; serverSyncInitInfo.Info.Semaphore.InitialCount = 0; NTSTATUS serverSyncInitStatus = wddm->getGdi()->createSynchronizationObject(&serverSyncInitInfo); glSyncInfo.serverSynchronizationObject = serverSyncInitInfo.hSyncObject; glSyncInfo.eventName = createArbSyncEventName(); glSyncInfo.event = SysCalls::createEvent(nullptr, TRUE, FALSE, glSyncInfo.eventName); D3DKMT_CREATESYNCHRONIZATIONOBJECT2 clientSyncInitInfo = {}; clientSyncInitInfo.hDevice = glDevice; clientSyncInitInfo.Info.Type = D3DDDI_CPU_NOTIFICATION; clientSyncInitInfo.Info.CPUNotification.Event = glSyncInfo.event; NTSTATUS clientSyncInitStatus = wddm->getGdi()->createSynchronizationObject2(&clientSyncInitInfo); glSyncInfo.clientSynchronizationObject = clientSyncInitInfo.hSyncObject; D3DKMT_CREATESYNCHRONIZATIONOBJECT2 submissionSyncEventInfo = {}; glSyncInfo.submissionEventName = createArbSyncEventName(); glSyncInfo.submissionEvent = SysCalls::createEvent(nullptr, TRUE, FALSE, glSyncInfo.submissionEventName); submissionSyncEventInfo.hDevice = glDevice; submissionSyncEventInfo.Info.Type = D3DDDI_CPU_NOTIFICATION; submissionSyncEventInfo.Info.CPUNotification.Event = glSyncInfo.submissionEvent; auto submissionSyncInitStatus = wddm->getGdi()->createSynchronizationObject2(&submissionSyncEventInfo); glSyncInfo.submissionSynchronizationObject = submissionSyncEventInfo.hSyncObject; glSyncInfo.waitCalled = false; bool setupFailed = (glSyncInfo.event == nullptr) || (glSyncInfo.submissionEvent == nullptr) || (STATUS_SUCCESS != serverSyncInitStatus) || (STATUS_SUCCESS != clientSyncInitStatus) || (STATUS_SUCCESS != submissionSyncInitStatus); if (setupFailed) { DEBUG_BREAK_IF(true); cleanupArbSyncObject(osInterface, &glSyncInfo); return false; } return true; } void 
signalArbSyncObject(OsContext &osContext, CL_GL_SYNC_INFO &glSyncInfo) { auto osContextWin = static_cast(&osContext); UNRECOVERABLE_IF(!osContextWin); auto wddm = osContextWin->getWddm(); D3DKMT_SIGNALSYNCHRONIZATIONOBJECT signalServerClientSyncInfo = {}; signalServerClientSyncInfo.hContext = osContextWin->getWddmContextHandle(); signalServerClientSyncInfo.Flags.SignalAtSubmission = 0; // Wait for GPU to complete processing command buffer signalServerClientSyncInfo.ObjectHandleArray[0] = glSyncInfo.serverSynchronizationObject; signalServerClientSyncInfo.ObjectHandleArray[1] = glSyncInfo.clientSynchronizationObject; signalServerClientSyncInfo.ObjectCount = 2; NTSTATUS status = wddm->getGdi()->signalSynchronizationObject(&signalServerClientSyncInfo); if (STATUS_SUCCESS != status) { DEBUG_BREAK_IF(true); return; } D3DKMT_SIGNALSYNCHRONIZATIONOBJECT signalSubmissionSyncInfo = {}; signalSubmissionSyncInfo.hContext = osContextWin->getWddmContextHandle(); signalSubmissionSyncInfo.Flags.SignalAtSubmission = 1; // Don't wait for GPU to complete processing command buffer signalSubmissionSyncInfo.ObjectHandleArray[0] = glSyncInfo.submissionSynchronizationObject; signalSubmissionSyncInfo.ObjectCount = 1; status = wddm->getGdi()->signalSynchronizationObject(&signalSubmissionSyncInfo); DEBUG_BREAK_IF(STATUS_SUCCESS != status); } void serverWaitForArbSyncObject(OSInterface &osInterface, CL_GL_SYNC_INFO &glSyncInfo) { auto wddm = osInterface.getDriverModel()->as(); D3DKMT_WAITFORSYNCHRONIZATIONOBJECT waitForSyncInfo = {}; waitForSyncInfo.hContext = glSyncInfo.hContextToBlock; waitForSyncInfo.ObjectCount = 1; waitForSyncInfo.ObjectHandleArray[0] = glSyncInfo.serverSynchronizationObject; NTSTATUS status = wddm->getGdi()->waitForSynchronizationObject(&waitForSyncInfo); if (status != STATUS_SUCCESS) { DEBUG_BREAK_IF(true); return; } glSyncInfo.waitCalled = true; } GlArbSyncEvent::GlArbSyncEvent(Context &context) : Event(&context, nullptr, CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR, 
CompletionStamp::notReady, CompletionStamp::notReady), glSyncInfo(std::make_unique()) { } bool GlArbSyncEvent::setBaseEvent(Event &ev) { UNRECOVERABLE_IF(this->baseEvent != nullptr); UNRECOVERABLE_IF(ev.getContext() == nullptr); UNRECOVERABLE_IF(ev.getCommandQueue() == nullptr); auto cmdQueue = ev.getCommandQueue(); auto osInterface = cmdQueue->getGpgpuCommandStreamReceiver().getOSInterface(); UNRECOVERABLE_IF(osInterface == nullptr); if (false == ctx->getSharing()->glArbSyncObjectSetup(*osInterface, *glSyncInfo)) { return false; } this->baseEvent = &ev; this->cmdQueue = cmdQueue; this->cmdQueue->incRefInternal(); this->baseEvent->incRefInternal(); this->osInterface = osInterface; ev.addChild(*this); return true; } GlArbSyncEvent::~GlArbSyncEvent() { if (baseEvent != nullptr) { ctx->getSharing()->glArbSyncObjectCleanup(*osInterface, glSyncInfo.get()); baseEvent->decRefInternal(); } } GlArbSyncEvent *GlArbSyncEvent::create(Event &baseEvent) { if (baseEvent.getContext() == nullptr) { return nullptr; } auto arbSyncEvent = new GlArbSyncEvent(*baseEvent.getContext()); if (false == arbSyncEvent->setBaseEvent(baseEvent)) { delete arbSyncEvent; arbSyncEvent = nullptr; } return arbSyncEvent; } void GlArbSyncEvent::unblockEventBy(Event &event, uint32_t taskLevel, int32_t transitionStatus) { DEBUG_BREAK_IF(&event != this->baseEvent); if ((transitionStatus > CL_SUBMITTED) || (transitionStatus < 0)) { return; } ctx->getSharing()->glArbSyncObjectSignal(event.getCommandQueue()->getGpgpuCommandStreamReceiver().getOsContext(), *glSyncInfo); ctx->getSharing()->glArbSyncObjectWaitServer(*osInterface, *glSyncInfo); } } // namespace NEO extern "C" CL_API_ENTRY cl_int CL_API_CALL clEnqueueMarkerWithSyncObjectINTEL(cl_command_queue commandQueue, cl_event *event, cl_context *context) { return CL_INVALID_OPERATION; } extern "C" CL_API_ENTRY cl_int CL_API_CALL clGetCLObjectInfoINTEL(cl_mem memObj, void *pResourceInfo) { return CL_INVALID_OPERATION; } extern "C" CL_API_ENTRY cl_int 
CL_API_CALL clGetCLEventInfoINTEL(cl_event event, PCL_GL_SYNC_INFO *pSyncInfoHandleRet, cl_context *pClContextRet) { if ((nullptr == pSyncInfoHandleRet) || (nullptr == pClContextRet)) { return CL_INVALID_ARG_VALUE; } auto neoEvent = NEO::castToObject(event); if (nullptr == neoEvent) { return CL_INVALID_EVENT; } if (neoEvent->getCommandType() != CL_COMMAND_RELEASE_GL_OBJECTS) { *pSyncInfoHandleRet = nullptr; *pClContextRet = static_cast(neoEvent->getContext()); return CL_SUCCESS; } auto sharing = neoEvent->getContext()->getSharing(); if (sharing == nullptr) { return CL_INVALID_OPERATION; } NEO::GlArbSyncEvent *arbSyncEvent = sharing->getOrCreateGlArbSyncEvent(*neoEvent); if (nullptr == arbSyncEvent) { return CL_OUT_OF_RESOURCES; } neoEvent->updateExecutionStatus(); CL_GL_SYNC_INFO *syncInfo = arbSyncEvent->getSyncInfo(); *pSyncInfoHandleRet = syncInfo; *pClContextRet = static_cast(neoEvent->getContext()); return CL_SUCCESS; } extern "C" CL_API_ENTRY cl_int CL_API_CALL clReleaseGlSharedEventINTEL(cl_event event) { auto neoEvent = NEO::castToObject(event); if (nullptr == neoEvent) { return CL_INVALID_EVENT; } auto arbSyncEvent = neoEvent->getContext()->getSharing()->getGlArbSyncEvent(*neoEvent); neoEvent->getContext()->getSharing()->removeGlArbSyncEventMapping(*neoEvent); if (nullptr != arbSyncEvent) { arbSyncEvent->release(); } neoEvent->release(); return CL_SUCCESS; } compute-runtime-22.14.22890/opencl/source/sharings/gl/windows/gl_buffer_windows.cpp000066400000000000000000000174321422164147700302640ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gmm_helper/gmm.h" #include "shared/source/helpers/get_info.h" #include "shared/source/memory_manager/memory_manager.h" #include "opencl/extensions/public/cl_gl_private_intel.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/context/context.h" #include "opencl/source/mem_obj/buffer.h" #include 
"opencl/source/sharings/gl/gl_buffer.h" #include "opencl/source/sharings/gl/windows/gl_sharing_windows.h" #include "config.h" using namespace NEO; Buffer *GlBuffer::createSharedGlBuffer(Context *context, cl_mem_flags flags, unsigned int bufferId, cl_int *errcodeRet) { ErrorCodeHelper errorCode(errcodeRet, CL_SUCCESS); CL_GL_BUFFER_INFO bufferInfo = {0}; bufferInfo.bufferName = bufferId; GLSharingFunctionsWindows *sharingFunctions = context->getSharing(); if (sharingFunctions->acquireSharedBufferINTEL(&bufferInfo) == GL_FALSE) { errorCode.set(CL_INVALID_GL_OBJECT); return nullptr; } auto graphicsAllocation = GlBuffer::createGraphicsAllocation(context, bufferId, bufferInfo); if (!graphicsAllocation) { errorCode.set(CL_INVALID_GL_OBJECT); return nullptr; } auto glHandler = new GlBuffer(sharingFunctions, bufferId); auto rootDeviceIndex = graphicsAllocation->getRootDeviceIndex(); auto multiGraphicsAllocation = MultiGraphicsAllocation(rootDeviceIndex); multiGraphicsAllocation.addAllocation(graphicsAllocation); return Buffer::createSharedBuffer(context, flags, glHandler, std::move(multiGraphicsAllocation)); } void GlBuffer::synchronizeObject(UpdateData &updateData) { auto sharingFunctions = static_cast(this->sharingFunctions); CL_GL_BUFFER_INFO bufferInfo = {}; bufferInfo.bufferName = this->clGlObjectId; sharingFunctions->acquireSharedBufferINTEL(&bufferInfo); auto graphicsAllocation = updateData.memObject->getGraphicsAllocation(updateData.rootDeviceIndex); updateData.sharedHandle = bufferInfo.globalShareHandle; updateData.synchronizationStatus = SynchronizeStatus::ACQUIRE_SUCCESFUL; graphicsAllocation->setAllocationOffset(bufferInfo.bufferOffset); const auto currentSharedHandle = graphicsAllocation->peekSharedHandle(); if (currentSharedHandle != updateData.sharedHandle) { updateData.updateData = new CL_GL_BUFFER_INFO(bufferInfo); } } void GlBuffer::resolveGraphicsAllocationChange(osHandle currentSharedHandle, UpdateData *updateData) { const auto memObject = 
updateData->memObject; if (currentSharedHandle != updateData->sharedHandle) { const auto bufferInfo = std::unique_ptr(static_cast(updateData->updateData)); auto oldGraphicsAllocation = memObject->getGraphicsAllocation(updateData->rootDeviceIndex); popGraphicsAllocationFromReuse(oldGraphicsAllocation); Context *context = memObject->getContext(); auto newGraphicsAllocation = createGraphicsAllocation(context, clGlObjectId, *bufferInfo); if (newGraphicsAllocation == nullptr) { updateData->synchronizationStatus = SynchronizeStatus::SYNCHRONIZE_ERROR; memObject->removeGraphicsAllocation(updateData->rootDeviceIndex); } else { updateData->synchronizationStatus = SynchronizeStatus::ACQUIRE_SUCCESFUL; memObject->resetGraphicsAllocation(newGraphicsAllocation); } if (updateData->synchronizationStatus == SynchronizeStatus::ACQUIRE_SUCCESFUL) { memObject->getGraphicsAllocation(updateData->rootDeviceIndex)->setAllocationOffset(bufferInfo->bufferOffset); } } } void GlBuffer::popGraphicsAllocationFromReuse(GraphicsAllocation *graphicsAllocation) { auto sharingFunctions = static_cast(this->sharingFunctions); std::unique_lock lock(sharingFunctions->mutex); auto &graphicsAllocations = sharingFunctions->graphicsAllocationsForGlBufferReuse; auto foundIter = std::find_if(graphicsAllocations.begin(), graphicsAllocations.end(), [&graphicsAllocation](const std::pair &entry) { return entry.second == graphicsAllocation; }); if (foundIter != graphicsAllocations.end()) { std::iter_swap(foundIter, graphicsAllocations.end() - 1); graphicsAllocations.pop_back(); } graphicsAllocation->decReuseCount(); } void GlBuffer::releaseReusedGraphicsAllocation() { auto sharingFunctions = static_cast(this->sharingFunctions); std::unique_lock lock(sharingFunctions->mutex); auto &allocationsVector = sharingFunctions->graphicsAllocationsForGlBufferReuse; auto itEnd = allocationsVector.end(); for (auto it = allocationsVector.begin(); it != itEnd; it++) { if (it->first == clGlObjectId) { 
it->second->decReuseCount(); if (it->second->peekReuseCount() == 0) { std::iter_swap(it, itEnd - 1); allocationsVector.pop_back(); } break; } } } GraphicsAllocation *GlBuffer::createGraphicsAllocation(Context *context, unsigned int bufferId, _tagCLGLBufferInfo &bufferInfo) { GLSharingFunctionsWindows *sharingFunctions = context->getSharing(); auto &allocationsVector = sharingFunctions->graphicsAllocationsForGlBufferReuse; GraphicsAllocation *graphicsAllocation = nullptr; bool reusedAllocation = false; std::unique_lock lock(sharingFunctions->mutex); auto endIter = allocationsVector.end(); auto foundIter = std::find_if(allocationsVector.begin(), endIter, [&bufferId](const std::pair &entry) { return entry.first == bufferId; }); if (foundIter != endIter) { graphicsAllocation = foundIter->second; reusedAllocation = true; } if (!graphicsAllocation) { AllocationProperties properties = {context->getDevice(0)->getRootDeviceIndex(), false, // allocateMemory 0u, // size AllocationType::SHARED_BUFFER, false, // isMultiStorageAllocation context->getDeviceBitfieldForAllocation(context->getDevice(0)->getRootDeviceIndex())}; // couldn't find allocation for reuse - create new graphicsAllocation = context->getMemoryManager()->createGraphicsAllocationFromSharedHandle(bufferInfo.globalShareHandle, properties, true, false); } if (!graphicsAllocation) { return nullptr; } graphicsAllocation->incReuseCount(); // decremented in releaseReusedGraphicsAllocation() called from MemObj destructor if (!reusedAllocation) { sharingFunctions->graphicsAllocationsForGlBufferReuse.push_back(std::make_pair(bufferId, graphicsAllocation)); if (bufferInfo.pGmmResInfo) { DEBUG_BREAK_IF(graphicsAllocation->getDefaultGmm() != nullptr); auto clientContext = context->getDevice(0)->getRootDeviceEnvironment().getGmmClientContext(); graphicsAllocation->setDefaultGmm(new Gmm(clientContext, bufferInfo.pGmmResInfo)); } } return graphicsAllocation; } void GlBuffer::releaseResource(MemObj *memObject, uint32_t 
rootDeviceIndex) { auto sharingFunctions = static_cast(this->sharingFunctions); CL_GL_BUFFER_INFO bufferInfo = {}; bufferInfo.bufferName = this->clGlObjectId; sharingFunctions->releaseSharedBufferINTEL(&bufferInfo); } compute-runtime-22.14.22890/opencl/source/sharings/gl/windows/gl_context_guard_windows.cpp000066400000000000000000000024121422164147700316510ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/sharings/gl/gl_context_guard.h" #include "opencl/source/sharings/gl/windows/gl_sharing_windows.h" namespace NEO { GLContextGuard::GLContextGuard(GLSharingFunctions &sharingFcns) : sharingFunctions(&sharingFcns) { auto &sharing = *static_cast(sharingFunctions); currentContextHandle = sharing.getCurrentContext(); currentDisplayHandle = sharing.getCurrentDisplay(); auto ctxToMakeCurrent = sharing.getContextHandle(); if (currentContextHandle == 0) { ctxToMakeCurrent = sharing.getBackupContextHandle(); } if (currentContextHandle != sharing.getContextHandle() && currentContextHandle != sharing.getBackupContextHandle()) { if (sharing.makeCurrent(ctxToMakeCurrent) == GL_FALSE) { while (sharing.makeCurrent(sharing.getBackupContextHandle()) == GL_FALSE) { ; } } } } GLContextGuard::~GLContextGuard() { auto &sharing = *static_cast(sharingFunctions); if (currentContextHandle != sharing.getContextHandle()) { sharing.makeCurrent(currentContextHandle, currentDisplayHandle); } } } // namespace NEO compute-runtime-22.14.22890/opencl/source/sharings/gl/windows/gl_library_windows.cpp000066400000000000000000000002541422164147700304510ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include namespace Os { const char *openglDllName = "opengl32.dll"; } compute-runtime-22.14.22890/opencl/source/sharings/gl/windows/gl_sharing_windows.cpp000066400000000000000000000136711422164147700304470ustar00rootroot00000000000000/* * Copyright (C) 2020 
Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/sharings/gl/windows/gl_sharing_windows.h" #include "opencl/source/context/context.inl" #include "opencl/source/helpers/windows/gl_helper.h" #include "opencl/source/sharings/gl/gl_arb_sync_event.h" namespace NEO { GLSharingFunctionsWindows::GLSharingFunctionsWindows(GLType glhdcType, GLContext glhglrcHandle, GLContext glhglrcHandleBkpCtx, GLDisplay glhdcHandle) : GLHDCType(glhdcType), GLHGLRCHandle(glhglrcHandle), GLHGLRCHandleBkpCtx(glhglrcHandleBkpCtx), GLHDCHandle(glhdcHandle) { initGLFunctions(); updateOpenGLContext(); createBackupContext(); } GLSharingFunctionsWindows::~GLSharingFunctionsWindows() { if (pfnWglDeleteContext) { pfnWglDeleteContext(GLHGLRCHandleBkpCtx); } } bool GLSharingFunctionsWindows::isGlSharingEnabled() { static bool oglLibAvailable = std::unique_ptr(OsLibrary::load(Os::openglDllName)).get() != nullptr; return oglLibAvailable; } void GLSharingFunctionsWindows::createBackupContext() { if (pfnWglCreateContext) { GLHGLRCHandleBkpCtx = pfnWglCreateContext(GLHDCHandle); pfnWglShareLists(GLHGLRCHandle, GLHGLRCHandleBkpCtx); } } GLboolean GLSharingFunctionsWindows::setSharedOCLContextState() { ContextInfo CtxInfo = {0}; GLboolean retVal = GLSetSharedOCLContextState(GLHDCHandle, GLHGLRCHandle, CL_TRUE, &CtxInfo); if (retVal == GL_FALSE) { return GL_FALSE; } GLContextHandle = CtxInfo.ContextHandle; GLDeviceHandle = CtxInfo.DeviceHandle; return retVal; } bool GLSharingFunctionsWindows::isOpenGlExtensionSupported(const unsigned char *pExtensionString) { bool LoadedNull = (glGetStringi == nullptr) || (glGetIntegerv == nullptr); if (LoadedNull) { return false; } cl_int NumberOfExtensions = 0; glGetIntegerv(GL_NUM_EXTENSIONS, &NumberOfExtensions); for (cl_int i = 0; i < NumberOfExtensions; i++) { std::basic_string pString = glGetStringi(GL_EXTENSIONS, i); if (pString == pExtensionString) { return true; } } return false; } bool 
GLSharingFunctionsWindows::isOpenGlSharingSupported() { std::basic_string Vendor = glGetString(GL_VENDOR); const unsigned char intelVendor[] = "Intel"; if ((Vendor.empty()) || (Vendor != intelVendor)) { return false; } std::basic_string Version = glGetString(GL_VERSION); if (Version.empty()) { return false; } bool IsOpenGLES = false; const unsigned char versionES[] = "OpenGL ES"; if (Version.find(versionES) != std::string::npos) { IsOpenGLES = true; } if (IsOpenGLES == true) { const unsigned char versionES1[] = "OpenGL ES 1."; if (Version.find(versionES1) != std::string::npos) { const unsigned char supportGLOES[] = "GL_OES_framebuffer_object"; if (isOpenGlExtensionSupported(supportGLOES) == false) { return false; } } } else { if (Version[0] < '3') { const unsigned char supportGLEXT[] = "GL_EXT_framebuffer_object"; if (isOpenGlExtensionSupported(supportGLEXT) == false) { return false; } } } return true; } GlArbSyncEvent *GLSharingFunctionsWindows::getGlArbSyncEvent(Event &baseEvent) { std::lock_guard lock{glArbEventMutex}; auto it = glArbEventMapping.find(&baseEvent); if (it != glArbEventMapping.end()) { return it->second; } return nullptr; } void GLSharingFunctionsWindows::removeGlArbSyncEventMapping(Event &baseEvent) { std::lock_guard lock{glArbEventMutex}; auto it = glArbEventMapping.find(&baseEvent); if (it == glArbEventMapping.end()) { DEBUG_BREAK_IF(it == glArbEventMapping.end()); return; } glArbEventMapping.erase(it); } GLboolean GLSharingFunctionsWindows::initGLFunctions() { glLibrary.reset(OsLibrary::load(Os::openglDllName)); if (glLibrary->isLoaded()) { glFunctionHelper wglLibrary(glLibrary.get(), "wglGetProcAddress"); GLGetCurrentContext = (*glLibrary)["wglGetCurrentContext"]; GLGetCurrentDisplay = (*glLibrary)["wglGetCurrentDC"]; glGetString = (*glLibrary)["glGetString"]; glGetIntegerv = (*glLibrary)["glGetIntegerv"]; pfnWglCreateContext = (*glLibrary)["wglCreateContext"]; pfnWglDeleteContext = (*glLibrary)["wglDeleteContext"]; pfnWglShareLists = 
(*glLibrary)["wglShareLists"]; wglMakeCurrent = (*glLibrary)["wglMakeCurrent"]; GLSetSharedOCLContextState = wglLibrary["wglSetSharedOCLContextStateINTEL"]; GLAcquireSharedBuffer = wglLibrary["wglAcquireSharedBufferINTEL"]; GLReleaseSharedBuffer = wglLibrary["wglReleaseSharedBufferINTEL"]; GLAcquireSharedRenderBuffer = wglLibrary["wglAcquireSharedRenderBufferINTEL"]; GLReleaseSharedRenderBuffer = wglLibrary["wglReleaseSharedRenderBufferINTEL"]; GLAcquireSharedTexture = wglLibrary["wglAcquireSharedTextureINTEL"]; GLReleaseSharedTexture = wglLibrary["wglReleaseSharedTextureINTEL"]; GLRetainSync = wglLibrary["wglRetainSyncINTEL"]; GLReleaseSync = wglLibrary["wglReleaseSyncINTEL"]; GLGetSynciv = wglLibrary["wglGetSyncivINTEL"]; glGetStringi = wglLibrary["glGetStringi"]; glGetLuid = wglLibrary["wglGetLuidINTEL"]; } this->pfnGlArbSyncObjectCleanup = cleanupArbSyncObject; this->pfnGlArbSyncObjectSetup = setupArbSyncObject; this->pfnGlArbSyncObjectSignal = signalArbSyncObject; this->pfnGlArbSyncObjectWaitServer = serverWaitForArbSyncObject; return 1; } LUID GLSharingFunctionsWindows::getAdapterLuid(GLContext glhglrcHandle) const { if (glGetLuid) { return glGetLuid(glhglrcHandle); } return {}; } template GLSharingFunctionsWindows *Context::getSharing(); } // namespace NEO compute-runtime-22.14.22890/opencl/source/sharings/gl/windows/gl_sharing_windows.h000066400000000000000000000221371422164147700301110ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/sharings/gl/gl_sharing.h" #include "gl_types.h" #include namespace NEO { //OpenGL API names typedef GLboolean(OSAPI *PFNOGLSetSharedOCLContextStateINTEL)(GLDisplay hdcHandle, GLContext contextHandle, GLboolean state, GLvoid *pContextInfo); typedef GLboolean(OSAPI *PFNOGLAcquireSharedBufferINTEL)(GLDisplay hdcHandle, GLContext contextHandle, GLContext backupContextHandle, GLvoid *pBufferInfo); typedef GLboolean(OSAPI 
*PFNOGLAcquireSharedRenderBufferINTEL)(GLDisplay hdcHandle, GLContext contextHandle, GLContext backupContextHandle, GLvoid *pResourceInfo); typedef GLboolean(OSAPI *PFNOGLAcquireSharedTextureINTEL)(GLDisplay hdcHandle, GLContext contextHandle, GLContext backupContextHandle, GLvoid *pResourceInfo); typedef GLboolean(OSAPI *PFNOGLReleaseSharedBufferINTEL)(GLDisplay hdcHandle, GLContext contextHandle, GLContext backupContextHandle, GLvoid *pBufferInfo); typedef GLboolean(OSAPI *PFNOGLReleaseSharedRenderBufferINTEL)(GLDisplay hdcHandle, GLContext contextHandle, GLContext backupContextHandle, GLvoid *pResourceInfo); typedef GLboolean(OSAPI *PFNOGLReleaseSharedTextureINTEL)(GLDisplay hdcHandle, GLContext contextHandle, GLContext backupContextHandle, GLvoid *pResourceInfo); typedef GLContext(OSAPI *PFNOGLGetCurrentContext)(); typedef GLDisplay(OSAPI *PFNOGLGetCurrentDisplay)(); typedef GLboolean(OSAPI *PFNOGLMakeCurrent)(GLDisplay hdcHandle, void *draw, void *read, GLContext contextHandle); typedef GLboolean(OSAPI *PFNOGLRetainSyncINTEL)(GLDisplay hdcHandle, GLContext contextHandle, GLContext backupContextHandle, GLvoid *pSyncInfo); typedef GLboolean(OSAPI *PFNOGLReleaseSyncINTEL)(GLDisplay hdcHandle, GLContext contextHandle, GLContext backupContextHandle, GLvoid *pSync); typedef void(OSAPI *PFNOGLGetSyncivINTEL)(GLvoid *pSync, GLenum pname, GLint *value); typedef const GLubyte *(OSAPI *PFNglGetString)(GLenum name); typedef const GLubyte *(OSAPI *PFNglGetStringi)(GLenum name, GLuint index); typedef void(OSAPI *PFNglGetIntegerv)(GLenum pname, GLint *params); typedef void(OSAPI *PFNglBindTexture)(GLenum target, GLuint texture); //wgl typedef BOOL(OSAPI *PFNwglMakeCurrent)(HDC, HGLRC); typedef GLContext(OSAPI *PFNwglCreateContext)(GLDisplay hdcHandle); typedef int(OSAPI *PFNwglShareLists)(GLContext contextHandle, GLContext backupContextHandle); typedef BOOL(OSAPI *PFNwglDeleteContext)(HGLRC hglrcHandle); typedef bool (*PFNglArbSyncObjectSetup)(GLSharingFunctions &sharing, 
OSInterface &osInterface, CL_GL_SYNC_INFO &glSyncInfo); typedef void (*PFNglArbSyncObjectCleanup)(OSInterface &osInterface, CL_GL_SYNC_INFO *glSyncInfo); typedef void (*PFNglArbSyncObjectSignal)(OsContext &osContext, CL_GL_SYNC_INFO &glSyncInfo); typedef void (*PFNglArbSyncObjectWaitServer)(OSInterface &osInterface, CL_GL_SYNC_INFO &glSyncInfo); typedef LUID(OSAPI *PFNGLGETLUIDINTEL)(HGLRC hglrcHandle); class GLSharingFunctionsWindows : public GLSharingFunctions { public: GLSharingFunctionsWindows() = default; GLSharingFunctionsWindows(GLType glhdcType, GLContext glhglrcHandle, GLContext glhglrcHandleBkpCtx, GLDisplay glhdcHandle); ~GLSharingFunctionsWindows() override; OS_HANDLE getGLDeviceHandle() const { return GLDeviceHandle; } OS_HANDLE getGLContextHandle() const { return GLContextHandle; } GLboolean initGLFunctions() override; bool isOpenGlSharingSupported() override; static bool isGlSharingEnabled(); // Arb sync event template auto getOrCreateGlArbSyncEvent(Event &baseEvent) -> decltype(EventType::create(baseEvent)); GlArbSyncEvent *getGlArbSyncEvent(Event &baseEvent); void removeGlArbSyncEventMapping(Event &baseEvent); // Gl functions GLboolean acquireSharedBufferINTEL(GLvoid *pBufferInfo) { return GLAcquireSharedBuffer(GLHDCHandle, GLHGLRCHandle, GLHGLRCHandleBkpCtx, pBufferInfo); } GLboolean releaseSharedBufferINTEL(GLvoid *pBufferInfo) { return GLReleaseSharedBuffer(GLHDCHandle, GLHGLRCHandle, GLHGLRCHandleBkpCtx, pBufferInfo); } GLboolean acquireSharedRenderBuffer(GLvoid *pResourceInfo) { return GLAcquireSharedRenderBuffer(GLHDCHandle, GLHGLRCHandle, GLHGLRCHandleBkpCtx, pResourceInfo); } GLboolean releaseSharedRenderBuffer(GLvoid *pResourceInfo) { return GLReleaseSharedRenderBuffer(GLHDCHandle, GLHGLRCHandle, GLHGLRCHandleBkpCtx, pResourceInfo); } GLboolean acquireSharedTexture(GLvoid *pResourceInfo) { return GLAcquireSharedTexture(GLHDCHandle, GLHGLRCHandle, GLHGLRCHandleBkpCtx, pResourceInfo); } GLboolean releaseSharedTexture(GLvoid *pResourceInfo) { 
return GLReleaseSharedTexture(GLHDCHandle, GLHGLRCHandle, GLHGLRCHandleBkpCtx, pResourceInfo); } GLboolean retainSync(GLvoid *pSyncInfo) { return GLRetainSync(GLHDCHandle, GLHGLRCHandle, GLHGLRCHandleBkpCtx, pSyncInfo); } GLboolean releaseSync(GLvoid *pSync) { return GLReleaseSync(GLHDCHandle, GLHGLRCHandle, GLHGLRCHandleBkpCtx, pSync); } void getSynciv(GLvoid *pSync, GLenum pname, GLint *value) { return GLGetSynciv(pSync, pname, value); } GLContext getCurrentContext() { return GLGetCurrentContext(); } GLDisplay getCurrentDisplay() { return GLGetCurrentDisplay(); } GLboolean makeCurrent(GLContext contextHandle, GLDisplay displayHandle = 0) { if (displayHandle == 0) { displayHandle = GLHDCHandle; } return this->wglMakeCurrent(displayHandle, contextHandle); } GLContext getBackupContextHandle() { return GLHGLRCHandleBkpCtx; } GLContext getContextHandle() { return GLHGLRCHandle; } bool glArbSyncObjectSetup(OSInterface &osInterface, CL_GL_SYNC_INFO &glSyncInfo) { return pfnGlArbSyncObjectSetup(*this, osInterface, glSyncInfo); } void glArbSyncObjectCleanup(OSInterface &osInterface, CL_GL_SYNC_INFO *glSyncInfo) { pfnGlArbSyncObjectCleanup(osInterface, glSyncInfo); } void glArbSyncObjectSignal(OsContext &osContext, CL_GL_SYNC_INFO &glSyncInfo) { pfnGlArbSyncObjectSignal(osContext, glSyncInfo); } void glArbSyncObjectWaitServer(OSInterface &osInterface, CL_GL_SYNC_INFO &glSyncInfo) { pfnGlArbSyncObjectWaitServer(osInterface, glSyncInfo); } LUID getAdapterLuid(GLContext glhglrcHandle) const; // Buffer reuse std::mutex mutex; std::vector> graphicsAllocationsForGlBufferReuse; protected: void updateOpenGLContext() { if (GLSetSharedOCLContextState) { setSharedOCLContextState(); } } GLboolean setSharedOCLContextState(); void createBackupContext(); bool isOpenGlExtensionSupported(const unsigned char *pExtentionString); // Handles GLType GLHDCType = 0; GLContext GLHGLRCHandle = 0; GLContext GLHGLRCHandleBkpCtx = 0; GLDisplay GLHDCHandle = 0; OS_HANDLE GLDeviceHandle = 0; OS_HANDLE 
GLContextHandle = 0; // GL functions std::unique_ptr glLibrary; PFNOGLSetSharedOCLContextStateINTEL GLSetSharedOCLContextState = nullptr; PFNOGLAcquireSharedBufferINTEL GLAcquireSharedBuffer = nullptr; PFNOGLReleaseSharedBufferINTEL GLReleaseSharedBuffer = nullptr; PFNOGLAcquireSharedRenderBufferINTEL GLAcquireSharedRenderBuffer = nullptr; PFNOGLReleaseSharedRenderBufferINTEL GLReleaseSharedRenderBuffer = nullptr; PFNOGLAcquireSharedTextureINTEL GLAcquireSharedTexture = nullptr; PFNOGLReleaseSharedTextureINTEL GLReleaseSharedTexture = nullptr; PFNOGLGetCurrentContext GLGetCurrentContext = nullptr; PFNOGLGetCurrentDisplay GLGetCurrentDisplay = nullptr; PFNglGetString glGetString = nullptr; PFNglGetStringi glGetStringi = nullptr; PFNglGetIntegerv glGetIntegerv = nullptr; PFNwglCreateContext pfnWglCreateContext = nullptr; PFNwglMakeCurrent wglMakeCurrent = nullptr; PFNwglShareLists pfnWglShareLists = nullptr; PFNwglDeleteContext pfnWglDeleteContext = nullptr; PFNOGLRetainSyncINTEL GLRetainSync = nullptr; PFNOGLReleaseSyncINTEL GLReleaseSync = nullptr; PFNOGLGetSyncivINTEL GLGetSynciv = nullptr; PFNglArbSyncObjectSetup pfnGlArbSyncObjectSetup = nullptr; PFNglArbSyncObjectCleanup pfnGlArbSyncObjectCleanup = nullptr; PFNglArbSyncObjectSignal pfnGlArbSyncObjectSignal = nullptr; PFNglArbSyncObjectWaitServer pfnGlArbSyncObjectWaitServer = nullptr; PFNGLGETLUIDINTEL glGetLuid = nullptr; // support for GL_ARB_cl_event std::mutex glArbEventMutex; std::unordered_map glArbEventMapping; }; template inline auto GLSharingFunctionsWindows::getOrCreateGlArbSyncEvent(Event &baseEvent) -> decltype(EventType::create(baseEvent)) { std::lock_guard lock{glArbEventMutex}; auto it = glArbEventMapping.find(&baseEvent); if (it != glArbEventMapping.end()) { return it->second; } auto arbEvent = EventType::create(baseEvent); if (nullptr == arbEvent) { return arbEvent; } glArbEventMapping[&baseEvent] = arbEvent; return arbEvent; } } // namespace NEO 
compute-runtime-22.14.22890/opencl/source/sharings/gl/windows/gl_sync_event_windows.cpp000066400000000000000000000041641422164147700311660ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/device/device.h" #include "shared/source/helpers/get_info.h" #include "shared/source/helpers/timestamp_packet.h" #include "opencl/extensions/public/cl_gl_private_intel.h" #include "opencl/source/context/context.h" #include "opencl/source/event/async_events_handler.h" #include "opencl/source/event/event_builder.h" #include "opencl/source/platform/platform.h" #include "opencl/source/sharings/gl/gl_context_guard.h" #include "opencl/source/sharings/gl/gl_sync_event.h" #include "opencl/source/sharings/gl/windows/gl_sharing_windows.h" namespace NEO { GlSyncEvent::GlSyncEvent(Context &context, const GL_CL_SYNC_INFO &sync) : Event(&context, nullptr, CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR, CompletionStamp::notReady, CompletionStamp::notReady), glSync(std::make_unique(sync)) { transitionExecutionStatus(CL_SUBMITTED); } GlSyncEvent::~GlSyncEvent() { ctx->getSharing()->releaseSync(glSync->pSync); } GlSyncEvent *GlSyncEvent::create(Context &context, cl_GLsync sync, cl_int *errCode) { GLContextGuard guard(*context.getSharing()); ErrorCodeHelper err(errCode, CL_SUCCESS); GL_CL_SYNC_INFO syncInfo = {sync, nullptr}; context.getSharing()->retainSync(&syncInfo); DEBUG_BREAK_IF(!syncInfo.pSync); EventBuilder eventBuilder; eventBuilder.create(context, syncInfo); return static_cast(eventBuilder.finalizeAndRelease()); } void GlSyncEvent::updateExecutionStatus() { GLContextGuard guard(*ctx->getSharing()); int retVal = 0; ctx->getSharing()->getSynciv(glSync->pSync, GL_SYNC_STATUS, &retVal); if (retVal == GL_SIGNALED) { setStatus(CL_COMPLETE); } } uint32_t GlSyncEvent::getTaskLevel() { if (peekExecutionStatus() == CL_COMPLETE) { return 0; } return 
CompletionStamp::notReady; } } // namespace NEO compute-runtime-22.14.22890/opencl/source/sharings/gl/windows/gl_texture_windows.cpp000066400000000000000000000271121422164147700305070ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/gmm_helper/gmm.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/gmm_helper/resource_info.h" #include "shared/source/helpers/get_info.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/os_interface/hw_info_config.h" #include "opencl/extensions/public/cl_gl_private_intel.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/context/context.h" #include "opencl/source/helpers/gmm_types_converter.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/sharings/gl/gl_texture.h" #include "opencl/source/sharings/gl/windows/gl_sharing_windows.h" #include "CL/cl_gl.h" #include "config.h" #include namespace NEO { Image *GlTexture::createSharedGlTexture(Context *context, cl_mem_flags flags, cl_GLenum target, cl_GLint miplevel, cl_GLuint texture, cl_int *errcodeRet) { ErrorCodeHelper errorCode(errcodeRet, CL_INVALID_GL_OBJECT); auto clientContext = context->getDevice(0)->getRootDeviceEnvironment().getGmmClientContext(); auto memoryManager = context->getMemoryManager(); cl_image_desc imgDesc = {}; cl_image_format imgFormat = {}; McsSurfaceInfo mcsSurfaceInfo = {}; CL_GL_RESOURCE_INFO texInfo = {}; texInfo.name = texture; texInfo.target = getBaseTargetType(target); GLSharingFunctionsWindows *sharingFunctions = context->getSharing(); if (target == GL_RENDERBUFFER_EXT) { sharingFunctions->acquireSharedRenderBuffer(&texInfo); } else { sharingFunctions->acquireSharedTexture(&texInfo); } errorCode.set(CL_SUCCESS); 
AllocationProperties allocProperties(context->getDevice(0)->getRootDeviceIndex(), false, // allocateMemory 0u, // size AllocationType::SHARED_IMAGE, false, // isMultiStorageAllocation context->getDeviceBitfieldForAllocation(context->getDevice(0)->getRootDeviceIndex())); auto alloc = memoryManager->createGraphicsAllocationFromSharedHandle(texInfo.globalShareHandle, allocProperties, false, false); if (alloc == nullptr) { errorCode.set(CL_INVALID_GL_OBJECT); return nullptr; } if (texInfo.pGmmResInfo) { DEBUG_BREAK_IF(alloc->getDefaultGmm() != nullptr); alloc->setDefaultGmm(new Gmm(clientContext, texInfo.pGmmResInfo)); } auto gmm = alloc->getDefaultGmm(); imgDesc.image_type = getClMemObjectType(target); if (target == GL_TEXTURE_BUFFER) { imgDesc.image_width = texInfo.textureBufferWidth; imgDesc.image_row_pitch = texInfo.textureBufferSize; } else { imgDesc.image_width = gmm->gmmResourceInfo->getBaseWidth(); imgDesc.image_row_pitch = gmm->gmmResourceInfo->getRenderPitch(); if (imgDesc.image_row_pitch == 0) { size_t alignedWidth = alignUp(imgDesc.image_width, gmm->gmmResourceInfo->getHAlign()); size_t bpp = gmm->gmmResourceInfo->getBitsPerPixel() >> 3; imgDesc.image_row_pitch = alignedWidth * bpp; } } uint32_t numSamples = static_cast(gmm->gmmResourceInfo->getNumSamples()); imgDesc.num_samples = getValidParam(numSamples, 0u, 1u); imgDesc.image_height = gmm->gmmResourceInfo->getBaseHeight(); imgDesc.image_array_size = gmm->gmmResourceInfo->getArraySize(); if (target == GL_TEXTURE_3D) { imgDesc.image_depth = gmm->gmmResourceInfo->getBaseDepth(); } if (imgDesc.image_array_size > 1 || imgDesc.image_depth > 1) { GMM_REQ_OFFSET_INFO GMMReqInfo = {}; GMMReqInfo.ArrayIndex = imgDesc.image_array_size > 1 ? 1 : 0; GMMReqInfo.Slice = imgDesc.image_depth > 1 ? 
1 : 0; GMMReqInfo.ReqLock = 1; gmm->gmmResourceInfo->getOffset(GMMReqInfo); imgDesc.image_slice_pitch = GMMReqInfo.Lock.Offset; } else { imgDesc.image_slice_pitch = alloc->getUnderlyingBufferSize(); } uint32_t cubeFaceIndex = GmmTypesConverter::getCubeFaceIndex(target); auto qPitch = gmm->queryQPitch(gmm->gmmResourceInfo->getResourceType()); if (setClImageFormat(texInfo.glInternalFormat, imgFormat) == false) { memoryManager->freeGraphicsMemory(alloc); errorCode.set(CL_INVALID_GL_OBJECT); return nullptr; } auto surfaceFormatInfoAddress = Image::getSurfaceFormatFromTable(flags, &imgFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); if (!surfaceFormatInfoAddress) { memoryManager->freeGraphicsMemory(alloc); errorCode.set(CL_INVALID_GL_OBJECT); return nullptr; } auto surfaceFormatInfo = *surfaceFormatInfoAddress; if (texInfo.glInternalFormat != GL_RGB10) { surfaceFormatInfo.surfaceFormat.GenxSurfaceFormat = (GFX3DSTATE_SURFACEFORMAT)texInfo.glHWFormat; } GraphicsAllocation *mcsAlloc = nullptr; if (texInfo.globalShareHandleMCS) { AllocationProperties allocProperties(context->getDevice(0)->getRootDeviceIndex(), 0, AllocationType::MCS, context->getDeviceBitfieldForAllocation(context->getDevice(0)->getRootDeviceIndex())); mcsAlloc = memoryManager->createGraphicsAllocationFromSharedHandle(texInfo.globalShareHandleMCS, allocProperties, false, false); if (texInfo.pGmmResInfoMCS) { DEBUG_BREAK_IF(mcsAlloc->getDefaultGmm() != nullptr); mcsAlloc->setDefaultGmm(new Gmm(clientContext, texInfo.pGmmResInfoMCS)); } mcsSurfaceInfo.pitch = getValidParam(static_cast(mcsAlloc->getDefaultGmm()->gmmResourceInfo->getRenderPitch() / 128)); mcsSurfaceInfo.qPitch = mcsAlloc->getDefaultGmm()->gmmResourceInfo->getQPitch(); } mcsSurfaceInfo.multisampleCount = GmmTypesConverter::getRenderMultisamplesCount(static_cast(imgDesc.num_samples)); if (miplevel < 0) { imgDesc.num_mip_levels = gmm->gmmResourceInfo->getMaxLod() + 1; } ImageInfo imgInfo = {}; 
imgInfo.imgDesc = Image::convertDescriptor(imgDesc); imgInfo.surfaceFormat = &surfaceFormatInfo.surfaceFormat; imgInfo.qPitch = qPitch; auto glTexture = new GlTexture(sharingFunctions, getClGlObjectType(target), texture, texInfo, target, std::max(miplevel, 0)); if (texInfo.isAuxEnabled && alloc->getDefaultGmm()->unifiedAuxTranslationCapable()) { const auto &hwInfo = context->getDevice(0)->getHardwareInfo(); const auto &hwInfoConfig = *HwInfoConfig::get(hwInfo.platform.eProductFamily); alloc->getDefaultGmm()->isCompressionEnabled = hwInfoConfig.isPageTableManagerSupported(hwInfo) ? memoryManager->mapAuxGpuVA(alloc) : true; } auto multiGraphicsAllocation = MultiGraphicsAllocation(context->getDevice(0)->getRootDeviceIndex()); multiGraphicsAllocation.addAllocation(alloc); return Image::createSharedImage(context, glTexture, mcsSurfaceInfo, std::move(multiGraphicsAllocation), mcsAlloc, flags, 0, &surfaceFormatInfo, imgInfo, cubeFaceIndex, std::max(miplevel, 0), imgInfo.imgDesc.numMipLevels); } // namespace NEO void GlTexture::synchronizeObject(UpdateData &updateData) { auto sharingFunctions = static_cast(this->sharingFunctions); CL_GL_RESOURCE_INFO resourceInfo = {0}; resourceInfo.name = this->clGlObjectId; if (target == GL_RENDERBUFFER_EXT) { sharingFunctions->acquireSharedRenderBuffer(&resourceInfo); } else { sharingFunctions->acquireSharedTexture(&resourceInfo); // Set texture buffer offset acquired from OpenGL layer in graphics allocation updateData.memObject->getGraphicsAllocation(updateData.rootDeviceIndex)->setAllocationOffset(resourceInfo.textureBufferOffset); } updateData.sharedHandle = resourceInfo.globalShareHandle; updateData.synchronizationStatus = SynchronizeStatus::ACQUIRE_SUCCESFUL; } cl_int GlTexture::getGlTextureInfo(cl_gl_texture_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) const { GetInfoHelper info(paramValue, paramValueSize, paramValueSizeRet); if (paramName == CL_GL_TEXTURE_TARGET) { info.set(target); } else 
if (paramName == CL_GL_MIPMAP_LEVEL) { info.set(miplevel); } else if (paramName == CL_GL_NUM_SAMPLES) { info.set(textureInfo.numberOfSamples > 1 ? textureInfo.numberOfSamples : 0); } else { return CL_INVALID_VALUE; } return CL_SUCCESS; } cl_mem_object_type GlTexture::getClMemObjectType(cl_GLenum glType) { return static_cast(getClObjectType(glType, false)); } cl_gl_object_type GlTexture::getClGlObjectType(cl_GLenum glType) { return static_cast(getClObjectType(glType, true)); } uint32_t GlTexture::getClObjectType(cl_GLenum glType, bool returnClGlObjectType) { // return cl_gl_object_type if returnClGlObjectType is ture, otherwise cl_mem_object_type uint32_t retValue = 0; switch (glType) { case GL_TEXTURE_1D: retValue = returnClGlObjectType ? CL_GL_OBJECT_TEXTURE1D : CL_MEM_OBJECT_IMAGE1D; break; case GL_TEXTURE_1D_ARRAY: retValue = returnClGlObjectType ? CL_GL_OBJECT_TEXTURE1D_ARRAY : CL_MEM_OBJECT_IMAGE1D_ARRAY; break; case GL_TEXTURE_2D: case GL_TEXTURE_RECTANGLE: case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: case GL_TEXTURE_CUBE_MAP_POSITIVE_X: case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: case GL_TEXTURE_2D_MULTISAMPLE: retValue = returnClGlObjectType ? CL_GL_OBJECT_TEXTURE2D : CL_MEM_OBJECT_IMAGE2D; break; case GL_TEXTURE_2D_ARRAY: case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: retValue = returnClGlObjectType ? CL_GL_OBJECT_TEXTURE2D_ARRAY : CL_MEM_OBJECT_IMAGE2D_ARRAY; break; case GL_TEXTURE_3D: retValue = returnClGlObjectType ? CL_GL_OBJECT_TEXTURE3D : CL_MEM_OBJECT_IMAGE3D; break; case GL_TEXTURE_BUFFER: retValue = returnClGlObjectType ? CL_GL_OBJECT_TEXTURE_BUFFER : CL_MEM_OBJECT_IMAGE1D_BUFFER; break; case GL_RENDERBUFFER_EXT: retValue = returnClGlObjectType ? 
CL_GL_OBJECT_RENDERBUFFER : CL_MEM_OBJECT_IMAGE2D; break; default: retValue = 0; break; } return retValue; } cl_GLenum GlTexture::getBaseTargetType(cl_GLenum target) { cl_GLenum returnTarget = 0; switch (target) { case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: case GL_TEXTURE_CUBE_MAP_POSITIVE_X: case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: returnTarget = GL_TEXTURE_CUBE_MAP_ARB; break; default: returnTarget = target; break; } return returnTarget; } void GlTexture::releaseResource(MemObj *memObject, uint32_t rootDeviceIndex) { auto sharingFunctions = static_cast(this->sharingFunctions); if (target == GL_RENDERBUFFER_EXT) { sharingFunctions->releaseSharedRenderBuffer(&textureInfo); } else { sharingFunctions->releaseSharedTexture(&textureInfo); } } } // namespace NEO compute-runtime-22.14.22890/opencl/source/sharings/gl/windows/include/000077500000000000000000000000001422164147700254675ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/source/sharings/gl/windows/include/gl_types.h000066400000000000000000000004171422164147700274700ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/windows/windows_wrapper.h" #include #define OSAPI WINAPI typedef uint32_t GLType; typedef HDC GLDisplay; typedef HGLRC GLContext; compute-runtime-22.14.22890/opencl/source/sharings/gl/windows/win_enable_gl.cpp000066400000000000000000000075261422164147700273470ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/sharings/gl/windows/win_enable_gl.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "opencl/source/context/context.h" #include "opencl/source/context/context.inl" #include "opencl/source/sharings/gl/cl_gl_api_intel.h" #include 
"opencl/source/sharings/gl/windows/gl_sharing_windows.h" #include "opencl/source/sharings/sharing_factory.h" #include "opencl/source/sharings/sharing_factory.inl" #include namespace NEO { bool GlSharingContextBuilder::processProperties(cl_context_properties &propertyType, cl_context_properties &propertyValue) { if (contextData.get() == nullptr) { contextData = std::make_unique(); } switch (propertyType) { case CL_GL_CONTEXT_KHR: contextData->GLHGLRCHandle = (GLContext)propertyValue; return true; case CL_WGL_HDC_KHR: contextData->GLHDCType = (GLType)CL_WGL_HDC_KHR; contextData->GLHDCHandle = (GLDisplay)propertyValue; return true; case CL_GLX_DISPLAY_KHR: contextData->GLHDCType = (GLType)CL_GLX_DISPLAY_KHR; contextData->GLHDCHandle = (GLDisplay)propertyValue; return true; case CL_EGL_DISPLAY_KHR: contextData->GLHDCType = (GLType)CL_EGL_DISPLAY_KHR; contextData->GLHDCHandle = (GLDisplay)propertyValue; return true; } return false; } bool GlSharingContextBuilder::finalizeProperties(Context &context, int32_t &errcodeRet) { if (contextData.get() == nullptr) return true; if (contextData->GLHGLRCHandle) { context.registerSharing(new GLSharingFunctionsWindows(contextData->GLHDCType, contextData->GLHGLRCHandle, nullptr, contextData->GLHDCHandle)); } contextData.reset(nullptr); return true; } std::unique_ptr GlSharingBuilderFactory::createContextBuilder() { return std::make_unique(); }; void GlSharingBuilderFactory::fillGlobalDispatchTable() { icdGlobalDispatchTable.clCreateFromGLBuffer = clCreateFromGLBuffer; icdGlobalDispatchTable.clCreateFromGLTexture = clCreateFromGLTexture; icdGlobalDispatchTable.clCreateFromGLTexture2D = clCreateFromGLTexture2D; icdGlobalDispatchTable.clCreateFromGLTexture3D = clCreateFromGLTexture3D; icdGlobalDispatchTable.clCreateFromGLRenderbuffer = clCreateFromGLRenderbuffer; icdGlobalDispatchTable.clGetGLObjectInfo = clGetGLObjectInfo; icdGlobalDispatchTable.clGetGLTextureInfo = clGetGLTextureInfo; icdGlobalDispatchTable.clEnqueueAcquireGLObjects = 
clEnqueueAcquireGLObjects; icdGlobalDispatchTable.clEnqueueReleaseGLObjects = clEnqueueReleaseGLObjects; icdGlobalDispatchTable.clCreateEventFromGLsyncKHR = clCreateEventFromGLsyncKHR; icdGlobalDispatchTable.clGetGLContextInfoKHR = clGetGLContextInfoKHR; } std::string GlSharingBuilderFactory::getExtensions(DriverInfo *driverInfo) { auto isGlSharingEnabled = GLSharingFunctionsWindows::isGlSharingEnabled(); if (DebugManager.flags.AddClGlSharing.get() != -1) { isGlSharingEnabled = DebugManager.flags.AddClGlSharing.get(); } if (isGlSharingEnabled) { return "cl_khr_gl_sharing " "cl_khr_gl_depth_images " "cl_khr_gl_event " "cl_khr_gl_msaa_sharing "; } return ""; } void *GlSharingBuilderFactory::getExtensionFunctionAddress(const std::string &functionName) { if (DebugManager.flags.EnableFormatQuery.get() && functionName == "clGetSupportedGLTextureFormatsINTEL") { return ((void *)(clGetSupportedGLTextureFormatsINTEL)); } return nullptr; } static SharingFactory::RegisterSharing glSharing; } // namespace NEO compute-runtime-22.14.22890/opencl/source/sharings/gl/windows/win_enable_gl.h000066400000000000000000000021201422164147700267750ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/sharings/gl/windows/gl_sharing_windows.h" #include "opencl/source/sharings/sharing_factory.h" #include namespace NEO { class Context; struct GlCreateContextProperties { GLType GLHDCType = 0; GLContext GLHGLRCHandle = 0; GLDisplay GLHDCHandle = 0; }; class GlSharingContextBuilder : public SharingContextBuilder { protected: std::unique_ptr contextData; public: bool processProperties(cl_context_properties &propertyType, cl_context_properties &propertyValue) override; bool finalizeProperties(Context &context, int32_t &errcodeRet) override; }; class GlSharingBuilderFactory : public SharingBuilderFactory { public: std::unique_ptr createContextBuilder() override; std::string getExtensions(DriverInfo 
*driverInfo) override; void fillGlobalDispatchTable() override; void *getExtensionFunctionAddress(const std::string &functionName) override; }; } // namespace NEO compute-runtime-22.14.22890/opencl/source/sharings/sharing.cpp000066400000000000000000000033351422164147700241130ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "sharing.h" #include "opencl/source/mem_obj/mem_obj.h" #include "CL/cl.h" #include namespace NEO { int SharingHandler::acquire(MemObj *memObj, uint32_t rootDeviceIndex) { if (acquireCount == 0) { UpdateData updateData{rootDeviceIndex}; auto graphicsAllocation = memObj->getGraphicsAllocation(rootDeviceIndex); auto currentSharedHandle = graphicsAllocation->peekSharedHandle(); updateData.sharedHandle = currentSharedHandle; updateData.memObject = memObj; int result = synchronizeHandler(updateData); resolveGraphicsAllocationChange(currentSharedHandle, &updateData); if (result != CL_SUCCESS) { return result; } if (updateData.synchronizationStatus != SynchronizeStatus::ACQUIRE_SUCCESFUL) { return CL_OUT_OF_RESOURCES; } DEBUG_BREAK_IF(graphicsAllocation->peekSharedHandle() != updateData.sharedHandle); } acquireCount++; return CL_SUCCESS; } int SharingHandler::synchronizeHandler(UpdateData &updateData) { auto result = validateUpdateData(updateData); if (result == CL_SUCCESS) { synchronizeObject(updateData); } return result; } int SharingHandler::validateUpdateData(UpdateData &updateData) { return CL_SUCCESS; } void SharingHandler::resolveGraphicsAllocationChange(osHandle currentSharedHandle, UpdateData *updateData) { } void SharingHandler::release(MemObj *memObject, uint32_t rootDeviceIndex) { DEBUG_BREAK_IF(acquireCount <= 0); acquireCount--; if (acquireCount == 0) { releaseResource(memObject, rootDeviceIndex); } } } // namespace NEO compute-runtime-22.14.22890/opencl/source/sharings/sharing.h000066400000000000000000000031151422164147700235540ustar00rootroot00000000000000/* * 
Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/memory_manager/graphics_allocation.h" namespace NEO { class Context; class MemObj; enum SynchronizeStatus { SHARED_OBJECT_NOT_CHANGED, SHARED_OBJECT_REQUIRES_UPDATE, ACQUIRE_SUCCESFUL, SYNCHRONIZE_ERROR }; struct UpdateData { UpdateData(uint32_t inRootDeviceIndex) : rootDeviceIndex(inRootDeviceIndex){}; const uint32_t rootDeviceIndex; SynchronizeStatus synchronizationStatus = SHARED_OBJECT_NOT_CHANGED; osHandle sharedHandle = 0; MemObj *memObject = nullptr; void *updateData = nullptr; }; class SharingFunctions { public: virtual uint32_t getId() const = 0; virtual ~SharingFunctions() = default; }; class SharingHandler { public: int acquire(MemObj *memObj, uint32_t rootDeviceIndex); void release(MemObj *memObject, uint32_t rootDeviceIndex); virtual ~SharingHandler() = default; virtual void getMemObjectInfo(size_t ¶mValueSize, void *¶mValue){}; virtual void releaseReusedGraphicsAllocation(){}; protected: virtual int synchronizeHandler(UpdateData &updateData); virtual int validateUpdateData(UpdateData &updateData); virtual void synchronizeObject(UpdateData &updateData) { updateData.synchronizationStatus = SYNCHRONIZE_ERROR; } virtual void resolveGraphicsAllocationChange(osHandle currentSharedHandle, UpdateData *updateData); virtual void releaseResource(MemObj *memObject, uint32_t rootDeviceIndex){}; unsigned int acquireCount = 0u; }; } // namespace NEO compute-runtime-22.14.22890/opencl/source/sharings/sharing_factory.cpp000066400000000000000000000050061422164147700256370ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "sharing_factory.h" #include "shared/source/compiler_interface/oclc_extensions.h" #include "shared/source/debug_settings/debug_settings_manager.h" namespace NEO { std::unique_ptr SharingFactory::build() { auto res = std::make_unique(); for (auto &builder : 
sharingContextBuilder) { if (builder == nullptr) continue; res->sharings.push_back(builder->createContextBuilder()); } return res; } std::string SharingFactory::getExtensions(DriverInfo *driverInfo) { std::string res; bool sharingAvailable = false; for (auto &builder : sharingContextBuilder) { if (builder == nullptr) continue; res += builder->getExtensions(driverInfo); sharingAvailable = true; } if (DebugManager.flags.EnableFormatQuery.get() && sharingAvailable) { res += Extensions::sharingFormatQuery; } return res; } void SharingFactory::fillGlobalDispatchTable() { for (auto &builder : sharingContextBuilder) { if (builder == nullptr) continue; builder->fillGlobalDispatchTable(); } } void *SharingFactory::getExtensionFunctionAddress(const std::string &functionName) { for (auto &builder : sharingContextBuilder) { if (builder == nullptr) continue; auto ret = builder->getExtensionFunctionAddress(functionName); if (ret != nullptr) return ret; } return nullptr; } bool SharingFactory::processProperties(cl_context_properties &propertyType, cl_context_properties &propertyValue) { for (auto &sharing : sharings) { if (sharing->processProperties(propertyType, propertyValue)) return true; } return false; } bool SharingFactory::finalizeProperties(Context &context, int32_t &errcodeRet) { for (auto &sharing : sharings) { if (!sharing->finalizeProperties(context, errcodeRet)) return false; } return true; } SharingBuilderFactory *SharingFactory::sharingContextBuilder[SharingType::MAX_SHARING_VALUE] = { nullptr, }; void SharingFactory::verifyExtensionSupport(DriverInfo *driverInfo) { for (auto &builder : sharingContextBuilder) { if (builder == nullptr) continue; builder->setExtensionEnabled(driverInfo); } }; void SharingBuilderFactory::setExtensionEnabled(DriverInfo *driverInfo){}; SharingFactory sharingFactory; } // namespace NEO compute-runtime-22.14.22890/opencl/source/sharings/sharing_factory.h000066400000000000000000000036371422164147700253140ustar00rootroot00000000000000/* * 
Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "CL/cl.h" #include #include #include namespace NEO { class Context; class DriverInfo; enum SharingType { CLGL_SHARING = 0, VA_SHARING = 1, D3D9_SHARING = 2, D3D10_SHARING = 3, D3D11_SHARING = 4, UNIFIED_SHARING = 5, MAX_SHARING_VALUE = 6 }; class SharingContextBuilder { public: virtual ~SharingContextBuilder() = default; virtual bool processProperties(cl_context_properties &propertyType, cl_context_properties &propertyValue) = 0; virtual bool finalizeProperties(Context &context, int32_t &errcodeRet) = 0; }; class SharingBuilderFactory { public: virtual ~SharingBuilderFactory() = default; virtual std::unique_ptr createContextBuilder() = 0; virtual std::string getExtensions(DriverInfo *driverInfo) = 0; virtual void fillGlobalDispatchTable() {} virtual void *getExtensionFunctionAddress(const std::string &functionName) = 0; virtual void setExtensionEnabled(DriverInfo *driverInfo); }; class SharingFactory { protected: static SharingBuilderFactory *sharingContextBuilder[SharingType::MAX_SHARING_VALUE]; std::vector> sharings; public: template class RegisterSharing { public: RegisterSharing(); }; static std::unique_ptr build(); bool processProperties(cl_context_properties &propertyType, cl_context_properties &propertyValue); bool finalizeProperties(Context &context, int32_t &errcodeRet); std::string getExtensions(DriverInfo *driverInfo); void fillGlobalDispatchTable(); void *getExtensionFunctionAddress(const std::string &functionName); void verifyExtensionSupport(DriverInfo *driverInfo); }; extern SharingFactory sharingFactory; } // namespace NEO compute-runtime-22.14.22890/opencl/source/sharings/sharing_factory.inl000066400000000000000000000005221422164147700256350ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/sharings/sharing_factory.h" namespace NEO { template 
SharingFactory::RegisterSharing::RegisterSharing() { sharingContextBuilder[T::sharingId] = new F; }; } // namespace NEO compute-runtime-22.14.22890/opencl/source/sharings/unified/000077500000000000000000000000001422164147700233735ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/source/sharings/unified/CMakeLists.txt000066400000000000000000000013751422164147700261410ustar00rootroot00000000000000# # Copyright (C) 2019-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(RUNTIME_SRCS_SHARINGS_UNIFIED ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/enable_unified.h ${CMAKE_CURRENT_SOURCE_DIR}/unified_buffer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/unified_buffer.h ${CMAKE_CURRENT_SOURCE_DIR}/unified_image.cpp ${CMAKE_CURRENT_SOURCE_DIR}/unified_image.h ${CMAKE_CURRENT_SOURCE_DIR}/unified_sharing.cpp ${CMAKE_CURRENT_SOURCE_DIR}/unified_sharing.h ${CMAKE_CURRENT_SOURCE_DIR}/unified_sharing_types.h ) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_SHARINGS_UNIFIED}) set_property(GLOBAL PROPERTY RUNTIME_SRCS_SHARINGS_UNIFIED ${RUNTIME_SRCS_SHARINGS_UNIFIED}) add_subdirectories() compute-runtime-22.14.22890/opencl/source/sharings/unified/enable_unified.cpp000066400000000000000000000035341422164147700270350ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/sharings/unified/enable_unified.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "opencl/source/context/context.h" #include "opencl/source/context/context.inl" #include "opencl/source/sharings/sharing_factory.h" #include "opencl/source/sharings/sharing_factory.inl" #include "opencl/source/sharings/unified/unified_sharing.h" #include "opencl/source/sharings/unified/unified_sharing_types.h" #include namespace NEO { bool UnifiedSharingContextBuilder::processProperties(cl_context_properties &propertyType, cl_context_properties &propertyValue) { switch 
(propertyType) { case static_cast(UnifiedSharingContextType::DeviceHandle): case static_cast(UnifiedSharingContextType::DeviceGroup): this->contextData = std::make_unique(); return true; default: return false; } } bool UnifiedSharingContextBuilder::finalizeProperties(Context &context, int32_t &errcodeRet) { if (contextData.get() != nullptr) { if (context.getInteropUserSyncEnabled()) { context.registerSharing(new UnifiedSharingFunctions()); } contextData.reset(nullptr); } return true; } std::unique_ptr UnifiedSharingBuilderFactory::createContextBuilder() { return std::make_unique(); }; std::string UnifiedSharingBuilderFactory::getExtensions(DriverInfo *driverInfo) { return ""; } void *UnifiedSharingBuilderFactory::getExtensionFunctionAddress(const std::string &functionName) { return nullptr; } static SharingFactory::RegisterSharing unifiedSharing; } // namespace NEO compute-runtime-22.14.22890/opencl/source/sharings/unified/enable_unified.h000066400000000000000000000016331422164147700265000ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/sharings/sharing_factory.h" #include namespace NEO { class Context; struct UnifiedCreateContextProperties { }; class UnifiedSharingContextBuilder : public SharingContextBuilder { protected: std::unique_ptr contextData; public: bool processProperties(cl_context_properties &propertyType, cl_context_properties &propertyValue) override; bool finalizeProperties(Context &context, int32_t &errcodeRet) override; }; class UnifiedSharingBuilderFactory : public SharingBuilderFactory { public: std::unique_ptr createContextBuilder() override; std::string getExtensions(DriverInfo *driverInfo) override; void *getExtensionFunctionAddress(const std::string &functionName) override; }; } // namespace NEO 
compute-runtime-22.14.22890/opencl/source/sharings/unified/unified_buffer.cpp000066400000000000000000000024441422164147700270570ustar00rootroot00000000000000/* * Copyright (C) 2019-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "unified_buffer.h" #include "shared/source/gmm_helper/gmm.h" #include "shared/source/helpers/get_info.h" #include "shared/source/memory_manager/memory_manager.h" #include "opencl/source/context/context.h" #include "opencl/source/mem_obj/buffer.h" #include "config.h" using namespace NEO; Buffer *UnifiedBuffer::createSharedUnifiedBuffer(Context *context, cl_mem_flags flags, UnifiedSharingMemoryDescription extMem, cl_int *errcodeRet) { ErrorCodeHelper errorCode(errcodeRet, CL_SUCCESS); auto graphicsAllocation = UnifiedBuffer::createGraphicsAllocation(context, extMem, AllocationType::SHARED_BUFFER); if (!graphicsAllocation) { errorCode.set(CL_INVALID_MEM_OBJECT); return nullptr; } UnifiedSharingFunctions *sharingFunctions = context->getSharing(); auto sharingHandler = new UnifiedBuffer(sharingFunctions, extMem.type); auto rootDeviceIndex = graphicsAllocation->getRootDeviceIndex(); auto multiGraphicsAllocation = MultiGraphicsAllocation(rootDeviceIndex); multiGraphicsAllocation.addAllocation(graphicsAllocation); return Buffer::createSharedBuffer(context, flags, sharingHandler, std::move(multiGraphicsAllocation)); } compute-runtime-22.14.22890/opencl/source/sharings/unified/unified_buffer.h000066400000000000000000000007421422164147700265230ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/sharings/unified/unified_sharing.h" namespace NEO { class Buffer; class Context; class UnifiedBuffer : public UnifiedSharing { using UnifiedSharing::UnifiedSharing; public: static Buffer *createSharedUnifiedBuffer(Context *context, cl_mem_flags flags, UnifiedSharingMemoryDescription description, cl_int *errcodeRet); }; } // namespace NEO 
compute-runtime-22.14.22890/opencl/source/sharings/unified/unified_image.cpp000066400000000000000000000054311422164147700266670ustar00rootroot00000000000000/* * Copyright (C) 2019-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "unified_image.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/gmm_helper/gmm.h" #include "shared/source/gmm_helper/resource_info.h" #include "shared/source/helpers/get_info.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/os_interface/hw_info_config.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/context/context.h" #include "opencl/source/mem_obj/image.h" namespace NEO { Image *UnifiedImage::createSharedUnifiedImage(Context *context, cl_mem_flags flags, UnifiedSharingMemoryDescription description, const cl_image_format *imageFormat, const cl_image_desc *imageDesc, cl_int *errcodeRet) { ErrorCodeHelper errorCode(errcodeRet, CL_SUCCESS); UnifiedSharingFunctions *sharingFunctions = context->getSharing(); auto *clSurfaceFormat = Image::getSurfaceFormatFromTable(flags, imageFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); ImageInfo imgInfo = {}; imgInfo.imgDesc = Image::convertDescriptor(*imageDesc); imgInfo.surfaceFormat = &clSurfaceFormat->surfaceFormat; GraphicsAllocation *graphicsAllocation = createGraphicsAllocation(context, description, AllocationType::SHARED_IMAGE); if (!graphicsAllocation) { errorCode.set(CL_INVALID_MEM_OBJECT); return nullptr; } graphicsAllocation->getDefaultGmm()->updateOffsetsInImgInfo(imgInfo, 0u); auto &memoryManager = *context->getMemoryManager(); if (graphicsAllocation->getDefaultGmm()->unifiedAuxTranslationCapable()) { const auto &hwInfo = context->getDevice(0)->getHardwareInfo(); const auto &hwInfoConfig = 
*HwInfoConfig::get(hwInfo.platform.eProductFamily); graphicsAllocation->getDefaultGmm()->isCompressionEnabled = hwInfoConfig.isPageTableManagerSupported(hwInfo) ? memoryManager.mapAuxGpuVA(graphicsAllocation) : true; } const uint32_t baseMipmapIndex = 0u; const uint32_t sharedMipmapsCount = imageDesc->num_mip_levels; auto sharingHandler = new UnifiedImage(sharingFunctions, description.type); auto multiGraphicsAllocation = MultiGraphicsAllocation(context->getDevice(0)->getRootDeviceIndex()); multiGraphicsAllocation.addAllocation(graphicsAllocation); return Image::createSharedImage(context, sharingHandler, McsSurfaceInfo{}, std::move(multiGraphicsAllocation), nullptr, flags, 0, clSurfaceFormat, imgInfo, __GMM_NO_CUBE_MAP, baseMipmapIndex, sharedMipmapsCount); } } // namespace NEO compute-runtime-22.14.22890/opencl/source/sharings/unified/unified_image.h000066400000000000000000000011151422164147700263270ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/sharings/unified/unified_sharing.h" namespace NEO { class Image; class Context; class UnifiedImage : public UnifiedSharing { using UnifiedSharing::UnifiedSharing; public: static Image *createSharedUnifiedImage(Context *context, cl_mem_flags flags, UnifiedSharingMemoryDescription description, const cl_image_format *imageFormat, const cl_image_desc *imageDesc, cl_int *errcodeRet); }; } // namespace NEO compute-runtime-22.14.22890/opencl/source/sharings/unified/unified_sharing.cpp000066400000000000000000000046231422164147700272420ustar00rootroot00000000000000/* * Copyright (C) 2019-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/sharings/unified/unified_sharing.h" #include "shared/source/helpers/string.h" #include "shared/source/helpers/timestamp_packet.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/context/context.h" #include 
"opencl/source/sharings/sharing_factory.h" #include namespace NEO { const uint32_t UnifiedSharingFunctions::sharingId = SharingType::UNIFIED_SHARING; UnifiedSharing::UnifiedSharing(UnifiedSharingFunctions *sharingFunctions, UnifiedSharingHandleType memoryType) : sharingFunctions(sharingFunctions), memoryType(memoryType) { } void UnifiedSharing::synchronizeObject(UpdateData &updateData) { updateData.synchronizationStatus = SynchronizeStatus::ACQUIRE_SUCCESFUL; } void UnifiedSharing::releaseResource(MemObj *memObject, uint32_t rootDeviceIndex) { } GraphicsAllocation *UnifiedSharing::createGraphicsAllocation(Context *context, UnifiedSharingMemoryDescription description, AllocationType allocationType) { auto memoryManager = context->getMemoryManager(); switch (description.type) { case UnifiedSharingHandleType::Win32Nt: { return memoryManager->createGraphicsAllocationFromNTHandle(description.handle, context->getDevice(0)->getRootDeviceIndex(), AllocationType::SHARED_IMAGE); } case UnifiedSharingHandleType::LinuxFd: case UnifiedSharingHandleType::Win32Shared: { const AllocationProperties properties{context->getDevice(0)->getRootDeviceIndex(), false, // allocateMemory 0u, // size allocationType, false, // isMultiStorageAllocation context->getDeviceBitfieldForAllocation(context->getDevice(0)->getRootDeviceIndex())}; return memoryManager->createGraphicsAllocationFromSharedHandle(toOsHandle(description.handle), properties, false, false); } default: return nullptr; } } template <> UnifiedSharingFunctions *Context::getSharing() { UNRECOVERABLE_IF(UnifiedSharingFunctions::sharingId >= sharingFunctions.size()) return reinterpret_cast(sharingFunctions[UnifiedSharingFunctions::sharingId].get()); } } // namespace NEO compute-runtime-22.14.22890/opencl/source/sharings/unified/unified_sharing.h000066400000000000000000000025221422164147700267030ustar00rootroot00000000000000/* * Copyright (C) 2019-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include 
"shared/source/os_interface/os_library.h" #include "opencl/source/mem_obj/mem_obj.h" #include "opencl/source/sharings/sharing.h" #include "opencl/source/sharings/unified/unified_sharing_types.h" #include "CL/cl.h" #include #include #include namespace NEO { class UnifiedSharingFunctions : public SharingFunctions { public: uint32_t getId() const override { return UnifiedSharingFunctions::sharingId; } static const uint32_t sharingId; }; class UnifiedSharing : public SharingHandler { public: UnifiedSharing(UnifiedSharingFunctions *sharingFunctions, UnifiedSharingHandleType memoryType); UnifiedSharingFunctions *peekFunctionsHandler() { return sharingFunctions; } UnifiedSharingHandleType getExternalMemoryType() { return memoryType; } protected: void synchronizeObject(UpdateData &updateData) override; void releaseResource(MemObj *memObject, uint32_t rootDeviceIndex) override; static GraphicsAllocation *createGraphicsAllocation(Context *context, UnifiedSharingMemoryDescription description, AllocationType allocationType); private: UnifiedSharingFunctions *sharingFunctions; UnifiedSharingHandleType memoryType; }; } // namespace NEO compute-runtime-22.14.22890/opencl/source/sharings/unified/unified_sharing_types.h000066400000000000000000000010211422164147700301200ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include namespace NEO { using UnifiedSharingMemoryProperties = uint64_t; enum class UnifiedSharingContextType { DeviceHandle = 0x300B, DeviceGroup = 0x300C }; enum class UnifiedSharingHandleType { LinuxFd = 1, Win32Shared = 2, Win32Nt = 3 }; struct UnifiedSharingMemoryDescription { UnifiedSharingHandleType type; void *handle; unsigned long long size; }; } // namespace NEO 
compute-runtime-22.14.22890/opencl/source/sharings/va/000077500000000000000000000000001422164147700223565ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/source/sharings/va/CMakeLists.txt000066400000000000000000000017761422164147700251310ustar00rootroot00000000000000# # Copyright (C) 2018-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(RUNTIME_SRCS_SHARINGS_VA ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/va_device.cpp ${CMAKE_CURRENT_SOURCE_DIR}/va_device.h ${CMAKE_CURRENT_SOURCE_DIR}/va_device_shared.cpp ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}va_extension.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cl_va_api.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cl_va_api.h ${CMAKE_CURRENT_SOURCE_DIR}/va_sharing.cpp ${CMAKE_CURRENT_SOURCE_DIR}/va_sharing.h ${CMAKE_CURRENT_SOURCE_DIR}/va_sharing_defines.h ${CMAKE_CURRENT_SOURCE_DIR}/va_sharing_functions.cpp ${CMAKE_CURRENT_SOURCE_DIR}/va_sharing_functions.h ${CMAKE_CURRENT_SOURCE_DIR}/va_surface.cpp ${CMAKE_CURRENT_SOURCE_DIR}/va_surface.h ) set_property(GLOBAL PROPERTY RUNTIME_SRCS_SHARINGS_VA ${RUNTIME_SRCS_SHARINGS_VA}) if(NEO__LIBVA_FOUND) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_SHARINGS_VA}) endif() add_subdirectories() compute-runtime-22.14.22890/opencl/source/sharings/va/cl_va_api.cpp000066400000000000000000000145311422164147700250030ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/get_info.h" #include "shared/source/utilities/api_intercept.h" #include "opencl/source/api/api.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/context/context.h" #include "opencl/source/platform/platform.h" #include "opencl/source/sharings/va/va_device.h" #include "opencl/source/sharings/va/va_sharing.h" #include "opencl/source/sharings/va/va_surface.h" #include "opencl/source/utilities/cl_logger.h" #include 
"CL/cl.h" #include using namespace NEO; cl_mem CL_API_CALL clCreateFromVA_APIMediaSurfaceINTEL(cl_context context, cl_mem_flags flags, VASurfaceID *surface, cl_uint plane, cl_int *errcodeRet) { cl_int returnCode = CL_SUCCESS; API_ENTER(&returnCode); DBG_LOG_INPUTS("context", context, "flags", flags, "VASurfaceID", surface, "plane", plane); Context *pContext = nullptr; cl_mem image = nullptr; returnCode = validateObject(WithCastToInternal(context, &pContext)); ErrorCodeHelper err(errcodeRet, returnCode); if (returnCode != CL_SUCCESS) { return nullptr; } if (!VASurface::validate(flags, plane)) { returnCode = CL_INVALID_VALUE; err.set(returnCode); return nullptr; } image = VASurface::createSharedVaSurface(pContext, pContext->getSharing(), flags, 0, surface, plane, errcodeRet); DBG_LOG_INPUTS("image", image); return image; } cl_int CL_API_CALL clGetDeviceIDsFromVA_APIMediaAdapterINTEL(cl_platform_id platform, cl_va_api_device_source_intel mediaAdapterType, void *mediaAdapter, cl_va_api_device_set_intel mediaAdapterSet, cl_uint numEntries, cl_device_id *devices, cl_uint *numDevices) { cl_int status = CL_SUCCESS; API_ENTER(&status); DBG_LOG_INPUTS("platform", platform, "mediaAdapterType", mediaAdapterType, "mediaAdapter", mediaAdapter, "mediaAdapterSet", mediaAdapterSet, "numEntries", numEntries); Platform *pPlatform = nullptr; status = validateObjects(WithCastToInternal(platform, &pPlatform)); if (status != CL_SUCCESS) { status = CL_INVALID_PLATFORM; } else { VADevice vaDevice{}; cl_device_id device = vaDevice.getDeviceFromVA(pPlatform, mediaAdapter); GetInfoHelper::set(devices, device); if (device == nullptr) { GetInfoHelper::set(numDevices, 0u); status = CL_DEVICE_NOT_FOUND; } else { GetInfoHelper::set(numDevices, 1u); } } return status; } cl_int CL_API_CALL clEnqueueAcquireVA_APIMediaSurfacesINTEL(cl_command_queue commandQueue, cl_uint numObjects, const cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { cl_int status = 
CL_SUCCESS; API_ENTER(&status); DBG_LOG_INPUTS("commandQueue", commandQueue, "numObjects", numObjects, "memObjects", getClFileLogger().getMemObjects(reinterpret_cast(memObjects), numObjects), "numEventsInWaitList", numEventsInWaitList, "eventWaitList", getClFileLogger().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList), "event", getClFileLogger().getEvents(reinterpret_cast(event), 1)); CommandQueue *pCommandQueue = nullptr; status = validateObjects(WithCastToInternal(commandQueue, &pCommandQueue)); if (status == CL_SUCCESS) { status = pCommandQueue->enqueueAcquireSharedObjects(numObjects, memObjects, numEventsInWaitList, eventWaitList, event, CL_COMMAND_ACQUIRE_VA_API_MEDIA_SURFACES_INTEL); } return status; } cl_int CL_API_CALL clEnqueueReleaseVA_APIMediaSurfacesINTEL(cl_command_queue commandQueue, cl_uint numObjects, const cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { cl_int status = CL_SUCCESS; API_ENTER(&status); DBG_LOG_INPUTS("commandQueue", commandQueue, "numObjects", numObjects, "memObjects", getClFileLogger().getMemObjects(reinterpret_cast(memObjects), numObjects), "numEventsInWaitList", numEventsInWaitList, "eventWaitList", getClFileLogger().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList), "event", getClFileLogger().getEvents(reinterpret_cast(event), 1)); CommandQueue *pCommandQueue = nullptr; status = validateObjects(WithCastToInternal(commandQueue, &pCommandQueue)); if (status == CL_SUCCESS) { status = pCommandQueue->enqueueReleaseSharedObjects(numObjects, memObjects, numEventsInWaitList, eventWaitList, event, CL_COMMAND_RELEASE_VA_API_MEDIA_SURFACES_INTEL); if (!pCommandQueue->getContext().getInteropUserSyncEnabled()) { pCommandQueue->finish(); } } return status; } cl_int CL_API_CALL clGetSupportedVA_APIMediaSurfaceFormatsINTEL( cl_context context, cl_mem_flags flags, cl_mem_object_type imageType, cl_uint plane, cl_uint numEntries, VAImageFormat *vaApiFormats, cl_uint 
*numImageFormats) { if (numImageFormats) { *numImageFormats = 0; } Context *pContext = castToObjectOrAbort(context); auto pSharing = pContext->getSharing(); if (!pSharing) { return CL_INVALID_CONTEXT; } return pSharing->getSupportedFormats(flags, imageType, plane, numEntries, vaApiFormats, numImageFormats); } compute-runtime-22.14.22890/opencl/source/sharings/va/cl_va_api.h000066400000000000000000000006331422164147700244460ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "CL/cl.h" #include "CL/cl_va_api_media_sharing_intel.h" cl_int CL_API_CALL clGetSupportedVA_APIMediaSurfaceFormatsINTEL( cl_context context, cl_mem_flags flags, cl_mem_object_type imageType, cl_uint plane, cl_uint numEntries, VAImageFormat *vaApiFormats, cl_uint *numImageFormats); compute-runtime-22.14.22890/opencl/source/sharings/va/enable_va.cpp000066400000000000000000000066061422164147700250060ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #ifdef LIBVA #include "opencl/source/sharings/va/enable_va.h" #include "shared/source/os_interface/driver_info.h" #include "opencl/source/api/api.h" #include "opencl/source/context/context.h" #include "opencl/source/context/context.inl" #include "opencl/source/sharings/sharing_factory.h" #include "opencl/source/sharings/sharing_factory.inl" #include "opencl/source/sharings/va/cl_va_api.h" #include "opencl/source/sharings/va/va_sharing.h" #include namespace NEO { bool VaSharingContextBuilder::processProperties(cl_context_properties &propertyType, cl_context_properties &propertyValue) { if (contextData.get() == nullptr) { contextData = std::make_unique(); } switch (propertyType) { case CL_CONTEXT_VA_API_DISPLAY_INTEL: contextData->vaDisplay = (VADisplay)propertyValue; return true; } return false; } bool VaSharingContextBuilder::finalizeProperties(Context &context, int32_t &errcodeRet) { if (contextData.get() == nullptr) 
return true; if (contextData->vaDisplay) { context.registerSharing(new VASharingFunctions(contextData->vaDisplay)); if (!context.getSharing()->isValidVaDisplay()) { errcodeRet = CL_INVALID_VA_API_MEDIA_ADAPTER_INTEL; return false; } context.getSharing()->querySupportedVaImageFormats(contextData->vaDisplay); } return true; } std::unique_ptr VaSharingBuilderFactory::createContextBuilder() { return std::make_unique(); }; std::string VaSharingBuilderFactory::getExtensions(DriverInfo *driverInfo) { auto imageSupport = driverInfo ? driverInfo->getImageSupport() : false; if (imageSupport && VASharingFunctions::isVaLibraryAvailable()) { return "cl_intel_va_api_media_sharing "; } return ""; } void VaSharingBuilderFactory::fillGlobalDispatchTable() { crtGlobalDispatchTable.clCreateFromVA_APIMediaSurfaceINTEL = clCreateFromVA_APIMediaSurfaceINTEL; crtGlobalDispatchTable.clGetDeviceIDsFromVA_APIMediaAdapterINTEL = clGetDeviceIDsFromVA_APIMediaAdapterINTEL; crtGlobalDispatchTable.clEnqueueReleaseVA_APIMediaSurfacesINTEL = clEnqueueReleaseVA_APIMediaSurfacesINTEL; crtGlobalDispatchTable.clEnqueueAcquireVA_APIMediaSurfacesINTEL = clEnqueueAcquireVA_APIMediaSurfacesINTEL; } #define RETURN_FUNC_PTR_IF_EXIST(name) \ { \ if (functionName == #name) { \ return ((void *)(name)); \ } \ } void *VaSharingBuilderFactory::getExtensionFunctionAddress(const std::string &functionName) { RETURN_FUNC_PTR_IF_EXIST(clCreateFromVA_APIMediaSurfaceINTEL); RETURN_FUNC_PTR_IF_EXIST(clGetDeviceIDsFromVA_APIMediaAdapterINTEL); RETURN_FUNC_PTR_IF_EXIST(clEnqueueAcquireVA_APIMediaSurfacesINTEL); RETURN_FUNC_PTR_IF_EXIST(clEnqueueReleaseVA_APIMediaSurfacesINTEL); if (DebugManager.flags.EnableFormatQuery.get()) { RETURN_FUNC_PTR_IF_EXIST(clGetSupportedVA_APIMediaSurfaceFormatsINTEL); } auto extraFunction = getExtensionFunctionAddressExtra(functionName); return extraFunction; } static SharingFactory::RegisterSharing vaSharing; } // namespace NEO #endif 
compute-runtime-22.14.22890/opencl/source/sharings/va/enable_va.h000066400000000000000000000021711422164147700244440ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/sharings/sharing_factory.h" #include "opencl/source/sharings/va/va_sharing_defines.h" #include namespace NEO { class Context; class DriverInfo; struct VaCreateContextProperties { VADisplay vaDisplay = nullptr; }; class VaSharingContextBuilder : public SharingContextBuilder { protected: std::unique_ptr contextData; public: bool processProperties(cl_context_properties &propertyType, cl_context_properties &propertyValue) override; bool finalizeProperties(Context &context, int32_t &errcodeRet) override; }; class VaSharingBuilderFactory : public SharingBuilderFactory { public: std::unique_ptr createContextBuilder() override; std::string getExtensions(DriverInfo *driverInfo) override; void fillGlobalDispatchTable() override; void *getExtensionFunctionAddress(const std::string &functionName) override; virtual void *getExtensionFunctionAddressExtra(const std::string &functionName); }; } // namespace NEO compute-runtime-22.14.22890/opencl/source/sharings/va/va_device.cpp000066400000000000000000000007231422164147700250110ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/sharings/va/va_device.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/platform/platform.h" namespace NEO { VADevice::VADevice() { } ClDevice *VADevice::getDeviceFromVA(Platform *pPlatform, VADisplay vaDisplay) { return getRootDeviceFromVaDisplay(pPlatform, vaDisplay); } VADevice::~VADevice() { } } // namespace NEO compute-runtime-22.14.22890/opencl/source/sharings/va/va_device.h000066400000000000000000000013471422164147700244610ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT 
* */ #pragma once #include "CL/cl_va_api_media_sharing_intel.h" #include namespace NEO { class ClDevice; class Platform; class VADevice { public: VADevice(); virtual ~VADevice(); ClDevice *getRootDeviceFromVaDisplay(Platform *pPlatform, VADisplay vaDisplay); ClDevice *getDeviceFromVA(Platform *pPlatform, VADisplay vaDisplay); static std::function fdlopen; static std::function fdlsym; static std::function fdlclose; protected: void *vaLibHandle = nullptr; void *vaGetDevice = nullptr; }; } // namespace NEO compute-runtime-22.14.22890/opencl/source/sharings/va/va_device_shared.cpp000066400000000000000000000030011422164147700263270ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/device/device.h" #include "shared/source/os_interface/linux/drm_neo.h" #include "shared/source/os_interface/linux/pci_path.h" #include "shared/source/os_interface/os_interface.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/platform/platform.h" #include "opencl/source/sharings/va/va_device.h" #include #include #include #include namespace NEO { ClDevice *VADevice::getRootDeviceFromVaDisplay(Platform *pPlatform, VADisplay vaDisplay) { VADisplayContextP pDisplayContext_test = reinterpret_cast(vaDisplay); UNRECOVERABLE_IF(pDisplayContext_test->vadpy_magic != 0x56414430); VADriverContextP pDriverContext_test = pDisplayContext_test->pDriverContext; int deviceFd = *static_cast(pDriverContext_test->drm_state); UNRECOVERABLE_IF(deviceFd < 0); auto devicePath = NEO::getPciPath(deviceFd); if (devicePath == std::nullopt) { return nullptr; } for (size_t i = 0; i < pPlatform->getNumDevices(); ++i) { auto device = pPlatform->getClDevice(i); NEO::Device *neoDevice = &device->getDevice(); auto *drm = neoDevice->getRootDeviceEnvironment().osInterface->getDriverModel()->as(); auto pciPath = drm->getPciPath(); if (devicePath == pciPath) { return device; } } return nullptr; } } // namespace NEO 
compute-runtime-22.14.22890/opencl/source/sharings/va/va_extension.cpp000066400000000000000000000005151422164147700255650ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #ifdef LIBVA #include "opencl/source/sharings/va/enable_va.h" #include namespace NEO { void *VaSharingBuilderFactory::getExtensionFunctionAddressExtra(const std::string &functionName) { return nullptr; } } // namespace NEO #endifcompute-runtime-22.14.22890/opencl/source/sharings/va/va_sharing.cpp000066400000000000000000000007471422164147700252130ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/context/context.inl" #include "opencl/source/sharings/sharing_factory.h" #include "opencl/source/sharings/va/va_sharing_defines.h" #include "opencl/source/sharings/va/va_sharing_functions.h" namespace NEO { const uint32_t VASharingFunctions::sharingId = SharingType::VA_SHARING; template VASharingFunctions *Context::getSharing(); } // namespace NEO compute-runtime-22.14.22890/opencl/source/sharings/va/va_sharing.h000066400000000000000000000012731422164147700246530ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/api/cl_types.h" #include "opencl/source/sharings/sharing.h" #include "opencl/source/sharings/va/va_sharing_defines.h" #include "opencl/source/sharings/va/va_sharing_functions.h" namespace NEO { class VASharing : public SharingHandler { public: VASharing(VASharingFunctions *sharingFunctions, VAImageID imageId) : sharingFunctions(sharingFunctions), imageId(imageId){}; VASharingFunctions *peekFunctionsHandler() { return sharingFunctions; } protected: VASharingFunctions *sharingFunctions = nullptr; VAImageID imageId; }; } // namespace NEO 
compute-runtime-22.14.22890/opencl/source/sharings/va/va_sharing_defines.h000066400000000000000000000017371422164147700263550ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "CL/cl.h" #include "CL/cl_va_api_media_sharing_intel.h" typedef int (*VADisplayIsValidPFN)(VADisplay vaDisplay); typedef VAStatus (*VADeriveImagePFN)(VADisplay vaDisplay, VASurfaceID vaSurface, VAImage *vaImage); typedef VAStatus (*VADestroyImagePFN)(VADisplay vaDisplay, VAImageID vaImageId); typedef VAStatus (*VAExtGetSurfaceHandlePFN)(VADisplay vaDisplay, VASurfaceID *vaSurface, unsigned int *handleId); typedef VAStatus (*VAExportSurfaceHandlePFN)(VADisplay vaDisplay, VASurfaceID vaSurface, uint32_t memType, uint32_t flags, void *descriptor); typedef VAStatus (*VASyncSurfacePFN)(VADisplay vaDisplay, VASurfaceID vaSurface); typedef void *(*VAGetLibFuncPFN)(VADisplay vaDisplay, const char *func); typedef VAStatus (*VAQueryImageFormatsPFN)(VADisplay vaDisplay, VAImageFormat *formatList, int *numFormats); typedef int (*VAMaxNumImageFormatsPFN)(VADisplay vaDisplay); compute-runtime-22.14.22890/opencl/source/sharings/va/va_sharing_functions.cpp000066400000000000000000000131301422164147700272710ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "va_sharing_functions.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "opencl/source/sharings/va/va_surface.h" #include namespace Os { extern const char *libvaDllName; } namespace NEO { std::function VASharingFunctions::fdlopen = dlopen; std::function VASharingFunctions::fdlsym = dlsym; std::function VASharingFunctions::fdlclose = dlclose; VASharingFunctions::VASharingFunctions(VADisplay vaDisplay) : vaDisplay(vaDisplay) { initFunctions(); }; VASharingFunctions::~VASharingFunctions() { if (libHandle != nullptr) { fdlclose(libHandle); libHandle = nullptr; } } bool 
VASharingFunctions::isVaLibraryAvailable() { auto lib = fdlopen(Os::libvaDllName, RTLD_LAZY); if (lib) { fdlclose(lib); return true; } return false; } void VASharingFunctions::initFunctions() { bool enableVaLibCalls = true; if (DebugManager.flags.EnableVaLibCalls.get() != -1) { enableVaLibCalls = !!DebugManager.flags.EnableVaLibCalls.get(); } if (enableVaLibCalls) { libHandle = fdlopen(Os::libvaDllName, RTLD_LAZY); if (libHandle) { vaDisplayIsValidPFN = reinterpret_cast(fdlsym(libHandle, "vaDisplayIsValid")); vaDeriveImagePFN = reinterpret_cast(fdlsym(libHandle, "vaDeriveImage")); vaDestroyImagePFN = reinterpret_cast(fdlsym(libHandle, "vaDestroyImage")); vaSyncSurfacePFN = reinterpret_cast(fdlsym(libHandle, "vaSyncSurface")); vaGetLibFuncPFN = reinterpret_cast(fdlsym(libHandle, "vaGetLibFunc")); vaExtGetSurfaceHandlePFN = reinterpret_cast(getLibFunc("DdiMedia_ExtGetSurfaceHandle")); vaExportSurfaceHandlePFN = reinterpret_cast(fdlsym(libHandle, "vaExportSurfaceHandle")); vaQueryImageFormatsPFN = reinterpret_cast(fdlsym(libHandle, "vaQueryImageFormats")); vaMaxNumImageFormatsPFN = reinterpret_cast(fdlsym(libHandle, "vaMaxNumImageFormats")); } else { vaDisplayIsValidPFN = nullptr; vaDeriveImagePFN = nullptr; vaDestroyImagePFN = nullptr; vaSyncSurfacePFN = nullptr; vaGetLibFuncPFN = nullptr; vaExtGetSurfaceHandlePFN = nullptr; vaExportSurfaceHandlePFN = nullptr; vaQueryImageFormatsPFN = nullptr; vaMaxNumImageFormatsPFN = nullptr; } } } void VASharingFunctions::querySupportedVaImageFormats(VADisplay vaDisplay) { int maxFormats = this->maxNumImageFormats(vaDisplay); if (maxFormats > 0) { std::unique_ptr allVaFormats(new VAImageFormat[maxFormats]); auto result = this->queryImageFormats(vaDisplay, allVaFormats.get(), &maxFormats); if (result == VA_STATUS_SUCCESS) { for (int i = 0; i < maxFormats; i++) { if (VASurface::isSupportedFourCCTwoPlaneFormat(allVaFormats[i].fourcc)) { supported2PlaneFormats.emplace_back(allVaFormats[i]); } else if 
(VASurface::isSupportedFourCCThreePlaneFormat(allVaFormats[i].fourcc)) { supported3PlaneFormats.emplace_back(allVaFormats[i]); } } } } } cl_int VASharingFunctions::getSupportedFormats(cl_mem_flags flags, cl_mem_object_type imageType, cl_uint plane, cl_uint numEntries, VAImageFormat *formats, cl_uint *numImageFormats) { if (flags != CL_MEM_READ_ONLY && flags != CL_MEM_WRITE_ONLY && flags != CL_MEM_READ_WRITE && flags != CL_MEM_KERNEL_READ_AND_WRITE) { return CL_INVALID_VALUE; } if (imageType != CL_MEM_OBJECT_IMAGE2D) { return CL_SUCCESS; } if (numImageFormats != nullptr) { if (plane == 2) { *numImageFormats = static_cast(supported3PlaneFormats.size()); } else if (plane < 2) { *numImageFormats = static_cast(supported2PlaneFormats.size() + supported3PlaneFormats.size()); } } if (plane == 2) { if (formats != nullptr && supported3PlaneFormats.size() > 0) { uint32_t elementsToCopy = std::min(numEntries, static_cast(supported3PlaneFormats.size())); memcpy_s(formats, elementsToCopy * sizeof(VAImageFormat), &supported3PlaneFormats[0], elementsToCopy * sizeof(VAImageFormat)); } } else if (plane < 2) { if (formats != nullptr && (supported2PlaneFormats.size() > 0 || supported3PlaneFormats.size() > 0)) { uint32_t elementsToCopy = std::min(numEntries, static_cast(supported2PlaneFormats.size() + supported3PlaneFormats.size())); std::vector tmp_formats; tmp_formats.insert(tmp_formats.end(), supported2PlaneFormats.begin(), supported2PlaneFormats.end()); tmp_formats.insert(tmp_formats.end(), supported3PlaneFormats.begin(), supported3PlaneFormats.end()); memcpy_s(formats, elementsToCopy * sizeof(VAImageFormat), &tmp_formats[0], elementsToCopy * sizeof(VAImageFormat)); } } return CL_SUCCESS; } } // namespace NEO compute-runtime-22.14.22890/opencl/source/sharings/va/va_sharing_functions.h000066400000000000000000000064521422164147700267470ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include 
"opencl/source/sharings/sharing.h" #include "opencl/source/sharings/va/va_sharing_defines.h" #include #include #include namespace NEO { class VASharingFunctions : public SharingFunctions { public: VASharingFunctions(VADisplay vaDisplay); ~VASharingFunctions() override; uint32_t getId() const override { return VASharingFunctions::sharingId; } static const uint32_t sharingId; MOCKABLE_VIRTUAL bool isValidVaDisplay() { return vaDisplayIsValidPFN(vaDisplay) == 1; } MOCKABLE_VIRTUAL VAStatus deriveImage(VASurfaceID vaSurface, VAImage *vaImage) { return vaDeriveImagePFN(vaDisplay, vaSurface, vaImage); } MOCKABLE_VIRTUAL VAStatus destroyImage(VAImageID vaImageId) { return vaDestroyImagePFN(vaDisplay, vaImageId); } MOCKABLE_VIRTUAL VAStatus extGetSurfaceHandle(VASurfaceID *vaSurface, unsigned int *handleId) { return vaExtGetSurfaceHandlePFN(vaDisplay, vaSurface, handleId); } MOCKABLE_VIRTUAL VAStatus exportSurfaceHandle(VASurfaceID vaSurface, uint32_t memType, uint32_t flags, void *descriptor) { return vaExportSurfaceHandlePFN(vaDisplay, vaSurface, memType, flags, descriptor); } MOCKABLE_VIRTUAL VAStatus syncSurface(VASurfaceID vaSurface) { return vaSyncSurfacePFN(vaDisplay, vaSurface); } MOCKABLE_VIRTUAL VAStatus queryImageFormats(VADisplay vaDisplay, VAImageFormat *formatList, int *numFormats) { return vaQueryImageFormatsPFN(vaDisplay, formatList, numFormats); } MOCKABLE_VIRTUAL int maxNumImageFormats(VADisplay vaDisplay) { if (vaMaxNumImageFormatsPFN) { return vaMaxNumImageFormatsPFN(vaDisplay); } return 0; } void *getLibFunc(const char *func) { if (vaGetLibFuncPFN) { return vaGetLibFuncPFN(vaDisplay, func); } return nullptr; } void initFunctions(); void querySupportedVaImageFormats(VADisplay vaDisplay); cl_int getSupportedFormats(cl_mem_flags flags, cl_mem_object_type imageType, cl_uint plane, cl_uint numEntries, VAImageFormat *formats, cl_uint *numImageFormats); static std::function fdlopen; static std::function fdlsym; static std::function fdlclose; static bool 
isVaLibraryAvailable(); std::mutex mutex; protected: void *libHandle = nullptr; VADisplay vaDisplay = nullptr; VADisplayIsValidPFN vaDisplayIsValidPFN = [](VADisplay vaDisplay) { return 0; }; VADeriveImagePFN vaDeriveImagePFN; VADestroyImagePFN vaDestroyImagePFN; VASyncSurfacePFN vaSyncSurfacePFN; VAExtGetSurfaceHandlePFN vaExtGetSurfaceHandlePFN; VAExportSurfaceHandlePFN vaExportSurfaceHandlePFN; VAGetLibFuncPFN vaGetLibFuncPFN; VAQueryImageFormatsPFN vaQueryImageFormatsPFN; VAMaxNumImageFormatsPFN vaMaxNumImageFormatsPFN; std::vector supported2PlaneFormats; std::vector supported3PlaneFormats; }; } // namespace NEO compute-runtime-22.14.22890/opencl/source/sharings/va/va_surface.cpp000066400000000000000000000232521422164147700252040ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/sharings/va/va_surface.h" #include "shared/source/gmm_helper/gmm.h" #include "shared/source/helpers/get_info.h" #include "shared/source/memory_manager/memory_manager.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/context/context.h" #include "opencl/source/mem_obj/image.h" #include #include namespace NEO { Image *VASurface::createSharedVaSurface(Context *context, VASharingFunctions *sharingFunctions, cl_mem_flags flags, cl_mem_flags_intel flagsIntel, VASurfaceID *surface, cl_uint plane, cl_int *errcodeRet) { ErrorCodeHelper errorCode(errcodeRet, CL_SUCCESS); auto memoryManager = context->getMemoryManager(); unsigned int sharedHandle = 0; VADRMPRIMESurfaceDescriptor vaDrmPrimeSurfaceDesc = {}; VAImage vaImage = {}; cl_image_desc imgDesc = {}; cl_image_format gmmImgFormat = {CL_NV12_INTEL, CL_UNORM_INT8}; cl_channel_order channelOrder = CL_RG; cl_channel_type channelType = CL_UNORM_INT8; ImageInfo imgInfo = {}; VAImageID imageId = 0; McsSurfaceInfo mcsSurfaceInfo = {}; VAStatus vaStatus; uint32_t imageFourcc = 0; size_t imageOffset = 0; size_t imagePitch = 0; std::unique_lock 
lock(sharingFunctions->mutex); vaStatus = sharingFunctions->exportSurfaceHandle(*surface, VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2, VA_EXPORT_SURFACE_READ_WRITE | VA_EXPORT_SURFACE_SEPARATE_LAYERS, &vaDrmPrimeSurfaceDesc); if (VA_STATUS_SUCCESS == vaStatus) { imageId = VA_INVALID_ID; imgDesc.image_width = vaDrmPrimeSurfaceDesc.width; imgDesc.image_height = vaDrmPrimeSurfaceDesc.height; imageFourcc = vaDrmPrimeSurfaceDesc.fourcc; if (plane == 1) { imageOffset = vaDrmPrimeSurfaceDesc.layers[1].offset[0]; imagePitch = vaDrmPrimeSurfaceDesc.layers[1].pitch[0]; } else if (plane == 2) { imageOffset = vaDrmPrimeSurfaceDesc.layers[2].offset[0]; imagePitch = vaDrmPrimeSurfaceDesc.layers[2].pitch[0]; } imgInfo.linearStorage = DRM_FORMAT_MOD_LINEAR == vaDrmPrimeSurfaceDesc.objects[0].drm_format_modifier; sharedHandle = vaDrmPrimeSurfaceDesc.objects[0].fd; } else { sharingFunctions->deriveImage(*surface, &vaImage); imageId = vaImage.image_id; imgDesc.image_width = vaImage.width; imgDesc.image_height = vaImage.height; imageFourcc = vaImage.format.fourcc; if (plane == 1) { imageOffset = vaImage.offsets[1]; imagePitch = vaImage.pitches[0]; } else if (plane == 2) { imageOffset = vaImage.offsets[2]; imagePitch = vaImage.pitches[0]; } imgInfo.linearStorage = false; sharingFunctions->extGetSurfaceHandle(surface, &sharedHandle); } bool isRGBPFormat = DebugManager.flags.EnableExtendedVaFormats.get() && imageFourcc == VA_FOURCC_RGBP; imgDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imgInfo.imgDesc = Image::convertDescriptor(imgDesc); if (plane == 0) { imgInfo.plane = GMM_PLANE_Y; channelOrder = CL_R; } else if (plane == 1) { imgInfo.plane = GMM_PLANE_U; channelOrder = isRGBPFormat ? 
CL_R : CL_RG; } else if (plane == 2) { UNRECOVERABLE_IF(!isRGBPFormat); imgInfo.plane = GMM_PLANE_V; channelOrder = CL_R; } else { UNRECOVERABLE_IF(true); } auto gmmSurfaceFormat = Image::getSurfaceFormatFromTable(flags, &gmmImgFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); //vaImage.format.fourcc == VA_FOURCC_NV12 if (DebugManager.flags.EnableExtendedVaFormats.get() && imageFourcc == VA_FOURCC_RGBP) { channelType = CL_UNORM_INT8; gmmSurfaceFormat = getExtendedSurfaceFormatInfo(imageFourcc); } else if (imageFourcc == VA_FOURCC_P010 || imageFourcc == VA_FOURCC_P016) { channelType = CL_UNORM_INT16; gmmSurfaceFormat = getExtendedSurfaceFormatInfo(imageFourcc); } imgInfo.surfaceFormat = &gmmSurfaceFormat->surfaceFormat; cl_image_format imgFormat = {channelOrder, channelType}; auto imgSurfaceFormat = Image::getSurfaceFormatFromTable(flags, &imgFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); AllocationProperties properties(context->getDevice(0)->getRootDeviceIndex(), false, // allocateMemory imgInfo, AllocationType::SHARED_IMAGE, context->getDeviceBitfieldForAllocation(context->getDevice(0)->getRootDeviceIndex())); auto alloc = memoryManager->createGraphicsAllocationFromSharedHandle(sharedHandle, properties, false, false); memoryManager->closeSharedHandle(alloc); lock.unlock(); imgDesc.image_row_pitch = imgInfo.rowPitch; imgDesc.image_slice_pitch = 0u; imgInfo.slicePitch = 0u; imgInfo.surfaceFormat = &imgSurfaceFormat->surfaceFormat; imgInfo.yOffset = 0; imgInfo.xOffset = 0; if (plane == 1) { if (!isRGBPFormat) { imgDesc.image_width /= 2; imgDesc.image_height /= 2; } imgInfo.offset = imageOffset; imgInfo.yOffsetForUVPlane = static_cast(imageOffset / imagePitch); } if (isRGBPFormat && plane == 2) { imgInfo.offset = imageOffset; } imgInfo.imgDesc = Image::convertDescriptor(imgDesc); if (VA_INVALID_ID != imageId) { sharingFunctions->destroyImage(imageId); } auto vaSurface = new 
VASurface(sharingFunctions, imageId, plane, surface, context->getInteropUserSyncEnabled()); auto multiGraphicsAllocation = MultiGraphicsAllocation(context->getDevice(0)->getRootDeviceIndex()); multiGraphicsAllocation.addAllocation(alloc); auto image = Image::createSharedImage(context, vaSurface, mcsSurfaceInfo, std::move(multiGraphicsAllocation), nullptr, flags, flagsIntel, imgSurfaceFormat, imgInfo, __GMM_NO_CUBE_MAP, 0, 0); image->setMediaPlaneType(plane); return image; } void VASurface::synchronizeObject(UpdateData &updateData) { updateData.synchronizationStatus = SynchronizeStatus::ACQUIRE_SUCCESFUL; if (!interopUserSync) { if (sharingFunctions->syncSurface(surfaceId) != VA_STATUS_SUCCESS) { updateData.synchronizationStatus = SYNCHRONIZE_ERROR; } } } void VASurface::getMemObjectInfo(size_t ¶mValueSize, void *¶mValue) { paramValueSize = sizeof(surfaceIdPtr); paramValue = &surfaceIdPtr; } bool VASurface::validate(cl_mem_flags flags, cl_uint plane) { switch (flags) { case CL_MEM_READ_ONLY: case CL_MEM_WRITE_ONLY: case CL_MEM_READ_WRITE: break; default: return false; } if (plane > 1 && !DebugManager.flags.EnableExtendedVaFormats.get()) { return false; } return true; } const ClSurfaceFormatInfo *VASurface::getExtendedSurfaceFormatInfo(uint32_t formatFourCC) { if (formatFourCC == VA_FOURCC_P010) { static const ClSurfaceFormatInfo formatInfoP010 = {{CL_NV12_INTEL, CL_UNORM_INT16}, {GMM_RESOURCE_FORMAT::GMM_FORMAT_P010, static_cast(NUM_GFX3DSTATE_SURFACEFORMATS), // not used for plane images 0, 1, 2, 2}}; return &formatInfoP010; } if (formatFourCC == VA_FOURCC_P016) { static const ClSurfaceFormatInfo formatInfoP016 = {{CL_NV12_INTEL, CL_UNORM_INT16}, {GMM_RESOURCE_FORMAT::GMM_FORMAT_P016, static_cast(NUM_GFX3DSTATE_SURFACEFORMATS), // not used for plane images 0, 1, 2, 2}}; return &formatInfoP016; } if (formatFourCC == VA_FOURCC_RGBP) { static const ClSurfaceFormatInfo formatInfoRGBP = {{CL_NV12_INTEL, CL_UNORM_INT8}, {GMM_RESOURCE_FORMAT::GMM_FORMAT_RGBP, 
static_cast(GFX3DSTATE_SURFACEFORMAT_R8_UNORM), // not used for plane images 0, 1, 1, 1}}; return &formatInfoRGBP; } return nullptr; } bool VASurface::isSupportedFourCCTwoPlaneFormat(int fourcc) { if ((fourcc == VA_FOURCC_NV12) || (fourcc == VA_FOURCC_P010) || (fourcc == VA_FOURCC_P016)) { return true; } return false; } bool VASurface::isSupportedFourCCThreePlaneFormat(int fourcc) { if (DebugManager.flags.EnableExtendedVaFormats.get() && fourcc == VA_FOURCC_RGBP) { return true; } return false; } } // namespace NEO compute-runtime-22.14.22890/opencl/source/sharings/va/va_surface.h000066400000000000000000000026371422164147700246550ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/mem_obj/image.h" #include "opencl/source/sharings/va/va_sharing.h" namespace NEO { class Context; class Image; class VASurface : VASharing { public: static Image *createSharedVaSurface(Context *context, VASharingFunctions *sharingFunctions, cl_mem_flags flags, cl_mem_flags_intel flagsIntel, VASurfaceID *surface, cl_uint plane, cl_int *errcodeRet); void synchronizeObject(UpdateData &updateData) override; void getMemObjectInfo(size_t ¶mValueSize, void *¶mValue) override; static bool validate(cl_mem_flags flags, cl_uint plane); static const ClSurfaceFormatInfo *getExtendedSurfaceFormatInfo(uint32_t formatFourCC); static bool isSupportedFourCCTwoPlaneFormat(int fourcc); static bool isSupportedFourCCThreePlaneFormat(int fourcc); protected: VASurface(VASharingFunctions *sharingFunctions, VAImageID imageId, cl_uint plane, VASurfaceID *surfaceId, bool interopUserSync) : VASharing(sharingFunctions, imageId), plane(plane), surfaceId(*surfaceId), interopUserSync(interopUserSync) { surfaceIdPtr = &this->surfaceId; }; cl_uint plane; VASurfaceID surfaceId; VASurfaceID *surfaceIdPtr; bool interopUserSync; }; } // namespace NEO 
compute-runtime-22.14.22890/opencl/source/tracing/000077500000000000000000000000001422164147700215615ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/source/tracing/CMakeLists.txt000066400000000000000000000010361422164147700243210ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(RUNTIME_SRCS_TRACING ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/tracing_api.cpp ${CMAKE_CURRENT_SOURCE_DIR}/tracing_api.h ${CMAKE_CURRENT_SOURCE_DIR}/tracing_handle.h ${CMAKE_CURRENT_SOURCE_DIR}/tracing_notify.h ${CMAKE_CURRENT_SOURCE_DIR}/tracing_types.h ) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_TRACING}) set_property(GLOBAL PROPERTY RUNTIME_SRCS_TRACING ${RUNTIME_SRCS_TRACING}) compute-runtime-22.14.22890/opencl/source/tracing/tracing_api.cpp000066400000000000000000000143041422164147700245470ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/tracing/tracing_api.h" #include "opencl/source/tracing/tracing_handle.h" #include "opencl/source/tracing/tracing_notify.h" namespace HostSideTracing { // [XYZZ..Z] - { X - enabled/disabled bit, Y - locked/unlocked bit, ZZ..Z - client count bits } std::atomic tracingState(0); TracingHandle *tracingHandle[TRACING_MAX_HANDLE_COUNT] = {nullptr}; std::atomic tracingCorrelationId(0); bool addTracingClient() { uint32_t state = tracingState.load(std::memory_order_acquire); state = TRACING_SET_ENABLED_BIT(state); state = TRACING_UNSET_LOCKED_BIT(state); AtomicBackoff backoff; while (!tracingState.compare_exchange_weak(state, state + 1, std::memory_order_release, std::memory_order_acquire)) { if (!TRACING_GET_ENABLED_BIT(state)) { return false; } else if (TRACING_GET_LOCKED_BIT(state)) { DEBUG_BREAK_IF(TRACING_GET_CLIENT_COUNTER(state) != 0); state = TRACING_UNSET_LOCKED_BIT(state); backoff.pause(); } else { backoff.pause(); } } return true; } void 
removeTracingClient() { DEBUG_BREAK_IF(!TRACING_GET_ENABLED_BIT(tracingState.load(std::memory_order_acquire))); DEBUG_BREAK_IF(TRACING_GET_LOCKED_BIT(tracingState.load(std::memory_order_acquire))); DEBUG_BREAK_IF(TRACING_GET_CLIENT_COUNTER(tracingState.load(std::memory_order_acquire)) == 0); tracingState.fetch_sub(1, std::memory_order_acq_rel); } static void LockTracingState() { uint32_t state = tracingState.load(std::memory_order_acquire); state = TRACING_ZERO_CLIENT_COUNTER(state); state = TRACING_UNSET_LOCKED_BIT(state); AtomicBackoff backoff; while (!tracingState.compare_exchange_weak(state, TRACING_SET_LOCKED_BIT(state), std::memory_order_release, std::memory_order_acquire)) { state = TRACING_ZERO_CLIENT_COUNTER(state); state = TRACING_UNSET_LOCKED_BIT(state); backoff.pause(); } DEBUG_BREAK_IF(!TRACING_GET_LOCKED_BIT(tracingState.load(std::memory_order_acquire))); DEBUG_BREAK_IF(TRACING_GET_CLIENT_COUNTER(tracingState.load(std::memory_order_acquire)) > 0); } static void UnlockTracingState() { DEBUG_BREAK_IF(!TRACING_GET_LOCKED_BIT(tracingState.load(std::memory_order_acquire))); DEBUG_BREAK_IF(TRACING_GET_CLIENT_COUNTER(tracingState.load(std::memory_order_acquire)) > 0); tracingState.fetch_and(~TRACING_STATE_LOCKED_BIT, std::memory_order_acq_rel); } } // namespace HostSideTracing using namespace HostSideTracing; cl_int CL_API_CALL clCreateTracingHandleINTEL(cl_device_id device, cl_tracing_callback callback, void *userData, cl_tracing_handle *handle) { if (device == nullptr || callback == nullptr || handle == nullptr) { return CL_INVALID_VALUE; } *handle = new _cl_tracing_handle; if (*handle == nullptr) { return CL_OUT_OF_HOST_MEMORY; } (*handle)->device = device; (*handle)->handle = new TracingHandle(callback, userData); if ((*handle)->handle == nullptr) { delete *handle; return CL_OUT_OF_HOST_MEMORY; } return CL_SUCCESS; } cl_int CL_API_CALL clSetTracingPointINTEL(cl_tracing_handle handle, cl_function_id fid, cl_bool enable) { if (handle == nullptr) { return 
CL_INVALID_VALUE; } DEBUG_BREAK_IF(handle->handle == nullptr); if (static_cast(fid) >= CL_FUNCTION_COUNT) { return CL_INVALID_VALUE; } handle->handle->setTracingPoint(fid, enable); return CL_SUCCESS; } cl_int CL_API_CALL clDestroyTracingHandleINTEL(cl_tracing_handle handle) { if (handle == nullptr) { return CL_INVALID_VALUE; } DEBUG_BREAK_IF(handle->handle == nullptr); delete handle->handle; delete handle; return CL_SUCCESS; } cl_int CL_API_CALL clEnableTracingINTEL(cl_tracing_handle handle) { if (handle == nullptr) { return CL_INVALID_VALUE; } LockTracingState(); size_t i = 0; DEBUG_BREAK_IF(handle->handle == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { if (tracingHandle[i] == handle->handle) { UnlockTracingState(); return CL_INVALID_VALUE; } ++i; } if (i == TRACING_MAX_HANDLE_COUNT) { UnlockTracingState(); return CL_OUT_OF_RESOURCES; } DEBUG_BREAK_IF(tracingHandle[i] != nullptr); tracingHandle[i] = handle->handle; if (i == 0) { tracingState.fetch_or(TRACING_STATE_ENABLED_BIT, std::memory_order_acq_rel); } UnlockTracingState(); return CL_SUCCESS; } cl_int CL_API_CALL clDisableTracingINTEL(cl_tracing_handle handle) { if (handle == nullptr) { return CL_INVALID_VALUE; } LockTracingState(); size_t size = 0; while (size < TRACING_MAX_HANDLE_COUNT && tracingHandle[size] != nullptr) { ++size; } size_t i = 0; DEBUG_BREAK_IF(handle->handle == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { if (tracingHandle[i] == handle->handle) { if (size == 1) { DEBUG_BREAK_IF(i != 0); tracingState.fetch_and(~TRACING_STATE_ENABLED_BIT, std::memory_order_acq_rel); tracingHandle[i] = nullptr; } else { tracingHandle[i] = tracingHandle[size - 1]; tracingHandle[size - 1] = nullptr; } UnlockTracingState(); return CL_SUCCESS; } ++i; } UnlockTracingState(); return CL_INVALID_VALUE; } cl_int CL_API_CALL clGetTracingStateINTEL(cl_tracing_handle handle, cl_bool *enable) { if (handle == nullptr || enable == nullptr) { return 
CL_INVALID_VALUE; } LockTracingState(); *enable = CL_FALSE; size_t i = 0; DEBUG_BREAK_IF(handle->handle == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { if (tracingHandle[i] == handle->handle) { *enable = CL_TRUE; break; } ++i; } UnlockTracingState(); return CL_SUCCESS; } compute-runtime-22.14.22890/opencl/source/tracing/tracing_api.h000066400000000000000000000050721422164147700242160ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/tracing/tracing_types.h" #ifdef __cplusplus extern "C" { #endif /*! Function creates a tracing handle object \param[in] device Device to create tracing handle for \param[in] callback User-defined callback that will be called along with traced API function \param[in] userData Pointer to any data user would like to pass into the callback, can be zero \param[out] handle Tracing handle object that describes current tracing session \return Status code for current operation Thread Safety: yes */ cl_int CL_API_CALL clCreateTracingHandleINTEL(cl_device_id device, cl_tracing_callback callback, void *userData, cl_tracing_handle *handle); /*! Function allows to specify which target API call should be traced. By default function will NOT be traced \param[in] handle Tracing handle object \param[in] fid Target function identifier \param[in] enable Flag to enable/disable tracing for target function \return Status code for current operation Thread Safety: no */ cl_int CL_API_CALL clSetTracingPointINTEL(cl_tracing_handle handle, cl_function_id fid, cl_bool enable); /*! Function destroys the tracing handle object and releases all the associated resources \param[in] handle Tracing handle object \return Status code for current operation Thread Safety: no */ cl_int CL_API_CALL clDestroyTracingHandleINTEL(cl_tracing_handle handle); /*! Function enables the tracing process for the handle. 
Multiple handles can be enabled at a time \param[in] handle Tracing handle object \return Status code for current operation Thread Safety: yes */ cl_int CL_API_CALL clEnableTracingINTEL(cl_tracing_handle handle); /*! Function disables the tracing process for the handle. It will wait until all currently running callbacks are done \param[in] handle Tracing handle object \return Status code for current operation Thread Safety: yes */ cl_int CL_API_CALL clDisableTracingINTEL(cl_tracing_handle handle); /*! Function requests the tracing state for the handle \param[in] handle Tracing handle object \param[out] enable Returns TRUE if tracing handle is in use and FALSE otherwise \return Status code for current operation Thread Safety: yes */ cl_int CL_API_CALL clGetTracingStateINTEL(cl_tracing_handle handle, cl_bool *enable); #ifdef __cplusplus } #endif compute-runtime-22.14.22890/opencl/source/tracing/tracing_handle.h000066400000000000000000000022001422164147700246660ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/debug_helpers.h" #include "opencl/source/tracing/tracing_types.h" #include #include namespace HostSideTracing { struct TracingHandle { public: TracingHandle(cl_tracing_callback callback, void *userData) : callback(callback), userData(userData) {} void call(cl_function_id fid, cl_callback_data *callbackData) { callback(fid, callbackData, userData); } void setTracingPoint(cl_function_id fid, bool enable) { DEBUG_BREAK_IF(static_cast(fid) >= CL_FUNCTION_COUNT); mask[static_cast(fid)] = enable; } bool getTracingPoint(cl_function_id fid) const { DEBUG_BREAK_IF(static_cast(fid) >= CL_FUNCTION_COUNT); return mask[static_cast(fid)]; } private: cl_tracing_callback callback; void *userData; std::bitset mask; }; } // namespace HostSideTracing struct _cl_tracing_handle { cl_device_id device; HostSideTracing::TracingHandle *handle; }; 
compute-runtime-22.14.22890/opencl/source/tracing/tracing_notify.h000066400000000000000000010745761422164147700247750ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/utilities/cpuintrinsics.h" #include "opencl/source/tracing/tracing_handle.h" #include #include #include namespace HostSideTracing { #define TRACING_SET_ENABLED_BIT(state) ((state) | (HostSideTracing::TRACING_STATE_ENABLED_BIT)) #define TRACING_UNSET_ENABLED_BIT(state) ((state) & (~HostSideTracing::TRACING_STATE_ENABLED_BIT)) #define TRACING_GET_ENABLED_BIT(state) ((state) & (HostSideTracing::TRACING_STATE_ENABLED_BIT)) #define TRACING_SET_LOCKED_BIT(state) ((state) | (HostSideTracing::TRACING_STATE_LOCKED_BIT)) #define TRACING_UNSET_LOCKED_BIT(state) ((state) & (~HostSideTracing::TRACING_STATE_LOCKED_BIT)) #define TRACING_GET_LOCKED_BIT(state) ((state) & (HostSideTracing::TRACING_STATE_LOCKED_BIT)) #define TRACING_ZERO_CLIENT_COUNTER(state) ((state) & (HostSideTracing::TRACING_STATE_ENABLED_BIT | HostSideTracing::TRACING_STATE_LOCKED_BIT)) #define TRACING_GET_CLIENT_COUNTER(state) ((state) & (~(HostSideTracing::TRACING_STATE_ENABLED_BIT | HostSideTracing::TRACING_STATE_LOCKED_BIT))) #define TRACING_ENTER(name, ...) \ bool isHostSideTracingEnabled_##name = false; \ HostSideTracing::name##Tracer tracer_##name; \ if (TRACING_GET_ENABLED_BIT(HostSideTracing::tracingState.load(std::memory_order_acquire))) { \ isHostSideTracingEnabled_##name = HostSideTracing::addTracingClient(); \ if (isHostSideTracingEnabled_##name) { \ tracer_##name.enter(__VA_ARGS__); \ } \ } #define TRACING_EXIT(name, ...) 
\ if (isHostSideTracingEnabled_##name) { \ tracer_##name.exit(__VA_ARGS__); \ HostSideTracing::removeTracingClient(); \ } typedef enum _tracing_notify_state_t { TRACING_NOTIFY_STATE_NOTHING_CALLED = 0, TRACING_NOTIFY_STATE_ENTER_CALLED = 1, TRACING_NOTIFY_STATE_EXIT_CALLED = 2, } tracing_notify_state_t; constexpr size_t TRACING_MAX_HANDLE_COUNT = 16; constexpr uint32_t TRACING_STATE_ENABLED_BIT = 0x80000000u; constexpr uint32_t TRACING_STATE_LOCKED_BIT = 0x40000000u; extern std::atomic tracingState; extern TracingHandle *tracingHandle[TRACING_MAX_HANDLE_COUNT]; extern std::atomic tracingCorrelationId; bool addTracingClient(); void removeTracingClient(); class AtomicBackoff { public: AtomicBackoff() {} void pause() { if (count < loopsBeforeYield) { for (uint32_t i = 0; i < count; i++) { NEO::CpuIntrinsics::pause(); } count *= 2; } else { std::this_thread::yield(); } } private: static const uint32_t loopsBeforeYield = 16; uint32_t count = 1; }; class clBuildProgramTracer { public: clBuildProgramTracer() {} void enter(cl_program *program, cl_uint *numDevices, const cl_device_id **deviceList, const char **options, void(CL_CALLBACK **funcNotify)(cl_program program, void *userData), void **userData) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.program = program; params.numDevices = numDevices; params.deviceList = deviceList; params.options = options; params.funcNotify = funcNotify; params.userData = userData; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clBuildProgram"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clBuildProgram)) { data.correlationData = correlationData + i; 
handle->call(CL_FUNCTION_clBuildProgram, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clBuildProgram)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clBuildProgram, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clBuildProgramTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clBuildProgram params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clCloneKernelTracer { public: clCloneKernelTracer() {} void enter(cl_kernel *sourceKernel, cl_int **errcodeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.sourceKernel = sourceKernel; params.errcodeRet = errcodeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clCloneKernel"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCloneKernel)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCloneKernel, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_kernel *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = 
retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCloneKernel)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCloneKernel, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clCloneKernelTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clCloneKernel params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clCompileProgramTracer { public: clCompileProgramTracer() {} void enter(cl_program *program, cl_uint *numDevices, const cl_device_id **deviceList, const char **options, cl_uint *numInputHeaders, const cl_program **inputHeaders, const char ***headerIncludeNames, void(CL_CALLBACK **funcNotify)(cl_program program, void *userData), void **userData) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.program = program; params.numDevices = numDevices; params.deviceList = deviceList; params.options = options; params.numInputHeaders = numInputHeaders; params.inputHeaders = inputHeaders; params.headerIncludeNames = headerIncludeNames; params.funcNotify = funcNotify; params.userData = userData; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clCompileProgram"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCompileProgram)) { data.correlationData = correlationData + i; 
handle->call(CL_FUNCTION_clCompileProgram, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCompileProgram)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCompileProgram, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clCompileProgramTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clCompileProgram params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clCreateBufferTracer { public: clCreateBufferTracer() {} void enter(cl_context *context, cl_mem_flags *flags, size_t *size, void **hostPtr, cl_int **errcodeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.context = context; params.flags = flags; params.size = size; params.hostPtr = hostPtr; params.errcodeRet = errcodeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clCreateBuffer"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateBuffer)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateBuffer, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_mem *retVal) { 
DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateBuffer)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateBuffer, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clCreateBufferTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clCreateBuffer params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clCreateCommandQueueTracer { public: clCreateCommandQueueTracer() {} void enter(cl_context *context, cl_device_id *device, cl_command_queue_properties *properties, cl_int **errcodeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.context = context; params.device = device; params.properties = properties; params.errcodeRet = errcodeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clCreateCommandQueue"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateCommandQueue)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateCommandQueue, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_command_queue *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; 
data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateCommandQueue)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateCommandQueue, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clCreateCommandQueueTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clCreateCommandQueue params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clCreateCommandQueueWithPropertiesTracer { public: clCreateCommandQueueWithPropertiesTracer() {} void enter(cl_context *context, cl_device_id *device, const cl_queue_properties **properties, cl_int **errcodeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.context = context; params.device = device; params.properties = properties; params.errcodeRet = errcodeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clCreateCommandQueueWithProperties"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateCommandQueueWithProperties)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateCommandQueueWithProperties, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_command_queue *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; 
data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateCommandQueueWithProperties)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateCommandQueueWithProperties, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clCreateCommandQueueWithPropertiesTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clCreateCommandQueueWithProperties params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clCreateContextTracer { public: clCreateContextTracer() {} void enter(const cl_context_properties **properties, cl_uint *numDevices, const cl_device_id **devices, void(CL_CALLBACK **funcNotify)(const char *, const void *, size_t, void *), void **userData, cl_int **errcodeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.properties = properties; params.numDevices = numDevices; params.devices = devices; params.funcNotify = funcNotify; params.userData = userData; params.errcodeRet = errcodeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clCreateContext"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateContext)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateContext, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void 
exit(cl_context *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateContext)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateContext, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clCreateContextTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clCreateContext params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clCreateContextFromTypeTracer { public: clCreateContextFromTypeTracer() {} void enter(const cl_context_properties **properties, cl_device_type *deviceType, void(CL_CALLBACK **funcNotify)(const char *, const void *, size_t, void *), void **userData, cl_int **errcodeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.properties = properties; params.deviceType = deviceType; params.funcNotify = funcNotify; params.userData = userData; params.errcodeRet = errcodeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clCreateContextFromType"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateContextFromType)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateContextFromType, &data); } ++i; } state = 
TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_context *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateContextFromType)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateContextFromType, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clCreateContextFromTypeTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clCreateContextFromType params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clCreateImageTracer { public: clCreateImageTracer() {} void enter(cl_context *context, cl_mem_flags *flags, const cl_image_format **imageFormat, const cl_image_desc **imageDesc, void **hostPtr, cl_int **errcodeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.context = context; params.flags = flags; params.imageFormat = imageFormat; params.imageDesc = imageDesc; params.hostPtr = hostPtr; params.errcodeRet = errcodeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clCreateImage"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateImage)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateImage, &data); } ++i; } state = 
TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_mem *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateImage)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateImage, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clCreateImageTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clCreateImage params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clCreateImage2DTracer { public: clCreateImage2DTracer() {} void enter(cl_context *context, cl_mem_flags *flags, const cl_image_format **imageFormat, size_t *imageWidth, size_t *imageHeight, size_t *imageRowPitch, void **hostPtr, cl_int **errcodeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.context = context; params.flags = flags; params.imageFormat = imageFormat; params.imageWidth = imageWidth; params.imageHeight = imageHeight; params.imageRowPitch = imageRowPitch; params.hostPtr = hostPtr; params.errcodeRet = errcodeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clCreateImage2D"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateImage2D)) { data.correlationData = correlationData + i; 
handle->call(CL_FUNCTION_clCreateImage2D, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_mem *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateImage2D)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateImage2D, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clCreateImage2DTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clCreateImage2D params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clCreateImage3DTracer { public: clCreateImage3DTracer() {} void enter(cl_context *context, cl_mem_flags *flags, const cl_image_format **imageFormat, size_t *imageWidth, size_t *imageHeight, size_t *imageDepth, size_t *imageRowPitch, size_t *imageSlicePitch, void **hostPtr, cl_int **errcodeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.context = context; params.flags = flags; params.imageFormat = imageFormat; params.imageWidth = imageWidth; params.imageHeight = imageHeight; params.imageDepth = imageDepth; params.imageRowPitch = imageRowPitch; params.imageSlicePitch = imageSlicePitch; params.hostPtr = hostPtr; params.errcodeRet = errcodeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clCreateImage3D"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != 
nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateImage3D)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateImage3D, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_mem *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateImage3D)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateImage3D, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clCreateImage3DTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clCreateImage3D params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clCreateKernelTracer { public: clCreateKernelTracer() {} void enter(cl_program *program, const char **kernelName, cl_int **errcodeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.program = program; params.kernelName = kernelName; params.errcodeRet = errcodeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clCreateKernel"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateKernel)) { data.correlationData = correlationData + i; 
handle->call(CL_FUNCTION_clCreateKernel, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_kernel *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateKernel)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateKernel, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clCreateKernelTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clCreateKernel params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clCreateKernelsInProgramTracer { public: clCreateKernelsInProgramTracer() {} void enter(cl_program *program, cl_uint *numKernels, cl_kernel **kernels, cl_uint **numKernelsRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.program = program; params.numKernels = numKernels; params.kernels = kernels; params.numKernelsRet = numKernelsRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clCreateKernelsInProgram"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateKernelsInProgram)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateKernelsInProgram, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } 
void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateKernelsInProgram)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateKernelsInProgram, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clCreateKernelsInProgramTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clCreateKernelsInProgram params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clCreatePipeTracer { public: clCreatePipeTracer() {} void enter(cl_context *context, cl_mem_flags *flags, cl_uint *pipePacketSize, cl_uint *pipeMaxPackets, const cl_pipe_properties **properties, cl_int **errcodeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.context = context; params.flags = flags; params.pipePacketSize = pipePacketSize; params.pipeMaxPackets = pipeMaxPackets; params.properties = properties; params.errcodeRet = errcodeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clCreatePipe"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreatePipe)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreatePipe, &data); } ++i; } state = 
TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_mem *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreatePipe)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreatePipe, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clCreatePipeTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clCreatePipe params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clCreateProgramWithBinaryTracer { public: clCreateProgramWithBinaryTracer() {} void enter(cl_context *context, cl_uint *numDevices, const cl_device_id **deviceList, const size_t **lengths, const unsigned char ***binaries, cl_int **binaryStatus, cl_int **errcodeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.context = context; params.numDevices = numDevices; params.deviceList = deviceList; params.lengths = lengths; params.binaries = binaries; params.binaryStatus = binaryStatus; params.errcodeRet = errcodeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clCreateProgramWithBinary"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateProgramWithBinary)) { data.correlationData = correlationData + i; 
handle->call(CL_FUNCTION_clCreateProgramWithBinary, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_program *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateProgramWithBinary)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateProgramWithBinary, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clCreateProgramWithBinaryTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clCreateProgramWithBinary params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clCreateProgramWithBuiltInKernelsTracer { public: clCreateProgramWithBuiltInKernelsTracer() {} void enter(cl_context *context, cl_uint *numDevices, const cl_device_id **deviceList, const char **kernelNames, cl_int **errcodeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.context = context; params.numDevices = numDevices; params.deviceList = deviceList; params.kernelNames = kernelNames; params.errcodeRet = errcodeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clCreateProgramWithBuiltInKernels"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateProgramWithBuiltInKernels)) 
{ data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateProgramWithBuiltInKernels, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_program *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateProgramWithBuiltInKernels)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateProgramWithBuiltInKernels, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clCreateProgramWithBuiltInKernelsTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clCreateProgramWithBuiltInKernels params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clCreateProgramWithILTracer { public: clCreateProgramWithILTracer() {} void enter(cl_context *context, const void **il, size_t *length, cl_int **errcodeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.context = context; params.il = il; params.length = length; params.errcodeRet = errcodeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clCreateProgramWithIL"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateProgramWithIL)) { data.correlationData = correlationData + i; 
handle->call(CL_FUNCTION_clCreateProgramWithIL, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_program *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateProgramWithIL)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateProgramWithIL, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clCreateProgramWithILTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clCreateProgramWithIL params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clCreateProgramWithSourceTracer { public: clCreateProgramWithSourceTracer() {} void enter(cl_context *context, cl_uint *count, const char ***strings, const size_t **lengths, cl_int **errcodeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.context = context; params.count = count; params.strings = strings; params.lengths = lengths; params.errcodeRet = errcodeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clCreateProgramWithSource"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateProgramWithSource)) { data.correlationData = correlationData + i; 
handle->call(CL_FUNCTION_clCreateProgramWithSource, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_program *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateProgramWithSource)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateProgramWithSource, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clCreateProgramWithSourceTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clCreateProgramWithSource params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clCreateSamplerTracer { public: clCreateSamplerTracer() {} void enter(cl_context *context, cl_bool *normalizedCoords, cl_addressing_mode *addressingMode, cl_filter_mode *filterMode, cl_int **errcodeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.context = context; params.normalizedCoords = normalizedCoords; params.addressingMode = addressingMode; params.filterMode = filterMode; params.errcodeRet = errcodeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clCreateSampler"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateSampler)) { data.correlationData = correlationData + 
i; handle->call(CL_FUNCTION_clCreateSampler, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_sampler *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateSampler)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateSampler, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clCreateSamplerTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clCreateSampler params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clCreateSamplerWithPropertiesTracer { public: clCreateSamplerWithPropertiesTracer() {} void enter(cl_context *context, const cl_sampler_properties **samplerProperties, cl_int **errcodeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.context = context; params.samplerProperties = samplerProperties; params.errcodeRet = errcodeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clCreateSamplerWithProperties"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateSamplerWithProperties)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateSamplerWithProperties, &data); } ++i; } state = 
TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_sampler *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateSamplerWithProperties)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateSamplerWithProperties, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clCreateSamplerWithPropertiesTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clCreateSamplerWithProperties params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clCreateSubBufferTracer { public: clCreateSubBufferTracer() {} void enter(cl_mem *buffer, cl_mem_flags *flags, cl_buffer_create_type *bufferCreateType, const void **bufferCreateInfo, cl_int **errcodeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.buffer = buffer; params.flags = flags; params.bufferCreateType = bufferCreateType; params.bufferCreateInfo = bufferCreateInfo; params.errcodeRet = errcodeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clCreateSubBuffer"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateSubBuffer)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateSubBuffer, &data); } ++i; } 
state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_mem *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateSubBuffer)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateSubBuffer, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clCreateSubBufferTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clCreateSubBuffer params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clCreateUserEventTracer { public: clCreateUserEventTracer() {} void enter(cl_context *context, cl_int **errcodeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.context = context; params.errcodeRet = errcodeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clCreateUserEvent"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateUserEvent)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateUserEvent, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_event *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; 
DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateUserEvent)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateUserEvent, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clCreateUserEventTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clCreateUserEvent params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clEnqueueBarrierTracer { public: clEnqueueBarrierTracer() {} void enter(cl_command_queue *commandQueue) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clEnqueueBarrier"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueBarrier)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueBarrier, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueBarrier)) { data.correlationData = 
correlationData + i; handle->call(CL_FUNCTION_clEnqueueBarrier, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clEnqueueBarrierTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clEnqueueBarrier params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clEnqueueBarrierWithWaitListTracer { public: clEnqueueBarrierWithWaitListTracer() {} void enter(cl_command_queue *commandQueue, cl_uint *numEventsInWaitList, const cl_event **eventWaitList, cl_event **event) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; params.numEventsInWaitList = numEventsInWaitList; params.eventWaitList = eventWaitList; params.event = event; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clEnqueueBarrierWithWaitList"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueBarrierWithWaitList)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueBarrierWithWaitList, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueBarrierWithWaitList)) { data.correlationData = correlationData + 
i; handle->call(CL_FUNCTION_clEnqueueBarrierWithWaitList, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clEnqueueBarrierWithWaitListTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clEnqueueBarrierWithWaitList params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clEnqueueCopyBufferTracer { public: clEnqueueCopyBufferTracer() {} void enter(cl_command_queue *commandQueue, cl_mem *srcBuffer, cl_mem *dstBuffer, size_t *srcOffset, size_t *dstOffset, size_t *cb, cl_uint *numEventsInWaitList, const cl_event **eventWaitList, cl_event **event) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; params.srcBuffer = srcBuffer; params.dstBuffer = dstBuffer; params.srcOffset = srcOffset; params.dstOffset = dstOffset; params.cb = cb; params.numEventsInWaitList = numEventsInWaitList; params.eventWaitList = eventWaitList; params.event = event; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clEnqueueCopyBuffer"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueCopyBuffer)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueCopyBuffer, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != 
nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueCopyBuffer)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueCopyBuffer, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clEnqueueCopyBufferTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clEnqueueCopyBuffer params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clEnqueueCopyBufferRectTracer { public: clEnqueueCopyBufferRectTracer() {} void enter(cl_command_queue *commandQueue, cl_mem *srcBuffer, cl_mem *dstBuffer, const size_t **srcOrigin, const size_t **dstOrigin, const size_t **region, size_t *srcRowPitch, size_t *srcSlicePitch, size_t *dstRowPitch, size_t *dstSlicePitch, cl_uint *numEventsInWaitList, const cl_event **eventWaitList, cl_event **event) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; params.srcBuffer = srcBuffer; params.dstBuffer = dstBuffer; params.srcOrigin = srcOrigin; params.dstOrigin = dstOrigin; params.region = region; params.srcRowPitch = srcRowPitch; params.srcSlicePitch = srcSlicePitch; params.dstRowPitch = dstRowPitch; params.dstSlicePitch = dstSlicePitch; params.numEventsInWaitList = numEventsInWaitList; params.eventWaitList = eventWaitList; params.event = event; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clEnqueueCopyBufferRect"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if 
(handle->getTracingPoint(CL_FUNCTION_clEnqueueCopyBufferRect)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueCopyBufferRect, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueCopyBufferRect)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueCopyBufferRect, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clEnqueueCopyBufferRectTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clEnqueueCopyBufferRect params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clEnqueueCopyBufferToImageTracer { public: clEnqueueCopyBufferToImageTracer() {} void enter(cl_command_queue *commandQueue, cl_mem *srcBuffer, cl_mem *dstImage, size_t *srcOffset, const size_t **dstOrigin, const size_t **region, cl_uint *numEventsInWaitList, const cl_event **eventWaitList, cl_event **event) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; params.srcBuffer = srcBuffer; params.dstImage = dstImage; params.srcOffset = srcOffset; params.dstOrigin = dstOrigin; params.region = region; params.numEventsInWaitList = numEventsInWaitList; params.eventWaitList = eventWaitList; params.event = event; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clEnqueueCopyBufferToImage"; data.functionParams = static_cast(¶ms); 
data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueCopyBufferToImage)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueCopyBufferToImage, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueCopyBufferToImage)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueCopyBufferToImage, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clEnqueueCopyBufferToImageTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clEnqueueCopyBufferToImage params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clEnqueueCopyImageTracer { public: clEnqueueCopyImageTracer() {} void enter(cl_command_queue *commandQueue, cl_mem *srcImage, cl_mem *dstImage, const size_t **srcOrigin, const size_t **dstOrigin, const size_t **region, cl_uint *numEventsInWaitList, const cl_event **eventWaitList, cl_event **event) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; params.srcImage = srcImage; params.dstImage = dstImage; params.srcOrigin = srcOrigin; params.dstOrigin = dstOrigin; params.region = region; params.numEventsInWaitList = numEventsInWaitList; params.eventWaitList = 
eventWaitList; params.event = event; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clEnqueueCopyImage"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueCopyImage)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueCopyImage, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueCopyImage)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueCopyImage, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clEnqueueCopyImageTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clEnqueueCopyImage params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clEnqueueCopyImageToBufferTracer { public: clEnqueueCopyImageToBufferTracer() {} void enter(cl_command_queue *commandQueue, cl_mem *srcImage, cl_mem *dstBuffer, const size_t **srcOrigin, const size_t **region, size_t *dstOffset, cl_uint *numEventsInWaitList, const cl_event **eventWaitList, cl_event **event) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; params.srcImage = 
srcImage; params.dstBuffer = dstBuffer; params.srcOrigin = srcOrigin; params.region = region; params.dstOffset = dstOffset; params.numEventsInWaitList = numEventsInWaitList; params.eventWaitList = eventWaitList; params.event = event; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clEnqueueCopyImageToBuffer"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueCopyImageToBuffer)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueCopyImageToBuffer, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueCopyImageToBuffer)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueCopyImageToBuffer, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clEnqueueCopyImageToBufferTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clEnqueueCopyImageToBuffer params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clEnqueueFillBufferTracer { public: clEnqueueFillBufferTracer() {} void enter(cl_command_queue *commandQueue, cl_mem *buffer, const void **pattern, size_t *patternSize, size_t 
*offset, size_t *size, cl_uint *numEventsInWaitList, const cl_event **eventWaitList, cl_event **event) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; params.buffer = buffer; params.pattern = pattern; params.patternSize = patternSize; params.offset = offset; params.size = size; params.numEventsInWaitList = numEventsInWaitList; params.eventWaitList = eventWaitList; params.event = event; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clEnqueueFillBuffer"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueFillBuffer)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueFillBuffer, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueFillBuffer)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueFillBuffer, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clEnqueueFillBufferTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clEnqueueFillBuffer params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class 
// Tracer for clEnqueueFillImage: enter() and exit() call every non-null tracingHandle entry
// (up to TRACING_MAX_HANDLE_COUNT) whose getTracingPoint(CL_FUNCTION_clEnqueueFillImage) is true.
clEnqueueFillImageTracer { public: clEnqueueFillImageTracer() {}
// Stores the argument pointers in params, tags data as the ENTER site with a new correlation id,
// then notifies the handles (handle i gets correlationData + i).
void enter(cl_command_queue *commandQueue, cl_mem *image, const void **fillColor, const size_t **origin, const size_t **region, cl_uint *numEventsInWaitList, const cl_event **eventWaitList, cl_event **event) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; params.image = image; params.fillColor = fillColor; params.origin = origin; params.region = region; params.numEventsInWaitList = numEventsInWaitList; params.eventWaitList = eventWaitList; params.event = event; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clEnqueueFillImage";
// Repaired cast: this copy had lost the template argument and "&params". const void * is assumed
// to match the functionParams field - confirm against cl_callback_data.
data.functionParams = static_cast<const void *>(&params); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueFillImage)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueFillImage, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; }
// Tags data as the EXIT site, attaches retVal and notifies the same handles again.
void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueFillImage)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueFillImage, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clEnqueueFillImageTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clEnqueueFillImage params{}; cl_callback_data data{}; uint64_t
correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clEnqueueMapBufferTracer { public: clEnqueueMapBufferTracer() {} void enter(cl_command_queue *commandQueue, cl_mem *buffer, cl_bool *blockingMap, cl_map_flags *mapFlags, size_t *offset, size_t *cb, cl_uint *numEventsInWaitList, const cl_event **eventWaitList, cl_event **event, cl_int **errcodeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; params.buffer = buffer; params.blockingMap = blockingMap; params.mapFlags = mapFlags; params.offset = offset; params.cb = cb; params.numEventsInWaitList = numEventsInWaitList; params.eventWaitList = eventWaitList; params.event = event; params.errcodeRet = errcodeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clEnqueueMapBuffer"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueMapBuffer)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueMapBuffer, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(void **retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueMapBuffer)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueMapBuffer, &data); } ++i; } state = 
TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clEnqueueMapBufferTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clEnqueueMapBuffer params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clEnqueueMapImageTracer { public: clEnqueueMapImageTracer() {} void enter(cl_command_queue *commandQueue, cl_mem *image, cl_bool *blockingMap, cl_map_flags *mapFlags, const size_t **origin, const size_t **region, size_t **imageRowPitch, size_t **imageSlicePitch, cl_uint *numEventsInWaitList, const cl_event **eventWaitList, cl_event **event, cl_int **errcodeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; params.image = image; params.blockingMap = blockingMap; params.mapFlags = mapFlags; params.origin = origin; params.region = region; params.imageRowPitch = imageRowPitch; params.imageSlicePitch = imageSlicePitch; params.numEventsInWaitList = numEventsInWaitList; params.eventWaitList = eventWaitList; params.event = event; params.errcodeRet = errcodeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clEnqueueMapImage"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueMapImage)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueMapImage, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(void **retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; 
DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueMapImage)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueMapImage, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clEnqueueMapImageTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clEnqueueMapImage params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clEnqueueMarkerTracer { public: clEnqueueMarkerTracer() {} void enter(cl_command_queue *commandQueue, cl_event **event) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; params.event = event; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clEnqueueMarker"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueMarker)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueMarker, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if 
(handle->getTracingPoint(CL_FUNCTION_clEnqueueMarker)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueMarker, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clEnqueueMarkerTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clEnqueueMarker params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clEnqueueMarkerWithWaitListTracer { public: clEnqueueMarkerWithWaitListTracer() {} void enter(cl_command_queue *commandQueue, cl_uint *numEventsInWaitList, const cl_event **eventWaitList, cl_event **event) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; params.numEventsInWaitList = numEventsInWaitList; params.eventWaitList = eventWaitList; params.event = event; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clEnqueueMarkerWithWaitList"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueMarkerWithWaitList)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueMarkerWithWaitList, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if 
(handle->getTracingPoint(CL_FUNCTION_clEnqueueMarkerWithWaitList)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueMarkerWithWaitList, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clEnqueueMarkerWithWaitListTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clEnqueueMarkerWithWaitList params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clEnqueueMigrateMemObjectsTracer { public: clEnqueueMigrateMemObjectsTracer() {} void enter(cl_command_queue *commandQueue, cl_uint *numMemObjects, const cl_mem **memObjects, cl_mem_migration_flags *flags, cl_uint *numEventsInWaitList, const cl_event **eventWaitList, cl_event **event) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; params.numMemObjects = numMemObjects; params.memObjects = memObjects; params.flags = flags; params.numEventsInWaitList = numEventsInWaitList; params.eventWaitList = eventWaitList; params.event = event; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clEnqueueMigrateMemObjects"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueMigrateMemObjects)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueMigrateMemObjects, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; 
DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueMigrateMemObjects)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueMigrateMemObjects, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clEnqueueMigrateMemObjectsTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clEnqueueMigrateMemObjects params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clEnqueueNDRangeKernelTracer { public: clEnqueueNDRangeKernelTracer() {} void enter(cl_command_queue *commandQueue, cl_kernel *kernel, cl_uint *workDim, const size_t **globalWorkOffset, const size_t **globalWorkSize, const size_t **localWorkSize, cl_uint *numEventsInWaitList, const cl_event **eventWaitList, cl_event **event) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; params.kernel = kernel; params.workDim = workDim; params.globalWorkOffset = globalWorkOffset; params.globalWorkSize = globalWorkSize; params.localWorkSize = localWorkSize; params.numEventsInWaitList = numEventsInWaitList; params.eventWaitList = eventWaitList; params.event = event; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clEnqueueNDRangeKernel"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueNDRangeKernel)) { data.correlationData = correlationData + i; 
handle->call(CL_FUNCTION_clEnqueueNDRangeKernel, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueNDRangeKernel)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueNDRangeKernel, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clEnqueueNDRangeKernelTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clEnqueueNDRangeKernel params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clEnqueueNativeKernelTracer { public: clEnqueueNativeKernelTracer() {} void enter(cl_command_queue *commandQueue, void(CL_CALLBACK **userFunc)(void *), void **args, size_t *cbArgs, cl_uint *numMemObjects, const cl_mem **memList, const void ***argsMemLoc, cl_uint *numEventsInWaitList, const cl_event **eventWaitList, cl_event **event) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; params.userFunc = userFunc; params.args = args; params.cbArgs = cbArgs; params.numMemObjects = numMemObjects; params.memList = memList; params.argsMemLoc = argsMemLoc; params.numEventsInWaitList = numEventsInWaitList; params.eventWaitList = eventWaitList; params.event = event; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clEnqueueNativeKernel"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == 
nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueNativeKernel)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueNativeKernel, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueNativeKernel)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueNativeKernel, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clEnqueueNativeKernelTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clEnqueueNativeKernel params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clEnqueueReadBufferTracer { public: clEnqueueReadBufferTracer() {} void enter(cl_command_queue *commandQueue, cl_mem *buffer, cl_bool *blockingRead, size_t *offset, size_t *cb, void **ptr, cl_uint *numEventsInWaitList, const cl_event **eventWaitList, cl_event **event) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; params.buffer = buffer; params.blockingRead = blockingRead; params.offset = offset; params.cb = cb; params.ptr = ptr; params.numEventsInWaitList = numEventsInWaitList; params.eventWaitList = eventWaitList; params.event = event; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = 
"clEnqueueReadBuffer"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueReadBuffer)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueReadBuffer, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueReadBuffer)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueReadBuffer, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clEnqueueReadBufferTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clEnqueueReadBuffer params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clEnqueueReadBufferRectTracer { public: clEnqueueReadBufferRectTracer() {} void enter(cl_command_queue *commandQueue, cl_mem *buffer, cl_bool *blockingRead, const size_t **bufferOrigin, const size_t **hostOrigin, const size_t **region, size_t *bufferRowPitch, size_t *bufferSlicePitch, size_t *hostRowPitch, size_t *hostSlicePitch, void **ptr, cl_uint *numEventsInWaitList, const cl_event **eventWaitList, cl_event **event) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; params.buffer = buffer; params.blockingRead = blockingRead; params.bufferOrigin 
= bufferOrigin; params.hostOrigin = hostOrigin; params.region = region; params.bufferRowPitch = bufferRowPitch; params.bufferSlicePitch = bufferSlicePitch; params.hostRowPitch = hostRowPitch; params.hostSlicePitch = hostSlicePitch; params.ptr = ptr; params.numEventsInWaitList = numEventsInWaitList; params.eventWaitList = eventWaitList; params.event = event; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clEnqueueReadBufferRect"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueReadBufferRect)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueReadBufferRect, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueReadBufferRect)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueReadBufferRect, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clEnqueueReadBufferRectTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clEnqueueReadBufferRect params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clEnqueueReadImageTracer { public: clEnqueueReadImageTracer() {} void 
enter(cl_command_queue *commandQueue, cl_mem *image, cl_bool *blockingRead, const size_t **origin, const size_t **region, size_t *rowPitch, size_t *slicePitch, void **ptr, cl_uint *numEventsInWaitList, const cl_event **eventWaitList, cl_event **event) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; params.image = image; params.blockingRead = blockingRead; params.origin = origin; params.region = region; params.rowPitch = rowPitch; params.slicePitch = slicePitch; params.ptr = ptr; params.numEventsInWaitList = numEventsInWaitList; params.eventWaitList = eventWaitList; params.event = event; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clEnqueueReadImage"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueReadImage)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueReadImage, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueReadImage)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueReadImage, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clEnqueueReadImageTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: 
cl_params_clEnqueueReadImage params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clEnqueueSVMFreeTracer { public: clEnqueueSVMFreeTracer() {} void enter(cl_command_queue *commandQueue, cl_uint *numSvmPointers, void ***svmPointers, void(CL_CALLBACK **pfnFreeFunc)(cl_command_queue queue, cl_uint numSvmPointers, void **svmPointers, void *userData), void **userData, cl_uint *numEventsInWaitList, const cl_event **eventWaitList, cl_event **event) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; params.numSvmPointers = numSvmPointers; params.svmPointers = svmPointers; params.pfnFreeFunc = pfnFreeFunc; params.userData = userData; params.numEventsInWaitList = numEventsInWaitList; params.eventWaitList = eventWaitList; params.event = event; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clEnqueueSVMFree"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueSVMFree)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueSVMFree, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueSVMFree)) { data.correlationData 
= correlationData + i; handle->call(CL_FUNCTION_clEnqueueSVMFree, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clEnqueueSVMFreeTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clEnqueueSVMFree params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clEnqueueSVMMapTracer { public: clEnqueueSVMMapTracer() {} void enter(cl_command_queue *commandQueue, cl_bool *blockingMap, cl_map_flags *mapFlags, void **svmPtr, size_t *size, cl_uint *numEventsInWaitList, const cl_event **eventWaitList, cl_event **event) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; params.blockingMap = blockingMap; params.mapFlags = mapFlags; params.svmPtr = svmPtr; params.size = size; params.numEventsInWaitList = numEventsInWaitList; params.eventWaitList = eventWaitList; params.event = event; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clEnqueueSVMMap"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueSVMMap)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueSVMMap, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == 
nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueSVMMap)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueSVMMap, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clEnqueueSVMMapTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clEnqueueSVMMap params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clEnqueueSVMMemFillTracer { public: clEnqueueSVMMemFillTracer() {} void enter(cl_command_queue *commandQueue, void **svmPtr, const void **pattern, size_t *patternSize, size_t *size, cl_uint *numEventsInWaitList, const cl_event **eventWaitList, cl_event **event) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; params.svmPtr = svmPtr; params.pattern = pattern; params.patternSize = patternSize; params.size = size; params.numEventsInWaitList = numEventsInWaitList; params.eventWaitList = eventWaitList; params.event = event; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clEnqueueSVMMemFill"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueSVMMemFill)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueSVMMemFill, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && 
tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueSVMMemFill)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueSVMMemFill, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clEnqueueSVMMemFillTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clEnqueueSVMMemFill params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clEnqueueSVMMemcpyTracer { public: clEnqueueSVMMemcpyTracer() {} void enter(cl_command_queue *commandQueue, cl_bool *blockingCopy, void **dstPtr, const void **srcPtr, size_t *size, cl_uint *numEventsInWaitList, const cl_event **eventWaitList, cl_event **event) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; params.blockingCopy = blockingCopy; params.dstPtr = dstPtr; params.srcPtr = srcPtr; params.size = size; params.numEventsInWaitList = numEventsInWaitList; params.eventWaitList = eventWaitList; params.event = event; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clEnqueueSVMMemcpy"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueSVMMemcpy)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueSVMMemcpy, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue 
= retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueSVMMemcpy)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueSVMMemcpy, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clEnqueueSVMMemcpyTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clEnqueueSVMMemcpy params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clEnqueueSVMMigrateMemTracer { public: clEnqueueSVMMigrateMemTracer() {} void enter(cl_command_queue *commandQueue, cl_uint *numSvmPointers, const void ***svmPointers, const size_t **sizes, const cl_mem_migration_flags *flags, cl_uint *numEventsInWaitList, const cl_event **eventWaitList, cl_event **event) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; params.numSvmPointers = numSvmPointers; params.svmPointers = svmPointers; params.sizes = sizes; params.flags = flags; params.numEventsInWaitList = numEventsInWaitList; params.eventWaitList = eventWaitList; params.event = event; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clEnqueueSVMMigrateMem"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueSVMMigrateMem)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueSVMMigrateMem, &data); } ++i; } state = 
TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueSVMMigrateMem)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueSVMMigrateMem, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clEnqueueSVMMigrateMemTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clEnqueueSVMMigrateMem params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clEnqueueSVMUnmapTracer { public: clEnqueueSVMUnmapTracer() {} void enter(cl_command_queue *commandQueue, void **svmPtr, cl_uint *numEventsInWaitList, const cl_event **eventWaitList, cl_event **event) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; params.svmPtr = svmPtr; params.numEventsInWaitList = numEventsInWaitList; params.eventWaitList = eventWaitList; params.event = event; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clEnqueueSVMUnmap"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueSVMUnmap)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueSVMUnmap, &data); } ++i; } state = 
TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueSVMUnmap)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueSVMUnmap, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clEnqueueSVMUnmapTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clEnqueueSVMUnmap params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clEnqueueTaskTracer { public: clEnqueueTaskTracer() {} void enter(cl_command_queue *commandQueue, cl_kernel *kernel, cl_uint *numEventsInWaitList, const cl_event **eventWaitList, cl_event **event) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; params.kernel = kernel; params.numEventsInWaitList = numEventsInWaitList; params.eventWaitList = eventWaitList; params.event = event; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clEnqueueTask"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueTask)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueTask, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int 
*retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueTask)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueTask, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clEnqueueTaskTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clEnqueueTask params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clEnqueueUnmapMemObjectTracer { public: clEnqueueUnmapMemObjectTracer() {} void enter(cl_command_queue *commandQueue, cl_mem *memobj, void **mappedPtr, cl_uint *numEventsInWaitList, const cl_event **eventWaitList, cl_event **event) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; params.memobj = memobj; params.mappedPtr = mappedPtr; params.numEventsInWaitList = numEventsInWaitList; params.eventWaitList = eventWaitList; params.event = event; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clEnqueueUnmapMemObject"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueUnmapMemObject)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueUnmapMemObject, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; 
} void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueUnmapMemObject)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueUnmapMemObject, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clEnqueueUnmapMemObjectTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clEnqueueUnmapMemObject params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clEnqueueWaitForEventsTracer { public: clEnqueueWaitForEventsTracer() {} void enter(cl_command_queue *commandQueue, cl_uint *numEvents, const cl_event **eventList) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; params.numEvents = numEvents; params.eventList = eventList; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clEnqueueWaitForEvents"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueWaitForEvents)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueWaitForEvents, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = 
CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueWaitForEvents)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueWaitForEvents, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clEnqueueWaitForEventsTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clEnqueueWaitForEvents params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clEnqueueWriteBufferTracer { public: clEnqueueWriteBufferTracer() {} void enter(cl_command_queue *commandQueue, cl_mem *buffer, cl_bool *blockingWrite, size_t *offset, size_t *cb, const void **ptr, cl_uint *numEventsInWaitList, const cl_event **eventWaitList, cl_event **event) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; params.buffer = buffer; params.blockingWrite = blockingWrite; params.offset = offset; params.cb = cb; params.ptr = ptr; params.numEventsInWaitList = numEventsInWaitList; params.eventWaitList = eventWaitList; params.event = event; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clEnqueueWriteBuffer"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueWriteBuffer)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueWriteBuffer, 
&data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueWriteBuffer)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueWriteBuffer, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clEnqueueWriteBufferTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clEnqueueWriteBuffer params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clEnqueueWriteBufferRectTracer { public: clEnqueueWriteBufferRectTracer() {} void enter(cl_command_queue *commandQueue, cl_mem *buffer, cl_bool *blockingWrite, const size_t **bufferOrigin, const size_t **hostOrigin, const size_t **region, size_t *bufferRowPitch, size_t *bufferSlicePitch, size_t *hostRowPitch, size_t *hostSlicePitch, const void **ptr, cl_uint *numEventsInWaitList, const cl_event **eventWaitList, cl_event **event) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; params.buffer = buffer; params.blockingWrite = blockingWrite; params.bufferOrigin = bufferOrigin; params.hostOrigin = hostOrigin; params.region = region; params.bufferRowPitch = bufferRowPitch; params.bufferSlicePitch = bufferSlicePitch; params.hostRowPitch = hostRowPitch; params.hostSlicePitch = hostSlicePitch; params.ptr = ptr; params.numEventsInWaitList = numEventsInWaitList; params.eventWaitList = eventWaitList; params.event = event; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = 
tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clEnqueueWriteBufferRect"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueWriteBufferRect)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueWriteBufferRect, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueWriteBufferRect)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueWriteBufferRect, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clEnqueueWriteBufferRectTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clEnqueueWriteBufferRect params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clEnqueueWriteImageTracer { public: clEnqueueWriteImageTracer() {} void enter(cl_command_queue *commandQueue, cl_mem *image, cl_bool *blockingWrite, const size_t **origin, const size_t **region, size_t *inputRowPitch, size_t *inputSlicePitch, const void **ptr, cl_uint *numEventsInWaitList, const cl_event **eventWaitList, cl_event **event) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; params.image = image; params.blockingWrite = 
blockingWrite; params.origin = origin; params.region = region; params.inputRowPitch = inputRowPitch; params.inputSlicePitch = inputSlicePitch; params.ptr = ptr; params.numEventsInWaitList = numEventsInWaitList; params.eventWaitList = eventWaitList; params.event = event; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clEnqueueWriteImage"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueWriteImage)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueWriteImage, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueWriteImage)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueWriteImage, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clEnqueueWriteImageTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clEnqueueWriteImage params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clFinishTracer { public: clFinishTracer() {} void enter(cl_command_queue *commandQueue) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; 
data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clFinish"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clFinish)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clFinish, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clFinish)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clFinish, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clFinishTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clFinish params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clFlushTracer { public: clFlushTracer() {} void enter(cl_command_queue *commandQueue) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clFlush"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { 
TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clFlush)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clFlush, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clFlush)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clFlush, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clFlushTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clFlush params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clGetCommandQueueInfoTracer { public: clGetCommandQueueInfoTracer() {} void enter(cl_command_queue *commandQueue, cl_command_queue_info *paramName, size_t *paramValueSize, void **paramValue, size_t **paramValueSizeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; params.paramName = paramName; params.paramValueSize = paramValueSize; params.paramValue = paramValue; params.paramValueSizeRet = paramValueSizeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clGetCommandQueueInfo"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == 
nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetCommandQueueInfo)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetCommandQueueInfo, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetCommandQueueInfo)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetCommandQueueInfo, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clGetCommandQueueInfoTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clGetCommandQueueInfo params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clGetContextInfoTracer { public: clGetContextInfoTracer() {} void enter(cl_context *context, cl_context_info *paramName, size_t *paramValueSize, void **paramValue, size_t **paramValueSizeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.context = context; params.paramName = paramName; params.paramValueSize = paramValueSize; params.paramValue = paramValue; params.paramValueSizeRet = paramValueSizeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clGetContextInfo"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if 
(handle->getTracingPoint(CL_FUNCTION_clGetContextInfo)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetContextInfo, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetContextInfo)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetContextInfo, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clGetContextInfoTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clGetContextInfo params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clGetDeviceAndHostTimerTracer { public: clGetDeviceAndHostTimerTracer() {} void enter(cl_device_id *device, cl_ulong **deviceTimestamp, cl_ulong **hostTimestamp) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.device = device; params.deviceTimestamp = deviceTimestamp; params.hostTimestamp = hostTimestamp; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clGetDeviceAndHostTimer"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetDeviceAndHostTimer)) { data.correlationData = correlationData + i; 
handle->call(CL_FUNCTION_clGetDeviceAndHostTimer, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetDeviceAndHostTimer)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetDeviceAndHostTimer, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clGetDeviceAndHostTimerTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clGetDeviceAndHostTimer params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clGetDeviceIDsTracer { public: clGetDeviceIDsTracer() {} void enter(cl_platform_id *platform, cl_device_type *deviceType, cl_uint *numEntries, cl_device_id **devices, cl_uint **numDevices) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.platform = platform; params.deviceType = deviceType; params.numEntries = numEntries; params.devices = devices; params.numDevices = numDevices; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clGetDeviceIDs"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetDeviceIDs)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetDeviceIDs, &data); } 
++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetDeviceIDs)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetDeviceIDs, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clGetDeviceIDsTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clGetDeviceIDs params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clGetDeviceInfoTracer { public: clGetDeviceInfoTracer() {} void enter(cl_device_id *device, cl_device_info *paramName, size_t *paramValueSize, void **paramValue, size_t **paramValueSizeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.device = device; params.paramName = paramName; params.paramValueSize = paramValueSize; params.paramValue = paramValue; params.paramValueSizeRet = paramValueSizeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clGetDeviceInfo"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetDeviceInfo)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetDeviceInfo, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int 
*retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetDeviceInfo)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetDeviceInfo, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clGetDeviceInfoTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clGetDeviceInfo params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clGetEventInfoTracer { public: clGetEventInfoTracer() {} void enter(cl_event *event, cl_event_info *paramName, size_t *paramValueSize, void **paramValue, size_t **paramValueSizeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.event = event; params.paramName = paramName; params.paramValueSize = paramValueSize; params.paramValue = paramValue; params.paramValueSizeRet = paramValueSizeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clGetEventInfo"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetEventInfo)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetEventInfo, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); 
data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetEventInfo)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetEventInfo, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clGetEventInfoTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clGetEventInfo params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clGetEventProfilingInfoTracer { public: clGetEventProfilingInfoTracer() {} void enter(cl_event *event, cl_profiling_info *paramName, size_t *paramValueSize, void **paramValue, size_t **paramValueSizeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.event = event; params.paramName = paramName; params.paramValueSize = paramValueSize; params.paramValue = paramValue; params.paramValueSizeRet = paramValueSizeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clGetEventProfilingInfo"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetEventProfilingInfo)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetEventProfilingInfo, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; 
data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetEventProfilingInfo)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetEventProfilingInfo, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clGetEventProfilingInfoTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clGetEventProfilingInfo params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clGetExtensionFunctionAddressTracer { public: clGetExtensionFunctionAddressTracer() {} void enter(const char **funcName) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.funcName = funcName; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clGetExtensionFunctionAddress"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetExtensionFunctionAddress)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetExtensionFunctionAddress, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(void **retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; 
DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetExtensionFunctionAddress)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetExtensionFunctionAddress, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clGetExtensionFunctionAddressTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clGetExtensionFunctionAddress params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clGetExtensionFunctionAddressForPlatformTracer { public: clGetExtensionFunctionAddressForPlatformTracer() {} void enter(cl_platform_id *platform, const char **funcName) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.platform = platform; params.funcName = funcName; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clGetExtensionFunctionAddressForPlatform"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetExtensionFunctionAddressForPlatform)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetExtensionFunctionAddressForPlatform, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(void **retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if 
(handle->getTracingPoint(CL_FUNCTION_clGetExtensionFunctionAddressForPlatform)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetExtensionFunctionAddressForPlatform, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clGetExtensionFunctionAddressForPlatformTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clGetExtensionFunctionAddressForPlatform params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clGetHostTimerTracer { public: clGetHostTimerTracer() {} void enter(cl_device_id *device, cl_ulong **hostTimestamp) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.device = device; params.hostTimestamp = hostTimestamp; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clGetHostTimer"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetHostTimer)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetHostTimer, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetHostTimer)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetHostTimer, &data); } 
++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clGetHostTimerTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clGetHostTimer params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clGetImageInfoTracer { public: clGetImageInfoTracer() {} void enter(cl_mem *image, cl_image_info *paramName, size_t *paramValueSize, void **paramValue, size_t **paramValueSizeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.image = image; params.paramName = paramName; params.paramValueSize = paramValueSize; params.paramValue = paramValue; params.paramValueSizeRet = paramValueSizeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clGetImageInfo"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetImageInfo)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetImageInfo, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetImageInfo)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetImageInfo, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clGetImageInfoTracer() { 
DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clGetImageInfo params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clGetKernelArgInfoTracer { public: clGetKernelArgInfoTracer() {} void enter(cl_kernel *kernel, cl_uint *argIndx, cl_kernel_arg_info *paramName, size_t *paramValueSize, void **paramValue, size_t **paramValueSizeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.kernel = kernel; params.argIndx = argIndx; params.paramName = paramName; params.paramValueSize = paramValueSize; params.paramValue = paramValue; params.paramValueSizeRet = paramValueSizeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clGetKernelArgInfo"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetKernelArgInfo)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetKernelArgInfo, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetKernelArgInfo)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetKernelArgInfo, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clGetKernelArgInfoTracer() 
{ DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clGetKernelArgInfo params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clGetKernelInfoTracer { public: clGetKernelInfoTracer() {} void enter(cl_kernel *kernel, cl_kernel_info *paramName, size_t *paramValueSize, void **paramValue, size_t **paramValueSizeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.kernel = kernel; params.paramName = paramName; params.paramValueSize = paramValueSize; params.paramValue = paramValue; params.paramValueSizeRet = paramValueSizeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clGetKernelInfo"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetKernelInfo)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetKernelInfo, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetKernelInfo)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetKernelInfo, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clGetKernelInfoTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } 
private: cl_params_clGetKernelInfo params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clGetKernelSubGroupInfoTracer { public: clGetKernelSubGroupInfoTracer() {} void enter(cl_kernel *kernel, cl_device_id *device, cl_kernel_sub_group_info *paramName, size_t *inputValueSize, const void **inputValue, size_t *paramValueSize, void **paramValue, size_t **paramValueSizeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.kernel = kernel; params.device = device; params.paramName = paramName; params.inputValueSize = inputValueSize; params.inputValue = inputValue; params.paramValueSize = paramValueSize; params.paramValue = paramValue; params.paramValueSizeRet = paramValueSizeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clGetKernelSubGroupInfo"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetKernelSubGroupInfo)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetKernelSubGroupInfo, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetKernelSubGroupInfo)) { data.correlationData = correlationData + i; 
handle->call(CL_FUNCTION_clGetKernelSubGroupInfo, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clGetKernelSubGroupInfoTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clGetKernelSubGroupInfo params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clGetKernelWorkGroupInfoTracer { public: clGetKernelWorkGroupInfoTracer() {} void enter(cl_kernel *kernel, cl_device_id *device, cl_kernel_work_group_info *paramName, size_t *paramValueSize, void **paramValue, size_t **paramValueSizeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.kernel = kernel; params.device = device; params.paramName = paramName; params.paramValueSize = paramValueSize; params.paramValue = paramValue; params.paramValueSizeRet = paramValueSizeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clGetKernelWorkGroupInfo"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetKernelWorkGroupInfo)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetKernelWorkGroupInfo, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if 
(handle->getTracingPoint(CL_FUNCTION_clGetKernelWorkGroupInfo)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetKernelWorkGroupInfo, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clGetKernelWorkGroupInfoTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clGetKernelWorkGroupInfo params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clGetMemObjectInfoTracer { public: clGetMemObjectInfoTracer() {} void enter(cl_mem *memobj, cl_mem_info *paramName, size_t *paramValueSize, void **paramValue, size_t **paramValueSizeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.memobj = memobj; params.paramName = paramName; params.paramValueSize = paramValueSize; params.paramValue = paramValue; params.paramValueSizeRet = paramValueSizeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clGetMemObjectInfo"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetMemObjectInfo)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetMemObjectInfo, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if 
(handle->getTracingPoint(CL_FUNCTION_clGetMemObjectInfo)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetMemObjectInfo, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clGetMemObjectInfoTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clGetMemObjectInfo params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clGetPipeInfoTracer { public: clGetPipeInfoTracer() {} void enter(cl_mem *pipe, cl_pipe_info *paramName, size_t *paramValueSize, void **paramValue, size_t **paramValueSizeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.pipe = pipe; params.paramName = paramName; params.paramValueSize = paramValueSize; params.paramValue = paramValue; params.paramValueSizeRet = paramValueSizeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clGetPipeInfo"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetPipeInfo)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetPipeInfo, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetPipeInfo)) { data.correlationData = 
correlationData + i; handle->call(CL_FUNCTION_clGetPipeInfo, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clGetPipeInfoTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clGetPipeInfo params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clGetPlatformIDsTracer { public: clGetPlatformIDsTracer() {} void enter(cl_uint *numEntries, cl_platform_id **platforms, cl_uint **numPlatforms) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.numEntries = numEntries; params.platforms = platforms; params.numPlatforms = numPlatforms; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clGetPlatformIDs"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetPlatformIDs)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetPlatformIDs, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetPlatformIDs)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetPlatformIDs, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clGetPlatformIDsTracer() { DEBUG_BREAK_IF(state == 
TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clGetPlatformIDs params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clGetPlatformInfoTracer { public: clGetPlatformInfoTracer() {} void enter(cl_platform_id *platform, cl_platform_info *paramName, size_t *paramValueSize, void **paramValue, size_t **paramValueSizeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.platform = platform; params.paramName = paramName; params.paramValueSize = paramValueSize; params.paramValue = paramValue; params.paramValueSizeRet = paramValueSizeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clGetPlatformInfo"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetPlatformInfo)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetPlatformInfo, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetPlatformInfo)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetPlatformInfo, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clGetPlatformInfoTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } 
private: cl_params_clGetPlatformInfo params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clGetProgramBuildInfoTracer { public: clGetProgramBuildInfoTracer() {} void enter(cl_program *program, cl_device_id *device, cl_program_build_info *paramName, size_t *paramValueSize, void **paramValue, size_t **paramValueSizeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.program = program; params.device = device; params.paramName = paramName; params.paramValueSize = paramValueSize; params.paramValue = paramValue; params.paramValueSizeRet = paramValueSizeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clGetProgramBuildInfo"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetProgramBuildInfo)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetProgramBuildInfo, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetProgramBuildInfo)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetProgramBuildInfo, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clGetProgramBuildInfoTracer() { DEBUG_BREAK_IF(state == 
TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clGetProgramBuildInfo params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clGetProgramInfoTracer { public: clGetProgramInfoTracer() {} void enter(cl_program *program, cl_program_info *paramName, size_t *paramValueSize, void **paramValue, size_t **paramValueSizeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.program = program; params.paramName = paramName; params.paramValueSize = paramValueSize; params.paramValue = paramValue; params.paramValueSizeRet = paramValueSizeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clGetProgramInfo"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetProgramInfo)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetProgramInfo, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetProgramInfo)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetProgramInfo, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clGetProgramInfoTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: 
cl_params_clGetProgramInfo params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clGetSamplerInfoTracer { public: clGetSamplerInfoTracer() {} void enter(cl_sampler *sampler, cl_sampler_info *paramName, size_t *paramValueSize, void **paramValue, size_t **paramValueSizeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.sampler = sampler; params.paramName = paramName; params.paramValueSize = paramValueSize; params.paramValue = paramValue; params.paramValueSizeRet = paramValueSizeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clGetSamplerInfo"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetSamplerInfo)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetSamplerInfo, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetSamplerInfo)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetSamplerInfo, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clGetSamplerInfoTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clGetSamplerInfo params{}; cl_callback_data 
data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clGetSupportedImageFormatsTracer { public: clGetSupportedImageFormatsTracer() {} void enter(cl_context *context, cl_mem_flags *flags, cl_mem_object_type *imageType, cl_uint *numEntries, cl_image_format **imageFormats, cl_uint **numImageFormats) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.context = context; params.flags = flags; params.imageType = imageType; params.numEntries = numEntries; params.imageFormats = imageFormats; params.numImageFormats = numImageFormats; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clGetSupportedImageFormats"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetSupportedImageFormats)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetSupportedImageFormats, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetSupportedImageFormats)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetSupportedImageFormats, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clGetSupportedImageFormatsTracer() { DEBUG_BREAK_IF(state == 
TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clGetSupportedImageFormats params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clLinkProgramTracer { public: clLinkProgramTracer() {} void enter(cl_context *context, cl_uint *numDevices, const cl_device_id **deviceList, const char **options, cl_uint *numInputPrograms, const cl_program **inputPrograms, void(CL_CALLBACK **funcNotify)(cl_program program, void *userData), void **userData, cl_int **errcodeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.context = context; params.numDevices = numDevices; params.deviceList = deviceList; params.options = options; params.numInputPrograms = numInputPrograms; params.inputPrograms = inputPrograms; params.funcNotify = funcNotify; params.userData = userData; params.errcodeRet = errcodeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clLinkProgram"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clLinkProgram)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clLinkProgram, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_program *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clLinkProgram)) { 
data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clLinkProgram, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clLinkProgramTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clLinkProgram params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clReleaseCommandQueueTracer { public: clReleaseCommandQueueTracer() {} void enter(cl_command_queue *commandQueue) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clReleaseCommandQueue"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clReleaseCommandQueue)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clReleaseCommandQueue, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clReleaseCommandQueue)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clReleaseCommandQueue, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clReleaseCommandQueueTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: 
cl_params_clReleaseCommandQueue params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clReleaseContextTracer { public: clReleaseContextTracer() {} void enter(cl_context *context) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.context = context; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clReleaseContext"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clReleaseContext)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clReleaseContext, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clReleaseContext)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clReleaseContext, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clReleaseContextTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clReleaseContext params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clReleaseDeviceTracer { public: clReleaseDeviceTracer() {} void enter(cl_device_id *device) { 
DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.device = device; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clReleaseDevice"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clReleaseDevice)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clReleaseDevice, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clReleaseDevice)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clReleaseDevice, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clReleaseDeviceTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clReleaseDevice params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clReleaseEventTracer { public: clReleaseEventTracer() {} void enter(cl_event *event) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.event = event; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clReleaseEvent"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; 
size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clReleaseEvent)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clReleaseEvent, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clReleaseEvent)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clReleaseEvent, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clReleaseEventTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clReleaseEvent params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clReleaseKernelTracer { public: clReleaseKernelTracer() {} void enter(cl_kernel *kernel) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.kernel = kernel; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clReleaseKernel"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clReleaseKernel)) { data.correlationData = correlationData + i; 
handle->call(CL_FUNCTION_clReleaseKernel, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clReleaseKernel)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clReleaseKernel, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clReleaseKernelTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clReleaseKernel params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clReleaseMemObjectTracer { public: clReleaseMemObjectTracer() {} void enter(cl_mem *memobj) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.memobj = memobj; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clReleaseMemObject"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clReleaseMemObject)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clReleaseMemObject, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; 
DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clReleaseMemObject)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clReleaseMemObject, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clReleaseMemObjectTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clReleaseMemObject params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clReleaseProgramTracer { public: clReleaseProgramTracer() {} void enter(cl_program *program) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.program = program; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clReleaseProgram"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clReleaseProgram)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clReleaseProgram, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clReleaseProgram)) { data.correlationData = correlationData + 
i; handle->call(CL_FUNCTION_clReleaseProgram, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clReleaseProgramTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clReleaseProgram params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clReleaseSamplerTracer { public: clReleaseSamplerTracer() {} void enter(cl_sampler *sampler) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.sampler = sampler; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clReleaseSampler"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clReleaseSampler)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clReleaseSampler, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clReleaseSampler)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clReleaseSampler, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clReleaseSamplerTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clReleaseSampler params{}; cl_callback_data data{}; uint64_t 
correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clRetainCommandQueueTracer { public: clRetainCommandQueueTracer() {} void enter(cl_command_queue *commandQueue) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clRetainCommandQueue"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clRetainCommandQueue)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clRetainCommandQueue, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clRetainCommandQueue)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clRetainCommandQueue, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clRetainCommandQueueTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clRetainCommandQueue params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clRetainContextTracer { public: clRetainContextTracer() {} void enter(cl_context *context) { DEBUG_BREAK_IF(state != 
TRACING_NOTIFY_STATE_NOTHING_CALLED); params.context = context; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clRetainContext"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clRetainContext)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clRetainContext, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clRetainContext)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clRetainContext, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clRetainContextTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clRetainContext params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clRetainDeviceTracer { public: clRetainDeviceTracer() {} void enter(cl_device_id *device) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.device = device; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clRetainDevice"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; 
DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clRetainDevice)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clRetainDevice, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clRetainDevice)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clRetainDevice, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clRetainDeviceTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clRetainDevice params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clRetainEventTracer { public: clRetainEventTracer() {} void enter(cl_event *event) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.event = event; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clRetainEvent"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clRetainEvent)) { data.correlationData = correlationData + i; 
handle->call(CL_FUNCTION_clRetainEvent, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clRetainEvent)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clRetainEvent, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clRetainEventTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clRetainEvent params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clRetainKernelTracer { public: clRetainKernelTracer() {} void enter(cl_kernel *kernel) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.kernel = kernel; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clRetainKernel"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clRetainKernel)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clRetainKernel, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i 
< TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clRetainKernel)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clRetainKernel, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clRetainKernelTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clRetainKernel params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clRetainMemObjectTracer { public: clRetainMemObjectTracer() {} void enter(cl_mem *memobj) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.memobj = memobj; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clRetainMemObject"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clRetainMemObject)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clRetainMemObject, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clRetainMemObject)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clRetainMemObject, &data); } ++i; } state = 
TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clRetainMemObjectTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clRetainMemObject params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clRetainProgramTracer { public: clRetainProgramTracer() {} void enter(cl_program *program) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.program = program; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clRetainProgram"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clRetainProgram)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clRetainProgram, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clRetainProgram)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clRetainProgram, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clRetainProgramTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clRetainProgram params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = 
TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clRetainSamplerTracer { public: clRetainSamplerTracer() {} void enter(cl_sampler *sampler) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.sampler = sampler; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clRetainSampler"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clRetainSampler)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clRetainSampler, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clRetainSampler)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clRetainSampler, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clRetainSamplerTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clRetainSampler params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clSVMAllocTracer { public: clSVMAllocTracer() {} void enter(cl_context *context, cl_svm_mem_flags *flags, size_t *size, cl_uint *alignment) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.context = context; params.flags = flags; params.size = 
size; params.alignment = alignment; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clSVMAlloc"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clSVMAlloc)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clSVMAlloc, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(void **retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clSVMAlloc)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clSVMAlloc, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clSVMAllocTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clSVMAlloc params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clSVMFreeTracer { public: clSVMFreeTracer() {} void enter(cl_context *context, void **svmPointer) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.context = context; params.svmPointer = svmPointer; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clSVMFree"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; 
DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clSVMFree)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clSVMFree, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(void *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clSVMFree)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clSVMFree, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clSVMFreeTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clSVMFree params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clSetCommandQueuePropertyTracer { public: clSetCommandQueuePropertyTracer() {} void enter(cl_command_queue *commandQueue, cl_command_queue_properties *properties, cl_bool *enable, cl_command_queue_properties **oldProperties) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; params.properties = properties; params.enable = enable; params.oldProperties = oldProperties; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clSetCommandQueueProperty"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] 
!= nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clSetCommandQueueProperty)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clSetCommandQueueProperty, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clSetCommandQueueProperty)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clSetCommandQueueProperty, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clSetCommandQueuePropertyTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clSetCommandQueueProperty params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clSetDefaultDeviceCommandQueueTracer { public: clSetDefaultDeviceCommandQueueTracer() {} void enter(cl_context *context, cl_device_id *device, cl_command_queue *commandQueue) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.context = context; params.device = device; params.commandQueue = commandQueue; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clSetDefaultDeviceCommandQueue"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); 
if (handle->getTracingPoint(CL_FUNCTION_clSetDefaultDeviceCommandQueue)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clSetDefaultDeviceCommandQueue, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clSetDefaultDeviceCommandQueue)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clSetDefaultDeviceCommandQueue, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clSetDefaultDeviceCommandQueueTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clSetDefaultDeviceCommandQueue params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clSetEventCallbackTracer { public: clSetEventCallbackTracer() {} void enter(cl_event *event, cl_int *commandExecCallbackType, void(CL_CALLBACK **funcNotify)(cl_event, cl_int, void *), void **userData) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.event = event; params.commandExecCallbackType = commandExecCallbackType; params.funcNotify = funcNotify; params.userData = userData; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clSetEventCallback"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == 
nullptr); if (handle->getTracingPoint(CL_FUNCTION_clSetEventCallback)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clSetEventCallback, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clSetEventCallback)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clSetEventCallback, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clSetEventCallbackTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clSetEventCallback params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clSetKernelArgTracer { public: clSetKernelArgTracer() {} void enter(cl_kernel *kernel, cl_uint *argIndex, size_t *argSize, const void **argValue) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.kernel = kernel; params.argIndex = argIndex; params.argSize = argSize; params.argValue = argValue; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clSetKernelArg"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clSetKernelArg)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clSetKernelArg, 
&data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clSetKernelArg)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clSetKernelArg, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clSetKernelArgTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clSetKernelArg params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clSetKernelArgSVMPointerTracer { public: clSetKernelArgSVMPointerTracer() {} void enter(cl_kernel *kernel, cl_uint *argIndex, const void **argValue) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.kernel = kernel; params.argIndex = argIndex; params.argValue = argValue; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clSetKernelArgSVMPointer"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clSetKernelArgSVMPointer)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clSetKernelArgSVMPointer, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = 
CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clSetKernelArgSVMPointer)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clSetKernelArgSVMPointer, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clSetKernelArgSVMPointerTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clSetKernelArgSVMPointer params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clSetKernelExecInfoTracer { public: clSetKernelExecInfoTracer() {} void enter(cl_kernel *kernel, cl_kernel_exec_info *paramName, size_t *paramValueSize, const void **paramValue) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.kernel = kernel; params.paramName = paramName; params.paramValueSize = paramValueSize; params.paramValue = paramValue; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clSetKernelExecInfo"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clSetKernelExecInfo)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clSetKernelExecInfo, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; 
DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clSetKernelExecInfo)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clSetKernelExecInfo, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clSetKernelExecInfoTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clSetKernelExecInfo params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clSetMemObjectDestructorCallbackTracer { public: clSetMemObjectDestructorCallbackTracer() {} void enter(cl_mem *memobj, void(CL_CALLBACK **funcNotify)(cl_mem, void *), void **userData) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.memobj = memobj; params.funcNotify = funcNotify; params.userData = userData; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clSetMemObjectDestructorCallback"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clSetMemObjectDestructorCallback)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clSetMemObjectDestructorCallback, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && 
tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clSetMemObjectDestructorCallback)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clSetMemObjectDestructorCallback, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clSetMemObjectDestructorCallbackTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clSetMemObjectDestructorCallback params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clSetUserEventStatusTracer { public: clSetUserEventStatusTracer() {} void enter(cl_event *event, cl_int *executionStatus) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.event = event; params.executionStatus = executionStatus; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clSetUserEventStatus"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clSetUserEventStatus)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clSetUserEventStatus, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if 
(handle->getTracingPoint(CL_FUNCTION_clSetUserEventStatus)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clSetUserEventStatus, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clSetUserEventStatusTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clSetUserEventStatus params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clUnloadCompilerTracer { public: clUnloadCompilerTracer() {} void enter() { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clUnloadCompiler"; data.functionParams = nullptr; data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clUnloadCompiler)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clUnloadCompiler, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clUnloadCompiler)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clUnloadCompiler, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clUnloadCompilerTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_callback_data data{}; uint64_t 
correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clUnloadPlatformCompilerTracer { public: clUnloadPlatformCompilerTracer() {} void enter(cl_platform_id *platform) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.platform = platform; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clUnloadPlatformCompiler"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clUnloadPlatformCompiler)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clUnloadPlatformCompiler, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clUnloadPlatformCompiler)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clUnloadPlatformCompiler, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clUnloadPlatformCompilerTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clUnloadPlatformCompiler params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clWaitForEventsTracer { public: clWaitForEventsTracer() {} void enter(cl_uint *numEvents, const 
cl_event **eventList) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.numEvents = numEvents; params.eventList = eventList; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clWaitForEvents"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clWaitForEvents)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clWaitForEvents, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clWaitForEvents)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clWaitForEvents, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clWaitForEventsTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clWaitForEvents params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; #ifdef _WIN32 class clCreateFromGLBufferTracer { public: clCreateFromGLBufferTracer() {} void enter(cl_context *context, cl_mem_flags *flags, cl_GLuint *bufobj, int **errcodeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.context = context; params.flags = flags; params.bufobj = bufobj; params.errcodeRet = errcodeRet; 
data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clCreateFromGLBuffer"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateFromGLBuffer)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateFromGLBuffer, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_mem *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateFromGLBuffer)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateFromGLBuffer, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clCreateFromGLBufferTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clCreateFromGLBuffer params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clCreateFromGLRenderbufferTracer { public: clCreateFromGLRenderbufferTracer() {} void enter(cl_context *context, cl_mem_flags *flags, cl_GLuint *renderbuffer, cl_int **errcodeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.context = context; params.flags = flags; params.renderbuffer = renderbuffer; params.errcodeRet = errcodeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, 
std::memory_order_acq_rel); data.functionName = "clCreateFromGLRenderbuffer"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateFromGLRenderbuffer)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateFromGLRenderbuffer, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_mem *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateFromGLRenderbuffer)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateFromGLRenderbuffer, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clCreateFromGLRenderbufferTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clCreateFromGLRenderbuffer params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clCreateFromGLTextureTracer { public: clCreateFromGLTextureTracer() {} void enter(cl_context *context, cl_mem_flags *flags, cl_GLenum *target, cl_GLint *miplevel, cl_GLuint *texture, cl_int **errcodeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.context = context; params.flags = flags; params.target = target; params.miplevel = miplevel; params.texture = texture; params.errcodeRet = errcodeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = 
tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clCreateFromGLTexture"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateFromGLTexture)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateFromGLTexture, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_mem *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateFromGLTexture)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateFromGLTexture, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clCreateFromGLTextureTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clCreateFromGLTexture params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clCreateFromGLTexture2DTracer { public: clCreateFromGLTexture2DTracer() {} void enter(cl_context *context, cl_mem_flags *flags, cl_GLenum *target, cl_GLint *miplevel, cl_GLuint *texture, cl_int **errcodeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.context = context; params.flags = flags; params.target = target; params.miplevel = miplevel; params.texture = texture; params.errcodeRet = errcodeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = 
tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clCreateFromGLTexture2D"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateFromGLTexture2D)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateFromGLTexture2D, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_mem *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateFromGLTexture2D)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateFromGLTexture2D, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clCreateFromGLTexture2DTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clCreateFromGLTexture2D params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clCreateFromGLTexture3DTracer { public: clCreateFromGLTexture3DTracer() {} void enter(cl_context *context, cl_mem_flags *flags, cl_GLenum *target, cl_GLint *miplevel, cl_GLuint *texture, cl_int **errcodeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.context = context; params.flags = flags; params.target = target; params.miplevel = miplevel; params.texture = texture; params.errcodeRet = errcodeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = 
tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clCreateFromGLTexture3D"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateFromGLTexture3D)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateFromGLTexture3D, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_mem *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateFromGLTexture3D)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateFromGLTexture3D, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clCreateFromGLTexture3DTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clCreateFromGLTexture3D params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clEnqueueAcquireGLObjectsTracer { public: clEnqueueAcquireGLObjectsTracer() {} void enter(cl_command_queue *commandQueue, cl_uint *numObjects, const cl_mem **memObjects, cl_uint *numEventsInWaitList, const cl_event **eventWaitList, cl_event **event) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; params.numObjects = numObjects; params.memObjects = memObjects; params.numEventsInWaitList = numEventsInWaitList; params.eventWaitList = 
eventWaitList; params.event = event; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clEnqueueAcquireGLObjects"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueAcquireGLObjects)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueAcquireGLObjects, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueAcquireGLObjects)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueAcquireGLObjects, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clEnqueueAcquireGLObjectsTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clEnqueueAcquireGLObjects params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clEnqueueReleaseGLObjectsTracer { public: clEnqueueReleaseGLObjectsTracer() {} void enter(cl_command_queue *commandQueue, cl_uint *numObjects, const cl_mem **memObjects, cl_uint *numEventsInWaitList, const cl_event **eventWaitList, cl_event **event) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; params.numObjects = numObjects; 
params.memObjects = memObjects; params.numEventsInWaitList = numEventsInWaitList; params.eventWaitList = eventWaitList; params.event = event; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clEnqueueReleaseGLObjects"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueReleaseGLObjects)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueReleaseGLObjects, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueReleaseGLObjects)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueReleaseGLObjects, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clEnqueueReleaseGLObjectsTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clEnqueueReleaseGLObjects params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clGetGLObjectInfoTracer { public: clGetGLObjectInfoTracer() {} void enter(cl_mem *memobj, cl_gl_object_type **glObjectType, cl_GLuint **glObjectName) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.memobj = memobj; params.glObjectType = glObjectType; 
params.glObjectName = glObjectName; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clGetGLObjectInfo"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetGLObjectInfo)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetGLObjectInfo, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetGLObjectInfo)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetGLObjectInfo, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clGetGLObjectInfoTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clGetGLObjectInfo params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clGetGLTextureInfoTracer { public: clGetGLTextureInfoTracer() {} void enter(cl_mem *memobj, cl_gl_texture_info *paramName, size_t *paramValueSize, void **paramValue, size_t **paramValueSizeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.memobj = memobj; params.paramName = paramName; params.paramValueSize = paramValueSize; params.paramValue = paramValue; params.paramValueSizeRet = paramValueSizeRet; data.site = 
CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clGetGLTextureInfo"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetGLTextureInfo)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetGLTextureInfo, &data); } ++i; } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; size_t i = 0; DEBUG_BREAK_IF(tracingHandle[0] == nullptr); while (i < TRACING_MAX_HANDLE_COUNT && tracingHandle[i] != nullptr) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetGLTextureInfo)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetGLTextureInfo, &data); } ++i; } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clGetGLTextureInfoTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clGetGLTextureInfo params{}; cl_callback_data data{}; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; #endif } // namespace HostSideTracing compute-runtime-22.14.22890/opencl/source/tracing/tracing_types.h000066400000000000000000001022201422164147700246020ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "CL/cl.h" #include "CL/cl_gl.h" struct _cl_tracing_handle; typedef _cl_tracing_handle *cl_tracing_handle; //! 
Enumeration of callback call sites typedef enum _cl_callback_site { CL_CALLBACK_SITE_ENTER = 0, //!< Before the function CL_CALLBACK_SITE_EXIT = 1 //!< After the function } cl_callback_site; /*! \brief Callback data structure The structure contains information about the traced function. Function name allows to determine which function is currently traced. Call site is used to determine if the callback was called at the beginning or at the end of function. Correlation ID and Data fields allow to associate the callback on enter with the callback on exit and pass any piece of data between them. Function arguments and return value available both for reading and writing. Return value will be available only within on-exit callback */ typedef struct _cl_callback_data { cl_callback_site site; //!< Call site, can be ENTER or EXIT cl_uint correlationId; //!< Correlation identifier, the same for ENTER //!< and EXIT callbacks cl_ulong *correlationData; //!< Pointer to correlation data repository, //!< can be used to move data from ENTER to //!< EXIT callback const char *functionName; //!< Name of the traced function const void *functionParams; //!< Traced function arguments, should be //!< casted to appropriate params structure void *functionReturnValue; //!< Return value for the traced function } cl_callback_data; //! 
Enumeration of supported functions for tracing typedef enum _cl_function_id { CL_FUNCTION_clBuildProgram = 0, CL_FUNCTION_clCloneKernel = 1, CL_FUNCTION_clCompileProgram = 2, CL_FUNCTION_clCreateBuffer = 3, CL_FUNCTION_clCreateCommandQueue = 4, CL_FUNCTION_clCreateCommandQueueWithProperties = 5, CL_FUNCTION_clCreateContext = 6, CL_FUNCTION_clCreateContextFromType = 7, CL_FUNCTION_clCreateFromGLBuffer = 8, CL_FUNCTION_clCreateFromGLRenderbuffer = 9, CL_FUNCTION_clCreateFromGLTexture = 10, CL_FUNCTION_clCreateFromGLTexture2D = 11, CL_FUNCTION_clCreateFromGLTexture3D = 12, CL_FUNCTION_clCreateImage = 13, CL_FUNCTION_clCreateImage2D = 14, CL_FUNCTION_clCreateImage3D = 15, CL_FUNCTION_clCreateKernel = 16, CL_FUNCTION_clCreateKernelsInProgram = 17, CL_FUNCTION_clCreatePipe = 18, CL_FUNCTION_clCreateProgramWithBinary = 19, CL_FUNCTION_clCreateProgramWithBuiltInKernels = 20, CL_FUNCTION_clCreateProgramWithIL = 21, CL_FUNCTION_clCreateProgramWithSource = 22, CL_FUNCTION_clCreateSampler = 23, CL_FUNCTION_clCreateSamplerWithProperties = 24, CL_FUNCTION_clCreateSubBuffer = 25, CL_FUNCTION_clCreateSubDevices = 26, CL_FUNCTION_clCreateUserEvent = 27, CL_FUNCTION_clEnqueueAcquireGLObjects = 28, CL_FUNCTION_clEnqueueBarrier = 29, CL_FUNCTION_clEnqueueBarrierWithWaitList = 30, CL_FUNCTION_clEnqueueCopyBuffer = 31, CL_FUNCTION_clEnqueueCopyBufferRect = 32, CL_FUNCTION_clEnqueueCopyBufferToImage = 33, CL_FUNCTION_clEnqueueCopyImage = 34, CL_FUNCTION_clEnqueueCopyImageToBuffer = 35, CL_FUNCTION_clEnqueueFillBuffer = 36, CL_FUNCTION_clEnqueueFillImage = 37, CL_FUNCTION_clEnqueueMapBuffer = 38, CL_FUNCTION_clEnqueueMapImage = 39, CL_FUNCTION_clEnqueueMarker = 40, CL_FUNCTION_clEnqueueMarkerWithWaitList = 41, CL_FUNCTION_clEnqueueMigrateMemObjects = 42, CL_FUNCTION_clEnqueueNDRangeKernel = 43, CL_FUNCTION_clEnqueueNativeKernel = 44, CL_FUNCTION_clEnqueueReadBuffer = 45, CL_FUNCTION_clEnqueueReadBufferRect = 46, CL_FUNCTION_clEnqueueReadImage = 47, CL_FUNCTION_clEnqueueReleaseGLObjects = 
48, CL_FUNCTION_clEnqueueSVMFree = 49, CL_FUNCTION_clEnqueueSVMMap = 50, CL_FUNCTION_clEnqueueSVMMemFill = 51, CL_FUNCTION_clEnqueueSVMMemcpy = 52, CL_FUNCTION_clEnqueueSVMMigrateMem = 53, CL_FUNCTION_clEnqueueSVMUnmap = 54, CL_FUNCTION_clEnqueueTask = 55, CL_FUNCTION_clEnqueueUnmapMemObject = 56, CL_FUNCTION_clEnqueueWaitForEvents = 57, CL_FUNCTION_clEnqueueWriteBuffer = 58, CL_FUNCTION_clEnqueueWriteBufferRect = 59, CL_FUNCTION_clEnqueueWriteImage = 60, CL_FUNCTION_clFinish = 61, CL_FUNCTION_clFlush = 62, CL_FUNCTION_clGetCommandQueueInfo = 63, CL_FUNCTION_clGetContextInfo = 64, CL_FUNCTION_clGetDeviceAndHostTimer = 65, CL_FUNCTION_clGetDeviceIDs = 66, CL_FUNCTION_clGetDeviceInfo = 67, CL_FUNCTION_clGetEventInfo = 68, CL_FUNCTION_clGetEventProfilingInfo = 69, CL_FUNCTION_clGetExtensionFunctionAddress = 70, CL_FUNCTION_clGetExtensionFunctionAddressForPlatform = 71, CL_FUNCTION_clGetGLObjectInfo = 72, CL_FUNCTION_clGetGLTextureInfo = 73, CL_FUNCTION_clGetHostTimer = 74, CL_FUNCTION_clGetImageInfo = 75, CL_FUNCTION_clGetKernelArgInfo = 76, CL_FUNCTION_clGetKernelInfo = 77, CL_FUNCTION_clGetKernelSubGroupInfo = 78, CL_FUNCTION_clGetKernelWorkGroupInfo = 79, CL_FUNCTION_clGetMemObjectInfo = 80, CL_FUNCTION_clGetPipeInfo = 81, CL_FUNCTION_clGetPlatformIDs = 82, CL_FUNCTION_clGetPlatformInfo = 83, CL_FUNCTION_clGetProgramBuildInfo = 84, CL_FUNCTION_clGetProgramInfo = 85, CL_FUNCTION_clGetSamplerInfo = 86, CL_FUNCTION_clGetSupportedImageFormats = 87, CL_FUNCTION_clLinkProgram = 88, CL_FUNCTION_clReleaseCommandQueue = 89, CL_FUNCTION_clReleaseContext = 90, CL_FUNCTION_clReleaseDevice = 91, CL_FUNCTION_clReleaseEvent = 92, CL_FUNCTION_clReleaseKernel = 93, CL_FUNCTION_clReleaseMemObject = 94, CL_FUNCTION_clReleaseProgram = 95, CL_FUNCTION_clReleaseSampler = 96, CL_FUNCTION_clRetainCommandQueue = 97, CL_FUNCTION_clRetainContext = 98, CL_FUNCTION_clRetainDevice = 99, CL_FUNCTION_clRetainEvent = 100, CL_FUNCTION_clRetainKernel = 101, CL_FUNCTION_clRetainMemObject = 102, 
CL_FUNCTION_clRetainProgram = 103, CL_FUNCTION_clRetainSampler = 104, CL_FUNCTION_clSVMAlloc = 105, CL_FUNCTION_clSVMFree = 106, CL_FUNCTION_clSetCommandQueueProperty = 107, CL_FUNCTION_clSetDefaultDeviceCommandQueue = 108, CL_FUNCTION_clSetEventCallback = 109, CL_FUNCTION_clSetKernelArg = 110, CL_FUNCTION_clSetKernelArgSVMPointer = 111, CL_FUNCTION_clSetKernelExecInfo = 112, CL_FUNCTION_clSetMemObjectDestructorCallback = 113, CL_FUNCTION_clSetUserEventStatus = 114, CL_FUNCTION_clUnloadCompiler = 115, CL_FUNCTION_clUnloadPlatformCompiler = 116, CL_FUNCTION_clWaitForEvents = 117, CL_FUNCTION_COUNT = 118, } cl_function_id; /*! User-defined tracing callback prototype \param[in] fid Identifier of the function for which the callback is called \param[in] callbackData Data structure with information about the traced function \param[in] userData User-defined data pointer passed through clCreateTracingHandleINTEL() function Thread Safety: must be guaranteed by customer */ typedef void (*cl_tracing_callback)(cl_function_id fid, cl_callback_data *callbackData, void *userData); typedef struct _cl_params_clBuildProgram { cl_program *program; cl_uint *numDevices; const cl_device_id **deviceList; const char **options; void(CL_CALLBACK **funcNotify)(cl_program program, void *userData); void **userData; } cl_params_clBuildProgram; typedef struct _cl_params_clCloneKernel { cl_kernel *sourceKernel; cl_int **errcodeRet; } cl_params_clCloneKernel; typedef struct _cl_params_clCompileProgram { cl_program *program; cl_uint *numDevices; const cl_device_id **deviceList; const char **options; cl_uint *numInputHeaders; const cl_program **inputHeaders; const char ***headerIncludeNames; void(CL_CALLBACK **funcNotify)(cl_program program, void *userData); void **userData; } cl_params_clCompileProgram; typedef struct _cl_params_clCreateBuffer { cl_context *context; cl_mem_flags *flags; size_t *size; void **hostPtr; cl_int **errcodeRet; } cl_params_clCreateBuffer; typedef struct 
_cl_params_clCreateCommandQueue { cl_context *context; cl_device_id *device; cl_command_queue_properties *properties; cl_int **errcodeRet; } cl_params_clCreateCommandQueue; typedef struct _cl_params_clCreateCommandQueueWithProperties { cl_context *context; cl_device_id *device; const cl_queue_properties **properties; cl_int **errcodeRet; } cl_params_clCreateCommandQueueWithProperties; typedef struct _cl_params_clCreateContext { const cl_context_properties **properties; cl_uint *numDevices; const cl_device_id **devices; void(CL_CALLBACK **funcNotify)(const char *, const void *, size_t, void *); void **userData; cl_int **errcodeRet; } cl_params_clCreateContext; typedef struct _cl_params_clCreateContextFromType { const cl_context_properties **properties; cl_device_type *deviceType; void(CL_CALLBACK **funcNotify)(const char *, const void *, size_t, void *); void **userData; cl_int **errcodeRet; } cl_params_clCreateContextFromType; typedef struct _cl_params_clCreateFromGLBuffer { cl_context *context; cl_mem_flags *flags; cl_GLuint *bufobj; int **errcodeRet; } cl_params_clCreateFromGLBuffer; typedef struct _cl_params_clCreateFromGLRenderbuffer { cl_context *context; cl_mem_flags *flags; cl_GLuint *renderbuffer; cl_int **errcodeRet; } cl_params_clCreateFromGLRenderbuffer; typedef struct _cl_params_clCreateFromGLTexture { cl_context *context; cl_mem_flags *flags; cl_GLenum *target; cl_GLint *miplevel; cl_GLuint *texture; cl_int **errcodeRet; } cl_params_clCreateFromGLTexture; typedef struct _cl_params_clCreateFromGLTexture2D { cl_context *context; cl_mem_flags *flags; cl_GLenum *target; cl_GLint *miplevel; cl_GLuint *texture; cl_int **errcodeRet; } cl_params_clCreateFromGLTexture2D; typedef struct _cl_params_clCreateFromGLTexture3D { cl_context *context; cl_mem_flags *flags; cl_GLenum *target; cl_GLint *miplevel; cl_GLuint *texture; cl_int **errcodeRet; } cl_params_clCreateFromGLTexture3D; typedef struct _cl_params_clCreateImage { cl_context *context; cl_mem_flags *flags; 
const cl_image_format **imageFormat; const cl_image_desc **imageDesc; void **hostPtr; cl_int **errcodeRet; } cl_params_clCreateImage; typedef struct _cl_params_clCreateImage2D { cl_context *context; cl_mem_flags *flags; const cl_image_format **imageFormat; size_t *imageWidth; size_t *imageHeight; size_t *imageRowPitch; void **hostPtr; cl_int **errcodeRet; } cl_params_clCreateImage2D; typedef struct _cl_params_clCreateImage3D { cl_context *context; cl_mem_flags *flags; const cl_image_format **imageFormat; size_t *imageWidth; size_t *imageHeight; size_t *imageDepth; size_t *imageRowPitch; size_t *imageSlicePitch; void **hostPtr; cl_int **errcodeRet; } cl_params_clCreateImage3D; typedef struct _cl_params_clCreateKernel { cl_program *program; const char **kernelName; cl_int **errcodeRet; } cl_params_clCreateKernel; typedef struct _cl_params_clCreateKernelsInProgram { cl_program *program; cl_uint *numKernels; cl_kernel **kernels; cl_uint **numKernelsRet; } cl_params_clCreateKernelsInProgram; typedef struct _cl_params_clCreatePipe { cl_context *context; cl_mem_flags *flags; cl_uint *pipePacketSize; cl_uint *pipeMaxPackets; const cl_pipe_properties **properties; cl_int **errcodeRet; } cl_params_clCreatePipe; typedef struct _cl_params_clCreateProgramWithBinary { cl_context *context; cl_uint *numDevices; const cl_device_id **deviceList; const size_t **lengths; const unsigned char ***binaries; cl_int **binaryStatus; cl_int **errcodeRet; } cl_params_clCreateProgramWithBinary; typedef struct _cl_params_clCreateProgramWithBuiltInKernels { cl_context *context; cl_uint *numDevices; const cl_device_id **deviceList; const char **kernelNames; cl_int **errcodeRet; } cl_params_clCreateProgramWithBuiltInKernels; typedef struct _cl_params_clCreateProgramWithIL { cl_context *context; const void **il; size_t *length; cl_int **errcodeRet; } cl_params_clCreateProgramWithIL; typedef struct _cl_params_clCreateProgramWithSource { cl_context *context; cl_uint *count; const char ***strings; 
const size_t **lengths; cl_int **errcodeRet; } cl_params_clCreateProgramWithSource; typedef struct _cl_params_clCreateSampler { cl_context *context; cl_bool *normalizedCoords; cl_addressing_mode *addressingMode; cl_filter_mode *filterMode; cl_int **errcodeRet; } cl_params_clCreateSampler; typedef struct _cl_params_clCreateSamplerWithProperties { cl_context *context; const cl_sampler_properties **samplerProperties; cl_int **errcodeRet; } cl_params_clCreateSamplerWithProperties; typedef struct _cl_params_clCreateSubBuffer { cl_mem *buffer; cl_mem_flags *flags; cl_buffer_create_type *bufferCreateType; const void **bufferCreateInfo; cl_int **errcodeRet; } cl_params_clCreateSubBuffer; typedef struct _cl_params_clCreateSubDevices { cl_device_id *inDevice; const cl_device_partition_property **properties; cl_uint *numDevices; cl_device_id **outDevices; cl_uint **numDevicesRet; } cl_params_clCreateSubDevices; typedef struct _cl_params_clCreateUserEvent { cl_context *context; cl_int **errcodeRet; } cl_params_clCreateUserEvent; typedef struct _cl_params_clEnqueueAcquireGLObjects { cl_command_queue *commandQueue; cl_uint *numObjects; const cl_mem **memObjects; cl_uint *numEventsInWaitList; const cl_event **eventWaitList; cl_event **event; } cl_params_clEnqueueAcquireGLObjects; typedef struct _cl_params_clEnqueueBarrier { cl_command_queue *commandQueue; } cl_params_clEnqueueBarrier; typedef struct _cl_params_clEnqueueBarrierWithWaitList { cl_command_queue *commandQueue; cl_uint *numEventsInWaitList; const cl_event **eventWaitList; cl_event **event; } cl_params_clEnqueueBarrierWithWaitList; typedef struct _cl_params_clEnqueueCopyBuffer { cl_command_queue *commandQueue; cl_mem *srcBuffer; cl_mem *dstBuffer; size_t *srcOffset; size_t *dstOffset; size_t *cb; cl_uint *numEventsInWaitList; const cl_event **eventWaitList; cl_event **event; } cl_params_clEnqueueCopyBuffer; typedef struct _cl_params_clEnqueueCopyBufferRect { cl_command_queue *commandQueue; cl_mem *srcBuffer; cl_mem 
*dstBuffer; const size_t **srcOrigin; const size_t **dstOrigin; const size_t **region; size_t *srcRowPitch; size_t *srcSlicePitch; size_t *dstRowPitch; size_t *dstSlicePitch; cl_uint *numEventsInWaitList; const cl_event **eventWaitList; cl_event **event; } cl_params_clEnqueueCopyBufferRect; typedef struct _cl_params_clEnqueueCopyBufferToImage { cl_command_queue *commandQueue; cl_mem *srcBuffer; cl_mem *dstImage; size_t *srcOffset; const size_t **dstOrigin; const size_t **region; cl_uint *numEventsInWaitList; const cl_event **eventWaitList; cl_event **event; } cl_params_clEnqueueCopyBufferToImage; typedef struct _cl_params_clEnqueueCopyImage { cl_command_queue *commandQueue; cl_mem *srcImage; cl_mem *dstImage; const size_t **srcOrigin; const size_t **dstOrigin; const size_t **region; cl_uint *numEventsInWaitList; const cl_event **eventWaitList; cl_event **event; } cl_params_clEnqueueCopyImage; typedef struct _cl_params_clEnqueueCopyImageToBuffer { cl_command_queue *commandQueue; cl_mem *srcImage; cl_mem *dstBuffer; const size_t **srcOrigin; const size_t **region; size_t *dstOffset; cl_uint *numEventsInWaitList; const cl_event **eventWaitList; cl_event **event; } cl_params_clEnqueueCopyImageToBuffer; typedef struct _cl_params_clEnqueueFillBuffer { cl_command_queue *commandQueue; cl_mem *buffer; const void **pattern; size_t *patternSize; size_t *offset; size_t *size; cl_uint *numEventsInWaitList; const cl_event **eventWaitList; cl_event **event; } cl_params_clEnqueueFillBuffer; typedef struct _cl_params_clEnqueueFillImage { cl_command_queue *commandQueue; cl_mem *image; const void **fillColor; const size_t **origin; const size_t **region; cl_uint *numEventsInWaitList; const cl_event **eventWaitList; cl_event **event; } cl_params_clEnqueueFillImage; typedef struct _cl_params_clEnqueueMapBuffer { cl_command_queue *commandQueue; cl_mem *buffer; cl_bool *blockingMap; cl_map_flags *mapFlags; size_t *offset; size_t *cb; cl_uint *numEventsInWaitList; const cl_event 
**eventWaitList; cl_event **event; cl_int **errcodeRet; } cl_params_clEnqueueMapBuffer; typedef struct _cl_params_clEnqueueMapImage { cl_command_queue *commandQueue; cl_mem *image; cl_bool *blockingMap; cl_map_flags *mapFlags; const size_t **origin; const size_t **region; size_t **imageRowPitch; size_t **imageSlicePitch; cl_uint *numEventsInWaitList; const cl_event **eventWaitList; cl_event **event; cl_int **errcodeRet; } cl_params_clEnqueueMapImage; typedef struct _cl_params_clEnqueueMarker { cl_command_queue *commandQueue; cl_event **event; } cl_params_clEnqueueMarker; typedef struct _cl_params_clEnqueueMarkerWithWaitList { cl_command_queue *commandQueue; cl_uint *numEventsInWaitList; const cl_event **eventWaitList; cl_event **event; } cl_params_clEnqueueMarkerWithWaitList; typedef struct _cl_params_clEnqueueMigrateMemObjects { cl_command_queue *commandQueue; cl_uint *numMemObjects; const cl_mem **memObjects; cl_mem_migration_flags *flags; cl_uint *numEventsInWaitList; const cl_event **eventWaitList; cl_event **event; } cl_params_clEnqueueMigrateMemObjects; typedef struct _cl_params_clEnqueueNDRangeKernel { cl_command_queue *commandQueue; cl_kernel *kernel; cl_uint *workDim; const size_t **globalWorkOffset; const size_t **globalWorkSize; const size_t **localWorkSize; cl_uint *numEventsInWaitList; const cl_event **eventWaitList; cl_event **event; } cl_params_clEnqueueNDRangeKernel; typedef struct _cl_params_clEnqueueNativeKernel { cl_command_queue *commandQueue; void(CL_CALLBACK **userFunc)(void *); void **args; size_t *cbArgs; cl_uint *numMemObjects; const cl_mem **memList; const void ***argsMemLoc; cl_uint *numEventsInWaitList; const cl_event **eventWaitList; cl_event **event; } cl_params_clEnqueueNativeKernel; typedef struct _cl_params_clEnqueueReadBuffer { cl_command_queue *commandQueue; cl_mem *buffer; cl_bool *blockingRead; size_t *offset; size_t *cb; void **ptr; cl_uint *numEventsInWaitList; const cl_event **eventWaitList; cl_event **event; } 
cl_params_clEnqueueReadBuffer; typedef struct _cl_params_clEnqueueReadBufferRect { cl_command_queue *commandQueue; cl_mem *buffer; cl_bool *blockingRead; const size_t **bufferOrigin; const size_t **hostOrigin; const size_t **region; size_t *bufferRowPitch; size_t *bufferSlicePitch; size_t *hostRowPitch; size_t *hostSlicePitch; void **ptr; cl_uint *numEventsInWaitList; const cl_event **eventWaitList; cl_event **event; } cl_params_clEnqueueReadBufferRect; typedef struct _cl_params_clEnqueueReadImage { cl_command_queue *commandQueue; cl_mem *image; cl_bool *blockingRead; const size_t **origin; const size_t **region; size_t *rowPitch; size_t *slicePitch; void **ptr; cl_uint *numEventsInWaitList; const cl_event **eventWaitList; cl_event **event; } cl_params_clEnqueueReadImage; typedef struct _cl_params_clEnqueueReleaseGLObjects { cl_command_queue *commandQueue; cl_uint *numObjects; const cl_mem **memObjects; cl_uint *numEventsInWaitList; const cl_event **eventWaitList; cl_event **event; } cl_params_clEnqueueReleaseGLObjects; typedef struct _cl_params_clEnqueueSVMFree { cl_command_queue *commandQueue; cl_uint *numSvmPointers; void ***svmPointers; void(CL_CALLBACK **pfnFreeFunc)(cl_command_queue queue, cl_uint numSvmPointers, void **svmPointers, void *userData); void **userData; cl_uint *numEventsInWaitList; const cl_event **eventWaitList; cl_event **event; } cl_params_clEnqueueSVMFree; typedef struct _cl_params_clEnqueueSVMMap { cl_command_queue *commandQueue; cl_bool *blockingMap; cl_map_flags *mapFlags; void **svmPtr; size_t *size; cl_uint *numEventsInWaitList; const cl_event **eventWaitList; cl_event **event; } cl_params_clEnqueueSVMMap; typedef struct _cl_params_clEnqueueSVMMemFill { cl_command_queue *commandQueue; void **svmPtr; const void **pattern; size_t *patternSize; size_t *size; cl_uint *numEventsInWaitList; const cl_event **eventWaitList; cl_event **event; } cl_params_clEnqueueSVMMemFill; typedef struct _cl_params_clEnqueueSVMMemcpy { cl_command_queue 
*commandQueue; cl_bool *blockingCopy; void **dstPtr; const void **srcPtr; size_t *size; cl_uint *numEventsInWaitList; const cl_event **eventWaitList; cl_event **event; } cl_params_clEnqueueSVMMemcpy; typedef struct _cl_params_clEnqueueSVMMigrateMem { cl_command_queue *commandQueue; cl_uint *numSvmPointers; const void ***svmPointers; const size_t **sizes; const cl_mem_migration_flags *flags; cl_uint *numEventsInWaitList; const cl_event **eventWaitList; cl_event **event; } cl_params_clEnqueueSVMMigrateMem; typedef struct _cl_params_clEnqueueSVMUnmap { cl_command_queue *commandQueue; void **svmPtr; cl_uint *numEventsInWaitList; const cl_event **eventWaitList; cl_event **event; } cl_params_clEnqueueSVMUnmap; typedef struct _cl_params_clEnqueueTask { cl_command_queue *commandQueue; cl_kernel *kernel; cl_uint *numEventsInWaitList; const cl_event **eventWaitList; cl_event **event; } cl_params_clEnqueueTask; typedef struct _cl_params_clEnqueueUnmapMemObject { cl_command_queue *commandQueue; cl_mem *memobj; void **mappedPtr; cl_uint *numEventsInWaitList; const cl_event **eventWaitList; cl_event **event; } cl_params_clEnqueueUnmapMemObject; typedef struct _cl_params_clEnqueueWaitForEvents { cl_command_queue *commandQueue; cl_uint *numEvents; const cl_event **eventList; } cl_params_clEnqueueWaitForEvents; typedef struct _cl_params_clEnqueueWriteBuffer { cl_command_queue *commandQueue; cl_mem *buffer; cl_bool *blockingWrite; size_t *offset; size_t *cb; const void **ptr; cl_uint *numEventsInWaitList; const cl_event **eventWaitList; cl_event **event; } cl_params_clEnqueueWriteBuffer; typedef struct _cl_params_clEnqueueWriteBufferRect { cl_command_queue *commandQueue; cl_mem *buffer; cl_bool *blockingWrite; const size_t **bufferOrigin; const size_t **hostOrigin; const size_t **region; size_t *bufferRowPitch; size_t *bufferSlicePitch; size_t *hostRowPitch; size_t *hostSlicePitch; const void **ptr; cl_uint *numEventsInWaitList; const cl_event **eventWaitList; cl_event **event; } 
cl_params_clEnqueueWriteBufferRect; typedef struct _cl_params_clEnqueueWriteImage { cl_command_queue *commandQueue; cl_mem *image; cl_bool *blockingWrite; const size_t **origin; const size_t **region; size_t *inputRowPitch; size_t *inputSlicePitch; const void **ptr; cl_uint *numEventsInWaitList; const cl_event **eventWaitList; cl_event **event; } cl_params_clEnqueueWriteImage; typedef struct _cl_params_clFinish { cl_command_queue *commandQueue; } cl_params_clFinish; typedef struct _cl_params_clFlush { cl_command_queue *commandQueue; } cl_params_clFlush; typedef struct _cl_params_clGetCommandQueueInfo { cl_command_queue *commandQueue; cl_command_queue_info *paramName; size_t *paramValueSize; void **paramValue; size_t **paramValueSizeRet; } cl_params_clGetCommandQueueInfo; typedef struct _cl_params_clGetContextInfo { cl_context *context; cl_context_info *paramName; size_t *paramValueSize; void **paramValue; size_t **paramValueSizeRet; } cl_params_clGetContextInfo; typedef struct _cl_params_clGetDeviceAndHostTimer { cl_device_id *device; cl_ulong **deviceTimestamp; cl_ulong **hostTimestamp; } cl_params_clGetDeviceAndHostTimer; typedef struct _cl_params_clGetDeviceIDs { cl_platform_id *platform; cl_device_type *deviceType; cl_uint *numEntries; cl_device_id **devices; cl_uint **numDevices; } cl_params_clGetDeviceIDs; typedef struct _cl_params_clGetDeviceInfo { cl_device_id *device; cl_device_info *paramName; size_t *paramValueSize; void **paramValue; size_t **paramValueSizeRet; } cl_params_clGetDeviceInfo; typedef struct _cl_params_clGetEventInfo { cl_event *event; cl_event_info *paramName; size_t *paramValueSize; void **paramValue; size_t **paramValueSizeRet; } cl_params_clGetEventInfo; typedef struct _cl_params_clGetEventProfilingInfo { cl_event *event; cl_profiling_info *paramName; size_t *paramValueSize; void **paramValue; size_t **paramValueSizeRet; } cl_params_clGetEventProfilingInfo; typedef struct _cl_params_clGetExtensionFunctionAddress { const char **funcName; 
} cl_params_clGetExtensionFunctionAddress; typedef struct _cl_params_clGetExtensionFunctionAddressForPlatform { cl_platform_id *platform; const char **funcName; } cl_params_clGetExtensionFunctionAddressForPlatform; typedef struct _cl_params_clGetGLObjectInfo { cl_mem *memobj; cl_gl_object_type **glObjectType; cl_GLuint **glObjectName; } cl_params_clGetGLObjectInfo; typedef struct _cl_params_clGetGLTextureInfo { cl_mem *memobj; cl_gl_texture_info *paramName; size_t *paramValueSize; void **paramValue; size_t **paramValueSizeRet; } cl_params_clGetGLTextureInfo; typedef struct _cl_params_clGetHostTimer { cl_device_id *device; cl_ulong **hostTimestamp; } cl_params_clGetHostTimer; typedef struct _cl_params_clGetImageInfo { cl_mem *image; cl_image_info *paramName; size_t *paramValueSize; void **paramValue; size_t **paramValueSizeRet; } cl_params_clGetImageInfo; typedef struct _cl_params_clGetKernelArgInfo { cl_kernel *kernel; cl_uint *argIndx; cl_kernel_arg_info *paramName; size_t *paramValueSize; void **paramValue; size_t **paramValueSizeRet; } cl_params_clGetKernelArgInfo; typedef struct _cl_params_clGetKernelInfo { cl_kernel *kernel; cl_kernel_info *paramName; size_t *paramValueSize; void **paramValue; size_t **paramValueSizeRet; } cl_params_clGetKernelInfo; typedef struct _cl_params_clGetKernelSubGroupInfo { cl_kernel *kernel; cl_device_id *device; cl_kernel_sub_group_info *paramName; size_t *inputValueSize; const void **inputValue; size_t *paramValueSize; void **paramValue; size_t **paramValueSizeRet; } cl_params_clGetKernelSubGroupInfo; typedef struct _cl_params_clGetKernelWorkGroupInfo { cl_kernel *kernel; cl_device_id *device; cl_kernel_work_group_info *paramName; size_t *paramValueSize; void **paramValue; size_t **paramValueSizeRet; } cl_params_clGetKernelWorkGroupInfo; typedef struct _cl_params_clGetMemObjectInfo { cl_mem *memobj; cl_mem_info *paramName; size_t *paramValueSize; void **paramValue; size_t **paramValueSizeRet; } cl_params_clGetMemObjectInfo; 
typedef struct _cl_params_clGetPipeInfo { cl_mem *pipe; cl_pipe_info *paramName; size_t *paramValueSize; void **paramValue; size_t **paramValueSizeRet; } cl_params_clGetPipeInfo; typedef struct _cl_params_clGetPlatformIDs { cl_uint *numEntries; cl_platform_id **platforms; cl_uint **numPlatforms; } cl_params_clGetPlatformIDs; typedef struct _cl_params_clGetPlatformInfo { cl_platform_id *platform; cl_platform_info *paramName; size_t *paramValueSize; void **paramValue; size_t **paramValueSizeRet; } cl_params_clGetPlatformInfo; typedef struct _cl_params_clGetProgramBuildInfo { cl_program *program; cl_device_id *device; cl_program_build_info *paramName; size_t *paramValueSize; void **paramValue; size_t **paramValueSizeRet; } cl_params_clGetProgramBuildInfo; typedef struct _cl_params_clGetProgramInfo { cl_program *program; cl_program_info *paramName; size_t *paramValueSize; void **paramValue; size_t **paramValueSizeRet; } cl_params_clGetProgramInfo; typedef struct _cl_params_clGetSamplerInfo { cl_sampler *sampler; cl_sampler_info *paramName; size_t *paramValueSize; void **paramValue; size_t **paramValueSizeRet; } cl_params_clGetSamplerInfo; typedef struct _cl_params_clGetSupportedImageFormats { cl_context *context; cl_mem_flags *flags; cl_mem_object_type *imageType; cl_uint *numEntries; cl_image_format **imageFormats; cl_uint **numImageFormats; } cl_params_clGetSupportedImageFormats; typedef struct _cl_params_clLinkProgram { cl_context *context; cl_uint *numDevices; const cl_device_id **deviceList; const char **options; cl_uint *numInputPrograms; const cl_program **inputPrograms; void(CL_CALLBACK **funcNotify)(cl_program program, void *userData); void **userData; cl_int **errcodeRet; } cl_params_clLinkProgram; typedef struct _cl_params_clReleaseCommandQueue { cl_command_queue *commandQueue; } cl_params_clReleaseCommandQueue; typedef struct _cl_params_clReleaseContext { cl_context *context; } cl_params_clReleaseContext; typedef struct _cl_params_clReleaseDevice { 
cl_device_id *device; } cl_params_clReleaseDevice; typedef struct _cl_params_clReleaseEvent { cl_event *event; } cl_params_clReleaseEvent; typedef struct _cl_params_clReleaseKernel { cl_kernel *kernel; } cl_params_clReleaseKernel; typedef struct _cl_params_clReleaseMemObject { cl_mem *memobj; } cl_params_clReleaseMemObject; typedef struct _cl_params_clReleaseProgram { cl_program *program; } cl_params_clReleaseProgram; typedef struct _cl_params_clReleaseSampler { cl_sampler *sampler; } cl_params_clReleaseSampler; typedef struct _cl_params_clRetainCommandQueue { cl_command_queue *commandQueue; } cl_params_clRetainCommandQueue; typedef struct _cl_params_clRetainContext { cl_context *context; } cl_params_clRetainContext; typedef struct _cl_params_clRetainDevice { cl_device_id *device; } cl_params_clRetainDevice; typedef struct _cl_params_clRetainEvent { cl_event *event; } cl_params_clRetainEvent; typedef struct _cl_params_clRetainKernel { cl_kernel *kernel; } cl_params_clRetainKernel; typedef struct _cl_params_clRetainMemObject { cl_mem *memobj; } cl_params_clRetainMemObject; typedef struct _cl_params_clRetainProgram { cl_program *program; } cl_params_clRetainProgram; typedef struct _cl_params_clRetainSampler { cl_sampler *sampler; } cl_params_clRetainSampler; typedef struct _cl_params_clSVMAlloc { cl_context *context; cl_svm_mem_flags *flags; size_t *size; cl_uint *alignment; } cl_params_clSVMAlloc; typedef struct _cl_params_clSVMFree { cl_context *context; void **svmPointer; } cl_params_clSVMFree; typedef struct _cl_params_clSetCommandQueueProperty { cl_command_queue *commandQueue; cl_command_queue_properties *properties; cl_bool *enable; cl_command_queue_properties **oldProperties; } cl_params_clSetCommandQueueProperty; typedef struct _cl_params_clSetDefaultDeviceCommandQueue { cl_context *context; cl_device_id *device; cl_command_queue *commandQueue; } cl_params_clSetDefaultDeviceCommandQueue; typedef struct _cl_params_clSetEventCallback { cl_event *event; cl_int 
*commandExecCallbackType; void(CL_CALLBACK **funcNotify)(cl_event, cl_int, void *); void **userData; } cl_params_clSetEventCallback; typedef struct _cl_params_clSetKernelArg { cl_kernel *kernel; cl_uint *argIndex; size_t *argSize; const void **argValue; } cl_params_clSetKernelArg; typedef struct _cl_params_clSetKernelArgSVMPointer { cl_kernel *kernel; cl_uint *argIndex; const void **argValue; } cl_params_clSetKernelArgSVMPointer; typedef struct _cl_params_clSetKernelExecInfo { cl_kernel *kernel; cl_kernel_exec_info *paramName; size_t *paramValueSize; const void **paramValue; } cl_params_clSetKernelExecInfo; typedef struct _cl_params_clSetMemObjectDestructorCallback { cl_mem *memobj; void(CL_CALLBACK **funcNotify)(cl_mem, void *); void **userData; } cl_params_clSetMemObjectDestructorCallback; typedef struct _cl_params_clSetUserEventStatus { cl_event *event; cl_int *executionStatus; } cl_params_clSetUserEventStatus; typedef struct _cl_params_clUnloadCompiler { } cl_params_clUnloadCompiler; typedef struct _cl_params_clUnloadPlatformCompiler { cl_platform_id *platform; } cl_params_clUnloadPlatformCompiler; typedef struct _cl_params_clWaitForEvents { cl_uint *numEvents; const cl_event **eventList; } cl_params_clWaitForEvents; compute-runtime-22.14.22890/opencl/source/utilities/000077500000000000000000000000001422164147700221455ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/source/utilities/CMakeLists.txt000066400000000000000000000011331422164147700247030ustar00rootroot00000000000000# # Copyright (C) 2018-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(RUNTIME_SRCS_UTILITIES_BASE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/cl_logger.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cl_logger.h ) set_property(GLOBAL PROPERTY RUNTIME_SRCS_UTILITIES_BASE ${RUNTIME_SRCS_UTILITIES_BASE}) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_UTILITIES_BASE}) if(WIN32) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE 
${NEO_CORE_UTILITIES_WINDOWS}) else() target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${NEO_CORE_UTILITIES_LINUX}) endif() compute-runtime-22.14.22890/opencl/source/utilities/cl_logger.cpp000066400000000000000000000124141422164147700246100ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/utilities/cl_logger.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "opencl/source/event/event.h" #include "opencl/source/helpers/dispatch_info.h" #include "opencl/source/kernel/kernel.h" #include "opencl/source/mem_obj/mem_obj.h" namespace NEO { template ClFileLogger::ClFileLogger(FileLogger &baseLoggerIn, const DebugVariables &flags) : baseLogger(baseLoggerIn) { dumpKernelArgsEnabled = flags.DumpKernelArgs.get(); } ClFileLogger &getClFileLogger() { static ClFileLogger clFileLoggerInstance(FileLoggerInstance(), DebugManager.flags); return clFileLoggerInstance; } template void ClFileLogger::dumpKernelArgs(const MultiDispatchInfo *multiDispatchInfo) { if (false == baseLogger.enabled()) { return; } if (dumpKernelArgsEnabled == false || multiDispatchInfo == nullptr) { return; } for (auto &dispatchInfo : *multiDispatchInfo) { auto kernel = dispatchInfo.getKernel(); if (kernel == nullptr) { continue; } const auto &kernelDescriptor = kernel->getKernelInfo().kernelDescriptor; const auto &explicitArgs = kernelDescriptor.payloadMappings.explicitArgs; for (unsigned int i = 0; i < explicitArgs.size(); i++) { std::string type; std::string fileName; const char *ptr = nullptr; size_t size = 0; uint64_t flags = 0; std::unique_ptr argVal = nullptr; const auto &arg = explicitArgs[i]; if (arg.getTraits().getAddressQualifier() == KernelArgMetadata::AddrLocal) { type = "local"; } else if (arg.is()) { type = "image"; auto clMem = reinterpret_cast(kernel->getKernelArg(i)); auto memObj = castToObject(clMem); if (memObj != nullptr) { ptr = static_cast(memObj->getCpuAddress()); size = 
memObj->getSize(); flags = memObj->getFlags(); } } else if (arg.is()) { type = "sampler"; } else if (arg.is()) { type = "buffer"; auto clMem = reinterpret_cast(kernel->getKernelArg(i)); auto memObj = castToObject(clMem); if (memObj != nullptr) { ptr = static_cast(memObj->getCpuAddress()); size = memObj->getSize(); flags = memObj->getFlags(); } } else { type = "immediate"; auto crossThreadData = kernel->getCrossThreadData(); auto crossThreadDataSize = kernel->getCrossThreadDataSize(); argVal = std::unique_ptr(new char[crossThreadDataSize]); size_t totalArgSize = 0; for (const auto &element : arg.as().elements) { auto pSource = ptrOffset(crossThreadData, element.offset); auto pDestination = ptrOffset(argVal.get(), element.sourceOffset); memcpy_s(pDestination, element.size, pSource, element.size); totalArgSize += element.size; } size = totalArgSize; ptr = argVal.get(); } if (ptr && size) { fileName = kernelDescriptor.kernelMetadata.kernelName + "_arg_" + std::to_string(i) + "_" + type + "_size_" + std::to_string(size) + "_flags_" + std::to_string(flags) + ".bin"; baseLogger.writeToFile(fileName, ptr, size, std::ios::trunc | std::ios::binary); } } } } template const std::string ClFileLogger::getEvents(const uintptr_t *input, uint32_t numOfEvents) { if (false == baseLogger.enabled()) { return ""; } std::stringstream os; for (uint32_t i = 0; i < numOfEvents; i++) { if (input != nullptr) { cl_event event = (reinterpret_cast(input))[i]; os << "cl_event " << event << ", Event " << (Event *)event << ", "; } } return os.str(); } template const std::string ClFileLogger::getMemObjects(const uintptr_t *input, uint32_t numOfObjects) { if (false == baseLogger.enabled()) { return ""; } std::stringstream os; for (uint32_t i = 0; i < numOfObjects; i++) { if (input != nullptr) { cl_mem mem = const_cast(reinterpret_cast(input)[i]); os << "cl_mem " << mem << ", MemObj " << static_cast(mem) << ", "; } } return os.str(); } template class ClFileLogger; template class ClFileLogger; template 
class ClFileLogger; } // namespace NEO compute-runtime-22.14.22890/opencl/source/utilities/cl_logger.h000066400000000000000000000016271422164147700242610ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/non_copyable_or_moveable.h" #include "shared/source/utilities/logger.h" #include #include #include namespace NEO { struct MultiDispatchInfo; template class ClFileLogger : public NonCopyableOrMovableClass { public: ClFileLogger(FileLogger &baseLoggerInm, const DebugVariables &flags); void dumpKernelArgs(const MultiDispatchInfo *multiDispatchInfo); const std::string getEvents(const uintptr_t *input, uint32_t numOfEvents); const std::string getMemObjects(const uintptr_t *input, uint32_t numOfObjects); protected: bool dumpKernelArgsEnabled = false; FileLogger &baseLogger; }; ClFileLogger &getClFileLogger(); }; // namespace NEO compute-runtime-22.14.22890/opencl/source/xe_hp_core/000077500000000000000000000000001422164147700222455ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/source/xe_hp_core/CMakeLists.txt000066400000000000000000000002031422164147700250000ustar00rootroot00000000000000# # Copyright (C) 2021 Intel Corporation # # SPDX-License-Identifier: MIT # if(SUPPORT_XE_HP_CORE) add_subdirectories() endif() compute-runtime-22.14.22890/opencl/source/xe_hp_core/buffer_xe_hp_core.cpp000066400000000000000000000005711422164147700264200ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/xe_hp_core/hw_cmds.h" #include "opencl/source/mem_obj/buffer_base.inl" namespace NEO { typedef XeHpFamily Family; static auto gfxCore = IGFX_XE_HP_CORE; template class BufferHw; #include "opencl/source/mem_obj/buffer_factory_init.inl" } // namespace NEO 
compute-runtime-22.14.22890/opencl/source/xe_hp_core/cl_hw_helper_xe_hp_core.cpp000066400000000000000000000042751422164147700276070ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/helpers/populate_factory.h" #include "shared/source/os_interface/hw_info_config.h" #include "opencl/source/context/context.h" #include "opencl/source/helpers/cl_hw_helper_base.inl" #include "opencl/source/helpers/cl_hw_helper_xehp_and_later.inl" #include "hw_cmds.h" namespace NEO { using Family = XeHpFamily; static auto gfxCore = IGFX_XE_HP_CORE; template <> void populateFactoryTable>() { extern ClHwHelper *clHwHelperFactory[IGFX_MAX_CORE]; clHwHelperFactory[gfxCore] = &ClHwHelperHw::get(); } template <> bool ClHwHelperHw::requiresNonAuxMode(const ArgDescPointer &argAsPtr, const HardwareInfo &hwInfo) const { if (HwInfoConfig::get(hwInfo.platform.eProductFamily)->allowStatelessCompression(hwInfo)) { return false; } else { return !argAsPtr.isPureStateful(); } } template <> bool ClHwHelperHw::requiresAuxResolves(const KernelInfo &kernelInfo, const HardwareInfo &hwInfo) const { if (HwInfoConfig::get(hwInfo.platform.eProductFamily)->allowStatelessCompression(hwInfo)) { return false; } else { return hasStatelessAccessToBuffer(kernelInfo); } } template <> inline bool ClHwHelperHw::allowCompressionForContext(const ClDevice &clDevice, const Context &context) const { auto rootDeviceIndex = clDevice.getRootDeviceIndex(); auto &hwInfo = clDevice.getHardwareInfo(); if (context.containsMultipleSubDevices(rootDeviceIndex) && HwHelperHw::get().isWorkaroundRequired(REVISION_A0, REVISION_A1, hwInfo)) { return false; } return true; } template <> bool ClHwHelperHw::isSupportedKernelThreadArbitrationPolicy() const { return false; } template <> std::vector ClHwHelperHw::getSupportedThreadArbitrationPolicies() const { return {}; } template <> cl_version 
ClHwHelperHw::getDeviceIpVersion(const HardwareInfo &hwInfo) const { return makeDeviceIpVersion(12, 5, makeDeviceRevision(hwInfo)); } template class ClHwHelperHw; } // namespace NEO compute-runtime-22.14.22890/opencl/source/xe_hp_core/command_queue_xe_hp_core.cpp000066400000000000000000000014371422164147700277730ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/populate_factory.h" #include "shared/source/memory_manager/unified_memory_manager.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/command_queue/enqueue_resource_barrier.h" namespace NEO { using Family = XeHpFamily; static auto gfxCore = IGFX_XE_HP_CORE; } // namespace NEO #include "opencl/source/command_queue/command_queue_hw_xehp_and_later.inl" namespace NEO { template <> void populateFactoryTable>() { extern CommandQueueCreateFunc commandQueueFactory[IGFX_MAX_CORE]; commandQueueFactory[gfxCore] = CommandQueueHw::create; } } // namespace NEO template class NEO::CommandQueueHw; compute-runtime-22.14.22890/opencl/source/xe_hp_core/enable_family_full_ocl_xe_hp_core.cpp000066400000000000000000000014351422164147700316150ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/populate_factory.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/helpers/cl_hw_helper.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/sampler/sampler.h" namespace NEO { using Family = XeHpFamily; struct EnableOCLXeHpCore { EnableOCLXeHpCore() { populateFactoryTable>(); populateFactoryTable>(); populateFactoryTable>(); populateFactoryTable>(); populateFactoryTable>(); } }; static EnableOCLXeHpCore enable; } // namespace NEO 
compute-runtime-22.14.22890/opencl/source/xe_hp_core/gpgpu_walker_xe_hp_core.cpp000066400000000000000000000006341422164147700276360ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_queue/gpgpu_walker_xehp_and_later.inl" #include "opencl/source/command_queue/hardware_interface_xehp_and_later.inl" namespace NEO { template class GpgpuWalkerHelper; template class HardwareInterface; template struct EnqueueOperation; } // namespace NEO compute-runtime-22.14.22890/opencl/source/xe_hp_core/gtpin_setup_xe_hp_core.cpp000066400000000000000000000014451422164147700275110ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/gtpin/gtpin_hw_helper.h" #include "opencl/source/gtpin/gtpin_hw_helper.inl" #include "opencl/source/gtpin/gtpin_hw_helper_xehp_and_later.inl" #include "ocl_igc_shared/gtpin/gtpin_ocl_interface.h" namespace NEO { extern GTPinHwHelper *gtpinHwHelperFactory[IGFX_MAX_CORE]; typedef XeHpFamily Family; static const auto gfxFamily = IGFX_XE_HP_CORE; template <> uint32_t GTPinHwHelperHw::getGenVersion() { return gtpin::GTPIN_XEHP_CORE; } template class GTPinHwHelperHw; struct GTPinEnableXeHpCore { GTPinEnableXeHpCore() { gtpinHwHelperFactory[gfxFamily] = >PinHwHelperHw::get(); } }; static GTPinEnableXeHpCore gtpinEnable; } // namespace NEO compute-runtime-22.14.22890/opencl/source/xe_hp_core/hardware_commands_helper_xe_hp_core.cpp000066400000000000000000000010111422164147700321520ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/cache_flush_xehp_and_later.inl" #include "shared/source/xe_hp_core/hw_cmds.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/helpers/hardware_commands_helper_base.inl" #include "opencl/source/helpers/hardware_commands_helper_xehp_and_later.inl" 
namespace NEO { using FamilyType = XeHpFamily; template struct HardwareCommandsHelper; } // namespace NEO compute-runtime-22.14.22890/opencl/source/xe_hp_core/image_xe_hp_core.cpp000066400000000000000000000017131422164147700262300ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/xe_hp_core/hw_cmds_base.h" #include "opencl/source/context/context.h" #include "opencl/source/mem_obj/image.inl" namespace NEO { using Family = XeHpFamily; static auto gfxCore = IGFX_XE_HP_CORE; template <> void ImageHw::appendSurfaceStateParams(Family::RENDER_SURFACE_STATE *surfaceState, uint32_t rootDeviceIndex, bool useGlobalAtomics) { EncodeSurfaceStateArgs args{}; args.outMemory = surfaceState; args.useGlobalAtomics = useGlobalAtomics; args.areMultipleSubDevicesInContext = context->containsMultipleSubDevices(rootDeviceIndex); args.implicitScaling = args.areMultipleSubDevicesInContext; EncodeSurfaceState::encodeImplicitScalingParams(args); } } // namespace NEO #include "opencl/source/mem_obj/image_tgllp_and_later.inl" // factory initializer #include "opencl/source/mem_obj/image_factory_init.inl" compute-runtime-22.14.22890/opencl/source/xe_hp_core/sampler_xe_hp_core.cpp000066400000000000000000000006211422164147700266060ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/xe_hp_core/hw_cmds_base.h" using Family = NEO::XeHpFamily; constexpr static auto gfxCore = IGFX_XE_HP_CORE; #include "opencl/source/sampler/sampler.h" #include "opencl/source/sampler/sampler.inl" namespace NEO { #include "opencl/source/sampler/sampler_factory_init.inl" } // namespace NEO 
compute-runtime-22.14.22890/opencl/source/xe_hpc_core/000077500000000000000000000000001422164147700224105ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/source/xe_hpc_core/CMakeLists.txt000066400000000000000000000005441422164147700251530ustar00rootroot00000000000000# # Copyright (C) 2021 Intel Corporation # # SPDX-License-Identifier: MIT # if(SUPPORT_XE_HPC_CORE) set(RUNTIME_SRCS_XE_HPC_CORE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_XE_HPC_CORE}) set_property(GLOBAL PROPERTY RUNTIME_SRCS_XE_HPC_CORE ${RUNTIME_SRCS_XE_HPC_CORE}) endif() compute-runtime-22.14.22890/opencl/source/xe_hpc_core/buffer_xe_hpc_core.cpp000066400000000000000000000006021422164147700267210ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/xe_hpc_core/hw_cmds.h" #include "opencl/source/mem_obj/buffer_base.inl" namespace NEO { using Family = XE_HPC_COREFamily; static auto gfxCore = IGFX_XE_HPC_CORE; template class BufferHw; #include "opencl/source/mem_obj/buffer_factory_init.inl" } // namespace NEO compute-runtime-22.14.22890/opencl/source/xe_hpc_core/cl_hw_helper_xe_hpc_core.cpp000066400000000000000000000025061422164147700301100ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/populate_factory.h" #include "opencl/source/helpers/cl_hw_helper_base.inl" #include "opencl/source/helpers/cl_hw_helper_xehp_and_later.inl" #include "hw_cmds.h" namespace NEO { using Family = XE_HPC_COREFamily; static auto gfxCore = IGFX_XE_HPC_CORE; #include "opencl/source/helpers/cl_hw_helper_pvc_and_later.inl" template <> void populateFactoryTable>() { extern ClHwHelper *clHwHelperFactory[IGFX_MAX_CORE]; clHwHelperFactory[gfxCore] = &ClHwHelperHw::get(); } template <> bool ClHwHelperHw::requiresAuxResolves(const KernelInfo &kernelInfo, const HardwareInfo &hwInfo) const { 
return false; } template <> inline bool ClHwHelperHw::getQueueFamilyName(std::string &name, EngineGroupType type) const { switch (type) { case EngineGroupType::RenderCompute: name = "cccs"; return true; case EngineGroupType::LinkedCopy: name = "linked bcs"; return true; default: return false; } } template <> cl_version ClHwHelperHw::getDeviceIpVersion(const HardwareInfo &hwInfo) const { return makeDeviceIpVersion(12, 8, makeDeviceRevision(hwInfo)); } template class ClHwHelperHw; } // namespace NEO compute-runtime-22.14.22890/opencl/source/xe_hpc_core/command_queue_xe_hpc_core.cpp000066400000000000000000000020201422164147700302660ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/populate_factory.h" #include "shared/source/memory_manager/unified_memory_manager.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/command_queue/enqueue_resource_barrier.h" namespace NEO { using Family = XE_HPC_COREFamily; static auto gfxCore = IGFX_XE_HPC_CORE; } // namespace NEO #include "opencl/source/command_queue/command_queue_hw_xehp_and_later.inl" namespace NEO { template <> void populateFactoryTable>() { extern CommandQueueCreateFunc commandQueueFactory[IGFX_MAX_CORE]; commandQueueFactory[gfxCore] = CommandQueueHw::create; } template <> bool CommandQueueHw::isCacheFlushForBcsRequired() const { if (DebugManager.flags.ForceCacheFlushForBcs.get() != -1) { return !!DebugManager.flags.ForceCacheFlushForBcs.get(); } return false; } } // namespace NEO template class NEO::CommandQueueHw; compute-runtime-22.14.22890/opencl/source/xe_hpc_core/enable_family_full_ocl_xe_hpc_core.cpp000066400000000000000000000014471422164147700321260ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/populate_factory.h" #include "opencl/source/command_queue/command_queue_hw.h" #include 
"opencl/source/helpers/cl_hw_helper.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/sampler/sampler.h" namespace NEO { using Family = XE_HPC_COREFamily; struct EnableOCLXeHpcCore { EnableOCLXeHpcCore() { populateFactoryTable>(); populateFactoryTable>(); populateFactoryTable>(); populateFactoryTable>(); populateFactoryTable>(); } }; static EnableOCLXeHpcCore enable; } // namespace NEO compute-runtime-22.14.22890/opencl/source/xe_hpc_core/gpgpu_walker_xe_hpc_core.cpp000066400000000000000000000006611422164147700301440ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_queue/gpgpu_walker_xehp_and_later.inl" #include "opencl/source/command_queue/hardware_interface_xehp_and_later.inl" namespace NEO { template class GpgpuWalkerHelper; template class HardwareInterface; template struct EnqueueOperation; } // namespace NEO compute-runtime-22.14.22890/opencl/source/xe_hpc_core/gtpin_setup_xe_hpc_core.cpp000066400000000000000000000014671422164147700300230ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/gtpin/gtpin_hw_helper.h" #include "opencl/source/gtpin/gtpin_hw_helper.inl" #include "opencl/source/gtpin/gtpin_hw_helper_xehp_and_later.inl" #include "ocl_igc_shared/gtpin/gtpin_ocl_interface.h" namespace NEO { extern GTPinHwHelper *gtpinHwHelperFactory[IGFX_MAX_CORE]; using Family = XE_HPC_COREFamily; static const auto gfxFamily = IGFX_XE_HPC_CORE; template class GTPinHwHelperHw; struct GTPinEnableXeHpcCore { GTPinEnableXeHpcCore() { gtpinHwHelperFactory[gfxFamily] = >PinHwHelperHw::get(); } }; template <> uint32_t GTPinHwHelperHw::getGenVersion() { return gtpin::GTPIN_XE_HPC_CORE; } static GTPinEnableXeHpcCore gtpinEnable; } // namespace NEO 
compute-runtime-22.14.22890/opencl/source/xe_hpc_core/hardware_commands_helper_xe_hpc_core.cpp000066400000000000000000000010211422164147700324610ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/cache_flush_xehp_and_later.inl" #include "shared/source/xe_hpc_core/hw_cmds.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/helpers/hardware_commands_helper_base.inl" #include "opencl/source/helpers/hardware_commands_helper_xehp_and_later.inl" namespace NEO { using FamilyType = XE_HPC_COREFamily; template struct HardwareCommandsHelper; } // namespace NEO compute-runtime-22.14.22890/opencl/source/xe_hpc_core/image_xe_hpc_core.cpp000066400000000000000000000006651422164147700265430ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/xe_hpc_core/hw_cmds_base.h" #include "opencl/source/mem_obj/image.inl" namespace NEO { using Family = XE_HPC_COREFamily; static auto gfxCore = IGFX_XE_HPC_CORE; } // namespace NEO #include "opencl/source/mem_obj/image_tgllp_and_later.inl" // factory initializer #include "opencl/source/mem_obj/image_factory_init.inl" compute-runtime-22.14.22890/opencl/source/xe_hpc_core/sampler_xe_hpc_core.cpp000066400000000000000000000004401422164147700271130ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/xe_hpc_core/hw_cmds_base.h" using Family = NEO::XE_HPC_COREFamily; constexpr static auto gfxCore = IGFX_XE_HPC_CORE; #include "opencl/source/sampler/sampler_tgllp_and_later.inl" compute-runtime-22.14.22890/opencl/source/xe_hpg_core/000077500000000000000000000000001422164147700224145ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/source/xe_hpg_core/CMakeLists.txt000066400000000000000000000005441422164147700251570ustar00rootroot00000000000000# # Copyright 
(C) 2021 Intel Corporation # # SPDX-License-Identifier: MIT # if(SUPPORT_XE_HPG_CORE) set(RUNTIME_SRCS_XE_HPG_CORE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_XE_HPG_CORE}) set_property(GLOBAL PROPERTY RUNTIME_SRCS_XE_HPG_CORE ${RUNTIME_SRCS_XE_HPG_CORE}) endif() compute-runtime-22.14.22890/opencl/source/xe_hpg_core/buffer_xe_hpg_core.cpp000066400000000000000000000006021422164147700267310ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/xe_hpg_core/hw_cmds.h" #include "opencl/source/mem_obj/buffer_base.inl" namespace NEO { typedef XE_HPG_COREFamily Family; static auto gfxCore = IGFX_XE_HPG_CORE; template class BufferHw; #include "opencl/source/mem_obj/buffer_factory_init.inl" } // namespace NEO compute-runtime-22.14.22890/opencl/source/xe_hpg_core/cl_hw_helper_xe_hpg_core.cpp000066400000000000000000000051011422164147700301120ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/helpers/populate_factory.h" #include "shared/source/os_interface/hw_info_config.h" #include "opencl/source/helpers/cl_hw_helper_base.inl" #include "opencl/source/helpers/cl_hw_helper_xehp_and_later.inl" #include "opencl/source/helpers/surface_formats.h" #include "hw_cmds.h" namespace NEO { using Family = XE_HPG_COREFamily; static auto gfxCore = IGFX_XE_HPG_CORE; template <> void populateFactoryTable>() { extern ClHwHelper *clHwHelperFactory[IGFX_MAX_CORE]; clHwHelperFactory[gfxCore] = &ClHwHelperHw::get(); } template <> bool ClHwHelperHw::requiresNonAuxMode(const ArgDescPointer &argAsPtr, const HardwareInfo &hwInfo) const { if (HwInfoConfig::get(hwInfo.platform.eProductFamily)->allowStatelessCompression(hwInfo)) { return false; } else { return !argAsPtr.isPureStateful(); } } template <> bool 
ClHwHelperHw::requiresAuxResolves(const KernelInfo &kernelInfo, const HardwareInfo &hwInfo) const { if (HwInfoConfig::get(hwInfo.platform.eProductFamily)->allowStatelessCompression(hwInfo)) { return false; } else { return hasStatelessAccessToBuffer(kernelInfo); } } template <> std::vector ClHwHelperHw::getSupportedThreadArbitrationPolicies() const { return {}; } template <> bool ClHwHelperHw::isSupportedKernelThreadArbitrationPolicy() const { return false; } template <> cl_version ClHwHelperHw::getDeviceIpVersion(const HardwareInfo &hwInfo) const { return makeDeviceIpVersion(12, 7, makeDeviceRevision(hwInfo)); } static const std::vector incompressibleFormats = { {CL_LUMINANCE, CL_UNORM_INT8}, {CL_LUMINANCE, CL_UNORM_INT16}, {CL_LUMINANCE, CL_HALF_FLOAT}, {CL_LUMINANCE, CL_FLOAT}, {CL_INTENSITY, CL_UNORM_INT8}, {CL_INTENSITY, CL_UNORM_INT16}, {CL_INTENSITY, CL_HALF_FLOAT}, {CL_INTENSITY, CL_FLOAT}, {CL_A, CL_UNORM_INT16}, {CL_A, CL_HALF_FLOAT}, {CL_A, CL_FLOAT}}; template <> bool ClHwHelperHw::allowImageCompression(cl_image_format format) const { for (auto &referenceFormat : incompressibleFormats) { if (format.image_channel_data_type == referenceFormat.image_channel_data_type && format.image_channel_order == referenceFormat.image_channel_order) { return false; } } return true; } template class ClHwHelperHw; } // namespace NEO compute-runtime-22.14.22890/opencl/source/xe_hpg_core/command_queue_xe_hpg_core.cpp000066400000000000000000000014471422164147700303120ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/populate_factory.h" #include "shared/source/memory_manager/unified_memory_manager.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/command_queue/enqueue_resource_barrier.h" namespace NEO { using Family = XE_HPG_COREFamily; static auto gfxCore = IGFX_XE_HPG_CORE; } // namespace NEO #include 
"opencl/source/command_queue/command_queue_hw_xehp_and_later.inl" namespace NEO { template <> void populateFactoryTable>() { extern CommandQueueCreateFunc commandQueueFactory[IGFX_MAX_CORE]; commandQueueFactory[gfxCore] = CommandQueueHw::create; } } // namespace NEO template class NEO::CommandQueueHw; compute-runtime-22.14.22890/opencl/source/xe_hpg_core/enable_family_full_ocl_xe_hpg_core.cpp000066400000000000000000000014471422164147700321360ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/populate_factory.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/helpers/cl_hw_helper.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/sampler/sampler.h" namespace NEO { using Family = XE_HPG_COREFamily; struct EnableOCLXeHpgCore { EnableOCLXeHpgCore() { populateFactoryTable>(); populateFactoryTable>(); populateFactoryTable>(); populateFactoryTable>(); populateFactoryTable>(); } }; static EnableOCLXeHpgCore enable; } // namespace NEO compute-runtime-22.14.22890/opencl/source/xe_hpg_core/gpgpu_walker_xe_hpg_core.cpp000066400000000000000000000006611422164147700301540ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_queue/gpgpu_walker_xehp_and_later.inl" #include "opencl/source/command_queue/hardware_interface_xehp_and_later.inl" namespace NEO { template class GpgpuWalkerHelper; template class HardwareInterface; template struct EnqueueOperation; } // namespace NEO compute-runtime-22.14.22890/opencl/source/xe_hpg_core/gtpin_setup_xe_hpg_core.cpp000066400000000000000000000014671422164147700300330ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/gtpin/gtpin_hw_helper.h" #include 
"opencl/source/gtpin/gtpin_hw_helper.inl" #include "opencl/source/gtpin/gtpin_hw_helper_xehp_and_later.inl" #include "ocl_igc_shared/gtpin/gtpin_ocl_interface.h" namespace NEO { extern GTPinHwHelper *gtpinHwHelperFactory[IGFX_MAX_CORE]; typedef XE_HPG_COREFamily Family; static const auto gfxFamily = IGFX_XE_HPG_CORE; template class GTPinHwHelperHw; struct GTPinEnableXeHpgCore { GTPinEnableXeHpgCore() { gtpinHwHelperFactory[gfxFamily] = >PinHwHelperHw::get(); } }; template <> uint32_t GTPinHwHelperHw::getGenVersion() { return gtpin::GTPIN_XE_HPG_CORE; } static GTPinEnableXeHpgCore gtpinEnable; } // namespace NEO compute-runtime-22.14.22890/opencl/source/xe_hpg_core/hardware_commands_helper_xe_hpg_core.cpp000066400000000000000000000010211422164147700324710ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/cache_flush_xehp_and_later.inl" #include "shared/source/xe_hpg_core/hw_cmds.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/helpers/hardware_commands_helper_base.inl" #include "opencl/source/helpers/hardware_commands_helper_xehp_and_later.inl" namespace NEO { using FamilyType = XE_HPG_COREFamily; template struct HardwareCommandsHelper; } // namespace NEO compute-runtime-22.14.22890/opencl/source/xe_hpg_core/image_xe_hpg_core.cpp000066400000000000000000000006651422164147700265530ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/xe_hpg_core/hw_cmds_base.h" #include "opencl/source/mem_obj/image.inl" namespace NEO { using Family = XE_HPG_COREFamily; static auto gfxCore = IGFX_XE_HPG_CORE; } // namespace NEO #include "opencl/source/mem_obj/image_tgllp_and_later.inl" // factory initializer #include "opencl/source/mem_obj/image_factory_init.inl" 
compute-runtime-22.14.22890/opencl/source/xe_hpg_core/sampler_xe_hpg_core.cpp000066400000000000000000000006321422164147700271260ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/xe_hpg_core/hw_cmds_base.h" using Family = NEO::XE_HPG_COREFamily; constexpr static auto gfxCore = IGFX_XE_HPG_CORE; #include "opencl/source/sampler/sampler.h" #include "opencl/source/sampler/sampler.inl" namespace NEO { #include "opencl/source/sampler/sampler_factory_init.inl" } // namespace NEO compute-runtime-22.14.22890/opencl/test/000077500000000000000000000000001422164147700176115ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/.clang-tidy000066400000000000000000000035161422164147700216520ustar00rootroot00000000000000--- Checks: 'clang-diagnostic-*,clang-analyzer-*,google-default-arguments,modernize-use-override,modernize-use-default-member-init,-clang-analyzer-alpha*,readability-identifier-naming,-clang-analyzer-optin.performance.Padding,-clang-analyzer-security.insecureAPI.strcpy,-clang-analyzer-cplusplus.NewDeleteLeaks,-clang-analyzer-core.CallAndMessage,-clang-analyzer-unix.MismatchedDeallocator,-clang-analyzer-core.NullDereference,-clang-analyzer-cplusplus.NewDelete,-clang-analyzer-optin.cplusplus.VirtualCall' # WarningsAsErrors: '.*' HeaderFilterRegex: '^((?!^third_party\/).+)\.(h|hpp|inl)$' AnalyzeTemporaryDtors: false CheckOptions: - key: google-readability-braces-around-statements.ShortStatementLines value: '1' - key: google-readability-function-size.StatementThreshold value: '800' - key: google-readability-namespace-comments.ShortNamespaceLines value: '10' - key: google-readability-namespace-comments.SpacesBeforeComments value: '2' - key: readability-identifier-naming.ParameterCase value: camelBack - key: readability-identifier-naming.StructMemberCase value: camelBack - key: modernize-loop-convert.MaxCopySize value: '16' - key: modernize-loop-convert.MinConfidence value: 
reasonable - key: modernize-loop-convert.NamingStyle value: CamelCase - key: modernize-pass-by-value.IncludeStyle value: llvm - key: modernize-replace-auto-ptr.IncludeStyle value: llvm - key: modernize-use-nullptr.NullMacros value: 'NULL' - key: modernize-use-default-member-init.UseAssignment value: '1' ... compute-runtime-22.14.22890/opencl/test/CMakeLists.txt000066400000000000000000000004571422164147700223570ustar00rootroot00000000000000# # Copyright (C) 2021 Intel Corporation # # SPDX-License-Identifier: MIT # if(DEFAULT_TESTED_PLATFORM AND NOT NEO_SKIP_OCL_UNIT_TESTS) add_subdirectory_unique(unit_test ${NEO_BUILD_DIR}/opencl/test/unit_test) endif() if(NOT BUILD_WITHOUT_RUNTIME) add_subdirectory_unique(black_box_test) endif() compute-runtime-22.14.22890/opencl/test/black_box_test/000077500000000000000000000000001422164147700225745ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/black_box_test/CMakeLists.txt000066400000000000000000000021051422164147700253320ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug") set(OPENCL_BLACK_BOX_TEST_PROJECT_FOLDER "opencl runtime/black_box_tests") set(TEST_NAME hello_world_opencl) add_executable(${TEST_NAME} hello_world_opencl.cpp) set_target_properties(${TEST_NAME} PROPERTIES VS_DEBUGGER_COMMAND "$(TargetPath)" VS_DEBUGGER_COMMAND_ARGUMENTS "" VS_DEBUGGER_WORKING_DIRECTORY "$(OutDir)" ) add_dependencies(${TEST_NAME} ${NEO_DYNAMIC_LIB_NAME}) set_target_properties(${TEST_NAME} PROPERTIES FOLDER ${OPENCL_BLACK_BOX_TEST_PROJECT_FOLDER}) if(UNIX) find_package(OpenCL QUIET) if(NOT ${OpenCL_FOUND}) message(STATUS "Failed to find OpenCL package") set_target_properties(${TEST_NAME} PROPERTIES EXCLUDE_FROM_ALL TRUE) else() target_link_libraries(${TEST_NAME} PUBLIC ${OpenCL_LIBRARIES}) endif() else() target_link_libraries(${TEST_NAME} PUBLIC ${NEO_DYNAMIC_LIB_NAME}) endif() endif() 
compute-runtime-22.14.22890/opencl/test/black_box_test/hello_world_opencl.cpp000066400000000000000000000143071422164147700271570ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "CL/cl.h" #include #include #include #include #include using namespace std; int main(int argc, char **argv) { int retVal = 0; const char *fileName = "kernelOutput.txt"; cl_int err = 0; unique_ptr platforms; cl_device_id device_id = 0; cl_uint platformsCount = 0; cl_context context = NULL; cl_command_queue queue = NULL; cl_program program = NULL; cl_kernel kernel = NULL; cl_mem buffer = NULL; const size_t bufferSize = sizeof(int) * 1024; size_t offset[3] = {0, 0, 0}; size_t gws[3] = {4, 1, 1}; size_t lws[3] = {4, 1, 1}; cl_uint dimension = 1; bool validatePrintfOutput = true; err = clGetPlatformIDs(0, NULL, &platformsCount); if (err != CL_SUCCESS) { cout << "Error getting platforms" << endl; abort(); } cout << "num_platforms == " << platformsCount << endl; platforms.reset(new cl_platform_id[platformsCount]); err = clGetPlatformIDs(platformsCount, platforms.get(), NULL); if (err != CL_SUCCESS) { cout << "Error clGetPlatformIDs failed" << endl; abort(); } cl_device_type deviceType = CL_DEVICE_TYPE_GPU; err = clGetDeviceIDs(platforms.get()[0], deviceType, 1, &device_id, NULL); if (err != CL_SUCCESS) { cout << "Error gettting device_id" << endl; abort(); } context = clCreateContext(0, 1, &device_id, NULL, NULL, &err); if (err != CL_SUCCESS) { cout << "Error creating context" << endl; abort(); } queue = clCreateCommandQueue(context, device_id, 0, &err); if (err != CL_SUCCESS || !queue) { cout << "Error creating command queue" << endl; abort(); } { char source[] = R"===( __kernel void hello(__global int* in){ int i = in[0] > 0 ? 
in[0] : 0; for( ; i >= 0; i--) { printf("%d\n", i); } printf("Hello world!\n"); in[1] = 2; } )==="; const char *strings = source; program = clCreateProgramWithSource(context, 1, &strings, 0, &err); if (err != CL_SUCCESS) { cout << "Error creating program" << endl; abort(); } err = clBuildProgram(program, 1, &device_id, nullptr, nullptr, nullptr); if (err != CL_SUCCESS) { cout << "Error building program" << endl; size_t logSize = 0; unique_ptr buildLog; if (clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, 0, NULL, &logSize) == CL_SUCCESS) { if (logSize) { buildLog.reset(new char[logSize]); if (buildLog) { if (clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, logSize, buildLog.get(), NULL) == CL_SUCCESS) { buildLog[logSize - 1] = '\0'; cout << "Build log:\n" << buildLog.get() << endl; } } } } abort(); } kernel = clCreateKernel(program, "hello", &err); if (err != CL_SUCCESS) { cout << "Error creating kernel" << endl; abort(); } } { cl_mem_flags flags = CL_MEM_READ_WRITE; buffer = clCreateBuffer(context, flags, bufferSize, nullptr, &err); if (err != CL_SUCCESS) { cout << "Error creating buffer" << endl; abort(); } void *ptr = clEnqueueMapBuffer(queue, buffer, CL_TRUE, CL_MAP_READ, 0, bufferSize, 0, nullptr, nullptr, &err); if (err || ptr == nullptr) { cout << "Error mapping buffer" << endl; abort(); } memset(ptr, 0, bufferSize); *(int *)ptr = 4; err = clEnqueueUnmapMemObject(queue, buffer, ptr, 0, nullptr, nullptr); if (err) { cout << "Error unmapping buffer" << endl; abort(); } } err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &buffer); if (err) { cout << "Error setting kernel arg" << endl; abort(); } if (validatePrintfOutput) { auto newFile = freopen(fileName, "w", stdout); if (newFile == nullptr) { cout << "Failed in freopen()" << endl; abort(); } } err = clEnqueueNDRangeKernel(queue, kernel, dimension, offset, gws, lws, 0, 0, nullptr); if (err) { cout << "Error NDRange" << endl; abort(); } err = clFinish(queue); if (err) { cout << 
"Error Finish" << endl; abort(); } { void *ptr = clEnqueueMapBuffer(queue, buffer, CL_TRUE, CL_MAP_READ, 0, bufferSize, 0, nullptr, nullptr, &err); if (err || ptr == nullptr) { cout << "Error mapping buffer" << endl; abort(); } if (((int *)ptr)[1] != 2) { cout << "Invalid value in buffer" << endl; retVal = 1; } err = clEnqueueUnmapMemObject(queue, buffer, ptr, 0, nullptr, nullptr); if (err) { cout << "Error unmapping buffer" << endl; abort(); } } if (validatePrintfOutput) { auto kernelOutput = make_unique(1024); auto kernelOutputFile = fopen(fileName, "r"); auto result = fread(kernelOutput.get(), sizeof(char), 1024, kernelOutputFile); fclose(kernelOutputFile); if (result == 0) { fclose(stdout); abort(); } char *foundString = strstr(kernelOutput.get(), "Hello world!"); if (foundString == nullptr) { retVal = 1; } } clReleaseMemObject(buffer); clReleaseKernel(kernel); clReleaseProgram(program); clReleaseCommandQueue(queue); clReleaseContext(context); return retVal; }compute-runtime-22.14.22890/opencl/test/unit_test/000077500000000000000000000000001422164147700216275ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/CMakeLists.txt000066400000000000000000000700051422164147700243710ustar00rootroot00000000000000# # Copyright (C) 2018-2022 Intel Corporation # # SPDX-License-Identifier: MIT # project(igdrcl_tests) set(OPENCL_TEST_PROJECTS_FOLDER "opencl runtime") set(PLATFORM_SPECIFIC_TEST_TARGETS_FOLDER "${OPENCL_TEST_PROJECTS_FOLDER}/test platforms") set(OPENCL_UNIT_TEST_DIR ${CMAKE_CURRENT_SOURCE_DIR}) include(${NEO_SOURCE_DIR}/cmake/setup_ult_global_flags.cmake) function(ADD_SUPPORTED_TEST_PRODUCT_FAMILIES_DEFINITION) set(NEO_SUPPORTED_TEST_PRODUCT_FAMILIES ${ALL_TESTED_PRODUCT_FAMILY}) string(REPLACE ";" "," NEO_SUPPORTED_TEST_PRODUCT_FAMILIES "${NEO_SUPPORTED_TEST_PRODUCT_FAMILIES}") add_definitions(-DSUPPORTED_TEST_PRODUCT_FAMILIES=${NEO_SUPPORTED_TEST_PRODUCT_FAMILIES}) endfunction() ADD_SUPPORTED_TEST_PRODUCT_FAMILIES_DEFINITION() 
link_libraries(${ASAN_LIBS} ${TSAN_LIBS}) add_custom_target(prepare_test_kernels_for_ocl) add_dependencies(prepare_test_kernels_for_ocl ${BUILTINS_BINARIES_BINDFUL_LIB_NAME}) add_custom_target(copy_test_files_per_product) add_custom_target(run_unit_tests ALL) add_dependencies(unit_tests copy_test_files_per_product) set(IGDRCL_SRCS_tests_local ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${NEO_SHARED_TEST_DIRECTORY}/unit_test/tests_configuration.h ) add_subdirectory(libult) hide_subdir(libult) hide_subdir(linux) if(UNIX) add_subdirectory(linux) add_custom_command( TARGET run_unit_tests POST_BUILD COMMAND echo running tests for linux dynamic library - .so in ${TargetDir} COMMAND ${NEO_RUN_INTERCEPTOR_LIST} igdrcl_linux_dll_tests ${NEO_TESTS_LISTENER_OPTION} COMMAND WORKING_DIRECTORY ${TargetDir} ) endif() set(NEO_IGDRCL_TESTS__TARGET_OBJECTS $ $ $ $ $ $ $ $ $ $ $ $ ) add_executable(igdrcl_tests ${NEO_IGDRCL_TESTS__TARGET_OBJECTS} ${IGDRCL_SRCS_tests_local} ) hide_subdir(gen_common) add_subdirectory(gen_common) if(NOT GTEST_EXCEPTION_OPTIONS) set(GTEST_EXCEPTION_OPTIONS --gtest_catch_exceptions=1) endif() message(STATUS "GTest exception options set to ${GTEST_EXCEPTION_OPTIONS}") if(GTEST_FILTERING_PATTERN) set(GTEST_FILTER_OPTION "--gtest_filter=${GTEST_FILTERING_PATTERN}") message(STATUS "GTest filter for regular tests: ${GTEST_FILTERING_PATTERN}") endif() if(USE_ASAN) set(GTEST_ENV "LSAN_OPTIONS=suppressions=${CMAKE_CURRENT_SOURCE_DIR}/lsan_suppressions.txt") endif() if(COMPILER_SUPPORTS_SSE42) set_source_files_properties(helpers/uint16_sse4_tests.cpp PROPERTIES COMPILE_FLAGS -msse4.2) endif() target_link_libraries(igdrcl_tests ${NEO_MOCKABLE_LIB_NAME} ${NEO_SHARED_MOCKABLE_LIB_NAME}) target_link_libraries(igdrcl_tests igdrcl_mocks) target_include_directories(igdrcl_tests PRIVATE ${NEO_SOURCE_DIR}/opencl/test/unit_test/mocks${BRANCH_DIR_SUFFIX} ${ENGINE_NODE_DIR} ${KHRONOS_GL_HEADERS_DIR} ${NEO_SHARED_TEST_DIRECTORY}/common/test_configuration/unit_tests ) if(WIN32) 
target_include_directories(igdrcl_tests PRIVATE ${NEO_SHARED_TEST_DIRECTORY}/common/mocks/windows/gmm_memory${BRANCH_DIR_SUFFIX} ) endif() target_link_libraries(igdrcl_tests gmock-gtest ${NEO_EXTRA_LIBS}) set(BUILT_IN_KERNEL_DIR "${NEO_SOURCE_DIR}/shared/source/built_ins") function(neo_copy_test_files target product) set(outputdir "${TargetDir}/${product}") add_custom_target(${target}) add_custom_command( TARGET ${target} POST_BUILD COMMAND echo deleting and re-creating ${product} cache directory... COMMAND ${CMAKE_COMMAND} -E remove_directory ${outputdir}/cl_cache COMMAND ${CMAKE_COMMAND} -E make_directory ${outputdir}/cl_cache COMMAND echo copying built-in kernel files from ${BUILT_IN_KERNEL_DIR}/kernels to ${outputdir}/test_files COMMAND ${CMAKE_COMMAND} -E copy_directory ${BUILT_IN_KERNEL_DIR}/kernels ${outputdir}/test_files COMMAND echo copying test files from ${NEO_SOURCE_DIR}/opencl/test/unit_test/test_files to ${outputdir}/test_files COMMAND ${CMAKE_COMMAND} -E copy_directory ${NEO_SOURCE_DIR}/opencl/test/unit_test/test_files ${outputdir}/test_files COMMAND WORKING_DIRECTORY ${TargetDir} DEPENDS ${NEO_SOURCE_DIR}/opencl/test/unit_test/test_files ) add_dependencies(${target} copy_compiler_files) set_target_properties(${target} PROPERTIES FOLDER "${PLATFORM_SPECIFIC_TEST_TARGETS_FOLDER}/${product}") endfunction() function(neo_copy_test_files_with_revision target product revision_id) set(outputdir "${TargetDir}/${product}/${revision_id}") add_custom_target(${target}) add_dependencies(${target} copy_test_kernel_${product}_${revision_id}) add_custom_command( TARGET ${target} POST_BUILD COMMAND echo deleting and re-creating ${product} cache directory... 
COMMAND ${CMAKE_COMMAND} -E remove_directory ${outputdir}/cl_cache COMMAND ${CMAKE_COMMAND} -E make_directory ${outputdir}/cl_cache COMMAND echo copying built-in kernel files from ${BUILT_IN_KERNEL_DIR}/kernels to ${outputdir}/test_files COMMAND ${CMAKE_COMMAND} -E copy_directory ${BUILT_IN_KERNEL_DIR}/kernels ${outputdir}/test_files COMMAND echo copying test files from ${NEO_SOURCE_DIR}/opencl/test/unit_test/test_files to ${outputdir}/test_files COMMAND ${CMAKE_COMMAND} -E copy_directory ${NEO_SOURCE_DIR}/opencl/test/unit_test/test_files ${outputdir}/test_files COMMAND WORKING_DIRECTORY ${TargetDir} DEPENDS ${NEO_SOURCE_DIR}/opencl/test/unit_test/test_files ) add_dependencies(${target} copy_compiler_files) set_target_properties(${target} PROPERTIES FOLDER "${PLATFORM_SPECIFIC_TEST_TARGETS_FOLDER}/${product}/${revision_id}") endfunction() add_dependencies(unit_tests igdrcl_tests test_dynamic_lib prepare_test_kernels_for_ocl prepare_test_kernels_for_shared ) set_target_properties(igdrcl_tests PROPERTIES FOLDER ${OPENCL_TEST_PROJECTS_FOLDER}) set_property(TARGET igdrcl_tests APPEND_STRING PROPERTY COMPILE_FLAGS ${ASAN_FLAGS}) if(UNIX) set_property(TARGET igdrcl_tests APPEND_STRING PROPERTY COMPILE_FLAGS " -g") endif() set_target_properties(unit_tests PROPERTIES FOLDER ${OPENCL_TEST_PROJECTS_FOLDER}) set_target_properties(copy_test_files_per_product PROPERTIES FOLDER ${OPENCL_TEST_PROJECTS_FOLDER}) set_target_properties(prepare_test_kernels_for_ocl PROPERTIES FOLDER ${OPENCL_TEST_PROJECTS_FOLDER}) set_target_properties(run_unit_tests PROPERTIES FOLDER ${OPENCL_TEST_PROJECTS_FOLDER}) target_include_directories(igdrcl_tests BEFORE PRIVATE ${NEO_SHARED_TEST_DIRECTORY}/common/test_macros/header${BRANCH_DIR_SUFFIX} ${NEO_SHARED_TEST_DIRECTORY}/common/helpers/includes${BRANCH_DIR_SUFFIX} ) function(neo_gen_kernels platform_name_with_type platform_name revision_id suffix) set(outputdir "${TargetDir}/${suffix}/${revision_id}/test_files/${NEO_ARCH}/") set(kernels_to_compile) 
foreach(filepath ${ARGN}) get_filename_component(filename ${filepath} NAME) get_filename_component(basename ${filepath} NAME_WE) get_filename_component(workdir ${filepath} DIRECTORY) get_filename_component(absolute_filepath ${filepath} ABSOLUTE) set(outputpath_base "${outputdir}${basename}_${suffix}") if(NOT NEO_DISABLE_BUILTINS_COMPILATION) set(output_files ${outputpath_base}.spv ${outputpath_base}.bin ${outputpath_base}.gen ) add_custom_command( OUTPUT ${output_files} COMMAND ${ocloc_cmd_prefix} -q -file ${absolute_filepath} -device ${platform_name} -${NEO_BITS} -revision_id ${revision_id} -out_dir ${outputdir} WORKING_DIRECTORY ${workdir} DEPENDS ${filepath} ocloc ) list(APPEND kernels_to_compile ${output_files}) else() foreach(_file_name "spv" "bin" "gen") set(_file_prebuilt "${NEO_KERNELS_BIN_DIR}/${suffix}/${revision_id}/test_files/${NEO_ARCH}/${basename}_${suffix}.${_file_name}") add_custom_command( OUTPUT ${outputpath_base}.${_file_name} COMMAND ${CMAKE_COMMAND} -E make_directory ${outputdir} COMMAND ${CMAKE_COMMAND} -E copy_if_different ${_file_prebuilt} ${outputdir} ) list(APPEND kernels_to_compile_${platform_name_with_type}_${revision_id} ${outputpath_base}.${_file_name}) endforeach() endif() endforeach() list(APPEND kernels_to_compile_${platform_name_with_type}_${revision_id} ${kernels_to_compile}) set(kernels_to_compile_${platform_name_with_type}_${revision_id} ${kernels_to_compile_${platform_name_with_type}_${revision_id}} PARENT_SCOPE) endfunction() function(neo_gen_kernels_with_options platform_name_with_type platform_name revision_id suffix filepath) set(kernels_to_compile) foreach(filearg ${filepath}) get_filename_component(filename ${filearg} NAME) get_filename_component(basename ${filearg} NAME_WE) get_filename_component(base_workdir ${filearg} DIRECTORY) get_filename_component(absolute_filepath ${filearg} ABSOLUTE) set(outputdir "${TargetDir}/${suffix}/${revision_id}/test_files/${NEO_ARCH}/") set(workdir 
"${CMAKE_CURRENT_SOURCE_DIR}/${base_workdir}/") foreach(arg ${ARGN}) string(REPLACE " " "_" argwospaces ${arg}) set(outputpath_base "${outputdir}/${basename}_${suffix}") if(NOT NEO_DISABLE_BUILTINS_COMPILATION) set(output_files ${outputpath_base}.spv${argwospaces} ${outputpath_base}.bin${argwospaces} ${outputpath_base}.gen${argwospaces} ) add_custom_command( OUTPUT ${output_files} COMMAND ${ocloc_cmd_prefix} -file ${absolute_filepath} -device ${platform_name} -${NEO_BITS} -out_dir ${outputdir} -revision_id ${revision_id} -options ${arg} -options_name WORKING_DIRECTORY ${workdir} DEPENDS ${filearg} ocloc ) list(APPEND kernels_to_compile ${output_files}) else() foreach(_file_name "spv" "bin" "gen") set(_file_prebuilt "${NEO_KERNELS_BIN_DIR}/${suffix}/${revision_id}/test_files/${NEO_ARCH}/${basename}_${suffix}.${_file_name}${argwospaces}") add_custom_command( OUTPUT ${outputpath_base}.${_file_name}${argwospaces} COMMAND ${CMAKE_COMMAND} -E make_directory ${outputdir} COMMAND ${CMAKE_COMMAND} -E copy_if_different ${_file_prebuilt} ${outputdir} ) list(APPEND kernels_to_compile_${platform_name_with_type}_${revision_id} ${outputpath_base}.${_file_name}${argwospaces}) endforeach() endif() endforeach() endforeach() list(APPEND kernels_to_compile_${platform_name_with_type}_${revision_id} ${kernels_to_compile}) set(kernels_to_compile_${platform_name_with_type}_${revision_id} ${kernels_to_compile_${platform_name_with_type}_${revision_id}} PARENT_SCOPE) endfunction() function(neo_gen_kernels_with_internal_options platform_name_with_type platform_name revision_id suffix filepath output_name_prefix) set(kernels_to_compile) set(filearg ${filepath}) get_filename_component(filename ${filearg} NAME) get_filename_component(basename ${filearg} NAME_WE) get_filename_component(base_workdir ${filearg} DIRECTORY) get_filename_component(absolute_filepath ${filearg} ABSOLUTE) set(outputdir "${TargetDir}/${suffix}/${revision_id}/test_files/${NEO_ARCH}/") set(workdir 
"${CMAKE_CURRENT_SOURCE_DIR}/${base_workdir}/") if(NOT "${output_name_prefix}" STREQUAL "") set(basename ${output_name_prefix}_${basename}) endif() set(outputpath_base "${outputdir}/${basename}_${suffix}") if(NOT NEO_DISABLE_BUILTINS_COMPILATION) set(output_files ${outputpath_base}.spv ${outputpath_base}.bin ${outputpath_base}.gen ) if(NOT "${output_name_prefix}" STREQUAL "") set(output_name -output ${basename}) endif() add_custom_command( OUTPUT ${output_files} COMMAND ${ocloc_cmd_prefix} -file ${absolute_filepath} -device ${platform_name} -revision_id ${revision_id} -${NEO_BITS} -out_dir ${outputdir} ${output_name} -internal_options ${ARGN} WORKING_DIRECTORY ${workdir} DEPENDS ${filearg} ocloc ) list(APPEND kernels_to_compile_${platform_name_with_type}_${revision_id} ${output_files}) else() foreach(_file_name "spv" "bin" "gen") set(_file_prebuilt "${NEO_KERNELS_BIN_DIR}/${suffix}/${revision_id}/test_files/${NEO_ARCH}/${basename}_${suffix}.${_file_name}") add_custom_command( OUTPUT ${outputpath_base}.${_file_name} COMMAND ${CMAKE_COMMAND} -E make_directory ${outputdir} COMMAND ${CMAKE_COMMAND} -E copy_if_different ${_file_prebuilt} ${outputdir} ) list(APPEND kernels_to_compile_${platform_name_with_type}_${revision_id} ${outputpath_base}.${_file_name}) endforeach() endif() set(kernels_to_compile_${platform_name_with_type}_${revision_id} ${kernels_to_compile_${platform_name_with_type}_${revision_id}} PARENT_SCOPE) endfunction() set(TEST_KERNEL_kernel_debug_enable "-cl-kernel-debug-enable" ) function(neo_gen_kernel_with_kernel_debug_options platform_name_with_type platform_name revision_id suffix filepath) get_filename_component(filename ${filepath} NAME) get_filename_component(basename ${filepath} NAME_WE) get_filename_component(base_workdir ${filepath} DIRECTORY) get_filename_component(absolute_filepath ${filepath} ABSOLUTE) set(outputdir "${TargetDir}/${suffix}/${revision_id}/test_files/${NEO_ARCH}/") set(workdir "${CMAKE_CURRENT_SOURCE_DIR}/${base_workdir}/") 
string(REPLACE " " "_" argwospaces ${TEST_KERNEL_kernel_debug_enable})

set(outputpath_base "${outputdir}/${argwospaces}_${suffix}")
if(NOT NEO_DISABLE_BUILTINS_COMPILATION)
  set(output_files
      ${outputpath_base}.spv
      ${outputpath_base}.bin
      ${outputpath_base}.gen
      ${outputpath_base}.dbg
  )

  add_custom_command(
                     OUTPUT ${output_files}
                     COMMAND ${ocloc_cmd_prefix} -q -file ${absolute_filepath} -device ${platform_name} -revision_id ${revision_id} -${NEO_BITS} -out_dir ${outputdir} -output ${argwospaces} -internal_options ${TEST_KERNEL_kernel_debug_enable} -options "-g"
                     WORKING_DIRECTORY ${workdir}
                     DEPENDS ${filepath} ocloc
  )

  list(APPEND kernels_to_compile_${platform_name_with_type}_${revision_id} ${output_files})
else()
  foreach(_file_name "spv" "bin" "gen" "dbg")
    set(_file_prebuilt "${NEO_KERNELS_BIN_DIR}/${suffix}/${revision_id}/test_files/${NEO_ARCH}/${argwospaces}_${suffix}.${_file_name}")
    add_custom_command(
                       OUTPUT ${outputpath_base}.${_file_name}
                       COMMAND ${CMAKE_COMMAND} -E make_directory ${outputdir}
                       COMMAND ${CMAKE_COMMAND} -E copy_if_different ${_file_prebuilt} ${outputdir}
    )

    list(APPEND kernels_to_compile_${platform_name_with_type}_${revision_id} ${outputpath_base}.${_file_name})
  endforeach()
endif()

set(kernels_to_compile_${platform_name_with_type}_${revision_id} ${kernels_to_compile_${platform_name_with_type}_${revision_id}} PARENT_SCOPE)
endfunction()

# neo_gen_kernel_from_ll: register an ocloc command that compiles one LLVM
# input file (the command passes -llvm_input) into a .bin/.gen pair.
#
#   platform_name_with_type  suffix of the kernels_to_compile_<...> list the
#                            outputs are appended to
#   platform_name            value passed to ocloc as -device
#   suffix                   used in the output directory and in output names
#   filepath                 input file; tracked via DEPENDS and passed to
#                            ocloc as an absolute path
#   output_name              value passed as -output and base of output names
#   compile_options          wrapped in literal double quotes and passed as
#                            -internal_options
#
# Reads TargetDir, NEO_ARCH, NEO_BITS and ocloc_cmd_prefix from the calling
# scope. Declared outputs are
#   ${TargetDir}/<suffix>/test_files/${NEO_ARCH}/<output_name>_<suffix>.{bin,gen}
# The updated kernels_to_compile_<platform_name_with_type> list is published
# to the caller with PARENT_SCOPE.
function(neo_gen_kernel_from_ll platform_name_with_type platform_name suffix filepath output_name compile_options)
  # Only the absolute path is needed; the NAME / NAME_WE components that used
  # to be computed here were never read.
  get_filename_component(absolute_filepath ${filepath} ABSOLUTE)

  set(outputdir "${TargetDir}/${suffix}/test_files/${NEO_ARCH}")
  set(workdir "${CMAKE_CURRENT_SOURCE_DIR}/test_files/")

  set(outputpath_base "${outputdir}/${output_name}_${suffix}")
  set(output_files
      ${outputpath_base}.bin
      ${outputpath_base}.gen
  )
  # Surround the options with literal quote characters so they reach ocloc as
  # one quoted value.
  string(CONCAT compile_options \" ${compile_options} \")

  add_custom_command(
                     OUTPUT ${output_files}
                     COMMAND ${ocloc_cmd_prefix} -q -file ${absolute_filepath} -output ${output_name} -device ${platform_name} -${NEO_BITS} -out_dir ${outputdir} -internal_options ${compile_options} -llvm_input
                     WORKING_DIRECTORY ${workdir}
                     DEPENDS ${filepath} ocloc
  )

  list(APPEND kernels_to_compile_${platform_name_with_type} ${output_files})
  set(kernels_to_compile_${platform_name_with_type} ${kernels_to_compile_${platform_name_with_type}} PARENT_SCOPE)
endfunction()

# Kernel sources and option sets consumed by macro_for_each_core_type below.
set(TEST_KERNEL test_files/CopyBuffer_simd16.cl)

set(TEST_KERNEL_options
    "-cl-fast-relaxed-math"
    "-cl-finite-math-only"
    "-cl-kernel-arg-info"
    "-x spir -spir-std=1.2"
    "-g"
)

set(TEST_KERNEL_2_0_options
    "-cl-std=CL2.0"
)

set(TEST_KERNEL_2_0
    test_files/simple_nonuniform.cl
)

set(TEST_KERNEL_STATELESS_internal_options
    "-cl-intel-greater-than-4GB-buffer-required"
)

set(TEST_KERNEL_STATELESS_internal_options_gen9lp
    "-cl-intel-greater-than-4GB-buffer-required -m32"
)

set(TEST_KERNEL_STATELESS
    test_files/stateless_kernel.cl
)

set(TEST_KERNEL_VME
    ${CMAKE_CURRENT_SOURCE_DIR}/test_files/vme_kernels.cl
    ${CMAKE_CURRENT_SOURCE_DIR}/test_files/media_kernels_backend.cl
    ${CMAKE_CURRENT_SOURCE_DIR}/test_files/media_kernels_frontend.cl
)

set(TEST_KERNEL_SIP_DEBUG_options
    "-cl-include-sip-kernel-debug -cl-include-sip-csr -cl-set-bti:0"
)

set(TEST_KERNEL_SIP_DEBUG_LOCAL_options
    "-cl-include-sip-kernel-local-debug -cl-include-sip-csr -cl-set-bti:0"
)

set(TEST_KERNEL_BINDLESS_internal_options
    "-cl-intel-use-bindless-mode -cl-intel-use-bindless-advanced-mode"
)

set(TEST_KERNEL_BINDLESS
    test_files/stateful_copy_buffer.cl
)

set(TEST_KERNEL_BINDLESS_IMAGES
    test_files/copy_buffer_to_image.cl
)

set(TEST_KERNEL_PRINTF
    test_files/printf.cl
)

set(TEST_KERNEL_PRINTF_internal_options_gen9lp
    "-m32"
)

# TEST_KERNELS is every *.cl under test_files/ minus the kernels removed
# below; several of those are compiled separately by the macro with their own
# option sets.
file(GLOB_RECURSE TEST_KERNELS test_files/*.cl)
list(REMOVE_ITEM TEST_KERNELS "${CMAKE_CURRENT_SOURCE_DIR}/test_files/shouldfail.cl")
list(REMOVE_ITEM TEST_KERNELS "${CMAKE_CURRENT_SOURCE_DIR}/test_files/valid_kernel.cl")
list(REMOVE_ITEM TEST_KERNELS "${CMAKE_CURRENT_SOURCE_DIR}/test_files/simple_nonuniform.cl")
list(REMOVE_ITEM TEST_KERNELS "${CMAKE_CURRENT_SOURCE_DIR}/test_files/stateless_kernel.cl")
list(REMOVE_ITEM TEST_KERNELS ${TEST_KERNEL_VME})
list(REMOVE_ITEM TEST_KERNELS "${CMAKE_CURRENT_SOURCE_DIR}/${TEST_KERNEL_PRINTF}")

# macro_for_each_core_type: for every platform type the current core has,
# register the test-kernel build commands per revision and create one
# prepare_test_kernels_<family>_<revision> target that depends on them.
# Reads CORE_TYPE and CORE_TYPE_LOWER, which are not set in this file section
# (presumably provided by apply_macro_for_each_core_type - verify there).
macro(macro_for_each_core_type)
  foreach(PLATFORM_TYPE ${PLATFORM_TYPES})
    if(${CORE_TYPE}_HAS_${PLATFORM_TYPE})
      # family_name_with_type is read below; presumably set by this helper.
      get_family_name_with_type(${CORE_TYPE} ${PLATFORM_TYPE})
      string(TOLOWER ${PLATFORM_TYPE} PLATFORM_TYPE_LOWER)
      set(PLATFORM_LOWER ${DEFAULT_SUPPORTED_${CORE_TYPE}_${PLATFORM_TYPE}_PLATFORM})
      set(PLATFORM_2_0_LOWER ${DEFAULT_SUPPORTED_2_0_${CORE_TYPE}_${PLATFORM_TYPE}_PLATFORM})
      set(PLATFORM_VME_LOWER ${DEFAULT_SUPPORTED_VME_${CORE_TYPE}_${PLATFORM_TYPE}_PLATFORM})

      # Start from the common kernel list and drop what this core cannot build.
      set(PLATFORM_TEST_KERNELS ${TEST_KERNELS})
      set(IMAGE_SUPPORT FALSE)
      CORE_CONTAINS_PLATFORMS("SUPPORTED_IMAGES" ${CORE_TYPE} IMAGE_SUPPORT)
      if(NOT IMAGE_SUPPORT)
        list(REMOVE_ITEM PLATFORM_TEST_KERNELS "${CMAKE_CURRENT_SOURCE_DIR}/test_files/copy_buffer_to_image.cl")
      endif()

      foreach(KERNEL_TO_REMOVE ${${CORE_TYPE}_TEST_KERNELS_BLOCKLIST})
        set(KERNEL_TO_REMOVE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/test_files/${KERNEL_TO_REMOVE}")
        list(REMOVE_ITEM PLATFORM_TEST_KERNELS ${KERNEL_TO_REMOVE_PATH})
      endforeach()

      # Build commands are only registered for MSVC or for 8-byte pointers;
      # the prepare_test_kernels_* targets further down are created regardless.
      if(MSVC OR CMAKE_SIZEOF_VOID_P EQUAL 8)
        foreach(REVISION_ID ${${PLATFORM_TYPE}_${CORE_TYPE}_REVISIONS})
          neo_gen_kernels(${family_name_with_type} ${PLATFORM_LOWER} ${REVISION_ID} ${family_name_with_type} ${PLATFORM_TEST_KERNELS})
          neo_gen_kernels_with_options(${family_name_with_type} ${PLATFORM_LOWER} ${REVISION_ID} ${family_name_with_type} ${TEST_KERNEL} ${TEST_KERNEL_options})
        endforeach()

        # Compile gen specific kernels if any were found
        file(GLOB_RECURSE ${CORE_TYPE_LOWER}_TEST_KERNELS test_files/*.${CORE_TYPE_LOWER})
        if(NOT "${${CORE_TYPE_LOWER}_TEST_KERNELS}" STREQUAL "")
          foreach(REVISION_ID ${${PLATFORM_TYPE}_${CORE_TYPE}_REVISIONS})
            neo_gen_kernels(${family_name_with_type} ${PLATFORM_LOWER} ${REVISION_ID} ${family_name_with_type} ${${CORE_TYPE_LOWER}_TEST_KERNELS})
          endforeach()
        endif()

        # Compile platform specific kernels if any were found
        file(GLOB_RECURSE ${PLATFORM_LOWER}_TEST_KERNELS test_files/*.${PLATFORM_LOWER})
        if(NOT "${${PLATFORM_LOWER}_TEST_KERNELS}" STREQUAL "")
          foreach(REVISION_ID ${${PLATFORM_TYPE}_${CORE_TYPE}_REVISIONS})
            neo_gen_kernels(${family_name_with_type} ${PLATFORM_LOWER} ${REVISION_ID} ${family_name_with_type} ${${PLATFORM_LOWER}_TEST_KERNELS})
          endforeach()
        endif()

        # Disable debug kernel generation on gen8 - debugger not supported on gen8
        if(NOT ("${CORE_TYPE_LOWER}" STREQUAL "gen8"))
          foreach(REVISION_ID ${${PLATFORM_TYPE}_${CORE_TYPE}_REVISIONS})
            neo_gen_kernel_with_kernel_debug_options(${family_name_with_type} ${PLATFORM_LOWER} ${REVISION_ID} ${family_name_with_type} ${TEST_KERNEL})
          endforeach()
        endif()

        # Gen9lp needs extra -m32 flag
        if(("${CORE_TYPE_LOWER}" STREQUAL "gen9") AND ("${PLATFORM_TYPE_LOWER}" STREQUAL "lp"))
          foreach(REVISION_ID ${${PLATFORM_TYPE}_${CORE_TYPE}_REVISIONS})
            neo_gen_kernels_with_internal_options(${family_name_with_type} ${PLATFORM_LOWER} ${REVISION_ID} ${family_name_with_type} ${TEST_KERNEL_PRINTF} "" ${TEST_KERNEL_PRINTF_internal_options_gen9lp})
            neo_gen_kernels_with_internal_options(${family_name_with_type} ${PLATFORM_LOWER} ${REVISION_ID} ${family_name_with_type} ${TEST_KERNEL_STATELESS} "" ${TEST_KERNEL_STATELESS_internal_options_gen9lp})
          endforeach()
        else()
          foreach(REVISION_ID ${${PLATFORM_TYPE}_${CORE_TYPE}_REVISIONS})
            # The printf kernel is given a single space as its last argument
            # here (the gen9lp branch passes "-m32" in the same position).
            neo_gen_kernels_with_internal_options(${family_name_with_type} ${PLATFORM_LOWER} ${REVISION_ID} ${family_name_with_type} ${TEST_KERNEL_PRINTF} "" " ")
            neo_gen_kernels_with_internal_options(${family_name_with_type} ${PLATFORM_LOWER} ${REVISION_ID} ${family_name_with_type} ${TEST_KERNEL_STATELESS} "" ${TEST_KERNEL_STATELESS_internal_options})
          endforeach()
        endif()

        # The bindless image kernel is only built when the core supports images.
        set(BINDLESS_KERNELS_IMAGES "")
        if(IMAGE_SUPPORT)
          set(BINDLESS_KERNELS_IMAGES ${TEST_KERNEL_BINDLESS_IMAGES})
        endif()

        foreach(file ${TEST_KERNEL_BINDLESS} ${BINDLESS_KERNELS_IMAGES})
          foreach(REVISION_ID ${${PLATFORM_TYPE}_${CORE_TYPE}_REVISIONS})
            neo_gen_kernels_with_internal_options(${family_name_with_type} ${PLATFORM_LOWER} ${REVISION_ID} ${family_name_with_type} ${file} "bindless" ${TEST_KERNEL_BINDLESS_internal_options})
          endforeach()
        endforeach()

        # The CL2.0 and VME kernels use their own default platforms and are
        # skipped when none is defined for this core/platform type.
        if(PLATFORM_2_0_LOWER)
          foreach(REVISION_ID ${${PLATFORM_TYPE}_${CORE_TYPE}_REVISIONS})
            neo_gen_kernels_with_options(${family_name_with_type} ${PLATFORM_2_0_LOWER} ${REVISION_ID} ${family_name_with_type} "${TEST_KERNEL_2_0}" ${TEST_KERNEL_2_0_options})
          endforeach()
        endif()
        if(PLATFORM_VME_LOWER)
          foreach(REVISION_ID ${${PLATFORM_TYPE}_${CORE_TYPE}_REVISIONS})
            neo_gen_kernels(${family_name_with_type} ${PLATFORM_VME_LOWER} ${REVISION_ID} ${family_name_with_type} ${TEST_KERNEL_VME})
          endforeach()
        endif()
      endif()

      # One aggregate target per revision, hooked into prepare_test_kernels_for_ocl.
      foreach(REVISION_ID ${${PLATFORM_TYPE}_${CORE_TYPE}_REVISIONS})
        add_custom_target(prepare_test_kernels_${family_name_with_type}_${REVISION_ID} DEPENDS ${kernels_to_compile_${family_name_with_type}_${REVISION_ID}} copy_compiler_files)
        add_dependencies(prepare_test_kernels_for_ocl prepare_test_kernels_${family_name_with_type}_${REVISION_ID})
        set_target_properties(prepare_test_kernels_${family_name_with_type}_${REVISION_ID} PROPERTIES FOLDER "${PLATFORM_SPECIFIC_TEST_TARGETS_FOLDER}/${family_name_with_type}/${REVISION_ID}")
      endforeach()
    endif()
  endforeach()
endmacro()
apply_macro_for_each_core_type("TESTED")
add_subdirectories()
create_project_source_tree(igdrcl_tests)
if(MSVC)
  add_dependencies(unit_tests mock_gdi)
  add_dependencies(igdrcl_tests mock_gdi)
endif()

# Precompiled-header wiring for igdrcl_tests. UltSources is captured here, so
# only sources already attached to the target receive the PCH properties.
set(UltPchHeader "${CMAKE_CURRENT_SOURCE_DIR}/igdrcl_tests_pch.h")
set(UltPchSource "${CMAKE_CURRENT_SOURCE_DIR}/igdrcl_tests_pch.cpp")
get_target_property(UltSources igdrcl_tests SOURCES)

if(MSVC AND NOT DISABLE_ULT_PCH_WIN)
  # MSVC: the PCH source creates the .pch (/Yc); all other sources use it (/Yu).
  set(UltPchBinary "${CMAKE_CURRENT_BINARY_DIR}/igdrcl_tests_pch.pch")
  set(IGDRCL_SRCS_ult_pch ${UltPchSource} ${UltPchHeader})
  target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_ult_pch})

  set_source_files_properties(${UltSources}
                              PROPERTIES COMPILE_FLAGS "/Yu${UltPchHeader} /FI${UltPchHeader} /Fp${UltPchBinary}"
                              OBJECT_DEPENDS "${UltPchBinary}"
  )

  set_source_files_properties(${UltPchSource}
                              PROPERTIES COMPILE_FLAGS "/Yc${UltPchHeader} /FI${UltPchHeader} /Fp${UltPchBinary}"
                              OBJECT_OUTPUTS "${UltPchBinary}"
  )
elseif(USE_ULT_PCH)
  # Non-MSVC: compile the header as a c++-header inside a helper static
  # library, then copy the header and the resulting object next to each other
  # in the build directory as <header> and <header>.gch.
  set(UltPchHeaderInBuildDir "${CMAKE_CURRENT_BINARY_DIR}/igdrcl_tests_pch.h")
  set(UltPchBinaryGch "${UltPchHeaderInBuildDir}.gch")
  set(UltPchBinary "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/igdrcl_ult_pch.dir/igdrcl_tests_pch.h.o")

  add_library(igdrcl_ult_pch STATIC EXCLUDE_FROM_ALL ${UltPchHeader})
  add_dependencies(igdrcl_tests igdrcl_ult_pch)

  # NOTE(review): the arguments of the next two commands are reduced to bare
  # "$" in this copy of the file; they look like generator expressions whose
  # <...> part was lost - restore them from the upstream source.
  target_include_directories(igdrcl_ult_pch PRIVATE
                             $
                             $
  )
  target_compile_definitions(igdrcl_ult_pch PRIVATE $)

  target_include_directories(igdrcl_tests PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
  if(NOT USE_ASAN)
    set_source_files_properties(${UltSources}
                                PROPERTIES COMPILE_FLAGS "-include ${UltPchHeaderInBuildDir} -msse4"
                                OBJECT_DEPENDS ${UltPchBinaryGch}
    )
  endif()

  set_source_files_properties(${UltPchHeader}
                              PROPERTIES LANGUAGE "CXX"
                              COMPILE_FLAGS "-x c++-header -msse4 -gdwarf-2"
  )

  add_custom_command(
                     OUTPUT ${UltPchBinaryGch}
                     COMMAND cp "${UltPchHeader}" "${UltPchHeaderInBuildDir}"
                     COMMAND cp "${UltPchBinary}" "${UltPchBinaryGch}"
                     DEPENDS ${UltPchBinary}
  )
endif()

# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# !! !!
# !! DONT ADD ANY SOURCES HERE! !!
# !! !!
# !! You are below PCH logic! !!
# !! This is to keep PCH dependencies correctly without creating new target !!
# !! !!
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
compute-runtime-22.14.22890/opencl/test/unit_test/accelerators/000077500000000000000000000000001422164147700242765ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/accelerators/CMakeLists.txt000066400000000000000000000004471422164147700270430ustar00rootroot00000000000000# # Copyright (C) 2018-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_accelerators ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/media_image_arg_tests.cpp ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_accelerators}) compute-runtime-22.14.22890/opencl/test/unit_test/accelerators/media_image_arg_tests.cpp000066400000000000000000000117621422164147700313050ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/ptr_math.h" #include "shared/source/memory_manager/surface.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/helpers/surface_formats.h" #include "opencl/source/kernel/kernel.h" #include "opencl/source/mem_obj/image.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_program.h" using namespace NEO; class MediaImageSetArgTest : public ClDeviceFixture, public testing::Test { public: MediaImageSetArgTest() = default; protected: void SetUp() override { ClDeviceFixture::SetUp(); pKernelInfo = std::make_unique(); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1; program = std::make_unique(toClDeviceVector(*pClDevice)); pKernelInfo->heapInfo.SurfaceStateHeapSize = sizeof(surfaceStateHeap); pKernelInfo->heapInfo.pSsh = surfaceStateHeap; pKernelInfo->kernelDescriptor.kernelAttributes.flags.usesVme = true; pKernelInfo->addArgImage(0, 0x00, 
iOpenCL::IMAGE_MEMORY_OBJECT_2D_MEDIA); pKernelInfo->addArgImage(0, 0x40, iOpenCL::IMAGE_MEMORY_OBJECT_2D_MEDIA); int32_t retVal = CL_INVALID_PLATFORM; pMultiDeviceKernel = MultiDeviceKernel::create(program.get(), MockKernel::toKernelInfoContainer(*static_cast(pKernelInfo.get()), rootDeviceIndex), &retVal); pKernel = static_cast(pMultiDeviceKernel->getKernel(rootDeviceIndex)); ASSERT_NE(nullptr, pKernel); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(true, pKernel->isVmeKernel()); pKernel->setKernelArgHandler(0, &Kernel::setArgImage); pKernel->setKernelArgHandler(1, &Kernel::setArgImage); context = new MockContext(pClDevice); srcImage = Image2dHelper<>::create(context); ASSERT_NE(nullptr, srcImage); } void TearDown() override { delete srcImage; delete pMultiDeviceKernel; program.reset(); delete context; ClDeviceFixture::TearDown(); } cl_int retVal = CL_SUCCESS; MockContext *context; std::unique_ptr program; MockKernel *pKernel = nullptr; MultiDeviceKernel *pMultiDeviceKernel = nullptr; std::unique_ptr pKernelInfo; char surfaceStateHeap[0x80]; Image *srcImage = nullptr; }; HWTEST_F(MediaImageSetArgTest, WhenSettingMediaImageArgThenArgsSetCorrectly) { typedef typename FamilyType::MEDIA_SURFACE_STATE MEDIA_SURFACE_STATE; auto pSurfaceState = reinterpret_cast( ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->argAsImg(0).bindful)); srcImage->setMediaImageArg(const_cast(pSurfaceState), pClDevice->getRootDeviceIndex()); SurfaceOffsets surfaceOffsets; srcImage->getSurfaceOffsets(surfaceOffsets); EXPECT_EQ(srcImage->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddress() + surfaceOffsets.offset, pSurfaceState->getSurfaceBaseAddress()); std::vector surfaces; pKernel->getResidency(surfaces); EXPECT_EQ(0u, surfaces.size()); } HWTEST_F(MediaImageSetArgTest, WhenSettingKernelArgImageThenArgsSetCorrectly) { typedef typename FamilyType::MEDIA_SURFACE_STATE MEDIA_SURFACE_STATE; cl_mem memObj = srcImage; retVal = clSetKernelArg( pMultiDeviceKernel, 0, 
sizeof(memObj), &memObj); ASSERT_EQ(CL_SUCCESS, retVal); auto pSurfaceState = reinterpret_cast( ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->argAsImg(0).bindful)); uint64_t surfaceAddress = pSurfaceState->getSurfaceBaseAddress(); ASSERT_EQ(srcImage->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddress(), surfaceAddress); EXPECT_EQ(srcImage->getImageDesc().image_width, pSurfaceState->getWidth()); EXPECT_EQ(srcImage->getImageDesc().image_height, pSurfaceState->getHeight()); typename FamilyType::MEDIA_SURFACE_STATE::TILE_MODE tileMode; if (srcImage->isTiledAllocation()) { tileMode = FamilyType::MEDIA_SURFACE_STATE::TILE_MODE_TILEMODE_YMAJOR; } else { tileMode = FamilyType::MEDIA_SURFACE_STATE::TILE_MODE_TILEMODE_LINEAR; } EXPECT_EQ(tileMode, pSurfaceState->getTileMode()); EXPECT_EQ(MEDIA_SURFACE_STATE::SURFACE_FORMAT_Y8_UNORM_VA, pSurfaceState->getSurfaceFormat()); EXPECT_EQ(MEDIA_SURFACE_STATE::PICTURE_STRUCTURE_FRAME_PICTURE, pSurfaceState->getPictureStructure()); std::vector surfaces; pKernel->getResidency(surfaces); for (auto &surface : surfaces) { delete surface; } EXPECT_EQ(1u, surfaces.size()); } compute-runtime-22.14.22890/opencl/test/unit_test/api/000077500000000000000000000000001422164147700224005ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/api/CMakeLists.txt000066400000000000000000000175561422164147700251560ustar00rootroot00000000000000# # Copyright (C) 2018-2022 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_api ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}additional_extensions_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/api_tests_wrapper1.cpp ${CMAKE_CURRENT_SOURCE_DIR}/api_tests_wrapper2.cpp ${CMAKE_CURRENT_SOURCE_DIR}/api_tests_wrapper3.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cl_add_comment_to_aub_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_api_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cl_api_tests.h 
${CMAKE_CURRENT_SOURCE_DIR}/cl_build_program_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_clone_kernel_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_compile_program_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_create_buffer_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cl_create_command_queue_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_create_command_queue_with_properties_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cl_create_context_from_type_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_create_context_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_create_image_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cl_create_kernel_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_create_kernels_in_program_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_create_perf_counters_command_queue_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_create_pipe_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_create_program_with_binary_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_create_program_with_built_in_kernels_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cl_create_sampler_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_create_sampler_with_properties_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_create_sub_buffer_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_create_sub_devices_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_create_user_event_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_enqueue_barrier_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_enqueue_barrier_with_wait_list_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_enqueue_copy_buffer_rect_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_enqueue_copy_buffer_to_image_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_enqueue_copy_image_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_enqueue_copy_image_to_buffer_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_enqueue_fill_buffer_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_enqueue_fill_image_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_enqueue_image_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_enqueue_map_buffer_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_enqueue_map_image_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_enqueue_marker_tests.inl 
${CMAKE_CURRENT_SOURCE_DIR}/cl_enqueue_marker_with_wait_list_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_enqueue_migrate_mem_objects_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_enqueue_native_kernel_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_enqueue_nd_range_kernel_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_enqueue_read_buffer_rect_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_enqueue_read_buffer_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_enqueue_read_image_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_enqueue_svm_free_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_enqueue_svm_map_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_enqueue_svm_mem_fill_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_enqueue_svm_memcpy_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_enqueue_svm_migrate_mem_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cl_enqueue_svm_unmap_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_enqueue_task_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_enqueue_unmap_mem_object_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_enqueue_verify_memory.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_enqueue_wait_for_events_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_enqueue_write_buffer_rect_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_enqueue_write_buffer_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_enqueue_write_image_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_finish_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_flush_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_function_pointers_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_context_info_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_device_and_host_timer.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_device_ids_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_device_info_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_event_profiling_info_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_extension_function_address_for_platform_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_extension_function_address_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_image_info_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_image_params_tests.inl 
${CMAKE_CURRENT_SOURCE_DIR}/cl_get_kernel_arg_info_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_kernel_info_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_kernel_max_concurrent_work_group_count_intel_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_kernel_sub_group_info_khr_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_kernel_sub_group_info_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_kernel_suggested_local_work_size_intel_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_kernel_suggested_local_work_size_khr_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_kernel_work_group_info_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_mem_object_info_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_pipe_info_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_platform_ids_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_platform_info_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_program_build_info_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_program_info_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_supported_image_formats_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_icd_get_platform_ids_khr_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_intel_accelerator_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_intel_motion_estimation.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cl_intel_tracing_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_link_program_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_mem_locally_uncached_resource_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cl_release_command_queue_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_release_context_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_release_event_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_release_kernel_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_release_mem_obj_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_release_program_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_retain_mem_obj_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_retain_release_command_queue_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_retain_release_context_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_retain_release_device_tests.inl 
${CMAKE_CURRENT_SOURCE_DIR}/cl_retain_release_sampler_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_set_context_destructor_callback.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_set_event_callback_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_set_kernel_arg_svm_pointer_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_set_kernel_exec_info_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_set_mem_object_destructor_callback_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_set_performance_configuration_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_set_program_release_callback.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_set_program_specialization_constant_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_svm_alloc_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_svm_free_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_unified_shared_memory_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_unload_compiler_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_unload_platform_compiler_tests.inl ) if(TESTS_PVC_AND_LATER) list(APPEND IGDRCL_SRCS_tests_api ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_device_info_pvc_and_later_tests.cpp) endif() target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_api}) add_subdirectories() compute-runtime-22.14.22890/opencl/test/unit_test/api/additional_extensions_tests.cpp000066400000000000000000000006311422164147700307150ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/source/api/additional_extensions.h" using namespace NEO; TEST(AdditionalExtension, GivenFuncNameWhenGetingFunctionAddressThenReturnNullptr) { auto address = getAdditionalExtensionFunctionAddress("clFunction"); EXPECT_EQ(nullptr, address); } compute-runtime-22.14.22890/opencl/test/unit_test/api/api_tests_wrapper1.cpp000066400000000000000000000044611422164147700267250ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/api/cl_add_comment_to_aub_tests.inl" #include 
"opencl/test/unit_test/api/cl_build_program_tests.inl" #include "opencl/test/unit_test/api/cl_clone_kernel_tests.inl" #include "opencl/test/unit_test/api/cl_compile_program_tests.inl" #include "opencl/test/unit_test/api/cl_create_command_queue_tests.inl" #include "opencl/test/unit_test/api/cl_create_context_from_type_tests.inl" #include "opencl/test/unit_test/api/cl_create_context_tests.inl" #include "opencl/test/unit_test/api/cl_create_kernel_tests.inl" #include "opencl/test/unit_test/api/cl_create_kernels_in_program_tests.inl" #include "opencl/test/unit_test/api/cl_create_perf_counters_command_queue_tests.inl" #include "opencl/test/unit_test/api/cl_create_pipe_tests.inl" #include "opencl/test/unit_test/api/cl_create_program_with_binary_tests.inl" #include "opencl/test/unit_test/api/cl_create_sampler_tests.inl" #include "opencl/test/unit_test/api/cl_create_sampler_with_properties_tests.inl" #include "opencl/test/unit_test/api/cl_create_sub_buffer_tests.inl" #include "opencl/test/unit_test/api/cl_create_sub_devices_tests.inl" #include "opencl/test/unit_test/api/cl_create_user_event_tests.inl" #include "opencl/test/unit_test/api/cl_enqueue_barrier_tests.inl" #include "opencl/test/unit_test/api/cl_enqueue_barrier_with_wait_list_tests.inl" #include "opencl/test/unit_test/api/cl_enqueue_copy_buffer_rect_tests.inl" #include "opencl/test/unit_test/api/cl_enqueue_copy_buffer_to_image_tests.inl" #include "opencl/test/unit_test/api/cl_enqueue_copy_image_tests.inl" #include "opencl/test/unit_test/api/cl_enqueue_copy_image_to_buffer_tests.inl" #include "opencl/test/unit_test/api/cl_enqueue_fill_buffer_tests.inl" #include "opencl/test/unit_test/api/cl_enqueue_fill_image_tests.inl" #include "opencl/test/unit_test/api/cl_enqueue_image_tests.inl" #include "opencl/test/unit_test/api/cl_enqueue_map_buffer_tests.inl" #include "opencl/test/unit_test/api/cl_enqueue_map_image_tests.inl" #include "opencl/test/unit_test/api/cl_enqueue_marker_tests.inl" #include 
"opencl/test/unit_test/api/cl_enqueue_marker_with_wait_list_tests.inl" #include "opencl/test/unit_test/api/cl_function_pointers_tests.inl" #include "opencl/test/unit_test/api/cl_unified_shared_memory_tests.inl" compute-runtime-22.14.22890/opencl/test/unit_test/api/api_tests_wrapper2.cpp000066400000000000000000000045641422164147700267320ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/api/cl_enqueue_migrate_mem_objects_tests.inl" #include "opencl/test/unit_test/api/cl_enqueue_native_kernel_tests.inl" #include "opencl/test/unit_test/api/cl_enqueue_nd_range_kernel_tests.inl" #include "opencl/test/unit_test/api/cl_enqueue_read_buffer_rect_tests.inl" #include "opencl/test/unit_test/api/cl_enqueue_read_buffer_tests.inl" #include "opencl/test/unit_test/api/cl_enqueue_read_image_tests.inl" #include "opencl/test/unit_test/api/cl_enqueue_svm_free_tests.inl" #include "opencl/test/unit_test/api/cl_enqueue_svm_map_tests.inl" #include "opencl/test/unit_test/api/cl_enqueue_svm_mem_fill_tests.inl" #include "opencl/test/unit_test/api/cl_enqueue_svm_memcpy_tests.inl" #include "opencl/test/unit_test/api/cl_enqueue_svm_unmap_tests.inl" #include "opencl/test/unit_test/api/cl_enqueue_task_tests.inl" #include "opencl/test/unit_test/api/cl_enqueue_unmap_mem_object_tests.inl" #include "opencl/test/unit_test/api/cl_enqueue_verify_memory.inl" #include "opencl/test/unit_test/api/cl_enqueue_wait_for_events_tests.inl" #include "opencl/test/unit_test/api/cl_enqueue_write_buffer_rect_tests.inl" #include "opencl/test/unit_test/api/cl_enqueue_write_buffer_tests.inl" #include "opencl/test/unit_test/api/cl_enqueue_write_image_tests.inl" #include "opencl/test/unit_test/api/cl_finish_tests.inl" #include "opencl/test/unit_test/api/cl_flush_tests.inl" #include "opencl/test/unit_test/api/cl_get_context_info_tests.inl" #include "opencl/test/unit_test/api/cl_get_device_and_host_timer.inl" #include 
"opencl/test/unit_test/api/cl_get_device_ids_tests.inl" #include "opencl/test/unit_test/api/cl_get_device_info_tests.inl" #include "opencl/test/unit_test/api/cl_get_event_profiling_info_tests.inl" #include "opencl/test/unit_test/api/cl_get_extension_function_address_for_platform_tests.inl" #include "opencl/test/unit_test/api/cl_get_extension_function_address_tests.inl" #include "opencl/test/unit_test/api/cl_get_image_info_tests.inl" #include "opencl/test/unit_test/api/cl_get_image_params_tests.inl" #include "opencl/test/unit_test/api/cl_get_kernel_arg_info_tests.inl" #include "opencl/test/unit_test/api/cl_get_kernel_info_tests.inl" #include "opencl/test/unit_test/api/cl_get_kernel_sub_group_info_khr_tests.inl" #include "opencl/test/unit_test/api/cl_get_kernel_sub_group_info_tests.inl" compute-runtime-22.14.22890/opencl/test/unit_test/api/api_tests_wrapper3.cpp000066400000000000000000000054361422164147700267320ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/api/cl_get_kernel_max_concurrent_work_group_count_intel_tests.inl" #include "opencl/test/unit_test/api/cl_get_kernel_suggested_local_work_size_intel_tests.inl" #include "opencl/test/unit_test/api/cl_get_kernel_suggested_local_work_size_khr_tests.inl" #include "opencl/test/unit_test/api/cl_get_kernel_work_group_info_tests.inl" #include "opencl/test/unit_test/api/cl_get_mem_object_info_tests.inl" #include "opencl/test/unit_test/api/cl_get_pipe_info_tests.inl" #include "opencl/test/unit_test/api/cl_get_platform_ids_tests.inl" #include "opencl/test/unit_test/api/cl_get_platform_info_tests.inl" #include "opencl/test/unit_test/api/cl_get_program_build_info_tests.inl" #include "opencl/test/unit_test/api/cl_get_program_info_tests.inl" #include "opencl/test/unit_test/api/cl_get_supported_image_formats_tests.inl" #include "opencl/test/unit_test/api/cl_icd_get_platform_ids_khr_tests.inl" #include 
"opencl/test/unit_test/api/cl_intel_accelerator_tests.inl" #include "opencl/test/unit_test/api/cl_intel_tracing_tests.inl" #include "opencl/test/unit_test/api/cl_link_program_tests.inl" #include "opencl/test/unit_test/api/cl_release_command_queue_tests.inl" #include "opencl/test/unit_test/api/cl_release_context_tests.inl" #include "opencl/test/unit_test/api/cl_release_event_tests.inl" #include "opencl/test/unit_test/api/cl_release_kernel_tests.inl" #include "opencl/test/unit_test/api/cl_release_mem_obj_tests.inl" #include "opencl/test/unit_test/api/cl_release_program_tests.inl" #include "opencl/test/unit_test/api/cl_retain_mem_obj_tests.inl" #include "opencl/test/unit_test/api/cl_retain_release_command_queue_tests.inl" #include "opencl/test/unit_test/api/cl_retain_release_context_tests.inl" #include "opencl/test/unit_test/api/cl_retain_release_device_tests.inl" #include "opencl/test/unit_test/api/cl_retain_release_sampler_tests.inl" #include "opencl/test/unit_test/api/cl_set_context_destructor_callback.inl" #include "opencl/test/unit_test/api/cl_set_event_callback_tests.inl" #include "opencl/test/unit_test/api/cl_set_kernel_arg_svm_pointer_tests.inl" #include "opencl/test/unit_test/api/cl_set_kernel_exec_info_tests.inl" #include "opencl/test/unit_test/api/cl_set_mem_object_destructor_callback_tests.inl" #include "opencl/test/unit_test/api/cl_set_performance_configuration_tests.inl" #include "opencl/test/unit_test/api/cl_set_program_release_callback.inl" #include "opencl/test/unit_test/api/cl_set_program_specialization_constant_tests.inl" #include "opencl/test/unit_test/api/cl_svm_alloc_tests.inl" #include "opencl/test/unit_test/api/cl_svm_free_tests.inl" #include "opencl/test/unit_test/api/cl_unload_compiler_tests.inl" #include "opencl/test/unit_test/api/cl_unload_platform_compiler_tests.inl" compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_add_comment_to_aub_tests.inl000066400000000000000000000051461422164147700306150ustar00rootroot00000000000000/* * 
Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/execution_environment/root_device_environment.h" #include "shared/test/common/mocks/mock_aub_center.h" #include "shared/test/common/mocks/mock_aub_manager.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/context/context.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/api/cl_api_tests.h" using namespace NEO; namespace ULT { struct clAddCommentToAubTest : api_tests { void SetUp() override { api_tests::SetUp(); pDevice = pContext->getDevice(0); } void TearDown() override { api_tests::TearDown(); } ClDevice *pDevice = nullptr; }; TEST_F(clAddCommentToAubTest, givenProperCommentNullptrAubCenterWhenAddCommentToAubThenSuccessIsReturned) { auto retVal = clAddCommentINTEL(pDevice, "comment"); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clAddCommentToAubTest, givenInvalidDeviceWhenAddCommentToAubThenErrorIsReturned) { auto retVal = clAddCommentINTEL(nullptr, "comment"); EXPECT_EQ(CL_INVALID_DEVICE, retVal); } TEST_F(clAddCommentToAubTest, givenNullptrCommentWhenAddCommentToAubThenErrorIsReturned) { auto retVal = clAddCommentINTEL(pDevice, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(clAddCommentToAubTest, givenAubCenterAndProperCommentButNullptrAubManagerWhenAddCommentToAubThenErrorIsReturned) { pDevice->getExecutionEnvironment()->rootDeviceEnvironments[testedRootDeviceIndex]->aubCenter.reset(new MockAubCenter()); auto retVal = clAddCommentINTEL(pDevice, "comment"); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(clAddCommentToAubTest, givenProperCommentAubCenterAndAubManagerWhenAddCommentToAubThenSuccessIsReturned) { struct AubManagerCommentMock : public MockAubManager { using MockAubManager::MockAubManager; void addComment(const char *message) override { addCommentCalled = true; EXPECT_STREQ("comment", message); } bool addCommentCalled = 
false; }; auto mockAubCenter = new MockAubCenter(); auto mockAubManager = new AubManagerCommentMock; mockAubCenter->aubManager.reset(mockAubManager); pDevice->getExecutionEnvironment()->rootDeviceEnvironments[testedRootDeviceIndex]->aubCenter.reset(mockAubCenter); EXPECT_FALSE(mockAubManager->addCommentCalled); auto retVal = clAddCommentINTEL(pDevice, "comment"); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(mockAubManager->addCommentCalled); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_api_tests.cpp000066400000000000000000000026451422164147700255640ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "cl_api_tests.h" #include "shared/test/common/mocks/mock_device.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" namespace NEO { void CL_CALLBACK notifyFuncProgram( cl_program program, void *userData) { *((char *)userData) = 'a'; } void api_fixture_using_aligned_memory_manager::SetUp() { retVal = CL_SUCCESS; retSize = 0; device = new MockClDevice{MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())}; cl_device_id deviceId = device; context = Context::create(nullptr, ClDeviceVector(&deviceId, 1), nullptr, nullptr, retVal); EXPECT_EQ(CL_SUCCESS, retVal); Context *ctxPtr = reinterpret_cast(context); commandQueue = new MockCommandQueue(context, device, 0, false); program = new MockProgram(ctxPtr, false, toClDeviceVector(*device)); Program *prgPtr = reinterpret_cast(program); kernel = new MockKernel(prgPtr, program->mockKernelInfo, *device); ASSERT_NE(nullptr, kernel); } void api_fixture_using_aligned_memory_manager::TearDown() { delete kernel; delete commandQueue; context->release(); program->release(); delete device; } } // namespace NEO 
compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_api_tests.h000066400000000000000000000103031422164147700252170ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/os_interface/device_factory.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/default_hw_info.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/api/api.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/execution_environment/cl_execution_environment.h" #include "opencl/source/tracing/tracing_api.h" #include "opencl/test/unit_test/helpers/ult_limits.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include namespace NEO { template struct ApiFixture { virtual void SetUp() { DebugManager.flags.CreateMultipleRootDevices.set(numRootDevices); executionEnvironment = new ClExecutionEnvironment(); prepareDeviceEnvironments(*executionEnvironment); auto rootDevice = MockDevice::createWithExecutionEnvironment(defaultHwInfo.get(), executionEnvironment, rootDeviceIndex); if (rootDeviceIndex != 0u) { rootDeviceEnvironmentBackup.swap(executionEnvironment->rootDeviceEnvironments[0]); } pDevice = new MockClDevice(rootDevice); ASSERT_NE(nullptr, pDevice); testedClDevice = pDevice; pContext = Context::create(nullptr, ClDeviceVector(&testedClDevice, 1), nullptr, nullptr, retVal); EXPECT_EQ(retVal, CL_SUCCESS); pCommandQueue = new MockCommandQueue(pContext, pDevice, nullptr, false); pProgram = new MockProgram(pContext, false, toClDeviceVector(*pDevice)); pMultiDeviceKernel = MockMultiDeviceKernel::create(pProgram, 
MockKernel::toKernelInfoContainer(pProgram->mockKernelInfo, testedRootDeviceIndex)); pKernel = static_cast(pMultiDeviceKernel->getKernel(testedRootDeviceIndex)); ASSERT_NE(nullptr, pKernel); } virtual void TearDown() { pMultiDeviceKernel->release(); pCommandQueue->release(); pContext->release(); pProgram->release(); if (rootDeviceIndex != 0u) { rootDeviceEnvironmentBackup.swap(executionEnvironment->rootDeviceEnvironments[0]); } pDevice->decRefInternal(); } void disableQueueCapabilities(cl_command_queue_capabilities_intel capabilities) { if (pCommandQueue->queueCapabilities == CL_QUEUE_DEFAULT_CAPABILITIES_INTEL) { pCommandQueue->queueCapabilities = pDevice->getQueueFamilyCapabilitiesAll(); } pCommandQueue->queueCapabilities &= ~capabilities; } DebugManagerStateRestore restorer; cl_int retVal = CL_SUCCESS; size_t retSize = 0; MockCommandQueue *pCommandQueue = nullptr; Context *pContext = nullptr; MultiDeviceKernel *pMultiDeviceKernel = nullptr; MockKernel *pKernel = nullptr; MockProgram *pProgram = nullptr; constexpr static uint32_t numRootDevices = maxRootDeviceCount; constexpr static uint32_t testedRootDeviceIndex = rootDeviceIndex; cl_device_id testedClDevice = nullptr; MockClDevice *pDevice = nullptr; ClExecutionEnvironment *executionEnvironment = nullptr; std::unique_ptr rootDeviceEnvironmentBackup; }; struct api_tests : public ApiFixture<>, public ::testing::Test { void SetUp() override { ApiFixture::SetUp(); } void TearDown() override { ApiFixture::TearDown(); } }; struct api_fixture_using_aligned_memory_manager { public: virtual void SetUp(); virtual void TearDown(); cl_int retVal; size_t retSize; CommandQueue *commandQueue; Context *context; MockKernel *kernel; MockProgram *program; MockClDevice *device; }; using api_test_using_aligned_memory_manager = Test; void CL_CALLBACK notifyFuncProgram( cl_program program, void *userData); } // namespace NEO 
compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_build_program_tests.inl000066400000000000000000000473201422164147700276400ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/compiler_interface/compiler_interface.h" #include "shared/source/device/device.h" #include "shared/source/helpers/file_io.h" #include "shared/source/program/kernel_info.h" #include "shared/test/common/helpers/kernel_binary_helper.h" #include "shared/test/common/helpers/test_files.h" #include "shared/test/common/mocks/mock_compilers.h" #include "opencl/source/context/context.h" #include "opencl/source/program/program.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clBuildProgramTests; namespace ULT { TEST_F(clBuildProgramTests, GivenSourceAsInputWhenCreatingProgramWithSourceThenProgramBuildSucceeds) { cl_program pProgram = nullptr; std::unique_ptr pSource = nullptr; size_t sourceSize = 0; std::string testFile; KernelBinaryHelper kbHelper("CopyBuffer_simd16", false); testFile.append(clFiles); testFile.append("CopyBuffer_simd16.cl"); pSource = loadDataFromFile( testFile.c_str(), sourceSize); ASSERT_NE(0u, sourceSize); ASSERT_NE(nullptr, pSource); const char *sourceArray[1] = {pSource.get()}; pProgram = clCreateProgramWithSource( pContext, 1, sourceArray, &sourceSize, &retVal); EXPECT_NE(nullptr, pProgram); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clBuildProgram( pProgram, 1, &testedClDevice, nullptr, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); pProgram = clCreateProgramWithSource( nullptr, 1, sourceArray, &sourceSize, nullptr); EXPECT_EQ(nullptr, pProgram); } TEST_F(clBuildProgramTests, GivenBinaryAsInputWhenCreatingProgramWithSourceThenProgramBuildSucceeds) { cl_program pProgram = nullptr; cl_int binaryStatus = CL_SUCCESS; std::unique_ptr pBinary = nullptr; size_t binarySize = 0; std::string testFile; 
retrieveBinaryKernelFilename(testFile, "CopyBuffer_simd16_", ".bin"); pBinary = loadDataFromFile( testFile.c_str(), binarySize); ASSERT_NE(0u, binarySize); ASSERT_NE(nullptr, pBinary); const unsigned char *binaries[1] = {reinterpret_cast(pBinary.get())}; pProgram = clCreateProgramWithBinary( pContext, 1, &testedClDevice, &binarySize, binaries, &binaryStatus, &retVal); pBinary.reset(); EXPECT_NE(nullptr, pProgram); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clBuildProgram( pProgram, 1, &testedClDevice, nullptr, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clBuildProgramTests, GivenBinaryAsInputWhenCreatingProgramWithBinaryForMultipleDevicesThenProgramBuildSucceeds) { MockUnrestrictiveContextMultiGPU context; cl_program pProgram = nullptr; cl_int binaryStatus = CL_SUCCESS; std::unique_ptr pBinary = nullptr; size_t binarySize = 0; std::string testFile; retrieveBinaryKernelFilename(testFile, "CopyBuffer_simd16_", ".bin"); pBinary = loadDataFromFile( testFile.c_str(), binarySize); ASSERT_NE(0u, binarySize); ASSERT_NE(nullptr, pBinary); const size_t numBinaries = 6; const unsigned char *binaries[numBinaries]; std::fill(binaries, binaries + numBinaries, reinterpret_cast(pBinary.get())); cl_device_id devicesForProgram[] = {context.pRootDevice0, context.pSubDevice00, context.pSubDevice01, context.pRootDevice1, context.pSubDevice10, context.pSubDevice11}; size_t sizeBinaries[numBinaries]; std::fill(sizeBinaries, sizeBinaries + numBinaries, binarySize); pProgram = clCreateProgramWithBinary( &context, numBinaries, devicesForProgram, sizeBinaries, binaries, &binaryStatus, &retVal); pBinary.reset(); EXPECT_NE(nullptr, pProgram); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clBuildProgram( pProgram, 0, nullptr, nullptr, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clBuildProgramTests, 
GivenProgramCreatedFromBinaryWhenBuildProgramWithOptionsIsCalledThenStoredOptionsAreUsed) { cl_program pProgram = nullptr; cl_int binaryStatus = CL_SUCCESS; size_t binarySize = 0; std::string testFile; retrieveBinaryKernelFilename(testFile, "CopyBuffer_simd16_", ".bin"); auto pBinary = loadDataFromFile( testFile.c_str(), binarySize); ASSERT_NE(0u, binarySize); ASSERT_NE(nullptr, pBinary); const unsigned char *binaries[1] = {reinterpret_cast(pBinary.get())}; pProgram = clCreateProgramWithBinary( pContext, 1, &testedClDevice, &binarySize, binaries, &binaryStatus, &retVal); auto pInternalProgram = castToObject(pProgram); pBinary.reset(); auto storedOptionsSize = pInternalProgram->getOptions().size(); EXPECT_NE(nullptr, pProgram); ASSERT_EQ(CL_SUCCESS, retVal); const char *newBuildOption = "cl-fast-relaxed-math"; retVal = clBuildProgram( pProgram, 1, &testedClDevice, newBuildOption, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); auto optionsAfterBuildSize = pInternalProgram->getOptions().size(); EXPECT_EQ(optionsAfterBuildSize, storedOptionsSize); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clBuildProgramTests, GivenSpirAsInputWhenCreatingProgramFromBinaryThenProgramBuildSucceeds) { cl_program pProgram = nullptr; cl_int binaryStatus = CL_SUCCESS; unsigned char llvm[16] = "BC\xc0\xde_unique"; size_t binarySize = sizeof(llvm); const unsigned char *binaries[1] = {llvm}; pProgram = clCreateProgramWithBinary( pContext, 1, &testedClDevice, &binarySize, binaries, &binaryStatus, &retVal); EXPECT_NE(nullptr, pProgram); ASSERT_EQ(CL_SUCCESS, retVal); MockCompilerDebugVars igcDebugVars; SProgramBinaryHeader progBin = {}; progBin.Magic = iOpenCL::MAGIC_CL; progBin.Version = iOpenCL::CURRENT_ICBE_VERSION; progBin.Device = pContext->getDevice(0)->getHardwareInfo().platform.eRenderCoreFamily; progBin.GPUPointerSizeInBytes = sizeof(uintptr_t); igcDebugVars.binaryToReturn = &progBin; igcDebugVars.binaryToReturnSize = sizeof(progBin); auto 
prevDebugVars = getIgcDebugVars(); setIgcDebugVars(igcDebugVars); retVal = clBuildProgram( pProgram, 1, &testedClDevice, "-x spir -spir-std=1.2", nullptr, nullptr); setIgcDebugVars(prevDebugVars); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clBuildProgramTests, GivenNullAsInputWhenCreatingProgramThenInvalidProgramErrorIsReturned) { retVal = clBuildProgram( nullptr, 1, nullptr, nullptr, nullptr, nullptr); EXPECT_EQ(CL_INVALID_PROGRAM, retVal); } TEST_F(clBuildProgramTests, GivenInvalidCallbackInputWhenBuildProgramThenInvalidValueErrorIsReturned) { cl_program pProgram = nullptr; cl_int binaryStatus = CL_SUCCESS; size_t binarySize = 0; std::string testFile; retrieveBinaryKernelFilename(testFile, "CopyBuffer_simd16_", ".bin"); auto pBinary = loadDataFromFile( testFile.c_str(), binarySize); ASSERT_NE(0u, binarySize); ASSERT_NE(nullptr, pBinary); const unsigned char *binaries[1] = {reinterpret_cast(pBinary.get())}; pProgram = clCreateProgramWithBinary( pContext, 1, &testedClDevice, &binarySize, binaries, &binaryStatus, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, pProgram); retVal = clBuildProgram( pProgram, 1, &testedClDevice, nullptr, nullptr, &retVal); EXPECT_EQ(CL_INVALID_VALUE, retVal); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clBuildProgramTests, GivenValidCallbackInputWhenBuildProgramThenCallbackIsInvoked) { cl_program pProgram = nullptr; cl_int binaryStatus = CL_SUCCESS; size_t binarySize = 0; std::string testFile; retrieveBinaryKernelFilename(testFile, "CopyBuffer_simd16_", ".bin"); auto pBinary = loadDataFromFile( testFile.c_str(), binarySize); ASSERT_NE(0u, binarySize); ASSERT_NE(nullptr, pBinary); const unsigned char *binaries[1] = {reinterpret_cast(pBinary.get())}; pProgram = clCreateProgramWithBinary( pContext, 1, &testedClDevice, &binarySize, binaries, &binaryStatus, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, pProgram); char 
userData = 0; retVal = clBuildProgram( pProgram, 1, &testedClDevice, nullptr, notifyFuncProgram, &userData); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ('a', userData); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clBuildProgramTests, givenProgramWhenBuildingForInvalidDevicesInputThenInvalidDeviceErrorIsReturned) { cl_program pProgram = nullptr; size_t sourceSize = 0; std::string testFile; testFile.append(clFiles); testFile.append("copybuffer.cl"); auto pSource = loadDataFromFile( testFile.c_str(), sourceSize); ASSERT_NE(0u, sourceSize); ASSERT_NE(nullptr, pSource); const char *sources[1] = {pSource.get()}; pProgram = clCreateProgramWithSource( pContext, 1, sources, &sourceSize, &retVal); EXPECT_NE(nullptr, pProgram); ASSERT_EQ(CL_SUCCESS, retVal); MockContext mockContext; cl_device_id nullDeviceInput[] = {pContext->getDevice(0), nullptr}; cl_device_id notAssociatedDeviceInput[] = {mockContext.getDevice(0)}; cl_device_id validDeviceInput[] = {pContext->getDevice(0)}; retVal = clBuildProgram( pProgram, 0, validDeviceInput, nullptr, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); retVal = clBuildProgram( pProgram, 1, nullptr, nullptr, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); retVal = clBuildProgram( pProgram, 2, nullDeviceInput, nullptr, nullptr, nullptr); EXPECT_EQ(CL_INVALID_DEVICE, retVal); retVal = clBuildProgram( pProgram, 1, notAssociatedDeviceInput, nullptr, nullptr, nullptr); EXPECT_EQ(CL_INVALID_DEVICE, retVal); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(clBuildProgramTest, givenMultiDeviceProgramWithCreatedKernelWhenBuildingThenInvalidOperationErrorIsReturned) { MockSpecializedContext context; cl_program pProgram = nullptr; size_t sourceSize = 0; cl_int retVal = CL_INVALID_PROGRAM; std::string testFile; testFile.append(clFiles); testFile.append("copybuffer.cl"); auto pSource = loadDataFromFile( testFile.c_str(), sourceSize); ASSERT_NE(0u, sourceSize); ASSERT_NE(nullptr, 
pSource); const char *sources[1] = {pSource.get()}; pProgram = clCreateProgramWithSource( &context, 1, sources, &sourceSize, &retVal); EXPECT_NE(nullptr, pProgram); ASSERT_EQ(CL_SUCCESS, retVal); cl_device_id firstSubDevice = context.pSubDevice0; cl_device_id secondSubDevice = context.pSubDevice1; retVal = clBuildProgram( pProgram, 1, &firstSubDevice, nullptr, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); auto kernel = clCreateKernel(pProgram, "fullCopy", &retVal); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clBuildProgram( pProgram, 1, &secondSubDevice, nullptr, nullptr, nullptr); EXPECT_EQ(CL_INVALID_OPERATION, retVal); retVal = clReleaseKernel(kernel); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clBuildProgram( pProgram, 1, &secondSubDevice, nullptr, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(clBuildProgramTest, givenMultiDeviceProgramWithCreatedKernelsWhenBuildingThenInvalidOperationErrorIsReturned) { MockSpecializedContext context; cl_program pProgram = nullptr; size_t sourceSize = 0; cl_int retVal = CL_INVALID_PROGRAM; std::string testFile; testFile.append(clFiles); testFile.append("copybuffer.cl"); auto pSource = loadDataFromFile( testFile.c_str(), sourceSize); ASSERT_NE(0u, sourceSize); ASSERT_NE(nullptr, pSource); const char *sources[1] = {pSource.get()}; pProgram = clCreateProgramWithSource( &context, 1, sources, &sourceSize, &retVal); EXPECT_NE(nullptr, pProgram); ASSERT_EQ(CL_SUCCESS, retVal); cl_device_id firstSubDevice = context.pSubDevice0; cl_device_id secondSubDevice = context.pSubDevice1; retVal = clBuildProgram( pProgram, 1, &firstSubDevice, nullptr, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); size_t numKernels = 0; retVal = clGetProgramInfo(pProgram, CL_PROGRAM_NUM_KERNELS, sizeof(numKernels), &numKernels, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); auto kernels = std::make_unique(numKernels); retVal = clCreateKernelsInProgram(pProgram, static_cast(numKernels), 
kernels.get(), nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clBuildProgram( pProgram, 1, &secondSubDevice, nullptr, nullptr, nullptr); EXPECT_EQ(CL_INVALID_OPERATION, retVal); for (auto i = 0u; i < numKernels; i++) { retVal = clReleaseKernel(kernels[i]); EXPECT_EQ(CL_SUCCESS, retVal); } retVal = clBuildProgram( pProgram, 1, &secondSubDevice, nullptr, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(clBuildProgramTest, givenMultiDeviceProgramWithProgramBuiltForSingleDeviceWhenCreatingKernelThenProgramAndKernelDevicesMatchAndSuccessIsReturned) { MockUnrestrictiveContextMultiGPU context; cl_program pProgram = nullptr; size_t sourceSize = 0; cl_int retVal = CL_INVALID_PROGRAM; std::string testFile; testFile.append(clFiles); testFile.append("copybuffer.cl"); auto pSource = loadDataFromFile( testFile.c_str(), sourceSize); ASSERT_NE(0u, sourceSize); ASSERT_NE(nullptr, pSource); const char *sources[1] = {pSource.get()}; pProgram = clCreateProgramWithSource( &context, 1, sources, &sourceSize, &retVal); EXPECT_NE(nullptr, pProgram); ASSERT_EQ(CL_SUCCESS, retVal); cl_device_id firstDevice = context.pRootDevice0; cl_device_id firstSubDevice = context.pSubDevice00; cl_device_id secondSubDevice = context.pSubDevice01; retVal = clBuildProgram( pProgram, 1, &firstDevice, nullptr, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); cl_kernel pKernel = clCreateKernel(pProgram, "fullCopy", &retVal); EXPECT_EQ(CL_SUCCESS, retVal); MultiDeviceKernel *kernel = castToObject(pKernel); auto devs = kernel->getDevices(); EXPECT_EQ(devs[0], firstDevice); EXPECT_EQ(devs[1], firstSubDevice); EXPECT_EQ(devs[2], secondSubDevice); retVal = clReleaseKernel(pKernel); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(clBuildProgramTest, 
givenMultiDeviceProgramWithProgramBuiltForSingleDeviceWithCreatedKernelWhenBuildingProgramForSecondDeviceThenInvalidOperationReturned) { MockUnrestrictiveContextMultiGPU context; cl_program pProgram = nullptr; size_t sourceSize = 0; cl_int retVal = CL_INVALID_PROGRAM; std::string testFile; testFile.append(clFiles); testFile.append("copybuffer.cl"); auto pSource = loadDataFromFile( testFile.c_str(), sourceSize); ASSERT_NE(0u, sourceSize); ASSERT_NE(nullptr, pSource); const char *sources[1] = {pSource.get()}; pProgram = clCreateProgramWithSource( &context, 1, sources, &sourceSize, &retVal); EXPECT_NE(nullptr, pProgram); ASSERT_EQ(CL_SUCCESS, retVal); cl_device_id firstDevice = context.pRootDevice0; cl_device_id secondDevice = context.pRootDevice1; retVal = clBuildProgram( pProgram, 1, &firstDevice, nullptr, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); cl_kernel kernel = clCreateKernel(pProgram, "fullCopy", &retVal); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clBuildProgram( pProgram, 1, &secondDevice, nullptr, nullptr, nullptr); EXPECT_EQ(CL_INVALID_OPERATION, retVal); retVal = clReleaseKernel(kernel); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clBuildProgram( pProgram, 1, &secondDevice, nullptr, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); kernel = clCreateKernel(pProgram, "fullCopy", &retVal); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseKernel(kernel); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(clBuildProgramTest, givenMultiDeviceProgramWithProgramBuiltForMultipleDevicesSeparatelyWithCreatedKernelThenProgramAndKernelDevicesMatch) { MockUnrestrictiveContextMultiGPU context; cl_program pProgram = nullptr; size_t sourceSize = 0; cl_int retVal = CL_INVALID_PROGRAM; std::string testFile; testFile.append(clFiles); testFile.append("copybuffer.cl"); auto pSource = loadDataFromFile( testFile.c_str(), sourceSize); ASSERT_NE(0u, sourceSize); ASSERT_NE(nullptr, pSource); const char *sources[1] = 
{pSource.get()}; pProgram = clCreateProgramWithSource( &context, 1, sources, &sourceSize, &retVal); EXPECT_NE(nullptr, pProgram); ASSERT_EQ(CL_SUCCESS, retVal); cl_device_id firstDevice = context.pRootDevice0; cl_device_id secondDevice = context.pRootDevice1; retVal = clBuildProgram( pProgram, 1, &firstDevice, nullptr, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clBuildProgram( pProgram, 1, &secondDevice, nullptr, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); cl_kernel pKernel = clCreateKernel(pProgram, "fullCopy", &retVal); EXPECT_EQ(CL_SUCCESS, retVal); MultiDeviceKernel *kernel = castToObject(pKernel); Program *program = castToObject(pProgram); EXPECT_EQ(kernel->getDevices(), program->getDevices()); retVal = clReleaseKernel(pKernel); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_clone_kernel_tests.inl000066400000000000000000000044101422164147700274430ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/file_io.h" #include "shared/test/common/helpers/test_files.h" #include "opencl/source/context/context.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clCloneKernelTests; namespace ULT { TEST_F(clCloneKernelTests, GivenNullKernelWhenCloningKernelThenNullIsReturned) { auto kernel = clCloneKernel(nullptr, nullptr); EXPECT_EQ(nullptr, kernel); } TEST_F(clCloneKernelTests, GivenNullKernelWhenCloningKernelThenInvalidKernelErrorIsReturned) { clCloneKernel(nullptr, &retVal); EXPECT_EQ(CL_INVALID_KERNEL, retVal); } TEST_F(clCloneKernelTests, GivenValidKernelWhenCloningKernelThenSuccessIsReturned) { cl_kernel pSourceKernel = nullptr; cl_kernel pClonedKernel = nullptr; cl_program pProgram = nullptr; cl_int binaryStatus = CL_SUCCESS; size_t binarySize = 0; std::string testFile; retrieveBinaryKernelFilename(testFile, 
"CopyBuffer_simd16_", ".bin"); auto pBinary = loadDataFromFile( testFile.c_str(), binarySize); ASSERT_NE(0u, binarySize); ASSERT_NE(nullptr, pBinary); const unsigned char *binaries[1] = {reinterpret_cast(pBinary.get())}; pProgram = clCreateProgramWithBinary( pContext, 1, &testedClDevice, &binarySize, binaries, &binaryStatus, &retVal); pBinary.reset(); EXPECT_NE(nullptr, pProgram); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clBuildProgram( pProgram, 1, &testedClDevice, nullptr, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); pSourceKernel = clCreateKernel( pProgram, "CopyBuffer", &retVal); EXPECT_NE(nullptr, pSourceKernel); ASSERT_EQ(CL_SUCCESS, retVal); pClonedKernel = clCloneKernel( pSourceKernel, &retVal); EXPECT_NE(nullptr, pClonedKernel); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clReleaseKernel(pClonedKernel); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clReleaseKernel(pSourceKernel); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_compile_program_tests.inl000066400000000000000000000260621422164147700301710ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/compiler_interface/compiler_interface.h" #include "shared/source/helpers/file_io.h" #include "shared/test/common/helpers/kernel_binary_helper.h" #include "shared/test/common/helpers/test_files.h" #include "opencl/source/context/context.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clCompileProgramTests; namespace ULT { TEST_F(clCompileProgramTests, GivenKernelAsSingleSourceWhenCompilingProgramThenSuccessIsReturned) { cl_program pProgram = nullptr; size_t sourceSize = 0; std::string testFile; KernelBinaryHelper kbHelper("copybuffer", false); testFile.append(clFiles); testFile.append("copybuffer.cl"); auto pSource = loadDataFromFile( testFile.c_str(), sourceSize); ASSERT_NE(0u, 
sourceSize); ASSERT_NE(nullptr, pSource); const char *sources[1] = {pSource.get()}; pProgram = clCreateProgramWithSource( pContext, 1, sources, &sourceSize, &retVal); EXPECT_NE(nullptr, pProgram); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clCompileProgram( pProgram, 1, &testedClDevice, nullptr, 0, nullptr, nullptr, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clCompileProgramTests, GivenKernelAsSourceWithHeaderWhenCompilingProgramThenSuccessIsReturned) { cl_program pProgram = nullptr; cl_program pHeader = nullptr; size_t sourceSize = 0; std::string testFile; const char *simpleHeaderName = "simple_header.h"; testFile.append(clFiles); testFile.append("/copybuffer_with_header.cl"); auto pSource = loadDataFromFile( testFile.c_str(), sourceSize); ASSERT_NE(0u, sourceSize); ASSERT_NE(nullptr, pSource); const char *sources[1] = {pSource.get()}; pProgram = clCreateProgramWithSource( pContext, 1, sources, &sourceSize, &retVal); EXPECT_NE(nullptr, pProgram); ASSERT_EQ(CL_SUCCESS, retVal); testFile.clear(); testFile.append(clFiles); testFile.append("simple_header.h"); auto pHeaderSource = loadDataFromFile( testFile.c_str(), sourceSize); ASSERT_NE(0u, sourceSize); ASSERT_NE(nullptr, pHeaderSource); const char *headerSources[1] = {pHeaderSource.get()}; pHeader = clCreateProgramWithSource( pContext, 1, headerSources, &sourceSize, &retVal); EXPECT_NE(nullptr, pHeader); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clCompileProgram( pProgram, 1, &testedClDevice, nullptr, 1, &pHeader, &simpleHeaderName, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseProgram(pHeader); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clCompileProgramTests, GivenNullProgramWhenCompilingProgramThenInvalidProgramErrorIsReturned) { retVal = clCompileProgram( nullptr, 1, nullptr, "", 0, nullptr, nullptr, nullptr, nullptr); EXPECT_EQ(CL_INVALID_PROGRAM, 
retVal); } TEST_F(clCompileProgramTests, GivenInvalidCallbackInputWhenCompileProgramThenInvalidValueErrorIsReturned) { cl_program pProgram = nullptr; size_t sourceSize = 0; std::string testFile; testFile.append(clFiles); testFile.append("copybuffer.cl"); auto pSource = loadDataFromFile( testFile.c_str(), sourceSize); ASSERT_NE(0u, sourceSize); ASSERT_NE(nullptr, pSource); const char *sources[1] = {pSource.get()}; pProgram = clCreateProgramWithSource( pContext, 1, sources, &sourceSize, &retVal); EXPECT_NE(nullptr, pProgram); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clCompileProgram( pProgram, 1, &testedClDevice, nullptr, 0, nullptr, nullptr, nullptr, &retVal); EXPECT_EQ(CL_INVALID_VALUE, retVal); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clCompileProgramTests, GivenValidCallbackInputWhenLinkProgramThenCallbackIsInvoked) { cl_program pProgram = nullptr; size_t sourceSize = 0; std::string testFile; testFile.append(clFiles); testFile.append("copybuffer.cl"); auto pSource = loadDataFromFile( testFile.c_str(), sourceSize); ASSERT_NE(0u, sourceSize); ASSERT_NE(nullptr, pSource); const char *sources[1] = {pSource.get()}; pProgram = clCreateProgramWithSource( pContext, 1, sources, &sourceSize, &retVal); EXPECT_NE(nullptr, pProgram); ASSERT_EQ(CL_SUCCESS, retVal); char userData = 0; retVal = clCompileProgram( pProgram, 1, &testedClDevice, nullptr, 0, nullptr, nullptr, notifyFuncProgram, &userData); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ('a', userData); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(clCompileProgramTest, givenProgramWhenCompilingForInvalidDevicesInputThenInvalidDeviceErrorIsReturned) { cl_program pProgram = nullptr; std::unique_ptr pSource = nullptr; size_t sourceSize = 0; std::string testFile; KernelBinaryHelper kbHelper("CopyBuffer_simd16"); testFile.append(clFiles); testFile.append("CopyBuffer_simd16.cl"); pSource = loadDataFromFile( testFile.c_str(), sourceSize); ASSERT_NE(0u, sourceSize); 
ASSERT_NE(nullptr, pSource); const char *sources[1] = {pSource.get()}; MockUnrestrictiveContextMultiGPU context; cl_int retVal = CL_INVALID_PROGRAM; pProgram = clCreateProgramWithSource( &context, 1, sources, &sourceSize, &retVal); EXPECT_NE(nullptr, pProgram); ASSERT_EQ(CL_SUCCESS, retVal); MockContext mockContext; cl_device_id nullDeviceInput[] = {context.getDevice(0), nullptr}; cl_device_id notAssociatedDeviceInput[] = {mockContext.getDevice(0)}; cl_device_id validDeviceInput[] = {context.getDevice(0)}; retVal = clCompileProgram( pProgram, 0, validDeviceInput, nullptr, 0, nullptr, nullptr, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); retVal = clCompileProgram( pProgram, 1, nullptr, nullptr, 0, nullptr, nullptr, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); retVal = clCompileProgram( pProgram, 2, nullDeviceInput, nullptr, 0, nullptr, nullptr, nullptr, nullptr); EXPECT_EQ(CL_INVALID_DEVICE, retVal); retVal = clCompileProgram( pProgram, 1, notAssociatedDeviceInput, nullptr, 0, nullptr, nullptr, nullptr, nullptr); EXPECT_EQ(CL_INVALID_DEVICE, retVal); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(clCompileProgramTest, givenMultiDeviceProgramWithCreatedKernelWhenCompilingThenInvalidOperationErrorIsReturned) { MockSpecializedContext context; cl_program pProgram = nullptr; size_t sourceSize = 0; cl_int retVal = CL_INVALID_PROGRAM; std::string testFile; testFile.append(clFiles); testFile.append("copybuffer.cl"); auto pSource = loadDataFromFile( testFile.c_str(), sourceSize); ASSERT_NE(0u, sourceSize); ASSERT_NE(nullptr, pSource); const char *sources[1] = {pSource.get()}; pProgram = clCreateProgramWithSource( &context, 1, sources, &sourceSize, &retVal); EXPECT_NE(nullptr, pProgram); ASSERT_EQ(CL_SUCCESS, retVal); cl_device_id firstSubDevice = context.pSubDevice0; cl_device_id secondSubDevice = context.pSubDevice1; retVal = clBuildProgram( pProgram, 1, &firstSubDevice, nullptr, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, 
retVal); auto kernel = clCreateKernel(pProgram, "fullCopy", &retVal); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clCompileProgram( pProgram, 1, &secondSubDevice, nullptr, 0, nullptr, nullptr, nullptr, nullptr); EXPECT_EQ(CL_INVALID_OPERATION, retVal); retVal = clReleaseKernel(kernel); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clCompileProgram( pProgram, 1, &secondSubDevice, nullptr, 0, nullptr, nullptr, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(clCompileProgramTest, givenMultiDeviceProgramWithCreatedKernelsWhenCompilingThenInvalidOperationErrorIsReturned) { MockSpecializedContext context; cl_program pProgram = nullptr; size_t sourceSize = 0; cl_int retVal = CL_INVALID_PROGRAM; std::string testFile; testFile.append(clFiles); testFile.append("copybuffer.cl"); auto pSource = loadDataFromFile( testFile.c_str(), sourceSize); ASSERT_NE(0u, sourceSize); ASSERT_NE(nullptr, pSource); const char *sources[1] = {pSource.get()}; pProgram = clCreateProgramWithSource( &context, 1, sources, &sourceSize, &retVal); EXPECT_NE(nullptr, pProgram); ASSERT_EQ(CL_SUCCESS, retVal); cl_device_id firstSubDevice = context.pSubDevice0; cl_device_id secondSubDevice = context.pSubDevice1; retVal = clBuildProgram( pProgram, 1, &firstSubDevice, nullptr, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); size_t numKernels = 0; retVal = clGetProgramInfo(pProgram, CL_PROGRAM_NUM_KERNELS, sizeof(numKernels), &numKernels, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); auto kernels = std::make_unique(numKernels); retVal = clCreateKernelsInProgram(pProgram, static_cast(numKernels), kernels.get(), nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clCompileProgram( pProgram, 1, &secondSubDevice, nullptr, 0, nullptr, nullptr, nullptr, nullptr); EXPECT_EQ(CL_INVALID_OPERATION, retVal); for (auto i = 0u; i < numKernels; i++) { retVal = clReleaseKernel(kernels[i]); EXPECT_EQ(CL_SUCCESS, retVal); } retVal = clCompileProgram( pProgram, 1, 
&secondSubDevice, nullptr, 0, nullptr, nullptr, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_create_buffer_tests.cpp000066400000000000000000000570561422164147700276150ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/context/context.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clCreateBufferTests; namespace ClCreateBufferTests { class clCreateBufferTemplateTests : public ApiFixture<>, public testing::TestWithParam { void SetUp() override { ApiFixture::SetUp(); } void TearDown() override { ApiFixture::TearDown(); } }; struct clCreateBufferValidFlagsTests : public clCreateBufferTemplateTests { cl_uchar pHostPtr[64]; }; TEST_P(clCreateBufferValidFlagsTests, GivenValidFlagsWhenCreatingBufferThenBufferIsCreated) { cl_mem_flags flags = GetParam() | CL_MEM_USE_HOST_PTR; auto buffer = clCreateBuffer(pContext, flags, 64, pHostPtr, &retVal); EXPECT_NE(nullptr, buffer); EXPECT_EQ(CL_SUCCESS, retVal); clReleaseMemObject(buffer); cl_mem_properties_intel properties[] = {CL_MEM_FLAGS, flags, 0}; buffer = clCreateBufferWithPropertiesINTEL(pContext, properties, 0, 64, pHostPtr, &retVal); EXPECT_NE(nullptr, buffer); EXPECT_EQ(CL_SUCCESS, retVal); clReleaseMemObject(buffer); buffer = clCreateBufferWithPropertiesINTEL(pContext, nullptr, flags, 64, pHostPtr, &retVal); EXPECT_NE(nullptr, buffer); EXPECT_EQ(CL_SUCCESS, retVal); clReleaseMemObject(buffer); }; static 
cl_mem_flags validFlags[] = { CL_MEM_READ_WRITE | CL_MEM_HOST_READ_ONLY, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY, CL_MEM_HOST_READ_ONLY, CL_MEM_HOST_WRITE_ONLY, CL_MEM_HOST_NO_ACCESS, CL_MEM_FORCE_HOST_MEMORY_INTEL}; INSTANTIATE_TEST_CASE_P( CreateBufferCheckFlags, clCreateBufferValidFlagsTests, testing::ValuesIn(validFlags)); using clCreateBufferInvalidFlagsTests = clCreateBufferTemplateTests; TEST_P(clCreateBufferInvalidFlagsTests, GivenInvalidFlagsWhenCreatingBufferThenBufferIsNotCreated) { cl_mem_flags flags = GetParam(); auto buffer = clCreateBuffer(pContext, flags, 64, nullptr, &retVal); EXPECT_EQ(nullptr, buffer); EXPECT_EQ(CL_INVALID_VALUE, retVal); cl_mem_properties_intel properties[] = {CL_MEM_FLAGS, flags, 0}; buffer = clCreateBufferWithPropertiesINTEL(pContext, properties, 0, 64, nullptr, &retVal); EXPECT_EQ(nullptr, buffer); EXPECT_EQ(CL_INVALID_PROPERTY, retVal); buffer = clCreateBufferWithPropertiesINTEL(pContext, nullptr, flags, 64, nullptr, &retVal); EXPECT_EQ(nullptr, buffer); EXPECT_EQ(CL_INVALID_VALUE, retVal); }; cl_mem_flags invalidFlags[] = { CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY, CL_MEM_READ_WRITE | CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY, CL_MEM_ALLOC_HOST_PTR | CL_MEM_USE_HOST_PTR, CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR, CL_MEM_HOST_NO_ACCESS | CL_MEM_HOST_WRITE_ONLY, CL_MEM_HOST_NO_ACCESS | CL_MEM_HOST_READ_ONLY, CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_WRITE_ONLY, 0xffcc, }; INSTANTIATE_TEST_CASE_P( CreateBufferCheckFlags, clCreateBufferInvalidFlagsTests, testing::ValuesIn(invalidFlags)); using clCreateBufferValidFlagsIntelTests = clCreateBufferTemplateTests; TEST_P(clCreateBufferValidFlagsIntelTests, GivenValidFlagsIntelWhenCreatingBufferThenBufferIsCreated) { cl_mem_properties_intel properties[] = {CL_MEM_FLAGS_INTEL, GetParam(), 0}; auto buffer = clCreateBufferWithPropertiesINTEL(pContext, properties, 0, 64, nullptr, &retVal); EXPECT_NE(nullptr, buffer); EXPECT_EQ(CL_SUCCESS, retVal); 
clReleaseMemObject(buffer); }; static cl_mem_flags validFlagsIntel[] = { CL_MEM_LOCALLY_UNCACHED_RESOURCE, CL_MEM_LOCALLY_UNCACHED_SURFACE_STATE_RESOURCE, CL_MEM_48BIT_RESOURCE_INTEL}; INSTANTIATE_TEST_CASE_P( CreateBufferCheckFlagsIntel, clCreateBufferValidFlagsIntelTests, testing::ValuesIn(validFlagsIntel)); using clCreateBufferInvalidFlagsIntelTests = clCreateBufferTemplateTests; TEST_P(clCreateBufferInvalidFlagsIntelTests, GivenInvalidFlagsIntelWhenCreatingBufferThenBufferIsNotCreated) { cl_mem_properties_intel properties[] = {CL_MEM_FLAGS_INTEL, GetParam(), 0}; auto buffer = clCreateBufferWithPropertiesINTEL(pContext, properties, 0, 64, nullptr, &retVal); EXPECT_EQ(nullptr, buffer); EXPECT_EQ(CL_INVALID_PROPERTY, retVal); }; cl_mem_flags invalidFlagsIntel[] = { 0xffcc, }; INSTANTIATE_TEST_CASE_P( CreateBufferCheckFlagsIntel, clCreateBufferInvalidFlagsIntelTests, testing::ValuesIn(invalidFlagsIntel)); using clCreateBufferInvalidProperties = clCreateBufferTemplateTests; TEST_F(clCreateBufferInvalidProperties, GivenInvalidPropertyKeyWhenCreatingBufferThenBufferIsNotCreated) { cl_mem_properties_intel properties[] = {(cl_mem_properties_intel(1) << 31), 0, 0}; auto buffer = clCreateBufferWithPropertiesINTEL(pContext, properties, 0, 64, nullptr, &retVal); EXPECT_EQ(nullptr, buffer); EXPECT_EQ(CL_INVALID_PROPERTY, retVal); }; TEST_F(clCreateBufferTests, GivenValidParametersWhenCreatingBufferThenSuccessIsReturned) { cl_mem_flags flags = CL_MEM_USE_HOST_PTR; static const unsigned int bufferSize = 16; cl_mem buffer = nullptr; unsigned char pHostMem[bufferSize]; memset(pHostMem, 0xaa, bufferSize); buffer = clCreateBuffer(pContext, flags, bufferSize, pHostMem, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); retVal = clReleaseMemObject(buffer); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clCreateBufferTests, GivenForceExtendedBufferSizeDebugFlagWhenBufferIsCreatedThenSizeIsProperlyExtended) { DebugManagerStateRestore restorer; unsigned char *pHostMem = 
nullptr; cl_mem_flags flags = 0; constexpr auto bufferSize = 16; auto pageSizeNumber = 1; DebugManager.flags.ForceExtendedBufferSize.set(pageSizeNumber); auto extendedBufferSize = bufferSize + MemoryConstants::pageSize * pageSizeNumber; auto buffer = clCreateBuffer(pContext, flags, bufferSize, pHostMem, &retVal); EXPECT_NE(nullptr, buffer); EXPECT_EQ(CL_SUCCESS, retVal); auto bufferObj = NEO::castToObject(buffer); EXPECT_EQ(extendedBufferSize, bufferObj->getSize()); clReleaseMemObject(buffer); pageSizeNumber = 4; DebugManager.flags.ForceExtendedBufferSize.set(pageSizeNumber); extendedBufferSize = bufferSize + MemoryConstants::pageSize * pageSizeNumber; buffer = clCreateBufferWithProperties(pContext, nullptr, flags, bufferSize, pHostMem, &retVal); EXPECT_NE(nullptr, buffer); EXPECT_EQ(CL_SUCCESS, retVal); bufferObj = NEO::castToObject(buffer); EXPECT_EQ(extendedBufferSize, bufferObj->getSize()); clReleaseMemObject(buffer); pageSizeNumber = 6; DebugManager.flags.ForceExtendedBufferSize.set(pageSizeNumber); extendedBufferSize = bufferSize + MemoryConstants::pageSize * pageSizeNumber; buffer = clCreateBufferWithPropertiesINTEL(pContext, nullptr, flags, bufferSize, pHostMem, &retVal); EXPECT_NE(nullptr, buffer); EXPECT_EQ(CL_SUCCESS, retVal); bufferObj = NEO::castToObject(buffer); EXPECT_EQ(extendedBufferSize, bufferObj->getSize()); clReleaseMemObject(buffer); } TEST_F(clCreateBufferTests, GivenNullContextWhenCreatingBufferThenInvalidContextErrorIsReturned) { unsigned char *pHostMem = nullptr; cl_mem_flags flags = 0; static const unsigned int bufferSize = 16; clCreateBuffer(nullptr, flags, bufferSize, pHostMem, &retVal); ASSERT_EQ(CL_INVALID_CONTEXT, retVal); clCreateBufferWithPropertiesINTEL(nullptr, nullptr, 0, bufferSize, pHostMem, &retVal); ASSERT_EQ(CL_INVALID_CONTEXT, retVal); } TEST_F(clCreateBufferTests, GivenBufferSizeZeroWhenCreatingBufferThenInvalidBufferSizeErrorIsReturned) { uint8_t hostData = 0; clCreateBuffer(pContext, CL_MEM_USE_HOST_PTR, 0, &hostData, 
&retVal); ASSERT_EQ(CL_INVALID_BUFFER_SIZE, retVal); } TEST_F(clCreateBufferTests, GivenInvalidHostPointerWhenCreatingBufferThenInvalidHostPointerErrorIsReturned) { uint32_t hostData = 0; cl_mem_flags flags = 0; clCreateBuffer(pContext, flags, sizeof(uint32_t), &hostData, &retVal); ASSERT_EQ(CL_INVALID_HOST_PTR, retVal); } TEST_F(clCreateBufferTests, GivenNullHostPointerAndMemCopyHostPtrFlagWhenCreatingBufferThenInvalidHostPointerErrorIsReturned) { cl_mem_flags flags = CL_MEM_COPY_HOST_PTR; clCreateBuffer(pContext, flags, sizeof(uint32_t), nullptr, &retVal); ASSERT_EQ(CL_INVALID_HOST_PTR, retVal); } TEST_F(clCreateBufferTests, GivenNullHostPointerAndMemUseHostPtrFlagWhenCreatingBufferThenInvalidHostPointerErrorIsReturned) { cl_mem_flags flags = CL_MEM_USE_HOST_PTR; clCreateBuffer(pContext, flags, sizeof(uint32_t), nullptr, &retVal); ASSERT_EQ(CL_INVALID_HOST_PTR, retVal); } TEST_F(clCreateBufferTests, GivenMemWriteOnlyFlagAndMemReadWriteFlagWhenCreatingBufferThenInvalidValueErrorIsReturned) { cl_mem_flags flags = CL_MEM_WRITE_ONLY | CL_MEM_READ_WRITE; clCreateBuffer(pContext, flags, 16, nullptr, &retVal); ASSERT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(clCreateBufferTests, GivenBufferSizeOverMaxMemAllocSizeWhenCreatingBufferThenInvalidBufferSizeErrorIsReturned) { auto pDevice = pContext->getDevice(0); size_t size = static_cast(pDevice->getDevice().getDeviceInfo().maxMemAllocSize) + 1; auto buffer = clCreateBuffer(pContext, CL_MEM_ALLOC_HOST_PTR, size, nullptr, &retVal); EXPECT_EQ(CL_INVALID_BUFFER_SIZE, retVal); EXPECT_EQ(nullptr, buffer); } TEST_F(clCreateBufferTests, GivenBufferSizeOverMaxMemAllocSizeWhenCreateBufferWithPropertiesINTELThenInvalidBufferSizeErrorIsReturned) { auto pDevice = pContext->getDevice(0); size_t size = static_cast(pDevice->getDevice().getDeviceInfo().maxMemAllocSize) + 1; auto buffer = clCreateBufferWithPropertiesINTEL(pContext, nullptr, 0, size, nullptr, &retVal); EXPECT_EQ(CL_INVALID_BUFFER_SIZE, retVal); EXPECT_EQ(nullptr, buffer); } 
TEST_F(clCreateBufferTests, GivenBufferSizeOverMaxMemAllocSizeAndClMemAllowUnrestirctedSizeFlagWhenCreatingBufferThenClSuccessIsReturned) { auto pDevice = pContext->getDevice(0); uint64_t bigSize = GB * 5; size_t size = static_cast(bigSize); cl_mem_flags flags = CL_MEM_ALLOC_HOST_PTR | CL_MEM_ALLOW_UNRESTRICTED_SIZE_INTEL; auto memoryManager = static_cast(pDevice->getMemoryManager()); memoryManager->turnOnFakingBigAllocations(); if (memoryManager->peekForce32BitAllocations() || is32bit) { GTEST_SKIP(); } auto buffer = clCreateBuffer(pContext, flags, size, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); retVal = clReleaseMemObject(buffer); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clCreateBufferTests, GivenBufferSizeOverMaxMemAllocSizeAndClMemAllowUnrestirctedSizeFlagWhenCreatingBufferWithPropertiesINTELThenClSuccesssIsReturned) { auto pDevice = pContext->getDevice(0); uint64_t bigSize = GB * 5; size_t size = static_cast(bigSize); cl_mem_properties_intel properties[] = {CL_MEM_FLAGS_INTEL, CL_MEM_ALLOW_UNRESTRICTED_SIZE_INTEL, 0}; auto memoryManager = static_cast(pDevice->getMemoryManager()); memoryManager->turnOnFakingBigAllocations(); if (memoryManager->peekForce32BitAllocations() || is32bit) { GTEST_SKIP(); } auto buffer = clCreateBufferWithPropertiesINTEL(pContext, properties, 0, size, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); retVal = clReleaseMemObject(buffer); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clCreateBufferTests, GivenBufferSizeOverMaxMemAllocSizeAndDebugFlagSetWhenCreatingBufferThenClSuccessIsReturned) { DebugManagerStateRestore restorer; DebugManager.flags.AllowUnrestrictedSize.set(1); auto pDevice = pContext->getDevice(0); size_t size = static_cast(pDevice->getDevice().getDeviceInfo().maxMemAllocSize) + 1; auto memoryManager = static_cast(pDevice->getMemoryManager()); memoryManager->turnOnFakingBigAllocations(); if (memoryManager->peekForce32BitAllocations() || is32bit) { GTEST_SKIP(); 
} auto buffer = clCreateBuffer(pContext, 0, size, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); retVal = clReleaseMemObject(buffer); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clCreateBufferTests, GivenNullHostPointerAndMemCopyHostPtrFlagWhenCreatingBufferThenNullIsReturned) { cl_mem_flags flags = CL_MEM_USE_HOST_PTR; static const unsigned int bufferSize = 16; cl_mem buffer = nullptr; unsigned char pHostMem[bufferSize]; memset(pHostMem, 0xaa, bufferSize); buffer = clCreateBuffer(pContext, flags, bufferSize, pHostMem, nullptr); EXPECT_NE(nullptr, buffer); retVal = clReleaseMemObject(buffer); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clCreateBufferTests, WhenCreatingBufferWithPropertiesThenParametersAreCorrectlyPassed) { VariableBackup bufferCreateBackup{&BufferFunctions::validateInputAndCreateBuffer}; cl_context context = pContext; cl_mem_properties *propertiesValues[] = {nullptr, reinterpret_cast(0x1234)}; cl_mem_flags flagsValues[] = {0, 4321}; size_t bufferSize = 128; void *pHostMem = reinterpret_cast(0x8000); for (auto properties : propertiesValues) { for (auto flags : flagsValues) { auto mockFunction = [context, properties, flags, bufferSize, pHostMem](cl_context contextArg, const cl_mem_properties *propertiesArg, cl_mem_flags flagsArg, cl_mem_flags_intel flagsIntelArg, size_t sizeArg, void *hostPtrArg, cl_int &retValArg) -> cl_mem { cl_mem_flags_intel expectedFlagsIntelArg = 0; EXPECT_EQ(context, contextArg); EXPECT_EQ(properties, propertiesArg); EXPECT_EQ(flags, flagsArg); EXPECT_EQ(expectedFlagsIntelArg, flagsIntelArg); EXPECT_EQ(bufferSize, sizeArg); EXPECT_EQ(pHostMem, hostPtrArg); return nullptr; }; bufferCreateBackup = mockFunction; clCreateBufferWithProperties(context, properties, flags, bufferSize, pHostMem, nullptr); } } } TEST_F(clCreateBufferTests, WhenCreatingBufferWithPropertiesThenErrorCodeIsCorrectlySet) { VariableBackup bufferCreateBackup{&BufferFunctions::validateInputAndCreateBuffer}; cl_mem_properties *properties = 
nullptr; cl_mem_flags flags = 0; size_t bufferSize = 128; void *pHostMem = nullptr; cl_int errcodeRet; cl_int retValues[] = {CL_SUCCESS, CL_INVALID_PROPERTY}; for (auto retValue : retValues) { auto mockFunction = [retValue](cl_context contextArg, const cl_mem_properties *propertiesArg, cl_mem_flags flagsArg, cl_mem_flags_intel flagsIntelArg, size_t sizeArg, void *hostPtrArg, cl_int &retValArg) -> cl_mem { retValArg = retValue; return nullptr; }; bufferCreateBackup = mockFunction; clCreateBufferWithProperties(pContext, properties, flags, bufferSize, pHostMem, &errcodeRet); EXPECT_EQ(retValue, errcodeRet); } } TEST_F(clCreateBufferTests, GivenBufferCreatedWithNullPropertiesWhenQueryingPropertiesThenNothingIsReturned) { cl_int retVal = CL_SUCCESS; size_t size = 10; auto buffer = clCreateBufferWithPropertiesINTEL(pContext, nullptr, 0, size, nullptr, &retVal); EXPECT_EQ(retVal, CL_SUCCESS); EXPECT_NE(nullptr, buffer); size_t propertiesSize; retVal = clGetMemObjectInfo(buffer, CL_MEM_PROPERTIES, 0, nullptr, &propertiesSize); EXPECT_EQ(retVal, CL_SUCCESS); EXPECT_EQ(0u, propertiesSize); clReleaseMemObject(buffer); } TEST_F(clCreateBufferTests, WhenCreatingBufferWithPropertiesThenPropertiesAreCorrectlyStored) { cl_int retVal = CL_SUCCESS; size_t size = 10; cl_mem_properties properties[5]; size_t propertiesSize; std::vector> propertiesToTest{ {0}, {CL_MEM_FLAGS, CL_MEM_WRITE_ONLY, 0}, {CL_MEM_FLAGS_INTEL, CL_MEM_LOCALLY_UNCACHED_RESOURCE, 0}, {CL_MEM_FLAGS, CL_MEM_WRITE_ONLY, CL_MEM_FLAGS_INTEL, CL_MEM_LOCALLY_UNCACHED_RESOURCE, 0}}; for (auto testProperties : propertiesToTest) { auto buffer = clCreateBufferWithPropertiesINTEL(pContext, testProperties.data(), 0, size, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); retVal = clGetMemObjectInfo(buffer, CL_MEM_PROPERTIES, sizeof(properties), properties, &propertiesSize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(testProperties.size() * sizeof(cl_mem_properties), propertiesSize); for (size_t i = 0; 
i < testProperties.size(); i++) { EXPECT_EQ(testProperties[i], properties[i]); } retVal = clReleaseMemObject(buffer); } } using clCreateBufferTestsWithRestrictions = api_test_using_aligned_memory_manager; TEST_F(clCreateBufferTestsWithRestrictions, GivenMemoryManagerRestrictionsWhenMinIsLessThanHostPtrThenUseZeroCopy) { std::unique_ptr hostMem(nullptr); unsigned char *destMem = nullptr; cl_mem_flags flags = CL_MEM_USE_HOST_PTR; const unsigned int bufferSize = MemoryConstants::pageSize * 3; const unsigned int destBufferSize = MemoryConstants::pageSize; cl_mem buffer = nullptr; uintptr_t minAddress = 0; MockAllocSysMemAgnosticMemoryManager *memMngr = reinterpret_cast(device->getMemoryManager()); memMngr->ptrRestrictions = &memMngr->testRestrictions; EXPECT_EQ(minAddress, memMngr->ptrRestrictions->minAddress); hostMem.reset(new unsigned char[bufferSize]); destMem = hostMem.get(); destMem += MemoryConstants::pageSize; destMem -= (reinterpret_cast(destMem) % MemoryConstants::pageSize); buffer = clCreateBuffer(context, flags, destBufferSize, destMem, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); Buffer *bufferObj = NEO::castToObject(buffer); EXPECT_TRUE(bufferObj->isMemObjZeroCopy()); retVal = clReleaseMemObject(buffer); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clCreateBufferTestsWithRestrictions, GivenMemoryManagerRestrictionsWhenMinIsLessThanHostPtrThenCreateCopy) { std::unique_ptr hostMem(nullptr); unsigned char *destMem = nullptr; cl_mem_flags flags = CL_MEM_USE_HOST_PTR; const unsigned int realBufferSize = MemoryConstants::pageSize * 3; const unsigned int destBufferSize = MemoryConstants::pageSize; cl_mem buffer = nullptr; MockAllocSysMemAgnosticMemoryManager *memMngr = reinterpret_cast(device->getMemoryManager()); memMngr->ptrRestrictions = &memMngr->testRestrictions; hostMem.reset(new unsigned char[realBufferSize]); destMem = hostMem.get(); destMem += MemoryConstants::pageSize; destMem -= (reinterpret_cast(destMem) % 
MemoryConstants::pageSize); memMngr->ptrRestrictions->minAddress = reinterpret_cast(destMem) + 1; buffer = clCreateBuffer(context, flags, destBufferSize, destMem, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); Buffer *bufferObj = NEO::castToObject(buffer); EXPECT_FALSE(bufferObj->isMemObjZeroCopy()); retVal = clReleaseMemObject(buffer); EXPECT_EQ(CL_SUCCESS, retVal); } using clCreateBufferWithMultiDeviceContextTests = clCreateBufferTemplateTests; TEST_P(clCreateBufferWithMultiDeviceContextTests, GivenBufferCreatedWithContextdWithMultiDeviceThenGraphicsAllocationsAreProperlyFilled) { UltClDeviceFactory deviceFactory{2, 0}; DebugManager.flags.EnableMultiRootDeviceContexts.set(true); cl_device_id devices[] = {deviceFactory.rootDevices[0], deviceFactory.rootDevices[1]}; auto context = clCreateContext(nullptr, 2u, devices, nullptr, nullptr, &retVal); EXPECT_NE(nullptr, context); EXPECT_EQ(CL_SUCCESS, retVal); auto pContext = castToObject(context); EXPECT_EQ(1u, pContext->getMaxRootDeviceIndex()); constexpr auto bufferSize = 64u; auto hostBuffer = alignedMalloc(bufferSize, MemoryConstants::pageSize64k); auto ptrHostBuffer = static_cast(hostBuffer); cl_mem_flags flags = GetParam(); auto buffer = clCreateBuffer(context, flags, bufferSize, flags == 0 ? 
nullptr : ptrHostBuffer, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); Buffer *bufferObj = NEO::castToObject(buffer); EXPECT_EQ(bufferObj->getMultiGraphicsAllocation().getGraphicsAllocations().size(), 2u); EXPECT_NE(bufferObj->getMultiGraphicsAllocation().getGraphicsAllocation(0u), nullptr); EXPECT_NE(bufferObj->getMultiGraphicsAllocation().getGraphicsAllocation(1u), nullptr); EXPECT_NE(bufferObj->getMultiGraphicsAllocation().getGraphicsAllocation(0u), bufferObj->getMultiGraphicsAllocation().getGraphicsAllocation(1u)); alignedFree(hostBuffer); retVal = clReleaseMemObject(buffer); EXPECT_EQ(CL_SUCCESS, retVal); clReleaseContext(context); } static cl_mem_flags validFlagsForMultiDeviceContextBuffer[] = { CL_MEM_USE_HOST_PTR, CL_MEM_COPY_HOST_PTR, 0}; INSTANTIATE_TEST_CASE_P( CreateBufferWithMultiDeviceContextCheckFlags, clCreateBufferWithMultiDeviceContextTests, testing::ValuesIn(validFlagsForMultiDeviceContextBuffer)); using clCreateBufferWithMultiDeviceContextFaillingAllocationTests = clCreateBufferTemplateTests; TEST_F(clCreateBufferWithMultiDeviceContextFaillingAllocationTests, GivenContextdWithMultiDeviceFailingAllocationThenBufferAllocateFails) { UltClDeviceFactory deviceFactory{3, 0}; DebugManager.flags.EnableMultiRootDeviceContexts.set(true); cl_device_id devices[] = {deviceFactory.rootDevices[0], deviceFactory.rootDevices[1], deviceFactory.rootDevices[2]}; MockContext pContext(ClDeviceVector(devices, 3)); EXPECT_EQ(2u, pContext.getMaxRootDeviceIndex()); constexpr auto bufferSize = 64u; auto hostBuffer = alignedMalloc(bufferSize, MemoryConstants::pageSize64k); auto ptrHostBuffer = static_cast(hostBuffer); cl_mem_flags flags = CL_MEM_USE_HOST_PTR; static_cast(pContext.memoryManager)->successAllocatedGraphicsMemoryIndex = 0u; static_cast(pContext.memoryManager)->maxSuccessAllocatedGraphicsMemoryIndex = 2u; auto buffer = clCreateBuffer(&pContext, flags, bufferSize, ptrHostBuffer, &retVal); ASSERT_EQ(CL_OUT_OF_HOST_MEMORY, retVal); 
EXPECT_EQ(nullptr, buffer); alignedFree(hostBuffer); } } // namespace ClCreateBufferTests compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_create_command_queue_tests.inl000066400000000000000000000112021422164147700311450ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/device/device.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/libult/ult_command_stream_receiver.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/context/context.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clCreateCommandQueueTest; namespace ULT { TEST_F(clCreateCommandQueueTest, GivenCorrectParametersWhenCreatingCommandQueueThenCommandQueueIsCreatedAndSuccessIsReturned) { cl_command_queue cmdQ = nullptr; cl_queue_properties properties = 0; cmdQ = clCreateCommandQueue(pContext, testedClDevice, properties, &retVal); ASSERT_NE(nullptr, cmdQ); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clReleaseCommandQueue(cmdQ); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clCreateCommandQueueTest, GivenNullContextWhenCreatingCommandQueueThenInvalidContextErrorIsReturned) { clCreateCommandQueue(nullptr, testedClDevice, 0, &retVal); EXPECT_EQ(CL_INVALID_CONTEXT, retVal); } TEST_F(clCreateCommandQueueTest, GivenNullDeviceWhenCreatingCommandQueueThenInvalidDeviceErrorIsReturned) { clCreateCommandQueue(pContext, nullptr, 0, &retVal); EXPECT_EQ(CL_INVALID_DEVICE, retVal); } TEST_F(clCreateCommandQueueTest, GivenDeviceNotAssociatedWithContextWhenCreatingCommandQueueThenInvalidDeviceErrorIsReturned) { UltClDeviceFactory deviceFactory{1, 0}; EXPECT_FALSE(pContext->isDeviceAssociated(*deviceFactory.rootDevices[0])); clCreateCommandQueue(pContext, deviceFactory.rootDevices[0], 0, &retVal); EXPECT_EQ(CL_INVALID_DEVICE, retVal); } TEST_F(clCreateCommandQueueTest, 
GivenInvalidPropertiesWhenCreatingCommandQueueThenInvalidValueErrorIsReturned) { cl_command_queue cmdQ = nullptr; cl_queue_properties properties = 0xf0000; cmdQ = clCreateCommandQueue(pContext, testedClDevice, properties, &retVal); ASSERT_EQ(nullptr, cmdQ); ASSERT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(clCreateCommandQueueTest, GivenOoqParametersWhenQueueIsCreatedThenQueueIsSucesfullyCreated) { cl_int retVal = CL_SUCCESS; cl_queue_properties ooq = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE; auto cmdq = clCreateCommandQueue(pContext, testedClDevice, ooq, &retVal); EXPECT_NE(nullptr, cmdq); EXPECT_EQ(retVal, CL_SUCCESS); retVal = clReleaseCommandQueue(cmdq); } HWTEST_F(clCreateCommandQueueTest, GivenOoqParametersWhenQueueIsCreatedThenCommandStreamReceiverSwitchesToBatchingMode) { cl_int retVal = CL_SUCCESS; cl_queue_properties ooq = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE; auto clDevice = castToObject(testedClDevice); auto mockDevice = reinterpret_cast(&clDevice->getDevice()); auto &csr = mockDevice->getUltCommandStreamReceiver(); EXPECT_EQ(DispatchMode::ImmediateDispatch, csr.dispatchMode); auto cmdq = clCreateCommandQueue(pContext, testedClDevice, ooq, &retVal); EXPECT_EQ(DispatchMode::BatchedDispatch, csr.dispatchMode); retVal = clReleaseCommandQueue(cmdq); } HWTEST_F(clCreateCommandQueueTest, GivenForcedDispatchModeAndOoqParametersWhenQueueIsCreatedThenCommandStreamReceiverDoesntSwitchToBatchingMode) { DebugManagerStateRestore restorer; DebugManager.flags.CsrDispatchMode.set(static_cast(DispatchMode::ImmediateDispatch)); cl_int retVal = CL_SUCCESS; cl_queue_properties ooq = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE; auto clDevice = castToObject(testedClDevice); auto mockDevice = reinterpret_cast(&clDevice->getDevice()); auto &csr = mockDevice->getUltCommandStreamReceiver(); EXPECT_EQ(DispatchMode::ImmediateDispatch, csr.dispatchMode); auto cmdq = clCreateCommandQueue(pContext, testedClDevice, ooq, &retVal); EXPECT_EQ(DispatchMode::ImmediateDispatch, csr.dispatchMode); 
retVal = clReleaseCommandQueue(cmdq); } HWTEST_F(clCreateCommandQueueTest, GivenOoqParametersWhenQueueIsCreatedThenCommandStreamReceiverSwitchesToNTo1SubmissionModel) { cl_int retVal = CL_SUCCESS; cl_queue_properties ooq = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE; auto clDevice = castToObject(testedClDevice); auto mockDevice = reinterpret_cast(&clDevice->getDevice()); auto &csr = mockDevice->getUltCommandStreamReceiver(); EXPECT_FALSE(csr.isNTo1SubmissionModelEnabled()); auto cmdq = clCreateCommandQueue(pContext, testedClDevice, ooq, &retVal); EXPECT_TRUE(csr.isNTo1SubmissionModelEnabled()); retVal = clReleaseCommandQueue(cmdq); } } // namespace ULT cl_create_command_queue_with_properties_tests.cpp000066400000000000000000000551001422164147700344020ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/api/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/os_interface/os_context.h" #include "shared/test/common/fixtures/memory_management_fixture.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/helpers/variable_backup.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/test_macros/test_checks_ocl.h" #include "CL/cl_ext.h" #include "cl_api_tests.h" using namespace NEO; namespace ULT { struct CommandQueueWithPropertiesTest : public ApiFixture<>, public ::testing::WithParamInterface>, public ::testing::Test { CommandQueueWithPropertiesTest() : queuePriority(CL_QUEUE_PRIORITY_MED_KHR), queueThrottle(CL_QUEUE_THROTTLE_MED_KHR) { } void SetUp() override { std::tie(commandQueueProperties, queueSize, queuePriority, queueThrottle) = 
GetParam(); ApiFixture::SetUp(); } void TearDown() override { ApiFixture::TearDown(); } cl_command_queue_properties commandQueueProperties = 0; cl_uint queueSize = 0; cl_queue_priority_khr queuePriority; cl_queue_throttle_khr queueThrottle; }; struct clCreateCommandQueueWithPropertiesApi : public ApiFixture<>, public MemoryManagementFixture, public ::testing::Test { clCreateCommandQueueWithPropertiesApi() { } void SetUp() override { platformsImpl->clear(); MemoryManagementFixture::SetUp(); ApiFixture::SetUp(); } void TearDown() override { ApiFixture::TearDown(); MemoryManagementFixture::TearDown(); } }; typedef CommandQueueWithPropertiesTest clCreateCommandQueueWithPropertiesTests; TEST_P(clCreateCommandQueueWithPropertiesTests, GivenPropertiesWhenCreatingCommandQueueThenExpectedResultIsReturned) { cl_command_queue cmdQ = nullptr; cl_queue_properties properties[] = { CL_QUEUE_PROPERTIES, 0, CL_QUEUE_SIZE, 0, CL_QUEUE_PRIORITY_KHR, CL_QUEUE_PRIORITY_HIGH_KHR, CL_QUEUE_THROTTLE_KHR, CL_QUEUE_THROTTLE_MED_KHR, 0}; bool queueOnDeviceUsed = false; bool priorityHintsUsed = false; bool throttleHintsUsed = false; cl_queue_properties *pProp = &properties[0]; if (commandQueueProperties) { *pProp++ = CL_QUEUE_PROPERTIES; *pProp++ = (cl_queue_properties)commandQueueProperties; } if ((commandQueueProperties & CL_QUEUE_ON_DEVICE) && queueSize) { *pProp++ = CL_QUEUE_SIZE; *pProp++ = queueSize; } if (commandQueueProperties & CL_QUEUE_ON_DEVICE) { queueOnDeviceUsed = true; } if (queuePriority) { *pProp++ = CL_QUEUE_PRIORITY_KHR; *pProp++ = queuePriority; priorityHintsUsed = true; } if (queueThrottle) { *pProp++ = CL_QUEUE_THROTTLE_KHR; *pProp++ = queueThrottle; throttleHintsUsed = true; } *pProp++ = 0; cmdQ = clCreateCommandQueueWithProperties( pContext, testedClDevice, properties, &retVal); if (queueOnDeviceUsed && priorityHintsUsed) { EXPECT_EQ(nullptr, cmdQ); EXPECT_EQ(retVal, CL_INVALID_QUEUE_PROPERTIES); return; } else if (queueOnDeviceUsed && throttleHintsUsed) { 
EXPECT_EQ(nullptr, cmdQ); EXPECT_EQ(retVal, CL_INVALID_QUEUE_PROPERTIES); return; } else { EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, cmdQ); } auto commandQueueObj = castToObject(cmdQ); ASSERT_NE(commandQueueObj, nullptr); retVal = clReleaseCommandQueue(cmdQ); EXPECT_EQ(CL_SUCCESS, retVal); auto icdStoredFunction = icdGlobalDispatchTable.clCreateCommandQueueWithProperties; auto pFunction = &clCreateCommandQueueWithProperties; EXPECT_EQ(icdStoredFunction, pFunction); } static cl_command_queue_properties commandQueueProperties[] = { 0, CL_QUEUE_PROFILING_ENABLE}; static cl_uint queueSizes[] = { 0, 2000}; cl_queue_priority_khr queuePriorities[] = { 0, CL_QUEUE_PRIORITY_LOW_KHR, CL_QUEUE_PRIORITY_MED_KHR, CL_QUEUE_PRIORITY_HIGH_KHR}; cl_queue_throttle_khr queueThrottles[] = { 0, CL_QUEUE_THROTTLE_LOW_KHR, CL_QUEUE_THROTTLE_MED_KHR, CL_QUEUE_THROTTLE_HIGH_KHR}; INSTANTIATE_TEST_CASE_P(api, clCreateCommandQueueWithPropertiesTests, ::testing::Combine( ::testing::ValuesIn(commandQueueProperties), ::testing::ValuesIn(queueSizes), ::testing::ValuesIn(queuePriorities), ::testing::ValuesIn(queueThrottles))); TEST_F(clCreateCommandQueueWithPropertiesApi, GivenNullContextWhenCreatingCommandQueueWithPropertiesThenInvalidContextErrorIsReturned) { cl_int retVal = CL_SUCCESS; auto cmdQ = clCreateCommandQueueWithProperties( nullptr, nullptr, 0, &retVal); EXPECT_EQ(cmdQ, nullptr); EXPECT_EQ(retVal, CL_INVALID_CONTEXT); } TEST_F(clCreateCommandQueueWithPropertiesApi, GivenNullContextWhenCreatingCommandQueueWithPropertiesKHRThenInvalidContextErrorIsReturned) { cl_int retVal = CL_SUCCESS; auto cmdQ = clCreateCommandQueueWithPropertiesKHR( nullptr, nullptr, 0, &retVal); EXPECT_EQ(cmdQ, nullptr); EXPECT_EQ(retVal, CL_INVALID_CONTEXT); } TEST_F(clCreateCommandQueueWithPropertiesApi, GivenOoqPropertiesWhenQueueIsCreatedThenSuccessIsReturned) { cl_int retVal = CL_SUCCESS; cl_queue_properties ooq[] = {CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0}; auto cmdq = 
clCreateCommandQueueWithProperties(pContext, testedClDevice, ooq, &retVal); EXPECT_NE(nullptr, cmdq); EXPECT_EQ(retVal, CL_SUCCESS); retVal = clReleaseCommandQueue(cmdq); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clCreateCommandQueueWithPropertiesApi, GivenQueueOnDeviceWithoutOoqPropertiesWhenQueueIsCreatedThenErrorIsReturned) { cl_queue_properties ondevice[] = {CL_QUEUE_PROPERTIES, CL_QUEUE_ON_DEVICE, 0}; auto cmdqd = clCreateCommandQueueWithProperties(pContext, testedClDevice, ondevice, &retVal); EXPECT_EQ(nullptr, cmdqd); EXPECT_EQ(retVal, CL_INVALID_VALUE); } TEST_F(clCreateCommandQueueWithPropertiesApi, GivenNullContextAndOoqPropertiesWhenCreatingCommandQueueWithPropertiesThenInvalidContextErrorIsReturned) { cl_int retVal = CL_SUCCESS; cl_queue_properties ooq[] = {CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE | CL_QUEUE_ON_DEVICE_DEFAULT, 0}; auto cmdq = clCreateCommandQueueWithProperties(nullptr, testedClDevice, ooq, &retVal); EXPECT_EQ(nullptr, cmdq); EXPECT_EQ(retVal, CL_INVALID_CONTEXT); } TEST_F(clCreateCommandQueueWithPropertiesApi, GivenNullDeviceWhenCreatingCommandQueueWithPropertiesThenInvalidDeviceErrorIsReturned) { cl_int retVal = CL_SUCCESS; cl_queue_properties ooq[] = {CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE | CL_QUEUE_ON_DEVICE_DEFAULT, 0}; auto cmdq = clCreateCommandQueueWithProperties(pContext, nullptr, ooq, &retVal); EXPECT_EQ(nullptr, cmdq); EXPECT_EQ(retVal, CL_INVALID_DEVICE); } TEST_F(clCreateCommandQueueWithPropertiesApi, GivenDeviceNotAssociatedWithContextWhenCreatingCommandQueueWithPropertiesThenInvalidDeviceErrorIsReturned) { cl_int retVal = CL_OUT_OF_HOST_MEMORY; UltClDeviceFactory deviceFactory{1, 0}; EXPECT_FALSE(pContext->isDeviceAssociated(*deviceFactory.rootDevices[0])); auto cmdq = clCreateCommandQueueWithProperties(pContext, deviceFactory.rootDevices[0], nullptr, &retVal); EXPECT_EQ(nullptr, cmdq); EXPECT_EQ(retVal, CL_INVALID_DEVICE); } 
TEST_F(clCreateCommandQueueWithPropertiesApi, GivenSizeWhichExceedsMaxDeviceQueueSizeWhenCreatingCommandQueueWithPropertiesThenInvalidQueuePropertiesErrorIsReturned) { cl_int retVal = CL_SUCCESS; cl_queue_properties ooq[] = {CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE | CL_QUEUE_ON_DEVICE_DEFAULT, CL_QUEUE_SIZE, (cl_uint)0xffffffff, 0, 0}; auto cmdq = clCreateCommandQueueWithProperties(pContext, testedClDevice, ooq, &retVal); EXPECT_EQ(nullptr, cmdq); EXPECT_EQ(retVal, CL_INVALID_QUEUE_PROPERTIES); } TEST_F(clCreateCommandQueueWithPropertiesApi, GivenQueueOnDeviceWithoutOutOfOrderExecModePropertyWhenCreatingCommandQueueWithPropertiesThenInvalidValueErrorIsReturned) { cl_int retVal = CL_SUCCESS; cl_queue_properties odq[] = {CL_QUEUE_PROPERTIES, CL_QUEUE_ON_DEVICE, 0}; auto cmdq = clCreateCommandQueueWithProperties(pContext, testedClDevice, odq, &retVal); EXPECT_EQ(nullptr, cmdq); EXPECT_EQ(retVal, CL_INVALID_VALUE); } TEST_F(clCreateCommandQueueWithPropertiesApi, GivenDefaultDeviceQueueWithoutQueueOnDevicePropertyWhenCreatingCommandQueueWithPropertiesThenInvalidValueErrorIsReturned) { cl_int retVal = CL_SUCCESS; cl_queue_properties ddq[] = {CL_QUEUE_PROPERTIES, CL_QUEUE_ON_DEVICE_DEFAULT, 0}; auto cmdq = clCreateCommandQueueWithProperties(pContext, testedClDevice, ddq, &retVal); EXPECT_EQ(nullptr, cmdq); EXPECT_EQ(retVal, CL_INVALID_VALUE); } TEST_F(clCreateCommandQueueWithPropertiesApi, GivenHighPriorityWhenCreatingOoqCommandQueueWithPropertiesThenInvalidQueuePropertiesErrorIsReturned) { cl_int retVal = CL_SUCCESS; cl_queue_properties ondevice[] = {CL_QUEUE_PROPERTIES, CL_QUEUE_ON_DEVICE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, CL_QUEUE_PRIORITY_KHR, CL_QUEUE_PRIORITY_HIGH_KHR, 0}; auto cmdqd = clCreateCommandQueueWithProperties(pContext, testedClDevice, ondevice, &retVal); EXPECT_EQ(nullptr, cmdqd); EXPECT_EQ(retVal, CL_INVALID_QUEUE_PROPERTIES); } TEST_F(clCreateCommandQueueWithPropertiesApi, 
GivenLowPriorityWhenCreatingOoqCommandQueueWithPropertiesThenInvalidQueuePropertiesErrorIsReturned) { cl_int retVal = CL_SUCCESS; cl_queue_properties ondevice[] = {CL_QUEUE_PROPERTIES, CL_QUEUE_ON_DEVICE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, CL_QUEUE_PRIORITY_KHR, CL_QUEUE_PRIORITY_LOW_KHR, 0}; auto cmdqd = clCreateCommandQueueWithProperties(pContext, testedClDevice, ondevice, &retVal); EXPECT_EQ(nullptr, cmdqd); EXPECT_EQ(retVal, CL_INVALID_QUEUE_PROPERTIES); } TEST_F(clCreateCommandQueueWithPropertiesApi, GivenMedPriorityWhenCreatingOoqCommandQueueWithPropertiesThenInvalidQueuePropertiesErrorIsReturned) { cl_int retVal = CL_SUCCESS; cl_queue_properties ondevice[] = {CL_QUEUE_PROPERTIES, CL_QUEUE_ON_DEVICE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, CL_QUEUE_PRIORITY_KHR, CL_QUEUE_PRIORITY_MED_KHR, 0}; auto cmdqd = clCreateCommandQueueWithProperties(pContext, testedClDevice, ondevice, &retVal); EXPECT_EQ(nullptr, cmdqd); EXPECT_EQ(retVal, CL_INVALID_QUEUE_PROPERTIES); } TEST_F(clCreateCommandQueueWithPropertiesApi, givenDeviceQueuePropertiesWhenCreatingCommandQueueWithPropertiesThenNullQueueAndInvalidQueuePropertiesErrorIsReturned) { auto pClDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockContext context{pClDevice.get()}; cl_int retVal = CL_SUCCESS; cl_queue_properties queueProperties[] = {CL_QUEUE_PROPERTIES, CL_QUEUE_ON_DEVICE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0}; auto pCmdQ = clCreateCommandQueueWithProperties(&context, pClDevice.get(), queueProperties, &retVal); EXPECT_EQ(nullptr, pCmdQ); EXPECT_EQ(retVal, CL_INVALID_QUEUE_PROPERTIES); } TEST_F(clCreateCommandQueueWithPropertiesApi, givenDefaultDeviceQueuePropertiesWhenCreatingCommandQueueWithPropertiesThenNullQueueAndInvalidQueuePropertiesErrorIsReturned) { auto pClDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockContext context{pClDevice.get()}; cl_int retVal = CL_SUCCESS; cl_queue_properties queueProperties[] = 
{CL_QUEUE_PROPERTIES, CL_QUEUE_ON_DEVICE | CL_QUEUE_ON_DEVICE_DEFAULT | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0}; auto pCmdQ = clCreateCommandQueueWithProperties(&context, pClDevice.get(), queueProperties, &retVal); EXPECT_EQ(nullptr, pCmdQ); EXPECT_EQ(retVal, CL_INVALID_QUEUE_PROPERTIES); } TEST_F(clCreateCommandQueueWithPropertiesApi, GivenInvalidPropertiesWhenCreatingOoqCommandQueueWithPropertiesThenInvalidValueErrorIsReturned) { cl_int retVal = CL_SUCCESS; cl_queue_properties properties = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE; auto commandQueue = clCreateCommandQueueWithProperties(pContext, testedClDevice, &properties, &retVal); EXPECT_EQ(nullptr, commandQueue); EXPECT_EQ(retVal, CL_INVALID_VALUE); } TEST_F(clCreateCommandQueueWithPropertiesApi, givenInvalidPropertiesOnSubsequentTokenWhenQueueIsCreatedThenReturnError) { cl_int retVal = CL_SUCCESS; cl_queue_properties properties[] = {CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, CL_DEVICE_PARTITION_EQUALLY, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0}; auto commandQueue = clCreateCommandQueueWithProperties(pContext, testedClDevice, properties, &retVal); EXPECT_EQ(nullptr, commandQueue); EXPECT_EQ(retVal, CL_INVALID_VALUE); } TEST_F(clCreateCommandQueueWithPropertiesApi, GivenNullPropertiesWhenCreatingCommandQueueWithPropertiesThenSuccessIsReturned) { cl_int retVal = CL_SUCCESS; auto commandQueue = clCreateCommandQueueWithProperties(pContext, testedClDevice, nullptr, &retVal); EXPECT_NE(nullptr, commandQueue); EXPECT_EQ(retVal, CL_SUCCESS); clReleaseCommandQueue(commandQueue); } TEST_F(clCreateCommandQueueWithPropertiesApi, GivenLowPriorityWhenCreatingCommandQueueWithPropertiesThenSuccessIsReturned) { cl_int retVal = CL_SUCCESS; cl_queue_properties ondevice[] = {CL_QUEUE_PRIORITY_KHR, CL_QUEUE_PRIORITY_LOW_KHR, 0}; auto cmdqd = clCreateCommandQueueWithProperties(pContext, testedClDevice, ondevice, &retVal); EXPECT_NE(nullptr, cmdqd); EXPECT_EQ(retVal, CL_SUCCESS); retVal = 
clReleaseCommandQueue(cmdqd); EXPECT_EQ(retVal, CL_SUCCESS); } HWTEST_F(clCreateCommandQueueWithPropertiesApi, GivenLowPriorityWhenCreatingCommandQueueThenSelectRcsEngine) { cl_queue_properties properties[] = {CL_QUEUE_PRIORITY_KHR, CL_QUEUE_PRIORITY_LOW_KHR, 0}; auto cmdQ = clCreateCommandQueueWithProperties(pContext, testedClDevice, properties, nullptr); auto commandQueueObj = castToObject(cmdQ); auto &osContext = commandQueueObj->getGpgpuCommandStreamReceiver().getOsContext(); EXPECT_EQ(getChosenEngineType(pDevice->getHardwareInfo()), osContext.getEngineType()); EXPECT_TRUE(osContext.isLowPriority()); clReleaseCommandQueue(cmdQ); } TEST_F(clCreateCommandQueueWithPropertiesApi, GivenCommandQueueCreatedWithNullPropertiesWhenQueryingPropertiesArrayThenNothingIsReturned) { cl_int retVal = CL_SUCCESS; auto commandQueue = clCreateCommandQueueWithProperties(pContext, testedClDevice, nullptr, &retVal); EXPECT_EQ(retVal, CL_SUCCESS); EXPECT_NE(nullptr, commandQueue); size_t propertiesArraySize; retVal = clGetCommandQueueInfo(commandQueue, CL_QUEUE_PROPERTIES_ARRAY, 0, nullptr, &propertiesArraySize); EXPECT_EQ(retVal, CL_SUCCESS); EXPECT_EQ(0u, propertiesArraySize); clReleaseCommandQueue(commandQueue); } TEST_F(clCreateCommandQueueWithPropertiesApi, GivenCommandQueueCreatedWithVariousPropertiesWhenQueryingPropertiesArrayThenCorrectValuesAreReturned) { cl_int retVal = CL_SUCCESS; cl_queue_properties propertiesArray[3]; size_t propertiesArraySize; std::vector> propertiesToTest{ {0}, {CL_QUEUE_PRIORITY_KHR, CL_QUEUE_PRIORITY_LOW_KHR, 0}, {CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0}}; for (auto properties : propertiesToTest) { auto commandQueue = clCreateCommandQueueWithProperties(pContext, testedClDevice, properties.data(), &retVal); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clGetCommandQueueInfo(commandQueue, CL_QUEUE_PROPERTIES_ARRAY, sizeof(propertiesArray), propertiesArray, &propertiesArraySize); EXPECT_EQ(CL_SUCCESS, retVal); 
EXPECT_EQ(properties.size() * sizeof(cl_queue_properties), propertiesArraySize); for (size_t i = 0; i < properties.size(); i++) { EXPECT_EQ(properties[i], propertiesArray[i]); } clReleaseCommandQueue(commandQueue); } } TEST_F(clCreateCommandQueueWithPropertiesApi, givenQueueFamilySelectedAndNotIndexWhenCreatingQueueThenFail) { cl_queue_properties queueProperties[] = { CL_QUEUE_FAMILY_INTEL, 0, 0, }; auto queue = clCreateCommandQueueWithProperties(pContext, testedClDevice, queueProperties, &retVal); EXPECT_EQ(nullptr, queue); EXPECT_EQ(CL_INVALID_QUEUE_PROPERTIES, retVal); } TEST_F(clCreateCommandQueueWithPropertiesApi, givenQueueIndexSelectedAndNotFamilyWhenCreatingQueueThenFail) { cl_queue_properties queueProperties[] = { CL_QUEUE_INDEX_INTEL, 0, 0, }; auto queue = clCreateCommandQueueWithProperties(pContext, testedClDevice, queueProperties, &retVal); EXPECT_EQ(nullptr, queue); EXPECT_EQ(CL_INVALID_QUEUE_PROPERTIES, retVal); } TEST_F(clCreateCommandQueueWithPropertiesApi, givenValidFamilyAndIndexSelectedWhenCreatingQueueThenReturnSuccess) { cl_queue_properties queueProperties[] = { CL_QUEUE_FAMILY_INTEL, 0, CL_QUEUE_INDEX_INTEL, 0, 0, }; auto queue = clCreateCommandQueueWithProperties(pContext, testedClDevice, queueProperties, &retVal); EXPECT_NE(nullptr, queue); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(CL_SUCCESS, clReleaseCommandQueue(queue)); } TEST_F(clCreateCommandQueueWithPropertiesApi, givenInvalidQueueFamilySelectedWhenCreatingQueueThenFail) { cl_queue_properties queueProperties[] = { CL_QUEUE_FAMILY_INTEL, CommonConstants::engineGroupCount, CL_QUEUE_INDEX_INTEL, 0, 0, }; auto queue = clCreateCommandQueueWithProperties(pContext, testedClDevice, queueProperties, &retVal); EXPECT_EQ(nullptr, queue); EXPECT_EQ(CL_INVALID_QUEUE_PROPERTIES, retVal); } TEST_F(clCreateCommandQueueWithPropertiesApi, givenInvalidQueueIndexSelectedWhenCreatingQueueThenFail) { cl_queue_properties queueProperties[] = { CL_QUEUE_FAMILY_INTEL, 0, CL_QUEUE_INDEX_INTEL, 50, 0, }; auto 
queue = clCreateCommandQueueWithProperties(pContext, testedClDevice, queueProperties, &retVal); EXPECT_EQ(nullptr, queue); EXPECT_EQ(CL_INVALID_QUEUE_PROPERTIES, retVal); } using LowPriorityCommandQueueTest = ::testing::Test; HWTEST_F(LowPriorityCommandQueueTest, GivenDeviceWithSubdevicesWhenCreatingLowPriorityCommandQueueThenEngineFromFirstSubdeviceIsTaken) { DebugManagerStateRestore restorer; VariableBackup mockDeviceFlagBackup{&MockDevice::createSingleDevice, false}; DebugManager.flags.CreateMultipleSubDevices.set(2); MockContext context; cl_queue_properties properties[] = {CL_QUEUE_PRIORITY_KHR, CL_QUEUE_PRIORITY_LOW_KHR, 0}; EXPECT_EQ(2u, context.getDevice(0)->getNumGenericSubDevices()); auto cmdQ = clCreateCommandQueueWithProperties(&context, context.getDevice(0), properties, nullptr); auto commandQueueObj = castToObject(cmdQ); auto subDevice = context.getDevice(0)->getSubDevice(0); auto &engine = subDevice->getEngine(getChosenEngineType(subDevice->getHardwareInfo()), EngineUsage::LowPriority); EXPECT_EQ(engine.commandStreamReceiver, &commandQueueObj->getGpgpuCommandStreamReceiver()); EXPECT_EQ(engine.osContext, &commandQueueObj->getGpgpuCommandStreamReceiver().getOsContext()); clReleaseCommandQueue(cmdQ); } std::pair priorityParams[3]{ std::make_pair(CL_QUEUE_PRIORITY_LOW_KHR, QueuePriority::LOW), std::make_pair(CL_QUEUE_PRIORITY_MED_KHR, QueuePriority::MEDIUM), std::make_pair(CL_QUEUE_PRIORITY_HIGH_KHR, QueuePriority::HIGH)}; class clCreateCommandQueueWithPropertiesApiPriority : public clCreateCommandQueueWithPropertiesApi, public ::testing::WithParamInterface> { }; TEST_P(clCreateCommandQueueWithPropertiesApiPriority, GivenValidPriorityWhenCreatingCommandQueueWithPropertiesThenCorrectPriorityIsSetInternally) { cl_int retVal = CL_SUCCESS; cl_queue_properties ondevice[] = {CL_QUEUE_PRIORITY_KHR, GetParam().first, 0}; auto cmdqd = clCreateCommandQueueWithProperties(pContext, testedClDevice, ondevice, &retVal); EXPECT_NE(nullptr, cmdqd); EXPECT_EQ(retVal, 
CL_SUCCESS); auto commandQueue = castToObject(cmdqd); EXPECT_EQ(commandQueue->getPriority(), GetParam().second); retVal = clReleaseCommandQueue(cmdqd); EXPECT_EQ(retVal, CL_SUCCESS); } INSTANTIATE_TEST_CASE_P(AllValidPriorities, clCreateCommandQueueWithPropertiesApiPriority, ::testing::ValuesIn(priorityParams)); std::pair throttleParams[3]{ std::make_pair(CL_QUEUE_THROTTLE_LOW_KHR, QueueThrottle::LOW), std::make_pair(CL_QUEUE_THROTTLE_MED_KHR, QueueThrottle::MEDIUM), std::make_pair(CL_QUEUE_THROTTLE_HIGH_KHR, QueueThrottle::HIGH)}; class clCreateCommandQueueWithPropertiesApiThrottle : public clCreateCommandQueueWithPropertiesApi, public ::testing::WithParamInterface> { }; TEST_P(clCreateCommandQueueWithPropertiesApiThrottle, GivenThrottlePropertiesWhenCreatingCommandQueueWithPropertiesThenCorrectThrottleIsSetInternally) { cl_int retVal = CL_SUCCESS; cl_queue_properties ondevice[] = {CL_QUEUE_THROTTLE_KHR, GetParam().first, 0}; auto cmdqd = clCreateCommandQueueWithProperties(pContext, testedClDevice, ondevice, &retVal); EXPECT_NE(nullptr, cmdqd); EXPECT_EQ(retVal, CL_SUCCESS); auto commandQueue = castToObject(cmdqd); EXPECT_EQ(commandQueue->getThrottle(), GetParam().second); retVal = clReleaseCommandQueue(cmdqd); EXPECT_EQ(retVal, CL_SUCCESS); } INSTANTIATE_TEST_CASE_P(AllValidThrottleValues, clCreateCommandQueueWithPropertiesApiThrottle, ::testing::ValuesIn(throttleParams)); } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_create_context_from_type_tests.inl000066400000000000000000000124121422164147700320770ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/platform_fixture.h" #include "opencl/test/unit_test/mocks/mock_platform.h" using namespace NEO; struct clCreateContextFromTypeTests : Test { cl_int retVal = CL_DEVICE_NOT_AVAILABLE; }; namespace ULT { void CL_CALLBACK 
contextCallBack(const char *, const void *, size_t, void *) { } TEST_F(clCreateContextFromTypeTests, GivenOnlyGpuDeviceTypeAndReturnValueWhenCreatingContextFromTypeThenContextWithSingleDeviceIsCreated) { auto context = clCreateContextFromType(nullptr, CL_DEVICE_TYPE_GPU, nullptr, nullptr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, context); EXPECT_NE(nullptr, context->dispatch.icdDispatch); EXPECT_NE(nullptr, context->dispatch.crtDispatch); auto pContext = castToObject(context); EXPECT_EQ(1u, pContext->getNumDevices()); retVal = clReleaseContext(context); ASSERT_EQ(CL_SUCCESS, retVal); } TEST_F(clCreateContextFromTypeTests, GivenCpuTypeWhenCreatingContextFromTypeThenInvalidValueErrorIsReturned) { auto context = clCreateContextFromType(nullptr, CL_DEVICE_TYPE_CPU, nullptr, nullptr, &retVal); ASSERT_EQ(nullptr, context); ASSERT_EQ(CL_DEVICE_NOT_FOUND, retVal); } TEST_F(clCreateContextFromTypeTests, GivenNullCallbackFunctionAndNotNullUserDataWhenCreatingContextFromTypeThenInvalidValueErrorIsReturned) { cl_int a; auto context = clCreateContextFromType(nullptr, CL_DEVICE_TYPE_GPU, nullptr, &a, &retVal); ASSERT_EQ(nullptr, context); ASSERT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(clCreateContextFromTypeTests, GivenCallbackFunctionWhenCreatingContextFromTypeThenCallSucceeds) { auto context = clCreateContextFromType(nullptr, CL_DEVICE_TYPE_GPU, contextCallBack, nullptr, &retVal); ASSERT_NE(nullptr, context); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clReleaseContext(context); ASSERT_EQ(CL_SUCCESS, retVal); } TEST_F(clCreateContextFromTypeTests, GivenOnlyGpuDeviceTypeWhenCreatingContextFromTypeThenCallSucceeds) { auto context = clCreateContextFromType(nullptr, CL_DEVICE_TYPE_GPU, nullptr, nullptr, nullptr); ASSERT_NE(nullptr, context); EXPECT_NE(nullptr, context->dispatch.icdDispatch); EXPECT_NE(nullptr, context->dispatch.crtDispatch); retVal = clReleaseContext(context); ASSERT_EQ(CL_SUCCESS, retVal); } TEST_F(clCreateContextFromTypeTests, 
GivenInvalidContextCreationPropertiesWhenCreatingContextFromTypeThenInvalidPlatformErrorIsReturned) { cl_context_properties invalidProperties[3] = {CL_CONTEXT_PLATFORM, (cl_context_properties) nullptr, 0}; auto context = clCreateContextFromType(invalidProperties, CL_DEVICE_TYPE_GPU, nullptr, nullptr, &retVal); EXPECT_EQ(CL_INVALID_PLATFORM, retVal); EXPECT_EQ(nullptr, context); } TEST_F(clCreateContextFromTypeTests, GivenNonDefaultPlatformInContextCreationPropertiesWhenCreatingContextFromTypeThenSuccessIsReturned) { auto nonDefaultPlatform = std::make_unique(); nonDefaultPlatform->initializeWithNewDevices(); cl_platform_id nonDefaultPlatformCl = nonDefaultPlatform.get(); cl_context_properties properties[3] = {CL_CONTEXT_PLATFORM, reinterpret_cast(nonDefaultPlatformCl), 0}; auto clContext = clCreateContextFromType(properties, CL_DEVICE_TYPE_GPU, nullptr, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, clContext); auto pContext = castToObject(clContext); for (auto i = 0u; i < nonDefaultPlatform->getNumDevices(); i++) { EXPECT_EQ(nonDefaultPlatform->getClDevice(i), pContext->getDevice(i)); } clReleaseContext(clContext); } TEST_F(clCreateContextFromTypeTests, GivenNonDefaultPlatformWithInvalidIcdDispatchInContextCreationPropertiesWhenCreatingContextFromTypeThenInvalidPlatformErrorIsReturned) { auto nonDefaultPlatform = std::make_unique(); nonDefaultPlatform->initializeWithNewDevices(); cl_platform_id nonDefaultPlatformCl = nonDefaultPlatform.get(); nonDefaultPlatformCl->dispatch.icdDispatch = reinterpret_cast(nonDefaultPlatform.get()); cl_context_properties properties[3] = {CL_CONTEXT_PLATFORM, reinterpret_cast(nonDefaultPlatformCl), 0}; auto clContext = clCreateContextFromType(properties, CL_DEVICE_TYPE_GPU, nullptr, nullptr, &retVal); EXPECT_EQ(CL_INVALID_PLATFORM, retVal); EXPECT_EQ(nullptr, clContext); } TEST(clCreateContextFromTypeTest, GivenPlatformWithMultipleDevicesWhenCreatingContextFromTypeThenContextContainsOnlyOneDevice) { 
DebugManagerStateRestore restorer; DebugManager.flags.CreateMultipleRootDevices.set(2); initPlatform(); cl_int retVal = CL_INVALID_CONTEXT; auto context = clCreateContextFromType(nullptr, CL_DEVICE_TYPE_GPU, nullptr, nullptr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, context); auto pContext = castToObject(context); EXPECT_EQ(1u, pContext->getNumDevices()); EXPECT_EQ(platform()->getClDevice(0), pContext->getDevice(0)); retVal = clReleaseContext(context); ASSERT_EQ(CL_SUCCESS, retVal); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_create_context_tests.inl000066400000000000000000000263541422164147700300250ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/ptr_math.h" #include "opencl/test/unit_test/api/cl_api_tests.h" #include "opencl/test/unit_test/mocks/mock_platform.h" using namespace NEO; typedef api_tests clCreateContextTests; namespace ClCreateContextTests { static int cbInvoked = 0; void CL_CALLBACK eventCallBack(const char *, const void *, size_t, void *) { cbInvoked++; } TEST_F(clCreateContextTests, GivenValidParamsWhenCreatingContextThenContextIsCreated) { auto context = clCreateContext(nullptr, 1u, &testedClDevice, nullptr, nullptr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, context); EXPECT_NE(nullptr, context->dispatch.icdDispatch); EXPECT_NE(nullptr, context->dispatch.crtDispatch); retVal = clReleaseContext(context); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clCreateContextTests, GivenNullptrRetValWhenCreatingContextThenContextIsCreated) { auto context = clCreateContext(nullptr, 1u, &testedClDevice, nullptr, nullptr, nullptr); ASSERT_NE(nullptr, context); EXPECT_NE(nullptr, context->dispatch.icdDispatch); EXPECT_NE(nullptr, context->dispatch.crtDispatch); retVal = clReleaseContext(context); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clCreateContextTests, 
GivenZeroDevicesWhenCreatingContextThenInvalidValueErrorIsReturned) { auto context = clCreateContext(nullptr, 0, &testedClDevice, nullptr, nullptr, &retVal); ASSERT_EQ(nullptr, context); ASSERT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(clCreateContextTests, GivenInvalidUserDataWhenCreatingContextThenInvalidValueErrorIsReturned) { cl_int someData = 25; auto context = clCreateContext(nullptr, 1u, &testedClDevice, nullptr, &someData, &retVal); ASSERT_EQ(nullptr, context); ASSERT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(clCreateContextTests, GivenInvalidDeviceListWhenCreatingContextThenInvalidDeviceErrorIsReturned) { cl_device_id devList[2]; devList[0] = testedClDevice; devList[1] = (cl_device_id)ptrGarbage; auto context = clCreateContext(nullptr, 2, devList, nullptr, nullptr, &retVal); ASSERT_EQ(nullptr, context); ASSERT_EQ(CL_INVALID_DEVICE, retVal); } TEST_F(clCreateContextTests, GivenNullDeviceListWhenCreatingContextThenInvalidValueErrorIsReturned) { auto context = clCreateContext(nullptr, 2, nullptr, nullptr, nullptr, &retVal); ASSERT_EQ(nullptr, context); ASSERT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(clCreateContextTests, GivenNullUserDataWhenCreatingContextThenContextIsCreated) { auto context = clCreateContext(nullptr, 1u, &testedClDevice, eventCallBack, nullptr, &retVal); ASSERT_NE(nullptr, context); retVal = clReleaseContext(context); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clCreateContextTests, givenMultipleRootDevicesWithoutSubDevicesWhenCreatingContextThenContextIsCreated) { UltClDeviceFactory deviceFactory{2, 0}; cl_device_id devices[] = {deviceFactory.rootDevices[0], deviceFactory.rootDevices[1]}; auto context = clCreateContext(nullptr, 2u, devices, eventCallBack, nullptr, &retVal); EXPECT_NE(nullptr, context); EXPECT_EQ(CL_SUCCESS, retVal); clReleaseContext(context); } TEST_F(clCreateContextTests, givenMultipleSubDevicesFromDifferentRootDevicesWhenCreatingContextThenContextIsCreated) { UltClDeviceFactory deviceFactory{2, 2}; cl_device_id devices[] = 
{deviceFactory.subDevices[0], deviceFactory.subDevices[1], deviceFactory.subDevices[2], deviceFactory.subDevices[3]}; auto context = clCreateContext(nullptr, 4u, devices, eventCallBack, nullptr, &retVal); EXPECT_NE(nullptr, context); EXPECT_EQ(CL_SUCCESS, retVal); clReleaseContext(context); } TEST_F(clCreateContextTests, givenDisabledMultipleRootDeviceSupportWhenCreatingContextThenOutOfHostMemoryErrorIsReturned) { UltClDeviceFactory deviceFactory{2, 2}; DebugManager.flags.EnableMultiRootDeviceContexts.set(false); cl_device_id devices[] = {deviceFactory.rootDevices[0], deviceFactory.rootDevices[1]}; auto context = clCreateContext(nullptr, 2u, devices, eventCallBack, nullptr, &retVal); EXPECT_EQ(nullptr, context); EXPECT_EQ(CL_OUT_OF_HOST_MEMORY, retVal); } TEST_F(clCreateContextTests, whenCreateContextWithMultipleRootDevicesWithSubDevicesThenContextIsCreated) { UltClDeviceFactory deviceFactory{2, 2}; cl_device_id devices[] = {deviceFactory.rootDevices[0], deviceFactory.rootDevices[1]}; auto context = clCreateContext(nullptr, 2u, devices, eventCallBack, nullptr, &retVal); EXPECT_NE(nullptr, context); EXPECT_EQ(CL_SUCCESS, retVal); clReleaseContext(context); } TEST_F(clCreateContextTests, givenMultipleRootDevicesWhenCreateContextThenRootDeviceIndicesSetIsFilled) { UltClDeviceFactory deviceFactory{3, 2}; cl_device_id devices[] = {deviceFactory.rootDevices[0], deviceFactory.rootDevices[1], deviceFactory.rootDevices[2]}; auto context = clCreateContext(nullptr, 3u, devices, eventCallBack, nullptr, &retVal); EXPECT_NE(nullptr, context); EXPECT_EQ(CL_SUCCESS, retVal); auto pContext = castToObject(context); auto rootDeviceIndices = pContext->getRootDeviceIndices(); for (auto numDevice = 0u; numDevice < pContext->getNumDevices(); numDevice++) { auto rootDeviceIndex = rootDeviceIndices.find(pContext->getDevice(numDevice)->getRootDeviceIndex()); EXPECT_EQ(*rootDeviceIndex, pContext->getDevice(numDevice)->getRootDeviceIndex()); } clReleaseContext(context); } 
TEST_F(clCreateContextTests, givenMultipleRootDevicesWhenCreateContextThenMaxRootDeviceIndexIsProperlyFilled) { UltClDeviceFactory deviceFactory{3, 0}; DebugManager.flags.EnableMultiRootDeviceContexts.set(true); cl_device_id devices[] = {deviceFactory.rootDevices[0], deviceFactory.rootDevices[2]}; auto context = clCreateContext(nullptr, 2u, devices, eventCallBack, nullptr, &retVal); EXPECT_NE(nullptr, context); EXPECT_EQ(CL_SUCCESS, retVal); auto pContext = castToObject(context); EXPECT_EQ(2u, pContext->getMaxRootDeviceIndex()); clReleaseContext(context); } TEST_F(clCreateContextTests, givenMultipleRootDevicesWhenCreateContextThenSpecialQueueIsProperlyFilled) { UltClDeviceFactory deviceFactory{3, 0}; DebugManager.flags.EnableMultiRootDeviceContexts.set(true); cl_device_id devices[] = {deviceFactory.rootDevices[0], deviceFactory.rootDevices[2]}; auto context = clCreateContext(nullptr, 2u, devices, eventCallBack, nullptr, &retVal); EXPECT_NE(nullptr, context); EXPECT_EQ(CL_SUCCESS, retVal); auto pContext = castToObject(context); auto rootDeviceIndices = pContext->getRootDeviceIndices(); EXPECT_EQ(2u, pContext->getMaxRootDeviceIndex()); StackVec specialQueues; specialQueues.resize(pContext->getMaxRootDeviceIndex()); for (auto numDevice = 0u; numDevice < pContext->getNumDevices(); numDevice++) { auto rootDeviceIndex = rootDeviceIndices.find(pContext->getDevice(numDevice)->getRootDeviceIndex()); EXPECT_EQ(*rootDeviceIndex, pContext->getDevice(numDevice)->getRootDeviceIndex()); EXPECT_EQ(*rootDeviceIndex, pContext->getSpecialQueue(*rootDeviceIndex)->getDevice().getRootDeviceIndex()); specialQueues[numDevice] = pContext->getSpecialQueue(*rootDeviceIndex); } EXPECT_EQ(2u, specialQueues.size()); clReleaseContext(context); } TEST_F(clCreateContextTests, givenInvalidContextCreationPropertiesThenContextCreationFails) { cl_context_properties invalidProperties[3] = {CL_CONTEXT_PLATFORM, (cl_context_properties) nullptr, 0}; auto context = clCreateContext(invalidProperties, 1u, 
&testedClDevice, nullptr, nullptr, &retVal); EXPECT_EQ(CL_INVALID_PLATFORM, retVal); EXPECT_EQ(nullptr, context); } TEST_F(clCreateContextTests, GivenNonDefaultPlatformInContextCreationPropertiesWhenCreatingContextThenSuccessIsReturned) { auto nonDefaultPlatform = std::make_unique(); nonDefaultPlatform->initializeWithNewDevices(); cl_platform_id nonDefaultPlatformCl = nonDefaultPlatform.get(); cl_device_id clDevice = nonDefaultPlatform->getClDevice(0); cl_context_properties properties[3] = {CL_CONTEXT_PLATFORM, reinterpret_cast(nonDefaultPlatformCl), 0}; auto clContext = clCreateContext(properties, 1, &clDevice, nullptr, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, clContext); clReleaseContext(clContext); } TEST_F(clCreateContextTests, GivenNonDefaultPlatformWithInvalidIcdDispatchInContextCreationPropertiesWhenCreatingContextThenInvalidPlatformErrorIsReturned) { auto nonDefaultPlatform = std::make_unique(); nonDefaultPlatform->initializeWithNewDevices(); cl_platform_id nonDefaultPlatformCl = nonDefaultPlatform.get(); nonDefaultPlatformCl->dispatch.icdDispatch = reinterpret_cast(nonDefaultPlatform.get()); cl_device_id clDevice = nonDefaultPlatform->getClDevice(0); cl_context_properties properties[3] = {CL_CONTEXT_PLATFORM, reinterpret_cast(nonDefaultPlatformCl), 0}; auto clContext = clCreateContext(properties, 1, &clDevice, nullptr, nullptr, &retVal); EXPECT_EQ(CL_INVALID_PLATFORM, retVal); EXPECT_EQ(nullptr, clContext); } TEST_F(clCreateContextTests, GivenDeviceNotAssociatedToPlatformInPropertiesWhenCreatingContextThenInvalidDeviceErrorIsReturned) { auto nonDefaultPlatform = std::make_unique(); nonDefaultPlatform->initializeWithNewDevices(); cl_device_id clDevice = platform()->getClDevice(0); cl_platform_id nonDefaultPlatformCl = nonDefaultPlatform.get(); cl_context_properties properties[3] = {CL_CONTEXT_PLATFORM, reinterpret_cast(nonDefaultPlatformCl), 0}; auto clContext = clCreateContext(properties, 1, &clDevice, nullptr, nullptr, 
&retVal); EXPECT_EQ(CL_INVALID_DEVICE, retVal); EXPECT_EQ(nullptr, clContext); } TEST_F(clCreateContextTests, GivenDevicesFromDifferentPlatformsWhenCreatingContextWithoutSpecifiedPlatformThenInvalidDeviceErrorIsReturned) { auto platform1 = std::make_unique(); auto platform2 = std::make_unique(); platform1->initializeWithNewDevices(); platform2->initializeWithNewDevices(); cl_device_id clDevices[] = {platform1->getClDevice(0), platform2->getClDevice(0)}; auto clContext = clCreateContext(nullptr, 2, clDevices, nullptr, nullptr, &retVal); EXPECT_EQ(CL_INVALID_DEVICE, retVal); EXPECT_EQ(nullptr, clContext); } TEST_F(clCreateContextTests, GivenDevicesFromDifferentPlatformsWhenCreatingContextWithSpecifiedPlatformThenInvalidDeviceErrorIsReturned) { auto platform1 = std::make_unique(); auto platform2 = std::make_unique(); platform1->initializeWithNewDevices(); platform2->initializeWithNewDevices(); cl_device_id clDevices[] = {platform1->getClDevice(0), platform2->getClDevice(0)}; cl_platform_id clPlatform = platform1.get(); cl_context_properties properties[3] = {CL_CONTEXT_PLATFORM, reinterpret_cast(clPlatform), 0}; auto clContext = clCreateContext(properties, 2, clDevices, nullptr, nullptr, &retVal); EXPECT_EQ(CL_INVALID_DEVICE, retVal); EXPECT_EQ(nullptr, clContext); } } // namespace ClCreateContextTests compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_create_image_tests.cpp000066400000000000000000001406331422164147700274200ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/hw_info.h" #include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "opencl/source/context/context.h" #include "opencl/source/mem_obj/image.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/fixtures/multi_root_device_fixture.h" #include 
"opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/test_macros/test_checks_ocl.h" #include "cl_api_tests.h" using namespace NEO; namespace ClCreateImageTests { template struct clCreateImageTests : public ApiFixture<>, public T { void SetUp() override { ApiFixture::SetUp(); // clang-format off imageFormat.image_channel_order = CL_RGBA; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_width = 32; imageDesc.image_height = 32; imageDesc.image_depth = 1; imageDesc.image_array_size = 1; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = nullptr; // clang-format on } void TearDown() override { ApiFixture::TearDown(); } cl_image_format imageFormat; cl_image_desc imageDesc; }; typedef clCreateImageTests<::testing::Test> clCreateImageTest; TEST_F(clCreateImageTest, GivenNullHostPtrWhenCreatingImageThenImageIsCreatedAndSuccessReturned) { REQUIRE_IMAGE_SUPPORT_OR_SKIP(pContext); auto image = clCreateImage( pContext, CL_MEM_READ_WRITE, &imageFormat, &imageDesc, nullptr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, image); retVal = clReleaseMemObject(image); EXPECT_EQ(CL_SUCCESS, retVal); } HWTEST_F(clCreateImageTest, GivenDeviceThatDoesntSupportImagesWhenCreatingTiledImageThenInvalidOperationErrorIsReturned) { REQUIRE_IMAGE_SUPPORT_OR_SKIP(pContext); MockClDevice mockClDevice{MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get(), 0)}; MockContext mockContext{&mockClDevice}; mockClDevice.sharedDeviceInfo.imageSupport = CL_FALSE; cl_bool imageSupportInfo = CL_TRUE; auto status = clGetDeviceInfo(&mockClDevice, CL_DEVICE_IMAGE_SUPPORT, sizeof(imageSupportInfo), &imageSupportInfo, nullptr); EXPECT_EQ(CL_SUCCESS, status); cl_bool expectedValue = CL_FALSE; EXPECT_EQ(expectedValue, imageSupportInfo); auto image = clCreateImage( &mockContext, CL_MEM_READ_WRITE, &imageFormat, 
&imageDesc, nullptr, &retVal); if (UnitTestHelper::tiledImagesSupported) { EXPECT_EQ(CL_INVALID_OPERATION, retVal); EXPECT_EQ(nullptr, image); } else { EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, image); retVal = clReleaseMemObject(image); EXPECT_EQ(CL_SUCCESS, retVal); } } HWTEST_F(clCreateImageTest, GivenDeviceThatDoesntSupportImagesWhenCreatingNonTiledImageThenCreate) { REQUIRE_IMAGE_SUPPORT_OR_SKIP(pContext); MockClDevice mockClDevice{MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get(), 0)}; MockContext mockContext{&mockClDevice}; mockClDevice.sharedDeviceInfo.imageSupport = CL_FALSE; cl_bool imageSupportInfo = CL_TRUE; auto status = clGetDeviceInfo(&mockClDevice, CL_DEVICE_IMAGE_SUPPORT, sizeof(imageSupportInfo), &imageSupportInfo, nullptr); EXPECT_EQ(CL_SUCCESS, status); cl_bool expectedValue = CL_FALSE; EXPECT_EQ(expectedValue, imageSupportInfo); imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D; imageDesc.image_height = 1; auto image = clCreateImage( &mockContext, CL_MEM_READ_WRITE, &imageFormat, &imageDesc, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, image); retVal = clReleaseMemObject(image); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clCreateImageTest, GivenDeviceThatDoesntSupportImagesWhenCreatingImageWithPropertiesINTELThenImageCreatedAndSuccessIsReturned) { DebugManagerStateRestore stateRestore; MockClDevice mockClDevice{MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get(), 0)}; MockContext mockContext{&mockClDevice}; mockClDevice.sharedDeviceInfo.imageSupport = CL_FALSE; cl_bool imageSupportInfo = CL_TRUE; auto status = clGetDeviceInfo(&mockClDevice, CL_DEVICE_IMAGE_SUPPORT, sizeof(imageSupportInfo), &imageSupportInfo, nullptr); EXPECT_EQ(CL_SUCCESS, status); cl_bool expectedValue = CL_FALSE; EXPECT_EQ(expectedValue, imageSupportInfo); imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D; imageDesc.image_height = 1; DebugManager.flags.ForceLinearImages.set(true); auto image = clCreateImageWithPropertiesINTEL( 
&mockContext, nullptr, 0, &imageFormat, &imageDesc, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, image); retVal = clReleaseMemObject(image); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clCreateImageTest, GivenDeviceThatDoesntSupportImagesWhenCreatingImagesWithPropertiesAndWithoutThenInvalidOperationErrorIsReturned) { auto hardwareInfo = *defaultHwInfo; hardwareInfo.capabilityTable.supportsImages = false; auto pClDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hardwareInfo, 0)); cl_device_id deviceId = pClDevice.get(); auto pContext = std::unique_ptr(Context::create(nullptr, ClDeviceVector(&deviceId, 1), nullptr, nullptr, retVal)); auto image = clCreateImage(pContext.get(), CL_MEM_READ_WRITE, &imageFormat, &imageDesc, nullptr, &retVal); EXPECT_EQ(CL_INVALID_OPERATION, retVal); EXPECT_EQ(nullptr, image); auto imageWithProperties = clCreateImageWithProperties(pContext.get(), nullptr, CL_MEM_READ_WRITE, &imageFormat, &imageDesc, nullptr, &retVal); EXPECT_EQ(CL_INVALID_OPERATION, retVal); EXPECT_EQ(nullptr, imageWithProperties); } TEST_F(clCreateImageTest, GivenNullContextWhenCreatingImageWithPropertiesThenInvalidContextErrorIsReturned) { auto image = clCreateImageWithProperties( nullptr, nullptr, CL_MEM_READ_WRITE, &imageFormat, &imageDesc, nullptr, &retVal); ASSERT_EQ(CL_INVALID_CONTEXT, retVal); EXPECT_EQ(nullptr, image); } TEST_F(clCreateImageTest, GivenNonNullHostPtrAndAlignedRowPitchWhenCreatingImageThenImageIsCreatedAndSuccessReturned) { REQUIRE_IMAGE_SUPPORT_OR_SKIP(pContext); char hostPtr[4096]; imageDesc.image_row_pitch = 128; auto image = clCreateImage( pContext, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, &imageFormat, &imageDesc, hostPtr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, image); retVal = clReleaseMemObject(image); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clCreateImageTest, GivenNonNullHostPtrAndUnalignedRowPitchWhenCreatingImageThenInvalidImageDescriptotErrorIsReturned) { 
REQUIRE_IMAGE_SUPPORT_OR_SKIP(pContext); char hostPtr[4096]; imageDesc.image_row_pitch = 129; auto image = clCreateImage( pContext, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, &imageFormat, &imageDesc, hostPtr, &retVal); ASSERT_EQ(CL_INVALID_IMAGE_DESCRIPTOR, retVal); EXPECT_EQ(nullptr, image); } TEST_F(clCreateImageTest, GivenNonNullHostPtrAndSmallRowPitchWhenCreatingImageThenInvalidImageDescriptorErrorIsReturned) { REQUIRE_IMAGE_SUPPORT_OR_SKIP(pContext); char hostPtr[4096]; imageDesc.image_row_pitch = 4; auto image = clCreateImage( pContext, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, &imageFormat, &imageDesc, hostPtr, &retVal); ASSERT_EQ(CL_INVALID_IMAGE_DESCRIPTOR, retVal); EXPECT_EQ(nullptr, image); } TEST_F(clCreateImageTest, GivenUnrestrictedIntelFlagWhenCreatingImageWithInvalidFlagCombinationThenImageIsCreatedAndSuccessReturned) { REQUIRE_IMAGE_SUPPORT_OR_SKIP(pContext); cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY | CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL; auto image = clCreateImage( pContext, flags, &imageFormat, &imageDesc, nullptr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, image); retVal = clReleaseMemObject(image); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clCreateImageTest, GivenNotNullHostPtrAndNoHostPtrFlagWhenCreatingImageThenInvalidHostPtrErrorIsReturned) { REQUIRE_IMAGE_SUPPORT_OR_SKIP(pContext); char hostPtr[4096]; auto image = clCreateImage( pContext, CL_MEM_READ_WRITE, &imageFormat, &imageDesc, hostPtr, &retVal); ASSERT_EQ(CL_INVALID_HOST_PTR, retVal); EXPECT_EQ(nullptr, image); } TEST_F(clCreateImageTest, GivenInvalidFlagBitsWhenCreatingImageThenInvalidValueErrorIsReturned) { REQUIRE_IMAGE_SUPPORT_OR_SKIP(pContext); cl_mem_flags flags = (1 << 12); auto image = clCreateImage( pContext, flags, &imageFormat, &imageDesc, nullptr, &retVal); ASSERT_EQ(CL_INVALID_VALUE, retVal); EXPECT_EQ(nullptr, image); retVal = clReleaseMemObject(image); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } TEST_F(clCreateImageTest, 
GivenInvalidFlagBitsWhenCreatingImageFromAnotherImageThenInvalidValueErrorIsReturned) { REQUIRE_IMAGE_SUPPORT_OR_SKIP(pContext); imageFormat.image_channel_order = CL_NV12_INTEL; auto image = clCreateImage( pContext, CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, &imageFormat, &imageDesc, nullptr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, image); imageFormat.image_channel_order = CL_RG; imageDesc.mem_object = image; cl_mem_flags flags = (1 << 30); auto imageFromImageObject = clCreateImage( pContext, flags, &imageFormat, &imageDesc, nullptr, &retVal); ASSERT_EQ(CL_INVALID_VALUE, retVal); EXPECT_EQ(nullptr, imageFromImageObject); retVal = clReleaseMemObject(image); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(imageFromImageObject); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } TEST_F(clCreateImageTest, GivenInvalidRowPitchWhenCreatingImageThenInvalidImageDescriptorErrorIsReturned) { REQUIRE_IMAGE_SUPPORT_OR_SKIP(pContext); imageDesc.image_row_pitch = 655; auto image = clCreateImage( pContext, CL_MEM_READ_WRITE, &imageFormat, &imageDesc, nullptr, &retVal); ASSERT_EQ(CL_INVALID_IMAGE_DESCRIPTOR, retVal); EXPECT_EQ(nullptr, image); retVal = clReleaseMemObject(image); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } TEST_F(clCreateImageTest, GivenNullHostPtrAndCopyHostPtrFlagWhenCreatingImageThenInvalidHostPtrErrorIsReturned) { REQUIRE_IMAGE_SUPPORT_OR_SKIP(pContext); auto image = clCreateImage( pContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, &imageFormat, &imageDesc, nullptr, &retVal); ASSERT_EQ(CL_INVALID_HOST_PTR, retVal); EXPECT_EQ(nullptr, image); } TEST_F(clCreateImageTest, GivenNullHostPtrAndMemUseHostPtrFlagWhenCreatingImageThenInvalidHostPtrErrorIsReturned) { REQUIRE_IMAGE_SUPPORT_OR_SKIP(pContext); auto image = clCreateImage( pContext, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, &imageFormat, &imageDesc, nullptr, &retVal); ASSERT_EQ(CL_INVALID_HOST_PTR, retVal); EXPECT_EQ(nullptr, image); } TEST_F(clCreateImageTest, 
GivenNullHostPtrAndNonZeroRowPitchWhenCreatingImageThenInvalidImageDescriptorErrorIsReturned) { REQUIRE_IMAGE_SUPPORT_OR_SKIP(pContext); imageDesc.image_row_pitch = 4; auto image = clCreateImage( pContext, CL_MEM_READ_WRITE, &imageFormat, &imageDesc, nullptr, &retVal); ASSERT_EQ(CL_INVALID_IMAGE_DESCRIPTOR, retVal); EXPECT_EQ(nullptr, image); retVal = clReleaseMemObject(image); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } TEST_F(clCreateImageTest, GivenDeviceNotSupportingImagesWhenCreatingImageFromBufferThenInvalidOperationErrorIsReturned) { auto hardwareInfo = *defaultHwInfo; hardwareInfo.capabilityTable.supportsImages = false; auto pClDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hardwareInfo, 0)); cl_device_id deviceId = pClDevice.get(); auto pContext = std::unique_ptr(Context::create(nullptr, ClDeviceVector(&deviceId, 1), nullptr, nullptr, retVal)); auto buffer = clCreateBuffer(pContext.get(), CL_MEM_READ_WRITE, 4096 * 9, nullptr, nullptr); imageDesc.mem_object = buffer; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; auto image = clCreateImageWithPropertiesINTEL( pContext.get(), nullptr, 0, &imageFormat, &imageDesc, nullptr, &retVal); EXPECT_EQ(nullptr, image); EXPECT_EQ(CL_INVALID_OPERATION, retVal); clReleaseMemObject(buffer); } TEST_F(clCreateImageTest, GivenNonZeroPitchWhenCreatingImageFromBufferThenImageIsCreatedAndSuccessReturned) { REQUIRE_IMAGES_OR_SKIP(pContext); auto buffer = clCreateBuffer(pContext, CL_MEM_READ_WRITE, 4096 * 9, nullptr, nullptr); auto &helper = HwHelper::get(renderCoreFamily); HardwareInfo hardwareInfo = *defaultHwInfo; imageDesc.mem_object = buffer; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_width = 17; imageDesc.image_height = 17; imageDesc.image_row_pitch = helper.getPitchAlignmentForImage(&hardwareInfo) * 17; auto image = clCreateImage( pContext, CL_MEM_READ_WRITE, &imageFormat, &imageDesc, nullptr, &retVal); ASSERT_NE(CL_INVALID_IMAGE_DESCRIPTOR, retVal); EXPECT_NE(nullptr, image); 
EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(image); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(buffer); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clCreateImageTest, GivenNotNullHostPtrAndRowPitchIsNotGreaterThanWidthTimesElementSizeWhenCreatingImageThenInvalidImageDescriptorErrorIsReturned) { REQUIRE_IMAGE_SUPPORT_OR_SKIP(pContext); imageDesc.image_row_pitch = 64; auto image = clCreateImage( pContext, CL_MEM_READ_WRITE, &imageFormat, &imageDesc, nullptr, &retVal); ASSERT_EQ(CL_INVALID_IMAGE_DESCRIPTOR, retVal); EXPECT_EQ(nullptr, image); retVal = clReleaseMemObject(image); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } TEST_F(clCreateImageTest, GivenNullContextWhenCreatingImageThenInvalidContextErrorIsReturned) { auto image = clCreateImage( nullptr, CL_MEM_READ_WRITE, &imageFormat, &imageDesc, nullptr, &retVal); ASSERT_EQ(CL_INVALID_CONTEXT, retVal); EXPECT_EQ(nullptr, image); retVal = clReleaseMemObject(image); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } TEST_F(clCreateImageTest, WhenCreatingImageWithPropertiesThenParametersAreCorrectlyPassed) { VariableBackup imageCreateBackup{&ImageFunctions::validateAndCreateImage}; cl_context context = pContext; cl_mem_properties *propertiesValues[] = {nullptr, reinterpret_cast(0x1234)}; cl_mem_flags flagsValues[] = {0, 4321}; cl_image_format imageFormat = this->imageFormat; cl_image_desc imageDesc = this->imageDesc; void *pHostMem = reinterpret_cast(0x8000); for (auto properties : propertiesValues) { for (auto flags : flagsValues) { auto mockFunction = [context, properties, flags, &imageFormat, &imageDesc, pHostMem](cl_context contextArg, const cl_mem_properties *propertiesArg, cl_mem_flags flagsArg, cl_mem_flags_intel flagsIntelArg, const cl_image_format *imageFormatArg, const cl_image_desc *imageDescArg, const void *hostPtrArg, cl_int &errcodeRetArg) -> cl_mem { cl_mem_flags_intel expectedFlagsIntelArg = 0; EXPECT_EQ(context, contextArg); EXPECT_EQ(properties, propertiesArg); EXPECT_EQ(flags, 
flagsArg); EXPECT_EQ(expectedFlagsIntelArg, flagsIntelArg); EXPECT_EQ(&imageFormat, imageFormatArg); EXPECT_EQ(&imageDesc, imageDescArg); EXPECT_EQ(pHostMem, hostPtrArg); return nullptr; }; imageCreateBackup = mockFunction; clCreateImageWithProperties(context, properties, flags, &imageFormat, &imageDesc, pHostMem, nullptr); } } } TEST_F(clCreateImageTest, WhenCreatingImageWithPropertiesThenErrorCodeIsCorrectlySet) { REQUIRE_IMAGE_SUPPORT_OR_SKIP(pContext); VariableBackup imageCreateBackup{&ImageFunctions::validateAndCreateImage}; cl_mem_properties *properties = nullptr; cl_mem_flags flags = 0; void *pHostMem = nullptr; cl_int errcodeRet; cl_int retValues[] = {CL_SUCCESS, CL_INVALID_PROPERTY}; for (auto retValue : retValues) { auto mockFunction = [retValue](cl_context contextArg, const cl_mem_properties *propertiesArg, cl_mem_flags flagsArg, cl_mem_flags_intel flagsIntelArg, const cl_image_format *imageFormatArg, const cl_image_desc *imageDescArg, const void *hostPtrArg, cl_int &errcodeRetArg) -> cl_mem { errcodeRetArg = retValue; return nullptr; }; imageCreateBackup = mockFunction; clCreateImageWithProperties(pContext, properties, flags, &imageFormat, &imageDesc, pHostMem, &errcodeRet); EXPECT_EQ(retValue, errcodeRet); } } TEST_F(clCreateImageTest, GivenImageCreatedWithNullPropertiesWhenQueryingPropertiesThenNothingIsReturned) { REQUIRE_IMAGE_SUPPORT_OR_SKIP(pContext); cl_int retVal = CL_SUCCESS; auto image = clCreateImageWithProperties(pContext, nullptr, 0, &imageFormat, &imageDesc, nullptr, &retVal); EXPECT_EQ(retVal, CL_SUCCESS); EXPECT_NE(nullptr, image); size_t propertiesSize; retVal = clGetMemObjectInfo(image, CL_MEM_PROPERTIES, 0, nullptr, &propertiesSize); EXPECT_EQ(retVal, CL_SUCCESS); EXPECT_EQ(0u, propertiesSize); clReleaseMemObject(image); } TEST_F(clCreateImageTest, WhenCreatingImageWithPropertiesThenPropertiesAreCorrectlyStored) { REQUIRE_IMAGE_SUPPORT_OR_SKIP(pContext); cl_int retVal = CL_SUCCESS; cl_mem_properties properties[5]; size_t 
propertiesSize; std::vector> propertiesToTest{ {0}, {CL_MEM_FLAGS, CL_MEM_WRITE_ONLY, 0}, {CL_MEM_FLAGS_INTEL, CL_MEM_LOCALLY_UNCACHED_RESOURCE, 0}, {CL_MEM_FLAGS, CL_MEM_WRITE_ONLY, CL_MEM_FLAGS_INTEL, CL_MEM_LOCALLY_UNCACHED_RESOURCE, 0}}; for (auto testProperties : propertiesToTest) { auto image = clCreateImageWithProperties(pContext, testProperties.data(), 0, &imageFormat, &imageDesc, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, image); retVal = clGetMemObjectInfo(image, CL_MEM_PROPERTIES, sizeof(properties), properties, &propertiesSize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(testProperties.size() * sizeof(cl_mem_properties), propertiesSize); for (size_t i = 0; i < testProperties.size(); i++) { EXPECT_EQ(testProperties[i], properties[i]); } clReleaseMemObject(image); } } typedef clCreateImageTests<::testing::Test> clCreateImageTestYUV; TEST_F(clCreateImageTestYUV, GivenInvalidGlagWhenCreatingYuvImageThenInvalidValueErrorIsReturned) { REQUIRE_IMAGE_SUPPORT_OR_SKIP(pContext); imageFormat.image_channel_order = CL_YUYV_INTEL; auto image = clCreateImage( pContext, CL_MEM_READ_WRITE, &imageFormat, &imageDesc, nullptr, &retVal); ASSERT_EQ(CL_INVALID_VALUE, retVal); EXPECT_EQ(nullptr, image); retVal = clReleaseMemObject(image); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } TEST_F(clCreateImageTestYUV, Given1DImageTypeWhenCreatingYuvImageThenInvalidImageDescriptorErrorIsReturned) { REQUIRE_IMAGE_SUPPORT_OR_SKIP(pContext); imageFormat.image_channel_order = CL_YUYV_INTEL; imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D; auto image = clCreateImage( pContext, CL_MEM_READ_ONLY, &imageFormat, &imageDesc, nullptr, &retVal); ASSERT_EQ(CL_INVALID_IMAGE_DESCRIPTOR, retVal); EXPECT_EQ(nullptr, image); retVal = clReleaseMemObject(image); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } typedef clCreateImageTests<::testing::TestWithParam> clCreateImageValidFlags; static cl_mem_flags validFlags[] = { CL_MEM_READ_WRITE, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY, 
CL_MEM_USE_HOST_PTR, CL_MEM_ALLOC_HOST_PTR, CL_MEM_COPY_HOST_PTR, CL_MEM_HOST_WRITE_ONLY, CL_MEM_HOST_READ_ONLY, CL_MEM_HOST_NO_ACCESS, CL_MEM_NO_ACCESS_INTEL, CL_MEM_FORCE_LINEAR_STORAGE_INTEL, CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL, }; INSTANTIATE_TEST_CASE_P(CreateImageWithFlags, clCreateImageValidFlags, ::testing::ValuesIn(validFlags)); TEST_P(clCreateImageValidFlags, GivenValidFlagsWhenCreatingImageThenImageIsCreatedAndSuccessReturned) { REQUIRE_IMAGE_SUPPORT_OR_SKIP(pContext); cl_mem_flags flags = GetParam(); std::unique_ptr ptr; char *hostPtr = nullptr; if (flags & CL_MEM_USE_HOST_PTR || flags & CL_MEM_COPY_HOST_PTR) { ptr = std::make_unique(alignUp(imageDesc.image_width * imageDesc.image_height * 4, MemoryConstants::pageSize)); hostPtr = ptr.get(); } auto image = clCreateImage( pContext, flags, &imageFormat, &imageDesc, hostPtr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, image); retVal = clReleaseMemObject(image); EXPECT_EQ(CL_SUCCESS, retVal); } typedef clCreateImageTests<::testing::TestWithParam> clCreateImageInvalidFlags; static cl_mem_flags invalidFlagsCombinations[] = { CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY, CL_MEM_READ_WRITE | CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY, CL_MEM_ALLOC_HOST_PTR | CL_MEM_USE_HOST_PTR, CL_MEM_COPY_HOST_PTR | CL_MEM_USE_HOST_PTR, CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_READ_ONLY, CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS, CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS, CL_MEM_NO_ACCESS_INTEL | CL_MEM_READ_WRITE, CL_MEM_NO_ACCESS_INTEL | CL_MEM_WRITE_ONLY, CL_MEM_NO_ACCESS_INTEL | CL_MEM_READ_ONLY}; INSTANTIATE_TEST_CASE_P(CreateImageWithFlags, clCreateImageInvalidFlags, ::testing::ValuesIn(invalidFlagsCombinations)); TEST_P(clCreateImageInvalidFlags, GivenInvalidFlagsCombinationsWhenCreatingImageThenInvalidValueErrorIsReturned) { REQUIRE_IMAGE_SUPPORT_OR_SKIP(pContext); char ptr[10]; imageDesc.image_row_pitch = 128; cl_mem_flags flags = GetParam(); auto image = clCreateImage( pContext, flags, 
&imageFormat, &imageDesc, ptr, &retVal); ASSERT_EQ(CL_INVALID_VALUE, retVal); EXPECT_EQ(nullptr, image); retVal = clReleaseMemObject(image); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); cl_mem_properties_intel properties[] = {CL_MEM_FLAGS, flags, 0}; image = clCreateImageWithPropertiesINTEL( pContext, properties, 0, &imageFormat, &imageDesc, ptr, &retVal); ASSERT_EQ(CL_INVALID_PROPERTY, retVal); EXPECT_EQ(nullptr, image); retVal = clReleaseMemObject(image); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); image = clCreateImageWithPropertiesINTEL( pContext, nullptr, flags, &imageFormat, &imageDesc, ptr, &retVal); ASSERT_EQ(CL_INVALID_VALUE, retVal); EXPECT_EQ(nullptr, image); retVal = clReleaseMemObject(image); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } struct ImageFlags { cl_mem_flags parentFlags; cl_mem_flags flags; }; static ImageFlags flagsWithUnrestrictedIntel[] = { {CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL, CL_MEM_READ_WRITE}, {CL_MEM_READ_WRITE, CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL}}; typedef clCreateImageTests<::testing::TestWithParam> clCreateImageFlagsUnrestrictedIntel; INSTANTIATE_TEST_CASE_P(CreateImageWithFlags, clCreateImageFlagsUnrestrictedIntel, ::testing::ValuesIn(flagsWithUnrestrictedIntel)); TEST_P(clCreateImageFlagsUnrestrictedIntel, GivenFlagsIncludingUnrestrictedIntelWhenCreatingImageThenImageIsCreatedAndSuccessReturned) { REQUIRE_IMAGES_OR_SKIP(pContext); imageFormat.image_channel_order = CL_NV12_INTEL; ImageFlags imageFlags = GetParam(); cl_mem_flags parentFlags = imageFlags.parentFlags; cl_mem_flags flags = imageFlags.flags; auto image = clCreateImage( pContext, parentFlags | CL_MEM_HOST_NO_ACCESS, &imageFormat, &imageDesc, nullptr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, image); imageDesc.mem_object = image; imageFormat.image_channel_order = CL_RG; auto imageFromImageObject = clCreateImage( pContext, flags, &imageFormat, &imageDesc, nullptr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, imageFromImageObject); retVal 
= clReleaseMemObject(image); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(imageFromImageObject); EXPECT_EQ(CL_SUCCESS, retVal); } static ImageFlags validFlagsAndParentFlags[] = { {CL_MEM_WRITE_ONLY, CL_MEM_HOST_NO_ACCESS}, {CL_MEM_READ_ONLY, CL_MEM_HOST_NO_ACCESS}, {CL_MEM_NO_ACCESS_INTEL, CL_MEM_HOST_NO_ACCESS}, {CL_MEM_HOST_NO_ACCESS, CL_MEM_READ_WRITE}}; typedef clCreateImageTests<::testing::TestWithParam> clCreateImageValidFlagsAndParentFlagsCombinations; INSTANTIATE_TEST_CASE_P(CreateImageWithFlags, clCreateImageValidFlagsAndParentFlagsCombinations, ::testing::ValuesIn(validFlagsAndParentFlags)); TEST_P(clCreateImageValidFlagsAndParentFlagsCombinations, GivenValidFlagsAndParentFlagsWhenCreatingImageThenImageIsCreatedAndSuccessReturned) { REQUIRE_IMAGES_OR_SKIP(pContext); imageFormat.image_channel_order = CL_NV12_INTEL; ImageFlags imageFlags = GetParam(); cl_mem_flags parentFlags = imageFlags.parentFlags; cl_mem_flags flags = imageFlags.flags; auto image = clCreateImage( pContext, parentFlags | CL_MEM_HOST_NO_ACCESS, &imageFormat, &imageDesc, nullptr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, image); imageDesc.mem_object = image; imageFormat.image_channel_order = CL_RG; auto imageFromImageObject = clCreateImage( pContext, flags, &imageFormat, &imageDesc, nullptr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, imageFromImageObject); retVal = clReleaseMemObject(image); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(imageFromImageObject); EXPECT_EQ(CL_SUCCESS, retVal); } static ImageFlags invalidFlagsAndParentFlags[] = { {CL_MEM_WRITE_ONLY, CL_MEM_READ_WRITE}, {CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY}, {CL_MEM_READ_ONLY, CL_MEM_READ_WRITE}, {CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY}, {CL_MEM_NO_ACCESS_INTEL, CL_MEM_READ_WRITE}, {CL_MEM_NO_ACCESS_INTEL, CL_MEM_WRITE_ONLY}, {CL_MEM_NO_ACCESS_INTEL, CL_MEM_READ_ONLY}, {CL_MEM_HOST_NO_ACCESS, CL_MEM_HOST_WRITE_ONLY}, {CL_MEM_HOST_NO_ACCESS, CL_MEM_HOST_READ_ONLY}}; 
typedef clCreateImageTests<::testing::TestWithParam> clCreateImageInvalidFlagsAndParentFlagsCombinations; INSTANTIATE_TEST_CASE_P(CreateImageWithFlags, clCreateImageInvalidFlagsAndParentFlagsCombinations, ::testing::ValuesIn(invalidFlagsAndParentFlags)); TEST_P(clCreateImageInvalidFlagsAndParentFlagsCombinations, GivenInvalidFlagsAndParentFlagsWhenCreatingImageThenInvalidMemObjectErrorIsReturned) { REQUIRE_IMAGE_SUPPORT_OR_SKIP(pContext); imageFormat.image_channel_order = CL_NV12_INTEL; ImageFlags imageFlags = GetParam(); cl_mem_flags parentFlags = imageFlags.parentFlags; cl_mem_flags flags = imageFlags.flags; auto image = clCreateImage( pContext, parentFlags | CL_MEM_HOST_NO_ACCESS, &imageFormat, &imageDesc, nullptr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, image); imageFormat.image_channel_order = CL_RG; imageDesc.mem_object = image; auto imageFromImageObject = clCreateImage( pContext, flags, &imageFormat, &imageDesc, nullptr, &retVal); ASSERT_EQ(CL_INVALID_VALUE, retVal); EXPECT_EQ(nullptr, imageFromImageObject); retVal = clReleaseMemObject(image); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(imageFromImageObject); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } struct ImageSizes { size_t width; size_t height; size_t depth; }; ImageSizes validImage2DSizes[] = {{64, 64, 1}, {3, 3, 1}, {8192, 1, 1}, {117, 39, 1}, {16384, 4, 1}, {4, 16384, 1}}; typedef clCreateImageTests<::testing::TestWithParam> clCreateImageValidSizesTest; INSTANTIATE_TEST_CASE_P(validImage2DSizes, clCreateImageValidSizesTest, ::testing::ValuesIn(validImage2DSizes)); TEST_P(clCreateImageValidSizesTest, GivenValidSizesWhenCreatingImageThenImageIsCreatedAndSuccessReturned) { REQUIRE_IMAGE_SUPPORT_OR_SKIP(pContext); ImageSizes sizes = GetParam(); imageDesc.image_width = sizes.width; imageDesc.image_height = sizes.height; imageDesc.image_depth = sizes.depth; auto image = clCreateImage( pContext, CL_MEM_READ_WRITE, &imageFormat, &imageDesc, nullptr, &retVal); 
EXPECT_NE(nullptr, image); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(image); } typedef clCreateImageTests<::testing::Test> clCreateImage2DTest; TEST_F(clCreateImage2DTest, GivenValidParametersWhenCreating2DImageThenImageIsCreatedAndSuccessReturned) { REQUIRE_IMAGE_SUPPORT_OR_SKIP(pContext); auto image = clCreateImage2D( pContext, CL_MEM_READ_WRITE, &imageFormat, 10, 10, 0, 0, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, image); retVal = clReleaseMemObject(image); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clCreateImage2DTest, GivenNoPtrToReturnValueWhenCreating2DImageThenImageIsCreated) { REQUIRE_IMAGE_SUPPORT_OR_SKIP(pContext); auto image = clCreateImage2D( pContext, CL_MEM_READ_WRITE, &imageFormat, 10, 10, 0, 0, nullptr); EXPECT_NE(nullptr, image); retVal = clReleaseMemObject(image); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clCreateImage2DTest, GivenInvalidContextsWhenCreating2DImageThenInvalidContextErrorIsReturned) { auto image = clCreateImage2D( nullptr, CL_MEM_READ_WRITE, &imageFormat, 10, 10, 0, 0, &retVal); EXPECT_EQ(CL_INVALID_CONTEXT, retVal); EXPECT_EQ(nullptr, image); } TEST_F(clCreateImage2DTest, GivenDeviceThatDoesntSupportImagesWhenCreatingImagesWithclCreateImage2DThenInvalidOperationErrorIsReturned) { auto hardwareInfo = *defaultHwInfo; hardwareInfo.capabilityTable.supportsImages = false; auto pClDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hardwareInfo, 0)); cl_device_id deviceId = pClDevice.get(); auto pContext = std::unique_ptr(Context::create(nullptr, ClDeviceVector(&deviceId, 1), nullptr, nullptr, retVal)); auto image = clCreateImage2D( pContext.get(), CL_MEM_READ_WRITE, &imageFormat, 10, 10, 0, 0, &retVal); ASSERT_EQ(nullptr, image); ASSERT_EQ(CL_INVALID_OPERATION, retVal); } typedef clCreateImageTests<::testing::Test> clCreateImage3DTest; TEST_F(clCreateImage3DTest, GivenValidParametersWhenCreating3DImageThenImageIsCreatedAndSuccessReturned) { REQUIRE_IMAGE_SUPPORT_OR_SKIP(pContext); 
auto image = clCreateImage3D( pContext, CL_MEM_READ_WRITE, &imageFormat, 10, 10, 1, 0, 0, 0, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, image); retVal = clReleaseMemObject(image); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clCreateImage3DTest, GivenNoPtrToReturnValueWhenCreating3DImageThenImageIsCreated) { REQUIRE_IMAGE_SUPPORT_OR_SKIP(pContext); auto image = clCreateImage3D( pContext, CL_MEM_READ_WRITE, &imageFormat, 10, 10, 1, 0, 0, 0, nullptr); EXPECT_NE(nullptr, image); retVal = clReleaseMemObject(image); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clCreateImage3DTest, GivenInvalidContextsWhenCreating3DImageThenInvalidContextErrorIsReturned) { auto image = clCreateImage3D( nullptr, CL_MEM_READ_WRITE, &imageFormat, 10, 10, 1, 0, 0, 0, &retVal); EXPECT_EQ(CL_INVALID_CONTEXT, retVal); EXPECT_EQ(nullptr, image); } TEST_F(clCreateImage3DTest, GivenDeviceThatDoesntSupportImagesWhenCreatingImagesWithclCreateImage3DThenInvalidOperationErrorIsReturned) { auto hardwareInfo = *defaultHwInfo; hardwareInfo.capabilityTable.supportsImages = false; auto pClDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hardwareInfo, 0)); cl_device_id deviceId = pClDevice.get(); auto pContext = std::unique_ptr(Context::create(nullptr, ClDeviceVector(&deviceId, 1), nullptr, nullptr, retVal)); auto image = clCreateImage3D( pContext.get(), CL_MEM_READ_WRITE, &imageFormat, 10, 10, 1, 0, 0, 0, &retVal); ASSERT_EQ(nullptr, image); ASSERT_EQ(CL_INVALID_OPERATION, retVal); } using clCreateImageWithPropertiesINTELTest = clCreateImageTest; TEST_F(clCreateImageWithPropertiesINTELTest, GivenInvalidContextWhenCreatingImageWithPropertiesThenInvalidContextErrorIsReturned) { auto image = clCreateImageWithPropertiesINTEL( nullptr, nullptr, 0, nullptr, nullptr, nullptr, &retVal); ASSERT_EQ(CL_INVALID_CONTEXT, retVal); EXPECT_EQ(nullptr, image); } TEST_F(clCreateImageWithPropertiesINTELTest, 
GivenValidParametersWhenCreatingImageWithPropertiesThenImageIsCreatedAndSuccessReturned) { cl_mem_properties_intel properties[] = {CL_MEM_FLAGS, CL_MEM_READ_WRITE, 0}; auto image = clCreateImageWithPropertiesINTEL( pContext, properties, 0, &imageFormat, &imageDesc, nullptr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, image); retVal = clReleaseMemObject(image); EXPECT_EQ(CL_SUCCESS, retVal); image = clCreateImageWithPropertiesINTEL( pContext, nullptr, CL_MEM_READ_WRITE, &imageFormat, &imageDesc, nullptr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, image); retVal = clReleaseMemObject(image); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clCreateImageWithPropertiesINTELTest, GivenInvalidPropertyKeyWhenCreatingImageWithPropertiesThenInvalidValueErrorIsReturned) { cl_mem_properties_intel properties[] = {(cl_mem_properties_intel(1) << 31), 0, 0}; auto image = clCreateImageWithPropertiesINTEL( pContext, properties, 0, &imageFormat, &imageDesc, nullptr, &retVal); EXPECT_EQ(nullptr, image); EXPECT_EQ(CL_INVALID_PROPERTY, retVal); } typedef clCreateImageTests<::testing::Test> clCreateImageFromImageTest; TEST_F(clCreateImageFromImageTest, GivenImage2dWhenCreatingImage2dFromImageWithTheSameDescriptorAndValidFormatThenImageIsCreatedAndSuccessReturned) { REQUIRE_IMAGES_OR_SKIP(pContext); imageFormat.image_channel_order = CL_BGRA; auto image = clCreateImage( pContext, CL_MEM_READ_ONLY, &imageFormat, &imageDesc, nullptr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, image); retVal = clGetImageInfo(image, CL_IMAGE_WIDTH, sizeof(imageDesc.image_width), &imageDesc.image_width, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clGetImageInfo(image, CL_IMAGE_WIDTH, sizeof(imageDesc.image_height), &imageDesc.image_height, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clGetImageInfo(image, CL_IMAGE_DEPTH, sizeof(imageDesc.image_depth), &imageDesc.image_depth, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clGetImageInfo(image, 
CL_IMAGE_ROW_PITCH, sizeof(imageDesc.image_row_pitch), &imageDesc.image_row_pitch, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clGetImageInfo(image, CL_IMAGE_SLICE_PITCH, sizeof(imageDesc.image_slice_pitch), &imageDesc.image_slice_pitch, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clGetImageInfo(image, CL_IMAGE_NUM_MIP_LEVELS, sizeof(imageDesc.num_mip_levels), &imageDesc.num_mip_levels, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clGetImageInfo(image, CL_IMAGE_NUM_SAMPLES, sizeof(imageDesc.num_samples), &imageDesc.num_samples, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clGetImageInfo(image, CL_IMAGE_ARRAY_SIZE, sizeof(imageDesc.image_array_size), &imageDesc.image_array_size, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); imageDesc.mem_object = image; imageFormat.image_channel_order = CL_sBGRA; auto imageFromImageObject = clCreateImage( pContext, CL_MEM_READ_ONLY, &imageFormat, &imageDesc, nullptr, &retVal); if (pContext->getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features == false) { EXPECT_EQ(CL_IMAGE_FORMAT_NOT_SUPPORTED, retVal); EXPECT_EQ(nullptr, imageFromImageObject); } else { EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, imageFromImageObject); retVal = clReleaseMemObject(imageFromImageObject); EXPECT_EQ(CL_SUCCESS, retVal); } retVal = clReleaseMemObject(image); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clCreateImageFromImageTest, GivenImage2dWhenCreatingImage2dFromImageWithDifferentDescriptorAndValidFormatThenInvalidImageFormatDescriptorErrorIsReturned) { REQUIRE_IMAGES_OR_SKIP(pContext); imageFormat.image_channel_order = CL_BGRA; auto image = clCreateImage( pContext, CL_MEM_READ_ONLY, &imageFormat, &imageDesc, nullptr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, image); retVal = clGetImageInfo(image, CL_IMAGE_WIDTH, sizeof(imageDesc.image_width), &imageDesc.image_width, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clGetImageInfo(image, CL_IMAGE_WIDTH, sizeof(imageDesc.image_height), 
&imageDesc.image_height, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clGetImageInfo(image, CL_IMAGE_DEPTH, sizeof(imageDesc.image_depth), &imageDesc.image_depth, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clGetImageInfo(image, CL_IMAGE_ROW_PITCH, sizeof(imageDesc.image_row_pitch), &imageDesc.image_row_pitch, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clGetImageInfo(image, CL_IMAGE_SLICE_PITCH, sizeof(imageDesc.image_slice_pitch), &imageDesc.image_slice_pitch, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clGetImageInfo(image, CL_IMAGE_NUM_MIP_LEVELS, sizeof(imageDesc.num_mip_levels), &imageDesc.num_mip_levels, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clGetImageInfo(image, CL_IMAGE_NUM_SAMPLES, sizeof(imageDesc.num_samples), &imageDesc.num_samples, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clGetImageInfo(image, CL_IMAGE_ARRAY_SIZE, sizeof(imageDesc.image_array_size), &imageDesc.image_array_size, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); imageDesc.mem_object = image; imageDesc.image_width++; imageFormat.image_channel_order = CL_sBGRA; auto imageFromImageObject = clCreateImage( pContext, CL_MEM_READ_ONLY, &imageFormat, &imageDesc, nullptr, &retVal); if (pContext->getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features) { EXPECT_EQ(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, retVal); } else { EXPECT_EQ(CL_IMAGE_FORMAT_NOT_SUPPORTED, retVal); } EXPECT_EQ(nullptr, imageFromImageObject); retVal = clReleaseMemObject(image); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clCreateImageFromImageTest, GivenImage2dWhenCreatingImage2dFromImageWithTheSameDescriptorAndNotValidFormatThenInvalidImageFormatDescriptorErrorIsReturned) { REQUIRE_IMAGES_OR_SKIP(pContext); imageFormat.image_channel_order = CL_BGRA; auto image = clCreateImage( pContext, CL_MEM_READ_ONLY, &imageFormat, &imageDesc, nullptr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, image); retVal = clGetImageInfo(image, CL_IMAGE_WIDTH, sizeof(imageDesc.image_width), 
&imageDesc.image_width, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clGetImageInfo(image, CL_IMAGE_WIDTH, sizeof(imageDesc.image_height), &imageDesc.image_height, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clGetImageInfo(image, CL_IMAGE_DEPTH, sizeof(imageDesc.image_depth), &imageDesc.image_depth, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clGetImageInfo(image, CL_IMAGE_ROW_PITCH, sizeof(imageDesc.image_row_pitch), &imageDesc.image_row_pitch, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clGetImageInfo(image, CL_IMAGE_SLICE_PITCH, sizeof(imageDesc.image_slice_pitch), &imageDesc.image_slice_pitch, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clGetImageInfo(image, CL_IMAGE_NUM_MIP_LEVELS, sizeof(imageDesc.num_mip_levels), &imageDesc.num_mip_levels, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clGetImageInfo(image, CL_IMAGE_NUM_SAMPLES, sizeof(imageDesc.num_samples), &imageDesc.num_samples, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clGetImageInfo(image, CL_IMAGE_ARRAY_SIZE, sizeof(imageDesc.image_array_size), &imageDesc.image_array_size, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); imageDesc.mem_object = image; imageFormat.image_channel_order = CL_BGRA; auto imageFromImageObject = clCreateImage( pContext, CL_MEM_READ_ONLY, &imageFormat, &imageDesc, nullptr, &retVal); EXPECT_EQ(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, retVal); EXPECT_EQ(nullptr, imageFromImageObject); retVal = clReleaseMemObject(image); EXPECT_EQ(CL_SUCCESS, retVal); } uint32_t non2dImageTypes[] = {CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE1D_BUFFER, CL_MEM_OBJECT_IMAGE2D_ARRAY, CL_MEM_OBJECT_IMAGE3D}; struct clCreateNon2dImageFromImageTest : public clCreateImageFromImageTest, public ::testing::WithParamInterface { void SetUp() override { clCreateImageFromImageTest::SetUp(); image = ImageFunctions::validateAndCreateImage(pContext, nullptr, CL_MEM_READ_ONLY, 0, &imageFormat, &imageDesc, nullptr, retVal); EXPECT_EQ(CL_SUCCESS, retVal); 
EXPECT_NE(nullptr, image); imageDesc.mem_object = image; } void TearDown() override { retVal = clReleaseMemObject(image); clCreateImageFromImageTest::TearDown(); } cl_mem image; }; TEST_P(clCreateNon2dImageFromImageTest, GivenImage2dWhenCreatingImageFromNon2dImageThenInvalidImageDescriptorErrorIsReturned) { REQUIRE_IMAGE_SUPPORT_OR_SKIP(pContext); imageDesc.image_type = GetParam(); auto imageFromImageObject = clCreateImage( pContext, CL_MEM_READ_ONLY, &imageFormat, &imageDesc, nullptr, &retVal); EXPECT_EQ(CL_INVALID_IMAGE_DESCRIPTOR, retVal); EXPECT_EQ(nullptr, imageFromImageObject); } INSTANTIATE_TEST_CASE_P(clCreateNon2dImageFromImageTests, clCreateNon2dImageFromImageTest, ::testing::ValuesIn(non2dImageTypes)); using clCreateImageWithMultiDeviceContextTests = MultiRootDeviceFixture; TEST_F(clCreateImageWithMultiDeviceContextTests, GivenImageCreatedWithoutHostPtrAndWithContextdWithMultiDeviceThenGraphicsAllocationsAreProperlyCreatedAndMapPtrIsNotSet) { REQUIRE_IMAGES_OR_SKIP(defaultHwInfo); std::unique_ptr image(ImageHelper::create(context.get())); EXPECT_EQ(image->getMultiGraphicsAllocation().getGraphicsAllocations().size(), 3u); EXPECT_NE(image->getMultiGraphicsAllocation().getGraphicsAllocation(1u), nullptr); EXPECT_NE(image->getMultiGraphicsAllocation().getGraphicsAllocation(2u), nullptr); EXPECT_NE(image->getMultiGraphicsAllocation().getGraphicsAllocation(1u), image->getMultiGraphicsAllocation().getGraphicsAllocation(2u)); EXPECT_EQ(image->getAllocatedMapPtr(), nullptr); EXPECT_TRUE(image->getMultiGraphicsAllocation().requiresMigrations()); } TEST_F(clCreateImageWithMultiDeviceContextTests, GivenImageCreatedWithHostPtrAndWithContextdWithMultiDeviceThenGraphicsAllocationsAreProperlyCreatedAndMapPtrIsNotSet) { REQUIRE_IMAGES_OR_SKIP(defaultHwInfo); DebugManagerStateRestore dbgRestore; std::unique_ptr image(ImageHelper>::create(context.get())); EXPECT_EQ(image->getMultiGraphicsAllocation().getGraphicsAllocations().size(), 3u); 
EXPECT_NE(image->getMultiGraphicsAllocation().getGraphicsAllocation(1u), nullptr); EXPECT_NE(image->getMultiGraphicsAllocation().getGraphicsAllocation(2u), nullptr); EXPECT_NE(image->getMultiGraphicsAllocation().getGraphicsAllocation(1u), image->getMultiGraphicsAllocation().getGraphicsAllocation(2u)); EXPECT_TRUE(image->getMultiGraphicsAllocation().requiresMigrations()); EXPECT_EQ(image->getAllocatedMapPtr(), nullptr); } TEST_F(clCreateImageWithMultiDeviceContextTests, GivenContextdWithMultiDeviceFailingAllocationThenImageAllocateFails) { REQUIRE_IMAGES_OR_SKIP(defaultHwInfo); { static_cast(context.get()->getMemoryManager())->successAllocatedGraphicsMemoryIndex = 0u; static_cast(context.get()->getMemoryManager())->maxSuccessAllocatedGraphicsMemoryIndex = 0u; std::unique_ptr image(ImageHelper::create(context.get())); EXPECT_EQ(nullptr, image); } { static_cast(context.get()->getMemoryManager())->successAllocatedGraphicsMemoryIndex = 0u; static_cast(context.get()->getMemoryManager())->maxSuccessAllocatedGraphicsMemoryIndex = 1u; std::unique_ptr image(ImageHelper::create(context.get())); EXPECT_EQ(nullptr, image); } } } // namespace ClCreateImageTests compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_create_kernel_tests.inl000066400000000000000000000116041422164147700276110ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/file_io.h" #include "shared/source/program/kernel_info.h" #include "shared/test/common/helpers/test_files.h" #include "opencl/source/context/context.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clCreateKernelTests; namespace ULT { TEST_F(clCreateKernelTests, GivenCorrectKernelInProgramWhenCreatingNewKernelThenKernelIsCreatedAndSuccessIsReturned) { cl_kernel kernel = nullptr; cl_program pProgram = nullptr; cl_int binaryStatus = CL_SUCCESS; size_t binarySize = 0; std::string 
testFile; retrieveBinaryKernelFilename(testFile, "CopyBuffer_simd16_", ".bin"); auto pBinary = loadDataFromFile( testFile.c_str(), binarySize); ASSERT_NE(0u, binarySize); ASSERT_NE(nullptr, pBinary); const unsigned char *binaries[1] = {reinterpret_cast(pBinary.get())}; pProgram = clCreateProgramWithBinary( pContext, 1, &testedClDevice, &binarySize, binaries, &binaryStatus, &retVal); pBinary.reset(); EXPECT_NE(nullptr, pProgram); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clBuildProgram( pProgram, 1, &testedClDevice, nullptr, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); kernel = clCreateKernel( pProgram, "CopyBuffer", &retVal); EXPECT_NE(nullptr, kernel); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clReleaseKernel(kernel); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clCreateKernelTests, GivenInvalidKernelNameWhenCreatingNewKernelThenInvalidKernelNameErrorIsReturned) { cl_kernel kernel = nullptr; cl_program pProgram = nullptr; cl_int binaryStatus = CL_SUCCESS; size_t binarySize = 0; std::string testFile; retrieveBinaryKernelFilename(testFile, "CopyBuffer_simd16_", ".bin"); auto pBinary = loadDataFromFile( testFile.c_str(), binarySize); ASSERT_NE(0u, binarySize); ASSERT_NE(nullptr, pBinary); const unsigned char *binaries[1] = {reinterpret_cast(pBinary.get())}; pProgram = clCreateProgramWithBinary( pContext, 1, &testedClDevice, &binarySize, binaries, &binaryStatus, &retVal); pBinary.reset(); EXPECT_NE(nullptr, pProgram); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clBuildProgram( pProgram, 1, &testedClDevice, nullptr, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); kernel = clCreateKernel( pProgram, "WrongName", &retVal); ASSERT_EQ(nullptr, kernel); ASSERT_EQ(CL_INVALID_KERNEL_NAME, retVal); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clCreateKernelTests, GivenNullProgramWhenCreatingNewKernelThenInvalidProgramErrorIsReturned) { cl_kernel kernel = nullptr; kernel = 
clCreateKernel( nullptr, "CopyBuffer", &retVal); ASSERT_EQ(CL_INVALID_PROGRAM, retVal); ASSERT_EQ(nullptr, kernel); } TEST_F(clCreateKernelTests, GivenNullKernelNameWhenCreatingNewKernelThenInvalidValueErrorIsReturned) { cl_kernel kernel = nullptr; KernelInfo *pKernelInfo = new KernelInfo(); std::unique_ptr pMockProg = std::make_unique(pContext, false, toClDeviceVector(*pDevice)); pMockProg->addKernelInfo(pKernelInfo, testedRootDeviceIndex); kernel = clCreateKernel( pMockProg.get(), nullptr, &retVal); EXPECT_EQ(CL_INVALID_VALUE, retVal); EXPECT_EQ(nullptr, kernel); } TEST_F(clCreateKernelTests, GivenInvalidProgramWhenCreatingNewKernelThenInvalidProgramErrorIsReturned) { cl_kernel kernel = nullptr; kernel = clCreateKernel( reinterpret_cast(pContext), "CopyBuffer", &retVal); ASSERT_EQ(CL_INVALID_PROGRAM, retVal); ASSERT_EQ(nullptr, kernel); } TEST_F(clCreateKernelTests, GivenProgramWithBuildErrorWhenCreatingNewKernelThenInvalidProgramExecutableErrorIsReturned) { cl_kernel kernel = nullptr; std::unique_ptr pMockProg = std::make_unique(pContext, false, toClDeviceVector(*pDevice)); pMockProg->setBuildStatus(CL_BUILD_ERROR); kernel = clCreateKernel( pMockProg.get(), "", &retVal); EXPECT_EQ(CL_INVALID_PROGRAM_EXECUTABLE, retVal); EXPECT_EQ(nullptr, kernel); } TEST_F(clCreateKernelTests, GivenNullPtrForReturnWhenCreatingNewKernelThenKernelIsCreated) { cl_kernel kernel = nullptr; kernel = clCreateKernel( nullptr, "CopyBuffer", nullptr); ASSERT_EQ(nullptr, kernel); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_create_kernels_in_program_tests.inl000066400000000000000000000061561422164147700322170ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/file_io.h" #include "shared/test/common/helpers/test_files.h" #include "opencl/source/context/context.h" #include "cl_api_tests.h" using namespace NEO; struct clCreateKernelsInProgramTests : public api_tests { 
void SetUp() override { api_tests::SetUp(); std::string testFile; retrieveBinaryKernelFilename(testFile, "CopyBuffer_simd16_", ".bin"); size_t binarySize = 0; auto pBinary = loadDataFromFile( testFile.c_str(), binarySize); ASSERT_NE(0u, binarySize); ASSERT_NE(nullptr, pBinary); auto binaryStatus = CL_SUCCESS; const unsigned char *binaries[1] = {reinterpret_cast(pBinary.get())}; program = clCreateProgramWithBinary( pContext, 1, &testedClDevice, &binarySize, binaries, &binaryStatus, &retVal); pBinary.reset(); ASSERT_NE(nullptr, program); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clBuildProgram( program, 1, &testedClDevice, nullptr, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); } void TearDown() override { clReleaseKernel(kernel); clReleaseProgram(program); api_tests::TearDown(); } cl_program program = nullptr; cl_kernel kernel = nullptr; std::unique_ptr pBinary = nullptr; }; TEST_F(clCreateKernelsInProgramTests, GivenValidParametersWhenCreatingKernelObjectsThenKernelsAndSuccessAreReturned) { cl_uint numKernelsRet = 0; retVal = clCreateKernelsInProgram( program, 1, &kernel, &numKernelsRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, numKernelsRet); EXPECT_NE(nullptr, kernel); } TEST_F(clCreateKernelsInProgramTests, GivenNullKernelArgWhenCreatingKernelObjectsThenSuccessIsReturned) { cl_uint numKernelsRet = 0; retVal = clCreateKernelsInProgram( program, 0, nullptr, &numKernelsRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, numKernelsRet); } TEST_F(clCreateKernelsInProgramTests, GivenNullPtrForNumKernelsReturnWhenCreatingKernelObjectsThenSuccessIsReturned) { retVal = clCreateKernelsInProgram( program, 1, &kernel, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, kernel); } TEST_F(clCreateKernelsInProgramTests, GivenNullProgramWhenCreatingKernelObjectsThenInvalidProgramErrorIsReturn) { retVal = clCreateKernelsInProgram( nullptr, 1, &kernel, nullptr); EXPECT_EQ(CL_INVALID_PROGRAM, retVal); EXPECT_EQ(nullptr, kernel); } TEST_F(clCreateKernelsInProgramTests, 
GivenTooSmallOutputBufferWhenCreatingKernelObjectsThenInvalidValueErrorIsReturned) { retVal = clCreateKernelsInProgram( program, 0, &kernel, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); EXPECT_EQ(nullptr, kernel); } cl_create_perf_counters_command_queue_tests.inl000066400000000000000000000227621422164147700340410ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/api/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/context/context.h" #include "opencl/source/event/event.h" #include "opencl/test/unit_test/fixtures/device_instrumentation_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/os_interface/mock_performance_counters.h" #include "cl_api_tests.h" using namespace NEO; struct clCreatePerfCountersCommandQueueINTELTests : public DeviceInstrumentationFixture, public PerformanceCountersDeviceFixture, ::testing::Test { void SetUp() override { PerformanceCountersDeviceFixture::SetUp(); DeviceInstrumentationFixture::SetUp(true); deviceId = device.get(); retVal = CL_SUCCESS; context = std::unique_ptr(Context::create(nullptr, ClDeviceVector(&deviceId, 1), nullptr, nullptr, retVal)); } void TearDown() override { PerformanceCountersDeviceFixture::TearDown(); } std::unique_ptr context; cl_device_id deviceId; cl_int retVal; }; namespace ULT { TEST_F(clCreatePerfCountersCommandQueueINTELTests, GivenCorrectParamatersWhenCreatingPerfCountersCmdQThenCmdQIsCreatedAndPerfCountersAreEnabled) { cl_command_queue cmdQ = nullptr; cl_queue_properties properties = CL_QUEUE_PROFILING_ENABLE; cl_uint configuration = 0; cmdQ = clCreatePerfCountersCommandQueueINTEL(context.get(), deviceId, properties, configuration, &retVal); ASSERT_NE(nullptr, cmdQ); ASSERT_EQ(CL_SUCCESS, retVal); auto commandQueueObject = castToObject(cmdQ); EXPECT_TRUE(commandQueueObject->isPerfCountersEnabled()); retVal = 
clReleaseCommandQueue(cmdQ); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clCreatePerfCountersCommandQueueINTELTests, GivenNullPropertiesWhenCreatingPerfCountersCmdQThenInvalidQueuePropertiesErrorIsReturned) { cl_command_queue cmdQ = nullptr; cl_queue_properties properties = 0; cl_uint configuration = 0; cmdQ = clCreatePerfCountersCommandQueueINTEL(context.get(), deviceId, properties, configuration, &retVal); ASSERT_EQ(nullptr, cmdQ); ASSERT_EQ(CL_INVALID_QUEUE_PROPERTIES, retVal); } TEST_F(clCreatePerfCountersCommandQueueINTELTests, GivenClQueueOnDevicePropertyWhenCreatingPerfCountersCmdQThenInvalidQueuePropertiesErrorIsReturned) { cl_command_queue cmdQ = nullptr; cl_queue_properties properties = CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_ON_DEVICE; cl_uint configuration = 0; cmdQ = clCreatePerfCountersCommandQueueINTEL(context.get(), deviceId, properties, configuration, &retVal); ASSERT_EQ(nullptr, cmdQ); ASSERT_EQ(CL_INVALID_QUEUE_PROPERTIES, retVal); properties = CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_ON_DEVICE_DEFAULT; cmdQ = clCreatePerfCountersCommandQueueINTEL(context.get(), deviceId, properties, configuration, &retVal); ASSERT_EQ(nullptr, cmdQ); ASSERT_EQ(CL_INVALID_QUEUE_PROPERTIES, retVal); } TEST_F(clCreatePerfCountersCommandQueueINTELTests, GivenNullContextWhenCreatingPerfCountersCmdQThenInvalidContextErrorIsReturned) { cl_command_queue cmdQ = nullptr; cl_queue_properties properties = CL_QUEUE_PROFILING_ENABLE; cl_uint configuration = 0; cmdQ = clCreatePerfCountersCommandQueueINTEL(nullptr, deviceId, properties, configuration, &retVal); ASSERT_EQ(nullptr, cmdQ); ASSERT_EQ(CL_INVALID_CONTEXT, retVal); } TEST_F(clCreatePerfCountersCommandQueueINTELTests, GivenMaximumGtdiConfigurationWhenCreatingPerfCountersCmdQThenOutOfResourcesErrorIsReturned) { cl_command_queue cmdQ = nullptr; cl_queue_properties properties = CL_QUEUE_PROFILING_ENABLE; cl_uint configuration = 4; cmdQ = clCreatePerfCountersCommandQueueINTEL(context.get(), deviceId, properties, configuration, 
&retVal); ASSERT_EQ(nullptr, cmdQ); ASSERT_EQ(CL_INVALID_OPERATION, retVal); } TEST_F(clCreatePerfCountersCommandQueueINTELTests, GivenCorrectCmdQWhenEventIsCreatedThenPerfCountersAreEnabled) { cl_command_queue cmdQ = nullptr; cl_queue_properties properties = CL_QUEUE_PROFILING_ENABLE; cl_uint configuration = 0; cmdQ = clCreatePerfCountersCommandQueueINTEL(context.get(), deviceId, properties, configuration, &retVal); ASSERT_NE(nullptr, cmdQ); ASSERT_EQ(CL_SUCCESS, retVal); auto commandQueueObject = castToObject(cmdQ); EXPECT_TRUE(commandQueueObject->isPerfCountersEnabled()); Event event(commandQueueObject, CL_COMMAND_NDRANGE_KERNEL, 1, 5); EXPECT_TRUE(event.isPerfCountersEnabled()); retVal = clReleaseCommandQueue(cmdQ); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clCreatePerfCountersCommandQueueINTELTests, GivenInstrumentationEnabledIsFalseWhenCreatingPerfCountersCmdQThenInvalidDeviceErrorIsReturned) { hwInfo->capabilityTable.instrumentationEnabled = false; cl_command_queue cmdQ = nullptr; cl_queue_properties properties = CL_QUEUE_PROFILING_ENABLE; cl_uint configuration = 0; cmdQ = clCreatePerfCountersCommandQueueINTEL(context.get(), deviceId, properties, configuration, &retVal); ASSERT_EQ(nullptr, cmdQ); EXPECT_EQ(CL_INVALID_DEVICE, retVal); } TEST_F(clCreatePerfCountersCommandQueueINTELTests, GivenInvalidDeviceWhenCreatingPerfCountersCmdQThenInvalidDeviceErrorIsReturned) { cl_command_queue cmdQ = nullptr; cl_queue_properties properties = CL_QUEUE_PROFILING_ENABLE; cl_uint configuration = 0; cmdQ = clCreatePerfCountersCommandQueueINTEL(context.get(), (cl_device_id)context.get(), properties, configuration, &retVal); ASSERT_EQ(nullptr, cmdQ); ASSERT_EQ(CL_INVALID_DEVICE, retVal); } TEST_F(clCreatePerfCountersCommandQueueINTELTests, GivenInvalidMetricsLibraryWhenCreatingPerfCountersThenPerfCountersReturnError) { cl_command_queue cmdQ = nullptr; cl_queue_properties properties = CL_QUEUE_PROFILING_ENABLE; cl_uint configuration = 0; cmdQ = 
clCreatePerfCountersCommandQueueINTEL(context.get(), deviceId, properties, configuration, &retVal); auto commandQueueObject = castToObject(cmdQ); ASSERT_NE(nullptr, cmdQ); ASSERT_EQ(CL_SUCCESS, retVal); auto performanceCounters = commandQueueObject->getPerfCounters(); auto metricsLibary = static_cast(performanceCounters->getMetricsLibraryInterface()); metricsLibary->validOpen = false; ASSERT_NE(nullptr, metricsLibary); EXPECT_TRUE(commandQueueObject->isPerfCountersEnabled()); retVal = clReleaseCommandQueue(cmdQ); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clCreatePerfCountersCommandQueueINTELTests, givenInvalidMetricsLibraryWhenCreatingCommandQueueThenReturnError) { cl_command_queue cmdQ = nullptr; cl_queue_properties properties = CL_QUEUE_PROFILING_ENABLE; cl_uint configuration = 0; auto performanceCounters = device->getPerformanceCounters(); auto metricsLibary = static_cast(performanceCounters->getMetricsLibraryInterface()); metricsLibary->validOpen = false; cmdQ = clCreatePerfCountersCommandQueueINTEL(context.get(), deviceId, properties, configuration, &retVal); EXPECT_EQ(nullptr, cmdQ); EXPECT_EQ(CL_OUT_OF_RESOURCES, retVal); } struct clCreateCommandQueueWithPropertiesMdapiTests : public clCreatePerfCountersCommandQueueINTELTests { cl_queue_properties queueProperties[7] = {CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, CL_QUEUE_MDAPI_PROPERTIES_INTEL, CL_QUEUE_MDAPI_ENABLE_INTEL, CL_QUEUE_MDAPI_CONFIGURATION_INTEL, 0, 0}; }; TEST_F(clCreateCommandQueueWithPropertiesMdapiTests, givenCorrectParamsWhenCreatingQueueWithPropertiesThenEnablePerfCounters) { auto cmdQ = clCreateCommandQueueWithProperties(context.get(), deviceId, queueProperties, &retVal); ASSERT_NE(nullptr, cmdQ); EXPECT_EQ(CL_SUCCESS, retVal); auto commandQueueObject = castToObject(cmdQ); EXPECT_TRUE(commandQueueObject->isPerfCountersEnabled()); clReleaseCommandQueue(cmdQ); } TEST_F(clCreateCommandQueueWithPropertiesMdapiTests, 
givenParamsWithDisabledPerfCounterWhenCreatingQueueWithPropertiesThenCreateRegularQueue) { queueProperties[3] = 0; auto cmdQ = clCreateCommandQueueWithProperties(context.get(), deviceId, queueProperties, &retVal); ASSERT_NE(nullptr, cmdQ); EXPECT_EQ(CL_SUCCESS, retVal); auto commandQueueObject = castToObject(cmdQ); EXPECT_FALSE(commandQueueObject->isPerfCountersEnabled()); clReleaseCommandQueue(cmdQ); } TEST_F(clCreateCommandQueueWithPropertiesMdapiTests, givenIncorrectConfigurationWhenCreatingQueueWithPropertiesThenFail) { queueProperties[5] = 1; auto cmdQ = clCreateCommandQueueWithProperties(context.get(), deviceId, queueProperties, &retVal); EXPECT_EQ(nullptr, cmdQ); EXPECT_NE(CL_SUCCESS, retVal); } TEST_F(clCreateCommandQueueWithPropertiesMdapiTests, givenInvalidMdapiOpenWhenCreatingQueueWithPropertiesThenFail) { auto performanceCounters = device->getPerformanceCounters(); auto metricsLibary = static_cast(performanceCounters->getMetricsLibraryInterface()); metricsLibary->validOpen = false; auto cmdQ = clCreateCommandQueueWithProperties(context.get(), deviceId, queueProperties, &retVal); EXPECT_EQ(nullptr, cmdQ); EXPECT_NE(CL_SUCCESS, retVal); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_create_pipe_tests.inl000066400000000000000000000157021422164147700272710ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/device/device.h" #include "shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/source/context/context.h" #include "opencl/source/helpers/base_object.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "opencl/test/unit_test/test_macros/test_checks_ocl.h" #include "cl_api_tests.h" using namespace NEO; struct clCreatePipeTests : api_tests { VariableBackup supportsPipesBackup{&defaultHwInfo->capabilityTable.supportsPipes, true}; }; namespace ClCreatePipeTests { class clCreatePipeWithParamTests : public 
ApiFixture<>, public testing::TestWithParam { void SetUp() override { ApiFixture::SetUp(); } void TearDown() override { ApiFixture::TearDown(); } VariableBackup supportsPipesBackup{&defaultHwInfo->capabilityTable.supportsPipes, true}; }; class clCreatePipeWithParamNegativeTests : public ApiFixture<>, public testing::TestWithParam { void SetUp() override { ApiFixture::SetUp(); } void TearDown() override { ApiFixture::TearDown(); } VariableBackup supportsPipesBackup{&defaultHwInfo->capabilityTable.supportsPipes, true}; }; TEST_P(clCreatePipeWithParamTests, GivenValidFlagsWhenCreatingPipeThenPipeIsCreatedAndSuccessIsReturned) { cl_mem_flags flags = GetParam(); auto pipe = clCreatePipe(pContext, flags, 1, 20, nullptr, &retVal); EXPECT_NE(nullptr, pipe); EXPECT_EQ(CL_SUCCESS, retVal); clReleaseMemObject(pipe); } TEST_P(clCreatePipeWithParamNegativeTests, GivenInalidFlagsWhenCreatingPipeThenInvalidValueErrorIsReturned) { cl_mem_flags flags = GetParam(); auto pipe = clCreatePipe(pContext, flags, 1, 20, nullptr, &retVal); EXPECT_EQ(nullptr, pipe); EXPECT_EQ(CL_INVALID_VALUE, retVal); clReleaseMemObject(pipe); } static cl_mem_flags validFlags[] = { 0, CL_MEM_READ_WRITE, CL_MEM_HOST_NO_ACCESS, CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, }; static cl_mem_flags invalidFlags[] = { CL_MEM_READ_WRITE | CL_MEM_HOST_READ_ONLY, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY, CL_MEM_HOST_READ_ONLY, CL_MEM_HOST_WRITE_ONLY, CL_MEM_USE_HOST_PTR, CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE, CL_MEM_COPY_HOST_PTR, CL_MEM_ALLOC_HOST_PTR, CL_MEM_COPY_HOST_PTR | CL_MEM_ALLOC_HOST_PTR, CL_MEM_READ_ONLY | CL_MEM_WRITE_ONLY, CL_MEM_READ_WRITE | CL_MEM_READ_ONLY, CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY, CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS, CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_WRITE_ONLY, CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS, CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR, CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR, }; INSTANTIATE_TEST_CASE_P( 
CreatePipeCheckFlags, clCreatePipeWithParamTests, testing::ValuesIn(validFlags)); INSTANTIATE_TEST_CASE_P( CreatePipeCheckFlagsNegative, clCreatePipeWithParamNegativeTests, testing::ValuesIn(invalidFlags)); TEST_F(clCreatePipeTests, GivenValidFlagsAndNullReturnWhenCreatingPipeThenPipeIsCreated) { cl_mem_flags flags = CL_MEM_READ_WRITE; auto pipe = clCreatePipe(pContext, flags, 1, 20, nullptr, nullptr); EXPECT_NE(nullptr, pipe); clReleaseMemObject(pipe); } TEST_F(clCreatePipeTests, GivenPipePacketSizeZeroWhenCreatingPipeThenInvalidPipeSizeErrorIsReturned) { cl_mem_flags flags = CL_MEM_READ_WRITE; auto pipe = clCreatePipe(pContext, flags, 0, 20, nullptr, &retVal); EXPECT_EQ(nullptr, pipe); EXPECT_EQ(CL_INVALID_PIPE_SIZE, retVal); clReleaseMemObject(pipe); } TEST_F(clCreatePipeTests, GivenPipeMaxSizeZeroWhenCreatingPipeThenInvalidPipeSizeErrorIsReturned) { cl_mem_flags flags = CL_MEM_READ_WRITE; auto pipe = clCreatePipe(pContext, flags, 1, 0, nullptr, &retVal); EXPECT_EQ(nullptr, pipe); EXPECT_EQ(CL_INVALID_PIPE_SIZE, retVal); clReleaseMemObject(pipe); } TEST_F(clCreatePipeTests, GivenPipePropertiesNotNullWhenCreatingPipeThenInvalidValueErrorIsReturned) { cl_mem_flags flags = CL_MEM_READ_WRITE; cl_pipe_properties properties = {0}; auto pipe = clCreatePipe(pContext, flags, 1, 20, &properties, &retVal); EXPECT_EQ(nullptr, pipe); EXPECT_EQ(CL_INVALID_VALUE, retVal); clReleaseMemObject(pipe); } TEST_F(clCreatePipeTests, GivenDeviceNotSupportingPipesWhenCreatingPipeThenInvalidOperationErrorIsReturned) { auto hardwareInfo = *defaultHwInfo; hardwareInfo.capabilityTable.supportsPipes = false; auto pClDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hardwareInfo, 0)); MockContext mockContext{pClDevice.get(), false}; auto pipe = clCreatePipe(&mockContext, 0, 1, 20, nullptr, &retVal); EXPECT_EQ(nullptr, pipe); EXPECT_EQ(CL_INVALID_OPERATION, retVal); } TEST_F(clCreatePipeTests, 
GivenPipePacketSizeGreaterThanAllowedWhenCreatingPipeThenInvalidPipeSizeErrorIsReturned) {
    // First create a pipe with exactly the device's reported maximum packet
    // size (must succeed), then with one more (must be rejected).
    cl_uint packetSize = pContext->getDevice(0)->getDeviceInfo().pipeMaxPacketSize;
    cl_mem_flags flags = CL_MEM_READ_WRITE;

    auto pipe = clCreatePipe(pContext, flags, packetSize, 20, nullptr, &retVal);
    EXPECT_NE(nullptr, pipe);
    EXPECT_EQ(CL_SUCCESS, retVal);

    clReleaseMemObject(pipe);

    packetSize += 1;
    pipe = clCreatePipe(pContext, flags, packetSize, 20, nullptr, &retVal);
    EXPECT_EQ(nullptr, pipe);
    EXPECT_EQ(CL_INVALID_PIPE_SIZE, retVal);

    clReleaseMemObject(pipe);
}

// A null context must be rejected with CL_INVALID_CONTEXT and no pipe returned.
TEST_F(clCreatePipeTests, GivenNullContextWhenCreatingPipeThenInvalidContextErrorIsReturned) {
    auto pipe = clCreatePipe(nullptr, 0, 1, 20, nullptr, &retVal);

    EXPECT_EQ(nullptr, pipe);
    EXPECT_EQ(CL_INVALID_CONTEXT, retVal);

    clReleaseMemObject(pipe);
}

// Builds a device and context by hand while the platform reports zero devices,
// then checks that clCreatePipe still succeeds using that context.
TEST(clCreatePipeTest, givenPlatformWithoutDevicesWhenClCreatePipeIsCalledThenDeviceIsTakenFromContext) {
    auto executionEnvironment = platform()->peekExecutionEnvironment();
    executionEnvironment->initializeMemoryManager();
    executionEnvironment->prepareRootDeviceEnvironments(1);

    // NOTE(review): std::make_unique, Device::create, ReleaseableObjectPtr and
    // Context::create appear to have lost their template arguments in this copy
    // of the file; restore them from the upstream source.
    auto device = std::make_unique(*Device::create(executionEnvironment, 0u), platform());
    const ClDeviceInfo &devInfo = device->getDeviceInfo();
    // Skip on devices without SVM capabilities or without pipe support.
    if (devInfo.svmCapabilities == 0 || device->getHardwareInfo().capabilityTable.supportsPipes == false) {
        GTEST_SKIP();
    }

    cl_device_id clDevice = device.get();
    cl_int retVal;
    auto context = ReleaseableObjectPtr(Context::create(nullptr, ClDeviceVector(&clDevice, 1), nullptr, nullptr, retVal));

    // `context` is dereferenced below, so a failed creation must stop the test
    // here instead of continuing after a non-fatal failure.
    ASSERT_EQ(CL_SUCCESS, retVal);
    ASSERT_NE(nullptr, context.get());

    EXPECT_EQ(0u, platform()->getNumDevices());

    cl_uint packetSize = context->getDevice(0)->getDeviceInfo().pipeMaxPacketSize;
    cl_mem_flags flags = CL_MEM_READ_WRITE;

    auto pipe = clCreatePipe(context.get(), flags, packetSize, 20, nullptr, &retVal);
    EXPECT_NE(nullptr, pipe);
    EXPECT_EQ(CL_SUCCESS, retVal);

    clReleaseMemObject(pipe);
}
} // namespace ClCreatePipeTests
compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_create_program_with_binary_tests.inl000066400000000000000000000221031422164147700323730ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/file_io.h" #include "shared/test/common/helpers/test_files.h" #include "opencl/source/context/context.h" #include "opencl/test/unit_test/test_macros/test_checks_ocl.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clCreateProgramWithBinaryTests; typedef api_tests clCreateProgramWithILTests; typedef api_tests clCreateProgramWithILKHRTests; namespace ULT { TEST_F(clCreateProgramWithBinaryTests, GivenCorrectParametersWhenCreatingProgramWithBinaryThenProgramIsCreatedAndSuccessIsReturned) { cl_program pProgram = nullptr; cl_int binaryStatus = CL_INVALID_VALUE; size_t binarySize = 0; std::string testFile; retrieveBinaryKernelFilename(testFile, "CopyBuffer_simd16_", ".bin"); ASSERT_EQ(true, fileExists(testFile)); auto pBinary = loadDataFromFile( testFile.c_str(), binarySize); ASSERT_NE(0u, binarySize); ASSERT_NE(nullptr, pBinary); const unsigned char *binaries[1] = {reinterpret_cast(pBinary.get())}; pProgram = clCreateProgramWithBinary( pContext, 1, &testedClDevice, &binarySize, binaries, &binaryStatus, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, pProgram); EXPECT_EQ(CL_SUCCESS, binaryStatus); pBinary.reset(); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); pProgram = clCreateProgramWithBinary( nullptr, 1, &testedClDevice, &binarySize, binaries, &binaryStatus, nullptr); EXPECT_EQ(nullptr, pProgram); } TEST_F(clCreateProgramWithBinaryTests, GivenInvalidInputWhenCreatingProgramWithBinaryThenInvalidValueErrorIsReturned) { cl_program pProgram = nullptr; cl_int binaryStatus = CL_INVALID_VALUE; size_t binarySize = 0; std::string testFile; retrieveBinaryKernelFilename(testFile, "CopyBuffer_simd16_", ".bin"); ASSERT_EQ(true, 
fileExists(testFile)); auto pBinary = loadDataFromFile( testFile.c_str(), binarySize); ASSERT_NE(0u, binarySize); ASSERT_NE(nullptr, pBinary); const unsigned char *validBinaries[] = {reinterpret_cast(pBinary.get()), reinterpret_cast(pBinary.get())}; const unsigned char *invalidBinaries[] = {reinterpret_cast(pBinary.get()), nullptr}; size_t validSizeBinaries[] = {binarySize, binarySize}; size_t invalidSizeBinaries[] = {binarySize, 0}; cl_device_id devicesForProgram[] = {testedClDevice, testedClDevice}; pProgram = clCreateProgramWithBinary( pContext, 2, devicesForProgram, validSizeBinaries, invalidBinaries, &binaryStatus, &retVal); EXPECT_EQ(CL_INVALID_VALUE, retVal); EXPECT_EQ(nullptr, pProgram); retVal = CL_INVALID_PROGRAM; pProgram = clCreateProgramWithBinary( pContext, 2, devicesForProgram, invalidSizeBinaries, validBinaries, &binaryStatus, &retVal); EXPECT_EQ(CL_INVALID_VALUE, retVal); EXPECT_EQ(nullptr, pProgram); pProgram = clCreateProgramWithBinary( pContext, 2, devicesForProgram, validSizeBinaries, validBinaries, &binaryStatus, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, pProgram); clReleaseProgram(pProgram); } TEST_F(clCreateProgramWithBinaryTests, GivenDeviceNotAssociatedWithContextWhenCreatingProgramWithBinaryThenInvalidDeviceErrorIsReturned) { cl_program pProgram = nullptr; cl_int binaryStatus = CL_INVALID_VALUE; size_t binarySize = 0; std::string testFile; retrieveBinaryKernelFilename(testFile, "CopyBuffer_simd16_", ".bin"); ASSERT_EQ(true, fileExists(testFile)); auto pBinary = loadDataFromFile( testFile.c_str(), binarySize); ASSERT_NE(0u, binarySize); ASSERT_NE(nullptr, pBinary); const unsigned char *binaries[1] = {reinterpret_cast(pBinary.get())}; MockClDevice invalidDevice(new MockDevice()); cl_device_id devicesForProgram[] = {&invalidDevice}; pProgram = clCreateProgramWithBinary( pContext, 1, devicesForProgram, &binarySize, binaries, &binaryStatus, &retVal); EXPECT_EQ(CL_INVALID_DEVICE, retVal); EXPECT_EQ(nullptr, pProgram); retVal = 
CL_INVALID_PROGRAM; devicesForProgram[0] = nullptr; pProgram = clCreateProgramWithBinary( pContext, 1, devicesForProgram, &binarySize, binaries, &binaryStatus, &retVal); EXPECT_EQ(CL_INVALID_DEVICE, retVal); EXPECT_EQ(nullptr, pProgram); } TEST_F(clCreateProgramWithILTests, GivenInvalidContextWhenCreatingProgramWithIlThenInvalidContextErrorIsReturned) { const uint32_t spirv[16] = {0x03022307}; cl_int err = CL_SUCCESS; cl_program prog = clCreateProgramWithIL(nullptr, spirv, sizeof(spirv), &err); EXPECT_EQ(CL_INVALID_CONTEXT, err); EXPECT_EQ(nullptr, prog); } TEST_F(clCreateProgramWithILTests, GivenNullIlWhenCreatingProgramWithIlThenInvalidValueErrorIsReturned) { cl_int err = CL_SUCCESS; cl_program prog = clCreateProgramWithIL(pContext, nullptr, 0, &err); EXPECT_EQ(CL_INVALID_VALUE, err); EXPECT_EQ(nullptr, prog); } TEST_F(clCreateProgramWithILTests, GivenIncorrectIlSizeWhenCreatingProgramWithIlThenInvalidBinaryErrorIsReturned) { const uint32_t spirv[16] = {0x03022307}; cl_int err = CL_SUCCESS; cl_program prog = clCreateProgramWithIL(pContext, spirv, 0, &err); EXPECT_EQ(CL_INVALID_BINARY, err); EXPECT_EQ(nullptr, prog); } TEST_F(clCreateProgramWithILTests, GivenIncorrectIlWhenCreatingProgramWithIlThenInvalidBinaryErrorIsReturned) { const uint32_t notSpirv[16] = {0xDEADBEEF}; cl_int err = CL_SUCCESS; cl_program prog = clCreateProgramWithIL(pContext, notSpirv, sizeof(notSpirv), &err); EXPECT_EQ(CL_INVALID_BINARY, err); EXPECT_EQ(nullptr, prog); } TEST_F(clCreateProgramWithILTests, GivenIncorrectIlAndNoErrorPointerWhenCreatingProgramWithIlThenInvalidBinaryErrorIsReturned) { const uint32_t notSpirv[16] = {0xDEADBEEF}; cl_program prog = clCreateProgramWithIL(pContext, notSpirv, sizeof(notSpirv), nullptr); EXPECT_EQ(nullptr, prog); } TEST_F(clCreateProgramWithILKHRTests, GivenCorrectParametersWhenCreatingProgramWithIlkhrThenProgramIsCreatedAndSuccessIsReturned) { const uint32_t spirv[16] = {0x03022307}; cl_int err = CL_INVALID_VALUE; cl_program program = 
clCreateProgramWithILKHR(pContext, spirv, sizeof(spirv), &err); EXPECT_EQ(CL_SUCCESS, err); EXPECT_NE(nullptr, program); retVal = clReleaseProgram(program); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clCreateProgramWithILKHRTests, GivenProgramCreatedWithILWhenBuildAfterBuildIsCalledThenReturnSuccess) { const uint32_t spirv[16] = {0x03022307}; cl_int err = CL_INVALID_VALUE; cl_program program = clCreateProgramWithIL(pContext, spirv, sizeof(spirv), &err); EXPECT_EQ(CL_SUCCESS, err); EXPECT_NE(nullptr, program); err = clBuildProgram(program, 0, nullptr, "", nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, err); err = clBuildProgram(program, 0, nullptr, "", nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, err); retVal = clReleaseProgram(program); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clCreateProgramWithILKHRTests, GivenNullIlWhenCreatingProgramWithIlkhrThenNullProgramIsReturned) { cl_program program = clCreateProgramWithILKHR(pContext, nullptr, 0, nullptr); EXPECT_EQ(nullptr, program); } TEST_F(clCreateProgramWithILKHRTests, GivenBothFunctionVariantsWhenCreatingProgramWithIlThenCommonLogicIsUsed) { VariableBackup createFromIlBackup{&ProgramFunctions::createFromIL}; bool createFromIlCalled; Context *receivedContext; const void *receivedIl; size_t receivedLength; auto mockFunction = [&](Context *ctx, const void *il, size_t length, cl_int &errcodeRet) -> Program * { createFromIlCalled = true; receivedContext = ctx; receivedIl = il; receivedLength = length; return nullptr; }; createFromIlBackup = mockFunction; const uint32_t spirv[16] = {0x03022307}; createFromIlCalled = false; receivedContext = nullptr; receivedIl = nullptr; receivedLength = 0; clCreateProgramWithIL(pContext, spirv, sizeof(spirv), nullptr); EXPECT_TRUE(createFromIlCalled); EXPECT_EQ(pContext, receivedContext); EXPECT_EQ(&spirv, receivedIl); EXPECT_EQ(sizeof(spirv), receivedLength); createFromIlCalled = false; receivedContext = nullptr; receivedIl = nullptr; receivedLength = 0; clCreateProgramWithILKHR(pContext, spirv, 
sizeof(spirv), nullptr); EXPECT_TRUE(createFromIlCalled); EXPECT_EQ(pContext, receivedContext); EXPECT_EQ(&spirv, receivedIl); EXPECT_EQ(sizeof(spirv), receivedLength); } } // namespace ULT cl_create_program_with_built_in_kernels_tests.cpp000066400000000000000000000251161422164147700343670ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/api/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/built_ins/built_ins.h" #include "shared/source/compiler_interface/compiler_interface.h" #include "shared/source/device/device.h" #include "opencl/source/built_ins/built_in_ops_vme.h" #include "opencl/source/built_ins/vme_builtin.h" #include "opencl/source/context/context.h" #include "opencl/source/helpers/base_object.h" #include "opencl/source/kernel/kernel.h" #include "opencl/source/program/program.h" #include "opencl/test/unit_test/fixtures/run_kernel_fixture.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clCreateProgramWithBuiltInKernelsTests; struct clCreateProgramWithBuiltInVmeKernelsTests : clCreateProgramWithBuiltInKernelsTests { void SetUp() override { clCreateProgramWithBuiltInKernelsTests::SetUp(); if (!castToObject(testedClDevice)->getHardwareInfo().capabilityTable.supportsVme) { GTEST_SKIP(); } pClDevice = pContext->getDevice(0); } ClDevice *pClDevice; }; namespace ULT { TEST_F(clCreateProgramWithBuiltInKernelsTests, GivenInvalidContextWhenCreatingProgramWithBuiltInKernelsThenInvalidContextErrorIsReturned) { cl_int retVal = CL_SUCCESS; auto program = clCreateProgramWithBuiltInKernels( nullptr, // context 1, // num_devices nullptr, // device_list nullptr, // kernel_names &retVal); EXPECT_EQ(nullptr, program); EXPECT_EQ(CL_INVALID_CONTEXT, retVal); } TEST_F(clCreateProgramWithBuiltInKernelsTests, GivenNoKernelsWhenCreatingProgramWithBuiltInKernelsThenInvalidValueErrorIsReturned) { cl_int retVal = CL_SUCCESS; 
auto program = clCreateProgramWithBuiltInKernels( pContext, // context 1, // num_devices &testedClDevice, // device_list "", // kernel_names &retVal); EXPECT_EQ(nullptr, program); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(clCreateProgramWithBuiltInKernelsTests, GivenNoDeviceWhenCreatingProgramWithBuiltInKernelsThenInvalidValueErrorIsReturned) { cl_int retVal = CL_SUCCESS; auto program = clCreateProgramWithBuiltInKernels( pContext, // context 0, // num_devices &testedClDevice, // device_list "", // kernel_names &retVal); EXPECT_EQ(nullptr, program); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(clCreateProgramWithBuiltInKernelsTests, GivenNoKernelsAndNoReturnWhenCreatingProgramWithBuiltInKernelsThenProgramIsNotCreated) { auto program = clCreateProgramWithBuiltInKernels( pContext, // context 1, // num_devices &testedClDevice, // device_list "", // kernel_names nullptr); EXPECT_EQ(nullptr, program); } TEST_F(clCreateProgramWithBuiltInVmeKernelsTests, GivenDeviceNotAssociatedWithContextWhenCreatingProgramWithBuiltInThenInvalidDeviceErrorIsReturned) { cl_program pProgram = nullptr; const char *kernelNamesString = { "block_advanced_motion_estimate_bidirectional_check_intel;" "block_motion_estimate_intel;" "block_advanced_motion_estimate_check_intel;"}; MockClDevice invalidDevice(new MockDevice()); cl_device_id devicesForProgram[] = {&invalidDevice}; pProgram = clCreateProgramWithBuiltInKernels( pContext, 1, devicesForProgram, kernelNamesString, &retVal); EXPECT_EQ(CL_INVALID_DEVICE, retVal); EXPECT_EQ(nullptr, pProgram); retVal = CL_INVALID_PROGRAM; devicesForProgram[0] = nullptr; pProgram = clCreateProgramWithBuiltInKernels( pContext, 1, devicesForProgram, kernelNamesString, &retVal); EXPECT_EQ(CL_INVALID_DEVICE, retVal); EXPECT_EQ(nullptr, pProgram); } TEST_F(clCreateProgramWithBuiltInVmeKernelsTests, GivenValidMediaKernelsWhenCreatingProgramWithBuiltInKernelsThenProgramIsSuccessfullyCreated) { cl_int retVal = CL_SUCCESS; 
overwriteBuiltInBinaryName("media_kernels_frontend"); const char *kernelNamesString = { "block_advanced_motion_estimate_bidirectional_check_intel;" "block_motion_estimate_intel;" "block_advanced_motion_estimate_check_intel;"}; const char *kernelNames[] = { "block_motion_estimate_intel", "block_advanced_motion_estimate_check_intel", "block_advanced_motion_estimate_bidirectional_check_intel", }; cl_program program = clCreateProgramWithBuiltInKernels( pContext, // context 1, // num_devices &testedClDevice, // device_list kernelNamesString, // kernel_names &retVal); restoreBuiltInBinaryName(); EXPECT_NE(nullptr, program); EXPECT_EQ(CL_SUCCESS, retVal); for (auto &kernelName : kernelNames) { cl_kernel kernel = clCreateKernel( program, kernelName, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, kernel); retVal = clReleaseKernel(kernel); EXPECT_EQ(CL_SUCCESS, retVal); } retVal = clReleaseProgram(program); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clCreateProgramWithBuiltInVmeKernelsTests, GivenValidMediaKernelsWithOptionsWhenCreatingProgramWithBuiltInKernelsThenProgramIsSuccessfullyCreatedWithThoseOptions) { cl_int retVal = CL_SUCCESS; overwriteBuiltInBinaryName("media_kernels_frontend"); const char *kernelNamesString = { "block_motion_estimate_intel;"}; cl_program program = clCreateProgramWithBuiltInKernels( pContext, // context 1, // num_devices &testedClDevice, // device_list kernelNamesString, // kernel_names &retVal); restoreBuiltInBinaryName(); auto neoProgram = castToObject(program); auto builtinOptions = neoProgram->getOptions(); auto it = builtinOptions.find("HW_NULL_CHECK"); EXPECT_EQ(std::string::npos, it); clReleaseProgram(program); } TEST_F(clCreateProgramWithBuiltInVmeKernelsTests, GivenVmeBlockMotionEstimateKernelWhenCreatingProgramWithBuiltInKernelsThenCorrectDispatchBuilderAndFrontendKernelIsCreated) { cl_int retVal = CL_SUCCESS; overwriteBuiltInBinaryName("media_kernels_backend"); 
Vme::getBuiltinDispatchInfoBuilder(EBuiltInOps::VmeBlockMotionEstimateIntel, *pClDevice); restoreBuiltInBinaryName(); overwriteBuiltInBinaryName("media_kernels_frontend"); const char *kernelNamesString = { "block_motion_estimate_intel;"}; cl_program program = clCreateProgramWithBuiltInKernels( pContext, // context 1, // num_devices &testedClDevice, // device_list kernelNamesString, // kernel_names &retVal); restoreBuiltInBinaryName(); cl_kernel kernel = clCreateKernel( program, "block_motion_estimate_intel", &retVal); auto pMultiDeviceKernel = castToObject(kernel); auto kernNeo = pMultiDeviceKernel->getKernel(testedRootDeviceIndex); EXPECT_NE(nullptr, kernNeo->getKernelInfo().builtinDispatchBuilder); EXPECT_EQ(6U, kernNeo->getKernelArgsNumber()); auto &vmeBuilder = Vme::getBuiltinDispatchInfoBuilder(EBuiltInOps::VmeBlockMotionEstimateIntel, *pClDevice); EXPECT_EQ(&vmeBuilder, kernNeo->getKernelInfo().builtinDispatchBuilder); clReleaseKernel(kernel); clReleaseProgram(program); } TEST_F(clCreateProgramWithBuiltInVmeKernelsTests, GivenVmeBlockAdvancedMotionEstimateKernelWhenCreatingProgramWithBuiltInKernelsThenCorrectDispatchBuilderAndFrontendKernelIsCreated) { cl_int retVal = CL_SUCCESS; overwriteBuiltInBinaryName("media_kernels_backend"); Vme::getBuiltinDispatchInfoBuilder(EBuiltInOps::VmeBlockAdvancedMotionEstimateCheckIntel, *pClDevice); restoreBuiltInBinaryName(); overwriteBuiltInBinaryName("media_kernels_frontend"); const char *kernelNamesString = { "block_advanced_motion_estimate_check_intel;"}; cl_program program = clCreateProgramWithBuiltInKernels( pContext, // context 1, // num_devices &testedClDevice, // device_list kernelNamesString, // kernel_names &retVal); restoreBuiltInBinaryName(); cl_kernel kernel = clCreateKernel( program, "block_advanced_motion_estimate_check_intel", &retVal); auto pMultiDeviceKernel = castToObject(kernel); auto kernNeo = pMultiDeviceKernel->getKernel(testedRootDeviceIndex); EXPECT_NE(nullptr, 
kernNeo->getKernelInfo().builtinDispatchBuilder); EXPECT_EQ(15U, kernNeo->getKernelArgsNumber()); auto &vmeBuilder = Vme::getBuiltinDispatchInfoBuilder(EBuiltInOps::VmeBlockAdvancedMotionEstimateCheckIntel, *pClDevice); EXPECT_EQ(&vmeBuilder, kernNeo->getKernelInfo().builtinDispatchBuilder); clReleaseKernel(kernel); clReleaseProgram(program); } TEST_F(clCreateProgramWithBuiltInVmeKernelsTests, GivenVmeBlockAdvancedMotionEstimateBidirectionalCheckKernelWhenCreatingProgramWithBuiltInKernelsThenCorrectDispatchBuilderAndFrontendKernelIsCreated) { cl_int retVal = CL_SUCCESS; overwriteBuiltInBinaryName("media_kernels_backend"); Vme::getBuiltinDispatchInfoBuilder(EBuiltInOps::VmeBlockAdvancedMotionEstimateBidirectionalCheckIntel, *pClDevice); restoreBuiltInBinaryName(); overwriteBuiltInBinaryName("media_kernels_frontend"); const char *kernelNamesString = { "block_advanced_motion_estimate_bidirectional_check_intel;"}; cl_program program = clCreateProgramWithBuiltInKernels( pContext, // context 1, // num_devices &testedClDevice, // device_list kernelNamesString, // kernel_names &retVal); restoreBuiltInBinaryName(); cl_kernel kernel = clCreateKernel( program, "block_advanced_motion_estimate_bidirectional_check_intel", &retVal); auto pMultiDeviceKernel = castToObject(kernel); auto kernNeo = pMultiDeviceKernel->getKernel(testedRootDeviceIndex); EXPECT_NE(nullptr, kernNeo->getKernelInfo().builtinDispatchBuilder); EXPECT_EQ(20U, kernNeo->getKernelArgsNumber()); auto ctxNeo = castToObject(pContext); auto &vmeBuilder = Vme::getBuiltinDispatchInfoBuilder(EBuiltInOps::VmeBlockAdvancedMotionEstimateBidirectionalCheckIntel, *ctxNeo->getDevice(0)); EXPECT_EQ(&vmeBuilder, kernNeo->getKernelInfo().builtinDispatchBuilder); clReleaseKernel(kernel); clReleaseProgram(program); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_create_sampler_tests.inl000066400000000000000000000025451422164147700300000ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel 
Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/context/context.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clCreateSamplerTests; namespace ULT { TEST_F(clCreateSamplerTests, GivenCorrectParametersWhenCreatingSamplerThenSamplerIsCreatedAndSuccessReturned) { auto sampler = clCreateSampler( pContext, CL_TRUE, CL_ADDRESS_CLAMP, CL_FILTER_NEAREST, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, sampler); retVal = clReleaseSampler(sampler); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clCreateSamplerTests, GivenCorrectParametersAndNullReturnPointerWhenCreatingSamplerThenSamplerIsCreated) { auto sampler = clCreateSampler( pContext, CL_TRUE, CL_ADDRESS_CLAMP, CL_FILTER_NEAREST, nullptr); EXPECT_NE(nullptr, sampler); retVal = clReleaseSampler(sampler); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clCreateSamplerTests, GivenInvalidContextWhenCreatingSamplerThenInvalidContextErrorIsReturned) { auto sampler = clCreateSampler( nullptr, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_LINEAR, &retVal); EXPECT_EQ(CL_INVALID_CONTEXT, retVal); EXPECT_EQ(nullptr, sampler); delete sampler; } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_create_sampler_with_properties_tests.inl000066400000000000000000000211771422164147700333110ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/context/context.h" #include "opencl/source/sampler/sampler.h" #include "CL/cl_ext.h" #include "cl_api_tests.h" using namespace NEO; namespace ULT { struct SamplerWithPropertiesTest : public ApiFixture<>, public ::testing::WithParamInterface>, public ::testing::Test { SamplerWithPropertiesTest() { } void SetUp() override { std::tie(NormalizdProperties, AddressingProperties, FilterProperties) = GetParam(); ApiFixture::SetUp(); } void TearDown() override { ApiFixture::TearDown(); } cl_sampler_properties NormalizdProperties = 0; cl_sampler_properties 
AddressingProperties = 0; cl_sampler_properties FilterProperties = 0; }; typedef api_tests clCreateSamplerWithPropertiesTests; typedef SamplerWithPropertiesTest clCreateSamplerWithPropertiesTests2; TEST_F(clCreateSamplerWithPropertiesTests, GivenSamplerPropertiesAndNoReturnPointerWhenCreatingSamplerWithPropertiesThenSamplerIsCreated) { cl_sampler sampler = nullptr; cl_queue_properties properties[] = { CL_SAMPLER_NORMALIZED_COORDS, 0, CL_SAMPLER_ADDRESSING_MODE, CL_ADDRESS_NONE, CL_SAMPLER_FILTER_MODE, CL_FILTER_LINEAR, 0}; sampler = clCreateSamplerWithProperties( pContext, properties, nullptr); ASSERT_NE(nullptr, sampler); retVal = clReleaseSampler(sampler); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clCreateSamplerWithPropertiesTests, GivenNullContextWhenCreatingSamplerWithPropertiesThenInvalidContextErrorIsReturned) { cl_sampler sampler = nullptr; cl_queue_properties properties[] = { CL_SAMPLER_NORMALIZED_COORDS, 0, CL_SAMPLER_ADDRESSING_MODE, CL_ADDRESS_NONE, CL_SAMPLER_FILTER_MODE, CL_FILTER_LINEAR, 0}; sampler = clCreateSamplerWithProperties( nullptr, properties, &retVal); ASSERT_EQ(nullptr, sampler); EXPECT_EQ(CL_INVALID_CONTEXT, retVal); } TEST_F(clCreateSamplerWithPropertiesTests, GivenSamplerCreatedWithNullPropertiesWhenQueryingPropertiesThenNothingIsReturned) { cl_int retVal = CL_SUCCESS; auto sampler = clCreateSamplerWithProperties(pContext, nullptr, &retVal); EXPECT_EQ(retVal, CL_SUCCESS); EXPECT_NE(nullptr, sampler); size_t propertiesSize; retVal = clGetSamplerInfo(sampler, CL_SAMPLER_PROPERTIES, 0, nullptr, &propertiesSize); EXPECT_EQ(retVal, CL_SUCCESS); EXPECT_EQ(0u, propertiesSize); clReleaseSampler(sampler); } TEST_F(clCreateSamplerWithPropertiesTests, WhenCreatingSamplerWithPropertiesThenPropertiesAreCorrectlyStored) { cl_int retVal = CL_SUCCESS; cl_sampler_properties properties[7]; size_t propertiesSize; std::vector> propertiesToTest{ {0}, {CL_SAMPLER_FILTER_MODE, CL_FILTER_LINEAR, 0}, {CL_SAMPLER_NORMALIZED_COORDS, 0, CL_SAMPLER_ADDRESSING_MODE, 
CL_ADDRESS_NONE, CL_SAMPLER_FILTER_MODE, CL_FILTER_LINEAR, 0}}; for (auto testProperties : propertiesToTest) { auto sampler = clCreateSamplerWithProperties(pContext, testProperties.data(), &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, sampler); retVal = clGetSamplerInfo(sampler, CL_SAMPLER_PROPERTIES, sizeof(properties), properties, &propertiesSize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(testProperties.size() * sizeof(cl_sampler_properties), propertiesSize); for (size_t i = 0; i < testProperties.size(); i++) { EXPECT_EQ(testProperties[i], properties[i]); } clReleaseSampler(sampler); } } TEST_P(clCreateSamplerWithPropertiesTests2, GivenCorrectParametersWhenCreatingSamplerWithPropertiesThenSamplerIsCreatedAndSuccessIsReturned) { cl_sampler sampler = nullptr; cl_queue_properties properties[] = { CL_SAMPLER_NORMALIZED_COORDS, 0, CL_SAMPLER_ADDRESSING_MODE, 0, CL_SAMPLER_FILTER_MODE, 0, 0}; cl_queue_properties *pProp = &properties[0]; if (NormalizdProperties) { *pProp++ = CL_SAMPLER_NORMALIZED_COORDS; *pProp++ = (cl_queue_properties)NormalizdProperties; } if (AddressingProperties) { *pProp++ = CL_SAMPLER_ADDRESSING_MODE; *pProp++ = (cl_queue_properties)AddressingProperties; } if (FilterProperties) { *pProp++ = CL_SAMPLER_FILTER_MODE; *pProp++ = (cl_queue_properties)FilterProperties; } *pProp++ = 0; sampler = clCreateSamplerWithProperties( pContext, properties, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, sampler); retVal = clReleaseSampler(sampler); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_P(clCreateSamplerWithPropertiesTests2, GivenInvalidPropertiesWhenCreatingSamplerWithPropertiesThenInvalidValueErrorIsReturned) { cl_sampler sampler = nullptr; cl_queue_properties properties[] = { CL_SAMPLER_NORMALIZED_COORDS, 0, CL_SAMPLER_NORMALIZED_COORDS, 0, 0}; cl_queue_properties *pProp = &properties[0]; if (NormalizdProperties) { *pProp++ = CL_SAMPLER_NORMALIZED_COORDS; *pProp++ = (cl_queue_properties)NormalizdProperties; *pProp++ = 
CL_SAMPLER_NORMALIZED_COORDS; *pProp++ = (cl_queue_properties)NormalizdProperties; *pProp++ = 0; sampler = clCreateSamplerWithProperties( pContext, properties, &retVal); EXPECT_EQ(CL_INVALID_VALUE, retVal); ASSERT_EQ(nullptr, sampler); } pProp = &properties[0]; if (AddressingProperties) { *pProp++ = CL_SAMPLER_ADDRESSING_MODE; *pProp++ = (cl_queue_properties)AddressingProperties; *pProp++ = CL_SAMPLER_ADDRESSING_MODE; *pProp++ = (cl_queue_properties)AddressingProperties; *pProp++ = 0; sampler = clCreateSamplerWithProperties( pContext, properties, &retVal); EXPECT_EQ(CL_INVALID_VALUE, retVal); ASSERT_EQ(nullptr, sampler); } pProp = &properties[0]; if (FilterProperties) { *pProp++ = CL_SAMPLER_FILTER_MODE; *pProp++ = (cl_queue_properties)FilterProperties; *pProp++ = CL_SAMPLER_FILTER_MODE; *pProp++ = (cl_queue_properties)FilterProperties; *pProp++ = 0; sampler = clCreateSamplerWithProperties( pContext, properties, &retVal); EXPECT_EQ(CL_INVALID_VALUE, retVal); ASSERT_EQ(nullptr, sampler); } } static cl_sampler_properties NormalizdProperties[] = { CL_TRUE, CL_FALSE, }; static cl_sampler_properties AddressingProperties[] = { CL_ADDRESS_NONE, CL_ADDRESS_CLAMP_TO_EDGE, CL_ADDRESS_CLAMP, CL_ADDRESS_REPEAT, CL_ADDRESS_MIRRORED_REPEAT, }; static cl_sampler_properties FilterProperties[] = { CL_FILTER_NEAREST, CL_FILTER_LINEAR, }; INSTANTIATE_TEST_CASE_P(api, clCreateSamplerWithPropertiesTests2, ::testing::Combine( ::testing::ValuesIn(NormalizdProperties), ::testing::ValuesIn(AddressingProperties), ::testing::ValuesIn(FilterProperties))); TEST_F(clCreateSamplerWithPropertiesTests, GivenMipMapDataWhenCreatingSamplerWithPropertiesThenSamplerIsCreatedAndCorrectlyPopulated) { SamplerLodProperty minLodProperty; SamplerLodProperty maxLodProperty; minLodProperty.lod = 2.0f; maxLodProperty.lod = 3.0f; cl_sampler_properties mipMapFilteringMode = CL_FILTER_LINEAR; cl_sampler_properties properties[] = { CL_SAMPLER_NORMALIZED_COORDS, 0, CL_SAMPLER_ADDRESSING_MODE, CL_ADDRESS_NONE, 
CL_SAMPLER_FILTER_MODE, CL_FILTER_LINEAR, CL_SAMPLER_MIP_FILTER_MODE, mipMapFilteringMode, CL_SAMPLER_LOD_MIN, minLodProperty.data, CL_SAMPLER_LOD_MAX, maxLodProperty.data, 0}; cl_sampler clSampler = clCreateSamplerWithProperties( pContext, properties, &retVal); auto sampler = castToObject(clSampler); ASSERT_NE(nullptr, sampler); EXPECT_EQ(mipMapFilteringMode, sampler->mipFilterMode); EXPECT_EQ(minLodProperty.lod, sampler->lodMin); EXPECT_EQ(maxLodProperty.lod, sampler->lodMax); clReleaseSampler(sampler); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_create_sub_buffer_tests.inl000066400000000000000000000161031422164147700304520ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/context/context.h" #include "cl_api_tests.h" using namespace NEO; namespace ClCreateSubbufferTests { template class clCreateSubBufferTemplateTests : public ApiFixture<>, public testing::TestWithParam { void SetUp() override { ApiFixture::SetUp(); cl_mem_flags flg = parentFlags; void *ptr = nullptr; if (hasHostPtr == true) { flg |= CL_MEM_USE_HOST_PTR; ptr = pHostPtr; } buffer = clCreateBuffer(pContext, flg, 64, ptr, &retVal); EXPECT_NE(nullptr, buffer); EXPECT_EQ(CL_SUCCESS, retVal); } void TearDown() override { clReleaseMemObject(buffer); ApiFixture::TearDown(); } protected: cl_mem buffer; cl_uchar pHostPtr[64]; }; struct clCreateSubBufferValidFlagsNoHostPtrTests : public clCreateSubBufferTemplateTests { }; TEST_P(clCreateSubBufferValidFlagsNoHostPtrTests, GivenValidFlagsWhenCreatingSubBufferThenSubBufferIsCreatedAndSuccessIsReturned) { cl_buffer_region region = {0, 12}; cl_mem_flags flags = GetParam(); auto subBuffer = clCreateSubBuffer(buffer, flags, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &retVal); EXPECT_NE(nullptr, subBuffer); EXPECT_EQ(CL_SUCCESS, retVal); clReleaseMemObject(subBuffer); }; static cl_mem_flags validFlags[] = { CL_MEM_READ_WRITE, CL_MEM_WRITE_ONLY, 
CL_MEM_READ_ONLY, CL_MEM_HOST_READ_ONLY, CL_MEM_HOST_WRITE_ONLY, CL_MEM_HOST_NO_ACCESS, }; INSTANTIATE_TEST_CASE_P( CreateSubBufferCheckFlags, clCreateSubBufferValidFlagsNoHostPtrTests, testing::ValuesIn(validFlags)); struct clCreateSubBufferInvalidFlagsHostPtrTests : public clCreateSubBufferTemplateTests { }; TEST_P(clCreateSubBufferInvalidFlagsHostPtrTests, GivenInvalidFlagsWhenCreatingSubBufferThenInvalidValueErrorIsReturned) { cl_buffer_region region = {4, 12}; cl_mem_flags flags = GetParam(); auto subBuffer = clCreateSubBuffer(buffer, flags, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &retVal); EXPECT_EQ(nullptr, subBuffer); EXPECT_EQ(CL_INVALID_VALUE, retVal); }; cl_mem_flags invalidFlags[] = { CL_MEM_READ_WRITE, CL_MEM_WRITE_ONLY, CL_MEM_HOST_WRITE_ONLY, CL_MEM_HOST_READ_ONLY, CL_MEM_USE_HOST_PTR, CL_MEM_ALLOC_HOST_PTR, CL_MEM_COPY_HOST_PTR, 0xffcc, }; INSTANTIATE_TEST_CASE_P( CreateSubBufferCheckFlags, clCreateSubBufferInvalidFlagsHostPtrTests, testing::ValuesIn(invalidFlags)); class clCreateSubBufferTests : public api_tests { void SetUp() override { api_tests::SetUp(); cl_mem_flags flg = CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR | CL_MEM_HOST_NO_ACCESS; buffer = clCreateBuffer(pContext, flg, 64, pHostPtr, &retVal); EXPECT_NE(nullptr, buffer); EXPECT_EQ(CL_SUCCESS, retVal); } void TearDown() override { clReleaseMemObject(buffer); ApiFixture::TearDown(); } protected: cl_mem buffer; cl_uchar pHostPtr[64]; }; TEST_F(clCreateSubBufferTests, GivenInBoundsRegionWhenCreatingSubBufferThenSubBufferIsCreatedAndSuccessIsReturned) { cl_buffer_region region = {0, 12}; auto subBuffer = clCreateSubBuffer(buffer, CL_MEM_READ_WRITE, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &retVal); EXPECT_NE(nullptr, subBuffer); EXPECT_EQ(CL_SUCCESS, retVal); clReleaseMemObject(subBuffer); } TEST_F(clCreateSubBufferTests, GivenOutOfBoundsRegionWhenCreatingSubBufferThenInvalidValueErrorIsReturned) { cl_buffer_region region = {4, 68}; auto subBuffer = clCreateSubBuffer(buffer, CL_MEM_READ_WRITE, 
CL_BUFFER_CREATE_TYPE_REGION, ®ion, &retVal); EXPECT_EQ(nullptr, subBuffer); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(clCreateSubBufferTests, GivenSubBufferAsBufferWhenCreatingSubBufferThenInvalidMemObjectErrorIsReturned) { cl_buffer_region region0 = {0, 60}; cl_buffer_region region1 = {8, 20}; auto subBuffer = clCreateSubBuffer(buffer, CL_MEM_READ_WRITE, CL_BUFFER_CREATE_TYPE_REGION, ®ion0, &retVal); EXPECT_NE(nullptr, subBuffer); EXPECT_EQ(CL_SUCCESS, retVal); auto subsubBuffer = clCreateSubBuffer(subBuffer, CL_MEM_READ_WRITE, CL_BUFFER_CREATE_TYPE_REGION, ®ion1, &retVal); EXPECT_EQ(nullptr, subsubBuffer); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); clReleaseMemObject(subBuffer); } TEST_F(clCreateSubBufferTests, GivenInvalidBufferObjectWhenCreatingSubBufferThenInvalidMemObjectErrorIsReturned) { cl_buffer_region region = {4, 60}; cl_int trash[] = {0x01, 0x08, 0x88, 0xcc, 0xab, 0x55}; auto subBuffer = clCreateSubBuffer(reinterpret_cast(trash), CL_MEM_READ_WRITE, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &retVal); EXPECT_EQ(nullptr, subBuffer); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } TEST_F(clCreateSubBufferTests, GivenInvalidOffsetWhenCreatingSubBufferThenMisalignedSubBufferOffsetErrorIsReturned) { cl_buffer_region region = {1, 60}; auto subBuffer = clCreateSubBuffer(buffer, CL_MEM_READ_WRITE, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &retVal); EXPECT_EQ(nullptr, subBuffer); EXPECT_EQ(CL_MISALIGNED_SUB_BUFFER_OFFSET, retVal); } TEST_F(clCreateSubBufferTests, GivenNoRegionWhenCreatingSubBufferThenInvalidValueErrorIsReturned) { cl_buffer_region region = {4, 60}; auto subBuffer = clCreateSubBuffer(buffer, CL_MEM_READ_WRITE, CL_BUFFER_CREATE_TYPE_REGION, nullptr, &retVal); EXPECT_EQ(nullptr, subBuffer); EXPECT_EQ(CL_INVALID_VALUE, retVal); subBuffer = clCreateSubBuffer(buffer, CL_MEM_READ_WRITE, 0, ®ion, &retVal); EXPECT_EQ(nullptr, subBuffer); EXPECT_EQ(CL_INVALID_VALUE, retVal); subBuffer = clCreateSubBuffer(buffer, CL_MEM_READ_WRITE, 0, nullptr, &retVal); 
EXPECT_EQ(nullptr, subBuffer); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(clCreateSubBufferTests, GivenBufferWithFlagsWhenCreatingSubBufferThenFlagsAreInherited) { cl_buffer_region region = {0, 60}; auto subBuffer = clCreateSubBuffer(buffer, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &retVal); EXPECT_NE(nullptr, subBuffer); EXPECT_EQ(CL_SUCCESS, retVal); cl_mem_flags retFlag; size_t retSZ; retVal = clGetMemObjectInfo(subBuffer, CL_MEM_FLAGS, sizeof(cl_mem_flags), &retFlag, &retSZ); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(cl_mem_flags), retSZ); EXPECT_EQ(static_cast(CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR | CL_MEM_HOST_NO_ACCESS), retFlag); clReleaseMemObject(subBuffer); } } // namespace ClCreateSubbufferTests compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_create_sub_devices_tests.inl000066400000000000000000000456151422164147700306350ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/variable_backup.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/api/api.h" #include using namespace NEO; namespace ULT { struct clCreateSubDevicesTests : ::testing::Test { DebugManagerStateRestore restorer; VariableBackup mockDeviceCreateSingleDeviceBackup{&MockDevice::createSingleDevice}; std::unique_ptr device; cl_device_partition_property properties[3] = {CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, CL_DEVICE_AFFINITY_DOMAIN_NUMA, 0}; cl_uint outDevicesCount; cl_device_id outDevices[4]; void setup(int numberOfDevices) { DebugManager.flags.CreateMultipleSubDevices.set(numberOfDevices); mockDeviceCreateSingleDeviceBackup = (numberOfDevices == 1); device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); outDevicesCount = numberOfDevices; } }; TEST_F(clCreateSubDevicesTests, 
GivenInvalidDeviceWhenCreatingSubDevicesThenInvalidDeviceErrorIsReturned) {
    // A null input device is rejected before anything else is looked at.
    const auto status = clCreateSubDevices(nullptr, nullptr, 0, nullptr, nullptr);
    EXPECT_EQ(CL_INVALID_DEVICE, status);
}

// A device that exposes no generic sub-devices cannot be partitioned.
TEST_F(clCreateSubDevicesTests, GivenDeviceWithoutSubDevicesWhenCreatingSubDevicesThenDevicePartitionFailedErrorIsReturned) {
    setup(1);

    EXPECT_EQ(0u, device->getNumGenericSubDevices());

    // Partition the first sub-device when the root still reports one,
    // otherwise partition the root device itself.
    cl_device_id deviceToPartition = device.get();
    if (device->getNumSubDevices() > 0) {
        deviceToPartition = device->getSubDevice(0);
    }

    const cl_int status = clCreateSubDevices(deviceToPartition, nullptr, 0, nullptr, nullptr);
    EXPECT_EQ(CL_DEVICE_PARTITION_FAILED, status);
}

// Null, empty, incomplete or over-long property lists are all CL_INVALID_VALUE.
TEST_F(clCreateSubDevicesTests, GivenInvalidOrUnsupportedPropertiesWhenCreatingSubDevicesThenInvalidValueErrorIsReturned) {
    setup(2);

    // No property list at all.
    auto status = clCreateSubDevices(device.get(), nullptr, 0, nullptr, nullptr);
    EXPECT_EQ(CL_INVALID_VALUE, status);

    // List that starts with the terminator.
    properties[0] = 0;
    status = clCreateSubDevices(device.get(), properties, 0, nullptr, nullptr);
    EXPECT_EQ(CL_INVALID_VALUE, status);

    // Partition type present, affinity domain zero.
    properties[0] = CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN;
    properties[1] = 0;
    status = clCreateSubDevices(device.get(), properties, 0, nullptr, nullptr);
    EXPECT_EQ(CL_INVALID_VALUE, status);

    // Third entry is another property key instead of the terminator.
    properties[1] = CL_DEVICE_AFFINITY_DOMAIN_NUMA;
    properties[2] = CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN;
    status = clCreateSubDevices(device.get(), properties, 0, nullptr, nullptr);
    EXPECT_EQ(CL_INVALID_VALUE, status);
}

// With a null output array the call only reports the sub-device count.
TEST_F(clCreateSubDevicesTests, GivenOutDevicesNullWhenCreatingSubDevicesThenSuccessIsReturned) {
    setup(2);

    cl_uint reportedCount = 0;
    const auto status = clCreateSubDevices(device.get(), properties, 0, nullptr, &reportedCount);
    EXPECT_EQ(CL_SUCCESS, status);
    EXPECT_EQ(2u, reportedCount);
}

// An output array smaller than the number of sub-devices is CL_INVALID_VALUE.
TEST_F(clCreateSubDevicesTests, GivenOutDevicesTooSmallWhenCreatingSubDevicesThenInvalidValueErrorIsReturned) {
    setup(2);

    outDevicesCount = 1;
    auto retVal = clCreateSubDevices(device.get(), properties, outDevicesCount, outDevices,
nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(clCreateSubDevicesTests, GivenValidInputWhenCreatingSubDevicesThenSubDevicesAreReturned) { setup(2); auto retVal = clCreateSubDevices(device.get(), properties, outDevicesCount, outDevices, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(device->getSubDevice(0), outDevices[0]); EXPECT_EQ(device->getSubDevice(1), outDevices[1]); properties[1] = CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE; cl_device_id outDevices2[2]; retVal = clCreateSubDevices(device.get(), properties, outDevicesCount, outDevices2, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(outDevices[0], outDevices2[0]); EXPECT_EQ(outDevices[1], outDevices2[1]); } TEST_F(clCreateSubDevicesTests, GivenValidInputWhenCreatingSubDevicesThenDeviceApiReferenceCountIsIncreasedEveryTime) { setup(2); EXPECT_EQ(0, device->getSubDevice(0)->getRefApiCount()); EXPECT_EQ(0, device->getSubDevice(1)->getRefApiCount()); auto retVal = clCreateSubDevices(device.get(), properties, outDevicesCount, outDevices, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1, device->getSubDevice(0)->getRefApiCount()); EXPECT_EQ(1, device->getSubDevice(1)->getRefApiCount()); retVal = clCreateSubDevices(device.get(), properties, outDevicesCount, outDevices, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(2, device->getSubDevice(0)->getRefApiCount()); EXPECT_EQ(2, device->getSubDevice(1)->getRefApiCount()); } struct clCreateSubDevicesDeviceInfoTests : clCreateSubDevicesTests { void setup(int numberOfDevices) { clCreateSubDevicesTests::setup(numberOfDevices); expectedSubDeviceParentDevice = device.get(); expectedRootDevicePartitionMaxSubDevices = numberOfDevices; } cl_device_id expectedRootDeviceParentDevice = nullptr; cl_device_affinity_domain expectedRootDevicePartitionAffinityDomain = CL_DEVICE_AFFINITY_DOMAIN_NUMA | CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE; cl_uint expectedRootDevicePartitionMaxSubDevices; cl_device_partition_property 
expectedRootDevicePartitionProperties[2] = {CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, 0}; cl_device_partition_property expectedRootDevicePartitionType[1] = {0}; cl_device_id expectedSubDeviceParentDevice; cl_device_affinity_domain expectedSubDevicePartitionAffinityDomain = 0; cl_uint expectedSubDevicePartitionMaxSubDevices = 0; cl_device_partition_property expectedSubDevicePartitionProperties[1] = {0}; cl_device_partition_property expectedSubDevicePartitionType[3] = {CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, CL_DEVICE_AFFINITY_DOMAIN_NUMA, 0}; cl_device_id expectedRootDeviceWithoutSubDevicesParentDevice = nullptr; cl_device_affinity_domain expectedRootDeviceWithoutSubDevicesPartitionAffinityDomain = 0; cl_uint expectedRootDeviceWithoutSubDevicesPartitionMaxSubDevices = 0; cl_device_partition_property expectedRootDeviceWithoutSubDevicesPartitionProperties[1] = {0}; cl_device_partition_property expectedRootDeviceWithoutSubDevicesPartitionType[1] = {0}; cl_device_id parentDevice; cl_device_affinity_domain partitionAffinityDomain; cl_uint partitionMaxSubDevices; cl_device_partition_property partitionProperties[2]; cl_device_partition_property partitionType[3]; size_t returnValueSize; }; TEST_F(clCreateSubDevicesDeviceInfoTests, WhenGettingSubDeviceRelatedDeviceInfoThenCorrectValuesAreSet) { setup(4); auto &rootDeviceInfo = device->getDeviceInfo(); EXPECT_EQ(expectedRootDeviceParentDevice, rootDeviceInfo.parentDevice); EXPECT_EQ(expectedRootDevicePartitionAffinityDomain, rootDeviceInfo.partitionAffinityDomain); EXPECT_EQ(expectedRootDevicePartitionMaxSubDevices, rootDeviceInfo.partitionMaxSubDevices); EXPECT_EQ(expectedRootDevicePartitionProperties[0], rootDeviceInfo.partitionProperties[0]); EXPECT_EQ(expectedRootDevicePartitionProperties[1], rootDeviceInfo.partitionProperties[1]); EXPECT_EQ(expectedRootDevicePartitionType[0], rootDeviceInfo.partitionType[0]); auto retVal = clCreateSubDevices(device.get(), properties, outDevicesCount, outDevices, nullptr); 
ASSERT_EQ(CL_SUCCESS, retVal); // the loop below dereferences every returned handle

    for (auto outDevice : outDevices) {
        auto pSubDevice = castToObject<ClDevice>(outDevice);
        ASSERT_NE(nullptr, pSubDevice);
        auto &subDevice = *pSubDevice;
        auto &subDeviceInfo = subDevice.getDeviceInfo();
        EXPECT_EQ(expectedSubDeviceParentDevice, subDeviceInfo.parentDevice);

        if (subDevice.getNumSubDevices() > 0) {
            // The returned device can itself be partitioned: its children carry
            // the leaf sub-device values.
            for (uint32_t i = 0; i < subDevice.getNumSubDevices(); i++) {
                auto subSubDevice = subDevice.getSubDevice(i);
                auto &subSubDeviceInfo = subSubDevice->getDeviceInfo();
                EXPECT_EQ(expectedSubDevicePartitionAffinityDomain, subSubDeviceInfo.partitionAffinityDomain);
                EXPECT_EQ(expectedSubDevicePartitionMaxSubDevices, subSubDeviceInfo.partitionMaxSubDevices);
                EXPECT_EQ(expectedSubDevicePartitionProperties[0], subSubDeviceInfo.partitionProperties[0]);
                EXPECT_EQ(expectedSubDevicePartitionType[0], subSubDeviceInfo.partitionType[0]);
                EXPECT_EQ(expectedSubDevicePartitionType[1], subSubDeviceInfo.partitionType[1]);
                EXPECT_EQ(expectedSubDevicePartitionType[2], subSubDeviceInfo.partitionType[2]);
            }
            // ...while the device itself must differ from the leaf values.
            EXPECT_NE(expectedSubDevicePartitionAffinityDomain, subDeviceInfo.partitionAffinityDomain);
            EXPECT_NE(expectedSubDevicePartitionMaxSubDevices, subDeviceInfo.partitionMaxSubDevices);
            EXPECT_NE(expectedSubDevicePartitionProperties[0], subDeviceInfo.partitionProperties[0]);
        } else {
            // Leaf sub-device: no further partitioning is advertised.
            EXPECT_EQ(expectedSubDevicePartitionAffinityDomain, subDeviceInfo.partitionAffinityDomain);
            EXPECT_EQ(expectedSubDevicePartitionMaxSubDevices, subDeviceInfo.partitionMaxSubDevices);
            EXPECT_EQ(expectedSubDevicePartitionProperties[0], subDeviceInfo.partitionProperties[0]);
        }

        // The partition type is the same in both cases.
        EXPECT_EQ(expectedSubDevicePartitionType[0], subDeviceInfo.partitionType[0]);
        EXPECT_EQ(expectedSubDevicePartitionType[1], subDeviceInfo.partitionType[1]);
        EXPECT_EQ(expectedSubDevicePartitionType[2], subDeviceInfo.partitionType[2]);
    }
}
// With a single device requested, the "no sub-devices" values must be found
// either on the root device or, when the root still reports a sub-device,
// on that first sub-device.
TEST_F(clCreateSubDevicesDeviceInfoTests, GivenRootDeviceWithoutSubDevicesWhenGettingSubDeviceRelatedDeviceInfoThenCorrectValuesAreSet) {
    setup(1);

    auto &rootDeviceInfo = device->getDeviceInfo();
    EXPECT_EQ(expectedRootDeviceWithoutSubDevicesParentDevice, rootDeviceInfo.parentDevice);

    const bool rootAdvertisesAffinityDomain =
        expectedRootDeviceWithoutSubDevicesPartitionAffinityDomain != rootDeviceInfo.partitionAffinityDomain;

    EXPECT_EQ(0u, device->getNumGenericSubDevices());
    if (rootAdvertisesAffinityDomain) {
        EXPECT_NE(0u, device->getNumSubDevices());

        auto &firstSubDeviceInfo = device->getSubDevice(0)->getDeviceInfo();
        EXPECT_EQ(expectedRootDeviceWithoutSubDevicesPartitionAffinityDomain, firstSubDeviceInfo.partitionAffinityDomain);
        EXPECT_EQ(expectedRootDeviceWithoutSubDevicesPartitionMaxSubDevices, firstSubDeviceInfo.partitionMaxSubDevices);
        EXPECT_EQ(expectedRootDeviceWithoutSubDevicesPartitionProperties[0], firstSubDeviceInfo.partitionProperties[0]);
        EXPECT_NE(expectedRootDeviceWithoutSubDevicesPartitionType[0], firstSubDeviceInfo.partitionType[0]);
    } else {
        EXPECT_EQ(0u, device->getNumSubDevices());
        EXPECT_EQ(expectedRootDeviceWithoutSubDevicesPartitionMaxSubDevices, rootDeviceInfo.partitionMaxSubDevices);
        EXPECT_EQ(expectedRootDeviceWithoutSubDevicesPartitionProperties[0], rootDeviceInfo.partitionProperties[0]);
        EXPECT_EQ(expectedRootDeviceWithoutSubDevicesPartitionType[0], rootDeviceInfo.partitionType[0]);
    }
}

// Same partition-related checks as the direct DeviceInfo test, but read
// through clGetDeviceInfo.
TEST_F(clCreateSubDevicesDeviceInfoTests, WhenGettingSubDeviceRelatedDeviceInfoViaApiThenCorrectValuesAreSet) {
    setup(4);

    size_t partitionPropertiesReturnValueSize = 0;
    size_t partitionTypeReturnValueSize = 0;

    clGetDeviceInfo(device.get(), CL_DEVICE_PARENT_DEVICE,
                    sizeof(parentDevice), &parentDevice, nullptr);
    EXPECT_EQ(expectedRootDeviceParentDevice, parentDevice);

    clGetDeviceInfo(device.get(), CL_DEVICE_PARTITION_AFFINITY_DOMAIN,
                    sizeof(partitionAffinityDomain), &partitionAffinityDomain, nullptr);
    EXPECT_EQ(expectedRootDevicePartitionAffinityDomain,
partitionAffinityDomain); clGetDeviceInfo(device.get(), CL_DEVICE_PARTITION_MAX_SUB_DEVICES, sizeof(partitionMaxSubDevices), &partitionMaxSubDevices, nullptr); EXPECT_EQ(expectedRootDevicePartitionMaxSubDevices, partitionMaxSubDevices); clGetDeviceInfo(device.get(), CL_DEVICE_PARTITION_PROPERTIES, sizeof(partitionProperties), &partitionProperties, &partitionPropertiesReturnValueSize); EXPECT_EQ(sizeof(expectedRootDevicePartitionProperties), partitionPropertiesReturnValueSize); EXPECT_EQ(expectedRootDevicePartitionProperties[0], partitionProperties[0]); EXPECT_EQ(expectedRootDevicePartitionProperties[1], partitionProperties[1]); clGetDeviceInfo(device.get(), CL_DEVICE_PARTITION_TYPE, sizeof(partitionType), &partitionType, &partitionTypeReturnValueSize); EXPECT_EQ(sizeof(expectedRootDevicePartitionType), partitionTypeReturnValueSize); EXPECT_EQ(expectedRootDevicePartitionType[0], partitionType[0]); auto retVal = clCreateSubDevices(device.get(), properties, outDevicesCount, outDevices, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); for (auto subDevice : outDevices) { auto neoSubDevice = castToObject(subDevice); ASSERT_NE(nullptr, neoSubDevice); bool hasSubDevices = neoSubDevice->getNumSubDevices() > 0; clGetDeviceInfo(subDevice, CL_DEVICE_PARENT_DEVICE, sizeof(parentDevice), &parentDevice, nullptr); clGetDeviceInfo(subDevice, CL_DEVICE_PARTITION_AFFINITY_DOMAIN, sizeof(partitionAffinityDomain), &partitionAffinityDomain, nullptr); clGetDeviceInfo(subDevice, CL_DEVICE_PARTITION_MAX_SUB_DEVICES, sizeof(partitionMaxSubDevices), &partitionMaxSubDevices, nullptr); clGetDeviceInfo(subDevice, CL_DEVICE_PARTITION_PROPERTIES, sizeof(partitionProperties), &partitionProperties, &partitionPropertiesReturnValueSize); clGetDeviceInfo(subDevice, CL_DEVICE_PARTITION_TYPE, sizeof(partitionType), &partitionType, &partitionTypeReturnValueSize); EXPECT_EQ(expectedSubDeviceParentDevice, parentDevice); if (hasSubDevices) { EXPECT_NE(expectedSubDevicePartitionAffinityDomain, 
partitionAffinityDomain); EXPECT_NE(expectedSubDevicePartitionMaxSubDevices, partitionMaxSubDevices); EXPECT_NE(sizeof(expectedSubDevicePartitionProperties), partitionPropertiesReturnValueSize); EXPECT_NE(expectedSubDevicePartitionProperties[0], partitionProperties[0]); EXPECT_EQ(sizeof(expectedSubDevicePartitionType), partitionTypeReturnValueSize); EXPECT_EQ(expectedSubDevicePartitionType[0], partitionType[0]); EXPECT_EQ(expectedSubDevicePartitionType[1], partitionType[1]); EXPECT_EQ(expectedSubDevicePartitionType[2], partitionType[2]); auto neoSubDevice = castToObject(subDevice); ASSERT_NE(nullptr, neoSubDevice); EXPECT_NE(0u, neoSubDevice->getNumSubDevices()); cl_device_id clSubSubDevice = neoSubDevice->getSubDevice(0); clGetDeviceInfo(clSubSubDevice, CL_DEVICE_PARENT_DEVICE, sizeof(parentDevice), &parentDevice, nullptr); EXPECT_EQ(subDevice, parentDevice); clGetDeviceInfo(clSubSubDevice, CL_DEVICE_PARTITION_AFFINITY_DOMAIN, sizeof(partitionAffinityDomain), &partitionAffinityDomain, nullptr); EXPECT_EQ(expectedSubDevicePartitionAffinityDomain, partitionAffinityDomain); clGetDeviceInfo(clSubSubDevice, CL_DEVICE_PARTITION_MAX_SUB_DEVICES, sizeof(partitionMaxSubDevices), &partitionMaxSubDevices, nullptr); EXPECT_EQ(expectedSubDevicePartitionMaxSubDevices, partitionMaxSubDevices); clGetDeviceInfo(clSubSubDevice, CL_DEVICE_PARTITION_PROPERTIES, sizeof(partitionProperties), &partitionProperties, &partitionPropertiesReturnValueSize); EXPECT_EQ(sizeof(expectedSubDevicePartitionProperties), partitionPropertiesReturnValueSize); EXPECT_EQ(expectedSubDevicePartitionProperties[0], partitionProperties[0]); clGetDeviceInfo(clSubSubDevice, CL_DEVICE_PARTITION_TYPE, sizeof(partitionType), &partitionType, &partitionTypeReturnValueSize); EXPECT_EQ(sizeof(expectedSubDevicePartitionType), partitionTypeReturnValueSize); EXPECT_EQ(expectedSubDevicePartitionType[0], partitionType[0]); EXPECT_EQ(expectedSubDevicePartitionType[1], partitionType[1]); 
EXPECT_EQ(expectedSubDevicePartitionType[2], partitionType[2]); } else { EXPECT_EQ(expectedSubDeviceParentDevice, parentDevice); EXPECT_EQ(expectedSubDevicePartitionAffinityDomain, partitionAffinityDomain); EXPECT_EQ(expectedSubDevicePartitionMaxSubDevices, partitionMaxSubDevices); EXPECT_EQ(sizeof(expectedSubDevicePartitionProperties), partitionPropertiesReturnValueSize); EXPECT_EQ(expectedSubDevicePartitionProperties[0], partitionProperties[0]); EXPECT_EQ(sizeof(expectedSubDevicePartitionType), partitionTypeReturnValueSize); EXPECT_EQ(expectedSubDevicePartitionType[0], partitionType[0]); EXPECT_EQ(expectedSubDevicePartitionType[1], partitionType[1]); EXPECT_EQ(expectedSubDevicePartitionType[2], partitionType[2]); } } } TEST_F(clCreateSubDevicesDeviceInfoTests, GivenRootDeviceWithoutSubDevicesWhenGettingSubDeviceRelatedDeviceInfoViaApiThenCorrectValuesAreSet) { DebugManager.flags.EngineInstancedSubDevices.set(false); setup(1); clGetDeviceInfo(device.get(), CL_DEVICE_PARENT_DEVICE, sizeof(parentDevice), &parentDevice, nullptr); EXPECT_EQ(expectedRootDeviceWithoutSubDevicesParentDevice, parentDevice); clGetDeviceInfo(device.get(), CL_DEVICE_PARTITION_AFFINITY_DOMAIN, sizeof(partitionAffinityDomain), &partitionAffinityDomain, nullptr); EXPECT_EQ(expectedRootDeviceWithoutSubDevicesPartitionAffinityDomain, partitionAffinityDomain); clGetDeviceInfo(device.get(), CL_DEVICE_PARTITION_MAX_SUB_DEVICES, sizeof(partitionMaxSubDevices), &partitionMaxSubDevices, nullptr); EXPECT_EQ(expectedRootDeviceWithoutSubDevicesPartitionMaxSubDevices, partitionMaxSubDevices); clGetDeviceInfo(device.get(), CL_DEVICE_PARTITION_PROPERTIES, sizeof(partitionProperties), &partitionProperties, &returnValueSize); EXPECT_EQ(sizeof(expectedRootDeviceWithoutSubDevicesPartitionProperties), returnValueSize); EXPECT_EQ(expectedRootDeviceWithoutSubDevicesPartitionProperties[0], partitionProperties[0]); clGetDeviceInfo(device.get(), CL_DEVICE_PARTITION_TYPE, sizeof(partitionType), &partitionType, 
&returnValueSize); EXPECT_EQ(sizeof(expectedRootDeviceWithoutSubDevicesPartitionType), returnValueSize); EXPECT_EQ(expectedRootDeviceWithoutSubDevicesPartitionType[0], partitionType[0]); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_create_user_event_tests.inl000066400000000000000000000101601422164147700305040ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/context/context.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clCreateUserEventTests; namespace ULT { TEST_F(clCreateUserEventTests, GivenValidContextWhenCreatingUserEventThenEventIsCreated) { auto userEvent = clCreateUserEvent( pContext, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, userEvent); retVal = clReleaseEvent(userEvent); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clCreateUserEventTests, GivenNullContextWhenCreatingUserEventThenInvalidContextErrorIsReturned) { auto userEvent = clCreateUserEvent( nullptr, &retVal); EXPECT_EQ(CL_INVALID_CONTEXT, retVal); EXPECT_EQ(nullptr, userEvent); } TEST_F(clCreateUserEventTests, GivenCorrectUserEventWhenGetingEventInfoThenClCommandUserCmdTypeIsReturned) { auto userEvent = clCreateUserEvent( pContext, &retVal); size_t retSize; retVal = clGetEventInfo(userEvent, CL_EVENT_COMMAND_QUEUE, 0, nullptr, &retSize); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(cl_command_queue), retSize); auto cmdQueue = reinterpret_cast(static_cast(0xdeadbeaf)); retVal = clGetEventInfo(userEvent, CL_EVENT_COMMAND_QUEUE, retSize, &cmdQueue, 0); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(nullptr, cmdQueue); retVal = clGetEventInfo(userEvent, CL_EVENT_COMMAND_TYPE, 0, nullptr, &retSize); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(cl_event_info), retSize); auto cmd_type = CL_COMMAND_SVM_UNMAP; retVal = clGetEventInfo(userEvent, CL_EVENT_COMMAND_TYPE, retSize, &cmd_type, 0); EXPECT_EQ(CL_COMMAND_USER, cmd_type); retVal = 
clReleaseEvent(userEvent); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clCreateUserEventTests, GivenUserEventStatusSetToCompleteWhenGettingEventInfoThenStatusIsSetToCompleteAndSuccessReturned) { auto userEvent = clCreateUserEvent( pContext, &retVal); retVal = clSetUserEventStatus(userEvent, CL_COMPLETE); ASSERT_EQ(CL_SUCCESS, retVal); size_t retSize; retVal = clGetEventInfo(userEvent, CL_EVENT_COMMAND_EXECUTION_STATUS, 0, nullptr, &retSize); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(cl_int), retSize); auto status = CL_SUBMITTED; retVal = clGetEventInfo(userEvent, CL_EVENT_COMMAND_EXECUTION_STATUS, retSize, &status, 0); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(CL_COMPLETE, status); retVal = clReleaseEvent(userEvent); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clCreateUserEventTests, GivenValidUserEventWhenGettingContextThenValidContextAndSuccessIsReturned) { auto userEvent = clCreateUserEvent( pContext, &retVal); size_t retSize; retVal = clGetEventInfo(userEvent, CL_EVENT_CONTEXT, 0, nullptr, &retSize); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(cl_context), retSize); cl_context oclContext; retVal = clGetEventInfo(userEvent, CL_EVENT_CONTEXT, retSize, &oclContext, 0); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(oclContext, pContext); retVal = clReleaseEvent(userEvent); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clCreateUserEventTests, GivenCompleteUserEventWhenWaitingForUserEventThenReturnIsImmediate) { auto userEvent = clCreateUserEvent( pContext, &retVal); retVal = clSetUserEventStatus(userEvent, CL_COMPLETE); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clWaitForEvents(1, &userEvent); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clReleaseEvent(userEvent); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clCreateUserEventTests, GivenUserEventWithErrorStatusWhenWaitingForUserEventThenClExecStatusErrorForEventsInWaitListErrorIsReturned) { auto userEvent = clCreateUserEvent( pContext, &retVal); retVal = clSetUserEventStatus(userEvent, -1); ASSERT_EQ(CL_SUCCESS, retVal); retVal = 
clWaitForEvents(1, &userEvent); ASSERT_EQ(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST, retVal); retVal = clReleaseEvent(userEvent); EXPECT_EQ(CL_SUCCESS, retVal); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_enqueue_barrier_tests.inl000066400000000000000000000016361422164147700301670ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_queue/command_queue.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clEnqueueBarrierTests; TEST_F(clEnqueueBarrierTests, GivenNullCommandQueueWhenEnqueuingThenInvalidCommandQueueErrorIsReturned) { auto retVal = clEnqueueBarrier( nullptr); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } TEST_F(clEnqueueBarrierTests, GivenValidCommandQueueWhenEnqueuingBarrierThenSuccessIsReturned) { auto retVal = clEnqueueBarrier( pCommandQueue); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clEnqueueBarrierTests, GivenQueueIncapableWhenEnqueuingBarrierThenInvalidOperationIsReturned) { this->disableQueueCapabilities(CL_QUEUE_CAPABILITY_BARRIER_INTEL); auto retVal = clEnqueueBarrier( pCommandQueue); EXPECT_EQ(CL_INVALID_OPERATION, retVal); } compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_enqueue_barrier_with_wait_list_tests.inl000066400000000000000000000022441422164147700332750ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_queue/command_queue.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clEnqueueBarrierWithWaitListTests; TEST_F(clEnqueueBarrierWithWaitListTests, GivenNullCommandQueueWhenEnqueuingBarrierWithWaitListThenInvalidCommandQueueErrorIsReturned) { auto retVal = clEnqueueBarrierWithWaitList( nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } TEST_F(clEnqueueBarrierWithWaitListTests, 
GivenValidCommandQueueWhenEnqueuingBarrierWithWaitListThenSuccessIsReturned) { auto retVal = clEnqueueBarrierWithWaitList( pCommandQueue, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clEnqueueBarrierWithWaitListTests, GivenQueueIncapableWhenEnqueuingBarrierWithWaitListThenInvalidOperationIsReturned) { this->disableQueueCapabilities(CL_QUEUE_CAPABILITY_BARRIER_INTEL); auto retVal = clEnqueueBarrierWithWaitList( pCommandQueue, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_OPERATION, retVal); } compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_enqueue_copy_buffer_rect_tests.inl000066400000000000000000000211501422164147700320520ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/ptr_math.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "cl_api_tests.h" using namespace NEO; struct clEnqueueCopyBufferRectTests : public ApiFixture<0>, ::testing::Test { void SetUp() override { ApiFixture::SetUp(); } void TearDown() override { ApiFixture::TearDown(); } }; namespace ULT { TEST_F(clEnqueueCopyBufferRectTests, GivenCorrectParametersWhenEnqueingCopyBufferRectThenSuccessIsReturned) { MockBuffer srcBuffer; MockBuffer dstBuffer; size_t srcOrigin[] = {0, 0, 0}; size_t dstOrigin[] = {0, 0, 0}; size_t region[] = {10, 10, 1}; auto retVal = clEnqueueCopyBufferRect( pCommandQueue, &srcBuffer, //srcBuffer &dstBuffer, //dstBuffer srcOrigin, dstOrigin, region, 10, //srcRowPitch 0, //srcSlicePitch 10, //dstRowPitch 0, //dstSlicePitch 0, //numEventsInWaitList nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clEnqueueCopyBufferRectTests, GivenNullCommandQueueWhenEnqueingCopyBufferRectThenInvalidCommandQueueErrorIsReturned) { auto retVal = clEnqueueCopyBufferRect( nullptr, //command_queue nullptr, //srcBuffer nullptr, //dstBuffer nullptr, //srcOrigin nullptr, //dstOrigin nullptr, //retion 0, 
//srcRowPitch 0, //srcSlicePitch 0, //dstRowPitch 0, //dstSlicePitch 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } TEST_F(clEnqueueCopyBufferRectTests, GivenQueueIncapableWhenEnqueingCopyBufferRectThenInvalidOperationIsReturned) { MockBuffer srcBuffer; MockBuffer dstBuffer; size_t srcOrigin[] = {0, 0, 0}; size_t dstOrigin[] = {0, 0, 0}; size_t region[] = {10, 10, 1}; this->disableQueueCapabilities(CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_RECT_INTEL); auto retVal = clEnqueueCopyBufferRect( pCommandQueue, &srcBuffer, //srcBuffer &dstBuffer, //dstBuffer srcOrigin, dstOrigin, region, 10, //srcRowPitch 0, //srcSlicePitch 10, //dstRowPitch 0, //dstSlicePitch 0, //numEventsInWaitList nullptr, nullptr); EXPECT_EQ(CL_INVALID_OPERATION, retVal); } TEST_F(clEnqueueCopyBufferRectTests, givenPitchesEqualZeroAndZerosInRegionWhenCallClEnqueueCopyBufferRectThenClInvalidValueIsReturned) { MockBuffer srcBuffer; MockBuffer dstBuffer; size_t srcOrigin[] = {0, 0, 0}; size_t dstOrigin[] = {0, 0, 0}; size_t region[] = {0, 0, 0}; auto retVal = clEnqueueCopyBufferRect( pCommandQueue, &srcBuffer, //srcBuffer &dstBuffer, //dstBuffer srcOrigin, dstOrigin, region, 0, //srcRowPitch 0, //srcSlicePitch 0, //dstRowPitch 0, //dstSlicePitch 0, //numEventsInWaitList nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(clEnqueueCopyBufferRectTests, givenZeroInRegionWhenCallClEnqueueCopyBufferRectThenClInvalidValueIsReturned) { MockBuffer srcBuffer; MockBuffer dstBuffer; size_t srcOrigin[] = {0, 0, 0}; size_t dstOrigin[] = {0, 0, 0}; size_t region[] = {0, 0, 0}; auto retVal = clEnqueueCopyBufferRect( pCommandQueue, &srcBuffer, //srcBuffer &dstBuffer, //dstBuffer srcOrigin, dstOrigin, region, 10, //srcRowPitch 0, //srcSlicePitch 10, //dstRowPitch 0, //dstSlicePitch 0, //numEventsInWaitList nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); size_t region1[] = {10, 10, 0}; retVal = clEnqueueCopyBufferRect( pCommandQueue, &srcBuffer, //srcBuffer &dstBuffer, //dstBuffer 
srcOrigin, dstOrigin, region1, 10, //srcRowPitch 0, //srcSlicePitch 10, //dstRowPitch 0, //dstSlicePitch 0, //numEventsInWaitList nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); size_t region2[] = {10, 0, 1}; retVal = clEnqueueCopyBufferRect( pCommandQueue, &srcBuffer, //srcBuffer &dstBuffer, //dstBuffer srcOrigin, dstOrigin, region2, 10, //srcRowPitch 0, //srcSlicePitch 10, //dstRowPitch 0, //dstSlicePitch 0, //numEventsInWaitList nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); size_t region3[] = {10, 10, 0}; retVal = clEnqueueCopyBufferRect( pCommandQueue, &srcBuffer, //srcBuffer &dstBuffer, //dstBuffer srcOrigin, dstOrigin, region3, 10, //srcRowPitch 0, //srcSlicePitch 10, //dstRowPitch 0, //dstSlicePitch 0, //numEventsInWaitList nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(clEnqueueCopyBufferRectTests, givenNonProperSrcBufferSizeWhenCallClEnqueueCopyBufferRectThenClInvalidValueIsReturned) { MockBuffer srcBuffer; srcBuffer.size = 10; MockBuffer dstBuffer; size_t srcOrigin[] = {0, 0, 0}; size_t dstOrigin[] = {0, 0, 0}; size_t region[] = {10, 10, 1}; auto retVal = clEnqueueCopyBufferRect( pCommandQueue, &srcBuffer, //srcBuffer &dstBuffer, //dstBuffer srcOrigin, dstOrigin, region, 10, //srcRowPitch 0, //srcSlicePitch 10, //dstRowPitch 0, //dstSlicePitch 0, //numEventsInWaitList nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(clEnqueueCopyBufferRectTests, givenNonProperDstBufferSizeWhenCallClEnqueueCopyBufferRectThenClInvalidValueIsReturned) { MockBuffer srcBuffer; MockBuffer dstBuffer; dstBuffer.size = 10; size_t srcOrigin[] = {0, 0, 0}; size_t dstOrigin[] = {0, 0, 0}; size_t region[] = {10, 10, 1}; auto retVal = clEnqueueCopyBufferRect( pCommandQueue, &srcBuffer, //srcBuffer &dstBuffer, //dstBuffer srcOrigin, dstOrigin, region, 10, //srcRowPitch 0, //srcSlicePitch 10, //dstRowPitch 0, //dstSlicePitch 0, //numEventsInWaitList nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); } 
TEST_F(clEnqueueCopyBufferRectTests, givenPitchesEqualZeroAndNotZeroRegionWhenCallClEnqueueCopyBufferRectThenPitchIsSetBasedOnRegionAndClSuccessIsReturned) { class CommandQueueMock : public MockCommandQueue { public: CommandQueueMock(Context *context, ClDevice *device, const cl_queue_properties *props) : MockCommandQueue(context, device, props, false) {} cl_int enqueueCopyBufferRect(Buffer *srcBuffer, Buffer *dstBuffer, const size_t *srcOrigin, const size_t *dstOrigin, const size_t *region, size_t argSrcRowPitch, size_t argSrcSlicePitch, size_t argDstRowPitch, size_t argDstSlicePitch, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { srcRowPitch = argSrcRowPitch; srcSlicePitch = argSrcSlicePitch; dstRowPitch = argDstRowPitch; dstSlicePitch = argDstSlicePitch; return CL_SUCCESS; } size_t srcRowPitch; size_t srcSlicePitch; size_t dstRowPitch; size_t dstSlicePitch; }; auto commandQueue = std::make_unique(pContext, pDevice, nullptr); MockBuffer srcBuffer; MockBuffer dstBuffer; dstBuffer.size = 200; srcBuffer.size = 200; size_t srcOrigin[] = {0, 0, 0}; size_t dstOrigin[] = {0, 0, 0}; size_t region[] = {10, 20, 1}; auto retVal = clEnqueueCopyBufferRect( commandQueue.get(), &srcBuffer, //srcBuffer &dstBuffer, //dstBuffer srcOrigin, dstOrigin, region, 0, //srcRowPitch 0, //srcSlicePitch 0, //dstRowPitch 0, //dstSlicePitch 0, //numEventsInWaitList nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(region[0], commandQueue->srcRowPitch); EXPECT_EQ(region[0], commandQueue->dstRowPitch); EXPECT_EQ(region[1], commandQueue->srcSlicePitch / commandQueue->srcRowPitch); EXPECT_EQ(region[1], commandQueue->dstSlicePitch / commandQueue->dstRowPitch); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_enqueue_copy_buffer_to_image_tests.inl000066400000000000000000000154761422164147700327170ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include 
"opencl/source/command_queue/command_queue.h" #include "opencl/source/helpers/surface_formats.h" #include "opencl/source/mem_obj/image.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clEnqueueCopyBufferToImageTests; namespace ULT { struct clEnqueueCopyBufferToImageTests : public ApiFixture<>, public ::testing::Test { void SetUp() override { ApiFixture::SetUp(); // clang-format off imageFormat.image_channel_order = CL_YUYV_INTEL; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_width = 32; imageDesc.image_height = 32; imageDesc.image_depth = 1; imageDesc.image_array_size = 1; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = nullptr; // clang-format on } void TearDown() override { ApiFixture::TearDown(); } cl_image_format imageFormat; cl_image_desc imageDesc; }; TEST_F(clEnqueueCopyBufferToImageTests, GivenInvalidCmdQueueWhenCopyingBufferToImageThenInvalidCommandQueueErrorIsReturned) { size_t dstOrigin[] = {0, 0, 0}; size_t region[] = {10, 10, 0}; auto retVal = clEnqueueCopyBufferToImage( nullptr, //commandQueue nullptr, //srcBuffer nullptr, //dstBuffer 0u, //src_offset dstOrigin, region, 0, //numEventsInWaitList nullptr, nullptr); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } TEST_F(clEnqueueCopyBufferToImageTests, GivenInvalidSrcBufferWhenCopyingBufferToImageThenInvalidMemObjectErrorIsReturned) { size_t dstOrigin[] = {0, 0, 0}; size_t region[] = {10, 10, 0}; auto retVal = clEnqueueCopyBufferToImage( pCommandQueue, nullptr, //srcBuffer nullptr, //dstBuffer 0u, //src_offset dstOrigin, region, 0, //numEventsInWaitList nullptr, nullptr); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } TEST_F(clEnqueueCopyBufferToImageTests, GivenValidParametersWhenCopyingBufferToImageThenSuccessIsReturned) { imageFormat.image_channel_order = CL_RGBA; 
cl_mem dstImage = ImageFunctions::validateAndCreateImage(pContext, nullptr, CL_MEM_READ_WRITE, 0, &imageFormat, &imageDesc, nullptr, retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, dstImage); auto srcBuffer = std::unique_ptr(BufferHelper>::create(pContext)); size_t dstOrigin[] = {0, 0, 0}; size_t region[] = {10, 10, 1}; auto retVal = clEnqueueCopyBufferToImage( pCommandQueue, srcBuffer.get(), dstImage, 0u, //src_offset dstOrigin, region, 0, //numEventsInWaitList nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(dstImage); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clEnqueueCopyBufferToImageTests, GivenQueueIncapableWhenCopyingBufferToImageThenInvalidOperationIsReturned) { imageFormat.image_channel_order = CL_RGBA; cl_mem dstImage = ImageFunctions::validateAndCreateImage(pContext, nullptr, CL_MEM_READ_WRITE, 0, &imageFormat, &imageDesc, nullptr, retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, dstImage); auto srcBuffer = std::unique_ptr(BufferHelper>::create(pContext)); size_t dstOrigin[] = {0, 0, 0}; size_t region[] = {10, 10, 1}; this->disableQueueCapabilities(CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_IMAGE_INTEL); auto retVal = clEnqueueCopyBufferToImage( pCommandQueue, srcBuffer.get(), dstImage, 0u, //src_offset dstOrigin, region, 0, //numEventsInWaitList nullptr, nullptr); EXPECT_EQ(CL_INVALID_OPERATION, retVal); retVal = clReleaseMemObject(dstImage); EXPECT_EQ(CL_SUCCESS, retVal); } typedef clEnqueueCopyBufferToImageTests clEnqueueCopyBufferToImageYUV; TEST_F(clEnqueueCopyBufferToImageYUV, GivenValidYuvDstImageWhenCopyingBufferToImageThenSuccessIsReturned) { auto srcBuffer = std::unique_ptr(BufferHelper>::create(pContext)); auto dstImage = Image::validateAndCreateImage(pContext, nullptr, CL_MEM_READ_ONLY, 0, &imageFormat, &imageDesc, nullptr, retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, dstImage); const size_t origin[] = {2, 2, 0}; const size_t region[] = {2, 2, 1}; auto retVal = clEnqueueCopyBufferToImage( 
pCommandQueue, srcBuffer.get(), dstImage, 0, origin, region, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(dstImage); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clEnqueueCopyBufferToImageYUV, GivenInvalidOriginAndYuvDstImageWhenCopyingBufferToImageThenInvalidValueErrorIsReturned) { auto srcBuffer = std::unique_ptr(BufferHelper>::create(pContext)); auto dstImage = Image::validateAndCreateImage(pContext, nullptr, CL_MEM_READ_ONLY, 0, &imageFormat, &imageDesc, nullptr, retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, dstImage); const size_t origin[] = {1, 2, 0}; const size_t region[] = {2, 2, 0}; auto retVal = clEnqueueCopyBufferToImage( pCommandQueue, srcBuffer.get(), dstImage, 0, origin, region, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); retVal = clReleaseMemObject(dstImage); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clEnqueueCopyBufferToImageYUV, GivenInvalidRegionAndValidYuvDstImageWhenCopyingBufferToImageThenInvalidValueErrorIsReturned) { auto srcBuffer = std::unique_ptr(BufferHelper>::create(pContext)); auto dstImage = Image::validateAndCreateImage(pContext, nullptr, CL_MEM_READ_ONLY, 0, &imageFormat, &imageDesc, nullptr, retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, dstImage); const size_t origin[] = {2, 2, 0}; const size_t region[] = {1, 2, 0}; auto retVal = clEnqueueCopyBufferToImage( pCommandQueue, srcBuffer.get(), dstImage, 0, origin, region, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); retVal = clReleaseMemObject(dstImage); EXPECT_EQ(CL_SUCCESS, retVal); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_enqueue_copy_image_tests.inl000066400000000000000000000223631422164147700306550ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/ptr_math.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/context/context.h" #include 
"opencl/source/helpers/surface_formats.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clEnqueueCopyImageTests; namespace ULT { struct clEnqueueCopyImageTests : public ApiFixture<>, public ::testing::Test { void SetUp() override { ApiFixture::SetUp(); // clang-format off imageFormat.image_channel_order = CL_YUYV_INTEL; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_width = 32; imageDesc.image_height = 32; imageDesc.image_depth = 1; imageDesc.image_array_size = 1; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = nullptr; // clang-format on } void TearDown() override { ApiFixture::TearDown(); } cl_image_format imageFormat; cl_image_desc imageDesc; }; TEST_F(clEnqueueCopyImageTests, GivenNullCommandQueueWhenCopyingImageThenInvalidCommandQueueErrorIsReturned) { auto buffer = (cl_mem)ptrGarbage; retVal = clEnqueueCopyImage( nullptr, buffer, buffer, nullptr, nullptr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } TEST_F(clEnqueueCopyImageTests, GivenNullSrcBufferWhenCopyingImageThenInvalidMemObjectErrorIsReturned) { auto buffer = (cl_mem)ptrGarbage; retVal = clEnqueueCopyImage( pCommandQueue, nullptr, buffer, nullptr, nullptr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } TEST_F(clEnqueueCopyImageTests, GivenNullDstBufferWhenCopyingImageThenInvalidMemObjectErrorIsReturned) { auto buffer = (cl_mem)ptrGarbage; retVal = clEnqueueCopyImage( pCommandQueue, buffer, nullptr, nullptr, nullptr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } TEST_F(clEnqueueCopyImageTests, GivenDifferentSrcAndDstImageFormatsWhenCopyingImageThenImageFormatMismatchErrorIsReturned) { imageFormat.image_channel_order = CL_RGBA; auto srcImage = Image::validateAndCreateImage(pContext, nullptr, CL_MEM_READ_WRITE, 0, &imageFormat, 
&imageDesc, nullptr, retVal); imageFormat.image_channel_order = CL_BGRA; auto dstImage = Image::validateAndCreateImage(pContext, nullptr, CL_MEM_READ_WRITE, 0, &imageFormat, &imageDesc, nullptr, retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, srcImage); EXPECT_NE(nullptr, dstImage); const size_t origin[] = {2, 2, 0}; const size_t region[] = {2, 2, 0}; auto retVal = clEnqueueCopyImage( pCommandQueue, srcImage, dstImage, origin, origin, region, 0, nullptr, nullptr); EXPECT_EQ(CL_IMAGE_FORMAT_MISMATCH, retVal); retVal = clReleaseMemObject(srcImage); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(dstImage); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clEnqueueCopyImageTests, GivenValidParametersWhenCopyingImageThenSuccessIsReturned) { imageFormat.image_channel_order = CL_RGBA; auto srcImage = Image::validateAndCreateImage(pContext, nullptr, CL_MEM_READ_WRITE, 0, &imageFormat, &imageDesc, nullptr, retVal); auto dstImage = Image::validateAndCreateImage(pContext, nullptr, CL_MEM_READ_WRITE, 0, &imageFormat, &imageDesc, nullptr, retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, srcImage); EXPECT_NE(nullptr, dstImage); const size_t origin[] = {2, 2, 0}; const size_t region[] = {2, 2, 1}; auto retVal = clEnqueueCopyImage( pCommandQueue, srcImage, dstImage, origin, origin, region, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(srcImage); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(dstImage); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clEnqueueCopyImageTests, GivenQueueIncapableWhenCopyingImageThenInvalidOperationIsReturned) { imageFormat.image_channel_order = CL_RGBA; auto srcImage = Image::validateAndCreateImage(pContext, nullptr, CL_MEM_READ_WRITE, 0, &imageFormat, &imageDesc, nullptr, retVal); auto dstImage = Image::validateAndCreateImage(pContext, nullptr, CL_MEM_READ_WRITE, 0, &imageFormat, &imageDesc, nullptr, retVal); const size_t origin[] = {2, 2, 0}; const size_t region[] = {2, 2, 1}; 
this->disableQueueCapabilities(CL_QUEUE_CAPABILITY_TRANSFER_IMAGE_INTEL); auto retVal = clEnqueueCopyImage( pCommandQueue, srcImage, dstImage, origin, origin, region, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_OPERATION, retVal); retVal = clReleaseMemObject(srcImage); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(dstImage); EXPECT_EQ(CL_SUCCESS, retVal); } typedef clEnqueueCopyImageTests clEnqueueCopyImageYUVTests; TEST_F(clEnqueueCopyImageYUVTests, GivenValidParametersWhenCopyingYuvImageThenSuccessIsReturned) { auto srcImage = Image::validateAndCreateImage(pContext, nullptr, CL_MEM_READ_ONLY, 0, &imageFormat, &imageDesc, nullptr, retVal); auto dstImage = Image::validateAndCreateImage(pContext, nullptr, CL_MEM_READ_ONLY, 0, &imageFormat, &imageDesc, nullptr, retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, srcImage); EXPECT_NE(nullptr, dstImage); const size_t origin[] = {2, 2, 0}; const size_t region[] = {2, 2, 1}; auto retVal = clEnqueueCopyImage( pCommandQueue, srcImage, dstImage, origin, origin, region, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(srcImage); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(dstImage); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clEnqueueCopyImageYUVTests, GivenInvalidSrcOriginWhenCopyingYuvImageThenInvalidValueErrorIsReturned) { auto srcImage = Image::validateAndCreateImage(pContext, nullptr, CL_MEM_READ_ONLY, 0, &imageFormat, &imageDesc, nullptr, retVal); auto dstImage = Image::validateAndCreateImage(pContext, nullptr, CL_MEM_READ_ONLY, 0, &imageFormat, &imageDesc, nullptr, retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, srcImage); EXPECT_NE(nullptr, dstImage); const size_t srcOrigin[] = {1, 2, 0}; const size_t dstOrigin[] = {2, 2, 0}; const size_t region[] = {2, 2, 0}; auto retVal = clEnqueueCopyImage( pCommandQueue, srcImage, dstImage, srcOrigin, dstOrigin, region, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); retVal = 
clReleaseMemObject(srcImage); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(dstImage); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clEnqueueCopyImageYUVTests, GivenInvalidDstOriginWhenCopyingYuvImageThenInvalidValueErrorIsReturned) { auto srcImage = Image::validateAndCreateImage(pContext, nullptr, CL_MEM_READ_ONLY, 0, &imageFormat, &imageDesc, nullptr, retVal); auto dstImage = Image::validateAndCreateImage(pContext, nullptr, CL_MEM_READ_ONLY, 0, &imageFormat, &imageDesc, nullptr, retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, srcImage); EXPECT_NE(nullptr, dstImage); const size_t srcOrigin[] = {2, 2, 0}; const size_t dstOrigin[] = {1, 2, 0}; const size_t region[] = {2, 2, 0}; auto retVal = clEnqueueCopyImage( pCommandQueue, srcImage, dstImage, srcOrigin, dstOrigin, region, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); retVal = clReleaseMemObject(srcImage); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(dstImage); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clEnqueueCopyImageYUVTests, GivenInvalidDstOriginFor2dImageWhenCopyingYuvImageThenInvalidValueErrorIsReturned) { auto srcImage = Image::validateAndCreateImage(pContext, nullptr, CL_MEM_READ_ONLY, 0, &imageFormat, &imageDesc, nullptr, retVal); auto dstImage = Image::validateAndCreateImage(pContext, nullptr, CL_MEM_READ_ONLY, 0, &imageFormat, &imageDesc, nullptr, retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, srcImage); EXPECT_NE(nullptr, dstImage); const size_t origin[] = {2, 2, 1}; const size_t region[] = {2, 2, 0}; auto retVal = clEnqueueCopyImage( pCommandQueue, srcImage, dstImage, origin, origin, region, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); retVal = clReleaseMemObject(srcImage); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(dstImage); EXPECT_EQ(CL_SUCCESS, retVal); } } // namespace ULT 
compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_enqueue_copy_image_to_buffer_tests.inl000066400000000000000000000151271422164147700327100ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/helpers/surface_formats.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clEnqueueCopyImageToBufferTests; namespace ULT { struct clEnqueueCopyImageToBufferTests : public ApiFixture<>, public ::testing::Test { void SetUp() override { ApiFixture::SetUp(); // clang-format off imageFormat.image_channel_order = CL_YUYV_INTEL; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_width = 32; imageDesc.image_height = 32; imageDesc.image_depth = 1; imageDesc.image_array_size = 1; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = nullptr; // clang-format on } void TearDown() override { ApiFixture::TearDown(); } cl_image_format imageFormat; cl_image_desc imageDesc; }; TEST_F(clEnqueueCopyImageToBufferTests, GivenInvalidQueueWhenCopyingImageToBufferThenInvalidCommandQueueErrorIsReturned) { size_t srcOrigin[] = {0, 0, 0}; size_t region[] = {10, 10, 0}; auto retVal = clEnqueueCopyImageToBuffer( nullptr, nullptr, //srcBuffer nullptr, //dstBuffer srcOrigin, region, 0, //dstOffset 0, //numEventsInWaitList nullptr, nullptr); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } TEST_F(clEnqueueCopyImageToBufferTests, GivenInvalidBufferWhenCopyingImageToBufferThenInvalidMemObjectErrorIsReturned) { size_t srcOrigin[] = {0, 0, 0}; size_t region[] = {10, 10, 0}; auto retVal = clEnqueueCopyImageToBuffer( pCommandQueue, nullptr, //srcBuffer nullptr, //dstBuffer srcOrigin, region, 0, //dstOffset 0, //numEventsInWaitList nullptr, 
nullptr); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } TEST_F(clEnqueueCopyImageToBufferTests, GivenValidParametersWhenCopyingImageToBufferThenSuccessIsReturned) { imageFormat.image_channel_order = CL_RGBA; auto srcImage = Image::validateAndCreateImage(pContext, nullptr, CL_MEM_READ_WRITE, 0, &imageFormat, &imageDesc, nullptr, retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, srcImage); auto dstBuffer = std::unique_ptr(BufferHelper>::create(pContext)); size_t srcOrigin[] = {0, 0, 0}; size_t region[] = {10, 10, 1}; auto retVal = clEnqueueCopyImageToBuffer( pCommandQueue, srcImage, dstBuffer.get(), srcOrigin, region, 0, //dstOffset 0, //numEventsInWaitList nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(srcImage); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clEnqueueCopyImageToBufferTests, GivenQueueIncapableWhenCopyingImageToBufferThenInvalidOperationIsReturned) { imageFormat.image_channel_order = CL_RGBA; auto srcImage = Image::validateAndCreateImage(pContext, nullptr, CL_MEM_READ_WRITE, 0, &imageFormat, &imageDesc, nullptr, retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, srcImage); auto dstBuffer = std::unique_ptr(BufferHelper>::create(pContext)); size_t srcOrigin[] = {0, 0, 0}; size_t region[] = {10, 10, 1}; this->disableQueueCapabilities(CL_QUEUE_CAPABILITY_TRANSFER_IMAGE_BUFFER_INTEL); auto retVal = clEnqueueCopyImageToBuffer( pCommandQueue, srcImage, dstBuffer.get(), srcOrigin, region, 0, //dstOffset 0, //numEventsInWaitList nullptr, nullptr); EXPECT_EQ(CL_INVALID_OPERATION, retVal); retVal = clReleaseMemObject(srcImage); EXPECT_EQ(CL_SUCCESS, retVal); } typedef clEnqueueCopyImageToBufferTests clEnqueueCopyImageToBufferYUVTests; TEST_F(clEnqueueCopyImageToBufferYUVTests, GivenValidParametersWhenCopyingYuvImageToBufferThenSuccessIsReturned) { auto dstBuffer = std::unique_ptr(BufferHelper>::create(pContext)); auto srcImage = Image::validateAndCreateImage(pContext, nullptr, CL_MEM_READ_ONLY, 0, &imageFormat, 
&imageDesc, nullptr, retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, srcImage); const size_t origin[] = {2, 2, 0}; const size_t region[] = {2, 2, 1}; auto retVal = clEnqueueCopyImageToBuffer( pCommandQueue, srcImage, dstBuffer.get(), origin, region, 0, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(srcImage); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clEnqueueCopyImageToBufferYUVTests, GivenInvalidOriginWhenCopyingYuvImageToBufferThenInvalidValueErrorIsReturned) { auto dstBuffer = std::unique_ptr(BufferHelper>::create(pContext)); auto srcImage = Image::validateAndCreateImage(pContext, nullptr, CL_MEM_READ_ONLY, 0, &imageFormat, &imageDesc, nullptr, retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, srcImage); const size_t origin[] = {1, 2, 0}; const size_t region[] = {2, 2, 0}; auto retVal = clEnqueueCopyImageToBuffer( pCommandQueue, srcImage, dstBuffer.get(), origin, region, 0, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); retVal = clReleaseMemObject(srcImage); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clEnqueueCopyImageToBufferYUVTests, GivenInvalidRegionWhenCopyingYuvImageToBufferThenInvalidValueErrorIsReturned) { auto dstBuffer = std::unique_ptr(BufferHelper>::create(pContext)); auto srcImage = Image::validateAndCreateImage(pContext, nullptr, CL_MEM_READ_ONLY, 0, &imageFormat, &imageDesc, nullptr, retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, srcImage); const size_t origin[] = {2, 2, 0}; const size_t region[] = {1, 2, 0}; auto retVal = clEnqueueCopyImageToBuffer( pCommandQueue, srcImage, dstBuffer.get(), origin, region, 0, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); retVal = clReleaseMemObject(srcImage); EXPECT_EQ(CL_SUCCESS, retVal); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_enqueue_fill_buffer_tests.inl000066400000000000000000000037151422164147700310200ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * 
SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/ptr_math.h" #include "opencl/source/command_queue/command_queue.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clEnqueueFillBufferTests; namespace ULT { TEST_F(clEnqueueFillBufferTests, GivenNullCommandQueueWhenFillingBufferThenInvalidCommandQueueErrorIsReturned) { auto buffer = (cl_mem)ptrGarbage; cl_float pattern = 1.0f; retVal = clEnqueueFillBuffer( nullptr, buffer, &pattern, sizeof(pattern), 0, sizeof(pattern), 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } TEST_F(clEnqueueFillBufferTests, GivenNullBufferWhenFillingBufferThenInvalidMemObjectErrorIsReturned) { cl_float pattern = 1.0f; retVal = clEnqueueFillBuffer( pCommandQueue, nullptr, &pattern, sizeof(pattern), 0, sizeof(pattern), 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } TEST_F(clEnqueueFillBufferTests, GivenValidArgumentsWhenFillingBufferThenSuccessIsReturned) { MockBuffer buffer{}; cl_float pattern = 1.0f; retVal = clEnqueueFillBuffer( pCommandQueue, &buffer, &pattern, sizeof(pattern), 0, sizeof(pattern), 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clEnqueueFillBufferTests, GivenQueueIncapableWhenFillingBufferThenInvalidOperationIsReturned) { MockBuffer buffer{}; cl_float pattern = 1.0f; this->disableQueueCapabilities(CL_QUEUE_CAPABILITY_FILL_BUFFER_INTEL); retVal = clEnqueueFillBuffer( pCommandQueue, &buffer, &pattern, sizeof(pattern), 0, sizeof(pattern), 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_OPERATION, retVal); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_enqueue_fill_image_tests.inl000066400000000000000000000061111422164147700306220ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/ptr_math.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include 
"cl_api_tests.h" using namespace NEO; typedef api_tests clEnqueueFillImageTests; namespace ULT { TEST_F(clEnqueueFillImageTests, GivenNullCommandQueueWhenFillingImageThenInvalidCommandQueueErrorIsReturned) { auto image = std::unique_ptr(Image2dHelper>::create(pContext)); uint32_t fill_color[4] = {0xaaaaaaaa, 0xbbbbbbbb, 0xcccccccc, 0xdddddddd}; size_t origin[3] = {0, 0, 0}; size_t region[3] = {2, 2, 1}; retVal = clEnqueueFillImage( nullptr, image.get(), fill_color, origin, region, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } TEST_F(clEnqueueFillImageTests, GivenNullImageWhenFillingImageThenInvalidMemObjectErrorIsReturned) { uint32_t fill_color[4] = {0xaaaaaaaa, 0xbbbbbbbb, 0xcccccccc, 0xdddddddd}; size_t origin[3] = {0, 0, 0}; size_t region[3] = {2, 2, 1}; retVal = clEnqueueFillImage( pCommandQueue, nullptr, fill_color, origin, region, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } TEST_F(clEnqueueFillImageTests, GivenNullFillColorWhenFillingImageThenInvalidValueErrorIsReturned) { auto image = std::unique_ptr(Image2dHelper>::create(pContext)); size_t origin[3] = {0, 0, 0}; size_t region[3] = {2, 2, 1}; retVal = clEnqueueFillImage( pCommandQueue, image.get(), nullptr, origin, region, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(clEnqueueFillImageTests, GivenCorrectArgumentsWhenFillingImageThenSuccessIsReturned) { auto image = std::unique_ptr(Image2dHelper>::create(pContext)); uint32_t fillColor[4] = {0xaaaaaaaa, 0xbbbbbbbb, 0xcccccccc, 0xdddddddd}; size_t origin[3] = {0, 0, 0}; size_t region[3] = {2, 2, 1}; retVal = clEnqueueFillImage( pCommandQueue, image.get(), fillColor, origin, region, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clEnqueueFillImageTests, GivenQueueIncapableWhenFillingImageThenInvalidOperationReturned) { auto image = std::unique_ptr(Image2dHelper>::create(pContext)); uint32_t fillColor[4] = {0xaaaaaaaa, 0xbbbbbbbb, 0xcccccccc, 0xdddddddd}; size_t origin[3] = {0, 0, 
0}; size_t region[3] = {2, 2, 1}; this->disableQueueCapabilities(CL_QUEUE_CAPABILITY_FILL_IMAGE_INTEL); retVal = clEnqueueFillImage( pCommandQueue, image.get(), fillColor, origin, region, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_OPERATION, retVal); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_enqueue_image_tests.inl000066400000000000000000000217701422164147700276240ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include using namespace NEO; using ImageEnqueueCall = std::function; struct ValidateRegionAndOriginTests : public ::testing::TestWithParam { void SetUp() override { context.reset(new MockContext()); cmdQ.reset(new MockCommandQueue(context.get(), context->getDevice(0), nullptr, false)); } static void readImage(MockCommandQueue *cmdQ, Image *image, size_t *origin, size_t *region, int32_t &retVal) { uint32_t tempPtr = 0; retVal = clEnqueueReadImage(cmdQ, image, CL_TRUE, origin, region, 0, 0, &tempPtr, 0, nullptr, nullptr); } static void writeImage(MockCommandQueue *cmdQ, Image *image, size_t *origin, size_t *region, int32_t &retVal) { uint32_t tempPtr = 0; retVal = clEnqueueWriteImage(cmdQ, image, CL_TRUE, origin, region, 0, 0, &tempPtr, 0, nullptr, nullptr); } static void fillImage(MockCommandQueue *cmdQ, Image *image, size_t *origin, size_t *region, int32_t &retVal) { uint32_t fill_color[4] = {0xaaaaaaaa, 0xbbbbbbbb, 0xcccccccc, 0xdddddddd}; retVal = clEnqueueFillImage(cmdQ, image, fill_color, origin, region, 0, nullptr, nullptr); } static void copyImageWithCorrectSrc(MockCommandQueue *cmdQ, Image *dstImage, size_t *dstOrigin, size_t *region, int32_t &retVal) { std::unique_ptr 
srcImage(ImageHelper::create(&cmdQ->getContext())); size_t srcOrigin[3] = {0, 0, 0}; retVal = clEnqueueCopyImage(cmdQ, srcImage.get(), dstImage, srcOrigin, dstOrigin, region, 0, nullptr, nullptr); } static void copyImageWithCorrectDst(MockCommandQueue *cmdQ, Image *srcImage, size_t *srcOrigin, size_t *region, int32_t &retVal) { std::unique_ptr dstImage(ImageHelper::create(&cmdQ->getContext())); size_t dstOrigin[3] = {0, 0, 0}; retVal = clEnqueueCopyImage(cmdQ, srcImage, dstImage.get(), srcOrigin, dstOrigin, region, 0, nullptr, nullptr); } static void copyImageToBuffer(MockCommandQueue *cmdQ, Image *image, size_t *origin, size_t *region, int32_t &retVal) { MockBuffer buffer; retVal = clEnqueueCopyImageToBuffer(cmdQ, image, &buffer, origin, region, 0, 0, nullptr, nullptr); } static void copyBufferToImage(MockCommandQueue *cmdQ, Image *image, size_t *origin, size_t *region, int32_t &retVal) { MockBuffer buffer; retVal = clEnqueueCopyBufferToImage(cmdQ, &buffer, image, 0, origin, region, 0, nullptr, nullptr); } static void mapImage(MockCommandQueue *cmdQ, Image *image, size_t *origin, size_t *region, int32_t &retVal) { clEnqueueMapImage(cmdQ, image, CL_TRUE, CL_MAP_READ, origin, region, nullptr, nullptr, 0, nullptr, nullptr, &retVal); } std::unique_ptr context; std::unique_ptr cmdQ; cl_int retVal = CL_SUCCESS; }; TEST_P(ValidateRegionAndOriginTests, givenAnyZeroRegionParamWhenEnqueueCalledThenReturnError) { std::unique_ptr image(ImageHelper::create(context.get())); EXPECT_NE(nullptr, image.get()); size_t origin[3] = {0, 0, 0}; std::array region = {{0, 1, 1}}; GetParam()(cmdQ.get(), image.get(), origin, ®ion[0], retVal); EXPECT_EQ(CL_INVALID_VALUE, retVal); region = {{1, 0, 1}}; GetParam()(cmdQ.get(), image.get(), origin, ®ion[0], retVal); EXPECT_EQ(CL_INVALID_VALUE, retVal); region = {{1, 1, 0}}; GetParam()(cmdQ.get(), image.get(), origin, ®ion[0], retVal); EXPECT_EQ(CL_INVALID_VALUE, retVal); region = {{0, 0, 0}}; GetParam()(cmdQ.get(), image.get(), origin, ®ion[0], 
retVal); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_P(ValidateRegionAndOriginTests, givenMaxImage2DFirstAndSecondRegionCoordinateAndAnyNonZeroFirstOrSecondOriginCoordinateWhenEnqueueCalledThenReturnError) { std::unique_ptr image(ImageHelper::create(context.get())); EXPECT_NE(nullptr, image.get()); const auto &deviceInfo = context->getDevice(0)->getDevice().getDeviceInfo(); size_t region[3] = {deviceInfo.image2DMaxWidth, deviceInfo.image2DMaxHeight, 1}; std::array origin = {{0, 1, 0}}; GetParam()(cmdQ.get(), image.get(), &origin[0], region, retVal); EXPECT_EQ(CL_INVALID_VALUE, retVal); origin = {{1, 0, 0}}; GetParam()(cmdQ.get(), image.get(), &origin[0], region, retVal); EXPECT_EQ(CL_INVALID_VALUE, retVal); origin = {{1, 1, 0}}; GetParam()(cmdQ.get(), image.get(), &origin[0], region, retVal); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_P(ValidateRegionAndOriginTests, givenSecondOriginCoordinateAndNotAllowedImgTypeWhenEnqueueCalledThenReturnError) { size_t region[3] = {1, 1, 1}; size_t origin[3] = {0, 1, 0}; std::unique_ptr image(ImageHelper::create(context.get())); GetParam()(cmdQ.get(), image.get(), origin, ®ion[0], retVal); EXPECT_EQ(CL_INVALID_VALUE, retVal); auto image1dBufferDesc = Image1dDefaults::imageDesc; image1dBufferDesc.image_type = CL_MEM_OBJECT_IMAGE1D_BUFFER; image.reset(ImageHelper::create(context.get(), &image1dBufferDesc)); GetParam()(cmdQ.get(), image.get(), origin, ®ion[0], retVal); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_P(ValidateRegionAndOriginTests, givenThirdOriginCoordinateAndNotAllowedImgTypeWhenEnqueueCalledThenReturnError) { size_t region[3] = {1, 1, 1}; size_t origin[3] = {0, 0, 1}; std::unique_ptr image(ImageHelper::create(context.get())); GetParam()(cmdQ.get(), image.get(), origin, ®ion[0], retVal); EXPECT_EQ(CL_INVALID_VALUE, retVal); image.reset(ImageHelper::create(context.get())); GetParam()(cmdQ.get(), image.get(), origin, ®ion[0], retVal); EXPECT_EQ(CL_INVALID_VALUE, retVal); image.reset(ImageHelper::create(context.get())); 
GetParam()(cmdQ.get(), image.get(), origin, ®ion[0], retVal); EXPECT_EQ(CL_INVALID_VALUE, retVal); auto image1dBufferDesc = Image1dDefaults::imageDesc; image1dBufferDesc.image_type = CL_MEM_OBJECT_IMAGE1D_BUFFER; image.reset(ImageHelper::create(context.get(), &image1dBufferDesc)); GetParam()(cmdQ.get(), image.get(), origin, ®ion[0], retVal); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_P(ValidateRegionAndOriginTests, givenSecondRegionCoordinateAndNotAllowedImgTypeWhenEnqueueCalledThenReturnError) { size_t region[3] = {1, 2, 1}; size_t origin[3] = {0, 0, 0}; std::unique_ptr image(ImageHelper::create(context.get())); GetParam()(cmdQ.get(), image.get(), origin, ®ion[0], retVal); EXPECT_EQ(CL_INVALID_VALUE, retVal); auto image1dBufferDesc = Image1dDefaults::imageDesc; image1dBufferDesc.image_type = CL_MEM_OBJECT_IMAGE1D_BUFFER; image.reset(ImageHelper::create(context.get(), &image1dBufferDesc)); GetParam()(cmdQ.get(), image.get(), origin, ®ion[0], retVal); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_P(ValidateRegionAndOriginTests, givenThirdRegionCoordinateAndNotAllowedImgTypeWhenEnqueueCalledThenReturnError) { size_t region[3] = {1, 1, 2}; size_t origin[3] = {0, 0, 0}; std::unique_ptr image(ImageHelper::create(context.get())); GetParam()(cmdQ.get(), image.get(), origin, ®ion[0], retVal); EXPECT_EQ(CL_INVALID_VALUE, retVal); image.reset(ImageHelper::create(context.get())); GetParam()(cmdQ.get(), image.get(), origin, ®ion[0], retVal); EXPECT_EQ(CL_INVALID_VALUE, retVal); image.reset(ImageHelper::create(context.get())); GetParam()(cmdQ.get(), image.get(), origin, ®ion[0], retVal); EXPECT_EQ(CL_INVALID_VALUE, retVal); auto image1dBufferDesc = Image1dDefaults::imageDesc; image1dBufferDesc.image_type = CL_MEM_OBJECT_IMAGE1D_BUFFER; image.reset(ImageHelper::create(context.get(), &image1dBufferDesc)); GetParam()(cmdQ.get(), image.get(), origin, ®ion[0], retVal); EXPECT_EQ(CL_INVALID_VALUE, retVal); } ImageEnqueueCall enqueueFunctions[8] = { 
&ValidateRegionAndOriginTests::readImage, &ValidateRegionAndOriginTests::writeImage, &ValidateRegionAndOriginTests::fillImage, &ValidateRegionAndOriginTests::copyImageWithCorrectSrc, &ValidateRegionAndOriginTests::copyImageWithCorrectDst, &ValidateRegionAndOriginTests::copyImageToBuffer, &ValidateRegionAndOriginTests::copyBufferToImage, &ValidateRegionAndOriginTests::mapImage, }; INSTANTIATE_TEST_CASE_P( ValidateRegionAndOriginTests, ValidateRegionAndOriginTests, ::testing::ValuesIn(enqueueFunctions)); compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_enqueue_map_buffer_tests.inl000066400000000000000000000141311422164147700306410ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/context/context.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clEnqueueMapBufferTests; TEST_F(clEnqueueMapBufferTests, GivenNullCommandQueueWhenMappingBufferThenInvalidCommandQueueErrorIsReturned) { unsigned int bufferSize = 16; auto pHostMem = new unsigned char[bufferSize]; memset(pHostMem, 0xaa, bufferSize); cl_mem_flags flags = CL_MEM_USE_HOST_PTR; auto buffer = clCreateBuffer( pContext, flags, bufferSize, pHostMem, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); cl_event eventReturned = nullptr; auto ptrResult = clEnqueueMapBuffer( nullptr, buffer, CL_TRUE, CL_MAP_READ, 0, 8, 0, nullptr, &eventReturned, &retVal); EXPECT_EQ(nullptr, ptrResult); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); retVal = clReleaseMemObject(buffer); EXPECT_EQ(CL_SUCCESS, retVal); delete[] pHostMem; clReleaseEvent(eventReturned); } TEST_F(clEnqueueMapBufferTests, GivenValidParametersWhenMappingBufferThenSuccessIsReturned) { unsigned int bufferSize = 16; auto pHostMem = new unsigned char[bufferSize]; memset(pHostMem, 0xaa, bufferSize); cl_mem_flags flags = 
CL_MEM_USE_HOST_PTR; auto buffer = clCreateBuffer( pContext, flags, bufferSize, pHostMem, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); cl_event eventReturned = nullptr; auto ptrResult = clEnqueueMapBuffer( pCommandQueue, buffer, CL_TRUE, CL_MAP_READ, 0, 8, 0, nullptr, &eventReturned, &retVal); EXPECT_NE(nullptr, ptrResult); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(buffer); EXPECT_EQ(CL_SUCCESS, retVal); delete[] pHostMem; clReleaseEvent(eventReturned); } TEST_F(clEnqueueMapBufferTests, GivenQueueIncapableWhenMappingBufferThenInvalidOperationIsReturned) { MockBuffer buffer{}; disableQueueCapabilities(CL_QUEUE_CAPABILITY_MAP_BUFFER_INTEL); auto ptrResult = clEnqueueMapBuffer( pCommandQueue, &buffer, CL_TRUE, CL_MAP_READ, 0, 8, 0, nullptr, nullptr, &retVal); EXPECT_EQ(nullptr, ptrResult); EXPECT_EQ(CL_INVALID_OPERATION, retVal); } TEST_F(clEnqueueMapBufferTests, GivenMappedPointerWhenCreatingBufferFromThisPointerThenInvalidHostPtrErrorIsReturned) { unsigned int bufferSize = 16; cl_mem buffer = clCreateBuffer(pContext, CL_MEM_READ_WRITE, bufferSize, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); void *hostPointer = clEnqueueMapBuffer(pCommandQueue, buffer, CL_TRUE, CL_MAP_READ, 0, bufferSize, 0, nullptr, nullptr, &retVal); EXPECT_NE(nullptr, hostPointer); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clEnqueueUnmapMemObject(pCommandQueue, buffer, hostPointer, 0, NULL, NULL); EXPECT_EQ(CL_SUCCESS, retVal); auto bufferFromHostPtr = clCreateBuffer(pContext, CL_MEM_USE_HOST_PTR, bufferSize, hostPointer, &retVal); EXPECT_EQ(CL_INVALID_HOST_PTR, retVal); EXPECT_EQ(nullptr, bufferFromHostPtr); retVal = clReleaseMemObject(buffer); EXPECT_EQ(CL_SUCCESS, retVal); } class EnqueueMapBufferFlagsTest : public ApiFixture<>, public testing::TestWithParam { public: EnqueueMapBufferFlagsTest() { } protected: void SetUp() override { ApiFixture::SetUp(); buffer_flags = GetParam(); unsigned int bufferSize = 16; pHostMem = 
new unsigned char[bufferSize]; memset(pHostMem, 0xaa, bufferSize); buffer = clCreateBuffer( pContext, buffer_flags, bufferSize, pHostMem, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); } void TearDown() override { retVal = clReleaseMemObject(buffer); EXPECT_EQ(CL_SUCCESS, retVal); delete[] pHostMem; ApiFixture::TearDown(); } cl_int retVal = CL_SUCCESS; cl_mem_flags buffer_flags = 0; unsigned char *pHostMem; cl_mem buffer; }; typedef EnqueueMapBufferFlagsTest EnqueueMapReadBufferTests; TEST_P(EnqueueMapReadBufferTests, GivenInvalidFlagsWhenMappingBufferForReadingThenInvalidOperationErrorIsReturned) { cl_event eventReturned = nullptr; auto ptrResult = clEnqueueMapBuffer( pCommandQueue, buffer, CL_TRUE, CL_MAP_READ, 0, 8, 0, nullptr, &eventReturned, &retVal); EXPECT_EQ(nullptr, ptrResult); EXPECT_EQ(CL_INVALID_OPERATION, retVal); clReleaseEvent(eventReturned); } static cl_mem_flags NoReadAccessFlags[] = { CL_MEM_USE_HOST_PTR | CL_MEM_HOST_WRITE_ONLY, CL_MEM_USE_HOST_PTR | CL_MEM_HOST_NO_ACCESS}; INSTANTIATE_TEST_CASE_P( EnqueueMapBufferFlagsTests_Create, EnqueueMapReadBufferTests, testing::ValuesIn(NoReadAccessFlags)); typedef EnqueueMapBufferFlagsTest EnqueueMapWriteBufferTests; TEST_P(EnqueueMapWriteBufferTests, GivenInvalidFlagsWhenMappingBufferForWritingThenInvalidOperationErrorIsReturned) { cl_event eventReturned = nullptr; auto ptrResult = clEnqueueMapBuffer( pCommandQueue, buffer, CL_TRUE, CL_MAP_WRITE, 0, 8, 0, nullptr, &eventReturned, &retVal); EXPECT_EQ(nullptr, ptrResult); EXPECT_EQ(CL_INVALID_OPERATION, retVal); clReleaseEvent(eventReturned); } static cl_mem_flags NoWriteAccessFlags[] = { CL_MEM_USE_HOST_PTR | CL_MEM_HOST_READ_ONLY, CL_MEM_USE_HOST_PTR | CL_MEM_HOST_NO_ACCESS}; INSTANTIATE_TEST_CASE_P( EnqueueMapBufferFlagsTests_Create, EnqueueMapWriteBufferTests, testing::ValuesIn(NoWriteAccessFlags)); 
compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_enqueue_map_image_tests.inl000066400000000000000000000136331422164147700304600ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/context/context.h" #include "opencl/source/helpers/surface_formats.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "cl_api_tests.h" using namespace NEO; namespace ULT { struct clEnqueueMapImageTests : public ApiFixture<>, public ::testing::Test { void SetUp() override { ApiFixture::SetUp(); // clang-format off imageFormat.image_channel_order = CL_RGBA; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_width = 32; imageDesc.image_height = 32; imageDesc.image_depth = 1; imageDesc.image_array_size = 1; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = nullptr; // clang-format on } void TearDown() override { ApiFixture::TearDown(); } cl_image_format imageFormat; cl_image_desc imageDesc; }; TEST_F(clEnqueueMapImageTests, GivenValidParametersWhenMappingImageThenSuccessIsReturned) { auto image = Image::validateAndCreateImage(pContext, nullptr, CL_MEM_READ_WRITE, 0, &imageFormat, &imageDesc, nullptr, retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, image); const size_t origin[3] = {0, 0, 0}; const size_t region[3] = {1, 1, 1}; size_t imageRowPitch = 0; size_t imageSlicePitch = 0; clEnqueueMapImage( pCommandQueue, image, CL_TRUE, CL_MAP_READ, origin, region, &imageRowPitch, &imageSlicePitch, 0, nullptr, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(image); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clEnqueueMapImageTests, GivenQueueIncapableWhenMappingImageThenInvalidOperationIsReturned) { auto image = 
Image::validateAndCreateImage(pContext, nullptr, CL_MEM_READ_WRITE, 0, &imageFormat, &imageDesc, nullptr, retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, image); const size_t origin[3] = {0, 0, 0}; const size_t region[3] = {1, 1, 1}; size_t imageRowPitch = 0; size_t imageSlicePitch = 0; this->disableQueueCapabilities(CL_QUEUE_CAPABILITY_MAP_IMAGE_INTEL); clEnqueueMapImage( pCommandQueue, image, CL_TRUE, CL_MAP_READ, origin, region, &imageRowPitch, &imageSlicePitch, 0, nullptr, nullptr, &retVal); EXPECT_EQ(CL_INVALID_OPERATION, retVal); retVal = clReleaseMemObject(image); EXPECT_EQ(CL_SUCCESS, retVal); } struct clEnqueueMapImageYUVTests : public ApiFixture<>, public ::testing::Test { void SetUp() override { ApiFixture::SetUp(); // clang-format off imageFormat.image_channel_order = CL_YUYV_INTEL; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_width = 32; imageDesc.image_height = 32; imageDesc.image_depth = 1; imageDesc.image_array_size = 1; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = nullptr; // clang-format on } void TearDown() override { ApiFixture::TearDown(); } cl_image_format imageFormat; cl_image_desc imageDesc; }; TEST_F(clEnqueueMapImageYUVTests, GivenValidYuvImageWhenMappingImageThenSuccessIsReturned) { auto image = Image::validateAndCreateImage(pContext, nullptr, CL_MEM_READ_ONLY, 0, &imageFormat, &imageDesc, nullptr, retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, image); const size_t origin[] = {2, 2, 0}; const size_t region[] = {2, 2, 1}; clEnqueueMapImage( pCommandQueue, image, CL_TRUE, CL_MAP_READ, origin, region, 0, 0, 0, nullptr, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(image); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clEnqueueMapImageYUVTests, GivenInvalidOriginWhenMappingYuvImageThenInvalidValueErrorIsReturned) { auto image = 
Image::validateAndCreateImage(pContext, nullptr, CL_MEM_READ_ONLY, 0, &imageFormat, &imageDesc, nullptr, retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, image); const size_t origin[] = {1, 2, 0}; const size_t region[] = {2, 2, 0}; clEnqueueMapImage( pCommandQueue, image, CL_TRUE, CL_MAP_READ, origin, region, 0, 0, 0, nullptr, nullptr, &retVal); EXPECT_EQ(CL_INVALID_VALUE, retVal); retVal = clReleaseMemObject(image); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clEnqueueMapImageYUVTests, GivenInvalidRegionWhenMappingYuvImageThenInvalidValueErrorIsReturned) { auto image = Image::validateAndCreateImage(pContext, nullptr, CL_MEM_READ_ONLY, 0, &imageFormat, &imageDesc, nullptr, retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, image); const size_t origin[] = {2, 2, 0}; const size_t region[] = {1, 2, 0}; clEnqueueMapImage( pCommandQueue, image, CL_TRUE, CL_MAP_READ, origin, region, 0, 0, 0, nullptr, nullptr, &retVal); EXPECT_EQ(CL_INVALID_VALUE, retVal); retVal = clReleaseMemObject(image); EXPECT_EQ(CL_SUCCESS, retVal); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_enqueue_marker_tests.inl000066400000000000000000000035341422164147700300210ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/command_queue/enqueue_common.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clEnqueueMarkerTests; TEST_F(clEnqueueMarkerTests, GivenNullCommandQueueWhenEnqueingMarkerThenInvalidCommandQueueErrorIsReturned) { auto retVal = clEnqueueMarker( nullptr, nullptr); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } TEST_F(clEnqueueMarkerTests, GivenValidCommandQueueWhenEnqueingMarkerThenSuccessIsReturned) { auto retVal = clEnqueueMarker( pCommandQueue, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clEnqueueMarkerTests, GivenQueueIncapableWhenEnqueingMarkerThenInvalidOperationReturned) { 
this->disableQueueCapabilities(CL_QUEUE_CAPABILITY_MARKER_INTEL); auto retVal = clEnqueueMarker( pCommandQueue, nullptr); EXPECT_EQ(CL_INVALID_OPERATION, retVal); } class CommandWithoutKernelTypesTests : public testing::TestWithParam { }; TEST_P(CommandWithoutKernelTypesTests, GivenCommandTypeWhenCheckingIsCommandWithoutKernelThenTrueIsReturned) { unsigned int commandType = GetParam(); EXPECT_TRUE(isCommandWithoutKernel(commandType)); }; TEST_F(CommandWithoutKernelTypesTests, GivenZeroWhenCheckingIsCommandWithoutKernelThenFalseIsReturned) { EXPECT_FALSE(isCommandWithoutKernel(0)); }; static unsigned int commandWithoutKernelTypes[] = { CL_COMMAND_BARRIER, CL_COMMAND_MARKER, CL_COMMAND_MIGRATE_MEM_OBJECTS, CL_COMMAND_SVM_MAP, CL_COMMAND_SVM_MIGRATE_MEM, CL_COMMAND_SVM_UNMAP, CL_COMMAND_SVM_FREE}; INSTANTIATE_TEST_CASE_P( commandWithoutKernelTypes, CommandWithoutKernelTypesTests, testing::ValuesIn(commandWithoutKernelTypes)); compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_enqueue_marker_with_wait_list_tests.inl000066400000000000000000000022261422164147700331300ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_queue/command_queue.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clEnqueueMarkerWithWaitListTests; TEST_F(clEnqueueMarkerWithWaitListTests, GivenNullCommandQueueWhenEnqueingMarkerWithWaitListThenInvalidCommandQueueErrorIsReturned) { auto retVal = clEnqueueMarkerWithWaitList( nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } TEST_F(clEnqueueMarkerWithWaitListTests, GivenValidCommandQueueWhenEnqueingMarkerWithWaitListThenSuccessIsReturned) { auto retVal = clEnqueueMarkerWithWaitList( pCommandQueue, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clEnqueueMarkerWithWaitListTests, GivenQueueIncapableWhenEnqueingMarkerWithWaitListThenInvalidOperationIsReturned) { 
this->disableQueueCapabilities(CL_QUEUE_CAPABILITY_MARKER_INTEL); auto retVal = clEnqueueMarkerWithWaitList( pCommandQueue, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_OPERATION, retVal); } compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_enqueue_migrate_mem_objects_tests.inl000066400000000000000000000145621422164147700325420ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/context/context.h" #include "opencl/source/event/event.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clEnqueueMigrateMemObjectsTests; TEST_F(clEnqueueMigrateMemObjectsTests, GivenNullCommandQueueWhenMigratingMemObjThenInvalidCommandQueueErrorIsReturned) { unsigned int bufferSize = 16; auto pHostMem = new unsigned char[bufferSize]; memset(pHostMem, 0xaa, bufferSize); cl_mem_flags flags = CL_MEM_USE_HOST_PTR; auto buffer = clCreateBuffer( pContext, flags, bufferSize, pHostMem, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); cl_event eventReturned = nullptr; auto result = clEnqueueMigrateMemObjects( nullptr, 1, &buffer, CL_MIGRATE_MEM_OBJECT_HOST, 0, nullptr, &eventReturned); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, result); retVal = clReleaseMemObject(buffer); EXPECT_EQ(CL_SUCCESS, retVal); delete[] pHostMem; clReleaseEvent(eventReturned); } TEST_F(clEnqueueMigrateMemObjectsTests, GivenValidInputsWhenMigratingMemObjThenSuccessIsReturned) { unsigned int bufferSize = 16; auto pHostMem = new unsigned char[bufferSize]; memset(pHostMem, 0xaa, bufferSize); cl_mem_flags flags = CL_MEM_USE_HOST_PTR; auto buffer = clCreateBuffer( pContext, flags, bufferSize, pHostMem, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); cl_event eventReturned = nullptr; auto result = clEnqueueMigrateMemObjects( pCommandQueue, 1, &buffer, CL_MIGRATE_MEM_OBJECT_HOST, 0, 
nullptr, &eventReturned); EXPECT_EQ(CL_SUCCESS, result); retVal = clReleaseMemObject(buffer); EXPECT_EQ(CL_SUCCESS, retVal); delete[] pHostMem; clReleaseEvent(eventReturned); } TEST_F(clEnqueueMigrateMemObjectsTests, GivenNullMemObjsWhenMigratingMemObjThenInvalidValueErrorIsReturned) { cl_event eventReturned = nullptr; auto result = clEnqueueMigrateMemObjects( pCommandQueue, 1, nullptr, CL_MIGRATE_MEM_OBJECT_HOST, 0, nullptr, &eventReturned); EXPECT_EQ(CL_INVALID_VALUE, result); } TEST_F(clEnqueueMigrateMemObjectsTests, GivenZeroMemObjectsWhenMigratingMemObjsThenInvalidValueErrorIsReturned) { cl_event eventReturned = nullptr; auto result = clEnqueueMigrateMemObjects( pCommandQueue, 0, nullptr, CL_MIGRATE_MEM_OBJECT_HOST, 0, nullptr, &eventReturned); EXPECT_EQ(CL_INVALID_VALUE, result); } TEST_F(clEnqueueMigrateMemObjectsTests, GivenNonZeroEventsAndNullWaitlistWhenMigratingMemObjThenInvalidWaitListErrorIsReturned) { cl_event eventReturned = nullptr; auto result = clEnqueueMigrateMemObjects( pCommandQueue, 0, nullptr, CL_MIGRATE_MEM_OBJECT_HOST, 2, nullptr, &eventReturned); EXPECT_EQ(CL_INVALID_EVENT_WAIT_LIST, result); } TEST_F(clEnqueueMigrateMemObjectsTests, GivenZeroEventsAndNonNullWaitlistWhenMigratingMemObjsThenInvalidWaitListErrorIsReturned) { cl_event eventReturned = nullptr; Event event(pCommandQueue, CL_COMMAND_MIGRATE_MEM_OBJECTS, 0, 0); auto result = clEnqueueMigrateMemObjects( pCommandQueue, 0, nullptr, CL_MIGRATE_MEM_OBJECT_HOST, 0, (cl_event *)&event, &eventReturned); EXPECT_EQ(CL_INVALID_EVENT_WAIT_LIST, result); } TEST_F(clEnqueueMigrateMemObjectsTests, GivenValidFlagsWhenMigratingMemObjsThenSuccessIsReturned) { unsigned int bufferSize = 16; auto pHostMem = new unsigned char[bufferSize]; memset(pHostMem, 0xaa, bufferSize); cl_mem_flags flags = CL_MEM_USE_HOST_PTR; auto buffer = clCreateBuffer( pContext, flags, bufferSize, pHostMem, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); cl_mem_migration_flags validFlags[] = {0, 
CL_MIGRATE_MEM_OBJECT_HOST, CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED, CL_MIGRATE_MEM_OBJECT_HOST | CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED}; for (auto validFlag : validFlags) { cl_event eventReturned = nullptr; auto result = clEnqueueMigrateMemObjects( pCommandQueue, 1, &buffer, validFlag, 0, nullptr, &eventReturned); EXPECT_EQ(CL_SUCCESS, result); clReleaseEvent(eventReturned); } retVal = clReleaseMemObject(buffer); EXPECT_EQ(CL_SUCCESS, retVal); delete[] pHostMem; } TEST_F(clEnqueueMigrateMemObjectsTests, GivenInvalidFlagsWhenMigratingMemObjsThenInvalidValueErrorIsReturned) { unsigned int bufferSize = 16; auto pHostMem = new unsigned char[bufferSize]; memset(pHostMem, 0xaa, bufferSize); cl_mem_flags flags = CL_MEM_USE_HOST_PTR; auto buffer = clCreateBuffer( pContext, flags, bufferSize, pHostMem, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); cl_mem_migration_flags invalidFlags[] = {(cl_mem_migration_flags)0xffffffff, CL_MIGRATE_MEM_OBJECT_HOST | (1 << 10), CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED | (1 << 10), (cl_mem_migration_flags)12345}; for (auto invalidFlag : invalidFlags) { cl_event eventReturned = nullptr; auto result = clEnqueueMigrateMemObjects( pCommandQueue, 1, &buffer, invalidFlag, 0, nullptr, &eventReturned); EXPECT_EQ(CL_INVALID_VALUE, result); clReleaseEvent(eventReturned); } retVal = clReleaseMemObject(buffer); EXPECT_EQ(CL_SUCCESS, retVal); delete[] pHostMem; } TEST_F(clEnqueueMigrateMemObjectsTests, GivenInvalidMemObjectWhenMigratingMemObjsThenInvalidMemObjectErrorIsReturned) { cl_event eventReturned = nullptr; auto result = clEnqueueMigrateMemObjects( pCommandQueue, 1, reinterpret_cast(pCommandQueue), CL_MIGRATE_MEM_OBJECT_HOST, 0, nullptr, &eventReturned); EXPECT_EQ(CL_INVALID_MEM_OBJECT, result); } compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_enqueue_native_kernel_tests.inl000066400000000000000000000013401422164147700313570ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * 
SPDX-License-Identifier: MIT * */ #include "cl_api_tests.h" using namespace NEO; typedef api_tests clEnqueueNativeKernelTests; namespace ULT { TEST_F(clEnqueueNativeKernelTests, GivenAnyParametersWhenExecutingNativeKernelThenOutOfHostMemoryErrorIsReturned) { auto retVal = clEnqueueNativeKernel( nullptr, // commandQueue nullptr, // user_func nullptr, // args 0u, // cb_args 0, // num_mem_objects nullptr, // mem_list nullptr, // args_mem_loc 0, // num_events nullptr, //event_list nullptr // event ); EXPECT_EQ(CL_OUT_OF_HOST_MEMORY, retVal); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_enqueue_nd_range_kernel_tests.inl000066400000000000000000000110001422164147700316400ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_queue/command_queue.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clEnqueueNDRangeKernelTests; namespace ULT { TEST_F(clEnqueueNDRangeKernelTests, GivenValidParametersWhenExecutingKernelThenSuccessIsReturned) { cl_uint workDim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {1, 1, 1}; size_t localWorkSize[3] = {1, 1, 1}; cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event *event = nullptr; retVal = clEnqueueNDRangeKernel( pCommandQueue, pMultiDeviceKernel, workDim, globalWorkOffset, globalWorkSize, localWorkSize, numEventsInWaitList, eventWaitList, event); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clEnqueueNDRangeKernelTests, GivenQueueIncapableWhenExecutingKernelThenInvalidOperationIsReturned) { cl_uint workDim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {1, 1, 1}; size_t localWorkSize[3] = {1, 1, 1}; cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event *event = nullptr; this->disableQueueCapabilities(CL_QUEUE_CAPABILITY_KERNEL_INTEL); retVal = 
clEnqueueNDRangeKernel( pCommandQueue, pMultiDeviceKernel, workDim, globalWorkOffset, globalWorkSize, localWorkSize, numEventsInWaitList, eventWaitList, event); EXPECT_EQ(CL_INVALID_OPERATION, retVal); } TEST_F(clEnqueueNDRangeKernelTests, GivenNullCommandQueueWhenExecutingKernelThenInvalidCommandQueueErrorIsReturned) { size_t globalWorkSize[3] = {1, 1, 1}; retVal = clEnqueueNDRangeKernel( nullptr, pMultiDeviceKernel, 1, nullptr, globalWorkSize, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } TEST_F(clEnqueueNDRangeKernelTests, GivenNonZeroEventsAndEmptyEventWaitListWhenExecutingKernelThenInvalidEventWaitListErrorIsReturned) { cl_uint workDim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {1, 1, 1}; size_t localWorkSize[3] = {1, 1, 1}; cl_uint numEventsInWaitList = 1; cl_event *eventWaitList = nullptr; cl_event *event = nullptr; retVal = clEnqueueNDRangeKernel( pCommandQueue, pMultiDeviceKernel, workDim, globalWorkOffset, globalWorkSize, localWorkSize, numEventsInWaitList, eventWaitList, event); EXPECT_EQ(CL_INVALID_EVENT_WAIT_LIST, retVal); } TEST_F(clEnqueueNDRangeKernelTests, GivenConcurrentKernelWhenExecutingKernelThenInvalidKernelErrorIsReturned) { cl_uint workDim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {1, 1, 1}; size_t localWorkSize[3] = {1, 1, 1}; cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event *event = nullptr; pKernel->executionType = KernelExecutionType::Concurrent; retVal = clEnqueueNDRangeKernel( pCommandQueue, pMultiDeviceKernel, workDim, globalWorkOffset, globalWorkSize, localWorkSize, numEventsInWaitList, eventWaitList, event); EXPECT_EQ(CL_INVALID_KERNEL, retVal); } TEST_F(clEnqueueNDRangeKernelTests, GivenKernelWithAllocateSyncBufferPatchWhenExecutingKernelThenInvalidKernelErrorIsReturned) { cl_uint workDim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {1, 1, 1}; size_t localWorkSize[3] = {1, 1, 1}; cl_uint 
numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event *event = nullptr; pProgram->mockKernelInfo.kernelDescriptor.kernelAttributes.flags.usesSyncBuffer = true; auto &syncBufferAddress = pProgram->mockKernelInfo.kernelDescriptor.payloadMappings.implicitArgs.syncBufferAddress; syncBufferAddress.pointerSize = sizeof(uint8_t); syncBufferAddress.stateless = 0; syncBufferAddress.bindful = 0; EXPECT_TRUE(pKernel->usesSyncBuffer()); retVal = clEnqueueNDRangeKernel( pCommandQueue, pMultiDeviceKernel, workDim, globalWorkOffset, globalWorkSize, localWorkSize, numEventsInWaitList, eventWaitList, event); EXPECT_EQ(CL_INVALID_KERNEL, retVal); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_enqueue_read_buffer_rect_tests.inl000066400000000000000000000215041422164147700320160ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/ptr_math.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/context/context.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clEnqueueReadBufferRectTest; namespace ULT { TEST_F(clEnqueueReadBufferRectTest, GivenInvalidBufferWhenReadingRectangularRegionThenInvalidMemObjectErrorIsReturned) { auto buffer = (cl_mem)ptrGarbage; size_t buffOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {10, 10, 0}; char ptr[10]; auto retVal = clEnqueueReadBufferRect( pCommandQueue, buffer, CL_FALSE, buffOrigin, hostOrigin, region, 10, //bufferRowPitch 0, //bufferSlicePitch 10, //hostRowPitch 0, //hostSlicePitch ptr, //hostPtr 0, //numEventsInWaitList nullptr, nullptr); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } TEST_F(clEnqueueReadBufferRectTest, GivenNullCommandQueueWhenReadingRectangularRegionThenInvalidCommandQueueErrorIsReturned) { auto buffer = (cl_mem)ptrGarbage; size_t buffOrigin[] = {0, 0, 0}; size_t 
hostOrigin[] = {0, 0, 0}; size_t region[] = {10, 10, 0}; char ptr[10]; auto retVal = clEnqueueReadBufferRect( nullptr, buffer, CL_FALSE, buffOrigin, hostOrigin, region, 10, //bufferRowPitch 0, //bufferSlicePitch 10, //hostRowPitch 0, //hostSlicePitch ptr, //hostPtr 0, //numEventsInWaitList nullptr, nullptr); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } TEST_F(clEnqueueReadBufferRectTest, GivenNullHostPtrWhenReadingRectangularRegionThenInvalidValueErrorIsReturned) { auto buffer = clCreateBuffer( pContext, CL_MEM_READ_WRITE, 100, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); size_t buffOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {10, 10, 0}; auto retVal = clEnqueueReadBufferRect( pCommandQueue, buffer, CL_FALSE, buffOrigin, hostOrigin, region, 10, //bufferRowPitch 0, //bufferSlicePitch 10, //hostRowPitch 0, //hostSlicePitch nullptr, //hostPtr 0, //numEventsInWaitList nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); retVal = clReleaseMemObject(buffer); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clEnqueueReadBufferRectTest, GivenValidParametersWhenReadingRectangularRegionThenSuccessIsReturned) { auto buffer = clCreateBuffer( pContext, CL_MEM_READ_WRITE, 100, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); char ptr[10]; size_t buffOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {10, 10, 1}; auto retVal = clEnqueueReadBufferRect( pCommandQueue, buffer, CL_FALSE, buffOrigin, hostOrigin, region, 10, //bufferRowPitch 0, //bufferSlicePitch 10, //hostRowPitch 0, //hostSlicePitch ptr, //hostPtr 0, //numEventsInWaitList nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); clReleaseMemObject(buffer); } TEST_F(clEnqueueReadBufferRectTest, GivenQueueIncapableWhenReadingRectangularRegionThenInvalidOperationIsReturned) { MockBuffer buffer{}; buffer.size = 100; char ptr[10]; size_t buffOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {10, 10, 
1}; this->disableQueueCapabilities(CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_RECT_INTEL); auto retVal = clEnqueueReadBufferRect( pCommandQueue, &buffer, CL_FALSE, buffOrigin, hostOrigin, region, 10, //bufferRowPitch 0, //bufferSlicePitch 10, //hostRowPitch 0, //hostSlicePitch ptr, //hostPtr 0, //numEventsInWaitList nullptr, nullptr); EXPECT_EQ(CL_INVALID_OPERATION, retVal); } TEST_F(clEnqueueReadBufferRectTest, GivenInvalidPitchWhenReadingRectangularRegionThenInvalidValueErrorIsReturned) { auto buffer = clCreateBuffer( pContext, CL_MEM_READ_WRITE, 100, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); char ptr[10]; size_t buffOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {10, 10, 1}; size_t bufferRowPitch = 9; auto retVal = clEnqueueReadBufferRect( pCommandQueue, buffer, CL_FALSE, buffOrigin, hostOrigin, region, bufferRowPitch, //bufferRowPitch 0, //bufferSlicePitch 10, //hostRowPitch 0, //hostSlicePitch ptr, //hostPtr 0, //numEventsInWaitList nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); size_t hostRowPitch = 9; retVal = clEnqueueReadBufferRect( pCommandQueue, buffer, CL_FALSE, buffOrigin, hostOrigin, region, 10, //bufferRowPitch 0, //bufferSlicePitch hostRowPitch, //hostRowPitch 0, //hostSlicePitch ptr, //hostPtr 0, //numEventsInWaitList nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); size_t bufferSlicePitch = 9; retVal = clEnqueueReadBufferRect( pCommandQueue, buffer, CL_FALSE, buffOrigin, hostOrigin, region, 10, //bufferRowPitch bufferSlicePitch, //bufferSlicePitch 10, //hostRowPitch 0, //hostSlicePitch ptr, //hostPtr 0, //numEventsInWaitList nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); size_t hostSlicePitch = 9; retVal = clEnqueueReadBufferRect( pCommandQueue, buffer, CL_FALSE, buffOrigin, hostOrigin, region, 10, //bufferRowPitch 0, //bufferSlicePitch 10, //hostRowPitch hostSlicePitch, //hostSlicePitch ptr, //hostPtr 0, //numEventsInWaitList nullptr, nullptr); 
EXPECT_EQ(CL_INVALID_VALUE, retVal); clReleaseMemObject(buffer); } class EnqueueReadBufferRectFlagsTest : public ApiFixture<>, public testing::TestWithParam { public: EnqueueReadBufferRectFlagsTest() { } protected: void SetUp() override { ApiFixture::SetUp(); buffer_flags = GetParam(); unsigned int bufferSize = 16; pHostMem = new unsigned char[bufferSize]; memset(pHostMem, 0xaa, bufferSize); buffer = clCreateBuffer( pContext, buffer_flags, bufferSize, pHostMem, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); } void TearDown() override { retVal = clReleaseMemObject(buffer); EXPECT_EQ(CL_SUCCESS, retVal); delete[] pHostMem; ApiFixture::TearDown(); } cl_int retVal = CL_SUCCESS; cl_mem_flags buffer_flags = 0; unsigned char *pHostMem; cl_mem buffer; }; typedef EnqueueReadBufferRectFlagsTest EnqueueReadReadBufferRectTests; TEST_P(EnqueueReadReadBufferRectTests, GivenNoReadFlagsWhenReadingRectangularRegionThenInvalidOperationErrorIsReturned) { size_t buffOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {10, 10, 0}; char ptr[10]; cl_event eventReturned = nullptr; retVal = clEnqueueReadBufferRect( pCommandQueue, buffer, CL_FALSE, buffOrigin, hostOrigin, region, 10, //bufferRowPitch 0, //bufferSlicePitch 10, //hostRowPitch 0, //hostSlicePitch ptr, //hostPtr 0, //numEventsInWaitList nullptr, nullptr); EXPECT_EQ(CL_INVALID_OPERATION, retVal); clReleaseEvent(eventReturned); } static cl_mem_flags read_buffer_rect_flags[] = { CL_MEM_USE_HOST_PTR | CL_MEM_HOST_WRITE_ONLY, CL_MEM_USE_HOST_PTR | CL_MEM_HOST_NO_ACCESS}; INSTANTIATE_TEST_CASE_P( EnqueueReadBufferRectFlagsTests_Create, EnqueueReadReadBufferRectTests, testing::ValuesIn(read_buffer_rect_flags)); } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_enqueue_read_buffer_tests.inl000066400000000000000000000111321422164147700307750ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include 
"shared/source/device/device.h" #include "opencl/source/cl_device/cl_device_info.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/context/context.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clEnqueueReadBufferTests; namespace ULT { TEST_F(clEnqueueReadBufferTests, GivenCorrectArgumentsWhenReadingBufferThenSuccessIsReturned) { MockBuffer buffer{}; auto data = 1; auto retVal = clEnqueueReadBuffer( pCommandQueue, &buffer, false, 0, sizeof(data), &data, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clEnqueueReadBufferTests, GivenQueueIncapableArgumentsWhenReadingBufferThenInvalidOperationIsReturned) { MockBuffer buffer{}; auto data = 1; this->disableQueueCapabilities(CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_INTEL); auto retVal = clEnqueueReadBuffer( pCommandQueue, &buffer, false, 0, sizeof(data), &data, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_OPERATION, retVal); } TEST_F(clEnqueueReadBufferTests, GivenNullCommandQueueWhenReadingBufferThenInvalidCommandQueueErrorIsReturned) { auto data = 1; auto retVal = clEnqueueReadBuffer( nullptr, nullptr, false, 0, sizeof(data), &data, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } class EnqueueReadBufferFlagsTest : public ApiFixture<>, public testing::TestWithParam { protected: void SetUp() override { ApiFixture::SetUp(); buffer_flags = GetParam(); unsigned int bufferSize = 16; pHostMem = new unsigned char[bufferSize]; memset(pHostMem, 0xaa, bufferSize); buffer = clCreateBuffer( pContext, buffer_flags, bufferSize, pHostMem, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); } void TearDown() override { retVal = clReleaseMemObject(buffer); EXPECT_EQ(CL_SUCCESS, retVal); delete[] pHostMem; ApiFixture::TearDown(); } cl_int retVal = CL_SUCCESS; cl_mem_flags buffer_flags = 0; unsigned char *pHostMem; cl_mem buffer; }; typedef EnqueueReadBufferFlagsTest 
EnqueueReadReadBufferTests; TEST_P(EnqueueReadReadBufferTests, GivenNoReadFlagsWhenReadingBufferThenInvalidOperationErrorIsReturned) { auto data = 1; cl_event eventReturned = nullptr; retVal = clEnqueueReadBuffer( pCommandQueue, buffer, CL_TRUE, 0, sizeof(data), &data, 0, nullptr, &eventReturned); EXPECT_EQ(CL_INVALID_OPERATION, retVal); clReleaseEvent(eventReturned); } static cl_mem_flags read_buffer_flags[] = { CL_MEM_USE_HOST_PTR | CL_MEM_HOST_WRITE_ONLY, CL_MEM_USE_HOST_PTR | CL_MEM_HOST_NO_ACCESS}; INSTANTIATE_TEST_CASE_P( EnqueueReadBufferFlagsTests_Create, EnqueueReadReadBufferTests, testing::ValuesIn(read_buffer_flags)); } // namespace ULT class EnqueueReadBufferTest : public api_tests { public: EnqueueReadBufferTest() {} protected: cl_mem buffer = nullptr; cl_int retVal = CL_SUCCESS; unsigned char *pHostMem = nullptr; unsigned int bufferSize = 0; void SetUp() override { api_tests::SetUp(); bufferSize = 16; pHostMem = new unsigned char[bufferSize]; memset(pHostMem, 0xaa, bufferSize); buffer = clCreateBuffer( pContext, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, bufferSize, pHostMem, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); } void TearDown() override { retVal = clReleaseMemObject(buffer); EXPECT_EQ(CL_SUCCESS, retVal); delete[] pHostMem; api_tests::TearDown(); } }; TEST_F(EnqueueReadBufferTest, GivenSvmPtrWhenReadingBufferThenSuccessIsReturned) { const ClDeviceInfo &devInfo = pDevice->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { auto data = clSVMAlloc(pContext, CL_MEM_READ_WRITE, bufferSize, 64); auto retVal = clEnqueueReadBuffer(pCommandQueue, buffer, CL_TRUE, bufferSize, 0, data, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); clSVMFree(pContext, data); } } compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_enqueue_read_image_tests.inl000066400000000000000000000132511422164147700306120ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include 
"opencl/source/command_queue/command_queue.h" #include "opencl/source/context/context.h" #include "opencl/source/helpers/surface_formats.h" #include "opencl/source/mem_obj/image.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clEnqueueReadImageTests; namespace ULT { struct clEnqueueReadImageTests : public ApiFixture<>, public ::testing::Test { void SetUp() override { ApiFixture::SetUp(); // clang-format off imageFormat.image_channel_order = CL_YUYV_INTEL; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_width = 32; imageDesc.image_height = 32; imageDesc.image_depth = 1; imageDesc.image_array_size = 1; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = nullptr; // clang-format on } void TearDown() override { ApiFixture::TearDown(); } cl_image_format imageFormat; cl_image_desc imageDesc; }; TEST_F(clEnqueueReadImageTests, GivenNullCommandQueueWhenReadingImageThenInvalidCommandQueueErrorIsReturned) { auto retVal = clEnqueueReadImage( nullptr, nullptr, false, nullptr, nullptr, 0, 0, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } TEST_F(clEnqueueReadImageTests, GivenNullImageWhenReadingImageThenInvalidMemObjectErrorIsReturned) { auto retVal = clEnqueueReadImage( pCommandQueue, nullptr, false, nullptr, nullptr, 0, 0, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } TEST_F(clEnqueueReadImageTests, GivenValidParametersWhenReadinImagesThenSuccessIsReturned) { imageFormat.image_channel_order = CL_RGBA; auto image = Image::validateAndCreateImage(pContext, nullptr, CL_MEM_READ_WRITE, 0, &imageFormat, &imageDesc, nullptr, retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, image); const size_t origin[] = {2, 2, 0}; const size_t region[] = {2, 2, 1}; auto retVal = clEnqueueReadImage( pCommandQueue, image, false, origin, region, 0, 0, 
nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(image); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clEnqueueReadImageTests, GivenQueueIncapableParametersWhenReadingImageThenInvalidOperationIsReturned) { imageFormat.image_channel_order = CL_RGBA; auto image = Image::validateAndCreateImage(pContext, nullptr, CL_MEM_READ_WRITE, 0, &imageFormat, &imageDesc, nullptr, retVal); const size_t origin[] = {2, 2, 0}; const size_t region[] = {2, 2, 1}; this->disableQueueCapabilities(CL_QUEUE_CAPABILITY_TRANSFER_IMAGE_INTEL); auto mockAddress = reinterpret_cast(0x1234); auto retVal = clEnqueueReadImage( pCommandQueue, image, false, origin, region, 0, 0, mockAddress, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_OPERATION, retVal); retVal = clReleaseMemObject(image); EXPECT_EQ(CL_SUCCESS, retVal); } typedef clEnqueueReadImageTests clEnqueueReadImageYuv; TEST_F(clEnqueueReadImageYuv, GivenValidYuvImageWhenReadingImageThenSuccessIsReturned) { auto image = Image::validateAndCreateImage(pContext, nullptr, CL_MEM_READ_ONLY, 0, &imageFormat, &imageDesc, nullptr, retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, image); const size_t origin[] = {2, 2, 0}; const size_t region[] = {2, 2, 1}; auto retVal = clEnqueueReadImage( pCommandQueue, image, false, origin, region, 0, 0, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(image); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clEnqueueReadImageYuv, GivenInvalidOriginWhenReadingYuvImageThenInvalidValueErrorIsReturned) { auto image = Image::validateAndCreateImage(pContext, nullptr, CL_MEM_READ_ONLY, 0, &imageFormat, &imageDesc, nullptr, retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, image); const size_t origin[] = {1, 2, 0}; const size_t region[] = {2, 2, 0}; auto retVal = clEnqueueReadImage( pCommandQueue, image, false, origin, region, 0, 0, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); retVal = clReleaseMemObject(image); 
EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clEnqueueReadImageYuv, GivenInvalidRegionWhenReadingYuvImageThenInvalidValueErrorIsReturned) { auto image = Image::validateAndCreateImage(pContext, nullptr, CL_MEM_READ_ONLY, 0, &imageFormat, &imageDesc, nullptr, retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, image); const size_t origin[] = {2, 2, 0}; const size_t region[] = {1, 2, 0}; auto retVal = clEnqueueReadImage( pCommandQueue, image, false, origin, region, 0, 0, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); retVal = clReleaseMemObject(image); EXPECT_EQ(CL_SUCCESS, retVal); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_enqueue_svm_free_tests.inl000066400000000000000000000166601422164147700303520ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/device/device.h" #include "shared/test/common/mocks/mock_device.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/context/context.h" #include "opencl/source/event/user_event.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clEnqueueSVMFreeTests; namespace ULT { TEST_F(clEnqueueSVMFreeTests, GivenInvalidCommandQueueWhenFreeingSVMThenInvalidCommandQueueErrorIsReturned) { auto retVal = clEnqueueSVMFree( nullptr, // cl_command_queue command_queue 0, // cl_uint num_svm_pointers nullptr, // void *svm_pointers[] nullptr, // (CL_CALLBACK *pfn_free_func) ( cl_command_queue queue, cl_uint num_svm_pointers, void *svm_pointers[]) nullptr, // void *user_data 0, // cl_uint num_events_in_wait_list nullptr, // const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } TEST_F(clEnqueueSVMFreeTests, GivenNonZeroNumOfSVMPointersAndNullSVMPointersWhenFreeingSVMThenInvalidValueErrorIsReturned) { const ClDeviceInfo &devInfo = 
pDevice->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { auto retVal = clEnqueueSVMFree( pCommandQueue, // cl_command_queue command_queue 1, // cl_uint num_svm_pointers nullptr, // void *svm_pointers[] nullptr, // (CL_CALLBACK *pfn_free_func) ( cl_command_queue queue, cl_uint num_svm_pointers, void *svm_pointers[]) nullptr, // void *user_data 0, // cl_uint num_events_in_wait_list nullptr, // const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_INVALID_VALUE, retVal); } } TEST_F(clEnqueueSVMFreeTests, GivenZeroNumOfSVMPointersAndNonNullSVMPointersWhenFreeingSVMThenInvalidValueErrorIsReturned) { const ClDeviceInfo &devInfo = pDevice->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { void *ptrSvm = clSVMAlloc(pContext, CL_MEM_READ_WRITE, 256, 4); EXPECT_NE(nullptr, ptrSvm); void *svmPtrs[] = {ptrSvm}; auto retVal = clEnqueueSVMFree( pCommandQueue, // cl_command_queue command_queue 0, // cl_uint num_svm_pointers svmPtrs, // void *svm_pointers[] nullptr, // (CL_CALLBACK *pfn_free_func) ( cl_command_queue queue, cl_uint num_svm_pointers, void *svm_pointers[]) nullptr, // void *user_data 0, // cl_uint num_events_in_wait_list nullptr, // const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_INVALID_VALUE, retVal); clSVMFree(pContext, ptrSvm); } } TEST_F(clEnqueueSVMFreeTests, GivenNonZeroNumOfEventsAndNullEventListWhenFreeingSVMThenInvalidEventWaitListErrorIsReturned) { auto retVal = clEnqueueSVMFree( pCommandQueue, // cl_command_queue command_queue 0, // cl_uint num_svm_pointers nullptr, // void *svm_pointers[] nullptr, // (CL_CALLBACK *pfn_free_func) ( cl_command_queue queue, cl_uint num_svm_pointers, void *svm_pointers[]) nullptr, // void *user_data 1, // cl_uint num_events_in_wait_list nullptr, // const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_INVALID_EVENT_WAIT_LIST, retVal); } TEST_F(clEnqueueSVMFreeTests, 
GivenZeroNumOfEventsAndNonNullEventListWhenFreeingSVMThenInvalidEventWaitListErrorIsReturned) { UserEvent uEvent(pContext); cl_event eventWaitList[] = {&uEvent}; auto retVal = clEnqueueSVMFree( pCommandQueue, // cl_command_queue command_queue 0, // cl_uint num_svm_pointers nullptr, // void *svm_pointers[] nullptr, // (CL_CALLBACK *pfn_free_func) ( cl_command_queue queue, cl_uint num_svm_pointers, void *svm_pointers[]) nullptr, // void *user_data 0, // cl_uint num_events_in_wait_list eventWaitList, // const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_INVALID_EVENT_WAIT_LIST, retVal); } TEST_F(clEnqueueSVMFreeTests, GivenNonZeroNumOfSVMPointersAndNonNullSVMPointersWhenFreeingSVMThenSuccessIsReturned) { const ClDeviceInfo &devInfo = pDevice->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { void *ptrSvm = clSVMAlloc(pContext, CL_MEM_READ_WRITE, 256, 4); EXPECT_NE(nullptr, ptrSvm); void *svmPtrs[] = {ptrSvm}; auto retVal = clEnqueueSVMFree( pCommandQueue, // cl_command_queue command_queue 1, // cl_uint num_svm_pointers svmPtrs, // void *svm_pointers[] nullptr, // (CL_CALLBACK *pfn_free_func) ( cl_command_queue queue, cl_uint num_svm_pointers, void *svm_pointers[]) nullptr, // void *user_data 0, // cl_uint num_events_in_wait_list nullptr, // const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); clSVMFree(pContext, ptrSvm); } } TEST_F(clEnqueueSVMFreeTests, GivenZeroNumOfSVMPointersAndNullSVMPointersWhenFreeingSVMThenSuccessIsReturned) { const ClDeviceInfo &devInfo = pDevice->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { auto retVal = clEnqueueSVMFree( pCommandQueue, // cl_command_queue command_queue 0, // cl_uint num_svm_pointers nullptr, // void *svm_pointers[] nullptr, // (CL_CALLBACK *pfn_free_func) ( cl_command_queue queue, cl_uint num_svm_pointers, void *svm_pointers[]) nullptr, // void *user_data 0, // cl_uint num_events_in_wait_list nullptr, // const cl_event *event_wait_list nullptr // 
cl_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); } } TEST_F(clEnqueueSVMFreeTests, GivenDeviceNotSupportingSvmWhenEnqueuingSVMFreeThenInvalidOperationErrorIsReturned) { auto hwInfo = *defaultHwInfo; hwInfo.capabilityTable.ftrSvm = false; auto pDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); cl_device_id deviceId = pDevice.get(); auto pContext = std::unique_ptr(Context::create(nullptr, ClDeviceVector(&deviceId, 1), nullptr, nullptr, retVal)); auto pCommandQueue = std::make_unique(pContext.get(), pDevice.get(), nullptr, false); auto retVal = clEnqueueSVMFree( pCommandQueue.get(), // cl_command_queue command_queue 0, // cl_uint num_svm_pointers nullptr, // void *svm_pointers[] nullptr, // (CL_CALLBACK *pfn_free_func) ( cl_command_queue queue, cl_uint num_svm_pointers, void *svm_pointers[]) nullptr, // void *user_data 0, // cl_uint num_events_in_wait_list nullptr, // const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_INVALID_OPERATION, retVal); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_enqueue_svm_map_tests.inl000066400000000000000000000152241422164147700302010ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/device/device.h" #include "shared/test/common/mocks/mock_device.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/context/context.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/test_macros/test_checks_ocl.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clEnqueueSVMMapTests; namespace ULT { TEST_F(clEnqueueSVMMapTests, GivenInvalidCommandQueueWhenMappingSVMThenInvalidCommandQueueErrorIsReturned) { auto retVal = clEnqueueSVMMap( nullptr, // cl_command_queue command_queue CL_FALSE, // cl_bool blocking_map CL_MAP_READ, // cl_map_flags map_flags nullptr, // void *svm_ptr 0, // size_t size 0, 
// cl_uint num_events_in_wait_list nullptr, // const cL_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } TEST_F(clEnqueueSVMMapTests, GivenNullSVMPointerWhenMappingSVMThenInvalidValueErrorIsReturned) { const ClDeviceInfo &devInfo = pDevice->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { auto retVal = clEnqueueSVMMap( pCommandQueue, // cl_command_queue command_queue CL_FALSE, // cl_bool blocking_map CL_MAP_READ, // cl_map_flags map_flags nullptr, // void *svm_ptr 256, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // const cL_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_INVALID_VALUE, retVal); } } TEST_F(clEnqueueSVMMapTests, GivenRegionSizeZeroWhenMappingSVMThenInvalidValueErrorIsReturned) { const ClDeviceInfo &devInfo = pDevice->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { void *ptrSvm = clSVMAlloc(pContext, CL_MEM_READ_WRITE, 256, 4); EXPECT_NE(nullptr, ptrSvm); auto retVal = clEnqueueSVMMap( pCommandQueue, // cl_command_queue command_queue CL_FALSE, // cl_bool blocking_map CL_MAP_READ, // cl_map_flags map_flags ptrSvm, // void *svm_ptr 0, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // const cL_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_INVALID_VALUE, retVal); clSVMFree(pContext, ptrSvm); } } TEST_F(clEnqueueSVMMapTests, GivenNullEventWaitListAndNonZeroNumEventsWhenMappingSVMThenInvalidEventWaitListErrorIsReturned) { auto retVal = clEnqueueSVMMap( pCommandQueue, // cl_command_queue command_queue CL_FALSE, // cl_bool blocking_map CL_MAP_READ, // cl_map_flags map_flags nullptr, // void *svm_ptr 0, // size_t size 1, // cl_uint num_events_in_wait_list nullptr, // const cL_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_INVALID_EVENT_WAIT_LIST, retVal); } TEST_F(clEnqueueSVMMapTests, GivenNonNullEventWaitListAndZeroNumEventsWhenMappingSVMThenInvalidEventWaitListErrorIsReturned) { UserEvent uEvent(pContext); 
cl_event eventWaitList[] = {&uEvent}; auto retVal = clEnqueueSVMMap( pCommandQueue, // cl_command_queue command_queue CL_FALSE, // cl_bool blocking_map CL_MAP_READ, // cl_map_flags map_flags nullptr, // void *svm_ptr 0, // size_t size 0, // cl_uint num_events_in_wait_list eventWaitList, // const cL_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_INVALID_EVENT_WAIT_LIST, retVal); } TEST_F(clEnqueueSVMMapTests, GivenValidParametersWhenMappingSVMThenSuccessIsReturned) { const ClDeviceInfo &devInfo = pDevice->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { void *ptrSvm = clSVMAlloc(pContext, CL_MEM_READ_WRITE, 256, 4); EXPECT_NE(nullptr, ptrSvm); auto retVal = clEnqueueSVMMap( pCommandQueue, // cl_command_queue command_queue CL_FALSE, // cl_bool blocking_map CL_MAP_READ, // cl_map_flags map_flags ptrSvm, // void *svm_ptr 256, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // const cL_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); clSVMFree(pContext, ptrSvm); } } TEST_F(clEnqueueSVMMapTests, GivenQueueIncapableWhenMappingSvmBufferThenInvalidOperationIsReturned) { REQUIRE_SVM_OR_SKIP(pDevice); disableQueueCapabilities(CL_QUEUE_CAPABILITY_MAP_BUFFER_INTEL); void *ptrSvm = clSVMAlloc(pContext, CL_MEM_READ_WRITE, 256, 4); EXPECT_NE(nullptr, ptrSvm); auto retVal = clEnqueueSVMMap( pCommandQueue, // cl_command_queue command_queue CL_FALSE, // cl_bool blocking_map CL_MAP_READ, // cl_map_flags map_flags ptrSvm, // void *svm_ptr 256, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // const cL_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_INVALID_OPERATION, retVal); clSVMFree(pContext, ptrSvm); } TEST_F(clEnqueueSVMMapTests, GivenDeviceNotSupportingSvmWhenEnqueuingSVMMapThenInvalidOperationErrorIsReturned) { auto hwInfo = *defaultHwInfo; hwInfo.capabilityTable.ftrSvm = false; auto pDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); 
cl_device_id deviceId = pDevice.get(); auto pContext = std::unique_ptr(Context::create(nullptr, ClDeviceVector(&deviceId, 1), nullptr, nullptr, retVal)); auto pCommandQueue = std::make_unique(pContext.get(), pDevice.get(), nullptr, false); auto retVal = clEnqueueSVMMap( pCommandQueue.get(), // cl_command_queue command_queue CL_FALSE, // cl_bool blocking_map CL_MAP_READ, // cl_map_flags map_flags nullptr, // void *svm_ptr 256, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // const cL_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_INVALID_OPERATION, retVal); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_enqueue_svm_mem_fill_tests.inl000066400000000000000000000151751422164147700312150ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/device/device.h" #include "shared/test/common/mocks/mock_device.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/context/context.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/test_macros/test_checks_ocl.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clEnqueueSVMMemFillTests; namespace ULT { TEST_F(clEnqueueSVMMemFillTests, GivenInvalidCommandQueueWhenFillingSVMMemoryThenInvalidCommandQueueErrorIsReturned) { auto retVal = clEnqueueSVMMemFill( nullptr, // cl_command_queue command_queue nullptr, // void *svm_ptr nullptr, // const void *pattern 0, // size_t pattern_size 0, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // cl_evebt *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } TEST_F(clEnqueueSVMMemFillTests, GivenNullSVMPtrWhenFillingSVMMemoryThenInvalidValueErrorIsReturned) { const ClDeviceInfo &devInfo = pDevice->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { auto retVal = clEnqueueSVMMemFill( pCommandQueue, // 
cl_command_queue command_queue nullptr, // void *svm_ptr nullptr, // const void *pattern 0, // size_t pattern_size 256, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // cl_evebt *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_INVALID_VALUE, retVal); } } TEST_F(clEnqueueSVMMemFillTests, GivenRegionSizeZeroWhenFillingSVMMemoryThenInvalidValueErrorIsReturned) { const ClDeviceInfo &devInfo = pDevice->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { void *ptrSvm = clSVMAlloc(pContext, CL_MEM_READ_WRITE, 256, 4); EXPECT_NE(nullptr, ptrSvm); auto retVal = clEnqueueSVMMemFill( pCommandQueue, // cl_command_queue command_queue ptrSvm, // void *svm_ptr nullptr, // const void *pattern 0, // size_t pattern_size 0, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // cl_evebt *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_INVALID_VALUE, retVal); clSVMFree(pContext, ptrSvm); } } TEST_F(clEnqueueSVMMemFillTests, GivenNullEventWaitListAndNonZeroEventsWhenFillingSVMMemoryThenInvalidEventWaitListIsReturned) { auto retVal = clEnqueueSVMMemFill( pCommandQueue, // cl_command_queue command_queue nullptr, // void *svm_ptr nullptr, // const void *pattern 0, // size_t pattern_size 0, // size_t size 1, // cl_uint num_events_in_wait_list nullptr, // cl_evebt *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_INVALID_EVENT_WAIT_LIST, retVal); } TEST_F(clEnqueueSVMMemFillTests, GivenNonNullEventWaitListAndZeroEventsWhenFillingSVMMemoryThenInvalidEventWaitListIsReturned) { UserEvent uEvent(pContext); cl_event eventWaitList[] = {&uEvent}; auto retVal = clEnqueueSVMMemFill( pCommandQueue, // cl_command_queue command_queue nullptr, // void *svm_ptr nullptr, // const void *pattern 0, // size_t pattern_size 0, // size_t size 0, // cl_uint num_events_in_wait_list eventWaitList, // cl_evebt *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_INVALID_EVENT_WAIT_LIST, retVal); } TEST_F(clEnqueueSVMMemFillTests, 
GivenValidParametersWhenFillingSVMMemoryThenSuccessIsReturned) { const ClDeviceInfo &devInfo = pDevice->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { void *ptrSvm = clSVMAlloc(pContext, CL_MEM_READ_WRITE, 256, 4); EXPECT_NE(nullptr, ptrSvm); auto retVal = clEnqueueSVMMemFill( pCommandQueue, // cl_command_queue command_queue ptrSvm, // void *svm_ptr nullptr, // const void *pattern 0, // size_t pattern_size 256, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // cl_evebt *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); clSVMFree(pContext, ptrSvm); } } TEST_F(clEnqueueSVMMemFillTests, GivenQueueIncapableWhenFillingSvmBufferThenInvalidOperationIsReturned) { REQUIRE_SVM_OR_SKIP(pDevice); disableQueueCapabilities(CL_QUEUE_CAPABILITY_FILL_BUFFER_INTEL); void *ptrSvm = clSVMAlloc(pContext, CL_MEM_READ_WRITE, 256, 4); EXPECT_NE(nullptr, ptrSvm); auto retVal = clEnqueueSVMMemFill( pCommandQueue, // cl_command_queue command_queue ptrSvm, // void *svm_ptr nullptr, // const void *pattern 0, // size_t pattern_size 256, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // cl_evebt *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_INVALID_OPERATION, retVal); clSVMFree(pContext, ptrSvm); } TEST_F(clEnqueueSVMMemFillTests, GivenDeviceNotSupportingSvmWhenEnqueuingSVMMemFillThenInvalidOperationErrorIsReturned) { auto hwInfo = *defaultHwInfo; hwInfo.capabilityTable.ftrSvm = false; auto pDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); cl_device_id deviceId = pDevice.get(); auto pContext = std::unique_ptr(Context::create(nullptr, ClDeviceVector(&deviceId, 1), nullptr, nullptr, retVal)); auto pCommandQueue = std::make_unique(pContext.get(), pDevice.get(), nullptr, false); auto retVal = clEnqueueSVMMemFill( pCommandQueue.get(), // cl_command_queue command_queue nullptr, // void *svm_ptr nullptr, // const void *pattern 0, // size_t pattern_size 256, // size_t size 0, // cl_uint 
num_events_in_wait_list nullptr, // cl_evebt *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_INVALID_OPERATION, retVal); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_enqueue_svm_memcpy_tests.inl000066400000000000000000000201341422164147700307120ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/device/device.h" #include "shared/test/common/mocks/mock_device.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/context/context.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/test_macros/test_checks_ocl.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clEnqueueSVMMemcpyTests; namespace ULT { TEST_F(clEnqueueSVMMemcpyTests, GivenInvalidCommandQueueWhenCopyingSVMMemoryThenInvalidCommandQueueErrorIsReturned) { auto retVal = clEnqueueSVMMemcpy( nullptr, // cl_command_queue command_queue CL_FALSE, // cl_bool blocking_copy nullptr, // void *dst_ptr nullptr, // const void *src_ptr 0, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } TEST_F(clEnqueueSVMMemcpyTests, GivenNullDstPtrWhenCopyingSVMMemoryThenInvalidValueErrorIsReturned) { const ClDeviceInfo &devInfo = pDevice->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { void *pSrcSvm = clSVMAlloc(pContext, CL_MEM_READ_WRITE, 256, 4); EXPECT_NE(nullptr, pSrcSvm); auto retVal = clEnqueueSVMMemcpy( pCommandQueue, // cl_command_queue command_queue CL_FALSE, // cl_bool blocking_copy nullptr, // void *dst_ptr pSrcSvm, // const void *src_ptr 256, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_INVALID_VALUE, retVal); clSVMFree(pContext, pSrcSvm); } } TEST_F(clEnqueueSVMMemcpyTests, 
GivenNullSrcPtrWhenCopyingSVMMemoryThenInvalidValueErrorIsReturned) { const ClDeviceInfo &devInfo = pDevice->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { void *pDstSvm = clSVMAlloc(pContext, CL_MEM_READ_WRITE, 256, 4); EXPECT_NE(nullptr, pDstSvm); auto retVal = clEnqueueSVMMemcpy( pCommandQueue, // cl_command_queue command_queue CL_FALSE, // cl_bool blocking_copy pDstSvm, // void *dst_ptr nullptr, // const void *src_ptr 256, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_INVALID_VALUE, retVal); clSVMFree(pContext, pDstSvm); } } TEST_F(clEnqueueSVMMemcpyTests, GivenNonZeroEventsAndNullEventListWhenCopyingSVMMemoryThenInvalidEventWaitListErrorIsReturned) { auto retVal = clEnqueueSVMMemcpy( pCommandQueue, // cl_command_queue command_queue CL_FALSE, // cl_bool blocking_copy nullptr, // void *dst_ptr nullptr, // const void *src_ptr 0, // size_t size 1, // cl_uint num_events_in_wait_list nullptr, // const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_INVALID_EVENT_WAIT_LIST, retVal); } TEST_F(clEnqueueSVMMemcpyTests, GivenZeroEventsAndNonNullEventListWhenCopyingSVMMemoryThenInvalidEventWaitListErrorIsReturned) { UserEvent uEvent(pContext); cl_event eventWaitList[] = {&uEvent}; auto retVal = clEnqueueSVMMemcpy( pCommandQueue, // cl_command_queue command_queue CL_FALSE, // cl_bool blocking_copy nullptr, // void *dst_ptr nullptr, // const void *src_ptr 0, // size_t size 0, // cl_uint num_events_in_wait_list eventWaitList, // const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_INVALID_EVENT_WAIT_LIST, retVal); } TEST_F(clEnqueueSVMMemcpyTests, GivenNonZeroSizeWhenCopyingSVMMemoryThenSuccessIsReturned) { const ClDeviceInfo &devInfo = pDevice->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { void *pDstSvm = clSVMAlloc(pContext, CL_MEM_READ_WRITE, 256, 4); EXPECT_NE(nullptr, pDstSvm); void *pSrcSvm = clSVMAlloc(pContext, 
CL_MEM_READ_WRITE, 256, 4); EXPECT_NE(nullptr, pSrcSvm); auto retVal = clEnqueueSVMMemcpy( pCommandQueue, // cl_command_queue command_queue CL_FALSE, // cl_bool blocking_copy pDstSvm, // void *dst_ptr pSrcSvm, // const void *src_ptr 256, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); clSVMFree(pContext, pDstSvm); clSVMFree(pContext, pSrcSvm); } } TEST_F(clEnqueueSVMMemcpyTests, GivenQueueIncapableWhenCopyingSvmBufferThenInvalidOperationIsReturned) { REQUIRE_SVM_OR_SKIP(pDevice); disableQueueCapabilities(CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_INTEL); void *pDstSvm = clSVMAlloc(pContext, CL_MEM_READ_WRITE, 256, 4); EXPECT_NE(nullptr, pDstSvm); void *pSrcSvm = clSVMAlloc(pContext, CL_MEM_READ_WRITE, 256, 4); EXPECT_NE(nullptr, pSrcSvm); auto retVal = clEnqueueSVMMemcpy( pCommandQueue, // cl_command_queue command_queue CL_FALSE, // cl_bool blocking_copy pDstSvm, // void *dst_ptr pSrcSvm, // const void *src_ptr 256, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_INVALID_OPERATION, retVal); clSVMFree(pContext, pDstSvm); clSVMFree(pContext, pSrcSvm); } TEST_F(clEnqueueSVMMemcpyTests, GivenZeroSizeWhenCopyingSVMMemoryThenSuccessIsReturned) { const ClDeviceInfo &devInfo = pDevice->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { void *pDstSvm = clSVMAlloc(pContext, CL_MEM_READ_WRITE, 256, 4); EXPECT_NE(nullptr, pDstSvm); void *pSrcSvm = clSVMAlloc(pContext, CL_MEM_READ_WRITE, 256, 4); EXPECT_NE(nullptr, pSrcSvm); auto retVal = clEnqueueSVMMemcpy( pCommandQueue, // cl_command_queue command_queue CL_FALSE, // cl_bool blocking_copy pDstSvm, // void *dst_ptr pSrcSvm, // const void *src_ptr 0, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); clSVMFree(pContext, pDstSvm); 
clSVMFree(pContext, pSrcSvm); } } TEST_F(clEnqueueSVMMemcpyTests, GivenDeviceNotSupportingSvmWhenEnqueuingSVMMemcpyThenInvalidOperationErrorIsReturned) { auto hwInfo = *defaultHwInfo; hwInfo.capabilityTable.ftrSvm = false; auto pDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); cl_device_id deviceId = pDevice.get(); auto pContext = std::unique_ptr(Context::create(nullptr, ClDeviceVector(&deviceId, 1), nullptr, nullptr, retVal)); auto pCommandQueue = std::make_unique(pContext.get(), pDevice.get(), nullptr, false); auto retVal = clEnqueueSVMMemcpy( pCommandQueue.get(), // cl_command_queue command_queue CL_FALSE, // cl_bool blocking_copy nullptr, // void *dst_ptr nullptr, // const void *src_ptr 0, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_INVALID_OPERATION, retVal); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_enqueue_svm_migrate_mem_tests.cpp000066400000000000000000000331351422164147700317130ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/device/device.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "shared/source/memory_manager/unified_memory_manager.h" #include "shared/test/common/mocks/mock_device.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/event/user_event.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "cl_api_tests.h" #include using namespace NEO; typedef api_tests clEnqueueSVMMigrateMemTests; namespace ULT { TEST_F(clEnqueueSVMMigrateMemTests, GivenInvalidCommandQueueWhenMigratingSVMThenInvalidCommandQueueErrorIsReturned) { auto retVal = clEnqueueSVMMigrateMem( nullptr, // cl_command_queue command_queue 0, // cl_uint num_svm_pointers nullptr, // const void **svm_pointers 
nullptr, // const size_t *sizes 0, // const cl_mem_migration_flags flags 0, // cl_uint num_events_in_wait_list nullptr, // const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } TEST_F(clEnqueueSVMMigrateMemTests, GivenNullSvmPointersWhenMigratingSvmThenInvalidValueErrorIsReturned) { const ClDeviceInfo &devInfo = pDevice->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { auto retVal = clEnqueueSVMMigrateMem( pCommandQueue, // cl_command_queue command_queue 1, // cl_uint num_svm_pointers nullptr, // const void **svm_pointers nullptr, // const size_t *sizes 0, // const cl_mem_migration_flags flags 0, // cl_uint num_events_in_wait_list nullptr, // const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_INVALID_VALUE, retVal); } } TEST_F(clEnqueueSVMMigrateMemTests, GivenNumSvmPointersIsZeroWhenMigratingSvmThenInvalidValueErrorIsReturned) { const ClDeviceInfo &devInfo = pDevice->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { void *ptrSvm = clSVMAlloc(pContext, CL_MEM_READ_WRITE, 256, 4); ASSERT_NE(nullptr, ptrSvm); const void *svmPtrs[] = {ptrSvm}; auto retVal = clEnqueueSVMMigrateMem( pCommandQueue, // cl_command_queue command_queue 0, // cl_uint num_svm_pointers svmPtrs, // const void **svm_pointers nullptr, // const size_t *sizes 0, // const cl_mem_migration_flags flags 0, // cl_uint num_events_in_wait_list nullptr, // const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_INVALID_VALUE, retVal); clSVMFree(pContext, ptrSvm); } } TEST_F(clEnqueueSVMMigrateMemTests, GivenSvmPointerIsHostPtrWhenMigratingSvmThenInvalidValueErrorIsReturned) { const ClDeviceInfo &devInfo = pDevice->getDeviceInfo(); if (devInfo.svmCapabilities == 0) { GTEST_SKIP(); } char *ptrHost = new char[10]; ASSERT_NE(nullptr, ptrHost); const void *svmPtrs[] = {ptrHost}; auto retVal = clEnqueueSVMMigrateMem( pCommandQueue, // cl_command_queue command_queue 1, // cl_uint num_svm_pointers svmPtrs, // 
const void **svm_pointers nullptr, // const size_t *sizes 0, // const cl_mem_migration_flags flags 0, // cl_uint num_events_in_wait_list nullptr, // const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_INVALID_VALUE, retVal); delete[] ptrHost; } TEST_F(clEnqueueSVMMigrateMemTests, GivenNonZeroSizeIsNotContainedWithinAllocationWhenMigratingSvmThenInvalidValueErrorIsReturned) { const ClDeviceInfo &devInfo = pDevice->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { void *ptrSvm = clSVMAlloc(pContext, CL_MEM_READ_WRITE, 256, 4); ASSERT_NE(nullptr, ptrSvm); auto svmData = pContext->getSVMAllocsManager()->getSVMAlloc(ptrSvm); ASSERT_NE(nullptr, svmData); auto svmAlloc = svmData->gpuAllocations.getGraphicsAllocation(pDevice->getRootDeviceIndex()); EXPECT_NE(nullptr, svmAlloc); size_t allocSize = svmAlloc->getUnderlyingBufferSize(); const void *svmPtrs[] = {ptrSvm}; const size_t sizes[] = {allocSize + 1}; auto retVal = clEnqueueSVMMigrateMem( pCommandQueue, // cl_command_queue command_queue 1, // cl_uint num_svm_pointers svmPtrs, // const void **svm_pointers sizes, // const size_t *sizes 0, // const cl_mem_migration_flags flags 0, // cl_uint num_events_in_wait_list nullptr, // const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_INVALID_VALUE, retVal); clSVMFree(pContext, ptrSvm); } } TEST_F(clEnqueueSVMMigrateMemTests, GivenUnsupportedFlagsWhenMigratingSvmThenInvalidValueErrorIsReturned) { const ClDeviceInfo &devInfo = pDevice->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { void *ptrSvm = clSVMAlloc(pContext, CL_MEM_READ_WRITE, 256, 4); ASSERT_NE(nullptr, ptrSvm); const void *svmPtrs[] = {ptrSvm}; auto retVal = clEnqueueSVMMigrateMem( pCommandQueue, // cl_command_queue command_queue 1, // cl_uint num_svm_pointers svmPtrs, // const void **svm_pointers nullptr, // const size_t *sizes 0xAA55AA55AA55AA55, // const cl_mem_migration_flags flags 0, // cl_uint num_events_in_wait_list nullptr, // const cl_event *event_wait_list 
nullptr // cl_event *event ); EXPECT_EQ(CL_INVALID_VALUE, retVal); clSVMFree(pContext, ptrSvm); } } TEST_F(clEnqueueSVMMigrateMemTests, GivenNullEventWaitListAndNonZeroNumEventsWhenMigratingSvmThenInvalidEventWaitListErrorIsReturned) { auto retVal = clEnqueueSVMMigrateMem( pCommandQueue, // cl_command_queue command_queue 0, // cl_uint num_svm_pointers nullptr, // const void **svm_pointers nullptr, // const size_t *sizes 0, // const cl_mem_migration_flags flags 1, // cl_uint num_events_in_wait_list nullptr, // const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_INVALID_EVENT_WAIT_LIST, retVal); } TEST_F(clEnqueueSVMMigrateMemTests, GivenNonNullEventWaitListAndZeroNumEventsWhenMigratingSvmThenInvalidEventWaitListErrorIsReturned) { UserEvent uEvent(pContext); cl_event eventWaitList[] = {&uEvent}; auto retVal = clEnqueueSVMMigrateMem( pCommandQueue, // cl_command_queue command_queue 0, // cl_uint num_svm_pointers nullptr, // const void **svm_pointers nullptr, // const size_t *sizes 0, // const cl_mem_migration_flags flags 0, // cl_uint num_events_in_wait_list eventWaitList, // const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_INVALID_EVENT_WAIT_LIST, retVal); } TEST_F(clEnqueueSVMMigrateMemTests, GivenDifferentContextCommandQueueAndEventsWhenMigratingSvmThenInvalidContextErrorIsReturned) { const ClDeviceInfo &devInfo = pDevice->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { void *ptrSvm = clSVMAlloc(pContext, CL_MEM_READ_WRITE, 256, 4); ASSERT_NE(nullptr, ptrSvm); MockContext mockContext; UserEvent uEvent(&mockContext); cl_event eventWaitList[] = {&uEvent}; const void *svmPtrs[] = {ptrSvm}; auto retVal = clEnqueueSVMMigrateMem( pCommandQueue, // cl_command_queue command_queue 1, // cl_uint num_svm_pointers svmPtrs, // const void **svm_pointers nullptr, // const size_t *sizes 0, // const cl_mem_migration_flags flags 1, // cl_uint num_events_in_wait_list eventWaitList, // const cl_event *event_wait_list nullptr // 
cl_event *event ); EXPECT_EQ(CL_INVALID_CONTEXT, retVal); clSVMFree(pContext, ptrSvm); } } TEST_F(clEnqueueSVMMigrateMemTests, GivenNullSizesWhenMigratingSvmThenSuccessIsReturned) { const ClDeviceInfo &devInfo = pDevice->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { void *ptrSvm = clSVMAlloc(pContext, CL_MEM_READ_WRITE, 256, 4); ASSERT_NE(nullptr, ptrSvm); const void *svmPtrs[] = {ptrSvm}; auto retVal = clEnqueueSVMMigrateMem( pCommandQueue, // cl_command_queue command_queue 1, // cl_uint num_svm_pointers svmPtrs, // const void **svm_pointers nullptr, // const size_t *sizes 0, // const cl_mem_migration_flags flags 0, // cl_uint num_events_in_wait_list nullptr, // const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); clSVMFree(pContext, ptrSvm); } } TEST_F(clEnqueueSVMMigrateMemTests, GivenSizeZeroWhenMigratingSvmThenSuccessIsReturned) { const ClDeviceInfo &devInfo = pDevice->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { void *ptrSvm = clSVMAlloc(pContext, CL_MEM_READ_WRITE, 256, 4); ASSERT_NE(nullptr, ptrSvm); const void *svmPtrs[] = {ptrSvm}; const size_t sizes[] = {0}; auto retVal = clEnqueueSVMMigrateMem( pCommandQueue, // cl_command_queue command_queue 1, // cl_uint num_svm_pointers svmPtrs, // const void **svm_pointers sizes, // const size_t *sizes 0, // const cl_mem_migration_flags flags 0, // cl_uint num_events_in_wait_list nullptr, // const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); clSVMFree(pContext, ptrSvm); } } TEST_F(clEnqueueSVMMigrateMemTests, GivenNonZeroSizeWhenMigratingSvmThenSuccessIsReturned) { const ClDeviceInfo &devInfo = pDevice->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { void *ptrSvm = clSVMAlloc(pContext, CL_MEM_READ_WRITE, 256, 4); ASSERT_NE(nullptr, ptrSvm); const void *svmPtrs[] = {ptrSvm}; const size_t sizes[] = {256}; auto retVal = clEnqueueSVMMigrateMem( pCommandQueue, // cl_command_queue command_queue 1, // cl_uint 
num_svm_pointers svmPtrs, // const void **svm_pointers sizes, // const size_t *sizes 0, // const cl_mem_migration_flags flags 0, // cl_uint num_events_in_wait_list nullptr, // const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); clSVMFree(pContext, ptrSvm); } } TEST_F(clEnqueueSVMMigrateMemTests, GivenSameContextCommandQueueAndEventsWhenMigratingSvmThenSuccessIsReturned) { const ClDeviceInfo &devInfo = pDevice->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { void *ptrSvm = clSVMAlloc(pContext, CL_MEM_READ_WRITE, 256, 4); ASSERT_NE(nullptr, ptrSvm); UserEvent uEvent(pContext); cl_event eventWaitList[] = {&uEvent}; const void *svmPtrs[] = {ptrSvm}; auto retVal = clEnqueueSVMMigrateMem( pCommandQueue, // cl_command_queue command_queue 1, // cl_uint num_svm_pointers svmPtrs, // const void **svm_pointers nullptr, // const size_t *sizes 0, // const cl_mem_migration_flags flags 1, // cl_uint num_events_in_wait_list eventWaitList, // const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); clSVMFree(pContext, ptrSvm); } } TEST_F(clEnqueueSVMMigrateMemTests, GivenDeviceNotSupportingSvmWhenEnqueuingSVMMigrateMemThenInvalidOperationErrorIsReturned) { auto hwInfo = *defaultHwInfo; hwInfo.capabilityTable.ftrSvm = false; auto pDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); cl_device_id deviceId = pDevice.get(); auto pContext = std::unique_ptr(Context::create(nullptr, ClDeviceVector(&deviceId, 1), nullptr, nullptr, retVal)); auto pCommandQueue = std::make_unique(pContext.get(), pDevice.get(), nullptr, false); auto retVal = clEnqueueSVMMigrateMem( pCommandQueue.get(), // cl_command_queue command_queue 1, // cl_uint num_svm_pointers nullptr, // const void **svm_pointers nullptr, // const size_t *sizes 0, // const cl_mem_migration_flags flags 0, // cl_uint num_events_in_wait_list nullptr, // const cl_event *event_wait_list nullptr // cl_event *event ); 
EXPECT_EQ(CL_INVALID_OPERATION, retVal); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_enqueue_svm_unmap_tests.inl000066400000000000000000000133451422164147700305460ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/device/device.h" #include "shared/test/common/mocks/mock_device.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/context/context.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/test_macros/test_checks_ocl.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clEnqueueSVMUnmapTests; namespace ULT { TEST_F(clEnqueueSVMUnmapTests, GivenInvalidCommandQueueWhenUnmappingSvmThenInvalidCommandQueueErrorIsReturned) { auto retVal = clEnqueueSVMUnmap( nullptr, // cl_command_queue command_queue nullptr, // void *svm_ptr 0, // cl_uint num_events_in_wait_list nullptr, // const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } TEST_F(clEnqueueSVMUnmapTests, GivenNullSvmPtrWhenUnmappingSvmThenInvalidValueErrorIsReturned) { auto retVal = clEnqueueSVMUnmap( pCommandQueue, // cl_command_queue command_queue nullptr, // void *svm_ptr 0, // cl_uint num_events_in_wait_list nullptr, // const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(clEnqueueSVMUnmapTests, GivenNullEventListAndNonZeroEventsWhenUnmappingSvmThenInvalidEventWaitListErrorIsReturned) { auto retVal = clEnqueueSVMUnmap( pCommandQueue, // cl_command_queue command_queue nullptr, // void *svm_ptr 1, // cl_uint num_events_in_wait_list nullptr, // const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_INVALID_EVENT_WAIT_LIST, retVal); } TEST_F(clEnqueueSVMUnmapTests, GivenNonNullEventListAndZeroEventsWhenUnmappingSvmThenInvalidEventWaitListErrorIsReturned) { UserEvent uEvent(pContext); 
cl_event eventWaitList[] = {&uEvent}; auto retVal = clEnqueueSVMUnmap( pCommandQueue, // cl_command_queue command_queue nullptr, // void *svm_ptr 0, // cl_uint num_events_in_wait_list eventWaitList, // const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_INVALID_EVENT_WAIT_LIST, retVal); } TEST_F(clEnqueueSVMUnmapTests, GivenValidParametersWhenUnmappingSvmThenSuccessIsReturned) { const ClDeviceInfo &devInfo = pDevice->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { void *ptrSvm = clSVMAlloc(pContext, CL_MEM_READ_WRITE, 256, 4); EXPECT_NE(nullptr, ptrSvm); auto retVal = clEnqueueSVMMap( pCommandQueue, // cl_command_queue command_queue CL_FALSE, // cl_bool blocking_map CL_MAP_READ, // cl_map_flags map_flags ptrSvm, // void *svm_ptr 256, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // const cL_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clEnqueueSVMUnmap( pCommandQueue, // cl_command_queue command_queue ptrSvm, // void *svm_ptr 0, // cl_uint num_events_in_wait_list nullptr, // const cL_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); clSVMFree(pContext, ptrSvm); } } TEST_F(clEnqueueSVMUnmapTests, GivenQueueIncapableWhenUnmappingSvmBufferThenInvalidOperationIsReturned) { REQUIRE_SVM_OR_SKIP(pDevice); void *ptrSvm = clSVMAlloc(pContext, CL_MEM_READ_WRITE, 256, 4); EXPECT_NE(nullptr, ptrSvm); auto retVal = clEnqueueSVMMap( pCommandQueue, // cl_command_queue command_queue CL_FALSE, // cl_bool blocking_map CL_MAP_READ, // cl_map_flags map_flags ptrSvm, // void *svm_ptr 256, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // const cL_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); disableQueueCapabilities(CL_QUEUE_CAPABILITY_MAP_BUFFER_INTEL); retVal = clEnqueueSVMUnmap( pCommandQueue, // cl_command_queue command_queue ptrSvm, // void *svm_ptr 0, // cl_uint num_events_in_wait_list nullptr, // const 
cL_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_INVALID_OPERATION, retVal); clSVMFree(pContext, ptrSvm); } TEST_F(clEnqueueSVMUnmapTests, GivenDeviceNotSupportingSvmWhenEnqueuingSVMUnmapThenInvalidOperationErrorIsReturned) { auto hwInfo = *defaultHwInfo; hwInfo.capabilityTable.ftrSvm = false; auto pDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); cl_device_id deviceId = pDevice.get(); auto pContext = std::unique_ptr(Context::create(nullptr, ClDeviceVector(&deviceId, 1), nullptr, nullptr, retVal)); auto pCommandQueue = std::make_unique(pContext.get(), pDevice.get(), nullptr, false); retVal = clEnqueueSVMUnmap( pCommandQueue.get(), // cl_command_queue command_queue reinterpret_cast(0x1234), // void *svm_ptr 0, // cl_uint num_events_in_wait_list nullptr, // const cL_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_INVALID_OPERATION, retVal); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_enqueue_task_tests.inl000066400000000000000000000033131422164147700274750ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/helpers/base_object.h" #include "opencl/test/unit_test/fixtures/hello_world_fixture.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clEnqueueTaskTests; struct EnqueueTaskWithRequiredWorkGroupSize : public HelloWorldTest { typedef HelloWorldTest Parent; void SetUp() override { Parent::kernelFilename = "required_work_group"; Parent::kernelName = "CopyBuffer2"; Parent::SetUp(); } void TearDown() override { Parent::TearDown(); } }; namespace ULT { TEST_F(clEnqueueTaskTests, GivenValidParametersWhenEnqueingTaskThenSuccessIsReturned) { cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event *event = nullptr; retVal = clEnqueueTask( 
pCommandQueue, pMultiDeviceKernel, numEventsInWaitList, eventWaitList, event); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(EnqueueTaskWithRequiredWorkGroupSize, GivenRequiredWorkGroupSizeWhenEnqueingTaskThenSuccessIsReturned) { cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event *event = nullptr; cl_command_queue command_queue = static_cast(pCmdQ); cl_kernel kernel = static_cast(pMultiDeviceKernel); retVal = clEnqueueTask( command_queue, kernel, numEventsInWaitList, eventWaitList, event); EXPECT_EQ(CL_SUCCESS, retVal); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_enqueue_unmap_mem_object_tests.inl000066400000000000000000000145561422164147700320520ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_queue/command_queue.h" #include "opencl/test/unit_test/api/cl_api_tests.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include using namespace NEO; typedef api_tests clEnqueueUnmapMemObjTests; TEST_F(clEnqueueUnmapMemObjTests, givenValidAddressWhenUnmappingThenReturnSuccess) { auto buffer = std::unique_ptr(BufferHelper>::create(pContext)); cl_int retVal = CL_SUCCESS; auto mappedPtr = clEnqueueMapBuffer(pCommandQueue, buffer.get(), CL_TRUE, CL_MAP_READ, 0, 1, 0, nullptr, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clEnqueueUnmapMemObject( pCommandQueue, buffer.get(), mappedPtr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clEnqueueUnmapMemObjTests, GivenQueueIncapableWhenUnmappingBufferThenInvalidOperationIsReturned) { auto buffer = std::unique_ptr(BufferHelper>::create(pContext)); cl_int retVal = CL_SUCCESS; auto mappedPtr = clEnqueueMapBuffer(pCommandQueue, buffer.get(), CL_TRUE, CL_MAP_READ, 0, 1, 0, nullptr, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); disableQueueCapabilities(CL_QUEUE_CAPABILITY_MAP_BUFFER_INTEL); 
retVal = clEnqueueUnmapMemObject( pCommandQueue, buffer.get(), mappedPtr, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_OPERATION, retVal); } TEST_F(clEnqueueUnmapMemObjTests, givenInvalidAddressWhenUnmappingOnCpuThenReturnError) { auto buffer = std::unique_ptr(BufferHelper>::create(pContext)); EXPECT_TRUE(buffer->mappingOnCpuAllowed()); cl_int retVal = CL_SUCCESS; auto mappedPtr = clEnqueueMapBuffer(pCommandQueue, buffer.get(), CL_TRUE, CL_MAP_READ, 0, 1, 0, nullptr, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clEnqueueUnmapMemObject( pCommandQueue, buffer.get(), ptrOffset(mappedPtr, buffer->getSize() + 1), 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(clEnqueueUnmapMemObjTests, givenInvalidAddressWhenUnmappingOnGpuThenReturnError) { auto buffer = std::unique_ptr(BufferHelper>::create(pContext)); buffer->setSharingHandler(new SharingHandler()); EXPECT_FALSE(buffer->mappingOnCpuAllowed()); cl_int retVal = CL_SUCCESS; auto mappedPtr = clEnqueueMapBuffer(pCommandQueue, buffer.get(), CL_TRUE, CL_MAP_READ, 0, 1, 0, nullptr, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clEnqueueUnmapMemObject( pCommandQueue, buffer.get(), ptrOffset(mappedPtr, buffer->getSize() + 1), 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(clEnqueueUnmapMemObjTests, GivenInvalidMemObjectTypeWhenUnmappingImageThenInvalidMemObjectIsReturned) { MockContext context{}; MockGraphicsAllocation allocation{}; MockBuffer buffer{&context, allocation}; cl_int retVal = CL_SUCCESS; auto mappedPtr = clEnqueueMapBuffer(pCommandQueue, &buffer, CL_TRUE, CL_MAP_READ, 0, 1, 0, nullptr, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); buffer.memObjectType = 0x123456; retVal = clEnqueueUnmapMemObject( pCommandQueue, &buffer, mappedPtr, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } struct clEnqueueUnmapImageTests : clEnqueueUnmapMemObjTests, ::testing::WithParamInterface { void SetUp() override { clEnqueueUnmapMemObjTests::SetUp(); 
const auto imageType = static_cast(GetParam()); this->image.reset(createImage(imageType)); EXPECT_NE(nullptr, image.get()); } Image *createImage(cl_mem_object_type type) { switch (type) { case CL_MEM_OBJECT_IMAGE1D: return Image1dHelper<>::create(pContext); case CL_MEM_OBJECT_IMAGE1D_BUFFER: return Image1dBufferHelper<>::create(pContext); case CL_MEM_OBJECT_IMAGE1D_ARRAY: return Image1dArrayHelper<>::create(pContext); case CL_MEM_OBJECT_IMAGE2D: return Image2dHelper<>::create(pContext); case CL_MEM_OBJECT_IMAGE2D_ARRAY: return Image2dArrayHelper<>::create(pContext); case CL_MEM_OBJECT_IMAGE3D: return Image3dHelper<>::create(pContext); default: return nullptr; } } std::unique_ptr image; const size_t origin[3] = {0, 0, 0}; const size_t region[3] = {1, 1, 1}; size_t imageRowPitch = 0; size_t imageSlicePitch = 0; }; TEST_P(clEnqueueUnmapImageTests, GivenValidParametersWhenUnmappingImageThenSuccessIsReturned) { void *mappedImage = clEnqueueMapImage( pCommandQueue, image.get(), CL_TRUE, CL_MAP_READ, origin, region, &imageRowPitch, &imageSlicePitch, 0, nullptr, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clEnqueueUnmapMemObject( pCommandQueue, image.get(), mappedImage, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_P(clEnqueueUnmapImageTests, GivenQueueIncapableParametersWhenUnmappingImageThenInvalidOperationIsReturned) { void *mappedImage = clEnqueueMapImage( pCommandQueue, image.get(), CL_TRUE, CL_MAP_READ, origin, region, &imageRowPitch, &imageSlicePitch, 0, nullptr, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); this->disableQueueCapabilities(CL_QUEUE_CAPABILITY_MAP_IMAGE_INTEL); retVal = clEnqueueUnmapMemObject( pCommandQueue, image.get(), mappedImage, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_OPERATION, retVal); } INSTANTIATE_TEST_SUITE_P( clEnqueueUnmapImageTests, clEnqueueUnmapImageTests, ::testing::Values( CL_MEM_OBJECT_IMAGE2D, CL_MEM_OBJECT_IMAGE3D, CL_MEM_OBJECT_IMAGE2D_ARRAY, CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE1D_ARRAY, 
CL_MEM_OBJECT_IMAGE1D_BUFFER)); compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_enqueue_verify_memory.inl000066400000000000000000000052631422164147700302130ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/mocks/mock_csr.h" #include "shared/test/common/test_macros/test.h" #include "opencl/extensions/public/cl_ext_private.h" #include "opencl/test/unit_test/api/cl_api_tests.h" using namespace NEO; TEST(CheckVerifyMemoryRelatedApiConstants, givenVerifyMemoryRelatedApiConstantsWhenVerifyingTheirValueThenCorrectValuesAreReturned) { EXPECT_EQ(AubMemDump::CmdServicesMemTraceMemoryCompare::CompareOperationValues::CompareEqual, CL_MEM_COMPARE_EQUAL); EXPECT_EQ(AubMemDump::CmdServicesMemTraceMemoryCompare::CompareOperationValues::CompareNotEqual, CL_MEM_COMPARE_NOT_EQUAL); } struct clEnqueueVerifyMemoryINTELSettings { const cl_uint comparisonMode = CL_MEM_COMPARE_EQUAL; const size_t bufferSize = 1; static constexpr size_t expectedSize = 1; int expected[expectedSize]{}; void *gpuAddress = expected; }; class clEnqueueVerifyMemoryINTELTests : public api_tests, public clEnqueueVerifyMemoryINTELSettings { }; TEST_F(clEnqueueVerifyMemoryINTELTests, givenSizeOfComparisonEqualZeroWhenCallingVerifyMemoryThenErrorIsReturned) { cl_int retval = clEnqueueVerifyMemoryINTEL(nullptr, nullptr, nullptr, 0, comparisonMode); EXPECT_EQ(CL_INVALID_VALUE, retval); } TEST_F(clEnqueueVerifyMemoryINTELTests, givenNullExpectedDataWhenCallingVerifyMemoryThenErrorIsReturned) { cl_int retval = clEnqueueVerifyMemoryINTEL(nullptr, nullptr, nullptr, expectedSize, comparisonMode); EXPECT_EQ(CL_INVALID_VALUE, retval); } TEST_F(clEnqueueVerifyMemoryINTELTests, givenInvalidAllocationPointerWhenCallingVerifyMemoryThenErrorIsReturned) { cl_int retval = clEnqueueVerifyMemoryINTEL(nullptr, nullptr, expected, expectedSize, comparisonMode); EXPECT_EQ(CL_INVALID_VALUE, retval); } 
TEST_F(clEnqueueVerifyMemoryINTELTests, givenInvalidCommandQueueWhenCallingVerifyMemoryThenErrorIsReturned) { cl_int retval = clEnqueueVerifyMemoryINTEL(nullptr, gpuAddress, expected, expectedSize, comparisonMode); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retval); } TEST_F(clEnqueueVerifyMemoryINTELTests, givenEqualMemoryWhenCallingVerifyMemoryThenSuccessIsReturned) { cl_int retval = clEnqueueVerifyMemoryINTEL(pCommandQueue, gpuAddress, expected, expectedSize, comparisonMode); EXPECT_EQ(CL_SUCCESS, retval); } TEST_F(clEnqueueVerifyMemoryINTELTests, givenNotEqualMemoryWhenCallingVerifyMemoryThenInvalidValueErrorIsReturned) { int differentMemory = expected[0] + 1; cl_int retval = clEnqueueVerifyMemoryINTEL(pCommandQueue, gpuAddress, &differentMemory, sizeof(differentMemory), comparisonMode); EXPECT_EQ(CL_INVALID_VALUE, retval); } compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_enqueue_wait_for_events_tests.inl000066400000000000000000000074221422164147700317360ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/wait_status.h" #include "shared/source/helpers/array_count.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/context/context.h" #include "opencl/source/event/event.h" #include "opencl/source/event/user_event.h" #include "opencl/test/unit_test/mocks/mock_event.h" #include "cl_api_tests.h" using namespace NEO; using clEnqueueWaitForEventsTests = api_tests; TEST_F(clEnqueueWaitForEventsTests, GivenInvalidCommandQueueWhenClEnqueueWaitForEventsIsCalledThenReturnError) { auto retVal = CL_SUCCESS; auto userEvent = clCreateUserEvent( pContext, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clEnqueueWaitForEvents( nullptr, 1, &userEvent); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); retVal = clReleaseEvent(userEvent); ASSERT_EQ(CL_SUCCESS, retVal); } TEST_F(clEnqueueWaitForEventsTests, 
GivenQueueIncapableWhenEnqueingWaitForEventsThenInvalidOperationReturned) { MockEvent events[] = { {pCommandQueue, CL_COMMAND_READ_BUFFER, 0, 0}, {pCommandQueue, CL_COMMAND_READ_BUFFER, 0, 0}, {pCommandQueue, CL_COMMAND_READ_BUFFER, 0, 0}, }; const cl_event waitList[] = {events, events + 1, events + 2}; const cl_uint waitListSize = static_cast(arrayCount(waitList)); auto retVal = clEnqueueWaitForEvents(pCommandQueue, waitListSize, waitList); EXPECT_EQ(CL_SUCCESS, retVal); this->disableQueueCapabilities(CL_QUEUE_CAPABILITY_SINGLE_QUEUE_EVENT_WAIT_LIST_INTEL); retVal = clEnqueueWaitForEvents(pCommandQueue, waitListSize, waitList); EXPECT_EQ(CL_INVALID_OPERATION, retVal); } TEST_F(clEnqueueWaitForEventsTests, GivenProperParamsWhenClEnqueueWaitForEventsIsCalledAndEventStatusIsCompleteThenWaitAndReturnSuccess) { struct MyEvent : public UserEvent { MyEvent(Context *context) : UserEvent(context) { } WaitStatus wait(bool blocking, bool quickKmdSleep) override { wasWaitCalled = true; return WaitStatus::Ready; }; bool wasWaitCalled = false; }; auto retVal = CL_SUCCESS; auto event = std::make_unique(pContext); cl_event clEvent = static_cast(event.get()); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clEnqueueWaitForEvents( pCommandQueue, 1, &clEvent); EXPECT_EQ(true, event->wasWaitCalled); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clEnqueueWaitForEventsTests, GivenProperParamsWhenClEnqueueWaitForEventsIsCalledAndEventStatusIsNotCompleteThenReturnError) { auto retVal = CL_SUCCESS; auto userEvent = clCreateUserEvent( pContext, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clSetUserEventStatus(userEvent, -1); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clEnqueueWaitForEvents( pCommandQueue, 1, &userEvent); EXPECT_EQ(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST, retVal); retVal = clReleaseEvent(userEvent); ASSERT_EQ(CL_SUCCESS, retVal); } TEST_F(clEnqueueWaitForEventsTests, GivenInvalidEventWhenClEnqueueWaitForEventsIsCalledThenReturnError) { auto retVal = CL_SUCCESS; auto validUserEvent 
= clCreateUserEvent( pContext, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); auto ptr = std::make_unique(sizeof(Event)); cl_event invalidEvent = reinterpret_cast(ptr.get()); cl_event events[]{validUserEvent, invalidEvent, validUserEvent}; retVal = clEnqueueWaitForEvents( pCommandQueue, 3, events); EXPECT_EQ(CL_INVALID_EVENT, retVal); retVal = clReleaseEvent(validUserEvent); ASSERT_EQ(CL_SUCCESS, retVal); } compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_enqueue_write_buffer_rect_tests.inl000066400000000000000000000102301422164147700322270ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/ptr_math.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/context/context.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clEnqueueWriteBufferRectTests; namespace ULT { TEST_F(clEnqueueWriteBufferRectTests, GivenInvalidBufferWhenWritingRectangularRegionThenInvalidMemObjectErrorIsReturned) { auto buffer = (cl_mem)ptrGarbage; size_t buffOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {10, 10, 0}; char ptr[10] = {}; auto retVal = clEnqueueWriteBufferRect( pCommandQueue, buffer, CL_FALSE, buffOrigin, hostOrigin, region, 10, //bufferRowPitch 0, //bufferSlicePitch 10, //hostRowPitch 0, //hostSlicePitch ptr, //hostPtr 0, //numEventsInWaitList nullptr, nullptr); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } TEST_F(clEnqueueWriteBufferRectTests, GivenNullCommandQueueWhenWritingRectangularRegionThenInvalidCommandQueueErrorIsReturned) { auto buffer = (cl_mem)ptrGarbage; size_t buffOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {10, 10, 0}; char ptr[10] = {}; auto retVal = clEnqueueWriteBufferRect( nullptr, buffer, CL_FALSE, buffOrigin, hostOrigin, region, 10, //bufferRowPitch 0, //bufferSlicePitch 10, //hostRowPitch 0, //hostSlicePitch ptr, //hostPtr 0, //numEventsInWaitList nullptr, nullptr); 
EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } TEST_F(clEnqueueWriteBufferRectTests, GivenNullHostPtrWhenWritingRectangularRegionThenInvalidValueErrorIsReturned) { auto buffer = clCreateBuffer( pContext, CL_MEM_READ_WRITE, 100, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); size_t buffOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {10, 10, 0}; auto retVal = clEnqueueWriteBufferRect( pCommandQueue, buffer, CL_FALSE, buffOrigin, hostOrigin, region, 10, //bufferRowPitch 0, //bufferSlicePitch 10, //hostRowPitch 0, //hostSlicePitch nullptr, //hostPtr 0, //numEventsInWaitList nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); retVal = clReleaseMemObject(buffer); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clEnqueueWriteBufferRectTests, GivenCorrectParametersWhenWritingRectangularRegionThenSuccessIsReturned) { MockBuffer buffer{}; buffer.size = 100; char ptr[10] = {}; size_t buffOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {10, 10, 1}; auto retVal = clEnqueueWriteBufferRect( pCommandQueue, &buffer, CL_FALSE, buffOrigin, hostOrigin, region, 10, //bufferRowPitch 0, //bufferSlicePitch 10, //hostRowPitch 0, //hostSlicePitch ptr, //hostPtr 0, //numEventsInWaitList nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clEnqueueWriteBufferRectTests, GivenQueueIncapableWhenWritingRectangularRegionThenInvalidOperationIsReturned) { MockBuffer buffer{}; buffer.size = 100; char ptr[10] = {}; size_t buffOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {10, 10, 1}; this->disableQueueCapabilities(CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_RECT_INTEL); auto retVal = clEnqueueWriteBufferRect( pCommandQueue, &buffer, CL_FALSE, buffOrigin, hostOrigin, region, 10, //bufferRowPitch 0, //bufferSlicePitch 10, //hostRowPitch 0, //hostSlicePitch ptr, //hostPtr 0, //numEventsInWaitList nullptr, nullptr); EXPECT_EQ(CL_INVALID_OPERATION, retVal); } } // namespace ULT 
compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_enqueue_write_buffer_tests.inl000066400000000000000000000040171422164147700312200ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/ptr_math.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clEnqueueWriteBufferTests; namespace ULT { TEST_F(clEnqueueWriteBufferTests, GivenCorrectArgumentsWhenWritingBufferThenSuccessIsReturned) { MockBuffer buffer{}; auto data = 1; auto retVal = clEnqueueWriteBuffer( pCommandQueue, &buffer, false, 0, sizeof(data), &data, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clEnqueueWriteBufferTests, GivenQueueIncapableArgumentsWhenWritingBufferThenInvalidOperationIsReturned) { MockBuffer buffer{}; auto data = 1; this->disableQueueCapabilities(CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_INTEL); auto retVal = clEnqueueWriteBuffer( pCommandQueue, &buffer, false, 0, sizeof(data), &data, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_OPERATION, retVal); } TEST_F(clEnqueueWriteBufferTests, GivenNullCommandQueueWhenWritingBufferThenInvalidCommandQueueErrorIsReturned) { auto buffer = (cl_mem)ptrGarbage; retVal = clEnqueueWriteBuffer( nullptr, buffer, CL_FALSE, //blocking write 0, //offset 0, //sb nullptr, 0, //numEventsInWaitList nullptr, nullptr); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } TEST_F(clEnqueueWriteBufferTests, GivenNullBufferWhenWritingBufferThenInvalidMemObjectErrorIsReturned) { void *ptr = nullptr; retVal = clEnqueueWriteBuffer( pCommandQueue, nullptr, CL_FALSE, //blocking write 0, //offset 0, //cb ptr, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } } // namespace ULT 
compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_enqueue_write_image_tests.inl000066400000000000000000000132771422164147700310410ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/context/context.h" #include "opencl/source/helpers/surface_formats.h" #include "opencl/source/mem_obj/image.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clEnqueueWriteImageTests; namespace ULT { struct clEnqueueWriteImageTests : public ApiFixture<>, public ::testing::Test { void SetUp() override { ApiFixture::SetUp(); // clang-format off imageFormat.image_channel_order = CL_YUYV_INTEL; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_width = 32; imageDesc.image_height = 32; imageDesc.image_depth = 1; imageDesc.image_array_size = 1; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = nullptr; // clang-format on } void TearDown() override { ApiFixture::TearDown(); } cl_image_format imageFormat; cl_image_desc imageDesc; }; TEST_F(clEnqueueWriteImageTests, GivenNullCommandQueueWhenWritingImageThenInvalidCommandQueueErrorIsReturned) { auto retVal = clEnqueueWriteImage( nullptr, nullptr, false, nullptr, nullptr, 0, 0, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } TEST_F(clEnqueueWriteImageTests, GivenNullImageWhenWritingImageThenInvalidMemObjectErrorIsReturned) { auto retVal = clEnqueueWriteImage( pCommandQueue, nullptr, false, nullptr, nullptr, 0, 0, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } TEST_F(clEnqueueWriteImageTests, GivenValidParametersWhenWritingImageThenSuccessIsReturned) { imageFormat.image_channel_order = CL_RGBA; auto image = Image::validateAndCreateImage(pContext, nullptr, CL_MEM_READ_WRITE, 
0, &imageFormat, &imageDesc, nullptr, retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, image); const size_t origin[] = {2, 2, 0}; const size_t region[] = {2, 2, 1}; auto retVal = clEnqueueWriteImage( pCommandQueue, image, false, origin, region, 0, 0, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(image); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clEnqueueReadImageTests, GivenQueueIncapableParametersWhenWritingImageThenInvalidOperationIsReturned) { imageFormat.image_channel_order = CL_RGBA; auto image = Image::validateAndCreateImage(pContext, nullptr, CL_MEM_READ_WRITE, 0, &imageFormat, &imageDesc, nullptr, retVal); const size_t origin[] = {2, 2, 0}; const size_t region[] = {2, 2, 1}; this->disableQueueCapabilities(CL_QUEUE_CAPABILITY_TRANSFER_IMAGE_INTEL); auto mockAddress = reinterpret_cast(0x1234); auto retVal = clEnqueueWriteImage( pCommandQueue, image, false, origin, region, 0, 0, mockAddress, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_OPERATION, retVal); retVal = clReleaseMemObject(image); EXPECT_EQ(CL_SUCCESS, retVal); } typedef clEnqueueWriteImageTests clEnqueueWriteImageYUV; TEST_F(clEnqueueWriteImageYUV, GivenValidParametersWhenWritingYuvImageThenSuccessIsReturned) { auto image = Image::validateAndCreateImage(pContext, nullptr, CL_MEM_READ_ONLY, 0, &imageFormat, &imageDesc, nullptr, retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, image); const size_t origin[] = {2, 2, 0}; const size_t region[] = {2, 2, 1}; auto retVal = clEnqueueWriteImage( pCommandQueue, image, false, origin, region, 0, 0, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(image); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clEnqueueWriteImageYUV, GivenInvalidOriginWhenWritingYuvImageThenInvalidValueErrorIsReturned) { auto image = Image::validateAndCreateImage(pContext, nullptr, CL_MEM_READ_ONLY, 0, &imageFormat, &imageDesc, nullptr, retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, 
image); const size_t origin[] = {1, 2, 0}; const size_t region[] = {2, 2, 0}; auto retVal = clEnqueueWriteImage( pCommandQueue, image, false, origin, region, 0, 0, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); retVal = clReleaseMemObject(image); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clEnqueueWriteImageYUV, GivenInvalidRegionWhenWritingYuvImageThenInvalidValueErrorIsReturned) { auto image = Image::validateAndCreateImage(pContext, nullptr, CL_MEM_READ_ONLY, 0, &imageFormat, &imageDesc, nullptr, retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, image); const size_t origin[] = {2, 2, 0}; const size_t region[] = {1, 2, 0}; auto retVal = clEnqueueWriteImage( pCommandQueue, image, false, origin, region, 0, 0, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); retVal = clReleaseMemObject(image); EXPECT_EQ(CL_SUCCESS, retVal); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_finish_tests.inl000066400000000000000000000011601422164147700262620ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_queue/command_queue.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clFinishTests; namespace ULT { TEST_F(clFinishTests, GivenValidCommandQueueWhenWaitingForFinishThenSuccessIsReturned) { retVal = clFinish(pCommandQueue); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clFinishTests, GivenNullCommandQueueWhenWaitingForFinishThenInvalidCommandQueueErrorIsReturned) { auto retVal = clFinish(nullptr); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_flush_tests.inl000066400000000000000000000011331422164147700261230ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_queue/command_queue.h" #include "cl_api_tests.h" using namespace NEO; typedef 
api_tests clFlushTests; namespace ULT { TEST_F(clFlushTests, GivenValidCommandQueueWhenFlushingThenSuccessIsReturned) { retVal = clFlush(pCommandQueue); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clFlushTests, GivenNullCommandQueueWhenFlushingThenInvalidCommandQueueErrorIsReturned) { auto retVal = clFlush(nullptr); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_function_pointers_tests.inl000066400000000000000000000120021422164147700305470ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/mocks/mock_program.h" #include "cl_api_tests.h" using namespace NEO; using clGetDeviceGlobalVariablePointer = api_tests; using clGetDeviceFunctionPointer = api_tests; TEST_F(clGetDeviceGlobalVariablePointer, GivenNullMandatoryArgumentsThenReturnInvalidArgError) { auto &symbols = pProgram->buildInfos[testedRootDeviceIndex].symbols; symbols["A"].gpuAddress = 7U; symbols["A"].symbol.size = 64U; symbols["A"].symbol.segment = NEO::SegmentType::GlobalVariables; void *globalRet = 0; auto ret = clGetDeviceGlobalVariablePointerINTEL(this->pContext->getDevice(0), this->pProgram, "A", nullptr, &globalRet); EXPECT_EQ(CL_SUCCESS, ret); EXPECT_EQ(7U, reinterpret_cast(globalRet)); ret = clGetDeviceGlobalVariablePointerINTEL(this->pContext->getDevice(0), this->pProgram, "A", nullptr, nullptr); EXPECT_EQ(CL_INVALID_ARG_VALUE, ret); ret = clGetDeviceGlobalVariablePointerINTEL(this->pContext->getDevice(0), nullptr, "A", nullptr, &globalRet); EXPECT_EQ(CL_INVALID_PROGRAM, ret); ret = clGetDeviceGlobalVariablePointerINTEL(nullptr, this->pProgram, "A", nullptr, &globalRet); EXPECT_EQ(CL_INVALID_DEVICE, ret); } TEST_F(clGetDeviceGlobalVariablePointer, GivenValidSymbolNameThenReturnProperAddressAndSize) { auto &symbols = pProgram->buildInfos[testedRootDeviceIndex].symbols; symbols["A"].gpuAddress = 7U; symbols["A"].symbol.size = 64U; 
symbols["A"].symbol.segment = NEO::SegmentType::GlobalVariables; void *globalRet = 0; size_t sizeRet = 0; auto ret = clGetDeviceGlobalVariablePointerINTEL(this->pContext->getDevice(0), this->pProgram, "A", &sizeRet, &globalRet); EXPECT_EQ(CL_SUCCESS, ret); EXPECT_EQ(7U, reinterpret_cast(globalRet)); EXPECT_EQ(64U, sizeRet); } TEST_F(clGetDeviceGlobalVariablePointer, GivenFunctionSymbolNameThenReturnInvalidArgError) { auto &symbols = pProgram->buildInfos[testedRootDeviceIndex].symbols; symbols["A"].gpuAddress = 7U; symbols["A"].symbol.size = 64U; symbols["A"].symbol.segment = NEO::SegmentType::Instructions; void *globalRet = 0; auto ret = clGetDeviceGlobalVariablePointerINTEL(this->pContext->getDevice(0), this->pProgram, "A", nullptr, &globalRet); EXPECT_EQ(CL_INVALID_ARG_VALUE, ret); } TEST_F(clGetDeviceGlobalVariablePointer, GivenUnknownSymbolNameThenReturnInvalidArgError) { void *globalRet = 0; auto ret = clGetDeviceGlobalVariablePointerINTEL(this->pContext->getDevice(0), this->pProgram, "A", nullptr, &globalRet); EXPECT_EQ(CL_INVALID_ARG_VALUE, ret); } TEST_F(clGetDeviceFunctionPointer, GivenNullMandatoryArgumentsThenReturnInvalidArgError) { auto &symbols = pProgram->buildInfos[testedRootDeviceIndex].symbols; symbols["A"].gpuAddress = 7U; symbols["A"].symbol.size = 64U; symbols["A"].symbol.segment = NEO::SegmentType::Instructions; cl_ulong fptrRet = 0; auto ret = clGetDeviceFunctionPointerINTEL(this->pContext->getDevice(0), this->pProgram, "A", &fptrRet); EXPECT_EQ(CL_SUCCESS, ret); EXPECT_EQ(7U, fptrRet); ret = clGetDeviceFunctionPointerINTEL(this->pContext->getDevice(0), this->pProgram, "A", nullptr); EXPECT_EQ(CL_INVALID_ARG_VALUE, ret); ret = clGetDeviceFunctionPointerINTEL(this->pContext->getDevice(0), nullptr, "A", &fptrRet); EXPECT_EQ(CL_INVALID_PROGRAM, ret); ret = clGetDeviceFunctionPointerINTEL(nullptr, this->pProgram, "A", &fptrRet); EXPECT_EQ(CL_INVALID_DEVICE, ret); } TEST_F(clGetDeviceFunctionPointer, GivenValidSymbolNameThenReturnProperAddress) { 
auto &symbols = pProgram->buildInfos[testedRootDeviceIndex].symbols; symbols["A"].gpuAddress = 7U; symbols["A"].symbol.size = 64U; symbols["A"].symbol.segment = NEO::SegmentType::Instructions; cl_ulong fptrRet = 0; auto ret = clGetDeviceFunctionPointerINTEL(this->pContext->getDevice(0), this->pProgram, "A", &fptrRet); EXPECT_EQ(CL_SUCCESS, ret); EXPECT_EQ(7U, fptrRet); } TEST_F(clGetDeviceFunctionPointer, GivenGlobalSymbolNameThenReturnInvalidArgError) { auto &symbols = pProgram->buildInfos[testedRootDeviceIndex].symbols; symbols["A"].gpuAddress = 7U; symbols["A"].symbol.size = 64U; symbols["A"].symbol.segment = NEO::SegmentType::GlobalVariables; symbols["B"].gpuAddress = 7U; symbols["B"].symbol.size = 64U; symbols["B"].symbol.segment = NEO::SegmentType::GlobalConstants; cl_ulong fptrRet = 0; auto ret = clGetDeviceFunctionPointerINTEL(this->pContext->getDevice(0), this->pProgram, "A", &fptrRet); EXPECT_EQ(CL_INVALID_ARG_VALUE, ret); ret = clGetDeviceFunctionPointerINTEL(this->pContext->getDevice(0), this->pProgram, "B", &fptrRet); EXPECT_EQ(CL_INVALID_ARG_VALUE, ret); } TEST_F(clGetDeviceFunctionPointer, GivenUnknownSymbolNameThenReturnInvalidArgError) { cl_ulong fptrRet = 0; auto ret = clGetDeviceFunctionPointerINTEL(this->pContext->getDevice(0), this->pProgram, "A", &fptrRet); EXPECT_EQ(CL_INVALID_ARG_VALUE, ret); } compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_get_context_info_tests.inl000066400000000000000000000045441422164147700303510ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/device/device.h" #include "opencl/source/context/context.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clGetContextInfoTests; namespace ULT { TEST_F(clGetContextInfoTests, GivenContextNumDevicesParamWhenGettingContextInfoThenNumDevicesIsReturned) { cl_uint numDevices = 0; retVal = clGetContextInfo( pContext, CL_CONTEXT_NUM_DEVICES, sizeof(cl_uint), &numDevices, 
nullptr); EXPECT_EQ(1u, numDevices); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clGetContextInfoTests, GivenContextWithSingleDeviceAndContextDevicesParamWhenGettingContextInfoThenListOfDevicesContainsOneDevice) { retVal = clGetContextInfo( pContext, CL_CONTEXT_DEVICES, 0, nullptr, &retSize); EXPECT_EQ(1 * sizeof(cl_device_id), retSize); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clGetContextInfoTests, GivenContextWithMultipleDevicesAndContextDevicesParamWhenGettingContextInfoThenListOfDevicesContainsAllDevices) { cl_uint numDevices = 2u; auto inputDevices = std::make_unique(numDevices); auto outputDevices = std::make_unique(numDevices); for (auto i = 0u; i < numDevices; i++) { inputDevices[i] = testedClDevice; } auto context = clCreateContext( nullptr, numDevices, inputDevices.get(), nullptr, nullptr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, context); retVal = clGetContextInfo( context, CL_CONTEXT_DEVICES, numDevices * sizeof(cl_device_id), outputDevices.get(), nullptr); ASSERT_EQ(CL_SUCCESS, retVal); for (size_t deviceOrdinal = 0; deviceOrdinal < numDevices; ++deviceOrdinal) { EXPECT_EQ(inputDevices[deviceOrdinal], outputDevices[deviceOrdinal]); } clReleaseContext(context); } TEST(clGetContextInfo, GivenNullContextWhenGettingContextInfoThenInvalidContextErrorIsReturned) { cl_device_id pDevices[1]; cl_uint numDevices = 1; auto retVal = clGetContextInfo( nullptr, CL_CONTEXT_DEVICES, numDevices * sizeof(cl_device_id), pDevices, nullptr); EXPECT_EQ(CL_INVALID_CONTEXT, retVal); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_get_device_and_host_timer.inl000066400000000000000000000111301422164147700307330ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/mocks/mock_ostime.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "cl_api_tests.h" using namespace NEO; struct FailDeviceTime : public MockDeviceTime { bool 
getCpuGpuTime(TimeStampData *pGpuCpuTime, OSTime *) override { return false; } }; struct FailOSTime : public MockOSTime { public: FailOSTime() { this->deviceTime = std::make_unique(); } bool getCpuTime(uint64_t *timeStamp) override { return false; }; }; typedef api_tests clGetDeviceAndHostTimerTest; typedef api_tests clGetHostTimerTest; namespace ULT { TEST_F(clGetDeviceAndHostTimerTest, GivenNullDeviceWhenGettingDeviceAndHostTimerThenInvalidDeviceErrorIsReturned) { cl_ulong device_timestamp = 0; cl_ulong host_timestamp = 0; retVal = clGetDeviceAndHostTimer( nullptr, &device_timestamp, &host_timestamp); EXPECT_EQ(CL_INVALID_DEVICE, retVal); } TEST_F(clGetDeviceAndHostTimerTest, GivenNullHostTimerWhenGettingDeviceAndHostTimerThenInvalidValueErrorIsReturned) { cl_ulong device_timestamp = 0; retVal = clGetDeviceAndHostTimer( testedClDevice, &device_timestamp, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(clGetDeviceAndHostTimerTest, GivenNullDevicesTimerWhenGettingDeviceAndHostTimerThenInvalidValueErrorIsReturned) { cl_ulong host_timestamp = 0; retVal = clGetDeviceAndHostTimer( testedClDevice, nullptr, &host_timestamp); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(clGetDeviceAndHostTimerTest, GivenValidOSTimeWhenGettingDeviceAndHostTimerThenSuccessIsReturned) { cl_ulong device_timestamp = 0; cl_ulong host_timestamp = 0; cl_ulong zero_timestamp = 0; auto mDev = new MockClDevice{MockDevice::createWithNewExecutionEnvironment(nullptr)}; mDev->setOSTime(new MockOSTime()); retVal = clGetDeviceAndHostTimer( mDev, &device_timestamp, &host_timestamp); EXPECT_GT(device_timestamp, zero_timestamp); EXPECT_GT(host_timestamp, zero_timestamp); EXPECT_EQ(retVal, CL_SUCCESS); delete mDev; } TEST_F(clGetDeviceAndHostTimerTest, GivenInvalidOSTimeWhenGettingDeviceAndHostTimerThenOutOfResourcesErrorIsReturned) { cl_ulong device_timestamp = 0; cl_ulong host_timestamp = 0; cl_ulong zero_timestamp = 0; auto mDev = new 
MockClDevice{MockDevice::createWithNewExecutionEnvironment(nullptr)}; mDev->setOSTime(new FailOSTime()); retVal = clGetDeviceAndHostTimer( mDev, &device_timestamp, &host_timestamp); EXPECT_EQ(device_timestamp, zero_timestamp); EXPECT_EQ(host_timestamp, zero_timestamp); EXPECT_EQ(retVal, CL_OUT_OF_RESOURCES); delete mDev; } TEST_F(clGetHostTimerTest, GivenNullDeviceWhenGettingHostTimerThenInvalidDeviceErrorIsReturned) { cl_ulong host_timestamp = 0; retVal = clGetHostTimer( nullptr, &host_timestamp); EXPECT_EQ(CL_INVALID_DEVICE, retVal); } TEST_F(clGetHostTimerTest, GivenNullHostTimerWhenGettingHostTimerThenInvalidValueErrorIsReturned) { retVal = clGetHostTimer( testedClDevice, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(clGetHostTimerTest, GivenCorrectParametersWhenGettingHostTimerThenSuccessIsReturned) { cl_ulong host_timestamp = 0; cl_ulong zero_timestamp = 0; retVal = clGetHostTimer( testedClDevice, &host_timestamp); EXPECT_GE(host_timestamp, zero_timestamp); EXPECT_EQ(retVal, CL_SUCCESS); } TEST_F(clGetHostTimerTest, GivenValidOSTimeWhenGettingHostTimerThenSuccessIsReturned) { cl_ulong host_timestamp = 0; cl_ulong zero_timestamp = 0; auto mDev = new MockClDevice{MockDevice::createWithNewExecutionEnvironment(nullptr)}; mDev->setOSTime(new MockOSTime()); retVal = clGetHostTimer( mDev, &host_timestamp); EXPECT_GE(host_timestamp, zero_timestamp); EXPECT_EQ(retVal, CL_SUCCESS); delete mDev; } TEST_F(clGetHostTimerTest, GivenInvalidOSTimeWhenGettingHostTimerThenOutOfResourcesErrorIsReturned) { cl_ulong host_timestamp = 0; cl_ulong zero_timestamp = 0; auto mDev = new MockClDevice{MockDevice::createWithNewExecutionEnvironment(nullptr)}; mDev->setOSTime(new FailOSTime()); retVal = clGetHostTimer( mDev, &host_timestamp); EXPECT_EQ(host_timestamp, zero_timestamp); EXPECT_EQ(retVal, CL_OUT_OF_RESOURCES); delete mDev; } } // namespace ULT 
compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_get_device_ids_tests.inl000066400000000000000000000164101422164147700277430ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/device_factory.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/ult_hw_config.h" #include "shared/test/common/helpers/variable_backup.h" #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/platform_fixture.h" using namespace NEO; using clGetDeviceIDsTests = Test; namespace ULT { TEST_F(clGetDeviceIDsTests, GivenZeroNumEntriesWhenGettingDeviceIdsThenNumberOfDevicesIsGreaterThanZero) { cl_uint numDevices = 0; auto retVal = clGetDeviceIDs(pPlatform, CL_DEVICE_TYPE_GPU, 0, nullptr, &numDevices); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_GT(numDevices, (cl_uint)0); } TEST_F(clGetDeviceIDsTests, GivenNonNullDevicesWhenGettingDeviceIdsThenDeviceIdIsReturned) { cl_uint numEntries = 1; cl_device_id pDevices[1]; auto retVal = clGetDeviceIDs(pPlatform, CL_DEVICE_TYPE_GPU, numEntries, pDevices, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clGetDeviceIDsTests, GivenNullPlatformWhenGettingDeviceIdsThenDeviceIdIsReturned) { cl_uint numEntries = 1; cl_device_id pDevices[1]; auto retVal = clGetDeviceIDs(nullptr, CL_DEVICE_TYPE_GPU, numEntries, pDevices, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clGetDeviceIDsTests, GivenInvalidDeviceTypeWhenGettingDeviceIdsThenInvalidDeviceTypeErrorIsReturned) { cl_uint numEntries = 1; cl_device_id pDevices[1]; auto retVal = clGetDeviceIDs(pPlatform, 0x0f00, numEntries, pDevices, nullptr); EXPECT_EQ(CL_INVALID_DEVICE_TYPE, retVal); } TEST_F(clGetDeviceIDsTests, GivenZeroNumEntriesAndNonNullDevicesWhenGettingDeviceIdsThenInvalidValueErrorIsReturned) { cl_device_id pDevices[1]; auto retVal = clGetDeviceIDs(pPlatform, CL_DEVICE_TYPE_GPU, 0, pDevices, nullptr); 
EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(clGetDeviceIDsTests, GivenInvalidPlatformWhenGettingDeviceIdsThenInvalidPlatformErrorIsReturned) { cl_uint numEntries = 1; cl_device_id pDevices[1]; uint32_t trash[6] = {0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef}; cl_platform_id p = reinterpret_cast(trash); auto retVal = clGetDeviceIDs(p, CL_DEVICE_TYPE_GPU, numEntries, pDevices, nullptr); EXPECT_EQ(CL_INVALID_PLATFORM, retVal); } TEST_F(clGetDeviceIDsTests, GivenDeviceTypeAllWhenGettingDeviceIdsThenDeviceIdIsReturned) { cl_uint numDevices = 0; cl_uint numEntries = 1; cl_device_id pDevices[1]; auto retVal = clGetDeviceIDs(pPlatform, CL_DEVICE_TYPE_ALL, numEntries, pDevices, &numDevices); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_GT(numDevices, (cl_uint)0); } TEST_F(clGetDeviceIDsTests, GivenDeviceTypeDefaultWhenGettingDeviceIdsThenDeviceIdIsReturned) { cl_uint numDevices = 0; cl_uint numEntries = 1; cl_device_id pDevices[1]; auto retVal = clGetDeviceIDs(pPlatform, CL_DEVICE_TYPE_DEFAULT, numEntries, pDevices, &numDevices); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_GT(numDevices, (cl_uint)0); } TEST_F(clGetDeviceIDsTests, GivenDeviceTypeCpuWhenGettingDeviceIdsThenDeviceNotFoundErrorIsReturned) { cl_uint numDevices = 0; auto retVal = clGetDeviceIDs(pPlatform, CL_DEVICE_TYPE_CPU, 0, nullptr, &numDevices); EXPECT_EQ(CL_DEVICE_NOT_FOUND, retVal); EXPECT_EQ(numDevices, (cl_uint)0); } TEST(clGetDeviceIDsTest, givenMultipleRootDevicesWhenGetDeviceIdsThenAllRootDevicesAreReturned) { platformsImpl->clear(); constexpr auto numRootDevices = 3u; VariableBackup backup(&ultHwConfig); ultHwConfig.useMockedPrepareDeviceEnvironmentsFunc = false; DebugManagerStateRestore restorer; DebugManager.flags.CreateMultipleRootDevices.set(numRootDevices); cl_uint numDevices = 0; cl_uint numEntries = numRootDevices; cl_device_id devices[numRootDevices]; auto retVal = clGetDeviceIDs(nullptr, CL_DEVICE_TYPE_ALL, numEntries, devices, &numDevices); EXPECT_EQ(retVal, CL_SUCCESS); 
EXPECT_EQ(numEntries, numDevices); for (auto i = 0u; i < numRootDevices; i++) { EXPECT_EQ(devices[i], platform()->getClDevice(i)); } } TEST(clGetDeviceIDsTest, givenMultipleRootDevicesWhenGetDeviceIdsButNumEntriesIsLowerThanNumDevicesThenSubsetOfRootDevicesIsReturned) { platformsImpl->clear(); constexpr auto numRootDevices = 3u; VariableBackup backup(&ultHwConfig); ultHwConfig.useMockedPrepareDeviceEnvironmentsFunc = false; DebugManagerStateRestore restorer; DebugManager.flags.CreateMultipleRootDevices.set(numRootDevices); cl_uint maxNumDevices; auto retVal = clGetDeviceIDs(nullptr, CL_DEVICE_TYPE_ALL, 0, nullptr, &maxNumDevices); EXPECT_EQ(retVal, CL_SUCCESS); EXPECT_EQ(numRootDevices, maxNumDevices); cl_uint numDevices = 0; cl_uint numEntries = numRootDevices - 1; cl_device_id devices[numRootDevices]; const auto dummyDevice = reinterpret_cast(0x1357); for (auto i = 0u; i < numRootDevices; i++) { devices[i] = dummyDevice; } retVal = clGetDeviceIDs(nullptr, CL_DEVICE_TYPE_ALL, numEntries, devices, &numDevices); EXPECT_EQ(retVal, CL_SUCCESS); EXPECT_LT(numDevices, maxNumDevices); EXPECT_EQ(numEntries, numDevices); for (auto i = 0u; i < numEntries; i++) { EXPECT_EQ(devices[i], platform()->getClDevice(i)); } EXPECT_EQ(devices[numEntries], dummyDevice); } TEST(clGetDeviceIDsTest, givenMultipleRootDevicesAndLimitedNumberOfReturnedDevicesWhenGetDeviceIdsThenLimitedNumberOfRootDevicesIsReturned) { platformsImpl->clear(); constexpr auto numRootDevices = 3u; VariableBackup backup(&ultHwConfig); ultHwConfig.useMockedPrepareDeviceEnvironmentsFunc = false; DebugManagerStateRestore restorer; DebugManager.flags.CreateMultipleRootDevices.set(numRootDevices); DebugManager.flags.LimitAmountOfReturnedDevices.set(numRootDevices - 1); cl_uint numDevices = 0; cl_uint numEntries = numRootDevices; cl_device_id devices[numRootDevices]; const auto dummyDevice = reinterpret_cast(0x1357); for (auto i = 0u; i < numRootDevices; i++) { devices[i] = dummyDevice; } auto retVal = 
clGetDeviceIDs(nullptr, CL_DEVICE_TYPE_ALL, numEntries, devices, &numDevices); EXPECT_EQ(retVal, CL_SUCCESS); EXPECT_EQ(numEntries - 1, numDevices); for (auto i = 0u; i < numDevices; i++) { EXPECT_EQ(devices[i], platform()->getClDevice(i)); } EXPECT_EQ(devices[numDevices], dummyDevice); } TEST(clGetDeviceIDsNegativeTests, whenFailToCreateDeviceThenclGetDeviceIDsReturnsNoDeviceError) { VariableBackup createFuncBackup{&DeviceFactory::createRootDeviceFunc}; DeviceFactory::createRootDeviceFunc = [](ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex) -> std::unique_ptr { return nullptr; }; platformsImpl->clear(); constexpr auto numRootDevices = 3u; cl_uint numDevices = 0; cl_uint numEntries = numRootDevices; cl_device_id devices[numRootDevices]; auto retVal = clGetDeviceIDs(nullptr, CL_DEVICE_TYPE_ALL, numEntries, devices, &numDevices); EXPECT_EQ(CL_DEVICE_NOT_FOUND, retVal); EXPECT_EQ(numDevices, 0u); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_get_device_info_pvc_and_later_tests.cpp000066400000000000000000000025171422164147700330030ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/api/cl_api_tests.h" using namespace NEO; using clGetDeviceInfoPvcAndLater = api_tests; using matcherPvcAndLater = IsAtLeastXeHpcCore; namespace ULT { HWTEST2_F(clGetDeviceInfoPvcAndLater, givenClDeviceSupportedThreadArbitrationPolicyIntelWhenPvcAndLatereAndCallClGetDeviceInfoThenProperArrayIsReturned, matcherPvcAndLater) { cl_device_info paramName = 0; cl_uint paramValue[4]; size_t paramSize = sizeof(paramValue); size_t paramRetSize = 0; paramName = CL_DEVICE_SUPPORTED_THREAD_ARBITRATION_POLICY_INTEL; cl_uint expectedRetValue[] = {CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_OLDEST_FIRST_INTEL, CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_ROUND_ROBIN_INTEL, 
CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_AFTER_DEPENDENCY_ROUND_ROBIN_INTEL, CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_STALL_BASED_ROUND_ROBIN_INTEL}; retVal = clGetDeviceInfo( testedClDevice, paramName, paramSize, paramValue, ¶mRetSize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(expectedRetValue), paramRetSize); EXPECT_TRUE(memcmp(expectedRetValue, paramValue, sizeof(expectedRetValue)) == 0); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_get_device_info_tests.inl000066400000000000000000000323341422164147700301220ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/device/device.h" #include "shared/source/helpers/hw_info.h" #include "opencl/source/helpers/cl_hw_helper.h" #include "cl_api_tests.h" #include using namespace NEO; using clGetDeviceInfoTests = api_tests; namespace ULT { static_assert(CL_DEVICE_IL_VERSION == CL_DEVICE_IL_VERSION_KHR, "Param values are different"); TEST_F(clGetDeviceInfoTests, givenNeoDeviceWhenAskedForSliceCountThenNumberOfSlicesIsReturned) { cl_device_info paramName = 0; size_t paramSize = 0; void *paramValue = nullptr; size_t paramRetSize = 0; size_t numSlices = 0; paramName = CL_DEVICE_SLICE_COUNT_INTEL; retVal = clGetDeviceInfo( testedClDevice, paramName, 0, nullptr, ¶mRetSize); EXPECT_EQ(sizeof(size_t), paramRetSize); paramSize = paramRetSize; paramValue = &numSlices; retVal = clGetDeviceInfo( testedClDevice, paramName, paramSize, paramValue, ¶mRetSize); EXPECT_EQ(defaultHwInfo->gtSystemInfo.SliceCount, numSlices); } TEST_F(clGetDeviceInfoTests, GivenGpuDeviceWhenGettingDeviceInfoThenDeviceTypeGpuIsReturned) { cl_device_info paramName = 0; size_t paramSize = 0; void *paramValue = nullptr; size_t paramRetSize = 0; cl_device_type deviceType = CL_DEVICE_TYPE_CPU; // set to wrong value paramName = CL_DEVICE_TYPE; paramSize = sizeof(cl_device_type); paramValue = &deviceType; retVal = clGetDeviceInfo( 
testedClDevice, paramName, paramSize, paramValue, ¶mRetSize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(static_cast(CL_DEVICE_TYPE_GPU), deviceType); } TEST_F(clGetDeviceInfoTests, GivenNullDeviceWhenGettingDeviceInfoThenInvalidDeviceErrorIsReturned) { size_t paramRetSize = 0; retVal = clGetDeviceInfo( nullptr, CL_DEVICE_TYPE, 0, nullptr, ¶mRetSize); EXPECT_EQ(CL_INVALID_DEVICE, retVal); } TEST_F(clGetDeviceInfoTests, givenOpenCLDeviceWhenAskedForSupportedSvmTypeThenCorrectValueIsReturned) { cl_device_svm_capabilities svmCaps; retVal = clGetDeviceInfo( testedClDevice, CL_DEVICE_SVM_CAPABILITIES, sizeof(cl_device_svm_capabilities), &svmCaps, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); const HardwareInfo &hwInfo = pDevice->getHardwareInfo(); cl_device_svm_capabilities expectedCaps = 0; if (hwInfo.capabilityTable.ftrSvm != 0) { if (hwInfo.capabilityTable.ftrSupportsCoherency != 0) { expectedCaps = CL_DEVICE_SVM_COARSE_GRAIN_BUFFER | CL_DEVICE_SVM_FINE_GRAIN_BUFFER | CL_DEVICE_SVM_ATOMICS; } else { expectedCaps = CL_DEVICE_SVM_COARSE_GRAIN_BUFFER; } } EXPECT_EQ(svmCaps, expectedCaps); } TEST(clGetDeviceGlobalMemSizeTests, givenDebugFlagForGlobalMemSizePercentWhenAskedForGlobalMemSizeThenAdjustedGlobalMemSizeIsReturned) { DebugManagerStateRestore restorer; DebugManager.flags.ClDeviceGlobalMemSizeAvailablePercent.set(100u); uint64_t globalMemSize100percent = 0u; auto hwInfo = *defaultHwInfo; auto pDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); auto retVal = clGetDeviceInfo( pDevice.get(), CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(uint64_t), &globalMemSize100percent, nullptr); EXPECT_EQ(retVal, CL_SUCCESS); EXPECT_NE(globalMemSize100percent, 0u); DebugManager.flags.ClDeviceGlobalMemSizeAvailablePercent.set(50u); uint64_t globalMemSize50percent = 0u; hwInfo = *defaultHwInfo; pDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); retVal = clGetDeviceInfo( pDevice.get(), CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(uint64_t), 
&globalMemSize50percent, nullptr); EXPECT_EQ(retVal, CL_SUCCESS); EXPECT_NE(globalMemSize50percent, 0u); EXPECT_EQ(globalMemSize100percent / 2u, globalMemSize50percent); } TEST(clGetDeviceFineGrainedTests, givenDebugFlagForFineGrainedOverrideWhenItIsUsedWithZeroThenNoFineGrainSupport) { DebugManagerStateRestore restorer; DebugManager.flags.ForceFineGrainedSVMSupport.set(0); cl_device_svm_capabilities svmCaps; auto hwInfo = *defaultHwInfo; auto pDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); auto retVal = clGetDeviceInfo( pDevice.get(), CL_DEVICE_SVM_CAPABILITIES, sizeof(cl_device_svm_capabilities), &svmCaps, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); cl_device_svm_capabilities expectedCaps = 0; if (hwInfo.capabilityTable.ftrSvm != 0) { expectedCaps = CL_DEVICE_SVM_COARSE_GRAIN_BUFFER; } EXPECT_EQ(svmCaps, expectedCaps); } TEST(clGetDeviceFineGrainedTests, givenDebugFlagForFineGrainedOverrideWhenItIsUsedWithOneThenThereIsFineGrainSupport) { DebugManagerStateRestore restorer; DebugManager.flags.ForceFineGrainedSVMSupport.set(1); cl_device_svm_capabilities svmCaps; auto hwInfo = *defaultHwInfo; auto pDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); auto retVal = clGetDeviceInfo( pDevice.get(), CL_DEVICE_SVM_CAPABILITIES, sizeof(cl_device_svm_capabilities), &svmCaps, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); cl_device_svm_capabilities expectedCaps = 0; if (hwInfo.capabilityTable.ftrSvm != 0) { expectedCaps = CL_DEVICE_SVM_COARSE_GRAIN_BUFFER | CL_DEVICE_SVM_FINE_GRAIN_BUFFER | CL_DEVICE_SVM_ATOMICS; } EXPECT_EQ(svmCaps, expectedCaps); } TEST_F(clGetDeviceInfoTests, givenNeoDeviceWhenAskedForDriverVersionThenNeoIsReturned) { cl_device_info paramName = 0; size_t paramSize = 0; void *paramValue = nullptr; size_t paramRetSize = 0; cl_uint driverVersion = 0; paramName = CL_DEVICE_DRIVER_VERSION_INTEL; retVal = clGetDeviceInfo( testedClDevice, paramName, 0, nullptr, ¶mRetSize); 
EXPECT_EQ(sizeof(cl_uint), paramRetSize); paramSize = paramRetSize; paramValue = &driverVersion; retVal = clGetDeviceInfo( testedClDevice, paramName, paramSize, paramValue, ¶mRetSize); EXPECT_EQ((cl_uint)CL_DEVICE_DRIVER_VERSION_INTEL_NEO1, driverVersion); } TEST_F(clGetDeviceInfoTests, GivenClDeviceExtensionsParamWhenGettingDeviceInfoThenAllExtensionsAreListed) { size_t paramRetSize = 0; cl_int retVal = clGetDeviceInfo( testedClDevice, CL_DEVICE_EXTENSIONS, 0, nullptr, ¶mRetSize); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(0u, paramRetSize); auto paramValue = std::make_unique(paramRetSize); retVal = clGetDeviceInfo( testedClDevice, CL_DEVICE_EXTENSIONS, paramRetSize, paramValue.get(), nullptr); EXPECT_EQ(CL_SUCCESS, retVal); std::string extensionString(paramValue.get()); static const char *const supportedExtensions[] = { "cl_khr_byte_addressable_store ", "cl_khr_fp16 ", "cl_khr_global_int32_base_atomics ", "cl_khr_global_int32_extended_atomics ", "cl_khr_icd ", "cl_khr_local_int32_base_atomics ", "cl_khr_local_int32_extended_atomics ", "cl_intel_command_queue_families", "cl_intel_subgroups ", "cl_intel_required_subgroup_size ", "cl_intel_subgroups_short ", "cl_khr_spir ", "cl_intel_accelerator ", "cl_intel_driver_diagnostics ", "cl_khr_priority_hints ", "cl_khr_throttle_hints ", "cl_khr_create_command_queue ", "cl_intel_subgroups_char ", "cl_intel_subgroups_long ", "cl_khr_il_program ", "cl_khr_subgroup_extended_types ", "cl_khr_subgroup_non_uniform_vote ", "cl_khr_subgroup_ballot ", "cl_khr_subgroup_non_uniform_arithmetic ", "cl_khr_subgroup_shuffle ", "cl_khr_subgroup_shuffle_relative ", "cl_khr_subgroup_clustered_reduce " "cl_intel_device_attribute_query " "cl_khr_suggested_local_work_size "}; for (auto extension : supportedExtensions) { auto foundOffset = extensionString.find(extension); EXPECT_TRUE(foundOffset != std::string::npos); } } TEST_F(clGetDeviceInfoTests, GivenClDeviceIlVersionParamWhenGettingDeviceInfoThenSpirv12IsReturned) { size_t paramRetSize = 0; 
cl_int retVal = clGetDeviceInfo( testedClDevice, CL_DEVICE_IL_VERSION, 0, nullptr, ¶mRetSize); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(0u, paramRetSize); auto paramValue = std::make_unique(paramRetSize); retVal = clGetDeviceInfo( testedClDevice, CL_DEVICE_IL_VERSION, paramRetSize, paramValue.get(), nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_STREQ("SPIR-V_1.2 ", paramValue.get()); } using matcherAtMostGen12lp = IsAtMostGfxCore; HWTEST2_F(clGetDeviceInfoTests, givenClDeviceSupportedThreadArbitrationPolicyIntelWhenCallClGetDeviceInfoThenProperArrayIsReturned, matcherAtMostGen12lp) { cl_device_info paramName = 0; cl_uint paramValue[3]; size_t paramSize = sizeof(paramValue); size_t paramRetSize = 0; paramName = CL_DEVICE_SUPPORTED_THREAD_ARBITRATION_POLICY_INTEL; cl_uint expectedRetValue[] = {CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_OLDEST_FIRST_INTEL, CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_ROUND_ROBIN_INTEL, CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_AFTER_DEPENDENCY_ROUND_ROBIN_INTEL}; retVal = clGetDeviceInfo( testedClDevice, paramName, paramSize, paramValue, ¶mRetSize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(expectedRetValue), paramRetSize); EXPECT_TRUE(memcmp(expectedRetValue, paramValue, sizeof(expectedRetValue)) == 0); } HWTEST_F(clGetDeviceInfoTests, givenClDeviceSupportedThreadArbitrationPolicyIntelWhenThreadArbitrationPolicyChangeNotSupportedAndCallClGetDeviceInfoThenParamRetSizeIsZero) { auto &hwHelper = NEO::ClHwHelper::get(defaultHwInfo->platform.eRenderCoreFamily); if (hwHelper.isSupportedKernelThreadArbitrationPolicy()) { GTEST_SKIP(); } cl_device_info paramName = 0; cl_uint paramValue[3]; size_t paramSize = sizeof(paramValue); size_t paramRetSize = 0; paramName = CL_DEVICE_SUPPORTED_THREAD_ARBITRATION_POLICY_INTEL; retVal = clGetDeviceInfo( testedClDevice, paramName, paramSize, paramValue, ¶mRetSize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, paramRetSize); } 
//------------------------------------------------------------------------------ struct GetDeviceInfoP : public ApiFixture<>, public ::testing::TestWithParam { void SetUp() override { param = GetParam(); ApiFixture::SetUp(); } void TearDown() override { ApiFixture::TearDown(); } cl_device_info param; }; typedef GetDeviceInfoP GetDeviceInfoStr; TEST_P(GetDeviceInfoStr, GivenStringTypeParamWhenGettingDeviceInfoThenSuccessIsReturned) { size_t paramRetSize = 0; cl_int retVal = clGetDeviceInfo( testedClDevice, param, 0, nullptr, ¶mRetSize); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(0u, paramRetSize); auto paramValue = std::make_unique(paramRetSize); retVal = clGetDeviceInfo( testedClDevice, param, paramRetSize, paramValue.get(), nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } static cl_device_info deviceInfoStrParams[] = { CL_DEVICE_BUILT_IN_KERNELS, CL_DEVICE_LATEST_CONFORMANCE_VERSION_PASSED, CL_DEVICE_NAME, CL_DEVICE_OPENCL_C_VERSION, CL_DEVICE_PROFILE, CL_DEVICE_VENDOR, CL_DEVICE_VERSION, CL_DRIVER_VERSION}; INSTANTIATE_TEST_CASE_P( api, GetDeviceInfoStr, testing::ValuesIn(deviceInfoStrParams)); typedef GetDeviceInfoP GetDeviceInfoVectorWidth; TEST_P(GetDeviceInfoVectorWidth, GivenParamTypeVectorWhenGettingDeviceInfoThenSizeIsGreaterThanZeroAndValueIsGreaterThanZero) { cl_uint paramValue = 0; size_t paramRetSize = 0; auto retVal = clGetDeviceInfo( testedClDevice, param, 0, nullptr, ¶mRetSize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(cl_uint), paramRetSize); retVal = clGetDeviceInfo( testedClDevice, param, paramRetSize, ¶mValue, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_GT(paramValue, 0u); } cl_device_info devicePreferredVector[] = { CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT}; INSTANTIATE_TEST_CASE_P( api, GetDeviceInfoVectorWidth, testing::ValuesIn(devicePreferredVector)); } // namespace ULT 
compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_get_event_profiling_info_tests.inl000066400000000000000000000312551422164147700320560ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/extensions/public/cl_ext_private.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/event/event.h" #include "opencl/source/event/user_event.h" #include "opencl/test/unit_test/api/cl_api_tests.h" #include "opencl/test/unit_test/fixtures/device_instrumentation_fixture.h" #include "opencl/test/unit_test/mocks/mock_event.h" #include "opencl/test/unit_test/os_interface/mock_performance_counters.h" using namespace NEO; template class EventFixture : public ApiFixture<>, public T { public: void SetUp() override { ApiFixture::SetUp(); } void TearDown() override { ApiFixture::TearDown(); } }; typedef EventFixture<::testing::Test> clEventProfilingTests; cl_int ProfilingInfo[] = { CL_PROFILING_COMMAND_QUEUED, CL_PROFILING_COMMAND_SUBMIT, CL_PROFILING_COMMAND_START, CL_PROFILING_COMMAND_END, CL_PROFILING_COMMAND_COMPLETE}; TEST_F(clEventProfilingTests, GivenInvalidParamNameWhenGettingEventProfilingInfoThenInvalidValueErrorIsReturned) { Event *pEvent = new Event(pCommandQueue, 0, 0, 0); pEvent->setStatus(CL_COMPLETE); size_t param_value_size = sizeof(cl_ulong); cl_ulong param_value; size_t param_value_size_ret; cl_int retVal = CL_PROFILING_INFO_NOT_AVAILABLE; cl_event event = (cl_event)pEvent; pEvent->setProfilingEnabled(true); retVal = clGetEventProfilingInfo(event, 0, param_value_size, ¶m_value, ¶m_value_size_ret); EXPECT_EQ(CL_INVALID_VALUE, retVal); delete pEvent; } TEST_F(clEventProfilingTests, GivenInvalidParametersWhenGettingEventProfilingInfoThenValueSizeRetIsNotUpdated) { Event event{pCommandQueue, 0, 0, 0}; event.setStatus(CL_COMPLETE); size_t paramValueSize = sizeof(cl_ulong); cl_ulong paramValue; size_t paramValueSizeRet 
= 0x1234; cl_int retVal = CL_PROFILING_INFO_NOT_AVAILABLE; event.setProfilingEnabled(true); retVal = clGetEventProfilingInfo(&event, 0, paramValueSize, ¶mValue, ¶mValueSizeRet); EXPECT_EQ(CL_INVALID_VALUE, retVal); EXPECT_EQ(0x1234u, paramValueSizeRet); } TEST_F(clEventProfilingTests, GivenInvalidParamValueSizeWhenGettingEventProfilingInfoThenInvalidValueErrorIsReturned) { Event *pEvent = new Event(pCommandQueue, 0, 0, 0); pEvent->setStatus(CL_COMPLETE); size_t param_value_size = sizeof(cl_ulong); cl_ulong param_value; size_t param_value_size_ret; cl_int retVal = CL_PROFILING_INFO_NOT_AVAILABLE; cl_event event = (cl_event)pEvent; pEvent->setProfilingEnabled(true); retVal = clGetEventProfilingInfo(event, ProfilingInfo[0], param_value_size - 1, ¶m_value, ¶m_value_size_ret); EXPECT_EQ(CL_INVALID_VALUE, retVal); delete pEvent; } TEST_F(clEventProfilingTests, GivenValidParametersWhenGettingEventProfilingInfoThenSuccessIsReturned) { Event *pEvent = new Event(pCommandQueue, 0, 0, 0); pEvent->setStatus(CL_COMPLETE); size_t param_value_size = sizeof(cl_ulong); cl_ulong param_value; size_t param_value_size_ret; cl_event event = (cl_event)pEvent; pEvent->setProfilingEnabled(true); for (auto infoId : ::ProfilingInfo) { cl_int retVal = clGetEventProfilingInfo(event, infoId, param_value_size, ¶m_value, ¶m_value_size_ret); EXPECT_EQ(CL_SUCCESS, retVal); } delete pEvent; } TEST_F(clEventProfilingTests, GivenNullParamValueSizeRetWhenGettingEventProfilingInfoThenSuccessIsReturned) { Event *pEvent = new Event(pCommandQueue, 0, 0, 0); pEvent->setStatus(CL_COMPLETE); size_t param_value_size = sizeof(cl_ulong); cl_ulong param_value; cl_event event = (cl_event)pEvent; pEvent->setProfilingEnabled(true); cl_int retVal = clGetEventProfilingInfo(event, ProfilingInfo[0], param_value_size, ¶m_value, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); delete pEvent; } TEST_F(clEventProfilingTests, GivenNullEventWhenGettingEventProfilingInfoThenInvalidEventErrorIsReturned) { auto retVal = 
clGetEventProfilingInfo(nullptr, CL_PROFILING_COMMAND_QUEUED, sizeof(cl_ulong), 0u, nullptr); EXPECT_EQ(CL_INVALID_EVENT, retVal); } TEST_F(clEventProfilingTests, GivenNullParamValueAndZeroParamValueSizeWhenGettingEventProfilingInfoThenSuccessIsReturned) { Event *pEvent = new Event(pCommandQueue, 0, 0, 0); size_t param_value_size = 0; pEvent->setStatus(CL_COMPLETE); pEvent->setProfilingEnabled(true); cl_event event = (cl_event)pEvent; cl_int retVal = clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_QUEUED, param_value_size, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); delete pEvent; } TEST_F(clEventProfilingTests, GivenNullParamValueAndCorrectParamValueSizeWhenGettingEventProfilingInfoThenSuccessIsReturned) { Event *pEvent = new Event(pCommandQueue, 0, 0, 0); size_t param_value_size = sizeof(cl_ulong); pEvent->setStatus(CL_COMPLETE); pEvent->setProfilingEnabled(true); cl_event event = (cl_event)pEvent; cl_int retVal = clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_QUEUED, param_value_size, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); delete pEvent; } TEST_F(clEventProfilingTests, GivenUserEventWhenGettingEventProfilingInfoThenProfilingInfoNotAvailableErrorIsReturned) { UserEvent *ue = new UserEvent(); size_t param_value_size = sizeof(cl_ulong); cl_ulong param_value; size_t param_value_size_ret; cl_event event = (cl_event)ue; for (auto infoId : ::ProfilingInfo) { cl_int retVal = clGetEventProfilingInfo(event, infoId, param_value_size, ¶m_value, ¶m_value_size_ret); EXPECT_EQ(CL_PROFILING_INFO_NOT_AVAILABLE, retVal); } delete ue; } TEST_F(clEventProfilingTests, GivenStartAndEndTimeWhenGettingDeltaThenCorrectDeltaIsReturned) { Event *pEvent = new Event(pCommandQueue, 0, 0, 0); cl_ulong startTime = 1; cl_ulong endTime = 2; cl_ulong delta = 0; delta = pEvent->getDelta(startTime, endTime); EXPECT_EQ(endTime - startTime, delta); delete pEvent; } TEST_F(clEventProfilingTests, GivenStartTimeGreaterThenEndTimeWhenGettingDeltaThenCorrectDeltaIsReturned) { 
Event *pEvent = new Event(pCommandQueue, 0, 0, 0); cl_ulong startTime = 2; cl_ulong endTime = 1; cl_ulong delta = 0; cl_ulong timeMax = maxNBitValue(pDevice->getHardwareInfo().capabilityTable.kernelTimestampValidBits); delta = pEvent->getDelta(startTime, endTime); EXPECT_EQ((timeMax + (endTime - startTime)), delta); delete pEvent; } TEST_F(clEventProfilingTests, givenTimestampThatOverlapWhenGetDeltaIsCalledThenProperDeltaIsComputed) { Event *pEvent = new Event(pCommandQueue, 0, 0, 0); cl_ulong TimeMax = maxNBitValue(pDevice->getHardwareInfo().capabilityTable.kernelTimestampValidBits); cl_ulong realDelta = 10; cl_ulong startTime = TimeMax - realDelta; cl_ulong endTime = 2; cl_ulong Delta = 0; Delta = pEvent->getDelta(startTime, endTime); EXPECT_EQ(realDelta + endTime, Delta); delete pEvent; } TEST_F(clEventProfilingTests, GivenProfilingDisabledWhenCalculatingProfilingDataThenFalseIsReturned) { auto *pEvent = new MockEvent(nullptr, 0, 0, 0); EXPECT_FALSE(pEvent->calcProfilingData()); delete pEvent; } TEST_F(clEventProfilingTests, GivenProfilingEnabledWhenCalculatingProfilingDataThenFalseIsNotReturned) { Event *pEvent = new Event(pCommandQueue, 0, 0, 0); cl_bool Result = pEvent->isProfilingEnabled(); EXPECT_EQ(((cl_bool)CL_FALSE), Result); pEvent->setProfilingEnabled(true); Result = pEvent->isProfilingEnabled(); EXPECT_NE(((cl_bool)CL_FALSE), Result); delete pEvent; } TEST_F(clEventProfilingTests, GivenProfilingEnabledAndUserEventsWhenCalculatingProfilingDataThenFalseIsReturned) { Event *pEvent = new UserEvent(); cl_bool Result = pEvent->isProfilingEnabled(); EXPECT_EQ(((cl_bool)CL_FALSE), Result); delete pEvent; } TEST_F(clEventProfilingTests, GivenPerfCountersEnabledWhenCheckingPerfCountersThenTrueIsReturned) { Event *pEvent = new Event(pCommandQueue, 0, 0, 0); bool Result = pEvent->isPerfCountersEnabled(); EXPECT_FALSE(Result); pEvent->setPerfCountersEnabled(true); Result = pEvent->isPerfCountersEnabled(); EXPECT_TRUE(Result); delete pEvent; } class 
clEventProfilingWithPerfCountersTests : public DeviceInstrumentationFixture, public PerformanceCountersDeviceFixture, public ::testing::Test { public: void SetUp() override { PerformanceCountersDeviceFixture::SetUp(); DeviceInstrumentationFixture::SetUp(true); cl_device_id deviceId = device.get(); cl_int retVal = CL_SUCCESS; context = std::unique_ptr(Context::create(nullptr, ClDeviceVector(&deviceId, 1), nullptr, nullptr, retVal)); commandQueue = std::make_unique(context.get(), device.get(), nullptr, false); event = std::make_unique(commandQueue.get(), 0, 0, 0); event->setStatus(CL_COMPLETE); event->setProfilingEnabled(true); commandQueue->getPerfCounters()->getApiReport(event->getHwPerfCounterNode(), 0, nullptr, ¶m_value_size, true); eventCl = static_cast(event.get()); } void TearDown() override { PerformanceCountersDeviceFixture::TearDown(); } std::unique_ptr context; std::unique_ptr commandQueue; std::unique_ptr event; size_t param_value_size = 0; cl_event eventCl = nullptr; cl_ulong param_value = 0; size_t param_value_size_ret = 0; }; TEST_F(clEventProfilingWithPerfCountersTests, GivenDisabledPerfCountersWhenGettingEventProfilingInfoThenInvalidValueErrorIsReturned) { event->setPerfCountersEnabled(false); cl_int retVal = clGetEventProfilingInfo(eventCl, CL_PROFILING_COMMAND_PERFCOUNTERS_INTEL, param_value_size, ¶m_value, ¶m_value_size_ret); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(clEventProfilingWithPerfCountersTests, GivenEnabledPerfCountersWhenGettingEventProfilingInfoThenSuccessIsReturned) { event->setPerfCountersEnabled(true); cl_int retVal = clGetEventProfilingInfo(eventCl, CL_PROFILING_COMMAND_PERFCOUNTERS_INTEL, param_value_size, ¶m_value, ¶m_value_size_ret); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clEventProfilingWithPerfCountersTests, GivenEnabledPerfCountersAndIncorrectParamValueSizeWhenGettingEventProfilingInfoThenProfilingInfoNotAvailableErrorIsReturned) { event->setPerfCountersEnabled(true); cl_int retVal = clGetEventProfilingInfo(eventCl, 
CL_PROFILING_COMMAND_PERFCOUNTERS_INTEL, param_value_size - 1, ¶m_value, ¶m_value_size_ret); EXPECT_EQ(CL_PROFILING_INFO_NOT_AVAILABLE, retVal); } cl_get_extension_function_address_for_platform_tests.inl000066400000000000000000000131001422164147700357570ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/api/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "cl_api_tests.h" using namespace NEO; using clGetExtensionFunctionAddressForPlatformTests = Test; namespace ULT { TEST_F(clGetExtensionFunctionAddressForPlatformTests, GivenNullPlatformWhenGettingExtensionFunctionThenNullIsReturned) { auto retVal = clGetExtensionFunctionAddressForPlatform(nullptr, "clCreateAcceleratorINTEL"); EXPECT_EQ(retVal, nullptr); } TEST_F(clGetExtensionFunctionAddressForPlatformTests, GivenNonExistentExtensionWhenGettingExtensionFunctionThenNullIsReturned) { auto retVal = clGetExtensionFunctionAddressForPlatform(pPlatform, "__some__function__"); EXPECT_EQ(retVal, nullptr); } TEST_F(clGetExtensionFunctionAddressForPlatformTests, GivenClCreateAcceleratorINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) { auto retVal = clGetExtensionFunctionAddressForPlatform(pPlatform, "clCreateAcceleratorINTEL"); EXPECT_EQ(retVal, reinterpret_cast(clCreateAcceleratorINTEL)); } TEST_F(clGetExtensionFunctionAddressForPlatformTests, GivenClGetAcceleratorInfoINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) { auto retVal = clGetExtensionFunctionAddressForPlatform(pPlatform, "clGetAcceleratorInfoINTEL"); EXPECT_EQ(retVal, reinterpret_cast(clGetAcceleratorInfoINTEL)); } TEST_F(clGetExtensionFunctionAddressForPlatformTests, GivenClRetainAcceleratorINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) { auto retVal = clGetExtensionFunctionAddressForPlatform(pPlatform, "clRetainAcceleratorINTEL"); EXPECT_EQ(retVal, reinterpret_cast(clRetainAcceleratorINTEL)); } 
// Each test looks up one extension entry point by name on pPlatform and compares the returned
// pointer with the address of the function of the same name.
TEST_F(clGetExtensionFunctionAddressForPlatformTests, GivenClReleaseAcceleratorINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) {
    auto retVal = clGetExtensionFunctionAddressForPlatform(pPlatform, "clReleaseAcceleratorINTEL");
    EXPECT_EQ(retVal, reinterpret_cast<void *>(clReleaseAcceleratorINTEL));
}

TEST_F(clGetExtensionFunctionAddressForPlatformTests, GivenClCreateProgramWithILKHRWhenGettingExtensionFunctionThenCorrectAddressIsReturned) {
    auto retVal = clGetExtensionFunctionAddressForPlatform(pPlatform, "clCreateProgramWithILKHR");
    EXPECT_EQ(retVal, reinterpret_cast<void *>(clCreateProgramWithILKHR));
}

TEST_F(clGetExtensionFunctionAddressForPlatformTests, GivenClCreateTracingHandleINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) {
    auto retVal = clGetExtensionFunctionAddressForPlatform(pPlatform, "clCreateTracingHandleINTEL");
    EXPECT_EQ(retVal, reinterpret_cast<void *>(clCreateTracingHandleINTEL));
}

TEST_F(clGetExtensionFunctionAddressForPlatformTests, GivenClSetTracingPointINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) {
    auto retVal = clGetExtensionFunctionAddressForPlatform(pPlatform, "clSetTracingPointINTEL");
    EXPECT_EQ(retVal, reinterpret_cast<void *>(clSetTracingPointINTEL));
}

TEST_F(clGetExtensionFunctionAddressForPlatformTests, GivenClDestroyTracingHandleINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) {
    auto retVal = clGetExtensionFunctionAddressForPlatform(pPlatform, "clDestroyTracingHandleINTEL");
    EXPECT_EQ(retVal, reinterpret_cast<void *>(clDestroyTracingHandleINTEL));
}

TEST_F(clGetExtensionFunctionAddressForPlatformTests, GivenClEnableTracingINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) {
    auto retVal = clGetExtensionFunctionAddressForPlatform(pPlatform, "clEnableTracingINTEL");
    EXPECT_EQ(retVal, reinterpret_cast<void *>(clEnableTracingINTEL));
}

TEST_F(clGetExtensionFunctionAddressForPlatformTests, GivenClDisableTracingINTELLWhenGettingExtensionFunctionThenCorrectAddressIsReturned) {
    auto retVal =
clGetExtensionFunctionAddressForPlatform(pPlatform, "clDisableTracingINTEL"); EXPECT_EQ(retVal, reinterpret_cast(clDisableTracingINTEL)); } TEST_F(clGetExtensionFunctionAddressForPlatformTests, GivenClGetTracingStateINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) { auto retVal = clGetExtensionFunctionAddressForPlatform(pPlatform, "clGetTracingStateINTEL"); EXPECT_EQ(retVal, reinterpret_cast(clGetTracingStateINTEL)); } TEST_F(clGetExtensionFunctionAddressForPlatformTests, GivenClGetKernelSuggestedLocalWorkSizeINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) { auto retVal = clGetExtensionFunctionAddressForPlatform(pPlatform, "clGetKernelSuggestedLocalWorkSizeINTEL"); EXPECT_EQ(retVal, reinterpret_cast(clGetKernelSuggestedLocalWorkSizeINTEL)); } TEST_F(clGetExtensionFunctionAddressForPlatformTests, GivenClGetKernelSuggestedLocalWorkSizeKHRWhenGettingExtensionFunctionThenCorrectAddressIsReturned) { auto retVal = clGetExtensionFunctionAddressForPlatform(pPlatform, "clGetKernelSuggestedLocalWorkSizeKHR"); EXPECT_EQ(retVal, reinterpret_cast(clGetKernelSuggestedLocalWorkSizeKHR)); } TEST_F(clGetExtensionFunctionAddressForPlatformTests, GivenClGetKernelMaxConcurrentWorkGroupCountINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) { auto retVal = clGetExtensionFunctionAddressForPlatform(pPlatform, "clGetKernelMaxConcurrentWorkGroupCountINTEL"); EXPECT_EQ(retVal, reinterpret_cast(clGetKernelMaxConcurrentWorkGroupCountINTEL)); } TEST_F(clGetExtensionFunctionAddressForPlatformTests, GivenClEnqueueNDCountKernelINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) { auto retVal = clGetExtensionFunctionAddressForPlatform(pPlatform, "clEnqueueNDCountKernelINTEL"); EXPECT_EQ(retVal, reinterpret_cast(clEnqueueNDCountKernelINTEL)); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_get_extension_function_address_tests.inl000066400000000000000000000250311422164147700332720ustar00rootroot00000000000000/* * 
Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "cl_api_tests.h" using namespace NEO; typedef api_tests clGetExtensionFunctionAddressTests; namespace ULT { TEST_F(clGetExtensionFunctionAddressTests, GivenNonExistentExtensionWhenGettingExtensionFunctionThenNullIsReturned) { auto retVal = clGetExtensionFunctionAddress("__some__function__"); EXPECT_EQ(nullptr, retVal); } TEST_F(clGetExtensionFunctionAddressTests, GivenClIcdGetPlatformIDsKHRWhenGettingExtensionFunctionThenCorrectAddressIsReturned) { auto retVal = clGetExtensionFunctionAddress("clIcdGetPlatformIDsKHR"); EXPECT_EQ(retVal, reinterpret_cast(clIcdGetPlatformIDsKHR)); } TEST_F(clGetExtensionFunctionAddressTests, GivenClCreateAcceleratorINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) { auto retVal = clGetExtensionFunctionAddress("clCreateAcceleratorINTEL"); EXPECT_EQ(retVal, reinterpret_cast(clCreateAcceleratorINTEL)); } TEST_F(clGetExtensionFunctionAddressTests, GivenClGetAcceleratorInfoINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) { auto retVal = clGetExtensionFunctionAddress("clGetAcceleratorInfoINTEL"); EXPECT_EQ(retVal, reinterpret_cast(clGetAcceleratorInfoINTEL)); } TEST_F(clGetExtensionFunctionAddressTests, GivenClRetainAcceleratorINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) { auto retVal = clGetExtensionFunctionAddress("clRetainAcceleratorINTEL"); EXPECT_EQ(retVal, reinterpret_cast(clRetainAcceleratorINTEL)); } TEST_F(clGetExtensionFunctionAddressTests, GivenClReleaseAcceleratorINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) { auto retVal = clGetExtensionFunctionAddress("clReleaseAcceleratorINTEL"); EXPECT_EQ(retVal, reinterpret_cast(clReleaseAcceleratorINTEL)); } TEST_F(clGetExtensionFunctionAddressTests, GivenClCreatePerfCountersCommandQueueINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) { auto retVal = clGetExtensionFunctionAddress("clCreatePerfCountersCommandQueueINTEL"); 
EXPECT_EQ(retVal, reinterpret_cast<void *>(clCreatePerfCountersCommandQueueINTEL));
}

// Each test looks up one extension entry point by name and compares the returned pointer with the
// address of the function of the same name.
TEST_F(clGetExtensionFunctionAddressTests, GivenClSetPerformanceConfigurationINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) {
    auto retVal = clGetExtensionFunctionAddress("clSetPerformanceConfigurationINTEL");
    EXPECT_EQ(retVal, reinterpret_cast<void *>(clSetPerformanceConfigurationINTEL));
}

TEST_F(clGetExtensionFunctionAddressTests, GivenClCreateBufferWithPropertiesINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) {
    auto functionPointer = clGetExtensionFunctionAddress("clCreateBufferWithPropertiesINTEL");
    EXPECT_EQ(functionPointer, reinterpret_cast<void *>(clCreateBufferWithPropertiesINTEL));
}

TEST_F(clGetExtensionFunctionAddressTests, GivenClCreateImageWithPropertiesINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) {
    auto functionPointer = clGetExtensionFunctionAddress("clCreateImageWithPropertiesINTEL");
    EXPECT_EQ(functionPointer, reinterpret_cast<void *>(clCreateImageWithPropertiesINTEL));
}

TEST_F(clGetExtensionFunctionAddressTests, givenClAddCommentToAubIntelAsInputWhenFunctionIsCalledThenProperPointerIsReturned) {
    auto functionPointer = clGetExtensionFunctionAddress("clAddCommentINTEL");
    EXPECT_EQ(functionPointer, reinterpret_cast<void *>(clAddCommentINTEL));
}

TEST_F(clGetExtensionFunctionAddressTests, GivenClCreateTracingHandleINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) {
    auto retVal = clGetExtensionFunctionAddress("clCreateTracingHandleINTEL");
    EXPECT_EQ(retVal, reinterpret_cast<void *>(clCreateTracingHandleINTEL));
}

TEST_F(clGetExtensionFunctionAddressTests, GivenClSetTracingPointINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) {
    auto retVal = clGetExtensionFunctionAddress("clSetTracingPointINTEL");
    EXPECT_EQ(retVal, reinterpret_cast<void *>(clSetTracingPointINTEL));
}

TEST_F(clGetExtensionFunctionAddressTests, GivenClDestroyTracingHandleINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) {
    auto retVal =
clGetExtensionFunctionAddress("clDestroyTracingHandleINTEL");
    EXPECT_EQ(retVal, reinterpret_cast<void *>(clDestroyTracingHandleINTEL));
}

// Each test looks up one extension entry point by name and compares the returned pointer with the
// address of the function of the same name.
TEST_F(clGetExtensionFunctionAddressTests, GivenClEnableTracingINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) {
    auto retVal = clGetExtensionFunctionAddress("clEnableTracingINTEL");
    EXPECT_EQ(retVal, reinterpret_cast<void *>(clEnableTracingINTEL));
}

TEST_F(clGetExtensionFunctionAddressTests, GivenClDisableTracingINTELLWhenGettingExtensionFunctionThenCorrectAddressIsReturned) {
    auto retVal = clGetExtensionFunctionAddress("clDisableTracingINTEL");
    EXPECT_EQ(retVal, reinterpret_cast<void *>(clDisableTracingINTEL));
}

TEST_F(clGetExtensionFunctionAddressTests, GivenClGetTracingStateINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) {
    auto retVal = clGetExtensionFunctionAddress("clGetTracingStateINTEL");
    EXPECT_EQ(retVal, reinterpret_cast<void *>(clGetTracingStateINTEL));
}

TEST_F(clGetExtensionFunctionAddressTests, GivenClHostMemAllocINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) {
    auto retVal = clGetExtensionFunctionAddress("clHostMemAllocINTEL");
    EXPECT_EQ(retVal, reinterpret_cast<void *>(clHostMemAllocINTEL));
}

TEST_F(clGetExtensionFunctionAddressTests, GivenClDeviceMemAllocINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) {
    auto retVal = clGetExtensionFunctionAddress("clDeviceMemAllocINTEL");
    EXPECT_EQ(retVal, reinterpret_cast<void *>(clDeviceMemAllocINTEL));
}

TEST_F(clGetExtensionFunctionAddressTests, GivenClSharedMemAllocINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) {
    auto retVal = clGetExtensionFunctionAddress("clSharedMemAllocINTEL");
    EXPECT_EQ(retVal, reinterpret_cast<void *>(clSharedMemAllocINTEL));
}

TEST_F(clGetExtensionFunctionAddressTests, GivenClMemFreeINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) {
    auto retVal = clGetExtensionFunctionAddress("clMemFreeINTEL");
    EXPECT_EQ(retVal, reinterpret_cast<void *>(clMemFreeINTEL));
}

TEST_F(clGetExtensionFunctionAddressTests,
GivenClMemBlockingFreeINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) {
    auto retVal = clGetExtensionFunctionAddress("clMemBlockingFreeINTEL");
    EXPECT_EQ(retVal, reinterpret_cast<void *>(clMemBlockingFreeINTEL));
}

// Each test looks up one extension entry point by name and compares the returned pointer with the
// address of the function of the same name.
TEST_F(clGetExtensionFunctionAddressTests, GivenClGetMemAllocInfoINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) {
    auto retVal = clGetExtensionFunctionAddress("clGetMemAllocInfoINTEL");
    EXPECT_EQ(retVal, reinterpret_cast<void *>(clGetMemAllocInfoINTEL));
}

TEST_F(clGetExtensionFunctionAddressTests, GivenClSetKernelArgMemPointerINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) {
    auto retVal = clGetExtensionFunctionAddress("clSetKernelArgMemPointerINTEL");
    EXPECT_EQ(retVal, reinterpret_cast<void *>(clSetKernelArgMemPointerINTEL));
}

TEST_F(clGetExtensionFunctionAddressTests, GivenClEnqueueMemsetINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) {
    auto retVal = clGetExtensionFunctionAddress("clEnqueueMemsetINTEL");
    EXPECT_EQ(retVal, reinterpret_cast<void *>(clEnqueueMemsetINTEL));
}

TEST_F(clGetExtensionFunctionAddressTests, GivenClEnqueueMemFillINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) {
    auto retVal = clGetExtensionFunctionAddress("clEnqueueMemFillINTEL");
    EXPECT_EQ(retVal, reinterpret_cast<void *>(clEnqueueMemFillINTEL));
}

TEST_F(clGetExtensionFunctionAddressTests, GivenClEnqueueMemcpyINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) {
    auto retVal = clGetExtensionFunctionAddress("clEnqueueMemcpyINTEL");
    EXPECT_EQ(retVal, reinterpret_cast<void *>(clEnqueueMemcpyINTEL));
}

TEST_F(clGetExtensionFunctionAddressTests, GivenClEnqueueMigrateMemINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) {
    auto retVal = clGetExtensionFunctionAddress("clEnqueueMigrateMemINTEL");
    EXPECT_EQ(retVal, reinterpret_cast<void *>(clEnqueueMigrateMemINTEL));
}

TEST_F(clGetExtensionFunctionAddressTests, GivenClEnqueueMemAdviseINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) {
    auto retVal =
clGetExtensionFunctionAddress("clEnqueueMemAdviseINTEL");
    EXPECT_EQ(retVal, reinterpret_cast<void *>(clEnqueueMemAdviseINTEL));
}

// Each test looks up one extension entry point by name and compares the returned pointer with the
// address of the function of the same name.
TEST_F(clGetExtensionFunctionAddressTests, GivenClGetDeviceGlobalVariablePointerINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) {
    auto retVal = clGetExtensionFunctionAddress("clGetDeviceGlobalVariablePointerINTEL");
    EXPECT_EQ(retVal, reinterpret_cast<void *>(clGetDeviceGlobalVariablePointerINTEL));
}

TEST_F(clGetExtensionFunctionAddressTests, GivenClGetDeviceFunctionPointerINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) {
    auto retVal = clGetExtensionFunctionAddress("clGetDeviceFunctionPointerINTEL");
    EXPECT_EQ(retVal, reinterpret_cast<void *>(clGetDeviceFunctionPointerINTEL));
}

TEST_F(clGetExtensionFunctionAddressTests, GivenClGetKernelSuggestedLocalWorkSizeINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) {
    auto retVal = clGetExtensionFunctionAddress("clGetKernelSuggestedLocalWorkSizeINTEL");
    EXPECT_EQ(retVal, reinterpret_cast<void *>(clGetKernelSuggestedLocalWorkSizeINTEL));
}

TEST_F(clGetExtensionFunctionAddressTests, GivenClGetKernelSuggestedLocalWorkSizeKHRWhenGettingExtensionFunctionThenCorrectAddressIsReturned) {
    auto retVal = clGetExtensionFunctionAddress("clGetKernelSuggestedLocalWorkSizeKHR");
    EXPECT_EQ(retVal, reinterpret_cast<void *>(clGetKernelSuggestedLocalWorkSizeKHR));
}

TEST_F(clGetExtensionFunctionAddressTests, GivenClGetKernelMaxConcurrentWorkGroupCountINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) {
    auto retVal = clGetExtensionFunctionAddress("clGetKernelMaxConcurrentWorkGroupCountINTEL");
    EXPECT_EQ(retVal, reinterpret_cast<void *>(clGetKernelMaxConcurrentWorkGroupCountINTEL));
}

TEST_F(clGetExtensionFunctionAddressTests, GivenClEnqueueNDCountKernelINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) {
    auto retVal = clGetExtensionFunctionAddress("clEnqueueNDCountKernelINTEL");
    EXPECT_EQ(retVal, reinterpret_cast<void *>(clEnqueueNDCountKernelINTEL));
}

TEST_F(clGetExtensionFunctionAddressTests,
GivenCSlSetProgramSpecializationConstantWhenGettingExtensionFunctionThenCorrectAddressIsReturned) { auto retVal = clGetExtensionFunctionAddress("clSetProgramSpecializationConstant"); EXPECT_EQ(retVal, reinterpret_cast(clSetProgramSpecializationConstant)); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_get_image_info_tests.inl000066400000000000000000000342461422164147700277510ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/hw_info.h" #include "shared/test/common/mocks/mock_device.h" #include "opencl/source/context/context.h" #include "opencl/source/mem_obj/image.h" #include "cl_api_tests.h" using namespace NEO; namespace ULT { struct clGetImageInfoTests : public ApiFixture<>, public ::testing::Test { void SetUp() override { ApiFixture::SetUp(); imageFormat.image_channel_order = CL_RGBA; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_width = 32; imageDesc.image_height = 32; imageDesc.image_depth = 1; imageDesc.image_array_size = 1; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = nullptr; image = Image::validateAndCreateImage(pContext, nullptr, CL_MEM_READ_WRITE, 0, &imageFormat, &imageDesc, nullptr, retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, image); } void TearDown() override { retVal = clReleaseMemObject(image); EXPECT_EQ(CL_SUCCESS, retVal); ApiFixture::TearDown(); } cl_image_format imageFormat; cl_image_desc imageDesc; cl_mem image; }; TEST_F(clGetImageInfoTests, GivenBufferWhenGettingImageInfoThenInvalidMemObjectErrorIsReturned) { size_t paramRetSize = 0; auto buffer = clCreateBuffer(pContext, CL_MEM_READ_WRITE, 42, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clGetImageInfo(buffer, CL_IMAGE_ELEMENT_SIZE, 0, nullptr, ¶mRetSize); 
EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); clReleaseMemObject(buffer); } TEST_F(clGetImageInfoTests, GivenNullWhenGettingImageInfoThenInvalidMemObjectErrorIsReturned) { size_t paramRetSize = 0; retVal = clGetImageInfo(nullptr, CL_IMAGE_ELEMENT_SIZE, 0, nullptr, ¶mRetSize); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } TEST_F(clGetImageInfoTests, GivenInvalidParamNameWhenGettingImageInfoThenInvalidValueErrorIsReturned) { size_t paramRetSize = 0; retVal = clGetImageInfo(image, CL_MEM_SIZE, 0, nullptr, ¶mRetSize); EXPECT_EQ(CL_INVALID_VALUE, retVal); ASSERT_EQ(0u, paramRetSize); } TEST_F(clGetImageInfoTests, GivenInvalidParametersWhenGettingImageInfoThenValueSizeRetIsNotUpdated) { size_t paramRetSize = 0x1234; retVal = clGetImageInfo(image, CL_MEM_SIZE, 0, nullptr, ¶mRetSize); EXPECT_EQ(CL_INVALID_VALUE, retVal); EXPECT_EQ(0x1234u, paramRetSize); } TEST_F(clGetImageInfoTests, GivenClImageFormatWhenGettingImageInfoThenImageFormatIsReturned) { cl_image_format imgFmtRet; size_t paramRetSize = 0; retVal = clGetImageInfo(image, CL_IMAGE_FORMAT, 0, nullptr, ¶mRetSize); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(sizeof(cl_image_format), paramRetSize); retVal = clGetImageInfo(image, CL_IMAGE_FORMAT, paramRetSize, &imgFmtRet, NULL); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(this->imageFormat.image_channel_data_type, imgFmtRet.image_channel_data_type); ASSERT_EQ(this->imageFormat.image_channel_order, imgFmtRet.image_channel_order); } TEST_F(clGetImageInfoTests, GivenClImageElementSizeWhenGettingImageInfoThenSizeOfImageElementIsReturned) { size_t elemSize = 4; size_t sizeRet = 0; size_t paramRetSize = 0; retVal = clGetImageInfo(image, CL_IMAGE_ELEMENT_SIZE, 0, nullptr, ¶mRetSize); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(sizeof(size_t), paramRetSize); retVal = clGetImageInfo(image, CL_IMAGE_ELEMENT_SIZE, paramRetSize, &sizeRet, NULL); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(elemSize, sizeRet); } TEST_F(clGetImageInfoTests, 
GivenClImageRowPitchWhenGettingImageInfoThenSizeOfRowIsReturned) { size_t rowPitchRet = 0; size_t paramRetSize = 0; retVal = clGetImageInfo(image, CL_IMAGE_ROW_PITCH, 0, nullptr, ¶mRetSize); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(sizeof(size_t), paramRetSize); retVal = clGetImageInfo(image, CL_IMAGE_ROW_PITCH, paramRetSize, &rowPitchRet, NULL); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(0u, rowPitchRet); } TEST_F(clGetImageInfoTests, GivenClImageSlicePitchAnd2dImageWhenGettingImageInfoThenZeroIsReturned) { size_t slicePitchRet = 0; size_t paramRetSize = 0; retVal = clGetImageInfo(image, CL_IMAGE_SLICE_PITCH, 0, nullptr, ¶mRetSize); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(sizeof(size_t), paramRetSize); retVal = clGetImageInfo(image, CL_IMAGE_SLICE_PITCH, paramRetSize, &slicePitchRet, NULL); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(0u, slicePitchRet); } TEST_F(clGetImageInfoTests, GivenClImageWidthWhenGettingImageInfoThenWidthOfImageIsReturned) { size_t widthRet = 0; size_t paramRetSize = 0; retVal = clGetImageInfo(image, CL_IMAGE_WIDTH, 0, nullptr, ¶mRetSize); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(sizeof(size_t), paramRetSize); retVal = clGetImageInfo(image, CL_IMAGE_WIDTH, paramRetSize, &widthRet, NULL); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(this->imageDesc.image_width, widthRet); } TEST_F(clGetImageInfoTests, GivenImageWithMipMapsWhenGettingImageInfoThenWidthAndHeightOfImageAreShifted) { auto initialWidth = this->imageDesc.image_width; auto initialHeight = this->imageDesc.image_height; auto pImage = castToObject(image); size_t returnValue = 0; size_t paramRetSize = sizeof(size_t); for (int mipLevel = 0; mipLevel < 10; mipLevel++) { pImage->setBaseMipLevel(mipLevel); auto expectedWidth = initialWidth >> mipLevel; expectedWidth = expectedWidth == 0 ? 1 : expectedWidth; auto expectedHeight = initialHeight >> mipLevel; expectedHeight = expectedHeight == 0 ? 
1 : expectedHeight; retVal = clGetImageInfo(image, CL_IMAGE_WIDTH, paramRetSize, &returnValue, NULL); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(expectedWidth, returnValue); retVal = clGetImageInfo(image, CL_IMAGE_HEIGHT, paramRetSize, &returnValue, NULL); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(expectedHeight, returnValue); } } TEST_F(clGetImageInfoTests, GivenClImageHeightWhenGettingImageInfoThenHeightOfImageIsReturned) { size_t heightRet = 0; size_t paramRetSize = 0; retVal = clGetImageInfo(image, CL_IMAGE_HEIGHT, 0, nullptr, ¶mRetSize); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(sizeof(size_t), paramRetSize); retVal = clGetImageInfo(image, CL_IMAGE_HEIGHT, paramRetSize, &heightRet, NULL); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(this->imageDesc.image_height, heightRet); } TEST_F(clGetImageInfoTests, Given3dImageWithMipMapsWhenGettingImageInfoThenWidthAndHeightOfImageAreShifted) { size_t widthRet; size_t expectedWidth; size_t heightRet; size_t expectedHeight; size_t depthRet; size_t expectedDepth; cl_image_format imageFormat2; cl_image_desc imageDesc2; cl_mem image2; imageFormat2.image_channel_order = CL_RGBA; imageFormat2.image_channel_data_type = CL_UNORM_INT8; imageDesc2.image_type = CL_MEM_OBJECT_IMAGE3D; imageDesc2.image_width = 8; imageDesc2.image_height = 8; imageDesc2.image_depth = 4; imageDesc2.image_array_size = 1; imageDesc2.image_row_pitch = 0; imageDesc2.image_slice_pitch = 0; imageDesc2.num_mip_levels = 5; imageDesc2.num_samples = 0; imageDesc2.mem_object = nullptr; image2 = Image::validateAndCreateImage(pContext, nullptr, CL_MEM_READ_WRITE, 0, &imageFormat2, &imageDesc2, nullptr, retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, image2); auto pImgObj = castToObject(image2); for (cl_uint n = 0; n <= imageDesc2.num_mip_levels; n++) { pImgObj->setBaseMipLevel(n); retVal = clGetImageInfo(image2, CL_IMAGE_WIDTH, sizeof(widthRet), &widthRet, NULL); EXPECT_EQ(CL_SUCCESS, retVal); expectedWidth = imageDesc2.image_width >> n; expectedWidth = 
(expectedWidth == 0) ? 1 : expectedWidth; ASSERT_EQ(expectedWidth, widthRet); retVal = clGetImageInfo(image2, CL_IMAGE_HEIGHT, sizeof(heightRet), &heightRet, NULL); ASSERT_EQ(CL_SUCCESS, retVal); expectedHeight = imageDesc2.image_height >> n; expectedHeight = (expectedHeight == 0) ? 1 : expectedHeight; ASSERT_EQ(expectedHeight, heightRet); retVal = clGetImageInfo(image2, CL_IMAGE_DEPTH, sizeof(depthRet), &depthRet, NULL); ASSERT_EQ(CL_SUCCESS, retVal); expectedDepth = imageDesc2.image_depth >> n; expectedDepth = (expectedDepth == 0) ? 1 : expectedDepth; ASSERT_EQ(expectedDepth, depthRet); } retVal = clReleaseMemObject(image2); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clGetImageInfoTests, Given1dImageWithMipMapsWhenGettingImageInfoThenWidthAndHeightOfImageAreShifted) { size_t widthRet; size_t expectedWidth; size_t heightRet; size_t depthRet; cl_image_format imageFormat2; cl_image_desc imageDesc2; cl_mem image2; imageFormat2.image_channel_order = CL_RGBA; imageFormat2.image_channel_data_type = CL_UNORM_INT8; imageDesc2.image_type = CL_MEM_OBJECT_IMAGE1D; imageDesc2.image_width = 8; imageDesc2.image_height = 1; imageDesc2.image_depth = 1; imageDesc2.image_array_size = 1; imageDesc2.image_row_pitch = 0; imageDesc2.image_slice_pitch = 0; imageDesc2.num_mip_levels = 5; imageDesc2.num_samples = 0; imageDesc2.mem_object = nullptr; image2 = Image::validateAndCreateImage(pContext, nullptr, CL_MEM_READ_WRITE, 0, &imageFormat2, &imageDesc2, nullptr, retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, image2); auto pImgObj = castToObject(image2); for (cl_uint n = 0; n <= imageDesc2.num_mip_levels; n++) { pImgObj->setBaseMipLevel(n); retVal = clGetImageInfo(image2, CL_IMAGE_WIDTH, sizeof(widthRet), &widthRet, NULL); EXPECT_EQ(CL_SUCCESS, retVal); expectedWidth = imageDesc2.image_width >> n; expectedWidth = (expectedWidth == 0) ? 
1 : expectedWidth; ASSERT_EQ(expectedWidth, widthRet); retVal = clGetImageInfo(image2, CL_IMAGE_HEIGHT, sizeof(heightRet), &heightRet, NULL); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(0u, heightRet); retVal = clGetImageInfo(image2, CL_IMAGE_DEPTH, sizeof(depthRet), &depthRet, NULL); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(0u, depthRet); } retVal = clReleaseMemObject(image2); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clGetImageInfoTests, GivenClImageDepthAnd2dImageWhenGettingImageInfoThenZeroIsReturned) { size_t depthRet = 0; size_t paramRetSize = 0; retVal = clGetImageInfo(image, CL_IMAGE_DEPTH, 0, nullptr, ¶mRetSize); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(sizeof(size_t), paramRetSize); retVal = clGetImageInfo(image, CL_IMAGE_DEPTH, paramRetSize, &depthRet, NULL); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(0U, depthRet); } TEST_F(clGetImageInfoTests, GivenClImageArraySizeAndNonArrayImageWhenGettingImageInfoThenZeroIsReturned) { size_t arraySizeRet = 0; size_t paramRetSize = 0; retVal = clGetImageInfo(image, CL_IMAGE_ARRAY_SIZE, 0, nullptr, ¶mRetSize); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(sizeof(size_t), paramRetSize); retVal = clGetImageInfo(image, CL_IMAGE_ARRAY_SIZE, paramRetSize, &arraySizeRet, NULL); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(0u, arraySizeRet); } TEST_F(clGetImageInfoTests, GivenClImageBufferWhenGettingImageInfoThenBufferIsReturned) { cl_mem bufferRet = nullptr; size_t paramRetSize = 0; retVal = clGetImageInfo(image, CL_IMAGE_BUFFER, 0, nullptr, ¶mRetSize); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(sizeof(cl_mem), paramRetSize); retVal = clGetImageInfo(image, CL_IMAGE_BUFFER, paramRetSize, &bufferRet, NULL); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(this->imageDesc.buffer, bufferRet); } TEST_F(clGetImageInfoTests, GivenClImageNumMipLevelsWhenGettingImageInfoThenCorrectMipMapLevelIsReturned) { cl_uint numMipLevelRet = 0; size_t paramRetSize = 0; retVal = clGetImageInfo(image, CL_IMAGE_NUM_MIP_LEVELS, 0, nullptr, ¶mRetSize); EXPECT_EQ(CL_SUCCESS, 
retVal); ASSERT_EQ(sizeof(cl_uint), paramRetSize); retVal = clGetImageInfo(image, CL_IMAGE_NUM_MIP_LEVELS, paramRetSize, &numMipLevelRet, NULL); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(this->imageDesc.num_mip_levels, numMipLevelRet); } TEST_F(clGetImageInfoTests, GivenClImageNumSamplesWhenGettingImageInfoThenCorrectNumberOfSamplesIsReturned) { cl_uint numSamplesRet = 0; size_t paramRetSize = 0; retVal = clGetImageInfo(image, CL_IMAGE_NUM_SAMPLES, 0, nullptr, ¶mRetSize); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(sizeof(cl_uint), paramRetSize); retVal = clGetImageInfo(image, CL_IMAGE_NUM_SAMPLES, paramRetSize, &numSamplesRet, NULL); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(this->imageDesc.num_samples, numSamplesRet); } TEST_F(clGetImageInfoTests, givenMultisampleCountForMcsWhenAskingForRowPitchThenReturnNewValueIfGreaterThanOne) { McsSurfaceInfo mcsInfo = {1, 1, 0}; imageDesc.num_samples = 16; size_t receivedRowPitch = 0; clReleaseMemObject(image); image = Image::validateAndCreateImage(pContext, nullptr, CL_MEM_READ_WRITE, 0, &imageFormat, &imageDesc, nullptr, retVal); auto imageObj = castToObject(image); auto formatInfo = imageObj->getSurfaceFormatInfo(); size_t multisampleRowPitch = imageDesc.image_width * formatInfo.surfaceFormat.ImageElementSizeInBytes * imageDesc.num_samples; EXPECT_NE(multisampleRowPitch, imageObj->getHostPtrRowPitch()); for (uint32_t multisampleCount = 0; multisampleCount <= 4; multisampleCount++) { mcsInfo.multisampleCount = multisampleCount; imageObj->setMcsSurfaceInfo(mcsInfo); clGetImageInfo(image, CL_IMAGE_ROW_PITCH, sizeof(size_t), &receivedRowPitch, nullptr); if (multisampleCount > 1) { EXPECT_EQ(multisampleRowPitch, receivedRowPitch); } else { EXPECT_EQ(imageObj->getHostPtrRowPitch(), receivedRowPitch); } } } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_get_image_params_tests.inl000066400000000000000000000113141422164147700302700ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * 
* SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/hw_info.h" #include "shared/test/common/mocks/mock_device.h" #include "opencl/source/context/context.h" #include "cl_api_tests.h" using namespace NEO; namespace ULT { template struct clGetImageParams : public ApiFixture<>, public T { void SetUp() override { ApiFixture::SetUp(); // clang-format off imageFormat.image_channel_order = CL_RGBA; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_width = 32; imageDesc.image_height = 32; imageDesc.image_depth = 1; imageDesc.image_array_size = 1; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = nullptr; // clang-format on } void TearDown() override { ApiFixture::TearDown(); } cl_image_format imageFormat; cl_image_desc imageDesc; }; typedef clGetImageParams<::testing::Test> clGetImageParamsTest; TEST_F(clGetImageParamsTest, GivenValidParamsWhenGettingImageParamsThenSuccessIsReturned) { size_t imageRowPitch = 0; size_t imageSlicePitch = 0; cl_int retVal = CL_INVALID_VALUE; retVal = clGetImageParamsINTEL(pContext, &imageFormat, &imageDesc, &imageRowPitch, &imageSlicePitch); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(imageRowPitch, 0u); EXPECT_NE(imageSlicePitch, 0u); } TEST_F(clGetImageParamsTest, GivenDefaultAndSpecializedContextsWhenGettingImageParamsThenTheSameValuesAreReturned) { cl_int retVal = CL_INVALID_VALUE; MockDefaultContext defaultContext; size_t defaultContextImageRowPitch = 0; size_t defaultContextImageSlicePitch = 0; retVal = clGetImageParamsINTEL(&defaultContext, &imageFormat, &imageDesc, &defaultContextImageRowPitch, &defaultContextImageSlicePitch); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(defaultContextImageRowPitch, 0u); EXPECT_NE(defaultContextImageSlicePitch, 0u); MockSpecializedContext specializedContext; size_t specializedContextImageRowPitch = 0; size_t 
specializedContextImageSlicePitch = 0; retVal = clGetImageParamsINTEL(&specializedContext, &imageFormat, &imageDesc, &specializedContextImageRowPitch, &specializedContextImageSlicePitch); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(defaultContextImageRowPitch, specializedContextImageRowPitch); EXPECT_EQ(defaultContextImageSlicePitch, specializedContextImageSlicePitch); } TEST_F(clGetImageParamsTest, GivenNullContextWhenGettingImageParamsThenInvalidContextErrorIsReturned) { size_t imageRowPitch = 0; size_t imageSlicePitch = 0; cl_int retVal = CL_SUCCESS; retVal = clGetImageParamsINTEL(nullptr, &imageFormat, &imageDesc, &imageRowPitch, &imageSlicePitch); ASSERT_EQ(CL_INVALID_CONTEXT, retVal); EXPECT_EQ(imageRowPitch, 0u); EXPECT_EQ(imageSlicePitch, 0u); } TEST_F(clGetImageParamsTest, GivenNullParamsWhenGettingImageParamsThenInvalidValueErrorIsReturned) { size_t imageRowPitch = 0; size_t imageSlicePitch = 0; cl_int retVal = CL_SUCCESS; retVal = clGetImageParamsINTEL(pContext, nullptr, &imageDesc, &imageRowPitch, &imageSlicePitch); ASSERT_EQ(CL_INVALID_VALUE, retVal); EXPECT_EQ(imageRowPitch, 0u); EXPECT_EQ(imageSlicePitch, 0u); retVal = clGetImageParamsINTEL(pContext, &imageFormat, nullptr, &imageRowPitch, &imageSlicePitch); ASSERT_EQ(CL_INVALID_VALUE, retVal); EXPECT_EQ(imageRowPitch, 0u); EXPECT_EQ(imageSlicePitch, 0u); retVal = clGetImageParamsINTEL(pContext, &imageFormat, &imageDesc, nullptr, &imageSlicePitch); ASSERT_EQ(CL_INVALID_VALUE, retVal); EXPECT_EQ(imageRowPitch, 0u); EXPECT_EQ(imageSlicePitch, 0u); retVal = clGetImageParamsINTEL(pContext, &imageFormat, &imageDesc, &imageRowPitch, nullptr); ASSERT_EQ(CL_INVALID_VALUE, retVal); EXPECT_EQ(imageRowPitch, 0u); EXPECT_EQ(imageSlicePitch, 0u); } TEST_F(clGetImageParamsTest, GivenInvalidFormatWhenGettingImageParamsThenImageFormatNotSupportedErrorIsReturned) { size_t imageRowPitch = 0; size_t imageSlicePitch = 0; imageFormat.image_channel_order = CL_A; imageFormat.image_channel_data_type = CL_SIGNED_INT32; auto 
retVal = clGetImageParamsINTEL(pContext, &imageFormat, &imageDesc, &imageRowPitch, &imageSlicePitch); ASSERT_EQ(CL_IMAGE_FORMAT_NOT_SUPPORTED, retVal); EXPECT_EQ(imageRowPitch, 0u); EXPECT_EQ(imageSlicePitch, 0u); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_get_kernel_arg_info_tests.inl000066400000000000000000000040711422164147700307710ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/compiler_interface/compiler_interface.h" #include "shared/source/device/device.h" #include "shared/source/helpers/file_io.h" #include "shared/test/common/helpers/kernel_binary_helper.h" #include "shared/test/common/helpers/test_files.h" #include "opencl/source/context/context.h" #include "cl_api_tests.h" #include "compiler_options.h" using namespace NEO; typedef api_tests clGetKernelArgInfoTests; namespace ULT { TEST_F(clGetKernelArgInfoTests, GivenValidParamsWhenGettingKernelArgInfoThenSuccessAndCorrectSizeAreReturned) { cl_program pProgram = nullptr; size_t sourceSize = 0; std::string testFile; KernelBinaryHelper kbHelper("CopyBuffer_simd16", false); testFile.append(clFiles); testFile.append("CopyBuffer_simd16.cl"); auto pSource = loadDataFromFile( testFile.c_str(), sourceSize); ASSERT_NE(0u, sourceSize); ASSERT_NE(nullptr, pSource); const char *sources[1] = {pSource.get()}; pProgram = clCreateProgramWithSource( pContext, 1, sources, &sourceSize, &retVal); EXPECT_NE(nullptr, pProgram); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clBuildProgram( pProgram, 1, &testedClDevice, CompilerOptions::argInfo.data(), nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); cl_kernel kernel = clCreateKernel(pProgram, "CopyBuffer", &retVal); ASSERT_EQ(CL_SUCCESS, retVal); size_t returnSize = 0; cl_kernel_arg_type_qualifier typeQualifier = CL_KERNEL_ARG_TYPE_NONE; retVal = clGetKernelArgInfo(kernel, 0, CL_KERNEL_ARG_TYPE_QUALIFIER, sizeof(typeQualifier), &typeQualifier, &returnSize); 
EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(returnSize, sizeof(cl_kernel_arg_type_qualifier)); retVal = clReleaseKernel(kernel); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_get_kernel_info_tests.inl000066400000000000000000000041041422164147700301350ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/compiler_interface/compiler_interface.h" #include "shared/source/device/device.h" #include "shared/source/helpers/file_io.h" #include "shared/test/common/helpers/kernel_binary_helper.h" #include "shared/test/common/helpers/test_files.h" #include "opencl/source/context/context.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clGetKernelInfoTests; namespace ULT { TEST_F(clGetKernelInfoTests, GivenValidParamsWhenGettingKernelInfoThenSuccessIsReturned) { cl_program pProgram = nullptr; size_t sourceSize = 0; std::string testFile; KernelBinaryHelper kbHelper("CopyBuffer_simd16", false); testFile.append(clFiles); testFile.append("CopyBuffer_simd16.cl"); auto pSource = loadDataFromFile( testFile.c_str(), sourceSize); ASSERT_NE(0u, sourceSize); ASSERT_NE(nullptr, pSource); const char *sources[1] = {pSource.get()}; pProgram = clCreateProgramWithSource( pContext, 1, sources, &sourceSize, &retVal); EXPECT_NE(nullptr, pProgram); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clBuildProgram( pProgram, 1, &testedClDevice, nullptr, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); cl_kernel kernel = clCreateKernel(pProgram, "CopyBuffer", &retVal); ASSERT_EQ(CL_SUCCESS, retVal); size_t paramValueSizeRet; retVal = clGetKernelInfo( kernel, CL_KERNEL_FUNCTION_NAME, 0, nullptr, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_GT(paramValueSizeRet, 0u); retVal = clGetKernelInfo( kernel, CL_KERNEL_ATTRIBUTES, 0, nullptr, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, 
retVal); EXPECT_GT(paramValueSizeRet, 0u); retVal = clReleaseKernel(kernel); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); } } // namespace ULT cl_get_kernel_max_concurrent_work_group_count_intel_tests.inl000066400000000000000000000110731422164147700370360ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/api/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/kernel/grf_config.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "cl_api_tests.h" using namespace NEO; using clGetKernelMaxConcurrentWorkGroupCountTests = api_tests; namespace ULT { TEST_F(clGetKernelMaxConcurrentWorkGroupCountTests, GivenInvalidInputWhenCallingGetKernelMaxConcurrentWorkGroupCountThenErrorIsReturned) { size_t globalWorkOffset[3] = {}; size_t localWorkSize[3] = {}; size_t suggestedWorkGroupCount; cl_uint workDim = 1; retVal = clGetKernelMaxConcurrentWorkGroupCountINTEL(nullptr, pMultiDeviceKernel, workDim, globalWorkOffset, localWorkSize, &suggestedWorkGroupCount); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); retVal = clGetKernelMaxConcurrentWorkGroupCountINTEL(pCommandQueue, nullptr, workDim, globalWorkOffset, localWorkSize, &suggestedWorkGroupCount); EXPECT_EQ(CL_INVALID_KERNEL, retVal); pKernel->isPatchedOverride = false; retVal = clGetKernelMaxConcurrentWorkGroupCountINTEL(pCommandQueue, pMultiDeviceKernel, workDim, globalWorkOffset, localWorkSize, &suggestedWorkGroupCount); EXPECT_EQ(CL_INVALID_KERNEL, retVal); pKernel->isPatchedOverride = true; retVal = clGetKernelMaxConcurrentWorkGroupCountINTEL(pCommandQueue, pMultiDeviceKernel, workDim, globalWorkOffset, localWorkSize, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); retVal = clGetKernelMaxConcurrentWorkGroupCountINTEL(pCommandQueue, pMultiDeviceKernel, 0, globalWorkOffset, localWorkSize, &suggestedWorkGroupCount); 
EXPECT_EQ(CL_INVALID_WORK_DIMENSION, retVal); retVal = clGetKernelMaxConcurrentWorkGroupCountINTEL(pCommandQueue, pMultiDeviceKernel, 4, globalWorkOffset, localWorkSize, &suggestedWorkGroupCount); EXPECT_EQ(CL_INVALID_WORK_DIMENSION, retVal); retVal = clGetKernelMaxConcurrentWorkGroupCountINTEL(pCommandQueue, pMultiDeviceKernel, workDim, nullptr, localWorkSize, &suggestedWorkGroupCount); EXPECT_EQ(CL_INVALID_GLOBAL_OFFSET, retVal); retVal = clGetKernelMaxConcurrentWorkGroupCountINTEL(pCommandQueue, pMultiDeviceKernel, workDim, globalWorkOffset, nullptr, &suggestedWorkGroupCount); EXPECT_EQ(CL_INVALID_WORK_GROUP_SIZE, retVal); } TEST_F(clGetKernelMaxConcurrentWorkGroupCountTests, GivenVariousInputWhenGettingMaxConcurrentWorkGroupCountThenCorrectValuesAreReturned) { cl_uint workDim = 3; size_t globalWorkOffset[] = {0, 0, 0}; size_t localWorkSize[] = {8, 8, 8}; size_t maxConcurrentWorkGroupCount = 0; const_cast(pKernel->getKernelInfo()).kernelDescriptor.kernelAttributes.numGrfRequired = GrfConfig::DefaultGrfNumber; retVal = clGetKernelMaxConcurrentWorkGroupCountINTEL(pCommandQueue, pMultiDeviceKernel, workDim, globalWorkOffset, localWorkSize, &maxConcurrentWorkGroupCount); EXPECT_EQ(CL_SUCCESS, retVal); size_t expectedMaxConcurrentWorkGroupCount = pKernel->getMaxWorkGroupCount(workDim, localWorkSize, pCommandQueue); EXPECT_EQ(expectedMaxConcurrentWorkGroupCount, maxConcurrentWorkGroupCount); auto pKernelWithExecutionEnvironmentPatch = MockKernel::create(pCommandQueue->getDevice(), pProgram); auto kernelInfos = MockKernel::toKernelInfoContainer(pKernelWithExecutionEnvironmentPatch->getKernelInfo(), testedRootDeviceIndex); MultiDeviceKernel multiDeviceKernelWithExecutionEnvironmentPatch(MockMultiDeviceKernel::toKernelVector(pKernelWithExecutionEnvironmentPatch), kernelInfos); retVal = clGetKernelMaxConcurrentWorkGroupCountINTEL(pCommandQueue, &multiDeviceKernelWithExecutionEnvironmentPatch, workDim, globalWorkOffset, localWorkSize, &maxConcurrentWorkGroupCount); 
EXPECT_EQ(CL_SUCCESS, retVal); expectedMaxConcurrentWorkGroupCount = pKernelWithExecutionEnvironmentPatch->getMaxWorkGroupCount(workDim, localWorkSize, pCommandQueue); EXPECT_EQ(expectedMaxConcurrentWorkGroupCount, maxConcurrentWorkGroupCount); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_get_kernel_sub_group_info_khr_tests.inl000066400000000000000000000223431422164147700330730ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/hello_world_fixture.h" using namespace NEO; struct KernelSubGroupInfoKhrFixture : HelloWorldFixture { typedef HelloWorldFixture ParentClass; void SetUp() override { ParentClass::SetUp(); MaxSimdSize = static_cast(pKernel->getKernelInfo().getMaxSimdSize()); ASSERT_GE(MaxSimdSize, 8u); MaxWorkDim = static_cast(pClDevice->getDeviceInfo().maxWorkItemDimensions); ASSERT_EQ(MaxWorkDim, 3u); } void TearDown() override { ParentClass::TearDown(); } size_t inputValue[3]; size_t paramValue; size_t paramValueSizeRet; size_t MaxSimdSize; size_t CalculatedWGS; size_t MaxWorkDim; }; namespace ULT { typedef Test KernelSubGroupInfoKhrTest; template struct KernelSubGroupInfoKhrParamFixture : KernelSubGroupInfoKhrFixture, ::testing::TestWithParam { void SetUp() override { KernelSubGroupInfoKhrFixture::SetUp(); } void TearDown() override { KernelSubGroupInfoKhrFixture::TearDown(); } }; struct TestParam { size_t gwsX; size_t gwsY; size_t gwsZ; } KernelSubGroupInfoKhrWGS[] = { {0, 0, 0}, {1, 1, 1}, {1, 5, 1}, {8, 1, 1}, {16, 1, 1}, {32, 1, 1}, {64, 1, 1}, {1, 190, 1}, {1, 510, 1}, {512, 1, 1}}; typedef KernelSubGroupInfoKhrParamFixture KernelSubGroupInfoKhrReturnSizeTest; INSTANTIATE_TEST_CASE_P(wgs, KernelSubGroupInfoKhrReturnSizeTest, ::testing::ValuesIn(KernelSubGroupInfoKhrWGS)); TEST_P(KernelSubGroupInfoKhrReturnSizeTest, GivenLwsParameterWhenGettingMaxSubGroupSizeThenCorrectValueIsReturned) { paramValueSizeRet = 0; retVal = 
clGetKernelSubGroupInfoKHR( pMultiDeviceKernel, pClDevice, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE, sizeof(size_t) * 3, inputValue, sizeof(size_t), ¶mValue, ¶mValueSizeRet); EXPECT_EQ(retVal, CL_SUCCESS); EXPECT_EQ(paramValueSizeRet, sizeof(size_t)); EXPECT_EQ(paramValue, MaxSimdSize); } typedef KernelSubGroupInfoKhrParamFixture KernelSubGroupInfoKhrReturnCountTest; INSTANTIATE_TEST_CASE_P(wgs, KernelSubGroupInfoKhrReturnCountTest, ::testing::ValuesIn(KernelSubGroupInfoKhrWGS)); TEST_P(KernelSubGroupInfoKhrReturnCountTest, GivenLwsParameterWhenGettingSubGroupCountThenCorrectValueIsReturned) { paramValueSizeRet = 0; inputValue[0] = GetParam().gwsX; inputValue[1] = GetParam().gwsY; inputValue[2] = GetParam().gwsZ; CalculatedWGS = inputValue[0] * inputValue[1] * inputValue[2]; retVal = clGetKernelSubGroupInfoKHR( pMultiDeviceKernel, pClDevice, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE, sizeof(size_t) * 3, inputValue, sizeof(size_t), ¶mValue, ¶mValueSizeRet); EXPECT_EQ(retVal, CL_SUCCESS); EXPECT_EQ(paramValueSizeRet, sizeof(size_t)); if (CalculatedWGS % MaxSimdSize == 0) { EXPECT_EQ(paramValue, CalculatedWGS / MaxSimdSize); } else { EXPECT_EQ(paramValue, (CalculatedWGS / MaxSimdSize) + 1); } } typedef KernelSubGroupInfoKhrParamFixture KernelSubGroupInfoKhrReturnCompileSizeTest; TEST_F(KernelSubGroupInfoKhrReturnCompileSizeTest, GivenKernelWhenGettingRequiredSubGroupSizeThenCorrectValueIsReturned) { retVal = clGetKernelSubGroupInfoKHR( pMultiDeviceKernel, pClDevice, CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL, 0, nullptr, sizeof(size_t), ¶mValue, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(paramValueSizeRet, sizeof(size_t)); size_t requiredSubGroupSize = 0; auto start = pKernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelLanguageAttributes.find("intel_reqd_sub_group_size("); if (start != std::string::npos) { start += strlen("intel_reqd_sub_group_size("); auto stop = 
pKernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelLanguageAttributes.find(")", start); requiredSubGroupSize = stoi(pKernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelLanguageAttributes.substr(start, stop - start)); } EXPECT_EQ(paramValue, requiredSubGroupSize); } TEST_F(KernelSubGroupInfoKhrTest, GivenNullKernelWhenGettingKernelSubGroupInfoThenInvalidKernelErrorIsReturned) { retVal = clGetKernelSubGroupInfoKHR( nullptr, pClDevice, 0, 0, nullptr, 0, nullptr, nullptr); EXPECT_EQ(retVal, CL_INVALID_KERNEL); } TEST_F(KernelSubGroupInfoKhrTest, GivenInvalidDeviceWhenGettingSubGroupInfoFromSingleDeviceKernelThenInvalidDeviceErrorIsReturned) { retVal = clGetKernelSubGroupInfoKHR( pMultiDeviceKernel, reinterpret_cast(pKernel), CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL, 0, nullptr, sizeof(size_t), ¶mValue, ¶mValueSizeRet); EXPECT_EQ(CL_INVALID_DEVICE, retVal); } TEST_F(KernelSubGroupInfoKhrTest, GivenNullDeviceWhenGettingSubGroupInfoFromSingleDeviceKernelThenSuccessIsReturned) { retVal = clGetKernelSubGroupInfoKHR( pMultiDeviceKernel, nullptr, CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL, 0, nullptr, sizeof(size_t), ¶mValue, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(KernelSubGroupInfoKhrTest, GivenNullDeviceWhenGettingSubGroupInfoFromMultiDeviceKernelThenInvalidDeviceErrorIsReturned) { MockUnrestrictiveContext context; auto mockProgram = std::make_unique(&context, false, context.getDevices()); std::unique_ptr pMultiDeviceKernel( MultiDeviceKernel::create(mockProgram.get(), this->pMultiDeviceKernel->getKernelInfos(), nullptr)); retVal = clGetKernelSubGroupInfoKHR( pMultiDeviceKernel.get(), nullptr, CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL, 0, nullptr, sizeof(size_t), ¶mValue, ¶mValueSizeRet); EXPECT_EQ(CL_INVALID_DEVICE, retVal); } TEST_F(KernelSubGroupInfoKhrTest, GivenInvalidParamNameWhenGettingKernelSubGroupInfoThenInvalidValueErrorIsReturned) { retVal = clGetKernelSubGroupInfoKHR( pMultiDeviceKernel, pClDevice, 0, sizeof(size_t), inputValue, 
sizeof(size_t), ¶mValue, nullptr); EXPECT_EQ(retVal, CL_INVALID_VALUE); } uint32_t /*cl_kernel_sub_group_info_khr*/ KernelSubGroupInfoKhrInputParams[] = { CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR}; typedef KernelSubGroupInfoKhrParamFixture KernelSubGroupInfoKhrInputParamsTest; INSTANTIATE_TEST_CASE_P(KernelSubGroupInfoKhrInputParams, KernelSubGroupInfoKhrInputParamsTest, ::testing::ValuesIn(KernelSubGroupInfoKhrInputParams)); TEST_P(KernelSubGroupInfoKhrInputParamsTest, GivenInvalidInputWhenGettingKernelSubGroupInfoThenInvalidValueErrorIsReturned) { // work dim == 0 retVal = clGetKernelSubGroupInfoKHR( pMultiDeviceKernel, pClDevice, GetParam(), 0, inputValue, 0, nullptr, nullptr); EXPECT_EQ(retVal, CL_INVALID_VALUE); // work dim % sizeof(size_t) != 0 retVal = clGetKernelSubGroupInfoKHR( pMultiDeviceKernel, pClDevice, GetParam(), (sizeof(size_t) * MaxWorkDim) - 1, inputValue, 0, nullptr, nullptr); EXPECT_EQ(retVal, CL_INVALID_VALUE); // work dim > MaxWorkDim retVal = clGetKernelSubGroupInfoKHR( pMultiDeviceKernel, pClDevice, GetParam(), sizeof(size_t) * (MaxWorkDim + 1), inputValue, 0, nullptr, nullptr); EXPECT_EQ(retVal, CL_INVALID_VALUE); // null input_value retVal = clGetKernelSubGroupInfoKHR( pMultiDeviceKernel, pClDevice, GetParam(), sizeof(size_t) * (MaxWorkDim), nullptr, 0, nullptr, nullptr); EXPECT_EQ(retVal, CL_INVALID_VALUE); } TEST_P(KernelSubGroupInfoKhrInputParamsTest, GivenInvalidParamSizeWhenGettingKernelSubGroupInfoThenInvalidValueErrorIsReturned) { //param_value_size < sizeof(size_t) retVal = clGetKernelSubGroupInfoKHR( pMultiDeviceKernel, pClDevice, GetParam(), sizeof(size_t), inputValue, sizeof(size_t) - 1, ¶mValue, nullptr); EXPECT_EQ(retVal, CL_INVALID_VALUE); } TEST_P(KernelSubGroupInfoKhrInputParamsTest, GivenNoReturnPointerWhenGettingKernelSubGroupInfoThenSuccessIsReturned) { retVal = clGetKernelSubGroupInfoKHR( pMultiDeviceKernel, pClDevice, GetParam(), sizeof(size_t), inputValue, 0, nullptr, 
nullptr); EXPECT_EQ(retVal, CL_SUCCESS); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_get_kernel_sub_group_info_tests.inl000066400000000000000000000460501422164147700322300ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/hello_world_fixture.h" #include "opencl/test/unit_test/test_macros/test_checks_ocl.h" using namespace NEO; struct KernelSubGroupInfoFixture : HelloWorldFixture { typedef HelloWorldFixture ParentClass; void SetUp() override { ParentClass::SetUp(); pKernel->maxKernelWorkGroupSize = static_cast(pDevice->getDeviceInfo().maxWorkGroupSize / 2); maxSimdSize = static_cast(pKernel->getKernelInfo().getMaxSimdSize()); ASSERT_LE(8u, maxSimdSize); maxWorkDim = static_cast(pClDevice->getDeviceInfo().maxWorkItemDimensions); ASSERT_EQ(3u, maxWorkDim); maxWorkGroupSize = static_cast(pKernel->maxKernelWorkGroupSize); ASSERT_GE(1024u, maxWorkGroupSize); largestCompiledSIMDSize = static_cast(pKernel->getKernelInfo().getMaxSimdSize()); ASSERT_EQ(32u, largestCompiledSIMDSize); auto requiredWorkGroupSizeX = static_cast(pKernel->getKernelInfo().kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0]); auto requiredWorkGroupSizeY = static_cast(pKernel->getKernelInfo().kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1]); auto requiredWorkGroupSizeZ = static_cast(pKernel->getKernelInfo().kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2]); calculatedMaxWorkgroupSize = requiredWorkGroupSizeX * requiredWorkGroupSizeY * requiredWorkGroupSizeZ; if ((calculatedMaxWorkgroupSize == 0) || (calculatedMaxWorkgroupSize > static_cast(pKernel->maxKernelWorkGroupSize))) { calculatedMaxWorkgroupSize = static_cast(pKernel->maxKernelWorkGroupSize); } } void TearDown() override { ParentClass::TearDown(); } size_t inputValue[3]; size_t paramValue[3]; size_t paramValueSizeRet; size_t maxSimdSize; size_t maxWorkDim; size_t maxWorkGroupSize; 
size_t largestCompiledSIMDSize; size_t calculatedMaxWorkgroupSize; }; namespace ULT { typedef Test KernelSubGroupInfoTest; template struct KernelSubGroupInfoParamFixture : KernelSubGroupInfoFixture, ::testing::TestWithParam { void SetUp() override { KernelSubGroupInfoFixture::SetUp(); } void TearDown() override { KernelSubGroupInfoFixture::TearDown(); } }; static size_t WorkDimensions[] = {1, 2, 3}; static struct WorkSizeParam { size_t x; size_t y; size_t z; } KernelSubGroupInfoWGS[] = { {0, 0, 0}, {1, 1, 1}, {1, 5, 1}, {8, 1, 1}, {16, 1, 1}, {32, 1, 1}, {64, 1, 1}, {1, 190, 1}, {1, 510, 1}, {512, 1, 1}}; typedef KernelSubGroupInfoParamFixture> KernelSubGroupInfoReturnSizeTest; INSTANTIATE_TEST_CASE_P(wgs, KernelSubGroupInfoReturnSizeTest, ::testing::Combine( ::testing::ValuesIn(KernelSubGroupInfoWGS), ::testing::ValuesIn(WorkDimensions))); TEST_P(KernelSubGroupInfoReturnSizeTest, GivenWorkGroupSizeWhenGettingMaxSubGroupSizeThenReturnIsCalculatedCorrectly) { REQUIRE_OCL_21_OR_SKIP(defaultHwInfo); WorkSizeParam workSize; size_t workDim; std::tie(workSize, workDim) = GetParam(); memset(inputValue, 0, sizeof(inputValue)); inputValue[0] = workSize.x; if (workDim > 1) { inputValue[1] = workSize.y; } if (workDim > 2) { inputValue[2] = workSize.z; } paramValueSizeRet = 0; retVal = clGetKernelSubGroupInfo( pMultiDeviceKernel, pClDevice, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE, sizeof(size_t) * workDim, inputValue, sizeof(size_t), paramValue, ¶mValueSizeRet); EXPECT_EQ(retVal, CL_SUCCESS); EXPECT_EQ(paramValueSizeRet, sizeof(size_t)); EXPECT_EQ(maxSimdSize, paramValue[0]); } typedef KernelSubGroupInfoParamFixture> KernelSubGroupInfoReturnCountTest; INSTANTIATE_TEST_CASE_P(wgs, KernelSubGroupInfoReturnCountTest, ::testing::Combine( ::testing::ValuesIn(KernelSubGroupInfoWGS), ::testing::ValuesIn(WorkDimensions))); TEST_P(KernelSubGroupInfoReturnCountTest, GivenWorkGroupSizeWhenGettingSubGroupCountThenReturnIsCalculatedCorrectly) { REQUIRE_OCL_21_OR_SKIP(defaultHwInfo); 
WorkSizeParam workSize; size_t workDim; std::tie(workSize, workDim) = GetParam(); memset(inputValue, 0, sizeof(inputValue)); inputValue[0] = workSize.x; if (workDim > 1) { inputValue[1] = workSize.y; } if (workDim > 2) { inputValue[2] = workSize.z; } paramValueSizeRet = 0; retVal = clGetKernelSubGroupInfo( pMultiDeviceKernel, pClDevice, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE, sizeof(size_t) * workDim, inputValue, sizeof(size_t), paramValue, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(size_t), paramValueSizeRet); auto calculatedWGS = workSize.x; if (workDim > 1) { calculatedWGS *= workSize.y; } if (workDim > 2) { calculatedWGS *= workSize.z; } if (calculatedWGS % maxSimdSize == 0) { EXPECT_EQ(calculatedWGS / maxSimdSize, paramValue[0]); } else { EXPECT_EQ((calculatedWGS / maxSimdSize) + 1, paramValue[0]); } } static size_t SubGroupsNumbers[] = {0, 1, 10, 12, 21, 33, 67, 99}; typedef KernelSubGroupInfoParamFixture> KernelSubGroupInfoReturnLocalSizeTest; INSTANTIATE_TEST_CASE_P(sgn, KernelSubGroupInfoReturnLocalSizeTest, ::testing::Combine( ::testing::ValuesIn(SubGroupsNumbers), ::testing::ValuesIn(WorkDimensions))); TEST_P(KernelSubGroupInfoReturnLocalSizeTest, GivenWorkGroupSizeWhenGettingLocalSizeThenReturnIsCalculatedCorrectly) { REQUIRE_OCL_21_OR_SKIP(defaultHwInfo); size_t subGroupsNum; size_t workDim; std::tie(subGroupsNum, workDim) = GetParam(); inputValue[0] = subGroupsNum; retVal = clGetKernelSubGroupInfo( pMultiDeviceKernel, pClDevice, CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT, sizeof(size_t), inputValue, sizeof(size_t) * workDim, paramValue, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(size_t) * workDim, paramValueSizeRet); size_t workGroupSize = subGroupsNum * largestCompiledSIMDSize; if (workGroupSize > calculatedMaxWorkgroupSize) { workGroupSize = 0; } EXPECT_EQ(workGroupSize, paramValue[0]); if (workDim > 1) { EXPECT_EQ(workGroupSize ? 1u : 0u, paramValue[1]); } if (workDim > 2) { EXPECT_EQ(workGroupSize ? 
1u : 0u, paramValue[2]); } } typedef KernelSubGroupInfoParamFixture KernelSubGroupInfoReturnMaxNumberTest; TEST_F(KernelSubGroupInfoReturnMaxNumberTest, GivenWorkGroupSizeWhenGettingMaxNumSubGroupsThenReturnIsCalculatedCorrectly) { REQUIRE_OCL_21_OR_SKIP(defaultHwInfo); retVal = clGetKernelSubGroupInfo( pMultiDeviceKernel, pClDevice, CL_KERNEL_MAX_NUM_SUB_GROUPS, 0, nullptr, sizeof(size_t), paramValue, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(paramValueSizeRet, sizeof(size_t)); EXPECT_EQ(paramValue[0], Math::divideAndRoundUp(calculatedMaxWorkgroupSize, largestCompiledSIMDSize)); } typedef KernelSubGroupInfoParamFixture KernelSubGroupInfoReturnCompileNumberTest; TEST_F(KernelSubGroupInfoReturnCompileNumberTest, GivenKernelWhenGettingCompileNumSubGroupThenReturnIsCalculatedCorrectly) { REQUIRE_OCL_21_OR_SKIP(defaultHwInfo); retVal = clGetKernelSubGroupInfo( pMultiDeviceKernel, pClDevice, CL_KERNEL_COMPILE_NUM_SUB_GROUPS, 0, nullptr, sizeof(size_t), paramValue, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(paramValueSizeRet, sizeof(size_t)); EXPECT_EQ(paramValue[0], static_cast(pKernel->getKernelInfo().kernelDescriptor.kernelMetadata.compiledSubGroupsNumber)); } typedef KernelSubGroupInfoParamFixture KernelSubGroupInfoReturnCompileSizeTest; TEST_F(KernelSubGroupInfoReturnCompileSizeTest, GivenKernelWhenGettingCompileSubGroupSizeThenReturnIsCalculatedCorrectly) { REQUIRE_OCL_21_OR_SKIP(defaultHwInfo); retVal = clGetKernelSubGroupInfo( pMultiDeviceKernel, pClDevice, CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL, 0, nullptr, sizeof(size_t), paramValue, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(paramValueSizeRet, sizeof(size_t)); size_t requiredSubGroupSize = 0; auto start = pKernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelLanguageAttributes.find("intel_reqd_sub_group_size("); if (start != std::string::npos) { start += strlen("intel_reqd_sub_group_size("); auto stop = 
pKernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelLanguageAttributes.find(")", start); requiredSubGroupSize = stoi(pKernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelLanguageAttributes.substr(start, stop - start)); } EXPECT_EQ(paramValue[0], requiredSubGroupSize); } TEST_F(KernelSubGroupInfoTest, GivenNullKernelWhenGettingSubGroupInfoThenInvalidKernelErrorIsReturned) { REQUIRE_OCL_21_OR_SKIP(defaultHwInfo); retVal = clGetKernelSubGroupInfo( nullptr, pClDevice, 0, 0, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_KERNEL, retVal); } TEST_F(KernelSubGroupInfoTest, GivenInvalidDeviceWhenGettingSubGroupInfoFromSingleDeviceKernelThenInvalidDeviceErrorIsReturned) { REQUIRE_OCL_21_OR_SKIP(defaultHwInfo); retVal = clGetKernelSubGroupInfo( pMultiDeviceKernel, reinterpret_cast(pKernel), CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL, 0, nullptr, sizeof(size_t), paramValue, ¶mValueSizeRet); EXPECT_EQ(CL_INVALID_DEVICE, retVal); } TEST_F(KernelSubGroupInfoTest, GivenNullDeviceWhenGettingSubGroupInfoFromSingleDeviceKernelThenSuccessIsReturned) { REQUIRE_OCL_21_OR_SKIP(defaultHwInfo); retVal = clGetKernelSubGroupInfo( pMultiDeviceKernel, nullptr, CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL, 0, nullptr, sizeof(size_t), paramValue, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(KernelSubGroupInfoTest, GivenNullDeviceWhenGettingSubGroupInfoFromMultiDeviceKernelThenInvalidDeviceErrorIsReturned) { REQUIRE_OCL_21_OR_SKIP(defaultHwInfo); MockUnrestrictiveContext context; auto mockProgram = std::make_unique(&context, false, context.getDevices()); std::unique_ptr pMultiDeviceKernel(MultiDeviceKernel::create(mockProgram.get(), this->pMultiDeviceKernel->getKernelInfos(), nullptr)); retVal = clGetKernelSubGroupInfo( pMultiDeviceKernel.get(), nullptr, CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL, 0, nullptr, sizeof(size_t), paramValue, ¶mValueSizeRet); EXPECT_EQ(CL_INVALID_DEVICE, retVal); } TEST_F(KernelSubGroupInfoTest, 
GivenInvalidParamNameWhenGettingSubGroupInfoThenInvalidValueErrorIsReturned) { REQUIRE_OCL_21_OR_SKIP(defaultHwInfo); retVal = clGetKernelSubGroupInfo( pMultiDeviceKernel, pClDevice, 0, sizeof(size_t), inputValue, sizeof(size_t), paramValue, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); } uint32_t /*cl_kernel_sub_group_info*/ KernelSubGroupInfoInputParams[] = { CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE, CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT, CL_KERNEL_MAX_NUM_SUB_GROUPS, CL_KERNEL_COMPILE_NUM_SUB_GROUPS, CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL}; typedef KernelSubGroupInfoParamFixture KernelSubGroupInfoInputParamsTest; INSTANTIATE_TEST_CASE_P(KernelSubGroupInfoInputParams, KernelSubGroupInfoInputParamsTest, ::testing::ValuesIn(KernelSubGroupInfoInputParams)); TEST_P(KernelSubGroupInfoInputParamsTest, GivenOpenClVersionLowerThan21WhenGettingKenrelSubGroupInfoThenInvalidOperationErrorIsReturned) { bool requireOpenCL21 = (GetParam() == CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT) || (GetParam() == CL_KERNEL_MAX_NUM_SUB_GROUPS) || (GetParam() == CL_KERNEL_COMPILE_NUM_SUB_GROUPS); if (requireOpenCL21) { DebugManager.flags.ForceOCLVersion.set(20); pDevice->initializeCaps(); pClDevice->initializeCaps(); retVal = clGetKernelSubGroupInfo( pMultiDeviceKernel, pClDevice, GetParam(), 0, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_OPERATION, retVal); DebugManager.flags.ForceOCLVersion.set(0); pDevice->initializeCaps(); pClDevice->initializeCaps(); } } TEST_P(KernelSubGroupInfoInputParamsTest, GivenWorkDimZeroWhenGettingSubGroupInfoThenSuccessOrErrorIsCorrectlyReturned) { REQUIRE_OCL_21_OR_SKIP(defaultHwInfo); bool requireInput = (GetParam() == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE) || (GetParam() == CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE) || (GetParam() == CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT); retVal = clGetKernelSubGroupInfo( pMultiDeviceKernel, pClDevice, GetParam(), 0, inputValue, 0, nullptr, nullptr); 
EXPECT_EQ(requireInput ? CL_INVALID_VALUE : CL_SUCCESS, retVal); } TEST_P(KernelSubGroupInfoInputParamsTest, GivenIndivisibleWorkDimWhenGettingSubGroupInfoThenSuccessOrErrorIsCorrectlyReturned) { REQUIRE_OCL_21_OR_SKIP(defaultHwInfo); bool requireInput = (GetParam() == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE) || (GetParam() == CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE) || (GetParam() == CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT); size_t workDim = ((GetParam() == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE) || (GetParam() == CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE)) ? maxWorkDim : 1; retVal = clGetKernelSubGroupInfo( pMultiDeviceKernel, pClDevice, GetParam(), (sizeof(size_t) * workDim) - 1, inputValue, 0, nullptr, nullptr); EXPECT_EQ(requireInput ? CL_INVALID_VALUE : CL_SUCCESS, retVal); } TEST_P(KernelSubGroupInfoInputParamsTest, GivenWorkDimGreaterThanMaxWorkDimWhenGettingSubGroupInfoThenSuccessOrErrorIsCorrectlyReturned) { REQUIRE_OCL_21_OR_SKIP(defaultHwInfo); bool requireInput = (GetParam() == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE) || (GetParam() == CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE) || (GetParam() == CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT); size_t workDim = ((GetParam() == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE) || (GetParam() == CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE)) ? maxWorkDim : 1; retVal = clGetKernelSubGroupInfo( pMultiDeviceKernel, pClDevice, GetParam(), sizeof(size_t) * (workDim + 1), inputValue, 0, nullptr, nullptr); EXPECT_EQ(requireInput ? 
CL_INVALID_VALUE : CL_SUCCESS, retVal); } TEST_P(KernelSubGroupInfoInputParamsTest, GivenInputValueIsNullWhenGettingSubGroupInfoThenSuccessOrErrorIsCorrectlyReturned) { REQUIRE_OCL_21_OR_SKIP(defaultHwInfo); bool requireInput = (GetParam() == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE) || (GetParam() == CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE) || (GetParam() == CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT); size_t workDim = ((GetParam() == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE) || (GetParam() == CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE)) ? maxWorkDim : 1; retVal = clGetKernelSubGroupInfo( pMultiDeviceKernel, pClDevice, GetParam(), sizeof(size_t) * (workDim), nullptr, 0, nullptr, nullptr); EXPECT_EQ(requireInput ? CL_INVALID_VALUE : CL_SUCCESS, retVal); } TEST_P(KernelSubGroupInfoInputParamsTest, GivenParamValueSizeZeroWhenGettingSubGroupInfoThenInvalidValueErrorIsReturned) { REQUIRE_OCL_21_OR_SKIP(defaultHwInfo); retVal = clGetKernelSubGroupInfo( pMultiDeviceKernel, pClDevice, GetParam(), sizeof(size_t), inputValue, 0, paramValue, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_P(KernelSubGroupInfoInputParamsTest, GivenUnalignedParamValueSizeWhenGettingSubGroupInfoThenInvalidValueErrorIsReturned) { REQUIRE_OCL_21_OR_SKIP(defaultHwInfo); size_t workDim = (GetParam() == CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT) ? maxWorkDim : 1; retVal = clGetKernelSubGroupInfo( pMultiDeviceKernel, pClDevice, GetParam(), sizeof(size_t), inputValue, (sizeof(size_t) * workDim) - 1, paramValue, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_P(KernelSubGroupInfoInputParamsTest, GivenTooLargeParamValueSizeWhenGettingSubGroupInfoThenCorrectRetValIsReturned) { REQUIRE_OCL_21_OR_SKIP(defaultHwInfo); bool requireOutputArray = (GetParam() == CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT); size_t workDim = (GetParam() == CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT) ? 
maxWorkDim : 1; // paramValue size / sizeof(size_t) > MaxWorkDim retVal = clGetKernelSubGroupInfo( pMultiDeviceKernel, pClDevice, GetParam(), sizeof(size_t), inputValue, sizeof(size_t) * (workDim + 1), paramValue, nullptr); EXPECT_EQ(requireOutputArray ? CL_INVALID_VALUE : CL_SUCCESS, retVal); } TEST_P(KernelSubGroupInfoInputParamsTest, GivenNullPtrForReturnWhenGettingKernelSubGroupInfoThenSuccessIsReturned) { REQUIRE_OCL_21_OR_SKIP(defaultHwInfo); bool requireOutputArray = (GetParam() == CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT); retVal = clGetKernelSubGroupInfo( pMultiDeviceKernel, pClDevice, GetParam(), sizeof(size_t), inputValue, 0, nullptr, nullptr); EXPECT_EQ(requireOutputArray ? CL_INVALID_VALUE : CL_SUCCESS, retVal); } } // namespace ULT cl_get_kernel_suggested_local_work_size_intel_tests.inl000066400000000000000000000166541422164147700355730ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/api/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_queue/cl_local_work_size.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "cl_api_tests.h" using namespace NEO; using clGetKernelSuggestedLocalWorkSizeTests = api_tests; namespace ULT { TEST_F(clGetKernelSuggestedLocalWorkSizeTests, GivenInvalidInputWhenCallingGetKernelSuggestedLocalWorkSizeThenErrorIsReturned) { size_t globalWorkOffset[3] = {}; size_t globalWorkSize[3] = {1, 1, 1}; size_t suggestedLocalWorkSize[3]; cl_uint workDim = 1; retVal = clGetKernelSuggestedLocalWorkSizeINTEL(nullptr, pMultiDeviceKernel, workDim, globalWorkOffset, globalWorkSize, suggestedLocalWorkSize); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); retVal = clGetKernelSuggestedLocalWorkSizeINTEL(pCommandQueue, nullptr, workDim, globalWorkOffset, globalWorkSize, suggestedLocalWorkSize); EXPECT_EQ(CL_INVALID_KERNEL, retVal); pKernel->isPatchedOverride = false; retVal = 
clGetKernelSuggestedLocalWorkSizeINTEL(pCommandQueue, pMultiDeviceKernel, workDim, globalWorkOffset, globalWorkSize, suggestedLocalWorkSize); EXPECT_EQ(CL_INVALID_KERNEL, retVal); pKernel->isPatchedOverride = true; retVal = clGetKernelSuggestedLocalWorkSizeINTEL(pCommandQueue, pMultiDeviceKernel, workDim, globalWorkOffset, globalWorkSize, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); retVal = clGetKernelSuggestedLocalWorkSizeINTEL(pCommandQueue, pMultiDeviceKernel, 0, globalWorkOffset, globalWorkSize, suggestedLocalWorkSize); EXPECT_EQ(CL_INVALID_WORK_DIMENSION, retVal); retVal = clGetKernelSuggestedLocalWorkSizeINTEL(pCommandQueue, pMultiDeviceKernel, 4, globalWorkOffset, globalWorkSize, suggestedLocalWorkSize); EXPECT_EQ(CL_INVALID_WORK_DIMENSION, retVal); retVal = clGetKernelSuggestedLocalWorkSizeINTEL(pCommandQueue, pMultiDeviceKernel, workDim, globalWorkOffset, nullptr, suggestedLocalWorkSize); EXPECT_EQ(CL_INVALID_GLOBAL_WORK_SIZE, retVal); for (size_t i = 0; i < 3; ++i) { globalWorkSize[i] = 0; retVal = clGetKernelSuggestedLocalWorkSizeINTEL(pCommandQueue, pMultiDeviceKernel, 3, globalWorkOffset, globalWorkSize, suggestedLocalWorkSize); EXPECT_EQ(CL_INVALID_GLOBAL_WORK_SIZE, retVal); globalWorkSize[i] = 1; } } TEST_F(clGetKernelSuggestedLocalWorkSizeTests, GivenVariousInputWhenGettingSuggestedLocalWorkSizeThenCorrectValuesAreReturned) { size_t globalWorkOffset[] = {0, 0, 0}; size_t globalWorkSize[] = {128, 128, 128}; size_t suggestedLocalWorkSize[] = {0, 0, 0}; Vec3 elws{0, 0, 0}; Vec3 gws{128, 128, 128}; Vec3 offset{0, 0, 0}; DispatchInfo dispatchInfo{pDevice, pKernel, 1, gws, elws, offset}; auto expectedLws = computeWorkgroupSize(dispatchInfo); EXPECT_GT(expectedLws.x, 1u); retVal = clGetKernelSuggestedLocalWorkSizeINTEL(pCommandQueue, pMultiDeviceKernel, 1, globalWorkOffset, globalWorkSize, suggestedLocalWorkSize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(expectedLws.x, suggestedLocalWorkSize[0]); EXPECT_EQ(0u, suggestedLocalWorkSize[1]); EXPECT_EQ(0u, 
suggestedLocalWorkSize[2]); dispatchInfo.setDim(2); expectedLws = computeWorkgroupSize(dispatchInfo); retVal = clGetKernelSuggestedLocalWorkSizeINTEL(pCommandQueue, pMultiDeviceKernel, 2, globalWorkOffset, globalWorkSize, suggestedLocalWorkSize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(expectedLws.x, suggestedLocalWorkSize[0]); EXPECT_EQ(expectedLws.y, suggestedLocalWorkSize[1]); EXPECT_EQ(0u, suggestedLocalWorkSize[2]); dispatchInfo.setDim(3); expectedLws = computeWorkgroupSize(dispatchInfo); retVal = clGetKernelSuggestedLocalWorkSizeINTEL(pCommandQueue, pMultiDeviceKernel, 3, globalWorkOffset, globalWorkSize, suggestedLocalWorkSize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(expectedLws.x, suggestedLocalWorkSize[0]); EXPECT_EQ(expectedLws.y, suggestedLocalWorkSize[1]); EXPECT_EQ(expectedLws.z, suggestedLocalWorkSize[2]); //null global work offset is fine retVal = clGetKernelSuggestedLocalWorkSizeINTEL(pCommandQueue, pMultiDeviceKernel, 3, nullptr, globalWorkSize, suggestedLocalWorkSize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(expectedLws.x, suggestedLocalWorkSize[0]); EXPECT_EQ(expectedLws.y, suggestedLocalWorkSize[1]); EXPECT_EQ(expectedLws.z, suggestedLocalWorkSize[2]); } TEST_F(clGetKernelSuggestedLocalWorkSizeTests, GivenKernelWithReqdWorkGroupSizeWhenGettingSuggestedLocalWorkSizeThenRequiredWorkSizeIsReturned) { size_t globalWorkOffset[] = {0, 0, 0}; size_t globalWorkSize[] = {128, 128, 128}; size_t suggestedLocalWorkSize[] = {0, 0, 0}; uint16_t regdLocalWorkSize[] = {32, 32, 32}; MockKernelWithInternals mockKernel(*pDevice); mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0] = regdLocalWorkSize[0]; mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1] = regdLocalWorkSize[1]; mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2] = regdLocalWorkSize[2]; retVal = clGetKernelSuggestedLocalWorkSizeINTEL(pCommandQueue, mockKernel.mockMultiDeviceKernel, 3, globalWorkOffset, 
globalWorkSize, suggestedLocalWorkSize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(regdLocalWorkSize[0], suggestedLocalWorkSize[0]); EXPECT_EQ(regdLocalWorkSize[1], suggestedLocalWorkSize[1]); EXPECT_EQ(regdLocalWorkSize[2], suggestedLocalWorkSize[2]); } TEST_F(clGetKernelSuggestedLocalWorkSizeTests, GivenKernelWithExecutionEnvironmentPatchedWhenGettingSuggestedLocalWorkSizeThenCorrectValuesAreReturned) { auto pKernelWithExecutionEnvironmentPatch = MockKernel::create(pCommandQueue->getDevice(), pProgram); auto kernelInfos = MockKernel::toKernelInfoContainer(pKernelWithExecutionEnvironmentPatch->getKernelInfo(), testedRootDeviceIndex); MultiDeviceKernel multiDeviceKernelWithExecutionEnvironmentPatch(MockMultiDeviceKernel::toKernelVector(pKernelWithExecutionEnvironmentPatch), kernelInfos); size_t globalWorkOffset[] = {0, 0, 0}; size_t globalWorkSize[] = {128, 128, 128}; size_t suggestedLocalWorkSize[] = {0, 0, 0}; cl_uint workDim = 3; Vec3 elws{0, 0, 0}; Vec3 gws{128, 128, 128}; Vec3 offset{0, 0, 0}; const DispatchInfo dispatchInfo{pDevice, pKernelWithExecutionEnvironmentPatch, workDim, gws, elws, offset}; auto expectedLws = computeWorkgroupSize(dispatchInfo); EXPECT_GT(expectedLws.x * expectedLws.y * expectedLws.z, 1u); retVal = clGetKernelSuggestedLocalWorkSizeINTEL(pCommandQueue, &multiDeviceKernelWithExecutionEnvironmentPatch, workDim, globalWorkOffset, globalWorkSize, suggestedLocalWorkSize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(expectedLws.x, suggestedLocalWorkSize[0]); EXPECT_EQ(expectedLws.y, suggestedLocalWorkSize[1]); EXPECT_EQ(expectedLws.z, suggestedLocalWorkSize[2]); } } // namespace ULT cl_get_kernel_suggested_local_work_size_khr_tests.inl000066400000000000000000000166071422164147700352420ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/api/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_queue/command_queue.h" #include 
"opencl/source/command_queue/gpgpu_walker.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "cl_api_tests.h" using namespace NEO; using clGetKernelSuggestedLocalWorkSizeKHRTests = api_tests; namespace ULT { TEST_F(clGetKernelSuggestedLocalWorkSizeKHRTests, GivenInvalidInputWhenCallingGetKernelSuggestedLocalWorkSizeThenErrorIsReturned) { size_t globalWorkOffset[3] = {}; size_t globalWorkSize[3] = {1, 1, 1}; size_t suggestedLocalWorkSize[3]; cl_uint workDim = 1; retVal = clGetKernelSuggestedLocalWorkSizeKHR(nullptr, pMultiDeviceKernel, workDim, globalWorkOffset, globalWorkSize, suggestedLocalWorkSize); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); retVal = clGetKernelSuggestedLocalWorkSizeKHR(pCommandQueue, nullptr, workDim, globalWorkOffset, globalWorkSize, suggestedLocalWorkSize); EXPECT_EQ(CL_INVALID_KERNEL, retVal); pKernel->isPatchedOverride = false; retVal = clGetKernelSuggestedLocalWorkSizeKHR(pCommandQueue, pMultiDeviceKernel, workDim, globalWorkOffset, globalWorkSize, suggestedLocalWorkSize); EXPECT_EQ(CL_INVALID_KERNEL, retVal); pKernel->isPatchedOverride = true; retVal = clGetKernelSuggestedLocalWorkSizeKHR(pCommandQueue, pMultiDeviceKernel, workDim, globalWorkOffset, globalWorkSize, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); retVal = clGetKernelSuggestedLocalWorkSizeKHR(pCommandQueue, pMultiDeviceKernel, 0, globalWorkOffset, globalWorkSize, suggestedLocalWorkSize); EXPECT_EQ(CL_INVALID_WORK_DIMENSION, retVal); retVal = clGetKernelSuggestedLocalWorkSizeKHR(pCommandQueue, pMultiDeviceKernel, 4, globalWorkOffset, globalWorkSize, suggestedLocalWorkSize); EXPECT_EQ(CL_INVALID_WORK_DIMENSION, retVal); retVal = clGetKernelSuggestedLocalWorkSizeKHR(pCommandQueue, pMultiDeviceKernel, workDim, globalWorkOffset, nullptr, suggestedLocalWorkSize); EXPECT_EQ(CL_INVALID_GLOBAL_WORK_SIZE, retVal); for (size_t i = 0; i < 3; ++i) { globalWorkSize[i] = 0; retVal = clGetKernelSuggestedLocalWorkSizeKHR(pCommandQueue, pMultiDeviceKernel, 3, globalWorkOffset, 
globalWorkSize, suggestedLocalWorkSize); EXPECT_EQ(CL_INVALID_GLOBAL_WORK_SIZE, retVal); globalWorkSize[i] = 1; } } TEST_F(clGetKernelSuggestedLocalWorkSizeKHRTests, GivenVariousInputWhenGettingSuggestedLocalWorkSizeThenCorrectValuesAreReturned) { size_t globalWorkOffset[] = {0, 0, 0}; size_t globalWorkSize[] = {128, 128, 128}; size_t suggestedLocalWorkSize[] = {0, 0, 0}; Vec3 elws{0, 0, 0}; Vec3 gws{128, 128, 128}; Vec3 offset{0, 0, 0}; DispatchInfo dispatchInfo{pDevice, pKernel, 1, gws, elws, offset}; auto expectedLws = computeWorkgroupSize(dispatchInfo); EXPECT_GT(expectedLws.x, 1u); retVal = clGetKernelSuggestedLocalWorkSizeKHR(pCommandQueue, pMultiDeviceKernel, 1, globalWorkOffset, globalWorkSize, suggestedLocalWorkSize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(expectedLws.x, suggestedLocalWorkSize[0]); EXPECT_EQ(0u, suggestedLocalWorkSize[1]); EXPECT_EQ(0u, suggestedLocalWorkSize[2]); dispatchInfo.setDim(2); expectedLws = computeWorkgroupSize(dispatchInfo); retVal = clGetKernelSuggestedLocalWorkSizeKHR(pCommandQueue, pMultiDeviceKernel, 2, globalWorkOffset, globalWorkSize, suggestedLocalWorkSize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(expectedLws.x, suggestedLocalWorkSize[0]); EXPECT_EQ(expectedLws.y, suggestedLocalWorkSize[1]); EXPECT_EQ(0u, suggestedLocalWorkSize[2]); dispatchInfo.setDim(3); expectedLws = computeWorkgroupSize(dispatchInfo); retVal = clGetKernelSuggestedLocalWorkSizeKHR(pCommandQueue, pMultiDeviceKernel, 3, globalWorkOffset, globalWorkSize, suggestedLocalWorkSize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(expectedLws.x, suggestedLocalWorkSize[0]); EXPECT_EQ(expectedLws.y, suggestedLocalWorkSize[1]); EXPECT_EQ(expectedLws.z, suggestedLocalWorkSize[2]); //null global work offset is fine retVal = clGetKernelSuggestedLocalWorkSizeKHR(pCommandQueue, pMultiDeviceKernel, 3, nullptr, globalWorkSize, suggestedLocalWorkSize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(expectedLws.x, suggestedLocalWorkSize[0]); EXPECT_EQ(expectedLws.y, 
suggestedLocalWorkSize[1]); EXPECT_EQ(expectedLws.z, suggestedLocalWorkSize[2]); } TEST_F(clGetKernelSuggestedLocalWorkSizeKHRTests, GivenKernelWithReqdWorkGroupSizeWhenGettingSuggestedLocalWorkSizeThenRequiredWorkSizeIsReturned) { size_t globalWorkOffset[] = {0, 0, 0}; size_t globalWorkSize[] = {128, 128, 128}; size_t suggestedLocalWorkSize[] = {0, 0, 0}; uint16_t regdLocalWorkSize[] = {32, 32, 32}; MockKernelWithInternals mockKernel(*pDevice); mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0] = regdLocalWorkSize[0]; mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1] = regdLocalWorkSize[1]; mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2] = regdLocalWorkSize[2]; retVal = clGetKernelSuggestedLocalWorkSizeKHR(pCommandQueue, mockKernel.mockMultiDeviceKernel, 3, globalWorkOffset, globalWorkSize, suggestedLocalWorkSize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(regdLocalWorkSize[0], suggestedLocalWorkSize[0]); EXPECT_EQ(regdLocalWorkSize[1], suggestedLocalWorkSize[1]); EXPECT_EQ(regdLocalWorkSize[2], suggestedLocalWorkSize[2]); } TEST_F(clGetKernelSuggestedLocalWorkSizeKHRTests, GivenKernelWithExecutionEnvironmentPatchedWhenGettingSuggestedLocalWorkSizeThenCorrectValuesAreReturned) { auto pKernelWithExecutionEnvironmentPatch = MockKernel::create(pCommandQueue->getDevice(), pProgram); auto kernelInfos = MockKernel::toKernelInfoContainer(pKernelWithExecutionEnvironmentPatch->getKernelInfo(), testedRootDeviceIndex); MultiDeviceKernel multiDeviceKernelWithExecutionEnvironmentPatch(MockMultiDeviceKernel::toKernelVector(pKernelWithExecutionEnvironmentPatch), kernelInfos); size_t globalWorkOffset[] = {0, 0, 0}; size_t globalWorkSize[] = {128, 128, 128}; size_t suggestedLocalWorkSize[] = {0, 0, 0}; cl_uint workDim = 3; Vec3 elws{0, 0, 0}; Vec3 gws{128, 128, 128}; Vec3 offset{0, 0, 0}; const DispatchInfo dispatchInfo{pDevice, pKernelWithExecutionEnvironmentPatch, workDim, gws, elws, 
offset}; auto expectedLws = computeWorkgroupSize(dispatchInfo); EXPECT_GT(expectedLws.x * expectedLws.y * expectedLws.z, 1u); retVal = clGetKernelSuggestedLocalWorkSizeKHR(pCommandQueue, &multiDeviceKernelWithExecutionEnvironmentPatch, workDim, globalWorkOffset, globalWorkSize, suggestedLocalWorkSize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(expectedLws.x, suggestedLocalWorkSize[0]); EXPECT_EQ(expectedLws.y, suggestedLocalWorkSize[1]); EXPECT_EQ(expectedLws.z, suggestedLocalWorkSize[2]); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_get_kernel_work_group_info_tests.inl000066400000000000000000000114171422164147700324200ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/device_binary_format/patchtokens_decoder.h" #include "opencl/test/unit_test/fixtures/kernel_work_group_info_fixture.h" using namespace NEO; namespace ULT { TEST_P(clGetKernelWorkGroupInfoTests, GivenValidParametersWhenGettingKernelWorkGroupInfoThenSuccessIsReturned) { size_t paramValueSizeRet; auto retVal = clGetKernelWorkGroupInfo( kernel, testedClDevice, GetParam(), 0, nullptr, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(0u, paramValueSizeRet); } TEST_F(clGetKernelWorkGroupInfoTest, GivenInvalidDeviceWhenGettingWorkGroupInfoFromSingleDeviceKernelThenInvalidDeviceErrorIsReturned) { size_t paramValueSizeRet; auto retVal = clGetKernelWorkGroupInfo( pMultiDeviceKernel, reinterpret_cast(pKernel), CL_KERNEL_WORK_GROUP_SIZE, 0, nullptr, ¶mValueSizeRet); EXPECT_EQ(CL_INVALID_DEVICE, retVal); } TEST_F(clGetKernelWorkGroupInfoTest, GivenNullDeviceWhenGettingWorkGroupInfoFromSingleDeviceKernelThenSuccessIsReturned) { size_t paramValueSizeRet; auto retVal = clGetKernelWorkGroupInfo( pMultiDeviceKernel, nullptr, CL_KERNEL_WORK_GROUP_SIZE, 0, nullptr, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clGetKernelWorkGroupInfoTest, 
GivenNullDeviceWhenGettingWorkGroupInfoFromMultiDeviceKernelThenInvalidDeviceErrorIsReturned) { size_t paramValueSizeRet; MockUnrestrictiveContext context; auto mockProgram = std::make_unique(&context, false, context.getDevices()); std::unique_ptr pMultiDeviceKernel( MockMultiDeviceKernel::create(mockProgram.get(), MockKernel::toKernelInfoContainer(pKernel->getKernelInfo(), context.getDevice(0)->getRootDeviceIndex()))); retVal = clGetKernelWorkGroupInfo( pMultiDeviceKernel.get(), nullptr, CL_KERNEL_WORK_GROUP_SIZE, 0, nullptr, ¶mValueSizeRet); EXPECT_EQ(CL_INVALID_DEVICE, retVal); } TEST_F(clGetKernelWorkGroupInfoTests, GivenKernelRequiringScratchSpaceWhenGettingKernelWorkGroupInfoThenCorrectSpillMemSizeIsReturned) { size_t paramValueSizeRet; cl_ulong param_value; auto pDevice = castToObject(testedClDevice); MockKernelWithInternals mockKernel(*pDevice); mockKernel.kernelInfo.setPerThreadScratchSize(1024, 0); cl_ulong scratchSpaceSize = static_cast(mockKernel.mockKernel->getScratchSize()); EXPECT_EQ(scratchSpaceSize, 1024u); retVal = clGetKernelWorkGroupInfo( mockKernel.mockMultiDeviceKernel, pDevice, CL_KERNEL_SPILL_MEM_SIZE_INTEL, sizeof(cl_ulong), ¶m_value, ¶mValueSizeRet); EXPECT_EQ(retVal, CL_SUCCESS); EXPECT_EQ(paramValueSizeRet, sizeof(cl_ulong)); EXPECT_EQ(param_value, scratchSpaceSize); } using matcher = IsWithinProducts; HWTEST2_F(clGetKernelWorkGroupInfoTests, givenKernelHavingPrivateMemoryAllocationWhenAskedForPrivateAllocationSizeThenProperSizeIsReturned, matcher) { size_t paramValueSizeRet; cl_ulong param_value; auto pDevice = castToObject(testedClDevice); MockKernelWithInternals mockKernel(*pDevice); mockKernel.kernelInfo.setPrivateMemory(1024, false, 0, 0, 0); retVal = clGetKernelWorkGroupInfo( mockKernel.mockMultiDeviceKernel, pDevice, CL_KERNEL_PRIVATE_MEM_SIZE, sizeof(cl_ulong), ¶m_value, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(cl_ulong), paramValueSizeRet); EXPECT_EQ(1024U, param_value); } 
TEST_F(clGetKernelWorkGroupInfoTests, givenKernelNotHavingPrivateMemoryAllocationWhenAskedForPrivateAllocationSizeThenZeroIsReturned) { size_t paramValueSizeRet; cl_ulong param_value; auto pDevice = castToObject(testedClDevice); MockKernelWithInternals mockKernel(*pDevice); retVal = clGetKernelWorkGroupInfo( mockKernel.mockMultiDeviceKernel, pDevice, CL_KERNEL_PRIVATE_MEM_SIZE, sizeof(cl_ulong), ¶m_value, ¶mValueSizeRet); EXPECT_EQ(retVal, CL_SUCCESS); EXPECT_EQ(paramValueSizeRet, sizeof(cl_ulong)); EXPECT_EQ(param_value, 0u); } static cl_kernel_work_group_info paramNames[] = { CL_KERNEL_WORK_GROUP_SIZE, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, CL_KERNEL_LOCAL_MEM_SIZE, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, CL_KERNEL_SPILL_MEM_SIZE_INTEL, CL_KERNEL_PRIVATE_MEM_SIZE}; INSTANTIATE_TEST_CASE_P( api, clGetKernelWorkGroupInfoTests, testing::ValuesIn(paramNames)); } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_get_mem_object_info_tests.inl000066400000000000000000000057631422164147700307750ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/compiler_interface/compiler_interface.h" #include "shared/source/device/device.h" #include "shared/source/helpers/file_io.h" #include "shared/test/common/helpers/test_files.h" #include "opencl/source/context/context.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clGetMemObjectInfoTests; namespace ULT { TEST_F(clGetMemObjectInfoTests, GivenValidBufferWhenGettingMemObjectInfoThenCorrectBufferSizeIsReturned) { size_t bufferSize = 16; cl_mem buffer = nullptr; buffer = clCreateBuffer( pContext, 0, bufferSize, NULL, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); size_t paramValue = 0; retVal = clGetMemObjectInfo(buffer, CL_MEM_SIZE, sizeof(paramValue), ¶mValue, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(bufferSize, paramValue); retVal = clReleaseMemObject(buffer); 
EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clGetMemObjectInfoTests, GivenBufferWithMappedRegionWhenGettingMemObjectInfoThenCorrectMapCountIsReturned) { size_t bufferSize = 16; cl_mem buffer = nullptr; cl_queue_properties properties = 0; cl_command_queue cmdQ = clCreateCommandQueue(pContext, testedClDevice, properties, &retVal); buffer = clCreateBuffer( pContext, 0, bufferSize, NULL, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); clEnqueueMapBuffer( cmdQ, buffer, CL_TRUE, CL_MAP_WRITE, 0, 8, 0, nullptr, nullptr, nullptr); cl_uint paramValue = 0; retVal = clGetMemObjectInfo(buffer, CL_MEM_MAP_COUNT, sizeof(paramValue), ¶mValue, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(1u, paramValue); retVal = clReleaseMemObject(buffer); EXPECT_EQ(CL_SUCCESS, retVal); clReleaseCommandQueue(cmdQ); } TEST_F(clGetMemObjectInfoTests, GivenBufferCreatedFromSvmPointerWhenGettingMemObjectInfoThenClTrueIsReturned) { const ClDeviceInfo &devInfo = pDevice->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { size_t bufferSize = 64; cl_mem buffer = nullptr; auto ptr = clSVMAlloc(pContext, CL_MEM_READ_WRITE, bufferSize, 64); ASSERT_NE(nullptr, ptr); buffer = clCreateBuffer( pContext, CL_MEM_USE_HOST_PTR, bufferSize, ptr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); cl_bool paramValue = CL_FALSE; retVal = clGetMemObjectInfo(buffer, CL_MEM_USES_SVM_POINTER, sizeof(paramValue), ¶mValue, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(static_cast(CL_TRUE), paramValue); retVal = clReleaseMemObject(buffer); EXPECT_EQ(CL_SUCCESS, retVal); clSVMFree(pContext, ptr); } } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_get_pipe_info_tests.inl000066400000000000000000000125201422164147700276130ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/context/context.h" #include "opencl/source/mem_obj/pipe.h" #include 
"opencl/test/unit_test/test_macros/test_checks_ocl.h" #include "cl_api_tests.h" using namespace NEO; struct clGetPipeInfoTests : api_tests { VariableBackup supportsPipesBackup{&defaultHwInfo->capabilityTable.supportsPipes, true}; }; namespace ULT { TEST_F(clGetPipeInfoTests, GivenValidPipeWithPacketSizeOneWhenGettingPipeInfoThenPacketSizeReturnedIsOne) { auto pipe = clCreatePipe(pContext, CL_MEM_READ_WRITE, 1, 20, nullptr, &retVal); EXPECT_NE(nullptr, pipe); EXPECT_EQ(CL_SUCCESS, retVal); cl_uint paramValue = 0; size_t paramValueRetSize = 0; retVal = clGetPipeInfo(pipe, CL_PIPE_PACKET_SIZE, sizeof(paramValue), ¶mValue, ¶mValueRetSize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(paramValue, 1u); EXPECT_EQ(paramValueRetSize, sizeof(cl_uint)); clReleaseMemObject(pipe); } TEST_F(clGetPipeInfoTests, GivenValidPipeWithMaxPacketEqualTwentyWhenGettingPipeInfoThenMaxPacketReturnedIsTwenty) { auto pipe = clCreatePipe(pContext, CL_MEM_READ_WRITE, 1, 20, nullptr, &retVal); EXPECT_NE(nullptr, pipe); EXPECT_EQ(CL_SUCCESS, retVal); cl_uint paramValue = 0; size_t paramValueRetSize = 0; retVal = clGetPipeInfo(pipe, CL_PIPE_MAX_PACKETS, sizeof(paramValue), ¶mValue, ¶mValueRetSize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(paramValue, 20u); EXPECT_EQ(paramValueRetSize, sizeof(cl_uint)); clReleaseMemObject(pipe); } TEST_F(clGetPipeInfoTests, GivenInvalidParamNameWhenGettingPipeInfoThenClInvalidValueErrorIsReturned) { auto pipe = clCreatePipe(pContext, CL_MEM_READ_WRITE, 1, 20, nullptr, &retVal); EXPECT_NE(nullptr, pipe); EXPECT_EQ(CL_SUCCESS, retVal); cl_uint paramValue = 0; size_t paramValueRetSize = 0; retVal = clGetPipeInfo(pipe, CL_MEM_READ_WRITE, sizeof(paramValue), ¶mValue, ¶mValueRetSize); EXPECT_EQ(CL_INVALID_VALUE, retVal); clReleaseMemObject(pipe); } TEST_F(clGetPipeInfoTests, GivenInvalidParametersWhenGettingPipeInfoThenValueSizeRetIsNotUpdated) { auto pipe = clCreatePipe(pContext, CL_MEM_READ_WRITE, 1, 20, nullptr, &retVal); EXPECT_NE(nullptr, pipe); EXPECT_EQ(CL_SUCCESS, 
retVal); cl_uint paramValue = 0; size_t paramValueRetSize = 0x1234; retVal = clGetPipeInfo(pipe, CL_MEM_READ_WRITE, sizeof(paramValue), ¶mValue, ¶mValueRetSize); EXPECT_EQ(CL_INVALID_VALUE, retVal); EXPECT_EQ(0x1234u, paramValueRetSize); clReleaseMemObject(pipe); } TEST_F(clGetPipeInfoTests, GivenInvalidMemoryObjectWhenGettingPipeInfoThenClInvalidMemObjectErrorIsReturned) { cl_uint paramValue = 0; size_t paramValueRetSize = 0; char fakeMemoryObj[sizeof(Pipe)]; retVal = clGetPipeInfo((cl_mem)&fakeMemoryObj[0], CL_MEM_READ_WRITE, sizeof(paramValue), ¶mValue, ¶mValueRetSize); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } TEST_F(clGetPipeInfoTests, GivenNullParamValueWhenGettingPipeInfoThenClSuccessIsReturned) { auto pipe = clCreatePipe(pContext, CL_MEM_READ_WRITE, 1, 20, nullptr, &retVal); EXPECT_NE(nullptr, pipe); EXPECT_EQ(CL_SUCCESS, retVal); size_t paramValueRetSize = 0; cl_uint paramValue = 0; retVal = clGetPipeInfo(pipe, CL_PIPE_MAX_PACKETS, sizeof(paramValue), nullptr, ¶mValueRetSize); EXPECT_EQ(CL_SUCCESS, retVal); clReleaseMemObject(pipe); } TEST_F(clGetPipeInfoTests, GivenNullParamValueSizeRetWhenGettingPipeInfoThenClSuccessIsReturned) { auto pipe = clCreatePipe(pContext, CL_MEM_READ_WRITE, 1, 20, nullptr, &retVal); EXPECT_NE(nullptr, pipe); EXPECT_EQ(CL_SUCCESS, retVal); cl_uint paramValue = 0; retVal = clGetPipeInfo(pipe, CL_PIPE_MAX_PACKETS, sizeof(paramValue), ¶mValue, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); clReleaseMemObject(pipe); } TEST_F(clGetPipeInfoTests, GivenParamValueSizeRetTooSmallWhenGettingPipeInfoThenClInvalidValueErrorIsReturned) { auto pipe = clCreatePipe(pContext, CL_MEM_READ_WRITE, 1, 20, nullptr, &retVal); EXPECT_NE(nullptr, pipe); EXPECT_EQ(CL_SUCCESS, retVal); uint16_t paramValue = 0; size_t paramValueRetSize = 0; retVal = clGetPipeInfo(pipe, CL_PIPE_PACKET_SIZE, sizeof(paramValue), ¶mValue, ¶mValueRetSize); EXPECT_EQ(CL_INVALID_VALUE, retVal); clReleaseMemObject(pipe); } TEST_F(clGetPipeInfoTests, 
GivenBufferInsteadOfPipeWhenGettingPipeInfoThenClInvalidMemObjectErrorIsReturned) { auto buffer = clCreateBuffer(pContext, CL_MEM_READ_WRITE, 20, nullptr, &retVal); EXPECT_NE(nullptr, buffer); EXPECT_EQ(CL_SUCCESS, retVal); cl_uint paramValue = 0; size_t paramValueRetSize = 0; retVal = clGetPipeInfo(buffer, CL_PIPE_PACKET_SIZE, sizeof(paramValue), ¶mValue, ¶mValueRetSize); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); clReleaseMemObject(buffer); } TEST_F(clGetPipeInfoTests, WhenQueryingPipePropertiesThenNothingIsCopied) { auto pipe = clCreatePipe(pContext, CL_MEM_READ_WRITE, 1, 20, nullptr, &retVal); EXPECT_NE(nullptr, pipe); EXPECT_EQ(CL_SUCCESS, retVal); size_t paramSize = 1u; retVal = clGetPipeInfo(pipe, CL_PIPE_PROPERTIES, 0, nullptr, ¶mSize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, paramSize); clReleaseMemObject(pipe); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_get_platform_ids_tests.inl000066400000000000000000000103501422164147700303250ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/device_factory.h" #include "shared/test/common/helpers/ult_hw_config.h" #include "shared/test/common/helpers/variable_backup.h" #include "opencl/source/context/context.h" #include "opencl/source/platform/platform.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clGetPlatformIDsTests; namespace ULT { TEST_F(clGetPlatformIDsTests, GivenNullPlatformWhenGettingPlatformIdsThenNumberofPlatformsIsReturned) { cl_int retVal = CL_SUCCESS; cl_uint numPlatforms = 0; retVal = clGetPlatformIDs(0, nullptr, &numPlatforms); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_GT(numPlatforms, 0u); } TEST_F(clGetPlatformIDsTests, GivenPlatformsWhenGettingPlatformIdsThenPlatformsIdIsReturned) { cl_int retVal = CL_SUCCESS; cl_platform_id platform = nullptr; retVal = clGetPlatformIDs(1, &platform, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, 
platform); } TEST_F(clGetPlatformIDsTests, GivenNumEntriesZeroAndPlatformNotNullWhenGettingPlatformIdsThenClInvalidValueErrorIsReturned) { cl_int retVal = CL_SUCCESS; cl_platform_id platform = nullptr; retVal = clGetPlatformIDs(0, &platform, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST(clGetPlatformIDsNegativeTests, GivenFailedInitializationWhenGettingPlatformIdsThenClOutOfHostMemoryErrorIsReturned) { platformsImpl->clear(); VariableBackup backup{&ultHwConfig}; ultHwConfig.mockedPrepareDeviceEnvironmentsFuncResult = false; cl_int retVal = CL_SUCCESS; cl_platform_id platformRet = nullptr; cl_uint numPlatforms = 0; retVal = clGetPlatformIDs(1, &platformRet, &numPlatforms); EXPECT_EQ(CL_OUT_OF_HOST_MEMORY, retVal); EXPECT_EQ(0u, numPlatforms); EXPECT_EQ(nullptr, platformRet); platformsImpl->clear(); } TEST(clGetPlatformIDsNegativeTests, whenFailToCreateDeviceThenClGetPlatfomsIdsReturnsOutOfHostMemoryError) { VariableBackup createFuncBackup{&DeviceFactory::createRootDeviceFunc}; DeviceFactory::createRootDeviceFunc = [](ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex) -> std::unique_ptr { return nullptr; }; platformsImpl->clear(); cl_int retVal = CL_SUCCESS; cl_platform_id platformRet = nullptr; cl_uint numPlatforms = 0; retVal = clGetPlatformIDs(1, &platformRet, &numPlatforms); EXPECT_EQ(CL_OUT_OF_HOST_MEMORY, retVal); EXPECT_EQ(0u, numPlatforms); EXPECT_EQ(nullptr, platformRet); platformsImpl->clear(); } TEST(clGetPlatformIDsNegativeTests, whenFailToCreatePlatformThenClGetPlatfomsIdsReturnsOutOfHostMemoryError) { VariableBackup createFuncBackup{&Platform::createFunc}; Platform::createFunc = [](ExecutionEnvironment &executionEnvironment) -> std::unique_ptr { return nullptr; }; platformsImpl->clear(); cl_int retVal = CL_SUCCESS; cl_platform_id platformRet = nullptr; cl_uint numPlatforms = 0; retVal = clGetPlatformIDs(1, &platformRet, &numPlatforms); EXPECT_EQ(CL_OUT_OF_HOST_MEMORY, retVal); EXPECT_EQ(0u, numPlatforms); EXPECT_EQ(nullptr, 
platformRet); platformsImpl->clear(); } TEST(clGetPlatformIDsNegativeTests, whenFailToInitializePlatformThenClGetPlatfomsIdsReturnsOutOfHostMemoryError) { VariableBackup createFuncBackup{&Platform::createFunc}; struct FailingPlatform : public Platform { using Platform::Platform; bool initialize(std::vector> devices) override { return false; } }; Platform::createFunc = [](ExecutionEnvironment &executionEnvironment) -> std::unique_ptr { return std::make_unique(executionEnvironment); }; platformsImpl->clear(); cl_int retVal = CL_SUCCESS; cl_platform_id platformRet = nullptr; cl_uint numPlatforms = 0; retVal = clGetPlatformIDs(1, &platformRet, &numPlatforms); EXPECT_EQ(CL_OUT_OF_HOST_MEMORY, retVal); EXPECT_EQ(0u, numPlatforms); EXPECT_EQ(nullptr, platformRet); platformsImpl->clear(); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_get_platform_info_tests.inl000066400000000000000000000221121422164147700305000ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/test/unit_test/fixtures/platform_fixture.h" using namespace NEO; struct clGetPlatformInfoTests : Test { void SetUp() override { Test::SetUp(); } void TearDown() override { delete[] paramValue; Test::TearDown(); } char *getPlatformInfoString(Platform *pPlatform, cl_platform_info paramName) { size_t retSize; auto retVal = clGetPlatformInfo(pPlatform, paramName, 0, nullptr, &retSize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_GT(retSize, 0u); auto value = new char[retSize]; retVal = clGetPlatformInfo(pPlatform, paramName, retSize, value, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); return value; } size_t retSize = 0; char *paramValue = nullptr; }; namespace ULT { TEST_F(clGetPlatformInfoTests, GivenClPlatformProfileWhenGettingPlatformInfoStringThenFullProfileIsReturned) { paramValue = getPlatformInfoString(pPlatform, 
CL_PLATFORM_PROFILE); EXPECT_STREQ(paramValue, "FULL_PROFILE"); } class clGetPlatformInfoParameterizedTests : public clGetPlatformInfoTests, public ::testing::WithParamInterface { void SetUp() override { DebugManager.flags.ForceOCLVersion.set(GetParam()); clGetPlatformInfoTests::SetUp(); } void TearDown() override { clGetPlatformInfoTests::TearDown(); DebugManager.flags.ForceOCLVersion.set(0); } }; TEST_P(clGetPlatformInfoParameterizedTests, GivenClPlatformVersionWhenGettingPlatformInfoStringThenCorrectOpenClVersionIsReturned) { paramValue = getPlatformInfoString(pPlatform, CL_PLATFORM_VERSION); cl_version platformNumericVersion = 0; auto retVal = clGetPlatformInfo(pPlatform, CL_PLATFORM_NUMERIC_VERSION, sizeof(platformNumericVersion), &platformNumericVersion, &retSize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(cl_version), retSize); std::string expectedPlatformVersion; cl_version expectedNumericPlatformVersion; switch (GetParam()) { case 30: expectedPlatformVersion = "OpenCL 3.0 "; expectedNumericPlatformVersion = CL_MAKE_VERSION(3, 0, 0); break; case 21: expectedPlatformVersion = "OpenCL 2.1 "; expectedNumericPlatformVersion = CL_MAKE_VERSION(2, 1, 0); break; case 12: default: expectedPlatformVersion = "OpenCL 1.2 "; expectedNumericPlatformVersion = CL_MAKE_VERSION(1, 2, 0); break; } EXPECT_STREQ(expectedPlatformVersion.c_str(), paramValue); EXPECT_EQ(expectedNumericPlatformVersion, platformNumericVersion); } INSTANTIATE_TEST_CASE_P(OCLVersions, clGetPlatformInfoParameterizedTests, ::testing::Values(12, 21, 30)); TEST_F(clGetPlatformInfoTests, GivenClPlatformNameWhenGettingPlatformInfoStringThenCorrectStringIsReturned) { paramValue = getPlatformInfoString(pPlatform, CL_PLATFORM_NAME); EXPECT_STREQ(paramValue, "Intel(R) OpenCL HD Graphics"); } TEST_F(clGetPlatformInfoTests, GivenClPlatformVendorWhenGettingPlatformInfoStringThenCorrectStringIsReturned) { paramValue = getPlatformInfoString(pPlatform, CL_PLATFORM_VENDOR); EXPECT_STREQ(paramValue, "Intel(R) 
Corporation"); } TEST_F(clGetPlatformInfoTests, GivenClPlatformExtensionsWhenGettingPlatformInfoStringThenExtensionStringIsReturned) { paramValue = getPlatformInfoString(pPlatform, CL_PLATFORM_EXTENSIONS); EXPECT_NE(nullptr, strstr(paramValue, "cl_khr_icd ")); EXPECT_NE(nullptr, strstr(paramValue, "cl_khr_fp16 ")); } TEST_F(clGetPlatformInfoTests, GivenClPlatformIcdSuffixKhrWhenGettingPlatformInfoStringThenIntelIsReturned) { paramValue = getPlatformInfoString(pPlatform, CL_PLATFORM_ICD_SUFFIX_KHR); EXPECT_STREQ(paramValue, "INTEL"); } TEST_F(clGetPlatformInfoTests, GivenClPlatformHostTimerResolutionWhenGettingPlatformInfoStringThenCorrectResolutionIsReturned) { auto retVal = clGetPlatformInfo(pPlatform, CL_PLATFORM_HOST_TIMER_RESOLUTION, 0, nullptr, &retSize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_GT(retSize, 0u); cl_ulong value = 0; retVal = clGetPlatformInfo(pPlatform, CL_PLATFORM_HOST_TIMER_RESOLUTION, retSize, &value, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); auto device = pPlatform->getClDevice(0); cl_ulong resolution = static_cast(device->getPlatformHostTimerResolution()); EXPECT_EQ(resolution, value); } TEST_F(clGetPlatformInfoTests, GivenNullPlatformWhenGettingPlatformInfoStringThenClInvalidPlatformErrorIsReturned) { char extensions[512]; auto retVal = clGetPlatformInfo( nullptr, // invalid platform CL_PLATFORM_EXTENSIONS, sizeof(extensions), extensions, &retSize); EXPECT_EQ(CL_INVALID_PLATFORM, retVal); } TEST_F(clGetPlatformInfoTests, GivenInvalidParamNameWhenGettingPlatformInfoStringThenClInvalidValueErrorIsReturned) { char extensions[512]; auto retVal = clGetPlatformInfo( pPlatform, 0, // invalid platform info enum sizeof(extensions), extensions, &retSize); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(clGetPlatformInfoTests, GivenInvalidParametersWhenGettingPlatformInfoThenValueSizeRetIsNotUpdated) { char extensions[512]; retSize = 0x1234; auto retVal = clGetPlatformInfo( pPlatform, 0, // invalid platform info enum sizeof(extensions), extensions, 
&retSize); EXPECT_EQ(CL_INVALID_VALUE, retVal); EXPECT_EQ(0x1234u, retSize); } TEST_F(clGetPlatformInfoTests, GivenInvalidParamSizeWhenGettingPlatformInfoStringThenClInvalidValueErrorIsReturned) { char extensions[512]; auto retVal = clGetPlatformInfo( pPlatform, CL_PLATFORM_EXTENSIONS, 0, // invalid size extensions, &retSize); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(clGetPlatformInfoTests, GivenDeviceWhenGettingIcdDispatchTableThenDeviceAndPlatformTablesMatch) { EXPECT_NE(pPlatform->dispatch.icdDispatch, nullptr); for (size_t deviceOrdinal = 0; deviceOrdinal < pPlatform->getNumDevices(); ++deviceOrdinal) { auto device = pPlatform->getClDevice(deviceOrdinal); ASSERT_NE(nullptr, device); EXPECT_EQ(pPlatform->dispatch.icdDispatch, device->dispatch.icdDispatch); } } TEST_F(clGetPlatformInfoTests, WhenCheckingPlatformExtensionsWithVersionThenTheyMatchPlatformExtensions) { auto retVal = clGetPlatformInfo(pPlatform, CL_PLATFORM_EXTENSIONS_WITH_VERSION, 0, nullptr, &retSize); EXPECT_EQ(CL_SUCCESS, retVal); size_t extensionsCount = retSize / sizeof(cl_name_version); auto platformExtensionsWithVersion = std::make_unique(extensionsCount); retVal = clGetPlatformInfo(pPlatform, CL_PLATFORM_EXTENSIONS_WITH_VERSION, retSize, platformExtensionsWithVersion.get(), nullptr); EXPECT_EQ(CL_SUCCESS, retVal); std::string allExtensions; for (size_t i = 0; i < extensionsCount; i++) { EXPECT_EQ(CL_MAKE_VERSION(1u, 0u, 0u), platformExtensionsWithVersion[i].version); allExtensions += platformExtensionsWithVersion[i].name; allExtensions += " "; } EXPECT_STREQ(pPlatform->getPlatformInfo().extensions.c_str(), allExtensions.c_str()); } class GetPlatformInfoTests : public PlatformFixture, public testing::TestWithParam { using PlatformFixture::SetUp; public: GetPlatformInfoTests() {} protected: void SetUp() override { platformInfo = GetParam(); PlatformFixture::SetUp(); } void TearDown() override { PlatformFixture::TearDown(); } char *getPlatformInfoString(Platform *pPlatform, 
cl_platform_info paramName) { size_t retSize; auto retVal = clGetPlatformInfo(pPlatform, paramName, 0, nullptr, &retSize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_GT(retSize, 0u); auto value = new char[retSize]; retVal = clGetPlatformInfo(pPlatform, paramName, retSize, value, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); return value; } cl_int retVal = CL_SUCCESS; size_t retSize = 0; cl_platform_info platformInfo = 0; const HardwareInfo *pHwInfo = nullptr; }; TEST_P(GetPlatformInfoTests, GivenValidParamWhenGettingPlatformInfoStringThenNonEmptyStringIsReturned) { auto paramValue = getPlatformInfoString(pPlatform, platformInfo); EXPECT_STRNE(paramValue, ""); delete[] paramValue; } const cl_platform_info PlatformInfoTestValues[] = { CL_PLATFORM_PROFILE, CL_PLATFORM_VERSION, CL_PLATFORM_NAME, CL_PLATFORM_VENDOR, CL_PLATFORM_EXTENSIONS, CL_PLATFORM_ICD_SUFFIX_KHR, }; INSTANTIATE_TEST_CASE_P(api, GetPlatformInfoTests, ::testing::ValuesIn(PlatformInfoTestValues)); } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_get_program_build_info_tests.inl000066400000000000000000000454771422164147700315250ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/compiler_interface/compiler_interface.h" #include "shared/source/compiler_interface/intermediate_representations.h" #include "shared/source/device/device.h" #include "shared/source/device_binary_format/elf/elf.h" #include "shared/source/device_binary_format/elf/elf_encoder.h" #include "shared/source/device_binary_format/elf/ocl_elf.h" #include "shared/source/helpers/file_io.h" #include "shared/test/common/helpers/kernel_binary_helper.h" #include "shared/test/common/helpers/test_files.h" #include "opencl/source/context/context.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clGetProgramBuildInfoTests; namespace ULT { void verifyDevices(cl_program pProgram, size_t expectedNumDevices, cl_device_id 
*expectedDevices); TEST_F(clGetProgramBuildInfoTests, givenSourceWhenclGetProgramBuildInfoIsCalledThenReturnClBuildNone) { cl_program pProgram = nullptr; std::unique_ptr pSource = nullptr; size_t sourceSize = 0; std::string testFile; KernelBinaryHelper kbHelper("CopyBuffer_simd16"); testFile.append(clFiles); testFile.append("CopyBuffer_simd16.cl"); pSource = loadDataFromFile( testFile.c_str(), sourceSize); ASSERT_NE(0u, sourceSize); ASSERT_NE(nullptr, pSource); const char *sources[1] = {pSource.get()}; pProgram = clCreateProgramWithSource( pContext, 1, sources, &sourceSize, &retVal); EXPECT_NE(nullptr, pProgram); ASSERT_EQ(CL_SUCCESS, retVal); cl_build_status buildStatus; retVal = clGetProgramBuildInfo(pProgram, testedClDevice, CL_PROGRAM_BUILD_STATUS, sizeof(buildStatus), &buildStatus, NULL); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(CL_BUILD_NONE, buildStatus); retVal = clCompileProgram( pProgram, 1, &testedClDevice, nullptr, 0, nullptr, nullptr, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clGetProgramBuildInfo(pProgram, testedClDevice, CL_PROGRAM_BUILD_STATUS, sizeof(buildStatus), &buildStatus, NULL); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(CL_BUILD_SUCCESS, buildStatus); retVal = clBuildProgram( pProgram, 1, &testedClDevice, nullptr, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clGetProgramBuildInfo(pProgram, testedClDevice, CL_PROGRAM_BUILD_STATUS, sizeof(buildStatus), &buildStatus, NULL); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(CL_BUILD_SUCCESS, buildStatus); // try to get program build info for invalid program object - should fail retVal = clGetProgramBuildInfo(nullptr, testedClDevice, CL_PROGRAM_BUILD_STATUS, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_PROGRAM, retVal); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(clGetProgramBuildInfoTest, givenMultiDeviceProgramWhenCompilingForSpecificDevicesThenOnlySpecificDevicesAndTheirSubDevicesReportBuildStatus) { cl_program pProgram = nullptr; 
std::unique_ptr pSource = nullptr; size_t sourceSize = 0; std::string testFile; KernelBinaryHelper kbHelper("CopyBuffer_simd16"); testFile.append(clFiles); testFile.append("CopyBuffer_simd16.cl"); pSource = loadDataFromFile( testFile.c_str(), sourceSize); ASSERT_NE(0u, sourceSize); ASSERT_NE(nullptr, pSource); const char *sources[1] = {pSource.get()}; MockUnrestrictiveContextMultiGPU context; cl_int retVal = CL_INVALID_PROGRAM; pProgram = clCreateProgramWithSource( &context, 1, sources, &sourceSize, &retVal); EXPECT_NE(nullptr, pProgram); ASSERT_EQ(CL_SUCCESS, retVal); cl_build_status buildStatus; for (const auto &device : context.getDevices()) { retVal = clGetProgramBuildInfo(pProgram, device, CL_PROGRAM_BUILD_STATUS, sizeof(buildStatus), &buildStatus, NULL); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(CL_BUILD_NONE, buildStatus); } cl_device_id devicesForCompilation[] = {context.getDevice(1), context.getDevice(3)}; cl_device_id associatedSubDevices[] = {context.getDevice(4), context.getDevice(5)}; cl_device_id devicesNotForCompilation[] = {context.getDevice(0), context.getDevice(2)}; retVal = clCompileProgram( pProgram, 2, devicesForCompilation, nullptr, 0, nullptr, nullptr, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); for (const auto &device : devicesNotForCompilation) { retVal = clGetProgramBuildInfo(pProgram, device, CL_PROGRAM_BUILD_STATUS, sizeof(buildStatus), &buildStatus, NULL); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(CL_BUILD_NONE, buildStatus); } for (const auto &device : devicesForCompilation) { retVal = clGetProgramBuildInfo(pProgram, device, CL_PROGRAM_BUILD_STATUS, sizeof(buildStatus), &buildStatus, NULL); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(CL_BUILD_SUCCESS, buildStatus); } for (const auto &device : associatedSubDevices) { retVal = clGetProgramBuildInfo(pProgram, device, CL_PROGRAM_BUILD_STATUS, sizeof(buildStatus), &buildStatus, NULL); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(CL_BUILD_SUCCESS, buildStatus); } retVal = 
clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(clGetProgramBuildInfoTest, givenMultiDeviceProgramWhenCompilingWithoutInputDevicesThenAllDevicesReportBuildStatus) { cl_program pProgram = nullptr; std::unique_ptr pSource = nullptr; size_t sourceSize = 0; std::string testFile; KernelBinaryHelper kbHelper("CopyBuffer_simd16"); testFile.append(clFiles); testFile.append("CopyBuffer_simd16.cl"); pSource = loadDataFromFile( testFile.c_str(), sourceSize); ASSERT_NE(0u, sourceSize); ASSERT_NE(nullptr, pSource); const char *sources[1] = {pSource.get()}; MockUnrestrictiveContextMultiGPU context; cl_int retVal = CL_INVALID_PROGRAM; pProgram = clCreateProgramWithSource( &context, 1, sources, &sourceSize, &retVal); EXPECT_NE(nullptr, pProgram); ASSERT_EQ(CL_SUCCESS, retVal); cl_build_status buildStatus; for (const auto &device : context.getDevices()) { retVal = clGetProgramBuildInfo(pProgram, device, CL_PROGRAM_BUILD_STATUS, sizeof(buildStatus), &buildStatus, NULL); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(CL_BUILD_NONE, buildStatus); } retVal = clCompileProgram( pProgram, 0, nullptr, nullptr, 0, nullptr, nullptr, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); for (const auto &device : context.getDevices()) { retVal = clGetProgramBuildInfo(pProgram, device, CL_PROGRAM_BUILD_STATUS, sizeof(buildStatus), &buildStatus, NULL); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(CL_BUILD_SUCCESS, buildStatus); } retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clGetProgramBuildInfoTests, givenElfBinaryWhenclGetProgramBuildInfoIsCalledThenReturnClBuildNone) { cl_program pProgram = nullptr; cl_int binaryStatus = CL_INVALID_VALUE; NEO::Elf::ElfEncoder elfEncoder; elfEncoder.getElfFileHeader().type = NEO::Elf::ET_OPENCL_LIBRARY; const uint8_t data[4] = {}; elfEncoder.appendSection(NEO::Elf::SHT_OPENCL_OPTIONS, NEO::Elf::SectionNamesOpenCl::buildOptions, data); elfEncoder.appendSection(NEO::Elf::SHT_OPENCL_SPIRV, 
NEO::Elf::SectionNamesOpenCl::spirvObject, ArrayRef::fromAny(NEO::spirvMagic.begin(), NEO::spirvMagic.size())); auto elfBinary = elfEncoder.encode(); const size_t binarySize = elfBinary.size(); const unsigned char *elfBinaryTemp = reinterpret_cast(elfBinary.data()); pProgram = clCreateProgramWithBinary( pContext, 1, &testedClDevice, &binarySize, &elfBinaryTemp, &binaryStatus, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, pProgram); EXPECT_EQ(CL_SUCCESS, binaryStatus); cl_build_status buildStatus; retVal = clGetProgramBuildInfo(pProgram, testedClDevice, CL_PROGRAM_BUILD_STATUS, sizeof(buildStatus), &buildStatus, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(CL_BUILD_NONE, buildStatus); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clGetProgramBuildInfoTests, givenInvalidDeviceInputWhenGetProgramBuildInfoIsCalledThenInvalidDeviceErrorIsReturned) { cl_build_status buildStatus; retVal = clGetProgramBuildInfo(pProgram, nullptr, CL_PROGRAM_BUILD_STATUS, sizeof(buildStatus), &buildStatus, nullptr); EXPECT_EQ(CL_INVALID_DEVICE, retVal); retVal = clGetProgramBuildInfo(pProgram, reinterpret_cast(pProgram), CL_PROGRAM_BUILD_STATUS, sizeof(buildStatus), &buildStatus, nullptr); EXPECT_EQ(CL_INVALID_DEVICE, retVal); MockContext context; retVal = clGetProgramBuildInfo(pProgram, context.getDevice(0), CL_PROGRAM_BUILD_STATUS, sizeof(buildStatus), &buildStatus, nullptr); EXPECT_EQ(CL_INVALID_DEVICE, retVal); } TEST(clGetProgramBuildInfoTest, givenMultiDeviceProgramWhenLinkingForSpecificDevicesThenOnlySpecificDevicesReportBuildStatus) { MockProgram *pProgram = nullptr; std::unique_ptr pSource = nullptr; size_t sourceSize = 0; std::string testFile; KernelBinaryHelper kbHelper("CopyBuffer_simd16"); testFile.append(clFiles); testFile.append("CopyBuffer_simd16.cl"); pSource = loadDataFromFile( testFile.c_str(), sourceSize); ASSERT_NE(0u, sourceSize); ASSERT_NE(nullptr, pSource); const char *sources[1] = {pSource.get()}; 
MockUnrestrictiveContextMultiGPU context; cl_int retVal = CL_INVALID_PROGRAM; pProgram = Program::create( &context, 1, sources, &sourceSize, retVal); EXPECT_NE(nullptr, pProgram); ASSERT_EQ(CL_SUCCESS, retVal); cl_build_status buildStatus; for (const auto &device : context.getDevices()) { retVal = clGetProgramBuildInfo(pProgram, device, CL_PROGRAM_BUILD_STATUS, sizeof(buildStatus), &buildStatus, NULL); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(CL_BUILD_NONE, buildStatus); } retVal = clCompileProgram( pProgram, 0, nullptr, nullptr, 0, nullptr, nullptr, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); pProgram->setBuildStatus(CL_BUILD_NONE); cl_device_id devicesForLinking[] = {context.getDevice(1), context.getDevice(3)}; cl_program programForLinking = pProgram; auto outProgram = clLinkProgram( &context, 2, devicesForLinking, nullptr, 1, &programForLinking, nullptr, nullptr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, outProgram); verifyDevices(outProgram, 2, devicesForLinking); for (const auto &device : devicesForLinking) { retVal = clGetProgramBuildInfo(outProgram, device, CL_PROGRAM_BUILD_STATUS, sizeof(buildStatus), &buildStatus, NULL); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(CL_BUILD_SUCCESS, buildStatus); } retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseProgram(outProgram); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(clGetProgramBuildInfoTest, givenMultiDeviceProgramWhenLinkingWithoutInputDevicesThenAllDevicesReportBuildStatus) { MockProgram *pProgram = nullptr; std::unique_ptr pSource = nullptr; size_t sourceSize = 0; std::string testFile; KernelBinaryHelper kbHelper("CopyBuffer_simd16"); testFile.append(clFiles); testFile.append("CopyBuffer_simd16.cl"); pSource = loadDataFromFile( testFile.c_str(), sourceSize); ASSERT_NE(0u, sourceSize); ASSERT_NE(nullptr, pSource); const char *sources[1] = {pSource.get()}; MockUnrestrictiveContextMultiGPU context; cl_int retVal = CL_INVALID_PROGRAM; pProgram = Program::create( 
&context, 1, sources, &sourceSize, retVal); EXPECT_NE(nullptr, pProgram); ASSERT_EQ(CL_SUCCESS, retVal); cl_build_status buildStatus; for (const auto &device : context.getDevices()) { retVal = clGetProgramBuildInfo(pProgram, device, CL_PROGRAM_BUILD_STATUS, sizeof(buildStatus), &buildStatus, NULL); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(CL_BUILD_NONE, buildStatus); } retVal = clCompileProgram( pProgram, 0, nullptr, nullptr, 0, nullptr, nullptr, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); pProgram->setBuildStatus(CL_BUILD_NONE); cl_program programForLinking = pProgram; auto outProgram = clLinkProgram( &context, 0, nullptr, nullptr, 1, &programForLinking, nullptr, nullptr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, outProgram); std::vector contextDevices; context.getDevices().toDeviceIDs(contextDevices); verifyDevices(outProgram, contextDevices.size(), contextDevices.data()); for (const auto &device : context.getDevices()) { retVal = clGetProgramBuildInfo(outProgram, device, CL_PROGRAM_BUILD_STATUS, sizeof(buildStatus), &buildStatus, NULL); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(CL_BUILD_SUCCESS, buildStatus); } retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseProgram(outProgram); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(clGetProgramBuildInfoTest, givenMultiDeviceProgramWhenBuildingForSpecificDevicesThenOnlySpecificDevicesAndTheirSubDevicesReportBuildStatus) { MockProgram *pProgram = nullptr; std::unique_ptr pSource = nullptr; size_t sourceSize = 0; std::string testFile; KernelBinaryHelper kbHelper("CopyBuffer_simd16"); testFile.append(clFiles); testFile.append("CopyBuffer_simd16.cl"); pSource = loadDataFromFile( testFile.c_str(), sourceSize); ASSERT_NE(0u, sourceSize); ASSERT_NE(nullptr, pSource); const char *sources[1] = {pSource.get()}; MockUnrestrictiveContextMultiGPU context; cl_int retVal = CL_INVALID_PROGRAM; pProgram = Program::create( &context, 1, sources, &sourceSize, retVal); EXPECT_NE(nullptr, 
pProgram); ASSERT_EQ(CL_SUCCESS, retVal); cl_build_status buildStatus; cl_program_binary_type binaryType; cl_program_binary_type expectedBinaryType = CL_PROGRAM_BINARY_TYPE_EXECUTABLE; for (const auto &device : context.getDevices()) { retVal = clGetProgramBuildInfo(pProgram, device, CL_PROGRAM_BUILD_STATUS, sizeof(buildStatus), &buildStatus, NULL); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(CL_BUILD_NONE, buildStatus); } cl_device_id devicesForBuild[] = {context.getDevice(1), context.getDevice(3)}; cl_device_id associatedSubDevices[] = {context.getDevice(4), context.getDevice(5)}; cl_device_id devicesNotForBuild[] = {context.getDevice(0), context.getDevice(2)}; retVal = clBuildProgram( pProgram, 2, devicesForBuild, nullptr, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); for (const auto &device : devicesForBuild) { retVal = clGetProgramBuildInfo(pProgram, device, CL_PROGRAM_BUILD_STATUS, sizeof(buildStatus), &buildStatus, NULL); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(CL_BUILD_SUCCESS, buildStatus); retVal = clGetProgramBuildInfo(pProgram, device, CL_PROGRAM_BINARY_TYPE, sizeof(binaryType), &binaryType, NULL); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(expectedBinaryType, binaryType); } for (const auto &device : associatedSubDevices) { retVal = clGetProgramBuildInfo(pProgram, device, CL_PROGRAM_BUILD_STATUS, sizeof(buildStatus), &buildStatus, NULL); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(CL_BUILD_SUCCESS, buildStatus); retVal = clGetProgramBuildInfo(pProgram, device, CL_PROGRAM_BINARY_TYPE, sizeof(binaryType), &binaryType, NULL); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(expectedBinaryType, binaryType); } for (const auto &device : devicesNotForBuild) { retVal = clGetProgramBuildInfo(pProgram, device, CL_PROGRAM_BUILD_STATUS, sizeof(buildStatus), &buildStatus, NULL); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(CL_BUILD_NONE, buildStatus); } retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(clGetProgramBuildInfoTest, 
givenMultiDeviceProgramWhenBuildingWithoutInputDevicesThenAllDevicesReportBuildStatus) { MockProgram *pProgram = nullptr; std::unique_ptr pSource = nullptr; size_t sourceSize = 0; std::string testFile; KernelBinaryHelper kbHelper("CopyBuffer_simd16"); testFile.append(clFiles); testFile.append("CopyBuffer_simd16.cl"); pSource = loadDataFromFile( testFile.c_str(), sourceSize); ASSERT_NE(0u, sourceSize); ASSERT_NE(nullptr, pSource); const char *sources[1] = {pSource.get()}; MockUnrestrictiveContextMultiGPU context; cl_int retVal = CL_INVALID_PROGRAM; pProgram = Program::create( &context, 1, sources, &sourceSize, retVal); EXPECT_NE(nullptr, pProgram); ASSERT_EQ(CL_SUCCESS, retVal); cl_build_status buildStatus; cl_program_binary_type binaryType; cl_program_binary_type expectedBinaryType = CL_PROGRAM_BINARY_TYPE_EXECUTABLE; for (const auto &device : context.getDevices()) { retVal = clGetProgramBuildInfo(pProgram, device, CL_PROGRAM_BUILD_STATUS, sizeof(buildStatus), &buildStatus, NULL); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(CL_BUILD_NONE, buildStatus); } retVal = clBuildProgram( pProgram, 0, nullptr, nullptr, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); for (const auto &device : context.getDevices()) { retVal = clGetProgramBuildInfo(pProgram, device, CL_PROGRAM_BUILD_STATUS, sizeof(buildStatus), &buildStatus, NULL); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(CL_BUILD_SUCCESS, buildStatus); retVal = clGetProgramBuildInfo(pProgram, device, CL_PROGRAM_BINARY_TYPE, sizeof(binaryType), &binaryType, NULL); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(expectedBinaryType, binaryType); } retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_get_program_info_tests.inl000066400000000000000000000407451422164147700303370ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include 
"shared/source/compiler_interface/compiler_interface.h" #include "shared/source/device/device.h" #include "shared/source/helpers/file_io.h" #include "shared/test/common/helpers/kernel_binary_helper.h" #include "shared/test/common/helpers/test_files.h" #include "opencl/source/context/context.h" #include "opencl/test/unit_test/fixtures/run_kernel_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/test_macros/test_checks_ocl.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clGetProgramInfoTests; namespace ULT { static_assert(CL_PROGRAM_IL == CL_PROGRAM_IL_KHR, "Param values are different"); void verifyDevices(cl_program pProgram, size_t expectedNumDevices, cl_device_id *expectedDevices) { cl_uint numDevices; auto retVal = clGetProgramInfo(pProgram, CL_PROGRAM_NUM_DEVICES, sizeof(numDevices), &numDevices, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(expectedNumDevices, numDevices); auto programDevices = std::make_unique(expectedNumDevices); for (auto i = 0u; i < expectedNumDevices; i++) { programDevices[i] = nullptr; } retVal = clGetProgramInfo(pProgram, CL_PROGRAM_DEVICES, expectedNumDevices * sizeof(cl_device_id), programDevices.get(), nullptr); EXPECT_EQ(CL_SUCCESS, retVal); for (auto i = 0u; i < expectedNumDevices; i++) { EXPECT_EQ(expectedDevices[i], programDevices[i]); } } TEST_F(clGetProgramInfoTests, GivenSourceWhenBuildingProgramThenGetProgramInfoReturnsCorrectInfo) { cl_program pProgram = nullptr; std::unique_ptr pSource = nullptr; size_t sourceSize = 0; std::string testFile; KernelBinaryHelper kbHelper("CopyBuffer_simd16", false); testFile.append(clFiles); testFile.append("CopyBuffer_simd16.cl"); pSource = loadDataFromFile( testFile.c_str(), sourceSize); ASSERT_NE(0u, sourceSize); ASSERT_NE(nullptr, pSource); const char *sources[1] = {pSource.get()}; pProgram = clCreateProgramWithSource( pContext, 1, sources, &sourceSize, &retVal); EXPECT_NE(nullptr, pProgram); ASSERT_EQ(CL_SUCCESS, 
retVal); retVal = clBuildProgram( pProgram, 1, &testedClDevice, nullptr, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); cl_uint numDevices; retVal = clGetProgramInfo(pProgram, CL_PROGRAM_NUM_DEVICES, sizeof(numDevices), &numDevices, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, numDevices); cl_device_id programDevices; retVal = clGetProgramInfo(pProgram, CL_PROGRAM_DEVICES, sizeof(programDevices), &programDevices, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(testedClDevice, programDevices); size_t length = 0; char buffer[10240]; retVal = clGetProgramInfo(pProgram, CL_PROGRAM_SOURCE, 0, nullptr, &length); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sourceSize + 1, length); retVal = clGetProgramInfo(pProgram, CL_PROGRAM_SOURCE, sizeof(buffer), buffer, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(strlen(pSource.get()), strlen(buffer)); // try to get program info for invalid program object - should fail retVal = clGetProgramInfo(nullptr, CL_PROGRAM_SOURCE, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_PROGRAM, retVal); // set paramValueSizeRet to 0 for IL program queries on non-IL programs size_t sourceSizeRet = sourceSize; retVal = clGetProgramInfo(pProgram, CL_PROGRAM_IL, 0, nullptr, &sourceSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, sourceSizeRet); retVal = clGetProgramInfo(pProgram, CL_PROGRAM_IL, sourceSizeRet, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(clGetProgramInfoTest, GivenMultiDeviceProgramCreatedWithSourceWhenGettingDevicesThenCorrectDevicesAreReturned) { MockUnrestrictiveContextMultiGPU context; auto expectedNumDevices = context.getNumDevices(); auto devicesForProgram = std::make_unique(expectedNumDevices); for (auto i = 0u; i < expectedNumDevices; i++) { devicesForProgram[i] = context.getDevice(i); } auto pSource = "//"; size_t sourceSize = 2; const char *sources[1] = {pSource}; cl_program pProgram = nullptr; cl_int retVal = CL_INVALID_PROGRAM; 
pProgram = clCreateProgramWithSource( &context, 1, sources, &sourceSize, &retVal); EXPECT_NE(nullptr, pProgram); EXPECT_EQ(CL_SUCCESS, retVal); verifyDevices(pProgram, expectedNumDevices, devicesForProgram.get()); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clGetProgramInfoTests, GivenIlWhenBuildingProgramThenGetProgramInfoReturnsCorrectInfo) { const size_t binarySize = 16; const uint32_t spirv[binarySize] = {0x03022307}; cl_int err = CL_INVALID_VALUE; cl_program pProgram = clCreateProgramWithIL(pContext, spirv, sizeof(spirv), &err); EXPECT_EQ(CL_SUCCESS, err); EXPECT_NE(nullptr, pProgram); uint32_t output[binarySize] = {}; size_t outputSize = 0; retVal = clGetProgramInfo(pProgram, CL_PROGRAM_IL, sizeof(output), output, &outputSize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(spirv), outputSize); EXPECT_EQ(0, memcmp(spirv, output, outputSize)); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(clGetProgramInfoTest, GivenMultiDeviceProgramCreatedWithBinaryWhenGettingDevicesThenCorrectDevicesAreReturned) { REQUIRE_IMAGES_OR_SKIP(defaultHwInfo); MockUnrestrictiveContextMultiGPU context; auto numDevicesForProgram = 2u; cl_device_id devicesForProgram[] = {context.getDevice(1), context.getDevice(3)}; std::unique_ptr pBinary0 = nullptr; std::unique_ptr pBinary1 = nullptr; size_t binarySize0 = 0; size_t binarySize1 = 0; std::string testFile; retrieveBinaryKernelFilename(testFile, "CopyBuffer_simd16_", ".bin"); pBinary0 = loadDataFromFile( testFile.c_str(), binarySize0); retrieveBinaryKernelFilename(testFile, "copy_buffer_to_image_", ".bin"); pBinary1 = loadDataFromFile( testFile.c_str(), binarySize1); ASSERT_NE(0u, binarySize0); ASSERT_NE(0u, binarySize1); ASSERT_NE(nullptr, pBinary0); ASSERT_NE(nullptr, pBinary1); EXPECT_NE(binarySize0, binarySize1); const unsigned char *binaries[] = {reinterpret_cast(pBinary0.get()), reinterpret_cast(pBinary1.get())}; size_t sizeBinaries[] = {binarySize0, binarySize1}; 
cl_program pProgram = nullptr; cl_int retVal = CL_INVALID_PROGRAM; cl_int binaryStaus = CL_INVALID_PROGRAM; pProgram = clCreateProgramWithBinary( &context, numDevicesForProgram, devicesForProgram, sizeBinaries, binaries, &binaryStaus, &retVal); EXPECT_NE(nullptr, pProgram); EXPECT_EQ(CL_SUCCESS, retVal); verifyDevices(pProgram, numDevicesForProgram, devicesForProgram); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(clGetProgramInfoTest, GivenMultiDeviceProgramCreatedWithBinaryWhenGettingBinariesThenCorrectBinariesAreReturned) { REQUIRE_IMAGES_OR_SKIP(defaultHwInfo); MockUnrestrictiveContextMultiGPU context; auto numDevicesForProgram = 2u; cl_device_id devicesForProgram[] = {context.getDevice(1), context.getDevice(3)}; std::unique_ptr pBinary0 = nullptr; std::unique_ptr pBinary1 = nullptr; size_t binarySize0 = 0; size_t binarySize1 = 0; std::string testFile; retrieveBinaryKernelFilename(testFile, "CopyBuffer_simd16_", ".bin"); pBinary0 = loadDataFromFile( testFile.c_str(), binarySize0); retrieveBinaryKernelFilename(testFile, "copy_buffer_to_image_", ".bin"); pBinary1 = loadDataFromFile( testFile.c_str(), binarySize1); ASSERT_NE(0u, binarySize0); ASSERT_NE(0u, binarySize1); ASSERT_NE(nullptr, pBinary0); ASSERT_NE(nullptr, pBinary1); EXPECT_NE(binarySize0, binarySize1); const unsigned char *binaries[] = {reinterpret_cast(pBinary0.get()), reinterpret_cast(pBinary1.get())}; size_t sizeBinaries[] = {binarySize0, binarySize1}; cl_program pProgram = nullptr; cl_int retVal = CL_INVALID_PROGRAM; cl_int binaryStaus = CL_INVALID_PROGRAM; pProgram = clCreateProgramWithBinary( &context, numDevicesForProgram, devicesForProgram, sizeBinaries, binaries, &binaryStaus, &retVal); EXPECT_NE(nullptr, pProgram); EXPECT_EQ(CL_SUCCESS, retVal); size_t programBinarySizes[2] = {}; retVal = clGetProgramInfo(pProgram, CL_PROGRAM_BINARY_SIZES, numDevicesForProgram * sizeof(size_t), programBinarySizes, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); for (auto i = 0u; i < 
numDevicesForProgram; i++) { EXPECT_EQ(sizeBinaries[i], programBinarySizes[i]); } auto programBinary0 = std::make_unique(binarySize0); memset(programBinary0.get(), 0, binarySize0); auto programBinary1 = std::make_unique(binarySize1); memset(programBinary1.get(), 0, binarySize1); unsigned char *programBinaries[] = {programBinary0.get(), programBinary1.get()}; retVal = clGetProgramInfo(pProgram, CL_PROGRAM_BINARIES, numDevicesForProgram * sizeof(unsigned char *), programBinaries, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); for (auto i = 0u; i < numDevicesForProgram; i++) { for (auto j = 0u; j < programBinarySizes[i]; j++) { EXPECT_EQ(programBinaries[i][j], binaries[i][j]); } } memset(programBinary1.get(), 0, binarySize1); programBinaries[0] = nullptr; retVal = clGetProgramInfo(pProgram, CL_PROGRAM_BINARIES, numDevicesForProgram * sizeof(unsigned char *), programBinaries, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); for (auto j = 0u; j < programBinarySizes[1]; j++) { EXPECT_EQ(programBinaries[1][j], binaries[1][j]); } retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clGetProgramInfoTests, GivenSPIRVProgramWhenGettingProgramSourceThenReturnNullString) { const size_t binarySize = 16; const uint32_t spirv[binarySize] = {0x03022307}; cl_int err = CL_INVALID_VALUE; cl_program pProgram = clCreateProgramWithIL(pContext, spirv, sizeof(spirv), &err); EXPECT_EQ(CL_SUCCESS, err); EXPECT_NE(nullptr, pProgram); size_t outputSize = 0; uint32_t output[binarySize] = {}; const char reference[sizeof(output)] = {}; retVal = clGetProgramInfo(pProgram, CL_PROGRAM_SOURCE, 0, nullptr, &outputSize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, outputSize); retVal = clGetProgramInfo(pProgram, CL_PROGRAM_SOURCE, sizeof(output), output, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0, memcmp(output, reference, sizeof(output))); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(clGetProgramInfoTest, 
GivenMultiDeviceProgramCreatedWithILWhenGettingDevicesThenCorrectDevicesAreReturned) { MockUnrestrictiveContextMultiGPU context; auto expectedNumDevices = context.getNumDevices(); auto devicesForProgram = std::make_unique(expectedNumDevices); for (auto i = 0u; i < expectedNumDevices; i++) { devicesForProgram[i] = context.getDevice(i); } const size_t binarySize = 16; const uint32_t spirv[binarySize] = {0x03022307}; cl_program pProgram = nullptr; cl_int retVal = CL_INVALID_PROGRAM; pProgram = clCreateProgramWithIL( &context, spirv, binarySize, &retVal); EXPECT_NE(nullptr, pProgram); EXPECT_EQ(CL_SUCCESS, retVal); verifyDevices(pProgram, expectedNumDevices, devicesForProgram.get()); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(clGetProgramInfoTest, GivenMultiDeviceProgramCreatedWithBuiltInKernelsWhenGettingDevicesThenCorrectDevicesAreReturned) { if (!defaultHwInfo->capabilityTable.supportsVme) { GTEST_SKIP(); } MockUnrestrictiveContextMultiGPU context; auto numDevicesForProgram = 2u; cl_device_id devicesForProgram[] = {context.getDevice(1), context.getDevice(3)}; overwriteBuiltInBinaryName("media_kernels_frontend"); const char *kernelNamesString = { "block_advanced_motion_estimate_bidirectional_check_intel;" "block_motion_estimate_intel;" "block_advanced_motion_estimate_check_intel;"}; cl_program pProgram = nullptr; cl_int retVal = CL_INVALID_PROGRAM; pProgram = clCreateProgramWithBuiltInKernels( &context, numDevicesForProgram, devicesForProgram, kernelNamesString, &retVal); EXPECT_NE(nullptr, pProgram); EXPECT_EQ(CL_SUCCESS, retVal); restoreBuiltInBinaryName(); verifyDevices(pProgram, numDevicesForProgram, devicesForProgram); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(clGetProgramInfoTest, GivenMultiDeviceBuiltInProgramCreatedWithGenBinaryWhenGettingDevicesThenCorrectDevicesAreReturned) { MockUnrestrictiveContextMultiGPU context; auto expectedNumDevices = context.getNumDevices(); auto devicesForProgram = 
std::make_unique(expectedNumDevices); for (auto i = 0u; i < expectedNumDevices; i++) { devicesForProgram[i] = context.getDevice(i); } std::unique_ptr pBinary = nullptr; size_t binarySize = 0; std::string testFile; retrieveBinaryKernelFilename(testFile, "CopyBuffer_simd16_", ".bin"); pBinary = loadDataFromFile( testFile.c_str(), binarySize); ASSERT_NE(0u, binarySize); ASSERT_NE(nullptr, pBinary); cl_program pProgram = nullptr; cl_int retVal = CL_INVALID_PROGRAM; pProgram = Program::createBuiltInFromGenBinary(&context, context.getDevices(), pBinary.get(), binarySize, &retVal); EXPECT_NE(nullptr, pProgram); EXPECT_EQ(CL_SUCCESS, retVal); verifyDevices(pProgram, expectedNumDevices, devicesForProgram.get()); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(clGetProgramInfoTest, GivenMultiDeviceBuiltInProgramCreatedWithGenBinaryWhenGettingDevicesThenCorrectBinariesAreReturned) { MockUnrestrictiveContextMultiGPU context; auto expectedNumDevices = context.getNumDevices(); auto devicesForProgram = std::make_unique(expectedNumDevices); for (auto i = 0u; i < expectedNumDevices; i++) { devicesForProgram[i] = context.getDevice(i); } std::unique_ptr pBinary = nullptr; size_t binarySize = 0; std::string testFile; retrieveBinaryKernelFilename(testFile, "CopyBuffer_simd16_", ".bin"); pBinary = loadDataFromFile( testFile.c_str(), binarySize); ASSERT_NE(0u, binarySize); ASSERT_NE(nullptr, pBinary); cl_program pProgram = nullptr; cl_int retVal = CL_INVALID_PROGRAM; pProgram = Program::createBuiltInFromGenBinary(&context, context.getDevices(), pBinary.get(), binarySize, &retVal); EXPECT_NE(nullptr, pProgram); EXPECT_EQ(CL_SUCCESS, retVal); auto programBinarySizes = std::make_unique(expectedNumDevices); memset(programBinarySizes.get(), 0, expectedNumDevices * sizeof(size_t)); retVal = clGetProgramInfo(pProgram, CL_PROGRAM_BINARY_SIZES, expectedNumDevices * sizeof(size_t), programBinarySizes.get(), nullptr); EXPECT_EQ(CL_SUCCESS, retVal); for (auto i = 0u; i < 
expectedNumDevices; i++) { EXPECT_EQ(binarySize, programBinarySizes[i]); } auto programBinaries = std::make_unique(expectedNumDevices); auto binariesBuffer = std::make_unique(expectedNumDevices * binarySize); memset(binariesBuffer.get(), 0, expectedNumDevices * binarySize); for (auto i = 0u; i < expectedNumDevices; i++) { programBinaries[i] = ptrOffset(binariesBuffer.get(), i * binarySize); } retVal = clGetProgramInfo(pProgram, CL_PROGRAM_BINARIES, expectedNumDevices * sizeof(unsigned char *), programBinaries.get(), nullptr); EXPECT_EQ(CL_SUCCESS, retVal); for (auto i = 0u; i < expectedNumDevices; i++) { EXPECT_EQ(0, memcmp(programBinaries[i], pBinary.get(), binarySize)); } retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_get_supported_image_formats_tests.inl000066400000000000000000000120531422164147700325660ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/device/device.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/source/context/context.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clGetSupportedImageFormatsTests; TEST_F(clGetSupportedImageFormatsTests, GivenValidParamsWhenGettingSupportImageFormatsThenNumImageFormatsIsGreaterThanZero) { if (!pContext->getDevice(0)->getSharedDeviceInfo().imageSupport) { GTEST_SKIP(); } cl_uint numImageFormats = 0; retVal = clGetSupportedImageFormats( pContext, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 0, nullptr, &numImageFormats); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_GT(numImageFormats, 0u); } TEST_F(clGetSupportedImageFormatsTests, givenInvalidContextWhenGettingSupportImageFormatsThenClInvalidContextErrorIsReturned) { auto device 
= pContext->getDevice(0u); auto dummyContext = reinterpret_cast(device); cl_uint numImageFormats = 0; retVal = clGetSupportedImageFormats( dummyContext, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 0, nullptr, &numImageFormats); EXPECT_EQ(CL_INVALID_CONTEXT, retVal); } TEST(clGetSupportedImageFormatsTest, givenPlatforNotSupportingImageWhenGettingSupportImageFormatsThenCLSuccessReturned) { HardwareInfo hwInfo = *defaultHwInfo; hwInfo.capabilityTable.supportsImages = false; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); cl_device_id clDevice = device.get(); cl_int retVal; auto context = ReleaseableObjectPtr(Context::create(nullptr, ClDeviceVector(&clDevice, 1), nullptr, nullptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); cl_uint numImageFormats = 0; retVal = clGetSupportedImageFormats( context.get(), CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 0, nullptr, &numImageFormats); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, numImageFormats); } TEST(clGetSupportedImageFormatsTest, givenPlatformNotSupportingReadWriteImagesWhenGettingSupportedImageFormatsThenCLSuccessIsReturned) { HardwareInfo hwInfo = *defaultHwInfo; hwInfo.capabilityTable.supportsImages = true; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); cl_device_id clDevice = device.get(); cl_int retVal; auto context = ReleaseableObjectPtr(Context::create(nullptr, ClDeviceVector(&clDevice, 1), nullptr, nullptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); cl_uint numImageFormats = 0; retVal = clGetSupportedImageFormats( context.get(), CL_MEM_KERNEL_READ_AND_WRITE, CL_MEM_OBJECT_IMAGE2D, 0, nullptr, &numImageFormats); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_GT(numImageFormats, 0u); } TEST(clGetSupportedImageFormatsTest, givenPlatforNotSupportingImageAndNullPointerToNumFormatsWhenGettingSupportImageFormatsThenCLSuccessReturned) { HardwareInfo hwInfo = *defaultHwInfo; hwInfo.capabilityTable.supportsImages = false; auto device = 
std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); cl_device_id clDevice = device.get(); cl_int retVal; auto context = ReleaseableObjectPtr(Context::create(nullptr, ClDeviceVector(&clDevice, 1), nullptr, nullptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clGetSupportedImageFormats( context.get(), CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(clGetSupportedImageFormatsTest, givenPlatformWithoutDevicesWhenClGetSupportedImageFormatIsCalledThenDeviceIsTakenFromContext) { auto executionEnvironment = platform()->peekExecutionEnvironment(); executionEnvironment->initializeMemoryManager(); executionEnvironment->prepareRootDeviceEnvironments(1); auto device = std::make_unique(*Device::create(executionEnvironment, 0u), platform()); const DeviceInfo &devInfo = device->getSharedDeviceInfo(); if (!devInfo.imageSupport) { GTEST_SKIP(); } cl_device_id clDevice = device.get(); cl_int retVal; auto context = ReleaseableObjectPtr(Context::create(nullptr, ClDeviceVector(&clDevice, 1), nullptr, nullptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, platform()->getNumDevices()); cl_uint numImageFormats = 0; retVal = clGetSupportedImageFormats( context.get(), CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 0, nullptr, &numImageFormats); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_GT(numImageFormats, 0u); } compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_icd_get_platform_ids_khr_tests.inl000066400000000000000000000051271422164147700320160ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/test/unit_test/fixtures/platform_fixture.h" using namespace NEO; using clIcdGetPlatformIDsKHRTests = Test; namespace ULT { TEST_F(clIcdGetPlatformIDsKHRTests, WhenPlatformIsCreatedThenDispatchLocationIsCorrect) { cl_platform_id platform = 
pPlatform; EXPECT_EQ((void *)platform, (void *)(&platform->dispatch)); } TEST_F(clIcdGetPlatformIDsKHRTests, WhenGettingNumberOfPlatformsThenGreaterThanZeroIsReturned) { cl_int retVal = CL_SUCCESS; cl_uint numPlatforms = 0; retVal = clIcdGetPlatformIDsKHR(0, nullptr, &numPlatforms); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_GT(numPlatforms, (cl_uint)0); } TEST_F(clIcdGetPlatformIDsKHRTests, WhenGettingExtensionFunctionAddressThenCorrectPointerIsReturned) { void *funPtr = clGetExtensionFunctionAddress("clIcdGetPlatformIDsKHR"); decltype(&clIcdGetPlatformIDsKHR) expected = clIcdGetPlatformIDsKHR; EXPECT_NE(nullptr, funPtr); EXPECT_EQ(expected, reinterpret_cast(funPtr)); } TEST_F(clIcdGetPlatformIDsKHRTests, WhenGettingPlatformIdThenCorrectIdIsReturned) { cl_uint numPlatforms = 0; cl_uint numPlatformsIcd = 0; auto retVal = clGetPlatformIDs(0, nullptr, &numPlatforms); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clIcdGetPlatformIDsKHR(0, nullptr, &numPlatformsIcd); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(numPlatforms, numPlatformsIcd); std::unique_ptr platforms(reinterpret_cast(malloc(sizeof(cl_platform_id) * numPlatforms)), free); ASSERT_NE(nullptr, platforms); std::unique_ptr platformsIcd(reinterpret_cast(malloc(sizeof(cl_platform_id) * numPlatforms)), free); ASSERT_NE(nullptr, platforms); retVal = clGetPlatformIDs(numPlatforms, platforms.get(), nullptr); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clIcdGetPlatformIDsKHR(numPlatformsIcd, platformsIcd.get(), nullptr); ASSERT_EQ(CL_SUCCESS, retVal); for (cl_uint i = 0; i < std::min(numPlatforms, numPlatformsIcd); i++) { EXPECT_EQ(platforms.get()[i], platformsIcd.get()[i]); } } TEST_F(clIcdGetPlatformIDsKHRTests, WhenCheckingExtensionStringThenClKhrIcdIsIncluded) { const ClDeviceInfo &caps = pPlatform->getClDevice(0)->getDeviceInfo(); EXPECT_NE(std::string::npos, std::string(caps.deviceExtensions).find("cl_khr_icd")); } } // namespace ULT 
compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_intel_accelerator_tests.inl000066400000000000000000000231351422164147700304670ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/accelerators/intel_accelerator.h" #include "opencl/source/accelerators/intel_motion_estimation.h" #include "opencl/source/context/context.h" #include "opencl/test/unit_test/api/cl_api_tests.h" using namespace NEO; namespace ULT { struct IntelAcceleratorTest : public api_tests { public: IntelAcceleratorTest() {} void SetUp() override { api_tests::SetUp(); } void TearDown() override { api_tests::TearDown(); } protected: cl_accelerator_intel accelerator = nullptr; cl_motion_estimation_desc_intel desc; cl_int retVal = 0xEEEEEEEEu; cl_int result = -1; }; struct IntelAcceleratorTestWithValidDescriptor : IntelAcceleratorTest { IntelAcceleratorTestWithValidDescriptor() {} void SetUp() override { IntelAcceleratorTest::SetUp(); desc.mb_block_type = CL_ME_MB_TYPE_16x16_INTEL; desc.subpixel_mode = CL_ME_SUBPIXEL_MODE_QPEL_INTEL; desc.sad_adjust_mode = CL_ME_SAD_ADJUST_MODE_HAAR_INTEL; desc.search_path_type = CL_ME_SEARCH_PATH_RADIUS_2_2_INTEL; } void TearDown() override { IntelAcceleratorTest::TearDown(); } }; TEST_F(IntelAcceleratorTestWithValidDescriptor, GivenInvalidAcceleratorTypeWhenCreatingAcceleratorThenClInvalidAcceleratorTypeIntelErrorIsReturned) { auto INVALID_ACCELERATOR_TYPE = static_cast(0xEEEEEEEE); accelerator = clCreateAcceleratorINTEL( pContext, INVALID_ACCELERATOR_TYPE, sizeof(cl_motion_estimation_desc_intel), &desc, &retVal); EXPECT_EQ(static_cast(nullptr), accelerator); EXPECT_EQ(CL_INVALID_ACCELERATOR_TYPE_INTEL, retVal); } TEST_F(IntelAcceleratorTestWithValidDescriptor, GivenInvalidContextWhenCreatingAcceleratorThenClInvalidContextErrorIsReturned) { accelerator = clCreateAcceleratorINTEL( nullptr, CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL, sizeof(cl_motion_estimation_desc_intel), 
&desc, &retVal); EXPECT_EQ(static_cast(nullptr), accelerator); EXPECT_EQ(CL_INVALID_CONTEXT, retVal); } TEST_F(IntelAcceleratorTest, GivenNullAcceleratorWhenReleasingAcceleratorThenClInvalidAcceleratorIntelErrorIsReturned) { result = clReleaseAcceleratorINTEL(nullptr); EXPECT_EQ(CL_INVALID_ACCELERATOR_INTEL, result); } TEST_F(IntelAcceleratorTest, GivenNullAcceleratorWhenRetainingAcceleratorThenClInvalidAcceleratorIntelErrorIsReturned) { result = clRetainAcceleratorINTEL(nullptr); EXPECT_EQ(CL_INVALID_ACCELERATOR_INTEL, result); } struct IntelAcceleratorGetInfoTest : IntelAcceleratorTestWithValidDescriptor { IntelAcceleratorGetInfoTest() {} void SetUp() override { IntelAcceleratorTestWithValidDescriptor::SetUp(); accelerator = clCreateAcceleratorINTEL( pContext, CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL, sizeof(cl_motion_estimation_desc_intel), &desc, &retVal); ASSERT_NE(nullptr, accelerator); ASSERT_EQ(CL_SUCCESS, retVal); } void TearDown() override { result = clReleaseAcceleratorINTEL(accelerator); ASSERT_EQ(CL_SUCCESS, result); IntelAcceleratorTestWithValidDescriptor::TearDown(); } protected: size_t param_value_size_ret = 0; }; TEST_F(IntelAcceleratorTest, GivenNullAcceleratorWhenGettingAcceleratorInfoThenClInvalidAcceleratorIntelErrorIsReturned) { result = clGetAcceleratorInfoINTEL( nullptr, 0, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_ACCELERATOR_INTEL, result); } TEST_F(IntelAcceleratorTest, GivenNullAcceleratorWhenGettingAcceleratorInfoThenParamValueAndSizeArePreserved) { cl_uint paramValue = 0xEEEEEEE1u; size_t paramSize = 0xEEEEEEE3u; result = clGetAcceleratorInfoINTEL( nullptr, 0, sizeof(paramValue), ¶mValue, ¶mSize); EXPECT_EQ(CL_INVALID_ACCELERATOR_INTEL, result); // No changes to inputs EXPECT_EQ(static_cast(0xEEEEEEE1u), paramValue); EXPECT_EQ(0xEEEEEEE3u, paramSize); } TEST_F(IntelAcceleratorGetInfoTest, GivenInvalidParamNameWhenGettingAcceleratorInfoThenClInvalidValueErrorIsReturned) { result = clGetAcceleratorInfoINTEL( accelerator, 0xEEEEEEEE, 
sizeof(cl_uint), nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, result); } TEST_F(IntelAcceleratorGetInfoTest, GivenClAcceleratorReferenceCountIntelWhenGettingAcceleratorInfoThenParamValueSizeRetHasCorrectSize) { result = clGetAcceleratorInfoINTEL( accelerator, CL_ACCELERATOR_REFERENCE_COUNT_INTEL, sizeof(cl_uint), nullptr, ¶m_value_size_ret); EXPECT_EQ(CL_SUCCESS, result); EXPECT_EQ(sizeof(cl_uint), param_value_size_ret); } TEST_F(IntelAcceleratorGetInfoTest, GivenClAcceleratorReferenceCountIntelWhenGettingAcceleratorInfoThenParamValueIsOne) { cl_uint param_value = static_cast(-1); result = clGetAcceleratorInfoINTEL( accelerator, CL_ACCELERATOR_REFERENCE_COUNT_INTEL, sizeof(cl_uint), ¶m_value, nullptr); EXPECT_EQ(CL_SUCCESS, result); EXPECT_EQ(1u, param_value); } TEST_F(IntelAcceleratorGetInfoTest, GivenLongForDescriptorSizeWhenGettingAcceleratorInfoThenCorrectValuesAreReturned) { cl_uint param_value = static_cast(-1); result = clGetAcceleratorInfoINTEL( accelerator, CL_ACCELERATOR_REFERENCE_COUNT_INTEL, sizeof(cl_uint) + 1, ¶m_value, ¶m_value_size_ret); EXPECT_EQ(CL_SUCCESS, result); EXPECT_EQ(sizeof(cl_uint), param_value_size_ret); EXPECT_EQ(1u, param_value); } TEST_F(IntelAcceleratorGetInfoTest, GivenShortForDescriptorSizeWhenGettingAcceleratorInfoThenClInvalidValueErrorIsReturned) { cl_uint param_value = static_cast(-1); result = clGetAcceleratorInfoINTEL( accelerator, CL_ACCELERATOR_REFERENCE_COUNT_INTEL, sizeof(cl_uint) - 1, ¶m_value, ¶m_value_size_ret); EXPECT_EQ(CL_INVALID_VALUE, result); } TEST_F(IntelAcceleratorGetInfoTest, GivenZeroForDescriptorSizeGivenLongForDescriptorSizeWhenGettingAcceleratorInfoThenCorrectValuesAreReturned) { result = clGetAcceleratorInfoINTEL( accelerator, CL_ACCELERATOR_REFERENCE_COUNT_INTEL, 0, nullptr, ¶m_value_size_ret); EXPECT_EQ(CL_SUCCESS, result); EXPECT_EQ(sizeof(cl_uint), param_value_size_ret); } TEST_F(IntelAcceleratorGetInfoTest, GivenCallToRetainAcceleratorWhenGettingAcceleratorInfoThenParamValueIsTwo) { cl_uint 
param_value = static_cast(-1); result = clRetainAcceleratorINTEL(accelerator); ASSERT_EQ(CL_SUCCESS, result); result = clGetAcceleratorInfoINTEL( accelerator, CL_ACCELERATOR_REFERENCE_COUNT_INTEL, sizeof(cl_uint), ¶m_value, ¶m_value_size_ret); EXPECT_EQ(CL_SUCCESS, result); EXPECT_EQ(2u, param_value); result = clReleaseAcceleratorINTEL(accelerator); EXPECT_EQ(CL_SUCCESS, result); result = clGetAcceleratorInfoINTEL( accelerator, CL_ACCELERATOR_REFERENCE_COUNT_INTEL, sizeof(cl_uint), ¶m_value, ¶m_value_size_ret); EXPECT_EQ(CL_SUCCESS, result); EXPECT_EQ(1u, param_value); } TEST_F(IntelAcceleratorGetInfoTest, GivenNullPtrForParamValueWhenGettingAcceleratorInfoThenClSuccessIsReturned) { result = clGetAcceleratorInfoINTEL( accelerator, CL_ACCELERATOR_CONTEXT_INTEL, sizeof(cl_context), nullptr, ¶m_value_size_ret); EXPECT_EQ(CL_SUCCESS, result); EXPECT_EQ(sizeof(cl_context), param_value_size_ret); } TEST_F(IntelAcceleratorGetInfoTest, GivenLongForDescriptorSizeWhenGettingAcceleratorContextInfoThenCorrectValuesAreReturned) { cl_context param_value = reinterpret_cast(-1); result = clGetAcceleratorInfoINTEL( accelerator, CL_ACCELERATOR_CONTEXT_INTEL, sizeof(cl_context) + 1, ¶m_value, ¶m_value_size_ret); EXPECT_EQ(CL_SUCCESS, result); EXPECT_EQ(sizeof(cl_context), param_value_size_ret); } TEST_F(IntelAcceleratorGetInfoTest, GivenAcceleratorContextIntelWhenGettingAcceleratorInfoThenCorrectValuesAreReturned) { cl_context param_value = reinterpret_cast(-1); result = clGetAcceleratorInfoINTEL( accelerator, CL_ACCELERATOR_CONTEXT_INTEL, sizeof(cl_context), ¶m_value, ¶m_value_size_ret); EXPECT_EQ(CL_SUCCESS, result); EXPECT_EQ(sizeof(cl_context), param_value_size_ret); cl_context referenceContext = static_cast(pContext); EXPECT_EQ(referenceContext, param_value); } TEST_F(IntelAcceleratorGetInfoTest, GivenShortForDescriptorSizeWhenGettingAcceleratorContextInfoThenClInvalidValueErrorIsReturned) { cl_context param_value = reinterpret_cast(-1); result = clGetAcceleratorInfoINTEL( 
accelerator, CL_ACCELERATOR_CONTEXT_INTEL, sizeof(cl_context) - 1, ¶m_value, ¶m_value_size_ret); EXPECT_EQ(CL_INVALID_VALUE, result); } TEST_F(IntelAcceleratorGetInfoTest, GivenZeroForDescriptorSizeGivenLongForDescriptorSizeWhenGettingAcceleratorContextInfoThenCorrectValuesAreReturned) { result = clGetAcceleratorInfoINTEL( accelerator, CL_ACCELERATOR_CONTEXT_INTEL, 0, nullptr, ¶m_value_size_ret); EXPECT_EQ(CL_SUCCESS, result); EXPECT_EQ(sizeof(cl_context), param_value_size_ret); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_intel_motion_estimation.cpp000066400000000000000000000424271422164147700305270ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/accelerators/intel_accelerator.h" #include "opencl/source/accelerators/intel_motion_estimation.h" #include "opencl/source/context/context.h" #include "opencl/test/unit_test/api/cl_api_tests.h" using namespace NEO; namespace ULT { struct IntelMotionEstimationTest : public api_tests { public: IntelMotionEstimationTest() {} void SetUp() override { api_tests::SetUp(); desc.mb_block_type = CL_ME_MB_TYPE_16x16_INTEL; desc.subpixel_mode = CL_ME_SUBPIXEL_MODE_QPEL_INTEL; desc.sad_adjust_mode = CL_ME_SAD_ADJUST_MODE_HAAR_INTEL; desc.search_path_type = CL_ME_SEARCH_PATH_RADIUS_2_2_INTEL; } void TearDown() override { api_tests::TearDown(); } protected: cl_accelerator_intel accelerator = nullptr; cl_motion_estimation_desc_intel desc; cl_int retVal = 0xEEEEEEEEu; cl_int result = -1; }; typedef IntelMotionEstimationTest IntelMotionEstimationNegativeTest; TEST_F(IntelMotionEstimationNegativeTest, GivenNullDescriptorWhenCreatingAcceleratorThenClInvalidAcceleratorDescriptorIntelErrorIsReturned) { accelerator = clCreateAcceleratorINTEL( pContext, CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL, sizeof(cl_motion_estimation_desc_intel), nullptr, &retVal); EXPECT_EQ(CL_INVALID_ACCELERATOR_DESCRIPTOR_INTEL, retVal); 
ASSERT_EQ(static_cast(nullptr), accelerator); } TEST_F(IntelMotionEstimationNegativeTest, GivenDescriptorSizeLongerThanActualWhenCreatingAcceleratorThenClInvalidAcceleratorDescriptorIntelErrorIsReturned) { accelerator = clCreateAcceleratorINTEL( pContext, CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL, sizeof(cl_motion_estimation_desc_intel) + 1, &desc, &retVal); EXPECT_EQ(CL_INVALID_ACCELERATOR_DESCRIPTOR_INTEL, retVal); ASSERT_EQ(static_cast(nullptr), accelerator); } TEST_F(IntelMotionEstimationNegativeTest, GivenDescriptorSizeShorterThanActualWhenCreatingAcceleratorThenClInvalidAcceleratorDescriptorIntelErrorIsReturned) { accelerator = clCreateAcceleratorINTEL( pContext, CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL, sizeof(cl_motion_estimation_desc_intel) - 1, &desc, &retVal); EXPECT_EQ(CL_INVALID_ACCELERATOR_DESCRIPTOR_INTEL, retVal); ASSERT_EQ(static_cast(nullptr), accelerator); } TEST_F(IntelMotionEstimationNegativeTest, GivenInvalidMacroBlockTypeWhenCreatingAcceleratorThenClInvalidAcceleratorDescriptorIntelErrorIsReturned) { desc.mb_block_type = 0xEEEEEEEE; accelerator = clCreateAcceleratorINTEL( pContext, CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL, sizeof(cl_motion_estimation_desc_intel), nullptr, &retVal); EXPECT_EQ(CL_INVALID_ACCELERATOR_DESCRIPTOR_INTEL, retVal); ASSERT_EQ(static_cast(nullptr), accelerator); } TEST_F(IntelMotionEstimationNegativeTest, GivenInvalidSubPixelModeWhenCreatingAcceleratorThenClInvalidAcceleratorDescriptorIntelErrorIsReturned) { desc.subpixel_mode = 0xEEEEEEEE; accelerator = clCreateAcceleratorINTEL( pContext, CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL, sizeof(cl_motion_estimation_desc_intel), nullptr, &retVal); EXPECT_EQ(CL_INVALID_ACCELERATOR_DESCRIPTOR_INTEL, retVal); ASSERT_EQ(static_cast(nullptr), accelerator); } TEST_F(IntelMotionEstimationNegativeTest, GivenInvalidSadAdjustModeWhenCreatingAcceleratorThenClInvalidAcceleratorDescriptorIntelErrorIsReturned) { desc.sad_adjust_mode = 0xEEEEEEEE; accelerator = 
clCreateAcceleratorINTEL( pContext, CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL, sizeof(cl_motion_estimation_desc_intel), nullptr, &retVal); EXPECT_EQ(CL_INVALID_ACCELERATOR_DESCRIPTOR_INTEL, retVal); ASSERT_EQ(static_cast(nullptr), accelerator); } TEST_F(IntelMotionEstimationNegativeTest, GivenInvalidSearchPathTypeWhenCreatingAcceleratorThenClInvalidAcceleratorDescriptorIntelErrorIsReturned) { desc.search_path_type = 0xEEEEEEEE; accelerator = clCreateAcceleratorINTEL( pContext, CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL, sizeof(cl_motion_estimation_desc_intel), nullptr, &retVal); EXPECT_EQ(CL_INVALID_ACCELERATOR_DESCRIPTOR_INTEL, retVal); ASSERT_EQ(static_cast(nullptr), accelerator); } TEST_F(IntelMotionEstimationTest, GivenValidArgumentsWhenCreatingAcceleratorThenAcceleratorIsCreated) { accelerator = clCreateAcceleratorINTEL( pContext, CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL, sizeof(cl_motion_estimation_desc_intel), &desc, &retVal); ASSERT_NE(nullptr, accelerator); EXPECT_EQ(CL_SUCCESS, retVal); auto acc = static_cast(accelerator); delete acc; } TEST_F(IntelMotionEstimationTest, GivenNullReturnWhenCreatingAcceleratorThenAcceleratorIsCreated) { accelerator = clCreateAcceleratorINTEL( pContext, CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL, sizeof(cl_motion_estimation_desc_intel), &desc, nullptr); ASSERT_NE(nullptr, accelerator); auto acc = static_cast(accelerator); delete acc; } TEST_F(IntelMotionEstimationTest, GivenValidAcceleratorWhenReleasingAcceleratorThenSuccessIsReturned) { accelerator = clCreateAcceleratorINTEL( pContext, CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL, sizeof(cl_motion_estimation_desc_intel), &desc, &retVal); ASSERT_NE(nullptr, accelerator); ASSERT_EQ(CL_SUCCESS, retVal); result = clReleaseAcceleratorINTEL(accelerator); EXPECT_EQ(CL_SUCCESS, result); } TEST_F(IntelMotionEstimationTest, GivenValidAcceleratorWhenRetainingAndReleasingAcceleratorThenReferenceCountIsAdjustedCorrectly) { accelerator = clCreateAcceleratorINTEL( pContext, 
CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL, sizeof(cl_motion_estimation_desc_intel), &desc, &retVal); ASSERT_NE(nullptr, accelerator); ASSERT_EQ(CL_SUCCESS, retVal); auto pAccelerator = static_cast(accelerator); ASSERT_EQ(1, pAccelerator->getReference()); result = clRetainAcceleratorINTEL(accelerator); ASSERT_EQ(CL_SUCCESS, result); ASSERT_EQ(2, pAccelerator->getReference()); result = clReleaseAcceleratorINTEL(accelerator); EXPECT_EQ(CL_SUCCESS, result); EXPECT_EQ(1, pAccelerator->getReference()); result = clReleaseAcceleratorINTEL(accelerator); EXPECT_EQ(CL_SUCCESS, result); } struct IntelMotionEstimationGetInfoTest : public IntelMotionEstimationTest { public: IntelMotionEstimationGetInfoTest() : type_returned(static_cast(-1)), param_value_size_ret(static_cast(-1)) {} void SetUp() override { IntelMotionEstimationTest::SetUp(); descReturn.mb_block_type = static_cast(-1); descReturn.subpixel_mode = static_cast(-1); descReturn.sad_adjust_mode = static_cast(-1); descReturn.search_path_type = static_cast(-1); accelerator = clCreateAcceleratorINTEL( pContext, CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL, sizeof(cl_motion_estimation_desc_intel), &desc, &retVal); ASSERT_NE(nullptr, accelerator); ASSERT_EQ(CL_SUCCESS, retVal); } void TearDown() override { result = clReleaseAcceleratorINTEL(accelerator); EXPECT_EQ(CL_SUCCESS, result); IntelMotionEstimationTest::TearDown(); } protected: cl_motion_estimation_desc_intel descReturn; cl_accelerator_type_intel type_returned; size_t param_value_size_ret; }; TEST_F(IntelMotionEstimationGetInfoTest, GivenValidParamsWhenGettingAcceleratorInfoThenDescriptorContainsCorrectInformation) { result = clGetAcceleratorInfoINTEL( accelerator, CL_ACCELERATOR_DESCRIPTOR_INTEL, sizeof(cl_motion_estimation_desc_intel), // exact &descReturn, ¶m_value_size_ret); EXPECT_EQ(CL_SUCCESS, result); EXPECT_EQ(sizeof(cl_motion_estimation_desc_intel), param_value_size_ret); EXPECT_EQ(static_cast(CL_ME_MB_TYPE_16x16_INTEL), descReturn.mb_block_type); 
EXPECT_EQ(static_cast(CL_ME_SUBPIXEL_MODE_QPEL_INTEL), descReturn.subpixel_mode); EXPECT_EQ(static_cast(CL_ME_SAD_ADJUST_MODE_HAAR_INTEL), descReturn.sad_adjust_mode); EXPECT_EQ(static_cast(CL_ME_SEARCH_PATH_RADIUS_2_2_INTEL), descReturn.search_path_type); } TEST_F(IntelMotionEstimationGetInfoTest, GivenTooShortDescriptorLengthWhenGettingAcceleratorInfoThenClInvalidValueErrorIsReturned) { result = clGetAcceleratorInfoINTEL( accelerator, CL_ACCELERATOR_DESCRIPTOR_INTEL, sizeof(cl_motion_estimation_desc_intel) - 1, // short &descReturn, ¶m_value_size_ret); EXPECT_EQ(CL_INVALID_VALUE, result); } TEST_F(IntelMotionEstimationGetInfoTest, GivenDescriptorLengthZeroWhenGettingAcceleratorInfoThenClInvalidValueErrorIsReturned) { result = clGetAcceleratorInfoINTEL( accelerator, CL_ACCELERATOR_DESCRIPTOR_INTEL, 0, &descReturn, ¶m_value_size_ret); EXPECT_EQ(CL_INVALID_VALUE, result); } TEST_F(IntelMotionEstimationGetInfoTest, GivenInvalidParametersWhenGettingAcceleratorInfoThenValueSizeRetIsNotUpdated) { param_value_size_ret = 0x1234; result = clGetAcceleratorInfoINTEL( accelerator, CL_ACCELERATOR_DESCRIPTOR_INTEL, 0, &descReturn, ¶m_value_size_ret); EXPECT_EQ(CL_INVALID_VALUE, result); EXPECT_EQ(0x1234u, param_value_size_ret); } TEST_F(IntelMotionEstimationGetInfoTest, GivenLongerDescriptorLengthWhenGettingAcceleratorInfoThenCorrectDescriptorLengthIsReturned) { result = clGetAcceleratorInfoINTEL( accelerator, CL_ACCELERATOR_DESCRIPTOR_INTEL, sizeof(cl_motion_estimation_desc_intel) + 1, // long &descReturn, ¶m_value_size_ret); EXPECT_EQ(CL_SUCCESS, result); EXPECT_EQ(sizeof(cl_motion_estimation_desc_intel), param_value_size_ret); } TEST_F(IntelMotionEstimationGetInfoTest, GivenDescriptorLengthZeroAndDescriptorNullWhenGettingAcceleratorInfoThenCorrectDescriptorLengthIsReturned) { result = clGetAcceleratorInfoINTEL( accelerator, CL_ACCELERATOR_DESCRIPTOR_INTEL, 0, // query required size w/nullptr return nullptr, ¶m_value_size_ret); EXPECT_EQ(CL_SUCCESS, result); 
EXPECT_EQ(sizeof(cl_motion_estimation_desc_intel), param_value_size_ret); }

// Queries CL_ACCELERATOR_TYPE_INTEL with a cl_uint-sized buffer and expects the
// motion-estimation accelerator type together with the reported size.
TEST_F(IntelMotionEstimationGetInfoTest, GivenAcceleratorTypeWhenGettingAcceleratorInfoThenAcceleratorTypeMotionEstimationIntelIsReturned) {
    ASSERT_EQ(sizeof(cl_accelerator_type_intel), sizeof(cl_uint));

    result = clGetAcceleratorInfoINTEL(
        accelerator,
        CL_ACCELERATOR_TYPE_INTEL,
        sizeof(cl_uint),
        &type_returned,
        &param_value_size_ret);

    EXPECT_EQ(CL_SUCCESS, result);
    EXPECT_EQ(sizeof(cl_accelerator_type_intel), param_value_size_ret);

    EXPECT_EQ(static_cast<cl_accelerator_type_intel>(CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL), type_returned);
}

// Same query with a buffer size that exactly matches the value size.
TEST_F(IntelMotionEstimationGetInfoTest, GivenAcceleratorTypeIntelWhenGettingAcceleratorInfoThenClAcceleratorTypeMotionEstimationIntelIsReturned) {
    result = clGetAcceleratorInfoINTEL(
        accelerator,
        CL_ACCELERATOR_TYPE_INTEL,
        sizeof(cl_uint), // exact
        &type_returned,
        &param_value_size_ret);

    EXPECT_EQ(CL_SUCCESS, result);
    EXPECT_EQ(sizeof(cl_accelerator_type_intel), param_value_size_ret);

    EXPECT_EQ(static_cast<cl_accelerator_type_intel>(CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL), type_returned);
}

// A buffer one byte smaller than the value is expected to be rejected.
TEST_F(IntelMotionEstimationGetInfoTest, GivenAcceleratorTypeIntelAndTooShortTypeLengthWhenGettingAcceleratorInfoThenClInvalidValueIsReturned) {
    result = clGetAcceleratorInfoINTEL(
        accelerator,
        CL_ACCELERATOR_TYPE_INTEL,
        sizeof(cl_uint) - 1, // short
        &type_returned,
        &param_value_size_ret);

    EXPECT_EQ(CL_INVALID_VALUE, result);
}

// A zero-sized buffer combined with a non-null destination is expected to be rejected.
TEST_F(IntelMotionEstimationGetInfoTest, GivenAcceleratorTypeIntelAndTypeLengthZeroWhenGettingAcceleratorInfoThenClInvalidValueIsReturned) {
    result = clGetAcceleratorInfoINTEL(
        accelerator,
        CL_ACCELERATOR_TYPE_INTEL,
        0, // very short
        &type_returned,
        &param_value_size_ret);

    EXPECT_EQ(CL_INVALID_VALUE, result);
}

// An oversized buffer is accepted; the reported size is still the value size.
TEST_F(IntelMotionEstimationGetInfoTest, GivenAcceleratorTypeIntelAndTooLongTypeLengthWhenGettingAcceleratorInfoThenCorrectLengthIsReturned) {
    result = clGetAcceleratorInfoINTEL(
        accelerator,
        CL_ACCELERATOR_TYPE_INTEL,
        sizeof(cl_uint) + 1, // long
        &type_returned,
        &param_value_size_ret);

    EXPECT_EQ(CL_SUCCESS, result);
    EXPECT_EQ(sizeof(cl_accelerator_type_intel), param_value_size_ret);
}

// Size-only query: no destination buffer, only the required size is requested.
TEST_F(IntelMotionEstimationGetInfoTest, GivenAcceleratorTypeIntelAndNullTypeWhenGettingAcceleratorInfoThenCorrectLengthIsReturned) {
    result = clGetAcceleratorInfoINTEL(
        accelerator,
        CL_ACCELERATOR_TYPE_INTEL,
        0,
        nullptr,
        &param_value_size_ret);

    EXPECT_EQ(CL_SUCCESS, result);
    EXPECT_EQ(sizeof(cl_accelerator_type_intel), param_value_size_ret);
}

// Creates and releases an accelerator for the 8x8 / integer / no-adjust / radius 2x2 descriptor.
TEST_F(IntelMotionEstimationTest, GivenDescriptor8x8IntegerNone2x2WhenCreatingAcceleratorThenSuccessIsReturned) {
    desc.mb_block_type = CL_ME_MB_TYPE_8x8_INTEL;
    desc.subpixel_mode = CL_ME_SUBPIXEL_MODE_INTEGER_INTEL;
    desc.sad_adjust_mode = CL_ME_SAD_ADJUST_MODE_NONE_INTEL;
    desc.search_path_type = CL_ME_SEARCH_PATH_RADIUS_2_2_INTEL;

    accelerator = clCreateAcceleratorINTEL(
        pContext,
        CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL,
        sizeof(cl_motion_estimation_desc_intel),
        &desc,
        &retVal);

    EXPECT_EQ(CL_SUCCESS, retVal);

    result = clReleaseAcceleratorINTEL(accelerator);
    EXPECT_EQ(CL_SUCCESS, result);
}

// Creates and releases an accelerator for the 4x4 / half-pel / Haar / radius 16x12 descriptor.
TEST_F(IntelMotionEstimationTest, GivenDescriptor4x4HpelHaar16x12WhenCreatingAcceleratorThenSuccessIsReturned) {
    desc.mb_block_type = CL_ME_MB_TYPE_4x4_INTEL;
    desc.subpixel_mode = CL_ME_SUBPIXEL_MODE_HPEL_INTEL;
    desc.sad_adjust_mode = CL_ME_SAD_ADJUST_MODE_HAAR_INTEL;
    desc.search_path_type = CL_ME_SEARCH_PATH_RADIUS_16_12_INTEL;

    accelerator = clCreateAcceleratorINTEL(
        pContext,
        CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL,
        sizeof(cl_motion_estimation_desc_intel),
        &desc,
        &retVal);

    EXPECT_EQ(CL_SUCCESS, retVal);

    result = clReleaseAcceleratorINTEL(accelerator);
    EXPECT_EQ(CL_SUCCESS, result);
}

// Creates and releases an accelerator for a 16x16 / Haar / radius 4x4 descriptor.
// NOTE(review): the test name says "Hpel" but the descriptor sets the QPEL subpixel mode.
TEST_F(IntelMotionEstimationTest, GivenDescriptor16x16HpelHaar4x4WhenCreatingAcceleratorThenSuccessIsReturned) {
    desc.mb_block_type = CL_ME_MB_TYPE_16x16_INTEL;
    desc.subpixel_mode = CL_ME_SUBPIXEL_MODE_QPEL_INTEL;
    desc.sad_adjust_mode = CL_ME_SAD_ADJUST_MODE_HAAR_INTEL;
    desc.search_path_type = CL_ME_SEARCH_PATH_RADIUS_4_4_INTEL;

    accelerator = clCreateAcceleratorINTEL(
        pContext,
        CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL,
        sizeof(cl_motion_estimation_desc_intel),
        &desc,
        &retVal);

    EXPECT_EQ(CL_SUCCESS, retVal);

    result = clReleaseAcceleratorINTEL(accelerator);
    EXPECT_EQ(CL_SUCCESS, result);
}

// The negative tests below each overwrite one descriptor field with an
// out-of-range value (all bits set) and expect descriptor validation to fail.
TEST_F(IntelMotionEstimationNegativeTest, GivenInvalidBlockTypeWhenCreatingAcceleratorThenClInvalidAcceleratorDescriptorIntelErrorIsReturned) {
    desc.mb_block_type = static_cast<cl_uint>(-1);

    accelerator = clCreateAcceleratorINTEL(
        pContext,
        CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL,
        sizeof(cl_motion_estimation_desc_intel),
        &desc,
        &retVal);

    EXPECT_EQ(CL_INVALID_ACCELERATOR_DESCRIPTOR_INTEL, retVal);
}

TEST_F(IntelMotionEstimationNegativeTest, GivenInvalidSubpixelModeWhenCreatingAcceleratorThenClInvalidAcceleratorDescriptorIntelErrorIsReturned) {
    desc.subpixel_mode = static_cast<cl_uint>(-1);

    accelerator = clCreateAcceleratorINTEL(
        pContext,
        CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL,
        sizeof(cl_motion_estimation_desc_intel),
        &desc,
        &retVal);

    EXPECT_EQ(CL_INVALID_ACCELERATOR_DESCRIPTOR_INTEL, retVal);
}

TEST_F(IntelMotionEstimationNegativeTest, GivenInvalidAdjustModeWhenCreatingAcceleratorThenClInvalidAcceleratorDescriptorIntelErrorIsReturned) {
    desc.sad_adjust_mode = static_cast<cl_uint>(-1);

    accelerator = clCreateAcceleratorINTEL(
        pContext,
        CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL,
        sizeof(cl_motion_estimation_desc_intel),
        &desc,
        &retVal);

    EXPECT_EQ(CL_INVALID_ACCELERATOR_DESCRIPTOR_INTEL, retVal);
}

TEST_F(IntelMotionEstimationNegativeTest, GivenInvalidPathTypeWhenCreatingAcceleratorThenClInvalidAcceleratorDescriptorIntelErrorIsReturned) {
    desc.search_path_type = static_cast<cl_uint>(-1);

    accelerator = clCreateAcceleratorINTEL(
        pContext,
        CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL,
        sizeof(cl_motion_estimation_desc_intel),
        &desc,
        &retVal);

    EXPECT_EQ(CL_INVALID_ACCELERATOR_DESCRIPTOR_INTEL, retVal);
}
} // namespace ULT
compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_intel_tracing_tests.inl000066400000000000000000000733441422164147700276410ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/tracing/tracing_api.h" #include "opencl/source/tracing/tracing_notify.h" #include "opencl/test/unit_test/api/cl_api_tests.h" using namespace NEO; namespace ULT { struct IntelTracingTest : public api_tests { public: IntelTracingTest() {} void SetUp() override { api_tests::SetUp(); } void TearDown() override { api_tests::TearDown(); } protected: static void callback(cl_function_id fid, cl_callback_data *callbackData, void *userData) { ASSERT_NE(nullptr, userData); IntelTracingTest *base = (IntelTracingTest *)userData; base->vcallback(fid, callbackData, nullptr); } virtual void vcallback(cl_function_id fid, cl_callback_data *callbackData, void *userData) {} protected: cl_tracing_handle handle = nullptr; cl_int status = CL_SUCCESS; }; TEST_F(IntelTracingTest, GivenInvalidDeviceWhenCreatingTracingHandleThenInvalidValueErrorIsReturned) { status = clCreateTracingHandleINTEL(nullptr, callback, nullptr, &handle); EXPECT_EQ(static_cast(nullptr), handle); EXPECT_EQ(CL_INVALID_VALUE, status); } TEST_F(IntelTracingTest, GivenInvalidCallbackExpectFailWhenCreatingTracingHandleThenInvalidValueErrorIsReturned) { status = clCreateTracingHandleINTEL(testedClDevice, nullptr, nullptr, &handle); EXPECT_EQ(static_cast(nullptr), handle); EXPECT_EQ(CL_INVALID_VALUE, status); } TEST_F(IntelTracingTest, GivenInvalidHandlePointerWhenCreatingTracingHandleThenInvalidValueErrorIsReturned) { status = clCreateTracingHandleINTEL(testedClDevice, callback, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, status); } TEST_F(IntelTracingTest, GivenNullHandleWhenCallingTracingFunctionThenInvalidValueErrorIsReturned) { status = clSetTracingPointINTEL(nullptr, CL_FUNCTION_clBuildProgram, CL_TRUE); EXPECT_EQ(CL_INVALID_VALUE, status); status = 
clDestroyTracingHandleINTEL(nullptr); EXPECT_EQ(CL_INVALID_VALUE, status); status = clEnableTracingINTEL(nullptr); EXPECT_EQ(CL_INVALID_VALUE, status); status = clDisableTracingINTEL(nullptr); EXPECT_EQ(CL_INVALID_VALUE, status); status = clSetTracingPointINTEL(nullptr, CL_FUNCTION_clBuildProgram, CL_FALSE); EXPECT_EQ(CL_INVALID_VALUE, status); cl_bool enabled = CL_FALSE; status = clGetTracingStateINTEL(nullptr, &enabled); EXPECT_EQ(CL_INVALID_VALUE, status); } TEST_F(IntelTracingTest, GivenInactiveHandleWhenCallingTracingFunctionThenInvalidValueErrorIsReturned) { status = clCreateTracingHandleINTEL(testedClDevice, callback, nullptr, &handle); EXPECT_EQ(CL_SUCCESS, status); status = clDisableTracingINTEL(handle); EXPECT_EQ(CL_INVALID_VALUE, status); status = clDestroyTracingHandleINTEL(handle); EXPECT_EQ(CL_SUCCESS, status); } TEST_F(IntelTracingTest, GivenTooManyHandlesWhenEnablingTracingFunctionThenOutOfResourcesErrorIsReturned) { cl_tracing_handle handle[HostSideTracing::TRACING_MAX_HANDLE_COUNT + 1] = {nullptr}; for (uint32_t i = 0; i < HostSideTracing::TRACING_MAX_HANDLE_COUNT + 1; ++i) { status = clCreateTracingHandleINTEL(testedClDevice, callback, nullptr, &(handle[i])); EXPECT_EQ(CL_SUCCESS, status); } for (uint32_t i = 0; i < HostSideTracing::TRACING_MAX_HANDLE_COUNT; ++i) { status = clEnableTracingINTEL(handle[i]); EXPECT_EQ(CL_SUCCESS, status); } status = clEnableTracingINTEL(handle[HostSideTracing::TRACING_MAX_HANDLE_COUNT]); EXPECT_EQ(CL_OUT_OF_RESOURCES, status); for (uint32_t i = 0; i < HostSideTracing::TRACING_MAX_HANDLE_COUNT; ++i) { status = clDisableTracingINTEL(handle[i]); EXPECT_EQ(CL_SUCCESS, status); } for (uint32_t i = 0; i < HostSideTracing::TRACING_MAX_HANDLE_COUNT + 1; ++i) { status = clDestroyTracingHandleINTEL(handle[i]); EXPECT_EQ(CL_SUCCESS, status); } } TEST_F(IntelTracingTest, GivenInactiveHandleWhenDisablingTracingThenInvalidValueIsReturned) { cl_tracing_handle handle[HostSideTracing::TRACING_MAX_HANDLE_COUNT + 1] = {nullptr}; for 
(uint32_t i = 0; i < HostSideTracing::TRACING_MAX_HANDLE_COUNT + 1; ++i) { status = clCreateTracingHandleINTEL(testedClDevice, callback, nullptr, &(handle[i])); EXPECT_EQ(CL_SUCCESS, status); } for (uint32_t i = 0; i < HostSideTracing::TRACING_MAX_HANDLE_COUNT; ++i) { status = clEnableTracingINTEL(handle[i]); EXPECT_EQ(CL_SUCCESS, status); } status = clDisableTracingINTEL(handle[HostSideTracing::TRACING_MAX_HANDLE_COUNT]); EXPECT_EQ(CL_INVALID_VALUE, status); cl_bool enable = CL_TRUE; status = clGetTracingStateINTEL(handle[HostSideTracing::TRACING_MAX_HANDLE_COUNT], &enable); EXPECT_EQ(CL_SUCCESS, status); EXPECT_EQ(static_cast(CL_FALSE), enable); for (uint32_t i = 0; i < HostSideTracing::TRACING_MAX_HANDLE_COUNT; ++i) { status = clDisableTracingINTEL(handle[i]); EXPECT_EQ(CL_SUCCESS, status); } for (uint32_t i = 0; i < HostSideTracing::TRACING_MAX_HANDLE_COUNT + 1; ++i) { status = clDestroyTracingHandleINTEL(handle[i]); EXPECT_EQ(CL_SUCCESS, status); } } TEST_F(IntelTracingTest, GivenValidParamsWhenCallingTracingFunctionsThenSuccessIsReturned) { status = clCreateTracingHandleINTEL(testedClDevice, callback, this, &handle); EXPECT_EQ(CL_SUCCESS, status); status = clSetTracingPointINTEL(handle, CL_FUNCTION_clBuildProgram, CL_TRUE); EXPECT_EQ(CL_SUCCESS, status); cl_bool enabled = CL_FALSE; status = clGetTracingStateINTEL(handle, &enabled); EXPECT_EQ(CL_SUCCESS, status); EXPECT_EQ(static_cast(CL_FALSE), enabled); status = clEnableTracingINTEL(handle); EXPECT_EQ(CL_SUCCESS, status); status = clGetTracingStateINTEL(handle, &enabled); EXPECT_EQ(CL_SUCCESS, status); EXPECT_EQ(static_cast(CL_TRUE), enabled); status = clDisableTracingINTEL(handle); EXPECT_EQ(CL_SUCCESS, status); status = clGetTracingStateINTEL(handle, &enabled); EXPECT_EQ(CL_SUCCESS, status); EXPECT_EQ(static_cast(CL_FALSE), enabled); status = clSetTracingPointINTEL(handle, CL_FUNCTION_clBuildProgram, CL_FALSE); EXPECT_EQ(CL_SUCCESS, status); status = clDestroyTracingHandleINTEL(handle); 
EXPECT_EQ(CL_SUCCESS, status); } TEST_F(IntelTracingTest, GivenTwoHandlesWhenCallingTracingFunctionsThenSuccessIsReturned) { cl_tracing_handle handle1 = nullptr; status = clCreateTracingHandleINTEL(testedClDevice, callback, this, &handle1); EXPECT_EQ(CL_SUCCESS, status); cl_tracing_handle handle2 = nullptr; status = clCreateTracingHandleINTEL(testedClDevice, callback, this, &handle2); EXPECT_EQ(CL_SUCCESS, status); status = clSetTracingPointINTEL(handle1, CL_FUNCTION_clBuildProgram, CL_TRUE); EXPECT_EQ(CL_SUCCESS, status); status = clSetTracingPointINTEL(handle2, CL_FUNCTION_clBuildProgram, CL_TRUE); EXPECT_EQ(CL_SUCCESS, status); status = clEnableTracingINTEL(handle1); EXPECT_EQ(CL_SUCCESS, status); status = clEnableTracingINTEL(handle2); EXPECT_EQ(CL_SUCCESS, status); cl_bool enabled = CL_FALSE; status = clGetTracingStateINTEL(handle1, &enabled); EXPECT_EQ(CL_SUCCESS, status); EXPECT_EQ(static_cast(CL_TRUE), enabled); status = clGetTracingStateINTEL(handle2, &enabled); EXPECT_EQ(CL_SUCCESS, status); EXPECT_EQ(static_cast(CL_TRUE), enabled); status = clDisableTracingINTEL(handle1); EXPECT_EQ(CL_SUCCESS, status); status = clDisableTracingINTEL(handle2); EXPECT_EQ(CL_SUCCESS, status); status = clDestroyTracingHandleINTEL(handle1); EXPECT_EQ(CL_SUCCESS, status); status = clDestroyTracingHandleINTEL(handle2); EXPECT_EQ(CL_SUCCESS, status); } struct IntelAllTracingTest : public IntelTracingTest { public: IntelAllTracingTest() {} void SetUp() override { IntelTracingTest::SetUp(); status = clCreateTracingHandleINTEL(testedClDevice, callback, this, &handle); ASSERT_NE(nullptr, handle); ASSERT_EQ(CL_SUCCESS, status); for (uint32_t i = 0; i < CL_FUNCTION_COUNT; ++i) { status = clSetTracingPointINTEL(handle, static_cast(i), CL_TRUE); ASSERT_EQ(CL_SUCCESS, status); } status = clEnableTracingINTEL(handle); ASSERT_EQ(CL_SUCCESS, status); } void TearDown() override { status = clDisableTracingINTEL(handle); ASSERT_EQ(CL_SUCCESS, status); status = 
clDestroyTracingHandleINTEL(handle); ASSERT_EQ(CL_SUCCESS, status); IntelTracingTest::TearDown(); } protected: void vcallback(cl_function_id fid, cl_callback_data *callbackData, void *userData) override { if (fid == functionId) { if (callbackData->site == CL_CALLBACK_SITE_ENTER) { ++enterCount; } else if (callbackData->site == CL_CALLBACK_SITE_EXIT) { ++exitCount; } } } uint16_t callFunctions() { uint16_t count = 0; ++count; functionId = CL_FUNCTION_clBuildProgram; clBuildProgram(0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clCloneKernel; clCloneKernel(0, 0); ++count; functionId = CL_FUNCTION_clCompileProgram; clCompileProgram(0, 0, 0, 0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clCreateBuffer; clCreateBuffer(0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clCreateCommandQueue; clCreateCommandQueue(0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clCreateCommandQueueWithProperties; clCreateCommandQueueWithProperties(0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clCreateContext; clCreateContext(0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clCreateContextFromType; clCreateContextFromType(0, 0, 0, 0, 0); ++count; cl_image_desc imageDesc = {0}; functionId = CL_FUNCTION_clCreateImage; clCreateImage(0, 0, 0, &imageDesc, 0, 0); ++count; functionId = CL_FUNCTION_clCreateImage2D; clCreateImage2D(0, 0, 0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clCreateImage3D; clCreateImage3D(0, 0, 0, 0, 0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clCreateKernel; clCreateKernel(0, 0, 0); ++count; functionId = CL_FUNCTION_clCreateKernelsInProgram; clCreateKernelsInProgram(0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clCreatePipe; clCreatePipe(0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clCreateProgramWithBinary; const size_t length = 32; unsigned char binary[length] = {0}; clCreateProgramWithBinary(0, 0, &(testedClDevice), &length, reinterpret_cast(&binary), 0, 0); ++count; functionId = CL_FUNCTION_clCreateProgramWithBuiltInKernels; 
clCreateProgramWithBuiltInKernels(0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clCreateProgramWithIL; clCreateProgramWithIL(0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clCreateProgramWithSource; clCreateProgramWithSource(0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clCreateSampler; clCreateSampler(0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clCreateSamplerWithProperties; clCreateSamplerWithProperties(0, 0, 0); ++count; functionId = CL_FUNCTION_clCreateSubBuffer; clCreateSubBuffer(0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clCreateUserEvent; clCreateUserEvent(0, 0); ++count; functionId = CL_FUNCTION_clEnqueueBarrier; clEnqueueBarrier(0); ++count; functionId = CL_FUNCTION_clEnqueueBarrierWithWaitList; clEnqueueBarrierWithWaitList(0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clEnqueueCopyBuffer; clEnqueueCopyBuffer(0, 0, 0, 0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clEnqueueCopyBufferRect; clEnqueueCopyBufferRect(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clEnqueueCopyBufferToImage; clEnqueueCopyBufferToImage(0, 0, 0, 0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clEnqueueCopyImage; clEnqueueCopyImage(0, 0, 0, 0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clEnqueueCopyImageToBuffer; clEnqueueCopyImageToBuffer(0, 0, 0, 0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clEnqueueFillBuffer; clEnqueueFillBuffer(0, 0, 0, 0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clEnqueueFillImage; clEnqueueFillImage(0, 0, 0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clEnqueueMapBuffer; clEnqueueMapBuffer(0, 0, 0, 0, 0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clEnqueueMapImage; clEnqueueMapImage(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clEnqueueMarker; clEnqueueMarker(0, 0); ++count; functionId = CL_FUNCTION_clEnqueueMarkerWithWaitList; clEnqueueMarkerWithWaitList(0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clEnqueueMigrateMemObjects; 
clEnqueueMigrateMemObjects(0, 0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clEnqueueNDRangeKernel; clEnqueueNDRangeKernel(0, 0, 0, 0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clEnqueueNativeKernel; clEnqueueNativeKernel(0, 0, 0, 0, 0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clEnqueueReadBuffer; clEnqueueReadBuffer(0, 0, 0, 0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clEnqueueReadBufferRect; clEnqueueReadBufferRect(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clEnqueueReadImage; clEnqueueReadImage(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clEnqueueSVMFree; clEnqueueSVMFree(0, 0, 0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clEnqueueSVMMap; clEnqueueSVMMap(0, 0, 0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clEnqueueSVMMemFill; clEnqueueSVMMemFill(0, 0, 0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clEnqueueSVMMemcpy; clEnqueueSVMMemcpy(0, 0, 0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clEnqueueSVMMigrateMem; clEnqueueSVMMigrateMem(0, 0, 0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clEnqueueSVMUnmap; clEnqueueSVMUnmap(0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clEnqueueTask; clEnqueueTask(0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clEnqueueUnmapMemObject; clEnqueueUnmapMemObject(0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clEnqueueWaitForEvents; clEnqueueWaitForEvents(0, 0, 0); ++count; functionId = CL_FUNCTION_clEnqueueWriteBuffer; clEnqueueWriteBuffer(0, 0, 0, 0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clEnqueueWriteBufferRect; clEnqueueWriteBufferRect(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clEnqueueWriteImage; clEnqueueWriteImage(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clFinish; clFinish(0); ++count; functionId = CL_FUNCTION_clFlush; clFlush(0); ++count; functionId = CL_FUNCTION_clGetCommandQueueInfo; clGetCommandQueueInfo(0, 0, 0, 0, 
0); ++count; functionId = CL_FUNCTION_clGetContextInfo; clGetContextInfo(0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clGetDeviceAndHostTimer; clGetDeviceAndHostTimer(0, 0, 0); ++count; functionId = CL_FUNCTION_clGetDeviceIDs; clGetDeviceIDs(0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clGetDeviceInfo; clGetDeviceInfo(0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clGetEventInfo; clGetEventInfo(0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clGetEventProfilingInfo; clGetEventProfilingInfo(0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clGetExtensionFunctionAddress; clGetExtensionFunctionAddress("test"); ++count; functionId = CL_FUNCTION_clGetExtensionFunctionAddressForPlatform; clGetExtensionFunctionAddressForPlatform(0, "test"); ++count; functionId = CL_FUNCTION_clGetHostTimer; clGetHostTimer(0, 0); ++count; functionId = CL_FUNCTION_clGetImageInfo; clGetImageInfo(0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clGetKernelArgInfo; clGetKernelArgInfo(0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clGetKernelInfo; clGetKernelInfo(0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clGetKernelSubGroupInfo; clGetKernelSubGroupInfo(0, 0, 0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clGetKernelWorkGroupInfo; clGetKernelWorkGroupInfo(0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clGetMemObjectInfo; clGetMemObjectInfo(0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clGetPipeInfo; clGetPipeInfo(0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clGetPlatformIDs; clGetPlatformIDs(0, 0, 0); ++count; functionId = CL_FUNCTION_clGetPlatformInfo; clGetPlatformInfo(0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clGetProgramBuildInfo; clGetProgramBuildInfo(0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clGetProgramInfo; clGetProgramInfo(0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clGetSamplerInfo; clGetSamplerInfo(0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clGetSupportedImageFormats; clGetSupportedImageFormats(0, 0, 0, 
0, 0, 0); ++count; functionId = CL_FUNCTION_clLinkProgram; clLinkProgram(0, 0, 0, 0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clReleaseCommandQueue; clReleaseCommandQueue(0); ++count; functionId = CL_FUNCTION_clReleaseContext; clReleaseContext(0); ++count; functionId = CL_FUNCTION_clReleaseDevice; clReleaseDevice(0); ++count; functionId = CL_FUNCTION_clReleaseEvent; clReleaseEvent(0); ++count; functionId = CL_FUNCTION_clReleaseKernel; clReleaseKernel(0); ++count; functionId = CL_FUNCTION_clReleaseMemObject; clReleaseMemObject(0); ++count; functionId = CL_FUNCTION_clReleaseProgram; clReleaseProgram(0); ++count; functionId = CL_FUNCTION_clReleaseSampler; clReleaseSampler(0); ++count; functionId = CL_FUNCTION_clRetainCommandQueue; clRetainCommandQueue(0); ++count; functionId = CL_FUNCTION_clRetainContext; clRetainContext(0); ++count; functionId = CL_FUNCTION_clRetainDevice; clRetainDevice(0); ++count; functionId = CL_FUNCTION_clRetainEvent; clRetainEvent(0); ++count; functionId = CL_FUNCTION_clRetainKernel; clRetainKernel(0); ++count; functionId = CL_FUNCTION_clRetainMemObject; clRetainMemObject(0); ++count; functionId = CL_FUNCTION_clRetainProgram; clRetainProgram(0); ++count; functionId = CL_FUNCTION_clRetainSampler; clRetainSampler(0); ++count; functionId = CL_FUNCTION_clSVMAlloc; clSVMAlloc(0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clSVMFree; clSVMFree(0, 0); ++count; functionId = CL_FUNCTION_clSetCommandQueueProperty; clSetCommandQueueProperty(0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clSetDefaultDeviceCommandQueue; clSetDefaultDeviceCommandQueue(0, 0, 0); ++count; functionId = CL_FUNCTION_clSetEventCallback; clSetEventCallback(0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clSetKernelArg; clSetKernelArg(0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clSetKernelArgSVMPointer; clSetKernelArgSVMPointer(0, 0, 0); ++count; functionId = CL_FUNCTION_clSetKernelExecInfo; clSetKernelExecInfo(0, 0, 0, 0); ++count; functionId = 
CL_FUNCTION_clSetMemObjectDestructorCallback; clSetMemObjectDestructorCallback(0, 0, 0); ++count; functionId = CL_FUNCTION_clSetUserEventStatus; clSetUserEventStatus(0, 0); ++count; functionId = CL_FUNCTION_clUnloadCompiler; clUnloadCompiler(); ++count; functionId = CL_FUNCTION_clUnloadPlatformCompiler; clUnloadPlatformCompiler(0); ++count; functionId = CL_FUNCTION_clWaitForEvents; clWaitForEvents(0, 0); return count; } protected: uint16_t enterCount = 0; uint16_t exitCount = 0; cl_function_id functionId = CL_FUNCTION_COUNT; }; TEST_F(IntelAllTracingTest, GivenValidFunctionWhenTracingThenTracingIsPerformed) { uint16_t count = callFunctions(); EXPECT_EQ(count, enterCount); EXPECT_EQ(count, exitCount); } TEST_F(IntelAllTracingTest, GivenNoFunctionsWhenTracingThenTracingIsNotPerformed) { for (uint32_t i = 0; i < CL_FUNCTION_COUNT; ++i) { status = clSetTracingPointINTEL(handle, static_cast(i), CL_FALSE); EXPECT_EQ(CL_SUCCESS, status); } callFunctions(); EXPECT_EQ(0, enterCount); EXPECT_EQ(0, exitCount); } struct IntelAllTracingWithMaxHandlesTest : public IntelAllTracingTest { public: IntelAllTracingWithMaxHandlesTest() {} void SetUp() override { IntelTracingTest::SetUp(); for (size_t i = 0; i < HostSideTracing::TRACING_MAX_HANDLE_COUNT; ++i) { status = clCreateTracingHandleINTEL(testedClDevice, callback, this, handleList + i); ASSERT_NE(nullptr, handleList[i]); ASSERT_EQ(CL_SUCCESS, status); } for (size_t i = 0; i < HostSideTracing::TRACING_MAX_HANDLE_COUNT; ++i) { for (uint32_t j = 0; j < CL_FUNCTION_COUNT; ++j) { status = clSetTracingPointINTEL(handleList[i], static_cast(j), CL_TRUE); ASSERT_EQ(CL_SUCCESS, status); } } for (size_t i = 0; i < HostSideTracing::TRACING_MAX_HANDLE_COUNT; ++i) { status = clEnableTracingINTEL(handleList[i]); ASSERT_EQ(CL_SUCCESS, status); } } void TearDown() override { for (size_t i = 0; i < HostSideTracing::TRACING_MAX_HANDLE_COUNT; ++i) { status = clDisableTracingINTEL(handleList[i]); ASSERT_EQ(CL_SUCCESS, status); status = 
clDestroyTracingHandleINTEL(handleList[i]); ASSERT_EQ(CL_SUCCESS, status); } IntelTracingTest::TearDown(); } protected: cl_tracing_handle handleList[HostSideTracing::TRACING_MAX_HANDLE_COUNT] = {nullptr}; }; TEST_F(IntelAllTracingWithMaxHandlesTest, GivenAllFunctionsWithMaxHandlesWhenTracingThenTracingIsPerformed) { uint16_t count = callFunctions(); EXPECT_EQ(count * HostSideTracing::TRACING_MAX_HANDLE_COUNT, enterCount); EXPECT_EQ(count * HostSideTracing::TRACING_MAX_HANDLE_COUNT, exitCount); } struct IntelClGetDeviceInfoTracingCollectTest : public IntelAllTracingTest { public: IntelClGetDeviceInfoTracingCollectTest() {} protected: void call(cl_device_id target) { device = target; status = clGetDeviceInfo(device, CL_DEVICE_VENDOR, 0, nullptr, ¶mValueSizeRet); } void vcallback(cl_function_id fid, cl_callback_data *callbackData, void *userData) override { EXPECT_EQ(CL_FUNCTION_clGetDeviceInfo, fid); EXPECT_NE(nullptr, callbackData); if (callbackData->site == CL_CALLBACK_SITE_ENTER) { correlationId = callbackData->correlationId; EXPECT_NE(nullptr, callbackData->correlationData); callbackData->correlationData[0] = 777ull; } else { EXPECT_EQ(correlationId, callbackData->correlationId); EXPECT_NE(nullptr, callbackData->correlationData); EXPECT_EQ(777ull, callbackData->correlationData[0]); } EXPECT_NE(nullptr, callbackData->functionName); EXPECT_STREQ("clGetDeviceInfo", callbackData->functionName); EXPECT_NE(nullptr, callbackData->functionParams); if (callbackData->site == CL_CALLBACK_SITE_ENTER) { EXPECT_EQ(nullptr, callbackData->functionReturnValue); } else { EXPECT_NE(nullptr, callbackData->functionReturnValue); } cl_params_clGetDeviceInfo *params = (cl_params_clGetDeviceInfo *)callbackData->functionParams; EXPECT_NE(nullptr, *params->device); EXPECT_EQ(static_cast(CL_DEVICE_VENDOR), *params->paramName); EXPECT_EQ(0u, *params->paramValueSize); EXPECT_EQ(nullptr, *params->paramValue); if (callbackData->site == CL_CALLBACK_SITE_EXIT) { cl_int *retVal = (cl_int 
*)callbackData->functionReturnValue; EXPECT_EQ(CL_SUCCESS, *retVal); EXPECT_LT(0u, **params->paramValueSizeRet); } if (callbackData->site == CL_CALLBACK_SITE_ENTER) { ++enterCount; } else if (callbackData->site == CL_CALLBACK_SITE_EXIT) { ++exitCount; } } protected: cl_device_id device = nullptr; size_t paramValueSizeRet = 0; uint64_t correlationId = 0; }; TEST_F(IntelClGetDeviceInfoTracingCollectTest, GivenGeneralCollectionWhenTracingThenTracingIsPerformed) { call(testedClDevice); EXPECT_EQ(CL_SUCCESS, status); EXPECT_LT(0u, paramValueSizeRet); EXPECT_EQ(1u, enterCount); EXPECT_EQ(1u, exitCount); } struct IntelClGetDeviceInfoTracingChangeParamsTest : public IntelAllTracingTest { public: IntelClGetDeviceInfoTracingChangeParamsTest() {} protected: void call(cl_device_id target) { device = target; status = clGetDeviceInfo(device, CL_DEVICE_VENDOR, 0, nullptr, ¶mValueSizeRet); } void vcallback(cl_function_id fid, cl_callback_data *callbackData, void *userData) override { if (callbackData->site == CL_CALLBACK_SITE_ENTER) { cl_params_clGetDeviceInfo *params = (cl_params_clGetDeviceInfo *)callbackData->functionParams; *params->paramValueSize = paramValueSize; *params->paramValue = paramValue; } } protected: cl_device_id device = nullptr; size_t paramValueSizeRet = 0; static const size_t paramValueSize = 256; char paramValue[paramValueSize] = {'\0'}; }; TEST_F(IntelClGetDeviceInfoTracingChangeParamsTest, GivenTracingCallbackWithParamsChangeWhenApiFunctionIsCalledThenParamsAreChanged) { paramValue[0] = '\0'; call(testedClDevice); EXPECT_EQ(CL_SUCCESS, status); EXPECT_LT(0u, paramValueSizeRet); EXPECT_STRNE("", paramValue); } struct IntelClGetDeviceInfoTracingChangeRetValTest : public IntelAllTracingTest { public: IntelClGetDeviceInfoTracingChangeRetValTest() {} protected: void call(cl_device_id target) { device = target; status = clGetDeviceInfo(device, CL_DEVICE_VENDOR, 0, nullptr, ¶mValueSizeRet); } void vcallback(cl_function_id fid, cl_callback_data *callbackData, void 
*userData) override { if (callbackData->site == CL_CALLBACK_SITE_EXIT) { cl_int *retVal = reinterpret_cast(callbackData->functionReturnValue); *retVal = CL_INVALID_VALUE; } } protected: cl_device_id device = nullptr; size_t paramValueSizeRet = 0; }; TEST_F(IntelClGetDeviceInfoTracingChangeRetValTest, GivenTracingCallbackWithRetValChangeToInvalidValueWhenApiFunctionIsCalledThenInvalidValueErrorIsReturned) { call(testedClDevice); EXPECT_EQ(CL_INVALID_VALUE, status); EXPECT_LT(0u, paramValueSizeRet); } struct IntelClGetDeviceInfoTwoHandlesTracingCollectTest : public IntelClGetDeviceInfoTracingCollectTest { public: IntelClGetDeviceInfoTwoHandlesTracingCollectTest() {} void SetUp() override { IntelClGetDeviceInfoTracingCollectTest::SetUp(); status = clCreateTracingHandleINTEL(testedClDevice, callback, this, &secondHandle); ASSERT_NE(nullptr, secondHandle); ASSERT_EQ(CL_SUCCESS, status); status = clSetTracingPointINTEL(secondHandle, CL_FUNCTION_clGetDeviceInfo, CL_TRUE); ASSERT_EQ(CL_SUCCESS, status); status = clEnableTracingINTEL(secondHandle); ASSERT_EQ(CL_SUCCESS, status); } void TearDown() override { status = clDisableTracingINTEL(secondHandle); ASSERT_EQ(CL_SUCCESS, status); status = clDestroyTracingHandleINTEL(secondHandle); ASSERT_EQ(CL_SUCCESS, status); IntelClGetDeviceInfoTracingCollectTest::TearDown(); } protected: cl_tracing_handle secondHandle = nullptr; }; TEST_F(IntelClGetDeviceInfoTwoHandlesTracingCollectTest, GivenTwoHandlesWhenTracingThenTracingIsPerformed) { call(testedClDevice); EXPECT_EQ(CL_SUCCESS, status); EXPECT_LT(0u, paramValueSizeRet); EXPECT_EQ(2u, enterCount); EXPECT_EQ(2u, exitCount); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_link_program_tests.inl000066400000000000000000000323641422164147700275000ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/compiler_interface/compiler_interface.h" #include 
"shared/source/device_binary_format/elf/elf_decoder.h" #include "shared/source/helpers/file_io.h" #include "shared/test/common/helpers/test_files.h" #include "shared/test/common/libult/global_environment.h" #include "opencl/source/context/context.h" #include "cl_api_tests.h" #include "compiler_options.h" using namespace NEO; namespace ULT { typedef api_tests clLinkProgramTests; TEST_F(clLinkProgramTests, GivenValidParamsWhenLinkingProgramThenSuccessIsReturned) { cl_program pProgram = nullptr; size_t sourceSize = 0; std::string testFile; testFile.append(clFiles); testFile.append("copybuffer.cl"); auto pSource = loadDataFromFile( testFile.c_str(), sourceSize); ASSERT_NE(0u, sourceSize); ASSERT_NE(nullptr, pSource); const char *sources[1] = {pSource.get()}; pProgram = clCreateProgramWithSource( pContext, 1, sources, &sourceSize, &retVal); EXPECT_NE(nullptr, pProgram); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clCompileProgram( pProgram, 1, &testedClDevice, nullptr, 0, nullptr, nullptr, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); cl_program program = pProgram; cl_program oprog; oprog = clLinkProgram( pContext, 1, &testedClDevice, nullptr, 1, &program, nullptr, nullptr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseProgram(oprog); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clLinkProgramTests, GivenCreateLibraryOptionWhenLinkingProgramThenSuccessIsReturned) { cl_program pProgram = nullptr; size_t sourceSize = 0; std::string testFile; testFile.append(clFiles); testFile.append("copybuffer.cl"); auto pSource = loadDataFromFile( testFile.c_str(), sourceSize); ASSERT_NE(0u, sourceSize); ASSERT_NE(nullptr, pSource); const char *sources[1] = {pSource.get()}; pProgram = clCreateProgramWithSource( pContext, 1, sources, &sourceSize, &retVal); EXPECT_NE(nullptr, pProgram); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clCompileProgram( pProgram, 1, &testedClDevice, nullptr, 0, nullptr, nullptr, nullptr, 
nullptr); ASSERT_EQ(CL_SUCCESS, retVal); cl_program program = pProgram; cl_program oprog; oprog = clLinkProgram( pContext, 1, &testedClDevice, CompilerOptions::createLibrary.data(), 1, &program, nullptr, nullptr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseProgram(oprog); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clLinkProgramTests, GivenNullContextWhenLinkingProgramThenClInvalidContextErrorIsReturned) { cl_program program = {0}; cl_program oprog; oprog = clLinkProgram( nullptr, 1, &testedClDevice, nullptr, 1, &program, nullptr, nullptr, &retVal); EXPECT_EQ(CL_INVALID_CONTEXT, retVal); EXPECT_EQ(nullptr, oprog); } template std::vector asVec(const uint8_t *src, size_t size) { auto beg = reinterpret_cast(src); auto end = beg + size / sizeof(T); return std::vector(beg, end); } TEST_F(clLinkProgramTests, GivenProgramsWithSpecConstantsThenSpecConstantsAreEmbeddedIntoElf) { uint8_t ir1[] = {15, 17, 19, 23}; uint8_t ir2[] = {29, 31, 37, 41}; uint8_t ir3[] = {43, 47, 53, 59}; uint32_t prog1Keys[2] = {2, 3}; uint64_t prog1Values[2] = {5, 7}; uint32_t prog2Keys[1] = {11}; uint64_t prog2Values[1] = {13}; auto progSrc1 = clUniquePtr(new MockProgram(pContext, false, toClDeviceVector(*pDevice))); progSrc1->specConstantsValues[prog1Keys[0]] = prog1Values[0]; progSrc1->specConstantsValues[prog1Keys[1]] = prog1Values[1]; progSrc1->areSpecializationConstantsInitialized = true; progSrc1->irBinary = makeCopy(ir1, sizeof(ir1)); progSrc1->irBinarySize = sizeof(ir1); progSrc1->isSpirV = true; auto progSrc2 = clUniquePtr(new MockProgram(pContext, false, toClDeviceVector(*pDevice))); progSrc2->specConstantsValues[prog2Keys[0]] = prog2Values[0]; progSrc2->areSpecializationConstantsInitialized = true; progSrc2->irBinary = makeCopy(ir2, sizeof(ir2)); progSrc2->irBinarySize = sizeof(ir2); progSrc2->isSpirV = true; auto progSrc3 = clUniquePtr(new MockProgram(pContext, false, toClDeviceVector(*pDevice))); 
progSrc3->irBinary = makeCopy(ir3, sizeof(ir3)); progSrc3->irBinarySize = sizeof(ir3); progSrc3->isSpirV = true; auto progDst = clUniquePtr(new MockProgram(pContext, false, toClDeviceVector(*pDevice))); cl_program inputPrograms[3] = {progSrc1.get(), progSrc2.get(), progSrc3.get()}; std::string receivedInput; MockCompilerDebugVars igcDebugVars; igcDebugVars.receivedInput = &receivedInput; gEnvironment->igcPushDebugVars(igcDebugVars); progDst->link(progDst->getDevices(), "", 3, inputPrograms); gEnvironment->igcPopDebugVars(); std::string elfDecodeError; std::string elfDecoceWarnings; auto elf = NEO::Elf::decodeElf(ArrayRef::fromAny(receivedInput.data(), receivedInput.size()), elfDecodeError, elfDecoceWarnings); ASSERT_NE(nullptr, elf.elfFileHeader) << elfDecodeError; ASSERT_EQ(8U, elf.sectionHeaders.size()); EXPECT_EQ(NEO::Elf::SHT_OPENCL_SPIRV_SC_IDS, elf.sectionHeaders[1].header->type); EXPECT_EQ(NEO::Elf::SHT_OPENCL_SPIRV_SC_VALUES, elf.sectionHeaders[2].header->type); EXPECT_EQ(NEO::Elf::SHT_OPENCL_SPIRV, elf.sectionHeaders[3].header->type); EXPECT_EQ(NEO::Elf::SHT_OPENCL_SPIRV_SC_IDS, elf.sectionHeaders[4].header->type); EXPECT_EQ(NEO::Elf::SHT_OPENCL_SPIRV_SC_VALUES, elf.sectionHeaders[5].header->type); EXPECT_EQ(NEO::Elf::SHT_OPENCL_SPIRV, elf.sectionHeaders[6].header->type); EXPECT_EQ(NEO::Elf::SHT_OPENCL_SPIRV, elf.sectionHeaders[7].header->type); ASSERT_EQ(sizeof(uint32_t) * progSrc1->specConstantsValues.size(), elf.sectionHeaders[1].data.size()); ASSERT_EQ(sizeof(uint64_t) * progSrc1->specConstantsValues.size(), elf.sectionHeaders[2].data.size()); ASSERT_EQ(sizeof(ir1), elf.sectionHeaders[3].data.size()); ASSERT_EQ(sizeof(uint32_t) * progSrc2->specConstantsValues.size(), elf.sectionHeaders[4].data.size()); ASSERT_EQ(sizeof(uint64_t) * progSrc2->specConstantsValues.size(), elf.sectionHeaders[5].data.size()); ASSERT_EQ(sizeof(ir2), elf.sectionHeaders[6].data.size()); ASSERT_EQ(sizeof(ir3), elf.sectionHeaders[7].data.size()); auto readSpecConstId = 
[](NEO::Elf::Elf::SectionHeaderAndData §ion, uint32_t offset) { return *(reinterpret_cast(section.data.begin()) + offset); }; auto readSpecConstValue = [](NEO::Elf::Elf::SectionHeaderAndData §ion, uint32_t offset) { return *(reinterpret_cast(section.data.begin()) + offset); }; ASSERT_EQ(1U, progSrc1->specConstantsValues.count(readSpecConstId(elf.sectionHeaders[1], 0))); EXPECT_EQ(progSrc1->specConstantsValues[readSpecConstId(elf.sectionHeaders[1], 0)], readSpecConstValue(elf.sectionHeaders[2], 0)); ASSERT_EQ(1U, progSrc1->specConstantsValues.count(readSpecConstId(elf.sectionHeaders[1], 1))); EXPECT_EQ(progSrc1->specConstantsValues[readSpecConstId(elf.sectionHeaders[1], 1)], readSpecConstValue(elf.sectionHeaders[2], 1)); EXPECT_EQ(0, memcmp(ir1, elf.sectionHeaders[3].data.begin(), sizeof(ir1))); ASSERT_EQ(1U, progSrc2->specConstantsValues.count(readSpecConstId(elf.sectionHeaders[4], 0))); EXPECT_EQ(progSrc2->specConstantsValues[readSpecConstId(elf.sectionHeaders[4], 0)], readSpecConstValue(elf.sectionHeaders[5], 0)); EXPECT_EQ(0, memcmp(ir2, elf.sectionHeaders[6].data.begin(), sizeof(ir2))); EXPECT_EQ(0, memcmp(ir3, elf.sectionHeaders[7].data.begin(), sizeof(ir3))); } TEST_F(clLinkProgramTests, GivenInvalidCallbackInputWhenLinkProgramThenInvalidValueErrorIsReturned) { cl_program pProgram = nullptr; size_t sourceSize = 0; std::string testFile; testFile.append(clFiles); testFile.append("copybuffer.cl"); auto pSource = loadDataFromFile( testFile.c_str(), sourceSize); ASSERT_NE(0u, sourceSize); ASSERT_NE(nullptr, pSource); const char *sources[1] = {pSource.get()}; pProgram = clCreateProgramWithSource( pContext, 1, sources, &sourceSize, &retVal); EXPECT_NE(nullptr, pProgram); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clCompileProgram( pProgram, 1, &testedClDevice, nullptr, 0, nullptr, nullptr, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); cl_program program = pProgram; cl_program oprog; oprog = clLinkProgram( pContext, 1, &testedClDevice, 
CompilerOptions::createLibrary.data(), 1, &program, nullptr, &retVal, &retVal); EXPECT_EQ(CL_INVALID_VALUE, retVal); EXPECT_EQ(nullptr, oprog); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clLinkProgramTests, GivenValidCallbackInputWhenLinkProgramThenCallbackIsInvoked) { cl_program pProgram = nullptr; size_t sourceSize = 0; std::string testFile; testFile.append(clFiles); testFile.append("copybuffer.cl"); auto pSource = loadDataFromFile( testFile.c_str(), sourceSize); ASSERT_NE(0u, sourceSize); ASSERT_NE(nullptr, pSource); const char *sources[1] = {pSource.get()}; pProgram = clCreateProgramWithSource( pContext, 1, sources, &sourceSize, &retVal); EXPECT_NE(nullptr, pProgram); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clCompileProgram( pProgram, 1, &testedClDevice, nullptr, 0, nullptr, nullptr, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); cl_program program = pProgram; cl_program oprog; char userData = 0; oprog = clLinkProgram( pContext, 1, &testedClDevice, CompilerOptions::createLibrary.data(), 1, &program, notifyFuncProgram, &userData, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ('a', userData); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseProgram(oprog); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clLinkProgramTests, givenMultiDeviceProgramWhenLinkingForInvalidDevicesInputThenInvalidDeviceErrorIsReturned) { cl_program pProgram = nullptr; size_t sourceSize = 0; std::string testFile; testFile.append(clFiles); testFile.append("copybuffer.cl"); auto pSource = loadDataFromFile( testFile.c_str(), sourceSize); ASSERT_NE(0u, sourceSize); ASSERT_NE(nullptr, pSource); const char *sources[1] = {pSource.get()}; pProgram = clCreateProgramWithSource( pContext, 1, sources, &sourceSize, &retVal); EXPECT_NE(nullptr, pProgram); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clCompileProgram( pProgram, 1, &testedClDevice, nullptr, 0, nullptr, nullptr, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); cl_program 
program = pProgram; cl_program outProgram; MockContext mockContext; cl_device_id nullDeviceInput[] = {pContext->getDevice(0), nullptr}; cl_device_id notAssociatedDeviceInput[] = {mockContext.getDevice(0)}; cl_device_id validDeviceInput[] = {pContext->getDevice(0)}; outProgram = clLinkProgram( pContext, 0, validDeviceInput, nullptr, 1, &program, nullptr, nullptr, &retVal); EXPECT_EQ(CL_INVALID_VALUE, retVal); EXPECT_EQ(nullptr, outProgram); outProgram = clLinkProgram( pContext, 1, nullptr, nullptr, 1, &program, nullptr, nullptr, &retVal); EXPECT_EQ(CL_INVALID_VALUE, retVal); EXPECT_EQ(nullptr, outProgram); outProgram = clLinkProgram( pContext, 2, nullDeviceInput, nullptr, 1, &program, nullptr, nullptr, &retVal); EXPECT_EQ(CL_INVALID_DEVICE, retVal); EXPECT_EQ(nullptr, outProgram); outProgram = clLinkProgram( pContext, 1, notAssociatedDeviceInput, nullptr, 1, &program, nullptr, nullptr, &retVal); EXPECT_EQ(CL_INVALID_DEVICE, retVal); EXPECT_EQ(nullptr, outProgram); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_mem_locally_uncached_resource_tests.cpp000066400000000000000000000500551422164147700330470ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/device/device.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/state_base_address.h" #include "shared/test/common/cmd_parse/hw_parse.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/extensions/public/cl_ext_private.h" #include "opencl/source/api/api.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/kernel/kernel.h" #include 
"opencl/test/unit_test/fixtures/hello_world_fixture.h" #include "hw_cmds.h" using namespace NEO; namespace clMemLocallyUncachedResourceTests { template uint32_t argMocs(Kernel &kernel, size_t argIndex) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; auto surfaceStateHeapAddress = kernel.getSurfaceStateHeap(); auto surfaceStateHeapAddressOffset = static_cast(kernel.getKernelInfo().getArgDescriptorAt(static_cast(argIndex)).as().bindful); auto surfaceState = reinterpret_cast(ptrOffset(surfaceStateHeapAddress, surfaceStateHeapAddressOffset)); return surfaceState->getMemoryObjectControlState(); } template uint32_t cmdQueueMocs(CommandQueue *pCmdQ) { using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; auto pCmdQHw = reinterpret_cast *>(pCmdQ); auto &csr = pCmdQHw->getGpgpuCommandStreamReceiver(); HardwareParse hwParse; hwParse.parseCommands(csr.getCS(0), 0); auto itorCmd = reverse_find(hwParse.cmdList.rbegin(), hwParse.cmdList.rend()); EXPECT_NE(hwParse.cmdList.rend(), itorCmd); auto sba = genCmdCast(*itorCmd); EXPECT_NE(nullptr, sba); return sba->getStatelessDataPortAccessMemoryObjectControlState(); } const size_t n = 512; [[maybe_unused]] const size_t globalWorkSize[3] = {n, 1, 1}; [[maybe_unused]] const size_t localWorkSize[3] = {256, 1, 1}; [[maybe_unused]] const cl_mem_properties_intel *propertiesCacheable = nullptr; [[maybe_unused]] const cl_mem_properties_intel propertiesUncacheable[] = {CL_MEM_FLAGS_INTEL, CL_MEM_LOCALLY_UNCACHED_RESOURCE, 0}; [[maybe_unused]] const cl_mem_properties_intel propertiesUncacheableInSurfaceState[] = {CL_MEM_FLAGS_INTEL, CL_MEM_LOCALLY_UNCACHED_SURFACE_STATE_RESOURCE, 0}; using clMemLocallyUncachedResourceFixture = Test>; HWCMDTEST_F(IGFX_GEN8_CORE, clMemLocallyUncachedResourceFixture, GivenAtLeastOneLocallyUncacheableResourceWhenSettingKernelArgumentsThenKernelIsUncacheable) { cl_int retVal = CL_SUCCESS; MockKernelWithInternals mockKernel(*this->pClDevice, context, true); auto kernel = 
mockKernel.mockKernel; auto pMultiDeviceKernel = mockKernel.mockMultiDeviceKernel; auto bufferCacheable1 = clCreateBufferWithPropertiesINTEL(context, propertiesCacheable, 0, n * sizeof(float), nullptr, nullptr); auto pBufferCacheable1 = clUniquePtr(castToObject(bufferCacheable1)); auto bufferCacheable2 = clCreateBufferWithPropertiesINTEL(context, propertiesCacheable, 0, n * sizeof(float), nullptr, nullptr); auto pBufferCacheable2 = clUniquePtr(castToObject(bufferCacheable2)); auto bufferUncacheable1 = clCreateBufferWithPropertiesINTEL(context, propertiesUncacheable, 0, n * sizeof(float), nullptr, nullptr); auto pBufferUncacheable1 = clUniquePtr(castToObject(bufferUncacheable1)); auto bufferUncacheable2 = clCreateBufferWithPropertiesINTEL(context, propertiesUncacheable, 0, n * sizeof(float), nullptr, nullptr); auto pBufferUncacheable2 = clUniquePtr(castToObject(bufferUncacheable2)); auto mocsCacheable = pClDevice->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); auto mocsUncacheable = pClDevice->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED); retVal = clSetKernelArg(pMultiDeviceKernel, 0, sizeof(cl_mem), &bufferCacheable1); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, argMocs(*kernel, 0)); retVal = clSetKernelArg(pMultiDeviceKernel, 1, sizeof(cl_mem), &bufferCacheable2); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, argMocs(*kernel, 1)); EXPECT_TRUE(kernel->isPatched()); retVal = clEnqueueNDRangeKernel(pCmdQ, pMultiDeviceKernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, cmdQueueMocs(pCmdQ)); EXPECT_FALSE(kernel->hasUncacheableStatelessArgs()); retVal = clSetKernelArg(pMultiDeviceKernel, 0, sizeof(cl_mem), &bufferUncacheable1); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsUncacheable, argMocs(*kernel, 0)); EXPECT_TRUE(kernel->isPatched()); retVal = clEnqueueNDRangeKernel(pCmdQ, pMultiDeviceKernel, 1, nullptr, globalWorkSize, 
localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsUncacheable, cmdQueueMocs(pCmdQ)); EXPECT_TRUE(kernel->hasUncacheableStatelessArgs()); retVal = clSetKernelArg(pMultiDeviceKernel, 1, sizeof(cl_mem), &bufferUncacheable2); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsUncacheable, argMocs(*kernel, 0)); EXPECT_TRUE(kernel->isPatched()); retVal = clEnqueueNDRangeKernel(pCmdQ, pMultiDeviceKernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsUncacheable, cmdQueueMocs(pCmdQ)); EXPECT_TRUE(kernel->hasUncacheableStatelessArgs()); retVal = clSetKernelArg(pMultiDeviceKernel, 0, sizeof(cl_mem), &bufferCacheable1); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, argMocs(*kernel, 0)); EXPECT_TRUE(kernel->isPatched()); retVal = clEnqueueNDRangeKernel(pCmdQ, pMultiDeviceKernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsUncacheable, cmdQueueMocs(pCmdQ)); EXPECT_TRUE(kernel->hasUncacheableStatelessArgs()); retVal = clSetKernelArg(pMultiDeviceKernel, 1, sizeof(cl_mem), &bufferCacheable2); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, argMocs(*kernel, 1)); EXPECT_TRUE(kernel->isPatched()); retVal = clEnqueueNDRangeKernel(pCmdQ, pMultiDeviceKernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, cmdQueueMocs(pCmdQ)); EXPECT_FALSE(kernel->hasUncacheableStatelessArgs()); } HWCMDTEST_F(IGFX_GEN8_CORE, clMemLocallyUncachedResourceFixture, givenBuffersThatAreUncachedInSurfaceStateWhenStatelessIsProgrammedThenItIsCached) { cl_int retVal = CL_SUCCESS; MockKernelWithInternals mockKernel(*this->pClDevice, context, true); auto kernel = mockKernel.mockKernel; auto pMultiDeviceKernel = mockKernel.mockMultiDeviceKernel; EXPECT_EQ(CL_SUCCESS, retVal); auto bufferCacheable1 = clCreateBufferWithPropertiesINTEL(context, propertiesCacheable, 0, n * 
sizeof(float), nullptr, nullptr); auto pBufferCacheable1 = clUniquePtr(castToObject(bufferCacheable1)); auto bufferCacheable2 = clCreateBufferWithPropertiesINTEL(context, propertiesCacheable, 0, n * sizeof(float), nullptr, nullptr); auto pBufferCacheable2 = clUniquePtr(castToObject(bufferCacheable2)); auto bufferUncacheable1 = clCreateBufferWithPropertiesINTEL(context, propertiesUncacheableInSurfaceState, 0, n * sizeof(float), nullptr, nullptr); auto pBufferUncacheable1 = clUniquePtr(castToObject(bufferUncacheable1)); auto bufferUncacheable2 = clCreateBufferWithPropertiesINTEL(context, propertiesUncacheableInSurfaceState, 0, n * sizeof(float), nullptr, nullptr); auto pBufferUncacheable2 = clUniquePtr(castToObject(bufferUncacheable2)); auto mocsCacheable = pClDevice->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); auto mocsUncacheable = pClDevice->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED); retVal = clSetKernelArg(pMultiDeviceKernel, 0, sizeof(cl_mem), &bufferCacheable1); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, argMocs(*kernel, 0)); retVal = clSetKernelArg(pMultiDeviceKernel, 1, sizeof(cl_mem), &bufferCacheable2); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, argMocs(*kernel, 1)); EXPECT_TRUE(kernel->isPatched()); retVal = clEnqueueNDRangeKernel(pCmdQ, pMultiDeviceKernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, cmdQueueMocs(pCmdQ)); retVal = clSetKernelArg(pMultiDeviceKernel, 0, sizeof(cl_mem), &bufferUncacheable1); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsUncacheable, argMocs(*kernel, 0)); EXPECT_FALSE(kernel->hasUncacheableStatelessArgs()); EXPECT_TRUE(kernel->isPatched()); retVal = clEnqueueNDRangeKernel(pCmdQ, pMultiDeviceKernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, cmdQueueMocs(pCmdQ)); retVal = clSetKernelArg(pMultiDeviceKernel, 
1, sizeof(cl_mem), &bufferUncacheable2); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsUncacheable, argMocs(*kernel, 0)); EXPECT_FALSE(kernel->hasUncacheableStatelessArgs()); EXPECT_TRUE(kernel->isPatched()); retVal = clEnqueueNDRangeKernel(pCmdQ, pMultiDeviceKernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, cmdQueueMocs(pCmdQ)); retVal = clSetKernelArg(pMultiDeviceKernel, 0, sizeof(cl_mem), &bufferCacheable1); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, argMocs(*kernel, 0)); EXPECT_TRUE(kernel->isPatched()); retVal = clEnqueueNDRangeKernel(pCmdQ, pMultiDeviceKernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, cmdQueueMocs(pCmdQ)); retVal = clSetKernelArg(pMultiDeviceKernel, 1, sizeof(cl_mem), &bufferCacheable2); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, argMocs(*kernel, 1)); EXPECT_TRUE(kernel->isPatched()); retVal = clEnqueueNDRangeKernel(pCmdQ, pMultiDeviceKernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, cmdQueueMocs(pCmdQ)); } HWCMDTEST_F(IGFX_GEN8_CORE, clMemLocallyUncachedResourceFixture, givenBuffersThatAreUncachedButKernelDoesntHaveAnyStatelessAccessessThenSurfacesAreNotRecordedAsUncacheable) { cl_int retVal = CL_SUCCESS; MockKernelWithInternals mockKernel(*this->pClDevice, context, true); auto kernel = mockKernel.mockKernel; auto pMultiDeviceKernel = mockKernel.mockMultiDeviceKernel; mockKernel.kernelInfo.setBufferStateful(0); mockKernel.kernelInfo.setBufferStateful(1); auto bufferCacheable1 = clCreateBufferWithPropertiesINTEL(context, propertiesCacheable, 0, n * sizeof(float), nullptr, nullptr); auto pBufferCacheable1 = clUniquePtr(castToObject(bufferCacheable1)); auto bufferCacheable2 = clCreateBufferWithPropertiesINTEL(context, propertiesCacheable, 0, n * sizeof(float), nullptr, nullptr); auto 
pBufferCacheable2 = clUniquePtr(castToObject(bufferCacheable2)); auto bufferUncacheable1 = clCreateBufferWithPropertiesINTEL(context, propertiesUncacheable, 0, n * sizeof(float), nullptr, nullptr); auto pBufferUncacheable1 = clUniquePtr(castToObject(bufferUncacheable1)); auto bufferUncacheable2 = clCreateBufferWithPropertiesINTEL(context, propertiesUncacheable, 0, n * sizeof(float), nullptr, nullptr); auto pBufferUncacheable2 = clUniquePtr(castToObject(bufferUncacheable2)); auto mocsCacheable = pClDevice->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); auto mocsUncacheable = pClDevice->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED); retVal = clSetKernelArg(pMultiDeviceKernel, 0, sizeof(cl_mem), &bufferCacheable1); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, argMocs(*kernel, 0)); retVal = clSetKernelArg(pMultiDeviceKernel, 1, sizeof(cl_mem), &bufferCacheable2); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, argMocs(*kernel, 1)); EXPECT_TRUE(kernel->isPatched()); retVal = clEnqueueNDRangeKernel(pCmdQ, pMultiDeviceKernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, cmdQueueMocs(pCmdQ)); retVal = clSetKernelArg(pMultiDeviceKernel, 0, sizeof(cl_mem), &bufferUncacheable1); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsUncacheable, argMocs(*kernel, 0)); EXPECT_FALSE(kernel->hasUncacheableStatelessArgs()); EXPECT_TRUE(kernel->isPatched()); retVal = clEnqueueNDRangeKernel(pCmdQ, pMultiDeviceKernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, cmdQueueMocs(pCmdQ)); retVal = clSetKernelArg(pMultiDeviceKernel, 1, sizeof(cl_mem), &bufferUncacheable2); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsUncacheable, argMocs(*kernel, 0)); EXPECT_FALSE(kernel->hasUncacheableStatelessArgs()); EXPECT_TRUE(kernel->isPatched()); retVal = clEnqueueNDRangeKernel(pCmdQ, 
pMultiDeviceKernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, cmdQueueMocs(pCmdQ)); retVal = clSetKernelArg(pMultiDeviceKernel, 0, sizeof(cl_mem), &bufferCacheable1); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, argMocs(*kernel, 0)); EXPECT_FALSE(kernel->hasUncacheableStatelessArgs()); EXPECT_TRUE(kernel->isPatched()); retVal = clEnqueueNDRangeKernel(pCmdQ, pMultiDeviceKernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, cmdQueueMocs(pCmdQ)); retVal = clSetKernelArg(pMultiDeviceKernel, 1, sizeof(cl_mem), &bufferCacheable2); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, argMocs(*kernel, 1)); EXPECT_FALSE(kernel->hasUncacheableStatelessArgs()); EXPECT_TRUE(kernel->isPatched()); retVal = clEnqueueNDRangeKernel(pCmdQ, pMultiDeviceKernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, cmdQueueMocs(pCmdQ)); EXPECT_FALSE(kernel->hasUncacheableStatelessArgs()); } HWCMDTEST_F(IGFX_GEN8_CORE, clMemLocallyUncachedResourceFixture, WhenUnsettingUncacheableResourceFromKernelThenKernelContinuesToCorrectlySetMocs) { cl_int retVal = CL_SUCCESS; MockKernelWithInternals mockKernel(*this->pClDevice, context, true); auto pMultiDeviceKernel = mockKernel.mockMultiDeviceKernel; auto kernel = mockKernel.mockKernel; EXPECT_EQ(CL_SUCCESS, retVal); auto bufferCacheable1 = clCreateBufferWithPropertiesINTEL(context, propertiesCacheable, 0, n * sizeof(float), nullptr, nullptr); auto pBufferCacheable1 = clUniquePtr(castToObject(bufferCacheable1)); auto bufferCacheable2 = clCreateBufferWithPropertiesINTEL(context, propertiesCacheable, 0, n * sizeof(float), nullptr, nullptr); auto pBufferCacheable2 = clUniquePtr(castToObject(bufferCacheable2)); auto bufferUncacheable = clCreateBufferWithPropertiesINTEL(context, propertiesUncacheable, 0, n * 
sizeof(float), nullptr, nullptr); auto pBufferUncacheable = clUniquePtr(castToObject(bufferUncacheable)); auto mocsCacheable = pClDevice->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); auto mocsUncacheable = pClDevice->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED); retVal = clSetKernelArg(pMultiDeviceKernel, 0, sizeof(cl_mem), &bufferCacheable1); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, argMocs(*kernel, 0)); retVal = clSetKernelArg(pMultiDeviceKernel, 1, sizeof(cl_mem), &bufferCacheable2); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, argMocs(*kernel, 1)); EXPECT_TRUE(kernel->isPatched()); retVal = clEnqueueNDRangeKernel(pCmdQ, pMultiDeviceKernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, cmdQueueMocs(pCmdQ)); retVal = clSetKernelArg(pMultiDeviceKernel, 0, sizeof(cl_mem), &bufferUncacheable); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsUncacheable, argMocs(*kernel, 0)); EXPECT_TRUE(kernel->isPatched()); retVal = clEnqueueNDRangeKernel(pCmdQ, pMultiDeviceKernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsUncacheable, cmdQueueMocs(pCmdQ)); kernel->unsetArg(0); retVal = clSetKernelArg(pMultiDeviceKernel, 0, sizeof(cl_mem), &bufferCacheable1); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, argMocs(*kernel, 0)); EXPECT_TRUE(kernel->isPatched()); retVal = clEnqueueNDRangeKernel(pCmdQ, pMultiDeviceKernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, cmdQueueMocs(pCmdQ)); kernel->unsetArg(0); retVal = clSetKernelArg(pMultiDeviceKernel, 0, sizeof(cl_mem), &bufferUncacheable); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsUncacheable, argMocs(*kernel, 0)); EXPECT_TRUE(kernel->isPatched()); retVal = clEnqueueNDRangeKernel(pCmdQ, pMultiDeviceKernel, 1, nullptr, globalWorkSize, 
localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsUncacheable, cmdQueueMocs(pCmdQ)); } HWCMDTEST_F(IGFX_GEN8_CORE, clMemLocallyUncachedResourceFixture, givenBuffersThatAreUncachedInSurfaceStateAndAreNotUsedInStatelessFashionThenThoseResourcesAreNotRegistredAsResourcesForCacheFlush) { DebugManagerStateRestore restorer; DebugManager.flags.CreateMultipleSubDevices.set(2); auto context = std::make_unique(); cl_int retVal = CL_SUCCESS; MockKernelWithInternals mockKernel(*context->getDevice(0), context.get(), true); auto kernel = mockKernel.mockKernel; auto pMultiDeviceKernel = mockKernel.mockMultiDeviceKernel; mockKernel.kernelInfo.setBufferStateful(0); mockKernel.kernelInfo.setBufferStateful(1); auto bufferCacheable = clCreateBufferWithPropertiesINTEL(context.get(), propertiesCacheable, 0, n * sizeof(float), nullptr, nullptr); auto bufferUncacheableInSurfaceState = clCreateBufferWithPropertiesINTEL(context.get(), propertiesUncacheableInSurfaceState, 0, n * sizeof(float), nullptr, nullptr); auto bufferUncacheable = clCreateBufferWithPropertiesINTEL(context.get(), propertiesUncacheable, 0, n * sizeof(float), nullptr, nullptr); retVal = clSetKernelArg(pMultiDeviceKernel, 0, sizeof(cl_mem), &bufferUncacheableInSurfaceState); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(nullptr, kernel->kernelArgRequiresCacheFlush[0]); retVal = clSetKernelArg(pMultiDeviceKernel, 0, sizeof(cl_mem), &bufferCacheable); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, kernel->kernelArgRequiresCacheFlush[0]); retVal = clSetKernelArg(pMultiDeviceKernel, 0, sizeof(cl_mem), &bufferUncacheable); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(nullptr, kernel->kernelArgRequiresCacheFlush[0]); clReleaseMemObject(bufferUncacheableInSurfaceState); clReleaseMemObject(bufferUncacheable); clReleaseMemObject(bufferCacheable); } } // namespace clMemLocallyUncachedResourceTests 
compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_release_command_queue_tests.inl000066400000000000000000000033321422164147700313270ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/helpers/unit_test_helper.h" #include "opencl/source/context/context.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "cl_api_tests.h" #include using namespace NEO; namespace ULT { TEST(clReleaseCommandQueueTest, GivenNullCmdQueueWhenReleasingCmdQueueThenClInvalidCommandQueueErrorIsReturned) { auto retVal = clReleaseCommandQueue(nullptr); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } typedef api_tests clReleaseCommandQueueTests; TEST_F(clReleaseCommandQueueTests, givenBlockedEnqueueWithOutputEventStoredAsVirtualEventWhenReleasingCmdQueueThenInternalRefCountIsDecrementedAndQueueDeleted) { cl_command_queue cmdQ = nullptr; cl_queue_properties properties = 0; ClDevice *device = (ClDevice *)testedClDevice; MockKernelWithInternals kernelInternals(*device, pContext); cmdQ = clCreateCommandQueue(pContext, testedClDevice, properties, &retVal); ASSERT_NE(nullptr, cmdQ); ASSERT_EQ(CL_SUCCESS, retVal); size_t offset[] = {0, 0, 0}; size_t gws[] = {1, 1, 1}; cl_int retVal = CL_SUCCESS; cl_int success = CL_SUCCESS; cl_event event = clCreateUserEvent(pContext, &retVal); cl_event eventOut = nullptr; EXPECT_EQ(success, retVal); retVal = clEnqueueNDRangeKernel(cmdQ, kernelInternals.mockMultiDeviceKernel, 1, offset, gws, nullptr, 1, &event, &eventOut); EXPECT_EQ(success, retVal); EXPECT_NE(nullptr, eventOut); clSetUserEventStatus(event, CL_COMPLETE); clReleaseEvent(event); clReleaseEvent(eventOut); retVal = clReleaseCommandQueue(cmdQ); EXPECT_EQ(success, retVal); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_release_context_tests.inl000066400000000000000000000014261422164147700301730ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * 
SPDX-License-Identifier: MIT * */ #include "cl_api_tests.h" using namespace NEO; typedef api_tests clReleaseContextTests; namespace ULT { TEST_F(clReleaseContextTests, GivenValidContextWhenReleasingContextThenSuccessIsReturned) { auto context = clCreateContext( nullptr, 1u, &testedClDevice, nullptr, nullptr, &retVal); EXPECT_NE(nullptr, context); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clReleaseContext(context); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clReleaseContextTests, GivenNullContextWhenReleasingContextThenClInvalidContextIsReturned) { auto retVal = clReleaseContext(nullptr); EXPECT_EQ(CL_INVALID_CONTEXT, retVal); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_release_event_tests.inl000066400000000000000000000114101422164147700276220ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/source/context/context.h" #include "opencl/source/event/event.h" #include "cl_api_tests.h" using namespace NEO; namespace ClReleaseEventTests { template class EventFixture : public ApiFixture<>, public T { public: void SetUp() override { ApiFixture::SetUp(); } void TearDown() override { ApiFixture::TearDown(); } }; typedef EventFixture<::testing::Test> clEventTests; TEST_F(clEventTests, GivenNullEventWhenReleasingEventThenClInvalidEventErrorIsReturned) { auto retVal = clReleaseEvent(nullptr); EXPECT_EQ(CL_INVALID_EVENT, retVal); } TEST_F(clEventTests, GivenValidEventWhenReleasingEventThenSuccessIsReturned) { auto *pEvent = new Event(nullptr, 0, 0, 0); ASSERT_NE(nullptr, pEvent); cl_event event = (cl_event)pEvent; auto retVal = clReleaseEvent(event); EXPECT_EQ(CL_SUCCESS, retVal); //no delete operation. 
// clReleaseEvent should do this for us
}

// One retain followed by one release moves the reference count 1 -> 2 -> 1;
// the event is still alive afterwards, so the test deletes it itself.
TEST_F(clEventTests, GivenValidEventWhenRetainedAndReleasedThenReferenceCountIsUpdated) {
    auto *pEvent = new Event(nullptr, 0, 0, 0);
    ASSERT_NE(nullptr, pEvent);

    cl_event event = (cl_event)pEvent;

    auto retVal = clRetainEvent(event);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(pEvent->getReference(), 2);

    retVal = clReleaseEvent(event);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(pEvent->getReference(), 1);

    delete pEvent;
}

// One retain followed by two releases: both releases must succeed. There is
// no explicit delete here - the test relies on the final clReleaseEvent to
// dispose of the event.
TEST_F(clEventTests, GivenValidEventWhenRetainedAndReleasedTwiceThenClSuccessIsReturned) {
    auto *pEvent = new Event(nullptr, 0, 0, 0);
    ASSERT_NE(nullptr, pEvent);

    cl_event event = (cl_event)pEvent;

    auto retVal = clRetainEvent(event);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(pEvent->getReference(), 2);

    retVal = clReleaseEvent(event);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(pEvent->getReference(), 1);

    retVal = clReleaseEvent(event);
    EXPECT_EQ(CL_SUCCESS, retVal);
}

// Retaining a null event is rejected with CL_INVALID_EVENT.
TEST_F(clEventTests, GivenNullEventWhenRetainingEventThenClInvalidEventErrorIsReturned) {
    auto retVal = clRetainEvent(nullptr);
    EXPECT_EQ(CL_INVALID_EVENT, retVal);
}

// Querying CL_EVENT_COMMAND_QUEUE on a valid event succeeds; the returned
// queue handle itself is not inspected.
TEST_F(clEventTests, GivenValidEventWhenGettingEventInfoThenSuccessIsReturned) {
    cl_command_queue cmdQ;
    auto *pEvent = new Event(nullptr, 0, 0, 0);

    cl_event event = (cl_event)pEvent;

    auto retVal = clGetEventInfo(event, CL_EVENT_COMMAND_QUEUE, sizeof(cmdQ), &cmdQ, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    delete pEvent;
}

// Querying a null event is rejected with CL_INVALID_EVENT.
TEST_F(clEventTests, GivenNullEventWhenGettingEventInfoThenClInvalidEventErrorIsReturned) {
    cl_command_queue cmdQ;
    auto retVal = clGetEventInfo(nullptr, CL_EVENT_COMMAND_QUEUE, sizeof(cmdQ), &cmdQ, nullptr);
    EXPECT_EQ(CL_INVALID_EVENT, retVal);
}

// A pointer to memory that is not an Event object must be rejected by
// clWaitForEvents with CL_INVALID_EVENT.
TEST_F(clEventTests, GivenInvalidEventWhenWaitingForEventsThenClInvalidEventErrorIsReturned) {
    // Value-initialize (zero) the buffer: the API examines this memory to
    // decide whether it is a valid event, so it must not be indeterminate.
    char *ptr = new char[sizeof(Event)]();
    cl_event event = (cl_event)ptr;

    auto retVal = clWaitForEvents(1, &event);
    EXPECT_EQ(CL_INVALID_EVENT, retVal);

    delete[] ptr;
}

TEST_F(clEventTests,
GivenValidEventWhenSettingStatusMultipleTimesThenClInvalidOperationErrorIsReturned) { cl_int retVal = 0; auto event = clCreateUserEvent(pContext, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clSetUserEventStatus(event, CL_COMPLETE); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clSetUserEventStatus(event, CL_COMPLETE); EXPECT_EQ(CL_INVALID_OPERATION, retVal); clReleaseEvent(event); } typedef EventFixture<::testing::TestWithParam>> clEventStatusTests; TEST_P(clEventStatusTests, GivenExecutionStatusWhenSettingUserEventStatusThenSuccessOrCorrectErrorIsReturned) { cl_int retVal = 0; cl_event event = clCreateUserEvent(pContext, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); auto status = std::get<0>(GetParam()); auto expect = std::get<1>(GetParam()); retVal = clSetUserEventStatus(event, status); EXPECT_EQ(expect, retVal); clReleaseEvent(event); } cl_int validStatus[] = {CL_COMPLETE, -1}; cl_int expectValidStatus[] = {CL_SUCCESS}; cl_int invalidStatus[] = {CL_QUEUED, CL_SUBMITTED, 12}; cl_int expectInvalidStatus[] = {CL_INVALID_VALUE}; INSTANTIATE_TEST_CASE_P(SetValidStatus, clEventStatusTests, ::testing::Combine( ::testing::ValuesIn(validStatus), ::testing::ValuesIn(expectValidStatus))); INSTANTIATE_TEST_CASE_P(SetInvalidStatus, clEventStatusTests, ::testing::Combine( ::testing::ValuesIn(invalidStatus), ::testing::ValuesIn(expectInvalidStatus))); } // namespace ClReleaseEventTests compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_release_kernel_tests.inl000066400000000000000000000047101422164147700277660ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/file_io.h" #include "shared/test/common/helpers/test_files.h" #include "opencl/source/context/context.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clReleaseKernelTests; namespace ULT { TEST_F(clReleaseKernelTests, GivenNullKernelWhenReleasingKernelThenClInvalidKernelErrorIsReturned) { retVal = 
clReleaseKernel(nullptr); EXPECT_EQ(CL_INVALID_KERNEL, retVal); } TEST_F(clReleaseKernelTests, GivenRetainedKernelWhenReleasingKernelThenKernelIsCorrectlyReleased) { cl_kernel kernel = nullptr; cl_program program = nullptr; cl_int binaryStatus = CL_SUCCESS; size_t binarySize = 0; std::string testFile; retrieveBinaryKernelFilename(testFile, "CopyBuffer_simd16_", ".bin"); auto binary = loadDataFromFile(testFile.c_str(), binarySize); ASSERT_NE(0u, binarySize); ASSERT_NE(nullptr, binary); unsigned const char *binaries[1] = {reinterpret_cast(binary.get())}; program = clCreateProgramWithBinary(pContext, 1, &testedClDevice, &binarySize, binaries, &binaryStatus, &retVal); binary.reset(); EXPECT_NE(nullptr, program); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clBuildProgram(program, 1, &testedClDevice, nullptr, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); kernel = clCreateKernel(program, "CopyBuffer", &retVal); EXPECT_NE(nullptr, kernel); ASSERT_EQ(CL_SUCCESS, retVal); cl_uint theRef; retVal = clGetKernelInfo(kernel, CL_KERNEL_REFERENCE_COUNT, sizeof(cl_uint), &theRef, NULL); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, theRef); retVal = clRetainKernel(kernel); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clRetainKernel(kernel); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clGetKernelInfo(kernel, CL_KERNEL_REFERENCE_COUNT, sizeof(cl_uint), &theRef, NULL); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(3u, theRef); retVal = clReleaseKernel(kernel); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clReleaseKernel(kernel); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clGetKernelInfo(kernel, CL_KERNEL_REFERENCE_COUNT, sizeof(cl_uint), &theRef, NULL); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, theRef); retVal = clReleaseKernel(kernel); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clReleaseProgram(program); EXPECT_EQ(CL_SUCCESS, retVal); } } // namespace ULT 
compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_release_mem_obj_tests.inl000066400000000000000000000015311422164147700301140ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/context/context.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clReleaseMemObjectTests; namespace ULT { TEST_F(clReleaseMemObjectTests, GivenValidBufferWhenReleasingMemObjectThenSuccessIsReturned) { cl_mem_flags flags = CL_MEM_USE_HOST_PTR; static const unsigned int bufferSize = 16; cl_mem buffer = nullptr; std::unique_ptr hostMem(new char[bufferSize]); memset(hostMem.get(), 0xaa, bufferSize); buffer = clCreateBuffer( pContext, flags, bufferSize, hostMem.get(), &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); retVal = clReleaseMemObject(buffer); EXPECT_EQ(CL_SUCCESS, retVal); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_release_program_tests.inl000066400000000000000000000026461422164147700301630ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/context/context.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clReleaseProgramTests; TEST_F(clReleaseProgramTests, GivenNullProgramWhenReleasingProgramThenClInvalidProgramIsReturned) { auto retVal = clReleaseProgram(nullptr); EXPECT_EQ(CL_INVALID_PROGRAM, retVal); } static const char fakeSrc[] = "__kernel void func(void) { }"; TEST_F(clReleaseProgramTests, GivenRetainedProgramWhenReleasingProgramThenProgramIsReleasedAndProgramReferenceCountDecrementedCorrectly) { size_t srcLen = sizeof(fakeSrc); const char *src = fakeSrc; cl_int retVal; cl_uint theRef; cl_program prog = clCreateProgramWithSource(pContext, 1, &src, &srcLen, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clRetainProgram(prog); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clGetProgramInfo(prog, 
CL_PROGRAM_REFERENCE_COUNT, sizeof(cl_uint), &theRef, NULL); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(2u, theRef); retVal = clReleaseProgram(prog); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clGetProgramInfo(prog, CL_PROGRAM_REFERENCE_COUNT, sizeof(cl_uint), &theRef, NULL); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, theRef); retVal = clReleaseProgram(prog); EXPECT_EQ(CL_SUCCESS, retVal); } compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_retain_mem_obj_tests.inl000066400000000000000000000026321422164147700277610ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/context/context.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clRetainMemObjectTests; namespace ULT { TEST_F(clRetainMemObjectTests, GivenValidParamsWhenRetainingMemObjectThenRefCountIsIncremented) { cl_mem_flags flags = CL_MEM_USE_HOST_PTR; static const unsigned int bufferSize = 16; cl_mem buffer = nullptr; cl_int retVal; cl_uint theRef; std::unique_ptr hostMem(new char[bufferSize]); memset(hostMem.get(), 0xaa, bufferSize); buffer = clCreateBuffer( pContext, flags, bufferSize, hostMem.get(), &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); retVal = clRetainMemObject(buffer); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clGetMemObjectInfo(buffer, CL_MEM_REFERENCE_COUNT, sizeof(cl_uint), &theRef, NULL); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(2u, theRef); retVal = clReleaseMemObject(buffer); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clGetMemObjectInfo(buffer, CL_MEM_REFERENCE_COUNT, sizeof(cl_uint), &theRef, NULL); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, theRef); retVal = clReleaseMemObject(buffer); EXPECT_EQ(CL_SUCCESS, retVal); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_retain_release_command_queue_tests.inl000066400000000000000000000040561422164147700326750ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * 
SPDX-License-Identifier: MIT * */ #include "shared/test/common/helpers/unit_test_helper.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/context/context.h" #include "opencl/test/unit_test/api/cl_api_tests.h" using namespace NEO; namespace ULT { class clRetainReleaseCommandQueueTests : public ApiFixture<>, public ::testing::Test { public: void SetUp() override { ApiFixture::SetUp(); } void TearDown() override { ApiFixture::TearDown(); } cl_command_queue createClQueue() { return clCreateCommandQueueWithProperties(pContext, testedClDevice, noProperties, &retVal); } protected: cl_queue_properties noProperties[5] = {0}; }; TEST_F(clRetainReleaseCommandQueueTests, GivenValidCommandQueueWhenRetainingAndReleasingThenReferenceCountIsUpdatedCorrectly) { auto queue = this->createClQueue(); ASSERT_EQ(CL_SUCCESS, this->retVal); auto qObject = castToObject(queue); ASSERT_NE(qObject, nullptr); cl_uint refCount; this->retVal = clGetCommandQueueInfo(queue, CL_QUEUE_REFERENCE_COUNT, sizeof(cl_uint), &refCount, NULL); EXPECT_EQ(CL_SUCCESS, this->retVal); EXPECT_EQ(1u, refCount); this->retVal = clRetainCommandQueue(queue); EXPECT_EQ(CL_SUCCESS, this->retVal); this->retVal = clGetCommandQueueInfo(queue, CL_QUEUE_REFERENCE_COUNT, sizeof(cl_uint), &refCount, NULL); EXPECT_EQ(CL_SUCCESS, this->retVal); EXPECT_EQ(2u, refCount); this->retVal = clReleaseCommandQueue(queue); EXPECT_EQ(CL_SUCCESS, this->retVal); this->retVal = clGetCommandQueueInfo(queue, CL_QUEUE_REFERENCE_COUNT, sizeof(cl_uint), &refCount, NULL); EXPECT_EQ(CL_SUCCESS, this->retVal); EXPECT_EQ(1u, refCount); this->retVal = clReleaseCommandQueue(queue); EXPECT_EQ(CL_SUCCESS, this->retVal); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_retain_release_context_tests.inl000066400000000000000000000025531422164147700315370ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include 
"opencl/source/platform/platform.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clRetainReleaseContextTests; namespace ULT { TEST_F(clRetainReleaseContextTests, GivenValidContextWhenRetainingAndReleasingThenContextReferenceCountIsUpdatedCorrectly) { cl_context context = clCreateContext(nullptr, 1, &testedClDevice, nullptr, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, context); cl_uint theRef; retVal = clGetContextInfo(context, CL_CONTEXT_REFERENCE_COUNT, sizeof(cl_uint), &theRef, NULL); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, theRef); retVal = clRetainContext(context); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clGetContextInfo(context, CL_CONTEXT_REFERENCE_COUNT, sizeof(cl_uint), &theRef, NULL); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(2u, theRef); retVal = clReleaseContext(context); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clGetContextInfo(context, CL_CONTEXT_REFERENCE_COUNT, sizeof(cl_uint), &theRef, NULL); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, theRef); retVal = clReleaseContext(context); EXPECT_EQ(CL_SUCCESS, retVal); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_retain_release_device_tests.inl000066400000000000000000000040431422164147700313060ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/platform_fixture.h" using namespace NEO; struct clRetainReleaseDeviceTests : Test { void SetUp() override { DebugManager.flags.CreateMultipleRootDevices.set(maxRootDeviceCount); Test::SetUp(); } DebugManagerStateRestore restorer; const uint32_t rootDeviceIndex = 1u; }; namespace ULT { TEST_F(clRetainReleaseDeviceTests, GivenRootDeviceWhenRetainingThenReferenceCountIsOne) { cl_uint numEntries = maxRootDeviceCount; cl_device_id devices[maxRootDeviceCount]; auto retVal = clGetDeviceIDs(pPlatform, CL_DEVICE_TYPE_GPU, numEntries, devices, 
nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clRetainDevice(devices[rootDeviceIndex]); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clRetainDevice(devices[rootDeviceIndex]); EXPECT_EQ(CL_SUCCESS, retVal); cl_uint theRef; retVal = clGetDeviceInfo(devices[rootDeviceIndex], CL_DEVICE_REFERENCE_COUNT, sizeof(cl_uint), &theRef, NULL); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, theRef); } TEST_F(clRetainReleaseDeviceTests, GivenRootDeviceWhenReleasingThenReferenceCountIsOne) { constexpr cl_uint numEntries = maxRootDeviceCount; cl_device_id devices[maxRootDeviceCount]; auto retVal = clGetDeviceIDs(pPlatform, CL_DEVICE_TYPE_GPU, numEntries, devices, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseDevice(devices[rootDeviceIndex]); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseDevice(devices[rootDeviceIndex]); EXPECT_EQ(CL_SUCCESS, retVal); cl_uint theRef; retVal = clGetDeviceInfo(devices[rootDeviceIndex], CL_DEVICE_REFERENCE_COUNT, sizeof(cl_uint), &theRef, NULL); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, theRef); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_retain_release_sampler_tests.inl000066400000000000000000000026351422164147700315170ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/context/context.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clRetainReleaseSamplerTests; namespace ULT { TEST_F(clRetainReleaseSamplerTests, GivenValidSamplerWhenRetainingThenSamplerReferenceCountIsIncremented) { cl_int retVal = CL_SUCCESS; auto sampler = clCreateSampler(pContext, CL_TRUE, CL_ADDRESS_CLAMP, CL_FILTER_NEAREST, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, sampler); cl_uint theRef; retVal = clGetSamplerInfo(sampler, CL_SAMPLER_REFERENCE_COUNT, sizeof(cl_uint), &theRef, NULL); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, theRef); retVal = clRetainSampler(sampler); EXPECT_EQ(CL_SUCCESS, retVal); retVal 
= clGetSamplerInfo(sampler, CL_SAMPLER_REFERENCE_COUNT, sizeof(cl_uint), &theRef, NULL); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(2u, theRef); retVal = clReleaseSampler(sampler); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clGetSamplerInfo(sampler, CL_SAMPLER_REFERENCE_COUNT, sizeof(cl_uint), &theRef, NULL); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, theRef); retVal = clReleaseSampler(sampler); EXPECT_EQ(CL_SUCCESS, retVal); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_set_context_destructor_callback.inl000066400000000000000000000022651422164147700322200ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/api/cl_api_tests.h" using namespace NEO; namespace ULT { TEST(clSetContextDestructorCallbackTest, givenNullptrContextWhenSettingContextDestructorCallbackThenInvalidContextErrorIsReturned) { auto retVal = clSetContextDestructorCallback(nullptr, nullptr, nullptr); EXPECT_EQ(CL_INVALID_CONTEXT, retVal); } using clSetContextDestructorCallbackTests = api_tests; TEST_F(clSetContextDestructorCallbackTests, givenPfnNotifyNullptrWhenSettingContextDestructorCallbackThenInvalidValueErrorIsReturned) { auto retVal = clSetContextDestructorCallback(pContext, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); } void CL_CALLBACK callback(cl_context, void *){}; TEST_F(clSetContextDestructorCallbackTests, WhenSettingContextDestructorCallbackThenSucccessIsReturned) { auto retVal = clSetContextDestructorCallback(pContext, callback, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); auto userData = reinterpret_cast(0x4321); retVal = clSetContextDestructorCallback(pContext, callback, userData); EXPECT_EQ(CL_SUCCESS, retVal); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_set_event_callback_tests.inl000066400000000000000000000135611422164147700306220ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * 
SPDX-License-Identifier: MIT * */ #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "opencl/source/event/event.h" #include "cl_api_tests.h" using namespace NEO; namespace ClSetEventCallbackTests { static int cbInvoked = 0; static void *cbData = nullptr; void CL_CALLBACK eventCallBack(cl_event event, cl_int callbackType, void *userData) { cbInvoked++; cbData = userData; } class clSetEventCallbackTests : public ApiFixture<>, public ::testing::Test { void SetUp() override { dbgRestore.reset(new DebugManagerStateRestore()); DebugManager.flags.EnableAsyncEventsHandler.set(false); ApiFixture::SetUp(); cbInvoked = 0; cbData = nullptr; } void TearDown() override { ApiFixture::TearDown(); } std::unique_ptr dbgRestore; }; TEST_F(clSetEventCallbackTests, GivenValidEventWhenSettingEventCallbackThenSuccessIsReturned) { std::unique_ptr event(new Event(nullptr, 0, 0, 0)); retVal = clSetEventCallback(event.get(), CL_COMPLETE, eventCallBack, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); event->decRefInternal(); } TEST_F(clSetEventCallbackTests, GivenInvalidEventWhenSettingEventCallbackThenInvalidEventErrorIsReturned) { std::unique_ptr event(new char[sizeof(Event)]); memset(event.get(), 0, sizeof(Event)); retVal = clSetEventCallback(reinterpret_cast(event.get()), CL_COMPLETE, eventCallBack, nullptr); EXPECT_EQ(CL_INVALID_EVENT, retVal); } TEST_F(clSetEventCallbackTests, GivenValidCallbackTypeWhenSettingEventCallbackThenSuccessIsReturned) { std::unique_ptr event(new Event(nullptr, 0, 0, 0)); retVal = clSetEventCallback(event.get(), CL_COMPLETE, eventCallBack, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); event->decRefInternal(); event.reset(new Event(nullptr, 0, 0, 0)); retVal = clSetEventCallback(event.get(), CL_RUNNING, eventCallBack, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); event->decRefInternal(); event.reset(new Event(nullptr, 0, 0, 0)); retVal = clSetEventCallback(event.get(), CL_SUBMITTED, eventCallBack, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } 
TEST_F(clSetEventCallbackTests, GivenInvalidCallbackTypeWhenSettingEventCallbackThenInvalidValueErrorIsReturned) { std::unique_ptr event(new Event(nullptr, 0, 0, 0)); retVal = clSetEventCallback(event.get(), CL_COMPLETE + CL_RUNNING + CL_SUBMITTED, eventCallBack, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(clSetEventCallbackTests, GivenNullCallbackWhenSettingEventCallbackThenInvalidValueErrorIsReturned) { std::unique_ptr event(new Event(nullptr, 0, 0, 0)); retVal = clSetEventCallback(event.get(), CL_COMPLETE, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(clSetEventCallbackTests, GivenMultipleCallbacksWhenSettingEventCallbackThenSuccessIsReturned) { std::unique_ptr event(new Event(nullptr, 0, 0, 0)); retVal = clSetEventCallback(event.get(), CL_COMPLETE, eventCallBack, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clSetEventCallback(event.get(), CL_RUNNING, eventCallBack, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clSetEventCallback(event.get(), CL_SUBMITTED, eventCallBack, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); event->decRefInternal(); event->decRefInternal(); } TEST_F(clSetEventCallbackTests, GivenValidCallbackWhenStatusIsSetToCompleteThenCallbackWasInvokedOnce) { std::unique_ptr event(new Event(nullptr, 0, 0, 0)); retVal = clSetEventCallback(event.get(), CL_COMPLETE, eventCallBack, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); event->setStatus(CL_COMPLETE); EXPECT_EQ(cbInvoked, 1); } TEST_F(clSetEventCallbackTests, GivenThreeCallbacksWhenStatusIsSetToCompleteThenCallbackWasInvokedThreeTimes) { std::unique_ptr event(new Event(nullptr, 0, 0, 0)); retVal = clSetEventCallback(event.get(), CL_COMPLETE, eventCallBack, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clSetEventCallback(event.get(), CL_RUNNING, eventCallBack, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clSetEventCallback(event.get(), CL_SUBMITTED, eventCallBack, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); event->setStatus(CL_COMPLETE); EXPECT_EQ(cbInvoked, 3); } 
TEST_F(clSetEventCallbackTests, GivenValidCallbackWhenStatusIsSetToCompleteMultipleTimesThenCallbackWasInvokedOnce) { std::unique_ptr event(new Event(nullptr, 0, 0, 0)); retVal = clSetEventCallback(event.get(), CL_COMPLETE, eventCallBack, nullptr); event->setStatus(CL_COMPLETE); event->setStatus(CL_COMPLETE); event->setStatus(CL_COMPLETE); EXPECT_EQ(cbInvoked, 1); } TEST_F(clSetEventCallbackTests, GivenThreeCallbacksWhenStatusIsSetToCompleteMultipleTimesThenCallbackWasInvokedThreeTimes) { std::unique_ptr event(new Event(nullptr, 0, 0, 0)); retVal = clSetEventCallback(event.get(), CL_COMPLETE, eventCallBack, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clSetEventCallback(event.get(), CL_RUNNING, eventCallBack, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clSetEventCallback(event.get(), CL_SUBMITTED, eventCallBack, nullptr); event->setStatus(CL_SUBMITTED); event->setStatus(CL_RUNNING); event->setStatus(CL_COMPLETE); event->setStatus(CL_COMPLETE); event->setStatus(CL_COMPLETE); EXPECT_EQ(cbInvoked, 3); } TEST_F(clSetEventCallbackTests, GivenUserDataWhenStatusIsSetToCompleteThenCallbackWasInvokedOnce) { std::unique_ptr event(new Event(nullptr, 0, 0, 0)); int data = 1; retVal = clSetEventCallback(event.get(), CL_COMPLETE, eventCallBack, &data); EXPECT_EQ(CL_SUCCESS, retVal); event->setStatus(CL_COMPLETE); EXPECT_EQ(cbInvoked, 1); EXPECT_EQ(&data, cbData); } } // namespace ClSetEventCallbackTests compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_set_kernel_arg_svm_pointer_tests.inl000066400000000000000000000344561422164147700324310ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/memory_manager/unified_memory_manager.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/mocks/mock_svm_manager.h" #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include 
"opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/test_macros/test_checks_ocl.h" #include "cl_api_tests.h" using namespace NEO; class KernelArgSvmFixture : public ApiFixture<> { protected: void SetUp() override { ApiFixture::SetUp(); REQUIRE_SVM_OR_SKIP(defaultHwInfo); pKernelInfo = std::make_unique(); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1; pKernelInfo->heapInfo.SurfaceStateHeapSize = sizeof(pSshLocal); pKernelInfo->heapInfo.pSsh = pSshLocal; pKernelInfo->addArgBuffer(0, 0x30, sizeof(void *)); pKernelInfo->setAddressQualifier(0, KernelArgMetadata::AddrGlobal); pMockMultiDeviceKernel = MultiDeviceKernel::create(pProgram, MockKernel::toKernelInfoContainer(*pKernelInfo, testedRootDeviceIndex), nullptr); pMockKernel = static_cast(pMockMultiDeviceKernel->getKernel(testedRootDeviceIndex)); ASSERT_NE(nullptr, pMockKernel); pMockKernel->setCrossThreadData(pCrossThreadData, sizeof(pCrossThreadData)); } void TearDown() override { if (pMockMultiDeviceKernel) { delete pMockMultiDeviceKernel; } ApiFixture::TearDown(); } cl_int retVal = CL_SUCCESS; MockKernel *pMockKernel = nullptr; MultiDeviceKernel *pMockMultiDeviceKernel = nullptr; std::unique_ptr pKernelInfo; char pSshLocal[64]{}; char pCrossThreadData[64]{}; }; typedef Test clSetKernelArgSVMPointerTests; namespace ULT { TEST_F(clSetKernelArgSVMPointerTests, GivenNullKernelWhenSettingKernelArgThenInvalidKernelErrorIsReturned) { auto retVal = clSetKernelArgSVMPointer( nullptr, // cl_kernel kernel 0, // cl_uint arg_index nullptr // const void *arg_value ); EXPECT_EQ(CL_INVALID_KERNEL, retVal); } TEST_F(clSetKernelArgSVMPointerTests, GivenInvalidArgIndexWhenSettingKernelArgThenInvalidArgIndexErrorIsReturned) { auto retVal = clSetKernelArgSVMPointer( pMockMultiDeviceKernel, // cl_kernel kernel (cl_uint)-1, // cl_uint arg_index nullptr // const void *arg_value ); EXPECT_EQ(CL_INVALID_ARG_INDEX, retVal); } TEST_F(clSetKernelArgSVMPointerTests, 
GivenDeviceNotSupportingSvmWhenSettingKernelArgSVMPointerThenInvalidOperationErrorIsReturned) { auto hwInfo = executionEnvironment->rootDeviceEnvironments[ApiFixture::testedRootDeviceIndex]->getMutableHardwareInfo(); hwInfo->capabilityTable.ftrSvm = false; std::unique_ptr pMultiDeviceKernel( MultiDeviceKernel::create(pProgram, MockKernel::toKernelInfoContainer(*pKernelInfo, testedRootDeviceIndex), nullptr)); auto retVal = clSetKernelArgSVMPointer( pMultiDeviceKernel.get(), // cl_kernel kernel 0, // cl_uint arg_index nullptr // const void *arg_value ); EXPECT_EQ(CL_INVALID_OPERATION, retVal); } TEST_F(clSetKernelArgSVMPointerTests, GivenLocalAddressAndNullArgValueWhenSettingKernelArgThenInvalidArgValueErrorIsReturned) { pKernelInfo->setAddressQualifier(0, KernelArgMetadata::AddrLocal); auto retVal = clSetKernelArgSVMPointer( pMockMultiDeviceKernel, // cl_kernel kernel 0, // cl_uint arg_index nullptr // const void *arg_value ); EXPECT_EQ(CL_INVALID_ARG_VALUE, retVal); } TEST_F(clSetKernelArgSVMPointerTests, GivenInvalidArgValueWhenSettingKernelArgThenInvalidArgValueErrorIsReturned) { pDevice->deviceInfo.sharedSystemMemCapabilities = 0u; pDevice->getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable.sharedSystemMemCapabilities = 0; void *ptrHost = malloc(256); EXPECT_NE(nullptr, ptrHost); auto retVal = clSetKernelArgSVMPointer( pMockMultiDeviceKernel, // cl_kernel kernel 0, // cl_uint arg_index ptrHost // const void *arg_value ); EXPECT_EQ(CL_INVALID_ARG_VALUE, retVal); free(ptrHost); } TEST_F(clSetKernelArgSVMPointerTests, GivenSvmAndNullArgValueWhenSettingKernelArgThenSuccessIsReturned) { const ClDeviceInfo &devInfo = pDevice->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { auto retVal = clSetKernelArgSVMPointer( pMockMultiDeviceKernel, // cl_kernel kernel 0, // cl_uint arg_index nullptr // const void *arg_value ); EXPECT_EQ(CL_SUCCESS, retVal); } } TEST_F(clSetKernelArgSVMPointerTests, 
GivenSvmAndValidArgValueWhenSettingKernelArgThenSuccessIsReturned) { const ClDeviceInfo &devInfo = pDevice->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { void *ptrSvm = clSVMAlloc(pContext, CL_MEM_READ_WRITE, 256, 4); EXPECT_NE(nullptr, ptrSvm); auto retVal = clSetKernelArgSVMPointer( pMockMultiDeviceKernel, // cl_kernel kernel 0, // cl_uint arg_index ptrSvm // const void *arg_value ); EXPECT_EQ(CL_SUCCESS, retVal); clSVMFree(pContext, ptrSvm); } } TEST_F(clSetKernelArgSVMPointerTests, GivenSvmAndConstantAddressWhenSettingKernelArgThenSuccessIsReturned) { const ClDeviceInfo &devInfo = pDevice->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { void *ptrSvm = clSVMAlloc(pContext, CL_MEM_READ_WRITE, 256, 4); EXPECT_NE(nullptr, ptrSvm); pKernelInfo->setAddressQualifier(0, KernelArgMetadata::AddrConstant); auto retVal = clSetKernelArgSVMPointer( pMockMultiDeviceKernel, // cl_kernel kernel 0, // cl_uint arg_index ptrSvm // const void *arg_value ); EXPECT_EQ(CL_SUCCESS, retVal); clSVMFree(pContext, ptrSvm); } } TEST_F(clSetKernelArgSVMPointerTests, GivenSvmAndPointerWithOffsetWhenSettingKernelArgThenSuccessIsReturned) { const ClDeviceInfo &devInfo = pDevice->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { void *ptrSvm = clSVMAlloc(pContext, CL_MEM_READ_WRITE, 256, 4); size_t offset = 256 / 2; EXPECT_NE(nullptr, ptrSvm); auto retVal = clSetKernelArgSVMPointer( pMockMultiDeviceKernel, // cl_kernel kernel 0, // cl_uint arg_index (char *)ptrSvm + offset // const void *arg_value ); EXPECT_EQ(CL_SUCCESS, retVal); clSVMFree(pContext, ptrSvm); } } TEST_F(clSetKernelArgSVMPointerTests, GivenSvmAndPointerWithInvalidOffsetWhenSettingKernelArgThenInvalidArgValueErrorIsReturned) { pDevice->deviceInfo.sharedSystemMemCapabilities = 0u; pDevice->getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable.sharedSystemMemCapabilities = 0; const ClDeviceInfo &devInfo = pDevice->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { void *ptrSvm = clSVMAlloc(pContext, 
CL_MEM_READ_WRITE, 256, 4); auto svmData = pContext->getSVMAllocsManager()->getSVMAlloc(ptrSvm); ASSERT_NE(nullptr, svmData); auto svmAlloc = svmData->gpuAllocations.getGraphicsAllocation(pContext->getDevice(0)->getRootDeviceIndex()); EXPECT_NE(nullptr, svmAlloc); size_t offset = svmAlloc->getUnderlyingBufferSize() + 1; auto retVal = clSetKernelArgSVMPointer( pMockMultiDeviceKernel, // cl_kernel kernel 0, // cl_uint arg_index (char *)ptrSvm + offset // const void *arg_value ); EXPECT_EQ(CL_INVALID_ARG_VALUE, retVal); clSVMFree(pContext, ptrSvm); } } TEST_F(clSetKernelArgSVMPointerTests, GivenSvmAndValidArgValueWhenSettingSameKernelArgThenSetArgSvmAllocCalledOnlyWhenNeeded) { const ClDeviceInfo &devInfo = pDevice->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { auto mockSvmManager = reinterpret_cast(pMockKernel->getContext().getSVMAllocsManager()); EXPECT_EQ(0u, pMockKernel->setArgSvmAllocCalls); void *const ptrSvm = clSVMAlloc(pContext, CL_MEM_READ_WRITE, 256, 4); EXPECT_NE(nullptr, ptrSvm); auto callCounter = 0u; // first set arg - called auto retVal = clSetKernelArgSVMPointer( pMockMultiDeviceKernel, // cl_kernel kernel 0, // cl_uint arg_index ptrSvm // const void *arg_value ); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(++callCounter, pMockKernel->setArgSvmAllocCalls); // same values but allocationsCounter == 0 - called retVal = clSetKernelArgSVMPointer( pMockMultiDeviceKernel, // cl_kernel kernel 0, // cl_uint arg_index ptrSvm // const void *arg_value ); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(++callCounter, pMockKernel->setArgSvmAllocCalls); ++mockSvmManager->allocationsCounter; // same values - not called retVal = clSetKernelArgSVMPointer( pMockMultiDeviceKernel, // cl_kernel kernel 0, // cl_uint arg_index ptrSvm // const void *arg_value ); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(callCounter, pMockKernel->setArgSvmAllocCalls); // same values and allocationsCounter - not called retVal = clSetKernelArgSVMPointer( pMockMultiDeviceKernel, // cl_kernel kernel 
0, // cl_uint arg_index ptrSvm // const void *arg_value ); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(callCounter, pMockKernel->setArgSvmAllocCalls); ++mockSvmManager->allocationsCounter; // different pointer - called void *const nextPtrSvm = static_cast(ptrSvm) + 1; retVal = clSetKernelArgSVMPointer( pMockMultiDeviceKernel, // cl_kernel kernel 0, // cl_uint arg_index nextPtrSvm // const void *arg_value ); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(++callCounter, pMockKernel->setArgSvmAllocCalls); ++mockSvmManager->allocationsCounter; // different allocId - called pMockKernel->kernelArguments[0].allocId = 1; retVal = clSetKernelArgSVMPointer( pMockMultiDeviceKernel, // cl_kernel kernel 0, // cl_uint arg_index nextPtrSvm // const void *arg_value ); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(++callCounter, pMockKernel->setArgSvmAllocCalls); ++mockSvmManager->allocationsCounter; // allocId = 0 - called pMockKernel->kernelArguments[0].allocId = 0; retVal = clSetKernelArgSVMPointer( pMockMultiDeviceKernel, // cl_kernel kernel 0, // cl_uint arg_index nextPtrSvm // const void *arg_value ); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(++callCounter, pMockKernel->setArgSvmAllocCalls); ++mockSvmManager->allocationsCounter; // same values - not called retVal = clSetKernelArgSVMPointer( pMockMultiDeviceKernel, // cl_kernel kernel 0, // cl_uint arg_index nextPtrSvm // const void *arg_value ); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(callCounter, pMockKernel->setArgSvmAllocCalls); ++mockSvmManager->allocationsCounter; DebugManagerStateRestore stateRestorer; DebugManager.flags.EnableSharedSystemUsmSupport.set(1); mockSvmManager->freeSVMAlloc(nextPtrSvm); // same values but no svmData - called retVal = clSetKernelArgSVMPointer( pMockMultiDeviceKernel, // cl_kernel kernel 0, // cl_uint arg_index nextPtrSvm // const void *arg_value ); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(++callCounter, pMockKernel->setArgSvmAllocCalls); clSVMFree(pContext, ptrSvm); } } 
TEST_F(clSetKernelArgSVMPointerTests, GivenSvmAndValidArgValueWhenAllocIdCacheHitThenAllocIdMemoryManagerCounterIsUpdated) { const ClDeviceInfo &devInfo = pDevice->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { auto mockSvmManager = reinterpret_cast(pMockKernel->getContext().getSVMAllocsManager()); EXPECT_EQ(0u, pMockKernel->setArgSvmAllocCalls); void *ptrSvm = clSVMAlloc(pContext, CL_MEM_READ_WRITE, 256, 4); EXPECT_NE(nullptr, ptrSvm); auto callCounter = 0u; // first set arg - called auto retVal = clSetKernelArgSVMPointer( pMockMultiDeviceKernel, // cl_kernel kernel 0, // cl_uint arg_index ptrSvm // const void *arg_value ); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(++callCounter, pMockKernel->setArgSvmAllocCalls); EXPECT_EQ(0u, mockSvmManager->allocationsCounter); EXPECT_EQ(mockSvmManager->allocationsCounter, pMockKernel->getKernelArguments()[0].allocIdMemoryManagerCounter); ++mockSvmManager->allocationsCounter; // second set arg - cache hit on same allocId, updates allocIdMemoryManagerCounter retVal = clSetKernelArgSVMPointer( pMockMultiDeviceKernel, // cl_kernel kernel 0, // cl_uint arg_index ptrSvm // const void *arg_value ); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(callCounter, pMockKernel->setArgSvmAllocCalls); EXPECT_EQ(1u, mockSvmManager->allocationsCounter); EXPECT_EQ(mockSvmManager->allocationsCounter, pMockKernel->getKernelArguments()[0].allocIdMemoryManagerCounter); clSVMFree(pContext, ptrSvm); } } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_set_kernel_exec_info_tests.inl000066400000000000000000000432651422164147700311700ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/source/helpers/cl_hw_helper.h" #include "opencl/test/unit_test/command_stream/thread_arbitration_policy_helper.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "cl_api_tests.h" using namespace NEO; class 
KernelExecInfoFixture : public ApiFixture<> { protected: void SetUp() override { ApiFixture::SetUp(); pKernelInfo = std::make_unique(); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1; pMockMultiDeviceKernel = MultiDeviceKernel::create(pProgram, MockKernel::toKernelInfoContainer(*pKernelInfo, testedRootDeviceIndex), nullptr); pMockKernel = static_cast(pMockMultiDeviceKernel->getKernel(testedRootDeviceIndex)); ASSERT_NE(nullptr, pMockKernel); svmCapabilities = pDevice->getDeviceInfo().svmCapabilities; if (svmCapabilities != 0) { ptrSvm = clSVMAlloc(pContext, CL_MEM_READ_WRITE, 256, 4); EXPECT_NE(nullptr, ptrSvm); } } void TearDown() override { if (svmCapabilities != 0) { clSVMFree(pContext, ptrSvm); } if (pMockMultiDeviceKernel) { delete pMockMultiDeviceKernel; } ApiFixture::TearDown(); } cl_int retVal = CL_SUCCESS; MockKernel *pMockKernel = nullptr; MultiDeviceKernel *pMockMultiDeviceKernel = nullptr; std::unique_ptr pKernelInfo; void *ptrSvm = nullptr; cl_device_svm_capabilities svmCapabilities = 0; }; typedef Test clSetKernelExecInfoTests; namespace ULT { TEST_F(clSetKernelExecInfoTests, GivenNullKernelWhenSettingAdditionalKernelInfoThenInvalidKernelErrorIsReturned) { retVal = clSetKernelExecInfo( nullptr, // cl_kernel kernel CL_KERNEL_EXEC_INFO_SVM_PTRS, // cl_kernel_exec_info param_name 0, // size_t param_value_size nullptr // const void *param_value ); EXPECT_EQ(CL_INVALID_KERNEL, retVal); } TEST_F(clSetKernelExecInfoTests, GivenDeviceNotSupportingSvmWhenSettingKernelExecInfoThenErrorIsReturnedOnSvmRelatedParams) { auto &hwHelper = NEO::ClHwHelper::get(pDevice->getHardwareInfo().platform.eRenderCoreFamily); if (!hwHelper.isSupportedKernelThreadArbitrationPolicy()) { GTEST_SKIP(); } auto hwInfo = executionEnvironment->rootDeviceEnvironments[ApiFixture::testedRootDeviceIndex]->getMutableHardwareInfo(); VariableBackup ftrSvm{&hwInfo->capabilityTable.ftrSvm, false}; std::unique_ptr pMultiDeviceKernel(MultiDeviceKernel::create( pProgram, 
MockKernel::toKernelInfoContainer(*pKernelInfo, testedRootDeviceIndex), nullptr)); uint32_t newPolicy = CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_ROUND_ROBIN_INTEL; retVal = clSetKernelExecInfo( pMockMultiDeviceKernel, // cl_kernel kernel CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_INTEL, // cl_kernel_exec_info param_name sizeof(newPolicy), // size_t param_value_size &newPolicy // const void *param_value ); EXPECT_EQ(CL_SUCCESS, retVal); cl_kernel_exec_info svmParams[] = {CL_KERNEL_EXEC_INFO_SVM_PTRS, CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM}; for (auto svmParam : svmParams) { retVal = clSetKernelExecInfo( pMockMultiDeviceKernel, // cl_kernel kernel svmParam, // cl_kernel_exec_info param_name 0, // size_t param_value_size nullptr // const void *param_value ); EXPECT_EQ(CL_INVALID_OPERATION, retVal); } } TEST_F(clSetKernelExecInfoTests, GivenNullParamValueWhenSettingAdditionalKernelInfoThenInvalidValueErrorIsReturned) { REQUIRE_SVM_OR_SKIP(defaultHwInfo); void **pSvmPtrList = nullptr; size_t SvmPtrListSizeInBytes = 1 * sizeof(void *); retVal = clSetKernelExecInfo( pMockMultiDeviceKernel, // cl_kernel kernel CL_KERNEL_EXEC_INFO_SVM_PTRS, // cl_kernel_exec_info param_name SvmPtrListSizeInBytes, // size_t param_value_size pSvmPtrList // const void *param_value ); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(clSetKernelExecInfoTests, GivenNullPointerInParamValueWhenSettingAdditionalKernelInfoThenInvalidValueErrorIsReturned) { REQUIRE_SVM_OR_SKIP(defaultHwInfo); void *pSvmPtrList[] = {nullptr}; size_t SvmPtrListSizeInBytes = 1 * sizeof(void *); retVal = clSetKernelExecInfo( pMockMultiDeviceKernel, // cl_kernel kernel CL_KERNEL_EXEC_INFO_SVM_PTRS, // cl_kernel_exec_info param_name SvmPtrListSizeInBytes, // size_t param_value_size pSvmPtrList // const void *param_value ); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(clSetKernelExecInfoTests, GivenParamSizeZeroWhenSettingAdditionalKernelInfoThenInvalidValueErrorIsReturned) { REQUIRE_SVM_OR_SKIP(defaultHwInfo); void 
*pSvmPtrList[] = {ptrSvm}; size_t SvmPtrListSizeInBytes = 0; retVal = clSetKernelExecInfo( pMockMultiDeviceKernel, // cl_kernel kernel CL_KERNEL_EXEC_INFO_SVM_PTRS, // cl_kernel_exec_info param_name SvmPtrListSizeInBytes, // size_t param_value_size pSvmPtrList // const void *param_value ); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(clSetKernelExecInfoTests, GivenInvalidParamSizeWhenSettingAdditionalKernelInfoThenInvalidValueErrorIsReturned) { REQUIRE_SVM_OR_SKIP(defaultHwInfo); void *pSvmPtrList[] = {ptrSvm}; size_t SvmPtrListSizeInBytes = (size_t)(-1); retVal = clSetKernelExecInfo( pMockMultiDeviceKernel, // cl_kernel kernel CL_KERNEL_EXEC_INFO_SVM_PTRS, // cl_kernel_exec_info param_name SvmPtrListSizeInBytes, // size_t param_value_size pSvmPtrList // const void *param_value ); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(clSetKernelExecInfoTests, GivenInvalidParamNameWhenSettingAdditionalKernelInfoThenInvalidValueErrorIsReturned) { void *pSvmPtrList[] = {ptrSvm}; size_t SvmPtrListSizeInBytes = 1 * sizeof(void *); retVal = clSetKernelExecInfo( pMockMultiDeviceKernel, // cl_kernel kernel 0, // cl_kernel_exec_info param_name SvmPtrListSizeInBytes, // size_t param_value_size pSvmPtrList // const void *param_value ); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(clSetKernelExecInfoTests, GivenInvalidOperationWhenSettingAdditionalKernelInfoThenInvalidOperationErrorIsReturned) { void *pSvmPtrList[] = {ptrSvm}; size_t SvmPtrListSizeInBytes = 1 * sizeof(void *); retVal = clSetKernelExecInfo( pMockMultiDeviceKernel, // cl_kernel kernel CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM, // cl_kernel_exec_info param_name SvmPtrListSizeInBytes, // size_t param_value_size pSvmPtrList // const void *param_value ); EXPECT_EQ(CL_INVALID_OPERATION, retVal); } TEST_F(clSetKernelExecInfoTests, GivenValidPointerListWithOnePointerWhenSettingAdditionalKernelInfoThenSuccessIsReturned) { if (svmCapabilities != 0) { void *pSvmPtrList[] = {ptrSvm}; size_t SvmPtrListSizeInBytes = 1 * 
sizeof(void *); retVal = clSetKernelExecInfo( pMockMultiDeviceKernel, // cl_kernel kernel CL_KERNEL_EXEC_INFO_SVM_PTRS, // cl_kernel_exec_info param_name SvmPtrListSizeInBytes, // size_t param_value_size pSvmPtrList // const void *param_value ); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, pMockKernel->kernelSvmGfxAllocations.size()); } } TEST_F(clSetKernelExecInfoTests, GivenValidPointerListWithMultiplePointersWhenSettingAdditionalKernelInfoThenSuccessIsReturned) { if (svmCapabilities != 0) { void *ptrSvm1 = clSVMAlloc(pContext, CL_MEM_READ_WRITE, 256, 4); EXPECT_NE(nullptr, ptrSvm1); void *ptrSvm2 = clSVMAlloc(pContext, CL_MEM_READ_WRITE, 256, 4); EXPECT_NE(nullptr, ptrSvm2); void *pSvmPtrList[] = {ptrSvm, ptrSvm1, ptrSvm2}; size_t SvmPtrListSizeInBytes = 3 * sizeof(void *); retVal = clSetKernelExecInfo( pMockMultiDeviceKernel, // cl_kernel kernel CL_KERNEL_EXEC_INFO_SVM_PTRS, // cl_kernel_exec_info param_name SvmPtrListSizeInBytes, // size_t param_value_size pSvmPtrList // const void *param_value ); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(3u, pMockKernel->kernelSvmGfxAllocations.size()); EXPECT_TRUE(pMockKernel->svmAllocationsRequireCacheFlush); clSVMFree(pContext, ptrSvm1); clSVMFree(pContext, ptrSvm2); } } TEST_F(clSetKernelExecInfoTests, givenReadOnlySvmPtrListWhenUsedAsKernelPointersThenCacheFlushIsNotRequired) { if (svmCapabilities != 0) { void *ptrSvm1 = clSVMAlloc(pContext, CL_MEM_READ_ONLY, 256, 4); EXPECT_NE(nullptr, ptrSvm1); void *ptrSvm2 = clSVMAlloc(pContext, CL_MEM_READ_ONLY, 256, 4); EXPECT_NE(nullptr, ptrSvm2); void *pSvmPtrList[] = {ptrSvm1, ptrSvm2}; size_t SvmPtrListSizeInBytes = 2 * sizeof(void *); retVal = clSetKernelExecInfo( pMockMultiDeviceKernel, // cl_kernel kernel CL_KERNEL_EXEC_INFO_SVM_PTRS, // cl_kernel_exec_info param_name SvmPtrListSizeInBytes, // size_t param_value_size pSvmPtrList // const void *param_value ); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(2u, pMockKernel->kernelSvmGfxAllocations.size()); 
EXPECT_FALSE(pMockKernel->svmAllocationsRequireCacheFlush); clSVMFree(pContext, ptrSvm1); clSVMFree(pContext, ptrSvm2); } } TEST_F(clSetKernelExecInfoTests, GivenMultipleSettingKernelInfoOperationsWhenSettingAdditionalKernelInfoThenSuccessIsReturned) { if (svmCapabilities != 0) { void *pSvmPtrList[] = {ptrSvm}; size_t SvmPtrListSizeInBytes = 1 * sizeof(void *); retVal = clSetKernelExecInfo( pMockMultiDeviceKernel, // cl_kernel kernel CL_KERNEL_EXEC_INFO_SVM_PTRS, // cl_kernel_exec_info param_name SvmPtrListSizeInBytes, // size_t param_value_size pSvmPtrList // const void *param_value ); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, pMockKernel->kernelSvmGfxAllocations.size()); retVal = clSetKernelExecInfo( pMockMultiDeviceKernel, // cl_kernel kernel CL_KERNEL_EXEC_INFO_SVM_PTRS, // cl_kernel_exec_info param_name SvmPtrListSizeInBytes, // size_t param_value_size pSvmPtrList // const void *param_value ); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, pMockKernel->kernelSvmGfxAllocations.size()); } } TEST_F(clSetKernelExecInfoTests, givenNonExistingParamNameWithValuesWhenSettingAdditionalKernelInfoThenInvalidValueIsReturned) { uint32_t paramName = 1234u; size_t size = sizeof(cl_bool); retVal = clSetKernelExecInfo(pMockMultiDeviceKernel, paramName, size, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); size = 2 * sizeof(cl_bool); cl_bool paramValue = CL_TRUE; retVal = clSetKernelExecInfo(pMockMultiDeviceKernel, paramName, size, ¶mValue); EXPECT_EQ(CL_INVALID_VALUE, retVal); retVal = clSetKernelExecInfo(pMockMultiDeviceKernel, paramName, size, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); size = sizeof(cl_bool); paramValue = CL_FALSE; retVal = clSetKernelExecInfo(pMockMultiDeviceKernel, paramName, size, ¶mValue); EXPECT_EQ(CL_INVALID_VALUE, retVal); paramValue = CL_TRUE; retVal = clSetKernelExecInfo(pMockMultiDeviceKernel, paramName, size, ¶mValue); EXPECT_EQ(CL_INVALID_VALUE, retVal); } HWTEST_F(clSetKernelExecInfoTests, 
givenKernelExecInfoThreadArbitrationPolicyWhenSettingAdditionalKernelInfoThenSuccessIsReturned) { auto &hwHelper = NEO::ClHwHelper::get(pDevice->getHardwareInfo().platform.eRenderCoreFamily); if (!hwHelper.isSupportedKernelThreadArbitrationPolicy()) { GTEST_SKIP(); } uint32_t newThreadArbitrationPolicy = CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_ROUND_ROBIN_INTEL; size_t ptrSizeInBytes = sizeof(uint32_t *); retVal = clSetKernelExecInfo( pMockMultiDeviceKernel, // cl_kernel kernel CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_INTEL, // cl_kernel_exec_info param_name ptrSizeInBytes, // size_t param_value_size &newThreadArbitrationPolicy // const void *param_value ); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(getNewKernelArbitrationPolicy(newThreadArbitrationPolicy), pMockKernel->threadArbitrationPolicy); EXPECT_EQ(getNewKernelArbitrationPolicy(newThreadArbitrationPolicy), pMockKernel->getThreadArbitrationPolicy()); } HWTEST_F(clSetKernelExecInfoTests, givenKernelExecInfoThreadArbitrationPolicyWhenNotSupportedAndSettingAdditionalKernelInfoThenClInvalidDeviceIsReturned) { auto &hwHelper = NEO::ClHwHelper::get(pDevice->getHardwareInfo().platform.eRenderCoreFamily); if (hwHelper.isSupportedKernelThreadArbitrationPolicy()) { GTEST_SKIP(); } uint32_t newThreadArbitrationPolicy = CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_ROUND_ROBIN_INTEL; size_t ptrSizeInBytes = sizeof(uint32_t *); retVal = clSetKernelExecInfo( pMockMultiDeviceKernel, // cl_kernel kernel CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_INTEL, // cl_kernel_exec_info param_name ptrSizeInBytes, // size_t param_value_size &newThreadArbitrationPolicy // const void *param_value ); EXPECT_EQ(CL_INVALID_DEVICE, retVal); } HWTEST_F(clSetKernelExecInfoTests, givenInvalidThreadArbitrationPolicyWhenSettingAdditionalKernelInfoThenClInvalidValueIsReturned) { auto &hwHelper = NEO::ClHwHelper::get(pDevice->getHardwareInfo().platform.eRenderCoreFamily); if (!hwHelper.isSupportedKernelThreadArbitrationPolicy()) { 
GTEST_SKIP(); } uint32_t invalidThreadArbitrationPolicy = 0; size_t ptrSizeInBytes = 1 * sizeof(uint32_t *); retVal = clSetKernelExecInfo( pMockMultiDeviceKernel, // cl_kernel kernel CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_INTEL, // cl_kernel_exec_info param_name ptrSizeInBytes, // size_t param_value_size &invalidThreadArbitrationPolicy // const void *param_value ); EXPECT_EQ(CL_INVALID_VALUE, retVal); } HWTEST_F(clSetKernelExecInfoTests, givenInvalidParamSizeWhenSettingKernelExecutionTypeThenClInvalidValueErrorIsReturned) { cl_execution_info_kernel_type_intel kernelExecutionType = 0; retVal = clSetKernelExecInfo( pMockMultiDeviceKernel, // cl_kernel kernel CL_KERNEL_EXEC_INFO_KERNEL_TYPE_INTEL, // cl_kernel_exec_info param_name sizeof(cl_execution_info_kernel_type_intel) - 1, // size_t param_value_size &kernelExecutionType // const void *param_value ); EXPECT_EQ(CL_INVALID_VALUE, retVal); } HWTEST_F(clSetKernelExecInfoTests, givenInvalidParamValueWhenSettingKernelExecutionTypeThenClInvalidValueErrorIsReturned) { retVal = clSetKernelExecInfo( pMockMultiDeviceKernel, // cl_kernel kernel CL_KERNEL_EXEC_INFO_KERNEL_TYPE_INTEL, // cl_kernel_exec_info param_name sizeof(cl_execution_info_kernel_type_intel), // size_t param_value_size nullptr // const void *param_value ); EXPECT_EQ(CL_INVALID_VALUE, retVal); } HWTEST_F(clSetKernelExecInfoTests, givenDifferentExecutionTypesWhenSettingAdditionalKernelInfoThenCorrectValuesAreSet) { cl_kernel_exec_info paramName = CL_KERNEL_EXEC_INFO_KERNEL_TYPE_INTEL; size_t paramSize = sizeof(cl_execution_info_kernel_type_intel); cl_execution_info_kernel_type_intel kernelExecutionType = -1; retVal = clSetKernelExecInfo( pMockMultiDeviceKernel, // cl_kernel kernel paramName, // cl_kernel_exec_info param_name paramSize, // size_t param_value_size &kernelExecutionType // const void *param_value ); EXPECT_EQ(CL_INVALID_VALUE, retVal); kernelExecutionType = CL_KERNEL_EXEC_INFO_DEFAULT_TYPE_INTEL; retVal = clSetKernelExecInfo( 
pMockMultiDeviceKernel, // cl_kernel kernel paramName, // cl_kernel_exec_info param_name paramSize, // size_t param_value_size &kernelExecutionType // const void *param_value ); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(KernelExecutionType::Default, pMockKernel->executionType); kernelExecutionType = CL_KERNEL_EXEC_INFO_CONCURRENT_TYPE_INTEL; retVal = clSetKernelExecInfo( pMockMultiDeviceKernel, // cl_kernel kernel paramName, // cl_kernel_exec_info param_name paramSize, // size_t param_value_size &kernelExecutionType // const void *param_value ); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(KernelExecutionType::Concurrent, pMockKernel->executionType); } } // namespace ULT cl_set_mem_object_destructor_callback_tests.inl000066400000000000000000000071631422164147700340050ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/api/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/context/context.h" #include "opencl/source/mem_obj/image.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clCreateBufferTests; namespace ULT { static int cbInvoked = 0; void CL_CALLBACK destructorCallback(cl_mem memObj, void *userData) { cbInvoked++; } struct clSetMemObjectDestructorCallbackTests : public ApiFixture<>, public ::testing::Test { void SetUp() override { ApiFixture::SetUp(); // clang-format off imageFormat.image_channel_order = CL_RGBA; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_width = 32; imageDesc.image_height = 32; imageDesc.image_depth = 1; imageDesc.image_array_size = 1; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = nullptr; // clang-format on cbInvoked = 0; } void TearDown() override { ApiFixture::TearDown(); } cl_image_format imageFormat; cl_image_desc imageDesc; }; TEST_F(clSetMemObjectDestructorCallbackTests, 
GivenNullMemObjWhenSettingMemObjCallbackThenInvalidMemObjectErrorIsReturned) { retVal = clSetMemObjectDestructorCallback(nullptr, nullptr, nullptr); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); EXPECT_EQ(0, cbInvoked); } TEST_F(clSetMemObjectDestructorCallbackTests, GivenImageAndDestructorCallbackWhenSettingMemObjCallbackThenSuccessIsReturned) { auto image = Image::validateAndCreateImage(pContext, nullptr, CL_MEM_READ_WRITE, 0, &imageFormat, &imageDesc, nullptr, retVal); retVal = clSetMemObjectDestructorCallback(image, destructorCallback, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(image); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1, cbInvoked); } TEST_F(clSetMemObjectDestructorCallbackTests, GivenImageAndNullCallbackFunctionWhenSettingMemObjCallbackThenInvalidValueErrorIsReturned) { auto image = Image::validateAndCreateImage(pContext, nullptr, CL_MEM_READ_WRITE, 0, &imageFormat, &imageDesc, nullptr, retVal); retVal = clSetMemObjectDestructorCallback(image, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); retVal = clReleaseMemObject(image); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0, cbInvoked); } TEST_F(clSetMemObjectDestructorCallbackTests, GivenBufferAndDestructorCallbackFunctionWhenSettingMemObjCallbackThenSuccessIsReturned) { auto buffer = clCreateBuffer(pContext, CL_MEM_READ_WRITE, 42, nullptr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); retVal = clSetMemObjectDestructorCallback(buffer, destructorCallback, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(buffer); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1, cbInvoked); } TEST_F(clSetMemObjectDestructorCallbackTests, GivenBufferAndNullCallbackFunctionWhenSettingMemObjCallbackThenInvalidValueErrorIsReturned) { auto buffer = clCreateBuffer(pContext, CL_MEM_READ_WRITE, 42, nullptr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); cbInvoked = 0; retVal = clSetMemObjectDestructorCallback(buffer, nullptr, nullptr); 
EXPECT_EQ(CL_INVALID_VALUE, retVal); retVal = clReleaseMemObject(buffer); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0, cbInvoked); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_set_performance_configuration_tests.inl000066400000000000000000000021751422164147700331140ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/device_instrumentation_fixture.h" #include "opencl/test/unit_test/os_interface/mock_performance_counters.h" #include "cl_api_tests.h" using namespace NEO; struct clSetPerformanceConfigurationINTELTests : public DeviceInstrumentationFixture, public PerformanceCountersDeviceFixture, ::testing::Test { void SetUp() override { PerformanceCountersDeviceFixture::SetUp(); DeviceInstrumentationFixture::SetUp(true); } void TearDown() override { PerformanceCountersDeviceFixture::TearDown(); } }; namespace ULT { TEST_F(clSetPerformanceConfigurationINTELTests, GivenAnyArgumentsWhenSettingPerformanceConfigurationThenInvalidOperationErrorIsReturned) { cl_int ret = CL_OUT_OF_RESOURCES; cl_uint offsets[2]; cl_uint values[2]; ret = clSetPerformanceConfigurationINTEL(device.get(), 2, offsets, values); EXPECT_EQ(CL_INVALID_OPERATION, ret); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_set_program_release_callback.inl000066400000000000000000000022651422164147700314250ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/api/cl_api_tests.h" using namespace NEO; namespace ULT { TEST(clSetProgramReleaseCallbackTest, givenNullptrProgramWhenSettingProgramReleaseCallbackThenInvalidProgramErrorIsReturned) { auto retVal = clSetProgramReleaseCallback(nullptr, nullptr, nullptr); EXPECT_EQ(CL_INVALID_PROGRAM, retVal); } using clSetProgramReleaseCallbackTests = api_tests; TEST_F(clSetProgramReleaseCallbackTests, 
givenPfnNotifyNullptrWhenSettingProgramReleaseCallbackThenInvalidValueErrorIsReturned) { auto retVal = clSetProgramReleaseCallback(pProgram, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); } void CL_CALLBACK callback(cl_program, void *){}; TEST_F(clSetProgramReleaseCallbackTests, WhenSettingProgramReleaseCallbackThenInvalidOperationErrorIsReturned) { auto retVal = clSetProgramReleaseCallback(pProgram, callback, nullptr); EXPECT_EQ(CL_INVALID_OPERATION, retVal); auto userData = reinterpret_cast(0x4321); retVal = clSetProgramReleaseCallback(pProgram, callback, userData); EXPECT_EQ(CL_INVALID_OPERATION, retVal); } } // namespace ULT cl_set_program_specialization_constant_tests.inl000066400000000000000000000021461422164147700342610ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/api/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/api/cl_api_tests.h" using namespace NEO; namespace ULT { TEST(clSetProgramSpecializationConstantTest, givenNullptrProgramWhenSetProgramSpecializationConstantThenErrorIsReturned) { auto retVal = clSetProgramSpecializationConstant(nullptr, 1, 1, nullptr); EXPECT_EQ(CL_INVALID_PROGRAM, retVal); } using clSetProgramSpecializationConstantTests = api_tests; TEST_F(clSetProgramSpecializationConstantTests, givenNonSpirVProgramWhenSetProgramSpecializationConstantThenErrorIsReturned) { pProgram->isSpirV = false; int specValue = 1; auto retVal = clSetProgramSpecializationConstant(pProgram, 1, sizeof(int), &specValue); EXPECT_EQ(CL_INVALID_PROGRAM, retVal); } TEST_F(clSetProgramSpecializationConstantTests, givenProperProgramAndNullptrSpecValueWhenSetProgramSpecializationConstantThenErrorIsReturned) { pProgram->isSpirV = true; auto retVal = clSetProgramSpecializationConstant(pProgram, 1, 1, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); } } // namespace ULT 
compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_svm_alloc_tests.inl000066400000000000000000000254441422164147700267740ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/device/device.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/source/context/context.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "opencl/test/unit_test/test_macros/test_checks_ocl.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clSVMAllocTests; namespace ULT { class clSVMAllocTemplateTests : public ApiFixture<>, public testing::TestWithParam { public: void SetUp() override { ApiFixture::SetUp(); REQUIRE_SVM_OR_SKIP(pDevice); } void TearDown() override { ApiFixture::TearDown(); } }; struct clSVMAllocValidFlagsTests : public clSVMAllocTemplateTests { cl_uchar pHostPtr[64]; }; TEST(clSVMAllocTest, givenPlatformWithoutDevicesWhenClSVMAllocIsCalledThenDeviceIsTakenFromContext) { auto executionEnvironment = platform()->peekExecutionEnvironment(); executionEnvironment->initializeMemoryManager(); executionEnvironment->prepareRootDeviceEnvironments(1); auto clDevice = std::make_unique(*Device::create(executionEnvironment, 0u), platform()); const ClDeviceInfo &devInfo = clDevice->getDeviceInfo(); if (devInfo.svmCapabilities == 0) { GTEST_SKIP(); } cl_device_id deviceId = clDevice.get(); cl_int retVal; auto context = ReleaseableObjectPtr(Context::create(nullptr, ClDeviceVector(&deviceId, 1), nullptr, nullptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, platform()->getNumDevices()); auto SVMPtr = clSVMAlloc(context.get(), 0u, 4096, 128); EXPECT_NE(nullptr, SVMPtr); clSVMFree(context.get(), SVMPtr); } TEST_P(clSVMAllocValidFlagsTests, GivenSvmSupportWhenAllocatingSvmThenSvmIsAllocated) { cl_mem_flags flags = GetParam(); const ClDeviceInfo &devInfo = 
pDevice->getDeviceInfo(); //check for svm support if (devInfo.svmCapabilities != 0) { //fg svm flag if (flags & CL_MEM_SVM_FINE_GRAIN_BUFFER) { //fg svm flag, fg svm support - expected success if (devInfo.svmCapabilities & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) { auto SVMPtr = clSVMAlloc(pContext, flags, 4096 /* Size*/, 128 /* alignment */); EXPECT_NE(nullptr, SVMPtr); clSVMFree(pContext, SVMPtr); } //fg svm flag no fg svm support else { auto SVMPtr = clSVMAlloc(pContext, flags, 4096 /* Size*/, 128 /* alignment */); EXPECT_EQ(nullptr, SVMPtr); } } //no fg svm flag, svm support - expected success else { auto SVMPtr = clSVMAlloc(pContext, flags, 4096 /* Size*/, 128 /* alignment */); EXPECT_NE(nullptr, SVMPtr); clSVMFree(pContext, SVMPtr); } } else { //no svm support -expected fail auto SVMPtr = clSVMAlloc(pContext, flags, 4096 /* Size*/, 128 /* alignment */); EXPECT_EQ(nullptr, SVMPtr); } }; static cl_mem_flags SVMAllocValidFlags[] = { 0, CL_MEM_READ_WRITE, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY, CL_MEM_SVM_FINE_GRAIN_BUFFER, CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS, CL_MEM_WRITE_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER, CL_MEM_WRITE_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS, CL_MEM_READ_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER, CL_MEM_READ_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS}; INSTANTIATE_TEST_CASE_P( SVMAllocCheckFlags, clSVMAllocValidFlagsTests, testing::ValuesIn(SVMAllocValidFlags)); using clSVMAllocFtrFlagsTests = clSVMAllocTemplateTests; INSTANTIATE_TEST_CASE_P( SVMAllocCheckFlagsFtrFlags, clSVMAllocFtrFlagsTests, testing::ValuesIn(SVMAllocValidFlags)); TEST_P(clSVMAllocFtrFlagsTests, GivenCorrectFlagsWhenAllocatingSvmThenSvmIsAllocated) { HardwareInfo *pHwInfo = pDevice->getExecutionEnvironment()->rootDeviceEnvironments[testedRootDeviceIndex]->getMutableHardwareInfo(); cl_mem_flags flags = GetParam(); void *SVMPtr = 
nullptr; //1: no svm - no flags supported pHwInfo->capabilityTable.ftrSvm = false; pHwInfo->capabilityTable.ftrSupportsCoherency = false; SVMPtr = clSVMAlloc(pContext, flags, 4096, 128); EXPECT_EQ(nullptr, SVMPtr); //2: coarse svm - normal flags supported pHwInfo->capabilityTable.ftrSvm = true; SVMPtr = clSVMAlloc(pContext, flags, 4096, 128); if (flags & CL_MEM_SVM_FINE_GRAIN_BUFFER) { //fg svm flags not supported EXPECT_EQ(nullptr, SVMPtr); } else { //no fg svm flags supported EXPECT_NE(nullptr, SVMPtr); clSVMFree(pContext, SVMPtr); } //3: fg svm - all flags supported pHwInfo->capabilityTable.ftrSupportsCoherency = true; SVMPtr = clSVMAlloc(pContext, flags, 4096, 128); EXPECT_NE(nullptr, SVMPtr); clSVMFree(pContext, SVMPtr); }; struct clSVMAllocInvalidFlagsTests : public clSVMAllocTemplateTests { }; TEST_P(clSVMAllocInvalidFlagsTests, GivenInvalidFlagsWhenAllocatingSvmThenSvmIsNotAllocated) { cl_mem_flags flags = GetParam(); auto SVMPtr = clSVMAlloc(pContext, flags, 4096 /* Size*/, 128 /* alignment */); EXPECT_EQ(nullptr, SVMPtr); }; cl_mem_flags SVMAllocInvalidFlags[] = { CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY, CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY, CL_MEM_SVM_ATOMICS, 0xffcc}; INSTANTIATE_TEST_CASE_P( SVMAllocCheckFlags, clSVMAllocInvalidFlagsTests, testing::ValuesIn(SVMAllocInvalidFlags)); TEST_F(clSVMAllocTests, GivenNullContextWhenAllocatingSvmThenSvmIsNotAllocated) { cl_mem_flags flags = CL_MEM_READ_WRITE; auto SVMPtr = clSVMAlloc(nullptr /* cl_context */, flags, 4096 /* Size*/, 128 /* alignment */); EXPECT_EQ(nullptr, SVMPtr); } TEST_F(clSVMAllocTests, GivenZeroSizeWhenAllocatingSvmThenSvmIsNotAllocated) { cl_mem_flags flags = CL_MEM_READ_WRITE; auto SVMPtr = clSVMAlloc(pContext /* cl_context */, flags, 0 /* Size*/, 128 /* alignment */); EXPECT_EQ(nullptr, SVMPtr); } TEST_F(clSVMAllocTests, GivenZeroAlignmentWhenAllocatingSvmThenSvmIsAllocated) { const ClDeviceInfo &devInfo = pDevice->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { cl_mem_flags flags = 
CL_MEM_READ_WRITE; auto SVMPtr = clSVMAlloc(pContext /* cl_context */, flags, 4096 /* Size*/, 0 /* alignment */); EXPECT_NE(nullptr, SVMPtr); clSVMFree(pContext, SVMPtr); } } TEST_F(clSVMAllocTests, givenUnrestrictedFlagWhenCreatingSvmAllocThenAllowSizeBiggerThanMaxMemAllocSize) { REQUIRE_SVM_OR_SKIP(pDevice); const size_t maxMemAllocSize = 128; static_cast(pDevice->getDevice()).deviceInfo.maxMemAllocSize = maxMemAllocSize; size_t allowedSize = maxMemAllocSize; size_t notAllowedSize = maxMemAllocSize + 1; cl_mem_flags flags = 0; void *svmPtr = nullptr; { // no flag + not allowed size svmPtr = clSVMAlloc(pContext, flags, notAllowedSize, 0); EXPECT_EQ(nullptr, svmPtr); } flags = CL_MEM_ALLOW_UNRESTRICTED_SIZE_INTEL; { // unrestricted size flag + not allowed size svmPtr = clSVMAlloc(pContext, flags, notAllowedSize, 0); EXPECT_NE(nullptr, svmPtr); clSVMFree(pContext, svmPtr); } { // debug flag + not allowed size DebugManagerStateRestore restorer; DebugManager.flags.AllowUnrestrictedSize.set(1); svmPtr = clSVMAlloc(pContext, 0, notAllowedSize, 0); EXPECT_NE(nullptr, svmPtr); clSVMFree(pContext, svmPtr); } { // unrestricted size flag + allowed size svmPtr = clSVMAlloc(pContext, flags, allowedSize, 0); EXPECT_NE(nullptr, svmPtr); clSVMFree(pContext, svmPtr); } } TEST_F(clSVMAllocTests, GivenUnalignedSizeAndDefaultAlignmentWhenAllocatingSvmThenSvmIsAllocated) { const ClDeviceInfo &devInfo = pDevice->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { cl_mem_flags flags = CL_MEM_READ_WRITE; auto SVMPtr = clSVMAlloc(pContext /* cl_context */, flags, 4095 /* Size*/, 0 /* alignment */); EXPECT_NE(nullptr, SVMPtr); clSVMFree(pContext, SVMPtr); } } TEST_F(clSVMAllocTests, GivenAlignmentNotPowerOfTwoWhenAllocatingSvmThenSvmIsNotAllocated) { cl_mem_flags flags = CL_MEM_READ_WRITE; auto SVMPtr = clSVMAlloc(pContext /* cl_context */, flags, 4096 /* Size*/, 129 /* alignment */); EXPECT_EQ(nullptr, SVMPtr); } TEST_F(clSVMAllocTests, 
GivenAlignmentTooLargeWhenAllocatingSvmThenSvmIsNotAllocated) { auto SVMPtr = clSVMAlloc(pContext, CL_MEM_READ_WRITE, 4096 /* Size */, 4096 /* alignment */); EXPECT_EQ(nullptr, SVMPtr); }; TEST_F(clSVMAllocTests, GivenForcedFineGrainedSvmWhenCreatingSvmAllocThenAllocationIsCreated) { REQUIRE_SVM_OR_SKIP(pDevice); DebugManagerStateRestore restore{}; HardwareInfo *hwInfo = pDevice->getExecutionEnvironment()->rootDeviceEnvironments[testedRootDeviceIndex]->getMutableHardwareInfo(); hwInfo->capabilityTable.ftrSvm = true; hwInfo->capabilityTable.ftrSupportsCoherency = false; auto allocation = clSVMAlloc(pContext, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, 4096 /* Size */, 0 /* alignment */); EXPECT_EQ(nullptr, allocation); clSVMFree(pContext, allocation); DebugManager.flags.ForceFineGrainedSVMSupport.set(1); allocation = clSVMAlloc(pContext, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, 4096 /* Size */, 0 /* alignment */); EXPECT_NE(nullptr, allocation); clSVMFree(pContext, allocation); } TEST(clSvmAllocTest, givenSubDeviceWhenCreatingSvmAllocThenProperDeviceBitfieldIsPassed) { REQUIRE_SVM_OR_SKIP(defaultHwInfo.get()); UltClDeviceFactory deviceFactory{1, 2}; auto device = deviceFactory.subDevices[1]; auto executionEnvironment = device->getExecutionEnvironment(); auto memoryManager = new MockMemoryManager(*executionEnvironment); std::unique_ptr memoryManagerBackup(memoryManager); std::swap(memoryManagerBackup, executionEnvironment->memoryManager); MockContext context(device); auto expectedDeviceBitfield = context.getDeviceBitfieldForAllocation(device->getRootDeviceIndex()); EXPECT_NE(expectedDeviceBitfield, memoryManager->recentlyPassedDeviceBitfield); auto svmPtr = clSVMAlloc(&context, CL_MEM_READ_WRITE, MemoryConstants::pageSize, MemoryConstants::cacheLineSize); EXPECT_NE(nullptr, svmPtr); EXPECT_EQ(expectedDeviceBitfield, memoryManager->recentlyPassedDeviceBitfield); clSVMFree(&context, svmPtr); std::swap(memoryManagerBackup, 
executionEnvironment->memoryManager); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_svm_free_tests.inl000066400000000000000000000017061422164147700266160ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "cl_api_tests.h" using namespace NEO; typedef api_tests clSVMFreeTests; namespace ULT { TEST_F(clSVMFreeTests, GivenNullPtrWhenFreeingSvmThenNoAction) { clSVMFree( nullptr, // cl_context context nullptr // void *svm_pointer ); } TEST_F(clSVMFreeTests, GivenContextWithDeviceNotSupportingSvmWhenFreeingSvmThenNoAction) { HardwareInfo hwInfo = *defaultHwInfo; hwInfo.capabilityTable.ftrSvm = false; auto clDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); cl_device_id deviceId = clDevice.get(); auto context = clUniquePtr(Context::create(nullptr, ClDeviceVector(&deviceId, 1), nullptr, nullptr, retVal)); EXPECT_EQ(retVal, CL_SUCCESS); clSVMFree( context.get(), reinterpret_cast(0x1234)); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_unified_shared_memory_tests.inl000066400000000000000000001632721422164147700313600ustar00rootroot00000000000000/* * Copyright (C) 2019-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/memory_manager/unified_memory_manager.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "opencl/source/api/api.h" #include "opencl/test/unit_test/command_queue/command_queue_fixture.h" #include "opencl/test/unit_test/fixtures/multi_root_device_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/test_macros/test_checks_ocl.h" using namespace NEO; TEST(clUnifiedSharedMemoryTests, whenClHostMemAllocINTELisCalledWithoutContextThenInvalidContextIsReturned) { cl_int retVal = CL_SUCCESS; 
auto ptr = clHostMemAllocINTEL(0, nullptr, 0, 0, &retVal); EXPECT_EQ(nullptr, ptr); EXPECT_EQ(CL_INVALID_CONTEXT, retVal); } TEST(clUnifiedSharedMemoryTests, whenClHostMemAllocIntelIsCalledThenItAllocatesHostUnifiedMemoryAllocation) { MockContext mockContext; auto device = mockContext.getDevice(0u); REQUIRE_SVM_OR_SKIP(device); cl_int retVal = CL_SUCCESS; auto unifiedMemoryHostAllocation = clHostMemAllocINTEL(&mockContext, nullptr, 4, 0, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, unifiedMemoryHostAllocation); auto allocationsManager = mockContext.getSVMAllocsManager(); EXPECT_EQ(1u, allocationsManager->getNumAllocs()); auto graphicsAllocation = allocationsManager->getSVMAlloc(unifiedMemoryHostAllocation); EXPECT_EQ(graphicsAllocation->size, 4u); EXPECT_EQ(graphicsAllocation->memoryType, InternalMemoryType::HOST_UNIFIED_MEMORY); EXPECT_EQ(graphicsAllocation->gpuAllocations.getGraphicsAllocation(mockContext.getDevice(0)->getRootDeviceIndex())->getGpuAddress(), castToUint64(unifiedMemoryHostAllocation)); retVal = clMemFreeINTEL(&mockContext, unifiedMemoryHostAllocation); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(clUnifiedSharedMemoryTests, GivenForceExtendedUSMBufferSizeDebugFlagWhenUSMAllocationIsCreatedThenSizeIsProperlyExtended) { DebugManagerStateRestore restorer; MockContext mockContext; auto device = mockContext.getDevice(0u); REQUIRE_SVM_OR_SKIP(device); constexpr auto bufferSize = 16; auto pageSizeNumber = 2; DebugManager.flags.ForceExtendedUSMBufferSize.set(pageSizeNumber); auto extendedBufferSize = bufferSize + MemoryConstants::pageSize * pageSizeNumber; cl_int retVal = CL_SUCCESS; auto usmAllocation = clHostMemAllocINTEL(&mockContext, nullptr, bufferSize, 0, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, usmAllocation); auto allocationsManager = mockContext.getSVMAllocsManager(); EXPECT_EQ(1u, allocationsManager->getNumAllocs()); auto graphicsAllocation = allocationsManager->getSVMAlloc(usmAllocation); 
EXPECT_EQ(graphicsAllocation->size, extendedBufferSize); retVal = clMemFreeINTEL(&mockContext, usmAllocation); EXPECT_EQ(CL_SUCCESS, retVal); pageSizeNumber = 4; DebugManager.flags.ForceExtendedUSMBufferSize.set(pageSizeNumber); extendedBufferSize = bufferSize + MemoryConstants::pageSize * pageSizeNumber; usmAllocation = clDeviceMemAllocINTEL(&mockContext, mockContext.getDevice(0u), nullptr, bufferSize, 0, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, usmAllocation); allocationsManager = mockContext.getSVMAllocsManager(); EXPECT_EQ(1u, allocationsManager->getNumAllocs()); graphicsAllocation = allocationsManager->getSVMAlloc(usmAllocation); EXPECT_EQ(graphicsAllocation->size, extendedBufferSize); retVal = clMemFreeINTEL(&mockContext, usmAllocation); EXPECT_EQ(CL_SUCCESS, retVal); pageSizeNumber = 8; DebugManager.flags.ForceExtendedUSMBufferSize.set(pageSizeNumber); extendedBufferSize = bufferSize + MemoryConstants::pageSize * pageSizeNumber; usmAllocation = clSharedMemAllocINTEL(&mockContext, nullptr, nullptr, bufferSize, 0, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, usmAllocation); allocationsManager = mockContext.getSVMAllocsManager(); EXPECT_EQ(1u, allocationsManager->getNumAllocs()); graphicsAllocation = allocationsManager->getSVMAlloc(usmAllocation); EXPECT_EQ(graphicsAllocation->size, extendedBufferSize); retVal = clMemFreeINTEL(&mockContext, usmAllocation); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(clUnifiedSharedMemoryTests, givenMappedAllocationWhenClMemFreeIntelIscalledThenMappingIsRemoved) { MockContext mockContext; cl_int retVal = CL_SUCCESS; auto unifiedMemorySharedAllocation = clSharedMemAllocINTEL(&mockContext, mockContext.getDevice(0u), nullptr, 4, 0, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, unifiedMemorySharedAllocation); auto allocationsManager = mockContext.getSVMAllocsManager(); allocationsManager->insertSvmMapOperation(unifiedMemorySharedAllocation, 4u, unifiedMemorySharedAllocation, 0u, false); 
retVal = clMemFreeINTEL(&mockContext, unifiedMemorySharedAllocation); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(nullptr, allocationsManager->getSvmMapOperation(unifiedMemorySharedAllocation)); } TEST(clUnifiedSharedMemoryTests, whenClDeviceMemAllocINTELisCalledWithWrongContextThenInvalidContextErrorIsReturned) { cl_int retVal = CL_SUCCESS; auto ptr = clDeviceMemAllocINTEL(0, 0, nullptr, 0, 0, &retVal); EXPECT_EQ(nullptr, ptr); EXPECT_EQ(CL_INVALID_CONTEXT, retVal); } TEST(clUnifiedSharedMemoryTests, whenClDeviceMemAllocIntelIsCalledThenItAllocatesDeviceUnifiedMemoryAllocation) { MockContext mockContext; cl_int retVal = CL_SUCCESS; auto unifiedMemoryDeviceAllocation = clDeviceMemAllocINTEL(&mockContext, mockContext.getDevice(0u), nullptr, 4, 0, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, unifiedMemoryDeviceAllocation); auto allocationsManager = mockContext.getSVMAllocsManager(); EXPECT_EQ(1u, allocationsManager->getNumAllocs()); auto graphicsAllocation = allocationsManager->getSVMAlloc(unifiedMemoryDeviceAllocation); EXPECT_EQ(graphicsAllocation->size, 4u); EXPECT_EQ(graphicsAllocation->memoryType, InternalMemoryType::DEVICE_UNIFIED_MEMORY); EXPECT_EQ(graphicsAllocation->gpuAllocations.getGraphicsAllocation(mockContext.getDevice(0)->getRootDeviceIndex())->getGpuAddress(), castToUint64(unifiedMemoryDeviceAllocation)); retVal = clMemFreeINTEL(&mockContext, unifiedMemoryDeviceAllocation); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(clUnifiedSharedMemoryTests, whenUnifiedSharedMemoryAllocationCallsAreCalledWithSizeGreaterThenMaxMemAllocSizeThenErrorIsReturned) { MockContext mockContext; cl_int retVal = CL_SUCCESS; auto maxMemAllocSize = mockContext.getDevice(0u)->getDevice().getDeviceInfo().maxMemAllocSize; size_t requestedSize = static_cast(maxMemAllocSize) + 1u; auto unifiedMemoryDeviceAllocation = clDeviceMemAllocINTEL(&mockContext, mockContext.getDevice(0u), nullptr, requestedSize, 0, &retVal); EXPECT_EQ(CL_INVALID_BUFFER_SIZE, retVal); EXPECT_EQ(nullptr, 
unifiedMemoryDeviceAllocation); unifiedMemoryDeviceAllocation = clSharedMemAllocINTEL(&mockContext, mockContext.getDevice(0u), nullptr, requestedSize, 0, &retVal); EXPECT_EQ(CL_INVALID_BUFFER_SIZE, retVal); EXPECT_EQ(nullptr, unifiedMemoryDeviceAllocation); unifiedMemoryDeviceAllocation = clHostMemAllocINTEL(&mockContext, nullptr, requestedSize, 0, &retVal); EXPECT_EQ(CL_INVALID_BUFFER_SIZE, retVal); EXPECT_EQ(nullptr, unifiedMemoryDeviceAllocation); } TEST(clUnifiedSharedMemoryTests, givenSharedMemAllocCallWhenAllocatingGraphicsMemoryFailsThenOutOfResourcesErrorIsReturned) { UltClDeviceFactory deviceFactory{1, 0}; auto executionEnvironment = deviceFactory.rootDevices[0]->getExecutionEnvironment(); std::unique_ptr memoryManager = std::make_unique(0, *executionEnvironment); std::swap(memoryManager, executionEnvironment->memoryManager); MockContext context(deviceFactory.rootDevices[0]); cl_int retVal = CL_INVALID_CONTEXT; auto allocation = clSharedMemAllocINTEL(&context, nullptr, nullptr, MemoryConstants::pageSize, 0, &retVal); EXPECT_EQ(CL_OUT_OF_RESOURCES, retVal); EXPECT_EQ(nullptr, allocation); std::swap(memoryManager, executionEnvironment->memoryManager); } TEST(clUnifiedSharedMemoryTests, whenClSharedMemAllocINTELisCalledWithWrongContextThenInvalidContextErrorIsReturned) { cl_int retVal = CL_SUCCESS; auto ptr = clSharedMemAllocINTEL(0, 0, nullptr, 0, 0, &retVal); EXPECT_EQ(nullptr, ptr); EXPECT_EQ(CL_INVALID_CONTEXT, retVal); } TEST(clUnifiedSharedMemoryTests, whenClSharedMemAllocINTELisCalledWithWrongDeviceThenInvalidDeviceErrorIsReturned) { cl_int retVal = CL_SUCCESS; MockContext context0; MockContext context1; auto ptr = clSharedMemAllocINTEL(&context0, context1.getDevice(0), nullptr, 0, 0, &retVal); EXPECT_EQ(nullptr, ptr); EXPECT_EQ(CL_INVALID_DEVICE, retVal); } TEST(clUnifiedSharedMemoryTests, whenClSharedMemAllocIntelIsCalledThenItAllocatesSharedUnifiedMemoryAllocation) { MockContext mockContext; cl_int retVal = CL_SUCCESS; auto 
unifiedMemorySharedAllocation = clSharedMemAllocINTEL(&mockContext, mockContext.getDevice(0u), nullptr, 4, 0, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, unifiedMemorySharedAllocation); auto allocationsManager = mockContext.getSVMAllocsManager(); EXPECT_EQ(1u, allocationsManager->getNumAllocs()); auto graphicsAllocation = allocationsManager->getSVMAlloc(unifiedMemorySharedAllocation); EXPECT_EQ(graphicsAllocation->size, 4u); EXPECT_EQ(graphicsAllocation->memoryType, InternalMemoryType::SHARED_UNIFIED_MEMORY); EXPECT_EQ(graphicsAllocation->gpuAllocations.getGraphicsAllocation(mockContext.getDevice(0)->getRootDeviceIndex())->getGpuAddress(), castToUint64(unifiedMemorySharedAllocation)); retVal = clMemFreeINTEL(&mockContext, unifiedMemorySharedAllocation); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(clUnifiedSharedMemoryTests, whenClMemFreeINTELisCalledWithIncorrectContextThenReturnError) { auto retVal = clMemFreeINTEL(0, nullptr); EXPECT_EQ(CL_INVALID_CONTEXT, retVal); } TEST(clUnifiedSharedMemoryTests, whenClMemFreeINTELisCalledWithNullPointerThenNoActionOccurs) { MockContext mockContext; auto retVal = clMemFreeINTEL(&mockContext, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(clUnifiedSharedMemoryTests, whenClMemBlockingFreeINTELisCalledWithNullPointerThenNoActionOccurs) { MockContext mockContext; auto retVal = clMemBlockingFreeINTEL(&mockContext, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(clUnifiedSharedMemoryTests, whenClMemFreeINTELisCalledWithValidUmPointerThenMemoryIsFreed) { MockContext mockContext; auto device = mockContext.getDevice(0u); REQUIRE_SVM_OR_SKIP(device); cl_int retVal = CL_SUCCESS; auto unifiedMemoryHostAllocation = clHostMemAllocINTEL(&mockContext, nullptr, 4, 0, &retVal); auto allocationsManager = mockContext.getSVMAllocsManager(); EXPECT_EQ(1u, allocationsManager->getNumAllocs()); retVal = clMemFreeINTEL(&mockContext, unifiedMemoryHostAllocation); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, 
allocationsManager->getNumAllocs()); } TEST(clUnifiedSharedMemoryTests, whenClMemFreeINTELisCalledWithInvalidUmPointerThenMemoryIsNotFreed) { MockContext mockContext; auto device = mockContext.getDevice(0u); REQUIRE_SVM_OR_SKIP(device); cl_int retVal = CL_SUCCESS; auto unifiedMemoryHostAllocation = clHostMemAllocINTEL(&mockContext, nullptr, 4, 0, &retVal); auto allocationsManager = mockContext.getSVMAllocsManager(); EXPECT_EQ(1u, allocationsManager->getNumAllocs()); retVal = clMemFreeINTEL(&mockContext, ptrOffset(unifiedMemoryHostAllocation, 4)); EXPECT_EQ(CL_INVALID_VALUE, retVal); EXPECT_EQ(1u, allocationsManager->getNumAllocs()); retVal = clMemFreeINTEL(&mockContext, unifiedMemoryHostAllocation); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, allocationsManager->getNumAllocs()); } TEST(clUnifiedSharedMemoryTests, whenClGetMemAllocInfoINTELisCalledWithoutContextThenInvalidContextIsReturned) { auto retVal = clGetMemAllocInfoINTEL(0, nullptr, 0, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_CONTEXT, retVal); } TEST(clUnifiedSharedMemoryTests, whenClGetMemAllocInfoINTELisCalledWithoutAllocationThenInvalidValueIsReturned) { MockContext mockContext; auto retVal = clGetMemAllocInfoINTEL(&mockContext, nullptr, 0, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST(clUnifiedSharedMemoryTests, whenClGetMemAllocInfoINTELisCalledWithoutAllocationAndWithPropertiesThenProperValueIsReturned) { MockContext mockContext; cl_int retVal = CL_INVALID_VALUE; size_t paramValueSize = sizeof(void *); size_t paramValueSizeRet = 0; { void *paramValue = reinterpret_cast(0xfeedbac); retVal = clGetMemAllocInfoINTEL(&mockContext, mockContext.getDevice(0), CL_MEM_ALLOC_BASE_PTR_INTEL, paramValueSize, ¶mValue, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(void *), paramValueSizeRet); EXPECT_EQ(static_cast(nullptr), paramValue); } { size_t paramValue = 1; paramValueSize = sizeof(size_t); retVal = clGetMemAllocInfoINTEL(&mockContext, mockContext.getDevice(0), 
CL_MEM_ALLOC_SIZE_INTEL, paramValueSize, ¶mValue, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(size_t), paramValueSizeRet); EXPECT_EQ(static_cast(0u), paramValue); } { cl_device_id paramValue = mockContext.getDevice(0); paramValueSize = sizeof(cl_device_id); retVal = clGetMemAllocInfoINTEL(&mockContext, mockContext.getDevice(0), CL_MEM_ALLOC_DEVICE_INTEL, paramValueSize, ¶mValue, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(cl_device_id), paramValueSizeRet); EXPECT_EQ(static_cast(nullptr), paramValue); } { cl_mem_alloc_flags_intel paramValue = 1; paramValueSize = sizeof(cl_mem_properties_intel); retVal = clGetMemAllocInfoINTEL(&mockContext, mockContext.getDevice(0), CL_MEM_ALLOC_FLAGS_INTEL, paramValueSize, ¶mValue, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(cl_mem_properties_intel), paramValueSizeRet); EXPECT_EQ(static_cast(0u), paramValue); } } TEST(clUnifiedSharedMemoryTests, whenClGetMemAllocInfoINTELisCalledWithoutSVMAllocationThenInvalidValueIsReturned) { MockContext mockContext; delete mockContext.svmAllocsManager; mockContext.svmAllocsManager = nullptr; auto retVal = clGetMemAllocInfoINTEL(&mockContext, nullptr, 0, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST(clUnifiedSharedMemoryTests, whenClGetMemAllocInfoINTELisCalledWithAllocationTypeParamNameAndWithoutUnifiedSharedMemoryAllocationThenProperFieldsAreSet) { MockContext mockContext; cl_int retVal = CL_SUCCESS; size_t paramValueSize = sizeof(cl_int); cl_int paramValue = 0; size_t paramValueSizeRet = 0; retVal = clGetMemAllocInfoINTEL(&mockContext, nullptr, CL_MEM_ALLOC_TYPE_INTEL, paramValueSize, ¶mValue, ¶mValueSizeRet); EXPECT_EQ(CL_MEM_TYPE_UNKNOWN_INTEL, paramValue); EXPECT_EQ(sizeof(cl_int), paramValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(clUnifiedSharedMemoryTests, whenClGetMemAllocInfoINTELisCalledWithValidUnifiedMemoryHostAllocationThenProperFieldsAreSet) { MockContext mockContext; auto device = 
mockContext.getDevice(0u); REQUIRE_SVM_OR_SKIP(device); cl_int retVal = CL_SUCCESS; size_t paramValueSize = sizeof(cl_int); cl_int paramValue = 0; size_t paramValueSizeRet = 0; auto unifiedMemoryHostAllocation = clHostMemAllocINTEL(&mockContext, nullptr, 4, 0, &retVal); auto allocationsManager = mockContext.getSVMAllocsManager(); auto graphicsAllocation = allocationsManager->getSVMAlloc(unifiedMemoryHostAllocation); retVal = clGetMemAllocInfoINTEL(&mockContext, unifiedMemoryHostAllocation, CL_MEM_ALLOC_TYPE_INTEL, paramValueSize, ¶mValue, ¶mValueSizeRet); EXPECT_EQ(graphicsAllocation->memoryType, InternalMemoryType::HOST_UNIFIED_MEMORY); EXPECT_EQ(CL_MEM_TYPE_HOST_INTEL, paramValue); EXPECT_EQ(sizeof(cl_int), paramValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clMemFreeINTEL(&mockContext, unifiedMemoryHostAllocation); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(clUnifiedSharedMemoryTests, whenHostMemAllocWithInvalidPropertiesTokenThenErrorIsReturned) { MockContext mockContext; cl_int retVal = CL_SUCCESS; cl_mem_properties_intel properties[] = {0x1234, CL_MEM_ALLOC_WRITE_COMBINED_INTEL, 0}; auto unifiedMemoryHostAllocation = clHostMemAllocINTEL(&mockContext, properties, 4, 0, &retVal); EXPECT_EQ(nullptr, unifiedMemoryHostAllocation); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST(clUnifiedSharedMemoryTests, whenHostMemAllocWithInvalidWriteCombinedTokenThenSuccessIsReturned) { MockContext mockContext; auto device = mockContext.getDevice(0u); REQUIRE_SVM_OR_SKIP(device); cl_int retVal = CL_SUCCESS; cl_mem_properties_intel properties[] = {CL_MEM_ALLOC_FLAGS_INTEL, CL_MEM_ALLOC_WRITE_COMBINED_INTEL, 0}; auto unifiedMemoryHostAllocation = clHostMemAllocINTEL(&mockContext, properties, 4, 0, &retVal); EXPECT_NE(nullptr, unifiedMemoryHostAllocation); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clMemFreeINTEL(&mockContext, unifiedMemoryHostAllocation); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(clUnifiedSharedMemoryTests, 
whenDeviceMemAllocWithInvalidPropertiesTokenThenErrorIsReturned) { MockContext mockContext; cl_int retVal = CL_SUCCESS; cl_mem_properties_intel properties[] = {0x1234, CL_MEM_ALLOC_WRITE_COMBINED_INTEL, 0}; auto unifiedMemoryDeviceAllocation = clDeviceMemAllocINTEL(&mockContext, mockContext.getDevice(0u), properties, 4, 0, &retVal); EXPECT_EQ(nullptr, unifiedMemoryDeviceAllocation); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST(clUnifiedSharedMemoryTests, whenSharedMemAllocWithInvalidPropertiesTokenThenErrorIsReturned) { MockContext mockContext; cl_int retVal = CL_SUCCESS; const uint64_t invalidToken = 0x1234; cl_mem_properties_intel properties[] = {invalidToken, CL_MEM_ALLOC_WRITE_COMBINED_INTEL, 0}; auto unifiedMemorySharedAllocation = clSharedMemAllocINTEL(&mockContext, mockContext.getDevice(0u), properties, 4, 0, &retVal); EXPECT_EQ(nullptr, unifiedMemorySharedAllocation); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST(clUnifiedSharedMemoryTests, whenSharedMemAllocWithInvalidWriteCombinedTokenThenSuccessIsReturned) { MockContext mockContext; cl_int retVal = CL_SUCCESS; cl_mem_properties_intel properties[] = {CL_MEM_ALLOC_FLAGS_INTEL, CL_MEM_ALLOC_WRITE_COMBINED_INTEL, 0}; auto unifiedMemorySharedAllocation = clSharedMemAllocINTEL(&mockContext, mockContext.getDevice(0u), properties, 4, 0, &retVal); EXPECT_NE(nullptr, unifiedMemorySharedAllocation); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clMemFreeINTEL(&mockContext, unifiedMemorySharedAllocation); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(clUnifiedSharedMemoryTests, givenUnifiedMemoryAllocWithoutPropertiesWhenGetMemAllocFlagsThenDefaultValueIsReturned) { uint64_t defaultValue = CL_MEM_ALLOC_DEFAULT_INTEL; MockContext mockContext; auto device = mockContext.getDevice(0u); REQUIRE_SVM_OR_SKIP(device); cl_int retVal = CL_SUCCESS; size_t paramValueSize = sizeof(cl_mem_properties_intel); cl_mem_properties_intel paramValue = 0; size_t paramValueSizeRet = 0; auto unifiedMemoryHostAllocation = clHostMemAllocINTEL(&mockContext, 
nullptr, 4, 0, &retVal); retVal = clGetMemAllocInfoINTEL(&mockContext, unifiedMemoryHostAllocation, CL_MEM_ALLOC_FLAGS_INTEL, paramValueSize, ¶mValue, ¶mValueSizeRet); EXPECT_EQ(defaultValue, paramValue); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clMemFreeINTEL(&mockContext, unifiedMemoryHostAllocation); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(clUnifiedSharedMemoryTests, whenClGetMemAllocTypeIsCalledWithValidUnifiedMemoryHostAllocationThenProperTypeIsReturned) { MockContext mockContext; auto device = mockContext.getDevice(0u); REQUIRE_SVM_OR_SKIP(device); cl_int retVal = CL_SUCCESS; size_t paramValueSize = sizeof(cl_mem_properties_intel); cl_mem_properties_intel paramValue = 0; size_t paramValueSizeRet = 0; cl_mem_properties_intel properties[] = {CL_MEM_ALLOC_FLAGS_INTEL, CL_MEM_ALLOC_DEFAULT_INTEL, 0}; auto unifiedMemoryHostAllocation = clHostMemAllocINTEL(&mockContext, properties, 4, 0, &retVal); retVal = clGetMemAllocInfoINTEL(&mockContext, unifiedMemoryHostAllocation, CL_MEM_ALLOC_FLAGS_INTEL, paramValueSize, ¶mValue, ¶mValueSizeRet); EXPECT_EQ(properties[1], paramValue); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clMemFreeINTEL(&mockContext, unifiedMemoryHostAllocation); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(clUnifiedSharedMemoryTests, whenClGetMemAllocTypeIsCalledWithValidUnifiedMemoryDeviceAllocationThenProperTypeIsReturned) { MockContext mockContext; cl_int retVal = CL_SUCCESS; size_t paramValueSize = sizeof(cl_mem_properties_intel); cl_mem_properties_intel paramValue = 0; size_t paramValueSizeRet = 0; cl_mem_properties_intel properties[] = {CL_MEM_ALLOC_FLAGS_INTEL, CL_MEM_ALLOC_WRITE_COMBINED_INTEL, 0}; auto unifiedMemoryDeviceAllocation = clDeviceMemAllocINTEL(&mockContext, mockContext.getDevice(0u), properties, 4, 0, &retVal); retVal = clGetMemAllocInfoINTEL(&mockContext, unifiedMemoryDeviceAllocation, CL_MEM_ALLOC_FLAGS_INTEL, paramValueSize, ¶mValue, ¶mValueSizeRet); EXPECT_EQ(properties[1], paramValue); EXPECT_EQ(CL_SUCCESS, retVal); retVal = 
clMemFreeINTEL(&mockContext, unifiedMemoryDeviceAllocation); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(clUnifiedSharedMemoryTests, whenClGetMemAllocTypeIsCalledWithValidUnifiedMemorySharedAllocationThenProperTypeIsReturned) { MockContext mockContext; cl_int retVal = CL_SUCCESS; size_t paramValueSize = sizeof(cl_mem_properties_intel); cl_mem_properties_intel paramValue = 0; size_t paramValueSizeRet = 0; cl_mem_properties_intel properties[] = {CL_MEM_ALLOC_FLAGS_INTEL, CL_MEM_ALLOC_DEFAULT_INTEL, 0}; auto unifiedMemorySharedAllocation = clSharedMemAllocINTEL(&mockContext, mockContext.getDevice(0u), properties, 4, 0, &retVal); retVal = clGetMemAllocInfoINTEL(&mockContext, unifiedMemorySharedAllocation, CL_MEM_ALLOC_FLAGS_INTEL, paramValueSize, ¶mValue, ¶mValueSizeRet); EXPECT_EQ(properties[1], paramValue); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clMemFreeINTEL(&mockContext, unifiedMemorySharedAllocation); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(clUnifiedSharedMemoryTests, whenClGetMemAllocInfoINTELisCalledWithValidUnifiedMemoryDeviceAllocationThenProperFieldsAreSet) { MockContext mockContext; cl_int retVal = CL_SUCCESS; size_t paramValueSize = sizeof(cl_int); cl_int paramValue = 0; size_t paramValueSizeRet = 0; auto unifiedMemoryDeviceAllocation = clDeviceMemAllocINTEL(&mockContext, mockContext.getDevice(0u), nullptr, 4, 0, &retVal); auto allocationsManager = mockContext.getSVMAllocsManager(); auto graphicsAllocation = allocationsManager->getSVMAlloc(unifiedMemoryDeviceAllocation); retVal = clGetMemAllocInfoINTEL(&mockContext, unifiedMemoryDeviceAllocation, CL_MEM_ALLOC_TYPE_INTEL, paramValueSize, ¶mValue, ¶mValueSizeRet); EXPECT_EQ(graphicsAllocation->memoryType, InternalMemoryType::DEVICE_UNIFIED_MEMORY); EXPECT_EQ(CL_MEM_TYPE_DEVICE_INTEL, paramValue); EXPECT_EQ(sizeof(cl_int), paramValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clMemFreeINTEL(&mockContext, unifiedMemoryDeviceAllocation); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(clUnifiedSharedMemoryTests, 
whenClGetMemAllocInfoINTELisCalledWithValidUnifiedMemorySharedAllocationThenProperFieldsAreSet) { MockContext mockContext; cl_int retVal = CL_SUCCESS; size_t paramValueSize = sizeof(cl_int); cl_int paramValue = 0; size_t paramValueSizeRet = 0; auto unifiedMemorySharedAllocation = clSharedMemAllocINTEL(&mockContext, mockContext.getDevice(0u), nullptr, 4, 0, &retVal); auto allocationsManager = mockContext.getSVMAllocsManager(); auto graphicsAllocation = allocationsManager->getSVMAlloc(unifiedMemorySharedAllocation); retVal = clGetMemAllocInfoINTEL(&mockContext, unifiedMemorySharedAllocation, CL_MEM_ALLOC_TYPE_INTEL, paramValueSize, ¶mValue, ¶mValueSizeRet); EXPECT_EQ(graphicsAllocation->memoryType, InternalMemoryType::SHARED_UNIFIED_MEMORY); EXPECT_EQ(CL_MEM_TYPE_SHARED_INTEL, paramValue); EXPECT_EQ(sizeof(cl_int), paramValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clMemFreeINTEL(&mockContext, unifiedMemorySharedAllocation); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(clUnifiedSharedMemoryTests, givenDeviceAllocationWhenItIsQueriedForDeviceThenProperDeviceIsReturned) { MockContext mockContext; cl_int retVal = CL_SUCCESS; size_t paramValueSizeRet = 0; auto device = mockContext.getDevice(0u); cl_device_id clDevice = device; auto unifiedMemoryDeviceAllocation = clDeviceMemAllocINTEL(&mockContext, device, nullptr, 4, 0, &retVal); cl_device_id returnedDevice; retVal = clGetMemAllocInfoINTEL(&mockContext, unifiedMemoryDeviceAllocation, CL_MEM_ALLOC_DEVICE_INTEL, sizeof(returnedDevice), &returnedDevice, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(paramValueSizeRet, sizeof(returnedDevice)); EXPECT_EQ(returnedDevice, clDevice); retVal = clMemFreeINTEL(&mockContext, unifiedMemoryDeviceAllocation); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(clUnifiedSharedMemoryTests, givenSharedAllocationWhenItIsQueriedForDeviceThenProperDeviceIsReturned) { MockContext mockContext; cl_int retVal = CL_SUCCESS; size_t paramValueSizeRet = 0; auto device = mockContext.getDevice(0u); 
cl_device_id clDevice = device; auto unifiedMemorySharedAllocation = clSharedMemAllocINTEL(&mockContext, device, nullptr, 4, 0, &retVal); cl_device_id returnedDevice; retVal = clGetMemAllocInfoINTEL(&mockContext, unifiedMemorySharedAllocation, CL_MEM_ALLOC_DEVICE_INTEL, sizeof(returnedDevice), &returnedDevice, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(paramValueSizeRet, sizeof(returnedDevice)); EXPECT_EQ(returnedDevice, clDevice); retVal = clMemFreeINTEL(&mockContext, unifiedMemorySharedAllocation); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(clUnifiedSharedMemoryTests, givenSharedAllocationWithoutDeviceWhenItIsQueriedForDeviceThenNullIsReturned) { MockContext mockContext; cl_int retVal = CL_SUCCESS; size_t paramValueSizeRet = 0; auto unifiedMemorySharedAllocation = clSharedMemAllocINTEL(&mockContext, nullptr, nullptr, 4, 0, &retVal); cl_device_id returnedDevice; retVal = clGetMemAllocInfoINTEL(&mockContext, unifiedMemorySharedAllocation, CL_MEM_ALLOC_DEVICE_INTEL, sizeof(returnedDevice), &returnedDevice, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(paramValueSizeRet, sizeof(returnedDevice)); EXPECT_EQ(returnedDevice, nullptr); retVal = clMemFreeINTEL(&mockContext, unifiedMemorySharedAllocation); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(clUnifiedSharedMemoryTests, givenHostAllocationWhenItIsQueriedForDeviceThenProperDeviceIsReturned) { MockContext mockContext; auto device = mockContext.getDevice(0u); REQUIRE_SVM_OR_SKIP(device); cl_int retVal = CL_SUCCESS; size_t paramValueSizeRet = 0; auto unifiedMemoryHostAllocation = clHostMemAllocINTEL(&mockContext, nullptr, 4, 0, &retVal); cl_device_id returnedDevice; retVal = clGetMemAllocInfoINTEL(&mockContext, unifiedMemoryHostAllocation, CL_MEM_ALLOC_DEVICE_INTEL, sizeof(returnedDevice), &returnedDevice, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(paramValueSizeRet, sizeof(returnedDevice)); EXPECT_EQ(returnedDevice, nullptr); retVal = clMemFreeINTEL(&mockContext, 
unifiedMemoryHostAllocation); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(clUnifiedSharedMemoryTests, whenClGetMemAllocInfoINTELisCalledWithAllocationBasePtrParamNameThenProperFieldsAreSet) { MockContext mockContext; cl_int retVal = CL_SUCCESS; size_t paramValueSize = sizeof(uint64_t); uint64_t paramValue = 0; size_t paramValueSizeRet = 0; auto unifiedMemorySharedAllocation = clSharedMemAllocINTEL(&mockContext, mockContext.getDevice(0u), nullptr, 4, 0, &retVal); auto allocationsManager = mockContext.getSVMAllocsManager(); auto graphicsAllocation = allocationsManager->getSVMAlloc(unifiedMemorySharedAllocation); retVal = clGetMemAllocInfoINTEL(&mockContext, unifiedMemorySharedAllocation, CL_MEM_ALLOC_BASE_PTR_INTEL, paramValueSize, ¶mValue, ¶mValueSizeRet); EXPECT_EQ(graphicsAllocation->memoryType, InternalMemoryType::SHARED_UNIFIED_MEMORY); EXPECT_EQ(graphicsAllocation->gpuAllocations.getGraphicsAllocation(mockContext.getDevice(0)->getRootDeviceIndex())->getGpuAddress(), paramValue); EXPECT_EQ(sizeof(uint64_t), paramValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clMemFreeINTEL(&mockContext, unifiedMemorySharedAllocation); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(clUnifiedSharedMemoryTests, whenClGetMemAllocInfoINTELisCalledWithAllocationSizeParamNameThenProperFieldsAreSet) { MockContext mockContext; auto device = mockContext.getDevice(0u); REQUIRE_SVM_OR_SKIP(device); cl_int retVal = CL_SUCCESS; size_t paramValueSize = sizeof(size_t); size_t paramValue = 0; size_t paramValueSizeRet = 0; auto unifiedMemoryHostAllocation = clHostMemAllocINTEL(&mockContext, nullptr, 4, 0, &retVal); auto allocationsManager = mockContext.getSVMAllocsManager(); auto graphicsAllocation = allocationsManager->getSVMAlloc(unifiedMemoryHostAllocation); retVal = clGetMemAllocInfoINTEL(&mockContext, unifiedMemoryHostAllocation, CL_MEM_ALLOC_SIZE_INTEL, paramValueSize, ¶mValue, ¶mValueSizeRet); EXPECT_EQ(graphicsAllocation->memoryType, InternalMemoryType::HOST_UNIFIED_MEMORY); 
EXPECT_EQ(graphicsAllocation->size, paramValue); EXPECT_EQ(sizeof(size_t), paramValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clMemFreeINTEL(&mockContext, unifiedMemoryHostAllocation); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(clUnifiedSharedMemoryTests, whenClGetMemAllocInfoINTELisCalledWithoutParamNameThenInvalidValueIsReturned) { MockContext mockContext; cl_int retVal = CL_SUCCESS; size_t paramValueSize = sizeof(cl_uint); cl_uint paramValue = 0; size_t paramValueSizeRet = 0; auto unifiedMemorySharedAllocation = clSharedMemAllocINTEL(&mockContext, mockContext.getDevice(0u), nullptr, 4, 0, &retVal); retVal = clGetMemAllocInfoINTEL(&mockContext, unifiedMemorySharedAllocation, 0, paramValueSize, ¶mValue, ¶mValueSizeRet); EXPECT_EQ(CL_INVALID_VALUE, retVal); retVal = clMemFreeINTEL(&mockContext, unifiedMemorySharedAllocation); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(clUnifiedSharedMemoryTests, whenClSetKernelArgMemPointerINTELisCalledWithInvalidKernelThenInvaliKernelErrorIsReturned) { auto retVal = clSetKernelArgMemPointerINTEL(0, 0, nullptr); EXPECT_EQ(CL_INVALID_KERNEL, retVal); } TEST(clUnifiedSharedMemoryTests, whenDeviceSupportSharedMemoryAllocationsAndSystemPointerIsPassedThenItIsProperlySetInKernel) { DebugManagerStateRestore restorer; DebugManager.flags.EnableSharedSystemUsmSupport.set(1u); auto mockContext = std::make_unique(); auto device = mockContext->getDevice(0u); REQUIRE_SVM_OR_SKIP(device); MockKernelWithInternals mockKernel(*mockContext->getDevice(0u), mockContext.get(), true); auto systemPointer = reinterpret_cast(0xfeedbac); auto retVal = clSetKernelArgMemPointerINTEL(mockKernel.mockMultiDeviceKernel, 0, systemPointer); EXPECT_EQ(retVal, CL_SUCCESS); //check if cross thread is updated auto crossThreadLocation = reinterpret_cast(ptrOffset(mockKernel.mockKernel->getCrossThreadData(), mockKernel.kernelInfo.argAsPtr(0).stateless)); auto systemAddress = reinterpret_cast(systemPointer); EXPECT_EQ(*crossThreadLocation, systemAddress); } 
/* NOTE(review): template argument lists appear to be missing from this text (e.g. std::make_unique(), reinterpret_cast(...), castToObjectOrAbort(...)) - confirm against the upstream file before building. */ /* Allocates 4 bytes through clDeviceMemAllocINTEL, passes the pointer to clSetKernelArgMemPointerINTEL as argument 0, and expects kernelArguments[0].object to equal the GPU allocation that getSVMAlloc reports for that pointer at the device's root device index. REQUIRE_SVM_OR_SKIP gates the test (presumably skipping when SVM is unavailable - confirm in the macro definition). */ TEST(clUnifiedSharedMemoryTests, whenClSetKernelArgMemPointerINTELisCalledWithValidUnifiedMemoryAllocationThenProperFieldsAreSet) { auto mockContext = std::make_unique(); REQUIRE_SVM_OR_SKIP(mockContext->getDevice(0u)); cl_int retVal = CL_SUCCESS; auto unifiedMemoryDeviceAllocation = clDeviceMemAllocINTEL(mockContext.get(), mockContext->getDevice(0u), nullptr, 4, 0, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); MockKernelWithInternals mockKernel(*mockContext->getDevice(0u), mockContext.get(), true); retVal = clSetKernelArgMemPointerINTEL(mockKernel.mockMultiDeviceKernel, 0, unifiedMemoryDeviceAllocation); EXPECT_EQ(CL_SUCCESS, retVal); auto svmAlloc = mockContext->getSVMAllocsManager()->getSVMAlloc(unifiedMemoryDeviceAllocation); EXPECT_EQ(mockKernel.mockKernel->kernelArguments[0].object, svmAlloc->gpuAllocations.getGraphicsAllocation(mockContext->getDevice(0)->getRootDeviceIndex())); retVal = clMemFreeINTEL(mockContext.get(), unifiedMemoryDeviceAllocation); EXPECT_EQ(CL_SUCCESS, retVal); } /* Passing 0 as the command queue to clEnqueueMemsetINTEL is expected to return CL_INVALID_COMMAND_QUEUE. */ TEST(clUnifiedSharedMemoryTests, whenclEnqueueMemsetINTELisCalledWithoutIncorrectCommandQueueThenInvaliQueueErrorIsReturned) { auto retVal = clEnqueueMemsetINTEL(0, nullptr, 0, 0, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } /* Checks what clEnqueueMemsetINTEL hands to the queue: the local MockedCommandQueue overrides enqueueSVMMemFill and asserts the destination pointer, size 400, patternSize 1, a pattern that dereferences to 12, and an empty wait list with no output event. The test calls GTEST_SKIP when svmCapabilities is 0. The return values of the allocation and of the final clMemFreeINTEL are not checked here. */ TEST(clUnifiedSharedMemoryTests, whenclEnqueueMemsetINTELisCalledWithProperParametersThenParametersArePassedCorrectly) { auto mockContext = std::make_unique(); const ClDeviceInfo &devInfo = mockContext->getDevice(0u)->getDeviceInfo(); if (devInfo.svmCapabilities == 0) { GTEST_SKIP(); } cl_int retVal = CL_SUCCESS; auto unifiedMemoryDeviceAllocation = clDeviceMemAllocINTEL(mockContext.get(), mockContext->getDevice(0u), nullptr, 400, 0, &retVal); struct MockedCommandQueue : public MockCommandQueue { using MockCommandQueue::MockCommandQueue; cl_int enqueueSVMMemFill(void *svmPtr, const void *pattern, size_t patternSize, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { EXPECT_EQ(12,
*reinterpret_cast(pattern)); EXPECT_EQ(expectedDstPtr, svmPtr); EXPECT_EQ(400u, size); EXPECT_EQ(1u, patternSize); EXPECT_EQ(0u, numEventsInWaitList); EXPECT_EQ(nullptr, eventWaitList); EXPECT_EQ(nullptr, event); return CL_SUCCESS; } void *expectedDstPtr = nullptr; }; MockedCommandQueue queue{*mockContext}; queue.expectedDstPtr = unifiedMemoryDeviceAllocation; cl_int setValue = 12u; retVal = clEnqueueMemsetINTEL(&queue, unifiedMemoryDeviceAllocation, setValue, 400u, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); clMemFreeINTEL(mockContext.get(), unifiedMemoryDeviceAllocation); } /* Passing 0 as the command queue to clEnqueueMemFillINTEL is expected to return CL_INVALID_COMMAND_QUEUE. */ TEST(clUnifiedSharedMemoryTests, whenclEnqueueMemFillINTELisCalledWithoutIncorrectCommandQueueThenInvaliQueueErrorIsReturned) { cl_int setValue = 12u; auto retVal = clEnqueueMemFillINTEL(0, nullptr, &setValue, 0u, 0u, 0u, nullptr, nullptr); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } /* Same shape as the memset test above, but driven through clEnqueueMemFillINTEL with &setValue and sizeof(setValue): the overridden enqueueSVMMemFill asserts patternSize 4, size 400, a pattern that dereferences to 12, the destination pointer, and no events. The test calls GTEST_SKIP when svmCapabilities is 0. */ TEST(clUnifiedSharedMemoryTests, whenclEnqueueMemFillINTELisCalledWithProperParametersThenParametersArePassedCorrectly) { auto mockContext = std::make_unique(); const ClDeviceInfo &devInfo = mockContext->getDevice(0u)->getDeviceInfo(); if (devInfo.svmCapabilities == 0) { GTEST_SKIP(); } cl_int retVal = CL_SUCCESS; auto unifiedMemoryDeviceAllocation = clDeviceMemAllocINTEL(mockContext.get(), mockContext->getDevice(0u), nullptr, 400, 0, &retVal); struct MockedCommandQueue : public MockCommandQueue { using MockCommandQueue::MockCommandQueue; cl_int enqueueSVMMemFill(void *svmPtr, const void *pattern, size_t patternSize, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { EXPECT_EQ(12, *reinterpret_cast(pattern)); EXPECT_EQ(expectedDstPtr, svmPtr); EXPECT_EQ(400u, size); EXPECT_EQ(4u, patternSize); EXPECT_EQ(0u, numEventsInWaitList); EXPECT_EQ(nullptr, eventWaitList); EXPECT_EQ(nullptr, event); return CL_SUCCESS; } void *expectedDstPtr = nullptr; }; MockedCommandQueue queue{*mockContext}; queue.expectedDstPtr = unifiedMemoryDeviceAllocation; cl_int setValue
= 12u; retVal = clEnqueueMemFillINTEL(&queue, unifiedMemoryDeviceAllocation, &setValue, sizeof(setValue), 400u, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); clMemFreeINTEL(mockContext.get(), unifiedMemoryDeviceAllocation); } /* Passing 0 as the command queue to clEnqueueMemcpyINTEL is expected to return CL_INVALID_COMMAND_QUEUE. */ TEST(clUnifiedSharedMemoryTests, whenClEnqueueMemcpyINTELisCalledWithWrongQueueThenInvalidQueueErrorIsReturned) { auto retVal = clEnqueueMemcpyINTEL(0, 0, nullptr, nullptr, 0, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } /* Allocates one 400-byte device allocation and one 400-byte shared allocation, then calls clEnqueueMemcpyINTEL with the device allocation as destination and the shared allocation as source. The local MockedCommandQueue overrides enqueueSVMMemcpy and asserts blockingCopy 0, both pointers, size 400, and no events. Both allocations are freed at the end without checking the return values. */ TEST(clUnifiedSharedMemoryTests, givenTwoUnifiedMemoryAllocationsWhenTheyAreCopiedThenProperParamtersArePassed) { auto mockContext = std::make_unique(); const ClDeviceInfo &devInfo = mockContext->getDevice(0u)->getDeviceInfo(); if (devInfo.svmCapabilities == 0) { GTEST_SKIP(); } cl_int retVal = CL_SUCCESS; auto unifiedMemoryDeviceAllocation = clDeviceMemAllocINTEL(mockContext.get(), mockContext->getDevice(0u), nullptr, 400, 0, &retVal); auto unifiedMemorySharedAllocation = clSharedMemAllocINTEL(mockContext.get(), mockContext->getDevice(0u), nullptr, 400, 0, &retVal); struct MockedCommandQueue : public MockCommandQueue { using MockCommandQueue::MockCommandQueue; cl_int enqueueSVMMemcpy(cl_bool blockingCopy, void *dstPtr, const void *srcPtr, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { EXPECT_EQ(0u, blockingCopy); EXPECT_EQ(expectedDstPtr, dstPtr); EXPECT_EQ(expectedSrcPtr, srcPtr); EXPECT_EQ(400u, size); EXPECT_EQ(0u, numEventsInWaitList); EXPECT_EQ(nullptr, eventWaitList); EXPECT_EQ(nullptr, event); return CL_SUCCESS; } void *expectedDstPtr = nullptr; const void *expectedSrcPtr = nullptr; }; MockedCommandQueue queue{*mockContext}; queue.expectedDstPtr = unifiedMemoryDeviceAllocation; queue.expectedSrcPtr = unifiedMemorySharedAllocation; retVal = clEnqueueMemcpyINTEL(&queue, 0, unifiedMemoryDeviceAllocation, unifiedMemorySharedAllocation, 400u, 0, nullptr, nullptr); EXPECT_EQ(retVal, CL_SUCCESS); clMemFreeINTEL(mockContext.get(),
unifiedMemoryDeviceAllocation); clMemFreeINTEL(mockContext.get(), unifiedMemorySharedAllocation); } /* Passing 0 as the command queue to clEnqueueMigrateMemINTEL is expected to return CL_INVALID_COMMAND_QUEUE. */ TEST(clUnifiedSharedMemoryTests, whenClEnqueueMigrateMemINTELisCalledWithWrongQueueThenInvalidQueueErrorIsReturned) { auto retVal = clEnqueueMigrateMemINTEL(0, nullptr, 0, 0, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } /* Calls clEnqueueMigrateMemINTEL on a default-constructed MockCommandQueue with the pointer value 0x1234 (not obtained from any allocation call in this test) and size 10; only the CL_SUCCESS return code is checked. */ TEST(clUnifiedSharedMemoryTests, whenClEnqueueMigrateMemINTELisCalledWithProperParametersThenSuccessIsReturned) { MockCommandQueue cmdQ; void *unifiedMemoryAlloc = reinterpret_cast(0x1234); auto retVal = clEnqueueMigrateMemINTEL(&cmdQ, unifiedMemoryAlloc, 10, 0, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } /* Passing 0 as the command queue to clEnqueueMemAdviseINTEL is expected to return CL_INVALID_COMMAND_QUEUE. */ TEST(clUnifiedSharedMemoryTests, whenClEnqueueMemAdviseINTELisCalledWithWrongQueueThenInvalidQueueErrorIsReturned) { auto retVal = clEnqueueMemAdviseINTEL(0, nullptr, 0, 0, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } /* Calls clEnqueueMemAdviseINTEL on a default-constructed MockCommandQueue with the pointer value 0x1234 and size 10; only the CL_SUCCESS return code is checked. */ TEST(clUnifiedSharedMemoryTests, whenClEnqueueMemAdviseINTELisCalledWithProperParametersThenSuccessIsReturned) { MockCommandQueue cmdQ; void *unifiedMemoryAlloc = reinterpret_cast(0x1234); auto retVal = clEnqueueMemAdviseINTEL(&cmdQ, unifiedMemoryAlloc, 10, 0, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } /* Fixture for USM tests that need an output event. It holds a cl_event member that starts as nullptr and is filled in by the test body. */ class clUnifiedSharedMemoryEventTests : public CommandQueueHwFixture, public ::testing::Test { public: /* Stores the queue returned by createCommandQueue(nullptr) in pCmdQ. */ void SetUp() override { this->pCmdQ = createCommandQueue(nullptr); } /* Releases the event before delegating to CommandQueueHwFixture::TearDown(). clReleaseEvent is called unconditionally, so it also runs when the event is still nullptr, and its return value is ignored. */ void TearDown() override { clReleaseEvent(event); CommandQueueHwFixture::TearDown(); } cl_event event = nullptr; }; /* Requests an output event from clEnqueueMigrateMemINTEL on the fixture queue and expects the returned event to report command type CL_COMMAND_MIGRATEMEM_INTEL. */ TEST_F(clUnifiedSharedMemoryEventTests, whenClEnqueueMigrateMemINTELIsCalledWithEventThenProperCmdTypeIsSet) { void *unifiedMemoryAlloc = reinterpret_cast(0x1234); auto retVal = clEnqueueMigrateMemINTEL(this->pCmdQ, unifiedMemoryAlloc, 10, 0, 0, nullptr, &event); EXPECT_EQ(CL_SUCCESS, retVal); constexpr cl_command_type expectedCmd = CL_COMMAND_MIGRATEMEM_INTEL; cl_command_type actualCmd = castToObjectOrAbort(event)->getCommandType(); EXPECT_EQ(expectedCmd, actualCmd); }
TEST_F(clUnifiedSharedMemoryEventTests, whenClEnqueueMemAdviseINTELIsCalledWithEventThenProperCmdTypeIsSet) {
    // Arbitrary non-null address; this test only inspects the returned event.
    void *unifiedMemoryAlloc = reinterpret_cast<void *>(0x1234);

    auto retVal = clEnqueueMemAdviseINTEL(this->pCmdQ, unifiedMemoryAlloc, 10, 0, 0, nullptr, &event);
    EXPECT_EQ(CL_SUCCESS, retVal);

    constexpr cl_command_type expectedCmd = CL_COMMAND_MEMADVISE_INTEL;
    cl_command_type actualCmd = castToObjectOrAbort<Event>(event)->getCommandType();
    EXPECT_EQ(expectedCmd, actualCmd);
}

TEST_F(clUnifiedSharedMemoryEventTests, whenClEnqueueMemcpyINTELIsCalledWithEventThenProperCmdTypeIsSet) {
    const ClDeviceInfo &devInfo = this->context->getDevice(0u)->getDeviceInfo();
    if (devInfo.svmCapabilities == 0) {
        GTEST_SKIP();
    }

    cl_int retVal = CL_SUCCESS;
    auto unifiedMemoryDst = clSharedMemAllocINTEL(this->context, this->context->getDevice(0u), nullptr, 400, 0, &retVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    auto unifiedMemorySrc = clSharedMemAllocINTEL(this->context, this->context->getDevice(0u), nullptr, 400, 0, &retVal);
    EXPECT_EQ(CL_SUCCESS, retVal);

    retVal = clEnqueueMemcpyINTEL(this->pCmdQ, 0, unifiedMemoryDst, unifiedMemorySrc, 400u, 0, nullptr, &event);
    EXPECT_EQ(retVal, CL_SUCCESS);

    constexpr cl_command_type expectedCmd = CL_COMMAND_MEMCPY_INTEL;
    cl_command_type actualCmd = castToObjectOrAbort<Event>(event)->getCommandType();
    EXPECT_EQ(expectedCmd, actualCmd);

    clMemFreeINTEL(this->context, unifiedMemoryDst);
    clMemFreeINTEL(this->context, unifiedMemorySrc);
}

TEST_F(clUnifiedSharedMemoryEventTests, whenClEnqueueMemsetINTELIsCalledWithEventThenProperCmdTypeIsSet) {
    const ClDeviceInfo &devInfo = this->context->getDevice(0u)->getDeviceInfo();
    if (devInfo.svmCapabilities == 0) {
        GTEST_SKIP();
    }

    cl_int retVal = CL_SUCCESS;
    auto unifiedMemorySharedAllocation = clSharedMemAllocINTEL(this->context, this->context->getDevice(0u), nullptr, 400, 0, &retVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    cl_int setValue = 12u;

    retVal = clEnqueueMemsetINTEL(this->pCmdQ, unifiedMemorySharedAllocation, setValue, 400u, 0, nullptr, &event);
    EXPECT_EQ(CL_SUCCESS, retVal);

    constexpr cl_command_type expectedCmd = CL_COMMAND_MEMSET_INTEL;
    cl_command_type actualCmd = castToObjectOrAbort<Event>(event)->getCommandType();
    EXPECT_EQ(expectedCmd, actualCmd);

    clMemFreeINTEL(this->context, unifiedMemorySharedAllocation);
}

TEST_F(clUnifiedSharedMemoryEventTests, whenClEnqueueMemFillINTELIsCalledWithEventThenProperCmdTypeIsSet) {
    const ClDeviceInfo &devInfo = this->context->getDevice(0u)->getDeviceInfo();
    if (devInfo.svmCapabilities == 0) {
        GTEST_SKIP();
    }

    cl_int retVal = CL_SUCCESS;
    auto unifiedMemorySharedAllocation = clSharedMemAllocINTEL(this->context, this->context->getDevice(0u), nullptr, 400, 0, &retVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    cl_int setValue = 12u;

    retVal = clEnqueueMemFillINTEL(this->pCmdQ, unifiedMemorySharedAllocation, &setValue, sizeof(setValue), 400u, 0, nullptr, &event);
    EXPECT_EQ(CL_SUCCESS, retVal);

    constexpr cl_command_type expectedCmd = CL_COMMAND_MEMFILL_INTEL;
    cl_command_type actualCmd = castToObjectOrAbort<Event>(event)->getCommandType();
    EXPECT_EQ(expectedCmd, actualCmd);

    clMemFreeINTEL(this->context, unifiedMemorySharedAllocation);
}

// CL_MEM_ALLOC_DEFAULT_INTEL is expected to yield a BUFFER-type device allocation.
TEST(clUnifiedSharedMemoryTests, givenDefaulMemPropertiesWhenClDeviceMemAllocIntelIsCalledThenItAllocatesDeviceUnifiedMemoryAllocationWithProperAllocationTypeAndSize) {
    MockContext mockContext;
    cl_int retVal = CL_SUCCESS;
    cl_mem_properties_intel properties[] = {CL_MEM_ALLOC_FLAGS_INTEL, CL_MEM_ALLOC_DEFAULT_INTEL, 0};
    auto allocationSize = 4000u;

    auto unifiedMemoryDeviceAllocation = clDeviceMemAllocINTEL(&mockContext, mockContext.getDevice(0u), properties, allocationSize, 0, &retVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    ASSERT_NE(nullptr, unifiedMemoryDeviceAllocation);

    auto allocationsManager = mockContext.getSVMAllocsManager();
    EXPECT_EQ(1u, allocationsManager->getNumAllocs());

    // Both lookups are dereferenced below, so a failed lookup must stop the test.
    auto graphicsAllocation = allocationsManager->getSVMAlloc(unifiedMemoryDeviceAllocation);
    ASSERT_NE(nullptr, graphicsAllocation);
    auto gpuAllocation = graphicsAllocation->gpuAllocations.getGraphicsAllocation(mockContext.getDevice(0)->getRootDeviceIndex());
    ASSERT_NE(nullptr, gpuAllocation);

    EXPECT_EQ(graphicsAllocation->size, allocationSize);
    EXPECT_EQ(graphicsAllocation->memoryType, InternalMemoryType::DEVICE_UNIFIED_MEMORY);
    EXPECT_EQ(AllocationType::BUFFER, gpuAllocation->getAllocationType());
    EXPECT_EQ(gpuAllocation->getGpuAddress(), castToUint64(unifiedMemoryDeviceAllocation));
    EXPECT_EQ(alignUp(allocationSize, MemoryConstants::pageSize64k), gpuAllocation->getUnderlyingBufferSize());

    retVal = clMemFreeINTEL(&mockContext, unifiedMemoryDeviceAllocation);
    EXPECT_EQ(CL_SUCCESS, retVal);
}

// CL_MEM_ALLOC_WRITE_COMBINED_INTEL is expected to yield a WRITE_COMBINED allocation.
TEST(clUnifiedSharedMemoryTests, givenValidMemPropertiesWhenClDeviceMemAllocIntelIsCalledThenItAllocatesDeviceUnifiedMemoryAllocationWithProperAllocationTypeAndSize) {
    MockContext mockContext;
    cl_int retVal = CL_SUCCESS;
    auto allocationSize = 4000u;
    cl_mem_properties_intel properties[] = {CL_MEM_ALLOC_FLAGS_INTEL, CL_MEM_ALLOC_WRITE_COMBINED_INTEL, 0};

    auto unifiedMemoryDeviceAllocation = clDeviceMemAllocINTEL(&mockContext, mockContext.getDevice(0u), properties, allocationSize, 0, &retVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    ASSERT_NE(nullptr, unifiedMemoryDeviceAllocation);

    auto allocationsManager = mockContext.getSVMAllocsManager();
    EXPECT_EQ(1u, allocationsManager->getNumAllocs());

    // Both lookups are dereferenced below, so a failed lookup must stop the test.
    auto graphicsAllocation = allocationsManager->getSVMAlloc(unifiedMemoryDeviceAllocation);
    ASSERT_NE(nullptr, graphicsAllocation);
    auto gpuAllocation = graphicsAllocation->gpuAllocations.getGraphicsAllocation(mockContext.getDevice(0)->getRootDeviceIndex());
    ASSERT_NE(nullptr, gpuAllocation);

    EXPECT_EQ(graphicsAllocation->size, allocationSize);
    EXPECT_EQ(graphicsAllocation->memoryType, InternalMemoryType::DEVICE_UNIFIED_MEMORY);
    EXPECT_EQ(gpuAllocation->getAllocationType(), AllocationType::WRITE_COMBINED);
    EXPECT_EQ(gpuAllocation->getGpuAddress(), castToUint64(unifiedMemoryDeviceAllocation));
    EXPECT_EQ(alignUp(allocationSize, MemoryConstants::pageSize64k), gpuAllocation->getUnderlyingBufferSize());

    retVal = clMemFreeINTEL(&mockContext, unifiedMemoryDeviceAllocation);
    EXPECT_EQ(CL_SUCCESS, retVal);
}

TEST(clUnifiedSharedMemoryTests, givenInvalidMemPropertiesWhenClSharedMemAllocIntelIsCalledThenInvalidValueIsReturned)
{
    MockContext mockContext;
    cl_int retVal = CL_SUCCESS;
    // The property list starts with a value instead of a property name.
    cl_mem_properties_intel properties[] = {CL_MEM_ALLOC_WRITE_COMBINED_INTEL, 0};

    auto unifiedMemorySharedAllocation = clSharedMemAllocINTEL(&mockContext, mockContext.getDevice(0u), properties, 4, 0, &retVal);
    EXPECT_EQ(CL_INVALID_VALUE, retVal);
    EXPECT_EQ(nullptr, unifiedMemorySharedAllocation);
}

// With CL_MEM_ALLOW_UNRESTRICTED_SIZE_INTEL set, a 10 GB request must succeed
// for device, shared and host allocations.
TEST(clUnifiedSharedMemoryTests, givenUnifiedMemoryAllocationSizeGreaterThanMaxMemAllocSizeAndClMemAllowUnrestrictedSizeFlagWhenCreateAllocationThenSuccesIsReturned) {
    MockContext mockContext;
    cl_int retVal = CL_SUCCESS;
    cl_mem_properties_intel properties[] = {CL_MEM_FLAGS, CL_MEM_ALLOW_UNRESTRICTED_SIZE_INTEL, 0};
    auto bigSize = MemoryConstants::gigaByte * 10;
    auto allocationSize = static_cast<size_t>(bigSize);
    // NOTE(review): cast target was lost in extraction; OsAgnosticMemoryManager
    // is presumed to be the type that declares turnOnFakingBigAllocations() - confirm.
    auto memoryManager = static_cast<OsAgnosticMemoryManager *>(mockContext.getDevice(0u)->getMemoryManager());
    memoryManager->turnOnFakingBigAllocations();
    if (memoryManager->peekForce32BitAllocations() || is32bit) {
        GTEST_SKIP();
    }

    {
        auto unifiedMemoryAllocation = clDeviceMemAllocINTEL(&mockContext, mockContext.getDevice(0u), properties, allocationSize, 0, &retVal);
        EXPECT_EQ(CL_SUCCESS, retVal);
        ASSERT_NE(nullptr, unifiedMemoryAllocation);

        retVal = clMemFreeINTEL(&mockContext, unifiedMemoryAllocation);
        EXPECT_EQ(CL_SUCCESS, retVal);
    }

    {
        auto unifiedMemoryAllocation = clSharedMemAllocINTEL(&mockContext, mockContext.getDevice(0u), properties, allocationSize, 0, &retVal);
        EXPECT_EQ(CL_SUCCESS, retVal);
        ASSERT_NE(nullptr, unifiedMemoryAllocation);

        retVal = clMemFreeINTEL(&mockContext, unifiedMemoryAllocation);
        EXPECT_EQ(CL_SUCCESS, retVal);
    }

    {
        auto unifiedMemoryAllocation = clHostMemAllocINTEL(&mockContext, properties, allocationSize, 0, &retVal);
        EXPECT_EQ(CL_SUCCESS, retVal);
        ASSERT_NE(nullptr, unifiedMemoryAllocation);

        retVal = clMemFreeINTEL(&mockContext, unifiedMemoryAllocation);
        EXPECT_EQ(CL_SUCCESS, retVal);
    }
}

// With the AllowUnrestrictedSize debug flag set, a request one byte above
// maxMemAllocSize must succeed without any property list.
TEST(clUnifiedSharedMemoryTests,
     givenUnifiedMemoryAllocationSizeGreaterThanMaxMemAllocSizeAndDebugFlagSetWhenCreateAllocationThenSuccesIsReturned) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.AllowUnrestrictedSize.set(1);
    MockContext mockContext;
    cl_int retVal = CL_SUCCESS;
    auto allocationSize = static_cast<size_t>(mockContext.getDevice(0u)->getSharedDeviceInfo().maxMemAllocSize) + 1;
    auto memoryManager = static_cast<OsAgnosticMemoryManager *>(mockContext.getDevice(0u)->getMemoryManager());
    memoryManager->turnOnFakingBigAllocations();
    if (memoryManager->peekForce32BitAllocations() || is32bit) {
        GTEST_SKIP();
    }

    {
        auto unifiedMemoryAllocation = clDeviceMemAllocINTEL(&mockContext, mockContext.getDevice(0u), nullptr, allocationSize, 0, &retVal);
        EXPECT_EQ(CL_SUCCESS, retVal);
        ASSERT_NE(nullptr, unifiedMemoryAllocation);

        retVal = clMemFreeINTEL(&mockContext, unifiedMemoryAllocation);
        EXPECT_EQ(CL_SUCCESS, retVal);
    }

    {
        auto unifiedMemoryAllocation = clSharedMemAllocINTEL(&mockContext, mockContext.getDevice(0u), nullptr, allocationSize, 0, &retVal);
        EXPECT_EQ(CL_SUCCESS, retVal);
        ASSERT_NE(nullptr, unifiedMemoryAllocation);

        retVal = clMemFreeINTEL(&mockContext, unifiedMemoryAllocation);
        EXPECT_EQ(CL_SUCCESS, retVal);
    }

    {
        auto unifiedMemoryAllocation = clHostMemAllocINTEL(&mockContext, nullptr, allocationSize, 0, &retVal);
        EXPECT_EQ(CL_SUCCESS, retVal);
        ASSERT_NE(nullptr, unifiedMemoryAllocation);

        retVal = clMemFreeINTEL(&mockContext, unifiedMemoryAllocation);
        EXPECT_EQ(CL_SUCCESS, retVal);
    }
}

// Without the unrestricted-size flag, a 20 GB request must fail.
TEST(clUnifiedSharedMemoryTests, givenUnifiedMemoryAllocationSizeGreaterThanMaxMemAllocSizeWhenCreateAllocationThenErrorIsReturned) {
    MockContext mockContext;
    cl_int retVal = CL_SUCCESS;
    cl_mem_properties_intel properties[] = {0};
    auto bigSize = MemoryConstants::gigaByte * 20;
    auto allocationSize = static_cast<size_t>(bigSize);
    auto memoryManager = static_cast<OsAgnosticMemoryManager *>(mockContext.getDevice(0u)->getMemoryManager());
    memoryManager->turnOnFakingBigAllocations();
    if (memoryManager->peekForce32BitAllocations() || is32bit) {
        GTEST_SKIP();
    }

    {
        auto unifiedMemoryAllocation =
clDeviceMemAllocINTEL(&mockContext, mockContext.getDevice(0u), properties, allocationSize, 0, &retVal); EXPECT_NE(CL_SUCCESS, retVal); EXPECT_EQ(nullptr, unifiedMemoryAllocation); } { auto unifiedMemoryAllocation = clSharedMemAllocINTEL(&mockContext, mockContext.getDevice(0u), properties, allocationSize, 0, &retVal); EXPECT_NE(CL_SUCCESS, retVal); EXPECT_EQ(nullptr, unifiedMemoryAllocation); } { auto unifiedMemoryAllocation = clHostMemAllocINTEL(&mockContext, properties, allocationSize, 0, &retVal); EXPECT_NE(CL_SUCCESS, retVal); EXPECT_EQ(nullptr, unifiedMemoryAllocation); } } using MultiRootDeviceClUnifiedSharedMemoryTests = MultiRootDeviceFixture; TEST_F(MultiRootDeviceClUnifiedSharedMemoryTests, WhenClHostMemAllocIntelIsCalledInMultiRootDeviceEnvironmentThenItAllocatesHostUnifiedMemoryAllocations) { REQUIRE_SVM_OR_SKIP(device1); REQUIRE_SVM_OR_SKIP(device2); cl_int retVal = CL_SUCCESS; auto unifiedMemoryHostAllocation = clHostMemAllocINTEL(context.get(), nullptr, 4, 0, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, unifiedMemoryHostAllocation); auto allocationsManager = context.get()->getSVMAllocsManager(); EXPECT_EQ(allocationsManager->getNumAllocs(), 1u); auto svmAllocation = allocationsManager->getSVMAlloc(unifiedMemoryHostAllocation); auto graphicsAllocation1 = svmAllocation->gpuAllocations.getGraphicsAllocation(1u); auto graphicsAllocation2 = svmAllocation->gpuAllocations.getGraphicsAllocation(2u); EXPECT_EQ(svmAllocation->size, 4u); EXPECT_EQ(svmAllocation->memoryType, InternalMemoryType::HOST_UNIFIED_MEMORY); EXPECT_NE(graphicsAllocation1, nullptr); EXPECT_NE(graphicsAllocation2, nullptr); EXPECT_EQ(graphicsAllocation1->getRootDeviceIndex(), 1u); EXPECT_EQ(graphicsAllocation2->getRootDeviceIndex(), 2u); EXPECT_EQ(graphicsAllocation1->getAllocationType(), AllocationType::BUFFER_HOST_MEMORY); EXPECT_EQ(graphicsAllocation2->getAllocationType(), AllocationType::BUFFER_HOST_MEMORY); EXPECT_EQ(graphicsAllocation1->getGpuAddress(), 
castToUint64(unifiedMemoryHostAllocation)); EXPECT_EQ(graphicsAllocation2->getGpuAddress(), castToUint64(unifiedMemoryHostAllocation)); retVal = clMemFreeINTEL(context.get(), unifiedMemoryHostAllocation); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(MultiRootDeviceClUnifiedSharedMemoryTests, WhenClSharedMemAllocIntelIsCalledWithoutDeviceInMultiRootDeviceEnvironmentThenItAllocatesHostUnifiedMemoryAllocations) { REQUIRE_SVM_OR_SKIP(device1); REQUIRE_SVM_OR_SKIP(device2); cl_int retVal = CL_SUCCESS; auto unifiedMemorySharedAllocation = clSharedMemAllocINTEL(context.get(), nullptr, nullptr, 4, 0, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, unifiedMemorySharedAllocation); auto allocationsManager = context.get()->getSVMAllocsManager(); EXPECT_EQ(allocationsManager->getNumAllocs(), 1u); auto svmAllocation = allocationsManager->getSVMAlloc(unifiedMemorySharedAllocation); auto graphicsAllocation1 = svmAllocation->gpuAllocations.getGraphicsAllocation(1u); auto graphicsAllocation2 = svmAllocation->gpuAllocations.getGraphicsAllocation(2u); EXPECT_EQ(svmAllocation->size, 4u); EXPECT_EQ(svmAllocation->memoryType, InternalMemoryType::SHARED_UNIFIED_MEMORY); EXPECT_NE(graphicsAllocation1, nullptr); EXPECT_NE(graphicsAllocation2, nullptr); EXPECT_EQ(graphicsAllocation1->getRootDeviceIndex(), 1u); EXPECT_EQ(graphicsAllocation2->getRootDeviceIndex(), 2u); EXPECT_EQ(graphicsAllocation1->getAllocationType(), AllocationType::BUFFER_HOST_MEMORY); EXPECT_EQ(graphicsAllocation2->getAllocationType(), AllocationType::BUFFER_HOST_MEMORY); EXPECT_EQ(graphicsAllocation1->getGpuAddress(), castToUint64(unifiedMemorySharedAllocation)); EXPECT_EQ(graphicsAllocation2->getGpuAddress(), castToUint64(unifiedMemorySharedAllocation)); retVal = clMemFreeINTEL(context.get(), unifiedMemorySharedAllocation); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(MultiRootDeviceClUnifiedSharedMemoryTests, WhenClSharedMemAllocIntelIsCalledWithoutDeviceInMultiRootDeviceEnvironmentThenItWaitsForAllGpuAllocations) { 
REQUIRE_SVM_OR_SKIP(device1); REQUIRE_SVM_OR_SKIP(device2); mockMemoryManager->waitAllocations.reset(new MultiGraphicsAllocation(2u)); cl_int retVal = CL_SUCCESS; auto unifiedMemorySharedAllocation = clSharedMemAllocINTEL(context.get(), nullptr, nullptr, 4, 0, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, unifiedMemorySharedAllocation); auto allocationsManager = context.get()->getSVMAllocsManager(); EXPECT_EQ(allocationsManager->getNumAllocs(), 1u); auto svmAllocation = allocationsManager->getSVMAlloc(unifiedMemorySharedAllocation); auto graphicsAllocation1 = svmAllocation->gpuAllocations.getGraphicsAllocation(1u); auto graphicsAllocation2 = svmAllocation->gpuAllocations.getGraphicsAllocation(2u); EXPECT_EQ(svmAllocation->size, 4u); EXPECT_NE(graphicsAllocation1, nullptr); EXPECT_NE(graphicsAllocation2, nullptr); retVal = clMemBlockingFreeINTEL(context.get(), unifiedMemorySharedAllocation); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mockMemoryManager->waitForEnginesCompletionCalled, 2u); EXPECT_EQ(mockMemoryManager->waitAllocations.get()->getGraphicsAllocation(1u), graphicsAllocation1); EXPECT_EQ(mockMemoryManager->waitAllocations.get()->getGraphicsAllocation(2u), graphicsAllocation2); EXPECT_EQ(allocationsManager->getNumAllocs(), 0u); svmAllocation = allocationsManager->getSVMAlloc(unifiedMemorySharedAllocation); EXPECT_EQ(nullptr, svmAllocation); } compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_unload_compiler_tests.inl000066400000000000000000000005351422164147700301630ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" using namespace NEO; namespace ULT { TEST(clUnloadCompilerTests, WhenUnloadingCompilerThenSuccessIsReturned) { auto retVal = clUnloadCompiler(); EXPECT_EQ(CL_SUCCESS, retVal); } } // namespace ULT 
compute-runtime-22.14.22890/opencl/test/unit_test/api/cl_unload_platform_compiler_tests.inl000066400000000000000000000012271422164147700320660ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/api/cl_api_tests.h" using namespace NEO; using clUnloadPlatformCompilerTests = api_tests; namespace ULT { TEST_F(clUnloadPlatformCompilerTests, GivenNullptrPlatformWhenUnloadingPlatformCompilerThenInvalidPlatformErrorIsReturned) { auto retVal = clUnloadPlatformCompiler(nullptr); EXPECT_EQ(CL_INVALID_PLATFORM, retVal); } TEST_F(clUnloadPlatformCompilerTests, WhenUnloadingPlatformCompilerThenSuccessIsReturned) { auto retVal = clUnloadPlatformCompiler(platform()); EXPECT_EQ(CL_SUCCESS, retVal); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/gl/000077500000000000000000000000001422164147700230025ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/api/gl/CMakeLists.txt000066400000000000000000000017741422164147700255530ustar00rootroot00000000000000# # Copyright (C) 2018-2021 Intel Corporation # # SPDX-License-Identifier: MIT # if(WIN32) set(IGDRCL_SRCS_tests_api_gl ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/cl_create_from_gl_buffer_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cl_create_from_gl_renderbuffer_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cl_create_from_gl_texture2d_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cl_create_from_gl_texture3d_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cl_create_from_gl_texture_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cl_enqueue_acquire_gl_objects_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cl_enqueue_release_gl_objects_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_gl_device_info_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_gl_object_info_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_gl_texture_info_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cl_gl_intel_tracing_tests.cpp ) target_sources(igdrcl_tests PRIVATE 
${IGDRCL_SRCS_tests_api_gl}) endif() compute-runtime-22.14.22890/opencl/test/unit_test/api/gl/cl_create_from_gl_buffer_tests.cpp000066400000000000000000000014051422164147700317070ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/api/cl_api_tests.h" using namespace NEO; typedef api_tests clCreateFromGLBuffer_; namespace ULT { TEST_F(clCreateFromGLBuffer_, givenNullConxtextWhenCreateFromGLIsCalledThenErrorIsReturned) { int errCode = CL_SUCCESS; auto retVal = clCreateFromGLBuffer(nullptr, // cl_context context CL_MEM_READ_WRITE, // cl_mem_flags flags 0, // cl_GLuint bufobj &errCode // cl_int * errcode_ret ); EXPECT_EQ(nullptr, retVal); EXPECT_EQ(errCode, CL_INVALID_CONTEXT); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/gl/cl_create_from_gl_renderbuffer_tests.cpp000066400000000000000000000014441422164147700331120ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/api/cl_api_tests.h" using namespace NEO; typedef api_tests clCreateFromGLRenderbuffer_; namespace ULT { TEST_F(clCreateFromGLRenderbuffer_, givenNullContextWhenCreateIsCalledThenErrorIsReturned) { int errCode = CL_SUCCESS; auto retVal = clCreateFromGLRenderbuffer(nullptr, // cl_context context CL_MEM_READ_WRITE, // cl_mem_flags flags 0, // GLuint renderbuffer &errCode // cl_int *errcode_ret ); EXPECT_EQ(nullptr, retVal); EXPECT_EQ(errCode, CL_INVALID_CONTEXT); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/gl/cl_create_from_gl_texture2d_tests.cpp000066400000000000000000000017151422164147700323700ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/api/cl_api_tests.h" using namespace NEO; typedef api_tests clCreateFromGLTexture2D_; namespace ULT { 
TEST_F(clCreateFromGLTexture2D_, givenNullConxtextWhenClCreateFromGlTexture2DIsCalledThenInvalidContextIsReturned) { int errCode = CL_SUCCESS; auto retVal = clCreateFromGLTexture2D(nullptr, // cl_context context CL_MEM_READ_WRITE, // cl_mem_flags flags 0, // GLenum texture_target 0, // GLint miplevel 0, // GLuint texture &errCode // cl_int *errcode_ret ); EXPECT_EQ(nullptr, retVal); EXPECT_EQ(errCode, CL_INVALID_CONTEXT); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/gl/cl_create_from_gl_texture3d_tests.cpp000066400000000000000000000017151422164147700323710ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/api/cl_api_tests.h" using namespace NEO; typedef api_tests clCreateFromGLTexture3D_; namespace ULT { TEST_F(clCreateFromGLTexture3D_, givenNullConxtextWhenClCreateFromGlTexture2DIsCalledThenInvalidContextIsReturned) { int errCode = CL_SUCCESS; auto retVal = clCreateFromGLTexture3D(nullptr, // cl_context context CL_MEM_READ_WRITE, // cl_mem_flags flags 0, // GLenum texture_target 0, // GLint miplevel 0, // GLuint texture &errCode // cl_int *errcode_ret ); EXPECT_EQ(nullptr, retVal); EXPECT_EQ(errCode, CL_INVALID_CONTEXT); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/gl/cl_create_from_gl_texture_tests.cpp000066400000000000000000000010561422164147700321400ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/api/cl_api_tests.h" #include using namespace NEO; typedef api_tests clCreateFromGLTexture_; namespace ULT { TEST_F(clCreateFromGLTexture_, givenNullContextWhenCreateIsCalledThenErrorIsReturned) { int errCode = CL_SUCCESS; auto image = clCreateFromGLTexture(nullptr, CL_MEM_READ_WRITE, GL_TEXTURE_1D, 0, 0, &errCode); EXPECT_EQ(nullptr, image); EXPECT_EQ(errCode, CL_INVALID_CONTEXT); } } // namespace ULT 
compute-runtime-22.14.22890/opencl/test/unit_test/api/gl/cl_enqueue_acquire_gl_objects_tests.cpp000066400000000000000000000016201422164147700327600ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/api/cl_api_tests.h" using namespace NEO; typedef api_tests clEnqueueAcquireGLObjects_; namespace ULT { TEST_F(clEnqueueAcquireGLObjects_, givenNullCommandQueueWhenAcquireIsCalledThenInvalidCommandQueueIsReturned) { auto retVal = clEnqueueAcquireGLObjects(nullptr, // cl_command_queue command_queue 0, // cl_uint num_objects nullptr, // const cl_mem *mem_objects 0, // cl_uint num_events_in_wait_list nullptr, // const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/gl/cl_enqueue_release_gl_objects_tests.cpp000066400000000000000000000016311422164147700327510ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/api/cl_api_tests.h" using namespace NEO; typedef api_tests clEnqueueReleaseGLObjects_; namespace ULT { TEST_F(clEnqueueReleaseGLObjects_, givenNullCommandQueueWhenReleaseGlObjectsIsCalledThenInvalidCommandQueueIsReturned) { auto retVal = clEnqueueReleaseGLObjects(nullptr, // cl_command_queue command_queue 0, // cl_uint num_objects nullptr, // const cl_mem *mem_objects 0, // cl_uint num_events_in_wait_list nullptr, // const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/gl/cl_get_gl_device_info_tests.cpp000066400000000000000000000057311422164147700312070ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/device/device.h" #include 
"shared/source/helpers/hw_info.h" #include "opencl/test/unit_test/api/cl_api_tests.h" #include using namespace NEO; namespace ULT { //------------------------------------------------------------------------------ struct GetDeviceInfoP : public ApiFixture<>, public ::testing::TestWithParam { void SetUp() override { param = GetParam(); ApiFixture::SetUp(); } void TearDown() override { ApiFixture::TearDown(); } cl_device_info param; }; typedef GetDeviceInfoP GetDeviceGlInfoStr; TEST_P(GetDeviceGlInfoStr, WhenGettingDeviceExtensionsThenExtensionsAreReportedCorrectly) { char *paramValue = nullptr; size_t paramRetSize = 0; cl_int retVal = clGetDeviceInfo(testedClDevice, param, 0, nullptr, ¶mRetSize); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(0u, paramRetSize); paramValue = new char[paramRetSize]; retVal = clGetDeviceInfo(testedClDevice, param, paramRetSize, paramValue, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_GE(std::strlen(paramValue), 0u); // check for extensions if (param == CL_DEVICE_EXTENSIONS) { std::string extensionString(paramValue); size_t currentOffset = 0u; std::string supportedExtensions[] = { "cl_khr_byte_addressable_store ", "cl_khr_fp16 ", "cl_khr_global_int32_base_atomics ", "cl_khr_global_int32_extended_atomics ", "cl_khr_icd ", "cl_khr_local_int32_base_atomics ", "cl_khr_local_int32_extended_atomics ", "cl_intel_subgroups ", "cl_intel_required_subgroup_size ", "cl_intel_subgroups_short ", "cl_khr_spir ", "cl_intel_accelerator ", "cl_intel_driver_diagnostics ", "cl_khr_priority_hints ", "cl_khr_throttle_hints ", "cl_khr_create_command_queue ", "cl_khr_gl_depth_images", "cl_khr_gl_event", "cl_khr_gl_msaa_sharing", }; for (auto element = 0u; element < sizeof(supportedExtensions) / sizeof(supportedExtensions[0]); element++) { auto foundOffset = extensionString.find(supportedExtensions[element]); EXPECT_TRUE(foundOffset != std::string::npos); EXPECT_GE(foundOffset, currentOffset); currentOffset = foundOffset; } } delete[] paramValue; } // Define new 
command types to run the parameterized tests static cl_device_info deviceInfoStrParams[] = { CL_DEVICE_BUILT_IN_KERNELS, CL_DEVICE_EXTENSIONS, CL_DEVICE_LATEST_CONFORMANCE_VERSION_PASSED, CL_DEVICE_NAME, CL_DEVICE_OPENCL_C_VERSION, CL_DEVICE_PROFILE, CL_DEVICE_VENDOR, CL_DEVICE_VERSION, CL_DRIVER_VERSION}; INSTANTIATE_TEST_CASE_P(api, GetDeviceGlInfoStr, testing::ValuesIn(deviceInfoStrParams)); } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/gl/cl_get_gl_object_info_tests.cpp000066400000000000000000000011521422164147700312070ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/api/cl_api_tests.h" using namespace NEO; typedef api_tests clGetGLObjectInfo_; namespace ULT { TEST_F(clGetGLObjectInfo_, givenNullMemObjectWhenGetGlObjectInfoIsCalledThenInvalidMemObjectIsReturned) { auto retVal = clGetGLObjectInfo(nullptr, // cl_mem memobj nullptr, // cl_gl_object_type *gl_object_type nullptr // GLuint *gl_object_name ); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/api/gl/cl_get_gl_texture_info_tests.cpp000066400000000000000000000014761422164147700314520ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/api/cl_api_tests.h" using namespace NEO; typedef api_tests clGetGLTextureInfo_; namespace ULT { TEST_F(clGetGLTextureInfo_, givenNullMemObjectWhenGetGLTextureInfoIsCalledThenInvalidMemObjectIsReturned) { auto retVal = clGetGLTextureInfo(nullptr, // cl_mem memobj CL_GL_TEXTURE_TARGET, // cl_gl_texture_info param_name 0, // size_t param_value_size nullptr, // void *param_value nullptr // size_t *param_value_size_ret ); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } } // namespace ULT 
compute-runtime-22.14.22890/opencl/test/unit_test/api/gl/cl_gl_intel_tracing_tests.cpp000066400000000000000000000072641422164147700307230ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/tracing/tracing_api.h" #include "opencl/source/tracing/tracing_notify.h" #include "opencl/test/unit_test/api/cl_api_tests.h" using namespace NEO; namespace ULT { struct IntelGlTracingTest : public api_tests { public: IntelGlTracingTest() {} void SetUp() override { api_tests::SetUp(); status = clCreateTracingHandleINTEL(testedClDevice, callback, this, &handle); ASSERT_NE(nullptr, handle); ASSERT_EQ(CL_SUCCESS, status); for (uint32_t i = 0; i < CL_FUNCTION_COUNT; ++i) { status = clSetTracingPointINTEL(handle, static_cast(i), CL_TRUE); ASSERT_EQ(CL_SUCCESS, status); } status = clEnableTracingINTEL(handle); ASSERT_EQ(CL_SUCCESS, status); } void TearDown() override { status = clDisableTracingINTEL(handle); ASSERT_EQ(CL_SUCCESS, status); status = clDestroyTracingHandleINTEL(handle); ASSERT_EQ(CL_SUCCESS, status); api_tests::TearDown(); } protected: static void callback(cl_function_id fid, cl_callback_data *callback_data, void *user_data) { ASSERT_NE(nullptr, user_data); IntelGlTracingTest *base = (IntelGlTracingTest *)user_data; base->vcallback(fid, callback_data, nullptr); } virtual void vcallback(cl_function_id fid, cl_callback_data *callback_data, void *user_data) { if (fid == functionId) { if (callback_data->site == CL_CALLBACK_SITE_ENTER) { ++enterCount; } else if (callback_data->site == CL_CALLBACK_SITE_EXIT) { ++exitCount; } } } uint16_t callFunctions() { uint16_t count = 0; ++count; functionId = CL_FUNCTION_clCreateFromGLBuffer; clCreateFromGLBuffer(0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clCreateFromGLRenderbuffer; clCreateFromGLRenderbuffer(0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clCreateFromGLTexture; clCreateFromGLTexture(0, 0, 0, 0, 0, 0); ++count; functionId = 
CL_FUNCTION_clCreateFromGLTexture2D; clCreateFromGLTexture2D(0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clCreateFromGLTexture3D; clCreateFromGLTexture3D(0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clEnqueueAcquireGLObjects; clEnqueueAcquireGLObjects(0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clEnqueueReleaseGLObjects; clEnqueueReleaseGLObjects(0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clGetGLObjectInfo; clGetGLObjectInfo(0, 0, 0); ++count; functionId = CL_FUNCTION_clGetGLTextureInfo; clGetGLTextureInfo(0, 0, 0, 0, 0); return count; } protected: cl_tracing_handle handle = nullptr; cl_int status = CL_SUCCESS; uint16_t enterCount = 0; uint16_t exitCount = 0; cl_function_id functionId = CL_FUNCTION_COUNT; }; TEST_F(IntelGlTracingTest, GivenAllFunctionsWhenSettingTracingPointThenTracingOnAllFunctionsIsPerformed) { uint16_t count = callFunctions(); EXPECT_EQ(count, enterCount); EXPECT_EQ(count, exitCount); } TEST_F(IntelGlTracingTest, GivenNoFunctionsWhenSettingTracingPointThenNoTracingIsPerformed) { for (uint32_t i = 0; i < CL_FUNCTION_COUNT; ++i) { status = clSetTracingPointINTEL(handle, static_cast(i), CL_FALSE); EXPECT_EQ(CL_SUCCESS, status); } callFunctions(); EXPECT_EQ(0, enterCount); EXPECT_EQ(0, exitCount); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/aub_mem_dump/000077500000000000000000000000001422164147700242615ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_mem_dump/CMakeLists.txt000066400000000000000000000004461422164147700270250ustar00rootroot00000000000000# # Copyright (C) 2018-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_aub_mem_dump_tests ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/aub_alloc_dump_tests.cpp ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_aub_mem_dump_tests}) 
compute-runtime-22.14.22890/opencl/test/unit_test/aub_mem_dump/aub_alloc_dump_tests.cpp000066400000000000000000000620331422164147700311610ustar00rootroot00000000000000/* * Copyright (C) 2019-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/aub_mem_dump/aub_alloc_dump.h" #include "shared/source/gmm_helper/gmm.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/mocks/mock_allocation_properties.h" #include "shared/test/common/mocks/mock_gmm.h" #include "shared/test/common/mocks/mock_gmm_resource_info.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" using namespace NEO; typedef Test AubAllocDumpTests; struct AubFileStreamMock : public AubMemDump::AubFileStream { void write(const char *data, size_t size) override { buffer.resize(size); memcpy(buffer.data(), data, size); } char *getData() { return buffer.data(); } size_t getSize() { return buffer.size(); } std::vector buffer; }; HWTEST_F(AubAllocDumpTests, givenBufferOrImageWhenGraphicsAllocationIsKnownThenItsTypeCanBeCheckedIfItIsWritable) { auto memoryManager = pDevice->getMemoryManager(); auto gfxAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); gfxAllocation->setAllocationType(AllocationType::BUFFER); EXPECT_FALSE(gfxAllocation->isMemObjectsAllocationWithWritableFlags()); EXPECT_FALSE(AubAllocDump::isWritableBuffer(*gfxAllocation)); gfxAllocation->setAllocationType(AllocationType::BUFFER); gfxAllocation->setMemObjectsAllocationWithWritableFlags(true); EXPECT_TRUE(AubAllocDump::isWritableBuffer(*gfxAllocation)); 
gfxAllocation->setAllocationType(AllocationType::BUFFER_HOST_MEMORY); gfxAllocation->setMemObjectsAllocationWithWritableFlags(false); EXPECT_FALSE(AubAllocDump::isWritableBuffer(*gfxAllocation)); gfxAllocation->setAllocationType(AllocationType::BUFFER_HOST_MEMORY); gfxAllocation->setMemObjectsAllocationWithWritableFlags(true); EXPECT_TRUE(AubAllocDump::isWritableBuffer(*gfxAllocation)); gfxAllocation->setAllocationType(AllocationType::EXTERNAL_HOST_PTR); gfxAllocation->setMemObjectsAllocationWithWritableFlags(false); EXPECT_FALSE(AubAllocDump::isWritableBuffer(*gfxAllocation)); gfxAllocation->setAllocationType(AllocationType::EXTERNAL_HOST_PTR); gfxAllocation->setMemObjectsAllocationWithWritableFlags(true); EXPECT_TRUE(AubAllocDump::isWritableBuffer(*gfxAllocation)); gfxAllocation->setAllocationType(AllocationType::MAP_ALLOCATION); gfxAllocation->setMemObjectsAllocationWithWritableFlags(false); EXPECT_FALSE(AubAllocDump::isWritableBuffer(*gfxAllocation)); gfxAllocation->setAllocationType(AllocationType::MAP_ALLOCATION); gfxAllocation->setMemObjectsAllocationWithWritableFlags(true); EXPECT_TRUE(AubAllocDump::isWritableBuffer(*gfxAllocation)); gfxAllocation->setAllocationType(AllocationType::IMAGE); gfxAllocation->setMemObjectsAllocationWithWritableFlags(false); EXPECT_FALSE(AubAllocDump::isWritableImage(*gfxAllocation)); gfxAllocation->setAllocationType(AllocationType::IMAGE); gfxAllocation->setMemObjectsAllocationWithWritableFlags(true); EXPECT_TRUE(AubAllocDump::isWritableImage(*gfxAllocation)); memoryManager->freeGraphicsMemory(gfxAllocation); } HWTEST_F(AubAllocDumpTests, givenImageResourceWhenGmmResourceInfoIsAvailableThenImageSurfaceTypeCanBeDeducedFromGmmResourceType) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_1D, AubAllocDump::getImageSurfaceTypeFromGmmResourceType(GMM_RESOURCE_TYPE::RESOURCE_1D)); EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_2D, 
AubAllocDump::getImageSurfaceTypeFromGmmResourceType(GMM_RESOURCE_TYPE::RESOURCE_2D)); EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_3D, AubAllocDump::getImageSurfaceTypeFromGmmResourceType(GMM_RESOURCE_TYPE::RESOURCE_3D)); EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_NULL, AubAllocDump::getImageSurfaceTypeFromGmmResourceType(GMM_RESOURCE_TYPE::RESOURCE_INVALID)); } HWTEST_F(AubAllocDumpTests, givenGraphicsAllocationWhenDumpAllocationIsCalledInDefaultModeThenGraphicsAllocationShouldNotBeDumped) { auto memoryManager = pDevice->getMemoryManager(); auto gfxAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); std::unique_ptr mockAubFileStream(new AubFileStreamMock()); auto format = AubAllocDump::getDumpFormat(*gfxAllocation); AubAllocDump::dumpAllocation(format, *gfxAllocation, mockAubFileStream.get(), 0); EXPECT_EQ(0u, mockAubFileStream->getSize()); memoryManager->freeGraphicsMemory(gfxAllocation); } HWTEST_F(AubAllocDumpTests, givenGraphicsAllocationWhenDumpAllocationIsCalledButDumpFormatIsUnspecifiedThenGraphicsAllocationShouldNotBeDumped) { auto memoryManager = pDevice->getMemoryManager(); auto gfxAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); std::unique_ptr mockAubFileStream(new AubFileStreamMock()); auto format = AubAllocDump::getDumpFormat(*gfxAllocation); AubAllocDump::dumpAllocation(format, *gfxAllocation, mockAubFileStream.get(), 0); EXPECT_EQ(0u, mockAubFileStream->getSize()); memoryManager->freeGraphicsMemory(gfxAllocation); } HWTEST_F(AubAllocDumpTests, givenNonWritableBufferWhenDumpAllocationIsCalledAndDumpFormatIsSpecifiedThenBufferShouldNotBeDumped) { DebugManagerStateRestore dbgRestore; DebugManager.flags.AUBDumpBufferFormat.set("BIN"); auto memoryManager = pDevice->getMemoryManager(); auto gfxAllocation = 
memoryManager->allocateGraphicsMemoryWithProperties({pDevice->getRootDeviceIndex(), MemoryConstants::pageSize, AllocationType::BUFFER, pDevice->getDeviceBitfield()}); std::unique_ptr mockAubFileStream(new AubFileStreamMock()); auto format = AubAllocDump::getDumpFormat(*gfxAllocation); AubAllocDump::dumpAllocation(format, *gfxAllocation, mockAubFileStream.get(), 0); EXPECT_EQ(0u, mockAubFileStream->getSize()); memoryManager->freeGraphicsMemory(gfxAllocation); } HWTEST_F(AubAllocDumpTests, givenNonWritableImageWhenDumpAllocationIsCalledAndDumpFormatIsSpecifiedThenImageShouldNotBeDumped) { DebugManagerStateRestore dbgRestore; DebugManager.flags.AUBDumpBufferFormat.set("BMP"); auto memoryManager = pDevice->getMemoryManager(); auto gfxAllocation = MockGmm::allocateImage2d(*memoryManager); std::unique_ptr mockAubFileStream(new AubFileStreamMock()); auto format = AubAllocDump::getDumpFormat(*gfxAllocation); AubAllocDump::dumpAllocation(format, *gfxAllocation, mockAubFileStream.get(), 0); EXPECT_EQ(0u, mockAubFileStream->getSize()); memoryManager->freeGraphicsMemory(gfxAllocation); } HWTEST_F(AubAllocDumpTests, givenWritableBufferWhenDumpAllocationIsCalledAndAubDumpBufferFormatIsNotSetThenBufferShouldNotBeDumped) { MockContext context; size_t bufferSize = 10; auto retVal = CL_INVALID_VALUE; std::unique_ptr buffer(Buffer::create(&context, CL_MEM_READ_WRITE, bufferSize, nullptr, retVal)); ASSERT_NE(nullptr, buffer); auto gfxAllocation = buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex()); std::unique_ptr mockAubFileStream(new AubFileStreamMock()); auto handle = static_cast(reinterpret_cast(this)); auto format = AubAllocDump::getDumpFormat(*gfxAllocation); AubAllocDump::dumpAllocation(format, *gfxAllocation, mockAubFileStream.get(), handle); EXPECT_EQ(0u, mockAubFileStream->getSize()); } HWTEST_F(AubAllocDumpTests, givenWritableImageWhenDumpAllocationIsCalledAndAubDumpImageFormatIsNotSetThenImageShouldNotBeDumped) { MockContext context; std::unique_ptr 
image(ImageHelper::create(&context)); ASSERT_NE(nullptr, image); auto gfxAllocation = image->getGraphicsAllocation(pClDevice->getRootDeviceIndex()); std::unique_ptr mockAubFileStream(new AubFileStreamMock()); auto handle = static_cast(reinterpret_cast(this)); auto format = AubAllocDump::getDumpFormat(*gfxAllocation); AubAllocDump::dumpAllocation(format, *gfxAllocation, mockAubFileStream.get(), handle); EXPECT_EQ(0u, mockAubFileStream->getSize()); } HWTEST_F(AubAllocDumpTests, givenWritableBufferWhenDumpAllocationIsCalledAndAubDumpBufferFormatIsSetToBinThenBufferShouldBeDumpedInBinFormat) { DebugManagerStateRestore dbgRestore; DebugManager.flags.AUBDumpBufferFormat.set("BIN"); MockContext context; size_t bufferSize = 10; auto retVal = CL_INVALID_VALUE; std::unique_ptr buffer(Buffer::create(&context, CL_MEM_READ_WRITE, bufferSize, nullptr, retVal)); ASSERT_NE(nullptr, buffer); auto gfxAllocation = buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex()); std::unique_ptr mockAubFileStream(new AubFileStreamMock()); auto handle = static_cast(reinterpret_cast(this)); auto format = AubAllocDump::getDumpFormat(*gfxAllocation); AubAllocDump::dumpAllocation(format, *gfxAllocation, mockAubFileStream.get(), handle); ASSERT_EQ(sizeof(AubMemDump::AubCaptureBinaryDumpHD), mockAubFileStream->getSize()); AubMemDump::AubCaptureBinaryDumpHD cmd; memcpy(&cmd, mockAubFileStream->getData(), mockAubFileStream->getSize()); EXPECT_EQ(0x7u, cmd.Header.Type); EXPECT_EQ(0x1u, cmd.Header.Opcode); EXPECT_EQ(0x15u, cmd.Header.SubOp); EXPECT_EQ(((sizeof(cmd) - sizeof(cmd.Header)) / sizeof(uint32_t)) - 1, cmd.Header.DwordLength); EXPECT_EQ(gfxAllocation->getGpuAddress(), cmd.getBaseAddr()); EXPECT_EQ(static_cast(gfxAllocation->getUnderlyingBufferSize()), cmd.getWidth()); EXPECT_EQ(1u, cmd.getHeight()); EXPECT_EQ(static_cast(gfxAllocation->getUnderlyingBufferSize()), cmd.getPitch()); EXPECT_EQ(1u, cmd.GttType); EXPECT_EQ(handle, cmd.DirectoryHandle); } HWTEST_F(AubAllocDumpTests, 
givenWritableBufferWhenDumpAllocationIsCalledAndAubDumpBufferFormatIsSetToTreThenBufferShouldBeDumpedInTreFormat) { DebugManagerStateRestore dbgRestore; DebugManager.flags.AUBDumpBufferFormat.set("TRE"); MockContext context(pClDevice); size_t bufferSize = 10; auto retVal = CL_INVALID_VALUE; std::unique_ptr buffer(Buffer::create(&context, CL_MEM_READ_WRITE, bufferSize, nullptr, retVal)); ASSERT_NE(nullptr, buffer); auto gfxAllocation = buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex()); std::unique_ptr mockAubFileStream(new AubFileStreamMock()); auto handle = static_cast(reinterpret_cast(this)); auto format = AubAllocDump::getDumpFormat(*gfxAllocation); AubAllocDump::dumpAllocation(format, *gfxAllocation, mockAubFileStream.get(), handle); ASSERT_EQ(sizeof(AubMemDump::CmdServicesMemTraceDumpCompress), mockAubFileStream->getSize()); AubMemDump::CmdServicesMemTraceDumpCompress cmd; memcpy(&cmd, mockAubFileStream->getData(), mockAubFileStream->getSize()); EXPECT_EQ((sizeof(AubMemDump::CmdServicesMemTraceDumpCompress) - 1) / 4, cmd.dwordCount); EXPECT_EQ(0x7u, cmd.instructionType); EXPECT_EQ(0x10u, cmd.instructionSubOpcode); EXPECT_EQ(0x2eu, cmd.instructionOpcode); using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using SURFACE_FORMAT = typename RENDER_SURFACE_STATE::SURFACE_FORMAT; EXPECT_EQ(gfxAllocation->getGpuAddress(), cmd.getSurfaceAddress()); EXPECT_EQ(static_cast(gfxAllocation->getUnderlyingBufferSize()), cmd.surfaceWidth); EXPECT_EQ(1u, cmd.surfaceHeight); EXPECT_EQ(static_cast(gfxAllocation->getUnderlyingBufferSize()), cmd.surfacePitch); EXPECT_EQ(SURFACE_FORMAT::SURFACE_FORMAT_RAW, cmd.surfaceFormat); EXPECT_EQ(AubMemDump::CmdServicesMemTraceDumpCompress::DumpTypeValues::Tre, cmd.dumpType); EXPECT_EQ(RENDER_SURFACE_STATE::TILE_MODE_LINEAR, cmd.surfaceTilingType); EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_BUFFER, cmd.surfaceType); EXPECT_EQ(AubMemDump::CmdServicesMemTraceDumpCompress::AlgorithmValues::Uncompressed, 
cmd.algorithm); EXPECT_EQ(1u, cmd.gttType); EXPECT_EQ(handle, cmd.directoryHandle); } HWTEST_F(AubAllocDumpTests, givenWritableImageWhenDumpAllocationIsCalledAndAubDumpImageFormatIsSetToBmpThenImageShouldBeDumpedInBmpFormat) { DebugManagerStateRestore dbgRestore; DebugManager.flags.AUBDumpImageFormat.set("BMP"); MockContext context(pClDevice); std::unique_ptr image(ImageHelper::create(&context)); ASSERT_NE(nullptr, image); auto gfxAllocation = image->getGraphicsAllocation(pClDevice->getRootDeviceIndex()); std::unique_ptr mockAubFileStream(new AubFileStreamMock()); auto handle = static_cast(reinterpret_cast(this)); auto format = AubAllocDump::getDumpFormat(*gfxAllocation); AubAllocDump::dumpAllocation(format, *gfxAllocation, mockAubFileStream.get(), handle); ASSERT_EQ(sizeof(AubMemDump::AubCmdDumpBmpHd), mockAubFileStream->getSize()); AubMemDump::AubCmdDumpBmpHd cmd; memcpy(&cmd, mockAubFileStream->getData(), mockAubFileStream->getSize()); EXPECT_EQ(0x7u, cmd.Header.Type); EXPECT_EQ(0x1u, cmd.Header.Opcode); EXPECT_EQ(0x44u, cmd.Header.SubOp); EXPECT_EQ(((sizeof(cmd) - sizeof(cmd.Header)) / sizeof(uint32_t)) - 1, cmd.Header.DwordLength); EXPECT_EQ(0u, cmd.Xmin); EXPECT_EQ(0u, cmd.Ymin); auto gmm = gfxAllocation->getDefaultGmm(); EXPECT_EQ((8 * gmm->gmmResourceInfo->getRenderPitch()) / gmm->gmmResourceInfo->getBitsPerPixel(), cmd.BufferPitch); EXPECT_EQ(gmm->gmmResourceInfo->getBitsPerPixel(), cmd.BitsPerPixel); EXPECT_EQ(static_cast(gmm->gmmResourceInfo->getResourceFormatSurfaceState()), cmd.Format); EXPECT_EQ(static_cast(gmm->gmmResourceInfo->getBaseWidth()), cmd.Xsize); EXPECT_EQ(static_cast(gmm->gmmResourceInfo->getBaseHeight()), cmd.Ysize); EXPECT_EQ(gfxAllocation->getGpuAddress(), cmd.getBaseAddr()); EXPECT_EQ(0u, cmd.Secure); EXPECT_EQ(0u, cmd.UseFence); auto flagInfo = gmm->gmmResourceInfo->getResourceFlags()->Info; EXPECT_EQ(static_cast(flagInfo.TiledW || flagInfo.TiledX || flagInfo.TiledY || flagInfo.TiledYf || flagInfo.TiledYs), cmd.TileOn); 
EXPECT_EQ(flagInfo.TiledY, cmd.WalkY); EXPECT_EQ(1u, cmd.UsePPGTT); EXPECT_EQ(1u, cmd.Use32BitDump); EXPECT_EQ(1u, cmd.UseFullFormat); EXPECT_EQ(handle, cmd.DirectoryHandle); } HWTEST_F(AubAllocDumpTests, givenWritableImageWhenDumpAllocationIsCalledAndAubDumpImageFormatIsSetToTreThenImageShouldBeDumpedInTreFormat) { DebugManagerStateRestore dbgRestore; DebugManager.flags.AUBDumpImageFormat.set("TRE"); MockContext context(pClDevice); std::unique_ptr image(ImageHelper::create(&context)); ASSERT_NE(nullptr, image); auto gfxAllocation = image->getGraphicsAllocation(pClDevice->getRootDeviceIndex()); std::unique_ptr mockAubFileStream(new AubFileStreamMock()); auto handle = static_cast(reinterpret_cast(this)); auto format = AubAllocDump::getDumpFormat(*gfxAllocation); AubAllocDump::dumpAllocation(format, *gfxAllocation, mockAubFileStream.get(), handle); ASSERT_EQ(sizeof(AubMemDump::CmdServicesMemTraceDumpCompress), mockAubFileStream->getSize()); AubMemDump::CmdServicesMemTraceDumpCompress cmd; memcpy(&cmd, mockAubFileStream->getData(), mockAubFileStream->getSize()); EXPECT_EQ((sizeof(AubMemDump::CmdServicesMemTraceDumpCompress) - 1) / 4, cmd.dwordCount); EXPECT_EQ(0x7u, cmd.instructionType); EXPECT_EQ(0x10u, cmd.instructionSubOpcode); EXPECT_EQ(0x2eu, cmd.instructionOpcode); using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using SURFACE_FORMAT = typename RENDER_SURFACE_STATE::SURFACE_FORMAT; EXPECT_EQ(gfxAllocation->getGpuAddress(), cmd.getSurfaceAddress()); auto gmm = gfxAllocation->getDefaultGmm(); EXPECT_EQ(static_cast(gmm->gmmResourceInfo->getBaseWidth()), cmd.surfaceWidth); EXPECT_EQ(static_cast(gmm->gmmResourceInfo->getBaseHeight()), cmd.surfaceHeight); EXPECT_EQ(static_cast(gmm->gmmResourceInfo->getRenderPitch()), cmd.surfacePitch); EXPECT_EQ(static_cast(gmm->gmmResourceInfo->getResourceFormatSurfaceState()), cmd.surfaceFormat); EXPECT_EQ(AubMemDump::CmdServicesMemTraceDumpCompress::DumpTypeValues::Tre, cmd.dumpType); 
EXPECT_EQ(gmm->gmmResourceInfo->getTileModeSurfaceState(), cmd.surfaceTilingType); EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_2D, cmd.surfaceType); EXPECT_EQ(AubMemDump::CmdServicesMemTraceDumpCompress::AlgorithmValues::Uncompressed, cmd.algorithm); EXPECT_EQ(1u, cmd.gttType); EXPECT_EQ(handle, cmd.directoryHandle); } HWTEST_F(AubAllocDumpTests, givenCompressedImageWritableWhenDumpAllocationIsCalledAndAubDumpImageFormatIsSetToTreThenImageShouldBeDumpedInTreFormat) { DebugManagerStateRestore dbgRestore; DebugManager.flags.AUBDumpImageFormat.set("TRE"); MockContext context(pClDevice); std::unique_ptr image(ImageHelper::create(&context)); ASSERT_NE(nullptr, image); auto gfxAllocation = image->getGraphicsAllocation(pClDevice->getRootDeviceIndex()); gfxAllocation->getDefaultGmm()->isCompressionEnabled = true; std::unique_ptr mockAubFileStream(new AubFileStreamMock()); auto handle = static_cast(reinterpret_cast(this)); auto format = AubAllocDump::getDumpFormat(*gfxAllocation); AubAllocDump::dumpAllocation(format, *gfxAllocation, mockAubFileStream.get(), handle); EXPECT_EQ(0u, mockAubFileStream->getSize()); } HWTEST_F(AubAllocDumpTests, givenMultisampleImageWritableWhenDumpAllocationIsCalledAndAubDumpImageFormatIsSetToTreThenImageDumpIsNotSupported) { DebugManagerStateRestore dbgRestore; DebugManager.flags.AUBDumpImageFormat.set("TRE"); MockContext context(pClDevice); std::unique_ptr image(ImageHelper::create(&context)); ASSERT_NE(nullptr, image); auto gfxAllocation = image->getGraphicsAllocation(pClDevice->getRootDeviceIndex()); auto mockGmmResourceInfo = reinterpret_cast(gfxAllocation->getDefaultGmm()->gmmResourceInfo.get()); mockGmmResourceInfo->mockResourceCreateParams.MSAA.NumSamples = 2; std::unique_ptr mockAubFileStream(new AubFileStreamMock()); auto handle = static_cast(reinterpret_cast(this)); auto format = AubAllocDump::getDumpFormat(*gfxAllocation); AubAllocDump::dumpAllocation(format, *gfxAllocation, mockAubFileStream.get(), handle); EXPECT_EQ(0u, 
mockAubFileStream->getSize()); } HWTEST_F(AubAllocDumpTests, givenMultisampleImageWritableWheGetDumpSurfaceIsCalledAndDumpFormatIsSpecifiedThenNullSurfaceInfoIsReturned) { MockContext context(pClDevice); std::unique_ptr image(ImageHelper::create(&context)); ASSERT_NE(nullptr, image); auto gfxAllocation = image->getGraphicsAllocation(pClDevice->getRootDeviceIndex()); auto mockGmmResourceInfo = reinterpret_cast(gfxAllocation->getDefaultGmm()->gmmResourceInfo.get()); mockGmmResourceInfo->mockResourceCreateParams.MSAA.NumSamples = 2; EXPECT_EQ(nullptr, AubAllocDump::getDumpSurfaceInfo(*gfxAllocation, AubAllocDump::DumpFormat::IMAGE_BMP)); EXPECT_EQ(nullptr, AubAllocDump::getDumpSurfaceInfo(*gfxAllocation, AubAllocDump::DumpFormat::IMAGE_TRE)); } struct AubSurfaceDumpTests : public AubAllocDumpTests, public ::testing::WithParamInterface> { void SetUp() override { AubAllocDumpTests::SetUp(); isCompressed = std::get<0>(GetParam()); dumpFormat = std::get<1>(GetParam()); } void TearDown() override { AubAllocDumpTests::TearDown(); } bool isCompressed = false; AubAllocDump::DumpFormat dumpFormat = AubAllocDump::DumpFormat::NONE; }; HWTEST_P(AubSurfaceDumpTests, givenGraphicsAllocationWhenGetDumpSurfaceIsCalledAndDumpFormatIsSpecifiedThenSurfaceInfoIsReturned) { ExecutionEnvironment *executionEnvironment = pDevice->executionEnvironment; MockMemoryManager memoryManager(*executionEnvironment); if (AubAllocDump::isBufferDumpFormat(dumpFormat)) { auto bufferAllocation = memoryManager.allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, bufferAllocation); MockBuffer::setAllocationType(bufferAllocation, pDevice->getRootDeviceEnvironment().getGmmClientContext(), isCompressed); std::unique_ptr surfaceInfo(AubAllocDump::getDumpSurfaceInfo(*bufferAllocation, dumpFormat)); if (nullptr != surfaceInfo) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using SURFACE_FORMAT = 
typename RENDER_SURFACE_STATE::SURFACE_FORMAT; EXPECT_EQ(GmmHelper::decanonize(bufferAllocation->getGpuAddress()), surfaceInfo->address); EXPECT_EQ(static_cast(bufferAllocation->getUnderlyingBufferSize()), surfaceInfo->width); EXPECT_EQ(1u, surfaceInfo->height); EXPECT_EQ(static_cast(bufferAllocation->getUnderlyingBufferSize()), surfaceInfo->pitch); EXPECT_EQ(SURFACE_FORMAT::SURFACE_FORMAT_RAW, surfaceInfo->format); EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_BUFFER, surfaceInfo->surftype); EXPECT_EQ(RENDER_SURFACE_STATE::TILE_MODE_LINEAR, surfaceInfo->tilingType); EXPECT_EQ(bufferAllocation->isCompressionEnabled(), surfaceInfo->compressed); EXPECT_EQ((AubAllocDump::DumpFormat::BUFFER_TRE == dumpFormat) ? aub_stream::dumpType::tre : aub_stream::dumpType::bin, surfaceInfo->dumpType); } memoryManager.freeGraphicsMemory(bufferAllocation); } if (AubAllocDump::isImageDumpFormat(dumpFormat)) { ImageDescriptor imgDesc = {}; imgDesc.imageWidth = 512; imgDesc.imageHeight = 1; imgDesc.imageType = ImageType::Image2D; auto imgInfo = MockGmm::initImgInfo(imgDesc, 0, nullptr); MockGmm::queryImgParams(pDevice->getGmmClientContext(), imgInfo, false); AllocationData allocationData; allocationData.imgInfo = &imgInfo; auto imageAllocation = memoryManager.allocateGraphicsMemoryForImage(allocationData); ASSERT_NE(nullptr, imageAllocation); auto gmm = imageAllocation->getDefaultGmm(); gmm->isCompressionEnabled = isCompressed; std::unique_ptr surfaceInfo(AubAllocDump::getDumpSurfaceInfo(*imageAllocation, dumpFormat)); if (nullptr != surfaceInfo) { EXPECT_EQ(GmmHelper::decanonize(imageAllocation->getGpuAddress()), surfaceInfo->address); EXPECT_EQ(static_cast(gmm->gmmResourceInfo->getBaseWidth()), surfaceInfo->width); EXPECT_EQ(static_cast(gmm->gmmResourceInfo->getBaseHeight()), surfaceInfo->height); EXPECT_EQ(static_cast(gmm->gmmResourceInfo->getRenderPitch()), surfaceInfo->pitch); EXPECT_EQ(static_cast(gmm->gmmResourceInfo->getResourceFormatSurfaceState()), 
surfaceInfo->format); EXPECT_EQ(AubAllocDump::getImageSurfaceTypeFromGmmResourceType(gmm->gmmResourceInfo->getResourceType()), surfaceInfo->surftype); EXPECT_EQ(gmm->gmmResourceInfo->getTileModeSurfaceState(), surfaceInfo->tilingType); EXPECT_EQ(gmm->isCompressionEnabled, surfaceInfo->compressed); EXPECT_EQ((AubAllocDump::DumpFormat::IMAGE_TRE == dumpFormat) ? aub_stream::dumpType::tre : aub_stream::dumpType::bmp, surfaceInfo->dumpType); } memoryManager.freeGraphicsMemory(imageAllocation); } } INSTANTIATE_TEST_CASE_P(GetDumpSurfaceTest, AubSurfaceDumpTests, ::testing::Combine( ::testing::Bool(), // isCompressed ::testing::Values( // dumpFormat AubAllocDump::DumpFormat::NONE, AubAllocDump::DumpFormat::BUFFER_BIN, AubAllocDump::DumpFormat::BUFFER_TRE, AubAllocDump::DumpFormat::IMAGE_BMP, AubAllocDump::DumpFormat::IMAGE_TRE))); compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/000077500000000000000000000000001422164147700236205ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/CMakeLists.txt000066400000000000000000000045351422164147700263670ustar00rootroot00000000000000# # Copyright (C) 2018-2022 Intel Corporation # # SPDX-License-Identifier: MIT # project(igdrcl_aub_tests) set(OPENCL_AUB_TEST_DIR ${CMAKE_CURRENT_SOURCE_DIR}) list(APPEND IGDRCL_AUB_TESTS__TARGET_OBJECTS ${NEO_SOURCE_DIR}/opencl/test/unit_test/test_macros/test_checks_ocl.cpp ${NEO_SOURCE_DIR}/shared/test/common/test_macros/test_checks_shared.cpp $ $ $ $ $ $ $ $ ) if(DEFINED AUB_STREAM_PROJECT_NAME) list(APPEND IGDRCL_AUB_TESTS__TARGET_OBJECTS $) endif() add_executable(igdrcl_aub_tests ${IGDRCL_AUB_TESTS__TARGET_OBJECTS} ) set_target_properties(igdrcl_aub_tests PROPERTIES FOLDER ${OPENCL_TEST_PROJECTS_FOLDER}) if(WIN32) set_target_properties(igdrcl_aub_tests PROPERTIES VS_DEBUGGER_WORKING_DIRECTORY ${TargetDir} VS_DEBUGGER_COMMAND_ARGUMENTS " --disable_pagefaulting_tests" ) endif() if(WIN32) target_include_directories(igdrcl_aub_tests PRIVATE 
${NEO_SOURCE_DIR}/opencl/test/unit_test/mocks${BRANCH_DIR_SUFFIX} ) endif() target_include_directories(igdrcl_aub_tests PRIVATE ${NEO_SHARED_TEST_DIRECTORY}/common/test_configuration/aub_tests ${NEO_SOURCE_DIR}/shared/test/common/helpers/includes${BRANCH_DIR_SUFFIX} ) target_sources(igdrcl_aub_tests PRIVATE ${NEO_SHARED_TEST_DIRECTORY}/unit_test/page_fault_manager/default_asan_options.cpp ) copy_gmm_dll_for(igdrcl_aub_tests) add_subdirectories() target_link_libraries(igdrcl_aub_tests ${NEO_MOCKABLE_LIB_NAME} ${NEO_SHARED_MOCKABLE_LIB_NAME}) target_link_libraries(igdrcl_aub_tests igdrcl_mocks) target_link_libraries(igdrcl_aub_tests gmock-gtest ${NEO_EXTRA_LIBS}) if(UNIX) target_link_libraries(igdrcl_aub_tests ${GMM_LINK_NAME}) else() add_dependencies(igdrcl_aub_tests ${GMM_TARGET_NAME}) endif() target_include_directories(igdrcl_aub_tests BEFORE PRIVATE ${NEO_SHARED_TEST_DIRECTORY}/common/test_macros/header${BRANCH_DIR_SUFFIX}) create_project_source_tree(igdrcl_aub_tests) compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/command_queue/000077500000000000000000000000001422164147700264425ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/command_queue/CMakeLists.txt000066400000000000000000000051511422164147700312040ustar00rootroot00000000000000# # Copyright (C) 2018-2022 Intel Corporation # # SPDX-License-Identifier: MIT # target_sources(igdrcl_aub_tests PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/command_enqueue_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_copy_buffer_aub_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_copy_buffer_rect_aub_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_copy_image_aub_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_fill_buffer_aub_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_fill_image_aub_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_kernel_aub_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_map_buffer_aub_tests.cpp 
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_map_image_aub_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_printf_kernel_aub_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_read_buffer_aub_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_read_buffer_rect_aub_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_read_image_aub_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_verify_memory_buffer_aub_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_verify_memory_image_aub_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_write_buffer_aub_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_write_buffer_rect_aub_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_write_copy_read_buffer_aub_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_write_copy_read_buffer_aub_tests.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_write_image_aub_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/single_tile_products_excludes.cpp ) if(TESTS_XEHP_AND_LATER) target_sources(igdrcl_aub_tests PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/aub_enqueue_resource_barrier_xehp_and_later.cpp ${CMAKE_CURRENT_SOURCE_DIR}/aub_inline_data_local_id_tests_xehp_and_later.cpp ${CMAKE_CURRENT_SOURCE_DIR}/aub_multicontext_tests_xehp_and_later.cpp ${CMAKE_CURRENT_SOURCE_DIR}/aub_one_va_multi_physical_tests_xehp_and_later.cpp ${CMAKE_CURRENT_SOURCE_DIR}/aub_postsync_write_tests_xehp_and_later.cpp ${CMAKE_CURRENT_SOURCE_DIR}/aub_scratch_space_tests_xehp_and_later.cpp ${CMAKE_CURRENT_SOURCE_DIR}/compression_aub_tests_xehp_and_later.cpp ${CMAKE_CURRENT_SOURCE_DIR}/multi_tile_buffers_aub_tests_xehp_and_later.cpp ) endif() add_subdirectories() aub_enqueue_resource_barrier_xehp_and_later.cpp000066400000000000000000000111501422164147700400350ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/command_queue/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/command_queue/resource_barrier.h" #include 
"opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/aub_tests/fixtures/aub_fixture.h" #include "opencl/test/unit_test/aub_tests/fixtures/hello_world_fixture.h" #include "opencl/test/unit_test/helpers/cmd_buffer_validator.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "test_traits_common.h" using namespace NEO; using ResourceBarrierAubTest = Test>; struct L3ControlSupportedMatcher { template static constexpr bool isMatched() { if constexpr (HwMapper::GfxProduct::supportsCmdSet(IGFX_XE_HP_CORE)) { return TestTraits::get()>::l3ControlSupported; } return false; } }; HWTEST2_F(ResourceBarrierAubTest, givenAllocationsWhenEnqueueResourceBarrierCalledThenL3FlushCommandWasSubmitted, L3ControlSupportedMatcher) { using L3_CONTROL = typename FamilyType::L3_CONTROL; constexpr size_t bufferSize = MemoryConstants::pageSize; char bufferAMemory[bufferSize]; char bufferBMemory[bufferSize]; memset(bufferAMemory, 1, bufferSize); memset(bufferBMemory, 129, bufferSize); auto retVal = CL_INVALID_VALUE; auto srcBuffer = std::unique_ptr(Buffer::create(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, bufferSize, bufferAMemory, retVal)); ASSERT_NE(nullptr, srcBuffer); auto dstBuffer1 = std::unique_ptr(Buffer::create(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, bufferSize, bufferBMemory, retVal)); ASSERT_NE(nullptr, dstBuffer1); auto dstBuffer2 = std::unique_ptr(Buffer::create(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, bufferSize, bufferBMemory, retVal)); ASSERT_NE(nullptr, dstBuffer2); retVal = pCmdQ->enqueueCopyBuffer(srcBuffer.get(), dstBuffer1.get(), 0, 0, bufferSize, 0, nullptr, nullptr); retVal = pCmdQ->enqueueCopyBuffer(srcBuffer.get(), dstBuffer2.get(), 0, 0, bufferSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); cl_resource_barrier_descriptor_intel descriptor{}; cl_resource_barrier_descriptor_intel descriptor2{}; descriptor.mem_object = dstBuffer1.get(); 
descriptor2.mem_object = dstBuffer2.get(); const cl_resource_barrier_descriptor_intel descriptors[] = {descriptor, descriptor2}; BarrierCommand bCmd(pCmdQ, descriptors, 2); auto sizeUsed = pCmdQ->getCS(0).getUsed(); retVal = pCmdQ->enqueueResourceBarrier(&bCmd, 0, nullptr, nullptr); LinearStream &l3FlushCmdStream = pCmdQ->getCS(0); std::string err; auto cmdBuffOk = expectCmdBuff(l3FlushCmdStream, sizeUsed, std::vector{ new MatchAnyCmd(AnyNumber), new MatchHwCmd(1), new MatchAnyCmd(AnyNumber), }, &err); EXPECT_TRUE(cmdBuffOk) << err; retVal = pCmdQ->enqueueCopyBuffer(srcBuffer.get(), dstBuffer2.get(), 0, 0, bufferSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); pCmdQ->flush(); expectMemory(reinterpret_cast(dstBuffer1->getGraphicsAllocation(device->getRootDeviceIndex())->getGpuAddress()), bufferAMemory, bufferSize); expectMemory(reinterpret_cast(dstBuffer2->getGraphicsAllocation(device->getRootDeviceIndex())->getGpuAddress()), bufferAMemory, bufferSize); } aub_inline_data_local_id_tests_xehp_and_later.cpp000066400000000000000000000423151422164147700402770ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/command_queue/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/array_count.h" #include "shared/test/common/cmd_parse/hw_parse.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/test/unit_test/aub_tests/command_stream/aub_command_stream_fixture.h" #include "opencl/test/unit_test/aub_tests/fixtures/aub_fixture.h" #include "opencl/test/unit_test/command_queue/command_queue_fixture.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/fixtures/simple_arg_kernel_fixture.h" #include 
"opencl/test/unit_test/indirect_heap/indirect_heap_fixture.h" using namespace NEO; struct AubDispatchThreadDataFixture : public KernelAUBFixture { struct TestVariables { Buffer *destBuffer = nullptr; void *destMemory = nullptr; size_t sizeUserMemory = 0; size_t sizeWrittenMemory = 0; size_t sizeRemainderMemory = 0; void *expectedMemory = nullptr; void *expectedRemainderMemory = nullptr; char *remainderDestMemory = nullptr; unsigned int scalarArg = 0; size_t typeSize = 0; size_t gwsSize = 0; size_t lwsSize = 0; }; void SetUp() override { KernelAUBFixture::SetUp(); variablesCount = arrayCount(variables); BufferDefaults::context = context; for (size_t i = 0; i < variablesCount; i++) { if (variables[i].sizeUserMemory) { variables[i].destBuffer = Buffer::create( context, CL_MEM_READ_WRITE | CL_MEM_FORCE_HOST_MEMORY_INTEL, variables[i].sizeUserMemory, nullptr, retVal); ASSERT_NE(nullptr, variables[i].destBuffer); variables[i].destMemory = reinterpret_cast(variables[i].destBuffer->getCpuAddressForMapping()); } } } void TearDown() override { pCmdQ->flush(); for (size_t i = 0; i < variablesCount; i++) { if (variables[i].destBuffer) { delete variables[i].destBuffer; variables[i].destBuffer = nullptr; } if (variables[i].expectedMemory) { alignedFree(variables[i].expectedMemory); variables[i].expectedMemory = nullptr; } if (variables[i].expectedRemainderMemory) { alignedFree(variables[i].expectedRemainderMemory); variables[i].expectedRemainderMemory = nullptr; } } BufferDefaults::context = nullptr; KernelAUBFixture::TearDown(); } std::unique_ptr debugRestorer; TestVariables variables[5] = {}; size_t variablesCount; HardwareParse hwParser; }; struct InlineDataFixture : AubDispatchThreadDataFixture { void SetUp() override { debugRestorer = std::make_unique(); DebugManager.flags.EnablePassInlineData.set(true); initializeKernel3Variables(); initializeKernel4Variables(); AubDispatchThreadDataFixture::SetUp(); setUpKernel3(); } void initializeKernel4Variables() { kernelIds |= (1 << 
4); variables[4].gwsSize = 1; variables[4].lwsSize = 1; } void initializeKernel3Variables() { kernelIds |= (1 << 3); variables[3].sizeUserMemory = 4096; variables[3].typeSize = sizeof(unsigned int); variables[3].gwsSize = 128; variables[3].lwsSize = 32; } void setUpKernel3() { memset(variables[3].destMemory, 0xFE, variables[3].sizeUserMemory); kernels[3]->setArg(0, variables[3].destBuffer); variables[3].sizeWrittenMemory = variables[3].gwsSize * variables[3].typeSize; variables[3].expectedMemory = alignedMalloc(variables[3].sizeWrittenMemory, 4096); memset(variables[3].expectedMemory, 0, variables[3].sizeWrittenMemory); variables[3].sizeRemainderMemory = variables[3].sizeUserMemory - variables[3].sizeWrittenMemory; variables[3].expectedRemainderMemory = alignedMalloc(variables[3].sizeRemainderMemory, 4096); memcpy_s(variables[3].expectedRemainderMemory, variables[3].sizeRemainderMemory, variables[3].destMemory, variables[3].sizeRemainderMemory); variables[3].remainderDestMemory = static_cast(variables[3].destMemory) + variables[3].sizeWrittenMemory; } }; using XeHPAndLaterAubInlineDataTest = Test; HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterAubInlineDataTest, givenCrossThreadFitIntoSingleGrfWhenInlineDataAllowedThenCopyAllCrossThreadIntoInline) { using WALKER_TYPE = typename FamilyType::WALKER_TYPE; using INLINE_DATA = typename FamilyType::INLINE_DATA; if (!HardwareCommandsHelper::inlineDataProgrammingRequired(*kernels[4])) { return; } cl_uint workDim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {variables[4].gwsSize, 1, 1}; size_t localWorkSize[3] = {variables[4].lwsSize, 1, 1}; cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event *event = nullptr; auto retVal = pCmdQ->enqueueKernel( kernels[4].get(), workDim, globalWorkOffset, globalWorkSize, localWorkSize, numEventsInWaitList, eventWaitList, event); ASSERT_EQ(CL_SUCCESS, retVal); pCmdQ->flush(); hwParser.parseCommands(pCmdQ->getCS(0), 0); 
hwParser.findHardwareCommands(); EXPECT_NE(hwParser.itorWalker, hwParser.cmdList.end()); auto walker = genCmdCast(*hwParser.itorWalker); EXPECT_EQ(1u, walker->getEmitInlineParameter()); auto localId = kernels[4]->getKernelInfo().kernelDescriptor.kernelAttributes.localId; uint32_t expectedEmitLocal = 0; if (localId[0]) { expectedEmitLocal |= (1 << 0); } if (localId[1]) { expectedEmitLocal |= (1 << 1); } if (localId[2]) { expectedEmitLocal |= (1 << 2); } EXPECT_EQ(expectedEmitLocal, walker->getEmitLocalId()); EXPECT_EQ(0, memcmp(walker->getInlineDataPointer(), kernels[4]->getCrossThreadData(), sizeof(INLINE_DATA))); //this kernel does nothing, so no expectMemory because only such kernel can fit into single GRF //this is for sake of testing inline data data copying by COMPUTE_WALKER } HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterAubInlineDataTest, givenCrossThreadSizeMoreThanSingleGrfWhenInlineDataAllowedThenCopyGrfCrossThreadToInline) { using WALKER_TYPE = typename FamilyType::WALKER_TYPE; using INLINE_DATA = typename FamilyType::INLINE_DATA; if (!HardwareCommandsHelper::inlineDataProgrammingRequired(*kernels[3])) { return; } cl_uint workDim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {variables[3].gwsSize, 1, 1}; size_t localWorkSize[3] = {variables[3].lwsSize, 1, 1}; cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event *event = nullptr; IndirectHeap &ih = pCmdQ->getIndirectHeap(IndirectHeap::Type::INDIRECT_OBJECT, 2048); auto retVal = pCmdQ->enqueueKernel( kernels[3].get(), workDim, globalWorkOffset, globalWorkSize, localWorkSize, numEventsInWaitList, eventWaitList, event); ASSERT_EQ(CL_SUCCESS, retVal); pCmdQ->flush(); hwParser.parseCommands(pCmdQ->getCS(0), 0); hwParser.findHardwareCommands(); EXPECT_NE(hwParser.itorWalker, hwParser.cmdList.end()); auto walker = genCmdCast(*hwParser.itorWalker); EXPECT_EQ(1u, walker->getEmitInlineParameter()); auto localId = 
kernels[3]->getKernelInfo().kernelDescriptor.kernelAttributes.localId; uint32_t expectedEmitLocal = 0; if (localId[0]) { expectedEmitLocal |= (1 << 0); } if (localId[1]) { expectedEmitLocal |= (1 << 1); } if (localId[2]) { expectedEmitLocal |= (1 << 2); } EXPECT_EQ(expectedEmitLocal, walker->getEmitLocalId()); char *crossThreadData = kernels[3]->getCrossThreadData(); size_t crossThreadDataSize = kernels[3]->getCrossThreadDataSize(); auto inlineSize = sizeof(INLINE_DATA); EXPECT_EQ(0, memcmp(walker->getInlineDataPointer(), crossThreadData, inlineSize)); crossThreadDataSize -= inlineSize; crossThreadData += inlineSize; void *payloadData = ih.getCpuBase(); EXPECT_EQ(0, memcmp(payloadData, crossThreadData, crossThreadDataSize)); expectMemory(variables[3].destMemory, variables[3].expectedMemory, variables[3].sizeWrittenMemory); expectMemory(variables[3].remainderDestMemory, variables[3].expectedRemainderMemory, variables[3].sizeRemainderMemory); } struct HwLocalIdsFixture : AubDispatchThreadDataFixture { void SetUp() override { debugRestorer = std::make_unique(); DebugManager.flags.EnableHwGenerationLocalIds.set(1); initializeKernel2Variables(); AubDispatchThreadDataFixture::SetUp(); if (kernels[2]->getKernelInfo().kernelDescriptor.kernelAttributes.flags.passInlineData) { DebugManager.flags.EnablePassInlineData.set(true); } setUpKernel2(); } void initializeKernel2Variables() { kernelIds |= (1 << 2); variables[2].sizeUserMemory = 4096; variables[2].scalarArg = 0xAA; variables[2].typeSize = sizeof(unsigned int); variables[2].gwsSize = 256; variables[2].lwsSize = 32; } void setUpKernel2() { memset(variables[2].destMemory, 0xFE, variables[2].sizeUserMemory); kernels[2]->setArg(0, sizeof(variables[2].scalarArg), &variables[2].scalarArg); kernels[2]->setArg(1, variables[2].destBuffer); variables[2].sizeWrittenMemory = variables[2].gwsSize * variables[2].typeSize; variables[2].expectedMemory = alignedMalloc(variables[2].sizeWrittenMemory, 4096); unsigned int *expectedData = 
static_cast(variables[2].expectedMemory); for (size_t i = 0; i < variables[2].gwsSize; i++) { *(expectedData + i) = variables[2].scalarArg; } variables[2].sizeRemainderMemory = variables[2].sizeUserMemory - variables[2].sizeWrittenMemory; variables[2].expectedRemainderMemory = alignedMalloc(variables[2].sizeRemainderMemory, 4096); memcpy_s(variables[2].expectedRemainderMemory, variables[2].sizeRemainderMemory, variables[2].destMemory, variables[2].sizeRemainderMemory); variables[2].remainderDestMemory = static_cast(variables[2].destMemory) + variables[2].sizeWrittenMemory; } }; using XeHPAndLaterAubHwLocalIdsTest = Test; HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterAubHwLocalIdsTest, WhenEnqueueDimensionsArePow2ThenSetEmitLocalIdsAndGenerateLocalIdsFields) { using WALKER_TYPE = typename FamilyType::WALKER_TYPE; using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; cl_uint workDim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {variables[2].gwsSize, 1, 1}; size_t localWorkSize[3] = {variables[2].lwsSize, 1, 1}; cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event *event = nullptr; auto retVal = pCmdQ->enqueueKernel( kernels[2].get(), workDim, globalWorkOffset, globalWorkSize, localWorkSize, numEventsInWaitList, eventWaitList, event); ASSERT_EQ(CL_SUCCESS, retVal); HardwareParse hwParser; hwParser.parseCommands(pCmdQ->getCS(0), 0); hwParser.findHardwareCommands(); EXPECT_NE(hwParser.itorWalker, hwParser.cmdList.end()); auto walker = genCmdCast(*hwParser.itorWalker); auto localId = kernels[2]->getKernelInfo().kernelDescriptor.kernelAttributes.localId; uint32_t expectedEmitLocal = 0; if (localId[0]) { expectedEmitLocal |= (1 << 0); } if (localId[1]) { expectedEmitLocal |= (1 << 1); } if (localId[2]) { expectedEmitLocal |= (1 << 2); } EXPECT_EQ(expectedEmitLocal, walker->getEmitLocalId()); EXPECT_EQ(1u, walker->getGenerateLocalId()); auto kernelAllocationGpuAddr = 
kernels[2]->getKernelInfo().kernelAllocation->getGpuAddressToPatch(); auto skipOffset = kernels[2]->getKernelInfo().kernelDescriptor.entryPoints.skipPerThreadDataLoad; uint64_t kernelStartPointer = kernelAllocationGpuAddr + skipOffset; INTERFACE_DESCRIPTOR_DATA &idd = walker->getInterfaceDescriptor(); EXPECT_EQ(static_cast(kernelStartPointer), idd.getKernelStartPointer()); pCmdQ->flush(); expectMemory(variables[2].destMemory, variables[2].expectedMemory, variables[2].sizeWrittenMemory); expectMemory(variables[2].remainderDestMemory, variables[2].expectedRemainderMemory, variables[2].sizeRemainderMemory); } HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterAubHwLocalIdsTest, givenNonPowOf2LocalWorkSizeButCompatibleWorkOrderWhenLocalIdsAreUsedThenDataVerifiesCorrectly) { using WALKER_TYPE = typename FamilyType::WALKER_TYPE; using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; cl_uint workDim = 1; size_t globalWorkSize[3] = {200, 1, 1}; size_t localWorkSize[3] = {200, 1, 1}; auto retVal = pCmdQ->enqueueKernel( kernels[2].get(), workDim, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); HardwareParse hwParser; hwParser.parseCommands(pCmdQ->getCS(0), 0); hwParser.findHardwareCommands(); EXPECT_NE(hwParser.itorWalker, hwParser.cmdList.end()); auto walker = genCmdCast(*hwParser.itorWalker); auto localId = kernels[2]->getKernelInfo().kernelDescriptor.kernelAttributes.localId; uint32_t expectedEmitLocal = 0; if (localId[0]) { expectedEmitLocal |= (1 << 0); } if (localId[1]) { expectedEmitLocal |= (1 << 1); } if (localId[2]) { expectedEmitLocal |= (1 << 2); } EXPECT_EQ(expectedEmitLocal, walker->getEmitLocalId()); EXPECT_EQ(1u, walker->getGenerateLocalId()); EXPECT_EQ(4u, walker->getWalkOrder()); pCmdQ->flush(); expectMemory(variables[2].destMemory, variables[2].expectedMemory, globalWorkSize[0] * variables[2].typeSize); } struct HwLocalIdsWithSubGroups : AubDispatchThreadDataFixture { void SetUp() override { 
debugRestorer = std::make_unique(); DebugManager.flags.EnableHwGenerationLocalIds.set(1); kernelIds |= (1 << 9); variables[0].sizeUserMemory = 16 * KB; AubDispatchThreadDataFixture::SetUp(); memset(variables[0].destMemory, 0, variables[0].sizeUserMemory); variables[0].expectedMemory = alignedMalloc(variables[0].sizeUserMemory, 4096); kernels[9]->setArg(0, variables[0].destBuffer); } }; using XeHPAndLaterAubHwLocalIdsWithSubgroupsTest = Test; HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterAubHwLocalIdsWithSubgroupsTest, givenKernelUsingSubgroupsWhenLocalIdsAreGeneratedByHwThenValuesAreCorrect) { using WALKER_TYPE = typename FamilyType::WALKER_TYPE; using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; cl_uint workDim = 1; size_t globalWorkSize[3] = {200, 1, 1}; size_t localWorkSize[3] = {200, 1, 1}; auto retVal = pCmdQ->enqueueKernel( kernels[9].get(), workDim, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); HardwareParse hwParser; hwParser.parseCommands(pCmdQ->getCS(0), 0); hwParser.findHardwareCommands(); EXPECT_NE(hwParser.itorWalker, hwParser.cmdList.end()); auto walker = genCmdCast(*hwParser.itorWalker); auto localId = kernels[9]->getKernelInfo().kernelDescriptor.kernelAttributes.localId; uint32_t expectedEmitLocal = 0; if (localId[0]) { expectedEmitLocal |= (1 << 0); } if (localId[1]) { expectedEmitLocal |= (1 << 1); } if (localId[2]) { expectedEmitLocal |= (1 << 2); } EXPECT_EQ(expectedEmitLocal, walker->getEmitLocalId()); EXPECT_EQ(1u, walker->getGenerateLocalId()); EXPECT_EQ(4u, walker->getWalkOrder()); pCmdQ->finish(); //we expect sequence of local ids from 0..199 auto expectedMemory = reinterpret_cast(variables[0].expectedMemory); auto currentWorkItem = 0u; while (currentWorkItem < localWorkSize[0]) { expectedMemory[0] = currentWorkItem++; expectedMemory++; } expectMemory(variables[0].destMemory, variables[0].expectedMemory, ptrDiff(expectedMemory, variables[0].expectedMemory)); 
}aub_multicontext_tests_xehp_and_later.cpp000066400000000000000000000733311422164147700367430ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/command_queue/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_container/implicit_scaling.h" #include "shared/source/command_container/walker_partition_xehp_and_later.h" #include "shared/source/command_stream/aub_command_stream_receiver_hw.h" #include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/source/os_interface/os_context.h" #include "shared/test/common/cmd_parse/hw_parse.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/mocks/mock_allocation_properties.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/test_macros/test.h" #include "opencl/extensions/public/cl_ext_private.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/helpers/cl_memory_properties_helpers.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/mem_obj/image.h" #include "opencl/test/unit_test/aub_tests/fixtures/multicontext_aub_fixture.h" #include "opencl/test/unit_test/fixtures/simple_arg_kernel_fixture.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" using namespace NEO; template struct MultitileMulticontextTests : public MulticontextAubFixture, public ::testing::Test { void SetUp() override { MulticontextAubFixture::SetUp(numberOfTiles, enabledCommandStreamers, false); } void TearDown() override { MulticontextAubFixture::TearDown(); } template void runAubTest() { cl_int retVal = CL_SUCCESS; const uint32_t bufferSize = 64 * KB; uint8_t writePattern[bufferSize]; uint8_t initPattern[bufferSize]; std::fill(writePattern, writePattern + sizeof(writePattern), 1); 
std::fill(initPattern, initPattern + sizeof(initPattern), 0); std::vector>> regularBuffers; std::vector>> tileOnlyBuffers; cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR; regularBuffers.resize(tileDevices.size()); tileOnlyBuffers.resize(tileDevices.size()); for (uint32_t tile = 0; tile < tileDevices.size(); tile++) { for (uint32_t tileEngine = 0; tileEngine < commandQueues[tile].size(); tileEngine++) { DebugManager.flags.DoCpuCopyOnWriteBuffer.set(true); auto memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, 0, 0, &context->getDevice(0)->getDevice()); auto regularBuffer = Buffer::create( context.get(), memoryProperties, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, 0, bufferSize, initPattern, retVal); auto tileOnlyProperties = ClMemoryPropertiesHelper::createMemoryProperties( flags, 0, 0, context->getDevice(0)->getDevice().getNearestGenericSubDevice(tile)); auto tileOnlyBuffer = Buffer::create(context.get(), tileOnlyProperties, flags, 0, bufferSize, initPattern, retVal); DebugManager.flags.DoCpuCopyOnWriteBuffer.set(false); regularBuffer->forceDisallowCPUCopy = true; tileOnlyBuffer->forceDisallowCPUCopy = true; regularBuffers[tile].push_back(std::unique_ptr(regularBuffer)); tileOnlyBuffers[tile].push_back(std::unique_ptr(tileOnlyBuffer)); commandQueues[tile][tileEngine]->enqueueWriteBuffer(regularBuffer, CL_FALSE, 0, bufferSize, writePattern, nullptr, 0, nullptr, nullptr); commandQueues[tile][tileEngine]->enqueueWriteBuffer(tileOnlyBuffer, CL_FALSE, 0, bufferSize, writePattern, nullptr, 0, nullptr, nullptr); commandQueues[tile][tileEngine]->flush(); } } for (uint32_t tile = 0; tile < tileDevices.size(); tile++) { for (uint32_t tileEngine = 0; tileEngine < commandQueues[tile].size(); tileEngine++) { getSimulatedCsr(tile, tileEngine)->pollForCompletion(); auto regularBufferGpuAddress = static_cast(regularBuffers[tile][tileEngine]->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress()); auto 
tileOnlyBufferGpuAddress = static_cast(tileOnlyBuffers[tile][tileEngine]->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress()); expectMemory(reinterpret_cast(regularBufferGpuAddress), writePattern, bufferSize, tile, tileEngine); expectMemory(reinterpret_cast(tileOnlyBufferGpuAddress), writePattern, bufferSize, tile, tileEngine); } } } template void runAubWriteImageTest() { if (!tileDevices[0]->getSharedDeviceInfo().imageSupport) { GTEST_SKIP(); } cl_int retVal = CL_SUCCESS; auto testWidth = 5u; auto testHeight = 5u; auto testDepth = 1u; auto numPixels = testWidth * testHeight * testDepth; cl_image_format imageFormat; imageFormat.image_channel_data_type = CL_FLOAT; imageFormat.image_channel_order = CL_RGBA; cl_mem_flags flags = 0; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); cl_image_desc imageDesc; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_width = testWidth; imageDesc.image_height = testHeight; imageDesc.image_depth = testDepth; imageDesc.image_array_size = 1; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = NULL; auto perChannelDataSize = 4u; auto numChannels = 4u; auto elementSize = perChannelDataSize * numChannels; auto srcMemory = (uint8_t *)alignedMalloc(elementSize * numPixels, MemoryConstants::pageSize); for (size_t i = 0; i < numPixels * elementSize; ++i) { auto origValue = static_cast(i); memcpy(srcMemory + i, &origValue, sizeof(origValue)); } size_t origin[3] = {0, 0, 0}; const size_t region[3] = {testWidth, testHeight, testDepth}; size_t inputRowPitch = testWidth * elementSize; size_t inputSlicePitch = inputRowPitch * testHeight; std::vector>> images; images.resize(tileDevices.size()); for (uint32_t tile = 0; tile < tileDevices.size(); tile++) { for (uint32_t tileEngine = 0; tileEngine < commandQueues[tile].size(); tileEngine++) 
{ Image *dstImage = Image::create( context.get(), ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context->getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal); ASSERT_NE(nullptr, dstImage); memset(dstImage->getCpuAddress(), 0xFF, dstImage->getSize()); retVal = commandQueues[tile][tileEngine]->enqueueWriteImage( dstImage, CL_FALSE, origin, region, inputRowPitch, inputSlicePitch, srcMemory, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); images[tile].push_back(std::unique_ptr(dstImage)); } } for (uint32_t tile = 0; tile < tileDevices.size(); tile++) { for (uint32_t tileEngine = 0; tileEngine < commandQueues[tile].size(); tileEngine++) { commandQueues[tile][tileEngine]->flush(); } } std::unique_ptr dstMemory; for (uint32_t tile = 0; tile < tileDevices.size(); tile++) { for (uint32_t tileEngine = 0; tileEngine < commandQueues[tile].size(); tileEngine++) { dstMemory.reset(new uint8_t[images[tile][tileEngine]->getSize()]); memset(dstMemory.get(), 0xFF, images[tile][tileEngine]->getSize()); commandQueues[tile][tileEngine]->enqueueReadImage( images[tile][tileEngine].get(), CL_FALSE, origin, region, 0, 0, dstMemory.get(), nullptr, 0, nullptr, nullptr); commandQueues[tile][tileEngine]->flush(); auto rowPitch = images[tile][tileEngine]->getHostPtrRowPitch(); auto slicePitch = images[tile][tileEngine]->getHostPtrSlicePitch(); auto pSrcMemory = srcMemory; auto pDstMemory = dstMemory.get(); for (size_t z = 0; z < testDepth; ++z) { for (size_t y = 0; y < testHeight; ++y) { expectMemory(pDstMemory, pSrcMemory, testWidth * elementSize, tile, tileEngine); pSrcMemory = ptrOffset(pSrcMemory, testWidth * elementSize); pDstMemory = ptrOffset(pDstMemory, rowPitch); } pDstMemory = ptrOffset(pDstMemory, slicePitch - (rowPitch * (testHeight > 0 ? 
testHeight : 1))); } } } alignedFree(srcMemory); } }; // 4 Tiles using FourTilesAllContextsTest = MultitileMulticontextTests<4, MulticontextAubFixture::EnabledCommandStreamers::All>; HWCMDTEST_F(IGFX_XE_HP_CORE, FourTilesAllContextsTest, GENERATEONLY_givenFourTilesAndAllContextsWhenSubmittingThenDataIsValid) { runAubTest(); } using FourTilesDualContextTest = MultitileMulticontextTests<4, MulticontextAubFixture::EnabledCommandStreamers::Dual>; HWCMDTEST_F(IGFX_XE_HP_CORE, FourTilesDualContextTest, HEAVY_givenFourTilesAndDualContextWhenSubmittingThenDataIsValid) { runAubTest(); } using FourTilesSingleContextTest = MultitileMulticontextTests<4, MulticontextAubFixture::EnabledCommandStreamers::Single>; HWCMDTEST_F(IGFX_XE_HP_CORE, FourTilesSingleContextTest, givenFourTilesAndSingleContextWhenSubmittingThenDataIsValid) { runAubTest(); } struct EnqueueWithWalkerPartitionFourTilesTests : public FourTilesSingleContextTest, SimpleKernelFixture { void SetUp() override { DebugManager.flags.EnableWalkerPartition.set(1u); kernelIds |= (1 << 5); kernelIds |= (1 << 8); FourTilesSingleContextTest::SetUp(); SimpleKernelFixture::SetUp(rootDevice, context.get()); rootCsr = rootDevice->getDefaultEngine().commandStreamReceiver; EXPECT_EQ(4u, rootCsr->getOsContext().getNumSupportedDevices()); engineControlForFusedQueue = {rootCsr, &rootCsr->getOsContext()}; bufferSize = 16 * MemoryConstants::kiloByte; auto destMemory = std::make_unique(bufferSize); memset(destMemory.get(), 0x0, bufferSize); cl_int retVal = CL_SUCCESS; buffer.reset(Buffer::create(multiTileDefaultContext.get(), CL_MEM_COPY_HOST_PTR, bufferSize, destMemory.get(), retVal)); clBuffer = buffer.get(); } void TearDown() override { SimpleKernelFixture::TearDown(); FourTilesSingleContextTest::TearDown(); } void *getGpuAddress(Buffer &buffer) { return reinterpret_cast(buffer.getGraphicsAllocation(this->rootDeviceIndex)->getGpuAddress()); } uint32_t bufferSize = 0; std::unique_ptr buffer; cl_mem clBuffer; EngineControl 
engineControlForFusedQueue = {}; CommandStreamReceiver *rootCsr = nullptr; }; struct DynamicWalkerPartitionFourTilesTests : EnqueueWithWalkerPartitionFourTilesTests { void SetUp() override { DebugManager.flags.EnableStaticPartitioning.set(0); EnqueueWithWalkerPartitionFourTilesTests::SetUp(); } DebugManagerStateRestore restore{}; }; HWCMDTEST_F(IGFX_XE_HP_CORE, DynamicWalkerPartitionFourTilesTests, whenWalkerPartitionIsEnabledForKernelWithAtomicThenOutputDataIsValid) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; auto mockCommandQueue = new MockCommandQueueHw(multiTileDefaultContext.get(), rootDevice, nullptr); commandQueues[0][0].reset(mockCommandQueue); constexpr size_t globalWorkOffset[] = {0, 0, 0}; constexpr size_t gwsSize[] = {512, 1, 1}; constexpr size_t lwsSize[] = {32, 1, 1}; constexpr cl_uint workingDimensions = 1; cl_int retVal = CL_SUCCESS; kernels[5]->setArg(0, sizeof(cl_mem), &clBuffer); retVal = mockCommandQueue->enqueueKernel(kernels[5].get(), workingDimensions, globalWorkOffset, gwsSize, lwsSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); mockCommandQueue->flush(); HardwareParse hwParser; auto &cmdStream = mockCommandQueue->getCS(0); hwParser.parseCommands(cmdStream, 0); bool lastSemaphoreFound = false; uint64_t tileAtomicGpuAddress = 0; for (auto it = hwParser.cmdList.rbegin(); it != hwParser.cmdList.rend(); it++) { auto semaphoreCmd = genCmdCast(*it); if (semaphoreCmd) { if (UnitTestHelper::isAdditionalMiSemaphoreWait(*semaphoreCmd)) { continue; } EXPECT_EQ(4u, semaphoreCmd->getSemaphoreDataDword()); tileAtomicGpuAddress = semaphoreCmd->getSemaphoreGraphicsAddress(); lastSemaphoreFound = true; break; } } if (ImplicitScalingDispatch::getPipeControlStallRequired()) { EXPECT_TRUE(lastSemaphoreFound); EXPECT_NE(0u, tileAtomicGpuAddress); } else { EXPECT_FALSE(lastSemaphoreFound); EXPECT_EQ(0u, tileAtomicGpuAddress); } expectMemory(getGpuAddress(*buffer), &gwsSize[workingDimensions - 1], sizeof(uint32_t), 0, 0); uint32_t 
expectedAtomicValue = 4; if (ImplicitScalingDispatch::getPipeControlStallRequired()) { expectMemory(reinterpret_cast(tileAtomicGpuAddress), &expectedAtomicValue, sizeof(uint32_t), 0, 0); } constexpr uint32_t workgroupCount = static_cast(gwsSize[workingDimensions - 1] / lwsSize[workingDimensions - 1]); auto groupSpecificWorkCounts = ptrOffset(getGpuAddress(*buffer), 4); std::array workgroupCounts; std::fill(workgroupCounts.begin(), workgroupCounts.end(), static_cast(lwsSize[workingDimensions - 1])); expectMemory(groupSpecificWorkCounts, &workgroupCounts[0], workgroupCounts.size() * sizeof(uint32_t), 0, 0); } HWCMDTEST_F(IGFX_XE_HP_CORE, DynamicWalkerPartitionFourTilesTests, whenWalkerPartitionIsEnabledForKernelWithoutAtomicThenOutputDataIsValid) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; auto mockCommandQueue = new MockCommandQueueHw(multiTileDefaultContext.get(), rootDevice, nullptr); commandQueues[0][0].reset(mockCommandQueue); constexpr size_t globalWorkOffset[3] = {0, 0, 0}; constexpr size_t gwsSize[3] = {1024, 1, 1}; constexpr size_t lwsSize[3] = {32, 1, 1}; constexpr cl_uint workingDimensions = 1; cl_uint kernelIncrementCounter = 1024; cl_int retVal = CL_SUCCESS; kernels[8]->setArg(0, sizeof(cl_mem), &clBuffer); kernels[8]->setArg(1, kernelIncrementCounter); retVal = mockCommandQueue->enqueueKernel(kernels[8].get(), workingDimensions, globalWorkOffset, gwsSize, lwsSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); mockCommandQueue->flush(); constexpr uint32_t workgroupCount = static_cast(gwsSize[workingDimensions - 1] / lwsSize[workingDimensions - 1]); std::array workgroupCounts; std::fill(workgroupCounts.begin(), workgroupCounts.end(), kernelIncrementCounter); expectMemory(getGpuAddress(*buffer), &workgroupCounts[0], workgroupCounts.size() * sizeof(uint32_t), 0, 0); } struct StaticWalkerPartitionFourTilesTests : EnqueueWithWalkerPartitionFourTilesTests { void SetUp() override { 
DebugManager.flags.EnableStaticPartitioning.set(1); DebugManager.flags.EnableBlitterOperationsSupport.set(1); EnqueueWithWalkerPartitionFourTilesTests::SetUp(); } std::unique_ptr createTaskStream() { const AllocationProperties commandStreamAllocationProperties{rootDevice->getRootDeviceIndex(), true, MemoryConstants::pageSize, AllocationType::COMMAND_BUFFER, true, false, rootDevice->getDeviceBitfield()}; GraphicsAllocation *streamAllocation = rootDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties(commandStreamAllocationProperties); return std::make_unique(streamAllocation); } void destroyTaskStream(LinearStream &stream) { rootDevice->getMemoryManager()->freeGraphicsMemory(stream.getGraphicsAllocation()); } void flushTaskStream(LinearStream &stream) { DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.guardCommandBufferWithPipeControl = true; rootCsr->flushTask(stream, 0, &rootCsr->getIndirectHeap(IndirectHeap::Type::DYNAMIC_STATE, 0u), &rootCsr->getIndirectHeap(IndirectHeap::Type::INDIRECT_OBJECT, 0u), &rootCsr->getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0u), 0u, dispatchFlags, rootDevice->getDevice()); rootCsr->flushBatchedSubmissions(); } template void expectMemoryOnRootCsr(void *gfxAddress, const void *srcAddress, size_t length) { auto csr = static_cast *>(rootCsr); csr->expectMemoryEqual(gfxAddress, srcAddress, length); } DebugManagerStateRestore restore{}; }; HWCMDTEST_F(IGFX_XE_HP_CORE, StaticWalkerPartitionFourTilesTests, givenFourTilesWhenStaticWalkerPartitionIsEnabledForKernelThenOutputDataIsValid) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; auto mockCommandQueue = new MockCommandQueueHw(multiTileDefaultContext.get(), rootDevice, nullptr); commandQueues[0][0].reset(mockCommandQueue); constexpr size_t globalWorkOffset[3] = {0, 0, 0}; constexpr size_t gwsSize[3] = {1024, 1, 1}; constexpr size_t lwsSize[3] = {32, 1, 1}; constexpr cl_uint workingDimensions = 1; cl_uint 
kernelIncrementCounter = 1024; cl_int retVal = CL_SUCCESS; kernels[8]->setArg(0, sizeof(cl_mem), &clBuffer); kernels[8]->setArg(1, kernelIncrementCounter); retVal = mockCommandQueue->enqueueKernel(kernels[8].get(), workingDimensions, globalWorkOffset, gwsSize, lwsSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); mockCommandQueue->flush(); constexpr uint32_t workgroupCount = static_cast(gwsSize[workingDimensions - 1] / lwsSize[workingDimensions - 1]); std::array workgroupCounts; std::fill(workgroupCounts.begin(), workgroupCounts.end(), kernelIncrementCounter); expectMemoryOnRootCsr(getGpuAddress(*buffer), &workgroupCounts[0], workgroupCounts.size() * sizeof(uint32_t)); } HWCMDTEST_F(IGFX_XE_HP_CORE, StaticWalkerPartitionFourTilesTests, givenPreWalkerSyncWhenStaticWalkerPartitionIsThenAtomicsAreIncrementedCorrectly) { using WALKER_TYPE = typename FamilyType::WALKER_TYPE; auto taskStream = createTaskStream(); auto taskStreamCpu = taskStream->getSpace(0); auto taskStreamGpu = taskStream->getGraphicsAllocation()->getGpuAddress(); uint32_t totalBytesProgrammed = 0u; WALKER_TYPE walkerCmd = FamilyType::cmdInitGpgpuWalker; walkerCmd.setPartitionType(WALKER_TYPE::PARTITION_TYPE::PARTITION_TYPE_X); walkerCmd.getInterfaceDescriptor().setNumberOfThreadsInGpgpuThreadGroup(1u); WalkerPartition::WalkerPartitionArgs testArgs = {}; testArgs.initializeWparidRegister = true; testArgs.crossTileAtomicSynchronization = true; testArgs.emitPipeControlStall = true; testArgs.tileCount = static_cast(rootDevice->getDeviceBitfield().count()); testArgs.partitionCount = testArgs.tileCount; testArgs.synchronizeBeforeExecution = true; testArgs.secondaryBatchBuffer = false; testArgs.emitSelfCleanup = false; testArgs.staticPartitioning = true; testArgs.workPartitionAllocationGpuVa = rootCsr->getWorkPartitionAllocationGpuAddress(); WalkerPartition::constructStaticallyPartitionedCommandBuffer( taskStreamCpu, taskStreamGpu, &walkerCmd, totalBytesProgrammed, testArgs, *defaultHwInfo); 
taskStream->getSpace(totalBytesProgrammed); flushTaskStream(*taskStream); const auto controlSectionAddress = taskStreamGpu + WalkerPartition::computeStaticPartitioningControlSectionOffset(testArgs); const auto preWalkerSyncAddress = controlSectionAddress + offsetof(WalkerPartition::StaticPartitioningControlSection, synchronizeBeforeWalkerCounter); const auto postWalkerSyncAddress = controlSectionAddress + offsetof(WalkerPartition::StaticPartitioningControlSection, synchronizeAfterWalkerCounter); uint32_t expectedValue = 0x4; expectMemoryOnRootCsr(reinterpret_cast(preWalkerSyncAddress), &expectedValue, sizeof(expectedValue)); expectMemoryOnRootCsr(reinterpret_cast(postWalkerSyncAddress), &expectedValue, sizeof(expectedValue)); destroyTaskStream(*taskStream); } HWCMDTEST_F(IGFX_XE_HP_CORE, StaticWalkerPartitionFourTilesTests, whenNoPreWalkerSyncThenAtomicsAreIncrementedCorrectly) { using WALKER_TYPE = typename FamilyType::WALKER_TYPE; auto taskStream = createTaskStream(); auto taskStreamCpu = taskStream->getSpace(0); auto taskStreamGpu = taskStream->getGraphicsAllocation()->getGpuAddress(); uint32_t totalBytesProgrammed = 0u; WALKER_TYPE walkerCmd = FamilyType::cmdInitGpgpuWalker; walkerCmd.setPartitionType(WALKER_TYPE::PARTITION_TYPE::PARTITION_TYPE_X); walkerCmd.getInterfaceDescriptor().setNumberOfThreadsInGpgpuThreadGroup(1u); WalkerPartition::WalkerPartitionArgs testArgs = {}; testArgs.initializeWparidRegister = true; testArgs.crossTileAtomicSynchronization = true; testArgs.emitPipeControlStall = true; testArgs.tileCount = static_cast(rootDevice->getDeviceBitfield().count()); testArgs.partitionCount = testArgs.tileCount; testArgs.synchronizeBeforeExecution = false; testArgs.secondaryBatchBuffer = false; testArgs.emitSelfCleanup = false; testArgs.staticPartitioning = true; testArgs.workPartitionAllocationGpuVa = rootCsr->getWorkPartitionAllocationGpuAddress(); WalkerPartition::constructStaticallyPartitionedCommandBuffer( taskStreamCpu, taskStreamGpu, &walkerCmd, 
totalBytesProgrammed, testArgs, *defaultHwInfo); taskStream->getSpace(totalBytesProgrammed); flushTaskStream(*taskStream); const auto controlSectionAddress = taskStreamGpu + WalkerPartition::computeStaticPartitioningControlSectionOffset(testArgs); const auto preWalkerSyncAddress = controlSectionAddress + offsetof(WalkerPartition::StaticPartitioningControlSection, synchronizeBeforeWalkerCounter); const auto postWalkerSyncAddress = controlSectionAddress + offsetof(WalkerPartition::StaticPartitioningControlSection, synchronizeAfterWalkerCounter); uint32_t expectedValue = 0x0; expectMemoryOnRootCsr(reinterpret_cast(preWalkerSyncAddress), &expectedValue, sizeof(expectedValue)); expectedValue = 0x4; expectMemoryOnRootCsr(reinterpret_cast(postWalkerSyncAddress), &expectedValue, sizeof(expectedValue)); destroyTaskStream(*taskStream); } // 2 Tiles using TwoTilesAllContextsTest = MultitileMulticontextTests<2, MulticontextAubFixture::EnabledCommandStreamers::All>; HWCMDTEST_F(IGFX_XE_HP_CORE, TwoTilesAllContextsTest, HEAVY_givenTwoTilesAndAllContextsWhenSubmittingThenDataIsValid) { runAubTest(); } using TwoTilesDualContextTest = MultitileMulticontextTests<2, MulticontextAubFixture::EnabledCommandStreamers::Dual>; HWCMDTEST_F(IGFX_XE_HP_CORE, TwoTilesDualContextTest, givenTwoTilesAndDualContextWhenSubmittingThenDataIsValid) { runAubTest(); } using TwoTilesSingleContextTest = MultitileMulticontextTests<2, MulticontextAubFixture::EnabledCommandStreamers::Single>; HWCMDTEST_F(IGFX_XE_HP_CORE, TwoTilesSingleContextTest, givenTwoTilesAndSingleContextWhenSubmittingThenDataIsValid) { runAubTest(); } // 1 Tile using SingleTileAllContextsTest = MultitileMulticontextTests<1, MulticontextAubFixture::EnabledCommandStreamers::All>; HWCMDTEST_F(IGFX_XE_HP_CORE, SingleTileAllContextsTest, GENERATEONLY_givenSingleTileAndAllContextsWhenSubmittingThenDataIsValid) { runAubTest(); } using SingleTileDualContextTest = MultitileMulticontextTests<1, 
MulticontextAubFixture::EnabledCommandStreamers::Dual>; HWCMDTEST_F(IGFX_XE_HP_CORE, SingleTileDualContextTest, givenSingleTileAndDualContextWhenSubmittingThenDataIsValid) { runAubTest(); } HWCMDTEST_F(IGFX_XE_HP_CORE, SingleTileDualContextTest, givenSingleAllocationWhenUpdatedFromDifferentContextThenDataIsValid) { cl_int retVal = CL_SUCCESS; const uint32_t bufferSize = 256; const uint32_t halfBufferSize = bufferSize / 2; uint8_t writePattern1[halfBufferSize]; uint8_t writePattern2[halfBufferSize]; uint8_t initPattern[bufferSize]; std::fill(initPattern, initPattern + sizeof(initPattern), 0); std::fill(writePattern1, writePattern1 + sizeof(writePattern1), 1); std::fill(writePattern2, writePattern2 + sizeof(writePattern2), 2); std::unique_ptr buffer(Buffer::create(context.get(), CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, bufferSize, initPattern, retVal)); buffer->forceDisallowCPUCopy = true; auto simulatedCsr0 = getSimulatedCsr(0, 0); simulatedCsr0->overrideDispatchPolicy(DispatchMode::BatchedDispatch); auto simulatedCsr1 = getSimulatedCsr(0, 1); simulatedCsr1->overrideDispatchPolicy(DispatchMode::BatchedDispatch); commandQueues[0][0]->enqueueWriteBuffer(buffer.get(), CL_FALSE, 0, halfBufferSize, writePattern1, nullptr, 0, nullptr, nullptr); commandQueues[0][1]->enqueueWriteBuffer(buffer.get(), CL_FALSE, halfBufferSize, halfBufferSize, writePattern2, nullptr, 0, nullptr, nullptr); commandQueues[0][1]->finish(); // submit second enqueue first to make sure that residency flow is correct commandQueues[0][0]->finish(); auto gpuPtr = reinterpret_cast(buffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress()); expectMemory(gpuPtr, writePattern1, halfBufferSize, 0, 0); expectMemory(ptrOffset(gpuPtr, halfBufferSize), writePattern2, halfBufferSize, 0, 1); } // 1 |Tile using SingleTileDualContextTest = MultitileMulticontextTests<1, MulticontextAubFixture::EnabledCommandStreamers::Dual>; HWCMDTEST_F(IGFX_XE_HP_CORE, SingleTileDualContextTest, 
givenSingleTileAndDualContextWhenWritingImageThenDataIsValid) {
    runAubWriteImageTest();
}

// Each test below only selects a tile count / command-streamer configuration
// through its alias and delegates to runAubWriteImageTest().
using SingleTileAllContextsTest = MultitileMulticontextTests<1, MulticontextAubFixture::EnabledCommandStreamers::All>;
HWCMDTEST_F(IGFX_XE_HP_CORE, SingleTileAllContextsTest, HEAVY_givenSingleTileAndAllContextsWhenWritingImageThenDataIsValid) {
    runAubWriteImageTest();
}

// 2 Tiles
using TwoTilesSingleContextTest = MultitileMulticontextTests<2, MulticontextAubFixture::EnabledCommandStreamers::Single>;
HWCMDTEST_F(IGFX_XE_HP_CORE, TwoTilesSingleContextTest, givenTwoTilesAndSingleContextWhenWritingImageThenDataIsValid) {
    runAubWriteImageTest();
}

using TwoTilesDualContextTest = MultitileMulticontextTests<2, MulticontextAubFixture::EnabledCommandStreamers::Dual>;
HWCMDTEST_F(IGFX_XE_HP_CORE, TwoTilesDualContextTest, givenTwoTilesAndDualContextWhenWritingImageThenDataIsValid) {
    runAubWriteImageTest();
}

using TwoTilesAllContextsTest = MultitileMulticontextTests<2, MulticontextAubFixture::EnabledCommandStreamers::All>;
HWCMDTEST_F(IGFX_XE_HP_CORE, TwoTilesAllContextsTest, GENERATEONLY_givenTwoTilesAndAllContextsWhenWritingImageThenDataIsValid) {
    runAubWriteImageTest();
}

// 4 Tiles
using FourTilesSingleContextTest = MultitileMulticontextTests<4, MulticontextAubFixture::EnabledCommandStreamers::Single>;
HWCMDTEST_F(IGFX_XE_HP_CORE, FourTilesSingleContextTest, givenFourTilesAndSingleContextWhenWritingImageThenDataIsValid) {
    runAubWriteImageTest();
}

using FourTilesDualContextTest = MultitileMulticontextTests<4, MulticontextAubFixture::EnabledCommandStreamers::Dual>;
HWCMDTEST_F(IGFX_XE_HP_CORE, FourTilesDualContextTest, GENERATEONLY_givenFourTilesAndDualContextWhenWritingImageThenDataIsValid) {
    runAubWriteImageTest();
}

using FourTilesAllContextsTest = MultitileMulticontextTests<4, MulticontextAubFixture::EnabledCommandStreamers::All>;
HWCMDTEST_F(IGFX_XE_HP_CORE, FourTilesAllContextsTest, GENERATEONLY_givenFourTilesAndAllContextsWhenWritingImageThenDataIsValid) {
runAubWriteImageTest(); } aub_one_va_multi_physical_tests_xehp_and_later.cpp000066400000000000000000000140121422164147700405500ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/command_queue/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/tests_configuration.h" #include "shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/aub_tests/fixtures/multicontext_aub_fixture.h" using namespace NEO; struct OneVAFourPhysicalStoragesTest : public MulticontextAubFixture, public ::testing::Test { static const uint32_t numTiles = 4; void SetUp() override { MulticontextAubFixture::SetUp(numTiles, MulticontextAubFixture::EnabledCommandStreamers::Single, false); } void TearDown() override { MulticontextAubFixture::TearDown(); } }; HWCMDTEST_F(IGFX_XE_HP_CORE, OneVAFourPhysicalStoragesTest, givenBufferWithFourPhysicalStoragesWhenEnqueueReadBufferThenReadFromCorrectBank) { if (is32bit) { return; } cl_int retVal = CL_OUT_OF_HOST_MEMORY; const uint32_t bufferSize = MemoryConstants::pageSize64k; uint8_t *memoryToWrite[numTiles]; uint8_t *memoryToRead[numTiles]; auto buffer = clUniquePtr(Buffer::create(context.get(), {}, bufferSize, nullptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); buffer->forceDisallowCPUCopy = true; auto allocation = buffer->getGraphicsAllocation(rootDeviceIndex); EXPECT_EQ(MemoryPool::LocalMemory, allocation->getMemoryPool()); auto gpuAddress = allocation->getGpuAddress(); allocation->storageInfo.cloningOfPageTables = false; allocation->storageInfo.memoryBanks = 0; allocation->setAubWritable(false, static_cast(maxNBitValue(numTiles))); for (uint32_t tile = 0; tile < numTiles; tile++) { memoryToWrite[tile] = reinterpret_cast(alignedMalloc(bufferSize, MemoryConstants::pageSize64k)); std::fill(memoryToWrite[tile], ptrOffset(memoryToWrite[tile], 
bufferSize), tile + 1); auto hardwareContext = getSimulatedCsr(tile, 0)->hardwareContextController->hardwareContexts[0].get(); hardwareContext->writeMemory2({gpuAddress, memoryToWrite[tile], bufferSize, (1u << tile), AubMemDump::DataTypeHintValues::TraceNotype, MemoryConstants::pageSize64k}); } for (uint32_t tile = 0; tile < numTiles; tile++) { memoryToRead[tile] = reinterpret_cast(alignedMalloc(bufferSize, MemoryConstants::pageSize64k)); commandQueues[tile][0]->enqueueReadBuffer(buffer.get(), CL_FALSE, 0, bufferSize, memoryToRead[tile], nullptr, 0, nullptr, nullptr); commandQueues[tile][0]->flush(); } for (uint32_t tile = 0; tile < numTiles; tile++) { expectMemory(memoryToRead[tile], memoryToWrite[tile], bufferSize, tile, 0); alignedFree(memoryToWrite[tile]); alignedFree(memoryToRead[tile]); } } HWCMDTEST_F(IGFX_XE_HP_CORE, OneVAFourPhysicalStoragesTest, givenBufferWithFourPhysicalStoragesWhenEnqueueWriteBufferThenCorrectMemoryIsWrittenToSpecificBank) { if (is32bit) { return; } cl_int retVal = CL_OUT_OF_HOST_MEMORY; const uint32_t bufferSize = MemoryConstants::pageSize64k; uint8_t *memoryToWrite[numTiles]; auto buffer = clUniquePtr(Buffer::create(context.get(), {}, bufferSize, nullptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); buffer->forceDisallowCPUCopy = true; auto allocation = buffer->getGraphicsAllocation(rootDeviceIndex); EXPECT_EQ(MemoryPool::LocalMemory, allocation->getMemoryPool()); auto gpuAddress = allocation->getGpuAddress(); allocation->storageInfo.cloningOfPageTables = false; allocation->storageInfo.memoryBanks = 0; for (uint32_t tile = 0; tile < numTiles; tile++) { memoryToWrite[tile] = reinterpret_cast(alignedMalloc(bufferSize, MemoryConstants::pageSize64k)); std::fill(memoryToWrite[tile], ptrOffset(memoryToWrite[tile], bufferSize), tile + 1); allocation->setAubWritable(true, 0xffffffff); commandQueues[tile][0]->enqueueWriteBuffer(buffer.get(), CL_TRUE, 0, bufferSize, memoryToWrite[tile], nullptr, 0, nullptr, nullptr); } for (uint32_t tile = 0; tile 
< numTiles; tile++) { expectMemory(reinterpret_cast(gpuAddress), memoryToWrite[tile], bufferSize, tile, 0); alignedFree(memoryToWrite[tile]); } } HWCMDTEST_F(IGFX_XE_HP_CORE, OneVAFourPhysicalStoragesTest, givenColouredBufferWhenEnqueueWriteBufferThenCorrectMemoryIsWrittenToSpecificBank) { if (is32bit) { return; } cl_int retVal = CL_OUT_OF_HOST_MEMORY; const uint32_t bufferSize = numTiles * MemoryConstants::pageSize64k; const auto allTilesValue = maxNBitValue(numTiles); uint8_t *memoryToWrite = reinterpret_cast(alignedMalloc(bufferSize, MemoryConstants::pageSize64k)); auto buffer = clUniquePtr(Buffer::create(context.get(), {}, bufferSize, nullptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); buffer->forceDisallowCPUCopy = true; auto allocation = buffer->getGraphicsAllocation(rootDeviceIndex); EXPECT_EQ(MemoryPool::LocalMemory, allocation->getMemoryPool()); EXPECT_EQ(allTilesValue, allocation->storageInfo.memoryBanks.to_ullong()); EXPECT_EQ(allTilesValue, allocation->storageInfo.pageTablesVisibility.to_ullong()); EXPECT_TRUE(allocation->storageInfo.cloningOfPageTables); for (uint32_t tile = 0; tile < numTiles; tile++) { std::fill(ptrOffset(memoryToWrite, tile * MemoryConstants::pageSize64k), ptrOffset(memoryToWrite, (tile + 1) * MemoryConstants::pageSize64k), tile + 1); } commandQueues[0][0]->enqueueWriteBuffer(buffer.get(), CL_TRUE, 0, bufferSize, memoryToWrite, nullptr, 0, nullptr, nullptr); auto gpuAddress = allocation->getGpuAddress(); for (uint32_t tile = 0; tile < numTiles; tile++) { for (uint32_t offset = 0; offset < bufferSize; offset += MemoryConstants::pageSize64k) { expectMemory(reinterpret_cast(gpuAddress + offset), ptrOffset(memoryToWrite, offset), MemoryConstants::pageSize64k, tile, 0); } } alignedFree(memoryToWrite); } aub_postsync_write_tests_xehp_and_later.cpp000066400000000000000000000207521422164147700372770ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/command_queue/* * Copyright (C) 2022 Intel Corporation * 
* SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/timestamp_packet.h" #include "shared/source/utilities/tag_allocator.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/aub_tests/fixtures/hello_world_fixture.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" using namespace NEO; struct PostSyncWriteXeHPTests : public HelloWorldFixture, public ::testing::Test { void SetUp() override { DebugManager.flags.EnableTimestampPacket.set(true); HelloWorldFixture::SetUp(); EXPECT_TRUE(pCommandStreamReceiver->peekTimestampPacketWriteEnabled()); }; void TearDown() override { HelloWorldFixture::TearDown(); } DebugManagerStateRestore restore; cl_int retVal = CL_SUCCESS; }; HWCMDTEST_F(IGFX_XE_HP_CORE, PostSyncWriteXeHPTests, givenTimestampWriteEnabledWhenEnqueueingThenWritePostsyncOperation) { MockCommandQueueHw cmdQ(pContext, pClDevice, nullptr); const uint32_t bufferSize = 4; std::unique_ptr buffer(Buffer::create(pContext, CL_MEM_READ_WRITE, bufferSize, nullptr, retVal)); auto graphicsAllocation = buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex()); memset(graphicsAllocation->getUnderlyingBuffer(), 0, graphicsAllocation->getUnderlyingBufferSize()); buffer->forceDisallowCPUCopy = true; uint8_t writeData[bufferSize] = {1, 2, 3, 4}; cmdQ.enqueueWriteBuffer(buffer.get(), CL_TRUE, 0, bufferSize, writeData, nullptr, 0, nullptr, nullptr); expectMemory(reinterpret_cast(graphicsAllocation->getGpuAddress()), writeData, bufferSize); typename FamilyType::TimestampPacketType expectedTimestampValues[4] = {1, 1, 1, 1}; auto tagGpuAddress = reinterpret_cast(cmdQ.timestampPacketContainer->peekNodes().at(0)->getGpuAddress()); 
expectMemoryNotEqual(tagGpuAddress, expectedTimestampValues, 4 * sizeof(typename FamilyType::TimestampPacketType)); } HWCMDTEST_F(IGFX_XE_HP_CORE, PostSyncWriteXeHPTests, givenDebugVariableEnabledWhenEnqueueingThenWritePostsyncOperationInImmWriteMode) { DebugManager.flags.UseImmDataWriteModeOnPostSyncOperation.set(true); MockCommandQueueHw cmdQ(pContext, pClDevice, nullptr); const uint32_t bufferSize = 4; std::unique_ptr buffer(Buffer::create(pContext, CL_MEM_READ_WRITE, bufferSize, nullptr, retVal)); auto graphicsAllocation = buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex()); memset(graphicsAllocation->getUnderlyingBuffer(), 0, graphicsAllocation->getUnderlyingBufferSize()); buffer->forceDisallowCPUCopy = true; uint8_t writeData[bufferSize] = {1, 2, 3, 4}; cmdQ.enqueueWriteBuffer(buffer.get(), CL_TRUE, 0, bufferSize, writeData, nullptr, 0, nullptr, nullptr); expectMemory(reinterpret_cast(graphicsAllocation->getGpuAddress()), writeData, bufferSize); auto tagGpuAddress = reinterpret_cast(cmdQ.timestampPacketContainer->peekNodes().at(0)->getGpuAddress()); constexpr auto timestampPacketTypeSize = sizeof(typename FamilyType::TimestampPacketType); if constexpr (timestampPacketTypeSize == 4u) { typename FamilyType::TimestampPacketType expectedTimestampValues[4] = {1, 1, 2, 2}; expectMemory(tagGpuAddress, expectedTimestampValues, 4 * timestampPacketTypeSize); } else { typename FamilyType::TimestampPacketType expectedTimestampValues[4] = {1, 1, 0x2'0000'0002u, 1}; expectMemory(tagGpuAddress, expectedTimestampValues, 4 * timestampPacketTypeSize); } } HWCMDTEST_F(IGFX_XE_HP_CORE, PostSyncWriteXeHPTests, givenTwoBatchedEnqueuesWhenDependencyIsResolvedThenDecrementCounterOnGpu) { MockContext context(pCmdQ->getDevice().getSpecializedDevice()); pCommandStreamReceiver->overrideDispatchPolicy(DispatchMode::BatchedDispatch); const size_t bufferSize = 1024; auto retVal = CL_SUCCESS; uint8_t initialMemory[bufferSize] = {}; uint8_t writePattern1[bufferSize]; uint8_t 
writePattern2[bufferSize]; std::fill(writePattern1, writePattern1 + sizeof(writePattern1), 1); std::fill(writePattern2, writePattern2 + sizeof(writePattern2), 1); auto buffer = std::unique_ptr(Buffer::create(&context, CL_MEM_COPY_HOST_PTR, bufferSize, initialMemory, retVal)); //make sure that GPU copy is used buffer->forceDisallowCPUCopy = true; cl_event outEvent1, outEvent2; pCmdQ->enqueueWriteBuffer(buffer.get(), CL_FALSE, 0, bufferSize, writePattern1, nullptr, 0, nullptr, &outEvent1); auto node1 = castToObject(outEvent1)->getTimestampPacketNodes()->peekNodes().at(0); node1->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation()->setAubWritable(true, 0xffffffff); // allow to write again after Buffer::create pCmdQ->enqueueWriteBuffer(buffer.get(), CL_TRUE, 0, bufferSize, writePattern2, nullptr, 0, nullptr, &outEvent2); auto node2 = castToObject(outEvent2)->getTimestampPacketNodes()->peekNodes().at(0); expectMemory(reinterpret_cast(buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddress()), writePattern2, bufferSize); typename FamilyType::TimestampPacketType expectedEndTimestamp = 1; auto endTimestampAddress1 = TimestampPacketHelper::getContextEndGpuAddress(*node1); auto endTimestampAddress2 = TimestampPacketHelper::getGlobalEndGpuAddress(*node1); auto endTimestampAddress3 = TimestampPacketHelper::getContextEndGpuAddress(*node2); auto endTimestampAddress4 = TimestampPacketHelper::getGlobalEndGpuAddress(*node2); expectMemoryNotEqual(reinterpret_cast(endTimestampAddress1), &expectedEndTimestamp, sizeof(typename FamilyType::TimestampPacketType)); expectMemoryNotEqual(reinterpret_cast(endTimestampAddress2), &expectedEndTimestamp, sizeof(typename FamilyType::TimestampPacketType)); expectMemoryNotEqual(reinterpret_cast(endTimestampAddress3), &expectedEndTimestamp, sizeof(typename FamilyType::TimestampPacketType)); expectMemoryNotEqual(reinterpret_cast(endTimestampAddress4), &expectedEndTimestamp, sizeof(typename 
FamilyType::TimestampPacketType)); clReleaseEvent(outEvent1); clReleaseEvent(outEvent2); } HWCMDTEST_F(IGFX_XE_HP_CORE, PostSyncWriteXeHPTests, givenMultipleWalkersWhenEnqueueingThenWriteAllTimestamps) { MockContext context(pCmdQ->getDevice().getSpecializedDevice()); const size_t bufferSize = 70; const size_t writeSize = bufferSize - 2; uint8_t writeData[writeSize] = {}; cl_int retVal = CL_SUCCESS; cl_event outEvent; auto buffer = std::unique_ptr(Buffer::create(&context, CL_MEM_READ_WRITE, bufferSize, nullptr, retVal)); buffer->forceDisallowCPUCopy = true; pCmdQ->enqueueWriteBuffer(buffer.get(), CL_TRUE, 1, writeSize, writeData, nullptr, 0, nullptr, &outEvent); auto ×tampNodes = castToObject(outEvent)->getTimestampPacketNodes()->peekNodes(); EXPECT_EQ(2u, timestampNodes.size()); typename FamilyType::TimestampPacketType expectedEndTimestamp = 1; auto endTimestampAddress1 = TimestampPacketHelper::getContextEndGpuAddress(*timestampNodes[0]); auto endTimestampAddress2 = TimestampPacketHelper::getGlobalEndGpuAddress(*timestampNodes[0]); auto endTimestampAddress3 = TimestampPacketHelper::getContextEndGpuAddress(*timestampNodes[1]); auto endTimestampAddress4 = TimestampPacketHelper::getGlobalEndGpuAddress(*timestampNodes[1]); expectMemoryNotEqual(reinterpret_cast(endTimestampAddress1), &expectedEndTimestamp, sizeof(typename FamilyType::TimestampPacketType)); expectMemoryNotEqual(reinterpret_cast(endTimestampAddress2), &expectedEndTimestamp, sizeof(typename FamilyType::TimestampPacketType)); expectMemoryNotEqual(reinterpret_cast(endTimestampAddress3), &expectedEndTimestamp, sizeof(typename FamilyType::TimestampPacketType)); expectMemoryNotEqual(reinterpret_cast(endTimestampAddress4), &expectedEndTimestamp, sizeof(typename FamilyType::TimestampPacketType)); clReleaseEvent(outEvent); } 
aub_scratch_space_tests_xehp_and_later.cpp000066400000000000000000000245031422164147700370030ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/command_queue/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/array_count.h" #include "shared/test/common/cmd_parse/hw_parse.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/mocks/mock_scratch_space_controller_xehp_and_later.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/test/unit_test/aub_tests/command_stream/aub_command_stream_fixture.h" #include "opencl/test/unit_test/aub_tests/fixtures/aub_fixture.h" #include "opencl/test/unit_test/command_queue/command_queue_fixture.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/fixtures/hello_world_kernel_fixture.h" #include "opencl/test/unit_test/fixtures/simple_arg_kernel_fixture.h" #include "opencl/test/unit_test/indirect_heap/indirect_heap_fixture.h" using namespace NEO; struct Gen12AubScratchSpaceForPrivateFixture : public KernelAUBFixture { void SetUp() override { debugRestorer = std::make_unique(); kernelIdx = 6; kernelIds |= (1 << kernelIdx); KernelAUBFixture::SetUp(); arraySize = 32; vectorSize = 2; typeSize = sizeof(uint32_t); gwsSize = arraySize; lwsSize = 32; maxIterations1 = static_cast(arraySize); maxIterations2 = static_cast(arraySize); scalar = 0x4; expectedMemorySize = arraySize * vectorSize * typeSize; srcBuffer = alignedMalloc(expectedMemorySize, 0x1000); ASSERT_NE(nullptr, srcBuffer); auto srcBufferUint = static_cast(srcBuffer); uint32_t valOdd = 0x1; uint32_t valEven = 0x3; for (uint32_t i = 0; i < arraySize * vectorSize; ++i) { if (i % 2) { srcBufferUint[i] = valOdd; } else { srcBufferUint[i] = valEven; } } uint32_t sumOdd = 
0; uint32_t sumEven = 0; for (uint32_t i = 0; i < arraySize; ++i) { sumOdd += ((i + scalar) + valOdd); sumEven += (i + valEven); } dstBuffer = alignedMalloc(expectedMemorySize, 0x1000); ASSERT_NE(nullptr, dstBuffer); memset(dstBuffer, 0, expectedMemorySize); expectedMemory = alignedMalloc(expectedMemorySize, 0x1000); ASSERT_NE(nullptr, expectedMemory); auto expectedMemoryUint = static_cast(expectedMemory); for (uint32_t i = 0; i < arraySize * vectorSize; ++i) { if (i % 2) { expectedMemoryUint[i] = sumOdd; } else { expectedMemoryUint[i] = sumEven; } } kernels[kernelIdx]->setArgSvm(0, expectedMemorySize, dstBuffer, nullptr, 0u); dstAllocation = createHostPtrAllocationFromSvmPtr(dstBuffer, expectedMemorySize); kernels[kernelIdx]->setArgSvm(1, expectedMemorySize, srcBuffer, nullptr, 0u); srcAllocation = createHostPtrAllocationFromSvmPtr(srcBuffer, expectedMemorySize); kernels[kernelIdx]->setArg(2, sizeof(uint32_t), &scalar); kernels[kernelIdx]->setArg(3, sizeof(uint32_t), &maxIterations1); kernels[kernelIdx]->setArg(4, sizeof(uint32_t), &maxIterations2); } void TearDown() override { pCmdQ->flush(); if (expectedMemory) { alignedFree(expectedMemory); expectedMemory = nullptr; } if (srcBuffer) { alignedFree(srcBuffer); srcBuffer = nullptr; } if (dstBuffer) { alignedFree(dstBuffer); dstBuffer = nullptr; } KernelAUBFixture::TearDown(); } std::unique_ptr debugRestorer; size_t arraySize; size_t vectorSize; size_t typeSize; size_t gwsSize; size_t lwsSize; uint32_t kernelIdx; void *expectedMemory = nullptr; size_t expectedMemorySize = 0; void *srcBuffer = nullptr; void *dstBuffer = nullptr; GraphicsAllocation *srcAllocation; GraphicsAllocation *dstAllocation; uint32_t scalar; uint32_t maxIterations1; uint32_t maxIterations2; }; using Gen12AubScratchSpaceForPrivateTest = Test; HWCMDTEST_F(IGFX_XE_HP_CORE, Gen12AubScratchSpaceForPrivateTest, WhenKernelUsesScratchSpaceForPrivateThenExpectCorrectResults) { cl_uint workDim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; size_t 
globalWorkSize[3] = {gwsSize, 1, 1}; size_t localWorkSize[3] = {lwsSize, 1, 1}; cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event *event = nullptr; auto retVal = pCmdQ->enqueueKernel( kernels[kernelIdx].get(), workDim, globalWorkOffset, globalWorkSize, localWorkSize, numEventsInWaitList, eventWaitList, event); ASSERT_EQ(CL_SUCCESS, retVal); pCmdQ->flush(); expectMemory(dstBuffer, expectedMemory, expectedMemorySize); } class DefaultGrfKernelFixture : public ProgramFixture { public: using ProgramFixture::SetUp; protected: void SetUp(ClDevice *device, Context *context) { ProgramFixture::SetUp(); std::string programName("simple_spill_fill_kernel"); CreateProgramFromBinary( context, context->getDevices(), programName); ASSERT_NE(nullptr, pProgram); retVal = pProgram->build( pProgram->getDevices(), nullptr, false); ASSERT_EQ(CL_SUCCESS, retVal); kernel.reset(Kernel::create( pProgram, pProgram->getKernelInfoForKernel("spill_test"), *device, &retVal)); } void TearDown() override { if (kernel) { kernel.reset(nullptr); } ProgramFixture::TearDown(); } cl_int retVal = CL_SUCCESS; std::unique_ptr kernel; }; struct Gen12AubScratchSpaceForSpillFillFixture : public KernelAUBFixture { void SetUp() override { debugRestorer = std::make_unique(); KernelAUBFixture::SetUp(); arraySize = 32; typeSize = sizeof(cl_int); gwsSize = arraySize; lwsSize = 32; expectedMemorySize = (arraySize * 2 + 1) * typeSize - 4; inMemorySize = expectedMemorySize; outMemorySize = expectedMemorySize; offsetMemorySize = 128 * arraySize; srcBuffer = alignedMalloc(inMemorySize, 0x1000); ASSERT_NE(nullptr, srcBuffer); memset(srcBuffer, 0, inMemorySize); outBuffer = alignedMalloc(outMemorySize, 0x1000); ASSERT_NE(nullptr, outBuffer); memset(outBuffer, 0, outMemorySize); expectedMemory = alignedMalloc(expectedMemorySize, 0x1000); ASSERT_NE(nullptr, expectedMemory); memset(expectedMemory, 0, expectedMemorySize); offsetBuffer = alignedMalloc(offsetMemorySize, 0x1000); ASSERT_NE(nullptr, 
expectedMemory); memset(offsetBuffer, 0, offsetMemorySize); auto srcBufferInt = static_cast(srcBuffer); auto expectedMemoryInt = static_cast(expectedMemory); const int expectedVal1 = 16256; const int expectedVal2 = 512; for (uint32_t i = 0; i < arraySize; ++i) { srcBufferInt[i] = 2; expectedMemoryInt[i * 2] = expectedVal1; expectedMemoryInt[i * 2 + 1] = expectedVal2; } auto &kernelInfo = kernel->getKernelInfo(); EXPECT_NE(0u, kernelInfo.kernelDescriptor.kernelAttributes.perThreadScratchSize[0]); EXPECT_EQ(128u, kernelInfo.kernelDescriptor.kernelAttributes.numGrfRequired); kernel->setArgSvm(0, inMemorySize, srcBuffer, nullptr, 0u); inAllocation = createHostPtrAllocationFromSvmPtr(srcBuffer, inMemorySize); kernel->setArgSvm(1, outMemorySize, outBuffer, nullptr, 0u); outAllocation = createHostPtrAllocationFromSvmPtr(outBuffer, outMemorySize); kernel->setArgSvm(2, offsetMemorySize, offsetBuffer, nullptr, 0u); offsetAllocation = createHostPtrAllocationFromSvmPtr(offsetBuffer, offsetMemorySize); } void TearDown() override { pCmdQ->flush(); if (expectedMemory) { alignedFree(expectedMemory); expectedMemory = nullptr; } if (srcBuffer) { alignedFree(srcBuffer); srcBuffer = nullptr; } if (outBuffer) { alignedFree(outBuffer); outBuffer = nullptr; } if (offsetBuffer) { alignedFree(offsetBuffer); offsetBuffer = nullptr; } KernelAUBFixture::TearDown(); } std::unique_ptr debugRestorer; size_t arraySize; size_t vectorSize; size_t typeSize; size_t gwsSize; size_t lwsSize; void *expectedMemory = nullptr; size_t expectedMemorySize = 0; size_t inMemorySize = 0; size_t outMemorySize = 0; size_t offsetMemorySize = 0; void *srcBuffer = nullptr; void *outBuffer = nullptr; void *offsetBuffer = nullptr; GraphicsAllocation *inAllocation; GraphicsAllocation *outAllocation; GraphicsAllocation *offsetAllocation; }; using Gen12AubScratchSpaceForSpillFillTest = Test; HWCMDTEST_F(IGFX_XE_HP_CORE, Gen12AubScratchSpaceForSpillFillTest, 
givenSurfaceStateScratchSpaceEnabledWhenKernelUsesScratchForSpillFillThenExpectCorrectResults) { cl_uint workDim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {gwsSize, 1, 1}; size_t localWorkSize[3] = {lwsSize, 1, 1}; cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event *event = nullptr; auto retVal = pCmdQ->enqueueKernel( kernel.get(), workDim, globalWorkOffset, globalWorkSize, localWorkSize, numEventsInWaitList, eventWaitList, event); ASSERT_EQ(CL_SUCCESS, retVal); pCmdQ->finish(); expectMemory(outBuffer, expectedMemory, expectedMemorySize); } compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/command_queue/command_enqueue_fixture.h000066400000000000000000000020751422164147700335320ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/test/common/cmd_parse/hw_parse.h" #include "opencl/test/unit_test/aub_tests/command_stream/aub_command_stream_fixture.h" #include "opencl/test/unit_test/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/command_queue/command_queue_fixture.h" #include "opencl/test/unit_test/command_stream/command_stream_fixture.h" #include "opencl/test/unit_test/fixtures/built_in_fixture.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" namespace NEO { struct CommandEnqueueAUBFixture : public CommandEnqueueBaseFixture, public AUBCommandStreamFixture { using AUBCommandStreamFixture::SetUp; void SetUp() override { CommandEnqueueBaseFixture::SetUp(cl_command_queue_properties(0)); AUBCommandStreamFixture::SetUp(pCmdQ); } void TearDown() override { AUBCommandStreamFixture::TearDown(); CommandEnqueueBaseFixture::TearDown(); } }; } // namespace NEO compression_aub_tests_xehp_and_later.cpp000066400000000000000000000332171422164147700365440ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/command_queue/* * Copyright (C) 2022 Intel 
Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gmm_helper/gmm.h" #include "shared/source/gmm_helper/resource_info.h" #include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/mocks/mock_allocation_properties.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/common/test_macros/test_checks_shared.h" #include "opencl/extensions/public/cl_ext_private.h" #include "opencl/source/helpers/cl_memory_properties_helpers.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/aub_tests/fixtures/aub_fixture.h" #include "test_traits_common.h" using namespace NEO; template struct CompressionXeHPAndLater : public AUBFixture, public ::testing::Test, public ::testing::WithParamInterface { void SetUp() override { REQUIRE_64BIT_OR_SKIP(); debugRestorer = std::make_unique(); DebugManager.flags.RenderCompressedBuffersEnabled.set(true); DebugManager.flags.RenderCompressedImagesEnabled.set(true); DebugManager.flags.EnableLocalMemory.set(useLocalMemory); DebugManager.flags.NodeOrdinal.set(GetParam()); auto &hwHelper = HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily); auto expectedEngine = static_cast(GetParam()); bool engineSupported = false; for (auto &engine : hwHelper.getGpgpuEngineInstances(*defaultHwInfo)) { if (engine.first == expectedEngine) { engineSupported = true; break; } } if (!engineSupported) { GTEST_SKIP(); } AUBFixture::SetUp(defaultHwInfo.get()); auto &ftrTable = device->getHardwareInfo().featureTable; if ((!ftrTable.flags.ftrFlatPhysCCS) || (!ftrTable.flags.ftrLocalMemory && useLocalMemory)) { GTEST_SKIP(); } context->contextType = ContextType::CONTEXT_TYPE_SPECIALIZED; } void TearDown() override { AUBFixture::TearDown(); } std::unique_ptr debugRestorer; cl_int retVal = CL_SUCCESS; template void 
givenCompressedBuffersWhenWritingAndCopyingThenResultsAreCorrect(); template void givenCompressedImage2DFromBufferWhenItIsUsedThenDataIsCorrect(); template void givenCompressedImageWhenReadingThenResultsAreCorrect(); }; template template void CompressionXeHPAndLater::givenCompressedBuffersWhenWritingAndCopyingThenResultsAreCorrect() { const size_t bufferSize = 2048; uint8_t writePattern[bufferSize]; std::fill(writePattern, writePattern + sizeof(writePattern), 1); device->getGpgpuCommandStreamReceiver().overrideDispatchPolicy(DispatchMode::BatchedDispatch); auto compressedBuffer = std::unique_ptr(Buffer::create(context, CL_MEM_READ_WRITE | CL_MEM_COMPRESSED_HINT_INTEL, bufferSize, nullptr, retVal)); auto compressedAllocation = compressedBuffer->getGraphicsAllocation(device->getRootDeviceIndex()); memset(compressedAllocation->getUnderlyingBuffer(), 0, bufferSize); EXPECT_NE(nullptr, compressedAllocation->getDefaultGmm()->gmmResourceInfo->peekHandle()); EXPECT_TRUE(compressedAllocation->getDefaultGmm()->isCompressionEnabled); if (testLocalMemory) { EXPECT_EQ(MemoryPool::LocalMemory, compressedAllocation->getMemoryPool()); } else { EXPECT_EQ(MemoryPool::System4KBPages, compressedAllocation->getMemoryPool()); } auto notCompressedBuffer = std::unique_ptr(Buffer::create(context, CL_MEM_READ_WRITE, bufferSize, nullptr, retVal)); auto nonCompressedAllocation = notCompressedBuffer->getGraphicsAllocation(device->getRootDeviceIndex()); nonCompressedAllocation->setAllocationType(AllocationType::BUFFER); if (nonCompressedAllocation->getDefaultGmm()) { nonCompressedAllocation->getDefaultGmm()->isCompressionEnabled = false; } memset(nonCompressedAllocation->getUnderlyingBuffer(), 0, bufferSize); pCmdQ->enqueueWriteBuffer(compressedBuffer.get(), CL_FALSE, 0, bufferSize, writePattern, nullptr, 0, nullptr, nullptr); pCmdQ->enqueueCopyBuffer(compressedBuffer.get(), notCompressedBuffer.get(), 0, 0, bufferSize, 0, nullptr, nullptr); pCmdQ->finish(); 
expectNotEqualMemory(AUBFixture::getGpuPointer(compressedAllocation), writePattern, bufferSize); expectMemory(AUBFixture::getGpuPointer(nonCompressedAllocation), writePattern, bufferSize); } template template void CompressionXeHPAndLater::givenCompressedImage2DFromBufferWhenItIsUsedThenDataIsCorrect() { const size_t imageWidth = 16; const size_t imageHeight = 16; const size_t bufferSize = 64 * KB; uint8_t writePattern[bufferSize]; std::fill(writePattern, writePattern + sizeof(writePattern), 1); device->getGpgpuCommandStreamReceiver().overrideDispatchPolicy(DispatchMode::BatchedDispatch); auto compressedBuffer = std::unique_ptr(Buffer::create(context, CL_MEM_COPY_HOST_PTR | CL_MEM_COMPRESSED_HINT_INTEL, bufferSize, writePattern, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); //now create image2DFromBuffer cl_image_desc imageDescriptor = {}; imageDescriptor.mem_object = compressedBuffer.get(); imageDescriptor.image_height = imageWidth; imageDescriptor.image_width = imageHeight; imageDescriptor.image_type = CL_MEM_OBJECT_IMAGE2D; cl_image_format imageFormat = {}; imageFormat.image_channel_data_type = CL_UNSIGNED_INT32; imageFormat.image_channel_order = CL_RGBA; auto clCompressedImage = clCreateImage(context, CL_MEM_READ_WRITE, &imageFormat, &imageDescriptor, nullptr, &retVal); auto compressedImage = castToObject(clCompressedImage); EXPECT_EQ(CL_SUCCESS, retVal); const size_t perChannelDataSize = sizeof(cl_uint); const size_t numChannels = 4; const auto imageSize = imageWidth * imageHeight * perChannelDataSize * numChannels; cl_uint destMemory[imageSize / sizeof(cl_uint)] = {0}; const size_t origin[] = {0, 0, 0}; const size_t region[] = {imageWidth, imageHeight, 1}; retVal = pCmdQ->enqueueReadImage( compressedImage, CL_FALSE, origin, region, 0, 0, destMemory, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = pCmdQ->flush(); EXPECT_EQ(CL_SUCCESS, retVal); expectMemory(destMemory, writePattern, imageSize); //make sure our objects are in in fact compressed 
auto graphicsAllocation = compressedBuffer->getGraphicsAllocation(device->getRootDeviceIndex()); EXPECT_NE(nullptr, graphicsAllocation->getDefaultGmm()); EXPECT_TRUE(graphicsAllocation->getDefaultGmm()->isCompressionEnabled); EXPECT_TRUE(compressedImage->getGraphicsAllocation(device->getRootDeviceIndex())->getDefaultGmm()->isCompressionEnabled); expectNotEqualMemory(reinterpret_cast(graphicsAllocation->getGpuAddress()), writePattern, bufferSize); clReleaseMemObject(clCompressedImage); } template template void CompressionXeHPAndLater::givenCompressedImageWhenReadingThenResultsAreCorrect() { const size_t imageWidth = 8; const size_t imageHeight = 4; const size_t perChannelDataSize = sizeof(cl_float); const size_t numChannels = 4; const auto imageSize = imageWidth * imageHeight * perChannelDataSize * numChannels; const auto rowSize = imageSize / imageHeight; cl_float srcMemory[imageSize / sizeof(cl_float)] = {0}; const cl_float row[rowSize] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}; cl_float *pixel = srcMemory; for (uint32_t height = 0; height < imageHeight; height++) { memcpy(pixel, row, rowSize); pixel += imageWidth; } cl_float destMemory[imageSize / sizeof(cl_float)] = {0}; cl_image_format imageFormat; cl_image_desc imageDesc; imageFormat.image_channel_data_type = CL_FLOAT; imageFormat.image_channel_order = CL_RGBA; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_width = imageWidth; imageDesc.image_height = imageHeight; imageDesc.image_depth = 1; imageDesc.image_array_size = 1; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = NULL; auto allocation = csr->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, imageSize}, destMemory); 
csr->makeResidentHostPtrAllocation(allocation); csr->getInternalAllocationStorage()->storeAllocation(std::unique_ptr(allocation), TEMPORARY_ALLOCATION); cl_mem_flags flags = CL_MEM_USE_HOST_PTR; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); auto retVal = CL_INVALID_VALUE; std::unique_ptr srcImage(Image::create( context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context->getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imageDesc, srcMemory, retVal)); ASSERT_NE(nullptr, srcImage); cl_bool blockingRead = CL_FALSE; cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event *event = nullptr; const size_t origin[] = {0, 0, 0}; const size_t region[] = {imageWidth, imageHeight, 1}; retVal = pCmdQ->enqueueReadImage( srcImage.get(), blockingRead, origin, region, 0, 0, destMemory, nullptr, numEventsInWaitList, eventWaitList, event); EXPECT_EQ(CL_SUCCESS, retVal); allocation = csr->getTemporaryAllocations().peekHead(); while (allocation && allocation->getUnderlyingBuffer() != destMemory) { allocation = allocation->next; } auto pDestGpuAddress = reinterpret_cast(allocation->getGpuAddress()); pCmdQ->flush(); EXPECT_EQ(CL_SUCCESS, retVal); expectMemory(pDestGpuAddress, srcMemory, imageSize); expectNotEqualMemory(AUBFixture::getGpuPointer(srcImage->getGraphicsAllocation(rootDeviceIndex)), srcMemory, imageSize); } struct CompressionLocalAubsSupportedMatcher { template static constexpr bool isMatched() { if constexpr (HwMapper::GfxProduct::supportsCmdSet(IGFX_XE_HP_CORE)) { return TestTraits::get()>::localMemCompressionAubsSupported; } return false; } }; struct CompressionSystemAubsSupportedMatcher { template static constexpr bool isMatched() { if constexpr (HwMapper::GfxProduct::supportsCmdSet(IGFX_XE_HP_CORE)) { return TestTraits::get()>::systemMemCompressionAubsSupported; } return false; } }; using CompressionLocalXeHPAndLater = 
CompressionXeHPAndLater; HWTEST2_P(CompressionLocalXeHPAndLater, givenCompressedBuffersWhenWritingAndCopyingThenResultsAreCorrect, CompressionLocalAubsSupportedMatcher) { givenCompressedBuffersWhenWritingAndCopyingThenResultsAreCorrect(); } HWTEST2_P(CompressionLocalXeHPAndLater, givenCompressedImage2DFromBufferWhenItIsUsedThenDataIsCorrect, CompressionLocalAubsSupportedMatcher) { givenCompressedImage2DFromBufferWhenItIsUsedThenDataIsCorrect(); } HWTEST2_P(CompressionLocalXeHPAndLater, givenCompressedImageWhenReadingThenResultsAreCorrect, CompressionLocalAubsSupportedMatcher) { givenCompressedImageWhenReadingThenResultsAreCorrect(); } INSTANTIATE_TEST_CASE_P(, CompressionLocalXeHPAndLater, ::testing::Values(aub_stream::ENGINE_RCS, aub_stream::ENGINE_CCS)); using CompressionSystemXeHPAndLater = CompressionXeHPAndLater; HWTEST2_P(CompressionSystemXeHPAndLater, GENERATEONLY_givenCompressedBuffersWhenWritingAndCopyingThenResultsAreCorrect, CompressionSystemAubsSupportedMatcher) { givenCompressedBuffersWhenWritingAndCopyingThenResultsAreCorrect(); } HWTEST2_P(CompressionSystemXeHPAndLater, GENERATEONLY_givenCompressedImage2DFromBufferWhenItIsUsedThenDataIsCorrect, CompressionSystemAubsSupportedMatcher) { givenCompressedImage2DFromBufferWhenItIsUsedThenDataIsCorrect(); } HWTEST2_P(CompressionSystemXeHPAndLater, givenCompressedImageWhenReadingThenResultsAreCorrect, CompressionSystemAubsSupportedMatcher) { givenCompressedImageWhenReadingThenResultsAreCorrect(); } INSTANTIATE_TEST_CASE_P(, CompressionSystemXeHPAndLater, ::testing::Values(aub_stream::ENGINE_RCS, aub_stream::ENGINE_CCS)); enqueue_copy_buffer_aub_tests.cpp000066400000000000000000000070421422164147700351750ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/command_queue/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/helpers/ptr_math.h" #include 
"shared/test/common/test_macros/test.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/aub_tests/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" using namespace NEO; struct CopyBufferHw : public CommandEnqueueAUBFixture, public ::testing::WithParamInterface>, public ::testing::Test { void SetUp() override { CommandEnqueueAUBFixture::SetUp(); } void TearDown() override { CommandEnqueueAUBFixture::TearDown(); } }; typedef CopyBufferHw AUBCopyBuffer; HWTEST_P(AUBCopyBuffer, WhenCopyingThenExpectationsMet) { MockContext context(pCmdQ->getDevice().getSpecializedDevice()); cl_float srcMemory[] = {1.0f, 2.0f, 3.0f, 4.0f}; cl_float dstMemory[] = {0.0f, 0.0f, 0.0f, 0.0f}; auto retVal = CL_INVALID_VALUE; auto srcBuffer = Buffer::create( &context, CL_MEM_USE_HOST_PTR, sizeof(srcMemory), srcMemory, retVal); ASSERT_NE(nullptr, srcBuffer); auto dstBuffer = Buffer::create( &context, CL_MEM_USE_HOST_PTR, sizeof(dstMemory), dstMemory, retVal); ASSERT_NE(nullptr, dstBuffer); auto pSrcMemory = &srcMemory[0]; size_t srcOffset = std::get<0>(GetParam()); size_t dstOffset = std::get<1>(GetParam()); size_t sizeCopied = sizeof(cl_float); cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event *event = nullptr; auto pDstMemory = reinterpret_cast(dstBuffer->getGraphicsAllocation(pDevice->getRootDeviceIndex())->getGpuAddress()); retVal = pCmdQ->enqueueCopyBuffer( srcBuffer, dstBuffer, srcOffset, dstOffset, sizeCopied, numEventsInWaitList, eventWaitList, event); EXPECT_EQ(CL_SUCCESS, retVal); pCmdQ->flush(); pSrcMemory = ptrOffset(pSrcMemory, srcOffset); pDstMemory = ptrOffset(pDstMemory, dstOffset); // Compute our memory expecations based on kernel execution size_t sizeUserMemory = sizeof(dstMemory); AUBCommandStreamFixture::expectMemory(pDstMemory, pSrcMemory, sizeCopied); // If the copykernel wasn't max sized, ensure we didn't overwrite existing memory if (dstOffset + sizeCopied < sizeUserMemory) 
{ pDstMemory = ptrOffset(pDstMemory, sizeCopied); float *dstMemoryRef = ptrOffset(dstMemory, sizeCopied); size_t sizeRemaining = sizeUserMemory - sizeCopied - dstOffset; AUBCommandStreamFixture::expectMemory(pDstMemory, dstMemoryRef, sizeRemaining); } delete srcBuffer; delete dstBuffer; } INSTANTIATE_TEST_CASE_P(AUBCopyBuffer_simple, AUBCopyBuffer, ::testing::Combine( ::testing::Values( // srcOffset 0 * sizeof(cl_float), 1 * sizeof(cl_float), 2 * sizeof(cl_float), 3 * sizeof(cl_float)), ::testing::Values( // dstOffset 0 * sizeof(cl_float), 1 * sizeof(cl_float), 2 * sizeof(cl_float), 3 * sizeof(cl_float)))); enqueue_copy_buffer_rect_aub_tests.cpp000066400000000000000000000107301422164147700362100ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/command_queue/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/helpers/ptr_math.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/aub_tests/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include using namespace NEO; struct CopyBufferRectHw : public CommandEnqueueAUBFixture, public ::testing::TestWithParam> { void SetUp() override { CommandEnqueueAUBFixture::SetUp(); std::tie(srcOrigin0, srcOrigin1, srcOrigin2, dstOrigin0, dstOrigin1, dstOrigin2, copy3D) = GetParam(); } void TearDown() override { CommandEnqueueAUBFixture::TearDown(); } size_t srcOrigin0; size_t srcOrigin1; size_t srcOrigin2; size_t dstOrigin0; size_t dstOrigin1; size_t dstOrigin2; bool copy3D; }; typedef CopyBufferRectHw AUBCopyBufferRect; HWTEST_P(AUBCopyBufferRect, WhenCopyingThenExpectationsMet) { //3D UINT8 buffer 20x20x20 static const size_t rowPitch = 20; static const size_t slicePitch = rowPitch * rowPitch; static const size_t elementCount = slicePitch * rowPitch; 
MockContext context(this->pClDevice); cl_uchar *srcMemory = new uint8_t[elementCount + 8]; cl_uchar *dstMemory = new uint8_t[elementCount + 8]; for (size_t i = 0; i < elementCount; i++) { srcMemory[i] = static_cast(i + 1); dstMemory[i] = 0; } auto retVal = CL_INVALID_VALUE; auto srcBuffer = Buffer::create( &context, CL_MEM_USE_HOST_PTR, elementCount * sizeof(uint8_t), srcMemory, retVal); ASSERT_NE(nullptr, srcBuffer); auto dstBuffer = Buffer::create( &context, CL_MEM_USE_HOST_PTR, elementCount * sizeof(uint8_t), dstMemory, retVal); ASSERT_NE(nullptr, dstBuffer); auto pSrcMemory = &srcMemory[0]; auto pDestMemory = reinterpret_cast(dstBuffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddress()); size_t regionX = std::min(rowPitch / 2, rowPitch - std::max(srcOrigin0, dstOrigin0)); size_t regionY = std::min(rowPitch / 2, rowPitch - std::max(srcOrigin1, dstOrigin1)); size_t regionZ = copy3D ? std::min(rowPitch / 2, rowPitch - std::max(srcOrigin2, dstOrigin2)) : 1; size_t srcOrigin[] = {srcOrigin0, srcOrigin1, srcOrigin2}; size_t dstOrigin[] = {dstOrigin0, dstOrigin1, dstOrigin2}; size_t region[] = {regionX, regionY, regionZ}; retVal = pCmdQ->enqueueCopyBufferRect( srcBuffer, dstBuffer, srcOrigin, dstOrigin, region, rowPitch, slicePitch, rowPitch, slicePitch, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); pCmdQ->flush(); // Verify Output, line by line uint8_t src[rowPitch * slicePitch]; memset(src, 0, sizeof(src)); auto tDst = pDestMemory; auto tSrc = ptrOffset(pSrcMemory, srcOrigin[0] + srcOrigin[1] * rowPitch + srcOrigin[2] * slicePitch); auto tRef = ptrOffset(src, dstOrigin[0] + dstOrigin[1] * rowPitch + dstOrigin[2] * slicePitch); for (unsigned int z = 0; z < regionZ; z++) { auto pDst = tDst; auto pSrc = tSrc; auto pRef = tRef; for (unsigned int y = 0; y < regionY; y++) { memcpy(pRef, pSrc, region[0]); pDst += rowPitch; pSrc += rowPitch; pRef += rowPitch; } tDst += slicePitch; tSrc += slicePitch; tRef += slicePitch; } 
AUBCommandStreamFixture::expectMemory(pDestMemory, src, rowPitch * slicePitch); delete srcBuffer; delete dstBuffer; delete[] srcMemory; delete[] dstMemory; } static size_t zero[] = {0}; INSTANTIATE_TEST_CASE_P(AUBCopyBufferRect, AUBCopyBufferRect, ::testing::Combine( ::testing::Values(0, 3), //srcOrigin ::testing::ValuesIn(zero), ::testing::Values(0, 7), ::testing::Values(0, 3), //dstPrigin ::testing::ValuesIn(zero), ::testing::Values(0, 7), ::testing::Values(true, false))); enqueue_copy_image_aub_tests.cpp000066400000000000000000000163421422164147700350110ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/command_queue/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/helpers/ptr_math.h" #include "shared/source/memory_manager/os_agnostic_memory_manager.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/mem_obj/image.h" #include "opencl/test/unit_test/aub_tests/fixtures/image_aub_fixture.h" using namespace NEO; template struct AUBCopyImage : public ImageAubFixture, public ::testing::WithParamInterface>, public ::testing::Test { void SetUp() override { ImageAubFixture::SetUp(enableBlitter); } void TearDown() override { srcImage.reset(); dstImage.reset(); ImageAubFixture::TearDown(); } template void runAubTest() { const size_t testImageDimensions = 4; cl_float srcMemory[testImageDimensions * testImageDimensions] = { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 0.5f, 1.5f, 2.5f, 3.5f, 4.5f, 5.5f, 6.5f}; cl_float origValue = -1.0f; cl_float dstMemory[testImageDimensions * testImageDimensions] = { origValue, origValue, origValue, origValue, origValue, origValue, origValue, origValue, origValue, origValue, origValue, origValue, origValue, origValue, origValue, origValue}; cl_image_format imageFormat; cl_image_desc imageDesc; cl_mem_flags flags = CL_MEM_COPY_HOST_PTR; // 
clang-format off imageFormat.image_channel_data_type = CL_FLOAT; imageFormat.image_channel_order = CL_R; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_width = testImageDimensions; imageDesc.image_height = testImageDimensions; imageDesc.image_depth = 1; imageDesc.image_array_size = 1; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = NULL; // clang-format on auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, pClDevice->getHardwareInfo().capabilityTable.supportsOcl21Features); auto retVal = CL_INVALID_VALUE; srcImage.reset(Image::create( context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context->getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imageDesc, srcMemory, retVal)); ASSERT_NE(nullptr, srcImage.get()); dstImage.reset(Image::create( context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context->getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imageDesc, dstMemory, retVal)); ASSERT_NE(nullptr, dstImage.get()); size_t srcOffset = std::get<0>(GetParam()); size_t dstOffset = std::get<1>(GetParam()); size_t srcOrigin[3] = {srcOffset, srcOffset, 0}; size_t dstOrigin[3] = {dstOffset, dstOffset, 0}; // Only draw 1/4 of the original image const size_t region[3] = { testImageDimensions / 2, testImageDimensions / 2, 1}; retVal = pCmdQ->enqueueCopyImage( srcImage.get(), dstImage.get(), srcOrigin, dstOrigin, region, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); auto dstOutMemory = new cl_float[dstImage->getSize()]; size_t imgOrigin[] = {0, 0, 0}; size_t imgRegion[] = {imageDesc.image_width, imageDesc.image_height, imageDesc.image_depth}; retVal = pCmdQ->enqueueReadImage(dstImage.get(), CL_FALSE, imgOrigin, imgRegion, 0, 0, dstOutMemory, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = pCmdQ->finish(); EXPECT_EQ(CL_SUCCESS, retVal); // Offset the source memory auto 
pSrcMemory = ptrOffset(srcMemory, (srcOffset * testImageDimensions + srcOffset) * sizeof(origValue)); // Since the driver allocated his own memory, we need to use that for verification auto pDstMemory = static_cast(dstOutMemory); if (dstOffset > 0) { // Add expectations for rows that should be unmodified AUBCommandStreamFixture::expectMemory(pDstMemory, dstMemory, dstOffset * testImageDimensions * sizeof(origValue)); pDstMemory = ptrOffset(pDstMemory, dstOffset * testImageDimensions * sizeof(origValue)); } for (size_t row = 0; row < region[1]; ++row) { if (dstOffset > 0) { size_t length = dstOffset * sizeof(origValue); AUBCommandStreamFixture::expectMemory(pDstMemory, dstMemory, length); pDstMemory = ptrOffset(pDstMemory, length); } size_t length = region[0] * sizeof(origValue); AUBCommandStreamFixture::expectMemory(pDstMemory, pSrcMemory, length); pDstMemory = ptrOffset(pDstMemory, length); length = (testImageDimensions - region[0] - dstOffset) * sizeof(origValue); AUBCommandStreamFixture::expectMemory(pDstMemory, dstMemory, length); pDstMemory = ptrOffset(pDstMemory, length); pSrcMemory = ptrOffset(pSrcMemory, testImageDimensions * sizeof(origValue)); } size_t remainingRows = testImageDimensions - region[1] - dstOffset; while (remainingRows > 0) { size_t length = testImageDimensions * sizeof(origValue); AUBCommandStreamFixture::expectMemory(pDstMemory, dstMemory, length); pDstMemory = ptrOffset(pDstMemory, length); --remainingRows; } delete[] dstOutMemory; } std::unique_ptr srcImage; std::unique_ptr dstImage; }; using AUBCopyImageCCS = AUBCopyImage; HWTEST2_P(AUBCopyImageCCS, WhenCopyingThenExpectationsMet, ImagesSupportedMatcher) { runAubTest(); } INSTANTIATE_TEST_CASE_P(AUBCopyImage_simple, AUBCopyImageCCS, ::testing::Combine( ::testing::Values( // srcOffset 0u, 1u, 2u), ::testing::Values( // dstOffset 0u, 1u, 2u))); using AUBCopyImageBCS = AUBCopyImage; HWTEST2_P(AUBCopyImageBCS, WhenCopyingWithBlitterEnabledThenExpectationsMet, ImagesSupportedMatcher) { 
runAubTest(); ASSERT_EQ(pCmdQ->peekLatestSentEnqueueOperation(), EnqueueProperties::Operation::Blit); } INSTANTIATE_TEST_CASE_P(AUBCopyImage_simple, AUBCopyImageBCS, ::testing::Combine( ::testing::Values( // srcOffset 0u, 1u, 2u), ::testing::Values( // dstOffset 0u, 1u, 2u))); enqueue_fill_buffer_aub_tests.cpp000066400000000000000000000117671422164147700351620ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/command_queue/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/helpers/ptr_math.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/aub_tests/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" using namespace NEO; struct FillBufferHw : public CommandEnqueueAUBFixture, public ::testing::WithParamInterface, public ::testing::Test { void SetUp() override { CommandEnqueueAUBFixture::SetUp(); } void TearDown() override { CommandEnqueueAUBFixture::TearDown(); } }; typedef FillBufferHw AUBFillBuffer; HWTEST_P(AUBFillBuffer, WhenFillingThenExpectationsMet) { cl_float destMemory[] = {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}; auto pDestMemory = &destMemory[0]; MockContext context(this->pCmdQ->getDevice().getSpecializedDevice()); auto retVal = CL_INVALID_VALUE; auto destBuffer = Buffer::create( &context, CL_MEM_USE_HOST_PTR, sizeof(destMemory), pDestMemory, retVal); ASSERT_NE(nullptr, destBuffer); float pattern[] = {1.0f}; size_t patternSize = sizeof(pattern); size_t offset = GetParam(); size_t size = 2 * patternSize; cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event *event = nullptr; retVal = pCmdQ->enqueueFillBuffer( destBuffer, pattern, patternSize, offset, size, numEventsInWaitList, 
eventWaitList, event); ASSERT_EQ(CL_SUCCESS, retVal); pCmdQ->flush(); pDestMemory = reinterpret_cast((destBuffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddress())); // The memory under offset should be untouched if (offset) { cl_float *destMemoryRef = ptrOffset(&destMemory[0], offset); AUBCommandStreamFixture::expectMemory(pDestMemory, destMemoryRef, offset); pDestMemory = ptrOffset(pDestMemory, offset); } // Compute our memory expecations based on kernel execution auto pEndMemory = ptrOffset(pDestMemory, size); while (pDestMemory < pEndMemory) { AUBCommandStreamFixture::expectMemory(pDestMemory, pattern, patternSize); pDestMemory = ptrOffset(pDestMemory, patternSize); } // If the copykernel wasn't max sized, ensure we didn't overwrite existing memory size_t sizeUserMemory = sizeof(destMemory); if (offset + size < sizeUserMemory) { size_t sizeRemaining = sizeUserMemory - size - offset; cl_float *destMemoryRef = ptrOffset(&destMemory[0], offset + size); AUBCommandStreamFixture::expectMemory(pDestMemory, destMemoryRef, sizeRemaining); } delete destBuffer; } HWTEST_F(AUBFillBuffer, givenFillBufferWhenSeveralSubmissionsWithoutPollForCompletionBetweenThenTestConcurrentCS) { DebugManagerStateRestore dbgRestorer; cl_float destMemory[] = {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}; auto pDestMemory = &destMemory[0]; MockContext context(this->pCmdQ->getDevice().getSpecializedDevice()); auto retVal = CL_INVALID_VALUE; std::unique_ptr destBuffer(Buffer::create( &context, CL_MEM_USE_HOST_PTR, sizeof(destMemory), pDestMemory, retVal)); ASSERT_NE(nullptr, destBuffer); float pattern[] = {1.0f}; size_t patternSize = sizeof(pattern); size_t offset = 0; size_t size = 2 * patternSize; cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event *event = nullptr; uint32_t numWrites = 4; for (uint32_t id = 0; id < numWrites; id++) { offset = id * size; retVal = pCmdQ->enqueueFillBuffer( destBuffer.get(), pattern, patternSize, offset, size, 
numEventsInWaitList, eventWaitList, event); ASSERT_EQ(CL_SUCCESS, retVal); pCmdQ->flush(); } AUBCommandStreamFixture::pollForCompletion(); pDestMemory = reinterpret_cast((destBuffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddress())); auto pEndMemory = ptrOffset(pDestMemory, numWrites * size); while (pDestMemory < pEndMemory) { AUBCommandStreamFixture::expectMemory(pDestMemory, pattern, patternSize); pDestMemory = ptrOffset(pDestMemory, patternSize); } } INSTANTIATE_TEST_CASE_P(AUBFillBuffer_simple, AUBFillBuffer, ::testing::Values( 0 * sizeof(cl_float), 1 * sizeof(cl_float), 2 * sizeof(cl_float), 3 * sizeof(cl_float))); enqueue_fill_image_aub_tests.cpp000066400000000000000000000224711422164147700347650ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/command_queue/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/ptr_math.h" #include "shared/source/memory_manager/os_agnostic_memory_manager.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/mem_obj/image.h" #include "opencl/test/unit_test/aub_tests/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include using namespace NEO; // clang-format off struct FillImageParams { cl_mem_object_type imageType; size_t offsets[3]; } imageParams[] = { {CL_MEM_OBJECT_IMAGE1D, { 0u, 0u, 0u}}, {CL_MEM_OBJECT_IMAGE1D, { 1u, 0u, 0u}}, {CL_MEM_OBJECT_IMAGE2D, { 0u, 0u, 0u}}, {CL_MEM_OBJECT_IMAGE2D, { 1u, 2u, 0u}}, {CL_MEM_OBJECT_IMAGE3D, { 0u, 0u, 0u}}, {CL_MEM_OBJECT_IMAGE3D, { 1u, 2u, 3u}} }; static const uint32_t fillValues[4] = { 0x3f800000, 0x00000000, 0x3f555555, 0x3f2aaaaa }; static const uint16_t expectedHALF_FLOAT[4] = {0x3c00, 0x0000, 0x3aaa, 0x3955}; static const uint16_t expectedUNORM16[4] = {0xffff, 0x0000, 
0xd554, 0xaaa9}; static const uint8_t expectedUNORM8[4] = { 0xff, 0x00, 0xd4, 0xa9}; //The distance between sRGB values and the expected values should not be greater than 0.6f //In this test, for simplicity purposes, we are checking if the distance is 0 static const uint8_t expectedUNORM8sRGB[4] = { 0xff, 0x00, 0xeb, 0xa9}; static const uint8_t expectedUNORM8sBGR[4] = { 0xeb, 0x00, 0xff, 0xa9}; static const uint16_t expectedSNORM16[4] = {0x7fff, 0x0000, 0x6AA9, 0x5554}; static const uint8_t expectedSNORM8[4] = { 0x7f, 0x00, 0x69, 0x54}; static auto expectedSINT32 = fillValues; static uint16_t expectedSINT16[4] = { 0x0000, 0x0000, 0x5555, 0xaaaa }; static uint8_t expectedSINT8[4] = { 0x00, 0x00, 0x55, 0xaa }; static auto expectedUINT32 = fillValues; static uint16_t expectedUINT16[4] = { 0x0000, 0x0000, 0x5555, 0xaaaa }; static uint8_t expectedUINT8[4] = { 0x00, 0x00, 0x55, 0xaa }; static auto expectedFLOAT = fillValues; // ChannelTypes/FillValues for test struct FillChannelType { cl_channel_type type; const void *expectedValues; } fillChannelTypes[] = { {CL_SNORM_INT8, expectedSNORM8}, {CL_SNORM_INT16, expectedSNORM16}, {CL_UNORM_INT8, expectedUNORM8}, {CL_UNORM_INT16, expectedUNORM16}, {CL_SIGNED_INT8, expectedSINT8}, {CL_SIGNED_INT16, expectedSINT16}, {CL_SIGNED_INT32, expectedSINT32}, {CL_UNSIGNED_INT8, expectedUINT8}, {CL_UNSIGNED_INT16, expectedUINT16}, {CL_UNSIGNED_INT32, expectedUINT32}, {CL_HALF_FLOAT, expectedHALF_FLOAT}, {CL_FLOAT, expectedFLOAT}}; // clang-format on struct AubFillImage : public CommandDeviceFixture, public AUBCommandStreamFixture, public ::testing::WithParamInterface>, public ::testing::Test { using AUBCommandStreamFixture::SetUp; typedef AUBCommandStreamFixture CommandStreamFixture; void SetUp() override { if (!(defaultHwInfo->capabilityTable.supportsImages)) { GTEST_SKIP(); } auto dataType = std::get<0>(GetParam()).type; auto channelOrder = std::get<1>(GetParam()); if (dataType != CL_UNORM_INT8 && (channelOrder == CL_sRGBA || 
channelOrder == CL_sBGRA)) { //sRGBA and sBGRA support only unorm int8 type GTEST_SKIP(); } CommandDeviceFixture::SetUp(cl_command_queue_properties(0)); CommandStreamFixture::SetUp(pCmdQ); context = std::make_unique(pClDevice); if ((pClDevice->getHardwareInfo().capabilityTable.supportsOcl21Features == false) && (channelOrder == CL_sRGBA || channelOrder == CL_sBGRA)) { GTEST_SKIP(); } } void TearDown() override { image.reset(); context.reset(); CommandStreamFixture::TearDown(); CommandDeviceFixture::TearDown(); } std::unique_ptr context; std::unique_ptr image; }; HWTEST_P(AubFillImage, WhenFillingThenExpectationsMet) { const unsigned int testWidth = 5; const unsigned int testHeight = std::get<2>(GetParam()).imageType != CL_MEM_OBJECT_IMAGE1D ? 5 : 1; const unsigned int testDepth = std::get<2>(GetParam()).imageType == CL_MEM_OBJECT_IMAGE3D ? 5 : 1; cl_image_format imageFormat; cl_image_desc imageDesc; // clang-format off imageFormat.image_channel_data_type = std::get<0>(GetParam()).type; imageFormat.image_channel_order = std::get<1>(GetParam()); imageDesc.image_type = std::get<2>(GetParam()).imageType; imageDesc.image_width = testWidth; imageDesc.image_height = testHeight; imageDesc.image_depth = testDepth; imageDesc.image_array_size = 1; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = NULL; // clang-format on auto perChannelDataSize = 0u; switch (imageFormat.image_channel_data_type) { default: case CL_SIGNED_INT8: case CL_SNORM_INT8: case CL_UNORM_INT8: case CL_UNSIGNED_INT8: perChannelDataSize = 1; break; case CL_HALF_FLOAT: case CL_SIGNED_INT16: case CL_SNORM_INT16: case CL_UNORM_INT16: case CL_UNSIGNED_INT16: perChannelDataSize = 2; break; case CL_SIGNED_INT32: case CL_UNSIGNED_INT32: case CL_FLOAT: perChannelDataSize = 4; break; } auto numChannels = 0u; switch (imageFormat.image_channel_order) { default: case CL_R: numChannels = 1; break; case CL_RG: numChannels = 2; 
break; case CL_RGBA: case CL_sRGBA: case CL_sBGRA: numChannels = 4; break; } size_t elementSize = perChannelDataSize * numChannels; auto retVal = CL_INVALID_VALUE; cl_mem_flags flags = CL_MEM_READ_ONLY; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); image.reset(Image::create( context.get(), ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context->getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal)); ASSERT_NE(nullptr, image.get()); auto sizeMemory = image->getSize(); ASSERT_GT(sizeMemory, 0u); auto srcMemory = new uint8_t[elementSize]; memset(srcMemory, 0xAB, elementSize); memset(image->getCpuAddress(), 0xAB, sizeMemory); auto origin = std::get<2>(GetParam()).offsets; const size_t region[3] = { std::max(testWidth / 2, 1u), std::max(testHeight / 2, 1u), std::max(testDepth / 2, 1u)}; retVal = pCmdQ->enqueueFillImage( image.get(), fillValues, origin, region, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); size_t imgOrigin[] = {0, 0, 0}; size_t imgRegion[] = {testWidth, testHeight, testDepth}; auto dstMemory = new uint8_t[sizeMemory]; retVal = pCmdQ->enqueueReadImage(image.get(), CL_FALSE, imgOrigin, imgRegion, 0, 0, dstMemory, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = pCmdQ->flush(); EXPECT_EQ(CL_SUCCESS, retVal); size_t slicePitch = image->getHostPtrSlicePitch(); size_t rowPitch = image->getHostPtrRowPitch(); auto expected = std::get<0>(GetParam()).expectedValues; if (imageFormat.image_channel_order == CL_sRGBA) { expected = expectedUNORM8sRGB; } if (imageFormat.image_channel_order == CL_sBGRA) { expected = expectedUNORM8sBGR; } auto pImageData = dstMemory; for (size_t z = 0; z < testDepth; ++z) { for (size_t y = 0; y < testHeight; ++y) { for (size_t x = 0; x < testWidth; ++x) { if (z >= origin[2] && z < (origin[2] + region[2]) && y >= origin[1] && y < (origin[1] + region[1]) && x 
>= origin[0] && x < (origin[0] + region[0])) { AUBCommandStreamFixture::expectMemory(&pImageData[x * elementSize], expected, elementSize); } else { AUBCommandStreamFixture::expectMemory(&pImageData[x * elementSize], srcMemory, elementSize); } } pImageData = ptrOffset(pImageData, rowPitch); } pImageData = ptrOffset(pImageData, slicePitch - (rowPitch * (testHeight > 0 ? testHeight : 1))); } delete[] dstMemory; delete[] srcMemory; } INSTANTIATE_TEST_CASE_P(AubFillImage_simple, AubFillImage, ::testing::Combine( ::testing::ValuesIn(fillChannelTypes), ::testing::Values( // channels CL_R, CL_RG, CL_RGBA, CL_sRGBA, CL_sBGRA), ::testing::ValuesIn(imageParams))); enqueue_kernel_aub_tests.cpp000066400000000000000000001146301422164147700341540ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/command_queue/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/flat_batch_buffer_helper_hw.h" #include "shared/source/helpers/ptr_math.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/event/event.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/test/unit_test/aub_tests/fixtures/aub_fixture.h" #include "opencl/test/unit_test/aub_tests/fixtures/hello_world_fixture.h" #include "opencl/test/unit_test/fixtures/hello_world_fixture.h" #include "opencl/test/unit_test/fixtures/simple_arg_fixture.h" #include "opencl/test/unit_test/fixtures/two_walker_fixture.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/test_macros/test_checks_ocl.h" using namespace NEO; extern const HardwareInfo *defaultHwInfo; struct TestParam { cl_uint globalWorkSizeX; cl_uint globalWorkSizeY; 
cl_uint globalWorkSizeZ; cl_uint localWorkSizeX; cl_uint localWorkSizeY; cl_uint localWorkSizeZ; }; static TestParam TestParamTable[] = { {1, 1, 1, 1, 1, 1}, {16, 1, 1, 16, 1, 1}, {32, 1, 1, 16, 1, 1}, {64, 1, 1, 1, 1, 1}, {64, 1, 1, 16, 1, 1}, {64, 1, 1, 64, 1, 1}}; cl_uint TestSimdTable[] = { 8, 16, 32}; namespace ULT { struct AUBHelloWorld : public HelloWorldFixture, public ClHardwareParse, public ::testing::Test { void SetUp() override { HelloWorldFixture::SetUp(); ClHardwareParse::SetUp(); } void TearDown() override { ClHardwareParse::TearDown(); HelloWorldFixture::TearDown(); } }; HWCMDTEST_F(IGFX_GEN8_CORE, AUBHelloWorld, WhenEnqueuingKernelThenAdressesAreAligned) { typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; typedef typename FamilyType::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; typedef typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD MEDIA_INTERFACE_DESCRIPTOR_LOAD; typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; cl_uint workDim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {1, 1, 1}; size_t localWorkSize[3] = {1, 1, 1}; cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event *event = nullptr; // Intentionally mis-align data as we're going to test driver properly aligns commands pDSH->getSpace(sizeof(uint32_t)); auto retVal = pCmdQ->enqueueKernel( pKernel, workDim, globalWorkOffset, globalWorkSize, localWorkSize, numEventsInWaitList, eventWaitList, event); ASSERT_EQ(CL_SUCCESS, retVal); pCmdQ->flush(); parseCommands(*pCmdQ); auto *pWalker = reinterpret_cast(cmdWalker); ASSERT_NE(nullptr, pWalker); auto alignmentIDSA = 32 * sizeof(uint8_t); EXPECT_EQ(0u, pWalker->getIndirectDataStartAddress() % alignmentIDSA); // Check interface descriptor alignment auto pMIDL = reinterpret_cast(cmdMediaInterfaceDescriptorLoad); ASSERT_NE(nullptr, pMIDL); uintptr_t addrIDD = pMIDL->getInterfaceDescriptorDataStartAddress(); auto alignmentIDD = 64 * sizeof(uint8_t); EXPECT_EQ(0u, 
addrIDD % alignmentIDD); // Check kernel start pointer matches hard-coded kernel. auto pExpectedISA = pKernel->getKernelHeap(); auto expectedSize = pKernel->getKernelHeapSize(); auto pSBA = reinterpret_cast(cmdStateBaseAddress); ASSERT_NE(nullptr, pSBA); auto pISA = pKernel->getKernelInfo().getGraphicsAllocation()->getUnderlyingBuffer(); EXPECT_EQ(0, memcmp(pISA, pExpectedISA, expectedSize)); } struct AUBHelloWorldIntegrateTest : public HelloWorldFixture, public ::testing::TestWithParam> { typedef HelloWorldFixture ParentClass; void SetUp() override { std::tie(KernelFixture::simd, param) = GetParam(); if (KernelFixture::simd < HwHelper::get(NEO::defaultHwInfo->platform.eRenderCoreFamily).getMinimalSIMDSize()) { GTEST_SKIP(); } ParentClass::SetUp(); } void TearDown() override { if (!IsSkipped()) { ParentClass::TearDown(); } } template void writeMemory(GraphicsAllocation *allocation) { AUBCommandStreamReceiverHw *aubCsr = nullptr; if (testMode == TestMode::AubTests) { aubCsr = static_cast *>(pCommandStreamReceiver); } else if (testMode == TestMode::AubTestsWithTbx) { auto tbxWithAubCsr = static_cast> *>(pCommandStreamReceiver); aubCsr = static_cast *>(tbxWithAubCsr->aubCSR.get()); tbxWithAubCsr->writeMemory(*allocation); } aubCsr->writeMemory(*allocation); } TestParam param; }; HWTEST_P(AUBHelloWorldIntegrateTest, WhenEnqueingKernelThenExpectationsAreMet) { if (this->simd < UnitTestHelper::smallestTestableSimdSize) { GTEST_SKIP(); } cl_uint workDim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {param.globalWorkSizeX, param.globalWorkSizeY, param.globalWorkSizeZ}; size_t localWorkSize[3] = {param.localWorkSizeX, param.localWorkSizeY, param.localWorkSizeZ}; cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event *event = nullptr; getSimulatedCsr()->initializeEngine(); writeMemory(destBuffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex())); 
writeMemory(srcBuffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex())); auto retVal = this->pCmdQ->enqueueKernel( this->pKernel, workDim, globalWorkOffset, globalWorkSize, localWorkSize, numEventsInWaitList, eventWaitList, event); ASSERT_EQ(CL_SUCCESS, retVal); pCmdQ->flush(); // Compute our memory expecations based on kernel execution auto globalWorkItems = globalWorkSize[0] * globalWorkSize[1] * globalWorkSize[2]; auto sizeWritten = globalWorkItems * sizeof(float); auto pDestGpuAddress = reinterpret_cast((destBuffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddress())); AUBCommandStreamFixture::expectMemory(pDestGpuAddress, this->pSrcMemory, sizeWritten); // If the copykernel wasn't max sized, ensure we didn't overwrite existing memory if (sizeWritten < this->sizeUserMemory) { auto sizeRemaining = this->sizeUserMemory - sizeWritten; auto pDestUnwrittenMemory = ptrOffset(pDestGpuAddress, sizeWritten); auto pUnwrittenMemory = ptrOffset(this->pDestMemory, sizeWritten); AUBCommandStreamFixture::expectMemory(pDestUnwrittenMemory, pUnwrittenMemory, sizeRemaining); } } INSTANTIATE_TEST_CASE_P( AUB, AUBHelloWorldIntegrateTest, ::testing::Combine( ::testing::ValuesIn(TestSimdTable), ::testing::ValuesIn(TestParamTable))); struct AUBSimpleArg : public SimpleArgFixture, public ClHardwareParse, public ::testing::Test { using SimpleArgKernelFixture::SetUp; void SetUp() override { SimpleArgFixture::SetUp(); ClHardwareParse::SetUp(); } void TearDown() override { ClHardwareParse::TearDown(); SimpleArgFixture::TearDown(); } }; HWCMDTEST_F(IGFX_GEN8_CORE, AUBSimpleArg, WhenEnqueingKernelThenAdressesAreAligned) { typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; typedef typename FamilyType::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; typedef typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD MEDIA_INTERFACE_DESCRIPTOR_LOAD; typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; cl_uint workDim = 1; size_t globalWorkOffset[3] 
= {0, 0, 0}; size_t globalWorkSize[3] = {1, 1, 1}; size_t localWorkSize[3] = {1, 1, 1}; cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event *event = nullptr; // Intentionally mis-align data as we're going to test driver properly aligns commands pDSH->getSpace(sizeof(uint32_t)); auto retVal = pCmdQ->enqueueKernel( pKernel, workDim, globalWorkOffset, globalWorkSize, localWorkSize, numEventsInWaitList, eventWaitList, event); ASSERT_EQ(CL_SUCCESS, retVal); parseCommands(*pCmdQ); pCmdQ->flush(); auto *pWalker = reinterpret_cast(cmdWalker); ASSERT_NE(nullptr, pWalker); auto alignmentIDSA = 32 * sizeof(uint8_t); EXPECT_EQ(0u, pWalker->getIndirectDataStartAddress() % alignmentIDSA); // Check interface descriptor alignment auto pMIDL = reinterpret_cast(cmdMediaInterfaceDescriptorLoad); ASSERT_NE(nullptr, pMIDL); uintptr_t addrIDD = pMIDL->getInterfaceDescriptorDataStartAddress(); auto alignmentIDD = 64 * sizeof(uint8_t); EXPECT_EQ(0u, addrIDD % alignmentIDD); // Check kernel start pointer matches hard-coded kernel. 
auto pExpectedISA = pKernel->getKernelHeap(); auto expectedSize = pKernel->getKernelHeapSize(); auto pSBA = reinterpret_cast(cmdStateBaseAddress); ASSERT_NE(nullptr, pSBA); auto pISA = pKernel->getKernelInfo().getGraphicsAllocation()->getUnderlyingBuffer(); EXPECT_EQ(0, memcmp(pISA, pExpectedISA, expectedSize)); } HWTEST_F(AUBSimpleArg, givenAubCommandStreamerReceiverWhenBatchBufferFlateningIsForcedThenDumpedAubIsStillValid) { cl_uint workDim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {1, 1, 1}; size_t localWorkSize[3] = {1, 1, 1}; cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event *event = nullptr; DebugManagerStateRestore dbgRestore; DebugManager.flags.FlattenBatchBufferForAUBDump.set(true); pCmdQ->getGpgpuCommandStreamReceiver().overwriteFlatBatchBufferHelper(new FlatBatchBufferHelperHw(*pCmdQ->getDevice().getExecutionEnvironment())); pCmdQ->getGpgpuCommandStreamReceiver().overrideDispatchPolicy(DispatchMode::ImmediateDispatch); auto retVal = pCmdQ->enqueueKernel( pKernel, workDim, globalWorkOffset, globalWorkSize, localWorkSize, numEventsInWaitList, eventWaitList, event); ASSERT_EQ(CL_SUCCESS, retVal); pCmdQ->flush(); } struct AUBSimpleArgIntegrateTest : public SimpleArgFixture, public ::testing::TestWithParam> { typedef SimpleArgFixture ParentClass; void SetUp() override { std::tie(simd, param) = GetParam(); if (simd < HwHelper::get(NEO::defaultHwInfo->platform.eRenderCoreFamily).getMinimalSIMDSize()) { GTEST_SKIP(); } ParentClass::SetUp(); } void TearDown() override { if (!IsSkipped()) { ParentClass::TearDown(); } } cl_uint simd; TestParam param; }; HWTEST_P(AUBSimpleArgIntegrateTest, WhenEnqueingKernelThenExpectationsAreMet) { cl_uint workDim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {param.globalWorkSizeX, param.globalWorkSizeY, param.globalWorkSizeZ}; size_t localWorkSize[3] = {param.localWorkSizeX, param.localWorkSizeY, param.localWorkSizeZ}; cl_uint numEventsInWaitList = 
0; cl_event *eventWaitList = nullptr; cl_event *event = nullptr; auto retVal = this->pCmdQ->enqueueKernel( this->pKernel, workDim, globalWorkOffset, globalWorkSize, localWorkSize, numEventsInWaitList, eventWaitList, event); ASSERT_EQ(CL_SUCCESS, retVal); pCmdQ->flush(); // Compute our memory expecations based on kernel execution size_t globalWorkItems = globalWorkSize[0] * globalWorkSize[1] * globalWorkSize[2]; size_t sizeWritten = globalWorkItems * sizeof(int); AUBCommandStreamFixture::expectMemory(this->pDestMemory, this->pExpectedMemory, sizeWritten); // If the copykernel wasn't max sized, ensure we didn't overwrite existing memory if (sizeWritten < this->sizeUserMemory) { auto sizeRemaining = this->sizeUserMemory - sizeWritten; auto pUnwrittenMemory = ptrOffset(this->pDestMemory, sizeWritten); AUBCommandStreamFixture::expectMemory(pUnwrittenMemory, pUnwrittenMemory, sizeRemaining); } } INSTANTIATE_TEST_CASE_P( AUB, AUBSimpleArgIntegrateTest, ::testing::Combine( ::testing::ValuesIn(TestSimdTable), ::testing::ValuesIn(TestParamTable))); } // namespace ULT struct AUBSimpleArgNonUniformFixture : public KernelAUBFixture { void SetUp() override { REQUIRE_OCL_21_OR_SKIP(NEO::defaultHwInfo); KernelAUBFixture::SetUp(); sizeUserMemory = alignUp(typeItems * typeSize, 64); destMemory = alignedMalloc(sizeUserMemory, 4096); ASSERT_NE(nullptr, destMemory); for (uint32_t i = 0; i < typeItems; i++) { *(static_cast(destMemory) + i) = 0xdeadbeef; } expectedMemory = alignedMalloc(sizeUserMemory, 4096); ASSERT_NE(nullptr, expectedMemory); memset(expectedMemory, 0x0, sizeUserMemory); } void initializeExpectedMemory(size_t globalX, size_t globalY, size_t globalZ) { uint32_t id = 0; size_t testGlobalMax = globalX * globalY * globalZ; ASSERT_GT(typeItems, testGlobalMax); int maxId = static_cast(testGlobalMax); argVal = maxId; kernel->setArg(0, sizeof(int), &argVal); int *expectedData = static_cast(expectedMemory); for (size_t z = 0; z < globalZ; z++) { for (size_t y = 0; y < globalY; 
y++) { for (size_t x = 0; x < globalX; x++) { *(expectedData + id) = id; ++id; } } } *(static_cast(destMemory) + maxId) = 0; *(expectedData + maxId) = maxId; outBuffer.reset(Buffer::create(context, CL_MEM_COPY_HOST_PTR, alignUp(sizeUserMemory, 4096), destMemory, retVal)); bufferGpuAddress = reinterpret_cast(outBuffer->getGraphicsAllocation(device->getRootDeviceIndex())->getGpuAddress()); kernel->setArg(1, outBuffer.get()); sizeWrittenMemory = maxId * typeSize; //add single int size for atomic sum of all work-items sizeWrittenMemory += typeSize; sizeRemainderMemory = sizeUserMemory - sizeWrittenMemory; expectedRemainderMemory = alignedMalloc(sizeRemainderMemory, 4096); ASSERT_NE(nullptr, expectedRemainderMemory); int *expectedReminderData = static_cast(expectedRemainderMemory); size_t reminderElements = sizeRemainderMemory / typeSize; for (size_t i = 0; i < reminderElements; i++) { *(expectedReminderData + i) = 0xdeadbeef; } remainderBufferGpuAddress = ptrOffset(bufferGpuAddress, sizeWrittenMemory); } void TearDown() override { if (NEO::defaultHwInfo->capabilityTable.supportsOcl21Features == false) { return; } if (destMemory) { alignedFree(destMemory); destMemory = nullptr; } if (expectedMemory) { alignedFree(expectedMemory); expectedMemory = nullptr; } if (expectedRemainderMemory) { alignedFree(expectedRemainderMemory); expectedRemainderMemory = nullptr; } KernelAUBFixture::TearDown(); } unsigned int deviceClVersionSupport; const size_t typeSize = sizeof(int); const size_t typeItems = 40 * 40 * 40; size_t sizeWrittenMemory = 0; size_t sizeUserMemory; size_t sizeRemainderMemory; int argVal = 0x22222222; void *destMemory = nullptr; void *expectedMemory = nullptr; void *expectedRemainderMemory = nullptr; void *remainderBufferGpuAddress = nullptr; void *bufferGpuAddress = nullptr; std::unique_ptr outBuffer; ClHardwareParse hwParser; }; using AUBSimpleKernelStatelessTest = Test>; HWTEST_F(AUBSimpleKernelStatelessTest, 
givenSimpleKernelWhenStatelessPathIsUsedThenExpectCorrectBuffer) { constexpr size_t bufferSize = MemoryConstants::pageSize; cl_uint workDim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {bufferSize, 1, 1}; size_t localWorkSize[3] = {1, 1, 1}; cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event *event = nullptr; uint8_t bufferData[bufferSize] = {}; uint8_t bufferExpected[bufferSize]; memset(bufferExpected, 0xCD, bufferSize); auto pBuffer = std::unique_ptr(Buffer::create(context, CL_MEM_USE_HOST_PTR | CL_MEM_ALLOW_UNRESTRICTED_SIZE_INTEL, bufferSize, bufferData, retVal)); ASSERT_NE(nullptr, pBuffer); kernel->setArg(0, pBuffer.get()); retVal = this->pCmdQ->enqueueKernel( kernel.get(), workDim, globalWorkOffset, globalWorkSize, localWorkSize, numEventsInWaitList, eventWaitList, event); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_FALSE(this->kernel->getKernelInfo().kernelDescriptor.payloadMappings.explicitArgs[0].as().isPureStateful()); EXPECT_TRUE(this->kernel->getKernelInfo().kernelDescriptor.kernelAttributes.supportsBuffersBiggerThan4Gb()); this->pCmdQ->flush(); expectMemory(reinterpret_cast(pBuffer->getGraphicsAllocation(device->getRootDeviceIndex())->getGpuAddress()), bufferExpected, bufferSize); } using AUBSimpleArgNonUniformTest = Test; HWTEST_F(AUBSimpleArgNonUniformTest, givenOpenCL20SupportWhenProvidingWork1DimNonUniformGroupThenExpectTwoWalkers) { using WALKER_TYPE = typename FamilyType::WALKER_TYPE; cl_uint workDim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {39, 1, 1}; size_t localWorkSize[3] = {32, 1, 1}; cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event *event = nullptr; initializeExpectedMemory(globalWorkSize[0], globalWorkSize[1], globalWorkSize[2]); auto retVal = this->pCmdQ->enqueueKernel( this->kernel, workDim, globalWorkOffset, globalWorkSize, localWorkSize, numEventsInWaitList, eventWaitList, event); ASSERT_EQ(CL_SUCCESS, retVal); 
hwParser.parseCommands(*pCmdQ); uint32_t walkerCount = hwParser.getCommandCount(); EXPECT_EQ(2u, walkerCount); pCmdQ->flush(); expectMemory(bufferGpuAddress, this->expectedMemory, sizeWrittenMemory); expectMemory(remainderBufferGpuAddress, this->expectedRemainderMemory, sizeRemainderMemory); } HWTEST_F(AUBSimpleArgNonUniformTest, givenOpenCL20SupportWhenProvidingWork2DimNonUniformGroupInXDimensionThenExpectTwoWalkers) { using WALKER_TYPE = typename FamilyType::WALKER_TYPE; cl_uint workDim = 2; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {39, 32, 1}; size_t localWorkSize[3] = {16, 16, 1}; cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event *event = nullptr; initializeExpectedMemory(globalWorkSize[0], globalWorkSize[1], globalWorkSize[2]); auto retVal = this->pCmdQ->enqueueKernel( this->kernel, workDim, globalWorkOffset, globalWorkSize, localWorkSize, numEventsInWaitList, eventWaitList, event); ASSERT_EQ(CL_SUCCESS, retVal); hwParser.parseCommands(*pCmdQ); uint32_t walkerCount = hwParser.getCommandCount(); EXPECT_EQ(2u, walkerCount); pCmdQ->flush(); expectMemory(bufferGpuAddress, this->expectedMemory, sizeWrittenMemory); expectMemory(remainderBufferGpuAddress, this->expectedRemainderMemory, sizeRemainderMemory); } HWTEST_F(AUBSimpleArgNonUniformTest, givenOpenCL20SupportWhenProvidingWork2DimNonUniformGroupInYDimensionThenExpectTwoWalkers) { using WALKER_TYPE = typename FamilyType::WALKER_TYPE; cl_uint workDim = 2; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {32, 39, 1}; size_t localWorkSize[3] = {16, 16, 1}; cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event *event = nullptr; initializeExpectedMemory(globalWorkSize[0], globalWorkSize[1], globalWorkSize[2]); auto retVal = this->pCmdQ->enqueueKernel( this->kernel, workDim, globalWorkOffset, globalWorkSize, localWorkSize, numEventsInWaitList, eventWaitList, event); ASSERT_EQ(CL_SUCCESS, retVal); 
hwParser.parseCommands(*pCmdQ); uint32_t walkerCount = hwParser.getCommandCount(); EXPECT_EQ(2u, walkerCount); pCmdQ->flush(); expectMemory(bufferGpuAddress, this->expectedMemory, sizeWrittenMemory); expectMemory(remainderBufferGpuAddress, this->expectedRemainderMemory, sizeRemainderMemory); } HWTEST_F(AUBSimpleArgNonUniformTest, givenOpenCL20SupportWhenProvidingWork2DimNonUniformGroupInXandYDimensionThenExpectFourWalkers) { using WALKER_TYPE = typename FamilyType::WALKER_TYPE; cl_uint workDim = 2; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {39, 39, 1}; size_t localWorkSize[3] = {16, 16, 1}; cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event *event = nullptr; initializeExpectedMemory(globalWorkSize[0], globalWorkSize[1], globalWorkSize[2]); auto retVal = this->pCmdQ->enqueueKernel( this->kernel, workDim, globalWorkOffset, globalWorkSize, localWorkSize, numEventsInWaitList, eventWaitList, event); ASSERT_EQ(CL_SUCCESS, retVal); hwParser.parseCommands(*pCmdQ); uint32_t walkerCount = hwParser.getCommandCount(); EXPECT_EQ(4u, walkerCount); pCmdQ->flush(); expectMemory(bufferGpuAddress, this->expectedMemory, sizeWrittenMemory); expectMemory(remainderBufferGpuAddress, this->expectedRemainderMemory, sizeRemainderMemory); } HWTEST_F(AUBSimpleArgNonUniformTest, givenOpenCL20SupportWhenProvidingWork3DimNonUniformGroupInXDimensionThenExpectTwoWalkers) { using WALKER_TYPE = typename FamilyType::WALKER_TYPE; cl_uint workDim = 3; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {39, 32, 32}; size_t localWorkSize[3] = {8, 8, 2}; cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event *event = nullptr; initializeExpectedMemory(globalWorkSize[0], globalWorkSize[1], globalWorkSize[2]); auto retVal = this->pCmdQ->enqueueKernel( this->kernel, workDim, globalWorkOffset, globalWorkSize, localWorkSize, numEventsInWaitList, eventWaitList, event); ASSERT_EQ(CL_SUCCESS, retVal); 
hwParser.parseCommands(*pCmdQ); uint32_t walkerCount = hwParser.getCommandCount(); EXPECT_EQ(2u, walkerCount); pCmdQ->flush(); expectMemory(bufferGpuAddress, this->expectedMemory, sizeWrittenMemory); expectMemory(remainderBufferGpuAddress, this->expectedRemainderMemory, sizeRemainderMemory); } HWTEST_F(AUBSimpleArgNonUniformTest, givenOpenCL20SupportWhenProvidingWork3DimNonUniformGroupInYDimensionThenExpectTwoWalkers) { using WALKER_TYPE = typename FamilyType::WALKER_TYPE; cl_uint workDim = 3; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {32, 39, 32}; size_t localWorkSize[3] = {8, 8, 2}; cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event *event = nullptr; initializeExpectedMemory(globalWorkSize[0], globalWorkSize[1], globalWorkSize[2]); auto retVal = this->pCmdQ->enqueueKernel( this->kernel, workDim, globalWorkOffset, globalWorkSize, localWorkSize, numEventsInWaitList, eventWaitList, event); ASSERT_EQ(CL_SUCCESS, retVal); hwParser.parseCommands(*pCmdQ); uint32_t walkerCount = hwParser.getCommandCount(); EXPECT_EQ(2u, walkerCount); pCmdQ->flush(); expectMemory(bufferGpuAddress, this->expectedMemory, sizeWrittenMemory); expectMemory(remainderBufferGpuAddress, this->expectedRemainderMemory, sizeRemainderMemory); } HWTEST_F(AUBSimpleArgNonUniformTest, givenOpenCL20SupportWhenProvidingWork3DimNonUniformGroupInZDimensionThenExpectTwoWalkers) { using WALKER_TYPE = typename FamilyType::WALKER_TYPE; cl_uint workDim = 3; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {32, 32, 39}; size_t localWorkSize[3] = {8, 2, 8}; cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event *event = nullptr; initializeExpectedMemory(globalWorkSize[0], globalWorkSize[1], globalWorkSize[2]); auto retVal = this->pCmdQ->enqueueKernel( this->kernel, workDim, globalWorkOffset, globalWorkSize, localWorkSize, numEventsInWaitList, eventWaitList, event); ASSERT_EQ(CL_SUCCESS, retVal); hwParser.parseCommands(*pCmdQ); 
uint32_t walkerCount = hwParser.getCommandCount(); EXPECT_EQ(2u, walkerCount); pCmdQ->flush(); expectMemory(bufferGpuAddress, this->expectedMemory, sizeWrittenMemory); expectMemory(remainderBufferGpuAddress, this->expectedRemainderMemory, sizeRemainderMemory); } HWTEST_F(AUBSimpleArgNonUniformTest, givenOpenCL20SupportWhenProvidingWork3DimNonUniformGroupInXandYDimensionThenExpectFourWalkers) { using WALKER_TYPE = typename FamilyType::WALKER_TYPE; cl_uint workDim = 3; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {39, 39, 32}; size_t localWorkSize[3] = {8, 8, 2}; cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event *event = nullptr; initializeExpectedMemory(globalWorkSize[0], globalWorkSize[1], globalWorkSize[2]); auto retVal = this->pCmdQ->enqueueKernel( this->kernel, workDim, globalWorkOffset, globalWorkSize, localWorkSize, numEventsInWaitList, eventWaitList, event); ASSERT_EQ(CL_SUCCESS, retVal); hwParser.parseCommands(*pCmdQ); uint32_t walkerCount = hwParser.getCommandCount(); EXPECT_EQ(4u, walkerCount); pCmdQ->flush(); expectMemory(bufferGpuAddress, this->expectedMemory, sizeWrittenMemory); expectMemory(remainderBufferGpuAddress, this->expectedRemainderMemory, sizeRemainderMemory); } HWTEST_F(AUBSimpleArgNonUniformTest, givenOpenCL20SupportWhenProvidingWork3DimNonUniformGroupInXandZDimensionThenExpectFourWalkers) { using WALKER_TYPE = typename FamilyType::WALKER_TYPE; cl_uint workDim = 3; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {39, 32, 39}; size_t localWorkSize[3] = {8, 2, 8}; cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event *event = nullptr; initializeExpectedMemory(globalWorkSize[0], globalWorkSize[1], globalWorkSize[2]); auto retVal = this->pCmdQ->enqueueKernel( this->kernel, workDim, globalWorkOffset, globalWorkSize, localWorkSize, numEventsInWaitList, eventWaitList, event); ASSERT_EQ(CL_SUCCESS, retVal); hwParser.parseCommands(*pCmdQ); uint32_t walkerCount 
= hwParser.getCommandCount(); EXPECT_EQ(4u, walkerCount); pCmdQ->flush(); expectMemory(bufferGpuAddress, this->expectedMemory, sizeWrittenMemory); expectMemory(remainderBufferGpuAddress, this->expectedRemainderMemory, sizeRemainderMemory); } HWTEST_F(AUBSimpleArgNonUniformTest, givenOpenCL20SupportWhenProvidingWork3DimNonUniformGroupInYandZDimensionThenExpectFourWalkers) { using WALKER_TYPE = typename FamilyType::WALKER_TYPE; cl_uint workDim = 3; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {32, 39, 39}; size_t localWorkSize[3] = {2, 8, 8}; cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event *event = nullptr; initializeExpectedMemory(globalWorkSize[0], globalWorkSize[1], globalWorkSize[2]); auto retVal = this->pCmdQ->enqueueKernel( this->kernel, workDim, globalWorkOffset, globalWorkSize, localWorkSize, numEventsInWaitList, eventWaitList, event); ASSERT_EQ(CL_SUCCESS, retVal); hwParser.parseCommands(*pCmdQ); uint32_t walkerCount = hwParser.getCommandCount(); EXPECT_EQ(4u, walkerCount); pCmdQ->flush(); expectMemory(bufferGpuAddress, this->expectedMemory, sizeWrittenMemory); expectMemory(remainderBufferGpuAddress, this->expectedRemainderMemory, sizeRemainderMemory); } HWTEST_F(AUBSimpleArgNonUniformTest, givenOpenCL20SupportWhenProvidingWork3DimNonUniformGroupInXandYandZDimensionThenExpectEightWalkers) { using WALKER_TYPE = typename FamilyType::WALKER_TYPE; cl_uint workDim = 3; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {39, 39, 39}; size_t localWorkSize[3] = {8, 8, 2}; cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event *event = nullptr; initializeExpectedMemory(globalWorkSize[0], globalWorkSize[1], globalWorkSize[2]); auto retVal = this->pCmdQ->enqueueKernel( this->kernel, workDim, globalWorkOffset, globalWorkSize, localWorkSize, numEventsInWaitList, eventWaitList, event); ASSERT_EQ(CL_SUCCESS, retVal); hwParser.parseCommands(*pCmdQ); uint32_t walkerCount = 
hwParser.getCommandCount(); EXPECT_EQ(8u, walkerCount); pCmdQ->flush(); expectMemory(bufferGpuAddress, this->expectedMemory, sizeWrittenMemory); expectMemory(remainderBufferGpuAddress, this->expectedRemainderMemory, sizeRemainderMemory); } struct AUBBindlessKernel : public KernelAUBFixture, public ::testing::Test { void SetUp() override { DebugManager.flags.UseBindlessMode.set(1); DebugManager.flags.UseExternalAllocatorForSshAndDsh.set(1); KernelAUBFixture::SetUp(); } void TearDown() override { KernelAUBFixture::TearDown(); } DebugManagerStateRestore restorer; }; HWTEST2_F(AUBBindlessKernel, DISABLED_givenBindlessCopyKernelWhenEnqueuedThenResultsValidate, IsAtLeastSkl) { constexpr size_t bufferSize = MemoryConstants::pageSize; auto simulatedCsr = AUBFixture::getSimulatedCsr(); simulatedCsr->initializeEngine(); createKernel(std::string("bindless_stateful_copy_buffer"), std::string("StatefulCopyBuffer")); cl_uint workDim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {bufferSize / 2, 1, 1}; size_t localWorkSize[3] = {1, 1, 1}; cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event *event = nullptr; uint8_t bufferDataSrc[bufferSize]; uint8_t bufferDataDst[bufferSize]; memset(bufferDataSrc, 1, bufferSize); memset(bufferDataDst, 0, bufferSize); auto pBufferSrc = std::unique_ptr(Buffer::create(context, CL_MEM_READ_WRITE, bufferSize, nullptr, retVal)); ASSERT_NE(nullptr, pBufferSrc); auto pBufferDst = std::unique_ptr(Buffer::create(context, CL_MEM_READ_WRITE, bufferSize, nullptr, retVal)); ASSERT_NE(nullptr, pBufferDst); memcpy(pBufferSrc->getGraphicsAllocation(device->getRootDeviceIndex())->getUnderlyingBuffer(), bufferDataSrc, bufferSize); memcpy(pBufferDst->getGraphicsAllocation(device->getRootDeviceIndex())->getUnderlyingBuffer(), bufferDataDst, bufferSize); simulatedCsr->writeMemory(*pBufferSrc->getGraphicsAllocation(device->getRootDeviceIndex())); 
simulatedCsr->writeMemory(*pBufferDst->getGraphicsAllocation(device->getRootDeviceIndex())); //Src kernel->setArg(0, pBufferSrc.get()); //Dst kernel->setArg(1, pBufferDst.get()); retVal = this->pCmdQ->enqueueKernel( kernel.get(), workDim, globalWorkOffset, globalWorkSize, localWorkSize, numEventsInWaitList, eventWaitList, event); ASSERT_EQ(CL_SUCCESS, retVal); globalWorkOffset[0] = bufferSize / 2; retVal = this->pCmdQ->enqueueKernel( kernel.get(), workDim, globalWorkOffset, globalWorkSize, localWorkSize, numEventsInWaitList, eventWaitList, event); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(this->kernel->getKernelInfo().kernelDescriptor.payloadMappings.explicitArgs[0].as().isPureStateful()); this->pCmdQ->finish(); expectMemory(reinterpret_cast(pBufferDst->getGraphicsAllocation(device->getRootDeviceIndex())->getGpuAddress()), bufferDataSrc, bufferSize); } HWTEST2_F(AUBBindlessKernel, DISABLED_givenBindlessCopyImageKernelWhenEnqueuedThenResultsValidate, IsAtLeastSkl) { constexpr unsigned int testWidth = 5; constexpr unsigned int testHeight = 1; constexpr unsigned int testDepth = 1; auto simulatedCsr = AUBFixture::getSimulatedCsr(); simulatedCsr->initializeEngine(); createKernel(std::string("bindless_copy_buffer_to_image"), std::string("CopyBufferToImage3d")); constexpr size_t imageSize = testWidth * testHeight * testDepth; cl_uint workDim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {imageSize, 1, 1}; size_t localWorkSize[3] = {1, 1, 1}; cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event *event = nullptr; uint8_t imageDataSrc[imageSize]; uint8_t imageDataDst[imageSize + 1]; memset(imageDataSrc, 1, imageSize); memset(imageDataDst, 0, imageSize + 1); cl_image_format imageFormat = {0}; cl_image_desc imageDesc = {0}; imageFormat.image_channel_data_type = CL_UNSIGNED_INT8; imageFormat.image_channel_order = CL_R; imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D; imageDesc.image_width = testWidth; imageDesc.image_height = 
testHeight; imageDesc.image_depth = testDepth; imageDesc.image_array_size = 1; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; auto retVal = CL_INVALID_VALUE; cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, device->getHardwareInfo().capabilityTable.supportsOcl21Features); auto image = std::unique_ptr(Image::create( contextCl, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &contextCl->getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imageDesc, imageDataDst, retVal)); ASSERT_NE(nullptr, image.get()); EXPECT_FALSE(image->isMemObjZeroCopy()); auto bufferSrc = std::unique_ptr(Buffer::create(context, CL_MEM_READ_WRITE, imageSize, nullptr, retVal)); ASSERT_NE(nullptr, bufferSrc); memcpy(image->getGraphicsAllocation(device->getRootDeviceIndex())->getUnderlyingBuffer(), imageDataDst, imageSize); memcpy(bufferSrc->getGraphicsAllocation(device->getRootDeviceIndex())->getUnderlyingBuffer(), imageDataSrc, imageSize); simulatedCsr->writeMemory(*bufferSrc->getGraphicsAllocation(device->getRootDeviceIndex())); simulatedCsr->writeMemory(*image->getGraphicsAllocation(device->getRootDeviceIndex())); kernel->setArg(0, bufferSrc.get()); kernel->setArg(1, image.get()); int srcOffset = 0; int dstOffset[4] = {0, 0, 0, 0}; int pitch[2] = {0, 0}; kernel->setArg(2, sizeof(srcOffset), &srcOffset); kernel->setArg(3, sizeof(dstOffset), &dstOffset); kernel->setArg(4, sizeof(pitch), &pitch); retVal = this->pCmdQ->enqueueKernel( kernel.get(), workDim, globalWorkOffset, globalWorkSize, localWorkSize, numEventsInWaitList, eventWaitList, event); EXPECT_EQ(CL_SUCCESS, retVal); retVal = this->pCmdQ->finish(); EXPECT_EQ(CL_SUCCESS, retVal); expectMemory(reinterpret_cast(image->getGraphicsAllocation(device->getRootDeviceIndex())->getGpuAddress()), imageDataSrc, imageSize); } 
enqueue_map_buffer_aub_tests.cpp000066400000000000000000000043471422164147700350050ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/command_queue/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/aub_tests/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" using namespace NEO; struct AUBMapBuffer : public CommandEnqueueAUBFixture, public ::testing::Test { void SetUp() override { CommandEnqueueAUBFixture::SetUp(); } void TearDown() override { CommandEnqueueAUBFixture::TearDown(); } }; HWTEST_F(AUBMapBuffer, WhenMappingAndUnmappingThenExpectationsAreMet) { MockContext context(this->pCmdQ->getDevice().getSpecializedDevice()); auto retVal = CL_INVALID_VALUE; size_t bufferSize = 10; std::unique_ptr buffer(Buffer::create( &context, CL_MEM_READ_WRITE, bufferSize, nullptr, retVal)); ASSERT_NE(nullptr, buffer); uint8_t pattern[] = {0xFF}; size_t patternSize = sizeof(pattern); retVal = pCmdQ->enqueueFillBuffer( buffer.get(), pattern, patternSize, 0, bufferSize, 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); auto mappedPtr = pCmdQ->enqueueMapBuffer(buffer.get(), CL_TRUE, CL_MAP_WRITE | CL_MAP_READ, 0, bufferSize, 0, nullptr, nullptr, retVal); ASSERT_EQ(CL_SUCCESS, retVal); // write to mapped ptr auto mappedPtrStart = static_cast(mappedPtr); for (uint32_t i = 0; i < bufferSize; i++) { *(mappedPtrStart + i) = i; } pCmdQ->enqueueUnmapMemObject(buffer.get(), mappedPtr, 0, nullptr, nullptr); // verify unmap std::unique_ptr readMemory(new uint8_t[bufferSize]); buffer->forceDisallowCPUCopy = true; retVal = pCmdQ->enqueueReadBuffer(buffer.get(), CL_FALSE, 0, bufferSize, readMemory.get(), nullptr, 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); retVal = pCmdQ->flush(); ASSERT_EQ(CL_SUCCESS, retVal); for (size_t i = 0; i < bufferSize; 
i++) { AUBCommandStreamFixture::expectMemory(&readMemory[i], &i, sizeof(uint8_t)); } } enqueue_map_image_aub_tests.cpp000066400000000000000000000216531422164147700346150ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/command_queue/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/ptr_math.h" #include "shared/source/memory_manager/os_agnostic_memory_manager.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/mem_obj/image.h" #include "opencl/test/unit_test/aub_tests/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" using namespace NEO; struct MapImageParams { cl_mem_object_type imageType; size_t offsets[3]; } mapImageParams[] = { {CL_MEM_OBJECT_IMAGE1D, {0u, 0u, 0u}}, {CL_MEM_OBJECT_IMAGE1D, {1u, 0u, 0u}}, {CL_MEM_OBJECT_IMAGE2D, {0u, 0u, 0u}}, {CL_MEM_OBJECT_IMAGE2D, {1u, 2u, 0u}}, {CL_MEM_OBJECT_IMAGE3D, {0u, 0u, 0u}}, {CL_MEM_OBJECT_IMAGE3D, {1u, 2u, 3u}}, }; struct AUBMapImage : public CommandDeviceFixture, public AUBCommandStreamFixture, public ::testing::WithParamInterface>, public ::testing::Test { typedef AUBCommandStreamFixture CommandStreamFixture; using AUBCommandStreamFixture::SetUp; void SetUp() override { if (!(defaultHwInfo->capabilityTable.supportsImages)) { GTEST_SKIP(); } CommandDeviceFixture::SetUp(cl_command_queue_properties(0)); CommandStreamFixture::SetUp(pCmdQ); context = std::make_unique(pClDevice); } void TearDown() override { srcImage.reset(); context.reset(); CommandStreamFixture::TearDown(); CommandDeviceFixture::TearDown(); } std::unique_ptr context; std::unique_ptr srcImage; }; HWTEST_P(AUBMapImage, WhenMappingAndUnmappingThenExpectationsAreMet) { const unsigned int testWidth = 5; const unsigned int testHeight = std::get<2>(GetParam()).imageType != 
CL_MEM_OBJECT_IMAGE1D ? 5 : 1; const unsigned int testDepth = std::get<2>(GetParam()).imageType == CL_MEM_OBJECT_IMAGE3D ? 5 : 1; cl_image_format imageFormat; cl_image_desc imageDesc; // clang-format off imageFormat.image_channel_data_type = std::get<0>( GetParam( ) ); imageFormat.image_channel_order = std::get<1>( GetParam( ) ); imageDesc.image_type = std::get<2>( GetParam( ) ).imageType; imageDesc.image_width = testWidth; imageDesc.image_height = testHeight; imageDesc.image_depth = testDepth; imageDesc.image_array_size = 1; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = NULL; // clang-format on auto perChannelDataSize = 0; switch (imageFormat.image_channel_data_type) { case CL_UNORM_INT8: perChannelDataSize = 1; break; case CL_SIGNED_INT16: case CL_HALF_FLOAT: perChannelDataSize = 2; break; case CL_UNSIGNED_INT32: case CL_FLOAT: perChannelDataSize = 4; break; } auto numChannels = 0u; switch (imageFormat.image_channel_order) { case CL_R: numChannels = 1; break; case CL_RG: numChannels = 2; break; case CL_RGBA: numChannels = 4; break; } size_t elementSize = perChannelDataSize * numChannels; auto sizeMemory = testWidth * alignUp(testHeight, 4) * testDepth * elementSize; auto srcMemory = new (std::nothrow) uint8_t[sizeMemory]; ASSERT_NE(nullptr, srcMemory); for (unsigned i = 0; i < sizeMemory; ++i) { uint8_t origValue = i; memcpy(srcMemory + i, &origValue, sizeof(origValue)); } auto retVal = CL_INVALID_VALUE; cl_mem_flags flags = CL_MEM_COPY_HOST_PTR; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, pClDevice->getHardwareInfo().capabilityTable.supportsOcl21Features); srcImage.reset(Image::create( context.get(), ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context->getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imageDesc, srcMemory, retVal)); ASSERT_NE(nullptr, srcImage.get()); auto origin = 
std::get<2>(GetParam()).offsets; const size_t region[3] = {std::max(testWidth / 2, 1u), std::max(testHeight / 2, 1u), std::max(testDepth / 2, 1u)}; size_t inputRowPitch = testWidth * elementSize; size_t inputSlicePitch = inputRowPitch * testHeight; size_t mappedRowPitch; size_t mappedSlicePitch; auto mappedPtr = pCmdQ->enqueueMapImage(srcImage.get(), CL_TRUE, CL_MAP_WRITE | CL_MAP_READ, origin, region, &mappedRowPitch, &mappedSlicePitch, 0, nullptr, nullptr, retVal); EXPECT_EQ(CL_SUCCESS, retVal); uint8_t *mappedPtrStart; uint8_t *srcMemoryStart; bool isGpuCopy = srcImage->isTiledAllocation() || !MemoryPool::isSystemMemoryPool( srcImage->getGraphicsAllocation(context->getDevice(0)->getRootDeviceIndex())->getMemoryPool()); if (isGpuCopy) { mappedPtrStart = static_cast(mappedPtr); srcMemoryStart = srcMemory; // validate mapped region srcMemoryStart = ptrOffset(srcMemoryStart, inputSlicePitch * origin[2]); srcMemoryStart = ptrOffset(srcMemoryStart, inputRowPitch * origin[1]); srcMemoryStart = ptrOffset(srcMemoryStart, elementSize * origin[0]); for (size_t z = 0; z < region[2]; z++) { for (size_t y = 0; y < region[1]; y++) { AUBCommandStreamFixture::expectMemory(mappedPtrStart, srcMemoryStart, elementSize * region[0]); mappedPtrStart = ptrOffset(mappedPtrStart, mappedRowPitch); srcMemoryStart = ptrOffset(srcMemoryStart, inputRowPitch); } mappedPtrStart = ptrOffset(mappedPtrStart, mappedSlicePitch - (mappedRowPitch * region[1])); srcMemoryStart = ptrOffset(srcMemoryStart, inputSlicePitch - (inputRowPitch * (region[1]))); } } // write to mapped ptr mappedPtrStart = static_cast(mappedPtr); for (size_t z = 0; z < region[2]; z++) { for (size_t y = 0; y < region[1]; y++) { memset(mappedPtrStart, 0xFF, elementSize * region[0]); mappedPtrStart = ptrOffset(mappedPtrStart, mappedRowPitch); } mappedPtrStart = ptrOffset(mappedPtrStart, mappedSlicePitch - (mappedRowPitch * region[1])); } pCmdQ->enqueueUnmapMemObject(srcImage.get(), mappedPtr, 0, nullptr, nullptr); // verify unmap 
uint8_t *readMemory = new uint8_t[srcImage->getSize()]; size_t imgOrigin[] = {0, 0, 0}; size_t imgRegion[] = {testWidth, testHeight, testDepth}; retVal = pCmdQ->enqueueReadImage(srcImage.get(), CL_FALSE, imgOrigin, imgRegion, inputRowPitch, inputSlicePitch, readMemory, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = pCmdQ->flush(); EXPECT_EQ(CL_SUCCESS, retVal); srcMemoryStart = srcMemory; auto readMemoryStart = readMemory; uint8_t *expected = new uint8_t[elementSize]; memset(expected, 0xFF, elementSize); for (size_t z = 0; z < testDepth; ++z) { for (size_t y = 0; y < testHeight; ++y) { for (size_t x = 0; x < testWidth; ++x) { if (z >= origin[2] && z < (origin[2] + region[2]) && y >= origin[1] && y < (origin[1] + region[1]) && x >= origin[0] && x < (origin[0] + region[0])) { // this texel should be updated AUBCommandStreamFixture::expectMemory(&readMemoryStart[x * elementSize], expected, elementSize); } else { AUBCommandStreamFixture::expectMemory(&readMemoryStart[x * elementSize], &srcMemoryStart[x * elementSize], elementSize); } } readMemoryStart = ptrOffset(readMemoryStart, inputRowPitch); srcMemoryStart = ptrOffset(srcMemoryStart, inputRowPitch); } readMemoryStart = ptrOffset(readMemoryStart, inputSlicePitch - (inputRowPitch * (testHeight > 0 ? testHeight : 1))); srcMemoryStart = ptrOffset(srcMemoryStart, inputSlicePitch - (inputRowPitch * (testHeight > 0 ? 
testHeight : 1))); } delete[] readMemory; delete[] srcMemory; delete[] expected; } INSTANTIATE_TEST_CASE_P( AUBMapImage_simple, AUBMapImage, ::testing::Combine(::testing::Values( // formats CL_UNORM_INT8, CL_SIGNED_INT16, CL_UNSIGNED_INT32, CL_HALF_FLOAT, CL_FLOAT), ::testing::Values( // channels CL_R, CL_RG, CL_RGBA), ::testing::ValuesIn(mapImageParams))); enqueue_printf_kernel_aub_tests.cpp000066400000000000000000000032261422164147700355340ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/command_queue/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/aub_tests/fixtures/aub_fixture.h" #include "opencl/test/unit_test/fixtures/hello_world_kernel_fixture.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "command_enqueue_fixture.h" using namespace NEO; class AUBPrintfKernelFixture : public AUBFixture, public HelloWorldKernelFixture, public testing::Test { public: using HelloWorldKernelFixture::SetUp; void SetUp() override { AUBFixture::SetUp(nullptr); ASSERT_NE(nullptr, device.get()); HelloWorldKernelFixture::SetUp(device.get(), programFile, kernelName); } void TearDown() override { if (IsSkipped()) { return; } HelloWorldKernelFixture::TearDown(); AUBFixture::TearDown(); } const char *programFile = "printf"; const char *kernelName = "test_printf_number"; }; HWTEST_F(AUBPrintfKernelFixture, GivenPrintfKernelThenEnqueuingSucceeds) { ASSERT_NE(nullptr, pKernel); size_t offset[3] = {0, 0, 0}; size_t gws[3] = {4, 1, 1}; size_t lws[3] = {4, 1, 1}; std::unique_ptr buffer(BufferHelper>::create(pContext)); const uint32_t number = 4; *(reinterpret_cast(buffer->getCpuAddressForMemoryTransfer())) = number; cl_mem bufferMem = buffer.get(); pKernel->setArg( 0, sizeof(cl_mem), &bufferMem); pCmdQ->enqueueKernel(pKernel, 1, offset, gws, lws, 0, 0, 0); pCmdQ->finish(); 
}enqueue_read_buffer_aub_tests.cpp000066400000000000000000000173361422164147700351450ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/command_queue/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/helpers/ptr_math.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "shared/test/common/test_configuration/aub_tests/aub_tests_configuration.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/aub_tests/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include using namespace NEO; struct ReadBufferHw : public CommandEnqueueAUBFixture, public ::testing::WithParamInterface, public ::testing::Test { void SetUp() override { CommandEnqueueAUBFixture::SetUp(); } void TearDown() override { CommandEnqueueAUBFixture::TearDown(); } }; typedef ReadBufferHw AUBReadBuffer; HWTEST_P(AUBReadBuffer, WhenReadingBufferThenExpectationsAreMet) { MockContext context(this->pClDevice); cl_float srcMemory[] = {1.0f, 2.0f, 3.0f, 4.0f}; cl_float destMemory[] = {0.0f, 0.0f, 0.0f, 0.0f}; auto retVal = CL_INVALID_VALUE; auto srcBuffer = std::unique_ptr(Buffer::create( &context, CL_MEM_USE_HOST_PTR, sizeof(srcMemory), srcMemory, retVal)); ASSERT_NE(nullptr, srcBuffer.get()); auto pSrcMemory = &srcMemory[0]; auto pDestMemory = &destMemory[0]; cl_bool blockingRead = CL_FALSE; size_t offset = GetParam(); size_t sizeWritten = sizeof(cl_float); cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event *event = nullptr; GraphicsAllocation *allocation = createResidentAllocationAndStoreItInCsr(pDestMemory, sizeof(destMemory)); srcBuffer->forceDisallowCPUCopy = true; retVal = pCmdQ->enqueueReadBuffer( srcBuffer.get(), blockingRead, offset, sizeWritten, pDestMemory, nullptr, 
numEventsInWaitList, eventWaitList, event); EXPECT_EQ(CL_SUCCESS, retVal); allocation = pCommandStreamReceiver->getTemporaryAllocations().peekHead(); while (allocation && allocation->getUnderlyingBuffer() != pDestMemory) { allocation = allocation->next; } retVal = pCmdQ->flush(); EXPECT_EQ(CL_SUCCESS, retVal); pSrcMemory = ptrOffset(pSrcMemory, offset); cl_float *destGpuaddress = reinterpret_cast(allocation->getGpuAddress()); // Compute our memory expecations based on kernel execution size_t sizeUserMemory = sizeof(destMemory); AUBCommandStreamFixture::expectMemory(destGpuaddress, pSrcMemory, sizeWritten); // If the copykernel wasn't max sized, ensure we didn't overwrite existing memory if (offset + sizeWritten < sizeUserMemory) { pDestMemory = ptrOffset(pDestMemory, sizeWritten); destGpuaddress = ptrOffset(destGpuaddress, sizeWritten); size_t sizeRemaining = sizeUserMemory - sizeWritten - offset; AUBCommandStreamFixture::expectMemory(destGpuaddress, pDestMemory, sizeRemaining); } } INSTANTIATE_TEST_CASE_P(AUBReadBuffer_simple, AUBReadBuffer, ::testing::Values( 0 * sizeof(cl_float), 1 * sizeof(cl_float), 2 * sizeof(cl_float), 3 * sizeof(cl_float))); HWTEST_F(AUBReadBuffer, GivenReserveCanonicalGpuAddressWhenReadingBufferThenExpectationsAreMet) { if (!GetAubTestsConfig().testCanonicalAddress) { return; } MockContext context(this->pClDevice); cl_float srcMemory[] = {1.0f, 2.0f, 3.0f, 4.0f}; cl_float dstMemory[] = {0.0f, 0.0f, 0.0f, 0.0f}; GraphicsAllocation *srcAllocation = new MockGraphicsAllocation(0, AllocationType::UNKNOWN, srcMemory, 0xFFFF800400001000, 0xFFFF800400001000, sizeof(srcMemory), MemoryPool::MemoryNull, MemoryManager::maxOsContextCount); std::unique_ptr srcBuffer(Buffer::createBufferHw( &context, ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_USE_HOST_PTR, 0, 0, &context.getDevice(0)->getDevice()), CL_MEM_USE_HOST_PTR, 0, sizeof(srcMemory), srcAllocation->getUnderlyingBuffer(), srcMemory, 
GraphicsAllocationHelper::toMultiGraphicsAllocation(srcAllocation), false, false, false)); ASSERT_NE(nullptr, srcBuffer); srcBuffer->forceDisallowCPUCopy = true; auto retVal = pCmdQ->enqueueReadBuffer(srcBuffer.get(), CL_FALSE, 0, sizeof(dstMemory), dstMemory, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = pCmdQ->flush(); EXPECT_EQ(CL_SUCCESS, retVal); GraphicsAllocation *dstAllocation = createResidentAllocationAndStoreItInCsr(dstMemory, sizeof(dstMemory)); cl_float *dstGpuAddress = reinterpret_cast(dstAllocation->getGpuAddress()); AUBCommandStreamFixture::expectMemory(dstGpuAddress, srcMemory, sizeof(dstMemory)); } struct AUBReadBufferUnaligned : public CommandEnqueueAUBFixture, public ::testing::Test { void SetUp() override { CommandEnqueueAUBFixture::SetUp(); } void TearDown() override { CommandEnqueueAUBFixture::TearDown(); } template void testReadBufferUnaligned(size_t offset, size_t size) { MockContext context(pCmdQ->getDevice().getSpecializedDevice()); char srcMemory[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; const auto bufferSize = sizeof(srcMemory); char dstMemory[bufferSize] = {0}; auto retVal = CL_INVALID_VALUE; auto buffer = std::unique_ptr(Buffer::create( &context, CL_MEM_USE_HOST_PTR, bufferSize, srcMemory, retVal)); ASSERT_NE(nullptr, buffer); buffer->forceDisallowCPUCopy = true; // Map destination memory to GPU GraphicsAllocation *allocation = createResidentAllocationAndStoreItInCsr(dstMemory, bufferSize); auto dstMemoryGPUPtr = reinterpret_cast(allocation->getGpuAddress()); // Do unaligned read retVal = pCmdQ->enqueueReadBuffer( buffer.get(), CL_FALSE, offset, size, ptrOffset(dstMemory, offset), nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = pCmdQ->flush(); EXPECT_EQ(CL_SUCCESS, retVal); // Check the memory AUBCommandStreamFixture::expectMemory(ptrOffset(dstMemoryGPUPtr, offset), ptrOffset(srcMemory, offset), size); } }; HWTEST_F(AUBReadBufferUnaligned, 
GivenOffestAndSizeWhenReadingBufferThenExpectationsAreMet) { const std::vector offsets = {0, 1, 2, 3}; const std::vector sizes = {4, 3, 2, 1}; for (auto offset : offsets) { for (auto size : sizes) { testReadBufferUnaligned(offset, size); } } } enqueue_read_buffer_rect_aub_tests.cpp000066400000000000000000000140461422164147700361550ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/command_queue/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/ptr_math.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/aub_tests/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" using namespace NEO; struct ReadBufferRectHw : public CommandEnqueueAUBFixture, public ::testing::WithParamInterface>, public ::testing::Test { void SetUp() override { CommandEnqueueAUBFixture::SetUp(); } void TearDown() override { CommandEnqueueAUBFixture::TearDown(); } }; typedef ReadBufferRectHw AUBReadBufferRect; static const size_t width = 10; HWTEST_P(AUBReadBufferRect, Given3dWhenReadingBufferThenExpectationsAreMet) { MockContext context(this->pClDevice); size_t rowPitch = width; size_t slicePitch = rowPitch * rowPitch; size_t bufferSizeBuff = rowPitch * rowPitch * rowPitch; size_t bufferSize = alignUp(bufferSizeBuff, 4096); size_t zHostOffs; size_t zBuffOffs; std::tie(zBuffOffs, zHostOffs) = GetParam(); ASSERT_LT(zBuffOffs, width); ASSERT_LT(zHostOffs, width); uint8_t *srcMemory = (uint8_t *)::alignedMalloc(bufferSize, 4096); uint8_t *destMemory = (uint8_t *)::alignedMalloc(bufferSize, 4096); for (unsigned int i = 0; i < bufferSize; i++) srcMemory[i] = i; memset(destMemory, 0x00, bufferSize); auto retVal = CL_INVALID_VALUE; auto srcBuffer = 
std::unique_ptr(Buffer::create( &context, CL_MEM_USE_HOST_PTR, bufferSize, srcMemory, retVal)); ASSERT_NE(nullptr, srcBuffer); cl_bool blockingRead = CL_FALSE; createResidentAllocationAndStoreItInCsr(destMemory, bufferSize); size_t bufferOrigin[] = {0, 0, zBuffOffs}; size_t hostOrigin[] = {0, 0, zHostOffs}; size_t region[] = {rowPitch, rowPitch, 1}; retVal = pCmdQ->enqueueReadBufferRect( srcBuffer.get(), blockingRead, bufferOrigin, hostOrigin, region, rowPitch, slicePitch, rowPitch, slicePitch, destMemory, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = pCmdQ->flush(); EXPECT_EQ(CL_SUCCESS, retVal); char *ptr = new char[slicePitch]; memset(ptr, 0, slicePitch); for (unsigned int i = 0; i < rowPitch; i++) { //one slice will be copied from src. all others should be zeros if (i == zHostOffs) { AUBCommandStreamFixture::expectMemory(destMemory + slicePitch * i, srcMemory + slicePitch * zBuffOffs, slicePitch); } else { AUBCommandStreamFixture::expectMemory(destMemory + slicePitch * i, ptr, slicePitch); } } delete[] ptr; ::alignedFree(srcMemory); ::alignedFree(destMemory); } INSTANTIATE_TEST_CASE_P(AUBReadBufferRect_simple, AUBReadBufferRect, ::testing::Combine( ::testing::Values(0, 1, 2, 3, 4), ::testing::Values(0, 1, 2, 3, 4))); struct AUBReadBufferRectUnaligned : public CommandEnqueueAUBFixture, public ::testing::Test { void SetUp() override { CommandEnqueueAUBFixture::SetUp(); } void TearDown() override { CommandEnqueueAUBFixture::TearDown(); } template void testReadBufferUnaligned(size_t offset, size_t size) { MockContext context(pCmdQ->getDevice().getSpecializedDevice()); char srcMemory[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; const auto bufferSize = sizeof(srcMemory); void *dstMemory = alignedMalloc(bufferSize, MemoryConstants::pageSize); memset(dstMemory, 0, bufferSize); char referenceMemory[bufferSize] = {0}; auto retVal = CL_INVALID_VALUE; auto buffer = std::unique_ptr(Buffer::create( &context, CL_MEM_COPY_HOST_PTR, bufferSize, srcMemory, retVal)); 
ASSERT_NE(nullptr, buffer); buffer->forceDisallowCPUCopy = true; // Map destination memory to GPU GraphicsAllocation *allocation = createResidentAllocationAndStoreItInCsr(dstMemory, bufferSize); auto dstMemoryGPUPtr = reinterpret_cast(allocation->getGpuAddress()); cl_bool blockingRead = CL_FALSE; size_t rowPitch = bufferSize / 4; size_t slicePitch = 4 * rowPitch; size_t bufferOrigin[] = {0, 1, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {size, 1, 1}; retVal = pCmdQ->enqueueReadBufferRect( buffer.get(), blockingRead, bufferOrigin, hostOrigin, region, rowPitch, slicePitch, rowPitch, slicePitch, ptrOffset(dstMemory, offset), 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = pCmdQ->flush(); EXPECT_EQ(CL_SUCCESS, retVal); AUBCommandStreamFixture::expectMemory(dstMemoryGPUPtr, referenceMemory, offset); AUBCommandStreamFixture::expectMemory(ptrOffset(dstMemoryGPUPtr, offset), &srcMemory[rowPitch * bufferOrigin[1]], size); AUBCommandStreamFixture::expectMemory(ptrOffset(dstMemoryGPUPtr, size + offset), referenceMemory, bufferSize - offset - size); pCmdQ->finish(); alignedFree(dstMemory); } }; HWTEST_F(AUBReadBufferRectUnaligned, GivenMisalignedHostPtrWhenReadingBufferThenExpectationAreMet) { const std::vector offsets = {0, 1, 2, 3}; const std::vector sizes = {4, 3, 2, 1}; for (auto offset : offsets) { for (auto size : sizes) { testReadBufferUnaligned(offset, size); } } } enqueue_read_image_aub_tests.cpp000066400000000000000000000334021422164147700347460ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/command_queue/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/device/device.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/ptr_math.h" #include "shared/source/memory_manager/os_agnostic_memory_manager.h" #include "shared/test/common/test_macros/test.h" 
#include "opencl/source/mem_obj/image.h" #include "opencl/test/unit_test/aub_tests/fixtures/image_aub_fixture.h" using namespace NEO; struct ReadImageParams { cl_mem_object_type imageType; size_t offsets[3]; } readImageParams[] = { {CL_MEM_OBJECT_IMAGE1D, {0u, 0u, 0u}}, {CL_MEM_OBJECT_IMAGE1D, {1u, 0u, 0u}}, {CL_MEM_OBJECT_IMAGE2D, {0u, 0u, 0u}}, {CL_MEM_OBJECT_IMAGE2D, {1u, 2u, 0u}}, {CL_MEM_OBJECT_IMAGE3D, {0u, 0u, 0u}}, {CL_MEM_OBJECT_IMAGE3D, {1u, 2u, 3u}}, }; template struct AUBReadImage : public ImageAubFixture, public ::testing::WithParamInterface>, public ::testing::Test { void SetUp() override { ImageAubFixture::SetUp(enableBlitter); } void TearDown() override { srcImage.reset(); ImageAubFixture::TearDown(); } template void testReadImageUnaligned() { const auto testWidth = 5u; const auto testHeight = std::get<2>(GetParam()).imageType != CL_MEM_OBJECT_IMAGE1D ? 5u : 1u; const auto testDepth = std::get<2>(GetParam()).imageType == CL_MEM_OBJECT_IMAGE3D ? 5u : 1u; auto numPixels = testWidth * testHeight * testDepth; cl_image_format imageFormat; cl_image_desc imageDesc; // clang-format off imageFormat.image_channel_data_type = std::get<0>(GetParam()); imageFormat.image_channel_order = std::get<1>(GetParam()); imageDesc.image_type = std::get<2>(GetParam()).imageType; imageDesc.image_width = testWidth; imageDesc.image_height = testHeight; imageDesc.image_depth = testDepth; imageDesc.image_array_size = 1; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = NULL; // clang-format on auto perChannelDataSize = 0u; switch (imageFormat.image_channel_data_type) { case CL_UNORM_INT8: perChannelDataSize = 1u; break; case CL_SIGNED_INT16: case CL_HALF_FLOAT: perChannelDataSize = 2u; break; case CL_UNSIGNED_INT32: case CL_FLOAT: perChannelDataSize = 4u; break; } auto numChannels = 0u; switch (imageFormat.image_channel_order) { case CL_R: numChannels = 1u; break; case CL_RG: 
numChannels = 2u; break; case CL_RGBA: numChannels = 4u; break; } size_t elementSize = perChannelDataSize * numChannels; size_t rowPitch = testWidth * elementSize; size_t slicePitch = rowPitch * testHeight; // Generate initial dst memory but make it unaligned to page size auto dstMemoryAligned = alignedMalloc(4 + elementSize * numPixels, MemoryConstants::pageSize); auto dstMemoryUnaligned = ptrOffset(reinterpret_cast(dstMemoryAligned), 4); auto sizeMemory = testWidth * alignUp(testHeight, 4) * testDepth * elementSize; auto srcMemoryAligned = alignedMalloc(sizeMemory, 4); auto srcMemory = reinterpret_cast(srcMemoryAligned); for (auto i = 0u; i < sizeMemory; ++i) { srcMemory[i] = static_cast(i); } memset(dstMemoryUnaligned, 0xFF, numPixels * elementSize); cl_mem_flags flags = CL_MEM_USE_HOST_PTR; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); auto retVal = CL_INVALID_VALUE; srcImage.reset(Image::create( context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context->getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imageDesc, srcMemory, retVal)); ASSERT_NE(nullptr, srcImage.get()); auto origin = std::get<2>(GetParam()).offsets; // Only draw 1/4 of the original image const size_t region[3] = {std::max(testWidth / 2, 1u), std::max(testHeight / 2, 1u), std::max(testDepth / 2, 1u)}; retVal = pCmdQ->enqueueReadImage( srcImage.get(), CL_TRUE, origin, region, rowPitch, slicePitch, dstMemoryUnaligned, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = pCmdQ->finish(); EXPECT_EQ(CL_SUCCESS, retVal); auto imageMemory = srcMemory; auto memoryPool = srcImage->getGraphicsAllocation(context->getDevice(0)->getRootDeviceIndex())->getMemoryPool(); bool isGpuCopy = srcImage->isTiledAllocation() || !MemoryPool::isSystemMemoryPool(memoryPool); if (!isGpuCopy) { imageMemory = reinterpret_cast(srcImage->getCpuAddress()); } auto offset = 
(origin[2] * testWidth * testHeight + origin[1] * testWidth + origin[0]) * elementSize; auto pSrcMemory = ptrOffset(imageMemory, offset); auto pDstMemory = dstMemoryUnaligned; for (auto depth = origin[2] + 1; depth < (origin[2] + region[2]); ++depth) { for (size_t row = 0; row < region[1]; ++row) { size_t length = region[0] * elementSize; AUBCommandStreamFixture::expectMemory(pDstMemory, pSrcMemory, length); pDstMemory = ptrOffset(pDstMemory, length); length = (testWidth - region[0]) * elementSize; AUBCommandStreamFixture::expectMemory(pDstMemory, pDstMemory, length); pDstMemory = ptrOffset(pDstMemory, length); pSrcMemory = ptrOffset(pSrcMemory, testWidth * elementSize); } size_t remainingRows = testHeight - region[1]; while (remainingRows > 0) { size_t length = testHeight * elementSize; AUBCommandStreamFixture::expectMemory(pDstMemory, pDstMemory, length); pDstMemory = ptrOffset(pDstMemory, length); --remainingRows; } pDstMemory = ptrOffset(dstMemoryUnaligned, testWidth * testHeight * elementSize); } alignedFree(dstMemoryAligned); alignedFree(srcMemoryAligned); } template void testReadImageMisaligned(size_t offset, size_t size, size_t pixelSize) { const size_t testWidth = 14 / pixelSize; const size_t testHeight = 4; const size_t testDepth = 1; auto numPixels = testWidth * testHeight * testDepth; cl_image_format imageFormat; cl_image_desc imageDesc; imageFormat.image_channel_data_type = CL_UNSIGNED_INT8; switch (pixelSize) { case 1: imageFormat.image_channel_order = CL_R; break; case 2: imageFormat.image_channel_order = CL_RG; break; case 3: ASSERT_TRUE(false); break; case 4: imageFormat.image_channel_order = CL_RGBA; break; } imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_width = testWidth; imageDesc.image_height = testHeight; imageDesc.image_depth = testDepth; imageDesc.image_array_size = 1; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = NULL; auto 
dstMemoryAligned = alignedMalloc(pixelSize * numPixels, MemoryConstants::cacheLineSize); memset(dstMemoryAligned, 0x0, pixelSize * numPixels); auto srcMemoryAligned = alignedMalloc(4 + pixelSize * numPixels, 4); auto srcMemoryUnaligned = reinterpret_cast(ptrOffset(srcMemoryAligned, 4)); for (auto i = 0u; i < pixelSize * numPixels; ++i) { srcMemoryUnaligned[i] = static_cast(i); } cl_mem_flags flags = CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, pClDevice->getHardwareInfo().capabilityTable.supportsOcl21Features); auto retVal = CL_INVALID_VALUE; auto image = std::unique_ptr(Image::create( context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context->getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imageDesc, srcMemoryUnaligned, retVal)); ASSERT_NE(nullptr, image); EXPECT_FALSE(image->isMemObjZeroCopy()); auto csr = enableBlitter ? pCmdQ->getBcsCommandStreamReceiver(aub_stream::EngineType::ENGINE_BCS) : pCommandStreamReceiver; auto graphicsAllocation = csr->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, pixelSize * numPixels}, dstMemoryAligned); csr->makeResidentHostPtrAllocation(graphicsAllocation); csr->getInternalAllocationStorage()->storeAllocation(std::unique_ptr(graphicsAllocation), TEMPORARY_ALLOCATION); auto dstMemoryGPUPtr = reinterpret_cast(graphicsAllocation->getGpuAddress()); const size_t origin[3] = {0, 1, 0}; const size_t region[3] = {size, 1, 1}; size_t inputRowPitch = testWidth * pixelSize; size_t inputSlicePitch = inputRowPitch * testHeight; retVal = pCmdQ->enqueueReadImage( image.get(), CL_FALSE, origin, region, inputRowPitch, inputSlicePitch, ptrOffset(dstMemoryAligned, offset), nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); pCmdQ->finish(); std::vector referenceMemory(pixelSize * numPixels, 0x0); AUBCommandStreamFixture::expectMemory(dstMemoryGPUPtr, referenceMemory.data(), 
offset); AUBCommandStreamFixture::expectMemory(ptrOffset(dstMemoryGPUPtr, offset), &srcMemoryUnaligned[inputRowPitch * origin[1]], size * pixelSize); AUBCommandStreamFixture::expectMemory(ptrOffset(dstMemoryGPUPtr, offset + size * pixelSize), referenceMemory.data(), pixelSize * numPixels - offset - size * pixelSize); alignedFree(dstMemoryAligned); alignedFree(srcMemoryAligned); } std::unique_ptr srcImage; }; using AUBReadImageCCS = AUBReadImage; HWTEST2_F(AUBReadImageCCS, GivenMisalignedHostPtrWhenReadingImageThenExpectationsAreMet, ImagesSupportedMatcher) { const std::vector pixelSizes = {1, 2, 4}; const std::vector offsets = {0, 4, 8, 12}; const std::vector sizes = {3, 2, 1}; for (auto pixelSize : pixelSizes) { for (auto offset : offsets) { for (auto size : sizes) { testReadImageMisaligned(offset, size, pixelSize); } } } } HWTEST2_P(AUBReadImageCCS, GivenUnalignedMemoryWhenReadingImageThenExpectationsAreMet, ImagesSupportedMatcher) { testReadImageUnaligned(); } INSTANTIATE_TEST_CASE_P( AUBReadImage_simple, AUBReadImageCCS, ::testing::Combine(::testing::Values( // formats CL_UNORM_INT8, CL_SIGNED_INT16, CL_UNSIGNED_INT32, CL_HALF_FLOAT, CL_FLOAT), ::testing::Values( // channels CL_R, CL_RG, CL_RGBA), ::testing::ValuesIn(readImageParams))); using AUBReadImageBCS = AUBReadImage; HWTEST2_F(AUBReadImageBCS, GivenMisalignedHostPtrWhenReadingImageWithBlitterEnabledThenExpectationsAreMet, ImagesSupportedMatcher) { const std::vector pixelSizes = {1, 2, 4}; const std::vector offsets = {0, 4, 8, 12}; const std::vector sizes = {3, 2, 1}; for (auto pixelSize : pixelSizes) { for (auto offset : offsets) { for (auto size : sizes) { testReadImageMisaligned(offset, size, pixelSize); ASSERT_EQ(pCmdQ->peekLatestSentEnqueueOperation(), EnqueueProperties::Operation::Blit); } } } } HWTEST2_P(AUBReadImageBCS, GivenUnalignedMemoryWhenReadingImageWithBlitterEnabledThenExpectationsAreMet, ImagesSupportedMatcher) { if (std::get<2>(GetParam()).imageType == CL_MEM_OBJECT_IMAGE3D && 
!(HwInfoConfig::get(defaultHwInfo->platform.eProductFamily)->isTile64With3DSurfaceOnBCSSupported(*defaultHwInfo))) { GTEST_SKIP(); } testReadImageUnaligned(); ASSERT_EQ(pCmdQ->peekLatestSentEnqueueOperation(), EnqueueProperties::Operation::Blit); } INSTANTIATE_TEST_CASE_P( AUBReadImage_simple, AUBReadImageBCS, ::testing::Combine(::testing::Values( // formats CL_UNORM_INT8, CL_SIGNED_INT16, CL_UNSIGNED_INT32, CL_HALF_FLOAT, CL_FLOAT), ::testing::Values( // channels CL_R, CL_RG, CL_RGBA), ::testing::ValuesIn(readImageParams))); enqueue_verify_memory_buffer_aub_tests.cpp000066400000000000000000000110001422164147700371040ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/command_queue/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/api/api.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/aub_tests/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" using namespace NEO; struct VerifyMemoryBufferHw : public CommandEnqueueAUBFixture, public ::testing::TestWithParam> { void SetUp() override { CommandEnqueueAUBFixture::SetUp(); } void TearDown() override { CommandEnqueueAUBFixture::TearDown(); } }; size_t testDataSizeTable[] = { 16, MemoryConstants::megaByte}; cl_mem_flags testFlagsTable[] = { 0, CL_MEM_COPY_HOST_PTR}; HWTEST_P(VerifyMemoryBufferHw, givenDifferentBuffersWhenValidatingMemoryThenSuccessIsReturned) { cl_uint testItem = 5; cl_uint testItemWrong1 = 4; cl_uint testItemWrong2 = 6; auto testItemSize = sizeof(testItem); const auto testDataSize = std::get<0>(GetParam()); EXPECT_FALSE(testDataSize < testItemSize); const auto flags = std::get<1>(GetParam()); const 
auto usesHostPointer = ((flags & CL_MEM_USE_HOST_PTR) || (flags & CL_MEM_COPY_HOST_PTR)); DebugManagerStateRestore restore; DebugManager.flags.DisableZeroCopyForBuffers.set(true); std::unique_ptr bufferContent(new uint8_t[testDataSize]); std::unique_ptr validContent(new uint8_t[testDataSize]); std::unique_ptr invalidContent1(new uint8_t[testDataSize]); std::unique_ptr invalidContent2(new uint8_t[testDataSize]); auto pTestItem = reinterpret_cast(&testItem); for (size_t offset = 0; offset < testDataSize; offset += testItemSize) { for (size_t itemOffset = 0; itemOffset < testItemSize; itemOffset++) { bufferContent.get()[offset + itemOffset] = pTestItem[itemOffset]; validContent.get()[offset + itemOffset] = pTestItem[itemOffset]; invalidContent1.get()[offset + itemOffset] = pTestItem[itemOffset]; invalidContent2.get()[offset + itemOffset] = pTestItem[itemOffset]; } } // set last item for invalid contents auto pTestItemWrong1 = reinterpret_cast(&testItemWrong1); auto pTestItemWrong2 = reinterpret_cast(&testItemWrong2); size_t offset = testDataSize - testItemSize; for (size_t itemOffset = 0; itemOffset < testItemSize; itemOffset++) { invalidContent1.get()[offset + itemOffset] = pTestItemWrong1[itemOffset]; invalidContent2.get()[offset + itemOffset] = pTestItemWrong2[itemOffset]; } MockContext context(this->pCmdQ->getDevice().getSpecializedDevice()); cl_int retVal = CL_INVALID_VALUE; std::unique_ptr buffer(Buffer::create( &context, flags, testDataSize, (usesHostPointer ? 
bufferContent.get() : nullptr), retVal)); EXPECT_NE(nullptr, buffer); if (!usesHostPointer) { retVal = pCmdQ->enqueueFillBuffer( buffer.get(), &testItem, testItemSize, 0, testDataSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } auto mappedAddress = clEnqueueMapBuffer(pCmdQ, buffer.get(), CL_FALSE, CL_MAP_READ, 0, testDataSize, 0, nullptr, nullptr, nullptr); clFlush(pCmdQ); retVal = clEnqueueVerifyMemoryINTEL(pCmdQ, mappedAddress, validContent.get(), testDataSize, CL_MEM_COMPARE_EQUAL); EXPECT_EQ(CL_SUCCESS, retVal); if (UnitTestHelper::isExpectMemoryNotEqualSupported()) { retVal = clEnqueueVerifyMemoryINTEL(pCmdQ, mappedAddress, invalidContent1.get(), testDataSize, CL_MEM_COMPARE_NOT_EQUAL); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clEnqueueVerifyMemoryINTEL(pCmdQ, mappedAddress, invalidContent2.get(), testDataSize, CL_MEM_COMPARE_NOT_EQUAL); EXPECT_EQ(CL_SUCCESS, retVal); } clFinish(pCmdQ); } INSTANTIATE_TEST_CASE_P(VerifyMemoryBuffer, VerifyMemoryBufferHw, ::testing::Combine( ::testing::ValuesIn(testDataSizeTable), ::testing::ValuesIn(testFlagsTable))); enqueue_verify_memory_image_aub_tests.cpp000066400000000000000000000120641422164147700367300ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/command_queue/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/ptr_math.h" #include "shared/source/memory_manager/os_agnostic_memory_manager.h" #include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/api/api.h" #include "opencl/source/mem_obj/image.h" #include "opencl/test/unit_test/aub_tests/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" using namespace NEO; struct TestOffset { size_t offset[3]; }; struct VerifyMemoryImageHw 
: public CommandEnqueueAUBFixture, public ::testing::TestWithParam { void SetUp() override { CommandEnqueueAUBFixture::SetUp(); if (!pDevice->getDeviceInfo().imageSupport) { GTEST_SKIP(); } } void TearDown() override { CommandEnqueueAUBFixture::TearDown(); } }; TestOffset testInput[] = { {{0, 0, 0}}, {{1, 2, 3}}, {{3, 2, 1}}, {{5, 5, 5}}}; HWTEST_P(VerifyMemoryImageHw, givenDifferentImagesWhenValidatingMemoryThenSuccessIsReturned) { cl_image_format imageFormat; cl_image_desc imageDesc; // clang-format off imageFormat.image_channel_data_type = CL_UNSIGNED_INT32; imageFormat.image_channel_order = CL_RGBA; imageDesc.image_type = CL_MEM_OBJECT_IMAGE3D; imageDesc.image_width = 10; imageDesc.image_height = 19; imageDesc.image_depth = 7; imageDesc.image_array_size = 1; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = NULL; // clang-format on // data per channel multplied by number of channels size_t elementSize = 16; cl_mem_flags flags = CL_MEM_READ_ONLY; auto surfaceFormat = Image:: getSurfaceFormatFromTable(flags, &imageFormat, pClDevice->getHardwareInfo().capabilityTable.supportsOcl21Features); auto retVal = CL_INVALID_VALUE; std::unique_ptr image(Image::create( context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context->getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal)); EXPECT_NE(nullptr, image); auto sizeMemory = image->getSize(); EXPECT_GT(sizeMemory, 0u); std::unique_ptr srcMemory(new uint8_t[elementSize]); memset(srcMemory.get(), 0xAB, elementSize); memset(image->getCpuAddress(), 0xAB, sizeMemory); const size_t *origin = GetParam().offset; const size_t region[] = { imageDesc.image_width - origin[0], imageDesc.image_height - origin[1], imageDesc.image_depth - origin[2]}; uint32_t fillValues[] = {0x3f800000, 0x00000000, 0x3f555555, 0x3f2aaaaa}; retVal = pCmdQ->enqueueFillImage( image.get(), fillValues, origin, region, 
0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); size_t imgOrigin[] = {0, 0, 0}; size_t imgRegion[] = {imageDesc.image_width, imageDesc.image_height, imageDesc.image_depth}; size_t mappedRowPitch; size_t mappedSlicePitch; auto mappedAddress = clEnqueueMapImage(pCmdQ, image.get(), CL_TRUE, CL_MAP_READ, imgOrigin, imgRegion, &mappedRowPitch, &mappedSlicePitch, 0, nullptr, nullptr, &retVal); auto pImageData = reinterpret_cast(mappedAddress); for (size_t z = 0; z < imageDesc.image_depth; ++z) { for (size_t y = 0; y < imageDesc.image_height; ++y) { for (size_t x = 0; x < imageDesc.image_width; ++x) { void *validData = srcMemory.get(); void *invalidData = fillValues; if (z >= origin[2] && z < (origin[2] + region[2]) && y >= origin[1] && y < (origin[1] + region[1]) && x >= origin[0] && x < (origin[0] + region[0])) { std::swap(validData, invalidData); } retVal = clEnqueueVerifyMemoryINTEL(pCmdQ, &pImageData[x * elementSize], validData, elementSize, CL_MEM_COMPARE_EQUAL); EXPECT_EQ(CL_SUCCESS, retVal); if (UnitTestHelper::isExpectMemoryNotEqualSupported()) { retVal = clEnqueueVerifyMemoryINTEL(pCmdQ, &pImageData[x * elementSize], invalidData, elementSize, CL_MEM_COMPARE_NOT_EQUAL); EXPECT_EQ(CL_SUCCESS, retVal); } } pImageData = ptrOffset(pImageData, mappedRowPitch); } pImageData = ptrOffset(pImageData, mappedSlicePitch - (mappedRowPitch * imageDesc.image_height)); } } INSTANTIATE_TEST_CASE_P(VerifyMemoryImage, VerifyMemoryImageHw, ::testing::ValuesIn(testInput)); enqueue_write_buffer_aub_tests.cpp000066400000000000000000000123201422164147700353500ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/command_queue/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/device/device.h" #include "shared/source/helpers/ptr_math.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/mem_obj/buffer.h" 
#include "opencl/test/unit_test/aub_tests/command_queue/command_enqueue_fixture.h"
#include "opencl/test/unit_test/mocks/mock_context.h"

using namespace NEO;

// NOTE(review): the template-argument lists in this part of the file were
// missing (e.g. "template void testWriteBufferUnaligned", "reinterpret_cast(",
// "std::unique_ptr(Buffer::create("). They are restored below from how each
// value is used:
//   - WithParamInterface<size_t>: GetParam() is assigned to a size_t offset.
//   - std::unique_ptr<Buffer>:    wraps the result of Buffer::create.
//   - std::vector<size_t>:        elements are passed as size_t offset/size.
//   - reinterpret_cast<T *>:      any object pointer type works with ptrOffset.
//   - <FamilyType> / <ClDevice>:  assumed names - confirm against the HWTEST_*
//                                 macros and Device::getSpecializedDevice.

// Value-parameterized AUB fixture. The parameter is the byte offset inside the
// destination buffer at which the test writes.
struct WriteBufferHw
    : public CommandEnqueueAUBFixture,
      public ::testing::WithParamInterface<size_t>,
      public ::testing::Test {

    void SetUp() override {
        CommandEnqueueAUBFixture::SetUp();
    }

    void TearDown() override {
        CommandEnqueueAUBFixture::TearDown();
    }
};

using AUBWriteBuffer = WriteBufferHw;

// Writes a single cl_float from host memory into a zero-initialized
// CL_MEM_USE_HOST_PTR buffer at the parameterized byte offset, then expects:
//   - the written bytes to match the source,
//   - every byte before the offset to still be zero,
//   - every byte after the written element to still be zero.
HWTEST_P(AUBWriteBuffer, WhenWritingBufferThenExpectationsAreMet) {
    constexpr size_t elementCount = 1024;
    constexpr size_t sizeUserMemory = elementCount * sizeof(cl_float);

    MockContext context(this->pCmdQ->getDevice().getSpecializedDevice<ClDevice>());

    // Owning containers instead of new[]/delete[]: ASSERT_* returns from the
    // test body on failure, which would otherwise leak all three arrays.
    std::vector<cl_float> srcMemory(elementCount);
    std::vector<cl_float> destMemory(elementCount, 0.0f);
    std::vector<cl_float> zeroMemory(elementCount, 0.0f);
    for (size_t i = 0; i < elementCount; i++) {
        srcMemory[i] = static_cast<cl_float>(i) + 1.0f;
    }

    auto retVal = CL_INVALID_VALUE;
    // Declared after destMemory so the buffer is destroyed before the host
    // memory it was created on top of (same order as the former manual deletes).
    auto dstBuffer = std::unique_ptr<Buffer>(Buffer::create(
        &context,
        CL_MEM_USE_HOST_PTR,
        sizeUserMemory,
        destMemory.data(),
        retVal));
    ASSERT_NE(nullptr, dstBuffer);

    auto pSrcMemory = srcMemory.data();

    cl_bool blockingWrite = CL_TRUE;
    size_t offset = GetParam();
    size_t sizeWritten = sizeof(cl_float);
    cl_uint numEventsInWaitList = 0;
    cl_event *eventWaitList = nullptr;
    cl_event *event = nullptr;

    // presumably keeps the write off the CPU-copy path so that GPU commands are
    // captured - confirm against Buffer
    dstBuffer->forceDisallowCPUCopy = true;
    retVal = pCmdQ->enqueueWriteBuffer(
        dstBuffer.get(),
        blockingWrite,
        offset,
        sizeWritten,
        pSrcMemory,
        nullptr,
        numEventsInWaitList,
        eventWaitList,
        event);
    EXPECT_EQ(CL_SUCCESS, retVal);

    auto pDestMemory = reinterpret_cast<cl_float *>((dstBuffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddress()));

    // Compute our memory expectations based on kernel execution
    auto pVal = ptrOffset(pDestMemory, offset);
    AUBCommandStreamFixture::expectMemory<FamilyType>(pVal, pSrcMemory, sizeWritten);

    // if offset provided, check the beginning
    if (offset > 0) {
        AUBCommandStreamFixture::expectMemory<FamilyType>(pDestMemory, zeroMemory.data(), offset);
    }

    // If the copykernel wasn't max sized, ensure we didn't overwrite existing memory
    if (offset + sizeWritten < sizeUserMemory) {
        pDestMemory = ptrOffset(pVal, sizeWritten);
        size_t sizeRemaining = sizeUserMemory - sizeWritten - offset;
        AUBCommandStreamFixture::expectMemory<FamilyType>(pDestMemory, zeroMemory.data(), sizeRemaining);
    }
}

INSTANTIATE_TEST_CASE_P(AUBWriteBuffer_simple,
                        AUBWriteBuffer,
                        ::testing::Values(
                            0 * sizeof(cl_float),
                            1 * sizeof(cl_float),
                            2 * sizeof(cl_float),
                            3 * sizeof(cl_float)));

// Fixture for writes whose offset and size are not multiples of four bytes.
struct AUBWriteBufferUnaligned
    : public CommandEnqueueAUBFixture,
      public ::testing::Test {

    void SetUp() override {
        CommandEnqueueAUBFixture::SetUp();
    }

    void TearDown() override {
        CommandEnqueueAUBFixture::TearDown();
    }

    // Writes `size` bytes of the alphabet string, starting at byte `offset`,
    // into the same byte offset of a CL_MEM_USE_HOST_PTR buffer and expects the
    // written range to match the source range.
    template <typename FamilyType>
    void testWriteBufferUnaligned(size_t offset, size_t size) {
        MockContext context(pCmdQ->getDevice().getSpecializedDevice<ClDevice>());

        char srcMemory[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
        const auto bufferSize = sizeof(srcMemory);
        char dstMemory[bufferSize] = {0};

        auto retVal = CL_INVALID_VALUE;

        auto buffer = std::unique_ptr<Buffer>(Buffer::create(
            &context,
            CL_MEM_USE_HOST_PTR,
            bufferSize,
            dstMemory,
            retVal));
        ASSERT_NE(nullptr, buffer);

        buffer->forceDisallowCPUCopy = true;

        // Do unaligned write
        retVal = pCmdQ->enqueueWriteBuffer(
            buffer.get(),
            CL_TRUE,
            offset,
            size,
            ptrOffset(srcMemory, offset),
            nullptr,
            0,
            nullptr,
            nullptr);
        EXPECT_EQ(CL_SUCCESS, retVal);

        // Check the memory
        auto bufferGPUPtr = reinterpret_cast<char *>((buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddress()));
        AUBCommandStreamFixture::expectMemory<FamilyType>(ptrOffset(bufferGPUPtr, offset), ptrOffset(srcMemory, offset), size);
    }
};

// Runs the unaligned write for every combination of offset 0..3 and size 4..1.
HWTEST_F(AUBWriteBufferUnaligned, GivenOffsetAndSizeWhenWritingBufferThenExpectationsAreMet) {
    const std::vector<size_t> offsets = {0, 1, 2, 3};
    const std::vector<size_t> sizes = {4, 3, 2, 1};
    for (auto offset : offsets) {
        for (auto size : sizes) {
            testWriteBufferUnaligned<FamilyType>(offset, size);
        }
    }
}
enqueue_write_buffer_rect_aub_tests.cpp000066400000000000000000000135261422164147700363760ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/command_queue/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/device/device.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/ptr_math.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/aub_tests/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" using namespace NEO; struct WriteBufferRectHw : public CommandEnqueueAUBFixture, public ::testing::WithParamInterface>, public ::testing::Test { void SetUp() override { CommandEnqueueAUBFixture::SetUp(); } void TearDown() override { CommandEnqueueAUBFixture::TearDown(); } }; typedef WriteBufferRectHw AUBWriteBufferRect; static const size_t width = 10; HWTEST_P(AUBWriteBufferRect, Given3dWhenWritingBufferThenExpectationsAreMet) { MockContext context(this->pClDevice); size_t rowPitch = width; size_t slicePitch = rowPitch * rowPitch; size_t bufferSizeBuff = rowPitch * rowPitch * rowPitch; size_t bufferSize = alignUp(bufferSizeBuff, 4096); size_t zHostOffs; size_t zBuffOffs; std::tie(zBuffOffs, zHostOffs) = GetParam(); ASSERT_LT(zBuffOffs, width); ASSERT_LT(zHostOffs, width); uint8_t *srcMemory = (uint8_t *)::alignedMalloc(bufferSize, 4096); uint8_t *destMemory = (uint8_t *)::alignedMalloc(bufferSize, 4096); for (unsigned int i = 0; i < bufferSize; i++) srcMemory[i] = i; memset(destMemory, 0x00, bufferSize); auto retVal = CL_INVALID_VALUE; auto dstBuffer = std::unique_ptr(Buffer::create( &context, CL_MEM_USE_HOST_PTR, bufferSize, destMemory, retVal)); ASSERT_NE(nullptr, dstBuffer); uint8_t *pDestMemory = 
reinterpret_cast(dstBuffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddress()); cl_bool blockingWrite = CL_TRUE; size_t bufferOrigin[] = {0, 0, zBuffOffs}; size_t hostOrigin[] = {0, 0, zHostOffs}; size_t region[] = {rowPitch, rowPitch, 1}; retVal = pCmdQ->enqueueWriteBufferRect( dstBuffer.get(), blockingWrite, bufferOrigin, hostOrigin, region, rowPitch, slicePitch, rowPitch, slicePitch, srcMemory, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); char *ptr = new char[slicePitch]; memset(ptr, 0, slicePitch); for (unsigned int i = 0; i < rowPitch; i++) { //one slice will be copied from src. all others should be zeros if (i == zBuffOffs) { AUBCommandStreamFixture::expectMemory(pDestMemory + slicePitch * i, srcMemory + slicePitch * zHostOffs, slicePitch); } else { AUBCommandStreamFixture::expectMemory(pDestMemory + slicePitch * i, ptr, slicePitch); } } delete[] ptr; ::alignedFree(srcMemory); ::alignedFree(destMemory); } INSTANTIATE_TEST_CASE_P(AUBWriteBufferRect_simple, AUBWriteBufferRect, ::testing::Combine( ::testing::Values(0, 1, 2, 3, 4), ::testing::Values(0, 1, 2, 3, 4))); struct AUBWriteBufferRectUnaligned : public CommandEnqueueAUBFixture, public ::testing::Test { void SetUp() override { CommandEnqueueAUBFixture::SetUp(); } void TearDown() override { CommandEnqueueAUBFixture::TearDown(); } template void testWriteBufferUnaligned(size_t offset, size_t size) { MockContext context(pCmdQ->getDevice().getSpecializedDevice()); char srcMemory[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; const auto bufferSize = sizeof(srcMemory); char dstMemory[bufferSize] = {0}; char referenceMemory[bufferSize] = {0}; auto retVal = CL_INVALID_VALUE; auto buffer = std::unique_ptr(Buffer::create( &context, CL_MEM_COPY_HOST_PTR, bufferSize, dstMemory, retVal)); ASSERT_NE(nullptr, buffer); buffer->forceDisallowCPUCopy = true; uint8_t *pDestMemory = reinterpret_cast(buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddress()); cl_bool blockingWrite = 
CL_TRUE; size_t rowPitch = bufferSize / 4; size_t slicePitch = 4 * rowPitch; size_t bufferOrigin[] = {0, 1, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {size, 1, 1}; retVal = pCmdQ->enqueueWriteBufferRect( buffer.get(), blockingWrite, bufferOrigin, hostOrigin, region, rowPitch, slicePitch, rowPitch, slicePitch, ptrOffset(srcMemory, offset), 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); pCmdQ->finish(); AUBCommandStreamFixture::expectMemory(pDestMemory, referenceMemory, rowPitch); AUBCommandStreamFixture::expectMemory(pDestMemory + rowPitch * bufferOrigin[1], ptrOffset(srcMemory, offset), size); AUBCommandStreamFixture::expectMemory(pDestMemory + rowPitch * bufferOrigin[1] + size, referenceMemory, bufferSize - size - rowPitch); } }; HWTEST_F(AUBWriteBufferRectUnaligned, GivenMisalignedHostPtrWhenWritingBufferThenExpectationsAreMet) { const std::vector offsets = {0, 1, 2, 3}; const std::vector sizes = {4, 3, 2, 1}; for (auto offset : offsets) { for (auto size : sizes) { testWriteBufferUnaligned(offset, size); } } } enqueue_write_copy_read_buffer_aub_tests.cpp000066400000000000000000000100531422164147700373760ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/command_queue/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/aub_tests/command_queue/enqueue_write_copy_read_buffer_aub_tests.h" #include "shared/source/memory_manager/allocations_list.h" #include "shared/test/common/test_macros/test.h" using namespace NEO; template void AubWriteCopyReadBuffer::runTest() { auto simulatedCsr = AUBFixture::getSimulatedCsr(); simulatedCsr->initializeEngine(); char srcMemoryInitial[] = {1, 2, 3, 4, 5, 6, 7, 8}; char dstMemoryInitial[] = {11, 12, 13, 14, 15, 16, 17, 18}; char srcMemoryToWrite[] = {1, 2, 3, 4, 5, 6, 7, 8}; char dstMemoryToWrite[] = {11, 12, 13, 14, 15, 16, 17, 18}; const size_t bufferSize = sizeof(srcMemoryInitial); static_assert(bufferSize == 
sizeof(dstMemoryInitial), ""); static_assert(bufferSize == sizeof(srcMemoryToWrite), ""); static_assert(bufferSize == sizeof(dstMemoryToWrite), ""); auto retVal = CL_INVALID_VALUE; auto srcBuffer = std::unique_ptr(Buffer::create( context, CL_MEM_COPY_HOST_PTR, bufferSize, srcMemoryInitial, retVal)); ASSERT_NE(nullptr, srcBuffer); auto dstBuffer = std::unique_ptr(Buffer::create( context, CL_MEM_COPY_HOST_PTR, bufferSize, dstMemoryInitial, retVal)); ASSERT_NE(nullptr, dstBuffer); simulatedCsr->writeMemory(*srcBuffer->getGraphicsAllocation(device->getRootDeviceIndex())); simulatedCsr->writeMemory(*dstBuffer->getGraphicsAllocation(device->getRootDeviceIndex())); expectMemory(AUBFixture::getGpuPointer(srcBuffer->getGraphicsAllocation(device->getRootDeviceIndex())), srcMemoryInitial, bufferSize); expectMemory(AUBFixture::getGpuPointer(dstBuffer->getGraphicsAllocation(device->getRootDeviceIndex())), dstMemoryInitial, bufferSize); cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event *event = nullptr; retVal = pCmdQ->enqueueWriteBuffer( srcBuffer.get(), true, 0, bufferSize, srcMemoryToWrite, nullptr, numEventsInWaitList, eventWaitList, event); EXPECT_EQ(CL_SUCCESS, retVal); retVal = pCmdQ->enqueueWriteBuffer( dstBuffer.get(), true, 0, bufferSize, dstMemoryToWrite, nullptr, numEventsInWaitList, eventWaitList, event); EXPECT_EQ(CL_SUCCESS, retVal); expectMemory(AUBFixture::getGpuPointer(srcBuffer->getGraphicsAllocation(device->getRootDeviceIndex())), srcMemoryToWrite, bufferSize); expectMemory(AUBFixture::getGpuPointer(dstBuffer->getGraphicsAllocation(device->getRootDeviceIndex())), dstMemoryToWrite, bufferSize); retVal = pCmdQ->enqueueCopyBuffer( srcBuffer.get(), dstBuffer.get(), 0, 0, bufferSize, numEventsInWaitList, eventWaitList, event); EXPECT_EQ(CL_SUCCESS, retVal); pCmdQ->flush(); // Destination buffer should have src buffer content expectMemory(AUBFixture::getGpuPointer(dstBuffer->getGraphicsAllocation(device->getRootDeviceIndex())), 
srcMemoryToWrite, bufferSize); char hostPtrMemory[] = {0, 0, 0, 0, 0, 0, 0, 0}; ASSERT_EQ(bufferSize, sizeof(hostPtrMemory)); retVal = pCmdQ->enqueueReadBuffer( dstBuffer.get(), false, 0, bufferSize, hostPtrMemory, nullptr, numEventsInWaitList, eventWaitList, event); pCmdQ->flush(); GraphicsAllocation *allocation = csr->getTemporaryAllocations().peekHead(); while (allocation && allocation->getUnderlyingBuffer() != hostPtrMemory) { allocation = allocation->next; } expectMemory(AUBFixture::getGpuPointer(allocation), srcMemoryToWrite, bufferSize); } HWTEST_F(AubWriteCopyReadBuffer, givenTwoBuffersFilledWithPatternWhenSourceIsCopiedToDestinationThenDestinationDataValidates) { runTest(); } enqueue_write_copy_read_buffer_aub_tests.h000066400000000000000000000012011422164147700370360ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/command_queue/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/helpers/ptr_math.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/aub_tests/fixtures/aub_fixture.h" using namespace NEO; struct AubWriteCopyReadBuffer : public AUBFixture, public ::testing::Test { void SetUp() override { AUBFixture::SetUp(nullptr); } void TearDown() override { AUBFixture::TearDown(); } template void runTest(); }; enqueue_write_image_aub_tests.cpp000066400000000000000000000347701422164147700351760ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/command_queue/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/ptr_math.h" #include "shared/source/memory_manager/os_agnostic_memory_manager.h" #include "shared/test/common/test_macros/test.h" #include 
"opencl/source/mem_obj/image.h" #include "opencl/test/unit_test/aub_tests/fixtures/image_aub_fixture.h" using namespace NEO; struct WriteImageParams { cl_mem_object_type imageType; size_t offsets[3]; } writeImageParams[] = { {CL_MEM_OBJECT_IMAGE1D, {0u, 0u, 0u}}, {CL_MEM_OBJECT_IMAGE1D, {1u, 0u, 0u}}, {CL_MEM_OBJECT_IMAGE2D, {0u, 0u, 0u}}, {CL_MEM_OBJECT_IMAGE2D, {1u, 2u, 0u}}, {CL_MEM_OBJECT_IMAGE3D, {0u, 0u, 0u}}, {CL_MEM_OBJECT_IMAGE3D, {1u, 2u, 3u}}, }; template struct AUBWriteImage : public ImageAubFixture, public ::testing::WithParamInterface>, public ::testing::Test { void SetUp() override { ImageAubFixture::SetUp(enableBlitter); } void TearDown() override { dstImage.reset(); ImageAubFixture::TearDown(); } template void testWriteImageUnaligned() { const auto testWidth = 5u; const auto testHeight = std::get<2>(GetParam()).imageType != CL_MEM_OBJECT_IMAGE1D ? 5u : 1u; const auto testDepth = std::get<2>(GetParam()).imageType == CL_MEM_OBJECT_IMAGE3D ? 5u : 1u; auto numPixels = testWidth * testHeight * testDepth; cl_image_format imageFormat; cl_image_desc imageDesc; // clang-format off imageFormat.image_channel_data_type = std::get<0>(GetParam()); imageFormat.image_channel_order = std::get<1>(GetParam()); imageDesc.image_type = std::get<2>(GetParam()).imageType; imageDesc.image_width = testWidth; imageDesc.image_height = testHeight; imageDesc.image_depth = testDepth; imageDesc.image_array_size = 1; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = NULL; // clang-format on auto perChannelDataSize = 0u; switch (imageFormat.image_channel_data_type) { case CL_UNORM_INT8: perChannelDataSize = 1u; break; case CL_SIGNED_INT16: case CL_HALF_FLOAT: perChannelDataSize = 2u; break; case CL_UNSIGNED_INT32: case CL_FLOAT: perChannelDataSize = 4u; break; } auto numChannels = 0u; switch (imageFormat.image_channel_order) { case CL_R: numChannels = 1u; break; case CL_RG: numChannels = 
2u; break; case CL_RGBA: numChannels = 4u; break; } size_t elementSize = perChannelDataSize * numChannels; size_t inputRowPitch = testWidth * elementSize; size_t inputSlicePitch = inputRowPitch * testHeight; // Generate initial src memory but make it unaligned to page size auto srcMemoryAligned = alignedMalloc(4 + elementSize * numPixels, MemoryConstants::pageSize); auto srcMemoryUnaligned = ptrOffset(reinterpret_cast(srcMemoryAligned), 4); for (auto i = 0u; i < numPixels * elementSize; ++i) { srcMemoryUnaligned[i] = static_cast(i); } auto retVal = CL_INVALID_VALUE; cl_mem_flags flags = 0; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); dstImage.reset(Image::create( context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context->getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal)); ASSERT_NE(nullptr, dstImage.get()); memset(dstImage->getCpuAddress(), 0xFF, dstImage->getSize()); // init image - avoid writeImage inside createImage (for tiled img) auto origin = std::get<2>(GetParam()).offsets; // Only draw 1/4 of the original image const size_t region[3] = {std::max(testWidth / 2, 1u), std::max(testHeight / 2, 1u), std::max(testDepth / 2, 1u)}; retVal = pCmdQ->enqueueWriteImage( dstImage.get(), CL_TRUE, origin, region, inputRowPitch, inputSlicePitch, srcMemoryUnaligned, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); auto readMemory = new uint8_t[dstImage->getSize()]; size_t imgOrigin[] = {0, 0, 0}; size_t imgRegion[] = {testWidth, testHeight, testDepth}; retVal = pCmdQ->enqueueReadImage(dstImage.get(), CL_TRUE, imgOrigin, imgRegion, 0, 0, readMemory, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = pCmdQ->finish(); EXPECT_EQ(CL_SUCCESS, retVal); auto pDstMemory = readMemory; auto pSrcMemory = srcMemoryUnaligned; std::vector referenceMemory(elementSize, 0xFF); auto rowPitch = 
dstImage->getHostPtrRowPitch(); auto slicePitch = dstImage->getHostPtrSlicePitch(); for (size_t z = 0; z < testDepth; ++z) { for (size_t y = 0; y < testHeight; ++y) { for (size_t x = 0; x < testWidth; ++x) { auto pos = x * elementSize; if (z >= origin[2] && z < (origin[2] + region[2]) && y >= origin[1] && y < (origin[1] + region[1]) && x >= origin[0] && x < (origin[0] + region[0])) { // this texel should be updated AUBCommandStreamFixture::expectMemory(&pDstMemory[pos], pSrcMemory, elementSize); pSrcMemory = ptrOffset(pSrcMemory, elementSize); } else { AUBCommandStreamFixture::expectMemory(&pDstMemory[pos], referenceMemory.data(), elementSize); } } pDstMemory = ptrOffset(pDstMemory, rowPitch); if (y >= origin[1] && y < origin[1] + region[1] && z >= origin[2] && z < origin[2] + region[2]) { pSrcMemory = ptrOffset(pSrcMemory, inputRowPitch - (elementSize * region[0])); } } pDstMemory = ptrOffset(pDstMemory, slicePitch - (rowPitch * (testHeight > 0 ? testHeight : 1))); if (z >= origin[2] && z < origin[2] + region[2]) { pSrcMemory = ptrOffset(pSrcMemory, inputSlicePitch - (inputRowPitch * (region[1]))); } } alignedFree(srcMemoryAligned); delete[] readMemory; } template void testWriteImageMisaligned(size_t offset, size_t size, size_t pixelSize) { DebugManager.flags.ForceLinearImages.set(true); const size_t testWidth = 14 / pixelSize; const size_t testHeight = 4; const size_t testDepth = 1; auto numPixels = testWidth * testHeight * testDepth; cl_image_format imageFormat; cl_image_desc imageDesc; imageFormat.image_channel_data_type = CL_UNSIGNED_INT8; switch (pixelSize) { case 1: imageFormat.image_channel_order = CL_R; break; case 2: imageFormat.image_channel_order = CL_RG; break; case 3: ASSERT_TRUE(false); break; case 4: imageFormat.image_channel_order = CL_RGBA; break; } imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_width = testWidth; imageDesc.image_height = testHeight; imageDesc.image_depth = testDepth; imageDesc.image_array_size = 1; 
imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = NULL; auto srcMemoryAligned = alignedMalloc(4 + pixelSize * numPixels, MemoryConstants::cacheLineSize); memset(srcMemoryAligned, 0x0, 4 + pixelSize * numPixels); auto srcMemoryUnaligned = ptrOffset(reinterpret_cast(srcMemoryAligned), 4); //ensure non cacheline-aligned (but aligned to 4) hostPtr to create non-zerocopy image cl_mem_flags flags = CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, pClDevice->getHardwareInfo().capabilityTable.supportsOcl21Features); auto retVal = CL_INVALID_VALUE; auto image = std::unique_ptr(Image::create( context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context->getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imageDesc, srcMemoryUnaligned, retVal)); ASSERT_NE(nullptr, image); EXPECT_FALSE(image->isMemObjZeroCopy()); for (auto i = 0u; i < pixelSize * numPixels; ++i) { srcMemoryUnaligned[i] = static_cast(i); } auto dstMemoryGPUPtr = reinterpret_cast(image->getGraphicsAllocation(context->getDevice(0)->getRootDeviceIndex())->getGpuAddress()); const size_t origin[3] = {0, 1, 0}; // write first row const size_t region[3] = {size, 1, 1}; // write only "size" number of pixels size_t inputRowPitch = testWidth * pixelSize; size_t inputSlicePitch = inputRowPitch * testHeight; retVal = pCmdQ->enqueueWriteImage( image.get(), CL_TRUE, origin, region, inputRowPitch, inputSlicePitch, ptrOffset(srcMemoryUnaligned, offset), nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); pCmdQ->finish(); EXPECT_EQ(CL_SUCCESS, retVal); auto imageRowPitch = image->getImageDesc().image_row_pitch; std::vector referenceMemory(inputRowPitch * pixelSize, 0x0); AUBCommandStreamFixture::expectMemory(dstMemoryGPUPtr, referenceMemory.data(), inputRowPitch); // validate zero row is not written 
AUBCommandStreamFixture::expectMemory(ptrOffset(dstMemoryGPUPtr, imageRowPitch), &srcMemoryUnaligned[offset], size * pixelSize); // validate first row is written with correct data AUBCommandStreamFixture::expectMemory(ptrOffset(dstMemoryGPUPtr, imageRowPitch + size * pixelSize), referenceMemory.data(), inputRowPitch - size * pixelSize); // validate the remaining bytes of first row are not written for (uint32_t row = 2; row < testHeight; row++) { AUBCommandStreamFixture::expectMemory(ptrOffset(dstMemoryGPUPtr, row * imageRowPitch), referenceMemory.data(), inputRowPitch); // validate the remaining rows are not written } alignedFree(srcMemoryAligned); } std::unique_ptr dstImage; }; using AUBWriteImageCCS = AUBWriteImage; HWTEST2_F(AUBWriteImageCCS, GivenMisalignedHostPtrWhenWritingImageThenExpectationsAreMet, ImagesSupportedMatcher) { const std::vector pixelSizes = {1, 2, 4}; const std::vector offsets = {0, 4, 8, 12}; const std::vector sizes = {3, 2, 1}; for (auto pixelSize : pixelSizes) { for (auto offset : offsets) { for (auto size : sizes) { testWriteImageMisaligned(offset, size, pixelSize); } } } } HWTEST2_P(AUBWriteImageCCS, GivenUnalignedMemoryWhenWritingImageThenExpectationsAreMet, ImagesSupportedMatcher) { testWriteImageUnaligned(); } INSTANTIATE_TEST_CASE_P(AUBWriteImage_simple, AUBWriteImageCCS, ::testing::Combine(::testing::Values( // formats CL_UNORM_INT8, CL_SIGNED_INT16, CL_UNSIGNED_INT32, CL_HALF_FLOAT, CL_FLOAT), ::testing::Values( // channels CL_R, CL_RG, CL_RGBA), ::testing::ValuesIn(writeImageParams))); using AUBWriteImageBCS = AUBWriteImage; HWTEST2_F(AUBWriteImageBCS, GivenMisalignedHostPtrWhenWritingImageWithBlitterEnabledThenExpectationsAreMet, ImagesSupportedMatcher) { const std::vector pixelSizes = {1, 2, 4}; const std::vector offsets = {0, 4, 8, 12}; const std::vector sizes = {3, 2, 1}; for (auto pixelSize : pixelSizes) { for (auto offset : offsets) { for (auto size : sizes) { testWriteImageMisaligned(offset, size, pixelSize); 
ASSERT_EQ(pCmdQ->peekLatestSentEnqueueOperation(), EnqueueProperties::Operation::Blit); } } } } HWTEST2_P(AUBWriteImageBCS, GivenUnalignedMemoryWhenWritingImageWithBlitterEnabledThenExpectationsAreMet, ImagesSupportedMatcher) { if (std::get<2>(GetParam()).imageType == CL_MEM_OBJECT_IMAGE3D && !(HwInfoConfig::get(defaultHwInfo->platform.eProductFamily)->isTile64With3DSurfaceOnBCSSupported(*defaultHwInfo))) { GTEST_SKIP(); } testWriteImageUnaligned(); ASSERT_EQ(pCmdQ->peekLatestSentEnqueueOperation(), EnqueueProperties::Operation::Blit); } INSTANTIATE_TEST_CASE_P(AUBWriteImage_simple, AUBWriteImageBCS, ::testing::Combine(::testing::Values( // formats CL_UNORM_INT8, CL_SIGNED_INT16, CL_UNSIGNED_INT32, CL_HALF_FLOAT, CL_FLOAT), ::testing::Values( // channels CL_R, CL_RG, CL_RGBA), ::testing::ValuesIn(writeImageParams))); multi_tile_buffers_aub_tests_xehp_and_later.cpp000066400000000000000000000061771422164147700400730ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/command_queue/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/constants.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/test_macros/test.h" #include "opencl/extensions/public/cl_ext_private.h" #include "opencl/source/helpers/cl_memory_properties_helpers.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/aub_tests/fixtures/aub_fixture.h" #include "opencl/test/unit_test/aub_tests/fixtures/multicontext_aub_fixture.h" #include struct MultiTileBuffersXeHPAndLater : public MulticontextAubFixture, public ::testing::Test { static constexpr uint32_t numTiles = 2; void SetUp() override { MulticontextAubFixture::SetUp(numTiles, EnabledCommandStreamers::Single, false); } void TearDown() override { MulticontextAubFixture::TearDown(); } }; HWCMDTEST_F(IGFX_XE_HP_CORE, MultiTileBuffersXeHPAndLater, 
givenTwoBuffersAllocatedOnDifferentTilesWhenCopiedThenDataValidates) { if constexpr (is64bit) { constexpr size_t bufferSize = 64 * 1024u; char bufferTile0Memory[bufferSize] = {}; char bufferTile1Memory[bufferSize] = {}; for (auto index = 0u; index < bufferSize; index++) { bufferTile0Memory[index] = index % 255; bufferTile1Memory[index] = index % 255; } auto retVal = CL_INVALID_VALUE; cl_mem_flags flags = CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR; MemoryProperties memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context->getDevice(0)->getDevice()); memoryProperties.pDevice = &context->getDevice(1)->getDevice(); auto srcBuffer = std::unique_ptr(Buffer::create(context.get(), memoryProperties, flags, 0, bufferSize, bufferTile0Memory, retVal)); ASSERT_NE(nullptr, srcBuffer); flags = CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR; memoryProperties.pDevice = &context->getDevice(2)->getDevice(); auto dstBuffer = std::unique_ptr(Buffer::create(context.get(), memoryProperties, flags, 0, bufferSize, bufferTile1Memory, retVal)); ASSERT_NE(nullptr, dstBuffer); auto cmdQ = commandQueues[0][0].get(); expectMemory(AUBFixture::getGpuPointer(srcBuffer->getGraphicsAllocation(rootDeviceIndex)), bufferTile0Memory, bufferSize, 0, 0); expectMemory(AUBFixture::getGpuPointer(dstBuffer->getGraphicsAllocation(rootDeviceIndex)), bufferTile1Memory, bufferSize, 0, 0); cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event *event = nullptr; retVal = cmdQ->enqueueCopyBuffer(srcBuffer.get(), dstBuffer.get(), 0, 0, bufferSize, numEventsInWaitList, eventWaitList, event); EXPECT_EQ(CL_SUCCESS, retVal); cmdQ->flush(); expectMemory(AUBFixture::getGpuPointer(dstBuffer->getGraphicsAllocation(rootDeviceIndex)), bufferTile0Memory, bufferSize, 0, 0); } } single_tile_products_excludes.cpp000066400000000000000000000065061422164147700352130ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/command_queue/* * Copyright (C) 2022 
Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" HWTEST_EXCLUDE_PRODUCT(FourTilesAllContextsTest, GENERATEONLY_givenFourTilesAndAllContextsWhenSubmittingThenDataIsValid, IGFX_XE_HPG_CORE); HWTEST_EXCLUDE_PRODUCT(FourTilesDualContextTest, HEAVY_givenFourTilesAndDualContextWhenSubmittingThenDataIsValid, IGFX_XE_HPG_CORE); HWTEST_EXCLUDE_PRODUCT(FourTilesSingleContextTest, givenFourTilesAndSingleContextWhenSubmittingThenDataIsValid, IGFX_XE_HPG_CORE); HWTEST_EXCLUDE_PRODUCT(DynamicWalkerPartitionFourTilesTests, whenWalkerPartitionIsEnabledForKernelWithAtomicThenOutputDataIsValid, IGFX_XE_HPG_CORE); HWTEST_EXCLUDE_PRODUCT(DynamicWalkerPartitionFourTilesTests, whenWalkerPartitionIsEnabledForKernelWithoutAtomicThenOutputDataIsValid, IGFX_XE_HPG_CORE); HWTEST_EXCLUDE_PRODUCT(TwoTilesAllContextsTest, HEAVY_givenTwoTilesAndAllContextsWhenSubmittingThenDataIsValid, IGFX_XE_HPG_CORE); HWTEST_EXCLUDE_PRODUCT(TwoTilesDualContextTest, givenTwoTilesAndDualContextWhenSubmittingThenDataIsValid, IGFX_XE_HPG_CORE); HWTEST_EXCLUDE_PRODUCT(TwoTilesSingleContextTest, givenTwoTilesAndSingleContextWhenSubmittingThenDataIsValid, IGFX_XE_HPG_CORE); HWTEST_EXCLUDE_PRODUCT(TwoTilesSingleContextTest, givenTwoTilesAndSingleContextWhenWritingImageThenDataIsValid, IGFX_XE_HPG_CORE); HWTEST_EXCLUDE_PRODUCT(TwoTilesDualContextTest, givenTwoTilesAndDualContextWhenWritingImageThenDataIsValid, IGFX_XE_HPG_CORE); HWTEST_EXCLUDE_PRODUCT(TwoTilesAllContextsTest, GENERATEONLY_givenTwoTilesAndAllContextsWhenWritingImageThenDataIsValid, IGFX_XE_HPG_CORE); HWTEST_EXCLUDE_PRODUCT(FourTilesSingleContextTest, givenFourTilesAndSingleContextWhenWritingImageThenDataIsValid, IGFX_XE_HPG_CORE); HWTEST_EXCLUDE_PRODUCT(FourTilesDualContextTest, GENERATEONLY_givenFourTilesAndDualContextWhenWritingImageThenDataIsValid, IGFX_XE_HPG_CORE); HWTEST_EXCLUDE_PRODUCT(FourTilesAllContextsTest, GENERATEONLY_givenFourTilesAndAllContextsWhenWritingImageThenDataIsValid, 
IGFX_XE_HPG_CORE); HWTEST_EXCLUDE_PRODUCT(OneVAFourPhysicalStoragesTest, givenBufferWithFourPhysicalStoragesWhenEnqueueReadBufferThenReadFromCorrectBank, IGFX_XE_HPG_CORE); HWTEST_EXCLUDE_PRODUCT(OneVAFourPhysicalStoragesTest, givenBufferWithFourPhysicalStoragesWhenEnqueueWriteBufferThenCorrectMemoryIsWrittenToSpecificBank, IGFX_XE_HPG_CORE); HWTEST_EXCLUDE_PRODUCT(OneVAFourPhysicalStoragesTest, givenColouredBufferWhenEnqueueWriteBufferThenCorrectMemoryIsWrittenToSpecificBank, IGFX_XE_HPG_CORE); HWTEST_EXCLUDE_PRODUCT(MultiTileBuffersXeHPAndLater, givenTwoBuffersAllocatedOnDifferentTilesWhenCopiedThenDataValidates, IGFX_XE_HPG_CORE); HWTEST_EXCLUDE_PRODUCT(StaticWalkerPartitionFourTilesTests, givenFourTilesWhenStaticWalkerPartitionIsEnabledForKernelThenOutputDataIsValid, IGFX_XE_HPG_CORE); HWTEST_EXCLUDE_PRODUCT(StaticWalkerPartitionFourTilesTests, givenPreWalkerSyncWhenStaticWalkerPartitionIsThenAtomicsAreIncrementedCorrectly, IGFX_XE_HPG_CORE); HWTEST_EXCLUDE_PRODUCT(StaticWalkerPartitionFourTilesTests, whenNoPreWalkerSyncThenAtomicsAreIncrementedCorrectly, IGFX_XE_HPG_CORE); HWTEST_EXCLUDE_PRODUCT(SingleTileAllContextsTest, HEAVY_givenSingleTileAndAllContextsWhenWritingImageThenDataIsValid, IGFX_XE_HPG_CORE); HWTEST_EXCLUDE_PRODUCT(SingleTileAllContextsTest, GENERATEONLY_givenSingleTileAndAllContextsWhenSubmittingThenDataIsValid, IGFX_XE_HPG_CORE); compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/command_stream/000077500000000000000000000000001422164147700266115ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/command_stream/CMakeLists.txt000066400000000000000000000022741422164147700313560ustar00rootroot00000000000000# # Copyright (C) 2018-2022 Intel Corporation # # SPDX-License-Identifier: MIT # target_sources(igdrcl_aub_tests PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/aub_command_stream_fixture.cpp ${CMAKE_CURRENT_SOURCE_DIR}/aub_command_stream_fixture.h 
${CMAKE_CURRENT_SOURCE_DIR}/aub_command_stream_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/aub_mem_dump_tests.h ${CMAKE_CURRENT_SOURCE_DIR}/aub_mem_dump_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/aub_mi_atomic_tests.cpp ) if(TESTS_XEHP_AND_LATER) target_sources(igdrcl_aub_tests PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/aub_range_based_flush_tests_xehp_and_later.cpp ${CMAKE_CURRENT_SOURCE_DIR}/aub_walker_partition_tests_xehp_and_later.cpp ${CMAKE_CURRENT_SOURCE_DIR}/copy_engine_aub_tests_xehp_and_later.h ${CMAKE_CURRENT_SOURCE_DIR}/copy_engine_aub_tests_xehp_and_later.cpp ) endif() if(TESTS_DG2_AND_LATER) target_sources(igdrcl_aub_tests PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/mi_math_aub_tests_dg2_and_later.cpp ) endif() add_subdirectories() aub_command_stream_fixture.cpp000066400000000000000000000037701422164147700346330ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/command_stream/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/aub_tests/command_stream/aub_command_stream_fixture.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/command_stream/tbx_command_stream_receiver.h" #include "shared/source/device/device.h" #include "shared/source/helpers/api_specific_config.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/os_interface/os_context.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "shared/test/common/helpers/memory_management.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/unit_test/tests_configuration.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/test/unit_test/aub_tests/fixtures/aub_fixture.h" #include "gtest/gtest.h" namespace NEO { void AUBCommandStreamFixture::SetUp(CommandQueue *pCmdQ) { ASSERT_NE(pCmdQ, nullptr); auto &device = reinterpret_cast(pCmdQ->getDevice()); const auto &hwInfo = device.getHardwareInfo(); auto &hwHelper = 
HwHelper::get(hwInfo.platform.eRenderCoreFamily); const ::testing::TestInfo *const testInfo = ::testing::UnitTest::GetInstance()->current_test_info(); std::stringstream strfilename; auto engineType = pCmdQ->getGpgpuCommandStreamReceiver().getOsContext().getEngineType(); strfilename << ApiSpecificConfig::getAubPrefixForSpecificApi(); strfilename << testInfo->test_case_name() << "_" << testInfo->name() << "_" << hwHelper.getCsTraits(engineType).name; pCommandStreamReceiver = AUBFixture::prepareComputeEngine(device, strfilename.str()); ASSERT_NE(nullptr, pCommandStreamReceiver); AUBFixture::prepareCopyEngines(device, strfilename.str()); CommandStreamFixture::SetUp(pCmdQ); pTagMemory = pCommandStreamReceiver->getTagAddress(); this->commandQueue = pCmdQ; } void AUBCommandStreamFixture::TearDown() { CommandStreamFixture::TearDown(); } } // namespace NEO aub_command_stream_fixture.h000066400000000000000000000113671422164147700343010ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/command_stream/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/aub_mem_dump/aub_mem_dump.h" #include "shared/source/aub_mem_dump/page_table_entry_bits.h" #include "shared/source/command_stream/aub_command_stream_receiver_hw.h" #include "shared/source/command_stream/command_stream_receiver_with_aub_dump.h" #include "shared/source/command_stream/tbx_command_stream_receiver_hw.h" #include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/source/memory_manager/memory_banks.h" #include "shared/source/os_interface/os_context.h" #include "shared/test/common/mocks/mock_allocation_properties.h" #include "shared/test/unit_test/tests_configuration.h" #include "opencl/test/unit_test/command_stream/command_stream_fixture.h" #include "gtest/gtest.h" #include namespace NEO { class CommandStreamReceiver; class AUBCommandStreamFixture : public CommandStreamFixture { public: 
virtual void SetUp(CommandQueue *pCommandQueue); void TearDown() override; template AUBCommandStreamReceiverHw *getAubCsr() const { CommandStreamReceiver *csr = pCommandStreamReceiver; if (testMode == TestMode::AubTestsWithTbx) { csr = static_cast> *>(csr)->aubCSR.get(); } return static_cast *>(csr); } template void expectMMIO(uint32_t mmioRegister, uint32_t expectedValue) { CommandStreamReceiver *csr = pCommandStreamReceiver; if (testMode == TestMode::AubTestsWithTbx) { csr = static_cast> *>(pCommandStreamReceiver)->aubCSR.get(); } if (csr) { // Write our pseudo-op to the AUB file auto aubCsr = static_cast *>(csr); aubCsr->expectMMIO(mmioRegister, expectedValue); } } template void expectMemory(void *gfxAddress, const void *srcAddress, size_t length) { CommandStreamReceiver *csr = pCommandStreamReceiver; if (testMode == TestMode::AubTestsWithTbx) { auto tbxCsr = static_cast *>(pCommandStreamReceiver); EXPECT_TRUE(tbxCsr->expectMemoryEqual(gfxAddress, srcAddress, length)); csr = static_cast> *>(pCommandStreamReceiver)->aubCSR.get(); } if (csr) { auto aubCsr = static_cast *>(csr); aubCsr->expectMemoryEqual(gfxAddress, srcAddress, length); } } template void expectMemoryNotEqual(void *gfxAddress, const void *srcAddress, size_t length) { CommandStreamReceiver *csr = pCommandStreamReceiver; if (testMode == TestMode::AubTestsWithTbx) { auto tbxCsr = static_cast *>(pCommandStreamReceiver); EXPECT_TRUE(tbxCsr->expectMemoryNotEqual(gfxAddress, srcAddress, length)); csr = static_cast> *>(pCommandStreamReceiver)->aubCSR.get(); } if (csr) { auto aubCsr = static_cast *>(csr); aubCsr->expectMemoryNotEqual(gfxAddress, srcAddress, length); } } template CommandStreamReceiverSimulatedCommonHw *getSimulatedCsr() const { return static_cast *>(pCommandStreamReceiver); } template void pollForCompletion() { getSimulatedCsr()->pollForCompletion(); } GraphicsAllocation *createResidentAllocationAndStoreItInCsr(const void *address, size_t size) { GraphicsAllocation *graphicsAllocation = 
pCommandStreamReceiver->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pCommandStreamReceiver->getRootDeviceIndex(), false, size}, address); pCommandStreamReceiver->makeResidentHostPtrAllocation(graphicsAllocation); pCommandStreamReceiver->getInternalAllocationStorage()->storeAllocation(std::unique_ptr(graphicsAllocation), TEMPORARY_ALLOCATION); return graphicsAllocation; } CommandStreamReceiver *pCommandStreamReceiver = nullptr; volatile uint32_t *pTagMemory = nullptr; private: CommandQueue *commandQueue = nullptr; }; } // namespace NEO aub_command_stream_tests.cpp000066400000000000000000000133441422164147700343050ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/command_stream/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_container/command_encoder.h" #include "shared/source/command_stream/aub_command_stream_receiver_hw.h" #include "shared/source/command_stream/command_stream_receiver_hw.h" #include "shared/source/helpers/ptr_math.h" #include "shared/source/os_interface/os_context.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/command_queue/command_queue_fixture.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "aub_command_stream_fixture.h" #include using namespace NEO; struct AUBFixture : public AUBCommandStreamFixture, public CommandQueueFixture, public ClDeviceFixture { using AUBCommandStreamFixture::SetUp; using CommandQueueFixture::SetUp; void SetUp() { ClDeviceFixture::SetUp(); CommandQueueFixture::SetUp(nullptr, pClDevice, 0); AUBCommandStreamFixture::SetUp(pCmdQ); } void TearDown() override { AUBCommandStreamFixture::TearDown(); CommandQueueFixture::TearDown(); ClDeviceFixture::TearDown(); } template void testNoopIdXcs(aub_stream::EngineType engineType) { 
pCommandStreamReceiver->getOsContext().getEngineType() = engineType; typedef typename FamilyType::MI_NOOP MI_NOOP; auto pCmd = (MI_NOOP *)pCS->getSpace(sizeof(MI_NOOP) * 4); uint32_t noopId = 0xbaadd; auto noop = FamilyType::cmdInitNoop; *pCmd++ = noop; *pCmd++ = noop; *pCmd++ = noop; noop.TheStructure.Common.IdentificationNumberRegisterWriteEnable = true; noop.TheStructure.Common.IdentificationNumber = noopId; *pCmd++ = noop; CommandStreamReceiverHw::addBatchBufferEnd(*pCS, nullptr); EncodeNoop::alignToCacheLine(*pCS); BatchBuffer batchBuffer{pCS->getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, pCS->getUsed(), pCS, nullptr, false}; ResidencyContainer allocationsForResidency; pCommandStreamReceiver->flush(batchBuffer, allocationsForResidency); AUBCommandStreamFixture::getSimulatedCsr()->pollForCompletionImpl(); auto mmioBase = CommandStreamReceiverSimulatedCommonHw::getCsTraits(engineType).mmioBase; AUBCommandStreamFixture::expectMMIO(AubMemDump::computeRegisterOffset(mmioBase, 0x2094), noopId); } }; typedef Test AUBcommandstreamTests; HWTEST_F(AUBcommandstreamTests, WhenFlushingTwiceThenCompletes) { CommandStreamReceiverHw::addBatchBufferEnd(*pCS, nullptr); EncodeNoop::alignToCacheLine(*pCS); BatchBuffer batchBuffer{pCS->getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, pCS->getUsed(), pCS, nullptr, false}; ResidencyContainer allocationsForResidency; pCommandStreamReceiver->flush(batchBuffer, allocationsForResidency); BatchBuffer batchBuffer2{pCS->getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, pCS->getUsed(), pCS, nullptr, false}; pCommandStreamReceiver->flush(batchBuffer2, allocationsForResidency); AUBCommandStreamFixture::getSimulatedCsr()->pollForCompletion(); } HWTEST_F(AUBcommandstreamTests, GivenRcsWhenTestingNoopIdThenAubIsCorrect) { 
testNoopIdXcs(aub_stream::ENGINE_RCS); } HWTEST_F(AUBcommandstreamTests, GivenBcsWhenTestingNoopIdThenAubIsCorrect) { testNoopIdXcs(aub_stream::ENGINE_BCS); } HWTEST_F(AUBcommandstreamTests, GivenVcsWhenTestingNoopIdThenAubIsCorrect) { testNoopIdXcs(aub_stream::ENGINE_VCS); } HWTEST_F(AUBcommandstreamTests, GivenVecsWhenTestingNoopIdThenAubIsCorrect) { testNoopIdXcs(aub_stream::ENGINE_VECS); } HWTEST_F(AUBcommandstreamTests, WhenCreatingResidentAllocationThenAllocationIsResident) { uint8_t buffer[0x10000]; size_t size = sizeof(buffer); getSimulatedCsr()->initializeEngine(); auto &commandStreamReceiver = pDevice->getGpgpuCommandStreamReceiver(); auto graphicsAllocation = createResidentAllocationAndStoreItInCsr(buffer, size); ResidencyContainer allocationsForResidency = {graphicsAllocation}; commandStreamReceiver.processResidency(allocationsForResidency, 0u); } HWTEST_F(AUBcommandstreamTests, GivenSingleAllocationWhenCreatingResidentAllocationThenAubIsCorrect) { uint32_t buffer = 0xdeadbeef; size_t size = sizeof(buffer); getSimulatedCsr()->initializeEngine(); auto graphicsAllocation = createResidentAllocationAndStoreItInCsr(&buffer, size); ResidencyContainer allocationsForResidency = {graphicsAllocation}; pCommandStreamReceiver->processResidency(allocationsForResidency, 0u); AUBCommandStreamFixture::expectMemory(&buffer, &buffer, size); } HWTEST_F(AUBcommandstreamTests, GivenMultipleAllocationsWhenCreatingResidentAllocationThenAubIsCorrect) { size_t sizeBuffer = 0x100001; auto buffer = new uint8_t[sizeBuffer]; for (size_t index = 0; index < sizeBuffer; ++index) { buffer[index] = static_cast(index); } getSimulatedCsr()->initializeEngine(); auto graphicsAllocation = createResidentAllocationAndStoreItInCsr(buffer, sizeBuffer); ResidencyContainer allocationsForResidency = {graphicsAllocation}; pCommandStreamReceiver->processResidency(allocationsForResidency, 0u); AUBCommandStreamFixture::expectMemory(buffer, buffer, sizeBuffer); delete[] buffer; } 
compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/command_stream/aub_mem_dump_tests.cpp000066400000000000000000000255701422164147700332020ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "aub_mem_dump_tests.h" #include "shared/source/aub/aub_helper.h" #include "shared/source/helpers/hw_helper.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/mocks/mock_aub_csr.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" using NEO::ApiSpecificConfig; using NEO::AUBCommandStreamReceiver; using NEO::AUBCommandStreamReceiverHw; using NEO::AUBFamilyMapper; using NEO::ClDeviceFixture; using NEO::folderAUB; std::string getAubFileName(const NEO::Device *pDevice, const std::string baseName) { const auto pGtSystemInfo = &pDevice->getHardwareInfo().gtSystemInfo; std::stringstream strfilename; uint32_t subSlicesPerSlice = pGtSystemInfo->SubSliceCount / pGtSystemInfo->SliceCount; strfilename << hardwarePrefix[pDevice->getHardwareInfo().platform.eProductFamily] << "_" << pGtSystemInfo->SliceCount << "x" << subSlicesPerSlice << "x" << pGtSystemInfo->MaxEuPerSubSlice << "_" << baseName; return strfilename.str(); } TEST(PageTableTraits, when48BitTraitsAreUsedThenPageTableAddressesAreCorrect) { EXPECT_EQ(BIT(32), AubMemDump::PageTableTraits<48>::ptBaseAddress); EXPECT_EQ(BIT(31), AubMemDump::PageTableTraits<48>::pdBaseAddress); EXPECT_EQ(BIT(30), AubMemDump::PageTableTraits<48>::pdpBaseAddress); EXPECT_EQ(BIT(29), AubMemDump::PageTableTraits<48>::pml4BaseAddress); } TEST(PageTableTraits, when32BitTraitsAreUsedThenPageTableAddressesAreCorrect) { EXPECT_EQ(BIT(38), AubMemDump::PageTableTraits<32>::ptBaseAddress); EXPECT_EQ(BIT(37), AubMemDump::PageTableTraits<32>::pdBaseAddress); EXPECT_EQ(BIT(36), AubMemDump::PageTableTraits<32>::pdpBaseAddress); } typedef Test AubMemDumpTests; HWTEST_F(AubMemDumpTests, 
givenAubFileStreamWhenOpenAndCloseIsCalledThenFileNameIsReportedCorrectly) { AUBCommandStreamReceiver::AubFileStream aubFile; std::string fileName = "file_name.aub"; aubFile.open(fileName.c_str()); EXPECT_STREQ(fileName.c_str(), aubFile.getFileName().c_str()); aubFile.close(); EXPECT_STREQ("", aubFile.getFileName().c_str()); } HWTEST_F(AubMemDumpTests, GivenHeaderThenExpectationsAreMet) { typedef typename AUBFamilyMapper::AUB AUB; std::string filePath(folderAUB); std::string filenameWithPrefix = ApiSpecificConfig::getAubPrefixForSpecificApi(); filePath.append(Os::fileSeparator); filePath.append(getAubFileName(pDevice, filenameWithPrefix.append("header.aub"))); AUBCommandStreamReceiver::AubFileStream aubFile; aubFile.fileHandle.open(filePath.c_str(), std::ofstream::binary); // Header auto deviceId = pDevice->getHardwareInfo().capabilityTable.aubDeviceId; aubFile.init(AubMemDump::SteppingValues::A, deviceId); aubFile.fileHandle.close(); } HWTEST_F(AubMemDumpTests, GivenReserveMaxAddressThenExpectationsAreMet) { typedef typename AUBFamilyMapper::AUB AUB; std::string filePath(folderAUB); std::string filenameWithPrefix = ApiSpecificConfig::getAubPrefixForSpecificApi(); filePath.append(Os::fileSeparator); filePath.append(getAubFileName(pDevice, filenameWithPrefix.append("reserveMaxAddress.aub"))); AUBCommandStreamReceiver::AubFileStream aubFile; aubFile.fileHandle.open(filePath.c_str(), std::ofstream::binary); // Header auto hwInfo = pDevice->getHardwareInfo(); auto deviceId = hwInfo.capabilityTable.aubDeviceId; aubFile.init(AubMemDump::SteppingValues::A, deviceId); auto gAddress = static_cast(-1) - 4096; auto pAddress = static_cast(gAddress) & 0xFFFFFFFF; auto enableLocalMemory = HwHelper::get(hwInfo.platform.eRenderCoreFamily).getEnableLocalMemory(hwInfo); NEO::AubHelperHw aubHelperHw(enableLocalMemory); AUB::reserveAddressPPGTT(aubFile, gAddress, 4096, pAddress, 7, aubHelperHw); aubFile.fileHandle.close(); } HWTEST_F(AubMemDumpTests, 
GivenWriteVerifyOneBytePpgttThenExpectationsAreMet) { typedef typename AUBFamilyMapper::AUB AUB; std::string filePath(folderAUB); std::string filenameWithPrefix = ApiSpecificConfig::getAubPrefixForSpecificApi(); filePath.append(Os::fileSeparator); filePath.append(getAubFileName(pDevice, filenameWithPrefix.append("writeVerifyOneBytePPGTT.aub"))); AUBCommandStreamReceiver::AubFileStream aubFile; aubFile.fileHandle.open(filePath.c_str(), std::ofstream::binary); // Header auto deviceId = pDevice->getHardwareInfo().capabilityTable.aubDeviceId; aubFile.init(AubMemDump::SteppingValues::A, deviceId); uint8_t byte = 0xbf; auto gAddress = reinterpret_cast(&byte); uint64_t physAddress = reinterpret_cast(&byte) & 0xFFFFFFFF; NEO::AubHelperHw aubHelperHw(false); AUB::reserveAddressPPGTT(aubFile, gAddress, sizeof(byte), physAddress, 7, aubHelperHw); AUB::addMemoryWrite(aubFile, physAddress, &byte, sizeof(byte), AubMemDump::AddressSpaceValues::TraceNonlocal); aubFile.expectMemory(physAddress, &byte, sizeof(byte), AubMemDump::AddressSpaceValues::TraceNonlocal, AubMemDump::CmdServicesMemTraceMemoryCompare::CompareOperationValues::CompareEqual); aubFile.fileHandle.close(); } HWTEST_F(AubMemDumpTests, GivenWriteVerifyOneByteGgttThenExpectationsAreMet) { typedef typename AUBFamilyMapper::AUB AUB; std::string filePath(folderAUB); std::string filenameWithPrefix = ApiSpecificConfig::getAubPrefixForSpecificApi(); filePath.append(Os::fileSeparator); filePath.append(getAubFileName(pDevice, filenameWithPrefix.append("writeVerifyOneByteGGTT.aub"))); AUBCommandStreamReceiver::AubFileStream aubFile; aubFile.fileHandle.open(filePath.c_str(), std::ofstream::binary); // Header auto deviceId = pDevice->getHardwareInfo().capabilityTable.aubDeviceId; aubFile.init(AubMemDump::SteppingValues::A, deviceId); uint8_t byte = 0xbf; uint64_t physAddress = reinterpret_cast(&byte) & 0xFFFFFFFF; AubGTTData data = {true, false}; AUB::reserveAddressGGTT(aubFile, &byte, sizeof(byte), physAddress, data); 
AUB::addMemoryWrite(aubFile, physAddress, &byte, sizeof(byte), AubMemDump::AddressSpaceValues::TraceNonlocal); aubFile.expectMemory(physAddress, &byte, sizeof(byte), AubMemDump::AddressSpaceValues::TraceNonlocal, AubMemDump::CmdServicesMemTraceMemoryCompare::CompareOperationValues::CompareEqual); aubFile.fileHandle.close(); } HWTEST_F(AubMemDumpTests, GivenWriteVerifySevenBytesPpgttThenExpectationsAreMet) { typedef typename AUBFamilyMapper::AUB AUB; std::string filePath(folderAUB); std::string filenameWithPrefix = ApiSpecificConfig::getAubPrefixForSpecificApi(); filePath.append(Os::fileSeparator); filePath.append(getAubFileName(pDevice, filenameWithPrefix.append("writeVerifySevenBytesPPGTT.aub"))); AUBCommandStreamReceiver::AubFileStream aubFile; aubFile.fileHandle.open(filePath.c_str(), std::ofstream::binary); // Header auto deviceId = pDevice->getHardwareInfo().capabilityTable.aubDeviceId; aubFile.init(AubMemDump::SteppingValues::A, deviceId); uint8_t bytes[] = {0, 1, 2, 3, 4, 5, 6}; auto gAddress = reinterpret_cast(bytes); auto physAddress = reinterpret_cast(bytes) & 0xFFFFFFFF; NEO::AubHelperHw aubHelperHw(false); AUB::reserveAddressPPGTT(aubFile, gAddress, sizeof(bytes), physAddress, 7, aubHelperHw); AUB::addMemoryWrite(aubFile, physAddress, bytes, sizeof(bytes), AubMemDump::AddressSpaceValues::TraceNonlocal); aubFile.expectMemory(physAddress, bytes, sizeof(bytes), AubMemDump::AddressSpaceValues::TraceNonlocal, AubMemDump::CmdServicesMemTraceMemoryCompare::CompareOperationValues::CompareEqual); aubFile.fileHandle.close(); } HWTEST_F(AubMemDumpTests, GivenWriteVerifySevenBytesGgttThenExpectationsAreMet) { typedef typename AUBFamilyMapper::AUB AUB; std::string filePath(folderAUB); std::string filenameWithPrefix = ApiSpecificConfig::getAubPrefixForSpecificApi(); filePath.append(Os::fileSeparator); filePath.append(getAubFileName(pDevice, filenameWithPrefix.append("writeVerifySevenBytesGGTT.aub"))); AUBCommandStreamReceiver::AubFileStream aubFile; 
aubFile.fileHandle.open(filePath.c_str(), std::ofstream::binary); // Header auto deviceId = pDevice->getHardwareInfo().capabilityTable.aubDeviceId; aubFile.init(AubMemDump::SteppingValues::A, deviceId); uint8_t bytes[] = {0, 1, 2, 3, 4, 5, 6}; uint64_t physAddress = reinterpret_cast(bytes) & 0xFFFFFFFF; AubGTTData data = {true, false}; AUB::reserveAddressGGTT(aubFile, bytes, sizeof(bytes), physAddress, data); AUB::addMemoryWrite(aubFile, physAddress, bytes, sizeof(bytes), AubMemDump::AddressSpaceValues::TraceNonlocal); aubFile.expectMemory(physAddress, bytes, sizeof(bytes), AubMemDump::AddressSpaceValues::TraceNonlocal, AubMemDump::CmdServicesMemTraceMemoryCompare::CompareOperationValues::CompareEqual); aubFile.fileHandle.close(); } HWTEST_F(AubMemDumpTests, GivenRcsThenExpectationsAreMet) { setupAUB(pDevice, aub_stream::ENGINE_RCS); } HWTEST_F(AubMemDumpTests, GivenBcsThenExpectationsAreMet) { setupAUB(pDevice, aub_stream::ENGINE_BCS); } HWTEST_F(AubMemDumpTests, GivenVcsThenExpectationsAreMet) { setupAUB(pDevice, aub_stream::ENGINE_VCS); } HWTEST_F(AubMemDumpTests, GivenVecsThenExpectationsAreMet) { setupAUB(pDevice, aub_stream::ENGINE_VECS); } TEST(AubMemDumpBasic, givenDebugOverrideMmioWhenMmioNotMatchThenDoNotAlterValue) { DebugManagerStateRestore dbgRestore; uint32_t dbgOffset = 0x1000; uint32_t dbgValue = 0xDEAD; DebugManager.flags.AubDumpOverrideMmioRegister.set(static_cast(dbgOffset)); DebugManager.flags.AubDumpOverrideMmioRegisterValue.set(static_cast(dbgValue)); uint32_t offset = 0x2000; uint32_t value = 0x3000; MMIOPair mmio = std::make_pair(offset, value); MockAubFileStreamMockMmioWrite mockAubStream; mockAubStream.writeMMIO(offset, value); EXPECT_EQ(1u, mockAubStream.mmioList.size()); EXPECT_TRUE(mockAubStream.isOnMmioList(mmio)); } TEST(AubMemDumpBasic, givenDebugOverrideMmioWhenMmioMatchThenAlterValue) { DebugManagerStateRestore dbgRestore; uint32_t dbgOffset = 0x2000; uint32_t dbgValue = 0xDEAD; MMIOPair dbgMmio = std::make_pair(dbgOffset, 
dbgValue); DebugManager.flags.AubDumpOverrideMmioRegister.set(static_cast(dbgOffset)); DebugManager.flags.AubDumpOverrideMmioRegisterValue.set(static_cast(dbgValue)); uint32_t offset = 0x2000; uint32_t value = 0x3000; MockAubFileStreamMockMmioWrite mockAubStream; mockAubStream.writeMMIO(offset, value); EXPECT_EQ(1u, mockAubStream.mmioList.size()); EXPECT_TRUE(mockAubStream.isOnMmioList(dbgMmio)); } compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/command_stream/aub_mem_dump_tests.h000066400000000000000000000123371422164147700326440ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/aub_mem_dump/aub_mem_dump.h" #include "shared/source/command_stream/aub_command_stream_receiver_hw.h" #include "shared/source/device/device.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/api_specific_config.h" #include "shared/source/helpers/ptr_math.h" #include "shared/source/os_interface/hw_info_config.h" #include "shared/test/common/test_macros/test.h" #include "aub_mapper.h" namespace Os { extern const char *fileSeparator; } extern std::string getAubFileName(const NEO::Device *pDevice, const std::string baseName); template void setupAUB(const NEO::Device *pDevice, aub_stream::EngineType engineType) { typedef typename NEO::AUBFamilyMapper::AUB AUB; const auto &csTraits = NEO::CommandStreamReceiverSimulatedCommonHw::getCsTraits(engineType); auto mmioBase = csTraits.mmioBase; uint64_t physAddress = 0x10000; NEO::AUBCommandStreamReceiver::AubFileStream aubFile; std::string filePath(NEO::folderAUB); filePath.append(Os::fileSeparator); std::string baseName(NEO::ApiSpecificConfig::getAubPrefixForSpecificApi()); baseName.append("simple"); baseName.append(csTraits.name); baseName.append(".aub"); filePath.append(getAubFileName(pDevice, baseName)); aubFile.fileHandle.open(filePath.c_str(), std::ofstream::binary); // Header auto &hwInfo = 
pDevice->getHardwareInfo(); auto deviceId = hwInfo.capabilityTable.aubDeviceId; const auto &hwInfoConfig = *NEO::HwInfoConfig::get(hwInfo.platform.eProductFamily); aubFile.init(hwInfoConfig.getAubStreamSteppingFromHwRevId(hwInfo), deviceId); aubFile.writeMMIO(mmioBase + 0x229c, 0xffff8280); const size_t sizeHWSP = 0x1000; const size_t sizeRing = 0x4 * 0x1000; const size_t sizeTotal = alignUp((sizeHWSP + sizeRing + csTraits.sizeLRCA), 0x1000); const size_t alignTotal = sizeTotal; auto totalBuffer = alignedMalloc(sizeTotal, alignTotal); size_t totalBufferOffset = 0; auto pGlobalHWStatusPage = totalBuffer; totalBufferOffset += sizeHWSP; uint32_t ggttGlobalHardwareStatusPage = (uint32_t)((uintptr_t)pGlobalHWStatusPage); AubGTTData data = {true, false}; AUB::reserveAddressGGTT(aubFile, ggttGlobalHardwareStatusPage, sizeHWSP, physAddress, data); physAddress += sizeHWSP; aubFile.writeMMIO(mmioBase + 0x2080, ggttGlobalHardwareStatusPage); size_t sizeCommands = 0; auto pRing = ptrOffset(totalBuffer, totalBufferOffset); totalBufferOffset += sizeRing; auto ggttRing = (uint32_t)(uintptr_t)pRing; auto physRing = physAddress; physAddress += sizeRing; auto rRing = AUB::reserveAddressGGTT(aubFile, ggttRing, sizeRing, physRing, data); ASSERT_NE(static_cast(-1), rRing); EXPECT_EQ(rRing, physRing); uint32_t noopId = 0xbaadd; auto cur = (uint32_t *)pRing; using MI_NOOP = typename FamilyType::MI_NOOP; auto noop = FamilyType::cmdInitNoop; *cur++ = noop.TheStructure.RawData[0]; *cur++ = noop.TheStructure.RawData[0]; *cur++ = noop.TheStructure.RawData[0]; noop.TheStructure.Common.IdentificationNumberRegisterWriteEnable = true; noop.TheStructure.Common.IdentificationNumber = noopId; *cur++ = noop.TheStructure.RawData[0]; sizeCommands = ptrDiff(cur, pRing); AUB::addMemoryWrite(aubFile, physRing, pRing, sizeCommands, AubMemDump::AddressSpaceValues::TraceNonlocal, csTraits.aubHintCommandBuffer); auto sizeLRCA = csTraits.sizeLRCA; auto pLRCABase = ptrOffset(totalBuffer, totalBufferOffset); 
totalBufferOffset += csTraits.sizeLRCA; csTraits.initialize(pLRCABase); csTraits.setRingHead(pLRCABase, 0x0000); csTraits.setRingTail(pLRCABase, static_cast(sizeCommands)); csTraits.setRingBase(pLRCABase, ggttRing); auto ringCtrl = static_cast((sizeRing - 0x1000) | 1); csTraits.setRingCtrl(pLRCABase, ringCtrl); auto ggttLRCA = static_cast(reinterpret_cast(pLRCABase)); auto physLRCA = physAddress; physAddress += sizeLRCA; AUB::reserveAddressGGTT(aubFile, ggttLRCA, sizeLRCA, physLRCA, data); AUB::addMemoryWrite(aubFile, physLRCA, pLRCABase, sizeLRCA, AubMemDump::AddressSpaceValues::TraceNonlocal, csTraits.aubHintLRCA); typename AUB::MiContextDescriptorReg contextDescriptor = {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}; contextDescriptor.sData.Valid = true; contextDescriptor.sData.ForcePageDirRestore = false; contextDescriptor.sData.ForceRestore = false; contextDescriptor.sData.Legacy = true; contextDescriptor.sData.FaultSupport = 0; contextDescriptor.sData.PrivilegeAccessOrPPGTT = true; contextDescriptor.sData.ADor64bitSupport = AUB::Traits::addressingBits > 32; contextDescriptor.sData.LogicalRingCtxAddress = (uintptr_t)pLRCABase / 4096; contextDescriptor.sData.ContextID = 0; aubFile.writeMMIO(mmioBase + 0x2230, 0); aubFile.writeMMIO(mmioBase + 0x2230, 0); aubFile.writeMMIO(mmioBase + 0x2230, contextDescriptor.ulData[1]); aubFile.writeMMIO(mmioBase + 0x2230, contextDescriptor.ulData[0]); alignedFree(totalBuffer); aubFile.fileHandle.close(); } compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/command_stream/aub_mi_atomic_tests.cpp000066400000000000000000000163361422164147700333400ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/aub_command_stream_receiver_hw.h" #include "shared/source/command_stream/command_stream_receiver_hw.h" #include "shared/test/common/helpers/dispatch_flags_helper.h" #include "shared/test/common/test_macros/test.h" #include 
"opencl/test/unit_test/aub_tests/fixtures/aub_fixture.h" #include using namespace NEO; struct MiAtomicAubFixture : public AUBFixture { void SetUp() override { AUBFixture::SetUp(nullptr); auto memoryManager = this->device->getMemoryManager(); AllocationProperties commandBufferProperties = {device->getRootDeviceIndex(), true, MemoryConstants::pageSize, AllocationType::COMMAND_BUFFER, false, device->getDeviceBitfield()}; streamAllocation = memoryManager->allocateGraphicsMemoryWithProperties(commandBufferProperties); ASSERT_NE(nullptr, streamAllocation); AllocationProperties deviceBufferProperties = {device->getRootDeviceIndex(), true, MemoryConstants::pageSize, AllocationType::BUFFER, false, device->getDeviceBitfield()}; deviceSurface = memoryManager->allocateGraphicsMemoryWithProperties(deviceBufferProperties); ASSERT_NE(nullptr, deviceSurface); AllocationProperties systemBufferProperties = {device->getRootDeviceIndex(), true, MemoryConstants::pageSize, AllocationType::SVM_CPU, false, device->getDeviceBitfield()}; systemSurface = memoryManager->allocateGraphicsMemoryWithProperties(systemBufferProperties); ASSERT_NE(nullptr, systemSurface); taskStream.replaceGraphicsAllocation(streamAllocation); taskStream.replaceBuffer(streamAllocation->getUnderlyingBuffer(), streamAllocation->getUnderlyingBufferSize()); } void TearDown() override { auto memoryManager = this->device->getMemoryManager(); memoryManager->freeGraphicsMemory(streamAllocation); memoryManager->freeGraphicsMemory(deviceSurface); memoryManager->freeGraphicsMemory(systemSurface); AUBFixture::TearDown(); } void flushStream() { DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.guardCommandBufferWithPipeControl = true; csr->makeResident(*deviceSurface); csr->makeResident(*systemSurface); csr->flushTask(taskStream, 0, &csr->getIndirectHeap(IndirectHeap::Type::DYNAMIC_STATE, 0u), &csr->getIndirectHeap(IndirectHeap::Type::INDIRECT_OBJECT, 0u), 
&csr->getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0u), 0u, dispatchFlags, device->getDevice()); csr->flushBatchedSubmissions(); } LinearStream taskStream; GraphicsAllocation *streamAllocation = nullptr; GraphicsAllocation *deviceSurface = nullptr; GraphicsAllocation *systemSurface = nullptr; }; using MiAtomicAubTest = Test; HWTEST_F(MiAtomicAubTest, WhenDispatchingAtomicMoveOperationThenExpectCorrectEndValues) { using MI_ATOMIC = typename FamilyType::MI_ATOMIC; auto atomicAddress = deviceSurface->getGpuAddress(); auto expectedGpuAddressDwordOp1 = atomicAddress; auto expectedGpuAddressDwordOp2 = expectedGpuAddressDwordOp1 + sizeof(uint32_t); auto expectedGpuAddressQwordOp3 = expectedGpuAddressDwordOp2 + sizeof(uint32_t); uint32_t operation1dword0 = 0x10; EncodeAtomic::programMiAtomic(taskStream, expectedGpuAddressDwordOp1, MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_MOVE, MI_ATOMIC::DATA_SIZE::DATA_SIZE_DWORD, 0, 0, operation1dword0, 0u); uint32_t operation2dword0 = 0x22; EncodeAtomic::programMiAtomic(taskStream, expectedGpuAddressDwordOp2, MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_MOVE, MI_ATOMIC::DATA_SIZE::DATA_SIZE_DWORD, 0, 0, operation2dword0, 0u); uint32_t operation3dword0 = 0xF0; uint32_t operation3dword1 = 0x1F; EncodeAtomic::programMiAtomic(taskStream, expectedGpuAddressQwordOp3, MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_8B_MOVE, MI_ATOMIC::DATA_SIZE::DATA_SIZE_QWORD, 0, 0, operation3dword0, operation3dword1); uint64_t operation3qword = (static_cast(operation3dword1) << 32) | operation3dword0; flushStream(); expectMemory(reinterpret_cast(expectedGpuAddressDwordOp1), &operation1dword0, sizeof(operation1dword0)); expectMemory(reinterpret_cast(expectedGpuAddressDwordOp2), &operation2dword0, sizeof(operation2dword0)); expectMemory(reinterpret_cast(expectedGpuAddressQwordOp3), &operation3qword, sizeof(operation3qword)); } HWTEST_F(MiAtomicAubTest, GivenSystemMemoryWhenDispatchingAtomicMove4BytesOperationThenExpectCorrectEndValues) { using MI_ATOMIC = typename 
FamilyType::MI_ATOMIC; auto atomicAddress = systemSurface->getGpuAddress(); auto expectedGpuAddressDwordOp1 = atomicAddress; auto expectedGpuAddressDwordOp2 = expectedGpuAddressDwordOp1 + sizeof(uint32_t); uint32_t operation1dword0 = 0x15; EncodeAtomic::programMiAtomic(taskStream, expectedGpuAddressDwordOp1, MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_MOVE, MI_ATOMIC::DATA_SIZE::DATA_SIZE_DWORD, 0, 0, operation1dword0, 0u); uint32_t operation2dword0 = 0xFF; EncodeAtomic::programMiAtomic(taskStream, expectedGpuAddressDwordOp2, MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_MOVE, MI_ATOMIC::DATA_SIZE::DATA_SIZE_DWORD, 0, 0, operation2dword0, 0u); flushStream(); expectMemory(reinterpret_cast(expectedGpuAddressDwordOp1), &operation1dword0, sizeof(operation1dword0)); expectMemory(reinterpret_cast(expectedGpuAddressDwordOp2), &operation2dword0, sizeof(operation2dword0)); } aub_range_based_flush_tests_xehp_and_later.cpp000066400000000000000000000276641422164147700400160ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/command_stream/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/cache_flush_xehp_and_later.inl" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/timestamp_packet.h" #include "shared/source/utilities/tag_allocator.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/dispatch_flags_helper.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/aub_tests/fixtures/aub_fixture.h" #include "opencl/test/unit_test/aub_tests/fixtures/hello_world_fixture.h" #include "opencl/test/unit_test/helpers/cmd_buffer_validator.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "test_traits_common.h" using namespace NEO; 
struct RangeBasedFlushTest : public KernelAUBFixture, public ::testing::Test { void SetUp() override { DebugManager.flags.PerformImplicitFlushForNewResource.set(0); DebugManager.flags.PerformImplicitFlushForIdleGpu.set(0); KernelAUBFixture::SetUp(); }; void TearDown() override { KernelAUBFixture::TearDown(); } cl_int retVal = CL_SUCCESS; DebugManagerStateRestore debugSettingsRestore; }; struct L3ControlSupportedMatcher { template static constexpr bool isMatched() { if constexpr (HwMapper::GfxProduct::supportsCmdSet(IGFX_XE_HP_CORE)) { return TestTraits::get()>::l3ControlSupported; } return false; } }; HWTEST2_F(RangeBasedFlushTest, givenNoDcFlushInPipeControlWhenL3ControlFlushesCachesThenExpectFlushedCaches, L3ControlSupportedMatcher) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using WALKER = typename FamilyType::WALKER_TYPE; using L3_CONTROL = typename FamilyType::L3_CONTROL; using L3_FLUSH_ADDRESS_RANGE = typename FamilyType::L3_FLUSH_ADDRESS_RANGE; DebugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(0); constexpr size_t bufferSize = MemoryConstants::pageSize; char bufferAMemory[bufferSize]; char bufferBMemory[bufferSize]; for (uint32_t i = 0; i < bufferSize / MemoryConstants::pageSize; ++i) { memset(bufferAMemory + i * MemoryConstants::pageSize, 1 + i, MemoryConstants::pageSize); memset(bufferBMemory + i * MemoryConstants::pageSize, 129 + i, MemoryConstants::pageSize); } auto retVal = CL_INVALID_VALUE; auto srcBuffer = std::unique_ptr(Buffer::create(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, bufferSize, bufferAMemory, retVal)); ASSERT_NE(nullptr, srcBuffer); auto dstBuffer = std::unique_ptr(Buffer::create(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, bufferSize, bufferBMemory, retVal)); ASSERT_NE(nullptr, dstBuffer); cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event *event = nullptr; retVal = 
pCmdQ->enqueueCopyBuffer(srcBuffer.get(), dstBuffer.get(), 0, 0, bufferSize, numEventsInWaitList, eventWaitList, event); EXPECT_EQ(CL_SUCCESS, retVal); L3RangesVec ranges; ranges.push_back(L3Range::fromAddressSizeWithPolicy(dstBuffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress(), MemoryConstants::pageSize, L3_FLUSH_ADDRESS_RANGE::L3_FLUSH_EVICTION_POLICY_FLUSH_L3_WITH_EVICTION)); size_t requiredSize = getSizeNeededToFlushGpuCache(ranges, false) + 2 * sizeof(PIPE_CONTROL); LinearStream &l3FlushCmdStream = pCmdQ->getCS(requiredSize); auto offset = l3FlushCmdStream.getUsed(); auto pcBeforeFlush = l3FlushCmdStream.getSpaceForCmd(); *pcBeforeFlush = FamilyType::cmdInitPipeControl; flushGpuCache(&l3FlushCmdStream, ranges, 0U, device->getHardwareInfo()); auto &csr = pCmdQ->getGpgpuCommandStreamReceiver(); auto flags = DispatchFlagsHelper::createDefaultDispatchFlags(); flags.blocking = true; DebugManager.flags.DisableDcFlushInEpilogue.set(true); csr.flushTask(l3FlushCmdStream, offset, &pCmdQ->getIndirectHeap(NEO::IndirectHeap::Type::DYNAMIC_STATE, 0), &pCmdQ->getIndirectHeap(NEO::IndirectHeap::Type::INDIRECT_OBJECT, 0), &pCmdQ->getIndirectHeap(NEO::IndirectHeap::Type::SURFACE_STATE, 0), pCmdQ->taskLevel, flags, pCmdQ->getDevice()); std::string err; std::vector expectedCommands{ new MatchAnyCmd(AnyNumber), new MatchHwCmd(1, Expects{EXPECT_MEMBER(PIPE_CONTROL, getCommandStreamerStallEnable, true), EXPECT_MEMBER(PIPE_CONTROL, getDcFlushEnable, false)}), new MatchHwCmd(1, Expects{EXPECT_MEMBER(L3_CONTROL, getPostSyncOperation, L3_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_NO_WRITE)}), }; if (MemorySynchronizationCommands::isPipeControlWArequired(device->getHardwareInfo())) { expectedCommands.push_back(new MatchHwCmd(1, Expects{EXPECT_MEMBER(PIPE_CONTROL, getDcFlushEnable, false)})); if (MemorySynchronizationCommands::getSizeForAdditonalSynchronization(device->getHardwareInfo()) > 0) { expectedCommands.push_back(new MatchHwCmd(1, 
Expects{EXPECT_MEMBER(MI_SEMAPHORE_WAIT, getSemaphoreDataDword, EncodeSempahore::invalidHardwareTag)})); } } expectedCommands.push_back(new MatchHwCmd(1, Expects{EXPECT_MEMBER(PIPE_CONTROL, getDcFlushEnable, false)})); expectedCommands.push_back(new MatchAnyCmd(AnyNumber)); expectedCommands.push_back(new MatchHwCmd(0)); auto cmdBuffOk = expectCmdBuff(l3FlushCmdStream, 0, std::move(expectedCommands), &err); EXPECT_TRUE(cmdBuffOk) << err; expectMemory(reinterpret_cast(dstBuffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress()), bufferAMemory, bufferSize); } /* Copies srcBuffer into dstBuffer, then programs a range-based L3 flush of dstBuffer whose post-sync address points at postSyncBuffer. The test is skipped when the pipe-control workaround is required. */ HWTEST2_F(RangeBasedFlushTest, givenL3ControlWhenPostSyncIsSetThenExpectPostSyncWrite, L3ControlSupportedMatcher) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using WALKER = typename FamilyType::WALKER_TYPE; using L3_CONTROL = typename FamilyType::L3_CONTROL; using L3_FLUSH_ADDRESS_RANGE = typename FamilyType::L3_FLUSH_ADDRESS_RANGE; if (MemorySynchronizationCommands::isPipeControlWArequired(device->getHardwareInfo())) { GTEST_SKIP(); } constexpr size_t bufferSize = MemoryConstants::pageSize; char bufferAMemory[bufferSize]; char bufferBMemory[bufferSize]; for (uint32_t i = 0; i < bufferSize / MemoryConstants::pageSize; ++i) { memset(bufferAMemory + i * MemoryConstants::pageSize, 1 + i, MemoryConstants::pageSize); memset(bufferBMemory + i * MemoryConstants::pageSize, 129 + i, MemoryConstants::pageSize); } auto retVal = CL_INVALID_VALUE; auto srcBuffer = std::unique_ptr(Buffer::create(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, bufferSize, bufferAMemory, retVal)); ASSERT_NE(nullptr, srcBuffer); auto dstBuffer = std::unique_ptr(Buffer::create(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, bufferSize, bufferBMemory, retVal)); ASSERT_NE(nullptr, dstBuffer); auto postSyncBuffer = std::unique_ptr(Buffer::create(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(uint64_t), bufferAMemory, retVal)); /* guard the post-sync buffer itself before it is dereferenced further down */ ASSERT_NE(nullptr, postSyncBuffer); uint64_t expectedPostSyncData = 0; cl_uint
numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event *event = nullptr; retVal = pCmdQ->enqueueCopyBuffer(srcBuffer.get(), dstBuffer.get(), 0, 0, bufferSize, numEventsInWaitList, eventWaitList, event); EXPECT_EQ(CL_SUCCESS, retVal); L3RangesVec ranges; ranges.push_back(L3Range::fromAddressSizeWithPolicy(dstBuffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress(), MemoryConstants::pageSize, L3_FLUSH_ADDRESS_RANGE::L3_FLUSH_EVICTION_POLICY_FLUSH_L3_WITH_EVICTION)); size_t requiredSize = getSizeNeededToFlushGpuCache(ranges, true) + 2 * sizeof(PIPE_CONTROL); LinearStream &l3FlushCmdStream = pCmdQ->getCS(requiredSize); auto offset = l3FlushCmdStream.getUsed(); auto pcBeforeFlush = l3FlushCmdStream.getSpaceForCmd(); *pcBeforeFlush = FamilyType::cmdInitPipeControl; flushGpuCache(&l3FlushCmdStream, ranges, postSyncBuffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress(), device->getHardwareInfo()); auto &csr = pCmdQ->getGpgpuCommandStreamReceiver(); auto flags = DispatchFlagsHelper::createDefaultDispatchFlags(); flags.blocking = true; DebugManager.flags.DisableDcFlushInEpilogue.set(true); csr.makeResident(*postSyncBuffer->getGraphicsAllocation(rootDeviceIndex)); csr.flushTask(l3FlushCmdStream, offset, &pCmdQ->getIndirectHeap(NEO::IndirectHeap::Type::DYNAMIC_STATE, 0), &pCmdQ->getIndirectHeap(NEO::IndirectHeap::Type::INDIRECT_OBJECT, 0), &pCmdQ->getIndirectHeap(NEO::IndirectHeap::Type::SURFACE_STATE, 0), pCmdQ->taskLevel, flags, pCmdQ->getDevice()); std::string err; auto cmdBuffOk = expectCmdBuff(l3FlushCmdStream, 0, std::vector{ new MatchAnyCmd(AnyNumber), new MatchHwCmd(1, Expects{EXPECT_MEMBER(PIPE_CONTROL, getCommandStreamerStallEnable, true), EXPECT_MEMBER(PIPE_CONTROL, getDcFlushEnable, false)}), new MatchHwCmd(1, Expects{EXPECT_MEMBER(L3_CONTROL, getPostSyncOperation, L3_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA)}), new MatchHwCmd(1, Expects{EXPECT_MEMBER(PIPE_CONTROL, getDcFlushEnable, false)}), // epilogue 
new MatchAnyCmd(AnyNumber), new MatchHwCmd(0), }, &err); EXPECT_TRUE(cmdBuffOk) << err; expectMemory(reinterpret_cast(dstBuffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress()), bufferAMemory, bufferSize); expectMemory(reinterpret_cast(postSyncBuffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress()), &expectedPostSyncData, sizeof(expectedPostSyncData)); } aub_walker_partition_tests_xehp_and_later.cpp000066400000000000000000001563331422164147700377350ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/command_stream/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_container/walker_partition_xehp_and_later.h" #include "shared/source/helpers/array_count.h" #include "shared/source/helpers/basic_math.h" #include "shared/source/helpers/timestamp_packet.h" #include "shared/source/utilities/tag_allocator.h" #include "shared/test/common/cmd_parse/hw_parse.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/dispatch_flags_helper.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/event/event.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/aub_tests/command_stream/aub_command_stream_fixture.h" #include "opencl/test/unit_test/aub_tests/fixtures/aub_fixture.h" #include "opencl/test/unit_test/command_queue/command_queue_fixture.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/fixtures/simple_arg_kernel_fixture.h" #include "opencl/test/unit_test/indirect_heap/indirect_heap_fixture.h" using namespace NEO; using namespace WalkerPartition; static int32_t testPartitionCount[] = {1, 2, 4, 8, 16}; static int32_t testPartitionType[] = {1, 2, 3}; static uint32_t testWorkingDimensions[] = {3}; extern bool generateRandomInput; struct DispatchParamters { size_t globalWorkSize[3]; size_t localWorkSize[3]; } 
DispatchParamtersForTests[] = { {{12, 25, 21}, {3, 5, 7}}, {{8, 16, 20}, {8, 4, 2}}, {{7, 13, 17}, {1, 1, 1}}, }; /* Base fixture for the walker-partition AUB tests: enables timestamp packets, selects kernel index 5 and binds a zero-initialised destination buffer as that kernel's argument 0. */ struct AubWalkerPartitionFixture : public KernelAUBFixture { void SetUp() override { debugRestorer = std::make_unique(); DebugManager.flags.EnableTimestampPacket.set(1); kernelIds |= (1 << 5); KernelAUBFixture::SetUp(); size_t userMemorySize = 16 * MemoryConstants::kiloByte; if (generateRandomInput) { userMemorySize = 16000 * MemoryConstants::kiloByte; } sizeUserMemory = userMemorySize; auto destMemory = alignedMalloc(sizeUserMemory, 4096); ASSERT_NE(nullptr, destMemory); memset(destMemory, 0x0, sizeUserMemory); dstBuffer.reset(Buffer::create(context, CL_MEM_COPY_HOST_PTR, sizeUserMemory, destMemory, retVal)); /* release the host staging memory before asserting, so a failed Buffer::create does not leak it */ alignedFree(destMemory); ASSERT_NE(nullptr, dstBuffer); kernels[5]->setArg(0, dstBuffer.get()); } void TearDown() override { pCmdQ->flush(); KernelAUBFixture::TearDown(); } template void validatePartitionProgramming(uint64_t postSyncAddress, int32_t partitionCount) { using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; using WALKER_TYPE = typename FamilyType::WALKER_TYPE; uint32_t totalWorkgroupCount = 1u; uint32_t totalWorkItemsInWorkgroup = 1u; uint32_t totalWorkItemsCount = 1; for (auto dimension = 0u; dimension < workingDimensions; dimension++) { totalWorkgroupCount *= static_cast(dispatchParamters.globalWorkSize[dimension] / dispatchParamters.localWorkSize[dimension]); totalWorkItemsInWorkgroup *= static_cast(dispatchParamters.localWorkSize[dimension]); totalWorkItemsCount *= static_cast(dispatchParamters.globalWorkSize[dimension]); } const uint32_t workgroupCount = static_cast(dispatchParamters.globalWorkSize[partitionType - 1] / dispatchParamters.localWorkSize[partitionType - 1]); auto partitionSize = Math::divideAndRoundUp(workgroupCount, partitionCount); if (static_cast(partitionType) > workingDimensions) { partitionSize = 1; } hwParser.parseCommands(pCmdQ->getCS(0), 0); uint32_t walkersCount =
hwParser.getCommandCount(); EXPECT_EQ(walkersCount, 1u); GenCmdList walkerList = hwParser.getCommandsList(); WALKER_TYPE *walkerCmd = static_cast(*walkerList.begin()); EXPECT_EQ(0u, walkerCmd->getPartitionId()); if (partitionCount > 1) { EXPECT_TRUE(walkerCmd->getWorkloadPartitionEnable()); EXPECT_EQ(partitionSize, walkerCmd->getPartitionSize()); EXPECT_EQ(partitionType, walkerCmd->getPartitionType()); } else { EXPECT_FALSE(walkerCmd->getWorkloadPartitionEnable()); EXPECT_EQ(0u, walkerCmd->getPartitionSize()); EXPECT_EQ(0u, walkerCmd->getPartitionType()); } EXPECT_EQ(FamilyType::POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation()); EXPECT_EQ(postSyncAddress, walkerCmd->getPostSync().getDestinationAddress()); int notExpectedValue[] = {1, 1, 1, 1}; for (auto partitionId = 0; partitionId < DebugManager.flags.ExperimentalSetWalkerPartitionCount.get(); partitionId++) { expectNotEqualMemory(reinterpret_cast(postSyncAddress), ¬ExpectedValue, sizeof(notExpectedValue)); postSyncAddress += 16; //next post sync needs to be right after the previous one } auto dstGpuAddress = reinterpret_cast(dstBuffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress()); expectMemory(dstGpuAddress, &totalWorkItemsCount, sizeof(uint32_t)); auto groupSpecificWorkCounts = ptrOffset(dstGpuAddress, 4); StackVec workgroupCounts; workgroupCounts.resize(totalWorkgroupCount); for (uint32_t workgroupId = 0u; workgroupId < totalWorkgroupCount; workgroupId++) { workgroupCounts[workgroupId] = totalWorkItemsInWorkgroup; } expectMemory(groupSpecificWorkCounts, workgroupCounts.begin(), workgroupCounts.size() * sizeof(uint32_t)); } template typename FamilyType::PIPE_CONTROL *retrieveSyncPipeControl(void *startAddress, const HardwareInfo &hwInfo) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; uint8_t buffer[256]; LinearStream stream(buffer, 256); MemorySynchronizationCommands::addPipeControlWA(stream, 0ull, hwInfo); void *syncPipeControlAddress = 
reinterpret_cast(reinterpret_cast(startAddress) + stream.getUsed()); PIPE_CONTROL *pipeControl = genCmdCast(syncPipeControlAddress); return pipeControl; } std::unique_ptr debugRestorer; std::unique_ptr dstBuffer; size_t sizeUserMemory = 0; cl_uint workingDimensions = 1; int32_t partitionCount; int32_t partitionType; HardwareParse hwParser; DispatchParamters dispatchParamters; }; struct AubWalkerPartitionTest : public AubWalkerPartitionFixture, public ::testing::TestWithParam> { void SetUp() override { AubWalkerPartitionFixture::SetUp(); std::tie(partitionCount, partitionType, dispatchParamters, workingDimensions) = GetParam(); if (generateRandomInput) { workingDimensions = (rand() % 3 + 1); partitionType = (rand() % 3 + 1); partitionCount = rand() % 16 + 1; //now generate dimensions that makes sense auto goodWorkingSizeGenerated = false; while (!goodWorkingSizeGenerated) { dispatchParamters.localWorkSize[0] = rand() % 128 + 1; dispatchParamters.localWorkSize[1] = rand() % 128 + 1; dispatchParamters.localWorkSize[2] = rand() % 128 + 1; auto totalWorkItemsInWorkgroup = 1; for (auto dimension = 0u; dimension < workingDimensions; dimension++) { totalWorkItemsInWorkgroup *= static_cast(dispatchParamters.localWorkSize[dimension]); } if (totalWorkItemsInWorkgroup <= 1024) { dispatchParamters.globalWorkSize[0] = dispatchParamters.localWorkSize[0] * (rand() % 32 + 1); dispatchParamters.globalWorkSize[1] = dispatchParamters.localWorkSize[1] * (rand() % 32 + 1); dispatchParamters.globalWorkSize[2] = dispatchParamters.localWorkSize[2] * (rand() % 32 + 1); printf("\n generated following dispatch paramters work dim %u gws %zu %zu %zu lws %zu %zu %zu, partition type %d partitionCount %d", workingDimensions, dispatchParamters.globalWorkSize[0], dispatchParamters.globalWorkSize[1], dispatchParamters.globalWorkSize[2], dispatchParamters.localWorkSize[0], dispatchParamters.localWorkSize[1], dispatchParamters.localWorkSize[2], partitionType, partitionCount); fflush(stdout); 
goodWorkingSizeGenerated = true; } }; } DebugManager.flags.ExperimentalSetWalkerPartitionCount.set(partitionCount); DebugManager.flags.ExperimentalSetWalkerPartitionType.set(partitionType); DebugManager.flags.EnableWalkerPartition.set(1u); } void TearDown() override { AubWalkerPartitionFixture::TearDown(); } }; /* Fixture with partition count and partition type forced to 0. It owns a command-buffer allocation wrapped by taskStream and a zero-filled helper surface; both are released in TearDown. */ struct AubWalkerPartitionZeroFixture : public AubWalkerPartitionFixture { void SetUp() override { AubWalkerPartitionFixture::SetUp(); partitionCount = 0; partitionType = 0; workingDimensions = 1; DebugManager.flags.ExperimentalSetWalkerPartitionCount.set(0); DebugManager.flags.ExperimentalSetWalkerPartitionType.set(0); commandBufferProperties = std::make_unique(device->getRootDeviceIndex(), true, MemoryConstants::pageSize, AllocationType::COMMAND_BUFFER, false, device->getDeviceBitfield()); auto memoryManager = this->device->getMemoryManager(); streamAllocation = memoryManager->allocateGraphicsMemoryWithProperties(*commandBufferProperties); /* fail SetUp early instead of dereferencing a null allocation */ ASSERT_NE(nullptr, streamAllocation); helperSurface = memoryManager->allocateGraphicsMemoryWithProperties(*commandBufferProperties); ASSERT_NE(nullptr, helperSurface); memset(helperSurface->getUnderlyingBuffer(), 0, MemoryConstants::pageSize); taskStream = std::make_unique(streamAllocation); } void TearDown() override { auto memoryManager = this->device->getMemoryManager(); memoryManager->freeGraphicsMemory(streamAllocation); memoryManager->freeGraphicsMemory(helperSurface); AubWalkerPartitionFixture::TearDown(); } void flushStream() { DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.guardCommandBufferWithPipeControl = true; csr->makeResident(*helperSurface); csr->flushTask(*taskStream, 0, &csr->getIndirectHeap(IndirectHeap::Type::DYNAMIC_STATE, 0u), &csr->getIndirectHeap(IndirectHeap::Type::INDIRECT_OBJECT, 0u), &csr->getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0u), 0u, dispatchFlags, device->getDevice()); csr->flushBatchedSubmissions(); } std::unique_ptr taskStream; GraphicsAllocation *streamAllocation = nullptr; GraphicsAllocation
*helperSurface = nullptr; std::unique_ptr commandBufferProperties; }; using AubWalkerPartitionZeroTest = Test; HWCMDTEST_F(IGFX_XE_HP_CORE, AubWalkerPartitionZeroTest, whenPartitionCountSetToZeroThenProvideEqualSingleWalker) { using WALKER_TYPE = typename FamilyType::WALKER_TYPE; using PARTITION_TYPE = typename FamilyType::WALKER_TYPE::PARTITION_TYPE; size_t globalWorkOffset[3] = {0, 0, 0}; cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event *event = nullptr; size_t gwsSize[] = {128, 1, 1}; size_t lwsSize[] = {32, 1, 1}; auto retVal = pCmdQ->enqueueKernel( kernels[5].get(), workingDimensions, globalWorkOffset, gwsSize, lwsSize, numEventsInWaitList, eventWaitList, event); ASSERT_EQ(CL_SUCCESS, retVal); pCmdQ->flush(); auto cmdPartitionType = static_cast(partitionType); uint32_t cmdPartitionCount = static_cast(partitionCount); hwParser.parseCommands(pCmdQ->getCS(0), 0); uint32_t walkersCount = hwParser.getCommandCount(); EXPECT_EQ(cmdPartitionCount + 1, walkersCount); GenCmdList walkerList = hwParser.getCommandsList(); EXPECT_EQ(walkersCount, static_cast(walkerList.size())); uint32_t i = 0; for (GenCmdList::iterator walker = walkerList.begin(); walker != walkerList.end(); ++walker, ++i) { WALKER_TYPE *walkerCmd = static_cast(*walker); EXPECT_EQ(cmdPartitionCount, walkerCmd->getPartitionId()); EXPECT_EQ(cmdPartitionType, walkerCmd->getPartitionType()); EXPECT_EQ(cmdPartitionCount, walkerCmd->getPartitionSize()); } auto dstGpuAddress = reinterpret_cast(dstBuffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress()); expectMemory(dstGpuAddress, &gwsSize[workingDimensions - 1], sizeof(uint32_t)); const uint32_t workgroupCount = static_cast(gwsSize[workingDimensions - 1] / lwsSize[workingDimensions - 1]); auto groupSpecificWorkCounts = ptrOffset(dstGpuAddress, 4); StackVec workgroupCounts; workgroupCounts.resize(workgroupCount); for (uint32_t workgroupId = 0u; workgroupId < workgroupCount; workgroupId++) { workgroupCounts[workgroupId] = 
static_cast(lwsSize[workingDimensions - 1]); } expectMemory(groupSpecificWorkCounts, workgroupCounts.begin(), workgroupCounts.size() * sizeof(uint32_t)); } HWCMDTEST_F(IGFX_XE_HP_CORE, AubWalkerPartitionZeroTest, whenPipeControlIsBeingEmittedWithPartitionBitSetThenMultipleFieldsAreBeingUpdatedWithValue) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; auto writeAddress = helperSurface->getGpuAddress(); auto partitionId = 1u; auto writeSize = 8u; auto miAddressOffset = WalkerPartition::addressOffsetCCSOffset; auto wparidOffset = WalkerPartition::wparidCCSOffset; uint64_t writeValue = 7llu; uint32_t totalBytesProgrammed = 0u; auto streamCpuPointer = taskStream->getSpace(0); WalkerPartition::programRegisterWithValue(streamCpuPointer, wparidOffset, totalBytesProgrammed, partitionId); WalkerPartition::programRegisterWithValue(streamCpuPointer, miAddressOffset, totalBytesProgrammed, writeSize); taskStream->getSpace(totalBytesProgrammed); void *pipeControlAddress = taskStream->getSpace(0); PipeControlArgs args; MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( *taskStream, FamilyType::PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, writeAddress, writeValue, device->getHardwareInfo(), args); auto pipeControl = retrieveSyncPipeControl(pipeControlAddress, device->getHardwareInfo()); ASSERT_NE(nullptr, pipeControl); pipeControl->setWorkloadPartitionIdOffsetEnable(true); flushStream(); expectNotEqualMemory(reinterpret_cast(writeAddress), &writeValue, 4u); //write needs to happen after 8 bytes expectMemory(reinterpret_cast(writeAddress + 8), &writeValue, 4u); } HWCMDTEST_F(IGFX_XE_HP_CORE, AubWalkerPartitionZeroTest, givenAtomicOperationDecOnLocalMemoryWhenItIsExecuteThenOperationUpdatesMemory) { auto writeAddress = helperSurface->getGpuAddress(); auto cpuAddress = reinterpret_cast(helperSurface->getUnderlyingBuffer()); *cpuAddress = 10; auto streamCpuPointer = taskStream->getSpace(0); uint32_t totalBytesProgrammed = 
0u; uint32_t expectedValue = 9u; WalkerPartition::programMiAtomic(streamCpuPointer, totalBytesProgrammed, writeAddress, false, WalkerPartition::MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_DECREMENT); taskStream->getSpace(totalBytesProgrammed); flushStream(); expectMemory(reinterpret_cast(writeAddress), &expectedValue, sizeof(expectedValue)); } HWCMDTEST_F(IGFX_XE_HP_CORE, AubWalkerPartitionZeroTest, givenAtomicOperationIncOnLocalMemoryWhenItIsExecuteThenOperationUpdatesMemory) { auto writeAddress = helperSurface->getGpuAddress(); auto cpuAddress = reinterpret_cast(helperSurface->getUnderlyingBuffer()); *cpuAddress = 10; auto streamCpuPointer = taskStream->getSpace(0); uint32_t totalBytesProgrammed = 0u; uint32_t expectedValue = 11u; WalkerPartition::programMiAtomic(streamCpuPointer, totalBytesProgrammed, writeAddress, false, WalkerPartition::MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT); taskStream->getSpace(totalBytesProgrammed); flushStream(); expectMemory(reinterpret_cast(writeAddress), &expectedValue, sizeof(expectedValue)); } HWCMDTEST_F(IGFX_XE_HP_CORE, AubWalkerPartitionZeroTest, givenVariousCompareModesWhenConditionalBatchBufferEndIsEmittedItThenHandlesCompareCorrectly) { using CONDITIONAL_BATCH_BUFFER_END = typename FamilyType::MI_CONDITIONAL_BATCH_BUFFER_END; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; auto writeAddress = helperSurface->getGpuAddress(); auto compareAddress = reinterpret_cast(helperSurface->getUnderlyingBuffer()); auto conditionalBatchBufferEnd = reinterpret_cast(taskStream->getSpace(sizeof(CONDITIONAL_BATCH_BUFFER_END))); conditionalBatchBufferEnd->init(); conditionalBatchBufferEnd->setCompareAddress(writeAddress); conditionalBatchBufferEnd->setCompareSemaphore(1); writeAddress += sizeof(uint64_t); uint32_t writeValue = 7u; uint32_t pipeControlNotExecutedValue = 0u; //this pipe control should be executed void *pipeControlAddress = taskStream->getSpace(0); PipeControlArgs args; 
MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( *taskStream, FamilyType::PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, writeAddress, writeValue, device->getHardwareInfo(), args); auto pipeControl = retrieveSyncPipeControl(pipeControlAddress, device->getHardwareInfo()); ASSERT_NE(nullptr, pipeControl); auto programPipeControl = [&]() { pipeControl->setImmediateData(writeValue); pipeControl->setAddress(static_cast(writeAddress & 0x0000FFFFFFFFULL)); pipeControl->setAddressHigh(static_cast(writeAddress >> 32)); }; //we have now command buffer that has conditional batch buffer end and pipe control that tests whether batch buffer end acted correctly //MAD_GREATER_THAN_IDD If Indirect fetched data is greater than inline data then continue. //continue test conditionalBatchBufferEnd->setCompareOperation(CONDITIONAL_BATCH_BUFFER_END::COMPARE_OPERATION::COMPARE_OPERATION_MAD_GREATER_THAN_IDD); *compareAddress = 11; auto inlineData = 10u; conditionalBatchBufferEnd->setCompareDataDword(inlineData); programPipeControl(); flushStream(); expectMemory(reinterpret_cast(writeAddress), &writeValue, sizeof(writeValue)); //terminate test *compareAddress = 10; inlineData = 10u; writeAddress += sizeof(uint64_t); writeValue++; conditionalBatchBufferEnd->setCompareDataDword(inlineData); programPipeControl(); flushStream(); expectMemory(reinterpret_cast(writeAddress), &pipeControlNotExecutedValue, sizeof(pipeControlNotExecutedValue)); //MAD_GREATER_THAN_OR_EQUAL_IDD If Indirect fetched data is greater than or equal to inline data then continue. 
//continue test - greater conditionalBatchBufferEnd->setCompareOperation(CONDITIONAL_BATCH_BUFFER_END::COMPARE_OPERATION::COMPARE_OPERATION_MAD_GREATER_THAN_OR_EQUAL_IDD); *compareAddress = 11; inlineData = 10u; writeAddress += sizeof(uint64_t); writeValue++; conditionalBatchBufferEnd->setCompareDataDword(inlineData); programPipeControl(); flushStream(); expectMemory(reinterpret_cast(writeAddress), &writeValue, sizeof(writeValue)); //continue test - equal *compareAddress = 10; inlineData = 10u; writeAddress += sizeof(uint64_t); writeValue++; conditionalBatchBufferEnd->setCompareDataDword(inlineData); programPipeControl(); flushStream(); expectMemory(reinterpret_cast(writeAddress), &writeValue, sizeof(writeValue)); //terminate test *compareAddress = 9; inlineData = 10u; writeAddress += sizeof(uint64_t); writeValue++; conditionalBatchBufferEnd->setCompareDataDword(inlineData); programPipeControl(); flushStream(); expectMemory(reinterpret_cast(writeAddress), &pipeControlNotExecutedValue, sizeof(pipeControlNotExecutedValue)); //MAD_LESS_THAN_IDD If Indirect fetched data is less than inline data then continue. //continue test conditionalBatchBufferEnd->setCompareOperation(CONDITIONAL_BATCH_BUFFER_END::COMPARE_OPERATION::COMPARE_OPERATION_MAD_LESS_THAN_IDD); *compareAddress = 9; inlineData = 10u; writeAddress += sizeof(uint64_t); writeValue++; conditionalBatchBufferEnd->setCompareDataDword(inlineData); programPipeControl(); flushStream(); expectMemory(reinterpret_cast(writeAddress), &writeValue, sizeof(writeValue)); //terminate test *compareAddress = 10; inlineData = 10u; writeAddress += sizeof(uint64_t); writeValue++; conditionalBatchBufferEnd->setCompareDataDword(inlineData); programPipeControl(); flushStream(); expectMemory(reinterpret_cast(writeAddress), &pipeControlNotExecutedValue, sizeof(pipeControlNotExecutedValue)); //MAD_LESS_THAN_OR_EQUAL_IDD If Indirect fetched data is less than or equal to inline data then continue. 
//continue test - less conditionalBatchBufferEnd->setCompareOperation(CONDITIONAL_BATCH_BUFFER_END::COMPARE_OPERATION::COMPARE_OPERATION_MAD_LESS_THAN_OR_EQUAL_IDD); *compareAddress = 9; inlineData = 10u; writeAddress += sizeof(uint64_t); writeValue++; conditionalBatchBufferEnd->setCompareDataDword(inlineData); programPipeControl(); flushStream(); expectMemory(reinterpret_cast(writeAddress), &writeValue, sizeof(writeValue)); //continue test - equal *compareAddress = 10; inlineData = 10u; writeAddress += sizeof(uint64_t); writeValue++; conditionalBatchBufferEnd->setCompareDataDword(inlineData); programPipeControl(); flushStream(); expectMemory(reinterpret_cast(writeAddress), &writeValue, sizeof(writeValue)); //terminate test *compareAddress = 11; inlineData = 10u; writeAddress += sizeof(uint64_t); writeValue++; conditionalBatchBufferEnd->setCompareDataDword(inlineData); programPipeControl(); flushStream(); expectMemory(reinterpret_cast(writeAddress), &pipeControlNotExecutedValue, sizeof(pipeControlNotExecutedValue)); //MAD_EQUAL_IDD If Indirect fetched data is equal to inline data then continue. //continue test equal conditionalBatchBufferEnd->setCompareOperation(CONDITIONAL_BATCH_BUFFER_END::COMPARE_OPERATION::COMPARE_OPERATION_MAD_EQUAL_IDD); *compareAddress = 10; inlineData = 10u; writeAddress += sizeof(uint64_t); writeValue++; conditionalBatchBufferEnd->setCompareDataDword(inlineData); programPipeControl(); flushStream(); expectMemory(reinterpret_cast(writeAddress), &writeValue, sizeof(writeValue)); //terminate test *compareAddress = 0; inlineData = 10u; writeAddress += sizeof(uint64_t); writeValue++; conditionalBatchBufferEnd->setCompareDataDword(inlineData); programPipeControl(); flushStream(); expectMemory(reinterpret_cast(writeAddress), &pipeControlNotExecutedValue, sizeof(pipeControlNotExecutedValue)); //MAD_NOT_EQUAL_IDD If Indirect fetched data is not equal to inline data then continue. 
//continue test not equal conditionalBatchBufferEnd->setCompareOperation(CONDITIONAL_BATCH_BUFFER_END::COMPARE_OPERATION::COMPARE_OPERATION_MAD_NOT_EQUAL_IDD); *compareAddress = 11; inlineData = 10u; writeAddress += sizeof(uint64_t); writeValue++; conditionalBatchBufferEnd->setCompareDataDword(inlineData); programPipeControl(); flushStream(); expectMemory(reinterpret_cast(writeAddress), &writeValue, sizeof(writeValue)); //terminate test *compareAddress = 10; inlineData = 10u; writeAddress += sizeof(uint64_t); writeValue++; conditionalBatchBufferEnd->setCompareDataDword(inlineData); programPipeControl(); flushStream(); expectMemory(reinterpret_cast(writeAddress), &pipeControlNotExecutedValue, sizeof(pipeControlNotExecutedValue)); } template struct MultiLevelBatchAubFixture : public AubWalkerPartitionZeroFixture { void SetUp() override { if (enableNesting) { //turn on Batch Buffer nesting DebugManager.flags.AubDumpAddMmioRegistersList.set( "0x1A09C;0x10001000"); } else { //turn off Batch Buffer nesting DebugManager.flags.AubDumpAddMmioRegistersList.set( "0x1A09C;0x10000000"); } AubWalkerPartitionZeroFixture::SetUp(); auto memoryManager = this->device->getMemoryManager(); secondLevelBatch = memoryManager->allocateGraphicsMemoryWithProperties(*commandBufferProperties); thirdLevelBatch = memoryManager->allocateGraphicsMemoryWithProperties(*commandBufferProperties); secondLevelBatchStream = std::make_unique(secondLevelBatch); thirdLevelBatchStream = std::make_unique(thirdLevelBatch); }; void TearDown() override { debugRestorer.reset(nullptr); DebugManager.flags.AubDumpAddMmioRegistersList.getRef() = "unk"; DebugManager.flags.AubDumpAddMmioRegistersList.getRef().shrink_to_fit(); auto memoryManager = this->device->getMemoryManager(); memoryManager->freeGraphicsMemory(thirdLevelBatch); memoryManager->freeGraphicsMemory(secondLevelBatch); AubWalkerPartitionZeroFixture::TearDown(); }; std::unique_ptr secondLevelBatchStream; std::unique_ptr thirdLevelBatchStream; 
GraphicsAllocation *secondLevelBatch = nullptr; GraphicsAllocation *thirdLevelBatch = nullptr; }; using MultiLevelBatchTestsWithNesting = Test>; HWCMDTEST_F(IGFX_XE_HP_CORE, MultiLevelBatchTestsWithNesting, givenConditionalBatchBufferEndWhenItExitsThirdLevelCommandBufferThenSecondLevelBatchIsResumed) { auto writeAddress = helperSurface->getGpuAddress(); auto compareAddress = writeAddress; using CONDITIONAL_BATCH_BUFFER_END = typename FamilyType::MI_CONDITIONAL_BATCH_BUFFER_END; using BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; //nest to second level auto batchBufferStart = reinterpret_cast(taskStream->getSpace(sizeof(BATCH_BUFFER_START))); batchBufferStart->init(); batchBufferStart->setBatchBufferStartAddress(secondLevelBatch->getGpuAddress()); batchBufferStart->setNestedLevelBatchBuffer(BATCH_BUFFER_START::NESTED_LEVEL_BATCH_BUFFER::NESTED_LEVEL_BATCH_BUFFER_NESTED); //nest to third level batchBufferStart = reinterpret_cast(secondLevelBatchStream->getSpace(sizeof(BATCH_BUFFER_START))); batchBufferStart->init(); batchBufferStart->setBatchBufferStartAddress(thirdLevelBatch->getGpuAddress()); batchBufferStart->setNestedLevelBatchBuffer(BATCH_BUFFER_START::NESTED_LEVEL_BATCH_BUFFER::NESTED_LEVEL_BATCH_BUFFER_NESTED); auto conditionalBatchBufferEnd = reinterpret_cast(thirdLevelBatchStream->getSpace(sizeof(CONDITIONAL_BATCH_BUFFER_END))); conditionalBatchBufferEnd->init(); conditionalBatchBufferEnd->setEndCurrentBatchBufferLevel(1); conditionalBatchBufferEnd->setCompareAddress(compareAddress); conditionalBatchBufferEnd->setCompareSemaphore(1); writeAddress += sizeof(uint64_t); auto writeValue = 7u; //this pipe control should be executed PipeControlArgs args; MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( *secondLevelBatchStream, FamilyType::PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, writeAddress, writeValue, device->getHardwareInfo(), 
args); conditionalBatchBufferEnd = reinterpret_cast(secondLevelBatchStream->getSpace(sizeof(CONDITIONAL_BATCH_BUFFER_END))); conditionalBatchBufferEnd->init(); conditionalBatchBufferEnd->setCompareAddress(compareAddress); conditionalBatchBufferEnd->setEndCurrentBatchBufferLevel(1); conditionalBatchBufferEnd->setCompareSemaphore(1); writeAddress += sizeof(uint64_t); writeValue++; MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( *taskStream, FamilyType::PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, writeAddress, writeValue, device->getHardwareInfo(), args); csr->makeResident(*secondLevelBatch); csr->makeResident(*thirdLevelBatch); flushStream(); writeAddress = helperSurface->getGpuAddress() + sizeof(uint64_t); writeValue = 7u; expectMemory(reinterpret_cast(writeAddress), &writeValue, sizeof(writeValue)); writeAddress += sizeof(uint64_t); writeValue++; expectMemory(reinterpret_cast(writeAddress), &writeValue, sizeof(writeValue)); } HWCMDTEST_F(IGFX_XE_HP_CORE, MultiLevelBatchTestsWithNesting, givenConditionalBatchBufferEndWhenItExitsToTheRingThenAllCommandBufferLevelsAreSkipped) { auto writeAddress = helperSurface->getGpuAddress(); auto compareAddress = writeAddress; using CONDITIONAL_BATCH_BUFFER_END = typename FamilyType::MI_CONDITIONAL_BATCH_BUFFER_END; using BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; //nest to second level auto batchBufferStart = reinterpret_cast(taskStream->getSpace(sizeof(BATCH_BUFFER_START))); batchBufferStart->init(); batchBufferStart->setBatchBufferStartAddress(secondLevelBatch->getGpuAddress()); batchBufferStart->setNestedLevelBatchBuffer(BATCH_BUFFER_START::NESTED_LEVEL_BATCH_BUFFER::NESTED_LEVEL_BATCH_BUFFER_NESTED); //nest to third level batchBufferStart = reinterpret_cast(secondLevelBatchStream->getSpace(sizeof(BATCH_BUFFER_START))); batchBufferStart->init(); 
batchBufferStart->setBatchBufferStartAddress(thirdLevelBatch->getGpuAddress()); batchBufferStart->setNestedLevelBatchBuffer(BATCH_BUFFER_START::NESTED_LEVEL_BATCH_BUFFER::NESTED_LEVEL_BATCH_BUFFER_NESTED); auto conditionalBatchBufferEnd = reinterpret_cast(thirdLevelBatchStream->getSpace(sizeof(CONDITIONAL_BATCH_BUFFER_END))); conditionalBatchBufferEnd->init(); conditionalBatchBufferEnd->setEndCurrentBatchBufferLevel(0); conditionalBatchBufferEnd->setCompareAddress(compareAddress); conditionalBatchBufferEnd->setCompareSemaphore(1); writeAddress += sizeof(uint64_t); auto writeValue = 7u; //this pipe control should NOT be executed PipeControlArgs args; MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( *secondLevelBatchStream, FamilyType::PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, writeAddress, writeValue, device->getHardwareInfo(), args); conditionalBatchBufferEnd = reinterpret_cast(secondLevelBatchStream->getSpace(sizeof(CONDITIONAL_BATCH_BUFFER_END))); conditionalBatchBufferEnd->init(); conditionalBatchBufferEnd->setCompareAddress(compareAddress); conditionalBatchBufferEnd->setEndCurrentBatchBufferLevel(1); conditionalBatchBufferEnd->setCompareSemaphore(1); writeAddress += sizeof(uint64_t); writeValue++; MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( *taskStream, FamilyType::PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, writeAddress, writeValue, device->getHardwareInfo(), args); csr->makeResident(*secondLevelBatch); csr->makeResident(*thirdLevelBatch); flushStream(); writeAddress = helperSurface->getGpuAddress() + sizeof(uint64_t); writeValue = 0u; //pipe controls are not emitted expectMemory(reinterpret_cast(writeAddress), &writeValue, sizeof(writeValue)); writeAddress += sizeof(uint64_t); expectMemory(reinterpret_cast(writeAddress), &writeValue, sizeof(writeValue)); } HWCMDTEST_F(IGFX_XE_HP_CORE, MultiLevelBatchTestsWithNesting, 
givenCommandBufferCacheOnWhenBatchBufferIsExecutedThenItWorksCorrectly) { auto writeAddress = helperSurface->getGpuAddress(); auto writeValue = 7u; using BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END; using BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; //nest to second level auto batchBufferStart = reinterpret_cast(taskStream->getSpace(sizeof(BATCH_BUFFER_START))); batchBufferStart->init(); batchBufferStart->setBatchBufferStartAddress(secondLevelBatch->getGpuAddress()); batchBufferStart->setEnableCommandCache(1u); batchBufferStart->setNestedLevelBatchBuffer(BATCH_BUFFER_START::NESTED_LEVEL_BATCH_BUFFER::NESTED_LEVEL_BATCH_BUFFER_NESTED); //this pipe control should be executed PipeControlArgs args; MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( *secondLevelBatchStream, FamilyType::PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, writeAddress, writeValue, device->getHardwareInfo(), args); auto batchBufferEnd = reinterpret_cast(secondLevelBatchStream->getSpace(sizeof(BATCH_BUFFER_END))); batchBufferEnd->init(); csr->makeResident(*secondLevelBatch); flushStream(); expectMemory(reinterpret_cast(writeAddress), &writeValue, sizeof(writeValue)); } using MultiLevelBatchTestsWithoutNesting = Test>; HWCMDTEST_F(IGFX_XE_HP_CORE, MultiLevelBatchTestsWithoutNesting, givenConditionalBBEndWhenItExitsFromSecondLevelThenUpperLevelIsResumed) { auto writeAddress = helperSurface->getGpuAddress(); auto compareAddress = writeAddress; using CONDITIONAL_BATCH_BUFFER_END = typename FamilyType::MI_CONDITIONAL_BATCH_BUFFER_END; using BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; //nest to second level auto batchBufferStart = reinterpret_cast(taskStream->getSpace(sizeof(BATCH_BUFFER_START))); batchBufferStart->init(); batchBufferStart->setBatchBufferStartAddress(secondLevelBatch->getGpuAddress()); 
batchBufferStart->setSecondLevelBatchBuffer(BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH); //nest to third level batchBufferStart = reinterpret_cast(secondLevelBatchStream->getSpace(sizeof(BATCH_BUFFER_START))); batchBufferStart->init(); batchBufferStart->setBatchBufferStartAddress(thirdLevelBatch->getGpuAddress()); batchBufferStart->setSecondLevelBatchBuffer(BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH); auto conditionalBatchBufferEnd = reinterpret_cast(thirdLevelBatchStream->getSpace(sizeof(CONDITIONAL_BATCH_BUFFER_END))); conditionalBatchBufferEnd->init(); conditionalBatchBufferEnd->setEndCurrentBatchBufferLevel(0); conditionalBatchBufferEnd->setCompareAddress(compareAddress); conditionalBatchBufferEnd->setCompareSemaphore(1); writeAddress += sizeof(uint64_t); auto writeValue = 7u; //this pipe control should't be executed PipeControlArgs args; MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( *secondLevelBatchStream, FamilyType::PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, writeAddress, writeValue, device->getHardwareInfo(), args); conditionalBatchBufferEnd = reinterpret_cast(secondLevelBatchStream->getSpace(sizeof(CONDITIONAL_BATCH_BUFFER_END))); conditionalBatchBufferEnd->init(); conditionalBatchBufferEnd->setCompareAddress(compareAddress); conditionalBatchBufferEnd->setEndCurrentBatchBufferLevel(1); conditionalBatchBufferEnd->setCompareSemaphore(1); writeAddress += sizeof(uint64_t); writeValue++; //and this shouldn't as well, we returned to ring MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( *taskStream, FamilyType::PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, writeAddress, writeValue, device->getHardwareInfo(), args); csr->makeResident(*secondLevelBatch); csr->makeResident(*thirdLevelBatch); flushStream(); writeAddress = helperSurface->getGpuAddress() + 
sizeof(uint64_t); auto zeroValue = 0llu; expectMemory(reinterpret_cast(writeAddress), &zeroValue, sizeof(zeroValue)); writeAddress += sizeof(uint64_t); writeValue++; expectMemory(reinterpret_cast(writeAddress), &zeroValue, sizeof(zeroValue)); } HWCMDTEST_F(IGFX_XE_HP_CORE, MultiLevelBatchTestsWithoutNesting, givenConditionalBBEndWhenExitsFromSecondLevelToRingThenFirstLevelIsNotExecuted) { auto writeAddress = helperSurface->getGpuAddress(); auto compareAddress = writeAddress; using CONDITIONAL_BATCH_BUFFER_END = typename FamilyType::MI_CONDITIONAL_BATCH_BUFFER_END; using BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; //nest to second level auto batchBufferStart = reinterpret_cast(taskStream->getSpace(sizeof(BATCH_BUFFER_START))); batchBufferStart->init(); batchBufferStart->setBatchBufferStartAddress(secondLevelBatch->getGpuAddress()); batchBufferStart->setSecondLevelBatchBuffer(BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH); //nest to third level batchBufferStart = reinterpret_cast(secondLevelBatchStream->getSpace(sizeof(BATCH_BUFFER_START))); batchBufferStart->init(); batchBufferStart->setBatchBufferStartAddress(thirdLevelBatch->getGpuAddress()); batchBufferStart->setSecondLevelBatchBuffer(BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH); auto conditionalBatchBufferEnd = reinterpret_cast(thirdLevelBatchStream->getSpace(sizeof(CONDITIONAL_BATCH_BUFFER_END))); conditionalBatchBufferEnd->init(); conditionalBatchBufferEnd->setEndCurrentBatchBufferLevel(1); conditionalBatchBufferEnd->setCompareAddress(compareAddress); conditionalBatchBufferEnd->setCompareSemaphore(1); writeAddress += sizeof(uint64_t); auto writeValue = 7u; //this pipe control should't be executed PipeControlArgs args; MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( *secondLevelBatchStream, 
FamilyType::PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, writeAddress, writeValue, device->getHardwareInfo(), args); conditionalBatchBufferEnd = reinterpret_cast(secondLevelBatchStream->getSpace(sizeof(CONDITIONAL_BATCH_BUFFER_END))); conditionalBatchBufferEnd->init(); conditionalBatchBufferEnd->setCompareAddress(compareAddress); conditionalBatchBufferEnd->setEndCurrentBatchBufferLevel(1); conditionalBatchBufferEnd->setCompareSemaphore(1); writeAddress += sizeof(uint64_t); writeValue++; //and this should , we returned to primary batch MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( *taskStream, FamilyType::PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, writeAddress, writeValue, device->getHardwareInfo(), args); csr->makeResident(*secondLevelBatch); csr->makeResident(*thirdLevelBatch); flushStream(); writeAddress = helperSurface->getGpuAddress() + sizeof(uint64_t); writeValue = 7u; auto zeroValue = 0llu; expectMemory(reinterpret_cast(writeAddress), &zeroValue, sizeof(zeroValue)); writeAddress += sizeof(uint64_t); writeValue++; expectMemory(reinterpret_cast(writeAddress), &writeValue, sizeof(writeValue)); } HWCMDTEST_F(IGFX_XE_HP_CORE, AubWalkerPartitionZeroTest, givenBlockingAtomicOperationIncOnLocalMemoryWhenItIsExecutedThenOperationUpdatesMemory) { auto writeAddress = helperSurface->getGpuAddress(); auto cpuAddress = reinterpret_cast(helperSurface->getUnderlyingBuffer()); *cpuAddress = 10; auto streamCpuPointer = taskStream->getSpace(0); uint32_t totalBytesProgrammed = 0u; uint32_t expectedValue = 11u; WalkerPartition::programMiAtomic(streamCpuPointer, totalBytesProgrammed, writeAddress, true, WalkerPartition::MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT); taskStream->getSpace(totalBytesProgrammed); flushStream(); expectMemory(reinterpret_cast(writeAddress), &expectedValue, sizeof(expectedValue)); } HWCMDTEST_F(IGFX_XE_HP_CORE, AubWalkerPartitionZeroTest, 
givenBlockingAtomicOperationIncOnSystemMemoryWhenItIsExecutedThenOperationUpdatesMemory) { auto writeAddress = helperSurface->getGpuAddress(); auto cpuAddress = reinterpret_cast(helperSurface->getUnderlyingBuffer()); *cpuAddress = 10; auto streamCpuPointer = taskStream->getSpace(0); uint32_t totalBytesProgrammed = 0u; uint32_t expectedValue = 11u; WalkerPartition::programMiAtomic(streamCpuPointer, totalBytesProgrammed, writeAddress, true, WalkerPartition::MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT); taskStream->getSpace(totalBytesProgrammed); flushStream(); expectMemory(reinterpret_cast(writeAddress), &expectedValue, sizeof(expectedValue)); } HWCMDTEST_F(IGFX_XE_HP_CORE, AubWalkerPartitionZeroTest, givenNonBlockingAtomicOperationIncOnSystemMemoryWhenItIsExecutedThenOperationUpdatesMemory) { auto writeAddress = helperSurface->getGpuAddress(); auto cpuAddress = reinterpret_cast(helperSurface->getUnderlyingBuffer()); *cpuAddress = 10; auto streamCpuPointer = taskStream->getSpace(0); uint32_t totalBytesProgrammed = 0u; uint32_t expectedValue = 11u; WalkerPartition::programMiAtomic(streamCpuPointer, totalBytesProgrammed, writeAddress, false, WalkerPartition::MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT); taskStream->getSpace(totalBytesProgrammed); flushStream(); expectMemory(reinterpret_cast(writeAddress), &expectedValue, sizeof(expectedValue)); } HWCMDTEST_F(IGFX_XE_HP_CORE, AubWalkerPartitionZeroTest, givenPredicatedCommandBufferWhenItIsExecutedThenAtomicIsIncrementedEquallyToPartitionCountPlusOne) { using WALKER_TYPE = typename FamilyType::WALKER_TYPE; auto streamCpuPointer = taskStream->getSpace(0); auto postSyncAddress = helperSurface->getGpuAddress(); uint32_t totalBytesProgrammed = 0u; WALKER_TYPE walkerCmd = FamilyType::cmdInitGpgpuWalker; walkerCmd.setPartitionType(WALKER_TYPE::PARTITION_TYPE::PARTITION_TYPE_X); walkerCmd.getInterfaceDescriptor().setNumberOfThreadsInGpgpuThreadGroup(1u); walkerCmd.getPostSync().setDestinationAddress(postSyncAddress); 
walkerCmd.getPostSync().setOperation(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP); WalkerPartition::WalkerPartitionArgs testArgs = {}; testArgs.initializeWparidRegister = true; testArgs.crossTileAtomicSynchronization = true; testArgs.emitPipeControlStall = true; testArgs.tileCount = 1; testArgs.partitionCount = 16u; testArgs.synchronizeBeforeExecution = false; testArgs.secondaryBatchBuffer = false; testArgs.emitSelfCleanup = false; WalkerPartition::constructDynamicallyPartitionedCommandBuffer( streamCpuPointer, taskStream->getGraphicsAllocation()->getGpuAddress(), &walkerCmd, totalBytesProgrammed, testArgs, *defaultHwInfo); taskStream->getSpace(totalBytesProgrammed); flushStream(); auto expectedGpuAddress = taskStream->getGraphicsAllocation()->getGpuAddress() + WalkerPartition::computeControlSectionOffset(testArgs); //16 partitions updated atomic to value 16 //17th partition updated it to 17 and was predicated out of the batch buffer uint32_t expectedValue = 17u; expectMemory(reinterpret_cast(expectedGpuAddress), &expectedValue, sizeof(expectedValue)); //this is 1 tile scenario uint32_t expectedTileValue = 1u; expectMemory(reinterpret_cast(expectedGpuAddress + 4llu), &expectedTileValue, sizeof(expectedTileValue)); } HWCMDTEST_F(IGFX_XE_HP_CORE, AubWalkerPartitionZeroTest, givenGeneralPurposeRegisterWhenItIsLoadedAndFetchedThenItIsNotPrivileged) { auto writeAddress = helperSurface->getGpuAddress(); uint32_t writeValue = 7u; auto streamCpuPointer = taskStream->getSpace(0); uint32_t totalBytesProgrammed = 0u; uint32_t wparidValue = 5u; WalkerPartition::programRegisterWithValue(streamCpuPointer, generalPurposeRegister0, totalBytesProgrammed, wparidValue); WalkerPartition::programMiLoadRegisterReg(streamCpuPointer, totalBytesProgrammed, generalPurposeRegister0, generalPurposeRegister1); WalkerPartition::programMiLoadRegisterReg(streamCpuPointer, totalBytesProgrammed, generalPurposeRegister1, generalPurposeRegister2); 
WalkerPartition::programMiLoadRegisterReg(streamCpuPointer, totalBytesProgrammed, generalPurposeRegister2, generalPurposeRegister3); WalkerPartition::programMiLoadRegisterReg(streamCpuPointer, totalBytesProgrammed, generalPurposeRegister3, generalPurposeRegister4); WalkerPartition::programMiLoadRegisterReg(streamCpuPointer, totalBytesProgrammed, generalPurposeRegister4, generalPurposeRegister5); WalkerPartition::programMiLoadRegisterReg(streamCpuPointer, totalBytesProgrammed, generalPurposeRegister5, wparidCCSOffset); WalkerPartition::programWparidMask(streamCpuPointer, totalBytesProgrammed, 4u); WalkerPartition::programWparidPredication(streamCpuPointer, totalBytesProgrammed, true); //this command must not execute taskStream->getSpace(totalBytesProgrammed); PipeControlArgs args; MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( *taskStream, FamilyType::PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, writeAddress, writeValue, device->getHardwareInfo(), args); streamCpuPointer = taskStream->getSpace(0); totalBytesProgrammed = 0u; WalkerPartition::programWparidPredication(streamCpuPointer, totalBytesProgrammed, false); taskStream->getSpace(totalBytesProgrammed); flushStream(); expectNotEqualMemory(reinterpret_cast(writeAddress), &writeValue, sizeof(writeValue)); } HWCMDTEST_F(IGFX_XE_HP_CORE, AubWalkerPartitionZeroTest, givenPredicationWhenItIsOnThenCommandMustNotBeExecuted) { auto streamCpuPointer = taskStream->getSpace(0); uint32_t totalBytesProgrammed = 0u; auto writeValue = 1u; auto zeroValue = 0u; auto addressShift = 8u; auto writeAddress = helperSurface->getGpuAddress(); //program WPARID mask to 16 partitions WalkerPartition::programWparidMask(streamCpuPointer, totalBytesProgrammed, 16u); streamCpuPointer = taskStream->getSpace(totalBytesProgrammed); //program WPARID to value within 0-19 for (uint32_t wparid = 0u; wparid < 20; wparid++) { totalBytesProgrammed = 0; streamCpuPointer = taskStream->getSpace(0); 
WalkerPartition::programRegisterWithValue(streamCpuPointer, WalkerPartition::wparidCCSOffset, totalBytesProgrammed, wparid); WalkerPartition::programWparidPredication(streamCpuPointer, totalBytesProgrammed, true); taskStream->getSpace(totalBytesProgrammed); //emit pipe control PipeControlArgs args; MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( *taskStream, FamilyType::PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, writeAddress, writeValue, device->getHardwareInfo(), args); //turn off predication streamCpuPointer = taskStream->getSpace(0); totalBytesProgrammed = 0; WalkerPartition::programWparidPredication(streamCpuPointer, totalBytesProgrammed, false); taskStream->getSpace(totalBytesProgrammed); writeAddress += addressShift; writeValue++; } flushStream(); writeAddress = helperSurface->getGpuAddress(); writeValue = 1u; for (uint32_t wparid = 0u; wparid < 20; wparid++) { if (wparid < 16) { expectMemory(reinterpret_cast(writeAddress), &writeValue, 4u); } else { expectMemory(reinterpret_cast(writeAddress), &zeroValue, 4u); } writeAddress += addressShift; writeValue++; } } HWCMDTEST_F(IGFX_XE_HP_CORE, AubWalkerPartitionZeroTest, givenPredicationWhenItIsOnThenPipeControlInWparidIsNotExecuted) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; auto streamCpuPointer = taskStream->getSpace(0); uint32_t totalBytesProgrammed = 0u; auto writeValue = 1u; auto zeroValue = 0u; auto addressShift = 32u; auto writeAddress = helperSurface->getGpuAddress(); WalkerPartition::programRegisterWithValue(streamCpuPointer, WalkerPartition::addressOffsetCCSOffset, totalBytesProgrammed, addressShift); //program WPARID mask to 8 partitions WalkerPartition::programWparidMask(streamCpuPointer, totalBytesProgrammed, 8u); streamCpuPointer = taskStream->getSpace(totalBytesProgrammed); //program WPARID to value within 0-13 for (uint32_t wparid = 0u; wparid < 13; wparid++) { totalBytesProgrammed = 0; streamCpuPointer = 
taskStream->getSpace(0); WalkerPartition::programRegisterWithValue(streamCpuPointer, WalkerPartition::wparidCCSOffset, totalBytesProgrammed, wparid); WalkerPartition::programWparidPredication(streamCpuPointer, totalBytesProgrammed, true); taskStream->getSpace(totalBytesProgrammed); //emit pipe control void *pipeControlAddress = taskStream->getSpace(0); PipeControlArgs args; MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( *taskStream, FamilyType::PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, writeAddress, writeValue, device->getHardwareInfo(), args); auto pipeControl = retrieveSyncPipeControl(pipeControlAddress, device->getHardwareInfo()); ASSERT_NE(nullptr, pipeControl); pipeControl->setWorkloadPartitionIdOffsetEnable(true); //turn off predication streamCpuPointer = taskStream->getSpace(0); totalBytesProgrammed = 0; WalkerPartition::programWparidPredication(streamCpuPointer, totalBytesProgrammed, false); taskStream->getSpace(totalBytesProgrammed); writeValue++; } flushStream(); writeAddress = helperSurface->getGpuAddress(); writeValue = 1u; for (uint32_t wparid = 0u; wparid < 13; wparid++) { if (wparid < 8) { expectMemory(reinterpret_cast(writeAddress), &writeValue, 4u); } else { expectMemory(reinterpret_cast(writeAddress), &zeroValue, 4u); } writeAddress += addressShift; writeValue++; } } HWCMDTEST_P(IGFX_XE_HP_CORE, AubWalkerPartitionTest, whenPartitionsAreUsedWithVariousInputsThenHardwareProgrammingIsCorrect) { size_t globalWorkOffset[3] = {0, 0, 0}; cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event event; auto retVal = pCmdQ->enqueueKernel( kernels[5].get(), workingDimensions, globalWorkOffset, dispatchParamters.globalWorkSize, dispatchParamters.localWorkSize, numEventsInWaitList, eventWaitList, &event); ASSERT_EQ(CL_SUCCESS, retVal); pCmdQ->flush(); auto neoEvent = castToObject(event); auto container = neoEvent->getTimestampPacketNodes(); auto postSyncAddress = 
TimestampPacketHelper::getContextStartGpuAddress(*container->peekNodes()[0]); validatePartitionProgramming(postSyncAddress, partitionCount); clReleaseEvent(event); } INSTANTIATE_TEST_CASE_P( AUBWPARID, AubWalkerPartitionTest, ::testing::Combine( ::testing::ValuesIn(testPartitionCount), ::testing::ValuesIn(testPartitionType), ::testing::ValuesIn(DispatchParamtersForTests), ::testing::ValuesIn(testWorkingDimensions))); using AubWparidTests = Test; HWCMDTEST_F(IGFX_XE_HP_CORE, AubWparidTests, whenPartitionCountSetAndPartitionIdSpecifiedViaWPARIDThenProvideEqualNumberWalkers) { size_t globalWorkOffset[3] = {0, 0, 0}; cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event event; workingDimensions = 3; dispatchParamters.globalWorkSize[0] = 30; dispatchParamters.globalWorkSize[1] = 39; dispatchParamters.globalWorkSize[2] = 5; dispatchParamters.localWorkSize[0] = 10; dispatchParamters.localWorkSize[1] = 3; dispatchParamters.localWorkSize[2] = 1; partitionType = 3; int32_t partitionCount = 4; DebugManager.flags.ExperimentalSetWalkerPartitionType.set(partitionType); DebugManager.flags.ExperimentalSetWalkerPartitionCount.set(partitionCount); DebugManager.flags.EnableWalkerPartition.set(1u); auto retVal = pCmdQ->enqueueKernel( kernels[5].get(), workingDimensions, globalWorkOffset, dispatchParamters.globalWorkSize, dispatchParamters.localWorkSize, numEventsInWaitList, eventWaitList, &event); ASSERT_EQ(CL_SUCCESS, retVal); pCmdQ->flush(); auto neoEvent = castToObject(event); auto container = neoEvent->getTimestampPacketNodes(); auto postSyncAddress = TimestampPacketHelper::getContextStartGpuAddress(*container->peekNodes()[0]); validatePartitionProgramming(postSyncAddress, partitionCount); clReleaseEvent(event); } copy_engine_aub_tests_xehp_and_later.cpp000066400000000000000000000100471422164147700366450ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/command_stream/* * Copyright (C) 2022 Intel Corporation * * 
SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/aub_tests/command_stream/copy_engine_aub_tests_xehp_and_later.h" #include "shared/test/common/test_macros/test.h" using namespace NEO; using SingleTileCopyEngineTests = CopyEngineXeHPAndLater<1>; HWTEST_F(SingleTileCopyEngineTests, givenNotCompressedBufferWhenBltExecutedThenCompressDataAndResolve) { givenNotCompressedBufferWhenBltExecutedThenCompressDataAndResolveImpl(); } HWTEST_F(SingleTileCopyEngineTests, givenHostPtrWhenBlitCommandToCompressedBufferIsDispatchedThenCopiedDataIsValid) { givenHostPtrWhenBlitCommandToCompressedBufferIsDispatchedThenCopiedDataIsValidImpl(); } HWTEST_F(SingleTileCopyEngineTests, givenDstHostPtrWhenBlitCommandFromCompressedBufferIsDispatchedThenCopiedDataIsValid) { givenDstHostPtrWhenBlitCommandFromCompressedBufferIsDispatchedThenCopiedDataIsValidImpl(); } HWCMDTEST_F(IGFX_XE_HP_CORE, SingleTileCopyEngineTests, givenDstHostPtrWhenBlitCommandFromNotCompressedBufferIsDispatchedThenCopiedDataIsValid) { givenDstHostPtrWhenBlitCommandFromNotCompressedBufferIsDispatchedThenCopiedDataIsValidImpl(); } HWCMDTEST_F(IGFX_XE_HP_CORE, SingleTileCopyEngineTests, givenSrcHostPtrWhenBlitCommandToNotCompressedBufferIsDispatchedThenCopiedDataIsValid) { givenSrcHostPtrWhenBlitCommandToNotCompressedBufferIsDispatchedThenCopiedDataIsValidImpl(); } HWCMDTEST_F(IGFX_XE_HP_CORE, SingleTileCopyEngineTests, givenBufferWithOffsetWhenHostPtrBlitCommandIsDispatchedFromHostPtrThenDataIsCorrectlyCopied) { givenBufferWithOffsetWhenHostPtrBlitCommandIsDispatchedFromHostPtrThenDataIsCorrectlyCopiedImpl(); } HWCMDTEST_F(IGFX_XE_HP_CORE, SingleTileCopyEngineTests, givenBufferWithOffsetWhenHostPtrBlitCommandIsDispatchedToHostPtrThenDataIsCorrectlyCopied) { givenBufferWithOffsetWhenHostPtrBlitCommandIsDispatchedToHostPtrThenDataIsCorrectlyCopiedImpl(); } HWCMDTEST_F(IGFX_XE_HP_CORE, SingleTileCopyEngineTests, givenOffsetsWhenBltExecutedThenCopiedDataIsValid) { 
givenOffsetsWhenBltExecutedThenCopiedDataIsValidImpl(); } HWTEST_F(SingleTileCopyEngineTests, givenSrcCompressedBufferWhenBlitCommandToDstCompressedBufferIsDispatchedThenCopiedDataIsValid) { givenSrcCompressedBufferWhenBlitCommandToDstCompressedBufferIsDispatchedThenCopiedDataIsValidImpl(); } HWTEST_F(SingleTileCopyEngineTests, givenCompressedBufferWhenAuxTranslationCalledThenResolveAndCompress) { givenCompressedBufferWhenAuxTranslationCalledThenResolveAndCompressImpl(); } using SingleTileCopyEngineSystemMemoryTests = CopyEngineXeHPAndLater<1, false>; HWCMDTEST_F(IGFX_XE_HP_CORE, SingleTileCopyEngineSystemMemoryTests, givenSrcSystemBufferWhenBlitCommandToDstSystemBufferIsDispatchedThenCopiedDataIsValid) { givenSrcSystemBufferWhenBlitCommandToDstSystemBufferIsDispatchedThenCopiedDataIsValidImpl(); } HWCMDTEST_F(IGFX_XE_HP_CORE, SingleTileCopyEngineTests, givenReadBufferRectWithOffsetWhenHostPtrBlitCommandIsDispatchedToHostPtrThenDataIsCorrectlyCopied) { givenReadBufferRectWithOffsetWhenHostPtrBlitCommandIsDispatchedToHostPtrThenDataIsCorrectlyCopiedImpl(); } HWCMDTEST_F(IGFX_XE_HP_CORE, SingleTileCopyEngineTests, givenWriteBufferRectWithOffsetWhenHostPtrBlitCommandIsDispatchedToHostPtrThenDataIsCorrectlyCopied) { givenWriteBufferRectWithOffsetWhenHostPtrBlitCommandIsDispatchedToHostPtrThenDataIsCorrectlyCopiedImpl(); } HWCMDTEST_F(IGFX_XE_HP_CORE, SingleTileCopyEngineTests, givenCopyBufferRectWithOffsetWhenHostPtrBlitCommandIsDispatchedToHostPtrThenDataIsCorrectlyCopied) { givenCopyBufferRectWithOffsetWhenHostPtrBlitCommandIsDispatchedToHostPtrThenDataIsCorrectlyCopiedImpl(); } HWCMDTEST_F(IGFX_XE_HP_CORE, SingleTileCopyEngineTests, givenCopyBufferRectWithBigSizesWhenHostPtrBlitCommandIsDispatchedToHostPtrThenDataIsCorrectlyCopied) { givenCopyBufferRectWithBigSizesWhenHostPtrBlitCommandIsDispatchedToHostPtrThenDataIsCorrectlyCopiedImpl(); } 
copy_engine_aub_tests_xehp_and_later.h000066400000000000000000001166071422164147700363230ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/command_stream/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/aub_command_stream_receiver_hw.h" #include "shared/source/gmm_helper/gmm.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/timestamp_packet.h" #include "shared/source/os_interface/hw_info_config.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/extensions/public/cl_ext_private.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/helpers/cl_memory_properties_helpers.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/aub_tests/fixtures/multicontext_aub_fixture.h" using namespace NEO; template struct CopyEngineXeHPAndLater : public MulticontextAubFixture, public ::testing::Test { using MulticontextAubFixture::expectMemory; void SetUp() override { if (is32bit) { GTEST_SKIP(); } if (!HwInfoConfig::get(defaultHwInfo->platform.eProductFamily)->obtainBlitterPreference(*defaultHwInfo.get())) { GTEST_SKIP(); } if (useLocalMemory) { if (!defaultHwInfo->featureTable.flags.ftrLocalMemory) { GTEST_SKIP(); } DebugManager.flags.EnableLocalMemory.set(true); } DebugManager.flags.RenderCompressedBuffersEnabled.set(true); DebugManager.flags.RenderCompressedImagesEnabled.set(true); MulticontextAubFixture::SetUp(numTiles, EnabledCommandStreamers::Single, true); defaultCommandQueue = commandQueues[0][0].get(); bcsCsr = tileDevices[0]->getNearestGenericSubDevice(0)->getEngine(bcsEngineType, EngineUsage::Regular).commandStreamReceiver; compressiblePattern = std::make_unique(bufferSize); 
std::fill(compressiblePattern.get(), ptrOffset(compressiblePattern.get(), bufferSize), 0xC6); writePattern = std::make_unique(bufferSize); dstHostPtr = std::make_unique(bufferSize); for (size_t i = 0; i < bufferSize; i++) { writePattern.get()[i] = static_cast(i); dstHostPtr.get()[i] = 255 - writePattern[i]; } EXPECT_NE(writePattern.get()[0], writePattern.get()[offset]); } void TearDown() override { MulticontextAubFixture::TearDown(); } virtual bool compressionSupported() const { auto &ftrTable = rootDevice->getHardwareInfo().featureTable; return (ftrTable.flags.ftrLocalMemory && ftrTable.flags.ftrFlatPhysCCS); } ReleaseableObjectPtr createBuffer(bool compressed, bool inLocalMemory, void *srcHostPtr) { cl_mem_flags flags = CL_MEM_READ_WRITE; if (!compressed) { flags |= CL_MEM_UNCOMPRESSED_HINT_INTEL; } else { flags |= CL_MEM_COMPRESSED_HINT_INTEL; } if (!inLocalMemory && !compressed) { flags |= CL_MEM_FORCE_HOST_MEMORY_INTEL; } if (srcHostPtr) { flags |= CL_MEM_COPY_HOST_PTR; } auto buffer = clUniquePtr(Buffer::create(context.get(), flags, bufferSize, srcHostPtr, retVal)); auto graphicsAllocation = buffer->getGraphicsAllocation(this->rootDeviceIndex); EXPECT_EQ(CL_SUCCESS, retVal); if (compressed) { EXPECT_TRUE(graphicsAllocation->getDefaultGmm()->isCompressionEnabled); } EXPECT_EQ(!inLocalMemory, MemoryPool::isSystemMemoryPool(graphicsAllocation->getMemoryPool())); return buffer; } void *getGpuAddress(Buffer &buffer) { return reinterpret_cast(buffer.getGraphicsAllocation(this->rootDeviceIndex)->getGpuAddress()); } void executeBlitCommand(const BlitProperties &blitProperties, bool blocking) { BlitPropertiesContainer blitPropertiesContainer; blitPropertiesContainer.push_back(blitProperties); bcsCsr->flushBcsTask(blitPropertiesContainer, blocking, false, rootDevice->getDevice()); } template void givenNotCompressedBufferWhenBltExecutedThenCompressDataAndResolveImpl(); template void givenHostPtrWhenBlitCommandToCompressedBufferIsDispatchedThenCopiedDataIsValidImpl(); 
template void givenDstHostPtrWhenBlitCommandFromCompressedBufferIsDispatchedThenCopiedDataIsValidImpl(); template void givenDstHostPtrWhenBlitCommandFromNotCompressedBufferIsDispatchedThenCopiedDataIsValidImpl(); template void givenSrcHostPtrWhenBlitCommandToNotCompressedBufferIsDispatchedThenCopiedDataIsValidImpl(); template void givenBufferWithOffsetWhenHostPtrBlitCommandIsDispatchedFromHostPtrThenDataIsCorrectlyCopiedImpl(); template void givenBufferWithOffsetWhenHostPtrBlitCommandIsDispatchedToHostPtrThenDataIsCorrectlyCopiedImpl(); template void givenOffsetsWhenBltExecutedThenCopiedDataIsValidImpl(); template void givenSrcCompressedBufferWhenBlitCommandToDstCompressedBufferIsDispatchedThenCopiedDataIsValidImpl(); template void givenCompressedBufferWhenAuxTranslationCalledThenResolveAndCompressImpl(); template void givenSrcSystemBufferWhenBlitCommandToDstSystemBufferIsDispatchedThenCopiedDataIsValidImpl(); template void GivenReadOnlyMultiStorageWhenAllocatingBufferThenAllocationIsCopiedWithBlitterToEveryTileImpl(); template void givenReadBufferRectWithOffsetWhenHostPtrBlitCommandIsDispatchedToHostPtrThenDataIsCorrectlyCopiedImpl(); template void givenWriteBufferRectWithOffsetWhenHostPtrBlitCommandIsDispatchedToHostPtrThenDataIsCorrectlyCopiedImpl(); template void givenCopyBufferRectWithOffsetWhenHostPtrBlitCommandIsDispatchedToHostPtrThenDataIsCorrectlyCopiedImpl(); template void givenCopyBufferRectWithBigSizesWhenHostPtrBlitCommandIsDispatchedToHostPtrThenDataIsCorrectlyCopiedImpl(); DebugManagerStateRestore restore; CommandQueue *defaultCommandQueue = nullptr; CommandStreamReceiver *bcsCsr = nullptr; TimestampPacketContainer timestampPacketContainer; CsrDependencies csrDependencies; const size_t bufferSize = MemoryConstants::pageSize64k + BlitterConstants::maxBlitWidth + 3; size_t offset = (bufferSize / 4) - 3; aub_stream::EngineType bcsEngineType = aub_stream::EngineType::ENGINE_BCS; std::unique_ptr compressiblePattern; std::unique_ptr writePattern; 
std::unique_ptr dstHostPtr; cl_int retVal = CL_SUCCESS; }; template template void CopyEngineXeHPAndLater::givenNotCompressedBufferWhenBltExecutedThenCompressDataAndResolveImpl() { if (!compressionSupported()) { GTEST_SKIP(); } auto srcNotCompressedBuffer = createBuffer(false, testLocalMemory, compressiblePattern.get()); auto dstNotCompressedBuffer = createBuffer(false, testLocalMemory, nullptr); auto dstCompressedBuffer = createBuffer(true, testLocalMemory, nullptr); auto dstResolvedBuffer = createBuffer(false, testLocalMemory, nullptr); // Buffer to Buffer - uncompressed HBM -> compressed HBM auto blitProperties = BlitProperties::constructPropertiesForCopy(dstCompressedBuffer->getGraphicsAllocation(rootDeviceIndex), srcNotCompressedBuffer->getGraphicsAllocation(rootDeviceIndex), 0, 0, {bufferSize, 1, 1}, 0, 0, 0, 0, bcsCsr->getClearColorAllocation()); executeBlitCommand(blitProperties, true); // Buffer to Buffer - uncompressed HBM -> uncompressed HBM blitProperties = BlitProperties::constructPropertiesForCopy(dstNotCompressedBuffer->getGraphicsAllocation(rootDeviceIndex), srcNotCompressedBuffer->getGraphicsAllocation(rootDeviceIndex), 0, 0, {bufferSize, 1, 1}, 0, 0, 0, 0, bcsCsr->getClearColorAllocation()); executeBlitCommand(blitProperties, true); // Buffer to Buffer - compressed HBM -> uncompressed HBM blitProperties = BlitProperties::constructPropertiesForCopy(dstResolvedBuffer->getGraphicsAllocation(rootDeviceIndex), dstCompressedBuffer->getGraphicsAllocation(rootDeviceIndex), 0, 0, {bufferSize, 1, 1}, 0, 0, 0, 0, bcsCsr->getClearColorAllocation()); executeBlitCommand(blitProperties, true); blitProperties = BlitProperties::constructPropertiesForReadWrite(BlitterConstants::BlitDirection::BufferToHostPtr, *bcsCsr, dstCompressedBuffer->getGraphicsAllocation(rootDeviceIndex), nullptr, dstHostPtr.get(), dstCompressedBuffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress(), 0, 0, 0, {bufferSize, 1, 1}, 0, 0, 0, 0); executeBlitCommand(blitProperties, true); 
expectMemoryNotEqual(getGpuAddress(*dstCompressedBuffer), compressiblePattern.get(), bufferSize, 0, 0); expectMemory(dstHostPtr.get(), compressiblePattern.get(), bufferSize, 0, 0); expectMemory(getGpuAddress(*dstResolvedBuffer), compressiblePattern.get(), bufferSize, 0, 0); expectMemory(getGpuAddress(*dstNotCompressedBuffer), compressiblePattern.get(), bufferSize, 0, 0); expectMemory(getGpuAddress(*srcNotCompressedBuffer), compressiblePattern.get(), bufferSize, 0, 0); } template template void CopyEngineXeHPAndLater::givenHostPtrWhenBlitCommandToCompressedBufferIsDispatchedThenCopiedDataIsValidImpl() { if (!compressionSupported()) { GTEST_SKIP(); } auto dstCompressedBuffer = createBuffer(true, testLocalMemory, nullptr); // HostPtr to Buffer - System -> compressed HBM auto blitProperties = BlitProperties::constructPropertiesForReadWrite(BlitterConstants::BlitDirection::HostPtrToBuffer, *bcsCsr, dstCompressedBuffer->getGraphicsAllocation(rootDeviceIndex), nullptr, compressiblePattern.get(), dstCompressedBuffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress(), 0, 0, 0, {bufferSize, 1, 1}, 0, 0, 0, 0); executeBlitCommand(blitProperties, true); blitProperties = BlitProperties::constructPropertiesForReadWrite(BlitterConstants::BlitDirection::BufferToHostPtr, *bcsCsr, dstCompressedBuffer->getGraphicsAllocation(rootDeviceIndex), nullptr, dstHostPtr.get(), dstCompressedBuffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress(), 0, 0, 0, {bufferSize, 1, 1}, 0, 0, 0, 0); executeBlitCommand(blitProperties, true); expectMemoryNotEqual(getGpuAddress(*dstCompressedBuffer), compressiblePattern.get(), bufferSize, 0, 0); expectMemory(dstHostPtr.get(), compressiblePattern.get(), bufferSize, 0, 0); } template template void CopyEngineXeHPAndLater::givenDstHostPtrWhenBlitCommandFromCompressedBufferIsDispatchedThenCopiedDataIsValidImpl() { if (!compressionSupported()) { GTEST_SKIP(); } auto srcCompressedBuffer = createBuffer(true, testLocalMemory, nullptr); auto 
blitProperties = BlitProperties::constructPropertiesForReadWrite(BlitterConstants::BlitDirection::HostPtrToBuffer, *bcsCsr, srcCompressedBuffer->getGraphicsAllocation(rootDeviceIndex), nullptr, compressiblePattern.get(), srcCompressedBuffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress(), 0, 0, 0, {bufferSize, 1, 1}, 0, 0, 0, 0); executeBlitCommand(blitProperties, true); // Buffer to HostPtr - compressed HBM -> System blitProperties = BlitProperties::constructPropertiesForReadWrite(BlitterConstants::BlitDirection::BufferToHostPtr, *bcsCsr, srcCompressedBuffer->getGraphicsAllocation(rootDeviceIndex), nullptr, dstHostPtr.get(), srcCompressedBuffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress(), 0, 0, 0, {bufferSize, 1, 1}, 0, 0, 0, 0); executeBlitCommand(blitProperties, false); bcsCsr->waitForTaskCountWithKmdNotifyFallback(0, 0, false, QueueThrottle::MEDIUM); expectMemoryNotEqual(getGpuAddress(*srcCompressedBuffer), compressiblePattern.get(), bufferSize, 0, 0); expectMemory(dstHostPtr.get(), compressiblePattern.get(), bufferSize, 0, 0); srcCompressedBuffer.reset(); } template template void CopyEngineXeHPAndLater::givenDstHostPtrWhenBlitCommandFromNotCompressedBufferIsDispatchedThenCopiedDataIsValidImpl() { auto srcNotCompressedLocalBuffer = createBuffer(false, testLocalMemory, compressiblePattern.get()); // Buffer to HostPtr - uncompressed HBM -> System auto blitProperties = BlitProperties::constructPropertiesForReadWrite(BlitterConstants::BlitDirection::BufferToHostPtr, *bcsCsr, srcNotCompressedLocalBuffer->getGraphicsAllocation(rootDeviceIndex), nullptr, dstHostPtr.get(), srcNotCompressedLocalBuffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress(), 0, 0, 0, {bufferSize, 1, 1}, 0, 0, 0, 0); executeBlitCommand(blitProperties, false); bcsCsr->waitForTaskCountWithKmdNotifyFallback(0, 0, false, QueueThrottle::MEDIUM); expectMemory(getGpuAddress(*srcNotCompressedLocalBuffer), compressiblePattern.get(), bufferSize, 0, 0); 
expectMemory(dstHostPtr.get(), compressiblePattern.get(), bufferSize, 0, 0); } template template void CopyEngineXeHPAndLater::givenSrcHostPtrWhenBlitCommandToNotCompressedBufferIsDispatchedThenCopiedDataIsValidImpl() { auto buffer = createBuffer(false, testLocalMemory, nullptr); // HostPtr to Buffer - System -> uncompressed HBM auto blitProperties = BlitProperties::constructPropertiesForReadWrite(BlitterConstants::BlitDirection::HostPtrToBuffer, *bcsCsr, buffer->getGraphicsAllocation(rootDeviceIndex), nullptr, compressiblePattern.get(), buffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress(), 0, 0, 0, {bufferSize, 1, 1}, 0, 0, 0, 0); executeBlitCommand(blitProperties, true); expectMemory(getGpuAddress(*buffer), compressiblePattern.get(), bufferSize, 0, 0); } template template void CopyEngineXeHPAndLater::givenBufferWithOffsetWhenHostPtrBlitCommandIsDispatchedFromHostPtrThenDataIsCorrectlyCopiedImpl() { auto buffer = createBuffer(false, testLocalMemory, nullptr); auto blitProperties = BlitProperties::constructPropertiesForReadWrite(BlitterConstants::BlitDirection::HostPtrToBuffer, *bcsCsr, buffer->getGraphicsAllocation(rootDeviceIndex), nullptr, writePattern.get(), buffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress(), 0, 0, {offset, 0, 0}, {bufferSize - offset, 1, 1}, 0, 0, 0, 0); executeBlitCommand(blitProperties, true); expectMemoryNotEqual(getGpuAddress(*buffer), writePattern.get(), bufferSize, 0, 0); expectMemoryNotEqual(ptrOffset(getGpuAddress(*buffer), offset - 1), writePattern.get(), bufferSize - offset, 0, 0); expectMemory(ptrOffset(getGpuAddress(*buffer), offset), writePattern.get(), bufferSize - offset, 0, 0); } template template void CopyEngineXeHPAndLater::givenBufferWithOffsetWhenHostPtrBlitCommandIsDispatchedToHostPtrThenDataIsCorrectlyCopiedImpl() { auto srcBuffer = createBuffer(false, testLocalMemory, writePattern.get()); auto blitProperties = 
BlitProperties::constructPropertiesForReadWrite(BlitterConstants::BlitDirection::BufferToHostPtr, *bcsCsr, srcBuffer->getGraphicsAllocation(rootDeviceIndex), nullptr, dstHostPtr.get(), srcBuffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress(), 0, 0, {offset, 0, 0}, {bufferSize - offset, 1, 1}, 0, 0, 0, 0); executeBlitCommand(blitProperties, false); bcsCsr->waitForTaskCountWithKmdNotifyFallback(0, 0, false, QueueThrottle::MEDIUM); expectMemoryNotEqual(dstHostPtr.get(), writePattern.get(), offset, 0, 0); expectMemoryNotEqual(ptrOffset(dstHostPtr.get(), 1), ptrOffset(writePattern.get(), offset), bufferSize - offset - 1, 0, 0); expectMemory(dstHostPtr.get(), ptrOffset(writePattern.get(), offset), bufferSize - offset, 0, 0); } template template void CopyEngineXeHPAndLater::givenOffsetsWhenBltExecutedThenCopiedDataIsValidImpl() { size_t copiedSize = bufferSize - (2 * offset); auto srcBuffer = createBuffer(false, testLocalMemory, writePattern.get()); auto dstBuffer = createBuffer(false, testLocalMemory, nullptr); auto blitProperties = BlitProperties::constructPropertiesForCopy(dstBuffer->getGraphicsAllocation(rootDeviceIndex), srcBuffer->getGraphicsAllocation(rootDeviceIndex), {offset, 0, 0}, 0, {copiedSize, 1, 1}, 0, 0, 0, 0, bcsCsr->getClearColorAllocation()); executeBlitCommand(blitProperties, true); expectMemoryNotEqual(getGpuAddress(*dstBuffer), writePattern.get(), bufferSize, 0, 0); expectMemoryNotEqual(getGpuAddress(*dstBuffer), writePattern.get(), copiedSize + 1, 0, 0); expectMemoryNotEqual(ptrOffset(getGpuAddress(*dstBuffer), offset - 1), writePattern.get(), copiedSize, 0, 0); expectMemory(ptrOffset(getGpuAddress(*dstBuffer), offset), writePattern.get(), copiedSize, 0, 0); } template template void CopyEngineXeHPAndLater::givenSrcCompressedBufferWhenBlitCommandToDstCompressedBufferIsDispatchedThenCopiedDataIsValidImpl() { if (!compressionSupported()) { GTEST_SKIP(); } auto srcBuffer = createBuffer(true, testLocalMemory, nullptr); auto blitProperties = 
BlitProperties::constructPropertiesForReadWrite(BlitterConstants::BlitDirection::HostPtrToBuffer, *bcsCsr, srcBuffer->getGraphicsAllocation(rootDeviceIndex), nullptr, compressiblePattern.get(), srcBuffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress(), 0, 0, 0, {bufferSize, 1, 1}, 0, 0, 0, 0); executeBlitCommand(blitProperties, false); auto dstBuffer = createBuffer(true, testLocalMemory, nullptr); // Buffer to Buffer - compressed HBM -> compressed HBM blitProperties = BlitProperties::constructPropertiesForCopy(dstBuffer->getGraphicsAllocation(rootDeviceIndex), srcBuffer->getGraphicsAllocation(rootDeviceIndex), 0, 0, {bufferSize, 1, 1}, 0, 0, 0, 0, bcsCsr->getClearColorAllocation()); executeBlitCommand(blitProperties, true); blitProperties = BlitProperties::constructPropertiesForReadWrite(BlitterConstants::BlitDirection::BufferToHostPtr, *bcsCsr, dstBuffer->getGraphicsAllocation(rootDeviceIndex), nullptr, dstHostPtr.get(), dstBuffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress(), 0, 0, 0, {bufferSize, 1, 1}, 0, 0, 0, 0); executeBlitCommand(blitProperties, true); expectMemoryNotEqual(getGpuAddress(*dstBuffer), compressiblePattern.get(), bufferSize, 0, 0); expectMemory(dstHostPtr.get(), compressiblePattern.get(), bufferSize, 0, 0); } template template void CopyEngineXeHPAndLater::givenCompressedBufferWhenAuxTranslationCalledThenResolveAndCompressImpl() { if (this->context->getDevice(0u)->areSharedSystemAllocationsAllowed() || !compressionSupported()) { //no support for scenarios where stateless is mixed with blitter compression GTEST_SKIP(); } auto buffer = createBuffer(true, testLocalMemory, compressiblePattern.get()); { // initialized as compressed expectMemoryNotEqual(getGpuAddress(*buffer), compressiblePattern.get(), bufferSize, 0, 0); } { // resolve auto blitProperties = BlitProperties::constructPropertiesForAuxTranslation(AuxTranslationDirection::AuxToNonAux, buffer->getGraphicsAllocation(rootDeviceIndex), bcsCsr->getClearColorAllocation()); 
executeBlitCommand(blitProperties, false); bcsCsr->waitForTaskCountWithKmdNotifyFallback(0, 0, false, QueueThrottle::MEDIUM); expectMemory(getGpuAddress(*buffer), compressiblePattern.get(), bufferSize, 0, 0); } { // compress again auto blitProperties = BlitProperties::constructPropertiesForAuxTranslation(AuxTranslationDirection::NonAuxToAux, buffer->getGraphicsAllocation(rootDeviceIndex), bcsCsr->getClearColorAllocation()); executeBlitCommand(blitProperties, false); bcsCsr->waitForTaskCountWithKmdNotifyFallback(0, 0, false, QueueThrottle::MEDIUM); expectMemoryNotEqual(getGpuAddress(*buffer), compressiblePattern.get(), bufferSize, 0, 0); } } template template void CopyEngineXeHPAndLater::givenSrcSystemBufferWhenBlitCommandToDstSystemBufferIsDispatchedThenCopiedDataIsValidImpl() { auto srcBuffer = createBuffer(false, false, compressiblePattern.get()); auto dstBuffer = createBuffer(false, false, nullptr); // Buffer to Buffer - System -> System auto blitProperties = BlitProperties::constructPropertiesForCopy(dstBuffer->getGraphicsAllocation(rootDeviceIndex), srcBuffer->getGraphicsAllocation(rootDeviceIndex), 0, 0, {bufferSize, 1, 1}, 0, 0, 0, 0, bcsCsr->getClearColorAllocation()); executeBlitCommand(blitProperties, true); expectMemory(getGpuAddress(*dstBuffer), compressiblePattern.get(), bufferSize, 0, 0); } template template void CopyEngineXeHPAndLater::givenReadBufferRectWithOffsetWhenHostPtrBlitCommandIsDispatchedToHostPtrThenDataIsCorrectlyCopiedImpl() { auto srcMemory = std::make_unique(bufferSize); auto destMemory = std::make_unique(bufferSize); for (unsigned int i = 0; i < bufferSize; i++) { srcMemory[i] = static_cast(1); destMemory[i] = static_cast(2); } auto srcBuffer = createBuffer(false, testLocalMemory, srcMemory.get()); auto pSrcMemory = &srcMemory[0]; auto pDestMemory = &destMemory[0]; size_t hostOrigin[] = {0, 0, 0}; size_t bufferOrigin[] = {1 * sizeof(uint8_t), 1, 0}; size_t region[] = {2 * sizeof(uint8_t), 2, 1}; size_t hostRowPitch = 2 * 
sizeof(uint8_t); size_t hostSlicePitch = 4 * sizeof(uint8_t); size_t bufferRowPitch = 4 * sizeof(uint8_t); size_t bufferSlicePitch = 8 * sizeof(uint8_t); EXPECT_TRUE(srcBuffer->bufferRectPitchSet(bufferOrigin, region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, true)); size_t hostPtrSize = Buffer::calculateHostPtrSize(hostOrigin, region, hostRowPitch, hostSlicePitch); HostPtrSurface hostPtrSurface(destMemory.get(), hostPtrSize, true); bcsCsr->createAllocationForHostSurface(hostPtrSurface, true); GraphicsAllocation *allocation = hostPtrSurface.getAllocation(); auto srcAllocation = srcBuffer->getGraphicsAllocation(rootDeviceIndex); auto blitProperties = BlitProperties::constructPropertiesForReadWrite(BlitterConstants::BlitDirection::BufferToHostPtr, //blitDirection *bcsCsr, //commandStreamReceiver srcAllocation, //memObjAllocation allocation, //preallocatedHostAllocation pDestMemory, //hostPtr srcAllocation->getGpuAddress(), //memObjGpuVa allocation->getGpuAddress(), //hostAllocGpuVa hostOrigin, //hostPtrOffset bufferOrigin, //copyOffset region, //copySize hostRowPitch, //hostRowPitch hostSlicePitch, //hostSlicePitch bufferRowPitch, //gpuRowPitch bufferSlicePitch); //gpuSlicePitch executeBlitCommand(blitProperties, false); bcsCsr->waitForTaskCountWithKmdNotifyFallback(0, 0, false, QueueThrottle::MEDIUM); pSrcMemory = ptrOffset(pSrcMemory, 0); int *destGpuAddress = reinterpret_cast(allocation->getGpuAddress()); expectMemoryNotEqual(destGpuAddress, pSrcMemory, hostPtrSize + 1, 0, 0); expectMemory(destGpuAddress, pSrcMemory, hostPtrSize, 0, 0); } template template void CopyEngineXeHPAndLater::givenWriteBufferRectWithOffsetWhenHostPtrBlitCommandIsDispatchedToHostPtrThenDataIsCorrectlyCopiedImpl() { auto srcMemory = std::make_unique(bufferSize); auto destMemory = std::make_unique(bufferSize); for (unsigned int i = 0; i < bufferSize; i++) { srcMemory[i] = static_cast(1); destMemory[i] = static_cast(2); } auto srcBuffer = createBuffer(false, 
testLocalMemory, destMemory.get()); auto pSrcMemory = &srcMemory[0]; auto pDestMemory = &destMemory[0]; size_t hostOrigin[] = {0, 0, 0}; size_t bufferOrigin[] = {1 * sizeof(uint8_t), 1, 0}; size_t region[] = {2 * sizeof(uint8_t), 2, 1}; size_t hostRowPitch = 2 * sizeof(uint8_t); size_t hostSlicePitch = 4 * sizeof(uint8_t); size_t bufferRowPitch = 4 * sizeof(uint8_t); size_t bufferSlicePitch = 8 * sizeof(uint8_t); EXPECT_TRUE(srcBuffer->bufferRectPitchSet(bufferOrigin, region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, true)); size_t hostPtrSize = Buffer::calculateHostPtrSize(hostOrigin, region, hostRowPitch, hostSlicePitch); HostPtrSurface hostPtrSurface(srcMemory.get(), hostPtrSize, true); bcsCsr->createAllocationForHostSurface(hostPtrSurface, true); GraphicsAllocation *allocation = hostPtrSurface.getAllocation(); auto srcAllocation = srcBuffer->getGraphicsAllocation(rootDeviceIndex); auto blitProperties = BlitProperties::constructPropertiesForReadWrite(BlitterConstants::BlitDirection::HostPtrToBuffer, //blitDirection *bcsCsr, //commandStreamReceiver srcAllocation, //memObjAllocation allocation, //preallocatedHostAllocation pDestMemory, //hostPtr srcAllocation->getGpuAddress(), //memObjGpuVa allocation->getGpuAddress(), //hostAllocGpuVa hostOrigin, //hostPtrOffset bufferOrigin, //copyOffset region, //copySize hostRowPitch, //hostRowPitch hostSlicePitch, //hostSlicePitch bufferRowPitch, //gpuRowPitch bufferSlicePitch); //gpuSlicePitch executeBlitCommand(blitProperties, false); bcsCsr->waitForTaskCountWithKmdNotifyFallback(0, 0, false, QueueThrottle::MEDIUM); pSrcMemory = ptrOffset(pSrcMemory, 0); int *destGpuAddress = reinterpret_cast(allocation->getGpuAddress()); expectMemoryNotEqual(destGpuAddress, pSrcMemory, hostPtrSize + 1, 0, 0); expectMemory(destGpuAddress, pSrcMemory, hostPtrSize, 0, 0); } template template void 
CopyEngineXeHPAndLater::givenCopyBufferRectWithOffsetWhenHostPtrBlitCommandIsDispatchedToHostPtrThenDataIsCorrectlyCopiedImpl() { auto srcMemory = std::make_unique(bufferSize); auto destMemory = std::make_unique(bufferSize); for (unsigned int i = 0; i < bufferSize; i++) { srcMemory[i] = static_cast(1); destMemory[i] = static_cast(2); } auto srcBuffer = createBuffer(false, testLocalMemory, srcMemory.get()); auto dstBuffer = createBuffer(false, testLocalMemory, destMemory.get()); auto pSrcMemory = &srcMemory[0]; auto pDestMemory = reinterpret_cast(dstBuffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress()); auto clearColorAllocation = bcsCsr->getClearColorAllocation(); size_t srcOrigin[] = {0, 0, 0}; size_t dstOrigin[] = {1 * sizeof(uint8_t), 0, 0}; size_t region[] = {2 * sizeof(uint8_t), 2, 2}; size_t srcRowPitch = region[0]; size_t srcSlicePitch = srcRowPitch * region[1]; size_t dstRowPitch = region[0]; size_t dstSlicePitch = dstRowPitch * region[1]; auto copySize = region[0] * region[1] * region[2]; auto blitProperties = BlitProperties::constructPropertiesForCopy(dstBuffer->getGraphicsAllocation(rootDeviceIndex), //dstAllocation srcBuffer->getGraphicsAllocation(rootDeviceIndex), //srcAllocation dstOrigin, //dstOffset srcOrigin, //srcOffset region, //copySize srcRowPitch, //srcRowPitch srcSlicePitch, //srcSlicePitch dstRowPitch, //dstRowPitch dstSlicePitch, //dstSlicePitch clearColorAllocation); //clearColorAllocation executeBlitCommand(blitProperties, false); bcsCsr->waitForTaskCountWithKmdNotifyFallback(0, 0, false, QueueThrottle::MEDIUM); pSrcMemory = ptrOffset(pSrcMemory, 0); expectMemoryNotEqual(ptrOffset(pDestMemory, dstOrigin[0]), pSrcMemory, copySize + 1, 0, 0); expectMemory(ptrOffset(pDestMemory, dstOrigin[0]), pSrcMemory, copySize, 0, 0); } template template void CopyEngineXeHPAndLater::givenCopyBufferRectWithBigSizesWhenHostPtrBlitCommandIsDispatchedToHostPtrThenDataIsCorrectlyCopiedImpl() { DebugManagerStateRestore restore; 
DebugManager.flags.LimitBlitterMaxWidth.set(8); DebugManager.flags.LimitBlitterMaxHeight.set(8); auto srcMemory = std::make_unique(bufferSize); auto destMemory = std::make_unique(bufferSize); for (unsigned int i = 0; i < bufferSize; i++) { srcMemory[i] = static_cast(1); destMemory[i] = static_cast(2); } auto srcBuffer = createBuffer(false, testLocalMemory, srcMemory.get()); auto dstBuffer = createBuffer(false, testLocalMemory, destMemory.get()); auto pSrcMemory = &srcMemory[0]; auto pDestMemory = reinterpret_cast(dstBuffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress()); auto clearColorAllocation = bcsCsr->getClearColorAllocation(); size_t srcOrigin[] = {0, 0, 0}; size_t dstOrigin[] = {1, 1, 1}; size_t region[] = {20, 16, 2}; size_t srcRowPitch = region[0]; size_t srcSlicePitch = srcRowPitch * region[1]; size_t dstRowPitch = region[0]; size_t dstSlicePitch = dstRowPitch * region[1]; auto copySize = region[0] * region[1] * region[2]; auto blitProperties = BlitProperties::constructPropertiesForCopy(dstBuffer->getGraphicsAllocation(rootDeviceIndex), //dstAllocation srcBuffer->getGraphicsAllocation(rootDeviceIndex), //srcAllocation dstOrigin, //dstOffset srcOrigin, //srcOffset region, //copySize srcRowPitch, //srcRowPitch srcSlicePitch, //srcSlicePitch dstRowPitch, //dstRowPitch dstSlicePitch, //dstSlicePitch clearColorAllocation); //clearColorAllocation executeBlitCommand(blitProperties, false); bcsCsr->waitForTaskCountWithKmdNotifyFallback(0, 0, false, QueueThrottle::MEDIUM); size_t dstOffset = dstOrigin[0] + dstOrigin[1] * dstRowPitch + dstOrigin[2] * dstSlicePitch; expectMemoryNotEqual(ptrOffset(pDestMemory, dstOffset), pSrcMemory, copySize + 1, 0, 0); expectMemory(ptrOffset(pDestMemory, dstOffset), pSrcMemory, copySize, 0, 0); } mi_math_aub_tests_dg2_and_later.cpp000066400000000000000000000747311422164147700355060ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/command_stream/* * Copyright (C) 2022 Intel 
Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/register_offsets.h" #include "shared/test/common/helpers/dispatch_flags_helper.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/aub_tests/fixtures/aub_fixture.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" namespace NEO { enum class NewAluOpcodes : uint32_t { OPCODE_LOAD = 0x080, OPCODE_LOAD0 = 0x081, OPCODE_LOAD1 = 0x481, OPCODE_LOADIND = 0x082, OPCODE_STOREIND = 0x181, OPCODE_SHL = 0x105, OPCODE_SHR = 0x106, OPCODE_SAR = 0x107, OPCODE_FENCE = 0x001 }; struct MiMath : public AUBFixture, public ::testing::Test { void SetUp() override { AUBFixture::SetUp(defaultHwInfo.get()); streamAllocation = this->device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), MemoryConstants::pageSize, AllocationType::COMMAND_BUFFER, device->getDeviceBitfield()}); taskStream = std::make_unique(streamAllocation); } void TearDown() override { this->device->getMemoryManager()->freeGraphicsMemory(streamAllocation); AUBFixture::TearDown(); } void flushStream() { DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.guardCommandBufferWithPipeControl = true; csr->flushTask(*taskStream, 0, &csr->getIndirectHeap(IndirectHeap::Type::DYNAMIC_STATE, 0u), &csr->getIndirectHeap(IndirectHeap::Type::INDIRECT_OBJECT, 0u), &csr->getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0u), 0u, dispatchFlags, device->getDevice()); csr->flushBatchedSubmissions(); } uint32_t getPartOfGPUAddress(uint64_t address, bool lowPart) { constexpr uint32_t shift = 32u; constexpr uint32_t mask = 0xffffffff; if (lowPart) { return static_cast(address & mask); } else { return static_cast(address >> shift); } } template void loadValueToRegister(int32_t value, int32_t reg) { using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; MI_LOAD_REGISTER_IMM cmd = 
FamilyType::cmdInitLoadRegisterImm; cmd.setDataDword(value); cmd.setRegisterOffset(reg); cmd.setMmioRemapEnable(1); auto buffer = taskStream->getSpace(sizeof(MI_LOAD_REGISTER_IMM)); *static_cast(buffer) = cmd; } template void storeValueInRegisterToMemory(int64_t address, int32_t reg) { using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM; MI_STORE_REGISTER_MEM cmd2 = FamilyType::cmdInitStoreRegisterMem; cmd2.setRegisterAddress(reg); cmd2.setMemoryAddress(address); cmd2.setMmioRemapEnable(1); auto buffer2 = taskStream->getSpace(sizeof(MI_STORE_REGISTER_MEM)); *static_cast(buffer2) = cmd2; } template void loadAddressToRegisters(uint32_t registerWithLowPart, uint32_t registerWithHighPart, uint32_t registerWithShift, uint64_t address) { loadValueToRegister(getPartOfGPUAddress(address, true), registerWithLowPart); // low part to R0 loadValueToRegister(getPartOfGPUAddress(address, false), registerWithHighPart); // high part to R1 loadValueToRegister(32u, registerWithShift); // value to shift address } template void loadAddressToMiMathAccu(uint32_t lowAddressRegister, uint32_t highAddressRegister, uint32_t shiftReg) { using MI_MATH_ALU_INST_INLINE = typename FamilyType::MI_MATH_ALU_INST_INLINE; MI_MATH_ALU_INST_INLINE *pAluParam = reinterpret_cast(taskStream->getSpace(numberOfOperationToLoadAddressToMiMathAccu * sizeof(MI_MATH_ALU_INST_INLINE))); pAluParam->DW0.BitField.ALUOpcode = static_cast(AluRegisters::OPCODE_LOAD); // load high part of address from register with older to SRCA pAluParam->DW0.BitField.Operand1 = static_cast(AluRegisters::R_SRCA); pAluParam->DW0.BitField.Operand2 = highAddressRegister; pAluParam++; pAluParam->DW0.BitField.ALUOpcode = static_cast(AluRegisters::OPCODE_LOAD); // load 32 - value from shiftReg , to SRCB (to shift high part in register) pAluParam->DW0.BitField.Operand1 = static_cast(AluRegisters::R_SRCB); pAluParam->DW0.BitField.Operand2 = shiftReg; pAluParam++; pAluParam->DW0.BitField.ALUOpcode = 
static_cast(NewAluOpcodes::OPCODE_SHL); // shift high part pAluParam->DW0.BitField.Operand1 = 0; pAluParam->DW0.BitField.Operand2 = 0; pAluParam++; pAluParam->DW0.BitField.ALUOpcode = static_cast(AluRegisters::OPCODE_STORE); // move result to highAddressRegister pAluParam->DW0.BitField.Operand1 = highAddressRegister; pAluParam->DW0.BitField.Operand2 = static_cast(AluRegisters::R_ACCU); pAluParam++; pAluParam->DW0.BitField.ALUOpcode = static_cast(AluRegisters::OPCODE_LOAD); // load highAddressRegister to SRCA pAluParam->DW0.BitField.Operand1 = static_cast(AluRegisters::R_SRCA); pAluParam->DW0.BitField.Operand2 = highAddressRegister; pAluParam++; pAluParam->DW0.BitField.ALUOpcode = static_cast(AluRegisters::OPCODE_LOAD); // load low part of address to SRCB pAluParam->DW0.BitField.Operand1 = static_cast(AluRegisters::R_SRCB); pAluParam->DW0.BitField.Operand2 = lowAddressRegister; pAluParam++; pAluParam->DW0.BitField.ALUOpcode = static_cast(AluRegisters::OPCODE_OR); // join parts of address and locate in ACCU pAluParam->DW0.BitField.Operand1 = 0; pAluParam->DW0.BitField.Operand2 = 0; pAluParam++; } static constexpr size_t bufferSize = MemoryConstants::pageSize; const uint32_t numberOfOperationToLoadAddressToMiMathAccu = 7; std::unique_ptr taskStream; GraphicsAllocation *streamAllocation = nullptr; }; using MatcherIsDg2OrPvc = IsWithinProducts; HWTEST2_F(MiMath, givenLoadIndirectFromMemoryWhenUseMiMathToSimpleOperationThenStoreStateOfRegisterInirectToMemory, MatcherIsDg2OrPvc) { using MI_MATH = typename FamilyType::MI_MATH; using MI_MATH_ALU_INST_INLINE = typename FamilyType::MI_MATH_ALU_INST_INLINE; uint64_t bufferMemory[bufferSize] = {}; bufferMemory[0] = 1u; cl_int retVal = CL_SUCCESS; auto buffer = std::unique_ptr(Buffer::create(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, bufferSize, bufferMemory, retVal)); ASSERT_NE(nullptr, buffer); EXPECT_EQ(CL_SUCCESS, retVal); auto allocation = buffer->getGraphicsAllocation(rootDeviceIndex); 
csr->makeResident(*allocation); uint32_t valueToAdd = 5u; uint64_t valueAfterMiMathOperation = bufferMemory[0] + valueToAdd; loadAddressToRegisters(CS_GPR_R0, CS_GPR_R1, CS_GPR_R2, allocation->getGpuAddress()); // prepare registers to mi_math operation loadValueToRegister(valueToAdd, CS_GPR_R3); auto pCmd = reinterpret_cast(taskStream->getSpace(sizeof(MI_MATH))); reinterpret_cast(pCmd)->DW0.Value = 0x0; reinterpret_cast(pCmd)->DW0.BitField.InstructionType = MI_MATH::COMMAND_TYPE_MI_COMMAND; reinterpret_cast(pCmd)->DW0.BitField.InstructionOpcode = MI_MATH::MI_COMMAND_OPCODE_MI_MATH; reinterpret_cast(pCmd)->DW0.BitField.DwordLength = numberOfOperationToLoadAddressToMiMathAccu + 13 - 1; loadAddressToMiMathAccu(static_cast(AluRegisters::R_0), static_cast(AluRegisters::R_1), static_cast(AluRegisters::R_2)); // GPU address of buffer load to ACCU register MI_MATH_ALU_INST_INLINE *pAluParam = reinterpret_cast(taskStream->getSpace(13 * sizeof(MI_MATH_ALU_INST_INLINE))); pAluParam->DW0.BitField.ALUOpcode = static_cast(NewAluOpcodes::OPCODE_FENCE); // to be sure that all writes and reads are completed pAluParam->DW0.BitField.Operand1 = 0; pAluParam->DW0.BitField.Operand2 = 0; pAluParam++; pAluParam->DW0.BitField.ALUOpcode = static_cast(NewAluOpcodes::OPCODE_LOADIND); // load dword from memory address located in ACCU pAluParam->DW0.BitField.Operand1 = static_cast(AluRegisters::R_0); pAluParam->DW0.BitField.Operand2 = static_cast(AluRegisters::R_ACCU); pAluParam++; pAluParam->DW0.BitField.ALUOpcode = static_cast(NewAluOpcodes::OPCODE_FENCE); // to be sure that all writes and reads are completed pAluParam->DW0.BitField.Operand1 = 0; pAluParam->DW0.BitField.Operand2 = 0; pAluParam++; pAluParam->DW0.BitField.ALUOpcode = static_cast(AluRegisters::OPCODE_STORE); // copy address from ACCU to R2 pAluParam->DW0.BitField.Operand1 = static_cast(AluRegisters::R_2); pAluParam->DW0.BitField.Operand2 = static_cast(AluRegisters::R_ACCU); pAluParam++; pAluParam->DW0.BitField.ALUOpcode = 
static_cast(AluRegisters::OPCODE_LOAD); // R0 to SRCA pAluParam->DW0.BitField.Operand1 = static_cast(AluRegisters::R_SRCA); pAluParam->DW0.BitField.Operand2 = static_cast(AluRegisters::R_0); pAluParam++; pAluParam->DW0.BitField.ALUOpcode = static_cast(AluRegisters::OPCODE_LOAD); // R3 to SRCB where is value of 'valueToAdd' pAluParam->DW0.BitField.Operand1 = static_cast(AluRegisters::R_SRCB); pAluParam->DW0.BitField.Operand2 = static_cast(AluRegisters::R_3); pAluParam++; pAluParam->DW0.BitField.ALUOpcode = static_cast(AluRegisters::OPCODE_ADD); // do simple add on registers SRCA and SRCB pAluParam->DW0.BitField.Operand1 = 0; pAluParam->DW0.BitField.Operand2 = 0; pAluParam++; pAluParam->DW0.BitField.ALUOpcode = static_cast(AluRegisters::OPCODE_STORE); // R3 to SRCB where is value of 'valueToAdd' pAluParam->DW0.BitField.Operand1 = static_cast(AluRegisters::R_1); pAluParam->DW0.BitField.Operand2 = static_cast(AluRegisters::R_ACCU); pAluParam++; pAluParam->DW0.BitField.ALUOpcode = static_cast(AluRegisters::OPCODE_LOAD); // load address from R2 where is copy of address to SRCA pAluParam->DW0.BitField.Operand1 = static_cast(AluRegisters::R_SRCA); pAluParam->DW0.BitField.Operand2 = static_cast(AluRegisters::R_2); pAluParam++; pAluParam->DW0.BitField.ALUOpcode = static_cast(NewAluOpcodes::OPCODE_LOAD0); pAluParam->DW0.BitField.Operand1 = static_cast(AluRegisters::R_SRCB); pAluParam->DW0.BitField.Operand2 = 0; pAluParam++; pAluParam->DW0.BitField.ALUOpcode = static_cast(AluRegisters::OPCODE_ADD); // move address to ACCU pAluParam->DW0.BitField.Operand1 = 0; pAluParam->DW0.BitField.Operand2 = 0; pAluParam++; pAluParam->DW0.BitField.ALUOpcode = static_cast(NewAluOpcodes::OPCODE_FENCE); // to be sure that all writes and reads are completed pAluParam->DW0.BitField.Operand1 = 0; pAluParam->DW0.BitField.Operand2 = 0; pAluParam++; pAluParam->DW0.BitField.ALUOpcode = static_cast(NewAluOpcodes::OPCODE_STOREIND); // store to memory from ACCU, value from register R1 
pAluParam->DW0.BitField.Operand1 = static_cast(AluRegisters::R_ACCU); pAluParam->DW0.BitField.Operand2 = static_cast(AluRegisters::R_1); pAluParam++; flushStream(); expectMemory(reinterpret_cast(allocation->getGpuAddress()), &valueAfterMiMathOperation, sizeof(valueAfterMiMathOperation)); } HWTEST2_F(MiMath, givenLoadIndirectFromMemoryWhenUseMiMathThenStoreIndirectToAnotherMemory, MatcherIsDg2OrPvc) { using MI_MATH = typename FamilyType::MI_MATH; using MI_MATH_ALU_INST_INLINE = typename FamilyType::MI_MATH_ALU_INST_INLINE; uint64_t bufferMemory[bufferSize] = {}; bufferMemory[0] = 1u; uint64_t bufferBMemory[bufferSize] = {}; cl_int retVal = CL_SUCCESS; auto buffer = std::unique_ptr(Buffer::create(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, bufferSize, bufferMemory, retVal)); ASSERT_NE(nullptr, buffer); EXPECT_EQ(CL_SUCCESS, retVal); auto bufferB = std::unique_ptr(Buffer::create(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, bufferSize, bufferBMemory, retVal)); ASSERT_NE(nullptr, buffer); EXPECT_EQ(CL_SUCCESS, retVal); csr->makeResident(*buffer->getGraphicsAllocation(rootDeviceIndex)); csr->makeResident(*bufferB->getGraphicsAllocation(rootDeviceIndex)); loadAddressToRegisters(CS_GPR_R0, CS_GPR_R1, CS_GPR_R2, buffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress()); // prepare registers to mi_math operation loadAddressToRegisters(CS_GPR_R3, CS_GPR_R4, CS_GPR_R2, bufferB->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress()); // prepare registers to mi_math operation auto pCmd = reinterpret_cast(taskStream->getSpace(sizeof(MI_MATH))); reinterpret_cast(pCmd)->DW0.Value = 0x0; reinterpret_cast(pCmd)->DW0.BitField.InstructionType = MI_MATH::COMMAND_TYPE_MI_COMMAND; reinterpret_cast(pCmd)->DW0.BitField.InstructionOpcode = MI_MATH::MI_COMMAND_OPCODE_MI_MATH; reinterpret_cast(pCmd)->DW0.BitField.DwordLength = numberOfOperationToLoadAddressToMiMathAccu * 2 + 6 - 1; loadAddressToMiMathAccu(static_cast(AluRegisters::R_0), static_cast(AluRegisters::R_1), 
static_cast(AluRegisters::R_2)); // GPU address of buffer load to ACCU register MI_MATH_ALU_INST_INLINE *pAluParam = reinterpret_cast(taskStream->getSpace(4 * sizeof(MI_MATH_ALU_INST_INLINE))); pAluParam->DW0.BitField.ALUOpcode = static_cast(NewAluOpcodes::OPCODE_FENCE); // to be sure that all writes and reads are completed pAluParam->DW0.BitField.Operand1 = 0; pAluParam->DW0.BitField.Operand2 = 0; pAluParam++; pAluParam->DW0.BitField.ALUOpcode = static_cast(NewAluOpcodes::OPCODE_LOADIND); // load dword from memory address located in ACCU to R0 pAluParam->DW0.BitField.Operand1 = static_cast(AluRegisters::R_0); pAluParam->DW0.BitField.Operand2 = static_cast(AluRegisters::R_ACCU); pAluParam++; pAluParam->DW0.BitField.ALUOpcode = static_cast(NewAluOpcodes::OPCODE_FENCE); // to be sure that all writes and reads are completed pAluParam->DW0.BitField.Operand1 = 0; pAluParam->DW0.BitField.Operand2 = 0; pAluParam++; loadAddressToMiMathAccu(static_cast(AluRegisters::R_3), static_cast(AluRegisters::R_4), static_cast(AluRegisters::R_2)); // GPU address of bufferB load to ACCU register pAluParam = reinterpret_cast(taskStream->getSpace(2 * sizeof(MI_MATH_ALU_INST_INLINE))); pAluParam->DW0.BitField.ALUOpcode = static_cast(NewAluOpcodes::OPCODE_FENCE); // to be sure that all writes and reads are completed pAluParam->DW0.BitField.Operand1 = 0; pAluParam->DW0.BitField.Operand2 = 0; pAluParam++; pAluParam->DW0.BitField.ALUOpcode = static_cast(NewAluOpcodes::OPCODE_STOREIND); // store to memory from ACCU, value from register R0 pAluParam->DW0.BitField.Operand1 = static_cast(AluRegisters::R_ACCU); pAluParam->DW0.BitField.Operand2 = static_cast(AluRegisters::R_0); pAluParam++; pAluParam->DW0.BitField.ALUOpcode = static_cast(NewAluOpcodes::OPCODE_FENCE); // to be sure that all writes and reads are completed pAluParam->DW0.BitField.Operand1 = 0; pAluParam->DW0.BitField.Operand2 = 0; pAluParam++; flushStream(); 
expectMemory(reinterpret_cast(bufferB->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress()), bufferMemory, sizeof(uint64_t)); } HWTEST2_F(MiMath, givenValueToMakeLeftLogicalShiftWhenUseMiMathThenShiftIsDoneProperly, MatcherIsDg2OrPvc) { using MI_MATH = typename FamilyType::MI_MATH; using MI_MATH_ALU_INST_INLINE = typename FamilyType::MI_MATH_ALU_INST_INLINE; uint64_t bufferMemory[bufferSize] = {}; bufferMemory[0] = 1u; cl_int retVal = CL_SUCCESS; auto buffer = std::unique_ptr(Buffer::create(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, bufferSize, bufferMemory, retVal)); ASSERT_NE(nullptr, buffer); EXPECT_EQ(CL_SUCCESS, retVal); csr->makeResident(*buffer->getGraphicsAllocation(rootDeviceIndex)); uint32_t value = 1u; uint32_t shift = 2u; uint32_t notPowerOfTwoShift = 5u; uint32_t expectedUsedShift = 4u; loadValueToRegister(value, CS_GPR_R0); loadValueToRegister(shift, CS_GPR_R1); loadValueToRegister(notPowerOfTwoShift, CS_GPR_R2); auto pCmd = reinterpret_cast(taskStream->getSpace(sizeof(MI_MATH))); reinterpret_cast(pCmd)->DW0.Value = 0x0; reinterpret_cast(pCmd)->DW0.BitField.InstructionType = MI_MATH::COMMAND_TYPE_MI_COMMAND; reinterpret_cast(pCmd)->DW0.BitField.InstructionOpcode = MI_MATH::MI_COMMAND_OPCODE_MI_MATH; reinterpret_cast(pCmd)->DW0.BitField.DwordLength = 7 - 1; MI_MATH_ALU_INST_INLINE *pAluParam = reinterpret_cast(taskStream->getSpace(7 * sizeof(MI_MATH_ALU_INST_INLINE))); pAluParam->DW0.BitField.ALUOpcode = static_cast(AluRegisters::OPCODE_LOAD); // load value from R0 to SRCA pAluParam->DW0.BitField.Operand1 = static_cast(AluRegisters::R_SRCA); pAluParam->DW0.BitField.Operand2 = static_cast(AluRegisters::R_0); pAluParam++; pAluParam->DW0.BitField.ALUOpcode = static_cast(AluRegisters::OPCODE_LOAD); // load value to shift to SRCB pAluParam->DW0.BitField.Operand1 = static_cast(AluRegisters::R_SRCB); pAluParam->DW0.BitField.Operand2 = static_cast(AluRegisters::R_1); pAluParam++; pAluParam->DW0.BitField.ALUOpcode = 
static_cast(NewAluOpcodes::OPCODE_SHL); // load value to shift to SRCB pAluParam->DW0.BitField.Operand1 = 0; pAluParam->DW0.BitField.Operand2 = 0; pAluParam++; pAluParam->DW0.BitField.ALUOpcode = static_cast(AluRegisters::OPCODE_STORE); // load value to shift to SRCB pAluParam->DW0.BitField.Operand1 = static_cast(AluRegisters::R_1); pAluParam->DW0.BitField.Operand2 = static_cast(AluRegisters::R_ACCU); pAluParam++; pAluParam->DW0.BitField.ALUOpcode = static_cast(AluRegisters::OPCODE_LOAD); // load value to shift to SRCB pAluParam->DW0.BitField.Operand1 = static_cast(AluRegisters::R_SRCB); pAluParam->DW0.BitField.Operand2 = static_cast(AluRegisters::R_2); pAluParam++; pAluParam->DW0.BitField.ALUOpcode = static_cast(NewAluOpcodes::OPCODE_SHL); // load value to shift to SRCB pAluParam->DW0.BitField.Operand1 = 0; pAluParam->DW0.BitField.Operand2 = 0; pAluParam++; pAluParam->DW0.BitField.ALUOpcode = static_cast(AluRegisters::OPCODE_STORE); // load value to shift to SRCB pAluParam->DW0.BitField.Operand1 = static_cast(AluRegisters::R_2); pAluParam->DW0.BitField.Operand2 = static_cast(AluRegisters::R_ACCU); pAluParam++; storeValueInRegisterToMemory(buffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress(), CS_GPR_R1); storeValueInRegisterToMemory(buffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress() + 4, CS_GPR_R2); flushStream(); uint32_t firstShift = value << shift; uint32_t secondShift = value << notPowerOfTwoShift; uint32_t executeSecondShift = value << expectedUsedShift; expectMemory(reinterpret_cast(buffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress()), &firstShift, sizeof(firstShift)); expectNotEqualMemory(reinterpret_cast(buffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress() + 4), &secondShift, sizeof(secondShift)); expectMemory(reinterpret_cast(buffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress() + 4), &executeSecondShift, sizeof(executeSecondShift)); } HWTEST2_F(MiMath, 
givenValueToMakeRightLogicalShiftWhenUseMiMathThenShiftIsDoneProperly, MatcherIsDg2OrPvc) { using MI_MATH = typename FamilyType::MI_MATH; using MI_MATH_ALU_INST_INLINE = typename FamilyType::MI_MATH_ALU_INST_INLINE; uint64_t bufferMemory[bufferSize] = {}; bufferMemory[0] = 1u; cl_int retVal = CL_SUCCESS; auto buffer = std::unique_ptr(Buffer::create(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, bufferSize, bufferMemory, retVal)); ASSERT_NE(nullptr, buffer); EXPECT_EQ(CL_SUCCESS, retVal); auto allocation = buffer->getGraphicsAllocation(rootDeviceIndex); csr->makeResident(*allocation); uint32_t value = 32u; uint32_t shift = 2u; uint32_t notPowerOfTwoShift = 5u; uint32_t expectedUsedShift = 4u; loadValueToRegister(value, CS_GPR_R0); loadValueToRegister(shift, CS_GPR_R1); loadValueToRegister(notPowerOfTwoShift, CS_GPR_R2); auto pCmd = reinterpret_cast(taskStream->getSpace(sizeof(MI_MATH))); reinterpret_cast(pCmd)->DW0.Value = 0x0; reinterpret_cast(pCmd)->DW0.BitField.InstructionType = MI_MATH::COMMAND_TYPE_MI_COMMAND; reinterpret_cast(pCmd)->DW0.BitField.InstructionOpcode = MI_MATH::MI_COMMAND_OPCODE_MI_MATH; reinterpret_cast(pCmd)->DW0.BitField.DwordLength = 7 - 1; MI_MATH_ALU_INST_INLINE *pAluParam = reinterpret_cast(taskStream->getSpace(7 * sizeof(MI_MATH_ALU_INST_INLINE))); pAluParam->DW0.BitField.ALUOpcode = static_cast(AluRegisters::OPCODE_LOAD); // load value from R0 to SRCA pAluParam->DW0.BitField.Operand1 = static_cast(AluRegisters::R_SRCA); pAluParam->DW0.BitField.Operand2 = static_cast(AluRegisters::R_0); pAluParam++; pAluParam->DW0.BitField.ALUOpcode = static_cast(AluRegisters::OPCODE_LOAD); // load value to shift to SRCB pAluParam->DW0.BitField.Operand1 = static_cast(AluRegisters::R_SRCB); pAluParam->DW0.BitField.Operand2 = static_cast(AluRegisters::R_1); pAluParam++; pAluParam->DW0.BitField.ALUOpcode = static_cast(NewAluOpcodes::OPCODE_SHR); // load value to shift to SRCB pAluParam->DW0.BitField.Operand1 = 0; pAluParam->DW0.BitField.Operand2 = 0; 
pAluParam++; pAluParam->DW0.BitField.ALUOpcode = static_cast(AluRegisters::OPCODE_STORE); // load value to shift to SRCB pAluParam->DW0.BitField.Operand1 = static_cast(AluRegisters::R_1); pAluParam->DW0.BitField.Operand2 = static_cast(AluRegisters::R_ACCU); pAluParam++; pAluParam->DW0.BitField.ALUOpcode = static_cast(AluRegisters::OPCODE_LOAD); // load value to shift to SRCB pAluParam->DW0.BitField.Operand1 = static_cast(AluRegisters::R_SRCB); pAluParam->DW0.BitField.Operand2 = static_cast(AluRegisters::R_2); pAluParam++; pAluParam->DW0.BitField.ALUOpcode = static_cast(NewAluOpcodes::OPCODE_SHR); // load value to shift to SRCB pAluParam->DW0.BitField.Operand1 = 0; pAluParam->DW0.BitField.Operand2 = 0; pAluParam++; pAluParam->DW0.BitField.ALUOpcode = static_cast(AluRegisters::OPCODE_STORE); // load value to shift to SRCB pAluParam->DW0.BitField.Operand1 = static_cast(AluRegisters::R_2); pAluParam->DW0.BitField.Operand2 = static_cast(AluRegisters::R_ACCU); pAluParam++; storeValueInRegisterToMemory(allocation->getGpuAddress(), CS_GPR_R1); storeValueInRegisterToMemory(allocation->getGpuAddress() + 4, CS_GPR_R2); flushStream(); uint32_t firstShift = value >> shift; uint32_t secondShift = value >> notPowerOfTwoShift; uint32_t executeSecondShift = value >> expectedUsedShift; expectMemory(reinterpret_cast(allocation->getGpuAddress()), &firstShift, sizeof(firstShift)); expectNotEqualMemory(reinterpret_cast(allocation->getGpuAddress() + 4), &secondShift, sizeof(secondShift)); expectMemory(reinterpret_cast(allocation->getGpuAddress() + 4), &executeSecondShift, sizeof(executeSecondShift)); } HWTEST2_F(MiMath, givenValueToMakeRightAritmeticShiftWhenUseMiMathThenShiftIsDoneProperly, MatcherIsDg2OrPvc) { using MI_MATH = typename FamilyType::MI_MATH; using MI_MATH_ALU_INST_INLINE = typename FamilyType::MI_MATH_ALU_INST_INLINE; int64_t bufferMemory[bufferSize] = {}; bufferMemory[0] = -32; cl_int retVal = CL_SUCCESS; auto buffer = std::unique_ptr(Buffer::create(context, 
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, bufferSize, bufferMemory, retVal)); ASSERT_NE(nullptr, buffer); EXPECT_EQ(CL_SUCCESS, retVal); auto allocation = buffer->getGraphicsAllocation(rootDeviceIndex); csr->makeResident(*allocation); uint32_t shift = 2u; uint32_t notPowerOfTwoShift = 5u; uint32_t expectedUsedShift = 4u; loadAddressToRegisters(CS_GPR_R0, CS_GPR_R1, CS_GPR_R2, allocation->getGpuAddress()); // prepare registers to mi_math operation loadValueToRegister(shift, CS_GPR_R4); loadValueToRegister(notPowerOfTwoShift, CS_GPR_R5); auto pCmd = reinterpret_cast(taskStream->getSpace(sizeof(MI_MATH))); reinterpret_cast(pCmd)->DW0.Value = 0x0; reinterpret_cast(pCmd)->DW0.BitField.InstructionType = MI_MATH::COMMAND_TYPE_MI_COMMAND; reinterpret_cast(pCmd)->DW0.BitField.InstructionOpcode = MI_MATH::MI_COMMAND_OPCODE_MI_MATH; reinterpret_cast(pCmd)->DW0.BitField.DwordLength = numberOfOperationToLoadAddressToMiMathAccu + 9 - 1; loadAddressToMiMathAccu(static_cast(AluRegisters::R_0), static_cast(AluRegisters::R_1), static_cast(AluRegisters::R_2)); // GPU address of buffer load to ACCU register MI_MATH_ALU_INST_INLINE *pAluParam = reinterpret_cast(taskStream->getSpace(9 * sizeof(MI_MATH_ALU_INST_INLINE))); pAluParam->DW0.BitField.ALUOpcode = static_cast(NewAluOpcodes::OPCODE_LOADIND); // load value from R0 to SRCA pAluParam->DW0.BitField.Operand1 = static_cast(AluRegisters::R_3); pAluParam->DW0.BitField.Operand2 = static_cast(AluRegisters::R_ACCU); pAluParam++; pAluParam->DW0.BitField.ALUOpcode = static_cast(NewAluOpcodes::OPCODE_FENCE); // to be sure that all writes and reads are completed pAluParam->DW0.BitField.Operand1 = 0; pAluParam->DW0.BitField.Operand2 = 0; pAluParam++; pAluParam->DW0.BitField.ALUOpcode = static_cast(AluRegisters::OPCODE_LOAD); // load value from R0 to SRCA pAluParam->DW0.BitField.Operand1 = static_cast(AluRegisters::R_SRCA); pAluParam->DW0.BitField.Operand2 = static_cast(AluRegisters::R_3); pAluParam++; pAluParam->DW0.BitField.ALUOpcode = 
static_cast(AluRegisters::OPCODE_LOAD); // load value to shift to SRCB pAluParam->DW0.BitField.Operand1 = static_cast(AluRegisters::R_SRCB); pAluParam->DW0.BitField.Operand2 = static_cast(AluRegisters::R_4); pAluParam++; pAluParam->DW0.BitField.ALUOpcode = static_cast(NewAluOpcodes::OPCODE_SAR); // load value to shift to SRCB pAluParam->DW0.BitField.Operand1 = 0; pAluParam->DW0.BitField.Operand2 = 0; pAluParam++; pAluParam->DW0.BitField.ALUOpcode = static_cast(AluRegisters::OPCODE_STORE); // load value to shift to SRCB pAluParam->DW0.BitField.Operand1 = static_cast(AluRegisters::R_4); pAluParam->DW0.BitField.Operand2 = static_cast(AluRegisters::R_ACCU); pAluParam++; pAluParam->DW0.BitField.ALUOpcode = static_cast(AluRegisters::OPCODE_LOAD); // load value to shift to SRCB pAluParam->DW0.BitField.Operand1 = static_cast(AluRegisters::R_SRCB); pAluParam->DW0.BitField.Operand2 = static_cast(AluRegisters::R_5); pAluParam++; pAluParam->DW0.BitField.ALUOpcode = static_cast(NewAluOpcodes::OPCODE_SAR); // load value to shift to SRCB pAluParam->DW0.BitField.Operand1 = 0; pAluParam->DW0.BitField.Operand2 = 0; pAluParam++; pAluParam->DW0.BitField.ALUOpcode = static_cast(AluRegisters::OPCODE_STORE); // load value to shift to SRCB pAluParam->DW0.BitField.Operand1 = static_cast(AluRegisters::R_5); pAluParam->DW0.BitField.Operand2 = static_cast(AluRegisters::R_ACCU); pAluParam++; storeValueInRegisterToMemory(allocation->getGpuAddress(), CS_GPR_R4); storeValueInRegisterToMemory(allocation->getGpuAddress() + 4, CS_GPR_R5); flushStream(); int64_t firstShift = bufferMemory[0]; for (uint32_t i = 0; i < shift; i++) { firstShift /= 2; } int64_t secondShift = bufferMemory[0]; for (uint32_t i = 0; i < notPowerOfTwoShift; i++) { secondShift /= 2; } int64_t executeSecondShift = bufferMemory[0]; for (uint32_t i = 0; i < expectedUsedShift; i++) { executeSecondShift /= 2; } expectMemory(reinterpret_cast(allocation->getGpuAddress()), &firstShift, sizeof(uint32_t)); 
expectNotEqualMemory(reinterpret_cast(allocation->getGpuAddress() + 4), &secondShift, sizeof(uint32_t)); expectMemory(reinterpret_cast(allocation->getGpuAddress() + 4), &executeSecondShift, sizeof(uint32_t)); } } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/fixtures/000077500000000000000000000000001422164147700254715ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/fixtures/CMakeLists.txt000066400000000000000000000015651422164147700302400ustar00rootroot00000000000000# # Copyright (C) 2018-2022 Intel Corporation # # SPDX-License-Identifier: MIT # target_sources(igdrcl_aub_tests PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/aub_fixture.cpp ${CMAKE_CURRENT_SOURCE_DIR}/aub_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/hello_world_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/image_aub_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/multicontext_aub_fixture.cpp ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}multicontext_aub_fixture_extended.cpp ${CMAKE_CURRENT_SOURCE_DIR}/multicontext_aub_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/run_kernel_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/simple_arg_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/unified_memory_fixture.h ) add_subdirectories() compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/fixtures/aub_fixture.cpp000066400000000000000000000020301422164147700305050ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/aub_tests/fixtures/aub_fixture.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/test/common/mocks/mock_allocation_properties.h" namespace NEO { GraphicsAllocation *AUBFixture::createHostPtrAllocationFromSvmPtr(void *svmPtr, size_t size) { GraphicsAllocation *allocation = 
csr->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, size}, svmPtr); csr->makeResidentHostPtrAllocation(allocation); csr->getInternalAllocationStorage()->storeAllocation(std::unique_ptr(allocation), TEMPORARY_ALLOCATION); allocation->setAllocationType(AllocationType::BUFFER); allocation->setMemObjectsAllocationWithWritableFlags(true); return allocation; } } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/fixtures/aub_fixture.h000066400000000000000000000203551422164147700301640ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/aub_mem_dump/aub_mem_dump.h" #include "shared/source/aub_mem_dump/page_table_entry_bits.h" #include "shared/source/command_stream/aub_command_stream_receiver_hw.h" #include "shared/source/command_stream/command_stream_receiver_with_aub_dump.h" #include "shared/source/command_stream/tbx_command_stream_receiver_hw.h" #include "shared/source/helpers/api_specific_config.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/os_interface/os_interface.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/mocks/mock_memory_operations_handler.h" #include "shared/test/unit_test/tests_configuration.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/command_queue/command_queue_fixture.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "gtest/gtest.h" #include namespace NEO { class AUBFixture : public CommandQueueHwFixture { public: static CommandStreamReceiver *prepareComputeEngine(MockDevice &device, const std::string &filename) { CommandStreamReceiver *pCommandStreamReceiver = nullptr; if (testMode == TestMode::AubTestsWithTbx) { pCommandStreamReceiver = TbxCommandStreamReceiver::create(filename, true, 
*device.executionEnvironment, device.getRootDeviceIndex(), device.getDeviceBitfield()); } else { pCommandStreamReceiver = AUBCommandStreamReceiver::create(filename, true, *device.executionEnvironment, device.getRootDeviceIndex(), device.getDeviceBitfield()); } device.resetCommandStreamReceiver(pCommandStreamReceiver); return pCommandStreamReceiver; } static void prepareCopyEngines(MockDevice &device, const std::string &filename) { for (auto i = 0u; i < device.allEngines.size(); i++) { if (EngineHelpers::isBcs(device.allEngines[i].getEngineType())) { CommandStreamReceiver *pBcsCommandStreamReceiver = nullptr; if (testMode == TestMode::AubTestsWithTbx) { pBcsCommandStreamReceiver = TbxCommandStreamReceiver::create(filename, true, *device.executionEnvironment, device.getRootDeviceIndex(), device.getDeviceBitfield()); } else { pBcsCommandStreamReceiver = AUBCommandStreamReceiver::create(filename, true, *device.executionEnvironment, device.getRootDeviceIndex(), device.getDeviceBitfield()); } device.resetCommandStreamReceiver(pBcsCommandStreamReceiver, i); } } } void SetUp(const HardwareInfo *hardwareInfo) { const HardwareInfo &hwInfo = hardwareInfo ? 
*hardwareInfo : *defaultHwInfo; auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); auto engineType = getChosenEngineType(hwInfo); const ::testing::TestInfo *const testInfo = ::testing::UnitTest::GetInstance()->current_test_info(); std::stringstream strfilename; strfilename << ApiSpecificConfig::getAubPrefixForSpecificApi(); strfilename << testInfo->test_case_name() << "_" << testInfo->name() << "_" << hwHelper.getCsTraits(engineType).name; executionEnvironment = platform()->peekExecutionEnvironment(); executionEnvironment->prepareRootDeviceEnvironments(1u); executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(&hwInfo); executionEnvironment->rootDeviceEnvironments[0]->memoryOperationsInterface = std::make_unique(); auto pDevice = MockDevice::create(executionEnvironment, rootDeviceIndex); device = std::make_unique(pDevice); this->csr = prepareComputeEngine(*pDevice, strfilename.str()); prepareCopyEngines(*pDevice, strfilename.str()); CommandQueueHwFixture::SetUp(AUBFixture::device.get(), cl_command_queue_properties(0)); } void TearDown() override { CommandQueueHwFixture::TearDown(); } GraphicsAllocation *createHostPtrAllocationFromSvmPtr(void *svmPtr, size_t size); template CommandStreamReceiverSimulatedCommonHw *getSimulatedCsr() const { return static_cast *>(csr); } template void expectMemory(void *gfxAddress, const void *srcAddress, size_t length) { CommandStreamReceiverSimulatedCommonHw *csrSimulated = getSimulatedCsr(); if (testMode == TestMode::AubTestsWithTbx) { auto tbxCsr = csrSimulated; EXPECT_TRUE(tbxCsr->expectMemoryEqual(gfxAddress, srcAddress, length)); csrSimulated = static_cast *>( static_cast> *>(csr)->aubCSR.get()); } if (csrSimulated) { csrSimulated->expectMemoryEqual(gfxAddress, srcAddress, length); } } template void writeMMIO(uint32_t offset, uint32_t value) { CommandStreamReceiverSimulatedCommonHw *csrSimulated = getSimulatedCsr(); if (csrSimulated) { csrSimulated->writeMMIO(offset, value); } } template void 
expectMMIO(uint32_t mmioRegister, uint32_t expectedValue) { CommandStreamReceiver *csrtemp = csr; if (testMode == TestMode::AubTestsWithTbx) { csrtemp = static_cast> *>(csr)->aubCSR.get(); } if (csrtemp) { // Write our pseudo-op to the AUB file auto aubCsr = static_cast *>(csrtemp); aubCsr->expectMMIO(mmioRegister, expectedValue); } } template void expectNotEqualMemory(void *gfxAddress, const void *srcAddress, size_t length) { CommandStreamReceiverSimulatedCommonHw *csrSimulated = getSimulatedCsr(); if (testMode == TestMode::AubTestsWithTbx) { auto tbxCsr = csrSimulated; EXPECT_TRUE(tbxCsr->expectMemoryNotEqual(gfxAddress, srcAddress, length)); csrSimulated = static_cast *>( static_cast> *>(csr)->aubCSR.get()); } if (csrSimulated) { csrSimulated->expectMemoryNotEqual(gfxAddress, srcAddress, length); } } template void expectCompressedMemory(void *gfxAddress, const void *srcAddress, size_t length) { CommandStreamReceiverSimulatedCommonHw *csrSimulated = getSimulatedCsr(); if (testMode == TestMode::AubTestsWithTbx) { auto tbxCsr = csrSimulated; EXPECT_TRUE(tbxCsr->expectMemoryCompressed(gfxAddress, srcAddress, length)); csrSimulated = static_cast *>( static_cast> *>(csr)->aubCSR.get()); } if (csrSimulated) { csrSimulated->expectMemoryCompressed(gfxAddress, srcAddress, length); } } static void *getGpuPointer(GraphicsAllocation *allocation) { return reinterpret_cast(allocation->getGpuAddress()); } const uint32_t rootDeviceIndex = 0; CommandStreamReceiver *csr = nullptr; volatile uint32_t *pTagMemory = nullptr; std::unique_ptr device; ExecutionEnvironment *executionEnvironment; private: using CommandQueueHwFixture::SetUp; }; // namespace NEO template struct KernelAUBFixture : public AUBFixture, public KernelFixture { void SetUp() override { AUBFixture::SetUp(nullptr); KernelFixture::SetUp(device.get(), context); } void TearDown() override { KernelFixture::TearDown(); AUBFixture::TearDown(); } }; } // namespace NEO 
compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/fixtures/hello_world_fixture.h000066400000000000000000000014251422164147700317240ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/test/unit_test/aub_tests/command_stream/aub_command_stream_fixture.h" #include "opencl/test/unit_test/aub_tests/fixtures/simple_arg_fixture.h" #include "opencl/test/unit_test/fixtures/hello_world_fixture.h" #include "opencl/test/unit_test/fixtures/simple_arg_fixture.h" namespace NEO { //////////////////////////////////////////////////////////////////////////////// // Factory where all command stream traffic funnels to an AUB file //////////////////////////////////////////////////////////////////////////////// struct AUBHelloWorldFixtureFactory : public HelloWorldFixtureFactory { typedef AUBCommandStreamFixture CommandStreamFixture; }; } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/fixtures/image_aub_fixture.h000066400000000000000000000037001422164147700313210ustar00rootroot00000000000000/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "opencl/test/unit_test/aub_tests/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "test_traits_common.h" struct ImagesSupportedMatcher { template static constexpr bool isMatched() { return TestTraits::get()>::imagesSupported; } }; namespace NEO { struct ImageAubFixture : public ClDeviceFixture, public AUBCommandStreamFixture { typedef AUBCommandStreamFixture CommandStreamFixture; using AUBCommandStreamFixture::SetUp; void SetUp(bool enableBlitter) { if (enableBlitter) { if (!(HwInfoConfig::get(defaultHwInfo->platform.eProductFamily)->isBlitterForImagesSupported())) { GTEST_SKIP(); } hardwareInfo = *defaultHwInfo; 
hardwareInfo.capabilityTable.blitterOperationsSupported = true; ClDeviceFixture::SetUpImpl(&hardwareInfo); } else { ClDeviceFixture::SetUp(); } context = new MockContext(pClDevice); cl_int retVal = CL_SUCCESS; auto clQueue = clCreateCommandQueueWithProperties(context, pClDevice, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); pCmdQ = castToObject(clQueue); CommandStreamFixture::SetUp(pCmdQ); } void TearDown() { if (pCmdQ) { auto blocked = pCmdQ->isQueueBlocked(); UNRECOVERABLE_IF(blocked); pCmdQ->release(); } if (context) { context->release(); } CommandStreamFixture::TearDown(); ClDeviceFixture::TearDown(); } DebugManagerStateRestore restorer; CommandQueue *pCmdQ = nullptr; MockContext *context = nullptr; }; } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/fixtures/multicontext_aub_fixture.cpp000066400000000000000000000252141422164147700333350ustar00rootroot00000000000000/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/aub_tests/fixtures/multicontext_aub_fixture.h" #include "shared/source/command_stream/aub_command_stream_receiver.h" #include "shared/source/helpers/api_specific_config.h" #include "shared/source/helpers/hw_helper.h" #include "shared/test/common/helpers/ult_hw_config.h" #include "shared/test/common/helpers/variable_backup.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/tests_configuration.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_platform.h" namespace NEO { void MulticontextAubFixture::SetUp(uint32_t numberOfTiles, EnabledCommandStreamers enabledCommandStreamers, bool enableCompression) { this->numberOfEnabledTiles = numberOfTiles; const ::testing::TestInfo *const testInfo = 
::testing::UnitTest::GetInstance()->current_test_info(); cl_int retVal = CL_SUCCESS; DebugManager.flags.CsrDispatchMode.set(static_cast(DispatchMode::BatchedDispatch)); DebugManager.flags.CreateMultipleSubDevices.set(numberOfTiles); if (testMode == TestMode::AubTestsWithTbx) { DebugManager.flags.SetCommandStreamReceiver.set(static_cast(CommandStreamReceiverType::CSR_TBX_WITH_AUB)); } else { DebugManager.flags.SetCommandStreamReceiver.set(static_cast(CommandStreamReceiverType::CSR_AUB)); } HardwareInfo localHwInfo = *defaultHwInfo; if ((EnabledCommandStreamers::All == enabledCommandStreamers) && localHwInfo.gtSystemInfo.SliceCount < 8) { overridePlatformConfigForAllEnginesSupport(localHwInfo); } if (numberOfTiles > 1) { localHwInfo.gtSystemInfo.MultiTileArchInfo.IsValid = (numberOfEnabledTiles > 1) ? 1 : 0; localHwInfo.gtSystemInfo.MultiTileArchInfo.TileCount = numberOfEnabledTiles; localHwInfo.gtSystemInfo.MultiTileArchInfo.TileMask = 0; for (uint32_t i = 0; i < numberOfEnabledTiles; i++) { localHwInfo.gtSystemInfo.MultiTileArchInfo.TileMask |= (1 << i); } } localHwInfo.capabilityTable.blitterOperationsSupported = true; if (DebugManager.flags.EnableBlitterOperationsSupport.get() != -1) { localHwInfo.capabilityTable.blitterOperationsSupported = !!DebugManager.flags.EnableBlitterOperationsSupport.get(); } auto &hwHelper = HwHelper::get(localHwInfo.platform.eRenderCoreFamily); auto engineType = getChosenEngineType(localHwInfo); auto renderEngine = aub_stream::NUM_ENGINES; for (auto &engine : hwHelper.getGpgpuEngineInstances(localHwInfo)) { if (!EngineHelpers::isCcs(engine.first)) { renderEngine = engine.first; break; } } ASSERT_NE(aub_stream::NUM_ENGINES, renderEngine); auto renderEngineName = hwHelper.getCsTraits(renderEngine).name; std::stringstream strfilename; strfilename << ApiSpecificConfig::getAubPrefixForSpecificApi(); strfilename << testInfo->test_case_name() << "_" << testInfo->name() << "_"; if (EnabledCommandStreamers::Single == enabledCommandStreamers) { 
strfilename << renderEngineName; } else if (EnabledCommandStreamers::Dual == enabledCommandStreamers) { strfilename << renderEngineName << "_CCS0"; } else if (EnabledCommandStreamers::All == enabledCommandStreamers) { strfilename << renderEngineName << "_CCS0_3"; // xehp_config_name_RCS_CCS0_3.aub } auto filename = AUBCommandStreamReceiver::createFullFilePath(localHwInfo, strfilename.str(), rootDeviceIndex); DebugManager.flags.AUBDumpCaptureFileName.set(filename); auto createCommandQueueForEngine = [&](uint32_t tileNumber, size_t engineFamily, size_t engineIndex) { cl_queue_properties properties[] = {CL_QUEUE_FAMILY_INTEL, engineFamily, CL_QUEUE_INDEX_INTEL, engineIndex, 0}; auto clQueue = clCreateCommandQueueWithProperties(context.get(), tileDevices[tileNumber], properties, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); return std::unique_ptr(castToObject(clQueue)); }; DebugManager.flags.RenderCompressedBuffersEnabled.set(enableCompression); DebugManager.flags.RenderCompressedImagesEnabled.set(enableCompression); DebugManager.flags.EnableBlitterForEnqueueOperations.set(false); platformsImpl->clear(); VariableBackup backup(&ultHwConfig); ultHwConfig.useHwCsr = true; constructPlatform()->peekExecutionEnvironment()->prepareRootDeviceEnvironments(1u); platform()->peekExecutionEnvironment()->rootDeviceEnvironments[rootDeviceIndex]->setHwInfo(&localHwInfo); initPlatform(); rootDevice = platform()->getClDevice(0); EXPECT_EQ(rootDeviceIndex, rootDevice->getRootDeviceIndex()); { cl_device_id deviceId = rootDevice; ClDeviceVector clDeviceVector{&deviceId, 1}; if (numberOfTiles > 1) { for (uint32_t i = 0; i < numberOfTiles; i++) { clDeviceVector.push_back(rootDevice->getNearestGenericSubDevice(i)); } } context.reset(MockContext::create(nullptr, clDeviceVector, nullptr, nullptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); } commandQueues.resize(numberOfTiles); for (uint32_t tile = 0; tile < numberOfTiles; tile++) { tileDevices.push_back(rootDevice->getNearestGenericSubDevice(tile)); 
EXPECT_NE(nullptr, tileDevices[tile]); if (EnabledCommandStreamers::Single == enabledCommandStreamers) { if (EngineHelpers::isCcs(engineType)) { auto familyQueue = tileDevices[tile]->getDevice().getEngineGroupIndexFromEngineGroupType(EngineGroupType::Compute); commandQueues[tile].push_back(createCommandQueueForEngine(tile, familyQueue, 0)); } else { auto familyQueue = tileDevices[tile]->getDevice().getEngineGroupIndexFromEngineGroupType(EngineGroupType::RenderCompute); commandQueues[tile].push_back(createCommandQueueForEngine(tile, familyQueue, 0)); } } else if (EnabledCommandStreamers::Dual == enabledCommandStreamers) { auto rcsQueue = tileDevices[tile]->getDevice().getEngineGroupIndexFromEngineGroupType(EngineGroupType::RenderCompute); auto ccsQueue = tileDevices[tile]->getDevice().getEngineGroupIndexFromEngineGroupType(EngineGroupType::Compute); commandQueues[tile].push_back(createCommandQueueForEngine(tile, rcsQueue, 0)); commandQueues[tile].push_back(createCommandQueueForEngine(tile, ccsQueue, 0)); } else if (EnabledCommandStreamers::All == enabledCommandStreamers) { auto rcsQueue = tileDevices[tile]->getDevice().getEngineGroupIndexFromEngineGroupType(EngineGroupType::RenderCompute); auto ccsQueue = tileDevices[tile]->getDevice().getEngineGroupIndexFromEngineGroupType(EngineGroupType::Compute); commandQueues[tile].push_back(createCommandQueueForEngine(tile, rcsQueue, 0)); commandQueues[tile].push_back(createCommandQueueForEngine(tile, ccsQueue, 0)); commandQueues[tile].push_back(createCommandQueueForEngine(tile, ccsQueue, 1)); commandQueues[tile].push_back(createCommandQueueForEngine(tile, ccsQueue, 2)); commandQueues[tile].push_back(createCommandQueueForEngine(tile, ccsQueue, 3)); } } { cl_int retVal = CL_SUCCESS; cl_device_id deviceId = rootDevice; multiTileDefaultContext.reset(MockContext::create(nullptr, ClDeviceVector(&deviceId, 1), nullptr, nullptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); } } void MulticontextAubFixture::TearDown() { auto filename = 
DebugManager.flags.AUBDumpCaptureFileName.get(); std::string tileString = std::to_string(numberOfEnabledTiles) + "tx"; if (numberOfEnabledTiles > 1) { EXPECT_NE(std::string::npos, filename.find(tileString)); } else { EXPECT_EQ(std::string::npos, filename.find(tileString)); } } void MulticontextAubFixture::overridePlatformConfigForAllEnginesSupport(HardwareInfo &localHwInfo) { const ::testing::TestInfo *const testInfo = ::testing::UnitTest::GetInstance()->current_test_info(); printf("\nWARNING: Platform configuration for %s_%s test forced to %dtx8x4x16\n", testInfo->test_case_name(), testInfo->name(), numberOfEnabledTiles); bool setupCalled = false; if (localHwInfo.platform.eRenderCoreFamily == IGFX_XE_HP_CORE) { #ifdef SUPPORT_XE_HP_SDV if (localHwInfo.platform.eProductFamily == IGFX_XE_HP_SDV) { setupCalled = true; XE_HP_SDV_CONFIG::setupHardwareInfoMultiTile(&localHwInfo, true, true); // Mock values localHwInfo.gtSystemInfo.SliceCount = 8; localHwInfo.gtSystemInfo.SubSliceCount = 32; localHwInfo.gtSystemInfo.EUCount = 512; localHwInfo.gtSystemInfo.CCSInfo.IsValid = true; localHwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled = 4; localHwInfo.gtSystemInfo.CCSInfo.Instances.CCSEnableMask = 0b1111; } #endif } if (localHwInfo.platform.eRenderCoreFamily == IGFX_XE_HPG_CORE) { #ifdef SUPPORT_DG2 if (localHwInfo.platform.eProductFamily == IGFX_DG2) { ASSERT_TRUE(numberOfEnabledTiles == 1); setupCalled = true; DG2_CONFIG::setupHardwareInfoMultiTile(&localHwInfo, true, false); // Mock values localHwInfo.gtSystemInfo.SliceCount = 8; localHwInfo.gtSystemInfo.SubSliceCount = 32; localHwInfo.gtSystemInfo.EUCount = 512; localHwInfo.gtSystemInfo.CCSInfo.IsValid = true; localHwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled = 4; localHwInfo.gtSystemInfo.CCSInfo.Instances.CCSEnableMask = 0b1111; } #endif } if (localHwInfo.platform.eRenderCoreFamily == IGFX_XE_HPC_CORE) { #ifdef SUPPORT_PVC if (localHwInfo.platform.eProductFamily == IGFX_PVC) { setupCalled = true; 
PVC_CONFIG::setupHardwareInfoMultiTile(&localHwInfo, true, true); // Mock values localHwInfo.gtSystemInfo.SliceCount = 8; localHwInfo.gtSystemInfo.SubSliceCount = 64; localHwInfo.gtSystemInfo.EUCount = 512; localHwInfo.gtSystemInfo.CCSInfo.IsValid = true; localHwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled = 4; localHwInfo.gtSystemInfo.CCSInfo.Instances.CCSEnableMask = 0b1111; } #endif } adjustPlatformOverride(localHwInfo, setupCalled); ASSERT_TRUE(setupCalled); } } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/fixtures/multicontext_aub_fixture.h000066400000000000000000000112141422164147700327750ustar00rootroot00000000000000/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/command_stream_receiver_with_aub_dump.h" #include "shared/source/command_stream/tbx_command_stream_receiver_hw.h" #include "shared/source/helpers/hw_info.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/unit_test/tests_configuration.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "gtest/gtest.h" #include #include namespace NEO { class MockDevice; struct MulticontextAubFixture { enum class EnabledCommandStreamers { Single, // default only Dual, // RCS + CCS0 All, // RCS + CCS0-3 }; void SetUp(uint32_t numberOfTiles, EnabledCommandStreamers enabledCommandStreamers, bool enableCompression); void TearDown(); template CommandStreamReceiverSimulatedCommonHw *getSimulatedCsr(uint32_t tile, uint32_t engine) { using CsrWithAubDump = CommandStreamReceiverWithAUBDump>; using SimulatedCsr = CommandStreamReceiverSimulatedCommonHw; SimulatedCsr *simulatedCsr = nullptr; if (testMode == TestMode::AubTestsWithTbx) { auto csrWithAubDump = static_cast(&commandQueues[tile][engine]->getGpgpuCommandStreamReceiver()); simulatedCsr = static_cast(csrWithAubDump); } else { simulatedCsr = 
static_cast(&commandQueues[tile][engine]->getGpgpuCommandStreamReceiver()); } return simulatedCsr; } template void expectMemory(void *gfxAddress, const void *srcAddress, size_t length, uint32_t tile, uint32_t engine) { CommandStreamReceiverSimulatedCommonHw *csrSimulated = getSimulatedCsr(tile, engine); if (testMode == TestMode::AubTestsWithTbx) { auto tbxCsr = csrSimulated; EXPECT_TRUE(tbxCsr->expectMemoryEqual(gfxAddress, srcAddress, length)); csrSimulated = static_cast *>( static_cast> *>(csrSimulated)->aubCSR.get()); } if (csrSimulated) { csrSimulated->expectMemoryEqual(gfxAddress, srcAddress, length); } } template void expectMemoryNotEqual(void *gfxAddress, const void *srcAddress, size_t length, uint32_t tile, uint32_t engine) { CommandStreamReceiverSimulatedCommonHw *csrSimulated = getSimulatedCsr(tile, engine); if (testMode == TestMode::AubTestsWithTbx) { auto tbxCsr = csrSimulated; EXPECT_TRUE(tbxCsr->expectMemoryNotEqual(gfxAddress, srcAddress, length)); csrSimulated = static_cast *>( static_cast> *>(csrSimulated)->aubCSR.get()); } if (csrSimulated) { csrSimulated->expectMemoryNotEqual(gfxAddress, srcAddress, length); } } template void expectMemoryCompressed(void *gfxAddress, const void *srcAddress, size_t length, uint32_t tile, uint32_t engine) { CommandStreamReceiverSimulatedCommonHw *csrSimulated = getSimulatedCsr(tile, engine); if (testMode == TestMode::AubTestsWithTbx) { auto tbxCsr = csrSimulated; EXPECT_TRUE(tbxCsr->expectMemoryCompressed(gfxAddress, srcAddress, length)); csrSimulated = static_cast *>( static_cast> *>(csrSimulated)->aubCSR.get()); } if (csrSimulated) { csrSimulated->expectMemoryCompressed(gfxAddress, srcAddress, length); } } void overridePlatformConfigForAllEnginesSupport(HardwareInfo &localHwInfo); void adjustPlatformOverride(HardwareInfo &localHwInfo, bool &setupCalled); DebugManagerStateRestore restore; const uint32_t rootDeviceIndex = 0u; uint32_t numberOfEnabledTiles = 0; std::vector tileDevices; ClDevice *rootDevice = nullptr; 
std::unique_ptr context; std::unique_ptr multiTileDefaultContext; std::vector>> commandQueues; }; } // namespace NEO multicontext_aub_fixture_extended.cpp000066400000000000000000000004611422164147700351330ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/fixtures/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/aub_tests/fixtures/multicontext_aub_fixture.h" namespace NEO { void MulticontextAubFixture::adjustPlatformOverride(HardwareInfo &localHwInfo, bool &setupCalled) { } } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/fixtures/run_kernel_fixture.h000066400000000000000000000052361422164147700315620ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/helpers/file_io.h" #include "shared/test/common/helpers/test_files.h" #include "shared/test/common/libult/global_environment.h" #include "opencl/source/program/program.h" #include "opencl/test/unit_test/aub_tests/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/aub_tests/command_stream/aub_command_stream_fixture.h" #include "opencl/test/unit_test/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/command_queue/command_queue_fixture.h" #include "opencl/test/unit_test/command_stream/command_stream_fixture.h" #include "opencl/test/unit_test/fixtures/run_kernel_fixture.h" namespace NEO { //////////////////////////////////////////////////////////////////////////////// // Factory where all command stream traffic funnels to an AUB file //////////////////////////////////////////////////////////////////////////////// struct AUBRunKernelFixtureFactory : public RunKernelFixtureFactory { typedef AUBCommandStreamFixture CommandStreamFixture; }; 
//////////////////////////////////////////////////////////////////////////////// // RunKernelFixture // Instantiates a fixture based on the supplied fixture factory. // Performs proper initialization/shutdown of various elements in factory. // Used by most tests for integration testing with command queues. //////////////////////////////////////////////////////////////////////////////// template class RunKernelFixture : public CommandEnqueueAUBFixture { public: RunKernelFixture() { } void SetUp() override { CommandEnqueueAUBFixture::SetUp(); } void TearDown() override { CommandEnqueueAUBFixture::TearDown(); } protected: Program *CreateProgramFromBinary( const std::string &binaryFileName) { cl_int retVal = CL_SUCCESS; EXPECT_EQ(true, fileExists(binaryFileName)); size_t sourceSize = 0; auto pSource = loadDataFromFile(binaryFileName.c_str(), sourceSize); EXPECT_NE(0u, sourceSize); EXPECT_NE(nullptr, pSource); Program *pProgram = nullptr; const unsigned char *binaries[1] = {reinterpret_cast(pSource.get())}; pProgram = Program::create( context, context->getDevices(), &sourceSize, binaries, nullptr, retVal); EXPECT_EQ(retVal, CL_SUCCESS); EXPECT_NE(pProgram, nullptr); return pProgram; } }; } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/fixtures/simple_arg_fixture.h000066400000000000000000000102101422164147700315240ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/command_stream_receiver.h" #include "opencl/test/unit_test/aub_tests/command_stream/aub_command_stream_fixture.h" #include "opencl/test/unit_test/command_queue/command_queue_fixture.h" #include "opencl/test/unit_test/command_stream/command_stream_fixture.h" #include "opencl/test/unit_test/fixtures/simple_arg_fixture.h" #include "opencl/test/unit_test/fixtures/simple_arg_kernel_fixture.h" #include "opencl/test/unit_test/indirect_heap/indirect_heap_fixture.h" namespace 
NEO { //////////////////////////////////////////////////////////////////////////////// // Factory where all command stream traffic funnels to an AUB file //////////////////////////////////////////////////////////////////////////////// struct AUBSimpleArgFixtureFactory : public SimpleArgFixtureFactory, public IndirectHeapFixture { typedef AUBCommandStreamFixture CommandStreamFixture; }; //////////////////////////////////////////////////////////////////////////////// // SimpleArgTest // Instantiates a fixture based on the supplied fixture factory. // Performs proper initialization/shutdown of various elements in factory. // Used by most tests for integration testing with command queues. //////////////////////////////////////////////////////////////////////////////// template struct SimpleArgFixture : public FixtureFactory::IndirectHeapFixture, public FixtureFactory::CommandStreamFixture, public FixtureFactory::CommandQueueFixture, public FixtureFactory::KernelFixture, public ClDeviceFixture { typedef typename FixtureFactory::IndirectHeapFixture IndirectHeapFixture; typedef typename FixtureFactory::CommandStreamFixture CommandStreamFixture; typedef typename FixtureFactory::CommandQueueFixture CommandQueueFixture; typedef typename FixtureFactory::KernelFixture KernelFixture; using AUBCommandStreamFixture::SetUp; using CommandQueueFixture::pCmdQ; using CommandStreamFixture::pCS; using IndirectHeapFixture::SetUp; using KernelFixture::pKernel; using KernelFixture::SetUp; public: void SetUp() override { ClDeviceFixture::SetUp(); ASSERT_NE(nullptr, pClDevice); CommandQueueFixture::SetUp(pClDevice, 0); ASSERT_NE(nullptr, pCmdQ); CommandStreamFixture::SetUp(pCmdQ); ASSERT_NE(nullptr, pCS); IndirectHeapFixture::SetUp(pCmdQ); KernelFixture::SetUp(pClDevice); ASSERT_NE(nullptr, pKernel); argVal = static_cast(0x22222222); pDestMemory = alignedMalloc(sizeUserMemory, 4096); ASSERT_NE(nullptr, pDestMemory); pExpectedMemory = alignedMalloc(sizeUserMemory, 4096); ASSERT_NE(nullptr, 
pExpectedMemory); // Initialize user memory to known values memset(pDestMemory, 0x11, sizeUserMemory); memset(pExpectedMemory, 0x22, sizeUserMemory); pKernel->setArg(0, sizeof(int), &argVal); pKernel->setArgSvm(1, sizeUserMemory, pDestMemory, nullptr, 0u); outBuffer = AUBCommandStreamFixture::createResidentAllocationAndStoreItInCsr(pDestMemory, sizeUserMemory); ASSERT_NE(nullptr, outBuffer); outBuffer->setAllocationType(AllocationType::BUFFER); outBuffer->setMemObjectsAllocationWithWritableFlags(true); } void TearDown() override { if (pExpectedMemory) { alignedFree(pExpectedMemory); pExpectedMemory = nullptr; } if (pDestMemory) { alignedFree(pDestMemory); pDestMemory = nullptr; } KernelFixture::TearDown(); IndirectHeapFixture::TearDown(); CommandStreamFixture::TearDown(); CommandQueueFixture::TearDown(); ClDeviceFixture::TearDown(); } int argVal = 0; void *pDestMemory = nullptr; void *pExpectedMemory = nullptr; size_t sizeUserMemory = 128 * sizeof(float); GraphicsAllocation *outBuffer = nullptr; }; } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/fixtures/unified_memory_fixture.h000066400000000000000000000054021422164147700324240ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/constants.h" #include "opencl/source/api/api.h" #include "opencl/test/unit_test/aub_tests/fixtures/aub_fixture.h" #include "opencl/test/unit_test/mocks/mock_platform.h" namespace NEO { namespace PagaFaultManagerTestConfig { extern bool disabled; } class UnifiedMemoryAubFixture : public AUBFixture { public: using AUBFixture::TearDown; cl_int retVal = CL_SUCCESS; const size_t dataSize = MemoryConstants::megaByte; bool skipped = false; void SetUp() override { if (PagaFaultManagerTestConfig::disabled) { skipped = true; GTEST_SKIP(); } AUBFixture::SetUp(nullptr); if (!platform()->peekExecutionEnvironment()->memoryManager->getPageFaultManager()) { skipped = 
true; GTEST_SKIP(); } } void *allocateUSM(InternalMemoryType type) { void *ptr = nullptr; if (!this->skipped) { switch (type) { case DEVICE_UNIFIED_MEMORY: ptr = clDeviceMemAllocINTEL(this->context, this->device.get(), nullptr, dataSize, 0, &retVal); break; case HOST_UNIFIED_MEMORY: ptr = clHostMemAllocINTEL(this->context, nullptr, dataSize, 0, &retVal); break; case SHARED_UNIFIED_MEMORY: ptr = clSharedMemAllocINTEL(this->context, this->device.get(), nullptr, dataSize, 0, &retVal); break; default: ptr = new char[dataSize]; break; } EXPECT_EQ(retVal, CL_SUCCESS); EXPECT_NE(ptr, nullptr); } return ptr; } void freeUSM(void *ptr, InternalMemoryType type) { if (!this->skipped) { switch (type) { case DEVICE_UNIFIED_MEMORY: case HOST_UNIFIED_MEMORY: case SHARED_UNIFIED_MEMORY: retVal = clMemFreeINTEL(this->context, ptr); break; default: delete[] static_cast(ptr); break; } EXPECT_EQ(retVal, CL_SUCCESS); } } void writeToUsmMemory(std::vector data, void *ptr, InternalMemoryType type) { if (!this->skipped) { switch (type) { case DEVICE_UNIFIED_MEMORY: retVal = clEnqueueMemcpyINTEL(this->pCmdQ, true, ptr, data.data(), dataSize, 0, nullptr, nullptr); break; default: std::copy(data.begin(), data.end(), static_cast(ptr)); break; } EXPECT_EQ(retVal, CL_SUCCESS); } } }; } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/gen11/000077500000000000000000000000001422164147700245335ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/gen11/CMakeLists.txt000066400000000000000000000003531422164147700272740ustar00rootroot00000000000000# # Copyright (C) 2019-2021 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_GEN11) target_sources(igdrcl_aub_tests PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) add_subdirectories() endif() 
compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/gen11/batch_buffer/000077500000000000000000000000001422164147700271455ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/gen11/batch_buffer/CMakeLists.txt000066400000000000000000000005141422164147700317050ustar00rootroot00000000000000# # Copyright (C) 2019-2021 Intel Corporation # # SPDX-License-Identifier: MIT # target_sources(igdrcl_aub_tests PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/aub_batch_buffer_tests_gen11.h ${CMAKE_CURRENT_SOURCE_DIR}/aub_batch_buffer_tests_gen11.cpp ) aub_batch_buffer_tests_gen11.cpp000066400000000000000000000010671422164147700352540ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/gen11/batch_buffer/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "aub_batch_buffer_tests_gen11.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" using Gen11AubBatchBufferTests = Test; static constexpr auto gpuBatchBufferAddr = 0x800400001000; // 48-bit GPU address GEN11TEST_F(Gen11AubBatchBufferTests, givenSimpleRCSWithBatchBufferWhenItHasMSBSetInGpuAddressThenAUBShouldBeSetupSuccessfully) { setupAUBWithBatchBuffer(pDevice, aub_stream::ENGINE_RCS, gpuBatchBufferAddr); } aub_batch_buffer_tests_gen11.h000066400000000000000000000153621422164147700347240ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/gen11/batch_buffer/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/aub/aub_helper.h" #include "opencl/test/unit_test/aub_tests/command_stream/aub_mem_dump_tests.h" template void setupAUBWithBatchBuffer(const NEO::Device *pDevice, aub_stream::EngineType engineType, uint64_t gpuBatchBufferAddr) { typedef typename NEO::AUBFamilyMapper::AUB AUB; const auto &csTraits = 
NEO::CommandStreamReceiverSimulatedCommonHw::getCsTraits(engineType); auto mmioBase = csTraits.mmioBase; uint64_t physAddress = 0x10000; NEO::AUBCommandStreamReceiver::AubFileStream aubFile; std::string filePath(NEO::folderAUB); filePath.append(Os::fileSeparator); std::string baseName("simple"); baseName.append(csTraits.name); baseName.append("WithBatchBuffer"); baseName.append(".aub"); filePath.append(getAubFileName(pDevice, baseName)); aubFile.fileHandle.open(filePath.c_str(), std::ofstream::binary); // Header auto &hwInfo = pDevice->getHardwareInfo(); const auto &hwInfoConfig = *NEO::HwInfoConfig::get(hwInfo.platform.eProductFamily); aubFile.init(hwInfoConfig.getAubStreamSteppingFromHwRevId(hwInfo), AUB::Traits::device); aubFile.writeMMIO(AubMemDump::computeRegisterOffset(mmioBase, 0x229c), 0xffff8280); const size_t sizeHWSP = 0x1000; const size_t alignHWSP = 0x1000; auto pGlobalHWStatusPage = alignedMalloc(sizeHWSP, alignHWSP); uint32_t ggttGlobalHardwareStatusPage = (uint32_t)((uintptr_t)pGlobalHWStatusPage); AubGTTData data = {true, false}; AUB::reserveAddressGGTT(aubFile, ggttGlobalHardwareStatusPage, sizeHWSP, physAddress, data); physAddress += sizeHWSP; aubFile.writeMMIO(AubMemDump::computeRegisterOffset(mmioBase, 0x2080), ggttGlobalHardwareStatusPage); using MI_NOOP = typename FamilyType::MI_NOOP; using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END; // create a user mode batch buffer auto physBatchBuffer = physAddress; const auto sizeBatchBuffer = 0x1000; auto gpuBatchBuffer = static_cast(gpuBatchBufferAddr); physAddress += sizeBatchBuffer; NEO::AubHelperHw aubHelperHw(false); AUB::reserveAddressPPGTT(aubFile, gpuBatchBuffer, sizeBatchBuffer, physBatchBuffer, 7, aubHelperHw); uint8_t batchBuffer[sizeBatchBuffer]; auto noop = FamilyType::cmdInitNoop; uint32_t noopId = 0xbaadd; { auto pBatchBuffer = (void *)batchBuffer; *(MI_NOOP *)pBatchBuffer = noop; pBatchBuffer = 
ptrOffset(pBatchBuffer, sizeof(MI_NOOP)); *(MI_NOOP *)pBatchBuffer = noop; pBatchBuffer = ptrOffset(pBatchBuffer, sizeof(MI_NOOP)); *(MI_NOOP *)pBatchBuffer = noop; pBatchBuffer = ptrOffset(pBatchBuffer, sizeof(MI_NOOP)); noop.TheStructure.Common.IdentificationNumberRegisterWriteEnable = true; noop.TheStructure.Common.IdentificationNumber = noopId++; *(MI_NOOP *)pBatchBuffer = noop; pBatchBuffer = ptrOffset(pBatchBuffer, sizeof(MI_NOOP)); *(MI_BATCH_BUFFER_END *)pBatchBuffer = FamilyType::cmdInitBatchBufferEnd; pBatchBuffer = ptrOffset(pBatchBuffer, sizeof(MI_BATCH_BUFFER_END)); auto sizeBufferUsed = ptrDiff(pBatchBuffer, batchBuffer); AUB::addMemoryWrite(aubFile, physBatchBuffer, batchBuffer, sizeBufferUsed, AubMemDump::AddressSpaceValues::TraceNonlocal, AubMemDump::DataTypeHintValues::TraceBatchBuffer); } const size_t sizeRing = 0x4 * 0x1000; const size_t alignRing = 0x1000; size_t sizeCommands = 0; auto pRing = alignedMalloc(sizeRing, alignRing); auto ggttRing = (uint32_t)(uintptr_t)pRing; auto physRing = physAddress; physAddress += sizeRing; auto rRing = AUB::reserveAddressGGTT(aubFile, ggttRing, sizeRing, physRing, data); ASSERT_NE(static_cast(-1), rRing); EXPECT_EQ(rRing, physRing); auto cur = (uint32_t *)pRing; auto bbs = FamilyType::cmdInitBatchBufferStart; bbs.setBatchBufferStartAddress(gpuBatchBuffer); bbs.setAddressSpaceIndicator(MI_BATCH_BUFFER_START::ADDRESS_SPACE_INDICATOR_PPGTT); *(MI_BATCH_BUFFER_START *)cur = bbs; cur = ptrOffset(cur, sizeof(MI_BATCH_BUFFER_START)); noop.TheStructure.Common.IdentificationNumberRegisterWriteEnable = true; noop.TheStructure.Common.IdentificationNumber = noopId; *cur++ = noop.TheStructure.RawData[0]; sizeCommands = ptrDiff(cur, pRing); AUB::addMemoryWrite(aubFile, physRing, pRing, sizeCommands, AubMemDump::AddressSpaceValues::TraceNonlocal, csTraits.aubHintCommandBuffer); auto sizeLRCA = csTraits.sizeLRCA; auto pLRCABase = alignedMalloc(csTraits.sizeLRCA, csTraits.alignLRCA); csTraits.initialize(pLRCABase); 
csTraits.setRingHead(pLRCABase, 0x0000); csTraits.setRingTail(pLRCABase, static_cast(sizeCommands)); csTraits.setRingBase(pLRCABase, ggttRing); auto ringCtrl = static_cast((sizeRing - 0x1000) | 1); csTraits.setRingCtrl(pLRCABase, ringCtrl); auto ggttLRCA = static_cast(reinterpret_cast(pLRCABase)); auto physLRCA = physAddress; physAddress += sizeLRCA; AUB::reserveAddressGGTT(aubFile, ggttLRCA, sizeLRCA, physLRCA, data); AUB::addMemoryWrite(aubFile, physLRCA, pLRCABase, sizeLRCA, AubMemDump::AddressSpaceValues::TraceNonlocal, csTraits.aubHintLRCA); typename AUB::MiContextDescriptorReg contextDescriptor = {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}; contextDescriptor.sData.Valid = true; contextDescriptor.sData.ForcePageDirRestore = false; contextDescriptor.sData.ForceRestore = false; contextDescriptor.sData.Legacy = true; contextDescriptor.sData.FaultSupport = 0; contextDescriptor.sData.PrivilegeAccessOrPPGTT = true; contextDescriptor.sData.ADor64bitSupport = AUB::Traits::addressingBits > 32; contextDescriptor.sData.LogicalRingCtxAddress = (uintptr_t)pLRCABase / 4096; contextDescriptor.sData.ContextID = 0; aubFile.writeMMIO(AubMemDump::computeRegisterOffset(mmioBase, 0x2510), contextDescriptor.ulData[0]); aubFile.writeMMIO(AubMemDump::computeRegisterOffset(mmioBase, 0x2514), contextDescriptor.ulData[1]); // Load our new exec list aubFile.writeMMIO(AubMemDump::computeRegisterOffset(mmioBase, 0x2550), 1); // Poll until HW complete using AubMemDump::CmdServicesMemTraceRegisterPoll; aubFile.registerPoll( AubMemDump::computeRegisterOffset(mmioBase, 0x2234), //EXECLIST_STATUS 0x00008000, 0x00008000, false, CmdServicesMemTraceRegisterPoll::TimeoutActionValues::Abort); alignedFree(pRing); alignedFree(pLRCABase); alignedFree(pGlobalHWStatusPage); aubFile.fileHandle.close(); } 
compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/gen12lp/000077500000000000000000000000001422164147700250705ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/gen12lp/CMakeLists.txt000066400000000000000000000004711422164147700276320ustar00rootroot00000000000000# # Copyright (C) 2019-2021 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_GEN12LP) target_sources(igdrcl_aub_tests PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/aub_mem_dump_tests_gen12lp.cpp ) add_subdirectories() endif() compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/gen12lp/aub_mem_dump_tests_gen12lp.cpp000066400000000000000000000007121422164147700330000ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/aub_tests/command_stream/aub_mem_dump_tests.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" namespace NEO { using Gen12LPAubMemDumpTests = Test; GEN12LPTEST_F(Gen12LPAubMemDumpTests, GivenCcsThenExpectationsAreMet) { setupAUB(pDevice, aub_stream::ENGINE_CCS); } } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/gen12lp/batch_buffer/000077500000000000000000000000001422164147700275025ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/gen12lp/batch_buffer/CMakeLists.txt000066400000000000000000000005201422164147700322370ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT # target_sources(igdrcl_aub_tests PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/aub_batch_buffer_tests_gen12lp.h ${CMAKE_CURRENT_SOURCE_DIR}/aub_batch_buffer_tests_gen12lp.cpp ) aub_batch_buffer_tests_gen12lp.cpp000066400000000000000000000030611422164147700361420ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/gen12lp/batch_buffer/* * 
Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "aub_batch_buffer_tests_gen12lp.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "shared/source/utilities/tag_allocator.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/event/event.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/aub_tests/command_stream/aub_command_stream_fixture.h" #include "opencl/test/unit_test/aub_tests/fixtures/hello_world_fixture.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" using Gen12LPAubBatchBufferTests = Test; using Gen12LPTimestampTests = Test>; static constexpr auto gpuBatchBufferAddr = 0x400400001000; // 47-bit GPU address GEN12LPTEST_F(Gen12LPAubBatchBufferTests, givenSimpleRCSWithBatchBufferWhenItHasMSBSetInGpuAddressThenAUBShouldBeSetupSuccessfully) { setupAUBWithBatchBuffer(pDevice, aub_stream::ENGINE_RCS, gpuBatchBufferAddr); } GEN12LPTEST_F(Gen12LPAubBatchBufferTests, givenSimpleCCSWithBatchBufferWhenItHasMSBSetInGpuAddressThenAUBShouldBeSetupSuccessfully) { setupAUBWithBatchBuffer(pDevice, aub_stream::ENGINE_CCS, gpuBatchBufferAddr); } aub_batch_buffer_tests_gen12lp.h000066400000000000000000000156411422164147700356160ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/gen12lp/batch_buffer/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/aub/aub_helper.h" #include "opencl/test/unit_test/aub_tests/command_stream/aub_mem_dump_tests.h" template void setupAUBWithBatchBuffer(const NEO::Device *pDevice, aub_stream::EngineType engineType, uint64_t 
gpuBatchBufferAddr) { typedef typename NEO::AUBFamilyMapper::AUB AUB; const auto &csTraits = NEO::CommandStreamReceiverSimulatedCommonHw::getCsTraits(engineType); auto mmioBase = csTraits.mmioBase; uint64_t physAddress = 0x10000; NEO::AUBCommandStreamReceiver::AubFileStream aubFile; std::string filePath(NEO::folderAUB); filePath.append(Os::fileSeparator); std::string baseName("simple"); baseName.append(csTraits.name); baseName.append("WithBatchBuffer"); baseName.append(".aub"); filePath.append(getAubFileName(pDevice, baseName)); aubFile.fileHandle.open(filePath.c_str(), std::ofstream::binary); // Header auto &hwInfo = pDevice->getHardwareInfo(); const auto &hwInfoConfig = *NEO::HwInfoConfig::get(hwInfo.platform.eProductFamily); aubFile.init(hwInfoConfig.getAubStreamSteppingFromHwRevId(hwInfo), AUB::Traits::device); aubFile.writeMMIO(AubMemDump::computeRegisterOffset(mmioBase, 0x229c), 0xffff8280); // enable CCS if (engineType == aub_stream::ENGINE_CCS) { aubFile.writeMMIO(0x0000ce90, 0x00010001); aubFile.writeMMIO(0x00014800, 0x00010001); } const size_t sizeHWSP = 0x1000; const size_t alignHWSP = 0x1000; auto pGlobalHWStatusPage = alignedMalloc(sizeHWSP, alignHWSP); uint32_t ggttGlobalHardwareStatusPage = (uint32_t)((uintptr_t)pGlobalHWStatusPage); AubGTTData data = {true, false}; AUB::reserveAddressGGTT(aubFile, ggttGlobalHardwareStatusPage, sizeHWSP, physAddress, data); physAddress += sizeHWSP; aubFile.writeMMIO(AubMemDump::computeRegisterOffset(mmioBase, 0x2080), ggttGlobalHardwareStatusPage); using MI_NOOP = typename FamilyType::MI_NOOP; using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END; // create a user mode batch buffer auto physBatchBuffer = physAddress; const auto sizeBatchBuffer = 0x1000; auto gpuBatchBuffer = static_cast(gpuBatchBufferAddr); physAddress += sizeBatchBuffer; NEO::AubHelperHw aubHelperHw(false); AUB::reserveAddressPPGTT(aubFile, gpuBatchBuffer, 
sizeBatchBuffer, physBatchBuffer, 3, aubHelperHw); uint8_t batchBuffer[sizeBatchBuffer]; auto noop = FamilyType::cmdInitNoop; uint32_t noopId = 0xbaadd; { auto pBatchBuffer = (void *)batchBuffer; *(MI_NOOP *)pBatchBuffer = noop; pBatchBuffer = ptrOffset(pBatchBuffer, sizeof(MI_NOOP)); *(MI_NOOP *)pBatchBuffer = noop; pBatchBuffer = ptrOffset(pBatchBuffer, sizeof(MI_NOOP)); *(MI_NOOP *)pBatchBuffer = noop; pBatchBuffer = ptrOffset(pBatchBuffer, sizeof(MI_NOOP)); noop.TheStructure.Common.IdentificationNumberRegisterWriteEnable = true; noop.TheStructure.Common.IdentificationNumber = noopId++; *(MI_NOOP *)pBatchBuffer = noop; pBatchBuffer = ptrOffset(pBatchBuffer, sizeof(MI_NOOP)); *(MI_BATCH_BUFFER_END *)pBatchBuffer = FamilyType::cmdInitBatchBufferEnd; pBatchBuffer = ptrOffset(pBatchBuffer, sizeof(MI_BATCH_BUFFER_END)); auto sizeBufferUsed = ptrDiff(pBatchBuffer, batchBuffer); AUB::addMemoryWrite(aubFile, physBatchBuffer, batchBuffer, sizeBufferUsed, AubMemDump::AddressSpaceValues::TraceNonlocal, AubMemDump::DataTypeHintValues::TraceBatchBuffer); } const size_t sizeRing = 0x4 * 0x1000; const size_t alignRing = 0x1000; size_t sizeCommands = 0; auto pRing = alignedMalloc(sizeRing, alignRing); auto ggttRing = (uint32_t)(uintptr_t)pRing; auto physRing = physAddress; physAddress += sizeRing; auto rRing = AUB::reserveAddressGGTT(aubFile, ggttRing, sizeRing, physRing, data); ASSERT_NE(static_cast(-1), rRing); EXPECT_EQ(rRing, physRing); auto cur = (uint32_t *)pRing; auto bbs = FamilyType::cmdInitBatchBufferStart; bbs.setBatchBufferStartAddress(gpuBatchBuffer); bbs.setAddressSpaceIndicator(MI_BATCH_BUFFER_START::ADDRESS_SPACE_INDICATOR_PPGTT); *(MI_BATCH_BUFFER_START *)cur = bbs; cur = ptrOffset(cur, sizeof(MI_BATCH_BUFFER_START)); noop.TheStructure.Common.IdentificationNumberRegisterWriteEnable = true; noop.TheStructure.Common.IdentificationNumber = noopId; *cur++ = noop.TheStructure.RawData[0]; sizeCommands = ptrDiff(cur, pRing); AUB::addMemoryWrite(aubFile, physRing, 
pRing, sizeCommands, AubMemDump::AddressSpaceValues::TraceNonlocal, csTraits.aubHintCommandBuffer); auto sizeLRCA = csTraits.sizeLRCA; auto pLRCABase = alignedMalloc(csTraits.sizeLRCA, csTraits.alignLRCA); csTraits.initialize(pLRCABase); csTraits.setRingHead(pLRCABase, 0x0000); csTraits.setRingTail(pLRCABase, static_cast(sizeCommands)); csTraits.setRingBase(pLRCABase, ggttRing); auto ringCtrl = static_cast((sizeRing - 0x1000) | 1); csTraits.setRingCtrl(pLRCABase, ringCtrl); auto ggttLRCA = static_cast(reinterpret_cast(pLRCABase)); auto physLRCA = physAddress; physAddress += sizeLRCA; AUB::reserveAddressGGTT(aubFile, ggttLRCA, sizeLRCA, physLRCA, data); AUB::addMemoryWrite(aubFile, physLRCA, pLRCABase, sizeLRCA, AubMemDump::AddressSpaceValues::TraceNonlocal, csTraits.aubHintLRCA); typename AUB::MiContextDescriptorReg contextDescriptor = {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}; contextDescriptor.sData.Valid = true; contextDescriptor.sData.ForcePageDirRestore = false; contextDescriptor.sData.ForceRestore = false; contextDescriptor.sData.Legacy = true; contextDescriptor.sData.FaultSupport = 0; contextDescriptor.sData.PrivilegeAccessOrPPGTT = true; contextDescriptor.sData.ADor64bitSupport = AUB::Traits::addressingBits > 32; contextDescriptor.sData.LogicalRingCtxAddress = (uintptr_t)pLRCABase / 4096; contextDescriptor.sData.ContextID = 0; aubFile.writeMMIO(AubMemDump::computeRegisterOffset(mmioBase, 0x2510), contextDescriptor.ulData[0]); aubFile.writeMMIO(AubMemDump::computeRegisterOffset(mmioBase, 0x2514), contextDescriptor.ulData[1]); // Load our new exec list aubFile.writeMMIO(AubMemDump::computeRegisterOffset(mmioBase, 0x2550), 1); // Poll until HW complete using AubMemDump::CmdServicesMemTraceRegisterPoll; aubFile.registerPoll( AubMemDump::computeRegisterOffset(mmioBase, 0x2234), //EXECLIST_STATUS 0x00008000, 0x00008000, false, CmdServicesMemTraceRegisterPoll::TimeoutActionValues::Abort); alignedFree(pRing); alignedFree(pLRCABase); alignedFree(pGlobalHWStatusPage); 
aubFile.fileHandle.close(); } compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/gen9/000077500000000000000000000000001422164147700244625ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/gen9/CMakeLists.txt000066400000000000000000000003521422164147700272220ustar00rootroot00000000000000# # Copyright (C) 2018-2021 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_GEN9) target_sources(igdrcl_aub_tests PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) add_subdirectories() endif() compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/gen9/batch_buffer/000077500000000000000000000000001422164147700270745ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/gen9/batch_buffer/CMakeLists.txt000066400000000000000000000005001422164147700316270ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # target_sources(igdrcl_aub_tests PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/aub_batch_buffer_tests.h ${CMAKE_CURRENT_SOURCE_DIR}/aub_batch_buffer_tests.cpp ) aub_batch_buffer_tests.cpp000066400000000000000000000010511422164147700342010ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/gen9/batch_buffer/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "aub_batch_buffer_tests.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" using AubBatchBufferTests = Test; static constexpr auto gpuBatchBufferAddr = 0x800400001000ull; // 48-bit GPU address GEN9TEST_F(AubBatchBufferTests, givenSimpleRCSWithBatchBufferWhenItHasMSBSetInGpuAddressThenAUBShouldBeSetupSuccessfully) { setupAUBWithBatchBuffer(pDevice, aub_stream::ENGINE_RCS, gpuBatchBufferAddr); } 
aub_batch_buffer_tests.h000066400000000000000000000155421422164147700336600ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/gen9/batch_buffer/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/aub/aub_helper.h" #include "opencl/test/unit_test/aub_tests/command_stream/aub_mem_dump_tests.h" template void setupAUBWithBatchBuffer(const NEO::Device *pDevice, aub_stream::EngineType engineType, uint64_t gpuBatchBufferAddr) { typedef typename NEO::AUBFamilyMapper::AUB AUB; const auto &csTraits = NEO::CommandStreamReceiverSimulatedCommonHw::getCsTraits(engineType); auto mmioBase = csTraits.mmioBase; uint64_t physAddress = 0x10000; NEO::AUBCommandStreamReceiver::AubFileStream aubFile; std::string filePath(NEO::folderAUB); filePath.append(Os::fileSeparator); std::string baseName("simple"); baseName.append(csTraits.name); baseName.append("WithBatchBuffer"); baseName.append(".aub"); filePath.append(getAubFileName(pDevice, baseName)); aubFile.fileHandle.open(filePath.c_str(), std::ofstream::binary); // Header auto &hwInfo = pDevice->getHardwareInfo(); auto deviceId = hwInfo.capabilityTable.aubDeviceId; const auto &hwInfoConfig = *NEO::HwInfoConfig::get(hwInfo.platform.eProductFamily); aubFile.init(hwInfoConfig.getAubStreamSteppingFromHwRevId(hwInfo), deviceId); aubFile.writeMMIO(AubMemDump::computeRegisterOffset(mmioBase, 0x229c), 0xffff8280); const size_t sizeHWSP = 0x1000; const size_t alignHWSP = 0x1000; auto pGlobalHWStatusPage = alignedMalloc(sizeHWSP, alignHWSP); uint32_t ggttGlobalHardwareStatusPage = (uint32_t)((uintptr_t)pGlobalHWStatusPage); AubGTTData data = {true, false}; AUB::reserveAddressGGTT(aubFile, ggttGlobalHardwareStatusPage, sizeHWSP, physAddress, data); physAddress += sizeHWSP; aubFile.writeMMIO(AubMemDump::computeRegisterOffset(mmioBase, 0x2080), ggttGlobalHardwareStatusPage); using MI_NOOP = typename FamilyType::MI_NOOP; using 
MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END; // create a user mode batch buffer auto physBatchBuffer = physAddress; const auto sizeBatchBuffer = 0x1000; auto gpuBatchBuffer = static_cast(gpuBatchBufferAddr); physAddress += sizeBatchBuffer; NEO::AubHelperHw aubHelperHw(false); AUB::reserveAddressPPGTT(aubFile, gpuBatchBuffer, sizeBatchBuffer, physBatchBuffer, 7, aubHelperHw); uint8_t batchBuffer[sizeBatchBuffer]; auto noop = FamilyType::cmdInitNoop; uint32_t noopId = 0xbaadd; { auto pBatchBuffer = (void *)batchBuffer; *(MI_NOOP *)pBatchBuffer = noop; pBatchBuffer = ptrOffset(pBatchBuffer, sizeof(MI_NOOP)); *(MI_NOOP *)pBatchBuffer = noop; pBatchBuffer = ptrOffset(pBatchBuffer, sizeof(MI_NOOP)); *(MI_NOOP *)pBatchBuffer = noop; pBatchBuffer = ptrOffset(pBatchBuffer, sizeof(MI_NOOP)); noop.TheStructure.Common.IdentificationNumberRegisterWriteEnable = true; noop.TheStructure.Common.IdentificationNumber = noopId++; *(MI_NOOP *)pBatchBuffer = noop; pBatchBuffer = ptrOffset(pBatchBuffer, sizeof(MI_NOOP)); *(MI_BATCH_BUFFER_END *)pBatchBuffer = FamilyType::cmdInitBatchBufferEnd; pBatchBuffer = ptrOffset(pBatchBuffer, sizeof(MI_BATCH_BUFFER_END)); auto sizeBufferUsed = ptrDiff(pBatchBuffer, batchBuffer); AUB::addMemoryWrite(aubFile, physBatchBuffer, batchBuffer, sizeBufferUsed, AubMemDump::AddressSpaceValues::TraceNonlocal, AubMemDump::DataTypeHintValues::TraceBatchBuffer); } const size_t sizeRing = 0x4 * 0x1000; const size_t alignRing = 0x1000; size_t sizeCommands = 0; auto pRing = alignedMalloc(sizeRing, alignRing); auto ggttRing = (uint32_t)(uintptr_t)pRing; auto physRing = physAddress; physAddress += sizeRing; auto rRing = AUB::reserveAddressGGTT(aubFile, ggttRing, sizeRing, physRing, data); ASSERT_NE(static_cast(-1), rRing); EXPECT_EQ(rRing, physRing); auto cur = (uint32_t *)pRing; auto bbs = FamilyType::cmdInitBatchBufferStart; bbs.setBatchBufferStartAddress(gpuBatchBuffer); 
bbs.setAddressSpaceIndicator(MI_BATCH_BUFFER_START::ADDRESS_SPACE_INDICATOR_PPGTT); *(MI_BATCH_BUFFER_START *)cur = bbs; cur = ptrOffset(cur, sizeof(MI_BATCH_BUFFER_START)); noop.TheStructure.Common.IdentificationNumberRegisterWriteEnable = true; noop.TheStructure.Common.IdentificationNumber = noopId; *cur++ = noop.TheStructure.RawData[0]; sizeCommands = ptrDiff(cur, pRing); AUB::addMemoryWrite(aubFile, physRing, pRing, sizeCommands, AubMemDump::AddressSpaceValues::TraceNonlocal, csTraits.aubHintCommandBuffer); auto sizeLRCA = csTraits.sizeLRCA; auto pLRCABase = alignedMalloc(csTraits.sizeLRCA, csTraits.alignLRCA); csTraits.initialize(pLRCABase); csTraits.setRingHead(pLRCABase, 0x0000); csTraits.setRingTail(pLRCABase, static_cast(sizeCommands)); csTraits.setRingBase(pLRCABase, ggttRing); auto ringCtrl = static_cast((sizeRing - 0x1000) | 1); csTraits.setRingCtrl(pLRCABase, ringCtrl); auto ggttLRCA = static_cast(reinterpret_cast(pLRCABase)); auto physLRCA = physAddress; physAddress += sizeLRCA; AUB::reserveAddressGGTT(aubFile, ggttLRCA, sizeLRCA, physLRCA, data); AUB::addMemoryWrite(aubFile, physLRCA, pLRCABase, sizeLRCA, AubMemDump::AddressSpaceValues::TraceNonlocal, csTraits.aubHintLRCA); typename AUB::MiContextDescriptorReg contextDescriptor = {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}; contextDescriptor.sData.Valid = true; contextDescriptor.sData.ForcePageDirRestore = false; contextDescriptor.sData.ForceRestore = false; contextDescriptor.sData.Legacy = true; contextDescriptor.sData.FaultSupport = 0; contextDescriptor.sData.PrivilegeAccessOrPPGTT = true; contextDescriptor.sData.ADor64bitSupport = AUB::Traits::addressingBits > 32; contextDescriptor.sData.LogicalRingCtxAddress = (uintptr_t)pLRCABase / 4096; contextDescriptor.sData.ContextID = 0; // Submit our exec-list aubFile.writeMMIO(AubMemDump::computeRegisterOffset(mmioBase, 0x2230), 0); aubFile.writeMMIO(AubMemDump::computeRegisterOffset(mmioBase, 0x2230), 0); 
aubFile.writeMMIO(AubMemDump::computeRegisterOffset(mmioBase, 0x2230), contextDescriptor.ulData[1]); aubFile.writeMMIO(AubMemDump::computeRegisterOffset(mmioBase, 0x2230), contextDescriptor.ulData[0]); // Poll until HW complete using AubMemDump::CmdServicesMemTraceRegisterPoll; aubFile.registerPoll( AubMemDump::computeRegisterOffset(mmioBase, 0x2234), //EXECLIST_STATUS 0x100, 0x100, false, CmdServicesMemTraceRegisterPoll::TimeoutActionValues::Abort); alignedFree(pRing); alignedFree(pLRCABase); alignedFree(pGlobalHWStatusPage); aubFile.fileHandle.close(); } compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/gen9/skl/000077500000000000000000000000001422164147700252535ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/gen9/skl/CMakeLists.txt000066400000000000000000000001771422164147700300200ustar00rootroot00000000000000# # Copyright (C) 2018-2021 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_SKL) add_subdirectories() endif() compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/gen9/skl/command_queue/000077500000000000000000000000001422164147700300755ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/gen9/skl/command_queue/CMakeLists.txt000066400000000000000000000003761422164147700326430ustar00rootroot00000000000000# # Copyright (C) 2018-2021 Intel Corporation # # SPDX-License-Identifier: MIT # target_sources(igdrcl_aub_tests PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/run_kernel_aub_tests_skl.cpp ) run_kernel_aub_tests_skl.cpp000066400000000000000000000432671422164147700356240ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/gen9/skl/command_queue/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/aub_tests/fixtures/run_kernel_fixture.h" #include "opencl/test/unit_test/fixtures/two_walker_fixture.h" using namespace NEO; 
namespace ULT { class AUBRunKernelIntegrateTest : public RunKernelFixture, public ::testing::Test { typedef RunKernelFixture ParentClass; protected: void SetUp() override { ParentClass::SetUp(); } void TearDown() override { ParentClass::TearDown(); } }; SKLTEST_F(AUBRunKernelIntegrateTest, GivenOoqExecutionThenExpectationsMet) { cl_uint workDim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {16, 1, 1}; size_t localWorkSize[3] = {16, 1, 1}; cl_uint numEventsInWaitList = 0; cl_event *event0 = nullptr; cl_event *event1 = nullptr; cl_event *event2 = nullptr; cl_int retVal = CL_FALSE; std::string kernelFilename; retrieveBinaryKernelFilename(kernelFilename, "simple_kernels_", ".bin"); Program *pProgram = CreateProgramFromBinary(kernelFilename); ASSERT_NE(nullptr, pProgram); retVal = pProgram->build( pProgram->getDevices(), nullptr, false); ASSERT_EQ(CL_SUCCESS, retVal); const KernelInfo *pKernelInfo0 = pProgram->getKernelInfo("simple_kernel_0", rootDeviceIndex); ASSERT_NE(nullptr, pKernelInfo0); auto pMultiDeviceKernel0 = MultiDeviceKernel::create( pProgram, MockKernel::toKernelInfoContainer(*pKernelInfo0, rootDeviceIndex), &retVal); ASSERT_NE(nullptr, pMultiDeviceKernel0); const KernelInfo *pKernelInfo1 = pProgram->getKernelInfo("simple_kernel_1", rootDeviceIndex); ASSERT_NE(nullptr, pKernelInfo1); auto pMultiDeviceKernel1 = MultiDeviceKernel::create( pProgram, MockKernel::toKernelInfoContainer(*pKernelInfo1, rootDeviceIndex), &retVal); ASSERT_NE(nullptr, pMultiDeviceKernel1); const KernelInfo *pKernelInfo2 = pProgram->getKernelInfo("simple_kernel_2", rootDeviceIndex); ASSERT_NE(nullptr, pKernelInfo2); auto pMultiDeviceKernel2 = MultiDeviceKernel::create( pProgram, MockKernel::toKernelInfoContainer(*pKernelInfo2, rootDeviceIndex), &retVal); ASSERT_NE(nullptr, pMultiDeviceKernel2); const cl_int NUM_ELEMS = 64; const size_t BUFFER_SIZE = NUM_ELEMS * sizeof(cl_uint); cl_uint *destinationMemory1; cl_uint *destinationMemory2; cl_uint 
expectedMemory1[NUM_ELEMS]; cl_uint expectedMemory2[NUM_ELEMS]; cl_uint arg0 = 2; cl_float arg1 = 3.0f; cl_uint arg3 = 4; cl_uint arg5 = 0xBBBBBBBB; cl_uint bad_value = 0; // set to non-zero to force failure destinationMemory1 = (cl_uint *)::alignedMalloc(BUFFER_SIZE, 4096); ASSERT_NE(nullptr, destinationMemory1); destinationMemory2 = (cl_uint *)::alignedMalloc(BUFFER_SIZE, 4096); ASSERT_NE(nullptr, destinationMemory2); for (cl_int i = 0; i < NUM_ELEMS; i++) { destinationMemory1[i] = 0xA1A1A1A1; destinationMemory2[i] = 0xA2A2A2A2; expectedMemory1[i] = (arg0 + static_cast(arg1) + arg3 + bad_value); expectedMemory2[i] = arg5 + bad_value; } auto pDestinationMemory1 = &destinationMemory1[0]; auto pDestinationMemory2 = &destinationMemory2[0]; auto pExpectedMemory1 = &expectedMemory1[0]; auto pExpectedMemory2 = &expectedMemory2[0]; auto intermediateBuffer = Buffer::create( context, CL_MEM_READ_WRITE, BUFFER_SIZE, nullptr, retVal); ASSERT_NE(nullptr, intermediateBuffer); auto destinationBuffer1 = Buffer::create( context, CL_MEM_USE_HOST_PTR, BUFFER_SIZE, pDestinationMemory1, retVal); ASSERT_NE(nullptr, destinationBuffer1); //buffer may not be zero copied pDestinationMemory1 = reinterpret_cast(destinationBuffer1->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddress()); auto destinationBuffer2 = Buffer::create( context, CL_MEM_USE_HOST_PTR, BUFFER_SIZE, pDestinationMemory2, retVal); ASSERT_NE(nullptr, destinationBuffer2); //buffer may not be zero copied pDestinationMemory2 = reinterpret_cast(destinationBuffer2->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddress()); cl_mem arg2 = intermediateBuffer; cl_mem arg4 = destinationBuffer1; cl_mem arg6 = destinationBuffer2; //__kernel void simple_kernel_0(const uint arg0, const float arg1, __global uint *dst) //{ dst = arg0 + arg1 } retVal = clSetKernelArg(pMultiDeviceKernel0, 0, sizeof(cl_uint), &arg0); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clSetKernelArg(pMultiDeviceKernel0, 1, 
sizeof(cl_float), &arg1); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clSetKernelArg(pMultiDeviceKernel0, 2, sizeof(cl_mem), &arg2); ASSERT_EQ(CL_SUCCESS, retVal); //__kernel void simple_kernel_1(__global uint *src, const uint arg1, __global uint *dst) //{ dst = src + arg1 } retVal = clSetKernelArg(pMultiDeviceKernel1, 0, sizeof(cl_mem), &arg2); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clSetKernelArg(pMultiDeviceKernel1, 1, sizeof(cl_uint), &arg3); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clSetKernelArg(pMultiDeviceKernel1, 2, sizeof(cl_mem), &arg4); ASSERT_EQ(CL_SUCCESS, retVal); //__kernel void simple_kernel_2(const uint arg1, __global uint *dst) //{ dst = arg1 } retVal = clSetKernelArg(pMultiDeviceKernel2, 0, sizeof(cl_mem), &arg5); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clSetKernelArg(pMultiDeviceKernel2, 1, sizeof(cl_mem), &arg6); ASSERT_EQ(CL_SUCCESS, retVal); // Create a second command queue (beyond the default one) CommandQueue *pCmdQ2 = nullptr; pCmdQ2 = createCommandQueue(pClDevice, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE); ASSERT_NE(nullptr, pCmdQ2); auto &csr = pCmdQ2->getGpgpuCommandStreamReceiver(); csr.overrideDispatchPolicy(DispatchMode::ImmediateDispatch); retVal = pCmdQ2->enqueueKernel( pMultiDeviceKernel0->getKernel(rootDeviceIndex), workDim, globalWorkOffset, globalWorkSize, localWorkSize, numEventsInWaitList, nullptr, event0); ASSERT_EQ(CL_SUCCESS, retVal); // depends on kernel0 retVal = pCmdQ2->enqueueKernel( pMultiDeviceKernel1->getKernel(rootDeviceIndex), workDim, globalWorkOffset, globalWorkSize, localWorkSize, numEventsInWaitList, event0, event1); ASSERT_EQ(CL_SUCCESS, retVal); // independent from other kernels, can be run asynchronously retVal = pCmdQ2->enqueueKernel( pMultiDeviceKernel2->getKernel(rootDeviceIndex), workDim, globalWorkOffset, globalWorkSize, localWorkSize, numEventsInWaitList, nullptr, event2); ASSERT_EQ(CL_SUCCESS, retVal); ClHardwareParse::parseCommands(*pCmdQ2); // Compute our memory expecations based on kernel execution 
auto globalWorkItems = globalWorkSize[0] * globalWorkSize[1] * globalWorkSize[2]; auto sizeWritten = globalWorkItems * sizeof(cl_uint); AUBCommandStreamFixture::expectMemory(pDestinationMemory1, pExpectedMemory1, sizeWritten); AUBCommandStreamFixture::expectMemory(pDestinationMemory2, pExpectedMemory2, sizeWritten); // ensure we didn't overwrite existing memory if (sizeWritten < BUFFER_SIZE) { auto sizeRemaining = BUFFER_SIZE - sizeWritten; auto pUnwrittenMemory1 = (pDestinationMemory1 + sizeWritten / sizeof(cl_uint)); auto pUnwrittenMemory2 = (pDestinationMemory2 + sizeWritten / sizeof(cl_uint)); auto pExpectedUnwrittenMemory1 = &destinationMemory1[globalWorkItems]; auto pExpectedUnwrittenMemory2 = &destinationMemory2[globalWorkItems]; AUBCommandStreamFixture::expectMemory(pUnwrittenMemory1, pExpectedUnwrittenMemory1, sizeRemaining); AUBCommandStreamFixture::expectMemory(pUnwrittenMemory2, pExpectedUnwrittenMemory2, sizeRemaining); } ::alignedFree(destinationMemory1); ::alignedFree(destinationMemory2); delete intermediateBuffer; delete destinationBuffer1; delete destinationBuffer2; delete pMultiDeviceKernel0; delete pMultiDeviceKernel1; delete pMultiDeviceKernel2; delete pProgram; delete pCmdQ2; } SKLTEST_F(AUBRunKernelIntegrateTest, GivenDeviceSideVmeThenExpectationsMet) { const cl_int testWidth = 32; const cl_int testHeight = 16; const cl_uint workDim = 2; const size_t globalWorkSize[2] = {testWidth, testHeight}; const size_t *localWorkSize = nullptr; cl_uint numEventsInWaitList = 0; auto retVal = CL_INVALID_VALUE; // VME works on 16x16 macroblocks const cl_int mbWidth = testWidth / 16; const cl_int mbHeight = testHeight / 16; // 1 per macroblock (there is 1 macroblock in this test): const int PRED_BUFFER_SIZE = mbWidth * mbHeight; const int SHAPES_BUFFER_SIZE = mbWidth * mbHeight; // 4 per macroblock (valid for 8x8 mode only): const int MV_BUFFER_SIZE = testWidth * mbHeight / 4; const int RESIDUALS_BUFFER_SIZE = MV_BUFFER_SIZE; std::string kernelFilename; 
retrieveBinaryKernelFilename(kernelFilename, "vme_kernels_", ".bin"); Program *pProgram = CreateProgramFromBinary(kernelFilename); ASSERT_NE(nullptr, pProgram); retVal = pProgram->build( pProgram->getDevices(), "", false); ASSERT_EQ(CL_SUCCESS, retVal); const KernelInfo *pKernelInfo = pProgram->getKernelInfo("device_side_block_motion_estimate_intel", rootDeviceIndex); EXPECT_NE(nullptr, pKernelInfo); auto *pMultiDeviceKernel = MultiDeviceKernel::create( pProgram, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), &retVal); ASSERT_NE(pMultiDeviceKernel, nullptr); auto pKernel = pMultiDeviceKernel->getKernel(rootDeviceIndex); EXPECT_EQ(true, pKernel->isVmeKernel()); cl_image_format imageFormat; cl_image_desc imageDesc; imageFormat.image_channel_order = CL_R; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_width = testWidth; imageDesc.image_height = testHeight; imageDesc.image_depth = 0; imageDesc.image_array_size = 0; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = nullptr; const int INPUT_SIZE = testWidth * testHeight; ASSERT_GT(INPUT_SIZE, 0); auto srcMemory = (cl_uchar *)::alignedMalloc(INPUT_SIZE, 4096); ASSERT_NE(srcMemory, nullptr); memset(srcMemory, 0x00, INPUT_SIZE); auto refMemory = (cl_uchar *)::alignedMalloc(INPUT_SIZE, 4096); ASSERT_NE(refMemory, nullptr); memset(refMemory, 0x00, INPUT_SIZE); int xMovement = 7; int yMovement = 9; // pixel movement: 0xFF, 0xFF values moved from 0x0 to 7x9 for vme kernel to detect srcMemory[0] = 0xFF; // 1.0 srcMemory[1] = 0xFF; // 1.0 refMemory[xMovement + yMovement * testWidth] = 0xFF; refMemory[xMovement + yMovement * testWidth + 1] = 0xFF; cl_mem_flags flags = CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, 
context->getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); auto srcImage = Image::create( context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context->getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imageDesc, srcMemory, retVal); ASSERT_NE(nullptr, srcImage); auto refImage = Image::create( context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context->getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imageDesc, refMemory, retVal); ASSERT_NE(nullptr, refImage); cl_short2 *predMem = new cl_short2[PRED_BUFFER_SIZE]; for (int i = 0; i < PRED_BUFFER_SIZE; i++) { predMem[i].s[0] = 0; predMem[i].s[1] = 0; } auto predMvBuffer = Buffer::create( context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, PRED_BUFFER_SIZE * sizeof(cl_short2), predMem, retVal); ASSERT_NE(nullptr, predMvBuffer); auto motionVectorBuffer = Buffer::create( context, CL_MEM_WRITE_ONLY, MV_BUFFER_SIZE * sizeof(cl_short2), nullptr, retVal); ASSERT_NE(nullptr, motionVectorBuffer); auto residualsBuffer = Buffer::create( context, CL_MEM_WRITE_ONLY, RESIDUALS_BUFFER_SIZE * sizeof(cl_short), nullptr, retVal); ASSERT_NE(nullptr, residualsBuffer); auto shapesBuffer = Buffer::create( context, CL_MEM_WRITE_ONLY, SHAPES_BUFFER_SIZE * sizeof(cl_char2), nullptr, retVal); ASSERT_NE(nullptr, shapesBuffer); // kernel decl: //void block_motion_estimate_intel_noacc( // __read_only image2d_t srcImg, // IN // __read_only image2d_t refImg, // IN // __global short2* prediMVbuffer, // IN // __global short2* motion_vector_buffer, // OUT // __global ushort* residuals_buffer, // OUT // __global uchar2* shapes_buffer, // OUT // int iterations, // IN // int partition_mask) // IN cl_mem arg0 = srcImage; cl_mem arg1 = refImage; cl_mem arg2 = predMvBuffer; cl_mem arg3 = motionVectorBuffer; cl_mem arg4 = residualsBuffer; cl_mem arg5 = shapesBuffer; cl_int arg6 = mbHeight; cl_int arg7 = CL_AVC_ME_PARTITION_MASK_8x8_INTEL; retVal = clSetKernelArg(pMultiDeviceKernel, 0, 
sizeof(cl_mem), &arg0); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clSetKernelArg(pMultiDeviceKernel, 1, sizeof(cl_mem), &arg1); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clSetKernelArg(pMultiDeviceKernel, 2, sizeof(cl_mem), &arg2); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clSetKernelArg(pMultiDeviceKernel, 3, sizeof(cl_mem), &arg3); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clSetKernelArg(pMultiDeviceKernel, 4, sizeof(cl_mem), &arg4); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clSetKernelArg(pMultiDeviceKernel, 5, sizeof(cl_mem), &arg5); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clSetKernelArg(pMultiDeviceKernel, 6, sizeof(cl_int), &arg6); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clSetKernelArg(pMultiDeviceKernel, 7, sizeof(cl_int), &arg7); ASSERT_EQ(CL_SUCCESS, retVal); retVal = pCmdQ->enqueueKernel( pKernel, workDim, nullptr, globalWorkSize, localWorkSize, numEventsInWaitList, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); cl_short2 destinationMV[MV_BUFFER_SIZE]; cl_short destinationResiduals[RESIDUALS_BUFFER_SIZE]; cl_uchar2 destinationShapes[SHAPES_BUFFER_SIZE]; motionVectorBuffer->forceDisallowCPUCopy = true; residualsBuffer->forceDisallowCPUCopy = true; shapesBuffer->forceDisallowCPUCopy = true; retVal = pCmdQ->enqueueReadBuffer(motionVectorBuffer, true, 0, sizeof(destinationMV), destinationMV, nullptr, 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); retVal = pCmdQ->enqueueReadBuffer(residualsBuffer, true, 0, sizeof(destinationResiduals), destinationResiduals, nullptr, 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); retVal = pCmdQ->enqueueReadBuffer(shapesBuffer, true, 0, sizeof(destinationShapes), destinationShapes, nullptr, 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); // Check if our buffers matches expectations cl_short2 expectedMV[MV_BUFFER_SIZE]; cl_short expectedResiduals[RESIDUALS_BUFFER_SIZE]; cl_uchar2 expectedShapes[SHAPES_BUFFER_SIZE]; // This test uses 8x8 sub blocks (4 per macroblock) for (int i = 0; i < SHAPES_BUFFER_SIZE; i++) { 
expectedShapes[i].s0 = CL_AVC_ME_MAJOR_8x8_INTEL; expectedShapes[i].s1 = CL_AVC_ME_MINOR_8x8_INTEL; } for (int i = 0; i < MV_BUFFER_SIZE; i++) { expectedResiduals[i] = 0; // Second and fourth block not moved, set 0 as default. expectedMV[i].s0 = 0; expectedMV[i].s1 = 0; // First 8x8 subblock moved by 7x9 vecor as xMovement is 7 and // yMovement is 9 if (i == 0) { // times 4 since VME returns data in quarter pixels. expectedMV[i].s0 = 4 * xMovement; expectedMV[i].s1 = 4 * yMovement; } // In this test all other subblocks are empty, in 16x12 mode used in // this test vme should find match at -16 x -12 else { expectedMV[i].s0 = 4 * -16; expectedMV[i].s1 = 4 * -12; } } AUBCommandStreamFixture::expectMemory(destinationMV, expectedMV, sizeof(expectedMV)); AUBCommandStreamFixture::expectMemory(destinationResiduals, expectedResiduals, sizeof(expectedResiduals)); AUBCommandStreamFixture::expectMemory(destinationShapes, expectedShapes, sizeof(expectedShapes)); delete predMvBuffer; delete motionVectorBuffer; delete residualsBuffer; delete shapesBuffer; delete[] predMem; ::alignedFree(srcMemory); srcMemory = nullptr; delete srcImage; ::alignedFree(refMemory); refMemory = nullptr; delete refImage; delete pMultiDeviceKernel; delete pProgram; } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/mem_obj/000077500000000000000000000000001422164147700252305ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/mem_obj/CMakeLists.txt000066400000000000000000000003741422164147700277740ustar00rootroot00000000000000# # Copyright (C) 2018-2021 Intel Corporation # # SPDX-License-Identifier: MIT # target_sources(igdrcl_aub_tests PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/create_image_aub_tests.cpp ) compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/mem_obj/create_image_aub_tests.cpp000066400000000000000000000507221422164147700324200ustar00rootroot00000000000000/* * Copyright (C) 
2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/aligned_memory.h" #include "shared/source/memory_manager/os_agnostic_memory_manager.h" #include "shared/test/common/mocks/mock_gmm.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/aub_tests/command_stream/aub_command_stream_fixture.h" #include "opencl/test/unit_test/command_queue/command_enqueue_fixture.h" #include extern GFXCORE_FAMILY renderCoreFamily; using namespace NEO; static const unsigned int testImageDimensions = 17; auto const elementSize = 4; //sizeof CL_RGBA * CL_UNORM_INT8 struct AUBCreateImage : public CommandDeviceFixture, public AUBCommandStreamFixture, public ::testing::Test { typedef AUBCommandStreamFixture CommandStreamFixture; using AUBCommandStreamFixture::SetUp; void SetUp() override { if (!(defaultHwInfo->capabilityTable.supportsImages)) { GTEST_SKIP(); } CommandDeviceFixture::SetUp(cl_command_queue_properties(0)); CommandStreamFixture::SetUp(pCmdQ); imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_RGBA; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_width = testImageDimensions; imageDesc.image_height = testImageDimensions; imageDesc.image_depth = 0; imageDesc.image_array_size = 10; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = NULL; } void TearDown() override { image.reset(); CommandStreamFixture::TearDown(); CommandDeviceFixture::TearDown(); } std::unique_ptr image; cl_image_format imageFormat; cl_image_desc imageDesc; cl_int retVal; unsigned char pHostPtr[512 * testImageDimensions * elementSize * 4]; }; struct AUBCreateImageArray : public AUBCreateImage, public ::testing::WithParamInterface { void SetUp() override { if (!(defaultHwInfo->capabilityTable.supportsImages)) { 
GTEST_SKIP(); } AUBCreateImage::SetUp(); } void TearDown() override { AUBCreateImage::TearDown(); } }; HWTEST_F(AUBCreateImageArray, Given1DImageArrayThenExpectationsMet) { auto &hwHelper = HwHelper::get(pDevice->getHardwareInfo().platform.eRenderCoreFamily); imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D_ARRAY; imageDesc.image_height = 1; cl_mem_flags flags = CL_MEM_COPY_HOST_PTR; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); auto imageDescriptor = Image::convertDescriptor(imageDesc); auto imgInfo = MockGmm::initImgInfo(imageDescriptor, 0, &surfaceFormat->surfaceFormat); imgInfo.linearStorage = !hwHelper.tilingAllowed(false, Image::isImage1d(imageDesc), false); auto queryGmm = MockGmm::queryImgParams(pDevice->getGmmClientContext(), imgInfo, false); //allocate host_ptr auto pixelSize = 4; auto storageSize = imageDesc.image_array_size * pixelSize * imageDesc.image_width * imageDesc.image_height; std::unique_ptr hostPtr(new int[storageSize]); for (auto i = 0u; i < storageSize; i++) { hostPtr[i] = i; } image.reset(Image::create( context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context->getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imageDesc, hostPtr.get(), retVal)); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, image); EXPECT_EQ(image->getSize(), imgInfo.size); EXPECT_EQ(image->getImageDesc().image_slice_pitch, imgInfo.slicePitch); EXPECT_EQ(image->getImageDesc().image_row_pitch, imgInfo.rowPitch); EXPECT_GE(image->getImageDesc().image_slice_pitch, image->getImageDesc().image_row_pitch); EXPECT_EQ(image->getQPitch(), imgInfo.qPitch); EXPECT_EQ(image->getCubeFaceIndex(), static_cast(__GMM_NO_CUBE_MAP)); auto imageHeight = imageDesc.image_height; std::unique_ptr readMemory(new uint32_t[image->getSize() / sizeof(uint32_t)]); auto allocation = createResidentAllocationAndStoreItInCsr(readMemory.get(), image->getSize()); size_t 
imgOrigin[] = {0, 0, 0}; size_t imgRegion[] = {imageDesc.image_width, 1, 1}; imgRegion[1] = imageDesc.image_array_size; retVal = pCmdQ->enqueueReadImage(image.get(), CL_FALSE, imgOrigin, imgRegion, imgInfo.rowPitch, imgInfo.slicePitch, readMemory.get(), nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); allocation = pCommandStreamReceiver->getTemporaryAllocations().peekHead(); while (allocation && allocation->getUnderlyingBuffer() != readMemory.get()) { allocation = allocation->next; } auto destGpuAddress = reinterpret_cast(allocation->getGpuAddress()); pCmdQ->flush(); auto address = (int *)image->getCpuAddress(); auto currentCounter = 0; for (auto array = 0u; array < imageDesc.image_array_size; array++) { for (auto height = 0u; height < imageHeight; height++) { for (auto element = 0u; element < imageDesc.image_width; element++) { auto offset = (array * imgInfo.slicePitch + element * pixelSize + height * imgInfo.rowPitch) / 4; if (MemoryPool::isSystemMemoryPool(image->getGraphicsAllocation(context->getDevice(0)->getRootDeviceIndex())->getMemoryPool()) == false) { AUBCommandStreamFixture::expectMemory(&destGpuAddress[offset], ¤tCounter, pixelSize); } else { EXPECT_EQ(currentCounter, address[offset]); } currentCounter++; } } } } HWTEST_F(AUBCreateImageArray, Given2DImageArrayThenExpectationsMet) { auto &hwHelper = HwHelper::get(pDevice->getHardwareInfo().platform.eRenderCoreFamily); imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY; cl_mem_flags flags = CL_MEM_COPY_HOST_PTR; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); auto imageDescriptor = Image::convertDescriptor(imageDesc); auto imgInfo = MockGmm::initImgInfo(imageDescriptor, 0, &surfaceFormat->surfaceFormat); imgInfo.linearStorage = !hwHelper.tilingAllowed(false, Image::isImage1d(imageDesc), false); auto queryGmm = MockGmm::queryImgParams(pDevice->getGmmClientContext(), imgInfo, false); 
//allocate host_ptr auto pixelSize = 4; auto storageSize = imageDesc.image_array_size * pixelSize * imageDesc.image_width * imageDesc.image_height; std::unique_ptr hostPtr(new int[storageSize]); for (auto i = 0u; i < storageSize; i++) { hostPtr[i] = i; } image.reset(Image::create( context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context->getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imageDesc, hostPtr.get(), retVal)); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, image); EXPECT_EQ(image->getSize(), imgInfo.size); EXPECT_EQ(image->getImageDesc().image_slice_pitch, imgInfo.slicePitch); EXPECT_EQ(image->getImageDesc().image_row_pitch, imgInfo.rowPitch); EXPECT_GE(image->getImageDesc().image_slice_pitch, image->getImageDesc().image_row_pitch); EXPECT_EQ(image->getQPitch(), imgInfo.qPitch); EXPECT_EQ(image->getCubeFaceIndex(), static_cast(__GMM_NO_CUBE_MAP)); auto imageHeight = imageDesc.image_height; std::unique_ptr readMemory(new uint32_t[image->getSize() / sizeof(uint32_t)]); auto allocation = createResidentAllocationAndStoreItInCsr(readMemory.get(), image->getSize()); size_t imgOrigin[] = {0, 0, 0}; size_t imgRegion[] = {imageDesc.image_width, 1, 1}; imgRegion[1] = imageDesc.image_height; imgRegion[2] = imageDesc.image_array_size; retVal = pCmdQ->enqueueReadImage(image.get(), CL_FALSE, imgOrigin, imgRegion, imgInfo.rowPitch, imgInfo.slicePitch, readMemory.get(), nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); allocation = pCommandStreamReceiver->getTemporaryAllocations().peekHead(); while (allocation && allocation->getUnderlyingBuffer() != readMemory.get()) { allocation = allocation->next; } auto destGpuAddress = reinterpret_cast(allocation->getGpuAddress()); pCmdQ->flush(); auto address = (int *)image->getCpuAddress(); auto currentCounter = 0; for (auto array = 0u; array < imageDesc.image_array_size; array++) { for (auto height = 0u; height < imageHeight; height++) { for (auto element = 0u; element < 
imageDesc.image_width; element++) { auto offset = (array * imgInfo.slicePitch + element * pixelSize + height * imgInfo.rowPitch) / 4; if (MemoryPool::isSystemMemoryPool(image->getGraphicsAllocation(context->getDevice(0)->getRootDeviceIndex())->getMemoryPool()) == false) { AUBCommandStreamFixture::expectMemory(&destGpuAddress[offset], ¤tCounter, pixelSize); } else { EXPECT_EQ(currentCounter, address[offset]); } currentCounter++; } } } } struct AUBCreateImageHostPtr : public AUBCreateImage, public ::testing::WithParamInterface { void SetUp() override { if (!(defaultHwInfo->capabilityTable.supportsImages)) { GTEST_SKIP(); } flags = GetParam(); AUBCreateImage::SetUp(); } void TearDown() override { AUBCreateImage::TearDown(); } uint64_t flags; }; static cl_mem_flags useHostPtrFlags[] = { 0 | CL_MEM_USE_HOST_PTR, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, CL_MEM_HOST_READ_ONLY | CL_MEM_USE_HOST_PTR, CL_MEM_HOST_WRITE_ONLY | CL_MEM_USE_HOST_PTR, CL_MEM_HOST_NO_ACCESS | CL_MEM_USE_HOST_PTR}; static cl_mem_flags copyHostPtrFlags[] = { 0 | CL_MEM_COPY_HOST_PTR, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, CL_MEM_HOST_READ_ONLY | CL_MEM_COPY_HOST_PTR, CL_MEM_HOST_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, CL_MEM_HOST_NO_ACCESS | CL_MEM_COPY_HOST_PTR}; using UseHostPtrTest = AUBCreateImageHostPtr; using CopyHostPtrTest = AUBCreateImageHostPtr; INSTANTIATE_TEST_CASE_P( CreateImgTest_UseHostPtr, UseHostPtrTest, testing::ValuesIn(useHostPtrFlags)); INSTANTIATE_TEST_CASE_P( CreateImgTest_CopyHostPtr, CopyHostPtrTest, testing::ValuesIn(copyHostPtrFlags)); HWTEST_P(CopyHostPtrTest, GivenImageWithDoubledRowPitchWhenCreatedWithCopyHostPtrFlagThenHasProperRowPitchSet) { auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); auto 
imageDescriptor = Image::convertDescriptor(imageDesc); auto imgInfo = MockGmm::initImgInfo(imageDescriptor, 0, &surfaceFormat->surfaceFormat); MockGmm::queryImgParams(pDevice->getGmmClientContext(), imgInfo, false); auto lineWidth = imageDesc.image_width * elementSize; auto passedRowPitch = imgInfo.rowPitch * 2; imageDesc.image_row_pitch = passedRowPitch; char counter = 0; char *data = (char *)pHostPtr; auto heightToCopy = imageDesc.image_height; while (heightToCopy--) { for (unsigned int i = 0; i < imageDesc.image_width * elementSize; i++) { data[i] = counter++; } data += passedRowPitch; } image.reset(Image::create(context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context->getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imageDesc, pHostPtr, retVal)); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(image->getImageDesc().image_row_pitch, imgInfo.rowPitch); EXPECT_EQ(image->getHostPtrRowPitch(), (size_t)passedRowPitch); EXPECT_EQ(image->getSize(), imgInfo.size); EXPECT_EQ(image->getImageDesc().image_slice_pitch, imgInfo.slicePitch); EXPECT_GE(image->getImageDesc().image_slice_pitch, image->getImageDesc().image_row_pitch); EXPECT_EQ(image->getQPitch(), imgInfo.qPitch); EXPECT_EQ(image->getCubeFaceIndex(), static_cast(__GMM_NO_CUBE_MAP)); //now check if data is properly propagated to image heightToCopy = imageDesc.image_height; auto imageStorage = static_cast(image->getCpuAddress()); data = (char *)pHostPtr; uint8_t *readMemory = nullptr; bool isGpuCopy = image->isTiledAllocation() || !MemoryPool::isSystemMemoryPool(image->getGraphicsAllocation(context->getDevice(0)->getRootDeviceIndex())->getMemoryPool()); if (isGpuCopy) { readMemory = new uint8_t[testImageDimensions * testImageDimensions * elementSize * 4]; size_t imgOrigin[] = {0, 0, 0}; size_t imgRegion[] = {imageDesc.image_width, imageDesc.image_height, imageDesc.image_depth ? 
imageDesc.image_depth : 1}; retVal = pCmdQ->enqueueReadImage(image.get(), CL_FALSE, imgOrigin, imgRegion, 0, 0, readMemory, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = pCmdQ->flush(); EXPECT_EQ(CL_SUCCESS, retVal); imageStorage = readMemory; } while (heightToCopy--) { for (unsigned int i = 0; i < imageDesc.image_width * elementSize; i++) { if (isGpuCopy) { AUBCommandStreamFixture::expectMemory(&imageStorage[i], &data[i], 1); } else { EXPECT_EQ(imageStorage[i], data[i]); } } data += passedRowPitch; imageStorage += lineWidth; } if (readMemory) delete readMemory; } HWTEST_P(UseHostPtrTest, GivenImageWithRowPitchWhenCreatedWithUseHostPtrFlagThenExpectationsMet) { imageDesc.image_width = 546; imageDesc.image_height = 1; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); auto imageDescriptor = Image::convertDescriptor(imageDesc); auto imgInfo = MockGmm::initImgInfo(imageDescriptor, 0, &surfaceFormat->surfaceFormat); MockGmm::queryImgParams(pDevice->getGmmClientContext(), imgInfo, false); auto passedRowPitch = imgInfo.rowPitch + 32; imageDesc.image_row_pitch = passedRowPitch; unsigned char *pUseHostPtr = new unsigned char[passedRowPitch * imageDesc.image_height * elementSize]; char counter = 0; char *data = (char *)pUseHostPtr; auto heightToCopy = imageDesc.image_height; while (heightToCopy--) { for (unsigned int i = 0; i < imageDesc.image_width * elementSize; i++) { data[i] = counter++; } data += passedRowPitch; } image.reset(Image::create( context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context->getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imageDesc, pUseHostPtr, retVal)); ASSERT_EQ(CL_SUCCESS, retVal); //now check if data is properly propagated to image auto mapFlags = CL_MAP_READ; const size_t origin[3] = {0, 0, 0}; const size_t region[3] = {imageDesc.image_width, imageDesc.image_height, 1}; size_t 
imageRowPitch = 0; size_t imageSlicePitch = 0; auto ptr = pCmdQ->enqueueMapImage( image.get(), true, mapFlags, origin, region, &imageRowPitch, &imageSlicePitch, 0, nullptr, nullptr, retVal); if (image->isMemObjZeroCopy()) { EXPECT_EQ(image->getCpuAddress(), ptr); } else { EXPECT_NE(image->getCpuAddress(), ptr); } size_t imageRowPitchRef = 0; image->getImageInfo(CL_IMAGE_ROW_PITCH, sizeof(imageRowPitchRef), &imageRowPitchRef, nullptr); // Only ZeroCopy HOST_PTR image has the same row_pitch as the one from map, otherwise mapped ptr may have different row_pitch if (image->isMemObjZeroCopy()) { EXPECT_EQ(imageRowPitch, imageRowPitchRef); } size_t imageSlicePitchRef = 0; image->getImageInfo(CL_IMAGE_SLICE_PITCH, sizeof(imageSlicePitchRef), &imageSlicePitchRef, nullptr); // Only ZeroCopy HOST_PTR image has the same slice_pitch as the one from map, otherwise mapped ptr may have different slice_pitch if (image->isMemObjZeroCopy()) { EXPECT_EQ(imageSlicePitch, imageSlicePitchRef); } heightToCopy = imageDesc.image_height; char *imageStorage = (char *)ptr; data = (char *)pUseHostPtr; bool isGpuCopy = image->isTiledAllocation() || !MemoryPool::isSystemMemoryPool( image->getGraphicsAllocation(context->getDevice(0)->getRootDeviceIndex())->getMemoryPool()); while (heightToCopy--) { for (unsigned int i = 0; i < imageDesc.image_width * elementSize; i++) { if (isGpuCopy) { AUBCommandStreamFixture::expectMemory(&imageStorage[i], &data[i], 1); } else { EXPECT_EQ(imageStorage[i], data[i]); } } data += passedRowPitch; imageStorage += imageRowPitch; } retVal = clEnqueueUnmapMemObject(pCmdQ, image.get(), ptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); delete[] pUseHostPtr; } HWTEST_F(AUBCreateImage, GivenImage3DCreatedWithDoubledSlicePitchWhenQueriedForDataThenReturnsProperData) { imageDesc.image_type = CL_MEM_OBJECT_IMAGE3D; imageDesc.image_depth = testImageDimensions; auto imageSize = imageDesc.image_width * imageDesc.image_height * imageDesc.image_depth * 4 * 2; auto 
computedSlicePitch = imageDesc.image_width * alignUp(imageDesc.image_height, 4) * 4; auto inputSlicePitch = computedSlicePitch * 2; imageDesc.image_slice_pitch = inputSlicePitch; auto host_ptr = alignedMalloc(inputSlicePitch * imageDesc.image_depth, 4096); auto counter = 0; char *data = (char *)host_ptr; auto depthToCopy = imageDesc.image_depth; while (depthToCopy--) { for (unsigned int i = 0; i < imageDesc.image_width * 4 * imageDesc.image_height; i++) { data[i] = counter++; } data += inputSlicePitch; } cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); image.reset(Image::create(context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context->getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imageDesc, host_ptr, retVal)); depthToCopy = imageDesc.image_depth; auto imageStorage = (uint8_t *)image->getCpuAddress(); data = (char *)host_ptr; uint8_t *readMemory = nullptr; bool isGpuCopy = image->isTiledAllocation() || !MemoryPool::isSystemMemoryPool( image->getGraphicsAllocation(context->getDevice(0)->getRootDeviceIndex())->getMemoryPool()); if (isGpuCopy) { readMemory = new uint8_t[imageSize]; size_t imgOrigin[] = {0, 0, 0}; size_t imgRegion[] = {imageDesc.image_width, imageDesc.image_height, imageDesc.image_depth}; retVal = pCmdQ->enqueueReadImage(image.get(), CL_FALSE, imgOrigin, imgRegion, 0, computedSlicePitch, readMemory, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = pCmdQ->flush(); EXPECT_EQ(CL_SUCCESS, retVal); imageStorage = readMemory; } while (depthToCopy--) { for (unsigned int i = 0; i < imageDesc.image_width * 4 * imageDesc.image_height; i++) { if (isGpuCopy) { AUBCommandStreamFixture::expectMemory(&imageStorage[i], &data[i], 1); } else { EXPECT_EQ(imageStorage[i], data[i]); } } data += inputSlicePitch; imageStorage += computedSlicePitch; } 
alignedFree(host_ptr); if (readMemory) { delete readMemory; } } compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/unified_memory/000077500000000000000000000000001422164147700266335ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/unified_memory/CMakeLists.txt000066400000000000000000000005131422164147700313720ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT # target_sources(igdrcl_aub_tests PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/unified_memory_aub_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/unified_memory_copy_aub_tests.cpp ) unified_memory_aub_tests.cpp000066400000000000000000000051751422164147700343540ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/unified_memory/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/aub_tests/fixtures/unified_memory_fixture.h" namespace NEO { class UnifiedMemoryAubTest : public UnifiedMemoryAubFixture, public ::testing::Test { public: using UnifiedMemoryAubFixture::TearDown; std::vector values; void SetUp() override { UnifiedMemoryAubFixture::SetUp(); values = std::vector(dataSize, 11); }; }; HWTEST_F(UnifiedMemoryAubTest, givenDeviceMemoryAllocWhenWriteIntoItThenValuesMatch) { auto unifiedMemoryType = InternalMemoryType::DEVICE_UNIFIED_MEMORY; auto unifiedMemoryPtr = allocateUSM(unifiedMemoryType); writeToUsmMemory(values, unifiedMemoryPtr, unifiedMemoryType); expectMemory(unifiedMemoryPtr, values.data(), dataSize); freeUSM(unifiedMemoryPtr, unifiedMemoryType); } HWTEST_F(UnifiedMemoryAubTest, givenSharedMemoryAllocWhenWriteIntoCPUPartThenValuesMatchAfterUsingAllocAsKernelParam) { auto unifiedMemoryType = InternalMemoryType::SHARED_UNIFIED_MEMORY; auto unifiedMemoryPtr = allocateUSM(unifiedMemoryType); retVal = 
clEnqueueMemsetINTEL(this->pCmdQ, unifiedMemoryPtr, 0, dataSize, 0, nullptr, nullptr); EXPECT_EQ(retVal, CL_SUCCESS); writeToUsmMemory(values, unifiedMemoryPtr, unifiedMemoryType); expectNotEqualMemory(unifiedMemoryPtr, values.data(), dataSize); auto mockPtr = std::make_unique(dataSize); retVal = clEnqueueMemcpyINTEL(this->pCmdQ, true, mockPtr.get(), unifiedMemoryPtr, dataSize, 0, nullptr, nullptr); EXPECT_EQ(retVal, CL_SUCCESS); expectMemory(unifiedMemoryPtr, values.data(), dataSize); freeUSM(unifiedMemoryPtr, unifiedMemoryType); } HWTEST_F(UnifiedMemoryAubTest, givenSharedMemoryAllocWhenWriteIntoGPUPartThenValuesMatchAfterUsingAlloc) { auto unifiedMemoryType = InternalMemoryType::SHARED_UNIFIED_MEMORY; auto unifiedMemoryPtr = allocateUSM(unifiedMemoryType); std::vector input(dataSize, 11); retVal = clEnqueueMemcpyINTEL(this->pCmdQ, true, unifiedMemoryPtr, input.data(), dataSize, 0, nullptr, nullptr); EXPECT_EQ(retVal, CL_SUCCESS); expectNotEqualMemory(unifiedMemoryPtr, unifiedMemoryPtr, dataSize); expectMemory(unifiedMemoryPtr, input.data(), dataSize); auto mockRead = reinterpret_cast(unifiedMemoryPtr)[0]; mockRead = 0; expectMemory(unifiedMemoryPtr, unifiedMemoryPtr, dataSize); freeUSM(unifiedMemoryPtr, unifiedMemoryType); } } // namespace NEO unified_memory_copy_aub_tests.cpp000066400000000000000000000040631422164147700354010ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/unified_memory/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/aub_tests/fixtures/unified_memory_fixture.h" namespace NEO { class UnifiedMemoryCopyAubTest : public UnifiedMemoryAubFixture, public ::testing::TestWithParam> { public: void *srcPtr, *dstPtr; InternalMemoryType srcMemoryType, dstMemoryType; std::vector srcValues, dstValues; void SetUp() override { UnifiedMemoryAubFixture::SetUp(); srcMemoryType = std::get<0>(GetParam()); 
dstMemoryType = std::get<1>(GetParam()); srcPtr = this->allocateUSM(srcMemoryType); dstPtr = this->allocateUSM(dstMemoryType); srcValues = std::vector(dataSize, 11); dstValues = std::vector(dataSize, 22); this->writeToUsmMemory(srcValues, srcPtr, srcMemoryType); this->writeToUsmMemory(dstValues, dstPtr, dstMemoryType); } void TearDown() override { this->freeUSM(srcPtr, srcMemoryType); this->freeUSM(dstPtr, dstMemoryType); UnifiedMemoryAubFixture::TearDown(); } }; HWTEST_P(UnifiedMemoryCopyAubTest, givenTwoUnifiedMemoryAllocsWhenCopyingOneToAnotherThenValuesMatch) { clEnqueueMemcpyINTEL(this->pCmdQ, true, dstPtr, srcPtr, dataSize, 0, nullptr, nullptr); expectMemory(dstPtr, srcValues.data(), dataSize); } InternalMemoryType memoryTypes[] = {InternalMemoryType::HOST_UNIFIED_MEMORY, InternalMemoryType::DEVICE_UNIFIED_MEMORY, InternalMemoryType::SHARED_UNIFIED_MEMORY, InternalMemoryType::NOT_SPECIFIED}; INSTANTIATE_TEST_CASE_P(UnifiedMemoryCopyAubTest, UnifiedMemoryCopyAubTest, ::testing::Combine(::testing::ValuesIn(memoryTypes), ::testing::ValuesIn(memoryTypes))); } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/xe_hp_core/000077500000000000000000000000001422164147700257335ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/xe_hp_core/CMakeLists.txt000066400000000000000000000006351422164147700304770ustar00rootroot00000000000000# # Copyright (C) 2022 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_XE_HP_CORE) target_sources(igdrcl_aub_tests PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/aub_mem_dump_tests_xe_hp_core.cpp ${CMAKE_CURRENT_SOURCE_DIR}/aub_tests_stateless_compression_in_sba_xe_hp_core.cpp ) add_subdirectories() endif() aub_mem_dump_tests_xe_hp_core.cpp000066400000000000000000000025741422164147700344370ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/xe_hp_core/* * Copyright (C) 2022 Intel Corporation * * 
SPDX-License-Identifier: MIT * */ #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/ult_hw_config.h" #include "shared/test/common/helpers/variable_backup.h" #include "opencl/test/unit_test/aub_tests/command_stream/aub_mem_dump_tests.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" namespace NEO { extern bool overrideCommandStreamReceiverCreation; using XeHpCoreAubMemDumpTests = Test; XE_HP_CORE_TEST_F(XeHpCoreAubMemDumpTests, GivenCcsThenExpectationsAreMet) { setupAUB(pDevice, aub_stream::ENGINE_CCS); } XE_HP_CORE_TEST_F(XeHpCoreAubMemDumpTests, whenAubCsrIsCreatedThenCreateHardwareContext) { DebugManagerStateRestore restore; VariableBackup backup(&ultHwConfig); ultHwConfig.useHwCsr = true; DebugManager.flags.SetCommandStreamReceiver.set(CommandStreamReceiverType::CSR_AUB); std::unique_ptr device(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); auto &baseCsr = device->getGpgpuCommandStreamReceiver(); auto &aubCsr = static_cast &>(baseCsr); EXPECT_NE(nullptr, aubCsr.hardwareContextController.get()); EXPECT_NE(0u, aubCsr.hardwareContextController->hardwareContexts.size()); } } // namespace NEO aub_tests_stateless_compression_in_sba_xe_hp_core.cpp000066400000000000000000000705111422164147700405730ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/xe_hp_core/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gmm_helper/resource_info.h" #include "shared/source/memory_manager/unified_memory_manager.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/extensions/public/cl_ext_private.h" #include "opencl/source/api/api.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/platform/platform.h" #include 
"opencl/test/unit_test/aub_tests/fixtures/aub_fixture.h" #include "opencl/test/unit_test/aub_tests/fixtures/multicontext_aub_fixture.h" #include "opencl/test/unit_test/fixtures/simple_arg_kernel_fixture.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" using namespace NEO; struct StatelessCompressionInSBA : public KernelAUBFixture, public ::testing::Test, public ::testing::WithParamInterface { void SetUp() override { DebugManager.flags.EnableStatelessCompression.set(1); DebugManager.flags.RenderCompressedBuffersEnabled.set(true); DebugManager.flags.RenderCompressedImagesEnabled.set(true); DebugManager.flags.EnableLocalMemory.set(true); DebugManager.flags.NodeOrdinal.set(GetParam()); DebugManager.flags.ForceAuxTranslationMode.set(static_cast(AuxTranslationMode::Builtin)); KernelAUBFixture::SetUp(); } void TearDown() override { KernelAUBFixture::TearDown(); } DebugManagerStateRestore debugRestorer; cl_int retVal = CL_SUCCESS; }; XE_HP_CORE_TEST_P(StatelessCompressionInSBA, GENERATEONLY_givenCompressedBuffersWhenStatelessAccessToLocalMemoryThenEnableCompressionInSBA) { const size_t bufferSize = 2048; uint8_t writePattern[bufferSize]; std::fill(writePattern, writePattern + sizeof(writePattern), 1); device->getGpgpuCommandStreamReceiver().overrideDispatchPolicy(DispatchMode::BatchedDispatch); auto unCompressedBuffer = std::unique_ptr(Buffer::create(context, CL_MEM_UNCOMPRESSED_HINT_INTEL, bufferSize, nullptr, retVal)); auto unCompressedAllocation = unCompressedBuffer->getGraphicsAllocation(device->getRootDeviceIndex()); EXPECT_EQ(AllocationType::BUFFER, unCompressedAllocation->getAllocationType()); EXPECT_FALSE(unCompressedAllocation->isCompressionEnabled()); EXPECT_EQ(MemoryPool::LocalMemory, unCompressedAllocation->getMemoryPool()); auto compressedBuffer1 = std::unique_ptr(Buffer::create(context, CL_MEM_COMPRESSED_HINT_INTEL, bufferSize, nullptr, retVal)); auto compressedAllocation1 = compressedBuffer1->getGraphicsAllocation(device->getRootDeviceIndex()); 
EXPECT_TRUE(compressedAllocation1->isCompressionEnabled()); EXPECT_EQ(MemoryPool::LocalMemory, compressedAllocation1->getMemoryPool()); EXPECT_TRUE(compressedAllocation1->getDefaultGmm()->isCompressionEnabled); auto compressedBuffer2 = std::unique_ptr(Buffer::create(context, CL_MEM_COMPRESSED_HINT_INTEL, bufferSize, nullptr, retVal)); auto compressedAllocation2 = compressedBuffer2->getGraphicsAllocation(device->getRootDeviceIndex()); EXPECT_TRUE(compressedAllocation2->isCompressionEnabled()); EXPECT_EQ(MemoryPool::LocalMemory, compressedAllocation2->getMemoryPool()); EXPECT_TRUE(compressedAllocation2->getDefaultGmm()->isCompressionEnabled); retVal = pCmdQ->enqueueWriteBuffer(compressedBuffer1.get(), CL_FALSE, 0, bufferSize, writePattern, nullptr, 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); retVal = kernel->setArg(0, compressedBuffer1.get()); ASSERT_EQ(CL_SUCCESS, retVal); retVal = kernel->setArg(1, compressedBuffer2.get()); ASSERT_EQ(CL_SUCCESS, retVal); size_t globalWorkSize[3] = {bufferSize, 1, 1}; retVal = pCmdQ->enqueueKernel(kernel, 1, nullptr, globalWorkSize, nullptr, 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); pCmdQ->finish(); expectNotEqualMemory(AUBFixture::getGpuPointer(compressedAllocation1), writePattern, bufferSize); expectNotEqualMemory(AUBFixture::getGpuPointer(compressedAllocation2), writePattern, bufferSize); retVal = pCmdQ->enqueueCopyBuffer(compressedBuffer2.get(), unCompressedBuffer.get(), 0, 0, bufferSize, 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); pCmdQ->finish(); expectMemory(AUBFixture::getGpuPointer(unCompressedAllocation), writePattern, bufferSize); } XE_HP_CORE_TEST_P(StatelessCompressionInSBA, GENERATEONLY_givenCompressedDeviceMemoryWhenAccessedStatelesslyThenEnableCompressionInSBA) { const size_t bufferSize = 2048; uint8_t writePattern[bufferSize]; std::fill(writePattern, writePattern + sizeof(writePattern), 1); device->getGpgpuCommandStreamReceiver().overrideDispatchPolicy(DispatchMode::BatchedDispatch); 
auto compressedDeviceMemAllocPtr1 = clDeviceMemAllocINTEL(context, device.get(), nullptr, bufferSize, 0, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, compressedDeviceMemAllocPtr1); auto compressedDeviceMemAlloc1 = context->getSVMAllocsManager()->getSVMAllocs()->get(compressedDeviceMemAllocPtr1)->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex()); EXPECT_NE(nullptr, compressedDeviceMemAlloc1); EXPECT_TRUE(compressedDeviceMemAlloc1->isCompressionEnabled()); EXPECT_EQ(MemoryPool::LocalMemory, compressedDeviceMemAlloc1->getMemoryPool()); EXPECT_TRUE(compressedDeviceMemAlloc1->getDefaultGmm()->isCompressionEnabled); auto compressedDeviceMemAllocPtr2 = clDeviceMemAllocINTEL(context, device.get(), nullptr, bufferSize, 0, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, compressedDeviceMemAllocPtr2); auto compressedDeviceMemAlloc2 = context->getSVMAllocsManager()->getSVMAllocs()->get(compressedDeviceMemAllocPtr2)->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex()); EXPECT_NE(nullptr, compressedDeviceMemAlloc2); EXPECT_TRUE(compressedDeviceMemAlloc2->isCompressionEnabled()); EXPECT_EQ(MemoryPool::LocalMemory, compressedDeviceMemAlloc2->getMemoryPool()); EXPECT_TRUE(compressedDeviceMemAlloc2->getDefaultGmm()->isCompressionEnabled); retVal = clEnqueueMemcpyINTEL(pCmdQ, true, compressedDeviceMemAllocPtr1, writePattern, bufferSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clSetKernelArgSVMPointer(multiDeviceKernel.get(), 0, compressedDeviceMemAllocPtr1); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clSetKernelArgSVMPointer(multiDeviceKernel.get(), 1, compressedDeviceMemAllocPtr2); EXPECT_EQ(CL_SUCCESS, retVal); size_t globalWorkSize[3] = {bufferSize, 1, 1}; retVal = clEnqueueNDRangeKernel(pCmdQ, multiDeviceKernel.get(), 1, nullptr, globalWorkSize, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clFinish(pCmdQ); EXPECT_EQ(CL_SUCCESS, retVal); 
expectNotEqualMemory(compressedDeviceMemAllocPtr2, writePattern, bufferSize); expectNotEqualMemory(compressedDeviceMemAllocPtr2, writePattern, bufferSize); retVal = clMemFreeINTEL(context, compressedDeviceMemAllocPtr1); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clMemFreeINTEL(context, compressedDeviceMemAllocPtr2); EXPECT_EQ(CL_SUCCESS, retVal); } XE_HP_CORE_TEST_P(StatelessCompressionInSBA, givenUncompressibleBufferInHostMemoryWhenAccessedStatelesslyThenDisableCompressionInSBA) { const size_t bufferSize = 2048; uint8_t writePattern[bufferSize]; std::fill(writePattern, writePattern + sizeof(writePattern), 1); device->getGpgpuCommandStreamReceiver().overrideDispatchPolicy(DispatchMode::BatchedDispatch); auto unCompressedBuffer = std::unique_ptr(Buffer::create(context, CL_MEM_UNCOMPRESSED_HINT_INTEL, bufferSize, nullptr, retVal)); auto unCompressedAllocation = unCompressedBuffer->getGraphicsAllocation(device->getRootDeviceIndex()); EXPECT_EQ(AllocationType::BUFFER, unCompressedAllocation->getAllocationType()); EXPECT_FALSE(unCompressedAllocation->isCompressionEnabled()); EXPECT_EQ(MemoryPool::LocalMemory, unCompressedAllocation->getMemoryPool()); auto compressedBuffer = std::unique_ptr(Buffer::create(context, CL_MEM_COMPRESSED_HINT_INTEL, bufferSize, nullptr, retVal)); auto compressedAllocation = compressedBuffer->getGraphicsAllocation(device->getRootDeviceIndex()); EXPECT_TRUE(compressedAllocation->isCompressionEnabled()); EXPECT_EQ(MemoryPool::LocalMemory, compressedAllocation->getMemoryPool()); EXPECT_TRUE(compressedAllocation->getDefaultGmm()->isCompressionEnabled); auto uncompressibleBufferInHostMemory = std::unique_ptr(Buffer::create(context, CL_MEM_FORCE_HOST_MEMORY_INTEL, bufferSize, nullptr, retVal)); auto uncompressibleAllocationInHostMemory = uncompressibleBufferInHostMemory->getGraphicsAllocation(device->getRootDeviceIndex()); EXPECT_EQ(AllocationType::BUFFER_HOST_MEMORY, uncompressibleAllocationInHostMemory->getAllocationType()); 
EXPECT_TRUE(MemoryPool::isSystemMemoryPool(uncompressibleAllocationInHostMemory->getMemoryPool())); retVal = pCmdQ->enqueueWriteBuffer(compressedBuffer.get(), CL_FALSE, 0, bufferSize, writePattern, nullptr, 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); retVal = kernel->setArg(0, compressedBuffer.get()); ASSERT_EQ(CL_SUCCESS, retVal); retVal = kernel->setArg(1, uncompressibleBufferInHostMemory.get()); ASSERT_EQ(CL_SUCCESS, retVal); size_t globalWorkSize[3] = {bufferSize, 1, 1}; retVal = pCmdQ->enqueueKernel(kernel, 1, nullptr, globalWorkSize, nullptr, 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); pCmdQ->finish(); expectNotEqualMemory(AUBFixture::getGpuPointer(compressedAllocation), writePattern, bufferSize); expectMemory(AUBFixture::getGpuPointer(uncompressibleAllocationInHostMemory), writePattern, bufferSize); retVal = pCmdQ->enqueueCopyBuffer(uncompressibleBufferInHostMemory.get(), unCompressedBuffer.get(), 0, 0, bufferSize, 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); pCmdQ->finish(); expectMemory(AUBFixture::getGpuPointer(unCompressedAllocation), writePattern, bufferSize); } XE_HP_CORE_TEST_P(StatelessCompressionInSBA, givenUncompressibleHostMemoryAllocationWhenAccessedStatelesslyThenDisableCompressionInSBA) { const size_t bufferSize = 2048; uint8_t writePattern[bufferSize]; std::fill(writePattern, writePattern + sizeof(writePattern), 1); device->getGpgpuCommandStreamReceiver().overrideDispatchPolicy(DispatchMode::BatchedDispatch); auto compressedDeviceMemAllocPtr = clDeviceMemAllocINTEL(context, device.get(), nullptr, bufferSize, 0, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, compressedDeviceMemAllocPtr); auto compressedDeviceMemAlloc = context->getSVMAllocsManager()->getSVMAllocs()->get(compressedDeviceMemAllocPtr)->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex()); EXPECT_NE(nullptr, compressedDeviceMemAlloc); EXPECT_TRUE(compressedDeviceMemAlloc->isCompressionEnabled()); 
EXPECT_EQ(MemoryPool::LocalMemory, compressedDeviceMemAlloc->getMemoryPool()); EXPECT_TRUE(compressedDeviceMemAlloc->getDefaultGmm()->isCompressionEnabled); auto uncompressibleHostMemAllocPtr = clHostMemAllocINTEL(context, nullptr, bufferSize, 0, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, uncompressibleHostMemAllocPtr); auto uncompressibleHostMemAlloc = context->getSVMAllocsManager()->getSVMAllocs()->get(uncompressibleHostMemAllocPtr)->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex()); EXPECT_NE(nullptr, uncompressibleHostMemAlloc); EXPECT_EQ(AllocationType::BUFFER_HOST_MEMORY, uncompressibleHostMemAlloc->getAllocationType()); EXPECT_TRUE(MemoryPool::isSystemMemoryPool(uncompressibleHostMemAlloc->getMemoryPool())); retVal = clEnqueueMemcpyINTEL(pCmdQ, true, compressedDeviceMemAllocPtr, writePattern, bufferSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clSetKernelArgSVMPointer(multiDeviceKernel.get(), 0, compressedDeviceMemAllocPtr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clSetKernelArgSVMPointer(multiDeviceKernel.get(), 1, uncompressibleHostMemAllocPtr); EXPECT_EQ(CL_SUCCESS, retVal); size_t globalWorkSize[3] = {bufferSize, 1, 1}; retVal = clEnqueueNDRangeKernel(pCmdQ, multiDeviceKernel.get(), 1, nullptr, globalWorkSize, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clFinish(pCmdQ); EXPECT_EQ(CL_SUCCESS, retVal); expectNotEqualMemory(compressedDeviceMemAllocPtr, writePattern, bufferSize); expectMemory(uncompressibleHostMemAllocPtr, writePattern, bufferSize); retVal = clMemFreeINTEL(context, compressedDeviceMemAllocPtr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clMemFreeINTEL(context, uncompressibleHostMemAllocPtr); EXPECT_EQ(CL_SUCCESS, retVal); } INSTANTIATE_TEST_CASE_P(, StatelessCompressionInSBA, ::testing::Values(aub_stream::ENGINE_RCS, aub_stream::ENGINE_CCS)); struct UmStatelessCompressionInSBA : public KernelAUBFixture, public ::testing::Test, public ::testing::WithParamInterface 
{ /* Sets the same debug flags as StatelessCompressionInSBA::SetUp() and, after KernelAUBFixture::SetUp(), expects the kernel's KernelInfo to report hasIndirectStatelessAccess. */ void SetUp() override { DebugManager.flags.EnableStatelessCompression.set(1); DebugManager.flags.RenderCompressedBuffersEnabled.set(true); DebugManager.flags.RenderCompressedImagesEnabled.set(true); DebugManager.flags.EnableLocalMemory.set(true); DebugManager.flags.NodeOrdinal.set(GetParam()); DebugManager.flags.ForceAuxTranslationMode.set(static_cast(AuxTranslationMode::Builtin)); KernelAUBFixture::SetUp(); EXPECT_TRUE(multiDeviceKernel->getKernel(rootDeviceIndex)->getKernelInfo().hasIndirectStatelessAccess); } void TearDown() override { KernelAUBFixture::TearDown(); } DebugManagerStateRestore debugRestorer; cl_int retVal = CL_SUCCESS; }; /* Stores the address of compressedDeviceMemAlloc2 in element [0] of the reinterpreted bufferData, copies bufferData to compressedDeviceMemAlloc1 (kernel argument 0) and registers compressedDeviceMemAlloc2 through CL_KERNEL_EXEC_INFO_USM_PTRS_INTEL. After the kernel runs, element [0] is reset to 1; compressedDeviceMemAlloc2 is expected to differ from bufferData when read directly and to match it after a copy into host USM memory. NOTE(review): the reinterpret_cast target types appear to be missing in this copy. */ XE_HP_CORE_TEST_P(UmStatelessCompressionInSBA, GENERATEONLY_givenStatelessKernelWhenItHasIndirectDeviceAccessThenEnableCompressionInSBA) { const size_t bufferSize = MemoryConstants::kiloByte; uint8_t bufferData[bufferSize] = {}; auto uncompressibleHostMemAlloc = clHostMemAllocINTEL(context, nullptr, bufferSize, 0, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, uncompressibleHostMemAlloc); auto compressedDeviceMemAlloc1 = clDeviceMemAllocINTEL(context, device.get(), nullptr, bufferSize, 0, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, compressedDeviceMemAlloc1); auto compressedDeviceMemAlloc2 = clDeviceMemAllocINTEL(context, device.get(), nullptr, bufferSize, 0, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, compressedDeviceMemAlloc2); retVal = clEnqueueMemcpyINTEL(pCmdQ, true, compressedDeviceMemAlloc2, bufferData, bufferSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); reinterpret_cast(bufferData)[0] = reinterpret_cast(compressedDeviceMemAlloc2); retVal = clEnqueueMemcpyINTEL(pCmdQ, true, compressedDeviceMemAlloc1, bufferData, bufferSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clSetKernelArgSVMPointer(multiDeviceKernel.get(), 0, compressedDeviceMemAlloc1); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clSetKernelExecInfo(multiDeviceKernel.get(),
CL_KERNEL_EXEC_INFO_USM_PTRS_INTEL, sizeof(compressedDeviceMemAlloc2), &compressedDeviceMemAlloc2); EXPECT_EQ(CL_SUCCESS, retVal); size_t globalWorkSize[3] = {bufferSize, 1, 1}; retVal = clEnqueueNDRangeKernel(pCmdQ, multiDeviceKernel.get(), 1, nullptr, globalWorkSize, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clFinish(pCmdQ); EXPECT_EQ(CL_SUCCESS, retVal); reinterpret_cast(bufferData)[0] = 1; expectNotEqualMemory(compressedDeviceMemAlloc2, bufferData, bufferSize); retVal = clEnqueueMemcpyINTEL(pCmdQ, true, uncompressibleHostMemAlloc, compressedDeviceMemAlloc2, bufferSize, 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); expectMemory(uncompressibleHostMemAlloc, bufferData, bufferSize); retVal = clMemFreeINTEL(context, uncompressibleHostMemAlloc); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clMemFreeINTEL(context, compressedDeviceMemAlloc1); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clMemFreeINTEL(context, compressedDeviceMemAlloc2); EXPECT_EQ(CL_SUCCESS, retVal); } /* Same flow as the preceding test, except that indirect access is enabled with CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL instead of listing the USM pointer. */ XE_HP_CORE_TEST_P(UmStatelessCompressionInSBA, GENERATEONLY_givenKernelExecInfoWhenItHasIndirectDeviceAccessThenEnableCompressionInSBA) { const size_t bufferSize = MemoryConstants::kiloByte; uint8_t bufferData[bufferSize] = {}; auto uncompressibleHostMemAlloc = clHostMemAllocINTEL(context, nullptr, bufferSize, 0, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, uncompressibleHostMemAlloc); auto compressedDeviceMemAlloc1 = clDeviceMemAllocINTEL(context, device.get(), nullptr, bufferSize, 0, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, compressedDeviceMemAlloc1); auto compressedDeviceMemAlloc2 = clDeviceMemAllocINTEL(context, device.get(), nullptr, bufferSize, 0, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, compressedDeviceMemAlloc2); retVal = clEnqueueMemcpyINTEL(pCmdQ, true, compressedDeviceMemAlloc2, bufferData, bufferSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); reinterpret_cast(bufferData)[0] =
reinterpret_cast(compressedDeviceMemAlloc2); retVal = clEnqueueMemcpyINTEL(pCmdQ, true, compressedDeviceMemAlloc1, bufferData, bufferSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clSetKernelArgSVMPointer(multiDeviceKernel.get(), 0, compressedDeviceMemAlloc1); EXPECT_EQ(CL_SUCCESS, retVal); cl_bool enableIndirectDeviceAccess = CL_TRUE; retVal = clSetKernelExecInfo(multiDeviceKernel.get(), CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL, sizeof(cl_bool), &enableIndirectDeviceAccess); EXPECT_EQ(CL_SUCCESS, retVal); size_t globalWorkSize[3] = {bufferSize, 1, 1}; retVal = clEnqueueNDRangeKernel(pCmdQ, multiDeviceKernel.get(), 1, nullptr, globalWorkSize, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clFinish(pCmdQ); EXPECT_EQ(CL_SUCCESS, retVal); reinterpret_cast(bufferData)[0] = 1; expectNotEqualMemory(compressedDeviceMemAlloc2, bufferData, bufferSize); retVal = clEnqueueMemcpyINTEL(pCmdQ, true, uncompressibleHostMemAlloc, compressedDeviceMemAlloc2, bufferSize, 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); expectMemory(uncompressibleHostMemAlloc, bufferData, bufferSize); retVal = clMemFreeINTEL(context, uncompressibleHostMemAlloc); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clMemFreeINTEL(context, compressedDeviceMemAlloc1); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clMemFreeINTEL(context, compressedDeviceMemAlloc2); EXPECT_EQ(CL_SUCCESS, retVal); } /* Host-memory counterpart: the pointer stored in bufferData refers to a zeroed clHostMemAllocINTEL allocation registered through CL_KERNEL_EXEC_INFO_USM_PTRS_INTEL, and that host allocation is expected to match bufferData (with element [0] reset to 1) directly after the kernel runs. */ XE_HP_CORE_TEST_P(UmStatelessCompressionInSBA, givenStatelessKernelWhenItHasIndirectHostAccessThenDisableCompressionInSBA) { const size_t bufferSize = MemoryConstants::kiloByte; uint8_t bufferData[bufferSize] = {}; auto compressedDeviceMemAlloc = clDeviceMemAllocINTEL(context, device.get(), nullptr, bufferSize, 0, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, compressedDeviceMemAlloc); auto uncompressibleHostMemAlloc = clHostMemAllocINTEL(context, nullptr, bufferSize, 0, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, uncompressibleHostMemAlloc);
memset(uncompressibleHostMemAlloc, 0, bufferSize); reinterpret_cast(bufferData)[0] = reinterpret_cast(uncompressibleHostMemAlloc); retVal = clEnqueueMemcpyINTEL(pCmdQ, true, compressedDeviceMemAlloc, bufferData, bufferSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clSetKernelArgSVMPointer(multiDeviceKernel.get(), 0, compressedDeviceMemAlloc); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clSetKernelExecInfo(multiDeviceKernel.get(), CL_KERNEL_EXEC_INFO_USM_PTRS_INTEL, sizeof(uncompressibleHostMemAlloc), &uncompressibleHostMemAlloc); EXPECT_EQ(CL_SUCCESS, retVal); size_t globalWorkSize[3] = {bufferSize, 1, 1}; retVal = clEnqueueNDRangeKernel(pCmdQ, multiDeviceKernel.get(), 1, nullptr, globalWorkSize, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clFinish(pCmdQ); EXPECT_EQ(CL_SUCCESS, retVal); reinterpret_cast(bufferData)[0] = 1; expectMemory(uncompressibleHostMemAlloc, bufferData, bufferSize); retVal = clMemFreeINTEL(context, compressedDeviceMemAlloc); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clMemFreeINTEL(context, uncompressibleHostMemAlloc); EXPECT_EQ(CL_SUCCESS, retVal); } /* Same as the preceding test, using CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL instead of listing the USM pointer. */ XE_HP_CORE_TEST_P(UmStatelessCompressionInSBA, givenKernelExecInfoWhenItHasIndirectHostAccessThenDisableCompressionInSBA) { const size_t bufferSize = MemoryConstants::kiloByte; uint8_t bufferData[bufferSize] = {}; auto compressedDeviceMemAlloc = clDeviceMemAllocINTEL(context, device.get(), nullptr, bufferSize, 0, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, compressedDeviceMemAlloc); auto uncompressibleHostMemAlloc = clHostMemAllocINTEL(context, nullptr, bufferSize, 0, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, uncompressibleHostMemAlloc); memset(uncompressibleHostMemAlloc, 0, bufferSize); reinterpret_cast(bufferData)[0] = reinterpret_cast(uncompressibleHostMemAlloc); retVal = clEnqueueMemcpyINTEL(pCmdQ, true, compressedDeviceMemAlloc, bufferData, bufferSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal =
clSetKernelArgSVMPointer(multiDeviceKernel.get(), 0, compressedDeviceMemAlloc); EXPECT_EQ(CL_SUCCESS, retVal); cl_bool enableIndirectHostAccess = CL_TRUE; retVal = clSetKernelExecInfo(multiDeviceKernel.get(), CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL, sizeof(cl_bool), &enableIndirectHostAccess); EXPECT_EQ(CL_SUCCESS, retVal); size_t globalWorkSize[3] = {bufferSize, 1, 1}; retVal = clEnqueueNDRangeKernel(pCmdQ, multiDeviceKernel.get(), 1, nullptr, globalWorkSize, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clFinish(pCmdQ); EXPECT_EQ(CL_SUCCESS, retVal); reinterpret_cast(bufferData)[0] = 1; expectMemory(uncompressibleHostMemAlloc, bufferData, bufferSize); retVal = clMemFreeINTEL(context, compressedDeviceMemAlloc); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clMemFreeINTEL(context, uncompressibleHostMemAlloc); EXPECT_EQ(CL_SUCCESS, retVal); } INSTANTIATE_TEST_CASE_P(, UmStatelessCompressionInSBA, ::testing::Values(aub_stream::ENGINE_RCS, aub_stream::ENGINE_CCS)); /* Fixture combining MulticontextAubFixture and StatelessCopyKernelFixture, with ForceAuxTranslationMode set to Blit and EnableBlitterOperationsSupport enabled; SetUp() skips the test when ftrLocalMemory is not set. NOTE(review): TearDown() calls the two base TearDown()s in the same order as SetUp() rather than in reverse - confirm this is intended. */ struct StatelessCompressionInSBAWithBCS : public MulticontextAubFixture, public StatelessCopyKernelFixture, public ::testing::Test { void SetUp() override { DebugManager.flags.EnableStatelessCompression.set(1); DebugManager.flags.ForceAuxTranslationMode.set(static_cast(AuxTranslationMode::Blit)); DebugManager.flags.EnableBlitterOperationsSupport.set(true); MulticontextAubFixture::SetUp(1, EnabledCommandStreamers::Single, true); StatelessCopyKernelFixture::SetUp(tileDevices[0], context.get()); if (!tileDevices[0]->getHardwareInfo().featureTable.flags.ftrLocalMemory) { GTEST_SKIP(); } } void TearDown() override { MulticontextAubFixture::TearDown(); StatelessCopyKernelFixture::TearDown(); } DebugManagerStateRestore debugRestorer; cl_int retVal = CL_SUCCESS; }; XE_HP_CORE_TEST_F(StatelessCompressionInSBAWithBCS, GENERATEONLY_givenCompressedBufferInDeviceMemoryWhenAccessedStatelesslyThenEnableCompressionInSBA) { const size_t bufferSize = 2048; uint8_t writePattern[bufferSize];
std::fill(writePattern, writePattern + sizeof(writePattern), 1); /* Remainder of the compressed-buffer BCS test: writePattern is written to the compressed buffer, the kernel runs with the compressed and uncompressed buffers as arguments 0 and 1, and the compressed allocation is expected to differ from writePattern while the uncompressed one matches it. */ auto unCompressedBuffer = std::unique_ptr(Buffer::create(context.get(), CL_MEM_UNCOMPRESSED_HINT_INTEL, bufferSize, nullptr, retVal)); auto unCompressedAllocation = unCompressedBuffer->getGraphicsAllocation(tileDevices[0]->getRootDeviceIndex()); EXPECT_EQ(AllocationType::BUFFER, unCompressedAllocation->getAllocationType()); EXPECT_FALSE(unCompressedAllocation->isCompressionEnabled()); EXPECT_EQ(MemoryPool::LocalMemory, unCompressedAllocation->getMemoryPool()); auto compressedBuffer = std::unique_ptr(Buffer::create(context.get(), CL_MEM_COMPRESSED_HINT_INTEL, bufferSize, nullptr, retVal)); auto compressedAllocation = compressedBuffer->getGraphicsAllocation(tileDevices[0]->getRootDeviceIndex()); EXPECT_TRUE(compressedAllocation->isCompressionEnabled()); EXPECT_EQ(MemoryPool::LocalMemory, compressedAllocation->getMemoryPool()); EXPECT_TRUE(compressedAllocation->getDefaultGmm()->isCompressionEnabled); retVal = commandQueues[0][0]->enqueueWriteBuffer(compressedBuffer.get(), CL_FALSE, 0, bufferSize, writePattern, nullptr, 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); retVal = kernel->setArg(0, compressedBuffer.get()); ASSERT_EQ(CL_SUCCESS, retVal); retVal = kernel->setArg(1, unCompressedBuffer.get()); ASSERT_EQ(CL_SUCCESS, retVal); size_t globalWorkSize[3] = {bufferSize, 1, 1}; retVal = commandQueues[0][0]->enqueueKernel(kernel, 1, nullptr, globalWorkSize, nullptr, 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); commandQueues[0][0]->finish(); expectMemoryNotEqual(AUBFixture::getGpuPointer(compressedAllocation), writePattern, bufferSize, 0, 0); expectMemory(AUBFixture::getGpuPointer(unCompressedAllocation), writePattern, bufferSize, 0, 0); } /* Runs the kernel with a compressed buffer as argument 0 and a CL_MEM_FORCE_HOST_MEMORY_INTEL buffer as argument 1; the compressed allocation is expected to differ from writePattern and the host-memory allocation to match it. */ XE_HP_CORE_TEST_F(StatelessCompressionInSBAWithBCS, givenUncompressibleBufferInHostMemoryWhenAccessedStatelesslyThenDisableCompressionInSBA) { const size_t bufferSize = 2048; uint8_t writePattern[bufferSize]; std::fill(writePattern, writePattern +
sizeof(writePattern), 1); auto compressedBuffer = std::unique_ptr(Buffer::create(context.get(), CL_MEM_COMPRESSED_HINT_INTEL, bufferSize, nullptr, retVal)); auto compressedAllocation = compressedBuffer->getGraphicsAllocation(tileDevices[0]->getRootDeviceIndex()); EXPECT_TRUE(compressedAllocation->isCompressionEnabled()); EXPECT_EQ(MemoryPool::LocalMemory, compressedAllocation->getMemoryPool()); EXPECT_TRUE(compressedAllocation->getDefaultGmm()->isCompressionEnabled); auto uncompressibleBufferInHostMemory = std::unique_ptr(Buffer::create(context.get(), CL_MEM_FORCE_HOST_MEMORY_INTEL, bufferSize, nullptr, retVal)); auto uncompressibleAllocationInHostMemory = uncompressibleBufferInHostMemory->getGraphicsAllocation(tileDevices[0]->getRootDeviceIndex()); EXPECT_EQ(AllocationType::BUFFER_HOST_MEMORY, uncompressibleAllocationInHostMemory->getAllocationType()); EXPECT_TRUE(MemoryPool::isSystemMemoryPool(uncompressibleAllocationInHostMemory->getMemoryPool())); retVal = commandQueues[0][0]->enqueueWriteBuffer(compressedBuffer.get(), CL_FALSE, 0, bufferSize, writePattern, nullptr, 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); retVal = kernel->setArg(0, compressedBuffer.get()); ASSERT_EQ(CL_SUCCESS, retVal); retVal = kernel->setArg(1, uncompressibleBufferInHostMemory.get()); ASSERT_EQ(CL_SUCCESS, retVal); size_t globalWorkSize[3] = {bufferSize, 1, 1}; retVal = commandQueues[0][0]->enqueueKernel(kernel, 1, nullptr, globalWorkSize, nullptr, 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); commandQueues[0][0]->finish(); expectMemoryNotEqual(AUBFixture::getGpuPointer(compressedAllocation), writePattern, bufferSize, 0, 0); expectMemory(AUBFixture::getGpuPointer(uncompressibleAllocationInHostMemory), writePattern, bufferSize, 0, 0); }
compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/xe_hpc_core/000077500000000000000000000000001422164147700260765ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/xe_hpc_core/CMakeLists.txt000066400000000000000000000013531422164147700306400ustar00rootroot00000000000000# # Copyright (C) 2022 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_XE_HPC_CORE) target_sources(igdrcl_aub_tests PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/aub_mem_dump_tests_xe_hpc_core.cpp ${CMAKE_CURRENT_SOURCE_DIR}/copy_engine_aub_tests_xe_hpc_core.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_kernel_aub_tests_xe_hpc_core.cpp ${CMAKE_CURRENT_SOURCE_DIR}/system_memfence_aub_tests_xe_hpc_core.cpp ${CMAKE_CURRENT_SOURCE_DIR}/um_stateless_compression_aub_tests_xe_hpc_core.cpp ${CMAKE_CURRENT_SOURCE_DIR}/aub_test_excludes_xe_hpc_core.cpp ) add_subdirectories() endif() aub_mem_dump_tests_xe_hpc_core.cpp000066400000000000000000000024561422164147700347440ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/xe_hpc_core/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/ult_hw_config.h" #include "shared/test/common/helpers/variable_backup.h" #include "opencl/test/unit_test/aub_tests/command_stream/aub_mem_dump_tests.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" /* NOTE(review): the template argument of Test appears to be missing in this copy - confirm against the original file. */ using XeHpcCoreAubMemDumpTests = Test; XE_HPC_CORETEST_F(XeHpcCoreAubMemDumpTests, GivenCcsThenExpectationsAreMet) { setupAUB(pDevice, aub_stream::ENGINE_CCS); } /* Sets ultHwConfig.useHwCsr and the SetCommandStreamReceiver flag (CSR_AUB), creates a MockDevice and expects its GPGPU command stream receiver to hold a non-null hardwareContextController with at least one hardware context. */ XE_HPC_CORETEST_F(XeHpcCoreAubMemDumpTests, whenAubCsrIsCreatedThenCreateHardwareContext) { DebugManagerStateRestore restore; VariableBackup backup(&ultHwConfig); ultHwConfig.useHwCsr = true; DebugManager.flags.SetCommandStreamReceiver.set(CommandStreamReceiverType::CSR_AUB); std::unique_ptr
device(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); auto &baseCsr = device->getGpgpuCommandStreamReceiver(); auto &aubCsr = static_cast &>(baseCsr); EXPECT_NE(nullptr, aubCsr.hardwareContextController.get()); EXPECT_NE(0u, aubCsr.hardwareContextController->hardwareContexts.size()); } aub_test_excludes_xe_hpc_core.cpp000066400000000000000000000004301422164147700345600ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/xe_hpc_core/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" HWTEST_EXCLUDE_PRODUCT(MiAtomicAubTest, GivenSystemMemoryWhenDispatchingAtomicMove4BytesOperationThenExpectCorrectEndValues, IGFX_XE_HPC_CORE); copy_engine_aub_tests_xe_hpc_core.cpp000066400000000000000000000120071422164147700354310ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/xe_hpc_core/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/test/unit_test/aub_tests/command_stream/copy_engine_aub_tests_xehp_and_later.h" using namespace NEO; /* Parameterized copy-engine fixture: SetUp() stores GetParam() in bcsEngineType before calling CopyEngineXeHPAndLater::SetUp(). NOTE(review): the template parameter list and base-class template arguments appear to be missing in this copy. */ template struct CopyEnginesPvcFixture : public CopyEngineXeHPAndLater, public ::testing::WithParamInterface { using MEM_COPY = typename FamilyType::MEM_COPY; void SetUp() override { this->bcsEngineType = static_cast(GetParam()); CopyEngineXeHPAndLater::SetUp(); } }; /* Engine ids the copy-engine suites are instantiated with: ENGINE_BCS and ENGINE_BCS1 through ENGINE_BCS8. */ constexpr uint32_t allSupportedCopyEngines[] = { aub_stream::EngineType::ENGINE_BCS, aub_stream::EngineType::ENGINE_BCS1, aub_stream::EngineType::ENGINE_BCS2, aub_stream::EngineType::ENGINE_BCS3, aub_stream::EngineType::ENGINE_BCS4, aub_stream::EngineType::ENGINE_BCS5, aub_stream::EngineType::ENGINE_BCS6, aub_stream::EngineType::ENGINE_BCS7, aub_stream::EngineType::ENGINE_BCS8, }; using OneTilePvcTests = CopyEnginesPvcFixture<1, XE_HPC_COREFamily>;
INSTANTIATE_TEST_CASE_P( MemCopyBcsCmd, OneTilePvcTests, testing::ValuesIn(allSupportedCopyEngines)); /* Each of the following OneTilePvcTests / OneTileSystemMemoryPvcTests cases only forwards to the fixture method that has the same name plus an Impl suffix. */ XE_HPC_CORETEST_P(OneTilePvcTests, givenNotCompressedBufferWhenBltExecutedThenCompressDataAndResolve) { givenNotCompressedBufferWhenBltExecutedThenCompressDataAndResolveImpl(); } XE_HPC_CORETEST_P(OneTilePvcTests, givenHostPtrWhenBlitCommandToCompressedBufferIsDispatchedThenCopiedDataIsValid) { givenHostPtrWhenBlitCommandToCompressedBufferIsDispatchedThenCopiedDataIsValidImpl(); } XE_HPC_CORETEST_P(OneTilePvcTests, givenDstHostPtrWhenBlitCommandFromCompressedBufferIsDispatchedThenCopiedDataIsValid) { givenDstHostPtrWhenBlitCommandFromCompressedBufferIsDispatchedThenCopiedDataIsValidImpl(); } XE_HPC_CORETEST_P(OneTilePvcTests, givenDstHostPtrWhenBlitCommandFromNotCompressedBufferIsDispatchedThenCopiedDataIsValid) { givenDstHostPtrWhenBlitCommandFromNotCompressedBufferIsDispatchedThenCopiedDataIsValidImpl(); } XE_HPC_CORETEST_P(OneTilePvcTests, givenSrcHostPtrWhenBlitCommandToNotCompressedBufferIsDispatchedThenCopiedDataIsValid) { givenSrcHostPtrWhenBlitCommandToNotCompressedBufferIsDispatchedThenCopiedDataIsValidImpl(); } XE_HPC_CORETEST_P(OneTilePvcTests, givenBufferWithOffsetWhenHostPtrBlitCommandIsDispatchedFromHostPtrThenDataIsCorrectlyCopied) { givenBufferWithOffsetWhenHostPtrBlitCommandIsDispatchedFromHostPtrThenDataIsCorrectlyCopiedImpl(); } XE_HPC_CORETEST_P(OneTilePvcTests, givenBufferWithOffsetWhenHostPtrBlitCommandIsDispatchedToHostPtrThenDataIsCorrectlyCopied) { givenBufferWithOffsetWhenHostPtrBlitCommandIsDispatchedToHostPtrThenDataIsCorrectlyCopiedImpl(); } XE_HPC_CORETEST_P(OneTilePvcTests, givenOffsetsWhenBltExecutedThenCopiedDataIsValid) { givenOffsetsWhenBltExecutedThenCopiedDataIsValidImpl(); } XE_HPC_CORETEST_P(OneTilePvcTests, givenSrcCompressedBufferWhenBlitCommandToDstCompressedBufferIsDispatchedThenCopiedDataIsValid) { givenSrcCompressedBufferWhenBlitCommandToDstCompressedBufferIsDispatchedThenCopiedDataIsValidImpl(); }
XE_HPC_CORETEST_P(OneTilePvcTests, givenCompressedBufferWhenAuxTranslationCalledThenResolveAndCompress) { givenCompressedBufferWhenAuxTranslationCalledThenResolveAndCompressImpl(); } XE_HPC_CORETEST_P(OneTilePvcTests, givenCopyBufferRectWithBigSizesWhenHostPtrBlitCommandIsDispatchedToHostPtrThenDataIsCorrectlyCopied) { givenCopyBufferRectWithBigSizesWhenHostPtrBlitCommandIsDispatchedToHostPtrThenDataIsCorrectlyCopiedImpl(); } /* Second instantiation of the fixture, with an extra template argument of false; it is run over the same engine list. */ using OneTileSystemMemoryPvcTests = CopyEnginesPvcFixture<1, XE_HPC_COREFamily, false>; INSTANTIATE_TEST_CASE_P( MemCopyBcsCmd, OneTileSystemMemoryPvcTests, testing::ValuesIn(allSupportedCopyEngines)); XE_HPC_CORETEST_P(OneTileSystemMemoryPvcTests, givenSrcSystemBufferWhenBlitCommandToDstSystemBufferIsDispatchedThenCopiedDataIsValid) { givenSrcSystemBufferWhenBlitCommandToDstSystemBufferIsDispatchedThenCopiedDataIsValidImpl(); } XE_HPC_CORETEST_P(OneTilePvcTests, givenReadBufferRectWithOffsetWhenHostPtrBlitCommandIsDispatchedToHostPtrThenDataIsCorrectlyCopied) { givenReadBufferRectWithOffsetWhenHostPtrBlitCommandIsDispatchedToHostPtrThenDataIsCorrectlyCopiedImpl(); } XE_HPC_CORETEST_P(OneTilePvcTests, givenWriteBufferRectWithOffsetWhenHostPtrBlitCommandIsDispatchedToHostPtrThenDataIsCorrectlyCopied) { givenWriteBufferRectWithOffsetWhenHostPtrBlitCommandIsDispatchedToHostPtrThenDataIsCorrectlyCopiedImpl(); } XE_HPC_CORETEST_P(OneTilePvcTests, givenCopyBufferRectWithOffsetWhenHostPtrBlitCommandIsDispatchedToHostPtrThenDataIsCorrectlyCopied) { givenCopyBufferRectWithOffsetWhenHostPtrBlitCommandIsDispatchedToHostPtrThenDataIsCorrectlyCopiedImpl(); } enqueue_kernel_aub_tests_xe_hpc_core.cpp000066400000000000000000000034061422164147700361440ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/xe_hpc_core/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/test_macros/test.h" #include
"opencl/source/command_queue/command_queue.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/aub_tests/fixtures/aub_fixture.h" #include "opencl/test/unit_test/fixtures/simple_arg_fixture.h" using namespace NEO; extern const HardwareInfo *defaultHwInfo; using AUBSimpleKernelStatelessTest = Test>; HWTEST_F(AUBSimpleKernelStatelessTest, givenPrefetchEnabledWhenEnqueuedKernelThenDataIsCorrect) { DebugManagerStateRestore restore; DebugManager.flags.EnableMemoryPrefetch.set(1); constexpr size_t bufferSize = MemoryConstants::pageSize; cl_uint workDim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {bufferSize, 1, 1}; size_t localWorkSize[3] = {1, 1, 1}; uint8_t bufferData[bufferSize] = {}; uint8_t bufferExpected[bufferSize]; memset(bufferExpected, 0xCD, bufferSize); auto buffer = std::unique_ptr(Buffer::create(context, CL_MEM_USE_HOST_PTR | CL_MEM_ALLOW_UNRESTRICTED_SIZE_INTEL, bufferSize, bufferData, retVal)); ASSERT_NE(nullptr, buffer); kernel->setArg(0, buffer.get()); retVal = this->pCmdQ->enqueueKernel(kernel.get(), workDim, globalWorkOffset, globalWorkSize, localWorkSize, 0, nullptr, nullptr); this->pCmdQ->flush(); expectMemory(reinterpret_cast(buffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress()), bufferExpected, bufferSize); } system_memfence_aub_tests_xe_hpc_core.cpp000066400000000000000000000222511422164147700363170ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/xe_hpc_core/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/source/api/api.h" #include "opencl/test/unit_test/aub_tests/fixtures/aub_fixture.h" #include "opencl/test/unit_test/aub_tests/fixtures/multicontext_aub_fixture.h" #include 
"opencl/test/unit_test/fixtures/program_fixture.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" using namespace NEO; class SystemMemFenceViaMiMemFence : public AUBFixture, public ::testing::Test { public: void SetUp() override { DebugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(1); DebugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(0); DebugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(0); AUBFixture::SetUp(defaultHwInfo.get()); } void TearDown() override { AUBFixture::TearDown(); } DebugManagerStateRestore debugRestorer; cl_int retVal = CL_SUCCESS; }; XE_HPC_CORETEST_F(SystemMemFenceViaMiMemFence, givenSystemMemFenceWhenMiMemFenceInCommandStreamThenWritesToSystemMemoryAreGloballyObservable) { const size_t bufferSize = MemoryConstants::kiloByte; std::vector buffer(bufferSize, 0x11); auto deviceMemAlloc = clDeviceMemAllocINTEL(this->context, this->device.get(), nullptr, bufferSize, 0, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, deviceMemAlloc); retVal = clEnqueueMemcpyINTEL(this->pCmdQ, true, deviceMemAlloc, buffer.data(), bufferSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); expectMemory(deviceMemAlloc, buffer.data(), bufferSize); auto hostMemAlloc = clHostMemAllocINTEL(this->context, nullptr, bufferSize, 0, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, hostMemAlloc); retVal = clEnqueueMemcpyINTEL(this->pCmdQ, true, hostMemAlloc, deviceMemAlloc, bufferSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); expectMemory(hostMemAlloc, buffer.data(), bufferSize); retVal = clMemFreeINTEL(this->context, deviceMemAlloc); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clMemFreeINTEL(this->context, hostMemAlloc); EXPECT_EQ(CL_SUCCESS, retVal); } class SystemMemFenceViaComputeWalker : public AUBFixture, public ::testing::Test { public: void SetUp() override { DebugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(0); 
DebugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(1);
        DebugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(0);
        // The base fixture is set up only after the debug flags have been written.
        AUBFixture::SetUp(defaultHwInfo.get());
    }
    void TearDown() override {
        AUBFixture::TearDown();
    }
    // NOTE(review): presumably restores the debug flags when the fixture is destroyed - confirm.
    DebugManagerStateRestore debugRestorer;
    // Status value written by the API calls in the test body.
    cl_int retVal = CL_SUCCESS;
};

// Copies a 0x11-filled pattern of MemoryConstants::kiloByte elements
// host -> device USM -> host USM. Both copies pass `true` as the blocking
// argument, and the destination is compared with the pattern after each copy.
XE_HPC_CORETEST_F(SystemMemFenceViaComputeWalker, givenSystemMemFenceWhenPostSyncOperationThenWritesToSystemMemoryAreGloballyObservable) {
    const size_t bufferSize = MemoryConstants::kiloByte;
    std::vector buffer(bufferSize, 0x11);

    auto deviceMemAlloc = clDeviceMemAllocINTEL(this->context, this->device.get(), nullptr, bufferSize, 0, &retVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    ASSERT_NE(nullptr, deviceMemAlloc);

    // host pattern -> device allocation
    retVal = clEnqueueMemcpyINTEL(this->pCmdQ, true, deviceMemAlloc, buffer.data(), bufferSize, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    expectMemory(deviceMemAlloc, buffer.data(), bufferSize);

    auto hostMemAlloc = clHostMemAllocINTEL(this->context, nullptr, bufferSize, 0, &retVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    ASSERT_NE(nullptr, hostMemAlloc);

    // device allocation -> host allocation; this is the write to system memory being verified
    retVal = clEnqueueMemcpyINTEL(this->pCmdQ, true, hostMemAlloc, deviceMemAlloc, bufferSize, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    expectMemory(hostMemAlloc, buffer.data(), bufferSize);

    retVal = clMemFreeINTEL(this->context, deviceMemAlloc);
    EXPECT_EQ(CL_SUCCESS, retVal);
    retVal = clMemFreeINTEL(this->context, hostMemAlloc);
    EXPECT_EQ(CL_SUCCESS, retVal);
}

// Multi-context AUB fixture that selects the "MI_MEM_FENCE command in command
// stream" fence variant (flags 1/0/0) and additionally sets the two blitter
// debug flags to 1 before the base set-up.
class SystemMemFenceWithBlitter : public MulticontextAubFixture, public ::testing::Test {
  public:
    void SetUp() override {
        DebugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(1);
        DebugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(0);
        DebugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(0);
        DebugManager.flags.EnableBlitterOperationsSupport.set(1);
        DebugManager.flags.EnableBlitterForEnqueueOperations.set(1);

        // NOTE(review): arguments look like (tile count, enabled command streamers, flag) -
        // confirm against MulticontextAubFixture::SetUp.
        MulticontextAubFixture::SetUp(1, EnabledCommandStreamers::Single, true);
    }
    void TearDown() override {
        MulticontextAubFixture::TearDown();
    }
    // NOTE(review): presumably restores the debug flags when the fixture is destroyed - confirm.
    DebugManagerStateRestore debugRestorer;
    cl_int retVal = CL_SUCCESS;
};

// Same host -> device USM -> host USM round trip as the tests above, issued on
// commandQueues[0][0] of the multi-context fixture.
XE_HPC_CORETEST_F(SystemMemFenceWithBlitter, givenSystemMemFenceWhenGeneratedAsMiMemFenceCmdInBCSThenWritesToSystemMemoryAreGloballyObservable) {
    const size_t bufferSize = MemoryConstants::kiloByte;
    std::vector buffer(bufferSize, 0x11);

    auto deviceMemAlloc = clDeviceMemAllocINTEL(context.get(), tileDevices[0], nullptr, bufferSize, 0, &retVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    ASSERT_NE(nullptr, deviceMemAlloc);

    retVal = clEnqueueMemcpyINTEL(commandQueues[0][0].get(), true, deviceMemAlloc, buffer.data(), bufferSize, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    // NOTE(review): the two trailing zeros are presumably tile and engine indices - confirm.
    expectMemory(deviceMemAlloc, buffer.data(), bufferSize, 0, 0);

    auto hostMemAlloc = clHostMemAllocINTEL(context.get(), nullptr, bufferSize, 0, &retVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    ASSERT_NE(nullptr, hostMemAlloc);

    retVal = clEnqueueMemcpyINTEL(commandQueues[0][0].get(), true, hostMemAlloc, deviceMemAlloc, bufferSize, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    expectMemory(hostMemAlloc, buffer.data(), bufferSize, 0, 0);

    retVal = clMemFreeINTEL(context.get(), deviceMemAlloc);
    EXPECT_EQ(CL_SUCCESS, retVal);
    retVal = clMemFreeINTEL(context.get(), hostMemAlloc);
    EXPECT_EQ(CL_SUCCESS, retVal);
}

// Fixture that selects the "kernel instruction in EU kernel" fence variant
// (flags 0/0/1). ProgramFixture is set up before MulticontextAubFixture and
// torn down after it.
class SystemMemFenceViaKernel : public ProgramFixture, public MulticontextAubFixture, public ::testing::Test {
  public:
    void SetUp() override {
        DebugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(0);
        DebugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(0);
        DebugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(1);

        ProgramFixture::SetUp();
        MulticontextAubFixture::SetUp(1, EnabledCommandStreamers::Single, true);
    }
    void TearDown() override {
        // Reverse order of SetUp.
        MulticontextAubFixture::TearDown();
        ProgramFixture::TearDown();
    }
    // NOTE(review): presumably restores the debug flags when the fixture is destroyed - confirm.
    DebugManagerStateRestore debugRestorer;
    cl_int retVal = CL_SUCCESS;
};

// Fills a device USM allocation with the 0x11 pattern, then runs the
// "SystemMemFence" kernel from the "system_memfence" program binary with the
// device allocation as argument 0 and a host USM allocation as argument 1.
// After finish() the host allocation is expected to hold the pattern.
XE_HPC_CORETEST_F(SystemMemFenceViaKernel, givenSystemMemFenceWhenKernelInstructionThenWritesToSystemMemoryAreGloballyObservable) {
    const size_t bufferSize = MemoryConstants::kiloByte;
    std::vector buffer(bufferSize, 0x11);

    auto deviceMemAlloc = clDeviceMemAllocINTEL(context.get(), tileDevices[0], nullptr, bufferSize, 0, &retVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    ASSERT_NE(nullptr, deviceMemAlloc);

    retVal = clEnqueueMemcpyINTEL(commandQueues[0][0].get(), true, deviceMemAlloc, buffer.data(), bufferSize, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    expectMemory(deviceMemAlloc, buffer.data(), bufferSize, 0, 0);

    auto hostMemAlloc = clHostMemAllocINTEL(context.get(), nullptr, bufferSize, 0, &retVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    ASSERT_NE(nullptr, hostMemAlloc);

    // Build the program and create the kernel; every step is fatal on failure.
    CreateProgramFromBinary(context.get(), context->getDevices(), "system_memfence");
    retVal = pProgram->build(pProgram->getDevices(), nullptr, false);
    ASSERT_EQ(CL_SUCCESS, retVal);

    const KernelInfo *pKernelInfo = pProgram->getKernelInfo("SystemMemFence", rootDeviceIndex);
    ASSERT_NE(nullptr, pKernelInfo);

    auto pMultiDeviceKernel = clUniquePtr(MultiDeviceKernel::create(pProgram, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), &retVal));
    ASSERT_NE(nullptr, pMultiDeviceKernel);

    retVal = clSetKernelArgSVMPointer(pMultiDeviceKernel.get(), 0, deviceMemAlloc);
    ASSERT_EQ(CL_SUCCESS, retVal);
    retVal = clSetKernelArgSVMPointer(pMultiDeviceKernel.get(), 1, hostMemAlloc);
    ASSERT_EQ(CL_SUCCESS, retVal);

    // One-dimensional range of bufferSize work-items; local size is left to the runtime (nullptr).
    size_t globalWorkSize[3] = {bufferSize, 1, 1};
    retVal = commandQueues[0][0]->enqueueKernel(pMultiDeviceKernel->getKernel(rootDeviceIndex), 1, nullptr, globalWorkSize, nullptr, 0, nullptr, nullptr);
    ASSERT_EQ(CL_SUCCESS, retVal);
    commandQueues[0][0]->finish();

    expectMemory(hostMemAlloc, buffer.data(), bufferSize, 0, 0);

    retVal = clMemFreeINTEL(context.get(), deviceMemAlloc);
    EXPECT_EQ(CL_SUCCESS, retVal);
    retVal = clMemFreeINTEL(context.get(), hostMemAlloc);
    EXPECT_EQ(CL_SUCCESS, retVal);
}
um_stateless_compression_aub_tests_xe_hpc_core.cpp000066400000000000000000000206021422164147700402630ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/xe_hpc_core/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/source/api/api.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/aub_tests/fixtures/aub_fixture.h" #include "opencl/test/unit_test/aub_tests/fixtures/multicontext_aub_fixture.h" #include "opencl/test/unit_test/fixtures/program_fixture.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" using namespace NEO; class UmStatelessCompression : public AUBFixture, public ::testing::Test, public ::testing::WithParamInterface { public: void SetUp() override { DebugManager.flags.EnableStatelessCompressionWithUnifiedMemory.set(1); compareCompressedMemory = GetParam(); AUBFixture::SetUp(defaultHwInfo.get()); } void TearDown() override { AUBFixture::TearDown(); } DebugManagerStateRestore debugRestorer; cl_int retVal = CL_SUCCESS; bool compareCompressedMemory = false; }; XE_HPC_CORETEST_P(UmStatelessCompression, givenDeviceMemAllocWhenStatelessCompressionIsEnabledThenAllocationDataIsCompressed) { const size_t bufferSize = MemoryConstants::kiloByte; std::vector buffer(bufferSize, 0x11); auto deviceMemAlloc = clDeviceMemAllocINTEL(this->context, this->device.get(), nullptr, bufferSize, 0, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, deviceMemAlloc); retVal = clEnqueueMemcpyINTEL(this->pCmdQ, true, deviceMemAlloc, buffer.data(), bufferSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); if (compareCompressedMemory) { expectCompressedMemory(deviceMemAlloc, buffer.data(), bufferSize); } else { expectMemory(deviceMemAlloc, buffer.data(), bufferSize); } auto 
hostMemAlloc = clHostMemAllocINTEL(this->context, nullptr, bufferSize, 0, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, hostMemAlloc); retVal = clEnqueueMemcpyINTEL(this->pCmdQ, true, hostMemAlloc, deviceMemAlloc, bufferSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); expectMemory(hostMemAlloc, buffer.data(), bufferSize); retVal = clMemFreeINTEL(this->context, deviceMemAlloc); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clMemFreeINTEL(this->context, hostMemAlloc); EXPECT_EQ(CL_SUCCESS, retVal); } INSTANTIATE_TEST_CASE_P(, UmStatelessCompression, ::testing::Bool()); class UmStatelessCompressionWithBlitter : public MulticontextAubFixture, public ::testing::Test, public ::testing::WithParamInterface { public: void SetUp() override { DebugManager.flags.EnableStatelessCompressionWithUnifiedMemory.set(1); DebugManager.flags.EnableBlitterOperationsSupport.set(1); DebugManager.flags.EnableBlitterForEnqueueOperations.set(1); compareCompressedMemory = GetParam(); MulticontextAubFixture::SetUp(1, EnabledCommandStreamers::Single, true); } void TearDown() override { MulticontextAubFixture::TearDown(); } DebugManagerStateRestore debugRestorer; cl_int retVal = CL_SUCCESS; bool compareCompressedMemory = false; }; XE_HPC_CORETEST_P(UmStatelessCompressionWithBlitter, givenDeviceMemAllocWhenItIsAccessedWithBlitterThenProgramBlitterWithCompressionSettings) { const size_t bufferSize = MemoryConstants::kiloByte; std::vector buffer(bufferSize, 0x11); auto deviceMemAlloc = clDeviceMemAllocINTEL(context.get(), tileDevices[0], nullptr, bufferSize, 0, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, deviceMemAlloc); retVal = clEnqueueMemcpyINTEL(commandQueues[0][0].get(), true, deviceMemAlloc, buffer.data(), bufferSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); if (compareCompressedMemory) { expectMemoryCompressed(deviceMemAlloc, buffer.data(), bufferSize, 0, 0); } else { expectMemory(deviceMemAlloc, buffer.data(), bufferSize, 0, 0); } auto 
hostMemAlloc = clHostMemAllocINTEL(context.get(), nullptr, bufferSize, 0, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, hostMemAlloc); retVal = clEnqueueMemcpyINTEL(commandQueues[0][0].get(), true, hostMemAlloc, deviceMemAlloc, bufferSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); expectMemory(hostMemAlloc, buffer.data(), bufferSize, 0, 0); retVal = clMemFreeINTEL(context.get(), deviceMemAlloc); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clMemFreeINTEL(context.get(), hostMemAlloc); EXPECT_EQ(CL_SUCCESS, retVal); } INSTANTIATE_TEST_CASE_P(, UmStatelessCompressionWithBlitter, ::testing::Bool()); class UmStatelessCompressionWithStatefulAccess : public ProgramFixture, public MulticontextAubFixture, public ::testing::Test, public ::testing::WithParamInterface { public: void SetUp() override { DebugManager.flags.EnableStatelessCompressionWithUnifiedMemory.set(1); DebugManager.flags.EnableSharedSystemUsmSupport.set(0); compareCompressedMemory = GetParam(); ProgramFixture::SetUp(); MulticontextAubFixture::SetUp(1, EnabledCommandStreamers::Single, true); } void TearDown() override { MulticontextAubFixture::TearDown(); ProgramFixture::TearDown(); } DebugManagerStateRestore debugRestorer; cl_int retVal = CL_SUCCESS; bool compareCompressedMemory = false; }; XE_HPC_CORETEST_P(UmStatelessCompressionWithStatefulAccess, givenDeviceMemAllocWhenItIsAccessedStatefullyThenProgramStateWithCompressionSettings) { const size_t bufferSize = MemoryConstants::kiloByte; std::vector buffer(bufferSize, 0x11); auto deviceMemAlloc = clDeviceMemAllocINTEL(context.get(), tileDevices[0], nullptr, bufferSize, 0, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, deviceMemAlloc); retVal = clEnqueueMemcpyINTEL(commandQueues[0][0].get(), true, deviceMemAlloc, buffer.data(), bufferSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); if (compareCompressedMemory) { expectMemoryCompressed(deviceMemAlloc, buffer.data(), bufferSize, 0, 0); } else { 
expectMemory(deviceMemAlloc, buffer.data(), bufferSize, 0, 0); } auto hostMemAlloc = clHostMemAllocINTEL(context.get(), nullptr, bufferSize, 0, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, hostMemAlloc); CreateProgramFromBinary(context.get(), context->getDevices(), "stateful_copy_buffer"); retVal = pProgram->build(context->getDevices(), nullptr, false); ASSERT_EQ(CL_SUCCESS, retVal); const KernelInfo *pKernelInfo = pProgram->getKernelInfo("StatefulCopyBuffer", rootDeviceIndex); ASSERT_NE(nullptr, pKernelInfo); auto pMultiDeviceKernel = clUniquePtr(MultiDeviceKernel::create(pProgram, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), &retVal)); ASSERT_NE(nullptr, pMultiDeviceKernel); retVal = clSetKernelArgSVMPointer(pMultiDeviceKernel.get(), 0, deviceMemAlloc); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clSetKernelArgSVMPointer(pMultiDeviceKernel.get(), 1, hostMemAlloc); ASSERT_EQ(CL_SUCCESS, retVal); size_t globalWorkSize[3] = {bufferSize, 1, 1}; retVal = commandQueues[0][0]->enqueueKernel(pMultiDeviceKernel->getKernel(rootDeviceIndex), 1, nullptr, globalWorkSize, nullptr, 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); commandQueues[0][0]->finish(); expectMemory(hostMemAlloc, buffer.data(), bufferSize, 0, 0); retVal = clMemFreeINTEL(context.get(), deviceMemAlloc); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clMemFreeINTEL(context.get(), hostMemAlloc); EXPECT_EQ(CL_SUCCESS, retVal); } INSTANTIATE_TEST_CASE_P(, UmStatelessCompressionWithStatefulAccess, ::testing::Bool()); compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/xe_hpg_core/000077500000000000000000000000001422164147700261025ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/xe_hpg_core/CMakeLists.txt000066400000000000000000000006401422164147700306420ustar00rootroot00000000000000# # Copyright (C) 2022 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_XE_HPG_CORE) target_sources(igdrcl_aub_tests PRIVATE 
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/aub_mem_dump_tests_xe_hpg_core.cpp ${CMAKE_CURRENT_SOURCE_DIR}/aub_tests_stateless_compression_in_sba_xe_hpg_core.cpp ) add_subdirectories() endif() aub_mem_dump_tests_xe_hpg_core.cpp000066400000000000000000000025771422164147700347600ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/xe_hpg_core/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/ult_hw_config.h" #include "shared/test/common/helpers/variable_backup.h" #include "opencl/test/unit_test/aub_tests/command_stream/aub_mem_dump_tests.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" namespace NEO { extern bool overrideCommandStreamReceiverCreation; using XeHpgCoreAubMemDumpTests = Test; XE_HPG_CORETEST_F(XeHpgCoreAubMemDumpTests, GivenCcsThenExpectationsAreMet) { setupAUB(pDevice, aub_stream::ENGINE_CCS); } XE_HPG_CORETEST_F(XeHpgCoreAubMemDumpTests, whenAubCsrIsCreatedThenCreateHardwareContext) { DebugManagerStateRestore restore; VariableBackup backup(&ultHwConfig); ultHwConfig.useHwCsr = true; DebugManager.flags.SetCommandStreamReceiver.set(CommandStreamReceiverType::CSR_AUB); std::unique_ptr device(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); auto &baseCsr = device->getGpgpuCommandStreamReceiver(); auto &aubCsr = static_cast &>(baseCsr); EXPECT_NE(nullptr, aubCsr.hardwareContextController.get()); EXPECT_NE(0u, aubCsr.hardwareContextController->hardwareContexts.size()); } } // namespace NEO aub_tests_stateless_compression_in_sba_xe_hpg_core.cpp000066400000000000000000000712441422164147700411150ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/aub_tests/xe_hpg_core/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gmm_helper/resource_info.h" #include 
"shared/source/memory_manager/unified_memory_manager.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/extensions/public/cl_ext_private.h" #include "opencl/source/api/api.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/aub_tests/fixtures/aub_fixture.h" #include "opencl/test/unit_test/aub_tests/fixtures/multicontext_aub_fixture.h" #include "opencl/test/unit_test/fixtures/simple_arg_kernel_fixture.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" using namespace NEO; struct XeHpgCoreStatelessCompressionInSBA : public KernelAUBFixture, public ::testing::Test, public ::testing::WithParamInterface { void SetUp() override { DebugManager.flags.EnableStatelessCompression.set(1); DebugManager.flags.RenderCompressedBuffersEnabled.set(true); DebugManager.flags.RenderCompressedImagesEnabled.set(true); DebugManager.flags.EnableLocalMemory.set(true); DebugManager.flags.NodeOrdinal.set(GetParam()); DebugManager.flags.ForceAuxTranslationMode.set(static_cast(AuxTranslationMode::Builtin)); KernelAUBFixture::SetUp(); if (!device->getHardwareInfo().featureTable.flags.ftrLocalMemory) { GTEST_SKIP(); } } void TearDown() override { KernelAUBFixture::TearDown(); } DebugManagerStateRestore debugRestorer; cl_int retVal = CL_SUCCESS; }; XE_HPG_CORETEST_P(XeHpgCoreStatelessCompressionInSBA, GENERATEONLY_givenCompressedBuffersWhenStatelessAccessToLocalMemoryThenEnableCompressionInSBA) { const size_t bufferSize = 2048; uint8_t writePattern[bufferSize]; std::fill(writePattern, writePattern + sizeof(writePattern), 1); device->getGpgpuCommandStreamReceiver().overrideDispatchPolicy(DispatchMode::BatchedDispatch); auto unCompressedBuffer = std::unique_ptr(Buffer::create(context, CL_MEM_UNCOMPRESSED_HINT_INTEL, bufferSize, nullptr, retVal)); auto unCompressedAllocation = 
unCompressedBuffer->getGraphicsAllocation(device->getRootDeviceIndex()); EXPECT_EQ(AllocationType::BUFFER, unCompressedAllocation->getAllocationType()); EXPECT_FALSE(unCompressedAllocation->isCompressionEnabled()); EXPECT_EQ(MemoryPool::LocalMemory, unCompressedAllocation->getMemoryPool()); auto compressedBuffer1 = std::unique_ptr(Buffer::create(context, CL_MEM_COMPRESSED_HINT_INTEL, bufferSize, nullptr, retVal)); auto compressedAllocation1 = compressedBuffer1->getGraphicsAllocation(device->getRootDeviceIndex()); EXPECT_TRUE(compressedAllocation1->isCompressionEnabled()); EXPECT_EQ(MemoryPool::LocalMemory, compressedAllocation1->getMemoryPool()); EXPECT_TRUE(compressedAllocation1->getDefaultGmm()->isCompressionEnabled); auto compressedBuffer2 = std::unique_ptr(Buffer::create(context, CL_MEM_COMPRESSED_HINT_INTEL, bufferSize, nullptr, retVal)); auto compressedAllocation2 = compressedBuffer2->getGraphicsAllocation(device->getRootDeviceIndex()); EXPECT_TRUE(compressedAllocation2->isCompressionEnabled()); EXPECT_EQ(MemoryPool::LocalMemory, compressedAllocation2->getMemoryPool()); EXPECT_TRUE(compressedAllocation2->getDefaultGmm()->isCompressionEnabled); retVal = pCmdQ->enqueueWriteBuffer(compressedBuffer1.get(), CL_FALSE, 0, bufferSize, writePattern, nullptr, 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); retVal = kernel->setArg(0, compressedBuffer1.get()); ASSERT_EQ(CL_SUCCESS, retVal); retVal = kernel->setArg(1, compressedBuffer2.get()); ASSERT_EQ(CL_SUCCESS, retVal); size_t globalWorkSize[3] = {bufferSize, 1, 1}; retVal = pCmdQ->enqueueKernel(kernel, 1, nullptr, globalWorkSize, nullptr, 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); pCmdQ->finish(); expectNotEqualMemory(AUBFixture::getGpuPointer(compressedAllocation1), writePattern, bufferSize); expectNotEqualMemory(AUBFixture::getGpuPointer(compressedAllocation2), writePattern, bufferSize); retVal = pCmdQ->enqueueCopyBuffer(compressedBuffer2.get(), unCompressedBuffer.get(), 0, 0, bufferSize, 0, nullptr, 
nullptr); ASSERT_EQ(CL_SUCCESS, retVal); pCmdQ->finish(); expectMemory(AUBFixture::getGpuPointer(unCompressedAllocation), writePattern, bufferSize); } XE_HPG_CORETEST_P(XeHpgCoreStatelessCompressionInSBA, GENERATEONLY_givenCompressedDeviceMemoryWhenAccessedStatelesslyThenEnableCompressionInSBA) { const size_t bufferSize = 2048; uint8_t writePattern[bufferSize]; std::fill(writePattern, writePattern + sizeof(writePattern), 1); device->getGpgpuCommandStreamReceiver().overrideDispatchPolicy(DispatchMode::BatchedDispatch); auto compressedDeviceMemAllocPtr1 = clDeviceMemAllocINTEL(context, device.get(), nullptr, bufferSize, 0, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, compressedDeviceMemAllocPtr1); auto compressedDeviceMemAlloc1 = context->getSVMAllocsManager()->getSVMAllocs()->get(compressedDeviceMemAllocPtr1)->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex()); EXPECT_NE(nullptr, compressedDeviceMemAlloc1); EXPECT_TRUE(compressedDeviceMemAlloc1->isCompressionEnabled()); EXPECT_EQ(MemoryPool::LocalMemory, compressedDeviceMemAlloc1->getMemoryPool()); EXPECT_TRUE(compressedDeviceMemAlloc1->getDefaultGmm()->isCompressionEnabled); auto compressedDeviceMemAllocPtr2 = clDeviceMemAllocINTEL(context, device.get(), nullptr, bufferSize, 0, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, compressedDeviceMemAllocPtr2); auto compressedDeviceMemAlloc2 = context->getSVMAllocsManager()->getSVMAllocs()->get(compressedDeviceMemAllocPtr2)->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex()); EXPECT_NE(nullptr, compressedDeviceMemAlloc2); EXPECT_TRUE(compressedDeviceMemAlloc2->isCompressionEnabled()); EXPECT_EQ(MemoryPool::LocalMemory, compressedDeviceMemAlloc2->getMemoryPool()); EXPECT_TRUE(compressedDeviceMemAlloc2->getDefaultGmm()->isCompressionEnabled); retVal = clEnqueueMemcpyINTEL(pCmdQ, true, compressedDeviceMemAllocPtr1, writePattern, bufferSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = 
clSetKernelArgSVMPointer(multiDeviceKernel.get(), 0, compressedDeviceMemAllocPtr1); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clSetKernelArgSVMPointer(multiDeviceKernel.get(), 1, compressedDeviceMemAllocPtr2); EXPECT_EQ(CL_SUCCESS, retVal); size_t globalWorkSize[3] = {bufferSize, 1, 1}; retVal = clEnqueueNDRangeKernel(pCmdQ, multiDeviceKernel.get(), 1, nullptr, globalWorkSize, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clFinish(pCmdQ); EXPECT_EQ(CL_SUCCESS, retVal); expectNotEqualMemory(compressedDeviceMemAllocPtr2, writePattern, bufferSize); expectNotEqualMemory(compressedDeviceMemAllocPtr2, writePattern, bufferSize); retVal = clMemFreeINTEL(context, compressedDeviceMemAllocPtr1); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clMemFreeINTEL(context, compressedDeviceMemAllocPtr2); EXPECT_EQ(CL_SUCCESS, retVal); } XE_HPG_CORETEST_P(XeHpgCoreStatelessCompressionInSBA, givenUncompressibleBufferInHostMemoryWhenAccessedStatelesslyThenDisableCompressionInSBA) { const size_t bufferSize = 2048; uint8_t writePattern[bufferSize]; std::fill(writePattern, writePattern + sizeof(writePattern), 1); device->getGpgpuCommandStreamReceiver().overrideDispatchPolicy(DispatchMode::BatchedDispatch); auto unCompressedBuffer = std::unique_ptr(Buffer::create(context, CL_MEM_UNCOMPRESSED_HINT_INTEL, bufferSize, nullptr, retVal)); auto unCompressedAllocation = unCompressedBuffer->getGraphicsAllocation(device->getRootDeviceIndex()); EXPECT_EQ(AllocationType::BUFFER, unCompressedAllocation->getAllocationType()); EXPECT_FALSE(unCompressedAllocation->isCompressionEnabled()); EXPECT_EQ(MemoryPool::LocalMemory, unCompressedAllocation->getMemoryPool()); auto compressedBuffer = std::unique_ptr(Buffer::create(context, CL_MEM_COMPRESSED_HINT_INTEL, bufferSize, nullptr, retVal)); auto compressedAllocation = compressedBuffer->getGraphicsAllocation(device->getRootDeviceIndex()); EXPECT_TRUE(compressedAllocation->isCompressionEnabled()); EXPECT_EQ(MemoryPool::LocalMemory, 
compressedAllocation->getMemoryPool()); EXPECT_TRUE(compressedAllocation->getDefaultGmm()->isCompressionEnabled); auto uncompressibleBufferInHostMemory = std::unique_ptr(Buffer::create(context, CL_MEM_FORCE_HOST_MEMORY_INTEL, bufferSize, nullptr, retVal)); auto uncompressibleAllocationInHostMemory = uncompressibleBufferInHostMemory->getGraphicsAllocation(device->getRootDeviceIndex()); EXPECT_EQ(AllocationType::BUFFER_HOST_MEMORY, uncompressibleAllocationInHostMemory->getAllocationType()); EXPECT_TRUE(MemoryPool::isSystemMemoryPool(uncompressibleAllocationInHostMemory->getMemoryPool())); retVal = pCmdQ->enqueueWriteBuffer(compressedBuffer.get(), CL_FALSE, 0, bufferSize, writePattern, nullptr, 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); retVal = kernel->setArg(0, compressedBuffer.get()); ASSERT_EQ(CL_SUCCESS, retVal); retVal = kernel->setArg(1, uncompressibleBufferInHostMemory.get()); ASSERT_EQ(CL_SUCCESS, retVal); size_t globalWorkSize[3] = {bufferSize, 1, 1}; retVal = pCmdQ->enqueueKernel(kernel, 1, nullptr, globalWorkSize, nullptr, 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); pCmdQ->finish(); expectNotEqualMemory(AUBFixture::getGpuPointer(compressedAllocation), writePattern, bufferSize); expectMemory(AUBFixture::getGpuPointer(uncompressibleAllocationInHostMemory), writePattern, bufferSize); retVal = pCmdQ->enqueueCopyBuffer(uncompressibleBufferInHostMemory.get(), unCompressedBuffer.get(), 0, 0, bufferSize, 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); pCmdQ->finish(); expectMemory(AUBFixture::getGpuPointer(unCompressedAllocation), writePattern, bufferSize); } XE_HPG_CORETEST_P(XeHpgCoreStatelessCompressionInSBA, givenUncompressibleHostMemoryAllocationWhenAccessedStatelesslyThenDisableCompressionInSBA) { const size_t bufferSize = 2048; uint8_t writePattern[bufferSize]; std::fill(writePattern, writePattern + sizeof(writePattern), 1); device->getGpgpuCommandStreamReceiver().overrideDispatchPolicy(DispatchMode::BatchedDispatch); auto 
compressedDeviceMemAllocPtr = clDeviceMemAllocINTEL(context, device.get(), nullptr, bufferSize, 0, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, compressedDeviceMemAllocPtr); auto compressedDeviceMemAlloc = context->getSVMAllocsManager()->getSVMAllocs()->get(compressedDeviceMemAllocPtr)->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex()); EXPECT_NE(nullptr, compressedDeviceMemAlloc); EXPECT_TRUE(compressedDeviceMemAlloc->isCompressionEnabled()); EXPECT_EQ(MemoryPool::LocalMemory, compressedDeviceMemAlloc->getMemoryPool()); EXPECT_TRUE(compressedDeviceMemAlloc->getDefaultGmm()->isCompressionEnabled); auto uncompressibleHostMemAllocPtr = clHostMemAllocINTEL(context, nullptr, bufferSize, 0, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, uncompressibleHostMemAllocPtr); auto uncompressibleHostMemAlloc = context->getSVMAllocsManager()->getSVMAllocs()->get(uncompressibleHostMemAllocPtr)->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex()); EXPECT_NE(nullptr, uncompressibleHostMemAlloc); EXPECT_EQ(AllocationType::BUFFER_HOST_MEMORY, uncompressibleHostMemAlloc->getAllocationType()); EXPECT_TRUE(MemoryPool::isSystemMemoryPool(uncompressibleHostMemAlloc->getMemoryPool())); retVal = clEnqueueMemcpyINTEL(pCmdQ, true, compressedDeviceMemAllocPtr, writePattern, bufferSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clSetKernelArgSVMPointer(multiDeviceKernel.get(), 0, compressedDeviceMemAllocPtr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clSetKernelArgSVMPointer(multiDeviceKernel.get(), 1, uncompressibleHostMemAllocPtr); EXPECT_EQ(CL_SUCCESS, retVal); size_t globalWorkSize[3] = {bufferSize, 1, 1}; retVal = clEnqueueNDRangeKernel(pCmdQ, multiDeviceKernel.get(), 1, nullptr, globalWorkSize, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clFinish(pCmdQ); EXPECT_EQ(CL_SUCCESS, retVal); expectNotEqualMemory(compressedDeviceMemAllocPtr, writePattern, bufferSize); 
expectMemory(uncompressibleHostMemAllocPtr, writePattern, bufferSize); retVal = clMemFreeINTEL(context, compressedDeviceMemAllocPtr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clMemFreeINTEL(context, uncompressibleHostMemAllocPtr); EXPECT_EQ(CL_SUCCESS, retVal); } INSTANTIATE_TEST_CASE_P(, XeHpgCoreStatelessCompressionInSBA, ::testing::Values(aub_stream::ENGINE_RCS, aub_stream::ENGINE_CCS)); struct XeHpgCoreUmStatelessCompressionInSBA : public KernelAUBFixture, public ::testing::Test, public ::testing::WithParamInterface { void SetUp() override { DebugManager.flags.EnableStatelessCompression.set(1); DebugManager.flags.RenderCompressedBuffersEnabled.set(true); DebugManager.flags.RenderCompressedImagesEnabled.set(true); DebugManager.flags.EnableLocalMemory.set(true); DebugManager.flags.NodeOrdinal.set(GetParam()); DebugManager.flags.ForceAuxTranslationMode.set(static_cast(AuxTranslationMode::Builtin)); KernelAUBFixture::SetUp(); if (!device->getHardwareInfo().featureTable.flags.ftrLocalMemory) { GTEST_SKIP(); } EXPECT_TRUE(multiDeviceKernel->getKernel(rootDeviceIndex)->getKernelInfo().hasIndirectStatelessAccess); } void TearDown() override { KernelAUBFixture::TearDown(); } DebugManagerStateRestore debugRestorer; cl_int retVal = CL_SUCCESS; }; XE_HPG_CORETEST_P(XeHpgCoreUmStatelessCompressionInSBA, GENERATEONLY_givenStatelessKernelWhenItHasIndirectDeviceAccessThenEnableCompressionInSBA) { const size_t bufferSize = MemoryConstants::kiloByte; uint8_t bufferData[bufferSize] = {}; auto uncompressibleHostMemAlloc = clHostMemAllocINTEL(context, nullptr, bufferSize, 0, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, uncompressibleHostMemAlloc); auto compressedDeviceMemAlloc1 = clDeviceMemAllocINTEL(context, device.get(), nullptr, bufferSize, 0, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, compressedDeviceMemAlloc1); auto compressedDeviceMemAlloc2 = clDeviceMemAllocINTEL(context, device.get(), nullptr, bufferSize, 0, &retVal); EXPECT_EQ(CL_SUCCESS, 
retVal);
    ASSERT_NE(nullptr, compressedDeviceMemAlloc2);

    // Zero-fill the second device allocation (bufferData is still all zeros here).
    retVal = clEnqueueMemcpyINTEL(pCmdQ, true, compressedDeviceMemAlloc2, bufferData, bufferSize, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    // Put the address of the second allocation at the start of the data copied into the first one.
    // NOTE(review): the cast target types are missing from this text - confirm against the original source.
    reinterpret_cast(bufferData)[0] = reinterpret_cast(compressedDeviceMemAlloc2);

    retVal = clEnqueueMemcpyINTEL(pCmdQ, true, compressedDeviceMemAlloc1, bufferData, bufferSize, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    retVal = clSetKernelArgSVMPointer(multiDeviceKernel.get(), 0, compressedDeviceMemAlloc1);
    EXPECT_EQ(CL_SUCCESS, retVal);

    // The second allocation is not a kernel argument; it is declared through the USM pointer list.
    retVal = clSetKernelExecInfo(multiDeviceKernel.get(), CL_KERNEL_EXEC_INFO_USM_PTRS_INTEL, sizeof(compressedDeviceMemAlloc2), &compressedDeviceMemAlloc2);
    EXPECT_EQ(CL_SUCCESS, retVal);

    size_t globalWorkSize[3] = {bufferSize, 1, 1};
    retVal = clEnqueueNDRangeKernel(pCmdQ, multiDeviceKernel.get(), 1, nullptr, globalWorkSize, nullptr, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    retVal = clFinish(pCmdQ);
    EXPECT_EQ(CL_SUCCESS, retVal);

    // Reference contents: first element 1, the rest zero.
    // NOTE(review): presumably what the kernel writes through the stored pointer - confirm against the kernel source.
    reinterpret_cast(bufferData)[0] = 1;

    // The raw device memory must differ from the reference; the copy into host memory must match it.
    expectNotEqualMemory(compressedDeviceMemAlloc2, bufferData, bufferSize);

    retVal = clEnqueueMemcpyINTEL(pCmdQ, true, uncompressibleHostMemAlloc, compressedDeviceMemAlloc2, bufferSize, 0, nullptr, nullptr);
    ASSERT_EQ(CL_SUCCESS, retVal);

    expectMemory(uncompressibleHostMemAlloc, bufferData, bufferSize);

    retVal = clMemFreeINTEL(context, uncompressibleHostMemAlloc);
    EXPECT_EQ(CL_SUCCESS, retVal);
    retVal = clMemFreeINTEL(context, compressedDeviceMemAlloc1);
    EXPECT_EQ(CL_SUCCESS, retVal);
    retVal = clMemFreeINTEL(context, compressedDeviceMemAlloc2);
    EXPECT_EQ(CL_SUCCESS, retVal);
}

// Same scenario as the preceding test, except that the second device
// allocation is not listed explicitly: the kernel is given
// CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL = CL_TRUE instead.
XE_HPG_CORETEST_P(XeHpgCoreUmStatelessCompressionInSBA, GENERATEONLY_givenKernelExecInfoWhenItHasIndirectDeviceAccessThenEnableCompressionInSBA) {
    const size_t bufferSize = MemoryConstants::kiloByte;
    uint8_t bufferData[bufferSize] = {};

    auto uncompressibleHostMemAlloc = clHostMemAllocINTEL(context, nullptr, bufferSize, 0, &retVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    ASSERT_NE(nullptr, uncompressibleHostMemAlloc);

    auto compressedDeviceMemAlloc1 = clDeviceMemAllocINTEL(context, device.get(), nullptr, bufferSize, 0, &retVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    ASSERT_NE(nullptr, compressedDeviceMemAlloc1);

    auto compressedDeviceMemAlloc2 = clDeviceMemAllocINTEL(context, device.get(), nullptr, bufferSize, 0, &retVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    ASSERT_NE(nullptr, compressedDeviceMemAlloc2);

    // Zero-fill the second device allocation.
    retVal = clEnqueueMemcpyINTEL(pCmdQ, true, compressedDeviceMemAlloc2, bufferData, bufferSize, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    // Put the address of the second allocation at the start of the data copied into the first one.
    reinterpret_cast(bufferData)[0] = reinterpret_cast(compressedDeviceMemAlloc2);

    retVal = clEnqueueMemcpyINTEL(pCmdQ, true, compressedDeviceMemAlloc1, bufferData, bufferSize, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    retVal = clSetKernelArgSVMPointer(multiDeviceKernel.get(), 0, compressedDeviceMemAlloc1);
    EXPECT_EQ(CL_SUCCESS, retVal);

    cl_bool enableIndirectDeviceAccess = CL_TRUE;
    retVal = clSetKernelExecInfo(multiDeviceKernel.get(), CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL, sizeof(cl_bool), &enableIndirectDeviceAccess);
    EXPECT_EQ(CL_SUCCESS, retVal);

    size_t globalWorkSize[3] = {bufferSize, 1, 1};
    retVal = clEnqueueNDRangeKernel(pCmdQ, multiDeviceKernel.get(), 1, nullptr, globalWorkSize, nullptr, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    retVal = clFinish(pCmdQ);
    EXPECT_EQ(CL_SUCCESS, retVal);

    // Reference contents: first element 1, the rest zero.
    reinterpret_cast(bufferData)[0] = 1;

    expectNotEqualMemory(compressedDeviceMemAlloc2, bufferData, bufferSize);

    retVal = clEnqueueMemcpyINTEL(pCmdQ, true, uncompressibleHostMemAlloc, compressedDeviceMemAlloc2, bufferSize, 0, nullptr, nullptr);
    ASSERT_EQ(CL_SUCCESS, retVal);

    expectMemory(uncompressibleHostMemAlloc, bufferData, bufferSize);

    retVal = clMemFreeINTEL(context, uncompressibleHostMemAlloc);
    EXPECT_EQ(CL_SUCCESS, retVal);
    retVal = clMemFreeINTEL(context, compressedDeviceMemAlloc1);
    EXPECT_EQ(CL_SUCCESS, retVal);
    retVal = clMemFreeINTEL(context, compressedDeviceMemAlloc2);
    EXPECT_EQ(CL_SUCCESS,
              retVal);
}

// Host-access variant: the device allocation (kernel arg 0) carries the address
// of a zero-filled host USM allocation, which is declared through the USM
// pointer list. After the kernel runs, the host allocation is compared directly
// against the reference contents.
XE_HPG_CORETEST_P(XeHpgCoreUmStatelessCompressionInSBA, givenStatelessKernelWhenItHasIndirectHostAccessThenDisableCompressionInSBA) {
    const size_t bufferSize = MemoryConstants::kiloByte;
    uint8_t bufferData[bufferSize] = {};

    auto compressedDeviceMemAlloc = clDeviceMemAllocINTEL(context, device.get(), nullptr, bufferSize, 0, &retVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    ASSERT_NE(nullptr, compressedDeviceMemAlloc);

    auto uncompressibleHostMemAlloc = clHostMemAllocINTEL(context, nullptr, bufferSize, 0, &retVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    ASSERT_NE(nullptr, uncompressibleHostMemAlloc);

    // The host allocation is written directly by the CPU.
    memset(uncompressibleHostMemAlloc, 0, bufferSize);

    // Put the address of the host allocation at the start of the data copied into the device allocation.
    reinterpret_cast(bufferData)[0] = reinterpret_cast(uncompressibleHostMemAlloc);

    retVal = clEnqueueMemcpyINTEL(pCmdQ, true, compressedDeviceMemAlloc, bufferData, bufferSize, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    retVal = clSetKernelArgSVMPointer(multiDeviceKernel.get(), 0, compressedDeviceMemAlloc);
    EXPECT_EQ(CL_SUCCESS, retVal);

    retVal = clSetKernelExecInfo(multiDeviceKernel.get(), CL_KERNEL_EXEC_INFO_USM_PTRS_INTEL, sizeof(uncompressibleHostMemAlloc), &uncompressibleHostMemAlloc);
    EXPECT_EQ(CL_SUCCESS, retVal);

    size_t globalWorkSize[3] = {bufferSize, 1, 1};
    retVal = clEnqueueNDRangeKernel(pCmdQ, multiDeviceKernel.get(), 1, nullptr, globalWorkSize, nullptr, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    retVal = clFinish(pCmdQ);
    EXPECT_EQ(CL_SUCCESS, retVal);

    // Reference contents: first element 1, the rest zero.
    reinterpret_cast(bufferData)[0] = 1;

    expectMemory(uncompressibleHostMemAlloc, bufferData, bufferSize);

    retVal = clMemFreeINTEL(context, compressedDeviceMemAlloc);
    EXPECT_EQ(CL_SUCCESS, retVal);
    retVal = clMemFreeINTEL(context, uncompressibleHostMemAlloc);
    EXPECT_EQ(CL_SUCCESS, retVal);
}

// Head of the next test; its statements continue on the following line of the file.
XE_HPG_CORETEST_P(XeHpgCoreUmStatelessCompressionInSBA, givenKernelExecInfoWhenItHasIndirectHostAccessThenDisableCompressionInSBA) {
    const size_t bufferSize = MemoryConstants::kiloByte;
    uint8_t bufferData[bufferSize] = {};

    auto compressedDeviceMemAlloc =
clDeviceMemAllocINTEL(context, device.get(), nullptr, bufferSize, 0, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, compressedDeviceMemAlloc); auto uncompressibleHostMemAlloc = clHostMemAllocINTEL(context, nullptr, bufferSize, 0, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, uncompressibleHostMemAlloc); memset(uncompressibleHostMemAlloc, 0, bufferSize); reinterpret_cast(bufferData)[0] = reinterpret_cast(uncompressibleHostMemAlloc); retVal = clEnqueueMemcpyINTEL(pCmdQ, true, compressedDeviceMemAlloc, bufferData, bufferSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clSetKernelArgSVMPointer(multiDeviceKernel.get(), 0, compressedDeviceMemAlloc); EXPECT_EQ(CL_SUCCESS, retVal); cl_bool enableIndirectHostAccess = CL_TRUE; retVal = clSetKernelExecInfo(multiDeviceKernel.get(), CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL, sizeof(cl_bool), &enableIndirectHostAccess); EXPECT_EQ(CL_SUCCESS, retVal); size_t globalWorkSize[3] = {bufferSize, 1, 1}; retVal = clEnqueueNDRangeKernel(pCmdQ, multiDeviceKernel.get(), 1, nullptr, globalWorkSize, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clFinish(pCmdQ); EXPECT_EQ(CL_SUCCESS, retVal); reinterpret_cast(bufferData)[0] = 1; expectMemory(uncompressibleHostMemAlloc, bufferData, bufferSize); retVal = clMemFreeINTEL(context, compressedDeviceMemAlloc); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clMemFreeINTEL(context, uncompressibleHostMemAlloc); EXPECT_EQ(CL_SUCCESS, retVal); } INSTANTIATE_TEST_CASE_P(, XeHpgCoreUmStatelessCompressionInSBA, ::testing::Values(aub_stream::ENGINE_RCS, aub_stream::ENGINE_CCS)); struct XeHpgCoreStatelessCompressionInSBAWithBCS : public MulticontextAubFixture, public StatelessCopyKernelFixture, public ::testing::Test { void SetUp() override { DebugManager.flags.EnableStatelessCompression.set(1); DebugManager.flags.ForceAuxTranslationMode.set(static_cast(AuxTranslationMode::Blit)); DebugManager.flags.EnableBlitterOperationsSupport.set(true); 
MulticontextAubFixture::SetUp(1, EnabledCommandStreamers::Single, true); StatelessCopyKernelFixture::SetUp(tileDevices[0], context.get()); if (!tileDevices[0]->getHardwareInfo().featureTable.flags.ftrLocalMemory) { GTEST_SKIP(); } } void TearDown() override { MulticontextAubFixture::TearDown(); StatelessCopyKernelFixture::TearDown(); } DebugManagerStateRestore debugRestorer; cl_int retVal = CL_SUCCESS; }; XE_HPG_CORETEST_F(XeHpgCoreStatelessCompressionInSBAWithBCS, GENERATEONLY_givenCompressedBufferInDeviceMemoryWhenAccessedStatelesslyThenEnableCompressionInSBA) { const size_t bufferSize = 2048; uint8_t writePattern[bufferSize]; std::fill(writePattern, writePattern + sizeof(writePattern), 1); auto unCompressedBuffer = std::unique_ptr(Buffer::create(context.get(), CL_MEM_UNCOMPRESSED_HINT_INTEL, bufferSize, nullptr, retVal)); auto unCompressedAllocation = unCompressedBuffer->getGraphicsAllocation(tileDevices[0]->getRootDeviceIndex()); EXPECT_EQ(AllocationType::BUFFER, unCompressedAllocation->getAllocationType()); EXPECT_EQ(MemoryPool::LocalMemory, unCompressedAllocation->getMemoryPool()); auto compressedBuffer = std::unique_ptr(Buffer::create(context.get(), CL_MEM_COMPRESSED_HINT_INTEL, bufferSize, nullptr, retVal)); auto compressedAllocation = compressedBuffer->getGraphicsAllocation(tileDevices[0]->getRootDeviceIndex()); EXPECT_TRUE(compressedAllocation->isCompressionEnabled()); EXPECT_EQ(MemoryPool::LocalMemory, compressedAllocation->getMemoryPool()); EXPECT_TRUE(compressedAllocation->getDefaultGmm()->isCompressionEnabled); retVal = commandQueues[0][0]->enqueueWriteBuffer(compressedBuffer.get(), CL_FALSE, 0, bufferSize, writePattern, nullptr, 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); retVal = kernel->setArg(0, compressedBuffer.get()); ASSERT_EQ(CL_SUCCESS, retVal); retVal = kernel->setArg(1, unCompressedBuffer.get()); ASSERT_EQ(CL_SUCCESS, retVal); size_t globalWorkSize[3] = {bufferSize, 1, 1}; retVal = commandQueues[0][0]->enqueueKernel(kernel, 1, 
nullptr, globalWorkSize, nullptr, 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); commandQueues[0][0]->finish(); expectMemoryNotEqual(AUBFixture::getGpuPointer(compressedAllocation), writePattern, bufferSize, 0, 0); expectMemory(AUBFixture::getGpuPointer(unCompressedAllocation), writePattern, bufferSize, 0, 0); } XE_HPG_CORETEST_F(XeHpgCoreStatelessCompressionInSBAWithBCS, givenUncompressibleBufferInHostMemoryWhenAccessedStatelesslyThenDisableCompressionInSBA) { const size_t bufferSize = 2048; uint8_t writePattern[bufferSize]; std::fill(writePattern, writePattern + sizeof(writePattern), 1); auto compressedBuffer = std::unique_ptr(Buffer::create(context.get(), CL_MEM_COMPRESSED_HINT_INTEL, bufferSize, nullptr, retVal)); auto compressedAllocation = compressedBuffer->getGraphicsAllocation(tileDevices[0]->getRootDeviceIndex()); EXPECT_TRUE(compressedAllocation->isCompressionEnabled()); EXPECT_EQ(MemoryPool::LocalMemory, compressedAllocation->getMemoryPool()); EXPECT_TRUE(compressedAllocation->getDefaultGmm()->isCompressionEnabled); auto uncompressibleBufferInHostMemory = std::unique_ptr(Buffer::create(context.get(), CL_MEM_FORCE_HOST_MEMORY_INTEL, bufferSize, nullptr, retVal)); auto uncompressibleAllocationInHostMemory = uncompressibleBufferInHostMemory->getGraphicsAllocation(tileDevices[0]->getRootDeviceIndex()); EXPECT_EQ(AllocationType::BUFFER_HOST_MEMORY, uncompressibleAllocationInHostMemory->getAllocationType()); EXPECT_TRUE(MemoryPool::isSystemMemoryPool(uncompressibleAllocationInHostMemory->getMemoryPool())); retVal = commandQueues[0][0]->enqueueWriteBuffer(compressedBuffer.get(), CL_FALSE, 0, bufferSize, writePattern, nullptr, 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); retVal = kernel->setArg(0, compressedBuffer.get()); ASSERT_EQ(CL_SUCCESS, retVal); retVal = kernel->setArg(1, uncompressibleBufferInHostMemory.get()); ASSERT_EQ(CL_SUCCESS, retVal); size_t globalWorkSize[3] = {bufferSize, 1, 1}; retVal = commandQueues[0][0]->enqueueKernel(kernel, 
1, nullptr, globalWorkSize, nullptr, 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); commandQueues[0][0]->finish(); expectMemoryNotEqual(AUBFixture::getGpuPointer(compressedAllocation), writePattern, bufferSize, 0, 0); expectMemory(AUBFixture::getGpuPointer(uncompressibleAllocationInHostMemory), writePattern, bufferSize, 0, 0); } compute-runtime-22.14.22890/opencl/test/unit_test/built_ins/000077500000000000000000000000001422164147700236175ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/built_ins/CMakeLists.txt000066400000000000000000000011231422164147700263540ustar00rootroot00000000000000# # Copyright (C) 2018-2022 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_built_in ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/built_ins_file_names.h ${CMAKE_CURRENT_SOURCE_DIR}/built_ins_file_names.cpp ${CMAKE_CURRENT_SOURCE_DIR}/built_in_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/built_in_tests_ocl.cpp ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}get_built_ins_file_names.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sip_tests.cpp ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_built_in}) add_subdirectories() compute-runtime-22.14.22890/opencl/test/unit_test/built_ins/built_in_tests.cpp000066400000000000000000003217611422164147700273640ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/built_ins/built_ins.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/gmm_helper/gmm.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/compiler_hw_info_config.h" #include "shared/source/helpers/file_io.h" #include "shared/source/helpers/hash.h" #include "shared/source/helpers/string.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/libult/global_environment.h" #include 
"shared/test/common/mocks/mock_builtins.h" #include "shared/test/common/mocks/mock_builtinslib.h" #include "shared/test/common/mocks/mock_compiler_interface.h" #include "shared/test/common/mocks/mock_compilers.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "shared/test/common/mocks/ult_device_factory.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/helpers/gtest_helpers.h" #include "shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/source/built_ins/aux_translation_builtin.h" #include "opencl/source/built_ins/builtins_dispatch_builder.h" #include "opencl/source/built_ins/vme_builtin.h" #include "opencl/source/built_ins/vme_dispatch_builder.h" #include "opencl/source/helpers/dispatch_info_builder.h" #include "opencl/source/kernel/kernel.h" #include "opencl/test/unit_test/built_ins/built_ins_file_names.h" #include "opencl/test/unit_test/fixtures/built_in_fixture.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/fixtures/context_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/fixtures/run_kernel_fixture.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/test_macros/test_checks_ocl.h" #include "compiler_options.h" #include "gtest/gtest.h" #include "os_inc.h" #include "test_traits_common.h" #include using namespace NEO; class BuiltInTests : public BuiltInFixture, public ClDeviceFixture, public ContextFixture, public ::testing::Test { using BuiltInFixture::SetUp; using ContextFixture::SetUp; public: BuiltInTests() { // reserving space here to avoid the appearance of a memory management // leak being reported allBuiltIns.reserve(5000); } void SetUp() override { DebugManager.flags.ForceAuxTranslationMode.set(static_cast(AuxTranslationMode::Builtin)); 
ClDeviceFixture::SetUp(); cl_device_id device = pClDevice; ContextFixture::SetUp(1, &device); BuiltInFixture::SetUp(pDevice); } void TearDown() override { allBuiltIns.clear(); BuiltInFixture::TearDown(); ContextFixture::TearDown(); ClDeviceFixture::TearDown(); } void AppendBuiltInStringFromFile(std::string builtInFile, size_t &size) { std::string src; auto pData = loadDataFromFile( builtInFile.c_str(), size); ASSERT_NE(nullptr, pData); src = (const char *)pData.get(); size_t start = src.find("R\"===("); size_t stop = src.find(")===\""); // assert that pattern was found ASSERT_NE(std::string::npos, start); ASSERT_NE(std::string::npos, stop); start += strlen("R\"===("); size = stop - start; allBuiltIns.append(src, start, size); } bool compareBuiltinOpParams(const BuiltinOpParams &left, const BuiltinOpParams &right) { return left.srcPtr == right.srcPtr && left.dstPtr == right.dstPtr && left.size == right.size && left.srcOffset == right.srcOffset && left.dstOffset == right.dstOffset && left.dstMemObj == right.dstMemObj && left.srcMemObj == right.srcMemObj; } DebugManagerStateRestore restore; std::string allBuiltIns; }; struct VmeBuiltInTests : BuiltInTests { void SetUp() override { BuiltInTests::SetUp(); if (!pDevice->getHardwareInfo().capabilityTable.supportsVme) { GTEST_SKIP(); } } }; struct AuxBuiltInTests : BuiltInTests, public ::testing::WithParamInterface { void SetUp() override { BuiltInTests::SetUp(); kernelObjType = GetParam(); } KernelObjForAuxTranslation::Type kernelObjType; }; struct AuxBuiltinsMatcher { template static constexpr bool isMatched() { return TestTraits::get()>::auxBuiltinsSupported; } }; HWTEST2_F(BuiltInTests, GivenBuiltinTypeBinaryWhenGettingAuxTranslationBuiltinThenResourceSizeIsNonZero, MatchAny) { auto mockBuiltinsLib = std::unique_ptr(new MockBuiltinsLib()); EXPECT_EQ(TestTraits::get()>::auxBuiltinsSupported, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::AuxTranslation, BuiltinCode::ECodeType::Binary, *pDevice).size() != 0); } 
INSTANTIATE_TEST_CASE_P(, AuxBuiltInTests, testing::ValuesIn({KernelObjForAuxTranslation::Type::MEM_OBJ, KernelObjForAuxTranslation::Type::GFX_ALLOC})); TEST_F(BuiltInTests, WhenBuildingListOfBuiltinsThenBuiltinsHaveBeenGenerated) { for (auto supportsImages : ::testing::Bool()) { allBuiltIns.clear(); size_t size = 0; for (auto &fileName : getBuiltInFileNames(supportsImages)) { AppendBuiltInStringFromFile(fileName, size); ASSERT_NE(0u, size); } // convert /r/n to /n size_t start_pos = 0; while ((start_pos = allBuiltIns.find("\r\n", start_pos)) != std::string::npos) { allBuiltIns.replace(start_pos, 2, "\n"); } // convert /r to /n start_pos = 0; while ((start_pos = allBuiltIns.find("\r", start_pos)) != std::string::npos) { allBuiltIns.replace(start_pos, 1, "\n"); } uint64_t hash = Hash::hash(allBuiltIns.c_str(), allBuiltIns.length()); auto hashName = getBuiltInHashFileName(hash, supportsImages); //First fail, if we are inconsistent EXPECT_EQ(true, fileExists(hashName)) << "**********\nBuilt in kernels need to be regenerated for the mock compilers!\n**********"; //then write to file if needed #define GENERATE_NEW_HASH_FOR_BUILT_INS 0 #if GENERATE_NEW_HASH_FOR_BUILT_INS std::cout << "writing builtins to file: " << hashName << std::endl; const char *pData = allBuiltIns.c_str(); writeDataToFile(hashName.c_str(), pData, allBuiltIns.length()); #endif } } TEST_F(BuiltInTests, GivenCopyBufferToBufferWhenDispatchInfoIsCreatedThenParamsAreCorrect) { BuiltinDispatchInfoBuilder &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBuffer, *pClDevice); MockBuffer *srcPtr = new MockBuffer(); MockBuffer *dstPtr = new MockBuffer(); MockBuffer &src = *srcPtr; MockBuffer &dst = *dstPtr; BuiltinOpParams builtinOpsParams; builtinOpsParams.srcMemObj = &src; builtinOpsParams.dstMemObj = &dst; builtinOpsParams.srcPtr = src.getCpuAddress(); builtinOpsParams.dstPtr = dst.getCpuAddress(); builtinOpsParams.size = {dst.getSize(), 0, 0}; MultiDispatchInfo 
multiDispatchInfo(builtinOpsParams); ASSERT_TRUE(builder.buildDispatchInfos(multiDispatchInfo)); size_t leftSize = reinterpret_cast(dst.getCpuAddress()) % MemoryConstants::cacheLineSize; if (leftSize > 0) { leftSize = MemoryConstants::cacheLineSize - leftSize; } size_t rightSize = (reinterpret_cast(dst.getCpuAddress()) + dst.getSize()) % MemoryConstants::cacheLineSize; size_t middleSize = (dst.getSize() - leftSize - rightSize) / (sizeof(uint32_t) * 4); int i = 0; int leftKernel = 0; int middleKernel = 0; int rightKernel = 0; if (leftSize > 0) { middleKernel++; rightKernel++; } else { leftKernel = -1; } if (middleSize > 0) { rightKernel++; } else { middleKernel = -1; } if (rightSize == 0) { rightKernel = -1; } for (auto &dispatchInfo : multiDispatchInfo) { EXPECT_EQ(1u, dispatchInfo.getDim()); if (i == leftKernel) { EXPECT_EQ(Vec3(leftSize, 1, 1), dispatchInfo.getGWS()); } else if (i == middleKernel) { EXPECT_EQ(Vec3(middleSize, 1, 1), dispatchInfo.getGWS()); } else if (i == rightKernel) { EXPECT_EQ(Vec3(rightSize, 1, 1), dispatchInfo.getGWS()); } i++; } EXPECT_TRUE(compareBuiltinOpParams(multiDispatchInfo.peekBuiltinOpParams(), builtinOpsParams)); delete srcPtr; delete dstPtr; } HWTEST2_P(AuxBuiltInTests, givenInputBufferWhenBuildingNonAuxDispatchInfoForAuxTranslationThenPickAndSetupCorrectKernels, AuxBuiltinsMatcher) { BuiltinDispatchInfoBuilder &baseBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, *pClDevice); auto &builder = static_cast &>(baseBuilder); KernelObjsForAuxTranslation kernelObjsForAuxTranslation; MultiDispatchInfo multiDispatchInfo; multiDispatchInfo.setKernelObjsForAuxTranslation(kernelObjsForAuxTranslation); std::vector builtinKernels; std::vector mockKernelObjForAuxTranslation; mockKernelObjForAuxTranslation.push_back(MockKernelObjForAuxTranslation(kernelObjType, 0x1000)); mockKernelObjForAuxTranslation.push_back(MockKernelObjForAuxTranslation(kernelObjType, 0x20000)); 
mockKernelObjForAuxTranslation.push_back(MockKernelObjForAuxTranslation(kernelObjType, 0x30000)); BuiltinOpParams builtinOpsParams; builtinOpsParams.auxTranslationDirection = AuxTranslationDirection::AuxToNonAux; for (auto &kernelObj : mockKernelObjForAuxTranslation) { kernelObjsForAuxTranslation.insert(kernelObj); } EXPECT_TRUE(builder.buildDispatchInfosForAuxTranslation(multiDispatchInfo, builtinOpsParams)); EXPECT_EQ(3u, multiDispatchInfo.size()); for (auto &dispatchInfo : multiDispatchInfo) { auto kernel = dispatchInfo.getKernel(); builtinKernels.push_back(kernel); if (kernelObjType == KernelObjForAuxTranslation::Type::MEM_OBJ) { auto buffer = castToObject(kernel->getKernelArguments().at(0).object); auto kernelObj = *kernelObjsForAuxTranslation.find({KernelObjForAuxTranslation::Type::MEM_OBJ, buffer}); EXPECT_NE(nullptr, kernelObj.object); EXPECT_EQ(KernelObjForAuxTranslation::Type::MEM_OBJ, kernelObj.type); kernelObjsForAuxTranslation.erase(kernelObj); cl_mem clMem = buffer; EXPECT_EQ(clMem, kernel->getKernelArguments().at(0).object); EXPECT_EQ(clMem, kernel->getKernelArguments().at(1).object); EXPECT_EQ(1u, dispatchInfo.getDim()); size_t xGws = alignUp(buffer->getSize(), 512) / 16; Vec3 gws = {xGws, 1, 1}; EXPECT_EQ(gws, dispatchInfo.getGWS()); } else { auto gfxAllocation = static_cast(kernel->getKernelArguments().at(0).object); auto kernelObj = *kernelObjsForAuxTranslation.find({KernelObjForAuxTranslation::Type::GFX_ALLOC, gfxAllocation}); EXPECT_NE(nullptr, kernelObj.object); EXPECT_EQ(KernelObjForAuxTranslation::Type::GFX_ALLOC, kernelObj.type); kernelObjsForAuxTranslation.erase(kernelObj); EXPECT_EQ(gfxAllocation, kernel->getKernelArguments().at(0).object); EXPECT_EQ(gfxAllocation, kernel->getKernelArguments().at(1).object); EXPECT_EQ(1u, dispatchInfo.getDim()); size_t xGws = alignUp(gfxAllocation->getUnderlyingBufferSize(), 512) / 16; Vec3 gws = {xGws, 1, 1}; EXPECT_EQ(gws, dispatchInfo.getGWS()); } } 
EXPECT_TRUE(compareBuiltinOpParams(multiDispatchInfo.peekBuiltinOpParams(), builtinOpsParams)); // always pick different kernel EXPECT_EQ(3u, builtinKernels.size()); EXPECT_NE(builtinKernels[0], builtinKernels[1]); EXPECT_NE(builtinKernels[0], builtinKernels[2]); EXPECT_NE(builtinKernels[1], builtinKernels[2]); } HWTEST2_P(AuxBuiltInTests, givenInputBufferWhenBuildingAuxDispatchInfoForAuxTranslationThenPickAndSetupCorrectKernels, AuxBuiltinsMatcher) { BuiltinDispatchInfoBuilder &baseBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, *pClDevice); auto &builder = static_cast &>(baseBuilder); KernelObjsForAuxTranslation kernelObjsForAuxTranslation; MultiDispatchInfo multiDispatchInfo; multiDispatchInfo.setKernelObjsForAuxTranslation(kernelObjsForAuxTranslation); std::vector builtinKernels; std::vector mockKernelObjForAuxTranslation; mockKernelObjForAuxTranslation.push_back(MockKernelObjForAuxTranslation(kernelObjType, 0x1000)); mockKernelObjForAuxTranslation.push_back(MockKernelObjForAuxTranslation(kernelObjType, 0x20000)); mockKernelObjForAuxTranslation.push_back(MockKernelObjForAuxTranslation(kernelObjType, 0x30000)); BuiltinOpParams builtinOpsParams; builtinOpsParams.auxTranslationDirection = AuxTranslationDirection::NonAuxToAux; for (auto &kernelObj : mockKernelObjForAuxTranslation) { kernelObjsForAuxTranslation.insert(kernelObj); } EXPECT_TRUE(builder.buildDispatchInfosForAuxTranslation(multiDispatchInfo, builtinOpsParams)); EXPECT_EQ(3u, multiDispatchInfo.size()); for (auto &dispatchInfo : multiDispatchInfo) { auto kernel = dispatchInfo.getKernel(); builtinKernels.push_back(kernel); if (kernelObjType == KernelObjForAuxTranslation::Type::MEM_OBJ) { auto buffer = castToObject(kernel->getKernelArguments().at(0).object); auto kernelObj = *kernelObjsForAuxTranslation.find({KernelObjForAuxTranslation::Type::MEM_OBJ, buffer}); EXPECT_NE(nullptr, kernelObj.object); EXPECT_EQ(KernelObjForAuxTranslation::Type::MEM_OBJ, 
kernelObj.type); kernelObjsForAuxTranslation.erase(kernelObj); cl_mem clMem = buffer; EXPECT_EQ(clMem, kernel->getKernelArguments().at(0).object); EXPECT_EQ(clMem, kernel->getKernelArguments().at(1).object); EXPECT_EQ(1u, dispatchInfo.getDim()); size_t xGws = alignUp(buffer->getSize(), 4) / 4; Vec3 gws = {xGws, 1, 1}; EXPECT_EQ(gws, dispatchInfo.getGWS()); } else { auto gfxAllocation = static_cast(kernel->getKernelArguments().at(0).object); auto kernelObj = *kernelObjsForAuxTranslation.find({KernelObjForAuxTranslation::Type::GFX_ALLOC, gfxAllocation}); EXPECT_NE(nullptr, kernelObj.object); EXPECT_EQ(KernelObjForAuxTranslation::Type::GFX_ALLOC, kernelObj.type); kernelObjsForAuxTranslation.erase(kernelObj); EXPECT_EQ(gfxAllocation, kernel->getKernelArguments().at(0).object); EXPECT_EQ(gfxAllocation, kernel->getKernelArguments().at(1).object); EXPECT_EQ(1u, dispatchInfo.getDim()); size_t xGws = alignUp(gfxAllocation->getUnderlyingBufferSize(), 512) / 16; Vec3 gws = {xGws, 1, 1}; EXPECT_EQ(gws, dispatchInfo.getGWS()); } } EXPECT_TRUE(compareBuiltinOpParams(multiDispatchInfo.peekBuiltinOpParams(), builtinOpsParams)); // always pick different kernel EXPECT_EQ(3u, builtinKernels.size()); EXPECT_NE(builtinKernels[0], builtinKernels[1]); EXPECT_NE(builtinKernels[0], builtinKernels[2]); EXPECT_NE(builtinKernels[1], builtinKernels[2]); } HWTEST2_P(AuxBuiltInTests, givenInputBufferWhenBuildingAuxTranslationDispatchThenPickDifferentKernelsDependingOnRequest, AuxBuiltinsMatcher) { BuiltinDispatchInfoBuilder &baseBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, *pClDevice); auto &builder = static_cast &>(baseBuilder); KernelObjsForAuxTranslation kernelObjsForAuxTranslation; std::vector mockKernelObjForAuxTranslation; for (int i = 0; i < 3; i++) { mockKernelObjForAuxTranslation.push_back(MockKernelObjForAuxTranslation(kernelObjType)); } std::vector builtinKernels; MultiDispatchInfo multiDispatchInfo; 
multiDispatchInfo.setKernelObjsForAuxTranslation(kernelObjsForAuxTranslation); BuiltinOpParams builtinOpsParams; for (auto &kernelObj : mockKernelObjForAuxTranslation) { kernelObjsForAuxTranslation.insert(kernelObj); } builtinOpsParams.auxTranslationDirection = AuxTranslationDirection::AuxToNonAux; EXPECT_TRUE(builder.buildDispatchInfosForAuxTranslation(multiDispatchInfo, builtinOpsParams)); builtinOpsParams.auxTranslationDirection = AuxTranslationDirection::NonAuxToAux; EXPECT_TRUE(builder.buildDispatchInfosForAuxTranslation(multiDispatchInfo, builtinOpsParams)); EXPECT_EQ(6u, multiDispatchInfo.size()); for (auto &dispatchInfo : multiDispatchInfo) { builtinKernels.push_back(dispatchInfo.getKernel()); } EXPECT_TRUE(compareBuiltinOpParams(multiDispatchInfo.peekBuiltinOpParams(), builtinOpsParams)); // nonAux vs Aux instance EXPECT_EQ(6u, builtinKernels.size()); EXPECT_NE(builtinKernels[0], builtinKernels[3]); EXPECT_NE(builtinKernels[1], builtinKernels[4]); EXPECT_NE(builtinKernels[2], builtinKernels[5]); } HWTEST2_P(AuxBuiltInTests, givenInvalidAuxTranslationDirectionWhenBuildingDispatchInfosThenAbort, AuxBuiltinsMatcher) { BuiltinDispatchInfoBuilder &baseBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, *pClDevice); auto &builder = static_cast &>(baseBuilder); KernelObjsForAuxTranslation kernelObjsForAuxTranslation; MockKernelObjForAuxTranslation mockKernelObjForAuxTranslation(kernelObjType); MultiDispatchInfo multiDispatchInfo; multiDispatchInfo.setKernelObjsForAuxTranslation(kernelObjsForAuxTranslation); BuiltinOpParams builtinOpsParams; kernelObjsForAuxTranslation.insert(mockKernelObjForAuxTranslation); builtinOpsParams.auxTranslationDirection = AuxTranslationDirection::None; EXPECT_THROW(builder.buildDispatchInfosForAuxTranslation(multiDispatchInfo, builtinOpsParams), std::exception); } class MockAuxBuilInOp : public BuiltInOp { public: using BuiltinDispatchInfoBuilder::populate; using BaseClass = BuiltInOp; using 
BaseClass::baseKernel; using BaseClass::convertToAuxKernel; using BaseClass::convertToNonAuxKernel; using BaseClass::resizeKernelInstances; using BaseClass::usedKernels; using BaseClass::BuiltInOp; }; TEST_F(BuiltInTests, whenAuxBuiltInIsConstructedThenResizeKernelInstancedTo5) { MockAuxBuilInOp mockAuxBuiltInOp(*pBuiltIns, *pClDevice); EXPECT_EQ(5u, mockAuxBuiltInOp.convertToAuxKernel.size()); EXPECT_EQ(5u, mockAuxBuiltInOp.convertToNonAuxKernel.size()); } HWTEST2_P(AuxBuiltInTests, givenMoreKernelObjectsForAuxTranslationThanKernelInstancesWhenDispatchingThenResize, AuxBuiltinsMatcher) { MockAuxBuilInOp mockAuxBuiltInOp(*pBuiltIns, *pClDevice); EXPECT_EQ(5u, mockAuxBuiltInOp.convertToAuxKernel.size()); EXPECT_EQ(5u, mockAuxBuiltInOp.convertToNonAuxKernel.size()); KernelObjsForAuxTranslation kernelObjsForAuxTranslation; BuiltinOpParams builtinOpsParams; MultiDispatchInfo multiDispatchInfo; multiDispatchInfo.setKernelObjsForAuxTranslation(kernelObjsForAuxTranslation); std::vector mockKernelObjForAuxTranslation; for (int i = 0; i < 7; i++) { mockKernelObjForAuxTranslation.push_back(MockKernelObjForAuxTranslation(kernelObjType)); } builtinOpsParams.auxTranslationDirection = AuxTranslationDirection::AuxToNonAux; for (auto &kernelObj : mockKernelObjForAuxTranslation) { kernelObjsForAuxTranslation.insert(kernelObj); } EXPECT_TRUE(mockAuxBuiltInOp.buildDispatchInfosForAuxTranslation(multiDispatchInfo, builtinOpsParams)); EXPECT_EQ(7u, mockAuxBuiltInOp.convertToAuxKernel.size()); EXPECT_EQ(7u, mockAuxBuiltInOp.convertToNonAuxKernel.size()); } TEST_F(BuiltInTests, givenkAuxBuiltInWhenResizeIsCalledThenCloneAllNewInstancesFromBaseKernel) { MockAuxBuilInOp mockAuxBuiltInOp(*pBuiltIns, *pClDevice); size_t newSize = mockAuxBuiltInOp.convertToAuxKernel.size() + 3; mockAuxBuiltInOp.resizeKernelInstances(newSize); EXPECT_EQ(newSize, mockAuxBuiltInOp.convertToAuxKernel.size()); for (auto &convertToAuxKernel : mockAuxBuiltInOp.convertToAuxKernel) { 
EXPECT_EQ(&mockAuxBuiltInOp.baseKernel->getKernelInfo(), &convertToAuxKernel->getKernelInfo()); } EXPECT_EQ(newSize, mockAuxBuiltInOp.convertToNonAuxKernel.size()); for (auto &convertToNonAuxKernel : mockAuxBuiltInOp.convertToNonAuxKernel) { EXPECT_EQ(&mockAuxBuiltInOp.baseKernel->getKernelInfo(), &convertToNonAuxKernel->getKernelInfo()); } } HWTEST2_P(AuxBuiltInTests, givenKernelWithAuxTranslationRequiredWhenEnqueueCalledThenLockOnBuiltin, AuxBuiltinsMatcher) { BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, *pClDevice); auto mockAuxBuiltInOp = new MockAuxBuilInOp(*pBuiltIns, *pClDevice); pClExecutionEnvironment->setBuiltinDispatchInfoBuilder(rootDeviceIndex, EBuiltInOps::AuxTranslation, std::unique_ptr(mockAuxBuiltInOp)); auto mockProgram = clUniquePtr(new MockProgram(toClDeviceVector(*pClDevice))); auto mockBuiltinKernel = MockKernel::create(*pDevice, mockProgram.get()); auto kernelInfos = MockKernel::toKernelInfoContainer(mockBuiltinKernel->getKernelInfo(), rootDeviceIndex); auto pMultiDeviceKernel = new MockMultiDeviceKernel(MockMultiDeviceKernel::toKernelVector(mockBuiltinKernel), kernelInfos); mockAuxBuiltInOp->usedKernels.at(0).reset(pMultiDeviceKernel); MockKernelWithInternals mockKernel(*pClDevice, pContext); MockCommandQueueHw cmdQ(pContext, pClDevice, nullptr); size_t gws[3] = {1, 0, 0}; mockKernel.kernelInfo.addArgBuffer(0, 0, sizeof(void *)); mockKernel.mockKernel->initialize(); std::unique_ptr gmm; MockKernelObjForAuxTranslation mockKernelObjForAuxTranslation(kernelObjType); if (kernelObjType == KernelObjForAuxTranslation::Type::MEM_OBJ) { MockBuffer::setAllocationType(mockKernelObjForAuxTranslation.mockBuffer->getGraphicsAllocation(0), pDevice->getRootDeviceEnvironment().getGmmClientContext(), true); cl_mem clMem = mockKernelObjForAuxTranslation.mockBuffer.get(); mockKernel.mockKernel->setArgBuffer(0, sizeof(cl_mem *), &clMem); } else { auto gfxAllocation = 
mockKernelObjForAuxTranslation.mockGraphicsAllocation.get(); MockBuffer::setAllocationType(gfxAllocation, pDevice->getRootDeviceEnvironment().getGmmClientContext(), true); auto ptr = reinterpret_cast(gfxAllocation->getGpuAddressToPatch()); mockKernel.mockKernel->setArgSvmAlloc(0, ptr, gfxAllocation, 0u); gmm.reset(gfxAllocation->getDefaultGmm()); } mockKernel.mockKernel->auxTranslationRequired = false; cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_EQ(0u, pMultiDeviceKernel->takeOwnershipCalls); EXPECT_EQ(0u, pMultiDeviceKernel->releaseOwnershipCalls); mockKernel.mockKernel->auxTranslationRequired = true; cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_EQ(1u, pMultiDeviceKernel->takeOwnershipCalls); EXPECT_EQ(1u, pMultiDeviceKernel->releaseOwnershipCalls); } HWCMDTEST_P(IGFX_GEN8_CORE, AuxBuiltInTests, givenAuxTranslationKernelWhenSettingKernelArgsThenSetValidMocs) { if (this->pDevice->areSharedSystemAllocationsAllowed()) { GTEST_SKIP(); } using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; MockAuxBuilInOp mockAuxBuiltInOp(*pBuiltIns, *pClDevice); MultiDispatchInfo multiDispatchInfo; KernelObjsForAuxTranslation kernelObjsForAuxTranslation; multiDispatchInfo.setKernelObjsForAuxTranslation(kernelObjsForAuxTranslation); BuiltinOpParams builtinOpParamsToAux; builtinOpParamsToAux.auxTranslationDirection = AuxTranslationDirection::NonAuxToAux; BuiltinOpParams builtinOpParamsToNonAux; builtinOpParamsToNonAux.auxTranslationDirection = AuxTranslationDirection::AuxToNonAux; std::unique_ptr buffer = nullptr; std::unique_ptr gfxAllocation = nullptr; if (kernelObjType == MockKernelObjForAuxTranslation::Type::MEM_OBJ) { cl_int retVal = CL_SUCCESS; buffer.reset(Buffer::create(pContext, 0, MemoryConstants::pageSize, nullptr, retVal)); kernelObjsForAuxTranslation.insert({KernelObjForAuxTranslation::Type::MEM_OBJ, buffer.get()}); } else { gfxAllocation.reset(new 
MockGraphicsAllocation(nullptr, MemoryConstants::pageSize)); kernelObjsForAuxTranslation.insert({KernelObjForAuxTranslation::Type::GFX_ALLOC, gfxAllocation.get()}); } mockAuxBuiltInOp.buildDispatchInfosForAuxTranslation(multiDispatchInfo, builtinOpParamsToAux); mockAuxBuiltInOp.buildDispatchInfosForAuxTranslation(multiDispatchInfo, builtinOpParamsToNonAux); { // read args auto argNum = 0; auto expectedMocs = pDevice->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED); auto sshBase = mockAuxBuiltInOp.convertToAuxKernel[0]->getSurfaceStateHeap(); auto sshOffset = mockAuxBuiltInOp.convertToAuxKernel[0]->getKernelInfo().getArgDescriptorAt(argNum).as().bindful; auto surfaceState = reinterpret_cast(ptrOffset(sshBase, sshOffset)); EXPECT_EQ(expectedMocs, surfaceState->getMemoryObjectControlState()); sshBase = mockAuxBuiltInOp.convertToNonAuxKernel[0]->getSurfaceStateHeap(); sshOffset = mockAuxBuiltInOp.convertToNonAuxKernel[0]->getKernelInfo().getArgDescriptorAt(argNum).as().bindful; surfaceState = reinterpret_cast(ptrOffset(sshBase, sshOffset)); EXPECT_EQ(expectedMocs, surfaceState->getMemoryObjectControlState()); } { // write args auto argNum = 1; auto expectedMocs = pDevice->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); auto sshBase = mockAuxBuiltInOp.convertToAuxKernel[0]->getSurfaceStateHeap(); auto sshOffset = mockAuxBuiltInOp.convertToAuxKernel[0]->getKernelInfo().getArgDescriptorAt(argNum).as().bindful; auto surfaceState = reinterpret_cast(ptrOffset(sshBase, sshOffset)); EXPECT_EQ(expectedMocs, surfaceState->getMemoryObjectControlState()); sshBase = mockAuxBuiltInOp.convertToNonAuxKernel[0]->getSurfaceStateHeap(); sshOffset = mockAuxBuiltInOp.convertToNonAuxKernel[0]->getKernelInfo().getArgDescriptorAt(argNum).as().bindful; surfaceState = reinterpret_cast(ptrOffset(sshBase, sshOffset)); EXPECT_EQ(expectedMocs, surfaceState->getMemoryObjectControlState()); } } HWTEST2_P(AuxBuiltInTests, 
givenAuxToNonAuxTranslationWhenSettingSurfaceStateThenSetValidAuxMode, AuxBuiltinsMatcher) { const auto &compilerHwInfoConfig = *CompilerHwInfoConfig::get(defaultHwInfo->platform.eProductFamily); if (compilerHwInfoConfig.isForceToStatelessRequired()) { GTEST_SKIP(); } using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using AUXILIARY_SURFACE_MODE = typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE; MockAuxBuilInOp mockAuxBuiltInOp(*pBuiltIns, *pClDevice); MultiDispatchInfo multiDispatchInfo; KernelObjsForAuxTranslation kernelObjsForAuxTranslation; multiDispatchInfo.setKernelObjsForAuxTranslation(kernelObjsForAuxTranslation); BuiltinOpParams builtinOpParams; builtinOpParams.auxTranslationDirection = AuxTranslationDirection::AuxToNonAux; std::unique_ptr buffer = nullptr; std::unique_ptr gfxAllocation = nullptr; auto gmm = std::unique_ptr(new Gmm(pDevice->getGmmClientContext(), nullptr, 1, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true)); gmm->isCompressionEnabled = true; if (kernelObjType == MockKernelObjForAuxTranslation::Type::MEM_OBJ) { cl_int retVal = CL_SUCCESS; buffer.reset(Buffer::create(pContext, 0, MemoryConstants::pageSize, nullptr, retVal)); buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->setDefaultGmm(gmm.release()); kernelObjsForAuxTranslation.insert({KernelObjForAuxTranslation::Type::MEM_OBJ, buffer.get()}); } else { gfxAllocation.reset(new MockGraphicsAllocation(nullptr, MemoryConstants::pageSize)); gfxAllocation->setDefaultGmm(gmm.get()); kernelObjsForAuxTranslation.insert({KernelObjForAuxTranslation::Type::GFX_ALLOC, gfxAllocation.get()}); } mockAuxBuiltInOp.buildDispatchInfosForAuxTranslation(multiDispatchInfo, builtinOpParams); { // read arg auto argNum = 0; auto sshBase = mockAuxBuiltInOp.convertToNonAuxKernel[0]->getSurfaceStateHeap(); auto sshOffset = mockAuxBuiltInOp.convertToNonAuxKernel[0]->getKernelInfo().getArgDescriptorAt(argNum).as().bindful; auto surfaceState = 
reinterpret_cast(ptrOffset(sshBase, sshOffset)); EXPECT_EQ(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_CCS_E, surfaceState->getAuxiliarySurfaceMode()); } { // write arg auto argNum = 1; auto sshBase = mockAuxBuiltInOp.convertToNonAuxKernel[0]->getSurfaceStateHeap(); auto sshOffset = mockAuxBuiltInOp.convertToNonAuxKernel[0]->getKernelInfo().getArgDescriptorAt(argNum).as().bindful; auto surfaceState = reinterpret_cast(ptrOffset(sshBase, sshOffset)); EXPECT_EQ(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE, surfaceState->getAuxiliarySurfaceMode()); } } HWTEST2_P(AuxBuiltInTests, givenNonAuxToAuxTranslationWhenSettingSurfaceStateThenSetValidAuxMode, AuxBuiltinsMatcher) { const auto &compilerHwInfoConfig = *CompilerHwInfoConfig::get(defaultHwInfo->platform.eProductFamily); if (compilerHwInfoConfig.isForceToStatelessRequired()) { GTEST_SKIP(); } using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using AUXILIARY_SURFACE_MODE = typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE; MockAuxBuilInOp mockAuxBuiltInOp(*pBuiltIns, *pClDevice); MultiDispatchInfo multiDispatchInfo; KernelObjsForAuxTranslation kernelObjsForAuxTranslation; multiDispatchInfo.setKernelObjsForAuxTranslation(kernelObjsForAuxTranslation); BuiltinOpParams builtinOpParams; builtinOpParams.auxTranslationDirection = AuxTranslationDirection::NonAuxToAux; MockKernelObjForAuxTranslation mockKernelObjForAuxTranslation(kernelObjType); auto gmm = std::make_unique(pDevice->getGmmClientContext(), nullptr, 1, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, StorageInfo{}, true); gmm->isCompressionEnabled = true; if (kernelObjType == MockKernelObjForAuxTranslation::Type::MEM_OBJ) { mockKernelObjForAuxTranslation.mockBuffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->setDefaultGmm(gmm.release()); } else { mockKernelObjForAuxTranslation.mockGraphicsAllocation->setDefaultGmm(gmm.get()); } kernelObjsForAuxTranslation.insert(mockKernelObjForAuxTranslation); 
mockAuxBuiltInOp.buildDispatchInfosForAuxTranslation(multiDispatchInfo, builtinOpParams); { // read arg auto argNum = 0; auto sshBase = mockAuxBuiltInOp.convertToAuxKernel[0]->getSurfaceStateHeap(); auto sshOffset = mockAuxBuiltInOp.convertToAuxKernel[0]->getKernelInfo().getArgDescriptorAt(argNum).as().bindful; auto surfaceState = reinterpret_cast(ptrOffset(sshBase, sshOffset)); EXPECT_EQ(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE, surfaceState->getAuxiliarySurfaceMode()); } { // write arg auto argNum = 1; auto sshBase = mockAuxBuiltInOp.convertToAuxKernel[0]->getSurfaceStateHeap(); auto sshOffset = mockAuxBuiltInOp.convertToAuxKernel[0]->getKernelInfo().getArgDescriptorAt(argNum).as().bindful; auto surfaceState = reinterpret_cast(ptrOffset(sshBase, sshOffset)); EXPECT_EQ(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_CCS_E, surfaceState->getAuxiliarySurfaceMode()); } } TEST_F(BuiltInTests, GivenCopyBufferToBufferWhenDispatchInfoIsCreatedThenSizeIsAlignedToCachLineSize) { BuiltinDispatchInfoBuilder &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBuffer, *pClDevice); AlignedBuffer src; AlignedBuffer dst; BuiltinOpParams builtinOpsParams; builtinOpsParams.srcMemObj = &src; builtinOpsParams.dstMemObj = &dst; builtinOpsParams.size = {src.getSize(), 0, 0}; MultiDispatchInfo multiDispatchInfo(builtinOpsParams); ASSERT_TRUE(builder.buildDispatchInfos(multiDispatchInfo)); EXPECT_EQ(1u, multiDispatchInfo.size()); const DispatchInfo *dispatchInfo = multiDispatchInfo.begin(); EXPECT_EQ(1u, dispatchInfo->getDim()); size_t leftSize = reinterpret_cast(dst.getCpuAddress()) % MemoryConstants::cacheLineSize; EXPECT_EQ(0u, leftSize); size_t rightSize = (reinterpret_cast(dst.getCpuAddress()) + dst.getSize()) % MemoryConstants::cacheLineSize; EXPECT_EQ(0u, rightSize); size_t middleElSize = sizeof(uint32_t) * 4; size_t middleSize = dst.getSize() / middleElSize; EXPECT_EQ(Vec3(middleSize, 1, 1), dispatchInfo->getGWS()); 
EXPECT_TRUE(compareBuiltinOpParams(multiDispatchInfo.peekBuiltinOpParams(), builtinOpsParams));
}

// Stateless buffer-to-buffer copy with a 4 GB source offset and 4 GB size:
// one dispatch is built and the stored op params match the input.
// NOTE(review): the static_cast target types are missing in this copy of the
// file (here and in the tests below) - confirm against upstream.
TEST_F(BuiltInTests, givenBigOffsetAndSizeWhenBuilderCopyBufferToBufferStatelessIsUsedThenParamsAreCorrect) {
    if (is32bit) {
        GTEST_SKIP();
    }

    BuiltinDispatchInfoBuilder &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBufferStateless, *pClDevice);

    uint64_t bigSize = 10ull * MemoryConstants::gigaByte;
    uint64_t bigOffset = 4ull * MemoryConstants::gigaByte;
    uint64_t size = 4ull * MemoryConstants::gigaByte;

    MockBuffer srcBuffer;
    srcBuffer.size = static_cast(bigSize);
    MockBuffer dstBuffer;
    dstBuffer.size = static_cast(bigSize);

    BuiltinOpParams builtinOpsParams;
    builtinOpsParams.srcMemObj = &srcBuffer;
    builtinOpsParams.srcOffset = {static_cast(bigOffset), 0, 0};
    builtinOpsParams.dstMemObj = &dstBuffer;
    builtinOpsParams.dstOffset = {0, 0, 0};
    builtinOpsParams.size = {static_cast(size), 0, 0};

    MultiDispatchInfo multiDispatchInfo(builtinOpsParams);
    ASSERT_TRUE(builder.buildDispatchInfos(multiDispatchInfo));
    EXPECT_EQ(1u, multiDispatchInfo.size());
    EXPECT_TRUE(compareBuiltinOpParams(multiDispatchInfo.peekBuiltinOpParams(), builtinOpsParams));
}

// Same scenario for the stateless rectangular copy builder.
TEST_F(BuiltInTests, givenBigOffsetAndSizeWhenBuilderCopyBufferToBufferRectStatelessIsUsedThenParamsAreCorrect) {
    if (is32bit) {
        GTEST_SKIP();
    }

    BuiltinDispatchInfoBuilder &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferRectStateless, *pClDevice);

    uint64_t bigSize = 10ull * MemoryConstants::gigaByte;
    uint64_t bigOffset = 4ull * MemoryConstants::gigaByte;
    uint64_t size = 4ull * MemoryConstants::gigaByte;

    MockBuffer srcBuffer;
    srcBuffer.size = static_cast(bigSize);
    MockBuffer dstBuffer;
    dstBuffer.size = static_cast(bigSize);

    BuiltinOpParams dc;
    dc.srcMemObj = &srcBuffer;
    dc.dstMemObj = &dstBuffer;
    dc.srcOffset = {static_cast(bigOffset), 0, 0};
    dc.dstOffset = {0, 0, 0};
    dc.size = {static_cast(size), 1, 1};
    dc.srcRowPitch = static_cast(size);
    dc.srcSlicePitch = 0;
    dc.dstRowPitch = static_cast(size);
    dc.dstSlicePitch = 0;

    MultiDispatchInfo multiDispatchInfo(dc);
    ASSERT_TRUE(builder.buildDispatchInfos(multiDispatchInfo));
    EXPECT_EQ(1u, multiDispatchInfo.size());
    EXPECT_TRUE(compareBuiltinOpParams(multiDispatchInfo.peekBuiltinOpParams(), dc));
}

// Same scenario for the stateless fill builder (4 GB destination offset).
TEST_F(BuiltInTests, givenBigOffsetAndSizeWhenBuilderFillBufferStatelessIsUsedThenParamsAreCorrect) {
    if (is32bit) {
        GTEST_SKIP();
    }

    BuiltinDispatchInfoBuilder &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::FillBufferStateless, *pClDevice);

    uint64_t bigSize = 10ull * MemoryConstants::gigaByte;
    uint64_t bigOffset = 4ull * MemoryConstants::gigaByte;
    uint64_t size = 4ull * MemoryConstants::gigaByte;

    MockBuffer srcBuffer;
    srcBuffer.size = static_cast(bigSize);
    MockBuffer dstBuffer;
    dstBuffer.size = static_cast(bigSize);

    BuiltinOpParams dc;
    dc.srcMemObj = &srcBuffer;
    dc.dstMemObj = &dstBuffer;
    dc.dstOffset = {static_cast(bigOffset), 0, 0};
    dc.size = {static_cast(size), 0, 0};

    MultiDispatchInfo multiDispatchInfo(dc);
    ASSERT_TRUE(builder.buildDispatchInfos(multiDispatchInfo));
    EXPECT_EQ(1u, multiDispatchInfo.size());
    EXPECT_TRUE(compareBuiltinOpParams(multiDispatchInfo.peekBuiltinOpParams(), dc));
}

// Stateless buffer -> image copy with a 4 GB source offset; additionally checks
// that the selected kernel supports >4 GB buffers and that arg 0 is not pure stateful.
HWTEST_F(BuiltInTests, givenBigOffsetAndSizeWhenBuilderCopyBufferToImageStatelessIsUsedThenParamsAreCorrect) {
    REQUIRE_64BIT_OR_SKIP();
    REQUIRE_IMAGES_OR_SKIP(defaultHwInfo);

    uint64_t bigSize = 10ull * MemoryConstants::gigaByte;
    uint64_t bigOffset = 4ull * MemoryConstants::gigaByte;

    MockBuffer srcBuffer;
    srcBuffer.size = static_cast(bigSize);
    std::unique_ptr pDstImage(Image2dHelper<>::create(pContext));
    ASSERT_NE(nullptr, pDstImage.get());

    auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToImage3dStateless, *pClDevice);

    BuiltinOpParams dc;
    dc.srcPtr = &srcBuffer;
    dc.dstMemObj = pDstImage.get();
    dc.srcOffset = {static_cast(bigOffset), 0, 0};
    dc.dstOffset = {0, 0, 0};
    dc.size = {1, 1, 1};
    dc.dstRowPitch = 0;
    dc.dstSlicePitch = 0;

    MultiDispatchInfo multiDispatchInfo(dc);
    ASSERT_TRUE(builder.buildDispatchInfos(multiDispatchInfo));
    EXPECT_EQ(1u, multiDispatchInfo.size());
    EXPECT_TRUE(compareBuiltinOpParams(multiDispatchInfo.peekBuiltinOpParams(), dc));

    auto kernel = multiDispatchInfo.begin()->getKernel();
    ASSERT_NE(nullptr, kernel);
    EXPECT_TRUE(kernel->getKernelInfo().kernelDescriptor.kernelAttributes.supportsBuffersBiggerThan4Gb());
    EXPECT_FALSE(kernel->getKernelInfo().getArgDescriptorAt(0).as().isPureStateful());
}

// Stateless image -> buffer copy with a 4 GB destination offset.
HWTEST_F(BuiltInTests, givenBigOffsetAndSizeWhenBuilderCopyImageToBufferStatelessIsUsedThenParamsAreCorrect) {
    // Same guards as the buffer-to-image test above: this test also creates an
    // image and asserts it is non-null.
    // NOTE(review): assumes REQUIRE_64BIT_OR_SKIP() is equivalent to the
    // `if (is32bit) GTEST_SKIP()` check it replaces - confirm in the test macros header.
    REQUIRE_64BIT_OR_SKIP();
    REQUIRE_IMAGES_OR_SKIP(defaultHwInfo);

    uint64_t bigSize = 10ull * MemoryConstants::gigaByte;
    uint64_t bigOffset = 4ull * MemoryConstants::gigaByte;

    MockBuffer dstBuffer;
    dstBuffer.size = static_cast(bigSize);
    std::unique_ptr pSrcImage(Image2dHelper<>::create(pContext));
    ASSERT_NE(nullptr, pSrcImage.get());

    auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyImage3dToBufferStateless, *pClDevice);

    BuiltinOpParams dc;
    dc.srcMemObj = pSrcImage.get();
    dc.dstMemObj = &dstBuffer;
    dc.srcOffset = {0, 0, 0};
    dc.dstOffset = {static_cast(bigOffset), 0, 0};
    dc.size = {1, 1, 1};

    MultiDispatchInfo multiDispatchInfo(dc);
    ASSERT_TRUE(builder.buildDispatchInfos(multiDispatchInfo));
    EXPECT_EQ(1u, multiDispatchInfo.size());
    EXPECT_TRUE(compareBuiltinOpParams(multiDispatchInfo.peekBuiltinOpParams(), dc));

    auto kernel = multiDispatchInfo.begin()->getKernel();
    ASSERT_NE(nullptr, kernel);
    EXPECT_TRUE(kernel->getKernelInfo().kernelDescriptor.kernelAttributes.supportsBuffersBiggerThan4Gb());
}

// A source offset of 5 bytes must select the "MiddleMisaligned" copy kernel.
TEST_F(BuiltInTests, GivenUnalignedCopyBufferToBufferWhenDispatchInfoIsCreatedThenParamsAreCorrect) {
    BuiltinDispatchInfoBuilder &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBuffer, *pClDevice);

    AlignedBuffer src;
    AlignedBuffer dst;

    BuiltinOpParams builtinOpsParams;
    builtinOpsParams.srcMemObj = &src;
builtinOpsParams.srcOffset.x = 5; // causes misalignment from 4-byte boundary by 1 byte (8 bits) builtinOpsParams.dstMemObj = &dst; builtinOpsParams.size = {src.getSize(), 0, 0}; MultiDispatchInfo multiDispatchInfo(builtinOpsParams); ASSERT_TRUE(builder.buildDispatchInfos(multiDispatchInfo)); EXPECT_EQ(1u, multiDispatchInfo.size()); const Kernel *kernel = multiDispatchInfo.begin()->getKernel(); EXPECT_EQ(kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName, "CopyBufferToBufferMiddleMisaligned"); const auto crossThreadData = kernel->getCrossThreadData(); const auto crossThreadOffset = kernel->getKernelInfo().getArgDescriptorAt(4).as().elements[0].offset; EXPECT_EQ(8u, *reinterpret_cast(ptrOffset(crossThreadData, crossThreadOffset))); EXPECT_TRUE(compareBuiltinOpParams(multiDispatchInfo.peekBuiltinOpParams(), builtinOpsParams)); } TEST_F(BuiltInTests, GivenReadBufferAlignedWhenDispatchInfoIsCreatedThenParamsAreCorrect) { BuiltinDispatchInfoBuilder &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBuffer, *pClDevice); AlignedBuffer srcMemObj; auto size = 10 * MemoryConstants::cacheLineSize; auto dstPtr = alignedMalloc(size, MemoryConstants::cacheLineSize); BuiltinOpParams builtinOpsParams; builtinOpsParams.srcMemObj = &srcMemObj; builtinOpsParams.dstPtr = dstPtr; builtinOpsParams.size = {size, 0, 0}; MultiDispatchInfo multiDispatchInfo(builtinOpsParams); ASSERT_TRUE(builder.buildDispatchInfos(multiDispatchInfo)); EXPECT_EQ(1u, multiDispatchInfo.size()); const DispatchInfo *dispatchInfo = multiDispatchInfo.begin(); EXPECT_EQ(1u, dispatchInfo->getDim()); size_t leftSize = reinterpret_cast(dstPtr) % MemoryConstants::cacheLineSize; EXPECT_EQ(0u, leftSize); size_t rightSize = (reinterpret_cast(dstPtr) + size) % MemoryConstants::cacheLineSize; EXPECT_EQ(0u, rightSize); size_t middleElSize = sizeof(uint32_t) * 4; size_t middleSize = size / middleElSize; EXPECT_EQ(Vec3(middleSize, 1, 1), dispatchInfo->getGWS()); 
EXPECT_TRUE(compareBuiltinOpParams(multiDispatchInfo.peekBuiltinOpParams(), builtinOpsParams)); alignedFree(dstPtr); } TEST_F(BuiltInTests, GivenWriteBufferAlignedWhenDispatchInfoIsCreatedThenParamsAreCorrect) { BuiltinDispatchInfoBuilder &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBuffer, *pClDevice); auto size = 10 * MemoryConstants::cacheLineSize; auto srcPtr = alignedMalloc(size, MemoryConstants::cacheLineSize); AlignedBuffer dstMemObj; BuiltinOpParams builtinOpsParams; builtinOpsParams.srcPtr = srcPtr; builtinOpsParams.dstMemObj = &dstMemObj; builtinOpsParams.size = {size, 0, 0}; MultiDispatchInfo multiDispatchInfo(builtinOpsParams); ASSERT_TRUE(builder.buildDispatchInfos(multiDispatchInfo)); EXPECT_EQ(1u, multiDispatchInfo.size()); const DispatchInfo *dispatchInfo = multiDispatchInfo.begin(); EXPECT_EQ(1u, dispatchInfo->getDim()); size_t leftSize = reinterpret_cast(srcPtr) % MemoryConstants::cacheLineSize; EXPECT_EQ(0u, leftSize); size_t rightSize = (reinterpret_cast(srcPtr) + size) % MemoryConstants::cacheLineSize; EXPECT_EQ(0u, rightSize); size_t middleElSize = sizeof(uint32_t) * 4; size_t middleSize = size / middleElSize; EXPECT_EQ(Vec3(middleSize, 1, 1), dispatchInfo->getGWS()); EXPECT_TRUE(compareBuiltinOpParams(multiDispatchInfo.peekBuiltinOpParams(), builtinOpsParams)); alignedFree(srcPtr); } TEST_F(BuiltInTests, WhenGettingBuilderInfoTwiceThenPointerIsSame) { BuiltinDispatchInfoBuilder &builder1 = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBuffer, *pClDevice); BuiltinDispatchInfoBuilder &builder2 = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBuffer, *pClDevice); EXPECT_EQ(&builder1, &builder2); } TEST_F(BuiltInTests, GivenUnknownBuiltInOpWhenGettingBuilderInfoThenExceptionThrown) { bool caughtException = false; try { BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::COUNT, *pClDevice); } catch (const 
std::runtime_error &) { caughtException = true; } EXPECT_TRUE(caughtException); } TEST_F(BuiltInTests, GivenUnsupportedBuildTypeWhenBuildingDispatchInfoThenFalseIsReturned) { auto &builtIns = *pDevice->getBuiltIns(); BuiltinDispatchInfoBuilder dispatchInfoBuilder{builtIns, *pClDevice}; BuiltinOpParams params; MultiDispatchInfo multiDispatchInfo(params); auto ret = dispatchInfoBuilder.buildDispatchInfos(multiDispatchInfo); EXPECT_FALSE(ret); ASSERT_EQ(0U, multiDispatchInfo.size()); ret = dispatchInfoBuilder.buildDispatchInfos(multiDispatchInfo, nullptr, 0, Vec3{0, 0, 0}, Vec3{0, 0, 0}, Vec3{0, 0, 0}); EXPECT_FALSE(ret); EXPECT_EQ(0U, multiDispatchInfo.size()); } TEST_F(BuiltInTests, GivenDefaultBuiltinDispatchInfoBuilderWhenValidateDispatchIsCalledThenClSuccessIsReturned) { auto &builtIns = *pDevice->getBuiltIns(); BuiltinDispatchInfoBuilder dispatchInfoBuilder{builtIns, *pClDevice}; auto ret = dispatchInfoBuilder.validateDispatch(nullptr, 1, Vec3{0, 0, 0}, Vec3{0, 0, 0}, Vec3{0, 0, 0}); EXPECT_EQ(CL_SUCCESS, ret); } TEST_F(BuiltInTests, WhenSettingExplictArgThenTrueIsReturned) { auto &builtIns = *pDevice->getBuiltIns(); BuiltinDispatchInfoBuilder dispatchInfoBuilder{builtIns, *pClDevice}; MultiDispatchInfo multiDispatchInfo; BuiltinOpParams params; cl_int err; auto ret = dispatchInfoBuilder.setExplicitArg(1, 5, nullptr, err); EXPECT_TRUE(ret); } TEST_F(VmeBuiltInTests, GivenVmeBuilderWhenGettingDispatchInfoThenValidPointerIsReturned) { overwriteBuiltInBinaryName("media_kernels_backend"); EBuiltInOps::Type vmeOps[] = {EBuiltInOps::VmeBlockMotionEstimateIntel, EBuiltInOps::VmeBlockAdvancedMotionEstimateCheckIntel, EBuiltInOps::VmeBlockAdvancedMotionEstimateBidirectionalCheckIntel}; for (auto op : vmeOps) { BuiltinDispatchInfoBuilder &builder = Vme::getBuiltinDispatchInfoBuilder(op, *pClDevice); EXPECT_NE(nullptr, &builder); } restoreBuiltInBinaryName(); } TEST_F(VmeBuiltInTests, givenInvalidBuiltInOpWhenGetVmeBuilderInfoThenExceptionIsThrown) { 
EXPECT_THROW(Vme::getBuiltinDispatchInfoBuilder(EBuiltInOps::COUNT, *pClDevice), std::exception);
}

// Building dispatch infos with a null kernel must fail for every VME op and
// leave the MultiDispatchInfo empty.
TEST_F(VmeBuiltInTests, GivenVmeBuilderAndInvalidParamsWhenGettingDispatchInfoThenEmptyKernelIsReturned) {
    overwriteBuiltInBinaryName("media_kernels_backend");
    EBuiltInOps::Type vmeOps[] = {EBuiltInOps::VmeBlockMotionEstimateIntel, EBuiltInOps::VmeBlockAdvancedMotionEstimateCheckIntel, EBuiltInOps::VmeBlockAdvancedMotionEstimateBidirectionalCheckIntel};
    for (auto op : vmeOps) {
        BuiltinDispatchInfoBuilder &builder = Vme::getBuiltinDispatchInfoBuilder(op, *pClDevice);

        MultiDispatchInfo outMdi;
        Vec3 gws{352, 288, 0};
        Vec3 elws{0, 0, 0};
        Vec3 offset{0, 0, 0};
        auto ret = builder.buildDispatchInfos(outMdi, nullptr, 0, gws, elws, offset);
        EXPECT_FALSE(ret);
        EXPECT_EQ(0U, outMdi.size());
    }
    restoreBuiltInBinaryName();
}

// Builds a dispatch info with the basic VME builder and checks the resulting
// work sizes, offset, kernel arguments and the three trailing implicit arguments.
// NOTE(review): template arguments (Vec3 element type, static_cast target,
// .as() descriptor type) are missing in this copy of the file - confirm against upstream.
TEST_F(VmeBuiltInTests, GivenVmeBuilderWhenGettingDispatchInfoThenParamsAreCorrect) {
    MockKernelWithInternals mockKernel{*pClDevice};
    mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 16;
    mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0] = 16;
    mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1] = 0;
    mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2] = 0;

    overwriteBuiltInBinaryName("media_kernels_backend");
    BuiltinDispatchInfoBuilder &builder = Vme::getBuiltinDispatchInfoBuilder(EBuiltInOps::VmeBlockMotionEstimateIntel, *pClDevice);
    restoreBuiltInBinaryName();

    MultiDispatchInfo outMdi;
    Vec3 gws{352, 288, 0};
    Vec3 elws{0, 0, 0};
    Vec3 offset{16, 0, 0};

    MockBuffer mb;
    cl_mem bufferArg = static_cast(&mb);

    cl_int err;
    constexpr uint32_t bufferArgNum = 3;
    bool ret = builder.setExplicitArg(bufferArgNum, sizeof(cl_mem), &bufferArg, err);
    EXPECT_FALSE(ret);
    EXPECT_EQ(CL_SUCCESS, err);

    ret = builder.buildDispatchInfos(outMdi, mockKernel.mockKernel, 0, gws, elws, offset);
    EXPECT_TRUE(ret);
    EXPECT_EQ(1U, outMdi.size());

    auto outDi = outMdi.begin();
    EXPECT_EQ(Vec3(352, 1, 1), outDi->getGWS());
    EXPECT_EQ(Vec3(16, 1, 1), outDi->getEnqueuedWorkgroupSize());
    EXPECT_EQ(Vec3(16, 0, 0), outDi->getOffset());
    EXPECT_NE(mockKernel.mockKernel, outDi->getKernel());

    EXPECT_EQ(bufferArg, outDi->getKernel()->getKernelArg(bufferArgNum));

    constexpr uint32_t vmeImplicitArgsBase = 6;
    constexpr uint32_t vmeImplicitArgs = 3;
    ASSERT_EQ(vmeImplicitArgsBase + vmeImplicitArgs, outDi->getKernel()->getKernelInfo().kernelDescriptor.payloadMappings.explicitArgs.size());
    uint32_t vmeExtraArgsExpectedVals[] = {18, 22, 18}; // height, width, stride
    for (uint32_t i = 0; i < vmeImplicitArgs; ++i) {
        auto &argAsVal = outDi->getKernel()->getKernelInfo().getArgDescriptorAt(vmeImplicitArgsBase + i).as();
        // Read the patched value straight out of the cross-thread data.
        EXPECT_EQ(vmeExtraArgsExpectedVals[i], *reinterpret_cast<const uint32_t *>(outDi->getKernel()->getCrossThreadData() + argAsVal.elements[0].offset));
    }
}

TEST_F(VmeBuiltInTests, GivenAdvancedVmeBuilderWhenGettingDispatchInfoThenParamsAreCorrect) {
    MockKernelWithInternals mockKernel{*pClDevice};
    mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 16;
    mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0] = 16;
    mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1] = 0;
    mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2] = 0;

    Vec3 gws{352, 288, 0};
    Vec3 elws{0, 0, 0};
    Vec3 offset{0, 0, 0};

    cl_int err;

    constexpr uint32_t bufferArgNum = 7;
    MockBuffer mb;
    cl_mem bufferArg = static_cast(&mb);

    constexpr uint32_t srcImageArgNum = 1;
    auto image = std::unique_ptr(Image2dHelper<>::create(pContext));
    cl_mem srcImageArg = static_cast(image.get());

    EBuiltInOps::Type vmeOps[] = {EBuiltInOps::VmeBlockAdvancedMotionEstimateCheckIntel, EBuiltInOps::VmeBlockAdvancedMotionEstimateBidirectionalCheckIntel};
    for (auto op : vmeOps) {
        MultiDispatchInfo outMdi;
        overwriteBuiltInBinaryName("media_kernels_backend");
        BuiltinDispatchInfoBuilder &builder = Vme::getBuiltinDispatchInfoBuilder(op, *pClDevice);
restoreBuiltInBinaryName(); bool ret = builder.setExplicitArg(srcImageArgNum, sizeof(cl_mem), &srcImageArg, err); EXPECT_FALSE(ret); EXPECT_EQ(CL_SUCCESS, err); ret = builder.setExplicitArg(bufferArgNum, sizeof(cl_mem), &bufferArg, err); EXPECT_FALSE(ret); EXPECT_EQ(CL_SUCCESS, err); ret = builder.buildDispatchInfos(outMdi, mockKernel.mockKernel, 0, gws, elws, offset); EXPECT_TRUE(ret); EXPECT_EQ(1U, outMdi.size()); auto outDi = outMdi.begin(); EXPECT_EQ(Vec3(352, 1, 1), outDi->getGWS()); EXPECT_EQ(Vec3(16, 1, 1), outDi->getEnqueuedWorkgroupSize()); EXPECT_NE(mockKernel.mockKernel, outDi->getKernel()); EXPECT_EQ(srcImageArg, outDi->getKernel()->getKernelArg(srcImageArgNum)); uint32_t vmeImplicitArgsBase = outDi->getKernel()->getKernelInfo().getArgNumByName("intraSrcImg"); uint32_t vmeImplicitArgs = 4; ASSERT_EQ(vmeImplicitArgsBase + vmeImplicitArgs, outDi->getKernel()->getKernelInfo().getExplicitArgs().size()); EXPECT_EQ(srcImageArg, outDi->getKernel()->getKernelArg(vmeImplicitArgsBase)); ++vmeImplicitArgsBase; --vmeImplicitArgs; uint32_t vmeExtraArgsExpectedVals[] = {18, 22, 18}; // height, width, stride for (uint32_t i = 0; i < vmeImplicitArgs; ++i) { auto &argAsVal = outDi->getKernel()->getKernelInfo().getArgDescriptorAt(vmeImplicitArgsBase + i).as(); EXPECT_EQ(vmeExtraArgsExpectedVals[i], *((uint32_t *)(outDi->getKernel()->getCrossThreadData() + argAsVal.elements[0].offset))); } } } TEST_F(VmeBuiltInTests, WhenGettingBuiltinAsStringThenCorrectStringIsReturned) { EXPECT_EQ(0, strcmp("aux_translation.builtin_kernel", getBuiltinAsString(EBuiltInOps::AuxTranslation))); EXPECT_EQ(0, strcmp("copy_buffer_to_buffer.builtin_kernel", getBuiltinAsString(EBuiltInOps::CopyBufferToBuffer))); EXPECT_EQ(0, strcmp("copy_buffer_rect.builtin_kernel", getBuiltinAsString(EBuiltInOps::CopyBufferRect))); EXPECT_EQ(0, strcmp("fill_buffer.builtin_kernel", getBuiltinAsString(EBuiltInOps::FillBuffer))); EXPECT_EQ(0, strcmp("copy_buffer_to_image3d.builtin_kernel", 
getBuiltinAsString(EBuiltInOps::CopyBufferToImage3d))); EXPECT_EQ(0, strcmp("copy_image3d_to_buffer.builtin_kernel", getBuiltinAsString(EBuiltInOps::CopyImage3dToBuffer))); EXPECT_EQ(0, strcmp("copy_image_to_image1d.builtin_kernel", getBuiltinAsString(EBuiltInOps::CopyImageToImage1d))); EXPECT_EQ(0, strcmp("copy_image_to_image2d.builtin_kernel", getBuiltinAsString(EBuiltInOps::CopyImageToImage2d))); EXPECT_EQ(0, strcmp("copy_image_to_image3d.builtin_kernel", getBuiltinAsString(EBuiltInOps::CopyImageToImage3d))); EXPECT_EQ(0, strcmp("copy_kernel_timestamps.builtin_kernel", getBuiltinAsString(EBuiltInOps::QueryKernelTimestamps))); EXPECT_EQ(0, strcmp("fill_image1d.builtin_kernel", getBuiltinAsString(EBuiltInOps::FillImage1d))); EXPECT_EQ(0, strcmp("fill_image2d.builtin_kernel", getBuiltinAsString(EBuiltInOps::FillImage2d))); EXPECT_EQ(0, strcmp("fill_image3d.builtin_kernel", getBuiltinAsString(EBuiltInOps::FillImage3d))); EXPECT_EQ(0, strcmp("vme_block_motion_estimate_intel.builtin_kernel", getBuiltinAsString(EBuiltInOps::VmeBlockMotionEstimateIntel))); EXPECT_EQ(0, strcmp("vme_block_advanced_motion_estimate_check_intel.builtin_kernel", getBuiltinAsString(EBuiltInOps::VmeBlockAdvancedMotionEstimateCheckIntel))); EXPECT_EQ(0, strcmp("vme_block_advanced_motion_estimate_bidirectional_check_intel", getBuiltinAsString(EBuiltInOps::VmeBlockAdvancedMotionEstimateBidirectionalCheckIntel))); EXPECT_EQ(0, strcmp("unknown", getBuiltinAsString(EBuiltInOps::COUNT))); } TEST_F(BuiltInTests, WhenUnknownOperationIsSpecifiedThenUnknownNameIsReturned) { EXPECT_EQ(0, strcmp("unknown", getUnknownBuiltinAsString(EBuiltInOps::CopyImage3dToBuffer))); EXPECT_EQ(0, strcmp("unknown", getUnknownBuiltinAsString(EBuiltInOps::COUNT))); } TEST_F(BuiltInTests, GivenEncodeTypeWhenGettingExtensionThenCorrectStringIsReturned) { EXPECT_EQ(0, strcmp("", BuiltinCode::getExtension(BuiltinCode::ECodeType::Any))); EXPECT_EQ(0, strcmp(".bin", BuiltinCode::getExtension(BuiltinCode::ECodeType::Binary))); 
EXPECT_EQ(0, strcmp(".bc", BuiltinCode::getExtension(BuiltinCode::ECodeType::Intermediate))); EXPECT_EQ(0, strcmp(".cl", BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))); EXPECT_EQ(0, strcmp("", BuiltinCode::getExtension(BuiltinCode::ECodeType::COUNT))); EXPECT_EQ(0, strcmp("", BuiltinCode::getExtension(BuiltinCode::ECodeType::INVALID))); } TEST_F(BuiltInTests, GivenBuiltinResourceWhenCreatingBuiltinResourceThenSizesAreEqual) { std::string resource = "__kernel"; auto br1 = createBuiltinResource(resource.data(), resource.size()); EXPECT_NE(0u, br1.size()); auto br2 = createBuiltinResource(br1); EXPECT_NE(0u, br2.size()); EXPECT_EQ(br1, br2); } TEST_F(BuiltInTests, WhenCreatingBuiltinResourceNameThenCorrectStringIsReturned) { EBuiltInOps::Type builtin = EBuiltInOps::CopyBufferToBuffer; const std::string extension = ".cl"; const std::string platformName = "skl"; const uint32_t deviceRevId = 9; std::string resourceNameGeneric = createBuiltinResourceName(builtin, extension); std::string resourceNameForPlatform = createBuiltinResourceName(builtin, extension, platformName); std::string resourceNameForPlatformAndStepping = createBuiltinResourceName(builtin, extension, platformName, deviceRevId); EXPECT_EQ(0, strcmp("copy_buffer_to_buffer.builtin_kernel.cl", resourceNameGeneric.c_str())); EXPECT_EQ(0, strcmp("skl_0_copy_buffer_to_buffer.builtin_kernel.cl", resourceNameForPlatform.c_str())); EXPECT_EQ(0, strcmp("skl_9_copy_buffer_to_buffer.builtin_kernel.cl", resourceNameForPlatformAndStepping.c_str())); } TEST_F(BuiltInTests, WhenJoiningPathThenPathsAreJoinedWithCorrectSeparator) { std::string resourceName = "copy_buffer_to_buffer.builtin_kernel.cl"; std::string resourcePath = "path"; EXPECT_EQ(0, strcmp(resourceName.c_str(), joinPath("", resourceName).c_str())); EXPECT_EQ(0, strcmp(resourcePath.c_str(), joinPath(resourcePath, "").c_str())); EXPECT_EQ(0, strcmp((resourcePath + PATH_SEPARATOR + resourceName).c_str(), joinPath(resourcePath + PATH_SEPARATOR, 
resourceName).c_str())); EXPECT_EQ(0, strcmp((resourcePath + PATH_SEPARATOR + resourceName).c_str(), joinPath(resourcePath, resourceName).c_str())); } TEST_F(BuiltInTests, GivenFileNameWhenGettingKernelFromEmbeddedStorageRegistryThenValidPtrIsReturnedForExisitngKernels) { EmbeddedStorageRegistry storageRegistry; std::string resource = "__kernel"; storageRegistry.store("kernel.cl", createBuiltinResource(resource.data(), resource.size() + 1)); const BuiltinResourceT *br = storageRegistry.get("kernel.cl"); EXPECT_NE(nullptr, br); EXPECT_EQ(0, strcmp(resource.data(), br->data())); const BuiltinResourceT *bnr = storageRegistry.get("unknown.cl"); EXPECT_EQ(nullptr, bnr); } TEST_F(BuiltInTests, WhenStoringRootPathThenPathIsSavedCorrectly) { class MockStorage : Storage { public: MockStorage(const std::string &rootPath) : Storage(rootPath){}; std::string &getRootPath() { return Storage::rootPath; } protected: BuiltinResourceT loadImpl(const std::string &fullResourceName) override { BuiltinResourceT ret; return ret; } }; const std::string rootPath("root"); MockStorage mockStorage(rootPath); EXPECT_EQ(0, strcmp(rootPath.data(), mockStorage.getRootPath().data())); } TEST_F(BuiltInTests, GivenFiledNameWhenLoadingImplKernelFromEmbeddedStorageRegistryThenValidPtrIsReturnedForExisitngKernels) { class MockEmbeddedStorage : EmbeddedStorage { public: MockEmbeddedStorage(const std::string &rootPath) : EmbeddedStorage(rootPath){}; BuiltinResourceT loadImpl(const std::string &fullResourceName) override { return EmbeddedStorage::loadImpl(fullResourceName); } }; MockEmbeddedStorage mockEmbeddedStorage("root"); BuiltinResourceT br = mockEmbeddedStorage.loadImpl("copy_buffer_to_buffer.builtin_kernel.cl"); EXPECT_NE(0u, br.size()); BuiltinResourceT bnr = mockEmbeddedStorage.loadImpl("unknown.cl"); EXPECT_EQ(0u, bnr.size()); } TEST_F(BuiltInTests, GivenFiledNameWhenLoadingImplKernelFromFileStorageThenValidPtrIsReturnedForExisitngKernels) { class MockFileStorage : FileStorage { public: 
MockFileStorage(const std::string &rootPath) : FileStorage(rootPath){}; BuiltinResourceT loadImpl(const std::string &fullResourceName) override { return FileStorage::loadImpl(fullResourceName); } }; MockFileStorage mockEmbeddedStorage("root"); BuiltinResourceT br = mockEmbeddedStorage.loadImpl("test_files/copybuffer.cl"); EXPECT_NE(0u, br.size()); BuiltinResourceT bnr = mockEmbeddedStorage.loadImpl("unknown.cl"); EXPECT_EQ(0u, bnr.size()); } TEST_F(BuiltInTests, WhenBuiltinsLibIsCreatedThenAllStoragesSizeIsTwo) { auto mockBuiltinsLib = std::unique_ptr(new MockBuiltinsLib()); EXPECT_EQ(2u, mockBuiltinsLib->allStorages.size()); } TEST_F(BuiltInTests, GivenTypeAnyWhenGettingBuiltinCodeThenCorrectBuiltinReturned) { auto builtinsLib = std::unique_ptr(new BuiltinsLib()); BuiltinCode code = builtinsLib->getBuiltinCode(EBuiltInOps::CopyBufferToBuffer, BuiltinCode::ECodeType::Any, *pDevice); EXPECT_EQ(BuiltinCode::ECodeType::Binary, code.type); EXPECT_NE(0u, code.resource.size()); EXPECT_EQ(pDevice, code.targetDevice); } TEST_F(BuiltInTests, GivenTypeBinaryWhenGettingBuiltinCodeThenCorrectBuiltinReturned) { auto builtinsLib = std::unique_ptr(new BuiltinsLib()); BuiltinCode code = builtinsLib->getBuiltinCode(EBuiltInOps::CopyBufferToBuffer, BuiltinCode::ECodeType::Binary, *pDevice); EXPECT_EQ(BuiltinCode::ECodeType::Binary, code.type); EXPECT_NE(0u, code.resource.size()); EXPECT_EQ(pDevice, code.targetDevice); } TEST_F(BuiltInTests, GivenTypeIntermediateWhenGettingBuiltinCodeThenCorrectBuiltinReturned) { auto builtinsLib = std::unique_ptr(new BuiltinsLib()); BuiltinCode code = builtinsLib->getBuiltinCode(EBuiltInOps::CopyBufferToBuffer, BuiltinCode::ECodeType::Intermediate, *pDevice); EXPECT_EQ(BuiltinCode::ECodeType::Intermediate, code.type); EXPECT_EQ(0u, code.resource.size()); EXPECT_EQ(pDevice, code.targetDevice); } TEST_F(BuiltInTests, GivenTypeSourceWhenGettingBuiltinCodeThenCorrectBuiltinReturned) { auto builtinsLib = std::unique_ptr(new BuiltinsLib()); BuiltinCode 
code = builtinsLib->getBuiltinCode(EBuiltInOps::CopyBufferToBuffer, BuiltinCode::ECodeType::Source, *pDevice); EXPECT_EQ(BuiltinCode::ECodeType::Source, code.type); EXPECT_NE(0u, code.resource.size()); EXPECT_EQ(pDevice, code.targetDevice); } TEST_F(BuiltInTests, GivenTypeInvalidWhenGettingBuiltinCodeThenKernelIsEmpty) { auto builtinsLib = std::unique_ptr(new BuiltinsLib()); BuiltinCode code = builtinsLib->getBuiltinCode(EBuiltInOps::CopyBufferToBuffer, BuiltinCode::ECodeType::INVALID, *pDevice); EXPECT_EQ(BuiltinCode::ECodeType::INVALID, code.type); EXPECT_EQ(0u, code.resource.size()); EXPECT_EQ(pDevice, code.targetDevice); } TEST_F(BuiltInTests, GivenBuiltinTypeSourceWhenGettingBuiltinResourceThenResourceSizeIsNonZero) { auto mockBuiltinsLib = std::unique_ptr(new MockBuiltinsLib()); EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::AuxTranslation, BuiltinCode::ECodeType::Source, *pDevice).size()); EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::CopyBufferToBuffer, BuiltinCode::ECodeType::Source, *pDevice).size()); EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::CopyBufferRect, BuiltinCode::ECodeType::Source, *pDevice).size()); EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::FillBuffer, BuiltinCode::ECodeType::Source, *pDevice).size()); EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::CopyBufferToImage3d, BuiltinCode::ECodeType::Source, *pDevice).size()); EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::CopyImage3dToBuffer, BuiltinCode::ECodeType::Source, *pDevice).size()); EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::CopyImageToImage1d, BuiltinCode::ECodeType::Source, *pDevice).size()); EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::CopyImageToImage2d, BuiltinCode::ECodeType::Source, *pDevice).size()); EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::CopyImageToImage3d, BuiltinCode::ECodeType::Source, *pDevice).size()); 
EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::FillImage1d, BuiltinCode::ECodeType::Source, *pDevice).size()); EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::FillImage2d, BuiltinCode::ECodeType::Source, *pDevice).size()); EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::FillImage3d, BuiltinCode::ECodeType::Source, *pDevice).size()); EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::VmeBlockMotionEstimateIntel, BuiltinCode::ECodeType::Source, *pDevice).size()); EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::VmeBlockAdvancedMotionEstimateCheckIntel, BuiltinCode::ECodeType::Source, *pDevice).size()); EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::VmeBlockAdvancedMotionEstimateBidirectionalCheckIntel, BuiltinCode::ECodeType::Source, *pDevice).size()); EXPECT_EQ(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::COUNT, BuiltinCode::ECodeType::Source, *pDevice).size()); } HWCMDTEST_F(IGFX_GEN8_CORE, BuiltInTests, GivenBuiltinTypeBinaryWhenGettingBuiltinResourceThenResourceSizeIsNonZero) { auto mockBuiltinsLib = std::unique_ptr(new MockBuiltinsLib()); EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::CopyBufferToBuffer, BuiltinCode::ECodeType::Binary, *pDevice).size()); EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::CopyBufferRect, BuiltinCode::ECodeType::Binary, *pDevice).size()); EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::FillBuffer, BuiltinCode::ECodeType::Binary, *pDevice).size()); EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::CopyBufferToImage3d, BuiltinCode::ECodeType::Binary, *pDevice).size()); EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::CopyImage3dToBuffer, BuiltinCode::ECodeType::Binary, *pDevice).size()); EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::CopyImageToImage1d, BuiltinCode::ECodeType::Binary, *pDevice).size()); EXPECT_NE(0u, 
mockBuiltinsLib->getBuiltinResource(EBuiltInOps::CopyImageToImage2d, BuiltinCode::ECodeType::Binary, *pDevice).size()); EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::CopyImageToImage3d, BuiltinCode::ECodeType::Binary, *pDevice).size()); EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::FillImage1d, BuiltinCode::ECodeType::Binary, *pDevice).size()); EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::FillImage2d, BuiltinCode::ECodeType::Binary, *pDevice).size()); EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::FillImage3d, BuiltinCode::ECodeType::Binary, *pDevice).size()); EXPECT_EQ(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::VmeBlockMotionEstimateIntel, BuiltinCode::ECodeType::Binary, *pDevice).size()); EXPECT_EQ(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::VmeBlockAdvancedMotionEstimateCheckIntel, BuiltinCode::ECodeType::Binary, *pDevice).size()); EXPECT_EQ(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::VmeBlockAdvancedMotionEstimateBidirectionalCheckIntel, BuiltinCode::ECodeType::Binary, *pDevice).size()); EXPECT_EQ(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::COUNT, BuiltinCode::ECodeType::Binary, *pDevice).size()); } TEST_F(BuiltInTests, GivenBuiltinTypeSourceWhenGettingBuiltinResourceForNotRegisteredRevisionThenResourceSizeIsNonZero) { pDevice->getRootDeviceEnvironment().getMutableHardwareInfo()->platform.usRevId += 0xdead; auto mockBuiltinsLib = std::unique_ptr(new MockBuiltinsLib()); EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::CopyBufferToBuffer, BuiltinCode::ECodeType::Source, *pDevice).size()); EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::CopyBufferRect, BuiltinCode::ECodeType::Source, *pDevice).size()); EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::FillBuffer, BuiltinCode::ECodeType::Source, *pDevice).size()); EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::CopyBufferToImage3d, 
BuiltinCode::ECodeType::Source, *pDevice).size()); EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::CopyImage3dToBuffer, BuiltinCode::ECodeType::Source, *pDevice).size()); EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::CopyImageToImage1d, BuiltinCode::ECodeType::Source, *pDevice).size()); EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::CopyImageToImage2d, BuiltinCode::ECodeType::Source, *pDevice).size()); EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::CopyImageToImage3d, BuiltinCode::ECodeType::Source, *pDevice).size()); EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::FillImage1d, BuiltinCode::ECodeType::Source, *pDevice).size()); EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::FillImage2d, BuiltinCode::ECodeType::Source, *pDevice).size()); EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::FillImage3d, BuiltinCode::ECodeType::Source, *pDevice).size()); EXPECT_EQ(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::COUNT, BuiltinCode::ECodeType::Source, *pDevice).size()); } TEST_F(BuiltInTests, GivenTypeAnyWhenCreatingProgramFromCodeThenValidPointerIsReturned) { auto builtinsLib = std::unique_ptr(new BuiltinsLib()); const BuiltinCode bc = builtinsLib->getBuiltinCode(EBuiltInOps::CopyBufferToBuffer, BuiltinCode::ECodeType::Any, *pDevice); EXPECT_NE(0u, bc.resource.size()); auto program = std::unique_ptr(BuiltinDispatchInfoBuilder::createProgramFromCode(bc, toClDeviceVector(*pClDevice))); EXPECT_NE(nullptr, program.get()); } TEST_F(BuiltInTests, GivenTypeSourceWhenCreatingProgramFromCodeThenValidPointerIsReturned) { auto builtinsLib = std::unique_ptr(new BuiltinsLib()); const BuiltinCode bc = builtinsLib->getBuiltinCode(EBuiltInOps::CopyBufferToBuffer, BuiltinCode::ECodeType::Source, *pDevice); EXPECT_NE(0u, bc.resource.size()); auto program = std::unique_ptr(BuiltinDispatchInfoBuilder::createProgramFromCode(bc, toClDeviceVector(*pClDevice))); EXPECT_NE(nullptr, 
program.get()); } TEST_F(BuiltInTests, givenCreateProgramFromSourceWhenDeviceSupportSharedSystemAllocationThenInternalOptionsDisableStosoFlag) { auto builtinsLib = std::unique_ptr(new BuiltinsLib()); pClDevice->deviceInfo.sharedSystemMemCapabilities = CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL | CL_UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL | CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS_INTEL | CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS_INTEL; pDevice->getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable.sharedSystemMemCapabilities = 1; const BuiltinCode bc = builtinsLib->getBuiltinCode(EBuiltInOps::CopyBufferToBuffer, BuiltinCode::ECodeType::Source, *pDevice); EXPECT_NE(0u, bc.resource.size()); auto program = std::unique_ptr(BuiltinDispatchInfoBuilder::createProgramFromCode(bc, toClDeviceVector(*pClDevice))); EXPECT_NE(nullptr, program.get()); auto builtinInternalOptions = program->getInternalOptions(); EXPECT_TRUE(hasSubstr(builtinInternalOptions, std::string(CompilerOptions::greaterThan4gbBuffersRequired))); } TEST_F(BuiltInTests, GivenTypeIntermediateWhenCreatingProgramFromCodeThenNullPointerIsReturned) { auto builtinsLib = std::unique_ptr(new BuiltinsLib()); const BuiltinCode bc = builtinsLib->getBuiltinCode(EBuiltInOps::CopyBufferToBuffer, BuiltinCode::ECodeType::Intermediate, *pDevice); EXPECT_EQ(0u, bc.resource.size()); auto program = std::unique_ptr(BuiltinDispatchInfoBuilder::createProgramFromCode(bc, toClDeviceVector(*pClDevice))); EXPECT_EQ(nullptr, program.get()); } TEST_F(BuiltInTests, GivenTypeBinaryWhenCreatingProgramFromCodeThenValidPointerIsReturned) { auto builtinsLib = std::unique_ptr(new BuiltinsLib()); const BuiltinCode bc = builtinsLib->getBuiltinCode(EBuiltInOps::CopyBufferToBuffer, BuiltinCode::ECodeType::Binary, *pDevice); EXPECT_NE(0u, bc.resource.size()); auto program = std::unique_ptr(BuiltinDispatchInfoBuilder::createProgramFromCode(bc, toClDeviceVector(*pClDevice))); EXPECT_NE(nullptr, program.get()); } 
TEST_F(BuiltInTests, GivenTypeInvalidWhenCreatingProgramFromCodeThenNullPointerIsReturned) { auto builtinsLib = std::unique_ptr(new BuiltinsLib()); const BuiltinCode bc = builtinsLib->getBuiltinCode(EBuiltInOps::CopyBufferToBuffer, BuiltinCode::ECodeType::INVALID, *pDevice); EXPECT_EQ(0u, bc.resource.size()); auto program = std::unique_ptr(BuiltinDispatchInfoBuilder::createProgramFromCode(bc, toClDeviceVector(*pClDevice))); EXPECT_EQ(nullptr, program.get()); } TEST_F(BuiltInTests, GivenInvalidBuiltinWhenCreatingProgramFromCodeThenNullPointerIsReturned) { auto builtinsLib = std::unique_ptr(new BuiltinsLib()); const BuiltinCode bc = builtinsLib->getBuiltinCode(EBuiltInOps::COUNT, BuiltinCode::ECodeType::Any, *pDevice); EXPECT_EQ(0u, bc.resource.size()); auto program = std::unique_ptr(BuiltinDispatchInfoBuilder::createProgramFromCode(bc, toClDeviceVector(*pClDevice))); EXPECT_EQ(nullptr, program.get()); } TEST_F(BuiltInTests, GivenForce32bitWhenCreatingProgramThenCorrectKernelIsCreated) { bool force32BitAddressess = pDevice->getDeviceInfo().force32BitAddressess; const_cast(&pDevice->getDeviceInfo())->force32BitAddressess = true; auto builtinsLib = std::unique_ptr(new BuiltinsLib()); const BuiltinCode bc = builtinsLib->getBuiltinCode(EBuiltInOps::CopyBufferToBuffer, BuiltinCode::ECodeType::Source, *pDevice); ASSERT_NE(0u, bc.resource.size()); auto program = std::unique_ptr(BuiltinDispatchInfoBuilder::createProgramFromCode(bc, toClDeviceVector(*pClDevice))); ASSERT_NE(nullptr, program.get()); auto builtinInternalOptions = program->getInternalOptions(); auto it = builtinInternalOptions.find(NEO::CompilerOptions::arch32bit.data()); EXPECT_EQ(std::string::npos, it); it = builtinInternalOptions.find(NEO::CompilerOptions::greaterThan4gbBuffersRequired.data()); if (is32bit || pDevice->areSharedSystemAllocationsAllowed()) { EXPECT_NE(std::string::npos, it); } else { EXPECT_EQ(std::string::npos, it); } const_cast(&pDevice->getDeviceInfo())->force32BitAddressess = 
force32BitAddressess; } TEST_F(BuiltInTests, GivenVmeKernelWhenGettingDeviceInfoThenCorrectVmeVersionIsReturned) { if (!pDevice->getHardwareInfo().capabilityTable.supportsVme) { GTEST_SKIP(); } cl_uint param; auto ret = pClDevice->getDeviceInfo(CL_DEVICE_ME_VERSION_INTEL, sizeof(param), ¶m, nullptr); EXPECT_EQ(CL_SUCCESS, ret); EXPECT_EQ(static_cast(CL_ME_VERSION_ADVANCED_VER_2_INTEL), param); } TEST_F(VmeBuiltInTests, WhenVmeKernelIsCreatedThenParamsAreCorrect) { this->pBuiltIns->setCacheingEnableState(false); overwriteBuiltInBinaryName("media_kernels_backend"); BuiltInOp vmeBuilder(*this->pBuiltIns, *pClDevice); restoreBuiltInBinaryName(); cl_int err; { int32_t bufArgNum = 7; cl_mem mem = 0; vmeBuilder.setExplicitArg(bufArgNum, sizeof(cl_mem), &mem, err); EXPECT_TRUE(vmeBuilder.validateBufferSize(-1, 16)); EXPECT_TRUE(vmeBuilder.validateBufferSize(bufArgNum, 16)); MockBuffer mb; mem = &mb; vmeBuilder.setExplicitArg(bufArgNum, sizeof(cl_mem), &mem, err); EXPECT_TRUE(vmeBuilder.validateBufferSize(bufArgNum, mb.getSize())); EXPECT_TRUE(vmeBuilder.validateBufferSize(bufArgNum, mb.getSize() / 2)); EXPECT_FALSE(vmeBuilder.validateBufferSize(bufArgNum, mb.getSize() * 2)); mem = 0; vmeBuilder.setExplicitArg(bufArgNum, sizeof(cl_mem), &mem, err); } { EXPECT_TRUE(vmeBuilder.validateEnumVal(1, 1, 2, 3, 4)); EXPECT_TRUE(vmeBuilder.validateEnumVal(1, 1)); EXPECT_TRUE(vmeBuilder.validateEnumVal(3, 1, 2, 3)); EXPECT_FALSE(vmeBuilder.validateEnumVal(1, 3, 4)); EXPECT_FALSE(vmeBuilder.validateEnumVal(1)); EXPECT_FALSE(vmeBuilder.validateEnumVal(1, 2)); int32_t valArgNum = 3; uint32_t val = 7; vmeBuilder.setExplicitArg(valArgNum, sizeof(val), &val, err); EXPECT_FALSE(vmeBuilder.validateEnumArg(valArgNum, 3)); EXPECT_TRUE(vmeBuilder.validateEnumArg(valArgNum, 7)); val = 0; vmeBuilder.setExplicitArg(valArgNum, sizeof(val), &val, err); } { int32_t valArgNum = 3; uint32_t val = 7; vmeBuilder.setExplicitArg(valArgNum, sizeof(val), &val, err); EXPECT_EQ(val, 
vmeBuilder.getKernelArgByValValue(valArgNum)); val = 11; vmeBuilder.setExplicitArg(valArgNum, sizeof(val), &val, err); EXPECT_EQ(val, vmeBuilder.getKernelArgByValValue(valArgNum)); val = 0; vmeBuilder.setExplicitArg(valArgNum, sizeof(val), &val, err); } } TEST_F(VmeBuiltInTests, WhenVmeKernelIsCreatedThenDispatchIsBidirectional) { this->pBuiltIns->setCacheingEnableState(false); overwriteBuiltInBinaryName("media_kernels_backend"); BuiltInOp avmeBuilder(*this->pBuiltIns, *pClDevice); BuiltInOp avmeBidirBuilder(*this->pBuiltIns, *pClDevice); restoreBuiltInBinaryName(); EXPECT_FALSE(avmeBuilder.isBidirKernel()); EXPECT_TRUE(avmeBidirBuilder.isBidirKernel()); } struct ImageVmeValidFormat : Image2dDefaults { static const cl_image_format imageFormat; static const cl_image_desc iamgeDesc; }; const cl_image_format ImageVmeValidFormat::imageFormat = { CL_R, CL_UNORM_INT8}; const cl_image_desc ImageVmeValidFormat::iamgeDesc = { CL_MEM_OBJECT_IMAGE1D, 8192, 16, 1, 1, 0, 0, 0, 0, {nullptr}}; struct ImageVmeInvalidDataType : Image2dDefaults { static const cl_image_format imageFormat; }; const cl_image_format ImageVmeInvalidDataType::imageFormat = { CL_R, CL_FLOAT}; struct ImageVmeInvalidChannelOrder : Image2dDefaults { static const cl_image_format imageFormat; }; const cl_image_format ImageVmeInvalidChannelOrder::imageFormat = { CL_RGBA, CL_UNORM_INT8}; TEST_F(VmeBuiltInTests, WhenValidatingImagesThenCorrectResponses) { this->pBuiltIns->setCacheingEnableState(false); overwriteBuiltInBinaryName("media_kernels_backend"); BuiltInOp vmeBuilder(*this->pBuiltIns, *pClDevice); restoreBuiltInBinaryName(); uint32_t srcImgArgNum = 1; uint32_t refImgArgNum = 2; cl_int err; { // validate images are not null std::unique_ptr image1(ImageHelper::create(pContext)); cl_mem srcImgMem = 0; cl_mem refImgMem = 0; EXPECT_EQ(CL_INVALID_KERNEL_ARGS, vmeBuilder.validateImages(Vec3{3, 3, 0}, Vec3{0, 0, 0})); srcImgMem = image1.get(); refImgMem = 0; vmeBuilder.setExplicitArg(srcImgArgNum, 
sizeof(srcImgMem), &srcImgMem, err); EXPECT_EQ(CL_INVALID_KERNEL_ARGS, vmeBuilder.validateImages(Vec3{3, 3, 0}, Vec3{0, 0, 0})); } { // validate image formats std::unique_ptr imageValid(ImageHelper::create(pContext)); std::unique_ptr imageInvalidDataType(ImageHelper::create(pContext)); std::unique_ptr imageChannelOrder(ImageHelper::create(pContext)); Image *images[] = {imageValid.get(), imageInvalidDataType.get(), imageChannelOrder.get()}; for (Image *srcImg : images) { for (Image *dstImg : images) { cl_mem srcImgMem = srcImg; cl_mem refImgMem = dstImg; vmeBuilder.setExplicitArg(srcImgArgNum, sizeof(srcImgMem), &srcImgMem, err); vmeBuilder.setExplicitArg(refImgArgNum, sizeof(refImgMem), &refImgMem, err); bool shouldSucceed = (srcImg == imageValid.get()) && (dstImg == imageValid.get()); if (shouldSucceed) { EXPECT_EQ(CL_SUCCESS, vmeBuilder.validateImages(Vec3{1, 1, 0}, Vec3{0, 0, 0})); } else { EXPECT_EQ(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, vmeBuilder.validateImages(Vec3{1, 1, 0}, Vec3{0, 0, 0})); } } } } { // validate image tiling std::unique_ptr imageValid(ImageHelper::create(pContext)); pContext->isSharedContext = true; std::unique_ptr imageLinear(ImageHelper::create(pContext)); pContext->isSharedContext = false; Image *images[] = {imageValid.get(), imageLinear.get()}; for (Image *srcImg : images) { for (Image *dstImg : images) { cl_mem srcImgMem = srcImg; cl_mem refImgMem = dstImg; vmeBuilder.setExplicitArg(srcImgArgNum, sizeof(srcImgMem), &srcImgMem, err); vmeBuilder.setExplicitArg(refImgArgNum, sizeof(refImgMem), &refImgMem, err); bool shouldSucceed = (srcImg == imageValid.get()) && (dstImg == imageValid.get()); if (shouldSucceed) { EXPECT_EQ(CL_SUCCESS, vmeBuilder.validateImages(Vec3{1, 1, 0}, Vec3{0, 0, 0})); } else { EXPECT_EQ(CL_OUT_OF_RESOURCES, vmeBuilder.validateImages(Vec3{1, 1, 0}, Vec3{0, 0, 0})); } } } } { // validate region size std::unique_ptr imageValid(ImageHelper::create(pContext)); cl_mem imgValidMem = imageValid.get(); 
vmeBuilder.setExplicitArg(srcImgArgNum, sizeof(imgValidMem), &imgValidMem, err); vmeBuilder.setExplicitArg(refImgArgNum, sizeof(imgValidMem), &imgValidMem, err); EXPECT_EQ(CL_INVALID_IMAGE_SIZE, vmeBuilder.validateImages(Vec3{imageValid->getImageDesc().image_width + 1, 1, 0}, Vec3{0, 0, 0})); EXPECT_EQ(CL_INVALID_IMAGE_SIZE, vmeBuilder.validateImages(Vec3{1, imageValid->getImageDesc().image_height + 1, 0}, Vec3{0, 0, 0})); } } TEST_F(VmeBuiltInTests, WhenValidatingFlagsThenValidFlagCombinationsReturnTrue) { this->pBuiltIns->setCacheingEnableState(false); overwriteBuiltInBinaryName("media_kernels_backend"); BuiltInOp vmeBuilder(*this->pBuiltIns, *pClDevice); restoreBuiltInBinaryName(); uint32_t defaultSkipBlockVal = 8192; uint32_t flagsArgNum = 3; std::tuple flagsToTest[] = { std::make_tuple(CL_ME_CHROMA_INTRA_PREDICT_ENABLED_INTEL, false, defaultSkipBlockVal), std::make_tuple(CL_ME_SKIP_BLOCK_TYPE_16x16_INTEL, true, CL_ME_MB_TYPE_16x16_INTEL), std::make_tuple(CL_ME_SKIP_BLOCK_TYPE_8x8_INTEL, true, CL_ME_MB_TYPE_8x8_INTEL), std::make_tuple(defaultSkipBlockVal, true, defaultSkipBlockVal), }; cl_int err; for (auto &conf : flagsToTest) { uint32_t skipBlock = defaultSkipBlockVal; vmeBuilder.setExplicitArg(flagsArgNum, sizeof(uint32_t), &std::get<0>(conf), err); bool validationResult = vmeBuilder.validateFlags(skipBlock); if (std::get<1>(conf)) { EXPECT_TRUE(validationResult); } else { EXPECT_FALSE(validationResult); } EXPECT_EQ(std::get<2>(conf), skipBlock); } } TEST_F(VmeBuiltInTests, WhenValidatingSkipBlockTypeThenCorrectResponses) { this->pBuiltIns->setCacheingEnableState(false); overwriteBuiltInBinaryName("media_kernels_backend"); BuiltInOp avmeBidirectionalBuilder(*this->pBuiltIns, *pClDevice); BuiltInOp avmeBuilder(*this->pBuiltIns, *pClDevice); restoreBuiltInBinaryName(); cl_int err; uint32_t skipBlockTypeArgNum = 4; uint32_t skipBlockType = 8192; bool ret = avmeBidirectionalBuilder.validateSkipBlockTypeArg(skipBlockType); EXPECT_TRUE(ret); EXPECT_EQ(8192U, 
skipBlockType); skipBlockType = 8192U; avmeBuilder.setExplicitArg(skipBlockTypeArgNum, sizeof(uint32_t), &skipBlockType, err); ret = avmeBuilder.validateSkipBlockTypeArg(skipBlockType); EXPECT_FALSE(ret); skipBlockType = CL_ME_MB_TYPE_16x16_INTEL; avmeBuilder.setExplicitArg(skipBlockTypeArgNum, sizeof(uint32_t), &skipBlockType, err); skipBlockType = 8192U; ret = avmeBuilder.validateSkipBlockTypeArg(skipBlockType); EXPECT_TRUE(ret); EXPECT_EQ(static_cast(CL_ME_MB_TYPE_16x16_INTEL), skipBlockType); skipBlockType = CL_ME_MB_TYPE_8x8_INTEL; avmeBuilder.setExplicitArg(skipBlockTypeArgNum, sizeof(uint32_t), &skipBlockType, err); skipBlockType = 8192U; ret = avmeBuilder.validateSkipBlockTypeArg(skipBlockType); EXPECT_TRUE(ret); EXPECT_EQ(static_cast(CL_ME_MB_TYPE_8x8_INTEL), skipBlockType); } TEST_F(VmeBuiltInTests, GivenAcceleratorWhenExplicitlySettingArgThenFalseIsReturned) { this->pBuiltIns->setCacheingEnableState(false); overwriteBuiltInBinaryName("media_kernels_backend"); BuiltInOp vmeBuilder(*this->pBuiltIns, *pClDevice); restoreBuiltInBinaryName(); cl_int err; uint32_t aceleratorArgNum = 0; bool ret = vmeBuilder.setExplicitArg(aceleratorArgNum, sizeof(cl_accelerator_intel), nullptr, err); EXPECT_FALSE(ret); EXPECT_EQ(CL_INVALID_ACCELERATOR_INTEL, err); cl_motion_estimation_desc_intel acceleratorDesc; acceleratorDesc.subpixel_mode = CL_ME_SUBPIXEL_MODE_INTEGER_INTEL; acceleratorDesc.sad_adjust_mode = CL_ME_SAD_ADJUST_MODE_NONE_INTEL; acceleratorDesc.search_path_type = CL_ME_SEARCH_PATH_RADIUS_2_2_INTEL; acceleratorDesc.mb_block_type = CL_ME_MB_TYPE_16x16_INTEL; auto neoAccelerator = std::unique_ptr(VmeAccelerator::create(pContext, CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL, sizeof(acceleratorDesc), &acceleratorDesc, err)); ASSERT_NE(nullptr, neoAccelerator.get()); cl_accelerator_intel clAccel = neoAccelerator.get(); ret = vmeBuilder.setExplicitArg(aceleratorArgNum, sizeof(cl_accelerator_intel), &clAccel, err); EXPECT_FALSE(ret); EXPECT_EQ(CL_SUCCESS, err); } 
TEST_F(VmeBuiltInTests, WhenValidatingDispatchThenCorrectReturns) { this->pBuiltIns->setCacheingEnableState(false); overwriteBuiltInBinaryName("media_kernels_backend"); struct MockVmeBuilder : BuiltInOp { using BuiltInOp::BuiltInOp; cl_int validateVmeDispatch(const Vec3 &inputRegion, const Vec3 &offset, size_t blkNum, size_t blkMul) const override { receivedInputRegion = inputRegion; receivedOffset = offset; receivedBlkNum = blkNum; receivedBlkMul = blkMul; wasValidateVmeDispatchCalled = true; return valueToReturn; } mutable bool wasValidateVmeDispatchCalled = false; mutable Vec3 receivedInputRegion = {0, 0, 0}; mutable Vec3 receivedOffset = {0, 0, 0}; mutable size_t receivedBlkNum = 0; mutable size_t receivedBlkMul = 0; mutable cl_int valueToReturn = CL_SUCCESS; }; uint32_t aaceleratorArgNum = 0; MockVmeBuilder vmeBuilder(*this->pBuiltIns, *pClDevice); restoreBuiltInBinaryName(); cl_int ret = vmeBuilder.validateDispatch(nullptr, 1, Vec3{16, 16, 0}, Vec3{16, 1, 0}, Vec3{0, 0, 0}); EXPECT_EQ(CL_INVALID_WORK_DIMENSION, ret); ret = vmeBuilder.validateDispatch(nullptr, 3, Vec3{16, 16, 0}, Vec3{16, 1, 0}, Vec3{0, 0, 0}); EXPECT_EQ(CL_INVALID_WORK_DIMENSION, ret); ret = vmeBuilder.validateDispatch(nullptr, 2, Vec3{16, 16, 0}, Vec3{16, 1, 0}, Vec3{0, 0, 0}); EXPECT_EQ(CL_INVALID_KERNEL_ARGS, ret); // accelerator not set EXPECT_FALSE(vmeBuilder.wasValidateVmeDispatchCalled); cl_int err; cl_motion_estimation_desc_intel acceleratorDesc; acceleratorDesc.subpixel_mode = CL_ME_SUBPIXEL_MODE_INTEGER_INTEL; acceleratorDesc.sad_adjust_mode = CL_ME_SAD_ADJUST_MODE_NONE_INTEL; acceleratorDesc.search_path_type = CL_ME_SEARCH_PATH_RADIUS_2_2_INTEL; Vec3 gws{16, 16, 0}; Vec3 lws{16, 1, 0}; Vec3 off{0, 0, 0}; size_t gwWidthInBlk = 0; size_t gwHeightInBlk = 0; vmeBuilder.getBlkTraits(gws, gwWidthInBlk, gwHeightInBlk); { acceleratorDesc.mb_block_type = CL_ME_MB_TYPE_16x16_INTEL; auto neoAccelerator = std::unique_ptr(VmeAccelerator::create(pContext, 
CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL, sizeof(acceleratorDesc), &acceleratorDesc, err)); ASSERT_NE(nullptr, neoAccelerator.get()); cl_accelerator_intel clAccel = neoAccelerator.get(); vmeBuilder.setExplicitArg(aaceleratorArgNum, sizeof(clAccel), &clAccel, err); vmeBuilder.wasValidateVmeDispatchCalled = false; auto ret = vmeBuilder.validateDispatch(nullptr, 2, gws, lws, off); EXPECT_EQ(CL_SUCCESS, ret); EXPECT_TRUE(vmeBuilder.wasValidateVmeDispatchCalled); EXPECT_EQ(gws, vmeBuilder.receivedInputRegion); EXPECT_EQ(off, vmeBuilder.receivedOffset); EXPECT_EQ(gwWidthInBlk * gwHeightInBlk, vmeBuilder.receivedBlkNum); EXPECT_EQ(1U, vmeBuilder.receivedBlkMul); } { acceleratorDesc.mb_block_type = CL_ME_MB_TYPE_4x4_INTEL; auto neoAccelerator = std::unique_ptr(VmeAccelerator::create(pContext, CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL, sizeof(acceleratorDesc), &acceleratorDesc, err)); ASSERT_NE(nullptr, neoAccelerator.get()); cl_accelerator_intel clAccel = neoAccelerator.get(); vmeBuilder.setExplicitArg(aaceleratorArgNum, sizeof(clAccel), &clAccel, err); vmeBuilder.wasValidateVmeDispatchCalled = false; auto ret = vmeBuilder.validateDispatch(nullptr, 2, gws, lws, off); EXPECT_EQ(CL_SUCCESS, ret); EXPECT_TRUE(vmeBuilder.wasValidateVmeDispatchCalled); EXPECT_EQ(gws, vmeBuilder.receivedInputRegion); EXPECT_EQ(off, vmeBuilder.receivedOffset); EXPECT_EQ(gwWidthInBlk * gwHeightInBlk, vmeBuilder.receivedBlkNum); EXPECT_EQ(16U, vmeBuilder.receivedBlkMul); } { acceleratorDesc.mb_block_type = CL_ME_MB_TYPE_8x8_INTEL; auto neoAccelerator = std::unique_ptr(VmeAccelerator::create(pContext, CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL, sizeof(acceleratorDesc), &acceleratorDesc, err)); ASSERT_NE(nullptr, neoAccelerator.get()); cl_accelerator_intel clAccel = neoAccelerator.get(); vmeBuilder.setExplicitArg(aaceleratorArgNum, sizeof(clAccel), &clAccel, err); vmeBuilder.wasValidateVmeDispatchCalled = false; vmeBuilder.valueToReturn = 37; auto ret = vmeBuilder.validateDispatch(nullptr, 2, 
gws, lws, off); EXPECT_EQ(37, ret); EXPECT_TRUE(vmeBuilder.wasValidateVmeDispatchCalled); EXPECT_EQ(gws, vmeBuilder.receivedInputRegion); EXPECT_EQ(off, vmeBuilder.receivedOffset); EXPECT_EQ(gwWidthInBlk * gwHeightInBlk, vmeBuilder.receivedBlkNum); EXPECT_EQ(4U, vmeBuilder.receivedBlkMul); } } TEST_F(VmeBuiltInTests, WhenValidatingVmeDispatchThenCorrectReturns) { this->pBuiltIns->setCacheingEnableState(false); overwriteBuiltInBinaryName("media_kernels_backend"); BuiltInOp vmeBuilder(*this->pBuiltIns, *pClDevice); restoreBuiltInBinaryName(); cl_int err; // images not set EXPECT_EQ(CL_INVALID_KERNEL_ARGS, vmeBuilder.validateVmeDispatch(Vec3{1, 1, 0}, Vec3{0, 0, 0}, 64, 1)); uint32_t srcImgArgNum = 1; uint32_t refImgArgNum = 2; std::unique_ptr imageValid(ImageHelper::create(pContext)); cl_mem srcImgMem = imageValid.get(); vmeBuilder.setExplicitArg(srcImgArgNum, sizeof(srcImgMem), &srcImgMem, err); vmeBuilder.setExplicitArg(refImgArgNum, sizeof(srcImgMem), &srcImgMem, err); // null buffers are valid EXPECT_EQ(CL_SUCCESS, vmeBuilder.validateVmeDispatch(Vec3{1, 1, 0}, Vec3{0, 0, 0}, 64, 1)); // too small buffers should fail MockBuffer mb; cl_mem mem = &mb; uint32_t predictionMotionVectorBufferArgNum = 3; uint32_t motionVectorBufferArgNum = 4; uint32_t residualsBufferArgNum = 5; for (uint32_t argNum : {predictionMotionVectorBufferArgNum, motionVectorBufferArgNum, residualsBufferArgNum}) { EXPECT_EQ(CL_SUCCESS, vmeBuilder.validateVmeDispatch(Vec3{1, 1, 0}, Vec3{0, 0, 0}, mb.getSize() * 2, 1)); vmeBuilder.setExplicitArg(argNum, sizeof(cl_mem), &mem, err); EXPECT_EQ(CL_INVALID_BUFFER_SIZE, vmeBuilder.validateVmeDispatch(Vec3{1, 1, 0}, Vec3{0, 0, 0}, mb.getSize() * 2, 1)); vmeBuilder.setExplicitArg(argNum, sizeof(cl_mem), nullptr, err); } } TEST_F(VmeBuiltInTests, GivenAdvancedVmeWhenValidatingVmeDispatchThenCorrectReturns) { this->pBuiltIns->setCacheingEnableState(false); overwriteBuiltInBinaryName("media_kernels_backend"); BuiltInOp avmeBuilder(*this->pBuiltIns, 
*pClDevice); restoreBuiltInBinaryName(); cl_int err; // images not set ASSERT_EQ(CL_INVALID_KERNEL_ARGS, avmeBuilder.VmeBuiltinDispatchInfoBuilder::validateVmeDispatch(Vec3{1, 1, 0}, Vec3{0, 0, 0}, 64, 1)); EXPECT_EQ(CL_INVALID_KERNEL_ARGS, avmeBuilder.validateVmeDispatch(Vec3{1, 1, 0}, Vec3{0, 0, 0}, 64, 1)); uint32_t srcImgArgNum = 1; uint32_t refImgArgNum = 2; std::unique_ptr imageValid(ImageHelper::create(pContext)); cl_mem srcImgMem = imageValid.get(); avmeBuilder.setExplicitArg(srcImgArgNum, sizeof(srcImgMem), &srcImgMem, err); avmeBuilder.setExplicitArg(refImgArgNum, sizeof(srcImgMem), &srcImgMem, err); ASSERT_EQ(CL_SUCCESS, avmeBuilder.VmeBuiltinDispatchInfoBuilder::validateVmeDispatch(Vec3{1, 1, 0}, Vec3{0, 0, 0}, 64, 1)); uint32_t flagsArgNum = 3; uint32_t val = CL_ME_CHROMA_INTRA_PREDICT_ENABLED_INTEL; avmeBuilder.setExplicitArg(flagsArgNum, sizeof(val), &val, err); EXPECT_EQ(CL_INVALID_KERNEL_ARGS, avmeBuilder.validateVmeDispatch(Vec3{1, 1, 0}, Vec3{0, 0, 0}, 64, 1)); val = CL_ME_SKIP_BLOCK_TYPE_8x8_INTEL; avmeBuilder.setExplicitArg(flagsArgNum, sizeof(val), &val, err); uint32_t skipBlockTypeArgNum = 4; val = 8192; avmeBuilder.setExplicitArg(skipBlockTypeArgNum, sizeof(uint32_t), &val, err); EXPECT_EQ(CL_OUT_OF_RESOURCES, avmeBuilder.validateVmeDispatch(Vec3{1, 1, 0}, Vec3{0, 0, 0}, 64, 1)); val = CL_ME_MB_TYPE_16x16_INTEL; avmeBuilder.setExplicitArg(skipBlockTypeArgNum, sizeof(uint32_t), &val, err); uint32_t searchCostPenaltyArgNum = 5; val = 8192; avmeBuilder.setExplicitArg(searchCostPenaltyArgNum, sizeof(uint32_t), &val, err); EXPECT_EQ(CL_OUT_OF_RESOURCES, avmeBuilder.validateVmeDispatch(Vec3{1, 1, 0}, Vec3{0, 0, 0}, 64, 1)); val = CL_ME_COST_PENALTY_NONE_INTEL; avmeBuilder.setExplicitArg(searchCostPenaltyArgNum, sizeof(uint32_t), &val, err); uint32_t searchCostPrecisionArgNum = 6; val = 8192; avmeBuilder.setExplicitArg(searchCostPrecisionArgNum, sizeof(uint32_t), &val, err); EXPECT_EQ(CL_OUT_OF_RESOURCES, avmeBuilder.validateVmeDispatch(Vec3{1, 1, 
0}, Vec3{0, 0, 0}, 64, 1)); val = CL_ME_COST_PRECISION_QPEL_INTEL; avmeBuilder.setExplicitArg(searchCostPrecisionArgNum, sizeof(uint32_t), &val, err); // for non-bidirectional avme kernel, countMotionVectorBuffer must be set uint32_t countMotionVectorBufferArgNum = 7; EXPECT_EQ(CL_INVALID_BUFFER_SIZE, avmeBuilder.validateVmeDispatch(Vec3{1, 1, 0}, Vec3{0, 0, 0}, 64, 1)); MockBuffer mb; cl_mem mem = &mb; avmeBuilder.setExplicitArg(countMotionVectorBufferArgNum, sizeof(cl_mem), &mem, err); EXPECT_EQ(CL_SUCCESS, avmeBuilder.validateVmeDispatch(Vec3{1, 1, 0}, Vec3{0, 0, 0}, 1, 1)); } TEST_F(VmeBuiltInTests, GivenAdvancedBidirectionalVmeWhenValidatingVmeDispatchThenCorrectReturns) { this->pBuiltIns->setCacheingEnableState(false); overwriteBuiltInBinaryName("media_kernels_backend"); BuiltInOp avmeBuilder(*this->pBuiltIns, *pClDevice); restoreBuiltInBinaryName(); cl_int err; uint32_t srcImgArgNum = 1; uint32_t refImgArgNum = 2; std::unique_ptr imageValid(ImageHelper::create(pContext)); cl_mem srcImgMem = imageValid.get(); avmeBuilder.setExplicitArg(srcImgArgNum, sizeof(srcImgMem), &srcImgMem, err); avmeBuilder.setExplicitArg(refImgArgNum, sizeof(srcImgMem), &srcImgMem, err); ASSERT_EQ(CL_SUCCESS, avmeBuilder.VmeBuiltinDispatchInfoBuilder::validateVmeDispatch(Vec3{1, 1, 0}, Vec3{0, 0, 0}, 64, 1)); uint32_t flagsArgNum = 6; uint32_t val = CL_ME_SKIP_BLOCK_TYPE_8x8_INTEL; avmeBuilder.setExplicitArg(flagsArgNum, sizeof(val), &val, err); uint32_t searchCostPenaltyArgNum = 7; val = CL_ME_COST_PENALTY_NONE_INTEL; avmeBuilder.setExplicitArg(searchCostPenaltyArgNum, sizeof(uint32_t), &val, err); uint32_t searchCostPrecisionArgNum = 8; val = CL_ME_COST_PRECISION_QPEL_INTEL; avmeBuilder.setExplicitArg(searchCostPrecisionArgNum, sizeof(uint32_t), &val, err); uint32_t bidirWeightArgNum = 10; val = 255; avmeBuilder.setExplicitArg(bidirWeightArgNum, sizeof(uint8_t), &val, err); EXPECT_EQ(CL_INVALID_KERNEL_ARGS, avmeBuilder.validateVmeDispatch(Vec3{1, 1, 0}, Vec3{0, 0, 0}, 64, 1)); val = 
CL_ME_BIDIR_WEIGHT_QUARTER_INTEL; avmeBuilder.setExplicitArg(bidirWeightArgNum, sizeof(uint8_t), &val, err); EXPECT_EQ(CL_SUCCESS, avmeBuilder.validateVmeDispatch(Vec3{1, 1, 0}, Vec3{0, 0, 0}, 64, 1)); // test bufferSize checking uint32_t countMotionVectorBufferArgNum = 11; MockBuffer mb; cl_mem mem = &mb; avmeBuilder.setExplicitArg(countMotionVectorBufferArgNum, sizeof(cl_mem), &mem, err); EXPECT_EQ(CL_SUCCESS, avmeBuilder.validateVmeDispatch(Vec3{1, 1, 0}, Vec3{0, 0, 0}, 1, 1)); EXPECT_EQ(CL_INVALID_BUFFER_SIZE, avmeBuilder.validateVmeDispatch(Vec3{1, 1, 0}, Vec3{0, 0, 0}, mb.getSize() * 2, 1)); } TEST_F(VmeBuiltInTests, GivenAdvancedVmeWhenGettingSkipResidualsBuffExpSizeThenDefaultSizeIsReturned) { this->pBuiltIns->setCacheingEnableState(false); overwriteBuiltInBinaryName("media_kernels_backend"); BuiltInOp vmeBuilder(*this->pBuiltIns, *pClDevice); restoreBuiltInBinaryName(); auto size16x16 = vmeBuilder.getSkipResidualsBuffExpSize(CL_ME_MB_TYPE_16x16_INTEL, 4); auto sizeDefault = vmeBuilder.getSkipResidualsBuffExpSize(8192, 4); EXPECT_EQ(size16x16, sizeDefault); } TEST_F(BuiltInTests, GivenInvalidBuiltinKernelNameWhenCreatingBuiltInProgramThenInvalidValueErrorIsReturned) { const char *kernelNames = "invalid_kernel"; cl_int retVal = CL_SUCCESS; cl_program program = Vme::createBuiltInProgram( *pContext, pContext->getDevices(), kernelNames, retVal); EXPECT_EQ(CL_INVALID_VALUE, retVal); EXPECT_EQ(nullptr, program); } TEST_F(BuiltInTests, WhenGettingSipKernelThenReturnProgramCreatedFromIsaAcquiredThroughCompilerInterface) { auto mockCompilerInterface = new MockCompilerInterface(); pDevice->getExecutionEnvironment()->rootDeviceEnvironments[rootDeviceIndex]->compilerInterface.reset(mockCompilerInterface); auto builtins = new BuiltIns; pDevice->getExecutionEnvironment()->rootDeviceEnvironments[rootDeviceIndex]->builtins.reset(builtins); mockCompilerInterface->sipKernelBinaryOverride = mockCompilerInterface->getDummyGenBinary(); const SipKernel &sipKernel = 
builtins->getSipKernel(SipKernelType::Csr, *pDevice); auto expectedMem = mockCompilerInterface->sipKernelBinaryOverride.data(); EXPECT_EQ(0, memcmp(expectedMem, sipKernel.getSipAllocation()->getUnderlyingBuffer(), mockCompilerInterface->sipKernelBinaryOverride.size())); EXPECT_EQ(SipKernelType::Csr, mockCompilerInterface->requestedSipKernel); mockCompilerInterface->releaseDummyGenBinary(); } TEST_F(BuiltInTests, givenSipKernelWhenItIsCreatedThenItHasGraphicsAllocationForKernel) { const SipKernel &sipKern = pDevice->getBuiltIns()->getSipKernel(SipKernelType::Csr, pContext->getDevice(0)->getDevice()); auto sipAllocation = sipKern.getSipAllocation(); EXPECT_NE(nullptr, sipAllocation); } TEST_F(BuiltInTests, givenSipKernelWhenAllocationFailsThenItHasNullptrGraphicsAllocation) { auto executionEnvironment = new MockExecutionEnvironment; executionEnvironment->prepareRootDeviceEnvironments(1); auto memoryManager = new MockMemoryManager(*executionEnvironment); executionEnvironment->memoryManager.reset(memoryManager); auto device = std::unique_ptr(Device::create(executionEnvironment, 0u)); EXPECT_NE(nullptr, device); memoryManager->failAllocate32Bit = true; auto builtins = std::make_unique(); const SipKernel &sipKern = builtins->getSipKernel(SipKernelType::Csr, *device); auto sipAllocation = sipKern.getSipAllocation(); EXPECT_EQ(nullptr, sipAllocation); } TEST_F(BuiltInTests, givenDebugFlagForceUseSourceWhenArgIsBinaryThenReturnBuiltinCodeBinary) { DebugManager.flags.RebuildPrecompiledKernels.set(true); auto builtinsLib = std::unique_ptr(new BuiltinsLib()); BuiltinCode code = builtinsLib->getBuiltinCode(EBuiltInOps::CopyBufferToBuffer, BuiltinCode::ECodeType::Binary, *pDevice); EXPECT_EQ(BuiltinCode::ECodeType::Binary, code.type); EXPECT_NE(0u, code.resource.size()); EXPECT_EQ(pDevice, code.targetDevice); } TEST_F(BuiltInTests, givenDebugFlagForceUseSourceWhenArgIsAnyThenReturnBuiltinCodeSource) { DebugManager.flags.RebuildPrecompiledKernels.set(true); auto builtinsLib = 
std::unique_ptr(new BuiltinsLib()); BuiltinCode code = builtinsLib->getBuiltinCode(EBuiltInOps::CopyBufferToBuffer, BuiltinCode::ECodeType::Any, *pDevice); EXPECT_EQ(BuiltinCode::ECodeType::Source, code.type); EXPECT_NE(0u, code.resource.size()); EXPECT_EQ(pDevice, code.targetDevice); } using BuiltInOwnershipWrapperTests = BuiltInTests; TEST_F(BuiltInOwnershipWrapperTests, givenBuiltinWhenConstructedThenLockAndUnlockOnDestruction) { MockAuxBuilInOp mockAuxBuiltInOp(*pBuiltIns, *pClDevice); MockContext context(pClDevice); { EXPECT_EQ(nullptr, mockAuxBuiltInOp.baseKernel->getProgram()->getContextPtr()); BuiltInOwnershipWrapper lock(mockAuxBuiltInOp, &context); EXPECT_TRUE(mockAuxBuiltInOp.baseKernel->getMultiDeviceKernel()->hasOwnership()); EXPECT_TRUE(mockAuxBuiltInOp.baseKernel->getProgram()->hasOwnership()); EXPECT_EQ(&context, mockAuxBuiltInOp.baseKernel->getProgram()->getContextPtr()); } EXPECT_FALSE(mockAuxBuiltInOp.baseKernel->getMultiDeviceKernel()->hasOwnership()); EXPECT_FALSE(mockAuxBuiltInOp.baseKernel->getProgram()->hasOwnership()); EXPECT_EQ(nullptr, mockAuxBuiltInOp.baseKernel->getProgram()->getContextPtr()); } TEST_F(BuiltInOwnershipWrapperTests, givenLockWithoutParametersWhenConstructingThenLockOnlyWhenRequested) { MockAuxBuilInOp mockAuxBuiltInOp(*pBuiltIns, *pClDevice); MockContext context(pClDevice); { BuiltInOwnershipWrapper lock; EXPECT_EQ(nullptr, mockAuxBuiltInOp.baseKernel->getProgram()->getContextPtr()); lock.takeOwnership(mockAuxBuiltInOp, &context); EXPECT_TRUE(mockAuxBuiltInOp.baseKernel->getMultiDeviceKernel()->hasOwnership()); EXPECT_TRUE(mockAuxBuiltInOp.baseKernel->getProgram()->hasOwnership()); EXPECT_EQ(&context, mockAuxBuiltInOp.baseKernel->getProgram()->getContextPtr()); } EXPECT_FALSE(mockAuxBuiltInOp.baseKernel->getMultiDeviceKernel()->hasOwnership()); EXPECT_FALSE(mockAuxBuiltInOp.baseKernel->getProgram()->hasOwnership()); EXPECT_EQ(nullptr, mockAuxBuiltInOp.baseKernel->getProgram()->getContextPtr()); } 
TEST_F(BuiltInOwnershipWrapperTests, givenLockWithAcquiredOwnershipWhenTakeOwnershipCalledThenAbort) { MockAuxBuilInOp mockAuxBuiltInOp1(*pBuiltIns, *pClDevice); MockAuxBuilInOp mockAuxBuiltInOp2(*pBuiltIns, *pClDevice); MockContext context(pClDevice); BuiltInOwnershipWrapper lock(mockAuxBuiltInOp1, &context); EXPECT_THROW(lock.takeOwnership(mockAuxBuiltInOp1, &context), std::exception); EXPECT_THROW(lock.takeOwnership(mockAuxBuiltInOp2, &context), std::exception); } HWTEST_F(BuiltInOwnershipWrapperTests, givenBuiltInOwnershipWrapperWhenAskedForTypeTraitsThenDisableCopyConstructorAndOperator) { EXPECT_FALSE(std::is_copy_constructible::value); EXPECT_FALSE(std::is_copy_assignable::value); } compute-runtime-22.14.22890/opencl/test/unit_test/built_ins/built_in_tests_ocl.cpp000066400000000000000000000045471422164147700302210ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/built_ins/built_ins.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/built_ins/built_in_tests_shared.inl" #include "gtest/gtest.h" #include using namespace NEO; TEST(BuiltInTestsOcl, givenUseBindlessBuiltinInApiDependentModeWhenBinExtensionPassedThenNameHasBindfulPrefix) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.UseBindlessMode.set(-1); EBuiltInOps::Type builtin = EBuiltInOps::CopyBufferToBuffer; const std::string extension = ".bin"; const std::string platformName = "skl"; const uint32_t deviceRevId = 9; std::string resourceNameGeneric = createBuiltinResourceName(builtin, extension); std::string resourceNameForPlatform = createBuiltinResourceName(builtin, extension, platformName); std::string resourceNameForPlatformAndStepping = createBuiltinResourceName(builtin, extension, platformName, deviceRevId); std::string expectedResourceNameGeneric = "bindful_copy_buffer_to_buffer.builtin_kernel.bin"; 
std::string expectedResourceNameForPlatform = platformName.c_str(); expectedResourceNameForPlatform += "_0_bindful_copy_buffer_to_buffer.builtin_kernel.bin"; std::string expectedResourceNameForPlatformAndStepping = platformName.c_str(); expectedResourceNameForPlatformAndStepping += "_"; expectedResourceNameForPlatformAndStepping += std::to_string(deviceRevId).c_str(); expectedResourceNameForPlatformAndStepping += "_bindful_copy_buffer_to_buffer.builtin_kernel.bin"; EXPECT_EQ(0, strcmp(expectedResourceNameGeneric.c_str(), resourceNameGeneric.c_str())); EXPECT_EQ(0, strcmp(expectedResourceNameForPlatform.c_str(), resourceNameForPlatform.c_str())); EXPECT_EQ(0, strcmp(expectedResourceNameForPlatformAndStepping.c_str(), resourceNameForPlatformAndStepping.c_str())); } TEST(BuiltInTestsOcl, givenUseBindlessBuiltinDisabledInOclApiWhenBinExtensionPassedThenNameHasBindfulPrefix) { givenUseBindlessBuiltinDisabledWhenBinExtensionPassedThenNameHasBindfulPrefix(); } TEST(BuiltInTestsOcl, givenUseBindlessBuiltinEnabledInOclApiWhenBinExtensionPassedThenNameHasBindlessPrefix) { givenUseBindlessBuiltinEnabledWhenBinExtensionPassedThenNameHasBindlessPrefix(); } compute-runtime-22.14.22890/opencl/test/unit_test/built_ins/built_ins_file_names.cpp000066400000000000000000000016471422164147700305050ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include #include namespace NEO { std::vector builtInFileNames = { "test_files/aux_translation.builtin_kernel", "test_files/copy_buffer_to_buffer.builtin_kernel", "test_files/fill_buffer.builtin_kernel", "test_files/copy_buffer_rect.builtin_kernel", "test_files/copy_kernel_timestamps.builtin_kernel"}; std::vector imageBuiltInFileNames = { "test_files/fill_image1d.builtin_kernel", "test_files/fill_image2d.builtin_kernel", "test_files/fill_image3d.builtin_kernel", "test_files/copy_image_to_image1d.builtin_kernel", "test_files/copy_image_to_image2d.builtin_kernel", 
"test_files/copy_image_to_image3d.builtin_kernel", "test_files/copy_buffer_to_image3d.builtin_kernel", "test_files/copy_image3d_to_buffer.builtin_kernel"}; } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/built_ins/built_ins_file_names.h000066400000000000000000000004721422164147700301450ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include #include #pragma once namespace NEO { std::vector getBuiltInFileNames(bool imagesSupport); std::string getBuiltInHashFileName(uint64_t hash, bool imagesSupport); } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/built_ins/get_built_ins_file_names.cpp000066400000000000000000000014341422164147700313360ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/built_ins/built_ins_file_names.h" namespace NEO { extern std::vector builtInFileNames; extern std::vector imageBuiltInFileNames; std::vector getBuiltInFileNames(bool imagesSupport) { auto vec = builtInFileNames; if (imagesSupport) { vec.insert(vec.end(), imageBuiltInFileNames.begin(), imageBuiltInFileNames.end()); } return vec; } std::string getBuiltInHashFileName(uint64_t hash, bool imagesSupport) { std::string hashName = "test_files/" + std::to_string(hash); if (imagesSupport) { hashName.append("_images"); } hashName.append(".cl"); return hashName; } } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/built_ins/sip_tests.cpp000066400000000000000000000054241422164147700263450ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/built_ins/built_ins.h" #include "shared/source/built_ins/sip.h" #include "shared/test/common/helpers/test_files.h" #include "shared/test/common/libult/global_environment.h" #include "shared/test/common/mocks/mock_device.h" #include 
"shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "gtest/gtest.h" using namespace NEO; namespace SipKernelTests { TEST(Sip, WhenGettingTypeThenCorrectTypeIsReturned) { std::vector ssaHeader; SipKernel csr{SipKernelType::Csr, nullptr, ssaHeader}; EXPECT_EQ(SipKernelType::Csr, csr.getType()); SipKernel dbgCsr{SipKernelType::DbgCsr, nullptr, ssaHeader}; EXPECT_EQ(SipKernelType::DbgCsr, dbgCsr.getType()); SipKernel dbgCsrLocal{SipKernelType::DbgCsrLocal, nullptr, ssaHeader}; EXPECT_EQ(SipKernelType::DbgCsrLocal, dbgCsrLocal.getType()); SipKernel undefined{SipKernelType::COUNT, nullptr, ssaHeader}; EXPECT_EQ(SipKernelType::COUNT, undefined.getType()); } TEST(Sip, givenDebuggingInactiveWhenSipTypeIsQueriedThenCsrSipTypeIsReturned) { auto mockDevice = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr)); EXPECT_NE(nullptr, mockDevice); auto sipType = SipKernel::getSipKernelType(*mockDevice); EXPECT_EQ(SipKernelType::Csr, sipType); } TEST(DebugSip, givenDebuggingActiveWhenSipTypeIsQueriedThenDbgCsrSipTypeIsReturned) { auto mockDevice = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr)); EXPECT_NE(nullptr, mockDevice); mockDevice->setDebuggerActive(true); auto sipType = SipKernel::getSipKernelType(*mockDevice); EXPECT_LE(SipKernelType::DbgCsr, sipType); } TEST(DebugSip, givenBuiltInsWhenDbgCsrSipIsRequestedThenCorrectSipKernelIsReturned) { auto mockDevice = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr)); EXPECT_NE(nullptr, mockDevice); auto &builtins = *mockDevice->getBuiltIns(); auto &sipKernel = builtins.getSipKernel(SipKernelType::DbgCsr, *mockDevice); EXPECT_NE(nullptr, &sipKernel); EXPECT_EQ(SipKernelType::DbgCsr, sipKernel.getType()); } TEST(DebugBindlessSip, givenBindlessDebugSipIsRequestedThenCorrectSipKernelIsReturned) { auto mockDevice = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr)); EXPECT_NE(nullptr, mockDevice); auto 
&sipKernel = NEO::SipKernel::getBindlessDebugSipKernel(*mockDevice); EXPECT_NE(nullptr, &sipKernel); EXPECT_EQ(SipKernelType::DbgBindless, sipKernel.getType()); EXPECT_FALSE(sipKernel.getStateSaveAreaHeader().empty()); } } // namespace SipKernelTests compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/000077500000000000000000000000001422164147700244515ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/CMakeLists.txt000066400000000000000000000141511422164147700272130ustar00rootroot00000000000000# # Copyright (C) 2018-2022 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_command_queue ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/blit_enqueue_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/buffer_operations_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/command_enqueue_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/command_queue_hw_1_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/command_queue_hw_2_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/command_queue_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/csr_selection_args_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/dispatch_walker_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_barrier_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_command_without_kernel_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_copy_buffer_event_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_copy_buffer_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_copy_buffer_rect_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_copy_buffer_rect_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_copy_buffer_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_copy_buffer_to_image_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_copy_buffer_to_image_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_copy_image_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_copy_image_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_copy_image_to_buffer_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_copy_image_to_buffer_tests.cpp 
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_debug_kernel_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_fill_buffer_event_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_fill_buffer_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_fill_buffer_negative_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_fill_buffer_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_fill_image_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_fill_image_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_fixture.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_handler_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_kernel_1_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_kernel_2_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_kernel_event_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_kernel_global_offset_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_kernel_local_work_size_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_kernel_two_ioq_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_kernel_two_ooq_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_kernel_two_walker_ioq_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_kernel_two_walker_ooq_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_map_buffer_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_map_buffer_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_map_image_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_marker_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_media_kernel.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_migrate_mem_objects_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_read_buffer_event_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_read_buffer_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_read_buffer_rect_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_read_buffer_rect_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_read_buffer_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_read_image_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_read_image_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_resource_barier_tests.cpp 
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_svm_mem_copy_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_svm_mem_fill_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_svm_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_thread_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_unmap_memobject_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_waitlist_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_with_walker_partition_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_write_buffer_event_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_write_buffer_rect_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_write_buffer_rect_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_write_buffer_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_write_image_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_write_image_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/finish_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/flush_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/get_command_queue_info_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/get_size_required_buffer_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/get_size_required_image_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/get_size_required_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ioq_task_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/local_work_size_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/multi_dispatch_info_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/multiple_map_buffer_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/multiple_map_image_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/oom_buffer_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/oom_image_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/oom_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ooq_task_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/read_write_buffer_cpu_copy.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sync_buffer_handler_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/work_group_size_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/zero_size_enqueue_tests.cpp ) if(TESTS_XEHP_AND_LATER) list(APPEND IGDRCL_SRCS_tests_command_queue ${CMAKE_CURRENT_SOURCE_DIR}/dispatch_walker_tests_xehp_and_later.cpp 
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_resource_barier_tests_xehp_and_later.cpp ) endif() if(TESTS_DG2_AND_LATER) list(APPEND IGDRCL_SRCS_tests_command_queue ${CMAKE_CURRENT_SOURCE_DIR}/dispatch_walker_tests_dg2_and_later.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_command_without_kernel_tests_dg2_and_later.cpp ) endif() if(TESTS_PVC_AND_LATER) list(APPEND IGDRCL_SRCS_tests_command_queue ${CMAKE_CURRENT_SOURCE_DIR}/command_queue_tests_pvc_and_later.cpp ) endif() target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_command_queue}) add_subdirectories() compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/blit_enqueue_tests.cpp000066400000000000000000002725571422164147700311020ustar00rootroot00000000000000/* * Copyright (C) 2019-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/local_memory_access_modes.h" #include "shared/source/helpers/pause_on_gpu_properties.h" #include "shared/source/helpers/vec.h" #include "shared/source/memory_manager/unified_memory_manager.h" #include "shared/test/common/cmd_parse/hw_parse.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/engine_descriptor_helper.h" #include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/helpers/variable_backup.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/mocks/mock_timestamp_container.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/compiler_interface/linker_mock.h" #include "shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/source/event/user_event.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include 
"opencl/test/unit_test/test_macros/test_checks_ocl.h" namespace NEO { extern CommandStreamReceiverCreateFunc commandStreamReceiverFactory[2 * IGFX_MAX_CORE]; template struct BlitEnqueueTests : public ::testing::Test { class BcsMockContext : public MockContext { public: BcsMockContext(ClDevice *device) : MockContext(device) { bcsOsContext.reset(OsContext::create(nullptr, 0, EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS, EngineUsage::Regular}))); bcsCsr.reset(createCommandStream(*device->getExecutionEnvironment(), device->getRootDeviceIndex(), device->getDeviceBitfield())); bcsCsr->setupContext(*bcsOsContext); bcsCsr->initializeTagAllocation(); auto mockBlitMemoryToAllocation = [this](const Device &device, GraphicsAllocation *memory, size_t offset, const void *hostPtr, Vec3 size) -> BlitOperationResult { if (!device.getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable.blitterOperationsSupported) { return BlitOperationResult::Unsupported; } auto blitProperties = BlitProperties::constructPropertiesForReadWrite(BlitterConstants::BlitDirection::HostPtrToBuffer, *bcsCsr, memory, nullptr, hostPtr, memory->getGpuAddress(), 0, 0, 0, size, 0, 0, 0, 0); BlitPropertiesContainer container; container.push_back(blitProperties); bcsCsr->flushBcsTask(container, true, false, const_cast(device)); return BlitOperationResult::Success; }; blitMemoryToAllocationFuncBackup = mockBlitMemoryToAllocation; } std::unique_ptr bcsOsContext; std::unique_ptr bcsCsr; VariableBackup blitMemoryToAllocationFuncBackup{ &BlitHelperFunctions::blitMemoryToAllocation}; }; template void SetUpT() { if (is32bit) { GTEST_SKIP(); } REQUIRE_AUX_RESOLVES(); DebugManager.flags.EnableTimestampPacket.set(timestampPacketEnabled); DebugManager.flags.EnableBlitterForEnqueueOperations.set(1); DebugManager.flags.ForceAuxTranslationMode.set(static_cast(AuxTranslationMode::Blit)); DebugManager.flags.RenderCompressedBuffersEnabled.set(1); 
DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(1); DebugManager.flags.CsrDispatchMode.set(static_cast(DispatchMode::ImmediateDispatch)); DebugManager.flags.EnableLocalMemory.set(1); device = std::make_unique(MockClDevice::createWithNewExecutionEnvironment(nullptr)); auto &capabilityTable = device->getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable; bool createBcsEngine = !capabilityTable.blitterOperationsSupported; capabilityTable.blitterOperationsSupported = true; if (!HwInfoConfig::get(defaultHwInfo->platform.eProductFamily)->isBlitterFullySupported(device->getHardwareInfo())) { GTEST_SKIP(); } if (createBcsEngine) { auto &engine = device->getEngine(getChosenEngineType(device->getHardwareInfo()), EngineUsage::LowPriority); bcsOsContext.reset(OsContext::create(nullptr, 1, EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS, EngineUsage::Regular}, device->getDeviceBitfield()))); engine.osContext = bcsOsContext.get(); engine.commandStreamReceiver->setupContext(*bcsOsContext); } bcsMockContext = std::make_unique(device.get()); auto mockCmdQueue = new MockCommandQueueHw(bcsMockContext.get(), device.get(), nullptr); commandQueue.reset(mockCmdQueue); mockKernel = std::make_unique(*device, bcsMockContext.get()); auto mockProgram = mockKernel->mockProgram; mockProgram->setAllowNonUniform(true); gpgpuCsr = mockCmdQueue->gpgpuEngine->commandStreamReceiver; bcsCsr = mockCmdQueue->bcsEngines[0]->commandStreamReceiver; } template void TearDownT() {} template void setMockKernelArgs(std::array buffers) { for (uint32_t i = 0; i < buffers.size(); i++) { mockKernel->kernelInfo.addArgBuffer(i, 0); } mockKernel->mockKernel->initialize(); EXPECT_TRUE(mockKernel->mockKernel->auxTranslationRequired); for (uint32_t i = 0; i < buffers.size(); i++) { cl_mem clMem = buffers[i]; mockKernel->mockKernel->setArgBuffer(i, sizeof(cl_mem *), &clMem); } } template void setMockKernelArgs(std::array allocs) { for (uint32_t i = 0; i < allocs.size(); i++) { 
mockKernel->kernelInfo.addArgBuffer(i, 0); } mockKernel->mockKernel->initialize(); EXPECT_TRUE(mockKernel->mockKernel->auxTranslationRequired); for (uint32_t i = 0; i < allocs.size(); i++) { auto alloc = allocs[i]; auto ptr = reinterpret_cast(alloc->getGpuAddressToPatch()); mockKernel->mockKernel->setArgSvmAlloc(i, ptr, alloc, 0u); } } ReleaseableObjectPtr createBuffer(size_t size, bool compressed) { auto buffer = clUniquePtr(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, size, nullptr, retVal)); auto graphicsAllocation = buffer->getGraphicsAllocation(device->getRootDeviceIndex()); setAllocationType(graphicsAllocation, compressed); return buffer; } MockGraphicsAllocation *createGfxAllocation(size_t size, bool compressed) { auto alloc = new MockGraphicsAllocation(nullptr, size); setAllocationType(alloc, compressed); return alloc; } void setAllocationType(GraphicsAllocation *graphicsAllocation, bool compressed) { graphicsAllocation->setAllocationType(AllocationType::BUFFER); if (compressed && !graphicsAllocation->getDefaultGmm()) { auto clientContext = device->getRootDeviceEnvironment().getGmmClientContext(); graphicsAllocation->setDefaultGmm(new Gmm(clientContext, nullptr, 0, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true)); } if (graphicsAllocation->getDefaultGmm()) { graphicsAllocation->getDefaultGmm()->isCompressionEnabled = compressed; } } template GenCmdList getCmdList(LinearStream &linearStream, size_t offset) { HardwareParse hwParser; hwParser.parseCommands(linearStream, offset); return hwParser.cmdList; } template GenCmdList::iterator expectPipeControl(GenCmdList::iterator itorStart, GenCmdList::iterator itorEnd) { using PIPE_CONTROL = typename Family::PIPE_CONTROL; PIPE_CONTROL *pipeControlCmd = nullptr; GenCmdList::iterator commandItor = itorStart; bool stallingWrite = false; do { commandItor = find(commandItor, itorEnd); if (itorEnd == commandItor) { EXPECT_TRUE(false); return itorEnd; } pipeControlCmd = genCmdCast(*commandItor); stallingWrite 
= pipeControlCmd->getPostSyncOperation() == PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA && pipeControlCmd->getCommandStreamerStallEnable(); ++commandItor; } while (!stallingWrite); return --commandItor; } template GenCmdList::iterator expectMiFlush(GenCmdList::iterator itorStart, GenCmdList::iterator itorEnd) { Family *miFlushCmd = nullptr; GenCmdList::iterator commandItor = itorStart; bool miFlushWithMemoryWrite = false; do { commandItor = find(commandItor, itorEnd); if (itorEnd == commandItor) { EXPECT_TRUE(false); return itorEnd; } miFlushCmd = genCmdCast(*commandItor); miFlushWithMemoryWrite = miFlushCmd->getDestinationAddress() != 0; ++commandItor; } while (!miFlushWithMemoryWrite); return --commandItor; } template GenCmdList::iterator expectCommand(GenCmdList::iterator itorStart, GenCmdList::iterator itorEnd) { auto commandItor = find(itorStart, itorEnd); EXPECT_TRUE(commandItor != itorEnd); return commandItor; } template void expectNoCommand(GenCmdList::iterator itorStart, GenCmdList::iterator itorEnd) { auto commandItor = find(itorStart, itorEnd); EXPECT_TRUE(commandItor == itorEnd); } template void verifySemaphore(GenCmdList::iterator &semaphoreItor, uint64_t expectedAddress) { using MI_SEMAPHORE_WAIT = typename Family::MI_SEMAPHORE_WAIT; auto semaphoreCmd = genCmdCast(*semaphoreItor); EXPECT_EQ(expectedAddress, semaphoreCmd->getSemaphoreGraphicsAddress()); } DebugManagerStateRestore restore; std::unique_ptr bcsOsContext; std::unique_ptr device; std::unique_ptr bcsMockContext; std::unique_ptr commandQueue; std::unique_ptr mockKernel; CommandStreamReceiver *bcsCsr = nullptr; CommandStreamReceiver *gpgpuCsr = nullptr; size_t gws[3] = {63, 0, 0}; size_t lws[3] = {16, 0, 0}; uint32_t hostPtr = 0; cl_int retVal = CL_SUCCESS; }; using BlitAuxTranslationTests = BlitEnqueueTests<1>; HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenBlitAuxTranslationWhenConstructingCommandBufferThenEnsureCorrectOrder) { using PIPE_CONTROL = typename 
FamilyType::PIPE_CONTROL; using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; using WALKER_TYPE = typename FamilyType::WALKER_TYPE; using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT; using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW; auto buffer0 = createBuffer(1, true); auto buffer1 = createBuffer(1, false); auto buffer2 = createBuffer(1, true); setMockKernelArgs(std::array{{buffer0.get(), buffer1.get(), buffer2.get()}}); auto mockCmdQ = static_cast *>(commandQueue.get()); auto initialBcsTaskCount = mockCmdQ->peekBcsTaskCount(bcsCsr->getOsContext().getEngineType()); mockCmdQ->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, lws, 0, nullptr, nullptr); EXPECT_EQ(initialBcsTaskCount + 1, mockCmdQ->peekBcsTaskCount(bcsCsr->getOsContext().getEngineType())); // Gpgpu command buffer { auto cmdListCsr = getCmdList(gpgpuCsr->getCS(0), 0); auto cmdListQueue = getCmdList(commandQueue->getCS(0), 0); // Barrier expectPipeControl(cmdListCsr.begin(), cmdListCsr.end()); // Aux to NonAux auto cmdFound = expectCommand(cmdListQueue.begin(), cmdListQueue.end()); cmdFound = expectCommand(++cmdFound, cmdListQueue.end()); // Walker cmdFound = expectCommand(++cmdFound, cmdListQueue.end()); cmdFound = expectCommand(++cmdFound, cmdListQueue.end()); // NonAux to Aux cmdFound = expectCommand(++cmdFound, cmdListQueue.end()); cmdFound = expectCommand(++cmdFound, cmdListQueue.end()); // task count expectPipeControl(++cmdFound, cmdListQueue.end()); } // BCS command buffer { auto cmdList = getCmdList(bcsCsr->getCS(0), 0); // Barrier auto cmdFound = expectCommand(cmdList.begin(), cmdList.end()); // Aux to NonAux cmdFound = expectCommand(++cmdFound, cmdList.end()); cmdFound = expectCommand(++cmdFound, cmdList.end()); cmdFound = expectCommand(++cmdFound, cmdList.end()); cmdFound = expectCommand(++cmdFound, cmdList.end()); // wait for NDR (walker split) cmdFound = expectCommand(++cmdFound, cmdList.end()); cmdFound = expectCommand(++cmdFound, cmdList.end()); // NonAux to Aux 
cmdFound = expectCommand(++cmdFound, cmdList.end()); cmdFound = expectCommand(++cmdFound, cmdList.end()); cmdFound = expectCommand(++cmdFound, cmdList.end()); cmdFound = expectCommand(++cmdFound, cmdList.end()); // taskCount expectCommand(++cmdFound, cmdList.end()); } } HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenBlitAuxTranslationWhenConstructingBlockedCommandBufferThenEnsureCorrectOrder) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; using WALKER_TYPE = typename FamilyType::WALKER_TYPE; using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT; using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW; auto buffer0 = createBuffer(1, true); auto buffer1 = createBuffer(1, false); auto buffer2 = createBuffer(1, true); setMockKernelArgs(std::array{{buffer0.get(), buffer1.get(), buffer2.get()}}); auto mockCmdQ = static_cast *>(commandQueue.get()); auto initialBcsTaskCount = mockCmdQ->peekBcsTaskCount(bcsCsr->getOsContext().getEngineType()); UserEvent userEvent; cl_event waitlist[] = {&userEvent}; mockCmdQ->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, lws, 1, waitlist, nullptr); userEvent.setStatus(CL_COMPLETE); EXPECT_EQ(initialBcsTaskCount + 1, mockCmdQ->peekBcsTaskCount(bcsCsr->getOsContext().getEngineType())); // Gpgpu command buffer { auto cmdListCsr = getCmdList(gpgpuCsr->getCS(0), 0); auto ultCsr = static_cast *>(gpgpuCsr); auto cmdListQueue = getCmdList(*ultCsr->lastFlushedCommandStream, 0); // Barrier expectPipeControl(cmdListCsr.begin(), cmdListCsr.end()); // Aux to NonAux auto cmdFound = expectCommand(cmdListQueue.begin(), cmdListQueue.end()); cmdFound = expectCommand(++cmdFound, cmdListQueue.end()); // Walker cmdFound = expectCommand(++cmdFound, cmdListQueue.end()); cmdFound = expectCommand(++cmdFound, cmdListQueue.end()); // NonAux to Aux cmdFound = expectCommand(++cmdFound, cmdListQueue.end()); cmdFound = expectCommand(++cmdFound, cmdListQueue.end()); // task count 
expectPipeControl(++cmdFound, cmdListQueue.end()); } // BCS command buffer { auto cmdList = getCmdList(bcsCsr->getCS(0), 0); // Barrier auto cmdFound = expectCommand(cmdList.begin(), cmdList.end()); // Aux to NonAux cmdFound = expectCommand(++cmdFound, cmdList.end()); cmdFound = expectCommand(++cmdFound, cmdList.end()); cmdFound = expectCommand(++cmdFound, cmdList.end()); cmdFound = expectCommand(++cmdFound, cmdList.end()); // wait for NDR (walker split) cmdFound = expectCommand(++cmdFound, cmdList.end()); cmdFound = expectCommand(++cmdFound, cmdList.end()); // NonAux to Aux cmdFound = expectCommand(++cmdFound, cmdList.end()); cmdFound = expectCommand(++cmdFound, cmdList.end()); cmdFound = expectCommand(++cmdFound, cmdList.end()); cmdFound = expectCommand(++cmdFound, cmdList.end()); // taskCount expectCommand(++cmdFound, cmdList.end()); } EXPECT_FALSE(mockCmdQ->isQueueBlocked()); } HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenBlitTranslationWhenConstructingCommandBufferThenSynchronizeBarrier) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; auto buffer = createBuffer(1, true); setMockKernelArgs(std::array{{buffer.get()}}); commandQueue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); auto cmdListCsr = getCmdList(gpgpuCsr->getCS(0), 0); auto pipeControl = expectPipeControl(cmdListCsr.begin(), cmdListCsr.end()); auto pipeControlCmd = genCmdCast(*pipeControl); uint64_t barrierGpuAddress = NEO::UnitTestHelper::getPipeControlPostSyncAddress(*pipeControlCmd); auto cmdList = getCmdList(bcsCsr->getCS(0), 0); auto semaphore = expectCommand(cmdList.begin(), cmdList.end()); verifySemaphore(semaphore, barrierGpuAddress); } HWTEST_TEMPLATED_F(BlitAuxTranslationTests, whenFlushTagUpdateThenMiFlushDwIsFlushed) { using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW; bcsCsr->flushTagUpdate(); auto cmdListBcs = getCmdList(bcsCsr->getCS(0), 0); auto cmdFound = 
expectCommand(cmdListBcs.begin(), cmdListBcs.end()); EXPECT_NE(cmdFound, cmdListBcs.end()); } HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenNonDefaultBcsWhenFlushNonKernelTaskThenMiFlushDwIsFlushed) { using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW; std::unique_ptr bcs3Context(OsContext::create(nullptr, 1, EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS3, EngineUsage::Regular}, device->getDeviceBitfield()))); bcsCsr->setupContext(*bcs3Context); NEO::PipeControlArgs args; bcsCsr->flushNonKernelTask(nullptr, 0, 0, args, false, false, false); auto cmdListBcs = getCmdList(bcsCsr->getCS(0), 0); auto cmdFound = expectCommand(cmdListBcs.begin(), cmdListBcs.end()); EXPECT_NE(cmdFound, cmdListBcs.end()); bcsCsr->setupContext(*bcsOsContext); } HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenBlitTranslationWhenConstructingCommandBufferThenSynchronizeBcsOutput) { using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT; using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW; using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; using WALKER_TYPE = typename FamilyType::WALKER_TYPE; auto buffer0 = createBuffer(1, true); auto buffer1 = createBuffer(1, true); setMockKernelArgs(std::array{{buffer0.get(), buffer1.get()}}); commandQueue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); uint64_t auxToNonAuxOutputAddress[2] = {}; uint64_t nonAuxToAuxOutputAddress[2] = {}; { auto cmdListBcs = getCmdList(bcsCsr->getCS(0), 0); auto cmdFound = expectCommand(cmdListBcs.begin(), cmdListBcs.end()); cmdFound = expectMiFlush(++cmdFound, cmdListBcs.end()); auto miflushDwCmd = genCmdCast(*cmdFound); auxToNonAuxOutputAddress[0] = miflushDwCmd->getDestinationAddress(); cmdFound = expectMiFlush(++cmdFound, cmdListBcs.end()); miflushDwCmd = genCmdCast(*cmdFound); auxToNonAuxOutputAddress[1] = miflushDwCmd->getDestinationAddress(); cmdFound = expectCommand(++cmdFound, cmdListBcs.end()); cmdFound = expectMiFlush(++cmdFound, 
cmdListBcs.end()); miflushDwCmd = genCmdCast(*cmdFound); nonAuxToAuxOutputAddress[0] = miflushDwCmd->getDestinationAddress(); cmdFound = expectMiFlush(++cmdFound, cmdListBcs.end()); miflushDwCmd = genCmdCast(*cmdFound); nonAuxToAuxOutputAddress[1] = miflushDwCmd->getDestinationAddress(); } { auto cmdListQueue = getCmdList(commandQueue->getCS(0), 0); // Aux to NonAux auto cmdFound = expectCommand(cmdListQueue.begin(), cmdListQueue.end()); verifySemaphore(cmdFound, auxToNonAuxOutputAddress[0]); cmdFound = expectCommand(++cmdFound, cmdListQueue.end()); verifySemaphore(cmdFound, auxToNonAuxOutputAddress[1]); // Walker cmdFound = expectCommand(++cmdFound, cmdListQueue.end()); // NonAux to Aux cmdFound = expectCommand(++cmdFound, cmdListQueue.end()); verifySemaphore(cmdFound, nonAuxToAuxOutputAddress[0]); cmdFound = expectCommand(++cmdFound, cmdListQueue.end()); verifySemaphore(cmdFound, nonAuxToAuxOutputAddress[1]); } } /* With isCacheFlushForBcsRequired overridden to return false, enqueues a kernel and checks with verifySemaphore that the second command matched in the BCS stream refers to the context-end GPU address of the first node in the queue's timestampPacketContainer. */ HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenBlitTranslationWhenConstructingCommandBufferThenSynchronizeKernel) { using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT; using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; auto buffer = createBuffer(1, true); setMockKernelArgs(std::array{{buffer.get()}}); auto mockCmdQ = static_cast *>(commandQueue.get()); mockCmdQ->overrideIsCacheFlushForBcsRequired.enabled = true; mockCmdQ->overrideIsCacheFlushForBcsRequired.returnValue = false; mockCmdQ->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); auto kernelNode = mockCmdQ->timestampPacketContainer->peekNodes()[0]; auto kernelNodeAddress = TimestampPacketHelper::getContextEndGpuAddress(*kernelNode); auto cmdList = getCmdList(bcsCsr->getCS(0), 0); // Aux to nonAux auto cmdFound = expectCommand(cmdList.begin(), cmdList.end()); // semaphore before NonAux to Aux auto semaphore = expectCommand(++cmdFound, cmdList.end()); verifySemaphore(semaphore, kernelNodeAddress); } /* With isCacheFlushForBcsRequired overridden to return true, enqueues a kernel and picks from the queue stream a pipe control with DC flush enabled (moving on to the next pipe control when the first one found has DC flush disabled). Expects command streamer stall to be enabled and a non-zero post-sync address, then checks with verifySemaphore that the second command matched in the BCS stream refers to that address. */ HWTEST_TEMPLATED_F(BlitAuxTranslationTests, 
givenBlitTranslationWhenConstructingCommandBufferThenSynchronizeCacheFlush) { using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT; using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using WALKER_TYPE = typename FamilyType::WALKER_TYPE; auto buffer = createBuffer(1, true); setMockKernelArgs(std::array{{buffer.get()}}); auto mockCmdQ = static_cast *>(commandQueue.get()); mockCmdQ->overrideIsCacheFlushForBcsRequired.enabled = true; mockCmdQ->overrideIsCacheFlushForBcsRequired.returnValue = true; mockCmdQ->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); auto cmdListBcs = getCmdList(bcsCsr->getCS(0), 0); auto cmdListQueue = getCmdList(mockCmdQ->getCS(0), 0); uint64_t cacheFlushWriteAddress = 0; { auto cmdFound = expectCommand(cmdListQueue.begin(), cmdListQueue.end()); cmdFound = expectPipeControl(++cmdFound, cmdListQueue.end()); auto pipeControlCmd = genCmdCast(*cmdFound); if (!pipeControlCmd->getDcFlushEnable()) { // skip pipe control with TimestampPacket write cmdFound = expectPipeControl(++cmdFound, cmdListQueue.end()); pipeControlCmd = genCmdCast(*cmdFound); } EXPECT_TRUE(pipeControlCmd->getDcFlushEnable()); EXPECT_TRUE(pipeControlCmd->getCommandStreamerStallEnable()); cacheFlushWriteAddress = NEO::UnitTestHelper::getPipeControlPostSyncAddress(*pipeControlCmd); EXPECT_NE(0u, cacheFlushWriteAddress); } { // Aux to nonAux auto cmdFound = expectCommand(cmdListBcs.begin(), cmdListBcs.end()); // semaphore before NonAux to Aux cmdFound = expectCommand(++cmdFound, cmdListBcs.end()); verifySemaphore(cmdFound, cacheFlushWriteAddress); } } /* Builds an event that carries one timestamp packet node taken from a container created on the BCS CSR allocator, passes it in the wait list of a kernel enqueue, and checks with verifySemaphore that the second command matched in the BCS stream refers to that node's context-end GPU address. Two further expectCommand lookups follow the semaphore. */ HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenBlitTranslationWhenConstructingCommandBufferThenSynchronizeEvents) { using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT; using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; auto buffer = createBuffer(1, true); setMockKernelArgs(std::array{{buffer.get()}}); auto event = 
make_releaseable(commandQueue.get(), CL_COMMAND_READ_BUFFER, 0, 0); MockTimestampPacketContainer eventDependencyContainer(*bcsCsr->getTimestampPacketAllocator(), 1); auto eventDependency = eventDependencyContainer.getNode(0); event->addTimestampPacketNodes(eventDependencyContainer); cl_event clEvent[] = {event.get()}; commandQueue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 1, clEvent, nullptr); auto eventDependencyAddress = TimestampPacketHelper::getContextEndGpuAddress(*eventDependency); auto cmdList = getCmdList(bcsCsr->getCS(0), 0); // Barrier auto cmdFound = expectCommand(cmdList.begin(), cmdList.end()); // Event auto semaphore = expectCommand(++cmdFound, cmdList.end()); verifySemaphore(semaphore, eventDependencyAddress); cmdFound = expectCommand(++semaphore, cmdList.end()); expectCommand(++cmdFound, cmdList.end()); } /* Enqueues a kernel with three buffers (createBuffer second argument true, false, true) and an output event. Expects the event to hold 5 timestamp packet nodes and checks with verifySemaphore that the two commands matched after the first expectCommand match in the queue stream refer to the context-end addresses of eventNodes[1] and eventNodes[2]. Also expects a non-zero peekBcsTaskCountFromCommandQueue() and releases the event. NOTE(review): the node count is checked with EXPECT_EQ, so indexing eventNodes still happens if the count differs. */ HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenOutEventWhenDispatchingThenAssignNonAuxNodes) { using WALKER_TYPE = typename FamilyType::WALKER_TYPE; using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; auto buffer0 = createBuffer(1, true); auto buffer1 = createBuffer(1, false); auto buffer2 = createBuffer(1, true); setMockKernelArgs(std::array{{buffer0.get(), buffer1.get(), buffer2.get()}}); cl_event clEvent; commandQueue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, &clEvent); auto event = castToObject(clEvent); auto &eventNodes = event->getTimestampPacketNodes()->peekNodes(); EXPECT_EQ(5u, eventNodes.size()); auto cmdListQueue = getCmdList(commandQueue->getCS(0), 0); auto cmdFound = expectCommand(cmdListQueue.begin(), cmdListQueue.end()); // NonAux to Aux cmdFound = expectCommand(++cmdFound, cmdListQueue.end()); auto eventNodeAddress = TimestampPacketHelper::getContextEndGpuAddress(*eventNodes[1]); verifySemaphore(cmdFound, eventNodeAddress); cmdFound = expectCommand(++cmdFound, cmdListQueue.end()); eventNodeAddress = 
TimestampPacketHelper::getContextEndGpuAddress(*eventNodes[2]); verifySemaphore(cmdFound, eventNodeAddress); EXPECT_NE(0u, event->peekBcsTaskCountFromCommandQueue()); clReleaseEvent(clEvent); } /* With isCacheFlushForBcsRequired overridden to return false, enqueues a kernel (explicit lws, storeMultiDispatchInfo set) with three buffers of which buffer0 and buffer2 are inserted into kernelObjects. Expects the stored MultiDispatchInfo to contain distinct first and last DispatchInfo entries and checks estimateCommandsSize: dependencySize (2 x getRequiredCmdStreamSizeForNodeDependencyWithBlitEnqueue()) for the first entry's dispatchInitCommands and the last entry's dispatchEpilogueCommands, 0 for the first entry's epilogue and the last entry's init commands. */ HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenBlitAuxTranslationWhenDispatchingThenEstimateCmdBufferSize) { using WALKER_TYPE = typename FamilyType::WALKER_TYPE; using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; auto &hwInfo = device->getHardwareInfo(); auto mockCmdQ = static_cast *>(commandQueue.get()); mockCmdQ->overrideIsCacheFlushForBcsRequired.enabled = true; mockCmdQ->overrideIsCacheFlushForBcsRequired.returnValue = false; auto buffer0 = createBuffer(1, true); auto buffer1 = createBuffer(1, false); auto buffer2 = createBuffer(1, true); KernelObjsForAuxTranslation kernelObjects; kernelObjects.insert({KernelObjForAuxTranslation::Type::MEM_OBJ, buffer0.get()}); kernelObjects.insert({KernelObjForAuxTranslation::Type::MEM_OBJ, buffer2.get()}); size_t numBuffersToEstimate = 2; size_t dependencySize = numBuffersToEstimate * TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependencyWithBlitEnqueue(); setMockKernelArgs(std::array{{buffer0.get(), buffer1.get(), buffer2.get()}}); mockCmdQ->storeMultiDispatchInfo = true; mockCmdQ->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, lws, 0, nullptr, nullptr); MultiDispatchInfo &multiDispatchInfo = mockCmdQ->storedMultiDispatchInfo; DispatchInfo *firstDispatchInfo = multiDispatchInfo.begin(); DispatchInfo *lastDispatchInfo = &(*multiDispatchInfo.rbegin()); EXPECT_NE(firstDispatchInfo, lastDispatchInfo); // walker split EXPECT_EQ(dependencySize, firstDispatchInfo->dispatchInitCommands.estimateCommandsSize(kernelObjects.size(), hwInfo, mockCmdQ->isCacheFlushForBcsRequired())); EXPECT_EQ(0u, firstDispatchInfo->dispatchEpilogueCommands.estimateCommandsSize(kernelObjects.size(), hwInfo, mockCmdQ->isCacheFlushForBcsRequired())); EXPECT_EQ(0u, 
lastDispatchInfo->dispatchInitCommands.estimateCommandsSize(kernelObjects.size(), hwInfo, mockCmdQ->isCacheFlushForBcsRequired())); EXPECT_EQ(dependencySize, lastDispatchInfo->dispatchEpilogueCommands.estimateCommandsSize(kernelObjects.size(), hwInfo, mockCmdQ->isCacheFlushForBcsRequired())); } /* With isCacheFlushForBcsRequired overridden to return true, enqueues a kernel (explicit lws, storeMultiDispatchInfo set) with three buffers of which buffer0 and buffer2 are inserted into kernelObjects. Checks estimateCommandsSize on the first and last stored DispatchInfo: dependencySize for the first entry's dispatchInitCommands, 0 for the first entry's epilogue and the last entry's init commands, and dependencySize + getSizeForPipeControlWithPostSyncOperation(hwInfo) for the last entry's dispatchEpilogueCommands. */ HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenBlitAuxTranslationWithRequiredCacheFlushWhenDispatchingThenEstimateCmdBufferSize) { using WALKER_TYPE = typename FamilyType::WALKER_TYPE; using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; auto &hwInfo = device->getHardwareInfo(); auto mockCmdQ = static_cast *>(commandQueue.get()); mockCmdQ->overrideIsCacheFlushForBcsRequired.enabled = true; mockCmdQ->overrideIsCacheFlushForBcsRequired.returnValue = true; auto buffer0 = createBuffer(1, true); auto buffer1 = createBuffer(1, false); auto buffer2 = createBuffer(1, true); KernelObjsForAuxTranslation kernelObjects; kernelObjects.insert({KernelObjForAuxTranslation::Type::MEM_OBJ, buffer0.get()}); kernelObjects.insert({KernelObjForAuxTranslation::Type::MEM_OBJ, buffer2.get()}); size_t numBuffersToEstimate = 2; size_t dependencySize = numBuffersToEstimate * TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependencyWithBlitEnqueue(); size_t cacheFlushSize = MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(hwInfo); setMockKernelArgs(std::array{{buffer0.get(), buffer1.get(), buffer2.get()}}); mockCmdQ->storeMultiDispatchInfo = true; mockCmdQ->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, lws, 0, nullptr, nullptr); MultiDispatchInfo &multiDispatchInfo = mockCmdQ->storedMultiDispatchInfo; DispatchInfo *firstDispatchInfo = multiDispatchInfo.begin(); DispatchInfo *lastDispatchInfo = &(*multiDispatchInfo.rbegin()); EXPECT_NE(firstDispatchInfo, lastDispatchInfo); // walker split EXPECT_EQ(dependencySize, firstDispatchInfo->dispatchInitCommands.estimateCommandsSize(kernelObjects.size(), hwInfo, mockCmdQ->isCacheFlushForBcsRequired())); 
EXPECT_EQ(0u, firstDispatchInfo->dispatchEpilogueCommands.estimateCommandsSize(kernelObjects.size(), hwInfo, mockCmdQ->isCacheFlushForBcsRequired())); EXPECT_EQ(0u, lastDispatchInfo->dispatchInitCommands.estimateCommandsSize(kernelObjects.size(), hwInfo, mockCmdQ->isCacheFlushForBcsRequired())); EXPECT_EQ(dependencySize + cacheFlushSize, lastDispatchInfo->dispatchEpilogueCommands.estimateCommandsSize(kernelObjects.size(), hwInfo, mockCmdQ->isCacheFlushForBcsRequired())); } /* Enqueues a kernel whose wait list contains a UserEvent, then sets the event to CL_COMPLETE. Takes the post-sync address of the pipe control returned by expectPipeControl on the gpgpu CSR stream, checks with verifySemaphore that the first command matched in the BCS stream refers to it, and expects the queue not to be blocked. */ HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenBlitTranslationWhenConstructingBlockedCommandBufferThenSynchronizeBarrier) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; auto buffer = createBuffer(1, true); setMockKernelArgs(std::array{{buffer.get()}}); UserEvent userEvent; cl_event waitlist[] = {&userEvent}; commandQueue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 1, waitlist, nullptr); userEvent.setStatus(CL_COMPLETE); auto cmdListCsr = getCmdList(gpgpuCsr->getCS(0), 0); auto pipeControl = expectPipeControl(cmdListCsr.begin(), cmdListCsr.end()); auto pipeControlCmd = genCmdCast(*pipeControl); uint64_t barrierGpuAddress = NEO::UnitTestHelper::getPipeControlPostSyncAddress(*pipeControlCmd); auto cmdList = getCmdList(bcsCsr->getCS(0), 0); auto semaphore = expectCommand(cmdList.begin(), cmdList.end()); verifySemaphore(semaphore, barrierGpuAddress); EXPECT_FALSE(commandQueue->isQueueBlocked()); } /* Enqueues a kernel whose wait list holds a UserEvent plus an event carrying one timestamp packet node, then sets the UserEvent to CL_COMPLETE. Checks with verifySemaphore that the second command matched in the BCS stream refers to the node's context-end GPU address, performs two further expectCommand lookups, and expects the queue not to be blocked. */ HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenBlitTranslationWhenConstructingBlockedCommandBufferThenSynchronizeEvents) { using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT; using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; auto buffer = createBuffer(1, true); setMockKernelArgs(std::array{{buffer.get()}}); auto event = make_releaseable(commandQueue.get(), CL_COMMAND_READ_BUFFER, 0, 0); MockTimestampPacketContainer eventDependencyContainer(*bcsCsr->getTimestampPacketAllocator(), 1); auto 
eventDependency = eventDependencyContainer.getNode(0); event->addTimestampPacketNodes(eventDependencyContainer); UserEvent userEvent; cl_event waitlist[] = {&userEvent, event.get()}; commandQueue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 2, waitlist, nullptr); userEvent.setStatus(CL_COMPLETE); auto eventDependencyAddress = TimestampPacketHelper::getContextEndGpuAddress(*eventDependency); auto cmdList = getCmdList(bcsCsr->getCS(0), 0); // Barrier auto cmdFound = expectCommand(cmdList.begin(), cmdList.end()); // Event auto semaphore = expectCommand(++cmdFound, cmdList.end()); verifySemaphore(semaphore, eventDependencyAddress); cmdFound = expectCommand(++semaphore, cmdList.end()); expectCommand(++cmdFound, cmdList.end()); EXPECT_FALSE(commandQueue->isQueueBlocked()); } /* Enqueues a kernel blocked on a UserEvent and then completes the event. Checks with verifySemaphore that a command matched in the BCS stream refers to the context-end GPU address of the first node in the queue's timestampPacketContainer; when isCacheFlushForBcsRequired() is true, one additional expectCommand lookup is performed before the check. Expects the queue not to be blocked. */ HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenBlitTranslationWhenConstructingBlockedCommandBufferThenSynchronizeKernel) { using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT; using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; auto buffer = createBuffer(1, true); setMockKernelArgs(std::array{{buffer.get()}}); auto mockCmdQ = static_cast *>(commandQueue.get()); UserEvent userEvent; cl_event waitlist[] = {&userEvent}; mockCmdQ->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 1, waitlist, nullptr); userEvent.setStatus(CL_COMPLETE); auto kernelNode = mockCmdQ->timestampPacketContainer->peekNodes()[0]; auto kernelNodeAddress = TimestampPacketHelper::getContextEndGpuAddress(*kernelNode); auto cmdList = getCmdList(bcsCsr->getCS(0), 0); // Aux to nonAux auto cmdFound = expectCommand(cmdList.begin(), cmdList.end()); // semaphore before NonAux to Aux auto semaphore = expectCommand(++cmdFound, cmdList.end()); if (mockCmdQ->isCacheFlushForBcsRequired()) { semaphore = expectCommand(++semaphore, cmdList.end()); } verifySemaphore(semaphore, kernelNodeAddress); EXPECT_FALSE(commandQueue->isQueueBlocked()); } /* Enqueues a kernel with two buffers, blocked on a UserEvent that is then completed. Records four destination addresses from commands returned by expectMiFlush in the BCS stream and checks with verifySemaphore that commands matched in the stream held by the gpgpu CSR's lastFlushedCommandStream refer to them in the same order; expects the queue not to be blocked. */ HWTEST_TEMPLATED_F(BlitAuxTranslationTests, 
givenBlitTranslationWhenConstructingBlockedCommandBufferThenSynchronizeBcsOutput) { using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT; using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW; using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; using WALKER_TYPE = typename FamilyType::WALKER_TYPE; auto buffer0 = createBuffer(1, true); auto buffer1 = createBuffer(1, true); setMockKernelArgs(std::array{{buffer0.get(), buffer1.get()}}); /* The enqueue below is submitted with a UserEvent in its wait list; the event is set to CL_COMPLETE right after. */ UserEvent userEvent; cl_event waitlist[] = {&userEvent}; commandQueue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 1, waitlist, nullptr); userEvent.setStatus(CL_COMPLETE); /* Step 1: record the destination addresses of the commands returned by expectMiFlush in the BCS stream, two before and two after an intermediate expectCommand match. */ uint64_t auxToNonAuxOutputAddress[2] = {}; uint64_t nonAuxToAuxOutputAddress[2] = {}; { auto cmdListBcs = getCmdList(bcsCsr->getCS(0), 0); auto cmdFound = expectCommand(cmdListBcs.begin(), cmdListBcs.end()); cmdFound = expectMiFlush(++cmdFound, cmdListBcs.end()); auto miflushDwCmd = genCmdCast(*cmdFound); auxToNonAuxOutputAddress[0] = miflushDwCmd->getDestinationAddress(); cmdFound = expectMiFlush(++cmdFound, cmdListBcs.end()); miflushDwCmd = genCmdCast(*cmdFound); auxToNonAuxOutputAddress[1] = miflushDwCmd->getDestinationAddress(); cmdFound = expectCommand(++cmdFound, cmdListBcs.end()); cmdFound = expectMiFlush(++cmdFound, cmdListBcs.end()); miflushDwCmd = genCmdCast(*cmdFound); nonAuxToAuxOutputAddress[0] = miflushDwCmd->getDestinationAddress(); cmdFound = expectMiFlush(++cmdFound, cmdListBcs.end()); miflushDwCmd = genCmdCast(*cmdFound); nonAuxToAuxOutputAddress[1] = miflushDwCmd->getDestinationAddress(); } /* Step 2: parse the gpgpu CSR's lastFlushedCommandStream (not commandQueue->getCS) and verify the recorded addresses in order. */ { auto ultCsr = static_cast *>(gpgpuCsr); auto cmdListQueue = getCmdList(*ultCsr->lastFlushedCommandStream, 0); // Aux to NonAux auto cmdFound = expectCommand(cmdListQueue.begin(), cmdListQueue.end()); verifySemaphore(cmdFound, auxToNonAuxOutputAddress[0]); cmdFound = expectCommand(++cmdFound, cmdListQueue.end()); verifySemaphore(cmdFound, auxToNonAuxOutputAddress[1]); // Walker cmdFound = expectCommand(++cmdFound, 
cmdListQueue.end()); // NonAux to Aux cmdFound = expectCommand(++cmdFound, cmdListQueue.end()); verifySemaphore(cmdFound, nonAuxToAuxOutputAddress[0]); cmdFound = expectCommand(++cmdFound, cmdListQueue.end()); verifySemaphore(cmdFound, nonAuxToAuxOutputAddress[1]); } EXPECT_FALSE(commandQueue->isQueueBlocked()); } /* Enqueues a kernel with one buffer from createBuffer(1, true) and expects the gpgpu CSR taskCount to go from 0 to 1 with recordedDispatchFlags.implicitFlush set. */ HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenBlitTranslationWhenEnqueueIsCalledThenDoImplicitFlushOnGpgpuCsr) { auto buffer = createBuffer(1, true); setMockKernelArgs(std::array{{buffer.get()}}); auto ultCsr = static_cast *>(gpgpuCsr); EXPECT_EQ(0u, ultCsr->taskCount); commandQueue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_EQ(1u, ultCsr->taskCount); EXPECT_TRUE(ultCsr->recordedDispatchFlags.implicitFlush); } /* Same taskCount and implicitFlush expectations, but the kernel argument is an allocation from createGfxAllocation(1, true), which the test frees through the memory manager at the end. */ HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenBlitTranslationOnGfxAllocationWhenEnqueueIsCalledThenDoImplicitFlushOnGpgpuCsr) { auto gfxAllocation = createGfxAllocation(1, true); setMockKernelArgs(std::array{{gfxAllocation}}); auto ultCsr = static_cast *>(gpgpuCsr); EXPECT_EQ(0u, ultCsr->taskCount); commandQueue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_EQ(1u, ultCsr->taskCount); EXPECT_TRUE(ultCsr->recordedDispatchFlags.implicitFlush); device->getMemoryManager()->freeGraphicsMemory(gfxAllocation); } /* Sets ForceCacheFlushForBcs to 1 and expects the queue's deferredTimestampPackets to grow from 0 to 4 nodes after one kernel enqueue; the allocation is freed at the end. */ HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenCacheFlushRequiredWhenHandlingDependenciesThenPutAllNodesToDeferredList) { DebugManager.flags.ForceCacheFlushForBcs.set(1); auto gfxAllocation = createGfxAllocation(1, true); setMockKernelArgs(std::array{{gfxAllocation}}); TimestampPacketContainer *deferredTimestampPackets = static_cast *>(commandQueue.get())->deferredTimestampPackets.get(); EXPECT_EQ(0u, deferredTimestampPackets->peekNodes().size()); commandQueue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_EQ(4u, deferredTimestampPackets->peekNodes().size()); // Barrier, CacheFlush, AuxToNonAux, 
NonAuxToAux device->getMemoryManager()->freeGraphicsMemory(gfxAllocation); } /* Sets ForceCacheFlushForBcs to 1 and enqueues a kernel blocked on a UserEvent. Expects deferredTimestampPackets to stay empty while blocked and to hold 4 nodes once the event is set to CL_COMPLETE and the queue is no longer blocked. */ HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenCacheFlushRequiredWhenHandlingDependenciesForBlockedEnqueueThenPutAllNodesToDeferredList) { DebugManager.flags.ForceCacheFlushForBcs.set(1); auto gfxAllocation = createGfxAllocation(1, true); setMockKernelArgs(std::array{{gfxAllocation}}); TimestampPacketContainer *deferredTimestampPackets = static_cast *>(commandQueue.get())->deferredTimestampPackets.get(); UserEvent userEvent; cl_event waitlist[] = {&userEvent}; commandQueue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 1, waitlist, nullptr); EXPECT_EQ(0u, deferredTimestampPackets->peekNodes().size()); userEvent.setStatus(CL_COMPLETE); EXPECT_FALSE(commandQueue->isQueueBlocked()); EXPECT_EQ(4u, deferredTimestampPackets->peekNodes().size()); // Barrier, CacheFlush, AuxToNonAux, NonAuxToAux device->getMemoryManager()->freeGraphicsMemory(gfxAllocation); } /* Sets ForceCacheFlushForBcs to 1, enqueues a kernel blocked on a UserEvent and then sets the event status to -1. Expects the queue to be unblocked, deferredTimestampPackets to remain empty, and getContextEndValue(0u) of the first node in timestampPacketContainer to equal 0xffffffff. */ HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenTerminatedLatestEnqueuedTaskWhenHandlingDependenciesForBlockedEnqueueThenDoNotPutAllNodesToDeferredListAndSetTimestampData) { DebugManager.flags.ForceCacheFlushForBcs.set(1); auto gfxAllocation = createGfxAllocation(1, true); setMockKernelArgs(std::array{{gfxAllocation}}); TimestampPacketContainer *deferredTimestampPackets = static_cast *>(commandQueue.get())->deferredTimestampPackets.get(); TimestampPacketContainer *timestampPacketContainer = static_cast *>(commandQueue.get())->timestampPacketContainer.get(); UserEvent userEvent; cl_event waitlist[] = {&userEvent}; commandQueue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 1, waitlist, nullptr); EXPECT_EQ(0u, deferredTimestampPackets->peekNodes().size()); userEvent.setStatus(-1); EXPECT_FALSE(commandQueue->isQueueBlocked()); EXPECT_EQ(0u, deferredTimestampPackets->peekNodes().size()); EXPECT_EQ(timestampPacketContainer->peekNodes()[0]->getContextEndValue(0u), 0xffffffff); 
device->getMemoryManager()->freeGraphicsMemory(gfxAllocation); } /* Sets ForceCacheFlushForBcs to 1 and enqueues two kernels, each blocked on its own UserEvent. After the first event is set to -1, expects deferredTimestampPackets to be empty, timestampPacketContainer to hold 1 node and that node's getContextEndValue(0u) to equal 1u. After the second event is set to -1, expects the queue to be unblocked and the same node's value to equal 0xffffffff. */ HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenTerminatedTaskWhenHandlingDependenciesForBlockedEnqueueThenDoNotPutAllNodesToDeferredListAndDoNotSetTimestampData) { DebugManager.flags.ForceCacheFlushForBcs.set(1); auto gfxAllocation = createGfxAllocation(1, true); setMockKernelArgs(std::array{{gfxAllocation}}); TimestampPacketContainer *deferredTimestampPackets = static_cast *>(commandQueue.get())->deferredTimestampPackets.get(); TimestampPacketContainer *timestampPacketContainer = static_cast *>(commandQueue.get())->timestampPacketContainer.get(); UserEvent userEvent; [[maybe_unused]] UserEvent *ue = &userEvent; cl_event waitlist[] = {&userEvent}; UserEvent userEvent1; [[maybe_unused]] UserEvent *ue1 = &userEvent1; cl_event waitlist1[] = {&userEvent1}; commandQueue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 1, waitlist, nullptr); commandQueue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 1, waitlist1, nullptr); EXPECT_EQ(0u, deferredTimestampPackets->peekNodes().size()); userEvent.setStatus(-1); EXPECT_EQ(0u, deferredTimestampPackets->peekNodes().size()); EXPECT_EQ(1u, timestampPacketContainer->peekNodes().size()); EXPECT_EQ(timestampPacketContainer->peekNodes()[0]->getContextEndValue(0u), 1u); userEvent1.setStatus(-1); EXPECT_FALSE(commandQueue->isQueueBlocked()); EXPECT_EQ(0u, deferredTimestampPackets->peekNodes().size()); EXPECT_EQ(1u, timestampPacketContainer->peekNodes().size()); EXPECT_EQ(timestampPacketContainer->peekNodes()[0]->getContextEndValue(0u), 0xffffffff); device->getMemoryManager()->freeGraphicsMemory(gfxAllocation); } /* Fixture alias: BlitEnqueueTests instantiated with template argument 0. */ using BlitEnqueueWithNoTimestampPacketTests = BlitEnqueueTests<0>; /* Enqueues a kernel on a buffer from createBuffer(bufferSize, false), then a read of that buffer with CL_FALSE as the second argument, then finish(). Looks up one command followed by a command returned by expectMiFlush in the BCS stream, and one command in the queue stream after which expectNoCommand is applied to the remainder. */ HWTEST_TEMPLATED_F(BlitEnqueueWithNoTimestampPacketTests, givenNoTimestampPacketsWritewhenEnqueueingBlitOperationThenEnginesAreSynchronized) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW; 
using WALKER_TYPE = typename FamilyType::WALKER_TYPE; const size_t bufferSize = 1u; auto buffer = createBuffer(bufferSize, false); auto ultCsr = static_cast *>(gpgpuCsr); ASSERT_EQ(0u, ultCsr->taskCount); setMockKernelArgs(std::array{{buffer.get()}}); commandQueue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); char cpuBuffer[bufferSize]{}; commandQueue->enqueueReadBuffer(buffer.get(), CL_FALSE, 0, bufferSize, cpuBuffer, nullptr, 0, nullptr, nullptr); commandQueue->finish(); auto bcsCommands = getCmdList(bcsCsr->getCS(0), 0); auto ccsCommands = getCmdList(commandQueue->getCS(0), 0); auto cmdFound = expectCommand(bcsCommands.begin(), bcsCommands.end()); /* Pre-increment so the lookup starts after the command matched above; a post-increment would pass the un-advanced iterator and its increment would be overwritten by the assignment. */ cmdFound = expectMiFlush(++cmdFound, bcsCommands.end()); cmdFound = expectCommand(ccsCommands.begin(), ccsCommands.end()); /* Pre-increment: only the commands after the one matched above are scanned. */ expectNoCommand(++cmdFound, ccsCommands.end()); } /* Fixture (BlitEnqueueTests with template argument 0) holding counters for debug-pause commands found in a parsed command list. findSemaphores visits every match returned by find over cmdList: when the semaphore data dword equals DebugPauseState::hasUserStartConfirmation (or hasUserEndConfirmation) and the semaphore address equals debugPauseStateAddress, it expects COMPARE_OPERATION_SAD_EQUAL_SDD and WAIT_MODE_POLLING_MODE and increments semaphoreBeforeCopyFound (or semaphoreAfterCopyFound). */ struct BlitEnqueueWithDebugCapabilityTests : public BlitEnqueueTests<0> { template void findSemaphores(GenCmdList &cmdList) { auto semaphore = find(cmdList.begin(), cmdList.end()); while (semaphore != cmdList.end()) { auto semaphoreCmd = genCmdCast(*semaphore); if (static_cast(DebugPauseState::hasUserStartConfirmation) == semaphoreCmd->getSemaphoreDataDword() && debugPauseStateAddress == semaphoreCmd->getSemaphoreGraphicsAddress()) { EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_EQUAL_SDD, semaphoreCmd->getCompareOperation()); EXPECT_EQ(MI_SEMAPHORE_WAIT::WAIT_MODE::WAIT_MODE_POLLING_MODE, semaphoreCmd->getWaitMode()); semaphoreBeforeCopyFound++; } if (static_cast(DebugPauseState::hasUserEndConfirmation) == semaphoreCmd->getSemaphoreDataDword() && debugPauseStateAddress == semaphoreCmd->getSemaphoreGraphicsAddress()) { EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_EQUAL_SDD, semaphoreCmd->getCompareOperation()); EXPECT_EQ(MI_SEMAPHORE_WAIT::WAIT_MODE::WAIT_MODE_POLLING_MODE, semaphoreCmd->getWaitMode()); semaphoreAfterCopyFound++; } semaphore = find(++semaphore, 
cmdList.end()); } } /* Visits every match returned by find over cmdList. When the command's destination address equals debugPauseStateAddress and its immediate data equals DebugPauseState::waitingForUserStartConfirmation (or waitingForUserEndConfirmation), expects POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA_QWORD and increments miFlushBeforeCopyFound (or miFlushAfterCopyFound). */ template void findMiFlushes(GenCmdList &cmdList) { auto miFlush = find(cmdList.begin(), cmdList.end()); while (miFlush != cmdList.end()) { auto miFlushCmd = genCmdCast(*miFlush); if (static_cast(DebugPauseState::waitingForUserStartConfirmation) == miFlushCmd->getImmediateData() && debugPauseStateAddress == miFlushCmd->getDestinationAddress()) { EXPECT_EQ(MI_FLUSH_DW::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA_QWORD, miFlushCmd->getPostSyncOperation()); miFlushBeforeCopyFound++; } if (static_cast(DebugPauseState::waitingForUserEndConfirmation) == miFlushCmd->getImmediateData() && debugPauseStateAddress == miFlushCmd->getDestinationAddress()) { EXPECT_EQ(MI_FLUSH_DW::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA_QWORD, miFlushCmd->getPostSyncOperation()); miFlushAfterCopyFound++; } miFlush = find(++miFlush, cmdList.end()); } } /* Counters filled by findSemaphores and findMiFlushes, plus the buffer, pause-state address and host value shared by the tests of this fixture. */ uint32_t semaphoreBeforeCopyFound = 0; uint32_t semaphoreAfterCopyFound = 0; uint32_t miFlushBeforeCopyFound = 0; uint32_t miFlushAfterCopyFound = 0; ReleaseableObjectPtr buffer; uint64_t debugPauseStateAddress = 0; int hostPtr = 0; }; /* Sets PauseOnBlitCopy to 1, issues two enqueueWriteBuffer calls (second argument true) on a buffer with forceDisallowCPUCopy set, parses the BCS CSR commandStream and expects each of the four pause counters to equal 1. */ HWTEST_TEMPLATED_F(BlitEnqueueWithDebugCapabilityTests, givenDebugFlagSetWhenDispatchingBlitEnqueueThenAddPausingCommands) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW; auto ultBcsCsr = static_cast *>(bcsCsr); debugPauseStateAddress = ultBcsCsr->getDebugPauseStateGPUAddress(); buffer = createBuffer(1, false); buffer->forceDisallowCPUCopy = true; DebugManager.flags.PauseOnBlitCopy.set(1); commandQueue->enqueueWriteBuffer(buffer.get(), true, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); commandQueue->enqueueWriteBuffer(buffer.get(), true, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); HardwareParse hwParser; hwParser.parseCommands(ultBcsCsr->commandStream); findSemaphores(hwParser.cmdList); EXPECT_EQ(1u, semaphoreBeforeCopyFound); EXPECT_EQ(1u, semaphoreAfterCopyFound); findMiFlushes(hwParser.cmdList); EXPECT_EQ(1u, 
miFlushBeforeCopyFound); EXPECT_EQ(1u, miFlushAfterCopyFound); } /* Sets PauseOnBlitCopy to -2, issues two enqueueWriteBuffer calls (second argument true), parses the BCS CSR commandStream and expects each of the four pause counters to equal 2. */ HWTEST_TEMPLATED_F(BlitEnqueueWithDebugCapabilityTests, givenDebugFlagSetToMinusTwoWhenDispatchingBlitEnqueueThenAddPausingCommandsForEachEnqueue) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW; auto ultBcsCsr = static_cast *>(bcsCsr); debugPauseStateAddress = ultBcsCsr->getDebugPauseStateGPUAddress(); buffer = createBuffer(1, false); buffer->forceDisallowCPUCopy = true; DebugManager.flags.PauseOnBlitCopy.set(-2); commandQueue->enqueueWriteBuffer(buffer.get(), true, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); commandQueue->enqueueWriteBuffer(buffer.get(), true, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); HardwareParse hwParser; hwParser.parseCommands(ultBcsCsr->commandStream); findSemaphores(hwParser.cmdList); EXPECT_EQ(2u, semaphoreBeforeCopyFound); EXPECT_EQ(2u, semaphoreAfterCopyFound); findMiFlushes(hwParser.cmdList); EXPECT_EQ(2u, miFlushBeforeCopyFound); EXPECT_EQ(2u, miFlushAfterCopyFound); } /* Sets PauseOnBlitCopy to 0 and PauseOnGpuMode to PauseMode::BeforeWorkload, issues one enqueueWriteBuffer call, and expects the before-copy semaphore and flush counters to equal 1 and the after-copy counters to equal 0. */ HWTEST_TEMPLATED_F(BlitEnqueueWithDebugCapabilityTests, givenPauseModeSetToBeforeOnlyWhenDispatchingBlitEnqueueThenAddPauseCommandsOnlyBeforeEnqueue) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW; auto ultBcsCsr = static_cast *>(bcsCsr); debugPauseStateAddress = ultBcsCsr->getDebugPauseStateGPUAddress(); buffer = createBuffer(1, false); buffer->forceDisallowCPUCopy = true; DebugManager.flags.PauseOnBlitCopy.set(0); DebugManager.flags.PauseOnGpuMode.set(PauseOnGpuProperties::PauseMode::BeforeWorkload); commandQueue->enqueueWriteBuffer(buffer.get(), true, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); HardwareParse hwParser; hwParser.parseCommands(ultBcsCsr->commandStream); findSemaphores(hwParser.cmdList); EXPECT_EQ(1u, semaphoreBeforeCopyFound); EXPECT_EQ(0u, semaphoreAfterCopyFound); findMiFlushes(hwParser.cmdList); EXPECT_EQ(1u, 
miFlushBeforeCopyFound); EXPECT_EQ(0u, miFlushAfterCopyFound); } /* Sets PauseOnBlitCopy to 0 and PauseOnGpuMode to PauseMode::AfterWorkload, issues one enqueueWriteBuffer call, and expects the before-copy semaphore and flush counters to equal 0 and the after-copy counters to equal 1. */ HWTEST_TEMPLATED_F(BlitEnqueueWithDebugCapabilityTests, givenPauseModeSetToAfterOnlyWhenDispatchingBlitEnqueueThenAddPauseCommandsOnlyAfterEnqueue) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW; auto ultBcsCsr = static_cast *>(bcsCsr); debugPauseStateAddress = ultBcsCsr->getDebugPauseStateGPUAddress(); buffer = createBuffer(1, false); buffer->forceDisallowCPUCopy = true; DebugManager.flags.PauseOnBlitCopy.set(0); DebugManager.flags.PauseOnGpuMode.set(PauseOnGpuProperties::PauseMode::AfterWorkload); commandQueue->enqueueWriteBuffer(buffer.get(), true, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); HardwareParse hwParser; hwParser.parseCommands(ultBcsCsr->commandStream); findSemaphores(hwParser.cmdList); EXPECT_EQ(0u, semaphoreBeforeCopyFound); EXPECT_EQ(1u, semaphoreAfterCopyFound); findMiFlushes(hwParser.cmdList); EXPECT_EQ(0u, miFlushBeforeCopyFound); EXPECT_EQ(1u, miFlushAfterCopyFound); } /* Sets PauseOnBlitCopy to 0 and PauseOnGpuMode to PauseMode::BeforeAndAfterWorkload, issues one enqueueWriteBuffer call, and expects each of the four pause counters to equal 1. */ HWTEST_TEMPLATED_F(BlitEnqueueWithDebugCapabilityTests, givenPauseModeSetToBeforeAndAfterWorkloadWhenDispatchingBlitEnqueueThenAddPauseCommandsAroundEnqueue) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW; auto ultBcsCsr = static_cast *>(bcsCsr); debugPauseStateAddress = ultBcsCsr->getDebugPauseStateGPUAddress(); buffer = createBuffer(1, false); buffer->forceDisallowCPUCopy = true; DebugManager.flags.PauseOnBlitCopy.set(0); DebugManager.flags.PauseOnGpuMode.set(PauseOnGpuProperties::PauseMode::BeforeAndAfterWorkload); commandQueue->enqueueWriteBuffer(buffer.get(), true, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); HardwareParse hwParser; hwParser.parseCommands(ultBcsCsr->commandStream); findSemaphores(hwParser.cmdList); EXPECT_EQ(1u, semaphoreBeforeCopyFound); EXPECT_EQ(1u, semaphoreAfterCopyFound); findMiFlushes(hwParser.cmdList); EXPECT_EQ(1u, 
miFlushBeforeCopyFound); EXPECT_EQ(1u, miFlushAfterCopyFound); } /* Sets PauseOnBlitCopy to 1, creates a device through MockDevice::createWithNewExecutionEnvironment(nullptr) and expects userPauseConfirmation of the default engine's command stream receiver to be non-null. */ HWTEST_TEMPLATED_F(BlitEnqueueWithDebugCapabilityTests, givenDebugFlagSetWhenCreatingCsrThenCreateDebugThread) { DebugManager.flags.PauseOnBlitCopy.set(1); auto localDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); auto ultCsr = static_cast *>(localDevice->getDefaultEngine().commandStreamReceiver); EXPECT_NE(nullptr, ultCsr->userPauseConfirmation.get()); } /* Fixture (BlitEnqueueTests with template argument 1) that records flush order. MyUltCsr::flush pre-increments the counter pointed to by flushCounter, stores the result in latestFlushedCounter and then delegates to UltCommandStreamReceiver::flush; flushCounter is null by default and must be set before flush is called. SetUpT stores MyUltCsr::create into commandStreamReceiverFactory[IGFX_MAX_CORE + eRenderCoreFamily] before running the base SetUpT; variableBackup is built from a pointer to that entry, presumably to restore it later (TODO confirm against the backup helper). */ struct BlitEnqueueFlushTests : public BlitEnqueueTests<1> { template class MyUltCsr : public UltCommandStreamReceiver { public: using UltCommandStreamReceiver::UltCommandStreamReceiver; SubmissionStatus flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override { latestFlushedCounter = ++(*flushCounter); return UltCommandStreamReceiver::flush(batchBuffer, allocationsForResidency); } static CommandStreamReceiver *create(bool withAubDump, ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield) { return new MyUltCsr(executionEnvironment, rootDeviceIndex, deviceBitfield); } uint32_t *flushCounter = nullptr; uint32_t latestFlushedCounter = 0; }; template void SetUpT() { auto csrCreateFcn = &commandStreamReceiverFactory[IGFX_MAX_CORE + defaultHwInfo->platform.eRenderCoreFamily]; variableBackup = std::make_unique>(csrCreateFcn); *csrCreateFcn = MyUltCsr::create; BlitEnqueueTests<1>::SetUpT(); } std::unique_ptr> variableBackup; }; /* Shares one flushCounter between the gpgpu and BCS receivers, issues one enqueueWriteBuffer call (second argument true) and expects latestFlushedCounter to be 1 on the gpgpu receiver and 2 on the BCS receiver, i.e. the gpgpu receiver flushed first. */ HWTEST_TEMPLATED_F(BlitEnqueueFlushTests, givenNonBlockedQueueWhenBlitEnqueuedThenFlushGpgpuCsrFirst) { auto buffer = createBuffer(1, false); buffer->forceDisallowCPUCopy = true; int hostPtr = 0; uint32_t flushCounter = 0; auto myUltGpgpuCsr = static_cast *>(gpgpuCsr); myUltGpgpuCsr->flushCounter = &flushCounter; auto myUltBcsCsr = static_cast *>(bcsCsr); myUltBcsCsr->flushCounter = &flushCounter; commandQueue->enqueueWriteBuffer(buffer.get(), true, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(1u, 
myUltGpgpuCsr->latestFlushedCounter); EXPECT_EQ(2u, myUltBcsCsr->latestFlushedCounter); } /* Sets MakeEachEnqueueBlocking to true and waitForAllEnginesReturnValue to WaitStatus::GpuHang on the mock queue, then expects enqueueWriteBuffer (second argument CL_FALSE) to return CL_OUT_OF_RESOURCES with waitForAllEnginesCalledCount equal to 1. */ HWTEST_TEMPLATED_F(BlitEnqueueFlushTests, givenGpuHangAndBlockingCallAndNonBlockedQueueWhenBlitEnqueuedThenOutOfResourcesIsReturned) { DebugManager.flags.MakeEachEnqueueBlocking.set(true); auto buffer = createBuffer(1, false); buffer->forceDisallowCPUCopy = true; int hostPtr = 0; uint32_t flushCounter = 0; auto myUltGpgpuCsr = static_cast *>(gpgpuCsr); myUltGpgpuCsr->flushCounter = &flushCounter; auto myUltBcsCsr = static_cast *>(bcsCsr); myUltBcsCsr->flushCounter = &flushCounter; auto mockCommandQueue = static_cast *>(commandQueue.get()); mockCommandQueue->waitForAllEnginesReturnValue = WaitStatus::GpuHang; const auto enqueueResult = mockCommandQueue->enqueueWriteBuffer(buffer.get(), CL_FALSE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_OUT_OF_RESOURCES, enqueueResult); EXPECT_EQ(1, mockCommandQueue->waitForAllEnginesCalledCount); } /* Shares one flushCounter between the gpgpu and BCS receivers, issues an enqueueWriteBuffer call (second argument false) blocked on a UserEvent, completes the event, and expects latestFlushedCounter to be 1 on the gpgpu receiver and 2 on the BCS receiver with the queue no longer blocked. */ HWTEST_TEMPLATED_F(BlitEnqueueFlushTests, givenBlockedQueueWhenBlitEnqueuedThenFlushGpgpuCsrFirst) { auto buffer = createBuffer(1, false); buffer->forceDisallowCPUCopy = true; int hostPtr = 0; uint32_t flushCounter = 0; auto myUltGpgpuCsr = static_cast *>(gpgpuCsr); myUltGpgpuCsr->flushCounter = &flushCounter; auto myUltBcsCsr = static_cast *>(bcsCsr); myUltBcsCsr->flushCounter = &flushCounter; UserEvent userEvent; cl_event waitlist[] = {&userEvent}; commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 1, waitlist, nullptr); userEvent.setStatus(CL_COMPLETE); EXPECT_EQ(1u, myUltGpgpuCsr->latestFlushedCounter); EXPECT_EQ(2u, myUltBcsCsr->latestFlushedCounter); EXPECT_FALSE(commandQueue->isQueueBlocked()); } /* Expects isCacheFlushForBcsRequired() to return false when ForceCacheFlushForBcs is set to 0 and true when it is set to 1. */ HWTEST_TEMPLATED_F(BlitEnqueueFlushTests, givenDebugFlagSetWhenCheckingBcsCacheFlushRequirementThenReturnCorrectValue) { auto mockCommandQueue = static_cast *>(commandQueue.get()); DebugManager.flags.ForceCacheFlushForBcs.set(0); EXPECT_FALSE(mockCommandQueue->isCacheFlushForBcsRequired()); 
DebugManager.flags.ForceCacheFlushForBcs.set(1); EXPECT_TRUE(mockCommandQueue->isCacheFlushForBcsRequired()); } using BlitEnqueueTaskCountTests = BlitEnqueueTests<1>; HWTEST_TEMPLATED_F(BlitEnqueueTaskCountTests, whenWaitUntilCompletionCalledThenWaitForSpecificBcsTaskCount) { uint32_t gpgpuTaskCount = 123; uint32_t bcsTaskCount = 123; CopyEngineState bcsState{bcsCsr->getOsContext().getEngineType(), bcsTaskCount}; commandQueue->waitUntilComplete(gpgpuTaskCount, Range{&bcsState}, 0, false); EXPECT_EQ(gpgpuTaskCount, static_cast *>(gpgpuCsr)->latestWaitForCompletionWithTimeoutTaskCount.load()); EXPECT_EQ(bcsTaskCount, static_cast *>(bcsCsr)->latestWaitForCompletionWithTimeoutTaskCount.load()); } HWTEST_TEMPLATED_F(BlitEnqueueTaskCountTests, givenEventWithNotreadyBcsTaskCountThenDontReportCompletion) { const uint32_t gpgpuTaskCount = 123; const uint32_t bcsTaskCount = 123; *gpgpuCsr->getTagAddress() = gpgpuTaskCount; *bcsCsr->getTagAddress() = bcsTaskCount - 1; commandQueue->updateBcsTaskCount(bcsCsr->getOsContext().getEngineType(), bcsTaskCount); Event event(commandQueue.get(), CL_COMMAND_WRITE_BUFFER, 1, gpgpuTaskCount); event.setupBcs(bcsCsr->getOsContext().getEngineType()); event.updateCompletionStamp(gpgpuTaskCount, bcsTaskCount, 1, 0); event.updateExecutionStatus(); EXPECT_EQ(static_cast(CL_SUBMITTED), event.peekExecutionStatus()); *bcsCsr->getTagAddress() = bcsTaskCount; event.updateExecutionStatus(); EXPECT_EQ(static_cast(CL_COMPLETE), event.peekExecutionStatus()); } HWTEST_TEMPLATED_F(BlitEnqueueTaskCountTests, givenEventWhenWaitingForCompletionThenWaitForCurrentBcsTaskCount) { auto buffer = createBuffer(1, false); buffer->forceDisallowCPUCopy = true; int hostPtr = 0; auto ultGpgpuCsr = static_cast *>(gpgpuCsr); auto ultBcsCsr = static_cast *>(bcsCsr); cl_event outEvent1, outEvent2; commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, &outEvent1); commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, 
nullptr, 0, nullptr, &outEvent2); clWaitForEvents(1, &outEvent2); EXPECT_EQ(2u, ultGpgpuCsr->latestWaitForCompletionWithTimeoutTaskCount.load()); EXPECT_EQ(2u, ultBcsCsr->latestWaitForCompletionWithTimeoutTaskCount.load()); clWaitForEvents(1, &outEvent1); EXPECT_EQ(1u, ultGpgpuCsr->latestWaitForCompletionWithTimeoutTaskCount.load()); EXPECT_EQ(1u, ultBcsCsr->latestWaitForCompletionWithTimeoutTaskCount.load()); clReleaseEvent(outEvent1); clReleaseEvent(outEvent2); } HWTEST_TEMPLATED_F(BlitEnqueueTaskCountTests, givenBufferDumpingEnabledWhenEnqueueingThenSetCorrectDumpOption) { auto buffer = createBuffer(1, false); buffer->forceDisallowCPUCopy = true; int hostPtr = 0; DebugManager.flags.AUBDumpAllocsOnEnqueueReadOnly.set(true); DebugManager.flags.AUBDumpBufferFormat.set("BIN"); auto mockCommandQueue = static_cast *>(commandQueue.get()); { // BCS enqueue commandQueue->enqueueReadBuffer(buffer.get(), true, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); EXPECT_TRUE(mockCommandQueue->notifyEnqueueReadBufferCalled); EXPECT_TRUE(mockCommandQueue->useBcsCsrOnNotifyEnabled); mockCommandQueue->notifyEnqueueReadBufferCalled = false; } { // Non-BCS enqueue DebugManager.flags.EnableBlitterForEnqueueOperations.set(0); commandQueue->enqueueReadBuffer(buffer.get(), true, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); EXPECT_TRUE(mockCommandQueue->notifyEnqueueReadBufferCalled); EXPECT_FALSE(mockCommandQueue->useBcsCsrOnNotifyEnabled); } } HWTEST_TEMPLATED_F(BlitEnqueueTaskCountTests, givenBlockedEventWhenWaitingForCompletionThenWaitForCurrentBcsTaskCount) { auto buffer = createBuffer(1, false); buffer->forceDisallowCPUCopy = true; int hostPtr = 0; auto ultGpgpuCsr = static_cast *>(gpgpuCsr); auto ultBcsCsr = static_cast *>(bcsCsr); cl_event outEvent1, outEvent2; UserEvent userEvent; cl_event waitlist1 = &userEvent; cl_event *waitlist2 = &outEvent1; commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 1, &waitlist1, &outEvent1); 
commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 1, waitlist2, &outEvent2); userEvent.setStatus(CL_COMPLETE); clWaitForEvents(1, &outEvent2); EXPECT_EQ(2u, ultGpgpuCsr->latestWaitForCompletionWithTimeoutTaskCount.load()); EXPECT_EQ(2u, ultBcsCsr->latestWaitForCompletionWithTimeoutTaskCount.load()); clWaitForEvents(1, &outEvent1); EXPECT_EQ(1u, ultGpgpuCsr->latestWaitForCompletionWithTimeoutTaskCount.load()); EXPECT_EQ(1u, ultBcsCsr->latestWaitForCompletionWithTimeoutTaskCount.load()); clReleaseEvent(outEvent1); clReleaseEvent(outEvent2); EXPECT_FALSE(commandQueue->isQueueBlocked()); } HWTEST_TEMPLATED_F(BlitEnqueueTaskCountTests, givenBlockedEnqueueWithoutKernelWhenWaitingForCompletionThenWaitForCurrentBcsTaskCount) { auto ultGpgpuCsr = static_cast *>(gpgpuCsr); auto ultBcsCsr = static_cast *>(bcsCsr); cl_event outEvent1, outEvent2; UserEvent userEvent; cl_event waitlist1 = &userEvent; cl_event *waitlist2 = &outEvent1; commandQueue->enqueueMarkerWithWaitList(1, &waitlist1, &outEvent1); commandQueue->enqueueMarkerWithWaitList(1, waitlist2, &outEvent2); userEvent.setStatus(CL_COMPLETE); clWaitForEvents(1, &outEvent2); EXPECT_EQ(1u, ultGpgpuCsr->latestWaitForCompletionWithTimeoutTaskCount.load()); EXPECT_EQ(0u, ultBcsCsr->latestWaitForCompletionWithTimeoutTaskCount.load()); clWaitForEvents(1, &outEvent1); EXPECT_EQ(0u, ultGpgpuCsr->latestWaitForCompletionWithTimeoutTaskCount.load()); EXPECT_EQ(0u, ultBcsCsr->latestWaitForCompletionWithTimeoutTaskCount.load()); clReleaseEvent(outEvent1); clReleaseEvent(outEvent2); EXPECT_FALSE(commandQueue->isQueueBlocked()); } HWTEST_TEMPLATED_F(BlitEnqueueTaskCountTests, givenEventFromCpuCopyWhenWaitingForCompletionThenWaitForCurrentBcsTaskCount) { auto buffer = createBuffer(1, false); int hostPtr = 0; auto ultGpgpuCsr = static_cast *>(gpgpuCsr); auto ultBcsCsr = static_cast *>(bcsCsr); ultGpgpuCsr->taskCount = 1; commandQueue->taskCount = 1; ultBcsCsr->taskCount = 2; 
commandQueue->updateBcsTaskCount(ultBcsCsr->getOsContext().getEngineType(), 2); cl_event outEvent1, outEvent2; commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, &outEvent1); commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, &outEvent2); clWaitForEvents(1, &outEvent2); EXPECT_EQ(3u, static_cast *>(gpgpuCsr)->latestWaitForCompletionWithTimeoutTaskCount.load()); EXPECT_EQ(4u, static_cast *>(bcsCsr)->latestWaitForCompletionWithTimeoutTaskCount.load()); clWaitForEvents(1, &outEvent1); EXPECT_EQ(2u, static_cast *>(gpgpuCsr)->latestWaitForCompletionWithTimeoutTaskCount.load()); EXPECT_EQ(3u, static_cast *>(bcsCsr)->latestWaitForCompletionWithTimeoutTaskCount.load()); clReleaseEvent(outEvent1); clReleaseEvent(outEvent2); } HWTEST_TEMPLATED_F(BlitEnqueueTaskCountTests, givenMarkerThatFollowsCopyOperationWhenItIsWaitedItHasProperDependencies) { auto buffer = createBuffer(1, false); int hostPtr = 0; using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; auto mockCmdQueue = static_cast *>(commandQueue.get()); mockCmdQueue->commandQueueProperties |= CL_QUEUE_PROFILING_ENABLE; cl_event outEvent1; commandQueue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); auto offset = mockCmdQueue->getCS(0).getUsed(); //marker needs to program semaphore commandQueue->enqueueMarkerWithWaitList(0, nullptr, &outEvent1); auto cmdListQueue = getCmdList(mockCmdQueue->getCS(0), offset); expectCommand(cmdListQueue.begin(), cmdListQueue.end()); clReleaseEvent(outEvent1); } HWTEST_TEMPLATED_F(BlitEnqueueTaskCountTests, givenMarkerThatFollowsCopyOperationWhenItIsWaitedItHasProperDependenciesOnWait) { auto buffer = createBuffer(1, false); int hostPtr = 0; using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; cl_event outEvent1; 
commandQueue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); commandQueue->enqueueMarkerWithWaitList(0, nullptr, &outEvent1); auto ultGpgpuCsr = static_cast *>(gpgpuCsr); auto ultBcsCsr = static_cast *>(bcsCsr); //make sure we wait for both clWaitForEvents(1, &outEvent1); EXPECT_EQ(ultBcsCsr->latestWaitForCompletionWithTimeoutTaskCount, ultBcsCsr->taskCount); EXPECT_EQ(ultGpgpuCsr->latestWaitForCompletionWithTimeoutTaskCount, ultGpgpuCsr->taskCount); clWaitForEvents(1, &outEvent1); clReleaseEvent(outEvent1); } HWTEST_TEMPLATED_F(BlitEnqueueTaskCountTests, givenMarkerThatFollowsCopyOperationWhenItIsWaitedItHasProperDependenciesOnWaitEvenWhenMultipleMarkersAreSequenced) { auto buffer = createBuffer(1, false); int hostPtr = 0; using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; cl_event outEvent1, outEvent2; commandQueue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); commandQueue->enqueueMarkerWithWaitList(0, nullptr, &outEvent1); commandQueue->enqueueMarkerWithWaitList(0, nullptr, nullptr); commandQueue->enqueueMarkerWithWaitList(0, nullptr, &outEvent2); auto ultGpgpuCsr = static_cast *>(gpgpuCsr); auto ultBcsCsr = static_cast *>(bcsCsr); //make sure we wait for both clWaitForEvents(1, &outEvent2); EXPECT_EQ(ultBcsCsr->latestWaitForCompletionWithTimeoutTaskCount, ultBcsCsr->taskCount); EXPECT_EQ(ultGpgpuCsr->latestWaitForCompletionWithTimeoutTaskCount, ultGpgpuCsr->taskCount); clWaitForEvents(1, &outEvent2); clReleaseEvent(outEvent1); clReleaseEvent(outEvent2); } using BlitEnqueueWithDisabledGpgpuSubmissionTests = BlitEnqueueTests<1>; HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, 
givenCacheFlushRequiredWhenDoingBcsCopyThenSubmitToGpgpuOnlyIfPreviousEnqueueWasGpgpu) { auto mockCommandQueue = static_cast *>(commandQueue.get()); EXPECT_EQ(EnqueueProperties::Operation::None, mockCommandQueue->latestSentEnqueueType); DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(-1); mockCommandQueue->overrideIsCacheFlushForBcsRequired.enabled = true; mockCommandQueue->overrideIsCacheFlushForBcsRequired.returnValue = true; auto buffer = createBuffer(1, false); buffer->forceDisallowCPUCopy = true; int hostPtr = 0; commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType); EXPECT_EQ(0u, gpgpuCsr->peekTaskCount()); commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType); EXPECT_EQ(0u, gpgpuCsr->peekTaskCount()); commandQueue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_EQ(EnqueueProperties::Operation::GpuKernel, mockCommandQueue->latestSentEnqueueType); EXPECT_EQ(1u, gpgpuCsr->peekTaskCount()); commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType); EXPECT_EQ(2u, gpgpuCsr->peekTaskCount()); commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType); EXPECT_EQ(2u, gpgpuCsr->peekTaskCount()); } HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenProfilingEnabledWhenSubmittingWithoutFlushToGpgpuThenSetSubmitTime) { auto mockCommandQueue = static_cast *>(commandQueue.get()); EXPECT_EQ(EnqueueProperties::Operation::None, mockCommandQueue->latestSentEnqueueType); 
DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(-1); mockCommandQueue->overrideIsCacheFlushForBcsRequired.enabled = true; mockCommandQueue->overrideIsCacheFlushForBcsRequired.returnValue = true; mockCommandQueue->setProfilingEnabled(); auto buffer = createBuffer(1, false); buffer->forceDisallowCPUCopy = true; int hostPtr = 0; cl_event clEvent; commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, &clEvent); EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType); EXPECT_EQ(0u, gpgpuCsr->peekTaskCount()); auto event = castToObject(clEvent); uint64_t submitTime = 0; event->getEventProfilingInfo(CL_PROFILING_COMMAND_SUBMIT, sizeof(submitTime), &submitTime, nullptr); EXPECT_NE(0u, submitTime); clReleaseEvent(clEvent); } HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenOutEventWhenEnqueuingBcsSubmissionThenSetupBcsCsrInEvent) { auto mockCommandQueue = static_cast *>(commandQueue.get()); EXPECT_EQ(EnqueueProperties::Operation::None, mockCommandQueue->latestSentEnqueueType); auto buffer = createBuffer(1, false); buffer->forceDisallowCPUCopy = true; int hostPtr = 0; { DebugManager.flags.EnableBlitterForEnqueueOperations.set(0); cl_event clEvent; commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, &clEvent); EXPECT_EQ(EnqueueProperties::Operation::GpuKernel, mockCommandQueue->latestSentEnqueueType); EXPECT_EQ(0u, bcsCsr->peekTaskCount()); EXPECT_EQ(1u, gpgpuCsr->peekTaskCount()); auto event = castToObject(clEvent); EXPECT_EQ(0u, event->peekBcsTaskCountFromCommandQueue()); clReleaseEvent(clEvent); } { DebugManager.flags.EnableBlitterForEnqueueOperations.set(1); cl_event clEvent; commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, &clEvent); EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType); EXPECT_EQ(1u, bcsCsr->peekTaskCount()); EXPECT_EQ(2u, 
gpgpuCsr->peekTaskCount()); auto event = castToObject(clEvent); EXPECT_EQ(1u, event->peekBcsTaskCountFromCommandQueue()); clReleaseEvent(clEvent); } } HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenCacheFlushNotRequiredWhenDoingBcsCopyThenDontSubmitToGpgpu) { auto mockCommandQueue = static_cast *>(commandQueue.get()); EXPECT_EQ(EnqueueProperties::Operation::None, mockCommandQueue->latestSentEnqueueType); DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(-1); mockCommandQueue->overrideIsCacheFlushForBcsRequired.enabled = true; mockCommandQueue->overrideIsCacheFlushForBcsRequired.returnValue = false; auto buffer = createBuffer(1, false); buffer->forceDisallowCPUCopy = true; int hostPtr = 0; commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType); EXPECT_EQ(0u, gpgpuCsr->peekTaskCount()); commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType); EXPECT_EQ(0u, gpgpuCsr->peekTaskCount()); commandQueue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_EQ(EnqueueProperties::Operation::GpuKernel, mockCommandQueue->latestSentEnqueueType); EXPECT_EQ(1u, gpgpuCsr->peekTaskCount()); commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType); EXPECT_EQ(1u, gpgpuCsr->peekTaskCount()); commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType); EXPECT_EQ(1u, gpgpuCsr->peekTaskCount()); } HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, 
givenCacheFlushNotRequiredAndEnqueueNotFlushedWhenDoingBcsCopyThenSubmitOnlyOnceAfterEnqueue) { auto mockCommandQueue = static_cast *>(commandQueue.get()); EXPECT_EQ(EnqueueProperties::Operation::None, mockCommandQueue->latestSentEnqueueType); DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(-1); DebugManager.flags.PerformImplicitFlushForNewResource.set(0); DebugManager.flags.PerformImplicitFlushForIdleGpu.set(0); mockCommandQueue->overrideIsCacheFlushForBcsRequired.enabled = true; mockCommandQueue->overrideIsCacheFlushForBcsRequired.returnValue = false; mockCommandQueue->getGpgpuCommandStreamReceiver().overrideDispatchPolicy(DispatchMode::BatchedDispatch); mockCommandQueue->getGpgpuCommandStreamReceiver().postInitFlagsSetup(); auto buffer = createBuffer(1, false); buffer->forceDisallowCPUCopy = true; int hostPtr = 0; commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType); EXPECT_EQ(0u, gpgpuCsr->peekTaskCount()); commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType); EXPECT_EQ(0u, gpgpuCsr->peekTaskCount()); commandQueue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_EQ(EnqueueProperties::Operation::GpuKernel, mockCommandQueue->latestSentEnqueueType); EXPECT_EQ(1u, gpgpuCsr->peekTaskCount()); commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType); EXPECT_EQ(2u, gpgpuCsr->peekTaskCount()); commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType); EXPECT_EQ(2u, gpgpuCsr->peekTaskCount()); } 
HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenCacheFlushNotRequiredWhenDoingBcsCopyAfterBarrierThenSubmitToGpgpu) { auto mockCommandQueue = static_cast *>(commandQueue.get()); EXPECT_EQ(EnqueueProperties::Operation::None, mockCommandQueue->latestSentEnqueueType); DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(-1); mockCommandQueue->overrideIsCacheFlushForBcsRequired.enabled = true; mockCommandQueue->overrideIsCacheFlushForBcsRequired.returnValue = false; auto buffer = createBuffer(1, false); buffer->forceDisallowCPUCopy = true; int hostPtr = 0; EXPECT_EQ(0u, gpgpuCsr->peekTaskCount()); commandQueue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_EQ(1u, gpgpuCsr->peekTaskCount()); commandQueue->enqueueBarrierWithWaitList(0, nullptr, nullptr); EXPECT_EQ(1u, gpgpuCsr->peekTaskCount()); commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType); EXPECT_EQ(2u, gpgpuCsr->peekTaskCount()); } HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenCacheFlushNotRequiredWhenDoingBcsCopyOnBlockedQueueThenSubmitToGpgpu) { auto mockCommandQueue = static_cast *>(commandQueue.get()); EXPECT_EQ(EnqueueProperties::Operation::None, mockCommandQueue->latestSentEnqueueType); DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(-1); mockCommandQueue->overrideIsCacheFlushForBcsRequired.enabled = true; mockCommandQueue->overrideIsCacheFlushForBcsRequired.returnValue = false; auto buffer = createBuffer(1, false); buffer->forceDisallowCPUCopy = true; int hostPtr = 0; UserEvent userEvent; cl_event waitlist = &userEvent; commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 1, &waitlist, nullptr); EXPECT_EQ(EnqueueProperties::Operation::None, mockCommandQueue->latestSentEnqueueType); userEvent.setStatus(CL_COMPLETE); 
EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType); EXPECT_EQ(1u, gpgpuCsr->peekTaskCount()); EXPECT_FALSE(commandQueue->isQueueBlocked()); } HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenCacheFlushRequiredWhenDoingBcsCopyOnBlockedQueueThenSubmitToGpgpu) { auto mockCommandQueue = static_cast *>(commandQueue.get()); EXPECT_EQ(EnqueueProperties::Operation::None, mockCommandQueue->latestSentEnqueueType); DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(-1); mockCommandQueue->overrideIsCacheFlushForBcsRequired.enabled = true; mockCommandQueue->overrideIsCacheFlushForBcsRequired.returnValue = true; auto buffer = createBuffer(1, false); buffer->forceDisallowCPUCopy = true; int hostPtr = 0; UserEvent userEvent; cl_event waitlist = &userEvent; commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 1, &waitlist, nullptr); EXPECT_EQ(EnqueueProperties::Operation::None, mockCommandQueue->latestSentEnqueueType); userEvent.setStatus(CL_COMPLETE); EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType); EXPECT_EQ(1u, gpgpuCsr->peekTaskCount()); EXPECT_FALSE(commandQueue->isQueueBlocked()); } HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenCacheFlushRequiredWhenDoingBcsCopyThatRequiresCacheFlushThenSubmitToGpgpu) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT; DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(-1); auto mockCommandQueue = static_cast *>(commandQueue.get()); mockCommandQueue->overrideIsCacheFlushForBcsRequired.enabled = true; mockCommandQueue->overrideIsCacheFlushForBcsRequired.returnValue = true; auto buffer = createBuffer(1, false); buffer->forceDisallowCPUCopy = true; int hostPtr = 0; // enqueue kernel to force gpgpu submission on write buffer 
commandQueue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_EQ(1u, gpgpuCsr->peekTaskCount()); auto offset = mockCommandQueue->getCS(0).getUsed(); commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(2u, gpgpuCsr->peekTaskCount()); auto cmdListBcs = getCmdList(bcsCsr->getCS(0), 0); auto cmdListQueue = getCmdList(mockCommandQueue->getCS(0), offset); uint64_t cacheFlushWriteAddress = 0; { auto cmdFound = expectPipeControl(cmdListQueue.begin(), cmdListQueue.end()); auto pipeControlCmd = genCmdCast(*cmdFound); EXPECT_TRUE(pipeControlCmd->getDcFlushEnable()); EXPECT_TRUE(pipeControlCmd->getCommandStreamerStallEnable()); cacheFlushWriteAddress = NEO::UnitTestHelper::getPipeControlPostSyncAddress(*pipeControlCmd); EXPECT_NE(0u, cacheFlushWriteAddress); } { auto cmdFound = expectCommand(cmdListBcs.begin(), cmdListBcs.end()); verifySemaphore(cmdFound, cacheFlushWriteAddress); cmdFound = expectCommand(cmdListBcs.begin(), cmdListBcs.end()); EXPECT_NE(cmdListBcs.end(), cmdFound); } } HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenSubmissionToDifferentEngineWhenRequestingForNewTimestmapPacketThenClearDependencies) { auto mockCommandQueue = static_cast *>(commandQueue.get()); const bool clearDependencies = true; { TimestampPacketContainer previousNodes; mockCommandQueue->obtainNewTimestampPacketNodes(1, previousNodes, clearDependencies, *gpgpuCsr); // init EXPECT_EQ(0u, previousNodes.peekNodes().size()); } { TimestampPacketContainer previousNodes; mockCommandQueue->obtainNewTimestampPacketNodes(1, previousNodes, clearDependencies, *bcsCsr); EXPECT_EQ(0u, previousNodes.peekNodes().size()); } } using BlitCopyTests = BlitEnqueueTests<1>; HWTEST_TEMPLATED_F(BlitCopyTests, givenKernelAllocationInLocalMemoryWhenCreatingWithoutAllowedCpuAccessThenUseBcsForTransfer) { 
DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast(LocalMemoryAccessMode::CpuAccessDisallowed)); DebugManager.flags.ForceNonSystemMemoryPlacement.set(1 << (static_cast(AllocationType::KERNEL_ISA) - 1)); uint32_t kernelHeap = 0; KernelInfo kernelInfo; kernelInfo.heapInfo.KernelHeapSize = 1; kernelInfo.heapInfo.pKernelHeap = &kernelHeap; auto initialTaskCount = bcsMockContext->bcsCsr->peekTaskCount(); kernelInfo.createKernelAllocation(device->getDevice(), false); if (kernelInfo.kernelAllocation->isAllocatedInLocalMemoryPool()) { EXPECT_EQ(initialTaskCount + 1, bcsMockContext->bcsCsr->peekTaskCount()); } else { EXPECT_EQ(initialTaskCount, bcsMockContext->bcsCsr->peekTaskCount()); } device->getMemoryManager()->freeGraphicsMemory(kernelInfo.kernelAllocation); } HWTEST_TEMPLATED_F(BlitCopyTests, givenKernelAllocationInLocalMemoryWhenCreatingWithAllowedCpuAccessThenDontUseBcsForTransfer) { DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast(LocalMemoryAccessMode::CpuAccessAllowed)); DebugManager.flags.ForceNonSystemMemoryPlacement.set(1 << (static_cast(AllocationType::KERNEL_ISA) - 1)); uint32_t kernelHeap = 0; KernelInfo kernelInfo; kernelInfo.heapInfo.KernelHeapSize = 1; kernelInfo.heapInfo.pKernelHeap = &kernelHeap; auto initialTaskCount = bcsMockContext->bcsCsr->peekTaskCount(); kernelInfo.createKernelAllocation(device->getDevice(), false); EXPECT_EQ(initialTaskCount, bcsMockContext->bcsCsr->peekTaskCount()); device->getMemoryManager()->freeGraphicsMemory(kernelInfo.kernelAllocation); } HWTEST_TEMPLATED_F(BlitCopyTests, givenKernelAllocationInLocalMemoryWhenCreatingWithDisallowedCpuAccessAndDisabledBlitterThenFallbackToCpuCopy) { DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast(LocalMemoryAccessMode::CpuAccessDisallowed)); DebugManager.flags.ForceNonSystemMemoryPlacement.set(1 << (static_cast(AllocationType::KERNEL_ISA) - 1)); 
device->getExecutionEnvironment()->rootDeviceEnvironments[0]->getMutableHardwareInfo()->capabilityTable.blitterOperationsSupported = false; uint32_t kernelHeap = 0; KernelInfo kernelInfo; kernelInfo.heapInfo.KernelHeapSize = 1; kernelInfo.heapInfo.pKernelHeap = &kernelHeap; auto initialTaskCount = bcsMockContext->bcsCsr->peekTaskCount(); kernelInfo.createKernelAllocation(device->getDevice(), false); EXPECT_EQ(initialTaskCount, bcsMockContext->bcsCsr->peekTaskCount()); device->getMemoryManager()->freeGraphicsMemory(kernelInfo.kernelAllocation); } HWTEST_TEMPLATED_F(BlitCopyTests, givenLocalMemoryAccessNotAllowedWhenGlobalConstantsAreExportedThenUseBlitter) { DebugManager.flags.EnableLocalMemory.set(1); DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast(LocalMemoryAccessMode::CpuAccessDisallowed)); char constantData[128] = {}; ProgramInfo programInfo; programInfo.globalConstants.initData = constantData; programInfo.globalConstants.size = sizeof(constantData); auto mockLinkerInput = std::make_unique>(); mockLinkerInput->traits.exportsGlobalConstants = true; programInfo.linkerInput = std::move(mockLinkerInput); MockProgram program(bcsMockContext.get(), false, toClDeviceVector(*device)); EXPECT_EQ(0u, bcsMockContext->bcsCsr->peekTaskCount()); program.processProgramInfo(programInfo, *device); EXPECT_EQ(1u, bcsMockContext->bcsCsr->peekTaskCount()); auto rootDeviceIndex = device->getRootDeviceIndex(); ASSERT_NE(nullptr, program.getConstantSurface(rootDeviceIndex)); auto gpuAddress = reinterpret_cast(program.getConstantSurface(rootDeviceIndex)->getGpuAddress()); EXPECT_NE(nullptr, bcsMockContext->getSVMAllocsManager()->getSVMAlloc(gpuAddress)); } HWTEST_TEMPLATED_F(BlitCopyTests, givenKernelAllocationInLocalMemoryWithoutCpuAccessAllowedWhenSubstituteKernelHeapIsCalledThenUseBcsForTransfer) { DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast(LocalMemoryAccessMode::CpuAccessDisallowed)); DebugManager.flags.ForceNonSystemMemoryPlacement.set(1 << 
(static_cast(AllocationType::KERNEL_ISA) - 1)); device->getExecutionEnvironment()->rootDeviceEnvironments[0]->getMutableHardwareInfo()->capabilityTable.blitterOperationsSupported = true; MockKernelWithInternals kernel(*device); const size_t initialHeapSize = 0x40; kernel.kernelInfo.heapInfo.KernelHeapSize = initialHeapSize; kernel.kernelInfo.createKernelAllocation(device->getDevice(), false); ASSERT_NE(nullptr, kernel.kernelInfo.kernelAllocation); EXPECT_TRUE(kernel.kernelInfo.kernelAllocation->isAllocatedInLocalMemoryPool()); const size_t newHeapSize = initialHeapSize; char newHeap[newHeapSize]; auto initialTaskCount = bcsMockContext->bcsCsr->peekTaskCount(); kernel.mockKernel->substituteKernelHeap(newHeap, newHeapSize); EXPECT_EQ(initialTaskCount + 1, bcsMockContext->bcsCsr->peekTaskCount()); device->getMemoryManager()->freeGraphicsMemory(kernel.kernelInfo.kernelAllocation); } HWTEST_TEMPLATED_F(BlitCopyTests, givenKernelAllocationInLocalMemoryWithoutCpuAccessAllowedWhenLinkerRequiresPatchingOfInstructionSegmentsThenUseBcsForTransfer) { DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast(LocalMemoryAccessMode::CpuAccessDisallowed)); DebugManager.flags.ForceNonSystemMemoryPlacement.set(1 << (static_cast(AllocationType::KERNEL_ISA) - 1)); device->getExecutionEnvironment()->rootDeviceEnvironments[0]->getMutableHardwareInfo()->capabilityTable.blitterOperationsSupported = true; auto linkerInput = std::make_unique>(); linkerInput->traits.requiresPatchingOfInstructionSegments = true; KernelInfo kernelInfo = {}; std::vector kernelHeap; kernelHeap.resize(32, 7); kernelInfo.heapInfo.pKernelHeap = kernelHeap.data(); kernelInfo.heapInfo.KernelHeapSize = static_cast(kernelHeap.size()); kernelInfo.createKernelAllocation(device->getDevice(), false); ASSERT_NE(nullptr, kernelInfo.kernelAllocation); EXPECT_TRUE(kernelInfo.kernelAllocation->isAllocatedInLocalMemoryPool()); std::vector externalFunctions; MockProgram program{nullptr, false, toClDeviceVector(*device)}; 
program.getKernelInfoArray(device->getRootDeviceIndex()).push_back(&kernelInfo); program.setLinkerInput(device->getRootDeviceIndex(), std::move(linkerInput)); auto initialTaskCount = bcsMockContext->bcsCsr->peekTaskCount(); auto ret = program.linkBinary(&device->getDevice(), nullptr, nullptr, {}, externalFunctions); EXPECT_EQ(CL_SUCCESS, ret); EXPECT_EQ(initialTaskCount + 1, bcsMockContext->bcsCsr->peekTaskCount()); program.getKernelInfoArray(device->getRootDeviceIndex()).clear(); device->getMemoryManager()->freeGraphicsMemory(kernelInfo.kernelAllocation); } } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/buffer_operations_fixture.h000066400000000000000000000035721422164147700321130ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/ptr_math.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "opencl/test/unit_test/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/command_queue/enqueue_fixture.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "gtest/gtest.h" namespace NEO { struct EnqueueWriteBufferTypeTest : public CommandEnqueueFixture, public ::testing::Test { EnqueueWriteBufferTypeTest(void) : srcBuffer(nullptr) { } void SetUp() override { CommandEnqueueFixture::SetUp(); BufferDefaults::context = new MockContext; zeroCopyBuffer.reset(BufferHelper<>::create()); srcBuffer.reset(BufferHelper>::create()); } void TearDown() override { srcBuffer.reset(nullptr); zeroCopyBuffer.reset(nullptr); delete BufferDefaults::context; CommandEnqueueFixture::TearDown(); } protected: template void enqueueWriteBuffer(cl_bool blocking = EnqueueWriteBufferTraits::blocking) { auto retVal = EnqueueWriteBufferHelper<>::enqueueWriteBuffer( pCmdQ, srcBuffer.get(), blocking); EXPECT_EQ(CL_SUCCESS, retVal); parseCommands(*pCmdQ); } template void enqueueWriteBuffer(bool blocking, void 
*inputData, int size) { auto retVal = EnqueueWriteBufferHelper<>::enqueueWriteBuffer( pCmdQ, srcBuffer.get(), blocking, 0, size, inputData); EXPECT_EQ(CL_SUCCESS, retVal); } std::unique_ptr srcBuffer; std::unique_ptr zeroCopyBuffer; }; } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/command_enqueue_fixture.h000066400000000000000000000113071422164147700315370ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/compiler_hw_info_config.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/test/unit_test/command_queue/command_queue_fixture.h" #include "opencl/test/unit_test/command_stream/command_stream_fixture.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/helpers/cl_hw_parse.h" #include "opencl/test/unit_test/indirect_heap/indirect_heap_fixture.h" namespace NEO { struct CommandDeviceFixture : public ClDeviceFixture, public CommandQueueHwFixture { using CommandQueueHwFixture::SetUp; void SetUp(cl_command_queue_properties cmdQueueProperties = 0) { ClDeviceFixture::SetUp(); CommandQueueHwFixture::SetUp(pClDevice, cmdQueueProperties); } void TearDown() override { CommandQueueHwFixture::TearDown(); ClDeviceFixture::TearDown(); } }; struct CommandEnqueueBaseFixture : CommandDeviceFixture, public IndirectHeapFixture, public ClHardwareParse { using IndirectHeapFixture::SetUp; void SetUp(cl_command_queue_properties cmdQueueProperties = 0) { CommandDeviceFixture::SetUp(cmdQueueProperties); IndirectHeapFixture::SetUp(pCmdQ); ClHardwareParse::SetUp(); } void TearDown() override { ClHardwareParse::TearDown(); IndirectHeapFixture::TearDown(); CommandDeviceFixture::TearDown(); } }; struct 
CommandEnqueueFixture : public CommandEnqueueBaseFixture, public CommandStreamFixture { void SetUp(cl_command_queue_properties cmdQueueProperties = 0) { CommandEnqueueBaseFixture::SetUp(cmdQueueProperties); CommandStreamFixture::SetUp(pCmdQ); } void TearDown() override { CommandEnqueueBaseFixture::TearDown(); CommandStreamFixture::TearDown(); } }; struct NegativeFailAllocationCommandEnqueueBaseFixture : public CommandEnqueueBaseFixture { void SetUp() override { CommandEnqueueBaseFixture::SetUp(); failMemManager.reset(new FailMemoryManager(*pDevice->getExecutionEnvironment())); BufferDefaults::context = context; Image2dDefaults::context = context; buffer.reset(BufferHelper<>::create()); image.reset(ImageHelper::create()); ptr = static_cast(array); oldMemManager = pDevice->getExecutionEnvironment()->memoryManager.release(); pDevice->injectMemoryManager(failMemManager.release()); } void TearDown() override { pDevice->injectMemoryManager(oldMemManager); buffer.reset(nullptr); image.reset(nullptr); BufferDefaults::context = nullptr; Image2dDefaults::context = nullptr; CommandEnqueueBaseFixture::TearDown(); } std::unique_ptr buffer; std::unique_ptr image; std::unique_ptr failMemManager; char array[MemoryConstants::cacheLineSize]; void *ptr; MemoryManager *oldMemManager; }; template struct CommandQueueStateless : public CommandQueueHw { CommandQueueStateless(Context *context, ClDevice *device) : CommandQueueHw(context, device, nullptr, false){}; void enqueueHandlerHook(const unsigned int commandType, const MultiDispatchInfo &dispatchInfo) override { auto kernel = dispatchInfo.begin()->getKernel(); EXPECT_TRUE(kernel->getKernelInfo().kernelDescriptor.kernelAttributes.supportsBuffersBiggerThan4Gb()); if (kernel->getKernelInfo().getArgDescriptorAt(0).is()) { EXPECT_FALSE(kernel->getKernelInfo().getArgDescriptorAt(0).as().isPureStateful()); } } }; template struct CommandQueueStateful : public CommandQueueHw { CommandQueueStateful(Context *context, ClDevice *device) : 
CommandQueueHw(context, device, nullptr, false){}; void enqueueHandlerHook(const unsigned int commandType, const MultiDispatchInfo &dispatchInfo) override { auto kernel = dispatchInfo.begin()->getKernel(); EXPECT_FALSE(kernel->getKernelInfo().kernelDescriptor.kernelAttributes.supportsBuffersBiggerThan4Gb()); if (HwHelperHw::get().isStatelesToStatefullWithOffsetSupported()) { EXPECT_TRUE(kernel->allBufferArgsStateful); } } }; } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/command_queue_fixture.cpp000066400000000000000000000106471422164147700315550ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/command_queue/command_queue_fixture.h" #include "shared/source/device/device.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/mocks/mock_gmm.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/context/context.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "gtest/gtest.h" namespace NEO { // Global table of create functions extern CommandQueueCreateFunc commandQueueFactory[IGFX_MAX_CORE]; CommandQueue *CommandQueueHwFixture::createCommandQueue( ClDevice *pDevice, cl_command_queue_properties properties) { const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, properties, 0}; return createCommandQueue(pDevice, props); } CommandQueue *CommandQueueHwFixture::createCommandQueue( ClDevice *pDevice, const cl_command_queue_properties *properties) { if (pDevice == nullptr) { if (this->device == nullptr) { this->device = new MockClDevice{MockClDevice::createWithNewExecutionEnvironment(nullptr)}; createdDevice = true; } pDevice = this->device; } if (!context) { context = new MockContext(pDevice); } return createCommandQueue(pDevice, properties, context); } CommandQueue 
*CommandQueueHwFixture::createCommandQueue( ClDevice *pDevice, const cl_command_queue_properties *properties, Context *pContext) { auto funcCreate = commandQueueFactory[pDevice->getRenderCoreFamily()]; assert(nullptr != funcCreate); return funcCreate(pContext, pDevice, properties, false); } void CommandQueueHwFixture::forceMapBufferOnGpu(Buffer &buffer) { ClDevice *clDevice = buffer.getContext()->getDevice(0); buffer.setSharingHandler(new SharingHandler()); auto gfxAllocation = buffer.getGraphicsAllocation(clDevice->getRootDeviceIndex()); for (auto handleId = 0u; handleId < gfxAllocation->getNumGmms(); handleId++) { gfxAllocation->setGmm(new MockGmm(clDevice->getGmmClientContext()), handleId); } } void CommandQueueHwFixture::SetUp() { ASSERT_NE(nullptr, pCmdQ); context = new MockContext(); } void CommandQueueHwFixture::SetUp( ClDevice *pDevice, cl_command_queue_properties properties) { ASSERT_NE(nullptr, pDevice); context = new MockContext(pDevice); pCmdQ = createCommandQueue(pDevice, properties); ASSERT_NE(nullptr, pCmdQ); } void CommandQueueHwFixture::TearDown() { //resolve event dependencies if (pCmdQ) { auto blocked = pCmdQ->isQueueBlocked(); UNRECOVERABLE_IF(blocked); pCmdQ->release(); } if (context) { context->release(); } if (createdDevice) { delete device; } } CommandQueue *CommandQueueFixture::createCommandQueue( Context *context, ClDevice *device, cl_command_queue_properties properties, bool internalUsage) { const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, properties, 0}; return new MockCommandQueue( context, device, props, internalUsage); } void CommandQueueFixture::SetUp( Context *context, ClDevice *device, cl_command_queue_properties properties) { pCmdQ = createCommandQueue( context, device, properties, false); } void CommandQueueFixture::TearDown() { delete pCmdQ; pCmdQ = nullptr; } void OOQueueFixture ::SetUp(ClDevice *pDevice, cl_command_queue_properties properties) { ASSERT_NE(nullptr, pDevice); BaseClass::pCmdQ = 
BaseClass::createCommandQueue(pDevice, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE); ASSERT_NE(nullptr, BaseClass::pCmdQ); } void CommandQueueHwTest::SetUp() { ClDeviceFixture::SetUp(); cl_device_id device = pClDevice; ContextFixture::SetUp(1, &device); CommandQueueHwFixture::SetUp(pClDevice, 0); } void CommandQueueHwTest::TearDown() { CommandQueueHwFixture::TearDown(); ContextFixture::TearDown(); ClDeviceFixture::TearDown(); } void OOQueueHwTest::SetUp() { ClDeviceFixture::SetUp(); cl_device_id device = pClDevice; ContextFixture::SetUp(1, &device); OOQueueFixture::SetUp(pClDevice, 0); } void OOQueueHwTest::TearDown() { OOQueueFixture::TearDown(); ContextFixture::TearDown(); ClDeviceFixture::TearDown(); } } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/command_queue_fixture.h000066400000000000000000000107711422164147700312200ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/test_macros/test_checks_shared.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/fixtures/context_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "CL/cl.h" #include "gtest/gtest.h" namespace NEO { class Device; struct CommandQueueHwFixture { CommandQueue *createCommandQueue(ClDevice *device) { return createCommandQueue(device, cl_command_queue_properties{0}); } CommandQueue *createCommandQueue( ClDevice *device, cl_command_queue_properties properties); CommandQueue *createCommandQueue( ClDevice *device, const cl_command_queue_properties *properties); CommandQueue *createCommandQueue( ClDevice *device, const cl_command_queue_properties *properties, Context *context); static void forceMapBufferOnGpu(Buffer &buffer); virtual void SetUp(); virtual void SetUp(ClDevice 
*pDevice, cl_command_queue_properties properties); virtual void TearDown(); CommandQueue *pCmdQ = nullptr; MockClDevice *device = nullptr; MockContext *context = nullptr; bool createdDevice = false; }; struct OOQueueFixture : public CommandQueueHwFixture { typedef CommandQueueHwFixture BaseClass; void SetUp(ClDevice *pDevice, cl_command_queue_properties properties) override; }; struct CommandQueueFixture { virtual void SetUp( Context *context, ClDevice *device, cl_command_queue_properties properties); virtual void TearDown(); CommandQueue *createCommandQueue( Context *context, ClDevice *device, cl_command_queue_properties properties, bool internalUsage); CommandQueue *pCmdQ = nullptr; }; static const cl_command_queue_properties AllCommandQueueProperties[] = { 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, CL_QUEUE_ON_DEVICE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, CL_QUEUE_ON_DEVICE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE_DEFAULT, CL_QUEUE_PROFILING_ENABLE, CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_ON_DEVICE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_ON_DEVICE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE_DEFAULT}; static const cl_command_queue_properties DefaultCommandQueueProperties[] = { 0, CL_QUEUE_PROFILING_ENABLE, }; template struct CommandQueueHwBlitTest : ClDeviceFixture, ContextFixture, CommandQueueHwFixture, ::testing::Test { using ContextFixture::SetUp; void SetUp() override { hwInfo = *::defaultHwInfo; hwInfo.capabilityTable.blitterOperationsSupported = true; REQUIRE_FULL_BLITTER_OR_SKIP(&hwInfo); DebugManager.flags.EnableBlitterOperationsSupport.set(1); DebugManager.flags.EnableTimestampPacket.set(1); DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(1); ClDeviceFixture::SetUpImpl(&hwInfo); cl_device_id device = pClDevice; ContextFixture::SetUp(1, &device); cl_command_queue_properties queueProperties = ooq ? 
CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE : 0; CommandQueueHwFixture::SetUp(pClDevice, queueProperties); } void TearDown() override { CommandQueueHwFixture::TearDown(); ContextFixture::TearDown(); ClDeviceFixture::TearDown(); } HardwareInfo hwInfo{}; DebugManagerStateRestore state{}; }; using IoqCommandQueueHwBlitTest = CommandQueueHwBlitTest; using OoqCommandQueueHwBlitTest = CommandQueueHwBlitTest; struct CommandQueueHwTest : public ClDeviceFixture, public ContextFixture, public CommandQueueHwFixture, ::testing::Test { using ContextFixture::SetUp; void SetUp() override; void TearDown() override; cl_command_queue_properties properties; const HardwareInfo *pHwInfo = nullptr; }; struct OOQueueHwTest : public ClDeviceFixture, public ContextFixture, public OOQueueFixture, ::testing::Test { using ContextFixture::SetUp; OOQueueHwTest() { } void SetUp() override; void SetUp(ClDevice *pDevice, cl_command_queue_properties properties) override { } void TearDown() override; }; } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/command_queue_hw_1_tests.cpp000066400000000000000000001354231422164147700321470ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/mocks/mock_allocation_properties.h" #include "shared/test/common/mocks/mock_builtins.h" #include "shared/test/common/mocks/mock_csr.h" #include "shared/test/common/mocks/mock_os_library.h" #include "shared/test/common/mocks/mock_source_level_debugger.h" #include "shared/test/unit_test/helpers/gtest_helpers.h" #include "shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/test/unit_test/command_queue/command_queue_fixture.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include 
"opencl/test/unit_test/mocks/mock_event.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" using namespace NEO; HWTEST_F(CommandQueueHwTest, WhenConstructingTwoCommandQueuesThenOnlyOneDebugSurfaceIsAllocated) { ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); executionEnvironment->rootDeviceEnvironments[0]->debugger.reset(new MockActiveSourceLevelDebugger(new MockOsLibrary)); auto device = std::make_unique(MockDevice::create(executionEnvironment, 0u)); auto sipType = SipKernel::getSipKernelType(device->getDevice()); SipKernel::initSipKernel(sipType, device->getDevice()); MockCommandQueueHw mockCmdQueueHw1(context, device.get(), nullptr); auto dbgSurface = device->getGpgpuCommandStreamReceiver().getDebugSurfaceAllocation(); EXPECT_NE(dbgSurface, nullptr); MockCommandQueueHw mockCmdQueueHw2(context, device.get(), nullptr); EXPECT_EQ(dbgSurface, device->getGpgpuCommandStreamReceiver().getDebugSurfaceAllocation()); } HWTEST_F(CommandQueueHwTest, givenNoTimestampPacketsWhenWaitForTimestampsThenNoWaitAndTagIsNotUpdated) { DebugManagerStateRestore restorer; DebugManager.flags.EnableTimestampPacket.set(0); DebugManager.flags.EnableTimestampWait.set(4); ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); auto device = std::make_unique(MockDevice::create(executionEnvironment, 0u)); device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = false; MockCommandQueueHw cmdQ(context, device.get(), nullptr); auto taskCount = device->getUltCommandStreamReceiver().peekLatestFlushedTaskCount(); cmdQ.waitForTimestamps(101u); EXPECT_EQ(device->getUltCommandStreamReceiver().peekLatestFlushedTaskCount(), taskCount); } HWTEST_F(CommandQueueHwTest, WhenDebugSurfaceIsAllocatedThenBufferIsZeroed) { ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); executionEnvironment->rootDeviceEnvironments[0]->debugger.reset(new MockActiveSourceLevelDebugger(new MockOsLibrary)); auto device 
= std::make_unique(MockDevice::create(executionEnvironment, 0u)); auto sipType = SipKernel::getSipKernelType(device->getDevice()); SipKernel::initSipKernel(sipType, device->getDevice()); MockCommandQueueHw mockCmdQueueHw1(context, device.get(), nullptr); auto dbgSurface = device->getGpgpuCommandStreamReceiver().getDebugSurfaceAllocation(); EXPECT_NE(dbgSurface, nullptr); auto mem = dbgSurface->getUnderlyingBuffer(); ASSERT_NE(nullptr, mem); auto &stateSaveAreaHeader = SipKernel::getSipKernel(device->getDevice()).getStateSaveAreaHeader(); mem = ptrOffset(mem, stateSaveAreaHeader.size()); auto size = dbgSurface->getUnderlyingBufferSize() - stateSaveAreaHeader.size(); EXPECT_TRUE(memoryZeroed(mem, size)); } HWTEST_F(CommandQueueHwTest, WhenConstructingCommandQueueDebugOnButIgcDoesNotReturnSSAHDoNotCopyIt) { ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); executionEnvironment->rootDeviceEnvironments[0]->debugger.reset(new MockActiveSourceLevelDebugger(new MockOsLibrary)); MockGraphicsAllocation sipAlloc1; auto mockSip1 = std::make_unique(SipKernelType::DbgCsrLocal, &sipAlloc1); mockSip1->mockStateSaveAreaHeader.clear(); MockGraphicsAllocation sipAlloc2; auto mockSip2 = std::make_unique(SipKernelType::DbgCsr, &sipAlloc2); mockSip2->mockStateSaveAreaHeader.clear(); auto mockBuiltIns = new MockBuiltins(); mockBuiltIns->overrideSipKernel(std::move(mockSip1)); mockBuiltIns->overrideSipKernel(std::move(mockSip2)); executionEnvironment->rootDeviceEnvironments[0]->builtins.reset(mockBuiltIns); auto device = std::make_unique(MockDevice::create(executionEnvironment, 0u)); MockCommandQueueHw mockCmdQueueHw1(context, device.get(), nullptr); auto dbgSurface = device->getGpgpuCommandStreamReceiver().getDebugSurfaceAllocation(); EXPECT_NE(dbgSurface, nullptr); auto &stateSaveAreaHeader = SipKernel::getSipKernel(device->getDevice()).getStateSaveAreaHeader(); EXPECT_EQ(static_cast(0), stateSaveAreaHeader.size()); } HWTEST_F(CommandQueueHwTest, 
givenMultiDispatchInfoWhenAskingForAuxTranslationThenCheckMemObjectsCountAndDebugFlag) { DebugManagerStateRestore restore; MockBuffer buffer; KernelObjsForAuxTranslation kernelObjects; MultiDispatchInfo multiDispatchInfo; HardwareInfo *hwInfo = pClDevice->getExecutionEnvironment()->rootDeviceEnvironments[0]->getMutableHardwareInfo(); DebugManager.flags.ForceAuxTranslationMode.set(static_cast(AuxTranslationMode::Blit)); MockCommandQueueHw mockCmdQueueHw(context, pClDevice, nullptr); hwInfo->capabilityTable.blitterOperationsSupported = true; EXPECT_FALSE(mockCmdQueueHw.isBlitAuxTranslationRequired(multiDispatchInfo)); multiDispatchInfo.setKernelObjsForAuxTranslation(kernelObjects); EXPECT_FALSE(mockCmdQueueHw.isBlitAuxTranslationRequired(multiDispatchInfo)); kernelObjects.insert({KernelObjForAuxTranslation::Type::MEM_OBJ, &buffer}); EXPECT_TRUE(mockCmdQueueHw.isBlitAuxTranslationRequired(multiDispatchInfo)); hwInfo->capabilityTable.blitterOperationsSupported = false; EXPECT_FALSE(mockCmdQueueHw.isBlitAuxTranslationRequired(multiDispatchInfo)); hwInfo->capabilityTable.blitterOperationsSupported = true; DebugManager.flags.ForceAuxTranslationMode.set(static_cast(AuxTranslationMode::Builtin)); EXPECT_FALSE(mockCmdQueueHw.isBlitAuxTranslationRequired(multiDispatchInfo)); } HWTEST_F(CommandQueueHwTest, WhenEnqueuingBlockedMapUnmapOperationThenVirtualEventIsCreated) { CommandQueueHw *pHwQ = reinterpret_cast *>(pCmdQ); MockBuffer buffer; pHwQ->virtualEvent = nullptr; MockEventBuilder eventBuilder; MemObjSizeArray size = {{1, 1, 1}}; MemObjOffsetArray offset = {{0, 0, 0}}; pHwQ->enqueueBlockedMapUnmapOperation(nullptr, 0, MAP, &buffer, size, offset, false, eventBuilder); ASSERT_NE(nullptr, pHwQ->virtualEvent); pHwQ->virtualEvent->decRefInternal(); pHwQ->virtualEvent = nullptr; } class MockCommandStreamReceiverWithFailingFlushBatchedSubmission : public MockCommandStreamReceiver { public: using MockCommandStreamReceiver::MockCommandStreamReceiver; bool flushBatchedSubmissions() 
override { return false; } }; template struct MockCommandQueueHwWithOverwrittenCsr : public CommandQueueHw { using CommandQueueHw::CommandQueueHw; MockCommandStreamReceiverWithFailingFlushBatchedSubmission *csr; CommandStreamReceiver &getGpgpuCommandStreamReceiver() const override { return *csr; } }; HWTEST_F(CommandQueueHwTest, GivenCommandQueueWhenProcessDispatchForMarkerCalledThenEventAllocationIsMadeResident) { pDevice->getUltCommandStreamReceiver().timestampPacketWriteEnabled = false; MockCommandStreamReceiverWithFailingFlushBatchedSubmission csr(*pDevice->getExecutionEnvironment(), 0, pDevice->getDeviceBitfield()); auto myCmdQ = std::make_unique>(pCmdQ->getContextPtr(), pClDevice, nullptr, false); myCmdQ->csr = &csr; csr.osContext = &pCmdQ->getGpgpuCommandStreamReceiver().getOsContext(); std::unique_ptr event(new Event(myCmdQ.get(), CL_COMMAND_COPY_BUFFER, 0, 0)); ASSERT_NE(nullptr, event); GraphicsAllocation *allocation = event->getHwTimeStampNode()->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation(); ASSERT_NE(nullptr, allocation); cl_event a = event.get(); EventsRequest eventsRequest(0, nullptr, &a); uint32_t streamBuffer[100] = {}; NEO::LinearStream linearStream(streamBuffer, sizeof(streamBuffer)); CsrDependencies deps = {}; myCmdQ->processDispatchForMarker(*myCmdQ.get(), &linearStream, eventsRequest, deps); EXPECT_GT(csr.makeResidentCalledTimes, 0u); } HWTEST_F(CommandQueueHwTest, GivenCommandQueueWhenItIsCreatedThenInitDirectSubmissionIsCalledOnAllBcsEngines) { MockCommandQueueHw queue(pContext, pClDevice, nullptr); for (auto engine : queue.bcsEngines) { if (engine != nullptr) { auto csr = static_cast *>(engine->commandStreamReceiver); EXPECT_EQ(1u, csr->initDirectSubmissionCalled); } } } HWTEST_F(CommandQueueHwTest, givenCommandQueueWhenAskingForCacheFlushOnBcsThenReturnTrue) { auto pHwQ = static_cast *>(pCmdQ); EXPECT_TRUE(pHwQ->isCacheFlushForBcsRequired()); } HWTEST_F(CommandQueueHwTest, 
givenBlockedMapBufferCallWhenMemObjectIsPassedToCommandThenItsRefCountIsBeingIncreased) { CommandQueueHw *pHwQ = reinterpret_cast *>(pCmdQ); MockBuffer buffer; pHwQ->virtualEvent = nullptr; auto currentRefCount = buffer.getRefInternalCount(); MockEventBuilder eventBuilder; MemObjSizeArray size = {{1, 1, 1}}; MemObjOffsetArray offset = {{0, 0, 0}}; pHwQ->enqueueBlockedMapUnmapOperation(nullptr, 0, MAP, &buffer, size, offset, false, eventBuilder); EXPECT_EQ(currentRefCount + 1, buffer.getRefInternalCount()); ASSERT_NE(nullptr, pHwQ->virtualEvent); pHwQ->virtualEvent->decRefInternal(); pHwQ->virtualEvent = nullptr; EXPECT_EQ(currentRefCount, buffer.getRefInternalCount()); } HWTEST_F(CommandQueueHwTest, givenNoReturnEventWhenCallingEnqueueBlockedMapUnmapOperationThenVirtualEventIncrementsCommandQueueInternalRefCount) { CommandQueueHw *pHwQ = reinterpret_cast *>(pCmdQ); MockBuffer buffer; pHwQ->virtualEvent = nullptr; auto initialRefCountInternal = pHwQ->getRefInternalCount(); MockEventBuilder eventBuilder; MemObjSizeArray size = {{1, 1, 1}}; MemObjOffsetArray offset = {{0, 0, 0}}; pHwQ->enqueueBlockedMapUnmapOperation(nullptr, 0, MAP, &buffer, size, offset, false, eventBuilder); ASSERT_NE(nullptr, pHwQ->virtualEvent); auto refCountInternal = pHwQ->getRefInternalCount(); EXPECT_EQ(initialRefCountInternal + 1, refCountInternal); pHwQ->virtualEvent->decRefInternal(); pHwQ->virtualEvent = nullptr; } HWTEST_F(CommandQueueHwTest, WhenAddMapUnmapToWaitlistEventsThenDependenciesAreNotAddedIntoChild) { auto buffer = new MockBuffer; CommandQueueHw *pHwQ = reinterpret_cast *>(pCmdQ); auto returnEvent = new Event(pHwQ, CL_COMMAND_MAP_BUFFER, 0, 0); auto event = new Event(pHwQ, CL_COMMAND_MAP_BUFFER, 0, 0); const cl_event eventWaitList = event; pHwQ->virtualEvent = nullptr; MockEventBuilder eventBuilder(returnEvent); MemObjSizeArray size = {{1, 1, 1}}; MemObjOffsetArray offset = {{0, 0, 0}}; pHwQ->enqueueBlockedMapUnmapOperation(&eventWaitList, 1, MAP, buffer, size, offset, false, 
eventBuilder); EXPECT_EQ(returnEvent, pHwQ->virtualEvent); ASSERT_EQ(nullptr, event->peekChildEvents()); // Release API refcount (i.e. from workload's perspective) returnEvent->release(); event->decRefInternal(); buffer->decRefInternal(); } HWTEST_F(CommandQueueHwTest, givenMapCommandWhenZeroStateCommandIsSubmittedThenTaskCountIsNotBeingWaited) { auto buffer = new MockBuffer; MockCommandQueueHw mockCmdQueueHw(context, pClDevice, nullptr); MockEventBuilder eventBuilder; MemObjSizeArray size = {{1, 1, 1}}; MemObjOffsetArray offset = {{0, 0, 0}}; mockCmdQueueHw.enqueueBlockedMapUnmapOperation(nullptr, 0, MAP, buffer, size, offset, false, eventBuilder); EXPECT_NE(nullptr, mockCmdQueueHw.virtualEvent); mockCmdQueueHw.virtualEvent->setStatus(CL_COMPLETE); EXPECT_EQ(std::numeric_limits::max(), mockCmdQueueHw.latestTaskCountWaited); buffer->decRefInternal(); } HWTEST_F(CommandQueueHwTest, givenMapCommandWhenZeroStateCommandIsSubmittedOnNonZeroCopyBufferThenTaskCountIsBeingWaited) { auto buffer = new MockBuffer; buffer->isZeroCopy = false; MockCommandQueueHw mockCmdQueueHw(context, pClDevice, nullptr); MockEventBuilder eventBuilder; MemObjSizeArray size = {{1, 1, 1}}; MemObjOffsetArray offset = {{0, 0, 0}}; mockCmdQueueHw.enqueueBlockedMapUnmapOperation(nullptr, 0, MAP, buffer, size, offset, false, eventBuilder); EXPECT_NE(nullptr, mockCmdQueueHw.virtualEvent); mockCmdQueueHw.virtualEvent->setStatus(CL_COMPLETE); EXPECT_EQ(1u, mockCmdQueueHw.latestTaskCountWaited); buffer->decRefInternal(); } HWTEST_F(CommandQueueHwTest, GivenEventWhenEnqueuingBlockedMapUnmapOperationThenEventIsRetained) { CommandQueueHw *pHwQ = reinterpret_cast *>(pCmdQ); Event *returnEvent = new Event(pHwQ, CL_COMMAND_MAP_BUFFER, 0, 0); auto buffer = new MockBuffer; pHwQ->virtualEvent = nullptr; MockEventBuilder eventBuilder(returnEvent); MemObjSizeArray size = {{1, 1, 1}}; MemObjOffsetArray offset = {{0, 0, 0}}; pHwQ->enqueueBlockedMapUnmapOperation(nullptr, 0, MAP, buffer, size, offset, false, 
eventBuilder); eventBuilder.finalizeAndRelease(); EXPECT_EQ(returnEvent, pHwQ->virtualEvent); EXPECT_NE(nullptr, returnEvent->peekCommand()); // CommandQueue has retained this event, release it returnEvent->release(); pHwQ->virtualEvent = nullptr; delete returnEvent; buffer->decRefInternal(); } HWTEST_F(CommandQueueHwTest, GivenEventWhenEnqueuingBlockedMapUnmapOperationThenChildIsUnaffected) { auto buffer = new MockBuffer; CommandQueueHw *pHwQ = reinterpret_cast *>(pCmdQ); Event *returnEvent = new Event(pHwQ, CL_COMMAND_MAP_BUFFER, 0, 0); Event event(pHwQ, CL_COMMAND_MAP_BUFFER, 0, 0); pHwQ->virtualEvent = nullptr; pHwQ->virtualEvent = &event; //virtual event from regular event to stored in previousVirtualEvent pHwQ->virtualEvent->incRefInternal(); MockEventBuilder eventBuilder(returnEvent); MemObjSizeArray size = {{1, 1, 1}}; MemObjOffsetArray offset = {{0, 0, 0}}; pHwQ->enqueueBlockedMapUnmapOperation(nullptr, 0, MAP, buffer, size, offset, false, eventBuilder); EXPECT_EQ(returnEvent, pHwQ->virtualEvent); ASSERT_EQ(nullptr, event.peekChildEvents()); returnEvent->release(); buffer->decRefInternal(); } HWTEST_F(CommandQueueHwTest, GivenNonEmptyQueueOnBlockingWhenMappingBufferThenWillWaitForPrecedingCommandsToComplete) { struct MockCmdQ : CommandQueueHw { MockCmdQ(Context *context, ClDevice *device) : CommandQueueHw(context, device, 0, false) { finishWasCalled = false; } cl_int finish() override { finishWasCalled = true; return 0; } bool finishWasCalled; }; MockCmdQ cmdQ(context, pCmdQ->getDevice().getSpecializedDevice()); auto b1 = clCreateBuffer(context, CL_MEM_READ_WRITE, 20, nullptr, nullptr); auto b2 = clCreateBuffer(context, CL_MEM_READ_WRITE, 20, nullptr, nullptr); auto gatingEvent = clCreateUserEvent(context, nullptr); void *ptr1 = clEnqueueMapBuffer(&cmdQ, b1, CL_FALSE, CL_MAP_READ, 0, 8, 1, &gatingEvent, nullptr, nullptr); clEnqueueUnmapMemObject(&cmdQ, b1, ptr1, 0, nullptr, nullptr); ASSERT_FALSE(cmdQ.finishWasCalled); void *ptr2 = 
clEnqueueMapBuffer(&cmdQ, b2, CL_TRUE, CL_MAP_READ, 0, 8, 0, nullptr, nullptr, nullptr); ASSERT_TRUE(cmdQ.finishWasCalled); clSetUserEventStatus(gatingEvent, CL_COMPLETE); clEnqueueUnmapMemObject(pCmdQ, b2, ptr2, 0, nullptr, nullptr); clReleaseMemObject(b1); clReleaseMemObject(b2); clReleaseEvent(gatingEvent); } HWTEST_F(CommandQueueHwTest, GivenEventsWaitlistOnBlockingWhenMappingBufferThenWillWaitForEvents) { struct MockEvent : UserEvent { MockEvent(Context *ctx, uint32_t updateCountBeforeCompleted) : UserEvent(ctx), updateCount(0), updateCountBeforeCompleted(updateCountBeforeCompleted) { this->updateTaskCount(0, 0); this->taskLevel = 0; } void updateExecutionStatus() override { ++updateCount; if (updateCount == updateCountBeforeCompleted) { transitionExecutionStatus(CL_COMPLETE); } unblockEventsBlockedByThis(executionStatus); } uint32_t updateCount; uint32_t updateCountBeforeCompleted; }; MockEvent *me = new MockEvent(context, 1024); auto b1 = clCreateBuffer(context, CL_MEM_READ_WRITE, 20, nullptr, nullptr); cl_event meAsClEv = me; void *ptr1 = clEnqueueMapBuffer(pCmdQ, b1, CL_TRUE, CL_MAP_READ, 0, 8, 1, &meAsClEv, nullptr, nullptr); ASSERT_TRUE(me->updateStatusAndCheckCompletion()); ASSERT_LE(me->updateCountBeforeCompleted, me->updateCount); clEnqueueUnmapMemObject(pCmdQ, b1, ptr1, 0, nullptr, nullptr); clReleaseMemObject(b1); me->release(); } HWTEST_F(CommandQueueHwTest, GivenNotCompleteUserEventPassedToEnqueueWhenEventIsUnblockedThenAllSurfacesForBlockedCommandsAreMadeResident) { int32_t executionStamp = 0; auto mockCSR = new MockCsr(executionStamp, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(mockCSR); auto userEvent = make_releaseable(context); KernelInfo kernelInfo; MockKernelWithInternals mockKernelWithInternals(*pClDevice); auto mockKernel = mockKernelWithInternals.mockKernel; auto mockProgram = mockKernelWithInternals.mockProgram; size_t offset = 0; size_t size = 1; 
GraphicsAllocation *constantSurface = mockCSR->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{mockCSR->getRootDeviceIndex(), MemoryConstants::pageSize}); mockProgram->setConstantSurface(constantSurface); GraphicsAllocation *printfSurface = mockCSR->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{mockCSR->getRootDeviceIndex(), MemoryConstants::pageSize}); GraphicsAllocation *privateSurface = mockCSR->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{mockCSR->getRootDeviceIndex(), MemoryConstants::pageSize}); mockKernel->setPrivateSurface(privateSurface, 10); cl_event blockedEvent = userEvent.get(); pCmdQ->enqueueKernel(mockKernel, 1, &offset, &size, &size, 1, &blockedEvent, nullptr); userEvent->setStatus(CL_COMPLETE); EXPECT_TRUE(mockCSR->isMadeResident(constantSurface)); EXPECT_TRUE(mockCSR->isMadeResident(privateSurface)); mockKernel->setPrivateSurface(nullptr, 0); mockProgram->setConstantSurface(nullptr); mockCSR->getMemoryManager()->freeGraphicsMemory(privateSurface); mockCSR->getMemoryManager()->freeGraphicsMemory(printfSurface); mockCSR->getMemoryManager()->freeGraphicsMemory(constantSurface); } HWTEST_F(CommandQueueHwTest, whenReleaseQueueCalledThenFlushIsCalled) { cl_int retVal = 0; auto mockCmdQ = new MockCommandQueueHw(context, pClDevice, 0); mockCmdQ->incRefInternal(); releaseQueue(mockCmdQ, retVal); EXPECT_TRUE(mockCmdQ->flushCalled); //this call will release the queue mockCmdQ->decRefInternal(); } using BlockedCommandQueueTest = CommandQueueHwTest; HWTEST_F(BlockedCommandQueueTest, givenCommandQueueWhenBlockedCommandIsBeingSubmittedThenQueueHeapsAreNotUsed) { UserEvent userEvent(context); MockKernelWithInternals mockKernelWithInternals(*pClDevice); auto mockKernel = mockKernelWithInternals.mockKernel; size_t offset = 0; size_t size = 1; cl_event blockedEvent = &userEvent; pCmdQ->enqueueKernel(mockKernel, 1, &offset, &size, &size, 1, &blockedEvent, 
nullptr); userEvent.setStatus(CL_COMPLETE); auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::Type::INDIRECT_OBJECT, 4096u); auto &dsh = pCmdQ->getIndirectHeap(IndirectHeap::Type::DYNAMIC_STATE, 4096u); auto &ssh = pCmdQ->getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 4096u); uint32_t defaultSshUse = UnitTestHelper::getDefaultSshUsage(); EXPECT_EQ(0u, ioh.getUsed()); EXPECT_EQ(0u, dsh.getUsed()); EXPECT_EQ(defaultSshUse, ssh.getUsed()); pCmdQ->isQueueBlocked(); } HWTEST_F(BlockedCommandQueueTest, givenCommandQueueWithUsedHeapsWhenBlockedCommandIsBeingSubmittedThenQueueHeapsAreNotUsed) { UserEvent userEvent(context); MockKernelWithInternals mockKernelWithInternals(*pClDevice); auto mockKernel = mockKernelWithInternals.mockKernel; size_t offset = 0; size_t size = 1; cl_event blockedEvent = &userEvent; auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::Type::INDIRECT_OBJECT, 4096u); auto &dsh = pCmdQ->getIndirectHeap(IndirectHeap::Type::DYNAMIC_STATE, 4096u); auto &ssh = pCmdQ->getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 4096u); auto spaceToUse = 4u; ioh.getSpace(spaceToUse); dsh.getSpace(spaceToUse); ssh.getSpace(spaceToUse); pCmdQ->enqueueKernel(mockKernel, 1, &offset, &size, &size, 1, &blockedEvent, nullptr); userEvent.setStatus(CL_COMPLETE); uint32_t sshSpaceUse = spaceToUse + UnitTestHelper::getDefaultSshUsage(); EXPECT_EQ(spaceToUse, ioh.getUsed()); EXPECT_EQ(spaceToUse, dsh.getUsed()); EXPECT_EQ(sshSpaceUse, ssh.getUsed()); pCmdQ->isQueueBlocked(); } HWTEST_F(BlockedCommandQueueTest, givenCommandQueueWhichHasSomeUnusedHeapsWhenBlockedCommandIsBeingSubmittedThenThoseHeapsAreBeingUsed) { UserEvent userEvent(context); MockKernelWithInternals mockKernelWithInternals(*pClDevice); auto mockKernel = mockKernelWithInternals.mockKernel; size_t offset = 0; size_t size = 1; cl_event blockedEvent = &userEvent; auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::Type::INDIRECT_OBJECT, 4096u); auto &dsh = pCmdQ->getIndirectHeap(IndirectHeap::Type::DYNAMIC_STATE, 4096u); 
auto &ssh = pCmdQ->getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 4096u); auto iohBase = ioh.getCpuBase(); auto dshBase = dsh.getCpuBase(); auto sshBase = ssh.getCpuBase(); pCmdQ->enqueueKernel(mockKernel, 1, &offset, &size, &size, 1, &blockedEvent, nullptr); userEvent.setStatus(CL_COMPLETE); EXPECT_EQ(iohBase, ioh.getCpuBase()); EXPECT_EQ(dshBase, dsh.getCpuBase()); EXPECT_EQ(sshBase, ssh.getCpuBase()); pCmdQ->isQueueBlocked(); } HWTEST_F(BlockedCommandQueueTest, givenEnqueueBlockedByUserEventWhenItIsEnqueuedThenKernelReferenceCountIsIncreased) { UserEvent userEvent(context); MockKernelWithInternals mockKernelWithInternals(*pClDevice); auto mockKernel = mockKernelWithInternals.mockKernel; size_t offset = 0; size_t size = 1; cl_event blockedEvent = &userEvent; auto currentRefCount = mockKernel->getRefInternalCount(); pCmdQ->enqueueKernel(mockKernel, 1, &offset, &size, &size, 1, &blockedEvent, nullptr); EXPECT_EQ(currentRefCount + 1, mockKernel->getRefInternalCount()); userEvent.setStatus(CL_COMPLETE); pCmdQ->isQueueBlocked(); EXPECT_EQ(currentRefCount, mockKernel->getRefInternalCount()); } using CommandQueueHwRefCountTest = CommandQueueHwTest; HWTEST_F(CommandQueueHwRefCountTest, givenBlockedCmdQWhenNewBlockedEnqueueReplacesVirtualEventThenPreviousVirtualEventDecrementsCmdQRefCount) { cl_int retVal = 0; auto mockCmdQ = new MockCommandQueueHw(context, pClDevice, 0); UserEvent userEvent(context); MockKernelWithInternals mockKernelWithInternals(*pClDevice); auto mockKernel = mockKernelWithInternals.mockKernel; size_t offset = 0; size_t size = 1; cl_event blockedEvent = &userEvent; EXPECT_EQ(1, mockCmdQ->getRefInternalCount()); mockCmdQ->enqueueKernel(mockKernel, 1, &offset, &size, &size, 1, &blockedEvent, nullptr); // UserEvent on waitlist doesn't increments cmdQ refCount, virtualEvent increments refCount EXPECT_EQ(2, mockCmdQ->getRefInternalCount()); mockCmdQ->enqueueKernel(mockKernel, 1, &offset, &size, &size, 1, &blockedEvent, nullptr); // new virtual event 
increments refCount EXPECT_EQ(3, mockCmdQ->getRefInternalCount()); userEvent.setStatus(CL_COMPLETE); // UserEvent is set to complete and event tree is unblocked, queue has only 1 refference to itself after this operation EXPECT_EQ(2, mockCmdQ->getRefInternalCount()); //this call will release the queue releaseQueue(mockCmdQ, retVal); } HWTEST_F(CommandQueueHwRefCountTest, givenBlockedCmdQWithOutputEventAsVirtualEventWhenNewBlockedEnqueueReplacesVirtualEventCreatedFromOutputEventThenPreviousVirtualEventDoesntDecrementRefCount) { cl_int retVal = 0; auto mockCmdQ = new MockCommandQueueHw(context, pClDevice, 0); UserEvent userEvent(context); MockKernelWithInternals mockKernelWithInternals(*pClDevice); auto mockKernel = mockKernelWithInternals.mockKernel; size_t offset = 0; size_t size = 1; cl_event eventOut = nullptr; cl_event blockedEvent = &userEvent; EXPECT_EQ(1, mockCmdQ->getRefInternalCount()); mockCmdQ->enqueueKernel(mockKernel, 1, &offset, &size, &size, 1, &blockedEvent, nullptr); // UserEvent on waitlist doesn't increment cmdQ refCount, virtualEvent increments refCount EXPECT_EQ(2, mockCmdQ->getRefInternalCount()); mockCmdQ->enqueueKernel(mockKernel, 1, &offset, &size, &size, 1, &blockedEvent, &eventOut); //output event increments EXPECT_EQ(3, mockCmdQ->getRefInternalCount()); mockCmdQ->enqueueKernel(mockKernel, 1, &offset, &size, &size, 1, &blockedEvent, nullptr); // previous virtualEvent which was outputEvent DOES NOT decrement refCount, // new virtual event increments refCount EXPECT_EQ(4, mockCmdQ->getRefInternalCount()); // unblocking deletes 2 virtualEvents userEvent.setStatus(CL_COMPLETE); EXPECT_EQ(3, mockCmdQ->getRefInternalCount()); auto pEventOut = castToObject(eventOut); pEventOut->release(); // releasing output event decrements refCount EXPECT_EQ(2, mockCmdQ->getRefInternalCount()); mockCmdQ->isQueueBlocked(); releaseQueue(mockCmdQ, retVal); } HWTEST_F(CommandQueueHwRefCountTest, 
givenSeriesOfBlockedEnqueuesWhenEveryEventIsDeletedAndCmdQIsReleasedThenCmdQIsDeleted) { cl_int retVal = 0; auto mockCmdQ = new MockCommandQueueHw(context, pClDevice, 0); UserEvent *userEvent = new UserEvent(context); MockKernelWithInternals mockKernelWithInternals(*pClDevice); auto mockKernel = mockKernelWithInternals.mockKernel; size_t offset = 0; size_t size = 1; cl_event eventOut = nullptr; cl_event blockedEvent = userEvent; EXPECT_EQ(1, mockCmdQ->getRefInternalCount()); mockCmdQ->enqueueKernel(mockKernel, 1, &offset, &size, &size, 1, &blockedEvent, nullptr); // UserEvent on waitlist doesn't increment cmdQ refCount, virtualEvent increments refCount EXPECT_EQ(2, mockCmdQ->getRefInternalCount()); mockCmdQ->enqueueKernel(mockKernel, 1, &offset, &size, &size, 1, &blockedEvent, &eventOut); //output event increments refCount EXPECT_EQ(3, mockCmdQ->getRefInternalCount()); mockCmdQ->enqueueKernel(mockKernel, 1, &offset, &size, &size, 1, &blockedEvent, nullptr); // previous virtualEvent which was outputEvent DOES NOT decrement refCount, // new virtual event increments refCount EXPECT_EQ(4, mockCmdQ->getRefInternalCount()); // unblocking deletes 2 virtualEvents userEvent->setStatus(CL_COMPLETE); userEvent->release(); EXPECT_EQ(3, mockCmdQ->getRefInternalCount()); auto pEventOut = castToObject(eventOut); pEventOut->release(); // releasing output event decrements refCount EXPECT_EQ(2, mockCmdQ->getRefInternalCount()); mockCmdQ->isQueueBlocked(); EXPECT_EQ(1, mockCmdQ->getRefInternalCount()); releaseQueue(mockCmdQ, retVal); } HWTEST_F(CommandQueueHwRefCountTest, givenSeriesOfBlockedEnqueuesWhenCmdQIsReleasedBeforeOutputEventThenOutputEventDeletesCmdQ) { cl_int retVal = 0; auto mockCmdQ = new MockCommandQueueHw(context, pClDevice, 0); UserEvent *userEvent = new UserEvent(context); MockKernelWithInternals mockKernelWithInternals(*pClDevice); auto mockKernel = mockKernelWithInternals.mockKernel; size_t offset = 0; size_t size = 1; cl_event eventOut = nullptr; cl_event 
blockedEvent = userEvent; EXPECT_EQ(1, mockCmdQ->getRefInternalCount()); mockCmdQ->enqueueKernel(mockKernel, 1, &offset, &size, &size, 1, &blockedEvent, nullptr); // UserEvent on waitlist doesn't increment cmdQ refCount, virtualEvent increments refCount EXPECT_EQ(2, mockCmdQ->getRefInternalCount()); mockCmdQ->enqueueKernel(mockKernel, 1, &offset, &size, &size, 1, &blockedEvent, &eventOut); //output event increments refCount EXPECT_EQ(3, mockCmdQ->getRefInternalCount()); mockCmdQ->enqueueKernel(mockKernel, 1, &offset, &size, &size, 1, &blockedEvent, nullptr); // previous virtualEvent which was outputEvent DOES NOT decrement refCount, // new virtual event increments refCount EXPECT_EQ(4, mockCmdQ->getRefInternalCount()); userEvent->setStatus(CL_COMPLETE); userEvent->release(); // releasing UserEvent doesn't change the queue refCount EXPECT_EQ(3, mockCmdQ->getRefInternalCount()); releaseQueue(mockCmdQ, retVal); // releasing cmdQ decrements refCount EXPECT_EQ(1, mockCmdQ->getRefInternalCount()); auto pEventOut = castToObject(eventOut); pEventOut->release(); } HWTEST_F(CommandQueueHwTest, GivenEventThatIsNotCompletedWhenFinishIsCalledAndItGetsCompletedThenItStatusIsUpdatedAfterFinishCall) { cl_int ret; DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableAsyncEventsHandler.set(false); struct ClbFuncTempStruct { static void CL_CALLBACK ClbFuncT(cl_event e, cl_int execStatus, void *valueForUpdate) { *((cl_int *)valueForUpdate) = 1; } }; auto Value = 0u; auto ev = new Event(this->pCmdQ, CL_COMMAND_COPY_BUFFER, 3, CompletionStamp::notReady + 1); clSetEventCallback(ev, CL_COMPLETE, ClbFuncTempStruct::ClbFuncT, &Value); auto &csr = this->pCmdQ->getGpgpuCommandStreamReceiver(); EXPECT_GT(3u, csr.peekTaskCount()); *csr.getTagAddress() = CompletionStamp::notReady + 1; ret = clFinish(this->pCmdQ); ASSERT_EQ(CL_SUCCESS, ret); ev->updateExecutionStatus(); EXPECT_EQ(1u, Value); ev->decRefInternal(); } HWTEST_F(CommandQueueHwTest, 
GivenMultiTileQueueWhenEventNotCompletedAndFinishIsCalledThenItGetsCompletedOnAllTilesAndItStatusIsUpdatedAfterFinishCall) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableAsyncEventsHandler.set(false); auto &csr = this->pCmdQ->getGpgpuCommandStreamReceiver(); csr.setActivePartitions(2u); auto ultCsr = reinterpret_cast *>(&csr); ultCsr->postSyncWriteOffset = 32; auto tagAddress = csr.getTagAddress(); *ptrOffset(tagAddress, 32) = *tagAddress; struct ClbFuncTempStruct { static void CL_CALLBACK ClbFuncT(cl_event e, cl_int execStatus, void *valueForUpdate) { *static_cast(valueForUpdate) = 1; } }; auto value = 0u; auto ev = new Event(this->pCmdQ, CL_COMMAND_COPY_BUFFER, 3, CompletionStamp::notReady + 1); clSetEventCallback(ev, CL_COMPLETE, ClbFuncTempStruct::ClbFuncT, &value); EXPECT_GT(3u, csr.peekTaskCount()); *tagAddress = CompletionStamp::notReady + 1; tagAddress = ptrOffset(tagAddress, 32); *tagAddress = CompletionStamp::notReady + 1; cl_int ret = clFinish(this->pCmdQ); ASSERT_EQ(CL_SUCCESS, ret); ev->updateExecutionStatus(); EXPECT_EQ(1u, value); ev->decRefInternal(); } HWTEST_F(CommandQueueHwTest, givenCommandQueueThatIsBlockedAndUsesCpuCopyWhenEventIsReturnedThenItIsNotReady) { CommandQueueHw *cmdQHw = static_cast *>(this->pCmdQ); MockBuffer buffer; cl_event returnEvent = nullptr; auto retVal = CL_SUCCESS; cmdQHw->taskLevel = CompletionStamp::notReady; size_t offset = 0; size_t size = 4096u; TransferProperties transferProperties(&buffer, CL_COMMAND_READ_BUFFER, 0, false, &offset, &size, nullptr, false, pDevice->getRootDeviceIndex()); EventsRequest eventsRequest(0, nullptr, &returnEvent); cmdQHw->cpuDataTransferHandler(transferProperties, eventsRequest, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(CompletionStamp::notReady, castToObject(returnEvent)->peekTaskCount()); clReleaseEvent(returnEvent); } HWTEST_F(CommandQueueHwTest, givenEventWithRecordedCommandWhenSubmitCommandIsCalledThenTaskCountMustBeUpdatedFromOtherThread) { std::atomic_bool 
go{false}; struct mockEvent : public Event { using Event::Event; using Event::eventWithoutCommand; using Event::submitCommand; void synchronizeTaskCount() override { *atomicFence = true; Event::synchronizeTaskCount(); } uint32_t synchronizeCallCount = 0u; std::atomic_bool *atomicFence = nullptr; }; mockEvent neoEvent(this->pCmdQ, CL_COMMAND_MAP_BUFFER, CompletionStamp::notReady, CompletionStamp::notReady); neoEvent.atomicFence = &go; EXPECT_TRUE(neoEvent.eventWithoutCommand); neoEvent.eventWithoutCommand = false; EXPECT_EQ(CompletionStamp::notReady, neoEvent.peekTaskCount()); std::thread t([&]() { while (!go) { } neoEvent.updateTaskCount(77u, 0); }); neoEvent.submitCommand(false); EXPECT_EQ(77u, neoEvent.peekTaskCount()); t.join(); } HWTEST_F(CommandQueueHwTest, givenNonBlockedEnqueueWhenEventIsPassedThenUpdateItsFlushStamp) { CommandQueueHw *cmdQHw = static_cast *>(this->pCmdQ); MockKernelWithInternals mockKernelWithInternals(*pClDevice); auto &csr = pDevice->getUltCommandStreamReceiver(); csr.flushStamp->setStamp(5); size_t offset = 0; size_t size = 1; cl_event event; auto retVal = cmdQHw->enqueueKernel(mockKernelWithInternals.mockKernel, 1, &offset, &size, nullptr, 0, nullptr, &event); ASSERT_EQ(CL_SUCCESS, retVal); auto eventObj = castToObject(event); EXPECT_EQ(csr.flushStamp->peekStamp(), eventObj->flushStamp->peekStamp()); EXPECT_EQ(csr.flushStamp->peekStamp(), pCmdQ->flushStamp->peekStamp()); eventObj->release(); } HWTEST_F(CommandQueueHwTest, givenBlockedEnqueueWhenEventIsPassedThenDontUpdateItsFlushStamp) { UserEvent userEvent; cl_event event, clUserEvent; CommandQueueHw *cmdQHw = static_cast *>(this->pCmdQ); MockKernelWithInternals mockKernelWithInternals(*pClDevice); auto &csr = pDevice->getUltCommandStreamReceiver(); csr.flushStamp->setStamp(5); size_t offset = 0; size_t size = 1; clUserEvent = &userEvent; auto retVal = cmdQHw->enqueueKernel(mockKernelWithInternals.mockKernel, 1, &offset, &size, nullptr, 1, &clUserEvent, nullptr); ASSERT_EQ(CL_SUCCESS, 
retVal); EXPECT_TRUE(cmdQHw->isQueueBlocked()); retVal = cmdQHw->enqueueKernel(mockKernelWithInternals.mockKernel, 1, &offset, &size, nullptr, 0, nullptr, &event); ASSERT_EQ(CL_SUCCESS, retVal); FlushStamp expectedFlushStamp = 0; auto eventObj = castToObject(event); EXPECT_EQ(expectedFlushStamp, eventObj->flushStamp->peekStamp()); EXPECT_EQ(expectedFlushStamp, pCmdQ->flushStamp->peekStamp()); eventObj->release(); } HWTEST_F(CommandQueueHwTest, givenBlockedInOrderCmdQueueAndAsynchronouslyCompletedEventWhenEnqueueCompletesVirtualEventThenUpdatedTaskLevelIsPassedToEnqueueAndFlushTask) { CommandQueueHw *cmdQHw = static_cast *>(this->pCmdQ); int32_t executionStamp = 0; auto mockCSR = new MockCsr(executionStamp, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(mockCSR); MockKernelWithInternals mockKernelWithInternals(*pClDevice); auto mockKernel = mockKernelWithInternals.mockKernel; size_t offset = 0; size_t size = 1; auto event = new Event(cmdQHw, CL_COMMAND_NDRANGE_KERNEL, 10, 0); uint32_t virtualEventTaskLevel = 77; uint32_t virtualEventTaskCount = 80; auto virtualEvent = new Event(cmdQHw, CL_COMMAND_NDRANGE_KERNEL, virtualEventTaskLevel, virtualEventTaskCount); cl_event blockedEvent = event; // Put Queue in blocked state by assigning virtualEvent event->addChild(*virtualEvent); virtualEvent->incRefInternal(); cmdQHw->virtualEvent = virtualEvent; *mockCSR->getTagAddress() = 0u; cmdQHw->taskLevel = 23; cmdQHw->enqueueKernel(mockKernel, 1, &offset, &size, &size, 1, &blockedEvent, nullptr); //new virtual event is created on enqueue, bind it to the created virtual event EXPECT_NE(cmdQHw->virtualEvent, virtualEvent); EXPECT_EQ(virtualEvent->peekExecutionStatus(), CL_QUEUED); event->setStatus(CL_SUBMITTED); EXPECT_EQ(virtualEvent->peekExecutionStatus(), CL_SUBMITTED); EXPECT_FALSE(cmdQHw->isQueueBlocked()); // +1 for next level after virtualEvent is unblocked // +1 as virtualEvent was a parent 
for event with actual command that is being submitted EXPECT_EQ(virtualEventTaskLevel + 2, cmdQHw->taskLevel); //command being submitted was dependant only on virtual event hence only +1 EXPECT_EQ(virtualEventTaskLevel + 1, mockCSR->lastTaskLevelToFlushTask); *mockCSR->getTagAddress() = initialHardwareTag; virtualEvent->decRefInternal(); event->decRefInternal(); } HWTEST_F(CommandQueueHwTest, givenBlockedOutOfOrderQueueWhenUserEventIsSubmittedThenNDREventIsSubmittedAsWell) { CommandQueueHw *cmdQHw = static_cast *>(this->pCmdQ); auto &mockCsr = pDevice->getUltCommandStreamReceiver(); MockKernelWithInternals mockKernelWithInternals(*pClDevice); auto mockKernel = mockKernelWithInternals.mockKernel; size_t offset = 0; size_t size = 1; cl_event userEvent = clCreateUserEvent(this->pContext, nullptr); cl_event blockedEvent = nullptr; *mockCsr.getTagAddress() = 0u; cmdQHw->enqueueKernel(mockKernel, 1, &offset, &size, &size, 1, &userEvent, &blockedEvent); auto neoEvent = castToObject(blockedEvent); EXPECT_EQ(neoEvent->peekExecutionStatus(), CL_QUEUED); neoEvent->updateExecutionStatus(); EXPECT_EQ(neoEvent->peekExecutionStatus(), CL_QUEUED); EXPECT_EQ(neoEvent->peekTaskCount(), CompletionStamp::notReady); clSetUserEventStatus(userEvent, 0u); EXPECT_EQ(neoEvent->peekExecutionStatus(), CL_SUBMITTED); EXPECT_EQ(neoEvent->peekTaskCount(), 1u); *mockCsr.getTagAddress() = initialHardwareTag; clReleaseEvent(blockedEvent); clReleaseEvent(userEvent); } HWTEST_F(CommandQueueHwTest, givenWalkerSplitEnqueueNDRangeWhenNoBlockedThenKernelMakeResidentCalledOnce) { KernelInfo kernelInfo; MockKernelWithInternals mockKernelWithInternals(*pClDevice); auto mockKernel = mockKernelWithInternals.mockKernel; auto mockProgram = mockKernelWithInternals.mockProgram; mockProgram->setAllowNonUniform(true); auto &csr = pDevice->getUltCommandStreamReceiver(); csr.storeMakeResidentAllocations = true; size_t offset = 0; size_t gws = 63; size_t lws = 16; cl_int status = pCmdQ->enqueueKernel(mockKernel, 1, 
&offset, &gws, &lws, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, status); EXPECT_EQ(1u, mockKernel->makeResidentCalls); } HWTEST_F(CommandQueueHwTest, givenWalkerSplitEnqueueNDRangeWhenBlockedThenKernelGetResidencyCalledOnce) { UserEvent userEvent(context); KernelInfo kernelInfo; MockKernelWithInternals mockKernelWithInternals(*pClDevice); auto mockKernel = mockKernelWithInternals.mockKernel; auto mockProgram = mockKernelWithInternals.mockProgram; mockProgram->setAllowNonUniform(true); auto &csr = pDevice->getUltCommandStreamReceiver(); csr.storeMakeResidentAllocations = true; size_t offset = 0; size_t gws = 63; size_t lws = 16; cl_event blockedEvent = &userEvent; cl_int status = pCmdQ->enqueueKernel(mockKernel, 1, &offset, &gws, &lws, 1, &blockedEvent, nullptr); EXPECT_EQ(CL_SUCCESS, status); EXPECT_EQ(1u, mockKernel->getResidencyCalls); userEvent.setStatus(CL_COMPLETE); pCmdQ->isQueueBlocked(); } HWTEST_F(CommandQueueHwTest, givenKernelSplitEnqueueReadBufferWhenBlockedThenEnqueueSurfacesMakeResidentIsCalledOnce) { UserEvent userEvent(context); auto &csr = pDevice->getUltCommandStreamReceiver(); csr.storeMakeResidentAllocations = true; csr.timestampPacketWriteEnabled = false; BufferDefaults::context = context; auto buffer = clUniquePtr(BufferHelper<>::create()); GraphicsAllocation *bufferAllocation = buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex()); char array[3 * MemoryConstants::cacheLineSize]; char *ptr = &array[MemoryConstants::cacheLineSize]; ptr = alignUp(ptr, MemoryConstants::cacheLineSize); ptr -= 1; cl_event blockedEvent = &userEvent; cl_int status = pCmdQ->enqueueReadBuffer(buffer.get(), CL_FALSE, 0, MemoryConstants::cacheLineSize, ptr, nullptr, 1, &blockedEvent, nullptr); EXPECT_EQ(CL_SUCCESS, status); userEvent.setStatus(CL_COMPLETE); std::map::iterator it = csr.makeResidentAllocations.begin(); for (; it != csr.makeResidentAllocations.end(); it++) { uint32_t expected = 1u; //Buffer surface will be added three times (for each kernel from 
split and as a base range of enqueueReadBuffer call) if (it->first == bufferAllocation) { expected = 3u; } EXPECT_EQ(expected, it->second); } pCmdQ->isQueueBlocked(); } HWTEST_F(CommandQueueHwTest, givenDefaultHwCommandQueueThenCacheFlushAfterWalkerIsNotNeeded) { EXPECT_FALSE(pCmdQ->getRequiresCacheFlushAfterWalker()); } HWTEST_F(CommandQueueHwTest, givenSizeWhenForceStatelessIsCalledThenCorrectValueIsReturned) { if (is32bit) { GTEST_SKIP(); } struct MockCommandQueueHw : public CommandQueueHw { using CommandQueueHw::forceStateless; }; MockCommandQueueHw *pCmdQHw = reinterpret_cast(pCmdQ); uint64_t bigSize = 4ull * MemoryConstants::gigaByte; EXPECT_TRUE(pCmdQHw->forceStateless(static_cast(bigSize))); uint64_t smallSize = bigSize - 1; EXPECT_FALSE(pCmdQHw->forceStateless(static_cast(smallSize))); } HWTEST_F(CommandQueueHwTest, givenFlushWhenFlushBatchedSubmissionsFailsThenErrorIsRetured) { MockCommandQueueHwWithOverwrittenCsr cmdQueue(context, pClDevice, nullptr, false); MockCommandStreamReceiverWithFailingFlushBatchedSubmission csr(*pDevice->executionEnvironment, 0, pDevice->getDeviceBitfield()); cmdQueue.csr = &csr; cl_int errorCode = cmdQueue.flush(); EXPECT_EQ(CL_OUT_OF_RESOURCES, errorCode); } HWTEST_F(CommandQueueHwTest, givenFinishWhenFlushBatchedSubmissionsFailsThenErrorIsRetured) { MockCommandQueueHwWithOverwrittenCsr cmdQueue(context, pClDevice, nullptr, false); MockCommandStreamReceiverWithFailingFlushBatchedSubmission csr(*pDevice->executionEnvironment, 0, pDevice->getDeviceBitfield()); cmdQueue.csr = &csr; cl_int errorCode = cmdQueue.finish(); EXPECT_EQ(CL_OUT_OF_RESOURCES, errorCode); } HWTEST_F(CommandQueueHwTest, givenGpuHangWhenFinishingCommandQueueHwThenWaitForEnginesIsCalledAndOutOfResourcesIsReturned) { MockCommandQueueHw mockCmdQueueHw{context, pClDevice, nullptr}; mockCmdQueueHw.waitForAllEnginesReturnValue = WaitStatus::GpuHang; mockCmdQueueHw.getUltCommandStreamReceiver().shouldFlushBatchedSubmissionsReturnSuccess = true; const auto 
finishResult = mockCmdQueueHw.finish(); EXPECT_EQ(1, mockCmdQueueHw.waitForAllEnginesCalledCount); EXPECT_EQ(CL_OUT_OF_RESOURCES, finishResult); } HWTEST_F(CommandQueueHwTest, givenNoGpuHangWhenFinishingCommandQueueHwThenWaitForEnginesIsCalledAndSuccessIsReturned) { MockCommandQueueHw mockCmdQueueHw{context, pClDevice, nullptr}; mockCmdQueueHw.waitForAllEnginesReturnValue = WaitStatus::Ready; mockCmdQueueHw.getUltCommandStreamReceiver().shouldFlushBatchedSubmissionsReturnSuccess = true; const auto finishResult = mockCmdQueueHw.finish(); EXPECT_EQ(1, mockCmdQueueHw.waitForAllEnginesCalledCount); EXPECT_EQ(CL_SUCCESS, finishResult); } compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/command_queue_hw_2_tests.cpp000066400000000000000000000655521422164147700321550ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/cmd_parse/hw_parse.h" #include "shared/test/common/mocks/mock_builtins.h" #include "shared/test/common/mocks/mock_csr.h" #include "shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/source/built_ins/builtins_dispatch_builder.h" #include "opencl/source/helpers/dispatch_info_builder.h" #include "opencl/test/unit_test/command_queue/command_queue_fixture.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_event.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" using namespace NEO; void CloneMdi(MultiDispatchInfo &dst, const MultiDispatchInfo &src) { for (auto &srcDi : src) { dst.push(srcDi); } dst.setBuiltinOpParams(src.peekBuiltinOpParams()); } struct MockBuilder : BuiltinDispatchInfoBuilder { using BuiltinDispatchInfoBuilder::BuiltinDispatchInfoBuilder; bool buildDispatchInfos(MultiDispatchInfo &d) const override { wasBuildDispatchInfosWithBuiltinOpParamsCalled = 
true; paramsReceived.multiDispatchInfo.setBuiltinOpParams(d.peekBuiltinOpParams()); return true; } bool buildDispatchInfos(MultiDispatchInfo &d, Kernel *kernel, const uint32_t dim, const Vec3 &gws, const Vec3 &elws, const Vec3 &offset) const override { paramsReceived.kernel = kernel; paramsReceived.gws = gws; paramsReceived.elws = elws; paramsReceived.offset = offset; wasBuildDispatchInfosWithKernelParamsCalled = true; DispatchInfoBuilder dispatchInfoBuilder(clDevice); dispatchInfoBuilder.setKernel(paramsToUse.kernel); dispatchInfoBuilder.setDispatchGeometry(dim, paramsToUse.gws, paramsToUse.elws, paramsToUse.offset); dispatchInfoBuilder.bake(d); CloneMdi(paramsReceived.multiDispatchInfo, d); return true; } mutable bool wasBuildDispatchInfosWithBuiltinOpParamsCalled = false; mutable bool wasBuildDispatchInfosWithKernelParamsCalled = false; struct Params { MultiDispatchInfo multiDispatchInfo; Kernel *kernel = nullptr; Vec3 gws = Vec3{0, 0, 0}; Vec3 elws = Vec3{0, 0, 0}; Vec3 offset = Vec3{0, 0, 0}; }; mutable Params paramsReceived; Params paramsToUse; }; struct BuiltinParamsCommandQueueHwTests : public CommandQueueHwTest { void SetUpImpl(EBuiltInOps::Type operation) { auto builtIns = new MockBuiltins(); pCmdQ->getDevice().getExecutionEnvironment()->rootDeviceEnvironments[pCmdQ->getDevice().getRootDeviceIndex()]->builtins.reset(builtIns); auto swapBuilder = pClExecutionEnvironment->setBuiltinDispatchInfoBuilder( rootDeviceIndex, operation, std::unique_ptr(new MockBuilder(*builtIns, pCmdQ->getClDevice()))); mockBuilder = static_cast(&BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder( operation, *pClDevice)); } MockBuilder *mockBuilder; }; HWTEST_F(BuiltinParamsCommandQueueHwTests, givenEnqueueReadWriteBufferCallWhenBuiltinParamsArePassedThenCheckValuesCorectness) { SetUpImpl(EBuiltInOps::CopyBufferToBuffer); BufferDefaults::context = context; auto buffer = clUniquePtr(BufferHelper<>::create()); char array[3 * MemoryConstants::cacheLineSize]; char *ptr = 
&array[MemoryConstants::cacheLineSize]; ptr = alignUp(ptr, MemoryConstants::cacheLineSize); ptr -= 1; cl_int status = pCmdQ->enqueueReadBuffer(buffer.get(), CL_FALSE, 0, 0, ptr, nullptr, 0, 0, nullptr); EXPECT_EQ(CL_SUCCESS, status); void *alignedPtr = alignDown(ptr, 4); size_t ptrOffset = ptrDiff(ptr, alignedPtr); Vec3 offset = {0, 0, 0}; auto builtinParams = mockBuilder->paramsReceived.multiDispatchInfo.peekBuiltinOpParams(); EXPECT_EQ(alignedPtr, builtinParams.dstPtr); EXPECT_EQ(ptrOffset, builtinParams.dstOffset.x); EXPECT_EQ(offset, builtinParams.srcOffset); status = pCmdQ->enqueueWriteBuffer(buffer.get(), CL_FALSE, 0, 0, ptr, nullptr, 0, 0, nullptr); EXPECT_EQ(CL_SUCCESS, status); builtinParams = mockBuilder->paramsReceived.multiDispatchInfo.peekBuiltinOpParams(); EXPECT_EQ(alignedPtr, builtinParams.srcPtr); EXPECT_EQ(ptrOffset, builtinParams.srcOffset.x); EXPECT_EQ(offset, builtinParams.dstOffset); } HWTEST_F(BuiltinParamsCommandQueueHwTests, givenEnqueueWriteImageCallWhenBuiltinParamsArePassedThenCheckValuesCorectness) { SetUpImpl(EBuiltInOps::CopyBufferToImage3d); std::unique_ptr dstImage(ImageHelper>::create(context)); auto imageDesc = dstImage->getImageDesc(); size_t origin[] = {0, 0, 0}; size_t region[] = {imageDesc.image_width, imageDesc.image_height, 0}; size_t rowPitch = dstImage->getHostPtrRowPitch(); size_t slicePitch = dstImage->getHostPtrSlicePitch(); char array[3 * MemoryConstants::cacheLineSize]; char *ptr = &array[MemoryConstants::cacheLineSize]; ptr = alignUp(ptr, MemoryConstants::cacheLineSize); ptr -= 1; void *alignedPtr = alignDown(ptr, 4); size_t ptrOffset = ptrDiff(ptr, alignedPtr); Vec3 offset = {0, 0, 0}; cl_int status = pCmdQ->enqueueWriteImage(dstImage.get(), CL_FALSE, origin, region, rowPitch, slicePitch, ptr, nullptr, 0, 0, nullptr); EXPECT_EQ(CL_SUCCESS, status); auto builtinParams = mockBuilder->paramsReceived.multiDispatchInfo.peekBuiltinOpParams(); EXPECT_EQ(alignedPtr, builtinParams.srcPtr); EXPECT_EQ(ptrOffset, 
builtinParams.srcOffset.x); EXPECT_EQ(offset, builtinParams.dstOffset); } HWTEST_F(BuiltinParamsCommandQueueHwTests, givenEnqueueReadImageCallWhenBuiltinParamsArePassedThenCheckValuesCorectness) { SetUpImpl(EBuiltInOps::CopyImage3dToBuffer); std::unique_ptr dstImage(ImageHelper>::create(context)); auto imageDesc = dstImage->getImageDesc(); size_t origin[] = {0, 0, 0}; size_t region[] = {imageDesc.image_width, imageDesc.image_height, 0}; size_t rowPitch = dstImage->getHostPtrRowPitch(); size_t slicePitch = dstImage->getHostPtrSlicePitch(); char array[3 * MemoryConstants::cacheLineSize]; char *ptr = &array[MemoryConstants::cacheLineSize]; ptr = alignUp(ptr, MemoryConstants::cacheLineSize); ptr -= 1; void *alignedPtr = alignDown(ptr, 4); size_t ptrOffset = ptrDiff(ptr, alignedPtr); Vec3 offset = {0, 0, 0}; cl_int status = pCmdQ->enqueueReadImage(dstImage.get(), CL_FALSE, origin, region, rowPitch, slicePitch, ptr, nullptr, 0, 0, nullptr); EXPECT_EQ(CL_SUCCESS, status); auto builtinParams = mockBuilder->paramsReceived.multiDispatchInfo.peekBuiltinOpParams(); EXPECT_EQ(alignedPtr, builtinParams.dstPtr); EXPECT_EQ(ptrOffset, builtinParams.dstOffset.x); EXPECT_EQ(offset, builtinParams.srcOffset); } HWTEST_F(BuiltinParamsCommandQueueHwTests, givenEnqueueReadWriteBufferRectCallWhenBuiltinParamsArePassedThenCheckValuesCorectness) { SetUpImpl(EBuiltInOps::CopyBufferRect); BufferDefaults::context = context; auto buffer = clUniquePtr(BufferHelper<>::create()); size_t bufferOrigin[3] = {0, 0, 0}; size_t hostOrigin[3] = {0, 0, 0}; size_t region[3] = {0, 0, 0}; char array[3 * MemoryConstants::cacheLineSize]; char *ptr = &array[MemoryConstants::cacheLineSize]; ptr = alignUp(ptr, MemoryConstants::cacheLineSize); ptr -= 1; cl_int status = pCmdQ->enqueueReadBufferRect(buffer.get(), CL_FALSE, bufferOrigin, hostOrigin, region, 0, 0, 0, 0, ptr, 0, 0, nullptr); void *alignedPtr = alignDown(ptr, 4); size_t ptrOffset = ptrDiff(ptr, alignedPtr); Vec3 offset = {0, 0, 0}; auto builtinParams = 
mockBuilder->paramsReceived.multiDispatchInfo.peekBuiltinOpParams(); EXPECT_EQ(alignedPtr, builtinParams.dstPtr); EXPECT_EQ(ptrOffset, builtinParams.dstOffset.x); EXPECT_EQ(offset, builtinParams.srcOffset); status = pCmdQ->enqueueWriteBufferRect(buffer.get(), CL_FALSE, bufferOrigin, hostOrigin, region, 0, 0, 0, 0, ptr, 0, 0, nullptr); EXPECT_EQ(CL_SUCCESS, status); builtinParams = mockBuilder->paramsReceived.multiDispatchInfo.peekBuiltinOpParams(); EXPECT_EQ(alignedPtr, builtinParams.srcPtr); EXPECT_EQ(offset, builtinParams.dstOffset); EXPECT_EQ(ptrOffset, builtinParams.srcOffset.x); } HWTEST_F(OOQueueHwTest, givenBlockedOutOfOrderCmdQueueAndAsynchronouslyCompletedEventWhenEnqueueCompletesVirtualEventThenUpdatedTaskLevelIsPassedToEnqueueAndFlushTask) { CommandQueueHw *cmdQHw = static_cast *>(this->pCmdQ); int32_t executionStamp = 0; auto mockCSR = new MockCsr(executionStamp, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(mockCSR); MockKernelWithInternals mockKernelWithInternals(*pClDevice); auto mockKernel = mockKernelWithInternals.mockKernel; size_t offset = 0; size_t size = 1; class MockEventWithSetCompleteOnUpdate : public Event { public: MockEventWithSetCompleteOnUpdate(CommandQueue *cmdQueue, cl_command_type cmdType, uint32_t taskLevel, uint32_t taskCount) : Event(cmdQueue, cmdType, taskLevel, taskCount) { } void updateExecutionStatus() override { setStatus(CL_COMPLETE); } }; Event event(cmdQHw, CL_COMMAND_NDRANGE_KERNEL, 10, 0); uint32_t virtualEventTaskLevel = 77; uint32_t virtualEventTaskCount = 80; MockEventWithSetCompleteOnUpdate virtualEvent(cmdQHw, CL_COMMAND_NDRANGE_KERNEL, virtualEventTaskLevel, virtualEventTaskCount); cl_event blockedEvent = &event; // Put Queue in blocked state by assigning virtualEvent virtualEvent.incRefInternal(); event.addChild(virtualEvent); cmdQHw->virtualEvent = &virtualEvent; cmdQHw->taskLevel = 23; cmdQHw->enqueueKernel(mockKernel, 1, 
&offset, &size, &size, 1, &blockedEvent, nullptr); //new virtual event is created on enqueue, bind it to the created virtual event EXPECT_NE(cmdQHw->virtualEvent, &virtualEvent); event.setStatus(CL_SUBMITTED); virtualEvent.Event::updateExecutionStatus(); EXPECT_FALSE(cmdQHw->isQueueBlocked()); //+1 due to dependency between virtual event & new virtual event //new virtual event is actually responsible for command delivery EXPECT_EQ(virtualEventTaskLevel + 1, cmdQHw->taskLevel); EXPECT_EQ(virtualEventTaskLevel + 1, mockCSR->lastTaskLevelToFlushTask); } HWTEST_F(IoqCommandQueueHwBlitTest, givenGpgpuCsrWhenEnqueueingSubsequentBlitsThenGpgpuCommandStreamIsNotObtained) { auto &gpgpuCsr = pDevice->getUltCommandStreamReceiver(); auto srcBuffer = std::unique_ptr{BufferHelper<>::create(pContext)}; auto dstBuffer = std::unique_ptr{BufferHelper<>::create(pContext)}; cl_int retVal = pCmdQ->enqueueCopyBuffer( srcBuffer.get(), dstBuffer.get(), 0, 0, 1, 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0, gpgpuCsr.ensureCommandBufferAllocationCalled); retVal = pCmdQ->enqueueCopyBuffer( srcBuffer.get(), dstBuffer.get(), 0, 0, 1, 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0, gpgpuCsr.ensureCommandBufferAllocationCalled); } HWTEST_F(IoqCommandQueueHwBlitTest, givenGpgpuCsrWhenEnqueueingBlitAfterNotFlushedKernelThenGpgpuCommandStreamIsObtained) { auto &gpgpuCsr = pDevice->getUltCommandStreamReceiver(); auto srcBuffer = std::unique_ptr{BufferHelper<>::create(pContext)}; auto dstBuffer = std::unique_ptr{BufferHelper<>::create(pContext)}; pCmdQ->getGpgpuCommandStreamReceiver().overrideDispatchPolicy(DispatchMode::BatchedDispatch); MockKernelWithInternals mockKernelWithInternals(*pClDevice); size_t offset = 0; size_t size = 1; cl_int retVal = pCmdQ->enqueueKernel(mockKernelWithInternals.mockKernel, 1, &offset, &size, &size, 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(0, gpgpuCsr.ensureCommandBufferAllocationCalled); const auto 
ensureCommandBufferAllocationCalledAfterKernel = gpgpuCsr.ensureCommandBufferAllocationCalled; retVal = pCmdQ->enqueueCopyBuffer( srcBuffer.get(), dstBuffer.get(), 0, 0, 1, 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(ensureCommandBufferAllocationCalledAfterKernel, gpgpuCsr.ensureCommandBufferAllocationCalled); } HWTEST_F(IoqCommandQueueHwBlitTest, givenGpgpuCsrWhenEnqueueingBlitAfterFlushedKernelThenGpgpuCommandStreamIsNotObtained) { auto &gpgpuCsr = pDevice->getUltCommandStreamReceiver(); auto srcBuffer = std::unique_ptr{BufferHelper<>::create(pContext)}; auto dstBuffer = std::unique_ptr{BufferHelper<>::create(pContext)}; DebugManagerStateRestore restorer; DebugManager.flags.ForceCacheFlushForBcs.set(0); MockKernelWithInternals mockKernelWithInternals(*pClDevice); size_t offset = 0; size_t size = 1; cl_int retVal = pCmdQ->enqueueKernel(mockKernelWithInternals.mockKernel, 1, &offset, &size, &size, 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(0, gpgpuCsr.ensureCommandBufferAllocationCalled); const auto ensureCommandBufferAllocationCalledAfterKernel = gpgpuCsr.ensureCommandBufferAllocationCalled; retVal = pCmdQ->enqueueCopyBuffer( srcBuffer.get(), dstBuffer.get(), 0, 0, 1, 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(ensureCommandBufferAllocationCalledAfterKernel, gpgpuCsr.ensureCommandBufferAllocationCalled); } HWTEST_F(OoqCommandQueueHwBlitTest, givenBlitAfterBarrierWhenEnqueueingCommandThenWaitForBarrierOnBlit) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; if (pCmdQ->getTimestampPacketContainer() == nullptr) { GTEST_SKIP(); } DebugManagerStateRestore restore{}; DebugManager.flags.DoCpuCopyOnReadBuffer.set(0); DebugManager.flags.ForceCacheFlushForBcs.set(0); DebugManager.flags.UpdateTaskCountFromWait.set(1); MockKernelWithInternals mockKernelWithInternals(*pClDevice); MockKernel *kernel = mockKernelWithInternals.mockKernel; size_t 
offset = 0; size_t gws = 1; BufferDefaults::context = context; auto buffer = clUniquePtr(BufferHelper<>::create()); char ptr[1] = {}; EXPECT_EQ(CL_SUCCESS, pCmdQ->enqueueKernel(kernel, 1, &offset, &gws, nullptr, 0, nullptr, nullptr)); EXPECT_EQ(CL_SUCCESS, pCmdQ->enqueueKernel(kernel, 1, &offset, &gws, nullptr, 0, nullptr, nullptr)); auto ccsStart = pCmdQ->getGpgpuCommandStreamReceiver().getCS().getUsed(); EXPECT_EQ(CL_SUCCESS, pCmdQ->enqueueBarrierWithWaitList(0, nullptr, nullptr)); EXPECT_EQ(CL_SUCCESS, pCmdQ->enqueueReadBuffer(buffer.get(), CL_FALSE, 0, 1u, ptr, nullptr, 0, nullptr, nullptr)); uint64_t barrierNodeAddress = 0u; { HardwareParse ccsHwParser; ccsHwParser.parseCommands(pCmdQ->getGpgpuCommandStreamReceiver().getCS(0), ccsStart); const auto pipeControlItor = find(ccsHwParser.cmdList.begin(), ccsHwParser.cmdList.end()); auto pipeControl = genCmdCast(*pipeControlItor); EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, pipeControl->getPostSyncOperation()); barrierNodeAddress = pipeControl->getAddress() | (static_cast(pipeControl->getAddressHigh()) << 32); // There shouldn't be any semaphores before the barrier const auto semaphoreItor = find(ccsHwParser.cmdList.begin(), pipeControlItor); EXPECT_EQ(pipeControlItor, semaphoreItor); } { HardwareParse bcsHwParser; bcsHwParser.parseCommands(pCmdQ->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS)->getCS(0), 0u); const auto semaphoreItor = find(bcsHwParser.cmdList.begin(), bcsHwParser.cmdList.end()); auto semaphore = genCmdCast(*semaphoreItor); EXPECT_EQ(barrierNodeAddress, semaphore->getSemaphoreGraphicsAddress()); const auto pipeControlItor = find(semaphoreItor, bcsHwParser.cmdList.end()); EXPECT_EQ(bcsHwParser.cmdList.end(), pipeControlItor); } EXPECT_EQ(CL_SUCCESS, pCmdQ->finish()); } HWTEST_F(OoqCommandQueueHwBlitTest, givenBlitBeforeBarrierWhenEnqueueingCommandThenWaitForBlitBeforeBarrier) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; using 
PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT; if (pCmdQ->getTimestampPacketContainer() == nullptr) { GTEST_SKIP(); } DebugManagerStateRestore restore{}; DebugManager.flags.DoCpuCopyOnReadBuffer.set(0); DebugManager.flags.ForceCacheFlushForBcs.set(0); DebugManager.flags.UpdateTaskCountFromWait.set(1); MockKernelWithInternals mockKernelWithInternals(*pClDevice); MockKernel *kernel = mockKernelWithInternals.mockKernel; size_t offset = 0; size_t gws = 1; BufferDefaults::context = context; auto buffer = clUniquePtr(BufferHelper<>::create()); char ptr[1] = {}; EXPECT_EQ(CL_SUCCESS, pCmdQ->enqueueReadBuffer(buffer.get(), CL_FALSE, 0, 1u, ptr, nullptr, 0, nullptr, nullptr)); EXPECT_EQ(CL_SUCCESS, pCmdQ->enqueueReadBuffer(buffer.get(), CL_FALSE, 0, 1u, ptr, nullptr, 0, nullptr, nullptr)); uint64_t lastBlitNodeAddress = TimestampPacketHelper::getContextEndGpuAddress(*pCmdQ->getTimestampPacketContainer()->peekNodes()[0]); EXPECT_EQ(CL_SUCCESS, pCmdQ->enqueueKernel(kernel, 1, &offset, &gws, nullptr, 0, nullptr, nullptr)); auto ccsStart = pCmdQ->getGpgpuCommandStreamReceiver().getCS().getUsed(); auto bcsStart = pCmdQ->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS)->getCS(0).getUsed(); EXPECT_EQ(CL_SUCCESS, pCmdQ->enqueueBarrierWithWaitList(0, nullptr, nullptr)); EXPECT_EQ(CL_SUCCESS, pCmdQ->enqueueKernel(kernel, 1, &offset, &gws, nullptr, 0, nullptr, nullptr)); EXPECT_EQ(CL_SUCCESS, pCmdQ->enqueueReadBuffer(buffer.get(), CL_FALSE, 0, 1u, ptr, nullptr, 0, nullptr, nullptr)); EXPECT_EQ(CL_SUCCESS, pCmdQ->enqueueReadBuffer(buffer.get(), CL_FALSE, 0, 1u, ptr, nullptr, 0, nullptr, nullptr)); uint64_t barrierNodeAddress = 0u; { HardwareParse ccsHwParser; ccsHwParser.parseCommands(pCmdQ->getGpgpuCommandStreamReceiver().getCS(0), ccsStart); const auto semaphoreItor = find(ccsHwParser.cmdList.begin(), ccsHwParser.cmdList.end()); const auto semaphore = genCmdCast(*semaphoreItor); EXPECT_EQ(lastBlitNodeAddress, 
semaphore->getSemaphoreGraphicsAddress()); const auto pipeControlItor = find(semaphoreItor, ccsHwParser.cmdList.end()); const auto pipeControl = genCmdCast(*pipeControlItor); EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, pipeControl->getPostSyncOperation()); barrierNodeAddress = pipeControl->getAddress() | (static_cast(pipeControl->getAddressHigh()) << 32); // There shouldn't be any more semaphores before the barrier EXPECT_EQ(pipeControlItor, find(std::next(semaphoreItor), pipeControlItor)); } { HardwareParse bcsHwParser; bcsHwParser.parseCommands(pCmdQ->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS)->getCS(0), bcsStart); const auto semaphoreItor = find(bcsHwParser.cmdList.begin(), bcsHwParser.cmdList.end()); const auto semaphore = genCmdCast(*semaphoreItor); EXPECT_EQ(barrierNodeAddress, semaphore->getSemaphoreGraphicsAddress()); EXPECT_EQ(bcsHwParser.cmdList.end(), find(semaphoreItor, bcsHwParser.cmdList.end())); // Only one barrier semaphore from first BCS enqueue const auto blitItor = find(bcsHwParser.cmdList.begin(), bcsHwParser.cmdList.end()); EXPECT_EQ(1u, findAll(bcsHwParser.cmdList.begin(), blitItor).size()); } EXPECT_EQ(CL_SUCCESS, pCmdQ->finish()); } HWTEST_F(OoqCommandQueueHwBlitTest, givenBlockedBlitAfterBarrierWhenEnqueueingCommandThenWaitForBlitBeforeBarrier) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; if (pCmdQ->getTimestampPacketContainer() == nullptr) { GTEST_SKIP(); } DebugManagerStateRestore restore{}; DebugManager.flags.DoCpuCopyOnReadBuffer.set(0); DebugManager.flags.ForceCacheFlushForBcs.set(0); DebugManager.flags.UpdateTaskCountFromWait.set(1); UserEvent userEvent; cl_event userEventWaitlist[] = {&userEvent}; MockKernelWithInternals mockKernelWithInternals(*pClDevice); MockKernel *kernel = mockKernelWithInternals.mockKernel; size_t offset = 0; size_t gws = 1; BufferDefaults::context = context; auto buffer = 
clUniquePtr(BufferHelper<>::create()); char ptr[1] = {}; EXPECT_EQ(CL_SUCCESS, pCmdQ->enqueueReadBuffer(buffer.get(), CL_FALSE, 0, 1u, ptr, nullptr, 0, nullptr, nullptr)); EXPECT_EQ(CL_SUCCESS, pCmdQ->enqueueReadBuffer(buffer.get(), CL_FALSE, 0, 1u, ptr, nullptr, 0, nullptr, nullptr)); uint64_t lastBlitNodeAddress = TimestampPacketHelper::getContextEndGpuAddress(*pCmdQ->getTimestampPacketContainer()->peekNodes()[0]); EXPECT_EQ(CL_SUCCESS, pCmdQ->enqueueKernel(kernel, 1, &offset, &gws, nullptr, 0, nullptr, nullptr)); auto ccsStart = pCmdQ->getGpgpuCommandStreamReceiver().getCS().getUsed(); auto bcsStart = pCmdQ->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS)->getCS(0).getUsed(); EXPECT_EQ(CL_SUCCESS, pCmdQ->enqueueBarrierWithWaitList(0, nullptr, nullptr)); EXPECT_EQ(CL_SUCCESS, pCmdQ->enqueueReadBuffer(buffer.get(), CL_FALSE, 0, 1u, ptr, nullptr, 1, userEventWaitlist, nullptr)); userEvent.setStatus(CL_COMPLETE); uint64_t barrierNodeAddress = 0u; { HardwareParse ccsHwParser; ccsHwParser.parseCommands(pCmdQ->getGpgpuCommandStreamReceiver().getCS(0), ccsStart); const auto semaphoreItor = find(ccsHwParser.cmdList.begin(), ccsHwParser.cmdList.end()); const auto semaphore = genCmdCast(*semaphoreItor); EXPECT_EQ(lastBlitNodeAddress, semaphore->getSemaphoreGraphicsAddress()); const auto pipeControlItor = find(semaphoreItor, ccsHwParser.cmdList.end()); const auto pipeControl = genCmdCast(*pipeControlItor); EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, pipeControl->getPostSyncOperation()); barrierNodeAddress = pipeControl->getAddress() | (static_cast(pipeControl->getAddressHigh()) << 32); // There shouldn't be any more semaphores before the barrier EXPECT_EQ(pipeControlItor, find(std::next(semaphoreItor), pipeControlItor)); } { HardwareParse bcsHwParser; bcsHwParser.parseCommands(pCmdQ->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS)->getCS(0), bcsStart); const auto semaphoreItor = find(bcsHwParser.cmdList.begin(), 
bcsHwParser.cmdList.end()); const auto semaphore = genCmdCast(*semaphoreItor); EXPECT_EQ(barrierNodeAddress, semaphore->getSemaphoreGraphicsAddress()); EXPECT_EQ(bcsHwParser.cmdList.end(), find(semaphoreItor, bcsHwParser.cmdList.end())); } EXPECT_EQ(CL_SUCCESS, pCmdQ->finish()); } HWTEST_F(CommandQueueHwTest, GivenBuiltinKernelWhenBuiltinDispatchInfoBuilderIsProvidedThenThisBuilderIsUsedForCreatingDispatchInfo) { CommandQueueHw *cmdQHw = static_cast *>(this->pCmdQ); MockKernelWithInternals mockKernelToUse(*pClDevice); MockBuilder builder(*pDevice->getBuiltIns(), *pClDevice); builder.paramsToUse.gws.x = 11; builder.paramsToUse.elws.x = 13; builder.paramsToUse.offset.x = 17; builder.paramsToUse.kernel = mockKernelToUse.mockKernel; MockKernelWithInternals mockKernelToSend(*pClDevice); mockKernelToSend.kernelInfo.builtinDispatchBuilder = &builder; NullSurface s; Surface *surfaces[] = {&s}; size_t gws[3] = {3, 0, 0}; size_t lws[3] = {5, 0, 0}; size_t off[3] = {7, 0, 0}; EXPECT_FALSE(builder.wasBuildDispatchInfosWithBuiltinOpParamsCalled); EXPECT_FALSE(builder.wasBuildDispatchInfosWithKernelParamsCalled); cmdQHw->template enqueueHandler(surfaces, false, mockKernelToSend.mockKernel, 1, off, gws, lws, lws, 0, nullptr, nullptr); EXPECT_FALSE(builder.wasBuildDispatchInfosWithBuiltinOpParamsCalled); EXPECT_TRUE(builder.wasBuildDispatchInfosWithKernelParamsCalled); EXPECT_EQ(Vec3(gws[0], gws[1], gws[2]), builder.paramsReceived.gws); EXPECT_EQ(Vec3(lws[0], lws[1], lws[2]), builder.paramsReceived.elws); EXPECT_EQ(Vec3(off[0], off[1], off[2]), builder.paramsReceived.offset); EXPECT_EQ(mockKernelToSend.mockKernel, builder.paramsReceived.kernel); auto dispatchInfo = builder.paramsReceived.multiDispatchInfo.begin(); EXPECT_EQ(1U, builder.paramsReceived.multiDispatchInfo.size()); EXPECT_EQ(builder.paramsToUse.gws.x, dispatchInfo->getGWS().x); EXPECT_EQ(builder.paramsToUse.elws.x, dispatchInfo->getEnqueuedWorkgroupSize().x); EXPECT_EQ(builder.paramsToUse.offset.x, 
dispatchInfo->getOffset().x); EXPECT_EQ(builder.paramsToUse.kernel, dispatchInfo->getKernel()); } compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/command_queue_tests.cpp000066400000000000000000004070421422164147700312300ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/command_stream/wait_status.h" #include "shared/source/helpers/array_count.h" #include "shared/source/helpers/basic_math.h" #include "shared/source/helpers/engine_node_helper.h" #include "shared/source/helpers/timestamp_packet.h" #include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/os_interface/hw_info_config.h" #include "shared/test/common/fixtures/memory_management_fixture.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/helpers/variable_backup.h" #include "shared/test/common/libult/ult_command_stream_receiver.h" #include "shared/test/common/mocks/mock_allocation_properties.h" #include "shared/test/common/mocks/mock_csr.h" #include "shared/test/common/mocks/mock_gmm_resource_info.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "shared/test/common/mocks/mock_os_context.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/common/test_macros/test_checks_shared.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/event/event.h" #include "opencl/source/event/user_event.h" #include "opencl/source/helpers/cl_hw_helper.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/test/unit_test/command_queue/command_queue_fixture.h" #include 
"opencl/test/unit_test/command_stream/command_stream_fixture.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/fixtures/context_fixture.h" #include "opencl/test/unit_test/fixtures/dispatch_flags_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/fixtures/multi_tile_fixture.h" #include "opencl/test/unit_test/helpers/raii_hw_helper.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_event.h" #include "opencl/test/unit_test/mocks/mock_image.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_mdi.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "gtest/gtest.h" using namespace NEO; struct CommandQueueMemoryDevice : public MemoryManagementFixture, public ClDeviceFixture { void SetUp() override { MemoryManagementFixture::SetUp(); ClDeviceFixture::SetUp(); } void TearDown() override { ClDeviceFixture::TearDown(); platformsImpl->clear(); MemoryManagementFixture::TearDown(); } }; struct CommandQueueTest : public CommandQueueMemoryDevice, public ContextFixture, public CommandQueueFixture, ::testing::TestWithParam { using CommandQueueFixture::SetUp; using ContextFixture::SetUp; CommandQueueTest() { } void SetUp() override { CommandQueueMemoryDevice::SetUp(); properties = GetParam(); cl_device_id device = pClDevice; ContextFixture::SetUp(1, &device); CommandQueueFixture::SetUp(pContext, pClDevice, properties); } void TearDown() override { CommandQueueFixture::TearDown(); ContextFixture::TearDown(); CommandQueueMemoryDevice::TearDown(); } cl_command_queue_properties properties; const HardwareInfo *pHwInfo = nullptr; }; TEST_P(CommandQueueTest, GivenNonFailingAllocationWhenCreatingCommandQueueThenCommandQueueIsCreated) { InjectedFunction method = 
// Continuation of GivenNonFailingAllocationWhenCreatingCommandQueueThenCommandQueueIsCreated:
// this lambda is the initialiser of `InjectedFunction method` declared on the preceding line.
// NOTE(review): template argument lists (VariableBackup, std::make_unique,
// createWithNewExecutionEnvironment) appear to be missing from this text — confirm upstream.
        [this](size_t failureIndex) {
        auto retVal = CL_INVALID_VALUE;
        auto pCmdQ = CommandQueue::create(
            pContext,
            pClDevice,
            nullptr,
            false,
            retVal);

        // For the non-failing index creation must succeed; otherwise it must report
        // CL_OUT_OF_HOST_MEMORY and return null.
        if (MemoryManagement::nonfailingAllocation == failureIndex) {
            EXPECT_EQ(CL_SUCCESS, retVal);
            EXPECT_NE(nullptr, pCmdQ);
        } else {
            EXPECT_EQ(CL_OUT_OF_HOST_MEMORY, retVal) << "for allocation " << failureIndex;
            EXPECT_EQ(nullptr, pCmdQ);
        }
        delete pCmdQ;
    };
    injectFailures(method);
}

INSTANTIATE_TEST_CASE_P(CommandQueue, CommandQueueTest, ::testing::ValuesIn(AllCommandQueueProperties));

// A freshly constructed MockCommandQueue reports taskLevel and taskCount of 0.
TEST(CommandQueue, WhenConstructingCommandQueueThenTaskLevelAndTaskCountAreZero) {
    MockCommandQueue cmdQ(nullptr, nullptr, 0, false);
    EXPECT_EQ(0u, cmdQ.taskLevel);
    EXPECT_EQ(0u, cmdQ.taskCount);
}

// A freshly constructed MockCommandQueue has no queue family selected.
TEST(CommandQueue, WhenConstructingCommandQueueThenQueueFamilyIsNotSelected) {
    MockCommandQueue cmdQ(nullptr, nullptr, 0, false);
    EXPECT_FALSE(cmdQ.isQueueFamilySelected());
}

// Walks EnableTimestampWait through 0..4 and checks isWaitForTimestampsEnabled() against
// a fixed value (0 and 4) or against the CSR query paired with that flag value (1, 2, 3).
TEST(CommandQueue, givenEnableTimestampWaitWhenCheckIsTimestampWaitEnabledThenReturnProperValue) {
    DebugManagerStateRestore restorer;
    VariableBackup backup(&ultHwConfig);
    ultHwConfig.useWaitForTimestamps = true;
    auto mockDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()));
    MockCommandQueue cmdQ(nullptr, mockDevice.get(), 0, false);

    {
        DebugManager.flags.EnableTimestampWait.set(0);
        EXPECT_FALSE(cmdQ.isWaitForTimestampsEnabled());
    }

    {
        DebugManager.flags.EnableTimestampWait.set(1);
        EXPECT_EQ(cmdQ.isWaitForTimestampsEnabled(), cmdQ.getGpgpuCommandStreamReceiver().isUpdateTagFromWaitEnabled());
    }

    {
        DebugManager.flags.EnableTimestampWait.set(2);
        EXPECT_EQ(cmdQ.isWaitForTimestampsEnabled(), cmdQ.getGpgpuCommandStreamReceiver().isDirectSubmissionEnabled());
    }

    {
        DebugManager.flags.EnableTimestampWait.set(3);
        EXPECT_EQ(cmdQ.isWaitForTimestampsEnabled(), cmdQ.getGpgpuCommandStreamReceiver().isAnyDirectSubmissionEnabled());
    }

    {
        DebugManager.flags.EnableTimestampWait.set(4);
        EXPECT_TRUE(cmdQ.isWaitForTimestampsEnabled());
    }
}

// Opening of the GetTagTest fixture; its base list continues on the following line.
struct GetTagTest : public
// Continuation of `struct GetTagTest : public` from the preceding line.
// The fixture sets up device, queue (with a null context) and command stream in that order
// and tears them down in reverse.
                    ClDeviceFixture,
                    public CommandQueueFixture,
                    public CommandStreamFixture,
                    public ::testing::Test {

    using CommandQueueFixture::SetUp;

    void SetUp() override {
        ClDeviceFixture::SetUp();
        CommandQueueFixture::SetUp(nullptr, pClDevice, 0);
        CommandStreamFixture::SetUp(pCmdQ);
    }

    void TearDown() override {
        CommandStreamFixture::TearDown();
        CommandQueueFixture::TearDown();
        ClDeviceFixture::TearDown();
    }
};

// Writes a value through pTagMemory and expects getHwTag() to return it.
TEST_F(GetTagTest, GivenSetHwTagWhenGettingHwTagThenCorrectTagIsReturned) {
    uint32_t tagValue = 0xdeadbeef;
    *pTagMemory = tagValue;
    EXPECT_EQ(tagValue, pCmdQ->getHwTag());
}

// A new queue on the fixture device reports initialHardwareTag.
TEST_F(GetTagTest, GivenInitialValueWhenGettingHwTagThenCorrectTagIsReturned) {
    MockContext context;
    MockCommandQueue commandQueue(&context, pClDevice, 0, false);

    EXPECT_EQ(initialHardwareTag, commandQueue.getHwTag());
}

// updateFromCompletionStamp copies task level, task count and flush stamp into the queue.
TEST(CommandQueue, GivenUpdatedCompletionStampWhenGettingCompletionStampThenUpdatedValueIsReturned) {
    MockContext context;

    MockCommandQueue cmdQ(&context, nullptr, 0, false);

    CompletionStamp cs = {
        cmdQ.taskCount + 100,
        cmdQ.taskLevel + 50,
        5};
    cmdQ.updateFromCompletionStamp(cs, nullptr);

    EXPECT_EQ(cs.taskLevel, cmdQ.taskLevel);
    EXPECT_EQ(cs.taskCount, cmdQ.taskCount);
    EXPECT_EQ(cs.flushStamp, cmdQ.flushStamp->peekStamp());
}

// A stamp whose task count is CompletionStamp::notReady must leave the queue's taskCount at 1.
TEST(CommandQueue, givenTimeStampWithTaskCountNotReadyStatusWhenupdateFromCompletionStampIsBeingCalledThenQueueTaskCountIsNotUpdated) {
    MockContext context;

    MockCommandQueue cmdQ(&context, nullptr, 0, false);

    cmdQ.taskCount = 1u;

    CompletionStamp cs = {
        CompletionStamp::notReady,
        0,
        0};
    cmdQ.updateFromCompletionStamp(cs, nullptr);
    EXPECT_EQ(1u, cmdQ.taskCount);
}

// Same update on a queue created with CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
// the assertions follow on the next line.
TEST(CommandQueue, GivenOOQwhenUpdateFromCompletionStampWithTrueIsCalledThenTaskLevelIsUpdated) {
    MockContext context;
    const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0};

    MockCommandQueue cmdQ(&context, nullptr, props, false);
    auto oldTL = cmdQ.taskLevel;

    CompletionStamp cs = {
        cmdQ.taskCount + 100,
        cmdQ.taskLevel + 50,
        5};
cmdQ.updateFromCompletionStamp(cs, nullptr); EXPECT_NE(oldTL, cmdQ.taskLevel); EXPECT_EQ(oldTL + 50, cmdQ.taskLevel); EXPECT_EQ(cs.taskCount, cmdQ.taskCount); EXPECT_EQ(cs.flushStamp, cmdQ.flushStamp->peekStamp()); } TEST(CommandQueue, givenDeviceWhenCreatingCommandQueueThenPickCsrFromDefaultEngine) { auto mockDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); MockCommandQueue cmdQ(nullptr, mockDevice.get(), 0, false); auto defaultCsr = mockDevice->getDefaultEngine().commandStreamReceiver; EXPECT_EQ(defaultCsr, &cmdQ.getGpgpuCommandStreamReceiver()); } struct CommandQueueWithBlitOperationsTests : public ::testing::TestWithParam {}; TEST(CommandQueue, givenDeviceNotSupportingBlitOperationsWhenQueueIsCreatedThenDontRegisterAnyBcsCsrs) { HardwareInfo hwInfo = *defaultHwInfo; hwInfo.capabilityTable.blitterOperationsSupported = false; auto mockDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); MockCommandQueue cmdQ(nullptr, mockDevice.get(), 0, false); EXPECT_EQ(0u, cmdQ.countBcsEngines()); auto defaultCsr = mockDevice->getDefaultEngine().commandStreamReceiver; EXPECT_EQ(defaultCsr, &cmdQ.getGpgpuCommandStreamReceiver()); } TEST(CommandQueue, givenDeviceWithSubDevicesSupportingBlitOperationsWhenQueueIsCreatedThenBcsIsTakenFromFirstSubDevice) { DebugManagerStateRestore restorer; VariableBackup mockDeviceFlagBackup{&MockDevice::createSingleDevice, false}; DebugManager.flags.CreateMultipleSubDevices.set(2); DebugManager.flags.EnableBlitterForEnqueueOperations.set(1); HardwareInfo hwInfo = *defaultHwInfo; hwInfo.capabilityTable.blitterOperationsSupported = true; REQUIRE_FULL_BLITTER_OR_SKIP(&hwInfo); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); EXPECT_EQ(2u, device->getNumGenericSubDevices()); std::unique_ptr bcsOsContext; auto subDevice = device->getSubDevice(0); auto &bcsEngine = subDevice->getEngine(aub_stream::EngineType::ENGINE_BCS, 
// Continuation of the sub-device BCS test: the first line completes the getEngine( call
// that starts on the preceding line.
// NOTE(review): template argument lists (std::make_unique, createWithNewExecutionEnvironment)
// appear to be missing from this text — confirm against the upstream file.
                                           EngineUsage::Regular);

    MockCommandQueue cmdQ(nullptr, device.get(), 0, false);

    EXPECT_NE(nullptr, cmdQ.getBcsCommandStreamReceiver(aub_stream::EngineType::ENGINE_BCS));
    EXPECT_EQ(bcsEngine.commandStreamReceiver, cmdQ.getBcsCommandStreamReceiver(aub_stream::EngineType::ENGINE_BCS));
}

INSTANTIATE_TEST_CASE_P(uint32_t,
                        CommandQueueWithBlitOperationsTests,
                        ::testing::Values(CL_COMMAND_WRITE_BUFFER,
                                          CL_COMMAND_WRITE_BUFFER_RECT,
                                          CL_COMMAND_READ_BUFFER,
                                          CL_COMMAND_READ_BUFFER_RECT,
                                          CL_COMMAND_COPY_BUFFER,
                                          CL_COMMAND_COPY_BUFFER_RECT,
                                          CL_COMMAND_SVM_MEMCPY));

// Virtual event with status CL_COMPLETE and flush stamp 5: after isQueueBlocked() returns
// false the queue's flush stamp must equal the event's.
TEST(CommandQueue, givenCmdQueueBlockedByReadyVirtualEventWhenUnblockingThenUpdateFlushTaskFromEvent) {
    auto mockDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr));
    auto context = new MockContext;
    auto cmdQ = new MockCommandQueue(context, mockDevice.get(), 0, false);
    auto userEvent = new Event(cmdQ, CL_COMMAND_NDRANGE_KERNEL, 0, 0);
    userEvent->setStatus(CL_COMPLETE);
    userEvent->flushStamp->setStamp(5);
    // Extra internal reference taken before the event is handed to the queue; dropped at the end.
    userEvent->incRefInternal();

    FlushStamp expectedFlushStamp = 0;
    EXPECT_EQ(expectedFlushStamp, cmdQ->flushStamp->peekStamp());
    cmdQ->virtualEvent = userEvent;

    EXPECT_FALSE(cmdQ->isQueueBlocked());
    EXPECT_EQ(userEvent->flushStamp->peekStamp(), cmdQ->flushStamp->peekStamp());
    userEvent->decRefInternal();
    cmdQ->decRefInternal();
    context->decRefInternal();
}

// Same setup with the event status set to -1; the remaining assertions and the reference
// releases are on the following line.
TEST(CommandQueue, givenCmdQueueBlockedByAbortedVirtualEventWhenUnblockingThenUpdateFlushTaskFromEvent) {
    auto context = new MockContext;
    auto mockDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr));
    auto cmdQ = new MockCommandQueue(context, mockDevice.get(), 0, false);

    auto userEvent = new Event(cmdQ, CL_COMMAND_NDRANGE_KERNEL, 0, 0);
    userEvent->setStatus(-1);
    userEvent->flushStamp->setStamp(5);

    FlushStamp expectedFlushStamp = 0;

    EXPECT_EQ(expectedFlushStamp, cmdQ->flushStamp->peekStamp());
    userEvent->incRefInternal();
    cmdQ->virtualEvent = userEvent;

    EXPECT_FALSE(cmdQ->isQueueBlocked());
// Continuation of givenCmdQueueBlockedByAbortedVirtualEventWhenUnblockingThenUpdateFlushTaskFromEvent:
// with the status -1 event the queue's flush stamp is expected to stay at its initial value.
// NOTE(review): template argument lists (std::unique_ptr, std::make_unique,
// getUltCommandStreamReceiver) appear to be missing from this text — confirm upstream.
    EXPECT_EQ(expectedFlushStamp, cmdQ->flushStamp->peekStamp());
    userEvent->decRefInternal();
    cmdQ->decRefInternal();
    context->decRefInternal();
}

// Fixture: CommandQueueMemoryDevice plus a MockContext created on pClDevice in SetUp
// and released in TearDown before the base fixture is torn down.
struct CommandQueueCommandStreamTest : public CommandQueueMemoryDevice,
                                       public ::testing::Test {
    void SetUp() override {
        CommandQueueMemoryDevice::SetUp();
        context.reset(new MockContext(pClDevice));
    }

    void TearDown() override {
        context.reset();
        CommandQueueMemoryDevice::TearDown();
    }
    std::unique_ptr context;
};

// CSR task level is set to 100; after isQueueBlocked() on a status -1 virtual event the
// queue's taskLevel must be 100 as well.
HWTEST_F(CommandQueueCommandStreamTest, givenCommandQueueThatWaitsOnAbortedUserEventWhenIsQueueBlockedIsCalledThenTaskLevelAlignsToCsr) {
    MockContext context;
    auto mockDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr));
    MockCommandQueue cmdQ(&context, mockDevice.get(), 0, false);

    auto &commandStreamReceiver = mockDevice->getUltCommandStreamReceiver();
    commandStreamReceiver.taskLevel = 100u;

    Event userEvent(&cmdQ, CL_COMMAND_NDRANGE_KERNEL, 0, 0);
    userEvent.setStatus(-1);
    userEvent.incRefInternal();
    cmdQ.virtualEvent = &userEvent;

    EXPECT_FALSE(cmdQ.isQueueBlocked());
    EXPECT_EQ(100u, cmdQ.taskLevel);
}

// Iterates command types in [CL_COMMAND_NDRANGE_KERNEL, CL_COMMAND_RELEASE_GL_OBJECTS):
// only CL_COMMAND_COPY_IMAGE with directSubmissionAvailable == true expects a texture
// cache flush; every other combination expects false.
HWTEST_F(CommandQueueCommandStreamTest, WhenCheckIsTextureCacheFlushNeededThenReturnProperValue) {
    MockContext context;
    auto mockDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr));
    MockCommandQueue cmdQ(&context, mockDevice.get(), 0, false);
    auto &commandStreamReceiver = mockDevice->getUltCommandStreamReceiver();

    EXPECT_FALSE(cmdQ.isTextureCacheFlushNeeded(CL_COMMAND_COPY_BUFFER_RECT));

    for (auto i = CL_COMMAND_NDRANGE_KERNEL; i < CL_COMMAND_RELEASE_GL_OBJECTS; i++) {
        if (i == CL_COMMAND_COPY_IMAGE) {
            commandStreamReceiver.directSubmissionAvailable = true;
            EXPECT_TRUE(cmdQ.isTextureCacheFlushNeeded(i));
            commandStreamReceiver.directSubmissionAvailable = false;
            EXPECT_FALSE(cmdQ.isTextureCacheFlushNeeded(i));
        } else {
            commandStreamReceiver.directSubmissionAvailable = true;
            EXPECT_FALSE(cmdQ.isTextureCacheFlushNeeded(i));
// Continuation of WhenCheckIsTextureCacheFlushNeededThenReturnProperValue: end of the
// else-branch, the loop and the test body that open on the preceding line.
            commandStreamReceiver.directSubmissionAvailable = false;
            EXPECT_FALSE(cmdQ.isTextureCacheFlushNeeded(i));
        }
    }
}

// getCS(1024) on a queue created with default properties returns a command stream reference.
TEST_F(CommandQueueCommandStreamTest, GivenValidCommandQueueWhenGettingCommandStreamThenValidObjectIsReturned) {
    const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0};
    MockCommandQueue commandQueue(context.get(), pClDevice, props, false);

    auto &cs = commandQueue.getCS(1024);
    EXPECT_NE(nullptr, &cs);
}

// Requests a 20-byte stream and checks both the usable size and the size of the backing
// allocation against the 64 KB-aligned formula spelled out below.
TEST_F(CommandQueueCommandStreamTest, GivenValidCommandStreamWhenGettingGraphicsAllocationThenMaxAvailableSpaceAndUnderlyingBufferSizeAreCorrect) {
    const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0};
    MockCommandQueue commandQueue(context.get(), pClDevice, props, false);
    size_t minSizeRequested = 20;

    auto &cs = commandQueue.getCS(minSizeRequested);
    ASSERT_NE(nullptr, &cs);

    auto *allocation = cs.getGraphicsAllocation();
    // Check the pointer itself: the address of the local variable is never null, so testing
    // `&allocation` could not catch a missing allocation before it is dereferenced below.
    ASSERT_NE(nullptr, allocation);

    // Usable space = aligned total minus the reserved minimum and the overfetch padding.
    size_t expectedCsSize = alignUp(minSizeRequested + CSRequirements::minCommandQueueCommandStreamSize + CSRequirements::csOverfetchSize, MemoryConstants::pageSize64k) - CSRequirements::minCommandQueueCommandStreamSize - CSRequirements::csOverfetchSize;
    EXPECT_EQ(expectedCsSize, cs.getMaxAvailableSpace());

    size_t expectedTotalSize = alignUp(minSizeRequested + CSRequirements::minCommandQueueCommandStreamSize + CSRequirements::csOverfetchSize, MemoryConstants::pageSize64k);
    EXPECT_EQ(expectedTotalSize, allocation->getUnderlyingBufferSize());
}

// getCS(16384) must return a stream whose max available space is at least 16384.
TEST_F(CommandQueueCommandStreamTest, GivenRequiredSizeWhenGettingCommandStreamThenMaxAvailableSpaceIsEqualOrGreaterThanRequiredSize) {
    const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0};
    MockCommandQueue commandQueue(context.get(), pClDevice, props, false);

    size_t requiredSize = 16384;
    const auto &commandStream = commandQueue.getCS(requiredSize);
    ASSERT_NE(nullptr, &commandStream);
    EXPECT_GE(commandStream.getMaxAvailableSpace(), requiredSize);
}

TEST_F(CommandQueueCommandStreamTest,
// Test name continuing the TEST_F( opened on the preceding line: asks for a stream larger
// than the first one's max available space and expects at least that much room.
// NOTE(review): the template argument of std::unique_ptr(allocation) appears to be missing
// from this text — confirm against the upstream file.
         WhenGettingCommandStreamWithNewSizeThenMaxAvailableSpaceIsEqualOrGreaterThanNewSize) {
    const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0};
    MockCommandQueue commandQueue(context.get(), pClDevice, props, false);

    auto &commandStreamInitial = commandQueue.getCS(1024);
    size_t requiredSize = commandStreamInitial.getMaxAvailableSpace() + 42;

    const auto &commandStream = commandQueue.getCS(requiredSize);
    ASSERT_NE(nullptr, &commandStream);
    EXPECT_GE(commandStream.getMaxAvailableSpace(), requiredSize);
}

// Stores a COMMAND_BUFFER allocation in the CSR's reusable list, then expects getCS(100)
// to hand back that same allocation and leave the reusable list empty.
TEST_F(CommandQueueCommandStreamTest, givenCommandStreamReceiverWithReusableAllocationsWhenAskedForCommandStreamThenReturnsAllocationFromReusablePool) {
    const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0};
    MockCommandQueue cmdQ(context.get(), pClDevice, props, false);

    auto memoryManager = pDevice->getMemoryManager();
    size_t requiredSize = alignUp(100 + CSRequirements::minCommandQueueCommandStreamSize + CSRequirements::csOverfetchSize, MemoryConstants::pageSize64k);
    auto allocation = memoryManager->allocateGraphicsMemoryWithProperties({pDevice->getRootDeviceIndex(), requiredSize, AllocationType::COMMAND_BUFFER, pDevice->getDeviceBitfield()});
    auto &commandStreamReceiver = cmdQ.getGpgpuCommandStreamReceiver();
    commandStreamReceiver.getInternalAllocationStorage()->storeAllocation(std::unique_ptr(allocation), REUSABLE_ALLOCATION);

    EXPECT_FALSE(commandStreamReceiver.getAllocationsForReuse().peekIsEmpty());
    EXPECT_TRUE(commandStreamReceiver.getAllocationsForReuse().peekContains(*allocation));

    const auto &indirectHeap = cmdQ.getCS(100);

    EXPECT_EQ(indirectHeap.getGraphicsAllocation(), allocation);

    EXPECT_TRUE(commandStreamReceiver.getAllocationsForReuse().peekIsEmpty());
}

// Opening of the destroy-queue test; the rest of its body is on the following line.
TEST_F(CommandQueueCommandStreamTest, givenCommandQueueWhenItIsDestroyedThenCommandStreamIsPutOnTheReusabeList) {
    auto cmdQ = new MockCommandQueue(context.get(), pClDevice, 0, false);
    const auto &commandStream = cmdQ->getCS(100);
    auto graphicsAllocation =
commandStream.getGraphicsAllocation(); EXPECT_TRUE(pDevice->getDefaultEngine().commandStreamReceiver->getAllocationsForReuse().peekIsEmpty()); //now destroy command queue, heap should go to reusable list delete cmdQ; EXPECT_FALSE(pDevice->getDefaultEngine().commandStreamReceiver->getAllocationsForReuse().peekIsEmpty()); EXPECT_TRUE(pDevice->getDefaultEngine().commandStreamReceiver->getAllocationsForReuse().peekContains(*graphicsAllocation)); } TEST_F(CommandQueueCommandStreamTest, WhenAskedForNewCommandStreamThenOldHeapIsStoredForReuse) { const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0}; MockCommandQueue cmdQ(context.get(), pClDevice, props, false); EXPECT_TRUE(pDevice->getDefaultEngine().commandStreamReceiver->getAllocationsForReuse().peekIsEmpty()); const auto &indirectHeap = cmdQ.getCS(100); EXPECT_TRUE(pDevice->getDefaultEngine().commandStreamReceiver->getAllocationsForReuse().peekIsEmpty()); auto graphicsAllocation = indirectHeap.getGraphicsAllocation(); cmdQ.getCS(indirectHeap.getAvailableSpace() + 100); EXPECT_FALSE(pDevice->getDefaultEngine().commandStreamReceiver->getAllocationsForReuse().peekIsEmpty()); EXPECT_TRUE(pDevice->getDefaultEngine().commandStreamReceiver->getAllocationsForReuse().peekContains(*graphicsAllocation)); } TEST_F(CommandQueueCommandStreamTest, givenCommandQueueWhenGetCSIsCalledThenCommandStreamAllocationTypeShouldBeSetToCommandBuffer) { const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0}; MockCommandQueue cmdQ(context.get(), pClDevice, props, false); const auto &commandStream = cmdQ.getCS(100); auto commandStreamAllocation = commandStream.getGraphicsAllocation(); ASSERT_NE(nullptr, commandStreamAllocation); EXPECT_EQ(AllocationType::COMMAND_BUFFER, commandStreamAllocation->getAllocationType()); } HWTEST_F(CommandQueueCommandStreamTest, givenMultiDispatchInfoWithSingleKernelWithFlushAllocationsDisabledWhenEstimatingNodesCountThenItEqualsMultiDispatchInfoSize) { DebugManagerStateRestore dbgRestore; 
// Continuation of the "FlushAllocationsDisabled" estimation test opened on the preceding line:
// with EnableCacheFlushAfterWalker == 0 the estimate must equal multiDispatchInfo.size().
// NOTE(review): template argument lists (MockCommandQueueHw, getUltCommandStreamReceiver,
// std::vector, TestWithParam, std::unique_ptr) appear to be missing from this text — confirm upstream.
    DebugManager.flags.EnableCacheFlushAfterWalker.set(0);

    MockCommandQueueHw cmdQ(context.get(), pClDevice, nullptr);
    pDevice->getUltCommandStreamReceiver().multiOsContextCapable = true;
    MockKernelWithInternals mockKernelWithInternals(*pClDevice, context.get());

    // Register one kernel argument allocation in kernelArgRequiresCacheFlush.
    mockKernelWithInternals.mockKernel->kernelArgRequiresCacheFlush.resize(1);
    MockGraphicsAllocation cacheRequiringAllocation;
    mockKernelWithInternals.mockKernel->kernelArgRequiresCacheFlush[0] = &cacheRequiringAllocation;

    MockMultiDispatchInfo multiDispatchInfo(pClDevice, std::vector({mockKernelWithInternals.mockKernel}));

    size_t estimatedNodesCount = cmdQ.estimateTimestampPacketNodesCount(multiDispatchInfo);
    EXPECT_EQ(estimatedNodesCount, multiDispatchInfo.size());
}

// Same kernel setup with EnableCacheFlushAfterWalker == 1: the estimate must be size() + 1.
HWTEST_F(CommandQueueCommandStreamTest, givenMultiDispatchInfoWithSingleKernelWithFlushAllocationsEnabledWhenEstimatingNodesCountThenItEqualsMultiDispatchInfoSizePlusOne) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.EnableCacheFlushAfterWalker.set(1);

    MockCommandQueueHw cmdQ(context.get(), pClDevice, nullptr);
    MockKernelWithInternals mockKernelWithInternals(*pClDevice, context.get());

    mockKernelWithInternals.mockKernel->kernelArgRequiresCacheFlush.resize(1);
    MockGraphicsAllocation cacheRequiringAllocation;
    mockKernelWithInternals.mockKernel->kernelArgRequiresCacheFlush[0] = &cacheRequiringAllocation;

    MockMultiDispatchInfo multiDispatchInfo(pClDevice, std::vector({mockKernelWithInternals.mockKernel}));

    size_t estimatedNodesCount = cmdQ.estimateTimestampPacketNodesCount(multiDispatchInfo);
    EXPECT_EQ(estimatedNodesCount, multiDispatchInfo.size() + 1);
}

// Parameterised fixture: CommandQueueMemoryDevice plus a MockContext created on pClDevice.
// The tests below pass GetParam() as the heap type argument of getIndirectHeap.
struct CommandQueueIndirectHeapTest : public CommandQueueMemoryDevice,
                                      public ::testing::TestWithParam {
    void SetUp() override {
        CommandQueueMemoryDevice::SetUp();
        context.reset(new MockContext(pClDevice));
    }

    void TearDown() override {
        context.reset();
        CommandQueueMemoryDevice::TearDown();
    }
    std::unique_ptr context;
};

TEST_P(CommandQueueIndirectHeapTest,
// Test name continuing the TEST_P( opened on the preceding line:
// getIndirectHeap(type, 8192) returns a heap reference.
// NOTE(review): template argument lists (getUltCommandStreamReceiver, UnitTestHelper) appear
// to be missing from this text — confirm against the upstream file.
       WhenGettingIndirectHeapThenValidObjectIsReturned) {
    const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0};
    MockCommandQueue cmdQ(context.get(), pClDevice, props, false);

    auto &indirectHeap = cmdQ.getIndirectHeap(this->GetParam(), 8192);
    EXPECT_NE(nullptr, &indirectHeap);
}

// Only the INDIRECT_OBJECT heap, and only when the CSR reports canUse4GbHeaps, is expected
// to be backed by a 32-bit allocation.
HWTEST_P(CommandQueueIndirectHeapTest, givenIndirectObjectHeapWhenItIsQueriedForInternalAllocationThenTrueIsReturned) {
    const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0};
    MockCommandQueue cmdQ(context.get(), pClDevice, props, false);
    auto &commandStreamReceiver = pClDevice->getUltCommandStreamReceiver();

    auto &indirectHeap = cmdQ.getIndirectHeap(this->GetParam(), 8192);
    if (this->GetParam() == IndirectHeap::Type::INDIRECT_OBJECT && commandStreamReceiver.canUse4GbHeaps) {
        EXPECT_TRUE(indirectHeap.getGraphicsAllocation()->is32BitAllocation());
    } else {
        EXPECT_FALSE(indirectHeap.getGraphicsAllocation()->is32BitAllocation());
    }
}

// Available space: for SURFACE_STATE it is defaultSshSize minus one page minus the default
// SSH usage; for the other heap types it is 64 KB.
HWTEST_P(CommandQueueIndirectHeapTest, GivenIndirectHeapWhenGettingAvailableSpaceThenCorrectSizeIsReturned) {
    const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0};
    MockCommandQueue cmdQ(context.get(), pClDevice, props, false);

    auto &indirectHeap = cmdQ.getIndirectHeap(this->GetParam(), sizeof(uint32_t));
    if (this->GetParam() == IndirectHeap::Type::SURFACE_STATE) {
        size_t expectedSshUse = cmdQ.getGpgpuCommandStreamReceiver().defaultSshSize - MemoryConstants::pageSize - UnitTestHelper::getDefaultSshUsage();
        EXPECT_EQ(expectedSshUse, indirectHeap.getAvailableSpace());
    } else {
        EXPECT_EQ(64 * KB, indirectHeap.getAvailableSpace());
    }
}

// getIndirectHeap(type, 16384): the size assertion itself is on the following line.
TEST_P(CommandQueueIndirectHeapTest, GivenRequiredSizeWhenGettingIndirectHeapThenIndirectHeapHasRequiredSize) {
    const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0};
    MockCommandQueue cmdQ(context.get(), pClDevice, props, false);

    size_t requiredSize = 16384;
    const auto &indirectHeap = cmdQ.getIndirectHeap(this->GetParam(), requiredSize);
    ASSERT_NE(nullptr, &indirectHeap);
// Continuation of GivenRequiredSizeWhenGettingIndirectHeapThenIndirectHeapHasRequiredSize
// (setup is on the preceding line).
// NOTE(review): the template argument of isAligned(...) appears to be missing from this
// text — confirm against the upstream file.
    EXPECT_GE(indirectHeap.getMaxAvailableSpace(), requiredSize);
}

// Re-requests the heap with a size 42 bytes above the first heap's max available space.
TEST_P(CommandQueueIndirectHeapTest, WhenGettingIndirectHeapWithNewSizeThenMaxAvailableSpaceIsEqualOrGreaterThanNewSize) {
    const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0};
    MockCommandQueue cmdQ(context.get(), pClDevice, props, false);

    auto &indirectHeapInitial = cmdQ.getIndirectHeap(this->GetParam(), 10);
    size_t requiredSize = indirectHeapInitial.getMaxAvailableSpace() + 42;

    const auto &indirectHeap = cmdQ.getIndirectHeap(this->GetParam(), requiredSize);
    ASSERT_NE(nullptr, &indirectHeap);
    if (this->GetParam() == IndirectHeap::Type::SURFACE_STATE) {
        //no matter what SSH is always capped
        EXPECT_EQ(cmdQ.getGpgpuCommandStreamReceiver().defaultSshSize - MemoryConstants::pageSize,
                  indirectHeap.getMaxAvailableSpace());
    } else {
        EXPECT_LE(requiredSize, indirectHeap.getMaxAvailableSpace());
    }
}

// Available space must satisfy isAligned both for the first heap and for the one obtained
// after the first has been fully consumed.
TEST_P(CommandQueueIndirectHeapTest, WhenGettingIndirectHeapThenSizeIsAlignedToCacheLine) {
    const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0};
    MockCommandQueue cmdQ(context.get(), pClDevice, props, false);
    size_t minHeapSize = 64 * KB;

    auto &indirectHeapInitial = cmdQ.getIndirectHeap(this->GetParam(), 2 * minHeapSize + 1);

    EXPECT_TRUE(isAligned(indirectHeapInitial.getAvailableSpace()));

    indirectHeapInitial.getSpace(indirectHeapInitial.getAvailableSpace()); // use whole space to force obtain reusable

    const auto &indirectHeap = cmdQ.getIndirectHeap(this->GetParam(), minHeapSize + 1);

    ASSERT_NE(nullptr, &indirectHeap);
    EXPECT_TRUE(isAligned(indirectHeap.getAvailableSpace()));
}

// Opening of the reusable-pool heap test; its body continues on the following line.
HWTEST_P(CommandQueueIndirectHeapTest, givenCommandStreamReceiverWithReusableAllocationsWhenAskedForHeapAllocationThenAllocationFromReusablePoolIsReturned) {
    const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0};
    MockCommandQueue cmdQ(context.get(), pClDevice, props, false);

    auto memoryManager = pDevice->getMemoryManager();

    auto allocationSize = defaultHeapSize * 2;

    GraphicsAllocation *allocation = nullptr;
auto &commandStreamReceiver = pClDevice->getUltCommandStreamReceiver(); auto allocationType = AllocationType::LINEAR_STREAM; if (this->GetParam() == IndirectHeap::Type::INDIRECT_OBJECT && commandStreamReceiver.canUse4GbHeaps) { allocationType = AllocationType::INTERNAL_HEAP; } allocation = memoryManager->allocateGraphicsMemoryWithProperties({pDevice->getRootDeviceIndex(), allocationSize, allocationType, pDevice->getDeviceBitfield()}); if (this->GetParam() == IndirectHeap::Type::SURFACE_STATE) { allocation->setSize(commandStreamReceiver.defaultSshSize * 2); } commandStreamReceiver.getInternalAllocationStorage()->storeAllocation(std::unique_ptr(allocation), REUSABLE_ALLOCATION); EXPECT_FALSE(commandStreamReceiver.getAllocationsForReuse().peekIsEmpty()); EXPECT_TRUE(commandStreamReceiver.getAllocationsForReuse().peekContains(*allocation)); const auto &indirectHeap = cmdQ.getIndirectHeap(this->GetParam(), 100); EXPECT_EQ(indirectHeap.getGraphicsAllocation(), allocation); // if we obtain heap from reusable pool, we need to keep the size of allocation // surface state heap is an exception, it is capped at (max_ssh_size_for_HW - page_size) if (this->GetParam() == IndirectHeap::Type::SURFACE_STATE) { EXPECT_EQ(commandStreamReceiver.defaultSshSize - MemoryConstants::pageSize, indirectHeap.getMaxAvailableSpace()); } else { EXPECT_EQ(allocationSize, indirectHeap.getMaxAvailableSpace()); } EXPECT_TRUE(commandStreamReceiver.getAllocationsForReuse().peekIsEmpty()); } HWTEST_P(CommandQueueIndirectHeapTest, WhenAskedForNewHeapThenOldHeapIsStoredForReuse) { const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0}; MockCommandQueue cmdQ(context.get(), pClDevice, props, false); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); EXPECT_TRUE(commandStreamReceiver.getAllocationsForReuse().peekIsEmpty()); *commandStreamReceiver.getTagAddress() = 1u; commandStreamReceiver.taskCount = 2u; const auto &indirectHeap = cmdQ.getIndirectHeap(this->GetParam(), 100); 
auto heapSize = indirectHeap.getAvailableSpace(); auto graphicsAllocation = indirectHeap.getGraphicsAllocation(); // Request a larger heap than the first. cmdQ.getIndirectHeap(this->GetParam(), heapSize + 6000); EXPECT_FALSE(commandStreamReceiver.getAllocationsForReuse().peekIsEmpty()); EXPECT_TRUE(commandStreamReceiver.getAllocationsForReuse().peekContains(*graphicsAllocation)); *commandStreamReceiver.getTagAddress() = 2u; } TEST_P(CommandQueueIndirectHeapTest, GivenCommandQueueWithoutHeapAllocationWhenAskedForNewHeapThenNewAllocationIsAcquiredWithoutStoring) { const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0}; MockCommandQueue cmdQ(context.get(), pClDevice, props, false); auto memoryManager = pDevice->getMemoryManager(); auto &csr = pDevice->getUltCommandStreamReceiver(); EXPECT_TRUE(pDevice->getDefaultEngine().commandStreamReceiver->getAllocationsForReuse().peekIsEmpty()); const auto &indirectHeap = cmdQ.getIndirectHeap(this->GetParam(), 100); auto heapSize = indirectHeap.getAvailableSpace(); auto graphicsAllocation = indirectHeap.getGraphicsAllocation(); csr.indirectHeap[this->GetParam()]->replaceGraphicsAllocation(nullptr); csr.indirectHeap[this->GetParam()]->replaceBuffer(nullptr, 0); // Request a larger heap than the first. 
cmdQ.getIndirectHeap(this->GetParam(), heapSize + 6000);

    EXPECT_NE(graphicsAllocation, indirectHeap.getGraphicsAllocation());
    memoryManager->freeGraphicsMemory(graphicsAllocation);
}

// Creates a queue on the heap, obtains an indirect heap through it and destroys the queue;
// the CSR's reuse list is expected to be empty both before and after the destruction.
TEST_P(CommandQueueIndirectHeapTest, givenCommandQueueWithResourceCachingActiveWhenQueueISDestroyedThenIndirectHeapIsNotOnReuseList) {
    auto cmdQ = new MockCommandQueue(context.get(), pClDevice, 0, false);
    cmdQ->getIndirectHeap(this->GetParam(), 100);
    EXPECT_TRUE(pDevice->getDefaultEngine().commandStreamReceiver->getAllocationsForReuse().peekIsEmpty());

    //now destroy command queue, heap should not go to reusable list
    delete cmdQ;
    EXPECT_TRUE(pDevice->getDefaultEngine().commandStreamReceiver->getAllocationsForReuse().peekIsEmpty());
}

// After releaseIndirectHeap() the CSR-held heap reports no graphics allocation, a null CPU
// base and zero maximum available space.
TEST_P(CommandQueueIndirectHeapTest, GivenCommandQueueWithHeapAllocatedWhenIndirectHeapIsReleasedThenHeapAllocationAndHeapBufferIsSetToNullptr) {
    const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0};
    MockCommandQueue cmdQ(context.get(), pClDevice, props, false);

    EXPECT_TRUE(pDevice->getDefaultEngine().commandStreamReceiver->getAllocationsForReuse().peekIsEmpty());

    const auto &indirectHeap = cmdQ.getIndirectHeap(this->GetParam(), 100);
    auto heapSize = indirectHeap.getMaxAvailableSpace();
    EXPECT_NE(0u, heapSize);

    auto graphicsAllocation = indirectHeap.getGraphicsAllocation();
    EXPECT_NE(nullptr, graphicsAllocation);

    cmdQ.releaseIndirectHeap(this->GetParam());
    auto &csr = pDevice->getUltCommandStreamReceiver();

    EXPECT_EQ(nullptr, csr.indirectHeap[this->GetParam()]->getGraphicsAllocation());

    EXPECT_EQ(nullptr, indirectHeap.getCpuBase());
    EXPECT_EQ(0u, indirectHeap.getMaxAvailableSpace());
}

TEST_P(CommandQueueIndirectHeapTest, GivenCommandQueueWithoutHeapAllocatedWhenIndirectHeapIsReleasedThenIndirectHeapAllocationStaysNull) {
    const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0};
    MockCommandQueue cmdQ(context.get(), pClDevice, props, false);

    cmdQ.releaseIndirectHeap(this->GetParam());
    auto &csr = pDevice->getUltCommandStreamReceiver();
EXPECT_EQ(nullptr, csr.indirectHeap[this->GetParam()]);
}

// Obtains a heap, detaches its allocation and buffer from the CSR-held heap, releases the
// heap and checks that the reuse list stays empty. The detached allocation is freed by hand.
TEST_P(CommandQueueIndirectHeapTest, GivenCommandQueueWithHeapWhenGraphicAllocationIsNullThenNothingOnReuseList) {
    const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0};
    MockCommandQueue cmdQ(context.get(), pClDevice, props, false);

    auto &ih = cmdQ.getIndirectHeap(this->GetParam(), 0u);
    auto allocation = ih.getGraphicsAllocation();
    EXPECT_NE(nullptr, allocation);
    auto &csr = pDevice->getUltCommandStreamReceiver();

    csr.indirectHeap[this->GetParam()]->replaceGraphicsAllocation(nullptr);
    csr.indirectHeap[this->GetParam()]->replaceBuffer(nullptr, 0);

    cmdQ.releaseIndirectHeap(this->GetParam());

    auto memoryManager = pDevice->getMemoryManager();
    EXPECT_TRUE(pDevice->getDefaultEngine().commandStreamReceiver->getAllocationsForReuse().peekIsEmpty());

    memoryManager->freeGraphicsMemory(allocation);
}

// Expects INTERNAL_HEAP for an INDIRECT_OBJECT heap when the CSR reports canUse4GbHeaps,
// and LINEAR_STREAM in every other case.
HWTEST_P(CommandQueueIndirectHeapTest, givenCommandQueueWhenGetIndirectHeapIsCalledThenIndirectHeapAllocationTypeShouldBeSetToInternalHeapForIohAndLinearStreamForOthers) {
    const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0};
    MockCommandQueue cmdQ(context.get(), pClDevice, props, false);
    auto &commandStreamReceiver = pClDevice->getUltCommandStreamReceiver();

    auto heapType = this->GetParam();

    bool requireInternalHeap = IndirectHeap::Type::INDIRECT_OBJECT == heapType && commandStreamReceiver.canUse4GbHeaps;
    const auto &indirectHeap = cmdQ.getIndirectHeap(heapType, 100);
    auto indirectHeapAllocation = indirectHeap.getGraphicsAllocation();
    ASSERT_NE(nullptr, indirectHeapAllocation);
    auto expectedAllocationType = AllocationType::LINEAR_STREAM;
    if (requireInternalHeap) {
        expectedAllocationType = AllocationType::INTERNAL_HEAP;
    }
    EXPECT_EQ(expectedAllocationType, indirectHeapAllocation->getAllocationType());
}

TEST_P(CommandQueueIndirectHeapTest, givenCommandQueueWhenGetHeapMemoryIsCalledThenHeapIsCreated) {
    const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0};
    MockCommandQueue cmdQ(context.get(),
pClDevice, props, false); IndirectHeap *indirectHeap = nullptr; cmdQ.allocateHeapMemory(this->GetParam(), 100, indirectHeap); EXPECT_NE(nullptr, indirectHeap); EXPECT_NE(nullptr, indirectHeap->getGraphicsAllocation()); pDevice->getMemoryManager()->freeGraphicsMemory(indirectHeap->getGraphicsAllocation()); delete indirectHeap; } TEST_F(CommandQueueIndirectHeapTest, givenForceDefaultHeapSizeWhenGetHeapMemoryIsCalledThenHeapIsCreatedWithProperSize) { DebugManagerStateRestore restorer; DebugManager.flags.ForceDefaultHeapSize.set(64 * MemoryConstants::kiloByte); const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0}; MockCommandQueue cmdQ(context.get(), pClDevice, props, false); IndirectHeap *indirectHeap = nullptr; cmdQ.allocateHeapMemory(IndirectHeap::Type::INDIRECT_OBJECT, 100, indirectHeap); EXPECT_NE(nullptr, indirectHeap); EXPECT_NE(nullptr, indirectHeap->getGraphicsAllocation()); EXPECT_EQ(indirectHeap->getAvailableSpace(), 64 * MemoryConstants::megaByte); pDevice->getMemoryManager()->freeGraphicsMemory(indirectHeap->getGraphicsAllocation()); delete indirectHeap; } TEST_P(CommandQueueIndirectHeapTest, givenCommandQueueWhenGetHeapMemoryIsCalledWithAlreadyAllocatedHeapThenGraphicsAllocationIsCreated) { const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0}; MockCommandQueue cmdQ(context.get(), pClDevice, props, false); IndirectHeap heap(nullptr, size_t{100}); IndirectHeap *indirectHeap = &heap; cmdQ.allocateHeapMemory(this->GetParam(), 100, indirectHeap); EXPECT_EQ(&heap, indirectHeap); EXPECT_NE(nullptr, indirectHeap->getGraphicsAllocation()); pDevice->getMemoryManager()->freeGraphicsMemory(indirectHeap->getGraphicsAllocation()); } INSTANTIATE_TEST_CASE_P( Device, CommandQueueIndirectHeapTest, testing::Values( IndirectHeap::Type::DYNAMIC_STATE, IndirectHeap::Type::INDIRECT_OBJECT, IndirectHeap::Type::SURFACE_STATE)); using CommandQueueTests = ::testing::Test; HWTEST_F(CommandQueueTests, 
givenMultipleCommandQueuesWhenMarkerIsEmittedThenGraphicsAllocationIsReused) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); MockContext context(device.get()); std::unique_ptr commandQ(new MockCommandQueue(&context, device.get(), 0, false)); *device->getDefaultEngine().commandStreamReceiver->getTagAddress() = 0; commandQ->enqueueMarkerWithWaitList(0, nullptr, nullptr); commandQ->enqueueMarkerWithWaitList(0, nullptr, nullptr); auto commandStreamGraphicsAllocation = commandQ->getCS(0).getGraphicsAllocation(); commandQ.reset(new MockCommandQueue(&context, device.get(), 0, false)); commandQ->enqueueMarkerWithWaitList(0, nullptr, nullptr); commandQ->enqueueMarkerWithWaitList(0, nullptr, nullptr); auto commandStreamGraphicsAllocation2 = commandQ->getCS(0).getGraphicsAllocation(); EXPECT_EQ(commandStreamGraphicsAllocation, commandStreamGraphicsAllocation2); } HWTEST_F(CommandQueueTests, givenEngineUsageHintSetWithInvalidValueWhenCreatingCommandQueueThenReturnSuccess) { DebugManagerStateRestore restore; DebugManager.flags.EngineUsageHint.set(static_cast(EngineUsage::EngineUsageCount)); auto pDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); MockContext context(pDevice.get()); cl_int retVal = CL_SUCCESS; cl_queue_properties propertiesCooperativeQueue[] = {CL_QUEUE_FAMILY_INTEL, 0, CL_QUEUE_INDEX_INTEL, 0, 0}; auto pCmdQ = CommandQueue::create( &context, pDevice.get(), propertiesCooperativeQueue, false, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, pCmdQ); EXPECT_EQ(EngineUsage::Regular, pCmdQ->getGpgpuEngine().getEngineUsage()); delete pCmdQ; } struct WaitForQueueCompletionTests : public ::testing::Test { template struct MyCmdQueue : public CommandQueueHw { MyCmdQueue(Context *context, ClDevice *device) : CommandQueueHw(context, device, nullptr, false){}; WaitStatus waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range copyEnginesToWait, FlushStamp 
flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList, bool skipWait) override {
            // Record the requested flag and the call, then report completion without waiting.
            requestedUseQuickKmdSleep = useQuickKmdSleep;
            waitUntilCompleteCounter++;

            return WaitStatus::Ready;
        }
        bool isQueueBlocked() override {
            return false;
        }
        bool requestedUseQuickKmdSleep = false;    // last useQuickKmdSleep value passed to waitUntilComplete
        uint32_t waitUntilCompleteCounter = 0;     // number of waitUntilComplete calls
    };

    void SetUp() override {
        device = std::make_unique(MockClDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()));
        context.reset(new MockContext(device.get()));
    }

    std::unique_ptr device;
    std::unique_ptr context;
};

// A blocking read (CL_TRUE) with no wait events calls waitUntilComplete once, with
// useQuickKmdSleep == false.
HWTEST_F(WaitForQueueCompletionTests, givenBlockingCallAndUnblockedQueueWhenEnqueuedThenCallWaitWithoutQuickKmdSleepRequest) {
    std::unique_ptr> cmdQ(new MyCmdQueue(context.get(), device.get()));
    uint32_t tmpPtr = 0;
    auto buffer = std::unique_ptr(BufferHelper<>::create(context.get()));
    cmdQ->enqueueReadBuffer(buffer.get(), CL_TRUE, 0, 1, &tmpPtr, nullptr, 0, nullptr, nullptr);
    EXPECT_EQ(1u, cmdQ->waitUntilCompleteCounter);
    EXPECT_FALSE(cmdQ->requestedUseQuickKmdSleep);
}

// Same as above, but the read is enqueued with one event in its wait list.
HWTEST_F(WaitForQueueCompletionTests, givenBlockingCallAndBlockedQueueWhenEnqueuedThenCallWaitWithoutQuickKmdSleepRequest) {
    std::unique_ptr> cmdQ(new MyCmdQueue(context.get(), device.get()));
    std::unique_ptr blockingEvent(new Event(cmdQ.get(), CL_COMMAND_NDRANGE_KERNEL, 0, 0));
    cl_event clBlockingEvent = blockingEvent.get();
    uint32_t tmpPtr = 0;
    auto buffer = std::unique_ptr(BufferHelper<>::create(context.get()));
    cmdQ->enqueueReadBuffer(buffer.get(), CL_TRUE, 0, 1, &tmpPtr, nullptr, 1, &clBlockingEvent, nullptr);
    EXPECT_EQ(1u, cmdQ->waitUntilCompleteCounter);
    EXPECT_FALSE(cmdQ->requestedUseQuickKmdSleep);
}

// finish() calls waitUntilComplete once, with useQuickKmdSleep == false.
HWTEST_F(WaitForQueueCompletionTests, whenFinishIsCalledThenCallWaitWithoutQuickKmdSleepRequest) {
    std::unique_ptr> cmdQ(new MyCmdQueue(context.get(), device.get()));
    cmdQ->finish();
    EXPECT_EQ(1u, cmdQ->waitUntilCompleteCounter);
    EXPECT_FALSE(cmdQ->requestedUseQuickKmdSleep);
}

template class CommandStreamReceiverHwMock : public
CommandStreamReceiverHw {
  public:
    CommandStreamReceiverHwMock(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield)
        : CommandStreamReceiverHw(executionEnvironment, rootDeviceIndex, deviceBitfield) {}

    // Each override below only counts the call and returns the matching configurable status.
    WaitStatus waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, QueueThrottle throttle) override {
        waitForTaskCountWithKmdNotifyFallbackCounter++;
        return waitForTaskCountWithKmdNotifyFallbackReturnValue;
    }

    WaitStatus waitForTaskCount(uint32_t requiredTaskCount) override {
        waitForTaskCountCalledCounter++;
        return waitForTaskCountReturnValue;
    }

    WaitStatus waitForTaskCountAndCleanTemporaryAllocationList(uint32_t requiredTaskCount) override {
        waitForTaskCountAndCleanTemporaryAllocationListCalledCounter++;
        return waitForTaskCountAndCleanTemporaryAllocationListReturnValue;
    }

    // Call counters, one per overridden wait method.
    int waitForTaskCountCalledCounter{0};
    int waitForTaskCountWithKmdNotifyFallbackCounter{0};
    int waitForTaskCountAndCleanTemporaryAllocationListCalledCounter{0};

    // Values returned by the overridden wait methods; tests overwrite these.
    WaitStatus waitForTaskCountReturnValue{WaitStatus::Ready};
    WaitStatus waitForTaskCountWithKmdNotifyFallbackReturnValue{WaitStatus::Ready};
    WaitStatus waitForTaskCountAndCleanTemporaryAllocationListReturnValue{WaitStatus::Ready};
};

// Fixture for the waitUntilComplete tests: the nested queue type exposes gpgpuEngine and
// returns a test-supplied CSR from getBcsCommandStreamReceiver().
// NOTE(review): `template struct ...` has no parameter list in this copy - confirm against the original.
struct WaitUntilCompletionTests : public ::testing::Test {
    template struct MyCmdQueue : public CommandQueueHw {
      public:
        using CommandQueue::gpgpuEngine;

        MyCmdQueue(Context *context, ClDevice *device) : CommandQueueHw(context, device, nullptr, false){};

        CommandStreamReceiver *getBcsCommandStreamReceiver(aub_stream::EngineType bcsEngineType) const override {
            return bcsCsrToReturn;
        }

        CommandStreamReceiver *bcsCsrToReturn{nullptr};
    };

    void SetUp() override {
        device = std::make_unique(MockClDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()));
        context.reset(new MockContext(device.get()));
    }

    std::unique_ptr device;
    std::unique_ptr context;
};

HWTEST_F(WaitUntilCompletionTests,
givenCleanTemporaryAllocationListEqualsFalseWhenWaitingUntilCompleteThenWaitForTaskCountIsCalledAndItsReturnValueIsPropagated) {
    std::unique_ptr> cmdStream(new CommandStreamReceiverHwMock(*device->getExecutionEnvironment(), device->getRootDeviceIndex(), device->getDeviceBitfield()));
    cmdStream->initializeTagAllocation();
    cmdStream->waitForTaskCountReturnValue = WaitStatus::Ready;

    std::unique_ptr> cmdQ(new MyCmdQueue(context.get(), device.get()));
    // Swap the mock CSR into the queue's engine; the original pointer is restored at the end.
    CommandStreamReceiver *oldCommandStreamReceiver = cmdQ->gpgpuEngine->commandStreamReceiver;
    cmdQ->gpgpuEngine->commandStreamReceiver = cmdStream.get();

    constexpr uint32_t taskCount = 0u;
    constexpr bool cleanTemporaryAllocationList = false;
    StackVec activeBcsStates{};

    const auto waitStatus = cmdQ->waitUntilComplete(taskCount, activeBcsStates, cmdQ->flushStamp->peekStamp(), false, cleanTemporaryAllocationList, false);
    EXPECT_EQ(WaitStatus::Ready, waitStatus);
    EXPECT_EQ(1, cmdStream->waitForTaskCountCalledCounter);

    cmdQ->gpgpuEngine->commandStreamReceiver = oldCommandStreamReceiver;
}

HWTEST_F(WaitUntilCompletionTests, givenGpuHangAndCleanTemporaryAllocationListEqualsTrueWhenWaitingUntilCompleteThenWaitForTaskCountAndCleanAllocationIsCalledAndGpuHangIsReturned) {
    std::unique_ptr> cmdStream(new CommandStreamReceiverHwMock(*device->getExecutionEnvironment(), device->getRootDeviceIndex(), device->getDeviceBitfield()));
    cmdStream->initializeTagAllocation();
    cmdStream->waitForTaskCountAndCleanTemporaryAllocationListReturnValue = WaitStatus::GpuHang;

    std::unique_ptr> cmdQ(new MyCmdQueue(context.get(), device.get()));
    // Swap the mock CSR into the queue's engine; the original pointer is kept for restoring.
    CommandStreamReceiver *oldCommandStreamReceiver = cmdQ->gpgpuEngine->commandStreamReceiver;
    cmdQ->gpgpuEngine->commandStreamReceiver = cmdStream.get();

    constexpr uint32_t taskCount = 0u;
    constexpr bool cleanTemporaryAllocationList = true;
    StackVec activeBcsStates{};

    const auto waitStatus = cmdQ->waitUntilComplete(taskCount, activeBcsStates, cmdQ->flushStamp->peekStamp(), false, cleanTemporaryAllocationList, false);
EXPECT_EQ(WaitStatus::GpuHang, waitStatus);
    EXPECT_EQ(1, cmdStream->waitForTaskCountAndCleanTemporaryAllocationListCalledCounter);

    cmdQ->gpgpuEngine->commandStreamReceiver = oldCommandStreamReceiver;
}

// With skipWait == true and no active BCS states, waitForTaskCountWithKmdNotifyFallback
// must not be called on the gpgpu CSR.
HWTEST_F(WaitUntilCompletionTests, givenEmptyBcsStatesAndSkipWaitEqualsTrueWhenWaitingUntilCompleteThenWaitForTaskCountWithKmdNotifyFallbackIsNotCalled) {
    std::unique_ptr> cmdStream(new CommandStreamReceiverHwMock(*device->getExecutionEnvironment(), device->getRootDeviceIndex(), device->getDeviceBitfield()));
    cmdStream->initializeTagAllocation();

    std::unique_ptr> cmdQ(new MyCmdQueue(context.get(), device.get()));
    // Swap the mock CSR into the queue's engine; the original pointer is restored at the end.
    CommandStreamReceiver *oldCommandStreamReceiver = cmdQ->gpgpuEngine->commandStreamReceiver;
    cmdQ->gpgpuEngine->commandStreamReceiver = cmdStream.get();

    constexpr uint32_t taskCount = 0u;
    constexpr bool skipWait = true;
    StackVec activeBcsStates{};

    cmdQ->waitUntilComplete(taskCount, activeBcsStates, cmdQ->flushStamp->peekStamp(), false, false, skipWait);
    EXPECT_EQ(0, cmdStream->waitForTaskCountWithKmdNotifyFallbackCounter);

    cmdQ->gpgpuEngine->commandStreamReceiver = oldCommandStreamReceiver;
}

HWTEST_F(WaitUntilCompletionTests, givenGpuHangAndSkipWaitEqualsFalseWhenWaitingUntilCompleteThenOnlyWaitForTaskCountWithKmdNotifyFallbackIsCalledAndGpuHangIsReturned) {
    std::unique_ptr> cmdStream(new CommandStreamReceiverHwMock(*device->getExecutionEnvironment(), device->getRootDeviceIndex(), device->getDeviceBitfield()));
    cmdStream->initializeTagAllocation();
    cmdStream->waitForTaskCountWithKmdNotifyFallbackReturnValue = WaitStatus::GpuHang;

    std::unique_ptr> cmdQ(new MyCmdQueue(context.get(), device.get()));
    // Swap the mock CSR into the queue's engine; the original pointer is kept for restoring.
    CommandStreamReceiver *oldCommandStreamReceiver = cmdQ->gpgpuEngine->commandStreamReceiver;
    cmdQ->gpgpuEngine->commandStreamReceiver = cmdStream.get();

    constexpr uint32_t taskCount = 0u;
    constexpr bool skipWait = false;
    StackVec activeBcsStates{};

    const auto waitStatus = cmdQ->waitUntilComplete(taskCount, activeBcsStates, cmdQ->flushStamp->peekStamp(), false, false,
skipWait);
    EXPECT_EQ(WaitStatus::GpuHang, waitStatus);
    EXPECT_EQ(0, cmdStream->waitForTaskCountCalledCounter);
    EXPECT_EQ(1, cmdStream->waitForTaskCountWithKmdNotifyFallbackCounter);
    EXPECT_EQ(0, cmdStream->waitForTaskCountAndCleanTemporaryAllocationListCalledCounter);

    cmdQ->gpgpuEngine->commandStreamReceiver = oldCommandStreamReceiver;
}

// The gpgpu CSR mock returns Ready and the BCS CSR mock returns GpuHang from
// waitForTaskCountWithKmdNotifyFallback; one CopyEngineState is passed as active.
HWTEST_F(WaitUntilCompletionTests, givenGpuHangOnBcsCsrWhenWaitingUntilCompleteThenOnlyWaitForTaskCountWithKmdNotifyFallbackIsCalledOnBcsCsrAndGpuHangIsReturned) {
    std::unique_ptr> gpgpuCmdStream(new CommandStreamReceiverHwMock(*device->getExecutionEnvironment(), device->getRootDeviceIndex(), device->getDeviceBitfield()));
    gpgpuCmdStream->initializeTagAllocation();
    gpgpuCmdStream->waitForTaskCountWithKmdNotifyFallbackReturnValue = WaitStatus::Ready;

    std::unique_ptr> bcsCmdStream(new CommandStreamReceiverHwMock(*device->getExecutionEnvironment(), device->getRootDeviceIndex(), device->getDeviceBitfield()));
    bcsCmdStream->initializeTagAllocation();
    bcsCmdStream->waitForTaskCountWithKmdNotifyFallbackReturnValue = WaitStatus::GpuHang;

    std::unique_ptr> cmdQ(new MyCmdQueue(context.get(), device.get()));
    // Swap the gpgpu mock into the queue's engine and route BCS lookups to the BCS mock.
    CommandStreamReceiver *oldCommandStreamReceiver = cmdQ->gpgpuEngine->commandStreamReceiver;
    cmdQ->gpgpuEngine->commandStreamReceiver = gpgpuCmdStream.get();
    cmdQ->bcsCsrToReturn = bcsCmdStream.get();

    constexpr uint32_t taskCount = 0u;
    constexpr bool skipWait = false;
    StackVec activeBcsStates{CopyEngineState{}};

    const auto waitStatus = cmdQ->waitUntilComplete(taskCount, activeBcsStates, cmdQ->flushStamp->peekStamp(), false, false, skipWait);
    EXPECT_EQ(WaitStatus::GpuHang, waitStatus);

    EXPECT_EQ(0, gpgpuCmdStream->waitForTaskCountCalledCounter);
    EXPECT_EQ(1, gpgpuCmdStream->waitForTaskCountWithKmdNotifyFallbackCounter);
    EXPECT_EQ(0, gpgpuCmdStream->waitForTaskCountAndCleanTemporaryAllocationListCalledCounter);

    EXPECT_EQ(0, bcsCmdStream->waitForTaskCountCalledCounter);
    EXPECT_EQ(1, bcsCmdStream->waitForTaskCountWithKmdNotifyFallbackCounter);
EXPECT_EQ(0, bcsCmdStream->waitForTaskCountAndCleanTemporaryAllocationListCalledCounter);

    cmdQ->gpgpuEngine->commandStreamReceiver = oldCommandStreamReceiver;
}

// Both CSR mocks return Ready from waitForTaskCountWithKmdNotifyFallback; the BCS mock
// returns GpuHang from waitForTaskCountAndCleanTemporaryAllocationList.
HWTEST_F(WaitUntilCompletionTests, givenGpuHangOnBcsCsrWhenWaitingUntilCompleteThenWaitForTaskCountAndCleanTemporaryAllocationListIsCalledOnBcsCsrAndGpuHangIsReturned) {
    std::unique_ptr> gpgpuCmdStream(new CommandStreamReceiverHwMock(*device->getExecutionEnvironment(), device->getRootDeviceIndex(), device->getDeviceBitfield()));
    gpgpuCmdStream->initializeTagAllocation();
    gpgpuCmdStream->waitForTaskCountWithKmdNotifyFallbackReturnValue = WaitStatus::Ready;

    std::unique_ptr> bcsCmdStream(new CommandStreamReceiverHwMock(*device->getExecutionEnvironment(), device->getRootDeviceIndex(), device->getDeviceBitfield()));
    bcsCmdStream->initializeTagAllocation();
    bcsCmdStream->waitForTaskCountWithKmdNotifyFallbackReturnValue = WaitStatus::Ready;
    bcsCmdStream->waitForTaskCountAndCleanTemporaryAllocationListReturnValue = WaitStatus::GpuHang;

    std::unique_ptr> cmdQ(new MyCmdQueue(context.get(), device.get()));
    // Swap the gpgpu mock into the queue's engine and route BCS lookups to the BCS mock.
    CommandStreamReceiver *oldCommandStreamReceiver = cmdQ->gpgpuEngine->commandStreamReceiver;
    cmdQ->gpgpuEngine->commandStreamReceiver = gpgpuCmdStream.get();
    cmdQ->bcsCsrToReturn = bcsCmdStream.get();

    constexpr uint32_t taskCount = 0u;
    constexpr bool skipWait = false;
    StackVec activeBcsStates{CopyEngineState{}};

    const auto waitStatus = cmdQ->waitUntilComplete(taskCount, activeBcsStates, cmdQ->flushStamp->peekStamp(), false, false, skipWait);
    EXPECT_EQ(WaitStatus::GpuHang, waitStatus);

    EXPECT_EQ(0, gpgpuCmdStream->waitForTaskCountCalledCounter);
    EXPECT_EQ(1, gpgpuCmdStream->waitForTaskCountWithKmdNotifyFallbackCounter);
    EXPECT_EQ(0, gpgpuCmdStream->waitForTaskCountAndCleanTemporaryAllocationListCalledCounter);

    EXPECT_EQ(0, bcsCmdStream->waitForTaskCountCalledCounter);
    EXPECT_EQ(1, bcsCmdStream->waitForTaskCountWithKmdNotifyFallbackCounter);
    EXPECT_EQ(1,
bcsCmdStream->waitForTaskCountAndCleanTemporaryAllocationListCalledCounter);

    cmdQ->gpgpuEngine->commandStreamReceiver = oldCommandStreamReceiver;
}

// Every mocked wait method returns Ready on both CSRs. The expected counters show one
// waitForTaskCount call on the gpgpu CSR and one clean-up wait on the BCS CSR.
HWTEST_F(WaitUntilCompletionTests, givenSuccessOnBcsCsrWhenWaitingUntilCompleteThenGpgpuCsrWaitStatusIsReturned) {
    std::unique_ptr> gpgpuCmdStream(new CommandStreamReceiverHwMock(*device->getExecutionEnvironment(), device->getRootDeviceIndex(), device->getDeviceBitfield()));
    gpgpuCmdStream->initializeTagAllocation();
    gpgpuCmdStream->waitForTaskCountWithKmdNotifyFallbackReturnValue = WaitStatus::Ready;
    gpgpuCmdStream->waitForTaskCountReturnValue = WaitStatus::Ready;

    std::unique_ptr> bcsCmdStream(new CommandStreamReceiverHwMock(*device->getExecutionEnvironment(), device->getRootDeviceIndex(), device->getDeviceBitfield()));
    bcsCmdStream->initializeTagAllocation();
    bcsCmdStream->waitForTaskCountWithKmdNotifyFallbackReturnValue = WaitStatus::Ready;
    bcsCmdStream->waitForTaskCountAndCleanTemporaryAllocationListReturnValue = WaitStatus::Ready;

    std::unique_ptr> cmdQ(new MyCmdQueue(context.get(), device.get()));
    // Swap the gpgpu mock into the queue's engine and route BCS lookups to the BCS mock.
    CommandStreamReceiver *oldCommandStreamReceiver = cmdQ->gpgpuEngine->commandStreamReceiver;
    cmdQ->gpgpuEngine->commandStreamReceiver = gpgpuCmdStream.get();
    cmdQ->bcsCsrToReturn = bcsCmdStream.get();

    constexpr uint32_t taskCount = 0u;
    constexpr bool skipWait = false;
    StackVec activeBcsStates{CopyEngineState{}};

    const auto waitStatus = cmdQ->waitUntilComplete(taskCount, activeBcsStates, cmdQ->flushStamp->peekStamp(), false, false, skipWait);
    EXPECT_EQ(WaitStatus::Ready, waitStatus);

    EXPECT_EQ(1, gpgpuCmdStream->waitForTaskCountCalledCounter);
    EXPECT_EQ(1, gpgpuCmdStream->waitForTaskCountWithKmdNotifyFallbackCounter);
    EXPECT_EQ(0, gpgpuCmdStream->waitForTaskCountAndCleanTemporaryAllocationListCalledCounter);

    EXPECT_EQ(0, bcsCmdStream->waitForTaskCountCalledCounter);
    EXPECT_EQ(1, bcsCmdStream->waitForTaskCountWithKmdNotifyFallbackCounter);
    EXPECT_EQ(1,
bcsCmdStream->waitForTaskCountAndCleanTemporaryAllocationListCalledCounter);

    cmdQ->gpgpuEngine->commandStreamReceiver = oldCommandStreamReceiver;
}

// Zero objects with a null object list is accepted: CL_SUCCESS.
TEST(CommandQueue, givenEnqueueAcquireSharedObjectsWhenNoObjectsThenReturnSuccess) {
    MockContext context;
    MockCommandQueue cmdQ(&context, nullptr, 0, false);

    cl_uint numObjects = 0;
    cl_mem *memObjects = nullptr;

    cl_int result = cmdQ.enqueueAcquireSharedObjects(numObjects, memObjects, 0, nullptr, nullptr, 0);
    EXPECT_EQ(result, CL_SUCCESS);
}

// Sharing handler whose synchronizeObject() always marks the update as ACQUIRE_SUCCESFUL.
class MockSharingHandler : public SharingHandler {
  public:
    void synchronizeObject(UpdateData &updateData) override {
        updateData.synchronizationStatus = ACQUIRE_SUCCESFUL;
    }
};

// Acquire and release of an image carrying a MockSharingHandler both return CL_SUCCESS.
TEST(CommandQueue, givenEnqueuesForSharedObjectsWithImageWhenUsingSharingHandlerThenReturnSuccess) {
    MockContext context;
    MockCommandQueue cmdQ(&context, context.getDevice(0), 0, false);
    MockSharingHandler *mockSharingHandler = new MockSharingHandler;

    auto image = std::unique_ptr(ImageHelper::create(&context));
    image->setSharingHandler(mockSharingHandler);

    cl_mem memObject = image.get();
    cl_uint numObjects = 1;
    cl_mem *memObjects = &memObject;

    cl_int result = cmdQ.enqueueAcquireSharedObjects(numObjects, memObjects, 0, nullptr, nullptr, 0);
    EXPECT_EQ(result, CL_SUCCESS);

    result = cmdQ.enqueueReleaseSharedObjects(numObjects, memObjects, 0, nullptr, nullptr, 0);
    EXPECT_EQ(result, CL_SUCCESS);
}

TEST(CommandQueue, givenEnqueuesForSharedObjectsWithImageWhenUsingSharingHandlerWithEventThenReturnSuccess) {
    auto mockDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr));
    MockContext context;
    MockCommandQueue cmdQ(&context, mockDevice.get(), 0, false);
    MockSharingHandler *mockSharingHandler = new MockSharingHandler;

    auto image = std::unique_ptr(ImageHelper::create(&context));
    image->setSharingHandler(mockSharingHandler);

    cl_mem memObject = image.get();
    cl_uint numObjects = 1;
    cl_mem *memObjects = &memObject;

    Event *eventAcquire = new Event(&cmdQ, CL_COMMAND_NDRANGE_KERNEL, 1, 5);
    cl_event
clEventAquire = eventAcquire; cl_int result = cmdQ.enqueueAcquireSharedObjects(numObjects, memObjects, 0, nullptr, &clEventAquire, 0); EXPECT_EQ(result, CL_SUCCESS); ASSERT_NE(clEventAquire, nullptr); eventAcquire->release(); Event *eventRelease = new Event(&cmdQ, CL_COMMAND_NDRANGE_KERNEL, 1, 5); cl_event clEventRelease = eventRelease; result = cmdQ.enqueueReleaseSharedObjects(numObjects, memObjects, 0, nullptr, &clEventRelease, 0); EXPECT_EQ(result, CL_SUCCESS); ASSERT_NE(clEventRelease, nullptr); eventRelease->release(); } TEST(CommandQueue, givenEnqueueAcquireSharedObjectsWhenIncorrectArgumentsThenReturnProperError) { MockContext context; MockCommandQueue cmdQ(&context, nullptr, 0, false); cl_uint numObjects = 1; cl_mem *memObjects = nullptr; cl_int result = cmdQ.enqueueAcquireSharedObjects(numObjects, memObjects, 0, nullptr, nullptr, 0); EXPECT_EQ(result, CL_INVALID_VALUE); numObjects = 0; memObjects = (cl_mem *)1; result = cmdQ.enqueueAcquireSharedObjects(numObjects, memObjects, 0, nullptr, nullptr, 0); EXPECT_EQ(result, CL_INVALID_VALUE); numObjects = 0; memObjects = (cl_mem *)1; result = cmdQ.enqueueAcquireSharedObjects(numObjects, memObjects, 0, nullptr, nullptr, 0); EXPECT_EQ(result, CL_INVALID_VALUE); cl_mem memObject = nullptr; numObjects = 1; memObjects = &memObject; result = cmdQ.enqueueAcquireSharedObjects(numObjects, memObjects, 0, nullptr, nullptr, 0); EXPECT_EQ(result, CL_INVALID_MEM_OBJECT); auto buffer = std::unique_ptr(BufferHelper<>::create(&context)); memObject = buffer.get(); numObjects = 1; memObjects = &memObject; result = cmdQ.enqueueAcquireSharedObjects(numObjects, memObjects, 0, nullptr, nullptr, 0); EXPECT_EQ(result, CL_INVALID_MEM_OBJECT); } TEST(CommandQueue, givenEnqueueReleaseSharedObjectsWhenNoObjectsThenReturnSuccess) { MockContext context; MockCommandQueue cmdQ(&context, nullptr, 0, false); cl_uint numObjects = 0; cl_mem *memObjects = nullptr; cl_int result = cmdQ.enqueueReleaseSharedObjects(numObjects, memObjects, 0, nullptr, 
nullptr, 0);
    EXPECT_EQ(result, CL_SUCCESS);
}

// Walks through the invalid argument combinations of enqueueReleaseSharedObjects:
//  - count > 0 with a null list      -> CL_INVALID_VALUE
//  - count == 0 with a non-null list -> CL_INVALID_VALUE
//  - a null cl_mem entry             -> CL_INVALID_MEM_OBJECT
//  - a buffer with no sharing handler set by this test -> CL_INVALID_MEM_OBJECT
TEST(CommandQueue, givenEnqueueReleaseSharedObjectsWhenIncorrectArgumentsThenReturnProperError) {
    MockContext context;
    MockCommandQueue cmdQ(&context, nullptr, 0, false);

    cl_uint numObjects = 1;
    cl_mem *memObjects = nullptr;

    cl_int result = cmdQ.enqueueReleaseSharedObjects(numObjects, memObjects, 0, nullptr, nullptr, 0);
    EXPECT_EQ(result, CL_INVALID_VALUE);

    // Any non-null value works here; the list is never expected to be read.
    numObjects = 0;
    memObjects = reinterpret_cast<cl_mem *>(1);

    result = cmdQ.enqueueReleaseSharedObjects(numObjects, memObjects, 0, nullptr, nullptr, 0);
    EXPECT_EQ(result, CL_INVALID_VALUE);

    cl_mem memObject = nullptr;

    numObjects = 1;
    memObjects = &memObject;

    result = cmdQ.enqueueReleaseSharedObjects(numObjects, memObjects, 0, nullptr, nullptr, 0);
    EXPECT_EQ(result, CL_INVALID_MEM_OBJECT);

    auto buffer = std::unique_ptr(BufferHelper<>::create(&context));
    memObject = buffer.get();

    numObjects = 1;
    memObjects = &memObject;

    result = cmdQ.enqueueReleaseSharedObjects(numObjects, memObjects, 0, nullptr, nullptr, 0);
    EXPECT_EQ(result, CL_INVALID_MEM_OBJECT);
}

TEST(CommandQueue, givenEnqueueAcquireSharedObjectsCallWhenAcquireFailsThenCorrectErrorIsReturned) {
    const auto rootDeviceIndex = 1u;

    // Local handler: checks the root device index passed in the update data and always
    // reports CL_INVALID_MEM_OBJECT from validation.
    class MockSharingHandler : public SharingHandler {
        int validateUpdateData(UpdateData &data) override {
            EXPECT_EQ(1u, data.rootDeviceIndex);
            return CL_INVALID_MEM_OBJECT;
        }
    };

    UltClDeviceFactory deviceFactory{2, 0};
    MockContext context(deviceFactory.rootDevices[rootDeviceIndex]);
    MockCommandQueue cmdQ(&context, context.getDevice(0), 0, false);
    auto buffer = std::unique_ptr(BufferHelper<>::create(&context));

    MockSharingHandler *handler = new MockSharingHandler;
    buffer->setSharingHandler(handler);
    cl_mem memObject = buffer.get();
    auto retVal = cmdQ.enqueueAcquireSharedObjects(1, &memObject, 0, nullptr, nullptr, 0);
EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal);
    buffer->setSharingHandler(nullptr);
}

// Allocates the CSR debug surface, calls setupDebugSurface() for a kernel created from a
// program with kernel debug enabled, and checks that the surface state at the start of the
// kernel's SSH has the debug surface GPU address as its base address.
HWTEST_F(CommandQueueCommandStreamTest, givenDebugKernelWhenSetupDebugSurfaceIsCalledThenSurfaceStateIsCorrectlySet) {
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
    MockProgram program(toClDeviceVector(*pClDevice));
    program.enableKernelDebug();
    std::unique_ptr kernel(MockKernel::create(*pDevice, &program));
    MockCommandQueue cmdQ(context.get(), pClDevice, 0, false);

    // Size the local SSH so it covers one surface state placed at the bindful offset.
    const auto &systemThreadSurfaceAddress = kernel->getAllocatedKernelInfo()->kernelDescriptor.payloadMappings.implicitArgs.systemThreadSurfaceAddress.bindful;
    kernel->setSshLocal(nullptr, sizeof(RENDER_SURFACE_STATE) + systemThreadSurfaceAddress);
    auto &commandStreamReceiver = cmdQ.getGpgpuCommandStreamReceiver();
    auto &hwInfo = *NEO::defaultHwInfo.get();
    auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily);

    cmdQ.getGpgpuCommandStreamReceiver().allocateDebugSurface(hwHelper.getSipKernelMaxDbgSurfaceSize(hwInfo));
    cmdQ.setupDebugSurface(kernel.get());

    auto debugSurface = commandStreamReceiver.getDebugSurfaceAllocation();
    ASSERT_NE(nullptr, debugSurface);
    RENDER_SURFACE_STATE *surfaceState = (RENDER_SURFACE_STATE *)kernel->getSurfaceStateHeap();
    EXPECT_EQ(debugSurface->getGpuAddress(), surfaceState->getSurfaceBaseAddress());
}

HWTEST_F(CommandQueueCommandStreamTest, givenCsrWithDebugSurfaceAllocatedWhenSetupDebugSurfaceIsCalledThenDebugSurfaceIsReused) {
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
    MockProgram program(toClDeviceVector(*pClDevice));
    program.enableKernelDebug();
    std::unique_ptr kernel(MockKernel::create(*pDevice, &program));
    MockCommandQueue cmdQ(context.get(), pClDevice, 0, false);

    const auto &systemThreadSurfaceAddress = kernel->getAllocatedKernelInfo()->kernelDescriptor.payloadMappings.implicitArgs.systemThreadSurfaceAddress.bindful;
    kernel->setSshLocal(nullptr, sizeof(RENDER_SURFACE_STATE) + systemThreadSurfaceAddress);
    auto
&commandStreamReceiver = cmdQ.getGpgpuCommandStreamReceiver(); auto hwInfo = *NEO::defaultHwInfo.get(); auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); commandStreamReceiver.allocateDebugSurface(hwHelper.getSipKernelMaxDbgSurfaceSize(hwInfo)); auto debugSurface = commandStreamReceiver.getDebugSurfaceAllocation(); ASSERT_NE(nullptr, debugSurface); cmdQ.setupDebugSurface(kernel.get()); EXPECT_EQ(debugSurface, commandStreamReceiver.getDebugSurfaceAllocation()); RENDER_SURFACE_STATE *surfaceState = (RENDER_SURFACE_STATE *)kernel->getSurfaceStateHeap(); EXPECT_EQ(debugSurface->getGpuAddress(), surfaceState->getSurfaceBaseAddress()); } struct MockTimestampPacketContainer : TimestampPacketContainer { MockTimestampPacketContainer(Context &context) : context(context) { } ~MockTimestampPacketContainer() override { EXPECT_EQ(1, context.getRefInternalCount()); } Context &context; }; TEST(CommandQueueDestructorTest, whenCommandQueueIsDestroyedThenDestroysTimestampPacketContainerBeforeReleasingContext) { auto context = new MockContext; EXPECT_EQ(1, context->getRefInternalCount()); MockCommandQueue queue(context, context->getDevice(0), nullptr, false); queue.timestampPacketContainer.reset(new MockTimestampPacketContainer(*context)); EXPECT_EQ(2, context->getRefInternalCount()); context->release(); EXPECT_EQ(1, context->getRefInternalCount()); } TEST(CommandQueuePropertiesTests, whenGetEngineIsCalledThenQueueEngineIsReturned) { MockCommandQueue queue; EngineControl engineControl; queue.gpgpuEngine = &engineControl; EXPECT_EQ(queue.gpgpuEngine, &queue.getGpgpuEngine()); } TEST(CommandQueue, GivenCommandQueueWhenEnqueueResourceBarrierCalledThenSuccessReturned) { MockContext context; MockCommandQueue cmdQ(&context, nullptr, 0, false); cl_int result = cmdQ.enqueueResourceBarrier( nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, result); } TEST(CommandQueue, GivenCommandQueueWhenCheckingIfIsCacheFlushCommandCalledThenFalseReturned) { MockContext context; 
MockCommandQueue cmdQ(&context, nullptr, 0, false); bool isCommandCacheFlush = cmdQ.isCacheFlushCommand(0u); EXPECT_FALSE(isCommandCacheFlush); } TEST(CommandQueue, givenBlitterOperationsSupportedWhenCreatingQueueThenTimestampPacketIsCreated) { DebugManagerStateRestore restore; DebugManager.flags.EnableTimestampPacket.set(0); MockContext context{}; HardwareInfo *hwInfo = context.getDevice(0)->getRootDeviceEnvironment().getMutableHardwareInfo(); if (!HwInfoConfig::get(defaultHwInfo->platform.eProductFamily)->isBlitterFullySupported(*defaultHwInfo.get())) { GTEST_SKIP(); } hwInfo->capabilityTable.blitterOperationsSupported = true; MockCommandQueue cmdQ(&context, context.getDevice(0), 0, false); EXPECT_NE(nullptr, cmdQ.timestampPacketContainer); } TEST(CommandQueue, givenCopyOnlyQueueWhenCallingBlitEnqueueAllowedThenReturnTrue) { MockContext context{}; HardwareInfo *hwInfo = context.getDevice(0)->getRootDeviceEnvironment().getMutableHardwareInfo(); MockCommandQueue queue(&context, context.getDevice(0), 0, false); if (queue.countBcsEngines() == 0) { queue.bcsEngines[0] = &context.getDevice(0)->getDefaultEngine(); } hwInfo->capabilityTable.blitterOperationsSupported = false; MultiGraphicsAllocation multiAlloc{1}; MockGraphicsAllocation alloc{}; multiAlloc.addAllocation(&alloc); alloc.memoryPool = MemoryPool::System4KBPages; CsrSelectionArgs selectionArgs{CL_COMMAND_READ_BUFFER, &multiAlloc, &multiAlloc, 0u, nullptr}; queue.isCopyOnly = false; EXPECT_EQ(queue.getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled(), queue.blitEnqueueAllowed(selectionArgs)); queue.isCopyOnly = true; EXPECT_TRUE(queue.blitEnqueueAllowed(selectionArgs)); } TEST(CommandQueue, givenSimpleClCommandWhenCallingBlitEnqueueAllowedThenReturnCorrectValue) { MockContext context{}; MockCommandQueue queue(&context, context.getDevice(0), 0, false); if (queue.countBcsEngines() == 0) { queue.bcsEngines[0] = &context.getDevice(0)->getDefaultEngine(); } MultiGraphicsAllocation multiAlloc{1}; 
MockGraphicsAllocation alloc{}; multiAlloc.addAllocation(&alloc); alloc.memoryPool = MemoryPool::System4KBPages; for (cl_command_type cmdType : {CL_COMMAND_READ_BUFFER, CL_COMMAND_READ_BUFFER_RECT, CL_COMMAND_WRITE_BUFFER, CL_COMMAND_WRITE_BUFFER_RECT, CL_COMMAND_COPY_BUFFER, CL_COMMAND_COPY_BUFFER_RECT, CL_COMMAND_SVM_MAP, CL_COMMAND_SVM_UNMAP, CL_COMMAND_SVM_MEMCPY}) { CsrSelectionArgs args{cmdType, &multiAlloc, &multiAlloc, 0u, nullptr}; bool expectedValue = queue.getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled(); if (cmdType == CL_COMMAND_COPY_IMAGE_TO_BUFFER) { expectedValue = false; } EXPECT_EQ(expectedValue, queue.blitEnqueueAllowed(args)); } } TEST(CommandQueue, givenImageTransferClCommandWhenCallingBlitEnqueueAllowedThenReturnCorrectValue) { DebugManagerStateRestore restore{}; DebugManager.flags.EnableBlitterForEnqueueOperations.set(1); DebugManager.flags.EnableBlitterForEnqueueImageOperations.set(1); MockContext context{}; MockCommandQueue queue(&context, context.getDevice(0), 0, false); if (queue.countBcsEngines() == 0) { queue.bcsEngines[0] = &context.getDevice(0)->getDefaultEngine(); } MockImageBase image{}; auto alloc = static_cast(image.getGraphicsAllocation(0)); alloc->memoryPool = MemoryPool::System4KBPages; size_t origin[3] = {0, 0, 0}; size_t region[3] = {1, 1, 1}; { CsrSelectionArgs args{CL_COMMAND_READ_IMAGE, &image, {}, 0u, region, origin, nullptr}; EXPECT_TRUE(queue.blitEnqueueAllowed(args)); } { CsrSelectionArgs args{CL_COMMAND_WRITE_IMAGE, {}, &image, 0u, region, nullptr, origin}; EXPECT_TRUE(queue.blitEnqueueAllowed(args)); } { CsrSelectionArgs args{CL_COMMAND_COPY_IMAGE, &image, &image, 0u, region, origin, origin}; EXPECT_TRUE(queue.blitEnqueueAllowed(args)); } { MockImageBase dstImage{}; dstImage.imageDesc.num_mip_levels = 2; auto dstAlloc = static_cast(dstImage.getGraphicsAllocation(0)); dstAlloc->memoryPool = MemoryPool::System4KBPages; CsrSelectionArgs args{CL_COMMAND_COPY_IMAGE, &image, &dstImage, 0u, region, origin, 
origin}; EXPECT_FALSE(queue.blitEnqueueAllowed(args)); } } TEST(CommandQueue, givenImageToBufferClCommandWhenCallingBlitEnqueueAllowedThenReturnCorrectValue) { MockContext context{}; MockCommandQueue queue(&context, context.getDevice(0), 0, false); if (queue.countBcsEngines() == 0) { queue.bcsEngines[0] = &context.getDevice(0)->getDefaultEngine(); } MultiGraphicsAllocation multiAlloc{1}; MockGraphicsAllocation alloc{}; multiAlloc.addAllocation(&alloc); alloc.memoryPool = MemoryPool::System4KBPages; CsrSelectionArgs args{CL_COMMAND_COPY_IMAGE_TO_BUFFER, &multiAlloc, &multiAlloc, 0u, nullptr}; EXPECT_FALSE(queue.blitEnqueueAllowed(args)); } template struct CsrSelectionCommandQueueTests : ::testing::Test { void SetUp() override { HardwareInfo hwInfo = *::defaultHwInfo; hwInfo.capabilityTable.blitterOperationsSupported = blitter; if (blitter) { REQUIRE_FULL_BLITTER_OR_SKIP(&hwInfo); } device = MockDevice::createWithNewExecutionEnvironment(&hwInfo); clDevice = std::make_unique(device); context = std::make_unique(clDevice.get()); cl_command_queue_properties queueProperties[5] = {}; if (selectBlitterWithQueueFamilies) { queueProperties[0] = CL_QUEUE_FAMILY_INTEL; queueProperties[1] = device->getEngineGroupIndexFromEngineGroupType(EngineGroupType::Copy); queueProperties[2] = CL_QUEUE_INDEX_INTEL; queueProperties[3] = 0; } queue = std::make_unique(context.get(), clDevice.get(), queueProperties, false); } MockDevice *device; std::unique_ptr clDevice; std::unique_ptr context; std::unique_ptr queue; }; using CsrSelectionCommandQueueWithoutBlitterTests = CsrSelectionCommandQueueTests; using CsrSelectionCommandQueueWithBlitterTests = CsrSelectionCommandQueueTests; using CsrSelectionCommandQueueWithQueueFamiliesBlitterTests = CsrSelectionCommandQueueTests; TEST_F(CsrSelectionCommandQueueWithoutBlitterTests, givenBlitterNotPresentWhenSelectingBlitterThenReturnGpgpuCsr) { DebugManagerStateRestore restore{}; DebugManager.flags.EnableBlitterForEnqueueOperations.set(1); 
BuiltinOpParams builtinOpParams{}; MockGraphicsAllocation srcGraphicsAllocation{}; MockGraphicsAllocation dstGraphicsAllocation{}; MockBuffer srcMemObj{srcGraphicsAllocation}; MockBuffer dstMemObj{dstGraphicsAllocation}; builtinOpParams.srcMemObj = &srcMemObj; builtinOpParams.dstMemObj = &dstMemObj; { srcGraphicsAllocation.memoryPool = MemoryPool::System4KBPages; dstGraphicsAllocation.memoryPool = MemoryPool::System4KBPages; CsrSelectionArgs args{CL_COMMAND_COPY_BUFFER, &srcMemObj, &dstMemObj, 0u, nullptr}; EXPECT_EQ(&queue->getGpgpuCommandStreamReceiver(), &queue->selectCsrForBuiltinOperation(args)); } { srcGraphicsAllocation.memoryPool = MemoryPool::System4KBPages; dstGraphicsAllocation.memoryPool = MemoryPool::LocalMemory; CsrSelectionArgs args{CL_COMMAND_COPY_BUFFER, &srcMemObj, &dstMemObj, 0u, nullptr}; EXPECT_EQ(&queue->getGpgpuCommandStreamReceiver(), &queue->selectCsrForBuiltinOperation(args)); } { srcGraphicsAllocation.memoryPool = MemoryPool::LocalMemory; dstGraphicsAllocation.memoryPool = MemoryPool::System4KBPages; CsrSelectionArgs args{CL_COMMAND_COPY_BUFFER, &srcMemObj, &dstMemObj, 0u, nullptr}; EXPECT_EQ(&queue->getGpgpuCommandStreamReceiver(), &queue->selectCsrForBuiltinOperation(args)); } { srcGraphicsAllocation.memoryPool = MemoryPool::LocalMemory; dstGraphicsAllocation.memoryPool = MemoryPool::LocalMemory; CsrSelectionArgs args{CL_COMMAND_COPY_BUFFER, &srcMemObj, &dstMemObj, 0u, nullptr}; EXPECT_EQ(&queue->getGpgpuCommandStreamReceiver(), &queue->selectCsrForBuiltinOperation(args)); } } TEST_F(CsrSelectionCommandQueueWithBlitterTests, givenBlitterPresentButDisabledWithDebugFlagWhenSelectingBlitterThenReturnGpgpuCsr) { DebugManagerStateRestore restore{}; DebugManager.flags.EnableBlitterForEnqueueOperations.set(0); BuiltinOpParams builtinOpParams{}; MockGraphicsAllocation srcGraphicsAllocation{}; MockGraphicsAllocation dstGraphicsAllocation{}; MockBuffer srcMemObj{srcGraphicsAllocation}; MockBuffer dstMemObj{dstGraphicsAllocation}; 
builtinOpParams.srcMemObj = &srcMemObj; builtinOpParams.dstMemObj = &dstMemObj; { srcGraphicsAllocation.memoryPool = MemoryPool::System4KBPages; dstGraphicsAllocation.memoryPool = MemoryPool::System4KBPages; CsrSelectionArgs args{CL_COMMAND_COPY_BUFFER, &srcMemObj, &dstMemObj, 0u, nullptr}; EXPECT_EQ(&queue->getGpgpuCommandStreamReceiver(), &queue->selectCsrForBuiltinOperation(args)); } { srcGraphicsAllocation.memoryPool = MemoryPool::System4KBPages; dstGraphicsAllocation.memoryPool = MemoryPool::LocalMemory; CsrSelectionArgs args{CL_COMMAND_COPY_BUFFER, &srcMemObj, &dstMemObj, 0u, nullptr}; EXPECT_EQ(&queue->getGpgpuCommandStreamReceiver(), &queue->selectCsrForBuiltinOperation(args)); } { srcGraphicsAllocation.memoryPool = MemoryPool::LocalMemory; dstGraphicsAllocation.memoryPool = MemoryPool::System4KBPages; CsrSelectionArgs args{CL_COMMAND_COPY_BUFFER, &srcMemObj, &dstMemObj, 0u, nullptr}; EXPECT_EQ(&queue->getGpgpuCommandStreamReceiver(), &queue->selectCsrForBuiltinOperation(args)); } { srcGraphicsAllocation.memoryPool = MemoryPool::LocalMemory; dstGraphicsAllocation.memoryPool = MemoryPool::LocalMemory; CsrSelectionArgs args{CL_COMMAND_COPY_BUFFER, &srcMemObj, &dstMemObj, 0u, nullptr}; EXPECT_EQ(&queue->getGpgpuCommandStreamReceiver(), &queue->selectCsrForBuiltinOperation(args)); } }

// Local-to-local buffer copy: the selected CSR is expected to follow
// PreferCopyEngineForCopyBufferToBuffer: -1 defers to the hardware helper's preference,
// 0 selects the gpgpu CSR and 1 selects the BCS CSR.
TEST_F(CsrSelectionCommandQueueWithBlitterTests, givenBlitterPresentAndLocalToLocalCopyBufferCommandWhenSelectingBlitterThenReturnValueBasedOnDebugFlagAndHwPreference) {
    DebugManagerStateRestore restore{};
    DebugManager.flags.EnableBlitterForEnqueueOperations.set(1);

    const bool hwPreference = ClHwHelper::get(::defaultHwInfo->platform.eRenderCoreFamily).preferBlitterForLocalToLocalTransfers();
    const auto &hwPreferenceCsr = hwPreference ? *queue->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS)
                                               : queue->getGpgpuCommandStreamReceiver();

    MockGraphicsAllocation srcGraphicsAllocation{};
    MockGraphicsAllocation dstGraphicsAllocation{};
    MockBuffer srcMemObj{srcGraphicsAllocation};
    MockBuffer dstMemObj{dstGraphicsAllocation};

    srcGraphicsAllocation.memoryPool = MemoryPool::LocalMemory;
    dstGraphicsAllocation.memoryPool = MemoryPool::LocalMemory;
    CsrSelectionArgs args{CL_COMMAND_COPY_BUFFER, &srcMemObj, &dstMemObj, 0u, nullptr};

    DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(-1);
    EXPECT_EQ(&hwPreferenceCsr, &queue->selectCsrForBuiltinOperation(args));

    DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(0);
    EXPECT_EQ(&queue->getGpgpuCommandStreamReceiver(), &queue->selectCsrForBuiltinOperation(args));

    DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(1);
    EXPECT_EQ(queue->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS), &queue->selectCsrForBuiltinOperation(args));
}

// Any copy that is not local-to-local is expected to select the BCS CSR for every value of
// PreferCopyEngineForCopyBufferToBuffer.
TEST_F(CsrSelectionCommandQueueWithBlitterTests, givenBlitterPresentAndNotLocalToLocalCopyBufferCommandWhenSelectingCsrThenUseBcsRegardlessOfDebugFlag) {
    DebugManagerStateRestore restore{};
    DebugManager.flags.EnableBlitterForEnqueueOperations.set(1);

    const auto &bcsCsr = *queue->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS);

    MockGraphicsAllocation srcGraphicsAllocation{};
    MockGraphicsAllocation dstGraphicsAllocation{};
    MockBuffer srcMemObj{srcGraphicsAllocation};
    MockBuffer dstMemObj{dstGraphicsAllocation};

    // Builds the selection args once per pool combination (after the pools are assigned),
    // then re-checks the selection for each flag value in the original order -1, 0, 1.
    const auto expectBcsForAllFlagValues = [&](auto srcPool, auto dstPool) {
        srcGraphicsAllocation.memoryPool = srcPool;
        dstGraphicsAllocation.memoryPool = dstPool;
        CsrSelectionArgs args{CL_COMMAND_COPY_BUFFER, &srcMemObj, &dstMemObj, 0u, nullptr};

        for (const auto flagValue : {-1, 0, 1}) {
            DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(flagValue);
            EXPECT_EQ(&bcsCsr, &queue->selectCsrForBuiltinOperation(args));
        }
    };

    expectBcsForAllFlagValues(MemoryPool::System4KBPages, MemoryPool::LocalMemory);
    expectBcsForAllFlagValues(MemoryPool::LocalMemory, MemoryPool::System4KBPages);
    expectBcsForAllFlagValues(MemoryPool::System4KBPages, MemoryPool::System4KBPages);
}

// A transfer direction value of 0xFF is expected to make CSR selection throw.
// NOTE(review): `static_cast(0xFF)` appears to have lost its template argument; kept as
// found - confirm against the upstream sources.
TEST_F(CsrSelectionCommandQueueWithBlitterTests, givenInvalidTransferDirectionWhenSelectingCsrThenThrowError) {
    DebugManagerStateRestore restore{};
    DebugManager.flags.EnableBlitterForEnqueueOperations.set(1);

    MockGraphicsAllocation srcGraphicsAllocation{};
    MockGraphicsAllocation dstGraphicsAllocation{};
    MockBuffer srcMemObj{srcGraphicsAllocation};
    MockBuffer dstMemObj{dstGraphicsAllocation};

    CsrSelectionArgs args{CL_COMMAND_COPY_BUFFER, &srcMemObj, &dstMemObj, 0u, nullptr};
    args.direction = static_cast(0xFF);

    EXPECT_ANY_THROW(queue->selectCsrForBuiltinOperation(args));
}

// With AssignBCSAtEnqueue set to 0, a local-to-host copy is expected to select the BCS CSR
// of the queue's first BCS engine type.
TEST_F(CsrSelectionCommandQueueWithBlitterTests, givenBlitterAndAssignBCSAtEnqueueSetToFalseWhenSelectCsrThenDefaultBcsReturned) {
    DebugManagerStateRestore restore{};
    DebugManager.flags.EnableBlitterForEnqueueOperations.set(1);
    DebugManager.flags.AssignBCSAtEnqueue.set(0);

    MockGraphicsAllocation srcGraphicsAllocation{};
    MockGraphicsAllocation dstGraphicsAllocation{};
    MockBuffer srcMemObj{srcGraphicsAllocation};
    MockBuffer dstMemObj{dstGraphicsAllocation};

    CsrSelectionArgs args{CL_COMMAND_COPY_BUFFER, &srcMemObj, &dstMemObj, 0u, nullptr};
    args.direction = TransferDirection::LocalToHost;

    auto &csr = queue->selectCsrForBuiltinOperation(args);
    EXPECT_EQ(&csr, queue->getBcsCommandStreamReceiver(queue->bcsEngineTypes[0]));
}

TEST_F(CsrSelectionCommandQueueWithQueueFamiliesBlitterTests, givenBlitterSelectedWithQueueFamiliesWhenSelectingBlitterThenSelectBlitter) { DebugManagerStateRestore restore{}; BuiltinOpParams builtinOpParams{}; MockGraphicsAllocation srcGraphicsAllocation{}; MockGraphicsAllocation dstGraphicsAllocation{}; MockBuffer srcMemObj{srcGraphicsAllocation}; MockBuffer dstMemObj{dstGraphicsAllocation}; builtinOpParams.srcMemObj = &srcMemObj; builtinOpParams.dstMemObj = &dstMemObj; { srcGraphicsAllocation.memoryPool = MemoryPool::System4KBPages; dstGraphicsAllocation.memoryPool = MemoryPool::System4KBPages; CsrSelectionArgs args{CL_COMMAND_COPY_BUFFER, &srcMemObj, &dstMemObj, 0u, nullptr}; EXPECT_EQ(queue->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS), &queue->selectCsrForBuiltinOperation(args)); } { srcGraphicsAllocation.memoryPool = MemoryPool::System4KBPages; dstGraphicsAllocation.memoryPool = MemoryPool::LocalMemory;
CsrSelectionArgs args{CL_COMMAND_COPY_BUFFER, &srcMemObj, &dstMemObj, 0u, nullptr}; EXPECT_EQ(queue->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS), &queue->selectCsrForBuiltinOperation(args)); } { srcGraphicsAllocation.memoryPool = MemoryPool::LocalMemory; dstGraphicsAllocation.memoryPool = MemoryPool::System4KBPages; CsrSelectionArgs args{CL_COMMAND_COPY_BUFFER, &srcMemObj, &dstMemObj, 0u, nullptr}; EXPECT_EQ(queue->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS), &queue->selectCsrForBuiltinOperation(args)); } { srcGraphicsAllocation.memoryPool = MemoryPool::LocalMemory; dstGraphicsAllocation.memoryPool = MemoryPool::LocalMemory; CsrSelectionArgs args{CL_COMMAND_COPY_BUFFER, &srcMemObj, &dstMemObj, 0u, nullptr}; EXPECT_EQ(queue->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS), &queue->selectCsrForBuiltinOperation(args)); } }

// A queue created through queue families on the copy engine group is expected to select the
// BCS CSR for every memory pool combination, even with EnableBlitterForEnqueueOperations
// set to 0.
TEST_F(CsrSelectionCommandQueueWithQueueFamiliesBlitterTests, givenBlitterSelectedWithQueueFamiliesButDisabledWithDebugFlagWhenSelectingBlitterThenIgnoreDebugFlagAndSelectBlitter) {
    DebugManagerStateRestore restore{};
    DebugManager.flags.EnableBlitterForEnqueueOperations.set(0);

    MockGraphicsAllocation srcGraphicsAllocation{};
    MockGraphicsAllocation dstGraphicsAllocation{};
    MockBuffer srcMemObj{srcGraphicsAllocation};
    MockBuffer dstMemObj{dstGraphicsAllocation};

    // The pools are assigned before the selection args are built, as in the original scopes.
    const auto expectBcsCsr = [&](auto srcPool, auto dstPool) {
        srcGraphicsAllocation.memoryPool = srcPool;
        dstGraphicsAllocation.memoryPool = dstPool;
        CsrSelectionArgs args{CL_COMMAND_COPY_BUFFER, &srcMemObj, &dstMemObj, 0u, nullptr};
        EXPECT_EQ(queue->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS), &queue->selectCsrForBuiltinOperation(args));
    };

    expectBcsCsr(MemoryPool::System4KBPages, MemoryPool::System4KBPages);
    expectBcsCsr(MemoryPool::System4KBPages, MemoryPool::LocalMemory);
    expectBcsCsr(MemoryPool::LocalMemory, MemoryPool::System4KBPages);
    expectBcsCsr(MemoryPool::LocalMemory, MemoryPool::LocalMemory);
}

// blitEnqueueImageAllowed is expected to accept an image with one mip level and reject an
// image with two.
TEST(CommandQueue, givenMipMappedImageWhenCallingBlitEnqueueImageAllowedThenCorrectResultIsReturned) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.EnableBlitterForEnqueueImageOperations.set(1);

    MockContext context{};
    MockCommandQueue queue(&context, context.getDevice(0), 0, false);

    size_t correctRegion[3] = {10u, 10u, 0};
    size_t correctOrigin[3] = {1u, 1u, 0};
    MockImageBase image;

    image.imageDesc.num_mip_levels = 1;
    EXPECT_TRUE(queue.blitEnqueueImageAllowed(correctOrigin, correctRegion, image));

    image.imageDesc.num_mip_levels = 2;
    EXPECT_FALSE(queue.blitEnqueueImageAllowed(correctOrigin, correctRegion, image));
}

// blitEnqueueImageAllowed is expected to accept each of the listed image types.
TEST(CommandQueue, givenImageWithDifferentImageTypesWhenCallingBlitEnqueueImageAllowedThenCorrectResultIsReturned) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.EnableBlitterForEnqueueImageOperations.set(1);

    MockContext context{};
    MockCommandQueue queue(&context, context.getDevice(0), 0, false);

    size_t correctRegion[3] = {10u, 10u, 0};
    size_t correctOrigin[3] = {1u, 1u, 0};
    MockImageBase image;

    // Typed as cl_mem_object_type (the OpenCL type of cl_image_desc::image_type) rather than
    // int, so the assignment below needs no signed-to-unsigned conversion.
    const cl_mem_object_type imageTypes[] = {CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE1D_ARRAY,
                                             CL_MEM_OBJECT_IMAGE2D, CL_MEM_OBJECT_IMAGE2D_ARRAY,
                                             CL_MEM_OBJECT_IMAGE3D};
    for (const auto imageType : imageTypes) {
        image.imageDesc.image_type = imageType;
        EXPECT_TRUE(queue.blitEnqueueImageAllowed(correctOrigin, correctRegion, image));
    }
}

// Sweeps the image-blitter debug flag and the Tile64 resource flag over five images.
// A Tile64 3D image is expected to be allowed only when the product supports it and the
// blitter flag is enabled; every other case is expected to follow the blitter flag.
// NOTE(review): the template arguments of `std::array`, `std::unique_ptr`, `ImageHelper` and
// `reinterpret_cast` below look stripped; kept as found - confirm against the upstream sources.
TEST(CommandQueue, given64KBTileWith3DImageTypeWhenCallingBlitEnqueueImageAllowedThenCorrectResultIsReturned) {
    DebugManagerStateRestore restorer;

    MockContext context{};
    MockCommandQueue queue(&context, context.getDevice(0), 0, false);
    const auto &hwInfo = *defaultHwInfo;
    const auto &hwInfoConfig = HwInfoConfig::get(hwInfo.platform.eProductFamily);

    size_t correctRegion[3] = {10u, 10u, 0};
    size_t correctOrigin[3] = {1u, 1u, 0};

    std::array, 5> images = {
        std::unique_ptr(ImageHelper::create(&context)),
        std::unique_ptr(ImageHelper::create(&context)),
        std::unique_ptr(ImageHelper::create(&context)),
        std::unique_ptr(ImageHelper::create(&context)),
        std::unique_ptr(ImageHelper::create(&context))};

    for (auto blitterEnabled : {0, 1}) {
        DebugManager.flags.EnableBlitterForEnqueueImageOperations.set(blitterEnabled);

        for (auto isTile64 : {0, 1}) {
            for (const auto &image : images) {
                auto imageType = image->getImageDesc().image_type;
                auto gfxAllocation = image->getGraphicsAllocation(context.getDevice(0)->getRootDeviceIndex());
                auto mockGmmResourceInfo = reinterpret_cast(gfxAllocation->getDefaultGmm()->gmmResourceInfo.get());
                mockGmmResourceInfo->getResourceFlags()->Info.Tile64 = isTile64;

                if (isTile64 && (imageType == CL_MEM_OBJECT_IMAGE3D)) {
                    auto supportExpected = hwInfoConfig->isTile64With3DSurfaceOnBCSSupported(hwInfo) && blitterEnabled;
                    EXPECT_EQ(supportExpected, queue.blitEnqueueImageAllowed(correctOrigin, correctRegion, *image));
                } else {
                    EXPECT_EQ(blitterEnabled, queue.blitEnqueueImageAllowed(correctOrigin, correctRegion, *image));
                }
            }
        }
    }
}

TEST(CommandQueue, givenSupportForOperationWhenValidatingSupportThenReturnSuccess) { MockCommandQueue queue{}; queue.queueCapabilities = CL_QUEUE_CAPABILITY_MAP_BUFFER_INTEL; EXPECT_FALSE(queue.validateCapabilityForOperation(CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_INTEL, 0, nullptr, nullptr)); queue.queueCapabilities
|= CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_INTEL; EXPECT_TRUE(queue.validateCapabilityForOperation(CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_INTEL, 0, nullptr, nullptr)); } TEST(CommandQueue, givenSupportForWaitListAndWaitListPassedWhenValidatingSupportThenReturnSuccess) { MockContext context{}; MockCommandQueue queue{context}; MockEvent events[] = { {&queue, CL_COMMAND_READ_BUFFER, 0, 0}, {&queue, CL_COMMAND_READ_BUFFER, 0, 0}, {&queue, CL_COMMAND_READ_BUFFER, 0, 0}, }; MockEvent userEvent{&context}; const cl_event waitList[] = {events, events + 1, events + 2, &userEvent}; const cl_uint waitListSize = static_cast(arrayCount(waitList)); queue.queueCapabilities = CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_INTEL; EXPECT_TRUE(queue.validateCapabilityForOperation(CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_INTEL, 0, nullptr, nullptr)); EXPECT_FALSE(queue.validateCapabilityForOperation(CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_INTEL, waitListSize, waitList, nullptr)); queue.queueCapabilities = CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_INTEL | CL_QUEUE_CAPABILITY_SINGLE_QUEUE_EVENT_WAIT_LIST_INTEL | CL_QUEUE_CAPABILITY_CREATE_SINGLE_QUEUE_EVENTS_INTEL; EXPECT_TRUE(queue.validateCapabilityForOperation(CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_INTEL, 0, nullptr, nullptr)); EXPECT_TRUE(queue.validateCapabilityForOperation(CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_INTEL, waitListSize, waitList, nullptr)); } TEST(CommandQueue, givenCrossQueueDependencyAndBothQueuesSupportItWhenValidatingSupportThenReturnTrue) { MockContext context{}; MockCommandQueue queue{context}; MockCommandQueue otherQueue{context}; MockEvent events[] = { {&otherQueue, CL_COMMAND_READ_BUFFER, 0, 0}, {&otherQueue, CL_COMMAND_READ_BUFFER, 0, 0}, {&otherQueue, CL_COMMAND_READ_BUFFER, 0, 0}, }; const cl_event waitList[] = {events, events + 1, events + 2}; const cl_uint waitListSize = static_cast(arrayCount(waitList)); queue.queueCapabilities = CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_INTEL; otherQueue.queueCapabilities = 
CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_INTEL; EXPECT_FALSE(queue.validateCapabilityForOperation(CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_INTEL, waitListSize, waitList, nullptr)); queue.queueCapabilities = CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_INTEL | CL_QUEUE_CAPABILITY_CROSS_QUEUE_EVENT_WAIT_LIST_INTEL; otherQueue.queueCapabilities = CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_INTEL; EXPECT_FALSE(queue.validateCapabilityForOperation(CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_INTEL, waitListSize, waitList, nullptr)); queue.queueCapabilities = CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_INTEL; otherQueue.queueCapabilities = CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_INTEL | CL_QUEUE_CAPABILITY_CREATE_CROSS_QUEUE_EVENTS_INTEL; EXPECT_FALSE(queue.validateCapabilityForOperation(CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_INTEL, waitListSize, waitList, nullptr)); queue.queueCapabilities = CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_INTEL | CL_QUEUE_CAPABILITY_CROSS_QUEUE_EVENT_WAIT_LIST_INTEL; otherQueue.queueCapabilities = CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_INTEL | CL_QUEUE_CAPABILITY_CREATE_CROSS_QUEUE_EVENTS_INTEL; EXPECT_TRUE(queue.validateCapabilityForOperation(CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_INTEL, waitListSize, waitList, nullptr)); } TEST(CommandQueue, givenUserEventInWaitListWhenValidatingSupportThenReturnTrue) { MockContext context{}; MockCommandQueue queue{context}; MockEvent events[] = { {&queue, CL_COMMAND_READ_BUFFER, 0, 0}, {&queue, CL_COMMAND_READ_BUFFER, 0, 0}, {&queue, CL_COMMAND_READ_BUFFER, 0, 0}, }; MockEvent userEvent{&context}; const cl_event waitList[] = {events, events + 1, events + 2, &userEvent}; const cl_uint waitListSize = static_cast(arrayCount(waitList)); queue.queueCapabilities = CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_INTEL | CL_QUEUE_CAPABILITY_CREATE_SINGLE_QUEUE_EVENTS_INTEL | CL_QUEUE_CAPABILITY_SINGLE_QUEUE_EVENT_WAIT_LIST_INTEL; EXPECT_TRUE(queue.validateCapabilityForOperation(CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_INTEL, waitListSize, waitList, nullptr)); } TEST(CommandQueue, 
givenSupportForOutEventAndOutEventIsPassedWhenValidatingSupportThenReturnSuccess) { MockCommandQueue queue{}; cl_event outEvent{}; queue.queueCapabilities = CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_INTEL; EXPECT_TRUE(queue.validateCapabilityForOperation(CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_INTEL, 0, nullptr, nullptr)); EXPECT_FALSE(queue.validateCapabilityForOperation(CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_INTEL, 0, nullptr, &outEvent)); queue.queueCapabilities = CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_INTEL | CL_QUEUE_CAPABILITY_CREATE_SINGLE_QUEUE_EVENTS_INTEL; EXPECT_TRUE(queue.validateCapabilityForOperation(CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_INTEL, 0, nullptr, nullptr)); EXPECT_TRUE(queue.validateCapabilityForOperation(CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_INTEL, 0, nullptr, &outEvent)); queue.queueCapabilities = CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_INTEL | CL_QUEUE_CAPABILITY_CREATE_CROSS_QUEUE_EVENTS_INTEL; EXPECT_TRUE(queue.validateCapabilityForOperation(CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_INTEL, 0, nullptr, nullptr)); EXPECT_TRUE(queue.validateCapabilityForOperation(CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_INTEL, 0, nullptr, &outEvent)); queue.queueCapabilities = CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_INTEL | CL_QUEUE_CAPABILITY_CREATE_SINGLE_QUEUE_EVENTS_INTEL | CL_QUEUE_CAPABILITY_CREATE_CROSS_QUEUE_EVENTS_INTEL; EXPECT_TRUE(queue.validateCapabilityForOperation(CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_INTEL, 0, nullptr, nullptr)); EXPECT_TRUE(queue.validateCapabilityForOperation(CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_INTEL, 0, nullptr, &outEvent)); } struct CommandQueueWithTimestampPacketTests : ::testing::Test { void SetUp() override { DebugManager.flags.EnableTimestampPacket.set(1); } DebugManagerStateRestore restore{}; }; TEST_F(CommandQueueWithTimestampPacketTests, givenInOrderQueueWhenSetupBarrierTimestampForBcsEnginesCalledThenEnsureBarrierNodeIsPresent) { MockContext context{}; MockCommandQueue queue{context}; TimestampPacketDependencies dependencies{}; for (auto &containers : 
queue.bcsTimestampPacketContainers) { EXPECT_TRUE(containers.lastBarrierToWaitFor.peekNodes().empty()); } // No pending barrier, skip queue.setupBarrierTimestampForBcsEngines(aub_stream::EngineType::ENGINE_RCS, dependencies); EXPECT_EQ(0u, dependencies.barrierNodes.peekNodes().size()); // Add barrier node queue.getGpgpuCommandStreamReceiver().requestStallingCommandsOnNextFlush(); queue.setupBarrierTimestampForBcsEngines(aub_stream::EngineType::ENGINE_RCS, dependencies); EXPECT_EQ(1u, dependencies.barrierNodes.peekNodes().size()); auto node1 = dependencies.barrierNodes.peekNodes()[0]; // Do not add new node, if it exists queue.setupBarrierTimestampForBcsEngines(aub_stream::EngineType::ENGINE_RCS, dependencies); EXPECT_EQ(1u, dependencies.barrierNodes.peekNodes().size()); auto node2 = dependencies.barrierNodes.peekNodes()[0]; EXPECT_EQ(node2, node1); for (auto &containers : queue.bcsTimestampPacketContainers) { EXPECT_TRUE(containers.lastBarrierToWaitFor.peekNodes().empty()); } } TEST_F(CommandQueueWithTimestampPacketTests, givenOutOfOrderQueueWhenSetupBarrierTimestampForBcsEnginesCalledOnBcsEngineThenEnsureBarrierNodeIsPresentAndSaveItForOtherBcses) { const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0}; MockContext context{}; MockCommandQueue queue{&context, context.getDevice(0), props, false}; TimestampPacketDependencies dependencies{}; queue.getGpgpuCommandStreamReceiver().requestStallingCommandsOnNextFlush(); for (auto &containers : queue.bcsTimestampPacketContainers) { EXPECT_TRUE(containers.lastBarrierToWaitFor.peekNodes().empty()); } queue.setupBarrierTimestampForBcsEngines(aub_stream::EngineType::ENGINE_BCS, dependencies); EXPECT_EQ(1u, dependencies.barrierNodes.peekNodes().size()); auto barrierNode = dependencies.barrierNodes.peekNodes()[0]; for (auto currentBcsIndex = 0u; currentBcsIndex < queue.bcsTimestampPacketContainers.size(); currentBcsIndex++) { auto &containers = 
queue.bcsTimestampPacketContainers[currentBcsIndex]; if (currentBcsIndex == 0) { EXPECT_EQ(0u, containers.lastBarrierToWaitFor.peekNodes().size()); } else { EXPECT_EQ(1u, containers.lastBarrierToWaitFor.peekNodes().size()); EXPECT_EQ(barrierNode, containers.lastBarrierToWaitFor.peekNodes()[0]); } } EXPECT_EQ(queue.bcsTimestampPacketContainers.size(), barrierNode->refCountFetchSub(0)); } TEST_F(CommandQueueWithTimestampPacketTests, givenOutOfOrderQueueWhenSetupBarrierTimestampForBcsEnginesAndOverwritePreviousOneThenEnsureBarrierNodeHasDataAssigned) { const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0}; MockContext context{}; MockCommandQueue queue{&context, context.getDevice(0), props, false}; TimestampPacketDependencies dependencies{}; queue.getGpgpuCommandStreamReceiver().requestStallingCommandsOnNextFlush(); for (auto &containers : queue.bcsTimestampPacketContainers) { EXPECT_TRUE(containers.lastBarrierToWaitFor.peekNodes().empty()); } queue.setupBarrierTimestampForBcsEngines(aub_stream::EngineType::ENGINE_BCS, dependencies); EXPECT_EQ(1u, dependencies.barrierNodes.peekNodes().size()); auto barrierNode = dependencies.barrierNodes.peekNodes()[0]; EXPECT_EQ(1u, barrierNode->getContextEndValue(0u)); dependencies.moveNodesToNewContainer(*queue.getDeferredTimestampPackets()); queue.getGpgpuCommandStreamReceiver().requestStallingCommandsOnNextFlush(); barrierNode->incRefCount(); barrierNode->incRefCount(); barrierNode->incRefCount(); barrierNode->incRefCount(); queue.setupBarrierTimestampForBcsEngines(aub_stream::EngineType::ENGINE_BCS, dependencies); EXPECT_EQ(1u, barrierNode->getContextEndValue(0u)); EXPECT_EQ(1u, dependencies.barrierNodes.peekNodes().size()); barrierNode->refCountFetchSub(4u); barrierNode = dependencies.barrierNodes.peekNodes()[0]; EXPECT_EQ(1u, barrierNode->getContextEndValue(0u)); dependencies.moveNodesToNewContainer(*queue.getDeferredTimestampPackets()); 
queue.getGpgpuCommandStreamReceiver().requestStallingCommandsOnNextFlush(); queue.setupBarrierTimestampForBcsEngines(aub_stream::EngineType::ENGINE_BCS, dependencies); EXPECT_NE(1u, barrierNode->getContextEndValue(0u)); EXPECT_EQ(1u, dependencies.barrierNodes.peekNodes().size()); barrierNode = dependencies.barrierNodes.peekNodes()[0]; EXPECT_EQ(1u, barrierNode->getContextEndValue(0u)); } TEST_F(CommandQueueWithTimestampPacketTests, givenOutOfOrderQueueWhenSetupBarrierTimestampForBcsEnginesCalledOnNonBcsEngineThenEnsureBarrierNodeIsPresentAndSaveItForBcses) { const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0}; MockContext context{}; MockCommandQueue queue{&context, context.getDevice(0), props, false}; TimestampPacketDependencies dependencies{}; queue.getGpgpuCommandStreamReceiver().requestStallingCommandsOnNextFlush(); for (auto &containers : queue.bcsTimestampPacketContainers) { EXPECT_TRUE(containers.lastBarrierToWaitFor.peekNodes().empty()); } for (auto engineType : {aub_stream::EngineType::ENGINE_RCS, aub_stream::EngineType::ENGINE_CCS}) { queue.setupBarrierTimestampForBcsEngines(engineType, dependencies); EXPECT_EQ(1u, dependencies.barrierNodes.peekNodes().size()); auto barrierNode = dependencies.barrierNodes.peekNodes()[0]; for (auto &containers : queue.bcsTimestampPacketContainers) { EXPECT_EQ(1u, containers.lastBarrierToWaitFor.peekNodes().size()); EXPECT_EQ(barrierNode, containers.lastBarrierToWaitFor.peekNodes()[0]); } EXPECT_EQ(1u + queue.bcsTimestampPacketContainers.size(), barrierNode->refCountFetchSub(0)); } } TEST_F(CommandQueueWithTimestampPacketTests, givenSavedBarrierWhenProcessBarrierTimestampForBcsEngineCalledThenMoveSaveBarrierPacketToBarrierNodes) { MockContext context{}; MockCommandQueue queue{context}; TimestampPacketDependencies dependencies{}; // No saved barriers queue.processBarrierTimestampForBcsEngine(aub_stream::EngineType::ENGINE_BCS, dependencies); 
EXPECT_TRUE(dependencies.barrierNodes.peekNodes().empty()); // Save barrier TagNodeBase *node = queue.getGpgpuCommandStreamReceiver().getTimestampPacketAllocator()->getTag(); queue.bcsTimestampPacketContainers[0].lastBarrierToWaitFor.add(node); queue.processBarrierTimestampForBcsEngine(aub_stream::EngineType::ENGINE_BCS, dependencies); EXPECT_EQ(1u, dependencies.barrierNodes.peekNodes().size()); EXPECT_EQ(node, dependencies.barrierNodes.peekNodes()[0]); EXPECT_TRUE(queue.bcsTimestampPacketContainers[0].lastBarrierToWaitFor.peekNodes().empty()); } TEST_F(CommandQueueWithTimestampPacketTests, givenOutOfOrderQueueWhenBarrierTimestampAreSetupOnComputeEngineAndProcessedOnBcsThenPacketIsInBarrierNodes) { const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0}; MockContext context{}; MockCommandQueue queue{&context, context.getDevice(0), props, false}; queue.getGpgpuCommandStreamReceiver().requestStallingCommandsOnNextFlush(); for (auto engineType : {aub_stream::EngineType::ENGINE_RCS, aub_stream::EngineType::ENGINE_CCS}) { TimestampPacketDependencies dependencies{}; queue.setupBarrierTimestampForBcsEngines(engineType, dependencies); TimestampPacketDependencies blitDependencies{}; queue.processBarrierTimestampForBcsEngine(aub_stream::EngineType::ENGINE_BCS, blitDependencies); EXPECT_EQ(1u, blitDependencies.barrierNodes.peekNodes().size()); } } TEST_F(CommandQueueWithTimestampPacketTests, givenOutOfOrderQueueWhenBarrierTimestampAreSetupOnBcsEngineAndProcessedOnBcsThenPacketIsInBarrierNodes) { const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0}; MockContext context{}; MockCommandQueue queue{&context, context.getDevice(0), props, false}; queue.getGpgpuCommandStreamReceiver().requestStallingCommandsOnNextFlush(); TimestampPacketDependencies dependencies{}; queue.setupBarrierTimestampForBcsEngines(aub_stream::EngineType::ENGINE_BCS, dependencies); 
queue.processBarrierTimestampForBcsEngine(aub_stream::EngineType::ENGINE_BCS, dependencies); EXPECT_EQ(1u, dependencies.barrierNodes.peekNodes().size()); } TEST_F(CommandQueueWithTimestampPacketTests, givenInOrderQueueWhenSettingLastBcsPacketThenDoNotSaveThePacket) { MockContext context{}; MockCommandQueue queue{context}; queue.setLastBcsPacket(aub_stream::EngineType::ENGINE_BCS); EXPECT_TRUE(queue.bcsTimestampPacketContainers[0].lastSignalledPacket.peekNodes().empty()); } TEST_F(CommandQueueWithTimestampPacketTests, givenOutOfOrderQueueWhenSettingLastBcsPacketThenSaveOnlyOneLastPacket) { const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0}; MockContext context{}; MockCommandQueue queue{&context, context.getDevice(0), props, false}; queue.timestampPacketContainer->add(queue.getGpgpuCommandStreamReceiver().getTimestampPacketAllocator()->getTag()); queue.setLastBcsPacket(aub_stream::EngineType::ENGINE_BCS); EXPECT_EQ(queue.timestampPacketContainer->peekNodes(), queue.bcsTimestampPacketContainers[0].lastSignalledPacket.peekNodes()); EXPECT_EQ(1u, queue.timestampPacketContainer->peekNodes().size()); queue.timestampPacketContainer->moveNodesToNewContainer(*queue.getDeferredTimestampPackets()); queue.timestampPacketContainer->add(queue.getGpgpuCommandStreamReceiver().getTimestampPacketAllocator()->getTag()); queue.setLastBcsPacket(aub_stream::EngineType::ENGINE_BCS); EXPECT_EQ(queue.timestampPacketContainer->peekNodes(), queue.bcsTimestampPacketContainers[0].lastSignalledPacket.peekNodes()); EXPECT_EQ(1u, queue.timestampPacketContainer->peekNodes().size()); } TEST_F(CommandQueueWithTimestampPacketTests, givenLastSignalledPacketWhenFillingCsrDependenciesThenMovePacketToCsrDependencies) { MockContext context{}; MockCommandQueue queue{context}; queue.bcsTimestampPacketContainers[0].lastSignalledPacket.add(queue.getGpgpuCommandStreamReceiver().getTimestampPacketAllocator()->getTag()); CsrDependencies csrDeps; 
queue.fillCsrDependenciesWithLastBcsPackets(csrDeps); EXPECT_EQ(1u, queue.bcsTimestampPacketContainers[0].lastSignalledPacket.peekNodes().size()); EXPECT_EQ(&queue.bcsTimestampPacketContainers[0].lastSignalledPacket, csrDeps.timestampPacketContainer[0]); } TEST_F(CommandQueueWithTimestampPacketTests, givenLastSignalledPacketWhenClearingPacketsThenClearThePacket) { MockContext context{}; MockCommandQueue queue{context}; queue.bcsTimestampPacketContainers[0].lastSignalledPacket.add(queue.getGpgpuCommandStreamReceiver().getTimestampPacketAllocator()->getTag()); queue.clearLastBcsPackets(); EXPECT_EQ(0u, queue.bcsTimestampPacketContainers[0].lastBarrierToWaitFor.peekNodes().size()); } TEST_F(CommandQueueWithTimestampPacketTests, givenQueueWhenSettingAndQueryingLastBcsPacketThenReturnCorrectResults) { const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0}; MockContext context{}; MockCommandQueue queue{&context, context.getDevice(0), props, false}; queue.timestampPacketContainer->add(queue.getGpgpuCommandStreamReceiver().getTimestampPacketAllocator()->getTag()); queue.setLastBcsPacket(aub_stream::EngineType::ENGINE_BCS); CsrDependencies csrDeps; queue.fillCsrDependenciesWithLastBcsPackets(csrDeps); EXPECT_FALSE(csrDeps.timestampPacketContainer.empty()); queue.clearLastBcsPackets(); for (auto &containers : queue.bcsTimestampPacketContainers) { EXPECT_TRUE(containers.lastSignalledPacket.peekNodes().empty()); } } using KernelExecutionTypesTests = DispatchFlagsTests; HWTEST_F(KernelExecutionTypesTests, givenConcurrentKernelWhileDoingNonBlockedEnqueueThenCorrectKernelTypeIsSetInCSR) { using CsrType = MockCsrHw2; SetUpImpl(); auto mockCmdQ = std::make_unique>(context.get(), device.get(), nullptr); MockKernelWithInternals mockKernelWithInternals(*device.get()); auto pKernel = mockKernelWithInternals.mockKernel; pKernel->setKernelExecutionType(CL_KERNEL_EXEC_INFO_CONCURRENT_TYPE_INTEL); size_t gws[3] = {63, 0, 0}; 
mockCmdQ->enqueueKernel(pKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); auto &mockCsr = device->getUltCommandStreamReceiver(); EXPECT_EQ(mockCsr.lastKernelExecutionType, KernelExecutionType::Concurrent); } HWTEST_F(KernelExecutionTypesTests, givenKernelWithDifferentExecutionTypeWhileDoingNonBlockedEnqueueThenKernelTypeInCSRIsChanging) { using CsrType = MockCsrHw2; SetUpImpl(); auto mockCmdQ = std::make_unique>(context.get(), device.get(), nullptr); MockKernelWithInternals mockKernelWithInternals(*device.get()); auto pKernel = mockKernelWithInternals.mockKernel; size_t gws[3] = {63, 0, 0}; auto &mockCsr = device->getUltCommandStreamReceiver(); pKernel->setKernelExecutionType(CL_KERNEL_EXEC_INFO_CONCURRENT_TYPE_INTEL); mockCmdQ->enqueueKernel(pKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_EQ(mockCsr.lastKernelExecutionType, KernelExecutionType::Concurrent); mockCmdQ->enqueueMarkerWithWaitList(0, nullptr, nullptr); EXPECT_EQ(mockCsr.lastKernelExecutionType, KernelExecutionType::Concurrent); pKernel->setKernelExecutionType(CL_KERNEL_EXEC_INFO_DEFAULT_TYPE_INTEL); mockCmdQ->enqueueKernel(pKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_EQ(mockCsr.lastKernelExecutionType, KernelExecutionType::Default); } HWTEST_F(KernelExecutionTypesTests, givenConcurrentKernelWhileDoingBlockedEnqueueThenCorrectKernelTypeIsSetInCSR) { using CsrType = MockCsrHw2; SetUpImpl(); auto mockCmdQ = std::make_unique>(context.get(), device.get(), nullptr); MockKernelWithInternals mockKernelWithInternals(*device.get()); auto pKernel = mockKernelWithInternals.mockKernel; pKernel->setKernelExecutionType(CL_KERNEL_EXEC_INFO_CONCURRENT_TYPE_INTEL); UserEvent userEvent; cl_event waitlist[] = {&userEvent}; size_t gws[3] = {63, 0, 0}; mockCmdQ->enqueueKernel(pKernel, 1, nullptr, gws, nullptr, 1, waitlist, nullptr); userEvent.setStatus(CL_COMPLETE); auto &mockCsr = device->getUltCommandStreamReceiver(); EXPECT_EQ(mockCsr.lastKernelExecutionType, 
KernelExecutionType::Concurrent); mockCmdQ->isQueueBlocked(); } struct CommandQueueOnSpecificEngineTests : ::testing::Test { static void fillProperties(cl_queue_properties *properties, cl_uint queueFamily, cl_uint queueIndex) { properties[0] = CL_QUEUE_FAMILY_INTEL; properties[1] = queueFamily; properties[2] = CL_QUEUE_INDEX_INTEL; properties[3] = queueIndex; properties[4] = 0; } template class MockHwHelper : public HwHelperHw { public: const EngineInstancesContainer getGpgpuEngineInstances(const HardwareInfo &hwInfo) const override { EngineInstancesContainer result{}; for (int i = 0; i < rcsCount; i++) { result.push_back({aub_stream::ENGINE_RCS, EngineUsage::Regular}); } for (int i = 0; i < ccsCount; i++) { result.push_back({aub_stream::ENGINE_CCS, EngineUsage::Regular}); } for (int i = 0; i < bcsCount; i++) { result.push_back({aub_stream::ENGINE_BCS, EngineUsage::Regular}); } return result; } EngineGroupType getEngineGroupType(aub_stream::EngineType engineType, EngineUsage engineUsage, const HardwareInfo &hwInfo) const override { switch (engineType) { case aub_stream::ENGINE_RCS: return EngineGroupType::RenderCompute; case aub_stream::ENGINE_CCS: case aub_stream::ENGINE_CCS1: case aub_stream::ENGINE_CCS2: case aub_stream::ENGINE_CCS3: return EngineGroupType::Compute; case aub_stream::ENGINE_BCS: return EngineGroupType::Copy; default: UNRECOVERABLE_IF(true); } } }; template auto overrideHwHelper() { return RAIIHwHelperFactory{::defaultHwInfo->platform.eRenderCoreFamily}; } }; HWTEST_F(CommandQueueOnSpecificEngineTests, givenMultipleFamiliesWhenCreatingQueueOnSpecificEngineThenUseCorrectEngine) { auto raiiHwHelper = overrideHwHelper>(); HardwareInfo hwInfo = *defaultHwInfo; hwInfo.capabilityTable.blitterOperationsSupported = true; MockDevice *device = MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0); MockClDevice clDevice{device}; MockContext context{&clDevice}; cl_command_queue_properties properties[5] = {}; fillProperties(properties, 0, 0); EngineControl 
&engineCcs = context.getDevice(0)->getEngine(aub_stream::ENGINE_CCS, EngineUsage::Regular); MockCommandQueue queueRcs(&context, context.getDevice(0), properties, false); EXPECT_EQ(&engineCcs, &queueRcs.getGpgpuEngine()); EXPECT_FALSE(queueRcs.isCopyOnly); EXPECT_TRUE(queueRcs.isQueueFamilySelected()); EXPECT_EQ(properties[1], queueRcs.getQueueFamilyIndex()); EXPECT_EQ(properties[3], queueRcs.getQueueIndexWithinFamily()); fillProperties(properties, 1, 0); EngineControl &engineBcs = context.getDevice(0)->getEngine(aub_stream::ENGINE_BCS, EngineUsage::Regular); MockCommandQueue queueBcs(&context, context.getDevice(0), properties, false); EXPECT_EQ(engineBcs.commandStreamReceiver, queueBcs.getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS)); EXPECT_TRUE(queueBcs.isCopyOnly); EXPECT_TRUE(queueBcs.isQueueFamilySelected()); EXPECT_EQ(properties[1], queueBcs.getQueueFamilyIndex()); EXPECT_EQ(properties[3], queueBcs.getQueueIndexWithinFamily()); EXPECT_NE(nullptr, queueBcs.getTimestampPacketContainer()); } HWTEST_F(CommandQueueOnSpecificEngineTests, givenRootDeviceAndMultipleFamiliesWhenCreatingQueueOnSpecificEngineThenUseDefaultEngine) { VariableBackup backupHwInfo(defaultHwInfo.get()); defaultHwInfo->capabilityTable.blitterOperationsSupported = true; auto raiiHwHelper = overrideHwHelper>(); UltClDeviceFactory deviceFactory{1, 2}; MockContext context{deviceFactory.rootDevices[0]}; cl_command_queue_properties properties[5] = {}; fillProperties(properties, 0, 0); EngineControl &defaultEngine = context.getDevice(0)->getDefaultEngine(); MockCommandQueue defaultQueue(&context, context.getDevice(0), properties, false); EXPECT_EQ(&defaultEngine, &defaultQueue.getGpgpuEngine()); EXPECT_FALSE(defaultQueue.isCopyOnly); EXPECT_TRUE(defaultQueue.isQueueFamilySelected()); EXPECT_EQ(properties[1], defaultQueue.getQueueFamilyIndex()); EXPECT_EQ(properties[3], defaultQueue.getQueueIndexWithinFamily()); } HWTEST_F(CommandQueueOnSpecificEngineTests, 
givenSubDeviceAndMultipleFamiliesWhenCreatingQueueOnSpecificEngineThenUseDefaultEngine) { auto raiiHwHelper = overrideHwHelper>(); VariableBackup backupHwInfo(defaultHwInfo.get()); defaultHwInfo->capabilityTable.blitterOperationsSupported = true; UltClDeviceFactory deviceFactory{1, 2}; MockContext context{deviceFactory.subDevices[0]}; cl_command_queue_properties properties[5] = {}; fillProperties(properties, 0, 0); EngineControl &engineCcs = context.getDevice(0)->getEngine(aub_stream::ENGINE_CCS, EngineUsage::Regular); MockCommandQueue queueRcs(&context, context.getDevice(0), properties, false); EXPECT_EQ(&engineCcs, &queueRcs.getGpgpuEngine()); EXPECT_FALSE(queueRcs.isCopyOnly); EXPECT_TRUE(queueRcs.isQueueFamilySelected()); EXPECT_EQ(properties[1], queueRcs.getQueueFamilyIndex()); EXPECT_EQ(properties[3], queueRcs.getQueueIndexWithinFamily()); fillProperties(properties, 1, 0); EngineControl &engineBcs = context.getDevice(0)->getEngine(aub_stream::ENGINE_BCS, EngineUsage::Regular); MockCommandQueue queueBcs(&context, context.getDevice(0), properties, false); EXPECT_EQ(engineBcs.commandStreamReceiver, queueBcs.getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS)); EXPECT_TRUE(queueBcs.isCopyOnly); EXPECT_NE(nullptr, queueBcs.getTimestampPacketContainer()); EXPECT_TRUE(queueBcs.isQueueFamilySelected()); EXPECT_EQ(properties[1], queueBcs.getQueueFamilyIndex()); EXPECT_EQ(properties[3], queueBcs.getQueueIndexWithinFamily()); } HWTEST_F(CommandQueueOnSpecificEngineTests, givenBcsFamilySelectedWhenCreatingQueueOnSpecificEngineThenInitializeBcsProperly) { auto raiiHwHelper = overrideHwHelper>(); VariableBackup backupHwInfo(defaultHwInfo.get()); defaultHwInfo->capabilityTable.blitterOperationsSupported = true; MockContext context{}; cl_command_queue_properties properties[5] = {}; fillProperties(properties, 0, 0); EngineControl &engineBcs = context.getDevice(0)->getEngine(aub_stream::ENGINE_BCS, EngineUsage::Regular); MockCommandQueue queueBcs(&context, 
context.getDevice(0), properties, false); EXPECT_EQ(engineBcs.commandStreamReceiver, queueBcs.getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS)); EXPECT_TRUE(queueBcs.isCopyOnly); EXPECT_NE(nullptr, queueBcs.getTimestampPacketContainer()); EXPECT_TRUE(queueBcs.isQueueFamilySelected()); EXPECT_EQ(properties[1], queueBcs.getQueueFamilyIndex()); EXPECT_EQ(properties[3], queueBcs.getQueueIndexWithinFamily()); } HWTEST_F(CommandQueueOnSpecificEngineTests, givenNotInitializedRcsOsContextWhenCreatingQueueThenInitializeOsContext) { VariableBackup backupHwInfo(defaultHwInfo.get()); defaultHwInfo->capabilityTable.blitterOperationsSupported = true; DebugManagerStateRestore restore{}; DebugManager.flags.NodeOrdinal.set(static_cast(aub_stream::EngineType::ENGINE_RCS)); DebugManager.flags.DeferOsContextInitialization.set(1); auto raiiHwHelper = overrideHwHelper>(); MockContext context{}; cl_command_queue_properties properties[5] = {}; OsContext &osContext = *context.getDevice(0)->getEngine(aub_stream::ENGINE_CCS, EngineUsage::Regular).osContext; EXPECT_FALSE(osContext.isInitialized()); const auto ccsFamilyIndex = static_cast(context.getDevice(0)->getDevice().getEngineGroupIndexFromEngineGroupType(EngineGroupType::Compute)); fillProperties(properties, ccsFamilyIndex, 0); MockCommandQueueHw queue(&context, context.getDevice(0), properties); ASSERT_EQ(&osContext, queue.gpgpuEngine->osContext); EXPECT_TRUE(osContext.isInitialized()); } HWTEST_F(CommandQueueOnSpecificEngineTests, givenNotInitializedCcsOsContextWhenCreatingQueueThenInitializeOsContext) { VariableBackup backupHwInfo(defaultHwInfo.get()); defaultHwInfo->capabilityTable.blitterOperationsSupported = true; DebugManagerStateRestore restore{}; DebugManager.flags.NodeOrdinal.set(static_cast(aub_stream::EngineType::ENGINE_CCS)); DebugManager.flags.DeferOsContextInitialization.set(1); auto raiiHwHelper = overrideHwHelper>(); MockContext context{}; cl_command_queue_properties properties[5] = {}; OsContext &osContext = 
*context.getDevice(0)->getEngine(aub_stream::ENGINE_RCS, EngineUsage::Regular).osContext; EXPECT_FALSE(osContext.isInitialized()); const auto rcsFamilyIndex = static_cast(context.getDevice(0)->getDevice().getEngineGroupIndexFromEngineGroupType(EngineGroupType::RenderCompute)); fillProperties(properties, rcsFamilyIndex, 0); MockCommandQueueHw queue(&context, context.getDevice(0), properties); ASSERT_EQ(&osContext, queue.gpgpuEngine->osContext); EXPECT_TRUE(osContext.isInitialized()); } TEST_F(MultiTileFixture, givenSubDeviceWhenQueueIsCreatedThenItContainsProperDevice) { VariableBackup backupHwInfo(defaultHwInfo.get()); defaultHwInfo->capabilityTable.blitterOperationsSupported = true; auto tile0 = platform()->getClDevice(0)->getSubDevice(0); const cl_device_id deviceId = tile0; auto returnStatus = CL_SUCCESS; auto context = clCreateContext(nullptr, 1, &deviceId, nullptr, nullptr, &returnStatus); EXPECT_EQ(CL_SUCCESS, returnStatus); EXPECT_NE(nullptr, context); auto commandQueue = clCreateCommandQueueWithProperties(context, tile0, nullptr, &returnStatus); EXPECT_EQ(CL_SUCCESS, returnStatus); EXPECT_NE(nullptr, commandQueue); auto neoQueue = castToObject(commandQueue); EXPECT_EQ(&tile0->getDevice(), &neoQueue->getDevice()); clReleaseCommandQueue(commandQueue); clReleaseContext(context); } TEST_F(MultiTileFixture, givenTile1WhenQueueIsCreatedThenItContainsTile1Device) { auto tile1 = platform()->getClDevice(0)->getSubDevice(1); const cl_device_id deviceId = tile1; auto returnStatus = CL_SUCCESS; auto context = clCreateContext(nullptr, 1, &deviceId, nullptr, nullptr, &returnStatus); EXPECT_EQ(CL_SUCCESS, returnStatus); EXPECT_NE(nullptr, context); auto commandQueue = clCreateCommandQueueWithProperties(context, tile1, nullptr, &returnStatus); EXPECT_EQ(CL_SUCCESS, returnStatus); EXPECT_NE(nullptr, commandQueue); auto neoQueue = castToObject(commandQueue); EXPECT_EQ(&tile1->getDevice(), &neoQueue->getDevice()); clReleaseCommandQueue(commandQueue); 
clReleaseContext(context); } struct CopyOnlyQueueTests : ::testing::Test { void SetUp() override { typeUsageRcs.first = EngineHelpers::remapEngineTypeToHwSpecific(typeUsageRcs.first, *defaultHwInfo); auto device = MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()); auto copyEngineGroup = std::find_if(device->regularEngineGroups.begin(), device->regularEngineGroups.end(), [](const auto &engineGroup) { return engineGroup.engineGroupType == EngineGroupType::Copy; }); if (copyEngineGroup == device->regularEngineGroups.end()) { GTEST_SKIP(); } device->regularEngineGroups.clear(); device->allEngines.clear(); device->createEngine(0, typeUsageRcs); device->createEngine(1, typeUsageBcs); bcsEngine = &device->getAllEngines().back(); clDevice = std::make_unique(device); context = std::make_unique(clDevice.get()); properties[1] = device->getEngineGroupIndexFromEngineGroupType(EngineGroupType::Copy); } EngineTypeUsage typeUsageBcs = EngineTypeUsage{aub_stream::EngineType::ENGINE_BCS, EngineUsage::Regular}; EngineTypeUsage typeUsageRcs = EngineTypeUsage{aub_stream::EngineType::ENGINE_RCS, EngineUsage::Regular}; std::unique_ptr clDevice{}; std::unique_ptr context{}; std::unique_ptr queue{}; const EngineControl *bcsEngine = nullptr; cl_queue_properties properties[5] = {CL_QUEUE_FAMILY_INTEL, 0, CL_QUEUE_INDEX_INTEL, 0, 0}; }; TEST_F(CopyOnlyQueueTests, givenBcsSelectedWhenCreatingCommandQueueThenItIsCopyOnly) { MockCommandQueue queue{context.get(), clDevice.get(), properties, false}; EXPECT_EQ(bcsEngine->commandStreamReceiver, queue.getBcsCommandStreamReceiver(aub_stream::EngineType::ENGINE_BCS)); EXPECT_EQ(1u, queue.countBcsEngines()); EXPECT_NE(nullptr, queue.timestampPacketContainer); EXPECT_TRUE(queue.isCopyOnly); } HWTEST_F(CopyOnlyQueueTests, givenBcsSelectedWhenEnqueuingCopyThenBcsIsUsed) { auto srcBuffer = std::unique_ptr{BufferHelper<>::create(context.get())}; auto dstBuffer = std::unique_ptr{BufferHelper<>::create(context.get())}; MockCommandQueueHw 
queue{context.get(), clDevice.get(), properties}; auto commandStream = &bcsEngine->commandStreamReceiver->getCS(1024); auto usedCommandStream = commandStream->getUsed(); cl_int retVal = queue.enqueueCopyBuffer( srcBuffer.get(), dstBuffer.get(), 0, 0, 1, 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(usedCommandStream, commandStream->getUsed()); } HWTEST_F(CopyOnlyQueueTests, givenBlitterEnabledWhenCreatingBcsCommandQueueThenReturnSuccess) { DebugManagerStateRestore restore{}; DebugManager.flags.EnableBlitterOperationsSupport.set(1); cl_int retVal{}; auto commandQueue = clCreateCommandQueueWithProperties(context.get(), clDevice.get(), properties, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, commandQueue); EXPECT_EQ(CL_SUCCESS, clReleaseCommandQueue(commandQueue)); } using MultiEngineQueueHwTests = ::testing::Test; HWCMDTEST_F(IGFX_XE_HP_CORE, MultiEngineQueueHwTests, givenQueueFamilyPropertyWhenQueueIsCreatedThenSelectValidEngine) { initPlatform(); HardwareInfo localHwInfo = *defaultHwInfo; localHwInfo.featureTable.flags.ftrCCSNode = true; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&localHwInfo)); MockContext context(device.get()); context.contextType = ContextType::CONTEXT_TYPE_UNRESTRICTIVE; bool ccsFound = false; for (auto &engine : device->allEngines) { if (engine.osContext->getEngineType() == aub_stream::EngineType::ENGINE_CCS) { ccsFound = true; break; } } struct CommandQueueTestValues { CommandQueueTestValues() = delete; CommandQueueTestValues(cl_queue_properties engineFamily, cl_queue_properties engineIndex, aub_stream::EngineType expectedEngine) : expectedEngine(expectedEngine) { properties[1] = engineFamily; properties[3] = engineIndex; }; cl_command_queue clCommandQueue = nullptr; CommandQueue *commandQueueObj = nullptr; cl_queue_properties properties[5] = {CL_QUEUE_FAMILY_INTEL, 0, CL_QUEUE_INDEX_INTEL, 0, 0}; aub_stream::EngineType expectedEngine; }; auto addTestValueIfAvailable = 
[&](std::vector &vec, EngineGroupType engineGroup, cl_queue_properties queueIndex, aub_stream::EngineType engineType, bool csEnabled) { if (csEnabled) { const auto familyIndex = device->getDevice().getEngineGroupIndexFromEngineGroupType(engineGroup); vec.push_back(CommandQueueTestValues(static_cast(familyIndex), queueIndex, engineType)); } }; auto retVal = CL_SUCCESS; const auto &ccsInstances = localHwInfo.gtSystemInfo.CCSInfo.Instances.Bits; std::vector commandQueueTestValues; addTestValueIfAvailable(commandQueueTestValues, EngineGroupType::RenderCompute, 0, EngineHelpers::remapEngineTypeToHwSpecific(aub_stream::EngineType::ENGINE_RCS, device->getHardwareInfo()), true); addTestValueIfAvailable(commandQueueTestValues, EngineGroupType::Compute, 0, aub_stream::ENGINE_CCS, ccsFound); addTestValueIfAvailable(commandQueueTestValues, EngineGroupType::Compute, 1, aub_stream::ENGINE_CCS1, ccsInstances.CCS1Enabled); addTestValueIfAvailable(commandQueueTestValues, EngineGroupType::Compute, 2, aub_stream::ENGINE_CCS2, ccsInstances.CCS2Enabled); addTestValueIfAvailable(commandQueueTestValues, EngineGroupType::Compute, 3, aub_stream::ENGINE_CCS3, ccsInstances.CCS3Enabled); for (auto &commandQueueTestValue : commandQueueTestValues) { if (commandQueueTestValue.properties[1] >= device->getHardwareInfo().gtSystemInfo.CCSInfo.NumberOfCCSEnabled) { continue; } commandQueueTestValue.clCommandQueue = clCreateCommandQueueWithProperties(&context, device.get(), &commandQueueTestValue.properties[0], &retVal); EXPECT_EQ(CL_SUCCESS, retVal); commandQueueTestValue.commandQueueObj = castToObject(commandQueueTestValue.clCommandQueue); auto &cmdQueueEngine = commandQueueTestValue.commandQueueObj->getGpgpuCommandStreamReceiver().getOsContext().getEngineType(); EXPECT_EQ(commandQueueTestValue.expectedEngine, cmdQueueEngine); clReleaseCommandQueue(commandQueueTestValue.commandQueueObj); } } TEST_F(MultiTileFixture, givenDefaultContextWithRootDeviceWhenQueueIsCreatedThenQueueIsMultiEngine) { auto 
rootDevice = platform()->getClDevice(0); MockContext context(rootDevice); context.contextType = ContextType::CONTEXT_TYPE_DEFAULT; auto rootCsr = rootDevice->getDefaultEngine().commandStreamReceiver; MockCommandQueue queue(&context, rootDevice, nullptr, false); ASSERT_NE(nullptr, queue.gpgpuEngine); EXPECT_EQ(rootCsr->isMultiOsContextCapable(), queue.getGpgpuCommandStreamReceiver().isMultiOsContextCapable()); EXPECT_EQ(rootCsr, queue.gpgpuEngine->commandStreamReceiver); } TEST_F(MultiTileFixture, givenDefaultContextWithSubdeviceWhenQueueIsCreatedThenQueueIsNotMultiEngine) { auto subdevice = platform()->getClDevice(0)->getSubDevice(0); MockContext context(subdevice); context.contextType = ContextType::CONTEXT_TYPE_DEFAULT; MockCommandQueue queue(&context, subdevice, nullptr, false); ASSERT_NE(nullptr, queue.gpgpuEngine); EXPECT_FALSE(queue.getGpgpuCommandStreamReceiver().isMultiOsContextCapable()); } TEST_F(MultiTileFixture, givenUnrestrictiveContextWithRootDeviceWhenQueueIsCreatedThenQueueIsMultiEngine) { auto rootDevice = platform()->getClDevice(0); MockContext context(rootDevice); context.contextType = ContextType::CONTEXT_TYPE_UNRESTRICTIVE; auto rootCsr = rootDevice->getDefaultEngine().commandStreamReceiver; MockCommandQueue queue(&context, rootDevice, nullptr, false); ASSERT_NE(nullptr, queue.gpgpuEngine); EXPECT_EQ(rootCsr->isMultiOsContextCapable(), queue.getGpgpuCommandStreamReceiver().isMultiOsContextCapable()); EXPECT_EQ(rootCsr, queue.gpgpuEngine->commandStreamReceiver); } TEST_F(MultiTileFixture, givenNotDefaultContextWithRootDeviceAndTileIdMaskWhenQueueIsCreatedThenQueueIsMultiEngine) { auto rootClDevice = platform()->getClDevice(0); auto rootDevice = static_cast(&rootClDevice->getDevice()); MockContext context(rootClDevice); context.contextType = ContextType::CONTEXT_TYPE_UNRESTRICTIVE; auto rootCsr = rootDevice->getDefaultEngine().commandStreamReceiver; MockCommandQueue queue(&context, rootClDevice, nullptr, false); ASSERT_NE(nullptr, 
queue.gpgpuEngine); EXPECT_EQ(rootCsr->isMultiOsContextCapable(), queue.getGpgpuCommandStreamReceiver().isMultiOsContextCapable()); EXPECT_EQ(rootCsr, queue.gpgpuEngine->commandStreamReceiver); } command_queue_tests_pvc_and_later.cpp000066400000000000000000000635421422164147700340350ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/engine_node_helper.h" #include "shared/test/common/cmd_parse/hw_parse.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/test/unit_test/command_queue/command_queue_fixture.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" using namespace NEO; using CommandQueuePvcAndLaterTests = ::testing::Test; HWTEST2_F(CommandQueuePvcAndLaterTests, givenMultipleBcsEnginesWhenGetBcsCommandStreamReceiverIsCalledThenReturnProperCsrs, IsAtLeastXeHpcCore) { HardwareInfo hwInfo = *defaultHwInfo; hwInfo.featureTable.ftrBcsInfo = maxNBitValue(9); hwInfo.capabilityTable.blitterOperationsSupported = true; MockDevice *device = MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0); MockClDevice clDevice{device}; MockContext context{&clDevice}; MockCommandQueue queue{context}; queue.clearBcsEngines(); ASSERT_EQ(0u, queue.countBcsEngines()); queue.insertBcsEngine(aub_stream::EngineType::ENGINE_BCS); queue.insertBcsEngine(aub_stream::EngineType::ENGINE_BCS3); 
queue.insertBcsEngine(aub_stream::EngineType::ENGINE_BCS7); ASSERT_EQ(3u, queue.countBcsEngines()); EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS, queue.getBcsCommandStreamReceiver(aub_stream::EngineType::ENGINE_BCS)->getOsContext().getEngineType()); EXPECT_EQ(nullptr, queue.getBcsCommandStreamReceiver(aub_stream::EngineType::ENGINE_BCS1)); EXPECT_EQ(nullptr, queue.getBcsCommandStreamReceiver(aub_stream::EngineType::ENGINE_BCS2)); EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS3, queue.getBcsCommandStreamReceiver(aub_stream::EngineType::ENGINE_BCS3)->getOsContext().getEngineType()); EXPECT_EQ(nullptr, queue.getBcsCommandStreamReceiver(aub_stream::EngineType::ENGINE_BCS4)); EXPECT_EQ(nullptr, queue.getBcsCommandStreamReceiver(aub_stream::EngineType::ENGINE_BCS5)); EXPECT_EQ(nullptr, queue.getBcsCommandStreamReceiver(aub_stream::EngineType::ENGINE_BCS6)); EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS7, queue.getBcsCommandStreamReceiver(aub_stream::EngineType::ENGINE_BCS7)->getOsContext().getEngineType()); EXPECT_EQ(nullptr, queue.getBcsCommandStreamReceiver(aub_stream::EngineType::ENGINE_BCS8)); } HWTEST2_F(CommandQueuePvcAndLaterTests, givenAdditionalBcsWhenCreatingCommandQueueThenUseCorrectEngine, IsAtLeastXeHpcCore) { HardwareInfo hwInfo = *defaultHwInfo; hwInfo.featureTable.ftrBcsInfo = maxNBitValue(9); hwInfo.capabilityTable.blitterOperationsSupported = true; MockDevice *device = MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0); MockClDevice clDevice{device}; MockContext context{&clDevice}; const auto familyIndex = device->getEngineGroupIndexFromEngineGroupType(EngineGroupType::LinkedCopy); cl_command_queue_properties queueProperties[5] = { CL_QUEUE_FAMILY_INTEL, familyIndex, CL_QUEUE_INDEX_INTEL, 0, 0, }; queueProperties[3] = 0; auto queue = std::make_unique(&context, context.getDevice(0), queueProperties, false); EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS1, queue->bcsEngines[EngineHelpers::getBcsIndex(aub_stream::ENGINE_BCS1)]->getEngineType()); 
EXPECT_EQ(1u, queue->countBcsEngines()); queueProperties[3] = 4; queue = std::make_unique(&context, context.getDevice(0), queueProperties, false); EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS5, queue->bcsEngines[EngineHelpers::getBcsIndex(aub_stream::ENGINE_BCS5)]->getEngineType()); EXPECT_EQ(1u, queue->countBcsEngines()); queueProperties[3] = 7; queue = std::make_unique(&context, context.getDevice(0), queueProperties, false); EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS8, queue->bcsEngines[EngineHelpers::getBcsIndex(aub_stream::ENGINE_BCS8)]->getEngineType()); EXPECT_EQ(1u, queue->countBcsEngines()); } HWTEST2_F(CommandQueuePvcAndLaterTests, givenQueueWithMainBcsIsReleasedWhenNewQueueIsCreatedThenMainBcsCanBeUsedAgain, IsAtLeastXeHpcCore) { HardwareInfo hwInfo = *defaultHwInfo; hwInfo.featureTable.ftrBcsInfo = maxNBitValue(9); hwInfo.capabilityTable.blitterOperationsSupported = true; MockDevice *device = MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0); MockClDevice clDevice{device}; cl_device_id clDeviceId = static_cast(&clDevice); ClDeviceVector clDevices{&clDeviceId, 1u}; cl_int retVal{}; auto context = std::unique_ptr{Context::create(nullptr, clDevices, nullptr, nullptr, retVal)}; EXPECT_EQ(CL_SUCCESS, retVal); auto queue1 = std::make_unique(*context); auto queue2 = std::make_unique(*context); auto queue3 = std::make_unique(*context); auto queue4 = std::make_unique(*context); EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS, queue1->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS)->getOsContext().getEngineType()); EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS1, queue2->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS1)->getOsContext().getEngineType()); EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS2, queue3->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS2)->getOsContext().getEngineType()); EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS1, queue4->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS1)->getOsContext().getEngineType()); // Releasing main 
BCS. Next creation should be able to grab it queue1.reset(); queue1 = std::make_unique(*context); EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS, queue1->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS)->getOsContext().getEngineType()); // Releasing link BCS. Shouldn't change anything queue2.reset(); queue2 = std::make_unique(*context); EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS2, queue2->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS2)->getOsContext().getEngineType()); } HWTEST2_F(CommandQueuePvcAndLaterTests, givenCooperativeEngineUsageHintAndCcsWhenCreatingCommandQueueThenCreateQueueWithCooperativeEngine, IsAtLeastXeHpcCore) { DebugManagerStateRestore restore; DebugManager.flags.EngineUsageHint.set(static_cast(EngineUsage::Cooperative)); auto hwInfo = *defaultHwInfo; hwInfo.featureTable.flags.ftrCCSNode = true; hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled = 4; auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily); auto &hwInfoConfig = *NEO::HwInfoConfig::get(hwInfo.platform.eProductFamily); uint32_t revisions[] = {REVISION_A0, REVISION_B}; for (auto &revision : revisions) { auto hwRevId = hwInfoConfig.getHwRevIdFromStepping(revision, hwInfo); hwInfo.platform.usRevId = hwRevId; if (hwRevId == CommonConstants::invalidStepping || !hwHelper.isCooperativeEngineSupported(hwInfo)) { continue; } auto pDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); MockContext context(pDevice.get()); cl_queue_properties propertiesCooperativeQueue[] = {CL_QUEUE_FAMILY_INTEL, 0, CL_QUEUE_INDEX_INTEL, 0, 0}; propertiesCooperativeQueue[1] = pDevice->getDevice().getEngineGroupIndexFromEngineGroupType(EngineGroupType::Compute); for (size_t i = 0; i < 4; i++) { propertiesCooperativeQueue[3] = i; auto pCommandQueue = std::make_unique>(&context, pDevice.get(), propertiesCooperativeQueue); EXPECT_EQ(aub_stream::ENGINE_CCS + i, pCommandQueue->gpgpuEngine->osContext->getEngineType()); EXPECT_EQ(EngineUsage::Cooperative, 
pCommandQueue->gpgpuEngine->osContext->getEngineUsage()); } } } struct BcsCsrSelectionCommandQueueTests : ::testing::Test { void SetUp() override { HardwareInfo hwInfo = *::defaultHwInfo; hwInfo.capabilityTable.blitterOperationsSupported = true; hwInfo.featureTable.ftrBcsInfo = maxNBitValue(bcsInfoMaskSize); device = MockDevice::createWithNewExecutionEnvironment(&hwInfo); clDevice = std::make_unique(device); context = std::make_unique(clDevice.get()); } std::unique_ptr createQueueWithEngines(std::initializer_list engineTypes) { auto queue = createQueue(nullptr); queue->clearBcsEngines(); for (auto engineType : engineTypes) { queue->insertBcsEngine(engineType); } EXPECT_EQ(engineTypes.size(), queue->countBcsEngines()); return queue; } std::unique_ptr createQueueWithLinkBcsSelectedWithQueueFamilies(size_t linkBcsIndex) { cl_command_queue_properties queueProperties[5] = {}; queueProperties[0] = CL_QUEUE_FAMILY_INTEL; queueProperties[1] = device->getEngineGroupIndexFromEngineGroupType(EngineGroupType::LinkedCopy); queueProperties[2] = CL_QUEUE_INDEX_INTEL; queueProperties[3] = linkBcsIndex; auto queue = createQueue(queueProperties); EXPECT_EQ(1u, queue->countBcsEngines()); return queue; } std::unique_ptr createQueue(const cl_queue_properties *properties) { return std::make_unique(context.get(), clDevice.get(), properties, false); } MockDevice *device; std::unique_ptr clDevice; std::unique_ptr context; }; HWTEST2_F(BcsCsrSelectionCommandQueueTests, givenBcsSelectedWithQueueFamiliesWhenSelectingCsrThenSelectProperBcs, IsAtLeastXeHpcCore) { DebugManagerStateRestore restore{}; DebugManager.flags.EnableBlitterForEnqueueOperations.set(1); BuiltinOpParams builtinOpParams{}; MockGraphicsAllocation srcGraphicsAllocation{}; MockGraphicsAllocation dstGraphicsAllocation{}; MockBuffer srcMemObj{srcGraphicsAllocation}; MockBuffer dstMemObj{dstGraphicsAllocation}; builtinOpParams.srcMemObj = &srcMemObj; builtinOpParams.dstMemObj = &dstMemObj; constexpr auto linkBcsType = 
aub_stream::ENGINE_BCS6; constexpr auto linkBcsIndex = 5; auto queue = createQueueWithLinkBcsSelectedWithQueueFamilies(linkBcsIndex); { srcGraphicsAllocation.memoryPool = MemoryPool::System4KBPages; dstGraphicsAllocation.memoryPool = MemoryPool::System4KBPages; CsrSelectionArgs args{CL_COMMAND_COPY_BUFFER, &srcMemObj, &dstMemObj, 0u, nullptr}; CommandStreamReceiver &selectedCsr = queue->selectCsrForBuiltinOperation(args); EXPECT_EQ(queue->getBcsCommandStreamReceiver(linkBcsType), &selectedCsr); EXPECT_EQ(linkBcsType, selectedCsr.getOsContext().getEngineType()); } { srcGraphicsAllocation.memoryPool = MemoryPool::System4KBPages; dstGraphicsAllocation.memoryPool = MemoryPool::LocalMemory; CsrSelectionArgs args{CL_COMMAND_COPY_BUFFER, &srcMemObj, &dstMemObj, 0u, nullptr}; CommandStreamReceiver &selectedCsr = queue->selectCsrForBuiltinOperation(args); EXPECT_EQ(queue->getBcsCommandStreamReceiver(linkBcsType), &selectedCsr); EXPECT_EQ(linkBcsType, selectedCsr.getOsContext().getEngineType()); } { srcGraphicsAllocation.memoryPool = MemoryPool::LocalMemory; dstGraphicsAllocation.memoryPool = MemoryPool::System4KBPages; CsrSelectionArgs args{CL_COMMAND_COPY_BUFFER, &srcMemObj, &dstMemObj, 0u, nullptr}; CommandStreamReceiver &selectedCsr = queue->selectCsrForBuiltinOperation(args); EXPECT_EQ(queue->getBcsCommandStreamReceiver(linkBcsType), &selectedCsr); EXPECT_EQ(linkBcsType, selectedCsr.getOsContext().getEngineType()); } { srcGraphicsAllocation.memoryPool = MemoryPool::LocalMemory; dstGraphicsAllocation.memoryPool = MemoryPool::LocalMemory; CsrSelectionArgs args{CL_COMMAND_COPY_BUFFER, &srcMemObj, &dstMemObj, 0u, nullptr}; CommandStreamReceiver &selectedCsr = queue->selectCsrForBuiltinOperation(args); EXPECT_EQ(queue->getBcsCommandStreamReceiver(linkBcsType), &selectedCsr); EXPECT_EQ(linkBcsType, selectedCsr.getOsContext().getEngineType()); } } HWTEST2_F(BcsCsrSelectionCommandQueueTests, givenBcsSelectedWithForceBcsEngineIndexWhenSelectingCsrThenSelectProperBcs, 
IsAtLeastXeHpcCore) { DebugManagerStateRestore restore{}; DebugManager.flags.EnableBlitterForEnqueueOperations.set(1); BuiltinOpParams builtinOpParams{}; MockGraphicsAllocation srcGraphicsAllocation{}; MockGraphicsAllocation dstGraphicsAllocation{}; MockBuffer srcMemObj{srcGraphicsAllocation}; MockBuffer dstMemObj{dstGraphicsAllocation}; builtinOpParams.srcMemObj = &srcMemObj; builtinOpParams.dstMemObj = &dstMemObj; constexpr auto linkBcsType = aub_stream::ENGINE_BCS5; constexpr auto linkBcsIndex = 5; DebugManager.flags.ForceBcsEngineIndex.set(linkBcsIndex); auto queue = createQueue(nullptr); { srcGraphicsAllocation.memoryPool = MemoryPool::System4KBPages; dstGraphicsAllocation.memoryPool = MemoryPool::System4KBPages; CsrSelectionArgs args{CL_COMMAND_COPY_BUFFER, &srcMemObj, &dstMemObj, 0u, nullptr}; CommandStreamReceiver &selectedCsr = queue->selectCsrForBuiltinOperation(args); EXPECT_EQ(queue->getBcsCommandStreamReceiver(linkBcsType), &selectedCsr); EXPECT_EQ(linkBcsType, selectedCsr.getOsContext().getEngineType()); } { srcGraphicsAllocation.memoryPool = MemoryPool::System4KBPages; dstGraphicsAllocation.memoryPool = MemoryPool::LocalMemory; CsrSelectionArgs args{CL_COMMAND_COPY_BUFFER, &srcMemObj, &dstMemObj, 0u, nullptr}; CommandStreamReceiver &selectedCsr = queue->selectCsrForBuiltinOperation(args); EXPECT_EQ(queue->getBcsCommandStreamReceiver(linkBcsType), &selectedCsr); EXPECT_EQ(linkBcsType, selectedCsr.getOsContext().getEngineType()); } { srcGraphicsAllocation.memoryPool = MemoryPool::LocalMemory; dstGraphicsAllocation.memoryPool = MemoryPool::System4KBPages; CsrSelectionArgs args{CL_COMMAND_COPY_BUFFER, &srcMemObj, &dstMemObj, 0u, nullptr}; CommandStreamReceiver &selectedCsr = queue->selectCsrForBuiltinOperation(args); EXPECT_EQ(queue->getBcsCommandStreamReceiver(linkBcsType), &selectedCsr); EXPECT_EQ(linkBcsType, selectedCsr.getOsContext().getEngineType()); } { srcGraphicsAllocation.memoryPool = MemoryPool::LocalMemory; dstGraphicsAllocation.memoryPool = 
MemoryPool::LocalMemory; CsrSelectionArgs args{CL_COMMAND_COPY_BUFFER, &srcMemObj, &dstMemObj, 0u, nullptr}; CommandStreamReceiver &selectedCsr = queue->selectCsrForBuiltinOperation(args); EXPECT_EQ(&queue->getGpgpuCommandStreamReceiver(), &selectedCsr); } } HWTEST2_F(BcsCsrSelectionCommandQueueTests, givenBcsSelectedWithQueueFamiliesAndForceBcsIndexIsUsedWhenSelectingCsrThenUseBcsFromQueueFamilies, IsAtLeastXeHpcCore) { DebugManagerStateRestore restore{}; DebugManager.flags.EnableBlitterForEnqueueOperations.set(1); BuiltinOpParams builtinOpParams{}; MockGraphicsAllocation srcGraphicsAllocation{}; MockGraphicsAllocation dstGraphicsAllocation{}; MockBuffer srcMemObj{srcGraphicsAllocation}; MockBuffer dstMemObj{dstGraphicsAllocation}; builtinOpParams.srcMemObj = &srcMemObj; builtinOpParams.dstMemObj = &dstMemObj; constexpr auto linkBcsType = aub_stream::ENGINE_BCS6; constexpr auto linkBcsIndex = 5; DebugManager.flags.ForceBcsEngineIndex.set(2); // this should be ignored, because of queue families auto queue = createQueueWithLinkBcsSelectedWithQueueFamilies(linkBcsIndex); { srcGraphicsAllocation.memoryPool = MemoryPool::System4KBPages; dstGraphicsAllocation.memoryPool = MemoryPool::System4KBPages; CsrSelectionArgs args{CL_COMMAND_COPY_BUFFER, &srcMemObj, &dstMemObj, 0u, nullptr}; CommandStreamReceiver &selectedCsr = queue->selectCsrForBuiltinOperation(args); EXPECT_EQ(queue->getBcsCommandStreamReceiver(linkBcsType), &selectedCsr); EXPECT_EQ(linkBcsType, selectedCsr.getOsContext().getEngineType()); } { srcGraphicsAllocation.memoryPool = MemoryPool::System4KBPages; dstGraphicsAllocation.memoryPool = MemoryPool::LocalMemory; CsrSelectionArgs args{CL_COMMAND_COPY_BUFFER, &srcMemObj, &dstMemObj, 0u, nullptr}; CommandStreamReceiver &selectedCsr = queue->selectCsrForBuiltinOperation(args); EXPECT_EQ(queue->getBcsCommandStreamReceiver(linkBcsType), &selectedCsr); EXPECT_EQ(linkBcsType, selectedCsr.getOsContext().getEngineType()); } { srcGraphicsAllocation.memoryPool = 
MemoryPool::LocalMemory; dstGraphicsAllocation.memoryPool = MemoryPool::System4KBPages; CsrSelectionArgs args{CL_COMMAND_COPY_BUFFER, &srcMemObj, &dstMemObj, 0u, nullptr}; CommandStreamReceiver &selectedCsr = queue->selectCsrForBuiltinOperation(args); EXPECT_EQ(queue->getBcsCommandStreamReceiver(linkBcsType), &selectedCsr); EXPECT_EQ(linkBcsType, selectedCsr.getOsContext().getEngineType()); } { srcGraphicsAllocation.memoryPool = MemoryPool::LocalMemory; dstGraphicsAllocation.memoryPool = MemoryPool::LocalMemory; CsrSelectionArgs args{CL_COMMAND_COPY_BUFFER, &srcMemObj, &dstMemObj, 0u, nullptr}; CommandStreamReceiver &selectedCsr = queue->selectCsrForBuiltinOperation(args); EXPECT_EQ(queue->getBcsCommandStreamReceiver(linkBcsType), &selectedCsr); EXPECT_EQ(linkBcsType, selectedCsr.getOsContext().getEngineType()); } } HWTEST2_F(BcsCsrSelectionCommandQueueTests, givenOneBcsEngineInQueueWhenSelectingCsrThenTheBcs, IsAtLeastXeHpcCore) { DebugManagerStateRestore restore{}; DebugManager.flags.EnableBlitterForEnqueueOperations.set(1); BuiltinOpParams builtinOpParams{}; MockGraphicsAllocation srcGraphicsAllocation{}; MockGraphicsAllocation dstGraphicsAllocation{}; MockBuffer srcMemObj{srcGraphicsAllocation}; MockBuffer dstMemObj{dstGraphicsAllocation}; builtinOpParams.srcMemObj = &srcMemObj; builtinOpParams.dstMemObj = &dstMemObj; constexpr auto linkBcsType = aub_stream::ENGINE_BCS6; auto queue = createQueueWithEngines({linkBcsType}); { srcGraphicsAllocation.memoryPool = MemoryPool::System4KBPages; dstGraphicsAllocation.memoryPool = MemoryPool::System4KBPages; CsrSelectionArgs args{CL_COMMAND_COPY_BUFFER, &srcMemObj, &dstMemObj, 0u, nullptr}; CommandStreamReceiver &selectedCsr = queue->selectCsrForBuiltinOperation(args); EXPECT_EQ(queue->getBcsCommandStreamReceiver(linkBcsType), &selectedCsr); EXPECT_EQ(linkBcsType, selectedCsr.getOsContext().getEngineType()); } { srcGraphicsAllocation.memoryPool = MemoryPool::System4KBPages; dstGraphicsAllocation.memoryPool = 
MemoryPool::LocalMemory; CsrSelectionArgs args{CL_COMMAND_COPY_BUFFER, &srcMemObj, &dstMemObj, 0u, nullptr}; CommandStreamReceiver &selectedCsr = queue->selectCsrForBuiltinOperation(args); EXPECT_EQ(queue->getBcsCommandStreamReceiver(linkBcsType), &selectedCsr); EXPECT_EQ(linkBcsType, selectedCsr.getOsContext().getEngineType()); } { srcGraphicsAllocation.memoryPool = MemoryPool::LocalMemory; dstGraphicsAllocation.memoryPool = MemoryPool::System4KBPages; CsrSelectionArgs args{CL_COMMAND_COPY_BUFFER, &srcMemObj, &dstMemObj, 0u, nullptr}; CommandStreamReceiver &selectedCsr = queue->selectCsrForBuiltinOperation(args); EXPECT_EQ(queue->getBcsCommandStreamReceiver(linkBcsType), &selectedCsr); EXPECT_EQ(linkBcsType, selectedCsr.getOsContext().getEngineType()); } { srcGraphicsAllocation.memoryPool = MemoryPool::LocalMemory; dstGraphicsAllocation.memoryPool = MemoryPool::LocalMemory; CsrSelectionArgs args{CL_COMMAND_COPY_BUFFER, &srcMemObj, &dstMemObj, 0u, nullptr}; CommandStreamReceiver &selectedCsr = queue->selectCsrForBuiltinOperation(args); EXPECT_EQ(&queue->getGpgpuCommandStreamReceiver(), &selectedCsr); } } HWTEST2_F(BcsCsrSelectionCommandQueueTests, givenMultipleEnginesInQueueWhenSelectingCsrForLocalToLocalOperationThenSelectProperGpGpuCsr, IsAtLeastXeHpcCore) { DebugManagerStateRestore restore{}; DebugManager.flags.EnableBlitterForEnqueueOperations.set(1); BuiltinOpParams builtinOpParams{}; MockGraphicsAllocation srcGraphicsAllocation{}; MockGraphicsAllocation dstGraphicsAllocation{}; MockBuffer srcMemObj{srcGraphicsAllocation}; MockBuffer dstMemObj{dstGraphicsAllocation}; builtinOpParams.srcMemObj = &srcMemObj; builtinOpParams.dstMemObj = &dstMemObj; srcGraphicsAllocation.memoryPool = MemoryPool::LocalMemory; dstGraphicsAllocation.memoryPool = MemoryPool::LocalMemory; CsrSelectionArgs args{CL_COMMAND_COPY_BUFFER, &srcMemObj, &dstMemObj, 0u, nullptr}; { auto queue = createQueueWithEngines({ aub_stream::ENGINE_BCS, aub_stream::ENGINE_BCS1, aub_stream::ENGINE_BCS2, 
aub_stream::ENGINE_BCS3, aub_stream::ENGINE_BCS4, aub_stream::ENGINE_BCS5, aub_stream::ENGINE_BCS6, aub_stream::ENGINE_BCS7, aub_stream::ENGINE_BCS8, }); CommandStreamReceiver &selectedCsr = queue->selectCsrForBuiltinOperation(args); EXPECT_EQ(&queue->getGpgpuCommandStreamReceiver(), &selectedCsr); } { auto queue = createQueueWithEngines({ aub_stream::ENGINE_BCS5, aub_stream::ENGINE_BCS6, aub_stream::ENGINE_BCS7, aub_stream::ENGINE_BCS8, }); CommandStreamReceiver &selectedCsr = queue->selectCsrForBuiltinOperation(args); EXPECT_EQ(&queue->getGpgpuCommandStreamReceiver(), &selectedCsr); } } HWTEST2_F(BcsCsrSelectionCommandQueueTests, givenMultipleEnginesInQueueWhenSelectingCsrForNonLocalToLocalOperationThenSelectProperBcsCsr, IsAtLeastXeHpcCore) { DebugManagerStateRestore restore{}; DebugManager.flags.EnableBlitterForEnqueueOperations.set(1); BuiltinOpParams builtinOpParams{}; MockGraphicsAllocation srcGraphicsAllocation{}; MockGraphicsAllocation dstGraphicsAllocation{}; MockBuffer srcMemObj{srcGraphicsAllocation}; MockBuffer dstMemObj{dstGraphicsAllocation}; builtinOpParams.srcMemObj = &srcMemObj; builtinOpParams.dstMemObj = &dstMemObj; srcGraphicsAllocation.memoryPool = MemoryPool::System4KBPages; dstGraphicsAllocation.memoryPool = MemoryPool::LocalMemory; CsrSelectionArgs args{CL_COMMAND_COPY_BUFFER, &srcMemObj, &dstMemObj, 0u, nullptr}; { auto queue = createQueueWithEngines({ aub_stream::ENGINE_BCS, aub_stream::ENGINE_BCS1, aub_stream::ENGINE_BCS2, aub_stream::ENGINE_BCS3, aub_stream::ENGINE_BCS4, aub_stream::ENGINE_BCS5, aub_stream::ENGINE_BCS6, aub_stream::ENGINE_BCS7, aub_stream::ENGINE_BCS8, }); EXPECT_EQ(queue->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS1), &queue->selectCsrForBuiltinOperation(args)); EXPECT_EQ(queue->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS2), &queue->selectCsrForBuiltinOperation(args)); EXPECT_EQ(queue->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS1), &queue->selectCsrForBuiltinOperation(args)); 
EXPECT_EQ(queue->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS2), &queue->selectCsrForBuiltinOperation(args)); } } HWTEST2_F(OoqCommandQueueHwBlitTest, givenBarrierBeforeFirstKernelWhenEnqueueNDRangeThenProgramBarrierBeforeGlobalAllocation, IsPVC) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using STATE_SYSTEM_MEM_FENCE_ADDRESS = typename FamilyType::STATE_SYSTEM_MEM_FENCE_ADDRESS; using MI_MEM_FENCE = typename FamilyType::MI_MEM_FENCE; if (pCmdQ->getTimestampPacketContainer() == nullptr) { GTEST_SKIP(); } DebugManagerStateRestore restore{}; DebugManager.flags.DoCpuCopyOnReadBuffer.set(0); DebugManager.flags.ForceCacheFlushForBcs.set(0); DebugManager.flags.UpdateTaskCountFromWait.set(1); DebugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(1); MockKernelWithInternals mockKernelWithInternals(*pClDevice); MockKernel *kernel = mockKernelWithInternals.mockKernel; size_t offset = 0; size_t gws = 1; BufferDefaults::context = context; auto buffer = clUniquePtr(BufferHelper<>::create()); char ptr[1] = {}; EXPECT_EQ(CL_SUCCESS, pCmdQ->enqueueReadBuffer(buffer.get(), CL_FALSE, 0, 1u, ptr, nullptr, 0, nullptr, nullptr)); EXPECT_EQ(CL_SUCCESS, pCmdQ->enqueueReadBuffer(buffer.get(), CL_FALSE, 0, 1u, ptr, nullptr, 0, nullptr, nullptr)); EXPECT_EQ(CL_SUCCESS, pCmdQ->enqueueBarrierWithWaitList(0, nullptr, nullptr)); auto ccsStart = pCmdQ->getGpgpuCommandStreamReceiver().getCS().getUsed(); EXPECT_EQ(CL_SUCCESS, pCmdQ->enqueueKernel(kernel, 1, &offset, &gws, nullptr, 0, nullptr, nullptr)); HardwareParse ccsHwParser; ccsHwParser.parseCommands(pCmdQ->getGpgpuCommandStreamReceiver().getCS(0), ccsStart); const auto memFenceStateItor = find(ccsHwParser.cmdList.begin(), ccsHwParser.cmdList.end()); const auto memFenceItor = find(memFenceStateItor, ccsHwParser.cmdList.end()); EXPECT_NE(ccsHwParser.cmdList.end(), memFenceItor); EXPECT_NE(ccsHwParser.cmdList.end(), memFenceStateItor); } 
compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/csr_selection_args_tests.cpp000066400000000000000000000245121422164147700322530ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/source/command_queue/csr_selection_args.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_image.h" namespace NEO { TEST(CsrSelectionArgsTests, givenBuffersWhenCreatingCsrSelectionArgsThenSetupArgsCorrectly) { const uint32_t rootDeviceIndex = 2u; const size_t *size = reinterpret_cast(0x1234); MockGraphicsAllocation allocation1{rootDeviceIndex, nullptr, 1024u}; MockGraphicsAllocation allocation2{rootDeviceIndex, nullptr, 1024u}; MockBuffer buffer1{allocation1}; MockBuffer buffer2{allocation2}; { allocation1.memoryPool = MemoryPool::System4KBPages; CsrSelectionArgs args{CL_COMMAND_WRITE_BUFFER, {}, &buffer1, rootDeviceIndex, size}; EXPECT_EQ(static_cast(CL_COMMAND_WRITE_BUFFER), args.cmdType); EXPECT_EQ(TransferDirection::HostToHost, args.direction); EXPECT_EQ(size, args.size); EXPECT_EQ(&allocation1, args.dstResource.allocation); } { allocation1.memoryPool = MemoryPool::LocalMemory; CsrSelectionArgs args{CL_COMMAND_WRITE_BUFFER, {}, &buffer1, rootDeviceIndex, size}; EXPECT_EQ(static_cast(CL_COMMAND_WRITE_BUFFER), args.cmdType); EXPECT_EQ(TransferDirection::HostToLocal, args.direction); EXPECT_EQ(size, args.size); EXPECT_EQ(&allocation1, args.dstResource.allocation); } { allocation1.memoryPool = MemoryPool::LocalMemory; CsrSelectionArgs args{CL_COMMAND_READ_BUFFER, &buffer1, {}, rootDeviceIndex, size}; EXPECT_EQ(static_cast(CL_COMMAND_READ_BUFFER), args.cmdType); EXPECT_EQ(TransferDirection::LocalToHost, args.direction); EXPECT_EQ(size, args.size); EXPECT_EQ(&allocation1, args.srcResource.allocation); } { allocation1.memoryPool = MemoryPool::System4KBPages; CsrSelectionArgs args{CL_COMMAND_READ_BUFFER, 
&buffer1, {}, rootDeviceIndex, size}; EXPECT_EQ(static_cast(CL_COMMAND_READ_BUFFER), args.cmdType); EXPECT_EQ(TransferDirection::HostToHost, args.direction); EXPECT_EQ(size, args.size); EXPECT_EQ(&allocation1, args.srcResource.allocation); } { allocation1.memoryPool = MemoryPool::LocalMemory; allocation2.memoryPool = MemoryPool::System4KBPages; CsrSelectionArgs args{CL_COMMAND_COPY_BUFFER, &buffer1, &buffer2, rootDeviceIndex, size}; EXPECT_EQ(static_cast(CL_COMMAND_COPY_BUFFER), args.cmdType); EXPECT_EQ(TransferDirection::LocalToHost, args.direction); EXPECT_EQ(size, args.size); EXPECT_EQ(&allocation1, args.srcResource.allocation); EXPECT_EQ(&allocation2, args.dstResource.allocation); } { allocation1.memoryPool = MemoryPool::System4KBPages; allocation2.memoryPool = MemoryPool::LocalMemory; CsrSelectionArgs args{CL_COMMAND_COPY_BUFFER, &buffer1, &buffer2, rootDeviceIndex, size}; EXPECT_EQ(static_cast(CL_COMMAND_COPY_BUFFER), args.cmdType); EXPECT_EQ(TransferDirection::HostToLocal, args.direction); EXPECT_EQ(size, args.size); EXPECT_EQ(&allocation1, args.srcResource.allocation); EXPECT_EQ(&allocation2, args.dstResource.allocation); } } TEST(CsrSelectionArgsTests, givenImagesWhenCreatingCsrSelectionArgsThenSetupArgsCorrectly) { const uint32_t rootDeviceIndex = 2u; const size_t *size = reinterpret_cast(0x1234); const size_t *origin1 = reinterpret_cast(0x12345); const size_t *origin2 = reinterpret_cast(0x123456); MockImageBase image1{rootDeviceIndex}; MockImageBase image2{rootDeviceIndex}; MockGraphicsAllocation &allocation1 = *image1.graphicsAllocation; MockGraphicsAllocation &allocation2 = *image2.graphicsAllocation; { allocation1.memoryPool = MemoryPool::System4KBPages; CsrSelectionArgs args{CL_COMMAND_WRITE_IMAGE, {}, &image1, rootDeviceIndex, size, nullptr, origin1}; EXPECT_EQ(static_cast(CL_COMMAND_WRITE_IMAGE), args.cmdType); EXPECT_EQ(TransferDirection::HostToHost, args.direction); EXPECT_EQ(size, args.size); EXPECT_EQ(&image1, args.dstResource.image); 
EXPECT_EQ(&allocation1, args.dstResource.allocation); EXPECT_EQ(origin1, args.dstResource.imageOrigin); } { allocation1.memoryPool = MemoryPool::LocalMemory; CsrSelectionArgs args{CL_COMMAND_WRITE_IMAGE, {}, &image1, rootDeviceIndex, size, nullptr, origin1}; EXPECT_EQ(static_cast(CL_COMMAND_WRITE_IMAGE), args.cmdType); EXPECT_EQ(TransferDirection::HostToLocal, args.direction); EXPECT_EQ(size, args.size); EXPECT_EQ(&image1, args.dstResource.image); EXPECT_EQ(&allocation1, args.dstResource.allocation); EXPECT_EQ(origin1, args.dstResource.imageOrigin); } { allocation1.memoryPool = MemoryPool::System4KBPages; CsrSelectionArgs args{CL_COMMAND_READ_IMAGE, &image1, nullptr, rootDeviceIndex, size, origin1, nullptr}; EXPECT_EQ(static_cast(CL_COMMAND_READ_IMAGE), args.cmdType); EXPECT_EQ(TransferDirection::HostToHost, args.direction); EXPECT_EQ(size, args.size); EXPECT_EQ(&image1, args.srcResource.image); EXPECT_EQ(&allocation1, args.srcResource.allocation); EXPECT_EQ(origin1, args.srcResource.imageOrigin); } { allocation1.memoryPool = MemoryPool::LocalMemory; CsrSelectionArgs args{CL_COMMAND_READ_IMAGE, &image1, nullptr, rootDeviceIndex, size, origin1, nullptr}; EXPECT_EQ(static_cast(CL_COMMAND_READ_IMAGE), args.cmdType); EXPECT_EQ(TransferDirection::LocalToHost, args.direction); EXPECT_EQ(size, args.size); EXPECT_EQ(&image1, args.srcResource.image); EXPECT_EQ(&allocation1, args.srcResource.allocation); EXPECT_EQ(origin1, args.srcResource.imageOrigin); } { allocation1.memoryPool = MemoryPool::System4KBPages; allocation2.memoryPool = MemoryPool::LocalMemory; CsrSelectionArgs args{CL_COMMAND_COPY_IMAGE, &image1, &image2, rootDeviceIndex, size, origin1, origin2}; EXPECT_EQ(static_cast(CL_COMMAND_COPY_IMAGE), args.cmdType); EXPECT_EQ(TransferDirection::HostToLocal, args.direction); EXPECT_EQ(size, args.size); EXPECT_EQ(&image1, args.srcResource.image); EXPECT_EQ(&allocation1, args.srcResource.allocation); EXPECT_EQ(origin1, args.srcResource.imageOrigin); EXPECT_EQ(&image2, 
args.dstResource.image); EXPECT_EQ(&allocation2, args.dstResource.allocation); EXPECT_EQ(origin2, args.dstResource.imageOrigin); } { allocation1.memoryPool = MemoryPool::LocalMemory; allocation2.memoryPool = MemoryPool::System4KBPages; CsrSelectionArgs args{CL_COMMAND_COPY_IMAGE, &image1, &image2, rootDeviceIndex, size, origin1, origin2}; EXPECT_EQ(static_cast(CL_COMMAND_COPY_IMAGE), args.cmdType); EXPECT_EQ(TransferDirection::LocalToHost, args.direction); EXPECT_EQ(size, args.size); EXPECT_EQ(&image1, args.srcResource.image); EXPECT_EQ(&allocation1, args.srcResource.allocation); EXPECT_EQ(origin1, args.srcResource.imageOrigin); EXPECT_EQ(&image2, args.dstResource.image); EXPECT_EQ(&allocation2, args.dstResource.allocation); EXPECT_EQ(origin2, args.dstResource.imageOrigin); } } TEST(CsrSelectionArgsTests, givenGraphicsAllocationsWhenCreatingCsrSelectionArgsThenSetupArgsCorrectly) { const uint32_t rootDeviceIndex = 2u; const size_t *size = reinterpret_cast(0x1234); MockGraphicsAllocation allocation1{rootDeviceIndex, nullptr, 1024u}; MockGraphicsAllocation allocation2{rootDeviceIndex, nullptr, 1024u}; MultiGraphicsAllocation multiAlloc1 = GraphicsAllocationHelper::toMultiGraphicsAllocation(&allocation1); MultiGraphicsAllocation multiAlloc2 = GraphicsAllocationHelper::toMultiGraphicsAllocation(&allocation2); { allocation1.memoryPool = MemoryPool::System4KBPages; allocation2.memoryPool = MemoryPool::System4KBPages; CsrSelectionArgs args{CL_COMMAND_SVM_MEMCPY, &multiAlloc1, &multiAlloc2, rootDeviceIndex, size}; EXPECT_EQ(static_cast(CL_COMMAND_SVM_MEMCPY), args.cmdType); EXPECT_EQ(TransferDirection::HostToHost, args.direction); EXPECT_EQ(size, args.size); EXPECT_EQ(&allocation1, args.srcResource.allocation); EXPECT_EQ(&allocation2, args.dstResource.allocation); } { allocation1.memoryPool = MemoryPool::System4KBPages; allocation2.memoryPool = MemoryPool::LocalMemory; CsrSelectionArgs args{CL_COMMAND_SVM_MEMCPY, &multiAlloc1, &multiAlloc2, rootDeviceIndex, size}; 
EXPECT_EQ(static_cast(CL_COMMAND_SVM_MEMCPY), args.cmdType); EXPECT_EQ(TransferDirection::HostToLocal, args.direction); EXPECT_EQ(size, args.size); EXPECT_EQ(&allocation1, args.srcResource.allocation); EXPECT_EQ(&allocation2, args.dstResource.allocation); } { allocation1.memoryPool = MemoryPool::LocalMemory; allocation2.memoryPool = MemoryPool::System4KBPages; CsrSelectionArgs args{CL_COMMAND_SVM_MEMCPY, &multiAlloc1, &multiAlloc2, rootDeviceIndex, size}; EXPECT_EQ(static_cast(CL_COMMAND_SVM_MEMCPY), args.cmdType); EXPECT_EQ(TransferDirection::LocalToHost, args.direction); EXPECT_EQ(size, args.size); EXPECT_EQ(&allocation1, args.srcResource.allocation); EXPECT_EQ(&allocation2, args.dstResource.allocation); } { allocation1.memoryPool = MemoryPool::LocalMemory; allocation2.memoryPool = MemoryPool::LocalMemory; CsrSelectionArgs args{CL_COMMAND_SVM_MEMCPY, &multiAlloc1, &multiAlloc2, rootDeviceIndex, size}; EXPECT_EQ(static_cast(CL_COMMAND_SVM_MEMCPY), args.cmdType); EXPECT_EQ(TransferDirection::LocalToLocal, args.direction); EXPECT_EQ(size, args.size); EXPECT_EQ(&allocation1, args.srcResource.allocation); EXPECT_EQ(&allocation2, args.dstResource.allocation); } } } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/dispatch_walker_tests.cpp000066400000000000000000002054471422164147700315570ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/local_work_size.h" #include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/source/utilities/perf_counter.h" #include "shared/source/utilities/tag_allocator.h" #include "shared/test/common/cmd_parse/hw_parse.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/unit_test_helper.h" #include 
"shared/test/common/mocks/mock_graphics_allocation.h" #include "shared/test/common/mocks/mock_timestamp_container.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/built_ins/aux_translation_builtin.h" #include "opencl/source/command_queue/gpgpu_walker.h" #include "opencl/source/command_queue/hardware_interface.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/helpers/task_information.h" #include "opencl/test/unit_test/command_queue/command_queue_fixture.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_mdi.h" #include "opencl/test/unit_test/mocks/mock_program.h" using namespace NEO; struct DispatchWalkerTest : public CommandQueueFixture, public ClDeviceFixture, public ::testing::Test { using CommandQueueFixture::SetUp; void SetUp() override { DebugManager.flags.EnableTimestampPacket.set(0); ClDeviceFixture::SetUp(); context = std::make_unique(pClDevice); CommandQueueFixture::SetUp(context.get(), pClDevice, 0); program = std::make_unique(toClDeviceVector(*pClDevice)); kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 32; kernelInfo.setCrossThreadDataSize(64); kernelInfo.setLocalIds({1, 1, 1}); kernelInfo.heapInfo.pKernelHeap = kernelIsa; kernelInfo.heapInfo.KernelHeapSize = sizeof(kernelIsa); kernelInfoWithSampler.kernelDescriptor.kernelAttributes.simdSize = 32; kernelInfoWithSampler.setCrossThreadDataSize(64); kernelInfoWithSampler.setLocalIds({1, 1, 1}); kernelInfoWithSampler.setSamplerTable(0, 1, 4); kernelInfoWithSampler.heapInfo.pKernelHeap = kernelIsa; kernelInfoWithSampler.heapInfo.KernelHeapSize = sizeof(kernelIsa); kernelInfoWithSampler.heapInfo.pDsh = static_cast(dsh); kernelInfoWithSampler.heapInfo.DynamicStateHeapSize = sizeof(dsh); } void TearDown() override { 
CommandQueueFixture::TearDown(); context.reset(); ClDeviceFixture::TearDown(); } std::unique_ptr createBlockedCommandsData(CommandQueue &commandQueue) { auto commandStream = new LinearStream(); auto &gpgpuCsr = commandQueue.getGpgpuCommandStreamReceiver(); gpgpuCsr.ensureCommandBufferAllocation(*commandStream, 1, 1); return std::make_unique(commandStream, *gpgpuCsr.getInternalAllocationStorage()); } std::unique_ptr context; std::unique_ptr program; MockKernelInfo kernelInfo; MockKernelInfo kernelInfoWithSampler; uint32_t kernelIsa[32]; uint32_t dsh[32]; DebugManagerStateRestore dbgRestore; }; struct DispatchWalkerTestForAuxTranslation : DispatchWalkerTest, public ::testing::WithParamInterface { void SetUp() override { DispatchWalkerTest::SetUp(); kernelObjType = GetParam(); } KernelObjForAuxTranslation::Type kernelObjType; }; INSTANTIATE_TEST_CASE_P(, DispatchWalkerTestForAuxTranslation, testing::ValuesIn({KernelObjForAuxTranslation::Type::MEM_OBJ, KernelObjForAuxTranslation::Type::GFX_ALLOC})); HWTEST_F(DispatchWalkerTest, WhenGettingComputeDimensionsThenCorrectNumberOfDimensionsIsReturned) { const size_t workItems1D[] = {100, 1, 1}; EXPECT_EQ(1u, computeDimensions(workItems1D)); const size_t workItems2D[] = {100, 100, 1}; EXPECT_EQ(2u, computeDimensions(workItems2D)); const size_t workItems3D[] = {100, 100, 100}; EXPECT_EQ(3u, computeDimensions(workItems3D)); } HWTEST_F(DispatchWalkerTest, givenSimd1WhenSetGpgpuWalkerThreadDataThenSimdInWalkerIsSetTo32Value) { uint32_t pCmdBuffer[1024]; MockGraphicsAllocation gfxAllocation(static_cast(pCmdBuffer), sizeof(pCmdBuffer)); LinearStream linearStream(&gfxAllocation); using WALKER_TYPE = typename FamilyType::WALKER_TYPE; WALKER_TYPE *computeWalker = static_cast(linearStream.getSpace(sizeof(WALKER_TYPE))); *computeWalker = FamilyType::cmdInitGpgpuWalker; size_t globalOffsets[] = {0, 0, 0}; size_t startWorkGroups[] = {0, 0, 0}; size_t numWorkGroups[] = {1, 1, 1}; size_t localWorkSizesIn[] = {32, 1, 1}; uint32_t simd = 1; 
KernelDescriptor kd;
    GpgpuWalkerHelper::setGpgpuWalkerThreadData(
        computeWalker, kd, globalOffsets, startWorkGroups, numWorkGroups, localWorkSizesIn, simd, 3, true, false, 5u);
    EXPECT_EQ(computeWalker->getSimdSize(), 32 >> 4);
}

// NOTE(review): this copy of the file appears to have lost its explicit template argument lists
// (e.g. `const_cast(&kernel)` carried no target type). Calls such as HardwareCommandsHelper::getSizeRequiredCS()
// and HardwareInterface::dispatchWalker() may need theirs restored - confirm against the upstream sources.

// Pre-fills the queue's command stream so that only the space for one walker dispatch (plus the minimum
// command stream size) remains, dispatches a 1x1x1 kernel, and checks that the stream kept its CPU base
// and grew by exactly sizeDispatchWalkerNeeds bytes.
HWTEST_F(DispatchWalkerTest, WhenDispatchingWalkerThenCommandStreamMemoryIsntChanged) {
    MockKernel kernel(program.get(), kernelInfo, *pClDevice);
    ASSERT_EQ(CL_SUCCESS, kernel.initialize());

    auto &commandStream = pCmdQ->getCS(4096);

    // Consume all memory except what is needed for this enqueue
    auto sizeDispatchWalkerNeeds = sizeof(typename FamilyType::WALKER_TYPE) +
                                   HardwareCommandsHelper::getSizeRequiredCS();

    //cs has a minimum required size
    auto sizeThatNeedsToBeSubtracted = sizeDispatchWalkerNeeds + CSRequirements::minCommandQueueCommandStreamSize;

    commandStream.getSpace(commandStream.getMaxAvailableSpace() - sizeThatNeedsToBeSubtracted);
    ASSERT_EQ(commandStream.getAvailableSpace(), sizeThatNeedsToBeSubtracted);

    // Snapshot the stream state so the post-dispatch state can be compared against it.
    auto commandStreamStart = commandStream.getUsed();
    auto commandStreamBuffer = commandStream.getCpuBase();
    ASSERT_NE(0u, commandStreamStart);

    size_t globalOffsets[3] = {0, 0, 0};
    size_t workItems[3] = {1, 1, 1};
    cl_uint dimensions = 1;
    // `kernel` is a non-const local, so its address is passed directly (no cast needed).
    DispatchInfo dispatchInfo(pClDevice, &kernel, dimensions, workItems, nullptr, globalOffsets);
    dispatchInfo.setNumberOfWorkgroups({1, 1, 1});
    dispatchInfo.setTotalNumberOfWorkgroups({1, 1, 1});
    MultiDispatchInfo multiDispatchInfo;
    multiDispatchInfo.push(dispatchInfo);

    HardwareInterface::dispatchWalker(
        *pCmdQ, multiDispatchInfo, CsrDependencies(),
        nullptr, nullptr, nullptr, nullptr, nullptr, CL_COMMAND_NDRANGE_KERNEL);

    EXPECT_EQ(commandStreamBuffer, commandStream.getCpuBase());
    EXPECT_LT(commandStreamStart, commandStream.getUsed());
    EXPECT_EQ(sizeDispatchWalkerNeeds, commandStream.getUsed() - commandStreamStart);
}

// Same scenario as above, but with local ids set to {0, 0, 0} and the perThreadDataUnusedGrfIsPresent
// flag raised on the kernel descriptor before the kernel is created.
HWTEST_F(DispatchWalkerTest, GivenNoLocalIdsWhenDispatchingWalkerThenWalkerIsDispatched) {
    kernelInfo.setLocalIds({0, 0, 0});
    kernelInfo.kernelDescriptor.kernelAttributes.flags.perThreadDataUnusedGrfIsPresent = true;
    MockKernel kernel(program.get(), kernelInfo, *pClDevice);
    ASSERT_EQ(CL_SUCCESS, kernel.initialize());

    auto &commandStream = pCmdQ->getCS(4096);

    // Consume all memory except what is needed for this enqueue
    auto sizeDispatchWalkerNeeds = sizeof(typename FamilyType::WALKER_TYPE) +
                                   HardwareCommandsHelper::getSizeRequiredCS();

    //cs has a minimum required size
    auto sizeThatNeedsToBeSubtracted = sizeDispatchWalkerNeeds + CSRequirements::minCommandQueueCommandStreamSize;

    commandStream.getSpace(commandStream.getMaxAvailableSpace() - sizeThatNeedsToBeSubtracted);
    ASSERT_EQ(commandStream.getAvailableSpace(), sizeThatNeedsToBeSubtracted);

    // Snapshot the stream state so the post-dispatch state can be compared against it.
    auto commandStreamStart = commandStream.getUsed();
    auto commandStreamBuffer = commandStream.getCpuBase();
    ASSERT_NE(0u, commandStreamStart);

    size_t globalOffsets[3] = {0, 0, 0};
    size_t workItems[3] = {1, 1, 1};
    cl_uint dimensions = 1;
    // `kernel` is a non-const local, so its address is passed directly (no cast needed).
    DispatchInfo dispatchInfo(pClDevice, &kernel, dimensions, workItems, nullptr, globalOffsets);
    dispatchInfo.setNumberOfWorkgroups({1, 1, 1});
    dispatchInfo.setTotalNumberOfWorkgroups({1, 1, 1});
    MultiDispatchInfo multiDispatchInfo;
    multiDispatchInfo.push(dispatchInfo);

    HardwareInterface::dispatchWalker(
        *pCmdQ, multiDispatchInfo, CsrDependencies(),
        nullptr, nullptr, nullptr, nullptr, nullptr, CL_COMMAND_NDRANGE_KERNEL);

    EXPECT_EQ(commandStreamBuffer, commandStream.getCpuBase());
    EXPECT_LT(commandStreamStart, commandStream.getUsed());
    EXPECT_EQ(sizeDispatchWalkerNeeds, commandStream.getUsed() - commandStreamStart);
}

HWTEST_F(DispatchWalkerTest, GivenDefaultLwsAlgorithmWhenDispatchingWalkerThenDimensionsAreCorrect) {
    MockKernel kernel(program.get(), kernelInfo, *pClDevice);
    kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.workDim = 0;
    ASSERT_EQ(CL_SUCCESS, kernel.initialize());

    size_t globalOffsets[3] = {0, 0, 0};
    size_t workItems[3] = {1, 1, 1};
    for (uint32_t dimension = 1; dimension <= 3;
++dimension) { workItems[dimension - 1] = 256; DispatchInfo dispatchInfo(pClDevice, const_cast(&kernel), dimension, workItems, nullptr, globalOffsets); dispatchInfo.setNumberOfWorkgroups({1, 1, 1}); dispatchInfo.setTotalNumberOfWorkgroups({1, 1, 1}); MultiDispatchInfo multiDispatchInfo; multiDispatchInfo.push(dispatchInfo); HardwareInterface::dispatchWalker( *pCmdQ, multiDispatchInfo, CsrDependencies(), nullptr, nullptr, nullptr, nullptr, nullptr, CL_COMMAND_NDRANGE_KERNEL); EXPECT_EQ(dimension, *kernel.getWorkDim()); } } HWTEST_F(DispatchWalkerTest, GivenSquaredLwsAlgorithmWhenDispatchingWalkerThenDimensionsAreCorrect) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableComputeWorkSizeND.set(false); DebugManager.flags.EnableComputeWorkSizeSquared.set(true); MockKernel kernel(program.get(), kernelInfo, *pClDevice); kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.workDim = 0; ASSERT_EQ(CL_SUCCESS, kernel.initialize()); size_t globalOffsets[3] = {0, 0, 0}; size_t workItems[3] = {1, 1, 1}; for (uint32_t dimension = 1; dimension <= 3; ++dimension) { workItems[dimension - 1] = 256; DispatchInfo dispatchInfo(pClDevice, const_cast(&kernel), dimension, workItems, nullptr, globalOffsets); dispatchInfo.setNumberOfWorkgroups({1, 1, 1}); dispatchInfo.setTotalNumberOfWorkgroups({1, 1, 1}); MultiDispatchInfo multiDispatchInfo; multiDispatchInfo.push(dispatchInfo); HardwareInterface::dispatchWalker( *pCmdQ, multiDispatchInfo, CsrDependencies(), nullptr, nullptr, nullptr, nullptr, nullptr, CL_COMMAND_NDRANGE_KERNEL); EXPECT_EQ(dimension, *kernel.getWorkDim()); } } HWTEST_F(DispatchWalkerTest, GivenNdLwsAlgorithmWhenDispatchingWalkerThenDimensionsAreCorrect) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableComputeWorkSizeND.set(true); MockKernel kernel(program.get(), kernelInfo, *pClDevice); kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.workDim = 0; ASSERT_EQ(CL_SUCCESS, kernel.initialize()); size_t globalOffsets[3] = {0, 0, 0}; 
size_t workItems[3] = {1, 1, 1}; for (uint32_t dimension = 1; dimension <= 3; ++dimension) { workItems[dimension - 1] = 256; DispatchInfo dispatchInfo(pClDevice, const_cast(&kernel), dimension, workItems, nullptr, globalOffsets); dispatchInfo.setNumberOfWorkgroups({1, 1, 1}); dispatchInfo.setTotalNumberOfWorkgroups({1, 1, 1}); MultiDispatchInfo multiDispatchInfo; multiDispatchInfo.push(dispatchInfo); HardwareInterface::dispatchWalker( *pCmdQ, multiDispatchInfo, CsrDependencies(), nullptr, nullptr, nullptr, nullptr, nullptr, CL_COMMAND_NDRANGE_KERNEL); EXPECT_EQ(dimension, *kernel.getWorkDim()); } } HWTEST_F(DispatchWalkerTest, GivenOldLwsAlgorithmWhenDispatchingWalkerThenDimensionsAreCorrect) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableComputeWorkSizeND.set(false); DebugManager.flags.EnableComputeWorkSizeSquared.set(false); MockKernel kernel(program.get(), kernelInfo, *pClDevice); kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.workDim = 0; ASSERT_EQ(CL_SUCCESS, kernel.initialize()); size_t globalOffsets[3] = {0, 0, 0}; size_t workItems[3] = {1, 1, 1}; for (uint32_t dimension = 1; dimension <= 3; ++dimension) { workItems[dimension - 1] = 256; DispatchInfo dispatchInfo(pClDevice, const_cast(&kernel), dimension, workItems, nullptr, globalOffsets); dispatchInfo.setNumberOfWorkgroups({1, 1, 1}); dispatchInfo.setTotalNumberOfWorkgroups({1, 1, 1}); MultiDispatchInfo multiDispatchInfo; multiDispatchInfo.push(dispatchInfo); HardwareInterface::dispatchWalker( *pCmdQ, multiDispatchInfo, CsrDependencies(), nullptr, nullptr, nullptr, nullptr, nullptr, CL_COMMAND_NDRANGE_KERNEL); EXPECT_EQ(dimension, *kernel.getWorkDim()); } } HWTEST_F(DispatchWalkerTest, GivenNumWorkGroupsWhenDispatchingWalkerThenNumWorkGroupsIsCorrectlySet) { MockKernel kernel(program.get(), kernelInfo, *pClDevice); kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.numWorkGroups[0] = 0; kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.numWorkGroups[1] = 4; 
kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.numWorkGroups[2] = 8; ASSERT_EQ(CL_SUCCESS, kernel.initialize()); size_t globalOffsets[3] = {0, 0, 0}; size_t workItems[3] = {2, 5, 10}; size_t workGroupSize[3] = {1, 1, 1}; cl_uint dimensions = 3; DispatchInfo dispatchInfo(pClDevice, const_cast(&kernel), dimensions, workItems, workGroupSize, globalOffsets); dispatchInfo.setNumberOfWorkgroups(workItems); dispatchInfo.setTotalNumberOfWorkgroups(workItems); MultiDispatchInfo multiDispatchInfo; multiDispatchInfo.push(dispatchInfo); HardwareInterface::dispatchWalker( *pCmdQ, multiDispatchInfo, CsrDependencies(), nullptr, nullptr, nullptr, nullptr, nullptr, CL_COMMAND_NDRANGE_KERNEL); auto numWorkGroups = kernel.getNumWorkGroupsValues(); EXPECT_EQ(2u, *numWorkGroups[0]); EXPECT_EQ(5u, *numWorkGroups[1]); EXPECT_EQ(10u, *numWorkGroups[2]); } HWTEST_F(DispatchWalkerTest, GivenGlobalWorkOffsetWhenDispatchingWalkerThenGlobalWorkOffsetIsCorrectlySet) { kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.globalWorkOffset[0] = 0u; kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.globalWorkOffset[1] = 4u; kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.globalWorkOffset[2] = 8u; MockKernel kernel(program.get(), kernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); size_t globalOffsets[3] = {1, 2, 3}; size_t workItems[3] = {2, 5, 10}; size_t workGroupSize[3] = {1, 1, 1}; cl_uint dimensions = 3; DispatchInfo dispatchInfo(pClDevice, &kernel, dimensions, workItems, workGroupSize, globalOffsets); dispatchInfo.setNumberOfWorkgroups({1, 1, 1}); dispatchInfo.setTotalNumberOfWorkgroups({1, 1, 1}); MultiDispatchInfo multiDispatchInfo; multiDispatchInfo.push(dispatchInfo); HardwareInterface::dispatchWalker( *pCmdQ, multiDispatchInfo, CsrDependencies(), nullptr, nullptr, nullptr, nullptr, nullptr, CL_COMMAND_NDRANGE_KERNEL); auto gwo = kernel.getGlobalWorkOffsetValues(); EXPECT_EQ(1u, *gwo[0]); EXPECT_EQ(2u, *gwo[1]); EXPECT_EQ(3u, 
*gwo[2]); } HWTEST_F(DispatchWalkerTest, GivenNoLocalWorkSizeAndDefaultAlgorithmWhenDispatchingWalkerThenLwsIsCorrect) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableComputeWorkSizeND.set(false); MockKernel kernel(program.get(), kernelInfo, *pClDevice); kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[0] = 0; kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[1] = 4; kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[2] = 8; ASSERT_EQ(CL_SUCCESS, kernel.initialize()); size_t globalOffsets[3] = {0, 0, 0}; size_t workItems[3] = {2, 5, 10}; cl_uint dimensions = 3; DispatchInfo dispatchInfo(pClDevice, const_cast(&kernel), dimensions, workItems, nullptr, globalOffsets); dispatchInfo.setNumberOfWorkgroups({1, 1, 1}); dispatchInfo.setTotalNumberOfWorkgroups({1, 1, 1}); MultiDispatchInfo multiDispatchInfo; multiDispatchInfo.push(dispatchInfo); HardwareInterface::dispatchWalker( *pCmdQ, multiDispatchInfo, CsrDependencies(), nullptr, nullptr, nullptr, nullptr, nullptr, CL_COMMAND_NDRANGE_KERNEL); auto localWorkSize = kernel.getLocalWorkSizeValues(); EXPECT_EQ(2u, *localWorkSize[0]); EXPECT_EQ(5u, *localWorkSize[1]); EXPECT_EQ(1u, *localWorkSize[2]); } HWTEST_F(DispatchWalkerTest, GivenNoLocalWorkSizeAndNdOnWhenDispatchingWalkerThenLwsIsCorrect) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableComputeWorkSizeND.set(true); MockKernel kernel(program.get(), kernelInfo, *pClDevice); kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[0] = 0; kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[1] = 4; kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[2] = 8; ASSERT_EQ(CL_SUCCESS, kernel.initialize()); size_t globalOffsets[3] = {0, 0, 0}; size_t workItems[3] = {2, 3, 5}; cl_uint dimensions = 3; DispatchInfo dispatchInfo(pClDevice, const_cast(&kernel), dimensions, workItems, nullptr, globalOffsets); 
dispatchInfo.setNumberOfWorkgroups({1, 1, 1}); dispatchInfo.setTotalNumberOfWorkgroups({1, 1, 1}); MultiDispatchInfo multiDispatchInfo; multiDispatchInfo.push(dispatchInfo); HardwareInterface::dispatchWalker( *pCmdQ, multiDispatchInfo, CsrDependencies(), nullptr, nullptr, nullptr, nullptr, nullptr, CL_COMMAND_NDRANGE_KERNEL); auto localWorkSize = kernel.getLocalWorkSizeValues(); EXPECT_EQ(2u, *localWorkSize[0]); EXPECT_EQ(3u, *localWorkSize[1]); EXPECT_EQ(5u, *localWorkSize[2]); } HWTEST_F(DispatchWalkerTest, GivenNoLocalWorkSizeAndSquaredAlgorithmWhenDispatchingWalkerThenLwsIsCorrect) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableComputeWorkSizeSquared.set(true); DebugManager.flags.EnableComputeWorkSizeND.set(false); MockKernel kernel(program.get(), kernelInfo, *pClDevice); kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[0] = 0; kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[1] = 4; kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[2] = 8; ASSERT_EQ(CL_SUCCESS, kernel.initialize()); size_t globalOffsets[3] = {0, 0, 0}; size_t workItems[3] = {2, 5, 10}; cl_uint dimensions = 3; DispatchInfo dispatchInfo(pClDevice, const_cast(&kernel), dimensions, workItems, nullptr, globalOffsets); dispatchInfo.setNumberOfWorkgroups({1, 1, 1}); dispatchInfo.setTotalNumberOfWorkgroups({1, 1, 1}); MultiDispatchInfo multiDispatchInfo; multiDispatchInfo.push(dispatchInfo); HardwareInterface::dispatchWalker( *pCmdQ, multiDispatchInfo, CsrDependencies(), nullptr, nullptr, nullptr, nullptr, nullptr, CL_COMMAND_NDRANGE_KERNEL); auto localWorkSize = kernel.getLocalWorkSizeValues(); EXPECT_EQ(2u, *localWorkSize[0]); EXPECT_EQ(5u, *localWorkSize[1]); EXPECT_EQ(1u, *localWorkSize[2]); } HWTEST_F(DispatchWalkerTest, GivenNoLocalWorkSizeAndSquaredAlgorithmOffAndNdOffWhenDispatchingWalkerThenLwsIsCorrect) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableComputeWorkSizeSquared.set(false); 
DebugManager.flags.EnableComputeWorkSizeND.set(false); MockKernel kernel(program.get(), kernelInfo, *pClDevice); kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[0] = 0; kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[1] = 4; kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[2] = 8; ASSERT_EQ(CL_SUCCESS, kernel.initialize()); size_t globalOffsets[3] = {0, 0, 0}; size_t workItems[3] = {2, 5, 10}; cl_uint dimensions = 3; DispatchInfo dispatchInfo(pClDevice, const_cast(&kernel), dimensions, workItems, nullptr, globalOffsets); dispatchInfo.setNumberOfWorkgroups({1, 1, 1}); dispatchInfo.setTotalNumberOfWorkgroups({1, 1, 1}); MultiDispatchInfo multiDispatchInfo; multiDispatchInfo.push(dispatchInfo); HardwareInterface::dispatchWalker( *pCmdQ, multiDispatchInfo, CsrDependencies(), nullptr, nullptr, nullptr, nullptr, nullptr, CL_COMMAND_NDRANGE_KERNEL); auto localWorkSize = kernel.getLocalWorkSizeValues(); EXPECT_EQ(2u, *localWorkSize[0]); EXPECT_EQ(5u, *localWorkSize[1]); EXPECT_EQ(1u, *localWorkSize[2]); } HWTEST_F(DispatchWalkerTest, GivenNoLocalWorkSizeWhenDispatchingWalkerThenLwsIsCorrect) { MockKernel kernel(program.get(), kernelInfo, *pClDevice); kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[0] = 0; kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[1] = 4; kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[2] = 8; ASSERT_EQ(CL_SUCCESS, kernel.initialize()); size_t globalOffsets[3] = {0, 0, 0}; size_t workItems[3] = {2, 5, 10}; size_t workGroupSize[3] = {1, 2, 3}; cl_uint dimensions = 3; DispatchInfo dispatchInfo(pClDevice, const_cast(&kernel), dimensions, workItems, workGroupSize, globalOffsets); dispatchInfo.setNumberOfWorkgroups({1, 1, 1}); dispatchInfo.setTotalNumberOfWorkgroups({1, 1, 1}); MultiDispatchInfo multiDispatchInfo; multiDispatchInfo.push(dispatchInfo); HardwareInterface::dispatchWalker( *pCmdQ, 
multiDispatchInfo, CsrDependencies(), nullptr, nullptr, nullptr, nullptr, nullptr, CL_COMMAND_NDRANGE_KERNEL); auto localWorkSize = kernel.getLocalWorkSizeValues(); EXPECT_EQ(1u, *localWorkSize[0]); EXPECT_EQ(2u, *localWorkSize[1]); EXPECT_EQ(3u, *localWorkSize[2]); } HWTEST_F(DispatchWalkerTest, GivenTwoSetsOfLwsOffsetsWhenDispatchingWalkerThenLwsIsCorrect) { MockKernel kernel(program.get(), kernelInfo, *pClDevice); kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[0] = 0; kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[1] = 4; kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[2] = 8; kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize2[0] = 12; kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize2[1] = 16; kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize2[2] = 20; ASSERT_EQ(CL_SUCCESS, kernel.initialize()); size_t globalOffsets[3] = {0, 0, 0}; size_t workItems[3] = {2, 5, 10}; size_t workGroupSize[3] = {1, 2, 3}; cl_uint dimensions = 3; DispatchInfo dispatchInfo(pClDevice, const_cast(&kernel), dimensions, workItems, workGroupSize, globalOffsets); dispatchInfo.setNumberOfWorkgroups({1, 1, 1}); dispatchInfo.setTotalNumberOfWorkgroups({1, 1, 1}); MultiDispatchInfo multiDispatchInfo; multiDispatchInfo.push(dispatchInfo); HardwareInterface::dispatchWalker( *pCmdQ, multiDispatchInfo, CsrDependencies(), nullptr, nullptr, nullptr, nullptr, nullptr, CL_COMMAND_NDRANGE_KERNEL); auto localWorkSize = kernel.getLocalWorkSizeValues(); EXPECT_EQ(1u, *localWorkSize[0]); EXPECT_EQ(2u, *localWorkSize[1]); EXPECT_EQ(3u, *localWorkSize[2]); auto localWorkSize2 = kernel.getLocalWorkSize2Values(); EXPECT_EQ(1u, *localWorkSize2[0]); EXPECT_EQ(2u, *localWorkSize2[1]); EXPECT_EQ(3u, *localWorkSize2[2]); } HWTEST_F(DispatchWalkerTest, GivenSplitKernelWhenDispatchingWalkerThenLwsIsCorrect) { MockKernel kernel1(program.get(), kernelInfo, *pClDevice); 
kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[0] = 0; kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[1] = 4; kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[2] = 8; ASSERT_EQ(CL_SUCCESS, kernel1.initialize()); MockKernel kernel2(program.get(), kernelInfoWithSampler, *pClDevice); kernelInfoWithSampler.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[0] = 12; kernelInfoWithSampler.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[1] = 16; kernelInfoWithSampler.kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[2] = 20; ASSERT_EQ(CL_SUCCESS, kernel2.initialize()); DispatchInfo di1(pClDevice, &kernel1, 3, {10, 10, 10}, {1, 2, 3}, {0, 0, 0}); di1.setNumberOfWorkgroups({1, 1, 1}); di1.setTotalNumberOfWorkgroups({2, 2, 2}); DispatchInfo di2(pClDevice, &kernel2, 3, {10, 10, 10}, {4, 5, 6}, {0, 0, 0}); di2.setNumberOfWorkgroups({1, 1, 1}); di2.setTotalNumberOfWorkgroups({2, 2, 2}); MockMultiDispatchInfo multiDispatchInfo(std::vector({&di1, &di2})); HardwareInterface::dispatchWalker( *pCmdQ, multiDispatchInfo, CsrDependencies(), nullptr, nullptr, nullptr, nullptr, nullptr, CL_COMMAND_NDRANGE_KERNEL); auto dispatchId = 0; for (auto &dispatchInfo : multiDispatchInfo) { auto &kernel = static_cast(*dispatchInfo.getKernel()); auto localWorkSize = kernel.getLocalWorkSizeValues(); if (dispatchId == 0) { EXPECT_EQ(1u, *localWorkSize[0]); EXPECT_EQ(2u, *localWorkSize[1]); EXPECT_EQ(3u, *localWorkSize[2]); } if (dispatchId == 1) { EXPECT_EQ(4u, *localWorkSize[0]); EXPECT_EQ(5u, *localWorkSize[1]); EXPECT_EQ(6u, *localWorkSize[2]); } dispatchId++; } } HWTEST_F(DispatchWalkerTest, GivenSplitWalkerWhenDispatchingWalkerThenLwsIsCorrect) { MockKernel kernel1(program.get(), kernelInfo, *pClDevice); MockKernel mainKernel(program.get(), kernelInfo, *pClDevice); auto &dispatchTraits = kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits; dispatchTraits.localWorkSize[0] = 0; 
dispatchTraits.localWorkSize[1] = 4; dispatchTraits.localWorkSize[2] = 8; dispatchTraits.localWorkSize2[0] = 12; dispatchTraits.localWorkSize2[1] = 16; dispatchTraits.localWorkSize2[2] = 20; dispatchTraits.numWorkGroups[0] = 24; dispatchTraits.numWorkGroups[1] = 28; dispatchTraits.numWorkGroups[2] = 32; ASSERT_EQ(CL_SUCCESS, kernel1.initialize()); ASSERT_EQ(CL_SUCCESS, mainKernel.initialize()); DispatchInfo di1(pClDevice, &kernel1, 3, {10, 10, 10}, {1, 2, 3}, {0, 0, 0}); di1.setNumberOfWorkgroups({1, 1, 1}); di1.setTotalNumberOfWorkgroups({3, 2, 2}); DispatchInfo di2(pClDevice, &mainKernel, 3, {10, 10, 10}, {4, 5, 6}, {0, 0, 0}); di2.setNumberOfWorkgroups({1, 1, 1}); di2.setTotalNumberOfWorkgroups({3, 2, 2}); MultiDispatchInfo multiDispatchInfo(&mainKernel); multiDispatchInfo.push(di1); multiDispatchInfo.push(di2); HardwareInterface::dispatchWalker( *pCmdQ, multiDispatchInfo, CsrDependencies(), nullptr, nullptr, nullptr, nullptr, nullptr, CL_COMMAND_NDRANGE_KERNEL); for (auto &dispatchInfo : multiDispatchInfo) { auto &kernel = static_cast(*dispatchInfo.getKernel()); auto localWorkSize = kernel.getLocalWorkSizeValues(); auto localWorkSize2 = kernel.getLocalWorkSize2Values(); auto numWorkGroups = kernel.getNumWorkGroupsValues(); if (&kernel == &mainKernel) { EXPECT_EQ(4u, *localWorkSize[0]); EXPECT_EQ(5u, *localWorkSize[1]); EXPECT_EQ(6u, *localWorkSize[2]); EXPECT_EQ(4u, *localWorkSize2[0]); EXPECT_EQ(5u, *localWorkSize2[1]); EXPECT_EQ(6u, *localWorkSize2[2]); EXPECT_EQ(3u, *numWorkGroups[0]); EXPECT_EQ(2u, *numWorkGroups[1]); EXPECT_EQ(2u, *numWorkGroups[2]); } else { EXPECT_EQ(0u, *localWorkSize[0]); EXPECT_EQ(0u, *localWorkSize[1]); EXPECT_EQ(0u, *localWorkSize[2]); EXPECT_EQ(1u, *localWorkSize2[0]); EXPECT_EQ(2u, *localWorkSize2[1]); EXPECT_EQ(3u, *localWorkSize2[2]); EXPECT_EQ(0u, *numWorkGroups[0]); EXPECT_EQ(0u, *numWorkGroups[1]); EXPECT_EQ(0u, *numWorkGroups[2]); } } } HWTEST_F(DispatchWalkerTest, 
GivenBlockedQueueWhenDispatchingWalkerThenCommandSteamIsNotConsumed) { MockKernel kernel(program.get(), kernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); size_t globalOffsets[3] = {0, 0, 0}; size_t workItems[3] = {1, 1, 1}; size_t workGroupSize[3] = {2, 5, 10}; cl_uint dimensions = 1; auto blockedCommandsData = createBlockedCommandsData(*pCmdQ); DispatchInfo dispatchInfo(pClDevice, const_cast(&kernel), dimensions, workItems, workGroupSize, globalOffsets); dispatchInfo.setNumberOfWorkgroups({1, 1, 1}); dispatchInfo.setTotalNumberOfWorkgroups({1, 1, 1}); MultiDispatchInfo multiDispatchInfo; multiDispatchInfo.push(dispatchInfo); HardwareInterface::dispatchWalker( *pCmdQ, multiDispatchInfo, CsrDependencies(), blockedCommandsData.get(), nullptr, nullptr, nullptr, nullptr, CL_COMMAND_NDRANGE_KERNEL); auto &commandStream = pCmdQ->getCS(1024); EXPECT_EQ(0u, commandStream.getUsed()); EXPECT_NE(nullptr, blockedCommandsData); EXPECT_NE(nullptr, blockedCommandsData->commandStream); EXPECT_NE(nullptr, blockedCommandsData->dsh); EXPECT_NE(nullptr, blockedCommandsData->ioh); EXPECT_NE(nullptr, blockedCommandsData->ssh); } HWTEST_F(DispatchWalkerTest, GivenBlockedQueueWhenDispatchingWalkerThenRequiredHeaSizesAreTakenFromKernel) { MockKernel kernel(program.get(), kernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); size_t globalOffsets[3] = {0, 0, 0}; size_t workItems[3] = {1, 1, 1}; size_t workGroupSize[3] = {2, 5, 10}; cl_uint dimensions = 1; auto blockedCommandsData = createBlockedCommandsData(*pCmdQ); DispatchInfo dispatchInfo(pClDevice, const_cast(&kernel), dimensions, workItems, workGroupSize, globalOffsets); dispatchInfo.setNumberOfWorkgroups({1, 1, 1}); dispatchInfo.setTotalNumberOfWorkgroups({1, 1, 1}); MultiDispatchInfo multiDispatchInfo(&kernel); multiDispatchInfo.push(dispatchInfo); HardwareInterface::dispatchWalker( *pCmdQ, multiDispatchInfo, CsrDependencies(), blockedCommandsData.get(), nullptr, nullptr, nullptr, nullptr, 
CL_COMMAND_NDRANGE_KERNEL); Vec3 localWorkgroupSize(workGroupSize); auto expectedSizeDSH = HardwareCommandsHelper::getSizeRequiredDSH(kernel); auto expectedSizeIOH = HardwareCommandsHelper::getSizeRequiredIOH(kernel, Math::computeTotalElementsCount(localWorkgroupSize)); auto expectedSizeSSH = HardwareCommandsHelper::getSizeRequiredSSH(kernel); EXPECT_LE(expectedSizeDSH, blockedCommandsData->dsh->getMaxAvailableSpace()); EXPECT_LE(expectedSizeIOH, blockedCommandsData->ioh->getMaxAvailableSpace()); EXPECT_LE(expectedSizeSSH, blockedCommandsData->ssh->getMaxAvailableSpace()); } HWTEST_F(DispatchWalkerTest, givenBlockedEnqueueWhenObtainingCommandStreamThenAllocateEnoughSpaceAndBlockedKernelData) { DispatchInfo dispatchInfo; MultiDispatchInfo multiDispatchInfo; multiDispatchInfo.push(dispatchInfo); std::unique_ptr blockedKernelData; MockCommandQueueHw mockCmdQ(nullptr, pClDevice, nullptr); auto expectedSizeCSAllocation = MemoryConstants::pageSize64k; auto expectedSizeCS = MemoryConstants::pageSize64k - CSRequirements::csOverfetchSize; CsrDependencies csrDependencies; EventsRequest eventsRequest(0, nullptr, nullptr); auto cmdStream = mockCmdQ.template obtainCommandStream(csrDependencies, false, true, multiDispatchInfo, eventsRequest, blockedKernelData, nullptr, 0u, false); EXPECT_EQ(expectedSizeCS, cmdStream->getMaxAvailableSpace()); EXPECT_EQ(expectedSizeCSAllocation, cmdStream->getGraphicsAllocation()->getUnderlyingBufferSize()); EXPECT_NE(nullptr, blockedKernelData); EXPECT_EQ(cmdStream, blockedKernelData->commandStream.get()); } HWTEST_F(DispatchWalkerTest, GivenBlockedQueueWhenDispatchingWalkerThenRequiredHeapSizesAreTakenFromMdi) { MockKernel kernel(program.get(), kernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); MockMultiDispatchInfo multiDispatchInfo(pClDevice, &kernel); auto blockedCommandsData = createBlockedCommandsData(*pCmdQ); HardwareInterface::dispatchWalker( *pCmdQ, multiDispatchInfo, CsrDependencies(), blockedCommandsData.get(), 
nullptr, nullptr, nullptr, nullptr, CL_COMMAND_NDRANGE_KERNEL); auto expectedSizeDSH = HardwareCommandsHelper::getTotalSizeRequiredDSH(multiDispatchInfo); auto expectedSizeIOH = HardwareCommandsHelper::getTotalSizeRequiredIOH(multiDispatchInfo); auto expectedSizeSSH = HardwareCommandsHelper::getTotalSizeRequiredSSH(multiDispatchInfo); EXPECT_LE(expectedSizeDSH, blockedCommandsData->dsh->getMaxAvailableSpace()); EXPECT_LE(expectedSizeIOH, blockedCommandsData->ioh->getMaxAvailableSpace()); EXPECT_LE(expectedSizeSSH, blockedCommandsData->ssh->getMaxAvailableSpace()); } HWTEST_F(DispatchWalkerTest, givenBlockedQueueWhenDispatchWalkerIsCalledThenCommandStreamHasGpuAddress) { MockKernel kernel(program.get(), kernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); MockMultiDispatchInfo multiDispatchInfo(pClDevice, &kernel); auto blockedCommandsData = createBlockedCommandsData(*pCmdQ); HardwareInterface::dispatchWalker( *pCmdQ, multiDispatchInfo, CsrDependencies(), blockedCommandsData.get(), nullptr, nullptr, nullptr, nullptr, CL_COMMAND_NDRANGE_KERNEL); EXPECT_NE(nullptr, blockedCommandsData->commandStream->getGraphicsAllocation()); EXPECT_NE(0ull, blockedCommandsData->commandStream->getGraphicsAllocation()->getGpuAddress()); } HWTEST_F(DispatchWalkerTest, givenThereAreAllocationsForReuseWhenDispatchWalkerIsCalledThenCommandStreamObtainsReusableAllocation) { MockKernel kernel(program.get(), kernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); MockMultiDispatchInfo multiDispatchInfo(pClDevice, &kernel); auto &csr = pCmdQ->getGpgpuCommandStreamReceiver(); auto allocation = csr.getMemoryManager()->allocateGraphicsMemoryWithProperties({csr.getRootDeviceIndex(), MemoryConstants::pageSize64k + CSRequirements::csOverfetchSize, AllocationType::COMMAND_BUFFER, csr.getOsContext().getDeviceBitfield()}); csr.getInternalAllocationStorage()->storeAllocation(std::unique_ptr{allocation}, REUSABLE_ALLOCATION); 
ASSERT_FALSE(csr.getInternalAllocationStorage()->getAllocationsForReuse().peekIsEmpty()); auto blockedCommandsData = createBlockedCommandsData(*pCmdQ); HardwareInterface::dispatchWalker( *pCmdQ, multiDispatchInfo, CsrDependencies(), blockedCommandsData.get(), nullptr, nullptr, nullptr, nullptr, CL_COMMAND_NDRANGE_KERNEL); EXPECT_TRUE(csr.getInternalAllocationStorage()->getAllocationsForReuse().peekIsEmpty()); EXPECT_EQ(allocation, blockedCommandsData->commandStream->getGraphicsAllocation()); } HWTEST_F(DispatchWalkerTest, GivenMultipleKernelsWhenDispatchingWalkerThenWorkDimensionsAreCorrect) { kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.workDim = 0; MockKernel kernel1(program.get(), kernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel1.initialize()); MockKernel kernel2(program.get(), kernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel2.initialize()); MockMultiDispatchInfo multiDispatchInfo(pClDevice, std::vector({&kernel1, &kernel2})); HardwareInterface::dispatchWalker( *pCmdQ, multiDispatchInfo, CsrDependencies(), nullptr, nullptr, nullptr, nullptr, nullptr, CL_COMMAND_NDRANGE_KERNEL); for (auto &dispatchInfo : multiDispatchInfo) { auto &kernel = static_cast(*dispatchInfo.getKernel()); EXPECT_EQ(dispatchInfo.getDim(), *kernel.getWorkDim()); } } HWCMDTEST_F(IGFX_GEN8_CORE, DispatchWalkerTest, GivenMultipleKernelsWhenDispatchingWalkerThenInterfaceDescriptorsAreProgrammedCorrectly) { using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; auto memoryManager = this->pDevice->getMemoryManager(); auto kernelIsaAllocation = memoryManager->allocateGraphicsMemoryWithProperties({pDevice->getRootDeviceIndex(), MemoryConstants::pageSize, AllocationType::KERNEL_ISA, pDevice->getDeviceBitfield()}); auto kernelIsaWithSamplerAllocation = memoryManager->allocateGraphicsMemoryWithProperties({pDevice->getRootDeviceIndex(), MemoryConstants::pageSize, AllocationType::KERNEL_ISA, pDevice->getDeviceBitfield()}); kernelInfo.kernelAllocation = 
kernelIsaAllocation; kernelInfoWithSampler.kernelAllocation = kernelIsaWithSamplerAllocation; auto gpuAddress1 = kernelIsaAllocation->getGpuAddressToPatch(); auto gpuAddress2 = kernelIsaWithSamplerAllocation->getGpuAddressToPatch(); MockKernel kernel1(program.get(), kernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel1.initialize()); MockKernel kernel2(program.get(), kernelInfoWithSampler, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel2.initialize()); MockMultiDispatchInfo multiDispatchInfo(pClDevice, std::vector({&kernel1, &kernel2})); // create Indirect DSH heap auto &indirectHeap = pCmdQ->getIndirectHeap(IndirectHeap::Type::DYNAMIC_STATE, 8192); indirectHeap.align(EncodeStates::alignInterfaceDescriptorData); auto dshBeforeMultiDisptach = indirectHeap.getUsed(); HardwareInterface::dispatchWalker( *pCmdQ, multiDispatchInfo, CsrDependencies(), nullptr, nullptr, nullptr, nullptr, nullptr, CL_COMMAND_NDRANGE_KERNEL); auto dshAfterMultiDisptach = indirectHeap.getUsed(); auto numberOfDispatches = multiDispatchInfo.size(); auto interfaceDesriptorTableSize = numberOfDispatches * sizeof(INTERFACE_DESCRIPTOR_DATA); EXPECT_LE(dshBeforeMultiDisptach + interfaceDesriptorTableSize, dshAfterMultiDisptach); INTERFACE_DESCRIPTOR_DATA *pID = reinterpret_cast(ptrOffset(indirectHeap.getCpuBase(), dshBeforeMultiDisptach)); for (uint32_t index = 0; index < multiDispatchInfo.size(); index++) { uint32_t addressLow = pID[index].getKernelStartPointer(); uint32_t addressHigh = pID[index].getKernelStartPointerHigh(); uint64_t fullAddress = ((uint64_t)addressHigh << 32) | addressLow; if (index > 0) { uint32_t addressLowOfPrevious = pID[index - 1].getKernelStartPointer(); uint32_t addressHighOfPrevious = pID[index - 1].getKernelStartPointerHigh(); uint64_t addressPrevious = ((uint64_t)addressHighOfPrevious << 32) | addressLowOfPrevious; uint64_t address = ((uint64_t)addressHigh << 32) | addressLow; EXPECT_NE(addressPrevious, address); } if (index == 0) { auto samplerPointer = 
pID[index].getSamplerStatePointer(); auto samplerCount = pID[index].getSamplerCount(); EXPECT_EQ(0u, samplerPointer); EXPECT_EQ(0u, samplerCount); EXPECT_EQ(fullAddress, gpuAddress1); } if (index == 1) { auto samplerPointer = pID[index].getSamplerStatePointer(); auto samplerCount = pID[index].getSamplerCount(); EXPECT_NE(0u, samplerPointer); if (EncodeSurfaceState::doBindingTablePrefetch()) { EXPECT_EQ(1u, samplerCount); } else { EXPECT_EQ(0u, samplerCount); } EXPECT_EQ(fullAddress, gpuAddress2); } } HardwareParse hwParser; auto &cmdStream = pCmdQ->getCS(0); hwParser.parseCommands(cmdStream, 0); hwParser.findHardwareCommands(); auto cmd = hwParser.getCommand(); EXPECT_NE(nullptr, cmd); auto IDStartAddress = cmd->getInterfaceDescriptorDataStartAddress(); auto IDSize = cmd->getInterfaceDescriptorTotalLength(); EXPECT_EQ(dshBeforeMultiDisptach, IDStartAddress); EXPECT_EQ(interfaceDesriptorTableSize, IDSize); memoryManager->freeGraphicsMemory(kernelIsaAllocation); memoryManager->freeGraphicsMemory(kernelIsaWithSamplerAllocation); } HWCMDTEST_F(IGFX_GEN8_CORE, DispatchWalkerTest, GivenMultipleKernelsWhenDispatchingWalkerThenGpgpuWalkerIdOffsetIsProgrammedCorrectly) { using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER; MockKernel kernel1(program.get(), kernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel1.initialize()); MockKernel kernel2(program.get(), kernelInfoWithSampler, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel2.initialize()); MockMultiDispatchInfo multiDispatchInfo(pClDevice, std::vector({&kernel1, &kernel2})); // create commandStream auto &cmdStream = pCmdQ->getCS(0); HardwareInterface::dispatchWalker( *pCmdQ, multiDispatchInfo, CsrDependencies(), nullptr, nullptr, nullptr, nullptr, nullptr, CL_COMMAND_NDRANGE_KERNEL); HardwareParse hwParser; hwParser.parseCommands(cmdStream, 0); hwParser.findHardwareCommands(); auto walkerItor = hwParser.itorWalker; ASSERT_NE(hwParser.cmdList.end(), walkerItor); for (uint32_t index = 0; index < multiDispatchInfo.size(); 
index++) { ASSERT_NE(hwParser.cmdList.end(), walkerItor); auto *gpgpuWalker = (GPGPU_WALKER *)*walkerItor; auto IDIndex = gpgpuWalker->getInterfaceDescriptorOffset(); EXPECT_EQ(index, IDIndex); // move walker iterator walkerItor++; walkerItor = find(walkerItor, hwParser.cmdList.end()); } } HWCMDTEST_F(IGFX_GEN8_CORE, DispatchWalkerTest, GivenMultipleKernelsWhenDispatchingWalkerThenThreadGroupIdStartingCoordinatesAreProgrammedCorrectly) { using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER; MockKernel kernel1(program.get(), kernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel1.initialize()); MockKernel kernel2(program.get(), kernelInfoWithSampler, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel2.initialize()); MockMultiDispatchInfo multiDispatchInfo(pClDevice, std::vector({&kernel1, &kernel2})); // create commandStream auto &cmdStream = pCmdQ->getCS(0); HardwareInterface::dispatchWalker( *pCmdQ, multiDispatchInfo, CsrDependencies(), nullptr, nullptr, nullptr, nullptr, nullptr, CL_COMMAND_NDRANGE_KERNEL); HardwareParse hwParser; hwParser.parseCommands(cmdStream, 0); hwParser.findHardwareCommands(); auto walkerItor = hwParser.itorWalker; ASSERT_NE(hwParser.cmdList.end(), walkerItor); for (uint32_t index = 0; index < multiDispatchInfo.size(); index++) { ASSERT_NE(hwParser.cmdList.end(), walkerItor); auto *gpgpuWalker = (GPGPU_WALKER *)*walkerItor; auto coordinateX = gpgpuWalker->getThreadGroupIdStartingX(); EXPECT_EQ(coordinateX, 0u); auto coordinateY = gpgpuWalker->getThreadGroupIdStartingY(); EXPECT_EQ(coordinateY, 0u); auto coordinateZ = gpgpuWalker->getThreadGroupIdStartingResumeZ(); EXPECT_EQ(coordinateZ, 0u); // move walker iterator walkerItor++; walkerItor = find(walkerItor, hwParser.cmdList.end()); } } HWCMDTEST_F(IGFX_GEN8_CORE, DispatchWalkerTest, GivenMultipleDispatchInfoAndSameKernelWhenDispatchingWalkerThenGpgpuWalkerThreadGroupIdStartingCoordinatesAreCorrectlyProgrammed) { using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER; MockKernel 
kernel(program.get(), kernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); DispatchInfo di1(pClDevice, &kernel, 1, {100, 1, 1}, {10, 1, 1}, {0, 0, 0}, {100, 1, 1}, {10, 1, 1}, {10, 1, 1}, {10, 1, 1}, {0, 0, 0}); DispatchInfo di2(pClDevice, &kernel, 1, {100, 1, 1}, {10, 1, 1}, {0, 0, 0}, {100, 1, 1}, {10, 1, 1}, {10, 1, 1}, {10, 1, 1}, {10, 0, 0}); MockMultiDispatchInfo multiDispatchInfo(std::vector({&di1, &di2})); // create commandStream auto &cmdStream = pCmdQ->getCS(0); HardwareInterface::dispatchWalker( *pCmdQ, multiDispatchInfo, CsrDependencies(), nullptr, nullptr, nullptr, nullptr, nullptr, CL_COMMAND_NDRANGE_KERNEL); HardwareParse hwParser; hwParser.parseCommands(cmdStream, 0); hwParser.findHardwareCommands(); auto walkerItor = hwParser.itorWalker; ASSERT_NE(hwParser.cmdList.end(), walkerItor); for (uint32_t index = 0; index < multiDispatchInfo.size(); index++) { ASSERT_NE(hwParser.cmdList.end(), walkerItor); auto *gpgpuWalker = (GPGPU_WALKER *)*walkerItor; auto coordinateX = gpgpuWalker->getThreadGroupIdStartingX(); EXPECT_EQ(coordinateX, index * 10u); auto coordinateY = gpgpuWalker->getThreadGroupIdStartingY(); EXPECT_EQ(coordinateY, 0u); auto coordinateZ = gpgpuWalker->getThreadGroupIdStartingResumeZ(); EXPECT_EQ(coordinateZ, 0u); // move walker iterator walkerItor++; walkerItor = find(walkerItor, hwParser.cmdList.end()); } } HWTEST_F(DispatchWalkerTest, GivenCacheFlushAfterWalkerDisabledWhenAllocationRequiresCacheFlushThenFlushCommandNotPresentAfterWalker) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableCacheFlushAfterWalker.set(0); MockKernel kernel1(program.get(), kernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel1.initialize()); kernel1.kernelArgRequiresCacheFlush.resize(1); MockGraphicsAllocation cacheRequiringAllocation; kernel1.kernelArgRequiresCacheFlush[0] = &cacheRequiringAllocation; MockMultiDispatchInfo multiDispatchInfo(pClDevice, 
std::vector({&kernel1})); // create commandStream auto &cmdStream = pCmdQ->getCS(0); HardwareInterface::dispatchWalker( *pCmdQ, multiDispatchInfo, CsrDependencies(), nullptr, nullptr, nullptr, nullptr, nullptr, CL_COMMAND_NDRANGE_KERNEL); HardwareParse hwParse; hwParse.parseCommands(cmdStream); PIPE_CONTROL *pipeControl = hwParse.getCommand(); EXPECT_EQ(nullptr, pipeControl); } HWTEST_F(DispatchWalkerTest, GivenCacheFlushAfterWalkerEnabledWhenWalkerWithTwoKernelsThenFlushCommandPresentOnce) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableCacheFlushAfterWalker.set(1); MockKernel kernel1(program.get(), kernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel1.initialize()); MockKernel kernel2(program.get(), kernelInfoWithSampler, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel2.initialize()); kernel1.kernelArgRequiresCacheFlush.resize(1); kernel2.kernelArgRequiresCacheFlush.resize(1); MockGraphicsAllocation cacheRequiringAllocation; kernel1.kernelArgRequiresCacheFlush[0] = &cacheRequiringAllocation; kernel2.kernelArgRequiresCacheFlush[0] = &cacheRequiringAllocation; MockMultiDispatchInfo multiDispatchInfo(pClDevice, std::vector({&kernel1, &kernel2})); // create commandStream auto &cmdStream = pCmdQ->getCS(0); HardwareInterface::dispatchWalker( *pCmdQ, multiDispatchInfo, CsrDependencies(), nullptr, nullptr, nullptr, nullptr, nullptr, CL_COMMAND_NDRANGE_KERNEL); HardwareParse hwParse; hwParse.parseCommands(cmdStream); uint32_t pipeControlCount = hwParse.getCommandCount(); EXPECT_EQ(pipeControlCount, 1u); } HWTEST_F(DispatchWalkerTest, GivenCacheFlushAfterWalkerEnabledWhenTwoWalkersForQueueThenFlushCommandPresentTwice) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableCacheFlushAfterWalker.set(1); MockKernel kernel1(program.get(), kernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel1.initialize()); MockKernel kernel2(program.get(), 
kernelInfoWithSampler, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel2.initialize()); kernel1.kernelArgRequiresCacheFlush.resize(1); kernel2.kernelArgRequiresCacheFlush.resize(1); MockGraphicsAllocation cacheRequiringAllocation; kernel1.kernelArgRequiresCacheFlush[0] = &cacheRequiringAllocation; kernel2.kernelArgRequiresCacheFlush[0] = &cacheRequiringAllocation; MockMultiDispatchInfo multiDispatchInfo1(pClDevice, std::vector({&kernel1})); MockMultiDispatchInfo multiDispatchInfo2(pClDevice, std::vector({&kernel2})); // create commandStream auto &cmdStream = pCmdQ->getCS(0); HardwareInterface::dispatchWalker( *pCmdQ, multiDispatchInfo1, CsrDependencies(), nullptr, nullptr, nullptr, nullptr, nullptr, CL_COMMAND_NDRANGE_KERNEL); HardwareInterface::dispatchWalker( *pCmdQ, multiDispatchInfo2, CsrDependencies(), nullptr, nullptr, nullptr, nullptr, nullptr, CL_COMMAND_NDRANGE_KERNEL); HardwareParse hwParse; hwParse.parseCommands(cmdStream); uint32_t pipeControlCount = hwParse.getCommandCount(); EXPECT_EQ(pipeControlCount, 2u); } TEST(DispatchWalker, WhenCalculatingDispatchDimensionsThenCorrectValuesAreReturned) { Vec3 dim0{0, 0, 0}; Vec3 dim1{2, 1, 1}; Vec3 dim2{2, 2, 1}; Vec3 dim3{2, 2, 2}; Vec3 dispatches[] = {dim0, dim1, dim2, dim3}; uint32_t testDims[] = {0, 1, 2, 3}; for (const auto &lhs : testDims) { for (const auto &rhs : testDims) { uint32_t dimTest = calculateDispatchDim(dispatches[lhs], dispatches[rhs]); uint32_t dimRef = std::max(1U, std::max(lhs, rhs)); EXPECT_EQ(dimRef, dimTest); } } } HWTEST_P(DispatchWalkerTestForAuxTranslation, givenKernelWhenAuxToNonAuxWhenTranslationRequiredThenPipeControlWithStallAndDCFlushAdded) { BuiltinDispatchInfoBuilder &baseBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, *pClDevice); auto &builder = static_cast &>(baseBuilder); MockKernel kernel(program.get(), kernelInfo, *pClDevice); kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.workDim = 0; ASSERT_EQ(CL_SUCCESS, 
kernel.initialize()); auto &cmdStream = pCmdQ->getCS(0); void *buffer = cmdStream.getCpuBase(); kernel.auxTranslationRequired = true; MockKernelObjForAuxTranslation mockKernelObj1(kernelObjType); MockKernelObjForAuxTranslation mockKernelObj2(kernelObjType); MultiDispatchInfo multiDispatchInfo; KernelObjsForAuxTranslation kernelObjsForAuxTranslation; multiDispatchInfo.setKernelObjsForAuxTranslation(kernelObjsForAuxTranslation); kernelObjsForAuxTranslation.insert(mockKernelObj1); kernelObjsForAuxTranslation.insert(mockKernelObj2); BuiltinOpParams builtinOpsParams; builtinOpsParams.auxTranslationDirection = AuxTranslationDirection::AuxToNonAux; builder.buildDispatchInfosForAuxTranslation(multiDispatchInfo, builtinOpsParams); HardwareInterface::dispatchWalker( *pCmdQ, multiDispatchInfo, CsrDependencies(), nullptr, nullptr, nullptr, nullptr, nullptr, CL_COMMAND_NDRANGE_KERNEL); auto sizeUsed = cmdStream.getUsed(); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, buffer, sizeUsed)); auto pipeControls = findAll(cmdList.begin(), cmdList.end()); ASSERT_EQ(2u, pipeControls.size()); auto beginPipeControl = genCmdCast(*(pipeControls[0])); EXPECT_EQ(MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo), beginPipeControl->getDcFlushEnable()); EXPECT_TRUE(beginPipeControl->getCommandStreamerStallEnable()); auto endPipeControl = genCmdCast(*(pipeControls[1])); bool dcFlushRequired = (pClDevice->getHardwareInfo().platform.eRenderCoreFamily == IGFX_GEN8_CORE); EXPECT_EQ(dcFlushRequired, endPipeControl->getDcFlushEnable()); EXPECT_TRUE(endPipeControl->getCommandStreamerStallEnable()); } HWTEST_P(DispatchWalkerTestForAuxTranslation, givenKernelWhenNonAuxToAuxWhenTranslationRequiredThenPipeControlWithStallAdded) { BuiltinDispatchInfoBuilder &baseBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, *pClDevice); auto &builder = static_cast &>(baseBuilder); MockKernel kernel(program.get(), kernelInfo, 
*pClDevice); kernelInfo.kernelDescriptor.payloadMappings.dispatchTraits.workDim = 0; ASSERT_EQ(CL_SUCCESS, kernel.initialize()); auto &cmdStream = pCmdQ->getCS(0); void *buffer = cmdStream.getCpuBase(); kernel.auxTranslationRequired = true; MockKernelObjForAuxTranslation mockKernelObj1(kernelObjType); MockKernelObjForAuxTranslation mockKernelObj2(kernelObjType); MultiDispatchInfo multiDispatchInfo; KernelObjsForAuxTranslation kernelObjsForAuxTranslation; multiDispatchInfo.setKernelObjsForAuxTranslation(kernelObjsForAuxTranslation); kernelObjsForAuxTranslation.insert(mockKernelObj1); kernelObjsForAuxTranslation.insert(mockKernelObj2); BuiltinOpParams builtinOpsParams; builtinOpsParams.auxTranslationDirection = AuxTranslationDirection::NonAuxToAux; builder.buildDispatchInfosForAuxTranslation(multiDispatchInfo, builtinOpsParams); HardwareInterface::dispatchWalker( *pCmdQ, multiDispatchInfo, CsrDependencies(), nullptr, nullptr, nullptr, nullptr, nullptr, CL_COMMAND_NDRANGE_KERNEL); auto sizeUsed = cmdStream.getUsed(); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, buffer, sizeUsed)); auto pipeControls = findAll(cmdList.begin(), cmdList.end()); ASSERT_EQ(2u, pipeControls.size()); bool dcFlushRequired = (pClDevice->getHardwareInfo().platform.eRenderCoreFamily == IGFX_GEN8_CORE); auto beginPipeControl = genCmdCast(*(pipeControls[0])); EXPECT_EQ(MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo), beginPipeControl->getDcFlushEnable()); EXPECT_TRUE(beginPipeControl->getCommandStreamerStallEnable()); auto endPipeControl = genCmdCast(*(pipeControls[1])); EXPECT_EQ(dcFlushRequired, endPipeControl->getDcFlushEnable()); EXPECT_TRUE(endPipeControl->getCommandStreamerStallEnable()); } struct ProfilingCommandsTest : public DispatchWalkerTest, ::testing::WithParamInterface { void SetUp() override { DispatchWalkerTest::SetUp(); } void TearDown() override { DispatchWalkerTest::TearDown(); } }; HWCMDTEST_F(IGFX_GEN8_CORE, 
ProfilingCommandsTest, givenKernelWhenProfilingCommandStartIsTakenThenTimeStampAddressIsProgrammedCorrectly) { using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; auto &cmdStream = pCmdQ->getCS(0); MockTagAllocator timeStampAllocator(pDevice->getRootDeviceIndex(), this->pDevice->getMemoryManager(), 10, MemoryConstants::cacheLineSize, sizeof(HwTimeStamps), false, pDevice->getDeviceBitfield()); auto hwTimeStamp1 = timeStampAllocator.getTag(); ASSERT_NE(nullptr, hwTimeStamp1); GpgpuWalkerHelper::dispatchProfilingCommandsStart(*hwTimeStamp1, &cmdStream, pDevice->getHardwareInfo()); auto hwTimeStamp2 = timeStampAllocator.getTag(); ASSERT_NE(nullptr, hwTimeStamp2); GpgpuWalkerHelper::dispatchProfilingCommandsStart(*hwTimeStamp2, &cmdStream, pDevice->getHardwareInfo()); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, cmdStream.getCpuBase(), cmdStream.getUsed())); auto itorStoreReg = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), itorStoreReg); auto storeReg = genCmdCast(*itorStoreReg); ASSERT_NE(nullptr, storeReg); uint64_t gpuAddress = storeReg->getMemoryAddress(); auto contextTimestampFieldOffset = offsetof(HwTimeStamps, ContextStartTS); uint64_t expectedAddress = hwTimeStamp1->getGpuAddress() + contextTimestampFieldOffset; EXPECT_EQ(expectedAddress, gpuAddress); itorStoreReg++; itorStoreReg = find(itorStoreReg, cmdList.end()); ASSERT_NE(cmdList.end(), itorStoreReg); storeReg = genCmdCast(*itorStoreReg); ASSERT_NE(nullptr, storeReg); gpuAddress = storeReg->getMemoryAddress(); expectedAddress = hwTimeStamp2->getGpuAddress() + contextTimestampFieldOffset; EXPECT_EQ(expectedAddress, gpuAddress); auto itorPipeCtrl = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), itorPipeCtrl); if (MemorySynchronizationCommands::isPipeControlWArequired(pDevice->getHardwareInfo())) { itorPipeCtrl++; } if 
(UnitTestHelper::isAdditionalSynchronizationRequired()) { itorPipeCtrl++; } auto pipeControl = genCmdCast(*itorPipeCtrl); ASSERT_NE(nullptr, pipeControl); gpuAddress = NEO::UnitTestHelper::getPipeControlPostSyncAddress(*pipeControl); expectedAddress = hwTimeStamp1->getGpuAddress() + offsetof(HwTimeStamps, GlobalStartTS); EXPECT_EQ(expectedAddress, gpuAddress); itorPipeCtrl++; itorPipeCtrl = find(itorPipeCtrl, cmdList.end()); if (MemorySynchronizationCommands::isPipeControlWArequired(pDevice->getHardwareInfo())) { itorPipeCtrl++; } if (UnitTestHelper::isAdditionalSynchronizationRequired()) { itorPipeCtrl++; } ASSERT_NE(cmdList.end(), itorPipeCtrl); pipeControl = genCmdCast(*itorPipeCtrl); ASSERT_NE(nullptr, pipeControl); gpuAddress = NEO::UnitTestHelper::getPipeControlPostSyncAddress(*pipeControl); expectedAddress = hwTimeStamp2->getGpuAddress() + offsetof(HwTimeStamps, GlobalStartTS); EXPECT_EQ(expectedAddress, gpuAddress); } HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingCommandsTest, givenKernelWhenProfilingCommandStartIsNotTakenThenTimeStampAddressIsProgrammedCorrectly) { using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; auto &cmdStream = pCmdQ->getCS(0); MockTagAllocator timeStampAllocator(pDevice->getRootDeviceIndex(), this->pDevice->getMemoryManager(), 10, MemoryConstants::cacheLineSize, sizeof(HwTimeStamps), false, pDevice->getDeviceBitfield()); auto hwTimeStamp1 = timeStampAllocator.getTag(); ASSERT_NE(nullptr, hwTimeStamp1); GpgpuWalkerHelper::dispatchProfilingCommandsEnd(*hwTimeStamp1, &cmdStream, pDevice->getHardwareInfo()); auto hwTimeStamp2 = timeStampAllocator.getTag(); ASSERT_NE(nullptr, hwTimeStamp2); GpgpuWalkerHelper::dispatchProfilingCommandsEnd(*hwTimeStamp2, &cmdStream, pDevice->getHardwareInfo()); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, cmdStream.getCpuBase(), cmdStream.getUsed())); auto itorStoreReg = find(cmdList.begin(), 
cmdList.end()); ASSERT_NE(cmdList.end(), itorStoreReg); auto storeReg = genCmdCast(*itorStoreReg); ASSERT_NE(nullptr, storeReg); uint64_t gpuAddress = storeReg->getMemoryAddress(); auto contextTimestampFieldOffset = offsetof(HwTimeStamps, ContextEndTS); uint64_t expectedAddress = hwTimeStamp1->getGpuAddress() + contextTimestampFieldOffset; EXPECT_EQ(expectedAddress, gpuAddress); itorStoreReg++; itorStoreReg = find(itorStoreReg, cmdList.end()); ASSERT_NE(cmdList.end(), itorStoreReg); storeReg = genCmdCast(*itorStoreReg); ASSERT_NE(nullptr, storeReg); gpuAddress = storeReg->getMemoryAddress(); expectedAddress = hwTimeStamp2->getGpuAddress() + contextTimestampFieldOffset; EXPECT_EQ(expectedAddress, gpuAddress); } HWTEST_F(DispatchWalkerTest, WhenKernelRequiresImplicitArgsThenIohRequiresMoreSpace) { size_t globalOffsets[3] = {0, 0, 0}; size_t workItems[3] = {1, 1, 1}; size_t workGroupSize[3] = {2, 5, 10}; cl_uint dimensions = 1; Vec3 localWorkgroupSize(workGroupSize); auto blockedCommandsData = createBlockedCommandsData(*pCmdQ); kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 1u; kernelInfo.kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs = false; MockKernel kernelWithoutImplicitArgs(program.get(), kernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernelWithoutImplicitArgs.initialize()); UnitTestHelper::adjustKernelDescriptorForImplicitArgs(kernelInfo.kernelDescriptor); MockKernel kernelWithImplicitArgs(program.get(), kernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernelWithImplicitArgs.initialize()); DispatchInfo dispatchInfoWithoutImplicitArgs(pClDevice, const_cast(&kernelWithoutImplicitArgs), dimensions, workItems, workGroupSize, globalOffsets); dispatchInfoWithoutImplicitArgs.setNumberOfWorkgroups({1, 1, 1}); dispatchInfoWithoutImplicitArgs.setTotalNumberOfWorkgroups({1, 1, 1}); MultiDispatchInfo multiDispatchInfoWithoutImplicitArgs(&kernelWithoutImplicitArgs); multiDispatchInfoWithoutImplicitArgs.push(dispatchInfoWithoutImplicitArgs); 
HardwareInterface::dispatchWalker( *pCmdQ, multiDispatchInfoWithoutImplicitArgs, CsrDependencies(), blockedCommandsData.get(), nullptr, nullptr, nullptr, nullptr, CL_COMMAND_NDRANGE_KERNEL); auto iohSizeWithoutImplicitArgs = HardwareCommandsHelper::getSizeRequiredIOH(kernelWithoutImplicitArgs, Math::computeTotalElementsCount(localWorkgroupSize)); DispatchInfo dispatchInfoWithImplicitArgs(pClDevice, const_cast(&kernelWithImplicitArgs), dimensions, workItems, workGroupSize, globalOffsets); dispatchInfoWithImplicitArgs.setNumberOfWorkgroups({1, 1, 1}); dispatchInfoWithImplicitArgs.setTotalNumberOfWorkgroups({1, 1, 1}); MultiDispatchInfo multiDispatchInfoWithImplicitArgs(&kernelWithoutImplicitArgs); multiDispatchInfoWithImplicitArgs.push(dispatchInfoWithImplicitArgs); HardwareInterface::dispatchWalker( *pCmdQ, multiDispatchInfoWithImplicitArgs, CsrDependencies(), blockedCommandsData.get(), nullptr, nullptr, nullptr, nullptr, CL_COMMAND_NDRANGE_KERNEL); auto iohSizeWithImplicitArgs = HardwareCommandsHelper::getSizeRequiredIOH(kernelWithImplicitArgs, Math::computeTotalElementsCount(localWorkgroupSize)); EXPECT_LE(iohSizeWithoutImplicitArgs, iohSizeWithImplicitArgs); { auto numChannels = kernelInfo.kernelDescriptor.kernelAttributes.numLocalIdChannels; auto simdSize = kernelInfo.getMaxSimdSize(); uint32_t grfSize = sizeof(typename FamilyType::GRF); auto size = kernelWithImplicitArgs.getCrossThreadDataSize() + HardwareCommandsHelper::getPerThreadDataSizeTotal(simdSize, grfSize, numChannels, Math::computeTotalElementsCount(localWorkgroupSize)) + ImplicitArgsHelper::getSizeForImplicitArgsPatching(kernelWithImplicitArgs.getImplicitArgs(), kernelWithImplicitArgs.getDescriptor(), *defaultHwInfo); size = alignUp(size, MemoryConstants::cacheLineSize); EXPECT_EQ(size, iohSizeWithImplicitArgs); } } dispatch_walker_tests_dg2_and_later.cpp000066400000000000000000000222051422164147700342320ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/* * 
Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/linear_stream.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/timestamp_packet.h" #include "shared/source/os_interface/hw_info_config.h" #include "shared/source/utilities/tag_allocator.h" #include "shared/test/common/cmd_parse/hw_parse.h" #include "shared/test/common/fixtures/linear_stream_fixture.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/mocks/mock_allocation_properties.h" #include "shared/test/common/mocks/mock_device.h" #include "opencl/source/command_queue/hardware_interface.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_mdi.h" using namespace NEO; struct Dg2AndLaterDispatchWalkerBasicFixture : public LinearStreamFixture { void SetUp() override { LinearStreamFixture::SetUp(); memset(globalOffsets, 0, sizeof(globalOffsets)); memset(startWorkGroups, 0, sizeof(startWorkGroups)); memset(&threadPayload, 0, sizeof(threadPayload)); localWorkSizesIn[0] = 16; localWorkSizesIn[1] = localWorkSizesIn[2] = 1; numWorkGroups[0] = numWorkGroups[1] = numWorkGroups[2] = 1; simd = 16; device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get(), rootDeviceIndex)); context = std::make_unique(device.get()); kernel = std::make_unique(*device, context.get()); sizeGrf = device->getHardwareInfo().capabilityTable.grfSize; sizeGrfDwords = sizeGrf / sizeof(uint32_t); for (uint32_t i = 0; i < sizeGrfDwords; i++) { crossThreadDataGrf[i] = i; crossThreadDataTwoGrf[i] = i + 2; } for (uint32_t i = sizeGrfDwords; i < sizeGrfDwords * 2; i++) { crossThreadDataTwoGrf[i] = i + 2; } } size_t globalOffsets[3]; size_t startWorkGroups[3]; size_t 
numWorkGroups[3]; size_t localWorkSizesIn[3]; uint32_t simd; uint32_t sizeGrf; uint32_t sizeInlineData; uint32_t sizeGrfDwords; uint32_t crossThreadDataGrf[16]; uint32_t crossThreadDataTwoGrf[32]; iOpenCL::SPatchThreadPayload threadPayload; const uint32_t rootDeviceIndex = 1u; std::unique_ptr device; std::unique_ptr context; std::unique_ptr kernel; }; using WalkerDispatchTestDg2AndLater = ::testing::Test; using Dg2AndLaterDispatchWalkerBasicTest = Test; using matcherDG2AndLater = IsAtLeastXeHpgCore; HWTEST2_F(WalkerDispatchTestDg2AndLater, whenProgramComputeWalkerThenApplyL3WAForSpecificPlatformAndRevision, matcherDG2AndLater) { using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER; auto walkerCmd = FamilyType::cmdInitGpgpuWalker; auto hwInfo = *defaultHwInfo; const auto &hwInfoConfig = *HwInfoConfig::get(hwInfo.platform.eProductFamily); { hwInfo.platform.usRevId = hwInfoConfig.getHwRevIdFromStepping(REVISION_B, hwInfo); EncodeDispatchKernel::encodeAdditionalWalkerFields(hwInfo, walkerCmd, KernelExecutionType::Default); EXPECT_FALSE(walkerCmd.getL3PrefetchDisable()); } { hwInfo.platform.usRevId = hwInfoConfig.getHwRevIdFromStepping(REVISION_A0, hwInfo); EncodeDispatchKernel::encodeAdditionalWalkerFields(hwInfo, walkerCmd, KernelExecutionType::Default); if (hwInfo.platform.eProductFamily == IGFX_DG2) { EXPECT_TRUE(walkerCmd.getL3PrefetchDisable()); } else { EXPECT_FALSE(walkerCmd.getL3PrefetchDisable()); } } } HWTEST2_F(WalkerDispatchTestDg2AndLater, givenDebugVariableSetWhenProgramComputeWalkerThenApplyL3PrefetchAppropriately, matcherDG2AndLater) { using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER; DebugManagerStateRestore restore; auto walkerCmd = FamilyType::cmdInitGpgpuWalker; auto hwInfo = *defaultHwInfo; for (auto forceL3PrefetchForComputeWalker : {false, true}) { DebugManager.flags.ForceL3PrefetchForComputeWalker.set(forceL3PrefetchForComputeWalker); EncodeDispatchKernel::encodeAdditionalWalkerFields(hwInfo, walkerCmd, 
KernelExecutionType::Default); EXPECT_EQ(!forceL3PrefetchForComputeWalker, walkerCmd.getL3PrefetchDisable()); } }

// Dispatches two kernels with timestamp-packet writes enabled and one
// timestamp node per kernel, then checks that each COMPUTE_WALKER found in the
// command stream has its post-sync data programmed as a timestamp write with
// both dataport flushes set, the expected MOCS, and a destination address equal
// to the context-start address of the matching timestamp node.
// NOTE(review): the template argument lists in this test (<FamilyType>,
// <Kernel *>, <COMPUTE_WALKER *>) were reconstructed from the surrounding
// usage - confirm against the upstream file.
HWTEST2_F(Dg2AndLaterDispatchWalkerBasicTest, givenTimestampPacketWhenDispatchingThenProgramPostSyncData, matcherDG2AndLater) {
    using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER;

    MockKernelWithInternals kernel1(*device);
    MockKernelWithInternals kernel2(*device);

    device->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = true;

    // One timestamp node per dispatched kernel.
    TimestampPacketContainer timestampPacketContainer;
    timestampPacketContainer.add(device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator()->getTag());
    timestampPacketContainer.add(device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator()->getTag());

    MockMultiDispatchInfo multiDispatchInfo(device.get(), std::vector<Kernel *>({kernel1.mockKernel, kernel2.mockKernel}));

    MockCommandQueue cmdQ(context.get(), device.get(), nullptr, false);
    auto &cmdStream = cmdQ.getCS(0);

    HardwareInterface<FamilyType>::dispatchWalker(
        cmdQ,
        multiDispatchInfo,
        CsrDependencies(),
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        &timestampPacketContainer,
        false);

    HardwareParse hwParser;
    hwParser.parseCommands(cmdStream, 0);
    hwParser.findHardwareCommands();
    // Fatal assertion: the iterator is dereferenced right below.
    ASSERT_NE(hwParser.itorWalker, hwParser.cmdList.end());

    auto gmmHelper = device->getGmmHelper();
    auto expectedMocs = MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, *defaultHwInfo)
                            ? gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED)
                            : gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);

    // First walker <-> first timestamp node.
    auto walker = genCmdCast<COMPUTE_WALKER *>(*hwParser.itorWalker);
    ASSERT_NE(nullptr, walker);
    EXPECT_EQ(FamilyType::POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walker->getPostSync().getOperation());
    EXPECT_TRUE(walker->getPostSync().getDataportPipelineFlush());
    EXPECT_TRUE(walker->getPostSync().getDataportSubsliceCacheFlush());
    EXPECT_EQ(expectedMocs, walker->getPostSync().getMocs());
    auto contextStartAddress = TimestampPacketHelper::getContextStartGpuAddress(*timestampPacketContainer.peekNodes()[0]);
    EXPECT_EQ(contextStartAddress, walker->getPostSync().getDestinationAddress());

    // Second walker <-> second timestamp node.
    auto secondWalkerItor = find<COMPUTE_WALKER *>(++hwParser.itorWalker, hwParser.cmdList.end());
    ASSERT_NE(secondWalkerItor, hwParser.cmdList.end());
    auto secondWalker = genCmdCast<COMPUTE_WALKER *>(*secondWalkerItor);
    ASSERT_NE(nullptr, secondWalker);

    EXPECT_EQ(FamilyType::POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, secondWalker->getPostSync().getOperation());
    EXPECT_TRUE(secondWalker->getPostSync().getDataportPipelineFlush());
    EXPECT_TRUE(secondWalker->getPostSync().getDataportSubsliceCacheFlush());
    // Check the second walker's MOCS (the previous code re-checked the first walker).
    EXPECT_EQ(expectedMocs, secondWalker->getPostSync().getMocs());
    contextStartAddress = TimestampPacketHelper::getContextStartGpuAddress(*timestampPacketContainer.peekNodes()[1]);
    EXPECT_EQ(contextStartAddress, secondWalker->getPostSync().getDestinationAddress());
}
HWTEST2_F(Dg2AndLaterDispatchWalkerBasicTest, givenDebugVariableEnabledWhenEnqueueingThenWriteWalkerStamp, matcherDG2AndLater) { using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER; DebugManagerStateRestore restore; DebugManager.flags.EnableTimestampPacket.set(true); auto testDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); MockContext testContext(testDevice.get()); auto cmdQ = std::make_unique>(&testContext, testDevice.get(), nullptr); MockKernelWithInternals testKernel(*testDevice, &testContext); size_t gws[] = {1, 1, 1}; cmdQ->enqueueKernel(testKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); 
EXPECT_NE(nullptr, cmdQ->timestampPacketContainer.get()); HardwareParse hwParser; hwParser.parseCommands(cmdQ->getCS(0), 0); hwParser.findHardwareCommands(); EXPECT_NE(hwParser.itorWalker, hwParser.cmdList.end()); auto walker = genCmdCast(*hwParser.itorWalker); auto gmmHelper = device->getGmmHelper(); auto expectedMocs = MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo) ? gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED) : gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); auto &postSyncData = walker->getPostSync(); EXPECT_EQ(FamilyType::POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, postSyncData.getOperation()); EXPECT_TRUE(postSyncData.getDataportPipelineFlush()); EXPECT_TRUE(postSyncData.getDataportSubsliceCacheFlush()); EXPECT_EQ(expectedMocs, postSyncData.getMocs()); } dispatch_walker_tests_xehp_and_later.cpp000066400000000000000000002766461422164147700345460ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_container/implicit_scaling.h" #include "shared/source/command_container/walker_partition_xehp_and_later.h" #include "shared/source/command_stream/linear_stream.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/timestamp_packet.h" #include "shared/source/indirect_heap/indirect_heap.h" #include "shared/source/memory_manager/unified_memory_manager.h" #include "shared/source/os_interface/os_interface.h" #include "shared/source/utilities/tag_allocator.h" #include "shared/test/common/fixtures/linear_stream_fixture.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/variable_backup.h" #include "shared/test/common/mocks/mock_allocation_properties.h" #include "shared/test/common/mocks/mock_device.h" #include 
"shared/test/common/mocks/mock_execution_environment.h" #include "shared/test/common/mocks/mock_timestamp_container.h" #include "opencl/source/built_ins/builtins_dispatch_builder.h" #include "opencl/source/command_queue/gpgpu_walker.h" #include "opencl/source/command_queue/hardware_interface.h" #include "opencl/source/helpers/dispatch_info_builder.h" #include "opencl/source/kernel/kernel.h" #include "opencl/test/unit_test/helpers/cl_hw_parse.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_mdi.h" #include "opencl/test/unit_test/mocks/mock_platform.h" using namespace NEO; using WalkerDispatchTest = ::testing::Test; struct XeHPAndLaterDispatchWalkerBasicFixture : public LinearStreamFixture { void SetUp() override { LinearStreamFixture::SetUp(); memset(globalOffsets, 0, sizeof(globalOffsets)); memset(startWorkGroups, 0, sizeof(startWorkGroups)); localWorkSizesIn[0] = 16; localWorkSizesIn[1] = localWorkSizesIn[2] = 1; numWorkGroups[0] = numWorkGroups[1] = numWorkGroups[2] = 1; simd = 16; device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get(), rootDeviceIndex)); context = std::make_unique(device.get()); kernel = std::make_unique(*device, context.get()); sizeGrf = device->getHardwareInfo().capabilityTable.grfSize; sizeGrfDwords = sizeGrf / sizeof(uint32_t); for (uint32_t i = 0; i < sizeGrfDwords; i++) { crossThreadDataGrf[i] = i; crossThreadDataTwoGrf[i] = i + 2; } for (uint32_t i = sizeGrfDwords; i < sizeGrfDwords * 2; i++) { crossThreadDataTwoGrf[i] = i + 2; } } DebugManagerStateRestore restore; size_t globalOffsets[3]; size_t startWorkGroups[3]; size_t numWorkGroups[3]; size_t localWorkSizesIn[3]; uint32_t simd; uint32_t sizeGrf; uint32_t sizeInlineData; uint32_t sizeGrfDwords; uint32_t crossThreadDataGrf[16]; uint32_t crossThreadDataTwoGrf[32]; const uint32_t 
rootDeviceIndex = 1u; std::unique_ptr device; std::unique_ptr context; std::unique_ptr kernel; }; using XeHPAndLaterDispatchWalkerBasicTest = Test; HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, whenWorkDimOneThenLocalWorkSizeEqualsLocalXDim) { using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER; COMPUTE_WALKER *computeWalker = static_cast(linearStream.getSpace(sizeof(COMPUTE_WALKER))); *computeWalker = FamilyType::cmdInitGpgpuWalker; auto localWorkSize = GpgpuWalkerHelper::setGpgpuWalkerThreadData( computeWalker, kernel->kernelInfo.kernelDescriptor, globalOffsets, startWorkGroups, numWorkGroups, localWorkSizesIn, simd, 3, true, false, 5u); EXPECT_EQ(localWorkSizesIn[0], localWorkSize); EXPECT_EQ(0u, computeWalker->getLocalXMaximum()); EXPECT_EQ(0u, computeWalker->getLocalYMaximum()); EXPECT_EQ(0u, computeWalker->getLocalZMaximum()); EXPECT_EQ(0u, computeWalker->getEmitLocalId()); EXPECT_EQ(0u, computeWalker->getGenerateLocalId()); EXPECT_EQ(0u, computeWalker->getEmitInlineParameter()); EXPECT_EQ(0u, computeWalker->getWalkOrder()); } HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, whenWorkDimTwoThenLocalWorkSizeEqualsProductLocalXandYDim) { using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER; COMPUTE_WALKER *computeWalker = static_cast(linearStream.getSpace(sizeof(COMPUTE_WALKER))); *computeWalker = FamilyType::cmdInitGpgpuWalker; localWorkSizesIn[1] = 8; auto localWorkSize = GpgpuWalkerHelper::setGpgpuWalkerThreadData( computeWalker, kernel->kernelInfo.kernelDescriptor, globalOffsets, startWorkGroups, numWorkGroups, localWorkSizesIn, simd, 3, true, false, 0u); EXPECT_EQ(localWorkSizesIn[0] * localWorkSizesIn[1], localWorkSize); EXPECT_EQ(0u, computeWalker->getLocalXMaximum()); EXPECT_EQ(0u, computeWalker->getLocalYMaximum()); EXPECT_EQ(0u, computeWalker->getLocalZMaximum()); EXPECT_EQ(0u, computeWalker->getEmitLocalId()); EXPECT_EQ(0u, computeWalker->getGenerateLocalId()); EXPECT_EQ(0u, 
computeWalker->getEmitInlineParameter()); } HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, whenWorkDimThreeThenLocalWorkSizeEqualsProductLocalXandYandZDim) { using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER; COMPUTE_WALKER *computeWalker = static_cast(linearStream.getSpace(sizeof(COMPUTE_WALKER))); *computeWalker = FamilyType::cmdInitGpgpuWalker; localWorkSizesIn[1] = 8; localWorkSizesIn[2] = 2; auto localWorkSize = GpgpuWalkerHelper::setGpgpuWalkerThreadData( computeWalker, kernel->kernelInfo.kernelDescriptor, globalOffsets, startWorkGroups, numWorkGroups, localWorkSizesIn, simd, 3, true, false, 0u); EXPECT_EQ(localWorkSizesIn[0] * localWorkSizesIn[1] * localWorkSizesIn[2], localWorkSize); EXPECT_EQ(0u, computeWalker->getLocalXMaximum()); EXPECT_EQ(0u, computeWalker->getLocalYMaximum()); EXPECT_EQ(0u, computeWalker->getLocalZMaximum()); EXPECT_EQ(0u, computeWalker->getEmitLocalId()); EXPECT_EQ(0u, computeWalker->getGenerateLocalId()); EXPECT_EQ(0u, computeWalker->getEmitInlineParameter()); } HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, givenWorkDimOneWhenAskHwForLocalIdsThenExpectGenerationFieldsSet) { using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER; COMPUTE_WALKER *computeWalker = static_cast(linearStream.getSpace(sizeof(COMPUTE_WALKER))); *computeWalker = FamilyType::cmdInitGpgpuWalker; kernel->kernelInfo.setLocalIds({1, 0, 0}); GpgpuWalkerHelper::setGpgpuWalkerThreadData(computeWalker, kernel->kernelInfo.kernelDescriptor, globalOffsets, startWorkGroups, numWorkGroups, localWorkSizesIn, simd, 1, false, false, 4u); auto localX = static_cast(computeWalker->getLocalXMaximum() + 1); auto localY = static_cast(computeWalker->getLocalYMaximum() + 1); auto localZ = static_cast(computeWalker->getLocalZMaximum() + 1); EXPECT_EQ(localWorkSizesIn[0], localX); EXPECT_EQ(localWorkSizesIn[1], localY); EXPECT_EQ(localWorkSizesIn[2], localZ); constexpr uint32_t expectedEmit = (1 << 0); EXPECT_EQ(expectedEmit, 
computeWalker->getEmitLocalId()); EXPECT_EQ(1u, computeWalker->getGenerateLocalId()); EXPECT_EQ(0u, computeWalker->getEmitInlineParameter()); EXPECT_EQ(4u, computeWalker->getWalkOrder()); } HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, givenWorkDimTwoWhenOnlyYIdPresentAskHwForLocalIdsThenExpectGenerationFieldsSet) { using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER; COMPUTE_WALKER *computeWalker = static_cast(linearStream.getSpace(sizeof(COMPUTE_WALKER))); *computeWalker = FamilyType::cmdInitGpgpuWalker; kernel->kernelInfo.setLocalIds({0, 1, 0}); localWorkSizesIn[1] = 16; localWorkSizesIn[0] = localWorkSizesIn[2] = 1; GpgpuWalkerHelper::setGpgpuWalkerThreadData(computeWalker, kernel->kernelInfo.kernelDescriptor, globalOffsets, startWorkGroups, numWorkGroups, localWorkSizesIn, simd, 2, false, false, 0u); auto localX = static_cast(computeWalker->getLocalXMaximum() + 1); auto localY = static_cast(computeWalker->getLocalYMaximum() + 1); auto localZ = static_cast(computeWalker->getLocalZMaximum() + 1); EXPECT_EQ(localWorkSizesIn[0], localX); EXPECT_EQ(localWorkSizesIn[1], localY); EXPECT_EQ(localWorkSizesIn[2], localZ); constexpr uint32_t expectedEmit = (1 << 1); EXPECT_EQ(expectedEmit, computeWalker->getEmitLocalId()); EXPECT_EQ(1u, computeWalker->getGenerateLocalId()); EXPECT_EQ(0u, computeWalker->getEmitInlineParameter()); } HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, givenWorkThreeTwoWhenOnlyZIdPresentAskHwForLocalIdsThenExpectGenerationFieldsSet) { using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER; COMPUTE_WALKER *computeWalker = static_cast(linearStream.getSpace(sizeof(COMPUTE_WALKER))); *computeWalker = FamilyType::cmdInitGpgpuWalker; kernel->kernelInfo.setLocalIds({0, 0, 1}); localWorkSizesIn[2] = 16; localWorkSizesIn[0] = localWorkSizesIn[1] = 1; GpgpuWalkerHelper::setGpgpuWalkerThreadData(computeWalker, kernel->kernelInfo.kernelDescriptor, globalOffsets, startWorkGroups, numWorkGroups, 
localWorkSizesIn, simd, 2, false, false, 0u); auto localX = static_cast(computeWalker->getLocalXMaximum() + 1); auto localY = static_cast(computeWalker->getLocalYMaximum() + 1); auto localZ = static_cast(computeWalker->getLocalZMaximum() + 1); EXPECT_EQ(localWorkSizesIn[0], localX); EXPECT_EQ(localWorkSizesIn[1], localY); EXPECT_EQ(localWorkSizesIn[2], localZ); constexpr uint32_t expectedEmit = (1 << 2); EXPECT_EQ(expectedEmit, computeWalker->getEmitLocalId()); EXPECT_EQ(1u, computeWalker->getGenerateLocalId()); EXPECT_EQ(0u, computeWalker->getEmitInlineParameter()); } HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, givenDifferentSIMDsizesWhenLocalIdsGeneratedThenMessageSizeIsSetToProperValue) { using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER; COMPUTE_WALKER *computeWalker = static_cast(linearStream.getSpace(sizeof(COMPUTE_WALKER))); *computeWalker = FamilyType::cmdInitGpgpuWalker; kernel->kernelInfo.setLocalIds({0, 0, 1}); localWorkSizesIn[2] = 16; localWorkSizesIn[0] = localWorkSizesIn[1] = 1; uint32_t simdProgramming[3][2] = {{32, 2}, {16, 1}, {8, 0}}; // {given, expected} bool walkerInput[4][2] = {{false, false}, {true, false}, {false, true}, {true, true}}; // {runtime local ids, inline data} for (uint32_t i = 0; i < 4; i++) { for (uint32_t j = 0; j < 3; j++) { *computeWalker = FamilyType::cmdInitGpgpuWalker; GpgpuWalkerHelper::setGpgpuWalkerThreadData(computeWalker, kernel->kernelInfo.kernelDescriptor, globalOffsets, startWorkGroups, numWorkGroups, localWorkSizesIn, simdProgramming[j][0], 2, walkerInput[i][0], walkerInput[i][1], 0u); EXPECT_EQ(simdProgramming[j][1], computeWalker->getMessageSimd()); } } } HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, givenDebugFlagWhenItIsSetThenMessageSimdIsOverwritten) { DebugManagerStateRestore restorer; DebugManager.flags.ForceSimdMessageSizeInWalker.set(1); using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER; COMPUTE_WALKER *computeWalker = 
static_cast(linearStream.getSpace(sizeof(COMPUTE_WALKER))); *computeWalker = FamilyType::cmdInitGpgpuWalker; kernel->kernelInfo.setLocalIds({0, 0, 1}); localWorkSizesIn[2] = 16; localWorkSizesIn[0] = localWorkSizesIn[1] = 1; uint32_t simdProgramming[3] = {32, 16, 8}; bool walkerInput[4][2] = {{false, false}, {true, false}, {false, true}, {true, true}}; // {runtime local ids, inline data} for (uint32_t i = 0; i < 4; i++) { for (uint32_t j = 0; j < 3; j++) { *computeWalker = FamilyType::cmdInitGpgpuWalker; GpgpuWalkerHelper::setGpgpuWalkerThreadData(computeWalker, kernel->kernelInfo.kernelDescriptor, globalOffsets, startWorkGroups, numWorkGroups, localWorkSizesIn, simdProgramming[j], 2, walkerInput[i][0], walkerInput[i][1], 0u); EXPECT_EQ(1u, computeWalker->getMessageSimd()); } } } HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, givenWorkDimTwoWhenAskHwForLocalIdsThenExpectGenerationFieldsSet) { using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER; COMPUTE_WALKER *computeWalker = static_cast(linearStream.getSpace(sizeof(COMPUTE_WALKER))); *computeWalker = FamilyType::cmdInitGpgpuWalker; kernel->kernelInfo.setLocalIds({1, 1, 0}); localWorkSizesIn[1] = 8; GpgpuWalkerHelper::setGpgpuWalkerThreadData(computeWalker, kernel->kernelInfo.kernelDescriptor, globalOffsets, startWorkGroups, numWorkGroups, localWorkSizesIn, simd, 2, false, false, 0u); auto localX = static_cast(computeWalker->getLocalXMaximum() + 1); auto localY = static_cast(computeWalker->getLocalYMaximum() + 1); auto localZ = static_cast(computeWalker->getLocalZMaximum() + 1); EXPECT_EQ(localWorkSizesIn[0], localX); EXPECT_EQ(localWorkSizesIn[1], localY); EXPECT_EQ(localWorkSizesIn[2], localZ); constexpr uint32_t expectedEmit = (1 << 0) | (1 << 1); EXPECT_EQ(expectedEmit, computeWalker->getEmitLocalId()); EXPECT_EQ(1u, computeWalker->getGenerateLocalId()); EXPECT_EQ(0u, computeWalker->getEmitInlineParameter()); } HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, 
givenWorkDimThreeWhenAskHwForLocalIdsThenExpectGenerationFieldsSet) { using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER; COMPUTE_WALKER *computeWalker = static_cast(linearStream.getSpace(sizeof(COMPUTE_WALKER))); *computeWalker = FamilyType::cmdInitGpgpuWalker; kernel->kernelInfo.setLocalIds({1, 1, 1}); localWorkSizesIn[1] = 8; localWorkSizesIn[2] = 2; GpgpuWalkerHelper::setGpgpuWalkerThreadData(computeWalker, kernel->kernelInfo.kernelDescriptor, globalOffsets, startWorkGroups, numWorkGroups, localWorkSizesIn, simd, 3, false, false, 0u); auto localX = static_cast(computeWalker->getLocalXMaximum() + 1); auto localY = static_cast(computeWalker->getLocalYMaximum() + 1); auto localZ = static_cast(computeWalker->getLocalZMaximum() + 1); EXPECT_EQ(localWorkSizesIn[0], localX); EXPECT_EQ(localWorkSizesIn[1], localY); EXPECT_EQ(localWorkSizesIn[2], localZ); constexpr uint32_t expectedEmit = (1 << 0) | (1 << 1) | (1 << 2); EXPECT_EQ(expectedEmit, computeWalker->getEmitLocalId()); EXPECT_EQ(1u, computeWalker->getGenerateLocalId()); EXPECT_EQ(0u, computeWalker->getEmitInlineParameter()); } HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, givenWorkDimThreeWhenAskHwForLocalIdsAndNoLocalIdsUsedThenExpectNoGenerationFieldsSet) { using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER; COMPUTE_WALKER *computeWalker = static_cast(linearStream.getSpace(sizeof(COMPUTE_WALKER))); *computeWalker = FamilyType::cmdInitGpgpuWalker; kernel->kernelInfo.setLocalIds({0, 0, 0}); localWorkSizesIn[1] = 8; localWorkSizesIn[2] = 2; GpgpuWalkerHelper::setGpgpuWalkerThreadData(computeWalker, kernel->kernelInfo.kernelDescriptor, globalOffsets, startWorkGroups, numWorkGroups, localWorkSizesIn, simd, 3, false, false, 0u); EXPECT_EQ(0u, computeWalker->getLocalXMaximum()); EXPECT_EQ(0u, computeWalker->getLocalYMaximum()); EXPECT_EQ(0u, computeWalker->getLocalZMaximum()); constexpr uint32_t expectedEmit = 0; EXPECT_EQ(expectedEmit, computeWalker->getEmitLocalId()); 
EXPECT_EQ(0u, computeWalker->getGenerateLocalId()); EXPECT_EQ(0u, computeWalker->getEmitInlineParameter()); } HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, givenWorkDimThreeWhenNotAskHwForLocalIdsAndLocalIdsUsedThenExpectNoGenerationFieldsSet) { using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER; COMPUTE_WALKER *computeWalker = static_cast(linearStream.getSpace(sizeof(COMPUTE_WALKER))); *computeWalker = FamilyType::cmdInitGpgpuWalker; kernel->kernelInfo.setLocalIds({1, 1, 1}); localWorkSizesIn[1] = 8; localWorkSizesIn[2] = 2; GpgpuWalkerHelper::setGpgpuWalkerThreadData(computeWalker, kernel->kernelInfo.kernelDescriptor, globalOffsets, startWorkGroups, numWorkGroups, localWorkSizesIn, simd, 3, true, false, 0u); EXPECT_EQ(0u, computeWalker->getLocalXMaximum()); EXPECT_EQ(0u, computeWalker->getLocalYMaximum()); EXPECT_EQ(0u, computeWalker->getLocalZMaximum()); constexpr uint32_t expectedEmit = 0; EXPECT_EQ(expectedEmit, computeWalker->getEmitLocalId()); EXPECT_EQ(0u, computeWalker->getGenerateLocalId()); EXPECT_EQ(0u, computeWalker->getEmitInlineParameter()); } HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, givenWorkDimOneWhenAskForInlineDataAndNoLocalIdsPresentThenExpectOnlyInlineFieldSet) { using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER; COMPUTE_WALKER *computeWalker = static_cast(linearStream.getSpace(sizeof(COMPUTE_WALKER))); *computeWalker = FamilyType::cmdInitGpgpuWalker; GpgpuWalkerHelper::setGpgpuWalkerThreadData(computeWalker, kernel->kernelInfo.kernelDescriptor, globalOffsets, startWorkGroups, numWorkGroups, localWorkSizesIn, simd, 1, true, true, 0u); EXPECT_EQ(0u, computeWalker->getLocalXMaximum()); EXPECT_EQ(0u, computeWalker->getLocalYMaximum()); EXPECT_EQ(0u, computeWalker->getLocalZMaximum()); EXPECT_EQ(0u, computeWalker->getEmitLocalId()); EXPECT_EQ(0u, computeWalker->getGenerateLocalId()); EXPECT_EQ(1u, computeWalker->getEmitInlineParameter()); } HWCMDTEST_F(IGFX_XE_HP_CORE, 
XeHPAndLaterDispatchWalkerBasicTest, givenWorkDimOneWhenAskForInlineDataAndLocalIdsPresentThenExpectInlineAndDoNotExpectEmitLocalIdFieldSet) { using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER; COMPUTE_WALKER *computeWalker = static_cast(linearStream.getSpace(sizeof(COMPUTE_WALKER))); *computeWalker = FamilyType::cmdInitGpgpuWalker; kernel->kernelInfo.setLocalIds({1, 0, 0}); GpgpuWalkerHelper::setGpgpuWalkerThreadData(computeWalker, kernel->kernelInfo.kernelDescriptor, globalOffsets, startWorkGroups, numWorkGroups, localWorkSizesIn, simd, 1, true, true, 0u); EXPECT_EQ(0u, computeWalker->getLocalXMaximum()); EXPECT_EQ(0u, computeWalker->getLocalYMaximum()); EXPECT_EQ(0u, computeWalker->getLocalZMaximum()); constexpr uint32_t expectedEmit = 0u; EXPECT_EQ(expectedEmit, computeWalker->getEmitLocalId()); EXPECT_EQ(0u, computeWalker->getGenerateLocalId()); EXPECT_EQ(1u, computeWalker->getEmitInlineParameter()); } HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, givenWorkDimThreeWhenAskForInlineDataAndLocalIdsPresentThenDoNotExpectEmitLocalIdFieldSetButExpectInlineSet) { using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER; COMPUTE_WALKER *computeWalker = static_cast(linearStream.getSpace(sizeof(COMPUTE_WALKER))); *computeWalker = FamilyType::cmdInitGpgpuWalker; kernel->kernelInfo.setLocalIds({1, 1, 1}); GpgpuWalkerHelper::setGpgpuWalkerThreadData(computeWalker, kernel->kernelInfo.kernelDescriptor, globalOffsets, startWorkGroups, numWorkGroups, localWorkSizesIn, simd, 3, true, true, 0u); EXPECT_EQ(0u, computeWalker->getLocalXMaximum()); EXPECT_EQ(0u, computeWalker->getLocalYMaximum()); EXPECT_EQ(0u, computeWalker->getLocalZMaximum()); constexpr uint32_t expectedEmit = 0u; EXPECT_EQ(expectedEmit, computeWalker->getEmitLocalId()); EXPECT_EQ(0u, computeWalker->getGenerateLocalId()); EXPECT_EQ(1u, computeWalker->getEmitInlineParameter()); } HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, 
givenWorkDimThreeWhenAskHwForLocalIdsAndInlineDataThenExpectGenerationFieldsSet) {
    using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER;
    COMPUTE_WALKER *computeWalker = static_cast(linearStream.getSpace(sizeof(COMPUTE_WALKER)));
    *computeWalker = FamilyType::cmdInitGpgpuWalker;

    // All three local-id channels are flagged on the kernel.
    kernel->kernelInfo.setLocalIds({1, 1, 1});
    localWorkSizesIn[1] = 8;
    localWorkSizesIn[2] = 2;

    // Arguments after simd: work dim 3, runtime local ids = false, inline data = true.
    // The trailing 5u is read back through getWalkOrder() at the end of the test.
    GpgpuWalkerHelper::setGpgpuWalkerThreadData(computeWalker, kernel->kernelInfo.kernelDescriptor,
                                                globalOffsets, startWorkGroups, numWorkGroups,
                                                localWorkSizesIn, simd, 3, false, true, 5u);

    // The walker stores "maximum" values, i.e. size - 1, hence the + 1 before comparing.
    auto localX = static_cast(computeWalker->getLocalXMaximum() + 1);
    auto localY = static_cast(computeWalker->getLocalYMaximum() + 1);
    auto localZ = static_cast(computeWalker->getLocalZMaximum() + 1);
    EXPECT_EQ(localWorkSizesIn[0], localX);
    EXPECT_EQ(localWorkSizesIn[1], localY);
    EXPECT_EQ(localWorkSizesIn[2], localZ);

    // One emit bit per channel: X | Y | Z.
    constexpr uint32_t expectedEmit = (1 << 0) | (1 << 1) | (1 << 2);
    EXPECT_EQ(expectedEmit, computeWalker->getEmitLocalId());
    EXPECT_EQ(1u, computeWalker->getGenerateLocalId());
    EXPECT_EQ(1u, computeWalker->getEmitInlineParameter());
    EXPECT_EQ(5u, computeWalker->getWalkOrder());
}

// Dispatches two kernels with a two-node timestamp packet container and checks that
// each resulting walker carries a timestamp-write post-sync pointing at its own node.
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, givenTimestampPacketWhenDispatchingThenProgramPostSyncData) {
    using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER;

    MockKernelWithInternals kernel1(*device);
    MockKernelWithInternals kernel2(*device);

    device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true;

    // One timestamp node per dispatched kernel.
    TimestampPacketContainer timestampPacketContainer;
    timestampPacketContainer.add(device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator()->getTag());
    timestampPacketContainer.add(device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator()->getTag());

    MockMultiDispatchInfo multiDispatchInfo(device.get(), std::vector({kernel1.mockKernel, kernel2.mockKernel}));

    MockCommandQueue cmdQ(context.get(), device.get(), nullptr, false);
    auto &cmdStream = cmdQ.getCS(0);

    HardwareInterface::dispatchWalker(
        cmdQ,
        multiDispatchInfo,
        CsrDependencies(),
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        &timestampPacketContainer,
        false);

    HardwareParse hwParser;
    hwParser.parseCommands(cmdStream, 0);
    hwParser.findHardwareCommands();
    // Fatal on purpose: the iterator is dereferenced right below.
    ASSERT_NE(hwParser.itorWalker, hwParser.cmdList.end());

    auto gmmHelper = device->getGmmHelper();
    auto expectedMocs = MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo)
                            ? gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED)
                            : gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);

    // First walker -> node 0.
    auto walker = genCmdCast(*hwParser.itorWalker);
    EXPECT_EQ(FamilyType::POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walker->getPostSync().getOperation());
    EXPECT_TRUE(walker->getPostSync().getDataportPipelineFlush());
    EXPECT_EQ(expectedMocs, walker->getPostSync().getMocs());
    auto contextStartAddress = TimestampPacketHelper::getContextStartGpuAddress(*timestampPacketContainer.peekNodes()[0]);
    EXPECT_EQ(contextStartAddress, walker->getPostSync().getDestinationAddress());

    // Second walker -> node 1. The search result must be valid before it is dereferenced.
    auto secondWalkerItor = find(++hwParser.itorWalker, hwParser.cmdList.end());
    ASSERT_NE(secondWalkerItor, hwParser.cmdList.end());
    auto secondWalker = genCmdCast(*secondWalkerItor);
    EXPECT_EQ(FamilyType::POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, secondWalker->getPostSync().getOperation());
    EXPECT_TRUE(secondWalker->getPostSync().getDataportPipelineFlush());
    // Check the MOCS of the second walker itself (previously the first walker was re-checked here).
    EXPECT_EQ(expectedMocs, secondWalker->getPostSync().getMocs());
    contextStartAddress = TimestampPacketHelper::getContextStartGpuAddress(*timestampPacketContainer.peekNodes()[1]);
    EXPECT_EQ(contextStartAddress, secondWalker->getPostSync().getDestinationAddress());
}

HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, givenDebugVariableEnabledWhenEnqueueingThenWriteWalkerStamp) {
    using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER;
    DebugManager.flags.EnableTimestampPacket.set(true);

    auto testDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()));
    MockContext testContext(testDevice.get());
auto cmdQ = std::make_unique>(&testContext, testDevice.get(), nullptr); MockKernelWithInternals testKernel(*testDevice, &testContext); size_t gws[] = {1, 1, 1}; cmdQ->enqueueKernel(testKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_NE(nullptr, cmdQ->timestampPacketContainer.get()); HardwareParse hwParser; hwParser.parseCommands(cmdQ->getCS(0), 0); hwParser.findHardwareCommands(); EXPECT_NE(hwParser.itorWalker, hwParser.cmdList.end()); auto walker = genCmdCast(*hwParser.itorWalker); auto gmmHelper = device->getGmmHelper(); auto expectedMocs = MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo) ? gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED) : gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); auto &postSyncData = walker->getPostSync(); EXPECT_EQ(FamilyType::POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, postSyncData.getOperation()); EXPECT_TRUE(postSyncData.getDataportPipelineFlush()); EXPECT_EQ(expectedMocs, postSyncData.getMocs()); } HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, givenDebugVariableEnabledWhenMocsValueIsOverwrittenThenPostSyncContainsProperSetting) { using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER; auto mocsValue = 8u; DebugManager.flags.EnableTimestampPacket.set(true); DebugManager.flags.OverridePostSyncMocs.set(mocsValue); auto testDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); MockContext testContext(testDevice.get()); auto cmdQ = std::make_unique>(&testContext, testDevice.get(), nullptr); MockKernelWithInternals testKernel(*testDevice, &testContext); size_t gws[] = {1, 1, 1}; cmdQ->enqueueKernel(testKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_NE(nullptr, cmdQ->timestampPacketContainer.get()); HardwareParse hwParser; hwParser.parseCommands(cmdQ->getCS(0), 0); hwParser.findHardwareCommands(); EXPECT_NE(hwParser.itorWalker, hwParser.cmdList.end()); auto walker = 
genCmdCast(*hwParser.itorWalker); auto &postSyncData = walker->getPostSync(); EXPECT_EQ(mocsValue, postSyncData.getMocs()); } HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, givenTimestampPacketWriteEnabledWhenEstimatingStreamSizeThenAddEnoughSpace) { MockCommandQueueHw cmdQ(context.get(), device.get(), nullptr); MockKernelWithInternals kernel1(*device); MockKernelWithInternals kernel2(*device); MockMultiDispatchInfo multiDispatchInfo(device.get(), std::vector({kernel1.mockKernel, kernel2.mockKernel})); device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = false; getCommandStream(cmdQ, CsrDependencies(), false, false, false, multiDispatchInfo, nullptr, 0, false, false); auto sizeWithDisabled = cmdQ.requestedCmdStreamSize; device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; getCommandStream(cmdQ, CsrDependencies(), false, false, false, multiDispatchInfo, nullptr, 0, false, false); auto sizeWithEnabled = cmdQ.requestedCmdStreamSize; EXPECT_EQ(sizeWithEnabled, sizeWithDisabled + 0); } HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, givenDebugVariableEnabledWhenEnqueueingThenWritePostsyncOperationInImmWriteMode) { DebugManager.flags.UseImmDataWriteModeOnPostSyncOperation.set(true); device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; auto cmdQ = std::make_unique>(context.get(), device.get(), nullptr); size_t gws[] = {1, 1, 1}; cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); HardwareParse hwParser; hwParser.parseCommands(cmdQ->getCS(0), 0); hwParser.findHardwareCommands(); EXPECT_NE(hwParser.itorWalker, hwParser.cmdList.end()); auto walker = genCmdCast(*hwParser.itorWalker); auto &postSyncData = walker->getPostSync(); EXPECT_EQ(FamilyType::POSTSYNC_DATA::OPERATION::OPERATION_WRITE_IMMEDIATE_DATA, postSyncData.getOperation()); auto contextEndAddress = 
TimestampPacketHelper::getContextEndGpuAddress(*cmdQ->timestampPacketContainer->peekNodes()[0]); EXPECT_EQ(contextEndAddress, postSyncData.getDestinationAddress()); EXPECT_EQ(0x2'0000'0002u, postSyncData.getImmediateData()); } HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, givenDebugVariableEnabledWhenEnqueueingThenSystolicIsProgrammed) { DebugManager.flags.OverrideSystolicInComputeWalker.set(true); device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; auto cmdQ = std::make_unique>(context.get(), device.get(), nullptr); size_t gws[] = {1, 1, 1}; cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); HardwareParse hwParser; hwParser.parseCommands(cmdQ->getCS(0), 0); hwParser.findHardwareCommands(); EXPECT_NE(hwParser.itorWalker, hwParser.cmdList.end()); auto walker = genCmdCast(*hwParser.itorWalker); EXPECT_TRUE(walker->getSystolicModeEnable()); } HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, givenAutoLocalIdsGenerationEnabledWhenDispatchMeetCriteriaThenExpectNoLocalIdsAndProperIsaAddress) { using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER; using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; DebugManager.flags.EnableHwGenerationLocalIds.set(1); auto cmdQ = std::make_unique>(context.get(), device.get(), nullptr); auto &commandStream = cmdQ->getCS(1024); auto usedBeforeCS = commandStream.getUsed(); auto &kd = kernel->kernelInfo.kernelDescriptor; kd.entryPoints.skipPerThreadDataLoad = 128; kd.kernelAttributes.localId[0] = 1; kd.kernelAttributes.localId[1] = 0; kd.kernelAttributes.localId[2] = 0; kd.kernelAttributes.numLocalIdChannels = 1; auto memoryManager = device->getUltCommandStreamReceiver().getMemoryManager(); kernel->kernelInfo.kernelAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), MemoryConstants::pageSize}); size_t gws[] = {16, 1, 1}; size_t lws[] = {16, 1, 1}; 
size_t globalOffsets[] = {0, 0, 0}; MultiDispatchInfo multiDispatchInfo(kernel->mockKernel); DispatchInfoBuilder builder(*device); builder.setDispatchGeometry(1, gws, lws, globalOffsets); builder.setKernel(kernel->mockKernel); builder.bake(multiDispatchInfo); cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, lws, 0, nullptr, nullptr); auto usedAfterCS = commandStream.getUsed(); HardwareParse hwParser; hwParser.parseCommands(cmdQ->getCS(0), 0); hwParser.findHardwareCommands(); EXPECT_NE(hwParser.itorWalker, hwParser.cmdList.end()); auto walker = genCmdCast(*hwParser.itorWalker); EXPECT_EQ(COMPUTE_WALKER::DWORD_LENGTH_FIXED_SIZE, walker->getDwordLength()); EXPECT_EQ(0u, walker->getEmitInlineParameter()); EXPECT_EQ(1u, walker->getGenerateLocalId()); EXPECT_EQ(1u, walker->getEmitLocalId()); uint32_t expectedIndirectDataLength = alignUp(kernel->mockKernel->getCrossThreadDataSize(), COMPUTE_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE); EXPECT_EQ(expectedIndirectDataLength, walker->getIndirectDataLength()); INTERFACE_DESCRIPTOR_DATA &idd = walker->getInterfaceDescriptor(); uint64_t expectedKernelStartOffset = kernel->mockKernel->getKernelInfo().getGraphicsAllocation()->getGpuAddressToPatch() + kernel->kernelInfo.kernelDescriptor.entryPoints.skipPerThreadDataLoad; EXPECT_EQ((uint32_t)(expectedKernelStartOffset), idd.getKernelStartPointer()); auto expectedSizeCS = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, CsrDependencies(), false, false, false, *cmdQ.get(), multiDispatchInfo, false, false); expectedSizeCS += sizeof(typename FamilyType::MI_BATCH_BUFFER_END); expectedSizeCS = alignUp(expectedSizeCS, MemoryConstants::cacheLineSize); EXPECT_GE(expectedSizeCS, usedAfterCS - usedBeforeCS); memoryManager->freeGraphicsMemory(kernel->kernelInfo.kernelAllocation); } HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, givenPassInlineDataEnabledWhenLocalIdsUsedThenDoNotExpectCrossThreadDataInWalkerEmitLocalFieldSet) { using COMPUTE_WALKER 
= typename FamilyType::COMPUTE_WALKER; using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; using INLINE_DATA = typename FamilyType::INLINE_DATA; DebugManager.flags.EnablePassInlineData.set(true); DebugManager.flags.EnableHwGenerationLocalIds.set(0); auto cmdQ = std::make_unique>(context.get(), device.get(), nullptr); auto &commandStream = cmdQ->getCS(1024); auto usedBeforeCS = commandStream.getUsed(); auto &kd = kernel->kernelInfo.kernelDescriptor; kd.kernelAttributes.flags.passInlineData = true; kd.kernelAttributes.localId[0] = 1; kd.kernelAttributes.localId[1] = 0; kd.kernelAttributes.localId[2] = 0; kd.kernelAttributes.numLocalIdChannels = 1; kernel->mockKernel->setCrossThreadData(crossThreadDataGrf, sizeof(INLINE_DATA)); auto memoryManager = device->getUltCommandStreamReceiver().getMemoryManager(); kernel->kernelInfo.kernelAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), MemoryConstants::pageSize}); size_t gws[] = {16, 1, 1}; size_t lws[] = {16, 1, 1}; size_t globalOffsets[] = {0, 0, 0}; MultiDispatchInfo multiDispatchInfo(kernel->mockKernel); DispatchInfoBuilder builder(*device); builder.setDispatchGeometry(1, gws, lws, globalOffsets); builder.setKernel(kernel->mockKernel); builder.bake(multiDispatchInfo); cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, lws, 0, nullptr, nullptr); auto usedAfterCS = commandStream.getUsed(); HardwareParse hwParser; hwParser.parseCommands(cmdQ->getCS(0), 0); hwParser.findHardwareCommands(); EXPECT_NE(hwParser.itorWalker, hwParser.cmdList.end()); auto walker = genCmdCast(*hwParser.itorWalker); EXPECT_EQ(1u, walker->getEmitInlineParameter()); EXPECT_EQ(0u, walker->getGenerateLocalId()); constexpr uint32_t expectedEmit = 0u; EXPECT_EQ(expectedEmit, walker->getEmitLocalId()); EXPECT_EQ(0, memcmp(walker->getInlineDataPointer(), crossThreadDataGrf, sizeof(INLINE_DATA))); uint32_t simd = 
kernel->mockKernel->getKernelInfo().getMaxSimdSize(); //only X is present auto sizePerThreadData = getPerThreadSizeLocalIDs(simd, sizeGrf, 1); sizePerThreadData = std::max(sizePerThreadData, sizeGrf); size_t perThreadTotalDataSize = getThreadsPerWG(simd, lws[0]) * sizePerThreadData; uint32_t expectedIndirectDataLength = alignUp(static_cast(perThreadTotalDataSize), COMPUTE_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE); EXPECT_EQ(expectedIndirectDataLength, walker->getIndirectDataLength()); INTERFACE_DESCRIPTOR_DATA &idd = walker->getInterfaceDescriptor(); uint64_t expectedKernelStartOffset = kernel->mockKernel->getKernelInfo().getGraphicsAllocation()->getGpuAddressToPatch(); EXPECT_EQ((uint32_t)(expectedKernelStartOffset), idd.getKernelStartPointer()); auto expectedSizeCS = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, CsrDependencies(), false, false, false, *cmdQ.get(), multiDispatchInfo, false, false); expectedSizeCS += sizeof(typename FamilyType::MI_BATCH_BUFFER_END); expectedSizeCS = alignUp(expectedSizeCS, MemoryConstants::cacheLineSize); EXPECT_GE(expectedSizeCS, usedAfterCS - usedBeforeCS); memoryManager->freeGraphicsMemory(kernel->kernelInfo.kernelAllocation); } HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, givenExecutionMaskWithoutReminderWhenProgrammingWalkerThenSetValidNumberOfBitsInMask) { using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER; auto cmdQ = std::make_unique>(context.get(), device.get(), nullptr); std::array testedSimd = {{1, 8, 16, 32}}; for (auto simd : testedSimd) { kernel->kernelInfo.kernelDescriptor.kernelAttributes.simdSize = simd; auto kernelSimd = kernel->mockKernel->getKernelInfo().getMaxSimdSize(); EXPECT_EQ(simd, kernelSimd); size_t gws[] = {kernelSimd, 1, 1}; size_t lws[] = {kernelSimd, 1, 1}; auto streamOffset = cmdQ->getCS(0).getUsed(); cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, lws, 0, nullptr, nullptr); HardwareParse hwParser; hwParser.parseCommands(cmdQ->getCS(0), 
streamOffset); hwParser.findHardwareCommands(); auto walker = genCmdCast(*hwParser.itorWalker); if (simd == 1) { EXPECT_EQ(maxNBitValue(32), walker->getExecutionMask()); } else { EXPECT_EQ(maxNBitValue(simd), walker->getExecutionMask()); } } } HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, givenPassInlineDataEnabledWhenLocalIdsUsedAndCrossThreadIsTwoGrfsThenExpectFirstCrossThreadDataInWalkerSecondInPayloadWithPerThread) { using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER; using INLINE_DATA = typename FamilyType::INLINE_DATA; DebugManager.flags.EnablePassInlineData.set(true); DebugManager.flags.EnableHwGenerationLocalIds.set(false); auto cmdQ = std::make_unique>(context.get(), device.get(), nullptr); IndirectHeap &ih = cmdQ->getIndirectHeap(IndirectHeap::Type::INDIRECT_OBJECT, 2048); auto &kd = kernel->kernelInfo.kernelDescriptor; kd.kernelAttributes.flags.passInlineData = true; kd.kernelAttributes.localId[0] = 1; kd.kernelAttributes.localId[1] = 0; kd.kernelAttributes.localId[2] = 0; kd.kernelAttributes.numLocalIdChannels = 1; kernel->mockKernel->setCrossThreadData(crossThreadDataTwoGrf, sizeof(INLINE_DATA) * 2); auto memoryManager = device->getUltCommandStreamReceiver().getMemoryManager(); kernel->kernelInfo.kernelAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), MemoryConstants::pageSize}); size_t gws[] = {16, 1, 1}; size_t lws[] = {16, 1, 1}; size_t globalOffsets[] = {0, 0, 0}; MultiDispatchInfo multiDispatchInfo(kernel->mockKernel); DispatchInfoBuilder builder(*device); builder.setDispatchGeometry(1, gws, lws, globalOffsets); builder.setKernel(kernel->mockKernel); builder.bake(multiDispatchInfo); cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, lws, 0, nullptr, nullptr); HardwareParse hwParser; hwParser.parseCommands(cmdQ->getCS(0), 0); hwParser.findHardwareCommands(); EXPECT_NE(hwParser.itorWalker, hwParser.cmdList.end()); auto walker = 
genCmdCast(*hwParser.itorWalker); EXPECT_EQ(1u, walker->getEmitInlineParameter()); EXPECT_EQ(0u, walker->getGenerateLocalId()); constexpr uint32_t expectedEmit = 0u; EXPECT_EQ(expectedEmit, walker->getEmitLocalId()); EXPECT_EQ(0, memcmp(walker->getInlineDataPointer(), crossThreadDataTwoGrf, sizeof(INLINE_DATA))); void *payloadData = ih.getCpuBase(); EXPECT_EQ(0, memcmp(payloadData, &crossThreadDataTwoGrf[sizeof(INLINE_DATA) / sizeof(uint32_t)], sizeof(INLINE_DATA))); uint32_t simd = kernel->mockKernel->getKernelInfo().getMaxSimdSize(); //only X is present uint32_t localIdSizePerThread = PerThreadDataHelper::getLocalIdSizePerThread(simd, sizeGrf, 1); localIdSizePerThread = std::max(localIdSizePerThread, sizeGrf); auto sizePerThreadData = getThreadsPerWG(simd, lws[0]) * localIdSizePerThread; auto crossThreadDataSize = kernel->mockKernel->getCrossThreadDataSize(); crossThreadDataSize -= std::min(static_cast(sizeof(INLINE_DATA)), crossThreadDataSize); //second GRF in indirect uint32_t expectedIndirectDataLength = static_cast(sizePerThreadData + crossThreadDataSize); expectedIndirectDataLength = alignUp(expectedIndirectDataLength, COMPUTE_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE); EXPECT_EQ(expectedIndirectDataLength, walker->getIndirectDataLength()); memoryManager->freeGraphicsMemory(kernel->kernelInfo.kernelAllocation); } HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, givenPassInlineDataEnabledWhenNoLocalIdsUsedThenExpectCrossThreadDataInWalkerAndNoEmitLocalFieldSet) { using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER; using INLINE_DATA = typename FamilyType::INLINE_DATA; DebugManager.flags.EnablePassInlineData.set(true); DebugManager.flags.EnableHwGenerationLocalIds.set(false); auto cmdQ = std::make_unique>(context.get(), device.get(), nullptr); auto &kd = kernel->kernelInfo.kernelDescriptor; kd.kernelAttributes.flags.passInlineData = true; kd.kernelAttributes.localId[0] = 0; kd.kernelAttributes.localId[1] = 0; 
kd.kernelAttributes.localId[2] = 0; kd.kernelAttributes.numLocalIdChannels = 0; kernel->mockKernel->setCrossThreadData(crossThreadDataGrf, sizeof(INLINE_DATA)); auto memoryManager = device->getUltCommandStreamReceiver().getMemoryManager(); kernel->kernelInfo.kernelAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), MemoryConstants::pageSize}); size_t gws[] = {16, 1, 1}; size_t lws[] = {16, 1, 1}; cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, lws, 0, nullptr, nullptr); HardwareParse hwParser; hwParser.parseCommands(cmdQ->getCS(0), 0); hwParser.findHardwareCommands(); EXPECT_NE(hwParser.itorWalker, hwParser.cmdList.end()); auto walker = genCmdCast(*hwParser.itorWalker); EXPECT_EQ(1u, walker->getEmitInlineParameter()); EXPECT_EQ(0u, walker->getGenerateLocalId()); EXPECT_EQ(0u, walker->getEmitLocalId()); EXPECT_EQ(0, memcmp(walker->getInlineDataPointer(), crossThreadDataGrf, sizeof(INLINE_DATA))); uint32_t simd = kernel->mockKernel->getKernelInfo().getMaxSimdSize(); //only X is present auto sizePerThreadData = getPerThreadSizeLocalIDs(simd, 1); sizePerThreadData = std::max(sizePerThreadData, sizeGrf); size_t perThreadTotalDataSize = getThreadsPerWG(simd, lws[0]) * sizePerThreadData; uint32_t expectedIndirectDataLength = static_cast(perThreadTotalDataSize); expectedIndirectDataLength = alignUp(expectedIndirectDataLength, COMPUTE_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE); EXPECT_EQ(expectedIndirectDataLength, walker->getIndirectDataLength()); memoryManager->freeGraphicsMemory(kernel->kernelInfo.kernelAllocation); } HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, givenPassInlineDataEnabledWhenNoLocalIdsUsedAndCrossThreadIsTwoGrfsThenExpectFirstCrossThreadDataInWalkerSecondInPayload) { using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER; using INLINE_DATA = typename FamilyType::INLINE_DATA; DebugManager.flags.EnablePassInlineData.set(true); 
DebugManager.flags.EnableHwGenerationLocalIds.set(false); auto cmdQ = std::make_unique>(context.get(), device.get(), nullptr); IndirectHeap &ih = cmdQ->getIndirectHeap(IndirectHeap::Type::INDIRECT_OBJECT, 2048); auto &kd = kernel->kernelInfo.kernelDescriptor; kd.kernelAttributes.flags.passInlineData = true; kd.kernelAttributes.localId[0] = 0; kd.kernelAttributes.localId[1] = 0; kd.kernelAttributes.localId[2] = 0; kd.kernelAttributes.numLocalIdChannels = 0; kernel->mockKernel->setCrossThreadData(crossThreadDataTwoGrf, sizeof(INLINE_DATA) * 2); auto memoryManager = device->getUltCommandStreamReceiver().getMemoryManager(); kernel->kernelInfo.kernelAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), MemoryConstants::pageSize}); size_t gws[] = {16, 1, 1}; size_t lws[] = {16, 1, 1}; cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, lws, 0, nullptr, nullptr); HardwareParse hwParser; hwParser.parseCommands(cmdQ->getCS(0), 0); hwParser.findHardwareCommands(); EXPECT_NE(hwParser.itorWalker, hwParser.cmdList.end()); auto walker = genCmdCast(*hwParser.itorWalker); EXPECT_EQ(1u, walker->getEmitInlineParameter()); EXPECT_EQ(0u, walker->getGenerateLocalId()); EXPECT_EQ(0u, walker->getEmitLocalId()); EXPECT_EQ(0, memcmp(walker->getInlineDataPointer(), crossThreadDataTwoGrf, sizeof(INLINE_DATA))); void *payloadData = ih.getCpuBase(); EXPECT_EQ(0, memcmp(payloadData, &crossThreadDataTwoGrf[sizeof(INLINE_DATA) / sizeof(uint32_t)], sizeof(INLINE_DATA))); uint32_t simd = kernel->mockKernel->getKernelInfo().getMaxSimdSize(); //only X is present auto sizePerThreadData = getPerThreadSizeLocalIDs(simd, 1); sizePerThreadData = std::max(sizePerThreadData, sizeGrf); size_t perThreadTotalDataSize = getThreadsPerWG(simd, lws[0]) * sizePerThreadData; //second GRF in indirect uint32_t expectedIndirectDataLength = static_cast(perThreadTotalDataSize + sizeof(INLINE_DATA)); expectedIndirectDataLength = 
alignUp(expectedIndirectDataLength, COMPUTE_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE); EXPECT_EQ(expectedIndirectDataLength, walker->getIndirectDataLength()); memoryManager->freeGraphicsMemory(kernel->kernelInfo.kernelAllocation); } HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, givenAllChannelsActiveWithWorkDimOneDimensionThenHwGenerationIsEnabledWithOverwrittenWalkOrder) { using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER; DebugManager.flags.EnableHwGenerationLocalIds.set(true); auto cmdQ = std::make_unique>(context.get(), device.get(), nullptr); auto &kd = kernel->kernelInfo.kernelDescriptor; kd.kernelAttributes.flags.passInlineData = true; kd.kernelAttributes.localId[0] = 1; kd.kernelAttributes.localId[1] = 1; kd.kernelAttributes.localId[2] = 1; kd.kernelAttributes.numLocalIdChannels = 3; kernel->mockKernel->setCrossThreadData(crossThreadDataTwoGrf, sizeGrf * 2); auto memoryManager = device->getUltCommandStreamReceiver().getMemoryManager(); kernel->kernelInfo.kernelAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), MemoryConstants::pageSize}); size_t gws[] = {4000, 1, 1}; size_t lws[] = {40, 1, 1}; cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, lws, 0, nullptr, nullptr); HardwareParse hwParser; hwParser.parseCommands(cmdQ->getCS(0), 0); hwParser.findHardwareCommands(); EXPECT_NE(hwParser.itorWalker, hwParser.cmdList.end()); auto walker = genCmdCast(*hwParser.itorWalker); EXPECT_EQ(1u, walker->getGenerateLocalId()); EXPECT_EQ(7u, walker->getEmitLocalId()); EXPECT_EQ(4u, walker->getWalkOrder()); memoryManager->freeGraphicsMemory(kernel->kernelInfo.kernelAllocation); } HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, givenPassInlineDataAndHwLocalIdsGenerationEnabledWhenLocalIdsUsedThenExpectCrossThreadDataInWalkerAndEmitFields) { using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER; using INTERFACE_DESCRIPTOR_DATA = typename 
FamilyType::INTERFACE_DESCRIPTOR_DATA; using INLINE_DATA = typename FamilyType::INLINE_DATA; DebugManager.flags.EnablePassInlineData.set(true); DebugManager.flags.EnableHwGenerationLocalIds.set(1); auto cmdQ = std::make_unique>(context.get(), device.get(), nullptr); auto &kd = kernel->kernelInfo.kernelDescriptor; kd.entryPoints.skipPerThreadDataLoad = 128; kd.kernelAttributes.flags.passInlineData = true; kd.kernelAttributes.localId[0] = 1; kd.kernelAttributes.localId[1] = 0; kd.kernelAttributes.localId[2] = 0; kd.kernelAttributes.numLocalIdChannels = 1; kernel->mockKernel->setCrossThreadData(crossThreadDataGrf, sizeof(INLINE_DATA)); auto memoryManager = device->getUltCommandStreamReceiver().getMemoryManager(); kernel->kernelInfo.kernelAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), MemoryConstants::pageSize}); size_t gws[] = {16, 1, 1}; size_t lws[] = {16, 1, 1}; cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, lws, 0, nullptr, nullptr); HardwareParse hwParser; hwParser.parseCommands(cmdQ->getCS(0), 0); hwParser.findHardwareCommands(); EXPECT_NE(hwParser.itorWalker, hwParser.cmdList.end()); auto walker = genCmdCast(*hwParser.itorWalker); EXPECT_EQ(1u, walker->getEmitInlineParameter()); EXPECT_EQ(1u, walker->getGenerateLocalId()); constexpr uint32_t expectedEmit = (1 << 0); EXPECT_EQ(expectedEmit, walker->getEmitLocalId()); EXPECT_EQ(0, memcmp(walker->getInlineDataPointer(), crossThreadDataGrf, sizeof(INLINE_DATA))); constexpr uint32_t expectedIndirectDataLength = 0; EXPECT_EQ(expectedIndirectDataLength, walker->getIndirectDataLength()); INTERFACE_DESCRIPTOR_DATA &idd = walker->getInterfaceDescriptor(); uint64_t expectedKernelStartOffset = kernel->mockKernel->getKernelInfo().getGraphicsAllocation()->getGpuAddressToPatch() + kernel->kernelInfo.kernelDescriptor.entryPoints.skipPerThreadDataLoad; EXPECT_EQ((uint32_t)(expectedKernelStartOffset), idd.getKernelStartPointer()); 
memoryManager->freeGraphicsMemory(kernel->kernelInfo.kernelAllocation);
}

// Inline data and HW local-ID generation are both enabled, but the kernel
// uses no local-ID channels (skipPerThreadDataLoad is still set to 128).
// Expects generateLocalId = 0, emitLocalId = 0, zero indirect data length and
// a kernel start pointer equal to the kernel allocation's GPU address with no
// skip offset added.
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, givenPassInlineDataAndHwLocalIdsGenerationEnabledWhenLocalIdsNotUsedThenExpectCrossThreadDataInWalkerAndNoHwLocalIdGeneration) {
    using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER;
    using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
    using INLINE_DATA = typename FamilyType::INLINE_DATA;

    DebugManager.flags.EnablePassInlineData.set(true);
    DebugManager.flags.EnableHwGenerationLocalIds.set(1);

    auto cmdQ = std::make_unique>(context.get(), device.get(), nullptr);

    auto &kd = kernel->kernelInfo.kernelDescriptor;
    kd.entryPoints.skipPerThreadDataLoad = 128;
    kd.kernelAttributes.flags.passInlineData = true;
    kd.kernelAttributes.localId[0] = 0;
    kd.kernelAttributes.localId[1] = 0;
    kd.kernelAttributes.localId[2] = 0;
    kd.kernelAttributes.numLocalIdChannels = 0;

    kernel->mockKernel->setCrossThreadData(crossThreadDataGrf, sizeof(INLINE_DATA));

    auto memoryManager = device->getUltCommandStreamReceiver().getMemoryManager();
    kernel->kernelInfo.kernelAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), MemoryConstants::pageSize});

    size_t gws[] = {16, 1, 1};
    size_t lws[] = {16, 1, 1};
    cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, lws, 0, nullptr, nullptr);

    HardwareParse hwParser;
    hwParser.parseCommands(cmdQ->getCS(0), 0);
    hwParser.findHardwareCommands();
    // ASSERT rather than EXPECT: the iterator is dereferenced on the next line.
    ASSERT_NE(hwParser.itorWalker, hwParser.cmdList.end());

    auto walker = genCmdCast(*hwParser.itorWalker);
    EXPECT_EQ(1u, walker->getEmitInlineParameter());

    EXPECT_EQ(0u, walker->getGenerateLocalId());
    constexpr uint32_t expectedEmit = 0;
    EXPECT_EQ(expectedEmit, walker->getEmitLocalId());

    EXPECT_EQ(0, memcmp(walker->getInlineDataPointer(), crossThreadDataGrf, sizeof(INLINE_DATA)));

    constexpr uint32_t expectedIndirectDataLength = 0;
    EXPECT_EQ(expectedIndirectDataLength, walker->getIndirectDataLength());

    INTERFACE_DESCRIPTOR_DATA &idd = walker->getInterfaceDescriptor();

    uint64_t expectedKernelStartOffset = kernel->mockKernel->getKernelInfo().getGraphicsAllocation()->getGpuAddressToPatch();

    EXPECT_EQ(static_cast<uint32_t>(expectedKernelStartOffset), idd.getKernelStartPointer());

    memoryManager->freeGraphicsMemory(kernel->kernelInfo.kernelAllocation);
}

// Walker partitioning is enabled on a device created with
// UltClDeviceFactory{1, 2} and the pipe-control-stall setting is backed up and
// forced to true. For each combination of staticPartitioning and
// synchronizeBeforeExecution, expects getSizeRequiredCS to equal the base
// size plus estimateSpaceRequiredInCommandBuffer(testArgs).
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, GivenPipeControlIsRequiredWhenWalkerPartitionIsOnThenSizeIsProperlyEstimated) {
    DebugManager.flags.EnableWalkerPartition.set(1u);
    VariableBackup pipeControlConfigBackup(&ImplicitScalingDispatch::getPipeControlStallRequired(), true);
    UltClDeviceFactory deviceFactory{1, 2};
    MockClDevice *device = deviceFactory.rootDevices[0];
    MockContext context{device};

    auto cmdQ = std::make_unique>(&context, device, nullptr);
    auto &csr = cmdQ->getUltCommandStreamReceiver();

    size_t numPipeControls = MemorySynchronizationCommands::isPipeControlWArequired(device->getHardwareInfo()) ? 2 : 1;

    auto baseSize = sizeof(typename FamilyType::COMPUTE_WALKER) +
                    (sizeof(typename FamilyType::PIPE_CONTROL) * numPipeControls) +
                    HardwareCommandsHelper::getSizeRequiredCS() +
                    EncodeMemoryPrefetch::getSizeForMemoryPrefetch(kernel->kernelInfo.heapInfo.KernelHeapSize);

    DispatchInfo dispatchInfo{};
    dispatchInfo.setNumberOfWorkgroups({32, 1, 1});

    WalkerPartition::WalkerPartitionArgs testArgs = {};
    testArgs.initializeWparidRegister = false;
    testArgs.crossTileAtomicSynchronization = true;
    testArgs.emitPipeControlStall = true;
    testArgs.partitionCount = 2u;
    testArgs.tileCount = static_cast(device->getDeviceBitfield().count());

    // Case 1: no pre-execution synchronization, dynamic partitioning.
    DebugManager.flags.SynchronizeWalkerInWparidMode.set(0);
    testArgs.staticPartitioning = false;
    testArgs.synchronizeBeforeExecution = false;
    csr.staticWorkPartitioningEnabled = false;
    auto partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer(testArgs);
    auto returnedSize = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *cmdQ.get(), kernel->mockKernel, dispatchInfo);
    EXPECT_EQ(returnedSize, partitionSize + baseSize);

    // Case 2: no pre-execution synchronization, static partitioning.
    testArgs.staticPartitioning = true;
    csr.staticWorkPartitioningEnabled = true;
    partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer(testArgs);
    returnedSize = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *cmdQ.get(), kernel->mockKernel, dispatchInfo);
    EXPECT_EQ(returnedSize, partitionSize + baseSize);

    // Case 3: pre-execution synchronization, dynamic partitioning.
    DebugManager.flags.SynchronizeWalkerInWparidMode.set(1);
    testArgs.synchronizeBeforeExecution = true;
    testArgs.staticPartitioning = false;
    csr.staticWorkPartitioningEnabled = false;
    partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer(testArgs);
    returnedSize = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *cmdQ.get(), kernel->mockKernel, dispatchInfo);
    EXPECT_EQ(returnedSize, partitionSize + baseSize);

    // Case 4: pre-execution synchronization, static partitioning.
    testArgs.synchronizeBeforeExecution = true;
    testArgs.staticPartitioning = true;
csr.staticWorkPartitioningEnabled = true;
    partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer(testArgs);
    returnedSize = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *cmdQ.get(), kernel->mockKernel, dispatchInfo);
    EXPECT_EQ(returnedSize, partitionSize + baseSize);
}

// Same size-estimation check, with the pipe-control-stall setting backed up
// and forced to false and the cross-tile atomic synchronization / pipe-control
// stall arguments cleared. For each combination of staticPartitioning and
// synchronizeBeforeExecution, expects getSizeRequiredCS to equal the base
// size plus estimateSpaceRequiredInCommandBuffer(testArgs).
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, GivenPipeControlIsNotRequiredWhenWalkerPartitionIsOnThenSizeIsProperlyEstimated) {
    DebugManager.flags.EnableWalkerPartition.set(1u);
    VariableBackup pipeControlConfigBackup(&ImplicitScalingDispatch::getPipeControlStallRequired(), false);
    UltClDeviceFactory deviceFactory{1, 2};
    MockClDevice *device = deviceFactory.rootDevices[0];
    MockContext context{device};

    auto cmdQ = std::make_unique>(&context, device, nullptr);
    auto &csr = cmdQ->getUltCommandStreamReceiver();

    size_t numPipeControls = MemorySynchronizationCommands::isPipeControlWArequired(device->getHardwareInfo()) ? 2 : 1;

    auto baseSize = sizeof(typename FamilyType::COMPUTE_WALKER) +
                    (sizeof(typename FamilyType::PIPE_CONTROL) * numPipeControls) +
                    HardwareCommandsHelper::getSizeRequiredCS() +
                    EncodeMemoryPrefetch::getSizeForMemoryPrefetch(kernel->kernelInfo.heapInfo.KernelHeapSize);

    DispatchInfo dispatchInfo{};
    dispatchInfo.setNumberOfWorkgroups({32, 1, 1});

    WalkerPartition::WalkerPartitionArgs testArgs = {};
    testArgs.initializeWparidRegister = false;
    testArgs.crossTileAtomicSynchronization = false;
    testArgs.emitPipeControlStall = false;
    testArgs.partitionCount = 2u;
    testArgs.tileCount = static_cast(device->getDeviceBitfield().count());

    // Case 1: no pre-execution synchronization, dynamic partitioning.
    DebugManager.flags.SynchronizeWalkerInWparidMode.set(0);
    testArgs.staticPartitioning = false;
    testArgs.synchronizeBeforeExecution = false;
    csr.staticWorkPartitioningEnabled = false;
    auto partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer(testArgs);
    auto returnedSize = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *cmdQ.get(), kernel->mockKernel, dispatchInfo);
    EXPECT_EQ(returnedSize, partitionSize + baseSize);

    // Case 2: no pre-execution synchronization, static partitioning.
    testArgs.staticPartitioning = true;
    csr.staticWorkPartitioningEnabled = true;
    partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer(testArgs);
    returnedSize = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *cmdQ.get(), kernel->mockKernel, dispatchInfo);
    EXPECT_EQ(returnedSize, partitionSize + baseSize);

    // Case 3: pre-execution synchronization, dynamic partitioning.
    DebugManager.flags.SynchronizeWalkerInWparidMode.set(1);
    testArgs.synchronizeBeforeExecution = true;
    testArgs.staticPartitioning = false;
    csr.staticWorkPartitioningEnabled = false;
    partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer(testArgs);
    returnedSize = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *cmdQ.get(), kernel->mockKernel, dispatchInfo);
    EXPECT_EQ(returnedSize, partitionSize + baseSize);

    // Case 4: pre-execution synchronization, static partitioning.
    testArgs.synchronizeBeforeExecution = true;
    testArgs.staticPartitioning = true;
    csr.staticWorkPartitioningEnabled = true;
    partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer(testArgs);
    returnedSize = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *cmdQ.get(), kernel->mockKernel, dispatchInfo);
    EXPECT_EQ(returnedSize, partitionSize + baseSize);
}

// With EnableWalkerPartition set to 0, expects getSizeRequiredCS to return
// exactly the base size (walker + pipe controls + helper sizes), with no
// partitioning overhead added.
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, whenWalkerPartitionIsDisabledThenSizeIsProperlyEstimated) {
    DebugManager.flags.EnableWalkerPartition.set(0u);
    auto cmdQ = std::make_unique>(context.get(), device.get(), nullptr);

    size_t numPipeControls = MemorySynchronizationCommands::isPipeControlWArequired(device->getHardwareInfo()) ? 2 : 1;

    DispatchInfo dispatchInfo{};
    dispatchInfo.setNumberOfWorkgroups({32, 1, 1});

    auto baseSize = sizeof(typename FamilyType::COMPUTE_WALKER) +
                    (sizeof(typename FamilyType::PIPE_CONTROL) * numPipeControls) +
                    HardwareCommandsHelper::getSizeRequiredCS() +
                    EncodeMemoryPrefetch::getSizeForMemoryPrefetch(kernel->kernelInfo.heapInfo.KernelHeapSize);

    auto returnedSize = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *cmdQ.get(), kernel->mockKernel, dispatchInfo);
    EXPECT_EQ(returnedSize, baseSize);
}

// Sets DisablePipeControlPrecedingPostSyncCommand and turns on ftrLocalMemory
// in the device's mutable hardware info, then expects getSizeRequiredCS (called
// with a default-constructed dispatch info) to return the base size.
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, whenPipeControlPrecedingPostSyncCommandIsDisabledAndLocalMemoryIsEnabledThenSizeIsProperlyEstimated) {
    DebugManager.flags.DisablePipeControlPrecedingPostSyncCommand.set(1);
    auto &hwInfo = *device->getRootDeviceEnvironment().getMutableHardwareInfo();
    hwInfo.featureTable.flags.ftrLocalMemory = true;
    auto cmdQ = std::make_unique>(context.get(), device.get(), nullptr);

    size_t numPipeControls = MemorySynchronizationCommands::isPipeControlWArequired(hwInfo) ? 2 : 1;

    auto baseSize = sizeof(typename FamilyType::COMPUTE_WALKER) +
                    (sizeof(typename FamilyType::PIPE_CONTROL) * numPipeControls) +
                    HardwareCommandsHelper::getSizeRequiredCS() +
                    EncodeMemoryPrefetch::getSizeForMemoryPrefetch(kernel->kernelInfo.heapInfo.KernelHeapSize);

    auto returnedSize = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *cmdQ.get(), kernel->mockKernel, {});
    EXPECT_EQ(returnedSize, baseSize);
}

// Uses the fixture's own device and context with walker partitioning enabled
// and the pipe-control-stall setting forced to true. Expects getSizeRequiredCS
// to equal the base size plus the estimate for partitionCount = 16 with WPARID
// initialization, pipe-control stall and cross-tile synchronization requested.
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, GivenPipeControlIsRequiredWhenQueueIsMultiEngineCapableThenWalkerPartitionsAreEstimated) {
    DebugManager.flags.EnableWalkerPartition.set(1u);
    VariableBackup pipeControlConfigBackup(&ImplicitScalingDispatch::getPipeControlStallRequired(), true);

    auto cmdQ = std::make_unique>(context.get(), device.get(), nullptr);

    size_t numPipeControls = MemorySynchronizationCommands::isPipeControlWArequired(device->getHardwareInfo()) ? 2 : 1;

    auto baseSize = sizeof(typename FamilyType::COMPUTE_WALKER) +
                    (sizeof(typename FamilyType::PIPE_CONTROL) * numPipeControls) +
                    HardwareCommandsHelper::getSizeRequiredCS() +
                    EncodeMemoryPrefetch::getSizeForMemoryPrefetch(kernel->kernelInfo.heapInfo.KernelHeapSize);

    WalkerPartition::WalkerPartitionArgs testArgs = {};
    testArgs.initializeWparidRegister = true;
    testArgs.emitPipeControlStall = true;
    testArgs.crossTileAtomicSynchronization = true;
    testArgs.partitionCount = 16u;
    testArgs.tileCount = static_cast(device->getDeviceBitfield().count());

    auto partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer(testArgs);

    DispatchInfo dispatchInfo{};
    dispatchInfo.setNumberOfWorkgroups({32, 1, 1});

    auto returnedSize = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *cmdQ.get(), kernel->mockKernel, dispatchInfo);
    EXPECT_EQ(returnedSize, partitionSize + baseSize);
}

// Counterpart of the test above with the pipe-control-stall setting forced to
// false and the stall / cross-tile synchronization arguments cleared.
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, GivenPipeControlIsNotRequiredWhenQueueIsMultiEngineCapableThenWalkerPartitionsAreEstimated) {
    DebugManager.flags.EnableWalkerPartition.set(1u);
    VariableBackup pipeControlConfigBackup(&ImplicitScalingDispatch::getPipeControlStallRequired(), false);

    auto cmdQ = std::make_unique>(context.get(), device.get(), nullptr);

    size_t numPipeControls = MemorySynchronizationCommands::isPipeControlWArequired(device->getHardwareInfo()) ? 2 : 1;

    auto baseSize = sizeof(typename FamilyType::COMPUTE_WALKER) +
                    (sizeof(typename FamilyType::PIPE_CONTROL) * numPipeControls) +
                    HardwareCommandsHelper::getSizeRequiredCS() +
                    EncodeMemoryPrefetch::getSizeForMemoryPrefetch(kernel->kernelInfo.heapInfo.KernelHeapSize);

    WalkerPartition::WalkerPartitionArgs testArgs = {};
    testArgs.initializeWparidRegister = true;
    testArgs.emitPipeControlStall = false;
    testArgs.crossTileAtomicSynchronization = false;
    testArgs.partitionCount = 16u;
    testArgs.tileCount = static_cast(device->getDeviceBitfield().count());

    auto partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer(testArgs);

    DispatchInfo dispatchInfo{};
    dispatchInfo.setNumberOfWorkgroups({32, 1, 1});

    auto returnedSize = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *cmdQ.get(), kernel->mockKernel, dispatchInfo);
    EXPECT_EQ(returnedSize, partitionSize + baseSize);
}

// Skipped when OSInterface::osEnableLocalMemory is false. With walker
// partitioning enabled, expects the parsed walker to use PARTITION_TYPE_X.
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, whenProgramWalkerIsCalledThenWalkerPartitionLogicIsExecuted) {
    if (!OSInterface::osEnableLocalMemory) {
        GTEST_SKIP();
    }
    DebugManager.flags.EnableWalkerPartition.set(1u);
    auto cmdQ = std::make_unique>(context.get(), device.get(), nullptr);
    size_t gws[] = {2, 1, 1};
    size_t lws[] = {1, 1, 1};
    cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, lws, 0, nullptr, nullptr);

    ClHardwareParse hwParser;
    hwParser.parseCommands(*cmdQ);
    auto computeWalker = reinterpret_cast(hwParser.cmdWalker);
    ASSERT_NE(nullptr, computeWalker);
    EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_X, computeWalker->getPartitionType());
}

// Same as above with SynchronizeWalkerInWparidMode additionally set to 1;
// the walker is still expected to use PARTITION_TYPE_X.
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, whenProgramWalkerIsCalledAndForceSynchronizeWalkerInWpariModeThenWalkerPartitionLogicIsExecuted) {
    if (!OSInterface::osEnableLocalMemory) {
        GTEST_SKIP();
    }
    DebugManager.flags.EnableWalkerPartition.set(1u);
    DebugManager.flags.SynchronizeWalkerInWparidMode.set(1);
    auto cmdQ = std::make_unique>(context.get(), device.get(), nullptr);
    size_t gws[] = {2, 1, 1};
    size_t lws[] = {1, 1, 1};
    cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, lws, 0, nullptr, nullptr);

    ClHardwareParse hwParser;
    hwParser.parseCommands(*cmdQ);
    auto computeWalker = reinterpret_cast(hwParser.cmdWalker);
    ASSERT_NE(nullptr, computeWalker);
    EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_X, computeWalker->getPartitionType());
}

// Enqueues a MockKernel subclass whose isSingleSubdevicePreferred() returns
// true and expects the resulting walker to have partitioning disabled.
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, givenKernelThatPrefersSingleSubdeviceWhenProgramWalkerThenPartitioningIsNotUsed) {
    if (!OSInterface::osEnableLocalMemory) {
        GTEST_SKIP();
    }

    // Local kernel type that always reports a single-subdevice preference.
    struct SingleSubdeviceKernel : public MockKernel {
        using MockKernel::MockKernel;
        bool isSingleSubdevicePreferred() const override { return true; }
    };

    auto cmdQ = std::make_unique>(context.get(), device.get(), nullptr);
    size_t gws[] = {2, 1, 1};
    size_t lws[] = {1, 1, 1};
    SingleSubdeviceKernel subdeviceKernel(kernel->mockProgram, kernel->kernelInfo, *device);
    cmdQ->enqueueKernel(&subdeviceKernel, 1, nullptr, gws, lws, 0, nullptr, nullptr);

    ClHardwareParse hwParser;
    hwParser.parseCommands(*cmdQ);
    auto computeWalker = reinterpret_cast(hwParser.cmdWalker);
    ASSERT_NE(nullptr, computeWalker);
    EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_DISABLED, computeWalker->getPartitionType());
}

// With EnableWalkerPartition set to 0, expects the parsed walker to have
// partitioning disabled.
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, whenProgramWalkerIsCalledWithPartitionLogicDisabledThenWalkerPartitionLogicIsNotExecuted) {
    if (!OSInterface::osEnableLocalMemory) {
        GTEST_SKIP();
    }
    DebugManager.flags.EnableWalkerPartition.set(0u);
    auto cmdQ = std::make_unique>(context.get(), device.get(), nullptr);
    size_t gws[] = {2, 1, 1};
    size_t lws[] = {1, 1, 1};
    cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, lws, 0, nullptr, nullptr);

    ClHardwareParse hwParser;
    hwParser.parseCommands(*cmdQ);
    auto computeWalker = reinterpret_cast(hwParser.cmdWalker);
    ASSERT_NE(nullptr, computeWalker);
EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_DISABLED, computeWalker->getPartitionType());
}

// Enqueues 128 work-groups of size 1 with walker partitioning enabled and
// expects PARTITION_TYPE_X with a partition size of 64.
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, whenQueueIsCreatedWithMultiEngineSupportAndEnqueueIsDoneThenWalkerIsPartitioned) {
    if (!OSInterface::osEnableLocalMemory) {
        GTEST_SKIP();
    }
    DebugManager.flags.EnableWalkerPartition.set(1u);
    auto cmdQ = std::make_unique>(context.get(), device.get(), nullptr);
    size_t gws[] = {128, 1, 1};
    size_t lws[] = {1, 1, 1};
    cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, lws, 0, nullptr, nullptr);

    ClHardwareParse hwParser;
    hwParser.parseCommands(*cmdQ);
    auto computeWalker = reinterpret_cast(hwParser.cmdWalker);
    ASSERT_NE(nullptr, computeWalker);
    EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_X, computeWalker->getPartitionType());
    EXPECT_EQ(64u, computeWalker->getPartitionSize());
}

// Overrides the partition count (2) and partition type (2) through debug
// flags. Expects PARTITION_TYPE_Y, partition size 1, and the number of packets
// used by the first timestamp-packet node to match the overridden count.
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, whenProgramWalkerIsCalledWithDebugRegistryOverridesThenWalkerContainsProperParameters) {
    if (!OSInterface::osEnableLocalMemory) {
        GTEST_SKIP();
    }
    DebugManager.flags.EnableWalkerPartition.set(1u);
    DebugManager.flags.ExperimentalSetWalkerPartitionCount.set(2u);
    DebugManager.flags.ExperimentalSetWalkerPartitionType.set(2u);
    auto cmdQ = std::make_unique>(context.get(), device.get(), nullptr);
    size_t gws[] = {1, 1, 1};
    cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);

    ClHardwareParse hwParser;
    hwParser.parseCommands(*cmdQ);
    auto computeWalker = reinterpret_cast(hwParser.cmdWalker);
    auto timestampPacket = cmdQ->timestampPacketContainer->peekNodes().at(0);
    auto expectedPartitionCount = timestampPacket->getPacketsUsed();
    ASSERT_NE(nullptr, computeWalker);
    EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Y, computeWalker->getPartitionType());
    EXPECT_EQ(1u, computeWalker->getPartitionSize());
    EXPECT_EQ(expectedPartitionCount, static_cast(DebugManager.flags.ExperimentalSetWalkerPartitionCount.get()));
}

// Overrides the partition count to 1 (type still 2). Expects partitioning to
// be disabled: PARTITION_TYPE_DISABLED, partition size 0 and workload
// partitioning not enabled. This test has no local-memory skip guard.
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, whenProgramWalkerIsCalledWithDebugRegistryOverridesToPartitionCountOneThenProgramProperParameters) {
    DebugManager.flags.EnableWalkerPartition.set(1u);
    DebugManager.flags.ExperimentalSetWalkerPartitionCount.set(1u);
    DebugManager.flags.ExperimentalSetWalkerPartitionType.set(2u);
    auto cmdQ = std::make_unique>(context.get(), device.get(), nullptr);
    size_t gws[] = {1, 1, 1};
    cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);

    ClHardwareParse hwParser;
    hwParser.parseCommands(*cmdQ);
    auto computeWalker = reinterpret_cast(hwParser.cmdWalker);
    ASSERT_NE(nullptr, computeWalker);
    EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_DISABLED, computeWalker->getPartitionType());
    EXPECT_EQ(0u, computeWalker->getPartitionSize());
    EXPECT_FALSE(computeWalker->getWorkloadPartitionEnable());
}

// Backs up OSInterface::osEnableLocalMemory and forces it to false for the
// duration of the test. Even with partition count/type overrides set, expects
// the walker to have partitioning disabled.
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, whenThereIsNoLocalMemorySupportThenDoNotPartition) {
    DebugManager.flags.EnableWalkerPartition.set(1u);
    DebugManager.flags.ExperimentalSetWalkerPartitionCount.set(2u);
    DebugManager.flags.ExperimentalSetWalkerPartitionType.set(2u);
    VariableBackup backup(&OSInterface::osEnableLocalMemory, false);
    auto cmdQ = std::make_unique>(context.get(), device.get(), nullptr);
    size_t gws[] = {1, 1, 1};
    cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);

    ClHardwareParse hwParser;
    hwParser.parseCommands(*cmdQ);
    auto computeWalker = reinterpret_cast(hwParser.cmdWalker);
    ASSERT_NE(nullptr, computeWalker);
    EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_DISABLED, computeWalker->getPartitionType());
    EXPECT_EQ(0u, computeWalker->getPartitionSize());
    EXPECT_FALSE(computeWalker->getWorkloadPartitionEnable());
}

// Enqueues a kernel that waits on a user event, then sets the event status to
// 0 and parses the CSR's last flushed command stream.
// NOTE(review): the test name says "DoNotPartition", but the assertions expect
// PARTITION_TYPE_Y, partition size 1 and workload partitioning enabled —
// confirm which of the two reflects the intended behavior.
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, whenEnqueueIsBlockedOnUserEventThenDoNotPartition) {
    if (!OSInterface::osEnableLocalMemory) {
        GTEST_SKIP();
    }
    DebugManager.flags.EnableWalkerPartition.set(1u);
    DebugManager.flags.ExperimentalSetWalkerPartitionCount.set(2u);
    DebugManager.flags.ExperimentalSetWalkerPartitionType.set(2u);

    cl_event userEvent = clCreateUserEvent(context.get(), nullptr);

    auto cmdQ = std::make_unique>(context.get(), device.get(), nullptr);
    size_t gws[] = {1, 1, 1};
    cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 1, &userEvent, nullptr);

    // Status 0 is passed to the user event before the flushed stream is parsed.
    clSetUserEventStatus(userEvent, 0u);

    HardwareParse hwParser;
    hwParser.parseCommands(*cmdQ->getUltCommandStreamReceiver().lastFlushedCommandStream);
    hwParser.findHardwareCommands(&cmdQ->getGpgpuCommandStreamReceiver().getIndirectHeap(IndirectHeap::Type::DYNAMIC_STATE, 0));

    auto computeWalker = reinterpret_cast(hwParser.cmdWalker);
    ASSERT_NE(nullptr, computeWalker);
    EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Y, computeWalker->getPartitionType());
    EXPECT_EQ(1u, computeWalker->getPartitionSize());
    EXPECT_TRUE(computeWalker->getWorkloadPartitionEnable());

    clReleaseEvent(userEvent);
}

// Calls dispatchProfilingCommandsStart/End on a fresh command stream and
// expects nothing to have been written to it (getUsed() == 0).
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, whenDispatchProfilingCalledThenDoNothing) {
    MockCommandQueue cmdQ(context.get(), device.get(), nullptr, false);
    auto &cmdStream = cmdQ.getCS(0);
    MockTagAllocator timeStampAllocator(device->getRootDeviceIndex(), device->getMemoryManager(), 10,
                                        MemoryConstants::cacheLineSize, sizeof(HwTimeStamps), false, device->getDeviceBitfield());

    auto hwTimeStamp1 = timeStampAllocator.getTag();
    GpgpuWalkerHelper::dispatchProfilingCommandsStart(*hwTimeStamp1, &cmdStream, device->getHardwareInfo());

    GpgpuWalkerHelper::dispatchProfilingCommandsEnd(*hwTimeStamp1, &cmdStream, device->getHardwareInfo());

    EXPECT_EQ(0u, cmdStream.getUsed());
}

// Enqueues 128 work-items in groups of 8 with walker partitioning enabled.
// Expects PARTITION_TYPE_X with partition size 8, and the command list
// collected into storeDataImmList to be empty.
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, givenOpenClWhenEnqueuePartitionWalkerThenExpectNoSelfCleanupSection) {
    using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;

    if (!OSInterface::osEnableLocalMemory) {
        GTEST_SKIP();
    }
    DebugManager.flags.EnableWalkerPartition.set(1u);

    auto cmdQ = std::make_unique>(context.get(), device.get(), nullptr);
    size_t gws[] = {128, 1, 1};
    size_t lws[] = {8, 1, 1};
    cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, lws, 0, nullptr, nullptr);

    ClHardwareParse hwParser;
    hwParser.parseCommands(*cmdQ);
    auto computeWalker = reinterpret_cast(hwParser.cmdWalker);
    ASSERT_NE(nullptr, computeWalker);
    EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_X, computeWalker->getPartitionType());
    EXPECT_EQ(8u, computeWalker->getPartitionSize());

    GenCmdList storeDataImmList = hwParser.getCommandsList();
    EXPECT_EQ(0u, storeDataImmList.size());
}

// Fixture variant that sets CreateMultipleSubDevices = 2,
// EnableStaticPartitioning = 0 and EnableWalkerPartition = 1 before running
// the base fixture's SetUp. TearDown only forwards to the base fixture.
struct XeHPAndLaterDispatchWalkerBasicTestDynamicPartition : public XeHPAndLaterDispatchWalkerBasicTest {
    void SetUp() override {
        // Flags are set first so that the base SetUp runs with them in effect.
        DebugManager.flags.CreateMultipleSubDevices.set(2);
        DebugManager.flags.EnableStaticPartitioning.set(0);
        DebugManager.flags.EnableWalkerPartition.set(1u);
        XeHPAndLaterDispatchWalkerBasicTest::SetUp();
    }
    void TearDown() override {
        XeHPAndLaterDispatchWalkerBasicTest::TearDown();
    }
};

// With static partitioning disabled, expects the CSR's activePartitions to be
// 1 both before and after the enqueue, while the walker itself is still
// partitioned (PARTITION_TYPE_X, partition size 8).
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTestDynamicPartition, givenDynamicPartitioningWhenEnqueueingKernelThenExpectNoMultipleActivePartitionsSetInCsr) {
    if (!OSInterface::osEnableLocalMemory) {
        GTEST_SKIP();
    }
    auto cmdQ = std::make_unique>(context.get(), device.get(), nullptr);
    size_t gws[] = {128, 1, 1};
    size_t lws[] = {8, 1, 1};
    auto &commandStreamReceiver = cmdQ->getUltCommandStreamReceiver();
    // The preemption allocation is created up front for mid-thread preemption
    // or when the debugger is active.
    if (device->getPreemptionMode() == PreemptionMode::MidThread || device->isDebuggerActive()) {
        commandStreamReceiver.createPreemptionAllocation();
    }
    EXPECT_EQ(1u, commandStreamReceiver.activePartitions);
    cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, lws, 0, nullptr, nullptr);
    EXPECT_EQ(1u, commandStreamReceiver.activePartitions);

    ClHardwareParse hwParser;
hwParser.parseCommands(*cmdQ);
    auto computeWalker = reinterpret_cast(hwParser.cmdWalker);
    ASSERT_NE(nullptr, computeWalker);
    EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_X, computeWalker->getPartitionType());
    EXPECT_EQ(8u, computeWalker->getPartitionSize());
}

// Fixture variant that sets CreateMultipleSubDevices = 2,
// EnableStaticPartitioning = 1 and EnableWalkerPartition = 1 before running
// the base fixture's SetUp. TearDown only forwards to the base fixture.
struct XeHPAndLaterDispatchWalkerBasicTestStaticPartition : public XeHPAndLaterDispatchWalkerBasicTest {
    void SetUp() override {
        // Flags are set first so that the base SetUp runs with them in effect.
        DebugManager.flags.CreateMultipleSubDevices.set(2);
        DebugManager.flags.EnableStaticPartitioning.set(1);
        DebugManager.flags.EnableWalkerPartition.set(1u);
        XeHPAndLaterDispatchWalkerBasicTest::SetUp();
    }
    void TearDown() override {
        XeHPAndLaterDispatchWalkerBasicTest::TearDown();
    }
};

// With static partitioning enabled, expects the CSR's activePartitions to be
// 2 both before and after the enqueue, and the walker to be partitioned
// (PARTITION_TYPE_X, partition size 8).
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTestStaticPartition, givenStaticPartitioningWhenEnqueueingKernelThenMultipleActivePartitionsAreSetInCsr) {
    if (!OSInterface::osEnableLocalMemory) {
        GTEST_SKIP();
    }
    auto cmdQ = std::make_unique>(context.get(), device.get(), nullptr);
    size_t gws[] = {128, 1, 1};
    size_t lws[] = {8, 1, 1};
    auto &commandStreamReceiver = cmdQ->getUltCommandStreamReceiver();
    if (device->getPreemptionMode() == PreemptionMode::MidThread || device->isDebuggerActive()) {
        commandStreamReceiver.createPreemptionAllocation();
    }
    EXPECT_EQ(2u, commandStreamReceiver.activePartitions);
    cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, lws, 0, nullptr, nullptr);
    EXPECT_EQ(2u, commandStreamReceiver.activePartitions);

    ClHardwareParse hwParser;
    hwParser.parseCommands(*cmdQ);
    auto computeWalker = reinterpret_cast(hwParser.cmdWalker);
    ASSERT_NE(nullptr, computeWalker);
    EXPECT_EQ(FamilyType::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_X, computeWalker->getPartitionType());
    EXPECT_EQ(8u, computeWalker->getPartitionSize());
}

// Enqueues a global size of 129 with local size 8 after allowing non-uniform
// work-groups on the program, and expects two walkers in the command stream:
// the first partitioned (PARTITION_TYPE_X, size 8), the second with
// partitioning disabled. Between them a load-register-from-memory command is
// expected that reads the work-partition allocation address into register
// 0x221C.
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTestStaticPartition,
            givenStaticPartitioningWhenEnqueueingNonUnifromKernelThenMultipleActivePartitionsAreSetInCsrAndWparidRegisterIsReconfiguredToStatic) {
    using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER;
    using MI_LOAD_REGISTER_MEM = typename FamilyType::MI_LOAD_REGISTER_MEM;
    if (!OSInterface::osEnableLocalMemory) {
        GTEST_SKIP();
    }
    auto cmdQ = std::make_unique>(context.get(), device.get(), nullptr);
    size_t gws[] = {129, 1, 1};
    size_t lws[] = {8, 1, 1};
    auto &commandStreamReceiver = cmdQ->getUltCommandStreamReceiver();
    if (device->getPreemptionMode() == PreemptionMode::MidThread || device->isDebuggerActive()) {
        commandStreamReceiver.createPreemptionAllocation();
    }
    EXPECT_EQ(2u, commandStreamReceiver.activePartitions);

    kernel->mockProgram->allowNonUniform = true;
    cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, lws, 0, nullptr, nullptr);
    EXPECT_EQ(2u, commandStreamReceiver.activePartitions);

    HardwareParse hwParser;
    hwParser.parseCommands(*cmdQ->commandStream);

    // First walker found in the parsed command list.
    auto firstComputeWalkerItor = find(hwParser.cmdList.begin(), hwParser.cmdList.end());
    ASSERT_NE(hwParser.cmdList.end(), firstComputeWalkerItor);
    auto computeWalker = reinterpret_cast(*firstComputeWalkerItor);
    EXPECT_EQ(COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_X, computeWalker->getPartitionType());
    EXPECT_EQ(8u, computeWalker->getPartitionSize());

    // Second walker: the search resumes just past the first one.
    auto nextCmdItor = firstComputeWalkerItor;
    ++nextCmdItor;

    auto secondComputeWalkerItor = find(nextCmdItor, hwParser.cmdList.end());
    ASSERT_NE(hwParser.cmdList.end(), secondComputeWalkerItor);
    computeWalker = reinterpret_cast(*secondComputeWalkerItor);
    EXPECT_EQ(COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_DISABLED, computeWalker->getPartitionType());

    // The register load is looked up only in the range between the two walkers.
    auto workPartitionAllocationGpuVa = commandStreamReceiver.getWorkPartitionAllocationGpuAddress();
    auto expectedRegister = 0x221Cu;
    auto loadRegisterMem = hwParser.getCommand(firstComputeWalkerItor, secondComputeWalkerItor);
    ASSERT_NE(nullptr, loadRegisterMem);
    EXPECT_EQ(workPartitionAllocationGpuVa, loadRegisterMem->getMemoryAddress());
    EXPECT_EQ(expectedRegister, loadRegisterMem->getRegisterAddress());
}

// Alias of the base fixture used by the non-default-platform test.
using NonDefaultPlatformGpuWalkerTest =
XeHPAndLaterDispatchWalkerBasicTest; HWCMDTEST_F(IGFX_XE_HP_CORE, NonDefaultPlatformGpuWalkerTest, givenNonDefaultPlatformWhenSetupTimestampPacketThenGmmHelperIsTakenFromNonDefaultPlatform) { auto executionEnvironment = std::make_unique(); auto rootDeviceEnvironment = executionEnvironment->rootDeviceEnvironments[0].get(); rootDeviceEnvironment->initGmm(); auto cmdQ = std::make_unique>(context.get(), device.get(), nullptr); size_t gws[] = {1, 1, 1}; cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); auto &cmdStream = cmdQ->getCS(0); TagNode> timestamp; ClHardwareParse hwParser; hwParser.parseCommands(*cmdQ); auto computeWalker = reinterpret_cast(hwParser.cmdWalker); ASSERT_NE(nullptr, computeWalker); platformsImpl->clear(); EXPECT_EQ(platform(), nullptr); GpgpuWalkerHelper::setupTimestampPacket(&cmdStream, computeWalker, static_cast(×tamp), *rootDeviceEnvironment); } HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerDispatchTest, givenDefaultLocalIdsGenerationWhenPassingFittingParametersThenReturnFalse) { uint32_t workDim = 1; uint32_t simd = 8; size_t lws[3] = {16, 1, 1}; std::array walkOrder = {}; uint32_t requiredWalkOrder = 0u; EXPECT_FALSE(EncodeDispatchKernel::isRuntimeLocalIdsGenerationRequired( workDim, lws, walkOrder, true, requiredWalkOrder, simd)); } HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerDispatchTest, givenEnabledLocalIdsGenerationWhenPassingFittingOneDimParametersThenReturnFalse) { DebugManagerStateRestore restore; DebugManager.flags.EnableHwGenerationLocalIds.set(1); uint32_t workDim = 1; uint32_t simd = 8; size_t lws[3] = {16, 1, 1}; std::array walkOrder = {{0, 1, 2}}; uint32_t requiredWalkOrder = 4u; EXPECT_FALSE(EncodeDispatchKernel::isRuntimeLocalIdsGenerationRequired( workDim, lws, walkOrder, true, requiredWalkOrder, simd)); EXPECT_EQ(0u, requiredWalkOrder); lws[0] = 15; EXPECT_FALSE(EncodeDispatchKernel::isRuntimeLocalIdsGenerationRequired( workDim, lws, walkOrder, false, requiredWalkOrder, simd)); EXPECT_EQ(0u, 
requiredWalkOrder); } HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerDispatchTest, givenEnabledLocalIdsGenerationWhenPassingFittingTwoDimParametersThenReturnFalse) { DebugManagerStateRestore restore; DebugManager.flags.EnableHwGenerationLocalIds.set(1); uint32_t workDim = 2; uint32_t simd = 8; size_t lws[3] = {16, 16, 1}; std::array walkOrder = {{1, 0, 2}}; uint32_t requiredWalkOrder = 77u; EXPECT_FALSE(EncodeDispatchKernel::isRuntimeLocalIdsGenerationRequired(workDim, lws, walkOrder, true, requiredWalkOrder, simd)); EXPECT_EQ(2u, requiredWalkOrder); lws[0] = 15; EXPECT_FALSE(EncodeDispatchKernel::isRuntimeLocalIdsGenerationRequired( workDim, lws, walkOrder, true, requiredWalkOrder, simd)); EXPECT_EQ(2u, requiredWalkOrder); } HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerDispatchTest, givenWalkOrderThatNeedsToBeFollowedWithCompatibleDimSizesArePassedThenRuntimeGenerationIsNotRequired) { DebugManagerStateRestore restore; DebugManager.flags.EnableHwGenerationLocalIds.set(1); uint32_t workDim = 3; uint32_t simd = 8; size_t lws[3] = {200, 1, 1}; std::array walkOrder = {{2, 1, 0}}; uint32_t requiredWalkOrder = 77u; EXPECT_FALSE(EncodeDispatchKernel::isRuntimeLocalIdsGenerationRequired( workDim, lws, walkOrder, true, requiredWalkOrder, simd)); EXPECT_EQ(5u, requiredWalkOrder); } HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerDispatchTest, givenLocalWorkgroupSizeGreaterThen1024ThenRuntimeMustGenerateLocalIds) { DebugManagerStateRestore restore; DebugManager.flags.EnableHwGenerationLocalIds.set(1); uint32_t workDim = 3; uint32_t simd = 8; std::array lws = {1025, 1, 1}; std::array walkOrder = {{0, 1, 2}}; uint32_t requiredWalkOrder = 77u; EXPECT_TRUE(EncodeDispatchKernel::isRuntimeLocalIdsGenerationRequired( workDim, lws.data(), walkOrder, false, requiredWalkOrder, simd)); lws = {1, 1, 1025}; EXPECT_TRUE(EncodeDispatchKernel::isRuntimeLocalIdsGenerationRequired( workDim, lws.data(), walkOrder, false, requiredWalkOrder, simd)); lws = {32, 32, 4}; 
EXPECT_TRUE(EncodeDispatchKernel::isRuntimeLocalIdsGenerationRequired( workDim, lws.data(), walkOrder, false, requiredWalkOrder, simd)); workDim = 2; EXPECT_FALSE(EncodeDispatchKernel::isRuntimeLocalIdsGenerationRequired( workDim, lws.data(), walkOrder, false, requiredWalkOrder, simd)); } HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerDispatchTest, givenWalkOrderThatDoesntNeedToBeFollowedWhenIncompatibleDimSizesArePassedThenRuntimeGenerationIsReuqired) { DebugManagerStateRestore restore; DebugManager.flags.EnableHwGenerationLocalIds.set(1); uint32_t workDim = 3; uint32_t simd = 8; std::array lws = {200, 1, 1}; std::array walkOrder = {{0, 2, 1}}; uint32_t requiredWalkOrder = 77u; EXPECT_FALSE(EncodeDispatchKernel::isRuntimeLocalIdsGenerationRequired( workDim, lws.data(), walkOrder, false, requiredWalkOrder, simd)); EXPECT_EQ(4u, requiredWalkOrder); lws = {16, 17, 2}; EXPECT_FALSE(EncodeDispatchKernel::isRuntimeLocalIdsGenerationRequired( workDim, lws.data(), walkOrder, false, requiredWalkOrder, simd)); EXPECT_EQ(1u, requiredWalkOrder); lws = {16, 2, 17}; EXPECT_FALSE(EncodeDispatchKernel::isRuntimeLocalIdsGenerationRequired( workDim, lws.data(), walkOrder, false, requiredWalkOrder, simd)); EXPECT_EQ(0u, requiredWalkOrder); lws = {17, 2, 17}; EXPECT_TRUE(EncodeDispatchKernel::isRuntimeLocalIdsGenerationRequired( workDim, lws.data(), walkOrder, false, requiredWalkOrder, simd)); lws = {3, 4, 32}; EXPECT_FALSE(EncodeDispatchKernel::isRuntimeLocalIdsGenerationRequired( workDim, lws.data(), walkOrder, false, requiredWalkOrder, simd)); EXPECT_EQ(4u, requiredWalkOrder); workDim = 2; lws = {17, 2, 17}; EXPECT_FALSE(EncodeDispatchKernel::isRuntimeLocalIdsGenerationRequired( workDim, lws.data(), walkOrder, false, requiredWalkOrder, simd)); EXPECT_EQ(2u, requiredWalkOrder); lws = {2, 17, 17}; EXPECT_FALSE(EncodeDispatchKernel::isRuntimeLocalIdsGenerationRequired( workDim, lws.data(), walkOrder, false, requiredWalkOrder, simd)); EXPECT_EQ(0u, requiredWalkOrder); lws = {2, 4, 17}; 
EXPECT_FALSE(EncodeDispatchKernel::isRuntimeLocalIdsGenerationRequired( workDim, lws.data(), walkOrder, false, requiredWalkOrder, simd)); EXPECT_EQ(0u, requiredWalkOrder); workDim = 1; lws = {17, 2, 17}; EXPECT_FALSE(EncodeDispatchKernel::isRuntimeLocalIdsGenerationRequired( workDim, lws.data(), walkOrder, false, requiredWalkOrder, simd)); EXPECT_EQ(0u, requiredWalkOrder); workDim = 1; lws = {2, 17, 17}; EXPECT_FALSE(EncodeDispatchKernel::isRuntimeLocalIdsGenerationRequired( workDim, lws.data(), walkOrder, false, requiredWalkOrder, simd)); EXPECT_EQ(0u, requiredWalkOrder); } HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerDispatchTest, givenDisabledLocalIdsGenerationWhenPassingFittingThreeDimParametersThenReturnTrue) { DebugManagerStateRestore restore; DebugManager.flags.EnableHwGenerationLocalIds.set(0); uint32_t workDim = 3; uint32_t simd = 8; size_t lws[3] = {16, 16, 4}; std::array walkOrder = {{1, 0, 2}}; uint32_t requiredWalkOrder = 77u; EXPECT_TRUE(EncodeDispatchKernel::isRuntimeLocalIdsGenerationRequired( workDim, lws, walkOrder, true, requiredWalkOrder, simd)); } HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerDispatchTest, givenEnabledLocalIdsGenerationWhenPassingFittingThreeDimParametersThenReturnFalseAndProperWalkOrder) { DebugManagerStateRestore restore; DebugManager.flags.EnableHwGenerationLocalIds.set(1); uint32_t workDim = 3; uint32_t simd = 8; size_t lws[3] = {16, 16, 2}; std::array walkOrder = {{2, 1, 0}}; uint32_t requiredWalkOrder = 77u; EXPECT_FALSE(EncodeDispatchKernel::isRuntimeLocalIdsGenerationRequired( workDim, lws, walkOrder, true, requiredWalkOrder, simd)); EXPECT_EQ(5u, requiredWalkOrder); walkOrder = {2, 0, 1}; EXPECT_FALSE(EncodeDispatchKernel::isRuntimeLocalIdsGenerationRequired( workDim, lws, walkOrder, true, requiredWalkOrder, simd)); EXPECT_EQ(3u, requiredWalkOrder); walkOrder = {1, 2, 0}; EXPECT_FALSE(EncodeDispatchKernel::isRuntimeLocalIdsGenerationRequired( workDim, lws, walkOrder, true, requiredWalkOrder, simd)); EXPECT_EQ(4u, requiredWalkOrder); 
walkOrder = {1, 0, 2}; EXPECT_FALSE(EncodeDispatchKernel::isRuntimeLocalIdsGenerationRequired( workDim, lws, walkOrder, true, requiredWalkOrder, simd)); EXPECT_EQ(2u, requiredWalkOrder); walkOrder = {0, 2, 1}; EXPECT_FALSE(EncodeDispatchKernel::isRuntimeLocalIdsGenerationRequired( workDim, lws, walkOrder, true, requiredWalkOrder, simd)); EXPECT_EQ(1u, requiredWalkOrder); walkOrder = {0, 1, 2}; EXPECT_FALSE(EncodeDispatchKernel::isRuntimeLocalIdsGenerationRequired( workDim, lws, walkOrder, true, requiredWalkOrder, simd)); EXPECT_EQ(0u, requiredWalkOrder); //incorrect walkOrder returns 6 walkOrder = {2, 2, 0}; EXPECT_FALSE(EncodeDispatchKernel::isRuntimeLocalIdsGenerationRequired( workDim, lws, walkOrder, true, requiredWalkOrder, simd)); EXPECT_EQ(6u, requiredWalkOrder); } HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerDispatchTest, givenEnabledLocalIdsGenerationWhenPassingInvalidLwsTwoDimParametersThenReturnTrue) { DebugManagerStateRestore restore; DebugManager.flags.EnableHwGenerationLocalIds.set(1); uint32_t workDim = 2; uint32_t simd = 8; size_t lws[3] = {15, 15, 1}; std::array walkOrder = {{0, 1, 2}}; uint32_t requiredWalkOrder = 4u; EXPECT_TRUE(EncodeDispatchKernel::isRuntimeLocalIdsGenerationRequired( workDim, lws, walkOrder, true, requiredWalkOrder, simd)); } HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerDispatchTest, givenEnabledLocalIdsGenerationWhenPassingInvalidLwsThreeDimParametersThenReturnTrue) { DebugManagerStateRestore restore; DebugManager.flags.EnableHwGenerationLocalIds.set(1); uint32_t workDim = 3; uint32_t simd = 8; size_t lws[3] = {16, 15, 15}; std::array walkOrder = {{0, 1, 2}}; uint32_t requiredWalkOrder = 4u; EXPECT_TRUE(EncodeDispatchKernel::isRuntimeLocalIdsGenerationRequired( workDim, lws, walkOrder, true, requiredWalkOrder, simd)); lws[0] = 15; EXPECT_TRUE(EncodeDispatchKernel::isRuntimeLocalIdsGenerationRequired( workDim, lws, walkOrder, true, requiredWalkOrder, simd)); } HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerDispatchTest, 
givenSimdSize1TWhenCheckToGeneratHwIdsThenReturnedFalse) { DebugManagerStateRestore restore; DebugManager.flags.EnableHwGenerationLocalIds.set(1); uint32_t workDim = 3; uint32_t simd = 8; std::array lws = {200, 1, 1}; std::array walkOrder = {{0, 2, 1}}; uint32_t requiredWalkOrder = 77u; EXPECT_FALSE(EncodeDispatchKernel::isRuntimeLocalIdsGenerationRequired( workDim, lws.data(), walkOrder, false, requiredWalkOrder, simd)); simd = 1; EXPECT_TRUE(EncodeDispatchKernel::isRuntimeLocalIdsGenerationRequired( workDim, lws.data(), walkOrder, false, requiredWalkOrder, simd)); } compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/enqueue_barrier_tests.cpp000066400000000000000000000256771422164147700315750ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/command_queue/gpgpu_walker.h" #include "opencl/source/event/user_event.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/test/unit_test/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" using namespace NEO; using BarrierTest = Test; HWTEST_F(BarrierTest, givenCsrWithHigherLevelThenCommandQueueWhenEnqueueBarrierIsCalledThenCommandQueueAlignsToCsrWithoutSendingAnyCommands) { auto pCmdQ = this->pCmdQ; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); // Set task levels to known values. 
uint32_t originalCSRLevel = 2; commandStreamReceiver.taskLevel = originalCSRLevel; pCmdQ->taskLevel = originalCSRLevel; uint32_t originalTaskCount = 15; commandStreamReceiver.taskCount = originalTaskCount; auto &csrCommandStream = commandStreamReceiver.commandStream; auto csrUsed = csrCommandStream.getUsed(); cl_uint numEventsInWaitList = 0; const cl_event *eventWaitList = nullptr; cl_event *event = nullptr; auto &commandStream = pCmdQ->getCS(0); auto used = commandStream.getUsed(); auto retVal = pCmdQ->enqueueBarrierWithWaitList( numEventsInWaitList, eventWaitList, event); ASSERT_EQ(CL_SUCCESS, retVal); // csr is untouched as we do not submit anything, cmd queue task level goes up as this is barrier call EXPECT_EQ(2u, commandStreamReceiver.peekTaskLevel()); EXPECT_EQ(3u, pCmdQ->taskLevel); //make sure nothing was added to CommandStream or CSR-CommandStream and command queue still uses this stream EXPECT_EQ(used, commandStream.getUsed()); EXPECT_EQ(&commandStream, &pCmdQ->getCS(0)); EXPECT_EQ(csrUsed, csrCommandStream.getUsed()); EXPECT_EQ(&csrCommandStream, &commandStreamReceiver.commandStream); } HWTEST_F(BarrierTest, GivenCsrTaskLevelGreaterThenCmdqTaskLevelWhenEnqueingBarrierWithWaitListThenAddPipeControlIsNotAdded) { typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; auto pCS = this->pCS; auto pCmdQ = this->pCmdQ; auto pCmdBuffer = this->pCmdBuffer; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.setMediaVFEStateDirty(false); // Set task levels to known values. commandStreamReceiver.taskLevel = 2; pCmdQ->taskLevel = 1; cl_uint numEventsInWaitList = 0; const cl_event *eventWaitList = nullptr; cl_event *event = nullptr; auto retVal = pCmdQ->enqueueBarrierWithWaitList( numEventsInWaitList, eventWaitList, event); ASSERT_EQ(CL_SUCCESS, retVal); // Should sync CSR & CmdQ levels. 
EXPECT_GE(commandStreamReceiver.peekTaskLevel(), pCmdQ->taskLevel); auto sizeUsed = pCS->getUsed(); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, pCmdBuffer, sizeUsed)); // If CSR > CQ then a PC isn't required. auto itorCmd = find(cmdList.begin(), cmdList.end()); ASSERT_EQ(cmdList.end(), itorCmd); } HWTEST_F(BarrierTest, GivenEventWhenEnqueingBarrierWithWaitListThenEventIsSetupCorrectly) { auto pCmdQ = this->pCmdQ; cl_uint numEventsInWaitList = 0; const cl_event *eventWaitList = nullptr; cl_event event = nullptr; auto retVal = pCmdQ->enqueueBarrierWithWaitList( numEventsInWaitList, eventWaitList, &event); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, event); // Check CL_EVENT_COMMAND_TYPE { auto pEvent = (Event *)event; cl_command_type cmdType = 0; size_t sizeReturned = 0; auto result = clGetEventInfo(pEvent, CL_EVENT_COMMAND_TYPE, sizeof(cmdType), &cmdType, &sizeReturned); ASSERT_EQ(CL_SUCCESS, result); EXPECT_EQ(static_cast(CL_COMMAND_BARRIER), cmdType); EXPECT_EQ(sizeof(cl_command_type), sizeReturned); delete pEvent; } } HWTEST_F(BarrierTest, GivenGpuHangAndBlockingCallWhenEnqueingBarrierWithWaitListThenOutOfResourcesIsReturned) { DebugManagerStateRestore stateRestore; DebugManager.flags.MakeEachEnqueueBlocking.set(true); std::unique_ptr device(new MockClDevice{MockClDevice::createWithNewExecutionEnvironment(nullptr)}); cl_queue_properties props = {}; MockCommandQueueHw mockCommandQueueHw(context, device.get(), &props); mockCommandQueueHw.waitForAllEnginesReturnValue = WaitStatus::GpuHang; cl_uint numEventsInWaitList = 0; const cl_event *eventWaitList = nullptr; const auto enqueueResult = mockCommandQueueHw.enqueueBarrierWithWaitList( numEventsInWaitList, eventWaitList, nullptr); EXPECT_EQ(CL_OUT_OF_RESOURCES, enqueueResult); EXPECT_EQ(1, mockCommandQueueHw.waitForAllEnginesCalledCount); } HWTEST_F(BarrierTest, WhenEnqueingBarrierWithWaitListThenReturnedEventShouldHaveEqualDepth) { auto pCmdQ = this->pCmdQ; auto 
&commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); // Set task levels to known values. commandStreamReceiver.taskLevel = 2; pCmdQ->taskLevel = 1; cl_uint numEventsInWaitList = 0; const cl_event *eventWaitList = nullptr; cl_event event = nullptr; auto retVal = pCmdQ->enqueueBarrierWithWaitList( numEventsInWaitList, eventWaitList, &event); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, event); auto pEvent = (Event *)event; // Should sync all 3 (CSR, CmdQ, Event) levels. EXPECT_GE(commandStreamReceiver.peekTaskLevel(), pEvent->taskLevel); EXPECT_EQ(pCmdQ->taskLevel, pEvent->taskLevel); delete pEvent; } HWTEST_F(BarrierTest, WhenEnqueingBarrierWithWaitListThenDependenciesShouldSync) { auto pCmdQ = this->pCmdQ; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); // In N:1, CSR is always highest task level. commandStreamReceiver.taskLevel = 7; // In N:1, pCmdQ.level <= CSR.level pCmdQ->taskLevel = 7; // In N:1, event.level <= pCmdQ.level Event event1(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 5, 15); Event event2(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 6, 16); Event event3(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 1, 17); cl_event eventWaitList[] = { &event1, &event2, &event3}; cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]); cl_event event = nullptr; auto retVal = pCmdQ->enqueueBarrierWithWaitList( numEventsInWaitList, eventWaitList, &event); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, event); auto pEvent = castToObject(event); auto &csr = pCmdQ->getGpgpuCommandStreamReceiver(); // in this case only cmdQ raises the taskLevel why csr stay intact EXPECT_EQ(8u, pCmdQ->taskLevel); if (csr.peekTimestampPacketWriteEnabled()) { EXPECT_EQ(pCmdQ->taskLevel + 1, commandStreamReceiver.peekTaskLevel()); } else { EXPECT_EQ(7u, commandStreamReceiver.peekTaskLevel()); } EXPECT_EQ(pCmdQ->taskLevel, pEvent->taskLevel); EXPECT_EQ(8u, pEvent->taskLevel); delete pEvent; } HWTEST_F(BarrierTest, 
givenNotBlockedCommandQueueAndEnqueueBarrierWithWaitlistReturningEventWhenCallIsMadeThenDontWaitUntilEventIsSignaled) { MockCommandQueueHw mockCmdQueue(context, pClDevice, nullptr); // In N:1, event.level <= pCmdQ.level Event event1(&mockCmdQueue, CL_COMMAND_NDRANGE_KERNEL, 5, 15); Event event2(&mockCmdQueue, CL_COMMAND_NDRANGE_KERNEL, 6, 16); Event event3(&mockCmdQueue, CL_COMMAND_NDRANGE_KERNEL, 1, 17); cl_event eventWaitList[] = { &event1, &event2, &event3}; cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]); cl_event event = nullptr; auto latestTaskCountWaitedBeforeEnqueue = mockCmdQueue.latestTaskCountWaited.load(); auto retVal = mockCmdQueue.enqueueBarrierWithWaitList( numEventsInWaitList, eventWaitList, &event); auto &csr = mockCmdQueue.getGpgpuCommandStreamReceiver(); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(latestTaskCountWaitedBeforeEnqueue, mockCmdQueue.latestTaskCountWaited); auto pEvent = castToObject(event); EXPECT_NE(nullptr, pEvent); if (csr.peekTimestampPacketWriteEnabled()) { EXPECT_EQ(csr.peekTaskCount(), pEvent->peekTaskCount()); } else { EXPECT_EQ(17u, pEvent->peekTaskCount()); } EXPECT_TRUE(pEvent->updateStatusAndCheckCompletion()); delete pEvent; } HWTEST_F(BarrierTest, givenBlockedCommandQueueAndEnqueueBarrierWithWaitlistReturningEventWhenCallIsMadeThenReturnEventIsNotSignaled) { UserEvent event2(&pCmdQ->getContext()); cl_event eventWaitList[] = { &event2, }; cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]); cl_event event = nullptr; auto retVal = pCmdQ->enqueueBarrierWithWaitList( numEventsInWaitList, eventWaitList, &event); EXPECT_EQ(CL_SUCCESS, retVal); auto pEvent = (Event *)event; EXPECT_EQ(pEvent->peekTaskCount(), CompletionStamp::notReady); event2.setStatus(CL_COMPLETE); clReleaseEvent(event); } HWTEST_F(BarrierTest, givenEmptyCommandStreamAndBlockedBarrierCommandWhenUserEventIsSignaledThenNewCommandStreamIsNotAcquired) { UserEvent event2(&pCmdQ->getContext()); cl_event 
eventWaitList[] = { &event2, }; cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]); cl_event event = nullptr; auto &commandStream = pCmdQ->getCS(0); auto commandStreamStart = commandStream.getUsed(); auto commandStreamBuffer = commandStream.getCpuBase(); auto retVal = pCmdQ->enqueueBarrierWithWaitList( numEventsInWaitList, eventWaitList, &event); EXPECT_EQ(CL_SUCCESS, retVal); // Consume all memory except what is needed for this enqueue size_t barrierCmdStreamSize = NEO::EnqueueOperation::getSizeRequiredCS(CL_COMMAND_BARRIER, false, false, *pCmdQ, nullptr, {}); commandStream.getSpace(commandStream.getMaxAvailableSpace() - barrierCmdStreamSize); //now trigger event event2.setStatus(CL_COMPLETE); auto commandStreamStart2 = commandStream.getUsed(); auto commandStreamBuffer2 = commandStream.getCpuBase(); EXPECT_EQ(0u, commandStreamStart); EXPECT_GT(commandStreamStart2, 0u); EXPECT_EQ(commandStreamBuffer2, commandStreamBuffer); EXPECT_GE(commandStream.getMaxAvailableSpace(), commandStream.getMaxAvailableSpace()); clReleaseEvent(event); } enqueue_command_without_kernel_tests.cpp000066400000000000000000000547321422164147700346230ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/* * Copyright (C) 2019-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/timestamp_packet.h" #include "shared/source/memory_manager/surface.h" #include "shared/source/os_interface/os_context.h" #include "shared/test/common/mocks/mock_csr.h" #include "shared/test/common/mocks/mock_execution_environment.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "shared/test/common/mocks/mock_timestamp_container.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/common/test_macros/test_checks_shared.h" #include "opencl/source/event/event_builder.h" #include "opencl/source/event/user_event.h" #include "opencl/source/helpers/enqueue_properties.h" 
#include "opencl/source/mem_obj/buffer.h"
#include "opencl/test/unit_test/fixtures/dispatch_flags_fixture.h"
#include "opencl/test/unit_test/fixtures/enqueue_handler_fixture.h"
#include "opencl/test/unit_test/mocks/mock_command_queue.h"
#include "opencl/test/unit_test/mocks/mock_event.h"
#include "opencl/test/unit_test/mocks/mock_kernel.h"
#include "opencl/test/unit_test/mocks/mock_mdi.h"

namespace NEO {

// NOTE(review): the template parameter/argument lists in this section were
// missing from the reviewed text and have been reconstructed from how the
// names are used (FamilyType inside HWTEST_F bodies, `enabled` passed to
// EnableTimestampPacket.set) - confirm against the canonical file.

// Mock command queue whose isCacheFlushCommand() answer is controlled by a
// public flag instead of being derived from the command type.
template <typename GfxFamily>
class MockCommandQueueWithCacheFlush : public MockCommandQueueHw<GfxFamily> {
    // Inherit every constructor of the mocked queue.
    using MockCommandQueueHw<GfxFamily>::MockCommandQueueHw;

  public:
    // Ignores commandType and reports the value of commandRequireCacheFlush.
    bool isCacheFlushCommand(uint32_t commandType) const override {
        return commandRequireCacheFlush;
    }

    // Value returned by isCacheFlushCommand(); false unless a test sets it.
    bool commandRequireCacheFlush = false;
};

// Enqueues a kernel-less command that references one surface and checks that
// the surface's allocation ends up with task count 1 for the OS context of the
// queue's GPGPU command stream receiver.
HWTEST_F(EnqueueHandlerTest, GivenCommandStreamWithoutKernelWhenCommandEnqueuedThenTaskCountIncreased) {
    auto mockCmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(context, pClDevice, nullptr);

    // Backing storage for the mock allocation; its contents are never read here.
    char buffer[64];
    auto allocation = std::make_unique<MockGraphicsAllocation>(buffer, sizeof(buffer));
    auto surface = std::make_unique<GeneralSurface>(allocation.get());
    Surface *surfaces[] = {surface.get()};

    EventsRequest eventsRequest(0, nullptr, nullptr);
    EventBuilder eventBuilder;
    TimestampPacketDependencies timestampPacketDependencies;
    CsrDependencies csrDeps;
    EnqueueProperties enqueueProperties(false, false, false, true, false, nullptr);

    // Kept as a named variable because it is handed to the enqueue call as an argument
    // that may be taken by reference.
    bool blocking = true;

    mockCmdQ->enqueueCommandWithoutKernel(surfaces, 1, &mockCmdQ->getCS(0), 0, blocking, enqueueProperties, timestampPacketDependencies,
                                          eventsRequest, eventBuilder, 0, csrDeps, nullptr);

    const auto contextId = mockCmdQ->getGpgpuCommandStreamReceiver().getOsContext().getContextId();
    EXPECT_EQ(allocation->getTaskCount(contextId), 1u);
}

// Fixture variant of EnqueueHandlerTest that forces the EnableTimestampPacket
// debug flag to `enabled` before the base fixture is set up.
template <bool enabled>
struct EnqueueHandlerTimestampTest : public EnqueueHandlerTest {
    void SetUp() override {
        // The flag has to be written before the base SetUp() runs.
        DebugManager.flags.EnableTimestampPacket.set(enabled);
        EnqueueHandlerTest::SetUp();
    }

    void TearDown() override {
        EnqueueHandlerTest::TearDown();
    }

    // Presumably snapshots the debug flags at construction and restores them on
    // destruction, undoing the override made in SetUp() - TODO confirm.
    DebugManagerStateRestore restorer;
};

using EnqueueHandlerTimestampEnabledTest = EnqueueHandlerTimestampTest<true>;
HWTEST_F(EnqueueHandlerTimestampEnabledTest, givenProflingAndTimeStampPacketsEnabledWhenEnqueueCommandWithoutKernelThenSubmitTimeStampIsSet) { cl_queue_properties properties[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, 0}; std::unique_ptr> mockCmdQ(new MockCommandQueueHw(context, pClDevice, properties)); char buffer[64]; std::unique_ptr allocation(new MockGraphicsAllocation(buffer, sizeof(buffer))); std::unique_ptr surface(new GeneralSurface(allocation.get())); EventsRequest eventsRequest(0, nullptr, nullptr); EventBuilder eventBuilder; eventBuilder.create>(mockCmdQ.get(), CL_COMMAND_USER, CompletionStamp::notReady, CompletionStamp::notReady); auto ev = static_cast *>(eventBuilder.getEvent()); ev->setProfilingEnabled(true); Surface *surfaces[] = {surface.get()}; auto blocking = true; TimestampPacketDependencies timestampPacketDependencies; CsrDependencies csrDeps; EnqueueProperties enqueueProperties(false, false, false, true, false, nullptr); EXPECT_EQ(ev->submitTimeStamp.CPUTimeinNS, 0u); EXPECT_EQ(ev->submitTimeStamp.GPUTimeStamp, 0u); mockCmdQ->enqueueCommandWithoutKernel(surfaces, 1, &mockCmdQ->getCS(0), 0, blocking, enqueueProperties, timestampPacketDependencies, eventsRequest, eventBuilder, 0, csrDeps, nullptr); EXPECT_NE(ev->submitTimeStamp.CPUTimeinNS, 0u); EXPECT_EQ(ev->submitTimeStamp.GPUTimeStamp, 0u); DebugManagerStateRestore dbgState; DebugManager.flags.EnableDeviceBasedTimestamps.set(true); ev->queueTimeStamp.GPUTimeStamp = 1000; ev->calculateSubmitTimestampData(); EXPECT_NE(ev->submitTimeStamp.GPUTimeStamp, 0u); delete ev; } using EnqueueHandlerTimestampDisabledTest = EnqueueHandlerTimestampTest; HWTEST_F(EnqueueHandlerTimestampDisabledTest, givenProflingEnabledTimeStampPacketsDisabledWhenEnqueueCommandWithoutKernelThenSubmitTimeStampIsSet) { cl_queue_properties properties[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, 0}; std::unique_ptr> mockCmdQ(new MockCommandQueueHw(context, pClDevice, properties)); char buffer[64]; 
std::unique_ptr allocation(new MockGraphicsAllocation(buffer, sizeof(buffer))); std::unique_ptr surface(new GeneralSurface(allocation.get())); EventsRequest eventsRequest(0, nullptr, nullptr); EventBuilder eventBuilder; eventBuilder.create>(mockCmdQ.get(), CL_COMMAND_USER, CompletionStamp::notReady, CompletionStamp::notReady); auto ev = static_cast *>(eventBuilder.getEvent()); ev->setProfilingEnabled(true); Surface *surfaces[] = {surface.get()}; auto blocking = true; TimestampPacketDependencies timestampPacketDependencies; CsrDependencies csrDeps; EnqueueProperties enqueueProperties(false, false, false, true, false, nullptr); EXPECT_EQ(ev->submitTimeStamp.CPUTimeinNS, 0u); EXPECT_EQ(ev->submitTimeStamp.GPUTimeStamp, 0u); mockCmdQ->enqueueCommandWithoutKernel(surfaces, 1, &mockCmdQ->getCS(0), 0, blocking, enqueueProperties, timestampPacketDependencies, eventsRequest, eventBuilder, 0, csrDeps, nullptr); EXPECT_NE(ev->submitTimeStamp.CPUTimeinNS, 0u); EXPECT_EQ(ev->submitTimeStamp.GPUTimeStamp, 0u); DebugManagerStateRestore dbgState; DebugManager.flags.EnableDeviceBasedTimestamps.set(true); ev->queueTimeStamp.GPUTimeStamp = 1000; ev->calculateSubmitTimestampData(); EXPECT_NE(ev->submitTimeStamp.GPUTimeStamp, 0u); delete ev; } HWTEST_F(EnqueueHandlerTest, givenNonBlitPropertyWhenEnqueueIsBlockedThenDontRegisterBlitProperties) { std::unique_ptr> mockCmdQ(new MockCommandQueueHw(context, pClDevice, 0)); auto &csr = mockCmdQ->getGpgpuCommandStreamReceiver(); auto commandStream = new LinearStream(); csr.ensureCommandBufferAllocation(*commandStream, 1, 1); auto blockedCommandsDataForDependencyFlush = new KernelOperation(commandStream, *csr.getInternalAllocationStorage()); TimestampPacketDependencies timestampPacketDependencies; MultiDispatchInfo multiDispatchInfo; EventsRequest eventsRequest(0, nullptr, nullptr); EventBuilder eventBuilder; const EnqueueProperties enqueuePropertiesForDependencyFlush(false, false, false, true, false, nullptr); auto blockedCommandsData = 
std::unique_ptr(blockedCommandsDataForDependencyFlush); Surface *surfaces[] = {nullptr}; mockCmdQ->enqueueBlocked(CL_COMMAND_MARKER, surfaces, size_t(0), multiDispatchInfo, timestampPacketDependencies, blockedCommandsData, enqueuePropertiesForDependencyFlush, eventsRequest, eventBuilder, std::unique_ptr(nullptr), nullptr); EXPECT_FALSE(blockedCommandsDataForDependencyFlush->blitEnqueue); } HWTEST_F(EnqueueHandlerTest, givenBlitPropertyWhenEnqueueIsBlockedThenRegisterBlitProperties) { HardwareInfo *hwInfo = pClDevice->getRootDeviceEnvironment().getMutableHardwareInfo(); hwInfo->capabilityTable.blitterOperationsSupported = true; REQUIRE_BLITTER_OR_SKIP(hwInfo); std::unique_ptr> mockCmdQ(new MockCommandQueueHw(context, pClDevice, 0)); auto &csr = mockCmdQ->getGpgpuCommandStreamReceiver(); auto commandStream = new LinearStream(); csr.ensureCommandBufferAllocation(*commandStream, 1, 1); auto blockedCommandsDataForBlitEnqueue = new KernelOperation(commandStream, *csr.getInternalAllocationStorage()); TimestampPacketDependencies timestampPacketDependencies; MultiDispatchInfo multiDispatchInfo; EventsRequest eventsRequest(0, nullptr, nullptr); EventBuilder eventBuilder; BlitProperties blitProperties; blitProperties.srcAllocation = reinterpret_cast(0x12345); blitProperties.dstAllocation = reinterpret_cast(0x56789); BlitPropertiesContainer blitPropertiesContainer; blitPropertiesContainer.push_back(blitProperties); const EnqueueProperties enqueuePropertiesForBlitEnqueue(true, false, false, false, false, &blitPropertiesContainer); auto blockedCommandsData = std::unique_ptr(blockedCommandsDataForBlitEnqueue); Surface *surfaces[] = {nullptr}; mockCmdQ->enqueueBlocked(CL_COMMAND_READ_BUFFER, surfaces, size_t(0), multiDispatchInfo, timestampPacketDependencies, blockedCommandsData, enqueuePropertiesForBlitEnqueue, eventsRequest, eventBuilder, std::unique_ptr(nullptr), mockCmdQ->getBcsForAuxTranslation()); EXPECT_TRUE(blockedCommandsDataForBlitEnqueue->blitEnqueue); 
EXPECT_EQ(blitProperties.srcAllocation, blockedCommandsDataForBlitEnqueue->blitPropertiesContainer.begin()->srcAllocation); EXPECT_EQ(blitProperties.dstAllocation, blockedCommandsDataForBlitEnqueue->blitPropertiesContainer.begin()->dstAllocation); } HWTEST_F(DispatchFlagsTests, whenEnqueueCommandWithoutKernelThenPassCorrectDispatchFlags) { using CsrType = MockCsrHw2; SetUpImpl(); auto mockCmdQ = std::make_unique>(context.get(), device.get(), nullptr); auto mockCsr = static_cast(&mockCmdQ->getGpgpuCommandStreamReceiver()); auto blocking = true; TimestampPacketDependencies timestampPacketDependencies; CsrDependencies csrDeps; EventsRequest eventsRequest(0, nullptr, nullptr); EventBuilder eventBuilder; EnqueueProperties enqueueProperties(false, false, false, true, false, nullptr); mockCmdQ->enqueueCommandWithoutKernel(nullptr, 0, &mockCmdQ->getCS(0), 0, blocking, enqueueProperties, timestampPacketDependencies, eventsRequest, eventBuilder, 0, csrDeps, nullptr); EXPECT_EQ(blocking, mockCsr->passedDispatchFlags.blocking); EXPECT_FALSE(mockCsr->passedDispatchFlags.implicitFlush); EXPECT_TRUE(mockCsr->passedDispatchFlags.guardCommandBufferWithPipeControl); EXPECT_EQ(L3CachingSettings::NotApplicable, mockCsr->passedDispatchFlags.l3CacheSettings); EXPECT_EQ(GrfConfig::NotApplicable, mockCsr->passedDispatchFlags.numGrfRequired); EXPECT_EQ(device->getPreemptionMode(), mockCsr->passedDispatchFlags.preemptionMode); EXPECT_EQ(mockCmdQ->flushStamp->getStampReference(), mockCsr->passedDispatchFlags.flushStampReference); } HWTEST_F(DispatchFlagsTests, whenEnqueueCommandWithoutKernelThenPassCorrectThrottleHint) { using CsrType = MockCsrHw2; SetUpImpl(); auto mockCmdQ = std::make_unique>(context.get(), device.get(), nullptr); mockCmdQ->throttle = QueueThrottle::HIGH; auto mockCsr = static_cast(&mockCmdQ->getGpgpuCommandStreamReceiver()); TimestampPacketDependencies timestampPacketDependencies; CsrDependencies csrDeps; EventsRequest eventsRequest(0, nullptr, nullptr); EventBuilder 
eventBuilder; EnqueueProperties enqueueProperties(false, false, false, true, false, nullptr); bool blocking = true; mockCmdQ->enqueueCommandWithoutKernel(nullptr, 0, &mockCmdQ->getCS(0), 0, blocking, enqueueProperties, timestampPacketDependencies, eventsRequest, eventBuilder, 0, csrDeps, nullptr); EXPECT_EQ(mockCmdQ->throttle, mockCsr->passedDispatchFlags.throttle); } HWTEST_F(DispatchFlagsBlitTests, givenBlitEnqueueWhenDispatchingCommandsWithoutKernelThenDoImplicitFlush) { using CsrType = MockCsrHw2; DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(1); DebugManager.flags.EnableTimestampPacket.set(1); SetUpImpl(); REQUIRE_FULL_BLITTER_OR_SKIP(&device->getHardwareInfo()); auto mockCmdQ = std::make_unique>(context.get(), device.get(), nullptr); auto mockCsr = static_cast(&mockCmdQ->getGpgpuCommandStreamReceiver()); mockCsr->skipBlitCalls = true; cl_int retVal = CL_SUCCESS; auto buffer = std::unique_ptr(Buffer::create(context.get(), 0, 1, nullptr, retVal)); auto &bcsCsr = *mockCmdQ->bcsEngines[0]->commandStreamReceiver; auto blocking = true; TimestampPacketDependencies timestampPacketDependencies; EventsRequest eventsRequest(0, nullptr, nullptr); EventBuilder eventBuilder; BuiltinOpParams builtinOpParams; builtinOpParams.srcMemObj = buffer.get(); builtinOpParams.dstPtr = reinterpret_cast(0x1234); MultiDispatchInfo multiDispatchInfo; multiDispatchInfo.setBuiltinOpParams(builtinOpParams); CsrDependencies csrDeps; mockCmdQ->obtainNewTimestampPacketNodes(1, timestampPacketDependencies.previousEnqueueNodes, true, bcsCsr); timestampPacketDependencies.cacheFlushNodes.add(mockCmdQ->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator()->getTag()); BlitProperties blitProperties = mockCmdQ->processDispatchForBlitEnqueue(bcsCsr, multiDispatchInfo, timestampPacketDependencies, eventsRequest, &mockCmdQ->getCS(0), CL_COMMAND_READ_BUFFER, false); BlitPropertiesContainer blitPropertiesContainer; blitPropertiesContainer.push_back(blitProperties); EnqueueProperties 
enqueueProperties(true, false, false, false, false, &blitPropertiesContainer); mockCmdQ->enqueueCommandWithoutKernel(nullptr, 0, &mockCmdQ->getCS(0), 0, blocking, enqueueProperties, timestampPacketDependencies, eventsRequest, eventBuilder, 0, csrDeps, &bcsCsr); EXPECT_TRUE(mockCsr->passedDispatchFlags.implicitFlush); EXPECT_TRUE(mockCsr->passedDispatchFlags.guardCommandBufferWithPipeControl); EXPECT_EQ(L3CachingSettings::NotApplicable, mockCsr->passedDispatchFlags.l3CacheSettings); EXPECT_EQ(GrfConfig::NotApplicable, mockCsr->passedDispatchFlags.numGrfRequired); } HWTEST_F(DispatchFlagsBlitTests, givenN1EnabledWhenDispatchingWithoutKernelThenAllowOutOfOrderExecution) { using CsrType = MockCsrHw2; DebugManager.flags.EnableTimestampPacket.set(1); DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(1); SetUpImpl(); REQUIRE_FULL_BLITTER_OR_SKIP(&device->getHardwareInfo()); auto mockCmdQ = std::make_unique>(context.get(), device.get(), nullptr); auto mockCsr = static_cast(&mockCmdQ->getGpgpuCommandStreamReceiver()); mockCsr->skipBlitCalls = true; cl_int retVal = CL_SUCCESS; auto buffer = std::unique_ptr(Buffer::create(context.get(), 0, 1, nullptr, retVal)); auto &bcsCsr = *mockCmdQ->bcsEngines[0]->commandStreamReceiver; TimestampPacketDependencies timestampPacketDependencies; EventsRequest eventsRequest(0, nullptr, nullptr); EventBuilder eventBuilder; bool blocked = false; BuiltinOpParams builtinOpParams; builtinOpParams.srcMemObj = buffer.get(); builtinOpParams.dstPtr = reinterpret_cast(0x1234); MultiDispatchInfo multiDispatchInfo; multiDispatchInfo.setBuiltinOpParams(builtinOpParams); mockCmdQ->obtainNewTimestampPacketNodes(1, timestampPacketDependencies.previousEnqueueNodes, true, bcsCsr); timestampPacketDependencies.cacheFlushNodes.add(mockCmdQ->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator()->getTag()); BlitProperties blitProperties = mockCmdQ->processDispatchForBlitEnqueue(bcsCsr, multiDispatchInfo, timestampPacketDependencies, eventsRequest, 
&mockCmdQ->getCS(0), CL_COMMAND_READ_BUFFER, false); BlitPropertiesContainer blitPropertiesContainer; blitPropertiesContainer.push_back(blitProperties); CsrDependencies csrDeps; EnqueueProperties enqueueProperties(true, false, false, false, false, &blitPropertiesContainer); mockCsr->nTo1SubmissionModelEnabled = false; mockCmdQ->enqueueCommandWithoutKernel(nullptr, 0, &mockCmdQ->getCS(0), 0, blocked, enqueueProperties, timestampPacketDependencies, eventsRequest, eventBuilder, 0, csrDeps, &bcsCsr); EXPECT_FALSE(mockCsr->passedDispatchFlags.outOfOrderExecutionAllowed); mockCsr->nTo1SubmissionModelEnabled = true; mockCmdQ->enqueueCommandWithoutKernel(nullptr, 0, &mockCmdQ->getCS(0), 0, blocked, enqueueProperties, timestampPacketDependencies, eventsRequest, eventBuilder, 0, csrDeps, &bcsCsr); EXPECT_TRUE(mockCsr->passedDispatchFlags.outOfOrderExecutionAllowed); } HWTEST_F(DispatchFlagsTests, givenMockKernelWhenSettingAdditionalKernelExecInfoThenCorrectValueIsSet) { using CsrType = MockCsrHw2; SetUpImpl(); auto mockCmdQ = std::make_unique>(context.get(), device.get(), nullptr); auto mockCsr = static_cast(&mockCmdQ->getGpgpuCommandStreamReceiver()); TimestampPacketDependencies timestampPacketDependencies; EventsRequest eventsRequest(0, nullptr, nullptr); EventBuilder eventBuilder; EnqueueProperties enqueueProperties(false, false, false, true, false, nullptr); auto cmdStream = new LinearStream(device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), 4096, AllocationType::COMMAND_BUFFER, device->getDeviceBitfield()})); auto blockedCommandsData = std::make_unique(cmdStream, *mockCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage()); MockKernelWithInternals mockKernelWithInternals(*device.get()); auto pKernel = mockKernelWithInternals.mockKernel; MockMultiDispatchInfo multiDispatchInfo(device.get(), pKernel); std::unique_ptr printfHandler(PrintfHandler::create(multiDispatchInfo, *device.get())); IndirectHeap *dsh = 
nullptr, *ioh = nullptr, *ssh = nullptr; mockCmdQ->allocateHeapMemory(IndirectHeap::Type::DYNAMIC_STATE, 4096u, dsh); mockCmdQ->allocateHeapMemory(IndirectHeap::Type::INDIRECT_OBJECT, 4096u, ioh); mockCmdQ->allocateHeapMemory(IndirectHeap::Type::SURFACE_STATE, 4096u, ssh); blockedCommandsData->setHeaps(dsh, ioh, ssh); std::vector v; pKernel->setAdditionalKernelExecInfo(123u); std::unique_ptr cmd(new CommandComputeKernel(*mockCmdQ.get(), blockedCommandsData, v, false, false, false, std::move(printfHandler), PreemptionMode::Disabled, pKernel, 1)); cmd->submit(1u, false); EXPECT_EQ(mockCsr->passedDispatchFlags.additionalKernelExecInfo, 123u); pKernel->setAdditionalKernelExecInfo(123u); mockCsr->setMediaVFEStateDirty(true); cmd->submit(1u, false); EXPECT_EQ(mockCsr->passedDispatchFlags.additionalKernelExecInfo, 123u); } HWTEST_F(EnqueueHandlerTest, GivenCommandStreamWithoutKernelAndZeroSurfacesWhenEnqueuedHandlerThenProgramPipeControl) { std::unique_ptr> mockCmdQ(new MockCommandQueueWithCacheFlush(context, pClDevice, 0)); mockCmdQ->commandRequireCacheFlush = true; MultiDispatchInfo multiDispatch; const auto enqueueResult = mockCmdQ->template enqueueHandler(nullptr, 0, false, multiDispatch, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, enqueueResult); auto requiredCmdStreamSize = alignUp(MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation( pDevice->getHardwareInfo()), MemoryConstants::cacheLineSize); EXPECT_EQ(mockCmdQ->getCS(0).getUsed(), requiredCmdStreamSize); } HWTEST_F(EnqueueHandlerTest, givenTimestampPacketWriteEnabledAndCommandWithCacheFlushWhenEnqueueingHandlerThenObtainNewStamp) { auto &csr = pDevice->getUltCommandStreamReceiver(); csr.timestampPacketWriteEnabled = true; auto mockTagAllocator = new MockTagAllocator<>(csr.rootDeviceIndex, pDevice->getMemoryManager()); csr.timestampPacketAllocator.reset(mockTagAllocator); std::unique_ptr> mockCmdQ(new MockCommandQueueWithCacheFlush(context, pClDevice, 0)); mockCmdQ->commandRequireCacheFlush = 
true; cl_event event; MultiDispatchInfo multiDispatch; const auto enqueueResult = mockCmdQ->template enqueueHandler(nullptr, 0, false, multiDispatch, 0, nullptr, &event); EXPECT_EQ(CL_SUCCESS, enqueueResult); auto node1 = mockCmdQ->timestampPacketContainer->peekNodes().at(0); EXPECT_NE(nullptr, node1); clReleaseEvent(event); } HWTEST_F(EnqueueHandlerTest, givenTimestampPacketWriteDisabledAndCommandWithCacheFlushWhenEnqueueingHandlerThenTimeStampContainerIsNotCreated) { auto &csr = pDevice->getUltCommandStreamReceiver(); csr.timestampPacketWriteEnabled = false; auto mockTagAllocator = new MockTagAllocator<>(pDevice->getRootDeviceIndex(), pDevice->getMemoryManager()); csr.timestampPacketAllocator.reset(mockTagAllocator); std::unique_ptr> mockCmdQ(new MockCommandQueueWithCacheFlush(context, pClDevice, 0)); mockCmdQ->commandRequireCacheFlush = true; cl_event event; MultiDispatchInfo multiDispatch; const auto enqueueResult = mockCmdQ->template enqueueHandler(nullptr, 0, false, multiDispatch, 0, nullptr, &event); EXPECT_EQ(CL_SUCCESS, enqueueResult); auto container = mockCmdQ->timestampPacketContainer.get(); EXPECT_EQ(nullptr, container); clReleaseEvent(event); } } // namespace NEO enqueue_command_without_kernel_tests_dg2_and_later.cpp000066400000000000000000000072141422164147700373610ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/os_context.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "shared/test/common/mocks/mock_timestamp_container.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/command_queue/resource_barrier.h" #include "opencl/source/event/event_builder.h" #include "opencl/source/event/user_event.h" #include "opencl/source/mem_obj/buffer.h" #include 
"opencl/source/memory_manager/resource_surface.h" #include "opencl/test/unit_test/fixtures/dispatch_flags_fixture.h" #include "opencl/test/unit_test/fixtures/enqueue_handler_fixture.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_mdi.h" namespace NEO { using IsDG2AndLater = IsAtLeastXeHpgCore; HWTEST2_F(DispatchFlagsTests, whenSubmittingKernelWithAdditionalKernelExecInfoThenCorrectDispatchFlagIsSet, IsDG2AndLater) { using CsrType = MockCsrHw2; SetUpImpl(); auto mockCmdQ = std::make_unique>(context.get(), device.get(), nullptr); auto mockCsr = static_cast(&mockCmdQ->getGpgpuCommandStreamReceiver()); TimestampPacketDependencies timestampPacketDependencies; EventsRequest eventsRequest(0, nullptr, nullptr); EventBuilder eventBuilder; EnqueueProperties enqueueProperties(false, false, false, true, false, nullptr); auto cmdStream = new LinearStream(device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), 4096, AllocationType::COMMAND_BUFFER, device->getDeviceBitfield()})); auto blockedCommandsData = std::make_unique(cmdStream, *mockCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage()); MockKernelWithInternals mockKernelWithInternals(*device.get()); auto pKernel = mockKernelWithInternals.mockKernel; MockMultiDispatchInfo multiDispatchInfo(device.get(), pKernel); std::unique_ptr printfHandler(PrintfHandler::create(multiDispatchInfo, *device.get())); IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr; mockCmdQ->allocateHeapMemory(IndirectHeap::Type::DYNAMIC_STATE, 4096u, dsh); mockCmdQ->allocateHeapMemory(IndirectHeap::Type::INDIRECT_OBJECT, 4096u, ioh); mockCmdQ->allocateHeapMemory(IndirectHeap::Type::SURFACE_STATE, 4096u, ssh); blockedCommandsData->setHeaps(dsh, ioh, ssh); std::vector v; pKernel->setAdditionalKernelExecInfo(AdditionalKernelExecInfo::DisableOverdispatch); std::unique_ptr cmd(new 
CommandComputeKernel(*mockCmdQ.get(), blockedCommandsData, v, false, false, false, std::move(printfHandler), PreemptionMode::Disabled, pKernel, 1)); cmd->submit(1u, false); EXPECT_EQ(mockCsr->passedDispatchFlags.additionalKernelExecInfo, AdditionalKernelExecInfo::DisableOverdispatch); pKernel->setAdditionalKernelExecInfo(AdditionalKernelExecInfo::NotApplicable); mockCsr->setMediaVFEStateDirty(true); cmd->submit(1u, false); EXPECT_EQ(mockCsr->passedDispatchFlags.additionalKernelExecInfo, AdditionalKernelExecInfo::NotApplicable); pKernel->setAdditionalKernelExecInfo(AdditionalKernelExecInfo::NotSet); cmd->submit(1u, false); EXPECT_EQ(mockCsr->passedDispatchFlags.additionalKernelExecInfo, AdditionalKernelExecInfo::NotSet); cmd->submit(1u, false); EXPECT_EQ(mockCsr->passedDispatchFlags.additionalKernelExecInfo, AdditionalKernelExecInfo::NotSet); } } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/enqueue_copy_buffer_event_tests.cpp000066400000000000000000000053471422164147700336430ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/event/event.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "opencl/test/unit_test/fixtures/built_in_fixture.h" #include "opencl/test/unit_test/fixtures/hello_world_fixture.h" #include "gtest/gtest.h" #include using namespace NEO; typedef HelloWorldTest EnqueueCopyBuffer; TEST_F(EnqueueCopyBuffer, WhenEnqueingCopyBufferThenEventHasCorrectCommandType) { cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event event = nullptr; auto srcBuffer = std::unique_ptr(BufferHelper<>::create()); auto dstBuffer = std::unique_ptr(BufferHelper<>::create()); auto retVal = pCmdQ->enqueueCopyBuffer( srcBuffer.get(), dstBuffer.get(), 0, 0, srcBuffer->getSize(), numEventsInWaitList, eventWaitList, &event); EXPECT_EQ(CL_SUCCESS, retVal); 
ASSERT_NE(nullptr, event); auto pEvent = (Event *)event; EXPECT_EQ(pCmdQ->taskLevel, pEvent->taskLevel); // Check CL_EVENT_COMMAND_TYPE { cl_command_type cmdType = 0; size_t sizeReturned = 0; auto result = clGetEventInfo(pEvent, CL_EVENT_COMMAND_TYPE, sizeof(cmdType), &cmdType, &sizeReturned); ASSERT_EQ(CL_SUCCESS, result); EXPECT_EQ(static_cast(CL_COMMAND_COPY_BUFFER), cmdType); EXPECT_EQ(sizeof(cl_command_type), sizeReturned); } delete pEvent; } TEST_F(EnqueueCopyBuffer, GivenMultipleEventsWhenEnqueingCopyBufferThenReturnedEventShouldBeMaxOfInputEventsAndCmdQPlus1) { uint32_t taskLevelCmdQ = 17; pCmdQ->taskLevel = taskLevelCmdQ; uint32_t taskLevelEvent1 = 8; uint32_t taskLevelEvent2 = 19; Event event1(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent1, 4); Event event2(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent2, 10); cl_event eventWaitList[] = { &event1, &event2}; cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]); cl_event event = nullptr; auto retVal = CL_INVALID_VALUE; auto srcBuffer = std::unique_ptr(BufferHelper<>::create()); auto dstBuffer = std::unique_ptr(BufferHelper<>::create()); retVal = pCmdQ->enqueueCopyBuffer( srcBuffer.get(), dstBuffer.get(), 0, 0, srcBuffer->getSize(), numEventsInWaitList, eventWaitList, &event); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, event); auto pEvent = (Event *)event; EXPECT_EQ(19u + 1u, pEvent->taskLevel); delete pEvent; } compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/enqueue_copy_buffer_fixture.h000066400000000000000000000042711422164147700324260ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/ptr_math.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "opencl/test/unit_test/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include 
"gtest/gtest.h" namespace NEO { struct EnqueueCopyBufferHelper { cl_int enqueueCopyBuffer( CommandQueue *pCmdQ, Buffer *srcBuffer, Buffer *dstBuffer, size_t srcOffset, size_t dstOffset, size_t size, cl_uint numEventsInWaitList = 0, cl_event *eventWaitList = nullptr, cl_event *event = nullptr) { cl_int retVal = pCmdQ->enqueueCopyBuffer( srcBuffer, dstBuffer, srcOffset, dstOffset, size, numEventsInWaitList, eventWaitList, event); return retVal; } }; struct EnqueueCopyBufferTest : public CommandEnqueueFixture, public EnqueueCopyBufferHelper, public ::testing::Test { void SetUp(void) override { CommandEnqueueFixture::SetUp(); BufferDefaults::context = new MockContext; srcBuffer = BufferHelper<>::create(); dstBuffer = BufferHelper<>::create(); } void TearDown(void) override { delete srcBuffer; delete dstBuffer; delete BufferDefaults::context; CommandEnqueueFixture::TearDown(); } protected: void enqueueCopyBuffer() { auto retVal = EnqueueCopyBufferHelper::enqueueCopyBuffer( pCmdQ, srcBuffer, dstBuffer, 0, 0, sizeof(float), 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } template void enqueueCopyBufferAndParse() { enqueueCopyBuffer(); parseCommands(*pCmdQ); } MockContext context; Buffer *srcBuffer = nullptr; Buffer *dstBuffer = nullptr; }; } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/enqueue_copy_buffer_rect_fixture.h000066400000000000000000000072231422164147700334430ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/ptr_math.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "opencl/test/unit_test/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "gtest/gtest.h" namespace NEO { struct EnqueueCopyBufferRectHelper { cl_int enqueueCopyBufferRect( CommandQueue *pCmdQ, Buffer *srcBuffer, Buffer 
*dstBuffer, const size_t *srcOrigin, const size_t *dstOrigin, const size_t *region, size_t srcRowPitch, size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { cl_int retVal = pCmdQ->enqueueCopyBufferRect( srcBuffer, dstBuffer, srcOrigin, dstOrigin, region, srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, numEventsInWaitList, eventWaitList, event); return retVal; } }; struct EnqueueCopyBufferRectTest : public CommandEnqueueFixture, public EnqueueCopyBufferRectHelper, public ::testing::Test { struct BufferRect : public BufferDefaults { static const size_t sizeInBytes; }; void SetUp(void) override { CommandEnqueueFixture::SetUp(); BufferDefaults::context = new MockContext; srcBuffer = BufferHelper::create(); dstBuffer = BufferHelper::create(); } void TearDown(void) override { delete srcBuffer; delete dstBuffer; delete BufferDefaults::context; CommandEnqueueFixture::TearDown(); } protected: template void enqueueCopyBufferRect2D() { typedef typename FamilyType::WALKER_TYPE GPGPU_WALKER; typedef typename FamilyType::PIPELINE_SELECT PIPELINE_SELECT; size_t srcOrigin[] = {0, 0, 0}; size_t dstOrigin[] = {0, 0, 0}; size_t region[] = {50, 50, 1}; auto retVal = EnqueueCopyBufferRectHelper::enqueueCopyBufferRect( pCmdQ, srcBuffer, dstBuffer, srcOrigin, dstOrigin, region, rowPitch, slicePitch, rowPitch, slicePitch, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); parseCommands(*pCmdQ); } template void enqueueCopyBufferRect3D() { typedef typename FamilyType::WALKER_TYPE GPGPU_WALKER; typedef typename FamilyType::PIPELINE_SELECT PIPELINE_SELECT; size_t srcOrigin[] = {0, 0, 0}; size_t dstOrigin[] = {0, 0, 0}; size_t region[] = {50, 50, 50}; auto retVal = EnqueueCopyBufferRectHelper::enqueueCopyBufferRect( pCmdQ, srcBuffer, dstBuffer, srcOrigin, dstOrigin, region, rowPitch, slicePitch, rowPitch, slicePitch, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); parseCommands(*pCmdQ); } 
Buffer *srcBuffer = nullptr; Buffer *dstBuffer = nullptr; static const size_t rowPitch = 100; static const size_t slicePitch = 100 * 100; }; } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/enqueue_copy_buffer_rect_tests.cpp000066400000000000000000000501611422164147700334510ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/built_ins/built_ins.h" #include "shared/source/command_stream/wait_status.h" #include "shared/source/helpers/constants.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/libult/ult_command_stream_receiver.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/built_ins/builtins_dispatch_builder.h" #include "opencl/source/helpers/dispatch_info.h" #include "opencl/test/unit_test/command_queue/enqueue_copy_buffer_rect_fixture.h" #include "opencl/test/unit_test/gen_common/gen_commands_common_validation.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "reg_configs_common.h" using namespace NEO; const size_t EnqueueCopyBufferRectTest::BufferRect::sizeInBytes = 100 * 100 * 100 * sizeof(cl_char); HWTEST_F(EnqueueCopyBufferRectTest, GivenNullSrcMemObjWhenCopyingBufferRectThenClInvalidMemObjectErrorIsReturned) { auto retVal = CL_SUCCESS; size_t srcOrigin[] = {0, 0, 0}; size_t dstOrigin[] = {0, 0, 0}; size_t region[] = {1, 1, 0}; retVal = clEnqueueCopyBufferRect( pCmdQ, nullptr, dstBuffer, srcOrigin, dstOrigin, region, 10, 0, 10, 0, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } HWTEST_F(EnqueueCopyBufferRectTest, GivenNullDstMemObjWhenCopyingBufferRectThenClInvalidMemObjectErrorIsReturned) { auto retVal = CL_SUCCESS; size_t srcOrigin[] = {0, 0, 0}; size_t dstOrigin[] = {0, 0, 0}; size_t region[] = {1, 1, 0}; retVal = clEnqueueCopyBufferRect( pCmdQ, srcBuffer, nullptr, 
srcOrigin, dstOrigin, region, 10, 0, 10, 0, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } HWTEST_F(EnqueueCopyBufferRectTest, GivenValidParametersWhenCopyingBufferRectThenSuccessIsReturned) { auto retVal = CL_SUCCESS; size_t srcOrigin[] = {0, 0, 0}; size_t dstOrigin[] = {0, 0, 0}; size_t region[] = {1, 1, 1}; retVal = clEnqueueCopyBufferRect( pCmdQ, srcBuffer, dstBuffer, srcOrigin, dstOrigin, region, 10, 0, 10, 0, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } HWTEST_F(EnqueueCopyBufferRectTest, GivenGpuHangAndBlockingCallAndValidParametersWhenCopyingBufferRectThenOutOfResourcesIsReturned) { DebugManagerStateRestore stateRestore; DebugManager.flags.MakeEachEnqueueBlocking.set(true); std::unique_ptr device(new MockClDevice{MockClDevice::createWithNewExecutionEnvironment(nullptr)}); cl_queue_properties props = {}; MockCommandQueueHw mockCommandQueueHw(context, device.get(), &props); mockCommandQueueHw.waitForAllEnginesReturnValue = WaitStatus::GpuHang; size_t srcOrigin[] = {0, 0, 0}; size_t dstOrigin[] = {0, 0, 0}; size_t region[] = {1, 1, 1}; const auto enqueueResult = clEnqueueCopyBufferRect( &mockCommandQueueHw, srcBuffer, dstBuffer, srcOrigin, dstOrigin, region, 10, 0, 10, 0, 0, nullptr, nullptr); EXPECT_EQ(CL_OUT_OF_RESOURCES, enqueueResult); EXPECT_EQ(1, mockCommandQueueHw.waitForAllEnginesCalledCount); } HWTEST_F(EnqueueCopyBufferRectTest, WhenCopyingBufferRect2DThenTaskCountIsAlignedWithCsr) { //this test case assumes IOQ auto &csr = pDevice->getUltCommandStreamReceiver(); csr.taskCount = pCmdQ->taskCount + 100; csr.taskLevel = pCmdQ->taskLevel + 50; enqueueCopyBufferRect2D(); EXPECT_EQ(csr.peekTaskCount(), pCmdQ->taskCount); EXPECT_EQ(csr.peekTaskLevel(), pCmdQ->taskLevel + 1); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyBufferRectTest, WhenCopyingBufferRect2DThenGpgpuWalkerIsCorrect) { typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; enqueueCopyBufferRect2D(); auto *cmd = (GPGPU_WALKER *)cmdWalker; ASSERT_NE(nullptr, cmd); // 
Verify GPGPU_WALKER parameters EXPECT_NE(0u, cmd->getThreadGroupIdXDimension()); EXPECT_NE(0u, cmd->getThreadGroupIdYDimension()); EXPECT_EQ(1u, cmd->getThreadGroupIdZDimension()); EXPECT_NE(0u, cmd->getRightExecutionMask()); EXPECT_NE(0u, cmd->getBottomExecutionMask()); EXPECT_EQ(GPGPU_WALKER::SIMD_SIZE_SIMD32, cmd->getSimdSize()); EXPECT_NE(0u, cmd->getIndirectDataLength()); EXPECT_EQ(0u, cmd->getIndirectDataLength() % GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE); EXPECT_FALSE(cmd->getIndirectParameterEnable()); // Compute the SIMD lane mask size_t simd = cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD32 ? 32 : cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD16 ? 16 : 8; uint64_t simdMask = maxNBitValue(simd); // Mask off lanes based on the execution masks auto laneMaskRight = cmd->getRightExecutionMask() & simdMask; auto lanesPerThreadX = 0; while (laneMaskRight) { lanesPerThreadX += laneMaskRight & 1; laneMaskRight >>= 1; } } HWTEST_F(EnqueueCopyBufferRectTest, WhenCopyingBufferRect2DThenTaskLevelIsIncremented) { auto taskLevelBefore = pCmdQ->taskLevel; enqueueCopyBufferRect2D(); EXPECT_GT(pCmdQ->taskLevel, taskLevelBefore); } HWTEST_F(EnqueueCopyBufferRectTest, WhenCopyingBufferRect2DThenCommandsAreAdded) { auto usedCmdBufferBefore = pCS->getUsed(); enqueueCopyBufferRect2D(); EXPECT_NE(usedCmdBufferBefore, pCS->getUsed()); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyBufferRectTest, WhenCopyingBufferRect2DThenIndirectDataGetsAdded) { auto dshBefore = pDSH->getUsed(); auto iohBefore = pIOH->getUsed(); auto sshBefore = pSSH->getUsed(); enqueueCopyBufferRect2D(); // Extract the kernel used auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferRect, pCmdQ->getClDevice()); ASSERT_NE(nullptr, &builder); BuiltinOpParams dc; dc.srcMemObj = srcBuffer; dc.dstMemObj = dstBuffer; dc.srcOffset = {0, 0, 0}; dc.dstOffset = {0, 0, 0}; dc.size = {50, 50, 1}; dc.srcRowPitch = rowPitch; dc.srcSlicePitch = slicePitch; dc.dstRowPitch 
= rowPitch; dc.dstSlicePitch = slicePitch; MultiDispatchInfo multiDispatchInfo(dc); builder.buildDispatchInfos(multiDispatchInfo); EXPECT_NE(0u, multiDispatchInfo.size()); auto kernel = multiDispatchInfo.begin()->getKernel(); ASSERT_NE(nullptr, kernel); EXPECT_NE(dshBefore, pDSH->getUsed()); EXPECT_NE(iohBefore, pIOH->getUsed()); if (kernel->usesBindfulAddressingForBuffers()) { EXPECT_NE(sshBefore, pSSH->getUsed()); } } HWTEST_F(EnqueueCopyBufferRectTest, WhenCopyingBufferRectStatelessThenStatelessKernelIsUsed) { auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferRectStateless, pCmdQ->getClDevice()); ASSERT_NE(nullptr, &builder); BuiltinOpParams dc; dc.srcMemObj = srcBuffer; dc.dstMemObj = dstBuffer; dc.srcOffset = {0, 0, 0}; dc.dstOffset = {0, 0, 0}; dc.size = {50, 50, 1}; dc.srcRowPitch = rowPitch; dc.srcSlicePitch = slicePitch; dc.dstRowPitch = rowPitch; dc.dstSlicePitch = slicePitch; MultiDispatchInfo multiDispatchInfo(dc); builder.buildDispatchInfos(multiDispatchInfo); EXPECT_NE(0u, multiDispatchInfo.size()); auto kernel = multiDispatchInfo.begin()->getKernel(); ASSERT_NE(nullptr, kernel); EXPECT_TRUE(kernel->getKernelInfo().kernelDescriptor.kernelAttributes.supportsBuffersBiggerThan4Gb()); EXPECT_FALSE(kernel->getKernelInfo().getArgDescriptorAt(0).as().isPureStateful()); } HWTEST_F(EnqueueCopyBufferRectTest, WhenCopyingBufferRect2DThenL3ProgrammingIsCorrect) { enqueueCopyBufferRect2D(); validateL3Programming(cmdList, itorWalker); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyBufferRectTest, When2DEnqueueIsDoneThenStateBaseAddressIsProperlyProgrammed) { enqueueCopyBufferRect2D(); auto &ultCsr = this->pDevice->getUltCommandStreamReceiver(); auto &hwHelper = HwHelper::get(pDevice->getHardwareInfo().platform.eRenderCoreFamily); validateStateBaseAddress(ultCsr.getMemoryManager()->getInternalHeapBaseAddress(ultCsr.rootDeviceIndex, pIOH->getGraphicsAllocation()->isAllocatedInLocalMemoryPool()), 
ultCsr.getMemoryManager()->getInternalHeapBaseAddress(ultCsr.rootDeviceIndex, !hwHelper.useSystemMemoryPlacementForISA(pDevice->getHardwareInfo())), pDSH, pIOH, pSSH, itorPipelineSelect, itorWalker, cmdList, 0llu); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyBufferRectTest, WhenCopyingBufferRect2DThenMediaInterfaceDescriptorLoadIsCorrect) { typedef typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD MEDIA_INTERFACE_DESCRIPTOR_LOAD; typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; enqueueCopyBufferRect2D(); auto *cmd = (MEDIA_INTERFACE_DESCRIPTOR_LOAD *)cmdMediaInterfaceDescriptorLoad; ASSERT_NE(nullptr, cmd); // Verify we have a valid length -- multiple of INTERFACE_DESCRIPTOR_DATAs EXPECT_EQ(0u, cmd->getInterfaceDescriptorTotalLength() % sizeof(INTERFACE_DESCRIPTOR_DATA)); // Validate the start address size_t alignmentStartAddress = 64 * sizeof(uint8_t); EXPECT_EQ(0u, cmd->getInterfaceDescriptorDataStartAddress() % alignmentStartAddress); // Validate the length EXPECT_NE(0u, cmd->getInterfaceDescriptorTotalLength()); size_t alignmentTotalLength = 32 * sizeof(uint8_t); EXPECT_EQ(0u, cmd->getInterfaceDescriptorTotalLength() % alignmentTotalLength); // Generically validate this command FamilyType::PARSE::template validateCommand(cmdList.begin(), itorMediaInterfaceDescriptorLoad); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyBufferRectTest, WhenCopyingBufferRect2DThenInterfaceDescriptorDataIsCorrect) { typedef typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD MEDIA_INTERFACE_DESCRIPTOR_LOAD; typedef typename FamilyType::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; enqueueCopyBufferRect2D(); auto *cmdSBA = (STATE_BASE_ADDRESS *)cmdStateBaseAddress; auto &IDD = *(INTERFACE_DESCRIPTOR_DATA *)cmdInterfaceDescriptorData; // Validate the kernel start pointer. Technically, a kernel can start at address 0 but let's force a value. 
auto kernelStartPointer = ((uint64_t)IDD.getKernelStartPointerHigh() << 32) + IDD.getKernelStartPointer(); EXPECT_LE(kernelStartPointer, cmdSBA->getInstructionBufferSize() * MemoryConstants::pageSize); EXPECT_NE(0u, IDD.getNumberOfThreadsInGpgpuThreadGroup()); EXPECT_NE(0u, IDD.getCrossThreadConstantDataReadLength()); EXPECT_NE(0u, IDD.getConstantIndirectUrbEntryReadLength()); } HWTEST_F(EnqueueCopyBufferRectTest, WhenCopyingBufferRect2DThenNumberOfPipelineSelectsIsOne) { enqueueCopyBufferRect2D(); int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect(); EXPECT_EQ(1, numCommands); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyBufferRectTest, WhenCopyingBufferRect2DThenMediaVfeStateIsSetCorrectly) { enqueueCopyBufferRect2D(); validateMediaVFEState(&pDevice->getHardwareInfo(), cmdMediaVfeState, cmdList, itorMediaVfeState); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyBufferRectTest, WhenCopyingBufferRect3DThenGpgpuWalkerIsCorrect) { typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; enqueueCopyBufferRect3D(); auto *cmd = (GPGPU_WALKER *)cmdWalker; ASSERT_NE(nullptr, cmd); // Verify GPGPU_WALKER parameters EXPECT_NE(0u, cmd->getThreadGroupIdXDimension()); EXPECT_NE(0u, cmd->getThreadGroupIdYDimension()); EXPECT_LT(1u, cmd->getThreadGroupIdZDimension()); EXPECT_NE(0u, cmd->getRightExecutionMask()); EXPECT_NE(0u, cmd->getBottomExecutionMask()); EXPECT_EQ(GPGPU_WALKER::SIMD_SIZE_SIMD32, cmd->getSimdSize()); EXPECT_NE(0u, cmd->getIndirectDataLength()); EXPECT_EQ(0u, cmd->getIndirectDataLength() % GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE); EXPECT_FALSE(cmd->getIndirectParameterEnable()); // Compute the SIMD lane mask size_t simd = cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD32 ? 32 : cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD16 ? 
16 : 8; uint64_t simdMask = maxNBitValue(simd); // Mask off lanes based on the execution masks auto laneMaskRight = cmd->getRightExecutionMask() & simdMask; auto lanesPerThreadX = 0; while (laneMaskRight) { lanesPerThreadX += laneMaskRight & 1; laneMaskRight >>= 1; } } HWTEST_F(EnqueueCopyBufferRectTest, WhenCopyingBufferRect3DThenTaskLevelIsIncremented) { auto taskLevelBefore = pCmdQ->taskLevel; enqueueCopyBufferRect3D(); EXPECT_GT(pCmdQ->taskLevel, taskLevelBefore); } HWTEST_F(EnqueueCopyBufferRectTest, WhenCopyingBufferRect3DThenCommandsAreAdded) { auto usedCmdBufferBefore = pCS->getUsed(); enqueueCopyBufferRect3D(); EXPECT_NE(usedCmdBufferBefore, pCS->getUsed()); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyBufferRectTest, WhenCopyingBufferRect3DThenIndirectDataIsAdded) { auto usedIndirectHeapBefore = pDSH->getUsed(); enqueueCopyBufferRect3D(); EXPECT_NE(usedIndirectHeapBefore, pDSH->getUsed()); } HWTEST_F(EnqueueCopyBufferRectTest, WhenCopyingBufferRect3DThenL3ProgrammingIsCorrect) { enqueueCopyBufferRect3D(); validateL3Programming(cmdList, itorWalker); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyBufferRectTest, When3DEnqueueIsDoneThenStateBaseAddressIsProperlyProgrammed) { enqueueCopyBufferRect3D(); auto &ultCsr = this->pDevice->getUltCommandStreamReceiver(); auto &hwHelper = HwHelper::get(pDevice->getHardwareInfo().platform.eRenderCoreFamily); validateStateBaseAddress(ultCsr.getMemoryManager()->getInternalHeapBaseAddress(ultCsr.rootDeviceIndex, pIOH->getGraphicsAllocation()->isAllocatedInLocalMemoryPool()), ultCsr.getMemoryManager()->getInternalHeapBaseAddress(ultCsr.rootDeviceIndex, !hwHelper.useSystemMemoryPlacementForISA(pDevice->getHardwareInfo())), pDSH, pIOH, pSSH, itorPipelineSelect, itorWalker, cmdList, 0llu); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyBufferRectTest, WhenCopyingBufferRect3DThenMediaInterfaceDescriptorLoadIsCorrect) { typedef typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD MEDIA_INTERFACE_DESCRIPTOR_LOAD; typedef typename 
FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; enqueueCopyBufferRect3D(); auto *cmd = (MEDIA_INTERFACE_DESCRIPTOR_LOAD *)cmdMediaInterfaceDescriptorLoad; ASSERT_NE(nullptr, cmd); // Verify we have a valid length -- multiple of INTERFACE_DESCRIPTOR_DATAs EXPECT_EQ(0u, cmd->getInterfaceDescriptorTotalLength() % sizeof(INTERFACE_DESCRIPTOR_DATA)); // Validate the start address size_t alignmentStartAddress = 64 * sizeof(uint8_t); EXPECT_EQ(0u, cmd->getInterfaceDescriptorDataStartAddress() % alignmentStartAddress); // Validate the length EXPECT_NE(0u, cmd->getInterfaceDescriptorTotalLength()); size_t alignmentTotalLength = 32 * sizeof(uint8_t); EXPECT_EQ(0u, cmd->getInterfaceDescriptorTotalLength() % alignmentTotalLength); // Generically validate this command FamilyType::PARSE::template validateCommand(cmdList.begin(), itorMediaInterfaceDescriptorLoad); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyBufferRectTest, WhenCopyingBufferRect3DThenInterfaceDescriptorDataIsCorrect) { typedef typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD MEDIA_INTERFACE_DESCRIPTOR_LOAD; typedef typename FamilyType::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; enqueueCopyBufferRect3D(); auto *cmdSBA = (STATE_BASE_ADDRESS *)cmdStateBaseAddress; auto &IDD = *(INTERFACE_DESCRIPTOR_DATA *)cmdInterfaceDescriptorData; // Validate the kernel start pointer. Technically, a kernel can start at address 0 but let's force a value. 
auto kernelStartPointer = ((uint64_t)IDD.getKernelStartPointerHigh() << 32) + IDD.getKernelStartPointer(); EXPECT_LE(kernelStartPointer, cmdSBA->getInstructionBufferSize() * MemoryConstants::pageSize); EXPECT_NE(0u, IDD.getNumberOfThreadsInGpgpuThreadGroup()); EXPECT_NE(0u, IDD.getCrossThreadConstantDataReadLength()); EXPECT_NE(0u, IDD.getConstantIndirectUrbEntryReadLength()); } HWTEST_F(EnqueueCopyBufferRectTest, WhenCopyingBufferRect3DThenNumberOfPipelineSelectsIsOne) { enqueueCopyBufferRect3D(); int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect(); EXPECT_EQ(1, numCommands); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyBufferRectTest, WhenCopyingBufferRect3DThenMediaVfeStateIsSetCorrectly) { enqueueCopyBufferRect3D(); validateMediaVFEState(&pDevice->getHardwareInfo(), cmdMediaVfeState, cmdList, itorMediaVfeState); } struct EnqueueCopyBufferRectHw : public ::testing::Test { void SetUp() override { if (is32bit) { GTEST_SKIP(); } device = std::make_unique(MockClDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); context.reset(new MockContext(device.get())); dstBuffer = std::unique_ptr(BufferHelper::create(context.get())); } std::unique_ptr device; std::unique_ptr context; MockBuffer srcBuffer; std::unique_ptr dstBuffer; const size_t rowPitch = 100; const size_t slicePitch = 100 * 100; std::array srcOrigin = {{0, 0, 0}}; std::array dstOrigin = {{0, 0, 0}}; std::array region = {{50, 50, 1}}; uint64_t bigSize = 4ull * MemoryConstants::gigaByte; uint64_t smallSize = 4ull * MemoryConstants::gigaByte - 1; protected: template cl_int enqueueCopyBufferRectHw(CommandQueueHw *cmdQ) { auto retVal = CL_SUCCESS; retVal = clEnqueueCopyBufferRect( cmdQ, &srcBuffer, dstBuffer.get(), srcOrigin.data(), dstOrigin.data(), region.data(), rowPitch, slicePitch, rowPitch, slicePitch, 0, nullptr, nullptr); return retVal; } }; using EnqueueCopyBufferRectStateless = EnqueueCopyBufferRectHw; HWTEST_F(EnqueueCopyBufferRectStateless, 
GivenValidParametersWhenCopyingBufferRectStatelessThenSuccessIsReturned) { std::unique_ptr> cmdQ(new CommandQueueStateless(context.get(), device.get())); srcBuffer.size = static_cast(bigSize); auto retVal = enqueueCopyBufferRectHw(cmdQ.get()); EXPECT_EQ(CL_SUCCESS, retVal); } using EnqueueCopyBufferRectStateful = EnqueueCopyBufferRectHw; HWTEST_F(EnqueueCopyBufferRectStateful, GivenValidParametersWhenCopyingBufferRectStatefulThenSuccessIsReturned) { std::unique_ptr> cmdQ(new CommandQueueStateful(context.get(), device.get())); srcBuffer.size = static_cast(smallSize); auto retVal = enqueueCopyBufferRectHw(cmdQ.get()); EXPECT_EQ(CL_SUCCESS, retVal); } compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/enqueue_copy_buffer_tests.cpp000066400000000000000000000407771422164147700324500ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/built_ins/built_ins.h" #include "shared/source/helpers/ptr_math.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/libult/ult_command_stream_receiver.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/built_ins/builtins_dispatch_builder.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/helpers/dispatch_info.h" #include "opencl/source/kernel/kernel.h" #include "opencl/test/unit_test/api/cl_api_tests.h" #include "opencl/test/unit_test/command_queue/enqueue_copy_buffer_fixture.h" #include "opencl/test/unit_test/command_queue/enqueue_fixture.h" #include "opencl/test/unit_test/gen_common/gen_commands_common_validation.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "reg_configs_common.h" #include using namespace NEO; using clEnqueueCopyBufferTests = api_tests; HWTEST_F(clEnqueueCopyBufferTests, 
GivenNullSrcMemObjWhenCopyingBufferThenClInvalidMemObjectErrorIsReturned) { MockBuffer dstBuffer{}; auto retVal = clEnqueueCopyBuffer( pCommandQueue, nullptr, &dstBuffer, 0, 0, sizeof(float), 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } HWTEST_F(clEnqueueCopyBufferTests, GivenNullDstMemObjWhenCopyingBufferThenClInvalidMemObjectErrorIsReturned) { MockBuffer srcBuffer{}; auto retVal = clEnqueueCopyBuffer( pCommandQueue, &srcBuffer, nullptr, 0, 0, sizeof(float), 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } HWTEST_F(clEnqueueCopyBufferTests, GivenCorrectArgumentsWhenCopyingBufferThenSuccessIsReturned) { MockBuffer srcBuffer{}; MockBuffer dstBuffer{}; retVal = clEnqueueCopyBuffer( pCommandQueue, &srcBuffer, &dstBuffer, 0, 0, 128, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clEnqueueCopyBufferTests, GivenQueueIncapableWhenCopyingBufferThenInvalidOperationIsReturned) { MockBuffer srcBuffer{}; MockBuffer dstBuffer{}; this->disableQueueCapabilities(CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_INTEL); retVal = clEnqueueCopyBuffer( pCommandQueue, &srcBuffer, &dstBuffer, 0, 0, 128, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_OPERATION, retVal); } HWTEST_F(EnqueueCopyBufferTest, GivenInvalidMemoryLocationWhenCopyingBufferThenClInvalidValueErrorIsReturned) { auto retVal = clEnqueueCopyBuffer( pCmdQ, srcBuffer, dstBuffer, 0, 8, 128, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); retVal = clEnqueueCopyBuffer( pCmdQ, srcBuffer, dstBuffer, 8, 0, 128, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); } HWTEST_F(EnqueueCopyBufferTest, WhenCopyingBufferThenTaskCountIsAlignedWithCsr) { //this test case assumes IOQ auto &csr = pDevice->getUltCommandStreamReceiver(); csr.taskCount = pCmdQ->taskCount + 100; csr.taskLevel = pCmdQ->taskLevel + 50; enqueueCopyBuffer(); EXPECT_EQ(csr.peekTaskCount(), pCmdQ->taskCount); EXPECT_EQ(csr.peekTaskLevel(), pCmdQ->taskLevel + 1); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyBufferTest, 
WhenCopyingBufferThenGpgpuWalkerIsCorrect) { typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; enqueueCopyBufferAndParse(); auto *cmd = (GPGPU_WALKER *)cmdWalker; ASSERT_NE(nullptr, cmd); // Verify GPGPU_WALKER parameters EXPECT_NE(0u, cmd->getThreadGroupIdXDimension()); EXPECT_NE(0u, cmd->getThreadGroupIdYDimension()); EXPECT_NE(0u, cmd->getThreadGroupIdZDimension()); EXPECT_NE(0u, cmd->getRightExecutionMask()); EXPECT_NE(0u, cmd->getBottomExecutionMask()); EXPECT_EQ(GPGPU_WALKER::SIMD_SIZE_SIMD32, cmd->getSimdSize()); EXPECT_NE(0u, cmd->getIndirectDataLength()); EXPECT_FALSE(cmd->getIndirectParameterEnable()); // Compute the SIMD lane mask size_t simd = cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD32 ? 32 : cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD16 ? 16 : 8; uint64_t simdMask = maxNBitValue(simd); // Mask off lanes based on the execution masks auto laneMaskRight = cmd->getRightExecutionMask() & simdMask; auto lanesPerThreadX = 0; while (laneMaskRight) { lanesPerThreadX += laneMaskRight & 1; laneMaskRight >>= 1; } } HWTEST_F(EnqueueCopyBufferTest, WhenCopyingBufferThenTaskLevelIsIncremented) { auto taskLevelBefore = pCmdQ->taskLevel; enqueueCopyBuffer(); EXPECT_GT(pCmdQ->taskLevel, taskLevelBefore); } HWTEST_F(EnqueueCopyBufferTest, WhenCopyingBufferThenCommandsAreAdded) { auto usedCmdBufferBefore = pCS->getUsed(); enqueueCopyBuffer(); EXPECT_NE(usedCmdBufferBefore, pCS->getUsed()); } HWTEST_F(EnqueueCopyBufferTest, GivenGpuHangAndBlockingCallWhenCopyingBufferThenOutOfResourcesIsReturned) { DebugManagerStateRestore stateRestore; DebugManager.flags.MakeEachEnqueueBlocking.set(true); std::unique_ptr device(new MockClDevice{MockClDevice::createWithNewExecutionEnvironment(nullptr)}); cl_queue_properties props = {}; MockCommandQueueHw mockCommandQueueHw(&context, device.get(), &props); mockCommandQueueHw.waitForAllEnginesReturnValue = WaitStatus::GpuHang; const auto enqueueResult = EnqueueCopyBufferHelper::enqueueCopyBuffer( &mockCommandQueueHw, 
srcBuffer, dstBuffer, 0, 0, sizeof(float), 0, nullptr, nullptr); EXPECT_EQ(CL_OUT_OF_RESOURCES, enqueueResult); EXPECT_EQ(1, mockCommandQueueHw.waitForAllEnginesCalledCount); } HWTEST_F(EnqueueCopyBufferTest, WhenCopyingBufferThenIndirectDataGetsAdded) { auto dshBefore = pDSH->getUsed(); auto iohBefore = pIOH->getUsed(); auto sshBefore = pSSH->getUsed(); enqueueCopyBuffer(); auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBuffer, pCmdQ->getClDevice()); ASSERT_NE(nullptr, &builder); BuiltinOpParams dc; dc.srcMemObj = srcBuffer; dc.dstMemObj = dstBuffer; dc.srcOffset = {EnqueueCopyBufferTraits::srcOffset, 0, 0}; dc.dstOffset = {EnqueueCopyBufferTraits::dstOffset, 0, 0}; dc.size = {EnqueueCopyBufferTraits::size, 0, 0}; MultiDispatchInfo multiDispatchInfo(dc); builder.buildDispatchInfos(multiDispatchInfo); EXPECT_NE(0u, multiDispatchInfo.size()); auto kernel = multiDispatchInfo.begin()->getKernel(); auto kernelDescriptor = &kernel->getKernelInfo().kernelDescriptor; EXPECT_TRUE(UnitTestHelper::evaluateDshUsage(dshBefore, pDSH->getUsed(), kernelDescriptor, rootDeviceIndex)); EXPECT_NE(iohBefore, pIOH->getUsed()); if (kernel->usesBindfulAddressingForBuffers()) { EXPECT_NE(sshBefore, pSSH->getUsed()); } } HWTEST_F(EnqueueCopyBufferTest, WhenCopyingBufferStatelessThenStatelessKernelIsUsed) { auto srcBuffer = std::unique_ptr(BufferHelper<>::create()); auto dstBuffer = std::unique_ptr(BufferHelper<>::create()); auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBufferStateless, pCmdQ->getClDevice()); ASSERT_NE(nullptr, &builder); BuiltinOpParams dc; dc.srcMemObj = srcBuffer.get(); dc.dstMemObj = dstBuffer.get(); dc.srcOffset = {EnqueueCopyBufferTraits::srcOffset, 0, 0}; dc.dstOffset = {EnqueueCopyBufferTraits::dstOffset, 0, 0}; dc.size = {EnqueueCopyBufferTraits::size, 0, 0}; MultiDispatchInfo multiDispatchInfo(dc); builder.buildDispatchInfos(multiDispatchInfo); EXPECT_NE(0u, 
multiDispatchInfo.size()); auto kernel = multiDispatchInfo.begin()->getKernel(); EXPECT_TRUE(kernel->getKernelInfo().kernelDescriptor.kernelAttributes.supportsBuffersBiggerThan4Gb()); EXPECT_FALSE(kernel->getKernelInfo().getArgDescriptorAt(0).as().isPureStateful()); } HWTEST_F(EnqueueCopyBufferTest, WhenCopyingBufferThenL3ProgrammingIsCorrect) { enqueueCopyBufferAndParse(); validateL3Programming(cmdList, itorWalker); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyBufferTest, WhenEnqueueIsDoneThenStateBaseAddressIsProperlyProgrammed) { enqueueCopyBufferAndParse(); auto &ultCsr = this->pDevice->getUltCommandStreamReceiver(); auto &hwHelper = HwHelper::get(pDevice->getHardwareInfo().platform.eRenderCoreFamily); validateStateBaseAddress(ultCsr.getMemoryManager()->getInternalHeapBaseAddress(ultCsr.rootDeviceIndex, pIOH->getGraphicsAllocation()->isAllocatedInLocalMemoryPool()), ultCsr.getMemoryManager()->getInternalHeapBaseAddress(ultCsr.rootDeviceIndex, !hwHelper.useSystemMemoryPlacementForISA(pDevice->getHardwareInfo())), pDSH, pIOH, pSSH, itorPipelineSelect, itorWalker, cmdList, 0llu); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyBufferTest, WhenCopyingBufferThenMediaInterfaceDescriptorLoadIsCorrect) { typedef typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD MEDIA_INTERFACE_DESCRIPTOR_LOAD; typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; enqueueCopyBufferAndParse(); auto *cmd = (MEDIA_INTERFACE_DESCRIPTOR_LOAD *)cmdMediaInterfaceDescriptorLoad; ASSERT_NE(nullptr, cmd); // Verify we have a valid length -- multiple of INTERFACE_DESCRIPTOR_DATAs EXPECT_EQ(0u, cmd->getInterfaceDescriptorTotalLength() % sizeof(INTERFACE_DESCRIPTOR_DATA)); // Validate the start address size_t alignmentStartAddress = 64 * sizeof(uint8_t); EXPECT_EQ(0u, cmd->getInterfaceDescriptorDataStartAddress() % alignmentStartAddress); // Validate the length EXPECT_NE(0u, cmd->getInterfaceDescriptorTotalLength()); size_t alignmentTotalLength = 32 * sizeof(uint8_t); 
EXPECT_EQ(0u, cmd->getInterfaceDescriptorTotalLength() % alignmentTotalLength); // Generically validate this command FamilyType::PARSE::template validateCommand(cmdList.begin(), itorMediaInterfaceDescriptorLoad); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyBufferTest, WhenCopyingBufferThenInterfaceDescriptorDataIsCorrect) { typedef typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD MEDIA_INTERFACE_DESCRIPTOR_LOAD; typedef typename FamilyType::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; enqueueCopyBufferAndParse(); auto cmdIDD = (INTERFACE_DESCRIPTOR_DATA *)cmdInterfaceDescriptorData; auto cmdSBA = (STATE_BASE_ADDRESS *)cmdStateBaseAddress; // Validate the kernel start pointer. Technically, a kernel can start at address 0 but let's force a value. auto kernelStartPointer = ((uint64_t)cmdIDD->getKernelStartPointerHigh() << 32) + cmdIDD->getKernelStartPointer(); EXPECT_LE(kernelStartPointer, cmdSBA->getInstructionBufferSize() * MemoryConstants::pageSize); EXPECT_NE(0u, cmdIDD->getNumberOfThreadsInGpgpuThreadGroup()); EXPECT_NE(0u, cmdIDD->getCrossThreadConstantDataReadLength()); EXPECT_NE(0u, cmdIDD->getConstantIndirectUrbEntryReadLength()); } HWTEST_F(EnqueueCopyBufferTest, WhenCopyingBufferThenNumberOfPipelineSelectsIsOne) { enqueueCopyBufferAndParse(); int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect(); EXPECT_EQ(1, numCommands); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyBufferTest, WhenCopyingBufferThenMediaVfeStateIsSetCorrectly) { enqueueCopyBufferAndParse(); validateMediaVFEState(&pDevice->getHardwareInfo(), cmdMediaVfeState, cmdList, itorMediaVfeState); } HWTEST_F(EnqueueCopyBufferTest, WhenCopyingBufferThenArgumentZeroMatchesSourceAddress) { enqueueCopyBufferAndParse(); // Extract the kernel used auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBuffer, pCmdQ->getClDevice()); ASSERT_NE(nullptr, &builder); BuiltinOpParams 
dc; dc.srcMemObj = srcBuffer; dc.dstMemObj = dstBuffer; dc.srcOffset = {EnqueueCopyBufferTraits::srcOffset, 0, 0}; dc.dstOffset = {EnqueueCopyBufferTraits::dstOffset, 0, 0}; dc.size = {EnqueueCopyBufferTraits::size, 0, 0}; MultiDispatchInfo multiDispatchInfo(dc); builder.buildDispatchInfos(multiDispatchInfo); EXPECT_NE(0u, multiDispatchInfo.size()); auto kernel = multiDispatchInfo.begin()->getKernel(); ASSERT_NE(nullptr, kernel); // Determine where the argument is auto pArgument = (void **)getStatelessArgumentPointer(kernel->getKernelInfo(), 0u, pCmdQ->getIndirectHeap(IndirectHeap::Type::INDIRECT_OBJECT, 0), rootDeviceIndex); EXPECT_EQ(reinterpret_cast(srcBuffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddress()), *pArgument); } HWTEST_F(EnqueueCopyBufferTest, WhenCopyingBufferThenArgumentOneMatchesDestinationAddress) { enqueueCopyBufferAndParse(); // Extract the kernel used auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBuffer, pCmdQ->getClDevice()); ASSERT_NE(nullptr, &builder); BuiltinOpParams dc; dc.srcMemObj = srcBuffer; dc.dstMemObj = dstBuffer; dc.srcOffset = {EnqueueCopyBufferTraits::srcOffset, 0, 0}; dc.dstOffset = {EnqueueCopyBufferTraits::dstOffset, 0, 0}; dc.size = {EnqueueCopyBufferTraits::size, 0, 0}; MultiDispatchInfo multiDispatchInfo(dc); builder.buildDispatchInfos(multiDispatchInfo); EXPECT_NE(0u, multiDispatchInfo.size()); auto kernel = multiDispatchInfo.begin()->getKernel(); ASSERT_NE(nullptr, kernel); // Determine where the argument is auto pArgument = (void **)getStatelessArgumentPointer(kernel->getKernelInfo(), 1u, pCmdQ->getIndirectHeap(IndirectHeap::Type::INDIRECT_OBJECT, 0), rootDeviceIndex); EXPECT_EQ(reinterpret_cast(dstBuffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddress()), *pArgument); } struct EnqueueCopyBufferHw : public ::testing::Test { void SetUp() override { if (is32bit) { GTEST_SKIP(); } device = 
std::make_unique(MockClDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); context.reset(new MockContext(device.get())); dstBuffer = std::unique_ptr(BufferHelper<>::create(context.get())); } std::unique_ptr device; std::unique_ptr context; std::unique_ptr dstBuffer; MockBuffer srcBuffer; uint64_t bigSize = 4ull * MemoryConstants::gigaByte; uint64_t smallSize = 4ull * MemoryConstants::gigaByte - 1; }; using EnqueueCopyBufferStatelessTest = EnqueueCopyBufferHw; HWTEST_F(EnqueueCopyBufferStatelessTest, givenBuffersWhenCopyingBufferStatelessThenSuccessIsReturned) { auto cmdQ = std::make_unique>(context.get(), device.get()); srcBuffer.size = static_cast(bigSize); auto retVal = cmdQ->enqueueCopyBuffer( &srcBuffer, dstBuffer.get(), 0, 0, sizeof(float), 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } using EnqueueCopyBufferStatefulTest = EnqueueCopyBufferHw; HWTEST_F(EnqueueCopyBufferStatefulTest, givenBuffersWhenCopyingBufferStatefulThenSuccessIsReturned) { auto cmdQ = std::make_unique>(context.get(), device.get()); srcBuffer.size = static_cast(smallSize); auto retVal = cmdQ->enqueueCopyBuffer( &srcBuffer, dstBuffer.get(), 0, 0, sizeof(float), 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } enqueue_copy_buffer_to_image_fixture.h000066400000000000000000000047131422164147700342140ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/ptr_math.h" #include "shared/test/common/test_macros/test_checks_shared.h" #include "opencl/test/unit_test/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/command_queue/enqueue_fixture.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "gtest/gtest.h" namespace NEO { struct EnqueueCopyBufferToImageTest : public CommandEnqueueFixture, public 
::testing::Test { void SetUp() override { REQUIRE_IMAGES_OR_SKIP(defaultHwInfo); CommandEnqueueFixture::SetUp(); BufferDefaults::context = new MockContext(pClDevice); context = new MockContext(pClDevice); srcBuffer = BufferHelper<>::create(context); dstImage = Image2dHelper<>::create(context); } void TearDown() override { if (IsSkipped()) { return; } delete dstImage; delete srcBuffer; delete BufferDefaults::context; delete context; CommandEnqueueFixture::TearDown(); } protected: template void enqueueCopyBufferToImage() { auto retVal = EnqueueCopyBufferToImageHelper<>::enqueueCopyBufferToImage( pCmdQ, srcBuffer, dstImage); EXPECT_EQ(CL_SUCCESS, retVal); parseCommands(*pCmdQ); } MockContext *context = nullptr; Buffer *srcBuffer = nullptr; Image *dstImage = nullptr; }; struct EnqueueCopyBufferToImageMipMapTest : public CommandEnqueueFixture, public ::testing::Test, public ::testing::WithParamInterface { void SetUp(void) override { REQUIRE_IMAGES_OR_SKIP(defaultHwInfo); CommandEnqueueFixture::SetUp(); BufferDefaults::context = new MockContext(pClDevice); context = new MockContext(pClDevice); srcBuffer = BufferHelper<>::create(context); } void TearDown(void) override { if (IsSkipped()) { return; } delete srcBuffer; delete BufferDefaults::context; delete context; CommandEnqueueFixture::TearDown(); } MockContext *context = nullptr; Buffer *srcBuffer = nullptr; }; } // namespace NEO enqueue_copy_buffer_to_image_tests.cpp000066400000000000000000000433631422164147700342270ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/libult/ult_command_stream_receiver.h" #include "shared/test/common/mocks/mock_builtins.h" #include "shared/test/common/mocks/mock_gmm_resource_info.h" #include 
"shared/test/common/test_macros/test.h" #include "shared/test/common/test_macros/test_checks_shared.h" #include "opencl/source/built_ins/builtins_dispatch_builder.h" #include "opencl/test/unit_test/command_queue/enqueue_copy_buffer_to_image_fixture.h" #include "opencl/test/unit_test/fixtures/one_mip_level_image_fixture.h" #include "opencl/test/unit_test/gen_common/gen_commands_common_validation.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_builtin_dispatch_info_builder.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "reg_configs_common.h" #include using namespace NEO; HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyBufferToImageTest, WhenCopyingBufferToImageThenGpgpuWalkerIsCorrect) { typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; enqueueCopyBufferToImage(); auto *cmd = reinterpret_cast(cmdWalker); ASSERT_NE(nullptr, cmd); // Verify GPGPU_WALKER parameters EXPECT_NE(0u, cmd->getThreadGroupIdXDimension()); EXPECT_NE(0u, cmd->getThreadGroupIdYDimension()); EXPECT_NE(0u, cmd->getThreadGroupIdZDimension()); EXPECT_NE(0u, cmd->getRightExecutionMask()); EXPECT_NE(0u, cmd->getBottomExecutionMask()); EXPECT_EQ(GPGPU_WALKER::SIMD_SIZE_SIMD32, cmd->getSimdSize()); EXPECT_NE(0u, cmd->getIndirectDataLength()); EXPECT_FALSE(cmd->getIndirectParameterEnable()); // Compute the SIMD lane mask size_t simd = cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD32 ? 32 : cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD16 ? 
16 : 8; uint64_t simdMask = maxNBitValue(simd); // Mask off lanes based on the execution masks auto laneMaskRight = cmd->getRightExecutionMask() & simdMask; auto lanesPerThreadX = 0; while (laneMaskRight) { lanesPerThreadX += laneMaskRight & 1; laneMaskRight >>= 1; } } HWTEST_F(EnqueueCopyBufferToImageTest, WhenCopyingBufferToImageThenTaskCountIsAlignedWithCsr) { //this test case assumes IOQ auto &csr = pDevice->getUltCommandStreamReceiver(); csr.taskCount = pCmdQ->taskCount + 100; csr.taskLevel = pCmdQ->taskLevel + 50; EnqueueCopyBufferToImageHelper<>::enqueueCopyBufferToImage(pCmdQ, srcBuffer, dstImage); EXPECT_EQ(csr.peekTaskCount(), pCmdQ->taskCount); EXPECT_EQ(csr.peekTaskLevel(), pCmdQ->taskLevel + 1); } HWTEST_F(EnqueueCopyBufferToImageTest, WhenCopyingBufferToImageThenTaskLevelIsIncremented) { auto taskLevelBefore = pCmdQ->taskLevel; EnqueueCopyBufferToImageHelper<>::enqueueCopyBufferToImage(pCmdQ, srcBuffer, dstImage); EXPECT_GT(pCmdQ->taskLevel, taskLevelBefore); } HWTEST_F(EnqueueCopyBufferToImageTest, WhenCopyingBufferToImageThenCommandsAreAdded) { auto usedCmdBufferBefore = pCS->getUsed(); EnqueueCopyBufferToImageHelper<>::enqueueCopyBufferToImage(pCmdQ, srcBuffer, dstImage); EXPECT_NE(usedCmdBufferBefore, pCS->getUsed()); } HWTEST_F(EnqueueCopyBufferToImageTest, WhenCopyingBufferToImageThenIndirectDataGetsAdded) { auto dshBefore = pDSH->getUsed(); auto iohBefore = pIOH->getUsed(); auto sshBefore = pSSH->getUsed(); EnqueueCopyBufferToImageHelper<>::enqueueCopyBufferToImage(pCmdQ, srcBuffer, dstImage); EXPECT_TRUE(UnitTestHelper::evaluateDshUsage(dshBefore, pDSH->getUsed(), nullptr, rootDeviceIndex)); EXPECT_NE(iohBefore, pIOH->getUsed()); EXPECT_NE(sshBefore, pSSH->getUsed()); } HWTEST_F(EnqueueCopyBufferToImageTest, WhenCopyingBufferToImageThenL3ProgrammingIsCorrect) { enqueueCopyBufferToImage(); validateL3Programming(cmdList, itorWalker); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyBufferToImageTest, 
WhenEnqueueIsDoneThenStateBaseAddressIsProperlyProgrammed) { enqueueCopyBufferToImage(); auto &ultCsr = this->pDevice->getUltCommandStreamReceiver(); auto &hwHelper = HwHelper::get(pDevice->getHardwareInfo().platform.eRenderCoreFamily); validateStateBaseAddress(ultCsr.getMemoryManager()->getInternalHeapBaseAddress(ultCsr.rootDeviceIndex, pIOH->getGraphicsAllocation()->isAllocatedInLocalMemoryPool()), ultCsr.getMemoryManager()->getInternalHeapBaseAddress(ultCsr.rootDeviceIndex, !hwHelper.useSystemMemoryPlacementForISA(pDevice->getHardwareInfo())), pDSH, pIOH, pSSH, itorPipelineSelect, itorWalker, cmdList, 0llu); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyBufferToImageTest, WhenCopyingBufferToImageThenMediaInterfaceDescriptorLoadIsCorrect) { typedef typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD MEDIA_INTERFACE_DESCRIPTOR_LOAD; typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; enqueueCopyBufferToImage(); // All state should be programmed before walker auto cmd = reinterpret_cast(cmdMediaInterfaceDescriptorLoad); ASSERT_NE(nullptr, cmd); // Verify we have a valid length -- multiple of INTERFACE_DESCRIPTOR_DATAs EXPECT_EQ(0u, cmd->getInterfaceDescriptorTotalLength() % sizeof(INTERFACE_DESCRIPTOR_DATA)); // Validate the start address size_t alignmentStartAddress = 64 * sizeof(uint8_t); EXPECT_EQ(0u, cmd->getInterfaceDescriptorDataStartAddress() % alignmentStartAddress); // Validate the length EXPECT_NE(0u, cmd->getInterfaceDescriptorTotalLength()); size_t alignmentTotalLength = 32 * sizeof(uint8_t); EXPECT_EQ(0u, cmd->getInterfaceDescriptorTotalLength() % alignmentTotalLength); // Generically validate this command FamilyType::PARSE::template validateCommand(cmdList.begin(), itorMediaInterfaceDescriptorLoad); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyBufferToImageTest, WhenCopyingBufferToImageThenInterfaceDescriptorDataIsCorrect) { typedef typename FamilyType::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; typedef typename 
FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; enqueueCopyBufferToImage(); // Extract the interfaceDescriptorData auto cmdSBA = (STATE_BASE_ADDRESS *)cmdStateBaseAddress; auto &interfaceDescriptorData = *(INTERFACE_DESCRIPTOR_DATA *)cmdInterfaceDescriptorData; // Validate the kernel start pointer. Technically, a kernel can start at address 0 but let's force a value. auto kernelStartPointer = ((uint64_t)interfaceDescriptorData.getKernelStartPointerHigh() << 32) + interfaceDescriptorData.getKernelStartPointer(); EXPECT_LE(kernelStartPointer, cmdSBA->getInstructionBufferSize() * MemoryConstants::pageSize); size_t maxLocalSize = 256u; auto localWorkSize = std::min( maxLocalSize, Image2dDefaults::imageDesc.image_width * Image2dDefaults::imageDesc.image_height); auto simd = 32u; auto numThreadsPerThreadGroup = Math::divideAndRoundUp(localWorkSize, simd); EXPECT_EQ(numThreadsPerThreadGroup, interfaceDescriptorData.getNumberOfThreadsInGpgpuThreadGroup()); EXPECT_NE(0u, interfaceDescriptorData.getCrossThreadConstantDataReadLength()); EXPECT_NE(0u, interfaceDescriptorData.getConstantIndirectUrbEntryReadLength()); // We shouldn't have these pointers the same. 
EXPECT_NE(kernelStartPointer, interfaceDescriptorData.getBindingTablePointer()); } HWTEST_F(EnqueueCopyBufferToImageTest, WhenCopyingBufferToImageThenSurfaceStateIsCorrect) { typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto mockCmdQ = std::make_unique>(context, pClDevice, nullptr); VariableBackup cmdQBackup(&pCmdQ, mockCmdQ.get()); mockCmdQ->storeMultiDispatchInfo = true; enqueueCopyBufferToImage(); const auto &kernelInfo = mockCmdQ->storedMultiDispatchInfo.begin()->getKernel()->getKernelInfo(); uint32_t index = static_cast(kernelInfo.getArgDescriptorAt(1).template as().bindful) / sizeof(RENDER_SURFACE_STATE); const auto &surfaceState = getSurfaceState(&pCmdQ->getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0), index); const auto &imageDesc = dstImage->getImageDesc(); // EnqueueReadImage uses multi-byte copies depending on per-pixel-size-in-bytes EXPECT_EQ(imageDesc.image_width, surfaceState.getWidth()); EXPECT_EQ(imageDesc.image_height, surfaceState.getHeight()); EXPECT_NE(0u, surfaceState.getSurfacePitch()); EXPECT_NE(0u, surfaceState.getSurfaceType()); auto surfaceFormat = surfaceState.getSurfaceFormat(); bool isRedescribedFormat = surfaceFormat == RENDER_SURFACE_STATE::SURFACE_FORMAT_R32G32B32A32_UINT || surfaceFormat == RENDER_SURFACE_STATE::SURFACE_FORMAT_R32G32_UINT || surfaceFormat == RENDER_SURFACE_STATE::SURFACE_FORMAT_R32_UINT || surfaceFormat == RENDER_SURFACE_STATE::SURFACE_FORMAT_R16_UINT || surfaceFormat == RENDER_SURFACE_STATE::SURFACE_FORMAT_R8_UINT; EXPECT_TRUE(isRedescribedFormat); EXPECT_EQ(MockGmmResourceInfo::getHAlignSurfaceStateResult, surfaceState.getSurfaceHorizontalAlignment()); EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_VERTICAL_ALIGNMENT_VALIGN_4, surfaceState.getSurfaceVerticalAlignment()); EXPECT_EQ(dstImage->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddress(), surfaceState.getSurfaceBaseAddress()); } HWTEST_F(EnqueueCopyBufferToImageTest, 
WhenCopyingBufferToImageThenNumberOfPipelineSelectsIsOne) { enqueueCopyBufferToImage(); int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect(); EXPECT_EQ(1, numCommands); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyBufferToImageTest, WhenCopyingBufferToImageThenMediaVfeStateIsSetCorrectly) { enqueueCopyBufferToImage(); validateMediaVFEState(&pDevice->getHardwareInfo(), cmdMediaVfeState, cmdList, itorMediaVfeState); } typedef EnqueueCopyBufferToImageMipMapTest MipMapCopyBufferToImageTest; HWTEST_P(MipMapCopyBufferToImageTest, GivenImageWithMipLevelNonZeroWhenCopyBufferToImageIsCalledThenProperMipLevelIsSet) { auto image_type = (cl_mem_object_type)GetParam(); auto builtIns = new MockBuiltins(); pCmdQ->getDevice().getExecutionEnvironment()->rootDeviceEnvironments[pCmdQ->getDevice().getRootDeviceIndex()]->builtins.reset(builtIns); auto &origBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder( EBuiltInOps::CopyBufferToImage3d, pCmdQ->getClDevice()); // substitute original builder with mock builder auto oldBuilder = pClExecutionEnvironment->setBuiltinDispatchInfoBuilder( rootDeviceIndex, EBuiltInOps::CopyBufferToImage3d, std::unique_ptr(new MockBuiltinDispatchInfoBuilder(*builtIns, pCmdQ->getClDevice(), &origBuilder))); cl_int retVal = CL_SUCCESS; cl_image_desc imageDesc = {}; uint32_t expectedMipLevel = 3; imageDesc.image_type = image_type; imageDesc.num_mip_levels = 10; imageDesc.image_width = 4; imageDesc.image_height = 1; imageDesc.image_depth = 1; size_t origin[] = {0, 0, 0, 0}; size_t region[] = {imageDesc.image_width, 1, 1}; std::unique_ptr image; switch (image_type) { case CL_MEM_OBJECT_IMAGE1D: origin[1] = expectedMipLevel; image = std::unique_ptr(ImageHelper::create(context, &imageDesc)); break; case CL_MEM_OBJECT_IMAGE1D_ARRAY: imageDesc.image_array_size = 2; origin[2] = expectedMipLevel; image = std::unique_ptr(ImageHelper::create(context, &imageDesc)); break; case CL_MEM_OBJECT_IMAGE2D: origin[2] = expectedMipLevel; image = 
std::unique_ptr(ImageHelper::create(context, &imageDesc)); break; case CL_MEM_OBJECT_IMAGE2D_ARRAY: imageDesc.image_array_size = 2; origin[3] = expectedMipLevel; image = std::unique_ptr(ImageHelper::create(context, &imageDesc)); break; case CL_MEM_OBJECT_IMAGE3D: origin[3] = expectedMipLevel; image = std::unique_ptr(ImageHelper::create(context, &imageDesc)); break; } EXPECT_NE(nullptr, image.get()); std::unique_ptr ptr = std::unique_ptr(new uint32_t[3]); retVal = pCmdQ->enqueueCopyBufferToImage(srcBuffer, image.get(), 0, origin, region, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); auto &mockBuilder = static_cast(BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToImage3d, pCmdQ->getClDevice())); auto params = mockBuilder.getBuiltinOpParams(); EXPECT_EQ(expectedMipLevel, params->dstMipLevel); // restore original builder and retrieve mock builder auto newBuilder = pClExecutionEnvironment->setBuiltinDispatchInfoBuilder( rootDeviceIndex, EBuiltInOps::CopyBufferToImage3d, std::move(oldBuilder)); EXPECT_NE(nullptr, newBuilder); } INSTANTIATE_TEST_CASE_P(MipMapCopyBufferToImageTest_GivenImageWithMipLevelNonZeroWhenCopyBufferToImageIsCalledThenProperMipLevelIsSet, MipMapCopyBufferToImageTest, ::testing::Values(CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE2D, CL_MEM_OBJECT_IMAGE2D_ARRAY, CL_MEM_OBJECT_IMAGE3D)); struct EnqueueCopyBufferToImageHw : public ::testing::Test { void SetUp() override { REQUIRE_64BIT_OR_SKIP(); REQUIRE_IMAGES_OR_SKIP(defaultHwInfo); device = std::make_unique(MockClDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); context = std::make_unique(device.get()); dstImage = std::unique_ptr(Image2dHelper<>::create(context.get())); } std::unique_ptr device; std::unique_ptr context; std::unique_ptr dstImage; MockBuffer srcBuffer; uint64_t bigSize = 5ull * MemoryConstants::gigaByte; uint64_t smallSize = 4ull * MemoryConstants::gigaByte - 1; uint64_t bigOffset = 4ull * 
MemoryConstants::gigaByte; const size_t dstOrigin[3] = {0, 0, 0}; const size_t region[3] = {4, 1, 1}; }; using EnqueueCopyBufferToImageStatelessTest = EnqueueCopyBufferToImageHw; HWTEST_F(EnqueueCopyBufferToImageStatelessTest, givenBigBufferWhenCopyingBufferToImageStatelessThenSuccessIsReturned) { auto cmdQ = std::make_unique>(context.get(), device.get()); srcBuffer.size = static_cast(bigSize); auto retVal = cmdQ->enqueueCopyBufferToImage( &srcBuffer, dstImage.get(), static_cast(bigOffset), dstOrigin, region, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } HWTEST_F(EnqueueCopyBufferToImageStatelessTest, givenGpuHangAndBlockingCallAndBigBufferWhenCopyingBufferToImageStatelessThenOutOfResourcesIsReturned) { DebugManagerStateRestore stateRestore; DebugManager.flags.MakeEachEnqueueBlocking.set(true); std::unique_ptr device(new MockClDevice{MockClDevice::createWithNewExecutionEnvironment(nullptr)}); cl_queue_properties props = {}; MockCommandQueueHw mockCommandQueueHw(context.get(), device.get(), &props); mockCommandQueueHw.waitForAllEnginesReturnValue = WaitStatus::GpuHang; srcBuffer.size = static_cast(bigSize); const auto enqueueResult = mockCommandQueueHw.enqueueCopyBufferToImage( &srcBuffer, dstImage.get(), static_cast(bigOffset), dstOrigin, region, 0, nullptr, nullptr); EXPECT_EQ(CL_OUT_OF_RESOURCES, enqueueResult); EXPECT_EQ(1, mockCommandQueueHw.waitForAllEnginesCalledCount); } using EnqueueCopyBufferToImageStatefulTest = EnqueueCopyBufferToImageHw; HWTEST_F(EnqueueCopyBufferToImageStatefulTest, givenBigBufferWhenCopyingBufferToImageStatefulThenSuccessIsReturned) { auto cmdQ = std::make_unique>(context.get(), device.get()); srcBuffer.size = static_cast(smallSize); auto retVal = cmdQ->enqueueCopyBufferToImage( &srcBuffer, dstImage.get(), 0, dstOrigin, region, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } using OneMipLevelCopyBufferToImageImageTests = Test; HWTEST_F(OneMipLevelCopyBufferToImageImageTests, 
GivenNotMippedImageWhenCopyingBufferToImageThenDoNotProgramDestinationMipLevel) { auto srcBuffer = std::unique_ptr(createBuffer()); auto queue = createQueue(); auto retVal = queue->enqueueCopyBufferToImage( srcBuffer.get(), image.get(), 0u, origin, region, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(builtinOpsParamsCaptured); EXPECT_EQ(0u, usedBuiltinOpsParams.dstMipLevel); } compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/enqueue_copy_image_fixture.h000066400000000000000000000041501422164147700322330ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/ptr_math.h" #include "shared/test/common/test_macros/test_checks_shared.h" #include "opencl/test/unit_test/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/command_queue/enqueue_fixture.h" #include "opencl/test/unit_test/fixtures/built_in_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "gtest/gtest.h" namespace NEO { struct EnqueueCopyImageTest : public CommandEnqueueFixture, public ::testing::Test { void SetUp(void) override { REQUIRE_IMAGES_OR_SKIP(defaultHwInfo); CommandEnqueueFixture::SetUp(); context = new MockContext(pClDevice); srcImage = Image2dHelper<>::create(context); dstImage = Image2dHelper<>::create(context); } void TearDown(void) override { if (IsSkipped()) { return; } delete dstImage; delete srcImage; delete context; CommandEnqueueFixture::TearDown(); } protected: template void enqueueCopyImage() { auto retVal = EnqueueCopyImageHelper<>::enqueueCopyImage( pCmdQ, srcImage, dstImage); EXPECT_EQ(CL_SUCCESS, retVal); parseCommands(*pCmdQ); } MockContext *context = nullptr; Image *srcImage = nullptr; Image *dstImage = nullptr; }; struct EnqueueCopyImageMipMapTest : public CommandEnqueueFixture, public ::testing::Test, public ::testing::WithParamInterface> { void SetUp(void) override { 
REQUIRE_IMAGES_OR_SKIP(defaultHwInfo); CommandEnqueueFixture::SetUp(); context = new MockContext(pClDevice); } void TearDown(void) override { if (IsSkipped()) { return; } delete context; CommandEnqueueFixture::TearDown(); } MockContext *context = nullptr; }; } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/enqueue_copy_image_tests.cpp000066400000000000000000000431331422164147700322460ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/libult/ult_command_stream_receiver.h" #include "shared/test/common/mocks/mock_builtins.h" #include "shared/test/common/mocks/mock_gmm_resource_info.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/built_ins/builtins_dispatch_builder.h" #include "opencl/test/unit_test/command_queue/enqueue_copy_image_fixture.h" #include "opencl/test/unit_test/fixtures/one_mip_level_image_fixture.h" #include "opencl/test/unit_test/gen_common/gen_commands_common_validation.h" #include "opencl/test/unit_test/mocks/mock_builtin_dispatch_info_builder.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "reg_configs_common.h" #include using namespace NEO; HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyImageTest, WhenCopyingImageThenGpgpuWalkerIsCorrect) { typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; enqueueCopyImage(); auto *cmd = reinterpret_cast(cmdWalker); ASSERT_NE(nullptr, cmd); // Verify GPGPU_WALKER parameters EXPECT_NE(0u, cmd->getThreadGroupIdXDimension()); EXPECT_NE(0u, cmd->getThreadGroupIdYDimension()); EXPECT_NE(0u, cmd->getThreadGroupIdZDimension()); EXPECT_NE(0u, cmd->getRightExecutionMask()); EXPECT_NE(0u, cmd->getBottomExecutionMask()); EXPECT_EQ(GPGPU_WALKER::SIMD_SIZE_SIMD32, cmd->getSimdSize()); EXPECT_NE(0u, cmd->getIndirectDataLength()); 
EXPECT_FALSE(cmd->getIndirectParameterEnable()); // Compute the SIMD lane mask size_t simd = cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD32 ? 32 : cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD16 ? 16 : 8; uint64_t simdMask = maxNBitValue(simd); // Mask off lanes based on the execution masks auto laneMaskRight = cmd->getRightExecutionMask() & simdMask; auto lanesPerThreadX = 0; while (laneMaskRight) { lanesPerThreadX += laneMaskRight & 1; laneMaskRight >>= 1; } //auto numWorkItems = ( ( cmd->getThreadWidthCounterMaximum() - 1 ) * simd + lanesPerThreadX ) * cmd->getThreadGroupIdXDimension(); //EXPECT_EQ( expectedWorkItems, numWorkItems ); } HWTEST_F(EnqueueCopyImageTest, WhenCopyingImageThenTaskCountIsAlignedWithCsr) { //this test case assumes IOQ auto &csr = pDevice->getUltCommandStreamReceiver(); csr.taskCount = pCmdQ->taskCount + 100; csr.taskLevel = pCmdQ->taskLevel + 50; EnqueueCopyImageHelper<>::enqueueCopyImage(pCmdQ, srcImage, dstImage); EXPECT_EQ(csr.peekTaskCount(), pCmdQ->taskCount); EXPECT_EQ(csr.peekTaskLevel(), pCmdQ->taskLevel + 1); } HWTEST_F(EnqueueCopyImageTest, GivenGpuHangAndBlockingCallWhenCopyingImageThenOutOfResourcesIsReturned) { DebugManagerStateRestore stateRestore; DebugManager.flags.MakeEachEnqueueBlocking.set(true); std::unique_ptr device(new MockClDevice{MockClDevice::createWithNewExecutionEnvironment(nullptr)}); cl_queue_properties props = {}; MockCommandQueueHw mockCommandQueueHw(context, device.get(), &props); mockCommandQueueHw.waitForAllEnginesReturnValue = WaitStatus::GpuHang; const auto enqueueResult = EnqueueCopyImageHelper<>::enqueueCopyImage(&mockCommandQueueHw, srcImage, dstImage); EXPECT_EQ(CL_OUT_OF_RESOURCES, enqueueResult); EXPECT_EQ(1, mockCommandQueueHw.waitForAllEnginesCalledCount); } HWTEST_F(EnqueueCopyImageTest, WhenCopyingImageThenTaskLevelIsIncremented) { auto taskLevelBefore = pCmdQ->taskLevel; EnqueueCopyImageHelper<>::enqueueCopyImage(pCmdQ, srcImage, dstImage); EXPECT_GT(pCmdQ->taskLevel, 
taskLevelBefore); } HWTEST_F(EnqueueCopyImageTest, WhenCopyingImageThenCommandsAreAdded) { auto usedCmdBufferBefore = pCS->getUsed(); EnqueueCopyImageHelper<>::enqueueCopyImage(pCmdQ, srcImage, dstImage); EXPECT_NE(usedCmdBufferBefore, pCS->getUsed()); } HWTEST_F(EnqueueCopyImageTest, WhenCopyingImageThenIndirectDataGetsAdded) { auto dshBefore = pDSH->getUsed(); auto iohBefore = pIOH->getUsed(); auto sshBefore = pSSH->getUsed(); EnqueueCopyImageHelper<>::enqueueCopyImage(pCmdQ, srcImage, dstImage); EXPECT_TRUE(UnitTestHelper::evaluateDshUsage(dshBefore, pDSH->getUsed(), nullptr, rootDeviceIndex)); EXPECT_NE(iohBefore, pIOH->getUsed()); EXPECT_NE(sshBefore, pSSH->getUsed()); } HWTEST_F(EnqueueCopyImageTest, WhenCopyingImageThenL3ProgrammingIsCorrect) { enqueueCopyImage(); validateL3Programming(cmdList, itorWalker); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyImageTest, WhenEnqueueIsDoneThenStateBaseAddressIsProperlyProgrammed) { enqueueCopyImage(); auto &ultCsr = this->pDevice->getUltCommandStreamReceiver(); auto &hwHelper = HwHelper::get(pDevice->getHardwareInfo().platform.eRenderCoreFamily); validateStateBaseAddress(ultCsr.getMemoryManager()->getInternalHeapBaseAddress(ultCsr.rootDeviceIndex, pIOH->getGraphicsAllocation()->isAllocatedInLocalMemoryPool()), ultCsr.getMemoryManager()->getInternalHeapBaseAddress(ultCsr.rootDeviceIndex, !hwHelper.useSystemMemoryPlacementForISA(pDevice->getHardwareInfo())), pDSH, pIOH, pSSH, itorPipelineSelect, itorWalker, cmdList, 0llu); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyImageTest, WhenCopyingImageThenMediaInterfaceDescriptorLoadIsCorrect) { typedef typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD MEDIA_INTERFACE_DESCRIPTOR_LOAD; typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; enqueueCopyImage(); // All state should be programmed before walker auto cmd = reinterpret_cast(cmdMediaInterfaceDescriptorLoad); ASSERT_NE(nullptr, cmd); // Verify we have a valid length -- multiple of 
INTERFACE_DESCRIPTOR_DATAs EXPECT_EQ(0u, cmd->getInterfaceDescriptorTotalLength() % sizeof(INTERFACE_DESCRIPTOR_DATA)); // Validate the start address size_t alignmentStartAddress = 64 * sizeof(uint8_t); EXPECT_EQ(0u, cmd->getInterfaceDescriptorDataStartAddress() % alignmentStartAddress); // Validate the length EXPECT_NE(0u, cmd->getInterfaceDescriptorTotalLength()); size_t alignmentTotalLength = 32 * sizeof(uint8_t); EXPECT_EQ(0u, cmd->getInterfaceDescriptorTotalLength() % alignmentTotalLength); // Generically validate this command FamilyType::PARSE::template validateCommand(cmdList.begin(), itorMediaInterfaceDescriptorLoad); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyImageTest, WhenCopyingImageThenInterfaceDescriptorDataIsCorrect) { typedef typename FamilyType::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; enqueueCopyImage(); // Extract the interfaceDescriptorData auto cmdSBA = (STATE_BASE_ADDRESS *)cmdStateBaseAddress; auto &interfaceDescriptorData = *(INTERFACE_DESCRIPTOR_DATA *)cmdInterfaceDescriptorData; // Validate the kernel start pointer. Technically, a kernel can start at address 0 but let's force a value. 
auto kernelStartPointer = ((uint64_t)interfaceDescriptorData.getKernelStartPointerHigh() << 32) + interfaceDescriptorData.getKernelStartPointer(); EXPECT_LE(kernelStartPointer, cmdSBA->getInstructionBufferSize() * MemoryConstants::pageSize); size_t maxLocalSize = 256u; auto localWorkSize = std::min(maxLocalSize, Image2dDefaults::imageDesc.image_width * Image2dDefaults::imageDesc.image_height); auto simd = 32u; auto numThreadsPerThreadGroup = Math::divideAndRoundUp(localWorkSize, simd); EXPECT_EQ(numThreadsPerThreadGroup, interfaceDescriptorData.getNumberOfThreadsInGpgpuThreadGroup()); EXPECT_NE(0u, interfaceDescriptorData.getCrossThreadConstantDataReadLength()); EXPECT_NE(0u, interfaceDescriptorData.getConstantIndirectUrbEntryReadLength()); // We shouldn't have these pointers the same. EXPECT_NE(kernelStartPointer, interfaceDescriptorData.getBindingTablePointer()); } HWTEST_F(EnqueueCopyImageTest, WhenCopyingImageThenSurfaceStateIsCorrect) { typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto mockCmdQ = std::make_unique>(context, pClDevice, nullptr); VariableBackup cmdQBackup(&pCmdQ, mockCmdQ.get()); mockCmdQ->storeMultiDispatchInfo = true; enqueueCopyImage(); const auto &kernelInfo = mockCmdQ->storedMultiDispatchInfo.begin()->getKernel()->getKernelInfo(); for (uint32_t i = 0; i < 2; ++i) { uint32_t index = static_cast(kernelInfo.getArgDescriptorAt(i).template as().bindful) / sizeof(RENDER_SURFACE_STATE); const auto &surfaceState = getSurfaceState(&pCmdQ->getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0), index); const auto &imageDesc = dstImage->getImageDesc(); EXPECT_EQ(imageDesc.image_width, surfaceState.getWidth()); EXPECT_EQ(imageDesc.image_height, surfaceState.getHeight()); EXPECT_NE(0u, surfaceState.getSurfacePitch()); EXPECT_NE(0u, surfaceState.getSurfaceType()); auto surfaceFormat = surfaceState.getSurfaceFormat(); bool isRedescribedFormat = surfaceFormat == RENDER_SURFACE_STATE::SURFACE_FORMAT_R32G32B32A32_UINT || 
surfaceFormat == RENDER_SURFACE_STATE::SURFACE_FORMAT_R32G32_UINT || surfaceFormat == RENDER_SURFACE_STATE::SURFACE_FORMAT_R32_UINT || surfaceFormat == RENDER_SURFACE_STATE::SURFACE_FORMAT_R16_UINT || surfaceFormat == RENDER_SURFACE_STATE::SURFACE_FORMAT_R8_UINT; EXPECT_TRUE(isRedescribedFormat); EXPECT_EQ(MockGmmResourceInfo::getHAlignSurfaceStateResult, surfaceState.getSurfaceHorizontalAlignment()); EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_VERTICAL_ALIGNMENT_VALIGN_4, surfaceState.getSurfaceVerticalAlignment()); } uint32_t srcIndex = static_cast(kernelInfo.getArgDescriptorAt(0).template as().bindful) / sizeof(RENDER_SURFACE_STATE); const auto &srcSurfaceState = getSurfaceState(&pCmdQ->getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0), srcIndex); EXPECT_EQ(srcImage->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddress(), srcSurfaceState.getSurfaceBaseAddress()); uint32_t dstIndex = static_cast(kernelInfo.getArgDescriptorAt(1).template as().bindful) / sizeof(RENDER_SURFACE_STATE); const auto &dstSurfaceState = getSurfaceState(&pCmdQ->getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0), dstIndex); EXPECT_EQ(dstImage->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddress(), dstSurfaceState.getSurfaceBaseAddress()); } HWTEST_F(EnqueueCopyImageTest, WhenCopyingImageThenNumberOfPipelineSelectsIsOne) { enqueueCopyImage(); int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect(); EXPECT_EQ(1, numCommands); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyImageTest, WhenCopyingImageThenMediaVfeStateIsSetCorrectly) { enqueueCopyImage(); validateMediaVFEState(&pDevice->getHardwareInfo(), cmdMediaVfeState, cmdList, itorMediaVfeState); } typedef EnqueueCopyImageMipMapTest MipMapCopyImageTest; HWTEST_P(MipMapCopyImageTest, GivenImagesWithNonZeroMipLevelsWhenCopyImageIsCalledThenProperMipLevelsAreSet) { cl_mem_object_type srcImageType, dstImageType; std::tie(srcImageType, dstImageType) = GetParam(); auto builtIns = new MockBuiltins(); 
pCmdQ->getDevice().getExecutionEnvironment()->rootDeviceEnvironments[pCmdQ->getDevice().getRootDeviceIndex()]->builtins.reset(builtIns); auto &origBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder( EBuiltInOps::CopyImageToImage3d, pCmdQ->getClDevice()); // substitute original builder with mock builder auto oldBuilder = pClExecutionEnvironment->setBuiltinDispatchInfoBuilder( rootDeviceIndex, EBuiltInOps::CopyImageToImage3d, std::unique_ptr(new MockBuiltinDispatchInfoBuilder(*builtIns, pCmdQ->getClDevice(), &origBuilder))); cl_int retVal = CL_SUCCESS; cl_image_desc srcImageDesc = {}; uint32_t expectedSrcMipLevel = 3; uint32_t expectedDstMipLevel = 4; srcImageDesc.image_type = srcImageType; srcImageDesc.num_mip_levels = 10; srcImageDesc.image_width = 4; srcImageDesc.image_height = 1; srcImageDesc.image_depth = 1; cl_image_desc dstImageDesc = srcImageDesc; dstImageDesc.image_type = dstImageType; size_t srcOrigin[] = {0, 0, 0, 0}; size_t dstOrigin[] = {0, 0, 0, 0}; size_t region[] = {srcImageDesc.image_width, 1, 1}; std::unique_ptr srcImage; std::unique_ptr dstImage; switch (srcImageType) { case CL_MEM_OBJECT_IMAGE1D: srcOrigin[1] = expectedSrcMipLevel; srcImage = std::unique_ptr(ImageHelper::create(context, &srcImageDesc)); break; case CL_MEM_OBJECT_IMAGE1D_ARRAY: srcImageDesc.image_array_size = 2; srcOrigin[2] = expectedSrcMipLevel; srcImage = std::unique_ptr(ImageHelper::create(context, &srcImageDesc)); break; case CL_MEM_OBJECT_IMAGE2D: srcOrigin[2] = expectedSrcMipLevel; srcImage = std::unique_ptr(ImageHelper::create(context, &srcImageDesc)); break; case CL_MEM_OBJECT_IMAGE2D_ARRAY: srcImageDesc.image_array_size = 2; srcOrigin[3] = expectedSrcMipLevel; srcImage = std::unique_ptr(ImageHelper::create(context, &srcImageDesc)); break; case CL_MEM_OBJECT_IMAGE3D: srcOrigin[3] = expectedSrcMipLevel; srcImage = std::unique_ptr(ImageHelper::create(context, &srcImageDesc)); break; } EXPECT_NE(nullptr, srcImage.get()); switch (dstImageType) { case 
CL_MEM_OBJECT_IMAGE1D: dstOrigin[1] = expectedDstMipLevel; dstImage = std::unique_ptr(ImageHelper::create(context, &dstImageDesc)); break; case CL_MEM_OBJECT_IMAGE1D_ARRAY: dstImageDesc.image_array_size = 2; dstOrigin[2] = expectedDstMipLevel; dstImage = std::unique_ptr(ImageHelper::create(context, &dstImageDesc)); break; case CL_MEM_OBJECT_IMAGE2D: dstOrigin[2] = expectedDstMipLevel; dstImage = std::unique_ptr(ImageHelper::create(context, &dstImageDesc)); break; case CL_MEM_OBJECT_IMAGE2D_ARRAY: dstImageDesc.image_array_size = 2; dstOrigin[3] = expectedDstMipLevel; dstImage = std::unique_ptr(ImageHelper::create(context, &dstImageDesc)); break; case CL_MEM_OBJECT_IMAGE3D: dstOrigin[3] = expectedDstMipLevel; dstImage = std::unique_ptr(ImageHelper::create(context, &dstImageDesc)); break; } EXPECT_NE(nullptr, dstImage.get()); retVal = pCmdQ->enqueueCopyImage(srcImage.get(), dstImage.get(), srcOrigin, dstOrigin, region, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); auto &mockBuilder = static_cast(BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyImageToImage3d, pCmdQ->getClDevice())); auto params = mockBuilder.getBuiltinOpParams(); EXPECT_EQ(expectedSrcMipLevel, params->srcMipLevel); EXPECT_EQ(expectedDstMipLevel, params->dstMipLevel); // restore original builder and retrieve mock builder auto newBuilder = pClExecutionEnvironment->setBuiltinDispatchInfoBuilder( rootDeviceIndex, EBuiltInOps::CopyImageToImage3d, std::move(oldBuilder)); EXPECT_NE(nullptr, newBuilder); } uint32_t types[] = {CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE2D, CL_MEM_OBJECT_IMAGE2D_ARRAY, CL_MEM_OBJECT_IMAGE3D}; INSTANTIATE_TEST_CASE_P(MipMapCopyImageTest_GivenImagesWithNonZeroMipLevelsWhenCopyImageIsCalledThenProperMipLevelsAreSet, MipMapCopyImageTest, ::testing::Combine( ::testing::ValuesIn(types), ::testing::ValuesIn(types))); using OneMipLevelCopyImageImageTests = Test; HWTEST_F(OneMipLevelCopyImageImageTests, 
GivenNotMippedImageWhenCopyingImageThenDoNotProgramSourceAndDestinationMipLevels) { auto dstImage = std::unique_ptr(createImage()); auto queue = createQueue(); auto retVal = queue->enqueueCopyImage( image.get(), dstImage.get(), origin, origin, region, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(builtinOpsParamsCaptured); EXPECT_EQ(0u, usedBuiltinOpsParams.srcMipLevel); EXPECT_EQ(0u, usedBuiltinOpsParams.dstMipLevel); } enqueue_copy_image_to_buffer_fixture.h000066400000000000000000000047211422164147700342130ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/ptr_math.h" #include "shared/test/common/test_macros/test_checks_shared.h" #include "opencl/test/unit_test/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/command_queue/enqueue_fixture.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "gtest/gtest.h" namespace NEO { struct EnqueueCopyImageToBufferTest : public CommandEnqueueFixture, public ::testing::Test { void SetUp(void) override { REQUIRE_IMAGES_OR_SKIP(defaultHwInfo); CommandEnqueueFixture::SetUp(); BufferDefaults::context = new MockContext(pClDevice); context = new MockContext(pClDevice); srcImage = Image2dHelper<>::create(context); dstBuffer = BufferHelper<>::create(context); } void TearDown(void) override { if (IsSkipped()) { return; } delete srcImage; delete dstBuffer; delete BufferDefaults::context; delete context; CommandEnqueueFixture::TearDown(); } protected: template void enqueueCopyImageToBuffer() { auto retVal = EnqueueCopyImageToBufferHelper<>::enqueueCopyImageToBuffer( pCmdQ, srcImage, dstBuffer); EXPECT_EQ(CL_SUCCESS, retVal); parseCommands(*pCmdQ); } MockContext *context = nullptr; Image *srcImage = nullptr; Buffer *dstBuffer = nullptr; }; struct 
EnqueueCopyImageToBufferMipMapTest : public CommandEnqueueFixture, public ::testing::Test, public ::testing::WithParamInterface { void SetUp(void) override { REQUIRE_IMAGES_OR_SKIP(defaultHwInfo); CommandEnqueueFixture::SetUp(); BufferDefaults::context = new MockContext(pClDevice); context = new MockContext(pClDevice); dstBuffer = BufferHelper<>::create(context); } void TearDown(void) override { if (IsSkipped()) { return; } delete dstBuffer; delete BufferDefaults::context; delete context; CommandEnqueueFixture::TearDown(); } MockContext *context = nullptr; Buffer *dstBuffer = nullptr; }; } // namespace NEO enqueue_copy_image_to_buffer_tests.cpp000066400000000000000000000420601422164147700342200ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/libult/ult_command_stream_receiver.h" #include "shared/test/common/mocks/mock_builtins.h" #include "shared/test/common/mocks/mock_gmm_resource_info.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/built_ins/builtins_dispatch_builder.h" #include "opencl/test/unit_test/command_queue/enqueue_copy_image_to_buffer_fixture.h" #include "opencl/test/unit_test/fixtures/one_mip_level_image_fixture.h" #include "opencl/test/unit_test/gen_common/gen_commands_common_validation.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_builtin_dispatch_info_builder.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "reg_configs_common.h" #include using namespace NEO; HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyImageToBufferTest, WhenCopyingImageToBufferThenGpgpuWalkerIsCorrect) { typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; enqueueCopyImageToBuffer(); auto *cmd = 
reinterpret_cast(cmdWalker); ASSERT_NE(nullptr, cmd); // Verify GPGPU_WALKER parameters EXPECT_NE(0u, cmd->getThreadGroupIdXDimension()); EXPECT_NE(0u, cmd->getThreadGroupIdYDimension()); EXPECT_NE(0u, cmd->getThreadGroupIdZDimension()); EXPECT_NE(0u, cmd->getRightExecutionMask()); EXPECT_NE(0u, cmd->getBottomExecutionMask()); EXPECT_EQ(GPGPU_WALKER::SIMD_SIZE_SIMD32, cmd->getSimdSize()); EXPECT_NE(0u, cmd->getIndirectDataLength()); EXPECT_EQ(0u, cmd->getIndirectDataLength() % GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE); EXPECT_FALSE(cmd->getIndirectParameterEnable()); // Compute the SIMD lane mask size_t simd = cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD32 ? 32 : cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD16 ? 16 : 8; uint64_t simdMask = maxNBitValue(simd); // Mask off lanes based on the execution masks auto laneMaskRight = cmd->getRightExecutionMask() & simdMask; auto lanesPerThreadX = 0; while (laneMaskRight) { lanesPerThreadX += laneMaskRight & 1; laneMaskRight >>= 1; } } HWTEST_F(EnqueueCopyImageToBufferTest, WhenCopyingImageToBufferThenTaskCountIsAlignedWithCsr) { //this test case assumes IOQ auto &csr = pDevice->getUltCommandStreamReceiver(); csr.taskCount = pCmdQ->taskCount + 100; csr.taskLevel = pCmdQ->taskLevel + 50; enqueueCopyImageToBuffer(); EXPECT_EQ(csr.peekTaskCount(), pCmdQ->taskCount); EXPECT_EQ(csr.peekTaskLevel(), pCmdQ->taskLevel + 1); } HWTEST_F(EnqueueCopyImageToBufferTest, WhenCopyingImageToBufferThenTaskLevelIsIncremented) { auto taskLevelBefore = pCmdQ->taskLevel; enqueueCopyImageToBuffer(); EXPECT_GT(pCmdQ->taskLevel, taskLevelBefore); } HWTEST_F(EnqueueCopyImageToBufferTest, WhenCopyingImageToBufferThenCommandsAreAdded) { auto usedCmdBufferBefore = pCS->getUsed(); enqueueCopyImageToBuffer(); EXPECT_NE(usedCmdBufferBefore, pCS->getUsed()); } HWTEST_F(EnqueueCopyImageToBufferTest, WhenCopyingImageToBufferThenIndirectDataGetsAdded) { auto dshBefore = pDSH->getUsed(); auto iohBefore = pIOH->getUsed(); auto sshBefore = 
pSSH->getUsed(); enqueueCopyImageToBuffer(); EXPECT_TRUE(UnitTestHelper::evaluateDshUsage(dshBefore, pDSH->getUsed(), nullptr, rootDeviceIndex)); EXPECT_NE(iohBefore, pIOH->getUsed()); EXPECT_NE(sshBefore, pSSH->getUsed()); } HWTEST_F(EnqueueCopyImageToBufferTest, WhenCopyingImageToBufferThenL3ProgrammingIsCorrect) { enqueueCopyImageToBuffer(); validateL3Programming(cmdList, itorWalker); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyImageToBufferTest, WhenEnqueueIsDoneThenStateBaseAddressIsProperlyProgrammed) { enqueueCopyImageToBuffer(); auto &ultCsr = this->pDevice->getUltCommandStreamReceiver(); auto &hwHelper = HwHelper::get(pDevice->getHardwareInfo().platform.eRenderCoreFamily); validateStateBaseAddress(ultCsr.getMemoryManager()->getInternalHeapBaseAddress(ultCsr.rootDeviceIndex, pIOH->getGraphicsAllocation()->isAllocatedInLocalMemoryPool()), ultCsr.getMemoryManager()->getInternalHeapBaseAddress(ultCsr.rootDeviceIndex, !hwHelper.useSystemMemoryPlacementForISA(pDevice->getHardwareInfo())), pDSH, pIOH, pSSH, itorPipelineSelect, itorWalker, cmdList, 0llu); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyImageToBufferTest, WhenCopyingImageToBufferThenMediaInterfaceDescriptorLoadIsCorrect) { typedef typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD MEDIA_INTERFACE_DESCRIPTOR_LOAD; typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; enqueueCopyImageToBuffer(); // All state should be programmed before walker auto cmd = reinterpret_cast(cmdMediaInterfaceDescriptorLoad); ASSERT_NE(nullptr, cmd); // Verify we have a valid length -- multiple of INTERFACE_DESCRIPTOR_DATAs EXPECT_EQ(0u, cmd->getInterfaceDescriptorTotalLength() % sizeof(INTERFACE_DESCRIPTOR_DATA)); // Validate the start address size_t alignmentStartAddress = 64 * sizeof(uint8_t); EXPECT_EQ(0u, cmd->getInterfaceDescriptorDataStartAddress() % alignmentStartAddress); // Validate the length EXPECT_NE(0u, cmd->getInterfaceDescriptorTotalLength()); size_t alignmentTotalLength = 32 * 
sizeof(uint8_t); EXPECT_EQ(0u, cmd->getInterfaceDescriptorTotalLength() % alignmentTotalLength); // Generically validate this command FamilyType::PARSE::template validateCommand(cmdList.begin(), itorMediaInterfaceDescriptorLoad); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyImageToBufferTest, WhenCopyingImageToBufferThenInterfaceDescriptorDataIsCorrect) { typedef typename FamilyType::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; enqueueCopyImageToBuffer(); // Extract the interfaceDescriptorData auto cmdSBA = (STATE_BASE_ADDRESS *)cmdStateBaseAddress; auto &interfaceDescriptorData = *(INTERFACE_DESCRIPTOR_DATA *)cmdInterfaceDescriptorData; // Validate the kernel start pointer. Technically, a kernel can start at address 0 but let's force a value. auto kernelStartPointer = ((uint64_t)interfaceDescriptorData.getKernelStartPointerHigh() << 32) + interfaceDescriptorData.getKernelStartPointer(); EXPECT_LE(kernelStartPointer, cmdSBA->getInstructionBufferSize() * MemoryConstants::pageSize); size_t maxLocalSize = 256u; auto localWorkSize = std::min( maxLocalSize, Image2dDefaults::imageDesc.image_width * Image2dDefaults::imageDesc.image_height); auto simd = 32u; auto numThreadsPerThreadGroup = Math::divideAndRoundUp(localWorkSize, simd); EXPECT_EQ(numThreadsPerThreadGroup, interfaceDescriptorData.getNumberOfThreadsInGpgpuThreadGroup()); EXPECT_NE(0u, interfaceDescriptorData.getCrossThreadConstantDataReadLength()); EXPECT_NE(0u, interfaceDescriptorData.getConstantIndirectUrbEntryReadLength()); // We shouldn't have these pointers the same. 
EXPECT_NE(kernelStartPointer, interfaceDescriptorData.getBindingTablePointer()); } HWTEST_F(EnqueueCopyImageToBufferTest, WhenCopyingImageToBufferThenSurfaceStateIsCorrect) { typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; enqueueCopyImageToBuffer(); const auto &surfaceState = getSurfaceState(&pCmdQ->getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0), 0); const auto &imageDesc = srcImage->getImageDesc(); // EnqueueReadImage uses multi-byte copies depending on per-pixel-size-in-bytes EXPECT_EQ(imageDesc.image_width, surfaceState.getWidth()); EXPECT_EQ(imageDesc.image_height, surfaceState.getHeight()); EXPECT_NE(0u, surfaceState.getSurfacePitch()); EXPECT_NE(0u, surfaceState.getSurfaceType()); auto surfaceFormat = surfaceState.getSurfaceFormat(); bool isRedescribedFormat = surfaceFormat == RENDER_SURFACE_STATE::SURFACE_FORMAT_R32G32B32A32_UINT || surfaceFormat == RENDER_SURFACE_STATE::SURFACE_FORMAT_R32G32_UINT || surfaceFormat == RENDER_SURFACE_STATE::SURFACE_FORMAT_R32_UINT || surfaceFormat == RENDER_SURFACE_STATE::SURFACE_FORMAT_R16_UINT || surfaceFormat == RENDER_SURFACE_STATE::SURFACE_FORMAT_R8_UINT; EXPECT_TRUE(isRedescribedFormat); EXPECT_EQ(MockGmmResourceInfo::getHAlignSurfaceStateResult, surfaceState.getSurfaceHorizontalAlignment()); EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_VERTICAL_ALIGNMENT_VALIGN_4, surfaceState.getSurfaceVerticalAlignment()); EXPECT_EQ(srcImage->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddress(), surfaceState.getSurfaceBaseAddress()); } HWTEST_F(EnqueueCopyImageToBufferTest, WhenCopyingImageToBufferThenNumberOfPipelineSelectsIsOne) { enqueueCopyImageToBuffer(); int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect(); EXPECT_EQ(1, numCommands); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyImageToBufferTest, WhenCopyingImageToBufferThenMediaVfeStateIsSetCorrectly) { enqueueCopyImageToBuffer(); validateMediaVFEState(&pDevice->getHardwareInfo(), cmdMediaVfeState, cmdList, 
itorMediaVfeState); } typedef EnqueueCopyImageToBufferMipMapTest MipMapCopyImageToBufferTest; HWTEST_P(MipMapCopyImageToBufferTest, GivenImageWithMipLevelNonZeroWhenCopyImageToBufferIsCalledThenProperMipLevelIsSet) { auto image_type = (cl_mem_object_type)GetParam(); auto builtIns = new MockBuiltins(); pCmdQ->getDevice().getExecutionEnvironment()->rootDeviceEnvironments[pCmdQ->getDevice().getRootDeviceIndex()]->builtins.reset(builtIns); auto &origBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder( EBuiltInOps::CopyImage3dToBuffer, pCmdQ->getClDevice()); // substitute original builder with mock builder auto oldBuilder = pClExecutionEnvironment->setBuiltinDispatchInfoBuilder( rootDeviceIndex, EBuiltInOps::CopyImage3dToBuffer, std::unique_ptr(new MockBuiltinDispatchInfoBuilder(*builtIns, pCmdQ->getClDevice(), &origBuilder))); cl_int retVal = CL_SUCCESS; cl_image_desc imageDesc = {}; uint32_t expectedMipLevel = 3; imageDesc.image_type = image_type; imageDesc.num_mip_levels = 10; imageDesc.image_width = 4; imageDesc.image_height = 1; imageDesc.image_depth = 1; size_t origin[] = {0, 0, 0, 0}; size_t region[] = {imageDesc.image_width, 1, 1}; std::unique_ptr image; switch (image_type) { case CL_MEM_OBJECT_IMAGE1D: origin[1] = expectedMipLevel; image = std::unique_ptr(ImageHelper::create(context, &imageDesc)); break; case CL_MEM_OBJECT_IMAGE1D_ARRAY: imageDesc.image_array_size = 2; origin[2] = expectedMipLevel; image = std::unique_ptr(ImageHelper::create(context, &imageDesc)); break; case CL_MEM_OBJECT_IMAGE2D: origin[2] = expectedMipLevel; image = std::unique_ptr(ImageHelper::create(context, &imageDesc)); break; case CL_MEM_OBJECT_IMAGE2D_ARRAY: imageDesc.image_array_size = 2; origin[3] = expectedMipLevel; image = std::unique_ptr(ImageHelper::create(context, &imageDesc)); break; case CL_MEM_OBJECT_IMAGE3D: origin[3] = expectedMipLevel; image = std::unique_ptr(ImageHelper::create(context, &imageDesc)); break; } EXPECT_NE(nullptr, image.get()); retVal = 
pCmdQ->enqueueCopyImageToBuffer(image.get(), dstBuffer, origin, region, 0, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); auto &mockBuilder = static_cast(BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyImage3dToBuffer, pCmdQ->getClDevice())); auto params = mockBuilder.getBuiltinOpParams(); EXPECT_EQ(expectedMipLevel, params->srcMipLevel); // restore original builder and retrieve mock builder auto newBuilder = pClExecutionEnvironment->setBuiltinDispatchInfoBuilder( rootDeviceIndex, EBuiltInOps::CopyImage3dToBuffer, std::move(oldBuilder)); EXPECT_NE(nullptr, newBuilder); } INSTANTIATE_TEST_CASE_P(MipMapCopyImageToBufferTest_GivenImageWithMipLevelNonZeroWhenCopyImageToBufferIsCalledThenProperMipLevelIsSet, MipMapCopyImageToBufferTest, ::testing::Values(CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE2D, CL_MEM_OBJECT_IMAGE2D_ARRAY, CL_MEM_OBJECT_IMAGE3D)); struct EnqueueCopyImageToBufferHw : public ::testing::Test { void SetUp() override { REQUIRE_64BIT_OR_SKIP(); REQUIRE_IMAGES_OR_SKIP(defaultHwInfo); device = std::make_unique(MockClDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); context = std::make_unique(device.get()); srcImage = std::unique_ptr(Image2dHelper<>::create(context.get())); } std::unique_ptr device; std::unique_ptr context; std::unique_ptr srcImage; MockBuffer dstBuffer; uint64_t bigSize = 5ull * MemoryConstants::gigaByte; uint64_t smallSize = 4ull * MemoryConstants::gigaByte - 1; uint64_t bigOffset = 4ull * MemoryConstants::gigaByte; const size_t srcOrigin[3] = {0, 0, 0}; const size_t region[3] = {4, 1, 1}; }; using EnqueueCopyImageToBufferHwStatelessTest = EnqueueCopyImageToBufferHw; HWTEST_F(EnqueueCopyImageToBufferHwStatelessTest, givenBigBufferWhenCopyingImageToBufferStatelessThenSuccessIsReturned) { auto cmdQ = std::make_unique>(context.get(), device.get()); dstBuffer.size = static_cast(bigSize); auto retVal = cmdQ->enqueueCopyImageToBuffer( srcImage.get(), &dstBuffer, 
srcOrigin, region, static_cast(bigOffset), 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } HWTEST_F(EnqueueCopyImageToBufferHwStatelessTest, givenGpuHangAndBlockingCallAndBigBufferWhenCopyingImageToBufferStatelessThenOutOfResourcesIsReturned) { DebugManagerStateRestore stateRestore; DebugManager.flags.MakeEachEnqueueBlocking.set(true); std::unique_ptr device(new MockClDevice{MockClDevice::createWithNewExecutionEnvironment(nullptr)}); cl_queue_properties props = {}; MockCommandQueueHw mockCommandQueueHw(context.get(), device.get(), &props); mockCommandQueueHw.waitForAllEnginesReturnValue = WaitStatus::GpuHang; dstBuffer.size = static_cast(bigSize); const auto enqueueResult = mockCommandQueueHw.enqueueCopyImageToBuffer( srcImage.get(), &dstBuffer, srcOrigin, region, static_cast(bigOffset), 0, nullptr, nullptr); EXPECT_EQ(CL_OUT_OF_RESOURCES, enqueueResult); EXPECT_EQ(1, mockCommandQueueHw.waitForAllEnginesCalledCount); } using EnqueueCopyImageToBufferStatefulTest = EnqueueCopyImageToBufferHw; HWTEST_F(EnqueueCopyImageToBufferStatefulTest, givenBufferWhenCopyingImageToBufferStatefulThenSuccessIsReturned) { auto cmdQ = std::make_unique>(context.get(), device.get()); dstBuffer.size = static_cast(smallSize); auto retVal = cmdQ->enqueueCopyImageToBuffer( srcImage.get(), &dstBuffer, srcOrigin, region, 0, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } using OneMipLevelCopyImageToBufferImageTests = Test; HWTEST_F(OneMipLevelCopyImageToBufferImageTests, GivenNotMippedImageWhenCopyingImageToBufferThenDoNotProgramSourceMipLevel) { auto dstBuffer = std::unique_ptr(createBuffer()); auto queue = createQueue(); auto retVal = queue->enqueueCopyImageToBuffer( image.get(), dstBuffer.get(), origin, region, 0u, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(builtinOpsParamsCaptured); EXPECT_EQ(0u, usedBuiltinOpsParams.srcMipLevel); } 
compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/enqueue_debug_kernel_tests.cpp000066400000000000000000000246241422164147700325640ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/os_context.h" #include "shared/source/source_level_debugger/source_level_debugger.h" #include "shared/test/common/helpers/kernel_binary_helper.h" #include "shared/test/common/helpers/kernel_filename_helper.h" #include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/test_macros/mock_method_macros.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/program/program.h" #include "opencl/test/unit_test/fixtures/enqueue_handler_fixture.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/program/program_from_binary.h" #include "compiler_options.h" using namespace NEO; using namespace ::testing; typedef EnqueueHandlerTest EnqueueDebugKernelSimpleTest; class EnqueueDebugKernelTest : public ProgramSimpleFixture, public ::testing::Test { public: void SetUp() override { ProgramSimpleFixture::SetUp(); device = pClDevice; pDevice->executionEnvironment->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]->debugger.reset(new SourceLevelDebugger(nullptr)); auto sipType = SipKernel::getSipKernelType(*pDevice); SipKernel::initSipKernel(sipType, *pDevice); if (pDevice->getHardwareInfo().platform.eRenderCoreFamily >= IGFX_GEN9_CORE) { pDevice->deviceInfo.debuggerActive = true; std::string filename; std::string kernelOption(CompilerOptions::debugKernelEnable); KernelFilenameHelper::getKernelFilenameFromInternalOption(kernelOption, filename); kbHelper = new KernelBinaryHelper(filename, false); CreateProgramWithSource( pContext, 
"copybuffer.cl"); pProgram->enableKernelDebug(); cl_int retVal = pProgram->build(pProgram->getDevices(), nullptr, false); ASSERT_EQ(CL_SUCCESS, retVal); // create a kernel pMultiDeviceKernel = MultiDeviceKernel::create( pProgram, pProgram->getKernelInfosForKernel("CopyBuffer"), &retVal); debugKernel = pMultiDeviceKernel->getKernel(rootDeviceIndex); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, debugKernel); cl_mem src = &bufferSrc; cl_mem dst = &bufferDst; retVal = debugKernel->setArg( 0, sizeof(cl_mem), &src); retVal = debugKernel->setArg( 1, sizeof(cl_mem), &dst); } } void TearDown() override { if (pDevice->getHardwareInfo().platform.eRenderCoreFamily >= IGFX_GEN9_CORE) { delete kbHelper; pMultiDeviceKernel->release(); } ProgramSimpleFixture::TearDown(); } cl_device_id device; Kernel *debugKernel = nullptr; MultiDeviceKernel *pMultiDeviceKernel = nullptr; KernelBinaryHelper *kbHelper = nullptr; MockContext context; MockBuffer bufferSrc; MockBuffer bufferDst; }; HWTEST_F(EnqueueDebugKernelTest, givenDebugKernelWhenEnqueuedThenSSHAndBtiAreCorrectlySet) { if (pDevice->isDebuggerActive()) { using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE; using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; std::unique_ptr> mockCmdQ(new MockCommandQueueHw(&context, pClDevice, 0)); size_t gws[] = {1, 1, 1}; auto &ssh = mockCmdQ->getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 4096u); void *surfaceStates = ssh.getSpace(0); mockCmdQ->enqueueKernel(debugKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); auto *dstBtiTableBase = reinterpret_cast(ptrOffset(surfaceStates, debugKernel->getBindingTableOffset())); uint32_t surfaceStateOffset = dstBtiTableBase[0].getSurfaceStatePointer(); auto debugSurfaceState = reinterpret_cast(ptrOffset(ssh.getCpuBase(), surfaceStateOffset)); auto &commandStreamReceiver = mockCmdQ->getGpgpuCommandStreamReceiver(); auto debugSurface = commandStreamReceiver.getDebugSurfaceAllocation(); EXPECT_EQ(1u, 
debugSurface->getTaskCount(commandStreamReceiver.getOsContext().getContextId())); EXPECT_EQ(debugSurface->getGpuAddress(), debugSurfaceState->getSurfaceBaseAddress()); } } HWTEST_F(EnqueueDebugKernelTest, givenDebugKernelWhenEnqueuedThenSurfaceStateForDebugSurfaceIsSetAtBindlessOffsetZero) { if (pDevice->isDebuggerActive()) { using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE; using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; std::unique_ptr> mockCmdQ(new MockCommandQueueHw(&context, pClDevice, 0)); size_t gws[] = {1, 1, 1}; auto &ssh = mockCmdQ->getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 4096u); mockCmdQ->enqueueKernel(debugKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); auto debugSurfaceState = reinterpret_cast(ssh.getCpuBase()); auto &commandStreamReceiver = mockCmdQ->getGpgpuCommandStreamReceiver(); auto debugSurface = commandStreamReceiver.getDebugSurfaceAllocation(); SURFACE_STATE_BUFFER_LENGTH length; length.Length = static_cast(debugSurface->getUnderlyingBufferSize() - 1); EXPECT_EQ(length.SurfaceState.Depth + 1u, debugSurfaceState->getDepth()); EXPECT_EQ(length.SurfaceState.Width + 1u, debugSurfaceState->getWidth()); EXPECT_EQ(length.SurfaceState.Height + 1u, debugSurfaceState->getHeight()); EXPECT_EQ(debugSurface->getGpuAddress(), debugSurfaceState->getSurfaceBaseAddress()); EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_BUFFER, debugSurfaceState->getSurfaceType()); EXPECT_EQ(UnitTestHelper::getCoherencyTypeSupported(RENDER_SURFACE_STATE::COHERENCY_TYPE_IA_COHERENT), debugSurfaceState->getCoherencyType()); } } template class MockCommandQueueHwSetupDebugSurface : public CommandQueueHw { typedef CommandQueueHw BaseClass; public: MockCommandQueueHwSetupDebugSurface(Context *context, ClDevice *device, cl_queue_properties *properties) : BaseClass(context, device, properties, false) { } bool setupDebugSurface(Kernel *kernel) override { setupDebugSurfaceCalled++; 
setupDebugSurfaceParamsPassed.push_back({kernel}); return setupDebugSurfaceResult; } struct SetupDebugSurfaceParams { Kernel *kernel = nullptr; }; StackVec setupDebugSurfaceParamsPassed{}; uint32_t setupDebugSurfaceCalled = 0u; bool setupDebugSurfaceResult = true; }; HWTEST_F(EnqueueDebugKernelSimpleTest, givenKernelFromProgramWithDebugEnabledWhenEnqueuedThenDebugSurfaceIsSetup) { MockProgram program(context, false, toClDeviceVector(*pClDevice)); program.enableKernelDebug(); std::unique_ptr kernel(MockKernel::create(*pDevice, &program)); kernel->initialize(); std::unique_ptr> mockCmdQ(new MockCommandQueueHwSetupDebugSurface(context, pClDevice, 0)); auto hwInfo = *NEO::defaultHwInfo.get(); auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); mockCmdQ->getGpgpuCommandStreamReceiver().allocateDebugSurface(hwHelper.getSipKernelMaxDbgSurfaceSize(hwInfo)); mockCmdQ->setupDebugSurfaceParamsPassed.clear(); EXPECT_TRUE(isValidOffset(kernel->getKernelInfo().kernelDescriptor.payloadMappings.implicitArgs.systemThreadSurfaceAddress.bindful)); size_t gws[] = {1, 1, 1}; mockCmdQ->enqueueKernel(kernel.get(), 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_EQ(1u, mockCmdQ->setupDebugSurfaceCalled); EXPECT_EQ(kernel.get(), mockCmdQ->setupDebugSurfaceParamsPassed[0].kernel); } HWTEST_F(EnqueueDebugKernelSimpleTest, givenKernelWithoutSystemThreadSurfaceWhenEnqueuedThenDebugSurfaceIsNotSetup) { MockProgram program(context, false, toClDeviceVector(*pClDevice)); program.enableKernelDebug(); std::unique_ptr kernel(MockKernel::create(*pDevice, &program)); kernel->initialize(); EXPECT_FALSE(isValidOffset(kernel->getKernelInfo().kernelDescriptor.payloadMappings.implicitArgs.systemThreadSurfaceAddress.bindful)); std::unique_ptr> mockCmdQ(new MockCommandQueueHwSetupDebugSurface(context, pClDevice, 0)); size_t gws[] = {1, 1, 1}; mockCmdQ->enqueueKernel(kernel.get(), 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_EQ(0u, mockCmdQ->setupDebugSurfaceCalled); } 
HWTEST_F(EnqueueDebugKernelSimpleTest, givenKernelFromProgramWithoutDebugEnabledWhenEnqueuedThenDebugSurfaceIsNotSetup) { MockProgram program(context, false, toClDeviceVector(*pClDevice)); std::unique_ptr kernel(MockKernel::create(*pDevice, &program)); std::unique_ptr> mockCmdQ(new MockCommandQueueHwSetupDebugSurface(context, pClDevice, nullptr)); size_t gws[] = {1, 1, 1}; mockCmdQ->enqueueKernel(kernel.get(), 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_EQ(nullptr, mockCmdQ->getGpgpuCommandStreamReceiver().getDebugSurfaceAllocation()); EXPECT_EQ(0u, mockCmdQ->setupDebugSurfaceCalled); } using ActiveDebuggerTest = EnqueueDebugKernelTest; HWTEST_F(ActiveDebuggerTest, givenKernelFromProgramWithoutDebugEnabledAndActiveDebuggerWhenEnqueuedThenDebugSurfaceIsSetup) { MockProgram program(&context, false, toClDeviceVector(*pClDevice)); std::unique_ptr kernel(MockKernel::create(*pDevice, &program)); std::unique_ptr> cmdQ(new CommandQueueHw(&context, pClDevice, nullptr, false)); size_t gws[] = {1, 1, 1}; cmdQ->enqueueKernel(kernel.get(), 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_NE(nullptr, cmdQ->getGpgpuCommandStreamReceiver().getDebugSurfaceAllocation()); }compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/enqueue_fill_buffer_event_tests.cpp000066400000000000000000000055101422164147700336070ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/event/event.h" #include "opencl/test/unit_test/command_queue/enqueue_fill_buffer_fixture.h" using namespace NEO; namespace ULT { struct FillBufferEventTests : public EnqueueFillBufferFixture, public ::testing::Test { typedef EnqueueFillBufferFixture BaseClass; void SetUp() override { BaseClass::SetUp(); } void TearDown() override { BaseClass::TearDown(); } }; HWTEST_F(FillBufferEventTests, 
WhenEnqueingFillBufferThenEventHasCorrectCommandType) { float pattern[] = {1.0f}; size_t patternSize = sizeof(pattern); size_t offset = 0; size_t size = 2 * patternSize; cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event event = nullptr; auto retVal = pCmdQ->enqueueFillBuffer( buffer, pattern, patternSize, offset, size, numEventsInWaitList, eventWaitList, &event); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, event); auto pEvent = (Event *)event; EXPECT_EQ(pCmdQ->taskLevel, pEvent->taskLevel); // Check CL_EVENT_COMMAND_TYPE { cl_command_type cmdType = 0; size_t sizeReturned = 0; auto result = clGetEventInfo(pEvent, CL_EVENT_COMMAND_TYPE, sizeof(cmdType), &cmdType, &sizeReturned); ASSERT_EQ(CL_SUCCESS, result); EXPECT_EQ(static_cast(CL_COMMAND_FILL_BUFFER), cmdType); EXPECT_EQ(sizeof(cl_command_type), sizeReturned); } delete pEvent; } HWTEST_F(FillBufferEventTests, GivenMultipleEventsWhenEnqueingFillBufferThenReturnedEventShouldBeMaxOfInputEventsAndCmdQPlus1) { uint32_t taskLevelCmdQ = 17; pCmdQ->taskLevel = taskLevelCmdQ; uint32_t taskLevelEvent1 = 8; uint32_t taskLevelEvent2 = 19; Event event1(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent1, 15); Event event2(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent2, 16); float pattern[] = {1.0f}; size_t patternSize = sizeof(pattern); size_t offset = 0; size_t size = 2 * patternSize; cl_event eventWaitList[] = { &event1, &event2}; cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]); cl_event event = nullptr; auto retVal = pCmdQ->enqueueFillBuffer( buffer, pattern, patternSize, offset, size, numEventsInWaitList, eventWaitList, &event); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, event); auto pEvent = (Event *)event; EXPECT_EQ(19u + 1u, pEvent->taskLevel); delete pEvent; } } // namespace ULT 
compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/enqueue_fill_buffer_fixture.h000066400000000000000000000021051422164147700323740ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "opencl/test/unit_test/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/command_queue/enqueue_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" namespace NEO { struct EnqueueFillBufferFixture : public CommandEnqueueFixture { void SetUp() override { CommandEnqueueFixture::SetUp(); BufferDefaults::context = new MockContext; buffer = BufferHelper<>::create(); } void TearDown() override { delete buffer; delete BufferDefaults::context; CommandEnqueueFixture::TearDown(); } template void enqueueFillBuffer() { auto retVal = EnqueueFillBufferHelper<>::enqueueFillBuffer( pCmdQ, buffer); EXPECT_EQ(CL_SUCCESS, retVal); parseCommands(*pCmdQ); } MockContext context; Buffer *buffer = nullptr; }; } // namespace NEO enqueue_fill_buffer_negative_tests.cpp000066400000000000000000000105641422164147700342160ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/ptr_math.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/test/unit_test/command_queue/enqueue_fill_buffer_fixture.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "gtest/gtest.h" using namespace NEO; namespace ULT { struct EnqueueFillBuffer : public EnqueueFillBufferFixture, public ::testing::Test { typedef EnqueueFillBufferFixture BaseClass; void SetUp() override { BaseClass::SetUp(); } void TearDown() override { BaseClass::TearDown(); } }; TEST_F(EnqueueFillBuffer, 
GivenNullBufferWhenFillingBufferThenInvalidMemObjectErrorIsReturned) { cl_float pattern = 1.0f; auto retVal = clEnqueueFillBuffer( BaseClass::pCmdQ, nullptr, &pattern, sizeof(pattern), 0, sizeof(pattern), 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } TEST_F(EnqueueFillBuffer, GivenNullPatternWhenFillingBufferThenInvalidValueErrorIsReturned) { cl_float pattern = 1.0f; cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event *event = nullptr; auto retVal = clEnqueueFillBuffer( BaseClass::pCmdQ, buffer, nullptr, sizeof(pattern), 0, sizeof(pattern), numEventsInWaitList, eventWaitList, event); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(EnqueueFillBuffer, GivenNullEventListAndNumEventsNonZeroWhenFillingBufferThenInvalidEventWaitListErrorIsReturned) { cl_float pattern = 1.0f; auto retVal = clEnqueueFillBuffer( BaseClass::pCmdQ, buffer, &pattern, sizeof(pattern), 0, sizeof(pattern), 1, nullptr, nullptr); EXPECT_EQ(CL_INVALID_EVENT_WAIT_LIST, retVal); } TEST_F(EnqueueFillBuffer, GivenEventListAndNumEventsZeroWhenFillingBufferThenInvalidEventWaitListErrorIsReturned) { cl_event eventList = (cl_event)ptrGarbage; cl_float pattern = 1.0f; auto retVal = clEnqueueFillBuffer( BaseClass::pCmdQ, buffer, &pattern, sizeof(pattern), 0, sizeof(pattern), 0, &eventList, nullptr); EXPECT_EQ(CL_INVALID_EVENT_WAIT_LIST, retVal); } HWTEST_F(EnqueueFillBuffer, GivenGpuHangAndBlockingCallWhenFillingBufferThenOutOfResourcesIsReturned) { DebugManagerStateRestore stateRestore; DebugManager.flags.MakeEachEnqueueBlocking.set(true); std::unique_ptr device(new MockClDevice{MockClDevice::createWithNewExecutionEnvironment(nullptr)}); cl_queue_properties props = {}; MockCommandQueueHw mockCommandQueueHw(&context, device.get(), &props); mockCommandQueueHw.waitForAllEnginesReturnValue = WaitStatus::GpuHang; cl_uint numEventsInWaitList = 0; const cl_event *eventWaitList = nullptr; const auto enqueueResult = 
EnqueueFillBufferHelper<>::enqueueFillBuffer(&mockCommandQueueHw, buffer, numEventsInWaitList, eventWaitList, nullptr); EXPECT_EQ(CL_OUT_OF_RESOURCES, enqueueResult); EXPECT_EQ(1, mockCommandQueueHw.waitForAllEnginesCalledCount); } } // namespace ULT namespace ULT { struct InvalidPatternSize : public EnqueueFillBufferFixture, public ::testing::TestWithParam { typedef EnqueueFillBufferFixture BaseClass; InvalidPatternSize() { } void SetUp() override { BaseClass::SetUp(); patternSize = GetParam(); pattern = new char[patternSize]; } void TearDown() override { delete[] pattern; BaseClass::TearDown(); } size_t patternSize = 0; char *pattern = nullptr; }; TEST_P(InvalidPatternSize, GivenInvalidPatternSizeWhenFillingBufferThenInvalidValueErrorIsReturned) { auto retVal = clEnqueueFillBuffer( BaseClass::pCmdQ, buffer, &pattern, patternSize, 0, patternSize, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); } INSTANTIATE_TEST_CASE_P(EnqueueFillBuffer, InvalidPatternSize, ::testing::Values(0, 3, 5, 256, 512, 1024)); } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/enqueue_fill_buffer_tests.cpp000066400000000000000000000634201422164147700324120ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/built_ins/built_ins.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/ptr_math.h" #include "shared/source/memory_manager/allocations_list.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/os_interface/os_context.h" #include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/libult/ult_command_stream_receiver.h" #include "shared/test/common/mocks/mock_allocation_properties.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/built_ins/builtins_dispatch_builder.h" #include 
"opencl/source/command_queue/command_queue.h" #include "opencl/source/helpers/dispatch_info.h" #include "opencl/test/unit_test/command_queue/enqueue_fill_buffer_fixture.h" #include "opencl/test/unit_test/command_queue/enqueue_fixture.h" #include "opencl/test/unit_test/gen_common/gen_commands_common_validation.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "reg_configs_common.h" using namespace NEO; typedef Test EnqueueFillBufferCmdTests; HWTEST_F(EnqueueFillBufferCmdTests, WhenFillingBufferThenTaskCountIsAlignedWithCsr) { //this test case assumes IOQ auto &csr = pDevice->getUltCommandStreamReceiver(); csr.taskCount = pCmdQ->taskCount + 100; csr.taskLevel = pCmdQ->taskLevel + 50; EnqueueFillBufferHelper<>::enqueueFillBuffer(pCmdQ, buffer); EXPECT_EQ(csr.peekTaskCount(), pCmdQ->taskCount); EXPECT_EQ(csr.peekTaskLevel(), pCmdQ->taskLevel + 1); } HWTEST_F(EnqueueFillBufferCmdTests, WhenFillingBufferThenTaskLevelIsIncremented) { auto taskLevelBefore = pCmdQ->taskLevel; EnqueueFillBufferHelper<>::enqueueFillBuffer(pCmdQ, buffer); EXPECT_GT(pCmdQ->taskLevel, taskLevelBefore); } HWTEST_F(EnqueueFillBufferCmdTests, WhenFillingBufferThenCommandsAreAdded) { auto usedCmdBufferBefore = pCS->getUsed(); EnqueueFillBufferHelper<>::enqueueFillBuffer(pCmdQ, buffer); EXPECT_NE(usedCmdBufferBefore, pCS->getUsed()); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueFillBufferCmdTests, WhenFillingBufferThenGpgpuWalkerIsCorrect) { typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; enqueueFillBuffer(); auto *cmd = (GPGPU_WALKER *)cmdWalker; ASSERT_NE(nullptr, cmd); // Verify GPGPU_WALKER parameters EXPECT_NE(0u, cmd->getThreadGroupIdXDimension()); EXPECT_NE(0u, cmd->getThreadGroupIdYDimension()); EXPECT_NE(0u, cmd->getThreadGroupIdZDimension()); EXPECT_NE(0u, cmd->getRightExecutionMask()); EXPECT_NE(0u, cmd->getBottomExecutionMask()); EXPECT_EQ(GPGPU_WALKER::SIMD_SIZE_SIMD32, cmd->getSimdSize()); EXPECT_NE(0u, cmd->getIndirectDataLength()); 
EXPECT_FALSE(cmd->getIndirectParameterEnable()); // Compute the SIMD lane mask size_t simd = cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD32 ? 32 : cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD16 ? 16 : 8; uint64_t simdMask = maxNBitValue(simd); // Mask off lanes based on the execution masks auto laneMaskRight = cmd->getRightExecutionMask() & simdMask; auto lanesPerThreadX = 0; while (laneMaskRight) { lanesPerThreadX += laneMaskRight & 1; laneMaskRight >>= 1; } } HWTEST_F(EnqueueFillBufferCmdTests, WhenFillingBufferThenIndirectDataGetsAdded) { auto patternAllocation = context.getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{context.getDevice(0)->getRootDeviceIndex(), EnqueueFillBufferTraits::patternSize}); auto dshBefore = pDSH->getUsed(); auto iohBefore = pIOH->getUsed(); auto sshBefore = pSSH->getUsed(); EnqueueFillBufferHelper<>::enqueueFillBuffer(pCmdQ, buffer); auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::FillBuffer, pCmdQ->getClDevice()); ASSERT_NE(nullptr, &builder); BuiltinOpParams dc; MemObj patternMemObj(&this->context, 0, {}, 0, 0, alignUp(EnqueueFillBufferTraits::patternSize, 4), patternAllocation->getUnderlyingBuffer(), patternAllocation->getUnderlyingBuffer(), GraphicsAllocationHelper::toMultiGraphicsAllocation(patternAllocation), false, false, true); dc.srcMemObj = &patternMemObj; dc.dstMemObj = buffer; dc.dstOffset = {EnqueueFillBufferTraits::offset, 0, 0}; dc.size = {EnqueueFillBufferTraits::size, 0, 0}; MultiDispatchInfo multiDispatchInfo(dc); builder.buildDispatchInfos(multiDispatchInfo); EXPECT_NE(0u, multiDispatchInfo.size()); auto kernel = multiDispatchInfo.begin()->getKernel(); auto kernelDescriptor = &kernel->getKernelInfo().kernelDescriptor; EXPECT_TRUE(UnitTestHelper::evaluateDshUsage(dshBefore, pDSH->getUsed(), kernelDescriptor, rootDeviceIndex)); EXPECT_NE(iohBefore, pIOH->getUsed()); if (kernel->usesBindfulAddressingForBuffers()) { EXPECT_NE(sshBefore, 
pSSH->getUsed()); } context.getMemoryManager()->freeGraphicsMemory(patternAllocation); } HWTEST_F(EnqueueFillBufferCmdTests, GivenRightLeftoverWhenFillingBufferThenFillBufferRightLeftoverKernelUsed) { auto patternAllocation = context.getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{context.getDevice(0)->getRootDeviceIndex(), EnqueueFillBufferTraits::patternSize}); EnqueueFillBufferHelper<>::enqueueFillBuffer(pCmdQ, buffer); auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::FillBuffer, pCmdQ->getClDevice()); ASSERT_NE(nullptr, &builder); BuiltinOpParams dc; MemObj patternMemObj(&this->context, 0, {}, 0, 0, alignUp(EnqueueFillBufferTraits::patternSize, 4), patternAllocation->getUnderlyingBuffer(), patternAllocation->getUnderlyingBuffer(), GraphicsAllocationHelper::toMultiGraphicsAllocation(patternAllocation), false, false, true); dc.srcMemObj = &patternMemObj; dc.dstMemObj = buffer; dc.dstOffset = {0, 0, 0}; dc.size = {EnqueueFillBufferTraits::patternSize, 0, 0}; MultiDispatchInfo mdi(dc); builder.buildDispatchInfos(mdi); EXPECT_EQ(1u, mdi.size()); auto kernel = mdi.begin()->getKernel(); EXPECT_STREQ("FillBufferRightLeftover", kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str()); context.getMemoryManager()->freeGraphicsMemory(patternAllocation); } HWTEST_F(EnqueueFillBufferCmdTests, GivenMiddleWhenFillingBufferThenFillBufferMiddleKernelUsed) { auto patternAllocation = context.getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{context.getDevice(0)->getRootDeviceIndex(), EnqueueFillBufferTraits::patternSize}); EnqueueFillBufferHelper<>::enqueueFillBuffer(pCmdQ, buffer); auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::FillBuffer, pCmdQ->getClDevice()); ASSERT_NE(nullptr, &builder); BuiltinOpParams dc; MemObj patternMemObj(&this->context, 0, {}, 0, 0, alignUp(EnqueueFillBufferTraits::patternSize, 4), 
patternAllocation->getUnderlyingBuffer(), patternAllocation->getUnderlyingBuffer(), GraphicsAllocationHelper::toMultiGraphicsAllocation(patternAllocation), false, false, true); dc.srcMemObj = &patternMemObj; dc.dstMemObj = buffer; dc.dstOffset = {0, 0, 0}; dc.size = {MemoryConstants::cacheLineSize, 0, 0}; MultiDispatchInfo mdi(dc); builder.buildDispatchInfos(mdi); EXPECT_EQ(1u, mdi.size()); auto kernel = mdi.begin()->getKernel(); EXPECT_STREQ("FillBufferMiddle", kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str()); context.getMemoryManager()->freeGraphicsMemory(patternAllocation); } HWTEST_F(EnqueueFillBufferCmdTests, GivenLeftLeftoverWhenFillingBufferThenFillBufferLeftLeftoverKernelUsed) { auto patternAllocation = context.getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{context.getDevice(0)->getRootDeviceIndex(), EnqueueFillBufferTraits::patternSize}); EnqueueFillBufferHelper<>::enqueueFillBuffer(pCmdQ, buffer); auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::FillBuffer, pCmdQ->getClDevice()); ASSERT_NE(nullptr, &builder); BuiltinOpParams dc; MemObj patternMemObj(&this->context, 0, {}, 0, 0, alignUp(EnqueueFillBufferTraits::patternSize, 4), patternAllocation->getUnderlyingBuffer(), patternAllocation->getUnderlyingBuffer(), GraphicsAllocationHelper::toMultiGraphicsAllocation(patternAllocation), false, false, true); dc.srcMemObj = &patternMemObj; dc.dstMemObj = buffer; dc.dstOffset = {EnqueueFillBufferTraits::patternSize, 0, 0}; dc.size = {EnqueueFillBufferTraits::patternSize, 0, 0}; MultiDispatchInfo mdi(dc); builder.buildDispatchInfos(mdi); EXPECT_EQ(1u, mdi.size()); auto kernel = mdi.begin()->getKernel(); EXPECT_STREQ("FillBufferLeftLeftover", kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str()); context.getMemoryManager()->freeGraphicsMemory(patternAllocation); } HWTEST_F(EnqueueFillBufferCmdTests, WhenFillingBufferThenL3ProgrammingIsCorrect) { 
enqueueFillBuffer(); validateL3Programming(cmdList, itorWalker); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueFillBufferCmdTests, WhenEnqueueIsDoneThenStateBaseAddressIsProperlyProgrammed) { enqueueFillBuffer(); auto &ultCsr = this->pDevice->getUltCommandStreamReceiver(); auto &hwHelper = HwHelper::get(pDevice->getHardwareInfo().platform.eRenderCoreFamily); validateStateBaseAddress(ultCsr.getMemoryManager()->getInternalHeapBaseAddress(ultCsr.rootDeviceIndex, pIOH->getGraphicsAllocation()->isAllocatedInLocalMemoryPool()), ultCsr.getMemoryManager()->getInternalHeapBaseAddress(ultCsr.rootDeviceIndex, !hwHelper.useSystemMemoryPlacementForISA(pDevice->getHardwareInfo())), pDSH, pIOH, pSSH, itorPipelineSelect, itorWalker, cmdList, 0llu); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueFillBufferCmdTests, WhenFillingBufferThenMediaInterfaceDescriptorLoadIsCorrect) { typedef typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD MEDIA_INTERFACE_DESCRIPTOR_LOAD; typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; enqueueFillBuffer(); auto *cmd = (MEDIA_INTERFACE_DESCRIPTOR_LOAD *)cmdMediaInterfaceDescriptorLoad; // Verify we have a valid length -- multiple of INTERFACE_DESCRIPTOR_DATAs EXPECT_EQ(0u, cmd->getInterfaceDescriptorTotalLength() % sizeof(INTERFACE_DESCRIPTOR_DATA)); // Validate the start address size_t alignmentStartAddress = 64 * sizeof(uint8_t); EXPECT_EQ(0u, cmd->getInterfaceDescriptorDataStartAddress() % alignmentStartAddress); // Validate the length EXPECT_NE(0u, cmd->getInterfaceDescriptorTotalLength()); size_t alignmentTotalLength = 32 * sizeof(uint8_t); EXPECT_EQ(0u, cmd->getInterfaceDescriptorTotalLength() % alignmentTotalLength); // Generically validate this command FamilyType::PARSE::template validateCommand(cmdList.begin(), itorMediaInterfaceDescriptorLoad); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueFillBufferCmdTests, WhenFillingBufferThenInterfaceDescriptorDataIsCorrect) { typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA 
INTERFACE_DESCRIPTOR_DATA; typedef typename FamilyType::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; enqueueFillBuffer(); // Extract the IDD auto cmdSBA = (STATE_BASE_ADDRESS *)cmdStateBaseAddress; auto &IDD = *(INTERFACE_DESCRIPTOR_DATA *)(cmdInterfaceDescriptorData); // Validate the kernel start pointer. Technically, a kernel can start at address 0 but let's force a value. auto kernelStartPointer = ((uint64_t)IDD.getKernelStartPointerHigh() << 32) + IDD.getKernelStartPointer(); EXPECT_LE(kernelStartPointer, cmdSBA->getInstructionBufferSize() * MemoryConstants::pageSize); EXPECT_NE(0u, IDD.getNumberOfThreadsInGpgpuThreadGroup()); EXPECT_NE(0u, IDD.getCrossThreadConstantDataReadLength()); EXPECT_NE(0u, IDD.getConstantIndirectUrbEntryReadLength()); } HWTEST_F(EnqueueFillBufferCmdTests, WhenFillingBufferThenNumberOfPipelineSelectsIsOne) { enqueueFillBuffer(); int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect(); EXPECT_EQ(1, numCommands); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueFillBufferCmdTests, WhenFillingBufferThenMediaVfeStateIsSetCorrectly) { enqueueFillBuffer(); validateMediaVFEState(&pDevice->getHardwareInfo(), cmdMediaVfeState, cmdList, itorMediaVfeState); } HWTEST_F(EnqueueFillBufferCmdTests, WhenFillingBufferThenArgumentZeroShouldMatchDestAddress) { auto patternAllocation = context.getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{context.getDevice(0)->getRootDeviceIndex(), EnqueueFillBufferTraits::patternSize}); enqueueFillBuffer(); // Extract the kernel used auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::FillBuffer, pCmdQ->getClDevice()); ASSERT_NE(nullptr, &builder); BuiltinOpParams dc; MemObj patternMemObj(&this->context, 0, {}, 0, 0, alignUp(EnqueueFillBufferTraits::patternSize, 4), patternAllocation->getUnderlyingBuffer(), patternAllocation->getUnderlyingBuffer(), GraphicsAllocationHelper::toMultiGraphicsAllocation(patternAllocation), false, false, true); dc.srcMemObj = 
&patternMemObj; dc.dstMemObj = buffer; dc.dstOffset = {EnqueueFillBufferTraits::offset, 0, 0}; dc.size = {EnqueueFillBufferTraits::size, 0, 0}; MultiDispatchInfo multiDispatchInfo(dc); builder.buildDispatchInfos(multiDispatchInfo); EXPECT_NE(0u, multiDispatchInfo.size()); auto kernel = multiDispatchInfo.begin()->getKernel(); ASSERT_NE(nullptr, kernel); // Determine where the argument is auto pArgument = (void **)getStatelessArgumentPointer(kernel->getKernelInfo(), 0u, pCmdQ->getIndirectHeap(IndirectHeap::Type::INDIRECT_OBJECT, 0), rootDeviceIndex); EXPECT_EQ((void *)((uintptr_t)buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddress()), *pArgument); context.getMemoryManager()->freeGraphicsMemory(patternAllocation); } HWTEST_F(EnqueueFillBufferCmdTests, WhenFillingBufferThenArgumentTwoShouldMatchPatternPtr) { auto patternAllocation = context.getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{context.getDevice(0)->getRootDeviceIndex(), EnqueueFillBufferTraits::patternSize}); enqueueFillBuffer(); // Extract the kernel used auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::FillBuffer, pCmdQ->getClDevice()); ASSERT_NE(nullptr, &builder); BuiltinOpParams dc; MemObj patternMemObj(&this->context, 0, {}, 0, 0, alignUp(EnqueueFillBufferTraits::patternSize, 4), patternAllocation->getUnderlyingBuffer(), patternAllocation->getUnderlyingBuffer(), GraphicsAllocationHelper::toMultiGraphicsAllocation(patternAllocation), false, false, true); dc.srcMemObj = &patternMemObj; dc.dstMemObj = buffer; dc.dstOffset = {EnqueueFillBufferTraits::offset, 0, 0}; dc.size = {EnqueueFillBufferTraits::size, 0, 0}; MultiDispatchInfo multiDispatchInfo(dc); builder.buildDispatchInfos(multiDispatchInfo); EXPECT_NE(0u, multiDispatchInfo.size()); auto kernel = multiDispatchInfo.begin()->getKernel(); ASSERT_NE(nullptr, kernel); // Determine where the argument is auto pArgument = (void 
**)getStatelessArgumentPointer(kernel->getKernelInfo(), 2u, pCmdQ->getIndirectHeap(IndirectHeap::Type::INDIRECT_OBJECT, 0), rootDeviceIndex); EXPECT_NE(nullptr, *pArgument); context.getMemoryManager()->freeGraphicsMemory(patternAllocation); } HWTEST_F(EnqueueFillBufferCmdTests, WhenFillingBufferStatelessThenStatelessKernelIsUsed) { auto patternAllocation = context.getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{context.getDevice(0)->getRootDeviceIndex(), EnqueueFillBufferTraits::patternSize}); // Extract the kernel used auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::FillBufferStateless, pCmdQ->getClDevice()); ASSERT_NE(nullptr, &builder); BuiltinOpParams dc; MemObj patternMemObj(&this->context, 0, {}, 0, 0, alignUp(EnqueueFillBufferTraits::patternSize, 4), patternAllocation->getUnderlyingBuffer(), patternAllocation->getUnderlyingBuffer(), GraphicsAllocationHelper::toMultiGraphicsAllocation(patternAllocation), false, false, true); dc.srcMemObj = &patternMemObj; dc.dstMemObj = buffer; dc.dstOffset = {EnqueueFillBufferTraits::offset, 0, 0}; dc.size = {EnqueueFillBufferTraits::size, 0, 0}; MultiDispatchInfo multiDispatchInfo(dc); builder.buildDispatchInfos(multiDispatchInfo); EXPECT_NE(0u, multiDispatchInfo.size()); auto kernel = multiDispatchInfo.begin()->getKernel(); ASSERT_NE(nullptr, kernel); EXPECT_TRUE(kernel->getKernelInfo().kernelDescriptor.kernelAttributes.supportsBuffersBiggerThan4Gb()); EXPECT_FALSE(kernel->getKernelInfo().getArgDescriptorAt(0).as().isPureStateful()); context.getMemoryManager()->freeGraphicsMemory(patternAllocation); } HWTEST_F(EnqueueFillBufferCmdTests, WhenFillingBufferThenPatternShouldBeCopied) { auto &csr = pCmdQ->getGpgpuCommandStreamReceiver(); ASSERT_TRUE(csr.getTemporaryAllocations().peekIsEmpty()); EnqueueFillBufferHelper<>::enqueueFillBuffer(pCmdQ, buffer); ASSERT_FALSE(csr.getAllocationsForReuse().peekIsEmpty()); GraphicsAllocation *allocation = 
csr.getAllocationsForReuse().peekHead(); while (allocation != nullptr) { if ((allocation->getUnderlyingBufferSize() >= sizeof(float)) && (allocation->getUnderlyingBuffer() != nullptr) && (*(static_cast(allocation->getUnderlyingBuffer())) == EnqueueFillBufferHelper<>::Traits::pattern[0]) && (pCmdQ->taskCount == allocation->getTaskCount(csr.getOsContext().getContextId()))) { break; } allocation = allocation->next; } ASSERT_NE(nullptr, allocation); EXPECT_NE(&EnqueueFillBufferHelper<>::Traits::pattern[0], allocation->getUnderlyingBuffer()); } HWTEST_F(EnqueueFillBufferCmdTests, WhenFillingBufferThenPatternShouldBeAligned) { auto &csr = pCmdQ->getGpgpuCommandStreamReceiver(); ASSERT_TRUE(csr.getTemporaryAllocations().peekIsEmpty()); EnqueueFillBufferHelper<>::enqueueFillBuffer(pCmdQ, buffer); ASSERT_TRUE(csr.getTemporaryAllocations().peekIsEmpty()); GraphicsAllocation *allocation = csr.getAllocationsForReuse().peekHead(); while (allocation != nullptr) { if ((allocation->getUnderlyingBufferSize() >= sizeof(float)) && (allocation->getUnderlyingBuffer() != nullptr) && (*(static_cast(allocation->getUnderlyingBuffer())) == EnqueueFillBufferHelper<>::Traits::pattern[0]) && (pCmdQ->taskCount == allocation->getTaskCount(csr.getOsContext().getContextId()))) { break; } allocation = allocation->next; } ASSERT_NE(nullptr, allocation); EXPECT_EQ(alignUp(allocation->getUnderlyingBuffer(), MemoryConstants::cacheLineSize), allocation->getUnderlyingBuffer()); EXPECT_EQ(alignUp(allocation->getUnderlyingBufferSize(), MemoryConstants::cacheLineSize), allocation->getUnderlyingBufferSize()); } HWTEST_F(EnqueueFillBufferCmdTests, WhenFillBufferIsCalledTwiceThenPatternAllocationIsReused) { auto &csr = pCmdQ->getGpgpuCommandStreamReceiver(); ASSERT_TRUE(csr.getAllocationsForReuse().peekIsEmpty()); EnqueueFillBufferHelper<>::enqueueFillBuffer(pCmdQ, buffer); ASSERT_FALSE(csr.getAllocationsForReuse().peekIsEmpty()); GraphicsAllocation *allocation = csr.getAllocationsForReuse().peekHead(); 
EnqueueFillBufferHelper<>::enqueueFillBuffer(pCmdQ, buffer); ASSERT_FALSE(csr.getAllocationsForReuse().peekIsEmpty()); EXPECT_NE(csr.getAllocationsForReuse().peekHead(), nullptr); EXPECT_EQ(allocation, csr.getAllocationsForReuse().peekHead()); EXPECT_EQ(csr.getAllocationsForReuse().peekTail(), allocation); } HWTEST_F(EnqueueFillBufferCmdTests, WhenFillingBufferThenPatternOfSizeOneByteShouldGetPreparedForMiddleKernel) { auto &csr = pCmdQ->getGpgpuCommandStreamReceiver(); ASSERT_TRUE(csr.getAllocationsForReuse().peekIsEmpty()); ASSERT_TRUE(csr.getTemporaryAllocations().peekIsEmpty()); auto dstBuffer = std::unique_ptr(BufferHelper<>::create()); const uint8_t pattern[1] = {0x55}; const size_t patternSize = sizeof(pattern); const size_t offset = 0; const size_t size = 4 * patternSize; const uint8_t output[4] = {0x55, 0x55, 0x55, 0x55}; auto retVal = clEnqueueFillBuffer( pCmdQ, dstBuffer.get(), pattern, patternSize, offset, size, 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_FALSE(csr.getAllocationsForReuse().peekIsEmpty()); ASSERT_TRUE(csr.getTemporaryAllocations().peekIsEmpty()); GraphicsAllocation *allocation = csr.getAllocationsForReuse().peekHead(); ASSERT_NE(nullptr, allocation); EXPECT_EQ(0, memcmp(allocation->getUnderlyingBuffer(), output, size)); } HWTEST_F(EnqueueFillBufferCmdTests, WhenFillingBufferThenPatternOfSizeTwoBytesShouldGetPreparedForMiddleKernel) { auto &csr = pCmdQ->getGpgpuCommandStreamReceiver(); ASSERT_TRUE(csr.getAllocationsForReuse().peekIsEmpty()); ASSERT_TRUE(csr.getTemporaryAllocations().peekIsEmpty()); auto dstBuffer = std::unique_ptr(BufferHelper<>::create()); const uint8_t pattern[2] = {0x55, 0xAA}; const size_t patternSize = sizeof(pattern); const size_t offset = 0; const size_t size = 2 * patternSize; const uint8_t output[4] = {0x55, 0xAA, 0x55, 0xAA}; auto retVal = clEnqueueFillBuffer( pCmdQ, dstBuffer.get(), pattern, patternSize, offset, size, 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); 
ASSERT_FALSE(csr.getAllocationsForReuse().peekIsEmpty()); ASSERT_TRUE(csr.getTemporaryAllocations().peekIsEmpty()); GraphicsAllocation *allocation = csr.getAllocationsForReuse().peekHead(); ASSERT_NE(nullptr, allocation); EXPECT_EQ(0, memcmp(allocation->getUnderlyingBuffer(), output, size)); } HWTEST_F(EnqueueFillBufferCmdTests, givenEnqueueFillBufferWhenPatternAllocationIsObtainedThenItsTypeShouldBeSetToFillPattern) { auto &csr = pCmdQ->getGpgpuCommandStreamReceiver(); ASSERT_TRUE(csr.getTemporaryAllocations().peekIsEmpty()); auto dstBuffer = std::unique_ptr(BufferHelper<>::create()); const uint8_t pattern[1] = {0x55}; const size_t patternSize = sizeof(pattern); const size_t offset = 0; const size_t size = patternSize; auto retVal = clEnqueueFillBuffer( pCmdQ, dstBuffer.get(), pattern, patternSize, offset, size, 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_FALSE(csr.getAllocationsForReuse().peekIsEmpty()); GraphicsAllocation *patternAllocation = csr.getAllocationsForReuse().peekHead(); ASSERT_NE(nullptr, patternAllocation); EXPECT_EQ(AllocationType::FILL_PATTERN, patternAllocation->getAllocationType()); } struct EnqueueFillBufferHw : public ::testing::Test { void SetUp() override { if (is32bit) { GTEST_SKIP(); } device = std::make_unique(MockClDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); context.reset(new MockContext(device.get())); } std::unique_ptr device; std::unique_ptr context; const uint8_t pattern[1] = {0x55}; const size_t patternSize = sizeof(pattern); const size_t offset = 0; const size_t size = patternSize; MockBuffer dstBuffer; uint64_t bigSize = 4ull * MemoryConstants::gigaByte; uint64_t smallSize = 4ull * MemoryConstants::gigaByte - 1; }; using EnqueueFillBufferStatelessTest = EnqueueFillBufferHw; HWTEST_F(EnqueueFillBufferStatelessTest, givenBuffersWhenFillingBufferStatelessThenSuccessIsReturned) { auto pCmdQ = std::make_unique>(context.get(), device.get()); dstBuffer.size = static_cast(bigSize); auto retVal = 
pCmdQ->enqueueFillBuffer( &dstBuffer, pattern, patternSize, offset, size, 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); } using EnqueueFillBufferStatefullTest = EnqueueFillBufferHw; HWTEST_F(EnqueueFillBufferStatefullTest, givenBuffersWhenFillingBufferStatefullThenSuccessIsReturned) { auto pCmdQ = std::make_unique>(context.get(), device.get()); dstBuffer.size = static_cast(smallSize); auto retVal = pCmdQ->enqueueFillBuffer( &dstBuffer, pattern, patternSize, offset, size, 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); } compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/enqueue_fill_image_fixture.h000066400000000000000000000024521422164147700322120ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/test/common/test_macros/test.h" #include "shared/test/common/test_macros/test_checks_shared.h" #include "opencl/test/unit_test/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/command_queue/enqueue_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" namespace NEO { struct EnqueueFillImageTestFixture : public CommandEnqueueFixture { void SetUp(void) override { REQUIRE_IMAGES_OR_SKIP(defaultHwInfo); CommandEnqueueFixture::SetUp(); context = new MockContext(pClDevice); image = Image2dHelper<>::create(context); } void TearDown(void) override { if (testing::Test::IsSkipped()) { return; } delete image; delete context; CommandEnqueueFixture::TearDown(); } protected: template void enqueueFillImage() { auto retVal = EnqueueFillImageHelper<>::enqueueFillImage(pCmdQ, image); EXPECT_EQ(CL_SUCCESS, retVal); parseCommands(*pCmdQ); } MockContext *context = nullptr; Image *image = nullptr; }; } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/enqueue_fill_image_tests.cpp000066400000000000000000000320651422164147700322240ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * 
SPDX-License-Identifier: MIT * */ #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/libult/ult_command_stream_receiver.h" #include "shared/test/common/mocks/mock_gmm_resource_info.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/helpers/convert_color.h" #include "opencl/test/unit_test/command_queue/enqueue_fill_image_fixture.h" #include "opencl/test/unit_test/gen_common/gen_commands_common_validation.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "reg_configs_common.h" #include using namespace NEO; class EnqueueFillImageTest : public EnqueueFillImageTestFixture, public ::testing::Test { public: void SetUp(void) override { EnqueueFillImageTestFixture::SetUp(); } void TearDown(void) override { EnqueueFillImageTestFixture::TearDown(); } }; HWTEST_F(EnqueueFillImageTest, WhenFillingImageThenTaskCountIsAlignedWithCsr) { //this test case assumes IOQ auto &csr = pDevice->getUltCommandStreamReceiver(); csr.taskCount = pCmdQ->taskCount + 100; csr.taskLevel = pCmdQ->taskLevel + 50; EnqueueFillImageHelper<>::enqueueFillImage(pCmdQ, image); EXPECT_EQ(csr.peekTaskCount(), pCmdQ->taskCount); EXPECT_EQ(csr.peekTaskLevel(), pCmdQ->taskLevel + 1); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueFillImageTest, WhenFillingImageThenGpgpuWalkerIsCorrect) { typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; enqueueFillImage(); auto *cmd = reinterpret_cast(cmdWalker); ASSERT_NE(nullptr, cmd); // Verify GPGPU_WALKER parameters EXPECT_NE(0u, cmd->getThreadGroupIdXDimension()); EXPECT_NE(0u, cmd->getThreadGroupIdYDimension()); EXPECT_NE(0u, cmd->getThreadGroupIdZDimension()); EXPECT_NE(0u, cmd->getRightExecutionMask()); EXPECT_NE(0u, cmd->getBottomExecutionMask()); EXPECT_EQ(GPGPU_WALKER::SIMD_SIZE_SIMD32, cmd->getSimdSize()); EXPECT_NE(0u, cmd->getIndirectDataLength()); EXPECT_FALSE(cmd->getIndirectParameterEnable()); // Compute the SIMD lane 
mask size_t simd = cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD32 ? 32 : cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD16 ? 16 : 8; uint64_t simdMask = maxNBitValue(simd); // Mask off lanes based on the execution masks auto laneMaskRight = cmd->getRightExecutionMask() & simdMask; auto lanesPerThreadX = 0; while (laneMaskRight) { lanesPerThreadX += laneMaskRight & 1; laneMaskRight >>= 1; } } HWTEST_F(EnqueueFillImageTest, WhenFillingImageThenTaskLevelIsIncremented) { auto taskLevelBefore = pCmdQ->taskLevel; EnqueueFillImageHelper<>::enqueueFillImage(pCmdQ, image); EXPECT_GT(pCmdQ->taskLevel, taskLevelBefore); } HWTEST_F(EnqueueFillImageTest, WhenFillingImageThenCommandsAreAdded) { auto usedCmdBufferBefore = pCS->getUsed(); EnqueueFillImageHelper<>::enqueueFillImage(pCmdQ, image); EXPECT_NE(usedCmdBufferBefore, pCS->getUsed()); } HWTEST_F(EnqueueFillImageTest, GivenGpuHangAndBlockingCallWhenFillingImageThenOutOfResourcesIsReturned) { DebugManagerStateRestore stateRestore; DebugManager.flags.MakeEachEnqueueBlocking.set(true); std::unique_ptr device(new MockClDevice{MockClDevice::createWithNewExecutionEnvironment(nullptr)}); cl_queue_properties props = {}; MockCommandQueueHw mockCommandQueueHw(context, device.get(), &props); mockCommandQueueHw.waitForAllEnginesReturnValue = WaitStatus::GpuHang; const auto enqueueResult = EnqueueFillImageHelper<>::enqueueFillImage(&mockCommandQueueHw, image); EXPECT_EQ(CL_OUT_OF_RESOURCES, enqueueResult); EXPECT_EQ(1, mockCommandQueueHw.waitForAllEnginesCalledCount); } HWTEST_F(EnqueueFillImageTest, WhenFillingImageThenIndirectDataGetsAdded) { auto dshBefore = pDSH->getUsed(); auto iohBefore = pIOH->getUsed(); auto sshBefore = pSSH->getUsed(); const auto enqueueResult = EnqueueFillImageHelper<>::enqueueFillImage(pCmdQ, image); EXPECT_EQ(CL_SUCCESS, enqueueResult); EXPECT_TRUE(UnitTestHelper::evaluateDshUsage(dshBefore, pDSH->getUsed(), nullptr, rootDeviceIndex)); EXPECT_NE(iohBefore, pIOH->getUsed()); EXPECT_NE(sshBefore, 
pSSH->getUsed()); } HWTEST_F(EnqueueFillImageTest, WhenFillingImageThenL3ProgrammingIsCorrect) { enqueueFillImage(); validateL3Programming(cmdList, itorWalker); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueFillImageTest, WhenEnqueueIsDoneThenStateBaseAddressIsProperlyProgrammed) { enqueueFillImage(); auto &ultCsr = this->pDevice->getUltCommandStreamReceiver(); auto &hwHelper = HwHelper::get(pDevice->getHardwareInfo().platform.eRenderCoreFamily); validateStateBaseAddress(ultCsr.getMemoryManager()->getInternalHeapBaseAddress(ultCsr.rootDeviceIndex, pIOH->getGraphicsAllocation()->isAllocatedInLocalMemoryPool()), ultCsr.getMemoryManager()->getInternalHeapBaseAddress(ultCsr.rootDeviceIndex, !hwHelper.useSystemMemoryPlacementForISA(pDevice->getHardwareInfo())), pDSH, pIOH, pSSH, itorPipelineSelect, itorWalker, cmdList, 0llu); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueFillImageTest, WhenFillingImageThenMediaInterfaceDescriptorLoadIsCorrect) { typedef typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD MEDIA_INTERFACE_DESCRIPTOR_LOAD; typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; enqueueFillImage(); // All state should be programmed before walker auto cmd = reinterpret_cast(cmdMediaInterfaceDescriptorLoad); ASSERT_NE(nullptr, cmd); // Verify we have a valid length -- multiple of INTERFACE_DESCRIPTOR_DATAs EXPECT_EQ(0u, cmd->getInterfaceDescriptorTotalLength() % sizeof(INTERFACE_DESCRIPTOR_DATA)); // Validate the start address size_t alignmentStartAddress = 64 * sizeof(uint8_t); EXPECT_EQ(0u, cmd->getInterfaceDescriptorDataStartAddress() % alignmentStartAddress); // Validate the length EXPECT_NE(0u, cmd->getInterfaceDescriptorTotalLength()); size_t alignmentTotalLength = 32 * sizeof(uint8_t); EXPECT_EQ(0u, cmd->getInterfaceDescriptorTotalLength() % alignmentTotalLength); // Generically validate this command FamilyType::PARSE::template validateCommand(cmdList.begin(), itorMediaInterfaceDescriptorLoad); } HWCMDTEST_F(IGFX_GEN8_CORE, 
EnqueueFillImageTest, WhenFillingImageThenInterfaceDescriptorDataIsCorrect) { typedef typename FamilyType::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; enqueueFillImage(); // Extract the interfaceDescriptorData auto cmdSBA = (STATE_BASE_ADDRESS *)cmdStateBaseAddress; auto &interfaceDescriptorData = *(INTERFACE_DESCRIPTOR_DATA *)cmdInterfaceDescriptorData; // Validate the kernel start pointer. Technically, a kernel can start at address 0 but let's force a value. auto kernelStartPointer = ((uint64_t)interfaceDescriptorData.getKernelStartPointerHigh() << 32) + interfaceDescriptorData.getKernelStartPointer(); EXPECT_LE(kernelStartPointer, cmdSBA->getInstructionBufferSize() * MemoryConstants::pageSize); size_t maxLocalSize = 256u; auto localWorkSize = std::min(maxLocalSize, Image2dDefaults::imageDesc.image_width * Image2dDefaults::imageDesc.image_height); auto simd = 32u; auto numThreadsPerThreadGroup = Math::divideAndRoundUp(localWorkSize, simd); EXPECT_EQ(numThreadsPerThreadGroup, interfaceDescriptorData.getNumberOfThreadsInGpgpuThreadGroup()); EXPECT_NE(0u, interfaceDescriptorData.getCrossThreadConstantDataReadLength()); EXPECT_NE(0u, interfaceDescriptorData.getConstantIndirectUrbEntryReadLength()); // We shouldn't have these pointers the same. 
EXPECT_NE(kernelStartPointer, interfaceDescriptorData.getBindingTablePointer()); } HWTEST_F(EnqueueFillImageTest, WhenFillingImageThenSurfaceStateIsCorrect) { typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto mockCmdQ = std::make_unique>(context, pClDevice, nullptr); VariableBackup cmdQBackup(&pCmdQ, mockCmdQ.get()); mockCmdQ->storeMultiDispatchInfo = true; enqueueFillImage(); const auto &kernelInfo = mockCmdQ->storedMultiDispatchInfo.begin()->getKernel()->getKernelInfo(); uint32_t index = static_cast(kernelInfo.getArgDescriptorAt(0).template as().bindful) / sizeof(RENDER_SURFACE_STATE); const auto &surfaceState = getSurfaceState(&pCmdQ->getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0), index); const auto &imageDesc = image->getImageDesc(); EXPECT_EQ(imageDesc.image_width, surfaceState.getWidth()); EXPECT_EQ(imageDesc.image_height, surfaceState.getHeight()); EXPECT_NE(0u, surfaceState.getSurfacePitch()); EXPECT_NE(0u, surfaceState.getSurfaceType()); EXPECT_EQ(MockGmmResourceInfo::getHAlignSurfaceStateResult, surfaceState.getSurfaceHorizontalAlignment()); EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_VERTICAL_ALIGNMENT_VALIGN_4, surfaceState.getSurfaceVerticalAlignment()); EXPECT_EQ(image->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddress(), surfaceState.getSurfaceBaseAddress()); } HWTEST_F(EnqueueFillImageTest, WhenFillingImageThenNumberOfPipelineSelectsIsOne) { enqueueFillImage(); int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect(); EXPECT_EQ(1, numCommands); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueFillImageTest, WhenFillingImageThenMediaVfeStateIsSetCorrectly) { enqueueFillImage(); validateMediaVFEState(&pDevice->getHardwareInfo(), cmdMediaVfeState, cmdList, itorMediaVfeState); } TEST_F(EnqueueFillImageTest, givenSrgbFormatWhenConvertingThenUseNormalizingFactor) { float *fillColor; int iFillColor[4] = {0}; float LessThanZeroArray[4] = {-1.0f, -1.0f, -1.0f, 1.0f}; float MoreThanOneArray[4] = {2.0f, 
2.0f, 2.0f, 1.0f}; float NaNArray[4] = {NAN, NAN, NAN, 1.0f}; float distance; cl_image_format oldImageFormat = {CL_sRGBA, CL_UNORM_INT8}; cl_image_format newImageFormat = {CL_RGBA, CL_UNSIGNED_INT8}; fillColor = LessThanZeroArray; convertFillColor(static_cast(fillColor), iFillColor, oldImageFormat, newImageFormat); for (int i = 0; i < 3; i++) { distance = std::fabs(0.0f - static_cast(iFillColor[i])); EXPECT_GE(0.6f, distance); } EXPECT_EQ(255, iFillColor[3]); fillColor = MoreThanOneArray; convertFillColor(static_cast(fillColor), iFillColor, oldImageFormat, newImageFormat); for (int i = 0; i < 3; i++) { distance = std::fabs(255.0f - static_cast(iFillColor[i])); EXPECT_GE(0.6f, distance); } EXPECT_EQ(255, iFillColor[3]); fillColor = NaNArray; convertFillColor(static_cast(fillColor), iFillColor, oldImageFormat, newImageFormat); for (int i = 0; i < 3; i++) { distance = std::fabs(0.0f - static_cast(iFillColor[i])); EXPECT_GE(0.6f, distance); } EXPECT_EQ(255, iFillColor[3]); } TEST(ColorConvertTest, givenSnorm8FormatWhenConvertingThenUseNormalizingFactor) { float fFillColor[4] = {0.3f, -0.3f, 0.0f, 1.0f}; int32_t iFillColor[4] = {}; int32_t expectedIFillColor[4] = {}; cl_image_format oldFormat = {CL_R, CL_SNORM_INT8}; cl_image_format newFormat = {CL_R, CL_UNSIGNED_INT8}; auto normalizingFactor = selectNormalizingFactor(oldFormat.image_channel_data_type); for (size_t i = 0; i < 4; i++) { expectedIFillColor[i] = static_cast(normalizingFactor * fFillColor[i]); expectedIFillColor[i] = expectedIFillColor[i] & 0xFF; } convertFillColor(static_cast(fFillColor), iFillColor, oldFormat, newFormat); EXPECT_TRUE(memcmp(expectedIFillColor, iFillColor, 4 * sizeof(int32_t)) == 0); } TEST(ColorConvertTest, givenSnorm16FormatWhenConvertingThenUseNormalizingFactor) { float fFillColor[4] = {0.3f, -0.3f, 0.0f, 1.0f}; int32_t iFillColor[4] = {}; int32_t expectedIFillColor[4] = {}; cl_image_format oldFormat = {CL_R, CL_SNORM_INT16}; cl_image_format newFormat = {CL_R, CL_UNSIGNED_INT16}; auto 
normalizingFactor = selectNormalizingFactor(oldFormat.image_channel_data_type); for (size_t i = 0; i < 4; i++) { expectedIFillColor[i] = static_cast(normalizingFactor * fFillColor[i]); expectedIFillColor[i] = expectedIFillColor[i] & 0xFFFF; } convertFillColor(static_cast(fFillColor), iFillColor, oldFormat, newFormat); EXPECT_TRUE(memcmp(expectedIFillColor, iFillColor, 4 * sizeof(int32_t)) == 0); } compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/enqueue_fixture.cpp000066400000000000000000000141041422164147700303720ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/command_queue/enqueue_fixture.h" #include "shared/source/helpers/ptr_math.h" // clang-format off // EnqueueTraits using namespace NEO; cl_uint EnqueueTraits::numEventsInWaitList = 0; const cl_event *EnqueueTraits::eventWaitList = nullptr; cl_event *EnqueueTraits::event = nullptr; static const auto negOne = static_cast(-1); static int ptrOutputContent[16] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }; static auto ptrOutput = (void *)ptrOutputContent; // EnqueueCopyBufferTraits const size_t EnqueueCopyBufferTraits::srcOffset = 0; const size_t EnqueueCopyBufferTraits::dstOffset = 0; const size_t EnqueueCopyBufferTraits::size = negOne; cl_command_type EnqueueCopyBufferTraits::cmdType = CL_COMMAND_COPY_BUFFER; // EnqueueCopyBufferToImageTraits const size_t EnqueueCopyBufferToImageTraits::srcOffset = 0; const size_t EnqueueCopyBufferToImageTraits::dstOrigin[3] = { 0, 0, 0}; const size_t EnqueueCopyBufferToImageTraits::region[3] = {negOne, negOne, negOne}; cl_command_type EnqueueCopyBufferToImageTraits::cmdType = CL_COMMAND_COPY_BUFFER_TO_IMAGE; // EnqueueCopyImageToBufferTraits const size_t EnqueueCopyImageToBufferTraits::srcOrigin[3] = { 0, 0, 0}; const size_t EnqueueCopyImageToBufferTraits::region[3] = {negOne, negOne, negOne}; const size_t 
EnqueueCopyImageToBufferTraits::dstOffset = 0; cl_command_type EnqueueCopyImageToBufferTraits::cmdType = CL_COMMAND_COPY_IMAGE_TO_BUFFER; // EnqueueCopyImageTraits const size_t EnqueueCopyImageTraits::region[3] = {negOne, negOne, negOne}; const size_t EnqueueCopyImageTraits::srcOrigin[3] = { 0, 0, 0}; const size_t EnqueueCopyImageTraits::dstOrigin[3] = { 0, 0, 0}; cl_command_type EnqueueCopyImageTraits::cmdType = CL_COMMAND_COPY_IMAGE; // EnqueueFillBufferTraits const float EnqueueFillBufferTraits::pattern[1] = {1.2345f}; const size_t EnqueueFillBufferTraits::patternSize = sizeof(pattern); const size_t EnqueueFillBufferTraits::offset = 0; const size_t EnqueueFillBufferTraits::size = 2 * patternSize; cl_command_type EnqueueFillBufferTraits::cmdType = CL_COMMAND_FILL_BUFFER; // EnqueueFillImageTraits const float EnqueueFillImageTraits::fillColor[4] = { 1.0f, 2.0f, 3.0f, 4.0f}; const size_t EnqueueFillImageTraits::origin[3] = { 0, 0, 0}; const size_t EnqueueFillImageTraits::region[3] = {negOne, negOne, negOne}; cl_command_type EnqueueFillImageTraits::cmdType = CL_COMMAND_COPY_IMAGE; // EnqueueKernelTraits const cl_uint EnqueueKernelTraits::workDim = 1; const size_t EnqueueKernelTraits::globalWorkOffset[3] = {0, 0, 0}; const size_t EnqueueKernelTraits::globalWorkSize[3] = {1, 1, 1}; const size_t *EnqueueKernelTraits::localWorkSize = nullptr; cl_command_type EnqueueKernelTraits::cmdType = CL_COMMAND_NDRANGE_KERNEL; // EnqueueMapBufferTraits const cl_bool EnqueueMapBufferTraits::blocking = CL_TRUE; const cl_mem_flags EnqueueMapBufferTraits::flags = CL_MAP_WRITE; const size_t EnqueueMapBufferTraits::offset = 0; const size_t EnqueueMapBufferTraits::sizeInBytes = negOne; cl_int *EnqueueMapBufferTraits::errcodeRet = nullptr; cl_command_type EnqueueMapBufferTraits::cmdType = CL_COMMAND_MAP_BUFFER; // EnqueueReadBufferTraits const cl_bool EnqueueReadBufferTraits::blocking = CL_TRUE; const size_t EnqueueReadBufferTraits::offset = 0; const size_t 
EnqueueReadBufferTraits::sizeInBytes = negOne; void *EnqueueReadBufferTraits::hostPtr = ptrOutput; cl_command_type EnqueueReadBufferTraits::cmdType = CL_COMMAND_READ_BUFFER; GraphicsAllocation *EnqueueReadBufferTraits::mapAllocation = nullptr; // EnqueueReadImageTraits const cl_bool EnqueueReadImageTraits::blocking = CL_TRUE; const size_t EnqueueReadImageTraits::origin[3] = {0, 0, 0}; const size_t EnqueueReadImageTraits::region[3] = {2, 2, 1}; const size_t EnqueueReadImageTraits::rowPitch = 0; const size_t EnqueueReadImageTraits::slicePitch = 0; void *EnqueueReadImageTraits::hostPtr = ptrOutput; cl_command_type EnqueueReadImageTraits::cmdType = CL_COMMAND_READ_IMAGE; GraphicsAllocation *EnqueueReadImageTraits::mapAllocation = nullptr; // EnqueueWriteBufferTraits const bool EnqueueWriteBufferTraits::zeroCopy = true; const cl_bool EnqueueWriteBufferTraits::blocking = CL_TRUE; const size_t EnqueueWriteBufferTraits::offset = 0; const size_t EnqueueWriteBufferTraits::sizeInBytes = negOne; void *EnqueueWriteBufferTraits::hostPtr = ptrGarbage; cl_command_type EnqueueWriteBufferTraits::cmdType = CL_COMMAND_WRITE_BUFFER; GraphicsAllocation *EnqueueWriteBufferTraits::mapAllocation = nullptr; // EnqueueWriteBufferRectTraits const bool EnqueueWriteBufferRectTraits::zeroCopy = true; const cl_bool EnqueueWriteBufferRectTraits::blocking = CL_TRUE; const size_t EnqueueWriteBufferRectTraits::bufferOrigin[3] = { 0, 0, 0 }; const size_t EnqueueWriteBufferRectTraits::hostOrigin[3] = { 0, 0, 0 }; const size_t EnqueueWriteBufferRectTraits::region[3] = { 2, 2, 1 }; size_t EnqueueWriteBufferRectTraits::bufferRowPitch = 0; size_t EnqueueWriteBufferRectTraits::bufferSlicePitch = 0; size_t EnqueueWriteBufferRectTraits::hostRowPitch = 0; size_t EnqueueWriteBufferRectTraits::hostSlicePitch = 0; void *EnqueueWriteBufferRectTraits::hostPtr = ptrGarbage; cl_command_type EnqueueWriteBufferRectTraits::cmdType = CL_COMMAND_WRITE_BUFFER_RECT; // EnqueueWriteImageTraits const cl_bool 
EnqueueWriteImageTraits::blocking = CL_TRUE; const size_t EnqueueWriteImageTraits::origin[3] = {0, 0, 0}; const size_t EnqueueWriteImageTraits::region[3] = {2, 2, 1}; const size_t EnqueueWriteImageTraits::rowPitch = 0; const size_t EnqueueWriteImageTraits::slicePitch = 0; void *EnqueueWriteImageTraits::hostPtr = ptrGarbage; cl_command_type EnqueueWriteImageTraits::cmdType = CL_COMMAND_WRITE_IMAGE; GraphicsAllocation *EnqueueWriteImageTraits::mapAllocation = nullptr; // clang-format on compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/enqueue_fixture.h000066400000000000000000000716611422164147700300520ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/memory_manager/graphics_allocation.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/event/user_event.h" #include "opencl/source/kernel/kernel.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "CL/cl.h" #include struct EnqueueTraits { static cl_uint numEventsInWaitList; static const cl_event *eventWaitList; static cl_event *event; }; struct EnqueueCopyBufferTraits : public EnqueueTraits { static const size_t srcOffset; static const size_t dstOffset; static const size_t size; static cl_command_type cmdType; }; template struct EnqueueCopyBufferHelper { typedef T Traits; using Buffer = NEO::Buffer; using CommandQueue = NEO::CommandQueue; static cl_int enqueueCopyBuffer(CommandQueue *pCmdQ, Buffer *srcBuffer = std::unique_ptr(BufferHelper<>::create()).get(), Buffer *dstBuffer = std::unique_ptr(BufferHelper<>::create()).get(), size_t srcOffset = Traits::srcOffset, size_t dstOffset = Traits::dstOffset, size_t size = Traits::size, cl_uint numEventsInWaitList = Traits::numEventsInWaitList, const cl_event *eventWaitList = Traits::eventWaitList, cl_event *event = Traits::event) { cl_int retVal = 
pCmdQ->enqueueCopyBuffer( srcBuffer, dstBuffer, srcOffset, dstOffset, size, numEventsInWaitList, eventWaitList, event); return retVal; } static cl_int enqueue(CommandQueue *pCmdQ, void *placeholder = nullptr) { return enqueueCopyBuffer(pCmdQ); } }; struct EnqueueCopyBufferToImageTraits : public EnqueueTraits { static const size_t srcOffset; static const size_t dstOrigin[3]; static const size_t region[3]; static cl_command_type cmdType; }; template struct EnqueueCopyBufferToImageHelper { typedef T Traits; using Buffer = NEO::Buffer; using Image = NEO::Image; using CommandQueue = NEO::CommandQueue; static cl_int enqueueCopyBufferToImage(CommandQueue *pCmdQ, Buffer *srcBuffer = std::unique_ptr(BufferHelper<>::create()).get(), Image *dstImage = nullptr, const size_t srcOffset = Traits::srcOffset, const size_t dstOrigin[3] = Traits::dstOrigin, const size_t region[3] = Traits::region, cl_uint numEventsInWaitList = Traits::numEventsInWaitList, const cl_event *eventWaitList = Traits::eventWaitList, cl_event *event = Traits::event) { auto &context = pCmdQ->getContext(); std::unique_ptr dstImageDelete(dstImage ? nullptr : Image2dHelper<>::create(&context)); dstImage = dstImage ? dstImage : dstImageDelete.get(); size_t regionOut[3] = { region[0] == static_cast(-1) ? dstImage->getImageDesc().image_width : region[0], region[1] == static_cast(-1) ? dstImage->getImageDesc().image_height : region[1], region[2] == static_cast(-1) ? (dstImage->getImageDesc().image_depth > 0 ? 
dstImage->getImageDesc().image_depth : 1) : region[2], }; return pCmdQ->enqueueCopyBufferToImage(srcBuffer, dstImage, srcOffset, dstOrigin, regionOut, numEventsInWaitList, eventWaitList, event); } static cl_int enqueue(CommandQueue *pCmdQ, void *placeholder = nullptr) { return enqueueCopyBufferToImage(pCmdQ); } }; struct EnqueueCopyImageToBufferTraits : public EnqueueTraits { static const size_t srcOrigin[3]; static const size_t region[3]; static const size_t dstOffset; static cl_command_type cmdType; }; template struct EnqueueCopyImageToBufferHelper { typedef T Traits; using Buffer = NEO::Buffer; using Image = NEO::Image; using CommandQueue = NEO::CommandQueue; static cl_int enqueueCopyImageToBuffer(CommandQueue *pCmdQ, Image *srcImage = nullptr, Buffer *dstBuffer = std::unique_ptr(BufferHelper<>::create()).get(), const size_t srcOrigin[3] = Traits::srcOrigin, const size_t region[3] = Traits::region, const size_t dstOffset = Traits::dstOffset, cl_uint numEventsInWaitList = Traits::numEventsInWaitList, const cl_event *eventWaitList = Traits::eventWaitList, cl_event *event = Traits::event) { auto &context = pCmdQ->getContext(); std::unique_ptr srcImageDelete(srcImage ? nullptr : Image2dHelper<>::create(&context)); srcImage = srcImage ? srcImage : srcImageDelete.get(); size_t regionIn[3] = { region[0] == static_cast(-1) ? srcImage->getImageDesc().image_width : region[0], region[1] == static_cast(-1) ? srcImage->getImageDesc().image_height : region[1], region[2] == static_cast(-1) ? (srcImage->getImageDesc().image_depth > 0 ? 
srcImage->getImageDesc().image_depth : 1) : region[2], }; return pCmdQ->enqueueCopyImageToBuffer(srcImage, dstBuffer, srcOrigin, regionIn, dstOffset, numEventsInWaitList, eventWaitList, event); } static cl_int enqueue(CommandQueue *pCmdQ, void *placeholder = nullptr) { return enqueueCopyImageToBuffer(pCmdQ); } }; struct EnqueueCopyImageTraits : public EnqueueTraits { static const size_t srcOrigin[3]; static const size_t dstOrigin[3]; static const size_t region[3]; static cl_command_type cmdType; }; template struct EnqueueCopyImageHelper { typedef T Traits; using Image = NEO::Image; using CommandQueue = NEO::CommandQueue; static cl_int enqueueCopyImage(CommandQueue *pCmdQ, Image *srcImage = nullptr, Image *dstImage = nullptr, const size_t *srcOrigin = Traits::srcOrigin, const size_t *dstOrigin = Traits::dstOrigin, const size_t *region = Traits::region, cl_uint numEventsInWaitList = Traits::numEventsInWaitList, const cl_event *eventWaitList = Traits::eventWaitList, cl_event *event = Traits::event) { auto &context = pCmdQ->getContext(); std::unique_ptr srcImageDelete(srcImage ? nullptr : Image2dHelper<>::create(&context)); std::unique_ptr dstImageDelete(dstImage ? nullptr : Image2dHelper<>::create(&context)); srcImage = srcImage ? srcImage : srcImageDelete.get(); dstImage = dstImage ? dstImage : dstImageDelete.get(); size_t regionOut[3] = { region[0] == static_cast(-1) ? srcImage->getImageDesc().image_width : region[0], region[1] == static_cast(-1) ? srcImage->getImageDesc().image_height : region[1], region[2] == static_cast(-1) ? (srcImage->getImageDesc().image_depth > 0 ? 
srcImage->getImageDesc().image_depth : 1) : region[2], }; return pCmdQ->enqueueCopyImage(srcImage, dstImage, srcOrigin, dstOrigin, regionOut, numEventsInWaitList, eventWaitList, event); } static cl_int enqueue(CommandQueue *pCmdQ, void *placeholder = nullptr) { return enqueueCopyImage(pCmdQ); } }; struct EnqueueFillBufferTraits : public EnqueueTraits { static const float pattern[1]; static const size_t patternSize; static const size_t offset; static const size_t size; static cl_command_type cmdType; }; template struct EnqueueFillBufferHelper { typedef T Traits; using Buffer = NEO::Buffer; using CommandQueue = NEO::CommandQueue; static cl_int enqueueFillBuffer(CommandQueue *pCmdQ, Buffer *buffer = std::unique_ptr(BufferHelper<>::create()).get(), cl_uint numEventsInWaitList = Traits::numEventsInWaitList, const cl_event *eventWaitList = Traits::eventWaitList, cl_event *event = Traits::event) { return pCmdQ->enqueueFillBuffer(buffer, Traits::pattern, Traits::patternSize, Traits::offset, Traits::size, numEventsInWaitList, eventWaitList, event); } static cl_int enqueue(CommandQueue *pCmdQ, void *placeholder = nullptr) { return enqueueFillBuffer(pCmdQ); } }; struct EnqueueFillImageTraits : public EnqueueTraits { static const float fillColor[4]; static const size_t origin[3]; static const size_t region[3]; static cl_command_type cmdType; }; template struct EnqueueFillImageHelper { typedef T Traits; using Image = NEO::Image; using CommandQueue = NEO::CommandQueue; static cl_int enqueueFillImage(CommandQueue *pCmdQ, Image *image = nullptr, const void *fillColor = Traits::fillColor, const size_t *origin = Traits::origin, const size_t *region = Traits::region, cl_uint numEventsInWaitList = Traits::numEventsInWaitList, const cl_event *eventWaitList = Traits::eventWaitList, cl_event *event = Traits::event) { auto &context = pCmdQ->getContext(); std::unique_ptr imageDelete(image ? nullptr : Image2dHelper<>::create(&context)); image = image ? 
image : imageDelete.get(); size_t regionOut[3] = { region[0] == static_cast(-1) ? image->getImageDesc().image_width : region[0], region[1] == static_cast(-1) ? image->getImageDesc().image_height : region[1], region[2] == static_cast(-1) ? (image->getImageDesc().image_depth > 0 ? image->getImageDesc().image_depth : 1) : region[2], }; cl_int retVal = pCmdQ->enqueueFillImage(image, fillColor, origin, regionOut, numEventsInWaitList, eventWaitList, event); return retVal; } static cl_int enqueue(CommandQueue *pCmdQ, void *placeholder = nullptr) { return enqueueFillImage(pCmdQ); } }; struct EnqueueKernelTraits : public EnqueueTraits { static const cl_uint workDim; static const size_t globalWorkOffset[3]; static const size_t globalWorkSize[3]; static const size_t *localWorkSize; static cl_command_type cmdType; }; template struct EnqueueKernelHelper { typedef T Traits; using CommandQueue = NEO::CommandQueue; using Kernel = NEO::Kernel; static cl_int enqueueKernel(CommandQueue *pCmdQ, Kernel *kernel, cl_uint workDim = Traits::workDim, const size_t *globalWorkOffset = Traits::globalWorkOffset, const size_t *globalWorkSize = Traits::globalWorkSize, const size_t *localWorkSize = Traits::localWorkSize, cl_uint numEventsInWaitList = Traits::numEventsInWaitList, const cl_event *eventWaitList = Traits::eventWaitList, cl_event *event = Traits::event) { return pCmdQ->enqueueKernel(kernel, workDim, globalWorkOffset, globalWorkSize, localWorkSize, numEventsInWaitList, eventWaitList, event); } }; struct EnqueueMapBufferTraits : public EnqueueTraits { static const cl_bool blocking; static const cl_mem_flags flags; static const size_t offset; static const size_t sizeInBytes; static cl_int *errcodeRet; static cl_command_type cmdType; }; template struct EnqueueMapBufferHelper { typedef T Traits; using Buffer = NEO::Buffer; using CommandQueue = NEO::CommandQueue; static void *enqueueMapBuffer(CommandQueue *pCmdQ, Buffer *buffer = std::unique_ptr(BufferHelper<>::create()).get(), cl_bool 
blockingMap = Traits::blocking, cl_mem_flags flags = Traits::flags, size_t offset = Traits::offset, size_t size = Traits::sizeInBytes, cl_uint numEventsInWaitList = Traits::numEventsInWaitList, const cl_event *eventWaitList = Traits::eventWaitList, cl_event *event = Traits::event, cl_int *errcodeRet = Traits::errcodeRet) { size = size == static_cast(-1) ? buffer->getSize() : size; auto retCode = CL_SUCCESS; auto retPtr = pCmdQ->enqueueMapBuffer(buffer, blockingMap, flags, offset, size, numEventsInWaitList, eventWaitList, event, retCode); if (errcodeRet) { *errcodeRet = retCode; } return retPtr; } static cl_int enqueue(CommandQueue *pCmdQ, Buffer *buffer = nullptr) { auto retVal = CL_SUCCESS; enqueueMapBuffer(pCmdQ, buffer ? buffer : std::unique_ptr(BufferHelper<>::create()).get(), Traits::blocking, Traits::flags, Traits::offset, Traits::sizeInBytes, Traits::numEventsInWaitList, Traits::eventWaitList, Traits::event, &retVal); return retVal; } }; struct EnqueueReadBufferTraits : public EnqueueTraits { static const cl_bool blocking; static const size_t offset; static const size_t sizeInBytes; static void *hostPtr; static cl_command_type cmdType; static NEO::GraphicsAllocation *mapAllocation; }; template struct EnqueueReadBufferHelper { typedef T Traits; using Buffer = NEO::Buffer; using CommandQueue = NEO::CommandQueue; static cl_int enqueueReadBuffer(CommandQueue *pCmdQ, Buffer *buffer = std::unique_ptr(BufferHelper<>::create()).get(), cl_bool blockingRead = Traits::blocking, size_t offset = Traits::offset, size_t size = Traits::sizeInBytes, void *ptr = Traits::hostPtr, NEO::GraphicsAllocation *mapAllocation = Traits::mapAllocation, cl_uint numEventsInWaitList = Traits::numEventsInWaitList, const cl_event *eventWaitList = Traits::eventWaitList, cl_event *event = Traits::event) { size = size == static_cast(-1) ? 
buffer->getSize() : size; cl_int retVal = pCmdQ->enqueueReadBuffer(buffer, blockingRead, offset, size, ptr, mapAllocation, numEventsInWaitList, eventWaitList, event); return retVal; } static cl_int enqueue(CommandQueue *pCmdQ) { return enqueueReadBuffer(pCmdQ); } static cl_int enqueue(CommandQueue *pCmdQ, Buffer *buffer) { return enqueueReadBuffer(pCmdQ, buffer); } }; struct EnqueueReadImageTraits : public EnqueueTraits { static const cl_bool blocking; static const size_t origin[3]; static const size_t region[3]; static const size_t rowPitch; static const size_t slicePitch; static void *hostPtr; static cl_command_type cmdType; static NEO::GraphicsAllocation *mapAllocation; }; template struct EnqueueReadImageHelper { typedef T Traits; using Image = NEO::Image; using CommandQueue = NEO::CommandQueue; static cl_int enqueueReadImage(CommandQueue *pCmdQ, Image *image = nullptr, cl_bool blockingRead = Traits::blocking, const size_t *origin = Traits::origin, const size_t *region = Traits::region, size_t rowPitch = Traits::rowPitch, size_t slicePitch = Traits::slicePitch, void *ptr = Traits::hostPtr, NEO::GraphicsAllocation *mapAllocation = Traits::mapAllocation, cl_uint numEventsInWaitList = Traits::numEventsInWaitList, const cl_event *eventWaitList = Traits::eventWaitList, cl_event *event = Traits::event) { auto &context = pCmdQ->getContext(); std::unique_ptr imageDelete(image ? nullptr : Image2dHelper<>::create(&context)); image = image ? image : imageDelete.get(); size_t regionOut[3] = { region[0] == static_cast(-1) ? image->getImageDesc().image_width : region[0], region[1] == static_cast(-1) ? image->getImageDesc().image_height : region[1], region[2] == static_cast(-1) ? (image->getImageDesc().image_depth > 0 ? 
image->getImageDesc().image_depth : 1) : region[2], }; return pCmdQ->enqueueReadImage(image, blockingRead, origin, regionOut, rowPitch, slicePitch, ptr, mapAllocation, numEventsInWaitList, eventWaitList, event); } static cl_int enqueue(CommandQueue *pCmdQ, void *placeholder = nullptr) { return enqueueReadImage(pCmdQ); } }; struct EnqueueWriteBufferTraits : public EnqueueTraits { static const bool zeroCopy; static const cl_bool blocking; static const size_t offset; static const size_t sizeInBytes; static void *hostPtr; static cl_command_type cmdType; static NEO::GraphicsAllocation *mapAllocation; }; template struct EnqueueWriteBufferHelper { typedef T Traits; using Buffer = NEO::Buffer; using CommandQueue = NEO::CommandQueue; static cl_int enqueueWriteBuffer(CommandQueue *pCmdQ, Buffer *buffer = std::unique_ptr(BufferHelper<>::create()).get(), cl_bool blockingWrite = Traits::blocking, size_t offset = Traits::offset, size_t size = Traits::sizeInBytes, void *ptr = Traits::hostPtr, NEO::GraphicsAllocation *mapAllocation = Traits::mapAllocation, cl_uint numEventsInWaitList = Traits::numEventsInWaitList, const cl_event *eventWaitList = Traits::eventWaitList, cl_event *event = Traits::event) { size = size == static_cast(-1) ? 
buffer->getSize() : size; cl_int retVal = pCmdQ->enqueueWriteBuffer(buffer, blockingWrite, offset, size, ptr, mapAllocation, numEventsInWaitList, eventWaitList, event); return retVal; } static cl_int enqueue(CommandQueue *pCmdQ) { return enqueueWriteBuffer(pCmdQ); } static cl_int enqueue(CommandQueue *pCmdQ, Buffer *buffer) { return enqueueWriteBuffer(pCmdQ, buffer); } }; struct EnqueueWriteBufferRectTraits : public EnqueueTraits { static const bool zeroCopy; static const cl_bool blocking; static const size_t bufferOrigin[3]; static const size_t hostOrigin[3]; static const size_t region[3]; static size_t bufferRowPitch; static size_t bufferSlicePitch; static size_t hostRowPitch; static size_t hostSlicePitch; static void *hostPtr; static cl_command_type cmdType; }; template struct EnqueueWriteBufferRectHelper { typedef T Traits; using Buffer = NEO::Buffer; using CommandQueue = NEO::CommandQueue; static cl_int enqueueWriteBufferRect(CommandQueue *pCmdQ, Buffer *buffer = std::unique_ptr(BufferHelper<>::create()).get(), cl_bool blockingWrite = Traits::blocking, const size_t *bufferOrigin = Traits::bufferOrigin, const size_t *hostOrigin = Traits::hostOrigin, const size_t *region = Traits::region, size_t bufferRowPitch = Traits::bufferRowPitch, size_t bufferSlicePitch = Traits::bufferSlicePitch, size_t hostRowPitch = Traits::hostRowPitch, size_t hostSlicePitch = Traits::hostSlicePitch, void *hostPtr = Traits::hostPtr, cl_uint numEventsInWaitList = Traits::numEventsInWaitList, const cl_event *eventWaitList = Traits::eventWaitList, cl_event *event = Traits::event) { cl_int retVal = pCmdQ->enqueueWriteBufferRect(buffer, blockingWrite, bufferOrigin, hostOrigin, region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, hostPtr, numEventsInWaitList, eventWaitList, event); return retVal; } static cl_int enqueue(CommandQueue *pCmdQ) { return enqueueWriteBufferRect(pCmdQ); } }; struct EnqueueWriteImageTraits : public EnqueueTraits { static const cl_bool blocking; 
static const size_t origin[3]; static const size_t region[3]; static const size_t rowPitch; static const size_t slicePitch; static void *hostPtr; static cl_command_type cmdType; static NEO::GraphicsAllocation *mapAllocation; }; template struct EnqueueWriteImageHelper { typedef T Traits; using Image = NEO::Image; using CommandQueue = NEO::CommandQueue; static cl_int enqueueWriteImage(CommandQueue *pCmdQ, Image *image = nullptr, cl_bool blockingRead = Traits::blocking, const size_t *origin = Traits::origin, const size_t *region = Traits::region, size_t rowPitch = Traits::rowPitch, size_t slicePitch = Traits::slicePitch, const void *ptr = Traits::hostPtr, NEO::GraphicsAllocation *mapAllocation = Traits::mapAllocation, cl_uint numEventsInWaitList = Traits::numEventsInWaitList, const cl_event *eventWaitList = Traits::eventWaitList, cl_event *event = Traits::event) { auto &context = pCmdQ->getContext(); std::unique_ptr imageDelete(image ? nullptr : Image2dHelper<>::create(&context)); image = image ? image : imageDelete.get(); size_t regionOut[3] = { region[0] == static_cast(-1) ? image->getImageDesc().image_width : region[0], region[1] == static_cast(-1) ? image->getImageDesc().image_height : region[1], region[2] == static_cast(-1) ? (image->getImageDesc().image_depth > 0 ? 
image->getImageDesc().image_depth : 1) : region[2], }; return pCmdQ->enqueueWriteImage(image, blockingRead, origin, regionOut, rowPitch, slicePitch, ptr, mapAllocation, numEventsInWaitList, eventWaitList, event); } static cl_int enqueue(CommandQueue *pCmdQ, void *placeholder = nullptr) { return enqueueWriteImage(pCmdQ); } }; compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/enqueue_handler_tests.cpp000066400000000000000000001164051422164147700315520ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/aub/aub_subcapture.h" #include "shared/source/command_stream/wait_status.h" #include "shared/source/program/sync_buffer_handler.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/engine_descriptor_helper.h" #include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/mocks/mock_aub_csr.h" #include "shared/test/common/mocks/mock_aub_subcapture_manager.h" #include "shared/test/common/mocks/mock_csr.h" #include "shared/test/common/mocks/mock_internal_allocation_storage.h" #include "shared/test/common/mocks/mock_os_context.h" #include "shared/test/common/mocks/mock_timestamp_container.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/source/event/user_event.h" #include "opencl/source/helpers/cl_hw_helper.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/command_stream/thread_arbitration_policy_helper.h" #include "opencl/test/unit_test/fixtures/enqueue_handler_fixture.h" #include "opencl/test/unit_test/helpers/cl_hw_parse.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_mdi.h" #include 
"opencl/test/unit_test/mocks/mock_platform.h" #include "opencl/test/unit_test/test_macros/test_checks_ocl.h" #include "test_traits_common.h" using namespace NEO; HWTEST_F(EnqueueHandlerTest, WhenEnqueingHandlerWithKernelThenProcessEvictionOnCsrIsCalled) { int32_t tag; auto csr = new MockCsrBase(tag, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(csr); MockKernelWithInternals mockKernel(*pClDevice); auto mockCmdQ = std::unique_ptr>(new MockCommandQueueHw(context, pClDevice, 0)); size_t gws[] = {1, 1, 1}; mockCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_TRUE(csr->processEvictionCalled); } HWTEST_F(EnqueueHandlerTest, givenEnqueueHandlerWithKernelWhenAubCsrIsActiveThenAddCommentWithKernelName) { int32_t tag; auto aubCsr = new MockCsrAub(tag, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(aubCsr); MockKernelWithInternals mockKernel(*pClDevice); auto mockCmdQ = std::unique_ptr>(new MockCommandQueueHw(context, pClDevice, 0)); size_t gws[] = {1, 1, 1}; mockKernel.kernelInfo.kernelDescriptor.kernelMetadata.kernelName = "kernel_name"; mockCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_TRUE(aubCsr->addAubCommentCalled); EXPECT_EQ(1u, aubCsr->aubCommentMessages.size()); EXPECT_STREQ("kernel_name", aubCsr->aubCommentMessages[0].c_str()); } HWTEST_F(EnqueueHandlerTest, givenEnqueueHandlerWithKernelSplitWhenAubCsrIsActiveThenAddCommentWithKernelName) { int32_t tag; auto aubCsr = new MockCsrAub(tag, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(aubCsr); MockKernelWithInternals kernel1(*pClDevice); MockKernelWithInternals kernel2(*pClDevice); kernel1.kernelInfo.kernelDescriptor.kernelMetadata.kernelName = "kernel_1"; 
kernel2.kernelInfo.kernelDescriptor.kernelMetadata.kernelName = "kernel_2"; auto mockCmdQ = std::unique_ptr>(new MockCommandQueueHw(context, pClDevice, 0)); MockMultiDispatchInfo multiDispatchInfo(pClDevice, std::vector({kernel1.mockKernel, kernel2.mockKernel})); const auto enqueueResult = mockCmdQ->template enqueueHandler(nullptr, 0, true, multiDispatchInfo, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, enqueueResult); EXPECT_TRUE(aubCsr->addAubCommentCalled); EXPECT_EQ(2u, aubCsr->aubCommentMessages.size()); EXPECT_STREQ("kernel_1", aubCsr->aubCommentMessages[0].c_str()); EXPECT_STREQ("kernel_2", aubCsr->aubCommentMessages[1].c_str()); } HWTEST_F(EnqueueHandlerTest, givenEnqueueHandlerWithEmptyDispatchInfoWhenAubCsrIsActiveThenDontAddCommentWithKernelName) { int32_t tag; auto aubCsr = new MockCsrAub(tag, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(aubCsr); MockKernelWithInternals mockKernel(*pClDevice); auto mockCmdQ = std::unique_ptr>(new MockCommandQueueHw(context, pClDevice, 0)); size_t gws[] = {0, 0, 0}; mockKernel.kernelInfo.kernelDescriptor.kernelMetadata.kernelName = "kernel_name"; mockCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_FALSE(aubCsr->addAubCommentCalled); } struct EnqueueHandlerWithAubSubCaptureTests : public EnqueueHandlerTest { template class MockCmdQWithAubSubCapture : public CommandQueueHw { public: MockCmdQWithAubSubCapture(Context *context, ClDevice *device) : CommandQueueHw(context, device, nullptr, false) {} WaitStatus waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList, bool skipWait) override { waitUntilCompleteCalled = true; return CommandQueueHw::waitUntilComplete(gpgpuTaskCountToWait, copyEnginesToWait, flushStampToWait, useQuickKmdSleep, cleanTemporaryAllocationList, skipWait); } void 
obtainNewTimestampPacketNodes(size_t numberOfNodes, TimestampPacketContainer &previousNodes, bool clearAllDependencies, CommandStreamReceiver &csr) override { timestampPacketDependenciesCleared = clearAllDependencies; CommandQueueHw::obtainNewTimestampPacketNodes(numberOfNodes, previousNodes, clearAllDependencies, csr); } bool waitUntilCompleteCalled = false; bool timestampPacketDependenciesCleared = false; }; }; HWTEST_F(EnqueueHandlerWithAubSubCaptureTests, givenEnqueueHandlerWithAubSubCaptureWhenSubCaptureIsNotActiveThenEnqueueIsMadeBlocking) { DebugManagerStateRestore stateRestore; DebugManager.flags.AUBDumpSubCaptureMode.set(1); auto aubCsr = new MockAubCsr("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(aubCsr); AubSubCaptureCommon subCaptureCommon; subCaptureCommon.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Filter; subCaptureCommon.subCaptureFilter.dumpKernelName = "invalid_kernel_name"; auto subCaptureManagerMock = new AubSubCaptureManagerMock("file_name.aub", subCaptureCommon); aubCsr->subCaptureManager.reset(subCaptureManagerMock); MockCmdQWithAubSubCapture cmdQ(context, pClDevice); MockKernelWithInternals mockKernel(*pClDevice); size_t gws[3] = {1, 0, 0}; cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_TRUE(cmdQ.waitUntilCompleteCalled); } HWTEST_F(EnqueueHandlerWithAubSubCaptureTests, givenEnqueueMarkerWithAubSubCaptureWhenSubCaptureIsNotActiveThenEnqueueIsMadeBlocking) { DebugManagerStateRestore stateRestore; DebugManager.flags.AUBDumpSubCaptureMode.set(1); auto aubCsr = new MockAubCsr("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(aubCsr); AubSubCaptureCommon subCaptureCommon; subCaptureCommon.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Filter; subCaptureCommon.subCaptureFilter.dumpKernelName = 
"invalid_kernel_name"; auto subCaptureManagerMock = new AubSubCaptureManagerMock("file_name.aub", subCaptureCommon); aubCsr->subCaptureManager.reset(subCaptureManagerMock); MockCmdQWithAubSubCapture cmdQ(context, pClDevice); cmdQ.enqueueMarkerWithWaitList(0, nullptr, nullptr); EXPECT_TRUE(cmdQ.waitUntilCompleteCalled); } HWTEST_F(EnqueueHandlerWithAubSubCaptureTests, givenEnqueueHandlerWithAubSubCaptureWhenSubCaptureGetsActivatedThenTimestampPacketDependenciesAreClearedAndNextRemainUncleared) { DebugManagerStateRestore stateRestore; DebugManager.flags.AUBDumpSubCaptureMode.set(1); DebugManager.flags.EnableTimestampPacket.set(true); auto aubCsr = new MockAubCsr("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(aubCsr); AubSubCaptureCommon subCaptureCommon; subCaptureCommon.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Filter; subCaptureCommon.subCaptureFilter.dumpKernelName = ""; subCaptureCommon.subCaptureFilter.dumpKernelStartIdx = 0; subCaptureCommon.subCaptureFilter.dumpKernelEndIdx = 1; auto subCaptureManagerMock = new AubSubCaptureManagerMock("file_name.aub", subCaptureCommon); aubCsr->subCaptureManager.reset(subCaptureManagerMock); MockCmdQWithAubSubCapture cmdQ(context, pClDevice); MockKernelWithInternals mockKernel(*pClDevice); mockKernel.kernelInfo.kernelDescriptor.kernelMetadata.kernelName = "kernelName"; size_t gws[3] = {1, 0, 0}; // activate subcapture cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_TRUE(cmdQ.timestampPacketDependenciesCleared); // keep subcapture active cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_FALSE(cmdQ.timestampPacketDependenciesCleared); // deactivate subcapture cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_FALSE(cmdQ.timestampPacketDependenciesCleared); } 
HWTEST_F(EnqueueHandlerWithAubSubCaptureTests, givenInputEventsWhenDispatchingEnqueueWithSubCaptureThenClearDependencies) { DebugManagerStateRestore stateRestore; DebugManager.flags.AUBDumpSubCaptureMode.set(1); DebugManager.flags.EnableTimestampPacket.set(true); auto defaultEngine = defaultHwInfo->capabilityTable.defaultEngineType; MockOsContext mockOsContext(0, EngineDescriptorHelper::getDefaultDescriptor({defaultEngine, EngineUsage::Regular})); auto aubCsr = new MockAubCsr("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); auto aubCsr2 = std::make_unique>("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); aubCsr->setupContext(mockOsContext); aubCsr2->setupContext(mockOsContext); pDevice->resetCommandStreamReceiver(aubCsr); AubSubCaptureCommon subCaptureCommon; subCaptureCommon.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Filter; subCaptureCommon.subCaptureFilter.dumpKernelName = ""; subCaptureCommon.subCaptureFilter.dumpKernelStartIdx = 0; subCaptureCommon.subCaptureFilter.dumpKernelEndIdx = 1; auto subCaptureManagerMock = new AubSubCaptureManagerMock("file_name.aub", subCaptureCommon); aubCsr->subCaptureManager.reset(subCaptureManagerMock); MockCmdQWithAubSubCapture cmdQ(context, pClDevice); MockKernelWithInternals mockKernel(*pClDevice); mockKernel.kernelInfo.kernelDescriptor.kernelMetadata.kernelName = "kernelName"; size_t gws[3] = {1, 0, 0}; MockTimestampPacketContainer onCsrTimestamp(*aubCsr->getTimestampPacketAllocator(), 1); MockTimestampPacketContainer outOfCsrTimestamp(*aubCsr2->getTimestampPacketAllocator(), 1); Event event1(&cmdQ, 0, 0, 0); Event event2(&cmdQ, 0, 0, 0); event1.addTimestampPacketNodes(onCsrTimestamp); event1.addTimestampPacketNodes(outOfCsrTimestamp); cl_event waitlist[] = {&event1, &event2}; cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 2, waitlist, nullptr); 
EXPECT_TRUE(cmdQ.timestampPacketDependenciesCleared); CsrDependencies &outOfCsrDeps = aubCsr->recordedDispatchFlags.csrDependencies; EXPECT_EQ(0u, outOfCsrDeps.timestampPacketContainer.size()); } template class MyCommandQueueHw : public CommandQueueHw { typedef CommandQueueHw BaseClass; public: MyCommandQueueHw(Context *context, ClDevice *device, cl_queue_properties *properties) : BaseClass(context, device, properties, false){}; Vec3 lws = {1, 1, 1}; Vec3 elws = {1, 1, 1}; void enqueueHandlerHook(const unsigned int commandType, const MultiDispatchInfo &multiDispatchInfo) override { elws = multiDispatchInfo.begin()->getEnqueuedWorkgroupSize(); lws = multiDispatchInfo.begin()->getActualWorkgroupSize(); } }; HWTEST_F(EnqueueHandlerTest, givenLocalWorkgroupSizeGreaterThenGlobalWorkgroupSizeWhenEnqueueKernelThenLwsIsClamped) { int32_t tag; auto csr = new MockCsrBase(tag, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(csr); MockKernelWithInternals mockKernel(*pClDevice); auto mockProgram = mockKernel.mockProgram; mockProgram->setAllowNonUniform(true); MyCommandQueueHw myCmdQ(context, pClDevice, 0); size_t lws1d[] = {4, 1, 1}; size_t gws1d[] = {2, 1, 1}; myCmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws1d, lws1d, 0, nullptr, nullptr); EXPECT_EQ(myCmdQ.elws.x, lws1d[0]); EXPECT_EQ(myCmdQ.lws.x, gws1d[0]); size_t lws2d[] = {3, 3, 1}; size_t gws2d[] = {2, 1, 1}; myCmdQ.enqueueKernel(mockKernel.mockKernel, 2, nullptr, gws2d, lws2d, 0, nullptr, nullptr); EXPECT_EQ(myCmdQ.elws.x, lws2d[0]); EXPECT_EQ(myCmdQ.elws.y, lws2d[1]); EXPECT_EQ(myCmdQ.lws.x, gws2d[0]); EXPECT_EQ(myCmdQ.lws.y, gws2d[1]); size_t lws3d[] = {5, 4, 3}; size_t gws3d[] = {2, 2, 2}; myCmdQ.enqueueKernel(mockKernel.mockKernel, 3, nullptr, gws3d, lws3d, 0, nullptr, nullptr); EXPECT_EQ(myCmdQ.elws.x, lws3d[0]); EXPECT_EQ(myCmdQ.elws.y, lws3d[1]); EXPECT_EQ(myCmdQ.elws.z, lws3d[2]); EXPECT_EQ(myCmdQ.lws.x, gws3d[0]); 
EXPECT_EQ(myCmdQ.lws.y, gws3d[1]); EXPECT_EQ(myCmdQ.lws.z, gws3d[2]); } HWTEST_F(EnqueueHandlerTest, givenLocalWorkgroupSizeGreaterThenGlobalWorkgroupSizeAndNonUniformWorkGroupWhenEnqueueKernelThenClIvalidWorkGroupSizeIsReturned) { int32_t tag; auto csr = new MockCsrBase(tag, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(csr); MockKernelWithInternals mockKernel(*pClDevice); auto mockProgram = mockKernel.mockProgram; mockProgram->setAllowNonUniform(false); MyCommandQueueHw myCmdQ(context, pClDevice, 0); size_t lws1d[] = {4, 1, 1}; size_t gws1d[] = {2, 1, 1}; cl_int retVal = myCmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws1d, lws1d, 0, nullptr, nullptr); EXPECT_EQ(retVal, CL_INVALID_WORK_GROUP_SIZE); } HWTEST_F(EnqueueHandlerTest, WhenEnqueuingHandlerCallOnEnqueueMarkerThenCallProcessEvictionOnCsrIsNotCalled) { int32_t tag; auto csr = new MockCsrBase(tag, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(csr); auto mockCmdQ = std::unique_ptr>(new MockCommandQueueHw(context, pClDevice, 0)); mockCmdQ->enqueueMarkerWithWaitList( 0, nullptr, nullptr); EXPECT_FALSE(csr->processEvictionCalled); EXPECT_EQ(0u, csr->madeResidentGfxAllocations.size()); EXPECT_EQ(0u, csr->madeNonResidentGfxAllocations.size()); } HWTEST_F(EnqueueHandlerTest, WhenEnqueuingHandlerForMarkerOnUnblockedQueueThenTaskLevelIsNotIncremented) { int32_t tag; auto csr = new MockCsrBase(tag, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(csr); auto mockCmdQ = std::unique_ptr>(new MockCommandQueueHw(context, pClDevice, 0)); // put queue into initial unblocked state mockCmdQ->taskLevel = 0; mockCmdQ->enqueueMarkerWithWaitList( 0, nullptr, nullptr); EXPECT_EQ(0u, mockCmdQ->taskLevel); } HWTEST_F(EnqueueHandlerTest, 
WhenEnqueuingHandlerForMarkerOnBlockedQueueThenTaskLevelIsNotIncremented) { int32_t tag; auto csr = new MockCsrBase(tag, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(csr); auto mockCmdQ = std::unique_ptr>(new MockCommandQueueHw(context, pClDevice, 0)); // put queue into initial blocked state mockCmdQ->taskLevel = CompletionStamp::notReady; mockCmdQ->enqueueMarkerWithWaitList( 0, nullptr, nullptr); EXPECT_EQ(CompletionStamp::notReady, mockCmdQ->taskLevel); } HWTEST_F(EnqueueHandlerTest, WhenEnqueuingBlockedWithoutReturnEventThenVirtualEventIsCreatedAndCommandQueueInternalRefCountIsIncremeted) { MockKernelWithInternals kernelInternals(*pClDevice, context); Kernel *kernel = kernelInternals.mockKernel; MockMultiDispatchInfo multiDispatchInfo(pClDevice, kernel); auto mockCmdQ = new MockCommandQueueHw(context, pClDevice, 0); // put queue into initial blocked state mockCmdQ->taskLevel = CompletionStamp::notReady; auto initialRefCountInternal = mockCmdQ->getRefInternalCount(); bool blocking = false; const auto enqueueResult = mockCmdQ->template enqueueHandler(nullptr, 0, blocking, multiDispatchInfo, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, enqueueResult); EXPECT_NE(nullptr, mockCmdQ->virtualEvent); auto refCountInternal = mockCmdQ->getRefInternalCount(); EXPECT_EQ(initialRefCountInternal + 1, refCountInternal); mockCmdQ->virtualEvent->setStatus(CL_COMPLETE); mockCmdQ->isQueueBlocked(); mockCmdQ->release(); } HWTEST_F(EnqueueHandlerTest, WhenEnqueuingBlockedThenVirtualEventIsSetAsCurrentCmdQVirtualEvent) { MockKernelWithInternals kernelInternals(*pClDevice, context); Kernel *kernel = kernelInternals.mockKernel; MockMultiDispatchInfo multiDispatchInfo(pClDevice, kernel); auto mockCmdQ = new MockCommandQueueHw(context, pClDevice, 0); // put queue into initial blocked state mockCmdQ->taskLevel = CompletionStamp::notReady; bool blocking = false; const auto enqueueResult = mockCmdQ->template 
enqueueHandler(nullptr, 0, blocking, multiDispatchInfo, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, enqueueResult); ASSERT_NE(nullptr, mockCmdQ->virtualEvent); mockCmdQ->virtualEvent->setStatus(CL_COMPLETE); mockCmdQ->isQueueBlocked(); mockCmdQ->release(); } HWTEST_F(EnqueueHandlerTest, WhenEnqueuingWithOutputEventThenEventIsRegistered) { MockKernelWithInternals kernelInternals(*pClDevice, context); Kernel *kernel = kernelInternals.mockKernel; MockMultiDispatchInfo multiDispatchInfo(pClDevice, kernel); cl_event outputEvent = nullptr; auto mockCmdQ = new MockCommandQueueHw(context, pClDevice, 0); bool blocking = false; const auto enqueueResult = mockCmdQ->template enqueueHandler(nullptr, 0, blocking, multiDispatchInfo, 0, nullptr, &outputEvent); EXPECT_EQ(CL_SUCCESS, enqueueResult); ASSERT_NE(nullptr, outputEvent); Event *event = castToObjectOrAbort(outputEvent); ASSERT_NE(nullptr, event); event->release(); mockCmdQ->release(); } HWTEST_F(EnqueueHandlerTest, givenEnqueueHandlerWhenAddPatchInfoCommentsForAUBDumpIsNotSetThenPatchInfoDataIsNotTransferredToCSR) { auto csr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); auto mockHelper = new MockFlatBatchBufferHelper(*pDevice->executionEnvironment); csr->overwriteFlatBatchBufferHelper(mockHelper); pDevice->resetCommandStreamReceiver(csr); MockKernelWithInternals mockKernel(*pClDevice); auto mockCmdQ = std::unique_ptr>(new MockCommandQueueHw(context, pClDevice, 0)); size_t gws[] = {1, 1, 1}; PatchInfoData patchInfoData = {0xaaaaaaaa, 0, PatchInfoAllocationType::KernelArg, 0xbbbbbbbb, 0, PatchInfoAllocationType::IndirectObjectHeap}; mockKernel.mockKernel->getPatchInfoDataList().push_back(patchInfoData); mockCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_EQ(0u, mockHelper->setPatchInfoDataCalled); } HWTEST2_F(EnqueueHandlerTest, 
givenEnqueueHandlerWhenAddPatchInfoCommentsForAUBDumpIsSetThenPatchInfoDataIsTransferredToCSR, MatchAny) { DebugManagerStateRestore dbgRestore; DebugManager.flags.AddPatchInfoCommentsForAUBDump.set(true); DebugManager.flags.FlattenBatchBufferForAUBDump.set(true); auto csr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); auto mockHelper = new MockFlatBatchBufferHelper(*pDevice->executionEnvironment); csr->overwriteFlatBatchBufferHelper(mockHelper); pDevice->resetCommandStreamReceiver(csr); MockKernelWithInternals mockKernel(*pClDevice); auto mockCmdQ = std::unique_ptr>(new MockCommandQueueHw(context, pClDevice, 0)); size_t gws[] = {1, 1, 1}; PatchInfoData patchInfoData = {0xaaaaaaaa, 0, PatchInfoAllocationType::KernelArg, 0xbbbbbbbb, 0, PatchInfoAllocationType::IndirectObjectHeap}; mockKernel.mockKernel->getPatchInfoDataList().push_back(patchInfoData); uint32_t expectedCallsCount = TestTraits::iohInSbaSupported ? 8 : 7; if (!pDevice->getHardwareInfo().capabilityTable.supportsImages) { --expectedCallsCount; } mockCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_EQ(expectedCallsCount, mockHelper->setPatchInfoDataCalled); EXPECT_EQ(1u, mockHelper->registerCommandChunkCalled); EXPECT_EQ(1u, mockHelper->registerBatchBufferStartAddressCalled); } HWTEST_F(EnqueueHandlerTest, givenExternallySynchronizedParentEventWhenRequestingEnqueueWithoutGpuSubmissionThenTaskCountIsNotInherited) { struct ExternallySynchEvent : UserEvent { ExternallySynchEvent() : UserEvent() { setStatus(CL_COMPLETE); this->updateTaskCount(7, 0); } bool isExternallySynchronized() const override { return true; } }; auto mockCmdQ = new MockCommandQueueHw(context, pClDevice, 0); ExternallySynchEvent synchEvent; cl_event inEv = &synchEvent; cl_event outEv = nullptr; bool blocking = false; MultiDispatchInfo emptyDispatchInfo; const auto enqueueResult = mockCmdQ->template enqueueHandler(nullptr, 0, 
blocking, emptyDispatchInfo, 1U, &inEv, &outEv); EXPECT_EQ(CL_SUCCESS, enqueueResult); Event *ouputEvent = castToObject(outEv); ASSERT_NE(nullptr, ouputEvent); EXPECT_EQ(0U, ouputEvent->peekTaskCount()); ouputEvent->release(); mockCmdQ->release(); } HWTEST_F(EnqueueHandlerTest, givenEnqueueHandlerWhenSubCaptureIsOffThenActivateSubCaptureIsNotCalled) { DebugManagerStateRestore stateRestore; DebugManager.flags.AUBDumpSubCaptureMode.set(static_cast(AubSubCaptureManager::SubCaptureMode::Off)); MockKernelWithInternals kernelInternals(*pClDevice, context); Kernel *kernel = kernelInternals.mockKernel; MockMultiDispatchInfo multiDispatchInfo(pClDevice, kernel); auto mockCmdQ = new MockCommandQueueHw(context, pClDevice, 0); const auto enqueueResult = mockCmdQ->template enqueueHandler(nullptr, 0, false, multiDispatchInfo, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, enqueueResult); EXPECT_FALSE(pDevice->getUltCommandStreamReceiver().checkAndActivateAubSubCaptureCalled); mockCmdQ->release(); } HWTEST_F(EnqueueHandlerTest, givenEnqueueHandlerWhenSubCaptureIsOnThenActivateSubCaptureIsCalled) { DebugManagerStateRestore stateRestore; DebugManager.flags.AUBDumpSubCaptureMode.set(static_cast(AubSubCaptureManager::SubCaptureMode::Filter)); MockKernelWithInternals kernelInternals(*pClDevice, context); Kernel *kernel = kernelInternals.mockKernel; MockMultiDispatchInfo multiDispatchInfo(pClDevice, kernel); auto mockCmdQ = new MockCommandQueueHw(context, pClDevice, 0); const auto enqueueResult = mockCmdQ->template enqueueHandler(nullptr, 0, false, multiDispatchInfo, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, enqueueResult); EXPECT_TRUE(pDevice->getUltCommandStreamReceiver().checkAndActivateAubSubCaptureCalled); mockCmdQ->release(); } HWTEST_F(EnqueueHandlerTest, givenEnqueueHandlerWhenClSetKernelExecInfoAlreadySetKernelThreadArbitrationPolicyThenRequiredThreadArbitrationPolicyIsSetProperly) { REQUIRE_SVM_OR_SKIP(pClDevice); auto &hwHelper = 
NEO::ClHwHelper::get(pClDevice->getHardwareInfo().platform.eRenderCoreFamily); if (!hwHelper.isSupportedKernelThreadArbitrationPolicy()) { GTEST_SKIP(); } DebugManagerStateRestore stateRestore; DebugManager.flags.AUBDumpSubCaptureMode.set(static_cast(AubSubCaptureManager::SubCaptureMode::Filter)); MockKernelWithInternals kernelInternals(*pClDevice, context); Kernel *kernel = kernelInternals.mockKernel; MockMultiDispatchInfo multiDispatchInfo(pClDevice, kernel); uint32_t euThreadSetting = CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_ROUND_ROBIN_INTEL; size_t ptrSizeInBytes = 1 * sizeof(uint32_t *); clSetKernelExecInfo( kernelInternals.mockMultiDeviceKernel, // cl_kernel kernel CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_INTEL, // cl_kernel_exec_info param_name ptrSizeInBytes, // size_t param_value_size &euThreadSetting // const void *param_value ); auto mockCmdQ = new MockCommandQueueHw(context, pClDevice, 0); const auto enqueueResult = mockCmdQ->template enqueueHandler(nullptr, 0, false, multiDispatchInfo, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, enqueueResult); EXPECT_EQ(getNewKernelArbitrationPolicy(euThreadSetting), pDevice->getUltCommandStreamReceiver().streamProperties.stateComputeMode.threadArbitrationPolicy.value); mockCmdQ->release(); } HWTEST_F(EnqueueHandlerTest, givenEnqueueHandlerWhenNotSupportedPolicyChangeThenRequiredThreadArbitrationPolicyNotChangedAndIsSetAsDefault) { auto &hwHelper = NEO::ClHwHelper::get(pClDevice->getHardwareInfo().platform.eRenderCoreFamily); if (hwHelper.isSupportedKernelThreadArbitrationPolicy()) { GTEST_SKIP(); } DebugManagerStateRestore stateRestore; DebugManager.flags.AUBDumpSubCaptureMode.set(static_cast(AubSubCaptureManager::SubCaptureMode::Filter)); MockKernelWithInternals kernelInternals(*pClDevice, context); Kernel *kernel = kernelInternals.mockKernel; MockMultiDispatchInfo multiDispatchInfo(pClDevice, kernel); uint32_t euThreadSetting = CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_ROUND_ROBIN_INTEL; size_t 
ptrSizeInBytes = 1 * sizeof(uint32_t *); auto retVal = clSetKernelExecInfo( kernelInternals.mockMultiDeviceKernel, // cl_kernel kernel CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_INTEL, // cl_kernel_exec_info param_name ptrSizeInBytes, // size_t param_value_size &euThreadSetting // const void *param_value ); EXPECT_EQ(CL_INVALID_DEVICE, retVal); auto mockCmdQ = new MockCommandQueueHw(context, pClDevice, 0); const auto enqueueResult = mockCmdQ->template enqueueHandler(nullptr, 0, false, multiDispatchInfo, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, enqueueResult); EXPECT_NE(getNewKernelArbitrationPolicy(euThreadSetting), pDevice->getUltCommandStreamReceiver().streamProperties.stateComputeMode.threadArbitrationPolicy.value); EXPECT_EQ(0, pDevice->getUltCommandStreamReceiver().streamProperties.stateComputeMode.threadArbitrationPolicy.value); mockCmdQ->release(); } HWTEST_F(EnqueueHandlerTest, givenKernelUsingSyncBufferWhenEnqueuingKernelThenSshIsCorrectlyProgrammed) { using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE; using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; struct MockSyncBufferHandler : SyncBufferHandler { using SyncBufferHandler::graphicsAllocation; }; pDevice->allocateSyncBufferHandler(); size_t offset = 0; size_t size = 1; size_t sshUsageWithoutSyncBuffer; { MockKernelWithInternals kernelInternals{*pClDevice, context}; auto kernel = kernelInternals.mockKernel; kernel->initialize(); auto mockCmdQ = clUniquePtr(new MockCommandQueueHw(context, pClDevice, 0)); mockCmdQ->enqueueKernel(kernel, 1, &offset, &size, &size, 0, nullptr, nullptr); sshUsageWithoutSyncBuffer = mockCmdQ->getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0).getUsed(); } { MockKernelWithInternals kernelInternals{*pClDevice, context}; kernelInternals.kernelInfo.setSyncBuffer(sizeof(uint8_t), 0, 0); constexpr auto bindingTableOffset = sizeof(RENDER_SURFACE_STATE); kernelInternals.kernelInfo.setBindingTable(bindingTableOffset, 1); 
kernelInternals.kernelInfo.heapInfo.SurfaceStateHeapSize = sizeof(RENDER_SURFACE_STATE) + sizeof(BINDING_TABLE_STATE); auto kernel = kernelInternals.mockKernel; kernel->initialize(); auto bindingTableState = reinterpret_cast( ptrOffset(kernel->getSurfaceStateHeap(), bindingTableOffset)); bindingTableState->setSurfaceStatePointer(0); auto mockCmdQ = clUniquePtr(new MockCommandQueueHw(context, pClDevice, 0)); mockCmdQ->enqueueKernel(kernel, 1, &offset, &size, &size, 0, nullptr, nullptr); auto &surfaceStateHeap = mockCmdQ->getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0); EXPECT_EQ(sshUsageWithoutSyncBuffer + kernelInternals.kernelInfo.heapInfo.SurfaceStateHeapSize, surfaceStateHeap.getUsed()); ClHardwareParse hwParser; hwParser.parseCommands(*mockCmdQ); auto &surfaceState = hwParser.getSurfaceState(&surfaceStateHeap, 0); auto pSyncBufferHandler = static_cast(pDevice->syncBufferHandler.get()); EXPECT_EQ(pSyncBufferHandler->graphicsAllocation->getGpuAddress(), surfaceState.getSurfaceBaseAddress()); } } struct EnqueueHandlerTestBasic : public ::testing::Test { template std::unique_ptr> setupFixtureAndCreateMockCommandQueue() { auto executionEnvironment = platform()->peekExecutionEnvironment(); device = std::make_unique(MockDevice::createWithExecutionEnvironment(nullptr, executionEnvironment, 0u)); context = std::make_unique(device.get()); auto mockCmdQ = std::make_unique>(context.get(), device.get(), nullptr); auto &ultCsr = static_cast &>(mockCmdQ->getGpgpuCommandStreamReceiver()); ultCsr.taskCount = initialTaskCount; mockInternalAllocationStorage = new MockInternalAllocationStorage(ultCsr); ultCsr.internalAllocationStorage.reset(mockInternalAllocationStorage); return mockCmdQ; } MockInternalAllocationStorage *mockInternalAllocationStorage = nullptr; const uint32_t initialTaskCount = 100; std::unique_ptr device; std::unique_ptr context; }; HWTEST_F(EnqueueHandlerTestBasic, 
givenEnqueueHandlerWhenCommandIsBlokingThenCompletionStampTaskCountIsPassedToWaitForTaskCountAndCleanAllocationListAsRequiredTaskCount) { auto mockCmdQ = setupFixtureAndCreateMockCommandQueue(); MockKernelWithInternals kernelInternals(*device, context.get()); Kernel *kernel = kernelInternals.mockKernel; MockMultiDispatchInfo multiDispatchInfo(device.get(), kernel); const auto enqueueResult = mockCmdQ->template enqueueHandler(nullptr, 0, true, multiDispatchInfo, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, enqueueResult); EXPECT_EQ(initialTaskCount + 1, mockInternalAllocationStorage->lastCleanAllocationsTaskCount); } HWTEST_F(EnqueueHandlerTestBasic, givenBlockedEnqueueHandlerWhenCommandIsBlokingThenCompletionStampTaskCountIsPassedToWaitForTaskCountAndCleanAllocationListAsRequiredTaskCount) { auto mockCmdQ = setupFixtureAndCreateMockCommandQueue(); MockKernelWithInternals kernelInternals(*device, context.get()); Kernel *kernel = kernelInternals.mockKernel; MockMultiDispatchInfo multiDispatchInfo(device.get(), kernel); UserEvent userEvent; cl_event waitlist[] = {&userEvent}; std::thread t0([&mockCmdQ, &userEvent]() { while (!mockCmdQ->isQueueBlocked()) { } userEvent.setStatus(CL_COMPLETE); }); const auto enqueueResult = mockCmdQ->template enqueueHandler(nullptr, 0, true, multiDispatchInfo, 1, waitlist, nullptr); EXPECT_EQ(CL_SUCCESS, enqueueResult); EXPECT_EQ(initialTaskCount + 1, mockInternalAllocationStorage->lastCleanAllocationsTaskCount); t0.join(); } compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp000066400000000000000000002504541422164147700316400ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/pause_on_gpu_properties.h" #include "shared/source/helpers/preamble.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/kernel_binary_helper.h" #include 
"shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/mocks/mock_csr.h" #include "shared/test/common/mocks/mock_submissions_aggregator.h" #include "opencl/source/api/api.h" #include "opencl/source/built_ins/builtins_dispatch_builder.h" #include "opencl/test/unit_test/api/cl_api_tests.h" #include "opencl/test/unit_test/command_queue/enqueue_fixture.h" #include "opencl/test/unit_test/fixtures/hello_world_fixture.h" #include "opencl/test/unit_test/helpers/cl_hw_parse.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/test_macros/test_checks_ocl.h" using namespace NEO; typedef HelloWorldFixture EnqueueKernelFixture; typedef Test EnqueueKernelTest; TEST_F(EnqueueKernelTest, GivenNullKernelWhenEnqueuingKernelThenInvalidKernelErrorIsReturned) { size_t globalWorkSize[3] = {1, 1, 1}; auto retVal = clEnqueueNDRangeKernel( pCmdQ, nullptr, 1, nullptr, globalWorkSize, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_KERNEL, retVal); } TEST_F(EnqueueKernelTest, givenKernelWhenAllArgsAreSetThenClEnqueueNDRangeKernelReturnsSuccess) { const size_t n = 512; size_t globalWorkSize[3] = {n, 1, 1}; size_t localWorkSize[3] = {64, 1, 1}; cl_int retVal = CL_INVALID_KERNEL; CommandQueue *pCmdQ2 = createCommandQueue(pClDevice); std::unique_ptr pMultiDeviceKernel(MultiDeviceKernel::create(pProgram, pProgram->getKernelInfosForKernel("CopyBuffer"), &retVal)); auto kernel = pMultiDeviceKernel->getKernel(rootDeviceIndex); EXPECT_EQ(CL_SUCCESS, retVal); auto b0 = clCreateBuffer(context, 0, n * sizeof(float), nullptr, nullptr); auto b1 = clCreateBuffer(context, 0, n * sizeof(float), nullptr, nullptr); EXPECT_FALSE(kernel->isPatched()); retVal = clEnqueueNDRangeKernel(pCmdQ2, pMultiDeviceKernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_KERNEL_ARGS, retVal); retVal = clSetKernelArg(pMultiDeviceKernel.get(), 0, sizeof(cl_mem), &b0); EXPECT_EQ(CL_SUCCESS, retVal); 
EXPECT_FALSE(kernel->isPatched()); retVal = clEnqueueNDRangeKernel(pCmdQ2, pMultiDeviceKernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_KERNEL_ARGS, retVal); retVal = clSetKernelArg(pMultiDeviceKernel.get(), 1, sizeof(cl_mem), &b1); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(kernel->isPatched()); retVal = clEnqueueNDRangeKernel(pCmdQ2, pMultiDeviceKernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(b0); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(b1); EXPECT_EQ(CL_SUCCESS, retVal); clReleaseCommandQueue(pCmdQ2); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(EnqueueMultiDeviceKernelTest, givenMultiDeviceKernelWhenSetArgDeviceUSMThenOnlyOneKernelIsPatched) { REQUIRE_SVM_OR_SKIP(defaultHwInfo); auto deviceFactory = std::make_unique(3, 0); auto device0 = deviceFactory->rootDevices[0]; auto device1 = deviceFactory->rootDevices[1]; auto device2 = deviceFactory->rootDevices[2]; cl_device_id devices[] = {device0, device1, device2}; auto context = std::make_unique(ClDeviceVector(devices, 3), false); auto pCmdQ1 = context->getSpecialQueue(1u); auto pCmdQ2 = context->getSpecialQueue(2u); std::unique_ptr pSource = nullptr; size_t sourceSize = 0; std::string testFile; KernelBinaryHelper kbHelper("CopyBuffer_simd16"); testFile.append(clFiles); testFile.append("CopyBuffer_simd16.cl"); pSource = loadDataFromFile( testFile.c_str(), sourceSize); ASSERT_NE(0u, sourceSize); ASSERT_NE(nullptr, pSource); const char *sources[1] = {pSource.get()}; cl_int retVal = CL_INVALID_PROGRAM; auto clProgram = clCreateProgramWithSource( context.get(), 1, sources, &sourceSize, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, clProgram); clBuildProgram(clProgram, 0, nullptr, nullptr, nullptr, nullptr); auto clKernel = clCreateKernel(clProgram, "CopyBuffer", &retVal); EXPECT_EQ(CL_SUCCESS, retVal); auto pMultiDeviceKernel = castToObject(clKernel); 
auto buffer0 = clCreateBuffer(context.get(), 0, MemoryConstants::pageSize, nullptr, nullptr); size_t globalWorkSize[3] = {1, 1, 1}; size_t localWorkSize[3] = {1, 1, 1}; retVal = clSetKernelArg(clKernel, 0, sizeof(cl_mem), &buffer0); EXPECT_EQ(CL_SUCCESS, retVal); auto deviceMem = clDeviceMemAllocINTEL(context.get(), device1, {}, MemoryConstants::pageSize, MemoryConstants::pageSize, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clSetKernelArgSVMPointer(clKernel, 1, deviceMem); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_FALSE(pMultiDeviceKernel->getKernel(0u)->isPatched()); EXPECT_TRUE(pMultiDeviceKernel->getKernel(1u)->isPatched()); EXPECT_FALSE(pMultiDeviceKernel->getKernel(2u)->isPatched()); retVal = clEnqueueNDRangeKernel(pCmdQ1, pMultiDeviceKernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clEnqueueNDRangeKernel(pCmdQ2, pMultiDeviceKernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_KERNEL_ARGS, retVal); retVal = clReleaseMemObject(buffer0); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clMemFreeINTEL(context.get(), deviceMem); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseKernel(clKernel); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseProgram(clProgram); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(EnqueueKernelTest, givenKernelWhenNotAllArgsAreSetButSetKernelArgIsCalledTwiceThenClEnqueueNDRangeKernelReturnsError) { const size_t n = 512; size_t globalWorkSize[3] = {n, 1, 1}; size_t localWorkSize[3] = {256, 1, 1}; cl_int retVal = CL_SUCCESS; CommandQueue *pCmdQ2 = createCommandQueue(pClDevice); std::unique_ptr pMultiDeviceKernel(MultiDeviceKernel::create(pProgram, pProgram->getKernelInfosForKernel("CopyBuffer"), &retVal)); auto kernel = pMultiDeviceKernel->getKernel(rootDeviceIndex); EXPECT_EQ(CL_SUCCESS, retVal); auto b0 = clCreateBuffer(context, 0, n * sizeof(float), nullptr, nullptr); auto b1 = clCreateBuffer(context, 0, n * sizeof(float), nullptr, nullptr); 
EXPECT_FALSE(kernel->isPatched()); retVal = clEnqueueNDRangeKernel(pCmdQ2, pMultiDeviceKernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_KERNEL_ARGS, retVal); retVal = clSetKernelArg(pMultiDeviceKernel.get(), 0, sizeof(cl_mem), &b0); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_FALSE(kernel->isPatched()); retVal = clEnqueueNDRangeKernel(pCmdQ2, pMultiDeviceKernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_KERNEL_ARGS, retVal); retVal = clSetKernelArg(pMultiDeviceKernel.get(), 0, sizeof(cl_mem), &b1); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_FALSE(kernel->isPatched()); retVal = clEnqueueNDRangeKernel(pCmdQ2, pMultiDeviceKernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_KERNEL_ARGS, retVal); retVal = clReleaseMemObject(b0); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(b1); EXPECT_EQ(CL_SUCCESS, retVal); clReleaseCommandQueue(pCmdQ2); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(EnqueueKernelTest, givenKernelWhenSetKernelArgIsCalledForEachArgButAtLeastFailsThenClEnqueueNDRangeKernelReturnsError) { const size_t n = 512; size_t globalWorkSize[3] = {n, 1, 1}; size_t localWorkSize[3] = {256, 1, 1}; cl_int retVal = CL_SUCCESS; CommandQueue *pCmdQ2 = createCommandQueue(pClDevice); std::unique_ptr pMultiDeviceKernel(MultiDeviceKernel::create(pProgram, pProgram->getKernelInfosForKernel("CopyBuffer"), &retVal)); auto kernel = pMultiDeviceKernel->getKernel(rootDeviceIndex); EXPECT_EQ(CL_SUCCESS, retVal); auto b0 = clCreateBuffer(context, 0, n * sizeof(float), nullptr, nullptr); auto b1 = clCreateBuffer(context, 0, n * sizeof(float), nullptr, nullptr); EXPECT_FALSE(kernel->isPatched()); retVal = clEnqueueNDRangeKernel(pCmdQ2, pMultiDeviceKernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_KERNEL_ARGS, retVal); retVal = clSetKernelArg(pMultiDeviceKernel.get(), 0, sizeof(cl_mem), &b0); 
EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_FALSE(kernel->isPatched()); retVal = clEnqueueNDRangeKernel(pCmdQ2, pMultiDeviceKernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_KERNEL_ARGS, retVal); retVal = clSetKernelArg(pMultiDeviceKernel.get(), 1, 2 * sizeof(cl_mem), &b1); EXPECT_NE(CL_SUCCESS, retVal); EXPECT_FALSE(kernel->isPatched()); retVal = clEnqueueNDRangeKernel(pCmdQ2, pMultiDeviceKernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_KERNEL_ARGS, retVal); retVal = clReleaseMemObject(b0); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(b1); EXPECT_EQ(CL_SUCCESS, retVal); clReleaseCommandQueue(pCmdQ2); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(EnqueueKernelTest, GivenInvalidEventListCountWhenEnqueuingKernelThenInvalidEventWaitListErrorIsReturned) { size_t globalWorkSize[3] = {1, 1, 1}; auto retVal = clEnqueueNDRangeKernel( pCmdQ, pMultiDeviceKernel, 1, nullptr, globalWorkSize, nullptr, 1, nullptr, nullptr); EXPECT_EQ(CL_INVALID_EVENT_WAIT_LIST, retVal); } TEST_F(EnqueueKernelTest, GivenInvalidWorkGroupSizeWhenEnqueuingKernelThenInvalidWorkGroupSizeErrorIsReturned) { size_t globalWorkSize[3] = {12, 12, 12}; size_t localWorkSize[3] = {11, 12, 12}; auto retVal = clEnqueueNDRangeKernel( pCmdQ, pMultiDeviceKernel, 3, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_WORK_GROUP_SIZE, retVal); } TEST_F(EnqueueKernelTest, GivenNullKernelWhenEnqueuingNDCountKernelINTELThenInvalidKernelErrorIsReturned) { size_t workgroupCount[3] = {1, 1, 1}; auto retVal = clEnqueueNDCountKernelINTEL( pCmdQ, nullptr, 1, nullptr, workgroupCount, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_KERNEL, retVal); } using clEnqueueNDCountKernelTests = api_tests; TEST_F(clEnqueueNDCountKernelTests, GivenQueueIncapableWhenEnqueuingNDCountKernelINTELThenInvalidOperationIsReturned) { auto &hwHelper = HwHelper::get(::defaultHwInfo->platform.eRenderCoreFamily); 
auto engineGroupType = hwHelper.getEngineGroupType(pCommandQueue->getGpgpuEngine().getEngineType(), pCommandQueue->getGpgpuEngine().getEngineUsage(), *::defaultHwInfo); if (!hwHelper.isCooperativeDispatchSupported(engineGroupType, *::defaultHwInfo)) { GTEST_SKIP(); } cl_uint workDim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; size_t workgroupCount[3] = {1, 1, 1}; size_t localWorkSize[3] = {1, 1, 1}; this->disableQueueCapabilities(CL_QUEUE_CAPABILITY_KERNEL_INTEL); retVal = clEnqueueNDCountKernelINTEL( pCommandQueue, pMultiDeviceKernel, workDim, globalWorkOffset, workgroupCount, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_OPERATION, retVal); } TEST_F(EnqueueKernelTest, givenKernelWhenAllArgsAreSetThenClEnqueueNDCountKernelINTELReturnsSuccess) { const size_t n = 512; size_t workgroupCount[3] = {2, 1, 1}; size_t localWorkSize[3] = {64, 1, 1}; cl_int retVal = CL_SUCCESS; CommandQueue *pCmdQ2 = createCommandQueue(pClDevice); HwHelper &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); auto engineGroupType = hwHelper.getEngineGroupType(pCmdQ2->getGpgpuEngine().getEngineType(), pCmdQ2->getGpgpuEngine().getEngineUsage(), hardwareInfo); if (!hwHelper.isCooperativeDispatchSupported(engineGroupType, hardwareInfo)) { pCmdQ2->getGpgpuEngine().osContext = pCmdQ2->getDevice().getEngine(aub_stream::ENGINE_CCS, EngineUsage::LowPriority).osContext; } std::unique_ptr pMultiDeviceKernel(MultiDeviceKernel::create(pProgram, pProgram->getKernelInfosForKernel("CopyBuffer"), &retVal)); auto kernel = pMultiDeviceKernel->getKernel(rootDeviceIndex); EXPECT_EQ(CL_SUCCESS, retVal); auto b0 = clCreateBuffer(context, 0, n * sizeof(float), nullptr, nullptr); auto b1 = clCreateBuffer(context, 0, n * sizeof(float), nullptr, nullptr); EXPECT_FALSE(kernel->isPatched()); retVal = clEnqueueNDCountKernelINTEL(pCmdQ2, pMultiDeviceKernel.get(), 1, nullptr, workgroupCount, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_KERNEL_ARGS, retVal); retVal = 
clSetKernelArg(pMultiDeviceKernel.get(), 0, sizeof(cl_mem), &b0); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_FALSE(kernel->isPatched()); retVal = clEnqueueNDCountKernelINTEL(pCmdQ2, pMultiDeviceKernel.get(), 1, nullptr, workgroupCount, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_KERNEL_ARGS, retVal); retVal = clSetKernelArg(pMultiDeviceKernel.get(), 1, sizeof(cl_mem), &b1); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(kernel->isPatched()); retVal = clEnqueueNDCountKernelINTEL(pCmdQ2, pMultiDeviceKernel.get(), 1, nullptr, workgroupCount, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(b0); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(b1); EXPECT_EQ(CL_SUCCESS, retVal); clReleaseCommandQueue(pCmdQ2); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(EnqueueKernelTest, givenKernelWhenNotAllArgsAreSetButSetKernelArgIsCalledTwiceThenClEnqueueNDCountKernelINTELReturnsError) { const size_t n = 512; size_t workgroupCount[3] = {2, 1, 1}; size_t localWorkSize[3] = {256, 1, 1}; cl_int retVal = CL_SUCCESS; CommandQueue *pCmdQ2 = createCommandQueue(pClDevice); HwHelper &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); auto engineGroupType = hwHelper.getEngineGroupType(pCmdQ2->getGpgpuEngine().getEngineType(), pCmdQ2->getGpgpuEngine().getEngineUsage(), hardwareInfo); if (!hwHelper.isCooperativeDispatchSupported(engineGroupType, hardwareInfo)) { pCmdQ2->getGpgpuEngine().osContext = pCmdQ2->getDevice().getEngine(aub_stream::ENGINE_CCS, EngineUsage::LowPriority).osContext; } std::unique_ptr pMultiDeviceKernel(MultiDeviceKernel::create(pProgram, pProgram->getKernelInfosForKernel("CopyBuffer"), &retVal)); auto kernel = pMultiDeviceKernel->getKernel(rootDeviceIndex); EXPECT_EQ(CL_SUCCESS, retVal); auto b0 = clCreateBuffer(context, 0, n * sizeof(float), nullptr, nullptr); auto b1 = clCreateBuffer(context, 0, n * sizeof(float), nullptr, nullptr); EXPECT_FALSE(kernel->isPatched()); retVal = 
clEnqueueNDCountKernelINTEL(pCmdQ2, pMultiDeviceKernel.get(), 1, nullptr, workgroupCount, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_KERNEL_ARGS, retVal); retVal = clSetKernelArg(pMultiDeviceKernel.get(), 0, sizeof(cl_mem), &b0); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_FALSE(kernel->isPatched()); retVal = clEnqueueNDCountKernelINTEL(pCmdQ2, pMultiDeviceKernel.get(), 1, nullptr, workgroupCount, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_KERNEL_ARGS, retVal); retVal = clSetKernelArg(pMultiDeviceKernel.get(), 0, sizeof(cl_mem), &b1); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_FALSE(kernel->isPatched()); retVal = clEnqueueNDCountKernelINTEL(pCmdQ2, pMultiDeviceKernel.get(), 1, nullptr, workgroupCount, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_KERNEL_ARGS, retVal); retVal = clReleaseMemObject(b0); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(b1); EXPECT_EQ(CL_SUCCESS, retVal); clReleaseCommandQueue(pCmdQ2); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(EnqueueKernelTest, givenKernelWhenSetKernelArgIsCalledForEachArgButAtLeastFailsThenClEnqueueNDCountKernelINTELReturnsError) { const size_t n = 512; size_t workgroupCount[3] = {2, 1, 1}; size_t localWorkSize[3] = {256, 1, 1}; cl_int retVal = CL_SUCCESS; CommandQueue *pCmdQ2 = createCommandQueue(pClDevice); HwHelper &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); auto engineGroupType = hwHelper.getEngineGroupType(pCmdQ2->getGpgpuEngine().getEngineType(), pCmdQ2->getGpgpuEngine().getEngineUsage(), hardwareInfo); if (!hwHelper.isCooperativeDispatchSupported(engineGroupType, hardwareInfo)) { pCmdQ2->getGpgpuEngine().osContext = pCmdQ2->getDevice().getEngine(aub_stream::ENGINE_CCS, EngineUsage::LowPriority).osContext; } std::unique_ptr pMultiDeviceKernel(MultiDeviceKernel::create(pProgram, pProgram->getKernelInfosForKernel("CopyBuffer"), &retVal)); auto kernel = pMultiDeviceKernel->getKernel(rootDeviceIndex); EXPECT_EQ(CL_SUCCESS, retVal); auto b0 = 
clCreateBuffer(context, 0, n * sizeof(float), nullptr, nullptr); auto b1 = clCreateBuffer(context, 0, n * sizeof(float), nullptr, nullptr); EXPECT_FALSE(kernel->isPatched()); retVal = clEnqueueNDCountKernelINTEL(pCmdQ2, pMultiDeviceKernel.get(), 1, nullptr, workgroupCount, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_KERNEL_ARGS, retVal); retVal = clSetKernelArg(pMultiDeviceKernel.get(), 0, sizeof(cl_mem), &b0); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_FALSE(kernel->isPatched()); retVal = clEnqueueNDCountKernelINTEL(pCmdQ2, pMultiDeviceKernel.get(), 1, nullptr, workgroupCount, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_KERNEL_ARGS, retVal); retVal = clSetKernelArg(pMultiDeviceKernel.get(), 1, 2 * sizeof(cl_mem), &b1); EXPECT_NE(CL_SUCCESS, retVal); EXPECT_FALSE(kernel->isPatched()); retVal = clEnqueueNDCountKernelINTEL(pCmdQ2, pMultiDeviceKernel.get(), 1, nullptr, workgroupCount, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_KERNEL_ARGS, retVal); retVal = clReleaseMemObject(b0); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(b1); EXPECT_EQ(CL_SUCCESS, retVal); clReleaseCommandQueue(pCmdQ2); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(EnqueueKernelTest, GivenInvalidEventListCountWhenEnqueuingNDCountKernelINTELThenInvalidEventWaitListErrorIsReturned) { size_t workgroupCount[3] = {1, 1, 1}; auto retVal = clEnqueueNDCountKernelINTEL( pCmdQ, pMultiDeviceKernel, 1, nullptr, workgroupCount, nullptr, 1, nullptr, nullptr); EXPECT_EQ(CL_INVALID_EVENT_WAIT_LIST, retVal); } HWTEST_F(EnqueueKernelTest, WhenEnqueingKernelThenTaskLevelIsIncremented) { auto taskLevelBefore = pCmdQ->taskLevel; callOneWorkItemNDRKernel(); EXPECT_GT(pCmdQ->taskLevel, taskLevelBefore); } HWTEST_F(EnqueueKernelTest, WhenEnqueingKernelThenCsrTaskLevelIsIncremented) { //this test case assumes IOQ auto &csr = pDevice->getUltCommandStreamReceiver(); csr.taskCount = pCmdQ->taskCount + 100; csr.taskLevel = pCmdQ->taskLevel + 50; callOneWorkItemNDRKernel(); 
EXPECT_EQ(pCmdQ->taskCount, csr.peekTaskCount()); EXPECT_EQ(pCmdQ->taskLevel + 1, csr.peekTaskLevel()); } HWTEST_F(EnqueueKernelTest, WhenEnqueingKernelThenCommandsAreAdded) { auto usedCmdBufferBefore = pCS->getUsed(); callOneWorkItemNDRKernel(); EXPECT_NE(usedCmdBufferBefore, pCS->getUsed()); } HWTEST_F(EnqueueKernelTest, GivenGpuHangAndBlockingCallWhenEnqueingKernelThenOutOfResourcesIsReported) { DebugManagerStateRestore stateRestore; DebugManager.flags.MakeEachEnqueueBlocking.set(true); std::unique_ptr device(new MockClDevice{MockClDevice::createWithNewExecutionEnvironment(nullptr)}); cl_queue_properties props = {}; MockCommandQueueHw mockCommandQueueHw(context, device.get(), &props); mockCommandQueueHw.waitForAllEnginesReturnValue = WaitStatus::GpuHang; cl_uint workDim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {1, 1, 1}; size_t localWorkSize[3] = {1, 1, 1}; cl_event *eventWaitList = nullptr; cl_int waitListSize = 0; const auto enqueueResult = mockCommandQueueHw.enqueueKernel( pKernel, workDim, globalWorkOffset, globalWorkSize, localWorkSize, waitListSize, eventWaitList, nullptr); EXPECT_EQ(CL_OUT_OF_RESOURCES, enqueueResult); EXPECT_EQ(1, mockCommandQueueHw.waitForAllEnginesCalledCount); } HWTEST_F(EnqueueKernelTest, WhenEnqueingKernelThenIndirectDataIsAdded) { auto dshBefore = pDSH->getUsed(); auto iohBefore = pIOH->getUsed(); auto sshBefore = pSSH->getUsed(); callOneWorkItemNDRKernel(); EXPECT_TRUE(UnitTestHelper::evaluateDshUsage(dshBefore, pDSH->getUsed(), &pKernel->getKernelInfo().kernelDescriptor, rootDeviceIndex)); EXPECT_NE(iohBefore, pIOH->getUsed()); if (pKernel->usesBindfulAddressingForBuffers() || pKernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.usesImages) { EXPECT_NE(sshBefore, pSSH->getUsed()); } } TEST_F(EnqueueKernelTest, GivenKernelWithBuiltinDispatchInfoBuilderWhenBeingDispatchedThenBuiltinDispatcherIsUsedForDispatchValidation) { struct MockBuiltinDispatchBuilder : BuiltinDispatchInfoBuilder { 
// Body of the local MockBuiltinDispatchBuilder struct opened on the previous line.
        using BuiltinDispatchInfoBuilder::BuiltinDispatchInfoBuilder;

        // Records every argument it receives and returns the configurable
        // valueToReturn. The recording members are mutable because this
        // override is const.
        cl_int validateDispatch(Kernel *kernel, uint32_t inworkDim, const Vec3 &gws, const Vec3 &elws, const Vec3 &offset) const override {
            receivedKernel = kernel;
            receivedWorkDim = inworkDim;
            receivedGws = gws;
            receivedElws = elws;
            receivedOffset = offset;

            wasValidateDispatchCalled = true;

            return valueToReturn;
        }

        cl_int valueToReturn = CL_SUCCESS;
        mutable Kernel *receivedKernel = nullptr;
        mutable uint32_t receivedWorkDim = 0;
        mutable Vec3 receivedGws = {0, 0, 0};
        mutable Vec3 receivedElws = {0, 0, 0};
        mutable Vec3 receivedOffset = {0, 0, 0};

        mutable bool wasValidateDispatchCalled = false;
    };

    MockBuiltinDispatchBuilder mockNuiltinDispatchBuilder(*pCmdQ->getDevice().getBuiltIns(), pCmdQ->getClDevice());

    MockKernelWithInternals mockKernel(*pClDevice);
    mockKernel.kernelInfo.builtinDispatchBuilder = &mockNuiltinDispatchBuilder;

    // First enqueue: the builder reports CL_SUCCESS and must see the exact
    // kernel, sizes, offset and dimension passed to enqueueKernel.
    EXPECT_FALSE(mockNuiltinDispatchBuilder.wasValidateDispatchCalled);

    mockNuiltinDispatchBuilder.valueToReturn = CL_SUCCESS;
    size_t gws[2] = {10, 1};
    size_t lws[2] = {5, 1};
    size_t off[2] = {7, 0};
    uint32_t dim = 1;
    auto ret = pCmdQ->enqueueKernel(mockKernel.mockKernel, dim, off, gws, lws, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, ret);
    EXPECT_TRUE(mockNuiltinDispatchBuilder.wasValidateDispatchCalled);
    EXPECT_EQ(mockKernel.mockKernel, mockNuiltinDispatchBuilder.receivedKernel);
    EXPECT_EQ(gws[0], mockNuiltinDispatchBuilder.receivedGws.x);
    EXPECT_EQ(lws[0], mockNuiltinDispatchBuilder.receivedElws.x);
    EXPECT_EQ(off[0], mockNuiltinDispatchBuilder.receivedOffset.x);
    EXPECT_EQ(dim, mockNuiltinDispatchBuilder.receivedWorkDim);

    // Second enqueue: the builder returns a forced error code, which
    // enqueueKernel must hand back unchanged.
    mockNuiltinDispatchBuilder.wasValidateDispatchCalled = false;
    gws[0] = 26;
    lws[0] = 13;
    off[0] = 17;
    dim = 2;
    cl_int forcedErr = 37;
    mockNuiltinDispatchBuilder.valueToReturn = forcedErr;
    ret = pCmdQ->enqueueKernel(mockKernel.mockKernel, dim, off, gws, lws, 0, nullptr, nullptr);
    EXPECT_EQ(forcedErr, ret);
    EXPECT_TRUE(mockNuiltinDispatchBuilder.wasValidateDispatchCalled);
EXPECT_EQ(mockKernel.mockKernel, mockNuiltinDispatchBuilder.receivedKernel); EXPECT_EQ(gws[0], mockNuiltinDispatchBuilder.receivedGws.x); EXPECT_EQ(lws[0], mockNuiltinDispatchBuilder.receivedElws.x); EXPECT_EQ(off[0], mockNuiltinDispatchBuilder.receivedOffset.x); EXPECT_EQ(dim, mockNuiltinDispatchBuilder.receivedWorkDim); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueKernelTest, givenSecondEnqueueWithTheSameScratchRequirementWhenPreemptionIsEnabledThenDontProgramMVSAgain) { typedef typename FamilyType::MEDIA_VFE_STATE MEDIA_VFE_STATE; pDevice->setPreemptionMode(PreemptionMode::ThreadGroup); auto &csr = pDevice->getGpgpuCommandStreamReceiver(); csr.getMemoryManager()->setForce32BitAllocations(false); ClHardwareParse hwParser; size_t off[3] = {0, 0, 0}; size_t gws[3] = {1, 1, 1}; uint32_t scratchSize = 4096u; MockKernelWithInternals mockKernel(*pClDevice); mockKernel.kernelInfo.setPerThreadScratchSize(scratchSize, 0); auto sizeToProgram = PreambleHelper::getScratchSizeValueToProgramMediaVfeState(scratchSize); pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr); hwParser.parseCommands(*pCmdQ); // All state should be programmed before walker auto itorCmd = find(hwParser.itorPipelineSelect, hwParser.itorWalker); ASSERT_NE(hwParser.itorWalker, itorCmd); auto *cmd = (MEDIA_VFE_STATE *)*itorCmd; EXPECT_EQ(sizeToProgram, cmd->getPerThreadScratchSpace()); EXPECT_EQ(sizeToProgram, cmd->getStackSize()); auto scratchAlloc = csr.getScratchAllocation(); auto itorfirstBBEnd = find(hwParser.itorWalker, hwParser.cmdList.end()); ASSERT_NE(hwParser.cmdList.end(), itorfirstBBEnd); pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr); hwParser.parseCommands(*pCmdQ); itorCmd = find(itorfirstBBEnd, hwParser.cmdList.end()); ASSERT_EQ(hwParser.cmdList.end(), itorCmd); EXPECT_EQ(csr.getScratchAllocation(), scratchAlloc); } HWTEST_F(EnqueueKernelTest, 
whenEnqueueingKernelThatRequirePrivateScratchThenPrivateScratchIsSetInCommandStreamReceviver) {
    // A kernel whose per-thread scratch size is set for slot 1 must leave that
    // size in the CSR's requiredPrivateScratchSize after the enqueue.
    pDevice->setPreemptionMode(PreemptionMode::ThreadGroup);
    auto &csr = pDevice->getUltCommandStreamReceiver();
    csr.getMemoryManager()->setForce32BitAllocations(false);
    size_t off[3] = {0, 0, 0};
    size_t gws[3] = {1, 1, 1};
    uint32_t privateScratchSize = 4096u;

    MockKernelWithInternals mockKernel(*pClDevice);

    mockKernel.kernelInfo.setPerThreadScratchSize(privateScratchSize, 1);

    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr);

    EXPECT_EQ(privateScratchSize, csr.requiredPrivateScratchSize);
}

// A kernel without stateless writes must be dispatched with l3AndL1On cache
// settings, and the CSR's last sent stateless MOCS must equal
// getMocsIndex(gmmHelper, true, true).
HWTEST_F(EnqueueKernelTest, whenEnqueueKernelWithNoStatelessWriteWhenSbaIsBeingProgrammedThenConstPolicyIsChoosen) {
    auto &csr = pDevice->getUltCommandStreamReceiver();
    size_t off[3] = {0, 0, 0};
    size_t gws[3] = {1, 1, 1};

    MockKernelWithInternals mockKernel(*pClDevice);
    mockKernel.mockKernel->containsStatelessWrites = false;

    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr);

    EXPECT_EQ(csr.recordedDispatchFlags.l3CacheSettings, L3CachingSettings::l3AndL1On);

    auto &helper = HwHelper::get(renderCoreFamily);
    auto gmmHelper = this->pDevice->getGmmHelper();
    auto expectedMocsIndex = helper.getMocsIndex(*gmmHelper, true, true);
    EXPECT_EQ(expectedMocsIndex, csr.latestSentStatelessMocsConfig);
}

// Same expectation as above, but the enqueue waits on a user event whose
// status is set to 0 only after the enqueue call.
HWTEST_F(EnqueueKernelTest, whenEnqueueKernelWithNoStatelessWriteOnBlockedCodePathWhenSbaIsBeingProgrammedThenConstPolicyIsChoosen) {
    auto &csr = pDevice->getUltCommandStreamReceiver();
    size_t off[3] = {0, 0, 0};
    size_t gws[3] = {1, 1, 1};

    auto userEvent = clCreateUserEvent(this->context, nullptr);

    MockKernelWithInternals mockKernel(*pClDevice);
    mockKernel.mockKernel->containsStatelessWrites = false;

    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 1, &userEvent, nullptr);

    clSetUserEventStatus(userEvent, 0u);

    EXPECT_EQ(csr.recordedDispatchFlags.l3CacheSettings, L3CachingSettings::l3AndL1On);

    auto &helper =
HwHelper::get(renderCoreFamily);
    auto gmmHelper = this->pDevice->getGmmHelper();
    auto expectedMocsIndex = helper.getMocsIndex(*gmmHelper, true, true);
    EXPECT_EQ(expectedMocsIndex, csr.latestSentStatelessMocsConfig);

    clReleaseEvent(userEvent);
}

// An all-zero global work size must still yield CL_SUCCESS.
HWTEST_F(EnqueueKernelTest, givenEnqueueWithGlobalWorkSizeWhenZeroValueIsPassedInDimensionThenTheKernelCommandWillTriviallySucceed) {
    size_t gws[3] = {0, 0, 0};
    MockKernelWithInternals mockKernel(*pClDevice);
    auto ret = pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, ret);
}

// Same zero-sized enqueue, but with MakeEachEnqueueBlocking set and the mock
// queue's wait reporting WaitStatus::GpuHang: CL_OUT_OF_RESOURCES is expected
// after exactly one wait-for-all-engines call.
HWTEST_F(EnqueueKernelTest, givenGpuHangAndBlockingCallAndEnqueueWithGlobalWorkSizeWhenZeroValueIsPassedInDimensionThenOutOfResourcesIsReturned) {
    // Restores debug flags when it goes out of scope.
    DebugManagerStateRestore stateRestore;
    DebugManager.flags.MakeEachEnqueueBlocking.set(true);

    std::unique_ptr device(new MockClDevice{MockClDevice::createWithNewExecutionEnvironment(nullptr)});
    cl_queue_properties props = {};

    MockCommandQueueHw mockCommandQueueHw(context, device.get(), &props);
    mockCommandQueueHw.waitForAllEnginesReturnValue = WaitStatus::GpuHang;

    size_t gws[3] = {0, 0, 0};
    MockKernelWithInternals mockKernel(*pClDevice);

    const auto enqueueResult = mockCommandQueueHw.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
    EXPECT_EQ(CL_OUT_OF_RESOURCES, enqueueResult);
    EXPECT_EQ(1, mockCommandQueueHw.waitForAllEnginesCalledCount);
}

// Switches a replacement CSR to batched dispatch, with both implicit-flush
// options disabled, and installs a mock submissions aggregator before
// enqueueing.
HWTEST_F(EnqueueKernelTest, givenCommandStreamReceiverInBatchingModeWhenEnqueueKernelIsCalledThenKernelIsRecorded) {
    auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    mockCsr->useNewResourceImplicitFlush = false;
    mockCsr->useGpuIdleImplicitFlush = false;
    mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
    pDevice->resetCommandStreamReceiver(mockCsr);

    auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
    mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator);
MockKernelWithInternals mockKernel(*pClDevice, context); size_t gws[3] = {1, 0, 0}; auto ret = pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, ret); EXPECT_FALSE(mockedSubmissionsAggregator->peekCmdBufferList().peekIsEmpty()); auto cmdBuffer = mockedSubmissionsAggregator->peekCmdBufferList().peekHead(); //Three more surfaces from preemptionAllocation, SipKernel and clearColorAllocation size_t csrSurfaceCount = (pDevice->getPreemptionMode() == PreemptionMode::MidThread) ? 2 : 0; csrSurfaceCount -= pDevice->getHardwareInfo().capabilityTable.supportsImages ? 0 : 1; size_t timestampPacketSurfacesCount = mockCsr->peekTimestampPacketWriteEnabled() ? 1 : 0; size_t fenceSurfaceCount = mockCsr->globalFenceAllocation ? 1 : 0; size_t clearColorSize = mockCsr->clearColorAllocation ? 1 : 0; EXPECT_EQ(0, mockCsr->flushCalledCount); EXPECT_EQ(5u + csrSurfaceCount + timestampPacketSurfacesCount + fenceSurfaceCount + clearColorSize, cmdBuffer->surfaces.size()); } HWTEST_F(EnqueueKernelTest, givenReducedAddressSpaceGraphicsAllocationForHostPtrWithL3FlushRequiredWhenEnqueueKernelIsCalledThenFlushIsCalledForReducedAddressSpacePlatforms) { std::unique_ptr device; std::unique_ptr cmdQ; auto hwInfoToModify = *defaultHwInfo; hwInfoToModify.capabilityTable.gpuAddressSpace = MemoryConstants::max36BitAddress; device.reset(new MockClDevice{MockDevice::createWithNewExecutionEnvironment(&hwInfoToModify)}); auto mockCsr = new MockCsrHw2(*device->executionEnvironment, device->getRootDeviceIndex(), device->getDeviceBitfield()); device->resetCommandStreamReceiver(mockCsr); auto memoryManager = mockCsr->getMemoryManager(); uint32_t hostPtr[10]{}; AllocationProperties properties{device->getRootDeviceIndex(), false, 1, AllocationType::EXTERNAL_HOST_PTR, false, device->getDeviceBitfield()}; properties.flags.flushL3RequiredForRead = properties.flags.flushL3RequiredForWrite = true; auto allocation = 
memoryManager->allocateGraphicsMemoryWithProperties(properties, hostPtr);
    MockKernelWithInternals mockKernel(*device, context);
    size_t gws[3] = {1, 0, 0};
    // The allocation is made resident before the enqueue; with its flush flags
    // set, the dispatch flags passed to the CSR must request a DC flush.
    mockCsr->makeResident(*allocation);

    cmdQ.reset(createCommandQueue(device.get()));
    auto ret = cmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, ret);
    EXPECT_TRUE(mockCsr->passedDispatchFlags.dcFlush);
    memoryManager->freeGraphicsMemory(allocation);
}

// Same 36-bit address space setup, but the allocation's L3-flush flags are
// both false: the resident allocation must not cause dcFlush to be set.
HWTEST_F(EnqueueKernelTest, givenReducedAddressSpaceGraphicsAllocationForHostPtrWithL3FlushUnrequiredWhenEnqueueKernelIsCalledThenFlushIsNotForcedByGraphicsAllocation) {
    std::unique_ptr device;
    std::unique_ptr cmdQ;

    auto hwInfoToModify = *defaultHwInfo;
    hwInfoToModify.capabilityTable.gpuAddressSpace = MemoryConstants::max36BitAddress;
    device.reset(new MockClDevice{MockDevice::createWithNewExecutionEnvironment(&hwInfoToModify)});
    auto mockCsr = new MockCsrHw2(*device->executionEnvironment, device->getRootDeviceIndex(), device->getDeviceBitfield());
    device->resetCommandStreamReceiver(mockCsr);
    auto memoryManager = mockCsr->getMemoryManager();
    uint32_t hostPtr[10]{};

    AllocationProperties properties{device->getRootDeviceIndex(), false, 1, AllocationType::EXTERNAL_HOST_PTR, false, device->getDeviceBitfield()};
    properties.flags.flushL3RequiredForRead = properties.flags.flushL3RequiredForWrite = false;
    auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(properties, hostPtr);
    MockKernelWithInternals mockKernel(*device, context);
    size_t gws[3] = {1, 0, 0};
    mockCsr->makeResident(*allocation);

    cmdQ.reset(createCommandQueue(device.get()));
    auto ret = cmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, ret);
    EXPECT_FALSE(mockCsr->passedDispatchFlags.dcFlush);
    memoryManager->freeGraphicsMemory(allocation);
}

// Full (48-bit) address space variant; the body follows.
HWTEST_F(EnqueueKernelTest, givenFullAddressSpaceGraphicsAllocationWhenEnqueueKernelIsCalledThenFlushIsNotForcedByGraphicsAllocation) {
// Body of the full-address-space test: on a device with a 48-bit GPU address
// space, a resident host-pointer allocation must not force dcFlush.
    HardwareInfo hwInfoToModify;
    std::unique_ptr device;
    std::unique_ptr cmdQ;

    hwInfoToModify = *defaultHwInfo;
    hwInfoToModify.capabilityTable.gpuAddressSpace = MemoryConstants::max48BitAddress;
    device.reset(new MockClDevice{MockDevice::createWithNewExecutionEnvironment(&hwInfoToModify)});
    auto mockCsr = new MockCsrHw2(*device->executionEnvironment, device->getRootDeviceIndex(), device->getDeviceBitfield());
    device->resetCommandStreamReceiver(mockCsr);
    auto memoryManager = mockCsr->getMemoryManager();
    uint32_t hostPtr[10]{};

    AllocationProperties properties{device->getRootDeviceIndex(), false, 1, AllocationType::EXTERNAL_HOST_PTR, false, device->getDeviceBitfield()};
    properties.flags.flushL3RequiredForRead = properties.flags.flushL3RequiredForWrite = false;
    auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(properties, hostPtr);
    MockKernelWithInternals mockKernel(*device, context);
    size_t gws[3] = {1, 0, 0};
    mockCsr->makeResident(*allocation);

    cmdQ.reset(createCommandQueue(device.get()));
    auto ret = cmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, ret);
    EXPECT_FALSE(mockCsr->passedDispatchFlags.dcFlush);
    memoryManager->freeGraphicsMemory(allocation);

    // NOTE(review): this second pass sets the flush flags to the same values
    // (false) as the first one - confirm whether `true` was intended here.
    properties.flags.flushL3RequiredForRead = properties.flags.flushL3RequiredForWrite = false;
    allocation = memoryManager->allocateGraphicsMemoryWithProperties(properties, hostPtr);
    mockCsr->makeResident(*allocation);

    cmdQ.reset(createCommandQueue(device.get()));
    ret = cmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, ret);
    EXPECT_FALSE(mockCsr->passedDispatchFlags.dcFlush);
    memoryManager->freeGraphicsMemory(allocation);
}

// With the fixture's default CSR, clFlush after an enqueue must succeed.
HWTEST_F(EnqueueKernelTest, givenDefaultCommandStreamReceiverWhenClFlushIsCalledThenSuccessIsReturned) {
    MockKernelWithInternals mockKernel(*pClDevice);
    size_t gws[3] = {1, 0, 0};
    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr,
nullptr);

    auto ret = clFlush(pCmdQ);
    EXPECT_EQ(CL_SUCCESS, ret);
}

// In batched dispatch the enqueue is only recorded in the aggregator; clFlush
// must empty the aggregator and result in exactly one CSR flush.
HWTEST_F(EnqueueKernelTest, givenCommandStreamReceiverInBatchingModeAndBatchedKernelWhenFlushIsCalledThenKernelIsSubmitted) {
    auto mockCsrmockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    mockCsrmockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
    mockCsrmockCsr->useNewResourceImplicitFlush = false;
    mockCsrmockCsr->useGpuIdleImplicitFlush = false;
    pDevice->resetCommandStreamReceiver(mockCsrmockCsr);

    auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
    mockCsrmockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator);

    MockKernelWithInternals mockKernel(*pClDevice);
    size_t gws[3] = {1, 0, 0};
    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);

    EXPECT_FALSE(mockedSubmissionsAggregator->peekCmdBufferList().peekIsEmpty());

    auto ret = clFlush(pCmdQ);
    EXPECT_EQ(CL_SUCCESS, ret);

    EXPECT_TRUE(mockedSubmissionsAggregator->peekCmdBufferList().peekIsEmpty());
    EXPECT_EQ(1, mockCsrmockCsr->flushCalledCount);
}

// A second clFlush with nothing new batched must also succeed and leave the
// aggregator empty; the flush-count check follows.
HWTEST_F(EnqueueKernelTest, givenCommandStreamReceiverInBatchingModeAndBatchedKernelWhenFlushIsCalledTwiceThenNothingChanges) {
    auto mockCsrmockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    mockCsrmockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
    pDevice->resetCommandStreamReceiver(mockCsrmockCsr);

    auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
    mockCsrmockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator);

    MockKernelWithInternals mockKernel(*pClDevice);
    size_t gws[3] = {1, 0, 0};
    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);

    auto ret = clFlush(pCmdQ);
    EXPECT_EQ(CL_SUCCESS, ret);

    ret = clFlush(pCmdQ);
    EXPECT_EQ(CL_SUCCESS, ret);

    EXPECT_TRUE(mockedSubmissionsAggregator->peekCmdBufferList().peekIsEmpty());
// Tail of the "flush called twice" test: only one CSR flush may have happened.
    EXPECT_EQ(1, mockCsrmockCsr->flushCalledCount);
}

// Two enqueues in batched mode must produce two distinct recorded command
// buffers with the same surface count and the same command buffer allocation,
// the second starting at a greater offset.
HWTEST_F(EnqueueKernelTest, givenCommandStreamReceiverInBatchingModeWhenKernelIsEnqueuedTwiceThenTwoSubmissionsAreRecorded) {
    auto &mockCsrmockCsr = pDevice->getUltCommandStreamReceiver();
    mockCsrmockCsr.overrideDispatchPolicy(DispatchMode::BatchedDispatch);
    mockCsrmockCsr.useNewResourceImplicitFlush = false;
    mockCsrmockCsr.useGpuIdleImplicitFlush = false;

    auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
    mockCsrmockCsr.submissionAggregator.reset(mockedSubmissionsAggregator);

    MockKernelWithInternals mockKernel(*pClDevice, context);
    size_t gws[3] = {1, 0, 0};

    //make sure csr emits something
    // (the comment is kept on its own line so it no longer comments out the
    // statements that follow it)
    mockCsrmockCsr.mediaVfeStateDirty = true;
    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
    mockCsrmockCsr.mediaVfeStateDirty = true;
    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);

    EXPECT_FALSE(mockedSubmissionsAggregator->peekCmdBufferList().peekIsEmpty());

    auto &cmdBufferList = mockedSubmissionsAggregator->peekCmdBufferList();
    EXPECT_NE(nullptr, cmdBufferList.peekHead());
    // Head and tail differ, so more than one entry was recorded.
    EXPECT_NE(cmdBufferList.peekTail(), cmdBufferList.peekHead());

    auto cmdBuffer1 = cmdBufferList.peekHead();
    auto cmdBuffer2 = cmdBufferList.peekTail();

    EXPECT_EQ(cmdBuffer1->surfaces.size(), cmdBuffer2->surfaces.size());
    EXPECT_EQ(cmdBuffer1->batchBuffer.commandBufferAllocation, cmdBuffer2->batchBuffer.commandBufferAllocation);
    EXPECT_GT(cmdBuffer2->batchBuffer.startOffset, cmdBuffer1->batchBuffer.startOffset);
}

// Sets up a replacement CSR in batched dispatch with both implicit-flush
// options disabled; the rest of the test follows.
HWTEST_F(EnqueueKernelTest, givenCommandStreamReceiverInBatchingModeWhenFlushIsCalledOnTwoBatchedKernelsThenTheyAreExecutedInOrder) {
    auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
    mockCsr->useNewResourceImplicitFlush = false;
    mockCsr->useGpuIdleImplicitFlush = false;
// Tail of the "two batched kernels" test: after two enqueues and one queue
// flush the aggregator must be empty and the CSR flushed exactly once.
    pDevice->resetCommandStreamReceiver(mockCsr);

    auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
    mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator);

    MockKernelWithInternals mockKernel(*pClDevice);
    size_t gws[3] = {1, 0, 0};

    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);

    pCmdQ->flush();

    EXPECT_TRUE(mockedSubmissionsAggregator->peekCmdBufferList().peekIsEmpty());
    EXPECT_EQ(1, mockCsr->flushCalledCount);
}

// After two batched enqueues and a flush, the parsed command list must
// contain exactly one command of the type searched by findAll.
// NOTE(review): findAll's template argument appears to be missing from this
// text; the test name suggests batch-buffer-start commands - confirm.
HWCMDTEST_F(IGFX_XE_HP_CORE, EnqueueKernelTest, givenTwoEnqueueProgrammedWithinSameCommandBufferWhenBatchedThenNoBBSBetweenThem) {
    auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
    mockCsr->useNewResourceImplicitFlush = false;
    mockCsr->useGpuIdleImplicitFlush = false;
    pDevice->resetCommandStreamReceiver(mockCsr);

    auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
    mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator);

    ClHardwareParse hwParse;

    MockKernelWithInternals mockKernel(*pClDevice);
    size_t gws[3] = {1, 0, 0};

    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);

    pCmdQ->flush();

    hwParse.parseCommands(*pCmdQ);
    auto bbsCommands = findAll(hwParse.cmdList.begin(), hwParse.cmdList.end());

    EXPECT_EQ(bbsCommands.size(), 1u);
}

// Sets up a replacement CSR in batched dispatch with both implicit-flush
// options disabled; the rest of the test follows.
HWTEST_F(EnqueueKernelTest, givenCsrInBatchingModeWhenFinishIsCalledThenBatchesSubmissionsAreFlushed) {
    auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
    mockCsr->useNewResourceImplicitFlush = false;
    mockCsr->useGpuIdleImplicitFlush = false;
// Tail of the finish() test: two batched enqueues followed by finish() must
// empty the aggregator with exactly one CSR flush.
    pDevice->resetCommandStreamReceiver(mockCsr);

    auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
    mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator);

    MockKernelWithInternals mockKernel(*pClDevice);
    size_t gws[3] = {1, 0, 0};

    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);

    pCmdQ->finish();

    EXPECT_TRUE(mockedSubmissionsAggregator->peekCmdBufferList().peekIsEmpty());
    EXPECT_EQ(1, mockCsr->flushCalledCount);
}

// Same as above with three batched enqueues: still a single CSR flush.
HWTEST_F(EnqueueKernelTest, givenCsrInBatchingModeWhenThressEnqueueKernelsAreCalledThenBatchesSubmissionsAreFlushed) {
    auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
    mockCsr->useNewResourceImplicitFlush = false;
    mockCsr->useGpuIdleImplicitFlush = false;
    pDevice->resetCommandStreamReceiver(mockCsr);

    auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
    mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator);

    MockKernelWithInternals mockKernel(*pClDevice);
    size_t gws[3] = {1, 0, 0};

    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);

    pCmdQ->finish();

    EXPECT_TRUE(mockedSubmissionsAggregator->peekCmdBufferList().peekIsEmpty());
    EXPECT_EQ(1, mockCsr->flushCalledCount);
}

// Starts the clWaitForEvents variant; the rest of the test follows.
HWTEST_F(EnqueueKernelTest, givenCsrInBatchingModeWhenWaitForEventsIsCalledThenBatchedSubmissionsAreFlushed) {
    auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
    mockCsr->useNewResourceImplicitFlush = false;
// Tail of the clWaitForEvents test: waiting on the second enqueue's event
// must empty the aggregator with exactly one CSR flush.
    mockCsr->useGpuIdleImplicitFlush = false;
    pDevice->resetCommandStreamReceiver(mockCsr);

    auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
    mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator);

    MockKernelWithInternals mockKernel(*pClDevice);
    size_t gws[3] = {1, 0, 0};

    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
    cl_event event;
    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, &event);

    auto status = clWaitForEvents(1, &event);

    EXPECT_EQ(CL_SUCCESS, status);
    EXPECT_TRUE(mockedSubmissionsAggregator->peekCmdBufferList().peekIsEmpty());
    EXPECT_EQ(1, mockCsr->flushCalledCount);

    status = clReleaseEvent(event);
    EXPECT_EQ(CL_SUCCESS, status);
}

// Flush stamps of the CSR, the event and the queue must all read 0 while the
// work is only batched, and 1 once clWaitForEvents has returned.
HWTEST_F(EnqueueKernelTest, givenCsrInBatchingModeWhenCommandIsFlushedThenFlushStampIsUpdatedInCommandQueueCsrAndEvent) {
    auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
    mockCsr->useNewResourceImplicitFlush = false;
    mockCsr->useGpuIdleImplicitFlush = false;
    pDevice->resetCommandStreamReceiver(mockCsr);

    auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
    mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator);

    MockKernelWithInternals mockKernel(*pClDevice);
    size_t gws[3] = {1, 0, 0};

    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
    cl_event event;
    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, &event);
    auto neoEvent = castToObject(event);

    EXPECT_EQ(0u, mockCsr->flushStamp->peekStamp());
    EXPECT_EQ(0u, neoEvent->flushStamp->peekStamp());
    EXPECT_EQ(0u, pCmdQ->flushStamp->peekStamp());

    // NOTE(review): `status` is assigned here and below but never asserted in this test.
    auto status = clWaitForEvents(1, &event);

    EXPECT_EQ(1, neoEvent->getRefInternalCount());
    EXPECT_EQ(1u, mockCsr->flushStamp->peekStamp());
    EXPECT_EQ(1u, neoEvent->flushStamp->peekStamp());
    EXPECT_EQ(1u,
pCmdQ->flushStamp->peekStamp());

    // A following clFinish must leave the queue's flush stamp at 1.
    status = clFinish(pCmdQ);
    EXPECT_EQ(1u, pCmdQ->flushStamp->peekStamp());

    status = clReleaseEvent(event);
}

// A non-blocking map of a zero-copy buffer enqueued after a batched kernel
// must carry flush stamp 1 on its event once the queue has been flushed.
HWTEST_F(EnqueueKernelTest, givenCsrInBatchingModeWhenNonBlockingMapFollowsNdrCallThenFlushStampIsUpdatedProperly) {
    auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
    pDevice->resetCommandStreamReceiver(mockCsr);

    EXPECT_TRUE(this->destBuffer->isMemObjZeroCopy());
    MockKernelWithInternals mockKernel(*pClDevice);
    size_t gws[3] = {1, 0, 0};
    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);

    cl_event event;
    pCmdQ->enqueueMapBuffer(this->destBuffer, false, CL_MAP_READ, 0u, 1u, 0, nullptr, &event, this->retVal);

    pCmdQ->flush();
    auto neoEvent = castToObject(event);
    EXPECT_EQ(1u, neoEvent->flushStamp->peekStamp());
    clReleaseEvent(event);
}

// Here the first enqueue returns the event and a second, event-less enqueue
// follows; all three flush stamps must still read 0 before any wait.
HWTEST_F(EnqueueKernelTest, givenCsrInBatchingModeWhenCommandWithEventIsFollowedByCommandWithoutEventThenFlushStampIsUpdatedInCommandQueueCsrAndEvent) {
    auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
    mockCsr->useNewResourceImplicitFlush = false;
    mockCsr->useGpuIdleImplicitFlush = false;
    pDevice->resetCommandStreamReceiver(mockCsr);

    auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
    mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator);

    MockKernelWithInternals mockKernel(*pClDevice);
    size_t gws[3] = {1, 0, 0};

    cl_event event;
    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, &event);
    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
    auto neoEvent = castToObject(event);

    EXPECT_EQ(0u, mockCsr->flushStamp->peekStamp());
    EXPECT_EQ(0u, neoEvent->flushStamp->peekStamp());
    EXPECT_EQ(0u,
pCmdQ->flushStamp->peekStamp());

    // After the wait every flush stamp must read 1; clFinish must keep the
    // queue's stamp at 1.
    // NOTE(review): `status` is assigned three times but never asserted in this test.
    auto status = clWaitForEvents(1, &event);

    EXPECT_EQ(1, neoEvent->getRefInternalCount());
    EXPECT_EQ(1u, mockCsr->flushStamp->peekStamp());
    EXPECT_EQ(1u, neoEvent->flushStamp->peekStamp());
    EXPECT_EQ(1u, pCmdQ->flushStamp->peekStamp());

    status = clFinish(pCmdQ);
    EXPECT_EQ(1u, pCmdQ->flushStamp->peekStamp());

    status = clReleaseEvent(event);
}

// clFlush must move both the CSR's and the queue's flush stamp from 0 to 1.
HWTEST_F(EnqueueKernelTest, givenCsrInBatchingModeWhenClFlushIsCalledThenQueueFlushStampIsUpdated) {
    auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
    mockCsr->useNewResourceImplicitFlush = false;
    mockCsr->useGpuIdleImplicitFlush = false;
    pDevice->resetCommandStreamReceiver(mockCsr);

    MockKernelWithInternals mockKernel(*pClDevice);
    size_t gws[3] = {1, 0, 0};

    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);

    EXPECT_EQ(0u, mockCsr->flushStamp->peekStamp());
    EXPECT_EQ(0u, pCmdQ->flushStamp->peekStamp());

    clFlush(pCmdQ);
    EXPECT_EQ(1u, mockCsr->flushStamp->peekStamp());
    EXPECT_EQ(1u, pCmdQ->flushStamp->peekStamp());
}

// The waited-on event comes from a marker enqueued after the batched kernel;
// the remaining assertions follow.
HWTEST_F(EnqueueKernelTest, givenCsrInBatchingModeWhenWaitForEventsIsCalledWithUnflushedTaskCountThenBatchedSubmissionsAreFlushed) {
    auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
    pDevice->resetCommandStreamReceiver(mockCsr);

    auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
    mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator);

    MockKernelWithInternals mockKernel(*pClDevice);
    size_t gws[3] = {1, 0, 0};

    cl_event event;
    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
    pCmdQ->enqueueMarkerWithWaitList(0, nullptr, &event);

    auto status = clWaitForEvents(1, &event);

    EXPECT_EQ(CL_SUCCESS, status);
// Tail of the marker + clWaitForEvents test: the aggregator must be empty
// after exactly one CSR flush, and the event must release cleanly.
    EXPECT_TRUE(mockedSubmissionsAggregator->peekCmdBufferList().peekIsEmpty());
    EXPECT_EQ(1, mockCsr->flushCalledCount);

    status = clReleaseEvent(event);
    EXPECT_EQ(CL_SUCCESS, status);
}

// Same scenario driven by clFinish instead of clWaitForEvents.
HWTEST_F(EnqueueKernelTest, givenCsrInBatchingModeWhenFinishIsCalledWithUnflushedTaskCountThenBatchedSubmissionsAreFlushed) {
    auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
    pDevice->resetCommandStreamReceiver(mockCsr);

    auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
    mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator);

    MockKernelWithInternals mockKernel(*pClDevice);
    size_t gws[3] = {1, 0, 0};

    cl_event event;
    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
    pCmdQ->enqueueMarkerWithWaitList(0, nullptr, &event);

    auto status = clFinish(pCmdQ);

    EXPECT_EQ(CL_SUCCESS, status);
    EXPECT_TRUE(mockedSubmissionsAggregator->peekCmdBufferList().peekIsEmpty());
    EXPECT_EQ(1, mockCsr->flushCalledCount);

    status = clReleaseEvent(event);
    EXPECT_EQ(CL_SUCCESS, status);
}

// Creates a queue with CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE and a batching
// CSR; the enqueue and assertions follow.
HWTEST_F(EnqueueKernelTest, givenOutOfOrderCommandQueueWhenEnqueueKernelIsMadeThenPipeControlPositionIsRecorded) {
    const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0};
    auto ooq = clCreateCommandQueueWithProperties(context, pClDevice, props, nullptr);

    auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
    mockCsr->useNewResourceImplicitFlush = false;
    mockCsr->useGpuIdleImplicitFlush = false;
    pDevice->resetCommandStreamReceiver(mockCsr);

    auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
    mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator);

    MockKernelWithInternals mockKernel(*pClDevice, context);
    size_t gws[3] = {1, 0,
0};

    // The recorded command buffer must carry a non-null
    // pipeControlThatMayBeErasedLocation.
    clEnqueueNDRangeKernel(ooq, mockKernel.mockMultiDeviceKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);

    EXPECT_FALSE(mockedSubmissionsAggregator->peekCmdBufferList().peekIsEmpty());
    auto cmdBuffer = mockedSubmissionsAggregator->peekCmdBufferList().peekHead();
    EXPECT_NE(nullptr, cmdBuffer->pipeControlThatMayBeErasedLocation);

    clReleaseCommandQueue(ooq);
}

// Same expectation for a queue created with an empty ({0}) property list,
// using the device's existing ULT CSR switched to batched dispatch.
HWTEST_F(EnqueueKernelTest, givenInOrderCommandQueueWhenEnqueueKernelIsMadeThenPipeControlPositionIsRecorded) {
    const cl_queue_properties props[] = {0};
    auto inOrderQueue = clCreateCommandQueueWithProperties(context, pClDevice, props, nullptr);

    auto &mockCsr = pDevice->getUltCommandStreamReceiver();
    mockCsr.overrideDispatchPolicy(DispatchMode::BatchedDispatch);
    mockCsr.useNewResourceImplicitFlush = false;
    mockCsr.useGpuIdleImplicitFlush = false;

    auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
    mockCsr.submissionAggregator.reset(mockedSubmissionsAggregator);

    MockKernelWithInternals mockKernel(*pClDevice, context);
    size_t gws[3] = {1, 0, 0};

    clEnqueueNDRangeKernel(inOrderQueue, mockKernel.mockMultiDeviceKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);

    EXPECT_FALSE(mockedSubmissionsAggregator->peekCmdBufferList().peekIsEmpty());
    auto cmdBuffer = mockedSubmissionsAggregator->peekCmdBufferList().peekHead();
    EXPECT_NE(nullptr, cmdBuffer->pipeControlThatMayBeErasedLocation);

    clReleaseCommandQueue(inOrderQueue);
}

// Variant for a kernel marked as using shared args; setup starts here and the
// rest of the test follows.
HWTEST_F(EnqueueKernelTest, givenInOrderCommandQueueWhenEnqueueKernelThatHasSharedObjectsAsArgIsMadeThenPipeControlPositionIsRecorded) {
    const cl_queue_properties props[] = {0};
    auto inOrderQueue = clCreateCommandQueueWithProperties(context, pClDevice, props, nullptr);

    auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
    mockCsr->useNewResourceImplicitFlush = false;
    mockCsr->useGpuIdleImplicitFlush = false;
// Tail of the shared-args test: both the erasable and the epilogue pipe
// control locations must be recorded.
    pDevice->resetCommandStreamReceiver(mockCsr);

    auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
    mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator);

    MockKernelWithInternals mockKernel(*pClDevice, context);
    size_t gws[3] = {1, 0, 0};

    mockKernel.mockKernel->setUsingSharedArgs(true);
    clEnqueueNDRangeKernel(inOrderQueue, mockKernel.mockMultiDeviceKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);

    EXPECT_FALSE(mockedSubmissionsAggregator->peekCmdBufferList().peekIsEmpty());
    auto cmdBuffer = mockedSubmissionsAggregator->peekCmdBufferList().peekHead();
    EXPECT_NE(nullptr, cmdBuffer->pipeControlThatMayBeErasedLocation);
    EXPECT_NE(nullptr, cmdBuffer->epiloguePipeControlLocation);

    clReleaseCommandQueue(inOrderQueue);
}

// Enqueueing a shared-args kernel on the fixture's queue must not set dcFlush
// in the dispatch flags passed to the CSR.
HWTEST_F(EnqueueKernelTest, givenInOrderCommandQueueWhenEnqueueKernelThatHasSharedObjectsAsArgIsMadeThenPipeControlDoesntHaveDcFlush) {
    auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
    pDevice->resetCommandStreamReceiver(mockCsr);

    MockKernelWithInternals mockKernel(*pClDevice, context);
    size_t gws[3] = {1, 0, 0};

    mockKernel.mockKernel->setUsingSharedArgs(true);
    clEnqueueNDRangeKernel(this->pCmdQ, mockKernel.mockMultiDeviceKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);

    EXPECT_FALSE(mockCsr->passedDispatchFlags.dcFlush);
}

// Variant in which the enqueue returns an event; setup starts here and the
// rest of the test follows.
HWTEST_F(EnqueueKernelTest, givenInOrderCommandQueueWhenEnqueueKernelReturningEventIsMadeThenPipeControlPositionIsNotRecorded) {
    const cl_queue_properties props[] = {0};
    auto inOrderQueue = clCreateCommandQueueWithProperties(context, pClDevice, props, nullptr);

    auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
    mockCsr->useNewResourceImplicitFlush = false;
    mockCsr->useGpuIdleImplicitFlush = false;
mockCsr->timestampPacketWriteEnabled = false; pDevice->resetCommandStreamReceiver(mockCsr); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); MockKernelWithInternals mockKernel(*pClDevice, context); size_t gws[3] = {1, 0, 0}; cl_event event; clEnqueueNDRangeKernel(inOrderQueue, mockKernel.mockMultiDeviceKernel, 1, nullptr, gws, nullptr, 0, nullptr, &event); EXPECT_FALSE(mockedSubmissionsAggregator->peekCmdBufferList().peekIsEmpty()); auto cmdBuffer = mockedSubmissionsAggregator->peekCmdBufferList().peekHead(); EXPECT_EQ(nullptr, cmdBuffer->pipeControlThatMayBeErasedLocation); EXPECT_NE(nullptr, cmdBuffer->epiloguePipeControlLocation); clReleaseCommandQueue(inOrderQueue); clReleaseEvent(event); } HWTEST_F(EnqueueKernelTest, givenInOrderCommandQueueWhenEnqueueKernelReturningEventIsMadeAndCommandStreamReceiverIsInNTo1ModeThenPipeControlPositionIsRecorded) { const cl_queue_properties props[] = {0}; auto inOrderQueue = clCreateCommandQueueWithProperties(context, pClDevice, props, nullptr); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); mockCsr->useNewResourceImplicitFlush = false; mockCsr->useGpuIdleImplicitFlush = false; pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->enableNTo1SubmissionModel(); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); MockKernelWithInternals mockKernel(*pClDevice, context); size_t gws[3] = {1, 0, 0}; cl_event event; clEnqueueNDRangeKernel(inOrderQueue, mockKernel.mockMultiDeviceKernel, 1, nullptr, gws, nullptr, 0, nullptr, &event); EXPECT_FALSE(mockedSubmissionsAggregator->peekCmdBufferList().peekIsEmpty()); auto cmdBuffer = mockedSubmissionsAggregator->peekCmdBufferList().peekHead(); EXPECT_NE(nullptr, 
cmdBuffer->pipeControlThatMayBeErasedLocation); EXPECT_NE(nullptr, cmdBuffer->epiloguePipeControlLocation); clReleaseCommandQueue(inOrderQueue); clReleaseEvent(event); } HWTEST_F(EnqueueKernelTest, givenOutOfOrderCommandQueueWhenEnqueueKernelReturningEventIsMadeThenPipeControlPositionIsRecorded) { auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); mockCsr->useNewResourceImplicitFlush = false; mockCsr->useGpuIdleImplicitFlush = false; pDevice->resetCommandStreamReceiver(mockCsr); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0}; auto inOrderQueue = clCreateCommandQueueWithProperties(context, pClDevice, props, nullptr); MockKernelWithInternals mockKernel(*pClDevice, context); size_t gws[3] = {1, 0, 0}; cl_event event; clEnqueueNDRangeKernel(inOrderQueue, mockKernel.mockMultiDeviceKernel, 1, nullptr, gws, nullptr, 0, nullptr, &event); EXPECT_FALSE(mockedSubmissionsAggregator->peekCmdBufferList().peekIsEmpty()); auto cmdBuffer = mockedSubmissionsAggregator->peekCmdBufferList().peekHead(); EXPECT_NE(nullptr, cmdBuffer->pipeControlThatMayBeErasedLocation); EXPECT_EQ(cmdBuffer->epiloguePipeControlLocation, cmdBuffer->pipeControlThatMayBeErasedLocation); clReleaseCommandQueue(inOrderQueue); clReleaseEvent(event); } HWTEST_F(EnqueueKernelTest, givenCsrInBatchingModeWhenBlockingCallIsMadeThenEventAssociatedWithCommandHasProperFlushStamp) { DebugManagerStateRestore stateRestore; DebugManager.flags.MakeEachEnqueueBlocking.set(true); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); 
pDevice->resetCommandStreamReceiver(mockCsr); MockKernelWithInternals mockKernel(*pClDevice); size_t gws[3] = {1, 0, 0}; cl_event event; pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, &event); auto neoEvent = castToObject(event); EXPECT_EQ(1u, neoEvent->flushStamp->peekStamp()); EXPECT_EQ(1, mockCsr->flushCalledCount); auto status = clReleaseEvent(event); EXPECT_EQ(CL_SUCCESS, status); } HWTEST_F(EnqueueKernelTest, givenKernelWhenItIsEnqueuedThenAllResourceGraphicsAllocationsAreUpdatedWithCsrTaskCount) { auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(mockCsr); MockKernelWithInternals mockKernel(*pClDevice); size_t gws[3] = {1, 0, 0}; pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_EQ(1, mockCsr->flushCalledCount); auto csrTaskCount = mockCsr->peekTaskCount(); auto &passedAllocationPack = mockCsr->copyOfAllocations; for (auto &allocation : passedAllocationPack) { EXPECT_EQ(csrTaskCount, allocation->getTaskCount(mockCsr->getOsContext().getContextId())); } } HWTEST_F(EnqueueKernelTest, givenKernelWhenItIsSubmittedFromTwoDifferentCommandQueuesThenCsrDoesntReloadAnyCommands) { auto &csr = this->pDevice->getUltCommandStreamReceiver(); MockKernelWithInternals mockKernel(*pClDevice); size_t gws[3] = {1, 0, 0}; pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); auto currentUsed = csr.commandStream.getUsed(); const cl_queue_properties props[] = {0}; auto inOrderQueue = clCreateCommandQueueWithProperties(context, pClDevice, props, nullptr); clEnqueueNDRangeKernel(inOrderQueue, mockKernel.mockMultiDeviceKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); auto usedAfterSubmission = csr.commandStream.getUsed(); EXPECT_EQ(usedAfterSubmission, currentUsed); clReleaseCommandQueue(inOrderQueue); } TEST_F(EnqueueKernelTest, 
givenKernelWhenAllArgsAreNotAndEventExistSetThenClEnqueueNDRangeKernelReturnsInvalidKernelArgsAndSetEventToNull) { const size_t n = 512; size_t globalWorkSize[3] = {n, 1, 1}; size_t localWorkSize[3] = {256, 1, 1}; cl_int retVal = CL_SUCCESS; CommandQueue *pCmdQ2 = createCommandQueue(pClDevice); std::unique_ptr pMultiDeviceKernel(MultiDeviceKernel::create(pProgram, pProgram->getKernelInfosForKernel("CopyBuffer"), &retVal)); auto kernel = pMultiDeviceKernel->getKernel(rootDeviceIndex); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_FALSE(kernel->isPatched()); cl_event event; retVal = clEnqueueNDRangeKernel(pCmdQ2, pMultiDeviceKernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, &event); EXPECT_EQ(CL_INVALID_KERNEL_ARGS, retVal); clFlush(pCmdQ2); clReleaseCommandQueue(pCmdQ2); } TEST_F(EnqueueKernelTest, givenEnqueueCommandThatLwsExceedsDeviceCapabilitiesWhenEnqueueNDRangeKernelIsCalledThenErrorIsReturned) { MockKernelWithInternals mockKernel(*pClDevice); mockKernel.mockKernel->maxKernelWorkGroupSize = static_cast(pDevice->getDeviceInfo().maxWorkGroupSize / 2); auto maxKernelWorkgroupSize = mockKernel.mockKernel->maxKernelWorkGroupSize; size_t globalWorkSize[3] = {maxKernelWorkgroupSize + 1, 1, 1}; size_t localWorkSize[3] = {maxKernelWorkgroupSize + 1, 1, 1}; auto status = pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_WORK_GROUP_SIZE, status); } TEST_F(EnqueueKernelTest, givenEnqueueCommandThatLocalWorkgroupSizeContainsZeroWhenEnqueueNDRangeKernelIsCalledThenClInvalidWorkGroupSizeIsReturned) { size_t globalWorkSize[3] = {1, 1, 1}; size_t localWorkSize[3] = {1, 0, 1}; MockKernelWithInternals mockKernel(*pClDevice); auto status = pCmdQ->enqueueKernel(mockKernel.mockKernel, 3, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_WORK_GROUP_SIZE, status); } TEST_F(EnqueueKernelTest, 
givenEnqueueCommandWithWorkDimLargerThanAllowedWhenEnqueueNDRangeKernelIsCalledThenClInvalidWorkDimensionIsReturned) { size_t globalWorkSize[3] = {1, 1, 1}; size_t localWorkSize[3] = {1, 1, 1}; MockKernelWithInternals mockKernel(*pClDevice); auto testedWorkDim = pClDevice->deviceInfo.maxWorkItemDimensions; auto status = clEnqueueNDRangeKernel(pCmdQ, mockKernel.mockMultiDeviceKernel, testedWorkDim, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, status); testedWorkDim += 1; status = clEnqueueNDRangeKernel(pCmdQ, mockKernel.mockMultiDeviceKernel, testedWorkDim, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_WORK_DIMENSION, status); } HWTEST_F(EnqueueKernelTest, givenVMEKernelWhenEnqueueKernelThenDispatchFlagsHaveMediaSamplerRequired) { auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); pDevice->resetCommandStreamReceiver(mockCsr); MockKernelWithInternals mockKernel(*pClDevice, context); size_t gws[3] = {1, 0, 0}; mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.flags.usesVme = true; clEnqueueNDRangeKernel(this->pCmdQ, mockKernel.mockMultiDeviceKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_TRUE(mockCsr->passedDispatchFlags.pipelineSelectArgs.mediaSamplerRequired); } HWTEST_F(EnqueueKernelTest, givenUseGlobalAtomicsSetWhenEnqueueKernelThenDispatchFlagsUseGlobalAtomicsIsSet) { auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); pDevice->resetCommandStreamReceiver(mockCsr); MockKernelWithInternals mockKernel(*pClDevice, context); size_t gws[3] = {1, 0, 0}; mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.flags.useGlobalAtomics = true; clEnqueueNDRangeKernel(this->pCmdQ, 
mockKernel.mockMultiDeviceKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_TRUE(mockCsr->passedDispatchFlags.useGlobalAtomics); } HWTEST_F(EnqueueKernelTest, givenUseGlobalAtomicsIsNotSetWhenEnqueueKernelThenDispatchFlagsUseGlobalAtomicsIsNotSet) { auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); pDevice->resetCommandStreamReceiver(mockCsr); MockKernelWithInternals mockKernel(*pClDevice, context); size_t gws[3] = {1, 0, 0}; mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.flags.useGlobalAtomics = false; clEnqueueNDRangeKernel(this->pCmdQ, mockKernel.mockMultiDeviceKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_FALSE(mockCsr->passedDispatchFlags.useGlobalAtomics); } HWTEST_F(EnqueueKernelTest, givenContextWithSeveralDevicesWhenEnqueueKernelThenDispatchFlagsHaveCorrectInfoAboutMultipleSubDevicesInContext) { auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); pDevice->resetCommandStreamReceiver(mockCsr); MockKernelWithInternals mockKernel(*pClDevice, context); size_t gws[3] = {1, 0, 0}; clEnqueueNDRangeKernel(this->pCmdQ, mockKernel.mockMultiDeviceKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_FALSE(mockCsr->passedDispatchFlags.areMultipleSubDevicesInContext); context->deviceBitfields[rootDeviceIndex].set(7, true); clEnqueueNDRangeKernel(this->pCmdQ, mockKernel.mockMultiDeviceKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_TRUE(mockCsr->passedDispatchFlags.areMultipleSubDevicesInContext); context->deviceBitfields[rootDeviceIndex].set(7, false); } HWTEST_F(EnqueueKernelTest, givenNonVMEKernelWhenEnqueueKernelThenDispatchFlagsDoesntHaveMediaSamplerRequired) { auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, 
pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); pDevice->resetCommandStreamReceiver(mockCsr); MockKernelWithInternals mockKernel(*pClDevice, context); size_t gws[3] = {1, 0, 0}; mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.flags.usesVme = false; clEnqueueNDRangeKernel(this->pCmdQ, mockKernel.mockMultiDeviceKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_FALSE(mockCsr->passedDispatchFlags.pipelineSelectArgs.mediaSamplerRequired); } HWTEST_F(EnqueueKernelTest, whenEnqueueKernelWithEngineHintsThenEpilogRequiredIsSet) { auto &csr = pDevice->getUltCommandStreamReceiver(); size_t off[3] = {0, 0, 0}; size_t gws[3] = {1, 1, 1}; MockKernelWithInternals mockKernel(*pClDevice); pCmdQ->dispatchHints = 1; pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr); EXPECT_EQ(csr.recordedDispatchFlags.epilogueRequired, true); EXPECT_EQ(csr.recordedDispatchFlags.engineHints, 1u); } struct PauseOnGpuTests : public EnqueueKernelTest { void SetUp() override { EnqueueKernelTest::SetUp(); auto &csr = pDevice->getGpgpuCommandStreamReceiver(); debugPauseStateAddress = csr.getDebugPauseStateGPUAddress(); } template bool verifySemaphore(const GenCmdList::iterator &iterator, uint64_t debugPauseStateAddress, DebugPauseState requiredDebugPauseState) { auto semaphoreCmd = genCmdCast(*iterator); if ((static_cast(requiredDebugPauseState) == semaphoreCmd->getSemaphoreDataDword()) && (debugPauseStateAddress == semaphoreCmd->getSemaphoreGraphicsAddress())) { EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_EQUAL_SDD, semaphoreCmd->getCompareOperation()); EXPECT_EQ(MI_SEMAPHORE_WAIT::WAIT_MODE::WAIT_MODE_POLLING_MODE, semaphoreCmd->getWaitMode()); return true; } return false; } template bool verifyPipeControl(const GenCmdList::iterator &iterator, uint64_t debugPauseStateAddress, DebugPauseState requiredDebugPauseState) { using PIPE_CONTROL = 
typename FamilyType::PIPE_CONTROL; auto pipeControlCmd = genCmdCast(*iterator); if ((static_cast(requiredDebugPauseState) == pipeControlCmd->getImmediateData()) && (debugPauseStateAddress == NEO::UnitTestHelper::getPipeControlPostSyncAddress(*pipeControlCmd))) { EXPECT_TRUE(pipeControlCmd->getCommandStreamerStallEnable()); EXPECT_EQ(MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo), pipeControlCmd->getDcFlushEnable()); EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, pipeControlCmd->getPostSyncOperation()); return true; } return false; } template bool verifyLoadRegImm(const GenCmdList::iterator &iterator) { using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; uint32_t expectedRegisterOffset = DebugManager.flags.GpuScratchRegWriteRegisterOffset.get(); uint32_t expectedRegisterData = DebugManager.flags.GpuScratchRegWriteRegisterData.get(); auto loadRegImm = genCmdCast(*iterator); if ((expectedRegisterOffset == loadRegImm->getRegisterOffset()) && (expectedRegisterData == loadRegImm->getDataDword())) { return true; } return false; } template void findSemaphores(GenCmdList &cmdList) { auto semaphore = find(cmdList.begin(), cmdList.end()); while (semaphore != cmdList.end()) { if (verifySemaphore(semaphore, debugPauseStateAddress, DebugPauseState::hasUserStartConfirmation)) { semaphoreBeforeWalkerFound++; } if (verifySemaphore(semaphore, debugPauseStateAddress, DebugPauseState::hasUserEndConfirmation)) { semaphoreAfterWalkerFound++; } semaphore = find(++semaphore, cmdList.end()); } } template void findPipeControls(GenCmdList &cmdList) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; auto pipeControl = find(cmdList.begin(), cmdList.end()); while (pipeControl != cmdList.end()) { if (verifyPipeControl(pipeControl, debugPauseStateAddress, DebugPauseState::waitingForUserStartConfirmation)) { pipeControlBeforeWalkerFound++; } if (verifyPipeControl(pipeControl, debugPauseStateAddress, 
DebugPauseState::waitingForUserEndConfirmation)) { pipeControlAfterWalkerFound++; } pipeControl = find(++pipeControl, cmdList.end()); } } template void findLoadRegImms(GenCmdList &cmdList) { using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; auto loadRegImm = find(cmdList.begin(), cmdList.end()); while (loadRegImm != cmdList.end()) { if (verifyLoadRegImm(loadRegImm)) { loadRegImmsFound++; } loadRegImm = find(++loadRegImm, cmdList.end()); } } DebugManagerStateRestore restore; const size_t off[3] = {0, 0, 0}; const size_t gws[3] = {1, 1, 1}; uint64_t debugPauseStateAddress = 0; uint32_t semaphoreBeforeWalkerFound = 0; uint32_t semaphoreAfterWalkerFound = 0; uint32_t pipeControlBeforeWalkerFound = 0; uint32_t pipeControlAfterWalkerFound = 0; uint32_t loadRegImmsFound = 0; }; HWTEST_F(PauseOnGpuTests, givenPauseOnEnqueueFlagSetWhenDispatchWalkersThenInsertPauseCommandsAroundSpecifiedEnqueue) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; DebugManager.flags.PauseOnEnqueue.set(1); MockKernelWithInternals mockKernel(*pClDevice); pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr); pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr); ClHardwareParse hwParser; hwParser.parseCommands(*pCmdQ); findSemaphores(hwParser.cmdList); EXPECT_EQ(1u, semaphoreBeforeWalkerFound); EXPECT_EQ(1u, semaphoreAfterWalkerFound); findPipeControls(hwParser.cmdList); EXPECT_EQ(1u, pipeControlBeforeWalkerFound); EXPECT_EQ(1u, pipeControlAfterWalkerFound); } HWTEST_F(PauseOnGpuTests, givenPauseOnEnqueueFlagSetToMinusTwoWhenDispatchWalkersThenInsertPauseCommandsAroundEachEnqueue) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; DebugManager.flags.PauseOnEnqueue.set(-2); MockKernelWithInternals mockKernel(*pClDevice); pCmdQ->enqueueKernel(mockKernel.mockKernel, 
1, off, gws, nullptr, 0, nullptr, nullptr); pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr); ClHardwareParse hwParser; hwParser.parseCommands(*pCmdQ); findSemaphores(hwParser.cmdList); findPipeControls(hwParser.cmdList); EXPECT_EQ(2u, semaphoreBeforeWalkerFound); EXPECT_EQ(2u, semaphoreAfterWalkerFound); EXPECT_EQ(2u, pipeControlBeforeWalkerFound); EXPECT_EQ(2u, pipeControlAfterWalkerFound); } HWTEST_F(PauseOnGpuTests, givenPauseModeSetToBeforeOnlyWhenDispatchingThenInsertPauseOnlyBeforeEnqueue) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; DebugManager.flags.PauseOnEnqueue.set(0); DebugManager.flags.PauseOnGpuMode.set(PauseOnGpuProperties::PauseMode::BeforeWorkload); MockKernelWithInternals mockKernel(*pClDevice); pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr); ClHardwareParse hwParser; hwParser.parseCommands(*pCmdQ); findSemaphores(hwParser.cmdList); findPipeControls(hwParser.cmdList); EXPECT_EQ(1u, semaphoreBeforeWalkerFound); EXPECT_EQ(0u, semaphoreAfterWalkerFound); EXPECT_EQ(1u, pipeControlBeforeWalkerFound); EXPECT_EQ(0u, pipeControlAfterWalkerFound); } HWTEST_F(PauseOnGpuTests, givenPauseModeSetToAfterOnlyWhenDispatchingThenInsertPauseOnlyAfterEnqueue) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; DebugManager.flags.PauseOnEnqueue.set(0); DebugManager.flags.PauseOnGpuMode.set(PauseOnGpuProperties::PauseMode::AfterWorkload); MockKernelWithInternals mockKernel(*pClDevice); pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr); ClHardwareParse hwParser; hwParser.parseCommands(*pCmdQ); findSemaphores(hwParser.cmdList); findPipeControls(hwParser.cmdList); EXPECT_EQ(0u, semaphoreBeforeWalkerFound); EXPECT_EQ(1u, semaphoreAfterWalkerFound); EXPECT_EQ(0u, pipeControlBeforeWalkerFound); EXPECT_EQ(1u, 
pipeControlAfterWalkerFound); } HWTEST_F(PauseOnGpuTests, givenPauseModeSetToBeforeAndAfterWhenDispatchingThenInsertPauseAroundEnqueue) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; DebugManager.flags.PauseOnEnqueue.set(0); DebugManager.flags.PauseOnGpuMode.set(PauseOnGpuProperties::PauseMode::BeforeAndAfterWorkload); MockKernelWithInternals mockKernel(*pClDevice); pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr); ClHardwareParse hwParser; hwParser.parseCommands(*pCmdQ); findSemaphores(hwParser.cmdList); findPipeControls(hwParser.cmdList); EXPECT_EQ(1u, semaphoreBeforeWalkerFound); EXPECT_EQ(1u, semaphoreAfterWalkerFound); EXPECT_EQ(1u, pipeControlBeforeWalkerFound); EXPECT_EQ(1u, pipeControlAfterWalkerFound); } HWTEST_F(PauseOnGpuTests, givenPauseOnEnqueueFlagSetWhenDispatchWalkersThenDontInsertPauseCommandsWhenUsingSpecialQueue) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; DebugManager.flags.PauseOnEnqueue.set(0); pCmdQ->setIsSpecialCommandQueue(true); MockKernelWithInternals mockKernel(*pClDevice); pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr); ClHardwareParse hwParser; hwParser.parseCommands(*pCmdQ); findSemaphores(hwParser.cmdList); findPipeControls(hwParser.cmdList); EXPECT_EQ(0u, semaphoreBeforeWalkerFound); EXPECT_EQ(0u, semaphoreAfterWalkerFound); EXPECT_EQ(0u, pipeControlBeforeWalkerFound); EXPECT_EQ(0u, pipeControlAfterWalkerFound); pCmdQ->setIsSpecialCommandQueue(false); } HWTEST_F(PauseOnGpuTests, givenGpuScratchWriteEnabledWhenDispatchWalkersThenInsertLoadRegisterImmCommandAroundSpecifiedEnqueue) { DebugManager.flags.GpuScratchRegWriteAfterWalker.set(1); DebugManager.flags.GpuScratchRegWriteRegisterData.set(0x1234); DebugManager.flags.GpuScratchRegWriteRegisterOffset.set(0x5678); MockKernelWithInternals 
mockKernel(*pClDevice); pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr); ClHardwareParse hwParser; hwParser.parseCommands(*pCmdQ); findLoadRegImms(hwParser.cmdList); EXPECT_EQ(0u, loadRegImmsFound); pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr); hwParser.parseCommands(*pCmdQ); findLoadRegImms(hwParser.cmdList); EXPECT_EQ(1u, loadRegImmsFound); } HWTEST_F(PauseOnGpuTests, givenGpuScratchWriteEnabledWhenDispatcMultiplehWalkersThenInsertLoadRegisterImmCommandOnlyOnce) { DebugManager.flags.GpuScratchRegWriteAfterWalker.set(1); DebugManager.flags.GpuScratchRegWriteRegisterData.set(0x1234); DebugManager.flags.GpuScratchRegWriteRegisterOffset.set(0x5678); MockKernelWithInternals mockKernel(*pClDevice); pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr); pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr); pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr); pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr); ClHardwareParse hwParser; hwParser.parseCommands(*pCmdQ); findLoadRegImms(hwParser.cmdList); EXPECT_EQ(1u, loadRegImmsFound); } HWTEST_F(PauseOnGpuTests, givenGpuScratchWriteEnabledWhenEstimatingCommandStreamSizeThenMiLoadRegisterImmCommandSizeIsIncluded) { MockKernelWithInternals mockKernel(*pClDevice); DispatchInfo dispatchInfo; MultiDispatchInfo multiDispatchInfo(mockKernel.mockKernel); dispatchInfo.setKernel(mockKernel.mockKernel); multiDispatchInfo.push(dispatchInfo); auto baseCommandStreamSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false); DebugManager.flags.GpuScratchRegWriteAfterWalker.set(1); auto extendedCommandStreamSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, 
false); EXPECT_EQ(baseCommandStreamSize + sizeof(typename FamilyType::MI_LOAD_REGISTER_IMM), extendedCommandStreamSize); } compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp000066400000000000000000001310771422164147700316400ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_container/implicit_scaling.h" #include "shared/source/command_stream/scratch_space_controller.h" #include "shared/source/command_stream/wait_status.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/memory_manager/allocations_list.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/mocks/mock_csr.h" #include "shared/test/common/mocks/ult_device_factory.h" #include "shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/test/unit_test/command_queue/enqueue_fixture.h" #include "opencl/test/unit_test/fixtures/hello_world_fixture.h" #include "opencl/test/unit_test/gen_common/gen_commands_common_validation.h" #include "opencl/test/unit_test/helpers/cl_hw_parse.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/test_macros/test_checks_ocl.h" #include "reg_configs_common.h" using namespace NEO; struct TestParam2 { uint32_t scratchSize; } TestParamTable2[] = {{1024u}, {2048u}, {4096u}, {8192u}, {16384u}}; struct TestParam { cl_uint globalWorkSizeX; cl_uint globalWorkSizeY; cl_uint globalWorkSizeZ; cl_uint localWorkSizeX; cl_uint localWorkSizeY; cl_uint localWorkSizeZ; } TestParamTable[] = { {1, 1, 1, 1, 1, 1}, {16, 1, 1, 1, 1, 1}, {16, 1, 1, 16, 1, 1}, {32, 1, 1, 1, 1, 1}, {32, 1, 1, 16, 1, 1}, {32, 1, 1, 32, 1, 1}, {64, 1, 1, 1, 1, 1}, {64, 1, 1, 16, 1, 1}, {64, 1, 1, 32, 1, 1}, {64, 1, 1, 64, 1, 1}, {190, 1, 1, 95, 1, 1}, {510, 1, 1, 
255, 1, 1}, {512, 1, 1, 256, 1, 1}}, OneEntryTestParamTable[] = { {1, 1, 1, 1, 1, 1}, }; template struct EnqueueKernelTypeTest : public HelloWorldFixture, public ClHardwareParse, ::testing::TestWithParam { typedef HelloWorldFixture ParentClass; using ParentClass::pCmdBuffer; using ParentClass::pCS; EnqueueKernelTypeTest() { } void FillValues() { globalWorkSize[0] = 1; globalWorkSize[1] = 1; globalWorkSize[2] = 1; localWorkSize[0] = 1; localWorkSize[1] = 1; localWorkSize[2] = 1; }; template typename std::enable_if::type enqueueKernel(Kernel *inputKernel = nullptr) { cl_uint workDim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event *event = nullptr; FillValues(); // Compute # of expected work items expectedWorkItems = 1; for (auto i = 0u; i < workDim; i++) { expectedWorkItems *= globalWorkSize[i]; } auto usedKernel = inputKernel ? inputKernel : pKernel; auto retVal = pCmdQ->enqueueKernel( usedKernel, workDim, globalWorkOffset, globalWorkSize, localWorkSize, numEventsInWaitList, eventWaitList, event); ASSERT_EQ(CL_SUCCESS, retVal); } template typename std::enable_if::type enqueueKernel(Kernel *inputKernel = nullptr) { enqueueKernel(inputKernel); parseCommands(*pCmdQ); } template void enqueueKernel(Kernel *inputKernel = nullptr) { enqueueKernel(inputKernel); } void SetUp() override { ParentClass::SetUp(); ClHardwareParse::SetUp(); } void TearDown() override { ClHardwareParse::TearDown(); ParentClass::TearDown(); } size_t globalWorkSize[3]; size_t localWorkSize[3]; size_t expectedWorkItems = 0; }; template <> void EnqueueKernelTypeTest::FillValues() { const TestParam ¶m = GetParam(); globalWorkSize[0] = param.globalWorkSizeX; globalWorkSize[1] = param.globalWorkSizeY; globalWorkSize[2] = param.globalWorkSizeZ; localWorkSize[0] = param.localWorkSizeX; localWorkSize[1] = param.localWorkSizeY; localWorkSize[2] = param.localWorkSizeZ; } typedef EnqueueKernelTypeTest EnqueueWorkItemTests; typedef 
EnqueueKernelTypeTest EnqueueWorkItemTestsWithLimitedParamSet; HWCMDTEST_P(IGFX_GEN8_CORE, EnqueueWorkItemTests, WhenEnqueingKernelThenGpgpuWalkerIsProgrammedCorrectly) { typedef typename FamilyType::PARSE PARSE; typedef typename PARSE::GPGPU_WALKER GPGPU_WALKER; enqueueKernel(); ASSERT_NE(cmdList.end(), itorWalker); auto *cmd = (GPGPU_WALKER *)*itorWalker; // Verify GPGPU_WALKER parameters EXPECT_NE(0u, cmd->getThreadGroupIdXDimension()); EXPECT_NE(0u, cmd->getThreadGroupIdYDimension()); EXPECT_NE(0u, cmd->getThreadGroupIdZDimension()); EXPECT_NE(0u, cmd->getRightExecutionMask()); EXPECT_NE(0u, cmd->getBottomExecutionMask()); EXPECT_EQ(GPGPU_WALKER::SIMD_SIZE_SIMD32, cmd->getSimdSize()); EXPECT_NE(0u, cmd->getIndirectDataLength()); EXPECT_FALSE(cmd->getIndirectParameterEnable()); // Compute the SIMD lane mask size_t simd = cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD32 ? 32 : cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD16 ? 16 : 8; uint64_t simdMask = maxNBitValue(simd); // Mask off lanes based on the execution masks auto laneMaskRight = cmd->getRightExecutionMask() & simdMask; auto lanesPerThreadX = 0; while (laneMaskRight) { lanesPerThreadX += laneMaskRight & 1; laneMaskRight >>= 1; } auto numWorkItems = ((cmd->getThreadWidthCounterMaximum() - 1) * simd + lanesPerThreadX) * cmd->getThreadGroupIdXDimension(); EXPECT_EQ(expectedWorkItems, numWorkItems); } HWCMDTEST_P(IGFX_GEN8_CORE, EnqueueWorkItemTestsWithLimitedParamSet, WhenEnqueingKernelThenLoadRegisterImmediateL3CntrlregIsCorrect) { enqueueKernel(); validateL3Programming(cmdList, itorWalker); } HWCMDTEST_P(IGFX_GEN8_CORE, EnqueueWorkItemTestsWithLimitedParamSet, WhenEnqueueIsDoneThenStateBaseAddressIsProperlyProgrammed) { enqueueKernel(); auto &ultCsr = this->pDevice->getUltCommandStreamReceiver(); auto &hwHelper = HwHelper::get(pDevice->getHardwareInfo().platform.eRenderCoreFamily); validateStateBaseAddress(ultCsr.getMemoryManager()->getInternalHeapBaseAddress(ultCsr.rootDeviceIndex, 
pIOH->getGraphicsAllocation()->isAllocatedInLocalMemoryPool()), ultCsr.getMemoryManager()->getInternalHeapBaseAddress(ultCsr.rootDeviceIndex, !hwHelper.useSystemMemoryPlacementForISA(pDevice->getHardwareInfo())), pDSH, pIOH, pSSH, itorPipelineSelect, itorWalker, cmdList, context->getMemoryManager()->peekForce32BitAllocations() ? context->getMemoryManager()->getExternalHeapBaseAddress(ultCsr.rootDeviceIndex, false) : 0llu); } HWCMDTEST_P(IGFX_GEN8_CORE, EnqueueWorkItemTestsWithLimitedParamSet, WhenEnqueingKernelThenMediaInterfaceDescriptorLoadIsCorrect) { typedef typename FamilyType::PARSE PARSE; typedef typename PARSE::MEDIA_INTERFACE_DESCRIPTOR_LOAD MEDIA_INTERFACE_DESCRIPTOR_LOAD; typedef typename PARSE::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; enqueueKernel(); // All state should be programmed before walker auto itorCmd = find(itorPipelineSelect, itorWalker); ASSERT_NE(itorWalker, itorCmd); auto *cmd = genCmdCast(*itorCmd); // Verify we have a valid length -- multiple of INTERFACE_DESCRIPTOR_DATAs EXPECT_EQ(0u, cmd->getInterfaceDescriptorTotalLength() % sizeof(INTERFACE_DESCRIPTOR_DATA)); // Validate the start address size_t alignmentStartAddress = 64 * sizeof(uint8_t); EXPECT_EQ(0u, cmd->getInterfaceDescriptorDataStartAddress() % alignmentStartAddress); // Validate the length EXPECT_NE(0u, cmd->getInterfaceDescriptorTotalLength()); size_t alignmentTotalLength = 32 * sizeof(uint8_t); EXPECT_EQ(0u, cmd->getInterfaceDescriptorTotalLength() % alignmentTotalLength); // Generically validate this command PARSE::template validateCommand(cmdList.begin(), itorCmd); } HWCMDTEST_P(IGFX_GEN8_CORE, EnqueueWorkItemTestsWithLimitedParamSet, WhenEnqueingKernelThenInterfaceDescriptorDataIsCorrect) { typedef typename FamilyType::PARSE PARSE; typedef typename PARSE::MEDIA_INTERFACE_DESCRIPTOR_LOAD MEDIA_INTERFACE_DESCRIPTOR_LOAD; typedef typename PARSE::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; typedef typename PARSE::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; 
enqueueKernel(); // Extract the MIDL command auto itorCmd = find(itorPipelineSelect, itorWalker); ASSERT_NE(itorWalker, itorCmd); auto *cmdMIDL = (MEDIA_INTERFACE_DESCRIPTOR_LOAD *)*itorCmd; // Extract the SBA command itorCmd = find(cmdList.begin(), itorWalker); ASSERT_NE(itorWalker, itorCmd); auto *cmdSBA = (STATE_BASE_ADDRESS *)*itorCmd; // Extrach the DSH auto DSH = cmdSBA->getDynamicStateBaseAddress(); ASSERT_NE(0u, DSH); // IDD should be located within DSH auto iddStart = cmdMIDL->getInterfaceDescriptorDataStartAddress(); auto IDDEnd = iddStart + cmdMIDL->getInterfaceDescriptorTotalLength(); ASSERT_LE(IDDEnd, cmdSBA->getDynamicStateBufferSize() * MemoryConstants::pageSize); auto &IDD = *(INTERFACE_DESCRIPTOR_DATA *)cmdInterfaceDescriptorData; // Validate the kernel start pointer. Technically, a kernel can start at address 0 but let's force a value. auto kernelStartPointer = ((uint64_t)IDD.getKernelStartPointerHigh() << 32) + IDD.getKernelStartPointer(); EXPECT_LE(kernelStartPointer, cmdSBA->getInstructionBufferSize() * MemoryConstants::pageSize); EXPECT_NE(0u, IDD.getNumberOfThreadsInGpgpuThreadGroup()); EXPECT_NE(0u, IDD.getCrossThreadConstantDataReadLength()); EXPECT_NE(0u, IDD.getConstantIndirectUrbEntryReadLength()); } HWCMDTEST_P(IGFX_GEN8_CORE, EnqueueWorkItemTestsWithLimitedParamSet, givenDebugVariableToOverrideMOCSWhenStateBaseAddressIsBeingProgrammedThenItContainsDesiredIndex) { DebugManagerStateRestore restore; DebugManager.flags.OverrideStatelessMocsIndex.set(1); typedef typename FamilyType::PARSE PARSE; typedef typename PARSE::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; enqueueKernel(); // Extract the SBA command auto itorCmd = find(cmdList.begin(), cmdList.end()); ASSERT_NE(itorWalker, itorCmd); auto *cmdSBA = (STATE_BASE_ADDRESS *)*itorCmd; auto mocsProgrammed = cmdSBA->getStatelessDataPortAccessMemoryObjectControlState() >> 1; EXPECT_EQ(1u, mocsProgrammed); } HWCMDTEST_P(IGFX_GEN8_CORE, EnqueueWorkItemTestsWithLimitedParamSet, 
WhenEnqueingKernelThenOnePipelineSelectIsProgrammed) { enqueueKernel(); int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect(); EXPECT_EQ(1, numCommands); } HWCMDTEST_P(IGFX_GEN8_CORE, EnqueueWorkItemTestsWithLimitedParamSet, WhenEnqueingKernelThenMediaVfeStateIsCorrect) { enqueueKernel(); validateMediaVFEState(&pDevice->getHardwareInfo(), cmdMediaVfeState, cmdList, itorMediaVfeState); } INSTANTIATE_TEST_CASE_P(EnqueueKernel, EnqueueWorkItemTests, ::testing::ValuesIn(TestParamTable)); INSTANTIATE_TEST_CASE_P(EnqueueKernel, EnqueueWorkItemTestsWithLimitedParamSet, ::testing::ValuesIn(OneEntryTestParamTable)); typedef EnqueueKernelTypeTest EnqueueScratchSpaceTests; HWCMDTEST_P(IGFX_GEN8_CORE, EnqueueScratchSpaceTests, GivenKernelRequiringScratchWhenItIsEnqueuedWithDifferentScratchSizesThenMediaVFEStateAndStateBaseAddressAreProperlyProgrammed) { typedef typename FamilyType::PARSE PARSE; typedef typename PARSE::MEDIA_VFE_STATE MEDIA_VFE_STATE; typedef typename PARSE::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; auto &csr = pDevice->getUltCommandStreamReceiver(); csr.getMemoryManager()->setForce32BitAllocations(false); EXPECT_TRUE(csr.getAllocationsForReuse().peekIsEmpty()); auto scratchSize = GetParam().scratchSize; MockKernelWithInternals mockKernel(*pClDevice); mockKernel.kernelInfo.setPerThreadScratchSize(scratchSize, 0); uint32_t sizeToProgram = (scratchSize / static_cast(MemoryConstants::kiloByte)); uint32_t bitValue = 0u; while (sizeToProgram >>= 1) { bitValue++; } auto valueToProgram = PreambleHelper::getScratchSizeValueToProgramMediaVfeState(scratchSize); EXPECT_EQ(bitValue, valueToProgram); enqueueKernel(mockKernel); // All state should be programmed before walker auto itorCmd = find(itorPipelineSelect, itorWalker); auto itorCmdForStateBase = find(itorPipelineSelect, itorWalker); ASSERT_NE(itorWalker, itorCmd); ASSERT_NE(itorWalker, itorCmdForStateBase); auto *cmd = (MEDIA_VFE_STATE *)*itorCmd; auto *sba = (STATE_BASE_ADDRESS *)*itorCmdForStateBase; 
const HardwareInfo &hwInfo = *defaultHwInfo; uint32_t threadPerEU = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount) + hwInfo.capabilityTable.extraQuantityThreadsPerEU; uint32_t maxNumberOfThreads = hwInfo.gtSystemInfo.EUCount * threadPerEU; // Verify we have a valid length EXPECT_EQ(maxNumberOfThreads, cmd->getMaximumNumberOfThreads()); EXPECT_NE(0u, cmd->getNumberOfUrbEntries()); EXPECT_NE(0u, cmd->getUrbEntryAllocationSize()); EXPECT_EQ(bitValue, cmd->getPerThreadScratchSpace()); EXPECT_EQ(bitValue, cmd->getStackSize()); auto graphicsAllocation = csr.getScratchAllocation(); auto GSHaddress = sba->getGeneralStateBaseAddress(); if (is32bit) { EXPECT_NE(0u, cmd->getScratchSpaceBasePointer()); EXPECT_EQ(0u, GSHaddress); } else { EXPECT_EQ(ScratchSpaceConstants::scratchSpaceOffsetFor64Bit, cmd->getScratchSpaceBasePointer()); EXPECT_EQ(GSHaddress + ScratchSpaceConstants::scratchSpaceOffsetFor64Bit, graphicsAllocation->getGpuAddress()); } auto allocationSize = scratchSize * pDevice->getDeviceInfo().computeUnitsUsedForScratch; EXPECT_EQ(graphicsAllocation->getUnderlyingBufferSize(), allocationSize); // Generically validate this command PARSE::template validateCommand(cmdList.begin(), itorCmd); //skip if size to big 4MB, no point in stressing memory allocator. 
if (allocationSize > 4194304) { return; } scratchSize *= 2; mockKernel.kernelInfo.setPerThreadScratchSize(scratchSize, 0); auto itorfirstBBEnd = find(itorWalker, cmdList.end()); ASSERT_NE(cmdList.end(), itorfirstBBEnd); enqueueKernel(mockKernel); bitValue++; itorCmd = find(itorfirstBBEnd, cmdList.end()); itorCmdForStateBase = find(itorWalker, cmdList.end()); ASSERT_NE(itorWalker, itorCmd); if constexpr (is64bit) { ASSERT_NE(itorCmdForStateBase, itorCmd); } else { //no SBA not dirty ASSERT_EQ(itorCmdForStateBase, cmdList.end()); } auto *cmd2 = (MEDIA_VFE_STATE *)*itorCmd; // Verify we have a valid length EXPECT_EQ(maxNumberOfThreads, cmd2->getMaximumNumberOfThreads()); EXPECT_NE(0u, cmd2->getNumberOfUrbEntries()); EXPECT_NE(0u, cmd2->getUrbEntryAllocationSize()); EXPECT_EQ(bitValue, cmd2->getPerThreadScratchSpace()); EXPECT_EQ(bitValue, cmd2->getStackSize()); auto graphicsAllocation2 = csr.getScratchAllocation(); if (is32bit) { auto scratchBase = cmd2->getScratchSpaceBasePointer(); EXPECT_NE(0u, scratchBase); auto graphicsAddress = graphicsAllocation2->getGpuAddress(); EXPECT_EQ(graphicsAddress, scratchBase); } else { auto *sba2 = (STATE_BASE_ADDRESS *)*itorCmdForStateBase; auto GSHaddress2 = sba2->getGeneralStateBaseAddress(); EXPECT_NE(0u, GSHaddress2); EXPECT_EQ(ScratchSpaceConstants::scratchSpaceOffsetFor64Bit, cmd2->getScratchSpaceBasePointer()); EXPECT_NE(GSHaddress2, GSHaddress); } EXPECT_EQ(graphicsAllocation->getUnderlyingBufferSize(), allocationSize); EXPECT_NE(graphicsAllocation2, graphicsAllocation); // Generically validate this command PARSE::template validateCommand(cmdList.begin(), itorCmd); // Trigger SBA generation IndirectHeap dirtyDsh(nullptr); csr.dshState.updateAndCheck(&dirtyDsh); enqueueKernel(mockKernel); auto finalItorToSBA = find(itorCmd, cmdList.end()); ASSERT_NE(finalItorToSBA, cmdList.end()); auto *finalSba2 = (STATE_BASE_ADDRESS *)*finalItorToSBA; auto GSBaddress = finalSba2->getGeneralStateBaseAddress(); if constexpr (is32bit) { 
EXPECT_EQ(0u, GSBaddress); } else { EXPECT_EQ(graphicsAllocation2->getGpuAddress(), GSBaddress + ScratchSpaceConstants::scratchSpaceOffsetFor64Bit); } EXPECT_TRUE(csr.getAllocationsForReuse().peekIsEmpty()); } INSTANTIATE_TEST_CASE_P(EnqueueKernel, EnqueueScratchSpaceTests, ::testing::ValuesIn(TestParamTable2)); typedef EnqueueKernelTypeTest EnqueueKernelWithScratch; HWTEST_P(EnqueueKernelWithScratch, GivenKernelRequiringScratchWhenItIsEnqueuedWithDifferentScratchSizesThenPreviousScratchAllocationIsMadeNonResidentPriorStoringOnResueList) { auto mockCsr = new MockCsrHw(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(mockCsr); uint32_t scratchSize = 1024u; MockKernelWithInternals mockKernel(*pClDevice); mockKernel.kernelInfo.setPerThreadScratchSize(scratchSize, 0); uint32_t sizeToProgram = (scratchSize / static_cast(MemoryConstants::kiloByte)); uint32_t bitValue = 0u; while (sizeToProgram >>= 1) { bitValue++; } auto valueToProgram = PreambleHelper::getScratchSizeValueToProgramMediaVfeState(scratchSize); EXPECT_EQ(bitValue, valueToProgram); enqueueKernel(mockKernel); auto graphicsAllocation = mockCsr->getScratchAllocation(); EXPECT_TRUE(mockCsr->isMadeResident(graphicsAllocation)); // Enqueue With ScratchSize bigger than previous scratchSize = 8196; mockKernel.kernelInfo.setPerThreadScratchSize(scratchSize, 0); enqueueKernel(mockKernel); EXPECT_TRUE(mockCsr->isMadeNonResident(graphicsAllocation)); } HWCMDTEST_P(IGFX_GEN8_CORE, EnqueueKernelWithScratch, givenDeviceForcing32bitAllocationsWhenKernelWithScratchIsEnqueuedThenGeneralStateHeapBaseAddressIsCorrectlyProgrammedAndMediaVFEStateContainsProgramming) { typedef typename FamilyType::PARSE PARSE; typedef typename PARSE::MEDIA_VFE_STATE MEDIA_VFE_STATE; typedef typename PARSE::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; if constexpr (is64bit) { CommandStreamReceiver *csr = &pDevice->getGpgpuCommandStreamReceiver(); auto memoryManager = 
csr->getMemoryManager(); memoryManager->setForce32BitAllocations(true); auto scratchSize = 1024; MockKernelWithInternals mockKernel(*pClDevice); mockKernel.kernelInfo.setPerThreadScratchSize(scratchSize, 0); enqueueKernel(mockKernel); auto graphicsAllocation = csr->getScratchAllocation(); EXPECT_TRUE(graphicsAllocation->is32BitAllocation()); auto graphicsAddress = (uint64_t)graphicsAllocation->getGpuAddress(); auto baseAddress = graphicsAllocation->getGpuBaseAddress(); // All state should be programmed before walker auto itorCmd = find(itorPipelineSelect, itorWalker); auto itorCmdForStateBase = find(itorPipelineSelect, itorWalker); auto *mediaVfeState = (MEDIA_VFE_STATE *)*itorCmd; auto scratchBaseLowPart = (uint64_t)mediaVfeState->getScratchSpaceBasePointer(); auto scratchBaseHighPart = (uint64_t)mediaVfeState->getScratchSpaceBasePointerHigh(); uint64_t scratchBaseAddr = scratchBaseHighPart << 32 | scratchBaseLowPart; EXPECT_EQ(graphicsAddress - baseAddress, scratchBaseAddr); ASSERT_NE(itorCmdForStateBase, itorWalker); auto *sba = (STATE_BASE_ADDRESS *)*itorCmdForStateBase; auto GSHaddress = sba->getGeneralStateBaseAddress(); EXPECT_EQ(memoryManager->getExternalHeapBaseAddress(graphicsAllocation->getRootDeviceIndex(), graphicsAllocation->isAllocatedInLocalMemoryPool()), GSHaddress); //now re-try to see if SBA is not programmed scratchSize *= 2; mockKernel.kernelInfo.setPerThreadScratchSize(scratchSize, 0); enqueueKernel(mockKernel); itorCmdForStateBase = find(itorWalker, cmdList.end()); EXPECT_EQ(itorCmdForStateBase, cmdList.end()); } } INSTANTIATE_TEST_CASE_P(EnqueueKernel, EnqueueKernelWithScratch, testing::Values(1)); TestParam TestParamPrintf[] = { {1, 1, 1, 1, 1, 1}}; typedef EnqueueKernelTypeTest EnqueueKernelPrintfTest; HWTEST_P(EnqueueKernelPrintfTest, GivenKernelWithPrintfThenPatchCrossThreadData) { typedef typename FamilyType::PARSE PARSE; MockKernelWithInternals mockKernel(*pClDevice); mockKernel.crossThreadData[64] = 0; 
mockKernel.kernelInfo.setPrintfSurface(sizeof(uintptr_t), 64); enqueueKernel(mockKernel); EXPECT_EQ(mockKernel.crossThreadData[64], 0); } HWTEST_P(EnqueueKernelPrintfTest, GivenKernelWithPrintfWhenBeingDispatchedThenL3CacheIsFlushed) { typedef typename FamilyType::PARSE PARSE; MockCommandQueueHw mockCmdQueue(context, pClDevice, nullptr); MockKernelWithInternals mockKernel(*pClDevice); mockKernel.crossThreadData[64] = 0; mockKernel.kernelInfo.setPrintfSurface(sizeof(uintptr_t), 64); auto &csr = mockCmdQueue.getGpgpuCommandStreamReceiver(); auto latestSentTaskCount = csr.peekTaskCount(); cl_uint workDim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event *event = nullptr; FillValues(); // Compute # of expected work items expectedWorkItems = 1; for (auto i = 0u; i < workDim; i++) { expectedWorkItems *= globalWorkSize[i]; } auto retVal = mockCmdQueue.enqueueKernel( mockKernel, workDim, globalWorkOffset, globalWorkSize, localWorkSize, numEventsInWaitList, eventWaitList, event); ASSERT_EQ(CL_SUCCESS, retVal); auto newLatestSentTaskCount = csr.peekTaskCount(); EXPECT_GT(newLatestSentTaskCount, latestSentTaskCount); EXPECT_EQ(mockCmdQueue.latestTaskCountWaited, newLatestSentTaskCount); } HWCMDTEST_P(IGFX_GEN8_CORE, EnqueueKernelPrintfTest, GivenKernelWithPrintfBlockedByEventWhenEventUnblockedThenL3CacheIsFlushed) { typedef typename FamilyType::PARSE PARSE; UserEvent userEvent(context); MockCommandQueueHw mockCommandQueue(context, pClDevice, nullptr); MockKernelWithInternals mockKernel(*pClDevice); mockKernel.crossThreadData[64] = 0; mockKernel.kernelInfo.setPrintfSurface(sizeof(uintptr_t), 64); auto &csr = mockCommandQueue.getGpgpuCommandStreamReceiver(); auto latestSentDcFlushTaskCount = csr.peekTaskCount(); cl_uint workDim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; FillValues(); cl_event blockedEvent = &userEvent; auto retVal = mockCommandQueue.enqueueKernel( mockKernel, workDim, globalWorkOffset, 
globalWorkSize, localWorkSize, 1, &blockedEvent, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); userEvent.setStatus(CL_COMPLETE); parseCommands(mockCommandQueue); auto newLatestSentDCFlushTaskCount = csr.peekTaskCount(); EXPECT_GT(newLatestSentDCFlushTaskCount, latestSentDcFlushTaskCount); EXPECT_EQ(mockCommandQueue.latestTaskCountWaited, newLatestSentDCFlushTaskCount); } HWTEST_P(EnqueueKernelPrintfTest, GivenKernelWithPrintfBlockedByEventWhenEventUnblockedThenOutputPrinted) { auto userEvent = make_releaseable(context); MockKernelWithInternals mockKernel(*pClDevice); std::string testString = "test"; mockKernel.kernelInfo.addToPrintfStringsMap(0, testString); mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.flags.usesPrintf = false; mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.flags.usesStringMapForPrintf = true; mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.binaryFormat = DeviceBinaryFormat::Patchtokens; UnitTestHelper::adjustKernelDescriptorForImplicitArgs(mockKernel.kernelInfo.kernelDescriptor); mockKernel.mockKernel->pImplicitArgs = std::make_unique(); *mockKernel.mockKernel->pImplicitArgs = {}; cl_uint workDim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; FillValues(); cl_event blockedEvent = userEvent.get(); cl_event outEvent{}; auto retVal = pCmdQ->enqueueKernel( mockKernel, workDim, globalWorkOffset, globalWorkSize, localWorkSize, 1, &blockedEvent, &outEvent); ASSERT_EQ(CL_SUCCESS, retVal); auto pOutEvent = castToObject(outEvent); auto printfAllocation = reinterpret_cast(static_cast(pOutEvent->peekCommand())->peekPrintfHandler()->getSurface()->getUnderlyingBuffer()); printfAllocation[0] = 8; printfAllocation[1] = 0; pOutEvent->release(); testing::internal::CaptureStdout(); userEvent->setStatus(CL_COMPLETE); std::string output = testing::internal::GetCapturedStdout(); EXPECT_STREQ("test", output.c_str()); } HWTEST_P(EnqueueKernelPrintfTest, 
GivenKernelWithPrintfWithStringMapDisbaledAndImplicitArgsBlockedByEventWhenEventUnblockedThenOutputPrinted) { auto userEvent = make_releaseable(context); MockKernelWithInternals mockKernel(*pClDevice); std::string testString = "test"; mockKernel.kernelInfo.addToPrintfStringsMap(0, testString); mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.flags.usesPrintf = false; mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.flags.usesStringMapForPrintf = false; UnitTestHelper::adjustKernelDescriptorForImplicitArgs(mockKernel.kernelInfo.kernelDescriptor); mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.binaryFormat = DeviceBinaryFormat::Patchtokens; mockKernel.mockKernel->pImplicitArgs = std::make_unique(); *mockKernel.mockKernel->pImplicitArgs = {}; cl_uint workDim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; FillValues(); cl_event blockedEvent = userEvent.get(); cl_event outEvent{}; auto retVal = pCmdQ->enqueueKernel( mockKernel, workDim, globalWorkOffset, globalWorkSize, localWorkSize, 1, &blockedEvent, &outEvent); ASSERT_EQ(CL_SUCCESS, retVal); auto pOutEvent = castToObject(outEvent); auto printfAllocation = reinterpret_cast(static_cast(pOutEvent->peekCommand())->peekPrintfHandler()->getSurface()->getUnderlyingBuffer()); printfAllocation[0] = 8; printfAllocation[1] = 0; pOutEvent->release(); testing::internal::CaptureStdout(); userEvent->setStatus(CL_COMPLETE); std::string output = testing::internal::GetCapturedStdout(); EXPECT_STREQ("test", output.c_str()); } INSTANTIATE_TEST_CASE_P(EnqueueKernel, EnqueueKernelPrintfTest, ::testing::ValuesIn(TestParamPrintf)); using EnqueueKernelTests = ::testing::Test; HWTEST_F(EnqueueKernelTests, whenEnqueueingKernelThenCsrCorrectlySetsRequiredThreadArbitrationPolicy) { struct myCsr : public UltCommandStreamReceiver { using CommandStreamReceiverHw::streamProperties; }; cl_uint workDim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {1, 1, 1}; size_t localWorkSize[3] = {1, 1, 1}; 
UltClDeviceFactory clDeviceFactory{1, 0}; MockContext context{clDeviceFactory.rootDevices[0]}; SPatchExecutionEnvironment sPatchExecEnv = {}; sPatchExecEnv.SubgroupIndependentForwardProgressRequired = true; MockKernelWithInternals mockKernelWithInternalsWithIfpRequired{*clDeviceFactory.rootDevices[0], sPatchExecEnv}; sPatchExecEnv.SubgroupIndependentForwardProgressRequired = false; MockKernelWithInternals mockKernelWithInternalsWithIfpNotRequired{*clDeviceFactory.rootDevices[0], sPatchExecEnv}; cl_int retVal; std::unique_ptr pCommandQueue{CommandQueue::create(&context, clDeviceFactory.rootDevices[0], nullptr, true, retVal)}; auto &csr = static_cast(pCommandQueue->getGpgpuCommandStreamReceiver()); pCommandQueue->enqueueKernel( mockKernelWithInternalsWithIfpRequired.mockKernel, workDim, globalWorkOffset, globalWorkSize, localWorkSize, 0, nullptr, nullptr); pCommandQueue->flush(); EXPECT_EQ(HwHelperHw::get().getDefaultThreadArbitrationPolicy(), csr.streamProperties.stateComputeMode.threadArbitrationPolicy.value); pCommandQueue->enqueueKernel( mockKernelWithInternalsWithIfpNotRequired.mockKernel, workDim, globalWorkOffset, globalWorkSize, localWorkSize, 0, nullptr, nullptr); pCommandQueue->flush(); EXPECT_EQ(ThreadArbitrationPolicy::AgeBased, csr.streamProperties.stateComputeMode.threadArbitrationPolicy.value); pCommandQueue->enqueueKernel( mockKernelWithInternalsWithIfpRequired.mockKernel, workDim, globalWorkOffset, globalWorkSize, localWorkSize, 0, nullptr, nullptr); pCommandQueue->flush(); EXPECT_EQ(HwHelperHw::get().getDefaultThreadArbitrationPolicy(), csr.streamProperties.stateComputeMode.threadArbitrationPolicy.value); } typedef HelloWorldFixture EnqueueKernelFixture; typedef Test EnqueueKernelTest; template class MyCmdQ : public MockCommandQueueHw { public: using CommandQueueHw::commandStream; using CommandQueueHw::gpgpuEngine; using CommandQueueHw::bcsEngines; MyCmdQ(Context *context, ClDevice *device) : MockCommandQueueHw(context, device, nullptr) {} void 
dispatchAuxTranslationBuiltin(MultiDispatchInfo &multiDispatchInfo, AuxTranslationDirection auxTranslationDirection) override { CommandQueueHw::dispatchAuxTranslationBuiltin(multiDispatchInfo, auxTranslationDirection); auxTranslationDirections.push_back(auxTranslationDirection); Kernel *lastKernel = nullptr; for (const auto &dispatchInfo : multiDispatchInfo) { lastKernel = dispatchInfo.getKernel(); dispatchInfos.emplace_back(dispatchInfo); } dispatchAuxTranslationInputs.emplace_back(lastKernel, multiDispatchInfo.size(), *multiDispatchInfo.getKernelObjsForAuxTranslation(), auxTranslationDirection); } WaitStatus waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList, bool skipWait) override { waitCalled++; return MockCommandQueueHw::waitUntilComplete(gpgpuTaskCountToWait, copyEnginesToWait, flushStampToWait, useQuickKmdSleep, cleanTemporaryAllocationList, skipWait); } std::vector auxTranslationDirections; std::vector dispatchInfos; std::vector> dispatchAuxTranslationInputs; uint32_t waitCalled = 0; }; struct EnqueueAuxKernelTests : public EnqueueKernelTest { void SetUp() override { DebugManager.flags.ForceAuxTranslationMode.set(static_cast(AuxTranslationMode::Builtin)); EnqueueKernelTest::SetUp(); } DebugManagerStateRestore dbgRestore; }; HWTEST_F(EnqueueAuxKernelTests, givenKernelWithRequiredAuxTranslationAndWithoutArgumentsWhenEnqueuedThenNoGuardKernelWithAuxTranslations) { MockKernelWithInternals mockKernel(*pClDevice, context); MyCmdQ cmdQ(context, pClDevice); size_t gws[3] = {1, 0, 0}; mockKernel.mockKernel->auxTranslationRequired = true; cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_EQ(0u, cmdQ.dispatchAuxTranslationInputs.size()); } HWTEST_F(EnqueueAuxKernelTests, givenMultipleArgsWhenAuxTranslationIsRequiredThenPickOnlyApplicableBuffers) { REQUIRE_AUX_RESOLVES(); DebugManagerStateRestore dbgRestore; 
DebugManager.flags.RenderCompressedBuffersEnabled.set(1); MyCmdQ cmdQ(context, pClDevice); size_t gws[3] = {1, 0, 0}; MockBuffer buffer0, buffer1, buffer2, buffer3; cl_mem clMem0 = &buffer0; cl_mem clMem1 = &buffer1; cl_mem clMem2 = &buffer2; cl_mem clMem3 = &buffer3; buffer0.setAllocationType(pClDevice->getRootDeviceIndex(), false); buffer1.setAllocationType(pClDevice->getRootDeviceIndex(), false); buffer2.setAllocationType(pClDevice->getRootDeviceIndex(), true); buffer3.setAllocationType(pClDevice->getRootDeviceIndex(), true); MockKernelWithInternals mockKernel(*pClDevice, context); auto &args = mockKernel.kernelInfo.kernelDescriptor.payloadMappings.explicitArgs; args.resize(6); args[0].as(true).accessedUsingStatelessAddressingMode = true; args[1].as(true).accessedUsingStatelessAddressingMode = false; args[2].as(true).accessedUsingStatelessAddressingMode = true; args[3].as(true).accessedUsingStatelessAddressingMode = false; args[4].as(true).accessedUsingStatelessAddressingMode = true; args[5].as(true).accessedUsingStatelessAddressingMode = true; mockKernel.mockKernel->initialize(); EXPECT_TRUE(mockKernel.mockKernel->auxTranslationRequired); mockKernel.mockKernel->setArgBuffer(0, sizeof(cl_mem *), &clMem0); // stateless on regular buffer - dont insert mockKernel.mockKernel->setArgBuffer(1, sizeof(cl_mem *), &clMem1); // stateful on regular buffer - dont insert mockKernel.mockKernel->setArgBuffer(2, sizeof(cl_mem *), &clMem2); // stateless on compressed BUFFER - insert mockKernel.mockKernel->setArgBuffer(3, sizeof(cl_mem *), &clMem3); // stateful on compressed BUFFER - dont insert mockKernel.mockKernel->setArgBuffer(4, sizeof(cl_mem *), nullptr); // nullptr - dont insert mockKernel.mockKernel->kernelArguments.at(5).type = Kernel::kernelArgType::IMAGE_OBJ; // non-buffer arg - dont insert cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_EQ(2u, cmdQ.dispatchAuxTranslationInputs.size()); EXPECT_EQ(1u, 
std::get(cmdQ.dispatchAuxTranslationInputs.at(0)).size()); // before kernel EXPECT_EQ(1u, std::get(cmdQ.dispatchAuxTranslationInputs.at(1)).size()); // after kernel EXPECT_EQ(&buffer2, (*std::get(cmdQ.dispatchAuxTranslationInputs.at(0)).begin()).object); EXPECT_EQ(&buffer2, (*std::get(cmdQ.dispatchAuxTranslationInputs.at(1)).begin()).object); auto cmdStream = cmdQ.commandStream; auto sizeUsed = cmdStream->getUsed(); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, cmdStream->getCpuBase(), sizeUsed)); auto pipeControls = findAll(cmdList.begin(), cmdList.end()); auto additionalPcCount = MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation( pDevice->getHardwareInfo()) / sizeof(typename FamilyType::PIPE_CONTROL); // |AuxToNonAux|NDR|NonAuxToAux| ASSERT_EQ(4u + additionalPcCount, pipeControls.size()); ASSERT_EQ(2u, cmdQ.auxTranslationDirections.size()); EXPECT_EQ(AuxTranslationDirection::AuxToNonAux, cmdQ.auxTranslationDirections[0]); EXPECT_EQ(AuxTranslationDirection::NonAuxToAux, cmdQ.auxTranslationDirections[1]); } HWTEST_F(EnqueueAuxKernelTests, givenKernelWithRequiredAuxTranslationWhenEnqueuedThenDispatchAuxTranslationBuiltin) { MockKernelWithInternals mockKernel(*pClDevice, context); MyCmdQ cmdQ(context, pClDevice); size_t gws[3] = {1, 0, 0}; MockBuffer buffer; cl_mem clMem = &buffer; buffer.setAllocationType(pClDevice->getRootDeviceIndex(), true); mockKernel.kernelInfo.kernelDescriptor.payloadMappings.explicitArgs.resize(1); mockKernel.kernelInfo.kernelDescriptor.payloadMappings.explicitArgs[0].as(true).accessedUsingStatelessAddressingMode = true; mockKernel.mockKernel->initialize(); mockKernel.mockKernel->auxTranslationRequired = true; mockKernel.mockKernel->setArgBuffer(0, sizeof(cl_mem *), &clMem); cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_EQ(2u, cmdQ.dispatchAuxTranslationInputs.size()); // before kernel EXPECT_EQ(1u, 
std::get(cmdQ.dispatchAuxTranslationInputs.at(0))); // aux before NDR auto kernelBefore = std::get(cmdQ.dispatchAuxTranslationInputs.at(0)); EXPECT_EQ("fullCopy", kernelBefore->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName); EXPECT_TRUE(kernelBefore->isBuiltIn); // after kernel EXPECT_EQ(3u, std::get(cmdQ.dispatchAuxTranslationInputs.at(1))); // aux + NDR + aux auto kernelAfter = std::get(cmdQ.dispatchAuxTranslationInputs.at(1)); EXPECT_EQ("fullCopy", kernelAfter->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName); EXPECT_TRUE(kernelAfter->isBuiltIn); } using BlitAuxKernelTests = ::testing::Test; HWTEST_F(BlitAuxKernelTests, givenDebugVariableDisablingBuiltinTranslationWhenDispatchingKernelWithRequiredAuxTranslationThenDontDispatch) { DebugManagerStateRestore dbgRestore; DebugManager.flags.ForceAuxTranslationMode.set(static_cast(AuxTranslationMode::Blit)); VariableBackup backupHwInfo(defaultHwInfo.get()); defaultHwInfo->capabilityTable.blitterOperationsSupported = true; UltClDeviceFactory factory{1, 0}; auto rootDeviceIndex = 0u; auto pClDevice = factory.rootDevices[rootDeviceIndex]; auto pDevice = factory.pUltDeviceFactory->rootDevices[rootDeviceIndex]; pDevice->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; auto hwInfo = pDevice->getExecutionEnvironment()->rootDeviceEnvironments[rootDeviceIndex]->getMutableHardwareInfo(); hwInfo->capabilityTable.blitterOperationsSupported = true; REQUIRE_FULL_BLITTER_OR_SKIP(hwInfo); MockContext context(pClDevice); MockKernelWithInternals mockKernel(context.getDevices(), &context); MyCmdQ cmdQ(&context, pClDevice); size_t gws[3] = {1, 0, 0}; MockBuffer buffer; cl_mem clMem = &buffer; buffer.setAllocationType(pClDevice->getRootDeviceIndex(), true); mockKernel.kernelInfo.kernelDescriptor.payloadMappings.explicitArgs.resize(1); mockKernel.kernelInfo.kernelDescriptor.payloadMappings.explicitArgs[0].as(true).accessedUsingStatelessAddressingMode = true; mockKernel.mockKernel->initialize(); 
mockKernel.mockKernel->auxTranslationRequired = true; mockKernel.mockKernel->setArgBuffer(0, sizeof(cl_mem *), &clMem); cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_EQ(0u, cmdQ.dispatchAuxTranslationInputs.size()); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueKernelTest, givenCacheFlushAfterWalkerEnabledWhenAllocationRequiresCacheFlushThenFlushCommandPresentAfterWalker) { using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableCacheFlushAfterWalker.set(1); MockKernelWithInternals mockKernel(*pClDevice, context); CommandQueueHw cmdQ(context, pClDevice, nullptr, false); size_t gws[3] = {1, 0, 0}; mockKernel.mockKernel->svmAllocationsRequireCacheFlush = true; cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); HardwareParse hwParse; hwParse.parseCommands(cmdQ.getCS(0), 0); auto itorCmd = find(hwParse.cmdList.begin(), hwParse.cmdList.end()); ASSERT_NE(hwParse.cmdList.end(), itorCmd); itorCmd = find(itorCmd, hwParse.cmdList.end()); auto pipeControl = genCmdCast(*itorCmd); ASSERT_NE(nullptr, pipeControl); EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); EXPECT_TRUE(pipeControl->getDcFlushEnable()); } HWTEST_F(EnqueueKernelTest, givenTimestampWriteEnableWhenMarkerProfilingWithoutWaitListThenSizeHasFourMMIOStoresAndPipeControll) { pDevice->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; MockKernelWithInternals mockKernel(*pClDevice); DispatchInfo dispatchInfo; MultiDispatchInfo multiDispatchInfo(mockKernel.mockKernel); dispatchInfo.setKernel(mockKernel.mockKernel); multiDispatchInfo.push(dispatchInfo); auto baseCommandStreamSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false); auto extendedCommandStreamSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_MARKER, 
{}, false, false, false, *pCmdQ, multiDispatchInfo, true, false); EXPECT_EQ(baseCommandStreamSize + 4 * EncodeStoreMMIO::size + MemorySynchronizationCommands::getSizeForSinglePipeControl(), extendedCommandStreamSize); } HWCMDTEST_F(IGFX_XE_HP_CORE, EnqueueKernelTest, givenTimestampWriteEnableOnMultiTileQueueWhenMarkerProfilingWithoutWaitListThenSizeHasFourMMIOStoresAndCrossTileBarrier) { auto &csr = pDevice->getUltCommandStreamReceiver(); csr.timestampPacketWriteEnabled = true; csr.activePartitions = 2; csr.activePartitionsConfig = 2; csr.staticWorkPartitioningEnabled = true; MockKernelWithInternals mockKernel(*pClDevice); DispatchInfo dispatchInfo; MultiDispatchInfo multiDispatchInfo(mockKernel.mockKernel); dispatchInfo.setKernel(mockKernel.mockKernel); multiDispatchInfo.push(dispatchInfo); auto baseCommandStreamSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false); auto extendedCommandStreamSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, true, false); EXPECT_EQ(baseCommandStreamSize + 4 * EncodeStoreMMIO::size + ImplicitScalingDispatch::getBarrierSize(csr.peekHwInfo(), false, false), extendedCommandStreamSize); } HWTEST_F(EnqueueKernelTest, givenTimestampWriteEnableWhenMarkerProfilingWithWaitListThenSizeHasFourMMIOStores) { pDevice->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; MockKernelWithInternals mockKernel(*pClDevice); DispatchInfo dispatchInfo; MultiDispatchInfo multiDispatchInfo(mockKernel.mockKernel); dispatchInfo.setKernel(mockKernel.mockKernel); multiDispatchInfo.push(dispatchInfo); auto baseCommandStreamSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false); auto extendedCommandStreamSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, true, 
true); EXPECT_EQ(baseCommandStreamSize + 4 * EncodeStoreMMIO::size, extendedCommandStreamSize); } compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/enqueue_kernel_event_tests.cpp000066400000000000000000000161661422164147700326210ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/event/event.h" #include "opencl/test/unit_test/fixtures/hello_world_fixture.h" #include "gtest/gtest.h" using namespace NEO; typedef HelloWorldTest EventTests; TEST_F(EventTests, WhenEnqueingKernelThenCorrectEventIsReturned) { cl_event event = nullptr; auto retVal = callOneWorkItemNDRKernel(nullptr, 0, &event); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, event); auto pEvent = (Event *)event; EXPECT_EQ(pCmdQ->taskLevel, pEvent->taskLevel); // Check CL_EVENT_COMMAND_TYPE { cl_command_type cmdType = 0; size_t sizeReturned = 0; auto result = clGetEventInfo(pEvent, CL_EVENT_COMMAND_TYPE, sizeof(cmdType), &cmdType, &sizeReturned); ASSERT_EQ(CL_SUCCESS, result); EXPECT_EQ(static_cast(CL_COMMAND_NDRANGE_KERNEL), cmdType); EXPECT_EQ(sizeof(cl_command_type), sizeReturned); } delete pEvent; } TEST_F(EventTests, WhenEnqueingKernelThenEventReturnedShouldBeMaxOfInputEventsAndCmdQPlus1) { uint32_t taskLevelCmdQ = 17; pCmdQ->taskLevel = taskLevelCmdQ; uint32_t taskLevelEvent1 = 8; uint32_t taskLevelEvent2 = 19; Event event1(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent1, 15); Event event2(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent2, 16); cl_event eventWaitList[] = { &event1, &event2}; cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]); cl_event event = nullptr; auto retVal = callOneWorkItemNDRKernel(eventWaitList, numEventsInWaitList, &event); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, event); auto pEvent = (Event *)event; EXPECT_EQ(19u + 1u, pEvent->taskLevel); delete pEvent; } TEST_F(EventTests, 
WhenWaitingForEventThenPipeControlIsNotInserted) { cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event event = nullptr; auto &csr = pCmdQ->getGpgpuCommandStreamReceiver(); auto retVal = callOneWorkItemNDRKernel(eventWaitList, numEventsInWaitList, &event); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, event); auto pEvent = (Event *)event; EXPECT_EQ(pCmdQ->taskLevel, pEvent->taskLevel); retVal = Event::waitForEvents(1, &event); EXPECT_EQ(CL_SUCCESS, retVal); //we expect event is completed uint32_t taskCountOfEvent = pEvent->peekTaskCount(); EXPECT_LE(taskCountOfEvent, pCmdQ->getHwTag()); // no more tasks after WFE, no need to write PC EXPECT_EQ(pEvent->taskLevel + 1, csr.peekTaskLevel()); pCmdQ->finish(); // Check CL_EVENT_COMMAND_TYPE { cl_command_type cmdType = 0; size_t sizeReturned = 0; auto result = clGetEventInfo(pEvent, CL_EVENT_COMMAND_TYPE, sizeof(cmdType), &cmdType, &sizeReturned); ASSERT_EQ(CL_SUCCESS, result); EXPECT_EQ(static_cast(CL_COMMAND_NDRANGE_KERNEL), cmdType); EXPECT_EQ(sizeof(cl_command_type), sizeReturned); } delete pEvent; } TEST_F(EventTests, GivenTwoEnqueuesWhenWaitingForBothEventsThenTaskLevelIsCorrect) { cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event event[2] = {}; auto &csr = pCmdQ->getGpgpuCommandStreamReceiver(); auto retVal = callOneWorkItemNDRKernel(eventWaitList, numEventsInWaitList, &event[0]); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, event[0]); auto pEvent0 = castToObject(event[0]); EXPECT_EQ(pCmdQ->taskLevel, pEvent0->taskLevel); retVal = callOneWorkItemNDRKernel(eventWaitList, numEventsInWaitList, &event[1]); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, event[1]); auto pEvent1 = castToObject(event[1]); EXPECT_EQ(pCmdQ->taskLevel, pEvent1->taskLevel); EXPECT_GT(pEvent1->taskLevel, pEvent0->taskLevel); retVal = Event::waitForEvents(2, event); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pEvent1->taskLevel + 1, csr.peekTaskLevel()); pCmdQ->finish(); 
EXPECT_EQ(pEvent1->taskLevel + 1, csr.peekTaskLevel()); // Check CL_EVENT_COMMAND_TYPE { cl_command_type cmdType = 0; size_t sizeReturned = 0; auto result = clGetEventInfo(pEvent0, CL_EVENT_COMMAND_TYPE, sizeof(cmdType), &cmdType, &sizeReturned); ASSERT_EQ(CL_SUCCESS, result); EXPECT_EQ(static_cast(CL_COMMAND_NDRANGE_KERNEL), cmdType); EXPECT_EQ(sizeof(cl_command_type), sizeReturned); } delete pEvent0; delete pEvent1; } TEST_F(EventTests, GivenNoEventsWhenEnqueuingKernelThenTaskLevelIsIncremented) { cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event event = nullptr; auto &csr = pCmdQ->getGpgpuCommandStreamReceiver(); auto retVal = callOneWorkItemNDRKernel(eventWaitList, numEventsInWaitList, &event); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, event); auto pEvent = (Event *)event; EXPECT_EQ(pCmdQ->taskLevel, pEvent->taskLevel); retVal = Event::waitForEvents(1, &event); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pEvent->taskLevel + 1, csr.peekTaskLevel()); retVal = callOneWorkItemNDRKernel(eventWaitList, numEventsInWaitList, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pEvent->taskLevel + 2, csr.peekTaskLevel()); pCmdQ->finish(); EXPECT_EQ(pEvent->taskLevel + 2, csr.peekTaskLevel()); // Check CL_EVENT_COMMAND_TYPE { cl_command_type cmdType = 0; size_t sizeReturned = 0; auto result = clGetEventInfo(pEvent, CL_EVENT_COMMAND_TYPE, sizeof(cmdType), &cmdType, &sizeReturned); ASSERT_EQ(CL_SUCCESS, result); EXPECT_EQ(static_cast(CL_COMMAND_NDRANGE_KERNEL), cmdType); EXPECT_EQ(sizeof(cl_command_type), sizeReturned); } delete pEvent; } TEST_F(EventTests, WhenEnqueuingMarkerThenPassedEventHasTheSameLevelAsPreviousCommand) { cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event event = nullptr; auto &csr = pCmdQ->getGpgpuCommandStreamReceiver(); auto retVal = callOneWorkItemNDRKernel(eventWaitList, numEventsInWaitList, &event); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, event); auto pEvent = (Event *)event; 
EXPECT_EQ(pEvent->taskLevel + 1, csr.peekTaskLevel()); cl_event event2 = nullptr; retVal = clEnqueueMarkerWithWaitList(pCmdQ, 1, &event, &event2); auto pEvent2 = castToObject(event2); if (csr.peekTimestampPacketWriteEnabled()) { EXPECT_EQ(pEvent2->taskLevel, pEvent->taskLevel + 1); } else { EXPECT_EQ(pEvent2->taskLevel, pEvent->taskLevel); } ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, event2); retVal = clWaitForEvents(1, &event2); ASSERT_EQ(CL_SUCCESS, retVal); if (csr.peekTimestampPacketWriteEnabled()) { EXPECT_EQ(csr.peekTaskLevel(), pCmdQ->taskLevel + 1); } else { EXPECT_EQ(csr.peekTaskLevel(), pEvent->taskLevel + 1); } clReleaseEvent(event); clReleaseEvent(event2); } enqueue_kernel_global_offset_tests.cpp000066400000000000000000000013211422164147700342120ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_queue/command_queue.h" #include "opencl/test/unit_test/fixtures/hello_world_fixture.h" #include "gtest/gtest.h" using namespace NEO; using GlobalWorkOffset = HelloWorldTest; TEST_F(GlobalWorkOffset, GivenNullGlobalWorkOffsetWhenEnqueuingKernelThenSuccessIsReturned) { size_t globalWorkSize[3] = {1, 1, 1}; size_t localWorkSize[3] = {1, 1, 1}; auto retVal = pCmdQ->enqueueKernel( pKernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } enqueue_kernel_local_work_size_tests.cpp000066400000000000000000000077431422164147700346100ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_queue/command_queue.h" #include "opencl/test/unit_test/fixtures/hello_world_fixture.h" #include "gtest/gtest.h" using namespace NEO; typedef HelloWorldTest EnqueueKernelLocalWorkSize; TEST_F(EnqueueKernelLocalWorkSize, 
GivenNullLwsInWhenEnqueuingKernelThenSuccessIsReturned) { size_t globalWorkOffset[3] = {0, 999, 9999}; size_t globalWorkSize[3] = {1, 999, 9999}; auto retVal = pCmdQ->enqueueKernel( pKernel, 1, globalWorkOffset, globalWorkSize, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } struct EnqueueKernelRequiredWorkSize : public HelloWorldTest { typedef HelloWorldTest Parent; void SetUp() override { Parent::kernelFilename = "required_work_group"; Parent::kernelName = "CopyBuffer"; Parent::SetUp(); } void TearDown() override { Parent::TearDown(); } }; // Kernel specifies the optional reqd_work_group_size() attribute but it wasn't // specified. We'll permit the user to not specify the local work group size // and pick up the correct values instead. TEST_F(EnqueueKernelRequiredWorkSize, GivenUnspecifiedWorkGroupSizeWhenEnqueueingKernelThenLwsIsSetCorrectly) { size_t globalWorkSize[3] = {32, 32, 32}; size_t *localWorkSize = nullptr; auto retVal = pCmdQ->enqueueKernel( pKernel, 3, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); auto localWorkSizeVal = pKernel->getLocalWorkSizeValues(); EXPECT_EQ(8u, *localWorkSizeVal[0]); EXPECT_EQ(2u, *localWorkSizeVal[1]); EXPECT_EQ(2u, *localWorkSizeVal[2]); auto enqueuedLocalWorkSize = pKernel->getEnqueuedLocalWorkSizeValues(); EXPECT_EQ(8u, *enqueuedLocalWorkSize[0]); EXPECT_EQ(2u, *enqueuedLocalWorkSize[1]); EXPECT_EQ(2u, *enqueuedLocalWorkSize[2]); } // Fully specified TEST_F(EnqueueKernelRequiredWorkSize, GivenRequiredWorkGroupSizeWhenEnqueueingKernelThenLwsIsSetCorrectly) { size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {32, 32, 32}; size_t localWorkSize[3] = {8, 2, 2}; auto retVal = pCmdQ->enqueueKernel( pKernel, 3, globalWorkOffset, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); auto localWorkSizeVal = pKernel->getLocalWorkSizeValues(); EXPECT_EQ(8u, *localWorkSizeVal[0]); EXPECT_EQ(2u, *localWorkSizeVal[1]); 
EXPECT_EQ(2u, *localWorkSizeVal[2]); auto enqueuedLocalWorkSize = pKernel->getEnqueuedLocalWorkSizeValues(); EXPECT_EQ(8u, *enqueuedLocalWorkSize[0]); EXPECT_EQ(2u, *enqueuedLocalWorkSize[1]); EXPECT_EQ(2u, *enqueuedLocalWorkSize[2]); } // Underspecified. Won't permit. TEST_F(EnqueueKernelRequiredWorkSize, givenKernelRequiringLocalWorkgroupSizeWhen1DimensionIsPassedThatIsCorrectThenNdRangeIsSuccesful) { size_t globalWorkOffset[1] = {0}; size_t globalWorkSize[1] = {32}; size_t localWorkSize[1] = {8}; auto retVal = pCmdQ->enqueueKernel( pKernel, 1, globalWorkOffset, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } // Incorrectly specified TEST_F(EnqueueKernelRequiredWorkSize, GivenInvalidRequiredWorkgroupSizeWhenEnqueuingKernelThenInvalidWorkGroupSizeErrorIsReturned) { size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {32, 32, 32}; size_t localWorkSize[3] = {16, 8, 1}; auto retVal = pCmdQ->enqueueKernel( pKernel, 3, globalWorkOffset, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_WORK_GROUP_SIZE, retVal); } compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/enqueue_kernel_mt_tests.cpp000066400000000000000000000043371422164147700321150ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/mocks/mock_csr.h" #include "shared/test/common/mocks/mock_submissions_aggregator.h" #include "opencl/test/unit_test/command_queue/enqueue_fixture.h" #include "opencl/test/unit_test/fixtures/hello_world_fixture.h" typedef HelloWorldFixture EnqueueKernelFixture; typedef Test EnqueueKernelTest; HWTEST_F(EnqueueKernelTest, givenCsrInBatchingModeWhenFinishIsCalledThenBatchesSubmissionsAreFlushed) { auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); 
pDevice->resetCommandStreamReceiver(mockCsr); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); std::atomic startEnqueueProcess(false); MockKernelWithInternals mockKernel(*pClDevice); size_t gws[3] = {1, 0, 0}; auto enqueueCount = 10; auto threadCount = 4; auto function = [&]() { //wait until we are signalled while (!startEnqueueProcess) ; for (int enqueue = 0; enqueue < enqueueCount; enqueue++) { pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); } }; std::vector threads; for (auto thread = 0; thread < threadCount; thread++) { threads.push_back(std::thread(function)); } auto currentTaskCount = 0; startEnqueueProcess = true; //call a flush while other threads enqueue, we can't drop anything while (currentTaskCount < enqueueCount * threadCount) { clFlush(pCmdQ); auto locker = mockCsr->obtainUniqueOwnership(); currentTaskCount = mockCsr->peekTaskCount(); } for (auto &thread : threads) { thread.join(); } pCmdQ->finish(); EXPECT_GE(mockCsr->flushCalledCount, 1); EXPECT_LE(mockCsr->flushCalledCount, enqueueCount * threadCount); EXPECT_EQ(mockedSubmissionsAggregator->peekInspectionId() - 1, (uint32_t)mockCsr->flushCalledCount); } compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/enqueue_kernel_two_ioq_tests.cpp000066400000000000000000000073751422164147700331630ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/event/event.h" #include "opencl/test/unit_test/fixtures/hello_world_fixture.h" #include "opencl/test/unit_test/helpers/cl_hw_parse.h" using namespace NEO; struct TwoIOQsTwoDependentWalkers : public HelloWorldTest, public ClHardwareParse { typedef HelloWorldTest Parent; using Parent::createCommandQueue; using Parent::pCmdQ; using 
Parent::pDevice; using Parent::pKernel; TwoIOQsTwoDependentWalkers() { } void SetUp() override { Parent::SetUp(); ClHardwareParse::SetUp(); } void TearDown() override { delete pCmdQ2; ClHardwareParse::TearDown(); Parent::TearDown(); } template void parseWalkers() { cl_uint workDim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {1, 1, 1}; size_t localWorkSize[3] = {1, 1, 1}; cl_event event1 = nullptr; cl_event event2 = nullptr; auto retVal = pCmdQ->enqueueKernel( pKernel, workDim, globalWorkOffset, globalWorkSize, localWorkSize, 0, nullptr, &event1); ASSERT_EQ(CL_SUCCESS, retVal); ClHardwareParse::parseCommands(*pCmdQ); // Create a second command queue (beyond the default one) pCmdQ2 = createCommandQueue(pClDevice); ASSERT_NE(nullptr, pCmdQ2); retVal = pCmdQ2->enqueueKernel( pKernel, workDim, globalWorkOffset, globalWorkSize, localWorkSize, 1, &event1, &event2); ASSERT_EQ(CL_SUCCESS, retVal); ClHardwareParse::parseCommands(*pCmdQ2); Event *E1 = castToObject(event1); ASSERT_NE(nullptr, E1); Event *E2 = castToObject(event2); ASSERT_NE(nullptr, E2); delete E1; delete E2; typedef typename FamilyType::WALKER_TYPE GPGPU_WALKER; itorWalker1 = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), itorWalker1); itorWalker2 = itorWalker1; ++itorWalker2; itorWalker2 = find(itorWalker2, cmdList.end()); ASSERT_NE(cmdList.end(), itorWalker2); } GenCmdList::iterator itorWalker1; GenCmdList::iterator itorWalker2; CommandQueue *pCmdQ2 = nullptr; }; HWTEST_F(TwoIOQsTwoDependentWalkers, GivenTwoCommandQueuesWhenEnqueuingKernelThenTwoDifferentWalkersAreCreated) { parseWalkers(); EXPECT_NE(itorWalker1, itorWalker2); } HWTEST_F(TwoIOQsTwoDependentWalkers, GivenTwoCommandQueuesWhenEnqueuingKernelThenOnePipelineSelectExists) { parseWalkers(); int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect(); EXPECT_EQ(1, numCommands); } HWCMDTEST_F(IGFX_GEN8_CORE, TwoIOQsTwoDependentWalkers, 
GivenTwoCommandQueuesWhenEnqueuingKernelThenThereIsOneVfeState) { parseWalkers(); auto numCommands = getCommandsList().size(); EXPECT_EQ(1u, numCommands); } HWTEST_F(TwoIOQsTwoDependentWalkers, GivenTwoCommandQueuesWhenEnqueuingKernelThenOnePipeControlIsInsertedBetweenWalkers) { typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; parseWalkers(); auto itorCmd = find(itorWalker1, itorWalker2); // Should find a PC. EXPECT_NE(itorWalker2, itorCmd); } compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/enqueue_kernel_two_ooq_tests.cpp000066400000000000000000000147151422164147700331650ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/libult/ult_command_stream_receiver.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/event/event.h" #include "opencl/test/unit_test/fixtures/hello_world_fixture.h" #include "opencl/test/unit_test/helpers/cl_hw_parse.h" using namespace NEO; struct OOQFixtureFactory : public HelloWorldFixtureFactory { typedef OOQueueFixture CommandQueueFixture; }; struct TwoOOQsTwoDependentWalkers : public HelloWorldTest, public ClHardwareParse { typedef HelloWorldTest Parent; using Parent::createCommandQueue; using Parent::pCmdQ; using Parent::pDevice; using Parent::pKernel; TwoOOQsTwoDependentWalkers() { } void SetUp() override { Parent::SetUp(); ClHardwareParse::SetUp(); } void TearDown() override { delete pCmdQ2; ClHardwareParse::TearDown(); Parent::TearDown(); } template void parseWalkers() { cl_uint workDim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {1, 1, 1}; size_t localWorkSize[3] = {1, 1, 1}; cl_event event1 = nullptr; cl_event event2 = nullptr; auto &commandStream = pCmdQ->getGpgpuCommandStreamReceiver().getCS(2048); auto pCommandStreamBuffer = reinterpret_cast(commandStream.getCpuBase()); std::fill(pCommandStreamBuffer + 
commandStream.getUsed(), pCommandStreamBuffer + commandStream.getMaxAvailableSpace(), 0); auto retVal = pCmdQ->enqueueKernel( pKernel, workDim, globalWorkOffset, globalWorkSize, localWorkSize, 0, nullptr, &event1); ASSERT_EQ(CL_SUCCESS, retVal); // Create a second command queue (beyond the default one) pCmdQ2 = createCommandQueue(pClDevice, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE); ASSERT_NE(nullptr, pCmdQ2); auto &commandStream2 = pCmdQ2->getGpgpuCommandStreamReceiver().getCS(2048); auto pCommandStreamBuffer2 = reinterpret_cast(commandStream2.getCpuBase()); std::fill(pCommandStreamBuffer2 + commandStream2.getUsed(), pCommandStreamBuffer2 + commandStream2.getMaxAvailableSpace(), 0); retVal = pCmdQ2->enqueueKernel( pKernel, workDim, globalWorkOffset, globalWorkSize, localWorkSize, 1, &event1, &event2); ASSERT_EQ(CL_SUCCESS, retVal); pCmdQ->flush(); pCmdQ2->flush(); ClHardwareParse::parseCommands(*pCmdQ); ClHardwareParse::parseCommands(*pCmdQ2); Event *E1 = castToObject(event1); ASSERT_NE(nullptr, E1); Event *E2 = castToObject(event2); ASSERT_NE(nullptr, E2); delete E1; delete E2; typedef typename FamilyType::WALKER_TYPE GPGPU_WALKER; itorWalker1 = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), itorWalker1); itorWalker2 = itorWalker1; ++itorWalker2; itorWalker2 = find(itorWalker2, cmdList.end()); ASSERT_NE(cmdList.end(), itorWalker2); } void hexDump(void *ptr, size_t size) { uint8_t *byte = reinterpret_cast(ptr); uint8_t bytesNum = 0; while (bytesNum < size) { std::cout << std::hex << "0x" << static_cast(byte[bytesNum++]) << " "; } std::cout << std::endl; } GenCmdList::iterator itorWalker1; GenCmdList::iterator itorWalker2; CommandQueue *pCmdQ2 = nullptr; }; HWTEST_F(TwoOOQsTwoDependentWalkers, GivenTwoCommandQueuesWhenEnqueuingKernelThenTwoDifferentWalkersAreCreated) { parseWalkers(); EXPECT_NE(itorWalker1, itorWalker2); } HWTEST_F(TwoOOQsTwoDependentWalkers, GivenTwoCommandQueuesWhenEnqueuingKernelThenOnePipelineSelectExists) { parseWalkers(); int 
numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect(); EXPECT_EQ(1, numCommands); } HWCMDTEST_F(IGFX_GEN8_CORE, TwoOOQsTwoDependentWalkers, GivenTwoCommandQueuesWhenEnqueuingKernelThenThereIsOneVfeState) { using MEDIA_VFE_STATE = typename FamilyType::MEDIA_VFE_STATE; parseWalkers(); auto commandsList = getCommandsList(); auto numCommands = commandsList.size(); EXPECT_EQ(1u, numCommands); auto expectedCmd = FamilyType::cmdInitMediaVfeState; if (numCommands > 1) { uint32_t commandIndex = 0; for (auto &cmd : commandsList) { auto offset = reinterpret_cast(cmd) - reinterpret_cast(*cmdList.begin()); std::cout << "MEDIA_VFE_STATE [" << commandIndex << "] : 0x" << std::hex << cmd << ". Byte offset in command buffer: 0x" << offset << std::endl; commandIndex++; if (memcmp(&expectedCmd, cmd, sizeof(MEDIA_VFE_STATE)) == 0) { std::cout << "matches expected MEDIA_VFE_STATE command" << std::endl; } else { std::cout << "doesn't match expected MEDIA_VFE_STATE command." << std::endl; } std::cout << "Expected:" << std::endl; hexDump(&expectedCmd, sizeof(MEDIA_VFE_STATE)); std::cout << "Actual:" << std::endl; hexDump(cmd, sizeof(MEDIA_VFE_STATE)); } std::cout << std::endl << "Command buffer content:" << std::endl; auto it = cmdList.begin(); uint32_t cmdNum = 0; std::string cmdBuffStr; while (it != cmdList.end()) { cmdBuffStr += std::to_string(cmdNum) + ":" + HardwareParse::getCommandName(*it) + " "; ++cmdNum; ++it; } std::cout << cmdBuffStr << std::endl; } } HWTEST_F(TwoOOQsTwoDependentWalkers, DISABLED_GivenTwoCommandQueuesWhenEnqueuingKernelThenOnePipeControlIsInsertedBetweenWalkers) { typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; pDevice->getUltCommandStreamReceiver().timestampPacketWriteEnabled = false; parseWalkers(); auto itorCmd = find(itorWalker1, itorWalker2); // Should find a PC. 
EXPECT_NE(itorWalker2, itorCmd); } enqueue_kernel_two_walker_ioq_tests.cpp000066400000000000000000000047211422164147700344410ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/hw_helper.h" #include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/libult/ult_command_stream_receiver.h" #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/two_walker_fixture.h" using namespace NEO; typedef TwoWalkerTest IOQWithTwoWalkers; HWTEST_F(IOQWithTwoWalkers, GivenTwoCommandQueuesWhenEnqueuingKernelThenTwoDifferentWalkersAreCreated) { enqueueTwoKernels(); EXPECT_NE(itorWalker1, itorWalker2); } HWTEST_F(IOQWithTwoWalkers, GivenTwoCommandQueuesWhenEnqueuingKernelThenOnePipelineSelectExists) { enqueueTwoKernels(); int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect(); EXPECT_EQ(1, numCommands); } HWCMDTEST_F(IGFX_GEN8_CORE, IOQWithTwoWalkers, GivenTwoCommandQueuesWhenEnqueuingKernelThenThereIsOneVfeState) { enqueueTwoKernels(); auto numCommands = getCommandsList().size(); EXPECT_EQ(1u, numCommands); } HWTEST_F(IOQWithTwoWalkers, GivenTwoCommandQueuesWhenEnqueuingKernelThenOnePipeControlIsInsertedBetweenWalkers) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.timestampPacketWriteEnabled = false; enqueueTwoKernels(); typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; auto WaNeeded = MemorySynchronizationCommands::isPipeControlWArequired(pDevice->getHardwareInfo()); auto itorCmd = find(itorWalker1, itorWalker2); ASSERT_NE(itorWalker2, itorCmd); auto pipeControl = genCmdCast(*itorCmd); if (WaNeeded) { EXPECT_EQ(0u, pipeControl->getPostSyncOperation()); itorCmd++; itorCmd = find(itorCmd, itorWalker2); } pipeControl = genCmdCast(*itorCmd); ASSERT_NE(nullptr, pipeControl); // We should be writing a tag 
value to an address EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, pipeControl->getPostSyncOperation()); // The PC address should match the CS tag address EXPECT_EQ(commandStreamReceiver.getTagAllocation()->getGpuAddress(), NEO::UnitTestHelper::getPipeControlPostSyncAddress(*pipeControl)); EXPECT_EQ(1u, pipeControl->getImmediateData()); } enqueue_kernel_two_walker_ooq_tests.cpp000066400000000000000000000030151422164147700344420ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/hello_world_fixture.h" #include "opencl/test/unit_test/fixtures/two_walker_fixture.h" using namespace NEO; struct OOQFixtureFactory : public HelloWorldFixtureFactory { typedef OOQueueFixture CommandQueueFixture; }; typedef TwoWalkerTest OOQWithTwoWalkers; HWTEST_F(OOQWithTwoWalkers, GivenTwoCommandQueuesWhenEnqueuingKernelThenTwoDifferentWalkersAreCreated) { enqueueTwoKernels(); EXPECT_NE(itorWalker1, itorWalker2); } HWTEST_F(OOQWithTwoWalkers, GivenTwoCommandQueuesWhenEnqueuingKernelThenOnePipelineSelectExists) { enqueueTwoKernels(); int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect(); EXPECT_EQ(1, numCommands); } HWCMDTEST_F(IGFX_GEN8_CORE, OOQWithTwoWalkers, GivenTwoCommandQueuesWhenEnqueuingKernelThenThereIsOneVfeState) { enqueueTwoKernels(); auto numCommands = getCommandsList().size(); EXPECT_EQ(1u, numCommands); } HWTEST_F(OOQWithTwoWalkers, GivenTwoCommandQueuesWhenEnqueuingKernelThenOnePipeControlIsInsertedBetweenWalkers) { enqueueTwoKernels(); auto itorCmd = find(itorWalker1, itorWalker2); // Workaround for DRM i915 coherency patch // EXPECT_EQ(itorWalker2, itorCmd); EXPECT_NE(itorWalker2, itorCmd); } 
compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/enqueue_map_buffer_fixture.h000066400000000000000000000026661422164147700322370ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/test/unit_test/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/command_queue/enqueue_fixture.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "gtest/gtest.h" namespace NEO { struct EnqueueMapBufferTypeTest : public CommandEnqueueFixture, public ::testing::Test { void SetUp() override { CommandEnqueueFixture::SetUp(); BufferDefaults::context = new MockContext; srcBuffer = BufferHelper<>::create(); } void TearDown() override { delete srcBuffer; delete BufferDefaults::context; CommandEnqueueFixture::TearDown(); } protected: template void enqueueMapBuffer(cl_bool blocking = CL_TRUE) { cl_int retVal; EnqueueMapBufferHelper<>::Traits::errcodeRet = &retVal; auto mappedPointer = EnqueueMapBufferHelper<>::enqueueMapBuffer( pCmdQ, srcBuffer, blocking); EXPECT_EQ(CL_SUCCESS, *EnqueueMapBufferHelper<>::Traits::errcodeRet); EXPECT_NE(nullptr, mappedPointer); EnqueueMapBufferHelper<>::Traits::errcodeRet = nullptr; parseCommands(*pCmdQ); } Buffer *srcBuffer = nullptr; }; } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/enqueue_map_buffer_tests.cpp000066400000000000000000000570341422164147700322450ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/mocks/mock_gmm.h" #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/command_queue/command_queue_fixture.h" #include 
"opencl/test/unit_test/command_queue/enqueue_map_buffer_fixture.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "gtest/gtest.h" using namespace NEO; struct EnqueueMapBufferTest : public ClDeviceFixture, public CommandQueueHwFixture, public ::testing::Test { typedef CommandQueueHwFixture CommandQueueFixture; EnqueueMapBufferTest() { } void SetUp() override { ClDeviceFixture::SetUp(); CommandQueueFixture::SetUp(pClDevice, 0); BufferDefaults::context = new MockContext; buffer = BufferHelper>::create(); } void TearDown() override { delete buffer; delete BufferDefaults::context; CommandQueueFixture::TearDown(); ClDeviceFixture::TearDown(); } cl_int retVal = CL_SUCCESS; Buffer *buffer = nullptr; char srcMemory[128]; }; TEST_F(EnqueueMapBufferTest, GivenBufferAddressesWhenMappingBufferThenCpuAndGpuAddressAreEqualWhenZeroCopyIsUsed) { auto mapFlags = CL_MAP_READ; auto size = 0; auto offset = 0; cl_int retVal; auto ptr = pCmdQ->enqueueMapBuffer( buffer, true, mapFlags, offset, size, 0, nullptr, nullptr, retVal); if (buffer->isMemObjZeroCopy()) { EXPECT_EQ(buffer->getCpuAddress(), ptr); } else { EXPECT_NE(buffer->getCpuAddress(), ptr); } } TEST_F(EnqueueMapBufferTest, givenBufferWithUseHostPtrFlagWhenMappedThenReturnHostPtr) { auto hostPtr = buffer->getHostPtr(); EXPECT_NE(nullptr, hostPtr); auto mapFlags = CL_MAP_READ; auto size = 2; auto offset = 2; cl_int retVal; auto ptr = pCmdQ->enqueueMapBuffer(buffer, true, mapFlags, offset, size, 0, nullptr, nullptr, retVal); EXPECT_EQ(ptr, ptrOffset(hostPtr, offset)); } TEST_F(EnqueueMapBufferTest, GivenCmdqAndValidArgsWhenMappingBufferThenSuccessIsReturned) { auto mapFlags = CL_MAP_READ; auto size = 0; auto offset = 0; auto retVal = 
CL_INVALID_VALUE; auto ptr = pCmdQ->enqueueMapBuffer( buffer, true, mapFlags, offset, size, 0, nullptr, nullptr, retVal); EXPECT_NE(nullptr, ptr); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(EnqueueMapBufferTest, GivenChangesInHostBufferWhenMappingBufferThenChangesArePropagatedToDeviceMemory) { //size not aligned to cacheline size int bufferSize = 20; void *ptrHost = malloc(bufferSize); char *charHostPtr = static_cast(ptrHost); //first fill with data for (int i = 0; i < bufferSize; i++) { charHostPtr[i] = 1; } auto buffer = clCreateBuffer( BufferDefaults::context, CL_MEM_USE_HOST_PTR, bufferSize, charHostPtr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); auto ptrResult = clEnqueueMapBuffer( pCmdQ, buffer, CL_TRUE, CL_MAP_WRITE, 0, 8, 0, nullptr, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(ptrResult, charHostPtr) << "Map Buffer should return host_pointer used during creation with CL_MEM_USE_HOST_PTR"; //check data for (int i = 0; i < bufferSize; i++) { EXPECT_EQ(charHostPtr[i], 1); //change the data charHostPtr[i] = 2; } retVal = clEnqueueUnmapMemObject( pCmdQ, buffer, ptrResult, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); //now map again and see if data propagated clEnqueueMapBuffer( pCmdQ, buffer, CL_TRUE, CL_MAP_WRITE, 0, 8, 0, nullptr, nullptr, &retVal); //check data for (int i = 0; i < bufferSize; i++) { EXPECT_EQ(charHostPtr[i], 2); } retVal = clReleaseMemObject(buffer); EXPECT_EQ(CL_SUCCESS, retVal); free(ptrHost); } TEST_F(EnqueueMapBufferTest, GivenChangesInHostBufferWithOffsetWhenMappingBufferThenChangesArePropagatedToDeviceMemory) { //size not aligned to cacheline size int bufferSize = 20; void *ptrHost = malloc(bufferSize); char *charHostPtr = static_cast(ptrHost); size_t offset = 4; //first fill with data for (int i = 0; i < bufferSize; i++) { charHostPtr[i] = 1; } auto buffer = clCreateBuffer( BufferDefaults::context, CL_MEM_USE_HOST_PTR, bufferSize, charHostPtr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); 
EXPECT_NE(nullptr, buffer); auto ptrResult = clEnqueueMapBuffer( pCmdQ, buffer, CL_TRUE, CL_MAP_WRITE, offset, 8, 0, nullptr, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(ptrResult, charHostPtr + offset) << "Map Buffer should return host_pointer used during creation with CL_MEM_USE_HOST_PTR"; //check data for (int i = (int)offset; i < (int)(bufferSize - (int)offset); i++) { EXPECT_EQ(charHostPtr[i], 1); } retVal = clEnqueueUnmapMemObject( pCmdQ, buffer, ptrResult, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(buffer); EXPECT_EQ(CL_SUCCESS, retVal); free(ptrHost); } TEST_F(EnqueueMapBufferTest, GivenValidArgsWhenMappingBufferThenSuccessIsReturned) { auto buffer = clCreateBuffer( BufferDefaults::context, CL_MEM_READ_WRITE, 20, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); auto ptrResult = clEnqueueMapBuffer( pCmdQ, buffer, CL_TRUE, CL_MAP_READ, 0, 8, 0, nullptr, nullptr, &retVal); EXPECT_NE(nullptr, ptrResult); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(buffer); EXPECT_EQ(CL_SUCCESS, retVal); } HWTEST_F(EnqueueMapBufferTest, givenNonBlockingReadOnlyMapBufferOnZeroCopyBufferWhenItIsCalledThenSynchronizationIsNotMadeUntilWaitForEvents) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableAsyncEventsHandler.set(false); cl_event mapEventReturned = nullptr; cl_event unmapEventReturned = nullptr; *pTagMemory = 0; MockKernelWithInternals kernel(*pClDevice); size_t GWS = 1; struct E2Clb { static void CL_CALLBACK SignalEv2(cl_event e, cl_int status, void *data) { uint32_t *callbackCalled = static_cast(data); *callbackCalled = 1; } }; auto buffer = clCreateBuffer( BufferDefaults::context, CL_MEM_READ_WRITE, 20, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); MockCommandQueueHw mockCmdQueue(context, pClDevice, nullptr); auto &commandStreamReceiver = mockCmdQueue.getGpgpuCommandStreamReceiver(); uint32_t taskCount = 
commandStreamReceiver.peekTaskCount(); EXPECT_EQ(0u, taskCount); // enqueue something that can be finished... retVal = clEnqueueNDRangeKernel(&mockCmdQueue, kernel.mockMultiDeviceKernel, 1, 0, &GWS, nullptr, 0, nullptr, nullptr); EXPECT_EQ(retVal, CL_SUCCESS); EXPECT_EQ(1u, commandStreamReceiver.peekTaskCount()); auto ptrResult = clEnqueueMapBuffer( &mockCmdQueue, buffer, CL_FALSE, CL_MAP_READ, 0, 8, 0, nullptr, &mapEventReturned, &retVal); EXPECT_NE(nullptr, ptrResult); EXPECT_EQ(CL_SUCCESS, retVal); //no dc flush required at this point EXPECT_EQ(1u, commandStreamReceiver.peekTaskCount()); taskCount = commandStreamReceiver.peekTaskCount(); EXPECT_EQ(1u, taskCount); auto neoEvent = castToObject(mapEventReturned); //if task count of csr is higher then event task count with proper dc flushing then we are fine EXPECT_EQ(1u, neoEvent->getCompletionStamp()); //this can't be completed as task count is not reached yet EXPECT_FALSE(neoEvent->updateStatusAndCheckCompletion()); EXPECT_TRUE(CL_COMMAND_MAP_BUFFER == neoEvent->getCommandType()); auto callbackCalled = 0u; *pTagMemory += 4; clSetEventCallback(mapEventReturned, CL_COMPLETE, E2Clb::SignalEv2, (void *)&callbackCalled); //wait for events needs to flush DC as event requires this. 
retVal = clWaitForEvents(1, &mapEventReturned); EXPECT_EQ(CL_SUCCESS, retVal); //wait for event do not sent flushTask EXPECT_EQ(1u, commandStreamReceiver.peekTaskCount()); EXPECT_EQ(1u, mockCmdQueue.latestTaskCountWaited); EXPECT_TRUE(neoEvent->updateStatusAndCheckCompletion()); EXPECT_EQ(1u, callbackCalled); retVal = clEnqueueUnmapMemObject( &mockCmdQueue, buffer, ptrResult, 0, nullptr, &unmapEventReturned); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, commandStreamReceiver.peekTaskCount()); auto unmapEvent = castToObject(unmapEventReturned); EXPECT_TRUE(CL_COMMAND_UNMAP_MEM_OBJECT == unmapEvent->getCommandType()); retVal = clWaitForEvents(1, &unmapEventReturned); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(buffer); EXPECT_EQ(CL_SUCCESS, retVal); clReleaseEvent(mapEventReturned); clReleaseEvent(unmapEventReturned); } TEST_F(EnqueueMapBufferTest, givenNonReadOnlyBufferWhenMappedOnGpuThenSetValidEventCmds) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableAsyncEventsHandler.set(false); cl_event mapEventReturned = nullptr; cl_event unmapEventReturned = nullptr; *pTagMemory = 5; std::unique_ptr buffer(Buffer::create(BufferDefaults::context, CL_MEM_READ_WRITE, 20, nullptr, retVal)); buffer->setSharingHandler(new SharingHandler()); auto gfxAllocation = buffer->getGraphicsAllocation(pDevice->getRootDeviceIndex()); for (auto handleId = 0u; handleId < gfxAllocation->getNumGmms(); handleId++) { gfxAllocation->setGmm(new MockGmm(pDevice->getGmmClientContext()), handleId); } buffer->forceDisallowCPUCopy = true; EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer.get()); auto &commandStreamReceiver = pCmdQ->getGpgpuCommandStreamReceiver(); EXPECT_EQ(0u, commandStreamReceiver.peekTaskCount()); auto ptrResult = clEnqueueMapBuffer(pCmdQ, buffer.get(), CL_FALSE, CL_MAP_WRITE, 0, 8, 0, nullptr, &mapEventReturned, &retVal); EXPECT_NE(nullptr, ptrResult); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, commandStreamReceiver.peekTaskCount()); auto 
mapEvent = castToObject(mapEventReturned); EXPECT_TRUE(CL_COMMAND_MAP_BUFFER == mapEvent->getCommandType()); retVal = clWaitForEvents(1, &mapEventReturned); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clEnqueueUnmapMemObject(pCmdQ, buffer.get(), ptrResult, 0, nullptr, &unmapEventReturned); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(2u, commandStreamReceiver.peekTaskCount()); auto unmapEvent = castToObject(unmapEventReturned); EXPECT_TRUE(CL_COMMAND_UNMAP_MEM_OBJECT == unmapEvent->getCommandType()); retVal = clWaitForEvents(1, &unmapEventReturned); EXPECT_EQ(CL_SUCCESS, retVal); clReleaseEvent(mapEventReturned); clReleaseEvent(unmapEventReturned); } TEST_F(EnqueueMapBufferTest, givenReadOnlyBufferWhenMappedOnGpuThenSetValidEventCmds) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableAsyncEventsHandler.set(false); cl_event mapEventReturned = nullptr; cl_event unmapEventReturned = nullptr; *pTagMemory = 5; std::unique_ptr buffer(Buffer::create(BufferDefaults::context, CL_MEM_READ_WRITE, 20, nullptr, retVal)); buffer->setSharingHandler(new SharingHandler()); auto gfxAllocation = buffer->getGraphicsAllocation(pDevice->getRootDeviceIndex()); for (auto handleId = 0u; handleId < gfxAllocation->getNumGmms(); handleId++) { gfxAllocation->setGmm(new MockGmm(pDevice->getGmmClientContext()), handleId); } EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer.get()); auto &commandStreamReceiver = pCmdQ->getGpgpuCommandStreamReceiver(); EXPECT_EQ(0u, commandStreamReceiver.peekTaskCount()); auto ptrResult = clEnqueueMapBuffer(pCmdQ, buffer.get(), CL_FALSE, CL_MAP_READ, 0, 8, 0, nullptr, &mapEventReturned, &retVal); EXPECT_NE(nullptr, ptrResult); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, commandStreamReceiver.peekTaskCount()); auto mapEvent = castToObject(mapEventReturned); EXPECT_TRUE(CL_COMMAND_MAP_BUFFER == mapEvent->getCommandType()); retVal = clWaitForEvents(1, &mapEventReturned); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clEnqueueUnmapMemObject(pCmdQ, buffer.get(), 
ptrResult, 0, nullptr, &unmapEventReturned); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, commandStreamReceiver.peekTaskCount()); auto unmapEvent = castToObject(unmapEventReturned); EXPECT_TRUE(CL_COMMAND_UNMAP_MEM_OBJECT == unmapEvent->getCommandType()); retVal = clWaitForEvents(1, &unmapEventReturned); EXPECT_EQ(CL_SUCCESS, retVal); clReleaseEvent(mapEventReturned); clReleaseEvent(unmapEventReturned); } TEST_F(EnqueueMapBufferTest, givenNonBlockingMapBufferAfterL3IsAlreadyFlushedThenEventIsSignaledAsCompleted) { cl_event eventReturned = nullptr; uint32_t tagHW = 0; *pTagMemory = tagHW; MockKernelWithInternals kernel(*pClDevice); size_t GWS = 1; auto buffer = clCreateBuffer( BufferDefaults::context, CL_MEM_READ_WRITE, 20, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); auto &commandStreamReceiver = pCmdQ->getGpgpuCommandStreamReceiver(); uint32_t taskCount = commandStreamReceiver.peekTaskCount(); EXPECT_EQ(0u, taskCount); // enqueue something that map buffer needs to wait for retVal = clEnqueueNDRangeKernel(pCmdQ, kernel.mockMultiDeviceKernel, 1, 0, &GWS, nullptr, 0, nullptr, nullptr); EXPECT_EQ(retVal, CL_SUCCESS); auto NDRcompletionStamp = commandStreamReceiver.peekTaskCount(); //simulate that NDR is done and DC was flushed auto forcedLatestSentDC = NDRcompletionStamp + 1; *pTagMemory = forcedLatestSentDC; auto ptrResult = clEnqueueMapBuffer( pCmdQ, buffer, CL_FALSE, CL_MAP_READ, 0, 8, 0, nullptr, &eventReturned, &retVal); EXPECT_NE(nullptr, ptrResult); EXPECT_EQ(CL_SUCCESS, retVal); taskCount = commandStreamReceiver.peekTaskCount(); EXPECT_EQ(1u, taskCount); auto neoEvent = castToObject(eventReturned); //if task count of csr is higher then event task count with proper dc flushing then we are fine EXPECT_EQ(1u, neoEvent->getCompletionStamp()); EXPECT_TRUE(neoEvent->updateStatusAndCheckCompletion()); //flush task was not called EXPECT_EQ(1u, commandStreamReceiver.peekLatestSentTaskCount()); //wait for events shouldn't call flush task 
retVal = clWaitForEvents(1, &eventReturned); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, commandStreamReceiver.peekLatestSentTaskCount()); retVal = clReleaseMemObject(buffer); EXPECT_EQ(CL_SUCCESS, retVal); clReleaseEvent(eventReturned); } HWTEST_F(EnqueueMapBufferTest, GivenBufferThatIsNotZeroCopyWhenNonBlockingMapIsCalledThenFinishIsCalledAndDataTransferred) { const auto bufferSize = 100; auto localSize = bufferSize; char misaligned[bufferSize] = {1}; MockKernelWithInternals kernel(*pClDevice); size_t GWS = 1; uintptr_t address = (uintptr_t)&misaligned[0]; if (!(address & (MemoryConstants::cacheLineSize - 1))) { address++; localSize--; } auto buffer = clCreateBuffer( BufferDefaults::context, CL_MEM_USE_HOST_PTR, localSize, (void *)address, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); auto pBuffer = castToObject(buffer); ASSERT_FALSE(pBuffer->isMemObjZeroCopy()); MockCommandQueueHw mockCmdQueue(context, pClDevice, nullptr); // enqueue something that can be finished retVal = clEnqueueNDRangeKernel(&mockCmdQueue, kernel.mockMultiDeviceKernel, 1, 0, &GWS, nullptr, 0, nullptr, nullptr); EXPECT_EQ(retVal, CL_SUCCESS); auto &commandStreamReceiver = mockCmdQueue.getGpgpuCommandStreamReceiver(); uint32_t taskCount = commandStreamReceiver.peekTaskCount(); EXPECT_EQ(1u, taskCount); auto ptrResult = clEnqueueMapBuffer( &mockCmdQueue, buffer, CL_FALSE, CL_MAP_READ, 0, localSize, 0, nullptr, nullptr, &retVal); EXPECT_NE(nullptr, ptrResult); EXPECT_EQ(CL_SUCCESS, retVal); commandStreamReceiver.peekTaskCount(); EXPECT_EQ(1u, commandStreamReceiver.peekLatestSentTaskCount()); EXPECT_EQ(1u, mockCmdQueue.latestTaskCountWaited); retVal = clReleaseMemObject(buffer); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(EnqueueMapBufferTest, GivenWrongMemObjectWhenMapIsCalledThenInvalidMemObjectErrorCodeIsReturned) { MockBuffer buffer; cl_mem mem = &buffer; buffer.magic = -1; auto ptrResult = clEnqueueMapBuffer( pCmdQ, mem, CL_FALSE, CL_MAP_READ, 0, 8, 0, nullptr, nullptr, 
&retVal); EXPECT_EQ(nullptr, ptrResult); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } HWTEST_F(EnqueueMapBufferTest, GivenPtrToReturnEventWhenMappingBufferThenEventIsNotNull) { cl_event eventReturned = NULL; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.taskCount = 100; auto buffer = clCreateBuffer( BufferDefaults::context, CL_MEM_READ_WRITE, 20, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); auto ptrResult = clEnqueueMapBuffer( pCmdQ, buffer, CL_FALSE, CL_MAP_READ, 0, 8, 0, nullptr, &eventReturned, &retVal); EXPECT_NE(nullptr, ptrResult); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, eventReturned); auto eventObject = castToObject(eventReturned); EXPECT_EQ(0u, eventObject->peekTaskCount()); EXPECT_TRUE(eventObject->updateStatusAndCheckCompletion()); retVal = clEnqueueUnmapMemObject( pCmdQ, buffer, ptrResult, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); clReleaseEvent(eventReturned); clReleaseMemObject(buffer); } TEST_F(EnqueueMapBufferTest, GivenZeroCopyBufferWhenMapBufferWithoutEventsThenCommandStreamReceiverUpdatesRequiredDCFlushCount) { auto &commandStreamReceiver = pCmdQ->getGpgpuCommandStreamReceiver(); auto buffer = clCreateBuffer( BufferDefaults::context, CL_MEM_READ_WRITE, 20, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); auto ptrResult = clEnqueueMapBuffer( pCmdQ, buffer, CL_FALSE, CL_MAP_READ, 0, 8, 0, nullptr, nullptr, &retVal); EXPECT_NE(nullptr, ptrResult); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, commandStreamReceiver.peekLatestSentTaskCount()); clReleaseMemObject(buffer); } TEST_F(EnqueueMapBufferTest, givenBufferWithoutUseHostPtrFlagWhenMappedOnCpuThenSetAllMapParams) { std::unique_ptr buffer(Buffer::create(BufferDefaults::context, CL_MEM_READ_WRITE, 10, nullptr, retVal)); EXPECT_NE(nullptr, buffer); EXPECT_TRUE(buffer->mappingOnCpuAllowed()); size_t mapSize = 3; size_t mapOffset = 2; auto mappedPtr = clEnqueueMapBuffer(pCmdQ, 
buffer.get(), CL_FALSE, CL_MAP_READ, mapOffset, mapSize, 0, nullptr, nullptr, &retVal); EXPECT_NE(nullptr, mappedPtr); MapInfo mappedInfo; auto success = buffer->findMappedPtr(mappedPtr, mappedInfo); EXPECT_TRUE(success); EXPECT_NE(nullptr, mappedInfo.ptr); EXPECT_EQ(mapOffset, mappedInfo.offset[0]); EXPECT_EQ(0u, mappedInfo.offset[1]); EXPECT_EQ(0u, mappedInfo.offset[2]); EXPECT_EQ(mapSize, mappedInfo.size[0]); EXPECT_EQ(0u, mappedInfo.size[1]); EXPECT_EQ(0u, mappedInfo.size[2]); auto expectedPtr = ptrOffset(buffer->getCpuAddressForMapping(), mapOffset); EXPECT_EQ(mappedPtr, expectedPtr); } TEST_F(EnqueueMapBufferTest, givenBufferWithUseHostPtrFlagWhenMappedOnCpuThenSetAllMapParams) { uint8_t hostPtr[10] = {}; std::unique_ptr buffer(Buffer::create(BufferDefaults::context, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, 10, hostPtr, retVal)); EXPECT_NE(nullptr, buffer); EXPECT_TRUE(buffer->mappingOnCpuAllowed()); size_t mapSize = 3; size_t mapOffset = 2; auto mappedPtr = clEnqueueMapBuffer(pCmdQ, buffer.get(), CL_FALSE, CL_MAP_READ, mapOffset, mapSize, 0, nullptr, nullptr, &retVal); EXPECT_NE(nullptr, mappedPtr); MapInfo mappedInfo; auto success = buffer->findMappedPtr(mappedPtr, mappedInfo); EXPECT_TRUE(success); EXPECT_NE(nullptr, mappedInfo.ptr); EXPECT_EQ(mapOffset, mappedInfo.offset[0]); EXPECT_EQ(0u, mappedInfo.offset[1]); EXPECT_EQ(0u, mappedInfo.offset[2]); EXPECT_EQ(mapSize, mappedInfo.size[0]); EXPECT_EQ(0u, mappedInfo.size[1]); EXPECT_EQ(0u, mappedInfo.size[2]); auto expectedPtr = ptrOffset(buffer->getCpuAddressForMapping(), mapOffset); EXPECT_EQ(mappedPtr, expectedPtr); } HWTEST_F(EnqueueMapBufferTest, givenMapBufferOnGpuWhenMappingBufferThenStoreGraphicsAllocationInMapInfo) { uint8_t hostPtr[10] = {}; std::unique_ptr bufferForCpuMap(Buffer::create(context, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, 10, hostPtr, retVal)); ASSERT_NE(nullptr, bufferForCpuMap); ASSERT_TRUE(bufferForCpuMap->mappingOnCpuAllowed()); std::unique_ptr 
bufferForGpuMap(Buffer::create(context, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, 10, hostPtr, retVal)); ASSERT_NE(nullptr, bufferForGpuMap); forceMapBufferOnGpu(*bufferForGpuMap); ASSERT_FALSE(bufferForGpuMap->mappingOnCpuAllowed()); cl_int retVal{}; void *pointerMappedOnCpu = clEnqueueMapBuffer(pCmdQ, bufferForCpuMap.get(), CL_FALSE, CL_MAP_READ, 0, 10, 0, nullptr, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); void *pointerMappedOnGpu = clEnqueueMapBuffer(pCmdQ, bufferForGpuMap.get(), CL_FALSE, CL_MAP_READ, 0, 10, 0, nullptr, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); MapInfo mapInfo{}; EXPECT_TRUE(bufferForCpuMap->findMappedPtr(pointerMappedOnCpu, mapInfo)); EXPECT_EQ(nullptr, mapInfo.graphicsAllocation); EXPECT_TRUE(bufferForGpuMap->findMappedPtr(pointerMappedOnGpu, mapInfo)); EXPECT_NE(nullptr, mapInfo.graphicsAllocation); } compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/enqueue_map_image_tests.cpp000066400000000000000000001134341422164147700320530ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/os_interface/os_context.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/libult/ult_command_stream_receiver.h" #include "shared/test/common/mocks/mock_allocation_properties.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/common/test_macros/test_checks_shared.h" #include "opencl/source/event/user_event.h" #include "opencl/source/helpers/cl_memory_properties_helpers.h" #include "opencl/test/unit_test/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/command_queue/command_queue_fixture.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include 
"opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" using namespace NEO; struct EnqueueMapImageTest : public ClDeviceFixture, public CommandQueueHwFixture, public ::testing::Test { typedef CommandQueueHwFixture CommandQueueFixture; EnqueueMapImageTest() { } void SetUp() override { REQUIRE_IMAGES_OR_SKIP(defaultHwInfo); ClDeviceFixture::SetUp(); CommandQueueFixture::SetUp(pClDevice, 0); context = new MockContext(pClDevice); image = ImageHelper>::create(context); } void TearDown() override { if (IsSkipped()) { return; } delete image; context->release(); CommandQueueFixture::TearDown(); ClDeviceFixture::TearDown(); } MockContext *context; cl_int retVal = CL_INVALID_VALUE; Image *image = nullptr; char srcMemory[128]; }; struct EnqueueMapImageParamsTest : public EnqueueMapImageTest, public ::testing::WithParamInterface { }; TEST_F(EnqueueMapImageTest, GivenTiledImageWhenMappingImageThenPointerIsReused) { auto mapFlags = CL_MAP_READ; const size_t origin[3] = {0, 0, 0}; const size_t region[3] = {1, 1, 1}; auto mapAllocation = image->getMapAllocation(pClDevice->getRootDeviceIndex()); EXPECT_NE(nullptr, mapAllocation); auto ptr1 = pCmdQ->enqueueMapImage( image, true, mapFlags, origin, region, nullptr, nullptr, 0, nullptr, nullptr, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, image->getHostPtr()); mapAllocation = image->getMapAllocation(pClDevice->getRootDeviceIndex()); EXPECT_NE(nullptr, mapAllocation); auto ptr2 = pCmdQ->enqueueMapImage( image, true, mapFlags, origin, region, nullptr, nullptr, 0, nullptr, nullptr, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(ptr1, ptr2); retVal = pCmdQ->enqueueUnmapMemObject(image, ptr1, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } HWTEST_F(EnqueueMapImageTest, givenAllocatedMapPtrAndMapWithDifferentOriginIsCalledThenReturnDifferentPointers) { if (!UnitTestHelper::tiledImagesSupported) { GTEST_SKIP(); } std::unique_ptr img(Image2dHelper::create(context)); auto 
mapFlags = CL_MAP_READ; const size_t origin1[3] = {0, 0, 0}; const size_t origin2[3] = {2, 2, 0}; const size_t region[3] = {1, 1, 1}; auto ptr1 = pCmdQ->enqueueMapImage(img.get(), true, mapFlags, origin1, region, nullptr, nullptr, 0, nullptr, nullptr, retVal); EXPECT_EQ(CL_SUCCESS, retVal); auto ptr2 = pCmdQ->enqueueMapImage(img.get(), true, mapFlags, origin2, region, nullptr, nullptr, 0, nullptr, nullptr, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(ptr1, ptr2); EXPECT_NE(nullptr, img->getAllocatedMapPtr()); size_t mapOffset = img->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes * origin2[0] + img->getHostPtrRowPitch() * origin2[1]; EXPECT_EQ(ptr2, ptrOffset(ptr1, mapOffset)); } typedef EnqueueMapImageParamsTest MipMapMapImageParamsTest; TEST_P(MipMapMapImageParamsTest, givenAllocatedMapPtrWhenMapsWithDifferentMipMapsAreCalledThenReturnDifferentPointers) { auto image_type = (cl_mem_object_type)GetParam(); cl_int retVal = CL_SUCCESS; cl_image_desc imageDesc = {}; imageDesc.image_type = image_type; imageDesc.num_mip_levels = 10; imageDesc.image_width = 4; imageDesc.image_height = 1; imageDesc.image_depth = 1; const size_t origin1[4] = {0, 0, 0, 0}; size_t origin2[4] = {0, 0, 0, 0}; std::unique_ptr image; size_t mapOffset = 16u; switch (image_type) { case CL_MEM_OBJECT_IMAGE1D: origin2[1] = 1; image = std::unique_ptr(ImageHelper::create(context, &imageDesc)); break; case CL_MEM_OBJECT_IMAGE1D_ARRAY: origin2[2] = 1; imageDesc.image_array_size = 2; image = std::unique_ptr(ImageHelper::create(context, &imageDesc)); break; case CL_MEM_OBJECT_IMAGE2D: origin2[2] = 1; image = std::unique_ptr(ImageHelper::create(context, &imageDesc)); break; case CL_MEM_OBJECT_IMAGE2D_ARRAY: origin2[3] = 1; imageDesc.image_array_size = 2; image = std::unique_ptr(ImageHelper::create(context, &imageDesc)); break; case CL_MEM_OBJECT_IMAGE3D: origin2[3] = 1; image = std::unique_ptr(ImageHelper::create(context, &imageDesc)); break; } EXPECT_NE(nullptr, image.get()); auto 
mapFlags = CL_MAP_READ; const size_t region[3] = {1, 1, 1}; auto ptr1 = pCmdQ->enqueueMapImage(image.get(), true, mapFlags, origin1, region, nullptr, nullptr, 0, nullptr, nullptr, retVal); EXPECT_EQ(CL_SUCCESS, retVal); auto ptr2 = pCmdQ->enqueueMapImage(image.get(), true, mapFlags, origin2, region, nullptr, nullptr, 0, nullptr, nullptr, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(ptr1, ptr2); if (image->mappingOnCpuAllowed() == false) { EXPECT_NE(nullptr, image->getAllocatedMapPtr()); } EXPECT_EQ(ptr2, ptrOffset(ptr1, mapOffset)); } INSTANTIATE_TEST_CASE_P(MipMapMapImageParamsTest_givenAllocatedMapPtrAndMapWithDifferentMipMapsIsCalledThenReturnDifferentPointers, MipMapMapImageParamsTest, ::testing::Values(CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE2D, CL_MEM_OBJECT_IMAGE2D_ARRAY, CL_MEM_OBJECT_IMAGE3D)); template struct mockedImage : public ImageHw { using ImageHw::ImageHw; void setAllocatedMapPtr(void *allocatedMapPtr) override { ownershipTaken = this->hasOwnership(); MemObj::setAllocatedMapPtr(allocatedMapPtr); } bool ownershipTaken = false; }; HWTEST_F(EnqueueMapImageTest, givenTiledImageWhenMapImageIsCalledThenStorageIsSetWithImageMutexTaken) { if (!UnitTestHelper::tiledImagesSupported) { GTEST_SKIP(); } auto imageFormat = image->getImageFormat(); auto imageDesc = image->getImageDesc(); auto graphicsAllocation = image->getGraphicsAllocation(pClDevice->getRootDeviceIndex()); auto surfaceFormatInfo = image->getSurfaceFormatInfo(); mockedImage mockImage(context, {}, 0, 0, 4096u, nullptr, nullptr, imageFormat, imageDesc, false, GraphicsAllocationHelper::toMultiGraphicsAllocation(graphicsAllocation), true, 0, 0, surfaceFormatInfo, nullptr); mockImage.createFunction = image->createFunction; auto mapAllocation = mockImage.getMapAllocation(pClDevice->getRootDeviceIndex()); EXPECT_EQ(nullptr, mapAllocation); EXPECT_EQ(nullptr, mockImage.getHostPtr()); auto mapFlags = CL_MAP_READ; const size_t origin[3] = {0, 0, 0}; const size_t 
region[3] = {1, 1, 1}; auto apiMapPtr = pCmdQ->enqueueMapImage( &mockImage, true, mapFlags, origin, region, nullptr, nullptr, 0, nullptr, nullptr, retVal); EXPECT_TRUE(mockImage.ownershipTaken); auto mapPtr = mockImage.getAllocatedMapPtr(); EXPECT_EQ(apiMapPtr, mapPtr); mapAllocation = mockImage.getMapAllocation(pClDevice->getRootDeviceIndex()); EXPECT_NE(nullptr, mapAllocation); EXPECT_EQ(apiMapPtr, mapAllocation->getUnderlyingBuffer()); auto osContextId = pCmdQ->getGpgpuCommandStreamReceiver().getOsContext().getContextId(); auto expectedTaskCount = pCmdQ->getGpgpuCommandStreamReceiver().peekTaskCount(); auto actualMapAllocationTaskCount = mapAllocation->getTaskCount(osContextId); EXPECT_EQ(expectedTaskCount, actualMapAllocationTaskCount); pDevice->getMemoryManager()->freeGraphicsMemory(mockImage.getMapAllocation(pClDevice->getRootDeviceIndex())); mockImage.releaseAllocatedMapPtr(); } TEST_F(EnqueueMapImageTest, WhenMappingImageThenCpuAndGpuAddressAreEqualWhenZeroCopyIsUsed) { auto mapFlags = CL_MAP_READ; const size_t origin[3] = {0, 0, 0}; const size_t region[3] = {1, 1, 1}; size_t imageRowPitch = 0; size_t imageSlicePitch = 0; auto ptr = pCmdQ->enqueueMapImage( image, true, mapFlags, origin, region, &imageRowPitch, &imageSlicePitch, 0, nullptr, nullptr, retVal); if (image->isMemObjZeroCopy()) { EXPECT_EQ(image->getCpuAddress(), ptr); } else { EXPECT_NE(image->getCpuAddress(), ptr); } size_t imageRowPitchRef = 0; image->getImageInfo(CL_IMAGE_ROW_PITCH, sizeof(imageRowPitchRef), &imageRowPitchRef, nullptr); EXPECT_EQ(imageRowPitch, imageRowPitchRef); size_t imageSlicePitchRef = 0; image->getImageInfo(CL_IMAGE_SLICE_PITCH, sizeof(imageSlicePitchRef), &imageSlicePitchRef, nullptr); EXPECT_EQ(imageSlicePitch, imageSlicePitchRef); } TEST_F(EnqueueMapImageTest, GivenCmdqAndValidArgsWhenMappingImageThenSuccessIsReturned) { auto mapFlags = CL_MAP_READ; const size_t origin[3] = {0, 0, 0}; const size_t region[3] = {1, 1, 1}; size_t imageRowPitch = 0; size_t imageSlicePitch 
= 0; auto ptr = pCmdQ->enqueueMapImage( image, true, mapFlags, origin, region, &imageRowPitch, &imageSlicePitch, 0, nullptr, nullptr, retVal); EXPECT_NE(nullptr, ptr); EXPECT_EQ(CL_SUCCESS, retVal); size_t imageRowPitchRef = 0; image->getImageInfo(CL_IMAGE_ROW_PITCH, sizeof(imageRowPitchRef), &imageRowPitchRef, nullptr); EXPECT_EQ(imageRowPitch, imageRowPitchRef); size_t imageSlicePitchRef = 0; image->getImageInfo(CL_IMAGE_SLICE_PITCH, sizeof(imageSlicePitchRef), &imageSlicePitchRef, nullptr); EXPECT_EQ(imageSlicePitch, imageSlicePitchRef); } HWTEST_F(EnqueueMapImageTest, givenNonReadOnlyMapWithOutEventWhenMappedThenSetEventAndIncraseTaskCountFromWriteImage) { if (!UnitTestHelper::tiledImagesSupported) { GTEST_SKIP(); } DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableAsyncEventsHandler.set(false); cl_event mapEventReturned = nullptr; cl_event unmapEventReturned = nullptr; uint32_t tagHW = 0; auto mapFlags = CL_MAP_WRITE; const size_t origin[3] = {0, 0, 0}; const size_t region[3] = {1, 1, 1}; size_t imageRowPitch = 0; size_t imageSlicePitch = 0; size_t GWS = 1; MockKernelWithInternals kernel(*pClDevice); *pTagMemory = tagHW; auto &commandStreamReceiver = pCmdQ->getGpgpuCommandStreamReceiver(); auto tag_address = commandStreamReceiver.getTagAddress(); EXPECT_TRUE(pTagMemory == tag_address); struct E2Clb { static void CL_CALLBACK SignalEv2(cl_event e, cl_int status, void *data) { uint32_t *pTagMem = static_cast(data); *pTagMem = 4; } }; uint32_t taskCount = commandStreamReceiver.peekTaskCount(); EXPECT_EQ(1u, taskCount); // enqueue something that can be finished... 
retVal = clEnqueueNDRangeKernel(pCmdQ, kernel.mockMultiDeviceKernel, 1, 0, &GWS, nullptr, 0, nullptr, nullptr); EXPECT_EQ(retVal, CL_SUCCESS); *pTagMemory = tagHW += 3; auto ptr = pCmdQ->enqueueMapImage( image, false, mapFlags, origin, region, &imageRowPitch, &imageSlicePitch, 0, nullptr, &mapEventReturned, retVal); EXPECT_NE(nullptr, ptr); EXPECT_EQ(CL_SUCCESS, retVal); auto mapEvent = castToObject(mapEventReturned); EXPECT_TRUE(CL_COMMAND_MAP_IMAGE == mapEvent->getCommandType()); taskCount = commandStreamReceiver.peekTaskCount(); EXPECT_EQ(3u, taskCount); clSetEventCallback(mapEventReturned, CL_COMPLETE, E2Clb::SignalEv2, (void *)pTagMemory); retVal = clWaitForEvents(1, &mapEventReturned); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(4u, *pTagMemory); taskCount = commandStreamReceiver.peekTaskCount(); EXPECT_EQ(3u, taskCount); (*pTagMemory)++; retVal = clEnqueueUnmapMemObject( pCmdQ, image, ptr, 0, nullptr, &unmapEventReturned); EXPECT_EQ(CL_SUCCESS, retVal); auto unmapEvent = castToObject(unmapEventReturned); EXPECT_TRUE(CL_COMMAND_UNMAP_MEM_OBJECT == unmapEvent->getCommandType()); retVal = clWaitForEvents(1, &unmapEventReturned); taskCount = commandStreamReceiver.peekTaskCount(); EXPECT_EQ(4u, taskCount); clReleaseEvent(mapEventReturned); clReleaseEvent(unmapEventReturned); } HWTEST_F(EnqueueMapImageTest, givenReadOnlyMapWithOutEventWhenMappedThenSetEventAndDontIncraseTaskCountFromWriteImage) { if (!UnitTestHelper::tiledImagesSupported) { GTEST_SKIP(); } DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableAsyncEventsHandler.set(false); cl_event mapEventReturned = nullptr; cl_event unmapEventReturned = nullptr; auto mapFlags = CL_MAP_READ; const size_t origin[3] = {0, 0, 0}; const size_t region[3] = {1, 1, 1}; *pTagMemory = 5; auto &commandStreamReceiver = pCmdQ->getGpgpuCommandStreamReceiver(); EXPECT_EQ(1u, commandStreamReceiver.peekTaskCount()); auto ptr = pCmdQ->enqueueMapImage(image, false, mapFlags, origin, region, nullptr, nullptr, 0, nullptr, 
&mapEventReturned, retVal); EXPECT_NE(nullptr, ptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(2u, commandStreamReceiver.peekTaskCount()); auto mapEvent = castToObject(mapEventReturned); EXPECT_TRUE(CL_COMMAND_MAP_IMAGE == mapEvent->getCommandType()); retVal = clWaitForEvents(1, &mapEventReturned); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clEnqueueUnmapMemObject(pCmdQ, image, ptr, 0, nullptr, &unmapEventReturned); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(2u, commandStreamReceiver.peekTaskCount()); auto unmapEvent = castToObject(unmapEventReturned); EXPECT_TRUE(CL_COMMAND_UNMAP_MEM_OBJECT == unmapEvent->getCommandType()); retVal = clWaitForEvents(1, &unmapEventReturned); EXPECT_EQ(CL_SUCCESS, retVal); clReleaseEvent(mapEventReturned); clReleaseEvent(unmapEventReturned); } HWTEST_F(EnqueueMapImageTest, GivenPtrToReturnEventWhenMappingImageThenEventIsNotNull) { if (!UnitTestHelper::tiledImagesSupported) { GTEST_SKIP(); } cl_event eventReturned = nullptr; auto mapFlags = CL_MAP_READ; const size_t origin[3] = {0, 0, 0}; const size_t region[3] = {1, 1, 1}; size_t imageRowPitch = 0; size_t imageSlicePitch = 0; uint32_t forceTaskCount = 100; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.taskCount = forceTaskCount; auto ptr = clEnqueueMapImage( pCmdQ, image, CL_FALSE, mapFlags, origin, region, &imageRowPitch, &imageSlicePitch, 0, nullptr, &eventReturned, &retVal); EXPECT_NE(nullptr, ptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, eventReturned); auto eventObject = castToObject(eventReturned); EXPECT_EQ(forceTaskCount + 1, eventObject->peekTaskCount()); EXPECT_TRUE(eventObject->updateStatusAndCheckCompletion()); retVal = clEnqueueUnmapMemObject( pCmdQ, image, ptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); clReleaseEvent(eventReturned); } HWTEST_F(EnqueueMapImageTest, givenZeroCopyImageWhenItIsMappedAndReturnsEventThenEventHasCorrectProperties) { cl_event eventReturned = nullptr; auto mapFlags = CL_MAP_READ; const 
size_t origin[3] = {0, 0, 0}; const size_t region[3] = {1, 1, 1}; size_t imageRowPitch = 0; size_t imageSlicePitch = 0; uint32_t forceTaskCount = 100; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.taskCount = forceTaskCount; std::unique_ptr zero_copy_image(ImageHelper>::create(context)); ASSERT_TRUE(zero_copy_image->isMemObjZeroCopy()); pCmdQ->taskCount = 40u; auto ptr = clEnqueueMapImage( pCmdQ, zero_copy_image.get(), CL_FALSE, mapFlags, origin, region, &imageRowPitch, &imageSlicePitch, 0, nullptr, &eventReturned, &retVal); EXPECT_NE(nullptr, ptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, eventReturned); EXPECT_EQ(ptr, zero_copy_image->getCpuAddressForMemoryTransfer()); auto eventObject = castToObject(eventReturned); EXPECT_EQ(pCmdQ->taskCount, eventObject->peekTaskCount()); EXPECT_TRUE(eventObject->updateStatusAndCheckCompletion()); retVal = clEnqueueUnmapMemObject( pCmdQ, zero_copy_image.get(), ptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); clReleaseEvent(eventReturned); } TEST_F(EnqueueMapImageTest, GivenNonZeroCopyImageWhenMappedWithOffsetThenCorrectPointerIsReturned) { auto mapFlags = CL_MAP_WRITE; const size_t origin[3] = {1, 0, 0}; const size_t region[3] = {1, 1, 1}; size_t imageRowPitch = 0; size_t imageSlicePitch = 0; Image *nonZeroCopyImage = ImageHelper>::create(context); EXPECT_FALSE(nonZeroCopyImage->isMemObjZeroCopy()); auto ptr = clEnqueueMapImage( pCmdQ, nonZeroCopyImage, CL_TRUE, mapFlags, origin, region, &imageRowPitch, &imageSlicePitch, 0, nullptr, nullptr, &retVal); float *HostPtrOffseted = (float *)Image1dDefaults::hostPtr + 1; // EXPECT_NE(nullptr, ptr); if (!image->isTiledAllocation()) { EXPECT_EQ(HostPtrOffseted, ptr); // Returned pointer should be offseted } EXPECT_EQ(CL_SUCCESS, retVal); retVal = clEnqueueUnmapMemObject( pCmdQ, nonZeroCopyImage, ptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); delete nonZeroCopyImage; } HWTEST_F(EnqueueMapImageTest, 
givenSharingHandlerWhenNonReadOnlyMapAndUnmapOnNonTiledImageIsCalledThenMakeGpuCopy) { std::unique_ptr image(ImageHelper>::create(context)); ASSERT_NE(nullptr, image); image->setSharingHandler(new SharingHandler()); EXPECT_FALSE(image->isTiledAllocation()); auto &csr = pDevice->getUltCommandStreamReceiver(); csr.taskCount = 1; csr.taskLevel = 1; pCmdQ->taskCount = 1; pCmdQ->taskLevel = 1; size_t origin[] = {0, 0, 0}; size_t region[] = {1, 1, 1}; void *data = clEnqueueMapImage(pCmdQ, image.get(), CL_TRUE, CL_MAP_WRITE, origin, region, nullptr, nullptr, 0, NULL, NULL, &retVal); EXPECT_NE(nullptr, data); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(2u, pCmdQ->taskCount); EXPECT_EQ(2u, pCmdQ->taskLevel); retVal = clEnqueueUnmapMemObject(pCmdQ, image.get(), data, 0, NULL, NULL); EXPECT_EQ(3u, pCmdQ->taskCount); EXPECT_EQ(3u, pCmdQ->taskLevel); } HWTEST_F(EnqueueMapImageTest, givenSharingHandlerWhenReadOnlyMapAndUnmapOnNonTiledImageIsCalledThenMakeGpuCopy) { std::unique_ptr image(ImageHelper>::create(context)); ASSERT_NE(nullptr, image); image->setSharingHandler(new SharingHandler()); EXPECT_FALSE(image->isTiledAllocation()); auto &csr = pDevice->getUltCommandStreamReceiver(); csr.taskCount = 1; csr.taskLevel = 1; pCmdQ->taskCount = 1; pCmdQ->taskLevel = 1; size_t origin[] = {0, 0, 0}; size_t region[] = {1, 1, 1}; void *data = clEnqueueMapImage(pCmdQ, image.get(), CL_TRUE, CL_MAP_READ, origin, region, nullptr, nullptr, 0, NULL, NULL, &retVal); EXPECT_NE(nullptr, data); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(2u, pCmdQ->taskCount); EXPECT_EQ(2u, pCmdQ->taskLevel); retVal = clEnqueueUnmapMemObject(pCmdQ, image.get(), data, 0, NULL, NULL); EXPECT_EQ(2u, pCmdQ->taskCount); EXPECT_EQ(2u, pCmdQ->taskLevel); } HWTEST_F(EnqueueMapImageTest, givenImageWithouUsetHostPtrFlagWhenMappedOnCpuThenSetAllMapProperties) { std::unique_ptr image(ImageHelper::create(context)); ASSERT_NE(nullptr, image); EXPECT_TRUE(image->mappingOnCpuAllowed()); size_t origin[] = {2, 0, 0}; size_t region[] = 
{2, 1, 1}; void *mappedPtr = clEnqueueMapImage(pCmdQ, image.get(), CL_TRUE, CL_MAP_READ, origin, region, nullptr, nullptr, 0, NULL, NULL, &retVal); EXPECT_NE(nullptr, mappedPtr); MapInfo mappedInfo; auto success = image->findMappedPtr(mappedPtr, mappedInfo); EXPECT_TRUE(success); EXPECT_NE(nullptr, mappedInfo.ptr); EXPECT_EQ(origin[0], mappedInfo.offset[0]); EXPECT_EQ(origin[1], mappedInfo.offset[1]); EXPECT_EQ(origin[2], mappedInfo.offset[2]); EXPECT_EQ(region[0], mappedInfo.size[0]); EXPECT_EQ(region[1], mappedInfo.size[1]); EXPECT_EQ(region[2], mappedInfo.size[2]); auto expectedPtr = ptrOffset(image->getCpuAddressForMapping(), image->calculateOffsetForMapping(mappedInfo.offset)); EXPECT_EQ(mappedPtr, expectedPtr); } HWTEST_F(EnqueueMapImageTest, givenImageWithUseHostPtrFlagWhenMappedOnCpuThenSetAllMapProperties) { std::unique_ptr image(ImageHelper>::create(context)); ASSERT_NE(nullptr, image); EXPECT_TRUE(image->mappingOnCpuAllowed()); size_t origin[] = {2, 0, 0}; size_t region[] = {2, 1, 1}; void *mappedPtr = clEnqueueMapImage(pCmdQ, image.get(), CL_TRUE, CL_MAP_READ, origin, region, nullptr, nullptr, 0, NULL, NULL, &retVal); EXPECT_NE(nullptr, mappedPtr); MapInfo mappedInfo; auto success = image->findMappedPtr(mappedPtr, mappedInfo); EXPECT_TRUE(success); EXPECT_NE(nullptr, mappedInfo.ptr); EXPECT_EQ(origin[0], mappedInfo.offset[0]); EXPECT_EQ(origin[1], mappedInfo.offset[1]); EXPECT_EQ(origin[2], mappedInfo.offset[2]); EXPECT_EQ(region[0], mappedInfo.size[0]); EXPECT_EQ(region[1], mappedInfo.size[1]); EXPECT_EQ(region[2], mappedInfo.size[2]); auto expectedPtr = ptrOffset(image->getCpuAddressForMapping(), image->calculateOffsetForMapping(mappedInfo.offset)); EXPECT_EQ(mappedPtr, expectedPtr); } TEST_F(EnqueueMapImageTest, givenBlockedCommandQueueWhenBlockingMapWith2DImageIsEnqueuedAndEventAsynchrounouslyCompletedThenEnqueueFinishesWithoutStall) { auto mapFlags = CL_MAP_READ; const size_t origin[3] = {0, 0, 0}; const size_t region[3] = {1, 1, 1}; size_t 
imageRowPitch = 0; size_t imageSlicePitch = 0; class MockEventWithSetCompleteOnUpdate : public Event { public: MockEventWithSetCompleteOnUpdate(CommandQueue *cmdQueue, cl_command_type cmdType, uint32_t taskLevel, uint32_t taskCount) : Event(cmdQueue, cmdType, taskLevel, taskCount) { } void updateExecutionStatus() override { setStatus(CL_COMPLETE); } }; MockEventWithSetCompleteOnUpdate blockingEvent(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 0, 1); cl_event blockingClEvent = &blockingEvent; int32_t initialRefCountCmdQ = pCmdQ->getRefInternalCount(); auto ptr = pCmdQ->enqueueMapImage( image, true, mapFlags, origin, region, &imageRowPitch, &imageSlicePitch, 1, &blockingClEvent, nullptr, retVal); EXPECT_NE(nullptr, ptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(initialRefCountCmdQ, pCmdQ->getRefInternalCount()); } TEST_F(EnqueueMapImageTest, givenBlockedCommandQueueWhenBlockingMapWith1DImageIsEnqueuedAndEventAsynchrounouslyCompletedThenEnqueueFinishesWithoutStall) { auto mapFlags = CL_MAP_READ; const size_t origin[3] = {0, 0, 0}; const size_t region[3] = {1, 1, 1}; size_t imageRowPitch = 0; size_t imageSlicePitch = 0; Image *image1D = ImageHelper>::create(context); ASSERT_NE(nullptr, image1D); class MockEventWithSetCompleteOnUpdate : public Event { public: MockEventWithSetCompleteOnUpdate(CommandQueue *cmdQueue, cl_command_type cmdType, uint32_t taskLevel, uint32_t taskCount) : Event(cmdQueue, cmdType, taskLevel, taskCount) { } void updateExecutionStatus() override { setStatus(CL_COMPLETE); } }; MockEventWithSetCompleteOnUpdate blockingEvent(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 0, 1); cl_event blockingClEvent = &blockingEvent; int32_t initialRefCountCmdQ = pCmdQ->getRefInternalCount(); auto ptr = pCmdQ->enqueueMapImage( image1D, true, mapFlags, origin, region, &imageRowPitch, &imageSlicePitch, 1, &blockingClEvent, nullptr, retVal); EXPECT_NE(nullptr, ptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(initialRefCountCmdQ, pCmdQ->getRefInternalCount()); delete image1D; } 
TEST_F(EnqueueMapImageTest, givenBlockedCommandQueueWhenBlockingCpuMapIsCalledThenReturnRowPitchAndSlicePitch) { const size_t origin[3] = {0, 0, 0}; const size_t region[3] = {1, 1, 1}; size_t retImageRowPitch = 0; size_t retImageSlicePitch = 0; struct MyMockUserEvent : public UserEvent { MyMockUserEvent() : UserEvent(nullptr) {} void updateExecutionStatus() override { setStatus(CL_COMPLETE); } }; std::unique_ptr image(ImageHelper::create(context)); EXPECT_TRUE(image->mappingOnCpuAllowed()); MyMockUserEvent blockingEvent; cl_event blockingClEvent = &blockingEvent; pCmdQ->enqueueMapImage(image.get(), true, CL_MAP_READ, origin, region, &retImageRowPitch, &retImageSlicePitch, 1, &blockingClEvent, nullptr, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(0u, retImageRowPitch); EXPECT_NE(0u, retImageSlicePitch); image.reset(ImageHelper::create(context)); pCmdQ->enqueueMapImage(image.get(), true, CL_MAP_READ, origin, region, &retImageRowPitch, &retImageSlicePitch, 1, &blockingClEvent, nullptr, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(0u, retImageRowPitch); EXPECT_EQ(0u, retImageSlicePitch); } TEST_F(EnqueueMapImageTest, givenZeroCopyImageWhenMappedOnCpuThenReturnImageRowAndSlicePitch) { const size_t origin[3] = {0, 0, 0}; const size_t region[3] = {1, 1, 1}; size_t retImageRowPitch = 0; size_t retImageSlicePitch = 0; std::unique_ptr image(ImageHelper::create(context)); EXPECT_TRUE(image->mappingOnCpuAllowed()); EXPECT_TRUE(image->isMemObjZeroCopy()); pCmdQ->enqueueMapImage(image.get(), true, CL_MAP_READ, origin, region, &retImageRowPitch, &retImageSlicePitch, 0, nullptr, nullptr, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(image->getImageDesc().image_row_pitch, retImageRowPitch); EXPECT_EQ(image->getImageDesc().image_slice_pitch, retImageSlicePitch); } TEST_F(EnqueueMapImageTest, givenNonZeroCopyImageWhenMappedOnCpuThenReturnHostRowAndSlicePitch) { const size_t origin[3] = {0, 0, 0}; const size_t region[3] = {1, 1, 1}; size_t retImageRowPitch = 0; size_t 
retImageSlicePitch = 0; std::unique_ptr image(ImageHelper>::create(context)); EXPECT_TRUE(image->mappingOnCpuAllowed()); EXPECT_FALSE(image->isMemObjZeroCopy()); pCmdQ->enqueueMapImage(image.get(), true, CL_MAP_READ, origin, region, &retImageRowPitch, &retImageSlicePitch, 0, nullptr, nullptr, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(image->getHostPtrRowPitch(), retImageRowPitch); EXPECT_EQ(image->getHostPtrSlicePitch(), retImageSlicePitch); } TEST_F(EnqueueMapImageTest, givenZeroCopyImageWhenMappedOnGpuThenReturnHostRowAndSlicePitch) { const size_t origin[3] = {0, 0, 0}; const size_t region[3] = {1, 1, 1}; size_t retImageRowPitch = 0; size_t retImageSlicePitch = 0; std::unique_ptr image(ImageHelper::create(context)); image->setSharingHandler(new SharingHandler()); EXPECT_FALSE(image->mappingOnCpuAllowed()); EXPECT_TRUE(image->isMemObjZeroCopy()); pCmdQ->enqueueMapImage(image.get(), true, CL_MAP_READ, origin, region, &retImageRowPitch, &retImageSlicePitch, 0, nullptr, nullptr, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(image->getHostPtrRowPitch(), retImageRowPitch); EXPECT_EQ(image->getHostPtrSlicePitch(), retImageSlicePitch); } TEST_F(EnqueueMapImageTest, givenNonZeroCopyImageWhenMappedOnGpuThenReturnHostRowAndSlicePitch) { const size_t origin[3] = {0, 0, 0}; const size_t region[3] = {1, 1, 1}; size_t retImageRowPitch = 0; size_t retImageSlicePitch = 0; std::unique_ptr image(ImageHelper>::create(context)); image->setSharingHandler(new SharingHandler()); EXPECT_FALSE(image->mappingOnCpuAllowed()); EXPECT_FALSE(image->isMemObjZeroCopy()); pCmdQ->enqueueMapImage(image.get(), true, CL_MAP_READ, origin, region, &retImageRowPitch, &retImageSlicePitch, 0, nullptr, nullptr, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(image->getHostPtrRowPitch(), retImageRowPitch); EXPECT_EQ(image->getHostPtrSlicePitch(), retImageSlicePitch); } TEST_F(EnqueueMapImageTest, givenMipMapImageWhenMappedThenReturnHostRowAndSlicePitch) { const size_t origin[4] = {0, 0, 0, 1}; 
const size_t region[3] = {1, 1, 1}; size_t retImageRowPitch = 0; size_t retImageSlicePitch = 0; cl_image_desc imageDesc = {}; imageDesc.image_type = CL_MEM_OBJECT_IMAGE3D; imageDesc.num_mip_levels = 10; imageDesc.image_width = 4; imageDesc.image_height = 4; imageDesc.image_depth = 4; std::unique_ptr image(ImageHelper::create(context, &imageDesc)); image->setSharingHandler(new SharingHandler()); EXPECT_FALSE(image->mappingOnCpuAllowed()); pCmdQ->enqueueMapImage(image.get(), true, CL_MAP_READ, origin, region, &retImageRowPitch, &retImageSlicePitch, 0, nullptr, nullptr, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(image->getHostPtrRowPitch(), retImageRowPitch); EXPECT_EQ(image->getHostPtrSlicePitch(), retImageSlicePitch); } TEST_F(EnqueueMapImageTest, givenImage1DArrayWhenEnqueueMapImageIsCalledThenReturnRowAndSlicePitchAreEqual) { class MockImage : public Image { public: MockImage(Context *context, cl_mem_flags flags, GraphicsAllocation *allocation, const ClSurfaceFormatInfo &surfaceFormat, const cl_image_format &imageFormat, const cl_image_desc &imageDesc) : Image(context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context->getDevice(0)->getDevice()), flags, 0, 0, nullptr, nullptr, imageFormat, imageDesc, true, GraphicsAllocationHelper::toMultiGraphicsAllocation(allocation), false, 0, 0, surfaceFormat, nullptr) { } void setImageArg(void *memory, bool isMediaBlockImage, uint32_t mipLevel, uint32_t rootDeviceIndex, bool useGlobalAtomics) override {} void setMediaImageArg(void *memory, uint32_t rootDeviceIndex) override {} void setMediaSurfaceRotation(void *memory) override {} void setSurfaceMemoryObjectControlState(void *memory, uint32_t value) override {} void transformImage2dArrayTo3d(void *memory) override {} void transformImage3dTo2dArray(void *memory) override {} }; const size_t origin[3] = {0, 0, 0}; const size_t region[3] = {1, 1, 1}; size_t retImageRowPitch = 0; size_t retImageSlicePitch = 0; cl_mem_flags flags = CL_MEM_READ_ONLY; 
cl_image_desc imageDesc = {}; imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D_ARRAY; imageDesc.image_width = 329; imageDesc.image_array_size = 48; imageDesc.image_row_pitch = 2688; imageDesc.image_slice_pitch = 10752; imageDesc.num_mip_levels = 0; size_t imgSize = imageDesc.image_slice_pitch * imageDesc.image_array_size; cl_image_format imageFormat = {}; imageFormat.image_channel_order = CL_RGBA; imageFormat.image_channel_data_type = CL_UNSIGNED_INT16; const ClSurfaceFormatInfo *surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); auto allocation = context->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{context->getDevice(0)->getRootDeviceIndex(), imgSize}); ASSERT_NE(allocation, nullptr); MockImage image(context, flags, allocation, *surfaceFormat, imageFormat, imageDesc); EXPECT_TRUE(image.mappingOnCpuAllowed()); EXPECT_TRUE(image.isMemObjZeroCopy()); pCmdQ->enqueueMapImage(&image, true, CL_MAP_READ, origin, region, &retImageRowPitch, &retImageSlicePitch, 0, nullptr, nullptr, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(retImageRowPitch, retImageSlicePitch); } struct EnqueueMapImageTypeTest : public CommandEnqueueFixture, public ::testing::Test { typedef CommandQueueHwFixture CommandQueueFixture; using CommandQueueHwFixture::pCmdQ; EnqueueMapImageTypeTest(void) { } void SetUp() override { CommandEnqueueFixture::SetUp(); image = ImageHelper>::create(&context); } void TearDown() override { delete image; CommandEnqueueFixture::TearDown(); } protected: template void enqueueMapImage(cl_bool blocking = CL_TRUE) { typedef ImageUseHostPtr Traits; size_t imageRowPitch; size_t imageSlicePitch; size_t origin[3] = {0, 0, 0}; size_t region[3] = {Traits::imageDesc.image_width, Traits::imageDesc.image_height, Traits::imageDesc.image_depth}; cl_int retVal = 0; auto mappedPtr = pCmdQ->enqueueMapImage( image, blocking, Traits::flags, origin, region, 
&imageRowPitch, &imageSlicePitch, 0, nullptr, nullptr, retVal); EXPECT_NE(nullptr, mappedPtr); EXPECT_EQ(CL_SUCCESS, retVal); parseCommands(*pCmdQ); } MockContext context; Image *image = nullptr; }; HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueMapImageTypeTest, GiveRequirementForPipeControlWorkaroundWhenMappingImageThenAdditionalPipeControlIsProgrammed) { typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; // Set taskCount to 1 to call finish on map operation pCmdQ->taskCount = 1; bool blocking = true; enqueueMapImage(blocking); auto itorWalker = find(cmdList.begin(), cmdList.end()); auto itorCmd = find(itorWalker, cmdList.end()); auto *cmd = (PIPE_CONTROL *)*itorCmd; EXPECT_NE(cmdList.end(), itorCmd); if (UnitTestHelper::isPipeControlWArequired(pDevice->getHardwareInfo())) { // SKL: two PIPE_CONTROLs following GPGPU_WALKER: first has DcFlush and second has Write HwTag EXPECT_FALSE(cmd->getDcFlushEnable()); // Move to next PPC auto itorCmdP = ++((GenCmdList::iterator)itorCmd); EXPECT_NE(cmdList.end(), itorCmdP); auto itorCmd2 = find(itorCmdP, cmdList.end()); cmd = (PIPE_CONTROL *)*itorCmd2; EXPECT_TRUE(cmd->getDcFlushEnable()); } else { // single PIPE_CONTROL following GPGPU_WALKER has DcFlush and Write HwTag EXPECT_TRUE(cmd->getDcFlushEnable()); } } compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/enqueue_marker_tests.cpp000066400000000000000000000254661422164147700314240ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/libult/ult_command_stream_receiver.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/event/user_event.h" #include "opencl/test/unit_test/command_queue/command_enqueue_fixture.h" #include 
"opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" using namespace NEO; using MarkerTest = Test; HWTEST_F(MarkerTest, GivenCsrAndCmdqWithSameTaskLevelWhenEnqueingMarkerThenPipeControlIsAdded) { typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); // Set task levels to known values. uint32_t originalCSRLevel = 2; commandStreamReceiver.taskLevel = originalCSRLevel; pCmdQ->taskLevel = originalCSRLevel; uint32_t originalTaskCount = 15; commandStreamReceiver.taskCount = originalTaskCount; cl_uint numEventsInWaitList = 0; const cl_event *eventWaitList = nullptr; cl_event *event = nullptr; auto retVal = pCmdQ->enqueueMarkerWithWaitList( numEventsInWaitList, eventWaitList, event); ASSERT_EQ(CL_SUCCESS, retVal); // Should sync CSR & CmdQ levels. EXPECT_EQ(commandStreamReceiver.peekTaskLevel(), pCmdQ->taskLevel); parseCommands(*pCmdQ); // If CSR == CQ then a PC is required. auto itorCmd = reverse_find(cmdList.rbegin(), cmdList.rend()); EXPECT_EQ(cmdList.rend(), itorCmd); } HWTEST_F(MarkerTest, GivenCsrAndCmdqWithDifferentTaskLevelsWhenEnqueingMarkerThenPipeControlIsNotAdded) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); // Set task levels to known values. commandStreamReceiver.taskLevel = 2; pCmdQ->taskLevel = 1; cl_uint numEventsInWaitList = 0; const cl_event *eventWaitList = nullptr; cl_event *event = nullptr; auto retVal = pCmdQ->enqueueMarkerWithWaitList( numEventsInWaitList, eventWaitList, event); ASSERT_EQ(CL_SUCCESS, retVal); // Should sync CSR & CmdQ levels. EXPECT_EQ(1u, pCmdQ->taskLevel); EXPECT_EQ(2u, commandStreamReceiver.peekTaskLevel()); auto sizeUsed = pCS->getUsed(); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, pCmdBuffer, sizeUsed)); // If CSR > CQ then a PC isn't required. 
auto itorCmd = find(cmdList.begin(), cmdList.end());
    ASSERT_EQ(cmdList.end(), itorCmd);
}

// A marker enqueued with an output event pointer must hand back an event whose command
// type is CL_COMMAND_MARKER.
TEST_F(MarkerTest, WhenEnqueingMarkerThenEventIsReturned) {
    cl_uint waitListSize = 0;
    const cl_event *waitList = nullptr;
    cl_event event = nullptr;

    auto retVal = pCmdQ->enqueueMarkerWithWaitList(waitListSize, waitList, &event);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_NE(nullptr, event);

    // Check CL_EVENT_COMMAND_TYPE
    {
        // The smart pointer owns the returned event for the duration of this scope.
        std::unique_ptr pEvent((Event *)(event));
        size_t sizeReturned = 0;
        cl_command_type cmdType = 0;
        auto result = clGetEventInfo(pEvent.get(), CL_EVENT_COMMAND_TYPE, sizeof(cmdType), &cmdType, &sizeReturned);
        ASSERT_EQ(CL_SUCCESS, result);
        EXPECT_EQ(static_cast(CL_COMMAND_MARKER), cmdType);
        EXPECT_EQ(sizeof(cl_command_type), sizeReturned);
    }
}

// With every enqueue forced to block and the mock queue reporting a GPU hang from its
// wait, the marker enqueue must return CL_OUT_OF_RESOURCES after exactly one wait call.
HWTEST_F(MarkerTest, GivenGpuHangAndBlockingCallWhenEnqueingMarkerThenOutOfResourcesIsReturned) {
    DebugManagerStateRestore stateRestore;
    DebugManager.flags.MakeEachEnqueueBlocking.set(true);

    std::unique_ptr device(new MockClDevice{MockClDevice::createWithNewExecutionEnvironment(nullptr)});
    cl_queue_properties props = {};

    MockCommandQueueHw hangingQueue(context, device.get(), &props);
    hangingQueue.waitForAllEnginesReturnValue = WaitStatus::GpuHang;

    cl_uint waitListSize = 0;
    const cl_event *waitList = nullptr;

    const auto enqueueResult = hangingQueue.enqueueMarkerWithWaitList(waitListSize, waitList, nullptr);

    EXPECT_EQ(CL_OUT_OF_RESOURCES, enqueueResult);
    EXPECT_EQ(1, hangingQueue.waitForAllEnginesCalledCount);
}

HWTEST_F(MarkerTest, WhenEnqueingMarkerThenReturnedEventShouldHaveEqualDepthToLastCommandPacketInCommandQueue) {
    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver();

    // Set task levels to known values.
commandStreamReceiver.taskLevel = 2; pCmdQ->taskLevel = 1; cl_uint numEventsInWaitList = 0; const cl_event *eventWaitList = nullptr; cl_event event = nullptr; auto retVal = pCmdQ->enqueueMarkerWithWaitList( numEventsInWaitList, eventWaitList, &event); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, event); std::unique_ptr pEvent((Event *)(event)); // Shouldn't sync to CSR // should sync to command queue last packet EXPECT_EQ(1u, pEvent->taskLevel); EXPECT_EQ(pCmdQ->taskLevel, pEvent->taskLevel); } HWTEST_F(MarkerTest, GivenEventWithWaitDependenciesWhenEnqueingMarkerThenCsrLevelAndCmdqLevelShouldSync) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); uint32_t initialTaskLevel = 7; // In N:1, CSR is always highest task level. commandStreamReceiver.taskLevel = initialTaskLevel; // In N:1, pCmdQ.level <= CSR.level pCmdQ->taskLevel = initialTaskLevel; // In N:1, event.level <= pCmdQ.level Event event1(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 5, 15); Event event2(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 6, 16); Event event3(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 1, 6); cl_event eventWaitList[] = { &event1, &event2, &event3}; cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]); cl_event event = nullptr; auto retVal = pCmdQ->enqueueMarkerWithWaitList( numEventsInWaitList, eventWaitList, &event); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, event); std::unique_ptr pEvent((Event *)(event)); // Should sync CSR & CmdQ levels. 
if (pCmdQ->getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) { EXPECT_EQ(initialTaskLevel, pCmdQ->taskLevel); EXPECT_EQ(initialTaskLevel + 1, commandStreamReceiver.peekTaskLevel()); } else { EXPECT_EQ(commandStreamReceiver.peekTaskLevel(), pCmdQ->taskLevel); } EXPECT_EQ(pCmdQ->taskLevel, pEvent->taskLevel); EXPECT_EQ(7u, pEvent->taskLevel); } TEST_F(MarkerTest, givenMultipleEventWhenTheyArePassedToMarkerThenOutputEventHasHighestTaskCount) { // combine events with different task counts, max is 16 Event event1(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 5, 15); Event event2(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 6, 16); Event event3(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 1, 6); cl_event eventWaitList[] = { &event1, &event2, &event3}; cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]); cl_event event = nullptr; auto initialTaskCount = pCmdQ->taskCount; pCmdQ->enqueueMarkerWithWaitList( numEventsInWaitList, eventWaitList, &event); std::unique_ptr pEvent((Event *)(event)); if (pCmdQ->getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) { EXPECT_EQ(initialTaskCount + 1, pCmdQ->taskCount); EXPECT_EQ(initialTaskCount + 1, pEvent->peekTaskCount()); } else { EXPECT_EQ(16u, pCmdQ->taskCount); EXPECT_EQ(16u, pEvent->peekTaskCount()); } } TEST_F(MarkerTest, givenMultipleEventsAndCompletedUserEventWhenTheyArePassedToMarkerThenOutputEventHasHighestTaskCount) { // combine events with different task counts, max is 16 Event event1(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 5, 15); Event event2(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 6, 16); Event event3(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 1, 6); UserEvent userEvent(&pCmdQ->getContext()); userEvent.setStatus(CL_COMPLETE); cl_event eventWaitList[] = { &event1, &event2, &event3, &userEvent}; cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]); cl_event event = nullptr; auto initialTaskCount = pCmdQ->taskCount; pCmdQ->enqueueMarkerWithWaitList( numEventsInWaitList, eventWaitList, &event); 
std::unique_ptr pEvent((Event *)(event)); if (pCmdQ->getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) { EXPECT_EQ(initialTaskCount + 1, pCmdQ->taskCount); EXPECT_EQ(initialTaskCount + 1, pEvent->peekTaskCount()); } else { EXPECT_EQ(16u, pCmdQ->taskCount); EXPECT_EQ(16u, pEvent->peekTaskCount()); } } HWTEST_F(MarkerTest, givenMarkerCallFollowingNdrangeCallInBatchedModeWhenWaitForEventsIsCalledThenFlushStampIsProperlyUpdated) { MockKernelWithInternals mockKernel(*this->pClDevice, this->context); auto &ultCommandStreamReceiver = this->pDevice->getUltCommandStreamReceiver(); ultCommandStreamReceiver.overrideDispatchPolicy(DispatchMode::BatchedDispatch); cl_event eventFromNdr = nullptr; size_t gws[] = {1}; pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, &eventFromNdr); cl_event eventFromMarker = nullptr; pCmdQ->enqueueMarkerWithWaitList(1u, &eventFromNdr, &eventFromMarker); ultCommandStreamReceiver.flushStamp->setStamp(1u); clEnqueueWaitForEvents(pCmdQ, 1u, &eventFromMarker); auto neoEvent = castToObject(eventFromMarker); EXPECT_EQ(1u, neoEvent->flushStamp->peekStamp()); clReleaseEvent(eventFromMarker); clReleaseEvent(eventFromNdr); } struct MarkerWithProfilingTest : public MarkerTest { void SetUp() override { dbgRestore = std::make_unique(); DebugManager.flags.EnableTimestampPacket.set(0); MarkerTest::SetUp(); } void TearDown() override { MarkerTest::TearDown(); dbgRestore.reset(nullptr); } std::unique_ptr dbgRestore; }; struct WhiteBoxCommandQueue : public CommandQueue { using CommandQueue::isBlockedCommandStreamRequired; }; HWTEST_F(MarkerWithProfilingTest, givenMarkerWithProfilingAndBlockedEnqueueThenBlockedCommandStreamIsRequired) { auto cmdQueueWB = static_cast(pCmdQ); EventsRequest eventsRequest(0, nullptr, nullptr); bool ret = cmdQueueWB->isBlockedCommandStreamRequired(CL_COMMAND_MARKER, eventsRequest, true, true); EXPECT_TRUE(ret); } 
compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/enqueue_media_kernel.cpp000066400000000000000000000017561422164147700313340ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/media_kernel_fixture.h" using namespace NEO; typedef MediaKernelFixture MediaKernelTest; TEST_F(MediaKernelTest, GivenKernelWhenCheckingIsVmeKernelThenOnlyVmeKernelReportsTrue) { ASSERT_NE(true, pKernel->isVmeKernel()); ASSERT_EQ(true, pVmeKernel->isVmeKernel()); } HWTEST_F(MediaKernelTest, GivenVmeKernelWhenEnqueuingKernelThenSinglePipelineSelectIsProgrammed) { enqueueVmeKernel(); auto numCommands = getCommandsList().size(); EXPECT_EQ(1u, numCommands); } HWTEST_F(MediaKernelTest, GivenNonVmeKernelWhenEnqueuingKernelThenSinglePipelineSelectIsProgrammed) { enqueueRegularKernel(); auto numCommands = getCommandsList().size(); EXPECT_EQ(1u, numCommands); } enqueue_migrate_mem_objects_tests.cpp000066400000000000000000000066551422164147700340620ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/event/user_event.h" #include "opencl/test/unit_test/command_queue/command_queue_fixture.h" #include "opencl/test/unit_test/command_stream/command_stream_fixture.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" using namespace NEO; class MigrateMemObjectsFixture : public ClDeviceFixture, public CommandQueueHwFixture { public: void SetUp() override { ClDeviceFixture::SetUp(); CommandQueueHwFixture::SetUp(pClDevice, 0); ASSERT_NE(nullptr, pCmdQ); } void 
TearDown() override { CommandQueueHwFixture::TearDown(); ClDeviceFixture::TearDown(); } }; typedef Test MigrateMemObjectsTest; TEST_F(MigrateMemObjectsTest, GivenNullEventWhenMigratingEventsThenSuccessIsReturned) { MockBuffer buffer; auto bufferMemObj = static_cast(&buffer); auto pBufferMemObj = &bufferMemObj; auto retVal = pCmdQ->enqueueMigrateMemObjects( 1, pBufferMemObj, CL_MIGRATE_MEM_OBJECT_HOST, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(MigrateMemObjectsTest, GivenValidEventListWhenMigratingEventsThenSuccessIsReturned) { MockBuffer buffer; auto bufferMemObj = static_cast(&buffer); auto pBufferMemObj = &bufferMemObj; UserEvent uEvent; cl_event eventWaitList[] = {&uEvent}; auto retVal = pCmdQ->enqueueMigrateMemObjects( 1, pBufferMemObj, CL_MIGRATE_MEM_OBJECT_HOST, 1, eventWaitList, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } HWTEST_F(MigrateMemObjectsTest, GivenGpuHangAndBlockingCallsAndValidEventListWhenMigratingEventsThenOutOfResourcesIsReturned) { DebugManagerStateRestore stateRestore; DebugManager.flags.MakeEachEnqueueBlocking.set(true); std::unique_ptr device(new MockClDevice{MockClDevice::createWithNewExecutionEnvironment(nullptr)}); cl_queue_properties props = {}; MockCommandQueueHw mockCommandQueueHw(context, device.get(), &props); mockCommandQueueHw.waitForAllEnginesReturnValue = WaitStatus::GpuHang; MockBuffer buffer; auto bufferMemObj = static_cast(&buffer); UserEvent uEvent; cl_event eventWaitList[] = {&uEvent}; const auto enqueueResult = mockCommandQueueHw.enqueueMigrateMemObjects( 1, &bufferMemObj, CL_MIGRATE_MEM_OBJECT_HOST, 1, eventWaitList, nullptr); EXPECT_EQ(CL_OUT_OF_RESOURCES, enqueueResult); EXPECT_EQ(1, mockCommandQueueHw.waitForAllEnginesCalledCount); } TEST_F(MigrateMemObjectsTest, GivenEventPointerWhenMigratingEventsThenEventIsReturned) { MockBuffer buffer; auto bufferMemObj = static_cast(&buffer); auto pBufferMemObj = &bufferMemObj; cl_event event = nullptr; auto retVal = pCmdQ->enqueueMigrateMemObjects( 1, 
pBufferMemObj, CL_MIGRATE_MEM_OBJECT_HOST, 0, nullptr, &event); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, event); Event *eventObject = (Event *)event; delete eventObject; } compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/enqueue_read_buffer_event_tests.cpp000066400000000000000000000315771422164147700336100ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/local_memory_access_modes.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/event/event.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "opencl/test/unit_test/fixtures/hello_world_fixture.h" #include "gtest/gtest.h" #include using namespace NEO; typedef HelloWorldTest EnqueueReadBuffer; TEST_F(EnqueueReadBuffer, GivenPointerToEventListWhenReadingBufferThenEventIsReturned) { cl_bool blockingRead = CL_TRUE; size_t offset = 0; size_t size = sizeof(cl_float); cl_float pDestMemory[] = {0.0f, 0.0f, 0.0f, 0.0f}; cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event event = nullptr; auto srcBuffer = std::unique_ptr(BufferHelper<>::create()); auto retVal = pCmdQ->enqueueReadBuffer( srcBuffer.get(), blockingRead, offset, size, pDestMemory, nullptr, numEventsInWaitList, eventWaitList, &event); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, event); auto pEvent = (Event *)event; EXPECT_EQ(pCmdQ->taskLevel, pEvent->taskLevel); // Check CL_EVENT_COMMAND_TYPE { cl_command_type cmdType = 0; size_t sizeReturned = 0; auto result = clGetEventInfo(pEvent, CL_EVENT_COMMAND_TYPE, sizeof(cmdType), &cmdType, &sizeReturned); ASSERT_EQ(CL_SUCCESS, result); EXPECT_EQ(static_cast(CL_COMMAND_READ_BUFFER), cmdType); EXPECT_EQ(sizeof(cl_command_type), sizeReturned); } delete pEvent; } TEST_F(EnqueueReadBuffer, WhenReadingBufferThenEventReturnedShouldBeMaxOfInputEventsAndCmdQPlus1) { 
uint32_t taskLevelCmdQ = 17; pCmdQ->taskLevel = taskLevelCmdQ; uint32_t taskLevelEvent1 = 8; uint32_t taskLevelEvent2 = 19; Event event1(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent1, 4); Event event2(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent2, 10); cl_bool blockingRead = CL_TRUE; size_t offset = 0; size_t size = sizeof(cl_float); cl_float pDestMemory[] = {0.0f, 0.0f, 0.0f, 0.0f}; cl_event eventWaitList[] = { &event1, &event2}; cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]); cl_event event = nullptr; auto srcBuffer = std::unique_ptr(BufferHelper<>::create()); auto retVal = pCmdQ->enqueueReadBuffer( srcBuffer.get(), blockingRead, offset, size, pDestMemory, nullptr, numEventsInWaitList, eventWaitList, &event); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, event); auto pEvent = (Event *)event; EXPECT_EQ(19u + 1u, pEvent->taskLevel); delete pEvent; } TEST_F(EnqueueReadBuffer, givenInOrderQueueAndForcedCpuCopyOnReadBufferAndDstPtrEqualSrcPtrWithEventsNotBlockedWhenReadBufferIsExecutedThenTaskLevelShouldNotBeIncreased) { DebugManagerStateRestore dbgRestore; DebugManager.flags.DoCpuCopyOnReadBuffer.set(1); DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast(LocalMemoryAccessMode::Default)); cl_int retVal = CL_SUCCESS; uint32_t taskLevelCmdQ = 17; pCmdQ->taskLevel = taskLevelCmdQ; uint32_t taskLevelEvent1 = 8; uint32_t taskLevelEvent2 = 19; Event event1(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent1, 4); Event event2(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent2, 10); cl_bool blockingRead = CL_TRUE; size_t size = sizeof(cl_float); cl_event eventWaitList[] = { &event1, &event2}; cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]); cl_event event = nullptr; auto srcBuffer = std::unique_ptr(BufferHelper<>::create()); void *ptr = srcBuffer->getCpuAddressForMemoryTransfer(); retVal = pCmdQ->enqueueReadBuffer(srcBuffer.get(), blockingRead, 0, size, ptr, nullptr, numEventsInWaitList, 
eventWaitList, &event); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, event); auto pEvent = (Event *)event; EXPECT_EQ(19u, pEvent->taskLevel); EXPECT_EQ(17u, pCmdQ->taskLevel); pEvent->release(); } TEST_F(EnqueueReadBuffer, givenInOrderQueueAndForcedCpuCopyOnReadBufferAndDstPtrEqualSrcPtrWhenReadBufferIsExecutedThenTaskLevelShouldNotBeIncreased) { DebugManagerStateRestore dbgRestore; DebugManager.flags.DoCpuCopyOnReadBuffer.set(1); DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast(LocalMemoryAccessMode::Default)); cl_int retVal = CL_SUCCESS; uint32_t taskLevelCmdQ = 17; pCmdQ->taskLevel = taskLevelCmdQ; cl_bool blockingRead = CL_TRUE; size_t size = sizeof(cl_float); cl_event event = nullptr; auto srcBuffer = std::unique_ptr(BufferHelper<>::create()); void *ptr = srcBuffer->getCpuAddressForMemoryTransfer(); retVal = pCmdQ->enqueueReadBuffer(srcBuffer.get(), blockingRead, 0, size, ptr, nullptr, 0, nullptr, &event); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, event); auto pEvent = (Event *)event; EXPECT_EQ(17u, pEvent->taskLevel); EXPECT_EQ(17u, pCmdQ->taskLevel); pEvent->release(); } TEST_F(EnqueueReadBuffer, givenOutOfOrderQueueAndForcedCpuCopyOnReadBufferAndDstPtrEqualSrcPtrWithEventsNotBlockedWhenReadBufferIsExecutedThenTaskLevelShouldNotBeIncreased) { DebugManagerStateRestore dbgRestore; DebugManager.flags.DoCpuCopyOnReadBuffer.set(1); DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast(LocalMemoryAccessMode::Default)); std::unique_ptr pCmdOOQ(createCommandQueue(pClDevice, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)); cl_int retVal = CL_SUCCESS; uint32_t taskLevelCmdQ = 17; pCmdOOQ->taskLevel = taskLevelCmdQ; uint32_t taskLevelEvent1 = 8; uint32_t taskLevelEvent2 = 19; Event event1(pCmdOOQ.get(), CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent1, 4); Event event2(pCmdOOQ.get(), CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent2, 10); cl_bool blockingRead = CL_TRUE; size_t size = sizeof(cl_float); cl_event eventWaitList[] = { &event1, &event2}; 
cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]); cl_event event = nullptr; auto srcBuffer = std::unique_ptr(BufferHelper<>::create()); void *ptr = srcBuffer->getCpuAddressForMemoryTransfer(); retVal = pCmdOOQ->enqueueReadBuffer(srcBuffer.get(), blockingRead, 0, size, ptr, nullptr, numEventsInWaitList, eventWaitList, &event); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, event); auto pEvent = (Event *)event; EXPECT_EQ(19u, pEvent->taskLevel); EXPECT_EQ(17u, pCmdOOQ->taskLevel); pEvent->release(); } TEST_F(EnqueueReadBuffer, givenInOrderQueueAndForcedCpuCopyOnReadBufferAndEventNotReadyWhenReadBufferIsExecutedThenTaskLevelShouldBeIncreased) { DebugManagerStateRestore dbgRestore; DebugManager.flags.DoCpuCopyOnReadBuffer.set(1); cl_int retVal = CL_SUCCESS; uint32_t taskLevelCmdQ = 17; pCmdQ->taskLevel = taskLevelCmdQ; Event event1(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, CompletionStamp::notReady, 4); cl_bool blockingRead = CL_FALSE; size_t size = sizeof(cl_float); cl_event eventWaitList[] = {&event1}; cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]); cl_event event = nullptr; auto dstBuffer = std::unique_ptr(BufferHelper<>::create()); cl_float mem[4]; retVal = pCmdQ->enqueueReadBuffer(dstBuffer.get(), blockingRead, 0, size, mem, nullptr, numEventsInWaitList, eventWaitList, &event); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, event); auto pEvent = (Event *)event; EXPECT_EQ(CompletionStamp::notReady, pEvent->taskLevel); EXPECT_EQ(CompletionStamp::notReady, pCmdQ->taskLevel); event1.taskLevel = 20; event1.setStatus(CL_COMPLETE); pEvent->updateExecutionStatus(); pCmdQ->isQueueBlocked(); pEvent->release(); } TEST_F(EnqueueReadBuffer, givenInOrderQueueAndDisabledSupportCpuCopiesAndDstPtrEqualSrcPtrWithEventsWhenReadBufferIsExecutedThenTaskLevelShouldNotBeIncreased) { DebugManagerStateRestore dbgRestore; DebugManager.flags.DoCpuCopyOnReadBuffer.set(0); cl_int retVal = CL_SUCCESS; uint32_t taskLevelCmdQ = 17; 
pCmdQ->taskLevel = taskLevelCmdQ; uint32_t taskLevelEvent1 = 8; uint32_t taskLevelEvent2 = 19; Event event1(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent1, 4); Event event2(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent2, 10); cl_bool blockingRead = CL_TRUE; size_t size = sizeof(cl_float); cl_event eventWaitList[] = { &event1, &event2}; cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]); cl_event event = nullptr; auto srcBuffer = std::unique_ptr(BufferHelper<>::create()); void *ptr = srcBuffer->getCpuAddressForMemoryTransfer(); retVal = pCmdQ->enqueueReadBuffer(srcBuffer.get(), blockingRead, 0, size, ptr, nullptr, numEventsInWaitList, eventWaitList, &event); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, event); auto pEvent = (Event *)event; EXPECT_EQ(19u, pEvent->taskLevel); EXPECT_EQ(19u, pCmdQ->taskLevel); pEvent->release(); } TEST_F(EnqueueReadBuffer, givenOutOfOrderQueueAndDisabledSupportCpuCopiesAndDstPtrEqualSrcPtrWithEventsWhenReadBufferIsExecutedThenTaskLevelShouldNotBeIncreased) { DebugManagerStateRestore dbgRestore; DebugManager.flags.DoCpuCopyOnReadBuffer.set(0); std::unique_ptr pCmdOOQ(createCommandQueue(pClDevice, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)); cl_int retVal = CL_SUCCESS; uint32_t taskLevelCmdQ = 17; pCmdOOQ->taskLevel = taskLevelCmdQ; uint32_t taskLevelEvent1 = 8; uint32_t taskLevelEvent2 = 19; Event event1(pCmdOOQ.get(), CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent1, 4); Event event2(pCmdOOQ.get(), CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent2, 10); cl_bool blockingRead = CL_TRUE; size_t size = sizeof(cl_float); cl_event eventWaitList[] = { &event1, &event2}; cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]); cl_event event = nullptr; auto srcBuffer = std::unique_ptr(BufferHelper<>::create()); void *ptr = srcBuffer->getCpuAddressForMemoryTransfer(); retVal = pCmdOOQ->enqueueReadBuffer(srcBuffer.get(), blockingRead, 0, size, ptr, nullptr, numEventsInWaitList, eventWaitList, 
&event); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, event); auto pEvent = castToObject(event); if (pCmdOOQ->getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) { EXPECT_EQ(taskLevelEvent2 + 1, pCmdOOQ->taskLevel); EXPECT_EQ(taskLevelEvent2 + 1, pEvent->taskLevel); } else { EXPECT_EQ(19u, pCmdOOQ->taskLevel); EXPECT_EQ(19u, pEvent->taskLevel); } pEvent->release(); } compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/enqueue_read_buffer_fixture.h000066400000000000000000000027241422164147700323700ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/test/unit_test/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/command_queue/enqueue_fixture.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "gtest/gtest.h" namespace NEO { struct EnqueueReadBufferTypeTest : public CommandEnqueueFixture, public ::testing::Test { EnqueueReadBufferTypeTest(void) : srcBuffer(nullptr) { } void SetUp() override { CommandEnqueueFixture::SetUp(); BufferDefaults::context = new MockContext; srcBuffer.reset(BufferHelper<>::create()); nonZeroCopyBuffer.reset(BufferHelper>::create()); } void TearDown() override { srcBuffer.reset(nullptr); nonZeroCopyBuffer.reset(nullptr); delete BufferDefaults::context; CommandEnqueueFixture::TearDown(); } protected: template void enqueueReadBuffer(cl_bool blocking = CL_TRUE) { auto retVal = EnqueueReadBufferHelper<>::enqueueReadBuffer( pCmdQ, srcBuffer.get(), blocking); EXPECT_EQ(CL_SUCCESS, retVal); parseCommands(*pCmdQ); } std::unique_ptr srcBuffer; std::unique_ptr nonZeroCopyBuffer; }; } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/enqueue_read_buffer_rect_fixture.h000066400000000000000000000047311422164147700334050ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * 
SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/ptr_math.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "opencl/test/unit_test/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "gtest/gtest.h" namespace NEO { struct EnqueueReadBufferRectTest : public CommandEnqueueFixture, public ::testing::Test { void SetUp() override { CommandEnqueueFixture::SetUp(); context.reset(new MockContext(pCmdQ->getDevice().getSpecializedDevice())); BufferDefaults::context = context.get(); //For 3D hostPtr = ::alignedMalloc(slicePitch * rowPitch, 4096); auto retVal = CL_INVALID_VALUE; buffer.reset(Buffer::create( context.get(), CL_MEM_READ_WRITE, slicePitch * rowPitch, nullptr, retVal)); ASSERT_NE(nullptr, buffer.get()); nonZeroCopyBuffer.reset(BufferHelper>::create()); } void TearDown() override { nonZeroCopyBuffer.reset(); buffer.reset(); ::alignedFree(hostPtr); context.reset(); CommandEnqueueFixture::TearDown(); } protected: template void enqueueReadBufferRect2D(cl_bool blocking = CL_FALSE) { typedef typename FamilyType::WALKER_TYPE GPGPU_WALKER; typedef typename FamilyType::PIPELINE_SELECT PIPELINE_SELECT; size_t bufferOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {50, 50, 1}; auto retVal = pCmdQ->enqueueReadBufferRect( buffer.get(), blocking, //non-blocking bufferOrigin, hostOrigin, region, rowPitch, slicePitch, rowPitch, slicePitch, hostPtr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); parseCommands(*pCmdQ); } std::unique_ptr context; std::unique_ptr buffer; std::unique_ptr nonZeroCopyBuffer; void *hostPtr = nullptr; static const size_t rowPitch = 100; static const size_t slicePitch = 100 * 100; }; } // namespace NEO 
compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/enqueue_read_buffer_rect_tests.cpp000066400000000000000000000725461422164147700334250ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/built_ins/built_ins.h" #include "shared/source/helpers/compiler_hw_info_config.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/source/built_ins/builtins_dispatch_builder.h" #include "opencl/source/event/event.h" #include "opencl/source/helpers/dispatch_info.h" #include "opencl/test/unit_test/command_queue/enqueue_read_buffer_rect_fixture.h" #include "opencl/test/unit_test/fixtures/buffer_enqueue_fixture.h" #include "opencl/test/unit_test/gen_common/gen_commands_common_validation.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "reg_configs_common.h" using namespace NEO; HWTEST_F(EnqueueReadBufferRectTest, GivenNullBufferWhenReadingBufferThenInvalidMemObjectErrorIsReturned) { auto retVal = CL_SUCCESS; size_t bufferOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {1, 1, 0}; retVal = clEnqueueReadBufferRect( pCmdQ, nullptr, CL_FALSE, bufferOrigin, hostOrigin, region, 10, 0, 10, 0, hostPtr, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } HWTEST_F(EnqueueReadBufferRectTest, GivenNullHostPtrWhenReadingBufferThenInvalidValueErrorIsReturned) { auto retVal = CL_SUCCESS; size_t bufferOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {1, 1, 0}; retVal = clEnqueueReadBufferRect( pCmdQ, buffer.get(), CL_FALSE, bufferOrigin, hostOrigin, region, 10, 0, 10, 0, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); } HWTEST_F(EnqueueReadBufferRectTest, GivenValidParamsWhenReadingBufferThenSuccessIsReturned) { auto retVal = CL_SUCCESS; size_t bufferOrigin[] = 
{0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {1, 1, 1}; retVal = clEnqueueReadBufferRect( pCmdQ, buffer.get(), CL_FALSE, bufferOrigin, hostOrigin, region, 10, 0, 10, 0, hostPtr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } HWTEST_F(EnqueueReadBufferRectTest, GivenGpuHangAndBlockingCallAndValidParamsWhenReadingBufferThenOutOfResourcesIsReturned) { std::unique_ptr device(new MockClDevice{MockClDevice::createWithNewExecutionEnvironment(nullptr)}); cl_queue_properties props = {}; MockCommandQueueHw mockCommandQueueHw(context.get(), device.get(), &props); mockCommandQueueHw.waitForAllEnginesReturnValue = WaitStatus::GpuHang; size_t bufferOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {1, 1, 1}; const auto enqueueResult = clEnqueueReadBufferRect( &mockCommandQueueHw, buffer.get(), CL_TRUE, bufferOrigin, hostOrigin, region, 10, 0, 10, 0, hostPtr, 0, nullptr, nullptr); EXPECT_EQ(CL_OUT_OF_RESOURCES, enqueueResult); EXPECT_EQ(1, mockCommandQueueHw.waitForAllEnginesCalledCount); } HWTEST_F(EnqueueReadBufferRectTest, GivenBlockingEnqueueWhenReadingBufferThenTaskLevelIsNotIncremented) { //this test case assumes IOQ auto &csr = pDevice->getUltCommandStreamReceiver(); csr.taskCount = pCmdQ->taskCount + 100; csr.taskLevel = pCmdQ->taskLevel + 50; auto oldCsrTaskLevel = csr.peekTaskLevel(); enqueueReadBufferRect2D(CL_TRUE); EXPECT_EQ(csr.peekTaskCount(), pCmdQ->taskCount); EXPECT_EQ(oldCsrTaskLevel, pCmdQ->taskLevel); } HWTEST_F(EnqueueReadBufferRectTest, GivenNonBlockingEnqueueWhenReadingBufferThenTaskLevelIsIncremented) { //this test case assumes IOQ auto &csr = pDevice->getUltCommandStreamReceiver(); csr.taskCount = pCmdQ->taskCount + 100; csr.taskLevel = pCmdQ->taskLevel + 50; enqueueReadBufferRect2D(CL_FALSE); EXPECT_EQ(csr.peekTaskCount(), pCmdQ->taskCount); EXPECT_EQ(csr.peekTaskLevel(), pCmdQ->taskLevel + 1); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueReadBufferRectTest, 
Given2dRegionWhenReadingBufferThenCommandsAreProgrammedCorrectly) { typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; enqueueReadBufferRect2D(); ASSERT_NE(cmdList.end(), itorWalker); auto *cmd = (GPGPU_WALKER *)*itorWalker; // Verify GPGPU_WALKER parameters EXPECT_NE(0u, cmd->getThreadGroupIdXDimension()); EXPECT_NE(0u, cmd->getThreadGroupIdYDimension()); EXPECT_EQ(1u, cmd->getThreadGroupIdZDimension()); EXPECT_NE(0u, cmd->getRightExecutionMask()); EXPECT_NE(0u, cmd->getBottomExecutionMask()); EXPECT_EQ(GPGPU_WALKER::SIMD_SIZE_SIMD32, cmd->getSimdSize()); EXPECT_NE(0u, cmd->getIndirectDataLength()); EXPECT_FALSE(cmd->getIndirectParameterEnable()); // Compute the SIMD lane mask size_t simd = cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD32 ? 32 : cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD16 ? 16 : 8; uint64_t simdMask = maxNBitValue(simd); // Mask off lanes based on the execution masks auto laneMaskRight = cmd->getRightExecutionMask() & simdMask; auto lanesPerThreadX = 0; while (laneMaskRight) { lanesPerThreadX += laneMaskRight & 1; laneMaskRight >>= 1; } } HWTEST_F(EnqueueReadBufferRectTest, WhenReadingBufferThenTaskLevelIsIncremented) { auto taskLevelBefore = pCmdQ->taskLevel; enqueueReadBufferRect2D(); EXPECT_GT(pCmdQ->taskLevel, taskLevelBefore); } HWTEST_F(EnqueueReadBufferRectTest, WhenReadingBufferThenCommandsAreAdded) { auto usedCmdBufferBefore = pCS->getUsed(); enqueueReadBufferRect2D(); EXPECT_NE(usedCmdBufferBefore, pCS->getUsed()); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueReadBufferRectTest, WhenReadingBufferThenIndirectDataIsAdded) { auto dshBefore = pDSH->getUsed(); auto iohBefore = pIOH->getUsed(); auto sshBefore = pSSH->getUsed(); enqueueReadBufferRect2D(); // Extract the kernel used auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferRect, pCmdQ->getClDevice()); ASSERT_NE(nullptr, &builder); BuiltinOpParams dc; dc.srcMemObj = buffer.get(); dc.dstPtr = hostPtr; dc.srcOffset = {0, 0, 0}; 
dc.dstOffset = {0, 0, 0}; dc.size = {50, 50, 1}; dc.srcRowPitch = rowPitch; dc.srcSlicePitch = slicePitch; dc.dstRowPitch = rowPitch; dc.dstSlicePitch = slicePitch; MultiDispatchInfo multiDispatchInfo(dc); builder.buildDispatchInfos(multiDispatchInfo); EXPECT_NE(0u, multiDispatchInfo.size()); auto kernel = multiDispatchInfo.begin()->getKernel(); ASSERT_NE(nullptr, kernel); EXPECT_NE(dshBefore, pDSH->getUsed()); EXPECT_NE(iohBefore, pIOH->getUsed()); if (kernel->usesBindfulAddressingForBuffers()) { EXPECT_NE(sshBefore, pSSH->getUsed()); } } HWTEST_F(EnqueueReadBufferRectTest, WhenReadingBufferThenL3ProgrammingIsCorrect) { enqueueReadBufferRect2D(); validateL3Programming(cmdList, itorWalker); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueReadBufferRectTest, When2DEnqueueIsDoneThenStateBaseAddressIsProperlyProgrammed) { enqueueReadBufferRect2D(); auto &ultCsr = this->pDevice->getUltCommandStreamReceiver(); auto &hwHelper = HwHelper::get(pDevice->getHardwareInfo().platform.eRenderCoreFamily); validateStateBaseAddress(ultCsr.getMemoryManager()->getInternalHeapBaseAddress(ultCsr.rootDeviceIndex, pIOH->getGraphicsAllocation()->isAllocatedInLocalMemoryPool()), ultCsr.getMemoryManager()->getInternalHeapBaseAddress(ultCsr.rootDeviceIndex, !hwHelper.useSystemMemoryPlacementForISA(pDevice->getHardwareInfo())), pDSH, pIOH, pSSH, itorPipelineSelect, itorWalker, cmdList, 0llu); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueReadBufferRectTest, WhenReadingBufferThenMediaInterfaceDescriptorIsCorrect) { typedef typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD MEDIA_INTERFACE_DESCRIPTOR_LOAD; typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; enqueueReadBufferRect2D(); // All state should be programmed before walker auto itorCmd = find(itorPipelineSelect, itorWalker); ASSERT_NE(itorWalker, itorCmd); auto *cmd = (MEDIA_INTERFACE_DESCRIPTOR_LOAD *)*itorCmd; // Verify we have a valid length -- multiple of INTERFACE_DESCRIPTOR_DATAs EXPECT_EQ(0u, 
cmd->getInterfaceDescriptorTotalLength() % sizeof(INTERFACE_DESCRIPTOR_DATA)); // Validate the start address size_t alignmentStartAddress = 64 * sizeof(uint8_t); EXPECT_EQ(0u, cmd->getInterfaceDescriptorDataStartAddress() % alignmentStartAddress); // Validate the length EXPECT_NE(0u, cmd->getInterfaceDescriptorTotalLength()); size_t alignmentTotalLength = 32 * sizeof(uint8_t); EXPECT_EQ(0u, cmd->getInterfaceDescriptorTotalLength() % alignmentTotalLength); // Generically validate this command FamilyType::PARSE::template validateCommand(cmdList.begin(), itorCmd); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueReadBufferRectTest, WhenReadingBufferThenInterfaceDescriptorDataIsCorrect) { typedef typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD MEDIA_INTERFACE_DESCRIPTOR_LOAD; typedef typename FamilyType::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; enqueueReadBufferRect2D(); // Extract the MIDL command auto itorCmd = find(itorPipelineSelect, itorWalker); ASSERT_NE(itorWalker, itorCmd); auto *cmdMIDL = (MEDIA_INTERFACE_DESCRIPTOR_LOAD *)*itorCmd; // Extract the SBA command itorCmd = find(cmdList.begin(), itorWalker); ASSERT_NE(itorWalker, itorCmd); auto *cmdSBA = (STATE_BASE_ADDRESS *)*itorCmd; // Extrach the DSH auto DSH = cmdSBA->getDynamicStateBaseAddress(); ASSERT_NE(0u, DSH); // IDD should be located within DSH auto iddStart = cmdMIDL->getInterfaceDescriptorDataStartAddress(); auto IDDEnd = iddStart + cmdMIDL->getInterfaceDescriptorTotalLength(); ASSERT_LE(IDDEnd, cmdSBA->getDynamicStateBufferSize() * MemoryConstants::pageSize); auto &IDD = *(INTERFACE_DESCRIPTOR_DATA *)cmdInterfaceDescriptorData; // Validate the kernel start pointer. Technically, a kernel can start at address 0 but let's force a value. 
auto kernelStartPointer = ((uint64_t)IDD.getKernelStartPointerHigh() << 32) + IDD.getKernelStartPointer(); EXPECT_LE(kernelStartPointer, cmdSBA->getInstructionBufferSize() * MemoryConstants::pageSize); EXPECT_NE(0u, IDD.getNumberOfThreadsInGpgpuThreadGroup()); EXPECT_NE(0u, IDD.getCrossThreadConstantDataReadLength()); EXPECT_NE(0u, IDD.getConstantIndirectUrbEntryReadLength()); } HWTEST_F(EnqueueReadBufferRectTest, WhenReadingBufferThenOnePipelineSelectIsProgrammed) { enqueueReadBufferRect2D(); int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect(); EXPECT_EQ(1, numCommands); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueReadBufferRectTest, WhenReadingBufferThenMediaVfeStateIsCorrect) { enqueueReadBufferRect2D(); validateMediaVFEState(&pDevice->getHardwareInfo(), cmdMediaVfeState, cmdList, itorMediaVfeState); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueReadBufferRectTest, GivenBlockingEnqueueWhenReadingBufferThenPipeControlIsProgrammedAfterWalkerWithDcFlushSet) { typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; auto blocking = CL_TRUE; enqueueReadBufferRect2D(blocking); auto itorWalker = find(cmdList.begin(), cmdList.end()); // All state should be programmed after walker auto itorCmd = find(itorWalker, cmdList.end()); auto *cmd = (PIPE_CONTROL *)*itorCmd; EXPECT_NE(cmdList.end(), itorCmd); if (UnitTestHelper::isPipeControlWArequired(pDevice->getHardwareInfo())) { // SKL: two PIPE_CONTROLs following GPGPU_WALKER: first has DcFlush and second has Write HwTag EXPECT_FALSE(cmd->getDcFlushEnable()); // Move to next PPC auto itorCmdP = ++((GenCmdList::iterator)itorCmd); EXPECT_NE(cmdList.end(), itorCmdP); auto itorCmd2 = find(itorCmdP, cmdList.end()); cmd = (PIPE_CONTROL *)*itorCmd2; EXPECT_TRUE(cmd->getDcFlushEnable()); } else { // single PIPE_CONTROL following GPGPU_WALKER has DcFlush and Write HwTag EXPECT_TRUE(cmd->getDcFlushEnable()); } } HWTEST_F(EnqueueReadBufferRectTest, 
givenInOrderQueueAndDstPtrEqualSrcPtrWithEventsWhenReadBufferIsExecutedThenTaskLevelShouldNotBeIncreased) { cl_int retVal = CL_SUCCESS; uint32_t taskLevelCmdQ = 17; pCmdQ->taskLevel = taskLevelCmdQ; uint32_t taskLevelEvent1 = 8; uint32_t taskLevelEvent2 = 19; Event event1(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent1, 4); Event event2(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent2, 10); cl_event eventWaitList[] = { &event1, &event2}; cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]); cl_event event = nullptr; size_t bufferOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {50, 50, 1}; void *ptr = buffer->getCpuAddressForMemoryTransfer(); retVal = pCmdQ->enqueueReadBufferRect( buffer.get(), CL_FALSE, bufferOrigin, hostOrigin, region, rowPitch, slicePitch, rowPitch, slicePitch, ptr, numEventsInWaitList, eventWaitList, &event); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, event); auto pEvent = (Event *)event; EXPECT_EQ(19u, pEvent->taskLevel); EXPECT_EQ(19u, pCmdQ->taskLevel); EXPECT_EQ(CL_COMMAND_READ_BUFFER_RECT, (const int)pEvent->getCommandType()); pEvent->release(); } HWTEST_F(EnqueueReadBufferRectTest, givenOutOfOrderQueueAndDstPtrEqualSrcPtrWithEventsWhenReadBufferIsExecutedThenTaskLevelShouldNotBeIncreased) { cl_int retVal = CL_SUCCESS; std::unique_ptr pCmdOOQ(createCommandQueue(pClDevice, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)); UltCommandStreamReceiver &mockCsr = reinterpret_cast &>(pCmdOOQ->getGpgpuCommandStreamReceiver()); mockCsr.useNewResourceImplicitFlush = false; mockCsr.useGpuIdleImplicitFlush = false; uint32_t taskLevelCmdQ = 17; pCmdOOQ->taskLevel = taskLevelCmdQ; uint32_t taskLevelEvent1 = 8; uint32_t taskLevelEvent2 = 19; Event event1(pCmdOOQ.get(), CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent1, 4); Event event2(pCmdOOQ.get(), CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent2, 10); cl_event eventWaitList[] = { &event1, &event2}; cl_uint numEventsInWaitList = sizeof(eventWaitList) / 
sizeof(eventWaitList[0]); cl_event event = nullptr; size_t bufferOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {50, 50, 1}; void *ptr = buffer->getCpuAddressForMemoryTransfer(); retVal = pCmdOOQ->enqueueReadBufferRect( buffer.get(), CL_FALSE, bufferOrigin, hostOrigin, region, rowPitch, slicePitch, rowPitch, slicePitch, ptr, numEventsInWaitList, eventWaitList, &event); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, event); auto pEvent = (Event *)event; EXPECT_EQ(19u, pEvent->taskLevel); EXPECT_EQ(19u, pCmdOOQ->taskLevel); EXPECT_EQ(CL_COMMAND_READ_BUFFER_RECT, (const int)pEvent->getCommandType()); pEvent->release(); } HWTEST_F(EnqueueReadBufferRectTest, givenInOrderQueueAndRowPitchEqualZeroAndDstPtrEqualSrcPtrWhenReadBufferIsExecutedThenTaskLevelShouldNotBeIncreased) { cl_int retVal = CL_SUCCESS; void *ptr = buffer->getCpuAddressForMemoryTransfer(); size_t bufferOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {50, 50, 1}; retVal = pCmdQ->enqueueReadBufferRect( buffer.get(), CL_FALSE, bufferOrigin, hostOrigin, region, 0, slicePitch, 0, slicePitch, ptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pCmdQ->taskLevel, 0u); } HWTEST_F(EnqueueReadBufferRectTest, givenInOrderQueueAndSlicePitchEqualZeroAndDstPtrEqualSrcPtrWhenReadBufferIsExecutedThenTaskLevelShouldNotBeIncreased) { cl_int retVal = CL_SUCCESS; void *ptr = buffer->getCpuAddressForMemoryTransfer(); size_t bufferOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {50, 50, 1}; retVal = pCmdQ->enqueueReadBufferRect( buffer.get(), CL_FALSE, bufferOrigin, hostOrigin, region, rowPitch, 0, rowPitch, 0, ptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pCmdQ->taskLevel, 0u); } HWTEST_F(EnqueueReadBufferRectTest, 
givenInOrderQueueAndMemObjWithOffsetPointTheSameStorageWithHostWhenReadBufferIsExecutedThenTaskLevelShouldNotBeIncreased) { cl_int retVal = CL_SUCCESS; void *ptr = buffer->getCpuAddressForMemoryTransfer(); size_t bufferOrigin[] = {50, 50, 0}; size_t hostOrigin[] = {20, 20, 0}; size_t region[] = {50, 50, 1}; size_t hostOffset = (bufferOrigin[2] - hostOrigin[2]) * slicePitch + (bufferOrigin[1] - hostOrigin[1]) * rowPitch + (bufferOrigin[0] - hostOrigin[0]); auto hostStorage = ptrOffset(ptr, hostOffset); retVal = pCmdQ->enqueueReadBufferRect( buffer.get(), CL_FALSE, bufferOrigin, hostOrigin, region, rowPitch, slicePitch, rowPitch, slicePitch, hostStorage, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pCmdQ->taskLevel, 0u); } HWTEST_F(EnqueueReadBufferRectTest, givenInOrderQueueAndMemObjWithOffsetPointDiffrentStorageWithHostWhenReadBufferIsExecutedThenTaskLevelShouldBeIncreased) { cl_int retVal = CL_SUCCESS; void *ptr = buffer->getCpuAddressForMemoryTransfer(); size_t bufferOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {1, 1, 0}; size_t region[] = {1, 1, 1}; retVal = pCmdQ->enqueueReadBufferRect( buffer.get(), CL_FALSE, bufferOrigin, hostOrigin, region, rowPitch, slicePitch, rowPitch, slicePitch, ptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pCmdQ->taskLevel, 1u); } HWTEST_F(EnqueueReadBufferRectTest, givenInOrderQueueAndDstPtrEqualSrcPtrAndNonZeroCopyBufferWhenReadBufferIsExecutedThenTaskLevelShouldBeIncreased) { cl_int retVal = CL_SUCCESS; void *ptr = nonZeroCopyBuffer->getCpuAddressForMemoryTransfer(); size_t bufferOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {1, 1, 1}; retVal = pCmdQ->enqueueReadBufferRect( nonZeroCopyBuffer.get(), CL_FALSE, bufferOrigin, hostOrigin, region, rowPitch, slicePitch, rowPitch, slicePitch, ptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pCmdQ->taskLevel, 1u); } 
HWTEST_F(EnqueueReadWriteBufferRectDispatch, givenOffsetResultingInMisalignedPtrWhenEnqueueReadBufferRectForNon3DCaseIsCalledThenAddressInStateBaseAddressIsAlignedAndMatchesKernelDispatchInfoParams) { hwInfo->capabilityTable.blitterOperationsSupported = false; initializeFixture(); const auto &compilerHwInfoConfig = *CompilerHwInfoConfig::get(defaultHwInfo->platform.eProductFamily); if (compilerHwInfoConfig.isForceToStatelessRequired()) { GTEST_SKIP(); } auto cmdQ = std::make_unique>(context.get(), device.get(), &properties); buffer->forceDisallowCPUCopy = true; Vec3 hostOffset(hostOrigin); auto misalignedDstPtr = ptrOffset(reinterpret_cast(memory), hostOffset.z * hostSlicePitch); auto retVal = cmdQ->enqueueReadBufferRect(buffer.get(), CL_FALSE, bufferOrigin, hostOrigin, region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, memory, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(0u, cmdQ->lastEnqueuedKernels.size()); Kernel *kernel = cmdQ->lastEnqueuedKernels[0]; cmdQ->finish(); parseCommands(*cmdQ); auto &kernelInfo = kernel->getKernelInfo(); if (hwInfo->capabilityTable.gpuAddressSpace == MemoryConstants::max48BitAddress) { const auto &surfaceStateDst = getSurfaceState(&cmdQ->getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0), 1); if (kernelInfo.getArgDescriptorAt(1).as().pointerSize == sizeof(uint64_t)) { auto pKernelArg = (uint64_t *)(kernel->getCrossThreadData() + kernelInfo.getArgDescriptorAt(1).as().stateless); EXPECT_EQ(reinterpret_cast(alignDown(misalignedDstPtr, 4)), *pKernelArg); EXPECT_EQ(*pKernelArg, surfaceStateDst.getSurfaceBaseAddress()); } else if (kernelInfo.getArgDescriptorAt(1).as().pointerSize == sizeof(uint32_t)) { auto pKernelArg = (uint32_t *)(kernel->getCrossThreadData() + kernelInfo.getArgDescriptorAt(1).as().stateless); EXPECT_EQ(reinterpret_cast(alignDown(misalignedDstPtr, 4)), static_cast(*pKernelArg)); EXPECT_EQ(static_cast(*pKernelArg), surfaceStateDst.getSurfaceBaseAddress()); } } if 
(kernelInfo.getArgDescriptorAt(3).as().elements[0].size == 4 * sizeof(uint32_t)) { // size of uint4 DstOrigin auto dstOffset = (uint32_t *)(kernel->getCrossThreadData() + kernelInfo.getArgDescriptorAt(3).as().elements[0].offset); EXPECT_EQ(hostOffset.x + ptrDiff(misalignedDstPtr, alignDown(misalignedDstPtr, 4)), *dstOffset); } else { // DstOrigin arg should be 16 bytes in size, if that changes, above if path should be modified EXPECT_TRUE(false); } } using NegativeFailAllocationTest = Test; HWTEST_F(NegativeFailAllocationTest, givenEnqueueReadBufferRectWhenHostPtrAllocationCreationFailsThenReturnOutOfResource) { cl_int retVal = CL_SUCCESS; constexpr size_t rowPitch = 100; constexpr size_t slicePitch = 100 * 100; size_t bufferOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {50, 50, 1}; retVal = pCmdQ->enqueueReadBufferRect( buffer.get(), CL_FALSE, bufferOrigin, hostOrigin, region, rowPitch, slicePitch, rowPitch, slicePitch, ptr, 0, nullptr, nullptr); EXPECT_EQ(CL_OUT_OF_RESOURCES, retVal); } struct EnqueueReadBufferRectHw : public ::testing::Test { void SetUp() override { if (is32bit) { GTEST_SKIP(); } device = std::make_unique(MockClDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); context.reset(new MockContext(device.get())); } std::unique_ptr device; std::unique_ptr context; MockBuffer srcBuffer; size_t bufferOrigin[3] = {0, 0, 0}; size_t hostOrigin[3] = {0, 0, 0}; size_t region[3] = {1, 1, 1}; size_t bufferRowPitch = 10; size_t bufferSlicePitch = 0; size_t hostRowPitch = 10; size_t hostSlicePitch = 10; uint64_t bigSize = 4ull * MemoryConstants::gigaByte; uint64_t smallSize = 4ull * MemoryConstants::gigaByte - 1; }; using EnqueueReadBufferRectStatelessTest = EnqueueReadBufferRectHw; HWTEST_F(EnqueueReadBufferRectStatelessTest, WhenReadingBufferRectStatelessThenSuccessIsReturned) { auto pCmdQ = std::make_unique>(context.get(), device.get()); void *missAlignedPtr = reinterpret_cast(0x1041); srcBuffer.size = 
static_cast(bigSize); auto retVal = pCmdQ->enqueueReadBufferRect(&srcBuffer, CL_FALSE, bufferOrigin, hostOrigin, region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, missAlignedPtr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } using EnqueueReadBufferRectStatefulTest = EnqueueReadBufferRectHw; HWTEST_F(EnqueueReadBufferRectStatefulTest, WhenReadingBufferRectStatefulThenSuccessIsReturned) { auto pCmdQ = std::make_unique>(context.get(), device.get()); void *missAlignedPtr = reinterpret_cast(0x1041); srcBuffer.size = static_cast(smallSize); auto retVal = pCmdQ->enqueueReadBufferRect(&srcBuffer, CL_FALSE, bufferOrigin, hostOrigin, region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, missAlignedPtr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } HWTEST_F(EnqueueReadBufferRectHw, givenHostPtrIsFromMappedBufferWhenReadBufferRectIsCalledThenReuseGraphicsAllocation) { DebugManagerStateRestore restore{}; DebugManager.flags.DisableZeroCopyForBuffers.set(1); MockCommandQueueHw queue(context.get(), device.get(), nullptr); auto &csr = device->getUltCommandStreamReceiver(); BufferDefaults::context = context.get(); auto bufferForMap = clUniquePtr(BufferHelper<>::create()); auto bufferForRead = clUniquePtr(BufferHelper<>::create()); cl_int retVal{}; void *mappedPtr = queue.enqueueMapBuffer(bufferForMap.get(), CL_TRUE, CL_MAP_READ, 0, bufferForMap->getSize(), 0, nullptr, nullptr, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, mappedPtr); EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled); MapOperationsHandler *mapOperationsHandler = context->getMapOperationsStorage().getHandlerIfExists(bufferForMap.get()); EXPECT_NE(nullptr, mapOperationsHandler); MapInfo mapInfo{}; EXPECT_TRUE(mapOperationsHandler->find(mappedPtr, mapInfo)); EXPECT_NE(nullptr, mapInfo.graphicsAllocation); auto unmappedPtr = std::make_unique(bufferForRead->getSize()); retVal = queue.enqueueReadBufferRect(bufferForRead.get(), CL_TRUE, 
bufferOrigin, hostOrigin, region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, unmappedPtr.get(), 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, csr.createAllocationForHostSurfaceCalled); retVal = queue.enqueueReadBufferRect(bufferForRead.get(), CL_TRUE, bufferOrigin, hostOrigin, region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, mappedPtr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, csr.createAllocationForHostSurfaceCalled); } compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/enqueue_read_buffer_tests.cpp000066400000000000000000001134141422164147700323760ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/built_ins/built_ins.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/cache_policy.h" #include "shared/source/helpers/local_memory_access_modes.h" #include "shared/source/memory_manager/allocations_list.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/mocks/mock_execution_environment.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/source/built_ins/builtins_dispatch_builder.h" #include "opencl/source/helpers/dispatch_info.h" #include "opencl/test/unit_test/command_queue/enqueue_fixture.h" #include "opencl/test/unit_test/command_queue/enqueue_read_buffer_fixture.h" #include "opencl/test/unit_test/gen_common/gen_commands_common_validation.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "reg_configs_common.h" using namespace NEO; HWTEST_F(EnqueueReadBufferTypeTest, GivenNullBufferWhenReadingBufferThenInvalidMemObjectErrorIsReturned) { auto data = 1; auto retVal = clEnqueueReadBuffer( pCmdQ, 
nullptr, false, 0, sizeof(data), &data, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } HWTEST_F(EnqueueReadBufferTypeTest, GivenNullUserPointerWhenReadingBufferThenInvalidValueErrorIsReturned) { auto data = 1; auto retVal = clEnqueueReadBuffer( pCmdQ, srcBuffer.get(), false, 0, sizeof(data), nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueReadBufferTypeTest, WhenReadingBufferThenGpgpuWalkerIsProgrammedCorrectly) { typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; srcBuffer->forceDisallowCPUCopy = true; enqueueReadBuffer(); ASSERT_NE(cmdList.end(), itorWalker); auto *cmd = (GPGPU_WALKER *)*itorWalker; // Verify GPGPU_WALKER parameters EXPECT_NE(0u, cmd->getThreadGroupIdXDimension()); EXPECT_NE(0u, cmd->getThreadGroupIdYDimension()); EXPECT_NE(0u, cmd->getThreadGroupIdZDimension()); EXPECT_NE(0u, cmd->getRightExecutionMask()); EXPECT_NE(0u, cmd->getBottomExecutionMask()); EXPECT_EQ(GPGPU_WALKER::SIMD_SIZE_SIMD32, cmd->getSimdSize()); EXPECT_NE(0u, cmd->getIndirectDataLength()); EXPECT_FALSE(cmd->getIndirectParameterEnable()); // Compute the SIMD lane mask size_t simd = cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD32 ? 32 : cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD16 ? 
16 : 8; uint64_t simdMask = maxNBitValue(simd); // Mask off lanes based on the execution masks auto laneMaskRight = cmd->getRightExecutionMask() & simdMask; auto lanesPerThreadX = 0; while (laneMaskRight) { lanesPerThreadX += laneMaskRight & 1; laneMaskRight >>= 1; } } HWTEST_F(EnqueueReadBufferTypeTest, WhenReadingBufferThenTaskLevelIsIncremented) { auto taskLevelBefore = pCmdQ->taskLevel; srcBuffer->forceDisallowCPUCopy = true; EnqueueReadBufferHelper<>::enqueueReadBuffer(pCmdQ, srcBuffer.get(), CL_TRUE); EXPECT_GT(pCmdQ->taskLevel, taskLevelBefore); } HWTEST_F(EnqueueReadBufferTypeTest, GivenGpuHangAndBlockingCallWhenReadingBufferThenOutOfResourcesIsReturned) { std::unique_ptr device(new MockClDevice{MockClDevice::createWithNewExecutionEnvironment(nullptr)}); cl_queue_properties props{}; MockCommandQueueHw mockCommandQueueHw(context, device.get(), &props); mockCommandQueueHw.waitForAllEnginesReturnValue = WaitStatus::GpuHang; srcBuffer->forceDisallowCPUCopy = true; const auto enqueueResult = EnqueueReadBufferHelper<>::enqueueReadBuffer(&mockCommandQueueHw, srcBuffer.get(), CL_TRUE); EXPECT_EQ(CL_OUT_OF_RESOURCES, enqueueResult); EXPECT_EQ(1, mockCommandQueueHw.waitForAllEnginesCalledCount); } HWTEST_F(EnqueueReadBufferTypeTest, GivenBlockingWhenReadingBufferThenAlignedToCsr) { //this test case assumes IOQ auto &csr = pDevice->getUltCommandStreamReceiver(); csr.taskCount = pCmdQ->taskCount + 100; csr.taskLevel = pCmdQ->taskLevel + 50; auto oldCsrTaskLevel = csr.peekTaskLevel(); srcBuffer->forceDisallowCPUCopy = true; EnqueueReadBufferHelper<>::enqueueReadBuffer(pCmdQ, srcBuffer.get(), CL_TRUE); EXPECT_EQ(csr.peekTaskCount(), pCmdQ->taskCount); EXPECT_EQ(oldCsrTaskLevel, pCmdQ->taskLevel); } HWTEST_F(EnqueueReadBufferTypeTest, GivenNonBlockingWhenReadingBufferThenAlignedToCsr) { //this test case assumes IOQ auto &csr = pDevice->getUltCommandStreamReceiver(); csr.taskCount = pCmdQ->taskCount + 100; csr.taskLevel = pCmdQ->taskLevel + 50; 
EnqueueReadBufferHelper<>::enqueueReadBuffer(pCmdQ, srcBuffer.get(), CL_FALSE); EXPECT_EQ(csr.peekTaskCount(), pCmdQ->taskCount); EXPECT_EQ(csr.peekTaskLevel(), pCmdQ->taskLevel + 1); } HWTEST_F(EnqueueReadBufferTypeTest, WhenReadingBufferThenCommandsAreAdded) { auto usedCmdBufferBefore = pCS->getUsed(); srcBuffer->forceDisallowCPUCopy = true; EnqueueReadBufferHelper<>::enqueueReadBuffer(pCmdQ, srcBuffer.get(), CL_TRUE); EXPECT_NE(usedCmdBufferBefore, pCS->getUsed()); } HWTEST_F(EnqueueReadBufferTypeTest, WhenReadingBufferThenIndirectDataIsAdded) { auto dshBefore = pDSH->getUsed(); auto iohBefore = pIOH->getUsed(); auto sshBefore = pSSH->getUsed(); srcBuffer->forceDisallowCPUCopy = true; EnqueueReadBufferHelper<>::enqueueReadBuffer(pCmdQ, srcBuffer.get(), CL_TRUE); auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBuffer, pCmdQ->getClDevice()); ASSERT_NE(nullptr, &builder); BuiltinOpParams dc; dc.dstPtr = EnqueueReadBufferTraits::hostPtr; dc.srcMemObj = srcBuffer.get(); dc.srcOffset = {EnqueueReadBufferTraits::offset, 0, 0}; dc.size = {srcBuffer->getSize(), 0, 0}; MultiDispatchInfo multiDispatchInfo(dc); builder.buildDispatchInfos(multiDispatchInfo); EXPECT_NE(0u, multiDispatchInfo.size()); auto kernel = multiDispatchInfo.begin()->getKernel(); auto kernelDescriptor = &kernel->getKernelInfo().kernelDescriptor; EXPECT_TRUE(UnitTestHelper::evaluateDshUsage(dshBefore, pDSH->getUsed(), kernelDescriptor, rootDeviceIndex)); EXPECT_NE(iohBefore, pIOH->getUsed()); if (kernel->usesBindfulAddressingForBuffers()) { EXPECT_NE(sshBefore, pSSH->getUsed()); } } HWTEST_F(EnqueueReadBufferTypeTest, WhenReadingBufferThenLoadRegisterImmediateL3CntlregIsCorrect) { srcBuffer->forceDisallowCPUCopy = true; enqueueReadBuffer(); validateL3Programming(cmdList, itorWalker); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueReadBufferTypeTest, WhenEnqueueIsDoneThenStateBaseAddressIsProperlyProgrammed) { srcBuffer->forceDisallowCPUCopy = true; 
enqueueReadBuffer(); auto &ultCsr = this->pDevice->getUltCommandStreamReceiver(); auto &hwHelper = HwHelper::get(pDevice->getHardwareInfo().platform.eRenderCoreFamily); validateStateBaseAddress(ultCsr.getMemoryManager()->getInternalHeapBaseAddress(ultCsr.rootDeviceIndex, pIOH->getGraphicsAllocation()->isAllocatedInLocalMemoryPool()), ultCsr.getMemoryManager()->getInternalHeapBaseAddress(ultCsr.rootDeviceIndex, !hwHelper.useSystemMemoryPlacementForISA(pDevice->getHardwareInfo())), pDSH, pIOH, pSSH, itorPipelineSelect, itorWalker, cmdList, 0llu); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueReadBufferTypeTest, WhenReadingBufferThenMediaInterfaceDescriptorLoadIsCorrect) { typedef typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD MEDIA_INTERFACE_DESCRIPTOR_LOAD; typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; srcBuffer->forceDisallowCPUCopy = true; enqueueReadBuffer(); // All state should be programmed before walker auto itorCmd = find(itorPipelineSelect, itorWalker); ASSERT_NE(itorWalker, itorCmd); auto *cmd = (MEDIA_INTERFACE_DESCRIPTOR_LOAD *)*itorCmd; // Verify we have a valid length -- multiple of INTERFACE_DESCRIPTOR_DATAs EXPECT_EQ(0u, cmd->getInterfaceDescriptorTotalLength() % sizeof(INTERFACE_DESCRIPTOR_DATA)); // Validate the start address size_t alignmentStartAddress = 64 * sizeof(uint8_t); EXPECT_EQ(0u, cmd->getInterfaceDescriptorDataStartAddress() % alignmentStartAddress); // Validate the length EXPECT_NE(0u, cmd->getInterfaceDescriptorTotalLength()); size_t alignmentTotalLength = 32 * sizeof(uint8_t); EXPECT_EQ(0u, cmd->getInterfaceDescriptorTotalLength() % alignmentTotalLength); // Generically validate this command FamilyType::PARSE::template validateCommand(cmdList.begin(), itorCmd); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueReadBufferTypeTest, WhenReadingBufferThenInterfaceDescriptorDataIsCorrect) { typedef typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD MEDIA_INTERFACE_DESCRIPTOR_LOAD; typedef typename 
FamilyType::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; srcBuffer->forceDisallowCPUCopy = true; enqueueReadBuffer(); // Extract the MIDL command auto itorCmd = find(itorPipelineSelect, itorWalker); ASSERT_NE(itorWalker, itorCmd); auto *cmdMIDL = (MEDIA_INTERFACE_DESCRIPTOR_LOAD *)*itorCmd; // Extract the SBA command itorCmd = find(cmdList.begin(), itorWalker); ASSERT_NE(itorWalker, itorCmd); auto *cmdSBA = (STATE_BASE_ADDRESS *)*itorCmd; // Extrach the DSH auto DSH = cmdSBA->getDynamicStateBaseAddress(); ASSERT_NE(0u, DSH); // IDD should be located within DSH auto iddStart = cmdMIDL->getInterfaceDescriptorDataStartAddress(); auto IDDEnd = iddStart + cmdMIDL->getInterfaceDescriptorTotalLength(); ASSERT_LE(IDDEnd, cmdSBA->getDynamicStateBufferSize() * MemoryConstants::pageSize); auto &IDD = *(INTERFACE_DESCRIPTOR_DATA *)cmdInterfaceDescriptorData; // Validate the kernel start pointer. Technically, a kernel can start at address 0 but let's force a value. 
auto kernelStartPointer = ((uint64_t)IDD.getKernelStartPointerHigh() << 32) + IDD.getKernelStartPointer(); EXPECT_LE(kernelStartPointer, cmdSBA->getInstructionBufferSize() * MemoryConstants::pageSize); EXPECT_NE(0u, IDD.getNumberOfThreadsInGpgpuThreadGroup()); EXPECT_NE(0u, IDD.getCrossThreadConstantDataReadLength()); EXPECT_NE(0u, IDD.getConstantIndirectUrbEntryReadLength()); } HWTEST_F(EnqueueReadBufferTypeTest, WhenReadingBufferThenPipelineSelectIsProgrammedOnce) { srcBuffer->forceDisallowCPUCopy = true; enqueueReadBuffer(); int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect(); EXPECT_EQ(1, numCommands); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueReadBufferTypeTest, WhenReadingBufferThenMediaVfeStateIsCorrect) { srcBuffer->forceDisallowCPUCopy = true; enqueueReadBuffer(); validateMediaVFEState(&pDevice->getHardwareInfo(), cmdMediaVfeState, cmdList, itorMediaVfeState); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueReadBufferTypeTest, GivenBlockingWhenReadingBufferThenPipeControlAfterWalkerWithDcFlushSetIsAdded) { typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; srcBuffer->forceDisallowCPUCopy = true; enqueueReadBuffer(CL_TRUE); // All state should be programmed after walker auto itorWalker = find(cmdList.begin(), cmdList.end()); auto itorCmd = find(itorWalker, cmdList.end()); auto *cmd = (PIPE_CONTROL *)*itorCmd; EXPECT_NE(cmdList.end(), itorCmd); if (UnitTestHelper::isPipeControlWArequired(pDevice->getHardwareInfo())) { // SKL: two PIPE_CONTROLs following GPGPU_WALKER: first has DcFlush and second has Write HwTag EXPECT_FALSE(cmd->getDcFlushEnable()); // Move to next PPC auto itorCmdP = ++((GenCmdList::iterator)itorCmd); EXPECT_NE(cmdList.end(), itorCmdP); auto itorCmd2 = find(itorCmdP, cmdList.end()); cmd = (PIPE_CONTROL *)*itorCmd2; EXPECT_TRUE(cmd->getDcFlushEnable()); } else { // BDW: single PIPE_CONTROL following GPGPU_WALKER has DcFlush and Write HwTag EXPECT_TRUE(cmd->getDcFlushEnable()); } } HWTEST_F(EnqueueReadBufferTypeTest, 
givenAlignedPointerAndAlignedSizeWhenReadBufferIsCalledThenRecordedL3IndexIsL3OrL1ON) { void *ptr = (void *)0x1040; cl_int retVal = pCmdQ->enqueueReadBuffer(srcBuffer.get(), CL_FALSE, 0, MemoryConstants::cacheLineSize, ptr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); auto &csr = pDevice->getUltCommandStreamReceiver(); auto gmmHelper = pDevice->getGmmHelper(); auto mocsIndexL3on = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER) >> 1; auto mocsIndexL1on = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST) >> 1; EXPECT_TRUE(mocsIndexL3on == csr.latestSentStatelessMocsConfig || mocsIndexL1on == csr.latestSentStatelessMocsConfig); } HWTEST_F(EnqueueReadBufferTypeTest, givenNotAlignedPointerAndAlignedSizeWhenReadBufferIsCalledThenRecordedL3IndexIsL3Off) { void *ptr = (void *)0x1039; cl_int retVal = pCmdQ->enqueueReadBuffer(srcBuffer.get(), CL_FALSE, 0, MemoryConstants::cacheLineSize, ptr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); auto &csr = pDevice->getUltCommandStreamReceiver(); auto gmmHelper = pDevice->getGmmHelper(); auto mocsIndexL3off = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED) >> 1; auto mocsIndexL3on = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER) >> 1; auto mocsIndexL1on = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST) >> 1; EXPECT_EQ(mocsIndexL3off, csr.latestSentStatelessMocsConfig); void *ptr2 = (void *)0x1040; retVal = pCmdQ->enqueueReadBuffer(srcBuffer.get(), CL_FALSE, 0, MemoryConstants::cacheLineSize, ptr2, nullptr, 0, nullptr, nullptr); EXPECT_TRUE(mocsIndexL3on == csr.latestSentStatelessMocsConfig || mocsIndexL1on == csr.latestSentStatelessMocsConfig); } HWTEST_F(EnqueueReadBufferTypeTest, givenNotAlignedPointerAndSizeWhenBlockedReadBufferIsCalledThenRecordedL3IndexIsL3Off) { auto ptr = reinterpret_cast(0x1039); auto userEvent = clCreateUserEvent(pCmdQ->getContextPtr(), nullptr); cl_int retVal = pCmdQ->enqueueReadBuffer(srcBuffer.get(), CL_FALSE, 0, 
MemoryConstants::cacheLineSize, ptr, nullptr, 1, &userEvent, nullptr); clSetUserEventStatus(userEvent, 0u); EXPECT_EQ(CL_SUCCESS, retVal); auto &csr = pDevice->getUltCommandStreamReceiver(); auto gmmHelper = pDevice->getGmmHelper(); auto mocsIndexL3off = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED) >> 1; EXPECT_EQ(mocsIndexL3off, csr.latestSentStatelessMocsConfig); clReleaseEvent(userEvent); } HWTEST_F(EnqueueReadBufferTypeTest, givenOOQWithEnabledSupportCpuCopiesAndDstPtrEqualSrcPtrAndZeroCopyBufferWhenReadBufferIsExecutedThenTaskLevelNotIncreased) { DebugManagerStateRestore dbgRestore; DebugManager.flags.DoCpuCopyOnReadBuffer.set(1); cl_int retVal = CL_SUCCESS; std::unique_ptr pCmdOOQ(createCommandQueue(pClDevice, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)); void *ptr = srcBuffer->getCpuAddressForMemoryTransfer(); EXPECT_EQ(retVal, CL_SUCCESS); retVal = pCmdOOQ->enqueueReadBuffer(srcBuffer.get(), CL_FALSE, 0, MemoryConstants::cacheLineSize, ptr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pCmdOOQ->taskLevel, 0u); } HWTEST_F(EnqueueReadBufferTypeTest, givenOOQWithDisabledSupportCpuCopiesAndDstPtrEqualSrcPtrAndZeroCopyBufferWhenReadBufferIsExecutedThenTaskLevelNotIncreased) { DebugManagerStateRestore dbgRestore; DebugManager.flags.DoCpuCopyOnReadBuffer.set(0); cl_int retVal = CL_SUCCESS; std::unique_ptr pCmdOOQ(createCommandQueue(pClDevice, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)); void *ptr = srcBuffer->getCpuAddressForMemoryTransfer(); EXPECT_EQ(retVal, CL_SUCCESS); retVal = pCmdOOQ->enqueueReadBuffer(srcBuffer.get(), CL_FALSE, 0, MemoryConstants::cacheLineSize, ptr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pCmdOOQ->taskLevel, 0u); } HWTEST_F(EnqueueReadBufferTypeTest, givenGpuHangAndBlockingCallAndOOQWithDisabledSupportCpuCopiesAndDstPtrEqualSrcPtrAndZeroCopyBufferWhenReadBufferIsExecutedThenOutOfResourcesIsReturned) { DebugManagerStateRestore dbgRestore; 
DebugManager.flags.DoCpuCopyOnReadBuffer.set(0); std::unique_ptr device(new MockClDevice{MockClDevice::createWithNewExecutionEnvironment(nullptr)}); cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0}; MockCommandQueueHw mockCommandQueueHw(context, device.get(), props); mockCommandQueueHw.waitForAllEnginesReturnValue = WaitStatus::GpuHang; void *ptr = srcBuffer->getCpuAddressForMemoryTransfer(); const auto enqueueResult = mockCommandQueueHw.enqueueReadBuffer(srcBuffer.get(), CL_TRUE, 0, MemoryConstants::cacheLineSize, ptr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_OUT_OF_RESOURCES, enqueueResult); EXPECT_EQ(1, mockCommandQueueHw.waitForAllEnginesCalledCount); } HWTEST_F(EnqueueReadBufferTypeTest, givenInOrderQueueAndEnabledSupportCpuCopiesAndDstPtrEqualSrcPtrAndZeroCopyBufferWhenReadBufferIsExecutedThenTaskLevelShouldNotBeIncreased) { DebugManagerStateRestore dbgRestore; DebugManager.flags.DoCpuCopyOnReadBuffer.set(1); cl_int retVal = CL_SUCCESS; void *ptr = srcBuffer->getCpuAddressForMemoryTransfer(); EXPECT_EQ(retVal, CL_SUCCESS); retVal = pCmdQ->enqueueReadBuffer(srcBuffer.get(), CL_FALSE, 0, MemoryConstants::cacheLineSize, ptr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pCmdQ->taskLevel, 0u); } HWTEST_F(EnqueueReadBufferTypeTest, givenInOrderQueueAndDisabledSupportCpuCopiesAndDstPtrEqualSrcPtrAndZeroCopyBufferWhenReadBufferIsExecutedThenTaskLevelShouldNotBeIncreased) { DebugManagerStateRestore dbgRestore; DebugManager.flags.DoCpuCopyOnReadBuffer.set(0); cl_int retVal = CL_SUCCESS; void *ptr = srcBuffer->getCpuAddressForMemoryTransfer(); EXPECT_EQ(retVal, CL_SUCCESS); retVal = pCmdQ->enqueueReadBuffer(srcBuffer.get(), CL_FALSE, 0, MemoryConstants::cacheLineSize, ptr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pCmdQ->taskLevel, 0u); } HWTEST_F(EnqueueReadBufferTypeTest, 
givenInOrderQueueAndDisabledSupportCpuCopiesAndDstPtrEqualSrcPtrAndNonZeroCopyBufferWhenReadBufferIsExecutedThenTaskLevelShouldBeIncreased) { DebugManagerStateRestore dbgRestore; DebugManager.flags.DoCpuCopyOnReadBuffer.set(0); cl_int retVal = CL_SUCCESS; void *ptr = nonZeroCopyBuffer->getCpuAddressForMemoryTransfer(); EXPECT_EQ(retVal, CL_SUCCESS); retVal = pCmdQ->enqueueReadBuffer(nonZeroCopyBuffer.get(), CL_FALSE, 0, MemoryConstants::cacheLineSize, ptr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pCmdQ->taskLevel, 1u); } HWTEST_F(EnqueueReadBufferTypeTest, givenInOrderQueueAndEnabledSupportCpuCopiesAndDstPtrEqualSrcPtrAndNonZeroCopyWhenReadBufferIsExecutedThenTaskLevelShouldBeIncreased) { DebugManagerStateRestore dbgRestore; DebugManager.flags.DoCpuCopyOnReadBuffer.set(1); cl_int retVal = CL_SUCCESS; void *ptr = nonZeroCopyBuffer->getCpuAddressForMemoryTransfer(); EXPECT_EQ(retVal, CL_SUCCESS); retVal = pCmdQ->enqueueReadBuffer(nonZeroCopyBuffer.get(), CL_FALSE, 0, MemoryConstants::cacheLineSize, ptr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pCmdQ->taskLevel, 1u); } HWTEST_F(EnqueueReadBufferTypeTest, givenCommandQueueWhenEnqueueReadBufferIsCalledThenItCallsNotifyFunction) { auto mockCmdQ = std::make_unique>(context, pClDevice, nullptr); void *ptr = nonZeroCopyBuffer->getCpuAddressForMemoryTransfer(); auto retVal = mockCmdQ->enqueueReadBuffer(srcBuffer.get(), CL_TRUE, 0, MemoryConstants::cacheLineSize, ptr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(mockCmdQ->notifyEnqueueReadBufferCalled); } HWTEST_F(EnqueueReadBufferTypeTest, givenCommandQueueWhenEnqueueReadBufferWithMapAllocationIsCalledThenItDoesntCallNotifyFunction) { auto mockCmdQ = std::make_unique>(context, pClDevice, nullptr); void *ptr = nonZeroCopyBuffer->getCpuAddressForMemoryTransfer(); GraphicsAllocation mapAllocation{0, AllocationType::UNKNOWN, nullptr, 0, 0, 0, MemoryPool::MemoryNull}; auto retVal = 
mockCmdQ->enqueueReadBuffer(srcBuffer.get(), CL_TRUE, 0, MemoryConstants::cacheLineSize, ptr, &mapAllocation, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_FALSE(mockCmdQ->notifyEnqueueReadBufferCalled); } HWTEST_F(EnqueueReadBufferTypeTest, givenEnqueueReadBufferCalledWhenLockedPtrInTransferPropertisIsAvailableThenItIsNotUnlocked) { DebugManagerStateRestore dbgRestore; DebugManager.flags.DoCpuCopyOnReadBuffer.set(1); MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MockMemoryManager memoryManager(false, true, executionEnvironment); MockContext ctx; cl_int retVal; ctx.memoryManager = &memoryManager; auto mockCmdQ = std::make_unique>(context, pClDevice, nullptr); std::unique_ptr buffer(Buffer::create(&ctx, 0, 1, nullptr, retVal)); static_cast(buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex()))->overrideMemoryPool(MemoryPool::SystemCpuInaccessible); void *ptr = nonZeroCopyBuffer->getCpuAddressForMemoryTransfer(); retVal = mockCmdQ->enqueueReadBuffer(buffer.get(), CL_TRUE, 0, MemoryConstants::cacheLineSize, ptr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, memoryManager.unlockResourceCalled); } HWTEST_F(EnqueueReadBufferTypeTest, givenForcedCpuCopyWhenEnqueueReadCompressedBufferThenDontCopyOnCpu) { DebugManagerStateRestore dbgRestore; DebugManager.flags.DoCpuCopyOnReadBuffer.set(1); DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast(LocalMemoryAccessMode::Default)); MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MockMemoryManager memoryManager(false, true, executionEnvironment); MockContext ctx(pClDevice); cl_int retVal; ctx.memoryManager = &memoryManager; auto mockCmdQ = std::make_unique>(context, pClDevice, nullptr); std::unique_ptr buffer(Buffer::create(&ctx, 0, 1, nullptr, retVal)); auto graphicsAllocation = buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex()); static_cast(graphicsAllocation)->overrideMemoryPool(MemoryPool::SystemCpuInaccessible); 
void *ptr = nonZeroCopyBuffer->getCpuAddressForMemoryTransfer(); MockBuffer::setAllocationType(graphicsAllocation, pDevice->getRootDeviceEnvironment().getGmmClientContext(), true); retVal = mockCmdQ->enqueueReadBuffer(buffer.get(), CL_TRUE, 0, MemoryConstants::cacheLineSize, ptr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_FALSE(graphicsAllocation->isLocked()); EXPECT_FALSE(mockCmdQ->cpuDataTransferHandlerCalled); MockBuffer::setAllocationType(graphicsAllocation, pDevice->getRootDeviceEnvironment().getGmmClientContext(), false); retVal = mockCmdQ->enqueueReadBuffer(buffer.get(), CL_TRUE, 0, MemoryConstants::cacheLineSize, ptr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(graphicsAllocation->isLocked()); EXPECT_TRUE(mockCmdQ->cpuDataTransferHandlerCalled); } HWTEST_F(EnqueueReadBufferTypeTest, givenEnqueueReadBufferCalledWhenLockedPtrInTransferPropertisIsNotAvailableThenItIsNotUnlocked) { DebugManagerStateRestore dbgRestore; DebugManager.flags.DoCpuCopyOnReadBuffer.set(1); MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MockMemoryManager memoryManager(false, true, executionEnvironment); MockContext ctx; cl_int retVal; ctx.memoryManager = &memoryManager; auto mockCmdQ = std::make_unique>(context, pClDevice, nullptr); std::unique_ptr buffer(Buffer::create(&ctx, 0, 1, nullptr, retVal)); static_cast(buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex()))->overrideMemoryPool(MemoryPool::System4KBPages); void *ptr = nonZeroCopyBuffer->getCpuAddressForMemoryTransfer(); retVal = mockCmdQ->enqueueReadBuffer(buffer.get(), CL_TRUE, 0, MemoryConstants::cacheLineSize, ptr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, memoryManager.unlockResourceCalled); } HWTEST_F(EnqueueReadBufferTypeTest, givenEnqueueReadBufferBlockingWhenAUBDumpAllocsOnEnqueueReadOnlyIsOnThenBufferShouldBeSetDumpable) { DebugManagerStateRestore dbgRestore; 
DebugManager.flags.AUBDumpAllocsOnEnqueueReadOnly.set(true); ASSERT_FALSE(srcBuffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->isAllocDumpable()); cl_int retVal = CL_SUCCESS; void *ptr = nonZeroCopyBuffer->getCpuAddressForMemoryTransfer(); retVal = pCmdQ->enqueueReadBuffer(srcBuffer.get(), CL_TRUE, 0, MemoryConstants::cacheLineSize, ptr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(srcBuffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->isAllocDumpable()); EXPECT_TRUE(srcBuffer->forceDisallowCPUCopy); } HWTEST_F(EnqueueReadBufferTypeTest, givenEnqueueReadBufferNonBlockingWhenAUBDumpAllocsOnEnqueueReadOnlyIsOnThenBufferShouldntBeSetDumpable) { DebugManagerStateRestore dbgRestore; DebugManager.flags.AUBDumpAllocsOnEnqueueReadOnly.set(true); ASSERT_FALSE(srcBuffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->isAllocDumpable()); cl_int retVal = CL_SUCCESS; void *ptr = nonZeroCopyBuffer->getCpuAddressForMemoryTransfer(); retVal = pCmdQ->enqueueReadBuffer(srcBuffer.get(), CL_FALSE, 0, MemoryConstants::cacheLineSize, ptr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_FALSE(srcBuffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->isAllocDumpable()); EXPECT_FALSE(srcBuffer->forceDisallowCPUCopy); } using NegativeFailAllocationTest = Test; HWTEST_F(NegativeFailAllocationTest, givenEnqueueReadBufferWhenHostPtrAllocationCreationFailsThenReturnOutOfResource) { cl_int retVal = CL_SUCCESS; retVal = pCmdQ->enqueueReadBuffer(buffer.get(), CL_FALSE, 0, MemoryConstants::cacheLineSize, ptr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_OUT_OF_RESOURCES, retVal); } struct EnqueueReadBufferHw : public ::testing::Test { void SetUp() override { if (is32bit) { GTEST_SKIP(); } device = std::make_unique(MockClDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); context.reset(new MockContext(device.get())); } std::unique_ptr device; std::unique_ptr context; MockBuffer srcBuffer; 
uint64_t bigSize = 4ull * MemoryConstants::gigaByte; uint64_t smallSize = 4ull * MemoryConstants::gigaByte - 1; }; using EnqueueReadBufferStatelessTest = EnqueueReadBufferHw; HWTEST_F(EnqueueReadBufferStatelessTest, WhenReadingBufferStatelessThenSuccessIsReturned) { auto pCmdQ = std::make_unique>(context.get(), device.get()); void *missAlignedPtr = reinterpret_cast(0x1041); srcBuffer.size = static_cast(bigSize); auto retVal = pCmdQ->enqueueReadBuffer(&srcBuffer, CL_FALSE, 0, MemoryConstants::cacheLineSize, missAlignedPtr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } using EnqueueReadBufferStatefulTest = EnqueueReadBufferHw; HWTEST_F(EnqueueReadBufferStatefulTest, WhenReadingBufferStatefulThenSuccessIsReturned) { auto pCmdQ = std::make_unique>(context.get(), device.get()); void *missAlignedPtr = reinterpret_cast(0x1041); srcBuffer.size = static_cast(smallSize); auto retVal = pCmdQ->enqueueReadBuffer(&srcBuffer, CL_FALSE, 0, MemoryConstants::cacheLineSize, missAlignedPtr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } HWTEST_F(EnqueueReadBufferHw, givenHostPtrIsFromMappedBufferWhenReadBufferIsCalledThenReuseGraphicsAllocation) { DebugManagerStateRestore restore{}; DebugManager.flags.DisableZeroCopyForBuffers.set(1); DebugManager.flags.DoCpuCopyOnReadBuffer.set(0); MockCommandQueueHw queue(context.get(), device.get(), nullptr); auto &csr = device->getUltCommandStreamReceiver(); BufferDefaults::context = context.get(); auto bufferForMap = clUniquePtr(BufferHelper<>::create()); auto bufferForRead = clUniquePtr(BufferHelper<>::create()); cl_int retVal{}; void *mappedPtr = queue.enqueueMapBuffer(bufferForMap.get(), CL_TRUE, CL_MAP_READ, 0, bufferForMap->getSize(), 0, nullptr, nullptr, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, mappedPtr); EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled); MapOperationsHandler *mapOperationsHandler = context->getMapOperationsStorage().getHandlerIfExists(bufferForMap.get()); 
EXPECT_NE(nullptr, mapOperationsHandler); MapInfo mapInfo{}; EXPECT_TRUE(mapOperationsHandler->find(mappedPtr, mapInfo)); EXPECT_NE(nullptr, mapInfo.graphicsAllocation); auto unmappedPtr = std::make_unique(bufferForRead->getSize()); retVal = queue.enqueueReadBuffer(bufferForRead.get(), CL_TRUE, 0, bufferForRead->getSize(), unmappedPtr.get(), nullptr, 0, 0, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, csr.createAllocationForHostSurfaceCalled); retVal = queue.enqueueReadBuffer(bufferForRead.get(), CL_TRUE, 0, bufferForRead->getSize(), mappedPtr, nullptr, 0, 0, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, csr.createAllocationForHostSurfaceCalled); } compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/enqueue_read_image_fixture.h000066400000000000000000000041421422164147700321750ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/ptr_math.h" #include "shared/test/common/test_macros/test_checks_shared.h" #include "opencl/test/unit_test/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/command_queue/enqueue_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "gtest/gtest.h" namespace NEO { struct EnqueueReadImageTest : public CommandEnqueueFixture, public ::testing::Test { typedef CommandQueueHwFixture CommandQueueFixture; using CommandQueueHwFixture::pCmdQ; void SetUp(void) override { REQUIRE_IMAGES_OR_SKIP(defaultHwInfo); CommandEnqueueFixture::SetUp(); context = new MockContext(pClDevice); srcImage = Image2dHelper<>::create(context); srcAllocation = srcImage->getGraphicsAllocation(pClDevice->getRootDeviceIndex()); const auto &imageDesc = srcImage->getImageDesc(); dstPtr = new float[imageDesc.image_width * imageDesc.image_height]; } void TearDown(void) override { if (IsSkipped()) { return; } delete srcImage; delete[] dstPtr; delete 
context; CommandEnqueueFixture::TearDown(); } protected: template void enqueueReadImage(cl_bool blocking = EnqueueReadImageTraits::blocking) { auto retVal = EnqueueReadImageHelper<>::enqueueReadImage(pCmdQ, srcImage, blocking); EXPECT_EQ(CL_SUCCESS, retVal); parseCommands(*pCmdQ); } float *dstPtr = nullptr; Image *srcImage = nullptr; GraphicsAllocation *srcAllocation = nullptr; MockContext *context = nullptr; }; struct EnqueueReadImageMipMapTest : public EnqueueReadImageTest, public ::testing::WithParamInterface { }; } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/enqueue_read_image_tests.cpp000066400000000000000000001470041422164147700322110ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/memory_manager/allocations_list.h" #include "shared/source/memory_manager/migration_sync_data.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/mocks/mock_builtins.h" #include "shared/test/common/mocks/mock_gmm_resource_info.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/built_ins/builtins_dispatch_builder.h" #include "opencl/test/unit_test/command_queue/enqueue_read_image_fixture.h" #include "opencl/test/unit_test/fixtures/one_mip_level_image_fixture.h" #include "opencl/test/unit_test/gen_common/gen_commands_common_validation.h" #include "opencl/test/unit_test/mocks/mock_builtin_dispatch_info_builder.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "reg_configs_common.h" using namespace NEO; HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueReadImageTest, WhenReadingImageThenGpgpuWalkerIsProgrammedCorrectly) { typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; enqueueReadImage(); auto *cmd = reinterpret_cast(cmdWalker); ASSERT_NE(nullptr, cmd); // Verify GPGPU_WALKER parameters EXPECT_NE(0u, 
cmd->getThreadGroupIdXDimension()); EXPECT_NE(0u, cmd->getThreadGroupIdYDimension()); EXPECT_NE(0u, cmd->getThreadGroupIdZDimension()); EXPECT_NE(0u, cmd->getRightExecutionMask()); EXPECT_NE(0u, cmd->getBottomExecutionMask()); EXPECT_EQ(GPGPU_WALKER::SIMD_SIZE_SIMD32, cmd->getSimdSize()); EXPECT_NE(0u, cmd->getIndirectDataLength()); EXPECT_FALSE(cmd->getIndirectParameterEnable()); // Compute the SIMD lane mask size_t simd = cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD32 ? 32 : cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD16 ? 16 : 8; uint64_t simdMask = maxNBitValue(simd); // Mask off lanes based on the execution masks auto laneMaskRight = cmd->getRightExecutionMask() & simdMask; auto lanesPerThreadX = 0; while (laneMaskRight) { lanesPerThreadX += laneMaskRight & 1; laneMaskRight >>= 1; } } HWTEST_F(EnqueueReadImageTest, GivenBlockingEnqueueWhenReadingImageThenTaskLevelIsNotIncremented) { //this test case assumes IOQ auto &csr = pDevice->getUltCommandStreamReceiver(); csr.taskCount = pCmdQ->taskCount + 100; csr.taskLevel = pCmdQ->taskLevel + 50; auto oldCsrTaskLevel = csr.peekTaskLevel(); EnqueueReadImageHelper<>::enqueueReadImage(pCmdQ, srcImage, CL_TRUE); EXPECT_EQ(csr.peekTaskCount(), pCmdQ->taskCount); EXPECT_EQ(oldCsrTaskLevel, pCmdQ->taskLevel); } HWTEST_F(EnqueueReadImageTest, whenEnqueueReadImageThenBuiltinKernelIsResolved) { UserEvent userEvent{}; cl_event inputEvent = &userEvent; cl_event outputEvent{}; EnqueueReadImageHelper<>::enqueueReadImage(pCmdQ, srcImage, CL_FALSE, EnqueueReadImageTraits::origin, EnqueueReadImageTraits::region, EnqueueReadImageTraits::rowPitch, EnqueueReadImageTraits::slicePitch, EnqueueReadImageTraits::hostPtr, EnqueueReadImageTraits::mapAllocation, 1u, &inputEvent, &outputEvent); auto pEvent = castToObject(outputEvent); auto pCommand = static_cast(pEvent->peekCommand()); EXPECT_FALSE(pCommand->peekKernel()->Kernel::canTransformImages()); EXPECT_TRUE(pCommand->peekKernel()->isPatched()); 
userEvent.setStatus(CL_COMPLETE); pEvent->release(); pCmdQ->finish(); } template struct CreateAllocationForHostSurfaceFailCsr : public CommandStreamReceiverHw { using CommandStreamReceiverHw::CommandStreamReceiverHw; bool createAllocationForHostSurface(HostPtrSurface &surface, bool requiresL3Flush) override { return CL_FALSE; } }; HWTEST_F(EnqueueReadImageTest, givenCommandQueueAndFailingAllocationForHostSurfaceWhenEnqueueReadImageThenOutOfResourceIsReturned) { MockCommandQueueHw cmdQ(context, pClDevice, nullptr); auto failCsr = std::make_unique>(*pDevice->getExecutionEnvironment(), pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); failCsr->setupContext(*pDevice->getDefaultEngine().osContext); CommandStreamReceiver *oldCommandStreamReceiver = cmdQ.gpgpuEngine->commandStreamReceiver; cmdQ.gpgpuEngine->commandStreamReceiver = failCsr.get(); auto srcImage = Image2dHelper<>::create(context); auto retVal = cmdQ.enqueueReadImage(srcImage, CL_FALSE, EnqueueReadImageTraits::origin, EnqueueReadImageTraits::region, EnqueueReadImageTraits::rowPitch, EnqueueReadImageTraits::slicePitch, EnqueueReadImageTraits::hostPtr, EnqueueReadImageTraits::mapAllocation, 0u, nullptr, nullptr); EXPECT_EQ(CL_OUT_OF_RESOURCES, retVal); cmdQ.gpgpuEngine->commandStreamReceiver = oldCommandStreamReceiver; srcImage->release(); } HWTEST_F(EnqueueReadImageTest, givenCommandQueueAndFailingAllocationForHostSurfaceWhenBlockingEnqueueReadImageThenOutOfResourceIsReturned) { MockCommandQueueHw cmdQ(context, pClDevice, nullptr); auto failCsr = std::make_unique>(*pDevice->getExecutionEnvironment(), pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); failCsr->setupContext(*pDevice->getDefaultEngine().osContext); CommandStreamReceiver *oldCommandStreamReceiver = cmdQ.gpgpuEngine->commandStreamReceiver; cmdQ.gpgpuEngine->commandStreamReceiver = failCsr.get(); auto srcImage = Image2dHelper<>::create(context); auto retVal = cmdQ.enqueueReadImage(srcImage, CL_TRUE, 
EnqueueReadImageTraits::origin, EnqueueReadImageTraits::region, EnqueueReadImageTraits::rowPitch, EnqueueReadImageTraits::slicePitch, EnqueueReadImageTraits::hostPtr, EnqueueReadImageTraits::mapAllocation, 0u, nullptr, nullptr); EXPECT_EQ(CL_OUT_OF_RESOURCES, retVal); cmdQ.gpgpuEngine->commandStreamReceiver = oldCommandStreamReceiver; srcImage->release(); } template struct CreateAllocationForHostSurfaceCsr : public CommandStreamReceiverHw { using CommandStreamReceiverHw::CommandStreamReceiverHw; bool createAllocationForHostSurface(HostPtrSurface &surface, bool requiresL3Flush) override { if (surface.peekIsPtrCopyAllowed()) { return CommandStreamReceiverHw::createAllocationForHostSurface(surface, requiresL3Flush); } else { return CL_FALSE; } } CompletionStamp flushTask(LinearStream &commandStream, size_t commandStreamStart, const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh, uint32_t taskLevel, DispatchFlags &dispatchFlags, Device &device) override { return CompletionStamp{0u, 0u, static_cast(0u)}; } }; HWTEST_F(EnqueueReadImageTest, givenCommandQueueAndPtrCopyAllowedForHostSurfaceWhenBlockingEnqueueReadImageThenSuccessIsReturned) { auto csr = std::make_unique>(*pDevice->getExecutionEnvironment(), pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); auto cmdQ = std::make_unique>(context, pClDevice, nullptr); csr->setupContext(*pDevice->getDefaultEngine().osContext); CommandStreamReceiver *oldCommandStreamReceiver = cmdQ->gpgpuEngine->commandStreamReceiver; cmdQ->gpgpuEngine->commandStreamReceiver = csr.get(); csr->initializeTagAllocation(); auto retVal = cmdQ->enqueueReadImage(srcImage, CL_TRUE, EnqueueReadImageTraits::origin, EnqueueReadImageTraits::region, EnqueueReadImageTraits::rowPitch, EnqueueReadImageTraits::slicePitch, EnqueueReadImageTraits::hostPtr, EnqueueReadImageTraits::mapAllocation, 0u, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); cmdQ->gpgpuEngine->commandStreamReceiver = oldCommandStreamReceiver; } 
HWTEST_F(EnqueueReadImageTest, givenGpuHangAndCommandQueueAndPtrCopyAllowedForHostSurfaceWhenBlockingEnqueueReadImageThenOutOfResourcesIsReturned) { auto csr = std::make_unique>(*pDevice->getExecutionEnvironment(), pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); auto cmdQ = std::make_unique>(context, pClDevice, nullptr); cmdQ->waitForAllEnginesReturnValue = WaitStatus::GpuHang; csr->setupContext(*pDevice->getDefaultEngine().osContext); CommandStreamReceiver *oldCommandStreamReceiver = cmdQ->gpgpuEngine->commandStreamReceiver; cmdQ->gpgpuEngine->commandStreamReceiver = csr.get(); csr->initializeTagAllocation(); auto retVal = cmdQ->enqueueReadImage(srcImage, CL_TRUE, EnqueueReadImageTraits::origin, EnqueueReadImageTraits::region, EnqueueReadImageTraits::rowPitch, EnqueueReadImageTraits::slicePitch, EnqueueReadImageTraits::hostPtr, EnqueueReadImageTraits::mapAllocation, 0u, nullptr, nullptr); EXPECT_EQ(CL_OUT_OF_RESOURCES, retVal); cmdQ->gpgpuEngine->commandStreamReceiver = oldCommandStreamReceiver; } HWTEST_F(EnqueueReadImageTest, givenMultiRootDeviceImageWhenEnqueueReadImageThenKernelRequiresMigration) { MockDefaultContext context; auto pCmdQ1 = createCommandQueue(context.getDevice(0), nullptr, &context); auto pImage = Image2dHelper<>::create(&context); EXPECT_TRUE(pImage->getMultiGraphicsAllocation().requiresMigrations()); UserEvent userEvent{}; cl_event inputEvent = &userEvent; cl_event outputEvent{}; EnqueueReadImageHelper<>::enqueueReadImage(pCmdQ1, pImage, CL_FALSE, EnqueueReadImageTraits::origin, EnqueueReadImageTraits::region, EnqueueReadImageTraits::rowPitch, EnqueueReadImageTraits::slicePitch, EnqueueReadImageTraits::hostPtr, EnqueueReadImageTraits::mapAllocation, 1u, &inputEvent, &outputEvent); auto pEvent = castToObject(outputEvent); auto pCommand = static_cast(pEvent->peekCommand()); auto pKernel = pCommand->peekKernel(); EXPECT_FALSE(pKernel->Kernel::canTransformImages()); EXPECT_TRUE(pKernel->isPatched()); 
EXPECT_TRUE(pKernel->requiresMemoryMigration()); auto &memObjectsForMigration = pKernel->getMemObjectsToMigrate(); ASSERT_EQ(1u, memObjectsForMigration.size()); auto memObj = memObjectsForMigration.begin()->second; for (auto &rootDeviceIndex : context.getRootDeviceIndices()) { EXPECT_EQ(pImage->getMultiGraphicsAllocation().getGraphicsAllocation(rootDeviceIndex), memObj->getMultiGraphicsAllocation().getGraphicsAllocation(rootDeviceIndex)); } EXPECT_TRUE(memObj->getMultiGraphicsAllocation().requiresMigrations()); EXPECT_EQ(MigrationSyncData::locationUndefined, pImage->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); userEvent.setStatus(CL_COMPLETE); EXPECT_EQ(0u, pImage->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); pEvent->release(); pCmdQ1->finish(); pCmdQ1->release(); pImage->release(); } HWTEST_F(EnqueueReadImageTest, givenMultiRootDeviceImageWhenEnqueueReadImageIsCalledMultipleTimesThenEachKernelUsesDifferentImage) { MockDefaultContext context; auto pCmdQ1 = createCommandQueue(context.getDevice(0), nullptr, &context); auto pImage = Image2dHelper<>::create(&context); EXPECT_TRUE(pImage->getMultiGraphicsAllocation().requiresMigrations()); UserEvent userEvent{}; cl_event inputEvent = &userEvent; cl_event outputEvent0{}; cl_event outputEvent1{}; EXPECT_EQ(MigrationSyncData::locationUndefined, pImage->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); EnqueueReadImageHelper<>::enqueueReadImage(pCmdQ1, pImage, CL_FALSE, EnqueueReadImageTraits::origin, EnqueueReadImageTraits::region, EnqueueReadImageTraits::rowPitch, EnqueueReadImageTraits::slicePitch, EnqueueReadImageTraits::hostPtr, EnqueueReadImageTraits::mapAllocation, 1u, &inputEvent, &outputEvent0); EXPECT_EQ(MigrationSyncData::locationUndefined, pImage->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); auto pEvent0 = castToObject(outputEvent0); auto pCommand0 = static_cast(pEvent0->peekCommand()); auto 
pKernel0 = pCommand0->peekKernel(); EXPECT_FALSE(pKernel0->Kernel::canTransformImages()); EXPECT_TRUE(pKernel0->isPatched()); EXPECT_TRUE(pKernel0->requiresMemoryMigration()); auto &memObjectsForMigration0 = pKernel0->getMemObjectsToMigrate(); ASSERT_EQ(1u, memObjectsForMigration0.size()); auto memObj0 = memObjectsForMigration0.begin()->second; for (auto &rootDeviceIndex : context.getRootDeviceIndices()) { EXPECT_EQ(pImage->getMultiGraphicsAllocation().getGraphicsAllocation(rootDeviceIndex), memObj0->getMultiGraphicsAllocation().getGraphicsAllocation(rootDeviceIndex)); } EXPECT_TRUE(memObj0->getMultiGraphicsAllocation().requiresMigrations()); EnqueueReadImageHelper<>::enqueueReadImage(pCmdQ1, pImage, CL_FALSE, EnqueueReadImageTraits::origin, EnqueueReadImageTraits::region, EnqueueReadImageTraits::rowPitch, EnqueueReadImageTraits::slicePitch, EnqueueReadImageTraits::hostPtr, EnqueueReadImageTraits::mapAllocation, 1u, &outputEvent0, &outputEvent1); EXPECT_EQ(MigrationSyncData::locationUndefined, pImage->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); auto pEvent1 = castToObject(outputEvent1); auto pCommand1 = static_cast(pEvent1->peekCommand()); auto pKernel1 = pCommand1->peekKernel(); EXPECT_FALSE(pKernel1->Kernel::canTransformImages()); EXPECT_TRUE(pKernel1->isPatched()); EXPECT_TRUE(pKernel1->requiresMemoryMigration()); auto &memObjectsForMigration1 = pKernel1->getMemObjectsToMigrate(); ASSERT_EQ(1u, memObjectsForMigration1.size()); auto memObj1 = memObjectsForMigration1.begin()->second; for (auto &rootDeviceIndex : context.getRootDeviceIndices()) { EXPECT_EQ(pImage->getMultiGraphicsAllocation().getGraphicsAllocation(rootDeviceIndex), memObj1->getMultiGraphicsAllocation().getGraphicsAllocation(rootDeviceIndex)); } EXPECT_TRUE(memObj1->getMultiGraphicsAllocation().requiresMigrations()); EXPECT_NE(memObj0, memObj1); userEvent.setStatus(CL_COMPLETE); EXPECT_EQ(0u, 
pImage->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); pEvent0->release(); pEvent1->release(); pCmdQ1->finish(); pCmdQ1->release(); pImage->release(); } HWTEST_F(EnqueueReadImageTest, givenMultiRootDeviceImageWhenNonBlockedEnqueueReadImageIsCalledThenCommandQueueIsFlushed) { MockDefaultContext context; auto pCmdQ1 = createCommandQueue(context.getDevice(0), nullptr, &context); auto pImage = Image2dHelper<>::create(&context); EXPECT_TRUE(pImage->getMultiGraphicsAllocation().requiresMigrations()); auto &ultCsr = static_cast &>(pCmdQ1->getGpgpuCommandStreamReceiver()); EXPECT_FALSE(ultCsr.flushBatchedSubmissionsCalled); EXPECT_EQ(MigrationSyncData::locationUndefined, pImage->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); EnqueueReadImageHelper<>::enqueueReadImage(pCmdQ1, pImage, CL_FALSE); EXPECT_EQ(0u, pImage->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); EXPECT_TRUE(ultCsr.flushBatchedSubmissionsCalled); pCmdQ1->finish(); pCmdQ1->release(); pImage->release(); } HWTEST_F(EnqueueReadImageTest, givenMultiRootDeviceImageWhenNonBlockedEnqueueReadImageIsCalledThenTlbCacheIsInvalidated) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; MockDefaultContext context; auto pCmdQ1 = createCommandQueue(context.getDevice(0), nullptr, &context); auto pImage = Image2dHelper<>::create(&context); EXPECT_TRUE(pImage->getMultiGraphicsAllocation().requiresMigrations()); EXPECT_EQ(MigrationSyncData::locationUndefined, pImage->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); EnqueueReadImageHelper<>::enqueueReadImage(pCmdQ1, pImage, CL_FALSE); EXPECT_EQ(0u, pImage->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); pCmdQ1->finish(); { HardwareParse hwParser; hwParser.parseCommands(pCmdQ1->getCS(0), 0); auto pipeControls = findAll(hwParser.cmdList.begin(), hwParser.cmdList.end()); EXPECT_LT(0u, pipeControls.size()); bool 
pipeControlWithTlbInvalidateFound = false; for (auto &pipeControl : pipeControls) { auto pipeControlCmd = genCmdCast(*pipeControl); if (pipeControlCmd->getTlbInvalidate()) { EXPECT_TRUE(pipeControlCmd->getCommandStreamerStallEnable()); pipeControlWithTlbInvalidateFound = true; } } EXPECT_TRUE(pipeControlWithTlbInvalidateFound); } pCmdQ1->release(); pImage->release(); } HWTEST_F(EnqueueReadImageTest, givenMultiRootDeviceImageWhenEnqueueReadImageIsCalledToDifferentDevicesThenCorrectLocationIsSet) { MockDefaultContext context; auto pCmdQ1 = createCommandQueue(context.getDevice(0), nullptr, &context); auto pCmdQ2 = createCommandQueue(context.getDevice(1), nullptr, &context); auto pImage = Image2dHelper<>::create(&context); EXPECT_TRUE(pImage->getMultiGraphicsAllocation().requiresMigrations()); auto &ultCsr1 = static_cast &>(pCmdQ1->getGpgpuCommandStreamReceiver()); auto &ultCsr2 = static_cast &>(pCmdQ2->getGpgpuCommandStreamReceiver()); EXPECT_FALSE(ultCsr1.flushBatchedSubmissionsCalled); EXPECT_FALSE(ultCsr2.flushBatchedSubmissionsCalled); EXPECT_EQ(MigrationSyncData::locationUndefined, pImage->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); EnqueueReadImageHelper<>::enqueueReadImage(pCmdQ1, pImage, CL_FALSE, EnqueueReadImageTraits::origin, EnqueueReadImageTraits::region, EnqueueReadImageTraits::rowPitch, EnqueueReadImageTraits::slicePitch, EnqueueReadImageTraits::hostPtr, EnqueueReadImageTraits::mapAllocation, 0u, nullptr, nullptr); EXPECT_EQ(0u, pImage->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); EXPECT_TRUE(ultCsr1.flushBatchedSubmissionsCalled); EXPECT_FALSE(ultCsr2.flushBatchedSubmissionsCalled); pCmdQ1->finish(); EnqueueReadImageHelper<>::enqueueReadImage(pCmdQ2, pImage, CL_FALSE, EnqueueReadImageTraits::origin, EnqueueReadImageTraits::region, EnqueueReadImageTraits::rowPitch, EnqueueReadImageTraits::slicePitch, EnqueueReadImageTraits::hostPtr, EnqueueReadImageTraits::mapAllocation, 0u, nullptr, 
nullptr); EXPECT_EQ(1u, pImage->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); EXPECT_TRUE(ultCsr2.flushBatchedSubmissionsCalled); pCmdQ2->finish(); EnqueueReadImageHelper<>::enqueueReadImage(pCmdQ1, pImage, CL_FALSE, EnqueueReadImageTraits::origin, EnqueueReadImageTraits::region, EnqueueReadImageTraits::rowPitch, EnqueueReadImageTraits::slicePitch, EnqueueReadImageTraits::hostPtr, EnqueueReadImageTraits::mapAllocation, 0u, nullptr, nullptr); EXPECT_EQ(0u, pImage->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); pCmdQ1->finish(); pCmdQ1->release(); pCmdQ2->release(); pImage->release(); } HWTEST_F(EnqueueReadImageTest, givenImageFromBufferThatRequiresMigrationWhenEnqueueReadImageThenBufferObjectIsTakenForMigration) { MockDefaultContext context; auto pCmdQ1 = createCommandQueue(context.getDevice(0), nullptr, &context); auto pBuffer = BufferHelper<>::create(&context); auto imageDesc = Image2dDefaults::imageDesc; cl_mem clBuffer = pBuffer; imageDesc.mem_object = clBuffer; const_cast(pBuffer->getMultiGraphicsAllocation()).setMultiStorage(true); EXPECT_TRUE(pBuffer->getMultiGraphicsAllocation().requiresMigrations()); auto pImage = Image2dHelper<>::create(&context, &imageDesc); EXPECT_TRUE(pImage->getMultiGraphicsAllocation().requiresMigrations()); UserEvent userEvent{}; cl_event inputEvent = &userEvent; cl_event outputEvent{}; EnqueueReadImageHelper<>::enqueueReadImage(pCmdQ1, pImage, CL_FALSE, EnqueueReadImageTraits::origin, EnqueueReadImageTraits::region, EnqueueReadImageTraits::rowPitch, EnqueueReadImageTraits::slicePitch, EnqueueReadImageTraits::hostPtr, EnqueueReadImageTraits::mapAllocation, 1u, &inputEvent, &outputEvent); auto pEvent = castToObject(outputEvent); auto pCommand = static_cast(pEvent->peekCommand()); auto pKernel = pCommand->peekKernel(); EXPECT_FALSE(pKernel->Kernel::canTransformImages()); EXPECT_TRUE(pKernel->isPatched()); EXPECT_TRUE(pKernel->requiresMemoryMigration()); auto 
&memObjectsForMigration = pKernel->getMemObjectsToMigrate(); ASSERT_EQ(1u, memObjectsForMigration.size()); auto memObj = memObjectsForMigration.begin()->second; EXPECT_EQ(static_cast(pBuffer), memObj); EXPECT_TRUE(memObj->getMultiGraphicsAllocation().requiresMigrations()); EXPECT_EQ(MigrationSyncData::locationUndefined, pBuffer->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); userEvent.setStatus(CL_COMPLETE); EXPECT_EQ(0u, pBuffer->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); pEvent->release(); pCmdQ1->finish(); pCmdQ1->release(); pImage->release(); pBuffer->release(); } HWTEST_F(EnqueueReadImageTest, GivenNonBlockingEnqueueWhenReadingImageThenTaskLevelIsIncremented) { //this test case assumes IOQ auto &csr = pDevice->getUltCommandStreamReceiver(); csr.taskCount = pCmdQ->taskCount + 100; csr.taskLevel = pCmdQ->taskLevel + 50; EnqueueReadImageHelper<>::enqueueReadImage(pCmdQ, srcImage, CL_FALSE); EXPECT_EQ(csr.peekTaskCount(), pCmdQ->taskCount); EXPECT_EQ(csr.peekTaskLevel(), pCmdQ->taskLevel + 1); } HWTEST_F(EnqueueReadImageTest, WhenReadingImageThenTaskLevelIsIncremented) { auto taskLevelBefore = pCmdQ->taskLevel; EnqueueReadImageHelper<>::enqueueReadImage(pCmdQ, srcImage, EnqueueReadImageTraits::blocking); EXPECT_GT(pCmdQ->taskLevel, taskLevelBefore); } HWTEST_F(EnqueueReadImageTest, WhenReadingImageThenCommandsAreAdded) { auto usedCmdBufferBefore = pCS->getUsed(); EnqueueReadImageHelper<>::enqueueReadImage(pCmdQ, srcImage, EnqueueReadImageTraits::blocking); EXPECT_NE(usedCmdBufferBefore, pCS->getUsed()); } HWTEST_F(EnqueueReadImageTest, WhenReadingImageThenIndirectDataIsAdded) { auto dshBefore = pDSH->getUsed(); auto iohBefore = pIOH->getUsed(); auto sshBefore = pSSH->getUsed(); EnqueueReadImageHelper<>::enqueueReadImage(pCmdQ, srcImage, EnqueueReadImageTraits::blocking); EXPECT_TRUE(UnitTestHelper::evaluateDshUsage(dshBefore, pDSH->getUsed(), nullptr, rootDeviceIndex)); EXPECT_NE(iohBefore, 
pIOH->getUsed()); EXPECT_NE(sshBefore, pSSH->getUsed()); } HWTEST_F(EnqueueReadImageTest, WhenReadingImageThenL3ProgrammingIsCorrect) { enqueueReadImage(); validateL3Programming(cmdList, itorWalker); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueReadImageTest, WhenEnqueueIsDoneThenStateBaseAddressIsProperlyProgrammed) { enqueueReadImage(); auto &ultCsr = this->pDevice->getUltCommandStreamReceiver(); auto &hwHelper = HwHelper::get(pDevice->getHardwareInfo().platform.eRenderCoreFamily); validateStateBaseAddress(ultCsr.getMemoryManager()->getInternalHeapBaseAddress(ultCsr.rootDeviceIndex, pIOH->getGraphicsAllocation()->isAllocatedInLocalMemoryPool()), ultCsr.getMemoryManager()->getInternalHeapBaseAddress(ultCsr.rootDeviceIndex, !hwHelper.useSystemMemoryPlacementForISA(pDevice->getHardwareInfo())), pDSH, pIOH, pSSH, itorPipelineSelect, itorWalker, cmdList, 0llu); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueReadImageTest, WhenReadingImageThenMediaInterfaceDescriptorIsCorrect) { typedef typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD MEDIA_INTERFACE_DESCRIPTOR_LOAD; typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; enqueueReadImage(); // All state should be programmed before walker auto cmd = reinterpret_cast(cmdMediaInterfaceDescriptorLoad); ASSERT_NE(nullptr, cmd); // Verify we have a valid length -- multiple of INTERFACE_DESCRIPTOR_DATAs EXPECT_EQ(0u, cmd->getInterfaceDescriptorTotalLength() % sizeof(INTERFACE_DESCRIPTOR_DATA)); // Validate the start address size_t alignmentStartAddress = 64 * sizeof(uint8_t); EXPECT_EQ(0u, cmd->getInterfaceDescriptorDataStartAddress() % alignmentStartAddress); // Validate the length EXPECT_NE(0u, cmd->getInterfaceDescriptorTotalLength()); size_t alignmentTotalLength = 32 * sizeof(uint8_t); EXPECT_EQ(0u, cmd->getInterfaceDescriptorTotalLength() % alignmentTotalLength); // Generically validate this command FamilyType::PARSE::template validateCommand(cmdList.begin(), itorMediaInterfaceDescriptorLoad); } 
HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueReadImageTest, WhenReadingImageThenInterfaceDescriptorData) { typedef typename FamilyType::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; enqueueReadImage(); // Extract the interfaceDescriptorData auto cmdSBA = (STATE_BASE_ADDRESS *)cmdStateBaseAddress; auto &interfaceDescriptorData = *(INTERFACE_DESCRIPTOR_DATA *)cmdInterfaceDescriptorData; // Validate the kernel start pointer. Technically, a kernel can start at address 0 but let's force a value. auto kernelStartPointer = ((uint64_t)interfaceDescriptorData.getKernelStartPointerHigh() << 32) + interfaceDescriptorData.getKernelStartPointer(); EXPECT_LE(kernelStartPointer, cmdSBA->getInstructionBufferSize() * MemoryConstants::pageSize); auto localWorkSize = 4u; auto simd = 32u; auto numThreadsPerThreadGroup = Math::divideAndRoundUp(localWorkSize, simd); EXPECT_EQ(numThreadsPerThreadGroup, interfaceDescriptorData.getNumberOfThreadsInGpgpuThreadGroup()); EXPECT_NE(0u, interfaceDescriptorData.getCrossThreadConstantDataReadLength()); EXPECT_NE(0u, interfaceDescriptorData.getConstantIndirectUrbEntryReadLength()); // We shouldn't have these pointers the same. 
EXPECT_NE(kernelStartPointer, interfaceDescriptorData.getBindingTablePointer()); } HWTEST_F(EnqueueReadImageTest, WhenReadingImageThenSurfaceStateIsCorrect) { typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; enqueueReadImage(); // BufferToImage kernel uses BTI=1 for destSurface uint32_t bindingTableIndex = 0; const auto &surfaceState = getSurfaceState(&pCmdQ->getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0), bindingTableIndex); // EnqueueReadImage uses multi-byte copies depending on per-pixel-size-in-bytes const auto &imageDesc = srcImage->getImageDesc(); EXPECT_EQ(imageDesc.image_width, surfaceState.getWidth()); EXPECT_EQ(imageDesc.image_height, surfaceState.getHeight()); EXPECT_NE(0u, surfaceState.getSurfacePitch()); EXPECT_NE(0u, surfaceState.getSurfaceType()); EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_FORMAT_R32_UINT, surfaceState.getSurfaceFormat()); EXPECT_EQ(MockGmmResourceInfo::getHAlignSurfaceStateResult, surfaceState.getSurfaceHorizontalAlignment()); EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_VERTICAL_ALIGNMENT_VALIGN_4, surfaceState.getSurfaceVerticalAlignment()); EXPECT_EQ(srcAllocation->getGpuAddress(), surfaceState.getSurfaceBaseAddress()); } HWTEST_F(EnqueueReadImageTest, WhenReadingImageThenPipelineSelectIsProgrammed) { enqueueReadImage(); int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect(); EXPECT_EQ(1, numCommands); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueReadImageTest, WhenReadingImageThenMediaVfeStateIsCorrect) { enqueueReadImage(); validateMediaVFEState(&pDevice->getHardwareInfo(), cmdMediaVfeState, cmdList, itorMediaVfeState); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueReadImageTest, GivenBlockingEnqueueWhenReadingImageThenPipeControlWithDcFlushIsSetAfterWalker) { typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; bool blocking = true; enqueueReadImage(blocking); auto itorWalker = find(cmdList.begin(), cmdList.end()); auto itorCmd = find(itorWalker, cmdList.end()); auto *cmd = (PIPE_CONTROL *)*itorCmd; 
EXPECT_NE(cmdList.end(), itorCmd); if (UnitTestHelper::isPipeControlWArequired(pDevice->getHardwareInfo())) { // SKL: two PIPE_CONTROLs following GPGPU_WALKER: first has DcFlush and second has Write HwTag EXPECT_FALSE(cmd->getDcFlushEnable()); // Move to next PPC auto itorCmdP = ++((GenCmdList::iterator)itorCmd); EXPECT_NE(cmdList.end(), itorCmdP); auto itorCmd2 = find(itorCmdP, cmdList.end()); cmd = (PIPE_CONTROL *)*itorCmd2; EXPECT_TRUE(cmd->getDcFlushEnable()); } else { // BDW: single PIPE_CONTROL following GPGPU_WALKER has DcFlush and Write HwTag EXPECT_TRUE(cmd->getDcFlushEnable()); } } HWTEST_F(EnqueueReadImageTest, GivenImage1DarrayWhenReadImageIsCalledThenHostPtrSizeIsCalculatedProperly) { std::unique_ptr srcImage(Image1dArrayHelper<>::create(context)); auto &imageDesc = srcImage->getImageDesc(); auto imageSize = imageDesc.image_width * imageDesc.image_array_size * 4; size_t origin[] = {0, 0, 0}; size_t region[] = {imageDesc.image_width, imageDesc.image_array_size, 1}; EnqueueReadImageHelper<>::enqueueReadImage(pCmdQ, srcImage.get(), CL_FALSE, origin, region); auto &csr = pCmdQ->getGpgpuCommandStreamReceiver(); auto temporaryAllocation = csr.getTemporaryAllocations().peekHead(); ASSERT_NE(nullptr, temporaryAllocation); EXPECT_EQ(temporaryAllocation->getUnderlyingBufferSize(), imageSize); } HWTEST_F(EnqueueReadImageTest, GivenImage1DarrayWhenReadImageIsCalledThenRowPitchIsSetToSlicePitch) { auto builtIns = new MockBuiltins(); pCmdQ->getDevice().getExecutionEnvironment()->rootDeviceEnvironments[pCmdQ->getDevice().getRootDeviceIndex()]->builtins.reset(builtIns); EBuiltInOps::Type copyBuiltIn = EBuiltInOps::CopyImage3dToBuffer; auto &origBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder( copyBuiltIn, pCmdQ->getClDevice()); // substitute original builder with mock builder auto oldBuilder = pClExecutionEnvironment->setBuiltinDispatchInfoBuilder( rootDeviceIndex, copyBuiltIn, std::unique_ptr(new MockBuiltinDispatchInfoBuilder(*builtIns, 
pCmdQ->getClDevice(), &origBuilder))); std::unique_ptr srcImage(Image1dArrayHelper<>::create(context)); auto &imageDesc = srcImage->getImageDesc(); size_t origin[] = {0, 0, 0}; size_t region[] = {imageDesc.image_width, imageDesc.image_array_size, 1}; size_t rowPitch = 64; size_t slicePitch = 128; EnqueueReadImageHelper<>::enqueueReadImage(pCmdQ, srcImage.get(), CL_TRUE, origin, region, rowPitch, slicePitch); auto &mockBuilder = static_cast(BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(copyBuiltIn, pCmdQ->getClDevice())); auto params = mockBuilder.getBuiltinOpParams(); EXPECT_EQ(params->dstRowPitch, slicePitch); // restore original builder and retrieve mock builder auto newBuilder = pClExecutionEnvironment->setBuiltinDispatchInfoBuilder( rootDeviceIndex, copyBuiltIn, std::move(oldBuilder)); EXPECT_NE(nullptr, newBuilder); } HWTEST_F(EnqueueReadImageTest, GivenImage2DarrayWhenReadImageIsCalledThenHostPtrSizeIsCalculatedProperly) { std::unique_ptr srcImage(Image2dArrayHelper<>::create(context)); auto &imageDesc = srcImage->getImageDesc(); auto imageSize = imageDesc.image_width * imageDesc.image_height * imageDesc.image_array_size * 4; size_t origin[] = {0, 0, 0}; size_t region[] = {imageDesc.image_width, imageDesc.image_height, imageDesc.image_array_size}; EnqueueReadImageHelper<>::enqueueReadImage(pCmdQ, srcImage.get(), CL_FALSE, origin, region); auto &csr = pCmdQ->getGpgpuCommandStreamReceiver(); auto temporaryAllocation = csr.getTemporaryAllocations().peekHead(); ASSERT_NE(nullptr, temporaryAllocation); EXPECT_EQ(temporaryAllocation->getUnderlyingBufferSize(), imageSize); } HWTEST_F(EnqueueReadImageTest, GivenImage1DAndImageShareTheSameStorageWithHostPtrWhenReadReadImageIsCalledThenImageIsNotRead) { cl_int retVal = CL_SUCCESS; std::unique_ptr dstImage2(Image1dHelper<>::create(context)); auto &imageDesc = dstImage2->getImageDesc(); std::unique_ptr pCmdOOQ(createCommandQueue(pClDevice, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)); size_t origin[] = {0, 0, 0}; 
size_t region[] = {imageDesc.image_width, 1, 1}; void *ptr = dstImage2->getCpuAddressForMemoryTransfer(); size_t rowPitch = dstImage2->getHostPtrRowPitch(); size_t slicePitch = dstImage2->getHostPtrSlicePitch(); retVal = pCmdOOQ->enqueueReadImage(dstImage2.get(), CL_FALSE, origin, region, rowPitch, slicePitch, ptr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pCmdOOQ->taskLevel, 0u); } HWTEST_F(EnqueueReadImageTest, GivenImage1DArrayAndImageShareTheSameStorageWithHostPtrWhenReadReadImageIsCalledThenImageIsNotRead) { cl_int retVal = CL_SUCCESS; std::unique_ptr dstImage2(Image1dArrayHelper<>::create(context)); auto &imageDesc = dstImage2->getImageDesc(); size_t origin[] = {imageDesc.image_width / 2, imageDesc.image_array_size / 2, 0}; size_t region[] = {imageDesc.image_width - (imageDesc.image_width / 2), imageDesc.image_array_size - (imageDesc.image_array_size / 2), 1}; void *ptr = dstImage2->getCpuAddressForMemoryTransfer(); auto bytesPerPixel = 4; size_t rowPitch = dstImage2->getHostPtrRowPitch(); size_t slicePitch = dstImage2->getHostPtrSlicePitch(); auto pOffset = origin[2] * rowPitch + origin[1] * slicePitch + origin[0] * bytesPerPixel; void *ptrStorage = ptrOffset(ptr, pOffset); retVal = pCmdQ->enqueueReadImage(dstImage2.get(), CL_FALSE, origin, region, rowPitch, slicePitch, ptrStorage, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pCmdQ->taskLevel, 0u); } HWTEST_F(EnqueueReadImageTest, GivenSharedContextZeroCopy2DImageWhenEnqueueReadImageWithMappedPointerIsCalledThenImageIsNotRead) { cl_int retVal = CL_SUCCESS; context->isSharedContext = true; std::unique_ptr dstImage(ImageHelper>::create(context)); EXPECT_TRUE(dstImage->isMemObjZeroCopy()); auto &imageDesc = dstImage->getImageDesc(); size_t origin[] = {0, 0, 0}; size_t region[] = {imageDesc.image_width, imageDesc.image_height, 1}; void *ptr = dstImage->getCpuAddressForMemoryTransfer(); size_t rowPitch = 
dstImage->getHostPtrRowPitch(); size_t slicePitch = dstImage->getHostPtrSlicePitch(); retVal = pCmdQ->enqueueReadImage(dstImage.get(), CL_FALSE, origin, region, rowPitch, slicePitch, ptr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pCmdQ->taskLevel, 0u); } HWTEST_F(EnqueueReadImageTest, GivenImage1DThatIsZeroCopyWhenReadImageWithTheSamePointerAndOutputEventIsPassedThenEventHasCorrectCommandTypeSet) { cl_int retVal = CL_SUCCESS; std::unique_ptr dstImage(Image1dHelper<>::create(context)); auto &imageDesc = dstImage->getImageDesc(); size_t origin[] = {0, 0, 0}; size_t region[] = {imageDesc.image_width, imageDesc.image_height, 1}; void *ptr = dstImage->getCpuAddressForMemoryTransfer(); size_t rowPitch = dstImage->getHostPtrRowPitch(); size_t slicePitch = dstImage->getHostPtrSlicePitch(); cl_uint numEventsInWaitList = 0; cl_event event = nullptr; retVal = pCmdQ->enqueueReadImage(dstImage.get(), CL_FALSE, origin, region, rowPitch, slicePitch, ptr, nullptr, numEventsInWaitList, nullptr, &event); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, event); auto pEvent = static_cast(event); EXPECT_EQ(static_cast(CL_COMMAND_READ_IMAGE), pEvent->getCommandType()); pEvent->release(); } struct EnqueueReadImageTestWithBcs : EnqueueReadImageTest { void SetUp() override { VariableBackup backupHwInfo(defaultHwInfo.get()); defaultHwInfo->capabilityTable.blitterOperationsSupported = true; EnqueueReadImageTest::SetUp(); } }; HWTEST_F(EnqueueReadImageTest, givenCommandQueueWhenEnqueueReadImageIsCalledThenItCallsNotifyFunction) { auto mockCmdQ = std::make_unique>(context, pClDevice, nullptr); std::unique_ptr srcImage(Image2dArrayHelper<>::create(context)); auto &imageDesc = srcImage->getImageDesc(); size_t origin[] = {0, 0, 0}; size_t region[] = {imageDesc.image_width, imageDesc.image_height, imageDesc.image_array_size}; EnqueueReadImageHelper<>::enqueueReadImage(mockCmdQ.get(), srcImage.get(), CL_TRUE, origin, region); 
EXPECT_TRUE(mockCmdQ->notifyEnqueueReadImageCalled); } HWTEST_F(EnqueueReadImageTest, givenCommandQueueWhenEnqueueReadImageWithMapAllocationIsCalledThenItDoesntCallNotifyFunction) { auto mockCmdQ = std::make_unique>(context, pClDevice, nullptr); std::unique_ptr srcImage(Image2dArrayHelper<>::create(context)); auto &imageDesc = srcImage->getImageDesc(); size_t origin[] = {0, 0, 0}; size_t region[] = {imageDesc.image_width, imageDesc.image_height, imageDesc.image_array_size}; size_t rowPitch = srcImage->getHostPtrRowPitch(); size_t slicePitch = srcImage->getHostPtrSlicePitch(); GraphicsAllocation mapAllocation{0, AllocationType::UNKNOWN, nullptr, 0, 0, 0, MemoryPool::MemoryNull}; EnqueueReadImageHelper<>::enqueueReadImage(mockCmdQ.get(), srcImage.get(), CL_TRUE, origin, region, rowPitch, slicePitch, dstPtr, &mapAllocation); EXPECT_FALSE(mockCmdQ->notifyEnqueueReadImageCalled); } HWTEST_F(EnqueueReadImageTest, givenEnqueueReadImageBlockingWhenAUBDumpAllocsOnEnqueueReadOnlyIsOnThenImageShouldBeSetDumpable) { DebugManagerStateRestore dbgRestore; DebugManager.flags.AUBDumpAllocsOnEnqueueReadOnly.set(true); std::unique_ptr srcImage(Image2dArrayHelper<>::create(context)); srcAllocation = srcImage->getGraphicsAllocation(pClDevice->getRootDeviceIndex()); auto &imageDesc = srcImage->getImageDesc(); size_t origin[] = {0, 0, 0}; size_t region[] = {imageDesc.image_width, imageDesc.image_height, imageDesc.image_array_size}; ASSERT_FALSE(srcAllocation->isAllocDumpable()); EnqueueReadImageHelper<>::enqueueReadImage(pCmdQ, srcImage.get(), CL_TRUE, origin, region); EXPECT_TRUE(srcAllocation->isAllocDumpable()); } HWTEST_F(EnqueueReadImageTest, givenEnqueueReadImageNonBlockingWhenAUBDumpAllocsOnEnqueueReadOnlyIsOnThenImageShouldntBeSetDumpable) { DebugManagerStateRestore dbgRestore; DebugManager.flags.AUBDumpAllocsOnEnqueueReadOnly.set(true); std::unique_ptr srcImage(Image2dArrayHelper<>::create(context)); srcAllocation = 
srcImage->getGraphicsAllocation(pClDevice->getRootDeviceIndex()); auto &imageDesc = srcImage->getImageDesc(); size_t origin[] = {0, 0, 0}; size_t region[] = {imageDesc.image_width, imageDesc.image_height, imageDesc.image_array_size}; ASSERT_FALSE(srcAllocation->isAllocDumpable()); EnqueueReadImageHelper<>::enqueueReadImage(pCmdQ, srcImage.get(), CL_FALSE, origin, region); EXPECT_FALSE(srcAllocation->isAllocDumpable()); } typedef EnqueueReadImageMipMapTest MipMapReadImageTest; HWTEST_P(MipMapReadImageTest, GivenImageWithMipLevelNonZeroWhenReadImageIsCalledThenProperMipLevelIsSet) { auto builtIns = new MockBuiltins(); pCmdQ->getDevice().getExecutionEnvironment()->rootDeviceEnvironments[pCmdQ->getDevice().getRootDeviceIndex()]->builtins.reset(builtIns); auto image_type = (cl_mem_object_type)GetParam(); auto &origBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder( EBuiltInOps::CopyImage3dToBuffer, pCmdQ->getClDevice()); // substitute original builder with mock builder auto oldBuilder = pClExecutionEnvironment->setBuiltinDispatchInfoBuilder( rootDeviceIndex, EBuiltInOps::CopyImage3dToBuffer, std::unique_ptr(new MockBuiltinDispatchInfoBuilder(*builtIns, pCmdQ->getClDevice(), &origBuilder))); cl_int retVal = CL_SUCCESS; cl_image_desc imageDesc = {}; uint32_t expectedMipLevel = 3; imageDesc.image_type = image_type; imageDesc.num_mip_levels = 10; imageDesc.image_width = 4; imageDesc.image_height = 1; imageDesc.image_depth = 1; size_t origin[] = {0, 0, 0, 0}; size_t region[] = {imageDesc.image_width, 1, 1}; std::unique_ptr image; switch (image_type) { case CL_MEM_OBJECT_IMAGE1D: origin[1] = expectedMipLevel; image = std::unique_ptr(ImageHelper::create(context, &imageDesc)); break; case CL_MEM_OBJECT_IMAGE1D_ARRAY: imageDesc.image_array_size = 2; origin[2] = expectedMipLevel; image = std::unique_ptr(ImageHelper::create(context, &imageDesc)); break; case CL_MEM_OBJECT_IMAGE2D: origin[2] = expectedMipLevel; image = std::unique_ptr(ImageHelper::create(context, 
&imageDesc)); break; case CL_MEM_OBJECT_IMAGE2D_ARRAY: imageDesc.image_array_size = 2; origin[3] = expectedMipLevel; image = std::unique_ptr(ImageHelper::create(context, &imageDesc)); break; case CL_MEM_OBJECT_IMAGE3D: origin[3] = expectedMipLevel; image = std::unique_ptr(ImageHelper::create(context, &imageDesc)); break; } EXPECT_NE(nullptr, image.get()); std::unique_ptr ptr = std::unique_ptr(new uint32_t[3]); retVal = pCmdQ->enqueueReadImage(image.get(), CL_FALSE, origin, region, 0, 0, ptr.get(), nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); auto &mockBuilder = static_cast(BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyImage3dToBuffer, pCmdQ->getClDevice())); auto params = mockBuilder.getBuiltinOpParams(); EXPECT_EQ(expectedMipLevel, params->srcMipLevel); // restore original builder and retrieve mock builder auto newBuilder = pClExecutionEnvironment->setBuiltinDispatchInfoBuilder( rootDeviceIndex, EBuiltInOps::CopyImage3dToBuffer, std::move(oldBuilder)); EXPECT_NE(nullptr, newBuilder); } INSTANTIATE_TEST_CASE_P(MipMapReadImageTest_GivenImageWithMipLevelNonZeroWhenWriteImageIsCalledThenProperMipLevelIsSet, MipMapReadImageTest, ::testing::Values(CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE2D, CL_MEM_OBJECT_IMAGE2D_ARRAY, CL_MEM_OBJECT_IMAGE3D)); using NegativeFailAllocationTest = Test; HWTEST_F(NegativeFailAllocationTest, givenEnqueueWriteImageWhenHostPtrAllocationCreationFailsThenReturnOutOfResource) { cl_int retVal = CL_SUCCESS; auto &imageDesc = image->getImageDesc(); size_t origin[] = {0, 0, 0}; size_t region[] = {imageDesc.image_width, imageDesc.image_height, 1}; size_t rowPitch = image->getHostPtrRowPitch(); size_t slicePitch = image->getHostPtrSlicePitch(); retVal = pCmdQ->enqueueWriteImage(image.get(), CL_FALSE, origin, region, rowPitch, slicePitch, ptr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_OUT_OF_RESOURCES, retVal); } using OneMipLevelReadImageTests = Test; 
HWTEST_F(OneMipLevelReadImageTests, GivenNotMippedImageWhenReadingImageThenDoNotProgramSourceMipLevel) { auto queue = createQueue(); auto retVal = queue->enqueueReadImage( image.get(), CL_TRUE, origin, region, 0, 0, cpuPtr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(builtinOpsParamsCaptured); EXPECT_EQ(0u, usedBuiltinOpsParams.srcMipLevel); } compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/enqueue_resource_barier_tests.cpp000066400000000000000000000077571422164147700333210ustar00rootroot00000000000000/* * Copyright (C) 2019-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/command_queue/resource_barrier.h" #include "opencl/test/unit_test/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" using namespace NEO; using ResourceBarrierTest = Test; HWTEST_F(ResourceBarrierTest, givenNullArgsAndHWCommandQueueWhenEnqueueResourceBarrierCalledThenCorrectStatusReturned) { cl_resource_barrier_descriptor_intel descriptor{}; auto retVal = CL_INVALID_VALUE; size_t bufferSize = MemoryConstants::pageSize; std::unique_ptr buffer(Buffer::create( &pCmdQ->getContext(), CL_MEM_READ_WRITE, bufferSize, nullptr, retVal)); descriptor.mem_object = buffer.get(); descriptor.svm_allocation_pointer = nullptr; BarrierCommand barrierCommand(pCmdQ, &descriptor, 1); auto surface = reinterpret_cast(barrierCommand.surfacePtrs.begin()[0]); EXPECT_EQ(surface->getGraphicsAllocation(), buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex())); retVal = pCmdQ->enqueueResourceBarrier( &barrierCommand, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } HWTEST_F(ResourceBarrierTest, whenEnqueueResourceBarrierCalledThenUpdateQueueCompletionStamp) { auto retVal = CL_INVALID_VALUE; size_t bufferSize = MemoryConstants::pageSize; std::unique_ptr 
buffer(Buffer::create(&pCmdQ->getContext(), CL_MEM_READ_WRITE, bufferSize, nullptr, retVal)); ASSERT_EQ(CL_SUCCESS, retVal); cl_resource_barrier_descriptor_intel descriptor{}; descriptor.mem_object = buffer.get(); descriptor.svm_allocation_pointer = nullptr; BarrierCommand barrierCommand(pCmdQ, &descriptor, 1); auto previousTaskCount = pCmdQ->taskCount; auto previousTaskLevel = pCmdQ->taskLevel; const auto enqueueResult = pCmdQ->enqueueResourceBarrier(&barrierCommand, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, enqueueResult); bool resourceBarrierSupported = pCmdQ->isCacheFlushCommand(CL_COMMAND_RESOURCE_BARRIER); if (resourceBarrierSupported) { EXPECT_EQ(pCmdQ->taskCount, previousTaskCount + 1); } else { EXPECT_EQ(pCmdQ->taskCount, previousTaskCount); } EXPECT_EQ(pCmdQ->taskLevel, previousTaskLevel); } HWTEST_F(ResourceBarrierTest, GivenGpuHangAndBlockingCallsWhenEnqueueResourceBarrierIsCalledThenOutOfResourcesIsReturned) { DebugManagerStateRestore stateRestore; DebugManager.flags.MakeEachEnqueueBlocking.set(true); std::unique_ptr device(new MockClDevice{MockClDevice::createWithNewExecutionEnvironment(nullptr)}); cl_queue_properties props = {}; MockCommandQueueHw mockCommandQueueHw(context, device.get(), &props); mockCommandQueueHw.waitForAllEnginesReturnValue = WaitStatus::GpuHang; auto retVal = CL_INVALID_VALUE; size_t bufferSize = MemoryConstants::pageSize; std::unique_ptr buffer(Buffer::create(&mockCommandQueueHw.getContext(), CL_MEM_READ_WRITE, bufferSize, nullptr, retVal)); ASSERT_EQ(CL_SUCCESS, retVal); cl_resource_barrier_descriptor_intel descriptor{}; descriptor.mem_object = buffer.get(); descriptor.svm_allocation_pointer = nullptr; BarrierCommand barrierCommand(&mockCommandQueueHw, &descriptor, 1); const auto enqueueResult = mockCommandQueueHw.enqueueResourceBarrier(&barrierCommand, 0, nullptr, nullptr); EXPECT_EQ(CL_OUT_OF_RESOURCES, enqueueResult); EXPECT_EQ(1, mockCommandQueueHw.waitForAllEnginesCalledCount); } HWTEST_F(ResourceBarrierTest, 
whenBarierCommandCreatedWithInvalidSvmPointerThenExceptionIsThrown) { cl_resource_barrier_descriptor_intel descriptor{}; descriptor.svm_allocation_pointer = nullptr; EXPECT_THROW(BarrierCommand barrierCommand(pCmdQ, &descriptor, 1), std::exception); } enqueue_resource_barier_tests_xehp_and_later.cpp000066400000000000000000000112711422164147700362610ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/os_context.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "shared/test/common/mocks/mock_timestamp_container.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/command_queue/resource_barrier.h" #include "opencl/source/event/event_builder.h" #include "opencl/source/event/user_event.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/memory_manager/resource_surface.h" #include "opencl/test/unit_test/fixtures/enqueue_handler_fixture.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" namespace NEO { template class MockCommandQueueWithCacheFlush : public MockCommandQueueHw { using MockCommandQueueHw::MockCommandQueueHw; public: bool isCacheFlushCommand(uint32_t commandType) const override { return commandRequireCacheFlush; } bool commandRequireCacheFlush = false; }; using EnqueueResourceBarrierTestXeHpCoreAndLater = EnqueueHandlerTest; HWCMDTEST_F(IGFX_XE_HP_CORE, EnqueueResourceBarrierTestXeHpCoreAndLater, GivenCommandStreamWithoutKernelAndTimestampPacketEnabledWhenEnqueuedResourceBarrierWithEventThenTimestampAddedToEvent) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableTimestampPacket.set(1); pDevice->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; std::unique_ptr> mockCmdQ(new MockCommandQueueWithCacheFlush(context, pClDevice, 0)); 
mockCmdQ->commandRequireCacheFlush = true; auto retVal = CL_INVALID_VALUE; size_t bufferSize = MemoryConstants::pageSize; std::unique_ptr buffer(Buffer::create( context, CL_MEM_READ_WRITE, bufferSize, nullptr, retVal)); auto allocation = buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex()); std::unique_ptr surface(new ResourceSurface(allocation, CL_RESOURCE_BARRIER_TYPE_RELEASE, CL_MEMORY_SCOPE_DEVICE)); MockTimestampPacketContainer timestamp1(*pDevice->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1); Event event1(mockCmdQ.get(), 0, 0, 0); cl_event event2; event1.addTimestampPacketNodes(timestamp1); cl_event waitlist[] = {&event1}; cl_resource_barrier_descriptor_intel descriptor{}; descriptor.mem_object = buffer.get(); descriptor.svm_allocation_pointer = nullptr; BarrierCommand barrierCommand(mockCmdQ.get(), &descriptor, 1); retVal = mockCmdQ->enqueueResourceBarrier( &barrierCommand, 1, waitlist, &event2); auto eventObj = castToObjectOrAbort(event2); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(eventObj->getTimestampPacketNodes()->peekNodes().size(), 1u); eventObj->release(); } HWCMDTEST_F(IGFX_XE_HP_CORE, EnqueueResourceBarrierTestXeHpCoreAndLater, GivenCommandStreamWithoutKernelAndTimestampPacketDisabledWhenEnqueuedResourceBarrierWithEventThenTimestampNotAddedToEvent) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableTimestampPacket.set(0); static_cast *>(&pDevice->getGpgpuCommandStreamReceiver())->timestampPacketWriteEnabled = false; std::unique_ptr> mockCmdQ(new MockCommandQueueWithCacheFlush(context, pClDevice, 0)); mockCmdQ->commandRequireCacheFlush = true; mockCmdQ->timestampPacketContainer.reset(); auto retVal = CL_INVALID_VALUE; size_t bufferSize = MemoryConstants::pageSize; std::unique_ptr buffer(Buffer::create( context, CL_MEM_READ_WRITE, bufferSize, nullptr, retVal)); auto allocation = buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex()); std::unique_ptr surface(new ResourceSurface(allocation, 
CL_RESOURCE_BARRIER_TYPE_RELEASE, CL_MEMORY_SCOPE_DEVICE)); Event event1(mockCmdQ.get(), 0, 0, 0); cl_event event2; cl_event waitlist[] = {&event1}; cl_resource_barrier_descriptor_intel descriptor{}; descriptor.mem_object = buffer.get(); descriptor.svm_allocation_pointer = nullptr; BarrierCommand barrierCommand(mockCmdQ.get(), &descriptor, 1); retVal = mockCmdQ->enqueueResourceBarrier( &barrierCommand, 1, waitlist, &event2); auto eventObj = castToObjectOrAbort(event2); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(nullptr, eventObj->getTimestampPacketNodes()); eventObj->release(); } } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/enqueue_svm_mem_copy_tests.cpp000066400000000000000000000440141422164147700326260ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/engine_node_helper.h" #include "shared/source/memory_manager/allocations_list.h" #include "shared/source/memory_manager/unified_memory_manager.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/libult/ult_command_stream_receiver.h" #include "shared/test/common/mocks/mock_builtins.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/built_ins/builtins_dispatch_builder.h" #include "opencl/test/unit_test/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/command_queue/command_queue_fixture.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/mocks/mock_builtin_dispatch_info_builder.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" using namespace NEO; struct EnqueueSvmMemCopyTest : public ClDeviceFixture, public CommandQueueHwFixture, public ::testing::Test { typedef CommandQueueHwFixture CommandQueueFixture; EnqueueSvmMemCopyTest() { } void SetUp() override { ClDeviceFixture::SetUp(); if (!pDevice->isFullRangeSvm()) { return; } 
CommandQueueFixture::SetUp(pClDevice, 0); srcSvmPtr = context->getSVMAllocsManager()->createSVMAlloc(256, {}, context->getRootDeviceIndices(), context->getDeviceBitfields()); ASSERT_NE(nullptr, srcSvmPtr); dstSvmPtr = context->getSVMAllocsManager()->createSVMAlloc(256, {}, context->getRootDeviceIndices(), context->getDeviceBitfields()); ASSERT_NE(nullptr, dstSvmPtr); auto srcSvmData = context->getSVMAllocsManager()->getSVMAlloc(srcSvmPtr); ASSERT_NE(nullptr, srcSvmData); srcSvmAlloc = srcSvmData->gpuAllocations.getGraphicsAllocation(pDevice->getRootDeviceIndex()); ASSERT_NE(nullptr, srcSvmAlloc); auto dstSvmData = context->getSVMAllocsManager()->getSVMAlloc(dstSvmPtr); ASSERT_NE(nullptr, dstSvmData); dstSvmAlloc = dstSvmData->gpuAllocations.getGraphicsAllocation(pDevice->getRootDeviceIndex()); ASSERT_NE(nullptr, dstSvmAlloc); } void TearDown() override { if (pDevice->isFullRangeSvm()) { context->getSVMAllocsManager()->freeSVMAlloc(srcSvmPtr); context->getSVMAllocsManager()->freeSVMAlloc(dstSvmPtr); CommandQueueFixture::TearDown(); } ClDeviceFixture::TearDown(); } void *srcSvmPtr = nullptr; void *dstSvmPtr = nullptr; GraphicsAllocation *srcSvmAlloc = nullptr; GraphicsAllocation *dstSvmAlloc = nullptr; }; HWTEST_F(EnqueueSvmMemCopyTest, givenEnqueueSVMMemcpyWhenUsingCopyBufferToBufferBuilderThenItConfiguredWithBuiltinOpsAndProducesDispatchInfo) { if (!pDevice->isFullRangeSvm()) { return; } auto builtIns = new MockBuiltins(); pCmdQ->getDevice().getExecutionEnvironment()->rootDeviceEnvironments[pCmdQ->getDevice().getRootDeviceIndex()]->builtins.reset(builtIns); // retrieve original builder auto &origBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder( EBuiltInOps::CopyBufferToBuffer, pCmdQ->getClDevice()); ASSERT_NE(nullptr, &origBuilder); // substitute original builder with mock builder auto oldBuilder = pClExecutionEnvironment->setBuiltinDispatchInfoBuilder( rootDeviceIndex, EBuiltInOps::CopyBufferToBuffer, std::unique_ptr(new 
MockBuiltinDispatchInfoBuilder(*builtIns, pCmdQ->getClDevice(), &origBuilder))); EXPECT_EQ(&origBuilder, oldBuilder.get()); // call enqueue on mock builder auto retVal = pCmdQ->enqueueSVMMemcpy( false, // cl_bool blocking_copy dstSvmPtr, // void *dst_ptr srcSvmPtr, // const void *src_ptr 256, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // cl_event *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); // restore original builder and retrieve mock builder auto newBuilder = pClExecutionEnvironment->setBuiltinDispatchInfoBuilder( rootDeviceIndex, EBuiltInOps::CopyBufferToBuffer, std::move(oldBuilder)); EXPECT_NE(nullptr, newBuilder); // check if original builder is restored correctly auto &restoredBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder( EBuiltInOps::CopyBufferToBuffer, pCmdQ->getClDevice()); EXPECT_EQ(&origBuilder, &restoredBuilder); // use mock builder to validate builder's input / output auto mockBuilder = static_cast(newBuilder.get()); // validate builder's input - builtin ops auto params = mockBuilder->getBuiltinOpParams(); EXPECT_EQ(srcSvmPtr, params->srcPtr); EXPECT_EQ(dstSvmPtr, params->dstPtr); EXPECT_EQ(nullptr, params->srcMemObj); EXPECT_EQ(nullptr, params->dstMemObj); EXPECT_EQ(srcSvmAlloc, params->srcSvmAlloc); EXPECT_EQ(dstSvmAlloc, params->dstSvmAlloc); EXPECT_EQ(Vec3(0, 0, 0), params->srcOffset); EXPECT_EQ(Vec3(0, 0, 0), params->dstOffset); EXPECT_EQ(Vec3(256, 0, 0), params->size); // validate builder's output - multi dispatch info auto mdi = mockBuilder->getMultiDispatchInfo(); EXPECT_EQ(1u, mdi->size()); auto di = mdi->begin(); size_t middleElSize = 4 * sizeof(uint32_t); EXPECT_EQ(Vec3(256 / middleElSize, 1, 1), di->getGWS()); auto kernel = mdi->begin()->getKernel(); EXPECT_EQ("CopyBufferToBufferMiddle", kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName); } HWTEST_F(EnqueueSvmMemCopyTest, 
givenEnqueueSVMMemcpyWhenUsingCopyBufferToBufferBuilderAndSrcHostPtrThenItConfiguredWithBuiltinOpsAndProducesDispatchInfo) { if (!pDevice->isFullRangeSvm()) { return; } auto builtIns = new MockBuiltins(); pCmdQ->getDevice().getExecutionEnvironment()->rootDeviceEnvironments[pCmdQ->getDevice().getRootDeviceIndex()]->builtins.reset(builtIns); void *srcHostPtr = alignedMalloc(512, 64); size_t hostPtrOffset = 2; // retrieve original builder auto &origBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder( EBuiltInOps::CopyBufferToBuffer, pCmdQ->getClDevice()); ASSERT_NE(nullptr, &origBuilder); // substitute original builder with mock builder auto oldBuilder = pClExecutionEnvironment->setBuiltinDispatchInfoBuilder( rootDeviceIndex, EBuiltInOps::CopyBufferToBuffer, std::unique_ptr(new MockBuiltinDispatchInfoBuilder(*builtIns, pCmdQ->getClDevice(), &origBuilder))); EXPECT_EQ(&origBuilder, oldBuilder.get()); // call enqueue on mock builder auto retVal = pCmdQ->enqueueSVMMemcpy( false, // cl_bool blocking_copy dstSvmPtr, // void *dst_ptr ptrOffset(srcHostPtr, hostPtrOffset), // const void *src_ptr 256, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // cl_event *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); // restore original builder and retrieve mock builder auto newBuilder = pClExecutionEnvironment->setBuiltinDispatchInfoBuilder( rootDeviceIndex, EBuiltInOps::CopyBufferToBuffer, std::move(oldBuilder)); EXPECT_NE(nullptr, newBuilder); // check if original builder is restored correctly auto &restoredBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder( EBuiltInOps::CopyBufferToBuffer, pCmdQ->getClDevice()); EXPECT_EQ(&origBuilder, &restoredBuilder); auto &ultCsr = pDevice->getUltCommandStreamReceiver(); GraphicsAllocation *srcSvmAlloc = nullptr; auto head = ultCsr.getTemporaryAllocations().peekHead(); while (head) { if (ptrOffset(srcHostPtr, hostPtrOffset) == head->getUnderlyingBuffer()) { srcSvmAlloc = 
head; break; } head = head->next; } EXPECT_NE(nullptr, srcSvmAlloc); // use mock builder to validate builder's input / output auto mockBuilder = static_cast(newBuilder.get()); // validate builder's input - builtin ops auto params = mockBuilder->getBuiltinOpParams(); EXPECT_EQ(alignDown(srcSvmAlloc->getGpuAddress(), 4), castToUint64(params->srcPtr)); EXPECT_EQ(dstSvmPtr, params->dstPtr); EXPECT_EQ(nullptr, params->srcMemObj); EXPECT_EQ(nullptr, params->dstMemObj); EXPECT_EQ(srcSvmAlloc, params->srcSvmAlloc); EXPECT_EQ(dstSvmAlloc, params->dstSvmAlloc); EXPECT_EQ(Vec3(2, 0, 0), params->srcOffset); EXPECT_EQ(Vec3(0, 0, 0), params->dstOffset); EXPECT_EQ(Vec3(256, 0, 0), params->size); alignedFree(srcHostPtr); } HWTEST_F(EnqueueSvmMemCopyTest, givenEnqueueSVMMemcpyWhenUsingCopyBufferToBufferBuilderAndDstHostPtrThenItConfiguredWithBuiltinOpsAndProducesDispatchInfo) { if (!pDevice->isFullRangeSvm()) { return; } auto builtIns = new MockBuiltins(); pCmdQ->getDevice().getExecutionEnvironment()->rootDeviceEnvironments[pCmdQ->getDevice().getRootDeviceIndex()]->builtins.reset(builtIns); auto dstHostPtr = alignedMalloc(512, 64); size_t hostPtrOffset = 2; // retrieve original builder auto &origBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder( EBuiltInOps::CopyBufferToBuffer, pCmdQ->getClDevice()); ASSERT_NE(nullptr, &origBuilder); // substitute original builder with mock builder auto oldBuilder = pClExecutionEnvironment->setBuiltinDispatchInfoBuilder( rootDeviceIndex, EBuiltInOps::CopyBufferToBuffer, std::unique_ptr(new MockBuiltinDispatchInfoBuilder(*builtIns, pCmdQ->getClDevice(), &origBuilder))); EXPECT_EQ(&origBuilder, oldBuilder.get()); // call enqueue on mock builder auto retVal = pCmdQ->enqueueSVMMemcpy( false, // cl_bool blocking_copy ptrOffset(dstHostPtr, hostPtrOffset), // void *dst_ptr srcSvmPtr, // const void *src_ptr 256, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // cl_event *event_wait_list nullptr // cL_event *event ); 
EXPECT_EQ(CL_SUCCESS, retVal); // restore original builder and retrieve mock builder auto newBuilder = pClExecutionEnvironment->setBuiltinDispatchInfoBuilder( rootDeviceIndex, EBuiltInOps::CopyBufferToBuffer, std::move(oldBuilder)); EXPECT_NE(nullptr, newBuilder); // check if original builder is restored correctly auto &restoredBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder( EBuiltInOps::CopyBufferToBuffer, pCmdQ->getClDevice()); EXPECT_EQ(&origBuilder, &restoredBuilder); auto &ultCsr = pDevice->getUltCommandStreamReceiver(); GraphicsAllocation *dstSvmAlloc = nullptr; auto head = ultCsr.getTemporaryAllocations().peekHead(); while (head) { if (ptrOffset(dstHostPtr, hostPtrOffset) == head->getUnderlyingBuffer()) { dstSvmAlloc = head; break; } head = head->next; } EXPECT_NE(nullptr, dstSvmAlloc); // use mock builder to validate builder's input / output auto mockBuilder = static_cast(newBuilder.get()); // validate builder's input - builtin ops auto params = mockBuilder->getBuiltinOpParams(); EXPECT_EQ(srcSvmPtr, params->srcPtr); EXPECT_EQ(alignDown(dstSvmAlloc->getGpuAddress(), 4), castToUint64(params->dstPtr)); EXPECT_EQ(nullptr, params->srcMemObj); EXPECT_EQ(nullptr, params->dstMemObj); EXPECT_EQ(srcSvmAlloc, params->srcSvmAlloc); EXPECT_EQ(dstSvmAlloc, params->dstSvmAlloc); EXPECT_EQ(Vec3(0, 0, 0), params->srcOffset); EXPECT_EQ(Vec3(2, 0, 0), params->dstOffset); EXPECT_EQ(Vec3(256, 0, 0), params->size); alignedFree(dstHostPtr); } HWTEST_F(EnqueueSvmMemCopyTest, givenCommandQueueWhenEnqueueSVMMemcpyIsCalledThenSetAllocDumpable) { if (!pDevice->isFullRangeSvm()) { return; } DebugManagerStateRestore dbgRestore; DebugManager.flags.AUBDumpAllocsOnEnqueueSVMMemcpyOnly.set(true); DebugManager.flags.AUBDumpBufferFormat.set("BIN"); auto dstHostPtr = alignedMalloc(256, 64); EXPECT_FALSE(srcSvmAlloc->isAllocDumpable()); auto retVal = pCmdQ->enqueueSVMMemcpy( CL_TRUE, // cl_bool blocking_copy dstHostPtr, // void *dst_ptr srcSvmPtr, // const void *src_ptr 256, 
// size_t size 0, // cl_uint num_events_in_wait_list nullptr, // cl_event *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(srcSvmAlloc->isAllocDumpable()); alignedFree(dstHostPtr); } HWTEST_F(EnqueueSvmMemCopyTest, givenCommandQueueWhenEnqueueSVMMemcpyIsCalledThenItCallsNotifyFunction) { if (!pDevice->isFullRangeSvm()) { return; } auto mockCmdQ = std::make_unique>(context, pClDevice, nullptr); auto dstHostPtr = alignedMalloc(256, 64); auto retVal = mockCmdQ->enqueueSVMMemcpy( CL_TRUE, // cl_bool blocking_copy dstHostPtr, // void *dst_ptr srcSvmPtr, // const void *src_ptr 256, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // cl_event *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(mockCmdQ->notifyEnqueueSVMMemcpyCalled); MultiGraphicsAllocation &srcSvmAlloc = context->getSVMAllocsManager()->getSVMAlloc(srcSvmPtr)->gpuAllocations; CsrSelectionArgs csrSelectionArgs{CL_COMMAND_SVM_MEMCPY, &srcSvmAlloc, {}, 0, nullptr}; CommandStreamReceiver &csr = mockCmdQ->selectCsrForBuiltinOperation(csrSelectionArgs); EXPECT_EQ(EngineHelpers::isBcs(csr.getOsContext().getEngineType()), mockCmdQ->useBcsCsrOnNotifyEnabled); alignedFree(dstHostPtr); } HWTEST_F(EnqueueSvmMemCopyTest, givenConstHostMemoryAsSourceWhenEnqueueSVMMemcpyThenCpuCopyIsAllowed) { if (!pDevice->isFullRangeSvm()) { GTEST_SKIP(); } constexpr double srcConstHostPtr[] = {42.0}; auto mockCmdQ = std::make_unique>(context, pClDevice, nullptr); auto retVal = mockCmdQ->enqueueSVMMemcpy( CL_TRUE, // cl_bool blocking_copy dstSvmPtr, // void *dst_ptr srcConstHostPtr, // const void *src_ptr sizeof(double), // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // cl_event *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); auto &ultCSR = mockCmdQ->getUltCommandStreamReceiver(); EXPECT_GT(ultCSR.createAllocationForHostSurfaceCalled, 0u); EXPECT_TRUE(ultCSR.cpuCopyForHostPtrSurfaceAllowed); } struct 
EnqueueSvmMemCopyHw : public ::testing::Test { void SetUp() override { device = std::make_unique(MockClDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); if (is32bit || !device->isFullRangeSvm()) { GTEST_SKIP(); } context = std::make_unique(device.get()); srcSvmPtr = context->getSVMAllocsManager()->createSVMAlloc(256, {}, context->getRootDeviceIndices(), context->getDeviceBitfields()); ASSERT_NE(nullptr, srcSvmPtr); dstHostPtr = alignedMalloc(256, 64); } void TearDown() override { if (is32bit || !device->isFullRangeSvm()) { return; } context->getSVMAllocsManager()->freeSVMAlloc(srcSvmPtr); alignedFree(dstHostPtr); } std::unique_ptr device; std::unique_ptr context; uint64_t bigSize = 5ull * MemoryConstants::gigaByte; uint64_t smallSize = 4ull * MemoryConstants::gigaByte - 1; void *srcSvmPtr = nullptr; void *dstHostPtr = nullptr; }; using EnqueueSvmMemCopyHwTest = EnqueueSvmMemCopyHw; HWTEST_F(EnqueueSvmMemCopyHwTest, givenEnqueueSVMMemCopyWhenUsingCopyBufferToBufferStatelessBuilderThenSuccessIsReturned) { auto cmdQ = std::make_unique>(context.get(), device.get()); auto srcSvmData = context->getSVMAllocsManager()->getSVMAlloc(srcSvmPtr); srcSvmData->size = static_cast(bigSize); auto retVal = cmdQ->enqueueSVMMemcpy( false, // cl_bool blocking_copy dstHostPtr, // void *dst_ptr srcSvmPtr, // const void *src_ptr static_cast(bigSize), // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // cl_event *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); } HWTEST_F(EnqueueSvmMemCopyHwTest, givenEnqueueSVMMemCopyWhenUsingCopyBufferToBufferStatefulBuilderThenSuccessIsReturned) { auto cmdQ = std::make_unique>(context.get(), device.get()); auto retVal = cmdQ->enqueueSVMMemcpy( false, // cl_bool blocking_copy dstHostPtr, // void *dst_ptr srcSvmPtr, // const void *src_ptr static_cast(smallSize), // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // cl_event *event_wait_list nullptr // cL_event *event ); 
EXPECT_EQ(CL_SUCCESS, retVal); } compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/enqueue_svm_mem_fill_tests.cpp000066400000000000000000000216541422164147700326070ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/memory_manager/unified_memory_manager.h" #include "shared/test/common/mocks/mock_builtins.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/built_ins/builtins_dispatch_builder.h" #include "opencl/test/unit_test/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/command_queue/command_queue_fixture.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/mocks/mock_builtin_dispatch_info_builder.h" #include "opencl/test/unit_test/test_macros/test_checks_ocl.h" using namespace NEO; struct EnqueueSvmMemFillTest : public ClDeviceFixture, public CommandQueueHwFixture, public ::testing::TestWithParam { typedef CommandQueueHwFixture CommandQueueFixture; EnqueueSvmMemFillTest() { } void SetUp() override { ClDeviceFixture::SetUp(); CommandQueueFixture::SetUp(pClDevice, 0); REQUIRE_SVM_OR_SKIP(pDevice); patternSize = (size_t)GetParam(); ASSERT_TRUE((0 < patternSize) && (patternSize <= 128)); SVMAllocsManager::SvmAllocationProperties svmProperties; svmProperties.coherent = true; svmPtr = context->getSVMAllocsManager()->createSVMAlloc(256, svmProperties, context->getRootDeviceIndices(), context->getDeviceBitfields()); ASSERT_NE(nullptr, svmPtr); auto svmData = context->getSVMAllocsManager()->getSVMAlloc(svmPtr); ASSERT_NE(nullptr, svmData); svmAlloc = svmData->gpuAllocations.getGraphicsAllocation(pDevice->getRootDeviceIndex()); ASSERT_NE(nullptr, svmAlloc); } void TearDown() override { if (svmPtr) { context->getSVMAllocsManager()->freeSVMAlloc(svmPtr); } CommandQueueFixture::TearDown(); ClDeviceFixture::TearDown(); } const uint64_t pattern[16] = {0x0011223344556677, 
0x8899AABBCCDDEEFF, 0xFFEEDDCCBBAA9988, 0x7766554433221100, 0xFFEEDDCCBBAA9988, 0x7766554433221100, 0x0011223344556677, 0x8899AABBCCDDEEFF}; size_t patternSize = 0; void *svmPtr = nullptr; GraphicsAllocation *svmAlloc = nullptr; }; HWTEST_P(EnqueueSvmMemFillTest, givenEnqueueSVMMemFillWhenUsingFillBufferBuilderThenItIsConfiguredWithBuitinOpParamsAndProducesDispatchInfo) { struct MockFillBufferBuilder : MockBuiltinDispatchInfoBuilder { MockFillBufferBuilder(BuiltIns &kernelLib, ClDevice &clDevice, BuiltinDispatchInfoBuilder *origBuilder, const void *pattern, size_t patternSize) : MockBuiltinDispatchInfoBuilder(kernelLib, clDevice, origBuilder), pattern(pattern), patternSize(patternSize) { } void validateInput(const BuiltinOpParams &conf) const override { auto patternAllocation = conf.srcMemObj->getMultiGraphicsAllocation().getDefaultGraphicsAllocation(); EXPECT_EQ(patternSize, patternAllocation->getUnderlyingBufferSize()); EXPECT_EQ(0, memcmp(pattern, patternAllocation->getUnderlyingBuffer(), patternSize)); }; const void *pattern; size_t patternSize; }; auto builtIns = new MockBuiltins(); pCmdQ->getDevice().getExecutionEnvironment()->rootDeviceEnvironments[pCmdQ->getDevice().getRootDeviceIndex()]->builtins.reset(builtIns); // retrieve original builder auto &origBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder( EBuiltInOps::FillBuffer, pCmdQ->getClDevice()); ASSERT_NE(nullptr, &origBuilder); // substitute original builder with mock builder auto oldBuilder = pClExecutionEnvironment->setBuiltinDispatchInfoBuilder( rootDeviceIndex, EBuiltInOps::FillBuffer, std::unique_ptr(new MockFillBufferBuilder(*builtIns, pCmdQ->getClDevice(), &origBuilder, pattern, patternSize))); EXPECT_EQ(&origBuilder, oldBuilder.get()); // call enqueue on mock builder auto retVal = pCmdQ->enqueueSVMMemFill( svmPtr, // void *svm_ptr pattern, // const void *pattern patternSize, // size_t pattern_size 256, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // cl_event 
*event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); // restore original builder and retrieve mock builder auto newBuilder = pClExecutionEnvironment->setBuiltinDispatchInfoBuilder( rootDeviceIndex, EBuiltInOps::FillBuffer, std::move(oldBuilder)); EXPECT_NE(nullptr, newBuilder); // check if original builder is restored correctly auto &restoredBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder( EBuiltInOps::FillBuffer, pCmdQ->getClDevice()); EXPECT_EQ(&origBuilder, &restoredBuilder); // use mock builder to validate builder's input / output auto mockBuilder = static_cast(newBuilder.get()); // validate builder's input - builtin ops auto params = mockBuilder->getBuiltinOpParams(); EXPECT_EQ(nullptr, params->srcPtr); EXPECT_EQ(svmPtr, params->dstPtr); EXPECT_NE(nullptr, params->srcMemObj); EXPECT_EQ(nullptr, params->dstMemObj); EXPECT_EQ(nullptr, params->srcSvmAlloc); EXPECT_EQ(svmAlloc, params->dstSvmAlloc); EXPECT_EQ(Vec3(0, 0, 0), params->srcOffset); EXPECT_EQ(Vec3(0, 0, 0), params->dstOffset); EXPECT_EQ(Vec3(256, 0, 0), params->size); // validate builder's output - multi dispatch info auto mdi = mockBuilder->getMultiDispatchInfo(); EXPECT_EQ(1u, mdi->size()); auto di = mdi->begin(); size_t middleElSize = sizeof(uint32_t); EXPECT_EQ(Vec3(256 / middleElSize, 1, 1), di->getGWS()); auto kernel = di->getKernel(); EXPECT_STREQ("FillBufferMiddle", kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str()); } INSTANTIATE_TEST_CASE_P(size_t, EnqueueSvmMemFillTest, ::testing::Values(1, 2, 4, 8, 16, 32, 64, 128)); struct EnqueueSvmMemFillHw : public ::testing::Test { void SetUp() override { device = std::make_unique(MockClDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); if (is32bit || !device->isFullRangeSvm()) { GTEST_SKIP(); } context = std::make_unique(device.get()); svmPtr = context->getSVMAllocsManager()->createSVMAlloc(256, {}, context->getRootDeviceIndices(), context->getDeviceBitfields()); 
ASSERT_NE(nullptr, svmPtr); } void TearDown() override { if (is32bit || !device->isFullRangeSvm()) { return; } context->getSVMAllocsManager()->freeSVMAlloc(svmPtr); } std::unique_ptr device; std::unique_ptr context; uint64_t bigSize = 5ull * MemoryConstants::gigaByte; uint64_t smallSize = 4ull * MemoryConstants::gigaByte - 1; void *svmPtr = nullptr; const uint64_t pattern[4] = {0x0011223344556677, 0x8899AABBCCDDEEFF, 0xFFEEDDCCBBAA9988, 0x7766554433221100}; size_t patternSize = 0; }; using EnqueueSvmMemFillHwTest = EnqueueSvmMemFillHw; HWTEST_F(EnqueueSvmMemFillHwTest, givenEnqueueSVMMemFillWhenUsingCopyBufferToBufferStatelessBuilderThenSuccessIsReturned) { auto cmdQ = std::make_unique>(context.get(), device.get()); auto svmData = context->getSVMAllocsManager()->getSVMAlloc(svmPtr); svmData->size = static_cast(bigSize); auto retVal = cmdQ->enqueueSVMMemFill( svmPtr, // void *svm_ptr pattern, // const void *pattern patternSize, // size_t pattern_size static_cast(bigSize), // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // cl_event *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); } HWTEST_F(EnqueueSvmMemFillHwTest, givenEnqueueSVMMemFillWhenUsingCopyBufferToBufferStatefulBuilderThenSuccessIsReturned) { auto cmdQ = std::make_unique>(context.get(), device.get()); auto retVal = cmdQ->enqueueSVMMemFill( svmPtr, // void *svm_ptr pattern, // const void *pattern patternSize, // size_t pattern_size static_cast(smallSize), // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // cl_event *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); } compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/enqueue_svm_tests.cpp000066400000000000000000003011451422164147700307370ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include 
"shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/memory_manager/allocations_list.h" #include "shared/source/memory_manager/surface.h" #include "shared/source/memory_manager/unified_memory_manager.h" #include "shared/source/os_interface/device_factory.h" #include "shared/test/common/cmd_parse/hw_parse.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/libult/ult_command_stream_receiver.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "shared/test/common/mocks/mock_svm_manager.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/page_fault_manager/mock_cpu_page_fault_manager.h" #include "shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/source/event/user_event.h" #include "opencl/test/unit_test/command_queue/command_queue_fixture.h" #include "opencl/test/unit_test/command_queue/enqueue_map_buffer_fixture.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/test_macros/test_checks_ocl.h" using namespace NEO; struct EnqueueSvmTest : public ClDeviceFixture, public CommandQueueHwFixture, public ::testing::Test { typedef CommandQueueHwFixture CommandQueueFixture; EnqueueSvmTest() { } void SetUp() override { REQUIRE_SVM_OR_SKIP(defaultHwInfo); ClDeviceFixture::SetUp(); CommandQueueFixture::SetUp(pClDevice, 0); ptrSVM = context->getSVMAllocsManager()->createSVMAlloc(256, {}, context->getRootDeviceIndices(), context->getDeviceBitfields()); } void TearDown() override { if (defaultHwInfo->capabilityTable.ftrSvm == false) { return; } context->getSVMAllocsManager()->freeSVMAlloc(ptrSVM); 
CommandQueueFixture::TearDown(); ClDeviceFixture::TearDown(); } std::pair, void *> createBufferAndMapItOnGpu() { DebugManagerStateRestore restore{}; DebugManager.flags.DisableZeroCopyForBuffers.set(1); BufferDefaults::context = this->context; ReleaseableObjectPtr buffer = clUniquePtr(BufferHelper<>::create()); void *mappedPtr = pCmdQ->enqueueMapBuffer(buffer.get(), CL_TRUE, CL_MAP_READ, 0, buffer->getSize(), 0, nullptr, nullptr, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, mappedPtr); return {std::move(buffer), mappedPtr}; } cl_int retVal = CL_SUCCESS; void *ptrSVM = nullptr; }; TEST_F(EnqueueSvmTest, GivenInvalidSvmPtrWhenMappingSvmThenInvalidValueErrorIsReturned) { void *svmPtr = nullptr; retVal = this->pCmdQ->enqueueSVMMap( CL_FALSE, // cl_bool blocking_map CL_MAP_READ, // cl_map_flags map_flags svmPtr, // void *svm_ptr 0, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // const cL_event *event_wait_list nullptr, // cl_event *event false); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(EnqueueSvmTest, GivenValidParamsWhenMappingSvmThenSuccessIsReturned) { retVal = this->pCmdQ->enqueueSVMMap( CL_FALSE, // cl_bool blocking_map CL_MAP_READ, // cl_map_flags map_flags ptrSVM, // void *svm_ptr 256, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // const cL_event *event_wait_list nullptr, // cl_event *event false); EXPECT_EQ(CL_SUCCESS, retVal); } HWTEST_F(EnqueueSvmTest, GivenGpuHangAndBlockingCallAndValidParamsWhenMappingSvmThenOutOfResourcesIsReturned) { DebugManagerStateRestore stateRestore; DebugManager.flags.MakeEachEnqueueBlocking.set(true); std::unique_ptr device(new MockClDevice{MockClDevice::createWithNewExecutionEnvironment(nullptr)}); cl_queue_properties props = {}; MockCommandQueueHw mockCommandQueueHw(context, device.get(), &props); mockCommandQueueHw.waitForAllEnginesReturnValue = WaitStatus::GpuHang; const auto enqueueResult = mockCommandQueueHw.enqueueSVMMap( CL_TRUE, // cl_bool blocking_map CL_MAP_READ, // 
cl_map_flags map_flags ptrSVM, // void *svm_ptr 256, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // const cL_event *event_wait_list nullptr, // cl_event *event false); // bool externalAppCall EXPECT_EQ(CL_OUT_OF_RESOURCES, enqueueResult); EXPECT_EQ(1, mockCommandQueueHw.waitForAllEnginesCalledCount); } TEST_F(EnqueueSvmTest, GivenValidParamsWhenMappingSvmWithBlockingThenSuccessIsReturned) { retVal = this->pCmdQ->enqueueSVMMap( CL_TRUE, // cl_bool blocking_map CL_MAP_READ, // cl_map_flags map_flags ptrSVM, // void *svm_ptr 256, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // const cL_event *event_wait_list nullptr, // cl_event *event false); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(EnqueueSvmTest, GivenValidParamsWhenMappingSvmWithEventsThenSuccessIsReturned) { UserEvent uEvent; cl_event eventWaitList[] = {&uEvent}; retVal = this->pCmdQ->enqueueSVMMap( CL_FALSE, // cl_bool blocking_map CL_MAP_READ, // cl_map_flags map_flags ptrSVM, // void *svm_ptr 256, // size_t size 1, // cl_uint num_events_in_wait_list eventWaitList, // const cL_event *event_wait_list nullptr, // cl_event *event false); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(EnqueueSvmTest, GivenInvalidSvmPtrWhenUnmappingSvmThenInvalidValueErrorIsReturned) { void *svmPtr = nullptr; retVal = this->pCmdQ->enqueueSVMUnmap( svmPtr, // void *svm_ptr 0, // cl_uint num_events_in_wait_list nullptr, // const cL_event *event_wait_list nullptr, // cl_event *event false); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(EnqueueSvmTest, GivenValidParamsWhenUnmappingSvmThenSuccessIsReturned) { retVal = this->pCmdQ->enqueueSVMUnmap( ptrSVM, // void *svm_ptr 0, // cl_uint num_events_in_wait_list nullptr, // const cL_event *event_wait_list nullptr, // cl_event *event false); EXPECT_EQ(CL_SUCCESS, retVal); } HWTEST_F(EnqueueSvmTest, GivenGpuHangAndBlockingCallAndValidParamsWhenUnmappingSvmThenOutOfResourcesIsReturned) { DebugManagerStateRestore stateRestore; 
DebugManager.flags.MakeEachEnqueueBlocking.set(true); std::unique_ptr device(new MockClDevice{MockClDevice::createWithNewExecutionEnvironment(nullptr)}); cl_queue_properties props = {}; MockCommandQueueHw mockCommandQueueHw(context, device.get(), &props); mockCommandQueueHw.waitForAllEnginesReturnValue = WaitStatus::GpuHang; const auto enqueueResult = mockCommandQueueHw.enqueueSVMUnmap( ptrSVM, 0, nullptr, nullptr, false); EXPECT_EQ(CL_OUT_OF_RESOURCES, enqueueResult); EXPECT_EQ(1, mockCommandQueueHw.waitForAllEnginesCalledCount); } TEST_F(EnqueueSvmTest, GivenValidParamsWhenUnmappingSvmWithEventsThenSuccessIsReturned) { UserEvent uEvent; cl_event eventWaitList[] = {&uEvent}; retVal = this->pCmdQ->enqueueSVMUnmap( ptrSVM, // void *svm_ptr 1, // cl_uint num_events_in_wait_list eventWaitList, // const cL_event *event_wait_list nullptr, // cl_event *event false); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(EnqueueSvmTest, GivenValidParamsWhenFreeingSvmThenSuccessIsReturned) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableAsyncEventsHandler.set(false); ASSERT_EQ(1U, this->context->getSVMAllocsManager()->getNumAllocs()); void *svmPtrs[] = {ptrSVM}; retVal = this->pCmdQ->enqueueSVMFree( 1, // cl_uint num_svm_pointers svmPtrs, // void *svm_pointers[] nullptr, // (CL_CALLBACK *pfn_free_func) (cl_command_queue queue, cl_uint num_svm_pointers, void *svm_pointers[]) nullptr, // void *user_data 0, // cl_uint num_events_in_wait_list nullptr, // const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(0U, this->context->getSVMAllocsManager()->getNumAllocs()); } TEST_F(EnqueueSvmTest, GivenValidParamsWhenFreeingSvmWithCallbackThenSuccessIsReturned) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableAsyncEventsHandler.set(false); void *svmPtrs[] = {ptrSVM}; bool callbackWasCalled = false; struct ClbHelper { ClbHelper(bool &callbackWasCalled) : callbackWasCalled(callbackWasCalled) {} static void CL_CALLBACK 
Clb(cl_command_queue queue, cl_uint numSvmPointers, void *svmPointers[], void *usrData) { ClbHelper *data = (ClbHelper *)usrData; data->callbackWasCalled = true; } bool &callbackWasCalled; } userData(callbackWasCalled); retVal = this->pCmdQ->enqueueSVMFree( 1, // cl_uint num_svm_pointers svmPtrs, // void *svm_pointers[] ClbHelper::Clb, // (CL_CALLBACK *pfn_free_func) (cl_command_queue queue, cl_uint num_svm_pointers, void *svm_pointers[]) &userData, // void *user_data 0, // cl_uint num_events_in_wait_list nullptr, // const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(callbackWasCalled); }

// Freeing SVM with a user callback and an output event: the call must succeed and the
// callback must have been invoked by the time enqueueSVMFree returns.
TEST_F(EnqueueSvmTest, GivenValidParamsWhenFreeingSvmWithCallbackAndEventThenSuccessIsReturned) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.EnableAsyncEventsHandler.set(false);
    void *svmPtrs[] = {ptrSVM};
    bool callbackWasCalled = false;
    // Local helper whose static Clb() is handed to enqueueSVMFree; it flips the
    // referenced flag through the user_data pointer.
    struct ClbHelper {
        ClbHelper(bool &callbackWasCalled)
            : callbackWasCalled(callbackWasCalled) {}

        static void CL_CALLBACK Clb(cl_command_queue queue, cl_uint numSvmPointers, void *svmPointers[], void *usrData) {
            ClbHelper *data = (ClbHelper *)usrData;
            data->callbackWasCalled = true;
        }

        bool &callbackWasCalled;
    } userData(callbackWasCalled);
    cl_event event = nullptr;
    retVal = this->pCmdQ->enqueueSVMFree(
        1,              // cl_uint num_svm_pointers
        svmPtrs,        // void *svm_pointers[]
        ClbHelper::Clb, // (CL_CALLBACK *pfn_free_func) (cl_command_queue queue, cl_uint num_svm_pointers, void *svm_pointers[])
        &userData,      // void *user_data
        0,              // cl_uint num_events_in_wait_list
        nullptr,        // const cl_event *event_wait_list
        &event          // cl_event *event
    );
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_TRUE(callbackWasCalled);

    // The returned event is destroyed directly here rather than released.
    auto pEvent = (Event *)event;
    delete pEvent;
}

TEST_F(EnqueueSvmTest, GivenValidParamsWhenFreeingSvmWithBlockingThenSuccessIsReturned) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableAsyncEventsHandler.set(false); void *svmPtrs[] = {ptrSVM}; UserEvent uEvent; cl_event eventWaitList[] =
{&uEvent}; retVal = this->pCmdQ->enqueueSVMFree( 1, // cl_uint num_svm_pointers svmPtrs, // void *svm_pointers[] nullptr, // (CL_CALLBACK *pfn_free_func) (cl_command_queue queue, cl_uint num_svm_pointers, void *svm_pointers[]) nullptr, // void *user_data 1, // cl_uint num_events_in_wait_list eventWaitList, // const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); }

// With MakeEachEnqueueBlocking set and the mock queue's wait reporting WaitStatus::GpuHang,
// enqueueSVMFree must return CL_OUT_OF_RESOURCES after exactly one wait, and the output
// event must still be handed back (non-null) so the caller can release it.
HWTEST_F(EnqueueSvmTest, GivenEventAndGpuHangAndBlockingCallAndValidParamsWhenFreeingSvmWithBlockingThenEventIsNotDeletedAndOutOfResourcesIsReturned) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.EnableAsyncEventsHandler.set(false);
    DebugManager.flags.MakeEachEnqueueBlocking.set(true);

    std::unique_ptr device(new MockClDevice{MockClDevice::createWithNewExecutionEnvironment(nullptr)});
    cl_queue_properties props = {};

    MockCommandQueueHw mockCommandQueueHw(context, device.get(), &props);
    mockCommandQueueHw.waitForAllEnginesReturnValue = WaitStatus::GpuHang;

    const cl_uint numOfSvmPointers = 1;
    void *svmPtrs[numOfSvmPointers] = {ptrSVM};

    UserEvent uEvent;
    const cl_uint numOfEvents = 1;
    cl_event eventWaitList[numOfEvents] = {&uEvent};
    cl_event retEvent = nullptr;

    const auto enqueueResult = mockCommandQueueHw.enqueueSVMFree(
        numOfSvmPointers,
        svmPtrs,
        nullptr,
        nullptr,
        numOfEvents,
        eventWaitList,
        &retEvent);

    EXPECT_EQ(CL_OUT_OF_RESOURCES, enqueueResult);
    EXPECT_EQ(1, mockCommandQueueHw.waitForAllEnginesCalledCount);

    ASSERT_NE(nullptr, retEvent);
    castToObjectOrAbort(retEvent)->release();
}

HWTEST_F(EnqueueSvmTest, GivenGpuHangAndBlockingCallAndValidParamsWhenFreeingSvmWithBlockingThenOutOfResourcesIsReturned) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableAsyncEventsHandler.set(false); DebugManager.flags.MakeEachEnqueueBlocking.set(true); std::unique_ptr device(new MockClDevice{MockClDevice::createWithNewExecutionEnvironment(nullptr)}); cl_queue_properties props = {}; MockCommandQueueHw mockCommandQueueHw(context, device.get(), &props);
mockCommandQueueHw.waitForAllEnginesReturnValue = WaitStatus::GpuHang; const cl_uint numOfSvmPointers = 1; void *svmPtrs[numOfSvmPointers] = {ptrSVM}; UserEvent uEvent; const cl_uint numOfEvents = 1; cl_event eventWaitList[numOfEvents] = {&uEvent}; const auto enqueueResult = mockCommandQueueHw.enqueueSVMFree( numOfSvmPointers, svmPtrs, nullptr, nullptr, numOfEvents, eventWaitList, nullptr); EXPECT_EQ(CL_OUT_OF_RESOURCES, enqueueResult); EXPECT_EQ(1, mockCommandQueueHw.waitForAllEnginesCalledCount); }

// A memcpy with a null destination must be rejected with CL_INVALID_VALUE.
// The source is a temporary 256-byte SVM allocation that is freed at the end of the test.
TEST_F(EnqueueSvmTest, GivenNullDstPtrWhenCopyingMemoryThenInvalidVaueErrorIsReturned) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.EnableAsyncEventsHandler.set(false);
    void *pDstSVM = nullptr;
    void *pSrcSVM = context->getSVMAllocsManager()->createSVMAlloc(256, {}, context->getRootDeviceIndices(), context->getDeviceBitfields());
    retVal = this->pCmdQ->enqueueSVMMemcpy(
        false,   // cl_bool blocking_copy
        pDstSVM, // void *dst_ptr
        pSrcSVM, // const void *src_ptr
        256,     // size_t size
        0,       // cl_uint num_events_in_wait_list
        nullptr, // const cl_event *event_wait_list
        nullptr  // cl_event *event
    );
    EXPECT_EQ(CL_INVALID_VALUE, retVal);
    context->getSVMAllocsManager()->freeSVMAlloc(pSrcSVM);
}

// A memcpy with a null source must be rejected with CL_INVALID_VALUE.
TEST_F(EnqueueSvmTest, GivenNullSrcPtrWhenCopyingMemoryThenInvalidVaueErrorIsReturned) {
    void *pDstSVM = ptrSVM;
    void *pSrcSVM = nullptr;
    retVal = this->pCmdQ->enqueueSVMMemcpy(
        false,   // cl_bool blocking_copy
        pDstSVM, // void *dst_ptr
        pSrcSVM, // const void *src_ptr
        256,     // size_t size
        0,       // cl_uint num_events_in_wait_list
        nullptr, // const cl_event *event_wait_list
        nullptr  // cl_event *event
    );
    EXPECT_EQ(CL_INVALID_VALUE, retVal);
}

TEST_F(EnqueueSvmTest, givenSrcHostPtrAndEventWhenEnqueueSVMMemcpyThenEventCommandTypeIsCorrectlySet) { char srcHostPtr[260] = {}; void *pDstSVM = ptrSVM; void *pSrcSVM = srcHostPtr; cl_event event = nullptr; retVal = this->pCmdQ->enqueueSVMMemcpy( false, // cl_bool blocking_copy pDstSVM, // void *dst_ptr pSrcSVM, // const void *src_ptr 256, // size_t size 0, //
cl_uint num_events_in_wait_list nullptr, // cl_evebt *event_wait_list &event // cL_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); constexpr cl_command_type expectedCmd = CL_COMMAND_SVM_MEMCPY; cl_command_type actualCmd = castToObjectOrAbort(event)->getCommandType(); EXPECT_EQ(expectedCmd, actualCmd); clReleaseEvent(event); } TEST_F(EnqueueSvmTest, givenSrcHostPtrAndSizeZeroWhenEnqueueSVMMemcpyThenReturnSuccess) { char srcHostPtr[260] = {}; void *pDstSVM = ptrSVM; void *pSrcSVM = srcHostPtr; retVal = this->pCmdQ->enqueueSVMMemcpy( false, // cl_bool blocking_copy pDstSVM, // void *dst_ptr pSrcSVM, // const void *src_ptr 0, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // cl_evebt *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); } HWTEST_F(EnqueueSvmTest, givenSrcHostPtrWhenEnqueueSVMMemcpyThenEnqueuWriteBufferIsCalled) { char srcHostPtr[260]; void *pSrcSVM = srcHostPtr; void *pDstSVM = ptrSVM; MockCommandQueueHw myCmdQ(context, pClDevice, 0); retVal = myCmdQ.enqueueSVMMemcpy( false, // cl_bool blocking_copy pDstSVM, // void *dst_ptr pSrcSVM, // const void *src_ptr 256, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // cl_evebt *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(myCmdQ.lastCommandType, static_cast(CL_COMMAND_WRITE_BUFFER)); auto tempAlloc = myCmdQ.getGpgpuCommandStreamReceiver().getTemporaryAllocations().peekHead(); EXPECT_EQ(0u, tempAlloc->countSuccessors()); EXPECT_EQ(pSrcSVM, reinterpret_cast(tempAlloc->getGpuAddress())); auto srcAddress = myCmdQ.kernelParams.srcPtr; auto srcOffset = myCmdQ.kernelParams.srcOffset.x; auto dstAddress = myCmdQ.kernelParams.dstPtr; auto dstOffset = myCmdQ.kernelParams.dstOffset.x; EXPECT_EQ(alignDown(pSrcSVM, 4), srcAddress); EXPECT_EQ(ptrDiff(pSrcSVM, alignDown(pSrcSVM, 4)), srcOffset); EXPECT_EQ(alignDown(pDstSVM, 4), dstAddress); EXPECT_EQ(ptrDiff(pDstSVM, alignDown(pDstSVM, 4)), dstOffset); } HWTEST_F(EnqueueSvmTest, 
givenDstHostPtrWhenEnqueueSVMMemcpyThenEnqueuReadBufferIsCalled) { char dstHostPtr[260]; void *pDstSVM = dstHostPtr; void *pSrcSVM = ptrSVM; MockCommandQueueHw myCmdQ(context, pClDevice, 0); retVal = myCmdQ.enqueueSVMMemcpy( false, // cl_bool blocking_copy pDstSVM, // void *dst_ptr pSrcSVM, // const void *src_ptr 256, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // cl_evebt *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(myCmdQ.lastCommandType, static_cast(CL_COMMAND_READ_BUFFER)); auto tempAlloc = myCmdQ.getGpgpuCommandStreamReceiver().getTemporaryAllocations().peekHead(); EXPECT_EQ(0u, tempAlloc->countSuccessors()); EXPECT_EQ(pDstSVM, reinterpret_cast(tempAlloc->getGpuAddress())); auto srcAddress = myCmdQ.kernelParams.srcPtr; auto srcOffset = myCmdQ.kernelParams.srcOffset.x; auto dstAddress = myCmdQ.kernelParams.dstPtr; auto dstOffset = myCmdQ.kernelParams.dstOffset.x; EXPECT_EQ(alignDown(pSrcSVM, 4), srcAddress); EXPECT_EQ(ptrDiff(pSrcSVM, alignDown(pSrcSVM, 4)), srcOffset); EXPECT_EQ(alignDown(pDstSVM, 4), dstAddress); EXPECT_EQ(ptrDiff(pDstSVM, alignDown(pDstSVM, 4)), dstOffset); }

// Copying from SVM into a host buffer with an output event: the event returned to the
// caller must report CL_COMMAND_SVM_MEMCPY as its command type.
TEST_F(EnqueueSvmTest, givenDstHostPtrAndEventWhenEnqueueSVMMemcpyThenEventCommandTypeIsCorrectlySet) {
    char dstHostPtr[260];
    void *pDstSVM = dstHostPtr;
    void *pSrcSVM = ptrSVM;
    cl_event event = nullptr;
    retVal = this->pCmdQ->enqueueSVMMemcpy(
        false,   // cl_bool blocking_copy
        pDstSVM, // void *dst_ptr
        pSrcSVM, // const void *src_ptr
        256,     // size_t size
        0,       // cl_uint num_events_in_wait_list
        nullptr, // const cl_event *event_wait_list
        &event   // cl_event *event
    );
    EXPECT_EQ(CL_SUCCESS, retVal);
    constexpr cl_command_type expectedCmd = CL_COMMAND_SVM_MEMCPY;
    cl_command_type actualCmd = castToObjectOrAbort(event)->getCommandType();
    EXPECT_EQ(expectedCmd, actualCmd);
    clReleaseEvent(event);
}

TEST_F(EnqueueSvmTest, givenDstHostPtrAndSizeZeroWhenEnqueueSVMMemcpyThenReturnSuccess) { char dstHostPtr[260]; void *pDstSVM = dstHostPtr; void
*pSrcSVM = ptrSVM; retVal = this->pCmdQ->enqueueSVMMemcpy( false, // cl_bool blocking_copy pDstSVM, // void *dst_ptr pSrcSVM, // const void *src_ptr 0, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // cl_evebt *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); } HWTEST_F(EnqueueSvmTest, givenDstHostPtrAndSrcHostPtrWhenEnqueueNonBlockingSVMMemcpyThenEnqueuWriteBufferIsCalled) { char dstHostPtr[] = {0, 0, 0}; char srcHostPtr[] = {1, 2, 3}; void *pDstSVM = dstHostPtr; void *pSrcSVM = srcHostPtr; MockCommandQueueHw myCmdQ(context, pClDevice, 0); retVal = myCmdQ.enqueueSVMMemcpy( false, // cl_bool blocking_copy pDstSVM, // void *dst_ptr pSrcSVM, // const void *src_ptr 3, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // cl_evebt *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(myCmdQ.lastCommandType, static_cast(CL_COMMAND_WRITE_BUFFER)); auto tempAlloc = myCmdQ.getGpgpuCommandStreamReceiver().getTemporaryAllocations().peekHead(); EXPECT_EQ(1u, tempAlloc->countSuccessors()); EXPECT_EQ(pSrcSVM, reinterpret_cast(tempAlloc->getGpuAddress())); EXPECT_EQ(pDstSVM, reinterpret_cast(tempAlloc->next->getGpuAddress())); auto srcAddress = myCmdQ.kernelParams.srcPtr; auto srcOffset = myCmdQ.kernelParams.srcOffset.x; auto dstAddress = myCmdQ.kernelParams.dstPtr; auto dstOffset = myCmdQ.kernelParams.dstOffset.x; EXPECT_EQ(alignDown(pSrcSVM, 4), srcAddress); EXPECT_EQ(ptrDiff(pSrcSVM, alignDown(pSrcSVM, 4)), srcOffset); EXPECT_EQ(alignDown(pDstSVM, 4), dstAddress); EXPECT_EQ(ptrDiff(pDstSVM, alignDown(pDstSVM, 4)), dstOffset); } HWTEST_F(EnqueueSvmTest, givenDstHostPtrAndSrcHostPtrWhenEnqueueBlockingSVMMemcpyThenEnqueuWriteBufferIsCalled) { char dstHostPtr[] = {0, 0, 0}; char srcHostPtr[] = {1, 2, 3}; void *pDstSVM = dstHostPtr; void *pSrcSVM = srcHostPtr; MockCommandQueueHw myCmdQ(context, pClDevice, 0); retVal = myCmdQ.enqueueSVMMemcpy( true, // cl_bool blocking_copy pDstSVM, // void 
*dst_ptr pSrcSVM, // const void *src_ptr 3, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // cl_evebt *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(myCmdQ.lastCommandType, static_cast(CL_COMMAND_WRITE_BUFFER)); auto srcAddress = myCmdQ.kernelParams.srcPtr; auto srcOffset = myCmdQ.kernelParams.srcOffset.x; auto dstAddress = myCmdQ.kernelParams.dstPtr; auto dstOffset = myCmdQ.kernelParams.dstOffset.x; EXPECT_EQ(alignDown(pSrcSVM, 4), srcAddress); EXPECT_EQ(ptrDiff(pSrcSVM, alignDown(pSrcSVM, 4)), srcOffset); EXPECT_EQ(alignDown(pDstSVM, 4), dstAddress); EXPECT_EQ(ptrDiff(pDstSVM, alignDown(pDstSVM, 4)), dstOffset); }

// A zero-sized memcpy between two host buffers is expected to return CL_SUCCESS.
TEST_F(EnqueueSvmTest, givenDstHostPtrAndSrcHostPtrAndSizeZeroWhenEnqueueSVMMemcpyThenReturnSuccess) {
    char dstHostPtr[260] = {};
    char srcHostPtr[260] = {};
    void *pDstSVM = dstHostPtr;
    void *pSrcSVM = srcHostPtr;
    retVal = this->pCmdQ->enqueueSVMMemcpy(
        false,   // cl_bool blocking_copy
        pDstSVM, // void *dst_ptr
        pSrcSVM, // const void *src_ptr
        0,       // size_t size
        0,       // cl_uint num_events_in_wait_list
        nullptr, // const cl_event *event_wait_list
        nullptr  // cl_event *event
    );
    EXPECT_EQ(CL_SUCCESS, retVal);
}

HWTEST_F(EnqueueSvmTest, givenSvmToSvmCopyTypeWhenEnqueueNonBlockingSVMMemcpyThenSvmMemcpyCommandIsEnqueued) { void *pDstSVM = ptrSVM; void *pSrcSVM = context->getSVMAllocsManager()->createSVMAlloc(256, {}, context->getRootDeviceIndices(), context->getDeviceBitfields()); MockCommandQueueHw myCmdQ(context, pClDevice, 0); retVal = myCmdQ.enqueueSVMMemcpy( false, // cl_bool blocking_copy pDstSVM, // void *dst_ptr pSrcSVM, // const void *src_ptr 256, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // cl_evebt *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(myCmdQ.lastCommandType, static_cast(CL_COMMAND_SVM_MEMCPY)); auto tempAlloc = myCmdQ.getGpgpuCommandStreamReceiver().getTemporaryAllocations().peekHead(); EXPECT_EQ(nullptr, tempAlloc); auto srcAddress =
myCmdQ.kernelParams.srcPtr; auto srcOffset = myCmdQ.kernelParams.srcOffset.x; auto dstAddress = myCmdQ.kernelParams.dstPtr; auto dstOffset = myCmdQ.kernelParams.dstOffset.x; EXPECT_EQ(alignDown(pSrcSVM, 4), srcAddress); EXPECT_EQ(ptrDiff(pSrcSVM, alignDown(pSrcSVM, 4)), srcOffset); EXPECT_EQ(alignDown(pDstSVM, 4), dstAddress); EXPECT_EQ(ptrDiff(pDstSVM, alignDown(pDstSVM, 4)), dstOffset); context->getSVMAllocsManager()->freeSVMAlloc(pSrcSVM); }

// Blocking SVM-to-SVM memcpy from a temporary 256-byte allocation is expected to return
// CL_SUCCESS; the temporary source allocation is freed afterwards.
TEST_F(EnqueueSvmTest, givenSvmToSvmCopyTypeWhenEnqueueBlockingSVMMemcpyThenSuccessIsReturned) {
    void *pDstSVM = ptrSVM;
    void *pSrcSVM = context->getSVMAllocsManager()->createSVMAlloc(256, {}, context->getRootDeviceIndices(), context->getDeviceBitfields());
    retVal = this->pCmdQ->enqueueSVMMemcpy(
        true,    // cl_bool blocking_copy
        pDstSVM, // void *dst_ptr
        pSrcSVM, // const void *src_ptr
        256,     // size_t size
        0,       // cl_uint num_events_in_wait_list
        nullptr, // const cl_event *event_wait_list
        nullptr  // cl_event *event
    );
    EXPECT_EQ(CL_SUCCESS, retVal);
    context->getSVMAllocsManager()->freeSVMAlloc(pSrcSVM);
}

// SVM-to-SVM memcpy enqueued with blocking_copy == false behind one user event is expected
// to return CL_SUCCESS. The user event is given status -1 at the end of the test.
TEST_F(EnqueueSvmTest, GivenValidParamsWhenCopyingMemoryWithBlockingThenSuccessisReturned) {
    void *pDstSVM = ptrSVM;
    void *pSrcSVM = context->getSVMAllocsManager()->createSVMAlloc(256, {}, context->getRootDeviceIndices(), context->getDeviceBitfields());
    auto uEvent = make_releaseable();
    cl_event eventWaitList[] = {uEvent.get()};
    retVal = this->pCmdQ->enqueueSVMMemcpy(
        false,         // cl_bool blocking_copy
        pDstSVM,       // void *dst_ptr
        pSrcSVM,       // const void *src_ptr
        256,           // size_t size
        1,             // cl_uint num_events_in_wait_list
        eventWaitList, // const cl_event *event_wait_list
        nullptr        // cl_event *event
    );
    EXPECT_EQ(CL_SUCCESS, retVal);
    context->getSVMAllocsManager()->freeSVMAlloc(pSrcSVM);
    uEvent->setStatus(-1);
}

TEST_F(EnqueueSvmTest, GivenCoherencyWhenCopyingMemoryThenSuccessIsReturned) { void *pDstSVM = ptrSVM; SVMAllocsManager::SvmAllocationProperties svmProperties; svmProperties.coherent = true; void *pSrcSVM =
context->getSVMAllocsManager()->createSVMAlloc(256, svmProperties, context->getRootDeviceIndices(), context->getDeviceBitfields()); retVal = this->pCmdQ->enqueueSVMMemcpy( false, // cl_bool blocking_copy pDstSVM, // void *dst_ptr pSrcSVM, // const void *src_ptr 256, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // cl_evebt *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); context->getSVMAllocsManager()->freeSVMAlloc(pSrcSVM); }

// Memcpy from a coherent SVM allocation, enqueued with blocking_copy == false behind one
// user event, is expected to return CL_SUCCESS. The user event is given status -1 at the end.
TEST_F(EnqueueSvmTest, GivenCoherencyWhenCopyingMemoryWithBlockingThenSuccessIsReturned) {
    void *pDstSVM = ptrSVM;
    SVMAllocsManager::SvmAllocationProperties svmProperties;
    svmProperties.coherent = true;
    void *pSrcSVM = context->getSVMAllocsManager()->createSVMAlloc(256, svmProperties, context->getRootDeviceIndices(), context->getDeviceBitfields());
    auto uEvent = make_releaseable();
    cl_event eventWaitList[] = {uEvent.get()};
    retVal = this->pCmdQ->enqueueSVMMemcpy(
        false,         // cl_bool blocking_copy
        pDstSVM,       // void *dst_ptr
        pSrcSVM,       // const void *src_ptr
        256,           // size_t size
        1,             // cl_uint num_events_in_wait_list
        eventWaitList, // const cl_event *event_wait_list
        nullptr        // cl_event *event
    );
    EXPECT_EQ(CL_SUCCESS, retVal);
    context->getSVMAllocsManager()->freeSVMAlloc(pSrcSVM);
    uEvent->setStatus(-1);
}

HWTEST_F(EnqueueSvmTest, givenUnalignedAddressWhenEnqueueMemcpyThenDispatchInfoHasAlignedAddressAndProperOffset) { void *pDstSVM = reinterpret_cast(0x17); void *pSrcSVM = ptrSVM; MockCommandQueueHw myCmdQ(context, pClDevice, 0); retVal = myCmdQ.enqueueSVMMemcpy( false, // cl_bool blocking_copy pDstSVM, // void *dst_ptr pSrcSVM, // const void *src_ptr 0, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // cl_evebt *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); auto srcAddress = myCmdQ.kernelParams.srcPtr; auto srcOffset = myCmdQ.kernelParams.srcOffset.x; auto dstAddress = myCmdQ.kernelParams.dstPtr; auto dstOffset = myCmdQ.kernelParams.dstOffset.x;
EXPECT_EQ(alignDown(pSrcSVM, 4), srcAddress); EXPECT_EQ(ptrDiff(pSrcSVM, alignDown(pSrcSVM, 4)), srcOffset); EXPECT_EQ(alignDown(pDstSVM, 4), dstAddress); EXPECT_EQ(ptrDiff(pDstSVM, alignDown(pDstSVM, 4)), dstOffset); }

// Filling through a null SVM pointer must be rejected with CL_INVALID_VALUE.
TEST_F(EnqueueSvmTest, GivenNullSvmPtrWhenFillingMemoryThenInvalidValueErrorIsReturned) {
    void *svmPtr = nullptr;
    const float pattern[1] = {1.2345f};
    const size_t patternSize = sizeof(pattern);
    retVal = this->pCmdQ->enqueueSVMMemFill(
        svmPtr,      // void *svm_ptr
        pattern,     // const void *pattern
        patternSize, // size_t pattern_size
        256,         // size_t size
        0,           // cl_uint num_events_in_wait_list
        nullptr,     // const cl_event *event_wait_list
        nullptr      // cl_event *event
    );
    EXPECT_EQ(CL_INVALID_VALUE, retVal);
}

// Filling the fixture's SVM allocation through a mock queue must succeed, and the recorded
// kernel parameters must hold the 4-byte-aligned-down destination plus the remaining offset.
HWTEST_F(EnqueueSvmTest, givenSvmAllocWhenEnqueueSvmFillThenSuccesIsReturnedAndAddressIsProperlyAligned) {
    const float pattern[1] = {1.2345f};
    const size_t patternSize = sizeof(pattern);
    MockCommandQueueHw myCmdQ(context, pClDevice, 0);
    retVal = myCmdQ.enqueueSVMMemFill(
        ptrSVM,      // void *svm_ptr
        pattern,     // const void *pattern
        patternSize, // size_t pattern_size
        256,         // size_t size
        0,           // cl_uint num_events_in_wait_list
        nullptr,     // const cl_event *event_wait_list
        nullptr      // cl_event *event
    );
    EXPECT_EQ(CL_SUCCESS, retVal);
    auto dstAddress = myCmdQ.kernelParams.dstPtr;
    auto dstOffset = myCmdQ.kernelParams.dstOffset.x;
    EXPECT_EQ(alignDown(ptrSVM, 4), dstAddress);
    EXPECT_EQ(ptrDiff(ptrSVM, alignDown(ptrSVM, 4)), dstOffset);
}

TEST_F(EnqueueSvmTest, GivenValidParamsWhenFillingMemoryWithBlockingThenSuccessIsReturned) { const float pattern[1] = {1.2345f}; const size_t patternSize = sizeof(pattern); auto uEvent = make_releaseable(); cl_event eventWaitList[] = {uEvent.get()}; retVal = this->pCmdQ->enqueueSVMMemFill( ptrSVM, // void *svm_ptr pattern, // const void *pattern patternSize, // size_t pattern_size 256, // size_t size 1, // cl_uint num_events_in_wait_list eventWaitList, // cl_evebt *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_SUCCESS, retVal);
uEvent->setStatus(-1); }

// With MakeEachEnqueueBlocking set and the mock queue's wait reporting WaitStatus::GpuHang,
// enqueueSVMMemFill must return CL_OUT_OF_RESOURCES after exactly one wait.
HWTEST_F(EnqueueSvmTest, GivenGpuHangAndBlockingCallAndValidParamsWhenFillingMemoryThenOutOfResourcesIsReturned) {
    DebugManagerStateRestore stateRestore;
    DebugManager.flags.MakeEachEnqueueBlocking.set(true);

    std::unique_ptr device(new MockClDevice{MockClDevice::createWithNewExecutionEnvironment(nullptr)});
    cl_queue_properties props = {};

    MockCommandQueueHw mockCommandQueueHw(context, device.get(), &props);
    mockCommandQueueHw.waitForAllEnginesReturnValue = WaitStatus::GpuHang;

    const float pattern[1] = {1.2345f};
    const size_t patternSize = sizeof(pattern);

    auto uEvent = make_releaseable();
    const cl_uint numOfEvents = 1;
    cl_event eventWaitList[numOfEvents] = {uEvent.get()};

    const auto enqueueResult = mockCommandQueueHw.enqueueSVMMemFill(
        ptrSVM,
        pattern,
        patternSize,
        256,
        numOfEvents,
        eventWaitList,
        nullptr);

    EXPECT_EQ(CL_OUT_OF_RESOURCES, enqueueResult);
    EXPECT_EQ(1, mockCommandQueueHw.waitForAllEnginesCalledCount);

    uEvent->setStatus(-1);
}

// Two identical fills enqueued back to back must each return CL_SUCCESS.
TEST_F(EnqueueSvmTest, GivenRepeatCallsWhenFillingMemoryThenSuccessIsReturnedForEachCall) {
    const float pattern[1] = {1.2345f};
    const size_t patternSize = sizeof(pattern);
    retVal = this->pCmdQ->enqueueSVMMemFill(
        ptrSVM,      // void *svm_ptr
        pattern,     // const void *pattern
        patternSize, // size_t pattern_size
        256,         // size_t size
        0,           // cl_uint num_events_in_wait_list
        nullptr,     // const cl_event *event_wait_list
        nullptr      // cl_event *event
    );
    EXPECT_EQ(CL_SUCCESS, retVal);

    retVal = this->pCmdQ->enqueueSVMMemFill(
        ptrSVM,      // void *svm_ptr
        pattern,     // const void *pattern
        patternSize, // size_t pattern_size
        256,         // size_t size
        0,           // cl_uint num_events_in_wait_list
        nullptr,     // const cl_event *event_wait_list
        nullptr      // cl_event *event
    );
    EXPECT_EQ(CL_SUCCESS, retVal);
}

TEST_F(EnqueueSvmTest, givenEnqueueSVMMemFillWhenPatternAllocationIsObtainedThenItsTypeShouldBeSetToFillPattern) { auto &csr = pCmdQ->getGpgpuCommandStreamReceiver(); ASSERT_TRUE(csr.getAllocationsForReuse().peekIsEmpty()); const float pattern[1] = {1.2345f}; const
size_t patternSize = sizeof(pattern); const size_t size = patternSize; retVal = this->pCmdQ->enqueueSVMMemFill( ptrSVM, pattern, patternSize, size, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_FALSE(csr.getAllocationsForReuse().peekIsEmpty()); GraphicsAllocation *patternAllocation = csr.getAllocationsForReuse().peekHead(); ASSERT_NE(nullptr, patternAllocation); EXPECT_EQ(AllocationType::FILL_PATTERN, patternAllocation->getAllocationType()); } TEST_F(EnqueueSvmTest, GivenSvmAllocationWhenEnqueingKernelThenSuccessIsReturned) { auto svmData = context->getSVMAllocsManager()->getSVMAlloc(ptrSVM); ASSERT_NE(nullptr, svmData); GraphicsAllocation *pSvmAlloc = svmData->gpuAllocations.getGraphicsAllocation(context->getDevice(0)->getRootDeviceIndex()); EXPECT_NE(nullptr, ptrSVM); std::unique_ptr program(Program::createBuiltInFromSource("FillBufferBytes", context, context->getDevices(), &retVal)); program->build(program->getDevices(), nullptr, false); std::unique_ptr kernel(Kernel::create(program.get(), program->getKernelInfoForKernel("FillBufferBytes"), *context->getDevice(0), &retVal)); kernel->setSvmKernelExecInfo(pSvmAlloc); size_t offset = 0; size_t size = 1; retVal = this->pCmdQ->enqueueKernel( kernel.get(), 1, &offset, &size, &size, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, kernel->kernelSvmGfxAllocations.size()); } TEST_F(EnqueueSvmTest, givenEnqueueTaskBlockedOnUserEventWhenItIsEnqueuedThenSurfacesAreMadeResident) { auto svmData = context->getSVMAllocsManager()->getSVMAlloc(ptrSVM); ASSERT_NE(nullptr, svmData); GraphicsAllocation *pSvmAlloc = svmData->gpuAllocations.getGraphicsAllocation(context->getDevice(0)->getRootDeviceIndex()); EXPECT_NE(nullptr, ptrSVM); auto program = clUniquePtr(Program::createBuiltInFromSource("FillBufferBytes", context, context->getDevices(), &retVal)); program->build(program->getDevices(), nullptr, false); auto pMultiDeviceKernel = clUniquePtr(MultiDeviceKernel::create(program.get(), 
program->getKernelInfosForKernel("FillBufferBytes"), &retVal)); auto kernel = static_cast(pMultiDeviceKernel->getKernel(rootDeviceIndex)); std::vector allSurfaces; kernel->getResidency(allSurfaces); EXPECT_EQ(1u, allSurfaces.size()); kernel->setSvmKernelExecInfo(pSvmAlloc); auto uEvent = make_releaseable(); cl_event eventWaitList[] = {uEvent.get()}; size_t offset = 0; size_t size = 1; retVal = this->pCmdQ->enqueueKernel( kernel, 1, &offset, &size, &size, 1, eventWaitList, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); kernel->getResidency(allSurfaces); EXPECT_EQ(3u, allSurfaces.size()); for (auto &surface : allSurfaces) delete surface; EXPECT_EQ(1u, kernel->kernelSvmGfxAllocations.size()); uEvent->setStatus(-1); }

// Allocates and frees SVM from three worker threads at once and checks that, once every
// worker has finished, only the fixture's own allocation is left in the SVM manager.
TEST_F(EnqueueSvmTest, GivenMultipleThreasWhenAllocatingSvmThenOnlyOneAllocationIsCreated) {
    std::atomic flag(0);  // number of workers that have started
    std::atomic ready(0); // number of workers that have finished
    void *svmPtrs[15] = {};

    // Creates one-byte SVM allocations for slots [from, to] and checks that each one is
    // registered and backed by a graphics allocation whose buffer is the returned pointer.
    auto allocSvm = [&](uint32_t from, uint32_t to) {
        for (uint32_t i = from; i <= to; i++) {
            svmPtrs[i] = context->getSVMAllocsManager()->createSVMAlloc(1, {}, context->getRootDeviceIndices(), context->getDeviceBitfields());
            auto svmData = context->getSVMAllocsManager()->getSVMAlloc(svmPtrs[i]);
            ASSERT_NE(nullptr, svmData);
            auto ga = svmData->gpuAllocations.getGraphicsAllocation(context->getDevice(0)->getRootDeviceIndex());
            EXPECT_NE(nullptr, ga);
            EXPECT_EQ(ga->getUnderlyingBuffer(), svmPtrs[i]);
        }
    };

    // Frees the allocations stored in slots [from, to].
    auto freeSvm = [&](uint32_t from, uint32_t to) {
        for (uint32_t i = from; i <= to; i++) {
            context->getSVMAllocsManager()->freeSVMAlloc(svmPtrs[i]);
        }
    };

    // Worker body: spin until all three workers have started, so they hit the SVM manager
    // at the same time, then optionally allocate, always free, and report completion.
    auto asyncFcn = [&](bool alloc, uint32_t from, uint32_t to) {
        flag++;
        while (flag < 3)
            ;
        if (alloc) {
            allocSvm(from, to);
        }
        freeSvm(from, to);
        ready++;
    };

    EXPECT_EQ(1u, context->getSVMAllocsManager()->getNumAllocs());

    // Slots 10..14 are allocated up front on this thread; the third worker only frees them.
    allocSvm(10, 14);

    auto t1 = std::unique_ptr(new std::thread(asyncFcn, true, 0, 4));
    auto t2 = std::unique_ptr(new std::thread(asyncFcn, true, 5, 9));
    auto t3 = std::unique_ptr(new std::thread(asyncFcn, false, 10, 14));

    while (ready < 3) {
        std::this_thread::yield();
    }

    EXPECT_EQ(1u, context->getSVMAllocsManager()->getNumAllocs());
    t1->join();
    t2->join();
    t3->join();
}

// Migrating the fixture's SVM pointer with no sizes, flags or events is expected to return CL_SUCCESS.
TEST_F(EnqueueSvmTest, GivenValidParamsWhenMigratingMemoryThenSuccessIsReturned) {
    const void *svmPtrs[] = {ptrSVM};
    retVal = this->pCmdQ->enqueueSVMMigrateMem(
        1,       // cl_uint num_svm_pointers
        svmPtrs, // const void **svm_pointers
        nullptr, // const size_t *sizes
        0,       // const cl_mem_migration_flags flags
        0,       // cl_uint num_events_in_wait_list
        nullptr, // const cl_event *event_wait_list
        nullptr  // cl_event *event
    );
    EXPECT_EQ(CL_SUCCESS, retVal);
}

// With MakeEachEnqueueBlocking set and the mock queue's wait reporting WaitStatus::GpuHang,
// enqueueSVMMigrateMem must return CL_OUT_OF_RESOURCES after exactly one wait.
HWTEST_F(EnqueueSvmTest, GivenGpuHangAndBlockingCallAndValidParamsWhenMigratingMemoryThenOutOfResourcesIsReturned) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.MakeEachEnqueueBlocking.set(true);

    std::unique_ptr device(new MockClDevice{MockClDevice::createWithNewExecutionEnvironment(nullptr)});
    cl_queue_properties props = {};

    MockCommandQueueHw mockCommandQueueHw(context, device.get(), &props);
    mockCommandQueueHw.waitForAllEnginesReturnValue = WaitStatus::GpuHang;

    const void *svmPtrs[] = {ptrSVM};
    const auto enqueueResult = mockCommandQueueHw.enqueueSVMMigrateMem(
        1,       // cl_uint num_svm_pointers
        svmPtrs, // const void **svm_pointers
        nullptr, // const size_t *sizes
        0,       // const cl_mem_migration_flags flags
        0,       // cl_uint num_events_in_wait_list
        nullptr, // const cl_event *event_wait_list
        nullptr  // cl_event *event
    );

    EXPECT_EQ(CL_OUT_OF_RESOURCES, enqueueResult);
    EXPECT_EQ(1, mockCommandQueueHw.waitForAllEnginesCalledCount);
}

HWTEST_F(EnqueueSvmTest, WhenMigratingMemoryThenSvmMigrateMemCommandTypeIsUsed) { MockCommandQueueHw commandQueue{context, pClDevice, nullptr}; const void *svmPtrs[] = {ptrSVM}; retVal = commandQueue.enqueueSVMMigrateMem( 1, // cl_uint num_svm_pointers svmPtrs, // const void **svm_pointers nullptr, // const size_t *sizes 0, // const cl_mem_migration_flags flags 0, // cl_uint num_events_in_wait_list nullptr, // cl_event *event_wait_list nullptr // cL_event *event );
EXPECT_EQ(CL_SUCCESS, retVal); uint32_t expectedCommandType = CL_COMMAND_SVM_MIGRATE_MEM; EXPECT_EQ(expectedCommandType, commandQueue.lastCommandType); } TEST(CreateSvmAllocTests, givenVariousSvmAllocationPropertiesWhenAllocatingSvmThenSvmIsCorrectlyAllocated) { if (!defaultHwInfo->capabilityTable.ftrSvm) { return; } DebugManagerStateRestore dbgRestore; SVMAllocsManager::SvmAllocationProperties svmAllocationProperties; for (auto isLocalMemorySupported : ::testing::Bool()) { DebugManager.flags.EnableLocalMemory.set(isLocalMemorySupported); auto mockDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); auto mockContext = std::make_unique(mockDevice.get()); for (auto isReadOnly : ::testing::Bool()) { for (auto isHostPtrReadOnly : ::testing::Bool()) { svmAllocationProperties.readOnly = isReadOnly; svmAllocationProperties.hostPtrReadOnly = isHostPtrReadOnly; auto ptrSVM = mockContext->getSVMAllocsManager()->createSVMAlloc(256, svmAllocationProperties, mockContext->getRootDeviceIndices(), mockContext->getDeviceBitfields()); EXPECT_NE(nullptr, ptrSVM); mockContext->getSVMAllocsManager()->freeSVMAlloc(ptrSVM); } } } } struct EnqueueSvmTestLocalMemory : public ClDeviceFixture, public ::testing::Test { void SetUp() override { REQUIRE_SVM_OR_SKIP(defaultHwInfo); dbgRestore = std::make_unique(); DebugManager.flags.EnableLocalMemory.set(1); ClDeviceFixture::SetUp(); context = std::make_unique(pClDevice, true); size = 256; svmPtr = context->getSVMAllocsManager()->createSVMAlloc(size, {}, context->getRootDeviceIndices(), context->getDeviceBitfields()); ASSERT_NE(nullptr, svmPtr); mockSvmManager = reinterpret_cast(context->getSVMAllocsManager()); } void TearDown() override { if (defaultHwInfo->capabilityTable.ftrSvm == false) { return; } context->getSVMAllocsManager()->freeSVMAlloc(svmPtr); context.reset(nullptr); ClDeviceFixture::TearDown(); } cl_int retVal = CL_SUCCESS; void *svmPtr = nullptr; size_t size; MockSVMAllocsManager 
*mockSvmManager; std::unique_ptr dbgRestore; std::unique_ptr context; HardwareParse hwParse; }; HWTEST_F(EnqueueSvmTestLocalMemory, givenWriteInvalidateRegionFlagWhenMappingSvmThenMapIsSuccessfulAndReadOnlyFlagIsFalse) { MockCommandQueueHw queue(context.get(), pClDevice, nullptr); uintptr_t offset = 64; void *regionSvmPtr = ptrOffset(svmPtr, offset); size_t regionSize = 64; retVal = queue.enqueueSVMMap( CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION, regionSvmPtr, regionSize, 0, nullptr, nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); auto svmMap = mockSvmManager->svmMapOperations.get(regionSvmPtr); EXPECT_FALSE(svmMap->readOnlyMap); } HWTEST_F(EnqueueSvmTestLocalMemory, givenGpuHangAndBlockingCallAndWriteInvalidateRegionFlagWhenMappingSvmThenOutOfResourcesIsReturned) { MockCommandQueueHw queue(context.get(), pClDevice, nullptr); queue.waitForAllEnginesReturnValue = WaitStatus::GpuHang; uintptr_t offset = 64; void *regionSvmPtr = ptrOffset(svmPtr, offset); size_t regionSize = 64; const auto enqueueResult = queue.enqueueSVMMap( CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION, regionSvmPtr, regionSize, 0, nullptr, nullptr, false); EXPECT_EQ(CL_OUT_OF_RESOURCES, enqueueResult); EXPECT_EQ(1, queue.waitForAllEnginesCalledCount); } HWTEST_F(EnqueueSvmTestLocalMemory, givenMapWriteFlagWhenMappingSvmThenMapIsSuccessfulAndReadOnlyFlagIsFalse) { MockCommandQueueHw queue(context.get(), pClDevice, nullptr); uintptr_t offset = 64; void *regionSvmPtr = ptrOffset(svmPtr, offset); size_t regionSize = 64; retVal = queue.enqueueSVMMap( CL_TRUE, CL_MAP_WRITE, regionSvmPtr, regionSize, 0, nullptr, nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); auto svmMap = mockSvmManager->svmMapOperations.get(regionSvmPtr); EXPECT_FALSE(svmMap->readOnlyMap); } HWTEST_F(EnqueueSvmTestLocalMemory, givenMapReadFlagWhenMappingSvmThenMapIsSuccessfulAndReadOnlyFlagIsTrue) { MockCommandQueueHw queue(context.get(), pClDevice, nullptr); uintptr_t offset = 64; void *regionSvmPtr = ptrOffset(svmPtr, offset); size_t 
regionSize = 64; retVal = queue.enqueueSVMMap( CL_TRUE, CL_MAP_READ, regionSvmPtr, regionSize, 0, nullptr, nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); auto svmMap = mockSvmManager->svmMapOperations.get(regionSvmPtr); EXPECT_TRUE(svmMap->readOnlyMap); } HWTEST_F(EnqueueSvmTestLocalMemory, givenMapReadAndWriteFlagWhenMappingSvmThenDontSetReadOnlyProperty) { MockCommandQueueHw queue(context.get(), pClDevice, nullptr); uintptr_t offset = 64; void *regionSvmPtr = ptrOffset(svmPtr, offset); size_t regionSize = 64; retVal = queue.enqueueSVMMap( CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, regionSvmPtr, regionSize, 0, nullptr, nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); auto svmMap = mockSvmManager->svmMapOperations.get(regionSvmPtr); EXPECT_FALSE(svmMap->readOnlyMap); } HWTEST_F(EnqueueSvmTestLocalMemory, givenSvmAllocWithoutFlagsWhenMappingSvmThenMapIsSuccessfulAndReadOnlyFlagIsTrue) { MockCommandQueueHw queue(context.get(), pClDevice, nullptr); uintptr_t offset = 64; void *regionSvmPtr = ptrOffset(svmPtr, offset); size_t regionSize = 64; retVal = queue.enqueueSVMMap( CL_TRUE, 0, regionSvmPtr, regionSize, 0, nullptr, nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); auto svmMap = mockSvmManager->svmMapOperations.get(regionSvmPtr); EXPECT_FALSE(svmMap->readOnlyMap); } HWTEST_F(EnqueueSvmTestLocalMemory, givenEnabledLocalMemoryWhenEnqueueMapValidSvmPtrThenExpectSingleWalker) { using WALKER_TYPE = typename FamilyType::WALKER_TYPE; MockCommandQueueHw queue(context.get(), pClDevice, nullptr); LinearStream &stream = queue.getCS(0x1000); cl_event event = nullptr; uintptr_t offset = 64; void *regionSvmPtr = ptrOffset(svmPtr, offset); size_t regionSize = 64; retVal = queue.enqueueSVMMap( CL_FALSE, CL_MAP_READ, regionSvmPtr, regionSize, 0, nullptr, &event, false); EXPECT_EQ(CL_SUCCESS, retVal); auto svmMap = mockSvmManager->svmMapOperations.get(regionSvmPtr); ASSERT_NE(nullptr, svmMap); EXPECT_EQ(regionSvmPtr, svmMap->regionSvmPtr); EXPECT_EQ(svmPtr, svmMap->baseSvmPtr); 
EXPECT_EQ(regionSize, svmMap->regionSize); EXPECT_EQ(offset, svmMap->offset); EXPECT_TRUE(svmMap->readOnlyMap); queue.flush(); hwParse.parseCommands(stream); auto walkerCount = hwParse.getCommandCount(); EXPECT_EQ(1u, walkerCount); constexpr cl_command_type expectedCmd = CL_COMMAND_SVM_MAP; cl_command_type actualCmd = castToObjectOrAbort(event)->getCommandType(); EXPECT_EQ(expectedCmd, actualCmd); clReleaseEvent(event); } HWTEST_F(EnqueueSvmTestLocalMemory, givenEnabledLocalMemoryWhenEnqueueMapSvmPtrTwiceThenExpectSingleWalker) { using WALKER_TYPE = typename FamilyType::WALKER_TYPE; MockCommandQueueHw queue(context.get(), pClDevice, nullptr); LinearStream &stream = queue.getCS(0x1000); uintptr_t offset = 64; void *regionSvmPtr = ptrOffset(svmPtr, offset); size_t regionSize = 64; retVal = queue.enqueueSVMMap( CL_FALSE, CL_MAP_WRITE, regionSvmPtr, regionSize, 0, nullptr, nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); auto svmMap = mockSvmManager->svmMapOperations.get(regionSvmPtr); ASSERT_NE(nullptr, svmMap); EXPECT_EQ(regionSvmPtr, svmMap->regionSvmPtr); EXPECT_EQ(svmPtr, svmMap->baseSvmPtr); EXPECT_EQ(regionSize, svmMap->regionSize); EXPECT_EQ(offset, svmMap->offset); EXPECT_FALSE(svmMap->readOnlyMap); EXPECT_EQ(1u, mockSvmManager->svmMapOperations.getNumMapOperations()); cl_event event = nullptr; retVal = queue.enqueueSVMMap( CL_FALSE, CL_MAP_WRITE, regionSvmPtr, regionSize, 0, nullptr, &event, false); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, mockSvmManager->svmMapOperations.getNumMapOperations()); queue.flush(); hwParse.parseCommands(stream); auto walkerCount = hwParse.getCommandCount(); EXPECT_EQ(1u, walkerCount); constexpr cl_command_type expectedCmd = CL_COMMAND_SVM_MAP; cl_command_type actualCmd = castToObjectOrAbort(event)->getCommandType(); EXPECT_EQ(expectedCmd, actualCmd); clReleaseEvent(event); } HWTEST_F(EnqueueSvmTestLocalMemory, 
givenEnabledLocalMemoryAndBlockingCallAndGpuHangOnSecondMapWhenEnqueueMapSvmPtrTwiceThenSecondCallReturnsOutOfresources) { std::unique_ptr device(new MockClDevice{MockClDevice::createWithNewExecutionEnvironment(nullptr)}); cl_queue_properties props = {}; MockCommandQueueHw mockCommandQueueHw(context.get(), device.get(), &props); mockCommandQueueHw.waitForAllEnginesReturnValue = WaitStatus::Ready; uintptr_t offset = 64; void *regionSvmPtr = ptrOffset(svmPtr, offset); size_t regionSize = 64; const auto firstMapResult = mockCommandQueueHw.enqueueSVMMap( CL_TRUE, CL_MAP_WRITE, regionSvmPtr, regionSize, 0, nullptr, nullptr, false); EXPECT_EQ(CL_SUCCESS, firstMapResult); mockCommandQueueHw.waitForAllEnginesReturnValue = WaitStatus::GpuHang; const auto secondMapResult = mockCommandQueueHw.enqueueSVMMap( CL_TRUE, CL_MAP_WRITE, regionSvmPtr, regionSize, 0, nullptr, nullptr, false); EXPECT_EQ(CL_OUT_OF_RESOURCES, secondMapResult); } HWTEST_F(EnqueueSvmTestLocalMemory, givenEnabledLocalMemoryWhenNoMappedSvmPtrThenExpectNoUnmapCopyKernel) { using WALKER_TYPE = typename FamilyType::WALKER_TYPE; MockCommandQueueHw queue(context.get(), pClDevice, nullptr); LinearStream &stream = queue.getCS(0x1000); cl_event event = nullptr; retVal = queue.enqueueSVMUnmap( svmPtr, 0, nullptr, &event, false); EXPECT_EQ(CL_SUCCESS, retVal); queue.flush(); hwParse.parseCommands(stream); auto walkerCount = hwParse.getCommandCount(); EXPECT_EQ(0u, walkerCount); constexpr cl_command_type expectedCmd = CL_COMMAND_SVM_UNMAP; cl_command_type actualCmd = castToObjectOrAbort(event)->getCommandType(); EXPECT_EQ(expectedCmd, actualCmd); clReleaseEvent(event); } HWTEST_F(EnqueueSvmTestLocalMemory, givenEnabledLocalMemoryAndGpuHangAndBlockingCallWhenUnmappingThenReturnOutOfResources) { DebugManagerStateRestore stateRestore; DebugManager.flags.MakeEachEnqueueBlocking.set(true); std::unique_ptr device(new MockClDevice{MockClDevice::createWithNewExecutionEnvironment(nullptr)}); cl_queue_properties props = {}; 
MockCommandQueueHw mockCommandQueueHw(context.get(), device.get(), &props); mockCommandQueueHw.waitForAllEnginesReturnValue = WaitStatus::GpuHang; const auto enqueueResult = mockCommandQueueHw.enqueueSVMUnmap( svmPtr, 0, nullptr, nullptr, false); EXPECT_EQ(CL_OUT_OF_RESOURCES, enqueueResult); EXPECT_EQ(1, mockCommandQueueHw.waitForAllEnginesCalledCount); } HWTEST_F(EnqueueSvmTestLocalMemory, givenEnabledLocalMemoryWhenMappedSvmRegionIsReadOnlyThenExpectNoUnmapCopyKernel) { using WALKER_TYPE = typename FamilyType::WALKER_TYPE; MockCommandQueueHw queue(context.get(), pClDevice, nullptr); LinearStream &stream = queue.getCS(0x1000); retVal = queue.enqueueSVMMap( CL_FALSE, CL_MAP_READ, svmPtr, size, 0, nullptr, nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, mockSvmManager->svmMapOperations.getNumMapOperations()); auto svmMap = mockSvmManager->svmMapOperations.get(svmPtr); ASSERT_NE(nullptr, svmMap); queue.flush(); size_t offset = stream.getUsed(); hwParse.parseCommands(stream); auto walkerCount = hwParse.getCommandCount(); EXPECT_EQ(1u, walkerCount); hwParse.TearDown(); cl_event event = nullptr; retVal = queue.enqueueSVMUnmap( svmPtr, 0, nullptr, &event, false); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, mockSvmManager->svmMapOperations.getNumMapOperations()); queue.flush(); hwParse.parseCommands(stream, offset); walkerCount = hwParse.getCommandCount(); EXPECT_EQ(0u, walkerCount); constexpr cl_command_type expectedCmd = CL_COMMAND_SVM_UNMAP; cl_command_type actualCmd = castToObjectOrAbort(event)->getCommandType(); EXPECT_EQ(expectedCmd, actualCmd); clReleaseEvent(event); } HWTEST_F(EnqueueSvmTestLocalMemory, givenEnabledLocalMemoryAndBlockingCallAndGpuHangForUnmapWhenUnmapingThenOutOfResourcesIsReturnedFromUnmap) { DebugManagerStateRestore dbgRestore; DebugManager.flags.MakeEachEnqueueBlocking.set(true); std::unique_ptr device(new MockClDevice{MockClDevice::createWithNewExecutionEnvironment(nullptr)}); cl_queue_properties props = {}; MockCommandQueueHw 
mockCommandQueueHw(context.get(), device.get(), &props); mockCommandQueueHw.waitForAllEnginesReturnValue = WaitStatus::Ready; const auto enqueueMapResult = mockCommandQueueHw.enqueueSVMMap( CL_FALSE, CL_MAP_READ, svmPtr, size, 0, nullptr, nullptr, false); EXPECT_EQ(CL_SUCCESS, enqueueMapResult); mockCommandQueueHw.waitForAllEnginesReturnValue = WaitStatus::GpuHang; const auto enqueueUnmapResult = mockCommandQueueHw.enqueueSVMUnmap( svmPtr, 0, nullptr, nullptr, false); EXPECT_EQ(CL_OUT_OF_RESOURCES, enqueueUnmapResult); } HWTEST_F(EnqueueSvmTestLocalMemory, givenNonReadOnlyMapWhenUnmappingThenSetAubTbxWritableBeforeUnmapEnqueue) { class MyQueue : public MockCommandQueueHw { public: using MockCommandQueueHw::MockCommandQueueHw; void enqueueHandlerHook(const unsigned int commandType, const MultiDispatchInfo &dispatchInfo) override { waitUntilCompleteCalled++; if (allocationToVerify) { EXPECT_TRUE(allocationToVerify->isAubWritable(GraphicsAllocation::defaultBank)); EXPECT_TRUE(allocationToVerify->isTbxWritable(GraphicsAllocation::defaultBank)); } } uint32_t waitUntilCompleteCalled = 0; GraphicsAllocation *allocationToVerify = nullptr; }; MyQueue myQueue(context.get(), pClDevice, nullptr); retVal = myQueue.enqueueSVMMap(CL_TRUE, CL_MAP_WRITE, svmPtr, size, 0, nullptr, nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); auto gpuAllocation = mockSvmManager->getSVMAlloc(svmPtr)->gpuAllocations.getGraphicsAllocation(context->getDevice(0)->getRootDeviceIndex()); myQueue.allocationToVerify = gpuAllocation; gpuAllocation->setAubWritable(false, GraphicsAllocation::defaultBank); gpuAllocation->setTbxWritable(false, GraphicsAllocation::defaultBank); EXPECT_EQ(1u, myQueue.waitUntilCompleteCalled); retVal = myQueue.enqueueSVMUnmap(svmPtr, 0, nullptr, nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(2u, myQueue.waitUntilCompleteCalled); } HWTEST_F(EnqueueSvmTestLocalMemory, givenReadOnlyMapWhenUnmappingThenDontResetAubTbxWritable) { MockCommandQueueHw queue(context.get(), 
pClDevice, nullptr); retVal = queue.enqueueSVMMap(CL_TRUE, CL_MAP_READ, svmPtr, size, 0, nullptr, nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); auto gpuAllocation = mockSvmManager->getSVMAlloc(svmPtr)->gpuAllocations.getGraphicsAllocation(context->getDevice(0)->getRootDeviceIndex()); gpuAllocation->setAubWritable(false, GraphicsAllocation::defaultBank); gpuAllocation->setTbxWritable(false, GraphicsAllocation::defaultBank); retVal = queue.enqueueSVMUnmap(svmPtr, 0, nullptr, nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_FALSE(gpuAllocation->isAubWritable(GraphicsAllocation::defaultBank)); EXPECT_FALSE(gpuAllocation->isTbxWritable(GraphicsAllocation::defaultBank)); } HWTEST_F(EnqueueSvmTestLocalMemory, givenEnabledLocalMemoryWhenMappedSvmRegionIsWritableThenExpectMapAndUnmapCopyKernel) { using WALKER_TYPE = typename FamilyType::WALKER_TYPE; MockCommandQueueHw queue(context.get(), pClDevice, nullptr); LinearStream &stream = queue.getCS(0x1000); cl_event eventMap = nullptr; retVal = queue.enqueueSVMMap( CL_FALSE, CL_MAP_WRITE, svmPtr, size, 0, nullptr, &eventMap, false); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, mockSvmManager->svmMapOperations.getNumMapOperations()); auto svmMap = mockSvmManager->svmMapOperations.get(svmPtr); ASSERT_NE(nullptr, svmMap); cl_event eventUnmap = nullptr; retVal = queue.enqueueSVMUnmap( svmPtr, 0, nullptr, &eventUnmap, false); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, mockSvmManager->svmMapOperations.getNumMapOperations()); queue.flush(); hwParse.parseCommands(stream); auto walkerCount = hwParse.getCommandCount(); EXPECT_EQ(2u, walkerCount); constexpr cl_command_type expectedMapCmd = CL_COMMAND_SVM_MAP; cl_command_type actualMapCmd = castToObjectOrAbort(eventMap)->getCommandType(); EXPECT_EQ(expectedMapCmd, actualMapCmd); constexpr cl_command_type expectedUnmapCmd = CL_COMMAND_SVM_UNMAP; cl_command_type actualUnmapCmd = castToObjectOrAbort(eventUnmap)->getCommandType(); EXPECT_EQ(expectedUnmapCmd, actualUnmapCmd); 
clReleaseEvent(eventMap); clReleaseEvent(eventUnmap); } HWTEST_F(EnqueueSvmTestLocalMemory, givenGpuHangAndBlockingCallAndEnabledLocalMemoryWhenMappedSvmRegionIsWritableThenUnmapReturnsOutOfResources) { DebugManagerStateRestore stateRestore; DebugManager.flags.MakeEachEnqueueBlocking.set(true); MockCommandQueueHw queue(context.get(), pClDevice, nullptr); queue.waitForAllEnginesReturnValue = WaitStatus::Ready; const auto enqueueMapResult = queue.enqueueSVMMap( CL_TRUE, CL_MAP_WRITE, svmPtr, size, 0, nullptr, nullptr, false); EXPECT_EQ(CL_SUCCESS, enqueueMapResult); queue.waitForAllEnginesReturnValue = WaitStatus::GpuHang; const auto enqueueUnmapResult = queue.enqueueSVMUnmap( svmPtr, 0, nullptr, nullptr, false); EXPECT_EQ(CL_OUT_OF_RESOURCES, enqueueUnmapResult); EXPECT_EQ(2, queue.waitForAllEnginesCalledCount); } HWTEST_F(EnqueueSvmTestLocalMemory, givenEnabledLocalMemoryWhenMappedSvmRegionAndNoEventIsUsedIsWritableThenExpectMapAndUnmapCopyKernelAnNo) { using WALKER_TYPE = typename FamilyType::WALKER_TYPE; MockCommandQueueHw queue(context.get(), pClDevice, nullptr); LinearStream &stream = queue.getCS(0x1000); retVal = queue.enqueueSVMMap( CL_FALSE, CL_MAP_WRITE, svmPtr, size, 0, nullptr, nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, mockSvmManager->svmMapOperations.getNumMapOperations()); auto svmMap = mockSvmManager->svmMapOperations.get(svmPtr); ASSERT_NE(nullptr, svmMap); retVal = queue.enqueueSVMUnmap( svmPtr, 0, nullptr, nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, mockSvmManager->svmMapOperations.getNumMapOperations()); queue.flush(); hwParse.parseCommands(stream); auto walkerCount = hwParse.getCommandCount(); EXPECT_EQ(2u, walkerCount); } template struct FailCsr : public CommandStreamReceiverHw { using CommandStreamReceiverHw::CommandStreamReceiverHw; bool createAllocationForHostSurface(HostPtrSurface &surface, bool requiresL3Flush) override { return CL_FALSE; } }; HWTEST_F(EnqueueSvmTest, 
whenInternalAllocationsAreMadeResidentThenOnlyNonSvmAllocationsAreAdded) { SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::DEVICE_UNIFIED_MEMORY, context->getRootDeviceIndices(), context->getDeviceBitfields()); unifiedMemoryProperties.device = pDevice; auto allocationSize = 4096u; auto svmManager = this->context->getSVMAllocsManager(); EXPECT_NE(0u, svmManager->getNumAllocs()); auto unifiedMemoryPtr = svmManager->createUnifiedMemoryAllocation(allocationSize, unifiedMemoryProperties); EXPECT_NE(nullptr, unifiedMemoryPtr); EXPECT_EQ(2u, svmManager->getNumAllocs()); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); auto &residentAllocations = commandStreamReceiver.getResidencyAllocations(); EXPECT_EQ(0u, residentAllocations.size()); svmManager->makeInternalAllocationsResident(commandStreamReceiver, InternalMemoryType::DEVICE_UNIFIED_MEMORY); //only unified memory allocation is made resident EXPECT_EQ(1u, residentAllocations.size()); EXPECT_EQ(residentAllocations[0]->getGpuAddress(), castToUint64(unifiedMemoryPtr)); svmManager->freeSVMAlloc(unifiedMemoryPtr); } HWTEST_F(EnqueueSvmTest, whenInternalAllocationsAreAddedToResidencyContainerThenOnlyExpectedAllocationsAreAdded) { SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::DEVICE_UNIFIED_MEMORY, context->getRootDeviceIndices(), context->getDeviceBitfields()); unifiedMemoryProperties.device = pDevice; auto allocationSize = 4096u; auto svmManager = this->context->getSVMAllocsManager(); EXPECT_NE(0u, svmManager->getNumAllocs()); auto unifiedMemoryPtr = svmManager->createUnifiedMemoryAllocation(allocationSize, unifiedMemoryProperties); EXPECT_NE(nullptr, unifiedMemoryPtr); EXPECT_EQ(2u, svmManager->getNumAllocs()); ResidencyContainer residencyContainer; EXPECT_EQ(0u, residencyContainer.size()); svmManager->addInternalAllocationsToResidencyContainer(pDevice->getRootDeviceIndex(), residencyContainer, 
InternalMemoryType::DEVICE_UNIFIED_MEMORY); //only unified memory allocation is added to residency container EXPECT_EQ(1u, residencyContainer.size()); EXPECT_EQ(residencyContainer[0]->getGpuAddress(), castToUint64(unifiedMemoryPtr)); svmManager->freeSVMAlloc(unifiedMemoryPtr); } HWTEST_F(EnqueueSvmTest, whenInternalAllocationIsTriedToBeAddedTwiceToResidencyContainerThenItIsAdded) { SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::DEVICE_UNIFIED_MEMORY, context->getRootDeviceIndices(), context->getDeviceBitfields()); unifiedMemoryProperties.device = pDevice; auto allocationSize = 4096u; auto svmManager = this->context->getSVMAllocsManager(); EXPECT_NE(0u, svmManager->getNumAllocs()); auto unifiedMemoryPtr = svmManager->createUnifiedMemoryAllocation(allocationSize, unifiedMemoryProperties); EXPECT_NE(nullptr, unifiedMemoryPtr); EXPECT_EQ(2u, svmManager->getNumAllocs()); ResidencyContainer residencyContainer; EXPECT_EQ(0u, residencyContainer.size()); svmManager->addInternalAllocationsToResidencyContainer(pDevice->getRootDeviceIndex(), residencyContainer, InternalMemoryType::DEVICE_UNIFIED_MEMORY); //only unified memory allocation is added to residency container EXPECT_EQ(1u, residencyContainer.size()); EXPECT_EQ(residencyContainer[0]->getGpuAddress(), castToUint64(unifiedMemoryPtr)); svmManager->addInternalAllocationsToResidencyContainer(pDevice->getRootDeviceIndex(), residencyContainer, InternalMemoryType::DEVICE_UNIFIED_MEMORY); EXPECT_EQ(2u, residencyContainer.size()); svmManager->freeSVMAlloc(unifiedMemoryPtr); } struct createHostUnifiedMemoryAllocationTest : public ::testing::Test { void SetUp() override { REQUIRE_SVM_OR_SKIP(defaultHwInfo); device0 = context.pRootDevice0; device1 = context.pRootDevice1; device2 = context.pRootDevice2; svmManager = context.getSVMAllocsManager(); EXPECT_EQ(0u, svmManager->getNumAllocs()); } const size_t allocationSize = 4096u; const uint32_t numDevices = 3u; MockDefaultContext context; 
MockClDevice *device2; MockClDevice *device1; MockClDevice *device0; SVMAllocsManager *svmManager = nullptr; }; HWTEST_F(createHostUnifiedMemoryAllocationTest, whenCreatingHostUnifiedMemoryAllocationThenOneAllocDataIsCreatedWithOneGraphicsAllocationPerDevice) { NEO::SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::HOST_UNIFIED_MEMORY, context.getRootDeviceIndices(), context.getDeviceBitfields()); EXPECT_EQ(0u, svmManager->getNumAllocs()); auto unifiedMemoryPtr = svmManager->createHostUnifiedMemoryAllocation(allocationSize, unifiedMemoryProperties); EXPECT_NE(nullptr, unifiedMemoryPtr); EXPECT_EQ(1u, svmManager->getNumAllocs()); auto allocData = svmManager->getSVMAlloc(unifiedMemoryPtr); EXPECT_EQ(numDevices, allocData->gpuAllocations.getGraphicsAllocations().size()); for (uint32_t i = 0; i < allocData->gpuAllocations.getGraphicsAllocations().size(); i++) { auto alloc = allocData->gpuAllocations.getGraphicsAllocation(i); EXPECT_EQ(i, alloc->getRootDeviceIndex()); } svmManager->freeSVMAlloc(unifiedMemoryPtr); } HWTEST_F(createHostUnifiedMemoryAllocationTest, whenCreatingMultiGraphicsAllocationThenGraphicsAllocationPerDeviceIsCreated) { NEO::SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::HOST_UNIFIED_MEMORY, context.getRootDeviceIndices(), context.getDeviceBitfields()); auto alignedSize = alignUp(allocationSize, MemoryConstants::pageSize64k); auto memoryManager = context.getMemoryManager(); auto allocationType = AllocationType::BUFFER_HOST_MEMORY; auto maxRootDeviceIndex = numDevices - 1u; std::vector rootDeviceIndices; rootDeviceIndices.reserve(numDevices); rootDeviceIndices.push_back(0u); rootDeviceIndices.push_back(1u); rootDeviceIndices.push_back(2u); auto rootDeviceIndex = rootDeviceIndices.at(0); auto deviceBitfield = device0->getDeviceBitfield(); AllocationProperties allocationProperties{rootDeviceIndex, true, alignedSize, allocationType, deviceBitfield.count() > 1, 
deviceBitfield.count() > 1, deviceBitfield}; allocationProperties.flags.shareable = unifiedMemoryProperties.allocationFlags.flags.shareable; SvmAllocationData allocData(maxRootDeviceIndex); void *unifiedMemoryPtr = memoryManager->createMultiGraphicsAllocationInSystemMemoryPool(rootDeviceIndices, allocationProperties, allocData.gpuAllocations); EXPECT_NE(nullptr, unifiedMemoryPtr); EXPECT_EQ(numDevices, allocData.gpuAllocations.getGraphicsAllocations().size()); for (auto rootDeviceIndex = 0u; rootDeviceIndex <= maxRootDeviceIndex; rootDeviceIndex++) { auto alloc = allocData.gpuAllocations.getGraphicsAllocation(rootDeviceIndex); EXPECT_NE(nullptr, alloc); EXPECT_EQ(rootDeviceIndex, alloc->getRootDeviceIndex()); } for (auto gpuAllocation : allocData.gpuAllocations.getGraphicsAllocations()) { memoryManager->freeGraphicsMemory(gpuAllocation); } } HWTEST_F(createHostUnifiedMemoryAllocationTest, whenCreatingMultiGraphicsAllocationForSpecificRootDeviceIndicesThenOnlyGraphicsAllocationPerSpecificRootDeviceIndexIsCreated) { NEO::SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::HOST_UNIFIED_MEMORY, context.getRootDeviceIndices(), context.getDeviceBitfields()); auto alignedSize = alignUp(allocationSize, MemoryConstants::pageSize64k); auto memoryManager = context.getMemoryManager(); auto allocationType = AllocationType::BUFFER_HOST_MEMORY; auto maxRootDeviceIndex = numDevices - 1u; std::vector rootDeviceIndices; rootDeviceIndices.reserve(numDevices); rootDeviceIndices.push_back(0u); rootDeviceIndices.push_back(2u); auto noProgramedRootDeviceIndex = 1u; auto rootDeviceIndex = rootDeviceIndices.at(0); auto deviceBitfield = device0->getDeviceBitfield(); AllocationProperties allocationProperties{rootDeviceIndex, true, alignedSize, allocationType, deviceBitfield.count() > 1, deviceBitfield.count() > 1, deviceBitfield}; allocationProperties.flags.shareable = unifiedMemoryProperties.allocationFlags.flags.shareable; SvmAllocationData 
allocData(maxRootDeviceIndex); void *unifiedMemoryPtr = memoryManager->createMultiGraphicsAllocationInSystemMemoryPool(rootDeviceIndices, allocationProperties, allocData.gpuAllocations); EXPECT_NE(nullptr, unifiedMemoryPtr); EXPECT_EQ(numDevices, allocData.gpuAllocations.getGraphicsAllocations().size()); for (auto rootDeviceIndex = 0u; rootDeviceIndex <= maxRootDeviceIndex; rootDeviceIndex++) { auto alloc = allocData.gpuAllocations.getGraphicsAllocation(rootDeviceIndex); if (rootDeviceIndex == noProgramedRootDeviceIndex) { EXPECT_EQ(nullptr, alloc); } else { EXPECT_NE(nullptr, alloc); EXPECT_EQ(rootDeviceIndex, alloc->getRootDeviceIndex()); } } for (auto gpuAllocation : allocData.gpuAllocations.getGraphicsAllocations()) { memoryManager->freeGraphicsMemory(gpuAllocation); } } struct MemoryAllocationTypeArray { const InternalMemoryType allocationType[3] = {InternalMemoryType::HOST_UNIFIED_MEMORY, InternalMemoryType::DEVICE_UNIFIED_MEMORY, InternalMemoryType::SHARED_UNIFIED_MEMORY}; }; struct UpdateResidencyContainerMultipleDevicesTest : public ::testing::WithParamInterface>, public ::testing::Test { void SetUp() override { device = context.pRootDevice0; subDevice0 = context.pSubDevice00; subDevice1 = context.pSubDevice01; peerDevice = context.pRootDevice1; peerSubDevice0 = context.pSubDevice10; peerSubDevice1 = context.pSubDevice11; svmManager = context.getSVMAllocsManager(); EXPECT_EQ(0u, svmManager->getNumAllocs()); } MockUnrestrictiveContextMultiGPU context; MockClDevice *device; ClDevice *subDevice0 = nullptr; ClDevice *subDevice1 = nullptr; MockClDevice *peerDevice; ClDevice *peerSubDevice0 = nullptr; ClDevice *peerSubDevice1 = nullptr; SVMAllocsManager *svmManager = nullptr; const uint32_t numRootDevices = 2; const uint32_t maxRootDeviceIndex = numRootDevices - 1; }; HWTEST_F(UpdateResidencyContainerMultipleDevicesTest, givenNoAllocationsCreatedThenNoInternalAllocationsAreAddedToResidencyContainer) { ResidencyContainer residencyContainer; EXPECT_EQ(0u, 
residencyContainer.size()); svmManager->addInternalAllocationsToResidencyContainer(device->getDevice().getRootDeviceIndex(), residencyContainer, InternalMemoryType::DEVICE_UNIFIED_MEMORY); EXPECT_EQ(0u, residencyContainer.size()); } HWTEST_P(UpdateResidencyContainerMultipleDevicesTest, givenAllocationThenItIsAddedToContainerOnlyIfMaskMatches) { uint32_t pCmdBuffer[1024]; MockGraphicsAllocation gfxAllocation(device->getDevice().getRootDeviceIndex(), static_cast(pCmdBuffer), sizeof(pCmdBuffer)); InternalMemoryType type = std::get<0>(GetParam()); uint32_t mask = std::get<1>(GetParam()); SvmAllocationData allocData(maxRootDeviceIndex); allocData.gpuAllocations.addAllocation(&gfxAllocation); allocData.memoryType = type; allocData.device = &device->getDevice(); svmManager->insertSVMAlloc(allocData); EXPECT_EQ(1u, svmManager->getNumAllocs()); ResidencyContainer residencyContainer; EXPECT_EQ(0u, residencyContainer.size()); svmManager->addInternalAllocationsToResidencyContainer(device->getDevice().getRootDeviceIndex(), residencyContainer, mask); if (mask == static_cast(type)) { EXPECT_EQ(1u, residencyContainer.size()); EXPECT_EQ(residencyContainer[0]->getGpuAddress(), gfxAllocation.getGpuAddress()); } else { EXPECT_EQ(0u, residencyContainer.size()); } } HWTEST_P(UpdateResidencyContainerMultipleDevicesTest, whenUsingRootDeviceIndexGreaterThanMultiGraphicsAllocationSizeThenNoAllocationsAreAdded) { uint32_t pCmdBuffer[1024]; MockGraphicsAllocation gfxAllocation(device->getDevice().getRootDeviceIndex(), static_cast(pCmdBuffer), sizeof(pCmdBuffer)); SvmAllocationData allocData(maxRootDeviceIndex); allocData.gpuAllocations.addAllocation(&gfxAllocation); allocData.memoryType = InternalMemoryType::DEVICE_UNIFIED_MEMORY; allocData.device = &device->getDevice(); uint32_t pCmdBufferPeer[1024]; MockGraphicsAllocation gfxAllocationPeer(peerDevice->getDevice().getRootDeviceIndex(), (void *)pCmdBufferPeer, sizeof(pCmdBufferPeer)); SvmAllocationData allocDataPeer(maxRootDeviceIndex); 
allocDataPeer.gpuAllocations.addAllocation(&gfxAllocationPeer); allocDataPeer.memoryType = InternalMemoryType::DEVICE_UNIFIED_MEMORY; allocDataPeer.device = &peerDevice->getDevice(); svmManager->insertSVMAlloc(allocData); svmManager->insertSVMAlloc(allocDataPeer); EXPECT_EQ(2u, svmManager->getNumAllocs()); ResidencyContainer residencyContainer; EXPECT_EQ(0u, residencyContainer.size()); svmManager->addInternalAllocationsToResidencyContainer(numRootDevices + 1, residencyContainer, InternalMemoryType::DEVICE_UNIFIED_MEMORY); EXPECT_EQ(0u, residencyContainer.size()); svmManager->addInternalAllocationsToResidencyContainer(device->getDevice().getRootDeviceIndex(), residencyContainer, InternalMemoryType::DEVICE_UNIFIED_MEMORY); EXPECT_EQ(1u, residencyContainer.size()); EXPECT_EQ(residencyContainer[0]->getGpuAddress(), gfxAllocation.getGpuAddress()); } MemoryAllocationTypeArray memoryTypeArray; INSTANTIATE_TEST_SUITE_P(UpdateResidencyContainerMultipleDevicesTests, UpdateResidencyContainerMultipleDevicesTest, ::testing::Combine( ::testing::ValuesIn(memoryTypeArray.allocationType), ::testing::ValuesIn(memoryTypeArray.allocationType))); HWTEST_F(UpdateResidencyContainerMultipleDevicesTest, whenInternalAllocationsAreAddedToResidencyContainerThenOnlyAllocationsFromSameDeviceAreAdded) { uint32_t pCmdBuffer[1024]; MockGraphicsAllocation gfxAllocation(device->getDevice().getRootDeviceIndex(), static_cast(pCmdBuffer), sizeof(pCmdBuffer)); SvmAllocationData allocData(maxRootDeviceIndex); allocData.gpuAllocations.addAllocation(&gfxAllocation); allocData.memoryType = InternalMemoryType::DEVICE_UNIFIED_MEMORY; allocData.device = &device->getDevice(); uint32_t pCmdBufferPeer[1024]; MockGraphicsAllocation gfxAllocationPeer(peerDevice->getDevice().getRootDeviceIndex(), (void *)pCmdBufferPeer, sizeof(pCmdBufferPeer)); SvmAllocationData allocDataPeer(maxRootDeviceIndex); allocDataPeer.gpuAllocations.addAllocation(&gfxAllocationPeer); allocDataPeer.memoryType = 
InternalMemoryType::DEVICE_UNIFIED_MEMORY; allocDataPeer.device = &peerDevice->getDevice(); svmManager->insertSVMAlloc(allocData); svmManager->insertSVMAlloc(allocDataPeer); EXPECT_EQ(2u, svmManager->getNumAllocs()); ResidencyContainer residencyContainer; EXPECT_EQ(0u, residencyContainer.size()); svmManager->addInternalAllocationsToResidencyContainer(device->getDevice().getRootDeviceIndex(), residencyContainer, InternalMemoryType::DEVICE_UNIFIED_MEMORY); EXPECT_EQ(1u, residencyContainer.size()); EXPECT_EQ(residencyContainer[0]->getGpuAddress(), gfxAllocation.getGpuAddress()); } HWTEST_F(UpdateResidencyContainerMultipleDevicesTest, givenSharedAllocationWithNullDevicePointerThenAllocationIsAddedToResidencyContainer) { uint32_t pCmdBuffer[1024]; MockGraphicsAllocation gfxAllocation(device->getDevice().getRootDeviceIndex(), static_cast(pCmdBuffer), sizeof(pCmdBuffer)); SvmAllocationData allocData(maxRootDeviceIndex); allocData.gpuAllocations.addAllocation(&gfxAllocation); allocData.memoryType = InternalMemoryType::SHARED_UNIFIED_MEMORY; allocData.device = nullptr; svmManager->insertSVMAlloc(allocData); EXPECT_EQ(1u, svmManager->getNumAllocs()); ResidencyContainer residencyContainer; EXPECT_EQ(0u, residencyContainer.size()); svmManager->addInternalAllocationsToResidencyContainer(device->getDevice().getRootDeviceIndex(), residencyContainer, InternalMemoryType::SHARED_UNIFIED_MEMORY); EXPECT_EQ(1u, residencyContainer.size()); EXPECT_EQ(residencyContainer[0]->getGpuAddress(), gfxAllocation.getGpuAddress()); } HWTEST_F(UpdateResidencyContainerMultipleDevicesTest, givenSharedAllocationWithNonNullDevicePointerAndDifferentDeviceToOnePassedToResidencyCallThenAllocationIsNotAddedToResidencyContainer) { uint32_t pCmdBuffer[1024]; MockGraphicsAllocation gfxAllocation(peerDevice->getDevice().getRootDeviceIndex(), static_cast(pCmdBuffer), sizeof(pCmdBuffer)); SvmAllocationData allocData(maxRootDeviceIndex); allocData.gpuAllocations.addAllocation(&gfxAllocation); allocData.memoryType 
= InternalMemoryType::SHARED_UNIFIED_MEMORY; allocData.device = &peerDevice->getDevice(); svmManager->insertSVMAlloc(allocData); EXPECT_EQ(1u, svmManager->getNumAllocs()); ResidencyContainer residencyContainer; EXPECT_EQ(0u, residencyContainer.size()); svmManager->addInternalAllocationsToResidencyContainer(device->getDevice().getRootDeviceIndex(), residencyContainer, InternalMemoryType::SHARED_UNIFIED_MEMORY); EXPECT_EQ(0u, residencyContainer.size()); } HWTEST_F(UpdateResidencyContainerMultipleDevicesTest, givenAllocationsFromSubDevicesBelongingToTheSameTargetDeviceThenTheyAreAddedToTheResidencyContainer) { uint32_t pCmdBuffer[1024]; MockGraphicsAllocation gfxAllocation(device->getDevice().getRootDeviceIndex(), static_cast(pCmdBuffer), sizeof(pCmdBuffer)); SvmAllocationData allocData0(maxRootDeviceIndex); allocData0.gpuAllocations.addAllocation(&gfxAllocation); allocData0.memoryType = InternalMemoryType::DEVICE_UNIFIED_MEMORY; allocData0.device = &subDevice0->getDevice(); uint32_t pCmdBufferPeer[1024]; MockGraphicsAllocation gfxAllocationPeer(device->getDevice().getRootDeviceIndex(), (void *)pCmdBufferPeer, sizeof(pCmdBufferPeer)); SvmAllocationData allocData1(maxRootDeviceIndex); allocData1.gpuAllocations.addAllocation(&gfxAllocationPeer); allocData1.memoryType = InternalMemoryType::DEVICE_UNIFIED_MEMORY; allocData1.device = &subDevice1->getDevice(); svmManager->insertSVMAlloc(allocData0); svmManager->insertSVMAlloc(allocData1); EXPECT_EQ(2u, svmManager->getNumAllocs()); ResidencyContainer residencyContainer; EXPECT_EQ(0u, residencyContainer.size()); svmManager->addInternalAllocationsToResidencyContainer(device->getDevice().getRootDeviceIndex(), residencyContainer, InternalMemoryType::DEVICE_UNIFIED_MEMORY); EXPECT_EQ(2u, residencyContainer.size()); } HWTEST_F(UpdateResidencyContainerMultipleDevicesTest, givenAllocationsFromSubDevicesNotBelongingToTheSameTargetDeviceThenTheyAreNotAddedToTheResidencyContainer) { uint32_t pCmdBuffer[1024]; MockGraphicsAllocation 
gfxAllocation(device->getDevice().getRootDeviceIndex(), static_cast(pCmdBuffer), sizeof(pCmdBuffer)); SvmAllocationData allocData0(maxRootDeviceIndex); allocData0.gpuAllocations.addAllocation(&gfxAllocation); allocData0.memoryType = InternalMemoryType::DEVICE_UNIFIED_MEMORY; allocData0.device = &subDevice0->getDevice(); uint32_t pCmdBufferPeer[1024]; MockGraphicsAllocation gfxAllocationPeer(device->getDevice().getRootDeviceIndex(), (void *)pCmdBufferPeer, sizeof(pCmdBufferPeer)); SvmAllocationData allocData1(maxRootDeviceIndex); allocData1.gpuAllocations.addAllocation(&gfxAllocationPeer); allocData1.memoryType = InternalMemoryType::DEVICE_UNIFIED_MEMORY; allocData1.device = &subDevice1->getDevice(); svmManager->insertSVMAlloc(allocData0); svmManager->insertSVMAlloc(allocData1); EXPECT_EQ(2u, svmManager->getNumAllocs()); ResidencyContainer residencyContainer; EXPECT_EQ(0u, residencyContainer.size()); svmManager->addInternalAllocationsToResidencyContainer(peerDevice->getDevice().getRootDeviceIndex(), residencyContainer, InternalMemoryType::DEVICE_UNIFIED_MEMORY); EXPECT_EQ(0u, residencyContainer.size()); } HWTEST_F(EnqueueSvmTest, GivenDstHostPtrWhenHostPtrAllocationCreationFailsThenReturnOutOfResource) { char dstHostPtr[260]; void *pDstSVM = dstHostPtr; void *pSrcSVM = ptrSVM; MockCommandQueueHw cmdQ(context, pClDevice, nullptr); auto failCsr = std::make_unique>(*pDevice->getExecutionEnvironment(), pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); CommandStreamReceiver *oldCommandStreamReceiver = cmdQ.gpgpuEngine->commandStreamReceiver; cmdQ.gpgpuEngine->commandStreamReceiver = failCsr.get(); retVal = cmdQ.enqueueSVMMemcpy( false, // cl_bool blocking_copy pDstSVM, // void *dst_ptr pSrcSVM, // const void *src_ptr 256, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // cl_evebt *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_OUT_OF_RESOURCES, retVal); cmdQ.gpgpuEngine->commandStreamReceiver = oldCommandStreamReceiver; } 
HWTEST_F(EnqueueSvmTest, GivenSrcHostPtrAndSizeZeroWhenHostPtrAllocationCreationFailsThenReturnOutOfResource) { char srcHostPtr[260]; void *pDstSVM = ptrSVM; void *pSrcSVM = srcHostPtr; MockCommandQueueHw cmdQ(context, pClDevice, nullptr); auto failCsr = std::make_unique>(*pDevice->getExecutionEnvironment(), pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); CommandStreamReceiver *oldCommandStreamReceiver = cmdQ.gpgpuEngine->commandStreamReceiver; cmdQ.gpgpuEngine->commandStreamReceiver = failCsr.get(); retVal = cmdQ.enqueueSVMMemcpy( false, // cl_bool blocking_copy pDstSVM, // void *dst_ptr pSrcSVM, // const void *src_ptr 256, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // cl_evebt *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_OUT_OF_RESOURCES, retVal); cmdQ.gpgpuEngine->commandStreamReceiver = oldCommandStreamReceiver; } HWTEST_F(EnqueueSvmTest, givenDstHostPtrAndSrcHostPtrWhenHostPtrAllocationCreationFailsThenReturnOutOfResource) { char dstHostPtr[260]; char srcHostPtr[260]; void *pDstSVM = dstHostPtr; void *pSrcSVM = srcHostPtr; MockCommandQueueHw cmdQ(context, pClDevice, nullptr); auto failCsr = std::make_unique>(*pDevice->getExecutionEnvironment(), pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); CommandStreamReceiver *oldCommandStreamReceiver = cmdQ.gpgpuEngine->commandStreamReceiver; cmdQ.gpgpuEngine->commandStreamReceiver = failCsr.get(); retVal = cmdQ.enqueueSVMMemcpy( false, // cl_bool blocking_copy pDstSVM, // void *dst_ptr pSrcSVM, // const void *src_ptr 256, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // cl_evebt *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_OUT_OF_RESOURCES, retVal); cmdQ.gpgpuEngine->commandStreamReceiver = oldCommandStreamReceiver; } TEST_F(EnqueueSvmTest, givenPageFaultManagerWhenEnqueueMemcpyThenAllocIsDecommitted) { auto mockMemoryManager = std::make_unique(); mockMemoryManager->pageFaultManager.reset(new MockPageFaultManager()); auto 
memoryManager = context->getMemoryManager(); context->memoryManager = mockMemoryManager.get(); auto srcSvm = context->getSVMAllocsManager()->createSVMAlloc(256, {}, context->getRootDeviceIndices(), context->getDeviceBitfields()); mockMemoryManager->getPageFaultManager()->insertAllocation(srcSvm, 256, context->getSVMAllocsManager(), context->getSpecialQueue(pDevice->getRootDeviceIndex()), {}); mockMemoryManager->getPageFaultManager()->insertAllocation(ptrSVM, 256, context->getSVMAllocsManager(), context->getSpecialQueue(pDevice->getRootDeviceIndex()), {}); EXPECT_EQ(static_cast(mockMemoryManager->getPageFaultManager())->transferToCpuCalled, 0); this->pCmdQ->enqueueSVMMemcpy(false, ptrSVM, srcSvm, 256, 0, nullptr, nullptr); EXPECT_EQ(static_cast(mockMemoryManager->getPageFaultManager())->allowMemoryAccessCalled, 0); EXPECT_EQ(static_cast(mockMemoryManager->getPageFaultManager())->protectMemoryCalled, 2); EXPECT_EQ(static_cast(mockMemoryManager->getPageFaultManager())->transferToCpuCalled, 0); EXPECT_EQ(static_cast(mockMemoryManager->getPageFaultManager())->transferToGpuCalled, 2); context->getSVMAllocsManager()->freeSVMAlloc(srcSvm); context->memoryManager = memoryManager; } TEST_F(EnqueueSvmTest, givenPageFaultManagerWhenEnqueueMemFillThenAllocIsDecommitted) { char pattern[256]; auto mockMemoryManager = std::make_unique(); mockMemoryManager->pageFaultManager.reset(new MockPageFaultManager()); auto memoryManager = context->getMemoryManager(); context->memoryManager = mockMemoryManager.get(); mockMemoryManager->getPageFaultManager()->insertAllocation(ptrSVM, 256, context->getSVMAllocsManager(), context->getSpecialQueue(0u), {}); EXPECT_EQ(static_cast(mockMemoryManager->getPageFaultManager())->transferToCpuCalled, 0); EXPECT_EQ(static_cast(mockMemoryManager->getPageFaultManager())->protectMemoryCalled, 0); pCmdQ->enqueueSVMMemFill(ptrSVM, &pattern, 256, 256, 0, nullptr, nullptr); EXPECT_EQ(static_cast(mockMemoryManager->getPageFaultManager())->allowMemoryAccessCalled, 
0); EXPECT_EQ(static_cast(mockMemoryManager->getPageFaultManager())->protectMemoryCalled, 1); EXPECT_EQ(static_cast(mockMemoryManager->getPageFaultManager())->transferToCpuCalled, 0); EXPECT_EQ(static_cast(mockMemoryManager->getPageFaultManager())->transferToGpuCalled, 1); context->memoryManager = memoryManager; } HWTEST_F(EnqueueSvmTest, givenCopyFromMappedPtrToSvmAllocWhenCallingSvmMemcpyThenReuseMappedAllocations) { constexpr size_t size = 1u; auto &csr = pDevice->getUltCommandStreamReceiver(); { auto [buffer, mappedPtr] = createBufferAndMapItOnGpu(); std::ignore = buffer; EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled); retVal = this->pCmdQ->enqueueSVMMemcpy( false, // cl_bool blocking_copy ptrSVM, // void *dst_ptr mappedPtr, // const void *src_ptr size, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // cl_evebt *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled); } { auto notMappedPtr = std::make_unique(size); EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled); retVal = this->pCmdQ->enqueueSVMMemcpy( false, // cl_bool blocking_copy ptrSVM, // void *dst_ptr notMappedPtr.get(), // const void *src_ptr size, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // cl_evebt *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, csr.createAllocationForHostSurfaceCalled); } } HWTEST_F(EnqueueSvmTest, givenCopyFromSvmAllocToMappedPtrWhenCallingSvmMemcpyThenReuseMappedAllocations) { constexpr size_t size = 1u; auto &csr = pDevice->getUltCommandStreamReceiver(); { auto [buffer, mappedPtr] = createBufferAndMapItOnGpu(); std::ignore = buffer; EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled); retVal = this->pCmdQ->enqueueSVMMemcpy( false, // cl_bool blocking_copy mappedPtr, // void *dst_ptr ptrSVM, // const void *src_ptr size, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // cl_evebt 
*event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled); } { auto notMappedPtr = std::make_unique(size); EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled); retVal = this->pCmdQ->enqueueSVMMemcpy( false, // cl_bool blocking_copy notMappedPtr.get(), // void *dst_ptr ptrSVM, // const void *src_ptr size, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // cl_evebt *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, csr.createAllocationForHostSurfaceCalled); } } HWTEST_F(EnqueueSvmTest, givenCopyFromMappedPtrToMappedPtrWhenCallingSvmMemcpyThenReuseMappedAllocations) { constexpr size_t size = 1u; auto &csr = pDevice->getUltCommandStreamReceiver(); { auto [buffer1, mappedPtr1] = createBufferAndMapItOnGpu(); auto [buffer2, mappedPtr2] = createBufferAndMapItOnGpu(); std::ignore = buffer1; std::ignore = buffer2; EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled); retVal = this->pCmdQ->enqueueSVMMemcpy( false, // cl_bool blocking_copy mappedPtr2, // void *dst_ptr mappedPtr1, // const void *src_ptr size, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // cl_evebt *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled); } { auto [buffer, mappedPtr] = createBufferAndMapItOnGpu(); std::ignore = buffer; auto notMappedPtr = std::make_unique(size); EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled); retVal = this->pCmdQ->enqueueSVMMemcpy( false, // cl_bool blocking_copy mappedPtr, // void *dst_ptr notMappedPtr.get(), // const void *src_ptr size, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // cl_evebt *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, csr.createAllocationForHostSurfaceCalled); } { auto notMappedPtr = std::make_unique(size); auto [buffer, mappedPtr] = 
createBufferAndMapItOnGpu(); std::ignore = buffer; EXPECT_EQ(1u, csr.createAllocationForHostSurfaceCalled); retVal = this->pCmdQ->enqueueSVMMemcpy( false, // cl_bool blocking_copy notMappedPtr.get(), // void *dst_ptr mappedPtr, // const void *src_ptr size, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // cl_evebt *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(2u, csr.createAllocationForHostSurfaceCalled); } } compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/enqueue_thread_tests.cpp000066400000000000000000000415201422164147700313770ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver_hw.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/memory_manager/os_agnostic_memory_manager.h" #include "shared/test/common/libult/ult_command_stream_receiver.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/common/test_macros/test_checks_shared.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/helpers/cl_memory_properties_helpers.h" #include "opencl/source/kernel/kernel.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/mem_obj/image.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" using namespace NEO; namespace ULT { template class CommandStreamReceiverMock : public UltCommandStreamReceiver { private: std::vector toFree; // pointers to be freed on destruction Device *pDevice; ClDevice *pClDevice; public: size_t expectedToFreeCount = (size_t)-1; CommandStreamReceiverMock(Device *pDevice) : UltCommandStreamReceiver(*pDevice->getExecutionEnvironment(), pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()) { this->pDevice = pDevice; this->pClDevice = pDevice->getSpecializedDevice(); } SubmissionStatus 
flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override { EXPECT_NE(nullptr, batchBuffer.commandBufferAllocation->getUnderlyingBuffer()); toFree.push_back(batchBuffer.commandBufferAllocation); batchBuffer.stream->replaceBuffer(nullptr, 0); batchBuffer.stream->replaceGraphicsAllocation(nullptr); EXPECT_TRUE(this->ownershipMutex.try_lock()); this->ownershipMutex.unlock(); return SubmissionStatus::SUCCESS; } ~CommandStreamReceiverMock() override { EXPECT_FALSE(pClDevice->hasOwnership()); if (expectedToFreeCount == (size_t)-1) { EXPECT_GT(toFree.size(), 0u); //make sure flush was called } else { EXPECT_EQ(toFree.size(), expectedToFreeCount); } auto memoryManager = this->getMemoryManager(); //Now free memory. if CQ/CSR did the same, we will hit double-free for (auto p : toFree) memoryManager->freeGraphicsMemory(p); } }; struct EnqueueThreadingFixture : public ClDeviceFixture { void SetUp() { ClDeviceFixture::SetUp(); context = new MockContext(pClDevice); pCmdQ = nullptr; } void TearDown() { delete pCmdQ; context->release(); ClDeviceFixture::TearDown(); } template class MyCommandQueue : public CommandQueueHw { public: MyCommandQueue(Context *context, ClDevice *device, const cl_queue_properties *props) : CommandQueueHw(context, device, props, false), kernel(nullptr) { } static CommandQueue *create(Context *context, ClDevice *device, cl_command_queue_properties props) { const cl_queue_properties properties[3] = {CL_QUEUE_PROPERTIES, props, 0}; return new MyCommandQueue(context, device, properties); } protected: ~MyCommandQueue() override { if (kernel) { EXPECT_FALSE(kernel->getMultiDeviceKernel()->hasOwnership()); } } void enqueueHandlerHook(const unsigned int commandType, const MultiDispatchInfo &multiDispatchInfo) override { for (auto &dispatchInfo : multiDispatchInfo) { auto &kernel = *dispatchInfo.getKernel(); EXPECT_TRUE(kernel.getMultiDeviceKernel()->hasOwnership()); } } Kernel *kernel; }; CommandQueue *pCmdQ; MockContext *context; 
template void createCQ() { pCmdQ = MyCommandQueue::create(context, pClDevice, 0); ASSERT_NE(nullptr, pCmdQ); auto pCommandStreamReceiver = new CommandStreamReceiverMock(pDevice); pDevice->resetCommandStreamReceiver(pCommandStreamReceiver); } }; typedef Test EnqueueThreading; struct EnqueueThreadingImage : EnqueueThreading { void SetUp() override { REQUIRE_IMAGES_OR_SKIP(defaultHwInfo); EnqueueThreading::SetUp(); } void TearDown() override { if (!IsSkipped()) { EnqueueThreading::TearDown(); } } }; HWTEST_F(EnqueueThreading, WhenEnqueuingReadBufferThenKernelHasOwnership) { createCQ(); cl_int retVal; std::unique_ptr buffer(Buffer::create(context, CL_MEM_READ_WRITE, 1024u, nullptr, retVal)); ASSERT_NE(nullptr, buffer.get()); void *ptr = ::alignedMalloc(1024u, 4096); ASSERT_NE(nullptr, ptr); buffer->forceDisallowCPUCopy = true; pCmdQ->enqueueReadBuffer(buffer.get(), true, 0, 1024u, ptr, nullptr, 0, nullptr, nullptr); alignedFree(ptr); } HWTEST_F(EnqueueThreading, WhenEnqueuingWriteBufferThenKernelHasOwnership) { createCQ(); cl_int retVal; std::unique_ptr buffer(Buffer::create(context, CL_MEM_READ_WRITE, 1024u, nullptr, retVal)); ASSERT_NE(nullptr, buffer.get()); void *ptr = ::alignedMalloc(1024u, 4096); ASSERT_NE(nullptr, ptr); buffer->forceDisallowCPUCopy = true; pCmdQ->enqueueWriteBuffer(buffer.get(), true, 0, 1024u, ptr, nullptr, 0, nullptr, nullptr); alignedFree(ptr); } HWTEST_F(EnqueueThreading, WhenEnqueuingCopyBufferThenKernelHasOwnership) { createCQ(); cl_int retVal; std::unique_ptr srcBuffer(Buffer::create(context, CL_MEM_READ_WRITE, 1024u, nullptr, retVal)); ASSERT_NE(nullptr, srcBuffer.get()); std::unique_ptr dstBuffer(Buffer::create(context, CL_MEM_READ_WRITE, 1024u, nullptr, retVal)); ASSERT_NE(nullptr, dstBuffer.get()); pCmdQ->enqueueCopyBuffer(srcBuffer.get(), dstBuffer.get(), 0, 0, 1024u, 0, nullptr, nullptr); } HWTEST_F(EnqueueThreading, WhenEnqueuingCopyBufferRectThenKernelHasOwnership) { createCQ(); cl_int retVal; std::unique_ptr 
srcBuffer(Buffer::create(context, CL_MEM_READ_WRITE, 1024u, nullptr, retVal)); ASSERT_NE(nullptr, srcBuffer.get()); std::unique_ptr dstBuffer(Buffer::create(context, CL_MEM_READ_WRITE, 1024u, nullptr, retVal)); ASSERT_NE(nullptr, dstBuffer.get()); size_t srcOrigin[3] = {1024u, 1, 0}; size_t dstOrigin[3] = {1024u, 1, 0}; size_t region[3] = {1024u, 1, 1}; pCmdQ->enqueueCopyBufferRect(srcBuffer.get(), dstBuffer.get(), srcOrigin, dstOrigin, region, 0, 0, 0, 0, 0, nullptr, nullptr); } HWTEST_F(EnqueueThreadingImage, WhenEnqueuingCopyBufferToImageThenKernelHasOwnership) { createCQ(); cl_int retVal; std::unique_ptr srcBuffer(Buffer::create(context, CL_MEM_READ_WRITE, 1024u, nullptr, retVal)); ASSERT_NE(nullptr, srcBuffer.get()); cl_image_format imageFormat; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_R; cl_image_desc imageDesc; memset(&imageDesc, 0, sizeof(imageDesc)); imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D; imageDesc.image_width = 1024u; cl_mem_flags flags = CL_MEM_WRITE_ONLY; auto surfaceFormat = Image::getSurfaceFormatFromTable( flags, &imageFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); std::unique_ptr dstImage( Image::create(context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context->getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal)); ASSERT_NE(nullptr, dstImage.get()); size_t dstOrigin[3] = {1024u, 1, 0}; size_t region[3] = {1024u, 1, 1}; pCmdQ->enqueueCopyBufferToImage(srcBuffer.get(), dstImage.get(), 0, dstOrigin, region, 0, nullptr, nullptr); } HWTEST_F(EnqueueThreadingImage, WhenEnqueuingCopyImageThenKernelHasOwnership) { createCQ(); cl_int retVal; cl_image_format imageFormat; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_R; cl_image_desc imageDesc; memset(&imageDesc, 0, sizeof(imageDesc)); imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D; imageDesc.image_width = 1024u; 
cl_mem_flags flags = CL_MEM_WRITE_ONLY; auto surfaceFormat = Image::getSurfaceFormatFromTable( flags, &imageFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); std::unique_ptr srcImage( Image::create(context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context->getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal)); ASSERT_NE(nullptr, srcImage.get()); std::unique_ptr dstImage( Image::create(context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context->getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal)); ASSERT_NE(nullptr, srcImage.get()); size_t srcOrigin[3] = {1024u, 1, 0}; size_t dstOrigin[3] = {1024u, 1, 0}; size_t region[3] = {1024u, 1, 1}; pCmdQ->enqueueCopyImage(srcImage.get(), dstImage.get(), srcOrigin, dstOrigin, region, 0, nullptr, nullptr); } HWTEST_F(EnqueueThreadingImage, WhenEnqueuingCopyImageToBufferThenKernelHasOwnership) { createCQ(); cl_int retVal; cl_image_format imageFormat; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_R; cl_image_desc imageDesc; memset(&imageDesc, 0, sizeof(imageDesc)); imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D; imageDesc.image_width = 1024u; cl_mem_flags flags = CL_MEM_WRITE_ONLY; auto surfaceFormat = Image::getSurfaceFormatFromTable( flags, &imageFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); std::unique_ptr srcImage( Image::create(context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context->getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal)); ASSERT_NE(nullptr, srcImage.get()); std::unique_ptr dstBuffer(Buffer::create(context, CL_MEM_READ_WRITE, 1024u, nullptr, retVal)); ASSERT_NE(nullptr, dstBuffer.get()); size_t srcOrigin[3] = {1024u, 1, 0}; size_t region[3] = {1024u, 1, 1}; pCmdQ->enqueueCopyImageToBuffer(srcImage.get(), dstBuffer.get(), srcOrigin, 
region, 0, 0, nullptr, nullptr); } HWTEST_F(EnqueueThreading, WhenEnqueuingFillBufferThenKernelHasOwnership) { createCQ(); cl_int retVal; std::unique_ptr buffer(Buffer::create(context, CL_MEM_READ_WRITE, 1024u, nullptr, retVal)); ASSERT_NE(nullptr, buffer.get()); cl_int pattern = 0xDEADBEEF; pCmdQ->enqueueFillBuffer(buffer.get(), &pattern, sizeof(pattern), 0, 1024u, 0, nullptr, nullptr); } HWTEST_F(EnqueueThreadingImage, WhenEnqueuingFillImageThenKernelHasOwnership) { createCQ(); cl_int retVal; cl_image_format imageFormat; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_R; cl_image_desc imageDesc; memset(&imageDesc, 0, sizeof(imageDesc)); imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D; imageDesc.image_width = 1024u; cl_mem_flags flags = CL_MEM_WRITE_ONLY; auto surfaceFormat = Image::getSurfaceFormatFromTable( flags, &imageFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); std::unique_ptr image( Image::create(context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context->getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal)); ASSERT_NE(nullptr, image.get()); size_t origin[3] = {1024u, 1, 0}; size_t region[3] = {1024u, 1, 1}; int32_t fillColor[4] = {0xCC, 0xCC, 0xCC, 0xCC}; pCmdQ->enqueueFillImage(image.get(), &fillColor, origin, region, 0, nullptr, nullptr); } HWTEST_F(EnqueueThreading, WhenEnqueuingReadBufferRectThenKernelHasOwnership) { createCQ(); cl_int retVal; std::unique_ptr buffer(Buffer::create(context, CL_MEM_READ_WRITE, 1024u, nullptr, retVal)); ASSERT_NE(nullptr, buffer.get()); void *ptr = ::alignedMalloc(1024u, 4096); ASSERT_NE(nullptr, ptr); size_t bufferOrigin[3] = {1024u, 1, 0}; size_t hostOrigin[3] = {1024u, 1, 0}; size_t region[3] = {1024u, 1, 1}; pCmdQ->enqueueReadBufferRect(buffer.get(), CL_TRUE, bufferOrigin, hostOrigin, region, 0, 0, 0, 0, ptr, 0, nullptr, nullptr); ::alignedFree(ptr); } HWTEST_F(EnqueueThreadingImage, 
WhenEnqueuingReadImageThenKernelHasOwnership) { createCQ(); cl_int retVal; cl_image_format imageFormat; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_R; cl_image_desc imageDesc; memset(&imageDesc, 0, sizeof(imageDesc)); imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D; imageDesc.image_width = 1024u; cl_mem_flags flags = CL_MEM_WRITE_ONLY; auto surfaceFormat = Image::getSurfaceFormatFromTable( flags, &imageFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); std::unique_ptr image(Image::create( context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context->getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal)); ASSERT_NE(nullptr, image.get()); void *ptr = ::alignedMalloc(1024u, 4096); ASSERT_NE(nullptr, ptr); size_t origin[3] = {1024u, 1, 0}; size_t region[3] = {1024u, 1, 1}; pCmdQ->enqueueReadImage(image.get(), CL_TRUE, origin, region, 0, 0, ptr, nullptr, 0, nullptr, nullptr); ::alignedFree(ptr); } HWTEST_F(EnqueueThreading, WhenEnqueuingWriteBufferRectThenKernelHasOwnership) { createCQ(); cl_int retVal; std::unique_ptr buffer(Buffer::create(context, CL_MEM_READ_WRITE, 1024u, nullptr, retVal)); ASSERT_NE(nullptr, buffer.get()); size_t bufferOrigin[3] = {1024u, 1, 0}; size_t hostOrigin[3] = {1024u, 1, 0}; size_t region[3] = {1024u, 1, 1}; auto hostPtrSize = Buffer::calculateHostPtrSize(hostOrigin, region, 0, 0); void *ptr = ::alignedMalloc(hostPtrSize, MemoryConstants::pageSize); ASSERT_NE(nullptr, ptr); pCmdQ->enqueueWriteBufferRect(buffer.get(), CL_TRUE, bufferOrigin, hostOrigin, region, 0, 0, 0, 0, ptr, 0, nullptr, nullptr); ::alignedFree(ptr); } HWTEST_F(EnqueueThreadingImage, WhenEnqueuingWriteImageThenKernelHasOwnership) { createCQ(); cl_int retVal; cl_image_format imageFormat; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_R; cl_image_desc imageDesc; memset(&imageDesc, 0, sizeof(imageDesc)); 
imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D; imageDesc.image_width = 1024u; cl_mem_flags flags = CL_MEM_READ_ONLY; auto surfaceFormat = Image::getSurfaceFormatFromTable( flags, &imageFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); std::unique_ptr image( Image::create(context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context->getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal)); ASSERT_NE(nullptr, image.get()); void *ptr = ::alignedMalloc(1024u, 4096); ASSERT_NE(nullptr, ptr); size_t origin[3] = {1024u, 1, 0}; size_t region[3] = {1024u, 1, 1}; pCmdQ->enqueueWriteImage(image.get(), CL_TRUE, origin, region, 0, 0, ptr, nullptr, 0, nullptr, nullptr); ::alignedFree(ptr); } HWTEST_F(EnqueueThreading, WhenFinishingThenKernelHasOwnership) { createCQ(); // set something to finish pCmdQ->taskCount = 1; pCmdQ->taskLevel = 1; auto csr = (CommandStreamReceiverMock *)&this->pCmdQ->getGpgpuCommandStreamReceiver(); csr->expectedToFreeCount = 0u; csr->latestSentTaskCount = 1; csr->latestFlushedTaskCount = 1; pCmdQ->finish(); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/enqueue_unmap_memobject_tests.cpp000066400000000000000000000253751422164147700333070ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/helpers/local_memory_access_modes.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/event/event.h" #include "opencl/test/unit_test/command_queue/command_queue_fixture.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" 
#include using namespace NEO; struct EnqueueUnmapMemObjTest : public ClDeviceFixture, public CommandQueueHwFixture, public ::testing::Test { typedef CommandQueueHwFixture CommandQueueFixture; EnqueueUnmapMemObjTest() { } void SetUp() override { ClDeviceFixture::SetUp(); CommandQueueFixture::SetUp(pClDevice, 0); BufferDefaults::context = new MockContext; buffer = BufferHelper>::create(); mappedPtr = pCmdQ->enqueueMapBuffer(buffer, CL_TRUE, CL_MAP_READ, 0, 8, 0, nullptr, nullptr, retVal); } void TearDown() override { delete buffer; delete BufferDefaults::context; CommandQueueFixture::TearDown(); ClDeviceFixture::TearDown(); } cl_int retVal = CL_SUCCESS; Buffer *buffer = nullptr; void *mappedPtr; }; TEST_F(EnqueueUnmapMemObjTest, GivenValidParamsWhenUnmappingMemoryObjectThenSuccessIsReturned) { auto retVal = pCmdQ->enqueueUnmapMemObject( buffer, mappedPtr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(EnqueueUnmapMemObjTest, GivenPointerToEventThenUnmappingMemoryObjectThenEventIsReturned) { cl_event event = nullptr; auto retVal = pCmdQ->enqueueUnmapMemObject( buffer, mappedPtr, 0, nullptr, &event); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, event); auto pEvent = (Event *)event; EXPECT_EQ(pCmdQ->taskLevel, pEvent->taskLevel); // Check CL_EVENT_COMMAND_TYPE { cl_command_type cmdType = 0; size_t sizeReturned = 0; auto result = clGetEventInfo(pEvent, CL_EVENT_COMMAND_TYPE, sizeof(cmdType), &cmdType, &sizeReturned); ASSERT_EQ(CL_SUCCESS, result); EXPECT_EQ(static_cast(CL_COMMAND_UNMAP_MEM_OBJECT), cmdType); EXPECT_EQ(sizeof(cl_command_type), sizeReturned); } delete pEvent; } TEST_F(EnqueueUnmapMemObjTest, WhenUnmappingMemoryObjectThenReturnedEventHasGreaterThanOrEqualTaskLevelThanParentEvent) { uint32_t taskLevelCmdQ = 17; uint32_t taskLevelEvent1 = 8; uint32_t taskLevelEvent2 = 19; auto taskLevelMax = std::max({taskLevelCmdQ, taskLevelEvent1, taskLevelEvent2}); pCmdQ->taskLevel = taskLevelCmdQ; Event event1(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 
taskLevelEvent1, 15); Event event2(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent2, 16); cl_event eventWaitList[] = { &event1, &event2}; cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]); cl_event event = nullptr; auto retVal = pCmdQ->enqueueUnmapMemObject( buffer, mappedPtr, numEventsInWaitList, eventWaitList, &event); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, event); auto pEvent = (Event *)event; EXPECT_GE(pEvent->taskLevel, taskLevelMax); delete pEvent; } HWTEST_F(EnqueueUnmapMemObjTest, WhenUnmappingMemoryObjectThenEventIsUpdated) { cl_event eventReturned = NULL; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.taskCount = 100; retVal = pCmdQ->enqueueUnmapMemObject( buffer, mappedPtr, 0, nullptr, &eventReturned); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, eventReturned); auto eventObject = castToObject(eventReturned); EXPECT_EQ(0u, eventObject->peekTaskCount()); EXPECT_TRUE(eventObject->updateStatusAndCheckCompletion()); clReleaseEvent(eventReturned); } TEST_F(EnqueueUnmapMemObjTest, WhenUnmappingMemoryObjectThenWaitEventIsUpdated) { cl_event waitEvent = nullptr; cl_event retEvent = nullptr; auto buffer = clCreateBuffer( BufferDefaults::context, CL_MEM_READ_WRITE, 20, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); auto ptrResult = clEnqueueMapBuffer( pCmdQ, buffer, CL_FALSE, CL_MAP_READ, 0, 8, 0, nullptr, &waitEvent, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, ptrResult); EXPECT_NE(nullptr, waitEvent); retVal = clEnqueueUnmapMemObject( pCmdQ, buffer, ptrResult, 1, &waitEvent, &retEvent); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, retEvent); Event *wEvent = castToObject(waitEvent); EXPECT_EQ(CL_COMPLETE, wEvent->peekExecutionStatus()); Event *rEvent = castToObject(retEvent); EXPECT_EQ(CL_COMPLETE, rEvent->peekExecutionStatus()); retVal = clWaitForEvents(1, &retEvent); EXPECT_EQ(CL_SUCCESS, retVal); retVal = 
clReleaseMemObject(buffer); EXPECT_EQ(CL_SUCCESS, retVal); clReleaseEvent(waitEvent); clReleaseEvent(retEvent); } HWTEST_F(EnqueueUnmapMemObjTest, givenEnqueueUnmapMemObjectWhenNonAubWritableBufferObjectMappedToHostPtrForWritingThenItShouldBeResetToAubAndTbxWritable) { auto buffer = std::unique_ptr(BufferHelper<>::create()); ASSERT_NE(nullptr, buffer); auto graphicsAllocation = buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex()); graphicsAllocation->setAubWritable(false, GraphicsAllocation::defaultBank); graphicsAllocation->setTbxWritable(false, GraphicsAllocation::defaultBank); auto mappedForWritingPtr = pCmdQ->enqueueMapBuffer(buffer.get(), CL_TRUE, CL_MAP_WRITE, 0, 8, 0, nullptr, nullptr, retVal); ASSERT_NE(nullptr, mappedForWritingPtr); retVal = pCmdQ->enqueueUnmapMemObject( buffer.get(), mappedForWritingPtr, 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(graphicsAllocation->isAubWritable(GraphicsAllocation::defaultBank)); EXPECT_TRUE(graphicsAllocation->isTbxWritable(GraphicsAllocation::defaultBank)); } HWTEST_F(EnqueueUnmapMemObjTest, givenWriteBufferIsServicedOnCPUWhenBufferIsNonAubTbxWriteableThenFlagsChange) { DebugManagerStateRestore restorer; DebugManager.flags.DoCpuCopyOnWriteBuffer.set(1); DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast(LocalMemoryAccessMode::Default)); auto buffer = std::unique_ptr(BufferHelper<>::create()); ASSERT_NE(nullptr, buffer); auto graphicsAllocation = buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex()); graphicsAllocation->setAubWritable(false, GraphicsAllocation::defaultBank); graphicsAllocation->setTbxWritable(false, GraphicsAllocation::defaultBank); EXPECT_FALSE(graphicsAllocation->isAubWritable(GraphicsAllocation::defaultBank)); EXPECT_FALSE(graphicsAllocation->isTbxWritable(GraphicsAllocation::defaultBank)); auto ptr = allocateAlignedMemory(buffer->getSize(), MemoryConstants::cacheLineSize); retVal = pCmdQ->enqueueWriteBuffer(buffer.get(), true, 0u, 
buffer->getSize(), ptr.get(), nullptr, 0u, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0, memcmp(ptr.get(), graphicsAllocation->getUnderlyingBuffer(), buffer->getSize())); EXPECT_TRUE(graphicsAllocation->isAubWritable(GraphicsAllocation::defaultBank)); EXPECT_TRUE(graphicsAllocation->isTbxWritable(GraphicsAllocation::defaultBank)); } HWTEST_F(EnqueueUnmapMemObjTest, givenMemObjWhenUnmappingThenSetAubWritableBeforeEnqueueWrite) { DebugManagerStateRestore restore; DebugManager.flags.DisableZeroCopyForBuffers.set(true); auto buffer = std::unique_ptr(BufferHelper<>::create()); auto image = std::unique_ptr(Image2dHelper<>::create(BufferDefaults::context)); class MyMockCommandQueue : public MockCommandQueue { public: using MockCommandQueue::MockCommandQueue; cl_int enqueueWriteBuffer(Buffer *buffer, cl_bool blockingWrite, size_t offset, size_t cb, const void *ptr, GraphicsAllocation *mapAllocation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { EXPECT_TRUE(buffer->getMapAllocation(device->getRootDeviceIndex())->isAubWritable(GraphicsAllocation::defaultBank)); EXPECT_TRUE(buffer->getMapAllocation(device->getRootDeviceIndex())->isTbxWritable(GraphicsAllocation::defaultBank)); return CL_SUCCESS; } cl_int enqueueWriteImage(Image *dstImage, cl_bool blockingWrite, const size_t *origin, const size_t *region, size_t inputRowPitch, size_t inputSlicePitch, const void *ptr, GraphicsAllocation *mapAllocation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { EXPECT_TRUE(dstImage->getMapAllocation(device->getRootDeviceIndex())->isAubWritable(GraphicsAllocation::defaultBank)); EXPECT_TRUE(dstImage->getMapAllocation(device->getRootDeviceIndex())->isTbxWritable(GraphicsAllocation::defaultBank)); return CL_SUCCESS; } }; MyMockCommandQueue myMockCmdQ(BufferDefaults::context, pClDevice, nullptr, false); { auto mapPtr = myMockCmdQ.enqueueMapBuffer(buffer.get(), CL_TRUE, CL_MAP_WRITE, 0, 8, 0, 
nullptr, nullptr, retVal); buffer->getMapAllocation(pClDevice->getRootDeviceIndex())->setAubWritable(false, GraphicsAllocation::defaultBank); buffer->getMapAllocation(pClDevice->getRootDeviceIndex())->setTbxWritable(false, GraphicsAllocation::defaultBank); myMockCmdQ.enqueueUnmapMemObject(buffer.get(), mapPtr, 0, nullptr, nullptr); } { size_t region[] = {1, 0, 0}; size_t origin[] = {0, 0, 0}; auto mapPtr = myMockCmdQ.enqueueMapImage(image.get(), CL_TRUE, CL_MAP_WRITE, origin, region, nullptr, nullptr, 0, nullptr, nullptr, retVal); image->getMapAllocation(pClDevice->getRootDeviceIndex())->setAubWritable(false, GraphicsAllocation::defaultBank); image->getMapAllocation(pClDevice->getRootDeviceIndex())->setTbxWritable(false, GraphicsAllocation::defaultBank); myMockCmdQ.enqueueUnmapMemObject(image.get(), mapPtr, 0, nullptr, nullptr); } } compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/enqueue_waitlist_tests.cpp000066400000000000000000000245731422164147700320010ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/hello_world_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" class clEventWrapper { public: clEventWrapper() { mMem = NULL; } clEventWrapper(cl_event mem) { mMem = mem; } clEventWrapper(const clEventWrapper &rhs) : mMem(rhs.mMem) { if (mMem != NULL) clRetainEvent(mMem); } ~clEventWrapper() { if (mMem != NULL) clReleaseEvent(mMem); } clEventWrapper &operator=(const cl_event &rhs) { mMem = rhs; return *this; } clEventWrapper &operator=(clEventWrapper rhs) { std::swap(mMem, rhs.mMem); return *this; } operator cl_event() const { return mMem; } cl_event *operator&() { return &mMem; } bool operator==(const cl_event &rhs) { return mMem == rhs; } protected: cl_event mMem; }; using namespace NEO; namespace ULT { struct EnqueueWaitlistTest; typedef HelloWorldTestWithParam 
EnqueueWaitlistFixture; typedef void (*ExecuteEnqueue)(EnqueueWaitlistTest *, uint32_t /*cl_uint*/, cl_event *, cl_event *, bool); struct EnqueueWaitlistTest : public EnqueueWaitlistFixture, public ::testing::TestWithParam { public: typedef CommandQueueHwFixture CommandQueueFixture; using CommandQueueHwFixture::pCmdQ; EnqueueWaitlistTest(void) { buffer = nullptr; } void SetUp() override { EnqueueWaitlistFixture::SetUp(); buffer = BufferHelper<>::create(); bufferNonZeroCopy = new UnalignedBuffer(BufferDefaults::context, &bufferNonZeroCopyAlloc); image = Image1dHelper<>::create(BufferDefaults::context); imageNonZeroCopy = ImageHelper>::create(BufferDefaults::context); } void TearDown() override { buffer->decRefInternal(); bufferNonZeroCopy->decRefInternal(); image->decRefInternal(); imageNonZeroCopy->decRefInternal(); EnqueueWaitlistFixture::TearDown(); } cl_int retVal = CL_SUCCESS; cl_int error = CL_SUCCESS; MockGraphicsAllocation bufferNonZeroCopyAlloc{nullptr, MemoryConstants::pageSize}; Buffer *buffer; Buffer *bufferNonZeroCopy; Image *image; Image *imageNonZeroCopy; void test_error(cl_int error, std::string str) { EXPECT_EQ(CL_SUCCESS, error) << str << std::endl; } static void EnqueueNDRange(EnqueueWaitlistTest *test, cl_uint numWaits, cl_event *waits, cl_event *outEvent, bool blocking = false) { size_t threadNum = 10; size_t threads[1] = {threadNum}; cl_int error = clEnqueueNDRangeKernel(test->pCmdQ, test->pMultiDeviceKernel, 1, NULL, threads, threads, numWaits, waits, outEvent); test->test_error(error, "Unable to execute kernel"); return; } static void EnqueueMapBuffer(EnqueueWaitlistTest *test, cl_uint numWaits, cl_event *waits, cl_event *outEvent, bool blocking = false) { cl_int error; void *mappedPtr = clEnqueueMapBuffer(test->pCmdQ, test->buffer, blocking ? 
CL_TRUE : CL_FALSE, CL_MAP_READ, 0, test->buffer->getSize(), numWaits, waits, outEvent, &error); EXPECT_NE(nullptr, mappedPtr); test->test_error(error, "Unable to enqueue buffer map"); error = clEnqueueUnmapMemObject(test->pCmdQ, test->buffer, mappedPtr, 0, nullptr, nullptr); return; } static void TwoEnqueueMapBuffer(EnqueueWaitlistTest *test, cl_uint numWaits, cl_event *waits, cl_event *outEvent, bool blocking = false) { cl_int error; void *mappedPtr = clEnqueueMapBuffer(test->pCmdQ, test->buffer, blocking ? CL_TRUE : CL_FALSE, CL_MAP_READ, 0, test->buffer->getSize(), numWaits, waits, outEvent, &error); EXPECT_NE(nullptr, mappedPtr); test->test_error(error, "Unable to enqueue buffer map"); void *mappedPtr2 = clEnqueueMapBuffer(test->pCmdQ, test->bufferNonZeroCopy, blocking ? CL_TRUE : CL_FALSE, CL_MAP_READ, 0, test->bufferNonZeroCopy->getSize(), 0, nullptr, nullptr, &error); EXPECT_NE(nullptr, mappedPtr2); test->test_error(error, "Unable to enqueue buffer map"); error = clEnqueueUnmapMemObject(test->pCmdQ, test->buffer, mappedPtr, 0, nullptr, nullptr); error = clEnqueueUnmapMemObject(test->pCmdQ, test->bufferNonZeroCopy, mappedPtr2, 0, nullptr, nullptr); return; } static void EnqueueUnMapBuffer(EnqueueWaitlistTest *test, cl_uint numWaits, cl_event *waits, cl_event *outEvent, bool blocking = false) { cl_int error; void *mappedPtr = clEnqueueMapBuffer(test->pCmdQ, test->buffer, CL_TRUE, CL_MAP_READ, 0, test->buffer->getSize(), 0, nullptr, nullptr, &error); EXPECT_NE(nullptr, mappedPtr); ASSERT_NE(test->buffer, nullptr); error = clEnqueueUnmapMemObject(test->pCmdQ, test->buffer, mappedPtr, numWaits, waits, outEvent); test->test_error(error, "Unable to unmap buffer"); return; } static void EnqueueMapImage(EnqueueWaitlistTest *test, cl_uint numWaits, cl_event *waits, cl_event *outEvent, bool blocking = false) { cl_int error; cl_image_desc desc = test->image->getImageDesc(); size_t origin[3] = {0, 0, 0}, region[3] = {desc.image_width, desc.image_height, 1}; size_t 
outPitch; void *mappedPtr = clEnqueueMapImage(test->pCmdQ, test->image, blocking ? CL_TRUE : CL_FALSE, CL_MAP_READ, origin, region, &outPitch, NULL, numWaits, waits, outEvent, &error); test->test_error(error, "Unable to enqueue image map"); EXPECT_NE(nullptr, mappedPtr); test->test_error(error, "Unable to enqueue buffer map"); error = clEnqueueUnmapMemObject(test->pCmdQ, test->image, mappedPtr, 0, nullptr, nullptr); return; } static void TwoEnqueueMapImage(EnqueueWaitlistTest *test, cl_uint numWaits, cl_event *waits, cl_event *outEvent, bool blocking = false) { cl_int error; cl_image_desc desc = test->image->getImageDesc(); size_t origin[3] = {0, 0, 0}, region[3] = {desc.image_width, desc.image_height, 1}; size_t outPitch; size_t origin2[3] = {0, 0, 0}, region2[3] = {desc.image_width, desc.image_height, 1}; size_t outPitch2; void *mappedPtr = clEnqueueMapImage(test->pCmdQ, test->image, blocking ? CL_TRUE : CL_FALSE, CL_MAP_READ, origin, region, &outPitch, NULL, numWaits, waits, outEvent, &error); test->test_error(error, "Unable to enqueue image map"); EXPECT_NE(nullptr, mappedPtr); test->test_error(error, "Unable to enqueue buffer map"); void *mappedPtr2 = clEnqueueMapImage(test->pCmdQ, test->imageNonZeroCopy, blocking ? 
CL_TRUE : CL_FALSE, CL_MAP_READ, origin2, region2, &outPitch2, NULL, 0, nullptr, nullptr, &error); test->test_error(error, "Unable to enqueue image map"); EXPECT_NE(nullptr, mappedPtr2); test->test_error(error, "Unable to enqueue buffer map"); error = clEnqueueUnmapMemObject(test->pCmdQ, test->image, mappedPtr, 0, nullptr, nullptr); error = clEnqueueUnmapMemObject(test->pCmdQ, test->imageNonZeroCopy, mappedPtr2, 0, nullptr, nullptr); return; } }; TEST_P(EnqueueWaitlistTest, GivenCompletedUserEventOnWaitlistWhenWaitingForOutputEventThenOutputEventIsCompleted) { // Set up a user event, which we use as a gate for the second event clEventWrapper gateEvent = clCreateUserEvent(context, &error); test_error(error, "Unable to set up user gate event"); // Set up the execution of the action with its actual event clEventWrapper actualEvent; // call the function to execute GetParam()(this, 1, &gateEvent, &actualEvent, false); // Now release the user event, which will allow our actual action to run error = clSetUserEventStatus(gateEvent, CL_COMPLETE); test_error(error, "Unable to trigger gate event"); // Now we wait for completion. 
// Runs the parameterized two-map enqueue helper gated by a user event, completes the gate,
// and then waits on the helper's output event.
TEST_P(EnqueueWaitlistTestTwoMapEnqueues, GivenCompletedUserEventOnWaitlistWhenWaitingForOutputEventThenOutputEventIsCompleted) {
    // Set up a user event, which we use as a gate for the second event
    clEventWrapper gateEvent = clCreateUserEvent(context, &error);
    test_error(error, "Unable to set up user gate event");

    // Set up the execution of the action with its actual event
    clEventWrapper actualEvent;

    // call the function to execute
    GetParam()(this, 1, &gateEvent, &actualEvent, false);

    // Now release the user event, which will allow our actual action to run
    error = clSetUserEventStatus(gateEvent, CL_COMPLETE);
    // This status was previously overwritten without being checked; the sibling
    // EnqueueWaitlistTest case verifies it with the same message.
    test_error(error, "Unable to trigger gate event");

    // Now we wait for completion. Note that we can actually wait on the event itself, at least at first
    error = clWaitForEvents(1, &actualEvent);
    test_error(error, "Unable to wait for actual test event");
}
// Enqueue helpers exercised by the EnqueueWaitlistTest parameterized cases; each entry is
// invoked through GetParam() with the ExecuteEnqueue signature.
ExecuteEnqueue Enqueues[] = {
    &EnqueueWaitlistTest::EnqueueNDRange,
    &EnqueueWaitlistTest::EnqueueMapBuffer,
    &EnqueueWaitlistTest::EnqueueUnMapBuffer,
    &EnqueueWaitlistTest::EnqueueMapImage};

// Helpers that issue two consecutive map operations; used by the
// EnqueueWaitlistTestTwoMapEnqueues cases.
ExecuteEnqueue TwoEnqueueMap[] = {
    &EnqueueWaitlistTest::TwoEnqueueMapBuffer,
    &EnqueueWaitlistTest::TwoEnqueueMapImage};

// Instantiate the single-enqueue cases once per entry of Enqueues.
INSTANTIATE_TEST_CASE_P(
    UnblockedEvent,
    EnqueueWaitlistTest,
    ::testing::ValuesIn(Enqueues));

// Instantiate the two-map cases once per entry of TwoEnqueueMap.
INSTANTIATE_TEST_CASE_P(
    TwoEnqueueMap,
    EnqueueWaitlistTestTwoMapEnqueues,
    ::testing::ValuesIn(TwoEnqueueMap));
// Enqueues a 32x32x32 kernel on a queue bound to the fixture's default engine, parses the
// resulting command stream, and checks that the last semaphore-wait command (skipping any
// that UnitTestHelper reports as "additional") carries a data dword equal to numberOfTiles.
// NOTE(review): several calls below (`VariableBackup`, `MockCommandQueueHw`, `genCmdCast`,
// `UnitTestHelper`) show no template argument list in this text, and the MI_SEMAPHORE_WAIT
// alias is otherwise unused — the arguments look lost in extraction; confirm against the
// original file.
HWCMDTEST_F(IGFX_XE_HP_CORE, EnqueueWithWalkerPartitionTests, givenCsrWithSpecificNumberOfTilesAndPipeControlWithStallRequiredWhenDispatchingThenConstructCmdBufferForAllSupportedTiles) {
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;

    // Force the "pipe control stall required" setting to true for the duration of the test.
    VariableBackup pipeControlConfigBackup(&ImplicitScalingDispatch::getPipeControlStallRequired(), true);
    MockCommandQueueHw commandQueue(context.get(), rootDevice.get(), nullptr);
    commandQueue.gpgpuEngine = &engineControlForFusedQueue;
    rootDevice->setPreemptionMode(PreemptionMode::Disabled);
    MockKernelWithInternals kernel(*rootDevice, context.get());

    size_t offset[3] = {0, 0, 0};
    size_t gws[3] = {32, 32, 32};
    commandQueue.enqueueKernel(kernel, 3, offset, gws, nullptr, 0, nullptr, nullptr);
    auto &cmdStream = commandQueue.getCS(0);

    HardwareParse hwParser;
    hwParser.parseCommands(cmdStream, 0);

    // Walk the parsed commands backwards so the first match is the last semaphore emitted.
    bool lastSemaphoreFound = false;
    for (auto it = hwParser.cmdList.rbegin(); it != hwParser.cmdList.rend(); it++) {
        auto semaphoreCmd = genCmdCast(*it);
        if (semaphoreCmd) {
            if (UnitTestHelper::isAdditionalMiSemaphoreWait(*semaphoreCmd)) {
                continue;
            }
            EXPECT_EQ(numberOfTiles, semaphoreCmd->getSemaphoreDataDword());
            lastSemaphoreFound = true;
            break;
        }
    }
    EXPECT_TRUE(lastSemaphoreFound);
}
"opencl/test/unit_test/command_queue/buffer_operations_fixture.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "gtest/gtest.h" #include using namespace NEO; TEST_F(EnqueueWriteBufferTypeTest, WhenWritingBufferThenReturnedEventIsCorrect) { cl_bool blockingWrite = CL_TRUE; size_t offset = 0; size_t size = sizeof(cl_float); cl_float pDestMemory[] = {0.0f, 0.0f, 0.0f, 0.0f}; cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event event = nullptr; auto retVal = CL_INVALID_VALUE; auto srcBuffer = std::unique_ptr(BufferHelper<>::create()); retVal = pCmdQ->enqueueWriteBuffer( srcBuffer.get(), blockingWrite, offset, size, pDestMemory, nullptr, numEventsInWaitList, eventWaitList, &event); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, event); auto pEvent = (Event *)event; EXPECT_EQ(pCmdQ->taskLevel, pEvent->taskLevel); // Check CL_EVENT_COMMAND_TYPE { cl_command_type cmdType = 0; size_t sizeReturned = 0; auto result = clGetEventInfo(pEvent, CL_EVENT_COMMAND_TYPE, sizeof(cmdType), &cmdType, &sizeReturned); ASSERT_EQ(CL_SUCCESS, result); EXPECT_EQ(static_cast(CL_COMMAND_WRITE_BUFFER), cmdType); EXPECT_EQ(sizeof(cl_command_type), sizeReturned); } delete pEvent; } TEST_F(EnqueueWriteBufferTypeTest, WhenWritingBufferThenReturnedEventShouldBeMaxOfInputEventsAndCmdQPlusOne) { uint32_t taskLevelCmdQ = 17; pCmdQ->taskLevel = taskLevelCmdQ; uint32_t taskLevelEvent1 = 8; uint32_t taskLevelEvent2 = 19; Event event1(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent1, 4); Event event2(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent2, 10); cl_bool blockingWrite = CL_TRUE; size_t offset = 0; size_t size = sizeof(cl_float); cl_float pDestMemory[] = {0.0f, 0.0f, 0.0f, 0.0f}; cl_event eventWaitList[] = { &event1, &event2}; cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]); cl_event event = nullptr; auto srcBuffer = std::unique_ptr(BufferHelper<>::create()); auto retVal = pCmdQ->enqueueWriteBuffer( srcBuffer.get(), 
// With DoCpuCopyOnWriteBuffer set to 1, writes a buffer from its own CPU address while
// waiting on two events (task levels 8 and 19). Expects the returned event to report task
// level 19 and the queue's task level to stay at the preset 17.
// NOTE(review): `static_cast(...)` and `std::unique_ptr(...)` show no template argument list
// in this text — likely lost in extraction; confirm against the original file.
TEST_F(EnqueueWriteBufferTypeTest, givenInOrderQueueAndForcedCpuCopyOnWriteBufferAndDstPtrEqualSrcPtrWithEventsNotBlockedWhenWriteBufferIsExecutedThenTaskLevelShouldNotBeIncreased) {
    // Restores debug flags when the test scope ends.
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.DoCpuCopyOnWriteBuffer.set(1);
    DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast(LocalMemoryAccessMode::Default));
    cl_int retVal = CL_SUCCESS;
    uint32_t taskLevelCmdQ = 17;
    pCmdQ->taskLevel = taskLevelCmdQ;

    uint32_t taskLevelEvent1 = 8;
    uint32_t taskLevelEvent2 = 19;
    Event event1(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent1, 4);
    Event event2(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent2, 10);

    cl_bool blockingRead = CL_TRUE;
    size_t size = sizeof(cl_float);
    cl_event eventWaitList[] =
        {
            &event1,
            &event2};
    cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]);
    cl_event event = nullptr;
    auto srcBuffer = std::unique_ptr(BufferHelper<>::create());
    // The source pointer is the buffer's own CPU address (the "DstPtrEqualSrcPtr" case).
    void *ptr = srcBuffer->getCpuAddressForMemoryTransfer();
    retVal = pCmdQ->enqueueWriteBuffer(srcBuffer.get(),
                                       blockingRead,
                                       0,
                                       size,
                                       ptr,
                                       nullptr,
                                       numEventsInWaitList,
                                       eventWaitList,
                                       &event);

    EXPECT_EQ(CL_SUCCESS, retVal);
    ASSERT_NE(nullptr, event);

    auto pEvent = (Event *)event;
    EXPECT_EQ(19u, pEvent->taskLevel);
    EXPECT_EQ(17u, pCmdQ->taskLevel);

    pEvent->release();
}
// With DoCpuCopyOnWriteBuffer set to 1 and no wait list, writes a buffer from its own CPU
// address. Expects both the returned event and the queue to report the preset task level 17.
// NOTE(review): `std::unique_ptr(...)` shows no template argument list in this text — likely
// lost in extraction; confirm against the original file.
TEST_F(EnqueueWriteBufferTypeTest, givenInOrderQueueAndForcedCpuCopyOnWriteBufferAndDstPtrEqualSrcPtrWhenWriteBufferIsExecutedThenTaskLevelShouldNotBeIncreased) {
    // Restores debug flags when the test scope ends.
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.DoCpuCopyOnWriteBuffer.set(1);
    cl_int retVal = CL_SUCCESS;
    uint32_t taskLevelCmdQ = 17;
    pCmdQ->taskLevel = taskLevelCmdQ;

    cl_bool blockingRead = CL_TRUE;
    size_t size = sizeof(cl_float);
    cl_event event = nullptr;
    auto srcBuffer = std::unique_ptr(BufferHelper<>::create());
    // The source pointer is the buffer's own CPU address (the "DstPtrEqualSrcPtr" case).
    void *ptr = srcBuffer->getCpuAddressForMemoryTransfer();
    retVal = pCmdQ->enqueueWriteBuffer(srcBuffer.get(),
                                       blockingRead,
                                       0,
                                       size,
                                       ptr,
                                       nullptr,
                                       0,
                                       nullptr,
                                       &event);

    EXPECT_EQ(CL_SUCCESS, retVal);
    ASSERT_NE(nullptr, event);

    auto pEvent = (Event *)event;
    EXPECT_EQ(17u, pEvent->taskLevel);
    EXPECT_EQ(17u, pCmdQ->taskLevel);

    pEvent->release();
}
cl_int retVal = CL_SUCCESS; uint32_t taskLevelCmdQ = 17; pCmdOOQ->taskLevel = taskLevelCmdQ; uint32_t taskLevelEvent1 = 8; uint32_t taskLevelEvent2 = 19; Event event1(pCmdOOQ.get(), CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent1, 4); Event event2(pCmdOOQ.get(), CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent2, 10); cl_bool blockingRead = CL_TRUE; size_t size = sizeof(cl_float); cl_event eventWaitList[] = { &event1, &event2}; cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]); cl_event event = nullptr; auto srcBuffer = std::unique_ptr(BufferHelper<>::create()); void *ptr = srcBuffer->getCpuAddressForMemoryTransfer(); retVal = pCmdOOQ->enqueueWriteBuffer(srcBuffer.get(), blockingRead, 0, size, ptr, nullptr, numEventsInWaitList, eventWaitList, &event); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, event); auto pEvent = (Event *)event; EXPECT_EQ(19u, pEvent->taskLevel); EXPECT_EQ(17u, pCmdOOQ->taskLevel); pEvent->release(); } TEST_F(EnqueueWriteBufferTypeTest, givenInOrderQueueAndDisabledSupportCpuCopiesAndDstPtrEqualSrcPtrWithEventsWhenWriteBufferIsExecutedThenTaskLevelShouldNotBeIncreased) { DebugManagerStateRestore dbgRestore; DebugManager.flags.DoCpuCopyOnWriteBuffer.set(0); cl_int retVal = CL_SUCCESS; uint32_t taskLevelCmdQ = 17; pCmdQ->taskLevel = taskLevelCmdQ; uint32_t taskLevelEvent1 = 8; uint32_t taskLevelEvent2 = 19; Event event1(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent1, 4); Event event2(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent2, 10); cl_bool blockingRead = CL_TRUE; size_t size = sizeof(cl_float); cl_event eventWaitList[] = { &event1, &event2}; cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]); cl_event event = nullptr; auto srcBuffer = std::unique_ptr(BufferHelper<>::create()); void *ptr = srcBuffer->getCpuAddressForMemoryTransfer(); retVal = pCmdQ->enqueueWriteBuffer(srcBuffer.get(), blockingRead, 0, size, ptr, nullptr, numEventsInWaitList, eventWaitList, &event); EXPECT_EQ(CL_SUCCESS, 
// With DoCpuCopyOnWriteBuffer set to 0, writes a buffer from its own CPU address on a queue
// created with CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE while waiting on two events (task
// levels 8 and 19). The expected task level of queue and event is 20 when the command
// stream receiver reports timestamp-packet writes enabled, otherwise 19.
// NOTE(review): `std::unique_ptr ...` and `castToObject(event)` show no template argument
// list in this text — likely lost in extraction; confirm against the original file.
TEST_F(EnqueueWriteBufferTypeTest, givenOutOfOrderQueueAndDisabledSupportCpuCopiesAndDstPtrEqualSrcPtrWithEventsWhenWriteBufferIsExecutedThenTaskLevelShouldNotBeIncreased) {
    // Restores debug flags when the test scope ends.
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.DoCpuCopyOnWriteBuffer.set(0);
    std::unique_ptr pCmdOOQ(createCommandQueue(pClDevice, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE));
    cl_int retVal = CL_SUCCESS;
    uint32_t taskLevelCmdQ = 17;
    pCmdOOQ->taskLevel = taskLevelCmdQ;

    uint32_t taskLevelEvent1 = 8;
    uint32_t taskLevelEvent2 = 19;
    Event event1(pCmdOOQ.get(), CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent1, 4);
    Event event2(pCmdOOQ.get(), CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent2, 10);

    cl_bool blockingRead = CL_TRUE;
    size_t size = sizeof(cl_float);
    cl_event eventWaitList[] =
        {
            &event1,
            &event2};
    cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]);
    cl_event event = nullptr;
    auto srcBuffer = std::unique_ptr(BufferHelper<>::create());
    // The source pointer is the buffer's own CPU address (the "DstPtrEqualSrcPtr" case).
    void *ptr = srcBuffer->getCpuAddressForMemoryTransfer();
    retVal = pCmdOOQ->enqueueWriteBuffer(srcBuffer.get(),
                                         blockingRead,
                                         0,
                                         size,
                                         ptr,
                                         nullptr,
                                         numEventsInWaitList,
                                         eventWaitList,
                                         &event);

    EXPECT_EQ(CL_SUCCESS, retVal);
    ASSERT_NE(nullptr, event);

    auto pEvent = castToObject(event);
    // Expected level depends on whether timestamp-packet writes are enabled on the CSR.
    if (pCmdOOQ->getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
        EXPECT_EQ(taskLevelEvent2 + 1, pCmdOOQ->taskLevel);
        EXPECT_EQ(taskLevelEvent2 + 1, pEvent->taskLevel);
    } else {
        EXPECT_EQ(19u, pCmdOOQ->taskLevel);
        EXPECT_EQ(19u, pEvent->taskLevel);
    }

    pEvent->release();
}
// Fixture for clEnqueueWriteBufferRect tests: owns a mock context, a read-write buffer of
// slicePitch * rowPitch bytes, a second buffer created through BufferHelper, and an aligned
// host allocation of the same size used as the write source.
// NOTE(review): `BufferHelper>::create()`, `template void ...` and the `std::unique_ptr`
// members show no or partial template argument lists in this text — likely lost in
// extraction; confirm against the original file.
struct EnqueueWriteBufferRectTest : public CommandEnqueueFixture,
                                    public ::testing::Test {

    // Creates the context, host memory and both buffers on top of the base fixture.
    void SetUp() override {
        CommandEnqueueFixture::SetUp();
        context.reset(new MockContext(pClDevice));
        // Make the buffer helpers create their objects in this fixture's context.
        BufferDefaults::context = context.get();

        // For 3D
        hostPtr = ::alignedMalloc(slicePitch * rowPitch, 4096);

        auto retVal = CL_INVALID_VALUE;
        buffer.reset(Buffer::create(
            context.get(),
            CL_MEM_READ_WRITE,
            slicePitch * rowPitch,
            nullptr,
            retVal));

        nonZeroCopyBuffer.reset(BufferHelper>::create());

        ASSERT_NE(nullptr, buffer.get());
    }

    // Releases the buffers and host memory before the context and the base fixture.
    void TearDown() override {
        buffer.reset();
        nonZeroCopyBuffer.reset();
        ::alignedFree(hostPtr);
        context.reset();
        CommandEnqueueFixture::TearDown();
    }

  protected:
    // Enqueues a 50x50x1 rectangular write from hostPtr into `buffer` at origin (0,0,0)
    // using the fixture pitches, expects CL_SUCCESS, then parses the queue's commands.
    template 
    void enqueueWriteBufferRect2D(cl_bool blocking = CL_FALSE) {
        typedef typename FamilyType::WALKER_TYPE GPGPU_WALKER;
        typedef typename FamilyType::PIPELINE_SELECT PIPELINE_SELECT;

        size_t bufferOrigin[] = {0, 0, 0};
        size_t hostOrigin[] = {0, 0, 0};
        size_t region[] = {50, 50, 1};
        auto retVal = pCmdQ->enqueueWriteBufferRect(
            buffer.get(),
            blocking,
            bufferOrigin,
            hostOrigin,
            region,
            rowPitch,
            slicePitch,
            rowPitch,
            slicePitch,
            hostPtr,
            0,
            nullptr,
            nullptr);
        EXPECT_EQ(CL_SUCCESS, retVal);
        parseCommands(*pCmdQ);
    }

    std::unique_ptr context;
    std::unique_ptr buffer;
    std::unique_ptr nonZeroCopyBuffer;

    void *hostPtr = nullptr;

    // Pitches passed for both the buffer and the host side of the rect write.
    static const size_t rowPitch = 100;
    static const size_t slicePitch = 100 * 100;
};
Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/built_ins/built_ins.h" #include "shared/source/helpers/compiler_hw_info_config.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/source/built_ins/builtins_dispatch_builder.h" #include "opencl/source/event/event.h" #include "opencl/source/helpers/dispatch_info.h" #include "opencl/test/unit_test/command_queue/enqueue_write_buffer_rect_fixture.h" #include "opencl/test/unit_test/fixtures/buffer_enqueue_fixture.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "opencl/test/unit_test/gen_common/gen_commands_common_validation.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "reg_configs_common.h" using namespace NEO; HWTEST_F(EnqueueWriteBufferRectTest, GivenNullBufferWhenWritingBufferThenInvalidMemObjectErrorIsReturned) { auto retVal = CL_SUCCESS; size_t srcOrigin[] = {0, 0, 0}; size_t dstOrigin[] = {0, 0, 0}; size_t region[] = {1, 1, 1}; retVal = clEnqueueWriteBufferRect( pCmdQ, nullptr, CL_FALSE, srcOrigin, dstOrigin, region, 10, 0, 10, 0, hostPtr, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } HWTEST_F(EnqueueWriteBufferRectTest, GivenValidParamsWhenWritingBufferThenSuccessIsReturned) { auto retVal = CL_SUCCESS; size_t srcOrigin[] = {0, 0, 0}; size_t dstOrigin[] = {0, 0, 0}; size_t region[] = {1, 1, 1}; retVal = clEnqueueWriteBufferRect( pCmdQ, buffer.get(), CL_TRUE, srcOrigin, dstOrigin, region, 10, 0, 10, 0, hostPtr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } HWTEST_F(EnqueueWriteBufferRectTest, GivenGpuHangAndBlockingCallAndValidParamsWhenWritingBufferThenOutOfResourcesIsReturned) { size_t srcOrigin[] = {0, 0, 0}; size_t dstOrigin[] = {0, 0, 0}; size_t region[] = {1, 1, 1}; std::unique_ptr device(new MockClDevice{MockClDevice::createWithNewExecutionEnvironment(nullptr)}); 
// Pushes the CSR's task count/level ahead of the queue's, performs a blocking rect write,
// and expects the queue to end up with the CSR's task count and the CSR's pre-enqueue
// task level.
// NOTE(review): the `getUltCommandStreamReceiver()` and `enqueueWriteBufferRect2D(...)` calls
// show no template argument list although the fixture helper is declared as a template —
// likely lost in extraction; confirm against the original file.
HWTEST_F(EnqueueWriteBufferRectTest, GivenBlockingEnqueueWhenWritingBufferThenTaskLevelIsNotIncremented) {
    // this test case assumes IOQ (presumably an in-order queue)
    auto &csr = pDevice->getUltCommandStreamReceiver();
    csr.taskCount = pCmdQ->taskCount + 100;
    csr.taskLevel = pCmdQ->taskLevel + 50;
    // Remember the CSR level before the enqueue; the queue is expected to match it afterwards.
    auto oldCsrTaskLevel = csr.peekTaskLevel();

    enqueueWriteBufferRect2D(CL_TRUE);
    EXPECT_EQ(csr.peekTaskCount(), pCmdQ->taskCount);
    EXPECT_EQ(oldCsrTaskLevel, pCmdQ->taskLevel);
}
// After a rect write, expects the DSH and IOH usage to have changed, and the SSH usage to
// have changed as well when the CopyBufferRect built-in kernel uses bindful addressing for
// buffers. The kernel is obtained by building dispatch infos with the same parameters the
// fixture helper enqueues (50x50x1, fixture pitches).
HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueWriteBufferRectTest, WhenWritingBufferThenIndirectDataIsAdded) {
    // Heap usage snapshots taken before the enqueue.
    auto dshBefore = pDSH->getUsed();
    auto iohBefore = pIOH->getUsed();
    auto sshBefore = pSSH->getUsed();

    enqueueWriteBufferRect2D();

    auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferRect,
                                                                            pCmdQ->getClDevice());
    // NOTE(review): `builder` is a reference, so its address cannot be null here; this
    // assertion looks vacuous — confirm whether it can be dropped.
    ASSERT_NE(nullptr, &builder);

    BuiltinOpParams dc;
    dc.srcPtr = hostPtr;
    dc.dstMemObj = buffer.get();
    dc.srcOffset = {0, 0, 0};
    dc.dstOffset = {0, 0, 0};
    dc.size = {50, 50, 1};
    dc.srcRowPitch = rowPitch;
    dc.srcSlicePitch = slicePitch;
    dc.dstRowPitch = rowPitch;
    dc.dstSlicePitch = slicePitch;

    MultiDispatchInfo multiDispatchInfo(dc);
    builder.buildDispatchInfos(multiDispatchInfo);
    EXPECT_NE(0u, multiDispatchInfo.size());

    auto kernel = multiDispatchInfo.begin()->getKernel();
    ASSERT_NE(nullptr, kernel);

    EXPECT_NE(dshBefore, pDSH->getUsed());
    EXPECT_NE(iohBefore, pIOH->getUsed());
    // SSH usage is only checked when the kernel uses bindful addressing for buffers.
    if (kernel->usesBindfulAddressingForBuffers()) {
        EXPECT_NE(sshBefore, pSSH->getUsed());
    }
}
// After a rect write, locates a command between the pipeline-select and the walker, treats
// it as MEDIA_INTERFACE_DESCRIPTOR_LOAD, and checks its total length (non-zero, multiple of
// sizeof(INTERFACE_DESCRIPTOR_DATA) and of 32) and its start address (multiple of 64).
// NOTE(review): `find(...)` and `validateCommand(...)` show no template argument list in
// this text — likely lost in extraction; confirm against the original file.
HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueWriteBufferRectTest, WhenWritingBufferThenMediaInterfaceDescriptorIsCorrect) {
    typedef typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD MEDIA_INTERFACE_DESCRIPTOR_LOAD;
    typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA;

    enqueueWriteBufferRect2D();

    // All state should be programmed before walker
    auto itorCmd = find(itorPipelineSelect, itorWalker);
    ASSERT_NE(itorWalker, itorCmd);

    auto *cmd = (MEDIA_INTERFACE_DESCRIPTOR_LOAD *)*itorCmd;

    // Verify we have a valid length -- multiple of INTERFACE_DESCRIPTOR_DATAs
    EXPECT_EQ(0u, cmd->getInterfaceDescriptorTotalLength() % sizeof(INTERFACE_DESCRIPTOR_DATA));

    // Validate the start address
    size_t alignmentStartAddress = 64 * sizeof(uint8_t);
    EXPECT_EQ(0u, cmd->getInterfaceDescriptorDataStartAddress() % alignmentStartAddress);

    // Validate the length
    EXPECT_NE(0u, cmd->getInterfaceDescriptorTotalLength());
    size_t alignmentTotalLength = 32 * sizeof(uint8_t);
    EXPECT_EQ(0u, cmd->getInterfaceDescriptorTotalLength() % alignmentTotalLength);

    // Generically validate this command
    FamilyType::PARSE::template validateCommand(cmdList.begin(), itorCmd);
}
WhenWritingBufferThenInterfaceDescriptorDataIsCorrect) { typedef typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD MEDIA_INTERFACE_DESCRIPTOR_LOAD; typedef typename FamilyType::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; enqueueWriteBufferRect2D(); // Extract the MIDL command auto itorCmd = find(itorPipelineSelect, itorWalker); ASSERT_NE(itorWalker, itorCmd); auto *cmdMIDL = (MEDIA_INTERFACE_DESCRIPTOR_LOAD *)*itorCmd; // Extract the SBA command itorCmd = find(cmdList.begin(), itorWalker); ASSERT_NE(itorWalker, itorCmd); auto *cmdSBA = (STATE_BASE_ADDRESS *)*itorCmd; // Extrach the DSH auto DSH = cmdSBA->getDynamicStateBaseAddress(); ASSERT_NE(0u, DSH); // IDD should be located within DSH auto iddStart = cmdMIDL->getInterfaceDescriptorDataStartAddress(); auto IDDEnd = iddStart + cmdMIDL->getInterfaceDescriptorTotalLength(); ASSERT_LE(IDDEnd, cmdSBA->getDynamicStateBufferSize() * MemoryConstants::pageSize); auto &IDD = *(INTERFACE_DESCRIPTOR_DATA *)cmdInterfaceDescriptorData; // Validate the kernel start pointer. Technically, a kernel can start at address 0 but let's force a value. 
auto kernelStartPointer = ((uint64_t)IDD.getKernelStartPointerHigh() << 32) + IDD.getKernelStartPointer(); EXPECT_LE(kernelStartPointer, cmdSBA->getInstructionBufferSize() * MemoryConstants::pageSize); EXPECT_NE(0u, IDD.getNumberOfThreadsInGpgpuThreadGroup()); EXPECT_NE(0u, IDD.getCrossThreadConstantDataReadLength()); EXPECT_NE(0u, IDD.getConstantIndirectUrbEntryReadLength()); } HWTEST_F(EnqueueWriteBufferRectTest, WhenWritingBufferThenOnePipelineSelectIsProgrammed) { enqueueWriteBufferRect2D(); int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect(); EXPECT_EQ(1, numCommands); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueWriteBufferRectTest, WhenWritingBufferThenMediaVfeStateIsCorrect) { enqueueWriteBufferRect2D(); validateMediaVFEState(&pDevice->getHardwareInfo(), cmdMediaVfeState, cmdList, itorMediaVfeState); } HWTEST_F(EnqueueWriteBufferRectTest, givenInOrderQueueAndDstPtrEqualSrcPtrWhenWriteBufferIsExecutedThenTaskLevelShouldNotBeIncreased) { cl_int retVal = CL_SUCCESS; void *ptr = buffer->getCpuAddressForMemoryTransfer(); size_t bufferOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {50, 50, 1}; retVal = pCmdQ->enqueueWriteBufferRect( buffer.get(), CL_FALSE, bufferOrigin, hostOrigin, region, rowPitch, slicePitch, rowPitch, slicePitch, ptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pCmdQ->taskLevel, 0u); } HWTEST_F(EnqueueWriteBufferRectTest, givenOutOfOrderQueueAndDstPtrEqualSrcPtrWhenWriteBufferIsExecutedThenTaskLevelShouldNotBeIncreased) { cl_int retVal = CL_SUCCESS; std::unique_ptr pCmdOOQ(createCommandQueue(pClDevice, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)); void *ptr = buffer->getCpuAddressForMemoryTransfer(); size_t bufferOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {50, 50, 1}; retVal = pCmdOOQ->enqueueWriteBufferRect( buffer.get(), CL_FALSE, bufferOrigin, hostOrigin, region, rowPitch, slicePitch, rowPitch, slicePitch, ptr, 0, nullptr, 
nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pCmdOOQ->taskLevel, 0u); } HWTEST_F(EnqueueWriteBufferRectTest, givenInOrderQueueAndDstPtrEqualSrcPtrWithEventsWhenWriteBufferIsExecutedThenTaskLevelShouldNotBeIncreased) { cl_int retVal = CL_SUCCESS; uint32_t taskLevelCmdQ = 17; pCmdQ->taskLevel = taskLevelCmdQ; uint32_t taskLevelEvent1 = 8; uint32_t taskLevelEvent2 = 19; Event event1(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent1, 4); Event event2(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent2, 10); cl_event eventWaitList[] = { &event1, &event2}; cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]); cl_event event = nullptr; size_t bufferOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {50, 50, 1}; void *ptr = buffer->getCpuAddressForMemoryTransfer(); retVal = pCmdQ->enqueueWriteBufferRect( buffer.get(), CL_FALSE, bufferOrigin, hostOrigin, region, rowPitch, slicePitch, rowPitch, slicePitch, ptr, numEventsInWaitList, eventWaitList, &event); ; EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, event); auto pEvent = (Event *)event; EXPECT_EQ(19u, pEvent->taskLevel); EXPECT_EQ(19u, pCmdQ->taskLevel); EXPECT_EQ(CL_COMMAND_WRITE_BUFFER_RECT, (const int)pEvent->getCommandType()); pEvent->release(); } HWTEST_F(EnqueueWriteBufferRectTest, givenOutOfOrderQueueAndDstPtrEqualSrcPtrWithEventsWhenWriteBufferIsExecutedThenTaskLevelShouldNotBeIncreased) { cl_int retVal = CL_SUCCESS; std::unique_ptr pCmdOOQ(createCommandQueue(pClDevice, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)); UltCommandStreamReceiver &mockCsr = reinterpret_cast &>(pCmdOOQ->getGpgpuCommandStreamReceiver()); mockCsr.useNewResourceImplicitFlush = false; mockCsr.useGpuIdleImplicitFlush = false; uint32_t taskLevelCmdQ = 17; pCmdOOQ->taskLevel = taskLevelCmdQ; uint32_t taskLevelEvent1 = 8; uint32_t taskLevelEvent2 = 19; Event event1(pCmdOOQ.get(), CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent1, 4); Event event2(pCmdOOQ.get(), 
CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent2, 10); cl_event eventWaitList[] = { &event1, &event2}; cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]); cl_event event = nullptr; size_t bufferOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {50, 50, 1}; void *ptr = buffer->getCpuAddressForMemoryTransfer(); retVal = pCmdOOQ->enqueueWriteBufferRect( buffer.get(), CL_FALSE, bufferOrigin, hostOrigin, region, rowPitch, slicePitch, rowPitch, slicePitch, ptr, numEventsInWaitList, eventWaitList, &event); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, event); auto pEvent = (Event *)event; EXPECT_EQ(19u, pEvent->taskLevel); EXPECT_EQ(19u, pCmdOOQ->taskLevel); EXPECT_EQ(CL_COMMAND_WRITE_BUFFER_RECT, (const int)pEvent->getCommandType()); pEvent->release(); } HWTEST_F(EnqueueWriteBufferRectTest, givenInOrderQueueAndRowPitchEqualZeroAndDstPtrEqualSrcPtrWhenWriteBufferIsExecutedThenTaskLevelShouldNotBeIncreased) { cl_int retVal = CL_SUCCESS; void *ptr = buffer->getCpuAddressForMemoryTransfer(); size_t bufferOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {50, 50, 1}; retVal = pCmdQ->enqueueWriteBufferRect( buffer.get(), CL_FALSE, bufferOrigin, hostOrigin, region, 0, slicePitch, 0, slicePitch, ptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pCmdQ->taskLevel, 0u); } HWTEST_F(EnqueueWriteBufferRectTest, givenInOrderQueueAndSlicePitchEqualZeroAndDstPtrEqualSrcPtrWhenWriteBufferIsExecutedThenTaskLevelShouldNotBeIncreased) { cl_int retVal = CL_SUCCESS; void *ptr = buffer->getCpuAddressForMemoryTransfer(); size_t bufferOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {50, 50, 1}; retVal = pCmdQ->enqueueWriteBufferRect( buffer.get(), CL_FALSE, bufferOrigin, hostOrigin, region, rowPitch, 0, rowPitch, 0, ptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(CL_SUCCESS, retVal); 
EXPECT_EQ(pCmdQ->taskLevel, 0u);
}

// The host pointer is shifted by the difference between buffer origin and host origin,
// computed from rowPitch/slicePitch below; the test expects the task level to stay at zero.
HWTEST_F(EnqueueWriteBufferRectTest, givenInOrderQueueAndMemObjWithOffsetPointTheSameStorageWithHostWhenWriteBufferIsExecutedThenTaskLevelShouldNotBeIncreased) {
    void *ptr = buffer->getCpuAddressForMemoryTransfer();
    size_t bufferOrigin[] = {50, 50, 0};
    size_t hostOrigin[] = {20, 20, 0};
    size_t region[] = {50, 50, 1};

    // Byte distance between the two origins: slices, then rows, then bytes within a row.
    size_t hostOffset = (bufferOrigin[2] - hostOrigin[2]) * slicePitch + (bufferOrigin[1] - hostOrigin[1]) * rowPitch + (bufferOrigin[0] - hostOrigin[0]);
    auto hostStorage = ptrOffset(ptr, hostOffset);

    const cl_int retVal = pCmdQ->enqueueWriteBufferRect(
        buffer.get(),
        CL_FALSE,
        bufferOrigin,
        hostOrigin,
        region,
        rowPitch,
        slicePitch,
        rowPitch,
        slicePitch,
        hostStorage,
        0,
        nullptr,
        nullptr);

    // The original asserted the return code twice in a row; once is sufficient.
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(pCmdQ->taskLevel, 0u);
}

// Host origin {1, 1, 0} differs from buffer origin {0, 0, 0} while the unshifted buffer
// address is used as host pointer; the test expects the task level to become 1.
HWTEST_F(EnqueueWriteBufferRectTest, givenInOrderQueueAndMemObjWithOffsetPointDiffrentStorageWithHostWhenWriteBufferIsExecutedThenTaskLevelShouldBeIncreased) {
    void *ptr = buffer->getCpuAddressForMemoryTransfer();
    size_t bufferOrigin[] = {0, 0, 0};
    size_t hostOrigin[] = {1, 1, 0};
    size_t region[] = {1, 1, 1};

    const cl_int retVal = pCmdQ->enqueueWriteBufferRect(
        buffer.get(),
        CL_FALSE,
        bufferOrigin,
        hostOrigin,
        region,
        rowPitch,
        slicePitch,
        rowPitch,
        slicePitch,
        ptr,
        0,
        nullptr,
        nullptr);

    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(pCmdQ->taskLevel, 1u);
}

// Same-pointer write, but the target is nonZeroCopyBuffer instead of buffer.
HWTEST_F(EnqueueWriteBufferRectTest, givenInOrderQueueAndDstPtrEqualSrcPtrAndNonZeroCopyBufferWhenWriteBufferIsExecutedThenTaskLevelShouldBeIncreased) {
    cl_int retVal = CL_SUCCESS;
    void *ptr = nonZeroCopyBuffer->getCpuAddressForMemoryTransfer();
    size_t bufferOrigin[] = {0, 0, 0};
    size_t hostOrigin[] = {0, 0, 0};
    size_t region[] = {1, 1, 1};

    retVal = pCmdQ->enqueueWriteBufferRect(
        nonZeroCopyBuffer.get(),
        CL_FALSE,
        bufferOrigin,
        hostOrigin,
        region,
        rowPitch,
        slicePitch,
        rowPitch,
        slicePitch,
        ptr,
        0,
        nullptr,
        nullptr);
EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pCmdQ->taskLevel, 1u); } HWTEST_F(EnqueueReadWriteBufferRectDispatch, givenOffsetResultingInMisalignedPtrWhenEnqueueWriteBufferRectForNon3DCaseIsCalledThenAddressInStateBaseAddressIsAlignedAndMatchesKernelDispatchInfoParams) { hwInfo->capabilityTable.blitterOperationsSupported = false; initializeFixture(); const auto &compilerHwInfoConfig = *CompilerHwInfoConfig::get(defaultHwInfo->platform.eProductFamily); if (compilerHwInfoConfig.isForceToStatelessRequired()) { GTEST_SKIP(); } auto cmdQ = std::make_unique>(context.get(), device.get(), &properties); buffer->forceDisallowCPUCopy = true; Vec3 hostOffset(hostOrigin); auto misalignedHostPtr = ptrOffset(reinterpret_cast(memory), hostOffset.z * hostSlicePitch); auto retVal = cmdQ->enqueueWriteBufferRect(buffer.get(), CL_FALSE, bufferOrigin, hostOrigin, region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, memory, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(0u, cmdQ->lastEnqueuedKernels.size()); Kernel *kernel = cmdQ->lastEnqueuedKernels[0]; cmdQ->finish(); parseCommands(*cmdQ); auto &kernelInfo = kernel->getKernelInfo(); if (hwInfo->capabilityTable.gpuAddressSpace == MemoryConstants::max48BitAddress) { const auto &surfaceState = getSurfaceState(&cmdQ->getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0), 0); if (kernelInfo.getArgDescriptorAt(0).as().pointerSize == sizeof(uint64_t)) { auto pKernelArg = (uint64_t *)(kernel->getCrossThreadData() + kernelInfo.getArgDescriptorAt(0).as().stateless); EXPECT_EQ(reinterpret_cast(alignDown(misalignedHostPtr, 4)), *pKernelArg); EXPECT_EQ(*pKernelArg, surfaceState.getSurfaceBaseAddress()); } else if (kernelInfo.getArgDescriptorAt(0).as().pointerSize == sizeof(uint32_t)) { auto pKernelArg = (uint32_t *)(kernel->getCrossThreadData() + kernelInfo.getArgDescriptorAt(0).as().stateless); EXPECT_EQ(reinterpret_cast(alignDown(misalignedHostPtr, 4)), static_cast(*pKernelArg)); 
EXPECT_EQ(static_cast(*pKernelArg), surfaceState.getSurfaceBaseAddress()); } } if (kernelInfo.getArgDescriptorAt(2).as().elements[0].size == 4 * sizeof(uint32_t)) { // size of uint4 SrcOrigin auto dstOffset = (uint32_t *)(kernel->getCrossThreadData() + kernelInfo.getArgDescriptorAt(2).as().elements[0].offset); EXPECT_EQ(hostOffset.x + ptrDiff(misalignedHostPtr, alignDown(misalignedHostPtr, 4)), *dstOffset); } else { // SrcOrigin arg should be 16 bytes in size, if that changes, above if path should be modified EXPECT_TRUE(false); } } using NegativeFailAllocationTest = Test; HWTEST_F(NegativeFailAllocationTest, givenEnqueueWriteBufferRectWhenHostPtrAllocationCreationFailsThenReturnOutOfResource) { cl_int retVal = CL_SUCCESS; size_t bufferOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {50, 50, 1}; constexpr size_t rowPitch = 100; constexpr size_t slicePitch = 100 * 100; retVal = pCmdQ->enqueueWriteBufferRect( buffer.get(), CL_FALSE, bufferOrigin, hostOrigin, region, rowPitch, slicePitch, rowPitch, slicePitch, ptr, 0, nullptr, nullptr); EXPECT_EQ(CL_OUT_OF_RESOURCES, retVal); } struct EnqueueWriteBufferRectHw : public ::testing::Test { void SetUp() override { if (is32bit) { GTEST_SKIP(); } device = std::make_unique(MockClDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); context.reset(new MockContext(device.get())); } std::unique_ptr device; std::unique_ptr context; MockBuffer srcBuffer; size_t bufferOrigin[3] = {0, 0, 0}; size_t hostOrigin[3] = {0, 0, 0}; size_t region[3] = {1, 1, 1}; size_t bufferRowPitch = 10; size_t bufferSlicePitch = 0; size_t hostRowPitch = 10; size_t hostSlicePitch = 10; uint64_t bigSize = 4ull * MemoryConstants::gigaByte; uint64_t smallSize = 4ull * MemoryConstants::gigaByte - 1; }; using EnqueueWriteBufferRectStatelessTest = EnqueueWriteBufferRectHw; HWTEST_F(EnqueueWriteBufferRectStatelessTest, WhenWritingBufferRectStatelessThenSuccessIsReturned) { auto pCmdQ = std::make_unique>(context.get(), 
device.get()); void *missAlignedPtr = reinterpret_cast(0x1041); srcBuffer.size = static_cast(bigSize); auto retVal = pCmdQ->enqueueWriteBufferRect(&srcBuffer, CL_FALSE, bufferOrigin, hostOrigin, region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, missAlignedPtr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } using EnqueueWriteBufferRectStatefulTest = EnqueueWriteBufferRectHw; HWTEST_F(EnqueueWriteBufferRectStatefulTest, WhenWritingBufferRectStatefulThenSuccessIsReturned) { auto pCmdQ = std::make_unique>(context.get(), device.get()); void *missAlignedPtr = reinterpret_cast(0x1041); srcBuffer.size = static_cast(smallSize); auto retVal = pCmdQ->enqueueWriteBufferRect(&srcBuffer, CL_FALSE, bufferOrigin, hostOrigin, region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, missAlignedPtr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } HWTEST_F(EnqueueWriteBufferRectHw, givenHostPtrIsFromMappedBufferWhenWriteBufferRectIsCalledThenReuseGraphicsAllocation) { DebugManagerStateRestore restore{}; DebugManager.flags.DisableZeroCopyForBuffers.set(1); MockCommandQueueHw queue(context.get(), device.get(), nullptr); auto &csr = device->getUltCommandStreamReceiver(); BufferDefaults::context = context.get(); auto bufferForMap = clUniquePtr(BufferHelper<>::create()); auto bufferForWrite = clUniquePtr(BufferHelper<>::create()); cl_int retVal{}; void *mappedPtr = queue.enqueueMapBuffer(bufferForMap.get(), CL_TRUE, CL_MAP_READ, 0, bufferForMap->getSize(), 0, nullptr, nullptr, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, mappedPtr); EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled); MapOperationsHandler *mapOperationsHandler = context->getMapOperationsStorage().getHandlerIfExists(bufferForMap.get()); EXPECT_NE(nullptr, mapOperationsHandler); MapInfo mapInfo{}; EXPECT_TRUE(mapOperationsHandler->find(mappedPtr, mapInfo)); EXPECT_NE(nullptr, mapInfo.graphicsAllocation); auto unmappedPtr = 
std::make_unique(bufferForWrite->getSize()); retVal = queue.enqueueWriteBufferRect(bufferForWrite.get(), CL_TRUE, bufferOrigin, hostOrigin, region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, unmappedPtr.get(), 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, csr.createAllocationForHostSurfaceCalled); retVal = queue.enqueueWriteBufferRect(bufferForWrite.get(), CL_TRUE, bufferOrigin, hostOrigin, region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, mappedPtr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, csr.createAllocationForHostSurfaceCalled); } compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/enqueue_write_buffer_tests.cpp000066400000000000000000000667651422164147700326350ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/built_ins/built_ins.h" #include "shared/source/helpers/local_memory_access_modes.h" #include "shared/source/memory_manager/allocations_list.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/mocks/mock_execution_environment.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/source/built_ins/builtins_dispatch_builder.h" #include "opencl/source/helpers/dispatch_info.h" #include "opencl/test/unit_test/command_queue/buffer_operations_fixture.h" #include "opencl/test/unit_test/command_queue/enqueue_fixture.h" #include "opencl/test/unit_test/gen_common/gen_commands_common_validation.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "reg_configs_common.h" using namespace NEO; HWTEST_F(EnqueueWriteBufferTypeTest, GivenNullBufferWhenWrtingBufferThenInvalidMemObjectErrorIsReturned) { auto data = 1; auto retVal = 
clEnqueueWriteBuffer( pCmdQ, nullptr, false, 0, sizeof(data), &data, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } HWTEST_F(EnqueueWriteBufferTypeTest, GivenNullUserPointerWhenWritingBufferThenInvalidValueErrorIsReturned) { auto data = 1; auto retVal = clEnqueueWriteBuffer( pCmdQ, srcBuffer.get(), false, 0, sizeof(data), nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); } HWTEST_F(EnqueueWriteBufferTypeTest, GivenBlockingEnqueueWhenWritingBufferThenTaskLevelIsNotIncremented) { //this test case assumes IOQ auto &csr = pDevice->getUltCommandStreamReceiver(); csr.taskCount = pCmdQ->taskCount + 100; csr.taskLevel = pCmdQ->taskLevel + 50; auto oldCsrTaskLevel = csr.peekTaskLevel(); srcBuffer->forceDisallowCPUCopy = true; EnqueueWriteBufferHelper<>::enqueueWriteBuffer(pCmdQ, srcBuffer.get(), CL_TRUE); EXPECT_EQ(csr.peekTaskCount(), pCmdQ->taskCount); EXPECT_EQ(oldCsrTaskLevel, pCmdQ->taskLevel); } HWTEST_F(EnqueueWriteBufferTypeTest, GivenGpuHangAndBlockingEnqueueWhenWritingBufferThenOutOfResourcesIsReturned) { std::unique_ptr device(new MockClDevice{MockClDevice::createWithNewExecutionEnvironment(nullptr)}); cl_queue_properties props = {}; MockCommandQueueHw mockCommandQueueHw(context, device.get(), &props); mockCommandQueueHw.waitForAllEnginesReturnValue = WaitStatus::GpuHang; srcBuffer->forceDisallowCPUCopy = true; const auto enqueueResult = EnqueueWriteBufferHelper<>::enqueueWriteBuffer(&mockCommandQueueHw, srcBuffer.get(), CL_TRUE); EXPECT_EQ(CL_OUT_OF_RESOURCES, enqueueResult); EXPECT_EQ(1, mockCommandQueueHw.waitForAllEnginesCalledCount); } HWTEST_F(EnqueueWriteBufferTypeTest, GivenNonBlockingEnqueueWhenWritingBufferThenTaskLevelIsIncremented) { //this test case assumes IOQ auto &csr = pDevice->getUltCommandStreamReceiver(); csr.taskCount = pCmdQ->taskCount + 100; csr.taskLevel = pCmdQ->taskLevel + 50; EnqueueWriteBufferHelper<>::enqueueWriteBuffer(pCmdQ, srcBuffer.get(), CL_FALSE); EXPECT_EQ(csr.peekTaskCount(), 
pCmdQ->taskCount);
    EXPECT_EQ(csr.peekTaskLevel(), pCmdQ->taskLevel + 1);
}

// Checks the GPGPU_WALKER emitted for a buffer write with CPU copy disallowed:
// non-zero thread-group dimensions and execution masks, SIMD32, non-zero indirect
// data length and indirect parameters disabled.
HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueWriteBufferTypeTest, WhenWritingBufferThenCommandsAreProgrammedCorrectly) {
    typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER;

    srcBuffer->forceDisallowCPUCopy = true;
    enqueueWriteBuffer();

    ASSERT_NE(cmdList.end(), itorWalker);
    auto *cmd = (GPGPU_WALKER *)*itorWalker;

    // Verify GPGPU_WALKER parameters
    EXPECT_NE(0u, cmd->getThreadGroupIdXDimension());
    EXPECT_NE(0u, cmd->getThreadGroupIdYDimension());
    EXPECT_NE(0u, cmd->getThreadGroupIdZDimension());
    EXPECT_NE(0u, cmd->getRightExecutionMask());
    EXPECT_NE(0u, cmd->getBottomExecutionMask());
    EXPECT_EQ(GPGPU_WALKER::SIMD_SIZE_SIMD32, cmd->getSimdSize());
    EXPECT_NE(0u, cmd->getIndirectDataLength());
    EXPECT_FALSE(cmd->getIndirectParameterEnable());

    // The original went on to derive a SIMD lane mask and count the enabled lanes of the
    // right execution mask into a local that was never checked; that dead computation
    // has been removed because it contributed no verification.
}

// A write with CPU copy disallowed must raise the queue's task level.
HWTEST_F(EnqueueWriteBufferTypeTest, WhenWritingBufferThenTaskLevelIsIncremented) {
    auto taskLevelBefore = pCmdQ->taskLevel;

    srcBuffer->forceDisallowCPUCopy = true;
    EnqueueWriteBufferHelper<>::enqueueWriteBuffer(pCmdQ, srcBuffer.get(), EnqueueWriteBufferTraits::blocking);

    EXPECT_GT(pCmdQ->taskLevel, taskLevelBefore);
}

// A write with CPU copy disallowed must change the used size of the command stream.
HWTEST_F(EnqueueWriteBufferTypeTest, WhenWritingBufferThenCommandsAreAdded) {
    auto usedCmdBufferBefore = pCS->getUsed();

    srcBuffer->forceDisallowCPUCopy = true;
    EnqueueWriteBufferHelper<>::enqueueWriteBuffer(pCmdQ, srcBuffer.get(), EnqueueWriteBufferTraits::blocking);

    EXPECT_NE(usedCmdBufferBefore, pCS->getUsed());
}

HWTEST_F(EnqueueWriteBufferTypeTest, WhenWritingBufferThenIndirectDataIsAdded) {
    auto dshBefore = pDSH->getUsed();
auto iohBefore = pIOH->getUsed(); auto sshBefore = pSSH->getUsed(); srcBuffer->forceDisallowCPUCopy = true; EnqueueWriteBufferHelper<>::enqueueWriteBuffer(pCmdQ, srcBuffer.get(), EnqueueWriteBufferTraits::blocking); auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBuffer, pCmdQ->getClDevice()); ASSERT_NE(nullptr, &builder); BuiltinOpParams dc; dc.srcPtr = EnqueueWriteBufferTraits::hostPtr; dc.dstMemObj = srcBuffer.get(); dc.dstOffset = {EnqueueWriteBufferTraits::offset, 0, 0}; dc.size = {srcBuffer->getSize(), 0, 0}; MultiDispatchInfo multiDispatchInfo(dc); builder.buildDispatchInfos(multiDispatchInfo); EXPECT_NE(0u, multiDispatchInfo.size()); auto kernel = multiDispatchInfo.begin()->getKernel(); auto kernelDescriptor = &kernel->getKernelInfo().kernelDescriptor; EXPECT_TRUE(UnitTestHelper::evaluateDshUsage(dshBefore, pDSH->getUsed(), kernelDescriptor, rootDeviceIndex)); EXPECT_NE(iohBefore, pIOH->getUsed()); if (kernel->usesBindfulAddressingForBuffers()) { EXPECT_NE(sshBefore, pSSH->getUsed()); } } HWTEST_F(EnqueueWriteBufferTypeTest, WhenWritingBufferThenL3ProgrammingIsCorrect) { srcBuffer->forceDisallowCPUCopy = true; enqueueWriteBuffer(); validateL3Programming(cmdList, itorWalker); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueWriteBufferTypeTest, WhenEnqueueIsDoneThenStateBaseAddressIsProperlyProgrammed) { srcBuffer->forceDisallowCPUCopy = true; enqueueWriteBuffer(); auto &ultCsr = this->pDevice->getUltCommandStreamReceiver(); auto &hwHelper = HwHelper::get(pDevice->getHardwareInfo().platform.eRenderCoreFamily); validateStateBaseAddress(ultCsr.getMemoryManager()->getInternalHeapBaseAddress(ultCsr.rootDeviceIndex, pIOH->getGraphicsAllocation()->isAllocatedInLocalMemoryPool()), ultCsr.getMemoryManager()->getInternalHeapBaseAddress(ultCsr.rootDeviceIndex, !hwHelper.useSystemMemoryPlacementForISA(pDevice->getHardwareInfo())), pDSH, pIOH, pSSH, itorPipelineSelect, itorWalker, cmdList, 0llu); } HWCMDTEST_F(IGFX_GEN8_CORE, 
EnqueueWriteBufferTypeTest, WhenWritingBufferThenMediaInterfaceDescriptorIsCorrect) { typedef typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD MEDIA_INTERFACE_DESCRIPTOR_LOAD; typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; srcBuffer->forceDisallowCPUCopy = true; enqueueWriteBuffer(); // All state should be programmed before walker auto itorCmd = find(itorPipelineSelect, itorWalker); ASSERT_NE(itorWalker, itorCmd); auto *cmd = (MEDIA_INTERFACE_DESCRIPTOR_LOAD *)*itorCmd; // Verify we have a valid length -- multiple of INTERFACE_DESCRIPTOR_DATAs EXPECT_EQ(0u, cmd->getInterfaceDescriptorTotalLength() % sizeof(INTERFACE_DESCRIPTOR_DATA)); // Validate the start address size_t alignmentStartAddress = 64 * sizeof(uint8_t); EXPECT_EQ(0u, cmd->getInterfaceDescriptorDataStartAddress() % alignmentStartAddress); // Validate the length EXPECT_NE(0u, cmd->getInterfaceDescriptorTotalLength()); size_t alignmentTotalLength = 32 * sizeof(uint8_t); EXPECT_EQ(0u, cmd->getInterfaceDescriptorTotalLength() % alignmentTotalLength); // Generically validate this command FamilyType::PARSE::template validateCommand(cmdList.begin(), itorCmd); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueWriteBufferTypeTest, WhenWritingBufferThenInterfaceDescriptorDataIsCorrect) { typedef typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD MEDIA_INTERFACE_DESCRIPTOR_LOAD; typedef typename FamilyType::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; srcBuffer->forceDisallowCPUCopy = true; enqueueWriteBuffer(); // Extract the MIDL command auto itorCmd = find(itorPipelineSelect, itorWalker); ASSERT_NE(itorWalker, itorCmd); auto *cmdMIDL = (MEDIA_INTERFACE_DESCRIPTOR_LOAD *)*itorCmd; // Extract the SBA command itorCmd = find(cmdList.begin(), itorWalker); ASSERT_NE(itorWalker, itorCmd); auto *cmdSBA = (STATE_BASE_ADDRESS *)*itorCmd; // Extrach the DSH auto DSH = cmdSBA->getDynamicStateBaseAddress(); ASSERT_NE(0u, 
DSH); // IDD should be located within DSH auto iddStart = cmdMIDL->getInterfaceDescriptorDataStartAddress(); auto IDDEnd = iddStart + cmdMIDL->getInterfaceDescriptorTotalLength(); ASSERT_LE(IDDEnd, cmdSBA->getDynamicStateBufferSize() * MemoryConstants::pageSize); auto &IDD = *(INTERFACE_DESCRIPTOR_DATA *)cmdInterfaceDescriptorData; // Validate the kernel start pointer. Technically, a kernel can start at address 0 but let's force a value. auto kernelStartPointer = ((uint64_t)IDD.getKernelStartPointerHigh() << 32) + IDD.getKernelStartPointer(); EXPECT_LE(kernelStartPointer, cmdSBA->getInstructionBufferSize() * MemoryConstants::pageSize); EXPECT_NE(0u, IDD.getNumberOfThreadsInGpgpuThreadGroup()); EXPECT_NE(0u, IDD.getCrossThreadConstantDataReadLength()); EXPECT_NE(0u, IDD.getConstantIndirectUrbEntryReadLength()); } HWTEST_F(EnqueueWriteBufferTypeTest, WhenWritingBufferThenOnePipelineSelectIsProgrammed) { srcBuffer->forceDisallowCPUCopy = true; enqueueWriteBuffer(); int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect(); EXPECT_EQ(1, numCommands); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueWriteBufferTypeTest, WhenWritingBufferThenMediaVfeStateIsCorrect) { srcBuffer->forceDisallowCPUCopy = true; enqueueWriteBuffer(); validateMediaVFEState(&pDevice->getHardwareInfo(), cmdMediaVfeState, cmdList, itorMediaVfeState); } HWTEST_F(EnqueueWriteBufferTypeTest, givenOOQWithEnabledSupportCpuCopiesAndDstPtrEqualSrcPtrAndZeroCopyBufferTrueWhenWriteBufferIsExecutedThenTaskLevelNotIncreased) { DebugManagerStateRestore dbgRestore; DebugManager.flags.DoCpuCopyOnWriteBuffer.set(1); cl_int retVal = CL_SUCCESS; std::unique_ptr pCmdOOQ(createCommandQueue(pClDevice, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)); void *ptr = zeroCopyBuffer->getCpuAddressForMemoryTransfer(); EXPECT_EQ(retVal, CL_SUCCESS); retVal = pCmdOOQ->enqueueWriteBuffer(zeroCopyBuffer.get(), CL_FALSE, 0, MemoryConstants::cacheLineSize, ptr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); 
EXPECT_EQ(pCmdOOQ->taskLevel, 0u);
    pCmdOOQ->flush();
}

// Out-of-order queue with DoCpuCopyOnWriteBuffer forced to 0 and implicit CSR flushes off;
// the test expects success and a task level that stays at zero.
HWTEST_F(EnqueueWriteBufferTypeTest, givenOOQWithDisabledSupportCpuCopiesAndDstPtrEqualSrcPtrZeroCopyBufferWhenWriteBufferIsExecutedThenTaskLevelNotIncreased) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.DoCpuCopyOnWriteBuffer.set(0);

    // NOTE(review): template argument lists look stripped in this text on the next two
    // declarations (std::unique_ptr, UltCommandStreamReceiver, reinterpret_cast);
    // the tokens are kept as found - confirm against the original file.
    std::unique_ptr pCmdOOQ(createCommandQueue(pClDevice, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE));
    UltCommandStreamReceiver &mockCsr = reinterpret_cast &>(pCmdOOQ->getGpgpuCommandStreamReceiver());
    mockCsr.useNewResourceImplicitFlush = false;
    mockCsr.useGpuIdleImplicitFlush = false;

    // NOTE(review): the host pointer comes from srcBuffer while the write targets
    // zeroCopyBuffer, although the test name says DstPtrEqualSrcPtr - confirm this is intended.
    void *ptr = srcBuffer->getCpuAddressForMemoryTransfer();

    // The original checked retVal against CL_SUCCESS right after initialising it to
    // CL_SUCCESS; that tautological assertion has been removed.
    const cl_int retVal = pCmdOOQ->enqueueWriteBuffer(zeroCopyBuffer.get(),
                                                      CL_FALSE,
                                                      0,
                                                      MemoryConstants::cacheLineSize,
                                                      ptr,
                                                      nullptr,
                                                      0,
                                                      nullptr,
                                                      nullptr);

    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(0u, pCmdOOQ->taskLevel);
    pCmdOOQ->flush();
}

// In-order queue, DoCpuCopyOnWriteBuffer forced to 1, host pointer equal to the zero-copy
// buffer's own CPU address; the test expects the task level to stay at zero.
HWTEST_F(EnqueueWriteBufferTypeTest, givenInOrderQueueAndEnabledSupportCpuCopiesAndDstPtrZeroCopyBufferEqualSrcPtrWhenWriteBufferIsExecutedThenTaskLevelShouldNotBeIncreased) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.DoCpuCopyOnWriteBuffer.set(1);

    void *ptr = zeroCopyBuffer->getCpuAddressForMemoryTransfer();

    // Tautological pre-call check of retVal removed (see rationale in the test above).
    const cl_int retVal = pCmdQ->enqueueWriteBuffer(zeroCopyBuffer.get(),
                                                    CL_FALSE,
                                                    0,
                                                    MemoryConstants::cacheLineSize,
                                                    ptr,
                                                    nullptr,
                                                    0,
                                                    nullptr,
                                                    nullptr);

    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(pCmdQ->taskLevel, 0u);
}

// Same as the previous test but with DoCpuCopyOnWriteBuffer forced to 0.
HWTEST_F(EnqueueWriteBufferTypeTest, givenInOrderQueueAndDisabledSupportCpuCopiesAndDstPtrZeroCopyBufferEqualSrcPtrWhenWriteBufferIsExecutedThenTaskLevelShouldNotBeIncreased) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.DoCpuCopyOnWriteBuffer.set(0);
    cl_int retVal = CL_SUCCESS;
    void *ptr = zeroCopyBuffer->getCpuAddressForMemoryTransfer();

    // Tautological pre-call check of retVal removed: it compared the freshly
    // initialised value with CL_SUCCESS.
    retVal = pCmdQ->enqueueWriteBuffer(zeroCopyBuffer.get(),
                                       CL_FALSE,
                                       0,
MemoryConstants::cacheLineSize, ptr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pCmdQ->taskLevel, 0u); } HWTEST_F(EnqueueWriteBufferTypeTest, givenInOrderQueueAndDisabledSupportCpuCopiesAndDstPtrZeroCopyBufferEqualSrcPtrWhenWriteBufferIsExecutedThenTaskLevelShouldBeIncreased) { DebugManagerStateRestore dbgRestore; DebugManager.flags.DoCpuCopyOnWriteBuffer.set(0); cl_int retVal = CL_SUCCESS; void *ptr = srcBuffer->getCpuAddressForMemoryTransfer(); EXPECT_EQ(retVal, CL_SUCCESS); retVal = pCmdQ->enqueueWriteBuffer(srcBuffer.get(), CL_FALSE, 0, MemoryConstants::cacheLineSize, ptr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pCmdQ->taskLevel, 1u); } HWTEST_F(EnqueueWriteBufferTypeTest, givenInOrderQueueAndEnabledSupportCpuCopiesAndDstPtrNonZeroCopyBufferEqualSrcPtrWhenWriteBufferIsExecutedThenTaskLevelShouldBeIncreased) { DebugManagerStateRestore dbgRestore; DebugManager.flags.DoCpuCopyOnWriteBuffer.set(1); cl_int retVal = CL_SUCCESS; void *ptr = srcBuffer->getCpuAddressForMemoryTransfer(); EXPECT_EQ(retVal, CL_SUCCESS); retVal = pCmdQ->enqueueWriteBuffer(srcBuffer.get(), CL_FALSE, 0, MemoryConstants::cacheLineSize, ptr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pCmdQ->taskLevel, 1u); } HWTEST_F(EnqueueWriteBufferTypeTest, givenEnqueueWriteBufferCalledWhenLockedPtrInTransferPropertisIsAvailableThenItIsNotUnlocked) { DebugManagerStateRestore dbgRestore; DebugManager.flags.DoCpuCopyOnWriteBuffer.set(1); MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MockMemoryManager memoryManager(false, true, executionEnvironment); MockContext ctx; cl_int retVal; ctx.memoryManager = &memoryManager; auto mockCmdQ = std::make_unique>(context, pClDevice, nullptr); std::unique_ptr buffer(Buffer::create(&ctx, 0, 1, nullptr, retVal)); static_cast(buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex()))->overrideMemoryPool(MemoryPool::SystemCpuInaccessible); void *ptr = 
srcBuffer->getCpuAddressForMemoryTransfer(); retVal = mockCmdQ->enqueueWriteBuffer(buffer.get(), CL_FALSE, 0, MemoryConstants::cacheLineSize, ptr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, memoryManager.unlockResourceCalled); } HWTEST_F(EnqueueWriteBufferTypeTest, givenForcedCpuCopyWhenEnqueueWriteCompressedBufferThenDontCopyOnCpu) { DebugManagerStateRestore dbgRestore; DebugManager.flags.DoCpuCopyOnWriteBuffer.set(1); DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast(LocalMemoryAccessMode::Default)); MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MockMemoryManager memoryManager(false, true, executionEnvironment); MockContext ctx; cl_int retVal; ctx.memoryManager = &memoryManager; auto mockCmdQ = std::make_unique>(context, pClDevice, nullptr); std::unique_ptr buffer(Buffer::create(&ctx, 0, 1, nullptr, retVal)); auto allocation = static_cast(buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex())); allocation->overrideMemoryPool(MemoryPool::SystemCpuInaccessible); void *ptr = srcBuffer->getCpuAddressForMemoryTransfer(); MockBuffer::setAllocationType(allocation, pDevice->getRootDeviceEnvironment().getGmmClientContext(), true); retVal = mockCmdQ->enqueueWriteBuffer(buffer.get(), CL_FALSE, 0, MemoryConstants::cacheLineSize, ptr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_FALSE(allocation->isLocked()); EXPECT_FALSE(mockCmdQ->cpuDataTransferHandlerCalled); MockBuffer::setAllocationType(allocation, pDevice->getRootDeviceEnvironment().getGmmClientContext(), false); retVal = mockCmdQ->enqueueWriteBuffer(buffer.get(), CL_FALSE, 0, MemoryConstants::cacheLineSize, ptr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(allocation->isLocked()); EXPECT_TRUE(mockCmdQ->cpuDataTransferHandlerCalled); } HWTEST_F(EnqueueWriteBufferTypeTest, givenEnqueueWriteBufferCalledWhenLockedPtrInTransferPropertisIsNotAvailableThenItIsNotUnlocked) { DebugManagerStateRestore 
dbgRestore; DebugManager.flags.DoCpuCopyOnWriteBuffer.set(1); MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MockMemoryManager memoryManager(false, true, executionEnvironment); MockContext ctx; cl_int retVal; ctx.memoryManager = &memoryManager; auto mockCmdQ = std::make_unique>(context, pClDevice, nullptr); std::unique_ptr buffer(Buffer::create(&ctx, 0, 1, nullptr, retVal)); static_cast(buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex()))->overrideMemoryPool(MemoryPool::System4KBPages); void *ptr = srcBuffer->getCpuAddressForMemoryTransfer(); retVal = mockCmdQ->enqueueWriteBuffer(buffer.get(), CL_FALSE, 0, MemoryConstants::cacheLineSize, ptr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, memoryManager.unlockResourceCalled); } using NegativeFailAllocationTest = Test; HWTEST_F(NegativeFailAllocationTest, givenEnqueueWriteBufferWhenHostPtrAllocationCreationFailsThenReturnOutOfResource) { cl_int retVal = CL_SUCCESS; retVal = pCmdQ->enqueueWriteBuffer(buffer.get(), CL_FALSE, 0, MemoryConstants::cacheLineSize, ptr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_OUT_OF_RESOURCES, retVal); } struct EnqueueWriteBufferHw : public ::testing::Test { void SetUp() override { if (is32bit) { GTEST_SKIP(); } device = std::make_unique(MockClDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); context.reset(new MockContext(device.get())); } std::unique_ptr device; std::unique_ptr context; MockBuffer srcBuffer; uint64_t bigSize = 4ull * MemoryConstants::gigaByte; uint64_t smallSize = 4ull * MemoryConstants::gigaByte - 1; }; using EnqueueReadWriteStatelessTest = EnqueueWriteBufferHw; HWTEST_F(EnqueueReadWriteStatelessTest, WhenWritingBufferStatelessThenSuccessIsReturned) { auto pCmdQ = std::make_unique>(context.get(), device.get()); void *missAlignedPtr = reinterpret_cast(0x1041); srcBuffer.size = static_cast(bigSize); auto retVal = pCmdQ->enqueueWriteBuffer(&srcBuffer, CL_FALSE, 0, MemoryConstants::cacheLineSize, 
missAlignedPtr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } using EnqueueWriteBufferStatefulTest = EnqueueWriteBufferHw; HWTEST_F(EnqueueWriteBufferStatefulTest, WhenWritingBufferStatefulThenSuccessIsReturned) { auto pCmdQ = std::make_unique>(context.get(), device.get()); void *missAlignedPtr = reinterpret_cast(0x1041); srcBuffer.size = static_cast(smallSize); auto retVal = pCmdQ->enqueueWriteBuffer(&srcBuffer, CL_FALSE, 0, MemoryConstants::cacheLineSize, missAlignedPtr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } HWTEST_F(EnqueueWriteBufferHw, givenHostPtrIsFromMappedBufferWhenWriteBufferIsCalledThenReuseGraphicsAllocation) { DebugManagerStateRestore restore{}; DebugManager.flags.DisableZeroCopyForBuffers.set(1); DebugManager.flags.DoCpuCopyOnWriteBuffer.set(0); MockCommandQueueHw queue(context.get(), device.get(), nullptr); auto &csr = device->getUltCommandStreamReceiver(); BufferDefaults::context = context.get(); auto bufferForMap = clUniquePtr(BufferHelper<>::create()); auto bufferForRead = clUniquePtr(BufferHelper<>::create()); cl_int retVal{}; void *mappedPtr = queue.enqueueMapBuffer(bufferForMap.get(), CL_TRUE, CL_MAP_READ, 0, bufferForMap->getSize(), 0, nullptr, nullptr, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, mappedPtr); EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled); MapOperationsHandler *mapOperationsHandler = context->getMapOperationsStorage().getHandlerIfExists(bufferForMap.get()); EXPECT_NE(nullptr, mapOperationsHandler); MapInfo mapInfo{}; EXPECT_TRUE(mapOperationsHandler->find(mappedPtr, mapInfo)); EXPECT_NE(nullptr, mapInfo.graphicsAllocation); auto unmappedPtr = std::make_unique(bufferForRead->getSize()); retVal = queue.enqueueWriteBuffer(bufferForRead.get(), CL_TRUE, 0, bufferForRead->getSize(), unmappedPtr.get(), nullptr, 0, 0, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, csr.createAllocationForHostSurfaceCalled); retVal = queue.enqueueWriteBuffer(bufferForRead.get(), 
CL_TRUE, 0, bufferForRead->getSize(), mappedPtr, nullptr, 0, 0, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, csr.createAllocationForHostSurfaceCalled); } compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/enqueue_write_image_fixture.h000066400000000000000000000036571422164147700324260ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/ptr_math.h" #include "shared/test/common/test_macros/test_checks_shared.h" #include "opencl/test/unit_test/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/command_queue/enqueue_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "gtest/gtest.h" namespace NEO { struct EnqueueWriteImageTest : public CommandEnqueueFixture, public ::testing::Test { void SetUp(void) override { REQUIRE_IMAGES_OR_SKIP(defaultHwInfo); CommandEnqueueFixture::SetUp(); context = new MockContext(pClDevice); dstImage = Image2dHelper<>::create(context); dstAllocation = dstImage->getGraphicsAllocation(pClDevice->getRootDeviceIndex()); const auto &imageDesc = dstImage->getImageDesc(); srcPtr = new float[imageDesc.image_width * imageDesc.image_height]; } void TearDown(void) override { if (IsSkipped()) { return; } delete dstImage; delete[] srcPtr; delete context; CommandEnqueueFixture::TearDown(); } protected: template void enqueueWriteImage(cl_bool blocking = EnqueueWriteImageTraits::blocking) { auto retVal = EnqueueWriteImageHelper<>::enqueueWriteImage( pCmdQ, dstImage, blocking); EXPECT_EQ(CL_SUCCESS, retVal); parseCommands(*pCmdQ); } float *srcPtr = nullptr; Image *dstImage = nullptr; GraphicsAllocation *dstAllocation = nullptr; MockContext *context = nullptr; }; struct EnqueueWriteImageMipMapTest : public EnqueueWriteImageTest, public ::testing::WithParamInterface { }; } // namespace NEO 
compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/enqueue_write_image_tests.cpp000066400000000000000000001103121422164147700324200ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/wait_status.h" #include "shared/source/memory_manager/allocations_list.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/memory_manager/migration_sync_data.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/libult/ult_command_stream_receiver.h" #include "shared/test/common/mocks/mock_builtins.h" #include "shared/test/common/mocks/mock_gmm_resource_info.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/built_ins/builtins_dispatch_builder.h" #include "opencl/test/unit_test/command_queue/enqueue_write_image_fixture.h" #include "opencl/test/unit_test/fixtures/one_mip_level_image_fixture.h" #include "opencl/test/unit_test/gen_common/gen_commands_common_validation.h" #include "opencl/test/unit_test/mocks/mock_builtin_dispatch_info_builder.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "reg_configs_common.h" using namespace NEO; HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueWriteImageTest, WhenWritingImageThenCommandsAreProgrammedCorrectly) { typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; enqueueWriteImage(); auto *cmd = reinterpret_cast(cmdWalker); ASSERT_NE(nullptr, cmd); // Verify GPGPU_WALKER parameters EXPECT_NE(0u, cmd->getThreadGroupIdXDimension()); EXPECT_NE(0u, cmd->getThreadGroupIdYDimension()); EXPECT_NE(0u, cmd->getThreadGroupIdZDimension()); EXPECT_NE(0u, cmd->getRightExecutionMask()); EXPECT_NE(0u, cmd->getBottomExecutionMask()); EXPECT_EQ(GPGPU_WALKER::SIMD_SIZE_SIMD32, cmd->getSimdSize()); EXPECT_NE(0u, cmd->getIndirectDataLength()); 
EXPECT_FALSE(cmd->getIndirectParameterEnable()); // Compute the SIMD lane mask size_t simd = cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD32 ? 32 : cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD16 ? 16 : 8; uint64_t simdMask = maxNBitValue(simd); // Mask off lanes based on the execution masks auto laneMaskRight = cmd->getRightExecutionMask() & simdMask; auto lanesPerThreadX = 0; while (laneMaskRight) { lanesPerThreadX += laneMaskRight & 1; laneMaskRight >>= 1; } } HWTEST_F(EnqueueWriteImageTest, GivenBlockingEnqueueWhenWritingImageThenTaskLevelIsNotIncremented) { //this test case assumes IOQ auto &csr = pDevice->getUltCommandStreamReceiver(); csr.taskCount = pCmdQ->taskCount + 100; csr.taskLevel = pCmdQ->taskLevel + 50; auto oldCsrTaskLevel = csr.peekTaskLevel(); EnqueueWriteImageHelper<>::enqueueWriteImage(pCmdQ, dstImage, CL_TRUE); EXPECT_EQ(csr.peekTaskCount(), pCmdQ->taskCount); EXPECT_EQ(oldCsrTaskLevel, pCmdQ->taskLevel); } HWTEST_F(EnqueueWriteImageTest, GivenGpuHangAndBlockingEnqueueWhenWritingImageThenOutOfResourcesIsReturned) { std::unique_ptr device(new MockClDevice{MockClDevice::createWithNewExecutionEnvironment(nullptr)}); cl_queue_properties props = {}; MockCommandQueueHw mockCommandQueueHw(context, device.get(), &props); mockCommandQueueHw.waitForAllEnginesReturnValue = WaitStatus::GpuHang; const auto enqueueResult = EnqueueWriteImageHelper<>::enqueueWriteImage(&mockCommandQueueHw, dstImage, CL_TRUE); EXPECT_EQ(CL_OUT_OF_RESOURCES, enqueueResult); EXPECT_EQ(1, mockCommandQueueHw.waitForAllEnginesCalledCount); } HWTEST_F(EnqueueWriteImageTest, GivenNonBlockingEnqueueWhenWritingImageThenTaskLevelIsIncremented) { //this test case assumes IOQ auto &csr = pDevice->getUltCommandStreamReceiver(); csr.taskCount = pCmdQ->taskCount + 100; csr.taskLevel = pCmdQ->taskLevel + 50; EnqueueWriteImageHelper<>::enqueueWriteImage(pCmdQ, dstImage, CL_FALSE); EXPECT_EQ(csr.peekTaskCount(), pCmdQ->taskCount); EXPECT_EQ(csr.peekTaskLevel(), pCmdQ->taskLevel + 1); 
} HWTEST_F(EnqueueWriteImageTest, WhenWritingImageThenTaskLevelIsIncremented) { auto taskLevelBefore = pCmdQ->taskLevel; EnqueueWriteImageHelper<>::enqueueWriteImage(pCmdQ, dstImage, EnqueueWriteImageTraits::blocking); EXPECT_GT(pCmdQ->taskLevel, taskLevelBefore); } HWTEST_F(EnqueueWriteImageTest, WhenWritingImageThenCommandsAreAdded) { auto usedCmdBufferBefore = pCS->getUsed(); EnqueueWriteImageHelper<>::enqueueWriteImage(pCmdQ, dstImage, EnqueueWriteImageTraits::blocking); EXPECT_NE(usedCmdBufferBefore, pCS->getUsed()); } HWTEST_F(EnqueueWriteImageTest, WhenWritingImageThenIndirectDataIsAdded) { auto dshBefore = pDSH->getUsed(); auto iohBefore = pIOH->getUsed(); auto sshBefore = pSSH->getUsed(); EnqueueWriteImageHelper<>::enqueueWriteImage(pCmdQ, dstImage, EnqueueWriteImageTraits::blocking); EXPECT_TRUE(UnitTestHelper::evaluateDshUsage(dshBefore, pDSH->getUsed(), nullptr, rootDeviceIndex)); EXPECT_NE(iohBefore, pIOH->getUsed()); EXPECT_NE(sshBefore, pSSH->getUsed()); } HWTEST_F(EnqueueWriteImageTest, WhenWritingImageThenL3ProgrammingIsCorrect) { enqueueWriteImage(); validateL3Programming(cmdList, itorWalker); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueWriteImageTest, WhenEnqueueIsDoneThenStateBaseAddressIsProperlyProgrammed) { enqueueWriteImage(); auto &ultCsr = this->pDevice->getUltCommandStreamReceiver(); auto &hwHelper = HwHelper::get(pDevice->getHardwareInfo().platform.eRenderCoreFamily); validateStateBaseAddress(ultCsr.getMemoryManager()->getInternalHeapBaseAddress(ultCsr.rootDeviceIndex, pIOH->getGraphicsAllocation()->isAllocatedInLocalMemoryPool()), ultCsr.getMemoryManager()->getInternalHeapBaseAddress(ultCsr.rootDeviceIndex, !hwHelper.useSystemMemoryPlacementForISA(pDevice->getHardwareInfo())), pDSH, pIOH, pSSH, itorPipelineSelect, itorWalker, cmdList, 0llu); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueWriteImageTest, WhenWritingImageThenMediaInterfaceDescriptorIsCorrect) { typedef typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD MEDIA_INTERFACE_DESCRIPTOR_LOAD; 
typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; enqueueWriteImage(); // All state should be programmed before walker auto cmd = reinterpret_cast(cmdMediaInterfaceDescriptorLoad); ASSERT_NE(nullptr, cmd); // Verify we have a valid length -- multiple of INTERFACE_DESCRIPTOR_DATAs EXPECT_EQ(0u, cmd->getInterfaceDescriptorTotalLength() % sizeof(INTERFACE_DESCRIPTOR_DATA)); // Validate the start address size_t alignmentStartAddress = 64 * sizeof(uint8_t); EXPECT_EQ(0u, cmd->getInterfaceDescriptorDataStartAddress() % alignmentStartAddress); // Validate the length EXPECT_NE(0u, cmd->getInterfaceDescriptorTotalLength()); size_t alignmentTotalLength = 32 * sizeof(uint8_t); EXPECT_EQ(0u, cmd->getInterfaceDescriptorTotalLength() % alignmentTotalLength); // Generically validate this command FamilyType::PARSE::template validateCommand(cmdList.begin(), itorMediaInterfaceDescriptorLoad); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueWriteImageTest, WhenWritingImageThenInterfaceDescriptorDataIsCorrect) { typedef typename FamilyType::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; enqueueWriteImage(); // Extract the interfaceDescriptorData auto cmdSBA = (STATE_BASE_ADDRESS *)cmdStateBaseAddress; auto &interfaceDescriptorData = *(INTERFACE_DESCRIPTOR_DATA *)cmdInterfaceDescriptorData; // Validate the kernel start pointer. Technically, a kernel can start at address 0 but let's force a value. auto kernelStartPointer = ((uint64_t)interfaceDescriptorData.getKernelStartPointerHigh() << 32) + interfaceDescriptorData.getKernelStartPointer(); EXPECT_LE(kernelStartPointer, cmdSBA->getInstructionBufferSize() * MemoryConstants::pageSize); // EnqueueWriteImage uses a byte copy. Need to convert to bytes. 
auto localWorkSize = 2 * 2 * sizeof(float); auto simd = 32; auto numThreadsPerThreadGroup = Math::divideAndRoundUp(localWorkSize, simd); EXPECT_EQ(numThreadsPerThreadGroup, interfaceDescriptorData.getNumberOfThreadsInGpgpuThreadGroup()); EXPECT_NE(0u, interfaceDescriptorData.getCrossThreadConstantDataReadLength()); EXPECT_NE(0u, interfaceDescriptorData.getConstantIndirectUrbEntryReadLength()); // We shouldn't have these pointers the same. EXPECT_NE(kernelStartPointer, interfaceDescriptorData.getBindingTablePointer()); } HWTEST_F(EnqueueWriteImageTest, WhenWritingImageThenSurfaceStateIsProgrammedCorrectly) { typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto mockCmdQ = std::make_unique>(context, pClDevice, nullptr); VariableBackup cmdQBackup(&pCmdQ, mockCmdQ.get()); mockCmdQ->storeMultiDispatchInfo = true; enqueueWriteImage(); auto index = mockCmdQ->storedMultiDispatchInfo.begin()->getKernel()->getKernelInfo().getArgDescriptorAt(1).template as().bindful / sizeof(RENDER_SURFACE_STATE); const auto &surfaceState = getSurfaceState(&pCmdQ->getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0), static_cast(index)); // EnqueueWriteImage uses multi-byte copies depending on per-pixel-size-in-bytes const auto &imageDesc = dstImage->getImageDesc(); EXPECT_EQ(imageDesc.image_width, surfaceState.getWidth()); EXPECT_EQ(imageDesc.image_height, surfaceState.getHeight()); EXPECT_NE(0u, surfaceState.getSurfacePitch()); EXPECT_NE(0u, surfaceState.getSurfaceType()); EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_FORMAT_R32_UINT, surfaceState.getSurfaceFormat()); EXPECT_EQ(MockGmmResourceInfo::getHAlignSurfaceStateResult, surfaceState.getSurfaceHorizontalAlignment()); EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_VERTICAL_ALIGNMENT_VALIGN_4, surfaceState.getSurfaceVerticalAlignment()); EXPECT_EQ(dstAllocation->getGpuAddress(), surfaceState.getSurfaceBaseAddress()); } HWTEST_F(EnqueueWriteImageTest, WhenWritingImageThenOnePipelineSelectIsProgrammed) { enqueueWriteImage(); 
int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect(); EXPECT_EQ(1, numCommands); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueWriteImageTest, WhenWritingImageThenMediaVfeStateIsCorrect) { enqueueWriteImage(); validateMediaVFEState(&pDevice->getHardwareInfo(), cmdMediaVfeState, cmdList, itorMediaVfeState); } HWTEST_F(EnqueueWriteImageTest, GivenImage1DarrayWhenReadWriteImageIsCalledThenHostPtrSizeIsCalculatedProperly) { std::unique_ptr dstImage2(Image1dArrayHelper<>::create(context)); auto &imageDesc = dstImage2->getImageDesc(); auto imageSize = imageDesc.image_width * imageDesc.image_array_size * 4; size_t origin[] = {0, 0, 0}; size_t region[] = {imageDesc.image_width, imageDesc.image_array_size, 1}; EnqueueWriteImageHelper<>::enqueueWriteImage(pCmdQ, dstImage2.get(), CL_FALSE, origin, region); auto &csr = pCmdQ->getGpgpuCommandStreamReceiver(); auto temporaryAllocation1 = csr.getTemporaryAllocations().peekHead(); ASSERT_NE(nullptr, temporaryAllocation1); EXPECT_EQ(temporaryAllocation1->getUnderlyingBufferSize(), imageSize); EnqueueReadImageHelper<>::enqueueReadImage(pCmdQ, dstImage2.get(), CL_FALSE, origin, region); auto temporaryAllocation2 = temporaryAllocation1->next; ASSERT_NE(nullptr, temporaryAllocation2); EXPECT_EQ(temporaryAllocation2->getUnderlyingBufferSize(), imageSize); } HWTEST_F(EnqueueWriteImageTest, GivenImage1DarrayWhenWriteImageIsCalledThenRowPitchIsSetToSlicePitch) { auto builtIns = new MockBuiltins(); pCmdQ->getDevice().getExecutionEnvironment()->rootDeviceEnvironments[pCmdQ->getDevice().getRootDeviceIndex()]->builtins.reset(builtIns); EBuiltInOps::Type copyBuiltIn = EBuiltInOps::CopyBufferToImage3d; auto &origBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder( copyBuiltIn, pCmdQ->getClDevice()); // substitute original builder with mock builder auto oldBuilder = pClExecutionEnvironment->setBuiltinDispatchInfoBuilder( rootDeviceIndex, copyBuiltIn, std::unique_ptr(new MockBuiltinDispatchInfoBuilder(*builtIns, 
pCmdQ->getClDevice(), &origBuilder))); std::unique_ptr image; std::unique_ptr destImage(Image1dArrayHelper<>::create(context)); auto &imageDesc = destImage->getImageDesc(); size_t origin[] = {0, 0, 0}; size_t region[] = {imageDesc.image_width, imageDesc.image_array_size, 1}; size_t rowPitch = 64; size_t slicePitch = 128; EnqueueWriteImageHelper<>::enqueueWriteImage(pCmdQ, destImage.get(), CL_FALSE, origin, region, rowPitch, slicePitch); auto &mockBuilder = static_cast(BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(copyBuiltIn, pCmdQ->getClDevice())); auto params = mockBuilder.getBuiltinOpParams(); EXPECT_EQ(params->srcRowPitch, slicePitch); // restore original builder and retrieve mock builder auto newBuilder = pClExecutionEnvironment->setBuiltinDispatchInfoBuilder( rootDeviceIndex, copyBuiltIn, std::move(oldBuilder)); EXPECT_NE(nullptr, newBuilder); } HWTEST_F(EnqueueWriteImageTest, GivenImage2DarrayWhenReadWriteImageIsCalledThenHostPtrSizeIsCalculatedProperly) { std::unique_ptr dstImage(Image2dArrayHelper<>::create(context)); auto &imageDesc = dstImage->getImageDesc(); auto imageSize = imageDesc.image_width * imageDesc.image_height * imageDesc.image_array_size * 4; size_t origin[] = {0, 0, 0}; size_t region[] = {imageDesc.image_width, imageDesc.image_height, imageDesc.image_array_size}; EnqueueWriteImageHelper<>::enqueueWriteImage(pCmdQ, dstImage.get(), CL_FALSE, origin, region); auto &csr = pCmdQ->getGpgpuCommandStreamReceiver(); auto temporaryAllocation1 = csr.getTemporaryAllocations().peekHead(); ASSERT_NE(nullptr, temporaryAllocation1); EXPECT_EQ(temporaryAllocation1->getUnderlyingBufferSize(), imageSize); EnqueueReadImageHelper<>::enqueueReadImage(pCmdQ, dstImage.get(), CL_FALSE, origin, region); auto temporaryAllocation2 = temporaryAllocation1->next; ASSERT_NE(nullptr, temporaryAllocation2); EXPECT_EQ(temporaryAllocation1->getUnderlyingBufferSize(), imageSize); } HWTEST_F(EnqueueWriteImageTest, 
GivenImage1DAndImageShareTheSameStorageWithHostPtrWhenReadWriteImageIsCalledThenImageIsNotWritten) { cl_int retVal = CL_SUCCESS; std::unique_ptr dstImage(Image1dHelper<>::create(context)); auto &imageDesc = dstImage->getImageDesc(); std::unique_ptr pCmdOOQ(createCommandQueue(pClDevice, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)); size_t origin[] = {0, 0, 0}; size_t region[] = {imageDesc.image_width, 1, 1}; void *ptr = dstImage->getCpuAddressForMemoryTransfer(); size_t rowPitch = dstImage->getHostPtrRowPitch(); size_t slicePitch = dstImage->getHostPtrSlicePitch(); retVal = pCmdOOQ->enqueueWriteImage(dstImage.get(), CL_FALSE, origin, region, rowPitch, slicePitch, ptr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pCmdOOQ->taskLevel, 0u); } HWTEST_F(EnqueueWriteImageTest, GivenImage1DArrayAndImageShareTheSameStorageWithHostPtrWhenReadWriteImageIsCalledThenImageIsNotWritten) { cl_int retVal = CL_SUCCESS; std::unique_ptr dstImage(Image1dArrayHelper<>::create(context)); auto &imageDesc = dstImage->getImageDesc(); size_t origin[] = {imageDesc.image_width / 2, imageDesc.image_array_size / 2, 0}; size_t region[] = {imageDesc.image_width - (imageDesc.image_width / 2), imageDesc.image_array_size - (imageDesc.image_array_size / 2), 1}; void *ptr = dstImage->getCpuAddressForMemoryTransfer(); auto bytesPerPixel = 4; size_t rowPitch = dstImage->getHostPtrRowPitch(); size_t slicePitch = dstImage->getHostPtrSlicePitch(); auto pOffset = origin[2] * rowPitch + origin[1] * slicePitch + origin[0] * bytesPerPixel; void *ptrStorage = ptrOffset(ptr, pOffset); retVal = pCmdQ->enqueueWriteImage(dstImage.get(), CL_FALSE, origin, region, rowPitch, slicePitch, ptrStorage, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pCmdQ->taskLevel, 0u); } HWTEST_F(EnqueueWriteImageTest, GivenSharedContextZeroCopy2DImageWhenEnqueueWriteImageWithMappedPointerIsCalledThenImageIsNotWritten) { cl_int retVal = CL_SUCCESS; context->isSharedContext = true; 
std::unique_ptr dstImage(ImageHelper>::create(context)); EXPECT_TRUE(dstImage->isMemObjZeroCopy()); auto &imageDesc = dstImage->getImageDesc(); size_t origin[] = {0, 0, 0}; size_t region[] = {imageDesc.image_width, imageDesc.image_height, 1}; void *ptr = dstImage->getCpuAddressForMemoryTransfer(); size_t rowPitch = dstImage->getHostPtrRowPitch(); size_t slicePitch = dstImage->getHostPtrSlicePitch(); retVal = pCmdQ->enqueueReadImage(dstImage.get(), CL_FALSE, origin, region, rowPitch, slicePitch, ptr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pCmdQ->taskLevel, 0u); } HWTEST_F(EnqueueWriteImageTest, GivenImage1DThatIsZeroCopyWhenWriteImageWithTheSamePointerAndOutputEventIsPassedThenEventHasCorrectCommandTypeSet) { cl_int retVal = CL_SUCCESS; std::unique_ptr srcImage(Image1dHelper<>::create(context)); auto &imageDesc = srcImage->getImageDesc(); size_t origin[] = {0, 0, 0}; size_t region[] = {imageDesc.image_width, imageDesc.image_height, 1}; void *ptr = srcImage->getCpuAddressForMemoryTransfer(); size_t rowPitch = srcImage->getHostPtrRowPitch(); size_t slicePitch = srcImage->getHostPtrSlicePitch(); cl_uint numEventsInWaitList = 0; cl_event event = nullptr; retVal = pCmdQ->enqueueWriteImage(srcImage.get(), CL_FALSE, origin, region, rowPitch, slicePitch, ptr, nullptr, numEventsInWaitList, nullptr, &event); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, event); auto pEvent = static_cast(event); EXPECT_EQ(static_cast(CL_COMMAND_WRITE_IMAGE), pEvent->getCommandType()); pEvent->release(); } typedef EnqueueWriteImageMipMapTest MipMapWriteImageTest; HWTEST_P(MipMapWriteImageTest, GivenImageWithMipLevelNonZeroWhenReadImageIsCalledThenProperMipLevelIsSet) { auto image_type = (cl_mem_object_type)GetParam(); auto builtIns = new MockBuiltins(); pCmdQ->getDevice().getExecutionEnvironment()->rootDeviceEnvironments[pCmdQ->getDevice().getRootDeviceIndex()]->builtins.reset(builtIns); auto &origBuilder = 
BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder( EBuiltInOps::CopyBufferToImage3d, pCmdQ->getClDevice()); // substitute original builder with mock builder auto oldBuilder = pClExecutionEnvironment->setBuiltinDispatchInfoBuilder( rootDeviceIndex, EBuiltInOps::CopyBufferToImage3d, std::unique_ptr(new MockBuiltinDispatchInfoBuilder(*builtIns, pCmdQ->getClDevice(), &origBuilder))); cl_int retVal = CL_SUCCESS; cl_image_desc imageDesc = {}; uint32_t expectedMipLevel = 3; imageDesc.image_type = image_type; imageDesc.num_mip_levels = 10; imageDesc.image_width = 4; imageDesc.image_height = 1; imageDesc.image_depth = 1; size_t origin[] = {0, 0, 0, 0}; size_t region[] = {imageDesc.image_width, 1, 1}; std::unique_ptr image; switch (image_type) { case CL_MEM_OBJECT_IMAGE1D: origin[1] = expectedMipLevel; image = std::unique_ptr(ImageHelper::create(context, &imageDesc)); break; case CL_MEM_OBJECT_IMAGE1D_ARRAY: imageDesc.image_array_size = 2; origin[2] = expectedMipLevel; image = std::unique_ptr(ImageHelper::create(context, &imageDesc)); break; case CL_MEM_OBJECT_IMAGE2D: origin[2] = expectedMipLevel; image = std::unique_ptr(ImageHelper::create(context, &imageDesc)); break; case CL_MEM_OBJECT_IMAGE2D_ARRAY: imageDesc.image_array_size = 2; origin[3] = expectedMipLevel; image = std::unique_ptr(ImageHelper::create(context, &imageDesc)); break; case CL_MEM_OBJECT_IMAGE3D: origin[3] = expectedMipLevel; image = std::unique_ptr(ImageHelper::create(context, &imageDesc)); break; } EXPECT_NE(nullptr, image.get()); auto hostPtrSize = Image::calculateHostPtrSize(region, image->getHostPtrRowPitch(), image->getHostPtrSlicePitch(), image->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes, image_type); std::unique_ptr ptr = std::unique_ptr(new uint32_t[hostPtrSize]); retVal = pCmdQ->enqueueWriteImage(image.get(), CL_FALSE, origin, region, 0, 0, ptr.get(), nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); auto &mockBuilder = 
static_cast(BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToImage3d, pCmdQ->getClDevice())); auto params = mockBuilder.getBuiltinOpParams(); EXPECT_EQ(expectedMipLevel, params->dstMipLevel); // restore original builder and retrieve mock builder auto newBuilder = pClExecutionEnvironment->setBuiltinDispatchInfoBuilder( rootDeviceIndex, EBuiltInOps::CopyBufferToImage3d, std::move(oldBuilder)); EXPECT_NE(nullptr, newBuilder); } INSTANTIATE_TEST_CASE_P(MipMapWriteImageTest_GivenImageWithMipLevelNonZeroWhenReadImageIsCalledThenProperMipLevelIsSet, MipMapWriteImageTest, ::testing::Values(CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE2D, CL_MEM_OBJECT_IMAGE2D_ARRAY, CL_MEM_OBJECT_IMAGE3D)); using NegativeFailAllocationTest = Test; HWTEST_F(NegativeFailAllocationTest, givenEnqueueReadImageWhenHostPtrAllocationCreationFailsThenReturnOutOfResource) { cl_int retVal = CL_SUCCESS; auto &imageDesc = image->getImageDesc(); size_t origin[] = {0, 0, 0}; size_t region[] = {imageDesc.image_width, imageDesc.image_height, 1}; size_t rowPitch = image->getHostPtrRowPitch(); size_t slicePitch = image->getHostPtrSlicePitch(); retVal = pCmdQ->enqueueReadImage(image.get(), CL_FALSE, origin, region, rowPitch, slicePitch, ptr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_OUT_OF_RESOURCES, retVal); } using OneMipLevelWriteImageTests = Test; HWTEST_F(OneMipLevelWriteImageTests, GivenNotMippedImageWhenWritingImageThenDoNotProgramDestinationMipLevel) { auto queue = createQueue(); auto retVal = queue->enqueueWriteImage( image.get(), CL_TRUE, origin, region, 0, 0, cpuPtr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(builtinOpsParamsCaptured); EXPECT_EQ(0u, usedBuiltinOpsParams.dstMipLevel); } HWTEST_F(EnqueueWriteImageTest, whenEnqueueWriteImageThenBuiltinKernelIsResolved) { UserEvent userEvent{}; cl_event inputEvent = &userEvent; cl_event outputEvent{}; EnqueueWriteImageHelper<>::enqueueWriteImage(pCmdQ, dstImage, 
CL_FALSE, EnqueueWriteImageTraits::origin, EnqueueWriteImageTraits::region, EnqueueWriteImageTraits::rowPitch, EnqueueWriteImageTraits::slicePitch, EnqueueWriteImageTraits::hostPtr, EnqueueWriteImageTraits::mapAllocation, 1u, &inputEvent, &outputEvent); auto pEvent = castToObject(outputEvent); auto pCommand = static_cast(pEvent->peekCommand()); EXPECT_FALSE(pCommand->peekKernel()->Kernel::canTransformImages()); EXPECT_TRUE(pCommand->peekKernel()->isPatched()); userEvent.setStatus(CL_COMPLETE); pEvent->release(); pCmdQ->finish(); } HWTEST_F(EnqueueWriteImageTest, givenMultiRootDeviceImageWhenEnqueueWriteImageThenKernelRequiresMigration) { MockDefaultContext context; auto pCmdQ1 = createCommandQueue(context.getDevice(0), nullptr, &context); auto pImage = Image2dHelper<>::create(&context); EXPECT_TRUE(pImage->getMultiGraphicsAllocation().requiresMigrations()); UserEvent userEvent{}; cl_event inputEvent = &userEvent; cl_event outputEvent{}; EnqueueWriteImageHelper<>::enqueueWriteImage(pCmdQ1, pImage, CL_FALSE, EnqueueWriteImageTraits::origin, EnqueueWriteImageTraits::region, EnqueueWriteImageTraits::rowPitch, EnqueueWriteImageTraits::slicePitch, EnqueueWriteImageTraits::hostPtr, EnqueueWriteImageTraits::mapAllocation, 1u, &inputEvent, &outputEvent); auto pEvent = castToObject(outputEvent); auto pCommand = static_cast(pEvent->peekCommand()); auto pKernel = pCommand->peekKernel(); EXPECT_FALSE(pKernel->Kernel::canTransformImages()); EXPECT_TRUE(pKernel->isPatched()); EXPECT_TRUE(pKernel->requiresMemoryMigration()); auto &memObjectsForMigration = pKernel->getMemObjectsToMigrate(); ASSERT_EQ(1u, memObjectsForMigration.size()); auto memObj = memObjectsForMigration.begin()->second; for (auto &rootDeviceIndex : context.getRootDeviceIndices()) { EXPECT_EQ(pImage->getMultiGraphicsAllocation().getGraphicsAllocation(rootDeviceIndex), memObj->getMultiGraphicsAllocation().getGraphicsAllocation(rootDeviceIndex)); } 
EXPECT_TRUE(memObj->getMultiGraphicsAllocation().requiresMigrations()); EXPECT_EQ(MigrationSyncData::locationUndefined, pImage->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); userEvent.setStatus(CL_COMPLETE); EXPECT_EQ(0u, pImage->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); pEvent->release(); pCmdQ1->finish(); pCmdQ1->release(); pImage->release(); } HWTEST_F(EnqueueWriteImageTest, givenMultiRootDeviceImageWhenEnqueueWriteImageIsCalledMultipleTimesThenEachKernelUsesDifferentImage) { MockDefaultContext context; auto pCmdQ1 = createCommandQueue(context.getDevice(0), nullptr, &context); auto pImage = Image2dHelper<>::create(&context); EXPECT_TRUE(pImage->getMultiGraphicsAllocation().requiresMigrations()); UserEvent userEvent{}; cl_event inputEvent = &userEvent; cl_event outputEvent0{}; cl_event outputEvent1{}; EXPECT_EQ(MigrationSyncData::locationUndefined, pImage->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); EnqueueWriteImageHelper<>::enqueueWriteImage(pCmdQ1, pImage, CL_FALSE, EnqueueWriteImageTraits::origin, EnqueueWriteImageTraits::region, EnqueueWriteImageTraits::rowPitch, EnqueueWriteImageTraits::slicePitch, EnqueueWriteImageTraits::hostPtr, EnqueueWriteImageTraits::mapAllocation, 1u, &inputEvent, &outputEvent0); EXPECT_EQ(MigrationSyncData::locationUndefined, pImage->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); auto pEvent0 = castToObject(outputEvent0); auto pCommand0 = static_cast(pEvent0->peekCommand()); auto pKernel0 = pCommand0->peekKernel(); EXPECT_FALSE(pKernel0->Kernel::canTransformImages()); EXPECT_TRUE(pKernel0->isPatched()); EXPECT_TRUE(pKernel0->requiresMemoryMigration()); auto &memObjectsForMigration0 = pKernel0->getMemObjectsToMigrate(); ASSERT_EQ(1u, memObjectsForMigration0.size()); auto memObj0 = memObjectsForMigration0.begin()->second; for (auto &rootDeviceIndex : context.getRootDeviceIndices()) { 
EXPECT_EQ(pImage->getMultiGraphicsAllocation().getGraphicsAllocation(rootDeviceIndex), memObj0->getMultiGraphicsAllocation().getGraphicsAllocation(rootDeviceIndex)); } EXPECT_TRUE(memObj0->getMultiGraphicsAllocation().requiresMigrations()); EnqueueWriteImageHelper<>::enqueueWriteImage(pCmdQ1, pImage, CL_FALSE, EnqueueWriteImageTraits::origin, EnqueueWriteImageTraits::region, EnqueueWriteImageTraits::rowPitch, EnqueueWriteImageTraits::slicePitch, EnqueueWriteImageTraits::hostPtr, EnqueueWriteImageTraits::mapAllocation, 1u, &outputEvent0, &outputEvent1); EXPECT_EQ(MigrationSyncData::locationUndefined, pImage->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); auto pEvent1 = castToObject(outputEvent1); auto pCommand1 = static_cast(pEvent1->peekCommand()); auto pKernel1 = pCommand1->peekKernel(); EXPECT_FALSE(pKernel1->Kernel::canTransformImages()); EXPECT_TRUE(pKernel1->isPatched()); EXPECT_TRUE(pKernel1->requiresMemoryMigration()); auto &memObjectsForMigration1 = pKernel1->getMemObjectsToMigrate(); ASSERT_EQ(1u, memObjectsForMigration1.size()); auto memObj1 = memObjectsForMigration1.begin()->second; for (auto &rootDeviceIndex : context.getRootDeviceIndices()) { EXPECT_EQ(pImage->getMultiGraphicsAllocation().getGraphicsAllocation(rootDeviceIndex), memObj1->getMultiGraphicsAllocation().getGraphicsAllocation(rootDeviceIndex)); } EXPECT_TRUE(memObj1->getMultiGraphicsAllocation().requiresMigrations()); EXPECT_NE(memObj0, memObj1); userEvent.setStatus(CL_COMPLETE); EXPECT_EQ(0u, pImage->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); pEvent0->release(); pEvent1->release(); pCmdQ1->finish(); pCmdQ1->release(); pImage->release(); } HWTEST_F(EnqueueWriteImageTest, givenMultiRootDeviceImageWhenNonBlockedEnqueueWriteImageIsCalledThenCommandQueueIsFlushed) { MockDefaultContext context; auto pCmdQ1 = createCommandQueue(context.getDevice(0), nullptr, &context); auto pImage = Image2dHelper<>::create(&context); 
EXPECT_TRUE(pImage->getMultiGraphicsAllocation().requiresMigrations()); auto &ultCsr = static_cast &>(pCmdQ1->getGpgpuCommandStreamReceiver()); EXPECT_FALSE(ultCsr.flushBatchedSubmissionsCalled); EXPECT_EQ(MigrationSyncData::locationUndefined, pImage->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); EnqueueWriteImageHelper<>::enqueueWriteImage(pCmdQ1, pImage, CL_FALSE); EXPECT_EQ(0u, pImage->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); EXPECT_TRUE(ultCsr.flushBatchedSubmissionsCalled); pCmdQ1->finish(); pCmdQ1->release(); pImage->release(); } HWTEST_F(EnqueueWriteImageTest, givenMultiRootDeviceImageWhenNonBlockedEnqueueWriteImageIsCalledThenTlbCacheIsInvalidated) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; MockDefaultContext context; auto pCmdQ1 = createCommandQueue(context.getDevice(0), nullptr, &context); auto pImage = Image2dHelper<>::create(&context); EXPECT_TRUE(pImage->getMultiGraphicsAllocation().requiresMigrations()); EXPECT_EQ(MigrationSyncData::locationUndefined, pImage->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); EnqueueWriteImageHelper<>::enqueueWriteImage(pCmdQ1, pImage, CL_FALSE); EXPECT_EQ(0u, pImage->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); pCmdQ1->finish(); { HardwareParse hwParser; hwParser.parseCommands(pCmdQ1->getCS(0), 0); auto pipeControls = findAll(hwParser.cmdList.begin(), hwParser.cmdList.end()); EXPECT_LT(0u, pipeControls.size()); bool pipeControlWithTlbInvalidateFound = false; for (auto &pipeControl : pipeControls) { auto pipeControlCmd = genCmdCast(*pipeControl); if (pipeControlCmd->getTlbInvalidate()) { EXPECT_TRUE(pipeControlCmd->getCommandStreamerStallEnable()); pipeControlWithTlbInvalidateFound = true; } } EXPECT_TRUE(pipeControlWithTlbInvalidateFound); } pCmdQ1->release(); pImage->release(); } 
compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/finish_tests.cpp000066400000000000000000000064671422164147700276740ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "shared/test/common/cmd_parse/hw_parse.h" #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/command_queue/command_queue_fixture.h" #include "opencl/test/unit_test/command_stream/command_stream_fixture.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "gtest/gtest.h" using namespace NEO; struct FinishFixture : public ClDeviceFixture, public CommandQueueHwFixture, public CommandStreamFixture, public HardwareParse { void SetUp() override { ClDeviceFixture::SetUp(); CommandQueueHwFixture::SetUp(pClDevice, 0); ASSERT_NE(nullptr, pCmdQ); CommandStreamFixture::SetUp(pCmdQ); ASSERT_NE(nullptr, pCS); HardwareParse::SetUp(); } void TearDown() override { HardwareParse::TearDown(); CommandStreamFixture::TearDown(); CommandQueueHwFixture::TearDown(); ClDeviceFixture::TearDown(); } }; typedef Test FinishTest; HWTEST_F(FinishTest, GivenCsGreaterThanCqWhenFinishIsCalledThenPipeControlIsNotAdded) { typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); // HW = 1, CQ = 1, CS = 2 (last PC was 1) // This means there is no work in CQ that needs a PC uint32_t originalHwTag = 1; uint32_t originalCSRLevel = 2; uint32_t originalCQLevel = 1; *commandStreamReceiver.getTagAddress() = originalHwTag; commandStreamReceiver.taskLevel = originalCSRLevel; // Must be greater than or equal to HW pCmdQ->taskLevel = originalCQLevel; auto retVal = pCmdQ->finish(); ASSERT_EQ(CL_SUCCESS, retVal); // Don't need to artificially execute PIPE_CONTROL. 
// Nothing should have been sent //*pCS->getTagAddress() = originalCSRLevel; EXPECT_EQ(commandStreamReceiver.peekTaskLevel(), originalCSRLevel); EXPECT_EQ(pCmdQ->taskLevel, originalCQLevel); EXPECT_GE(pCmdQ->getHwTag(), pCmdQ->taskLevel); auto sizeUsed = pCS->getUsed(); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, pCmdBuffer, sizeUsed)); auto itorCmd = find(cmdList.begin(), cmdList.end()); EXPECT_EQ(cmdList.end(), itorCmd); } HWTEST_F(FinishTest, WhenFinishIsCalledThenPipeControlIsNotAddedToCqCommandStream) { typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; auto retVal = pCmdQ->finish(); ASSERT_EQ(CL_SUCCESS, retVal); // Check for PIPE_CONTROL parseCommands(pCmdQ->getCS(1024)); auto itorCmd = reverse_find(cmdList.rbegin(), cmdList.rend()); EXPECT_EQ(cmdList.rend(), itorCmd); } HWTEST_F(FinishTest, givenFreshQueueWhenFinishIsCalledThenCommandStreamIsNotAllocated) { MockContext contextWithMockCmdQ(pClDevice, true); MockCommandQueueHw cmdQ(&contextWithMockCmdQ, pClDevice, 0); auto retVal = cmdQ.finish(); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(nullptr, cmdQ.peekCommandStream()); } compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/flush_tests.cpp000066400000000000000000000020301422164147700275130ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_queue/command_queue.h" #include "opencl/test/unit_test/command_queue/command_queue_fixture.h" #include "opencl/test/unit_test/command_stream/command_stream_fixture.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "gtest/gtest.h" using namespace NEO; struct FlushTest : public ClDeviceFixture, public CommandQueueFixture, public CommandStreamFixture, public ::testing::Test { using CommandQueueFixture::SetUp; void SetUp() override { ClDeviceFixture::SetUp(); CommandQueueFixture::SetUp(nullptr, pClDevice, 0); CommandStreamFixture::SetUp(pCmdQ); } void 
TearDown() override { CommandStreamFixture::TearDown(); CommandQueueFixture::TearDown(); ClDeviceFixture::TearDown(); } }; TEST_F(FlushTest, WhenFlushingThenSuccessIsReturned) { auto retVal = pCmdQ->flush(); EXPECT_EQ(retVal, CL_SUCCESS); } compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/get_command_queue_info_tests.cpp000066400000000000000000000234421422164147700331000ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/command_queue/command_queue_fixture.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/fixtures/context_fixture.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_platform.h" using namespace NEO; struct GetCommandQueueInfoTest : public ClDeviceFixture, public ContextFixture, public CommandQueueFixture, ::testing::TestWithParam { using CommandQueueFixture::SetUp; using ContextFixture::SetUp; GetCommandQueueInfoTest() { } void SetUp() override { properties = GetParam(); ClDeviceFixture::SetUp(); cl_device_id device = pClDevice; ContextFixture::SetUp(1, &device); CommandQueueFixture::SetUp(pContext, pClDevice, properties); } void TearDown() override { CommandQueueFixture::TearDown(); ContextFixture::TearDown(); ClDeviceFixture::TearDown(); } const HardwareInfo *pHwInfo = nullptr; cl_command_queue_properties properties; }; TEST_P(GetCommandQueueInfoTest, GivenClQueueContextWhenGettingCommandQueueInfoThenSuccessIsReturned) { cl_context contextReturned = nullptr; auto retVal = pCmdQ->getCommandQueueInfo( CL_QUEUE_CONTEXT, sizeof(contextReturned), &contextReturned, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ((cl_context)pContext, contextReturned); } TEST_P(GetCommandQueueInfoTest, 
GivenClQueueDeviceWhenGettingCommandQueueInfoThenSuccessIsReturned) { cl_device_id deviceExpected = pClDevice; cl_device_id deviceReturned = nullptr; auto retVal = pCmdQ->getCommandQueueInfo( CL_QUEUE_DEVICE, sizeof(deviceReturned), &deviceReturned, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(deviceExpected, deviceReturned); } TEST_P(GetCommandQueueInfoTest, GivenClQueuePropertiesWhenGettingCommandQueueInfoThenSuccessIsReturned) { cl_command_queue_properties cmdqPropertiesReturned = 0; auto retVal = pCmdQ->getCommandQueueInfo( CL_QUEUE_PROPERTIES, sizeof(cmdqPropertiesReturned), &cmdqPropertiesReturned, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(properties, cmdqPropertiesReturned); } TEST_P(GetCommandQueueInfoTest, givenNonDeviceQueueWhenQueryingQueueSizeThenInvalidCommandQueueErrorIsReturned) { cl_uint queueSize = 0; auto retVal = pCmdQ->getCommandQueueInfo( CL_QUEUE_SIZE, sizeof(queueSize), &queueSize, nullptr); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } TEST_P(GetCommandQueueInfoTest, GivenClQueueDeviceDefaultWhenGettingCommandQueueInfoThenSuccessIsReturned) { cl_command_queue commandQueueReturned = reinterpret_cast(static_cast(0x1234)); size_t sizeReturned = 0u; auto retVal = pCmdQ->getCommandQueueInfo( CL_QUEUE_DEVICE_DEFAULT, sizeof(commandQueueReturned), &commandQueueReturned, &sizeReturned); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(nullptr, commandQueueReturned); EXPECT_EQ(sizeof(cl_command_queue), sizeReturned); } TEST_P(GetCommandQueueInfoTest, GivenInvalidParameterWhenGettingCommandQueueInfoThenInvalidValueIsReturned) { cl_uint parameterReturned = 0; cl_command_queue_info invalidParameter = 0xdeadbeef; auto retVal = pCmdQ->getCommandQueueInfo( invalidParameter, sizeof(parameterReturned), ¶meterReturned, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); } INSTANTIATE_TEST_CASE_P( GetCommandQueueInfoTest, GetCommandQueueInfoTest, ::testing::ValuesIn(DefaultCommandQueueProperties)); using GetCommandQueueFamilyInfoTests = ::testing::Test; 
TEST_F(GetCommandQueueFamilyInfoTests, givenQueueFamilyNotSelectedWhenGettingFamilyAndQueueIndexThenValuesAreReturned) { MockContext context{}; MockCommandQueue queue{context}; queue.queueFamilySelected = false; queue.queueFamilyIndex = 12u; cl_int retVal{}; const auto &hwInfo = context.getDevice(0)->getHardwareInfo(); const auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); const auto engineGroupType = hwHelper.getEngineGroupType(context.getDevice(0)->getDefaultEngine().getEngineType(), context.getDevice(0)->getDefaultEngine().getEngineUsage(), hwInfo); const auto expectedFamilyIndex = context.getDevice(0)->getDevice().getEngineGroupIndexFromEngineGroupType(engineGroupType); cl_uint familyIndex{}; retVal = queue.getCommandQueueInfo( CL_QUEUE_FAMILY_INTEL, sizeof(cl_uint), &familyIndex, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(expectedFamilyIndex, familyIndex); cl_uint queueIndex{}; retVal = queue.getCommandQueueInfo( CL_QUEUE_INDEX_INTEL, sizeof(cl_uint), &queueIndex, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, queueIndex); } TEST_F(GetCommandQueueFamilyInfoTests, givenQueueFamilySelectedWhenGettingFamilyAndQueueIndexThenValuesAreReturned) { MockCommandQueue queue; queue.queueFamilySelected = true; queue.queueFamilyIndex = 12u; queue.queueIndexWithinFamily = 1432u; cl_int retVal{}; cl_uint familyIndex{}; retVal = queue.getCommandQueueInfo( CL_QUEUE_FAMILY_INTEL, sizeof(cl_uint), &familyIndex, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(queue.queueFamilyIndex, familyIndex); cl_uint queueIndex{}; retVal = queue.getCommandQueueInfo( CL_QUEUE_INDEX_INTEL, sizeof(cl_uint), &queueIndex, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(queue.queueIndexWithinFamily, queueIndex); } HWCMDTEST_F(IGFX_XE_HP_CORE, GetCommandQueueFamilyInfoTests, givenFamilyIdWhenGettingCommandQueueInfoThenCorrectValueIsReturned) { HardwareInfo hwInfo = *defaultHwInfo.get(); hwInfo.featureTable.flags.ftrCCSNode = true; MockClDevice 
mockClDevice{MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)}; const cl_device_id deviceId = &mockClDevice; auto context = clCreateContext(nullptr, 1, &deviceId, nullptr, nullptr, nullptr); auto ccsFamily = mockClDevice.getDevice().getEngineGroupIndexFromEngineGroupType(EngineGroupType::Compute); cl_command_queue_properties properties[] = {CL_QUEUE_FAMILY_INTEL, ccsFamily, CL_QUEUE_INDEX_INTEL, 0, 0}; EXPECT_EQ(0u, mockClDevice.getNumGenericSubDevices()); auto commandQueue = clCreateCommandQueueWithProperties(context, deviceId, properties, nullptr); auto neoQueue = castToObject(commandQueue); cl_uint familyParameter; auto retVal = neoQueue->getCommandQueueInfo( CL_QUEUE_FAMILY_INTEL, sizeof(familyParameter), &familyParameter, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(ccsFamily, familyParameter); cl_uint indexParameter; retVal = neoQueue->getCommandQueueInfo( CL_QUEUE_INDEX_INTEL, sizeof(indexParameter), &indexParameter, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, indexParameter); clReleaseCommandQueue(commandQueue); clReleaseContext(context); } HWCMDTEST_F(IGFX_XE_HP_CORE, GetCommandQueueFamilyInfoTests, givenNonZeroFamilyIdWhenCreatingCommandQueueForRootDeviceWithMultipleSubDevicesThenInvalidValueIsReturned) { DebugManagerStateRestore restorer; DebugManager.flags.CreateMultipleSubDevices.set(2); initPlatform(); auto rootDevice = platform()->getClDevice(0); const cl_device_id deviceId = rootDevice; auto context = clCreateContext(nullptr, 1, &deviceId, nullptr, nullptr, nullptr); cl_command_queue_properties properties[] = {CL_QUEUE_FAMILY_INTEL, 1u, CL_QUEUE_INDEX_INTEL, 0, 0}; EXPECT_EQ(2u, rootDevice->getNumGenericSubDevices()); cl_int retVal; auto commandQueue = clCreateCommandQueueWithProperties(context, rootDevice, properties, &retVal); EXPECT_EQ(CL_INVALID_QUEUE_PROPERTIES, retVal); EXPECT_EQ(nullptr, commandQueue); clReleaseContext(context); } using MultiEngineQueueHwTests = ::testing::Test; HWCMDTEST_F(IGFX_XE_HP_CORE, 
MultiEngineQueueHwTests, givenLimitedNumberOfCcsWhenCreatingCmdQueueThenFailOnNotSupportedCcs) { HardwareInfo localHwInfo = *defaultHwInfo; localHwInfo.gtSystemInfo.CCSInfo.IsValid = true; localHwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled = 4; localHwInfo.gtSystemInfo.CCSInfo.Instances.CCSEnableMask = 0b1111; localHwInfo.featureTable.flags.ftrCCSNode = true; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&localHwInfo)); MockContext context(device.get()); context.contextType = ContextType::CONTEXT_TYPE_UNRESTRICTIVE; const uint32_t ccsCount = 4; auto ccsEngine = device->getDevice().getEngineGroupIndexFromEngineGroupType(EngineGroupType::Compute); cl_queue_properties properties[5] = {CL_QUEUE_FAMILY_INTEL, ccsEngine, CL_QUEUE_INDEX_INTEL, 0, 0}; auto mutableHwInfo = device->getRootDeviceEnvironment().getMutableHardwareInfo(); for (uint32_t i = 0; i < ccsCount; i++) { properties[3] = i; mutableHwInfo->gtSystemInfo.CCSInfo.Instances.CCSEnableMask = (1 << i); cl_int retVal = CL_SUCCESS; cl_command_queue clCommandQueue = clCreateCommandQueueWithProperties(&context, device.get(), properties, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); clReleaseCommandQueue(clCommandQueue); } } compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/get_size_required_buffer_tests.cpp000066400000000000000000000565531422164147700334570ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/mocks/mock_allocation_properties.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/built_ins/builtins_dispatch_builder.h" #include "opencl/source/command_queue/enqueue_fill_buffer.h" #include "opencl/source/command_queue/enqueue_kernel.h" #include "opencl/source/command_queue/enqueue_read_buffer.h" #include "opencl/source/command_queue/enqueue_write_buffer.h" #include "opencl/source/command_queue/gpgpu_walker.h" #include "opencl/source/event/event.h" #include 
"opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/kernel/kernel.h" #include "opencl/test/unit_test/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/command_queue/enqueue_fixture.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "opencl/test/unit_test/fixtures/hello_world_kernel_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/fixtures/simple_arg_kernel_fixture.h" using namespace NEO; struct GetSizeRequiredBufferTest : public CommandEnqueueFixture, public SimpleArgKernelFixture, public HelloWorldKernelFixture, public ::testing::Test { using HelloWorldKernelFixture::SetUp; using SimpleArgKernelFixture::SetUp; GetSizeRequiredBufferTest() { } void SetUp() override { CommandEnqueueFixture::SetUp(); SimpleArgKernelFixture::SetUp(pClDevice); HelloWorldKernelFixture::SetUp(pClDevice, "CopyBuffer_simd", "CopyBuffer"); BufferDefaults::context = new MockContext; srcBuffer = BufferHelper<>::create(); dstBuffer = BufferHelper<>::create(); patternAllocation = context->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), EnqueueFillBufferTraits::patternSize}); pDevice->setPreemptionMode(PreemptionMode::Disabled); } void TearDown() override { context->getMemoryManager()->freeGraphicsMemory(patternAllocation); delete dstBuffer; delete srcBuffer; delete BufferDefaults::context; HelloWorldKernelFixture::TearDown(); SimpleArgKernelFixture::TearDown(); CommandEnqueueFixture::TearDown(); } Buffer *srcBuffer = nullptr; Buffer *dstBuffer = nullptr; GraphicsAllocation *patternAllocation = nullptr; }; HWTEST_F(GetSizeRequiredBufferTest, WhenFillingBufferThenHeapsAndCommandBufferConsumedMinimumRequiredSize) { typedef typename FamilyType::WALKER_TYPE GPGPU_WALKER; auto &commandStream = pCmdQ->getCS(1024); auto usedBeforeCS = commandStream.getUsed(); auto &dsh = 
pCmdQ->getIndirectHeap(IndirectHeap::Type::DYNAMIC_STATE, 0u); auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::Type::INDIRECT_OBJECT, 0u); auto &ssh = pCmdQ->getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0u); auto usedBeforeDSH = dsh.getUsed(); auto usedBeforeIOH = ioh.getUsed(); auto usedBeforeSSH = ssh.getUsed(); auto retVal = EnqueueFillBufferHelper<>::enqueue(pCmdQ); EXPECT_EQ(CL_SUCCESS, retVal); auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::FillBuffer, pCmdQ->getClDevice()); ASSERT_NE(nullptr, &builder); BuiltinOpParams dc; MemObj patternMemObj(this->context, 0, {}, 0, 0, alignUp(EnqueueFillBufferTraits::patternSize, 4), patternAllocation->getUnderlyingBuffer(), patternAllocation->getUnderlyingBuffer(), GraphicsAllocationHelper::toMultiGraphicsAllocation(patternAllocation), false, false, true); dc.srcMemObj = &patternMemObj; dc.dstMemObj = dstBuffer; dc.dstOffset = {EnqueueFillBufferTraits::offset, 0, 0}; dc.size = {EnqueueFillBufferTraits::size, 0, 0}; MultiDispatchInfo multiDispatchInfo(dc); builder.buildDispatchInfos(multiDispatchInfo); EXPECT_NE(0u, multiDispatchInfo.size()); auto usedAfterCS = commandStream.getUsed(); auto usedAfterDSH = dsh.getUsed(); auto usedAfterIOH = ioh.getUsed(); auto usedAfterSSH = ssh.getUsed(); auto expectedSizeCS = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_FILL_BUFFER, CsrDependencies(), false, false, false, *pCmdQ, multiDispatchInfo, false, false); auto expectedSizeDSH = HardwareCommandsHelper::getTotalSizeRequiredDSH(multiDispatchInfo); auto expectedSizeIOH = HardwareCommandsHelper::getTotalSizeRequiredIOH(multiDispatchInfo); auto expectedSizeSSH = HardwareCommandsHelper::getTotalSizeRequiredSSH(multiDispatchInfo); EXPECT_EQ(0u, expectedSizeIOH % GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE); EXPECT_EQ(0u, expectedSizeDSH % 64); // Since each enqueue* may flush, we may see a MI_BATCH_BUFFER_END appended. 
expectedSizeCS += sizeof(typename FamilyType::MI_BATCH_BUFFER_END); expectedSizeCS = alignUp(expectedSizeCS, MemoryConstants::cacheLineSize); EXPECT_GE(expectedSizeCS, usedAfterCS - usedBeforeCS); EXPECT_GE(expectedSizeDSH, usedAfterDSH - usedBeforeDSH); EXPECT_GE(expectedSizeIOH, usedAfterIOH - usedBeforeIOH); EXPECT_GE(expectedSizeSSH, usedAfterSSH - usedBeforeSSH); } HWTEST_F(GetSizeRequiredBufferTest, WhenCopyingBufferThenHeapsAndCommandBufferConsumedMinimumRequiredSize) { typedef typename FamilyType::WALKER_TYPE GPGPU_WALKER; auto &commandStream = pCmdQ->getCS(1024); auto usedBeforeCS = commandStream.getUsed(); auto &dsh = pCmdQ->getIndirectHeap(IndirectHeap::Type::DYNAMIC_STATE, 0u); auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::Type::INDIRECT_OBJECT, 0u); auto &ssh = pCmdQ->getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0u); auto usedBeforeDSH = dsh.getUsed(); auto usedBeforeIOH = ioh.getUsed(); auto usedBeforeSSH = ssh.getUsed(); auto retVal = EnqueueCopyBufferHelper<>::enqueue(pCmdQ); EXPECT_EQ(CL_SUCCESS, retVal); auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBuffer, pCmdQ->getClDevice()); ASSERT_NE(nullptr, &builder); BuiltinOpParams dc; dc.srcMemObj = srcBuffer; dc.srcMemObj = dstBuffer; dc.srcOffset = {EnqueueCopyBufferTraits::srcOffset, 0, 0}; dc.dstOffset = {EnqueueCopyBufferTraits::dstOffset, 0, 0}; dc.size = {EnqueueCopyBufferTraits::size, 0, 0}; MultiDispatchInfo multiDispatchInfo(dc); builder.buildDispatchInfos(multiDispatchInfo); EXPECT_NE(0u, multiDispatchInfo.size()); auto usedAfterCS = commandStream.getUsed(); auto usedAfterDSH = dsh.getUsed(); auto usedAfterIOH = ioh.getUsed(); auto usedAfterSSH = ssh.getUsed(); auto expectedSizeCS = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_COPY_BUFFER, CsrDependencies(), false, false, false, *pCmdQ, multiDispatchInfo, false, false); auto expectedSizeDSH = HardwareCommandsHelper::getTotalSizeRequiredDSH(multiDispatchInfo); auto 
expectedSizeIOH = HardwareCommandsHelper::getTotalSizeRequiredIOH(multiDispatchInfo); auto expectedSizeSSH = HardwareCommandsHelper::getTotalSizeRequiredSSH(multiDispatchInfo); EXPECT_EQ(0u, expectedSizeIOH % GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE); EXPECT_EQ(0u, expectedSizeDSH % 64); // Since each enqueue* may flush, we may see a MI_BATCH_BUFFER_END appended. expectedSizeCS += sizeof(typename FamilyType::MI_BATCH_BUFFER_END); expectedSizeCS = alignUp(expectedSizeCS, MemoryConstants::cacheLineSize); EXPECT_GE(expectedSizeCS, usedAfterCS - usedBeforeCS); EXPECT_GE(expectedSizeDSH, usedAfterDSH - usedBeforeDSH); EXPECT_GE(expectedSizeIOH, usedAfterIOH - usedBeforeIOH); EXPECT_GE(expectedSizeSSH, usedAfterSSH - usedBeforeSSH); } HWTEST_F(GetSizeRequiredBufferTest, WhenReadingBufferNonBlockingThenHeapsAndCommandBufferConsumedMinimumRequiredSize) { typedef typename FamilyType::WALKER_TYPE GPGPU_WALKER; auto &commandStream = pCmdQ->getCS(1024); auto usedBeforeCS = commandStream.getUsed(); auto &dsh = pCmdQ->getIndirectHeap(IndirectHeap::Type::DYNAMIC_STATE, 0u); auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::Type::INDIRECT_OBJECT, 0u); auto &ssh = pCmdQ->getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0u); auto usedBeforeDSH = dsh.getUsed(); auto usedBeforeIOH = ioh.getUsed(); auto usedBeforeSSH = ssh.getUsed(); EnqueueReadBufferHelper<>::enqueueReadBuffer( pCmdQ, srcBuffer, CL_FALSE); auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBuffer, pCmdQ->getClDevice()); ASSERT_NE(nullptr, &builder); BuiltinOpParams dc; dc.dstPtr = EnqueueReadBufferTraits::hostPtr; dc.srcMemObj = srcBuffer; dc.srcOffset = {EnqueueReadBufferTraits::offset, 0, 0}; dc.size = {srcBuffer->getSize(), 0, 0}; MultiDispatchInfo multiDispatchInfo(dc); builder.buildDispatchInfos(multiDispatchInfo); EXPECT_NE(0u, multiDispatchInfo.size()); auto usedAfterCS = commandStream.getUsed(); auto usedAfterDSH = dsh.getUsed(); auto usedAfterIOH = 
ioh.getUsed(); auto usedAfterSSH = ssh.getUsed(); auto expectedSizeCS = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_READ_BUFFER, CsrDependencies(), false, false, false, *pCmdQ, multiDispatchInfo, false, false); auto expectedSizeDSH = HardwareCommandsHelper::getTotalSizeRequiredDSH(multiDispatchInfo); auto expectedSizeIOH = HardwareCommandsHelper::getTotalSizeRequiredIOH(multiDispatchInfo); auto expectedSizeSSH = HardwareCommandsHelper::getTotalSizeRequiredSSH(multiDispatchInfo); EXPECT_EQ(0u, expectedSizeIOH % GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE); EXPECT_EQ(0u, expectedSizeDSH % 64); // Since each enqueue* may flush, we may see a MI_BATCH_BUFFER_END appended. expectedSizeCS += sizeof(typename FamilyType::MI_BATCH_BUFFER_END); expectedSizeCS = alignUp(expectedSizeCS, MemoryConstants::cacheLineSize); EXPECT_GE(expectedSizeCS, usedAfterCS - usedBeforeCS); EXPECT_GE(expectedSizeDSH, usedAfterDSH - usedBeforeDSH); EXPECT_GE(expectedSizeIOH, usedAfterIOH - usedBeforeIOH); EXPECT_GE(expectedSizeSSH, usedAfterSSH - usedBeforeSSH); } HWTEST_F(GetSizeRequiredBufferTest, WhenReadingBufferBlockingThenThenHeapsAndCommandBufferConsumedMinimumRequiredSize) { typedef typename FamilyType::WALKER_TYPE GPGPU_WALKER; auto &commandStream = pCmdQ->getCS(1024); auto usedBeforeCS = commandStream.getUsed(); auto &dsh = pCmdQ->getIndirectHeap(IndirectHeap::Type::DYNAMIC_STATE, 0u); auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::Type::INDIRECT_OBJECT, 0u); auto &ssh = pCmdQ->getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0u); auto usedBeforeDSH = dsh.getUsed(); auto usedBeforeIOH = ioh.getUsed(); auto usedBeforeSSH = ssh.getUsed(); srcBuffer->forceDisallowCPUCopy = true; EnqueueReadBufferHelper<>::enqueueReadBuffer( pCmdQ, srcBuffer, CL_TRUE); auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBuffer, pCmdQ->getClDevice()); ASSERT_NE(nullptr, &builder); BuiltinOpParams dc; dc.dstPtr = EnqueueReadBufferTraits::hostPtr; 
dc.srcMemObj = srcBuffer; dc.srcOffset = {EnqueueReadBufferTraits::offset, 0, 0}; dc.size = {srcBuffer->getSize(), 0, 0}; MultiDispatchInfo multiDispatchInfo(dc); builder.buildDispatchInfos(multiDispatchInfo); EXPECT_NE(0u, multiDispatchInfo.size()); auto usedAfterCS = commandStream.getUsed(); auto usedAfterDSH = dsh.getUsed(); auto usedAfterIOH = ioh.getUsed(); auto usedAfterSSH = ssh.getUsed(); auto expectedSizeCS = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_READ_BUFFER, CsrDependencies(), false, false, false, *pCmdQ, multiDispatchInfo, false, false); auto expectedSizeDSH = HardwareCommandsHelper::getTotalSizeRequiredDSH(multiDispatchInfo); auto expectedSizeIOH = HardwareCommandsHelper::getTotalSizeRequiredIOH(multiDispatchInfo); auto expectedSizeSSH = HardwareCommandsHelper::getTotalSizeRequiredSSH(multiDispatchInfo); EXPECT_EQ(0u, expectedSizeIOH % GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE); EXPECT_EQ(0u, expectedSizeDSH % 64); // Since each enqueue* may flush, we may see a MI_BATCH_BUFFER_END appended. 
expectedSizeCS += sizeof(typename FamilyType::MI_BATCH_BUFFER_END); expectedSizeCS = alignUp(expectedSizeCS, MemoryConstants::cacheLineSize); EXPECT_GE(expectedSizeCS, usedAfterCS - usedBeforeCS); EXPECT_GE(expectedSizeDSH, usedAfterDSH - usedBeforeDSH); EXPECT_GE(expectedSizeIOH, usedAfterIOH - usedBeforeIOH); EXPECT_GE(expectedSizeSSH, usedAfterSSH - usedBeforeSSH); } HWTEST_F(GetSizeRequiredBufferTest, WhenWritingBufferNonBlockingThenHeapsAndCommandBufferConsumedMinimumRequiredSize) { typedef typename FamilyType::WALKER_TYPE GPGPU_WALKER; auto &commandStream = pCmdQ->getCS(1024); auto usedBeforeCS = commandStream.getUsed(); auto &dsh = pCmdQ->getIndirectHeap(IndirectHeap::Type::DYNAMIC_STATE, 0u); auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::Type::INDIRECT_OBJECT, 0u); auto &ssh = pCmdQ->getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0u); auto usedBeforeDSH = dsh.getUsed(); auto usedBeforeIOH = ioh.getUsed(); auto usedBeforeSSH = ssh.getUsed(); auto retVal = EnqueueWriteBufferHelper<>::enqueueWriteBuffer( pCmdQ, dstBuffer, CL_FALSE); EXPECT_EQ(CL_SUCCESS, retVal); auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBuffer, pCmdQ->getClDevice()); ASSERT_NE(nullptr, &builder); BuiltinOpParams dc; dc.srcPtr = EnqueueWriteBufferTraits::hostPtr; dc.dstMemObj = dstBuffer; dc.dstOffset = {EnqueueWriteBufferTraits::offset, 0, 0}; dc.size = {dstBuffer->getSize(), 0, 0}; MultiDispatchInfo multiDispatchInfo(dc); builder.buildDispatchInfos(multiDispatchInfo); EXPECT_NE(0u, multiDispatchInfo.size()); auto usedAfterCS = commandStream.getUsed(); auto usedAfterDSH = dsh.getUsed(); auto usedAfterIOH = ioh.getUsed(); auto usedAfterSSH = ssh.getUsed(); auto expectedSizeCS = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_WRITE_BUFFER, CsrDependencies(), false, false, false, *pCmdQ, multiDispatchInfo, false, false); auto expectedSizeDSH = HardwareCommandsHelper::getTotalSizeRequiredDSH(multiDispatchInfo); auto expectedSizeIOH 
= HardwareCommandsHelper::getTotalSizeRequiredIOH(multiDispatchInfo); auto expectedSizeSSH = HardwareCommandsHelper::getTotalSizeRequiredSSH(multiDispatchInfo); // Since each enqueue* may flush, we may see a MI_BATCH_BUFFER_END appended. expectedSizeCS += sizeof(typename FamilyType::MI_BATCH_BUFFER_END); expectedSizeCS = alignUp(expectedSizeCS, MemoryConstants::cacheLineSize); EXPECT_GE(expectedSizeCS, usedAfterCS - usedBeforeCS); EXPECT_GE(expectedSizeDSH, usedAfterDSH - usedBeforeDSH); EXPECT_GE(expectedSizeIOH, usedAfterIOH - usedBeforeIOH); EXPECT_GE(expectedSizeSSH, usedAfterSSH - usedBeforeSSH); } HWTEST_F(GetSizeRequiredBufferTest, WhenWritingBufferBlockingThenHeapsAndCommandBufferConsumedMinimumRequiredSize) { auto &commandStream = pCmdQ->getCS(1024); auto usedBeforeCS = commandStream.getUsed(); auto &dsh = pCmdQ->getIndirectHeap(IndirectHeap::Type::DYNAMIC_STATE, 0u); auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::Type::INDIRECT_OBJECT, 0u); auto &ssh = pCmdQ->getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0u); auto usedBeforeDSH = dsh.getUsed(); auto usedBeforeIOH = ioh.getUsed(); auto usedBeforeSSH = ssh.getUsed(); dstBuffer->forceDisallowCPUCopy = true; auto retVal = EnqueueWriteBufferHelper<>::enqueueWriteBuffer( pCmdQ, dstBuffer, CL_TRUE); EXPECT_EQ(CL_SUCCESS, retVal); auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBuffer, pCmdQ->getClDevice()); ASSERT_NE(nullptr, &builder); BuiltinOpParams dc; dc.srcPtr = EnqueueWriteBufferTraits::hostPtr; dc.dstMemObj = dstBuffer; dc.dstOffset = {EnqueueWriteBufferTraits::offset, 0, 0}; dc.size = {dstBuffer->getSize(), 0, 0}; MultiDispatchInfo multiDispatchInfo(dc); builder.buildDispatchInfos(multiDispatchInfo); EXPECT_NE(0u, multiDispatchInfo.size()); auto usedAfterCS = commandStream.getUsed(); auto usedAfterDSH = dsh.getUsed(); auto usedAfterIOH = ioh.getUsed(); auto usedAfterSSH = ssh.getUsed(); auto expectedSizeCS = 
EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_WRITE_BUFFER, CsrDependencies(), false, false, false, *pCmdQ, multiDispatchInfo, false, false); auto expectedSizeDSH = HardwareCommandsHelper::getTotalSizeRequiredDSH(multiDispatchInfo); auto expectedSizeIOH = HardwareCommandsHelper::getTotalSizeRequiredIOH(multiDispatchInfo); auto expectedSizeSSH = HardwareCommandsHelper::getTotalSizeRequiredSSH(multiDispatchInfo); // Since each enqueue* may flush, we may see a MI_BATCH_BUFFER_END appended. expectedSizeCS += sizeof(typename FamilyType::MI_BATCH_BUFFER_END); expectedSizeCS = alignUp(expectedSizeCS, MemoryConstants::cacheLineSize); EXPECT_GE(expectedSizeCS, usedAfterCS - usedBeforeCS); EXPECT_GE(expectedSizeDSH, usedAfterDSH - usedBeforeDSH); EXPECT_GE(expectedSizeIOH, usedAfterIOH - usedBeforeIOH); EXPECT_GE(expectedSizeSSH, usedAfterSSH - usedBeforeSSH); } HWTEST_F(GetSizeRequiredBufferTest, givenMultipleKernelRequiringSshWhenTotalSizeIsComputedThenItIsProperlyAligned) { auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBuffer, pCmdQ->getClDevice()); ASSERT_NE(nullptr, &builder); BuiltinOpParams dc; dc.srcPtr = EnqueueWriteBufferTraits::hostPtr; dc.dstMemObj = dstBuffer; dc.dstOffset = {EnqueueWriteBufferTraits::offset, 0, 0}; dc.size = {dstBuffer->getSize(), 0, 0}; MultiDispatchInfo multiDispatchInfo(dc); builder.buildDispatchInfos(multiDispatchInfo); builder.buildDispatchInfos(multiDispatchInfo); builder.buildDispatchInfos(multiDispatchInfo); builder.buildDispatchInfos(multiDispatchInfo); auto sizeSSH = multiDispatchInfo.begin()->getKernel()->getSurfaceStateHeapSize(); sizeSSH += sizeSSH ? 
FamilyType::BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE : 0; sizeSSH = alignUp(sizeSSH, MemoryConstants::cacheLineSize); sizeSSH *= 4u; sizeSSH = alignUp(sizeSSH, MemoryConstants::pageSize); EXPECT_EQ(4u, multiDispatchInfo.size()); auto expectedSizeSSH = HardwareCommandsHelper::getTotalSizeRequiredSSH(multiDispatchInfo); EXPECT_EQ(sizeSSH, expectedSizeSSH); } HWTEST_F(GetSizeRequiredBufferTest, GivenHelloWorldKernelWhenEnqueingKernelThenHeapsAndCommandBufferConsumedMinimumRequiredSize) { typedef HelloWorldKernelFixture KernelFixture; auto &commandStream = pCmdQ->getCS(1024); auto usedBeforeCS = commandStream.getUsed(); auto dshBefore = pDSH->getUsed(); auto iohBefore = pIOH->getUsed(); auto sshBefore = pSSH->getUsed(); size_t workSize[] = {64}; auto retVal = EnqueueKernelHelper<>::enqueueKernel( pCmdQ, KernelFixture::pKernel, 1, nullptr, workSize, workSize); EXPECT_EQ(CL_SUCCESS, retVal); auto usedAfterCS = commandStream.getUsed(); auto dshAfter = pDSH->getUsed(); auto iohAfter = pIOH->getUsed(); auto sshAfter = pSSH->getUsed(); auto expectedSizeCS = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *pCmdQ, KernelFixture::pKernel, {}); auto expectedSizeDSH = HardwareCommandsHelper::getSizeRequiredDSH(*KernelFixture::pKernel); auto expectedSizeIOH = HardwareCommandsHelper::getSizeRequiredIOH(*KernelFixture::pKernel, workSize[0]); auto expectedSizeSSH = HardwareCommandsHelper::getSizeRequiredSSH(*KernelFixture::pKernel); // Since each enqueue* may flush, we may see a MI_BATCH_BUFFER_END appended. 
expectedSizeCS += sizeof(typename FamilyType::MI_BATCH_BUFFER_END); expectedSizeCS = alignUp(expectedSizeCS, MemoryConstants::cacheLineSize); EXPECT_GE(expectedSizeCS, usedAfterCS - usedBeforeCS); EXPECT_GE(expectedSizeDSH, dshAfter - dshBefore); EXPECT_GE(expectedSizeIOH, iohAfter - iohBefore); EXPECT_GE(expectedSizeSSH, sshAfter - sshBefore); } HWTEST_F(GetSizeRequiredBufferTest, GivenKernelWithSimpleArgWhenEnqueingKernelThenHeapsAndCommandBufferConsumedMinimumRequiredSize) { typedef typename FamilyType::WALKER_TYPE GPGPU_WALKER; typedef SimpleArgKernelFixture KernelFixture; auto &commandStream = pCmdQ->getCS(1024); auto usedBeforeCS = commandStream.getUsed(); auto dshBefore = pDSH->getUsed(); auto iohBefore = pIOH->getUsed(); auto sshBefore = pSSH->getUsed(); size_t workSize[] = {64}; auto retVal = EnqueueKernelHelper<>::enqueueKernel( pCmdQ, KernelFixture::pKernel, 1, nullptr, workSize, workSize); EXPECT_EQ(CL_SUCCESS, retVal); auto usedAfterCS = commandStream.getUsed(); auto dshAfter = pDSH->getUsed(); auto iohAfter = pIOH->getUsed(); auto sshAfter = pSSH->getUsed(); auto expectedSizeCS = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *pCmdQ, KernelFixture::pKernel, {}); auto expectedSizeDSH = HardwareCommandsHelper::getSizeRequiredDSH(*KernelFixture::pKernel); auto expectedSizeIOH = HardwareCommandsHelper::getSizeRequiredIOH(*KernelFixture::pKernel, workSize[0]); auto expectedSizeSSH = HardwareCommandsHelper::getSizeRequiredSSH(*KernelFixture::pKernel); EXPECT_EQ(0u, expectedSizeIOH % GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE); EXPECT_EQ(0u, expectedSizeDSH % 64); // Since each enqueue* may flush, we may see a MI_BATCH_BUFFER_END appended. 
expectedSizeCS += sizeof(typename FamilyType::MI_BATCH_BUFFER_END); expectedSizeCS = alignUp(expectedSizeCS, MemoryConstants::cacheLineSize); EXPECT_GE(expectedSizeCS, usedAfterCS - usedBeforeCS); EXPECT_GE(expectedSizeDSH, dshAfter - dshBefore); EXPECT_GE(expectedSizeIOH, iohAfter - iohBefore); EXPECT_GE(expectedSizeSSH, sshAfter - sshBefore); } compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/get_size_required_image_tests.cpp000066400000000000000000000421541422164147700332600ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/built_ins/built_ins.h" #include "shared/source/utilities/perf_counter.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/built_ins/builtins_dispatch_builder.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/command_queue/enqueue_copy_image.h" #include "opencl/source/command_queue/enqueue_fill_image.h" #include "opencl/source/command_queue/enqueue_read_image.h" #include "opencl/source/command_queue/enqueue_write_image.h" #include "opencl/source/command_queue/gpgpu_walker.h" #include "opencl/source/event/event.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/kernel/kernel.h" #include "opencl/test/unit_test/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/command_queue/enqueue_fixture.h" #include "opencl/test/unit_test/command_queue/enqueue_write_image_fixture.h" #include "opencl/test/unit_test/fixtures/built_in_fixture.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" using namespace NEO; struct GetSizeRequiredImageTest : public CommandEnqueueFixture, public ::testing::Test { GetSizeRequiredImageTest() { } void SetUp() override { REQUIRE_IMAGES_OR_SKIP(defaultHwInfo); CommandEnqueueFixture::SetUp(); srcImage = Image2dHelper<>::create(context); dstImage = 
Image2dHelper<>::create(context); pDevice->setPreemptionMode(PreemptionMode::Disabled); } void TearDown() override { if (IsSkipped()) { return; } delete dstImage; delete srcImage; CommandEnqueueFixture::TearDown(); } Image *srcImage = nullptr; Image *dstImage = nullptr; }; HWTEST_F(GetSizeRequiredImageTest, WhenCopyingImageThenHeapsAndCommandBufferConsumedMinimumRequiredSize) { auto &commandStream = pCmdQ->getCS(1024); auto usedBeforeCS = commandStream.getUsed(); auto &dsh = pCmdQ->getIndirectHeap(IndirectHeap::Type::DYNAMIC_STATE, 0u); auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::Type::INDIRECT_OBJECT, 0u); auto &ssh = pCmdQ->getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0u); auto usedBeforeDSH = dsh.getUsed(); auto usedBeforeIOH = ioh.getUsed(); auto usedBeforeSSH = ssh.getUsed(); auto retVal = EnqueueCopyImageHelper<>::enqueueCopyImage(pCmdQ); EXPECT_EQ(CL_SUCCESS, retVal); auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyImageToImage3d, pCmdQ->getClDevice()); ASSERT_NE(nullptr, &builder); BuiltinOpParams dc; dc.srcMemObj = srcImage; dc.dstMemObj = dstImage; dc.srcOffset = EnqueueCopyImageTraits::srcOrigin; dc.dstOffset = EnqueueCopyImageTraits::dstOrigin; dc.size = {1, 1, 1}; MultiDispatchInfo multiDispatchInfo(dc); builder.buildDispatchInfos(multiDispatchInfo); EXPECT_NE(0u, multiDispatchInfo.size()); auto kernel = multiDispatchInfo.begin()->getKernel(); ASSERT_NE(nullptr, kernel); auto usedAfterCS = commandStream.getUsed(); auto usedAfterDSH = dsh.getUsed(); auto usedAfterIOH = ioh.getUsed(); auto usedAfterSSH = ssh.getUsed(); auto expectedSizeCS = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_COPY_IMAGE, false, false, *pCmdQ, kernel, {}); auto expectedSizeDSH = HardwareCommandsHelper::getSizeRequiredDSH(*kernel); auto expectedSizeIOH = HardwareCommandsHelper::getSizeRequiredIOH(*kernel); auto expectedSizeSSH = HardwareCommandsHelper::getSizeRequiredSSH(*kernel); // Since each enqueue* may flush, we may see a 
MI_BATCH_BUFFER_END appended. expectedSizeCS += sizeof(typename FamilyType::MI_BATCH_BUFFER_END); expectedSizeCS = alignUp(expectedSizeCS, MemoryConstants::cacheLineSize); EXPECT_GE(expectedSizeCS, usedAfterCS - usedBeforeCS); EXPECT_GE(expectedSizeDSH, usedAfterDSH - usedBeforeDSH); EXPECT_GE(expectedSizeIOH, usedAfterIOH - usedBeforeIOH); EXPECT_GE(expectedSizeSSH, usedAfterSSH - usedBeforeSSH); } HWTEST_F(GetSizeRequiredImageTest, WhenCopyingReadWriteImageThenHeapsAndCommandBufferConsumedMinimumRequiredSize) { auto &commandStream = pCmdQ->getCS(1024); auto usedBeforeCS = commandStream.getUsed(); auto &dsh = pCmdQ->getIndirectHeap(IndirectHeap::Type::DYNAMIC_STATE, 0u); auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::Type::INDIRECT_OBJECT, 0u); auto &ssh = pCmdQ->getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0u); auto usedBeforeDSH = dsh.getUsed(); auto usedBeforeIOH = ioh.getUsed(); auto usedBeforeSSH = ssh.getUsed(); std::unique_ptr program(Program::createBuiltInFromSource("CopyImageToImage3d", context, context->getDevices(), nullptr)); program->build(program->getDevices(), nullptr, false); std::unique_ptr kernel(Kernel::create(program.get(), program->getKernelInfoForKernel("CopyImageToImage3d"), *context->getDevice(0), nullptr)); EXPECT_NE(nullptr, kernel); // This kernel does not operate on OpenCL 2.0 Read and Write images EXPECT_FALSE(kernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.usesFencesForReadWriteImages); // Simulate that the kernel actually operates on OpenCL 2.0 Read and Write images. 
// Such kernel may require special WA DisableLSQCROPERFforOCL during construction of Command Buffer const_cast(kernel->getKernelInfo().kernelDescriptor).kernelAttributes.flags.usesFencesForReadWriteImages = true; // Enqueue kernel that may require special WA DisableLSQCROPERFforOCL auto retVal = EnqueueKernelHelper<>::enqueueKernel(pCmdQ, kernel.get()); EXPECT_EQ(CL_SUCCESS, retVal); auto usedAfterCS = commandStream.getUsed(); auto usedAfterDSH = dsh.getUsed(); auto usedAfterIOH = ioh.getUsed(); auto usedAfterSSH = ssh.getUsed(); auto expectedSizeCS = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_COPY_IMAGE, false, false, *pCmdQ, kernel.get(), {}); auto expectedSizeDSH = HardwareCommandsHelper::getSizeRequiredDSH(*kernel.get()); auto expectedSizeIOH = HardwareCommandsHelper::getSizeRequiredIOH(*kernel.get()); auto expectedSizeSSH = HardwareCommandsHelper::getSizeRequiredSSH(*kernel.get()); // Since each enqueue* may flush, we may see a MI_BATCH_BUFFER_END appended. expectedSizeCS += sizeof(typename FamilyType::MI_BATCH_BUFFER_END); expectedSizeCS = alignUp(expectedSizeCS, MemoryConstants::cacheLineSize); const_cast(kernel->getKernelInfo().kernelDescriptor).kernelAttributes.flags.usesFencesForReadWriteImages = false; EXPECT_GE(expectedSizeCS, usedAfterCS - usedBeforeCS); EXPECT_GE(expectedSizeDSH, usedAfterDSH - usedBeforeDSH); EXPECT_GE(expectedSizeIOH, usedAfterIOH - usedBeforeIOH); EXPECT_GE(expectedSizeSSH, usedAfterSSH - usedBeforeSSH); } HWTEST_F(GetSizeRequiredImageTest, WhenReadingImageNonBlockingThenHeapsAndCommandBufferConsumedMinimumRequiredSize) { auto &commandStream = pCmdQ->getCS(1024); auto usedBeforeCS = commandStream.getUsed(); auto &dsh = pCmdQ->getIndirectHeap(IndirectHeap::Type::DYNAMIC_STATE, 0u); auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::Type::INDIRECT_OBJECT, 0u); auto &ssh = pCmdQ->getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0u); auto usedBeforeDSH = dsh.getUsed(); auto usedBeforeIOH = ioh.getUsed(); auto usedBeforeSSH = 
ssh.getUsed(); auto retVal = EnqueueReadImageHelper<>::enqueueReadImage( pCmdQ, srcImage, CL_FALSE); EXPECT_EQ(CL_SUCCESS, retVal); auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyImage3dToBuffer, pCmdQ->getClDevice()); ASSERT_NE(nullptr, &builder); BuiltinOpParams dc; dc.srcMemObj = srcImage; dc.dstPtr = EnqueueReadImageTraits::hostPtr; dc.srcOffset = EnqueueReadImageTraits::origin; dc.size = EnqueueReadImageTraits::region; dc.srcRowPitch = EnqueueReadImageTraits::rowPitch; dc.srcSlicePitch = EnqueueReadImageTraits::slicePitch; MultiDispatchInfo multiDispatchInfo(dc); builder.buildDispatchInfos(multiDispatchInfo); EXPECT_NE(0u, multiDispatchInfo.size()); auto kernel = multiDispatchInfo.begin()->getKernel(); ASSERT_NE(nullptr, kernel); auto usedAfterCS = commandStream.getUsed(); auto usedAfterDSH = dsh.getUsed(); auto usedAfterIOH = ioh.getUsed(); auto usedAfterSSH = ssh.getUsed(); auto expectedSizeCS = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_READ_IMAGE, false, false, *pCmdQ, kernel, {}); auto expectedSizeDSH = HardwareCommandsHelper::getSizeRequiredDSH(*kernel); auto expectedSizeIOH = HardwareCommandsHelper::getSizeRequiredIOH(*kernel); auto expectedSizeSSH = HardwareCommandsHelper::getSizeRequiredSSH(*kernel); // Since each enqueue* may flush, we may see a MI_BATCH_BUFFER_END appended. 
expectedSizeCS += sizeof(typename FamilyType::MI_BATCH_BUFFER_END); expectedSizeCS = alignUp(expectedSizeCS, MemoryConstants::cacheLineSize); EXPECT_GE(expectedSizeCS, usedAfterCS - usedBeforeCS); EXPECT_GE(expectedSizeDSH, usedAfterDSH - usedBeforeDSH); EXPECT_GE(expectedSizeIOH, usedAfterIOH - usedBeforeIOH); EXPECT_GE(expectedSizeSSH, usedAfterSSH - usedBeforeSSH); } HWTEST_F(GetSizeRequiredImageTest, WhenReadingImageBlockingThenHeapsAndCommandBufferConsumedMinimumRequiredSize) { auto &commandStream = pCmdQ->getCS(1024); auto usedBeforeCS = commandStream.getUsed(); auto &dsh = pCmdQ->getIndirectHeap(IndirectHeap::Type::DYNAMIC_STATE, 0u); auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::Type::INDIRECT_OBJECT, 0u); auto &ssh = pCmdQ->getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0u); auto usedBeforeDSH = dsh.getUsed(); auto usedBeforeIOH = ioh.getUsed(); auto usedBeforeSSH = ssh.getUsed(); auto retVal = EnqueueReadImageHelper<>::enqueueReadImage( pCmdQ, srcImage, CL_TRUE); EXPECT_EQ(CL_SUCCESS, retVal); auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyImage3dToBuffer, pCmdQ->getClDevice()); ASSERT_NE(nullptr, &builder); BuiltinOpParams dc; dc.srcMemObj = srcImage; dc.dstPtr = EnqueueReadImageTraits::hostPtr; dc.srcOffset = EnqueueReadImageTraits::origin; dc.size = EnqueueReadImageTraits::region; dc.srcRowPitch = EnqueueReadImageTraits::rowPitch; dc.srcSlicePitch = EnqueueReadImageTraits::slicePitch; MultiDispatchInfo multiDispatchInfo(dc); builder.buildDispatchInfos(multiDispatchInfo); EXPECT_NE(0u, multiDispatchInfo.size()); auto kernel = multiDispatchInfo.begin()->getKernel(); ASSERT_NE(nullptr, kernel); auto usedAfterCS = commandStream.getUsed(); auto usedAfterDSH = dsh.getUsed(); auto usedAfterIOH = ioh.getUsed(); auto usedAfterSSH = ssh.getUsed(); auto expectedSizeCS = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_READ_IMAGE, false, false, *pCmdQ, kernel, {}); auto expectedSizeDSH = 
HardwareCommandsHelper::getSizeRequiredDSH(*kernel); auto expectedSizeIOH = HardwareCommandsHelper::getSizeRequiredIOH(*kernel); auto expectedSizeSSH = HardwareCommandsHelper::getSizeRequiredSSH(*kernel); // Since each enqueue* may flush, we may see a MI_BATCH_BUFFER_END appended. expectedSizeCS += sizeof(typename FamilyType::MI_BATCH_BUFFER_END); expectedSizeCS = alignUp(expectedSizeCS, MemoryConstants::cacheLineSize); EXPECT_GE(expectedSizeCS, usedAfterCS - usedBeforeCS); EXPECT_GE(expectedSizeDSH, usedAfterDSH - usedBeforeDSH); EXPECT_GE(expectedSizeIOH, usedAfterIOH - usedBeforeIOH); EXPECT_GE(expectedSizeSSH, usedAfterSSH - usedBeforeSSH); } HWTEST_F(GetSizeRequiredImageTest, WhenWritingImageNonBlockingThenHeapsAndCommandBufferConsumedMinimumRequiredSize) { auto &commandStream = pCmdQ->getCS(1024); auto usedBeforeCS = commandStream.getUsed(); auto &dsh = pCmdQ->getIndirectHeap(IndirectHeap::Type::DYNAMIC_STATE, 0u); auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::Type::INDIRECT_OBJECT, 0u); auto &ssh = pCmdQ->getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0u); auto usedBeforeDSH = dsh.getUsed(); auto usedBeforeIOH = ioh.getUsed(); auto usedBeforeSSH = ssh.getUsed(); auto retVal = EnqueueWriteImageHelper<>::enqueueWriteImage( pCmdQ, dstImage, CL_FALSE); EXPECT_EQ(CL_SUCCESS, retVal); auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToImage3d, pCmdQ->getClDevice()); ASSERT_NE(nullptr, &builder); BuiltinOpParams dc; dc.srcPtr = EnqueueWriteImageTraits::hostPtr; dc.dstMemObj = dstImage; dc.dstOffset = EnqueueWriteImageTraits::origin; dc.size = EnqueueWriteImageTraits::region; dc.dstRowPitch = EnqueueWriteImageTraits::rowPitch; dc.dstSlicePitch = EnqueueWriteImageTraits::slicePitch; MultiDispatchInfo multiDispatchInfo(dc); builder.buildDispatchInfos(multiDispatchInfo); EXPECT_NE(0u, multiDispatchInfo.size()); auto kernel = multiDispatchInfo.begin()->getKernel(); ASSERT_NE(nullptr, kernel); auto usedAfterCS = 
commandStream.getUsed(); auto usedAfterDSH = dsh.getUsed(); auto usedAfterIOH = ioh.getUsed(); auto usedAfterSSH = ssh.getUsed(); auto expectedSizeCS = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_WRITE_IMAGE, false, false, *pCmdQ, kernel, {}); auto expectedSizeDSH = HardwareCommandsHelper::getSizeRequiredDSH(*kernel); auto expectedSizeIOH = HardwareCommandsHelper::getSizeRequiredIOH(*kernel); auto expectedSizeSSH = HardwareCommandsHelper::getSizeRequiredSSH(*kernel); // Since each enqueue* may flush, we may see a MI_BATCH_BUFFER_END appended. expectedSizeCS += sizeof(typename FamilyType::MI_BATCH_BUFFER_END); expectedSizeCS = alignUp(expectedSizeCS, MemoryConstants::cacheLineSize); EXPECT_GE(expectedSizeCS, usedAfterCS - usedBeforeCS); EXPECT_GE(expectedSizeDSH, usedAfterDSH - usedBeforeDSH); EXPECT_GE(expectedSizeIOH, usedAfterIOH - usedBeforeIOH); EXPECT_GE(expectedSizeSSH, usedAfterSSH - usedBeforeSSH); } HWTEST_F(GetSizeRequiredImageTest, WhenWritingImageBlockingThenHeapsAndCommandBufferConsumedMinimumRequiredSize) { auto &commandStream = pCmdQ->getCS(1024); auto usedBeforeCS = commandStream.getUsed(); auto &dsh = pCmdQ->getIndirectHeap(IndirectHeap::Type::DYNAMIC_STATE, 0u); auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::Type::INDIRECT_OBJECT, 0u); auto &ssh = pCmdQ->getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0u); auto usedBeforeDSH = dsh.getUsed(); auto usedBeforeIOH = ioh.getUsed(); auto usedBeforeSSH = ssh.getUsed(); auto retVal = EnqueueWriteImageHelper<>::enqueueWriteImage( pCmdQ, dstImage, CL_TRUE); EXPECT_EQ(CL_SUCCESS, retVal); auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToImage3d, pCmdQ->getClDevice()); ASSERT_NE(nullptr, &builder); BuiltinOpParams dc; dc.srcPtr = EnqueueWriteImageTraits::hostPtr; dc.dstMemObj = dstImage; dc.dstOffset = EnqueueWriteImageTraits::origin; dc.size = EnqueueWriteImageTraits::region; dc.dstRowPitch = EnqueueWriteImageTraits::rowPitch; dc.dstSlicePitch = 
EnqueueWriteImageTraits::slicePitch; MultiDispatchInfo multiDispatchInfo(dc); builder.buildDispatchInfos(multiDispatchInfo); EXPECT_NE(0u, multiDispatchInfo.size()); auto kernel = multiDispatchInfo.begin()->getKernel(); ASSERT_NE(nullptr, kernel); auto usedAfterCS = commandStream.getUsed(); auto usedAfterDSH = dsh.getUsed(); auto usedAfterIOH = ioh.getUsed(); auto usedAfterSSH = ssh.getUsed(); auto expectedSizeCS = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_WRITE_IMAGE, false, false, *pCmdQ, kernel, {}); auto expectedSizeDSH = HardwareCommandsHelper::getSizeRequiredDSH(*kernel); auto expectedSizeIOH = HardwareCommandsHelper::getSizeRequiredIOH(*kernel); auto expectedSizeSSH = HardwareCommandsHelper::getSizeRequiredSSH(*kernel); // Since each enqueue* may flush, we may see a MI_BATCH_BUFFER_END appended. expectedSizeCS += sizeof(typename FamilyType::MI_BATCH_BUFFER_END); expectedSizeCS = alignUp(expectedSizeCS, MemoryConstants::cacheLineSize); EXPECT_GE(expectedSizeCS, usedAfterCS - usedBeforeCS); EXPECT_GE(expectedSizeDSH, usedAfterDSH - usedBeforeDSH); EXPECT_GE(expectedSizeIOH, usedAfterIOH - usedBeforeIOH); EXPECT_GE(expectedSizeSSH, usedAfterSSH - usedBeforeSSH); } compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/get_size_required_tests.cpp000066400000000000000000000075161422164147700321210ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/built_ins/built_ins.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/command_queue/enqueue_barrier.h" #include "opencl/source/command_queue/enqueue_marker.h" #include "opencl/source/event/event.h" #include "opencl/test/unit_test/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" using namespace NEO; struct GetSizeRequiredTest : public CommandEnqueueFixture, public ::testing::Test { void SetUp() 
override { CommandEnqueueFixture::SetUp(); dsh = &pCmdQ->getIndirectHeap(IndirectHeap::Type::DYNAMIC_STATE, 0u); ioh = &pCmdQ->getIndirectHeap(IndirectHeap::Type::INDIRECT_OBJECT, 0u); ssh = &pCmdQ->getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0u); usedBeforeDSH = dsh->getUsed(); usedBeforeIOH = ioh->getUsed(); usedBeforeSSH = ssh->getUsed(); } void TearDown() override { CommandEnqueueFixture::TearDown(); } IndirectHeap *dsh; IndirectHeap *ioh; IndirectHeap *ssh; size_t usedBeforeDSH; size_t usedBeforeIOH; size_t usedBeforeSSH; }; HWTEST_F(GetSizeRequiredTest, WhenFinishingThenHeapsAndCommandBufferAreNotConsumed) { auto &commandStream = pCmdQ->getCS(1024); auto usedBeforeCS = commandStream.getUsed(); auto retVal = pCmdQ->finish(); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, commandStream.getUsed() - usedBeforeCS); EXPECT_EQ(0u, dsh->getUsed() - usedBeforeDSH); EXPECT_EQ(0u, ioh->getUsed() - usedBeforeIOH); EXPECT_EQ(0u, ssh->getUsed() - usedBeforeSSH); } HWTEST_F(GetSizeRequiredTest, WhenEnqueuingMarkerThenHeapsAndCommandBufferAreNotConsumed) { auto &commandStream = pCmdQ->getCS(1024); auto usedBeforeCS = commandStream.getUsed(); Event event1(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 5, 15); cl_event eventBeingWaitedOn = &event1; cl_event eventReturned = nullptr; auto retVal = pCmdQ->enqueueMarkerWithWaitList( 1, &eventBeingWaitedOn, &eventReturned); EXPECT_EQ(CL_SUCCESS, retVal); size_t expectedStreamSize = 0; if (pCmdQ->getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) { expectedStreamSize = alignUp(MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation( pDevice->getHardwareInfo()), MemoryConstants::cacheLineSize); } EXPECT_EQ(expectedStreamSize, commandStream.getUsed() - usedBeforeCS); EXPECT_EQ(0u, dsh->getUsed() - usedBeforeDSH); EXPECT_EQ(0u, ioh->getUsed() - usedBeforeIOH); EXPECT_EQ(0u, ssh->getUsed() - usedBeforeSSH); clReleaseEvent(eventReturned); } HWTEST_F(GetSizeRequiredTest, 
WhenEnqueuingBarrierThenHeapsAndCommandBufferAreNotConsumed) { auto &commandStream = pCmdQ->getCS(1024); auto usedBeforeCS = commandStream.getUsed(); Event event1(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 5, 15); cl_event eventBeingWaitedOn = &event1; cl_event eventReturned = nullptr; auto retVal = pCmdQ->enqueueBarrierWithWaitList( 1, &eventBeingWaitedOn, &eventReturned); EXPECT_EQ(CL_SUCCESS, retVal); size_t expectedStreamSize = 0; if (pCmdQ->getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) { expectedStreamSize = alignUp(MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation( pDevice->getHardwareInfo()), MemoryConstants::cacheLineSize); } EXPECT_EQ(expectedStreamSize, commandStream.getUsed() - usedBeforeCS); clReleaseEvent(eventReturned); } compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/gl/000077500000000000000000000000001422164147700250535ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/gl/CMakeLists.txt000066400000000000000000000001471422164147700276150ustar00rootroot00000000000000# # Copyright (C) 2018-2021 Intel Corporation # # SPDX-License-Identifier: MIT # add_subdirectories() compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/gl/windows/000077500000000000000000000000001422164147700265455ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/gl/windows/CMakeLists.txt000066400000000000000000000005401422164147700313040ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(WIN32) set(IGDRCL_SRCS_tests_command_queue_gl_windows ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_kernel_gl_tests_windows.cpp ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_command_queue_gl_windows}) endif() 
enqueue_kernel_gl_tests_windows.cpp000066400000000000000000000057201422164147700356630ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/gl/windows/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/built_ins/built_ins.h" #include "shared/source/helpers/constants.h" #include "shared/source/helpers/preamble.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "shared/test/common/cmd_parse/hw_parse.h" #include "shared/test/common/fixtures/memory_management_fixture.h" #include "shared/test/common/mocks/mock_csr.h" #include "shared/test/common/mocks/mock_gmm.h" #include "shared/test/common/mocks/mock_submissions_aggregator.h" #include "opencl/source/sharings/gl/gl_buffer.h" #include "opencl/test/unit_test/command_queue/enqueue_fixture.h" #include "opencl/test/unit_test/fixtures/hello_world_fixture.h" #include "opencl/test/unit_test/mocks/gl/windows/mock_gl_sharing_windows.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" using namespace NEO; typedef HelloWorldFixture EnqueueKernelFixture; typedef Test EnqueueKernelTest; TEST_F(EnqueueKernelTest, givenKernelWithSharedObjArgsWhenEnqueueIsCalledThenResetPatchAddress) { auto nonSharedBuffer = new MockBuffer; MockGlSharing glSharing; MockGmm mockGmm(pDevice->getGmmClientContext()); glSharing.uploadDataToBufferInfo(1, 0, mockGmm.gmmResourceInfo->peekGmmResourceInfo()); pContext->setSharingFunctions(glSharing.sharingFunctions.release()); auto retVal = CL_SUCCESS; auto sharedBuffer = GlBuffer::createSharedGlBuffer(pContext, CL_MEM_READ_WRITE, 1, &retVal); auto graphicsAllocation = sharedBuffer->getGraphicsAllocation(pContext->getDevice(0)->getRootDeviceIndex()); auto sharedMem = static_cast(sharedBuffer); auto nonSharedMem = static_cast(nonSharedBuffer); pKernel->setArg(0, sizeof(cl_mem *), &sharedMem); pKernel->setArg(1, sizeof(cl_mem *), &nonSharedMem); EXPECT_TRUE(pKernel->isUsingSharedObjArgs()); 
auto &kernelInfo = pKernel->getKernelInfo(); auto pKernelArg = (uint32_t *)(pKernel->getCrossThreadData() + kernelInfo.getArgDescriptorAt(0).as().stateless); auto address1 = static_cast(*pKernelArg); auto sharedBufferGpuAddress = pKernel->isBuiltIn ? graphicsAllocation->getGpuAddress() : graphicsAllocation->getGpuAddressToPatch(); EXPECT_EQ(sharedBufferGpuAddress, address1); // update address glSharing.uploadDataToBufferInfo(1, 1, mockGmm.gmmResourceInfo->peekGmmResourceInfo()); pCmdQ->enqueueAcquireSharedObjects(1, &sharedMem, 0, nullptr, nullptr, CL_COMMAND_ACQUIRE_GL_OBJECTS); callOneWorkItemNDRKernel(); auto address2 = static_cast(*pKernelArg); EXPECT_NE(address1, address2); sharedBufferGpuAddress = pKernel->isBuiltIn ? graphicsAllocation->getGpuAddress() : graphicsAllocation->getGpuAddressToPatch(); EXPECT_EQ(sharedBufferGpuAddress, address2); delete sharedBuffer; delete nonSharedBuffer; } compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/ioq_task_tests.cpp000066400000000000000000000101331422164147700302070ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/command_queue/enqueue_fixture.h" #include "opencl/test/unit_test/fixtures/hello_world_fixture.h" using namespace NEO; typedef HelloWorldTest IOQ; TEST_F(IOQ, WhenEnqueueingKernelThenTaskLevelIsIncremented) { auto previousTaskLevel = pCmdQ->taskLevel; EnqueueKernelHelper<>::enqueueKernel( pCmdQ, pKernel); EXPECT_LT(previousTaskLevel, pCmdQ->taskLevel); } TEST_F(IOQ, WhenFillingBufferThenTaskLevelIsIncremented) { auto previousTaskLevel = pCmdQ->taskLevel; EnqueueFillBufferHelper<>::enqueue(pCmdQ); EXPECT_LT(previousTaskLevel, pCmdQ->taskLevel); } TEST_F(IOQ, WhenReadingBufferThenTaskLevelIsIncremented) { auto previousTaskLevel = pCmdQ->taskLevel; auto buffer = std::unique_ptr(BufferHelper<>::create()); buffer->forceDisallowCPUCopy = true; // task level is not increased if doing cpu copy 
EnqueueReadBufferHelper<>::enqueueReadBuffer(pCmdQ, buffer.get()); EXPECT_LT(previousTaskLevel, pCmdQ->taskLevel); } TEST_F(IOQ, WhenEnqueueingKernelThenTaskCountIsIncremented) { auto &commandStreamReceiver = pCmdQ->getGpgpuCommandStreamReceiver(); auto previousTaskCount = commandStreamReceiver.peekTaskCount(); EnqueueKernelHelper<>::enqueueKernel(pCmdQ, pKernel); EXPECT_LT(previousTaskCount, commandStreamReceiver.peekTaskCount()); EXPECT_EQ(pCmdQ->taskCount, commandStreamReceiver.peekTaskCount()); } TEST_F(IOQ, WhenFillingBufferThenTaskCountIsIncremented) { auto &commandStreamReceiver = pCmdQ->getGpgpuCommandStreamReceiver(); auto previousTaskCount = commandStreamReceiver.peekTaskCount(); EnqueueFillBufferHelper<>::enqueue(pCmdQ); EXPECT_LT(previousTaskCount, commandStreamReceiver.peekTaskCount()); EXPECT_LE(pCmdQ->taskCount, commandStreamReceiver.peekTaskCount()); } TEST_F(IOQ, WhenReadingBufferThenTaskCountIsIncremented) { auto &commandStreamReceiver = pCmdQ->getGpgpuCommandStreamReceiver(); auto previousTaskCount = commandStreamReceiver.peekTaskCount(); auto buffer = std::unique_ptr(BufferHelper<>::create()); buffer->forceDisallowCPUCopy = true; // task level is not increased if doing cpu copy EnqueueReadBufferHelper<>::enqueueReadBuffer(pCmdQ, buffer.get()); EXPECT_LT(previousTaskCount, commandStreamReceiver.peekTaskCount()); EXPECT_LE(pCmdQ->taskCount, commandStreamReceiver.peekTaskCount()); } TEST_F(IOQ, GivenUserEventWhenReadingBufferThenTaskCountAndTaskLevelAreIncremented) { auto buffer = std::unique_ptr(BufferHelper<>::create()); auto alignedReadPtr = alignedMalloc(BufferDefaults::sizeInBytes, MemoryConstants::cacheLineSize); ASSERT_NE(nullptr, alignedReadPtr); auto previousTaskCount = pCmdQ->taskCount; auto previousTaskLevel = pCmdQ->taskLevel; auto userEvent = clCreateUserEvent(pContext, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clSetUserEventStatus(userEvent, CL_COMPLETE); ASSERT_EQ(CL_SUCCESS, retVal); buffer->forceDisallowCPUCopy = true; // 
task level is not increased if doing cpu copy retVal = EnqueueReadBufferHelper<>::enqueueReadBuffer(pCmdQ, buffer.get(), CL_TRUE, 0, BufferDefaults::sizeInBytes, alignedReadPtr, nullptr, 1, &userEvent, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_LT(previousTaskCount, pCmdQ->taskCount); EXPECT_LT(previousTaskLevel, pCmdQ->taskLevel); retVal = clReleaseEvent(userEvent); EXPECT_EQ(CL_SUCCESS, retVal); alignedFree(alignedReadPtr); } compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/local_work_size_tests.cpp000066400000000000000000001041661422164147700315750ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/local_work_size.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/command_queue/cl_local_work_size.h" #include "opencl/source/helpers/dispatch_info.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" using namespace NEO; TEST(localWorkSizeTest, givenDisableEUFusionWhenCreatingWorkSizeInfoThenCorrectMinWorkGroupSizeIsSet) { uint32_t simdSize = 8u; uint32_t numThreadsPerSubS = 8u; WorkSizeInfo wsInfo(256, // maxWorkGroupSize 1u, // hasBariers simdSize, // simdSize 0u, // slmTotalSize defaultHwInfo.get(), // hardwareInfo numThreadsPerSubS, // numThreadsPerSubS 0u, // localMemorySize false, // imgUsed false, // yTiledSurface true // disableEUFusion ); auto &hwHelper = HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily); bool fusedDispatchEnabled = hwHelper.isFusedEuDispatchEnabled(*defaultHwInfo, true); auto WGSMultiple = fusedDispatchEnabled ? 2 : 1; uint32_t maxBarriersPerHSlice = (defaultHwInfo.get()->platform.eRenderCoreFamily >= IGFX_GEN9_CORE) ? 
32 : 16; uint32_t expectedMinWGS = WGSMultiple * simdSize * numThreadsPerSubS / maxBarriersPerHSlice; EXPECT_EQ(expectedMinWGS, wsInfo.minWorkGroupSize); } TEST(localWorkSizeTest, given3DimWorkGroupAndSimdEqual8AndBarriersWhenComputeCalledThenLocalGroupComputedCorrectly) { WorkSizeInfo wsInfo(256, // maxWorkGroupSize 1u, // hasBariers 8, // simdSize 0u, // slmTotalSize defaultHwInfo.get(), // hardwareInfo 32u, // numThreadsPerSubSlice 0u, // localMemorySize false, // imgUsed false, // yTiledSurface false // disableEUFusion ); uint32_t workDim = 3; size_t workGroup[3] = {10000, 10000, 10000}; size_t workGroupSize[3]; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 200u); EXPECT_EQ(workGroupSize[1], 1u); EXPECT_EQ(workGroupSize[2], 1u); workGroup[0] = 50; workGroup[1] = 2000; workGroup[2] = 100; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 50u); EXPECT_EQ(workGroupSize[1], 4u); EXPECT_EQ(workGroupSize[2], 1u); } TEST(localWorkSizeTest, givenSmallerLocalMemSizeThanSlmTotalSizeThenExceptionIsThrown) { EXPECT_THROW(WorkSizeInfo wsInfo(256, // maxWorkGroupSize 1u, // hasBariers 8, // simdSize 128u, // slmTotalSize defaultHwInfo.get(), // hardwareInfo 32u, // numThreadsPerSubSlice 64u, // localMemorySize false, // imgUsed false, // yTiledSurface false // disableEUFusion ), std::exception); } TEST(localWorkSizeTest, given2DimWorkGroupAndSimdEqual8AndNoBarriersWhenComputeCalledThenLocalGroupComputedCorrectly) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableComputeWorkSizeSquared.set(true); //wsInfo maxWorkGroupSize, hasBariers, simdSize, slmTotalSize, hardwareInfo, numThreadsPerSubSlice, localMemorySize, imgUsed, yTiledSurface, disableEUFusion WorkSizeInfo wsInfo(256, 0u, 8, 0u, defaultHwInfo.get(), 32u, 0u, false, false, false); uint32_t workDim = 2; size_t workGroup[3] = {10003, 10003, 1}; size_t workGroupSize[3]; NEO::computeWorkgroupSizeND(wsInfo, 
workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 7u); EXPECT_EQ(workGroupSize[1], 7u); EXPECT_EQ(workGroupSize[2], 1u); workGroup[0] = 21; workGroup[1] = 3000; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 21u); EXPECT_EQ(workGroupSize[1], 8u); EXPECT_EQ(workGroupSize[2], 1u); } TEST(localWorkSizeTest, given1DimWorkGroupAndSimdEqual8WhenComputeCalledThenLocalGroupComputed) { //wsInfo maxWorkGroupSize, hasBariers, simdSize, slmTotalSize, hardwareInfo, numThreadsPerSubSlice, localMemorySize, imgUsed, yTiledSurface, disableEUFusion WorkSizeInfo wsInfo(256, 0u, 8, 0u, defaultHwInfo.get(), 32u, 0u, false, false, false); uint32_t workDim = 1; size_t workGroup[3] = {6144, 1, 1}; size_t workGroupSize[3]; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 256u); EXPECT_EQ(workGroupSize[1], 1u); EXPECT_EQ(workGroupSize[2], 1u); workGroup[0] = 48; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 16u); EXPECT_EQ(workGroupSize[1], 1u); EXPECT_EQ(workGroupSize[2], 1u); workGroup[0] = 1536; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 256u); EXPECT_EQ(workGroupSize[1], 1u); EXPECT_EQ(workGroupSize[2], 1u); workGroup[0] = 333; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 9u); EXPECT_EQ(workGroupSize[1], 1u); EXPECT_EQ(workGroupSize[2], 1u); } TEST(localWorkSizeTest, given1DimWorkGroupAndSimdEqual32WhenComputeCalledThenLocalGroupComputed) { WorkSizeInfo wsInfo(256, 0u, 32, 0u, defaultHwInfo.get(), 32u, 0u, false, false, false); uint32_t workDim = 1; size_t workGroup[3] = {6144, 1, 1}; size_t workGroupSize[3]; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 256u); EXPECT_EQ(workGroupSize[1], 1u); EXPECT_EQ(workGroupSize[2], 1u); workGroup[0] = 48; 
NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 48u); EXPECT_EQ(workGroupSize[1], 1u); EXPECT_EQ(workGroupSize[2], 1u); workGroup[0] = 512; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 256u); EXPECT_EQ(workGroupSize[1], 1u); EXPECT_EQ(workGroupSize[2], 1u); } TEST(localWorkSizeTest, given2DimWorkGroupAndSimdEqual8WhenComputeCalledThenLocalGroupComputed) { WorkSizeInfo wsInfo(256, 0u, 8, 0u, defaultHwInfo.get(), 56u, 0u, false, false, false); uint32_t workDim = 2; size_t workGroup[3] = {384, 96, 1}; size_t workGroupSize[3]; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 128u); EXPECT_EQ(workGroupSize[1], 2u); EXPECT_EQ(workGroupSize[2], 1u); workGroup[0] = 48; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 16u); EXPECT_EQ(workGroupSize[1], 16u); EXPECT_EQ(workGroupSize[2], 1u); workGroup[0] = 12; workGroup[1] = 512; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 4u); EXPECT_EQ(workGroupSize[1], 64u); EXPECT_EQ(workGroupSize[2], 1u); } TEST(localWorkSizeTest, given2DimWorkGroupAndSimdEqual32WhenComputeCalledThenLocalGroupComputed) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableComputeWorkSizeSquared.set(false); WorkSizeInfo wsInfo(256, 0u, 32, 0u, defaultHwInfo.get(), 32u, 0u, false, false, false); uint32_t workDim = 2; size_t workGroup[3] = {384, 96, 1}; size_t workGroupSize[3]; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 128u); EXPECT_EQ(workGroupSize[1], 2u); EXPECT_EQ(workGroupSize[2], 1u); workGroup[0] = 48; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 16u); EXPECT_EQ(workGroupSize[1], 16u); EXPECT_EQ(workGroupSize[2], 1u); workGroup[0] = 12; workGroup[1] = 512; 
NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 4u); EXPECT_EQ(workGroupSize[1], 64u); EXPECT_EQ(workGroupSize[2], 1u); workGroup[0] = 1; workGroup[1] = 384; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 1u); EXPECT_EQ(workGroupSize[1], 128u); EXPECT_EQ(workGroupSize[2], 1u); } TEST(localWorkSizeTest, given3DimWorkGroupAndSimdEqual8WhenComputeCalledThenLocalGroupComputed) { WorkSizeInfo wsInfo(256, 0u, 8, 0u, defaultHwInfo.get(), 56u, 0u, false, false, false); uint32_t workDim = 3; size_t workGroup[3] = {384, 384, 384}; size_t workGroupSize[3]; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 128u); EXPECT_EQ(workGroupSize[1], 2u); EXPECT_EQ(workGroupSize[2], 1u); workGroup[0] = 96; workGroup[1] = 4; workGroup[2] = 4; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 32u); EXPECT_EQ(workGroupSize[1], 4u); EXPECT_EQ(workGroupSize[2], 2u); workGroup[0] = 12; workGroup[1] = 512; workGroup[2] = 48; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 4u); EXPECT_EQ(workGroupSize[1], 64u); EXPECT_EQ(workGroupSize[2], 1u); workGroup[0] = 2; workGroup[1] = 2; workGroup[2] = 3; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 2u); EXPECT_EQ(workGroupSize[1], 2u); EXPECT_EQ(workGroupSize[2], 3u); } TEST(localWorkSizeTest, given3DimWorkGroupAndSimdEqual32WhenComputeCalledThenLocalGroupComputed) { NEO::WorkSizeInfo wsInfo(256, 0u, 32, 0u, defaultHwInfo.get(), 32u, 0u, false, false, false); uint32_t workDim = 3; size_t workGroup[3] = {384, 384, 384}; size_t workGroupSize[3]; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 128u); EXPECT_EQ(workGroupSize[1], 2u); EXPECT_EQ(workGroupSize[2], 1u); workGroup[0] = 96; workGroup[1] = 
6; workGroup[2] = 4; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 32u); EXPECT_EQ(workGroupSize[1], 2u); EXPECT_EQ(workGroupSize[2], 4u); workGroup[0] = 12; workGroup[1] = 512; workGroup[2] = 48; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 4u); EXPECT_EQ(workGroupSize[1], 64u); EXPECT_EQ(workGroupSize[2], 1u); workGroup[0] = 6; workGroup[1] = 4; workGroup[2] = 64; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 2u); EXPECT_EQ(workGroupSize[1], 4u); EXPECT_EQ(workGroupSize[2], 32u); workGroup[0] = 113; workGroup[1] = 113; workGroup[2] = 113; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 113u); EXPECT_EQ(workGroupSize[1], 1u); EXPECT_EQ(workGroupSize[2], 1u); } TEST(localWorkSizeTest, given2DimWorkGroupAndSquaredAlgorithmWhenComputeCalledThenLocalGroupComputed) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableComputeWorkSizeSquared.set(true); WorkSizeInfo wsInfo(256, 0u, 32, 0u, defaultHwInfo.get(), 32u, 0u, false, false, false); uint32_t workDim = 2; size_t workGroup[3] = {384, 96, 1}; size_t workGroupSize[3]; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 16u); EXPECT_EQ(workGroupSize[1], 16u); EXPECT_EQ(workGroupSize[2], 1u); } TEST(localWorkSizeTest, given1DimWorkGroupAndSquaredAlgorithmOnWhenComputeCalledThenSquaredAlgorithmIsNotExecuted) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableComputeWorkSizeSquared.set(true); WorkSizeInfo wsInfo(256, 0u, 32, 0u, defaultHwInfo.get(), 32u, 0u, false, false, false); uint32_t workDim = 1; size_t workGroup[3] = {1024, 1, 1}; size_t workGroupSize[3]; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 256u); EXPECT_EQ(workGroupSize[1], 1u); EXPECT_EQ(workGroupSize[2], 1u); } 
// WorkSizeInfo constructor arguments, in order:
//   maxWorkGroupSize, hasBarriers, simdSize, slmTotalSize, hardwareInfo,
//   numThreadsPerSubSlice, localMemorySize, imgUsed, yTiledSurface, disableEUFusion

// 2-D dispatch, imgUsed = true, EnableComputeWorkSizeSquared forced on:
// the computed local work size is 64x4x1.
TEST(localWorkSizeTest, given2DdispatchWithImagesAndSquaredAlgorithmOnWhenLwsIsComputedThenSquaredAlgorithmIsNotExecuted) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.EnableComputeWorkSizeSquared.set(true);

    WorkSizeInfo workSizeInfo(256, 0u, 32, 0u, defaultHwInfo.get(), 32u, 0u, true, false, false);
    uint32_t dimensions = 2;
    size_t globalWorkSize[3] = {256, 96, 1};
    size_t localWorkSize[3];

    NEO::computeWorkgroupSizeND(workSizeInfo, localWorkSize, globalWorkSize, dimensions);

    EXPECT_EQ(localWorkSize[0], 64u);
    EXPECT_EQ(localWorkSize[1], 4u);
    EXPECT_EQ(localWorkSize[2], 1u);
}

// SIMD32 kernel with barriers, imgUsed = true and yTiledSurface = true:
// both global sizes below yield a 32x8x1 local work size.
TEST(localWorkSizeTest, givenKernelWithTileYImagesAndBarrierWhenWorkgroupSizeIsComputedThenItMimicsTilingPattern) {
    WorkSizeInfo workSizeInfo(256, true, 32, 0u, defaultHwInfo.get(), 32u, 0u, true, true, false);
    uint32_t dimensions = 2;
    size_t globalWorkSize[3] = {2048, 2048, 1};
    size_t localWorkSize[3];

    // Case 1: 2048 x 2048
    NEO::computeWorkgroupSizeND(workSizeInfo, localWorkSize, globalWorkSize, dimensions);
    EXPECT_EQ(localWorkSize[0], 32u);
    EXPECT_EQ(localWorkSize[1], 8u);
    EXPECT_EQ(localWorkSize[2], 1u);

    // Case 2: 1920 x 1080
    globalWorkSize[0] = 1920;
    globalWorkSize[1] = 1080;
    NEO::computeWorkgroupSizeND(workSizeInfo, localWorkSize, globalWorkSize, dimensions);
    EXPECT_EQ(localWorkSize[0], 32u);
    EXPECT_EQ(localWorkSize[1], 8u);
    EXPECT_EQ(localWorkSize[2], 1u);
}

// Same setup as the barrier variant, but hasBarriers = false:
// both global sizes below yield a 64x4x1 local work size.
TEST(localWorkSizeTest, givenKernelWithTileYImagesAndNoBarriersWhenWorkgroupSizeIsComputedThenItMimicsTilingPattern) {
    WorkSizeInfo workSizeInfo(256, false, 32, 0u, defaultHwInfo.get(), 32u, 0u, true, true, false);
    uint32_t dimensions = 2;
    size_t globalWorkSize[3] = {2048, 2048, 1};
    size_t localWorkSize[3];

    // Case 1: 2048 x 2048
    NEO::computeWorkgroupSizeND(workSizeInfo, localWorkSize, globalWorkSize, dimensions);
    EXPECT_EQ(localWorkSize[0], 64u);
    EXPECT_EQ(localWorkSize[1], 4u);
    EXPECT_EQ(localWorkSize[2], 1u);

    // Case 2: 1920 x 1080
    globalWorkSize[0] = 1920;
    globalWorkSize[1] = 1080;
    NEO::computeWorkgroupSizeND(workSizeInfo, localWorkSize, globalWorkSize, dimensions);
    EXPECT_EQ(localWorkSize[0], 64u);
    EXPECT_EQ(localWorkSize[1], 4u);
    EXPECT_EQ(localWorkSize[2], 1u);
}
TEST(localWorkSizeTest, givenSimd16KernelWithTileYImagesAndNoBarriersWhenWorkgroupSizeIsComputedThenItMimicsTilingPattern) { WorkSizeInfo wsInfo(256, false, 16, 0u, defaultHwInfo.get(), 32u, 0u, true, true, false); uint32_t workDim = 2; size_t workGroup[3] = {1, 1, 1}; size_t workGroupSize[3]; workGroup[0] = 2048; workGroup[1] = 2048; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 32u); EXPECT_EQ(workGroupSize[1], 4u); EXPECT_EQ(workGroupSize[2], 1u); workGroup[0] = 1920; workGroup[1] = 1080; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 32u); EXPECT_EQ(workGroupSize[1], 4u); EXPECT_EQ(workGroupSize[2], 1u); } TEST(localWorkSizeTest, givenKernelWithTwoDimensionalGlobalSizesWhenLwsIsComputedThenItHasMaxWorkgroupSize) { WorkSizeInfo wsInfo(256, 0u, 32, 0u, defaultHwInfo.get(), 32u, 0u, false, false, false); uint32_t workDim = 2; size_t workGroup[3] = {1, 1, 1}; size_t workGroupSize[3]; workGroup[0] = 1024; workGroup[1] = 1024; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 256u); EXPECT_EQ(workGroupSize[1], 1u); EXPECT_EQ(workGroupSize[2], 1u); } TEST(localWorkSizeTest, givenKernelWithBarriersAndTiledImagesWithYdimensionHigherThenXDimensionWhenLwsIsComputedThenItMimicsTiling) { WorkSizeInfo wsInfo(256, 0u, 32, 0u, defaultHwInfo.get(), 32u, 0u, true, true, false); uint32_t workDim = 2; size_t workGroup[3] = {1, 1, 1}; size_t workGroupSize[3]; workGroup[0] = 256; workGroup[1] = 1024; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 64u); EXPECT_EQ(workGroupSize[1], 4u); EXPECT_EQ(workGroupSize[2], 1u); workGroup[0] = 512; workGroup[1] = 2048; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 64u); EXPECT_EQ(workGroupSize[1], 4u); EXPECT_EQ(workGroupSize[2], 1u); workGroup[0] = 480; workGroup[1] = 1080; 
NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 32u); EXPECT_EQ(workGroupSize[1], 4u); EXPECT_EQ(workGroupSize[2], 1u); workGroup[0] = 196; workGroup[1] = 30; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 49u); EXPECT_EQ(workGroupSize[1], 5u); EXPECT_EQ(workGroupSize[2], 1u); } TEST(localWorkSizeTest, givenHighOneDimensionalGwsWhenLwsIsComputedThenMaxWorkgoupSizeIsUsed) { WorkSizeInfo wsInfo(256, 0u, 32, 0u, defaultHwInfo.get(), 32u, 0u, false, false, false); uint32_t workDim = 2; size_t workGroup[3] = {1, 1, 1}; size_t workGroupSize[3]; workGroup[0] = 65536; workGroup[1] = 1; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 256u); EXPECT_EQ(workGroupSize[1], 1u); EXPECT_EQ(workGroupSize[2], 1u); workGroup[0] = 524288; workGroup[1] = 1; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 256u); EXPECT_EQ(workGroupSize[1], 1u); EXPECT_EQ(workGroupSize[2], 1u); } TEST(localWorkSizeTest, givenVeriousGwsSizesWithImagesWhenLwsIsComputedThenProperSizesAreReturned) { WorkSizeInfo wsInfo(256, 0u, 32, 0u, defaultHwInfo.get(), 32u, 0u, true, true, false); uint32_t workDim = 2; size_t workGroup[3] = {1, 1, 1}; size_t workGroupSize[3]; workGroup[0] = 256; workGroup[1] = 1024; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 64u); EXPECT_EQ(workGroupSize[1], 4u); EXPECT_EQ(workGroupSize[2], 1u); workGroup[0] = 512; workGroup[1] = 2048; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 64u); EXPECT_EQ(workGroupSize[1], 4u); EXPECT_EQ(workGroupSize[2], 1u); workGroup[0] = 208; workGroup[1] = 2; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 16u); EXPECT_EQ(workGroupSize[1], 2u); EXPECT_EQ(workGroupSize[2], 1u); workGroup[0] = 
6; workGroup[1] = 128; wsInfo.simdSize = 8; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 2u); EXPECT_EQ(workGroupSize[1], 4u); EXPECT_EQ(workGroupSize[2], 1u); workGroup[0] = 3; workGroup[1] = 128; wsInfo.simdSize = 8; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 1u); EXPECT_EQ(workGroupSize[1], 128u); EXPECT_EQ(workGroupSize[2], 1u); } TEST(localWorkSizeTest, givenHigh1DGwsAndSimdSize16WhenLwsIsComputedThenMaxWorkgroupSizeIsChoosen) { WorkSizeInfo wsInfo(256u, 0u, 16, 0u, defaultHwInfo.get(), 56u, 0, false, false, false); size_t workGroup[3] = {1, 1, 1}; size_t workGroupSize[3]; workGroup[0] = 1048576; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, 1); EXPECT_EQ(workGroupSize[0], 256u); EXPECT_EQ(workGroupSize[1], 1u); EXPECT_EQ(workGroupSize[2], 1u); } TEST(localWorkSizeTest, givenHigh1DGwsAndSimdSize8WhenLwsIsComputedThenMaxWorkgroupSizeIsChoosen) { WorkSizeInfo wsInfo(256u, 0u, 8, 0u, defaultHwInfo.get(), 32u, 0, false, false, false); size_t workGroup[3] = {1, 1, 1}; size_t workGroupSize[3]; workGroup[0] = 1048576; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, 1); EXPECT_EQ(workGroupSize[0], 256u); EXPECT_EQ(workGroupSize[1], 1u); EXPECT_EQ(workGroupSize[2], 1u); } TEST(localWorkSizeTest, givenKernelUtilizingImagesAndSlmWhenLwsIsBeingComputedThenItMimicsGlobalWorkgroupSizes) { WorkSizeInfo wsInfo(256u, 1u, 32, 4096u, defaultHwInfo.get(), 56u, 65536u, true, true, false); uint32_t workDim = 2; size_t workGroup[3] = {1, 1, 1}; size_t workGroupSize[3]; workGroup[0] = 2048; workGroup[1] = 2048; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 16u); EXPECT_EQ(workGroupSize[1], 16u); EXPECT_EQ(workGroupSize[2], 1u); workGroup[0] = 1920; workGroup[1] = 1080; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 32u); 
EXPECT_EQ(workGroupSize[1], 8u); EXPECT_EQ(workGroupSize[2], 1u); } TEST(localWorkSizeTest, GivenUseStrictRatioWhenLwsIsBeingComputedThenWgsIsCalculatedCorrectly) { WorkSizeInfo wsInfo(256u, 0u, 32u, 0u, defaultHwInfo.get(), 0u, 0u, true, true, false); uint32_t workDim = 2; size_t workGroup[3] = {194, 234, 1}; size_t workGroupSize[3]; workGroup[0] = 194; workGroup[1] = 234; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 2u); EXPECT_EQ(workGroupSize[1], 117u); EXPECT_EQ(workGroupSize[2], 1u); workGroup[0] = 100; workGroup[1] = 100; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 20u); EXPECT_EQ(workGroupSize[1], 5u); EXPECT_EQ(workGroupSize[2], 1u); workGroup[0] = 54; workGroup[1] = 154; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 27u); EXPECT_EQ(workGroupSize[1], 7u); EXPECT_EQ(workGroupSize[2], 1u); } TEST(localWorkSizeTest, GivenUseBarriersWhenLwsIsBeingComputedThenWgsIsCalculatedCorrectly) { WorkSizeInfo wsInfo(256u, 1u, 32u, 0u, defaultHwInfo.get(), 56u, 0u, true, true, false); uint32_t workDim = 2; size_t workGroup[3] = {194, 234, 1}; size_t workGroupSize[3]; workGroup[0] = 194; workGroup[1] = 234; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 97u); EXPECT_EQ(workGroupSize[1], 2u); EXPECT_EQ(workGroupSize[2], 1u); wsInfo.useRatio = false; wsInfo.useStrictRatio = false; wsInfo.yTiledSurfaces = false; wsInfo.imgUsed = false; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 2u); EXPECT_EQ(workGroupSize[1], 78u); EXPECT_EQ(workGroupSize[2], 1u); wsInfo.useRatio = false; wsInfo.useStrictRatio = false; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 2u); EXPECT_EQ(workGroupSize[1], 78u); EXPECT_EQ(workGroupSize[2], 1u); } TEST(localWorkSizeTest, 
given2DimWorkWhenComputeSquaredCalledThenLocalGroupComputed) { WorkSizeInfo wsInfo(256, 0u, 16, 0u, defaultHwInfo.get(), 6u, 0u, false, false, false); uint32_t workDim = 2; size_t workGroup[3] = {2048, 272, 1}; size_t workGroupSize[3]; NEO::computeWorkgroupSizeSquared(wsInfo.maxWorkGroupSize, workGroupSize, workGroup, wsInfo.simdSize, workDim); EXPECT_EQ(workGroupSize[0], 16u); EXPECT_EQ(workGroupSize[1], 16u); EXPECT_EQ(workGroupSize[2], 1u); workGroup[0] = 1024; workGroup[1] = 1024; NEO::computeWorkgroupSizeSquared(wsInfo.maxWorkGroupSize, workGroupSize, workGroup, wsInfo.simdSize, workDim); EXPECT_EQ(workGroupSize[0], 16u); EXPECT_EQ(workGroupSize[1], 16u); EXPECT_EQ(workGroupSize[2], 1u); workGroup[0] = 512; workGroup[1] = 104; NEO::computeWorkgroupSizeSquared(wsInfo.maxWorkGroupSize, workGroupSize, workGroup, wsInfo.simdSize, workDim); EXPECT_EQ(workGroupSize[0], 32u); EXPECT_EQ(workGroupSize[1], 8u); EXPECT_EQ(workGroupSize[2], 1u); workGroup[0] = 104; workGroup[1] = 512; NEO::computeWorkgroupSizeSquared(wsInfo.maxWorkGroupSize, workGroupSize, workGroup, wsInfo.simdSize, workDim); EXPECT_EQ(workGroupSize[0], 8u); EXPECT_EQ(workGroupSize[1], 32u); EXPECT_EQ(workGroupSize[2], 1u); workGroup[0] = 184; workGroup[1] = 368; NEO::computeWorkgroupSizeSquared(wsInfo.maxWorkGroupSize, workGroupSize, workGroup, wsInfo.simdSize, workDim); EXPECT_EQ(workGroupSize[0], 8u); EXPECT_EQ(workGroupSize[1], 16u); EXPECT_EQ(workGroupSize[2], 1u); workGroup[0] = 113; workGroup[1] = 2; NEO::computeWorkgroupSizeSquared(wsInfo.maxWorkGroupSize, workGroupSize, workGroup, wsInfo.simdSize, workDim); EXPECT_EQ(workGroupSize[0], 113u); EXPECT_EQ(workGroupSize[1], 2u); EXPECT_EQ(workGroupSize[2], 1u); } TEST(localWorkSizeTest, givenDeviceSupportingLws1024AndKernelCompiledInSimd8WhenGwsIs1024ThenLwsIsComputedAsMaxOptimalMultipliedBySimd) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableComputeWorkSizeSquared.set(false); WorkSizeInfo wsInfo(1024, 0u, 8, 0u, defaultHwInfo.get(), 
56u, 0u, false, false, false); uint32_t workDim = 2; size_t workGroup[3] = {32, 32, 1}; size_t workGroupSize[3]; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 32u); EXPECT_EQ(workGroupSize[1], 8u); EXPECT_EQ(workGroupSize[2], 1u); } TEST(localWorkSizeTest, givenDeviceWith36ThreadsPerSubsliceWhenSimd16KernelIsBeingSubmittedThenWorkgroupContainsOf8HwThreads) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableComputeWorkSizeSquared.set(false); WorkSizeInfo wsInfo(256, 0u, 16, 0u, defaultHwInfo.get(), 36u, 0u, false, false, false); uint32_t workDim = 2; size_t workGroup[3] = {1024, 1024, 1}; size_t workGroupSize[3]; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 128u); EXPECT_EQ(workGroupSize[1], 1u); EXPECT_EQ(workGroupSize[2], 1u); } TEST(localWorkSizeTest, givenDeviceWith56ThreadsPerSubsliceWhenSimd16KernelIsBeingSubmittedThenWorkgroupContainsOf16HwThreads) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableComputeWorkSizeSquared.set(false); WorkSizeInfo wsInfo(256, 0u, 16, 0u, defaultHwInfo.get(), 56u, 0u, false, false, false); uint32_t workDim = 2; size_t workGroup[3] = {1024, 1024, 1}; size_t workGroupSize[3]; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 256u); EXPECT_EQ(workGroupSize[1], 1u); EXPECT_EQ(workGroupSize[2], 1u); } TEST(localWorkSizeTest, givenDispatchInfoWhenWorkSizeInfoIsCreatedThenItHasCorrectNumberOfThreads) { MockClDevice device{new MockDevice}; MockKernelWithInternals kernel(device); DispatchInfo dispatchInfo; dispatchInfo.setClDevice(&device); dispatchInfo.setKernel(kernel.mockKernel); auto threadsPerEu = defaultHwInfo->gtSystemInfo.ThreadCount / defaultHwInfo->gtSystemInfo.EUCount; auto euPerSubSlice = defaultHwInfo->gtSystemInfo.ThreadCount / defaultHwInfo->gtSystemInfo.MaxEuPerSubSlice; auto &deviceInfo = device.sharedDeviceInfo; 
deviceInfo.maxNumEUsPerSubSlice = euPerSubSlice; deviceInfo.numThreadsPerEU = threadsPerEu; WorkSizeInfo workSizeInfo = createWorkSizeInfoFromDispatchInfo(dispatchInfo); EXPECT_EQ(workSizeInfo.numThreadsPerSubSlice, threadsPerEu * euPerSubSlice); } using LocalWorkSizeTest = ::testing::Test; HWTEST2_F(LocalWorkSizeTest, givenDispatchInfoWhenWorkSizeInfoIsCreatedThenWorkgroupSizeIsCorrect, IsAtMostGen11) { MockClDevice device{new MockDevice}; MockKernelWithInternals kernel(device); kernel.kernelInfo.kernelDescriptor.kernelAttributes.barrierCount = 1; DispatchInfo dispatchInfo; dispatchInfo.setClDevice(&device); dispatchInfo.setKernel(kernel.mockKernel); const uint32_t maxBarriersPerHSlice = (defaultHwInfo->platform.eRenderCoreFamily >= IGFX_GEN9_CORE) ? 32 : 16; const uint32_t nonFusedMinWorkGroupSize = static_cast(device.getSharedDeviceInfo().maxNumEUsPerSubSlice) * device.getSharedDeviceInfo().numThreadsPerEU * static_cast(kernel.mockKernel->getKernelInfo().getMaxSimdSize()) / maxBarriersPerHSlice; WorkSizeInfo workSizeInfo = createWorkSizeInfoFromDispatchInfo(dispatchInfo); EXPECT_EQ(nonFusedMinWorkGroupSize, workSizeInfo.minWorkGroupSize); } using IsCoreWithFusedEu = IsWithinGfxCore; HWTEST2_F(LocalWorkSizeTest, givenDispatchInfoWhenWorkSizeInfoIsCreatedThenTestEuFusionFtr, IsCoreWithFusedEu) { MockClDevice device{new MockDevice}; MockKernelWithInternals kernel(device); kernel.kernelInfo.kernelDescriptor.kernelAttributes.barrierCount = 1; DispatchInfo dispatchInfo; dispatchInfo.setClDevice(&device); dispatchInfo.setKernel(kernel.mockKernel); const uint32_t maxBarriersPerHSlice = (defaultHwInfo->platform.eRenderCoreFamily >= IGFX_GEN9_CORE) ? 
32 : 16; const uint32_t nonFusedMinWorkGroupSize = static_cast(device.getSharedDeviceInfo().maxNumEUsPerSubSlice) * device.getSharedDeviceInfo().numThreadsPerEU * static_cast(kernel.mockKernel->getKernelInfo().getMaxSimdSize()) / maxBarriersPerHSlice; const uint32_t fusedMinWorkGroupSize = 2 * nonFusedMinWorkGroupSize; WorkSizeInfo workSizeInfo = createWorkSizeInfoFromDispatchInfo(dispatchInfo); EXPECT_NE(nonFusedMinWorkGroupSize, workSizeInfo.minWorkGroupSize); EXPECT_EQ(fusedMinWorkGroupSize, workSizeInfo.minWorkGroupSize); } HWTEST2_F(LocalWorkSizeTest, givenDispatchInfoWhenWorkSizeInfoIsCreatedThenTestEuFusionFtrForcedByDebugManager, IsAtLeastGen12lp) { DebugManagerStateRestore dbgRestore; MockClDevice device{new MockDevice}; MockKernelWithInternals kernel(device); kernel.kernelInfo.kernelDescriptor.kernelAttributes.barrierCount = 1; DispatchInfo dispatchInfo; dispatchInfo.setClDevice(&device); dispatchInfo.setKernel(kernel.mockKernel); const uint32_t nonFusedMinWorkGroupSize = static_cast(device.getSharedDeviceInfo().maxNumEUsPerSubSlice) * device.getSharedDeviceInfo().numThreadsPerEU * static_cast(kernel.mockKernel->getKernelInfo().getMaxSimdSize()) / 32; const uint32_t fusedMinWorkGroupSize = 2 * nonFusedMinWorkGroupSize; EXPECT_NE(0u, nonFusedMinWorkGroupSize); { const bool fusedEuDispatchDisabled = true; DebugManager.flags.CFEFusedEUDispatch.set(fusedEuDispatchDisabled); WorkSizeInfo workSizeInfo = createWorkSizeInfoFromDispatchInfo(dispatchInfo); EXPECT_EQ(nonFusedMinWorkGroupSize, workSizeInfo.minWorkGroupSize); } { const bool fusedEuDispatchDisabled = false; DebugManager.flags.CFEFusedEUDispatch.set(fusedEuDispatchDisabled); WorkSizeInfo workSizeInfo = createWorkSizeInfoFromDispatchInfo(dispatchInfo); EXPECT_EQ(fusedMinWorkGroupSize, workSizeInfo.minWorkGroupSize); } } HWTEST2_F(LocalWorkSizeTest, givenWorkSizeInfoIsCreatedWithHwInfoThenTestEuFusionFtrForcedByDebugManager, IsAtLeastGen12lp) { DebugManagerStateRestore dbgRestore; const uint32_t 
nonFusedMinWorkGroupSize = 36 * 16 / 32; const uint32_t fusedMinWorkGroupSize = 2 * nonFusedMinWorkGroupSize; EXPECT_NE(0u, nonFusedMinWorkGroupSize); { const bool fusedEuDispatchDisabled = true; DebugManager.flags.CFEFusedEUDispatch.set(fusedEuDispatchDisabled); WorkSizeInfo workSizeInfo(512, 1u, 16, 0u, defaultHwInfo.get(), 36u, 0u, false, false, false); EXPECT_EQ(nonFusedMinWorkGroupSize, workSizeInfo.minWorkGroupSize); } { const bool fusedEuDispatchDisabled = false; DebugManager.flags.CFEFusedEUDispatch.set(fusedEuDispatchDisabled); WorkSizeInfo workSizeInfo(512, 1u, 16, 0u, defaultHwInfo.get(), 36u, 0u, false, false, false); EXPECT_EQ(fusedMinWorkGroupSize, workSizeInfo.minWorkGroupSize); } } TEST(localWorkSizeTest, givenDispatchInfoWhenWorkSizeInfoIsCreatedThenHasBarriersIsCorrectlySet) { MockClDevice device{new MockDevice}; MockKernelWithInternals kernel(device); DispatchInfo dispatchInfo; dispatchInfo.setClDevice(&device); dispatchInfo.setKernel(kernel.mockKernel); kernel.kernelInfo.kernelDescriptor.kernelAttributes.barrierCount = 0; EXPECT_FALSE(createWorkSizeInfoFromDispatchInfo(dispatchInfo).hasBarriers); kernel.kernelInfo.kernelDescriptor.kernelAttributes.barrierCount = 1; EXPECT_TRUE(createWorkSizeInfoFromDispatchInfo(dispatchInfo).hasBarriers); } TEST(localWorkSizeTest, givenMaxWorkgroupSizeEqualToSimdSizeWhenLwsIsCalculatedThenItIsDownsizedToMaxWorkgroupSize) { WorkSizeInfo wsInfo(32, 0u, 32, 0u, defaultHwInfo.get(), 32u, 0u, false, false, false); uint32_t workDim = 2; size_t workGroup[3] = {32, 32, 1}; size_t workGroupSize[3]; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 32u); EXPECT_EQ(workGroupSize[1], 1u); EXPECT_EQ(workGroupSize[2], 1u); } compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/multi_dispatch_info_tests.cpp000066400000000000000000000013141422164147700324220ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * 
*/ #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/mocks/mock_mdi.h" using namespace NEO; struct MultiDispatchInfoTest : public ::testing::Test { void SetUp() override { } void TearDown() override { } }; TEST_F(MultiDispatchInfoTest, GivenNullKernelWhenCreatingMultiDispatchInfoThenExpectationsAreMet) { MockMultiDispatchInfo multiDispatchInfo(nullptr, nullptr); EXPECT_FALSE(multiDispatchInfo.begin()->usesSlm()); EXPECT_FALSE(multiDispatchInfo.begin()->usesStatelessPrintfSurface()); EXPECT_EQ(0u, multiDispatchInfo.begin()->getRequiredScratchSize()); }compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/multiple_map_buffer_tests.cpp000066400000000000000000000443201422164147700324230ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/mocks/mock_allocation_properties.h" #include "shared/test/common/mocks/mock_gmm.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/event/user_event.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" using namespace NEO; struct MultipleMapBufferTest : public ClDeviceFixture, public ::testing::Test { template struct MockBuffer : public BufferHw { template MockBuffer(Params... params) : BufferHw(params...) 
{ this->createFunction = BufferHw::create; }; void transferDataToHostPtr(MemObjSizeArray ©Size, MemObjOffsetArray ©Offset) override { this->copySize = copySize[0]; this->copyOffset = copyOffset[0]; transferToHostPtrCalled++; }; void transferDataFromHostPtr(MemObjSizeArray ©Size, MemObjOffsetArray ©Offset) override { this->copySize = copySize[0]; this->copyOffset = copyOffset[0]; transferFromHostPtrCalled++; }; size_t copySize = 0; size_t copyOffset = 0; uint32_t transferToHostPtrCalled = 0; uint32_t transferFromHostPtrCalled = 0; }; template struct MockCmdQ : public CommandQueueHw { MockCmdQ(Context *context, ClDevice *device) : CommandQueueHw(context, device, 0, false) {} cl_int enqueueReadBuffer(Buffer *buffer, cl_bool blockingRead, size_t offset, size_t size, void *ptr, GraphicsAllocation *mapAllocation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { enqueueSize = size; enqueueOffset = offset; readBufferCalled++; if (failOnReadBuffer) { return CL_OUT_OF_RESOURCES; } return CommandQueueHw::enqueueReadBuffer(buffer, blockingRead, offset, size, ptr, mapAllocation, numEventsInWaitList, eventWaitList, event); } cl_int enqueueWriteBuffer(Buffer *buffer, cl_bool blockingWrite, size_t offset, size_t cb, const void *ptr, GraphicsAllocation *mapAllocation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { enqueueSize = cb; enqueueOffset = offset; unmapPtr = ptr; writeBufferCalled++; if (failOnWriteBuffer) { return CL_OUT_OF_RESOURCES; } return CommandQueueHw::enqueueWriteBuffer(buffer, blockingWrite, offset, cb, ptr, mapAllocation, numEventsInWaitList, eventWaitList, event); } cl_int enqueueMarkerWithWaitList(cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { enqueueMarkerCalled++; return CommandQueueHw::enqueueMarkerWithWaitList(numEventsInWaitList, eventWaitList, event); } uint32_t writeBufferCalled = 0; uint32_t readBufferCalled = 0; uint32_t 
enqueueMarkerCalled = 0; bool failOnReadBuffer = false; bool failOnWriteBuffer = false; size_t enqueueSize = 0; size_t enqueueOffset = 0; const void *unmapPtr = nullptr; }; template std::unique_ptr> createMockBuffer(bool mapOnGpu) { MemoryProperties memoryProperties; auto mockAlloc = pDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); auto buffer = new MockBuffer(context, memoryProperties, 0, 0, 1024, mockAlloc->getUnderlyingBuffer(), mockAlloc->getUnderlyingBuffer(), GraphicsAllocationHelper::toMultiGraphicsAllocation(mockAlloc), false, false, false); if (mapOnGpu) { buffer->setSharingHandler(new SharingHandler()); auto gfxAllocation = buffer->getGraphicsAllocation(pDevice->getRootDeviceIndex()); for (auto handleId = 0u; handleId < gfxAllocation->getNumGmms(); handleId++) { gfxAllocation->setGmm(new MockGmm(pDevice->getGmmClientContext()), handleId); } } return std::unique_ptr>(buffer); } template std::unique_ptr> createMockCmdQ() { return std::unique_ptr>(new MockCmdQ(context, pClDevice)); } void SetUp() override { ClDeviceFixture::SetUp(); context = new MockContext(pClDevice); } void TearDown() override { delete context; ClDeviceFixture::TearDown(); } MockContext *context = nullptr; cl_int retVal = CL_INVALID_VALUE; }; HWTEST_F(MultipleMapBufferTest, givenValidReadAndWriteBufferWhenMappedOnGpuThenAddMappedPtrAndRemoveOnUnmap) { auto buffer = createMockBuffer(true); auto cmdQ = createMockCmdQ(); EXPECT_FALSE(buffer->mappingOnCpuAllowed()); size_t offset = 1; size_t size = 3; void *mappedPtr = clEnqueueMapBuffer(cmdQ.get(), buffer.get(), CL_FALSE, CL_MAP_WRITE, offset, size, 0, nullptr, nullptr, nullptr); EXPECT_NE(nullptr, mappedPtr); EXPECT_EQ(1u, buffer->getMapOperationsHandler().size()); EXPECT_EQ(cmdQ->readBufferCalled, 1u); EXPECT_EQ(cmdQ->enqueueSize, size); EXPECT_EQ(cmdQ->enqueueOffset, offset); retVal = clEnqueueUnmapMemObject(cmdQ.get(), buffer.get(), 
mappedPtr, 0, nullptr, nullptr); EXPECT_EQ(0u, buffer->getMapOperationsHandler().size()); EXPECT_EQ(cmdQ->writeBufferCalled, 1u); EXPECT_EQ(cmdQ->enqueueSize, size); EXPECT_EQ(cmdQ->enqueueOffset, offset); EXPECT_EQ(cmdQ->unmapPtr, mappedPtr); } HWTEST_F(MultipleMapBufferTest, givenReadOnlyMapWhenUnmappedOnGpuThenEnqueueMarker) { auto buffer = createMockBuffer(true); auto cmdQ = createMockCmdQ(); EXPECT_FALSE(buffer->mappingOnCpuAllowed()); size_t offset = 1; size_t size = 3; void *mappedPtr = clEnqueueMapBuffer(cmdQ.get(), buffer.get(), CL_FALSE, CL_MAP_READ, offset, size, 0, nullptr, nullptr, nullptr); EXPECT_NE(nullptr, mappedPtr); EXPECT_EQ(1u, buffer->getMapOperationsHandler().size()); EXPECT_EQ(cmdQ->readBufferCalled, 1u); retVal = clEnqueueUnmapMemObject(cmdQ.get(), buffer.get(), mappedPtr, 0, nullptr, nullptr); EXPECT_EQ(0u, buffer->getMapOperationsHandler().size()); EXPECT_EQ(cmdQ->writeBufferCalled, 0u); EXPECT_EQ(cmdQ->enqueueMarkerCalled, 1u); } HWTEST_F(MultipleMapBufferTest, givenWriteInvalidateMapWhenMappedOnGpuThenCallEnqueueMarker) { auto buffer = createMockBuffer(true); auto cmdQ = createMockCmdQ(); EXPECT_FALSE(buffer->mappingOnCpuAllowed()); size_t offset = 1; size_t size = 3; void *mappedPtr = clEnqueueMapBuffer(cmdQ.get(), buffer.get(), CL_FALSE, CL_MAP_WRITE_INVALIDATE_REGION, offset, size, 0, nullptr, nullptr, nullptr); EXPECT_NE(nullptr, mappedPtr); EXPECT_EQ(1u, buffer->getMapOperationsHandler().size()); EXPECT_EQ(cmdQ->readBufferCalled, 0u); EXPECT_EQ(cmdQ->enqueueMarkerCalled, 1u); retVal = clEnqueueUnmapMemObject(cmdQ.get(), buffer.get(), mappedPtr, 0, nullptr, nullptr); EXPECT_EQ(0u, buffer->getMapOperationsHandler().size()); EXPECT_EQ(cmdQ->writeBufferCalled, 1u); EXPECT_EQ(cmdQ->enqueueMarkerCalled, 1u); } HWTEST_F(MultipleMapBufferTest, givenNotMappedPtrWhenUnmapedOnGpuThenReturnError) { auto buffer = createMockBuffer(true); auto cmdQ = createMockCmdQ(); EXPECT_FALSE(buffer->mappingOnCpuAllowed()); EXPECT_EQ(0u, 
buffer->getMapOperationsHandler().size()); retVal = clEnqueueUnmapMemObject(cmdQ.get(), buffer.get(), buffer->getBasePtrForMap(cmdQ->getDevice().getRootDeviceIndex()), 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); } HWTEST_F(MultipleMapBufferTest, givenErrorFromReadBufferWhenMappedOnGpuThenDontAddMappedPtr) { auto buffer = createMockBuffer(true); auto cmdQ = createMockCmdQ(); EXPECT_FALSE(buffer->mappingOnCpuAllowed()); cmdQ->failOnReadBuffer = true; size_t offset = 1; size_t size = 3; void *mappedPtr = clEnqueueMapBuffer(cmdQ.get(), buffer.get(), CL_FALSE, CL_MAP_READ, offset, size, 0, nullptr, nullptr, &retVal); EXPECT_EQ(nullptr, mappedPtr); EXPECT_EQ(CL_OUT_OF_RESOURCES, retVal); EXPECT_EQ(0u, buffer->getMapOperationsHandler().size()); } HWTEST_F(MultipleMapBufferTest, givenErrorFromWriteBufferWhenUnmappedOnGpuThenDontRemoveMappedPtr) { auto buffer = createMockBuffer(true); auto cmdQ = createMockCmdQ(); EXPECT_FALSE(buffer->mappingOnCpuAllowed()); cmdQ->failOnWriteBuffer = true; size_t offset = 1; size_t size = 3; void *mappedPtr = clEnqueueMapBuffer(cmdQ.get(), buffer.get(), CL_FALSE, CL_MAP_WRITE, offset, size, 0, nullptr, nullptr, &retVal); EXPECT_NE(nullptr, mappedPtr); EXPECT_EQ(1u, buffer->getMapOperationsHandler().size()); retVal = clEnqueueUnmapMemObject(cmdQ.get(), buffer.get(), mappedPtr, 0, nullptr, nullptr); EXPECT_EQ(1u, cmdQ->writeBufferCalled); EXPECT_EQ(CL_OUT_OF_RESOURCES, retVal); EXPECT_EQ(1u, buffer->getMapOperationsHandler().size()); } HWTEST_F(MultipleMapBufferTest, givenUnblockedQueueWhenMappedOnCpuThenAddMappedPtrAndRemoveOnUnmap) { auto buffer = createMockBuffer(false); auto cmdQ = createMockCmdQ(); EXPECT_TRUE(buffer->mappingOnCpuAllowed()); size_t offset = 1; size_t size = 3; void *mappedPtr = clEnqueueMapBuffer(cmdQ.get(), buffer.get(), CL_FALSE, CL_MAP_WRITE, offset, size, 0, nullptr, nullptr, &retVal); EXPECT_NE(nullptr, mappedPtr); EXPECT_EQ(1u, buffer->getMapOperationsHandler().size()); EXPECT_EQ(1u, 
buffer->transferToHostPtrCalled); EXPECT_EQ(buffer->copySize, size); EXPECT_EQ(buffer->copyOffset, offset); retVal = clEnqueueUnmapMemObject(cmdQ.get(), buffer.get(), mappedPtr, 0, nullptr, nullptr); EXPECT_EQ(0u, buffer->getMapOperationsHandler().size()); EXPECT_EQ(1u, buffer->transferFromHostPtrCalled); EXPECT_EQ(buffer->copySize, size); EXPECT_EQ(buffer->copyOffset, offset); } HWTEST_F(MultipleMapBufferTest, givenUnblockedQueueWhenReadOnlyMappedOnCpuThenDontMakeCpuCopy) { auto buffer = createMockBuffer(false); auto cmdQ = createMockCmdQ(); EXPECT_TRUE(buffer->mappingOnCpuAllowed()); size_t offset = 1; size_t size = 3; void *mappedPtr = clEnqueueMapBuffer(cmdQ.get(), buffer.get(), CL_FALSE, CL_MAP_READ, offset, size, 0, nullptr, nullptr, &retVal); EXPECT_NE(nullptr, mappedPtr); EXPECT_EQ(1u, buffer->getMapOperationsHandler().size()); EXPECT_EQ(1u, buffer->transferToHostPtrCalled); retVal = clEnqueueUnmapMemObject(cmdQ.get(), buffer.get(), mappedPtr, 0, nullptr, nullptr); EXPECT_EQ(0u, buffer->getMapOperationsHandler().size()); EXPECT_EQ(0u, buffer->transferFromHostPtrCalled); } HWTEST_F(MultipleMapBufferTest, givenUnblockedQueueWhenWriteInvalidateMappedOnCpuThenDontMakeCpuCopy) { auto buffer = createMockBuffer(false); auto cmdQ = createMockCmdQ(); EXPECT_TRUE(buffer->mappingOnCpuAllowed()); size_t offset = 1; size_t size = 3; void *mappedPtr = clEnqueueMapBuffer(cmdQ.get(), buffer.get(), CL_FALSE, CL_MAP_WRITE_INVALIDATE_REGION, offset, size, 0, nullptr, nullptr, &retVal); EXPECT_NE(nullptr, mappedPtr); EXPECT_EQ(1u, buffer->getMapOperationsHandler().size()); EXPECT_EQ(0u, buffer->transferToHostPtrCalled); retVal = clEnqueueUnmapMemObject(cmdQ.get(), buffer.get(), mappedPtr, 0, nullptr, nullptr); EXPECT_EQ(0u, buffer->getMapOperationsHandler().size()); EXPECT_EQ(1u, buffer->transferFromHostPtrCalled); } HWTEST_F(MultipleMapBufferTest, givenBlockedQueueWhenMappedOnCpuThenAddMappedPtrAndRemoveOnUnmap) { auto buffer = createMockBuffer(false); auto cmdQ = 
createMockCmdQ(); EXPECT_TRUE(buffer->mappingOnCpuAllowed()); UserEvent mapEvent, unmapEvent; cl_event clMapEvent = &mapEvent; cl_event clUnmapEvent = &unmapEvent; size_t offset = 1; size_t size = 3; void *mappedPtr = clEnqueueMapBuffer(cmdQ.get(), buffer.get(), CL_FALSE, CL_MAP_WRITE, offset, size, 1, &clMapEvent, nullptr, &retVal); mapEvent.setStatus(CL_COMPLETE); EXPECT_NE(nullptr, mappedPtr); EXPECT_EQ(1u, buffer->getMapOperationsHandler().size()); EXPECT_EQ(buffer->copySize, size); EXPECT_EQ(buffer->copyOffset, offset); EXPECT_EQ(1u, buffer->transferToHostPtrCalled); retVal = clEnqueueUnmapMemObject(cmdQ.get(), buffer.get(), mappedPtr, 1, &clUnmapEvent, nullptr); unmapEvent.setStatus(CL_COMPLETE); EXPECT_EQ(0u, buffer->getMapOperationsHandler().size()); EXPECT_EQ(buffer->copySize, size); EXPECT_EQ(buffer->copyOffset, offset); EXPECT_EQ(1u, buffer->transferFromHostPtrCalled); } HWTEST_F(MultipleMapBufferTest, givenBlockedQueueWhenMappedReadOnlyOnCpuThenDontMakeCpuCopy) { auto buffer = createMockBuffer(false); auto cmdQ = createMockCmdQ(); EXPECT_TRUE(buffer->mappingOnCpuAllowed()); UserEvent mapEvent, unmapEvent; cl_event clMapEvent = &mapEvent; cl_event clUnmapEvent = &unmapEvent; size_t offset = 1; size_t size = 3; void *mappedPtr = clEnqueueMapBuffer(cmdQ.get(), buffer.get(), CL_FALSE, CL_MAP_READ, offset, size, 1, &clMapEvent, nullptr, &retVal); mapEvent.setStatus(CL_COMPLETE); EXPECT_NE(nullptr, mappedPtr); EXPECT_EQ(1u, buffer->transferToHostPtrCalled); EXPECT_EQ(1u, buffer->getMapOperationsHandler().size()); retVal = clEnqueueUnmapMemObject(cmdQ.get(), buffer.get(), mappedPtr, 1, &clUnmapEvent, nullptr); unmapEvent.setStatus(CL_COMPLETE); EXPECT_EQ(0u, buffer->getMapOperationsHandler().size()); EXPECT_EQ(0u, buffer->transferFromHostPtrCalled); } HWTEST_F(MultipleMapBufferTest, givenInvalidPtrWhenUnmappedOnCpuThenReturnError) { auto buffer = createMockBuffer(false); auto cmdQ = createMockCmdQ(); EXPECT_TRUE(buffer->mappingOnCpuAllowed()); retVal = 
clEnqueueUnmapMemObject(cmdQ.get(), buffer.get(), buffer->getBasePtrForMap(cmdQ->getDevice().getRootDeviceIndex()), 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); } HWTEST_F(MultipleMapBufferTest, givenMultimpleMapsWhenUnmappingThenRemoveCorrectPointers) { auto buffer = createMockBuffer(true); auto cmdQ = createMockCmdQ(); MapInfo mappedPtrs[3] = { {nullptr, 1, {{1, 0, 0}}, {{1, 0, 0}}, 0}, {nullptr, 1, {{2, 0, 0}}, {{2, 0, 0}}, 0}, {nullptr, 1, {{5, 0, 0}}, {{5, 0, 0}}, 0}, }; for (size_t i = 0; i < 3; i++) { mappedPtrs[i].ptr = clEnqueueMapBuffer(cmdQ.get(), buffer.get(), CL_FALSE, CL_MAP_WRITE, mappedPtrs[i].offset[0], mappedPtrs[i].size[0], 0, nullptr, nullptr, &retVal); EXPECT_NE(nullptr, mappedPtrs[i].ptr); EXPECT_EQ(i + 1, buffer->getMapOperationsHandler().size()); EXPECT_EQ(cmdQ->enqueueSize, mappedPtrs[i].size[0]); EXPECT_EQ(cmdQ->enqueueOffset, mappedPtrs[i].offset[0]); } // reordered unmap clEnqueueUnmapMemObject(cmdQ.get(), buffer.get(), mappedPtrs[1].ptr, 0, nullptr, nullptr); EXPECT_EQ(2u, buffer->getMapOperationsHandler().size()); EXPECT_EQ(cmdQ->unmapPtr, mappedPtrs[1].ptr); EXPECT_EQ(cmdQ->enqueueSize, mappedPtrs[1].size[0]); EXPECT_EQ(cmdQ->enqueueOffset, mappedPtrs[1].offset[0]); clEnqueueUnmapMemObject(cmdQ.get(), buffer.get(), mappedPtrs[2].ptr, 0, nullptr, nullptr); EXPECT_EQ(1u, buffer->getMapOperationsHandler().size()); EXPECT_EQ(cmdQ->unmapPtr, mappedPtrs[2].ptr); EXPECT_EQ(cmdQ->enqueueSize, mappedPtrs[2].size[0]); EXPECT_EQ(cmdQ->enqueueOffset, mappedPtrs[2].offset[0]); clEnqueueUnmapMemObject(cmdQ.get(), buffer.get(), mappedPtrs[0].ptr, 0, nullptr, nullptr); EXPECT_EQ(0u, buffer->getMapOperationsHandler().size()); EXPECT_EQ(cmdQ->unmapPtr, mappedPtrs[0].ptr); EXPECT_EQ(cmdQ->enqueueSize, mappedPtrs[0].size[0]); EXPECT_EQ(cmdQ->enqueueOffset, mappedPtrs[0].offset[0]); } HWTEST_F(MultipleMapBufferTest, givenOverlapingPtrWhenMappingOnGpuForWriteThenReturnError) { auto buffer = createMockBuffer(true); auto cmdQ = 
createMockCmdQ(); EXPECT_FALSE(buffer->mappingOnCpuAllowed()); size_t offset = 1; size_t size = 3; void *mappedPtr = clEnqueueMapBuffer(cmdQ.get(), buffer.get(), CL_FALSE, CL_MAP_READ, offset, size, 0, nullptr, nullptr, &retVal); EXPECT_NE(nullptr, mappedPtr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, buffer->getMapOperationsHandler().size()); offset++; void *mappedPtr2 = clEnqueueMapBuffer(cmdQ.get(), buffer.get(), CL_FALSE, CL_MAP_WRITE, offset, size, 0, nullptr, nullptr, &retVal); EXPECT_EQ(nullptr, mappedPtr2); EXPECT_EQ(CL_INVALID_OPERATION, retVal); EXPECT_EQ(1u, buffer->getMapOperationsHandler().size()); } HWTEST_F(MultipleMapBufferTest, givenOverlapingPtrWhenMappingOnCpuForWriteThenReturnError) { auto buffer = createMockBuffer(false); auto cmdQ = createMockCmdQ(); EXPECT_TRUE(buffer->mappingOnCpuAllowed()); size_t offset = 1; size_t size = 3; void *mappedPtr = clEnqueueMapBuffer(cmdQ.get(), buffer.get(), CL_FALSE, CL_MAP_READ, offset, size, 0, nullptr, nullptr, &retVal); EXPECT_NE(nullptr, mappedPtr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, buffer->getMapOperationsHandler().size()); offset++; void *mappedPtr2 = clEnqueueMapBuffer(cmdQ.get(), buffer.get(), CL_FALSE, CL_MAP_WRITE, offset, size, 0, nullptr, nullptr, &retVal); EXPECT_EQ(nullptr, mappedPtr2); EXPECT_EQ(CL_INVALID_OPERATION, retVal); EXPECT_EQ(1u, buffer->getMapOperationsHandler().size()); } compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/multiple_map_image_tests.cpp000066400000000000000000000535531422164147700322440ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/helpers/variable_backup.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/event/user_event.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include 
"opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" namespace NEO { extern ImageFactoryFuncs imageFactory[IGFX_MAX_CORE]; struct MultipleMapImageTest : public ClDeviceFixture, public ::testing::Test { template struct MockImage : public ImageHw { using ImageHw::isZeroCopy; using ImageHw::ImageHw; static Image *createMockImage(Context *context, const MemoryProperties &memoryProperties, uint64_t flags, uint64_t flagsIntel, size_t size, void *hostPtr, const cl_image_format &imageFormat, const cl_image_desc &imageDesc, bool zeroCopy, MultiGraphicsAllocation multiGraphicsAllocation, bool isObjectRedescribed, uint32_t baseMipLevel, uint32_t mipCount, const ClSurfaceFormatInfo *surfaceFormatInfo, const SurfaceOffsets *surfaceOffsets) { auto memoryStorage = multiGraphicsAllocation.getDefaultGraphicsAllocation()->getUnderlyingBuffer(); return new MockImage(context, memoryProperties, flags, flagsIntel, size, memoryStorage, hostPtr, imageFormat, imageDesc, zeroCopy, std::move(multiGraphicsAllocation), isObjectRedescribed, baseMipLevel, mipCount, *surfaceFormatInfo, surfaceOffsets); }; void transferDataToHostPtr(MemObjSizeArray ©Size, MemObjOffsetArray ©Offset) override { copyRegion = copySize; copyOrigin = copyOffset; transferToHostPtrCalled++; }; void transferDataFromHostPtr(MemObjSizeArray ©Size, MemObjOffsetArray ©Offset) override { copyRegion = copySize; copyOrigin = copyOffset; transferFromHostPtrCalled++; }; MemObjSizeArray copyRegion = {{0, 0, 0}}; MemObjOffsetArray copyOrigin = {{0, 0, 0}}; uint32_t transferToHostPtrCalled = 0; uint32_t transferFromHostPtrCalled = 0; }; template struct MockCmdQ : public CommandQueueHw { MockCmdQ(Context *context, ClDevice *device) : CommandQueueHw(context, device, 0, false) {} cl_int enqueueReadImage(Image *srcImage, cl_bool blockingRead, const size_t *origin, const size_t *region, size_t rowPitch, size_t slicePitch, void *ptr, GraphicsAllocation *mapAllocation, cl_uint 
numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { enqueueRegion = {{region[0], region[1], region[2]}}; enqueueOrigin = {{origin[0], origin[1], origin[2]}}; readImageCalled++; if (failOnReadImage) { return CL_OUT_OF_RESOURCES; } return CommandQueueHw::enqueueReadImage(srcImage, blockingRead, origin, region, rowPitch, slicePitch, ptr, mapAllocation, numEventsInWaitList, eventWaitList, event); } cl_int enqueueWriteImage(Image *dstImage, cl_bool blockingWrite, const size_t *origin, const size_t *region, size_t inputRowPitch, size_t inputSlicePitch, const void *ptr, GraphicsAllocation *mapAllocation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { enqueueRegion = {{region[0], region[1], region[2]}}; enqueueOrigin = {{origin[0], origin[1], origin[2]}}; unmapPtr = ptr; writeImageCalled++; if (failOnWriteImage) { return CL_OUT_OF_RESOURCES; } return CommandQueueHw::enqueueWriteImage(dstImage, blockingWrite, origin, region, inputRowPitch, inputSlicePitch, ptr, mapAllocation, numEventsInWaitList, eventWaitList, event); } cl_int enqueueMarkerWithWaitList(cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { enqueueMarkerCalled++; return CommandQueueHw::enqueueMarkerWithWaitList(numEventsInWaitList, eventWaitList, event); } uint32_t writeImageCalled = 0; uint32_t readImageCalled = 0; uint32_t enqueueMarkerCalled = 0; bool failOnReadImage = false; bool failOnWriteImage = false; MemObjSizeArray enqueueRegion = {{0, 0, 0}}; MemObjOffsetArray enqueueOrigin = {{0, 0, 0}}; const void *unmapPtr = nullptr; }; template std::unique_ptr> createMockImage() { auto eRenderCoreFamily = pDevice->getHardwareInfo().platform.eRenderCoreFamily; VariableBackup backup(&imageFactory[eRenderCoreFamily].createImageFunction); imageFactory[eRenderCoreFamily].createImageFunction = MockImage::createMockImage; auto surfaceFormat = Image::getSurfaceFormatFromTable(Traits::flags, &Traits::imageFormat, 
context->getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); cl_int retVal = CL_SUCCESS; auto img = Image::create( context, ClMemoryPropertiesHelper::createMemoryProperties(Traits::flags, 0, 0, &context->getDevice(0)->getDevice()), Traits::flags, 0, surfaceFormat, &Traits::imageDesc, Traits::hostPtr, retVal); auto mockImage = static_cast *>(img); return std::unique_ptr>(mockImage); } template std::unique_ptr> createMockCmdQ() { return std::unique_ptr>(new MockCmdQ(context, pClDevice)); } void SetUp() override { ClDeviceFixture::SetUp(); context = new MockContext(pClDevice); } void TearDown() override { delete context; ClDeviceFixture::TearDown(); } MockContext *context = nullptr; cl_int retVal = CL_INVALID_VALUE; }; HWTEST_F(MultipleMapImageTest, givenValidReadAndWriteImageWhenMappedOnGpuThenAddMappedPtrAndRemoveOnUnmap) { if (!UnitTestHelper::tiledImagesSupported) { GTEST_SKIP(); } auto image = createMockImage(); auto cmdQ = createMockCmdQ(); EXPECT_FALSE(image->mappingOnCpuAllowed()); MemObjOffsetArray origin = {{1, 2, 1}}; MemObjSizeArray region = {{3, 4, 1}}; void *mappedPtr = clEnqueueMapImage(cmdQ.get(), image.get(), CL_TRUE, CL_MAP_WRITE, &origin[0], ®ion[0], nullptr, nullptr, 0, nullptr, nullptr, &retVal); EXPECT_NE(nullptr, mappedPtr); EXPECT_EQ(1u, image->getMapOperationsHandler().size()); EXPECT_EQ(cmdQ->enqueueRegion, region); EXPECT_EQ(cmdQ->enqueueOrigin, origin); EXPECT_EQ(cmdQ->readImageCalled, 1u); retVal = clEnqueueUnmapMemObject(cmdQ.get(), image.get(), mappedPtr, 0, nullptr, nullptr); EXPECT_EQ(0u, image->getMapOperationsHandler().size()); EXPECT_EQ(cmdQ->enqueueRegion, region); EXPECT_EQ(cmdQ->enqueueOrigin, origin); EXPECT_EQ(cmdQ->unmapPtr, mappedPtr); EXPECT_EQ(cmdQ->writeImageCalled, 1u); } HWTEST_F(MultipleMapImageTest, givenReadOnlyMapWhenUnmappedOnGpuThenEnqueueMarker) { if (!UnitTestHelper::tiledImagesSupported) { GTEST_SKIP(); } auto image = createMockImage(); auto cmdQ = createMockCmdQ(); 
EXPECT_FALSE(image->mappingOnCpuAllowed()); MemObjOffsetArray origin = {{1, 2, 1}}; MemObjSizeArray region = {{3, 4, 1}}; void *mappedPtr = clEnqueueMapImage(cmdQ.get(), image.get(), CL_TRUE, CL_MAP_READ, &origin[0], ®ion[0], nullptr, nullptr, 0, nullptr, nullptr, &retVal); EXPECT_NE(nullptr, mappedPtr); EXPECT_EQ(1u, image->getMapOperationsHandler().size()); EXPECT_EQ(cmdQ->enqueueRegion, region); EXPECT_EQ(cmdQ->enqueueOrigin, origin); EXPECT_EQ(cmdQ->readImageCalled, 1u); retVal = clEnqueueUnmapMemObject(cmdQ.get(), image.get(), mappedPtr, 0, nullptr, nullptr); EXPECT_EQ(0u, image->getMapOperationsHandler().size()); EXPECT_EQ(cmdQ->writeImageCalled, 0u); EXPECT_EQ(cmdQ->enqueueMarkerCalled, 1u); } HWTEST_F(MultipleMapImageTest, givenWriteInvalidateMapWhenMappedOnGpuThenEnqueueMarker) { if (!UnitTestHelper::tiledImagesSupported) { GTEST_SKIP(); } auto image = createMockImage(); auto cmdQ = createMockCmdQ(); EXPECT_FALSE(image->mappingOnCpuAllowed()); MemObjOffsetArray origin = {{1, 2, 1}}; MemObjSizeArray region = {{3, 4, 1}}; void *mappedPtr = clEnqueueMapImage(cmdQ.get(), image.get(), CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION, &origin[0], ®ion[0], nullptr, nullptr, 0, nullptr, nullptr, &retVal); EXPECT_NE(nullptr, mappedPtr); EXPECT_EQ(1u, image->getMapOperationsHandler().size()); EXPECT_EQ(cmdQ->readImageCalled, 0u); EXPECT_EQ(cmdQ->enqueueMarkerCalled, 1u); retVal = clEnqueueUnmapMemObject(cmdQ.get(), image.get(), mappedPtr, 0, nullptr, nullptr); EXPECT_EQ(0u, image->getMapOperationsHandler().size()); EXPECT_EQ(cmdQ->writeImageCalled, 1u); EXPECT_EQ(cmdQ->enqueueMarkerCalled, 1u); EXPECT_EQ(cmdQ->enqueueRegion, region); EXPECT_EQ(cmdQ->enqueueOrigin, origin); } HWTEST_F(MultipleMapImageTest, givenNotMappedPtrWhenUnmapedThenReturnError) { auto image = createMockImage(); auto cmdQ = createMockCmdQ(); EXPECT_EQ(!UnitTestHelper::tiledImagesSupported, image->mappingOnCpuAllowed()); EXPECT_EQ(0u, image->getMapOperationsHandler().size()); retVal = 
clEnqueueUnmapMemObject(cmdQ.get(), image.get(), image->getBasePtrForMap(cmdQ->getDevice().getRootDeviceIndex()), 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); } HWTEST_F(MultipleMapImageTest, givenErrorFromReadImageWhenMappedOnGpuThenDontAddMappedPtr) { if (!UnitTestHelper::tiledImagesSupported) { GTEST_SKIP(); } auto image = createMockImage(); auto cmdQ = createMockCmdQ(); EXPECT_FALSE(image->mappingOnCpuAllowed()); cmdQ->failOnReadImage = true; size_t origin[] = {2, 1, 1}; size_t region[] = {2, 1, 1}; void *mappedPtr = clEnqueueMapImage(cmdQ.get(), image.get(), CL_TRUE, CL_MAP_READ, origin, region, nullptr, nullptr, 0, nullptr, nullptr, &retVal); EXPECT_EQ(nullptr, mappedPtr); EXPECT_EQ(CL_OUT_OF_RESOURCES, retVal); EXPECT_EQ(0u, image->getMapOperationsHandler().size()); } HWTEST_F(MultipleMapImageTest, givenErrorFromWriteImageWhenUnmappedOnGpuThenDontRemoveMappedPtr) { if (!UnitTestHelper::tiledImagesSupported) { GTEST_SKIP(); } auto image = createMockImage(); auto cmdQ = createMockCmdQ(); EXPECT_FALSE(image->mappingOnCpuAllowed()); cmdQ->failOnWriteImage = true; size_t origin[] = {2, 1, 1}; size_t region[] = {2, 1, 1}; void *mappedPtr = clEnqueueMapImage(cmdQ.get(), image.get(), CL_TRUE, CL_MAP_WRITE, origin, region, nullptr, nullptr, 0, nullptr, nullptr, &retVal); EXPECT_NE(nullptr, mappedPtr); EXPECT_EQ(1u, image->getMapOperationsHandler().size()); retVal = clEnqueueUnmapMemObject(cmdQ.get(), image.get(), mappedPtr, 0, nullptr, nullptr); EXPECT_EQ(CL_OUT_OF_RESOURCES, retVal); EXPECT_EQ(cmdQ->writeImageCalled, 1u); EXPECT_EQ(1u, image->getMapOperationsHandler().size()); } HWTEST_F(MultipleMapImageTest, givenUnblockedQueueWhenMappedOnCpuThenAddMappedPtrAndRemoveOnUnmap) { auto image = createMockImage(); auto cmdQ = createMockCmdQ(); image->isZeroCopy = false; EXPECT_TRUE(image->mappingOnCpuAllowed()); MemObjOffsetArray origin = {{1, 0, 0}}; MemObjSizeArray region = {{3, 1, 1}}; void *mappedPtr = clEnqueueMapImage(cmdQ.get(), image.get(), CL_TRUE, 
CL_MAP_WRITE, &origin[0], ®ion[0], nullptr, nullptr, 0, nullptr, nullptr, &retVal); EXPECT_NE(nullptr, mappedPtr); EXPECT_EQ(1u, image->getMapOperationsHandler().size()); EXPECT_EQ(1u, image->transferToHostPtrCalled); EXPECT_EQ(image->copyRegion, region); EXPECT_EQ(image->copyOrigin, origin); retVal = clEnqueueUnmapMemObject(cmdQ.get(), image.get(), mappedPtr, 0, nullptr, nullptr); EXPECT_EQ(0u, image->getMapOperationsHandler().size()); EXPECT_EQ(1u, image->transferFromHostPtrCalled); EXPECT_EQ(image->copyRegion, region); EXPECT_EQ(image->copyOrigin, origin); } HWTEST_F(MultipleMapImageTest, givenUnblockedQueueWhenReadOnlyUnmappedOnCpuThenDontMakeCpuCopy) { auto image = createMockImage(); auto cmdQ = createMockCmdQ(); image->isZeroCopy = false; EXPECT_TRUE(image->mappingOnCpuAllowed()); MemObjOffsetArray origin = {{1, 0, 0}}; MemObjSizeArray region = {{3, 1, 1}}; void *mappedPtr = clEnqueueMapImage(cmdQ.get(), image.get(), CL_TRUE, CL_MAP_READ, &origin[0], ®ion[0], nullptr, nullptr, 0, nullptr, nullptr, &retVal); EXPECT_NE(nullptr, mappedPtr); EXPECT_EQ(1u, image->getMapOperationsHandler().size()); EXPECT_EQ(1u, image->transferToHostPtrCalled); EXPECT_EQ(image->copyRegion, region); EXPECT_EQ(image->copyOrigin, origin); retVal = clEnqueueUnmapMemObject(cmdQ.get(), image.get(), mappedPtr, 0, nullptr, nullptr); EXPECT_EQ(0u, image->getMapOperationsHandler().size()); EXPECT_EQ(0u, image->transferFromHostPtrCalled); } HWTEST_F(MultipleMapImageTest, givenUnblockedQueueWhenWriteInvalidateMappedOnCpuThenDontMakeCpuCopy) { auto image = createMockImage(); auto cmdQ = createMockCmdQ(); image->isZeroCopy = false; EXPECT_TRUE(image->mappingOnCpuAllowed()); MemObjOffsetArray origin = {{1, 0, 0}}; MemObjSizeArray region = {{3, 1, 1}}; void *mappedPtr = clEnqueueMapImage(cmdQ.get(), image.get(), CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION, &origin[0], ®ion[0], nullptr, nullptr, 0, nullptr, nullptr, &retVal); EXPECT_NE(nullptr, mappedPtr); EXPECT_EQ(1u, 
image->getMapOperationsHandler().size()); EXPECT_EQ(0u, image->transferToHostPtrCalled); retVal = clEnqueueUnmapMemObject(cmdQ.get(), image.get(), mappedPtr, 0, nullptr, nullptr); EXPECT_EQ(0u, image->getMapOperationsHandler().size()); EXPECT_EQ(1u, image->transferFromHostPtrCalled); EXPECT_EQ(image->copyRegion, region); EXPECT_EQ(image->copyOrigin, origin); } HWTEST_F(MultipleMapImageTest, givenBlockedQueueWhenMappedOnCpuThenAddMappedPtrAndRemoveOnUnmap) { auto image = createMockImage(); auto cmdQ = createMockCmdQ(); image->isZeroCopy = false; EXPECT_TRUE(image->mappingOnCpuAllowed()); UserEvent mapEvent, unmapEvent; cl_event clMapEvent = &mapEvent; cl_event clUnmapEvent = &unmapEvent; MemObjOffsetArray origin = {{1, 0, 0}}; MemObjSizeArray region = {{3, 1, 1}}; void *mappedPtr = clEnqueueMapImage(cmdQ.get(), image.get(), CL_FALSE, CL_MAP_WRITE, &origin[0], ®ion[0], nullptr, nullptr, 1, &clMapEvent, nullptr, &retVal); mapEvent.setStatus(CL_COMPLETE); EXPECT_NE(nullptr, mappedPtr); EXPECT_EQ(1u, image->transferToHostPtrCalled); EXPECT_EQ(1u, image->getMapOperationsHandler().size()); EXPECT_EQ(image->copyRegion, region); EXPECT_EQ(image->copyOrigin, origin); retVal = clEnqueueUnmapMemObject(cmdQ.get(), image.get(), mappedPtr, 1, &clUnmapEvent, nullptr); unmapEvent.setStatus(CL_COMPLETE); EXPECT_EQ(0u, image->getMapOperationsHandler().size()); EXPECT_EQ(1u, image->transferFromHostPtrCalled); EXPECT_EQ(image->copyRegion, region); EXPECT_EQ(image->copyOrigin, origin); } HWTEST_F(MultipleMapImageTest, givenBlockedQueueWhenMappedReadOnlyOnCpuThenDontMakeCpuCopy) { auto image = createMockImage(); auto cmdQ = createMockCmdQ(); image->isZeroCopy = false; EXPECT_TRUE(image->mappingOnCpuAllowed()); UserEvent mapEvent, unmapEvent; cl_event clMapEvent = &mapEvent; cl_event clUnmapEvent = &unmapEvent; MemObjOffsetArray origin = {{1, 0, 0}}; MemObjSizeArray region = {{3, 1, 1}}; void *mappedPtr = clEnqueueMapImage(cmdQ.get(), image.get(), CL_FALSE, CL_MAP_READ, &origin[0], 
®ion[0], nullptr, nullptr, 1, &clMapEvent, nullptr, &retVal); mapEvent.setStatus(CL_COMPLETE); EXPECT_NE(nullptr, mappedPtr); EXPECT_EQ(1u, image->transferToHostPtrCalled); EXPECT_EQ(1u, image->getMapOperationsHandler().size()); EXPECT_EQ(image->copyRegion, region); EXPECT_EQ(image->copyOrigin, origin); retVal = clEnqueueUnmapMemObject(cmdQ.get(), image.get(), mappedPtr, 1, &clUnmapEvent, nullptr); unmapEvent.setStatus(CL_COMPLETE); EXPECT_EQ(0u, image->getMapOperationsHandler().size()); EXPECT_EQ(0u, image->transferFromHostPtrCalled); } HWTEST_F(MultipleMapImageTest, givenInvalidPtrWhenUnmappedOnCpuThenReturnError) { auto image = createMockImage(); auto cmdQ = createMockCmdQ(); EXPECT_TRUE(image->mappingOnCpuAllowed()); retVal = clEnqueueUnmapMemObject(cmdQ.get(), image.get(), image->getBasePtrForMap(cmdQ->getDevice().getRootDeviceIndex()), 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); } HWTEST_F(MultipleMapImageTest, givenMultimpleMapsWhenUnmappingThenRemoveCorrectPointers) { if (!UnitTestHelper::tiledImagesSupported) { GTEST_SKIP(); } auto image = createMockImage(); auto cmdQ = createMockCmdQ(); MapInfo mappedPtrs[3] = {{nullptr, 1, {{1, 1, 1}}, {{1, 1, 1}}, 0}, {nullptr, 1, {{2, 2, 2}}, {{2, 2, 2}}, 0}, {nullptr, 1, {{3, 5, 7}}, {{4, 4, 4}}, 0}}; for (size_t i = 0; i < 3; i++) { mappedPtrs[i].ptr = clEnqueueMapImage(cmdQ.get(), image.get(), CL_TRUE, CL_MAP_WRITE, &mappedPtrs[i].offset[0], &mappedPtrs[i].size[0], nullptr, nullptr, 0, nullptr, nullptr, &retVal); EXPECT_NE(nullptr, mappedPtrs[i].ptr); EXPECT_EQ(i + 1, image->getMapOperationsHandler().size()); EXPECT_EQ(cmdQ->enqueueRegion, mappedPtrs[i].size); EXPECT_EQ(cmdQ->enqueueOrigin, mappedPtrs[i].offset); } // reordered unmap clEnqueueUnmapMemObject(cmdQ.get(), image.get(), mappedPtrs[1].ptr, 0, nullptr, nullptr); EXPECT_EQ(2u, image->getMapOperationsHandler().size()); EXPECT_EQ(cmdQ->unmapPtr, mappedPtrs[1].ptr); EXPECT_EQ(cmdQ->enqueueRegion, mappedPtrs[1].size); 
EXPECT_EQ(cmdQ->enqueueOrigin, mappedPtrs[1].offset); clEnqueueUnmapMemObject(cmdQ.get(), image.get(), mappedPtrs[2].ptr, 0, nullptr, nullptr); EXPECT_EQ(1u, image->getMapOperationsHandler().size()); EXPECT_EQ(cmdQ->unmapPtr, mappedPtrs[2].ptr); EXPECT_EQ(cmdQ->enqueueRegion, mappedPtrs[2].size); EXPECT_EQ(cmdQ->enqueueOrigin, mappedPtrs[2].offset); clEnqueueUnmapMemObject(cmdQ.get(), image.get(), mappedPtrs[0].ptr, 0, nullptr, nullptr); EXPECT_EQ(0u, image->getMapOperationsHandler().size()); EXPECT_EQ(cmdQ->unmapPtr, mappedPtrs[0].ptr); EXPECT_EQ(cmdQ->enqueueRegion, mappedPtrs[0].size); EXPECT_EQ(cmdQ->enqueueOrigin, mappedPtrs[0].offset); } HWTEST_F(MultipleMapImageTest, givenOverlapingPtrWhenMappingForWriteThenReturnError) { auto image = createMockImage(); auto cmdQ = createMockCmdQ(); EXPECT_EQ(!UnitTestHelper::tiledImagesSupported, image->mappingOnCpuAllowed()); MemObjOffsetArray origin = {{1, 2, 1}}; MemObjSizeArray region = {{3, 4, 1}}; void *mappedPtr = clEnqueueMapImage(cmdQ.get(), image.get(), CL_TRUE, CL_MAP_READ, &origin[0], ®ion[0], nullptr, nullptr, 0, nullptr, nullptr, &retVal); EXPECT_NE(nullptr, mappedPtr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, image->getMapOperationsHandler().size()); origin[0]++; void *mappedPtr2 = clEnqueueMapImage(cmdQ.get(), image.get(), CL_TRUE, CL_MAP_WRITE, &origin[0], ®ion[0], nullptr, nullptr, 0, nullptr, nullptr, &retVal); EXPECT_EQ(nullptr, mappedPtr2); EXPECT_EQ(CL_INVALID_OPERATION, retVal); EXPECT_EQ(1u, image->getMapOperationsHandler().size()); } HWTEST_F(MultipleMapImageTest, givenOverlapingPtrWhenMappingOnCpuForWriteThenReturnError) { auto image = createMockImage(); auto cmdQ = createMockCmdQ(); EXPECT_TRUE(image->mappingOnCpuAllowed()); MemObjOffsetArray origin = {{1, 0, 0}}; MemObjSizeArray region = {{3, 1, 1}}; void *mappedPtr = clEnqueueMapImage(cmdQ.get(), image.get(), CL_TRUE, CL_MAP_READ, &origin[0], ®ion[0], nullptr, nullptr, 0, nullptr, nullptr, &retVal); EXPECT_NE(nullptr, mappedPtr); 
EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, image->getMapOperationsHandler().size()); origin[0]++; void *mappedPtr2 = clEnqueueMapImage(cmdQ.get(), image.get(), CL_TRUE, CL_MAP_WRITE, &origin[0], ®ion[0], nullptr, nullptr, 0, nullptr, nullptr, &retVal); EXPECT_EQ(nullptr, mappedPtr2); EXPECT_EQ(CL_INVALID_OPERATION, retVal); EXPECT_EQ(1u, image->getMapOperationsHandler().size()); } } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/oom_buffer_tests.cpp000066400000000000000000000243751422164147700305350ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/memory_manager/memory_manager.h" #include "shared/test/common/fixtures/memory_management_fixture.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/event/event.h" #include "opencl/test/unit_test/command_queue/command_queue_fixture.h" #include "opencl/test/unit_test/command_queue/enqueue_fixture.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/fixtures/hello_world_kernel_fixture.h" #include "opencl/test/unit_test/fixtures/simple_arg_kernel_fixture.h" using namespace NEO; struct OOMSetting { bool oomCS; bool oomISH; }; static OOMSetting oomSettings[] = { {true, false}, {false, true}, {true, true}}; struct OOMCommandQueueBufferTest : public MemoryManagementFixture, public ClDeviceFixture, public CommandQueueFixture, public SimpleArgKernelFixture, public HelloWorldKernelFixture, public ::testing::TestWithParam { using CommandQueueFixture::SetUp; using HelloWorldKernelFixture::SetUp; using SimpleArgKernelFixture::SetUp; OOMCommandQueueBufferTest() { } void SetUp() override { MemoryManagement::breakOnAllocationEvent = 77; MemoryManagementFixture::SetUp(); ClDeviceFixture::SetUp(); context = new MockContext(pClDevice); 
BufferDefaults::context = context; CommandQueueFixture::SetUp(context, pClDevice, 0); SimpleArgKernelFixture::SetUp(pClDevice); HelloWorldKernelFixture::SetUp(pClDevice, "CopyBuffer_simd", "CopyBuffer"); srcBuffer = BufferHelper<>::create(); dstBuffer = BufferHelper<>::create(); const auto &oomSetting = GetParam(); auto oomSize = 10u; if (oomSetting.oomCS) { auto &cs = pCmdQ->getCS(oomSize); // CommandStream may be larger than requested so grab what wasnt requested cs.getSpace(cs.getAvailableSpace() - oomSize); ASSERT_EQ(oomSize, cs.getAvailableSpace()); } if (oomSetting.oomISH) { auto &ish = pCmdQ->getIndirectHeap(IndirectHeap::Type::DYNAMIC_STATE, oomSize); // IndirectHeap may be larger than requested so grab what wasnt requested ish.getSpace(ish.getAvailableSpace() - oomSize); ASSERT_EQ(oomSize, ish.getAvailableSpace()); } } void TearDown() override { delete dstBuffer; delete srcBuffer; context->release(); HelloWorldKernelFixture::TearDown(); SimpleArgKernelFixture::TearDown(); CommandQueueFixture::TearDown(); ClDeviceFixture::TearDown(); MemoryManagementFixture::TearDown(); } MockContext *context; Buffer *srcBuffer = nullptr; Buffer *dstBuffer = nullptr; }; HWTEST_P(OOMCommandQueueBufferTest, WhenCopyingBufferThenMaxAvailableSpaceIsNotExceeded) { CommandQueueHw cmdQ(context, pClDevice, 0, false); auto &commandStream = pCmdQ->getCS(1024); auto &indirectHeap = pCmdQ->getIndirectHeap(IndirectHeap::Type::DYNAMIC_STATE, 10); auto usedBeforeCS = commandStream.getUsed(); auto usedBeforeISH = indirectHeap.getUsed(); auto retVal1 = EnqueueCopyBufferHelper<>::enqueue(pCmdQ); EXPECT_EQ(CL_SUCCESS, retVal1); auto retVal2 = EnqueueCopyBufferHelper<>::enqueue(&cmdQ); EXPECT_EQ(CL_SUCCESS, retVal2); auto usedAfterCS = commandStream.getUsed(); auto usedAfterISH = indirectHeap.getUsed(); EXPECT_LE(usedAfterCS - usedBeforeCS, commandStream.getMaxAvailableSpace()); if (usedAfterISH > usedBeforeISH) { EXPECT_LE(usedAfterISH - usedBeforeISH, indirectHeap.getMaxAvailableSpace()); } 
else { EXPECT_LE(usedAfterISH, indirectHeap.getMaxAvailableSpace()); } } HWTEST_P(OOMCommandQueueBufferTest, WhenFillingBufferThenMaxAvailableSpaceIsNotExceeded) { CommandQueueHw cmdQ(context, pClDevice, 0, false); auto &commandStream = pCmdQ->getCS(1024); auto &indirectHeap = pCmdQ->getIndirectHeap(IndirectHeap::Type::DYNAMIC_STATE, 10); auto usedBeforeCS = commandStream.getUsed(); auto usedBeforeISH = indirectHeap.getUsed(); auto retVal1 = EnqueueFillBufferHelper<>::enqueue(pCmdQ); EXPECT_EQ(CL_SUCCESS, retVal1); auto retVal2 = EnqueueFillBufferHelper<>::enqueue(&cmdQ); EXPECT_EQ(CL_SUCCESS, retVal2); auto usedAfterCS = commandStream.getUsed(); auto usedAfterISH = indirectHeap.getUsed(); EXPECT_LE(usedAfterCS - usedBeforeCS, commandStream.getMaxAvailableSpace()); if (usedAfterISH > usedBeforeISH) { EXPECT_LE(usedAfterISH - usedBeforeISH, indirectHeap.getMaxAvailableSpace()); } else { EXPECT_LE(usedAfterISH, indirectHeap.getMaxAvailableSpace()); } } HWTEST_P(OOMCommandQueueBufferTest, WhenReadingBufferThenMaxAvailableSpaceIsNotExceeded) { CommandQueueHw cmdQ(context, pClDevice, 0, false); auto &commandStream = pCmdQ->getCS(1024); auto &indirectHeap = pCmdQ->getIndirectHeap(IndirectHeap::Type::DYNAMIC_STATE, 10); auto usedBeforeCS = commandStream.getUsed(); auto usedBeforeISH = indirectHeap.getUsed(); auto retVal1 = EnqueueReadBufferHelper<>::enqueue(pCmdQ); EXPECT_EQ(CL_SUCCESS, retVal1); auto retVal2 = EnqueueReadBufferHelper<>::enqueue(&cmdQ); EXPECT_EQ(CL_SUCCESS, retVal2); auto usedAfterCS = commandStream.getUsed(); auto usedAfterISH = indirectHeap.getUsed(); EXPECT_LE(usedAfterCS - usedBeforeCS, commandStream.getMaxAvailableSpace()); if (usedAfterISH > usedBeforeISH) { EXPECT_LE(usedAfterISH - usedBeforeISH, indirectHeap.getMaxAvailableSpace()); } else { EXPECT_LE(usedAfterISH, indirectHeap.getMaxAvailableSpace()); } } HWTEST_P(OOMCommandQueueBufferTest, WhenWritingBufferThenMaxAvailableSpaceIsNotExceeded) { CommandQueueHw cmdQ(context, pClDevice, 0, false); 
auto &commandStream = pCmdQ->getCS(1024); auto &indirectHeap = pCmdQ->getIndirectHeap(IndirectHeap::Type::DYNAMIC_STATE, 10); auto usedBeforeCS = commandStream.getUsed(); auto usedBeforeISH = indirectHeap.getUsed(); auto retVal1 = EnqueueWriteBufferHelper<>::enqueue(pCmdQ); EXPECT_EQ(CL_SUCCESS, retVal1); auto retVal2 = EnqueueWriteBufferHelper<>::enqueue(&cmdQ); EXPECT_EQ(CL_SUCCESS, retVal2); auto usedAfterCS = commandStream.getUsed(); auto usedAfterISH = indirectHeap.getUsed(); EXPECT_LE(usedAfterCS - usedBeforeCS, commandStream.getMaxAvailableSpace()); if (usedAfterISH > usedBeforeISH) { EXPECT_LE(usedAfterISH - usedBeforeISH, indirectHeap.getMaxAvailableSpace()); } else { EXPECT_LE(usedAfterISH, indirectHeap.getMaxAvailableSpace()); } } HWTEST_P(OOMCommandQueueBufferTest, WhenWritingBufferRectThenMaxAvailableSpaceIsNotExceeded) { CommandQueueHw cmdQ(context, pClDevice, 0, false); auto &commandStream = pCmdQ->getCS(1024); auto &indirectHeap = pCmdQ->getIndirectHeap(IndirectHeap::Type::DYNAMIC_STATE, 10); auto usedBeforeCS = commandStream.getUsed(); auto usedBeforeISH = indirectHeap.getUsed(); auto retVal1 = EnqueueWriteBufferRectHelper<>::enqueue(pCmdQ); EXPECT_EQ(CL_SUCCESS, retVal1); auto retVal2 = EnqueueWriteBufferRectHelper<>::enqueue(&cmdQ); EXPECT_EQ(CL_SUCCESS, retVal2); auto usedAfterCS = commandStream.getUsed(); auto usedAfterISH = indirectHeap.getUsed(); EXPECT_LE(usedAfterCS - usedBeforeCS, commandStream.getMaxAvailableSpace()); if (usedAfterISH > usedBeforeISH) { EXPECT_LE(usedAfterISH - usedBeforeISH, indirectHeap.getMaxAvailableSpace()); } else { EXPECT_LE(usedAfterISH, indirectHeap.getMaxAvailableSpace()); } } HWTEST_P(OOMCommandQueueBufferTest, GivenHelloWorldWhenEnqueingKernelThenMaxAvailableSpaceIsNotExceeded) { typedef HelloWorldKernelFixture KernelFixture; CommandQueueHw cmdQ(context, pClDevice, 0, false); auto &commandStream = pCmdQ->getCS(1024); auto &indirectHeap = pCmdQ->getIndirectHeap(IndirectHeap::Type::DYNAMIC_STATE, 10); auto 
usedBeforeCS = commandStream.getUsed(); auto usedBeforeISH = indirectHeap.getUsed(); auto retVal1 = EnqueueKernelHelper<>::enqueueKernel( pCmdQ, KernelFixture::pKernel); auto retVal2 = EnqueueKernelHelper<>::enqueueKernel( &cmdQ, KernelFixture::pKernel); auto usedAfterCS = commandStream.getUsed(); auto usedAfterISH = indirectHeap.getUsed(); EXPECT_LE(usedAfterCS - usedBeforeCS, commandStream.getMaxAvailableSpace()); if (usedAfterISH > usedBeforeISH) { EXPECT_LE(usedAfterISH - usedBeforeISH, indirectHeap.getMaxAvailableSpace()); } else { EXPECT_LE(usedAfterISH, indirectHeap.getMaxAvailableSpace()); } EXPECT_EQ(CL_SUCCESS, retVal1); EXPECT_EQ(CL_SUCCESS, retVal2); } HWTEST_P(OOMCommandQueueBufferTest, GivenSimpleArgWhenEnqueingKernelThenMaxAvailableSpaceIsNotExceeded) { typedef SimpleArgKernelFixture KernelFixture; CommandQueueHw cmdQ(context, pClDevice, 0, false); auto &commandStream = pCmdQ->getCS(1024); auto &indirectHeap = pCmdQ->getIndirectHeap(IndirectHeap::Type::DYNAMIC_STATE, 10); auto usedBeforeCS = commandStream.getUsed(); auto usedBeforeISH = indirectHeap.getUsed(); auto retVal1 = EnqueueKernelHelper<>::enqueueKernel( pCmdQ, KernelFixture::pKernel); auto retVal2 = EnqueueKernelHelper<>::enqueueKernel( &cmdQ, KernelFixture::pKernel); auto usedAfterCS = commandStream.getUsed(); auto usedAfterISH = indirectHeap.getUsed(); EXPECT_LE(usedAfterCS - usedBeforeCS, commandStream.getMaxAvailableSpace()); if (usedAfterISH > usedBeforeISH) { EXPECT_LE(usedAfterISH - usedBeforeISH, indirectHeap.getMaxAvailableSpace()); } else { EXPECT_LE(usedAfterISH, indirectHeap.getMaxAvailableSpace()); } EXPECT_EQ(CL_SUCCESS, retVal1); EXPECT_EQ(CL_SUCCESS, retVal2); } INSTANTIATE_TEST_CASE_P( OOM, OOMCommandQueueBufferTest, testing::ValuesIn(oomSettings)); compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/oom_image_tests.cpp000066400000000000000000000144171422164147700303420ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * 
SPDX-License-Identifier: MIT * */ #include "shared/source/memory_manager/memory_manager.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/common/test_macros/test_checks_shared.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/event/event.h" #include "opencl/test/unit_test/command_queue/command_queue_fixture.h" #include "opencl/test/unit_test/command_queue/enqueue_fixture.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" using namespace NEO; struct OOMSetting { bool oomCS; bool oomISH; }; static OOMSetting oomSettings[] = { {true, false}, {false, true}, {true, true}}; struct OOMCommandQueueImageTest : public ClDeviceFixture, public CommandQueueFixture, public ::testing::TestWithParam { using CommandQueueFixture::SetUp; OOMCommandQueueImageTest() { } void SetUp() override { REQUIRE_IMAGES_OR_SKIP(defaultHwInfo); ClDeviceFixture::SetUp(); context = new MockContext(pClDevice); CommandQueueFixture::SetUp(context, pClDevice, 0); srcImage = Image2dHelper<>::create(context); dstImage = Image2dHelper<>::create(context); const auto &oomSetting = GetParam(); auto oomSize = 10u; if (oomSetting.oomCS) { auto &cs = pCmdQ->getCS(oomSize); // CommandStream may be larger than requested so grab what wasnt requested cs.getSpace(cs.getAvailableSpace() - oomSize); ASSERT_EQ(oomSize, cs.getAvailableSpace()); } if (oomSetting.oomISH) { auto &ish = pCmdQ->getIndirectHeap(IndirectHeap::Type::DYNAMIC_STATE, oomSize); // IndirectHeap may be larger than requested so grab what wasnt requested ish.getSpace(ish.getAvailableSpace() - oomSize); ASSERT_EQ(oomSize, ish.getAvailableSpace()); } } void TearDown() override { if (IsSkipped()) { return; } delete dstImage; delete srcImage; context->release(); CommandQueueFixture::TearDown(); ClDeviceFixture::TearDown(); } MockContext *context; Image *srcImage = nullptr; Image *dstImage = nullptr; }; HWTEST_P(OOMCommandQueueImageTest, 
WhenCopyingImageThenMaxAvailableSpaceIsNotExceeded) { CommandQueueHw cmdQ(context, pClDevice, 0, false); auto &commandStream = pCmdQ->getCS(1024); auto &indirectHeap = pCmdQ->getIndirectHeap(IndirectHeap::Type::DYNAMIC_STATE, 10); auto usedBeforeCS = commandStream.getUsed(); auto usedBeforeISH = indirectHeap.getUsed(); auto retVal1 = EnqueueCopyImageHelper<>::enqueue(pCmdQ); auto retVal2 = EnqueueCopyImageHelper<>::enqueue(&cmdQ); auto usedAfterCS = commandStream.getUsed(); auto usedAfterISH = indirectHeap.getUsed(); EXPECT_LE(usedAfterCS - usedBeforeCS, commandStream.getMaxAvailableSpace()); if (usedAfterISH > usedBeforeISH) { EXPECT_LE(usedAfterISH - usedBeforeISH, indirectHeap.getMaxAvailableSpace()); } else { EXPECT_LE(usedAfterISH, indirectHeap.getMaxAvailableSpace()); } EXPECT_EQ(CL_SUCCESS, retVal1); EXPECT_EQ(CL_SUCCESS, retVal2); } HWTEST_P(OOMCommandQueueImageTest, WhenFillingImageThenMaxAvailableSpaceIsNotExceeded) { CommandQueueHw cmdQ(context, pClDevice, 0, false); auto &commandStream = pCmdQ->getCS(1024); auto &indirectHeap = pCmdQ->getIndirectHeap(IndirectHeap::Type::DYNAMIC_STATE, 10); auto usedBeforeCS = commandStream.getUsed(); auto usedBeforeISH = indirectHeap.getUsed(); auto retVal1 = EnqueueFillImageHelper<>::enqueue(pCmdQ); auto retVal2 = EnqueueFillImageHelper<>::enqueue(&cmdQ); auto usedAfterCS = commandStream.getUsed(); auto usedAfterISH = indirectHeap.getUsed(); EXPECT_LE(usedAfterCS - usedBeforeCS, commandStream.getMaxAvailableSpace()); if (usedAfterISH > usedBeforeISH) { EXPECT_LE(usedAfterISH - usedBeforeISH, indirectHeap.getMaxAvailableSpace()); } else { EXPECT_LE(usedAfterISH, indirectHeap.getMaxAvailableSpace()); } EXPECT_EQ(CL_SUCCESS, retVal1); EXPECT_EQ(CL_SUCCESS, retVal2); } HWTEST_P(OOMCommandQueueImageTest, WhenReadingImageThenMaxAvailableSpaceIsNotExceeded) { CommandQueueHw cmdQ(context, pClDevice, 0, false); auto &commandStream = pCmdQ->getCS(1024); auto &indirectHeap = 
pCmdQ->getIndirectHeap(IndirectHeap::Type::DYNAMIC_STATE, 10); auto usedBeforeCS = commandStream.getUsed(); auto usedBeforeISH = indirectHeap.getUsed(); auto retVal1 = EnqueueReadImageHelper<>::enqueue(pCmdQ); auto retVal2 = EnqueueReadImageHelper<>::enqueue(&cmdQ); auto usedAfterCS = commandStream.getUsed(); auto usedAfterISH = indirectHeap.getUsed(); EXPECT_LE(usedAfterCS - usedBeforeCS, commandStream.getMaxAvailableSpace()); if (usedAfterISH > usedBeforeISH) { EXPECT_LE(usedAfterISH - usedBeforeISH, indirectHeap.getMaxAvailableSpace()); } else { EXPECT_LE(usedAfterISH, indirectHeap.getMaxAvailableSpace()); } EXPECT_EQ(CL_SUCCESS, retVal1); EXPECT_EQ(CL_SUCCESS, retVal2); } HWTEST_P(OOMCommandQueueImageTest, WhenWritingImageThenMaxAvailableSpaceIsNotExceeded) { CommandQueueHw cmdQ(context, pClDevice, 0, false); auto &commandStream = pCmdQ->getCS(1024); auto &indirectHeap = pCmdQ->getIndirectHeap(IndirectHeap::Type::DYNAMIC_STATE, 10); auto usedBeforeCS = commandStream.getUsed(); auto usedBeforeISH = indirectHeap.getUsed(); auto retVal1 = EnqueueWriteImageHelper<>::enqueue(pCmdQ); auto retVal2 = EnqueueWriteImageHelper<>::enqueue(&cmdQ); auto usedAfterCS = commandStream.getUsed(); auto usedAfterISH = indirectHeap.getUsed(); EXPECT_LE(usedAfterCS - usedBeforeCS, commandStream.getMaxAvailableSpace()); if (usedAfterISH > usedBeforeISH) { EXPECT_LE(usedAfterISH - usedBeforeISH, indirectHeap.getMaxAvailableSpace()); } else { EXPECT_LE(usedAfterISH, indirectHeap.getMaxAvailableSpace()); } EXPECT_EQ(CL_SUCCESS, retVal1); EXPECT_EQ(CL_SUCCESS, retVal2); } INSTANTIATE_TEST_CASE_P( OOM, OOMCommandQueueImageTest, testing::ValuesIn(oomSettings)); compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/oom_tests.cpp000066400000000000000000000106651422164147700272010ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/indirect_heap/indirect_heap.h" #include 
"shared/source/memory_manager/memory_manager.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/event/event.h" #include "opencl/test/unit_test/command_queue/command_queue_fixture.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" using namespace NEO; struct OOMSetting { bool oomCS; bool oomISH; }; static OOMSetting oomSettings[] = { {true, false}, {false, true}, {true, true}}; struct OOMCommandQueueTest : public ClDeviceFixture, public CommandQueueFixture, public ::testing::TestWithParam { using CommandQueueFixture::SetUp; OOMCommandQueueTest() { } void SetUp() override { ClDeviceFixture::SetUp(); context = new MockContext(pClDevice); CommandQueueFixture::SetUp(context, pClDevice, 0); const auto &oomSetting = GetParam(); auto oomSize = 10u; if (oomSetting.oomCS) { auto &cs = pCmdQ->getCS(oomSize); // CommandStream may be larger than requested so grab what wasnt requested cs.getSpace(cs.getAvailableSpace() - oomSize); ASSERT_EQ(oomSize, cs.getAvailableSpace()); } if (oomSetting.oomISH) { auto &ish = pCmdQ->getIndirectHeap(IndirectHeap::Type::DYNAMIC_STATE, oomSize); // IndirectHeap may be larger than requested so grab what wasnt requested ish.getSpace(ish.getAvailableSpace() - oomSize); ASSERT_EQ(oomSize, ish.getAvailableSpace()); } } void TearDown() override { CommandQueueFixture::TearDown(); context->release(); ClDeviceFixture::TearDown(); } MockContext *context; }; HWTEST_P(OOMCommandQueueTest, WhenFinishingThenMaxAvailableSpaceIsNotExceeded) { auto &commandStream = pCmdQ->getCS(1024); auto &indirectHeap = pCmdQ->getIndirectHeap(IndirectHeap::Type::DYNAMIC_STATE, 10); auto usedBeforeCS = commandStream.getUsed(); auto usedBeforeISH = indirectHeap.getUsed(); auto retVal = pCmdQ->finish(); auto usedAfterCS = commandStream.getUsed(); auto usedAfterISH = indirectHeap.getUsed(); EXPECT_LE(usedAfterCS - usedBeforeCS, commandStream.getMaxAvailableSpace()); EXPECT_LE(usedAfterISH 
- usedBeforeISH, indirectHeap.getMaxAvailableSpace()); EXPECT_EQ(CL_SUCCESS, retVal); } HWTEST_P(OOMCommandQueueTest, WhenEnqueingMarkerThenMaxAvailableSpaceIsNotExceeded) { auto &commandStream = pCmdQ->getCS(1024); auto &indirectHeap = pCmdQ->getIndirectHeap(IndirectHeap::Type::DYNAMIC_STATE, 10); auto usedBeforeCS = commandStream.getUsed(); auto usedBeforeISH = indirectHeap.getUsed(); Event event1(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 5, 15); cl_event eventBeingWaitedOn = &event1; cl_event eventReturned = nullptr; auto retVal = pCmdQ->enqueueMarkerWithWaitList( 1, &eventBeingWaitedOn, &eventReturned); EXPECT_EQ(CL_SUCCESS, retVal); auto usedAfterCS = commandStream.getUsed(); auto usedAfterISH = indirectHeap.getUsed(); EXPECT_LE(usedAfterCS - usedBeforeCS, commandStream.getMaxAvailableSpace()); EXPECT_LE(usedAfterISH - usedBeforeISH, indirectHeap.getMaxAvailableSpace()); delete (Event *)eventReturned; } HWTEST_P(OOMCommandQueueTest, WhenEnqueingBarrierThenMaxAvailableSpaceIsNotExceeded) { auto &commandStream = pCmdQ->getCS(1024); auto &indirectHeap = pCmdQ->getIndirectHeap(IndirectHeap::Type::DYNAMIC_STATE, 10); auto usedBeforeCS = commandStream.getUsed(); auto usedBeforeISH = indirectHeap.getUsed(); Event event1(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 5, 15); cl_event eventBeingWaitedOn = &event1; cl_event eventReturned = nullptr; auto retVal = pCmdQ->enqueueBarrierWithWaitList( 1, &eventBeingWaitedOn, &eventReturned); EXPECT_EQ(CL_SUCCESS, retVal); auto usedAfterCS = commandStream.getUsed(); auto usedAfterISH = indirectHeap.getUsed(); EXPECT_LE(usedAfterCS - usedBeforeCS, commandStream.getMaxAvailableSpace()); EXPECT_LE(usedAfterISH - usedBeforeISH, indirectHeap.getMaxAvailableSpace()); delete (Event *)eventReturned; } INSTANTIATE_TEST_CASE_P( OOM, OOMCommandQueueTest, testing::ValuesIn(oomSettings)); compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/ooq_task_tests.cpp000066400000000000000000000343151422164147700302250ustar00rootroot00000000000000/* * 
Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/mocks/mock_csr.h" #include "shared/test/common/test_macros/test_checks_shared.h" #include "opencl/test/unit_test/command_queue/enqueue_fixture.h" #include "opencl/test/unit_test/fixtures/hello_world_fixture.h" using namespace NEO; struct OOQFixtureFactory : public HelloWorldFixtureFactory { typedef OOQueueFixture CommandQueueFixture; }; template struct OOQTaskTypedTests : public HelloWorldTest { void SetUp() override { if (std::is_same>::value || std::is_same>::value || std::is_same>::value || std::is_same>::value) { REQUIRE_IMAGES_OR_SKIP(defaultHwInfo); } DebugManager.flags.PerformImplicitFlushForNewResource.set(0); DebugManager.flags.PerformImplicitFlushForIdleGpu.set(0); HelloWorldTest::SetUp(); } void TearDown() override { if (!IsSkipped()) { HelloWorldTest::TearDown(); } } DebugManagerStateRestore stateRestore; }; TYPED_TEST_CASE_P(OOQTaskTypedTests); bool isBlockingCall(unsigned int cmdType) { if (cmdType == CL_COMMAND_WRITE_BUFFER || cmdType == CL_COMMAND_READ_BUFFER || cmdType == CL_COMMAND_WRITE_IMAGE || cmdType == CL_COMMAND_READ_IMAGE) { return true; } else { return false; } } TYPED_TEST_P(OOQTaskTypedTests, givenNonBlockingCallWhenDoneOnOutOfOrderQueueThenTaskLevelDoesntChange) { auto &commandStreamReceiver = this->pCmdQ->getGpgpuCommandStreamReceiver(); auto tagAddress = commandStreamReceiver.getTagAddress(); auto blockingCall = isBlockingCall(TypeParam::Traits::cmdType); auto taskLevelClosed = blockingCall ? 
1u : 0u; // for blocking commands task level will be closed //for non blocking calls make sure that resources are added to defer free list instaed of being destructed in place if (!blockingCall) { *tagAddress = 0; } auto previousTaskLevel = this->pCmdQ->taskLevel; if (TypeParam::Traits::cmdType == CL_COMMAND_WRITE_BUFFER || TypeParam::Traits::cmdType == CL_COMMAND_READ_BUFFER) { auto buffer = std::unique_ptr(BufferHelper<>::create()); buffer->forceDisallowCPUCopy = true; // no task level logic when cpu copy TypeParam::enqueue(this->pCmdQ, buffer.get()); this->pCmdQ->flush(); } else { TypeParam::enqueue(this->pCmdQ, nullptr); } EXPECT_EQ(previousTaskLevel + taskLevelClosed, this->pCmdQ->taskLevel); *tagAddress = initialHardwareTag; } TYPED_TEST_P(OOQTaskTypedTests, givenTaskWhenEnqueuedOnOutOfOrderQueueThenTaskCountIsUpdated) { auto &commandStreamReceiver = this->pCmdQ->getGpgpuCommandStreamReceiver(); auto previousTaskCount = commandStreamReceiver.peekTaskCount(); auto tagAddress = commandStreamReceiver.getTagAddress(); auto blockingCall = isBlockingCall(TypeParam::Traits::cmdType); //for non blocking calls make sure that resources are added to defer free list instaed of being destructed in place if (!blockingCall) { *tagAddress = 0; } if (TypeParam::Traits::cmdType == CL_COMMAND_WRITE_BUFFER || TypeParam::Traits::cmdType == CL_COMMAND_READ_BUFFER) { auto buffer = std::unique_ptr(BufferHelper<>::create()); buffer->forceDisallowCPUCopy = true; // no task level logic when cpu copy TypeParam::enqueue(this->pCmdQ, buffer.get()); this->pCmdQ->flush(); } else { TypeParam::enqueue(this->pCmdQ, nullptr); } EXPECT_LT(previousTaskCount, commandStreamReceiver.peekTaskCount()); EXPECT_LE(this->pCmdQ->taskCount, commandStreamReceiver.peekTaskCount()); *tagAddress = initialHardwareTag; } typedef ::testing::Types< EnqueueCopyBufferHelper<>, EnqueueCopyImageHelper<>, EnqueueFillBufferHelper<>, EnqueueFillImageHelper<>, EnqueueReadBufferHelper<>, EnqueueReadImageHelper<>, 
EnqueueWriteBufferHelper<>, EnqueueWriteImageHelper<>> EnqueueParams; REGISTER_TYPED_TEST_CASE_P(OOQTaskTypedTests, givenNonBlockingCallWhenDoneOnOutOfOrderQueueThenTaskLevelDoesntChange, givenTaskWhenEnqueuedOnOutOfOrderQueueThenTaskCountIsUpdated); // Instantiate all of these parameterized tests INSTANTIATE_TYPED_TEST_CASE_P(OOQ, OOQTaskTypedTests, EnqueueParams); typedef OOQTaskTypedTests> OOQTaskTests; TEST_F(OOQTaskTests, WhenEnqueuingKernelThenTaskCountIsIncremented) { auto &commandStreamReceiver = pCmdQ->getGpgpuCommandStreamReceiver(); auto previousTaskCount = commandStreamReceiver.peekTaskCount(); EnqueueKernelHelper<>::enqueueKernel(this->pCmdQ, pKernel); EXPECT_LT(previousTaskCount, commandStreamReceiver.peekTaskCount()); EXPECT_EQ(this->pCmdQ->taskCount, commandStreamReceiver.peekTaskCount()); } HWTEST_F(OOQTaskTests, givenCommandQueueWithLowerTaskLevelThenCsrWhenItIsSubmittedThenCommandQueueObtainsTaskLevelFromCsrWithoutSendingPipeControl) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); UltCommandStreamReceiver &mockCsr = reinterpret_cast &>(commandStreamReceiver); mockCsr.useNewResourceImplicitFlush = false; mockCsr.useGpuIdleImplicitFlush = false; commandStreamReceiver.taskLevel = 100; EnqueueKernelHelper<>::enqueueKernel(this->pCmdQ, pKernel); EXPECT_EQ(100u, this->pCmdQ->taskLevel); } HWTEST_F(OOQTaskTests, givenCommandQueueAtTaskLevel100WhenMultipleEnqueueAreDoneThenTaskLevelDoesntChange) { auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); mockCsr->useNewResourceImplicitFlush = false; mockCsr->useGpuIdleImplicitFlush = false; mockCsr->taskLevel = 100; EnqueueKernelHelper<>::enqueueKernel(this->pCmdQ, pKernel); EnqueueKernelHelper<>::enqueueKernel(this->pCmdQ, pKernel); EnqueueKernelHelper<>::enqueueKernel(this->pCmdQ, pKernel); 
EnqueueKernelHelper<>::enqueueKernel(this->pCmdQ, pKernel); EnqueueKernelHelper<>::enqueueKernel(this->pCmdQ, pKernel); EXPECT_EQ(100u, this->pCmdQ->taskLevel); EXPECT_EQ(100u, mockCsr->peekTaskLevel()); } HWTEST_F(OOQTaskTests, givenCommandQueueAtTaskLevel100WhenItIsFlushedAndFollowedByNewCommandsThenTheyHaveHigherTaskLevel) { auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); mockCsr->useNewResourceImplicitFlush = false; mockCsr->useGpuIdleImplicitFlush = false; mockCsr->taskLevel = 100; EnqueueKernelHelper<>::enqueueKernel(this->pCmdQ, pKernel); EnqueueKernelHelper<>::enqueueKernel(this->pCmdQ, pKernel); EXPECT_EQ(100u, this->pCmdQ->taskLevel); EXPECT_EQ(100u, mockCsr->peekTaskLevel()); mockCsr->flushBatchedSubmissions(); EXPECT_EQ(101u, mockCsr->peekTaskLevel()); EXPECT_EQ(100u, this->pCmdQ->taskLevel); EnqueueKernelHelper<>::enqueueKernel(this->pCmdQ, pKernel); EXPECT_EQ(101u, this->pCmdQ->taskLevel); } HWTEST_F(OOQTaskTests, givenCommandQueueAtTaskLevel100WhenItIsFlushedAndFollowedByNewCommandsAndBarrierThenCsrTaskLevelIncreases) { auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); mockCsr->useNewResourceImplicitFlush = false; mockCsr->useGpuIdleImplicitFlush = false; mockCsr->taskLevel = 100; EnqueueKernelHelper<>::enqueueKernel(this->pCmdQ, pKernel); mockCsr->flushBatchedSubmissions(); EXPECT_EQ(101u, mockCsr->peekTaskLevel()); EXPECT_EQ(100u, this->pCmdQ->taskLevel); EnqueueKernelHelper<>::enqueueKernel(this->pCmdQ, pKernel); EXPECT_EQ(101u, this->pCmdQ->taskLevel); this->pCmdQ->enqueueBarrierWithWaitList(0, nullptr, nullptr); EXPECT_EQ(102u, this->pCmdQ->taskLevel); 
EnqueueKernelHelper<>::enqueueKernel(this->pCmdQ, pKernel); EXPECT_EQ(102u, this->pCmdQ->taskLevel); EXPECT_EQ(102u, mockCsr->peekTaskLevel()); } HWTEST_F(OOQTaskTests, givenCommandQueueAtTaskLevel100WhenItIsFlushedAndFollowedByNewCommandsAndMarkerThenCsrTaskLevelIsNotIncreasing) { auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); mockCsr->useNewResourceImplicitFlush = false; mockCsr->useGpuIdleImplicitFlush = false; mockCsr->taskLevel = 100; EnqueueKernelHelper<>::enqueueKernel(this->pCmdQ, pKernel); mockCsr->flushBatchedSubmissions(); EXPECT_EQ(101u, mockCsr->peekTaskLevel()); EXPECT_EQ(100u, this->pCmdQ->taskLevel); EnqueueKernelHelper<>::enqueueKernel(this->pCmdQ, pKernel); EXPECT_EQ(101u, this->pCmdQ->taskLevel); this->pCmdQ->enqueueMarkerWithWaitList(0, nullptr, nullptr); EXPECT_EQ(101u, this->pCmdQ->taskLevel); EnqueueKernelHelper<>::enqueueKernel(this->pCmdQ, pKernel); EXPECT_EQ(101u, this->pCmdQ->taskLevel); EXPECT_EQ(101u, mockCsr->peekTaskLevel()); } HWTEST_F(OOQTaskTests, givenTwoEnqueueCommandSynchronizedByEventsWhenTheyAreEnqueueThenSecondHasHigherTaskLevelThenFirst) { auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); mockCsr->useNewResourceImplicitFlush = false; mockCsr->useGpuIdleImplicitFlush = false; auto currentTaskLevel = this->pCmdQ->taskLevel; cl_event retEvent; EnqueueKernelHelper<>::enqueueKernel(this->pCmdQ, pKernel, EnqueueKernelTraits::workDim, EnqueueKernelTraits::globalWorkOffset, EnqueueKernelTraits::globalWorkSize, EnqueueKernelTraits::localWorkSize, 0, nullptr, &retEvent); auto neoEvent = castToObject(retEvent); EXPECT_EQ(currentTaskLevel, neoEvent->taskLevel); cl_event 
retEvent2; EnqueueKernelHelper<>::enqueueKernel(this->pCmdQ, pKernel, EnqueueKernelTraits::workDim, EnqueueKernelTraits::globalWorkOffset, EnqueueKernelTraits::globalWorkSize, EnqueueKernelTraits::localWorkSize, 1, &retEvent, &retEvent2); auto neoEvent2 = castToObject(retEvent2); EXPECT_EQ(neoEvent2->taskLevel, neoEvent->taskLevel + 1); clReleaseEvent(retEvent2); clReleaseEvent(retEvent); } HWTEST_F(OOQTaskTests, WhenEnqueingKernelThenTaskLevelIsNotIncremented) { auto previousTaskLevel = this->pCmdQ->taskLevel; UltCommandStreamReceiver &mockCsr = reinterpret_cast &>(pCmdQ->getGpgpuCommandStreamReceiver()); mockCsr.useNewResourceImplicitFlush = false; mockCsr.useGpuIdleImplicitFlush = false; EnqueueKernelHelper<>::enqueueKernel(this->pCmdQ, pKernel); EXPECT_EQ(previousTaskLevel, this->pCmdQ->taskLevel); } HWTEST_F(OOQTaskTests, GivenBlockingAndNonBlockedOnUserEventWhenReadingBufferThenTaskCountIsIncrementedAndTaskLevelIsUnchanged) { UltCommandStreamReceiver &mockCsr = reinterpret_cast &>(pCmdQ->getGpgpuCommandStreamReceiver()); mockCsr.useNewResourceImplicitFlush = false; mockCsr.useGpuIdleImplicitFlush = false; auto buffer = std::unique_ptr(BufferHelper<>::create()); auto alignedReadPtr = alignedMalloc(BufferDefaults::sizeInBytes, MemoryConstants::cacheLineSize); ASSERT_NE(nullptr, alignedReadPtr); pCmdQ->taskLevel = 1; auto previousTaskCount = pCmdQ->taskCount; auto previousTaskLevel = pCmdQ->taskLevel; auto userEvent = clCreateUserEvent(pContext, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clSetUserEventStatus(userEvent, CL_COMPLETE); ASSERT_EQ(CL_SUCCESS, retVal); buffer->forceDisallowCPUCopy = true; // no task level incrasing when cpu copy retVal = EnqueueReadBufferHelper<>::enqueueReadBuffer(pCmdQ, buffer.get(), CL_FALSE, 0, BufferDefaults::sizeInBytes, alignedReadPtr, nullptr, 1, &userEvent, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_LT(previousTaskCount, pCmdQ->taskCount); EXPECT_EQ(previousTaskLevel, pCmdQ->taskLevel); retVal = 
clReleaseEvent(userEvent); EXPECT_EQ(CL_SUCCESS, retVal); pCmdQ->flush(); alignedFree(alignedReadPtr); } TEST_F(OOQTaskTests, givenOutOfOrderCommandQueueWhenBarrierIsCalledThenTaskLevelIsUpdated) { EnqueueKernelHelper<>::enqueueKernel(this->pCmdQ, pKernel); auto currentTaskLevel = this->pCmdQ->taskLevel; clEnqueueBarrierWithWaitList(this->pCmdQ, 0, nullptr, nullptr); auto newTaskLevel = this->pCmdQ->taskLevel; EXPECT_GT(newTaskLevel, currentTaskLevel); } compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/read_write_buffer_cpu_copy.cpp000066400000000000000000000436761422164147700325540ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gmm_helper/gmm.h" #include "shared/source/helpers/basic_math.h" #include "shared/source/helpers/local_memory_access_modes.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/command_queue/enqueue_read_buffer_fixture.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" using namespace NEO; typedef EnqueueReadBufferTypeTest ReadWriteBufferCpuCopyTest; HWTEST_F(ReadWriteBufferCpuCopyTest, givenCompressedGmmWhenAskingForCpuOperationThenDisallow) { DebugManagerStateRestore restorer; DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast(LocalMemoryAccessMode::Default)); cl_int retVal; auto rootDeviceIndex = context->getDevice(0)->getRootDeviceIndex(); std::unique_ptr buffer(Buffer::create(context, CL_MEM_READ_WRITE, 1, nullptr, retVal)); auto gmm = new Gmm(pDevice->getGmmClientContext(), nullptr, 1, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true); gmm->isCompressionEnabled = false; auto allocation = buffer->getGraphicsAllocation(rootDeviceIndex); allocation->setDefaultGmm(gmm); auto alignedPtr = alignedMalloc(2, MemoryConstants::cacheLineSize); auto unalignedPtr = ptrOffset(alignedPtr, 1); EXPECT_EQ(1u, 
allocation->storageInfo.getNumBanks()); EXPECT_TRUE(buffer->isReadWriteOnCpuAllowed(*pDevice)); EXPECT_TRUE(buffer->isReadWriteOnCpuPreferred(unalignedPtr, 1, *pDevice)); gmm->isCompressionEnabled = true; EXPECT_FALSE(buffer->isReadWriteOnCpuAllowed(*pDevice)); EXPECT_TRUE(buffer->isReadWriteOnCpuPreferred(unalignedPtr, 1, *pDevice)); alignedFree(alignedPtr); } HWTEST_F(ReadWriteBufferCpuCopyTest, GivenUnalignedReadPtrWhenReadingBufferThenMemoryIsReadCorrectly) { DebugManagerStateRestore restorer; DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast(LocalMemoryAccessMode::Default)); cl_int retVal; size_t offset = 1; size_t size = 4; auto alignedReadPtr = alignedMalloc(size + 1, MemoryConstants::cacheLineSize); memset(alignedReadPtr, 0x00, size + 1); auto unalignedReadPtr = ptrOffset(alignedReadPtr, 1); std::unique_ptr bufferPtr(new uint8_t[size]); for (uint8_t i = 0; i < size; i++) { bufferPtr[i] = i + 1; } std::unique_ptr buffer(Buffer::create(context, CL_MEM_USE_HOST_PTR, size, bufferPtr.get(), retVal)); EXPECT_EQ(retVal, CL_SUCCESS); bool aligned = (reinterpret_cast(unalignedReadPtr) & (MemoryConstants::cacheLineSize - 1)) == 0; EXPECT_TRUE(!aligned || buffer->isMemObjZeroCopy()); ASSERT_TRUE(buffer->isReadWriteOnCpuAllowed(pCmdQ->getDevice())); ASSERT_TRUE(buffer->isReadWriteOnCpuPreferred(unalignedReadPtr, size, context->getDevice(0)->getDevice())); retVal = EnqueueReadBufferHelper<>::enqueueReadBuffer(pCmdQ, buffer.get(), CL_TRUE, offset, size - offset, unalignedReadPtr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(retVal, CL_SUCCESS); auto pBufferPtr = ptrOffset(bufferPtr.get(), offset); EXPECT_EQ(memcmp(unalignedReadPtr, pBufferPtr, size - offset), 0); alignedFree(alignedReadPtr); } HWTEST_F(ReadWriteBufferCpuCopyTest, GivenUnalignedSrcPtrWhenWritingBufferThenMemoryIsWrittenCorrectly) { DebugManagerStateRestore restorer; DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast(LocalMemoryAccessMode::Default)); cl_int retVal; size_t offset = 1; 
size_t size = 4; auto alignedWritePtr = alignedMalloc(size + 1, MemoryConstants::cacheLineSize); auto unalignedWritePtr = static_cast(ptrOffset(alignedWritePtr, 1)); auto bufferPtrBase = new uint8_t[size]; auto bufferPtr = new uint8_t[size]; for (uint8_t i = 0; i < size; i++) { unalignedWritePtr[i] = i + 5; bufferPtrBase[i] = i + 1; bufferPtr[i] = i + 1; } std::unique_ptr buffer(Buffer::create(context, CL_MEM_USE_HOST_PTR, size, bufferPtr, retVal)); EXPECT_EQ(retVal, CL_SUCCESS); bool aligned = (reinterpret_cast(unalignedWritePtr) & (MemoryConstants::cacheLineSize - 1)) == 0; EXPECT_TRUE(!aligned || buffer->isMemObjZeroCopy()); ASSERT_TRUE(buffer->isReadWriteOnCpuAllowed(pCmdQ->getDevice())); ASSERT_TRUE(buffer->isReadWriteOnCpuPreferred(unalignedWritePtr, size, context->getDevice(0)->getDevice())); retVal = EnqueueWriteBufferHelper<>::enqueueWriteBuffer(pCmdQ, buffer.get(), CL_TRUE, offset, size - offset, unalignedWritePtr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(retVal, CL_SUCCESS); auto pBufferPtr = buffer->getCpuAddress(); EXPECT_EQ(memcmp(pBufferPtr, bufferPtrBase, offset), 0); // untouched pBufferPtr = ptrOffset(pBufferPtr, offset); EXPECT_EQ(memcmp(pBufferPtr, unalignedWritePtr, size - offset), 0); // updated alignedFree(alignedWritePtr); delete[] bufferPtr; delete[] bufferPtrBase; } HWTEST_F(ReadWriteBufferCpuCopyTest, GivenSpecificMemoryStructuresWhenReadingWritingMemoryThenCpuReadWriteIsAllowed) { DebugManagerStateRestore restorer; DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast(LocalMemoryAccessMode::Default)); cl_int retVal; size_t size = MemoryConstants::cacheLineSize; auto alignedBufferPtr = alignedMalloc(MemoryConstants::cacheLineSize + 1, MemoryConstants::cacheLineSize); auto unalignedBufferPtr = ptrOffset(alignedBufferPtr, 1); auto alignedHostPtr = alignedMalloc(MemoryConstants::cacheLineSize + 1, MemoryConstants::cacheLineSize); auto unalignedHostPtr = ptrOffset(alignedHostPtr, 1); auto smallBufferPtr = alignedMalloc(1 * MB, 
MemoryConstants::cacheLineSize); size_t largeBufferSize = 11u * MemoryConstants::megaByte; auto mockDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); auto mockContext = std::unique_ptr(new MockContext(mockDevice.get())); auto memoryManager = static_cast(mockDevice->getMemoryManager()); memoryManager->turnOnFakingBigAllocations(); std::unique_ptr buffer(Buffer::create(context, CL_MEM_USE_HOST_PTR, size, alignedBufferPtr, retVal)); EXPECT_EQ(retVal, CL_SUCCESS); EXPECT_TRUE(buffer->isMemObjZeroCopy()); // zeroCopy == true && aligned/unaligned hostPtr EXPECT_TRUE(buffer->isReadWriteOnCpuPreferred(alignedHostPtr, MemoryConstants::cacheLineSize + 1, mockDevice->getDevice())); EXPECT_TRUE(buffer->isReadWriteOnCpuPreferred(unalignedHostPtr, MemoryConstants::cacheLineSize, mockDevice->getDevice())); buffer.reset(Buffer::create(context, CL_MEM_USE_HOST_PTR, size, unalignedBufferPtr, retVal)); EXPECT_EQ(retVal, CL_SUCCESS); // zeroCopy == false && unaligned hostPtr EXPECT_TRUE(buffer->isReadWriteOnCpuPreferred(unalignedHostPtr, MemoryConstants::cacheLineSize, mockDevice->getDevice())); buffer.reset(Buffer::create(mockContext.get(), CL_MEM_USE_HOST_PTR, 1 * MB, smallBufferPtr, retVal)); // platform LP == true && size <= 10 MB mockDevice->deviceInfo.platformLP = true; EXPECT_TRUE(buffer->isReadWriteOnCpuPreferred(smallBufferPtr, 1 * MB, mockDevice->getDevice())); // platform LP == false && size <= 10 MB mockDevice->deviceInfo.platformLP = false; EXPECT_TRUE(buffer->isReadWriteOnCpuPreferred(smallBufferPtr, 1 * MB, mockDevice->getDevice())); buffer.reset(Buffer::create(mockContext.get(), CL_MEM_ALLOC_HOST_PTR, largeBufferSize, nullptr, retVal)); // platform LP == false && size > 10 MB mockDevice->deviceInfo.platformLP = false; EXPECT_TRUE(buffer->isReadWriteOnCpuPreferred(buffer->getCpuAddress(), largeBufferSize, mockDevice->getDevice())); alignedFree(smallBufferPtr); alignedFree(alignedHostPtr); alignedFree(alignedBufferPtr); } 
HWTEST_F(ReadWriteBufferCpuCopyTest, GivenSpecificMemoryStructuresWhenReadingWritingMemoryThenCpuReadWriteIsNotAllowed) { cl_int retVal; size_t size = MemoryConstants::cacheLineSize; auto alignedBufferPtr = alignedMalloc(MemoryConstants::cacheLineSize + 1, MemoryConstants::cacheLineSize); auto unalignedBufferPtr = ptrOffset(alignedBufferPtr, 1); auto alignedHostPtr = alignedMalloc(MemoryConstants::cacheLineSize + 1, MemoryConstants::cacheLineSize); auto unalignedHostPtr = ptrOffset(alignedHostPtr, 1); size_t largeBufferSize = 11u * MemoryConstants::megaByte; auto mockDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); auto mockContext = std::make_unique(mockDevice.get()); auto mockCommandQueue = std::make_unique(*mockContext); auto memoryManager = static_cast(mockDevice->getMemoryManager()); memoryManager->turnOnFakingBigAllocations(); std::unique_ptr buffer(Buffer::create(context, CL_MEM_USE_HOST_PTR, size, alignedBufferPtr, retVal)); EXPECT_EQ(retVal, CL_SUCCESS); EXPECT_TRUE(buffer->isMemObjZeroCopy()); // non blocking EXPECT_FALSE(mockCommandQueue->bufferCpuCopyAllowed(buffer.get(), CL_COMMAND_NDRANGE_KERNEL, false, size, unalignedHostPtr, 0u, nullptr)); buffer.reset(Buffer::create(context, CL_MEM_USE_HOST_PTR, size, unalignedBufferPtr, retVal)); EXPECT_EQ(retVal, CL_SUCCESS); // zeroCopy == false && aligned hostPtr EXPECT_FALSE(buffer->isReadWriteOnCpuPreferred(alignedHostPtr, MemoryConstants::cacheLineSize + 1, mockDevice->getDevice())); buffer.reset(Buffer::create(mockContext.get(), CL_MEM_ALLOC_HOST_PTR, largeBufferSize, nullptr, retVal)); // platform LP == true && size > 10 MB mockDevice->deviceInfo.platformLP = true; EXPECT_FALSE(buffer->isReadWriteOnCpuPreferred(buffer->getCpuAddress(), largeBufferSize, mockDevice->getDevice())); alignedFree(alignedHostPtr); alignedFree(alignedBufferPtr); } HWTEST_F(ReadWriteBufferCpuCopyTest, givenDebugVariableToDisableCpuCopiesWhenBufferCpuCopyAllowedIsCalledThenItReturnsFalse) { 
DebugManagerStateRestore restorer; DebugManager.flags.EnableLocalMemory.set(false); DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast(LocalMemoryAccessMode::Default)); cl_int retVal; auto mockDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); auto mockContext = std::make_unique(mockDevice.get()); auto mockCommandQueue = std::make_unique(*mockContext); std::unique_ptr buffer(Buffer::create(context, CL_MEM_ALLOC_HOST_PTR, MemoryConstants::pageSize, nullptr, retVal)); EXPECT_EQ(retVal, CL_SUCCESS); EXPECT_TRUE(buffer->isMemObjZeroCopy()); EXPECT_TRUE(mockCommandQueue->bufferCpuCopyAllowed(buffer.get(), CL_COMMAND_READ_BUFFER, true, MemoryConstants::pageSize, reinterpret_cast(0x1000), 0u, nullptr)); EXPECT_TRUE(mockCommandQueue->bufferCpuCopyAllowed(buffer.get(), CL_COMMAND_WRITE_BUFFER, true, MemoryConstants::pageSize, reinterpret_cast(0x1000), 0u, nullptr)); DebugManager.flags.DoCpuCopyOnReadBuffer.set(0); EXPECT_FALSE(mockCommandQueue->bufferCpuCopyAllowed(buffer.get(), CL_COMMAND_READ_BUFFER, true, MemoryConstants::pageSize, reinterpret_cast(0x1000), 0u, nullptr)); EXPECT_TRUE(mockCommandQueue->bufferCpuCopyAllowed(buffer.get(), CL_COMMAND_WRITE_BUFFER, true, MemoryConstants::pageSize, reinterpret_cast(0x1000), 0u, nullptr)); DebugManager.flags.DoCpuCopyOnWriteBuffer.set(0); EXPECT_FALSE(mockCommandQueue->bufferCpuCopyAllowed(buffer.get(), CL_COMMAND_READ_BUFFER, true, MemoryConstants::pageSize, reinterpret_cast(0x1000), 0u, nullptr)); EXPECT_FALSE(mockCommandQueue->bufferCpuCopyAllowed(buffer.get(), CL_COMMAND_WRITE_BUFFER, true, MemoryConstants::pageSize, reinterpret_cast(0x1000), 0u, nullptr)); } TEST(ReadWriteBufferOnCpu, givenNoHostPtrAndAlignedSizeWhenMemoryAllocationIsInNonSystemMemoryPoolThenIsReadWriteOnCpuAllowedReturnsFalse) { DebugManagerStateRestore restorer; DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast(LocalMemoryAccessMode::Default)); MockExecutionEnvironment *executionEnvironment = new 
MockExecutionEnvironment(nullptr, false, 1u); MockMemoryManager *memoryManager = new MockMemoryManager(*executionEnvironment); executionEnvironment->memoryManager.reset(memoryManager); auto device = std::make_unique(MockDevice::createWithExecutionEnvironment(nullptr, executionEnvironment, 0u)); MockContext ctx(device.get()); cl_int retVal = 0; cl_mem_flags flags = CL_MEM_READ_WRITE; std::unique_ptr buffer(Buffer::create(&ctx, flags, MemoryConstants::pageSize, nullptr, retVal)); ASSERT_NE(nullptr, buffer.get()); EXPECT_TRUE(buffer->isReadWriteOnCpuAllowed(device->getDevice())); EXPECT_TRUE(buffer->isReadWriteOnCpuPreferred(reinterpret_cast(0x1000), MemoryConstants::pageSize, device->getDevice())); reinterpret_cast(buffer->getGraphicsAllocation(device->getRootDeviceIndex()))->overrideMemoryPool(MemoryPool::SystemCpuInaccessible); //read write on CPU is allowed, but not preferred. We can access this memory via Lock. EXPECT_TRUE(buffer->isReadWriteOnCpuAllowed(device->getDevice())); EXPECT_FALSE(buffer->isReadWriteOnCpuPreferred(reinterpret_cast(0x1000), MemoryConstants::pageSize, device->getDevice())); } TEST(ReadWriteBufferOnCpu, givenPointerThatRequiresCpuCopyWhenCpuCopyIsEvaluatedThenTrueIsReturned) { DebugManagerStateRestore restorer; DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast(LocalMemoryAccessMode::Default)); MockExecutionEnvironment *executionEnvironment = new MockExecutionEnvironment(nullptr, false, 1u); MockMemoryManager *memoryManager = new MockMemoryManager(*executionEnvironment); executionEnvironment->memoryManager.reset(memoryManager); auto device = std::make_unique(MockDevice::createWithExecutionEnvironment(nullptr, executionEnvironment, 0u)); MockContext context(device.get()); cl_int retVal = 0; cl_mem_flags flags = CL_MEM_READ_WRITE; std::unique_ptr buffer(Buffer::create(&context, flags, MemoryConstants::pageSize, nullptr, retVal)); ASSERT_NE(nullptr, buffer.get()); auto mockCommandQueue = std::make_unique(context); 
EXPECT_FALSE(mockCommandQueue->bufferCpuCopyAllowed(buffer.get(), CL_COMMAND_READ_BUFFER, false, MemoryConstants::pageSize, nullptr, 0u, nullptr)); memoryManager->cpuCopyRequired = true; EXPECT_TRUE(mockCommandQueue->bufferCpuCopyAllowed(buffer.get(), CL_COMMAND_READ_BUFFER, false, MemoryConstants::pageSize, nullptr, 0u, nullptr)); } TEST(ReadWriteBufferOnCpu, givenPointerThatRequiresCpuCopyButItIsNotPossibleWhenCpuCopyIsEvaluatedThenFalseIsReturned) { DebugManagerStateRestore restorer; DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast(LocalMemoryAccessMode::Default)); MockExecutionEnvironment *executionEnvironment = new MockExecutionEnvironment(nullptr, false, 1u); MockMemoryManager *memoryManager = new MockMemoryManager(*executionEnvironment); executionEnvironment->memoryManager.reset(memoryManager); auto device = std::make_unique(MockDevice::createWithExecutionEnvironment(nullptr, executionEnvironment, 0u)); MockContext context(device.get()); cl_int retVal = 0; cl_mem_flags flags = CL_MEM_READ_WRITE; std::unique_ptr buffer(Buffer::create(&context, flags, MemoryConstants::pageSize, nullptr, retVal)); ASSERT_NE(nullptr, buffer.get()); auto mockCommandQueue = std::make_unique(context); buffer->forceDisallowCPUCopy = true; EXPECT_FALSE(mockCommandQueue->bufferCpuCopyAllowed(buffer.get(), CL_COMMAND_READ_BUFFER, true, MemoryConstants::pageSize, nullptr, 0u, nullptr)); memoryManager->cpuCopyRequired = true; EXPECT_FALSE(mockCommandQueue->bufferCpuCopyAllowed(buffer.get(), CL_COMMAND_READ_BUFFER, true, MemoryConstants::pageSize, nullptr, 0u, nullptr)); } TEST(ReadWriteBufferOnCpu, whenLocalMemoryPoolAllocationIsAskedForPreferenceThenCpuIsNotChoosen) { DebugManagerStateRestore restorer; DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast(LocalMemoryAccessMode::Default)); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockContext ctx(device.get()); cl_int retVal = 0; std::unique_ptr 
buffer(Buffer::create(&ctx, CL_MEM_READ_WRITE, MemoryConstants::pageSize, nullptr, retVal)); ASSERT_NE(nullptr, buffer.get()); reinterpret_cast(buffer->getGraphicsAllocation(device->getRootDeviceIndex()))->overrideMemoryPool(MemoryPool::LocalMemory); EXPECT_FALSE(buffer->isReadWriteOnCpuAllowed(device->getDevice())); EXPECT_FALSE(buffer->isReadWriteOnCpuPreferred(reinterpret_cast(0x1000), MemoryConstants::pageSize, device->getDevice())); } compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/sync_buffer_handler_tests.cpp000066400000000000000000000243541422164147700324110ustar00rootroot00000000000000/* * Copyright (C) 2019-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/program/sync_buffer_handler.h" #include "shared/test/common/mocks/ult_device_factory.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/api/api.h" #include "opencl/test/unit_test/fixtures/enqueue_handler_fixture.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_mdi.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "engine_node.h" using namespace NEO; class MockSyncBufferHandler : public SyncBufferHandler { public: using SyncBufferHandler::bufferSize; using SyncBufferHandler::graphicsAllocation; using SyncBufferHandler::usedBufferSize; }; class SyncBufferEnqueueHandlerTest : public EnqueueHandlerTest { public: void SetUp() override { hardwareInfo = *defaultHwInfo; hardwareInfo.capabilityTable.blitterOperationsSupported = true; uint64_t hwInfoConfig = defaultHardwareInfoConfigTable[productFamily]; hardwareInfoSetup[productFamily](&hardwareInfo, true, hwInfoConfig); SetUpImpl(&hardwareInfo); } void TearDown() override { context->decRefInternal(); delete pClDevice; pClDevice = nullptr; pDevice = nullptr; } void SetUpImpl(const NEO::HardwareInfo *hardwareInfo) { pDevice = 
MockDevice::createWithNewExecutionEnvironment(hardwareInfo); ASSERT_NE(nullptr, pDevice); pClDevice = new MockClDevice{pDevice}; ASSERT_NE(nullptr, pClDevice); auto &commandStreamReceiver = pDevice->getGpgpuCommandStreamReceiver(); pTagMemory = commandStreamReceiver.getTagAddress(); ASSERT_NE(nullptr, const_cast(pTagMemory)); context = new NEO::MockContext(pClDevice); } }; class SyncBufferHandlerTest : public SyncBufferEnqueueHandlerTest { public: void SetUp() override {} void TearDown() override {} template void SetUpT() { SyncBufferEnqueueHandlerTest::SetUp(); kernelInternals = std::make_unique(*pClDevice, context); kernelInternals->kernelInfo.kernelDescriptor.kernelAttributes.bufferAddressingMode = KernelDescriptor::Stateless; kernel = kernelInternals->mockKernel; kernel->executionType = KernelExecutionType::Concurrent; commandQueue = reinterpret_cast(new MockCommandQueueHw(context, pClDevice, 0)); hwHelper = &HwHelper::get(pClDevice->getHardwareInfo().platform.eRenderCoreFamily); if (hwHelper->isCooperativeEngineSupported(pClDevice->getHardwareInfo())) { commandQueue->gpgpuEngine = &pClDevice->getEngine(aub_stream::EngineType::ENGINE_CCS, EngineUsage::Cooperative); } } template void TearDownT() { commandQueue->release(); kernelInternals.reset(); SyncBufferEnqueueHandlerTest::TearDown(); } void patchAllocateSyncBuffer() { kernelInternals->kernelInfo.setSyncBuffer(sizeof(uint8_t), 0, 0); } MockSyncBufferHandler *getSyncBufferHandler() { return reinterpret_cast(pDevice->syncBufferHandler.get()); } cl_int enqueueNDCount() { return clEnqueueNDCountKernelINTEL(commandQueue, kernelInternals->mockMultiDeviceKernel, workDim, gwOffset, workgroupCount, lws, 0, nullptr, nullptr); } bool isCooperativeDispatchSupported() { auto engineGroupType = hwHelper->getEngineGroupType(commandQueue->getGpgpuEngine().getEngineType(), commandQueue->getGpgpuEngine().getEngineUsage(), hardwareInfo); return hwHelper->isCooperativeDispatchSupported(engineGroupType, 
pDevice->getHardwareInfo()); } const cl_uint workDim = 1; const size_t gwOffset[3] = {0, 0, 0}; const size_t workItemsCount = 16; const size_t lws[3] = {workItemsCount, 1, 1}; size_t workgroupCount[3] = {workItemsCount, 1, 1}; std::unique_ptr kernelInternals; MockKernel *kernel; MockCommandQueue *commandQueue; HwHelper *hwHelper; }; HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenAllocateSyncBufferPatchAndConcurrentKernelWhenEnqueuingKernelThenSyncBufferIsUsed) { patchAllocateSyncBuffer(); enqueueNDCount(); auto syncBufferHandler = getSyncBufferHandler(); EXPECT_EQ(workItemsCount, syncBufferHandler->usedBufferSize); commandQueue->flush(); auto pCsr = commandQueue->getGpgpuEngine().commandStreamReceiver; EXPECT_EQ(syncBufferHandler->graphicsAllocation->getTaskCount(pCsr->getOsContext().getContextId()), static_cast *>(pCsr)->latestSentTaskCount); } HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenAllocateSyncBufferPatchAndConcurrentKernelWhenEnqueuingKernelThenSyncBufferOffsetIsProperlyAligned) { patchAllocateSyncBuffer(); workgroupCount[0] = 1; enqueueNDCount(); auto syncBufferHandler = getSyncBufferHandler(); EXPECT_EQ(CommonConstants::maximalSizeOfAtomicType, syncBufferHandler->usedBufferSize); enqueueNDCount(); EXPECT_EQ(2u * CommonConstants::maximalSizeOfAtomicType, syncBufferHandler->usedBufferSize); } HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenConcurrentKernelWithoutAllocateSyncBufferPatchWhenEnqueuingConcurrentKernelThenSyncBufferIsNotCreated) { auto retVal = enqueueNDCount(); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(nullptr, getSyncBufferHandler()); } HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenDefaultKernelUsingSyncBufferWhenEnqueuingKernelThenErrorIsReturnedAndSyncBufferIsNotCreated) { patchAllocateSyncBuffer(); kernel->executionType = KernelExecutionType::Default; auto retVal = enqueueNDCount(); EXPECT_EQ(CL_INVALID_KERNEL, retVal); EXPECT_EQ(nullptr, getSyncBufferHandler()); } HWTEST_TEMPLATED_F(SyncBufferHandlerTest, 
GivenConcurrentKernelWithAllocateSyncBufferPatchWhenEnqueuingConcurrentKernelThenSyncBufferIsCreated) { patchAllocateSyncBuffer(); auto retVal = enqueueNDCount(); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, getSyncBufferHandler()); } HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenMaxWorkgroupCountWhenEnqueuingConcurrentKernelThenSuccessIsReturned) { auto maxWorkGroupCount = kernel->getMaxWorkGroupCount(workDim, lws, commandQueue); workgroupCount[0] = maxWorkGroupCount; auto retVal = enqueueNDCount(); EXPECT_EQ(CL_SUCCESS, retVal); } HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenTooHighWorkgroupCountWhenEnqueuingConcurrentKernelThenErrorIsReturned) { size_t maxWorkGroupCount = kernel->getMaxWorkGroupCount(workDim, lws, commandQueue); workgroupCount[0] = maxWorkGroupCount + 1; auto retVal = enqueueNDCount(); EXPECT_EQ(CL_INVALID_VALUE, retVal); } HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenSyncBufferFullWhenEnqueuingKernelThenNewBufferIsAllocated) { patchAllocateSyncBuffer(); enqueueNDCount(); auto syncBufferHandler = getSyncBufferHandler(); syncBufferHandler->usedBufferSize = syncBufferHandler->bufferSize; enqueueNDCount(); EXPECT_EQ(workItemsCount, syncBufferHandler->usedBufferSize); } HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenSshRequiredWhenPatchingSyncBufferThenSshIsProperlyPatched) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; kernelInternals->kernelInfo.setBufferAddressingMode(KernelDescriptor::BindfulAndStateless); patchAllocateSyncBuffer(); pDevice->allocateSyncBufferHandler(); auto syncBufferHandler = getSyncBufferHandler(); auto surfaceState = reinterpret_cast(ptrOffset(kernel->getSurfaceStateHeap(), kernel->getKernelInfo().kernelDescriptor.payloadMappings.implicitArgs.syncBufferAddress.bindful)); auto bufferAddress = syncBufferHandler->graphicsAllocation->getGpuAddress(); surfaceState->setSurfaceBaseAddress(bufferAddress + 1); auto surfaceAddress = surfaceState->getSurfaceBaseAddress(); EXPECT_NE(bufferAddress, 
surfaceAddress); kernel->patchSyncBuffer(syncBufferHandler->graphicsAllocation, syncBufferHandler->usedBufferSize); surfaceAddress = surfaceState->getSurfaceBaseAddress(); EXPECT_EQ(bufferAddress, surfaceAddress); } TEST(SyncBufferHandlerDeviceTest, GivenRootDeviceWhenAllocateSyncBufferIsCalledTwiceThenTheObjectIsCreatedOnlyOnce) { const size_t testUsedBufferSize = 100; MockDevice rootDevice; rootDevice.allocateSyncBufferHandler(); auto syncBufferHandler = reinterpret_cast(rootDevice.syncBufferHandler.get()); ASSERT_NE(syncBufferHandler->usedBufferSize, testUsedBufferSize); syncBufferHandler->usedBufferSize = testUsedBufferSize; rootDevice.allocateSyncBufferHandler(); syncBufferHandler = reinterpret_cast(rootDevice.syncBufferHandler.get()); EXPECT_EQ(testUsedBufferSize, syncBufferHandler->usedBufferSize); } TEST(SyncBufferHandlerDeviceTest, GivenSubDeviceWhenAllocateSyncBufferIsCalledTwiceThenTheObjectIsCreatedOnlyOnce) { UltDeviceFactory ultDeviceFactory{1, 2}; auto pSubDevice = ultDeviceFactory.subDevices[0]; pSubDevice->allocateSyncBufferHandler(); auto syncBufferHandler = reinterpret_cast(pSubDevice->syncBufferHandler.get()); const size_t testUsedBufferSize = 100; ASSERT_NE(syncBufferHandler->usedBufferSize, testUsedBufferSize); syncBufferHandler->usedBufferSize = testUsedBufferSize; pSubDevice->allocateSyncBufferHandler(); syncBufferHandler = reinterpret_cast(pSubDevice->syncBufferHandler.get()); EXPECT_EQ(testUsedBufferSize, syncBufferHandler->usedBufferSize); } TEST(SyncBufferHandlerDeviceTest, givenMultipleSubDevicesWhenAllocatingSyncBufferThenClonePageTables) { UltDeviceFactory ultDeviceFactory{1, 2}; auto rootDevice = ultDeviceFactory.rootDevices[0]; rootDevice->allocateSyncBufferHandler(); auto syncBufferHandler = reinterpret_cast(rootDevice->syncBufferHandler.get()); EXPECT_TRUE(syncBufferHandler->graphicsAllocation->storageInfo.cloningOfPageTables); } 
compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/work_group_size_tests.cpp000066400000000000000000000331201422164147700316260ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/local_work_size.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/command_queue/gpgpu_walker.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "patch_shared.h" using namespace NEO; struct WorkGroupSizeBase { template size_t computeWalkerWorkItems(typename FamilyType::GPGPU_WALKER &pCmd) { typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; // Compute the SIMD lane mask size_t simd = pCmd.getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD32 ? 32 : pCmd.getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD16 ? 16 : 8; uint64_t simdMask = maxNBitValue(simd); // Mask off lanes based on the execution masks auto laneMaskRight = pCmd.getRightExecutionMask() & simdMask; auto lanesPerThreadX = 0; while (laneMaskRight) { lanesPerThreadX += laneMaskRight & 1; laneMaskRight >>= 1; } auto numWorkItems = ((pCmd.getThreadWidthCounterMaximum() - 1) * simd + lanesPerThreadX) * pCmd.getThreadGroupIdXDimension(); numWorkItems *= pCmd.getThreadGroupIdYDimension(); numWorkItems *= pCmd.getThreadGroupIdZDimension(); return numWorkItems; } template void verify(uint32_t simdSize, size_t dimX, size_t dimY, size_t dimZ) { size_t globalOffsets[] = {0, 0, 0}; size_t workItems[] = { dimX, dimY, dimZ}; int dims = (dimX > 1 ? 1 : 0) + (dimY > 1 ? 1 : 0) + (dimZ > 1 ? 
1 : 0); size_t workGroupSize[3]; auto maxWorkGroupSize = 256u; if (DebugManager.flags.EnableComputeWorkSizeND.get()) { WorkSizeInfo wsInfo(maxWorkGroupSize, 0u, simdSize, 0u, ::defaultHwInfo.get(), 32u, 0u, false, false, false); computeWorkgroupSizeND(wsInfo, workGroupSize, workItems, dims); } else { if (dims == 1) { computeWorkgroupSize1D(maxWorkGroupSize, workGroupSize, workItems, simdSize); } else { computeWorkgroupSize2D(maxWorkGroupSize, workGroupSize, workItems, simdSize); } } auto totalWorkItems = workItems[0] * workItems[1] * workItems[2]; auto localWorkItems = workGroupSize[0] * workGroupSize[1] * workGroupSize[2]; EXPECT_GT(localWorkItems, 0u); EXPECT_LE(localWorkItems, 256u); auto xRemainder = workItems[0] % workGroupSize[0]; auto yRemainder = workItems[1] % workGroupSize[1]; auto zRemainder = workItems[2] % workGroupSize[2]; //No remainders EXPECT_EQ(0u, xRemainder); EXPECT_EQ(0u, yRemainder); EXPECT_EQ(0u, zRemainder); //Now setup GPGPU Walker typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; GPGPU_WALKER pCmd = FamilyType::cmdInitGpgpuWalker; const size_t workGroupsStart[3] = {0, 0, 0}; const size_t workGroupsNum[3] = { Math::divideAndRoundUp(workItems[0], workGroupSize[0]), Math::divideAndRoundUp(workItems[1], workGroupSize[1]), Math::divideAndRoundUp(workItems[2], workGroupSize[2])}; KernelDescriptor kd; GpgpuWalkerHelper::setGpgpuWalkerThreadData(&pCmd, kd, globalOffsets, workGroupsStart, workGroupsNum, workGroupSize, simdSize, dims, true, false, 0u); //And check if it is programmed correctly auto numWorkItems = computeWalkerWorkItems(pCmd); EXPECT_EQ(totalWorkItems, numWorkItems); if (xRemainder | yRemainder | zRemainder | (totalWorkItems != numWorkItems)) { std::stringstream regionString; regionString << "workItems = <" << workItems[0] << ", " << workItems[1] << ", " << workItems[2] << ">; "; regionString << "LWS = <" << workGroupSize[0] << ", " << workGroupSize[1] << ", " << workGroupSize[2] << ">; "; regionString << "thread = <" << 
pCmd.getThreadGroupIdXDimension() << ", " << pCmd.getThreadGroupIdYDimension() << ", " << pCmd.getThreadGroupIdZDimension() << ">; "; regionString << "threadWidth = " << std::dec << pCmd.getThreadWidthCounterMaximum() << std::dec << "; "; regionString << "rightMask = " << std::hex << pCmd.getRightExecutionMask() << std::dec << "; "; EXPECT_FALSE(true) << regionString.str(); } } }; struct WorkGroupSizeChannels : public WorkGroupSizeBase, public ::testing::TestWithParam> { }; HWCMDTEST_P(IGFX_GEN8_CORE, WorkGroupSizeChannels, GivenAllChannelsWhenEnablingComputeWorkSizeNDDefaultThenSizeAndDimAreCorrect) { uint32_t simdSize; size_t workDim; std::tie(simdSize, workDim) = GetParam(); verify(simdSize, workDim, workDim, workDim); } HWCMDTEST_P(IGFX_GEN8_CORE, WorkGroupSizeChannels, GivenAllChannelsWhenEnablingComputeWorkSizeNDEnabledThenSizeAndDimAreCorrect) { bool isWorkGroupSizeEnabled = DebugManager.flags.EnableComputeWorkSizeND.get(); DebugManager.flags.EnableComputeWorkSizeND.set(true); uint32_t simdSize; size_t workDim; std::tie(simdSize, workDim) = GetParam(); verify(simdSize, workDim, workDim, workDim); DebugManager.flags.EnableComputeWorkSizeND.set(isWorkGroupSizeEnabled); } HWCMDTEST_P(IGFX_GEN8_CORE, WorkGroupSizeChannels, GivenAllChannelsWhenEnablingComputeWorkSizeNDDisabledThenSizeAndDimAreCorrect) { bool isWorkGroupSizeEnabled = DebugManager.flags.EnableComputeWorkSizeND.get(); DebugManager.flags.EnableComputeWorkSizeND.set(false); uint32_t simdSize; size_t workDim; std::tie(simdSize, workDim) = GetParam(); verify(simdSize, workDim, workDim, workDim); DebugManager.flags.EnableComputeWorkSizeND.set(isWorkGroupSizeEnabled); } HWCMDTEST_P(IGFX_GEN8_CORE, WorkGroupSizeChannels, GivenAllChannelsWhenEnablingComputeWorkSizeSquaredDefaultThenSizeAndDimAreCorrect) { uint32_t simdSize; size_t workDim; std::tie(simdSize, workDim) = GetParam(); verify(simdSize, workDim, workDim, workDim); } HWCMDTEST_P(IGFX_GEN8_CORE, WorkGroupSizeChannels, 
GivenAllChannelsWhenEnablingComputeWorkSizeSquaredEnabledThenSizeAndDimAreCorrect) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableComputeWorkSizeSquared.set(true); DebugManager.flags.EnableComputeWorkSizeND.set(false); uint32_t simdSize; size_t workDim; std::tie(simdSize, workDim) = GetParam(); verify(simdSize, workDim, workDim, workDim); } HWCMDTEST_P(IGFX_GEN8_CORE, WorkGroupSizeChannels, GivenAllChannelsWhenEnablingComputeWorkSizeSquaredDisabledThenSizeAndDimAreCorrect) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableComputeWorkSizeSquared.set(false); DebugManager.flags.EnableComputeWorkSizeND.set(false); uint32_t simdSize; size_t workDim; std::tie(simdSize, workDim) = GetParam(); verify(simdSize, workDim, workDim, workDim); } HWCMDTEST_P(IGFX_GEN8_CORE, WorkGroupSizeChannels, GivenOnlyXWhenEnablingComputeWorkSizeNDDefaultThenSizeAndDimAreCorrect) { uint32_t simdSize; size_t workDim; std::tie(simdSize, workDim) = GetParam(); verify(simdSize, workDim, 1, 1); } HWCMDTEST_P(IGFX_GEN8_CORE, WorkGroupSizeChannels, GivenOnlyXWhenEnablingComputeWorkSizeNDEnabledThenSizeAndDimAreCorrect) { bool isWorkGroupSizeEnabled = DebugManager.flags.EnableComputeWorkSizeND.get(); DebugManager.flags.EnableComputeWorkSizeND.set(true); uint32_t simdSize; size_t workDim; std::tie(simdSize, workDim) = GetParam(); verify(simdSize, workDim, 1, 1); DebugManager.flags.EnableComputeWorkSizeND.set(isWorkGroupSizeEnabled); } HWCMDTEST_P(IGFX_GEN8_CORE, WorkGroupSizeChannels, GivenOnlyXWhenEnablingComputeWorkSizeNDDisabledThenSizeAndDimAreCorrect) { bool isWorkGroupSizeEnabled = DebugManager.flags.EnableComputeWorkSizeND.get(); DebugManager.flags.EnableComputeWorkSizeND.set(false); uint32_t simdSize; size_t workDim; std::tie(simdSize, workDim) = GetParam(); verify(simdSize, workDim, 1, 1); DebugManager.flags.EnableComputeWorkSizeND.set(isWorkGroupSizeEnabled); } HWCMDTEST_P(IGFX_GEN8_CORE, WorkGroupSizeChannels, 
GivenOnlyYWhenEnablingComputeWorkSizeNDDefaultThenSizeAndDimAreCorrect) { uint32_t simdSize; size_t workDim; std::tie(simdSize, workDim) = GetParam(); verify(simdSize, 1, workDim, 1); } HWCMDTEST_P(IGFX_GEN8_CORE, WorkGroupSizeChannels, GivenOnlyYWhenEnablingComputeWorkSizeNDEnabledThenSizeAndDimAreCorrect) { bool isWorkGroupSizeEnabled = DebugManager.flags.EnableComputeWorkSizeND.get(); DebugManager.flags.EnableComputeWorkSizeND.set(true); uint32_t simdSize; size_t workDim; std::tie(simdSize, workDim) = GetParam(); verify(simdSize, 1, workDim, 1); DebugManager.flags.EnableComputeWorkSizeND.set(isWorkGroupSizeEnabled); } HWCMDTEST_P(IGFX_GEN8_CORE, WorkGroupSizeChannels, GivenOnlyYWhenEnablingComputeWorkSizeNDDisabledThenSizeAndDimAreCorrect) { bool isWorkGroupSizeEnabled = DebugManager.flags.EnableComputeWorkSizeND.get(); DebugManager.flags.EnableComputeWorkSizeND.set(false); uint32_t simdSize; size_t workDim; std::tie(simdSize, workDim) = GetParam(); verify(simdSize, 1, workDim, 1); DebugManager.flags.EnableComputeWorkSizeND.set(isWorkGroupSizeEnabled); } HWCMDTEST_P(IGFX_GEN8_CORE, WorkGroupSizeChannels, GivenOnlyZWhenEnablingComputeWorkSizeNDDefaultThenSizeAndDimAreCorrect) { uint32_t simdSize; size_t workDim; std::tie(simdSize, workDim) = GetParam(); verify(simdSize, 1, 1, workDim); } HWCMDTEST_P(IGFX_GEN8_CORE, WorkGroupSizeChannels, GivenOnlyZWhenEnablingComputeWorkSizeNDEnabledThenSizeAndDimAreCorrect) { bool isWorkGroupSizeEnabled = DebugManager.flags.EnableComputeWorkSizeND.get(); DebugManager.flags.EnableComputeWorkSizeND.set(true); uint32_t simdSize; size_t workDim; std::tie(simdSize, workDim) = GetParam(); verify(simdSize, 1, 1, workDim); DebugManager.flags.EnableComputeWorkSizeND.set(isWorkGroupSizeEnabled); } HWCMDTEST_P(IGFX_GEN8_CORE, WorkGroupSizeChannels, GivenOnlyZWhenEnablingComputeWorkSizeNDDisabledThenSizeAndDimAreCorrect) { bool isWorkGroupSizeEnabled = DebugManager.flags.EnableComputeWorkSizeND.get(); 
DebugManager.flags.EnableComputeWorkSizeND.set(false); uint32_t simdSize; size_t workDim; std::tie(simdSize, workDim) = GetParam(); verify(simdSize, 1, 1, workDim); DebugManager.flags.EnableComputeWorkSizeND.set(isWorkGroupSizeEnabled); } static uint32_t simdSizes[] = { 8, 16, 32}; static size_t workItemCases1D[] = { 1, 2, 3, 4, 5, 7, 8, 9, 15, 16, 17, 31, 32, 33, 63, 64, 65, 127, 128, 129, 189, 190, 191, 255, 256, 257, 399, 400, 401, 511, 512, 513, 1007, 1008, 1009, 1023, 1024, 1025, 1400, 1401, 1402}; INSTANTIATE_TEST_CASE_P(wgs, WorkGroupSizeChannels, ::testing::Combine( ::testing::ValuesIn(simdSizes), ::testing::ValuesIn(workItemCases1D))); // ==== ==== ==== ==== ==== ==== ==== ==== ==== ==== ==== ==== ==== ==== ==== ==== // ==== ==== ==== ==== ==== ==== ==== ==== ==== ==== ==== ==== ==== ==== ==== ==== struct WorkGroupSize2D : public WorkGroupSizeBase, public ::testing::TestWithParam> { }; HWCMDTEST_P(IGFX_GEN8_CORE, WorkGroupSize2D, GivenUsingXandYWorkGroupDimensionsWhenComputingWorkgroupSizeThenSizeAndDimAreCorrect) { uint32_t simdSize; size_t dimX, dimY; std::tie(simdSize, dimX, dimY) = GetParam(); verify(simdSize, dimX, dimY, 1); } static size_t workItemCases2D[] = {1, 2, 3, 7, 15, 31, 63, 127, 255, 511, 1007, 1023, 2047}; INSTANTIATE_TEST_CASE_P(wgs, WorkGroupSize2D, ::testing::Combine( ::testing::ValuesIn(simdSizes), ::testing::ValuesIn(workItemCases2D), ::testing::ValuesIn(workItemCases2D))); // ==== ==== ==== ==== ==== ==== ==== ==== ==== ==== ==== ==== ==== ==== ==== ==== // ==== ==== ==== ==== ==== ==== ==== ==== ==== ==== ==== ==== ==== ==== ==== ==== struct Region { size_t r[3]; }; struct WorkGroupSizeRegion : public WorkGroupSizeBase, public ::testing::TestWithParam> { }; HWCMDTEST_P(IGFX_GEN8_CORE, WorkGroupSizeRegion, GivenUsingAllChannelsWhenComputingWorkgroupSizeThenSizeAndDimAreCorrect) { uint32_t simdSize; Region region; std::tie(simdSize, region) = GetParam(); verify(simdSize, region.r[0], region.r[1], region.r[2]); } Region regionCases[] = 
{ {{1, 1, 1}}, // Trivial case {{9, 9, 10}} // This test case was hit by some AUBCopyBufferRect regressions }; INSTANTIATE_TEST_CASE_P(wgs, WorkGroupSizeRegion, ::testing::Combine( ::testing::ValuesIn(simdSizes), ::testing::ValuesIn(regionCases))); compute-runtime-22.14.22890/opencl/test/unit_test/command_queue/zero_size_enqueue_tests.cpp000066400000000000000000001267371422164147700321570ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/memory_manager/unified_memory_manager.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/event/event.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/test_macros/test_checks_ocl.h" using namespace NEO; class ZeroSizeEnqueueHandlerTest : public Test { public: MockContext context; cl_int retVal; }; class ZeroSizeEnqueueHandlerTestZeroGws : public ZeroSizeEnqueueHandlerTest { public: void SetUp() override { ZeroSizeEnqueueHandlerTest::SetUp(); testGwsInputs[0] = std::make_tuple(1, nullptr); testGwsInputs[1] = std::make_tuple(2, nullptr); testGwsInputs[2] = std::make_tuple(3, nullptr); testGwsInputs[3] = std::make_tuple(1, zeroGWS0); testGwsInputs[4] = std::make_tuple(2, zeroGWS00); testGwsInputs[5] = std::make_tuple(2, zeroGWS01); testGwsInputs[6] = std::make_tuple(2, zeroGWS10); testGwsInputs[7] = std::make_tuple(3, zeroGWS000); testGwsInputs[8] = std::make_tuple(3, zeroGWS011); testGwsInputs[9] = std::make_tuple(3, zeroGWS101); testGwsInputs[10] = std::make_tuple(3, zeroGWS110); testGwsInputs[11] = std::make_tuple(3, zeroGWS001); testGwsInputs[12] = std::make_tuple(3, zeroGWS010); testGwsInputs[13] = 
std::make_tuple(3, zeroGWS100); } size_t zeroGWS0[1] = {0}; size_t zeroGWS00[2] = {0, 0}; size_t zeroGWS01[2] = {0, 1}; size_t zeroGWS10[2] = {1, 0}; size_t zeroGWS000[3] = {0, 0, 0}; size_t zeroGWS011[3] = {0, 1, 1}; size_t zeroGWS101[3] = {1, 0, 1}; size_t zeroGWS110[3] = {1, 1, 0}; size_t zeroGWS001[3] = {0, 0, 1}; size_t zeroGWS010[3] = {0, 1, 0}; size_t zeroGWS100[3] = {1, 0, 0}; std::tuple testGwsInputs[14]; }; HWTEST_F(ZeroSizeEnqueueHandlerTestZeroGws, GivenZeroSizeEnqueueIsDetectedWhenEnqueingKernelThenCommandMarkerShouldBeEnqueued) { auto mockCmdQ = std::unique_ptr>(new MockCommandQueueHw(&context, pClDevice, 0)); MockKernelWithInternals mockKernel(*pClDevice); for (auto testInput : testGwsInputs) { auto workDim = std::get<0>(testInput); auto gws = std::get<1>(testInput); mockCmdQ->lastCommandType = static_cast(CL_COMMAND_COPY_BUFFER); retVal = mockCmdQ->enqueueKernel(mockKernel.mockKernel, workDim, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); } } HWTEST_F(ZeroSizeEnqueueHandlerTestZeroGws, GivenZeroSizeEnqueueIsDetectedAndLocalWorkSizeIsSetWhenEnqueingKernelThenNoExceptionIsThrown) { auto mockCmdQ = std::unique_ptr>(new MockCommandQueueHw(&context, pClDevice, 0)); MockKernelWithInternals mockKernel(*pClDevice); mockKernel.mockProgram->setAllowNonUniform(true); auto workDim = 1; auto gws = zeroGWS0; size_t lws[1] = {1}; EXPECT_NO_THROW(retVal = mockCmdQ->enqueueKernel(mockKernel.mockKernel, workDim, nullptr, gws, lws, 0, nullptr, nullptr)); EXPECT_EQ(CL_SUCCESS, retVal); } HWTEST_F(ZeroSizeEnqueueHandlerTest, GivenZeroSizeEnqueueIsDetectedWhenEnqueingKernelThenEventCommandTypeShoudBeUnchanged) { auto mockCmdQ = std::unique_ptr>(new MockCommandQueueHw(&context, pClDevice, 0)); cl_event event; MockKernelWithInternals mockKernel(*pClDevice); size_t zeroGWS[] = {0, 0, 0}; mockCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, zeroGWS, nullptr, 0, nullptr, 
&event); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); auto pEvent = (Event *)event; cl_command_type cmdType = 0; size_t sizeReturned = 0; auto result = clGetEventInfo(pEvent, CL_EVENT_COMMAND_TYPE, sizeof(cmdType), &cmdType, &sizeReturned); ASSERT_EQ(CL_SUCCESS, result); EXPECT_EQ(static_cast(CL_COMMAND_NDRANGE_KERNEL), cmdType); EXPECT_EQ(sizeof(cl_command_type), sizeReturned); delete pEvent; } HWTEST_F(ZeroSizeEnqueueHandlerTest, GivenZeroSizeEnqueueIsDetectedWhenReadingBufferThenCommandMarkerShouldBeEnqueued) { auto mockCmdQ = std::unique_ptr>(new MockCommandQueueHw(&context, pClDevice, 0)); MockBuffer buffer; size_t memory[1]; size_t zeroSize = 0; mockCmdQ->enqueueReadBuffer(&buffer, CL_FALSE, 0, zeroSize, memory, nullptr, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); } HWTEST_F(ZeroSizeEnqueueHandlerTest, GivenZeroSizeEnqueueIsDetectedWhenReadingBufferThenEventCommandTypeShouldBeUnchanged) { auto mockCmdQ = std::unique_ptr>(new MockCommandQueueHw(&context, pClDevice, 0)); cl_event event; MockBuffer buffer; size_t memory[1]; size_t zeroSize = 0; mockCmdQ->enqueueReadBuffer(&buffer, CL_FALSE, 0, zeroSize, memory, nullptr, 0, nullptr, &event); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); auto pEvent = (Event *)event; cl_command_type cmdType = 0; size_t sizeReturned = 0; auto result = clGetEventInfo(pEvent, CL_EVENT_COMMAND_TYPE, sizeof(cmdType), &cmdType, &sizeReturned); ASSERT_EQ(CL_SUCCESS, result); EXPECT_EQ(static_cast(CL_COMMAND_READ_BUFFER), cmdType); EXPECT_EQ(sizeof(cl_command_type), sizeReturned); delete pEvent; } HWTEST_F(ZeroSizeEnqueueHandlerTest, GivenZeroSizeEnqueueIsDetectedWhenReadingBufferRectThenCommandMarkerShouldBeEnqueued) { auto mockCmdQ = std::unique_ptr>(new MockCommandQueueHw(&context, pClDevice, 0)); MockBuffer buffer; size_t memory[1]; size_t bufferOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t zeroRegion000[] = {0, 0, 0}; 
mockCmdQ->enqueueReadBufferRect(&buffer, CL_FALSE, bufferOrigin, hostOrigin, zeroRegion000, 0, 0, 0, 0, memory, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); size_t zeroRegion011[] = {0, 1, 1}; mockCmdQ->enqueueReadBufferRect(&buffer, CL_FALSE, bufferOrigin, hostOrigin, zeroRegion011, 0, 0, 0, 0, memory, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); size_t zeroRegion101[] = {1, 0, 1}; mockCmdQ->enqueueReadBufferRect(&buffer, CL_FALSE, bufferOrigin, hostOrigin, zeroRegion101, 0, 0, 0, 0, memory, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); size_t zeroRegion110[] = {1, 1, 0}; mockCmdQ->enqueueReadBufferRect(&buffer, CL_FALSE, bufferOrigin, hostOrigin, zeroRegion110, 0, 0, 0, 0, memory, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); size_t zeroRegion001[] = {0, 0, 1}; mockCmdQ->enqueueReadBufferRect(&buffer, CL_FALSE, bufferOrigin, hostOrigin, zeroRegion001, 0, 0, 0, 0, memory, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); size_t zeroRegion010[] = {0, 1, 0}; mockCmdQ->enqueueReadBufferRect(&buffer, CL_FALSE, bufferOrigin, hostOrigin, zeroRegion010, 0, 0, 0, 0, memory, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); size_t zeroRegion100[] = {1, 0, 0}; mockCmdQ->enqueueReadBufferRect(&buffer, CL_FALSE, bufferOrigin, hostOrigin, zeroRegion100, 0, 0, 0, 0, memory, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); } HWTEST_F(ZeroSizeEnqueueHandlerTest, GivenZeroSizeEnqueueIsDetectedWhenReadingBufferRectThenEventCommandTypeShouldBeUnchanged) { auto mockCmdQ = std::unique_ptr>(new MockCommandQueueHw(&context, pClDevice, 0)); cl_event event; MockBuffer buffer; size_t memory[1]; size_t bufferOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t zeroRegion[] = 
{0, 0, 0}; mockCmdQ->enqueueReadBufferRect(&buffer, CL_FALSE, bufferOrigin, hostOrigin, zeroRegion, 0, 0, 0, 0, memory, 0, nullptr, &event); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); auto pEvent = (Event *)event; cl_command_type cmdType = 0; size_t sizeReturned = 0; auto result = clGetEventInfo(pEvent, CL_EVENT_COMMAND_TYPE, sizeof(cmdType), &cmdType, &sizeReturned); ASSERT_EQ(CL_SUCCESS, result); EXPECT_EQ(static_cast(CL_COMMAND_READ_BUFFER_RECT), cmdType); EXPECT_EQ(sizeof(cl_command_type), sizeReturned); delete pEvent; } HWTEST_F(ZeroSizeEnqueueHandlerTest, GivenZeroSizeEnqueueIsDetectedWhenWritingBufferThenCommandMarkerShouldBeEnqueued) { auto mockCmdQ = std::unique_ptr>(new MockCommandQueueHw(&context, pClDevice, 0)); MockBuffer buffer; size_t memory[1]; size_t zeroSize = 0; mockCmdQ->enqueueWriteBuffer(&buffer, CL_FALSE, 0, zeroSize, memory, nullptr, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); } HWTEST_F(ZeroSizeEnqueueHandlerTest, GivenZeroSizeEnqueueIsDetectedWhenWritingBufferThenEventCommandTypeShouldBeUnchanged) { auto mockCmdQ = std::unique_ptr>(new MockCommandQueueHw(&context, pClDevice, 0)); cl_event event; MockBuffer buffer; size_t memory[1]; size_t zeroSize = 0; mockCmdQ->enqueueWriteBuffer(&buffer, CL_FALSE, 0, zeroSize, memory, nullptr, 0, nullptr, &event); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); auto pEvent = (Event *)event; cl_command_type cmdType = 0; size_t sizeReturned = 0; auto result = clGetEventInfo(pEvent, CL_EVENT_COMMAND_TYPE, sizeof(cmdType), &cmdType, &sizeReturned); ASSERT_EQ(CL_SUCCESS, result); EXPECT_EQ(static_cast(CL_COMMAND_WRITE_BUFFER), cmdType); EXPECT_EQ(sizeof(cl_command_type), sizeReturned); delete pEvent; } HWTEST_F(ZeroSizeEnqueueHandlerTest, GivenZeroSizeEnqueueIsDetectedWhenWritingBufferRectThenCommandMarkerShouldBeEnqueued) { auto mockCmdQ = std::unique_ptr>(new MockCommandQueueHw(&context, pClDevice, 0)); 
MockBuffer buffer; size_t memory[1]; size_t bufferOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t zeroRegion000[] = {0, 0, 0}; mockCmdQ->enqueueWriteBufferRect(&buffer, CL_FALSE, bufferOrigin, hostOrigin, zeroRegion000, 0, 0, 0, 0, memory, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); size_t zeroRegion011[] = {0, 1, 1}; mockCmdQ->enqueueWriteBufferRect(&buffer, CL_FALSE, bufferOrigin, hostOrigin, zeroRegion011, 0, 0, 0, 0, memory, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); size_t zeroRegion101[] = {1, 0, 1}; mockCmdQ->enqueueWriteBufferRect(&buffer, CL_FALSE, bufferOrigin, hostOrigin, zeroRegion101, 0, 0, 0, 0, memory, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); size_t zeroRegion110[] = {1, 1, 0}; mockCmdQ->enqueueWriteBufferRect(&buffer, CL_FALSE, bufferOrigin, hostOrigin, zeroRegion110, 0, 0, 0, 0, memory, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); size_t zeroRegion001[] = {0, 0, 1}; mockCmdQ->enqueueWriteBufferRect(&buffer, CL_FALSE, bufferOrigin, hostOrigin, zeroRegion001, 0, 0, 0, 0, memory, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); size_t zeroRegion010[] = {0, 1, 0}; mockCmdQ->enqueueWriteBufferRect(&buffer, CL_FALSE, bufferOrigin, hostOrigin, zeroRegion010, 0, 0, 0, 0, memory, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); size_t zeroRegion100[] = {1, 0, 0}; mockCmdQ->enqueueWriteBufferRect(&buffer, CL_FALSE, bufferOrigin, hostOrigin, zeroRegion100, 0, 0, 0, 0, memory, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); } HWTEST_F(ZeroSizeEnqueueHandlerTest, GivenZeroSizeEnqueueIsDetectedWhenWritingBufferRectThenEventCommandTypeShouldBeUnchanged) { auto mockCmdQ = std::unique_ptr>(new MockCommandQueueHw(&context, pClDevice, 
0)); cl_event event; MockBuffer buffer; size_t memory[1]; size_t bufferOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t zeroRegion[] = {0, 0, 0}; mockCmdQ->enqueueWriteBufferRect(&buffer, CL_FALSE, bufferOrigin, hostOrigin, zeroRegion, 0, 0, 0, 0, memory, 0, nullptr, &event); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); auto pEvent = (Event *)event; cl_command_type cmdType = 0; size_t sizeReturned = 0; auto result = clGetEventInfo(pEvent, CL_EVENT_COMMAND_TYPE, sizeof(cmdType), &cmdType, &sizeReturned); ASSERT_EQ(CL_SUCCESS, result); EXPECT_EQ(static_cast(CL_COMMAND_WRITE_BUFFER_RECT), cmdType); EXPECT_EQ(sizeof(cl_command_type), sizeReturned); delete pEvent; } HWTEST_F(ZeroSizeEnqueueHandlerTest, GivenZeroSizeEnqueueIsDetectedWhenCopyingBufferThenCommandMarkerShouldBeEnqueued) { auto mockCmdQ = std::unique_ptr>(new MockCommandQueueHw(&context, pClDevice, 0)); MockBuffer srcBuffer; MockBuffer dstBuffer; size_t zeroSize = 0; mockCmdQ->enqueueCopyBuffer(&srcBuffer, &dstBuffer, 0, 0, zeroSize, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); } HWTEST_F(ZeroSizeEnqueueHandlerTest, GivenZeroSizeEnqueueIsDetectedWhenCopyingBufferThenEventCommandTypeShouldBeUnchanged) { auto mockCmdQ = std::unique_ptr>(new MockCommandQueueHw(&context, pClDevice, 0)); cl_event event; MockBuffer srcBuffer; MockBuffer dstBuffer; size_t zeroSize = 0; mockCmdQ->enqueueCopyBuffer(&srcBuffer, &dstBuffer, 0, 0, zeroSize, 0, nullptr, &event); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); auto pEvent = (Event *)event; cl_command_type cmdType = 0; size_t sizeReturned = 0; auto result = clGetEventInfo(pEvent, CL_EVENT_COMMAND_TYPE, sizeof(cmdType), &cmdType, &sizeReturned); ASSERT_EQ(CL_SUCCESS, result); EXPECT_EQ(static_cast(CL_COMMAND_COPY_BUFFER), cmdType); EXPECT_EQ(sizeof(cl_command_type), sizeReturned); delete pEvent; } HWTEST_F(ZeroSizeEnqueueHandlerTest, 
GivenZeroSizeEnqueueIsDetectedWhenCopyingBufferRectThenCommandMarkerShouldBeEnqueued) { auto mockCmdQ = std::unique_ptr>(new MockCommandQueueHw(&context, pClDevice, 0)); MockBuffer srcBuffer; MockBuffer dstBuffer; size_t srcOrigin[3] = {1024u, 1, 0}; size_t dstOrigin[3] = {1024u, 1, 0}; size_t zeroRegion000[3] = {0, 0, 0}; mockCmdQ->enqueueCopyBufferRect(&srcBuffer, &dstBuffer, srcOrigin, dstOrigin, zeroRegion000, 0, 0, 0, 0, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); size_t zeroRegion011[3] = {0, 1, 1}; mockCmdQ->enqueueCopyBufferRect(&srcBuffer, &dstBuffer, srcOrigin, dstOrigin, zeroRegion011, 0, 0, 0, 0, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); size_t zeroRegion101[3] = {1, 0, 1}; mockCmdQ->enqueueCopyBufferRect(&srcBuffer, &dstBuffer, srcOrigin, dstOrigin, zeroRegion101, 0, 0, 0, 0, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); size_t zeroRegion110[3] = {1, 1, 0}; mockCmdQ->enqueueCopyBufferRect(&srcBuffer, &dstBuffer, srcOrigin, dstOrigin, zeroRegion110, 0, 0, 0, 0, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); size_t zeroRegion001[3] = {0, 0, 1}; mockCmdQ->enqueueCopyBufferRect(&srcBuffer, &dstBuffer, srcOrigin, dstOrigin, zeroRegion001, 0, 0, 0, 0, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); size_t zeroRegion010[3] = {0, 1, 0}; mockCmdQ->enqueueCopyBufferRect(&srcBuffer, &dstBuffer, srcOrigin, dstOrigin, zeroRegion010, 0, 0, 0, 0, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); size_t zeroRegion100[3] = {1, 0, 0}; mockCmdQ->enqueueCopyBufferRect(&srcBuffer, &dstBuffer, srcOrigin, dstOrigin, zeroRegion100, 0, 0, 0, 0, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); } HWTEST_F(ZeroSizeEnqueueHandlerTest, 
WhenCopyingBufferZeroSizeEnqueueIsDetectedWhenCopyingBufferRectThenEventCommandTypeShouldBeUnchanged) { auto mockCmdQ = std::unique_ptr>(new MockCommandQueueHw(&context, pClDevice, 0)); cl_event event; MockBuffer srcBuffer; MockBuffer dstBuffer; size_t srcOrigin[3] = {1024u, 1, 0}; size_t dstOrigin[3] = {1024u, 1, 0}; size_t zeroRegion[3] = {0, 0, 0}; mockCmdQ->enqueueCopyBufferRect(&srcBuffer, &dstBuffer, srcOrigin, dstOrigin, zeroRegion, 0, 0, 0, 0, 0, nullptr, &event); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); auto pEvent = (Event *)event; cl_command_type cmdType = 0; size_t sizeReturned = 0; auto result = clGetEventInfo(pEvent, CL_EVENT_COMMAND_TYPE, sizeof(cmdType), &cmdType, &sizeReturned); ASSERT_EQ(CL_SUCCESS, result); EXPECT_EQ(static_cast(CL_COMMAND_COPY_BUFFER_RECT), cmdType); EXPECT_EQ(sizeof(cl_command_type), sizeReturned); delete pEvent; } HWTEST_F(ZeroSizeEnqueueHandlerTest, GivenZeroSizeEnqueueIsDetectedWhenFillingBufferThenCommandMarkerShouldBeEnqueued) { auto mockCmdQ = std::unique_ptr>(new MockCommandQueueHw(&context, pClDevice, 0)); MockBuffer buffer; cl_int pattern = 0xDEADBEEF; size_t zeroSize = 0; mockCmdQ->enqueueFillBuffer(&buffer, &pattern, sizeof(pattern), 0, zeroSize, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); } HWTEST_F(ZeroSizeEnqueueHandlerTest, GivenZeroSizeEnqueueIsDetectedWhenFillingBufferThenEventCommandTypeShouldBeUnchanged) { auto mockCmdQ = std::unique_ptr>(new MockCommandQueueHw(&context, pClDevice, 0)); cl_event event; MockBuffer buffer; cl_int pattern = 0xDEADBEEF; size_t zeroSize = 0; mockCmdQ->enqueueFillBuffer(&buffer, &pattern, sizeof(pattern), 0, zeroSize, 0, nullptr, &event); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); auto pEvent = (Event *)event; cl_command_type cmdType = 0; size_t sizeReturned = 0; auto result = clGetEventInfo(pEvent, CL_EVENT_COMMAND_TYPE, sizeof(cmdType), &cmdType, &sizeReturned); 
ASSERT_EQ(CL_SUCCESS, result); EXPECT_EQ(static_cast(CL_COMMAND_FILL_BUFFER), cmdType); EXPECT_EQ(sizeof(cl_command_type), sizeReturned); delete pEvent; } struct ZeroSizeEnqueueHandlerImageTest : ZeroSizeEnqueueHandlerTest { void SetUp() override { REQUIRE_IMAGES_OR_SKIP(defaultHwInfo); ZeroSizeEnqueueHandlerTest::SetUp(); } }; HWTEST_F(ZeroSizeEnqueueHandlerImageTest, GivenZeroSizeEnqueueIsDetectedWhenReadingImageThenCommandMarkerShouldBeEnqueued) { auto mockCmdQ = std::unique_ptr>(new MockCommandQueueHw(&context, pClDevice, 0)); std::unique_ptr image(Image2dHelper<>::create(&context)); size_t memory[1]; size_t origin[3] = {1024u, 1, 0}; size_t zeroRegion000[3] = {0, 0, 0}; mockCmdQ->enqueueReadImage(image.get(), CL_FALSE, origin, zeroRegion000, 0, 0, memory, nullptr, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); size_t zeroRegion011[3] = {0, 1, 1}; mockCmdQ->enqueueReadImage(image.get(), CL_FALSE, origin, zeroRegion011, 0, 0, memory, nullptr, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); size_t zeroRegion101[3] = {1, 0, 1}; mockCmdQ->enqueueReadImage(image.get(), CL_FALSE, origin, zeroRegion101, 0, 0, memory, nullptr, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); size_t zeroRegion110[3] = {1, 1, 0}; mockCmdQ->enqueueReadImage(image.get(), CL_FALSE, origin, zeroRegion110, 0, 0, memory, nullptr, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); size_t zeroRegion001[3] = {0, 0, 1}; mockCmdQ->enqueueReadImage(image.get(), CL_FALSE, origin, zeroRegion001, 0, 0, memory, nullptr, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); size_t zeroRegion010[3] = {0, 1, 0}; mockCmdQ->enqueueReadImage(image.get(), CL_FALSE, origin, zeroRegion010, 0, 0, memory, nullptr, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); 
size_t zeroRegion100[3] = {1, 0, 0}; mockCmdQ->enqueueReadImage(image.get(), CL_FALSE, origin, zeroRegion100, 0, 0, memory, nullptr, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); } HWTEST_F(ZeroSizeEnqueueHandlerImageTest, GivenZeroSizeEnqueueIsDetectedWhenReadingImageThenEventCommandTypeShouldBeUnchanged) { auto mockCmdQ = std::unique_ptr>(new MockCommandQueueHw(&context, pClDevice, 0)); cl_event event; std::unique_ptr image(Image2dHelper<>::create(&context)); size_t memory[1]; size_t origin[3] = {1024u, 1, 0}; size_t zeroRegion[3] = {0, 0, 0}; mockCmdQ->enqueueReadImage(image.get(), CL_FALSE, origin, zeroRegion, 0, 0, memory, nullptr, 0, nullptr, &event); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); auto pEvent = (Event *)event; cl_command_type cmdType = 0; size_t sizeReturned = 0; auto result = clGetEventInfo(pEvent, CL_EVENT_COMMAND_TYPE, sizeof(cmdType), &cmdType, &sizeReturned); ASSERT_EQ(CL_SUCCESS, result); EXPECT_EQ(static_cast(CL_COMMAND_READ_IMAGE), cmdType); EXPECT_EQ(sizeof(cl_command_type), sizeReturned); delete pEvent; } HWTEST_F(ZeroSizeEnqueueHandlerImageTest, GivenZeroSizeEnqueueIsDetectedWhenWritingImageThenCommandMarkerShouldBeEnqueued) { auto mockCmdQ = std::unique_ptr>(new MockCommandQueueHw(&context, pClDevice, 0)); std::unique_ptr image(Image2dHelper<>::create(&context)); size_t memory[1]; size_t origin[3] = {1024u, 1, 0}; size_t zeroRegion000[3] = {0, 0, 0}; mockCmdQ->enqueueWriteImage(image.get(), CL_FALSE, origin, zeroRegion000, 0, 0, memory, nullptr, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); size_t zeroRegion011[3] = {0, 1, 1}; mockCmdQ->enqueueWriteImage(image.get(), CL_FALSE, origin, zeroRegion011, 0, 0, memory, nullptr, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); size_t zeroRegion101[3] = {1, 0, 1}; mockCmdQ->enqueueWriteImage(image.get(), CL_FALSE, origin, 
zeroRegion101, 0, 0, memory, nullptr, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); size_t zeroRegion110[3] = {1, 1, 0}; mockCmdQ->enqueueWriteImage(image.get(), CL_FALSE, origin, zeroRegion110, 0, 0, memory, nullptr, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); size_t zeroRegion001[3] = {0, 0, 1}; mockCmdQ->enqueueWriteImage(image.get(), CL_FALSE, origin, zeroRegion001, 0, 0, memory, nullptr, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); size_t zeroRegion010[3] = {0, 1, 0}; mockCmdQ->enqueueWriteImage(image.get(), CL_FALSE, origin, zeroRegion010, 0, 0, memory, nullptr, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); size_t zeroRegion100[3] = {1, 0, 0}; mockCmdQ->enqueueWriteImage(image.get(), CL_FALSE, origin, zeroRegion100, 0, 0, memory, nullptr, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); } HWTEST_F(ZeroSizeEnqueueHandlerImageTest, GivenZeroSizeEnqueueIsDetectedWhenWritingImageThenEventCommandTypeShouldBeUnchanged) { auto mockCmdQ = std::unique_ptr>(new MockCommandQueueHw(&context, pClDevice, 0)); cl_event event; std::unique_ptr image(Image2dHelper<>::create(&context)); size_t memory[1]; size_t origin[3] = {1024u, 1, 0}; size_t zeroRegion[3] = {0, 0, 0}; mockCmdQ->enqueueWriteImage(image.get(), CL_FALSE, origin, zeroRegion, 0, 0, memory, nullptr, 0, nullptr, &event); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); auto pEvent = (Event *)event; cl_command_type cmdType = 0; size_t sizeReturned = 0; auto result = clGetEventInfo(pEvent, CL_EVENT_COMMAND_TYPE, sizeof(cmdType), &cmdType, &sizeReturned); ASSERT_EQ(CL_SUCCESS, result); EXPECT_EQ(static_cast(CL_COMMAND_WRITE_IMAGE), cmdType); EXPECT_EQ(sizeof(cl_command_type), sizeReturned); delete pEvent; } HWTEST_F(ZeroSizeEnqueueHandlerImageTest, 
GivenZeroSizeEnqueueIsDetectedWhenCopyingImageThenCommandMarkerShouldBeEnqueued) { auto mockCmdQ = std::unique_ptr>(new MockCommandQueueHw(&context, pClDevice, 0)); std::unique_ptr srcImage(Image2dHelper<>::create(&context)); std::unique_ptr dstImage(Image2dHelper<>::create(&context)); size_t srcOrigin[3] = {1024u, 1, 0}; size_t dstOrigin[3] = {1024u, 1, 0}; size_t zeroRegion000[3] = {0, 0, 0}; mockCmdQ->enqueueCopyImage(srcImage.get(), dstImage.get(), srcOrigin, dstOrigin, zeroRegion000, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); size_t zeroRegion011[3] = {0, 1, 1}; mockCmdQ->enqueueCopyImage(srcImage.get(), dstImage.get(), srcOrigin, dstOrigin, zeroRegion011, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); size_t zeroRegion101[3] = {1, 0, 1}; mockCmdQ->enqueueCopyImage(srcImage.get(), dstImage.get(), srcOrigin, dstOrigin, zeroRegion101, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); size_t zeroRegion110[3] = {1, 1, 0}; mockCmdQ->enqueueCopyImage(srcImage.get(), dstImage.get(), srcOrigin, dstOrigin, zeroRegion110, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); size_t zeroRegion001[3] = {0, 0, 1}; mockCmdQ->enqueueCopyImage(srcImage.get(), dstImage.get(), srcOrigin, dstOrigin, zeroRegion001, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); size_t zeroRegion010[3] = {0, 1, 0}; mockCmdQ->enqueueCopyImage(srcImage.get(), dstImage.get(), srcOrigin, dstOrigin, zeroRegion010, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); size_t zeroRegion100[3] = {1, 0, 0}; mockCmdQ->enqueueCopyImage(srcImage.get(), dstImage.get(), srcOrigin, dstOrigin, zeroRegion100, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); } HWTEST_F(ZeroSizeEnqueueHandlerImageTest, 
GivenZeroSizeEnqueueIsDetectedWhenCopyingImageThenEventCommandTypeShouldBeUnchanged) { auto mockCmdQ = std::unique_ptr>(new MockCommandQueueHw(&context, pClDevice, 0)); cl_event event; std::unique_ptr srcImage(Image2dHelper<>::create(&context)); std::unique_ptr dstImage(Image2dHelper<>::create(&context)); size_t srcOrigin[3] = {1024u, 1, 0}; size_t dstOrigin[3] = {1024u, 1, 0}; size_t zeroRegion[3] = {0, 0, 0}; mockCmdQ->enqueueCopyImage(srcImage.get(), dstImage.get(), srcOrigin, dstOrigin, zeroRegion, 0, nullptr, &event); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); auto pEvent = (Event *)event; cl_command_type cmdType = 0; size_t sizeReturned = 0; auto result = clGetEventInfo(pEvent, CL_EVENT_COMMAND_TYPE, sizeof(cmdType), &cmdType, &sizeReturned); ASSERT_EQ(CL_SUCCESS, result); EXPECT_EQ(static_cast(CL_COMMAND_COPY_IMAGE), cmdType); EXPECT_EQ(sizeof(cl_command_type), sizeReturned); delete pEvent; } HWTEST_F(ZeroSizeEnqueueHandlerImageTest, GivenZeroSizeEnqueueIsDetectedWhenCopyingImageToBufferThenCommandMarkerShouldBeEnqueued) { auto mockCmdQ = std::unique_ptr>(new MockCommandQueueHw(&context, pClDevice, 0)); std::unique_ptr srcImage(Image2dHelper<>::create(&context)); std::unique_ptr dstBuffer(Buffer::create(&context, CL_MEM_READ_WRITE, 1024u, nullptr, retVal)); size_t srcOrigin[3] = {1024u, 1, 0}; size_t zeroRegion000[3] = {0, 0, 0}; mockCmdQ->enqueueCopyImageToBuffer(srcImage.get(), dstBuffer.get(), srcOrigin, zeroRegion000, 0, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); size_t zeroRegion011[3] = {0, 1, 1}; mockCmdQ->enqueueCopyImageToBuffer(srcImage.get(), dstBuffer.get(), srcOrigin, zeroRegion011, 0, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); size_t zeroRegion101[3] = {1, 0, 1}; mockCmdQ->enqueueCopyImageToBuffer(srcImage.get(), dstBuffer.get(), srcOrigin, zeroRegion101, 0, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), 
mockCmdQ->lastCommandType); size_t zeroRegion110[3] = {1, 1, 0}; mockCmdQ->enqueueCopyImageToBuffer(srcImage.get(), dstBuffer.get(), srcOrigin, zeroRegion110, 0, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); size_t zeroRegion001[3] = {0, 0, 1}; mockCmdQ->enqueueCopyImageToBuffer(srcImage.get(), dstBuffer.get(), srcOrigin, zeroRegion001, 0, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); size_t zeroRegion010[3] = {0, 1, 0}; mockCmdQ->enqueueCopyImageToBuffer(srcImage.get(), dstBuffer.get(), srcOrigin, zeroRegion010, 0, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); size_t zeroRegion100[3] = {1, 0, 0}; mockCmdQ->enqueueCopyImageToBuffer(srcImage.get(), dstBuffer.get(), srcOrigin, zeroRegion100, 0, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); } HWTEST_F(ZeroSizeEnqueueHandlerImageTest, GivenZeroSizeEnqueueIsDetectedWhenCopyingImageToBufferThenEventCommandTypeShouldBeUnchanged) { auto mockCmdQ = std::unique_ptr>(new MockCommandQueueHw(&context, pClDevice, 0)); cl_event event; std::unique_ptr srcImage(Image2dHelper<>::create(&context)); std::unique_ptr dstBuffer(Buffer::create(&context, CL_MEM_READ_WRITE, 1024u, nullptr, retVal)); size_t srcOrigin[3] = {1024u, 1, 0}; size_t zeroRegion[3] = {0, 0, 0}; mockCmdQ->enqueueCopyImageToBuffer(srcImage.get(), dstBuffer.get(), srcOrigin, zeroRegion, 0, 0, nullptr, &event); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); auto pEvent = (Event *)event; cl_command_type cmdType = 0; size_t sizeReturned = 0; auto result = clGetEventInfo(pEvent, CL_EVENT_COMMAND_TYPE, sizeof(cmdType), &cmdType, &sizeReturned); ASSERT_EQ(CL_SUCCESS, result); EXPECT_EQ(static_cast(CL_COMMAND_COPY_IMAGE_TO_BUFFER), cmdType); EXPECT_EQ(sizeof(cl_command_type), sizeReturned); delete pEvent; } HWTEST_F(ZeroSizeEnqueueHandlerImageTest, 
GivenZeroSizeEnqueueIsDetectedWhenCopyingBufferToImageThenCommandMarkerShouldBeEnqueued) { auto mockCmdQ = std::unique_ptr>(new MockCommandQueueHw(&context, pClDevice, 0)); std::unique_ptr srcBuffer(Buffer::create(&context, CL_MEM_READ_WRITE, 1024u, nullptr, retVal)); std::unique_ptr dstImage(Image2dHelper<>::create(&context)); size_t dstOrigin[3] = {1024u, 1, 0}; size_t zeroRegion000[3] = {0, 0, 0}; mockCmdQ->enqueueCopyBufferToImage(srcBuffer.get(), dstImage.get(), 0, dstOrigin, zeroRegion000, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); size_t zeroRegion011[3] = {0, 1, 1}; mockCmdQ->enqueueCopyBufferToImage(srcBuffer.get(), dstImage.get(), 0, dstOrigin, zeroRegion011, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); size_t zeroRegion101[3] = {1, 0, 1}; mockCmdQ->enqueueCopyBufferToImage(srcBuffer.get(), dstImage.get(), 0, dstOrigin, zeroRegion101, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); size_t zeroRegion110[3] = {1, 1, 0}; mockCmdQ->enqueueCopyBufferToImage(srcBuffer.get(), dstImage.get(), 0, dstOrigin, zeroRegion110, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); size_t zeroRegion001[3] = {0, 0, 1}; mockCmdQ->enqueueCopyBufferToImage(srcBuffer.get(), dstImage.get(), 0, dstOrigin, zeroRegion001, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); size_t zeroRegion010[3] = {0, 1, 0}; mockCmdQ->enqueueCopyBufferToImage(srcBuffer.get(), dstImage.get(), 0, dstOrigin, zeroRegion010, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); size_t zeroRegion100[3] = {1, 0, 0}; mockCmdQ->enqueueCopyBufferToImage(srcBuffer.get(), dstImage.get(), 0, dstOrigin, zeroRegion100, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); } HWTEST_F(ZeroSizeEnqueueHandlerImageTest, 
GivenZeroSizeEnqueueIsDetectedWhenCopyingBufferToImageThenEventCommandTypeShouldBeUnchanged) { auto mockCmdQ = std::unique_ptr>(new MockCommandQueueHw(&context, pClDevice, 0)); cl_event event; std::unique_ptr srcBuffer(Buffer::create(&context, CL_MEM_READ_WRITE, 1024u, nullptr, retVal)); std::unique_ptr dstImage(Image2dHelper<>::create(&context)); size_t dstOrigin[3] = {1024u, 1, 0}; size_t zeroRegion[3] = {0, 0, 0}; mockCmdQ->enqueueCopyBufferToImage(srcBuffer.get(), dstImage.get(), 0, dstOrigin, zeroRegion, 0, nullptr, &event); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); auto pEvent = (Event *)event; cl_command_type cmdType = 0; size_t sizeReturned = 0; auto result = clGetEventInfo(pEvent, CL_EVENT_COMMAND_TYPE, sizeof(cmdType), &cmdType, &sizeReturned); ASSERT_EQ(CL_SUCCESS, result); EXPECT_EQ(static_cast(CL_COMMAND_COPY_BUFFER_TO_IMAGE), cmdType); EXPECT_EQ(sizeof(cl_command_type), sizeReturned); delete pEvent; } HWTEST_F(ZeroSizeEnqueueHandlerImageTest, GivenZeroSizeEnqueueIsDetectedWhenFillingImageThenCommandMarkerShouldBeEnqueued) { auto mockCmdQ = std::unique_ptr>(new MockCommandQueueHw(&context, pClDevice, 0)); std::unique_ptr image(Image2dHelper<>::create(&context)); size_t origin[3] = {1024u, 1, 1}; int32_t fillColor[4] = {0xCC, 0xCC, 0xCC, 0xCC}; size_t zeroRegion000[3] = {0, 0, 0}; mockCmdQ->enqueueFillImage(image.get(), &fillColor, origin, zeroRegion000, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); size_t zeroRegion011[3] = {0, 1, 1}; mockCmdQ->enqueueFillImage(image.get(), &fillColor, origin, zeroRegion011, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); size_t zeroRegion101[3] = {1, 0, 1}; mockCmdQ->enqueueFillImage(image.get(), &fillColor, origin, zeroRegion101, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); size_t zeroRegion110[3] = {1, 1, 0}; mockCmdQ->enqueueFillImage(image.get(), 
&fillColor, origin, zeroRegion110, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); size_t zeroRegion001[3] = {0, 0, 1}; mockCmdQ->enqueueFillImage(image.get(), &fillColor, origin, zeroRegion001, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); size_t zeroRegion010[3] = {0, 1, 0}; mockCmdQ->enqueueFillImage(image.get(), &fillColor, origin, zeroRegion010, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); size_t zeroRegion100[3] = {1, 0, 0}; mockCmdQ->enqueueFillImage(image.get(), &fillColor, origin, zeroRegion100, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); } HWTEST_F(ZeroSizeEnqueueHandlerImageTest, GivenZeroSizeEnqueueIsDetectedWhenFillingImageThenEventCommandTypeShouldBeUnchanged) { auto mockCmdQ = std::unique_ptr>(new MockCommandQueueHw(&context, pClDevice, 0)); cl_event event; std::unique_ptr image(Image2dHelper<>::create(&context)); size_t origin[3] = {1024u, 1, 1}; int32_t fillColor[4] = {0xCC, 0xCC, 0xCC, 0xCC}; size_t zeroRegion[3] = {0, 0, 0}; mockCmdQ->enqueueFillImage(image.get(), &fillColor, origin, zeroRegion, 0, nullptr, &event); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); auto pEvent = (Event *)event; cl_command_type cmdType = 0; size_t sizeReturned = 0; auto result = clGetEventInfo(pEvent, CL_EVENT_COMMAND_TYPE, sizeof(cmdType), &cmdType, &sizeReturned); ASSERT_EQ(CL_SUCCESS, result); EXPECT_EQ(static_cast(CL_COMMAND_FILL_IMAGE), cmdType); EXPECT_EQ(sizeof(cl_command_type), sizeReturned); delete pEvent; } HWTEST_F(ZeroSizeEnqueueHandlerTest, GivenZeroSizeEnqueueIsDetectedWhenCopyingSvmMemThenCommandMarkerShouldBeEnqueued) { REQUIRE_SVM_OR_SKIP(pDevice); auto mockCmdQ = std::unique_ptr>(new MockCommandQueueHw(&context, pClDevice, 0)); void *pSrcSVM = context.getSVMAllocsManager()->createSVMAlloc(256, {}, context.getRootDeviceIndices(), 
context.getDeviceBitfields()); void *pDstSVM = context.getSVMAllocsManager()->createSVMAlloc(256, {}, context.getRootDeviceIndices(), context.getDeviceBitfields()); size_t zeroSize = 0; mockCmdQ->enqueueSVMMemcpy(false, pSrcSVM, pDstSVM, zeroSize, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); context.getSVMAllocsManager()->freeSVMAlloc(pSrcSVM); context.getSVMAllocsManager()->freeSVMAlloc(pDstSVM); } HWTEST_F(ZeroSizeEnqueueHandlerTest, GivenZeroSizeEnqueueIsDetectedWhenCopyingSvmMemThenEventCommandTypeShouldBeUnchanged) { REQUIRE_SVM_OR_SKIP(pDevice); auto mockCmdQ = std::unique_ptr>(new MockCommandQueueHw(&context, pClDevice, 0)); cl_event event; void *pSrcSVM = context.getSVMAllocsManager()->createSVMAlloc(256, {}, context.getRootDeviceIndices(), context.getDeviceBitfields()); void *pDstSVM = context.getSVMAllocsManager()->createSVMAlloc(256, {}, context.getRootDeviceIndices(), context.getDeviceBitfields()); size_t zeroSize = 0; mockCmdQ->enqueueSVMMemcpy(false, pSrcSVM, pDstSVM, zeroSize, 0, nullptr, &event); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); auto pEvent = (Event *)event; cl_command_type cmdType = 0; size_t sizeReturned = 0; auto result = clGetEventInfo(pEvent, CL_EVENT_COMMAND_TYPE, sizeof(cmdType), &cmdType, &sizeReturned); ASSERT_EQ(CL_SUCCESS, result); EXPECT_EQ(static_cast(CL_COMMAND_SVM_MEMCPY), cmdType); EXPECT_EQ(sizeof(cl_command_type), sizeReturned); delete pEvent; context.getSVMAllocsManager()->freeSVMAlloc(pSrcSVM); context.getSVMAllocsManager()->freeSVMAlloc(pDstSVM); } HWTEST_F(ZeroSizeEnqueueHandlerTest, GivenZeroSizeEnqueueIsDetectedWhenFillingSvmMemThenCommandMarkerShouldBeEnqueued) { REQUIRE_SVM_OR_SKIP(pDevice); auto mockCmdQ = std::unique_ptr>(new MockCommandQueueHw(&context, pClDevice, 0)); void *pSVM = context.getSVMAllocsManager()->createSVMAlloc(256, {}, context.getRootDeviceIndices(), context.getDeviceBitfields()); const float pattern[1] = {1.2345f}; 
size_t zeroSize = 0; mockCmdQ->enqueueSVMMemFill(pSVM, &pattern, sizeof(pattern), zeroSize, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); context.getSVMAllocsManager()->freeSVMAlloc(pSVM); } HWTEST_F(ZeroSizeEnqueueHandlerTest, GivenZeroSizeEnqueueIsDetectedWhenFillingSvmMemThenEventCommandTypeShouldBeUnchanged) { REQUIRE_SVM_OR_SKIP(pDevice); auto mockCmdQ = std::unique_ptr>(new MockCommandQueueHw(&context, pClDevice, 0)); cl_event event; void *pSVM = context.getSVMAllocsManager()->createSVMAlloc(256, {}, context.getRootDeviceIndices(), context.getDeviceBitfields()); const float pattern[1] = {1.2345f}; size_t zeroSize = 0; mockCmdQ->enqueueSVMMemFill(pSVM, &pattern, sizeof(pattern), zeroSize, 0, nullptr, &event); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); auto pEvent = (Event *)event; cl_command_type cmdType = 0; size_t sizeReturned = 0; auto result = clGetEventInfo(pEvent, CL_EVENT_COMMAND_TYPE, sizeof(cmdType), &cmdType, &sizeReturned); ASSERT_EQ(CL_SUCCESS, result); EXPECT_EQ(static_cast(CL_COMMAND_SVM_MEMFILL), cmdType); EXPECT_EQ(sizeof(cl_command_type), sizeReturned); delete pEvent; context.getSVMAllocsManager()->freeSVMAlloc(pSVM); } compute-runtime-22.14.22890/opencl/test/unit_test/command_stream/000077500000000000000000000000001422164147700246205ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/command_stream/CMakeLists.txt000066400000000000000000000043761422164147700273720ustar00rootroot00000000000000# # Copyright (C) 2018-2022 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_command_stream ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/cl_command_stream_receiver_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cl_tbx_command_stream_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cmd_parse_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_hw_1_tests.cpp 
${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_hw_2_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_hw_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_hw_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_mt_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_flush_task_1_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_flush_task_2_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_flush_task_3_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_flush_task_4_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_flush_task_gmock_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_with_aub_dump_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/create_command_stream_receiver_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/get_devices_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/experimental_command_buffer_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/implicit_scaling_ocl_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/submissions_aggregator_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/thread_arbitration_policy_helper.cpp ${CMAKE_CURRENT_SOURCE_DIR}/thread_arbitration_policy_helper.h ) if(TESTS_XEHP_AND_LATER) list(APPEND IGDRCL_SRCS_tests_command_stream ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_flush_task_tests_xehp_and_later.cpp ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_hw_tests_xehp_and_later.cpp ) endif() if(TESTS_DG2_AND_LATER) list(APPEND IGDRCL_SRCS_tests_command_stream ${CMAKE_CURRENT_SOURCE_DIR}/cache_flush_tests_dg2_and_later.cpp ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_hw_tests_dg2_and_later.cpp ) endif() target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_command_stream}) add_subdirectories() compute-runtime-22.14.22890/opencl/test/unit_test/command_stream/cache_flush_tests_dg2_and_later.cpp000066400000000000000000000043661422164147700335700ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include 
"shared/source/helpers/l3_range.h" #include "shared/test/common/cmd_parse/hw_parse.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/mocks/mock_csr.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/helpers/cmd_buffer_validator.h" #include "opencl/test/unit_test/helpers/hardware_commands_helper_tests.h" using namespace NEO; using CacheFlushTestsDg2AndLater = HardwareCommandsTest; HWTEST2_F(CacheFlushTestsDg2AndLater, WhenProgrammingCacheFlushAfterWalkerThenExpectProperCacheFlushCommand, IsAtLeastXeHpgCore) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableCacheFlushAfterWalker.set(1); CommandQueueHw cmdQ(nullptr, pClDevice, 0, false); auto &commandStream = cmdQ.getCS(1024); void *allocPtr = reinterpret_cast(static_cast(6 * MemoryConstants::pageSize)); MockGraphicsAllocation globalAllocation{allocPtr, MemoryConstants::pageSize * 2}; this->mockKernelWithInternal->mockProgram->setGlobalSurface(&globalAllocation); constexpr uint64_t postSyncAddress = 1024; HardwareCommandsHelper::programCacheFlushAfterWalkerCommand(&commandStream, cmdQ, this->mockKernelWithInternal->mockKernel, postSyncAddress); std::string err; std::vector expectedCommands; expectedCommands.push_back(new MatchHwCmd( 1, Expects{EXPECT_MEMBER(PIPE_CONTROL, getUnTypedDataPortCacheFlush, true)})); if constexpr (FamilyType::isUsingL3Control) { using L3_CONTROL = typename FamilyType::L3_CONTROL; expectedCommands.push_back(new MatchHwCmd( 1, Expects{EXPECT_MEMBER(L3_CONTROL, getUnTypedDataPortCacheFlush, false)})); } bool cmdBuffOk = expectCmdBuff(cmdQ.getCS(0), 0, std::move(expectedCommands), &err); EXPECT_TRUE(cmdBuffOk) << err; this->mockKernelWithInternal->mockProgram->setGlobalSurface(nullptr); } 
cl_command_stream_receiver_tests.cpp000066400000000000000000000137551422164147700340350ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/command_stream/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/memory_manager/surface.h" #include "shared/test/common/fixtures/device_fixture.h" #include "shared/test/common/mocks/mock_csr.h" #include "shared/test/common/mocks/mock_hw_helper.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/fixtures/multi_root_device_fixture.h" #include "opencl/test/unit_test/helpers/raii_hw_helper.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "gtest/gtest.h" using namespace NEO; TEST(ClCommandStreamReceiverTest, WhenMakingResidentThenBufferResidencyFlagIsSet) { MockContext context; auto commandStreamReceiver = context.getDevice(0)->getDefaultEngine().commandStreamReceiver; float srcMemory[] = {1.0f}; auto retVal = CL_INVALID_VALUE; auto buffer = Buffer::create( &context, CL_MEM_USE_HOST_PTR, sizeof(srcMemory), srcMemory, retVal); ASSERT_NE(nullptr, buffer); auto graphicsAllocation = buffer->getGraphicsAllocation(context.getDevice(0)->getRootDeviceIndex()); EXPECT_FALSE(graphicsAllocation->isResident(commandStreamReceiver->getOsContext().getContextId())); commandStreamReceiver->makeResident(*graphicsAllocation); EXPECT_TRUE(graphicsAllocation->isResident(commandStreamReceiver->getOsContext().getContextId())); delete buffer; } using ClCommandStreamReceiverTests = Test; HWTEST_F(ClCommandStreamReceiverTests, givenCommandStreamReceiverWhenFenceAllocationIsRequiredAndCreateGlobalFenceAllocationIsCalledThenFenceAllocationIsAllocated) { RAIIHwHelperFactory> hwHelperBackup{pDevice->getHardwareInfo().platform.eRenderCoreFamily}; MockCsrHw csr(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); 
csr.setupContext(*pDevice->getDefaultEngine().osContext); EXPECT_EQ(nullptr, csr.globalFenceAllocation); EXPECT_TRUE(csr.createGlobalFenceAllocation()); ASSERT_NE(nullptr, csr.globalFenceAllocation); EXPECT_EQ(AllocationType::GLOBAL_FENCE, csr.globalFenceAllocation->getAllocationType()); } HWTEST_F(ClCommandStreamReceiverTests, givenCommandStreamReceiverWhenGettingFenceAllocationThenCorrectFenceAllocationIsReturned) { RAIIHwHelperFactory> hwHelperBackup{pDevice->getHardwareInfo().platform.eRenderCoreFamily}; CommandStreamReceiverHw csr(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); csr.setupContext(*pDevice->getDefaultEngine().osContext); EXPECT_EQ(nullptr, csr.getGlobalFenceAllocation()); EXPECT_TRUE(csr.createGlobalFenceAllocation()); ASSERT_NE(nullptr, csr.getGlobalFenceAllocation()); EXPECT_EQ(AllocationType::GLOBAL_FENCE, csr.getGlobalFenceAllocation()->getAllocationType()); } using CommandStreamReceiverMultiRootDeviceTest = MultiRootDeviceFixture; TEST_F(CommandStreamReceiverMultiRootDeviceTest, WhenCreatingCommandStreamGraphicsAllocationsThenTheyHaveCorrectRootDeviceIndex) { auto commandStreamReceiver = &device1->getGpgpuCommandStreamReceiver(); ASSERT_NE(nullptr, commandStreamReceiver); EXPECT_EQ(expectedRootDeviceIndex, commandStreamReceiver->getRootDeviceIndex()); // Linear stream / Command buffer GraphicsAllocation *allocation = mockMemoryManager->allocateGraphicsMemoryWithProperties({expectedRootDeviceIndex, 128u, AllocationType::COMMAND_BUFFER, device1->getDeviceBitfield()}); LinearStream commandStream{allocation}; commandStreamReceiver->ensureCommandBufferAllocation(commandStream, 100u, 0u); EXPECT_EQ(allocation, commandStream.getGraphicsAllocation()); EXPECT_EQ(128u, commandStream.getMaxAvailableSpace()); EXPECT_EQ(expectedRootDeviceIndex, commandStream.getGraphicsAllocation()->getRootDeviceIndex()); commandStreamReceiver->ensureCommandBufferAllocation(commandStream, 1024u, 0u); EXPECT_NE(allocation, 
commandStream.getGraphicsAllocation()); EXPECT_EQ(0u, commandStream.getMaxAvailableSpace() % MemoryConstants::pageSize64k); EXPECT_EQ(expectedRootDeviceIndex, commandStream.getGraphicsAllocation()->getRootDeviceIndex()); mockMemoryManager->freeGraphicsMemory(commandStream.getGraphicsAllocation()); // Debug surface auto debugSurface = commandStreamReceiver->allocateDebugSurface(MemoryConstants::pageSize); ASSERT_NE(nullptr, debugSurface); EXPECT_EQ(expectedRootDeviceIndex, debugSurface->getRootDeviceIndex()); // Indirect heaps IndirectHeap::Type heapTypes[]{IndirectHeap::Type::DYNAMIC_STATE, IndirectHeap::Type::INDIRECT_OBJECT, IndirectHeap::Type::SURFACE_STATE}; for (auto heapType : heapTypes) { IndirectHeap *heap = nullptr; commandStreamReceiver->allocateHeapMemory(heapType, MemoryConstants::pageSize, heap); ASSERT_NE(nullptr, heap); ASSERT_NE(nullptr, heap->getGraphicsAllocation()); EXPECT_EQ(expectedRootDeviceIndex, heap->getGraphicsAllocation()->getRootDeviceIndex()); mockMemoryManager->freeGraphicsMemory(heap->getGraphicsAllocation()); delete heap; } // Tag allocation ASSERT_NE(nullptr, commandStreamReceiver->getTagAllocation()); EXPECT_EQ(expectedRootDeviceIndex, commandStreamReceiver->getTagAllocation()->getRootDeviceIndex()); // Preemption allocation if (nullptr == commandStreamReceiver->getPreemptionAllocation()) { commandStreamReceiver->createPreemptionAllocation(); } EXPECT_EQ(expectedRootDeviceIndex, commandStreamReceiver->getPreemptionAllocation()->getRootDeviceIndex()); // HostPtr surface char memory[8] = {1, 2, 3, 4, 5, 6, 7, 8}; HostPtrSurface surface(memory, sizeof(memory), true); EXPECT_TRUE(commandStreamReceiver->createAllocationForHostSurface(surface, false)); ASSERT_NE(nullptr, surface.getAllocation()); EXPECT_EQ(expectedRootDeviceIndex, surface.getAllocation()->getRootDeviceIndex()); } 
compute-runtime-22.14.22890/opencl/test/unit_test/command_stream/cl_tbx_command_stream_tests.cpp000066400000000000000000000041771422164147700331030ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/engine_control.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/engine_descriptor_helper.h" #include "shared/test/common/mocks/mock_os_context.h" #include "shared/test/common/mocks/mock_tbx_csr.h" #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" using namespace NEO; using ClTbxCommandStreamTests = Test; HWTEST_F(ClTbxCommandStreamTests, givenTbxCsrWhenDispatchBlitEnqueueThenProcessCorrectly) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableBlitterOperationsSupport.set(1); DebugManager.flags.EnableBlitterForEnqueueOperations.set(1); MockContext context(pClDevice); MockTbxCsr tbxCsr0{*pDevice->executionEnvironment, pDevice->getDeviceBitfield()}; tbxCsr0.initializeTagAllocation(); MockTbxCsr tbxCsr1{*pDevice->executionEnvironment, pDevice->getDeviceBitfield()}; tbxCsr1.initializeTagAllocation(); MockOsContext osContext0(0, EngineDescriptorHelper::getDefaultDescriptor(pDevice->getDeviceBitfield())); tbxCsr0.setupContext(osContext0); EngineControl engineControl0{&tbxCsr0, &osContext0}; MockOsContext osContext1(1, EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS, EngineUsage::Regular}, pDevice->getDeviceBitfield())); tbxCsr1.setupContext(osContext0); EngineControl engineControl1{&tbxCsr1, &osContext1}; MockCommandQueueHw cmdQ(&context, pClDevice, nullptr); cmdQ.gpgpuEngine = &engineControl0; cmdQ.clearBcsEngines(); cmdQ.bcsEngines[0] = &engineControl1; cl_int error = CL_SUCCESS; std::unique_ptr 
buffer(Buffer::create(&context, 0, 1, nullptr, error)); uint32_t hostPtr = 0; error = cmdQ.enqueueWriteBuffer(buffer.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, error); } compute-runtime-22.14.22890/opencl/test/unit_test/command_stream/cmd_parse_tests.cpp000066400000000000000000000032051422164147700305030ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "shared/test/common/fixtures/device_fixture.h" #include "shared/test/common/test_macros/test.h" using namespace NEO; using CommandParse = Test; HWTEST_F(CommandParse, WhenGeneratingCommandBufferThenIsNotNull) { typedef typename FamilyType::PARSE PARSE; GenCmdList cmds; EXPECT_FALSE(PARSE::parseCommandBuffer(cmds, nullptr, sizeof(void *))); } HWTEST_F(CommandParse, WhenGeneratingCommandBufferThenDoesNotContainGarbage) { typedef typename FamilyType::PARSE PARSE; uint32_t buffer[30] = {0xbaadf00d}; GenCmdList cmds; EXPECT_FALSE(PARSE::parseCommandBuffer(cmds, buffer, sizeof(uint32_t))); } HWTEST_F(CommandParse, GivenGarbageWhenGeneratingCommandBufferThenLengthIsZero) { typedef typename FamilyType::PARSE PARSE; uint32_t buffer[30] = {0xbaadf00d}; EXPECT_EQ(0u, PARSE::getCommandLength(buffer)); } HWTEST_F(CommandParse, WhenGeneratingCommandBufferThenBufferIsCorrect) { typedef typename FamilyType::PARSE PARSE; typedef typename FamilyType::WALKER_TYPE WALKER_TYPE; GenCmdList cmds; WALKER_TYPE buffer = FamilyType::cmdInitGpgpuWalker; EXPECT_TRUE(PARSE::parseCommandBuffer(cmds, &buffer, 0)); EXPECT_FALSE(PARSE::parseCommandBuffer(cmds, &buffer, 1)); EXPECT_FALSE(PARSE::parseCommandBuffer(cmds, &buffer, 2)); EXPECT_FALSE(PARSE::parseCommandBuffer(cmds, &buffer, 3)); EXPECT_FALSE(PARSE::parseCommandBuffer(cmds, &buffer, 4)); EXPECT_TRUE(PARSE::parseCommandBuffer(cmds, &buffer, sizeof(buffer))); } 
compute-runtime-22.14.22890/opencl/test/unit_test/command_stream/command_stream_fixture.h000066400000000000000000000007271422164147700315360ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/command_queue/command_queue.h" #include namespace NEO { struct CommandStreamFixture { void SetUp(CommandQueue *pCmdQ) { pCS = &pCmdQ->getCS(1024); pCmdBuffer = pCS->getCpuBase(); } virtual void TearDown() { } LinearStream *pCS = nullptr; void *pCmdBuffer = nullptr; }; } // namespace NEO command_stream_receiver_flush_task_1_tests.cpp000066400000000000000000002003521422164147700360110ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/command_stream/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/os_interface/os_context.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/dispatch_flags_helper.h" #include "shared/test/common/helpers/ult_hw_helper.h" #include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/mocks/mock_csr.h" #include "shared/test/common/mocks/mock_submissions_aggregator.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/command_queue/gpgpu_walker.h" #include "opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_event.h" using namespace NEO; typedef UltCommandStreamReceiverTest CommandStreamReceiverFlushTaskTests; HWTEST_F(CommandStreamReceiverFlushTaskTests, WhenFlushingTaskThenCommandStreamReceiverGetsUpdated) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); flushTask(commandStreamReceiver); 
EXPECT_GT(commandStreamReceiver.commandStream.getUsed(), 0u); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenForceCsrReprogrammingDebugVariableSetWhenFlushingThenInitProgrammingFlagsShouldBeCalled) { DebugManagerStateRestore restore; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); DebugManager.flags.ForceCsrReprogramming.set(true); flushTask(commandStreamReceiver); EXPECT_TRUE(commandStreamReceiver.initProgrammingFlagsCalled); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenForceCsrFlushingDebugVariableSetWhenFlushingThenFlushBatchedSubmissionsShouldBeCalled) { DebugManagerStateRestore restore; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); DebugManager.flags.ForceCsrFlushing.set(true); flushTask(commandStreamReceiver); EXPECT_TRUE(commandStreamReceiver.flushBatchedSubmissionsCalled); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenForceImplicitFlushDebugVariableSetWhenFlushingThenFlushBatchedSubmissionsShouldBeCalled) { DebugManagerStateRestore restore; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.dispatchMode = DispatchMode::BatchedDispatch; DebugManager.flags.ForceImplicitFlush.set(true); flushTask(commandStreamReceiver); EXPECT_TRUE(commandStreamReceiver.flushBatchedSubmissionsCalled); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenOverrideThreadArbitrationPolicyDebugVariableSetWhenFlushingThenRequestRequiredMode) { DebugManagerStateRestore restore; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); DebugManager.flags.OverrideThreadArbitrationPolicy.set(ThreadArbitrationPolicy::RoundRobin); EXPECT_EQ(-1, commandStreamReceiver.streamProperties.stateComputeMode.threadArbitrationPolicy.value); flushTask(commandStreamReceiver); EXPECT_EQ(ThreadArbitrationPolicy::RoundRobin, commandStreamReceiver.streamProperties.stateComputeMode.threadArbitrationPolicy.value); } HWTEST_F(CommandStreamReceiverFlushTaskTests, 
WhenFlushingTaskThenTaskCountIsIncremented) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); flushTask(commandStreamReceiver); EXPECT_EQ(1u, commandStreamReceiver.peekTaskCount()); } HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskTests, givenconfigureCSRtoNonDirtyStateWhenFlushTaskIsCalledThenNoCommandsAreAdded) { configureCSRtoNonDirtyState(false); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); flushTask(commandStreamReceiver); EXPECT_EQ(0u, commandStreamReceiver.commandStream.getUsed()); } HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskTests, givenMultiOsContextCommandStreamReceiverWhenFlushTaskIsCalledThenCommandStreamReceiverStreamIsUsed) { configureCSRtoNonDirtyState(false); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.multiOsContextCapable = true; commandStream.getSpace(4); flushTask(commandStreamReceiver); EXPECT_EQ(MemoryConstants::cacheLineSize, commandStreamReceiver.commandStream.getUsed()); auto batchBufferStart = genCmdCast(commandStreamReceiver.commandStream.getCpuBase()); EXPECT_NE(nullptr, batchBufferStart); } HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenTaskIsSubmittedViaCsrThenBbEndCoversPaddingEnoughToFitMiBatchBufferStart) { auto &mockCsr = pDevice->getUltCommandStreamReceiver(); mockCsr.overrideDispatchPolicy(DispatchMode::BatchedDispatch); mockCsr.timestampPacketWriteEnabled = false; configureCSRtoNonDirtyState(false); mockCsr.getCS(1024u); auto &csrCommandStream = mockCsr.commandStream; //we do level change that will emit PPC, fill all the space so only BB end fits. 
taskLevel++; auto ppcSize = MemorySynchronizationCommands::getSizeForSinglePipeControl(); auto fillSize = MemoryConstants::cacheLineSize - ppcSize - sizeof(typename FamilyType::MI_BATCH_BUFFER_END); csrCommandStream.getSpace(fillSize); auto expectedUsedSize = 2 * MemoryConstants::cacheLineSize; flushTask(mockCsr); EXPECT_EQ(expectedUsedSize, mockCsr.commandStream.getUsed()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenTaskIsSubmittedViaCommandStreamThenBbEndCoversPaddingEnoughToFitMiBatchBufferStart) { auto &mockCsr = pDevice->getUltCommandStreamReceiver(); mockCsr.overrideDispatchPolicy(DispatchMode::BatchedDispatch); CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); configureCSRtoNonDirtyState(false); auto fillSize = MemoryConstants::cacheLineSize - sizeof(typename FamilyType::MI_BATCH_BUFFER_END); commandStream.getSpace(fillSize); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); mockCsr.flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevel, dispatchFlags, *pDevice); auto expectedUsedSize = 2 * MemoryConstants::cacheLineSize; EXPECT_EQ(expectedUsedSize, commandStream.getUsed()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrWhenflushTaskThenDshAndIohNotEvictable) { auto &mockCsr = pDevice->getUltCommandStreamReceiver(); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); mockCsr.flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevel, dispatchFlags, *pDevice); EXPECT_EQ(dsh.getGraphicsAllocation()->peekEvictable(), true); EXPECT_EQ(ssh.getGraphicsAllocation()->peekEvictable(), true); EXPECT_EQ(ioh.getGraphicsAllocation()->peekEvictable(), true); dsh.getGraphicsAllocation()->setEvictable(false); EXPECT_EQ(dsh.getGraphicsAllocation()->peekEvictable(), false); 
dsh.getGraphicsAllocation()->setEvictable(true); EXPECT_EQ(dsh.getGraphicsAllocation()->peekEvictable(), true); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeAndMidThreadPreemptionWhenFlushTaskIsCalledThenSipKernelIsMadeResident) { auto &mockCsr = pDevice->getUltCommandStreamReceiver(); mockCsr.overrideDispatchPolicy(DispatchMode::BatchedDispatch); mockCsr.useNewResourceImplicitFlush = false; mockCsr.useGpuIdleImplicitFlush = false; auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr.submissionAggregator.reset(mockedSubmissionsAggregator); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionMode::MidThread; auto sipType = SipKernel::getSipKernelType(*pDevice); SipKernel::initSipKernel(sipType, *pDevice); mockCsr.flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevel, dispatchFlags, *pDevice); auto cmdBuffer = mockedSubmissionsAggregator->peekCommandBuffers().peekHead(); auto sipAllocation = SipKernel::getSipKernel(*pDevice).getSipAllocation(); bool found = false; for (auto allocation : cmdBuffer->surfaces) { if (allocation == sipAllocation) { found = true; break; } } EXPECT_TRUE(found); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInDefaultModeAndMidThreadPreemptionWhenFlushTaskIsCalledThenSipKernelIsMadeResident) { auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(mockCsr); CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionMode::MidThread; auto sipType = SipKernel::getSipKernelType(*pDevice); SipKernel::initSipKernel(sipType, *pDevice); mockCsr->flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevel, dispatchFlags, *pDevice); auto sipAllocation = 
SipKernel::getSipKernel(*pDevice).getSipAllocation(); bool found = false; for (auto allocation : mockCsr->copyOfAllocations) { if (allocation == sipAllocation) { found = true; break; } } EXPECT_TRUE(found); } HWTEST_F(CommandStreamReceiverFlushTaskTests, whenFlushThenCommandBufferAlreadyHasProperTaskCountsAndIsNotIncludedInResidencyVector) { struct MockCsrFlushCmdBuffer : public MockCommandStreamReceiver { using MockCommandStreamReceiver::MockCommandStreamReceiver; NEO::SubmissionStatus flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override { EXPECT_EQ(batchBuffer.commandBufferAllocation->getResidencyTaskCount(this->osContext->getContextId()), this->taskCount + 1); EXPECT_EQ(batchBuffer.commandBufferAllocation->getTaskCount(this->osContext->getContextId()), this->taskCount + 1); EXPECT_EQ(std::find(allocationsForResidency.begin(), allocationsForResidency.end(), batchBuffer.commandBufferAllocation), allocationsForResidency.end()); return NEO::SubmissionStatus::SUCCESS; } }; auto mockCsr = new MockCsrFlushCmdBuffer(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(mockCsr); CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionMode::MidThread; mockCsr->flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevel, dispatchFlags, *pDevice); } HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskTests, GivenSameTaskLevelWhenFlushingTaskThenDoNotSendPipeControl) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); // Configure the CSR to not need to submit any state or commands. 
configureCSRtoNonDirtyState(false); flushTask(commandStreamReceiver); EXPECT_EQ(taskLevel, commandStreamReceiver.taskLevel); auto sizeUsed = commandStreamReceiver.commandStream.getUsed(); EXPECT_EQ(sizeUsed, 0u); } HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskTests, givenDeviceWithThreadGroupPreemptionSupportThenDontSendMediaVfeStateIfNotDirty) { DebugManagerStateRestore dbgRestore; DebugManager.flags.ForcePreemptionMode.set(static_cast(PreemptionMode::ThreadGroup)); auto commandStreamReceiver = new MockCsrHw(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->setPreemptionMode(PreemptionMode::ThreadGroup); pDevice->resetCommandStreamReceiver(commandStreamReceiver); // Configure the CSR to not need to submit any state or commands. configureCSRtoNonDirtyState(false); flushTask(*commandStreamReceiver); EXPECT_EQ(taskLevel, commandStreamReceiver->peekTaskLevel()); auto sizeUsed = commandStreamReceiver->commandStream.getUsed(); EXPECT_EQ(0u, sizeUsed); } HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenHigherTaskLevelWhenFlushingTaskThenSendPipeControl) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.isPreambleSent = true; commandStreamReceiver.timestampPacketWriteEnabled = false; configureCSRtoNonDirtyState(false); commandStreamReceiver.taskLevel = taskLevel / 2; flushTask(commandStreamReceiver); EXPECT_EQ(taskLevel, commandStreamReceiver.peekTaskLevel()); EXPECT_GT(commandStreamReceiver.commandStream.getUsed(), 0u); parseCommands(commandStreamReceiver.commandStream, 0); auto itorPC = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), itorPC); } HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskTests, givenCommandStreamReceiverWithInstructionCacheRequestWhenFlushTaskIsCalledThenPipeControlWithInstructionCacheIsEmitted) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); configureCSRtoNonDirtyState(false); 
commandStreamReceiver.registerInstructionCacheFlush(); EXPECT_EQ(1u, commandStreamReceiver.recursiveLockCounter); flushTask(commandStreamReceiver); parseCommands(commandStreamReceiver.commandStream, 0); auto itorPC = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), itorPC); auto pipeControlCmd = reinterpret_cast(*itorPC); EXPECT_TRUE(pipeControlCmd->getInstructionCacheInvalidateEnable()); EXPECT_FALSE(commandStreamReceiver.requiresInstructionCacheFlush); } HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskTests, givenHigherTaskLevelWhenTimestampPacketWriteIsEnabledThenDontAddPipeControl) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.timestampPacketWriteEnabled = true; commandStreamReceiver.isPreambleSent = true; configureCSRtoNonDirtyState(false); commandStreamReceiver.taskLevel = taskLevel; taskLevel++; // submit with higher taskLevel flushTask(commandStreamReceiver); parseCommands(commandStreamReceiver.commandStream, 0); auto itorPC = find(cmdList.begin(), cmdList.end()); EXPECT_EQ(cmdList.end(), itorPC); } HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskTests, WhenForcePipeControlPriorToWalkerIsSetThenAddExtraPipeControls) { DebugManagerStateRestore stateResore; DebugManager.flags.ForcePipeControlPriorToWalker.set(true); DebugManager.flags.FlushAllCaches.set(true); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.isPreambleSent = true; configureCSRtoNonDirtyState(false); commandStreamReceiver.taskLevel = taskLevel; flushTask(commandStreamReceiver); parseCommands(commandStreamReceiver.commandStream, 0); GenCmdList::iterator itor = cmdList.begin(); int counterPC = 0; while (itor != cmdList.end()) { auto pipeControl = genCmdCast(*itor); if (pipeControl) { switch (counterPC) { case 0: // First pipe control with CS Stall EXPECT_EQ(bool(pipeControl->getCommandStreamerStallEnable()), true); if (pDevice->getHardwareInfo().platform.eProductFamily == 
IGFX_BROADWELL) { EXPECT_EQ(bool(pipeControl->getDcFlushEnable()), true); } else { EXPECT_EQ(bool(pipeControl->getDcFlushEnable()), false); } EXPECT_EQ(bool(pipeControl->getRenderTargetCacheFlushEnable()), false); EXPECT_EQ(bool(pipeControl->getInstructionCacheInvalidateEnable()), false); EXPECT_EQ(bool(pipeControl->getTextureCacheInvalidationEnable()), false); EXPECT_EQ(bool(pipeControl->getPipeControlFlushEnable()), false); EXPECT_EQ(bool(pipeControl->getVfCacheInvalidationEnable()), false); EXPECT_EQ(bool(pipeControl->getConstantCacheInvalidationEnable()), false); EXPECT_EQ(bool(pipeControl->getStateCacheInvalidationEnable()), false); EXPECT_EQ(bool(pipeControl->getTlbInvalidate()), false); break; case 1: // Second pipe control with all flushes EXPECT_EQ(bool(pipeControl->getCommandStreamerStallEnable()), true); EXPECT_EQ(bool(pipeControl->getDcFlushEnable()), true); EXPECT_EQ(bool(pipeControl->getRenderTargetCacheFlushEnable()), true); EXPECT_EQ(bool(pipeControl->getInstructionCacheInvalidateEnable()), true); EXPECT_EQ(bool(pipeControl->getTextureCacheInvalidationEnable()), true); EXPECT_EQ(bool(pipeControl->getPipeControlFlushEnable()), true); EXPECT_EQ(bool(pipeControl->getVfCacheInvalidationEnable()), true); EXPECT_EQ(bool(pipeControl->getConstantCacheInvalidationEnable()), true); EXPECT_EQ(bool(pipeControl->getStateCacheInvalidationEnable()), true); EXPECT_EQ(bool(pipeControl->getTlbInvalidate()), true); default: break; } counterPC++; } ++itor; } EXPECT_EQ(counterPC, 2); } HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskTests, whenSamplerCacheFlushNotRequiredThenDontSendPipecontrol) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); NEO::WorkaroundTable *waTable = &pDevice->getRootDeviceEnvironment().getMutableHardwareInfo()->workaroundTable; commandStreamReceiver.isPreambleSent = true; commandStreamReceiver.lastPreemptionMode = pDevice->getPreemptionMode(); 
commandStreamReceiver.setSamplerCacheFlushRequired(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushNotRequired); configureCSRtoNonDirtyState(false); commandStreamReceiver.taskLevel = taskLevel; waTable->flags.waSamplerCacheFlushBetweenRedescribedSurfaceReads = true; flushTask(commandStreamReceiver); EXPECT_EQ(commandStreamReceiver.commandStream.getUsed(), 0u); EXPECT_EQ(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushNotRequired, commandStreamReceiver.samplerCacheFlushRequired); parseCommands(commandStreamReceiver.commandStream, 0); auto itorPC = find(cmdList.begin(), cmdList.end()); EXPECT_EQ(cmdList.end(), itorPC); } HWTEST_F(CommandStreamReceiverFlushTaskTests, whenSamplerCacheFlushBeforeThenSendPipecontrol) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.isPreambleSent = true; commandStreamReceiver.setSamplerCacheFlushRequired(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushBefore); configureCSRtoNonDirtyState(false); commandStreamReceiver.taskLevel = taskLevel; NEO::WorkaroundTable *waTable = &pDevice->getRootDeviceEnvironment().getMutableHardwareInfo()->workaroundTable; waTable->flags.waSamplerCacheFlushBetweenRedescribedSurfaceReads = true; flushTask(commandStreamReceiver); EXPECT_GT(commandStreamReceiver.commandStream.getUsed(), 0u); EXPECT_EQ(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushAfter, commandStreamReceiver.samplerCacheFlushRequired); parseCommands(commandStreamReceiver.commandStream, 0); auto itorPC = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), itorPC); auto pipeControlCmd = (typename FamilyType::PIPE_CONTROL *)*itorPC; EXPECT_TRUE(pipeControlCmd->getTextureCacheInvalidationEnable()); } HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskTests, whenSamplerCacheFlushBeforeAndWaSamplerCacheFlushBetweenRedescribedSurfaceReadsDasabledThenDontSendPipecontrol) { auto &commandStreamReceiver = 
pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.isPreambleSent = true; commandStreamReceiver.setSamplerCacheFlushRequired(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushBefore); configureCSRtoNonDirtyState(false); commandStreamReceiver.taskLevel = taskLevel; NEO::WorkaroundTable *waTable = &pDevice->getRootDeviceEnvironment().getMutableHardwareInfo()->workaroundTable; waTable->flags.waSamplerCacheFlushBetweenRedescribedSurfaceReads = false; flushTask(commandStreamReceiver); EXPECT_EQ(commandStreamReceiver.commandStream.getUsed(), 0u); EXPECT_EQ(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushBefore, commandStreamReceiver.samplerCacheFlushRequired); parseCommands(commandStreamReceiver.commandStream, 0); auto itorPC = find(cmdList.begin(), cmdList.end()); EXPECT_EQ(cmdList.end(), itorPC); } HWTEST_F(CommandStreamReceiverFlushTaskTests, whenSamplerCacheFlushAfterThenSendPipecontrol) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.isPreambleSent = true; commandStreamReceiver.setSamplerCacheFlushRequired(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushAfter); configureCSRtoNonDirtyState(false); commandStreamReceiver.taskLevel = taskLevel; NEO::WorkaroundTable *waTable = &pDevice->getRootDeviceEnvironment().getMutableHardwareInfo()->workaroundTable; waTable->flags.waSamplerCacheFlushBetweenRedescribedSurfaceReads = true; flushTask(commandStreamReceiver); EXPECT_GT(commandStreamReceiver.commandStream.getUsed(), 0u); EXPECT_EQ(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushNotRequired, commandStreamReceiver.samplerCacheFlushRequired); parseCommands(commandStreamReceiver.commandStream, 0); auto itorPC = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), itorPC); auto pipeControlCmd = (typename FamilyType::PIPE_CONTROL *)*itorPC; EXPECT_TRUE(pipeControlCmd->getTextureCacheInvalidationEnable()); } 
// Puts the CSR's task level 10 above the fixture's before flushing and checks that the returned
// completion stamp matches the CSR's task level, task count and flush stamp.
HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenStaleCqWhenFlushingTaskThenCompletionStampIsValid) {
    auto &csr = pDevice->getUltCommandStreamReceiver();

    // simulate our CQ is stale for 10 TL's
    csr.taskLevel = taskLevel + 10;

    const auto stamp = flushTask(csr);

    EXPECT_EQ(stamp.taskLevel, csr.peekTaskLevel());
    EXPECT_EQ(stamp.taskCount, csr.peekTaskCount());
    EXPECT_EQ(stamp.flushStamp, csr.flushStamp->peekStamp());
}

// A first flush must report task count 1, the fixture's task level and the CSR's flush stamp.
HWTEST_F(CommandStreamReceiverFlushTaskTests, WhenFlushingTaskThenCompletionStampIsValid) {
    auto &csr = pDevice->getUltCommandStreamReceiver();

    const auto stamp = flushTask(csr);

    EXPECT_EQ(1u, stamp.taskCount);
    EXPECT_EQ(taskLevel, stamp.taskLevel);
    EXPECT_EQ(csr.flushStamp->peekStamp(), stamp.flushStamp);
}

// After a flush, updateAndCheck() on the IOH and SSH heap states must return false; for the DSH
// it must return false only when the device reports image support.
HWTEST_F(CommandStreamReceiverFlushTaskTests, WhenFlushingTaskThenStateBaseAddressIsCorrect) {
    auto &csr = pDevice->getUltCommandStreamReceiver();
    flushTask(csr);

    const bool imagesSupported = pDevice->getHardwareInfo().capabilityTable.supportsImages;
    if (imagesSupported) {
        EXPECT_FALSE(csr.dshState.updateAndCheck(&dsh));
    } else {
        EXPECT_TRUE(csr.dshState.updateAndCheck(&dsh));
    }
    EXPECT_FALSE(csr.iohState.updateAndCheck(&ioh));
    EXPECT_FALSE(csr.sshState.updateAndCheck(&ssh));
}

// Compares the STATE_BASE_ADDRESS command found in the CSR stream with the heaps and MOCS values
// the test collected up front.
HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskTests, WhenFlushingTaskThenStateBaseAddressProgrammingShouldMatchTracking) {
    typedef typename FamilyType::STATE_BASE_ADDRESS STATE_BASE_ADDRESS;
    auto gmmHelper = pDevice->getGmmHelper();
    auto stateHeapMocs = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_STATE_HEAP_BUFFER);
    auto l3CacheOnMocs = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver();
flushTask(commandStreamReceiver); auto &commandStreamCSR = commandStreamReceiver.commandStream; parseCommands(commandStreamCSR, 0); HardwareParse::findHardwareCommands(); ASSERT_NE(nullptr, cmdStateBaseAddress); auto &cmd = *reinterpret_cast(cmdStateBaseAddress); auto &hwHelper = HwHelper::get(pDevice->getHardwareInfo().platform.eRenderCoreFamily); auto instructionHeapBaseAddress = commandStreamReceiver.getMemoryManager()->getInternalHeapBaseAddress(commandStreamReceiver.rootDeviceIndex, !hwHelper.useSystemMemoryPlacementForISA(pDevice->getHardwareInfo())); EXPECT_EQ(dsh.getCpuBase(), reinterpret_cast(cmd.getDynamicStateBaseAddress())); EXPECT_EQ(instructionHeapBaseAddress, cmd.getInstructionBaseAddress()); EXPECT_EQ(ioh.getCpuBase(), reinterpret_cast(cmd.getIndirectObjectBaseAddress())); EXPECT_EQ(ssh.getCpuBase(), reinterpret_cast(cmd.getSurfaceStateBaseAddress())); EXPECT_EQ(l3CacheOnMocs, cmd.getStatelessDataPortAccessMemoryObjectControlState()); EXPECT_EQ(stateHeapMocs, cmd.getInstructionMemoryObjectControlState()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenStateBaseAddressWhenItIsRequiredThenThereIsPipeControlPriorToItWithTextureCacheFlush) { typedef typename FamilyType::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); configureCSRtoNonDirtyState(false); ioh.replaceBuffer(ptrOffset(ioh.getCpuBase(), +1u), ioh.getMaxAvailableSpace() + MemoryConstants::pageSize * 3); flushTask(commandStreamReceiver); parseCommands(commandStreamReceiver.commandStream, 0); auto stateBaseAddressItor = find(cmdList.begin(), cmdList.end()); auto pipeControlItor = find(cmdList.begin(), stateBaseAddressItor); EXPECT_NE(stateBaseAddressItor, pipeControlItor); auto pipeControlCmd = (typename FamilyType::PIPE_CONTROL *)*pipeControlItor; EXPECT_TRUE(pipeControlCmd->getTextureCacheInvalidationEnable()); EXPECT_EQ(MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo), pipeControlCmd->getDcFlushEnable()); 
}

// First flush with l3CacheOn programs a state base address; a second flush with the L3 setting
// switched to NotApplicable must not program it again.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenNotApplicableL3ConfigWhenFlushingTaskThenDontReloadSba) {
    using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;
    auto &csr = pDevice->getUltCommandStreamReceiver();

    {
        flushTaskFlags.l3CacheSettings = L3CachingSettings::l3CacheOn;
        flushTask(csr);

        parseCommands(csr.commandStream, 0);
        auto sbaIt = find(cmdList.begin(), cmdList.end());
        EXPECT_NE(cmdList.end(), sbaIt);
    }

    {
        flushTaskFlags.l3CacheSettings = L3CachingSettings::NotApplicable;

        // Parse only what the second flush appends.
        auto secondFlushStart = csr.commandStream.getUsed();
        flushTask(csr);

        cmdList.clear();
        parseCommands(csr.commandStream, secondFlushStart);
        auto sbaIt = find(cmdList.begin(), cmdList.end());
        EXPECT_EQ(cmdList.end(), sbaIt);
    }
}

// Same scenario as above, driven by numGrfRequired: DefaultGrfNumber first, then NotApplicable.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenNotApplicableGrfConfigWhenFlushingTaskThenDontReloadSba) {
    using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;
    auto &csr = pDevice->getUltCommandStreamReceiver();

    {
        flushTaskFlags.numGrfRequired = GrfConfig::DefaultGrfNumber;
        flushTask(csr);

        parseCommands(csr.commandStream, 0);
        auto sbaIt = find(cmdList.begin(), cmdList.end());
        EXPECT_NE(cmdList.end(), sbaIt);
    }

    {
        flushTaskFlags.numGrfRequired = GrfConfig::NotApplicable;

        // Parse only what the second flush appends.
        auto secondFlushStart = csr.commandStream.getUsed();
        flushTask(csr);

        cmdList.clear();
        parseCommands(csr.commandStream, secondFlushStart);
        auto sbaIt = find(cmdList.begin(), cmdList.end());
        EXPECT_EQ(cmdList.end(), sbaIt);
    }
}

// Checks when a flip of useGlobalAtomics causes the state base address to be programmed again,
// depending on multiOsContextCapable and areMultipleSubDevicesInContext.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenMultiOsContextCapableSetAndDispatchFlagsWhenFlushingTaskThenReloadSbaProperly) {
    using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;
    auto &commandStreamReceiver =
pDevice->getUltCommandStreamReceiver(); // 1. Ignore dispatchFlags.useGlobalAtomics flip if csr is not multi context capable commandStreamReceiver.multiOsContextCapable = false; flushTaskFlags.useGlobalAtomics = false; auto offset = commandStreamReceiver.commandStream.getUsed(); flushTask(commandStreamReceiver); parseCommands(commandStreamReceiver.commandStream, 0); auto stateBaseAddressItor = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), stateBaseAddressItor); flushTaskFlags.useGlobalAtomics ^= true; offset = commandStreamReceiver.commandStream.getUsed(); flushTask(commandStreamReceiver); cmdList.clear(); parseCommands(commandStreamReceiver.commandStream, offset); stateBaseAddressItor = find(cmdList.begin(), cmdList.end()); EXPECT_EQ(cmdList.end(), stateBaseAddressItor); // 2. Reprogram SBA only if dispatchFlags.useGlobalAtomics flips and csr is multi context capable or context has several devices commandStreamReceiver.multiOsContextCapable = true; flushTaskFlags.useGlobalAtomics = true; flushTaskFlags.areMultipleSubDevicesInContext = false; offset = commandStreamReceiver.commandStream.getUsed(); flushTask(commandStreamReceiver); cmdList.clear(); parseCommands(commandStreamReceiver.commandStream, offset); stateBaseAddressItor = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), stateBaseAddressItor); flushTaskFlags.useGlobalAtomics ^= true; offset = commandStreamReceiver.commandStream.getUsed(); flushTask(commandStreamReceiver); cmdList.clear(); parseCommands(commandStreamReceiver.commandStream, offset); stateBaseAddressItor = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), stateBaseAddressItor); offset = commandStreamReceiver.commandStream.getUsed(); flushTask(commandStreamReceiver); cmdList.clear(); parseCommands(commandStreamReceiver.commandStream, offset); stateBaseAddressItor = find(cmdList.begin(), cmdList.end()); EXPECT_EQ(cmdList.end(), stateBaseAddressItor); commandStreamReceiver.multiOsContextCapable = false; 
flushTaskFlags.useGlobalAtomics = true; flushTaskFlags.areMultipleSubDevicesInContext = true; offset = commandStreamReceiver.commandStream.getUsed(); flushTask(commandStreamReceiver); cmdList.clear(); parseCommands(commandStreamReceiver.commandStream, offset); stateBaseAddressItor = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), stateBaseAddressItor); flushTaskFlags.useGlobalAtomics ^= true; offset = commandStreamReceiver.commandStream.getUsed(); flushTask(commandStreamReceiver); cmdList.clear(); parseCommands(commandStreamReceiver.commandStream, offset); stateBaseAddressItor = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), stateBaseAddressItor); offset = commandStreamReceiver.commandStream.getUsed(); flushTask(commandStreamReceiver); cmdList.clear(); parseCommands(commandStreamReceiver.commandStream, offset); stateBaseAddressItor = find(cmdList.begin(), cmdList.end()); EXPECT_EQ(cmdList.end(), stateBaseAddressItor); } HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenPreambleNotSentWhenFlushingTaskThenPreambleIsSent) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.isPreambleSent = false; flushTask(commandStreamReceiver); EXPECT_TRUE(commandStreamReceiver.isPreambleSent); EXPECT_GT(commandStreamReceiver.commandStream.getUsed(), 0u); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenFlushTaskWhenInitProgrammingFlagsIsCalledThenBindingTableBaseAddressRequiredIsSetCorrecty) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.initProgrammingFlags(); EXPECT_TRUE(commandStreamReceiver.bindingTableBaseAddressRequired); flushTask(commandStreamReceiver); EXPECT_FALSE(commandStreamReceiver.bindingTableBaseAddressRequired); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenFlushTaskWhenInitProgrammingFlagsIsNotCalledThenBindingTableBaseAddressRequiredIsSetCorrectly) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); 
EXPECT_FALSE(commandStreamReceiver.bindingTableBaseAddressRequired);

    flushTask(commandStreamReceiver);
    EXPECT_FALSE(commandStreamReceiver.bindingTableBaseAddressRequired);
}

// Preamble not sent, lastMediaSamplerConfig = -1: the looked-up command must be present.
HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenPreambleNotSentAndMediaSamplerRequirementChangedWhenFlushingTaskThenPipelineSelectIsSent) {
    auto &csr = pDevice->getUltCommandStreamReceiver();
    csr.isPreambleSent = false;
    csr.lastMediaSamplerConfig = -1;

    flushTask(csr);
    parseCommands(csr.commandStream, 0);

    EXPECT_NE(nullptr, getCommand());
}

// Preamble not sent, lastMediaSamplerConfig = 0: the looked-up command must still be present.
HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenPreambleNotSentAndMediaSamplerRequirementNotChangedWhenFlushingTaskThenPipelineSelectIsSent) {
    auto &csr = pDevice->getUltCommandStreamReceiver();
    csr.isPreambleSent = false;
    csr.lastMediaSamplerConfig = 0;

    flushTask(csr);
    parseCommands(csr.commandStream, 0);

    EXPECT_NE(nullptr, getCommand());
}

// Preamble already sent, lastMediaSamplerConfig = 0: the command is expected only on products
// whose HwInfoConfig reports is3DPipelineSelectWARequired().
HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenPreambleSentAndMediaSamplerRequirementNotChangedWhenFlushingTaskThenPipelineSelectIsNotSent) {
    auto &csr = pDevice->getUltCommandStreamReceiver();
    csr.isPreambleSent = true;
    csr.lastMediaSamplerConfig = 0;

    flushTask(csr);
    parseCommands(csr.commandStream, 0);

    const auto &productConfig = *HwInfoConfig::get(pDevice->getHardwareInfo().platform.eProductFamily);
    if (productConfig.is3DPipelineSelectWARequired()) {
        EXPECT_NE(nullptr, getCommand());
    } else {
        EXPECT_EQ(nullptr, getCommand());
    }
}

// Preamble already sent, lastMediaSamplerConfig = 1: the looked-up command must be present.
HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenPreambleSentAndMediaSamplerRequirementChangedWhenFlushingTaskThenPipelineSelectIsSent) {
    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver();
    commandStreamReceiver.isPreambleSent = true;
    commandStreamReceiver.lastMediaSamplerConfig = 1;
flushTask(commandStreamReceiver);
    parseCommands(commandStreamReceiver.commandStream, 0);

    EXPECT_NE(nullptr, getCommand());
}

// With the preamble marked as sent and the media VFE state clean, a flush on a fresh CSR must
// still write commands, including the one searched for via find().
HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenStateBaseAddressNotSentWhenFlushingTaskThenStateBaseAddressIsSent) {
    auto &csr = pDevice->getUltCommandStreamReceiver();
    csr.isPreambleSent = true;
    csr.setMediaVFEStateDirty(false);

    flushTask(csr);
    EXPECT_GT(csr.commandStream.getUsed(), 0u);

    parseCommands(csr.commandStream, 0);
    auto sbaIt = find(cmdList.begin(), cmdList.end());
    EXPECT_NE(cmdList.end(), sbaIt);
}

// Marks the heap states non-dirty while the heaps have size 0, restores the original sizes and
// flushes: the size change alone must cause the searched command to be written.
HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenSizeChangedWhenFlushingTaskThenStateBaseAddressIsSent) {
    auto &csr = pDevice->getUltCommandStreamReceiver();

    // Remember the real sizes, then shrink every heap to zero (same base pointers).
    auto originalDshSize = dsh.getMaxAvailableSpace();
    auto originalIohSize = ioh.getMaxAvailableSpace();
    auto originalSshSize = ssh.getMaxAvailableSpace();
    dsh.replaceBuffer(dsh.getCpuBase(), 0);
    ioh.replaceBuffer(ioh.getCpuBase(), 0);
    ssh.replaceBuffer(ssh.getCpuBase(), 0);

    csr.isPreambleSent = true;
    csr.setMediaVFEStateDirty(false);
    configureCSRHeapStatesToNonDirty();

    // Restore the sizes after the CSR recorded the zero-sized heaps.
    dsh.replaceBuffer(dsh.getCpuBase(), originalDshSize);
    ioh.replaceBuffer(ioh.getCpuBase(), originalIohSize);
    ssh.replaceBuffer(ssh.getCpuBase(), originalSshSize);

    flushTask(csr);
    EXPECT_GT(csr.commandStream.getUsed(), 0u);

    parseCommands(csr.commandStream, 0);
    auto sbaIt = find(cmdList.begin(), cmdList.end());
    EXPECT_NE(cmdList.end(), sbaIt);
}

// Skipped on devices without image support; otherwise replaces the DSH buffer on a non-dirty CSR
// and expects the searched command after the flush.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenDshHeapChangeWhenFlushTaskIsCalledThenSbaIsReloaded) {
    bool deviceUsesDsh = pDevice->getHardwareInfo().capabilityTable.supportsImages;
    if (!deviceUsesDsh) {
        GTEST_SKIP();
    }
    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver();
    configureCSRtoNonDirtyState(false);
dsh.replaceBuffer(nullptr, 0); flushTask(commandStreamReceiver); parseCommands(commandStreamReceiver.commandStream, 0); auto stateBaseAddressItor = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), stateBaseAddressItor); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenSshHeapChangeWhenFlushTaskIsCalledThenSbaIsReloaded) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); configureCSRtoNonDirtyState(false); ssh.replaceBuffer(nullptr, 0); flushTask(commandStreamReceiver); parseCommands(commandStreamReceiver.commandStream, 0); auto stateBaseAddressItor = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), stateBaseAddressItor); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenIohHeapChangeWhenFlushTaskIsCalledThenSbaIsReloaded) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); configureCSRtoNonDirtyState(false); ioh.replaceBuffer(nullptr, 0); flushTask(commandStreamReceiver); parseCommands(commandStreamReceiver.commandStream, 0); auto stateBaseAddressItor = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), stateBaseAddressItor); } HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenStateBaseAddressNotChangedWhenFlushingTaskThenStateBaseAddressIsNotSent) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.isPreambleSent = true; configureCSRHeapStatesToNonDirty(); flushTask(commandStreamReceiver); auto base = commandStreamReceiver.commandStream.getCpuBase(); auto stateBaseAddress = base ? 
genCmdCast(base) : nullptr;
    EXPECT_EQ(nullptr, stateBaseAddress);
}

// Flushing must not consume any space in the fixture's task command stream.
HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenEmptyCqsWhenFlushingTaskThenCommandNotAdded) {
    auto &csr = pDevice->getUltCommandStreamReceiver();
    const auto usedBefore = commandStream.getUsed();

    flushTask(csr);

    EXPECT_EQ(usedBefore, commandStream.getUsed());
}

// A blocking flush must leave a command matched by find() in the fixture's task command stream.
HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenBlockingWhenFlushingTaskThenPipeControlIsAdded) {
    typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL;
    auto &csr = pDevice->getUltCommandStreamReceiver();

    const auto blocking = true;
    flushTask(csr, blocking);

    // Note: the task stream is parsed here, not the CSR's own stream.
    parseCommands(commandStream, 0);
    auto pcIt = find(cmdList.begin(), cmdList.end());
    EXPECT_NE(cmdList.end(), pcIt);
}

// Blocking flush with the CSR at level 6 and the task at level 5: the CSR level moves to 7 and
// the task count becomes 1. (The unused local PIPE_CONTROL alias was dropped.)
HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenBlockingWithNoPreviousDependenciesWhenFlushingTaskThenTaskLevelIsIncremented) {
    auto &csr = pDevice->getUltCommandStreamReceiver();
    taskLevel = 5;
    csr.taskLevel = 6;

    const auto blocking = true;
    flushTask(csr, blocking);

    EXPECT_EQ(7u, csr.peekTaskLevel());
    EXPECT_EQ(1u, csr.peekTaskCount());
}

// Non-blocking flush with the same levels: the CSR level stays at 6 and the task count becomes 1.
// (The unused local PIPE_CONTROL alias was dropped.)
HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenNonBlockingWithNoPreviousDependenciesWhenFlushingTaskThenTaskLevelIsNotIncremented) {
    auto &csr = pDevice->getUltCommandStreamReceiver();
    taskLevel = 5;
    csr.taskLevel = 6;

    const auto blocking = false;
    flushTask(csr, blocking);

    EXPECT_EQ(6u, csr.peekTaskLevel());
    EXPECT_EQ(1u, csr.peekTaskCount());
}

// Leaves only the preamble-sized space free in the CSR stream before flushing and checks how
// much of the stream is used afterwards.
HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenEnoughMemoryOnlyForPreambleWhenFlushingTaskThenOnlyAvailableMemoryIsUsed) {
    typedef typename FamilyType::MI_BATCH_BUFFER_START MI_BATCH_BUFFER_START;
    typedef typename
FamilyType::MI_BATCH_BUFFER_END MI_BATCH_BUFFER_END; hardwareInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled = 1; auto mockDevice = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(&hardwareInfo, 0u)); auto &commandStreamReceiver = mockDevice->getUltCommandStreamReceiver(); commandStreamReceiver.timestampPacketWriteEnabled = false; // Force a PIPE_CONTROL through a taskLevel transition taskLevel = commandStreamReceiver.peekTaskLevel() + 1; commandStreamReceiver.streamProperties.stateComputeMode.isCoherencyRequired.value = 0; auto l3Config = PreambleHelper::getL3Config(pDevice->getHardwareInfo(), false); commandStreamReceiver.lastSentL3Config = l3Config; auto &csrCS = commandStreamReceiver.getCS(); size_t sizeNeededForPreamble = commandStreamReceiver.getRequiredCmdSizeForPreamble(*mockDevice); size_t sizeNeeded = commandStreamReceiver.getRequiredCmdStreamSize(flushTaskFlags, *mockDevice); sizeNeeded -= sizeof(MI_BATCH_BUFFER_START); // no task to submit sizeNeeded += sizeof(MI_BATCH_BUFFER_END); // no task to submit, add BBE to CSR stream sizeNeeded = alignUp(sizeNeeded, MemoryConstants::cacheLineSize); csrCS.getSpace(csrCS.getAvailableSpace() - sizeNeededForPreamble); commandStreamReceiver.streamProperties.stateComputeMode.setProperties(flushTaskFlags.requiresCoherency, flushTaskFlags.numGrfRequired, flushTaskFlags.threadArbitrationPolicy, *defaultHwInfo); flushTask(commandStreamReceiver); EXPECT_EQ(sizeNeeded, csrCS.getUsed()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenEnoughMemoryOnlyForPreambleAndSbaWhenFlushingTaskThenOnlyAvailableMemoryIsUsed) { typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; typedef typename FamilyType::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; typedef typename FamilyType::MI_BATCH_BUFFER_START MI_BATCH_BUFFER_START; typedef typename FamilyType::MI_BATCH_BUFFER_END MI_BATCH_BUFFER_END; hardwareInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled = 1; auto mockDevice = 
std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(&hardwareInfo, 0u)); auto &commandStreamReceiver = mockDevice->getUltCommandStreamReceiver(); commandStreamReceiver.timestampPacketWriteEnabled = false; // Force a PIPE_CONTROL through a taskLevel transition taskLevel = commandStreamReceiver.peekTaskLevel() + 1; auto l3Config = PreambleHelper::getL3Config(mockDevice->getHardwareInfo(), false); commandStreamReceiver.lastSentL3Config = l3Config; auto &csrCS = commandStreamReceiver.getCS(); size_t sizeNeededForPreamble = commandStreamReceiver.getRequiredCmdSizeForPreamble(*mockDevice); size_t sizeNeededForStateBaseAddress = sizeof(STATE_BASE_ADDRESS) + sizeof(PIPE_CONTROL); size_t sizeNeeded = commandStreamReceiver.getRequiredCmdStreamSize(flushTaskFlags, *mockDevice); sizeNeeded -= sizeof(MI_BATCH_BUFFER_START); // no task to submit sizeNeeded += sizeof(MI_BATCH_BUFFER_END); // no task to submit, add BBE to CSR stream sizeNeeded = alignUp(sizeNeeded, MemoryConstants::cacheLineSize); csrCS.getSpace(csrCS.getAvailableSpace() - sizeNeededForPreamble - sizeNeededForStateBaseAddress); commandStreamReceiver.streamProperties.stateComputeMode.setProperties(flushTaskFlags.requiresCoherency, flushTaskFlags.numGrfRequired, flushTaskFlags.threadArbitrationPolicy, *defaultHwInfo); flushTask(commandStreamReceiver); EXPECT_EQ(sizeNeeded, csrCS.getUsed()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenEnoughMemoryOnlyForPreambleAndSbaAndPipeControlWhenFlushingTaskThenOnlyAvailableMemoryIsUsed) { typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; typedef typename FamilyType::PIPELINE_SELECT PIPELINE_SELECT; typedef typename FamilyType::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; typedef typename FamilyType::MI_BATCH_BUFFER_START MI_BATCH_BUFFER_START; commandStream.getSpace(sizeof(PIPE_CONTROL)); hardwareInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled = 1; auto mockDevice = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(&hardwareInfo, 0u)); auto 
&commandStreamReceiver = mockDevice->getUltCommandStreamReceiver(); commandStreamReceiver.timestampPacketWriteEnabled = false; // Force a PIPE_CONTROL through a taskLevel transition taskLevel = commandStreamReceiver.peekTaskLevel() + 1; commandStreamReceiver.streamProperties.stateComputeMode.isCoherencyRequired.value = 0; auto l3Config = PreambleHelper::getL3Config(mockDevice->getHardwareInfo(), false); commandStreamReceiver.lastSentL3Config = l3Config; auto &csrCS = commandStreamReceiver.getCS(); size_t sizeNeeded = commandStreamReceiver.getRequiredCmdStreamSizeAligned(flushTaskFlags, *mockDevice); csrCS.getSpace(csrCS.getAvailableSpace() - sizeNeeded); auto expectedBase = csrCS.getCpuBase(); // This case handles when we have *just* enough space auto expectedUsed = csrCS.getUsed() + sizeNeeded; flushTaskFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(mockDevice->getHardwareInfo()); commandStreamReceiver.streamProperties.stateComputeMode.setProperties(flushTaskFlags.requiresCoherency, flushTaskFlags.numGrfRequired, flushTaskFlags.threadArbitrationPolicy, *defaultHwInfo); commandStreamReceiver.flushTask( commandStream, 0, &dsh, &ioh, &ssh, taskLevel, flushTaskFlags, *mockDevice); // Verify that we didn't grab a new CS buffer EXPECT_EQ(expectedUsed, csrCS.getUsed()); EXPECT_EQ(expectedBase, csrCS.getCpuBase()); } template struct CommandStreamReceiverHwLog : public UltCommandStreamReceiver { CommandStreamReceiverHwLog(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield) : UltCommandStreamReceiver(executionEnvironment, rootDeviceIndex, deviceBitfield), flushCount(0) { } SubmissionStatus flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override { ++flushCount; return SubmissionStatus::SUCCESS; } int flushCount; }; HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenBothCsWhenFlushingTaskThenFlushOnce) { CommandStreamReceiverHwLog 
commandStreamReceiver(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); commandStreamReceiver.setupContext(*pDevice->getDefaultEngine().osContext); commandStreamReceiver.initializeTagAllocation(); commandStreamReceiver.createPreemptionAllocation(); commandStream.getSpace(sizeof(typename FamilyType::MI_NOOP)); flushTask(commandStreamReceiver); EXPECT_EQ(1, commandStreamReceiver.flushCount); } HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenBothCsWhenFlushingTaskThenChainWithBatchBufferStart) { typedef typename FamilyType::MI_BATCH_BUFFER_START MI_BATCH_BUFFER_START; typedef typename FamilyType::MI_NOOP MI_NOOP; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); // Reserve space for 16 NOOPs commandStream.getSpace(16 * sizeof(MI_NOOP)); // Submit starting at 8 NOOPs size_t startOffset = 8 * sizeof(MI_NOOP); flushTask(commandStreamReceiver, false, startOffset); // Locate the MI_BATCH_BUFFER_START parseCommands(commandStreamReceiver.commandStream, 0); auto itorBBS = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), itorBBS); auto bbs = genCmdCast(*itorBBS); ASSERT_NE(nullptr, bbs); // Expect to see address based on startOffset of task auto expectedAddress = static_cast(reinterpret_cast(ptrOffset(commandStream.getCpuBase(), startOffset))); EXPECT_EQ(expectedAddress, bbs->getBatchBufferStartAddress()); // MI_BATCH_BUFFER_START from UMD must be PPGTT for security reasons EXPECT_EQ(MI_BATCH_BUFFER_START::ADDRESS_SPACE_INDICATOR_PPGTT, bbs->getAddressSpaceIndicator()); } typedef Test CommandStreamReceiverCQFlushTaskTests; HWTEST_F(CommandStreamReceiverCQFlushTaskTests, WhenGettingCsThenReturnCsWithEnoughSize) { CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStreamReceiver = commandQueue.getGpgpuCommandStreamReceiver(); // NOTE: This test attempts to reserve the maximum amount // of memory such that if a client gets everything he wants // we don't overflow/corrupt memory 
when CSR appends its // work. size_t sizeCQReserves = CSRequirements::minCommandQueueCommandStreamSize; size_t sizeRequested = MemoryConstants::pageSize64k - sizeCQReserves; auto &commandStream = commandQueue.getCS(sizeRequested); auto expect = alignUp(sizeRequested + CSRequirements::csOverfetchSize, MemoryConstants::pageSize64k); ASSERT_GE(expect, commandStream.getMaxAvailableSpace()); EXPECT_GE(commandStream.getAvailableSpace(), sizeRequested); commandStream.getSpace(sizeRequested - sizeCQReserves); MockGraphicsAllocation allocation((void *)MemoryConstants::pageSize64k, 1); IndirectHeap linear(&allocation); auto blocking = true; DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); dispatchFlags.blocking = blocking; commandStreamReceiver.flushTask( commandStream, 0, &linear, &linear, &linear, 1, dispatchFlags, *pDevice); auto expectedSize = MemoryConstants::pageSize64k - sizeCQReserves; if (::renderCoreFamily == IGFX_GEN8_CORE) { expectedSize -= sizeof(typename FamilyType::PIPE_CONTROL); } expectedSize = alignUp(expectedSize, MemoryConstants::cacheLineSize); auto currentUsed = commandStream.getUsed(); EXPECT_EQ(0u, currentUsed % MemoryConstants::cacheLineSize); //depending on the size of commands we may need whole additional cacheline for alignment if (currentUsed != expectedSize) { EXPECT_EQ(expectedSize - MemoryConstants::cacheLineSize, currentUsed); } else { EXPECT_EQ(expectedSize, currentUsed); } } HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskTests, GivenBlockingWhenFlushingTaskThenAddPipeControlOnlyToTaskCs) { typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto commandStreamReceiver = new MockCsrHw(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(commandStreamReceiver); 
// Configure the CSR to not need to submit any state or commands configureCSRtoNonDirtyState(false); // Force a PIPE_CONTROL through a blocking flag auto blocking = true; auto &commandStreamTask = commandQueue.getCS(1024); auto &commandStreamCSR = commandStreamReceiver->getCS(); commandStreamReceiver->streamProperties.stateComputeMode.isCoherencyRequired.value = 0; DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); dispatchFlags.blocking = blocking; dispatchFlags.guardCommandBufferWithPipeControl = true; commandStreamReceiver->flushTask( commandStreamTask, 0, &dsh, &ioh, &ssh, taskLevel, dispatchFlags, *pDevice); // Verify that taskCS got modified, while csrCS remained intact EXPECT_GT(commandStreamTask.getUsed(), 0u); EXPECT_EQ(0u, commandStreamCSR.getUsed()); // Parse command list to verify that PC got added to taskCS cmdList.clear(); parseCommands(commandStreamTask, 0); auto itorTaskCS = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), itorTaskCS); // Parse command list to verify that PC wasn't added to csrCS cmdList.clear(); parseCommands(commandStreamCSR, 0); auto numberOfPC = getCommandsList().size(); EXPECT_EQ(0u, numberOfPC); } /* Flushes a blocking task with dcFlush and guardCommandBufferWithPipeControl set, then inspects the pipe control(s) found in the task stream. When the pipe-control workaround is required, the first one must have DC flush disabled and, if more than one pipe control is expected, the next one must match MemorySynchronizationCommands::getDcFlushEnable(true, ...); otherwise the first one must match it. */ HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskTests, GivenBlockingWhenFlushingTaskThenAddPipeControlWithDcFlush) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); configureCSRtoNonDirtyState(false); auto &commandStreamReceiver = commandQueue.getGpgpuCommandStreamReceiver(); size_t pipeControlCount = UltMemorySynchronizationCommands::getExpectedPipeControlCount(pDevice->getHardwareInfo()); auto &commandStreamTask = commandQueue.getCS(1024); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = 
PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); dispatchFlags.blocking = true; dispatchFlags.dcFlush = true; dispatchFlags.guardCommandBufferWithPipeControl = true; commandStreamReceiver.flushTask( commandStreamTask, 0, &dsh, &ioh, &ssh, taskLevel, dispatchFlags, *pDevice); parseCommands(commandStreamTask, 0); auto itorPC = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), itorPC); if (UnitTestHelper::isPipeControlWArequired(pDevice->getHardwareInfo())) { // Verify that the dcFlushEnabled bit is not set in the first (workaround) PC auto pCmdWA = reinterpret_cast(*itorPC); EXPECT_FALSE(pCmdWA->getDcFlushEnable()); if (pipeControlCount > 1) { // Search taskCS for PC to analyze itorPC = find(++itorPC, cmdList.end()); auto pipeControlTask = genCmdCast(*itorPC); ASSERT_NE(nullptr, pipeControlTask); // Verify that the dcFlushEnabled bit matches the expected DC flush setting in this PC auto pCmd = reinterpret_cast(pipeControlTask); EXPECT_EQ(MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo), pCmd->getDcFlushEnable()); } } else { auto pCmd = reinterpret_cast(*itorPC); EXPECT_EQ(MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo), pCmd->getDcFlushEnable()); } } /* Enqueues a non-blocking read of a CL_MEM_USE_HOST_PTR buffer that waits on a mock event, checks that nothing reached the CSR stream while blocked, completes the event, and checks that the pipe control in the last flushed stream (the second one when the pipe-control workaround is required) carries the DC flush setting returned by MemorySynchronizationCommands::getDcFlushEnable(true, ...). */ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenBlockedKernelRequiringDCFlushWhenUnblockedThenDCFlushIsAdded) { typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; MockContext ctx(pClDevice); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.timestampPacketWriteEnabled = false; CommandQueueHw commandQueue(&ctx, pClDevice, 0, false); cl_event blockingEvent; MockEvent mockEvent(&ctx); blockingEvent = &mockEvent; size_t tempBuffer[] = {0, 1, 2}; size_t dstBuffer[] = {0, 1, 2}; cl_int retVal = 0; auto buffer = Buffer::create(&ctx, CL_MEM_USE_HOST_PTR, sizeof(tempBuffer), tempBuffer, retVal); auto &commandStreamCSR = commandStreamReceiver.getCS(); commandQueue.enqueueReadBuffer(buffer, CL_FALSE, 0, sizeof(tempBuffer), dstBuffer, nullptr, 1, &blockingEvent, 
0); // Expect nothing was sent EXPECT_EQ(0u, commandStreamCSR.getUsed()); // Unblock Event mockEvent.setStatus(CL_COMPLETE); auto &commandStreamTask = *commandStreamReceiver.lastFlushedCommandStream; cmdList.clear(); // Parse command list parseCommands(commandStreamTask, 0); auto itorPC = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), itorPC); if (UnitTestHelper::isPipeControlWArequired(pDevice->getHardwareInfo())) { itorPC++; itorPC = find(itorPC, cmdList.end()); EXPECT_NE(cmdList.end(), itorPC); } // Verify that the dcFlushEnabled bit is set in PC auto pCmdWA = reinterpret_cast(*itorPC); EXPECT_EQ(MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo), pCmdWA->getDcFlushEnable()); buffer->release(); } /* Flushes with dispatchFlags.threadArbitrationPolicy set to RoundRobin and checks that the CSR's stateComputeMode.threadArbitrationPolicy stream property holds the same value afterwards. */ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenDispatchFlagsWhenCallFlushTaskThenThreadArbitrationPolicyIsSetProperly) { auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(mockCsr); CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.threadArbitrationPolicy = ThreadArbitrationPolicy::RoundRobin; mockCsr->flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevel, dispatchFlags, *pDevice); EXPECT_EQ(dispatchFlags.threadArbitrationPolicy, mockCsr->streamProperties.stateComputeMode.threadArbitrationPolicy.value); } /* Value-parameterized fixture; the tests using it read the parameter through GetParam() as a memory compression state. */ class CommandStreamReceiverFlushTaskMemoryCompressionTests : public UltCommandStreamReceiverTest, public ::testing::WithParamInterface {}; /* With dispatchFlags.memoryCompressionState set to NotApplicable, a flush must leave the CSR's lastMemoryCompressionState at the value (taken from the test parameter) it had before the flush. */ HWTEST_P(CommandStreamReceiverFlushTaskMemoryCompressionTests, givenCsrWithMemoryCompressionStateNotApplicableWhenFlushTaskIsCalledThenUseLastMemoryCompressionState) { auto &mockCsr = pDevice->getUltCommandStreamReceiver(); CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); 
DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.memoryCompressionState = MemoryCompressionState::NotApplicable; mockCsr.lastMemoryCompressionState = GetParam(); MemoryCompressionState lastMemoryCompressionState = mockCsr.lastMemoryCompressionState; mockCsr.flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevel, dispatchFlags, *pDevice); EXPECT_EQ(lastMemoryCompressionState, mockCsr.lastMemoryCompressionState); } /* Dispatches the parameter as the requested memory compression state. For NotApplicable, the CSR's last state must survive the flush unchanged for each of the three starting states; for any other parameter, the CSR's last state must become the dispatched state regardless of the starting state. */ HWTEST_P(CommandStreamReceiverFlushTaskMemoryCompressionTests, givenCsrWithMemoryCompressionStateApplicableWhenFlushTaskIsCalledThenUpdateLastMemoryCompressionState) { auto &mockCsr = pDevice->getUltCommandStreamReceiver(); CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.memoryCompressionState = GetParam(); if (dispatchFlags.memoryCompressionState == MemoryCompressionState::NotApplicable) { for (auto memoryCompressionState : {MemoryCompressionState::NotApplicable, MemoryCompressionState::Disabled, MemoryCompressionState::Enabled}) { mockCsr.lastMemoryCompressionState = memoryCompressionState; MemoryCompressionState lastMemoryCompressionState = mockCsr.lastMemoryCompressionState; mockCsr.flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevel, dispatchFlags, *pDevice); EXPECT_EQ(lastMemoryCompressionState, mockCsr.lastMemoryCompressionState); } } else { for (auto memoryCompressionState : {MemoryCompressionState::NotApplicable, MemoryCompressionState::Disabled, MemoryCompressionState::Enabled}) { mockCsr.lastMemoryCompressionState = memoryCompressionState; mockCsr.flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevel, dispatchFlags, *pDevice); EXPECT_EQ(dispatchFlags.memoryCompressionState, mockCsr.lastMemoryCompressionState); } } } /* Instantiates the parameterized memory-compression tests once for each of NotApplicable, Disabled and Enabled. */ INSTANTIATE_TEST_CASE_P( CommandStreamReceiverFlushTaskMemoryCompressionTestsValues, 
CommandStreamReceiverFlushTaskMemoryCompressionTests, testing::Values(MemoryCompressionState::NotApplicable, MemoryCompressionState::Disabled, MemoryCompressionState::Enabled)); command_stream_receiver_flush_task_2_tests.cpp000066400000000000002417241422164147700360220ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/command_stream/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/csr_definitions.h" #include "shared/source/command_stream/scratch_space_controller.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/state_base_address.h" #include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/source/os_interface/os_context.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/dispatch_flags_helper.h" #include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/mocks/mock_allocation_properties.h" #include "shared/test/common/mocks/mock_csr.h" #include "shared/test/common/mocks/mock_gmm_page_table_mngr.h" #include "shared/test/common/mocks/mock_hw_helper.h" #include "shared/test/common/mocks/mock_submissions_aggregator.h" #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h" #include "opencl/test/unit_test/helpers/raii_hw_helper.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_event.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "reg_configs_common.h" using namespace NEO; using CommandStreamReceiverFlushTaskTests = UltCommandStreamReceiverTest; /* Enqueues a non-blocking write to a CL_MEM_USE_HOST_PTR buffer behind a mock event, checks that nothing reached the CSR stream while blocked, completes the event, and compares the DC flush bit of the pipe control in the last flushed stream with MemorySynchronizationCommands::getDcFlushEnable(true, ...). NOTE(review): the test name says the DC flush is not added, while the assertion expects the value of getDcFlushEnable(true, ...) - confirm intent. */ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenBlockedKernelNotRequiringDCFlushWhenUnblockedThenDCFlushIsNotAdded) { typedef typename 
FamilyType::PIPE_CONTROL PIPE_CONTROL; MockContext ctx(pClDevice); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.timestampPacketWriteEnabled = false; CommandQueueHw commandQueue(&ctx, pClDevice, 0, false); cl_event blockingEvent; MockEvent mockEvent(&ctx); blockingEvent = &mockEvent; size_t tempBuffer[] = {0, 1, 2}; size_t dstBuffer[] = {0, 1, 2}; cl_int retVal = 0; auto buffer = Buffer::create(&ctx, CL_MEM_USE_HOST_PTR, sizeof(tempBuffer), tempBuffer, retVal); auto &commandStreamCSR = commandStreamReceiver.getCS(); commandQueue.enqueueWriteBuffer(buffer, CL_FALSE, 0, sizeof(tempBuffer), dstBuffer, nullptr, 1, &blockingEvent, 0); // Expect nothing was sent EXPECT_EQ(0u, commandStreamCSR.getUsed()); // Unblock Event mockEvent.setStatus(CL_COMPLETE); auto &commandStreamTask = *commandStreamReceiver.lastFlushedCommandStream; cmdList.clear(); // Parse command list parseCommands(commandStreamTask, 0); auto itorPC = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), itorPC); if (UnitTestHelper::isPipeControlWArequired(pDevice->getHardwareInfo())) { itorPC++; itorPC = find(itorPC, cmdList.end()); EXPECT_NE(cmdList.end(), itorPC); } // Verify that the dcFlushEnabled bit is set in PC auto pCmdWA = reinterpret_cast(*itorPC); EXPECT_EQ(MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo), pCmdWA->getDcFlushEnable()); buffer->release(); } /* With the UpdateTaskCountFromWait debug flag set to 3 (restored on scope exit), a non-blocking write enqueue must leave the searched command (a PIPE_CONTROL per the test name) out of the last flushed command stream. */ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenEnableUpdateTaskFromWaitWhenNonBlockingCallIsMadeThenNoPipeControlInsertedOnDevicesWithoutDCFlushRequirements) { DebugManagerStateRestore restorer; DebugManager.flags.UpdateTaskCountFromWait.set(3u); typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; MockContext ctx(pClDevice); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.timestampPacketWriteEnabled = false; CommandQueueHw commandQueue(&ctx, pClDevice, 0, false); size_t tempBuffer[] = {0, 1, 2}; size_t dstBuffer[] = 
{0, 1, 2}; cl_int retVal = 0; auto buffer = Buffer::create(&ctx, CL_MEM_USE_HOST_PTR, sizeof(tempBuffer), tempBuffer, retVal); commandQueue.enqueueWriteBuffer(buffer, CL_FALSE, 0, sizeof(tempBuffer), dstBuffer, nullptr, 0u, nullptr, 0); auto &commandStreamTask = *commandStreamReceiver.lastFlushedCommandStream; cmdList.clear(); // Parse command list parseCommands(commandStreamTask, 0); auto itorPC = find(cmdList.begin(), cmdList.end()); EXPECT_EQ(cmdList.end(), itorPC); buffer->release(); } /* Checks that the completion stamp returned by flushTask carries the CSR's current task count and task level. */ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenTaskCsPassedAsCommandStreamParamWhenFlushingTaskThenCompletionStampIsCorrect) { CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); auto &commandStreamTask = commandQueue.getCS(1024); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); // Pass taskCS as command stream parameter auto cs = commandStreamReceiver.flushTask( commandStreamTask, 0, &dsh, &ioh, &ssh, taskLevel, dispatchFlags, *pDevice); // Verify that flushTask returned a valid completion stamp EXPECT_EQ(commandStreamReceiver.peekTaskCount(), cs.taskCount); EXPECT_EQ(commandStreamReceiver.peekTaskLevel(), cs.taskLevel); } /* With every task counter zeroed, calling finish() twice on an empty queue must leave the CSR's latest sent task count and the queue's latestTaskCountWaited at 0. */ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenEmptyQueueWhenFinishingThenTaskCountIsNotIncremented) { MockContext ctx(pClDevice); MockCommandQueueHw mockCmdQueue(&ctx, pClDevice, nullptr); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); uint32_t taskCount = 0; taskLevel = taskCount; mockCmdQueue.taskCount = taskCount; mockCmdQueue.taskLevel = taskCount; commandStreamReceiver.taskLevel = taskCount; commandStreamReceiver.taskCount = taskCount; EXPECT_EQ(0u, commandStreamReceiver.peekLatestSentTaskCount()); mockCmdQueue.finish(); EXPECT_EQ(0u, commandStreamReceiver.peekLatestSentTaskCount()); mockCmdQueue.finish(); //nothing was sent to the HW, no need to bump tags EXPECT_EQ(0u, commandStreamReceiver.peekLatestSentTaskCount()); 
EXPECT_EQ(0u, mockCmdQueue.latestTaskCountWaited); } /* With the UpdateTaskCountFromWait debug flag set to 3, calling waitForCompletionWithTimeout for task count 1 must put the searched command (a PIPE_CONTROL per the test name) into the CSR's command stream. */ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenTaskCountToWaitBiggerThanLatestSentTaskCountWhenWaitForCompletionThenFlushPipeControl) { typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; DebugManagerStateRestore restorer; DebugManager.flags.UpdateTaskCountFromWait.set(3); auto &csr = pDevice->getUltCommandStreamReceiver(); csr.waitForCompletionWithTimeout(false, 0, 1u); auto &commandStreamTask = csr.getCS(); parseCommands(commandStreamTask, 0); auto itorPC = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), itorPC); } /* Starting from zeroed task counters, one kernel enqueue followed by finish() must yield a latest sent task count, a waited task count and a CSR task count of 1; a second finish() must leave all three unchanged. */ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenNonDcFlushWithInitialTaskCountZeroWhenFinishingThenTaskCountIncremented) { MockContext ctx(pClDevice); MockKernelWithInternals kernel(*pClDevice); MockCommandQueueHw mockCmdQueue(&ctx, pClDevice, nullptr); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); size_t GWS = 1; uint32_t taskCount = 0; taskLevel = taskCount; mockCmdQueue.taskCount = taskCount; mockCmdQueue.taskLevel = taskCount; commandStreamReceiver.taskLevel = taskCount; commandStreamReceiver.taskCount = taskCount; EXPECT_EQ(0u, commandStreamReceiver.peekLatestSentTaskCount()); // finish after enqueued kernel(cmdq task count = 1) mockCmdQueue.enqueueKernel(kernel, 1, nullptr, &GWS, nullptr, 0, nullptr, nullptr); mockCmdQueue.finish(); EXPECT_EQ(1u, commandStreamReceiver.peekLatestSentTaskCount()); EXPECT_EQ(1u, mockCmdQueue.latestTaskCountWaited); EXPECT_EQ(1u, commandStreamReceiver.peekTaskCount()); // finish again - dont call flush task mockCmdQueue.finish(); EXPECT_EQ(1u, commandStreamReceiver.peekLatestSentTaskCount()); EXPECT_EQ(1u, mockCmdQueue.latestTaskCountWaited); EXPECT_EQ(1u, commandStreamReceiver.peekTaskCount()); } /* Starting from zeroed task counters, checks that after a single kernel enqueue the latest sent task count stays at 1 across a blocking map, two finish() calls, an unmap and a second blocking map of a CL_MEM_USE_HOST_PTR buffer. */ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenDcFlushWhenFinishingThenTaskCountIncremented) { MockContext ctx(pClDevice); MockKernelWithInternals kernel(*pClDevice); MockCommandQueueHw mockCmdQueue(&ctx, pClDevice, 
nullptr); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); size_t GWS = 1; size_t tempBuffer[] = {0, 1, 2}; cl_int retVal; auto buffer = Buffer::create(&ctx, CL_MEM_USE_HOST_PTR, sizeof(tempBuffer), tempBuffer, retVal); EXPECT_EQ(retVal, CL_SUCCESS); uint32_t taskCount = 0; taskLevel = taskCount; mockCmdQueue.taskCount = taskCount; mockCmdQueue.taskLevel = taskCount; commandStreamReceiver.taskLevel = taskCount; commandStreamReceiver.taskCount = taskCount; EXPECT_EQ(0u, commandStreamReceiver.peekLatestSentTaskCount()); // finish(dcFlush=true) from blocking MapBuffer after enqueued kernel mockCmdQueue.enqueueKernel(kernel, 1, nullptr, &GWS, nullptr, 0, nullptr, nullptr); EXPECT_EQ(1u, commandStreamReceiver.peekLatestSentTaskCount()); auto ptr = mockCmdQueue.enqueueMapBuffer(buffer, CL_TRUE, CL_MAP_READ, 0, sizeof(tempBuffer), 0, nullptr, nullptr, retVal); EXPECT_EQ(retVal, CL_SUCCESS); EXPECT_EQ(1u, commandStreamReceiver.peekLatestSentTaskCount()); // cmdQ task count = 2, finish again mockCmdQueue.finish(); EXPECT_EQ(1u, commandStreamReceiver.peekLatestSentTaskCount()); // finish again - dont flush task again mockCmdQueue.finish(); EXPECT_EQ(1u, commandStreamReceiver.peekLatestSentTaskCount()); // finish(dcFlush=true) from MapBuffer again - dont call FinishTask on finished queue retVal = mockCmdQueue.enqueueUnmapMemObject(buffer, ptr, 0, nullptr, nullptr); EXPECT_EQ(retVal, CL_SUCCESS); ptr = mockCmdQueue.enqueueMapBuffer(buffer, CL_TRUE, CL_MAP_READ, 0, sizeof(tempBuffer), 0, nullptr, nullptr, retVal); EXPECT_EQ(retVal, CL_SUCCESS); EXPECT_EQ(1u, commandStreamReceiver.peekLatestSentTaskCount()); //cleanup retVal = mockCmdQueue.enqueueUnmapMemObject(buffer, ptr, 0, nullptr, nullptr); EXPECT_EQ(retVal, CL_SUCCESS); retVal = clReleaseMemObject(buffer); EXPECT_EQ(retVal, CL_SUCCESS); } /* Enqueues a kernel with a global work size of 1024 and no local work size, then checks that the bottom and right execution masks of the walker command found in the queue's command stream are both 0xffffffff. */ HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskTests, 
givenPowerOfTwoGlobalWorkSizeAndNullLocalWorkgroupSizeWhenEnqueueKernelIsCalledThenGpGpuWalkerHasOptimalSIMDmask) { typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; MockContext ctx(pClDevice); MockKernelWithInternals kernel(*pClDevice); size_t GWS = 1024; CommandQueueHw commandQueue(&ctx, pClDevice, 0, false); commandQueue.enqueueKernel(kernel, 1, nullptr, &GWS, nullptr, 0, nullptr, nullptr); auto &commandStreamTask = commandQueue.getCS(1024); parseCommands(commandStreamTask, 0); auto itorCmd = find(cmdList.begin(), cmdList.end()); ASSERT_NE(itorCmd, cmdList.end()); auto cmdGpGpuWalker = genCmdCast(*itorCmd); //execution masks should be all active EXPECT_EQ(0xffffffffu, cmdGpGpuWalker->getBottomExecutionMask()); EXPECT_EQ(0xffffffffu, cmdGpGpuWalker->getRightExecutionMask()); } /* Starting from zeroed task counters, a non-blocking read enqueue that returns an event must raise the latest sent task count to 1, and waiting on that event must leave it at 1. NOTE(review): the values assigned to retVal by waitForEvents, clReleaseEvent and clReleaseMemObject are never checked. */ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenEventIsQueriedWhenEnqueuingThenTaskCountIncremented) { MockContext ctx(pClDevice); CommandQueueHw commandQueue(&ctx, pClDevice, 0, false); cl_event event = nullptr; Event *pEvent; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); size_t tempBuffer[] = {0, 1, 2}; size_t dstBuffer[] = {5, 5, 5}; cl_int retVal; auto buffer = Buffer::create(&ctx, CL_MEM_USE_HOST_PTR, sizeof(tempBuffer), tempBuffer, retVal); EXPECT_EQ(retVal, CL_SUCCESS); uint32_t taskCount = 0; taskLevel = taskCount; commandQueue.taskCount = taskCount; commandQueue.taskLevel = taskCount; commandStreamReceiver.taskLevel = taskCount; commandStreamReceiver.taskCount = taskCount; EXPECT_EQ(0u, commandStreamReceiver.peekLatestSentTaskCount()); commandQueue.enqueueReadBuffer(buffer, CL_FALSE, 0, sizeof(tempBuffer), dstBuffer, nullptr, 0, 0, &event); EXPECT_EQ(1u, commandStreamReceiver.peekLatestSentTaskCount()); pEvent = (Event *)event; retVal = Event::waitForEvents(1, &event); EXPECT_EQ(1u, commandStreamReceiver.peekLatestSentTaskCount()); retVal = clReleaseEvent(pEvent); retVal = clReleaseMemObject(buffer); } /* Starting from zeroed task counters, a non-blocking map of a mock buffer followed by finish() must keep the latest sent task count at 0 and leave the searched command out of the queue's command stream. */ HWTEST_F(CommandStreamReceiverFlushTaskTests, 
GivenNonBlockingMapEnqueueWhenFinishingThenNothingIsSubmittedToTheHardware) { typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; MockContext ctx(pClDevice); CommandQueueHw commandQueue(&ctx, pClDevice, 0, false); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); size_t tempBuffer[] = {0, 1, 2}; cl_int retVal; auto cpuAllocation = std::make_unique(MemoryConstants::pageSize); MockGraphicsAllocation allocation{cpuAllocation.get(), MemoryConstants::pageSize}; AlignedBuffer mockBuffer{&ctx, &allocation}; uint32_t taskCount = 0; taskLevel = taskCount; commandQueue.taskCount = taskCount; commandQueue.taskLevel = taskCount; commandStreamReceiver.taskLevel = taskCount; commandStreamReceiver.taskCount = taskCount; EXPECT_EQ(0u, commandStreamReceiver.peekLatestSentTaskCount()); auto ptr = commandQueue.enqueueMapBuffer(&mockBuffer, CL_FALSE, CL_MAP_READ, 0, sizeof(tempBuffer), 0, nullptr, nullptr, retVal); EXPECT_EQ(retVal, CL_SUCCESS); EXPECT_NE(nullptr, ptr); EXPECT_EQ(0u, commandStreamReceiver.peekLatestSentTaskCount()); commandQueue.finish(); EXPECT_EQ(0u, commandStreamReceiver.peekLatestSentTaskCount()); auto &commandStreamTask = commandQueue.getCS(1024); parseCommands(commandStreamTask, 0); auto itorPC = find(cmdList.begin(), cmdList.end()); ASSERT_EQ(cmdList.end(), itorPC); } /* Issues a non-blocking and then a blocking read with CPU copy disallowed on the buffer, checks that the latest sent and waited task counts reach 2, then inspects the pipe control(s) located from the walker search position onwards: with the pipe-control workaround the first must have DC flush disabled and the next must match MemorySynchronizationCommands::getDcFlushEnable(true, ...); otherwise the first must match it. */ HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskTests, GivenFlushedCallRequiringDCFlushWhenBlockingEnqueueIsCalledThenPipeControlWithDCFlushIsAdded) { typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; MockContext ctx(pClDevice); MockCommandQueueHw mockCmdQueue(&ctx, pClDevice, nullptr); cl_event event = nullptr; auto &commandStreamReceiver = pDevice->getGpgpuCommandStreamReceiver(); auto &commandStreamTask = mockCmdQueue.getCS(1024); size_t tempBuffer[] = {0, 1, 2}; size_t dstBuffer[] = {5, 5, 5}; cl_int retVal; auto buffer = Buffer::create(&ctx, CL_MEM_USE_HOST_PTR, sizeof(tempBuffer), tempBuffer, retVal); EXPECT_EQ(retVal, CL_SUCCESS); EXPECT_EQ(0u, 
commandStreamReceiver.peekLatestSentTaskCount()); // Call requiring DCFlush, nonblocking buffer->forceDisallowCPUCopy = true; mockCmdQueue.enqueueReadBuffer(buffer, CL_FALSE, 0, sizeof(tempBuffer), dstBuffer, nullptr, 0, 0, 0); EXPECT_EQ(1u, commandStreamReceiver.peekLatestSentTaskCount()); mockCmdQueue.enqueueReadBuffer(buffer, CL_TRUE, 0, sizeof(tempBuffer), dstBuffer, nullptr, 0, 0, &event); EXPECT_EQ(2u, commandStreamReceiver.peekLatestSentTaskCount()); EXPECT_EQ(2u, mockCmdQueue.latestTaskCountWaited); // Parse command list to verify that PC was added to taskCS cmdList.clear(); parseCommands(commandStreamTask, 0); auto itorWalker = find(cmdList.begin(), cmdList.end()); auto itorCmd = find(itorWalker, cmdList.end()); ASSERT_NE(cmdList.end(), itorCmd); auto cmdPC = genCmdCast(*itorCmd); ASSERT_NE(nullptr, cmdPC); if (UnitTestHelper::isPipeControlWArequired(pDevice->getHardwareInfo())) { // SKL: two PIPE_CONTROLs following GPGPU_WALKER: first has DcFlush and second has Write HwTag EXPECT_FALSE(cmdPC->getDcFlushEnable()); auto itorCmdP = ++((GenCmdList::iterator)itorCmd); EXPECT_NE(cmdList.end(), itorCmdP); auto itorCmd2 = find(itorCmdP, cmdList.end()); cmdPC = (PIPE_CONTROL *)*itorCmd2; EXPECT_EQ(MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo), cmdPC->getDcFlushEnable()); } else { // single PIPE_CONTROL following GPGPU_WALKER has DcFlush and Write HwTag EXPECT_EQ(MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo), cmdPC->getDcFlushEnable()); } retVal = clReleaseEvent(event); retVal = clReleaseMemObject(buffer); } /* A freshly created mock CSR reports ThreadArbitrationPolicy::NotPresent; after the first flushTask it must report the value returned by the HwHelper's getDefaultThreadArbitrationPolicy(). */ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenDefaultCommandStreamReceiverThenRoundRobinPolicyIsSelected) { auto pCommandStreamReceiver = new MockCsrHw(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(pCommandStreamReceiver); EXPECT_EQ(ThreadArbitrationPolicy::NotPresent, pCommandStreamReceiver->peekThreadArbitrationPolicy()); 
flushTask(*pCommandStreamReceiver); EXPECT_EQ(HwHelperHw::get().getDefaultThreadArbitrationPolicy(), pCommandStreamReceiver->peekThreadArbitrationPolicy()); } /* Marks the preamble as already sent with the SLM L3 configuration recorded as last sent, enqueues a kernel whose total SLM size is 1024, and checks that no MMIO write to the L3 control register offset appears in the CSR's command stream. */ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenKernelWithSlmWhenPreviousSLML3WasSentThenDontProgramL3) { typedef typename FamilyType::MI_LOAD_REGISTER_IMM MI_LOAD_REGISTER_IMM; size_t GWS = 1; MockContext ctx(pClDevice); MockKernelWithInternals kernel(*pClDevice); CommandQueueHw commandQueue(&ctx, pClDevice, 0, false); auto commandStreamReceiver = new MockCsrHw(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(commandStreamReceiver); auto &commandStreamCSR = commandStreamReceiver->getCS(); uint32_t L3Config = PreambleHelper::getL3Config(*defaultHwInfo, true); // Mark Preamble as sent, override L3Config to SLM config commandStreamReceiver->isPreambleSent = true; commandStreamReceiver->lastSentL3Config = L3Config; ((MockKernel *)kernel)->setTotalSLMSize(1024); cmdList.clear(); commandQueue.enqueueKernel(kernel, 1, nullptr, &GWS, nullptr, 0, nullptr, nullptr); // Parse command list to verify that PC was added to taskCS parseCommands(commandStreamCSR, 0); auto itorCmd = findMmio(cmdList.begin(), cmdList.end(), L3CNTLRegisterOffset::registerOffset); EXPECT_EQ(cmdList.end(), itorCmd); } /* Checks that CommandStreamReceiverHw::create returns a non-null CSR whose createPageTableManager() yields nullptr, and that NEO::createCommandStream returns a non-null CSR when the SetCommandStreamReceiver debug flag is 0. Both objects are deleted by the test. */ HWTEST_F(CommandStreamReceiverFlushTaskTests, WhenCreatingCommandStreamReceiverHwThenValidPointerIsReturned) { DebugManagerStateRestore dbgRestorer; auto csrHw = CommandStreamReceiverHw::create(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); EXPECT_NE(nullptr, csrHw); GmmPageTableMngr *ptm = csrHw->createPageTableManager(); EXPECT_EQ(nullptr, ptm); delete csrHw; DebugManager.flags.SetCommandStreamReceiver.set(0); int32_t GetCsr = DebugManager.flags.SetCommandStreamReceiver.get(); EXPECT_EQ(0, GetCsr); auto csr = NEO::createCommandStream(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), 
pDevice->getDeviceBitfield()); EXPECT_NE(nullptr, csr); delete csr; DebugManager.flags.SetCommandStreamReceiver.set(0); } /* Requests a scratch size of 1024 once and flushes twice: the scratch allocation pointer must be the same after the second flush, and the tag and scratch allocations must be made resident and non-resident on both flushes (the residency history is cleared between them). */ HWTEST_F(CommandStreamReceiverFlushTaskTests, WhenFlushingThenScratchAllocationIsReused) { auto commandStreamReceiver = new MockCsrHw(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(commandStreamReceiver); commandStreamReceiver->setRequiredScratchSizes(1024, 0); // whatever > 0 flushTask(*commandStreamReceiver); auto tagAllocation = commandStreamReceiver->getTagAllocation(); auto scratchAllocation = commandStreamReceiver->getScratchAllocation(); ASSERT_NE(tagAllocation, nullptr); ASSERT_NE(scratchAllocation, nullptr); EXPECT_TRUE(commandStreamReceiver->isMadeResident(tagAllocation)); EXPECT_TRUE(commandStreamReceiver->isMadeResident(scratchAllocation)); EXPECT_TRUE(commandStreamReceiver->isMadeNonResident(tagAllocation)); EXPECT_TRUE(commandStreamReceiver->isMadeNonResident(scratchAllocation)); // call makeResident on tag and scratch allocations per each flush // DONT skip residency calls when scratch allocation is the same(new required size <= previous size) commandStreamReceiver->madeResidentGfxAllocations.clear(); // this is only history - we can clean this commandStreamReceiver->madeNonResidentGfxAllocations.clear(); flushTask(*commandStreamReceiver); // 2nd flush auto newScratchAllocation = commandStreamReceiver->getScratchAllocation(); EXPECT_EQ(scratchAllocation, newScratchAllocation); // Allocation unchanged. 
Dont skip residency handling EXPECT_TRUE(commandStreamReceiver->isMadeResident(tagAllocation)); EXPECT_TRUE(commandStreamReceiver->isMadeResident(scratchAllocation)); EXPECT_TRUE(commandStreamReceiver->isMadeNonResident(tagAllocation)); EXPECT_TRUE(commandStreamReceiver->isMadeNonResident(scratchAllocation)); } /* With the HwHelper replaced through the hwHelperBackup guard object, a mock CSR installed on the device must have a non-null global fence allocation that is neither resident nor non-resident before flushTask and is both after it. */ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCommandStreamReceiverWhenFenceAllocationIsRequiredAndFlushTaskIsCalledThenFenceAllocationIsMadeResident) { RAIIHwHelperFactory> hwHelperBackup{pDevice->getHardwareInfo().platform.eRenderCoreFamily}; auto commandStreamReceiver = new MockCsrHw(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(commandStreamReceiver); auto globalFenceAllocation = commandStreamReceiver->globalFenceAllocation; ASSERT_NE(globalFenceAllocation, nullptr); EXPECT_FALSE(commandStreamReceiver->isMadeResident(globalFenceAllocation)); EXPECT_FALSE(commandStreamReceiver->isMadeNonResident(globalFenceAllocation)); flushTask(*commandStreamReceiver); EXPECT_TRUE(commandStreamReceiver->isMadeResident(globalFenceAllocation)); EXPECT_TRUE(commandStreamReceiver->isMadeNonResident(globalFenceAllocation)); } /* A stand-alone mock CSR starts with a null global fence allocation; once createGlobalFenceAllocation() succeeds, flushSmallTask must make that allocation resident and non-resident, and its allocation type must be GLOBAL_FENCE. */ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCommandStreamReceiverWhenFenceAllocationIsRequiredAndCreatedThenItIsMadeResidentDuringFlushSmallTask) { RAIIHwHelperFactory> hwHelperBackup{pDevice->getHardwareInfo().platform.eRenderCoreFamily}; MockCsrHw csr(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); csr.setupContext(*pDevice->getDefaultEngine().osContext); EXPECT_EQ(nullptr, csr.globalFenceAllocation); EXPECT_TRUE(csr.createGlobalFenceAllocation()); EXPECT_FALSE(csr.isMadeResident(csr.globalFenceAllocation)); EXPECT_FALSE(csr.isMadeNonResident(csr.globalFenceAllocation)); flushSmallTask(csr); EXPECT_TRUE(csr.isMadeResident(csr.globalFenceAllocation)); EXPECT_TRUE(csr.isMadeNonResident(csr.globalFenceAllocation)); 
ASSERT_NE(nullptr, csr.globalFenceAllocation); EXPECT_EQ(AllocationType::GLOBAL_FENCE, csr.globalFenceAllocation->getAllocationType()); } /* When the global fence allocation is never created on a stand-alone mock CSR, flushSmallTask must leave globalFenceAllocation null. */ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCommandStreamReceiverWhenFenceAllocationIsRequiredButNotCreatedThenItIsNotMadeResidentDuringFlushSmallTask) { RAIIHwHelperFactory> hwHelperBackup{pDevice->getHardwareInfo().platform.eRenderCoreFamily}; MockCsrHw csr(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); csr.setupContext(*pDevice->getDefaultEngine().osContext); EXPECT_EQ(nullptr, csr.globalFenceAllocation); flushSmallTask(csr); ASSERT_EQ(nullptr, csr.globalFenceAllocation); } /* ScratchSpaceController test double that exposes the scratch and private scratch allocation members. setRequiredScratchSpace records a larger requested size and allocates a new allocation of that size whenever the request exceeds the currently recorded size (separately for scratch and private scratch); calculateNewGSH and getScratchPatchAddress return 0 and the remaining overrides have empty bodies. */ struct MockScratchController : public ScratchSpaceController { using ScratchSpaceController::privateScratchAllocation; using ScratchSpaceController::scratchAllocation; using ScratchSpaceController::ScratchSpaceController; void setRequiredScratchSpace(void *sshBaseAddress, uint32_t scratchSlot, uint32_t requiredPerThreadScratchSize, uint32_t requiredPerThreadPrivateScratchSize, uint32_t currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, bool &vfeStateDirty) override { if (requiredPerThreadScratchSize > scratchSizeBytes) { scratchSizeBytes = requiredPerThreadScratchSize; scratchAllocation = getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootDeviceIndex, requiredPerThreadScratchSize}); } if (requiredPerThreadPrivateScratchSize > privateScratchSizeBytes) { privateScratchSizeBytes = requiredPerThreadPrivateScratchSize; privateScratchAllocation = getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootDeviceIndex, requiredPerThreadPrivateScratchSize}); } } uint64_t calculateNewGSH() override { return 0u; }; uint64_t getScratchPatchAddress() override { return 0u; }; void programHeaps(HeapContainer &heapContainer, uint32_t scratchSlot, uint32_t requiredPerThreadScratchSize, uint32_t requiredPerThreadPrivateScratchSize, uint32_t 
currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, bool &vfeStateDirty) override { } void programBindlessSurfaceStateForScratch(BindlessHeapsHelper *heapsHelper, uint32_t requiredPerThreadScratchSize, uint32_t requiredPerThreadPrivateScratchSize, uint32_t currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, bool &vfeStateDirty, NEO::CommandStreamReceiver *csr) override { } void reserveHeap(IndirectHeap::Type heapType, IndirectHeap *&indirectHeap) override{}; }; /* Uses MockScratchController on a mock CSR: the first flush requires only scratch (1024, 0) and must create and handle residency for the scratch allocation alone; the second flush requires only private scratch (0, 1024), after which both allocations exist and both must be made resident and non-resident. */ HWTEST_F(CommandStreamReceiverFlushTaskTests, whenScratchIsRequiredForFirstFlushAndPrivateScratchForSecondFlushThenHandleResidencyProperly) { auto commandStreamReceiver = new MockCsrHw(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); auto scratchController = new MockScratchController(pDevice->getRootDeviceIndex(), *pDevice->executionEnvironment, *commandStreamReceiver->getInternalAllocationStorage()); commandStreamReceiver->scratchSpaceController.reset(scratchController); pDevice->resetCommandStreamReceiver(commandStreamReceiver); commandStreamReceiver->setRequiredScratchSizes(1024, 0); flushTask(*commandStreamReceiver); EXPECT_NE(nullptr, scratchController->scratchAllocation); EXPECT_EQ(nullptr, scratchController->privateScratchAllocation); auto scratchAllocation = scratchController->scratchAllocation; EXPECT_TRUE(commandStreamReceiver->isMadeResident(scratchAllocation)); EXPECT_TRUE(commandStreamReceiver->isMadeNonResident(scratchAllocation)); commandStreamReceiver->madeResidentGfxAllocations.clear(); // this is only history - we can clean this commandStreamReceiver->madeNonResidentGfxAllocations.clear(); commandStreamReceiver->setRequiredScratchSizes(0, 1024); flushTask(*commandStreamReceiver); // 2nd flush EXPECT_NE(nullptr, scratchController->scratchAllocation); EXPECT_NE(nullptr, scratchController->privateScratchAllocation); auto privateScratchAllocation = scratchController->privateScratchAllocation; 
EXPECT_TRUE(commandStreamReceiver->isMadeResident(scratchAllocation)); EXPECT_TRUE(commandStreamReceiver->isMadeNonResident(scratchAllocation)); EXPECT_TRUE(commandStreamReceiver->isMadeResident(privateScratchAllocation)); EXPECT_TRUE(commandStreamReceiver->isMadeNonResident(privateScratchAllocation)); } HWTEST_F(CommandStreamReceiverFlushTaskTests, whenPrivateScratchIsRequiredForFirstFlushAndCommonScratchForSecondFlushThenHandleResidencyProperly) { auto commandStreamReceiver = new MockCsrHw(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); auto scratchController = new MockScratchController(pDevice->getRootDeviceIndex(), *pDevice->executionEnvironment, *commandStreamReceiver->getInternalAllocationStorage()); commandStreamReceiver->scratchSpaceController.reset(scratchController); pDevice->resetCommandStreamReceiver(commandStreamReceiver); commandStreamReceiver->setRequiredScratchSizes(0, 1024); flushTask(*commandStreamReceiver); EXPECT_EQ(nullptr, scratchController->scratchAllocation); EXPECT_NE(nullptr, scratchController->privateScratchAllocation); auto privateScratchAllocation = scratchController->privateScratchAllocation; EXPECT_TRUE(commandStreamReceiver->isMadeResident(privateScratchAllocation)); EXPECT_TRUE(commandStreamReceiver->isMadeNonResident(privateScratchAllocation)); commandStreamReceiver->madeResidentGfxAllocations.clear(); // this is only history - we can clean this commandStreamReceiver->madeNonResidentGfxAllocations.clear(); commandStreamReceiver->setRequiredScratchSizes(1024, 0); flushTask(*commandStreamReceiver); // 2nd flush EXPECT_NE(nullptr, scratchController->scratchAllocation); EXPECT_NE(nullptr, scratchController->privateScratchAllocation); auto scratchAllocation = scratchController->scratchAllocation; EXPECT_TRUE(commandStreamReceiver->isMadeResident(scratchAllocation)); EXPECT_TRUE(commandStreamReceiver->isMadeNonResident(scratchAllocation)); 
EXPECT_TRUE(commandStreamReceiver->isMadeResident(privateScratchAllocation)); EXPECT_TRUE(commandStreamReceiver->isMadeNonResident(privateScratchAllocation)); } HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskTests, givenTwoConsecutiveNdRangeKernelsThenStateBaseAddressIsProgrammedOnceAndScratchAddressInMediaVfeStateIsProgrammedTwiceBothWithCorrectAddress) { typedef typename FamilyType::PARSE PARSE; typedef typename PARSE::MEDIA_VFE_STATE MEDIA_VFE_STATE; typedef typename PARSE::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; MockContext ctx(pClDevice); MockKernelWithInternals kernel(*pClDevice); CommandQueueHw commandQueue(&ctx, pClDevice, 0, false); auto commandStreamReceiver = new MockCsrHw(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(commandStreamReceiver); size_t GWS = 1; uint32_t scratchSize = 1024; kernel.kernelInfo.setPerThreadScratchSize(scratchSize, 0); EXPECT_EQ(false, kernel.mockKernel->isBuiltIn); auto deviceInfo = pClDevice->getDeviceInfo(); auto sharedDeviceInfo = pDevice->getDeviceInfo(); if (sharedDeviceInfo.force32BitAddressess) { EXPECT_FALSE(commandStreamReceiver->getGSBAFor32BitProgrammed()); } commandQueue.enqueueKernel(kernel, 1, nullptr, &GWS, nullptr, 0, nullptr, nullptr); if (sharedDeviceInfo.force32BitAddressess) { EXPECT_TRUE(commandStreamReceiver->getGSBAFor32BitProgrammed()); } cmdList.clear(); // Parse command list parseCommands(commandQueue); // All state should be programmed before walker auto itorCmdForStateBase = itorStateBaseAddress; auto *mediaVfeState = (MEDIA_VFE_STATE *)*itorMediaVfeState; auto graphicsAllocationScratch = commandStreamReceiver->getScratchAllocation(); ASSERT_NE(itorCmdForStateBase, itorWalker); auto *sba = (STATE_BASE_ADDRESS *)*itorCmdForStateBase; auto GSHaddress = (uintptr_t)sba->getGeneralStateBaseAddress(); uint64_t graphicsAddress = 0; // Get address ( offset in 32 bit addressing ) of sratch graphicsAddress = 
(uint64_t)graphicsAllocationScratch->getGpuAddressToPatch(); if (sharedDeviceInfo.force32BitAddressess && is64bit) { EXPECT_TRUE(graphicsAllocationScratch->is32BitAllocation()); EXPECT_EQ(GmmHelper::decanonize(graphicsAllocationScratch->getGpuAddress()) - GSHaddress, graphicsAddress); } else if (!deviceInfo.svmCapabilities && is64bit) { EXPECT_EQ(ScratchSpaceConstants::scratchSpaceOffsetFor64Bit, mediaVfeState->getScratchSpaceBasePointer()); EXPECT_EQ(GSHaddress + ScratchSpaceConstants::scratchSpaceOffsetFor64Bit, graphicsAllocationScratch->getGpuAddressToPatch()); } else { EXPECT_EQ((uint64_t)graphicsAllocationScratch->getUnderlyingBuffer(), graphicsAddress); } uint64_t lowPartGraphicsAddress = (uint64_t)(graphicsAddress & 0xffffffff); uint64_t highPartGraphicsAddress = (uint64_t)((graphicsAddress >> 32) & 0xffffffff); uint64_t scratchBaseLowPart = (uint64_t)mediaVfeState->getScratchSpaceBasePointer(); uint64_t scratchBaseHighPart = (uint64_t)mediaVfeState->getScratchSpaceBasePointerHigh(); if (is64bit && !sharedDeviceInfo.force32BitAddressess) { uint64_t expectedAddress = ScratchSpaceConstants::scratchSpaceOffsetFor64Bit; EXPECT_EQ(expectedAddress, scratchBaseLowPart); EXPECT_EQ(0u, scratchBaseHighPart); } else { EXPECT_EQ(lowPartGraphicsAddress, scratchBaseLowPart); EXPECT_EQ(highPartGraphicsAddress, scratchBaseHighPart); } if (sharedDeviceInfo.force32BitAddressess) { EXPECT_EQ(pDevice->getMemoryManager()->getExternalHeapBaseAddress(graphicsAllocationScratch->getRootDeviceIndex(), false), GSHaddress); } else { if constexpr (is64bit) { EXPECT_EQ(graphicsAddress - ScratchSpaceConstants::scratchSpaceOffsetFor64Bit, GSHaddress); } else { EXPECT_EQ(0u, GSHaddress); } } //now re-try to see if SBA is not programmed scratchSize *= 2; kernel.kernelInfo.setPerThreadScratchSize(scratchSize, 0); commandQueue.enqueueKernel(kernel, 1, nullptr, &GWS, nullptr, 0, nullptr, nullptr); // Parse command list parseCommands(commandQueue); itorCmdForStateBase = find(itorWalker, 
cmdList.end()); // In 32 Bit addressing sba shouldn't be reprogrammed if (sharedDeviceInfo.force32BitAddressess == true) { EXPECT_EQ(itorCmdForStateBase, cmdList.end()); } auto itorMediaVfeStateSecond = find(itorWalker, cmdList.end()); auto *cmdMediaVfeStateSecond = (MEDIA_VFE_STATE *)*itorMediaVfeStateSecond; EXPECT_NE(mediaVfeState, cmdMediaVfeStateSecond); uint64_t oldScratchAddr = ((uint64_t)scratchBaseHighPart << 32u) | scratchBaseLowPart; uint64_t newScratchAddr = ((uint64_t)cmdMediaVfeStateSecond->getScratchSpaceBasePointerHigh() << 32u) | cmdMediaVfeStateSecond->getScratchSpaceBasePointer(); if (sharedDeviceInfo.force32BitAddressess == true) { EXPECT_NE(oldScratchAddr, newScratchAddr); } } HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskTests, givenNdRangeKernelAndReadBufferStateBaseAddressAndScratchAddressInMediaVfeStateThenProgrammingIsCorrect) { typedef typename FamilyType::PARSE PARSE; typedef typename PARSE::MEDIA_VFE_STATE MEDIA_VFE_STATE; typedef typename PARSE::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; MockContext ctx(pClDevice); MockKernelWithInternals kernel(*pClDevice); CommandQueueHw commandQueue(&ctx, pClDevice, 0, false); auto commandStreamReceiver = new MockCsrHw(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(commandStreamReceiver); size_t GWS = 1; uint32_t scratchSize = 1024; kernel.kernelInfo.setPerThreadScratchSize(scratchSize, 0); EXPECT_EQ(false, kernel.mockKernel->isBuiltIn); auto deviceInfo = pClDevice->getDeviceInfo(); auto sharedDeviceInfo = pDevice->getDeviceInfo(); if (sharedDeviceInfo.force32BitAddressess) { EXPECT_FALSE(commandStreamReceiver->getGSBAFor32BitProgrammed()); } commandQueue.enqueueKernel(kernel, 1, nullptr, &GWS, nullptr, 0, nullptr, nullptr); if (sharedDeviceInfo.force32BitAddressess) { EXPECT_TRUE(commandStreamReceiver->getGSBAFor32BitProgrammed()); } cmdList.clear(); // Parse command list parseCommands(commandQueue); // All 
state should be programmed before walker auto itorCmdForStateBase = itorStateBaseAddress; auto *mediaVfeState = (MEDIA_VFE_STATE *)*itorMediaVfeState; auto graphicsAllocationScratch = commandStreamReceiver->getScratchAllocation(); ASSERT_NE(itorCmdForStateBase, itorWalker); auto *sba = (STATE_BASE_ADDRESS *)*itorCmdForStateBase; auto GSHaddress = (uintptr_t)sba->getGeneralStateBaseAddress(); uint64_t graphicsAddress = 0; // Get address ( offset in 32 bit addressing ) of sratch graphicsAddress = (uint64_t)graphicsAllocationScratch->getGpuAddressToPatch(); if (sharedDeviceInfo.force32BitAddressess && is64bit) { EXPECT_TRUE(graphicsAllocationScratch->is32BitAllocation()); EXPECT_EQ(GmmHelper::decanonize(graphicsAllocationScratch->getGpuAddress()) - GSHaddress, graphicsAddress); } else if (!deviceInfo.svmCapabilities && is64bit) { EXPECT_EQ(ScratchSpaceConstants::scratchSpaceOffsetFor64Bit, mediaVfeState->getScratchSpaceBasePointer()); EXPECT_EQ(GSHaddress + ScratchSpaceConstants::scratchSpaceOffsetFor64Bit, graphicsAllocationScratch->getGpuAddressToPatch()); } else { EXPECT_EQ((uint64_t)graphicsAllocationScratch->getUnderlyingBuffer(), graphicsAddress); } uint64_t lowPartGraphicsAddress = (uint64_t)(graphicsAddress & 0xffffffff); uint64_t highPartGraphicsAddress = (uint64_t)((graphicsAddress >> 32) & 0xffffffff); uint64_t scratchBaseLowPart = (uint64_t)mediaVfeState->getScratchSpaceBasePointer(); uint64_t scratchBaseHighPart = (uint64_t)mediaVfeState->getScratchSpaceBasePointerHigh(); if (is64bit && !sharedDeviceInfo.force32BitAddressess) { lowPartGraphicsAddress = ScratchSpaceConstants::scratchSpaceOffsetFor64Bit; highPartGraphicsAddress = 0u; } EXPECT_EQ(lowPartGraphicsAddress, scratchBaseLowPart); EXPECT_EQ(highPartGraphicsAddress, scratchBaseHighPart); if (sharedDeviceInfo.force32BitAddressess) { EXPECT_EQ(pDevice->getMemoryManager()->getExternalHeapBaseAddress(graphicsAllocationScratch->getRootDeviceIndex(), false), GSHaddress); } else { if constexpr (is64bit) { 
EXPECT_EQ(graphicsAddress - ScratchSpaceConstants::scratchSpaceOffsetFor64Bit, GSHaddress); } else { EXPECT_EQ(0u, GSHaddress); } } size_t tempBuffer[] = {0, 1, 2}; size_t dstBuffer[] = {0, 0, 0}; cl_int retVal = 0; auto buffer = Buffer::create(&ctx, CL_MEM_USE_HOST_PTR, sizeof(tempBuffer), tempBuffer, retVal); commandQueue.enqueueReadBuffer(buffer, CL_FALSE, 0, sizeof(tempBuffer), dstBuffer, nullptr, 0, 0, 0); // Parse command list parseCommands(commandQueue); itorCmdForStateBase = find(itorWalker, cmdList.end()); if (sharedDeviceInfo.force32BitAddressess) { EXPECT_NE(itorWalker, itorCmdForStateBase); if (itorCmdForStateBase != cmdList.end()) { auto *sba2 = (STATE_BASE_ADDRESS *)*itorCmdForStateBase; auto GSHaddress2 = (uintptr_t)sba2->getGeneralStateBaseAddress(); EXPECT_NE(sba, sba2); EXPECT_EQ(0u, GSHaddress2); if (sharedDeviceInfo.force32BitAddressess) { EXPECT_FALSE(commandStreamReceiver->getGSBAFor32BitProgrammed()); } } } delete buffer; if (sharedDeviceInfo.force32BitAddressess) { // Asserts placed after restoring old CSR to avoid heap corruption ASSERT_NE(itorCmdForStateBase, cmdList.end()); } } HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenForced32BitAllocationsModeStore32bitWhenFlushingTaskThenScratchAllocationIsNotReused) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.Force32bitAddressing.set(true); auto commandStreamReceiver = new MockCsrHw(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->getMemoryManager()->setForce32BitAllocations(true); pDevice->resetCommandStreamReceiver(commandStreamReceiver); commandStreamReceiver->setRequiredScratchSizes(4096, 0); // whatever > 0 (in page size) flushTask(*commandStreamReceiver); auto scratchAllocation = commandStreamReceiver->getScratchAllocation(); ASSERT_NE(scratchAllocation, nullptr); commandStreamReceiver->setRequiredScratchSizes(8196, 0); // whatever > first size flushTask(*commandStreamReceiver); // 2nd flush auto newScratchAllocation = 
commandStreamReceiver->getScratchAllocation(); EXPECT_NE(scratchAllocation, newScratchAllocation); // Allocation changed std::unique_ptr allocationReusable = commandStreamReceiver->getInternalAllocationStorage()->obtainReusableAllocation(4096, AllocationType::LINEAR_STREAM); if (allocationReusable.get() != nullptr) { if constexpr (is64bit) { EXPECT_NE(scratchAllocation, allocationReusable.get()); } pDevice->getMemoryManager()->freeGraphicsMemory(allocationReusable.release()); } } HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenForced32BitAllocationsModeStore32bitWhenFlushingTaskThenScratchAllocationStoredOnTemporaryAllocationList) { if constexpr (is64bit) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.Force32bitAddressing.set(true); auto commandStreamReceiver = new MockCsrHw(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->getMemoryManager()->setForce32BitAllocations(true); pDevice->resetCommandStreamReceiver(commandStreamReceiver); commandStreamReceiver->setRequiredScratchSizes(4096, 0); // whatever > 0 (in page size) flushTask(*commandStreamReceiver); auto scratchAllocation = commandStreamReceiver->getScratchAllocation(); ASSERT_NE(scratchAllocation, nullptr); commandStreamReceiver->setRequiredScratchSizes(8196, 0); // whatever > first size flushTask(*commandStreamReceiver); // 2nd flush auto newScratchAllocation = commandStreamReceiver->getScratchAllocation(); EXPECT_NE(scratchAllocation, newScratchAllocation); // Allocation changed CommandStreamReceiver *csrPtr = reinterpret_cast(commandStreamReceiver); std::unique_ptr allocationTemporary = commandStreamReceiver->getTemporaryAllocations().detachAllocation(0, nullptr, csrPtr, AllocationType::SCRATCH_SURFACE); EXPECT_EQ(scratchAllocation, allocationTemporary.get()); pDevice->getMemoryManager()->freeGraphicsMemory(allocationTemporary.release()); } } HWTEST_F(UltCommandStreamReceiverTest, WhenFlushingAllCachesThenPipeControlIsAdded) { typedef 
typename FamilyType::PIPE_CONTROL PIPE_CONTROL; DebugManagerStateRestore dbgRestorer; DebugManager.flags.FlushAllCaches.set(true); char buff[sizeof(PIPE_CONTROL) * 3]; LinearStream stream(buff, sizeof(PIPE_CONTROL) * 3); PipeControlArgs args; MemorySynchronizationCommands::addPipeControl(stream, args); parseCommands(stream, 0); PIPE_CONTROL *pipeControl = getCommand(); ASSERT_NE(nullptr, pipeControl); // WA pipeControl added if (cmdList.size() == 2) { pipeControl++; } EXPECT_TRUE(pipeControl->getDcFlushEnable()); EXPECT_TRUE(pipeControl->getRenderTargetCacheFlushEnable()); EXPECT_TRUE(pipeControl->getInstructionCacheInvalidateEnable()); EXPECT_TRUE(pipeControl->getTextureCacheInvalidationEnable()); EXPECT_TRUE(pipeControl->getPipeControlFlushEnable()); EXPECT_TRUE(pipeControl->getVfCacheInvalidationEnable()); EXPECT_TRUE(pipeControl->getConstantCacheInvalidationEnable()); EXPECT_TRUE(pipeControl->getStateCacheInvalidationEnable()); EXPECT_TRUE(pipeControl->getTlbInvalidate()); } HWTEST_F(UltCommandStreamReceiverTest, givenDebugDisablingCacheFlushWhenAddingPipeControlWithCacheFlushThenOverrideRequestAndDisableCacheFlushFlags) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; DebugManagerStateRestore dbgRestorer; DebugManager.flags.DoNotFlushCaches.set(true); char buff[sizeof(PIPE_CONTROL) * 3]; LinearStream stream(buff, sizeof(PIPE_CONTROL) * 3); PipeControlArgs args; args.dcFlushEnable = true; args.constantCacheInvalidationEnable = true; args.instructionCacheInvalidateEnable = true; args.pipeControlFlushEnable = true; args.renderTargetCacheFlushEnable = true; args.stateCacheInvalidationEnable = true; args.textureCacheInvalidationEnable = true; args.vfCacheInvalidationEnable = true; MemorySynchronizationCommands::addPipeControl(stream, args); parseCommands(stream, 0); PIPE_CONTROL *pipeControl = getCommand(); ASSERT_NE(nullptr, pipeControl); // WA pipeControl added if (cmdList.size() == 2) { pipeControl++; } EXPECT_FALSE(pipeControl->getDcFlushEnable()); 
EXPECT_FALSE(pipeControl->getRenderTargetCacheFlushEnable()); EXPECT_FALSE(pipeControl->getInstructionCacheInvalidateEnable()); EXPECT_FALSE(pipeControl->getTextureCacheInvalidationEnable()); EXPECT_FALSE(pipeControl->getPipeControlFlushEnable()); EXPECT_FALSE(pipeControl->getVfCacheInvalidationEnable()); EXPECT_FALSE(pipeControl->getConstantCacheInvalidationEnable()); EXPECT_FALSE(pipeControl->getStateCacheInvalidationEnable()); } HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskTests, givenEnabledPreemptionWhenFlushTaskCalledThenDontProgramMediaVfeStateAgain) { pDevice->setPreemptionMode(PreemptionMode::ThreadGroup); auto &csr = pDevice->getUltCommandStreamReceiver(); HardwareParse hwParser; flushTask(csr, false, 0); hwParser.parseCommands(csr.commandStream, 0); auto cmd = hwParser.getCommand(); EXPECT_NE(nullptr, cmd); // program again csr.setMediaVFEStateDirty(false); auto offset = csr.commandStream.getUsed(); flushTask(csr, false, commandStream.getUsed()); hwParser.cmdList.clear(); hwParser.parseCommands(csr.commandStream, offset); cmd = hwParser.getCommand(); EXPECT_EQ(nullptr, cmd); } HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskTests, GivenPreambleSentAndL3ConfigChangedWhenFlushingTaskThenPipeControlIsAdded) { typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; typedef typename FamilyType::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; typedef typename FamilyType::MI_BATCH_BUFFER_START MI_BATCH_BUFFER_START; typedef typename FamilyType::MI_LOAD_REGISTER_IMM MI_LOAD_REGISTER_IMM; typedef typename FamilyType::MEDIA_VFE_STATE MEDIA_VFE_STATE; CsrSizeRequestFlags csrSizeRequest = {}; commandStream.getSpace(sizeof(PIPE_CONTROL)); flushTaskFlags.useSLM = true; flushTaskFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); // Force a PIPE_CONTROL through a taskLevel transition taskLevel = commandStreamReceiver.peekTaskLevel() + 1; 
commandStreamReceiver.isPreambleSent = true; commandStreamReceiver.lastPreemptionMode = pDevice->getPreemptionMode(); commandStreamReceiver.lastMediaSamplerConfig = 0; commandStreamReceiver.streamProperties.stateComputeMode.isCoherencyRequired.value = 0; csrSizeRequest.l3ConfigChanged = true; commandStreamReceiver.overrideCsrSizeReqFlags(csrSizeRequest); auto &csrCS = commandStreamReceiver.getCS(); size_t sizeNeeded = commandStreamReceiver.getRequiredCmdStreamSize(flushTaskFlags, *pDevice); auto expectedUsed = csrCS.getUsed() + sizeNeeded; expectedUsed = alignUp(expectedUsed, MemoryConstants::cacheLineSize); commandStreamReceiver.streamProperties.stateComputeMode.setProperties(flushTaskFlags.requiresCoherency, flushTaskFlags.numGrfRequired, flushTaskFlags.threadArbitrationPolicy, *defaultHwInfo); commandStreamReceiver.flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevel, flushTaskFlags, *pDevice); // Verify that we didn't grab a new CS buffer EXPECT_EQ(expectedUsed, csrCS.getUsed()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrWhenPreambleSentThenRequiredCsrSizeDependsOnL3ConfigChanged) { UltCommandStreamReceiver &commandStreamReceiver = (UltCommandStreamReceiver &)pDevice->getGpgpuCommandStreamReceiver(); CsrSizeRequestFlags csrSizeRequest = {}; commandStreamReceiver.isPreambleSent = true; csrSizeRequest.l3ConfigChanged = true; commandStreamReceiver.overrideCsrSizeReqFlags(csrSizeRequest); auto l3ConfigChangedSize = commandStreamReceiver.getRequiredCmdStreamSize(flushTaskFlags, *pDevice); auto expectedDifference = commandStreamReceiver.getCmdSizeForL3Config(); csrSizeRequest.l3ConfigChanged = false; commandStreamReceiver.overrideCsrSizeReqFlags(csrSizeRequest); auto l3ConfigNotChangedSize = commandStreamReceiver.getRequiredCmdStreamSize(flushTaskFlags, *pDevice); auto difference = l3ConfigChangedSize - l3ConfigNotChangedSize; EXPECT_EQ(expectedDifference, difference); } HWTEST_F(CommandStreamReceiverFlushTaskTests, 
givenCsrWhenPreambleNotSentThenRequiredCsrSizeDoesntDependOnL3ConfigChanged) { UltCommandStreamReceiver &commandStreamReceiver = (UltCommandStreamReceiver &)pDevice->getGpgpuCommandStreamReceiver(); CsrSizeRequestFlags csrSizeRequest = {}; commandStreamReceiver.isPreambleSent = false; csrSizeRequest.l3ConfigChanged = true; commandStreamReceiver.overrideCsrSizeReqFlags(csrSizeRequest); auto l3ConfigChangedSize = commandStreamReceiver.getRequiredCmdStreamSize(flushTaskFlags, *pDevice); csrSizeRequest.l3ConfigChanged = false; commandStreamReceiver.overrideCsrSizeReqFlags(csrSizeRequest); auto l3ConfigNotChangedSize = commandStreamReceiver.getRequiredCmdStreamSize(flushTaskFlags, *pDevice); EXPECT_EQ(l3ConfigNotChangedSize, l3ConfigChangedSize); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrWhenPreambleNotSentThenRequiredCsrSizeDoesntDependOnmediaSamplerConfigChanged) { UltCommandStreamReceiver &commandStreamReceiver = (UltCommandStreamReceiver &)pDevice->getGpgpuCommandStreamReceiver(); CsrSizeRequestFlags csrSizeRequest = {}; DispatchFlags flags = DispatchFlagsHelper::createDefaultDispatchFlags(); commandStreamReceiver.isPreambleSent = false; csrSizeRequest.mediaSamplerConfigChanged = false; commandStreamReceiver.overrideCsrSizeReqFlags(csrSizeRequest); auto mediaSamplerConfigNotChangedSize = commandStreamReceiver.getRequiredCmdStreamSize(flags, *pDevice); csrSizeRequest.mediaSamplerConfigChanged = true; commandStreamReceiver.overrideCsrSizeReqFlags(csrSizeRequest); auto mediaSamplerConfigChangedSize = commandStreamReceiver.getRequiredCmdStreamSize(flags, *pDevice); EXPECT_EQ(mediaSamplerConfigChangedSize, mediaSamplerConfigNotChangedSize); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenSpecialPipelineSelectModeChangedWhenGetCmdSizeForPielineSelectIsCalledThenCorrectSizeIsReturned) { using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; UltCommandStreamReceiver &commandStreamReceiver = 
(UltCommandStreamReceiver &)pDevice->getGpgpuCommandStreamReceiver(); CsrSizeRequestFlags csrSizeRequest = {}; DispatchFlags flags = DispatchFlagsHelper::createDefaultDispatchFlags(); csrSizeRequest.specialPipelineSelectModeChanged = true; commandStreamReceiver.overrideCsrSizeReqFlags(csrSizeRequest); size_t size = commandStreamReceiver.getCmdSizeForPipelineSelect(); size_t expectedSize = sizeof(PIPELINE_SELECT); if (MemorySynchronizationCommands::isPipeControlPriorToPipelineSelectWArequired(pDevice->getHardwareInfo())) { expectedSize += sizeof(PIPE_CONTROL); } EXPECT_EQ(expectedSize, size); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrWhenPreambleSentThenRequiredCsrSizeDependsOnmediaSamplerConfigChanged) { using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; UltCommandStreamReceiver &commandStreamReceiver = (UltCommandStreamReceiver &)pDevice->getGpgpuCommandStreamReceiver(); CsrSizeRequestFlags csrSizeRequest = {}; DispatchFlags flags = DispatchFlagsHelper::createDefaultDispatchFlags(); commandStreamReceiver.isPreambleSent = true; csrSizeRequest.mediaSamplerConfigChanged = false; commandStreamReceiver.overrideCsrSizeReqFlags(csrSizeRequest); auto mediaSamplerConfigNotChangedSize = commandStreamReceiver.getRequiredCmdStreamSize(flags, *pDevice); csrSizeRequest.mediaSamplerConfigChanged = true; commandStreamReceiver.overrideCsrSizeReqFlags(csrSizeRequest); auto mediaSamplerConfigChangedSize = commandStreamReceiver.getRequiredCmdStreamSize(flags, *pDevice); EXPECT_NE(mediaSamplerConfigChangedSize, mediaSamplerConfigNotChangedSize); auto difference = mediaSamplerConfigChangedSize - mediaSamplerConfigNotChangedSize; size_t expectedDifference = sizeof(PIPELINE_SELECT); if (MemorySynchronizationCommands::isPipeControlPriorToPipelineSelectWArequired(pDevice->getHardwareInfo())) { expectedDifference += sizeof(PIPE_CONTROL); } EXPECT_EQ(expectedDifference, difference); } 
// The sampler-cache-flush state alone must not change the required size; once the
// waSamplerCacheFlushBetweenRedescribedSurfaceReads workaround is enabled the size must
// grow by exactly one PIPE_CONTROL.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrWhenSamplerCacheFlushSentThenRequiredCsrSizeContainsPipecontrolSize) {
    using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT;

    UltCommandStreamReceiver &commandStreamReceiver = (UltCommandStreamReceiver &)pDevice->getGpgpuCommandStreamReceiver();
    CsrSizeRequestFlags csrSizeRequest = {};
    DispatchFlags flags = DispatchFlagsHelper::createDefaultDispatchFlags();

    commandStreamReceiver.isPreambleSent = true;
    commandStreamReceiver.overrideCsrSizeReqFlags(csrSizeRequest);

    commandStreamReceiver.setSamplerCacheFlushRequired(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushNotRequired);
    auto samplerCacheNotFlushedSize = commandStreamReceiver.getRequiredCmdStreamSize(flags, *pDevice);

    commandStreamReceiver.setSamplerCacheFlushRequired(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushBefore);
    auto samplerCacheFlushBeforeSize = commandStreamReceiver.getRequiredCmdStreamSize(flags, *pDevice);
    EXPECT_EQ(samplerCacheNotFlushedSize, samplerCacheFlushBeforeSize);

    // Enable the workaround and query the size again.
    NEO::WorkaroundTable *waTable = &pDevice->getRootDeviceEnvironment().getMutableHardwareInfo()->workaroundTable;
    waTable->flags.waSamplerCacheFlushBetweenRedescribedSurfaceReads = true;

    samplerCacheFlushBeforeSize = commandStreamReceiver.getRequiredCmdStreamSize(flags, *pDevice);

    auto difference = samplerCacheFlushBeforeSize - samplerCacheNotFlushedSize;
    EXPECT_EQ(sizeof(typename FamilyType::PIPE_CONTROL), difference);
}

// A CSR configured to the non-dirty state must not call flush during flushTask.
HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskTests, givenCsrInNonDirtyStateWhenflushTaskIsCalledThenNoFlushIsCalled) {
    CommandQueueHw commandQueue(nullptr, pClDevice, 0, false);
    auto &commandStream = commandQueue.getCS(4096u);

    auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    pDevice->resetCommandStreamReceiver(mockCsr);

    configureCSRtoNonDirtyState(false);

    DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags();
    dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo());

    mockCsr->flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevel, dispatchFlags, *pDevice);

    EXPECT_EQ(0, mockCsr->flushCalledCount);
}

// Same as above under BatchedDispatch: no flush, no recorded command buffer, and an empty
// residency allocation list.
HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskTests, givenCsrInNonDirtyStateAndBatchingModeWhenflushTaskIsCalledWithDisabledPreemptionThenSubmissionIsNotRecorded) {
    CommandQueueHw commandQueue(nullptr, pClDevice, 0, false);
    auto &commandStream = commandQueue.getCS(4096u);

    auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    pDevice->resetCommandStreamReceiver(mockCsr);

    mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);

    auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
    mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator);

    configureCSRtoNonDirtyState(false);

    DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags();
    dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo());

    mockCsr->flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevel, dispatchFlags, *pDevice);

    EXPECT_EQ(0, mockCsr->flushCalledCount);

    EXPECT_TRUE(mockedSubmissionsAggregator->peekCmdBufferList().peekIsEmpty());

    // surfaces are non resident
    auto &surfacesForResidency = mockCsr->getResidencyAllocations();
    EXPECT_EQ(0u, surfacesForResidency.size());
}

// A canonized general state base address must be written to the command in decanonized form.
HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskTests, givenCsrWhenGeneralStateBaseAddressIsProgrammedThenDecanonizedAddressIsWritten) {
    uint64_t generalStateBaseAddress = 0xffff800400010000ull;
    DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags();

    typename FamilyType::STATE_BASE_ADDRESS sbaCmd;
    StateBaseAddressHelper::programStateBaseAddress(&sbaCmd, &dsh, &ioh, &ssh, generalStateBaseAddress, true, 0, 0, generalStateBaseAddress, 0, true, false,
                                                    pDevice->getGmmHelper(), false, MemoryCompressionState::NotApplicable, false, 1u);

    EXPECT_NE(generalStateBaseAddress, sbaCmd.getGeneralStateBaseAddress());
    EXPECT_EQ(GmmHelper::decanonize(generalStateBaseAddress), sbaCmd.getGeneralStateBaseAddress());
}

// When the general-state flag (6th argument) is false, the general state fields stay zero
// and their modify-enable bits stay cleared.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenNonZeroGeneralStateBaseAddressWhenProgrammingIsDisabledThenExpectCommandValueZero) {
    uint64_t generalStateBaseAddress = 0x80010000ull;

    typename FamilyType::STATE_BASE_ADDRESS sbaCmd;
    StateBaseAddressHelper::programStateBaseAddress(&sbaCmd, &dsh, &ioh, &ssh, generalStateBaseAddress, false, 0, 0, generalStateBaseAddress, 0, true, false,
                                                    pDevice->getGmmHelper(), false, MemoryCompressionState::NotApplicable, false, 1u);

    EXPECT_EQ(0ull, sbaCmd.getGeneralStateBaseAddress());
    EXPECT_EQ(0u, sbaCmd.getGeneralStateBufferSize());
    EXPECT_FALSE(sbaCmd.getGeneralStateBaseAddressModifyEnable());
    EXPECT_FALSE(sbaCmd.getGeneralStateBufferSizeModifyEnable());
}

// With the 11th argument false, the instruction heap fields of the command stay zero and
// their modify-enable bits stay cleared.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenNonZeroInternalHeapBaseAddressWhenProgrammingIsDisabledThenExpectCommandValueZero) {
    uint64_t internalHeapBaseAddress = 0x80010000ull;

    typename FamilyType::STATE_BASE_ADDRESS sbaCmd;
    StateBaseAddressHelper::programStateBaseAddress(&sbaCmd, &dsh, &ioh, &ssh, internalHeapBaseAddress, true, 0, internalHeapBaseAddress, 0, 0, false, false,
                                                    pDevice->getGmmHelper(), false, MemoryCompressionState::NotApplicable, false, 1u);

    EXPECT_FALSE(sbaCmd.getInstructionBaseAddressModifyEnable());
    EXPECT_EQ(0ull, sbaCmd.getInstructionBaseAddress());
    EXPECT_FALSE(sbaCmd.getInstructionBufferSizeModifyEnable());
    EXPECT_EQ(0u, sbaCmd.getInstructionBufferSize());
    EXPECT_EQ(0u, sbaCmd.getInstructionMemoryObjectControlState());
}

// With null dsh / ioh / ssh, the dynamic, indirect-object and surface state fields are left
// unprogrammed while the instruction and general state fields are still programmed.
HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskTests, givenSbaProgrammingWhenHeapsAreNotProvidedThenDontProgram) {
    DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags();

    uint64_t internalHeapBase = 0x10000;
    uint64_t instructionHeapBase = 0x10000;
    uint64_t generalStateBase = 0x30000;

    typename FamilyType::STATE_BASE_ADDRESS sbaCmd;
    StateBaseAddressHelper::programStateBaseAddress(&sbaCmd, nullptr, nullptr, nullptr, generalStateBase, true, 0, internalHeapBase, instructionHeapBase, 0, true, false,
                                                    pDevice->getGmmHelper(), false, MemoryCompressionState::NotApplicable, false, 1u);

    // Heap-backed fields: untouched
    EXPECT_FALSE(sbaCmd.getDynamicStateBaseAddressModifyEnable());
    EXPECT_FALSE(sbaCmd.getDynamicStateBufferSizeModifyEnable());
    EXPECT_EQ(0u, sbaCmd.getDynamicStateBaseAddress());
    EXPECT_EQ(0u, sbaCmd.getDynamicStateBufferSize());

    EXPECT_FALSE(sbaCmd.getIndirectObjectBaseAddressModifyEnable());
    EXPECT_FALSE(sbaCmd.getIndirectObjectBufferSizeModifyEnable());
    EXPECT_EQ(0u, sbaCmd.getIndirectObjectBaseAddress());
    EXPECT_EQ(0u, sbaCmd.getIndirectObjectBufferSize());

    EXPECT_FALSE(sbaCmd.getSurfaceStateBaseAddressModifyEnable());
    EXPECT_EQ(0u, sbaCmd.getSurfaceStateBaseAddress());

    // Instruction and general state fields: programmed
    EXPECT_TRUE(sbaCmd.getInstructionBaseAddressModifyEnable());
    EXPECT_EQ(instructionHeapBase, sbaCmd.getInstructionBaseAddress());
    EXPECT_TRUE(sbaCmd.getInstructionBufferSizeModifyEnable());
    EXPECT_EQ(MemoryConstants::sizeOf4GBinPageEntities, sbaCmd.getInstructionBufferSize());

    EXPECT_TRUE(sbaCmd.getGeneralStateBaseAddressModifyEnable());
    EXPECT_TRUE(sbaCmd.getGeneralStateBufferSizeModifyEnable());
    EXPECT_EQ(GmmHelper::decanonize(generalStateBase), sbaCmd.getGeneralStateBaseAddress());
    EXPECT_EQ(0xfffffu, sbaCmd.getGeneralStateBufferSize());
}

// flushTask initializes the page table manager of the flushing CSR only, and only once
// (a second flush on the same CSR leaves the call count at 1).
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCommandStreamReceiverWhenFlushTaskIsCalledThenInitializePageTableManagerRegister) {
    auto csr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    auto csr2 = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    pDevice->resetCommandStreamReceiver(csr);

    MockGmmPageTableMngr *pageTableManager = new MockGmmPageTableMngr();
    csr->pageTableManager.reset(pageTableManager);
    MockGmmPageTableMngr *pageTableManager2 = new MockGmmPageTableMngr();
    csr2->pageTableManager.reset(pageTableManager2);

    auto memoryManager = pDevice->getMemoryManager();
    auto graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize});
    IndirectHeap cs(graphicsAllocation);

    EXPECT_FALSE(csr->pageTableManagerInitialized);
    EXPECT_FALSE(csr2->pageTableManagerInitialized);

    DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags();

    csr->flushTask(cs, 0u, &cs, &cs, &cs, 0u, dispatchFlags, *pDevice);

    EXPECT_TRUE(csr->pageTableManagerInitialized);
    EXPECT_FALSE(csr2->pageTableManagerInitialized);

    // Second flush on the same CSR
    csr->flushTask(cs, 0u, &cs, &cs, &cs, 0u, dispatchFlags, *pDevice);

    EXPECT_EQ(1u, pageTableManager->initContextAuxTableRegisterCalled);
    EXPECT_EQ(1u, pageTableManager->initContextAuxTableRegisterParamsPassed.size());
    EXPECT_EQ(csr, pageTableManager->initContextAuxTableRegisterParamsPassed[0].initialBBHandle);

    pDevice->resetCommandStreamReceiver(csr2);
    csr2->flushTask(cs, 0u, &cs, &cs, &cs, 0u, dispatchFlags, *pDevice);
    EXPECT_TRUE(csr2->pageTableManagerInitialized);

    memoryManager->freeGraphicsMemory(graphicsAllocation);

    EXPECT_EQ(1u, pageTableManager2->initContextAuxTableRegisterCalled);
    EXPECT_EQ(csr2, pageTableManager2->initContextAuxTableRegisterParamsPassed[0].initialBBHandle);
}

// flushBcsTask initializes the page table manager of the CSR it is called on and passes
// that CSR as initialBBHandle.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenPageTableManagerPointerWhenCallBlitBufferThenPageTableManagerInitializedForProperCsr) {
    auto bcsCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    auto bcsCsr2 = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    pDevice->resetCommandStreamReceiver(bcsCsr);

    MockGmmPageTableMngr *pageTableManager = new MockGmmPageTableMngr();
    bcsCsr->pageTableManager.reset(pageTableManager);
    MockGmmPageTableMngr *pageTableManager2 = new MockGmmPageTableMngr();
    bcsCsr2->pageTableManager.reset(pageTableManager2);

    auto memoryManager = pDevice->getMemoryManager();
    auto graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize});

    EXPECT_FALSE(bcsCsr->pageTableManagerInitialized);
    EXPECT_FALSE(bcsCsr2->pageTableManagerInitialized);

    auto blitProperties = BlitProperties::constructPropertiesForCopy(graphicsAllocation,                  // dstAllocation
                                                                     graphicsAllocation,                  // srcAllocation
                                                                     0,                                   // dstOffset
                                                                     0,                                   // srcOffset
                                                                     0,                                   // copySize
                                                                     0,                                   // srcRowPitch
                                                                     0,                                   // srcSlicePitch
                                                                     0,                                   // dstRowPitch
                                                                     0,                                   // dstSlicePitch
                                                                     bcsCsr->getClearColorAllocation()    // clearColorAllocation
    );
    BlitPropertiesContainer container;
    container.push_back(blitProperties);

    bcsCsr->flushBcsTask(container, true, false, *pDevice);

    EXPECT_TRUE(bcsCsr->pageTableManagerInitialized);
    EXPECT_FALSE(bcsCsr2->pageTableManagerInitialized);

    EXPECT_EQ(1u, pageTableManager->initContextAuxTableRegisterCalled);
    EXPECT_EQ(bcsCsr, pageTableManager->initContextAuxTableRegisterParamsPassed[0].initialBBHandle);

    pDevice->resetCommandStreamReceiver(bcsCsr2);
    bcsCsr2->flushBcsTask(container, true, false, *pDevice);

    EXPECT_TRUE(bcsCsr2->pageTableManagerInitialized);

    memoryManager->freeGraphicsMemory(graphicsAllocation);

    EXPECT_EQ(1u, pageTableManager2->initContextAuxTableRegisterCalled);
    EXPECT_EQ(bcsCsr2, pageTableManager2->initContextAuxTableRegisterParamsPassed[0].initialBBHandle);
}

// A second flushBcsTask on an already-initialized CSR must not initialize the page table
// manager again (call count and recorded params stay at 1).
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenPageTableManagerPointerWhenCallBlitBufferAndPageTableManagerInitializedThenNotInitializeAgain) {
    auto bcsCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    pDevice->resetCommandStreamReceiver(bcsCsr);

    MockGmmPageTableMngr *pageTableManager = new MockGmmPageTableMngr();
    bcsCsr->pageTableManager.reset(pageTableManager);

    auto memoryManager = pDevice->getMemoryManager();
    auto graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize});

    EXPECT_FALSE(bcsCsr->pageTableManagerInitialized);

    auto blitProperties = BlitProperties::constructPropertiesForCopy(graphicsAllocation,                  // dstAllocation
                                                                     graphicsAllocation,                  // srcAllocation
                                                                     0,                                   // dstOffset
                                                                     0,                                   // srcOffset
                                                                     0,                                   // copySize
                                                                     0,                                   // srcRowPitch
                                                                     0,                                   // srcSlicePitch
                                                                     0,                                   // dstRowPitch
                                                                     0,                                   // dstSlicePitch
                                                                     bcsCsr->getClearColorAllocation()    // clearColorAllocation
    );
    BlitPropertiesContainer container;
    container.push_back(blitProperties);

    bcsCsr->flushBcsTask(container, true, false, *pDevice);

    EXPECT_TRUE(bcsCsr->pageTableManagerInitialized);

    // Second blit on the same CSR
    bcsCsr->flushBcsTask(container, true, false, *pDevice);

    memoryManager->freeGraphicsMemory(graphicsAllocation);

    EXPECT_EQ(1u, pageTableManager->initContextAuxTableRegisterCalled);
    EXPECT_EQ(1u, pageTableManager->initContextAuxTableRegisterParamsPassed.size());
    EXPECT_EQ(bcsCsr, pageTableManager->initContextAuxTableRegisterParamsPassed[0].initialBBHandle);
}

HWTEST_F(CommandStreamReceiverFlushTaskTests, givenNullPageTableManagerWhenCallBlitBufferThenPageTableManagerIsNotInitialized) { auto bcsCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); auto bcsCsr2 = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(bcsCsr); bcsCsr->pageTableManager.reset(nullptr); bcsCsr2->pageTableManager.reset(nullptr); auto memoryManager = pDevice->getMemoryManager(); auto graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); EXPECT_FALSE(bcsCsr->pageTableManagerInitialized); EXPECT_FALSE(bcsCsr2->pageTableManagerInitialized); auto blitProperties = BlitProperties::constructPropertiesForCopy(graphicsAllocation, //dstAllocation graphicsAllocation, //srcAllocation 0, 
//dstOffset 0, //srcOffset 0, //copySize 0, //srcRowPitch 0, //srcSlicePitch 0, //dstRowPitch 0, //dstSlicePitch bcsCsr->getClearColorAllocation() //clearColorAllocation ); BlitPropertiesContainer container; container.push_back(blitProperties); bcsCsr->flushBcsTask(container, true, false, *pDevice); EXPECT_FALSE(bcsCsr->pageTableManagerInitialized); EXPECT_FALSE(bcsCsr2->pageTableManagerInitialized); pDevice->resetCommandStreamReceiver(bcsCsr2); bcsCsr2->flushBcsTask(container, true, false, *pDevice); EXPECT_FALSE(bcsCsr2->pageTableManagerInitialized); bcsCsr2->pageTableManagerInitialized = true; EXPECT_NO_THROW(bcsCsr2->flushBcsTask(container, true, false, *pDevice)); memoryManager->freeGraphicsMemory(graphicsAllocation); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCommandStreamReceiverWhenInitializingPageTableManagerRegisterFailsThenPageTableManagerIsNotInitialized) { auto csr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(csr); MockGmmPageTableMngr *pageTableManager = new MockGmmPageTableMngr(); csr->pageTableManager.reset(pageTableManager); pageTableManager->initContextAuxTableRegisterResult = GMM_ERROR; auto memoryManager = pDevice->getMemoryManager(); auto graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); IndirectHeap cs(graphicsAllocation); EXPECT_FALSE(csr->pageTableManagerInitialized); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); csr->flushTask(cs, 0u, &cs, &cs, &cs, 0u, dispatchFlags, *pDevice); EXPECT_FALSE(csr->pageTableManagerInitialized); csr->flushTask(cs, 0u, &cs, &cs, &cs, 0u, dispatchFlags, *pDevice); EXPECT_FALSE(csr->pageTableManagerInitialized); memoryManager->freeGraphicsMemory(graphicsAllocation); EXPECT_EQ(2u, pageTableManager->initContextAuxTableRegisterCalled); EXPECT_EQ(csr, 
pageTableManager->initContextAuxTableRegisterParamsPassed[0].initialBBHandle); EXPECT_EQ(csr, pageTableManager->initContextAuxTableRegisterParamsPassed[1].initialBBHandle); } HWTEST_F(CommandStreamReceiverFlushTaskTests, WhenCsrIsMarkedWithNewResourceThenCallBatchedSubmission) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.dispatchMode = DispatchMode::BatchedDispatch; commandStreamReceiver.newResources = true; flushTask(commandStreamReceiver); EXPECT_TRUE(commandStreamReceiver.flushBatchedSubmissionsCalled); } HWTEST_F(CommandStreamReceiverFlushTaskTests, whenSubmissionChangesFromSingleSubdeviceThenCallBatchedSubmission) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.dispatchMode = DispatchMode::BatchedDispatch; commandStreamReceiver.wasSubmittedToSingleSubdevice = true; flushTask(commandStreamReceiver); EXPECT_TRUE(commandStreamReceiver.flushBatchedSubmissionsCalled); } HWTEST_F(CommandStreamReceiverFlushTaskTests, whenSubmissionChangesToSingleSubdeviceThenCallBatchedSubmission) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.dispatchMode = DispatchMode::BatchedDispatch; flushTaskFlags.useSingleSubdevice = true; flushTask(commandStreamReceiver); EXPECT_TRUE(commandStreamReceiver.flushBatchedSubmissionsCalled); } HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenGpuIsIdleWhenCsrIsEnabledToFlushOnGpuIdleThenCallBatchedSubmission) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.dispatchMode = DispatchMode::BatchedDispatch; commandStreamReceiver.useGpuIdleImplicitFlush = true; commandStreamReceiver.taskCount = 1u; *commandStreamReceiver.getTagAddress() = 1u; flushTask(commandStreamReceiver); EXPECT_TRUE(commandStreamReceiver.flushBatchedSubmissionsCalled); *commandStreamReceiver.getTagAddress() = 2u; } using SingleRootDeviceCommandStreamReceiverTests = 
CommandStreamReceiverFlushTaskTests; HWTEST_F(SingleRootDeviceCommandStreamReceiverTests, givenMultipleEventInSingleRootDeviceEnvironmentWhenTheyArePassedToEnqueueWithoutSubmissionThenSemaphoreWaitCommandIsNotProgrammed) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; auto deviceFactory = std::make_unique(1, 0); auto device0 = deviceFactory->rootDevices[0]; auto mockCsr0 = new MockCommandStreamReceiver(*device0->executionEnvironment, device0->getRootDeviceIndex(), device0->getDeviceBitfield()); device0->resetCommandStreamReceiver(mockCsr0); cl_device_id devices[] = {device0}; auto context = std::make_unique(ClDeviceVector(devices, 1), false); auto pCmdQ0 = context.get()->getSpecialQueue(0u); Event event1(pCmdQ0, CL_COMMAND_NDRANGE_KERNEL, 5, 15); Event event2(nullptr, CL_COMMAND_NDRANGE_KERNEL, 6, 16); Event event3(pCmdQ0, CL_COMMAND_NDRANGE_KERNEL, 4, 20); UserEvent userEvent1(&pCmdQ0->getContext()); userEvent1.setStatus(CL_COMPLETE); cl_event eventWaitList[] = { &event1, &event2, &event3, &userEvent1, }; cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]); { pCmdQ0->enqueueMarkerWithWaitList( numEventsInWaitList, eventWaitList, nullptr); HardwareParse csHwParser; csHwParser.parseCommands(pCmdQ0->getCS(0)); auto semaphores = findAll(csHwParser.cmdList.begin(), csHwParser.cmdList.end()); EXPECT_EQ(0u, semaphores.size()); } } command_stream_receiver_flush_task_3_tests.cpp000066400000000000000000002553411422164147700360230ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/command_stream/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/wait_status.h" #include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/dispatch_flags_helper.h" #include 
"shared/test/common/helpers/engine_descriptor_helper.h" #include "shared/test/common/helpers/variable_backup.h" #include "shared/test/common/mocks/mock_allocation_properties.h" #include "shared/test/common/mocks/mock_csr.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/mocks/mock_os_context.h" #include "shared/test/common/mocks/mock_submissions_aggregator.h" #include "shared/test/common/mocks/mock_svm_manager.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_event.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "opencl/test/unit_test/mocks/mock_program.h" using namespace NEO; using CommandStreamReceiverFlushTaskTests = UltCommandStreamReceiverTest; HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenFlushTaskIsCalledThenNothingIsSubmittedToTheHwAndSubmissionIsRecorded) { typedef typename FamilyType::MI_BATCH_BUFFER_END MI_BATCH_BUFFER_END; CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); mockCsr->useNewResourceImplicitFlush = false; mockCsr->useGpuIdleImplicitFlush = false; auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); DispatchFlags dispatchFlags = 
DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); dispatchFlags.guardCommandBufferWithPipeControl = true; mockCsr->flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevel, dispatchFlags, *pDevice); auto &cmdBufferList = mockedSubmissionsAggregator->peekCommandBuffers(); EXPECT_FALSE(cmdBufferList.peekIsEmpty()); EXPECT_EQ(cmdBufferList.peekHead(), cmdBufferList.peekTail()); auto cmdBuffer = cmdBufferList.peekHead(); //two more because of preemption allocation and sipKernel in Mid Thread preemption mode size_t csrSurfaceCount = (pDevice->getPreemptionMode() == PreemptionMode::MidThread) ? 2 : 0; csrSurfaceCount -= pDevice->getHardwareInfo().capabilityTable.supportsImages ? 0 : 1; csrSurfaceCount += mockCsr->globalFenceAllocation ? 1 : 0; csrSurfaceCount += mockCsr->clearColorAllocation ? 1 : 0; //we should have 3 heaps, tag allocation and csr command stream + cq EXPECT_EQ(5u + csrSurfaceCount, cmdBuffer->surfaces.size()); EXPECT_EQ(0, mockCsr->flushCalledCount); //we should be submitting via csr EXPECT_EQ(cmdBuffer->batchBuffer.commandBufferAllocation, mockCsr->commandStream.getGraphicsAllocation()); EXPECT_EQ(cmdBuffer->batchBuffer.startOffset, 0u); EXPECT_FALSE(cmdBuffer->batchBuffer.requiresCoherency); EXPECT_FALSE(cmdBuffer->batchBuffer.low_priority); //find BB END parseCommands(commandStream, 0); auto itBBend = find(cmdList.begin(), cmdList.end()); void *bbEndAddress = *itBBend; EXPECT_EQ(bbEndAddress, cmdBuffer->batchBufferEndLocation); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeAndTwoRecordedCommandBuffersWhenFlushTaskIsCalledThenBatchBuffersAreCombined) { typedef typename FamilyType::MI_BATCH_BUFFER_END MI_BATCH_BUFFER_END; typedef typename FamilyType::MI_BATCH_BUFFER_START MI_BATCH_BUFFER_START; CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new 
MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->useNewResourceImplicitFlush = false; mockCsr->useGpuIdleImplicitFlush = false; mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.guardCommandBufferWithPipeControl = true; mockCsr->flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevel, dispatchFlags, *pDevice); mockCsr->flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevel, dispatchFlags, *pDevice); auto primaryBatch = mockedSubmissionsAggregator->peekCommandBuffers().peekHead(); auto secondBatchBuffer = primaryBatch->next; auto bbEndLocation = primaryBatch->batchBufferEndLocation; auto secondBatchBufferAddress = (uint64_t)ptrOffset(secondBatchBuffer->batchBuffer.commandBufferAllocation->getGpuAddress(), secondBatchBuffer->batchBuffer.startOffset); auto lastbbEndPtr = secondBatchBuffer->batchBuffer.endCmdPtr; mockCsr->flushBatchedSubmissions(); auto batchBufferStart = genCmdCast(bbEndLocation); ASSERT_NE(nullptr, batchBufferStart); EXPECT_EQ(secondBatchBufferAddress, batchBufferStart->getBatchBufferStartAddress()); EXPECT_EQ(mockCsr->recordedCommandBuffer->batchBuffer.endCmdPtr, lastbbEndPtr); } HWTEST_F(CommandStreamReceiverFlushTaskTests, whenFlushSmallTaskThenCommandStreamAlignedToCacheLine) { using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END; auto &csr = pDevice->getUltCommandStreamReceiver(); auto &stream = csr.getCS(2 * MemoryConstants::cacheLineSize); stream.getSpace(MemoryConstants::cacheLineSize - sizeof(MI_BATCH_BUFFER_END) - 2); csr.flushSmallTask(stream, stream.getUsed()); auto used = csr.commandStream.getUsed(); auto expected = 2 * MemoryConstants::cacheLineSize; EXPECT_EQ(used, 
expected); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeAndThreeRecordedCommandBuffersWhenFlushTaskIsCalledThenBatchBuffersAreCombined) { typedef typename FamilyType::MI_BATCH_BUFFER_END MI_BATCH_BUFFER_END; typedef typename FamilyType::MI_BATCH_BUFFER_START MI_BATCH_BUFFER_START; CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->useNewResourceImplicitFlush = false; mockCsr->useGpuIdleImplicitFlush = false; mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); dispatchFlags.guardCommandBufferWithPipeControl = true; mockCsr->flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevel, dispatchFlags, *pDevice); mockCsr->flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevel, dispatchFlags, *pDevice); mockCsr->flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevel, dispatchFlags, *pDevice); auto primaryBatch = mockedSubmissionsAggregator->peekCommandBuffers().peekHead(); auto lastBatchBuffer = primaryBatch->next->next; auto bbEndLocation = primaryBatch->next->batchBufferEndLocation; auto lastBatchBufferAddress = (uint64_t)ptrOffset(lastBatchBuffer->batchBuffer.commandBufferAllocation->getGpuAddress(), lastBatchBuffer->batchBuffer.startOffset); auto lastbbEndPtr = lastBatchBuffer->batchBuffer.endCmdPtr; mockCsr->flushBatchedSubmissions(); auto batchBufferStart = genCmdCast(bbEndLocation); ASSERT_NE(nullptr, batchBufferStart); EXPECT_EQ(lastBatchBufferAddress, 
batchBufferStart->getBatchBufferStartAddress()); EXPECT_EQ(1, mockCsr->flushCalledCount); EXPECT_EQ(mockCsr->recordedCommandBuffer->batchBuffer.endCmdPtr, lastbbEndPtr); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeAndThreeRecordedCommandBuffersThatUsesAllResourceWhenFlushTaskIsCalledThenBatchBuffersAreNotCombined) { typedef typename FamilyType::MI_BATCH_BUFFER_END MI_BATCH_BUFFER_END; typedef typename FamilyType::MI_BATCH_BUFFER_START MI_BATCH_BUFFER_START; CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->useNewResourceImplicitFlush = false; mockCsr->useGpuIdleImplicitFlush = false; mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); auto memorySize = (size_t)pDevice->getDeviceInfo().globalMemSize; MockGraphicsAllocation largeAllocation(nullptr, memorySize); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); dispatchFlags.guardCommandBufferWithPipeControl = true; mockCsr->flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevel, dispatchFlags, *pDevice); mockCsr->makeResident(largeAllocation); mockCsr->flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevel, dispatchFlags, *pDevice); mockCsr->makeResident(largeAllocation); mockCsr->flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevel, dispatchFlags, *pDevice); auto primaryBatch = mockedSubmissionsAggregator->peekCommandBuffers().peekHead(); auto bbEndLocation = primaryBatch->next->batchBufferEndLocation; mockCsr->flushBatchedSubmissions(); auto batchBufferStart = 
genCmdCast(bbEndLocation); ASSERT_EQ(nullptr, batchBufferStart); auto bbEnd = genCmdCast(bbEndLocation); EXPECT_NE(nullptr, bbEnd); EXPECT_EQ(3, mockCsr->flushCalledCount); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenFlushTaskIsCalledTwiceThenNothingIsSubmittedToTheHwAndTwoSubmissionAreRecorded) { typedef typename FamilyType::MI_BATCH_BUFFER_END MI_BATCH_BUFFER_END; CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); auto initialBase = commandStream.getCpuBase(); auto initialUsed = commandStream.getUsed(); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->useNewResourceImplicitFlush = false; mockCsr->useGpuIdleImplicitFlush = false; mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); dispatchFlags.guardCommandBufferWithPipeControl = true; mockCsr->flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevel, dispatchFlags, *pDevice); //ensure command stream still used EXPECT_EQ(initialBase, commandStream.getCpuBase()); auto baseAfterFirstFlushTask = commandStream.getCpuBase(); auto usedAfterFirstFlushTask = commandStream.getUsed(); dispatchFlags.requiresCoherency = true; dispatchFlags.lowPriority = true; mockCsr->flushTask(commandStream, commandStream.getUsed(), &dsh, &ioh, &ssh, taskLevel, dispatchFlags, *pDevice); auto baseAfterSecondFlushTask = commandStream.getCpuBase(); auto usedAfterSecondFlushTask = commandStream.getUsed(); EXPECT_EQ(initialBase, commandStream.getCpuBase()); EXPECT_EQ(baseAfterSecondFlushTask, 
baseAfterFirstFlushTask); EXPECT_EQ(baseAfterFirstFlushTask, initialBase); EXPECT_GT(usedAfterFirstFlushTask, initialUsed); EXPECT_GT(usedAfterSecondFlushTask, usedAfterFirstFlushTask); auto &cmdBufferList = mockedSubmissionsAggregator->peekCommandBuffers(); EXPECT_FALSE(cmdBufferList.peekIsEmpty()); EXPECT_NE(cmdBufferList.peekHead(), cmdBufferList.peekTail()); EXPECT_NE(nullptr, cmdBufferList.peekTail()); EXPECT_NE(nullptr, cmdBufferList.peekHead()); auto cmdBuffer1 = cmdBufferList.peekHead(); auto cmdBuffer2 = cmdBufferList.peekTail(); EXPECT_GT(cmdBuffer2->batchBufferEndLocation, cmdBuffer1->batchBufferEndLocation); EXPECT_FALSE(cmdBuffer1->batchBuffer.requiresCoherency); EXPECT_TRUE(cmdBuffer2->batchBuffer.requiresCoherency); EXPECT_FALSE(cmdBuffer1->batchBuffer.low_priority); EXPECT_TRUE(cmdBuffer2->batchBuffer.low_priority); EXPECT_GT(cmdBuffer2->batchBuffer.startOffset, cmdBuffer1->batchBuffer.startOffset); } HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenRecordedBatchBufferIsBeingSubmittedThenFlushIsCalledWithRecordedCommandBuffer) { CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->useNewResourceImplicitFlush = false; mockCsr->useGpuIdleImplicitFlush = false; mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); configureCSRtoNonDirtyState(false); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); dispatchFlags.guardCommandBufferWithPipeControl = true; dispatchFlags.requiresCoherency = false; 
mockCsr->streamProperties.stateComputeMode.isCoherencyRequired.value = 0; commandStream.getSpace(4); mockCsr->flushTask(commandStream, 4, &dsh, &ioh, &ssh, taskLevel, dispatchFlags, *pDevice); EXPECT_EQ(0, mockCsr->flushCalledCount); auto &surfacesForResidency = mockCsr->getResidencyAllocations(); EXPECT_EQ(0u, surfacesForResidency.size()); auto &cmdBufferList = mockedSubmissionsAggregator->peekCommandBuffers(); EXPECT_FALSE(cmdBufferList.peekIsEmpty()); auto cmdBuffer = cmdBufferList.peekHead(); //preemption allocation + sip kernel size_t csrSurfaceCount = (pDevice->getPreemptionMode() == PreemptionMode::MidThread) ? 2 : 0; csrSurfaceCount += mockCsr->globalFenceAllocation ? 1 : 0; EXPECT_EQ(4u + csrSurfaceCount, cmdBuffer->surfaces.size()); //copy those surfaces std::vector residentSurfaces = cmdBuffer->surfaces; for (auto &graphicsAllocation : residentSurfaces) { EXPECT_TRUE(graphicsAllocation->isResident(mockCsr->getOsContext().getContextId())); EXPECT_EQ(1u, graphicsAllocation->getResidencyTaskCount(mockCsr->getOsContext().getContextId())); } mockCsr->flushBatchedSubmissions(); EXPECT_FALSE(mockCsr->recordedCommandBuffer->batchBuffer.low_priority); EXPECT_FALSE(mockCsr->recordedCommandBuffer->batchBuffer.requiresCoherency); EXPECT_EQ(mockCsr->recordedCommandBuffer->batchBuffer.commandBufferAllocation, commandStream.getGraphicsAllocation()); EXPECT_EQ(4u, mockCsr->recordedCommandBuffer->batchBuffer.startOffset); EXPECT_EQ(1, mockCsr->flushCalledCount); EXPECT_TRUE(mockedSubmissionsAggregator->peekCommandBuffers().peekIsEmpty()); EXPECT_EQ(0u, surfacesForResidency.size()); for (auto &graphicsAllocation : residentSurfaces) { EXPECT_FALSE(graphicsAllocation->isResident(mockCsr->getOsContext().getContextId())); } } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrCreatedWithDedicatedDebugFlagWhenItIsCreatedThenItHasProperDispatchMode) { DebugManagerStateRestore stateRestore; DebugManager.flags.CsrDispatchMode.set(static_cast(DispatchMode::AdaptiveDispatch)); 
std::unique_ptr> mockCsr(new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield())); EXPECT_EQ(DispatchMode::AdaptiveDispatch, mockCsr->dispatchMode); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenBlockingCommandIsSendThenItIsFlushedAndNotBatched) { CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->useNewResourceImplicitFlush = false; mockCsr->useGpuIdleImplicitFlush = false; mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); configureCSRtoNonDirtyState(false); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.blocking = true; dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); mockCsr->flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevel, dispatchFlags, *pDevice); EXPECT_EQ(1, mockCsr->flushCalledCount); EXPECT_TRUE(mockedSubmissionsAggregator->peekCmdBufferList().peekIsEmpty()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenBufferToFlushWhenFlushTaskCalledThenUpdateFlushStamp) { auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(mockCsr); commandStream.getSpace(1); EXPECT_EQ(0, mockCsr->flushCalledCount); auto previousFlushStamp = mockCsr->flushStamp->peekStamp(); auto cmplStamp = flushTask(*mockCsr); EXPECT_GT(mockCsr->flushStamp->peekStamp(), previousFlushStamp); EXPECT_EQ(mockCsr->flushStamp->peekStamp(), cmplStamp.flushStamp); EXPECT_EQ(1, mockCsr->flushCalledCount); } 
HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskTests, givenNothingToFlushWhenFlushTaskCalledThenDontFlushStamp) { auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(mockCsr); configureCSRtoNonDirtyState(false); EXPECT_EQ(0, mockCsr->flushCalledCount); auto previousFlushStamp = mockCsr->flushStamp->peekStamp(); auto cmplStamp = flushTask(*mockCsr); EXPECT_EQ(mockCsr->flushStamp->peekStamp(), previousFlushStamp); EXPECT_EQ(previousFlushStamp, cmplStamp.flushStamp); EXPECT_EQ(0, mockCsr->flushCalledCount); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenFlushTaskIsCalledThenFlushedTaskCountIsNotModifed) { CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->useNewResourceImplicitFlush = false; mockCsr->useGpuIdleImplicitFlush = false; mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); dispatchFlags.guardCommandBufferWithPipeControl = true; mockCsr->flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevel, dispatchFlags, *pDevice); EXPECT_EQ(1u, mockCsr->peekLatestSentTaskCount()); EXPECT_EQ(0u, mockCsr->peekLatestFlushedTaskCount()); mockCsr->flushBatchedSubmissions(); EXPECT_EQ(1u, mockCsr->peekLatestSentTaskCount()); EXPECT_EQ(1u, mockCsr->peekLatestFlushedTaskCount()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenUpdateTaskCountFromWaitWhenFlushBatchedIsCalledThenFlushedTaskCountIsNotModifed) { DebugManagerStateRestore restorer; DebugManager.flags.UpdateTaskCountFromWait.set(3); auto mockCsr 
= new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->useNewResourceImplicitFlush = false; mockCsr->useGpuIdleImplicitFlush = false; mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); dispatchFlags.guardCommandBufferWithPipeControl = true; mockCsr->flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevel, dispatchFlags, *pDevice); EXPECT_EQ(1u, mockCsr->peekLatestSentTaskCount()); EXPECT_EQ(0u, mockCsr->peekLatestFlushedTaskCount()); mockCsr->flushBatchedSubmissions(); EXPECT_EQ(1u, mockCsr->peekLatestSentTaskCount()); EXPECT_EQ(0u, mockCsr->peekLatestFlushedTaskCount()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInDefaultModeWhenFlushTaskIsCalledThenFlushedTaskCountIsModifed) { CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); dispatchFlags.guardCommandBufferWithPipeControl = true; auto &csr = commandQueue.getGpgpuCommandStreamReceiver(); csr.flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevel, dispatchFlags, *pDevice); EXPECT_EQ(1u, csr.peekLatestSentTaskCount()); EXPECT_EQ(1u, csr.peekLatestFlushedTaskCount()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenFlushTaskIsCalledGivenNumberOfTimesThenFlushIsCalled) { DebugManagerStateRestore restorer; DebugManager.flags.PerformImplicitFlushEveryEnqueueCount.set(2); CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); DispatchFlags dispatchFlags = 
DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); dispatchFlags.guardCommandBufferWithPipeControl = true; auto &csr = reinterpret_cast &>(commandQueue.getGpgpuCommandStreamReceiver()); csr.overrideDispatchPolicy(DispatchMode::BatchedDispatch); csr.useNewResourceImplicitFlush = false; csr.useGpuIdleImplicitFlush = false; dispatchFlags.implicitFlush = false; csr.flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevel, dispatchFlags, *pDevice); EXPECT_EQ(1u, csr.peekLatestSentTaskCount()); EXPECT_EQ(0u, csr.peekLatestFlushedTaskCount()); dispatchFlags.implicitFlush = false; csr.flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevel, dispatchFlags, *pDevice); EXPECT_EQ(2u, csr.peekLatestSentTaskCount()); EXPECT_EQ(2u, csr.peekLatestFlushedTaskCount()); dispatchFlags.implicitFlush = false; csr.flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevel, dispatchFlags, *pDevice); EXPECT_EQ(3u, csr.peekLatestSentTaskCount()); EXPECT_EQ(2u, csr.peekLatestFlushedTaskCount()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenWaitForTaskCountIsCalledWithTaskCountThatWasNotYetFlushedThenBatchedCommandBuffersAreSubmitted) { CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->useNewResourceImplicitFlush = false; mockCsr->useGpuIdleImplicitFlush = false; mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = 
PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); dispatchFlags.guardCommandBufferWithPipeControl = true; mockCsr->flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevel, dispatchFlags, *pDevice); auto &cmdBufferList = mockedSubmissionsAggregator->peekCommandBuffers(); EXPECT_FALSE(cmdBufferList.peekIsEmpty()); EXPECT_EQ(0u, mockCsr->peekLatestFlushedTaskCount()); auto cmdBuffer = cmdBufferList.peekHead(); EXPECT_EQ(1u, cmdBuffer->taskCount); mockCsr->waitForCompletionWithTimeout(WaitParams{false, false, 1}, 1); EXPECT_EQ(1u, mockCsr->peekLatestFlushedTaskCount()); EXPECT_TRUE(cmdBufferList.peekIsEmpty()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenEnqueueIsMadeThenCurrentMemoryUsedIsTracked) { CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->useNewResourceImplicitFlush = false; mockCsr->useGpuIdleImplicitFlush = false; mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); dispatchFlags.guardCommandBufferWithPipeControl = true; mockCsr->flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevel, dispatchFlags, *pDevice); auto cmdBuffer = mockedSubmissionsAggregator->peekCommandBuffers().peekHead(); uint64_t expectedUsed = 0; for (const auto &resource : cmdBuffer->surfaces) { expectedUsed += resource->getUnderlyingBufferSize(); } EXPECT_EQ(expectedUsed, mockCsr->peekTotalMemoryUsed()); //after flush it goes to 0 mockCsr->flushBatchedSubmissions(); 
EXPECT_EQ(0u, mockCsr->peekTotalMemoryUsed()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenSusbsequentEnqueueIsMadeThenOnlyNewResourcesAreTrackedForMemoryUsage) { CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->useNewResourceImplicitFlush = false; mockCsr->useGpuIdleImplicitFlush = false; mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); dispatchFlags.guardCommandBufferWithPipeControl = true; mockCsr->flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevel, dispatchFlags, *pDevice); auto cmdBuffer = mockedSubmissionsAggregator->peekCommandBuffers().peekHead(); uint64_t expectedUsed = 0; for (const auto &resource : cmdBuffer->surfaces) { expectedUsed += resource->getUnderlyingBufferSize(); } auto additionalSize = 1234; MockGraphicsAllocation graphicsAllocation(nullptr, additionalSize); mockCsr->makeResident(graphicsAllocation); mockCsr->flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevel, dispatchFlags, *pDevice); EXPECT_EQ(expectedUsed + additionalSize, mockCsr->peekTotalMemoryUsed()); mockCsr->flushBatchedSubmissions(); } struct MockedMemoryManager : public OsAgnosticMemoryManager { MockedMemoryManager(ExecutionEnvironment &executionEnvironment) : OsAgnosticMemoryManager(executionEnvironment) {} bool isMemoryBudgetExhausted() const override { return budgetExhausted; } bool budgetExhausted = false; }; HWTEST_F(CommandStreamReceiverFlushTaskTests, 
givenCsrInBatchingModeWhenTotalResourceUsedExhaustsTheBudgetThenDoImplicitFlush) { CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); auto mockedMemoryManager = new MockedMemoryManager(*executionEnvironment); executionEnvironment->memoryManager.reset(mockedMemoryManager); auto mockCsr = std::make_unique>(*executionEnvironment, 0, pDevice->getDeviceBitfield()); mockCsr->setupContext(*pDevice->getDefaultEngine().osContext); if (pDevice->getPreemptionMode() == PreemptionMode::MidThread || pDevice->isDebuggerActive()) { mockCsr->createPreemptionAllocation(); } mockCsr->initializeTagAllocation(); mockCsr->useNewResourceImplicitFlush = false; mockCsr->useGpuIdleImplicitFlush = false; mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); dispatchFlags.guardCommandBufferWithPipeControl = true; mockedMemoryManager->budgetExhausted = true; mockCsr->flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevel, dispatchFlags, *pDevice); auto cmdBuffer = mockedSubmissionsAggregator->peekCommandBuffers().peekHead(); uint64_t expectedUsed = 0; for (const auto &resource : cmdBuffer->surfaces) { expectedUsed += resource->getUnderlyingBufferSize(); } EXPECT_EQ(expectedUsed, mockCsr->peekTotalMemoryUsed()); auto budgetSize = (size_t)pDevice->getDeviceInfo().globalMemSize; MockGraphicsAllocation hugeAllocation(nullptr, budgetSize / 4); mockCsr->makeResident(hugeAllocation); mockCsr->flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevel, dispatchFlags, *pDevice); //expect 2 flushes, since we cannot batch those submissions 
EXPECT_EQ(2u, mockCsr->peekLatestFlushedTaskCount()); EXPECT_EQ(0u, mockCsr->peekTotalMemoryUsed()); EXPECT_TRUE(mockedSubmissionsAggregator->peekCommandBuffers().peekIsEmpty()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenTwoTasksArePassedWithTheSameLevelThenThereIsNoPipeControlBetweenThemAfterFlush) { CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->useNewResourceImplicitFlush = false; mockCsr->useGpuIdleImplicitFlush = false; mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); dispatchFlags.guardCommandBufferWithPipeControl = true; dispatchFlags.outOfOrderExecutionAllowed = true; auto taskLevelPriorToSubmission = mockCsr->peekTaskLevel(); mockCsr->flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevelPriorToSubmission, dispatchFlags, *pDevice); //now emit with the same taskLevel mockCsr->flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevelPriorToSubmission, dispatchFlags, *pDevice); EXPECT_EQ(taskLevelPriorToSubmission, mockCsr->peekTaskLevel()); //validate if we recorded ppc positions auto firstCmdBuffer = mockedSubmissionsAggregator->peekCommandBuffers().peekHead(); EXPECT_NE(nullptr, firstCmdBuffer->pipeControlThatMayBeErasedLocation); auto secondCmdBuffer = firstCmdBuffer->next; EXPECT_NE(nullptr, secondCmdBuffer->pipeControlThatMayBeErasedLocation); EXPECT_NE(firstCmdBuffer->pipeControlThatMayBeErasedLocation, secondCmdBuffer->pipeControlThatMayBeErasedLocation); 
auto ppc = genCmdCast(firstCmdBuffer->pipeControlThatMayBeErasedLocation); EXPECT_NE(nullptr, ppc); auto ppc2 = genCmdCast(secondCmdBuffer->pipeControlThatMayBeErasedLocation); EXPECT_NE(nullptr, ppc2); //flush needs to bump the taskLevel mockCsr->flushBatchedSubmissions(); EXPECT_EQ(taskLevelPriorToSubmission + 1, mockCsr->peekTaskLevel()); //decode commands to confirm no pipe controls between Walkers parseCommands(commandQueue); auto itorBatchBufferStartFirst = find(cmdList.begin(), cmdList.end()); auto itorBatchBufferStartSecond = find(++itorBatchBufferStartFirst, cmdList.end()); //make sure they are not the same EXPECT_NE(cmdList.end(), itorBatchBufferStartFirst); EXPECT_NE(cmdList.end(), itorBatchBufferStartSecond); EXPECT_NE(itorBatchBufferStartFirst, itorBatchBufferStartSecond); auto itorPipeControl = find(itorBatchBufferStartFirst, itorBatchBufferStartSecond); EXPECT_EQ(itorPipeControl, itorBatchBufferStartSecond); //first pipe control is nooped, second pipe control is untouched auto noop1 = genCmdCast(ppc); auto noop2 = genCmdCast(ppc2); EXPECT_NE(nullptr, noop1); EXPECT_EQ(nullptr, noop2); auto ppcAfterChange = genCmdCast(ppc2); EXPECT_NE(nullptr, ppcAfterChange); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenDcFlushIsNotRequiredThenItIsNotSet) { CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->useNewResourceImplicitFlush = false; mockCsr->useGpuIdleImplicitFlush = false; mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); dispatchFlags.guardCommandBufferWithPipeControl = true; 
mockCsr->flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevel, dispatchFlags, *pDevice); parseCommands(commandStream); auto itorPipeControl = find(cmdList.begin(), cmdList.end()); auto pipeControl = genCmdCast(*itorPipeControl); EXPECT_FALSE(pipeControl->getDcFlushEnable()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenCommandAreSubmittedThenDcFlushIsAdded) { CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->useNewResourceImplicitFlush = false; mockCsr->useGpuIdleImplicitFlush = false; mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); dispatchFlags.guardCommandBufferWithPipeControl = true; dispatchFlags.outOfOrderExecutionAllowed = true; mockCsr->flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevel, dispatchFlags, *pDevice); parseCommands(commandStream); auto itorPipeControl = find(cmdList.begin(), cmdList.end()); auto pipeControl = genCmdCast(*itorPipeControl); mockCsr->flushBatchedSubmissions(); EXPECT_EQ(MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo), pipeControl->getDcFlushEnable()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWithOutOfOrderModeFisabledWhenCommandAreSubmittedThenDcFlushIsAdded) { CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->useNewResourceImplicitFlush = false; mockCsr->useGpuIdleImplicitFlush = 
false; mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); dispatchFlags.guardCommandBufferWithPipeControl = true; dispatchFlags.outOfOrderExecutionAllowed = false; mockCsr->flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevel, dispatchFlags, *pDevice); parseCommands(commandStream); auto itorPipeControl = find(cmdList.begin(), cmdList.end()); auto pipeControl = genCmdCast(*itorPipeControl); mockCsr->flushBatchedSubmissions(); EXPECT_EQ(MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo), pipeControl->getDcFlushEnable()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenUpdateTaskCountFromWaitSetAndGuardCommandBufferWithPipeControlWhenFlushTaskThenThereIsPipeControlForUpdateTaskCount) { DebugManagerStateRestore restorer; DebugManager.flags.UpdateTaskCountFromWait.set(3); CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->useNewResourceImplicitFlush = false; mockCsr->useGpuIdleImplicitFlush = false; mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); dispatchFlags.guardCommandBufferWithPipeControl = true; mockCsr->flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevel, dispatchFlags, *pDevice); parseCommands(commandStream); auto itorPipeControl = find(cmdList.begin(), cmdList.end()); EXPECT_NE(itorPipeControl, cmdList.end()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, 
givenUpdateTaskCountFromWaitSetWhenFlushTaskThenThereIsNoPipeControlForUpdateTaskCount) { DebugManagerStateRestore restorer; DebugManager.flags.UpdateTaskCountFromWait.set(3); CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->useNewResourceImplicitFlush = false; mockCsr->useGpuIdleImplicitFlush = false; mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); mockCsr->flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevel, dispatchFlags, *pDevice); parseCommands(commandStream); auto itorPipeControl = find(cmdList.begin(), cmdList.end()); EXPECT_EQ(itorPipeControl, cmdList.end()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenUpdateTaskCountFromWaitSetWhenFlushTaskThenPipeControlIsFlushed) { DebugManagerStateRestore restorer; DebugManager.flags.UpdateTaskCountFromWait.set(3); CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); commandQueue.taskCount = 10; auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->useNewResourceImplicitFlush = false; mockCsr->useGpuIdleImplicitFlush = false; mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); mockCsr->taskCount.store(10); mockCsr->latestFlushedTaskCount.store(5); const auto waitStatus = commandQueue.waitForAllEngines(false, nullptr); EXPECT_EQ(WaitStatus::Ready, waitStatus); parseCommands(mockCsr->getCS(4096u)); auto itorPipeControl = find(cmdList.begin(), cmdList.end()); EXPECT_NE(itorPipeControl, cmdList.end()); EXPECT_EQ(mockCsr->flushCalledCount, 
1); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenEnabledDirectSubmissionUpdateTaskCountFromWaitSetWhenFlushTaskThenPipeControlAndBBSIsFlushed) { DebugManagerStateRestore restorer; DebugManager.flags.UpdateTaskCountFromWait.set(1); struct MockCsrHwDirectSubmission : public MockCsrHw2 { using MockCsrHw2::MockCsrHw2; bool isDirectSubmissionEnabled() const override { return true; } }; CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); commandQueue.taskCount = 10; auto mockCsr = new MockCsrHwDirectSubmission(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->useNewResourceImplicitFlush = false; mockCsr->useGpuIdleImplicitFlush = false; mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); mockCsr->taskCount.store(10); mockCsr->latestFlushedTaskCount.store(5); const auto waitStatus = commandQueue.waitForAllEngines(false, nullptr); EXPECT_EQ(WaitStatus::Ready, waitStatus); parseCommands(mockCsr->getCS(4096u)); auto itorPipeControl = find(cmdList.begin(), cmdList.end()); auto itorBBS = find(cmdList.begin(), cmdList.end()); EXPECT_NE(itorPipeControl, cmdList.end()); EXPECT_NE(itorBBS, cmdList.end()); EXPECT_EQ(mockCsr->flushCalledCount, 1); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenDcFlushIsRequiredThenPipeControlIsNotRegistredForNooping) { CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->useNewResourceImplicitFlush = false; mockCsr->useGpuIdleImplicitFlush = false; mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); DispatchFlags 
dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); dispatchFlags.dcFlush = true; dispatchFlags.outOfOrderExecutionAllowed = true; mockCsr->flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevel, dispatchFlags, *pDevice); auto cmdBuffer = mockedSubmissionsAggregator->peekCommandBuffers().peekHead(); EXPECT_EQ(nullptr, cmdBuffer->pipeControlThatMayBeErasedLocation); EXPECT_NE(nullptr, cmdBuffer->epiloguePipeControlLocation); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenEpiloguePipeControlThenDcFlushIsEnabled) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->useNewResourceImplicitFlush = false; mockCsr->useGpuIdleImplicitFlush = false; mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); dispatchFlags.guardCommandBufferWithPipeControl = true; dispatchFlags.outOfOrderExecutionAllowed = false; mockCsr->flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevel, dispatchFlags, *pDevice); auto cmdBuffer = mockedSubmissionsAggregator->peekCommandBuffers().peekHead(); ASSERT_NE(nullptr, cmdBuffer->epiloguePipeControlLocation); auto pipeControl = genCmdCast(cmdBuffer->epiloguePipeControlLocation); ASSERT_NE(nullptr, pipeControl); mockCsr->flushBatchedSubmissions(); EXPECT_EQ(MemorySynchronizationCommands::getDcFlushEnable(true, 
*defaultHwInfo), pipeControl->getDcFlushEnable()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenEpiloguePipeControlWhendDcFlushDisabledByDebugFlagThenDcFlushIsDisabled) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; DebugManagerStateRestore debugRestorer; DebugManager.flags.DisableDcFlushInEpilogue.set(true); CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->useNewResourceImplicitFlush = false; mockCsr->useGpuIdleImplicitFlush = false; mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); dispatchFlags.guardCommandBufferWithPipeControl = true; dispatchFlags.outOfOrderExecutionAllowed = false; mockCsr->flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevel, dispatchFlags, *pDevice); auto cmdBuffer = mockedSubmissionsAggregator->peekCommandBuffers().peekHead(); ASSERT_NE(nullptr, cmdBuffer->epiloguePipeControlLocation); auto pipeControl = genCmdCast(cmdBuffer->epiloguePipeControlLocation); ASSERT_NE(nullptr, pipeControl); mockCsr->flushBatchedSubmissions(); EXPECT_FALSE(pipeControl->getDcFlushEnable()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeAndOoqFlagSetToFalseWhenTwoTasksArePassedWithTheSameLevelThenThereIsPipeControlBetweenThemAfterFlush) { CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), 
pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->timestampPacketWriteEnabled = false; mockCsr->useNewResourceImplicitFlush = false; mockCsr->useGpuIdleImplicitFlush = false; mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.guardCommandBufferWithPipeControl = true; dispatchFlags.outOfOrderExecutionAllowed = false; auto taskLevelPriorToSubmission = mockCsr->peekTaskLevel(); mockCsr->flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevelPriorToSubmission, dispatchFlags, *pDevice); //now emit with the same taskLevel mockCsr->flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevelPriorToSubmission, dispatchFlags, *pDevice); EXPECT_EQ(taskLevelPriorToSubmission, mockCsr->peekTaskLevel()); //validate if we recorded ppc positions auto firstCmdBuffer = mockedSubmissionsAggregator->peekCommandBuffers().peekHead(); EXPECT_EQ(nullptr, firstCmdBuffer->pipeControlThatMayBeErasedLocation); auto secondCmdBuffer = firstCmdBuffer->next; EXPECT_EQ(nullptr, secondCmdBuffer->pipeControlThatMayBeErasedLocation); mockCsr->flushBatchedSubmissions(); //decode commands to confirm no pipe controls between Walkers parseCommands(commandQueue); auto itorBatchBufferStartFirst = find(cmdList.begin(), cmdList.end()); auto itorBatchBufferStartSecond = find(++itorBatchBufferStartFirst, cmdList.end()); auto itorPipeControl = find(itorBatchBufferStartFirst, itorBatchBufferStartSecond); EXPECT_NE(itorPipeControl, itorBatchBufferStartSecond); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeAndOoqFlagSetToFalseWhenTimestampPacketWriteIsEnabledThenNoopPipeControl) { CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new 
MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->useNewResourceImplicitFlush = false; mockCsr->useGpuIdleImplicitFlush = false; mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.guardCommandBufferWithPipeControl = true; dispatchFlags.outOfOrderExecutionAllowed = false; auto taskLevelPriorToSubmission = mockCsr->peekTaskLevel(); mockCsr->timestampPacketWriteEnabled = false; mockCsr->flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevelPriorToSubmission, dispatchFlags, *pDevice); mockCsr->flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevelPriorToSubmission, dispatchFlags, *pDevice); auto firstCmdBuffer = mockedSubmissionsAggregator->peekCommandBuffers().peekHead(); EXPECT_EQ(nullptr, firstCmdBuffer->pipeControlThatMayBeErasedLocation); auto secondCmdBuffer = firstCmdBuffer->next; EXPECT_EQ(nullptr, secondCmdBuffer->pipeControlThatMayBeErasedLocation); mockCsr->flushBatchedSubmissions(); mockCsr->timestampPacketWriteEnabled = true; mockCsr->flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevelPriorToSubmission, dispatchFlags, *pDevice); mockCsr->flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevelPriorToSubmission, dispatchFlags, *pDevice); firstCmdBuffer = mockedSubmissionsAggregator->peekCommandBuffers().peekHead(); EXPECT_NE(nullptr, firstCmdBuffer->pipeControlThatMayBeErasedLocation); secondCmdBuffer = firstCmdBuffer->next; EXPECT_NE(nullptr, secondCmdBuffer->pipeControlThatMayBeErasedLocation); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenPipeControlForNoopAddressIsNullThenPipeControlIsNotNooped) { CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto 
&commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->useNewResourceImplicitFlush = false; mockCsr->useGpuIdleImplicitFlush = false; mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); dispatchFlags.guardCommandBufferWithPipeControl = true; dispatchFlags.outOfOrderExecutionAllowed = true; auto taskLevelPriorToSubmission = mockCsr->peekTaskLevel(); mockCsr->flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevelPriorToSubmission, dispatchFlags, *pDevice); //now emit with the same taskLevel mockCsr->flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevelPriorToSubmission, dispatchFlags, *pDevice); //validate if we recorded ppc positions auto firstCmdBuffer = mockedSubmissionsAggregator->peekCommandBuffers().peekHead(); auto ppc1Location = firstCmdBuffer->pipeControlThatMayBeErasedLocation; firstCmdBuffer->pipeControlThatMayBeErasedLocation = nullptr; auto ppc = genCmdCast(ppc1Location); EXPECT_NE(nullptr, ppc); //call flush, both pipe controls must remain untouched mockCsr->flushBatchedSubmissions(); EXPECT_EQ(taskLevelPriorToSubmission + 1, mockCsr->peekTaskLevel()); //decode commands to confirm no pipe controls between Walkers parseCommands(commandQueue); auto itorBatchBufferStartFirst = find(cmdList.begin(), cmdList.end()); auto itorBatchBufferStartSecond = find(++itorBatchBufferStartFirst, cmdList.end()); auto itorPipeControl = find(itorBatchBufferStartFirst, itorBatchBufferStartSecond); EXPECT_NE(itorPipeControl, itorBatchBufferStartSecond); } 
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenThreeTasksArePassedWithTheSameLevelThenThereIsNoPipeControlBetweenThemAfterFlush) { CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->useNewResourceImplicitFlush = false; mockCsr->useGpuIdleImplicitFlush = false; mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); dispatchFlags.guardCommandBufferWithPipeControl = true; dispatchFlags.outOfOrderExecutionAllowed = true; auto taskLevelPriorToSubmission = mockCsr->peekTaskLevel(); mockCsr->flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevelPriorToSubmission, dispatchFlags, *pDevice); //now emit with the same taskLevel mockCsr->flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevelPriorToSubmission, dispatchFlags, *pDevice); mockCsr->flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevelPriorToSubmission, dispatchFlags, *pDevice); EXPECT_EQ(taskLevelPriorToSubmission, mockCsr->peekTaskLevel()); //validate if we recorded ppc positions auto firstCmdBuffer = mockedSubmissionsAggregator->peekCommandBuffers().peekHead(); auto secondCmdBuffer = firstCmdBuffer->next; auto thirdCmdBuffer = firstCmdBuffer->next->next; EXPECT_NE(nullptr, thirdCmdBuffer->pipeControlThatMayBeErasedLocation); EXPECT_NE(firstCmdBuffer->pipeControlThatMayBeErasedLocation, thirdCmdBuffer->pipeControlThatMayBeErasedLocation); auto ppc = genCmdCast(firstCmdBuffer->pipeControlThatMayBeErasedLocation); auto ppc2 = 
genCmdCast(secondCmdBuffer->pipeControlThatMayBeErasedLocation); auto ppc3 = genCmdCast(thirdCmdBuffer->pipeControlThatMayBeErasedLocation); EXPECT_NE(nullptr, ppc2); EXPECT_NE(nullptr, ppc3); //flush needs to bump the taskLevel mockCsr->flushBatchedSubmissions(); EXPECT_EQ(taskLevelPriorToSubmission + 1, mockCsr->peekTaskLevel()); //decode commands to confirm no pipe controls between Walkers parseCommands(commandQueue); auto itorBatchBufferStartFirst = find(cmdList.begin(), cmdList.end()); auto itorBatchBufferStartSecond = find(++itorBatchBufferStartFirst, cmdList.end()); auto itorBatchBufferStartThird = find(++itorBatchBufferStartSecond, cmdList.end()); //make sure they are not the same EXPECT_NE(cmdList.end(), itorBatchBufferStartFirst); EXPECT_NE(cmdList.end(), itorBatchBufferStartSecond); EXPECT_NE(cmdList.end(), itorBatchBufferStartThird); EXPECT_NE(itorBatchBufferStartFirst, itorBatchBufferStartSecond); EXPECT_NE(itorBatchBufferStartThird, itorBatchBufferStartSecond); auto itorPipeControl = find(itorBatchBufferStartFirst, itorBatchBufferStartSecond); EXPECT_EQ(itorPipeControl, itorBatchBufferStartSecond); itorPipeControl = find(itorBatchBufferStartSecond, itorBatchBufferStartThird); EXPECT_EQ(itorPipeControl, itorBatchBufferStartThird); //first pipe control is nooped, second pipe control is untouched auto noop1 = genCmdCast(ppc); auto noop2 = genCmdCast(ppc2); auto noop3 = genCmdCast(ppc3); EXPECT_NE(nullptr, noop1); EXPECT_NE(nullptr, noop2); EXPECT_EQ(nullptr, noop3); auto ppcAfterChange = genCmdCast(ppc3); EXPECT_NE(nullptr, ppcAfterChange); } typedef UltCommandStreamReceiverTest CommandStreamReceiverCleanupTests; HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrWhenTemporaryAndReusableAllocationsArePresentThenCleanupResourcesOnlyCleansThoseAboveLatestFlushTaskLevel) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); auto memoryManager = pDevice->getMemoryManager(); auto temporaryToClean = 
memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); auto temporaryToHold = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); auto reusableToClean = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); auto reusableToHold = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); commandStreamReceiver.getInternalAllocationStorage()->storeAllocation(std::unique_ptr(temporaryToClean), TEMPORARY_ALLOCATION); commandStreamReceiver.getInternalAllocationStorage()->storeAllocation(std::unique_ptr(temporaryToHold), TEMPORARY_ALLOCATION); commandStreamReceiver.getInternalAllocationStorage()->storeAllocation(std::unique_ptr(reusableToClean), REUSABLE_ALLOCATION); commandStreamReceiver.getInternalAllocationStorage()->storeAllocation(std::unique_ptr(reusableToHold), REUSABLE_ALLOCATION); auto osContextId = commandStreamReceiver.getOsContext().getContextId(); temporaryToClean->updateTaskCount(1, osContextId); reusableToClean->updateTaskCount(1, osContextId); temporaryToHold->updateTaskCount(10, osContextId); reusableToHold->updateTaskCount(10, osContextId); commandStreamReceiver.latestFlushedTaskCount = 9; commandStreamReceiver.cleanupResources(); EXPECT_EQ(reusableToHold, commandStreamReceiver.getAllocationsForReuse().peekHead()); EXPECT_EQ(reusableToHold, commandStreamReceiver.getAllocationsForReuse().peekTail()); EXPECT_EQ(temporaryToHold, commandStreamReceiver.getTemporaryAllocations().peekHead()); EXPECT_EQ(temporaryToHold, commandStreamReceiver.getTemporaryAllocations().peekTail()); commandStreamReceiver.latestFlushedTaskCount = 11; commandStreamReceiver.cleanupResources(); 
EXPECT_TRUE(commandStreamReceiver.getAllocationsForReuse().peekIsEmpty()); EXPECT_TRUE(commandStreamReceiver.getTemporaryAllocations().peekIsEmpty()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenDispatchFlagsWithThrottleSetToLowWhenFlushTaskIsCalledThenThrottleIsSetInBatchBuffer) { typedef typename FamilyType::MI_BATCH_BUFFER_END MI_BATCH_BUFFER_END; CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->useNewResourceImplicitFlush = false; mockCsr->useGpuIdleImplicitFlush = false; mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); dispatchFlags.throttle = QueueThrottle::LOW; mockCsr->flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevel, dispatchFlags, *pDevice); auto &cmdBufferList = mockedSubmissionsAggregator->peekCommandBuffers(); auto cmdBuffer = cmdBufferList.peekHead(); EXPECT_EQ(cmdBuffer->batchBuffer.throttle, QueueThrottle::LOW); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenDispatchFlagsWithThrottleSetToMediumWhenFlushTaskIsCalledThenThrottleIsSetInBatchBuffer) { typedef typename FamilyType::MI_BATCH_BUFFER_END MI_BATCH_BUFFER_END; CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->useNewResourceImplicitFlush = false; mockCsr->useGpuIdleImplicitFlush 
= false; mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); dispatchFlags.throttle = QueueThrottle::MEDIUM; mockCsr->flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevel, dispatchFlags, *pDevice); auto &cmdBufferList = mockedSubmissionsAggregator->peekCommandBuffers(); auto cmdBuffer = cmdBufferList.peekHead(); EXPECT_EQ(cmdBuffer->batchBuffer.throttle, QueueThrottle::MEDIUM); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCommandQueueWithThrottleHintWhenFlushingThenPassThrottleHintToCsr) { MockContext context(pClDevice); cl_queue_properties properties[] = {CL_QUEUE_THROTTLE_KHR, CL_QUEUE_THROTTLE_LOW_KHR, 0}; CommandQueueHw commandQueue(&context, pClDevice, properties, false); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(mockCsr); cl_int retVal = CL_SUCCESS; auto buffer = std::unique_ptr(Buffer::create(&context, 0, 1, nullptr, retVal)); buffer->forceDisallowCPUCopy = true; uint32_t outPtr; commandQueue.enqueueReadBuffer(buffer.get(), CL_TRUE, 0, 1, &outPtr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(QueueThrottle::LOW, mockCsr->passedDispatchFlags.throttle); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenDispatchFlagsWithThrottleSetToHighWhenFlushTaskIsCalledThenThrottleIsSetInBatchBuffer) { typedef typename FamilyType::MI_BATCH_BUFFER_END MI_BATCH_BUFFER_END; CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); 
pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->useNewResourceImplicitFlush = false; mockCsr->useGpuIdleImplicitFlush = false; mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); dispatchFlags.throttle = QueueThrottle::HIGH; mockCsr->flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevel, dispatchFlags, *pDevice); auto &cmdBufferList = mockedSubmissionsAggregator->peekCommandBuffers(); auto cmdBuffer = cmdBufferList.peekHead(); EXPECT_EQ(cmdBuffer->batchBuffer.throttle, QueueThrottle::HIGH); } HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskTests, givenEpilogueRequiredFlagWhenTaskIsSubmittedDirectlyThenItPointsBackToCsr) { configureCSRtoNonDirtyState(false); auto &commandStreamReceiver = this->pDevice->getUltCommandStreamReceiver(); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); EXPECT_EQ(0u, commandStreamReceiver.getCmdSizeForEpilogue(dispatchFlags)); dispatchFlags.epilogueRequired = true; dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); EXPECT_EQ(MemoryConstants::cacheLineSize, commandStreamReceiver.getCmdSizeForEpilogue(dispatchFlags)); auto data = commandStream.getSpace(MemoryConstants::cacheLineSize); memset(data, 0, MemoryConstants::cacheLineSize); commandStreamReceiver.storeMakeResidentAllocations = true; commandStreamReceiver.flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevel, dispatchFlags, *pDevice); auto &commandStreamReceiverStream = commandStreamReceiver.getCS(0u); EXPECT_EQ(MemoryConstants::cacheLineSize * 2, commandStream.getUsed()); EXPECT_EQ(MemoryConstants::cacheLineSize, commandStreamReceiverStream.getUsed()); 
parseCommands(commandStream, 0); auto itBBend = find(cmdList.begin(), cmdList.end()); EXPECT_EQ(itBBend, cmdList.end()); auto itBatchBufferStart = find(cmdList.begin(), cmdList.end()); EXPECT_NE(itBatchBufferStart, cmdList.end()); auto batchBufferStart = genCmdCast(*itBatchBufferStart); EXPECT_EQ(batchBufferStart->getBatchBufferStartAddress(), commandStreamReceiverStream.getGraphicsAllocation()->getGpuAddress()); parseCommands(commandStreamReceiverStream, 0); itBBend = find(cmdList.begin(), cmdList.end()); void *bbEndAddress = *itBBend; EXPECT_EQ(commandStreamReceiverStream.getCpuBase(), bbEndAddress); EXPECT_TRUE(commandStreamReceiver.isMadeResident(commandStreamReceiverStream.getGraphicsAllocation())); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenDispatchFlagsWithNewSliceCountWhenFlushTaskThenNewSliceCountIsSet) { CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->useNewResourceImplicitFlush = false; mockCsr->useGpuIdleImplicitFlush = false; mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); uint64_t newSliceCount = 1; DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.sliceCount = newSliceCount; mockCsr->flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevel, dispatchFlags, *pDevice); auto &cmdBufferList = mockedSubmissionsAggregator->peekCommandBuffers(); auto cmdBuffer = cmdBufferList.peekHead(); EXPECT_EQ(cmdBuffer->batchBuffer.sliceCount, newSliceCount); } template class UltCommandStreamReceiverForDispatchFlags : public UltCommandStreamReceiver { using BaseClass = UltCommandStreamReceiver; public: 
UltCommandStreamReceiverForDispatchFlags(ExecutionEnvironment &executionEnvironment, const DeviceBitfield deviceBitfield) : BaseClass(executionEnvironment, 0, deviceBitfield) {} CompletionStamp flushTask(LinearStream &commandStream, size_t commandStreamStart, const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh, uint32_t taskLevel, DispatchFlags &dispatchFlags, Device &device) override { savedDispatchFlags = dispatchFlags; return BaseClass::flushTask(commandStream, commandStreamStart, dsh, ioh, ssh, taskLevel, dispatchFlags, device); } DispatchFlags savedDispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); }; HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenBlockedKernelWhenItIsUnblockedThenDispatchFlagsAreSetCorrectly) { MockContext mockContext; auto csr = new UltCommandStreamReceiverForDispatchFlags(*pDevice->executionEnvironment, pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(csr); uint32_t numGrfRequired = 666u; auto pCmdQ = std::make_unique(&mockContext, pClDevice, nullptr, false); auto mockProgram = std::make_unique(&mockContext, false, toClDeviceVector(*pClDevice)); auto pKernel = MockKernel::create(*pDevice, mockProgram.get(), numGrfRequired); auto kernelInfos = MockKernel::toKernelInfoContainer(pKernel->getKernelInfo(), rootDeviceIndex); MultiDeviceKernel multiDeviceKernel(MockMultiDeviceKernel::toKernelVector(pKernel), kernelInfos); auto event = std::make_unique>(pCmdQ.get(), CL_COMMAND_MARKER, 0, 0); auto cmdStream = new LinearStream(pDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties({pDevice->getRootDeviceIndex(), 4096, AllocationType::COMMAND_BUFFER, pDevice->getDeviceBitfield()})); IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr; pCmdQ->allocateHeapMemory(IndirectHeap::Type::DYNAMIC_STATE, 4096u, dsh); pCmdQ->allocateHeapMemory(IndirectHeap::Type::INDIRECT_OBJECT, 4096u, ioh); pCmdQ->allocateHeapMemory(IndirectHeap::Type::SURFACE_STATE, 4096u, ssh); auto 
blockedCommandsData = std::make_unique(cmdStream, *pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage()); blockedCommandsData->setHeaps(dsh, ioh, ssh); std::vector surfaces; event->setCommand(std::make_unique(*pCmdQ, blockedCommandsData, surfaces, false, false, false, nullptr, pDevice->getPreemptionMode(), pKernel, 1)); event->submitCommand(false); EXPECT_EQ(numGrfRequired, csr->savedDispatchFlags.numGrfRequired); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenDcFlushArgumentIsTrueWhenCallingAddPipeControlThenDcFlushIsEnabled) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; std::unique_ptr buffer(new uint8_t[128]); LinearStream commandStream(buffer.get(), 128); PipeControlArgs args; args.dcFlushEnable = true; MemorySynchronizationCommands::addPipeControl(commandStream, args); PIPE_CONTROL *pipeControl = genCmdCast(buffer.get()); ASSERT_NE(nullptr, pipeControl); EXPECT_TRUE(pipeControl->getDcFlushEnable()); EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenDcFlushArgumentIsFalseWhenCallingAddPipeControlThenDcFlushIsEnabledOnlyOnGen8) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; std::unique_ptr buffer(new uint8_t[128]); LinearStream commandStream(buffer.get(), 128); PipeControlArgs args; MemorySynchronizationCommands::addPipeControl(commandStream, args); PIPE_CONTROL *pipeControl = genCmdCast(buffer.get()); ASSERT_NE(nullptr, pipeControl); const bool expectedDcFlush = ::renderCoreFamily == IGFX_GEN8_CORE; EXPECT_EQ(expectedDcFlush, pipeControl->getDcFlushEnable()); EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, whenPerDssBackBufferIsAllocatedThenItIsClearedInCleanupResources) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); ASSERT_NE(nullptr, pDevice); commandStreamReceiver.createPerDssBackedBuffer(*pDevice); EXPECT_NE(nullptr, commandStreamReceiver.perDssBackedBuffer); 
commandStreamReceiver.cleanupResources(); EXPECT_EQ(nullptr, commandStreamReceiver.perDssBackedBuffer); } HWTEST_F(CommandStreamReceiverFlushTaskTests, whenPerDssBackBufferProgrammingEnabledThenAllocationIsCreated) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.usePerDssBackedBuffer = true; commandStreamReceiver.flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevel, dispatchFlags, *pDevice); EXPECT_EQ(1u, commandStreamReceiver.createPerDssBackedBufferCalled); EXPECT_NE(nullptr, commandStreamReceiver.perDssBackedBuffer); } HWTEST_F(CommandStreamReceiverFlushTaskTests, whenPerDssBackBufferProgrammingEnabledAndPerDssBackedBufferAlreadyPresentThenNewAllocationIsNotCreated) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); auto memoryManager = pDevice->getMemoryManager(); commandStreamReceiver.perDssBackedBuffer = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.usePerDssBackedBuffer = true; commandStreamReceiver.flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevel, dispatchFlags, *pDevice); EXPECT_EQ(0u, commandStreamReceiver.createPerDssBackedBufferCalled); } template class MockCsrWithFailingFlush : public CommandStreamReceiverHw { public: using CommandStreamReceiverHw::latestSentTaskCount; using CommandStreamReceiverHw::submissionAggregator; MockCsrWithFailingFlush(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield) : CommandStreamReceiverHw(executionEnvironment, rootDeviceIndex, deviceBitfield) { this->dispatchMode = DispatchMode::BatchedDispatch; this->tagAddress = &tag; } SubmissionStatus flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override { return 
SubmissionStatus::FAILED; } uint32_t tag = 0; }; HWTEST_F(CommandStreamReceiverFlushTaskTests, givenWaitForCompletionWithTimeoutIsCalledWhenFlushBatchedSubmissionsReturnsFailureThenItIsPropagated) { MockCsrWithFailingFlush mockCsr(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); MockOsContext osContext(0, EngineDescriptorHelper::getDefaultDescriptor(8)); mockCsr.setupContext(osContext); mockCsr.latestSentTaskCount = 1; auto cmdBuffer = std::make_unique(*pDevice); mockCsr.submissionAggregator->recordCommandBuffer(cmdBuffer.release()); EXPECT_EQ(NEO::WaitStatus::NotReady, mockCsr.waitForCompletionWithTimeout(WaitParams{false, false, 0}, 1)); } command_stream_receiver_flush_task_4_tests.cpp000066400000000000000000000737411422164147700360260ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/command_stream/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/wait_status.h" #include "shared/test/common/mocks/mock_command_stream_receiver.h" #include "shared/test/common/mocks/ult_device_factory.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/event/user_event.h" #include "opencl/test/unit_test/fixtures/multi_root_device_fixture.h" #include "opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "opencl/test/unit_test/test_macros/test_checks_ocl.h" #include "test_traits_common.h" using namespace NEO; using MultiRootDeviceCommandStreamReceiverBufferTests = MultiRootDeviceFixture; HWTEST_F(MultiRootDeviceCommandStreamReceiverBufferTests, givenMultipleEventInMultiRootDeviceEnvironmentWhenTheyArePassedToEnqueueWithSubmissionThenCsIsWaitingForEventsFromPreviousDevices) { REQUIRE_SVM_OR_SKIP(device1); REQUIRE_SVM_OR_SKIP(device2); using MI_SEMAPHORE_WAIT = typename 
FamilyType::MI_SEMAPHORE_WAIT; cl_int retVal = 0; size_t offset = 0; size_t size = 1; auto pCmdQ1 = context.get()->getSpecialQueue(1u); auto pCmdQ2 = context.get()->getSpecialQueue(2u); std::unique_ptr program(Program::createBuiltInFromSource("FillBufferBytes", context.get(), context.get()->getDevices(), &retVal)); program->build(program->getDevices(), nullptr, false); std::unique_ptr kernel(Kernel::create(program.get(), program->getKernelInfoForKernel("FillBufferBytes"), *context.get()->getDevice(0), &retVal)); size_t svmSize = 4096; void *svmPtr = alignedMalloc(svmSize, MemoryConstants::pageSize); MockGraphicsAllocation svmAlloc(svmPtr, svmSize); Event event1(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 5, 15); Event event2(nullptr, CL_COMMAND_NDRANGE_KERNEL, 6, 16); Event event3(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 4, 20); Event event4(pCmdQ2, CL_COMMAND_NDRANGE_KERNEL, 3, 4); Event event5(pCmdQ2, CL_COMMAND_NDRANGE_KERNEL, 2, 7); UserEvent userEvent1(&pCmdQ1->getContext()); UserEvent userEvent2(&pCmdQ2->getContext()); userEvent1.setStatus(CL_COMPLETE); userEvent2.setStatus(CL_COMPLETE); cl_event eventWaitList[] = { &event1, &event2, &event3, &event4, &event5, &userEvent1, &userEvent2, }; cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]); { kernel->setSvmKernelExecInfo(&svmAlloc); retVal = pCmdQ1->enqueueKernel( kernel.get(), 1, &offset, &size, &size, numEventsInWaitList, eventWaitList, nullptr); HardwareParse csHwParser; csHwParser.parseCommands(pCmdQ1->getCS(0)); auto semaphores = findAll(csHwParser.cmdList.begin(), csHwParser.cmdList.end()); EXPECT_EQ(2u, semaphores.size()); auto semaphoreCmd0 = genCmdCast(*(semaphores[0])); EXPECT_EQ(4u, semaphoreCmd0->getSemaphoreDataDword()); EXPECT_EQ(reinterpret_cast(pCmdQ2->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd0->getSemaphoreGraphicsAddress()); auto semaphoreCmd1 = genCmdCast(*(semaphores[1])); EXPECT_EQ(7u, semaphoreCmd1->getSemaphoreDataDword()); 
EXPECT_EQ(reinterpret_cast(pCmdQ2->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd1->getSemaphoreGraphicsAddress()); } { kernel->setSvmKernelExecInfo(&svmAlloc); retVal = pCmdQ2->enqueueKernel( kernel.get(), 1, &offset, &size, &size, numEventsInWaitList, eventWaitList, nullptr); HardwareParse csHwParser; csHwParser.parseCommands(pCmdQ2->getCS(0)); auto semaphores = findAll(csHwParser.cmdList.begin(), csHwParser.cmdList.end()); EXPECT_EQ(2u, semaphores.size()); auto semaphoreCmd0 = genCmdCast(*(semaphores[0])); EXPECT_EQ(15u, semaphoreCmd0->getSemaphoreDataDword()); EXPECT_EQ(reinterpret_cast(pCmdQ1->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd0->getSemaphoreGraphicsAddress()); auto semaphoreCmd1 = genCmdCast(*(semaphores[1])); EXPECT_EQ(20u, semaphoreCmd1->getSemaphoreDataDword()); EXPECT_EQ(reinterpret_cast(pCmdQ1->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd1->getSemaphoreGraphicsAddress()); } alignedFree(svmPtr); } using CommandStreamReceiverFlushTaskTests = UltCommandStreamReceiverTest; using MultiRootDeviceCommandStreamReceiverTests = CommandStreamReceiverFlushTaskTests; HWTEST_F(MultiRootDeviceCommandStreamReceiverTests, givenMultipleEventInMultiRootDeviceEnvironmentWhenTheyArePassedToEnqueueWithoutSubmissionThenCsIsWaitingForEventsFromPreviousDevices) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; auto deviceFactory = std::make_unique(4, 0); auto device1 = deviceFactory->rootDevices[1]; auto device2 = deviceFactory->rootDevices[2]; auto device3 = deviceFactory->rootDevices[3]; auto mockCsr1 = new MockCommandStreamReceiver(*device1->executionEnvironment, device1->getRootDeviceIndex(), device1->getDeviceBitfield()); auto mockCsr2 = new MockCommandStreamReceiver(*device2->executionEnvironment, device2->getRootDeviceIndex(), device2->getDeviceBitfield()); auto mockCsr3 = new MockCommandStreamReceiver(*device3->executionEnvironment, device3->getRootDeviceIndex(), 
device3->getDeviceBitfield()); device1->resetCommandStreamReceiver(mockCsr1); device2->resetCommandStreamReceiver(mockCsr2); device3->resetCommandStreamReceiver(mockCsr3); cl_device_id devices[] = {device1, device2, device3}; auto context = std::make_unique(ClDeviceVector(devices, 3), false); auto pCmdQ1 = context.get()->getSpecialQueue(1u); auto pCmdQ2 = context.get()->getSpecialQueue(2u); auto pCmdQ3 = context.get()->getSpecialQueue(3u); Event event1(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 5, 15); Event event2(nullptr, CL_COMMAND_NDRANGE_KERNEL, 6, 16); Event event3(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 4, 20); Event event4(pCmdQ2, CL_COMMAND_NDRANGE_KERNEL, 3, 4); Event event5(pCmdQ3, CL_COMMAND_NDRANGE_KERNEL, 7, 21); Event event6(pCmdQ2, CL_COMMAND_NDRANGE_KERNEL, 2, 7); UserEvent userEvent1(&pCmdQ1->getContext()); UserEvent userEvent2(&pCmdQ2->getContext()); userEvent1.setStatus(CL_COMPLETE); userEvent2.setStatus(CL_COMPLETE); cl_event eventWaitList[] = { &event1, &event2, &event3, &event4, &event5, &event6, &userEvent1, &userEvent2, }; cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]); { pCmdQ1->enqueueMarkerWithWaitList( numEventsInWaitList, eventWaitList, nullptr); HardwareParse csHwParser; csHwParser.parseCommands(pCmdQ1->getCS(0)); auto semaphores = findAll(csHwParser.cmdList.begin(), csHwParser.cmdList.end()); EXPECT_EQ(3u, semaphores.size()); auto semaphoreCmd0 = genCmdCast(*(semaphores[0])); EXPECT_EQ(4u, semaphoreCmd0->getSemaphoreDataDword()); EXPECT_EQ(reinterpret_cast(pCmdQ2->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd0->getSemaphoreGraphicsAddress()); auto semaphoreCmd1 = genCmdCast(*(semaphores[1])); EXPECT_EQ(21u, semaphoreCmd1->getSemaphoreDataDword()); EXPECT_EQ(reinterpret_cast(pCmdQ3->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd1->getSemaphoreGraphicsAddress()); auto semaphoreCmd2 = genCmdCast(*(semaphores[2])); EXPECT_EQ(7u, semaphoreCmd2->getSemaphoreDataDword()); 
EXPECT_EQ(reinterpret_cast(pCmdQ2->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd2->getSemaphoreGraphicsAddress()); } { pCmdQ2->enqueueMarkerWithWaitList( numEventsInWaitList, eventWaitList, nullptr); HardwareParse csHwParser; csHwParser.parseCommands(pCmdQ2->getCS(0)); auto semaphores = findAll(csHwParser.cmdList.begin(), csHwParser.cmdList.end()); EXPECT_EQ(3u, semaphores.size()); auto semaphoreCmd0 = genCmdCast(*(semaphores[0])); EXPECT_EQ(15u, semaphoreCmd0->getSemaphoreDataDword()); EXPECT_EQ(reinterpret_cast(pCmdQ1->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd0->getSemaphoreGraphicsAddress()); auto semaphoreCmd1 = genCmdCast(*(semaphores[1])); EXPECT_EQ(20u, semaphoreCmd1->getSemaphoreDataDword()); EXPECT_EQ(reinterpret_cast(pCmdQ1->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd1->getSemaphoreGraphicsAddress()); auto semaphoreCmd2 = genCmdCast(*(semaphores[2])); EXPECT_EQ(21u, semaphoreCmd2->getSemaphoreDataDword()); EXPECT_EQ(reinterpret_cast(pCmdQ3->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd2->getSemaphoreGraphicsAddress()); } { cl_event eventWaitList[] = { &event1, &event2, &event5, &userEvent1, }; cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]); pCmdQ3->enqueueMarkerWithWaitList( numEventsInWaitList, eventWaitList, nullptr); HardwareParse csHwParser; csHwParser.parseCommands(pCmdQ3->getCS(0)); auto semaphores = findAll(csHwParser.cmdList.begin(), csHwParser.cmdList.end()); EXPECT_EQ(1u, semaphores.size()); auto semaphoreCmd0 = genCmdCast(*(semaphores[0])); EXPECT_EQ(15u, semaphoreCmd0->getSemaphoreDataDword()); EXPECT_EQ(reinterpret_cast(pCmdQ1->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd0->getSemaphoreGraphicsAddress()); } } struct CrossDeviceDependenciesTests : public ::testing::Test { void SetUp() override { VariableBackup backupHwInfo(defaultHwInfo.get()); defaultHwInfo->capabilityTable.blitterOperationsSupported = true; 
deviceFactory = std::make_unique(3, 0); auto device1 = deviceFactory->rootDevices[1]; auto device2 = deviceFactory->rootDevices[2]; cl_device_id devices[] = {device1, device2}; context = std::make_unique(ClDeviceVector(devices, 2), false); pCmdQ1 = context.get()->getSpecialQueue(1u); pCmdQ2 = context.get()->getSpecialQueue(2u); } void TearDown() override { } std::unique_ptr deviceFactory; std::unique_ptr context; CommandQueue *pCmdQ1 = nullptr; CommandQueue *pCmdQ2 = nullptr; }; HWTEST_F(CrossDeviceDependenciesTests, givenMultipleEventInMultiRootDeviceEnvironmentWhenTheyArePassedToMarkerThenMiSemaphoreWaitCommandSizeIsIncluded) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; Event event1(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 5, 15); Event event2(nullptr, CL_COMMAND_NDRANGE_KERNEL, 6, 16); Event event3(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 1, 6); Event event4(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 4, 20); Event event5(pCmdQ2, CL_COMMAND_NDRANGE_KERNEL, 3, 4); Event event6(pCmdQ2, CL_COMMAND_NDRANGE_KERNEL, 2, 7); UserEvent userEvent1(&pCmdQ1->getContext()); UserEvent userEvent2(&pCmdQ2->getContext()); userEvent1.setStatus(CL_COMPLETE); userEvent2.setStatus(CL_COMPLETE); { cl_event eventWaitList[] = { &event1, &event2, &event3, &event4, &userEvent1, &userEvent2, }; cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]); pCmdQ1->enqueueMarkerWithWaitList( numEventsInWaitList, eventWaitList, nullptr); EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, nullptr); CsrDependencies csrDeps; eventsRequest.fillCsrDependenciesForTaskCountContainer(csrDeps, pCmdQ1->getGpgpuCommandStreamReceiver()); EXPECT_EQ(0u, csrDeps.taskCountContainer.size()); EXPECT_EQ(0u, TimestampPacketHelper::getRequiredCmdStreamSizeForTaskCountContainer(csrDeps)); } { cl_event eventWaitList[] = { &event1, &event2, &event3, &event4, &event5, &event6, &userEvent1, }; cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]); 
pCmdQ2->enqueueMarkerWithWaitList( numEventsInWaitList, eventWaitList, nullptr); EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, nullptr); CsrDependencies csrDeps; eventsRequest.fillCsrDependenciesForTaskCountContainer(csrDeps, pCmdQ2->getGpgpuCommandStreamReceiver()); EXPECT_EQ(3u, csrDeps.taskCountContainer.size()); EXPECT_EQ(3u * sizeof(MI_SEMAPHORE_WAIT), TimestampPacketHelper::getRequiredCmdStreamSizeForTaskCountContainer(csrDeps)); } } HWTEST_F(CrossDeviceDependenciesTests, givenWaitListWithEventBlockedByUserEventWhenProgrammingCrossDeviceDependenciesForGpgpuCsrThenProgramSemaphoreWaitOnUnblockingEvent) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; UserEvent userEvent1(&pCmdQ1->getContext()); cl_event outputEvent1{}; cl_event inputEvent1 = &userEvent1; pCmdQ1->enqueueMarkerWithWaitList( 1, &inputEvent1, &outputEvent1); auto event1 = castToObject(outputEvent1); ASSERT_NE(nullptr, event1); EXPECT_EQ(CompletionStamp::notReady, event1->peekTaskCount()); cl_int retVal = CL_INVALID_PLATFORM; auto buffer = Buffer::create(context.get(), 0, MemoryConstants::pageSize, nullptr, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); char hostPtr[MemoryConstants::pageSize]{}; cl_event outputEvent2{}; pCmdQ2->enqueueReadBuffer(buffer, CL_FALSE, 0, MemoryConstants::pageSize, hostPtr, nullptr, 1, &outputEvent1, &outputEvent2); { HardwareParse csHwParser; csHwParser.parseCommands(pCmdQ2->getCS(0)); auto semaphores = findAll(csHwParser.cmdList.begin(), csHwParser.cmdList.end()); EXPECT_EQ(0u, semaphores.size()); } auto event2 = castToObject(outputEvent2); ASSERT_NE(nullptr, event2); EXPECT_EQ(CompletionStamp::notReady, event2->peekTaskCount()); pCmdQ1->enqueueMarkerWithWaitList( 1, &outputEvent2, nullptr); { HardwareParse csHwParser; csHwParser.parseCommands(pCmdQ1->getCS(0)); auto semaphores = findAll(csHwParser.cmdList.begin(), csHwParser.cmdList.end()); EXPECT_EQ(0u, semaphores.size()); } userEvent1.setStatus(CL_COMPLETE); 
event1->release(); event2->release(); pCmdQ1->finish(); pCmdQ2->finish(); { HardwareParse csHwParser; csHwParser.parseCommands(pCmdQ1->getGpgpuCommandStreamReceiver().getCS(0)); auto semaphores = findAll(csHwParser.cmdList.begin(), csHwParser.cmdList.end()); EXPECT_EQ(1u, semaphores.size()); auto semaphoreCmd = genCmdCast(*(semaphores[0])); EXPECT_EQ(1u, semaphoreCmd->getSemaphoreDataDword()); EXPECT_EQ(reinterpret_cast(pCmdQ2->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd->getSemaphoreGraphicsAddress()); } { HardwareParse csHwParser; csHwParser.parseCommands(pCmdQ2->getGpgpuCommandStreamReceiver().getCS(0)); auto semaphores = findAll(csHwParser.cmdList.begin(), csHwParser.cmdList.end()); EXPECT_EQ(1u, semaphores.size()); auto semaphoreCmd = genCmdCast(*(semaphores[0])); EXPECT_EQ(0u, semaphoreCmd->getSemaphoreDataDword()); EXPECT_EQ(reinterpret_cast(pCmdQ1->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd->getSemaphoreGraphicsAddress()); } buffer->release(); } HWTEST_F(CrossDeviceDependenciesTests, givenWaitListWithEventBlockedByUserEventWhenProgrammingSingleDeviceDependenciesForGpgpuCsrThenNoSemaphoreWaitIsProgrammed) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; UserEvent userEvent1(&pCmdQ1->getContext()); cl_event outputEvent1{}; cl_event inputEvent1 = &userEvent1; pCmdQ1->enqueueMarkerWithWaitList( 1, &inputEvent1, &outputEvent1); auto event1 = castToObject(outputEvent1); ASSERT_NE(nullptr, event1); EXPECT_EQ(CompletionStamp::notReady, event1->peekTaskCount()); cl_int retVal = CL_INVALID_PLATFORM; auto buffer = Buffer::create(context.get(), 0, MemoryConstants::pageSize, nullptr, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); char hostPtr[MemoryConstants::pageSize]{}; cl_event outputEvent2{}; pCmdQ1->enqueueReadBuffer(buffer, CL_FALSE, 0, MemoryConstants::pageSize, hostPtr, nullptr, 1, &outputEvent1, &outputEvent2); { HardwareParse csHwParser; csHwParser.parseCommands(pCmdQ1->getCS(0)); 
auto semaphores = findAll(csHwParser.cmdList.begin(), csHwParser.cmdList.end()); EXPECT_EQ(0u, semaphores.size()); } auto event2 = castToObject(outputEvent2); ASSERT_NE(nullptr, event2); EXPECT_EQ(CompletionStamp::notReady, event2->peekTaskCount()); pCmdQ1->enqueueMarkerWithWaitList( 1, &outputEvent2, nullptr); { HardwareParse csHwParser; csHwParser.parseCommands(pCmdQ1->getCS(0)); auto semaphores = findAll(csHwParser.cmdList.begin(), csHwParser.cmdList.end()); EXPECT_EQ(0u, semaphores.size()); } userEvent1.setStatus(CL_COMPLETE); event1->release(); event2->release(); pCmdQ1->finish(); { HardwareParse csHwParser; csHwParser.parseCommands(pCmdQ1->getGpgpuCommandStreamReceiver().getCS(0)); auto semaphores = findAll(csHwParser.cmdList.begin(), csHwParser.cmdList.end()); EXPECT_EQ(0u, semaphores.size()); } buffer->release(); } HWTEST_F(CrossDeviceDependenciesTests, givenWaitListWithEventBlockedByUserEventWhenProgrammingCrossDeviceDependenciesForBlitCsrThenProgramSemaphoreWaitOnUnblockingEvent) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; DebugManagerStateRestore restorer; DebugManager.flags.EnableBlitterForEnqueueOperations.set(true); for (auto &rootDeviceEnvironment : deviceFactory->rootDevices[0]->getExecutionEnvironment()->rootDeviceEnvironments) { REQUIRE_FULL_BLITTER_OR_SKIP(rootDeviceEnvironment->getHardwareInfo()); } auto clCmdQ1 = clCreateCommandQueue(context.get(), deviceFactory->rootDevices[1], {}, nullptr); auto clCmdQ2 = clCreateCommandQueue(context.get(), deviceFactory->rootDevices[2], {}, nullptr); pCmdQ1 = castToObject(clCmdQ1); pCmdQ2 = castToObject(clCmdQ2); ASSERT_NE(nullptr, pCmdQ1); ASSERT_NE(nullptr, pCmdQ2); UserEvent userEvent1(&pCmdQ1->getContext()); cl_event outputEvent1{}; cl_event inputEvent1 = &userEvent1; pCmdQ1->enqueueMarkerWithWaitList( 1, &inputEvent1, &outputEvent1); auto event1 = castToObject(outputEvent1); ASSERT_NE(nullptr, event1); EXPECT_EQ(CompletionStamp::notReady, event1->peekTaskCount()); cl_int retVal = 
CL_INVALID_PLATFORM; auto buffer = Buffer::create(context.get(), 0, MemoryConstants::pageSize, nullptr, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); char hostPtr[MemoryConstants::pageSize]{}; cl_event outputEvent2{}; pCmdQ2->enqueueReadBuffer(buffer, CL_FALSE, 0, MemoryConstants::pageSize, hostPtr, nullptr, 1, &outputEvent1, &outputEvent2); auto event2 = castToObject(outputEvent2); ASSERT_NE(nullptr, event2); EXPECT_EQ(CompletionStamp::notReady, event2->peekTaskCount()); { HardwareParse csHwParser; csHwParser.parseCommands(pCmdQ2->getCS(0)); auto semaphores = findAll(csHwParser.cmdList.begin(), csHwParser.cmdList.end()); EXPECT_EQ(0u, semaphores.size()); } cl_event outputEvent3{}; pCmdQ1->enqueueReadBuffer(buffer, CL_FALSE, 0, MemoryConstants::pageSize, hostPtr, nullptr, 1, &outputEvent2, &outputEvent3); auto event3 = castToObject(outputEvent3); ASSERT_NE(nullptr, event3); EXPECT_EQ(CompletionStamp::notReady, event3->peekTaskCount()); { HardwareParse csHwParser; csHwParser.parseCommands(pCmdQ2->getCS(0)); auto semaphores = findAll(csHwParser.cmdList.begin(), csHwParser.cmdList.end()); EXPECT_EQ(0u, semaphores.size()); } pCmdQ2->enqueueMarkerWithWaitList( 1, &outputEvent3, nullptr); { HardwareParse csHwParser; csHwParser.parseCommands(pCmdQ2->getCS(0)); auto semaphores = findAll(csHwParser.cmdList.begin(), csHwParser.cmdList.end()); EXPECT_EQ(0u, semaphores.size()); } userEvent1.setStatus(CL_COMPLETE); event1->release(); event2->release(); event3->release(); pCmdQ1->finish(); pCmdQ2->finish(); { HardwareParse csHwParser; csHwParser.parseCommands(pCmdQ1->getGpgpuCommandStreamReceiver().getCS(0)); auto semaphores = findAll(csHwParser.cmdList.begin(), csHwParser.cmdList.end()); EXPECT_EQ(1u, semaphores.size()); auto semaphoreCmd = genCmdCast(*(semaphores[0])); EXPECT_EQ(1u, semaphoreCmd->getSemaphoreDataDword()); EXPECT_EQ(reinterpret_cast(pCmdQ2->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd->getSemaphoreGraphicsAddress()); } { 
HardwareParse csHwParser; csHwParser.parseCommands(pCmdQ1->getBcsCommandStreamReceiver(aub_stream::EngineType::ENGINE_BCS)->getCS(0)); auto semaphores = findAll(csHwParser.cmdList.begin(), csHwParser.cmdList.end()); EXPECT_LE(1u, semaphores.size()); } { HardwareParse csHwParser; csHwParser.parseCommands(pCmdQ2->getGpgpuCommandStreamReceiver().getCS(0)); auto semaphores = findAll(csHwParser.cmdList.begin(), csHwParser.cmdList.end()); EXPECT_EQ(2u, semaphores.size()); auto semaphoreCmd0 = genCmdCast(*(semaphores[0])); EXPECT_EQ(0u, semaphoreCmd0->getSemaphoreDataDword()); EXPECT_EQ(reinterpret_cast(pCmdQ1->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd0->getSemaphoreGraphicsAddress()); } { HardwareParse csHwParser; csHwParser.parseCommands(pCmdQ2->getBcsCommandStreamReceiver(aub_stream::EngineType::ENGINE_BCS)->getCS(0)); auto semaphores = findAll(csHwParser.cmdList.begin(), csHwParser.cmdList.end()); EXPECT_LE(1u, semaphores.size()); } buffer->release(); pCmdQ1->release(); pCmdQ2->release(); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenStaticPartitioningEnabledWhenFlushingTaskThenWorkPartitionAllocationIsMadeResident) { DebugManagerStateRestore restore{}; DebugManager.flags.EnableStaticPartitioning.set(1); DebugManager.flags.EnableImplicitScaling.set(1); DebugManager.flags.ForcePreemptionMode.set(PreemptionMode::Disabled); UltDeviceFactory deviceFactory{1, 2}; MockDevice *device = deviceFactory.rootDevices[0]; auto &mockCsr = device->getUltCommandStreamReceiver(); ASSERT_NE(nullptr, mockCsr.getWorkPartitionAllocation()); mockCsr.overrideDispatchPolicy(DispatchMode::BatchedDispatch); mockCsr.storeMakeResidentAllocations = true; DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); mockCsr.flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevel, dispatchFlags, *device); bool found = false; for (auto allocation : mockCsr.makeResidentAllocations) { if (allocation.first == mockCsr.getWorkPartitionAllocation()) { found = 
true; break; } } EXPECT_TRUE(found); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenEnqueueWithoutArbitrationPolicyWhenPolicyIsAlreadyProgrammedThenReuse) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); auto &csrThreadArbitrationPolicy = commandStreamReceiver.streamProperties.stateComputeMode.threadArbitrationPolicy.value; int32_t sentThreadArbitrationPolicy = ThreadArbitrationPolicy::RoundRobinAfterDependency; flushTaskFlags.threadArbitrationPolicy = sentThreadArbitrationPolicy; flushTask(commandStreamReceiver); EXPECT_EQ(csrThreadArbitrationPolicy, sentThreadArbitrationPolicy); flushTaskFlags.threadArbitrationPolicy = ThreadArbitrationPolicy::NotPresent; flushTask(commandStreamReceiver); EXPECT_EQ(csrThreadArbitrationPolicy, sentThreadArbitrationPolicy); } struct PreambleThreadArbitrationMatcher { template static constexpr bool isMatched() { if constexpr (HwMapper::GfxProduct::supportsCmdSet(IGFX_GEN8_CORE)) { return TestTraits::get()>::implementsPreambleThreadArbitration; } return false; } }; HWTEST2_F(CommandStreamReceiverFlushTaskTests, givenPolicyValueChangedWhenFlushingTaskThenProgramThreadArbitrationPolicy, PreambleThreadArbitrationMatcher) { using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; auto &hwHelper = HwHelper::get(pDevice->getHardwareInfo().platform.eRenderCoreFamily); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.isPreambleSent = true; flushTask(commandStreamReceiver); size_t parsingOffset = commandStreamReceiver.commandStream.getUsed(); for (auto arbitrationChanged : ::testing::Bool()) { commandStreamReceiver.streamProperties.stateComputeMode.threadArbitrationPolicy.value = arbitrationChanged ? 
-1 : hwHelper.getDefaultThreadArbitrationPolicy(); flushTask(commandStreamReceiver); HardwareParse csHwParser; csHwParser.parseCommands(commandStreamReceiver.commandStream, parsingOffset); auto miLoadRegisterCommandsCount = findAll(csHwParser.cmdList.begin(), csHwParser.cmdList.end()).size(); if (arbitrationChanged) { EXPECT_GE(miLoadRegisterCommandsCount, 1u); } else { EXPECT_EQ(0u, miLoadRegisterCommandsCount); } parsingOffset = commandStreamReceiver.commandStream.getUsed(); } } namespace CpuIntrinsicsTests { extern volatile uint32_t *pauseAddress; extern uint32_t pauseValue; } // namespace CpuIntrinsicsTests HWTEST_F(CommandStreamReceiverFlushTaskTests, givenTagValueNotMeetingTaskCountToWaitWhenTagValueSwitchesThenWaitFunctionReturnsTrue) { VariableBackup backupPauseAddress(&CpuIntrinsicsTests::pauseAddress); VariableBackup backupPauseValue(&CpuIntrinsicsTests::pauseValue); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(mockCsr); uint32_t taskCountToWait = 2u; *mockCsr->tagAddress = 1u; CpuIntrinsicsTests::pauseAddress = mockCsr->tagAddress; CpuIntrinsicsTests::pauseValue = taskCountToWait; const auto ret = mockCsr->waitForCompletionWithTimeout(WaitParams{false, false, 1}, taskCountToWait); EXPECT_EQ(NEO::WaitStatus::Ready, ret); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenTagValueNotMeetingTaskCountToWaitAndIndefinitelyPollWhenWaitForCompletionThenDoNotCallWaitUtils) { VariableBackup backupPauseAddress(&CpuIntrinsicsTests::pauseAddress); VariableBackup backupPauseValue(&CpuIntrinsicsTests::pauseValue); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(mockCsr); uint32_t taskCountToWait = 2u; *mockCsr->tagAddress = 1u; CpuIntrinsicsTests::pauseAddress = mockCsr->tagAddress; CpuIntrinsicsTests::pauseValue = taskCountToWait; const auto 
ret = mockCsr->waitForCompletionWithTimeout(WaitParams{true, true, 10}, taskCountToWait); EXPECT_EQ(NEO::WaitStatus::NotReady, ret); }command_stream_receiver_flush_task_gmock_tests.cpp000066400000000000000000000375551422164147700367660ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/command_stream/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/built_ins/built_ins.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/command_stream/linear_stream.h" #include "shared/source/command_stream/preemption.h" #include "shared/source/command_stream/scratch_space_controller.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/cache_policy.h" #include "shared/source/helpers/preamble.h" #include "shared/source/helpers/ptr_math.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/os_interface/debug_env_reader.h" #include "shared/test/common/cmd_parse/hw_parse.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/dispatch_flags_helper.h" #include "shared/test/common/helpers/engine_descriptor_helper.h" #include "shared/test/common/helpers/ult_hw_helper.h" #include "shared/test/common/libult/ult_command_stream_receiver.h" #include "shared/test/common/mocks/mock_csr.h" #include "shared/test/common/mocks/mock_os_context.h" #include "shared/test/common/mocks/mock_submissions_aggregator.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/command_queue/gpgpu_walker.h" #include "opencl/source/event/user_event.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/fixtures/built_in_fixture.h" #include 
"opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_event.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "gtest/gtest.h" #include "reg_configs_common.h" #include "test_traits_common.h" using namespace NEO; using CommandStreamReceiverFlushTaskGmockTests = UltCommandStreamReceiverTest; HWTEST2_F(CommandStreamReceiverFlushTaskGmockTests, givenCsrInBatchingModeThreeRecordedCommandBufferEnabledBatchBufferFlatteningAndPatchInfoCollectionWhenFlushBatchedSubmissionsIsCalledThenBatchBuffersAndPatchInfoAreCollected, MatchAny) { DebugManagerStateRestore stateRestore; DebugManager.flags.CsrDispatchMode.set(static_cast(DispatchMode::BatchedDispatch)); DebugManager.flags.AddPatchInfoCommentsForAUBDump.set(true); DebugManager.flags.FlattenBatchBufferForAUBDump.set(true); typedef typename FamilyType::MI_BATCH_BUFFER_END MI_BATCH_BUFFER_END; typedef typename FamilyType::MI_BATCH_BUFFER_START MI_BATCH_BUFFER_START; typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); auto mockHelper = new MockFlatBatchBufferHelper(*pDevice->executionEnvironment); mockCsr->overwriteFlatBatchBufferHelper(mockHelper); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); mockCsr->useNewResourceImplicitFlush = false; mockCsr->useGpuIdleImplicitFlush = false; auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); DispatchFlags dispatchFlags = 
DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.guardCommandBufferWithPipeControl = true; dispatchFlags.outOfOrderExecutionAllowed = true; uint32_t expectedCallsCount = TestTraits::iohInSbaSupported ? 10 : 9; if (!pDevice->getHardwareInfo().capabilityTable.supportsImages) { --expectedCallsCount; } size_t removePatchInfoDataCount = 4 * UltMemorySynchronizationCommands::getExpectedPipeControlCount(pDevice->getHardwareInfo()); mockCsr->flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevel, dispatchFlags, *pDevice); mockCsr->flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevel, dispatchFlags, *pDevice); mockCsr->flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevel, dispatchFlags, *pDevice); auto primaryBatch = mockedSubmissionsAggregator->peekCommandBuffers().peekHead(); auto lastBatchBuffer = primaryBatch->next->next; auto bbEndLocation = primaryBatch->next->batchBufferEndLocation; auto lastBatchBufferAddress = (uint64_t)ptrOffset(lastBatchBuffer->batchBuffer.commandBufferAllocation->getGpuAddress(), lastBatchBuffer->batchBuffer.startOffset); mockCsr->flushBatchedSubmissions(); auto batchBufferStart = genCmdCast(bbEndLocation); ASSERT_NE(nullptr, batchBufferStart); EXPECT_EQ(lastBatchBufferAddress, batchBufferStart->getBatchBufferStartAddress()); EXPECT_EQ(1, mockCsr->flushCalledCount); EXPECT_EQ(expectedCallsCount, mockHelper->setPatchInfoDataCalled); EXPECT_EQ(static_cast(removePatchInfoDataCount), mockHelper->removePatchInfoDataCalled); EXPECT_EQ(4u, mockHelper->registerCommandChunkCalled); EXPECT_EQ(3u, mockHelper->registerBatchBufferStartAddressCalled); } HWTEST_F(CommandStreamReceiverFlushTaskGmockTests, givenMockCommandStreamerWhenAddPatchInfoCommentsForAUBDumpIsNotSetThenAddPatchInfoDataIsNotCollected) { CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); 
auto mockHelper = new MockFlatBatchBufferHelper(*pDevice->executionEnvironment); mockCsr->overwriteFlatBatchBufferHelper(mockHelper); pDevice->resetCommandStreamReceiver(mockCsr); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.throttle = QueueThrottle::MEDIUM; mockCsr->flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevel, dispatchFlags, *pDevice); EXPECT_EQ(0u, mockHelper->setPatchInfoDataCalled); } HWTEST2_F(CommandStreamReceiverFlushTaskGmockTests, givenMockCommandStreamerWhenAddPatchInfoCommentsForAUBDumpIsSetThenAddPatchInfoDataIsCollected, MatchAny) { DebugManagerStateRestore dbgRestore; DebugManager.flags.AddPatchInfoCommentsForAUBDump.set(true); CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); auto mockHelper = new MockFlatBatchBufferHelper(*pDevice->executionEnvironment); mockCsr->overwriteFlatBatchBufferHelper(mockHelper); pDevice->resetCommandStreamReceiver(mockCsr); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); uint32_t expectedCallsCount = TestTraits::iohInSbaSupported ? 
4 : 3; if (!pDevice->getHardwareInfo().capabilityTable.supportsImages) { --expectedCallsCount; } mockCsr->flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevel, dispatchFlags, *pDevice); EXPECT_EQ(expectedCallsCount, mockHelper->patchInfoDataVector.size()); for (auto &patchInfoData : mockHelper->patchInfoDataVector) { uint64_t expectedAddress = 0u; switch (patchInfoData.sourceType) { case PatchInfoAllocationType::DynamicStateHeap: expectedAddress = dsh.getGraphicsAllocation()->getGpuAddress(); break; case PatchInfoAllocationType::SurfaceStateHeap: expectedAddress = ssh.getGraphicsAllocation()->getGpuAddress(); break; case PatchInfoAllocationType::IndirectObjectHeap: expectedAddress = ioh.getGraphicsAllocation()->getGpuAddress(); break; default: expectedAddress = 0u; } EXPECT_EQ(expectedAddress, patchInfoData.sourceAllocation); EXPECT_EQ(0u, patchInfoData.sourceAllocationOffset); EXPECT_EQ(PatchInfoAllocationType::Default, patchInfoData.targetType); } EXPECT_EQ(expectedCallsCount, mockHelper->setPatchInfoDataCalled); } HWTEST2_F(CommandStreamReceiverFlushTaskGmockTests, givenMockCsrWhenCollectStateBaseAddresPatchInfoIsCalledThenAppropriateAddressesAreTaken, MatchAny) { typedef typename FamilyType::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; std::unique_ptr> mockCsr(new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield())); auto mockHelper = new MockFlatBatchBufferHelper(*pDevice->executionEnvironment); mockCsr->overwriteFlatBatchBufferHelper(mockHelper); uint32_t expectedCallsCount = TestTraits::iohInSbaSupported ? 
4 : 3; auto dshPatchIndex = 0u; auto gshPatchIndex = 1u; auto sshPatchIndex = 2u; auto iohPatchIndex = 3u; const bool deviceUsesDsh = pDevice->getHardwareInfo().capabilityTable.supportsImages; if (!deviceUsesDsh) { --expectedCallsCount; gshPatchIndex = 0u; sshPatchIndex = 1u; iohPatchIndex = 2u; } uint64_t baseAddress = 0xabcdef; uint64_t commandOffset = 0xa; uint64_t generalStateBase = 0xff; mockCsr->collectStateBaseAddresPatchInfo(baseAddress, commandOffset, &dsh, &ioh, &ssh, generalStateBase); ASSERT_EQ(mockHelper->patchInfoDataVector.size(), expectedCallsCount); for (auto &patch : mockHelper->patchInfoDataVector) { EXPECT_EQ(patch.targetAllocation, baseAddress); EXPECT_EQ(patch.sourceAllocationOffset, 0u); } //DSH if (deviceUsesDsh) { PatchInfoData dshPatch = mockHelper->patchInfoDataVector[dshPatchIndex]; EXPECT_EQ(dshPatch.sourceAllocation, dsh.getGraphicsAllocation()->getGpuAddress()); EXPECT_EQ(dshPatch.targetAllocationOffset, commandOffset + STATE_BASE_ADDRESS::PATCH_CONSTANTS::DYNAMICSTATEBASEADDRESS_BYTEOFFSET); } if constexpr (TestTraits::iohInSbaSupported) { //IOH PatchInfoData iohPatch = mockHelper->patchInfoDataVector[iohPatchIndex]; EXPECT_EQ(iohPatch.sourceAllocation, ioh.getGraphicsAllocation()->getGpuAddress()); EXPECT_EQ(iohPatch.targetAllocationOffset, commandOffset + STATE_BASE_ADDRESS::PATCH_CONSTANTS::INDIRECTOBJECTBASEADDRESS_BYTEOFFSET); } //SSH PatchInfoData sshPatch = mockHelper->patchInfoDataVector[sshPatchIndex]; EXPECT_EQ(sshPatch.sourceAllocation, ssh.getGraphicsAllocation()->getGpuAddress()); EXPECT_EQ(sshPatch.targetAllocationOffset, commandOffset + STATE_BASE_ADDRESS::PATCH_CONSTANTS::SURFACESTATEBASEADDRESS_BYTEOFFSET); //GSH PatchInfoData gshPatch = mockHelper->patchInfoDataVector[gshPatchIndex]; EXPECT_EQ(gshPatch.sourceAllocation, generalStateBase); EXPECT_EQ(gshPatch.targetAllocationOffset, commandOffset + STATE_BASE_ADDRESS::PATCH_CONSTANTS::GENERALSTATEBASEADDRESS_BYTEOFFSET); EXPECT_EQ(0u, 
mockHelper->registerCommandChunkCalled); EXPECT_EQ(expectedCallsCount, mockHelper->setPatchInfoDataCalled); } HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskGmockTests, givenPatchInfoCollectionEnabledWhenScratchSpaceIsProgrammedThenPatchInfoIsCollected) { DebugManagerStateRestore dbgRestore; DebugManager.flags.AddPatchInfoCommentsForAUBDump.set(true); CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); std::unique_ptr> mockCsr(new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield())); mockCsr->overwriteFlatBatchBufferHelper(new MockFlatBatchBufferHelper(*pDevice->executionEnvironment)); bool stateBaseAddressDirty; bool vfeStateDirty; MockOsContext osContext(0, EngineDescriptorHelper::getDefaultDescriptor(DeviceBitfield(8))); mockCsr->setupContext(osContext); mockCsr->getScratchSpaceController()->setRequiredScratchSpace(nullptr, 0u, 10u, 0u, 1u, *pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, vfeStateDirty); DispatchFlags flags = DispatchFlagsHelper::createDefaultDispatchFlags(); mockCsr->requiredScratchSize = 0x200000; mockCsr->programVFEState(commandStream, flags, 10); ASSERT_EQ(1u, mockCsr->getFlatBatchBufferHelper().getPatchInfoCollection().size()); EXPECT_EQ(mockCsr->getScratchSpaceController()->getScratchPatchAddress(), mockCsr->getFlatBatchBufferHelper().getPatchInfoCollection().at(0).sourceAllocation); } HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskGmockTests, givenPatchInfoCollectionDisabledWhenScratchSpaceIsProgrammedThenPatchInfoIsNotCollected) { CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); std::unique_ptr> mockCsr(new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield())); mockCsr->overwriteFlatBatchBufferHelper(new MockFlatBatchBufferHelper(*pDevice->executionEnvironment)); bool 
stateBaseAddressDirty; bool vfeStateDirty; MockOsContext osContext(0, EngineDescriptorHelper::getDefaultDescriptor(DeviceBitfield(8))); mockCsr->setupContext(osContext); mockCsr->getScratchSpaceController()->setRequiredScratchSpace(nullptr, 0u, 10u, 0u, 1u, *pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, vfeStateDirty); DispatchFlags flags = DispatchFlagsHelper::createDefaultDispatchFlags(); mockCsr->requiredScratchSize = 0x200000; mockCsr->programVFEState(commandStream, flags, 10); EXPECT_EQ(0u, mockCsr->getFlatBatchBufferHelper().getPatchInfoCollection().size()); } HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskGmockTests, givenPatchInfoCollectionEnabledWhenMediaVfeStateIsProgrammedWithEmptyScratchThenPatchInfoIsNotCollected) { DebugManagerStateRestore dbgRestore; DebugManager.flags.AddPatchInfoCommentsForAUBDump.set(true); CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); std::unique_ptr> mockCsr(new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield())); mockCsr->overwriteFlatBatchBufferHelper(new MockFlatBatchBufferHelper(*pDevice->executionEnvironment)); DispatchFlags flags = DispatchFlagsHelper::createDefaultDispatchFlags(); mockCsr->requiredScratchSize = 0x200000; MockOsContext osContext(0, EngineDescriptorHelper::getDefaultDescriptor(DeviceBitfield(8))); mockCsr->setupContext(osContext); mockCsr->programVFEState(commandStream, flags, 10); EXPECT_EQ(0u, mockCsr->getFlatBatchBufferHelper().getPatchInfoCollection().size()); } command_stream_receiver_flush_task_tests_xehp_and_later.cpp000066400000000000000000001613311422164147700406310ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/command_stream/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/state_base_address.h" #include 
"shared/test/common/cmd_parse/hw_parse.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/engine_descriptor_helper.h" #include "shared/test/common/libult/ult_command_stream_receiver.h" #include "shared/test/common/mocks/mock_csr.h" #include "shared/test/common/mocks/mock_debugger.h" #include "shared/test/common/mocks/mock_os_context.h" #include "shared/test/common/mocks/mock_submissions_aggregator.h" #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "test_traits_common.h" using namespace NEO; typedef UltCommandStreamReceiverTest CommandStreamReceiverFlushTaskXeHPAndLaterTests; HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, whenReprogrammingSshThenBindingTablePoolIsProgrammed) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); flushTask(commandStreamReceiver); parseCommands(commandStreamReceiver.getCS(0)); auto bindingTablePoolAlloc = getCommand(); ASSERT_NE(nullptr, bindingTablePoolAlloc); EXPECT_EQ(reinterpret_cast(ssh.getCpuBase()), bindingTablePoolAlloc->getBindingTablePoolBaseAddress()); EXPECT_EQ(ssh.getHeapSizeInPages(), bindingTablePoolAlloc->getBindingTablePoolBufferSize()); EXPECT_EQ(pDevice->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_STATE_HEAP_BUFFER), bindingTablePoolAlloc->getSurfaceObjectControlStateIndexToMocsTables()); } HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, whenReprogrammingSshThenBindingTablePoolIsProgrammedWithCachingOffWhenDebugKeyPresent) { DebugManagerStateRestore restorer; DebugManager.flags.DisableCachingForHeaps.set(1); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); flushTask(commandStreamReceiver); parseCommands(commandStreamReceiver.getCS(0)); auto bindingTablePoolAlloc = getCommand(); ASSERT_NE(nullptr, 
bindingTablePoolAlloc); EXPECT_EQ(reinterpret_cast(ssh.getCpuBase()), bindingTablePoolAlloc->getBindingTablePoolBaseAddress()); EXPECT_EQ(ssh.getHeapSizeInPages(), bindingTablePoolAlloc->getBindingTablePoolBufferSize()); EXPECT_EQ(pDevice->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_SYSTEM_MEMORY_BUFFER_CACHELINE_MISALIGNED), bindingTablePoolAlloc->getSurfaceObjectControlStateIndexToMocsTables()); } HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, whenNotReprogrammingSshThenBindingTablePoolIsNotProgrammed) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); flushTask(commandStreamReceiver); parseCommands(commandStreamReceiver.getCS(0)); auto stateBaseAddress = getCommand(); EXPECT_NE(nullptr, stateBaseAddress); auto bindingTablePoolAlloc = getCommand(); ASSERT_NE(nullptr, bindingTablePoolAlloc); EXPECT_EQ(reinterpret_cast(ssh.getCpuBase()), bindingTablePoolAlloc->getBindingTablePoolBaseAddress()); EXPECT_EQ(ssh.getHeapSizeInPages(), bindingTablePoolAlloc->getBindingTablePoolBufferSize()); EXPECT_EQ(pDevice->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_STATE_HEAP_BUFFER), bindingTablePoolAlloc->getSurfaceObjectControlStateIndexToMocsTables()); auto offset = commandStreamReceiver.getCS(0).getUsed(); // make SBA dirty (using ioh as dsh and dsh as ioh just to force SBA reprogramming) commandStreamReceiver.flushTask(commandStream, 0, &ioh, &dsh, &ssh, taskLevel, flushTaskFlags, *pDevice); HardwareParse hwParser; hwParser.parseCommands(commandStreamReceiver.getCS(0), offset); stateBaseAddress = hwParser.getCommand(); EXPECT_NE(nullptr, stateBaseAddress); bindingTablePoolAlloc = hwParser.getCommand(); EXPECT_EQ(nullptr, bindingTablePoolAlloc); } HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, givenStateBaseAddressWhenItIsRequiredThenThereIsPipeControlPriorToItWithTextureCacheFlushAndHdc) { using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; auto &commandStreamReceiver = 
pDevice->getUltCommandStreamReceiver(); configureCSRtoNonDirtyState(false); ioh.replaceBuffer(ptrOffset(ioh.getCpuBase(), +1u), ioh.getMaxAvailableSpace() + MemoryConstants::pageSize * 3); flushTask(commandStreamReceiver); parseCommands(commandStreamReceiver.getCS(0)); auto stateBaseAddressItor = find(cmdList.begin(), cmdList.end()); auto pipeControlItor = find(cmdList.begin(), stateBaseAddressItor); EXPECT_NE(stateBaseAddressItor, pipeControlItor); auto pipeControlCmd = reinterpret_cast(*pipeControlItor); EXPECT_TRUE(pipeControlCmd->getTextureCacheInvalidationEnable()); EXPECT_EQ(MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo), pipeControlCmd->getDcFlushEnable()); EXPECT_TRUE(UnitTestHelper::getPipeControlHdcPipelineFlush(*pipeControlCmd)); } HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, givenProgramExtendedPipeControlPriorToNonPipelinedStateCommandEnabledAndStateBaseAddressWhenItIsRequiredThenThereIsPipeControlPriorToIt) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.ProgramExtendedPipeControlPriorToNonPipelinedStateCommand.set(true); using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); configureCSRtoNonDirtyState(false); ioh.replaceBuffer(ptrOffset(ioh.getCpuBase(), +1u), ioh.getMaxAvailableSpace() + MemoryConstants::pageSize * 3); flushTask(commandStreamReceiver); parseCommands(commandStreamReceiver.getCS(0)); auto stateBaseAddressItor = find(cmdList.begin(), cmdList.end()); auto pipeControlItor = find(cmdList.begin(), stateBaseAddressItor); EXPECT_NE(stateBaseAddressItor, pipeControlItor); auto pipeControlCmd = reinterpret_cast(*pipeControlItor); EXPECT_TRUE(UnitTestHelper::getPipeControlHdcPipelineFlush(*pipeControlCmd)); EXPECT_TRUE(pipeControlCmd->getAmfsFlushEnable()); EXPECT_TRUE(pipeControlCmd->getCommandStreamerStallEnable()); EXPECT_TRUE(pipeControlCmd->getInstructionCacheInvalidateEnable()); 
EXPECT_TRUE(pipeControlCmd->getTextureCacheInvalidationEnable()); EXPECT_TRUE(pipeControlCmd->getConstantCacheInvalidationEnable()); EXPECT_TRUE(pipeControlCmd->getStateCacheInvalidationEnable()); EXPECT_TRUE(UnitTestHelper::getPipeControlHdcPipelineFlush(*pipeControlCmd)); } HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, givenProgramExtendedPipeControlPriorToNonPipelinedStateCommandEnabledAndStateSipWhenItIsRequiredThenThereIsPipeControlPriorToIt) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.ProgramExtendedPipeControlPriorToNonPipelinedStateCommand.set(true); using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; using STATE_SIP = typename FamilyType::STATE_SIP; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; pDevice->executionEnvironment->rootDeviceEnvironments[0]->debugger.reset(new MockDebugger); auto sipType = SipKernel::getSipKernelType(*pDevice); SipKernel::initSipKernel(sipType, *pDevice); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); configureCSRtoNonDirtyState(false); flushTask(commandStreamReceiver); parseCommands(commandStreamReceiver.getCS(0)); auto requiredCmdSize = PreemptionHelper::getRequiredStateSipCmdSize(*pDevice, false); auto cmdSize = sizeof(STATE_SIP) + sizeof(PIPE_CONTROL); EXPECT_EQ(cmdSize, requiredCmdSize); auto pipeControlIterator = find(cmdList.begin(), cmdList.end()); auto pipeControlCmd = genCmdCast(*pipeControlIterator); EXPECT_TRUE(UnitTestHelper::getPipeControlHdcPipelineFlush(*pipeControlCmd)); EXPECT_TRUE(pipeControlCmd->getAmfsFlushEnable()); EXPECT_TRUE(pipeControlCmd->getCommandStreamerStallEnable()); EXPECT_TRUE(pipeControlCmd->getInstructionCacheInvalidateEnable()); EXPECT_TRUE(pipeControlCmd->getTextureCacheInvalidationEnable()); EXPECT_TRUE(pipeControlCmd->getConstantCacheInvalidationEnable()); EXPECT_TRUE(pipeControlCmd->getStateCacheInvalidationEnable()); auto sipIterator = find(cmdList.begin(), cmdList.end()); auto sipCmd = 
genCmdCast(*sipIterator); auto sipAllocation = SipKernel::getSipKernel(*pDevice).getSipAllocation(); EXPECT_EQ(sipAllocation->getGpuAddressToPatch(), sipCmd->getSystemInstructionPointer()); } /* IsXeHpgCore variant that does not set the debug flag: takes the second match of the pipe-control search and expects HDC pipeline flush and untyped data-port cache flush to be set while AMFS flush and the instruction/texture/constant/state cache invalidations are clear; also checks the SIP command's instruction pointer against the SIP allocation. */ HWTEST2_F(CommandStreamReceiverFlushTaskXeHPAndLaterTests, givenProgramPipeControlPriorToNonPipelinedStateCommandAndStateSipWhenItIsRequiredThenThereIsPipeControlPriorToIt, IsXeHpgCore) { using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; using STATE_SIP = typename FamilyType::STATE_SIP; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; pDevice->executionEnvironment->rootDeviceEnvironments[0]->debugger.reset(new MockDebugger); auto sipType = SipKernel::getSipKernelType(*pDevice); SipKernel::initSipKernel(sipType, *pDevice); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); configureCSRtoNonDirtyState(false); flushTask(commandStreamReceiver); parseCommands(commandStreamReceiver.getCS(0)); auto requiredCmdSize = PreemptionHelper::getRequiredStateSipCmdSize(*pDevice, false); auto cmdSize = sizeof(STATE_SIP) + sizeof(PIPE_CONTROL); EXPECT_EQ(cmdSize, requiredCmdSize); // first PC prior SBA auto pipeControlIterator = find(cmdList.begin(), cmdList.end()); pipeControlIterator = find(++pipeControlIterator, cmdList.end()); auto pipeControlCmd = genCmdCast(*pipeControlIterator); EXPECT_TRUE(UnitTestHelper::getPipeControlHdcPipelineFlush(*pipeControlCmd)); EXPECT_TRUE(pipeControlCmd->getUnTypedDataPortCacheFlush()); EXPECT_FALSE(pipeControlCmd->getAmfsFlushEnable()); EXPECT_FALSE(pipeControlCmd->getInstructionCacheInvalidateEnable()); EXPECT_FALSE(pipeControlCmd->getTextureCacheInvalidationEnable()); EXPECT_FALSE(pipeControlCmd->getConstantCacheInvalidationEnable()); EXPECT_FALSE(pipeControlCmd->getStateCacheInvalidationEnable()); auto sipIterator = find(cmdList.begin(), cmdList.end()); auto sipCmd = genCmdCast(*sipIterator); auto sipAllocation = SipKernel::getSipKernel(*pDevice).getSipAllocation();
EXPECT_EQ(sipAllocation->getGpuAddressToPatch(), sipCmd->getSystemInstructionPointer()); } /* IsXEHP only: with the extended pipe-control debug flag enabled, builds a mock device with one CCS and the revision id returned for REVISION_A0, installs a MockDebugger, flushes a task and expects a command found by findMmio for register 0xE42C whose data dword, masked with 0xfffffff8, equals the SIP allocation's GPU address to patch. */ HWTEST2_F(CommandStreamReceiverFlushTaskXeHPAndLaterTests, givenProgramExtendedPipeControlPriorToNonPipelinedStateCommandEnabledAndStateSipWhenA0SteppingIsActivatedThenOnlyGlobalSipIsProgrammed, IsXEHP) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.ProgramExtendedPipeControlPriorToNonPipelinedStateCommand.set(true); using STATE_SIP = typename FamilyType::STATE_SIP; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; const auto &hwInfoConfig = *HwInfoConfig::get(productFamily); hardwareInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled = 1; hardwareInfo.platform.usRevId = hwInfoConfig.getHwRevIdFromStepping(REVISION_A0, hardwareInfo); auto mockDevice = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(&hardwareInfo, 0u)); auto &commandStreamReceiver = mockDevice->getUltCommandStreamReceiver(); mockDevice->executionEnvironment->rootDeviceEnvironments[0]->debugger.reset(new MockDebugger); auto sipType = SipKernel::getSipKernelType(*mockDevice); SipKernel::initSipKernel(sipType, *mockDevice); configureCSRtoNonDirtyState(false); ioh.replaceBuffer(ptrOffset(ioh.getCpuBase(), +1u), ioh.getMaxAvailableSpace() + MemoryConstants::pageSize * 3); flushTaskFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(mockDevice->getHardwareInfo()); commandStreamReceiver.flushTask( commandStream, 0, &dsh, &ioh, &ssh, taskLevel, flushTaskFlags, *mockDevice); parseCommands(commandStreamReceiver.getCS(0)); auto itorLRI = findMmio(cmdList.begin(), cmdList.end(), 0xE42C); EXPECT_NE(cmdList.end(), itorLRI); auto cmdLRI = genCmdCast(*itorLRI); auto sipAddress = cmdLRI->getDataDword() & 0xfffffff8; auto sipAllocation = SipKernel::getSipKernel(*mockDevice).getSipAllocation(); EXPECT_EQ(sipAllocation->getGpuAddressToPatch(), sipAddress); } /* On a mock device limited to one CCS, with a CCS OS context set up on the CSR, expects the command located before stateBaseAddressItor to have HDC pipeline flush set (plus untyped data-port cache flush when TestTraits reports support) and the AMFS flush and instruction/texture/constant/state cache invalidations clear. */ HWTEST2_F(CommandStreamReceiverFlushTaskXeHPAndLaterTests,
givenSBACommandToProgramOnSingleCCSSetupThenThereIsPipeControlPriorToIt, IsWithinXeGfxFamily) { using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; hardwareInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled = 1; auto mockDevice = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(&hardwareInfo, 0u)); auto &commandStreamReceiver = mockDevice->getUltCommandStreamReceiver(); MockOsContext ccsOsContext(0, EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_CCS, EngineUsage::Regular})); commandStreamReceiver.setupContext(ccsOsContext); configureCSRtoNonDirtyState(false); flushTask(commandStreamReceiver); parseCommands(commandStreamReceiver.getCS(0)); auto stateBaseAddressItor = find(cmdList.begin(), cmdList.end()); auto pipeControlItor = find(cmdList.begin(), stateBaseAddressItor); EXPECT_NE(stateBaseAddressItor, pipeControlItor); auto pipeControlCmd = reinterpret_cast(*pipeControlItor); EXPECT_TRUE(UnitTestHelper::getPipeControlHdcPipelineFlush(*pipeControlCmd)); if constexpr (TestTraits::isUnTypedDataPortCacheFlushSupported) { EXPECT_TRUE(pipeControlCmd->getUnTypedDataPortCacheFlush()); } EXPECT_FALSE(pipeControlCmd->getAmfsFlushEnable()); EXPECT_FALSE(pipeControlCmd->getInstructionCacheInvalidateEnable()); EXPECT_FALSE(pipeControlCmd->getTextureCacheInvalidationEnable()); EXPECT_FALSE(pipeControlCmd->getConstantCacheInvalidationEnable()); EXPECT_FALSE(pipeControlCmd->getStateCacheInvalidationEnable()); } /* Flushes a task and checks the parsed binding-table-pool command against the SSH CPU base, the SSH size in pages and the state-heap MOCS; then calls initProgrammingFlags(), flushes again and expects both the state base address and the binding-table-pool commands in the part of the CSR stream written after the recorded offset. */ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, whenNotReprogrammingSshButInitProgrammingFlagsThenBindingTablePoolIsProgrammed) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); flushTask(commandStreamReceiver); parseCommands(commandStreamReceiver.getCS(0)); auto stateBaseAddress = getCommand(); EXPECT_NE(nullptr, stateBaseAddress); auto bindingTablePoolAlloc = getCommand(); ASSERT_NE(nullptr, bindingTablePoolAlloc);
EXPECT_EQ(reinterpret_cast(ssh.getCpuBase()), bindingTablePoolAlloc->getBindingTablePoolBaseAddress()); EXPECT_EQ(ssh.getHeapSizeInPages(), bindingTablePoolAlloc->getBindingTablePoolBufferSize()); EXPECT_EQ(pDevice->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_STATE_HEAP_BUFFER), bindingTablePoolAlloc->getSurfaceObjectControlStateIndexToMocsTables()); auto offset = commandStreamReceiver.getCS(0).getUsed(); commandStreamReceiver.initProgrammingFlags(); flushTask(commandStreamReceiver); HardwareParse hwParser; hwParser.parseCommands(commandStreamReceiver.getCS(0), offset); stateBaseAddress = hwParser.getCommand(); EXPECT_NE(nullptr, stateBaseAddress); bindingTablePoolAlloc = hwParser.getCommand(); EXPECT_NE(nullptr, bindingTablePoolAlloc); } /* Programs a local STATE_BASE_ADDRESS through programStateBaseAddress() with null heap pointers and checks that the dynamic and surface state fields stay disabled and zero, that the instruction and general state fields are programmed, that the bindless surface state base stays zero and unmodified, and that the bindless surface state size equals getMaxBindlessSurfaceStates(). */ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, givenNoHeapsProvidedWhenSBAIsProgrammedThenBaseAddressesAreNotSetAndBindlessSurfaceStateSizeSetToMax) { using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); uint64_t instructionHeapBase = 0x10000; uint64_t internalHeapBase = 0x10000; uint64_t generalStateBase = 0x30000; STATE_BASE_ADDRESS sbaCmd; StateBaseAddressHelper::programStateBaseAddress(&sbaCmd, nullptr, nullptr, nullptr, generalStateBase, true, 0, internalHeapBase, instructionHeapBase, 0, true, false, pDevice->getGmmHelper(), false, MemoryCompressionState::NotApplicable, false, 1u); EXPECT_FALSE(sbaCmd.getDynamicStateBaseAddressModifyEnable()); EXPECT_FALSE(sbaCmd.getDynamicStateBufferSizeModifyEnable()); EXPECT_EQ(0u, sbaCmd.getDynamicStateBaseAddress()); EXPECT_EQ(0u, sbaCmd.getDynamicStateBufferSize()); EXPECT_FALSE(sbaCmd.getSurfaceStateBaseAddressModifyEnable()); EXPECT_EQ(0u, sbaCmd.getSurfaceStateBaseAddress()); EXPECT_TRUE(sbaCmd.getInstructionBaseAddressModifyEnable()); EXPECT_EQ(instructionHeapBase, sbaCmd.getInstructionBaseAddress());
EXPECT_TRUE(sbaCmd.getInstructionBufferSizeModifyEnable()); EXPECT_EQ(MemoryConstants::sizeOf4GBinPageEntities, sbaCmd.getInstructionBufferSize()); EXPECT_TRUE(sbaCmd.getGeneralStateBaseAddressModifyEnable()); EXPECT_TRUE(sbaCmd.getGeneralStateBufferSizeModifyEnable()); if constexpr (is64bit) { EXPECT_EQ(GmmHelper::decanonize(internalHeapBase), sbaCmd.getGeneralStateBaseAddress()); } else { EXPECT_EQ(generalStateBase, sbaCmd.getGeneralStateBaseAddress()); } EXPECT_EQ(0xfffffu, sbaCmd.getGeneralStateBufferSize()); EXPECT_EQ(0u, sbaCmd.getBindlessSurfaceStateBaseAddress()); EXPECT_FALSE(sbaCmd.getBindlessSurfaceStateBaseAddressModifyEnable()); auto surfaceStateCount = StateBaseAddressHelper::getMaxBindlessSurfaceStates(); EXPECT_EQ(surfaceStateCount, sbaCmd.getBindlessSurfaceStateSize()); } /* Matcher alias passed as the third argument of the HWTEST2_F that follows. */ using isXeHPOrAbove = IsAtLeastProduct; /* With FlushAllCaches set, emits a pipe control through addPipeControl() into a local stream and expects every flush/invalidate getter checked here, including the compression control surface CCS flush, to return true. When exactly two commands were parsed, the command after the first one is inspected. */ HWTEST2_F(CommandStreamReceiverFlushTaskXeHPAndLaterTests, whenFlushAllCachesVariableIsSetAndAddPipeControlIsCalledThenFieldsAreProperlySet, isXeHPOrAbove) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; DebugManagerStateRestore dbgRestorer; DebugManager.flags.FlushAllCaches.set(true); char buff[sizeof(PIPE_CONTROL) * 3]; LinearStream stream(buff, sizeof(PIPE_CONTROL) * 3); PipeControlArgs args; MemorySynchronizationCommands::addPipeControl(stream, args); parseCommands(stream, 0); PIPE_CONTROL *pipeControl = getCommand(); ASSERT_NE(nullptr, pipeControl); // WA pipeControl added if (cmdList.size() == 2) { pipeControl++; } EXPECT_TRUE(pipeControl->getDcFlushEnable()); EXPECT_TRUE(pipeControl->getRenderTargetCacheFlushEnable()); EXPECT_TRUE(pipeControl->getInstructionCacheInvalidateEnable()); EXPECT_TRUE(pipeControl->getTextureCacheInvalidationEnable()); EXPECT_TRUE(pipeControl->getPipeControlFlushEnable()); EXPECT_TRUE(pipeControl->getVfCacheInvalidationEnable()); EXPECT_TRUE(pipeControl->getConstantCacheInvalidationEnable()); EXPECT_TRUE(pipeControl->getStateCacheInvalidationEnable()); // XeHP+ only field
EXPECT_TRUE(pipeControl->getCompressionControlSurfaceCcsFlush()); } /* After configureCSRtoNonDirtyState(true), expects flushTask() to leave the CSR command stream empty. */ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, givenconfigureCSRtoNonDirtyStateWhenFlushTaskIsCalledThenNoCommandsAreAdded) { configureCSRtoNonDirtyState(true); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); flushTask(commandStreamReceiver); EXPECT_EQ(0u, commandStreamReceiver.commandStream.getUsed()); } /* With multiOsContextCapable set on a non-dirty CSR, expects flushTask() to use exactly one cache line of the CSR stream and the command at the stream's CPU base to be accepted by genCmdCast (result stored in batchBufferStart). */ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, givenMultiOsContextCommandStreamReceiverWhenFlushTaskIsCalledThenCommandStreamReceiverStreamIsUsed) { configureCSRtoNonDirtyState(true); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.multiOsContextCapable = true; commandStream.getSpace(4); flushTask(commandStreamReceiver); EXPECT_EQ(MemoryConstants::cacheLineSize, commandStreamReceiver.commandStream.getUsed()); auto batchBufferStart = genCmdCast(commandStreamReceiver.commandStream.getCpuBase()); EXPECT_NE(nullptr, batchBufferStart); } /* In BatchedDispatch mode with timestamp packets disabled, pre-fills the CSR stream so that one cache line has room left only for a single pipe control plus MI_BATCH_BUFFER_END, raises taskLevel and expects two cache lines to be used after the flush. */ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, givenCsrInBatchingModeWhenTaskIsSubmittedViaCsrThenBbEndCoversPaddingEnoughToFitMiBatchBufferStart) { auto &mockCsr = pDevice->getUltCommandStreamReceiver(); mockCsr.overrideDispatchPolicy(DispatchMode::BatchedDispatch); mockCsr.timestampPacketWriteEnabled = false; configureCSRtoNonDirtyState(true); mockCsr.getCS(1024u); auto &csrCommandStream = mockCsr.commandStream; //we do level change that will emit PPC, fill all the space so only BB end fits.
taskLevel++; auto ppcSize = MemorySynchronizationCommands::getSizeForSinglePipeControl(); auto fillSize = MemoryConstants::cacheLineSize - ppcSize - sizeof(typename FamilyType::MI_BATCH_BUFFER_END); csrCommandStream.getSpace(fillSize); auto expectedUsedSize = 2 * MemoryConstants::cacheLineSize; flushTask(mockCsr); EXPECT_EQ(expectedUsedSize, mockCsr.commandStream.getUsed()); } /* With an unchanged task level and a non-dirty CSR, expects flushTask() to keep commandStreamReceiver.taskLevel equal to taskLevel and to write nothing to the CSR stream. */ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, GivenSameTaskLevelThenDontSendPipeControl) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); // Configure the CSR to not need to submit any state or commands. configureCSRtoNonDirtyState(true); flushTask(commandStreamReceiver); EXPECT_EQ(taskLevel, commandStreamReceiver.taskLevel); auto sizeUsed = commandStreamReceiver.commandStream.getUsed(); EXPECT_EQ(sizeUsed, 0u); } /* With ResolveDependenciesViaPipeControls set to 1, timestamp packet writes enabled and a raised task level, expects the search over the parsed CSR stream to return a match (itorPC != end). */ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, givenHigherTaskLevelWhenFlushTaskCalledThenPipeControlEmittedWhenDebugFlagSet) { DebugManagerStateRestore restorer; DebugManager.flags.ResolveDependenciesViaPipeControls.set(1); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); // Configure the CSR to not need to submit any state or commands.
configureCSRtoNonDirtyState(true); commandStreamReceiver.timestampPacketWriteEnabled = true; this->taskLevel++; flushTask(commandStreamReceiver); EXPECT_EQ(taskLevel, commandStreamReceiver.taskLevel); parseCommands(commandStreamReceiver.commandStream, 0); auto itorPC = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), itorPC); } /* Forces ThreadGroup preemption through the debug flag and the device, installs a fresh MockCsrHw via resetCommandStreamReceiver() and expects a non-dirty flush to keep the task level and write nothing to the CSR stream. */ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, givenDeviceWithThreadGroupPreemptionSupportThenDontSendMediaVfeStateIfNotDirty) { DebugManagerStateRestore dbgRestore; DebugManager.flags.ForcePreemptionMode.set(static_cast(PreemptionMode::ThreadGroup)); auto commandStreamReceiver = new MockCsrHw(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->setPreemptionMode(PreemptionMode::ThreadGroup); pDevice->resetCommandStreamReceiver(commandStreamReceiver); // Configure the CSR to not need to submit any state or commands. configureCSRtoNonDirtyState(true); flushTask(*commandStreamReceiver); EXPECT_EQ(taskLevel, commandStreamReceiver->peekTaskLevel()); auto sizeUsed = commandStreamReceiver->commandStream.getUsed(); EXPECT_EQ(0u, sizeUsed); } /* After registerInstructionCacheFlush() (recursiveLockCounter is expected to be 1), expects the flush to emit a command with instruction cache invalidation enabled and to clear requiresInstructionCacheFlush. */ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, givenCommandStreamReceiverWithInstructionCacheRequestWhenFlushTaskIsCalledThenPipeControlWithInstructionCacheIsEmitted) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); configureCSRtoNonDirtyState(true); commandStreamReceiver.registerInstructionCacheFlush(); EXPECT_EQ(1u, commandStreamReceiver.recursiveLockCounter); flushTask(commandStreamReceiver); parseCommands(commandStreamReceiver.commandStream, 0); auto itorPC = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), itorPC); auto pipeControlCmd = reinterpret_cast(*itorPC); EXPECT_TRUE(pipeControlCmd->getInstructionCacheInvalidateEnable()); EXPECT_FALSE(commandStreamReceiver.requiresInstructionCacheFlush); } /* With timestampPacketWriteEnabled, the preamble marked as sent and a task level raised above the CSR's, expects the search over the parsed CSR stream to find nothing (itorPC == end). */ HWCMDTEST_F(IGFX_XE_HP_CORE,
CommandStreamReceiverFlushTaskXeHPAndLaterTests, givenHigherTaskLevelWhenTimestampPacketWriteIsEnabledThenDontAddPipeControl) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.timestampPacketWriteEnabled = true; commandStreamReceiver.isPreambleSent = true; configureCSRtoNonDirtyState(true); commandStreamReceiver.taskLevel = taskLevel; taskLevel++; // submit with higher taskLevel flushTask(commandStreamReceiver); parseCommands(commandStreamReceiver.commandStream, 0); auto itorPC = find(cmdList.begin(), cmdList.end()); EXPECT_EQ(cmdList.end(), itorPC); } /* With ForcePipeControlPriorToWalker and FlushAllCaches set, walks the parsed CSR stream and expects exactly two pipe controls: the first with only the CS stall bit set among the checked fields, the second with CS stall and every checked flush/invalidate bit set. */ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, WhenForcePipeControlPriorToWalkerIsSetThenAddExtraPipeControls) { DebugManagerStateRestore stateResore; DebugManager.flags.ForcePipeControlPriorToWalker.set(true); DebugManager.flags.FlushAllCaches.set(true); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.isPreambleSent = true; configureCSRtoNonDirtyState(true); commandStreamReceiver.taskLevel = taskLevel; flushTask(commandStreamReceiver); parseCommands(commandStreamReceiver.commandStream, 0); GenCmdList::iterator itor = cmdList.begin(); int counterPC = 0; while (itor != cmdList.end()) { auto pipeControl = genCmdCast(*itor); if (pipeControl) { switch (counterPC) { case 0: // First pipe control with CS Stall EXPECT_EQ(bool(pipeControl->getCommandStreamerStallEnable()), true); EXPECT_EQ(bool(pipeControl->getDcFlushEnable()), false); EXPECT_EQ(bool(pipeControl->getRenderTargetCacheFlushEnable()), false); EXPECT_EQ(bool(pipeControl->getInstructionCacheInvalidateEnable()), false); EXPECT_EQ(bool(pipeControl->getTextureCacheInvalidationEnable()), false); EXPECT_EQ(bool(pipeControl->getPipeControlFlushEnable()), false); EXPECT_EQ(bool(pipeControl->getVfCacheInvalidationEnable()), false); EXPECT_EQ(bool(pipeControl->getConstantCacheInvalidationEnable()), false);
EXPECT_EQ(bool(pipeControl->getStateCacheInvalidationEnable()), false); break; case 1: // Second pipe control with all flushes EXPECT_EQ(bool(pipeControl->getCommandStreamerStallEnable()), true); EXPECT_EQ(bool(pipeControl->getDcFlushEnable()), true); EXPECT_EQ(bool(pipeControl->getRenderTargetCacheFlushEnable()), true); EXPECT_EQ(bool(pipeControl->getInstructionCacheInvalidateEnable()), true); EXPECT_EQ(bool(pipeControl->getTextureCacheInvalidationEnable()), true); EXPECT_EQ(bool(pipeControl->getPipeControlFlushEnable()), true); EXPECT_EQ(bool(pipeControl->getVfCacheInvalidationEnable()), true); EXPECT_EQ(bool(pipeControl->getConstantCacheInvalidationEnable()), true); EXPECT_EQ(bool(pipeControl->getStateCacheInvalidationEnable()), true); default: break; } counterPC++; } ++itor; } EXPECT_EQ(counterPC, 2); } /* With the sampler cache flush state set to samplerCacheFlushNotRequired and waSamplerCacheFlushBetweenRedescribedSurfaceReads enabled, expects the flush to write nothing to the CSR stream and the state to remain samplerCacheFlushNotRequired. */ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, whenSamplerCacheFlushNotRequiredThenDontSendPipecontrol) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); NEO::WorkaroundTable *waTable = &pDevice->getRootDeviceEnvironment().getMutableHardwareInfo()->workaroundTable; commandStreamReceiver.isPreambleSent = true; commandStreamReceiver.lastPreemptionMode = pDevice->getPreemptionMode(); commandStreamReceiver.setSamplerCacheFlushRequired(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushNotRequired); configureCSRtoNonDirtyState(true); commandStreamReceiver.taskLevel = taskLevel; waTable->flags.waSamplerCacheFlushBetweenRedescribedSurfaceReads = true; flushTask(commandStreamReceiver); EXPECT_EQ(commandStreamReceiver.commandStream.getUsed(), 0u); EXPECT_EQ(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushNotRequired, commandStreamReceiver.samplerCacheFlushRequired); parseCommands(commandStreamReceiver.commandStream, 0); auto itorPC = find(cmdList.begin(), cmdList.end()); EXPECT_EQ(cmdList.end(), itorPC); } /* With the sampler cache flush state set to samplerCacheFlushBefore but waSamplerCacheFlushBetweenRedescribedSurfaceReads disabled, expects the flush to write nothing to the CSR stream and the state to remain samplerCacheFlushBefore. */ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests,
whenSamplerCacheFlushBeforeAndWaSamplerCacheFlushBetweenRedescribedSurfaceReadsDasabledThenDontSendPipecontrol) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.isPreambleSent = true; commandStreamReceiver.setSamplerCacheFlushRequired(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushBefore); configureCSRtoNonDirtyState(true); commandStreamReceiver.taskLevel = taskLevel; NEO::WorkaroundTable *waTable = &pDevice->getRootDeviceEnvironment().getMutableHardwareInfo()->workaroundTable; waTable->flags.waSamplerCacheFlushBetweenRedescribedSurfaceReads = false; flushTask(commandStreamReceiver); EXPECT_EQ(commandStreamReceiver.commandStream.getUsed(), 0u); EXPECT_EQ(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushBefore, commandStreamReceiver.samplerCacheFlushRequired); parseCommands(commandStreamReceiver.commandStream, 0); auto itorPC = find(cmdList.begin(), cmdList.end()); EXPECT_EQ(cmdList.end(), itorPC); } /* Flushes a task and compares the parsed STATE_BASE_ADDRESS against the DSH and SSH CPU bases, the memory manager's internal heap base address for the IOH allocation's memory pool, and the MOCS values queried from GmmHelper. */ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, WhenFlushingTaskThenStateBaseAddressProgrammingShouldMatchTracking) { typedef typename FamilyType::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; auto gmmHelper = pDevice->getGmmHelper(); auto stateHeapMocs = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_STATE_HEAP_BUFFER); auto l1CacheOnMocs = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); flushTask(commandStreamReceiver); auto &commandStreamCSR = commandStreamReceiver.commandStream; parseCommands(commandStreamCSR, 0); HardwareParse::findHardwareCommands(); ASSERT_NE(nullptr, cmdStateBaseAddress); auto &cmd = *reinterpret_cast(cmdStateBaseAddress); EXPECT_EQ(dsh.getCpuBase(), reinterpret_cast(cmd.getDynamicStateBaseAddress())); EXPECT_EQ(commandStreamReceiver.getMemoryManager()->getInternalHeapBaseAddress(commandStreamReceiver.rootDeviceIndex,
ioh.getGraphicsAllocation()->isAllocatedInLocalMemoryPool()), cmd.getInstructionBaseAddress()); EXPECT_EQ(ssh.getCpuBase(), reinterpret_cast(cmd.getSurfaceStateBaseAddress())); EXPECT_EQ(l1CacheOnMocs, cmd.getStatelessDataPortAccessMemoryObjectControlState()); EXPECT_EQ(stateHeapMocs, cmd.getInstructionMemoryObjectControlState()); } /* Flushes through a non-dirty MockCsrHw with blocking and guardCommandBufferWithPipeControl set and expects the task stream to grow and contain a match for the search, while the CSR stream stays empty and yields an empty command list. */ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, GivenBlockingWhenFlushingTaskThenPipeControlProgrammedCorrectly) { typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto commandStreamReceiver = new MockCsrHw(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(commandStreamReceiver); // Configure the CSR to not need to submit any state or commands configureCSRtoNonDirtyState(true); // Force a PIPE_CONTROL through a blocking flag auto blocking = true; auto &commandStreamTask = commandQueue.getCS(1024); auto &commandStreamCSR = commandStreamReceiver->getCS(); commandStreamReceiver->streamProperties.stateComputeMode.isCoherencyRequired.value = 0; DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); dispatchFlags.blocking = blocking; dispatchFlags.guardCommandBufferWithPipeControl = true; commandStreamReceiver->flushTask( commandStreamTask, 0, &dsh, &ioh, &ssh, taskLevel, dispatchFlags, *pDevice); // Verify that taskCS got modified, while csrCS remained intact EXPECT_GT(commandStreamTask.getUsed(), 0u); EXPECT_EQ(0u, commandStreamCSR.getUsed()); // Parse command list to verify that PC got added to taskCS cmdList.clear(); parseCommands(commandStreamTask, 0); auto itorTaskCS = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), itorTaskCS); // Parse command list to verify that PC wasn't added to csrCS cmdList.clear();
parseCommands(commandStreamCSR, 0); auto numberOfPC = getCommandsList().size(); EXPECT_EQ(0u, numberOfPC); } /* Flushes a task through a non-dirty MockCsrHw2 and expects flushCalledCount to stay 0. */ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, givenCsrInNonDirtyStateWhenflushTaskIsCalledThenNoFlushIsCalled) { CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(mockCsr); configureCSRtoNonDirtyState(true); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); mockCsr->flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevel, dispatchFlags, *pDevice); EXPECT_EQ(0, mockCsr->flushCalledCount); } /* Flushes a task through a non-dirty MockCsrHw2 in BatchedDispatch mode with a mock submissions aggregator and expects no flush call, an empty command-buffer list and no residency allocations. */ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, givenCsrInNonDirtyStateAndBatchingModeWhenflushTaskIsCalledWithDisabledPreemptionThenSubmissionIsNotRecorded) { CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); configureCSRtoNonDirtyState(true); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); mockCsr->flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevel, dispatchFlags, *pDevice); EXPECT_EQ(0, mockCsr->flushCalledCount); EXPECT_TRUE(mockedSubmissionsAggregator->peekCmdBufferList().peekIsEmpty()); //surfaces are
non resident auto &surfacesForResidency = mockCsr->getResidencyAllocations(); EXPECT_EQ(0u, surfacesForResidency.size()); } /* In BatchedDispatch mode with implicit flushes disabled, records one command buffer starting at offset 4, checks its surface count (4 plus the CSR-owned surfaces counted in csrSurfaceCount) and that each surface is resident, then calls flushBatchedSubmissions() and checks the recorded batch buffer fields, that flush ran exactly once, and that the surfaces are no longer resident. */ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, givenCsrInBatchingModeWhenRecordedBatchBufferIsBeingSubmittedThenFlushIsCalledWithRecordedCommandBuffer) { CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->useNewResourceImplicitFlush = false; mockCsr->useGpuIdleImplicitFlush = false; mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); configureCSRtoNonDirtyState(true); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); dispatchFlags.guardCommandBufferWithPipeControl = true; dispatchFlags.requiresCoherency = true; mockCsr->streamProperties.stateComputeMode.isCoherencyRequired.value = 1; commandStream.getSpace(4); mockCsr->flushTask(commandStream, 4, &dsh, &ioh, &ssh, taskLevel, dispatchFlags, *pDevice); EXPECT_EQ(0, mockCsr->flushCalledCount); auto &surfacesForResidency = mockCsr->getResidencyAllocations(); EXPECT_EQ(0u, surfacesForResidency.size()); auto &cmdBufferList = mockedSubmissionsAggregator->peekCommandBuffers(); EXPECT_FALSE(cmdBufferList.peekIsEmpty()); auto cmdBuffer = cmdBufferList.peekHead(); //preemption allocation + sip kernel size_t csrSurfaceCount = (pDevice->getPreemptionMode() == PreemptionMode::MidThread) ? 2 : 0; csrSurfaceCount -= pDevice->getHardwareInfo().capabilityTable.supportsImages ? 0 : 1; csrSurfaceCount += mockCsr->globalFenceAllocation ?
1 : 0; csrSurfaceCount += mockCsr->clearColorAllocation ? 1 : 0; EXPECT_EQ(4u + csrSurfaceCount, cmdBuffer->surfaces.size()); //copy those surfaces std::vector residentSurfaces = cmdBuffer->surfaces; for (auto &graphicsAllocation : residentSurfaces) { EXPECT_TRUE(graphicsAllocation->isResident(mockCsr->getOsContext().getContextId())); EXPECT_EQ(1u, graphicsAllocation->getResidencyTaskCount(mockCsr->getOsContext().getContextId())); } mockCsr->flushBatchedSubmissions(); EXPECT_FALSE(mockCsr->recordedCommandBuffer->batchBuffer.low_priority); EXPECT_TRUE(mockCsr->recordedCommandBuffer->batchBuffer.requiresCoherency); EXPECT_EQ(mockCsr->recordedCommandBuffer->batchBuffer.commandBufferAllocation, commandStream.getGraphicsAllocation()); EXPECT_EQ(4u, mockCsr->recordedCommandBuffer->batchBuffer.startOffset); EXPECT_EQ(1, mockCsr->flushCalledCount); EXPECT_TRUE(mockedSubmissionsAggregator->peekCommandBuffers().peekIsEmpty()); EXPECT_EQ(0u, surfacesForResidency.size()); for (auto &graphicsAllocation : residentSurfaces) { EXPECT_FALSE(graphicsAllocation->isResident(mockCsr->getOsContext().getContextId())); } } /* Expects a non-dirty flush through MockCsrHw2 to leave the flush stamp unchanged, to return that same stamp in the completion stamp, and not to call flush. */ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, givenNothingToFlushWhenFlushTaskCalledThenDontFlushStamp) { auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(mockCsr); configureCSRtoNonDirtyState(true); EXPECT_EQ(0, mockCsr->flushCalledCount); auto previousFlushStamp = mockCsr->flushStamp->peekStamp(); auto cmplStamp = flushTask(*mockCsr); EXPECT_EQ(mockCsr->flushStamp->peekStamp(), previousFlushStamp); EXPECT_EQ(previousFlushStamp, cmplStamp.flushStamp); EXPECT_EQ(0, mockCsr->flushCalledCount); } /* With epilogueRequired set, expects getCmdSizeForEpilogue() to report one cache line (0 without the flag), the task stream to use two cache lines and contain a match for itBatchBufferStart whose address is the CSR stream's GPU address but no match for itBBend, and the CSR stream (made resident) to use one cache line with the itBBend match at its CPU base. */ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, givenEpilogueRequiredFlagWhenTaskIsSubmittedDirectlyThenItPointsBackToCsr) { configureCSRtoNonDirtyState(true); auto &commandStreamReceiver =
this->pDevice->getUltCommandStreamReceiver(); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); EXPECT_EQ(0u, commandStreamReceiver.getCmdSizeForEpilogue(dispatchFlags)); dispatchFlags.epilogueRequired = true; dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); EXPECT_EQ(MemoryConstants::cacheLineSize, commandStreamReceiver.getCmdSizeForEpilogue(dispatchFlags)); auto data = commandStream.getSpace(MemoryConstants::cacheLineSize); memset(data, 0, MemoryConstants::cacheLineSize); commandStreamReceiver.storeMakeResidentAllocations = true; commandStreamReceiver.flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevel, dispatchFlags, *pDevice); auto &commandStreamReceiverStream = commandStreamReceiver.getCS(0u); EXPECT_EQ(MemoryConstants::cacheLineSize * 2, commandStream.getUsed()); EXPECT_EQ(MemoryConstants::cacheLineSize, commandStreamReceiverStream.getUsed()); parseCommands(commandStream, 0); auto itBBend = find(cmdList.begin(), cmdList.end()); EXPECT_EQ(itBBend, cmdList.end()); auto itBatchBufferStart = find(cmdList.begin(), cmdList.end()); EXPECT_NE(itBatchBufferStart, cmdList.end()); auto batchBufferStart = genCmdCast(*itBatchBufferStart); EXPECT_EQ(batchBufferStart->getBatchBufferStartAddress(), commandStreamReceiverStream.getGraphicsAllocation()->getGpuAddress()); parseCommands(commandStreamReceiverStream, 0); itBBend = find(cmdList.begin(), cmdList.end()); void *bbEndAddress = *itBBend; EXPECT_EQ(commandStreamReceiver.isMadeResident(commandStreamReceiverStream.getGraphicsAllocation())); } /* Fixture derived from CommandStreamReceiverFlushTaskXeHPAndLaterTests whose SetUp() sets CreateMultipleSubDevices to 2, EnableImplicitScaling to 1 and parsePipeControl to true before running the base SetUp(); the DebugManagerStateRestore member restores the debug flags. */ struct CommandStreamReceiverFlushTaskXeHPAndLaterMultiTileTests : public CommandStreamReceiverFlushTaskXeHPAndLaterTests { void SetUp() override { DebugManager.flags.CreateMultipleSubDevices.set(2); DebugManager.flags.EnableImplicitScaling.set(1); parsePipeControl = true; CommandStreamReceiverFlushTaskXeHPAndLaterTests::SetUp(); }
template void verifyPipeControl(UltCommandStreamReceiver &commandStreamReceiver, uint32_t expectedTaskCount, bool workLoadPartition) { using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; uint64_t gpuAddressTagAllocation = commandStreamReceiver.getTagAllocation()->getGpuAddress(); bool pipeControlTagUpdate = false; bool pipeControlWorkloadPartition = false; auto itorPipeControl = pipeControlList.begin(); while (itorPipeControl != pipeControlList.end()) { auto pipeControl = reinterpret_cast(*itorPipeControl); if (pipeControl->getPostSyncOperation() == PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) { pipeControlTagUpdate = true; if (pipeControl->getWorkloadPartitionIdOffsetEnable()) { pipeControlWorkloadPartition = true; } EXPECT_EQ(gpuAddressTagAllocation, NEO::UnitTestHelper::getPipeControlPostSyncAddress(*pipeControl)); EXPECT_EQ(expectedTaskCount, pipeControl->getImmediateData()); break; } itorPipeControl++; } EXPECT_TRUE(pipeControlTagUpdate); if (workLoadPartition) { EXPECT_TRUE(pipeControlWorkloadPartition); } else { EXPECT_FALSE(pipeControlWorkloadPartition); } } template void verifyActivePartitionConfig(UltCommandStreamReceiver &commandStreamReceiver, bool activePartitionExists) { using MI_LOAD_REGISTER_MEM = typename GfxFamily::MI_LOAD_REGISTER_MEM; using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM; uint64_t expectedWparidData = 0u; if (activePartitionExists) { expectedWparidData = commandStreamReceiver.getWorkPartitionAllocationGpuAddress(); } uint32_t expectedWparidRegister = 0x221C; uint32_t expectedAddressOffsetData = commandStreamReceiver.getPostSyncWriteOffset(); uint32_t expectedAddressOffsetRegister = 0x23B4; bool wparidConfiguration = false; bool addressOffsetConfiguration = false; auto lrmList = getCommandsList(); auto itorWparidRegister = lrmList.begin(); while (itorWparidRegister != lrmList.end()) { auto loadRegisterMem = reinterpret_cast(*itorWparidRegister); if (loadRegisterMem->getRegisterAddress() == 
expectedWparidRegister) { wparidConfiguration = true; EXPECT_EQ(expectedWparidData, loadRegisterMem->getMemoryAddress()); break; } itorWparidRegister++; } auto itorAddressOffsetRegister = lriList.begin(); while (itorAddressOffsetRegister != lriList.end()) { auto loadRegisterImm = reinterpret_cast(*itorAddressOffsetRegister); if (loadRegisterImm->getRegisterOffset() == expectedAddressOffsetRegister) { addressOffsetConfiguration = true; EXPECT_EQ(expectedAddressOffsetData, loadRegisterImm->getDataDword()); break; } itorAddressOffsetRegister++; } if (activePartitionExists) { EXPECT_TRUE(wparidConfiguration); EXPECT_TRUE(addressOffsetConfiguration); } else { EXPECT_FALSE(wparidConfiguration); EXPECT_FALSE(addressOffsetConfiguration); } } template void prepareLinearStream(LinearStream &parsedStream, size_t offset) { cmdList.clear(); lriList.clear(); pipeControlList.clear(); parseCommands(parsedStream, offset); findHardwareCommands(); } DebugManagerStateRestore restorer; }; HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterMultiTileTests, givenMultipleStaticActivePartitionsWhenFlushingTaskThenExpectTagUpdatePipeControlWithPartitionFlagOnAndActivePartitionConfig) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); if (pDevice->getPreemptionMode() == PreemptionMode::MidThread || pDevice->isDebuggerActive()) { commandStreamReceiver.createPreemptionAllocation(); } EXPECT_EQ(1u, commandStreamReceiver.activePartitionsConfig); commandStreamReceiver.activePartitions = 2; commandStreamReceiver.taskCount = 3; EXPECT_TRUE(commandStreamReceiver.staticWorkPartitioningEnabled); flushTask(commandStreamReceiver, true); EXPECT_EQ(2u, commandStreamReceiver.activePartitionsConfig); prepareLinearStream(commandStream, 0); verifyPipeControl(commandStreamReceiver, 4, true); prepareLinearStream(commandStreamReceiver.commandStream, 0); verifyActivePartitionConfig(commandStreamReceiver, true); } HWCMDTEST_F(IGFX_XE_HP_CORE, 
CommandStreamReceiverFlushTaskXeHPAndLaterMultiTileTests, givenMultipleStaticActivePartitionsWhenFlushingTagUpdateThenExpectTagUpdatePipeControlWithPartitionFlagOnAndActivePartitionConfig) { DebugManagerStateRestore restorer; DebugManager.flags.UpdateTaskCountFromWait.set(3); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); if (pDevice->getPreemptionMode() == PreemptionMode::MidThread || pDevice->isDebuggerActive()) { commandStreamReceiver.createPreemptionAllocation(); } EXPECT_EQ(1u, commandStreamReceiver.activePartitionsConfig); commandStreamReceiver.activePartitions = 2; commandStreamReceiver.taskCount = 3; EXPECT_TRUE(commandStreamReceiver.staticWorkPartitioningEnabled); flushTask(commandStreamReceiver, true); commandStreamReceiver.flushTagUpdate(); EXPECT_EQ(2u, commandStreamReceiver.activePartitionsConfig); prepareLinearStream(commandStream, 0); verifyPipeControl(commandStreamReceiver, 4, true); } HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterMultiTileTests, givenSingleStaticActivePartitionWhenFlushingTaskThenExpectTagUpdatePipeControlWithoutPartitionFlagOnAndNoActivePartitionConfig) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); if (pDevice->getPreemptionMode() == PreemptionMode::MidThread || pDevice->isDebuggerActive()) { commandStreamReceiver.createPreemptionAllocation(); } commandStreamReceiver.activePartitions = 1; commandStreamReceiver.taskCount = 3; flushTask(commandStreamReceiver, true); parseCommands(commandStream, 0); parsePipeControl = true; findHardwareCommands(); prepareLinearStream(commandStream, 0); verifyPipeControl(commandStreamReceiver, 4, false); prepareLinearStream(commandStreamReceiver.commandStream, 0); verifyActivePartitionConfig(commandStreamReceiver, false); } HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterMultiTileTests, 
givenMultipleStaticActivePartitionsWhenFlushingTaskTwiceThenExpectTagUpdatePipeControlWithPartitionFlagOnAndNoActivePartitionConfigAtSecondFlush) { DebugManagerStateRestore restorer; DebugManager.flags.UpdateTaskCountFromWait.set(3); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); if (pDevice->getPreemptionMode() == PreemptionMode::MidThread || pDevice->isDebuggerActive()) { commandStreamReceiver.createPreemptionAllocation(); } EXPECT_EQ(1u, commandStreamReceiver.activePartitionsConfig); commandStreamReceiver.activePartitions = 2; commandStreamReceiver.taskCount = 3; EXPECT_TRUE(commandStreamReceiver.staticWorkPartitioningEnabled); flushTask(commandStreamReceiver, true); EXPECT_EQ(2u, commandStreamReceiver.activePartitionsConfig); prepareLinearStream(commandStream, 0); verifyPipeControl(commandStreamReceiver, 4, true); prepareLinearStream(commandStreamReceiver.commandStream, 0); verifyActivePartitionConfig(commandStreamReceiver, true); size_t usedBeforeCmdStream = commandStream.getUsed(); size_t usedBeforeCsrCmdStream = commandStreamReceiver.commandStream.getUsed(); flushTask(commandStreamReceiver, true); prepareLinearStream(commandStream, usedBeforeCmdStream); verifyPipeControl(commandStreamReceiver, 5, true); prepareLinearStream(commandStreamReceiver.commandStream, usedBeforeCsrCmdStream); verifyActivePartitionConfig(commandStreamReceiver, false); } HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterMultiTileTests, givenMultipleDynamicActivePartitionsWhenFlushingTaskTwiceThenExpectTagUpdatePipeControlWithoutPartitionFlagAndPartitionRegisters) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); if (pDevice->getPreemptionMode() == PreemptionMode::MidThread || pDevice->isDebuggerActive()) { commandStreamReceiver.createPreemptionAllocation(); } commandStreamReceiver.activePartitions = 2; commandStreamReceiver.taskCount = 3; commandStreamReceiver.staticWorkPartitioningEnabled = false; 
flushTask(commandStreamReceiver, true); EXPECT_EQ(2u, commandStreamReceiver.activePartitionsConfig); prepareLinearStream(commandStream, 0); verifyPipeControl(commandStreamReceiver, 4, false); prepareLinearStream(commandStreamReceiver.commandStream, 0); verifyActivePartitionConfig(commandStreamReceiver, false); } HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterMultiTileTests, givenMultipleDynamicActivePartitionsWhenFlushingTagUpdateThenExpectTagUpdatePipeControlWithoutPartitionFlag) { DebugManagerStateRestore restorer; DebugManager.flags.UpdateTaskCountFromWait.set(1); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); if (pDevice->getPreemptionMode() == PreemptionMode::MidThread || pDevice->isDebuggerActive()) { commandStreamReceiver.createPreemptionAllocation(); } commandStreamReceiver.activePartitions = 2; commandStreamReceiver.taskCount = 3; commandStreamReceiver.staticWorkPartitioningEnabled = false; commandStreamReceiver.flushTagUpdate(); prepareLinearStream(commandStreamReceiver.commandStream, 0); verifyPipeControl(commandStreamReceiver, 4, false); } HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterMultiTileTests, givenMultipleStaticActivePartitionsAndDirectSubmissionActiveWhenFlushingTaskThenExpectTagUpdatePipeControlWithPartitionFlagOnAndNoActivePartitionConfig) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); if (pDevice->getPreemptionMode() == PreemptionMode::MidThread || pDevice->isDebuggerActive()) { commandStreamReceiver.createPreemptionAllocation(); } commandStreamReceiver.directSubmissionAvailable = true; EXPECT_EQ(1u, commandStreamReceiver.activePartitionsConfig); EXPECT_EQ(2u, commandStreamReceiver.activePartitions); EXPECT_TRUE(commandStreamReceiver.staticWorkPartitioningEnabled); commandStreamReceiver.taskCount = 3; flushTask(commandStreamReceiver, true); EXPECT_EQ(1u, commandStreamReceiver.activePartitionsConfig); prepareLinearStream(commandStream, 0); 
verifyPipeControl(commandStreamReceiver, 4, true); prepareLinearStream(commandStreamReceiver.commandStream, 0); verifyActivePartitionConfig(commandStreamReceiver, false); } command_stream_receiver_hw_1_tests.cpp000066400000000000000000002412531422164147700342710ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/command_stream/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_container/command_encoder.h" #include "shared/source/command_stream/scratch_space_controller.h" #include "shared/source/command_stream/scratch_space_controller_base.h" #include "shared/source/command_stream/wait_status.h" #include "shared/source/helpers/constants.h" #include "shared/source/os_interface/hw_info_config.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/engine_descriptor_helper.h" #include "shared/test/common/mocks/mock_command_stream_receiver.h" #include "shared/test/common/mocks/mock_csr.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "shared/test/common/mocks/mock_os_context.h" #include "shared/test/common/mocks/mock_timestamp_container.h" #include "shared/test/common/mocks/ult_device_factory.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/source/event/user_event.h" #include "opencl/source/helpers/cl_blit_properties.h" #include "opencl/test/unit_test/command_stream/command_stream_receiver_hw_fixture.h" #include "opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" using namespace NEO; HWCMDTEST_F(IGFX_GEN8_CORE, UltCommandStreamReceiverTest, givenPreambleSentAndThreadArbitrationPolicyNotChangedWhenEstimatingPreambleCmdSizeThenReturnItsValue) { auto 
&commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.isPreambleSent = true; auto expectedCmdSize = sizeof(typename FamilyType::PIPE_CONTROL) + sizeof(typename FamilyType::MEDIA_VFE_STATE); EXPECT_EQ(expectedCmdSize, commandStreamReceiver.getRequiredCmdSizeForPreamble(*pDevice)); } HWCMDTEST_F(IGFX_GEN8_CORE, UltCommandStreamReceiverTest, givenNotSentStateSipWhenFirstTaskIsFlushedThenStateSipCmdIsAddedAndIsStateSipSentSetToTrue) { using STATE_SIP = typename FamilyType::STATE_SIP; auto mockDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); if (mockDevice->getHardwareInfo().capabilityTable.defaultPreemptionMode == PreemptionMode::MidThread) { mockDevice->setPreemptionMode(PreemptionMode::MidThread); auto &csr = mockDevice->getUltCommandStreamReceiver(); csr.isPreambleSent = true; CommandQueueHw commandQueue(nullptr, mockDevice.get(), 0, false); auto &commandStream = commandQueue.getCS(4096u); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionMode::MidThread; MockGraphicsAllocation allocation(nullptr, 0); IndirectHeap heap(&allocation); csr.flushTask(commandStream, 0, &heap, &heap, &heap, 0, dispatchFlags, mockDevice->getDevice()); EXPECT_TRUE(csr.isStateSipSent); HardwareParse hwParser; hwParser.parseCommands(csr.getCS(0)); auto stateSipItor = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); EXPECT_NE(hwParser.cmdList.end(), stateSipItor); } } HWTEST_F(UltCommandStreamReceiverTest, givenCsrWhenProgramStateSipIsCalledThenIsStateSipCalledIsSetToTrue) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); auto requiredSize = PreemptionHelper::getRequiredStateSipCmdSize(*pDevice, commandStreamReceiver.isRcs()); StackVec buffer(requiredSize); LinearStream cmdStream(buffer.begin(), buffer.size()); commandStreamReceiver.programStateSip(cmdStream, *pDevice); EXPECT_TRUE(commandStreamReceiver.isStateSipSent); } 
// NOTE(review): the explicit <FamilyType> template arguments below are not deducible from the
// call arguments and were missing from this text; they are restored here using the FamilyType
// parameter that HWTEST_F bodies in this file already rely on - confirm against upstream.

// Compares the required command-stream size with isStateSipSent cleared and set; the difference
// is expected to equal the size reported by PreemptionHelper for STATE_SIP.
HWTEST_F(UltCommandStreamReceiverTest, givenSentStateSipFlagSetWhenGetRequiredStateSipCmdSizeIsCalledThenStateSipCmdSizeIsNotIncluded) {
    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
    DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags();

    commandStreamReceiver.isStateSipSent = false;
    auto sizeWithStateSipIsNotSent = commandStreamReceiver.getRequiredCmdStreamSize(dispatchFlags, *pDevice);

    commandStreamReceiver.isStateSipSent = true;
    auto sizeWhenSipIsSent = commandStreamReceiver.getRequiredCmdStreamSize(dispatchFlags, *pDevice);

    auto sizeForStateSip = PreemptionHelper::getRequiredStateSipCmdSize<FamilyType>(*pDevice, commandStreamReceiver.isRcs());
    EXPECT_EQ(sizeForStateSip, sizeWithStateSipIsNotSent - sizeWhenSipIsSent);
}

// Uses the size obtained with usePerDssBackedBuffer == false as the baseline, then checks that
// requesting the buffer adds getCmdSizeForPerDssBackedBuffer() only while
// isPerDssBackedBufferSent is false.
HWTEST_F(UltCommandStreamReceiverTest, whenGetCmdSizeForPerDssBackedBufferIsCalledThenCorrectResultIsReturned) {
    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
    DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags();

    dispatchFlags.usePerDssBackedBuffer = false;
    commandStreamReceiver.isPerDssBackedBufferSent = true;
    auto basicSize = commandStreamReceiver.getRequiredCmdStreamSize(dispatchFlags, *pDevice);

    {
        // Buffer requested but already marked as sent: size must match the baseline.
        dispatchFlags.usePerDssBackedBuffer = true;
        commandStreamReceiver.isPerDssBackedBufferSent = true;
        auto newSize = commandStreamReceiver.getRequiredCmdStreamSize(dispatchFlags, *pDevice);
        EXPECT_EQ(basicSize, newSize);
    }

    {
        // Buffer requested and not yet sent: size grows by the per-DSS backed buffer command size.
        dispatchFlags.usePerDssBackedBuffer = true;
        commandStreamReceiver.isPerDssBackedBufferSent = false;
        auto newSize = commandStreamReceiver.getRequiredCmdStreamSize(dispatchFlags, *pDevice);
        EXPECT_EQ(basicSize, newSize - commandStreamReceiver.getCmdSizeForPerDssBackedBuffer(pDevice->getHardwareInfo()));
    }
}

// With isStateSipSent kept true, measures the required size before and after activating the
// debugger on the device; after subtracting the kernel-debugging commands size, the remaining
// difference is expected to equal the STATE_SIP command size.
HWTEST_F(UltCommandStreamReceiverTest, givenSentStateSipFlagSetAndSourceLevelDebuggerIsActiveWhenGetRequiredStateSipCmdSizeIsCalledThenStateSipCmdSizeIsIncluded) {
    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
    DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags();

    commandStreamReceiver.isStateSipSent = true;
    auto sizeWithoutSourceKernelDebugging = commandStreamReceiver.getRequiredCmdStreamSize(dispatchFlags, *pDevice);

    pDevice->setDebuggerActive(true);
    commandStreamReceiver.isStateSipSent = true;
    auto sizeWithSourceKernelDebugging = commandStreamReceiver.getRequiredCmdStreamSize(dispatchFlags, *pDevice);

    auto sizeForStateSip = PreemptionHelper::getRequiredStateSipCmdSize<FamilyType>(*pDevice, commandStreamReceiver.isRcs());
    EXPECT_EQ(sizeForStateSip, sizeWithSourceKernelDebugging - sizeWithoutSourceKernelDebugging - PreambleHelper<FamilyType>::getKernelDebuggingCommandsSize(true));

    // Put the device back into the non-debug state set at the start of the comparison.
    pDevice->setDebuggerActive(false);
}

// Marks the thread-arbitration-policy and coherency stream properties as dirty and checks that
// the preamble size estimate is unchanged while the flush estimate grows by the compute-mode
// command size.
HWTEST_F(UltCommandStreamReceiverTest, givenPreambleSentAndThreadArbitrationPolicyChangedWhenEstimatingFlushTaskSizeThenResultDependsOnPolicyProgrammingCmdSize) {
    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
    commandStreamReceiver.isPreambleSent = true;

    auto policyNotChangedPreamble = commandStreamReceiver.getRequiredCmdSizeForPreamble(*pDevice);
    auto policyNotChangedFlush = commandStreamReceiver.getRequiredCmdStreamSize(flushTaskFlags, *pDevice);

    commandStreamReceiver.streamProperties.stateComputeMode.threadArbitrationPolicy.isDirty = true;
    commandStreamReceiver.streamProperties.stateComputeMode.isCoherencyRequired.isDirty = true;

    auto policyChangedPreamble = commandStreamReceiver.getRequiredCmdSizeForPreamble(*pDevice);
    auto policyChangedFlush = commandStreamReceiver.getRequiredCmdStreamSize(flushTaskFlags, *pDevice);

    auto actualDifferenceForPreamble = policyChangedPreamble - policyNotChangedPreamble;
    auto actualDifferenceForFlush = policyChangedFlush - policyNotChangedFlush;
    auto expectedDifference = EncodeComputeMode<FamilyType>::getCmdSizeForComputeMode(*defaultHwInfo, false, commandStreamReceiver.isRcs());

    EXPECT_EQ(0u, actualDifferenceForPreamble);
    EXPECT_EQ(expectedDifference, actualDifferenceForFlush);
}
// NOTE(review): the explicit template arguments (<FamilyType>) and the static_cast target type
// (<MockOsContext *>) were missing from this text, which made the calls and casts ill-formed.
// They are restored here; MockOsContext is assumed because this file includes
// mock_os_context.h - confirm against upstream.

// Compares preamble and flush size estimates with isPreambleSent cleared and set. The preamble
// difference is expected to be the additional-commands size; the flush difference additionally
// includes the L3 config and pipeline-select command sizes.
HWTEST_F(UltCommandStreamReceiverTest, givenPreambleSentWhenEstimatingFlushTaskSizeThenResultDependsOnAdditionalCmdsSize) {
    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();

    commandStreamReceiver.isPreambleSent = false;
    auto preambleNotSentPreamble = commandStreamReceiver.getRequiredCmdSizeForPreamble(*pDevice);
    auto preambleNotSentFlush = commandStreamReceiver.getRequiredCmdStreamSize(flushTaskFlags, *pDevice);

    commandStreamReceiver.isPreambleSent = true;
    auto preambleSentPreamble = commandStreamReceiver.getRequiredCmdSizeForPreamble(*pDevice);
    auto preambleSentFlush = commandStreamReceiver.getRequiredCmdStreamSize(flushTaskFlags, *pDevice);

    auto actualDifferenceForPreamble = preambleNotSentPreamble - preambleSentPreamble;
    auto actualDifferenceForFlush = preambleNotSentFlush - preambleSentFlush;

    commandStreamReceiver.isPreambleSent = false;
    auto expectedDifferenceForPreamble = PreambleHelper<FamilyType>::getAdditionalCommandsSize(*pDevice);
    auto expectedDifferenceForFlush = expectedDifferenceForPreamble + commandStreamReceiver.getCmdSizeForL3Config() +
                                      PreambleHelper<FamilyType>::getCmdSizeForPipelineSelect(pDevice->getHardwareInfo());

    EXPECT_EQ(expectedDifferenceForPreamble, actualDifferenceForPreamble);
    EXPECT_EQ(expectedDifferenceForFlush, actualDifferenceForFlush);
}

// Toggles the media VFE state dirty flag and expects the preamble size estimate to grow by one
// PIPE_CONTROL plus one MEDIA_VFE_STATE when the state is dirty.
HWCMDTEST_F(IGFX_GEN8_CORE, UltCommandStreamReceiverTest, givenMediaVfeStateDirtyEstimatingPreambleCmdSizeThenResultDependsVfeStateProgrammingCmdSize) {
    using MEDIA_VFE_STATE = typename FamilyType::MEDIA_VFE_STATE;
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();

    commandStreamReceiver.setMediaVFEStateDirty(false);
    auto notDirty = commandStreamReceiver.getRequiredCmdSizeForPreamble(*pDevice);

    commandStreamReceiver.setMediaVFEStateDirty(true);
    auto dirty = commandStreamReceiver.getRequiredCmdSizeForPreamble(*pDevice);

    auto actualDifference = dirty - notDirty;
    auto expectedDifference = sizeof(PIPE_CONTROL) + sizeof(MEDIA_VFE_STATE);
    EXPECT_EQ(expectedDifference, actualDifference);
}

// On a receiver with task count and task level still at zero, the first updateAndCheck() on
// each heap state (DSH, IOH, SSH) is expected to return true.
HWTEST_F(UltCommandStreamReceiverTest, givenCommandStreamReceiverInInitialStateWhenHeapsAreAskedForDirtyStatusThenTrueIsReturned) {
    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();

    EXPECT_EQ(0u, commandStreamReceiver.peekTaskCount());
    EXPECT_EQ(0u, commandStreamReceiver.peekTaskLevel());

    EXPECT_TRUE(commandStreamReceiver.dshState.updateAndCheck(&dsh));
    EXPECT_TRUE(commandStreamReceiver.iohState.updateAndCheck(&ioh));
    EXPECT_TRUE(commandStreamReceiver.sshState.updateAndCheck(&ssh));
}

// Measures the preamble size estimate with ForceSemaphoreDelayBetweenWaits at -1 and at 0
// (preamble not sent) against the preamble-sent estimate. With the flag at 0 the expected
// difference additionally includes the semaphore-delay command size.
HWTEST_F(UltCommandStreamReceiverTest, givenPreambleSentAndForceSemaphoreDelayBetweenWaitsFlagWhenEstimatingPreambleCmdSizeThenResultIsExpected) {
    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
    DebugManagerStateRestore debugManagerStateRestore;

    DebugManager.flags.ForceSemaphoreDelayBetweenWaits.set(-1);
    commandStreamReceiver.isPreambleSent = false;
    auto preambleNotSentAndSemaphoreDelayNotReprogrammed = commandStreamReceiver.getRequiredCmdSizeForPreamble(*pDevice);

    DebugManager.flags.ForceSemaphoreDelayBetweenWaits.set(0);
    commandStreamReceiver.isPreambleSent = false;
    auto preambleNotSentAndSemaphoreDelayReprogrammed = commandStreamReceiver.getRequiredCmdSizeForPreamble(*pDevice);

    commandStreamReceiver.isPreambleSent = true;
    auto preambleSent = commandStreamReceiver.getRequiredCmdSizeForPreamble(*pDevice);

    auto actualDifferenceWhenSemaphoreDelayNotReprogrammed = preambleNotSentAndSemaphoreDelayNotReprogrammed - preambleSent;
    auto expectedDifference = PreambleHelper<FamilyType>::getAdditionalCommandsSize(*pDevice);
    EXPECT_EQ(expectedDifference, actualDifferenceWhenSemaphoreDelayNotReprogrammed);

    auto actualDifferenceWhenSemaphoreDelayReprogrammed = preambleNotSentAndSemaphoreDelayReprogrammed - preambleSent;
    expectedDifference = PreambleHelper<FamilyType>::getAdditionalCommandsSize(*pDevice) + PreambleHelper<FamilyType>::getSemaphoreDelayCommandSize();
    EXPECT_EQ(expectedDifference, actualDifferenceWhenSemaphoreDelayReprogrammed);
}

// BCS engine, no override flag, engineSupported == false: the check is expected to return false
// and to leave both start flags false (startOnInit is passed in as true).
HWTEST_F(UltCommandStreamReceiverTest, givenNoBlitterOverrideWhenBlitterNotSupportedThenExpectFalseReturned) {
    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
    auto osContext = static_cast<MockOsContext *>(commandStreamReceiver.osContext);

    DirectSubmissionProperties properties;
    properties.engineSupported = false;
    properties.submitOnInit = false;

    bool startOnInit = true;
    bool startInContext = false;
    EXPECT_FALSE(osContext->checkDirectSubmissionSupportsEngine(properties, aub_stream::ENGINE_BCS, startOnInit, startInContext));
    EXPECT_FALSE(startOnInit);
    EXPECT_FALSE(startInContext);
}

// BCS engine, no override flag, engineSupported and submitOnInit both true: the check is
// expected to return true with startOnInit set and startInContext left false.
HWTEST_F(UltCommandStreamReceiverTest, givenNoBlitterOverrideWhenBlitterSupportedThenExpectTrueReturned) {
    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
    auto osContext = static_cast<MockOsContext *>(commandStreamReceiver.osContext);

    DirectSubmissionProperties properties;
    properties.engineSupported = true;
    properties.submitOnInit = true;

    bool startOnInit = false;
    bool startInContext = false;
    EXPECT_TRUE(osContext->checkDirectSubmissionSupportsEngine(properties, aub_stream::ENGINE_BCS, startOnInit, startInContext));
    EXPECT_TRUE(startOnInit);
    EXPECT_FALSE(startInContext);
}

// BCS engine with DirectSubmissionOverrideBlitterSupport == 1 and engineSupported == false:
// the check is expected to return true and set both startOnInit and startInContext.
HWTEST_F(UltCommandStreamReceiverTest, givenBlitterOverrideEnableWhenBlitterNotSupportedThenExpectTrueReturned) {
    DebugManagerStateRestore debugManagerStateRestore;
    DebugManager.flags.DirectSubmissionOverrideBlitterSupport.set(1);

    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
    auto osContext = static_cast<MockOsContext *>(commandStreamReceiver.osContext);

    DirectSubmissionProperties properties;
    properties.engineSupported = false;
    properties.submitOnInit = false;

    bool startOnInit = false;
    bool startInContext = false;
    EXPECT_TRUE(osContext->checkDirectSubmissionSupportsEngine(properties, aub_stream::ENGINE_BCS, startOnInit, startInContext));
    EXPECT_TRUE(startOnInit);
    EXPECT_TRUE(startInContext);
}

// BCS engine with DirectSubmissionOverrideBlitterSupport == 2 and engineSupported == false:
// the check is expected to return true, clear startOnInit and set startInContext.
// NOTE(review): the test name ends in "StartOnInitSetToTrue" although the body asserts that
// startOnInit is false; the name is kept unchanged because it is the test's public identifier.
HWTEST_F(UltCommandStreamReceiverTest, givenBlitterOverrideEnableAndNoStartWhenBlitterNotSupportedThenExpectTrueReturnedStartOnInitSetToTrue) {
    DebugManagerStateRestore debugManagerStateRestore;
    DebugManager.flags.DirectSubmissionOverrideBlitterSupport.set(2);

    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
    auto osContext = static_cast<MockOsContext *>(commandStreamReceiver.osContext);

    DirectSubmissionProperties properties;
    properties.engineSupported = false;
    properties.submitOnInit = true;

    bool startOnInit = true;
    bool startInContext = false;
    EXPECT_TRUE(osContext->checkDirectSubmissionSupportsEngine(properties, aub_stream::ENGINE_BCS, startOnInit, startInContext));
    EXPECT_FALSE(startOnInit);
    EXPECT_TRUE(startInContext);
}

// BCS engine with DirectSubmissionOverrideBlitterSupport == 0 and engineSupported == true:
// the check is expected to return false and leave both start flags false.
HWTEST_F(UltCommandStreamReceiverTest, givenBlitterOverrideDisableWhenBlitterSupportedThenExpectFalseReturned) {
    DebugManagerStateRestore debugManagerStateRestore;
    DebugManager.flags.DirectSubmissionOverrideBlitterSupport.set(0);

    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
    auto osContext = static_cast<MockOsContext *>(commandStreamReceiver.osContext);

    DirectSubmissionProperties properties;
    properties.engineSupported = true;
    properties.submitOnInit = false;

    bool startOnInit = true;
    bool startInContext = false;
    EXPECT_FALSE(osContext->checkDirectSubmissionSupportsEngine(properties, aub_stream::ENGINE_BCS, startOnInit, startInContext));
    EXPECT_FALSE(startOnInit);
    EXPECT_FALSE(startInContext);
}

// RCS engine, no override flag, engineSupported == false: the check is expected to return false
// and to leave both start flags false (startOnInit is passed in as true).
HWTEST_F(UltCommandStreamReceiverTest, givenNoRenderOverrideWhenRenderNotSupportedThenExpectFalseReturned) {
    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
    auto osContext = static_cast<MockOsContext *>(commandStreamReceiver.osContext);

    DirectSubmissionProperties properties;
    properties.engineSupported = false;
    properties.submitOnInit = false;

    bool startOnInit = true;
    bool startInContext = false;
    EXPECT_FALSE(osContext->checkDirectSubmissionSupportsEngine(properties, aub_stream::ENGINE_RCS, startOnInit, startInContext));
    EXPECT_FALSE(startOnInit);
    EXPECT_FALSE(startInContext);
}
HWTEST_F(UltCommandStreamReceiverTest, givenNoRenderOverrideWhenRenderSupportedThenExpectTrueReturned) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); auto osContext = static_cast(commandStreamReceiver.osContext); DirectSubmissionProperties properties; properties.engineSupported = true; properties.submitOnInit = true; bool startOnInit = false; bool startInContext = false; EXPECT_TRUE(osContext->checkDirectSubmissionSupportsEngine(properties, aub_stream::ENGINE_RCS, startOnInit, startInContext)); EXPECT_TRUE(startOnInit); EXPECT_FALSE(startInContext); } HWTEST_F(UltCommandStreamReceiverTest, givenRenderOverrideEnableWhenRenderNotSupportedThenExpectTrueReturned) { DebugManagerStateRestore debugManagerStateRestore; DebugManager.flags.DirectSubmissionOverrideRenderSupport.set(1); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); auto osContext = static_cast(commandStreamReceiver.osContext); DirectSubmissionProperties properties; properties.engineSupported = false; properties.submitOnInit = false; bool startOnInit = false; bool startInContext = false; EXPECT_TRUE(osContext->checkDirectSubmissionSupportsEngine(properties, aub_stream::ENGINE_RCS, startOnInit, startInContext)); EXPECT_TRUE(startOnInit); EXPECT_TRUE(startInContext); } HWTEST_F(UltCommandStreamReceiverTest, givenRenderOverrideEnableAndNoStartWhenRenderNotSupportedThenExpectTrueReturnedAndStartOnInitSetFalse) { DebugManagerStateRestore debugManagerStateRestore; DebugManager.flags.DirectSubmissionOverrideRenderSupport.set(2); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); auto osContext = static_cast(commandStreamReceiver.osContext); DirectSubmissionProperties properties; properties.engineSupported = false; properties.submitOnInit = true; bool startOnInit = true; bool startInContext = false; EXPECT_TRUE(osContext->checkDirectSubmissionSupportsEngine(properties, aub_stream::ENGINE_RCS, startOnInit, startInContext)); EXPECT_FALSE(startOnInit); 
EXPECT_TRUE(startInContext); } HWTEST_F(UltCommandStreamReceiverTest, givenRenderOverrideDisableWhenRenderSupportedThenExpectFalseReturned) { DebugManagerStateRestore debugManagerStateRestore; DebugManager.flags.DirectSubmissionOverrideRenderSupport.set(0); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); auto osContext = static_cast(commandStreamReceiver.osContext); DirectSubmissionProperties properties; properties.engineSupported = true; properties.submitOnInit = false; bool startOnInit = true; bool startInContext = false; EXPECT_FALSE(osContext->checkDirectSubmissionSupportsEngine(properties, aub_stream::ENGINE_RCS, startOnInit, startInContext)); EXPECT_FALSE(startOnInit); EXPECT_FALSE(startInContext); } HWTEST_F(UltCommandStreamReceiverTest, givenNoComputeOverrideWhenComputeNotSupportedThenExpectFalseReturned) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); auto osContext = static_cast(commandStreamReceiver.osContext); DirectSubmissionProperties properties; properties.engineSupported = false; properties.submitOnInit = false; bool startOnInit = true; bool startInContext = false; EXPECT_FALSE(osContext->checkDirectSubmissionSupportsEngine(properties, aub_stream::ENGINE_CCS, startOnInit, startInContext)); EXPECT_FALSE(startOnInit); EXPECT_FALSE(startInContext); } HWTEST_F(UltCommandStreamReceiverTest, givenNoComputeOverrideWhenComputeSupportedThenExpectTrueReturned) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); auto osContext = static_cast(commandStreamReceiver.osContext); DirectSubmissionProperties properties; properties.engineSupported = true; properties.submitOnInit = true; bool startOnInit = false; bool startInContext = false; EXPECT_TRUE(osContext->checkDirectSubmissionSupportsEngine(properties, aub_stream::ENGINE_CCS, startOnInit, startInContext)); EXPECT_TRUE(startOnInit); EXPECT_FALSE(startInContext); } HWTEST_F(UltCommandStreamReceiverTest, 
givenComputeOverrideEnableWhenComputeNotSupportedThenExpectTrueReturned) { DebugManagerStateRestore debugManagerStateRestore; DebugManager.flags.DirectSubmissionOverrideComputeSupport.set(1); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); auto osContext = static_cast(commandStreamReceiver.osContext); DirectSubmissionProperties properties; properties.engineSupported = false; properties.submitOnInit = false; bool startOnInit = false; bool startInContext = false; EXPECT_TRUE(osContext->checkDirectSubmissionSupportsEngine(properties, aub_stream::ENGINE_CCS, startOnInit, startInContext)); EXPECT_TRUE(startOnInit); EXPECT_TRUE(startInContext); } HWTEST_F(UltCommandStreamReceiverTest, givenComputeOverrideEnableAndNoStartWhenComputeNotSupportedThenExpectTrueReturnedAndStartOnInitSetToFalse) { DebugManagerStateRestore debugManagerStateRestore; DebugManager.flags.DirectSubmissionOverrideComputeSupport.set(2); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); auto osContext = static_cast(commandStreamReceiver.osContext); DirectSubmissionProperties properties; properties.engineSupported = false; properties.submitOnInit = true; bool startOnInit = true; bool startInContext = false; EXPECT_TRUE(osContext->checkDirectSubmissionSupportsEngine(properties, aub_stream::ENGINE_CCS, startOnInit, startInContext)); EXPECT_FALSE(startOnInit); EXPECT_TRUE(startInContext); } HWTEST_F(UltCommandStreamReceiverTest, givenComputeOverrideDisableWhenComputeSupportedThenExpectFalseReturned) { DebugManagerStateRestore debugManagerStateRestore; DebugManager.flags.DirectSubmissionOverrideComputeSupport.set(0); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); auto osContext = static_cast(commandStreamReceiver.osContext); DirectSubmissionProperties properties; properties.engineSupported = true; properties.submitOnInit = false; bool startOnInit = true; bool startInContext = false; 
EXPECT_FALSE(osContext->checkDirectSubmissionSupportsEngine(properties, aub_stream::ENGINE_CCS, startOnInit, startInContext)); EXPECT_FALSE(startOnInit); EXPECT_FALSE(startInContext); } HWTEST_F(UltCommandStreamReceiverTest, givenSinglePartitionWhenCallingWaitKmdNotifyThenExpectImplicitBusyLoopWaitCalled) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.callBaseWaitForCompletionWithTimeout = false; commandStreamReceiver.returnWaitForCompletionWithTimeout = NEO::WaitStatus::NotReady; commandStreamReceiver.waitForTaskCountWithKmdNotifyFallback(0, 0, false, QueueThrottle::MEDIUM); EXPECT_EQ(2u, commandStreamReceiver.waitForCompletionWithTimeoutTaskCountCalled); } HWTEST_F(UltCommandStreamReceiverTest, givenMultiplePartitionsWhenCallingWaitKmdNotifyThenExpectExplicitBusyLoopWaitCalled) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.callBaseWaitForCompletionWithTimeout = false; commandStreamReceiver.returnWaitForCompletionWithTimeout = NEO::WaitStatus::NotReady; commandStreamReceiver.waitForTaskCountWithKmdNotifyFallback(0, 0, false, QueueThrottle::MEDIUM); EXPECT_EQ(2u, commandStreamReceiver.waitForCompletionWithTimeoutTaskCountCalled); } typedef UltCommandStreamReceiverTest CommandStreamReceiverFlushTests; HWTEST_F(CommandStreamReceiverFlushTests, WhenAddingBatchBufferEndThenBatchBufferEndIsAppendedCorrectly) { auto usedPrevious = commandStream.getUsed(); CommandStreamReceiverHw::addBatchBufferEnd(commandStream, nullptr); EXPECT_EQ(commandStream.getUsed(), usedPrevious + sizeof(typename FamilyType::MI_BATCH_BUFFER_END)); auto batchBufferEnd = genCmdCast( ptrOffset(commandStream.getCpuBase(), usedPrevious)); EXPECT_NE(nullptr, batchBufferEnd); } typedef Test CommandStreamReceiverHwTest; HWTEST_F(CommandStreamReceiverHwTest, givenCsrHwWhenTypeIsCheckedThenCsrHwIsReturned) { auto csr = std::unique_ptr(CommandStreamReceiverHw::create(*pDevice->executionEnvironment, 
pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield())); EXPECT_EQ(CommandStreamReceiverType::CSR_HW, csr->getType()); } HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverHwTest, WhenCommandStreamReceiverHwIsCreatedThenDefaultSshSizeIs64KB) { auto &commandStreamReceiver = pDevice->getGpgpuCommandStreamReceiver(); EXPECT_EQ(64 * KB, commandStreamReceiver.defaultSshSize); } HWTEST_F(CommandStreamReceiverHwTest, WhenScratchSpaceIsNotRequiredThenScratchAllocationIsNotCreated) { auto commandStreamReceiver = std::make_unique>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); auto scratchController = commandStreamReceiver->getScratchSpaceController(); bool stateBaseAddressDirty = false; bool cfeStateDirty = false; scratchController->setRequiredScratchSpace(reinterpret_cast(0x2000), 0u, 0u, 0u, 0u, *pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, cfeStateDirty); EXPECT_FALSE(cfeStateDirty); EXPECT_FALSE(stateBaseAddressDirty); EXPECT_EQ(nullptr, scratchController->getScratchSpaceAllocation()); EXPECT_EQ(nullptr, scratchController->getPrivateScratchSpaceAllocation()); } HWTEST_F(CommandStreamReceiverHwTest, WhenScratchSpaceIsRequiredThenCorrectAddressIsReturned) { auto commandStreamReceiver = std::make_unique>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); commandStreamReceiver->setupContext(*pDevice->getDefaultEngine().osContext); auto scratchController = commandStreamReceiver->getScratchSpaceController(); bool cfeStateDirty = false; bool stateBaseAddressDirty = false; std::unique_ptr> surfaceHeap(alignedMalloc(0x1000, 0x1000), alignedFree); scratchController->setRequiredScratchSpace(surfaceHeap.get(), 0u, 0x1000u, 0u, 0u, *pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, cfeStateDirty); uint64_t expectedScratchAddress = 0xAAABBBCCCDDD000ull; auto scratchAllocation = scratchController->getScratchSpaceAllocation(); 
scratchAllocation->setCpuPtrAndGpuAddress(scratchAllocation->getUnderlyingBuffer(), expectedScratchAddress); EXPECT_TRUE(UnitTestHelper::evaluateGshAddressForScratchSpace((scratchAllocation->getGpuAddress() - MemoryConstants::pageSize), scratchController->calculateNewGSH())); } HWTEST_F(CommandStreamReceiverHwTest, WhenScratchSpaceIsNotRequiredThenGshAddressZeroIsReturned) { auto commandStreamReceiver = std::make_unique>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); auto scratchController = commandStreamReceiver->getScratchSpaceController(); EXPECT_EQ(nullptr, scratchController->getScratchSpaceAllocation()); EXPECT_EQ(0u, scratchController->calculateNewGSH()); } HWTEST_F(CommandStreamReceiverHwTest, givenDefaultPlatformCapabilityWhenNoDebugKeysSetThenExpectDefaultPlatformSettings) { auto commandStreamReceiver = std::make_unique>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); commandStreamReceiver->setupContext(*osContext); commandStreamReceiver->postInitFlagsSetup(); if (commandStreamReceiver->checkPlatformSupportsNewResourceImplicitFlush()) { EXPECT_TRUE(commandStreamReceiver->useNewResourceImplicitFlush); } else { EXPECT_FALSE(commandStreamReceiver->useNewResourceImplicitFlush); } } HWTEST_F(CommandStreamReceiverHwTest, givenDefaultGpuIdleImplicitFlushWhenNoDebugKeysSetThenExpectDefaultPlatformSettings) { auto commandStreamReceiver = std::make_unique>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); commandStreamReceiver->setupContext(*osContext); commandStreamReceiver->postInitFlagsSetup(); if (commandStreamReceiver->checkPlatformSupportsGpuIdleImplicitFlush()) { EXPECT_TRUE(commandStreamReceiver->useGpuIdleImplicitFlush); } else { EXPECT_FALSE(commandStreamReceiver->useGpuIdleImplicitFlush); } } HWTEST_F(CommandStreamReceiverHwTest, WhenForceDisableNewResourceImplicitFlushThenExpectFlagSetFalse) { DebugManagerStateRestore 
restore; DebugManager.flags.PerformImplicitFlushForNewResource.set(0); auto commandStreamReceiver = std::make_unique>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); commandStreamReceiver->setupContext(*osContext); commandStreamReceiver->postInitFlagsSetup(); EXPECT_FALSE(commandStreamReceiver->useNewResourceImplicitFlush); } HWTEST_F(CommandStreamReceiverHwTest, WhenForceEnableNewResourceImplicitFlushThenExpectFlagSetTrue) { DebugManagerStateRestore restore; DebugManager.flags.PerformImplicitFlushForNewResource.set(1); auto commandStreamReceiver = std::make_unique>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); commandStreamReceiver->setupContext(*osContext); commandStreamReceiver->postInitFlagsSetup(); EXPECT_TRUE(commandStreamReceiver->useNewResourceImplicitFlush); } HWTEST_F(CommandStreamReceiverHwTest, WhenForceDisableGpuIdleImplicitFlushThenExpectFlagSetFalse) { DebugManagerStateRestore restore; DebugManager.flags.PerformImplicitFlushForIdleGpu.set(0); auto commandStreamReceiver = std::make_unique>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); commandStreamReceiver->setupContext(*osContext); commandStreamReceiver->postInitFlagsSetup(); EXPECT_FALSE(commandStreamReceiver->useGpuIdleImplicitFlush); } HWTEST_F(CommandStreamReceiverHwTest, WhenForceEnableGpuIdleImplicitFlushThenExpectFlagSetTrue) { DebugManagerStateRestore restore; DebugManager.flags.PerformImplicitFlushForIdleGpu.set(1); auto commandStreamReceiver = std::make_unique>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); commandStreamReceiver->setupContext(*osContext); commandStreamReceiver->postInitFlagsSetup(); EXPECT_TRUE(commandStreamReceiver->useGpuIdleImplicitFlush); } HWTEST2_F(CommandStreamReceiverHwTest, whenProgramVFEStateIsCalledThenCorrectComputeOverdispatchDisableValueIsProgrammed, IsAtLeastXeHpCore) { 
using CFE_STATE = typename FamilyType::CFE_STATE; UltDeviceFactory deviceFactory{1, 0}; auto pDevice = deviceFactory.rootDevices[0]; auto pHwInfo = pDevice->getRootDeviceEnvironment().getMutableHardwareInfo(); const auto &hwInfoConfig = *HwInfoConfig::get(pHwInfo->platform.eProductFamily); uint8_t memory[1 * KB]; auto mockCsr = std::make_unique>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); MockOsContext osContext{0, EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_CCS, EngineUsage::Regular}, DeviceBitfield(0))}; mockCsr->setupContext(osContext); uint32_t revisions[] = {REVISION_A0, REVISION_B}; for (auto revision : revisions) { pHwInfo->platform.usRevId = hwInfoConfig.getHwRevIdFromStepping(revision, *pHwInfo); { auto flags = DispatchFlagsHelper::createDefaultDispatchFlags(); LinearStream commandStream{&memory, sizeof(memory)}; mockCsr->mediaVfeStateDirty = true; mockCsr->programVFEState(commandStream, flags, 10); auto pCommand = reinterpret_cast(&memory); auto expectedDisableOverdispatch = hwInfoConfig.isDisableOverdispatchAvailable(*pHwInfo); EXPECT_EQ(expectedDisableOverdispatch, pCommand->getComputeOverdispatchDisable()); } { auto flags = DispatchFlagsHelper::createDefaultDispatchFlags(); flags.additionalKernelExecInfo = AdditionalKernelExecInfo::NotSet; LinearStream commandStream{&memory, sizeof(memory)}; mockCsr->mediaVfeStateDirty = true; mockCsr->programVFEState(commandStream, flags, 10); auto pCommand = reinterpret_cast(&memory); EXPECT_FALSE(pCommand->getComputeOverdispatchDisable()); } } } HWTEST_F(BcsTests, WhenGetNumberOfBlitsForCopyPerRowIsCalledThenCorrectValuesAreReturned) { auto &rootDeviceEnvironment = pClDevice->getRootDeviceEnvironment(); auto maxWidthToCopy = static_cast(BlitCommandsHelper::getMaxBlitWidth(rootDeviceEnvironment)); auto maxHeightToCopy = static_cast(BlitCommandsHelper::getMaxBlitHeight(rootDeviceEnvironment)); { Vec3 copySize = {(maxWidthToCopy * maxHeightToCopy - 
1), 1, 1}; size_t expectednBlitsCopyPerRow = 2; auto nBlitsCopyPerRow = BlitCommandsHelper::getNumberOfBlitsForCopyPerRow(copySize, rootDeviceEnvironment); EXPECT_EQ(expectednBlitsCopyPerRow, nBlitsCopyPerRow); } { Vec3 copySize = {(maxWidthToCopy * maxHeightToCopy), 1, 1}; size_t expectednBlitsCopyPerRow = 1; auto nBlitsCopyPerRow = BlitCommandsHelper::getNumberOfBlitsForCopyPerRow(copySize, rootDeviceEnvironment); EXPECT_EQ(expectednBlitsCopyPerRow, nBlitsCopyPerRow); } { Vec3 copySize = {(maxWidthToCopy * maxHeightToCopy + 1), 1, 1}; size_t expectednBlitsCopyPerRow = 2; auto nBlitsCopyPerRow = BlitCommandsHelper::getNumberOfBlitsForCopyPerRow(copySize, rootDeviceEnvironment); EXPECT_EQ(expectednBlitsCopyPerRow, nBlitsCopyPerRow); } { Vec3 copySize = {(maxWidthToCopy * maxHeightToCopy + maxWidthToCopy), 1, 1}; size_t expectednBlitsCopyPerRow = 2; auto nBlitsCopyPerRow = BlitCommandsHelper::getNumberOfBlitsForCopyPerRow(copySize, rootDeviceEnvironment); EXPECT_EQ(expectednBlitsCopyPerRow, nBlitsCopyPerRow); } { Vec3 copySize = {(maxWidthToCopy * maxHeightToCopy + maxWidthToCopy + 1), 1, 1}; size_t expectednBlitsCopyPerRow = 3; auto nBlitsCopyPerRow = BlitCommandsHelper::getNumberOfBlitsForCopyPerRow(copySize, rootDeviceEnvironment); EXPECT_EQ(expectednBlitsCopyPerRow, nBlitsCopyPerRow); } { Vec3 copySize = {(maxWidthToCopy * maxHeightToCopy + 2 * maxWidthToCopy), 1, 1}; size_t expectednBlitsCopyPerRow = 2; auto nBlitsCopyPerRow = BlitCommandsHelper::getNumberOfBlitsForCopyPerRow(copySize, rootDeviceEnvironment); EXPECT_EQ(expectednBlitsCopyPerRow, nBlitsCopyPerRow); EXPECT_FALSE(BlitCommandsHelper::isCopyRegionPreferred(copySize, rootDeviceEnvironment)); } } HWTEST_F(BcsTests, whenAskingForCmdSizeForMiFlushDwWithMemoryWriteThenReturnCorrectValue) { size_t waSize = EncodeMiFlushDW::getMiFlushDwWaSize(); size_t totalSize = EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite(); constexpr size_t miFlushDwSize = sizeof(typename FamilyType::MI_FLUSH_DW); size_t 
additionalSize = UnitTestHelper::additionalMiFlushDwRequired ? miFlushDwSize : 0; EXPECT_EQ(additionalSize, waSize); EXPECT_EQ(miFlushDwSize + additionalSize, totalSize); } HWTEST_F(BcsTests, givenBlitPropertiesContainerWhenEstimatingCommandsSizeThenCalculateForAllAttachedProperites) { const auto max2DBlitSize = BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight; const uint32_t numberOfBlts = 3; const size_t bltSize = (3 * max2DBlitSize); const uint32_t numberOfBlitOperations = 4; auto baseSize = EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite() + sizeof(typename FamilyType::MI_BATCH_BUFFER_END); size_t cmdsSizePerBlit = sizeof(typename FamilyType::XY_COPY_BLT) + sizeof(typename FamilyType::MI_ARB_CHECK); if (BlitCommandsHelper::miArbCheckWaRequired()) { cmdsSizePerBlit += EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite(); } auto expectedBlitInstructionsSize = cmdsSizePerBlit * numberOfBlts; if (BlitCommandsHelper::preBlitCommandWARequired()) { expectedBlitInstructionsSize += EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite(); } auto expectedAlignedSize = baseSize + MemorySynchronizationCommands::getSizeForAdditonalSynchronization(pDevice->getHardwareInfo()); BlitPropertiesContainer blitPropertiesContainer; for (uint32_t i = 0; i < numberOfBlitOperations; i++) { BlitProperties blitProperties; blitProperties.copySize = {bltSize, 1, 1}; blitPropertiesContainer.push_back(blitProperties); expectedAlignedSize += expectedBlitInstructionsSize; } expectedAlignedSize = alignUp(expectedAlignedSize, MemoryConstants::cacheLineSize); auto alignedEstimatedSize = BlitCommandsHelper::estimateBlitCommandsSize( blitPropertiesContainer, false, false, false, pClDevice->getRootDeviceEnvironment()); EXPECT_EQ(expectedAlignedSize, alignedEstimatedSize); } HWTEST_F(BcsTests, givenBlitPropertiesContainerWhenDirectsubmissionEnabledEstimatingCommandsSizeThenCalculateForAllAttachedProperites) { const auto max2DBlitSize = BlitterConstants::maxBlitWidth * 
BlitterConstants::maxBlitHeight; const uint32_t numberOfBlts = 3; const size_t bltSize = (3 * max2DBlitSize); const uint32_t numberOfBlitOperations = 4; auto baseSize = EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite() + sizeof(typename FamilyType::MI_BATCH_BUFFER_START); size_t cmdsSizePerBlit = sizeof(typename FamilyType::XY_COPY_BLT) + sizeof(typename FamilyType::MI_ARB_CHECK); if (BlitCommandsHelper::miArbCheckWaRequired()) { cmdsSizePerBlit += EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite(); } auto expectedBlitInstructionsSize = cmdsSizePerBlit * numberOfBlts; if (BlitCommandsHelper::preBlitCommandWARequired()) { expectedBlitInstructionsSize += EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite(); } auto expectedAlignedSize = baseSize + MemorySynchronizationCommands::getSizeForAdditonalSynchronization(pDevice->getHardwareInfo()); BlitPropertiesContainer blitPropertiesContainer; for (uint32_t i = 0; i < numberOfBlitOperations; i++) { BlitProperties blitProperties; blitProperties.copySize = {bltSize, 1, 1}; blitPropertiesContainer.push_back(blitProperties); expectedAlignedSize += expectedBlitInstructionsSize; } expectedAlignedSize = alignUp(expectedAlignedSize, MemoryConstants::cacheLineSize); auto alignedEstimatedSize = BlitCommandsHelper::estimateBlitCommandsSize( blitPropertiesContainer, false, false, true, pClDevice->getRootDeviceEnvironment()); EXPECT_EQ(expectedAlignedSize, alignedEstimatedSize); } HWTEST_F(BcsTests, givenBlitPropertiesContainerWhenEstimatingCommandsSizeForWriteReadBufferRectThenCalculateForAllAttachedProperites) { constexpr auto max2DBlitSize = BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight; const Vec3 bltSize = {(3 * max2DBlitSize), 4, 2}; const size_t numberOfBlts = 3 * bltSize.y * bltSize.z; const size_t numberOfBlitOperations = 4 * bltSize.y * bltSize.z; size_t cmdsSizePerBlit = sizeof(typename FamilyType::XY_COPY_BLT) + sizeof(typename FamilyType::MI_ARB_CHECK); if (BlitCommandsHelper::miArbCheckWaRequired()) { 
cmdsSizePerBlit += EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite(); } auto baseSize = EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite() + sizeof(typename FamilyType::MI_BATCH_BUFFER_END); auto expectedBlitInstructionsSize = cmdsSizePerBlit * numberOfBlts; if (BlitCommandsHelper::preBlitCommandWARequired()) { expectedBlitInstructionsSize += EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite(); } auto expectedAlignedSize = baseSize + MemorySynchronizationCommands::getSizeForAdditonalSynchronization(pDevice->getHardwareInfo()); BlitPropertiesContainer blitPropertiesContainer; for (uint32_t i = 0; i < numberOfBlitOperations; i++) { BlitProperties blitProperties; blitProperties.copySize = bltSize; blitPropertiesContainer.push_back(blitProperties); expectedAlignedSize += expectedBlitInstructionsSize; } expectedAlignedSize = alignUp(expectedAlignedSize, MemoryConstants::cacheLineSize); auto alignedEstimatedSize = BlitCommandsHelper::estimateBlitCommandsSize( blitPropertiesContainer, false, false, false, pClDevice->getRootDeviceEnvironment()); EXPECT_EQ(expectedAlignedSize, alignedEstimatedSize); } HWTEST_F(BcsTests, givenBlitPropertiesContainerWhenDirectSubmissionEnabledEstimatingCommandsSizeForWriteReadBufferRectThenCalculateForAllAttachedProperites) { constexpr auto max2DBlitSize = BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight; const Vec3 bltSize = {(3 * max2DBlitSize), 4, 2}; const size_t numberOfBlts = 3 * bltSize.y * bltSize.z; const size_t numberOfBlitOperations = 4 * bltSize.y * bltSize.z; size_t cmdsSizePerBlit = sizeof(typename FamilyType::XY_COPY_BLT) + sizeof(typename FamilyType::MI_ARB_CHECK); if (BlitCommandsHelper::miArbCheckWaRequired()) { cmdsSizePerBlit += EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite(); } auto baseSize = EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite() + sizeof(typename FamilyType::MI_BATCH_BUFFER_START); auto expectedBlitInstructionsSize = cmdsSizePerBlit * numberOfBlts; if 
(BlitCommandsHelper::preBlitCommandWARequired()) { expectedBlitInstructionsSize += EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite(); } auto expectedAlignedSize = baseSize + MemorySynchronizationCommands::getSizeForAdditonalSynchronization(pDevice->getHardwareInfo()); BlitPropertiesContainer blitPropertiesContainer; for (uint32_t i = 0; i < numberOfBlitOperations; i++) { BlitProperties blitProperties; blitProperties.copySize = bltSize; blitPropertiesContainer.push_back(blitProperties); expectedAlignedSize += expectedBlitInstructionsSize; } expectedAlignedSize = alignUp(expectedAlignedSize, MemoryConstants::cacheLineSize); auto alignedEstimatedSize = BlitCommandsHelper::estimateBlitCommandsSize( blitPropertiesContainer, false, false, true, pClDevice->getRootDeviceEnvironment()); EXPECT_EQ(expectedAlignedSize, alignedEstimatedSize); } HWTEST_F(BcsTests, givenTimestampPacketWriteRequestWhenEstimatingSizeForCommandsThenAddMiFlushDw) { size_t expectedBaseSize = sizeof(typename FamilyType::XY_COPY_BLT) + sizeof(typename FamilyType::MI_ARB_CHECK); if (BlitCommandsHelper::miArbCheckWaRequired()) { expectedBaseSize += EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite(); } if (BlitCommandsHelper::preBlitCommandWARequired()) { expectedBaseSize += EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite(); } auto expectedSizeWithTimestampPacketWrite = expectedBaseSize + EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite(); auto expectedSizeWithoutTimestampPacketWrite = expectedBaseSize; auto estimatedSizeWithTimestampPacketWrite = BlitCommandsHelper::estimateBlitCommandSize( {1, 1, 1}, csrDependencies, true, false, false, pClDevice->getRootDeviceEnvironment()); auto estimatedSizeWithoutTimestampPacketWrite = BlitCommandsHelper::estimateBlitCommandSize( {1, 1, 1}, csrDependencies, false, false, false, pClDevice->getRootDeviceEnvironment()); EXPECT_EQ(expectedSizeWithTimestampPacketWrite, estimatedSizeWithTimestampPacketWrite); EXPECT_EQ(expectedSizeWithoutTimestampPacketWrite, 
estimatedSizeWithoutTimestampPacketWrite); } HWTEST_F(BcsTests, givenTimestampPacketWriteRequestWhenEstimatingSizeForCommandsWithProfilingThenAddMiFlushDw) { size_t expectedBaseSize = sizeof(typename FamilyType::XY_COPY_BLT) + sizeof(typename FamilyType::MI_ARB_CHECK) + EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite(); if (BlitCommandsHelper::miArbCheckWaRequired()) { expectedBaseSize += EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite(); } if (BlitCommandsHelper::preBlitCommandWARequired()) { expectedBaseSize += EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite(); } auto expectedSizeWithTimestampPacketWriteAndProfiling = expectedBaseSize + BlitCommandsHelper::getProfilingMmioCmdsSize(); auto estimatedSizeWithTimestampPacketWrite = BlitCommandsHelper::estimateBlitCommandSize( {1, 1, 1}, csrDependencies, true, false, false, pClDevice->getRootDeviceEnvironment()); auto estimatedSizeWithTimestampPacketWriteAndProfiling = BlitCommandsHelper::estimateBlitCommandSize( {1, 1, 1}, csrDependencies, true, true, false, pClDevice->getRootDeviceEnvironment()); EXPECT_EQ(expectedSizeWithTimestampPacketWriteAndProfiling, estimatedSizeWithTimestampPacketWriteAndProfiling); EXPECT_EQ(expectedBaseSize, estimatedSizeWithTimestampPacketWrite); } HWTEST_F(BcsTests, givenBltSizeAndCsrDependenciesWhenEstimatingCommandSizeThenAddAllRequiredCommands) { uint32_t numberOfBlts = 1; size_t numberNodesPerContainer = 5; auto &csr = pDevice->getUltCommandStreamReceiver(); MockTimestampPacketContainer timestamp0(*csr.getTimestampPacketAllocator(), numberNodesPerContainer); MockTimestampPacketContainer timestamp1(*csr.getTimestampPacketAllocator(), numberNodesPerContainer); csrDependencies.timestampPacketContainer.push_back(×tamp0); csrDependencies.timestampPacketContainer.push_back(×tamp1); size_t cmdsSizePerBlit = sizeof(typename FamilyType::XY_COPY_BLT) + sizeof(typename FamilyType::MI_ARB_CHECK); if (BlitCommandsHelper::miArbCheckWaRequired()) { cmdsSizePerBlit += 
EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite(); } size_t expectedSize = (cmdsSizePerBlit * numberOfBlts) + TimestampPacketHelper::getRequiredCmdStreamSize(csrDependencies); if (BlitCommandsHelper::preBlitCommandWARequired()) { expectedSize += EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite(); } auto estimatedSize = BlitCommandsHelper::estimateBlitCommandSize( {1, 1, 1}, csrDependencies, false, false, false, pClDevice->getRootDeviceEnvironment()); EXPECT_EQ(expectedSize, estimatedSize); } HWTEST_F(BcsTests, givenImageAndBufferWhenEstimateBlitCommandSizeThenReturnCorrectCommandSize) { for (auto isImage : {false, true}) { auto expectedSize = sizeof(typename FamilyType::MI_ARB_CHECK); expectedSize += isImage ? sizeof(typename FamilyType::XY_BLOCK_COPY_BLT) : sizeof(typename FamilyType::XY_COPY_BLT); if (BlitCommandsHelper::miArbCheckWaRequired()) { expectedSize += EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite(); } if (BlitCommandsHelper::preBlitCommandWARequired()) { expectedSize += EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite(); } auto estimatedSize = BlitCommandsHelper::estimateBlitCommandSize( {1, 1, 1}, csrDependencies, false, false, isImage, pClDevice->getRootDeviceEnvironment()); EXPECT_EQ(expectedSize, estimatedSize); } } HWTEST_F(BcsTests, givenImageAndBufferBlitDirectionsWhenIsImageOperationIsCalledThenReturnCorrectValue) { BlitProperties blitProperties{}; std::pair params[] = {{false, BlitterConstants::BlitDirection::HostPtrToBuffer}, {false, BlitterConstants::BlitDirection::BufferToHostPtr}, {false, BlitterConstants::BlitDirection::BufferToBuffer}, {true, BlitterConstants::BlitDirection::HostPtrToImage}, {true, BlitterConstants::BlitDirection::ImageToHostPtr}, {true, BlitterConstants::BlitDirection::ImageToImage}}; for (auto [isImageDirection, blitDirection] : params) { blitProperties.blitDirection = blitDirection; EXPECT_EQ(isImageDirection, blitProperties.isImageOperation()); } } HWTEST_F(BcsTests, 
givenBltSizeWithLeftoverWhenDispatchedThenProgramAllRequiredCommands) { using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW; using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; constexpr auto max2DBlitSize = BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight; auto &csr = pDevice->getUltCommandStreamReceiver(); static_cast(csr.getMemoryManager())->turnOnFakingBigAllocations(); uint32_t bltLeftover = 17; size_t bltSize = (2 * max2DBlitSize) + bltLeftover; uint32_t numberOfBlts = 3; cl_int retVal = CL_SUCCESS; auto buffer = clUniquePtr(Buffer::create(context.get(), CL_MEM_READ_WRITE, static_cast(bltSize), nullptr, retVal)); void *hostPtr = reinterpret_cast(0x12340000); uint32_t newTaskCount = 19; csr.taskCount = newTaskCount - 1; uint32_t expectedResursiveLockCount = 0u; EXPECT_EQ(expectedResursiveLockCount, csr.recursiveLockCounter.load()); auto blitProperties = BlitProperties::constructPropertiesForReadWrite(BlitterConstants::BlitDirection::HostPtrToBuffer, csr, buffer->getGraphicsAllocation(pDevice->getRootDeviceIndex()), nullptr, hostPtr, buffer->getGraphicsAllocation(pDevice->getRootDeviceIndex())->getGpuAddress(), 0, 0, 0, {bltSize, 1, 1}, 0, 0, 0, 0); if (csr.getClearColorAllocation()) { expectedResursiveLockCount++; } EXPECT_EQ(expectedResursiveLockCount, csr.recursiveLockCounter.load()); flushBcsTask(&csr, blitProperties, true, *pDevice); EXPECT_EQ(newTaskCount, csr.taskCount); EXPECT_EQ(newTaskCount, csr.latestFlushedTaskCount); EXPECT_EQ(newTaskCount, csr.latestSentTaskCount); EXPECT_EQ(newTaskCount, csr.latestSentTaskCountValueDuringFlush); expectedResursiveLockCount++; EXPECT_EQ(expectedResursiveLockCount, csr.recursiveLockCounter.load()); HardwareParse hwParser; hwParser.parseCommands(csr.commandStream); auto &cmdList = hwParser.cmdList; auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); ASSERT_NE(hwParser.cmdList.end(), cmdIterator); for (uint32_t i = 0; i < numberOfBlts; i++) { auto bltCmd = 
genCmdCast(*(cmdIterator++)); EXPECT_NE(nullptr, bltCmd); uint32_t expectedWidth = static_cast(BlitterConstants::maxBlitWidth); uint32_t expectedHeight = static_cast(BlitterConstants::maxBlitHeight); if (i == (numberOfBlts - 1)) { expectedWidth = bltLeftover; expectedHeight = 1; } EXPECT_EQ(expectedWidth, bltCmd->getDestinationX2CoordinateRight()); EXPECT_EQ(expectedHeight, bltCmd->getDestinationY2CoordinateBottom()); EXPECT_EQ(expectedWidth, bltCmd->getDestinationPitch()); EXPECT_EQ(expectedWidth, bltCmd->getSourcePitch()); if (BlitCommandsHelper::miArbCheckWaRequired()) { auto miFlush = genCmdCast(*(cmdIterator++)); EXPECT_NE(nullptr, miFlush); if (EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite() == 2 * sizeof(typename FamilyType::MI_FLUSH_DW)) { miFlush = genCmdCast(*(cmdIterator++)); EXPECT_NE(nullptr, miFlush); } } auto miArbCheckCmd = genCmdCast(*(cmdIterator++)); EXPECT_NE(nullptr, miArbCheckCmd); EXPECT_TRUE(memcmp(&FamilyType::cmdInitArbCheck, miArbCheckCmd, sizeof(typename FamilyType::MI_ARB_CHECK)) == 0); } if (UnitTestHelper::isAdditionalSynchronizationRequired()) { if (UnitTestHelper::isAdditionalMiSemaphoreWaitRequired(*defaultHwInfo)) { auto miSemaphoreWaitCmd = genCmdCast(*(cmdIterator++)); EXPECT_NE(nullptr, miSemaphoreWaitCmd); EXPECT_TRUE(UnitTestHelper::isAdditionalMiSemaphoreWait(*miSemaphoreWaitCmd)); } else { cmdIterator++; } } auto miFlushCmd = genCmdCast(*(cmdIterator++)); if (UnitTestHelper::additionalMiFlushDwRequired) { uint64_t gpuAddress = 0x0; uint64_t immData = 0; EXPECT_NE(nullptr, miFlushCmd); EXPECT_EQ(MI_FLUSH_DW::POST_SYNC_OPERATION_NO_WRITE, miFlushCmd->getPostSyncOperation()); EXPECT_EQ(gpuAddress, miFlushCmd->getDestinationAddress()); EXPECT_EQ(immData, miFlushCmd->getImmediateData()); miFlushCmd = genCmdCast(*(cmdIterator++)); } EXPECT_NE(cmdIterator, cmdList.end()); EXPECT_EQ(MI_FLUSH_DW::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA_QWORD, miFlushCmd->getPostSyncOperation()); EXPECT_EQ(csr.getTagAllocation()->getGpuAddress(), 
miFlushCmd->getDestinationAddress()); EXPECT_EQ(newTaskCount, miFlushCmd->getImmediateData()); if (UnitTestHelper::isAdditionalSynchronizationRequired()) { if (UnitTestHelper::isAdditionalMiSemaphoreWaitRequired(*defaultHwInfo)) { auto miSemaphoreWaitCmd = genCmdCast(*(cmdIterator++)); EXPECT_NE(nullptr, miSemaphoreWaitCmd); EXPECT_TRUE(UnitTestHelper::isAdditionalMiSemaphoreWait(*miSemaphoreWaitCmd)); } else { cmdIterator++; } } EXPECT_NE(nullptr, genCmdCast(*(cmdIterator++))); // padding while (cmdIterator != cmdList.end()) { EXPECT_NE(nullptr, genCmdCast(*(cmdIterator++))); } } HWTEST_F(BcsTests, givenCommandTypeWhenObtainBlitDirectionIsCalledThenReturnCorrectBlitDirection) { std::array, 10> testParams = {{{CL_COMMAND_WRITE_BUFFER, BlitterConstants::BlitDirection::HostPtrToBuffer}, {CL_COMMAND_WRITE_BUFFER_RECT, BlitterConstants::BlitDirection::HostPtrToBuffer}, {CL_COMMAND_READ_BUFFER, BlitterConstants::BlitDirection::BufferToHostPtr}, {CL_COMMAND_READ_BUFFER_RECT, BlitterConstants::BlitDirection::BufferToHostPtr}, {CL_COMMAND_COPY_BUFFER_RECT, BlitterConstants::BlitDirection::BufferToBuffer}, {CL_COMMAND_SVM_MEMCPY, BlitterConstants::BlitDirection::BufferToBuffer}, {CL_COMMAND_WRITE_IMAGE, BlitterConstants::BlitDirection::HostPtrToImage}, {CL_COMMAND_READ_IMAGE, BlitterConstants::BlitDirection::ImageToHostPtr}, {CL_COMMAND_COPY_BUFFER, BlitterConstants::BlitDirection::BufferToBuffer}, {CL_COMMAND_COPY_IMAGE, BlitterConstants::BlitDirection::ImageToImage}}}; for (const auto &[commandType, expectedBlitDirection] : testParams) { auto blitDirection = ClBlitProperties::obtainBlitDirection(commandType); EXPECT_EQ(expectedBlitDirection, blitDirection); } } HWTEST_F(BcsTests, givenWrongCommandTypeWhenObtainBlitDirectionIsCalledThenExpectThrow) { uint32_t wrongCommandType = CL_COMMAND_NDRANGE_KERNEL; EXPECT_THROW(ClBlitProperties::obtainBlitDirection(wrongCommandType), std::exception); } struct BcsTestParam { Vec3 copySize; Vec3 hostPtrOffset; Vec3 copyOffset; size_t 
dstRowPitch; size_t dstSlicePitch; size_t srcRowPitch; size_t srcSlicePitch; } BlitterProperties[] = { {{(2 * BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight) + 17, 1, 1}, {0, 1, 1}, {BlitterConstants::maxBlitWidth, 1, 1}, (2 * BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight) + 17, (2 * BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight) + 17, (2 * BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight) + 17, (2 * BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight) + 17}, {{(2 * BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight) + 17, 2, 1}, {BlitterConstants::maxBlitWidth, 2, 2}, {BlitterConstants::maxBlitWidth, 1, 1}, 0, ((2 * BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight) + 17) * 2, 0, ((2 * BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight) + 17) * 2}, {{(2 * BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight) + 17, 1, 3}, {BlitterConstants::maxBlitWidth, 2, 2}, {BlitterConstants::maxBlitWidth, 1, 1}, 0, ((2 * BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight) + 17) * 2, 0, ((2 * BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight) + 17) * 2}, {{(2 * BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight) + 17, 4, 2}, {0, 0, 0}, {0, 0, 0}, (2 * BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight) + 17, ((2 * BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight) + 17) * 4, (2 * BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight) + 17, ((2 * BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight) + 17) * 4}, {{(2 * BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight) + 17, 3, 2}, {BlitterConstants::maxBlitWidth, 2, 2}, {BlitterConstants::maxBlitWidth, 1, 1}, ((2 * BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight) + 17) + 2, (((2 * BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight) + 17) * 3) + 2, ((2 * 
BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight) + 17) + 2, (((2 * BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight) + 17) * 3) + 2}}; template struct BcsDetaliedTests : public BcsTests, public ::testing::WithParamInterface { void SetUp() override { BcsTests::SetUp(); } void TearDown() override { BcsTests::TearDown(); } }; using BcsDetaliedTestsWithParams = BcsDetaliedTests>; HWTEST_P(BcsDetaliedTestsWithParams, givenBltSizeWithLeftoverWhenDispatchedThenProgramAddresseForWriteReadBufferRect) { auto &csr = pDevice->getUltCommandStreamReceiver(); static_cast(csr.getMemoryManager())->turnOnFakingBigAllocations(); uint32_t bltLeftover = 17; Vec3 bltSize = std::get<0>(GetParam()).copySize; size_t numberOfBltsForSingleBltSizeProgramm = 3; size_t totalNumberOfBits = numberOfBltsForSingleBltSizeProgramm * bltSize.y * bltSize.z; cl_int retVal = CL_SUCCESS; auto buffer = clUniquePtr(Buffer::create(context.get(), CL_MEM_READ_WRITE, static_cast(8 * BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight), nullptr, retVal)); void *hostPtr = reinterpret_cast(0x12340000); Vec3 hostPtrOffset = std::get<0>(GetParam()).hostPtrOffset; Vec3 copyOffset = std::get<0>(GetParam()).copyOffset; size_t dstRowPitch = std::get<0>(GetParam()).dstRowPitch; size_t dstSlicePitch = std::get<0>(GetParam()).dstSlicePitch; size_t srcRowPitch = std::get<0>(GetParam()).srcRowPitch; size_t srcSlicePitch = std::get<0>(GetParam()).srcSlicePitch; auto allocation = buffer->getGraphicsAllocation(pDevice->getRootDeviceIndex()); auto memoryManager = static_cast(pDevice->getMemoryManager()); memoryManager->returnFakeAllocation = true; auto blitProperties = BlitProperties::constructPropertiesForReadWrite(std::get<1>(GetParam()), //blitDirection csr, allocation, //commandStreamReceiver nullptr, //memObjAllocation hostPtr, //preallocatedHostAllocation allocation->getGpuAddress(), //memObjGpuVa 0, //hostAllocGpuVa hostPtrOffset, //hostPtrOffset copyOffset, //copyOffset 
bltSize, //copySize dstRowPitch, //hostRowPitch dstSlicePitch, //hostSlicePitch srcRowPitch, //gpuRowPitch srcSlicePitch //gpuSlicePitch ); memoryManager->returnFakeAllocation = false; flushBcsTask(&csr, blitProperties, true, *pDevice); HardwareParse hwParser; hwParser.parseCommands(csr.commandStream); auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); ASSERT_NE(hwParser.cmdList.end(), cmdIterator); uint64_t offset = 0; for (uint32_t i = 0; i < totalNumberOfBits; i++) { auto bltCmd = genCmdCast(*(cmdIterator++)); EXPECT_NE(nullptr, bltCmd); uint32_t expectedWidth = static_cast(BlitterConstants::maxBlitWidth); uint32_t expectedHeight = static_cast(BlitterConstants::maxBlitHeight); if (i % numberOfBltsForSingleBltSizeProgramm == numberOfBltsForSingleBltSizeProgramm - 1) { expectedWidth = bltLeftover; expectedHeight = 1; } if (i % numberOfBltsForSingleBltSizeProgramm == 0) { offset = 0; } auto rowIndex = (i / numberOfBltsForSingleBltSizeProgramm) % blitProperties.copySize.y; auto sliceIndex = i / (numberOfBltsForSingleBltSizeProgramm * blitProperties.copySize.y); auto expectedDstAddr = blitProperties.dstGpuAddress + blitProperties.dstOffset.x + offset + blitProperties.dstOffset.y * blitProperties.dstRowPitch + blitProperties.dstOffset.z * blitProperties.dstSlicePitch + rowIndex * blitProperties.dstRowPitch + sliceIndex * blitProperties.dstSlicePitch; auto expectedSrcAddr = blitProperties.srcGpuAddress + blitProperties.srcOffset.x + offset + blitProperties.srcOffset.y * blitProperties.srcRowPitch + blitProperties.srcOffset.z * blitProperties.srcSlicePitch + rowIndex * blitProperties.srcRowPitch + sliceIndex * blitProperties.srcSlicePitch; auto dstAddr = NEO::BlitCommandsHelper::calculateBlitCommandDestinationBaseAddress(blitProperties, offset, rowIndex, sliceIndex); auto srcAddr = NEO::BlitCommandsHelper::calculateBlitCommandSourceBaseAddress(blitProperties, offset, rowIndex, sliceIndex); EXPECT_EQ(dstAddr, expectedDstAddr); EXPECT_EQ(srcAddr, 
expectedSrcAddr); offset += (expectedWidth * expectedHeight); if (BlitCommandsHelper::miArbCheckWaRequired()) { auto miFlush = genCmdCast(*(cmdIterator++)); EXPECT_NE(nullptr, miFlush); if (EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite() == 2 * sizeof(typename FamilyType::MI_FLUSH_DW)) { miFlush = genCmdCast(*(cmdIterator++)); EXPECT_NE(nullptr, miFlush); } } auto miArbCheckCmd = genCmdCast(*(cmdIterator++)); EXPECT_NE(nullptr, miArbCheckCmd); EXPECT_TRUE(memcmp(&FamilyType::cmdInitArbCheck, miArbCheckCmd, sizeof(typename FamilyType::MI_ARB_CHECK)) == 0); } } HWTEST_P(BcsDetaliedTestsWithParams, givenBltSizeWithLeftoverWhenDispatchedThenProgramAllRequiredCommandsForWriteReadBufferRect) { auto &csr = pDevice->getUltCommandStreamReceiver(); static_cast(csr.getMemoryManager())->turnOnFakingBigAllocations(); uint32_t bltLeftover = 17; Vec3 bltSize = std::get<0>(GetParam()).copySize; size_t numberOfBltsForSingleBltSizeProgramm = 3; size_t totalNumberOfBits = numberOfBltsForSingleBltSizeProgramm * bltSize.y * bltSize.z; cl_int retVal = CL_SUCCESS; auto buffer = clUniquePtr(Buffer::create(context.get(), CL_MEM_READ_WRITE, static_cast(8 * BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight), nullptr, retVal)); void *hostPtr = reinterpret_cast(0x12340000); Vec3 hostPtrOffset = std::get<0>(GetParam()).hostPtrOffset; Vec3 copyOffset = std::get<0>(GetParam()).copyOffset; size_t dstRowPitch = std::get<0>(GetParam()).dstRowPitch; size_t dstSlicePitch = std::get<0>(GetParam()).dstSlicePitch; size_t srcRowPitch = std::get<0>(GetParam()).srcRowPitch; size_t srcSlicePitch = std::get<0>(GetParam()).srcSlicePitch; auto allocation = buffer->getGraphicsAllocation(pDevice->getRootDeviceIndex()); auto memoryManager = static_cast(pDevice->getMemoryManager()); memoryManager->returnFakeAllocation = true; auto blitProperties = BlitProperties::constructPropertiesForReadWrite(std::get<1>(GetParam()), //blitDirection csr, allocation, //commandStreamReceiver nullptr, 
//memObjAllocation hostPtr, //preallocatedHostAllocation allocation->getGpuAddress(), //memObjGpuVa 0, //hostAllocGpuVa hostPtrOffset, //hostPtrOffset copyOffset, //copyOffset bltSize, //copySize dstRowPitch, //hostRowPitch dstSlicePitch, //hostSlicePitch srcRowPitch, //gpuRowPitch srcSlicePitch //gpuSlicePitch ); memoryManager->returnFakeAllocation = false; flushBcsTask(&csr, blitProperties, true, *pDevice); HardwareParse hwParser; hwParser.parseCommands(csr.commandStream); auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); ASSERT_NE(hwParser.cmdList.end(), cmdIterator); uint64_t offset = 0; for (uint32_t i = 0; i < totalNumberOfBits; i++) { auto bltCmd = genCmdCast(*(cmdIterator++)); EXPECT_NE(nullptr, bltCmd); uint32_t expectedWidth = static_cast(BlitterConstants::maxBlitWidth); uint32_t expectedHeight = static_cast(BlitterConstants::maxBlitHeight); if (i % numberOfBltsForSingleBltSizeProgramm == numberOfBltsForSingleBltSizeProgramm - 1) { expectedWidth = bltLeftover; expectedHeight = 1; } if (i % numberOfBltsForSingleBltSizeProgramm == 0) { offset = 0; } EXPECT_EQ(expectedWidth, bltCmd->getDestinationX2CoordinateRight()); EXPECT_EQ(expectedHeight, bltCmd->getDestinationY2CoordinateBottom()); EXPECT_EQ(expectedWidth, bltCmd->getDestinationPitch()); EXPECT_EQ(expectedWidth, bltCmd->getSourcePitch()); auto rowIndex = (i / numberOfBltsForSingleBltSizeProgramm) % blitProperties.copySize.y; auto sliceIndex = i / (numberOfBltsForSingleBltSizeProgramm * blitProperties.copySize.y); auto dstAddr = NEO::BlitCommandsHelper::calculateBlitCommandDestinationBaseAddress(blitProperties, offset, rowIndex, sliceIndex); auto srcAddr = NEO::BlitCommandsHelper::calculateBlitCommandSourceBaseAddress(blitProperties, offset, rowIndex, sliceIndex); EXPECT_EQ(dstAddr, bltCmd->getDestinationBaseAddress()); EXPECT_EQ(srcAddr, bltCmd->getSourceBaseAddress()); offset += (expectedWidth * expectedHeight); if (BlitCommandsHelper::miArbCheckWaRequired()) { auto miFlush = 
genCmdCast(*(cmdIterator++)); EXPECT_NE(nullptr, miFlush); if (EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite() == 2 * sizeof(typename FamilyType::MI_FLUSH_DW)) { miFlush = genCmdCast(*(cmdIterator++)); EXPECT_NE(nullptr, miFlush); } } auto miArbCheckCmd = genCmdCast(*(cmdIterator++)); EXPECT_NE(nullptr, miArbCheckCmd); EXPECT_TRUE(memcmp(&FamilyType::cmdInitArbCheck, miArbCheckCmd, sizeof(typename FamilyType::MI_ARB_CHECK)) == 0); } } HWTEST_P(BcsDetaliedTestsWithParams, givenBltSizeWithLeftoverWhenDispatchedThenProgramAllRequiredCommandsForCopyBufferRect) { auto &csr = pDevice->getUltCommandStreamReceiver(); static_cast(csr.getMemoryManager())->turnOnFakingBigAllocations(); uint32_t bltLeftover = 17; Vec3 bltSize = std::get<0>(GetParam()).copySize; size_t numberOfBltsForSingleBltSizeProgramm = 3; size_t totalNumberOfBits = numberOfBltsForSingleBltSizeProgramm * bltSize.y * bltSize.z; cl_int retVal = CL_SUCCESS; auto buffer1 = clUniquePtr(Buffer::create(context.get(), CL_MEM_READ_WRITE, static_cast(8 * BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight), nullptr, retVal)); Vec3 buffer1Offset = std::get<0>(GetParam()).hostPtrOffset; Vec3 buffer2Offset = std::get<0>(GetParam()).copyOffset; size_t buffer1RowPitch = std::get<0>(GetParam()).dstRowPitch; size_t buffer1SlicePitch = std::get<0>(GetParam()).dstSlicePitch; size_t buffer2RowPitch = std::get<0>(GetParam()).srcRowPitch; size_t buffer2SlicePitch = std::get<0>(GetParam()).srcSlicePitch; auto allocation = buffer1->getGraphicsAllocation(pDevice->getRootDeviceIndex()); auto blitProperties = BlitProperties::constructPropertiesForCopy(allocation, //dstAllocation allocation, //srcAllocation buffer1Offset, //dstOffset buffer2Offset, //srcOffset bltSize, //copySize buffer1RowPitch, //srcRowPitch buffer1SlicePitch, //srcSlicePitch buffer2RowPitch, //dstRowPitch buffer2SlicePitch, //dstSlicePitch csr.getClearColorAllocation() //clearColorAllocation ); flushBcsTask(&csr, blitProperties, true, *pDevice); 
HardwareParse hwParser; hwParser.parseCommands(csr.commandStream); auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); ASSERT_NE(hwParser.cmdList.end(), cmdIterator); uint64_t offset = 0; for (uint32_t i = 0; i < totalNumberOfBits; i++) { auto bltCmd = genCmdCast(*(cmdIterator++)); EXPECT_NE(nullptr, bltCmd); uint32_t expectedWidth = static_cast(BlitterConstants::maxBlitWidth); uint32_t expectedHeight = static_cast(BlitterConstants::maxBlitHeight); if (i % numberOfBltsForSingleBltSizeProgramm == numberOfBltsForSingleBltSizeProgramm - 1) { expectedWidth = bltLeftover; expectedHeight = 1; } if (i % numberOfBltsForSingleBltSizeProgramm == 0) { offset = 0; } EXPECT_EQ(expectedWidth, bltCmd->getDestinationX2CoordinateRight()); EXPECT_EQ(expectedHeight, bltCmd->getDestinationY2CoordinateBottom()); EXPECT_EQ(expectedWidth, bltCmd->getDestinationPitch()); EXPECT_EQ(expectedWidth, bltCmd->getSourcePitch()); auto rowIndex = (i / numberOfBltsForSingleBltSizeProgramm) % blitProperties.copySize.y; auto sliceIndex = i / (numberOfBltsForSingleBltSizeProgramm * blitProperties.copySize.y); auto dstAddr = NEO::BlitCommandsHelper::calculateBlitCommandDestinationBaseAddress(blitProperties, offset, rowIndex, sliceIndex); auto srcAddr = NEO::BlitCommandsHelper::calculateBlitCommandSourceBaseAddress(blitProperties, offset, rowIndex, sliceIndex); EXPECT_EQ(dstAddr, bltCmd->getDestinationBaseAddress()); EXPECT_EQ(srcAddr, bltCmd->getSourceBaseAddress()); offset += (expectedWidth * expectedHeight); if (BlitCommandsHelper::miArbCheckWaRequired()) { auto miFlush = genCmdCast(*(cmdIterator++)); EXPECT_NE(nullptr, miFlush); if (EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite() == 2 * sizeof(typename FamilyType::MI_FLUSH_DW)) { miFlush = genCmdCast(*(cmdIterator++)); EXPECT_NE(nullptr, miFlush); } } auto miArbCheckCmd = genCmdCast(*(cmdIterator++)); EXPECT_NE(nullptr, miArbCheckCmd); EXPECT_TRUE(memcmp(&FamilyType::cmdInitArbCheck, miArbCheckCmd, sizeof(typename 
FamilyType::MI_ARB_CHECK)) == 0); } } INSTANTIATE_TEST_CASE_P(BcsDetaliedTest, BcsDetaliedTestsWithParams, ::testing::Combine( ::testing::ValuesIn(BlitterProperties), ::testing::Values(BlitterConstants::BlitDirection::HostPtrToBuffer, BlitterConstants::BlitDirection::BufferToHostPtr))); HWCMDTEST_F(IGFX_GEN8_CORE, UltCommandStreamReceiverTest, WhenProgrammingActivePartitionsThenExpectNoAction) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); size_t expectedCmdSize = 0; EXPECT_EQ(expectedCmdSize, commandStreamReceiver.getCmdSizeForActivePartitionConfig()); size_t usedBefore = commandStreamReceiver.commandStream.getUsed(); commandStreamReceiver.programActivePartitionConfig(commandStreamReceiver.commandStream); size_t usedAfter = commandStreamReceiver.commandStream.getUsed(); EXPECT_EQ(usedBefore, usedAfter); } HWCMDTEST_F(IGFX_GEN8_CORE, UltCommandStreamReceiverTest, givenBarrierNodeSetWhenProgrammingBarrierCommandThenExpectPostSyncPipeControl) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; auto &hwInfo = pDevice->getHardwareInfo(); auto commandStreamReceiver = &pDevice->getUltCommandStreamReceiver(); auto &commandStreamCSR = commandStreamReceiver->getCS(); TagNodeBase *tagNode = commandStreamReceiver->getTimestampPacketAllocator()->getTag(); uint64_t gpuAddress = TimestampPacketHelper::getContextEndGpuAddress(*tagNode); TimestampPacketDependencies timestampPacketDependencies; timestampPacketDependencies.barrierNodes.add(tagNode); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.barrierTimestampPacketNodes = ×tampPacketDependencies.barrierNodes; size_t expectedCmdSize = MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(hwInfo); size_t estimatedCmdSize = commandStreamReceiver->getCmdSizeForStallingCommands(dispatchFlags); EXPECT_EQ(expectedCmdSize, estimatedCmdSize); commandStreamReceiver->programStallingCommandsForBarrier(commandStreamCSR, dispatchFlags); 
EXPECT_EQ(estimatedCmdSize, commandStreamCSR.getUsed()); parseCommands(commandStreamCSR, 0); findHardwareCommands(); auto cmdItor = cmdList.begin(); if (MemorySynchronizationCommands::isPipeControlWArequired(hwInfo)) { PIPE_CONTROL *pipeControl = genCmdCast(*cmdItor); ASSERT_NE(nullptr, pipeControl); cmdItor++; if (MemorySynchronizationCommands::getSizeForSingleAdditionalSynchronization(hwInfo) > 0) { cmdItor++; } } PIPE_CONTROL *pipeControl = genCmdCast(*cmdItor); ASSERT_NE(nullptr, pipeControl); EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, pipeControl->getPostSyncOperation()); EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); EXPECT_EQ(0u, pipeControl->getImmediateData()); EXPECT_EQ(gpuAddress, UnitTestHelper::getPipeControlPostSyncAddress(*pipeControl)); } command_stream_receiver_hw_2_tests.cpp000066400000000000000000003151151422164147700342710ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/command_stream/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/scratch_space_controller_base.h" #include "shared/source/command_stream/wait_status.h" #include "shared/source/direct_submission/dispatchers/blitter_dispatcher.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/engine_descriptor_helper.h" #include "shared/test/common/helpers/ult_hw_config.h" #include "shared/test/common/mocks/mock_allocation_properties.h" #include "shared/test/common/mocks/mock_direct_submission_hw.h" #include "shared/test/common/mocks/mock_hw_helper.h" #include "shared/test/common/mocks/mock_internal_allocation_storage.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "shared/test/common/mocks/mock_timestamp_container.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/utilities/base_object_utils.h" #include 
"opencl/source/event/user_event.h" #include "opencl/source/helpers/cl_blit_properties.h" #include "opencl/source/mem_obj/mem_obj_helper.h" #include "opencl/test/unit_test/command_stream/command_stream_receiver_hw_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h" #include "opencl/test/unit_test/helpers/raii_hw_helper.h" #include "opencl/test/unit_test/mocks/mock_image.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" using namespace NEO; HWTEST_F(BcsTests, givenBltSizeWhenEstimatingCommandSizeThenAddAllRequiredCommands) { constexpr auto max2DBlitSize = BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight; size_t cmdsSizePerBlit = sizeof(typename FamilyType::XY_COPY_BLT) + sizeof(typename FamilyType::MI_ARB_CHECK); if (BlitCommandsHelper::miArbCheckWaRequired()) { cmdsSizePerBlit += EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite(); } size_t notAlignedBltSize = (3 * max2DBlitSize) + 1; size_t alignedBltSize = (3 * max2DBlitSize); uint32_t alignedNumberOfBlts = 3; uint32_t notAlignedNumberOfBlts = 4; auto expectedAlignedSize = cmdsSizePerBlit * alignedNumberOfBlts; auto expectedNotAlignedSize = cmdsSizePerBlit * notAlignedNumberOfBlts; if (BlitCommandsHelper::preBlitCommandWARequired()) { expectedAlignedSize += EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite(); expectedNotAlignedSize += EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite(); } auto alignedCopySize = Vec3{alignedBltSize, 1, 1}; auto notAlignedCopySize = Vec3{notAlignedBltSize, 1, 1}; auto alignedEstimatedSize = BlitCommandsHelper::estimateBlitCommandSize( alignedCopySize, csrDependencies, false, false, false, pClDevice->getRootDeviceEnvironment()); auto notAlignedEstimatedSize = BlitCommandsHelper::estimateBlitCommandSize( notAlignedCopySize, csrDependencies, false, false, false, pClDevice->getRootDeviceEnvironment()); EXPECT_EQ(expectedAlignedSize, alignedEstimatedSize); 
EXPECT_EQ(expectedNotAlignedSize, notAlignedEstimatedSize); EXPECT_FALSE(BlitCommandsHelper::isCopyRegionPreferred(alignedCopySize, pClDevice->getRootDeviceEnvironment())); EXPECT_FALSE(BlitCommandsHelper::isCopyRegionPreferred(notAlignedCopySize, pClDevice->getRootDeviceEnvironment())); } HWTEST_F(BcsTests, givenDebugCapabilityWhenEstimatingCommandSizeThenAddAllRequiredCommands) { constexpr auto max2DBlitSize = BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight; size_t cmdsSizePerBlit = sizeof(typename FamilyType::XY_COPY_BLT) + sizeof(typename FamilyType::MI_ARB_CHECK); if (BlitCommandsHelper::miArbCheckWaRequired()) { cmdsSizePerBlit += EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite(); } const size_t debugCommandsSize = (EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite() + EncodeSempahore::getSizeMiSemaphoreWait()) * 2; constexpr uint32_t numberOfBlts = 3; constexpr size_t bltSize = (numberOfBlts * max2DBlitSize); auto expectedSize = (cmdsSizePerBlit * numberOfBlts) + debugCommandsSize + MemorySynchronizationCommands::getSizeForAdditonalSynchronization(pDevice->getHardwareInfo()) + EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite() + sizeof(typename FamilyType::MI_BATCH_BUFFER_END); expectedSize = alignUp(expectedSize, MemoryConstants::cacheLineSize); BlitProperties blitProperties{}; blitProperties.copySize = {bltSize, 1, 1}; BlitPropertiesContainer blitPropertiesContainer; blitPropertiesContainer.push_back(blitProperties); auto estimatedSize = BlitCommandsHelper::estimateBlitCommandsSize( blitPropertiesContainer, false, true, false, pClDevice->getRootDeviceEnvironment()); EXPECT_EQ(expectedSize, estimatedSize); EXPECT_FALSE(BlitCommandsHelper::isCopyRegionPreferred(blitProperties.copySize, pClDevice->getRootDeviceEnvironment())); } HWTEST_F(BcsTests, givenBltSizeWhenEstimatingCommandSizeForReadBufferRectThenAddAllRequiredCommands) { constexpr auto max2DBlitSize = BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight; size_t 
cmdsSizePerBlit = sizeof(typename FamilyType::XY_COPY_BLT) + sizeof(typename FamilyType::MI_ARB_CHECK); if (BlitCommandsHelper::miArbCheckWaRequired()) { cmdsSizePerBlit += EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite(); } Vec3 notAlignedBltSize = {(3 * max2DBlitSize) + 1, 4, 2}; Vec3 alignedBltSize = {(3 * max2DBlitSize), 4, 2}; size_t alignedNumberOfBlts = 3 * alignedBltSize.y * alignedBltSize.z; size_t notAlignedNumberOfBlts = 4 * notAlignedBltSize.y * notAlignedBltSize.z; auto expectedAlignedSize = cmdsSizePerBlit * alignedNumberOfBlts; auto expectedNotAlignedSize = cmdsSizePerBlit * notAlignedNumberOfBlts; if (BlitCommandsHelper::preBlitCommandWARequired()) { expectedAlignedSize += EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite(); expectedNotAlignedSize += EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite(); } auto alignedEstimatedSize = BlitCommandsHelper::estimateBlitCommandSize( alignedBltSize, csrDependencies, false, false, false, pClDevice->getRootDeviceEnvironment()); auto notAlignedEstimatedSize = BlitCommandsHelper::estimateBlitCommandSize( notAlignedBltSize, csrDependencies, false, false, false, pClDevice->getRootDeviceEnvironment()); EXPECT_EQ(expectedAlignedSize, alignedEstimatedSize); EXPECT_EQ(expectedNotAlignedSize, notAlignedEstimatedSize); EXPECT_FALSE(BlitCommandsHelper::isCopyRegionPreferred(notAlignedBltSize, pClDevice->getRootDeviceEnvironment())); EXPECT_FALSE(BlitCommandsHelper::isCopyRegionPreferred(alignedBltSize, pClDevice->getRootDeviceEnvironment())); } HWTEST_F(BcsTests, givenBltWithBigCopySizeWhenEstimatingCommandSizeForReadBufferRectThenAddAllRequiredCommands) { auto &rootDeviceEnvironment = pClDevice->getRootDeviceEnvironment(); auto maxWidthToCopy = static_cast(BlitCommandsHelper::getMaxBlitWidth(rootDeviceEnvironment)); auto maxHeightToCopy = static_cast(BlitCommandsHelper::getMaxBlitHeight(rootDeviceEnvironment)); size_t cmdsSizePerBlit = sizeof(typename FamilyType::XY_COPY_BLT) + sizeof(typename 
FamilyType::MI_ARB_CHECK); if (BlitCommandsHelper::miArbCheckWaRequired()) { cmdsSizePerBlit += EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite(); } Vec3 alignedBltSize = {(3 * maxWidthToCopy), (4 * maxHeightToCopy), 2}; Vec3 notAlignedBltSize = {(3 * maxWidthToCopy + 1), (4 * maxHeightToCopy), 2}; EXPECT_TRUE(BlitCommandsHelper::isCopyRegionPreferred(alignedBltSize, rootDeviceEnvironment)); size_t alignedNumberOfBlts = (3 * 4 * alignedBltSize.z); size_t notAlignedNumberOfBlts = (4 * 4 * notAlignedBltSize.z); auto expectedAlignedSize = cmdsSizePerBlit * alignedNumberOfBlts; auto expectedNotAlignedSize = cmdsSizePerBlit * notAlignedNumberOfBlts; if (BlitCommandsHelper::preBlitCommandWARequired()) { expectedAlignedSize += EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite(); expectedNotAlignedSize += EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite(); } auto alignedEstimatedSize = BlitCommandsHelper::estimateBlitCommandSize( alignedBltSize, csrDependencies, false, false, false, rootDeviceEnvironment); auto notAlignedEstimatedSize = BlitCommandsHelper::estimateBlitCommandSize( notAlignedBltSize, csrDependencies, false, false, false, rootDeviceEnvironment); EXPECT_EQ(expectedAlignedSize, alignedEstimatedSize); EXPECT_EQ(expectedNotAlignedSize, notAlignedEstimatedSize); EXPECT_TRUE(BlitCommandsHelper::isCopyRegionPreferred(notAlignedBltSize, rootDeviceEnvironment)); EXPECT_TRUE(BlitCommandsHelper::isCopyRegionPreferred(alignedBltSize, rootDeviceEnvironment)); } HWTEST_F(BcsTests, WhenGetNumberOfBlitsIsCalledThenCorrectValuesAreReturned) { auto &rootDeviceEnvironment = pClDevice->getRootDeviceEnvironment(); auto maxWidthToCopy = static_cast(BlitCommandsHelper::getMaxBlitWidth(rootDeviceEnvironment)); auto maxHeightToCopy = static_cast(BlitCommandsHelper::getMaxBlitHeight(rootDeviceEnvironment)); { Vec3 copySize = {maxWidthToCopy * maxHeightToCopy, 1, 3}; size_t expectednBlitsCopyRegion = maxHeightToCopy * 3; size_t expectednBlitsCopyPerRow = 3; auto nBlitsCopyRegion = 
BlitCommandsHelper::getNumberOfBlitsForCopyRegion(copySize, rootDeviceEnvironment); auto nBlitsCopyPerRow = BlitCommandsHelper::getNumberOfBlitsForCopyPerRow(copySize, rootDeviceEnvironment); EXPECT_EQ(expectednBlitsCopyPerRow, nBlitsCopyPerRow); EXPECT_EQ(expectednBlitsCopyRegion, nBlitsCopyRegion); EXPECT_FALSE(BlitCommandsHelper::isCopyRegionPreferred(copySize, rootDeviceEnvironment)); } { Vec3 copySize = {2 * maxWidthToCopy, 16, 3}; size_t expectednBlitsCopyRegion = 2 * 3; size_t expectednBlitsCopyPerRow = 16 * 3; auto nBlitsCopyRegion = BlitCommandsHelper::getNumberOfBlitsForCopyRegion(copySize, rootDeviceEnvironment); auto nBlitsCopyPerRow = BlitCommandsHelper::getNumberOfBlitsForCopyPerRow(copySize, rootDeviceEnvironment); EXPECT_EQ(expectednBlitsCopyPerRow, nBlitsCopyPerRow); EXPECT_EQ(expectednBlitsCopyRegion, nBlitsCopyRegion); EXPECT_TRUE(BlitCommandsHelper::isCopyRegionPreferred(copySize, rootDeviceEnvironment)); } { Vec3 copySize = {2 * maxWidthToCopy, 3 * maxHeightToCopy, 4}; size_t expectednBlitsCopyRegion = 2 * 3 * 4; size_t expectednBlitsCopyPerRow = 3 * maxHeightToCopy * 4; auto nBlitsCopyRegion = BlitCommandsHelper::getNumberOfBlitsForCopyRegion(copySize, rootDeviceEnvironment); auto nBlitsCopyPerRow = BlitCommandsHelper::getNumberOfBlitsForCopyPerRow(copySize, rootDeviceEnvironment); EXPECT_EQ(expectednBlitsCopyPerRow, nBlitsCopyPerRow); EXPECT_EQ(expectednBlitsCopyRegion, nBlitsCopyRegion); EXPECT_TRUE(BlitCommandsHelper::isCopyRegionPreferred(copySize, rootDeviceEnvironment)); } } HWTEST_F(BcsTests, givenCsrDependenciesWhenProgrammingCommandStreamThenAddSemaphore) { auto &csr = pDevice->getUltCommandStreamReceiver(); cl_int retVal = CL_SUCCESS; auto buffer = clUniquePtr(Buffer::create(context.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); constexpr size_t hostAllocationSize = MemoryConstants::pageSize; auto hostAllocationPtr = allocateAlignedMemory(hostAllocationSize, MemoryConstants::pageSize); void *hostPtr = 
reinterpret_cast(hostAllocationPtr.get()); uint32_t numberOfDependencyContainers = 2; size_t numberNodesPerContainer = 5; auto graphicsAllocation = buffer->getGraphicsAllocation(pDevice->getRootDeviceIndex()); auto blitProperties = BlitProperties::constructPropertiesForReadWrite(BlitterConstants::BlitDirection::HostPtrToBuffer, csr, graphicsAllocation, nullptr, hostPtr, graphicsAllocation->getGpuAddress(), 0, 0, 0, {1, 1, 1}, 0, 0, 0, 0); MockTimestampPacketContainer timestamp0(*csr.getTimestampPacketAllocator(), numberNodesPerContainer); MockTimestampPacketContainer timestamp1(*csr.getTimestampPacketAllocator(), numberNodesPerContainer); blitProperties.csrDependencies.timestampPacketContainer.push_back(×tamp0); blitProperties.csrDependencies.timestampPacketContainer.push_back(×tamp1); flushBcsTask(&csr, blitProperties, true, *pDevice); HardwareParse hwParser; hwParser.parseCommands(csr.commandStream); auto &cmdList = hwParser.cmdList; bool xyCopyBltCmdFound = false; bool dependenciesFound = false; for (auto cmdIterator = cmdList.begin(); cmdIterator != cmdList.end(); cmdIterator++) { if (genCmdCast(*cmdIterator)) { xyCopyBltCmdFound = true; continue; } auto miSemaphore = genCmdCast(*cmdIterator); if (miSemaphore) { if (UnitTestHelper::isAdditionalMiSemaphoreWait(*miSemaphore)) { continue; } dependenciesFound = true; EXPECT_FALSE(xyCopyBltCmdFound); for (uint32_t i = 1; i < numberOfDependencyContainers * numberNodesPerContainer; i++) { EXPECT_NE(nullptr, genCmdCast(*(++cmdIterator))); } } } EXPECT_TRUE(xyCopyBltCmdFound); EXPECT_TRUE(dependenciesFound); } HWTEST_F(BcsTests, givenMultipleBlitPropertiesWhenDispatchingThenProgramCommandsInCorrectOrder) { auto &csr = pDevice->getUltCommandStreamReceiver(); cl_int retVal = CL_SUCCESS; auto buffer1 = clUniquePtr(Buffer::create(context.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); auto buffer2 = clUniquePtr(Buffer::create(context.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); constexpr size_t hostAllocationSize = 
MemoryConstants::pageSize; auto hostAllocationPtr1 = allocateAlignedMemory(hostAllocationSize, MemoryConstants::pageSize); void *hostPtr1 = reinterpret_cast(hostAllocationPtr1.get()); auto hostAllocationPtr2 = allocateAlignedMemory(hostAllocationSize, MemoryConstants::pageSize); void *hostPtr2 = reinterpret_cast(hostAllocationPtr2.get()); auto graphicsAllocation1 = buffer1->getGraphicsAllocation(pDevice->getRootDeviceIndex()); auto graphicsAllocation2 = buffer2->getGraphicsAllocation(pDevice->getRootDeviceIndex()); auto blitProperties1 = BlitProperties::constructPropertiesForReadWrite(BlitterConstants::BlitDirection::HostPtrToBuffer, csr, graphicsAllocation1, nullptr, hostPtr1, graphicsAllocation1->getGpuAddress(), 0, 0, 0, {1, 1, 1}, 0, 0, 0, 0); auto blitProperties2 = BlitProperties::constructPropertiesForReadWrite(BlitterConstants::BlitDirection::HostPtrToBuffer, csr, graphicsAllocation2, nullptr, hostPtr2, graphicsAllocation2->getGpuAddress(), 0, 0, 0, {1, 1, 1}, 0, 0, 0, 0); MockTimestampPacketContainer timestamp1(*csr.getTimestampPacketAllocator(), 1); MockTimestampPacketContainer timestamp2(*csr.getTimestampPacketAllocator(), 1); blitProperties1.csrDependencies.timestampPacketContainer.push_back(×tamp1); blitProperties2.csrDependencies.timestampPacketContainer.push_back(×tamp2); BlitPropertiesContainer blitPropertiesContainer; blitPropertiesContainer.push_back(blitProperties1); blitPropertiesContainer.push_back(blitProperties2); csr.flushBcsTask(blitPropertiesContainer, true, false, *pDevice); HardwareParse hwParser; hwParser.parseCommands(csr.commandStream); auto &cmdList = hwParser.cmdList; uint32_t xyCopyBltCmdFound = 0; uint32_t dependenciesFound = 0; for (auto cmdIterator = cmdList.begin(); cmdIterator != cmdList.end(); cmdIterator++) { if (genCmdCast(*cmdIterator)) { xyCopyBltCmdFound++; EXPECT_EQ(xyCopyBltCmdFound, dependenciesFound); continue; } auto miSemaphore = genCmdCast(*cmdIterator); if (miSemaphore) { if 
(UnitTestHelper::isAdditionalMiSemaphoreWait(*miSemaphore)) { continue; } dependenciesFound++; EXPECT_EQ(xyCopyBltCmdFound, dependenciesFound - 1); } } EXPECT_EQ(2u, xyCopyBltCmdFound); EXPECT_EQ(2u, dependenciesFound); } HWTEST_F(BcsTests, whenBlitBufferThenCommandBufferHasProperTaskCount) { auto &csr = pDevice->getUltCommandStreamReceiver(); cl_int retVal = CL_SUCCESS; auto buffer = clUniquePtr(Buffer::create(context.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); constexpr size_t hostAllocationSize = MemoryConstants::pageSize; auto hostAllocationPtr = allocateAlignedMemory(hostAllocationSize, MemoryConstants::pageSize); void *hostPtr = reinterpret_cast(hostAllocationPtr.get()); auto graphicsAllocation = buffer->getGraphicsAllocation(pDevice->getRootDeviceIndex()); auto blitProperties = BlitProperties::constructPropertiesForReadWrite(BlitterConstants::BlitDirection::HostPtrToBuffer, csr, graphicsAllocation, nullptr, hostPtr, graphicsAllocation->getGpuAddress(), 0, 0, 0, {1, 1, 1}, 0, 0, 0, 0); BlitPropertiesContainer blitPropertiesContainer; blitPropertiesContainer.push_back(blitProperties); csr.flushBcsTask(blitPropertiesContainer, true, false, *pDevice); EXPECT_EQ(csr.getCS(0u).getGraphicsAllocation()->getTaskCount(csr.getOsContext().getContextId()), csr.peekTaskCount()); EXPECT_EQ(csr.getCS(0u).getGraphicsAllocation()->getResidencyTaskCount(csr.getOsContext().getContextId()), csr.peekTaskCount()); } HWTEST_F(BcsTests, givenUpdateTaskCountFromWaitWhenBlitBufferThenCsrHasProperTaskCounts) { DebugManagerStateRestore restorer; DebugManager.flags.UpdateTaskCountFromWait.set(3); auto &csr = pDevice->getUltCommandStreamReceiver(); cl_int retVal = CL_SUCCESS; auto buffer = clUniquePtr(Buffer::create(context.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); constexpr size_t hostAllocationSize = MemoryConstants::pageSize; auto hostAllocationPtr = allocateAlignedMemory(hostAllocationSize, MemoryConstants::pageSize); void *hostPtr = 
reinterpret_cast(hostAllocationPtr.get()); auto graphicsAllocation = buffer->getGraphicsAllocation(pDevice->getRootDeviceIndex()); auto blitProperties = BlitProperties::constructPropertiesForReadWrite(BlitterConstants::BlitDirection::HostPtrToBuffer, csr, graphicsAllocation, nullptr, hostPtr, graphicsAllocation->getGpuAddress(), 0, 0, 0, {1, 1, 1}, 0, 0, 0, 0); BlitPropertiesContainer blitPropertiesContainer; blitPropertiesContainer.push_back(blitProperties); auto taskCount = csr.peekTaskCount(); csr.flushBcsTask(blitPropertiesContainer, false, false, *pDevice); EXPECT_EQ(csr.peekTaskCount(), taskCount + 1); EXPECT_EQ(csr.peekLatestFlushedTaskCount(), taskCount); } HWTEST_F(BcsTests, givenProfilingEnabledWhenBlitBufferThenCommandBufferIsConstructedProperly) { auto bcsOsContext = std::unique_ptr(OsContext::create(nullptr, 0, EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS, EngineUsage::Regular}, pDevice->getDeviceBitfield()))); auto bcsCsr = std::make_unique>(*pDevice->getExecutionEnvironment(), pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); bcsCsr->setupContext(*bcsOsContext); bcsCsr->initializeTagAllocation(); cl_int retVal = CL_SUCCESS; auto buffer = clUniquePtr(Buffer::create(context.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); constexpr size_t hostAllocationSize = MemoryConstants::pageSize; auto hostAllocationPtr = allocateAlignedMemory(hostAllocationSize, MemoryConstants::pageSize); void *hostPtr = reinterpret_cast(hostAllocationPtr.get()); auto graphicsAllocation = buffer->getGraphicsAllocation(pDevice->getRootDeviceIndex()); auto blitProperties = BlitProperties::constructPropertiesForReadWrite(BlitterConstants::BlitDirection::HostPtrToBuffer, *bcsCsr, graphicsAllocation, nullptr, hostPtr, graphicsAllocation->getGpuAddress(), 0, 0, 0, {1, 1, 1}, 0, 0, 0, 0); MockTimestampPacketContainer timestamp(*bcsCsr->getTimestampPacketAllocator(), 1u); blitProperties.outputTimestampPacket = timestamp.getNode(0); 
BlitPropertiesContainer blitPropertiesContainer; blitPropertiesContainer.push_back(blitProperties); bcsCsr->flushBcsTask(blitPropertiesContainer, false, true, *pDevice); HardwareParse hwParser; hwParser.parseCommands(bcsCsr->commandStream); auto &cmdList = hwParser.cmdList; auto cmdIterator = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), cmdIterator); cmdIterator = find(++cmdIterator, cmdList.end()); ASSERT_NE(cmdList.end(), cmdIterator); cmdIterator = find(++cmdIterator, cmdList.end()); ASSERT_NE(cmdList.end(), cmdIterator); cmdIterator = find(++cmdIterator, cmdList.end()); ASSERT_NE(cmdList.end(), cmdIterator); cmdIterator = find(++cmdIterator, cmdList.end()); ASSERT_NE(cmdList.end(), cmdIterator); cmdIterator = find(++cmdIterator, cmdList.end()); ASSERT_NE(cmdList.end(), cmdIterator); } HWTEST_F(BcsTests, givenNotInitializedOsContextWhenBlitBufferIsCalledThenInitializeContext) { auto bcsOsContext = std::unique_ptr(OsContext::create(nullptr, 0, EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS, EngineUsage::Regular}, pDevice->getDeviceBitfield()))); auto bcsCsr = std::make_unique>(*pDevice->getExecutionEnvironment(), pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); bcsCsr->setupContext(*bcsOsContext); bcsCsr->initializeTagAllocation(); cl_int retVal = CL_SUCCESS; auto buffer = clUniquePtr(Buffer::create(context.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); constexpr size_t hostAllocationSize = MemoryConstants::pageSize; auto hostAllocationPtr = allocateAlignedMemory(hostAllocationSize, MemoryConstants::pageSize); void *hostPtr = reinterpret_cast(hostAllocationPtr.get()); auto graphicsAllocation = buffer->getGraphicsAllocation(pDevice->getRootDeviceIndex()); auto blitProperties = BlitProperties::constructPropertiesForReadWrite(BlitterConstants::BlitDirection::HostPtrToBuffer, *bcsCsr, graphicsAllocation, nullptr, hostPtr, graphicsAllocation->getGpuAddress(), 0, 0, 0, {1, 1, 1}, 0, 0, 0, 0); 
MockTimestampPacketContainer timestamp(*bcsCsr->getTimestampPacketAllocator(), 1u); blitProperties.outputTimestampPacket = timestamp.getNode(0); BlitPropertiesContainer blitPropertiesContainer; blitPropertiesContainer.push_back(blitProperties); EXPECT_FALSE(bcsOsContext->isInitialized()); bcsCsr->flushBcsTask(blitPropertiesContainer, false, true, *pDevice); EXPECT_TRUE(bcsOsContext->isInitialized()); } HWTEST_F(BcsTests, givenInputAllocationsWhenBlitDispatchedThenMakeAllAllocationsResident) { auto &csr = pDevice->getUltCommandStreamReceiver(); csr.storeMakeResidentAllocations = true; cl_int retVal = CL_SUCCESS; auto buffer1 = clUniquePtr(Buffer::create(context.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); auto buffer2 = clUniquePtr(Buffer::create(context.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); constexpr size_t hostAllocationSize = MemoryConstants::pageSize; auto hostAllocationPtr1 = allocateAlignedMemory(hostAllocationSize, MemoryConstants::pageSize); void *hostPtr1 = reinterpret_cast(hostAllocationPtr1.get()); auto hostAllocationPtr2 = allocateAlignedMemory(hostAllocationSize, MemoryConstants::pageSize); void *hostPtr2 = reinterpret_cast(hostAllocationPtr2.get()); EXPECT_EQ(0u, csr.makeSurfacePackNonResidentCalled); auto graphicsAllocation1 = buffer1->getGraphicsAllocation(pDevice->getRootDeviceIndex()); auto graphicsAllocation2 = buffer2->getGraphicsAllocation(pDevice->getRootDeviceIndex()); auto blitProperties1 = BlitProperties::constructPropertiesForReadWrite(BlitterConstants::BlitDirection::HostPtrToBuffer, csr, graphicsAllocation1, nullptr, hostPtr1, graphicsAllocation1->getGpuAddress(), 0, 0, 0, {1, 1, 1}, 0, 0, 0, 0); auto blitProperties2 = BlitProperties::constructPropertiesForReadWrite(BlitterConstants::BlitDirection::HostPtrToBuffer, csr, graphicsAllocation2, nullptr, hostPtr2, graphicsAllocation2->getGpuAddress(), 0, 0, 0, {1, 1, 1}, 0, 0, 0, 0); BlitPropertiesContainer blitPropertiesContainer; 
blitPropertiesContainer.push_back(blitProperties1); blitPropertiesContainer.push_back(blitProperties2); csr.flushBcsTask(blitPropertiesContainer, false, false, *pDevice); uint32_t residentAllocationsNum = 5u; EXPECT_TRUE(csr.isMadeResident(graphicsAllocation1)); EXPECT_TRUE(csr.isMadeResident(graphicsAllocation2)); EXPECT_TRUE(csr.isMadeResident(csr.getTagAllocation())); EXPECT_EQ(1u, csr.makeSurfacePackNonResidentCalled); if (csr.clearColorAllocation) { EXPECT_TRUE(csr.isMadeResident(csr.clearColorAllocation)); residentAllocationsNum++; } if (csr.globalFenceAllocation) { EXPECT_TRUE(csr.isMadeResident(csr.globalFenceAllocation)); residentAllocationsNum++; } EXPECT_EQ(residentAllocationsNum, csr.makeResidentAllocations.size()); } HWTEST_F(BcsTests, givenFenceAllocationIsRequiredWhenBlitDispatchedThenMakeAllAllocationsResident) { RAIIHwHelperFactory> hwHelperBackup{pDevice->getHardwareInfo().platform.eRenderCoreFamily}; auto bcsOsContext = std::unique_ptr(OsContext::create(nullptr, 0, EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS, EngineUsage::Regular}, pDevice->getDeviceBitfield()))); auto bcsCsr = std::make_unique>(*pDevice->getExecutionEnvironment(), pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); bcsCsr->setupContext(*bcsOsContext); bcsCsr->initializeTagAllocation(); bcsCsr->createGlobalFenceAllocation(); bcsCsr->storeMakeResidentAllocations = true; cl_int retVal = CL_SUCCESS; auto buffer1 = clUniquePtr(Buffer::create(context.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); auto buffer2 = clUniquePtr(Buffer::create(context.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); constexpr size_t hostAllocationSize = MemoryConstants::pageSize; auto hostAllocationPtr1 = allocateAlignedMemory(hostAllocationSize, MemoryConstants::pageSize); void *hostPtr1 = reinterpret_cast(hostAllocationPtr1.get()); auto hostAllocationPtr2 = allocateAlignedMemory(hostAllocationSize, MemoryConstants::pageSize); void *hostPtr2 = 
reinterpret_cast(hostAllocationPtr2.get()); EXPECT_EQ(0u, bcsCsr->makeSurfacePackNonResidentCalled); auto graphicsAllocation1 = buffer1->getGraphicsAllocation(pDevice->getRootDeviceIndex()); auto graphicsAllocation2 = buffer2->getGraphicsAllocation(pDevice->getRootDeviceIndex()); auto blitProperties1 = BlitProperties::constructPropertiesForReadWrite(BlitterConstants::BlitDirection::HostPtrToBuffer, *bcsCsr, graphicsAllocation1, nullptr, hostPtr1, graphicsAllocation1->getGpuAddress(), 0, 0, 0, {1, 1, 1}, 0, 0, 0, 0); auto blitProperties2 = BlitProperties::constructPropertiesForReadWrite(BlitterConstants::BlitDirection::HostPtrToBuffer, *bcsCsr, graphicsAllocation2, nullptr, hostPtr2, graphicsAllocation2->getGpuAddress(), 0, 0, 0, {1, 1, 1}, 0, 0, 0, 0); BlitPropertiesContainer blitPropertiesContainer; blitPropertiesContainer.push_back(blitProperties1); blitPropertiesContainer.push_back(blitProperties2); bcsCsr->flushBcsTask(blitPropertiesContainer, false, false, *pDevice); uint32_t residentAllocationsNum = 6u; EXPECT_TRUE(bcsCsr->isMadeResident(graphicsAllocation1)); EXPECT_TRUE(bcsCsr->isMadeResident(graphicsAllocation2)); EXPECT_TRUE(bcsCsr->isMadeResident(bcsCsr->getTagAllocation())); EXPECT_TRUE(bcsCsr->isMadeResident(bcsCsr->globalFenceAllocation)); if (bcsCsr->clearColorAllocation) { EXPECT_TRUE(bcsCsr->isMadeResident(bcsCsr->clearColorAllocation)); residentAllocationsNum++; } EXPECT_EQ(1u, bcsCsr->makeSurfacePackNonResidentCalled); EXPECT_EQ(residentAllocationsNum, bcsCsr->makeResidentAllocations.size()); } HWTEST_F(BcsTests, givenBufferWhenBlitCalledThenFlushCommandBuffer) { auto &csr = pDevice->getUltCommandStreamReceiver(); csr.recordFlusheBatchBuffer = true; cl_int retVal = CL_SUCCESS; auto buffer = clUniquePtr(Buffer::create(context.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); constexpr size_t hostAllocationSize = MemoryConstants::pageSize; auto hostAllocationPtr = allocateAlignedMemory(hostAllocationSize, MemoryConstants::pageSize); void *hostPtr = 
reinterpret_cast(hostAllocationPtr.get()); auto graphicsAllocation = buffer->getGraphicsAllocation(pDevice->getRootDeviceIndex()); auto &commandStream = csr.getCS(MemoryConstants::pageSize); size_t commandStreamOffset = 4; commandStream.getSpace(commandStreamOffset); uint32_t newTaskCount = 17; csr.taskCount = newTaskCount - 1; auto blitProperties = BlitProperties::constructPropertiesForReadWrite(BlitterConstants::BlitDirection::HostPtrToBuffer, csr, graphicsAllocation, nullptr, hostPtr, graphicsAllocation->getGpuAddress(), 0, 0, 0, {1, 1, 1}, 0, 0, 0, 0); flushBcsTask(&csr, blitProperties, true, *pDevice); EXPECT_EQ(commandStream.getGraphicsAllocation(), csr.latestFlushedBatchBuffer.commandBufferAllocation); EXPECT_EQ(commandStreamOffset, csr.latestFlushedBatchBuffer.startOffset); EXPECT_EQ(0u, csr.latestFlushedBatchBuffer.chainedBatchBufferStartOffset); EXPECT_EQ(nullptr, csr.latestFlushedBatchBuffer.chainedBatchBuffer); EXPECT_FALSE(csr.latestFlushedBatchBuffer.requiresCoherency); EXPECT_FALSE(csr.latestFlushedBatchBuffer.low_priority); EXPECT_EQ(QueueThrottle::MEDIUM, csr.latestFlushedBatchBuffer.throttle); EXPECT_EQ(commandStream.getUsed(), csr.latestFlushedBatchBuffer.usedSize); EXPECT_EQ(&commandStream, csr.latestFlushedBatchBuffer.stream); EXPECT_EQ(newTaskCount, csr.latestWaitForCompletionWithTimeoutTaskCount.load()); } HWTEST_F(BcsTests, whenBlitFromHostPtrCalledThenCallWaitWithKmdFallback) { class MyMockCsr : public UltCommandStreamReceiver { public: using UltCommandStreamReceiver::UltCommandStreamReceiver; WaitStatus waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, QueueThrottle throttle) override { waitForTaskCountWithKmdNotifyFallbackCalled++; taskCountToWaitPassed = taskCountToWait; flushStampToWaitPassed = flushStampToWait; useQuickKmdSleepPassed = useQuickKmdSleep; throttlePassed = throttle; return WaitStatus::Ready; } FlushStamp flushStampToWaitPassed = 0; uint32_t 
taskCountToWaitPassed = 0; uint32_t waitForTaskCountWithKmdNotifyFallbackCalled = 0; bool useQuickKmdSleepPassed = false; QueueThrottle throttlePassed = QueueThrottle::MEDIUM; }; auto myMockCsr = std::make_unique(*pDevice->getExecutionEnvironment(), pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); auto &bcsOsContext = pDevice->getUltCommandStreamReceiver().getOsContext(); myMockCsr->initializeTagAllocation(); myMockCsr->setupContext(bcsOsContext); cl_int retVal = CL_SUCCESS; auto buffer = clUniquePtr(Buffer::create(context.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); constexpr size_t hostAllocationSize = MemoryConstants::pageSize; auto hostAllocationPtr = allocateAlignedMemory(hostAllocationSize, MemoryConstants::pageSize); void *hostPtr = reinterpret_cast(hostAllocationPtr.get()); auto graphicsAllocation = buffer->getGraphicsAllocation(pDevice->getRootDeviceIndex()); auto blitProperties = BlitProperties::constructPropertiesForReadWrite(BlitterConstants::BlitDirection::HostPtrToBuffer, *myMockCsr, graphicsAllocation, nullptr, hostPtr, graphicsAllocation->getGpuAddress(), 0, 0, 0, {1, 1, 1}, 0, 0, 0, 0); flushBcsTask(myMockCsr.get(), blitProperties, false, *pDevice); EXPECT_EQ(0u, myMockCsr->waitForTaskCountWithKmdNotifyFallbackCalled); flushBcsTask(myMockCsr.get(), blitProperties, true, *pDevice); EXPECT_EQ(1u, myMockCsr->waitForTaskCountWithKmdNotifyFallbackCalled); EXPECT_EQ(myMockCsr->taskCount, myMockCsr->taskCountToWaitPassed); EXPECT_EQ(myMockCsr->flushStamp->peekStamp(), myMockCsr->flushStampToWaitPassed); EXPECT_FALSE(myMockCsr->useQuickKmdSleepPassed); EXPECT_EQ(myMockCsr->throttlePassed, QueueThrottle::MEDIUM); EXPECT_EQ(1u, myMockCsr->activePartitions); } HWTEST_F(BcsTests, whenBlitFromHostPtrCalledThenCleanTemporaryAllocations) { auto &bcsCsr = pDevice->getUltCommandStreamReceiver(); auto mockInternalAllocationsStorage = new MockInternalAllocationStorage(bcsCsr); bcsCsr.internalAllocationStorage.reset(mockInternalAllocationsStorage); 
cl_int retVal = CL_SUCCESS; auto buffer = clUniquePtr(Buffer::create(context.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); constexpr size_t hostAllocationSize = MemoryConstants::pageSize; auto hostAllocationPtr = allocateAlignedMemory(hostAllocationSize, MemoryConstants::pageSize); void *hostPtr = reinterpret_cast(hostAllocationPtr.get()); auto graphicsAllocation = buffer->getGraphicsAllocation(pDevice->getRootDeviceIndex()); bcsCsr.taskCount = 17; EXPECT_EQ(0u, mockInternalAllocationsStorage->cleanAllocationsCalled); auto blitProperties = BlitProperties::constructPropertiesForReadWrite(BlitterConstants::BlitDirection::HostPtrToBuffer, bcsCsr, graphicsAllocation, nullptr, hostPtr, graphicsAllocation->getGpuAddress(), 0, 0, 0, {1, 1, 1}, 0, 0, 0, 0); flushBcsTask(&bcsCsr, blitProperties, false, *pDevice); EXPECT_EQ(0u, mockInternalAllocationsStorage->cleanAllocationsCalled); flushBcsTask(&bcsCsr, blitProperties, true, *pDevice); EXPECT_EQ(1u, mockInternalAllocationsStorage->cleanAllocationsCalled); EXPECT_EQ(bcsCsr.taskCount, mockInternalAllocationsStorage->lastCleanAllocationsTaskCount); EXPECT_TRUE(TEMPORARY_ALLOCATION == mockInternalAllocationsStorage->lastCleanAllocationUsage); } HWTEST_F(BcsTests, givenBufferWhenBlitOperationCalledThenProgramCorrectGpuAddresses) { auto &csr = pDevice->getUltCommandStreamReceiver(); cl_int retVal = CL_SUCCESS; auto buffer1 = clUniquePtr(Buffer::create(context.get(), CL_MEM_READ_WRITE, 100, nullptr, retVal)); auto buffer2 = clUniquePtr(Buffer::create(context.get(), CL_MEM_READ_WRITE, 100, nullptr, retVal)); auto graphicsAllocation1 = buffer1->getGraphicsAllocation(pDevice->getRootDeviceIndex()); auto graphicsAllocation2 = buffer2->getGraphicsAllocation(pDevice->getRootDeviceIndex()); constexpr size_t hostAllocationSize = MemoryConstants::pageSize * 2; auto hostAllocationPtr = allocateAlignedMemory(hostAllocationSize, MemoryConstants::pageSize); void *hostPtr = reinterpret_cast(hostAllocationPtr.get()); const size_t 
hostPtrOffset = 0x1234; const size_t subBuffer1Offset = 0x23; cl_buffer_region subBufferRegion1 = {subBuffer1Offset, 1}; auto subBuffer1 = clUniquePtr(buffer1->createSubBuffer(CL_MEM_READ_WRITE, 0, &subBufferRegion1, retVal)); Vec3 copySizes[2] = {{1, 1, 1}, {1, 2, 1}}; EXPECT_FALSE(BlitCommandsHelper::isCopyRegionPreferred(copySizes[0], pDevice->getRootDeviceEnvironment())); EXPECT_TRUE(BlitCommandsHelper::isCopyRegionPreferred(copySizes[1], pDevice->getRootDeviceEnvironment())); for (auto ©Size : copySizes) { { // from hostPtr HardwareParse hwParser; auto blitProperties = BlitProperties::constructPropertiesForReadWrite(BlitterConstants::BlitDirection::HostPtrToBuffer, csr, graphicsAllocation1, nullptr, hostPtr, graphicsAllocation1->getGpuAddress() + subBuffer1->getOffset(), 0, {hostPtrOffset, 0, 0}, 0, copySize, 0, 0, 0, 0); flushBcsTask(&csr, blitProperties, true, *pDevice); hwParser.parseCommands(csr.commandStream); auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); ASSERT_NE(hwParser.cmdList.end(), cmdIterator); auto bltCmd = genCmdCast(*cmdIterator); ASSERT_NE(nullptr, bltCmd); if (pDevice->isFullRangeSvm()) { EXPECT_EQ(reinterpret_cast(ptrOffset(hostPtr, hostPtrOffset)), bltCmd->getSourceBaseAddress()); } EXPECT_EQ(graphicsAllocation1->getGpuAddress() + subBuffer1Offset, bltCmd->getDestinationBaseAddress()); } { // to hostPtr HardwareParse hwParser; auto offset = csr.commandStream.getUsed(); auto blitProperties = BlitProperties::constructPropertiesForReadWrite(BlitterConstants::BlitDirection::BufferToHostPtr, csr, graphicsAllocation1, nullptr, hostPtr, graphicsAllocation1->getGpuAddress() + subBuffer1->getOffset(), 0, {hostPtrOffset, 0, 0}, 0, copySize, 0, 0, 0, 0); flushBcsTask(&csr, blitProperties, true, *pDevice); hwParser.parseCommands(csr.commandStream, offset); auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); ASSERT_NE(hwParser.cmdList.end(), cmdIterator); auto bltCmd = genCmdCast(*cmdIterator); 
ASSERT_NE(nullptr, bltCmd); if (pDevice->isFullRangeSvm()) { EXPECT_EQ(reinterpret_cast(ptrOffset(hostPtr, hostPtrOffset)), bltCmd->getDestinationBaseAddress()); } EXPECT_EQ(graphicsAllocation1->getGpuAddress() + subBuffer1Offset, bltCmd->getSourceBaseAddress()); } { // Buffer to Buffer HardwareParse hwParser; auto offset = csr.commandStream.getUsed(); auto blitProperties = BlitProperties::constructPropertiesForCopy(graphicsAllocation1, graphicsAllocation2, 0, 0, copySize, 0, 0, 0, 0, csr.getClearColorAllocation()); flushBcsTask(&csr, blitProperties, true, *pDevice); hwParser.parseCommands(csr.commandStream, offset); auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); ASSERT_NE(hwParser.cmdList.end(), cmdIterator); auto bltCmd = genCmdCast(*cmdIterator); ASSERT_NE(nullptr, bltCmd); EXPECT_EQ(graphicsAllocation1->getGpuAddress(), bltCmd->getDestinationBaseAddress()); EXPECT_EQ(graphicsAllocation2->getGpuAddress(), bltCmd->getSourceBaseAddress()); } { // Buffer to Buffer - with object offset const size_t subBuffer2Offset = 0x20; cl_buffer_region subBufferRegion2 = {subBuffer2Offset, 1}; auto subBuffer2 = clUniquePtr(buffer2->createSubBuffer(CL_MEM_READ_WRITE, 0, &subBufferRegion2, retVal)); BuiltinOpParams builtinOpParams = {}; builtinOpParams.dstMemObj = subBuffer2.get(); builtinOpParams.srcMemObj = subBuffer1.get(); builtinOpParams.size.x = copySize.x; builtinOpParams.size.y = copySize.y; auto blitProperties = ClBlitProperties::constructProperties(BlitterConstants::BlitDirection::BufferToBuffer, csr, builtinOpParams); auto offset = csr.commandStream.getUsed(); flushBcsTask(&csr, blitProperties, true, *pDevice); HardwareParse hwParser; hwParser.parseCommands(csr.commandStream, offset); auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); ASSERT_NE(hwParser.cmdList.end(), cmdIterator); auto bltCmd = genCmdCast(*cmdIterator); EXPECT_NE(nullptr, bltCmd); EXPECT_EQ(graphicsAllocation2->getGpuAddress() + subBuffer2Offset, 
bltCmd->getDestinationBaseAddress()); EXPECT_EQ(graphicsAllocation1->getGpuAddress() + subBuffer1Offset, bltCmd->getSourceBaseAddress()); } } } HWTEST_F(BcsTests, givenMapAllocationWhenDispatchReadWriteOperationThenSetValidGpuAddress) { auto &csr = pDevice->getUltCommandStreamReceiver(); auto memoryManager = csr.getMemoryManager(); constexpr size_t mapAllocationSize = MemoryConstants::pageSize * 2; auto mapAllocationPtr = allocateAlignedMemory(mapAllocationSize, MemoryConstants::pageSize); AllocationProperties properties{csr.getRootDeviceIndex(), false, mapAllocationSize, AllocationType::MAP_ALLOCATION, false, pDevice->getDeviceBitfield()}; GraphicsAllocation *mapAllocation = memoryManager->allocateGraphicsMemoryWithProperties(properties, mapAllocationPtr.get()); auto mapAllocationOffset = 0x1234; auto mapPtr = reinterpret_cast(mapAllocation->getGpuAddress() + mapAllocationOffset); cl_int retVal = CL_SUCCESS; auto buffer = clUniquePtr(Buffer::create(context.get(), CL_MEM_READ_WRITE, 100, nullptr, retVal)); auto graphicsAllocation = buffer->getGraphicsAllocation(pDevice->getRootDeviceIndex()); const size_t hostPtrOffset = 0x1234; Vec3 copySizes[2] = {{4, 1, 1}, {4, 2, 1}}; EXPECT_FALSE(BlitCommandsHelper::isCopyRegionPreferred(copySizes[0], pDevice->getRootDeviceEnvironment())); EXPECT_TRUE(BlitCommandsHelper::isCopyRegionPreferred(copySizes[1], pDevice->getRootDeviceEnvironment())); for (auto ©Size : copySizes) { { // from hostPtr HardwareParse hwParser; auto blitProperties = BlitProperties::constructPropertiesForReadWrite(BlitterConstants::BlitDirection::HostPtrToBuffer, csr, graphicsAllocation, mapAllocation, mapPtr, graphicsAllocation->getGpuAddress(), castToUint64(mapPtr), {hostPtrOffset, 0, 0}, 0, copySize, 0, 0, 0, 0); flushBcsTask(&csr, blitProperties, true, *pDevice); hwParser.parseCommands(csr.commandStream); auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); ASSERT_NE(hwParser.cmdList.end(), cmdIterator); auto bltCmd = 
genCmdCast(*cmdIterator); EXPECT_NE(nullptr, bltCmd); if (pDevice->isFullRangeSvm()) { EXPECT_EQ(reinterpret_cast(ptrOffset(mapPtr, hostPtrOffset)), bltCmd->getSourceBaseAddress()); } EXPECT_EQ(graphicsAllocation->getGpuAddress(), bltCmd->getDestinationBaseAddress()); } { // to hostPtr HardwareParse hwParser; auto offset = csr.commandStream.getUsed(); auto blitProperties = BlitProperties::constructPropertiesForReadWrite(BlitterConstants::BlitDirection::BufferToHostPtr, csr, graphicsAllocation, mapAllocation, mapPtr, graphicsAllocation->getGpuAddress(), castToUint64(mapPtr), {hostPtrOffset, 0, 0}, 0, copySize, 0, 0, 0, 0); flushBcsTask(&csr, blitProperties, true, *pDevice); hwParser.parseCommands(csr.commandStream, offset); auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); ASSERT_NE(hwParser.cmdList.end(), cmdIterator); auto bltCmd = genCmdCast(*cmdIterator); EXPECT_NE(nullptr, bltCmd); if (pDevice->isFullRangeSvm()) { EXPECT_EQ(reinterpret_cast(ptrOffset(mapPtr, hostPtrOffset)), bltCmd->getDestinationBaseAddress()); } EXPECT_EQ(graphicsAllocation->getGpuAddress(), bltCmd->getSourceBaseAddress()); } { // bufferRect to hostPtr HardwareParse hwParser; auto offset = csr.commandStream.getUsed(); auto copySize = Vec3(4, 2, 1); auto blitProperties = BlitProperties::constructPropertiesForReadWrite(BlitterConstants::BlitDirection::BufferToHostPtr, csr, graphicsAllocation, mapAllocation, mapPtr, graphicsAllocation->getGpuAddress(), castToUint64(mapPtr), {hostPtrOffset, 0, 0}, 0, copySize, 0, 0, 0, 0); flushBcsTask(&csr, blitProperties, true, *pDevice); hwParser.parseCommands(csr.commandStream, offset); auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); ASSERT_NE(hwParser.cmdList.end(), cmdIterator); auto bltCmd = genCmdCast(*cmdIterator); EXPECT_NE(nullptr, bltCmd); if (pDevice->isFullRangeSvm()) { EXPECT_EQ(reinterpret_cast(ptrOffset(mapPtr, hostPtrOffset)), bltCmd->getDestinationBaseAddress()); } 
EXPECT_EQ(graphicsAllocation->getGpuAddress(), bltCmd->getSourceBaseAddress()); } { // bufferWrite from hostPtr HardwareParse hwParser; auto blitProperties = BlitProperties::constructPropertiesForReadWrite(BlitterConstants::BlitDirection::HostPtrToBuffer, csr, graphicsAllocation, mapAllocation, mapPtr, graphicsAllocation->getGpuAddress(), castToUint64(mapPtr), {hostPtrOffset, 0, 0}, 0, copySize, 0, 0, 0, 0); flushBcsTask(&csr, blitProperties, true, *pDevice); hwParser.parseCommands(csr.commandStream); auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); ASSERT_NE(hwParser.cmdList.end(), cmdIterator); auto bltCmd = genCmdCast(*cmdIterator); EXPECT_NE(nullptr, bltCmd); if (pDevice->isFullRangeSvm()) { EXPECT_EQ(reinterpret_cast(ptrOffset(mapPtr, hostPtrOffset)), bltCmd->getSourceBaseAddress()); } EXPECT_EQ(graphicsAllocation->getGpuAddress(), bltCmd->getDestinationBaseAddress()); } } memoryManager->freeGraphicsMemory(mapAllocation); } HWTEST_F(BcsTests, givenMapAllocationInBuiltinOpParamsWhenConstructingThenUseItAsSourceOrDstAllocation) { auto &csr = pDevice->getUltCommandStreamReceiver(); auto memoryManager = static_cast(csr.getMemoryManager()); constexpr size_t mapAllocationSize = MemoryConstants::pageSize * 2; auto mapAllocationPtr = allocateAlignedMemory(mapAllocationSize, MemoryConstants::pageSize); AllocationProperties properties{csr.getRootDeviceIndex(), false, mapAllocationSize, AllocationType::MAP_ALLOCATION, false, pDevice->getDeviceBitfield()}; GraphicsAllocation *mapAllocation = memoryManager->allocateGraphicsMemoryWithProperties(properties, reinterpret_cast(mapAllocationPtr.get())); auto mapAllocationOffset = 0x1234; auto mapPtr = reinterpret_cast(mapAllocation->getGpuAddress() + mapAllocationOffset); cl_int retVal = CL_SUCCESS; auto buffer = clUniquePtr(Buffer::create(context.get(), CL_MEM_READ_WRITE, 100, nullptr, retVal)); memoryManager->returnFakeAllocation = true; { // from hostPtr BuiltinOpParams builtinOpParams = {}; 
builtinOpParams.dstMemObj = buffer.get(); builtinOpParams.srcPtr = mapPtr; builtinOpParams.size = {1, 1, 1}; builtinOpParams.transferAllocation = mapAllocation; auto blitProperties = ClBlitProperties::constructProperties(BlitterConstants::BlitDirection::HostPtrToBuffer, csr, builtinOpParams); EXPECT_EQ(mapAllocation, blitProperties.srcAllocation); } { // to hostPtr BuiltinOpParams builtinOpParams = {}; builtinOpParams.srcMemObj = buffer.get(); builtinOpParams.dstPtr = mapPtr; builtinOpParams.size = {1, 1, 1}; builtinOpParams.transferAllocation = mapAllocation; auto blitProperties = ClBlitProperties::constructProperties(BlitterConstants::BlitDirection::BufferToHostPtr, csr, builtinOpParams); EXPECT_EQ(mapAllocation, blitProperties.dstAllocation); } memoryManager->returnFakeAllocation = false; memoryManager->freeGraphicsMemory(mapAllocation); } HWTEST_F(BcsTests, givenNonZeroCopySvmAllocationWhenConstructingBlitPropertiesForReadWriteBufferCallThenSetValidAllocations) { auto &csr = pDevice->getUltCommandStreamReceiver(); MockMemoryManager mockMemoryManager(true, true); SVMAllocsManager svmAllocsManager(&mockMemoryManager, false); auto svmAllocationProperties = MemObjHelper::getSvmAllocationProperties(CL_MEM_READ_WRITE); auto svmAlloc = svmAllocsManager.createSVMAlloc(1, svmAllocationProperties, context->getRootDeviceIndices(), context->getDeviceBitfields()); auto svmData = svmAllocsManager.getSVMAlloc(svmAlloc); auto gpuAllocation = svmData->gpuAllocations.getGraphicsAllocation(pDevice->getRootDeviceIndex()); EXPECT_NE(nullptr, gpuAllocation); EXPECT_NE(nullptr, svmData->cpuAllocation); EXPECT_NE(gpuAllocation, svmData->cpuAllocation); { // from hostPtr BuiltinOpParams builtinOpParams = {}; builtinOpParams.dstSvmAlloc = gpuAllocation; builtinOpParams.srcSvmAlloc = svmData->cpuAllocation; builtinOpParams.srcPtr = reinterpret_cast(svmData->cpuAllocation->getGpuAddress()); builtinOpParams.size = {1, 1, 1}; auto blitProperties = 
ClBlitProperties::constructProperties(BlitterConstants::BlitDirection::HostPtrToBuffer, csr, builtinOpParams); EXPECT_EQ(svmData->cpuAllocation, blitProperties.srcAllocation); EXPECT_EQ(gpuAllocation, blitProperties.dstAllocation); } { // to hostPtr BuiltinOpParams builtinOpParams = {}; builtinOpParams.srcSvmAlloc = gpuAllocation; builtinOpParams.dstSvmAlloc = svmData->cpuAllocation; builtinOpParams.dstPtr = reinterpret_cast(svmData->cpuAllocation->getGpuAddress()); builtinOpParams.size = {1, 1, 1}; auto blitProperties = ClBlitProperties::constructProperties(BlitterConstants::BlitDirection::BufferToHostPtr, csr, builtinOpParams); EXPECT_EQ(svmData->cpuAllocation, blitProperties.dstAllocation); EXPECT_EQ(gpuAllocation, blitProperties.srcAllocation); } svmAllocsManager.freeSVMAlloc(svmAlloc); } HWTEST_F(BcsTests, givenSvmAllocationWhenBlitCalledThenUsePassedPointers) { auto &csr = pDevice->getUltCommandStreamReceiver(); MockMemoryManager mockMemoryManager(true, true); SVMAllocsManager svmAllocsManager(&mockMemoryManager, false); auto svmAllocationProperties = MemObjHelper::getSvmAllocationProperties(CL_MEM_READ_WRITE); auto svmAlloc = svmAllocsManager.createSVMAlloc(1, svmAllocationProperties, context->getRootDeviceIndices(), context->getDeviceBitfields()); auto svmData = svmAllocsManager.getSVMAlloc(svmAlloc); auto gpuAllocation = svmData->gpuAllocations.getGraphicsAllocation(pDevice->getRootDeviceIndex()); EXPECT_NE(nullptr, gpuAllocation); EXPECT_NE(nullptr, svmData->cpuAllocation); EXPECT_NE(gpuAllocation, svmData->cpuAllocation); uint64_t srcOffset = 2; uint64_t dstOffset = 3; Vec3 copySizes[2] = {{1, 1, 1}, {1, 2, 1}}; for (auto ©Size : copySizes) { { // from hostPtr BuiltinOpParams builtinOpParams = {}; builtinOpParams.dstSvmAlloc = svmData->cpuAllocation; builtinOpParams.srcSvmAlloc = gpuAllocation; builtinOpParams.srcPtr = reinterpret_cast(svmData->cpuAllocation->getGpuAddress() + srcOffset); builtinOpParams.dstPtr = 
reinterpret_cast(svmData->cpuAllocation->getGpuAddress() + dstOffset); builtinOpParams.size = copySize; auto blitProperties = ClBlitProperties::constructProperties(BlitterConstants::BlitDirection::HostPtrToBuffer, csr, builtinOpParams); EXPECT_EQ(gpuAllocation, blitProperties.srcAllocation); EXPECT_EQ(svmData->cpuAllocation, blitProperties.dstAllocation); flushBcsTask(&csr, blitProperties, true, *pDevice); HardwareParse hwParser; hwParser.parseCommands(csr.commandStream, 0); auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); ASSERT_NE(hwParser.cmdList.end(), cmdIterator); auto bltCmd = genCmdCast(*cmdIterator); EXPECT_EQ(castToUint64(builtinOpParams.dstPtr), bltCmd->getDestinationBaseAddress()); EXPECT_EQ(castToUint64(builtinOpParams.srcPtr), bltCmd->getSourceBaseAddress()); } { // to hostPtr BuiltinOpParams builtinOpParams = {}; builtinOpParams.srcSvmAlloc = gpuAllocation; builtinOpParams.dstSvmAlloc = svmData->cpuAllocation; builtinOpParams.dstPtr = reinterpret_cast(svmData->cpuAllocation + dstOffset); builtinOpParams.srcPtr = reinterpret_cast(gpuAllocation + srcOffset); builtinOpParams.size = copySize; auto blitProperties = ClBlitProperties::constructProperties(BlitterConstants::BlitDirection::BufferToHostPtr, csr, builtinOpParams); auto offset = csr.commandStream.getUsed(); flushBcsTask(&csr, blitProperties, true, *pDevice); HardwareParse hwParser; hwParser.parseCommands(csr.commandStream, offset); auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); ASSERT_NE(hwParser.cmdList.end(), cmdIterator); auto bltCmd = genCmdCast(*cmdIterator); EXPECT_EQ(castToUint64(builtinOpParams.dstPtr), bltCmd->getDestinationBaseAddress()); EXPECT_EQ(castToUint64(builtinOpParams.srcPtr), bltCmd->getSourceBaseAddress()); } } svmAllocsManager.freeSVMAlloc(svmAlloc); } HWTEST_F(BcsTests, givenBufferWithOffsetWhenBlitOperationCalledThenProgramCorrectGpuAddresses) { auto &csr = pDevice->getUltCommandStreamReceiver(); cl_int retVal = 
CL_SUCCESS; auto buffer1 = clUniquePtr(Buffer::create(context.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); auto buffer2 = clUniquePtr(Buffer::create(context.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); constexpr size_t hostAllocationSize = MemoryConstants::pageSize; auto hostAllocationPtr = allocateAlignedMemory(hostAllocationSize, MemoryConstants::pageSize); void *hostPtr = reinterpret_cast(hostAllocationPtr.get()); auto graphicsAllocation1 = buffer1->getGraphicsAllocation(pDevice->getRootDeviceIndex()); auto graphicsAllocation2 = buffer2->getGraphicsAllocation(pDevice->getRootDeviceIndex()); size_t addressOffsets[] = {0, 1, 1234}; Vec3 copySizes[2] = {{1, 1, 1}, {1, 2, 1}}; for (auto ©Size : copySizes) { for (auto buffer1Offset : addressOffsets) { { // from hostPtr HardwareParse hwParser; auto offset = csr.commandStream.getUsed(); auto blitProperties = BlitProperties::constructPropertiesForReadWrite(BlitterConstants::BlitDirection::HostPtrToBuffer, csr, graphicsAllocation1, nullptr, hostPtr, graphicsAllocation1->getGpuAddress(), 0, 0, {buffer1Offset, 0, 0}, copySize, 0, 0, 0, 0); flushBcsTask(&csr, blitProperties, true, *pDevice); hwParser.parseCommands(csr.commandStream, offset); auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); ASSERT_NE(hwParser.cmdList.end(), cmdIterator); auto bltCmd = genCmdCast(*cmdIterator); EXPECT_NE(nullptr, bltCmd); if (pDevice->isFullRangeSvm()) { EXPECT_EQ(reinterpret_cast(hostPtr), bltCmd->getSourceBaseAddress()); } EXPECT_EQ(ptrOffset(graphicsAllocation1->getGpuAddress(), buffer1Offset), bltCmd->getDestinationBaseAddress()); } { // to hostPtr HardwareParse hwParser; auto offset = csr.commandStream.getUsed(); auto blitProperties = BlitProperties::constructPropertiesForReadWrite(BlitterConstants::BlitDirection::BufferToHostPtr, csr, graphicsAllocation1, nullptr, hostPtr, graphicsAllocation1->getGpuAddress(), 0, 0, {buffer1Offset, 0, 0}, copySize, 0, 0, 0, 0); flushBcsTask(&csr, blitProperties, true, 
*pDevice); hwParser.parseCommands(csr.commandStream, offset); auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); ASSERT_NE(hwParser.cmdList.end(), cmdIterator); auto bltCmd = genCmdCast(*cmdIterator); EXPECT_NE(nullptr, bltCmd); if (pDevice->isFullRangeSvm()) { EXPECT_EQ(reinterpret_cast(hostPtr), bltCmd->getDestinationBaseAddress()); } EXPECT_EQ(ptrOffset(graphicsAllocation1->getGpuAddress(), buffer1Offset), bltCmd->getSourceBaseAddress()); } for (auto buffer2Offset : addressOffsets) { // Buffer to Buffer HardwareParse hwParser; auto offset = csr.commandStream.getUsed(); auto blitProperties = BlitProperties::constructPropertiesForCopy(graphicsAllocation1, graphicsAllocation2, {buffer1Offset, 0, 0}, {buffer2Offset, 0, 0}, copySize, 0, 0, 0, 0, csr.getClearColorAllocation()); flushBcsTask(&csr, blitProperties, true, *pDevice); hwParser.parseCommands(csr.commandStream, offset); auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); ASSERT_NE(hwParser.cmdList.end(), cmdIterator); auto bltCmd = genCmdCast(*cmdIterator); EXPECT_NE(nullptr, bltCmd); EXPECT_EQ(ptrOffset(graphicsAllocation1->getGpuAddress(), buffer1Offset), bltCmd->getDestinationBaseAddress()); EXPECT_EQ(ptrOffset(graphicsAllocation2->getGpuAddress(), buffer2Offset), bltCmd->getSourceBaseAddress()); } } } } HWTEST_F(BcsTests, givenBufferWithBigSizesWhenBlitOperationCalledThenProgramCorrectGpuAddresses) { auto &rootDeviceEnvironment = pDevice->getRootDeviceEnvironment(); auto maxWidthToCopy = static_cast(BlitCommandsHelper::getMaxBlitWidth(rootDeviceEnvironment)); auto maxHeightToCopy = static_cast(BlitCommandsHelper::getMaxBlitHeight(rootDeviceEnvironment)); auto &csr = pDevice->getUltCommandStreamReceiver(); cl_int retVal = CL_SUCCESS; auto buffer1 = clUniquePtr(Buffer::create(context.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); auto buffer2 = clUniquePtr(Buffer::create(context.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); constexpr size_t 
hostAllocationSize = MemoryConstants::pageSize; auto hostAllocationPtr = allocateAlignedMemory(hostAllocationSize, MemoryConstants::pageSize); void *hostPtr = reinterpret_cast(hostAllocationPtr.get()); auto graphicsAllocation = buffer1->getGraphicsAllocation(pDevice->getRootDeviceIndex()); size_t srcOrigin[] = {1, 2, 0}; size_t dstOrigin[] = {4, 3, 1}; size_t region[] = {maxWidthToCopy + 16, maxHeightToCopy + 16, 2}; size_t srcRowPitch = region[0] + 34; size_t srcSlicePitch = srcRowPitch * region[1] + 36; size_t dstRowPitch = region[0] + 40; size_t dstSlicePitch = dstRowPitch * region[1] + 44; auto srcAddressOffset = srcOrigin[0] + (srcOrigin[1] * srcRowPitch) + (srcOrigin[2] * srcSlicePitch); auto dstAddressOffset = dstOrigin[0] + (dstOrigin[1] * dstRowPitch) + (dstOrigin[2] * dstSlicePitch); EXPECT_TRUE(BlitCommandsHelper::isCopyRegionPreferred(region, rootDeviceEnvironment)); auto memoryManager = static_cast(pDevice->getMemoryManager()); memoryManager->returnFakeAllocation = true; // from hostPtr HardwareParse hwParser; auto offset = csr.commandStream.getUsed(); auto blitProperties = BlitProperties::constructPropertiesForReadWrite(BlitterConstants::BlitDirection::HostPtrToBuffer, csr, graphicsAllocation, nullptr, hostPtr, graphicsAllocation->getGpuAddress(), 0, srcOrigin, dstOrigin, region, srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch); memoryManager->returnFakeAllocation = false; flushBcsTask(&csr, blitProperties, true, *pDevice); hwParser.parseCommands(csr.commandStream, offset); //1st rectangle xCopy = maxWidthToCopy, yCopy = maxHeightToCopy, zCopy = 1 auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); ASSERT_NE(hwParser.cmdList.end(), cmdIterator); auto bltCmd = genCmdCast(*cmdIterator); EXPECT_NE(nullptr, bltCmd); if (pDevice->isFullRangeSvm()) { EXPECT_EQ(ptrOffset(reinterpret_cast(hostPtr), srcAddressOffset), bltCmd->getSourceBaseAddress()); } EXPECT_EQ(ptrOffset(graphicsAllocation->getGpuAddress(), dstAddressOffset), 
bltCmd->getDestinationBaseAddress()); srcAddressOffset += maxWidthToCopy; dstAddressOffset += maxWidthToCopy; // 2nd rectangle xCopy = (region[0] - maxWidthToCopy), yCopy = (region[0] - maxHeightToCopy), zCopy = 1 cmdIterator = find(++cmdIterator, hwParser.cmdList.end()); ASSERT_NE(hwParser.cmdList.end(), cmdIterator); bltCmd = genCmdCast(*cmdIterator); EXPECT_NE(nullptr, bltCmd); if (pDevice->isFullRangeSvm()) { EXPECT_EQ(ptrOffset(reinterpret_cast(hostPtr), srcAddressOffset), bltCmd->getSourceBaseAddress()); } EXPECT_EQ(ptrOffset(graphicsAllocation->getGpuAddress(), dstAddressOffset), bltCmd->getDestinationBaseAddress()); srcAddressOffset += (region[0] - maxWidthToCopy); srcAddressOffset += (srcRowPitch - region[0]); srcAddressOffset += (srcRowPitch * (maxHeightToCopy - 1)); dstAddressOffset += (region[0] - maxWidthToCopy); dstAddressOffset += (dstRowPitch - region[0]); dstAddressOffset += (dstRowPitch * (maxHeightToCopy - 1)); // 3rd rectangle xCopy = maxWidthToCopy, yCopy = maxHeightToCopy, zCopy = 1 cmdIterator = find(++cmdIterator, hwParser.cmdList.end()); ASSERT_NE(hwParser.cmdList.end(), cmdIterator); bltCmd = genCmdCast(*cmdIterator); EXPECT_NE(nullptr, bltCmd); if (pDevice->isFullRangeSvm()) { EXPECT_EQ(ptrOffset(reinterpret_cast(hostPtr), srcAddressOffset), bltCmd->getSourceBaseAddress()); } EXPECT_EQ(ptrOffset(graphicsAllocation->getGpuAddress(), dstAddressOffset), bltCmd->getDestinationBaseAddress()); srcAddressOffset += maxWidthToCopy; dstAddressOffset += maxWidthToCopy; //4th rectangle xCopy = (region[0] - maxWidthToCopy), yCopy = (region[0] - maxHeightToCopy), zCopy = 1 cmdIterator = find(++cmdIterator, hwParser.cmdList.end()); ASSERT_NE(hwParser.cmdList.end(), cmdIterator); bltCmd = genCmdCast(*cmdIterator); EXPECT_NE(nullptr, bltCmd); if (pDevice->isFullRangeSvm()) { EXPECT_EQ(ptrOffset(reinterpret_cast(hostPtr), srcAddressOffset), bltCmd->getSourceBaseAddress()); } EXPECT_EQ(ptrOffset(graphicsAllocation->getGpuAddress(), dstAddressOffset), 
bltCmd->getDestinationBaseAddress()); srcAddressOffset += (region[0] - maxWidthToCopy); srcAddressOffset += (srcRowPitch - region[0]); srcAddressOffset += (srcRowPitch * (region[1] - maxHeightToCopy - 1)); srcAddressOffset += (srcSlicePitch - (srcRowPitch * region[1])); dstAddressOffset += (region[0] - maxWidthToCopy); dstAddressOffset += (dstRowPitch - region[0]); dstAddressOffset += (dstRowPitch * (region[1] - maxHeightToCopy - 1)); dstAddressOffset += (dstSlicePitch - (dstRowPitch * region[1])); //5th rectangle xCopy = maxWidthToCopy, yCopy = maxHeightToCopy, zCopy = 1 cmdIterator = find(++cmdIterator, hwParser.cmdList.end()); ASSERT_NE(hwParser.cmdList.end(), cmdIterator); bltCmd = genCmdCast(*cmdIterator); EXPECT_NE(nullptr, bltCmd); if (pDevice->isFullRangeSvm()) { EXPECT_EQ(ptrOffset(reinterpret_cast(hostPtr), srcAddressOffset), bltCmd->getSourceBaseAddress()); } EXPECT_EQ(ptrOffset(graphicsAllocation->getGpuAddress(), dstAddressOffset), bltCmd->getDestinationBaseAddress()); } HWTEST_F(BcsTests, givenAuxTranslationRequestWhenBlitCalledThenProgramCommandCorrectly) { auto &csr = pDevice->getUltCommandStreamReceiver(); cl_int retVal = CL_SUCCESS; auto buffer = clUniquePtr(Buffer::create(context.get(), CL_MEM_READ_WRITE, 123, nullptr, retVal)); auto graphicsAllocation = buffer->getGraphicsAllocation(pDevice->getRootDeviceIndex()); auto allocationGpuAddress = graphicsAllocation->getGpuAddress(); auto allocationSize = graphicsAllocation->getUnderlyingBufferSize(); AuxTranslationDirection translationDirection[] = {AuxTranslationDirection::AuxToNonAux, AuxTranslationDirection::NonAuxToAux}; for (int i = 0; i < 2; i++) { auto blitProperties = BlitProperties::constructPropertiesForAuxTranslation(translationDirection[i], graphicsAllocation, csr.getClearColorAllocation()); auto offset = csr.commandStream.getUsed(); flushBcsTask(&csr, blitProperties, false, *pDevice); HardwareParse hwParser; hwParser.parseCommands(csr.commandStream, offset); uint32_t xyCopyBltCmdFound = 
0; for (auto &cmd : hwParser.cmdList) { if (auto bltCmd = genCmdCast(cmd)) { xyCopyBltCmdFound++; EXPECT_EQ(static_cast(allocationSize), bltCmd->getDestinationX2CoordinateRight()); EXPECT_EQ(1u, bltCmd->getDestinationY2CoordinateBottom()); EXPECT_EQ(allocationGpuAddress, bltCmd->getDestinationBaseAddress()); EXPECT_EQ(allocationGpuAddress, bltCmd->getSourceBaseAddress()); } } EXPECT_EQ(1u, xyCopyBltCmdFound); } } HWTEST_F(BcsTests, givenInvalidBlitDirectionWhenConstructPropertiesThenExceptionIsThrow) { auto &csr = pDevice->getUltCommandStreamReceiver(); EXPECT_THROW(ClBlitProperties::constructProperties(static_cast(7), csr, {}), std::exception); } HWTEST_F(BcsTests, givenBlitterDirectSubmissionEnabledWhenProgrammingBlitterThenExpectRingBufferDispatched) { using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; VariableBackup backup(&ultHwConfig); ultHwConfig.csrBaseCallBlitterDirectSubmissionAvailable = true; auto &csr = pDevice->getUltCommandStreamReceiver(); using DirectSubmission = MockDirectSubmissionHw>; csr.blitterDirectSubmission = std::make_unique(*pDevice, *csr.osContext); csr.recordFlusheBatchBuffer = true; DirectSubmission *directSubmission = reinterpret_cast(csr.blitterDirectSubmission.get()); bool initRet = directSubmission->initialize(true, false); EXPECT_TRUE(initRet); cl_int retVal = CL_SUCCESS; auto buffer = clUniquePtr(Buffer::create(context.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); void *hostPtr = reinterpret_cast(0x12340000); size_t numberNodesPerContainer = 5; auto graphicsAllocation = buffer->getGraphicsAllocation(pDevice->getRootDeviceIndex()); auto blitProperties = BlitProperties::constructPropertiesForReadWrite(BlitterConstants::BlitDirection::HostPtrToBuffer, csr, graphicsAllocation, nullptr, hostPtr, graphicsAllocation->getGpuAddress(), 0, 0, 0, {1, 1, 1}, 0, 0, 0, 0); MockTimestampPacketContainer timestamp0(*csr.getTimestampPacketAllocator(), 
numberNodesPerContainer); MockTimestampPacketContainer timestamp1(*csr.getTimestampPacketAllocator(), numberNodesPerContainer); blitProperties.csrDependencies.timestampPacketContainer.push_back(×tamp0); blitProperties.csrDependencies.timestampPacketContainer.push_back(×tamp1); flushBcsTask(&csr, blitProperties, true, *pDevice); HardwareParse hwParser; hwParser.parseCommands(csr.commandStream, 0u); ASSERT_NE(nullptr, csr.latestFlushedBatchBuffer.endCmdPtr); MI_BATCH_BUFFER_START *bbStart = hwParser.getCommand(); ASSERT_NE(nullptr, bbStart); EXPECT_EQ(csr.latestFlushedBatchBuffer.endCmdPtr, bbStart); EXPECT_NE(0ull, bbStart->getBatchBufferStartAddress()); } HWTEST_F(BcsTests, givenBlitterDirectSubmissionEnabledWhenFlushTagUpdateThenBatchBufferStartIsProgrammed) { using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; VariableBackup backup(&ultHwConfig); ultHwConfig.csrBaseCallBlitterDirectSubmissionAvailable = true; auto &csr = pDevice->getUltCommandStreamReceiver(); using DirectSubmission = MockDirectSubmissionHw>; csr.blitterDirectSubmission = std::make_unique(*pDevice, *csr.osContext); csr.recordFlusheBatchBuffer = true; DirectSubmission *directSubmission = reinterpret_cast(csr.blitterDirectSubmission.get()); bool initRet = directSubmission->initialize(true, false); EXPECT_TRUE(initRet); csr.flushTagUpdate(); HardwareParse hwParser; hwParser.parseCommands(csr.commandStream, 0u); MI_BATCH_BUFFER_START *bbStart = hwParser.getCommand(); ASSERT_NE(nullptr, bbStart); } struct BcsTestsImages : public BcsTests { size_t rowPitch = 0; size_t slicePitch = 0; }; HWTEST_F(BcsTestsImages, givenImage1DWhenAdjustBlitPropertiesForImageIsCalledThenValuesAreSetCorrectly) { cl_image_desc imgDesc = Image1dDefaults::imageDesc; imgDesc.image_width = 10u; imgDesc.image_height = 0u; imgDesc.image_depth = 0u; std::unique_ptr image(Image1dHelper<>::create(context.get(), &imgDesc)); size_t expectedBytesPerPixel = 
image->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes; size_t expectedRowPitch = image->getImageDesc().image_row_pitch; size_t expectedSlicePitch = image->getImageDesc().image_slice_pitch; BlitProperties blitProperties{}; blitProperties.dstGpuAddress = image->getGraphicsAllocation(0)->getGpuAddress(); ClBlitProperties::adjustBlitPropertiesForImage(image.get(), blitProperties, rowPitch, slicePitch, false); EXPECT_EQ(imgDesc.image_width, blitProperties.dstSize.x); EXPECT_EQ(1u, blitProperties.dstSize.y); EXPECT_EQ(1u, blitProperties.dstSize.z); EXPECT_EQ(expectedBytesPerPixel, blitProperties.bytesPerPixel); EXPECT_EQ(expectedRowPitch, rowPitch); EXPECT_EQ(expectedSlicePitch, slicePitch); } HWTEST_F(BcsTestsImages, givenImage1DBufferWhenAdjustBlitPropertiesForImageIsCalledThenValuesAreSetCorrectly) { using BlitterConstants::BlitDirection; std::array, 3> testParams = {{{BlitDirection::HostPtrToImage, BlitDirection::HostPtrToBuffer}, {BlitDirection::ImageToHostPtr, BlitDirection::BufferToHostPtr}, {BlitDirection::ImageToImage, BlitDirection::BufferToBuffer}}}; cl_image_desc imgDesc = Image1dBufferDefaults::imageDesc; imgDesc.image_type = CL_MEM_OBJECT_IMAGE1D_BUFFER; cl_image_format imgFormat{}; imgFormat.image_channel_order = CL_RGBA; imgFormat.image_channel_data_type = CL_UNSIGNED_INT8; std::unique_ptr image(Image1dHelper<>::create(context.get(), &imgDesc, &imgFormat)); size_t originalBytesPerPixel = image->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes; size_t expectedBytesPerPixel = 1; BlitProperties blitProperties{}; blitProperties.srcGpuAddress = image->getGraphicsAllocation(0)->getGpuAddress(); for (auto &[blitDirection, expectedBlitDirection] : testParams) { blitProperties.blitDirection = blitDirection; blitProperties.copySize = {1, 1, 1}; blitProperties.srcSize = {imgDesc.image_width, imgDesc.image_height, imgDesc.image_depth}; ClBlitProperties::adjustBlitPropertiesForImage(image.get(), blitProperties, rowPitch, slicePitch, true); 
EXPECT_EQ(expectedBlitDirection, blitProperties.blitDirection); EXPECT_EQ(expectedBytesPerPixel, blitProperties.bytesPerPixel); EXPECT_EQ(imgDesc.image_width, blitProperties.srcSize.x / originalBytesPerPixel); EXPECT_EQ(imgDesc.image_height, blitProperties.srcSize.y); EXPECT_EQ(imgDesc.image_depth, blitProperties.srcSize.z); EXPECT_EQ(1u, blitProperties.copySize.x / originalBytesPerPixel); EXPECT_EQ(1u, blitProperties.copySize.y); EXPECT_EQ(1u, blitProperties.copySize.z); } } HWTEST_F(BcsTestsImages, givenImage2DArrayWhenAdjustBlitPropertiesForImageIsCalledThenValuesAreSetCorrectly) { cl_image_desc imgDesc = Image1dDefaults::imageDesc; imgDesc.image_width = 10u; imgDesc.image_height = 3u; imgDesc.image_depth = 0u; imgDesc.image_array_size = 4u; imgDesc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY; std::unique_ptr image(Image2dArrayHelper<>::create(context.get(), &imgDesc)); size_t expectedBytesPerPixel = image->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes; size_t expectedRowPitch = image->getImageDesc().image_row_pitch; size_t expectedSlicePitch = image->getImageDesc().image_slice_pitch; BlitProperties blitProperties{}; blitProperties.dstGpuAddress = image->getGraphicsAllocation(0)->getGpuAddress(); ClBlitProperties::adjustBlitPropertiesForImage(image.get(), blitProperties, rowPitch, slicePitch, false); EXPECT_EQ(imgDesc.image_width, blitProperties.dstSize.x); EXPECT_EQ(imgDesc.image_height, blitProperties.dstSize.y); EXPECT_EQ(imgDesc.image_array_size, blitProperties.dstSize.z); EXPECT_EQ(expectedBytesPerPixel, blitProperties.bytesPerPixel); EXPECT_EQ(expectedRowPitch, rowPitch); EXPECT_EQ(expectedSlicePitch, slicePitch); } HWTEST_F(BcsTestsImages, givenImageWithSurfaceOffsetWhenAdjustBlitPropertiesForImageIsCalledThenGpuAddressIsCorrect) { cl_image_desc imgDesc = Image1dDefaults::imageDesc; std::unique_ptr image(Image2dArrayHelper<>::create(context.get(), &imgDesc)); uint64_t surfaceOffset = 0x01000; image->setSurfaceOffsets(surfaceOffset, 0, 0, 0); 
BlitProperties blitProperties{}; blitProperties.dstGpuAddress = image->getGraphicsAllocation(0)->getGpuAddress(); uint64_t expectedGpuAddress = blitProperties.dstGpuAddress + surfaceOffset; ClBlitProperties::adjustBlitPropertiesForImage(image.get(), blitProperties, rowPitch, slicePitch, false); EXPECT_EQ(blitProperties.dstGpuAddress, expectedGpuAddress); } HWTEST_F(BcsTests, givenHostPtrToImageWhenConstructPropertiesIsCalledThenValuesAreSetCorrectly) { constexpr size_t hostAllocationSize = MemoryConstants::pageSize; auto hostAllocationPtr = allocateAlignedMemory(hostAllocationSize, MemoryConstants::pageSize); void *hostPtr = reinterpret_cast(hostAllocationPtr.get()); cl_image_desc imgDesc = Image2dDefaults::imageDesc; imgDesc.image_width = 10u; imgDesc.image_height = 12u; std::unique_ptr image(Image2dHelper<>::create(context.get(), &imgDesc)); BuiltinOpParams builtinOpParams{}; builtinOpParams.srcPtr = hostPtr; builtinOpParams.srcMemObj = nullptr; builtinOpParams.dstMemObj = image.get(); builtinOpParams.size = {2, 3, 1}; auto &csr = pDevice->getUltCommandStreamReceiver(); auto expectedDstPtr = image.get()->getGraphicsAllocation(csr.getRootDeviceIndex())->getGpuAddress(); auto expectedBytesPerPixel = image.get()->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes; auto srcRowPitchExpected = expectedBytesPerPixel * builtinOpParams.size.x; auto dstRowPitchExpected = image.get()->getImageDesc().image_row_pitch; auto srcSlicePitchExpected = srcRowPitchExpected * builtinOpParams.size.y; auto dstSlicePitchExpected = image.get()->getImageDesc().image_slice_pitch; auto blitProperties = ClBlitProperties::constructProperties(BlitterConstants::BlitDirection::HostPtrToImage, csr, builtinOpParams); EXPECT_EQ(builtinOpParams.size, blitProperties.copySize); EXPECT_EQ(expectedDstPtr, blitProperties.dstGpuAddress); EXPECT_EQ(builtinOpParams.srcOffset, blitProperties.srcOffset); EXPECT_EQ(builtinOpParams.dstOffset, blitProperties.dstOffset); EXPECT_EQ(expectedBytesPerPixel, 
blitProperties.bytesPerPixel); EXPECT_EQ(srcRowPitchExpected, blitProperties.srcRowPitch); EXPECT_EQ(dstRowPitchExpected, blitProperties.dstRowPitch); EXPECT_EQ(srcSlicePitchExpected, blitProperties.srcSlicePitch); EXPECT_EQ(dstSlicePitchExpected, blitProperties.dstSlicePitch); } HWTEST_F(BcsTests, givenImageToHostPtrWhenConstructPropertiesIsCalledThenValuesAreSetCorrectly) { constexpr size_t hostAllocationSize = MemoryConstants::pageSize; auto hostAllocationPtr = allocateAlignedMemory(hostAllocationSize, MemoryConstants::pageSize); void *hostPtr = reinterpret_cast(hostAllocationPtr.get()); cl_image_desc imgDesc = Image2dDefaults::imageDesc; imgDesc.image_width = 10u; imgDesc.image_height = 12u; std::unique_ptr image(Image2dHelper<>::create(context.get(), &imgDesc)); BuiltinOpParams builtinOpParams{}; builtinOpParams.dstPtr = hostPtr; builtinOpParams.srcMemObj = image.get(); builtinOpParams.dstMemObj = nullptr; builtinOpParams.size = {2, 3, 1}; auto &csr = pDevice->getUltCommandStreamReceiver(); auto expectedSrcPtr = image.get()->getGraphicsAllocation(csr.getRootDeviceIndex())->getGpuAddress(); auto expectedBytesPerPixel = image.get()->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes; auto srcRowPitchExpected = image.get()->getImageDesc().image_row_pitch; auto dstRowPitchExpected = expectedBytesPerPixel * builtinOpParams.size.x; auto srcSlicePitchExpected = image.get()->getImageDesc().image_slice_pitch; auto dstSlicePitchExpected = dstRowPitchExpected * builtinOpParams.size.y; auto blitProperties = ClBlitProperties::constructProperties(BlitterConstants::BlitDirection::ImageToHostPtr, csr, builtinOpParams); EXPECT_EQ(builtinOpParams.size, blitProperties.copySize); EXPECT_EQ(expectedSrcPtr, blitProperties.srcGpuAddress); EXPECT_EQ(builtinOpParams.srcOffset, blitProperties.srcOffset); EXPECT_EQ(builtinOpParams.dstOffset, blitProperties.dstOffset); EXPECT_EQ(expectedBytesPerPixel, blitProperties.bytesPerPixel); EXPECT_EQ(srcRowPitchExpected, 
blitProperties.srcRowPitch); EXPECT_EQ(dstRowPitchExpected, blitProperties.dstRowPitch); EXPECT_EQ(srcSlicePitchExpected, blitProperties.srcSlicePitch); EXPECT_EQ(dstSlicePitchExpected, blitProperties.dstSlicePitch); } HWTEST_F(BcsTests, givenHostPtrToImageWithInputRowSlicePitchesWhenConstructPropertiesIsCalledThenValuesAreSetCorrectly) { constexpr size_t hostAllocationSize = MemoryConstants::pageSize; auto hostAllocationPtr = allocateAlignedMemory(hostAllocationSize, MemoryConstants::pageSize); void *hostPtr = reinterpret_cast(hostAllocationPtr.get()); cl_image_desc imgDesc = Image2dDefaults::imageDesc; std::unique_ptr image(Image2dHelper<>::create(context.get(), &imgDesc)); BuiltinOpParams builtinOpParams{}; builtinOpParams.srcPtr = hostPtr; builtinOpParams.srcMemObj = nullptr; builtinOpParams.dstMemObj = image.get(); builtinOpParams.size = {2, 3, 1}; auto inputRowPitch = 0x20u; auto inputSlicePitch = 0x400u; builtinOpParams.srcRowPitch = inputRowPitch; builtinOpParams.srcSlicePitch = inputSlicePitch; auto dstRowPitchExpected = image.get()->getImageDesc().image_row_pitch; auto dstSlicePitchExpected = image.get()->getImageDesc().image_slice_pitch; auto &csr = pDevice->getUltCommandStreamReceiver(); auto blitProperties = ClBlitProperties::constructProperties(BlitterConstants::BlitDirection::HostPtrToImage, csr, builtinOpParams); EXPECT_EQ(inputRowPitch, blitProperties.srcRowPitch); EXPECT_EQ(dstRowPitchExpected, blitProperties.dstRowPitch); EXPECT_EQ(inputSlicePitch, blitProperties.srcSlicePitch); EXPECT_EQ(dstSlicePitchExpected, blitProperties.dstSlicePitch); } HWTEST_F(BcsTests, givenImageToHostPtrWithInputRowSlicePitchesWhenConstructPropertiesIsCalledThenValuesAreSetCorrectly) { constexpr size_t hostAllocationSize = MemoryConstants::pageSize; auto hostAllocationPtr = allocateAlignedMemory(hostAllocationSize, MemoryConstants::pageSize); void *hostPtr = reinterpret_cast(hostAllocationPtr.get()); cl_image_desc imgDesc = Image2dDefaults::imageDesc; std::unique_ptr 
image(Image2dHelper<>::create(context.get(), &imgDesc)); BuiltinOpParams builtinOpParams{}; builtinOpParams.dstPtr = hostPtr; builtinOpParams.srcMemObj = image.get(); builtinOpParams.dstMemObj = nullptr; builtinOpParams.size = {2, 3, 1}; auto inputRowPitch = 0x20u; auto inputSlicePitch = 0x400u; builtinOpParams.dstRowPitch = inputRowPitch; builtinOpParams.dstSlicePitch = inputSlicePitch; auto srcRowPitchExpected = image.get()->getImageDesc().image_row_pitch; auto srcSlicePitchExpected = image.get()->getImageDesc().image_slice_pitch; auto &csr = pDevice->getUltCommandStreamReceiver(); auto blitProperties = ClBlitProperties::constructProperties(BlitterConstants::BlitDirection::ImageToHostPtr, csr, builtinOpParams); EXPECT_EQ(srcRowPitchExpected, blitProperties.srcRowPitch); EXPECT_EQ(inputRowPitch, blitProperties.dstRowPitch); EXPECT_EQ(srcSlicePitchExpected, blitProperties.srcSlicePitch); EXPECT_EQ(inputSlicePitch, blitProperties.dstSlicePitch); } HWTEST_F(BcsTests, givenHostPtrToImageWhenBlitBufferIsCalledThenBlitCmdIsFound) { if (!pDevice->getHardwareInfo().capabilityTable.supportsImages) { GTEST_SKIP(); } constexpr size_t hostAllocationSize = MemoryConstants::pageSize; auto hostAllocationPtr = allocateAlignedMemory(hostAllocationSize, MemoryConstants::pageSize); void *hostPtr = reinterpret_cast(hostAllocationPtr.get()); std::unique_ptr image(Image2dHelper<>::create(context.get())); BuiltinOpParams builtinOpParams{}; builtinOpParams.srcPtr = hostPtr; builtinOpParams.dstMemObj = image.get(); builtinOpParams.size = {1, 1, 1}; auto &csr = pDevice->getUltCommandStreamReceiver(); auto blitProperties = ClBlitProperties::constructProperties(BlitterConstants::BlitDirection::HostPtrToImage, csr, builtinOpParams); flushBcsTask(&csr, blitProperties, true, *pDevice); HardwareParse hwParser; hwParser.parseCommands(csr.commandStream, 0); auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); EXPECT_NE(hwParser.cmdList.end(), cmdIterator); } 
HWTEST_F(BcsTests, given3dImageWhenBlitBufferIsCalledThenBlitCmdIsFoundZtimes) { if (!pDevice->getHardwareInfo().capabilityTable.supportsImages) { GTEST_SKIP(); } constexpr size_t hostAllocationSize = MemoryConstants::pageSize; auto hostAllocationPtr = allocateAlignedMemory(hostAllocationSize, MemoryConstants::pageSize); void *hostPtr = reinterpret_cast(hostAllocationPtr.get()); std::unique_ptr image(Image3dHelper<>::create(context.get())); BuiltinOpParams builtinOpParams{}; builtinOpParams.srcPtr = hostPtr; builtinOpParams.dstMemObj = image.get(); builtinOpParams.size = {1, 1, 10}; auto &csr = pDevice->getUltCommandStreamReceiver(); auto blitProperties = ClBlitProperties::constructProperties(BlitterConstants::BlitDirection::HostPtrToImage, csr, builtinOpParams); flushBcsTask(&csr, blitProperties, true, *pDevice); HardwareParse hwParser; hwParser.parseCommands(csr.commandStream, 0); uint32_t xyCopyBltCmdFound = 0; for (auto &cmd : hwParser.cmdList) { if (auto bltCmd = genCmdCast(cmd)) { ++xyCopyBltCmdFound; } } EXPECT_EQ(static_cast(builtinOpParams.size.z), xyCopyBltCmdFound); } HWTEST_F(BcsTests, givenImageToHostPtrWhenBlitBufferIsCalledThenBlitCmdIsFound) { if (!pDevice->getHardwareInfo().capabilityTable.supportsImages) { GTEST_SKIP(); } constexpr size_t hostAllocationSize = MemoryConstants::pageSize; auto hostAllocationPtr = allocateAlignedMemory(hostAllocationSize, MemoryConstants::pageSize); void *hostPtr = reinterpret_cast(hostAllocationPtr.get()); std::unique_ptr image(Image2dHelper<>::create(context.get())); BuiltinOpParams builtinOpParams{}; builtinOpParams.dstPtr = hostPtr; builtinOpParams.srcMemObj = image.get(); builtinOpParams.size = {1, 1, 1}; auto &csr = pDevice->getUltCommandStreamReceiver(); auto blitProperties = ClBlitProperties::constructProperties(BlitterConstants::BlitDirection::ImageToHostPtr, csr, builtinOpParams); flushBcsTask(&csr, blitProperties, true, *pDevice); HardwareParse hwParser; hwParser.parseCommands(csr.commandStream, 0); auto 
cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); EXPECT_NE(hwParser.cmdList.end(), cmdIterator); } HWTEST_F(BcsTests, givenHostPtrToImageWhenBlitBufferIsCalledThenBlitCmdIsCorrectlyProgrammed) { if (!pDevice->getHardwareInfo().capabilityTable.supportsImages) { GTEST_SKIP(); } constexpr size_t hostAllocationSize = MemoryConstants::pageSize; auto hostAllocationPtr = allocateAlignedMemory(hostAllocationSize, MemoryConstants::pageSize); void *hostPtr = reinterpret_cast(hostAllocationPtr.get()); cl_image_desc imgDesc = Image2dDefaults::imageDesc; imgDesc.image_width = 10; imgDesc.image_height = 12; std::unique_ptr image(Image2dHelper<>::create(context.get(), &imgDesc)); BuiltinOpParams builtinOpParams{}; builtinOpParams.srcPtr = hostPtr; builtinOpParams.srcMemObj = nullptr; builtinOpParams.dstMemObj = image.get(); builtinOpParams.size = {6, 8, 1}; auto &csr = pDevice->getUltCommandStreamReceiver(); auto blitProperties = ClBlitProperties::constructProperties(BlitterConstants::BlitDirection::HostPtrToImage, csr, builtinOpParams); flushBcsTask(&csr, blitProperties, true, *pDevice); HardwareParse hwParser; hwParser.parseCommands(csr.commandStream, 0); auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); ASSERT_NE(hwParser.cmdList.end(), cmdIterator); auto bltCmd = genCmdCast(*cmdIterator); auto dstPtr = builtinOpParams.dstMemObj->getGraphicsAllocation(csr.getRootDeviceIndex())->getGpuAddress(); EXPECT_EQ(blitProperties.srcGpuAddress, bltCmd->getSourceBaseAddress()); EXPECT_EQ(dstPtr, bltCmd->getDestinationBaseAddress()); } HWTEST_F(BcsTests, givenImageToHostPtrWhenBlitBufferIsCalledThenBlitCmdIsCorrectlyProgrammed) { if (!pDevice->getHardwareInfo().capabilityTable.supportsImages) { GTEST_SKIP(); } constexpr size_t hostAllocationSize = MemoryConstants::pageSize; auto hostAllocationPtr = allocateAlignedMemory(hostAllocationSize, MemoryConstants::pageSize); void *hostPtr = reinterpret_cast(hostAllocationPtr.get()); cl_image_desc 
imgDesc = Image2dDefaults::imageDesc; imgDesc.image_width = 10u; imgDesc.image_height = 12u; std::unique_ptr image(Image2dHelper<>::create(context.get(), &imgDesc)); BuiltinOpParams builtinOpParams{}; builtinOpParams.dstPtr = hostPtr; builtinOpParams.srcMemObj = image.get(); builtinOpParams.dstMemObj = nullptr; builtinOpParams.size = {2, 3, 1}; auto &csr = pDevice->getUltCommandStreamReceiver(); auto blitProperties = ClBlitProperties::constructProperties(BlitterConstants::BlitDirection::ImageToHostPtr, csr, builtinOpParams); flushBcsTask(&csr, blitProperties, true, *pDevice); HardwareParse hwParser; hwParser.parseCommands(csr.commandStream, 0); auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); ASSERT_NE(hwParser.cmdList.end(), cmdIterator); auto bltCmd = genCmdCast(*cmdIterator); auto srcPtr = builtinOpParams.srcMemObj->getGraphicsAllocation(csr.getRootDeviceIndex())->getGpuAddress(); EXPECT_EQ(srcPtr, bltCmd->getSourceBaseAddress()); EXPECT_EQ(blitProperties.dstGpuAddress, bltCmd->getDestinationBaseAddress()); } HWTEST_F(BcsTests, givenImageToImageWhenBlitBufferIsCalledThenBlitCmdIsCorrectlyProgrammed) { if (!pDevice->getHardwareInfo().capabilityTable.supportsImages) { GTEST_SKIP(); } cl_image_desc imgDesc = Image2dDefaults::imageDesc; imgDesc.image_width = 10u; imgDesc.image_height = 12u; std::unique_ptr srcImage(Image2dHelper<>::create(context.get(), &imgDesc)); std::unique_ptr dstImage(Image2dHelper<>::create(context.get(), &imgDesc)); BuiltinOpParams builtinOpParams{}; builtinOpParams.srcMemObj = srcImage.get(); builtinOpParams.dstMemObj = dstImage.get(); builtinOpParams.size = {2, 3, 1}; auto &csr = pDevice->getUltCommandStreamReceiver(); auto blitProperties = ClBlitProperties::constructProperties(BlitterConstants::BlitDirection::ImageToImage, csr, builtinOpParams); flushBcsTask(&csr, blitProperties, true, *pDevice); HardwareParse hwParser; hwParser.parseCommands(csr.commandStream, 0); auto cmdIterator = find(hwParser.cmdList.begin(), 
hwParser.cmdList.end()); ASSERT_NE(hwParser.cmdList.end(), cmdIterator); auto bltCmd = genCmdCast(*cmdIterator); EXPECT_EQ(blitProperties.srcGpuAddress, bltCmd->getSourceBaseAddress()); EXPECT_EQ(blitProperties.dstGpuAddress, bltCmd->getDestinationBaseAddress()); } HWTEST_F(BcsTests, givenBlitBufferCalledWhenClearColorAllocationIseSetThenItIsMadeResident) { MockGraphicsAllocation graphicsAllocation1; MockGraphicsAllocation graphicsAllocation2; MockGraphicsAllocation clearColorAllocation; auto &csr = pDevice->getUltCommandStreamReceiver(); csr.storeMakeResidentAllocations = true; Vec3 copySize = {1, 1, 1}; auto blitProperties = BlitProperties::constructPropertiesForCopy(&graphicsAllocation1, &graphicsAllocation2, 0, 0, copySize, 0, 0, 0, 0, &clearColorAllocation); flushBcsTask(&csr, blitProperties, false, *pDevice); auto iter = csr.makeResidentAllocations.find(&clearColorAllocation); ASSERT_NE(iter, csr.makeResidentAllocations.end()); EXPECT_EQ(&clearColorAllocation, iter->first); EXPECT_EQ(1u, iter->second); } struct MockScratchSpaceController : ScratchSpaceControllerBase { using ScratchSpaceControllerBase::privateScratchAllocation; using ScratchSpaceControllerBase::ScratchSpaceControllerBase; }; using ScratchSpaceControllerTest = Test; TEST_F(ScratchSpaceControllerTest, whenScratchSpaceControllerIsDestroyedThenItReleasePrivateScratchSpaceAllocation) { MockScratchSpaceController scratchSpaceController(pDevice->getRootDeviceIndex(), *pDevice->getExecutionEnvironment(), *pDevice->getGpgpuCommandStreamReceiver().getInternalAllocationStorage()); scratchSpaceController.privateScratchAllocation = pDevice->getExecutionEnvironment()->memoryManager->allocateGraphicsMemoryInPreferredPool(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}, nullptr); EXPECT_NE(nullptr, scratchSpaceController.privateScratchAllocation); //no memory leak is expected } TEST(BcsConstantsTests, givenBlitConstantsThenTheyHaveDesiredValues) { 
EXPECT_EQ(BlitterConstants::maxBlitWidth, 0x4000u); EXPECT_EQ(BlitterConstants::maxBlitHeight, 0x4000u); EXPECT_EQ(BlitterConstants::maxBlitSetWidth, 0x1FF80u); EXPECT_EQ(BlitterConstants::maxBlitSetHeight, 0x1FFC0u); } command_stream_receiver_hw_fixture.h000066400000000000000000000017671422164147700340460ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/command_stream/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" using namespace NEO; struct BcsTests : public Test { void SetUp() override { Test::SetUp(); context = std::make_unique(pClDevice); } void TearDown() override { context.reset(); Test::TearDown(); } uint32_t flushBcsTask(CommandStreamReceiver *bcsCsr, const BlitProperties &blitProperties, bool blocking, Device &device) { BlitPropertiesContainer container; container.push_back(blitProperties); return bcsCsr->flushBcsTask(container, blocking, false, device); } TimestampPacketContainer timestampPacketContainer; CsrDependencies csrDependencies; std::unique_ptr context; }; command_stream_receiver_hw_tests.inl000066400000000000000000000105601422164147700340440ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/command_stream/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ using namespace NEO; template struct CommandStreamReceiverHwTest : public ClDeviceFixture, public HardwareParse, public ::testing::Test { void SetUp() override { ClDeviceFixture::SetUp(); HardwareParse::SetUp(); } void TearDown() override { HardwareParse::TearDown(); ClDeviceFixture::TearDown(); } void givenKernelWithSlmWhenPreviousNOSLML3WasSentThenProgramL3WithSLML3ConfigImpl(); void givenBlockedKernelWithSlmWhenPreviousNOSLML3WasSentThenProgramL3WithSLML3ConfigAfterUnblockingImpl(); }; 
template void CommandStreamReceiverHwTest::givenKernelWithSlmWhenPreviousNOSLML3WasSentThenProgramL3WithSLML3ConfigImpl() { typedef typename GfxFamily::MI_LOAD_REGISTER_IMM MI_LOAD_REGISTER_IMM; typedef typename GfxFamily::PIPE_CONTROL PIPE_CONTROL; size_t GWS = 1; MockContext ctx(pClDevice); MockKernelWithInternals kernel(*pClDevice); CommandQueueHw commandQueue(&ctx, pClDevice, 0, false); auto commandStreamReceiver = new MockCsrHw(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(commandStreamReceiver); auto &commandStreamCSR = commandStreamReceiver->getCS(); // Mark Preamble as sent, override L3Config to invalid to programL3 commandStreamReceiver->isPreambleSent = true; commandStreamReceiver->lastSentL3Config = 0; static_cast(kernel)->setTotalSLMSize(1024); cmdList.clear(); commandQueue.enqueueKernel(kernel, 1, nullptr, &GWS, nullptr, 0, nullptr, nullptr); // Parse command list to verify that PC was added to taskCS parseCommands(commandStreamCSR, 0); auto itorCmd = findMmio(cmdList.begin(), cmdList.end(), L3CNTLRegisterOffset::registerOffset); ASSERT_NE(cmdList.end(), itorCmd); auto cmdMILoad = genCmdCast(*itorCmd); ASSERT_NE(nullptr, cmdMILoad); // MI_LOAD_REGISTER should be preceded by PC EXPECT_NE(cmdList.begin(), itorCmd); --itorCmd; auto cmdPC = genCmdCast(*itorCmd); ASSERT_NE(nullptr, cmdPC); uint32_t L3Config = PreambleHelper::getL3Config(*defaultHwInfo, true); EXPECT_EQ(L3Config, static_cast(cmdMILoad->getDataDword())); } template void CommandStreamReceiverHwTest::givenBlockedKernelWithSlmWhenPreviousNOSLML3WasSentThenProgramL3WithSLML3ConfigAfterUnblockingImpl() { typedef typename GfxFamily::MI_LOAD_REGISTER_IMM MI_LOAD_REGISTER_IMM; size_t GWS = 1; MockContext ctx(pClDevice); MockKernelWithInternals kernel(*pClDevice); CommandQueueHw commandQueue(&ctx, pClDevice, 0, false); auto commandStreamReceiver = new MockCsrHw(*pDevice->executionEnvironment, 
pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(commandStreamReceiver); cl_event blockingEvent; MockEvent mockEvent(&ctx); blockingEvent = &mockEvent; auto &commandStreamCSR = commandStreamReceiver->getCS(); uint32_t L3Config = PreambleHelper::getL3Config(*defaultHwInfo, false); // Mark Pramble as sent, override L3Config to SLM config commandStreamReceiver->isPreambleSent = true; commandStreamReceiver->lastSentL3Config = 0; static_cast(kernel)->setTotalSLMSize(1024); commandQueue.enqueueKernel(kernel, 1, nullptr, &GWS, nullptr, 1, &blockingEvent, nullptr); // Expect nothing was sent EXPECT_EQ(0u, commandStreamCSR.getUsed()); // Unblock Event mockEvent.setStatus(CL_COMPLETE); cmdList.clear(); // Parse command list parseCommands(commandStreamCSR, 0); // Expect L3 was programmed auto itorCmd = findMmio(cmdList.begin(), cmdList.end(), L3CNTLRegisterOffset::registerOffset); ASSERT_NE(cmdList.end(), itorCmd); auto cmdMILoad = genCmdCast(*itorCmd); ASSERT_NE(nullptr, cmdMILoad); L3Config = PreambleHelper::getL3Config(*defaultHwInfo, true); EXPECT_EQ(L3Config, static_cast(cmdMILoad->getDataDword())); } command_stream_receiver_hw_tests_dg2_and_later.cpp000066400000000000000000000227101422164147700366110ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/command_stream/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/ray_tracing_helper.h" #include "shared/test/common/cmd_parse/hw_parse.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/engine_descriptor_helper.h" #include "shared/test/common/mocks/mock_csr.h" #include "shared/test/common/mocks/mock_os_context.h" #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h" using namespace NEO; using 
MatcherIsRTCapable = IsAtLeastXeHpgCore; struct CommandStreamReceiverHwTestDg2AndLater : public ClDeviceFixture, public HardwareParse, public ::testing::Test { void SetUp() override { ClDeviceFixture::SetUp(); HardwareParse::SetUp(); } void TearDown() override { HardwareParse::TearDown(); ClDeviceFixture::TearDown(); } }; HWTEST2_F(CommandStreamReceiverHwTestDg2AndLater, givenGen12AndLaterWhenRayTracingEnabledThenCommandIsAddedToBatchBuffer, MatcherIsRTCapable) { using _3DSTATE_BTD = typename FamilyType::_3DSTATE_BTD; MockCsrHw commandStreamReceiver(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); commandStreamReceiver.setupContext(*osContext); auto cmdSize = commandStreamReceiver.getCmdSizeForPerDssBackedBuffer(pDevice->getHardwareInfo()); EXPECT_EQ(sizeof(_3DSTATE_BTD), cmdSize); auto memoryManager = pDevice->getExecutionEnvironment()->memoryManager.get(); AllocationProperties properties(pDevice->getRootDeviceIndex(), MemoryConstants::pageSize, AllocationType::SCRATCH_SURFACE, pDevice->getDeviceBitfield()); auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(properties); commandStreamReceiver.perDssBackedBuffer = allocation; std::unique_ptr buffer(new char[cmdSize]); LinearStream cs(buffer.get(), cmdSize); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.usePerDssBackedBuffer = true; EXPECT_FALSE(commandStreamReceiver.isPerDssBackedBufferSent); commandStreamReceiver.programPerDssBackedBuffer(cs, *pDevice, dispatchFlags); EXPECT_EQ(sizeof(_3DSTATE_BTD), cs.getUsed()); _3DSTATE_BTD *cmd = genCmdCast<_3DSTATE_BTD *>(cs.getCpuBase()); ASSERT_NE(nullptr, cmd); EXPECT_EQ(RayTracingHelper::getMemoryBackedFifoSizeToPatch(), cmd->getBtdStateBody().getPerDssMemoryBackedBufferSize()); EXPECT_EQ(allocation->getGpuAddressToPatch(), cmd->getBtdStateBody().getMemoryBackedBufferBasePointer()); EXPECT_TRUE(commandStreamReceiver.isPerDssBackedBufferSent); } typedef 
UltCommandStreamReceiverTest CommandStreamReceiverFlushTaskDg2AndLaterTests; HWTEST2_F(CommandStreamReceiverFlushTaskDg2AndLaterTests, givenProgramExtendedPipeControlPriorToNonPipelinedStateCommandEnabledWhenPerDssBackedBufferThenThereIsPipeControlPriorToIt, MatcherIsRTCapable) { DebugManagerStateRestore restore; DebugManager.flags.ProgramExtendedPipeControlPriorToNonPipelinedStateCommand.set(true); using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using _3DSTATE_BTD = typename FamilyType::_3DSTATE_BTD; auto expectedCmdSize = sizeof(_3DSTATE_BTD) + sizeof(PIPE_CONTROL); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); auto cmdSize = commandStreamReceiver.getCmdSizeForPerDssBackedBuffer(pDevice->getHardwareInfo()); EXPECT_EQ(expectedCmdSize, cmdSize); auto memoryManager = pDevice->getExecutionEnvironment()->memoryManager.get(); AllocationProperties properties(pDevice->getRootDeviceIndex(), MemoryConstants::pageSize, AllocationType::SCRATCH_SURFACE, pDevice->getDeviceBitfield()); auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(properties); commandStreamReceiver.perDssBackedBuffer = allocation; StackVec buffer(4096); NEO::LinearStream cmdStream(buffer.begin(), buffer.size()); EXPECT_FALSE(commandStreamReceiver.isPerDssBackedBufferSent); configureCSRtoNonDirtyState(false); ioh.replaceBuffer(ptrOffset(ioh.getCpuBase(), +1u), ioh.getMaxAvailableSpace() + MemoryConstants::pageSize * 3); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); dispatchFlags.usePerDssBackedBuffer = true; auto &hwHelper = NEO::HwHelper::get(pDevice->getHardwareInfo().platform.eRenderCoreFamily); dispatchFlags.threadArbitrationPolicy = hwHelper.getDefaultThreadArbitrationPolicy(); commandStreamReceiver.streamProperties.stateComputeMode.setProperties(dispatchFlags.requiresCoherency, dispatchFlags.numGrfRequired, 
dispatchFlags.threadArbitrationPolicy, *defaultHwInfo); auto cmdSizeForAllCommands = commandStreamReceiver.getRequiredCmdStreamSize(dispatchFlags, *pDevice); commandStreamReceiver.flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevel, dispatchFlags, *pDevice); parseCommands(commandStreamReceiver.getCS(0)); auto _3dStateBtdIterator = find<_3DSTATE_BTD *>(cmdList.begin(), cmdList.end()); auto _3dStateBtdCmd = genCmdCast<_3DSTATE_BTD *>(*_3dStateBtdIterator); ASSERT_NE(nullptr, _3dStateBtdCmd); EXPECT_EQ(RayTracingHelper::getMemoryBackedFifoSizeToPatch(), _3dStateBtdCmd->getBtdStateBody().getPerDssMemoryBackedBufferSize()); EXPECT_EQ(allocation->getGpuAddressToPatch(), _3dStateBtdCmd->getBtdStateBody().getMemoryBackedBufferBasePointer()); EXPECT_TRUE(commandStreamReceiver.isPerDssBackedBufferSent); --_3dStateBtdIterator; auto pipeControlCmd = genCmdCast(*_3dStateBtdIterator); EXPECT_TRUE(UnitTestHelper::getPipeControlHdcPipelineFlush(*pipeControlCmd)); EXPECT_TRUE(pipeControlCmd->getAmfsFlushEnable()); EXPECT_TRUE(pipeControlCmd->getCommandStreamerStallEnable()); EXPECT_TRUE(pipeControlCmd->getInstructionCacheInvalidateEnable()); EXPECT_TRUE(pipeControlCmd->getTextureCacheInvalidationEnable()); EXPECT_TRUE(pipeControlCmd->getConstantCacheInvalidationEnable()); EXPECT_TRUE(pipeControlCmd->getStateCacheInvalidationEnable()); EXPECT_TRUE(pipeControlCmd->getUnTypedDataPortCacheFlush()); commandStreamReceiver.flushTask(commandStream, 0, &dsh, &ioh, &ssh, taskLevel, dispatchFlags, *pDevice); auto cmdSizeForAllCommandsWithoutPCand3dState = commandStreamReceiver.getRequiredCmdStreamSize(dispatchFlags, *pDevice); EXPECT_EQ(cmdSizeForAllCommandsWithoutPCand3dState + expectedCmdSize, cmdSizeForAllCommands); } HWTEST2_F(CommandStreamReceiverHwTestDg2AndLater, givenGen12AndLaterWhenRayTracingEnabledButAlreadySentThenCommandIsNotAddedToBatchBuffer, MatcherIsRTCapable) { using _3DSTATE_BTD = typename FamilyType::_3DSTATE_BTD; MockCsrHw 
commandStreamReceiver(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); commandStreamReceiver.setupContext(*osContext); auto cmdSize = commandStreamReceiver.getCmdSizeForPerDssBackedBuffer(pDevice->getHardwareInfo()); EXPECT_EQ(sizeof(_3DSTATE_BTD), cmdSize); auto memoryManager = pDevice->getExecutionEnvironment()->memoryManager.get(); AllocationProperties properties(pDevice->getRootDeviceIndex(), MemoryConstants::pageSize, AllocationType::SCRATCH_SURFACE, pDevice->getDeviceBitfield()); auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(properties); commandStreamReceiver.perDssBackedBuffer = allocation; std::unique_ptr buffer(new char[cmdSize]); LinearStream cs(buffer.get(), cmdSize); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.usePerDssBackedBuffer = true; commandStreamReceiver.isPerDssBackedBufferSent = true; commandStreamReceiver.programPerDssBackedBuffer(cs, *pDevice, dispatchFlags); EXPECT_EQ(0u, cs.getUsed()); } HWTEST2_F(CommandStreamReceiverHwTestDg2AndLater, givenNotXE_HP_COREWhenCheckingNewResourceImplicitFlushThenReturnFalse, IsAtLeastXeHpgCore) { MockCsrHw commandStreamReceiver(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); commandStreamReceiver.setupContext(*osContext); EXPECT_FALSE(commandStreamReceiver.checkPlatformSupportsNewResourceImplicitFlush()); } HWTEST2_F(CommandStreamReceiverHwTestDg2AndLater, givenNotXE_HP_COREWhenCheckingNewResourceGpuIdleThenReturnFalse, IsAtLeastXeHpgCore) { MockCsrHw commandStreamReceiver(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); commandStreamReceiver.setupContext(*osContext); EXPECT_FALSE(commandStreamReceiver.checkPlatformSupportsGpuIdleImplicitFlush()); } 
command_stream_receiver_hw_tests_xehp_and_later.cpp000066400000000000000000001753571422164147700371210ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/command_stream/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/command_stream/linear_stream.h" #include "shared/source/command_stream/preemption.h" #include "shared/source/command_stream/scratch_space_controller.h" #include "shared/source/command_stream/scratch_space_controller_xehp_and_later.h" #include "shared/source/gmm_helper/gmm.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/blit_commands_helper.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/timestamp_packet.h" #include "shared/source/os_interface/os_interface.h" #include "shared/test/common/cmd_parse/hw_parse.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/engine_descriptor_helper.h" #include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/helpers/variable_backup.h" #include "shared/test/common/mocks/mock_csr.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "shared/test/common/mocks/mock_scratch_space_controller_xehp_and_later.h" #include "shared/test/common/mocks/mock_timestamp_container.h" #include "shared/test/common/mocks/ult_device_factory.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/command_queue/resource_barrier.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include 
"opencl/test/unit_test/mocks/mock_event.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "gtest/gtest.h" #include "reg_configs_common.h" using namespace NEO; namespace NEO { template class ImplicitFlushSettings { public: static bool &getSettingForNewResource(); static bool &getSettingForGpuIdle(); private: static bool defaultSettingForNewResource; static bool defaultSettingForGpuIdle; }; } // namespace NEO struct CommandStreamReceiverHwTestXeHPAndLater : public ClDeviceFixture, public HardwareParse, public ::testing::Test { void SetUp() override { ClDeviceFixture::SetUp(); HardwareParse::SetUp(); } void TearDown() override { HardwareParse::TearDown(); ClDeviceFixture::TearDown(); } }; HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenPreambleSentWhenL3ConfigRequestChangedThenDontProgramL3Register) { using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; size_t GWS = 1; MockContext ctx(pClDevice); MockKernelWithInternals kernel(*pClDevice); CommandQueueHw commandQueue(&ctx, pClDevice, 0, false); auto commandStreamReceiver = new MockCsrHw(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(commandStreamReceiver); auto &commandStreamCSR = commandStreamReceiver->getCS(); PreemptionMode initialPreemptionMode = commandStreamReceiver->lastPreemptionMode; PreemptionMode devicePreemptionMode = pDevice->getPreemptionMode(); commandStreamReceiver->isPreambleSent = true; commandStreamReceiver->lastSentL3Config = 0; commandQueue.enqueueKernel(kernel, 1, nullptr, &GWS, nullptr, 0, nullptr, nullptr); parseCommands(commandStreamCSR, 0); auto itorCmd = find(cmdList.begin(), cmdList.end()); if (PreemptionHelper::getRequiredCmdStreamSize(initialPreemptionMode, devicePreemptionMode) > 0u) { ASSERT_NE(cmdList.end(), itorCmd); } else { EXPECT_EQ(cmdList.end(), itorCmd); } } 
// The default surface-state heap size of the GPGPU CSR is expected to be 2 MB.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, WhenCommandStreamReceiverHwIsCreatedThenDefaultSshSizeIs2MB) {
    auto &commandStreamReceiver = pDevice->getGpgpuCommandStreamReceiver();
    EXPECT_EQ(2 * MB, commandStreamReceiver.defaultSshSize);
}

// After scratch space has been requested, the CSR must expose a scratch allocation,
// flag CFE state dirty and report a non-zero scratch patch address.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, WhenScratchSpaceExistsThenReturnNonZeroGpuAddressToPatch) {
    auto commandStreamReceiver = new MockCsrHw<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    pDevice->resetCommandStreamReceiver(commandStreamReceiver);

    void *ssh = alignedMalloc(512, 4096);

    uint32_t perThreadScratchSize = 0x400;

    bool stateBaseAddressDirty = false;
    bool cfeStateDirty = false;
    commandStreamReceiver->getScratchSpaceController()->setRequiredScratchSpace(ssh, 0u, perThreadScratchSize, 0u, 0u, *pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, cfeStateDirty);
    ASSERT_NE(nullptr, commandStreamReceiver->getScratchAllocation());
    EXPECT_TRUE(cfeStateDirty);

    auto scratchSpaceAddr = commandStreamReceiver->getScratchPatchAddress();
    constexpr uint64_t notExpectedScratchGpuAddr = 0;
    EXPECT_NE(notExpectedScratchGpuAddr, scratchSpaceAddr);
    alignedFree(ssh);
}

// With an OsContext created for tile mask 0b11, the scratch allocation's memory banks
// must equal that mask.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, WhenOsContextSupportsMultipleDevicesThenScratchSpaceAllocationIsPlacedOnEachSupportedDevice) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.CreateMultipleSubDevices.set(2u);
    ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment();
    executionEnvironment->memoryManager.reset(new MockMemoryManager(false, true, *executionEnvironment));
    uint32_t tileMask = 0b11;
    std::unique_ptr<OsContext> osContext(OsContext::create(nullptr, 0u, EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_CCS, EngineUsage::Regular}, PreemptionMode::MidThread, tileMask)));
    auto commandStreamReceiver = std::make_unique<MockCsrHw<FamilyType>>(*executionEnvironment, 0, tileMask);
    initPlatform();

    void *ssh = alignedMalloc(512, 4096);

    uint32_t perThreadScratchSize = 0x400;

    bool stateBaseAddressDirty = false;
    bool cfeStateDirty = false;
    commandStreamReceiver->getScratchSpaceController()->setRequiredScratchSpace(ssh, 0u, perThreadScratchSize, 0u, 0u, *osContext, stateBaseAddressDirty, cfeStateDirty);
    auto allocation = commandStreamReceiver->getScratchAllocation();
    // Non-fatal check plus guard: avoids dereferencing a null allocation while still freeing ssh.
    EXPECT_NE(nullptr, allocation);
    if (allocation != nullptr) {
        EXPECT_EQ(tileMask, static_cast<uint32_t>(allocation->storageInfo.memoryBanks.to_ulong()));
    }
    alignedFree(ssh);
}

// A CSR on which no scratch space was requested reports a zero scratch patch address.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, WhenScratchSpaceNotExistThenReturnZeroGpuAddressToPatch) {
    MockCsrHw<FamilyType> commandStreamReceiver(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());

    auto scratchSpaceAddr = commandStreamReceiver.getScratchPatchAddress();
    constexpr uint64_t expectedScratchGpuAddr = 0;
    EXPECT_EQ(expectedScratchGpuAddr, scratchSpaceAddr);
}

// The family's MI_SEMAPHORE_WAIT init template must select memory-poll register poll mode.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, whenProgrammingMiSemaphoreWaitThenSetRegisterPollModeMemoryPoll) {
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
    MI_SEMAPHORE_WAIT miSemaphoreWait = FamilyType::cmdInitMiSemaphoreWait;
    EXPECT_EQ(MI_SEMAPHORE_WAIT::REGISTER_POLL_MODE::REGISTER_POLL_MODE_MEMORY_POLL, miSemaphoreWait.getRegisterPollMode());
}

// Enqueueing a kernel that needs 0x1000 bytes of per-thread scratch must size the scratch
// allocation as perThreadScratchSize * computeUnits, point CFE_STATE at the scratch surface
// state slot, and program that surface state with the scratch allocation and pitch.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenScratchSpaceSurfaceStateEnabledWhenSratchAllocationRequestedThenProgramCfeStateWithScratchAllocation) {
    using CFE_STATE = typename FamilyType::CFE_STATE;
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;

    const HardwareInfo &hwInfo = *defaultHwInfo;
    size_t GWS = 1;
    MockContext ctx(pClDevice);
    MockKernelWithInternals kernel(*pClDevice);
    CommandQueueHw<FamilyType> commandQueue(&ctx, pClDevice, 0, false);
    auto commandStreamReceiver = new MockCsrHw<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    auto scratchController = static_cast<MockScratchSpaceControllerXeHPAndLater *>(commandStreamReceiver->getScratchSpaceController());
    scratchController->slotId = 2u;
    pDevice->resetCommandStreamReceiver(commandStreamReceiver);
    auto &commandStreamCSR = commandStreamReceiver->getCS();

    kernel.kernelInfo.kernelDescriptor.kernelAttributes.perThreadScratchSize[0] = 0x1000;
    auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily);
    uint32_t computeUnits = hwHelper.getComputeUnitsUsedForScratch(&hwInfo);
    size_t scratchSpaceSize = kernel.kernelInfo.kernelDescriptor.kernelAttributes.perThreadScratchSize[0] * computeUnits;

    commandQueue.enqueueKernel(kernel, 1, nullptr, &GWS, nullptr, 0, nullptr, nullptr);
    commandQueue.flush();

    parseCommands<FamilyType>(commandStreamCSR, 0);
    findHardwareCommands<FamilyType>();

    EXPECT_EQ(kernel.kernelInfo.kernelDescriptor.kernelAttributes.perThreadScratchSize[0], commandStreamReceiver->requiredScratchSize);
    EXPECT_EQ(scratchSpaceSize, scratchController->scratchSizeBytes);
    // The allocation is dereferenced on the next line and again further down.
    ASSERT_NE(nullptr, scratchController->getScratchSpaceAllocation());
    EXPECT_EQ(scratchSpaceSize, scratchController->getScratchSpaceAllocation()->getUnderlyingBufferSize());
    ASSERT_NE(nullptr, cmdMediaVfeState);
    auto cfeState = static_cast<CFE_STATE *>(cmdMediaVfeState);
    uint32_t bufferOffset = static_cast<uint32_t>(scratchController->slotId * scratchController->singleSurfaceStateSize * 2);
    EXPECT_EQ(bufferOffset, cfeState->getScratchSpaceBuffer());
    RENDER_SURFACE_STATE *scratchState = reinterpret_cast<RENDER_SURFACE_STATE *>(scratchController->surfaceStateHeap + bufferOffset);
    EXPECT_EQ(scratchController->scratchAllocation->getGpuAddress(), scratchState->getSurfaceBaseAddress());
    EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_SCRATCH, scratchState->getSurfaceType());

    SURFACE_STATE_BUFFER_LENGTH length = {0};
    length.Length = static_cast<uint32_t>(computeUnits - 1);
    EXPECT_EQ(length.SurfaceState.Depth + 1u, scratchState->getDepth());
    EXPECT_EQ(length.SurfaceState.Width + 1u, scratchState->getWidth());
    EXPECT_EQ(length.SurfaceState.Height + 1u, scratchState->getHeight());
    EXPECT_EQ(kernel.kernelInfo.kernelDescriptor.kernelAttributes.perThreadScratchSize[0], scratchState->getSurfacePitch());
}

// Supplying a new SSH pointer while requesting zero scratch must update surfaceStateHeap
// without marking CFE state dirty.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenScratchSpaceSurfaceStateEnabledWhenNewSshProvidedAndNoScratchAllocationExistThenNoDirtyBitSet) {
    auto commandStreamReceiver = std::make_unique<MockCsrHw<FamilyType>>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    auto scratchController = static_cast<MockScratchSpaceControllerXeHPAndLater *>(commandStreamReceiver->getScratchSpaceController());

    bool stateBaseAddressDirty = false;
    bool cfeStateDirty = false;
    // Fake addresses: only pointer identity is compared below.
    scratchController->surfaceStateHeap = reinterpret_cast<char *>(0x1000);
    scratchController->setRequiredScratchSpace(reinterpret_cast<void *>(0x2000), 0u, 0u, 0u, 0u, *pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, cfeStateDirty);
    EXPECT_EQ(scratchController->surfaceStateHeap, reinterpret_cast<char *>(0x2000));
    EXPECT_FALSE(cfeStateDirty);
}

// A requested per-thread scratch size of 1 must be stored as 64 (1 << 6).
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenScratchSpaceSurfaceStateEnabledWhenRequiredScratchSpaceIsSetThenPerThreadScratchSizeIsAlignedTo64) {
    auto commandStreamReceiver = std::make_unique<MockCsrHw<FamilyType>>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    auto scratchController = static_cast<MockScratchSpaceControllerXeHPAndLater *>(commandStreamReceiver->getScratchSpaceController());

    uint32_t perThreadScratchSize = 1;
    uint32_t expectedValue = 1 << 6;
    bool stateBaseAddressDirty = false;
    bool cfeStateDirty = false;
    uint8_t surfaceHeap[1000];
    scratchController->setRequiredScratchSpace(surfaceHeap, 0u, perThreadScratchSize, 0u, commandStreamReceiver->taskCount, *pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, cfeStateDirty);
    EXPECT_EQ(expectedValue, scratchController->perThreadScratchSize);
}

// Requesting the same scratch size with a different SSH must keep the scratch allocation
// and slot id, mark CFE state dirty, and program the surface state into the new heap.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenScratchSpaceSurfaceStateEnabledWhenNewSshProvidedAndScratchAllocationExistsThenSetDirtyBitCopyCurrentState) {
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
    auto commandStreamReceiver = std::make_unique<MockCsrHw<FamilyType>>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    auto scratchController = static_cast<MockScratchSpaceControllerXeHPAndLater *>(commandStreamReceiver->getScratchSpaceController());
    scratchController->slotId = 0;
    bool stateBaseAddressDirty = false;
    bool cfeStateDirty = false;

    void *oldSurfaceHeap = alignedMalloc(0x1000, 0x1000);
    scratchController->setRequiredScratchSpace(oldSurfaceHeap, 0u, 0x1000u, 0u, commandStreamReceiver->taskCount, *pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, cfeStateDirty);
    EXPECT_TRUE(cfeStateDirty);
    EXPECT_EQ(1u, scratchController->slotId);
    EXPECT_EQ(scratchController->surfaceStateHeap, oldSurfaceHeap);
    char *surfaceStateBuf = static_cast<char *>(oldSurfaceHeap) + scratchController->slotId * sizeof(RENDER_SURFACE_STATE) * 2;
    GraphicsAllocation *scratchAllocation = scratchController->scratchAllocation;
    RENDER_SURFACE_STATE *surfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(surfaceStateBuf);
    EXPECT_EQ(scratchController->scratchAllocation->getGpuAddress(), surfaceState->getSurfaceBaseAddress());
    EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_SCRATCH, surfaceState->getSurfaceType());

    void *newSurfaceHeap = alignedMalloc(0x1000, 0x1000);
    scratchController->setRequiredScratchSpace(newSurfaceHeap, 0u, 0x1000u, 0u, commandStreamReceiver->taskCount, *pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, cfeStateDirty);
    EXPECT_TRUE(cfeStateDirty);
    EXPECT_EQ(1u, scratchController->slotId);
    EXPECT_EQ(scratchController->surfaceStateHeap, newSurfaceHeap);
    EXPECT_EQ(scratchAllocation, scratchController->scratchAllocation);
    surfaceStateBuf = static_cast<char *>(newSurfaceHeap) + scratchController->slotId * sizeof(RENDER_SURFACE_STATE) * 2;
    surfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(surfaceStateBuf);
    EXPECT_EQ(scratchController->scratchAllocation->getGpuAddress(), surfaceState->getSurfaceBaseAddress());
    EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_SCRATCH, surfaceState->getSurfaceType());

    alignedFree(oldSurfaceHeap);
    alignedFree(newSurfaceHeap);
}

// Growing the requested scratch size (0x1000 -> 0x2000) must advance the slot id (6 -> 7 -> 8),
// replace the scratch allocation and program the new address at the new slot offset.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenScratchSpaceSurfaceStateEnabledWhenBiggerScratchSpaceRequiredThenReplaceAllocation) {
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
    auto commandStreamReceiver = new MockCsrHw<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    auto scratchController = static_cast<MockScratchSpaceControllerXeHPAndLater *>(commandStreamReceiver->getScratchSpaceController());
    scratchController->slotId = 6;

    pDevice->resetCommandStreamReceiver(commandStreamReceiver);

    bool cfeStateDirty = false;
    bool stateBaseAddressDirty = false;

    void *surfaceHeap = alignedMalloc(0x1000, 0x1000);
    scratchController->setRequiredScratchSpace(surfaceHeap, 0u, 0x1000u, 0u, commandStreamReceiver->taskCount,
                                               *pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, cfeStateDirty);
    EXPECT_TRUE(cfeStateDirty);
    EXPECT_EQ(7u, scratchController->slotId);
    uint64_t offset = static_cast<uint64_t>(scratchController->slotId * sizeof(RENDER_SURFACE_STATE) * 2);
    EXPECT_EQ(offset, scratchController->getScratchPatchAddress());
    EXPECT_EQ(0u, scratchController->calculateNewGSH());
    uint64_t gpuVa = scratchController->scratchAllocation->getGpuAddress();
    char *surfaceStateBuf = static_cast<char *>(scratchController->surfaceStateHeap) + offset;
    RENDER_SURFACE_STATE *surfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(surfaceStateBuf);
    EXPECT_EQ(gpuVa, surfaceState->getSurfaceBaseAddress());

    scratchController->setRequiredScratchSpace(surfaceHeap, 0u, 0x2000u, 0u, commandStreamReceiver->taskCount,
                                               *pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, cfeStateDirty);
    EXPECT_TRUE(cfeStateDirty);
    EXPECT_EQ(8u, scratchController->slotId);
    offset = static_cast<uint64_t>(scratchController->slotId * sizeof(RENDER_SURFACE_STATE) * 2);
    EXPECT_EQ(offset, scratchController->getScratchPatchAddress());
    EXPECT_NE(gpuVa, scratchController->scratchAllocation->getGpuAddress());
    gpuVa = scratchController->scratchAllocation->getGpuAddress();
    surfaceStateBuf = static_cast<char *>(scratchController->surfaceStateHeap) + offset;
    surfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(surfaceStateBuf);
    EXPECT_EQ(gpuVa, surfaceState->getSurfaceBaseAddress());

    alignedFree(surfaceHeap);
}

// Passing scratch slot 1 explicitly must set slotId to 1, enable updateSlots and
// program the surface state at the offset for that slot.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenScratchSpaceSurfaceStateEnabledWhenScratchSlotIsNonZeroThenSlotIdIsUpdatedAndCorrectOffsetIsSet) {
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
    auto commandStreamReceiver = new MockCsrHw<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    auto scratchController = static_cast<MockScratchSpaceControllerXeHPAndLater *>(commandStreamReceiver->getScratchSpaceController());

    pDevice->resetCommandStreamReceiver(commandStreamReceiver);

    bool cfeStateDirty = false;
    bool stateBaseAddressDirty = false;

    void *surfaceHeap = alignedMalloc(0x1000, 0x1000);
    scratchController->setRequiredScratchSpace(surfaceHeap, 1u, 0x1000u, 0u, commandStreamReceiver->taskCount,
                                               *pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, cfeStateDirty);
    EXPECT_TRUE(cfeStateDirty);
    EXPECT_EQ(1u, scratchController->slotId);
    EXPECT_TRUE(scratchController->updateSlots);
    uint64_t offset = static_cast<uint64_t>(scratchController->slotId * sizeof(RENDER_SURFACE_STATE) * 2);
    EXPECT_EQ(offset, scratchController->getScratchPatchAddress());
    EXPECT_EQ(0u, scratchController->calculateNewGSH());
    uint64_t gpuVa = scratchController->scratchAllocation->getGpuAddress();
    char *surfaceStateBuf = static_cast<char *>(scratchController->surfaceStateHeap) + offset;
    RENDER_SURFACE_STATE *surfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(surfaceStateBuf);
    EXPECT_EQ(gpuVa, surfaceState->getSurfaceBaseAddress());

    alignedFree(surfaceHeap);
}

// programHeaps with a three-heap container is expected to call setRequiredScratchSpace once
// and programSurfaceState twice; a counting local mock records the calls.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenScratchSpaceSurfaceStateEnabledWhenProgramHeapsThenSetReqScratchSpaceAndProgramSurfaceStateAreCalled) {
    class MockScratchSpaceControllerXeHPAndLater : public ScratchSpaceControllerXeHPAndLater {
      public:
        uint32_t requiredScratchSpaceCalledTimes = 0u;
        uint32_t programSurfaceStateCalledTimes = 0u;
        MockScratchSpaceControllerXeHPAndLater(uint32_t rootDeviceIndex,
                                               ExecutionEnvironment &environment,
                                               InternalAllocationStorage &allocationStorage) : ScratchSpaceControllerXeHPAndLater(rootDeviceIndex, environment, allocationStorage) {}

        using ScratchSpaceControllerXeHPAndLater::scratchAllocation;

        // Counts the call only; no scratch space is set up.
        void setRequiredScratchSpace(void *sshBaseAddress,
                                     uint32_t scratchSlot,
                                     uint32_t requiredPerThreadScratchSize,
                                     uint32_t requiredPerThreadPrivateScratchSize,
                                     uint32_t currentTaskCount,
                                     OsContext &osContext,
                                     bool &stateBaseAddressDirty,
                                     bool &vfeStateDirty) override {
            requiredScratchSpaceCalledTimes++;
        }

      protected:
        // Counts the call only; no surface state is written.
        void programSurfaceState() override {
            programSurfaceStateCalledTimes++;
        }
    };

    auto commandStreamReceiver = new MockCsrHw<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    pDevice->resetCommandStreamReceiver(commandStreamReceiver);
    std::unique_ptr<ScratchSpaceController> scratchController = std::make_unique<MockScratchSpaceControllerXeHPAndLater>(pDevice->getRootDeviceIndex(),
                                                                                                                         *pDevice->executionEnvironment,
                                                                                                                         *commandStreamReceiver->getInternalAllocationStorage());
    bool cfeStateDirty = false;
    bool stateBaseAddressDirty = false;

    void *surfaceHeap = alignedMalloc(0x1000, 0x1000);
    NEO::GraphicsAllocation heap1(1u, NEO::AllocationType::BUFFER, surfaceHeap, 0u, 0u, 0u, MemoryPool::System4KBPages, 0u);
    NEO::GraphicsAllocation heap2(1u, NEO::AllocationType::BUFFER, surfaceHeap, 0u, 0u, 0u, MemoryPool::System4KBPages, 0u);
    NEO::GraphicsAllocation heap3(1u, NEO::AllocationType::BUFFER, surfaceHeap, 0u, 0u, 0u, MemoryPool::System4KBPages, 0u);
    HeapContainer container;

    container.push_back(&heap1);
    container.push_back(&heap2);
    container.push_back(&heap3);

    scratchController->programHeaps(container, 0u, 1u, 0u, 0u, commandStreamReceiver->getOsContext(), stateBaseAddressDirty, cfeStateDirty);

    auto scratch = static_cast<MockScratchSpaceControllerXeHPAndLater *>(scratchController.get());
    EXPECT_EQ(scratch->requiredScratchSpaceCalledTimes, 1u);
    EXPECT_EQ(scratch->programSurfaceStateCalledTimes, 2u);

    alignedFree(surfaceHeap);
}

// setNewSshPtr with its last argument false must leave slotId untouched, call
// programSurfaceState once and mark CFE state dirty.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenScratchWhenSetNewSshPtrAndChangeIdIsFalseThenSlotIdIsNotChanged) {
    class MockScratchSpaceControllerXeHPAndLater : public ScratchSpaceControllerXeHPAndLater {
      public:
        uint32_t programSurfaceStateCalledTimes = 0u;
        MockScratchSpaceControllerXeHPAndLater(uint32_t rootDeviceIndex,
                                               ExecutionEnvironment &environment,
                                               InternalAllocationStorage &allocationStorage) : ScratchSpaceControllerXeHPAndLater(rootDeviceIndex, environment, allocationStorage) {}

        using ScratchSpaceControllerXeHPAndLater::scratchAllocation;
        using ScratchSpaceControllerXeHPAndLater::slotId;

      protected:
        // Counts the call only; no surface state is written.
        void programSurfaceState() override {
            programSurfaceStateCalledTimes++;
        }
    };

    auto commandStreamReceiver = new MockCsrHw<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    pDevice->resetCommandStreamReceiver(commandStreamReceiver);
    std::unique_ptr<ScratchSpaceController> scratchController = std::make_unique<MockScratchSpaceControllerXeHPAndLater>(pDevice->getRootDeviceIndex(),
                                                                                                                         *pDevice->executionEnvironment,
                                                                                                                         *commandStreamReceiver->getInternalAllocationStorage());

    NEO::GraphicsAllocation graphicsAllocation(1u, NEO::AllocationType::BUFFER, nullptr, 0u, 0u, 0u, MemoryPool::System4KBPages, 0u);

    bool cfeStateDirty = false;

    void *surfaceHeap = alignedMalloc(0x1000, 0x1000);

    auto scratch = static_cast<MockScratchSpaceControllerXeHPAndLater *>(scratchController.get());
    scratch->slotId = 10;
    scratch->scratchAllocation = &graphicsAllocation;
    scratch->setNewSshPtr(surfaceHeap, cfeStateDirty, false);
    // Detach the stack-allocated GraphicsAllocation before the controller is destroyed.
    scratch->scratchAllocation = nullptr;
    EXPECT_EQ(10u, scratch->slotId);
    EXPECT_EQ(scratch->programSurfaceStateCalledTimes, 1u);
    EXPECT_TRUE(cfeStateDirty);

    alignedFree(surfaceHeap);
}

// programSurfaceState with updateSlots == false must leave slotId untouched.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenScratchWhenProgramSurfaceStateAndUpdateSlotIsFalseThenSlotIdIsNotChanged) {
    class MockScratchSpaceControllerXeHPAndLater : public ScratchSpaceControllerXeHPAndLater {
      public:
        MockScratchSpaceControllerXeHPAndLater(uint32_t rootDeviceIndex,
                                               ExecutionEnvironment &environment,
                                               InternalAllocationStorage &allocationStorage) : ScratchSpaceControllerXeHPAndLater(rootDeviceIndex, environment, allocationStorage) {}

        using ScratchSpaceControllerXeHPAndLater::programSurfaceState;
        using ScratchSpaceControllerXeHPAndLater::scratchAllocation;
        using ScratchSpaceControllerXeHPAndLater::slotId;
        using ScratchSpaceControllerXeHPAndLater::surfaceStateHeap;
        using ScratchSpaceControllerXeHPAndLater::updateSlots;
    };

    auto commandStreamReceiver = new MockCsrHw<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    pDevice->resetCommandStreamReceiver(commandStreamReceiver);
    std::unique_ptr<ScratchSpaceController> scratchController = std::make_unique<MockScratchSpaceControllerXeHPAndLater>(pDevice->getRootDeviceIndex(),
                                                                                                                         *pDevice->executionEnvironment,
                                                                                                                         *commandStreamReceiver->getInternalAllocationStorage());

    NEO::GraphicsAllocation graphicsAllocation(1u, NEO::AllocationType::BUFFER, nullptr, 0u, 0u, 0u, MemoryPool::System4KBPages, 0u);

    void *surfaceHeap = alignedMalloc(0x1000, 0x1000);

    auto scratch = static_cast<MockScratchSpaceControllerXeHPAndLater *>(scratchController.get());
    scratch->surfaceStateHeap = static_cast<char *>(surfaceHeap);
    scratch->slotId = 10;
    scratch->updateSlots = false;
    scratch->scratchAllocation = &graphicsAllocation;
    scratch->programSurfaceState();
    // Detach the stack-allocated GraphicsAllocation before the controller is destroyed.
    scratch->scratchAllocation = nullptr;
    EXPECT_EQ(10u, scratch->slotId);

    alignedFree(surfaceHeap);
}

// With EnablePrivateScratchSlot1 set, doubling the requested private scratch size must
// replace the private scratch allocation; the test reads its address from surfaceState[3]
// after the first request and surfaceState[5] after the second.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenScratchSpaceSurfaceStateEnabledWhenBiggerPrivateScratchSpaceRequiredThenReplaceAllocation) {
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
    DebugManagerStateRestore restorer;
    DebugManager.flags.EnablePrivateScratchSlot1.set(1);
    RENDER_SURFACE_STATE surfaceState[6];
    MockCsrHw<FamilyType> commandStreamReceiver(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    commandStreamReceiver.setupContext(pDevice->getGpgpuCommandStreamReceiver().getOsContext());
    auto scratchController = static_cast<MockScratchSpaceControllerXeHPAndLater *>(commandStreamReceiver.getScratchSpaceController());

    bool cfeStateDirty = false;
    bool stateBaseAddressDirty = false;

    uint32_t sizeForPrivateScratch = MemoryConstants::pageSize;

    scratchController->setRequiredScratchSpace(surfaceState, 0u, 0u, sizeForPrivateScratch, 0u,
                                               *pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, cfeStateDirty);
    EXPECT_TRUE(cfeStateDirty);
    ASSERT_NE(nullptr, scratchController->privateScratchAllocation);
    uint64_t gpuVa = scratchController->privateScratchAllocation->getGpuAddress();
    EXPECT_EQ(gpuVa, surfaceState[3].getSurfaceBaseAddress());

    scratchController->setRequiredScratchSpace(surfaceState, 0u, 0u, sizeForPrivateScratch * 2, 0u,
                                               *pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, cfeStateDirty);
    EXPECT_TRUE(cfeStateDirty);

    ASSERT_NE(nullptr, scratchController->privateScratchAllocation);
    EXPECT_NE(gpuVa, scratchController->privateScratchAllocation->getGpuAddress());
    EXPECT_EQ(scratchController->privateScratchAllocation->getGpuAddress(), surfaceState[5].getSurfaceBaseAddress());
}

// When only private scratch is requested, no regular scratch allocation is created and
// the patch address becomes 2 * sizeof(RENDER_SURFACE_STATE); before the request it is 0.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenScratchSpaceControllerWithOnlyPrivateScratchSpaceWhenGettingPatchAddressThenGetCorrectValue) {
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
    DebugManagerStateRestore restorer;
    DebugManager.flags.EnablePrivateScratchSlot1.set(1);
    RENDER_SURFACE_STATE surfaceState[6];
    MockCsrHw<FamilyType> commandStreamReceiver(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    commandStreamReceiver.setupContext(pDevice->getGpgpuCommandStreamReceiver().getOsContext());
    auto scratchController = static_cast<MockScratchSpaceControllerXeHPAndLater *>(commandStreamReceiver.getScratchSpaceController());

    bool cfeStateDirty = false;
    bool stateBaseAddressDirty = false;

    uint32_t sizeForPrivateScratch = MemoryConstants::pageSize;

    EXPECT_EQ(nullptr, scratchController->getScratchSpaceAllocation());
    EXPECT_EQ(nullptr, scratchController->getPrivateScratchSpaceAllocation());

    EXPECT_EQ(0u, scratchController->getScratchPatchAddress());

    scratchController->setRequiredScratchSpace(surfaceState, 0u, 0u, sizeForPrivateScratch, 0u,
                                               *pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, cfeStateDirty);
    EXPECT_TRUE(cfeStateDirty);
    auto expectedPatchAddress = 2 * sizeof(RENDER_SURFACE_STATE);
    EXPECT_EQ(nullptr, scratchController->getScratchSpaceAllocation());
    EXPECT_NE(nullptr, scratchController->getPrivateScratchSpaceAllocation());

    EXPECT_EQ(expectedPatchAddress, scratchController->getScratchPatchAddress());
}

// Repeating a private scratch request of the same size must keep the allocation and
// leave CFE state clean on the second call.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenScratchSpaceSurfaceStateEnabledWhenNotBiggerPrivateScratchSpaceRequiredThenCfeStateIsNotDirty) {
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
    DebugManagerStateRestore restorer;
    DebugManager.flags.EnablePrivateScratchSlot1.set(1);
    RENDER_SURFACE_STATE surfaceState[4];
    MockCsrHw<FamilyType> commandStreamReceiver(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    commandStreamReceiver.setupContext(pDevice->getGpgpuCommandStreamReceiver().getOsContext());
    auto scratchController = static_cast<MockScratchSpaceControllerXeHPAndLater *>(commandStreamReceiver.getScratchSpaceController());

    bool cfeStateDirty = false;
    bool stateBaseAddressDirty = false;

    uint32_t sizeForPrivateScratch = MemoryConstants::pageSize;

    scratchController->setRequiredScratchSpace(surfaceState, 0u, 0u, sizeForPrivateScratch, 0u,
                                               *pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, cfeStateDirty);
    EXPECT_TRUE(cfeStateDirty);
    ASSERT_NE(nullptr, scratchController->privateScratchAllocation);
    uint64_t gpuVa = scratchController->privateScratchAllocation->getGpuAddress();
    cfeStateDirty = false;

    scratchController->setRequiredScratchSpace(surfaceState, 0u, 0u, sizeForPrivateScratch, 0u,
                                               *pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, cfeStateDirty);
    EXPECT_FALSE(cfeStateDirty);

    ASSERT_NE(nullptr, scratchController->privateScratchAllocation);
    EXPECT_EQ(gpuVa, scratchController->privateScratchAllocation->getGpuAddress());
    EXPECT_EQ(gpuVa, surfaceState[3].getSurfaceBaseAddress());
}
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenScratchSpaceSurfaceStateWithoutPrivateScratchSpaceWhenDoubleAllocationsScratchSpaceIsUsedThenPrivateScratchAddressIsZero) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; DebugManagerStateRestore restorer; DebugManager.flags.EnablePrivateScratchSlot1.set(1); RENDER_SURFACE_STATE surfaceState[4]; MockCsrHw commandStreamReceiver(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); commandStreamReceiver.setupContext(pDevice->getGpgpuCommandStreamReceiver().getOsContext()); auto scratchController = static_cast(commandStreamReceiver.getScratchSpaceController()); bool cfeStateDirty = false; bool stateBaseAddressDirty = false; uint32_t sizeForScratch = MemoryConstants::pageSize; scratchController->setRequiredScratchSpace(surfaceState, 0u, sizeForScratch, 0u, 0u, *pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, cfeStateDirty); EXPECT_TRUE(cfeStateDirty); EXPECT_EQ(nullptr, scratchController->privateScratchAllocation); EXPECT_EQ(0u, surfaceState[3].getSurfaceBaseAddress()); } HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenScratchSpaceControllerWhenDebugKeyForPrivateScratchIsDisabledThenThereAre16Slots) { DebugManagerStateRestore restorer; DebugManager.flags.EnablePrivateScratchSlot1.set(0); MockCsrHw commandStreamReceiver(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); commandStreamReceiver.setupContext(pDevice->getGpgpuCommandStreamReceiver().getOsContext()); auto scratchController = static_cast(commandStreamReceiver.getScratchSpaceController()); EXPECT_EQ(16u, scratchController->stateSlotsCount); } HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenScratchSpaceControllerWhenDebugKeyForPrivateScratchIsEnabledThenThereAre32Slots) { DebugManagerStateRestore restorer; DebugManager.flags.EnablePrivateScratchSlot1.set(1); 
MockCsrHw commandStreamReceiver(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    commandStreamReceiver.setupContext(pDevice->getGpgpuCommandStreamReceiver().getOsContext());
    auto scratchController = static_cast(commandStreamReceiver.getScratchSpaceController());
    EXPECT_EQ(32u, scratchController->stateSlotsCount);
}

// A private scratch size of pageSize + 1 is requested; the resulting privateScratchSizeBytes must
// be based on the size rounded up to a multiple of 64 and must match the allocation's buffer size.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenScratchSpaceSurfaceStateEnabledWhenSizeForPrivateScratchSpaceIsMisalignedThenAlignItTo64) {
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
    DebugManagerStateRestore restorer;
    DebugManager.flags.EnablePrivateScratchSlot1.set(1);
    RENDER_SURFACE_STATE surfaceState[4];
    MockCsrHw commandStreamReceiver(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    auto scratchController = static_cast(commandStreamReceiver.getScratchSpaceController());
    uint32_t misalignedSizeForPrivateScratch = MemoryConstants::pageSize + 1;
    bool cfeStateDirty = false;
    bool stateBaseAddressDirty = false;
    scratchController->setRequiredScratchSpace(surfaceState, 0u, 0u, misalignedSizeForPrivateScratch, 0u, *pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, cfeStateDirty);
    // The raw (misaligned) per-unit size must NOT be what was used ...
    EXPECT_NE(scratchController->privateScratchSizeBytes, misalignedSizeForPrivateScratch * scratchController->computeUnitsUsedForScratch);
    // ... the 64-aligned size, scaled by computeUnitsUsedForScratch, must be.
    EXPECT_EQ(scratchController->privateScratchSizeBytes, alignUp(misalignedSizeForPrivateScratch, 64) * scratchController->computeUnitsUsedForScratch);
    EXPECT_EQ(scratchController->privateScratchSizeBytes, scratchController->getPrivateScratchSpaceAllocation()->getUnderlyingBufferSize());
}

// EnablePrivateScratchSlot1 = 0: a non-zero private scratch size is passed in but no private
// scratch allocation may be created.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenDisabledPrivateScratchSpaceWhenSizeForPrivateScratchSpaceIsProvidedThenItIsNotCreated) {
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
    DebugManagerStateRestore restorer;
DebugManager.flags.EnablePrivateScratchSlot1.set(0);
    RENDER_SURFACE_STATE surfaceState[4];
    MockCsrHw commandStreamReceiver(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    auto scratchController = static_cast(commandStreamReceiver.getScratchSpaceController());
    bool cfeStateDirty = false;
    bool stateBaseAddressDirty = false;
    // Both the regular and the private scratch size arguments are one page.
    scratchController->setRequiredScratchSpace(surfaceState, 0u, MemoryConstants::pageSize, MemoryConstants::pageSize, 0u, *pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, cfeStateDirty);
    EXPECT_EQ(0u, scratchController->privateScratchSizeBytes);
    EXPECT_EQ(nullptr, scratchController->getPrivateScratchSpaceAllocation());
}

// EnablePrivateScratchSlot1 = 0: the offset to slot 1 must be exactly one RENDER_SURFACE_STATE,
// i.e. each slot holds a single surface state.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenDisabledPrivateScratchSpaceWhenGettingOffsetForSlotThenEachSlotContainsOnlyOneSurfaceState) {
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
    DebugManagerStateRestore restorer;
    DebugManager.flags.EnablePrivateScratchSlot1.set(0);
    MockCsrHw commandStreamReceiver(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    auto scratchController = static_cast(commandStreamReceiver.getScratchSpaceController());
    EXPECT_EQ(sizeof(RENDER_SURFACE_STATE), scratchController->getOffsetToSurfaceState(1u));
}

// Enqueues a resource barrier on cmdQ0 that waits on three events and then inspects the
// MI_SEMAPHORE_WAIT commands found in the stored task stream and in the CSR command stream.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenBlockedCacheFlushCmdWhenSubmittingThenDispatchBlockedCommands) {
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
    MockContext context(pClDevice);
    // The mock CSR is handed to the device via resetCommandStreamReceiver (presumably transferring ownership).
    auto mockCsr = new MockCsrHw2(*pDevice->getExecutionEnvironment(), pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    pDevice->resetCommandStreamReceiver(mockCsr);
    mockCsr->timestampPacketWriteEnabled = true;
    mockCsr->storeFlushedTaskStream = true;
    auto cmdQ0 = clUniquePtr(new MockCommandQueueHw(&context, pClDevice, nullptr));
    auto &secondEngine =
pDevice->getEngine(pDevice->getHardwareInfo().capabilityTable.defaultEngineType, EngineUsage::LowPriority); static_cast *>(secondEngine.commandStreamReceiver)->timestampPacketWriteEnabled = true; auto cmdQ1 = clUniquePtr(new MockCommandQueueHw(&context, pClDevice, nullptr)); cmdQ1->gpgpuEngine = &secondEngine; cmdQ1->timestampPacketContainer = std::make_unique(); EXPECT_NE(&cmdQ0->getGpgpuCommandStreamReceiver(), &cmdQ1->getGpgpuCommandStreamReceiver()); MockTimestampPacketContainer node0(*pDevice->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1); MockTimestampPacketContainer node1(*pDevice->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1); Event event0(cmdQ0.get(), 0, 0, 0); // on the same CSR event0.addTimestampPacketNodes(node0); Event event1(cmdQ1.get(), 0, 0, 0); // on different CSR event1.addTimestampPacketNodes(node1); uint32_t numEventsOnWaitlist = 3; UserEvent userEvent; cl_event waitlist[] = {&event0, &event1, &userEvent}; cl_int retVal = CL_SUCCESS; auto buffer = clUniquePtr(Buffer::create(&context, 0, MemoryConstants::pageSize, nullptr, retVal)); cl_resource_barrier_descriptor_intel descriptor = {}; descriptor.mem_object = buffer.get(); BarrierCommand barrierCommand(cmdQ0.get(), &descriptor, 1); cmdQ0->enqueueResourceBarrier(&barrierCommand, numEventsOnWaitlist, waitlist, nullptr); userEvent.setStatus(CL_COMPLETE); HardwareParse hwParserCsr; HardwareParse hwParserCmdQ; LinearStream taskStream(mockCsr->storedTaskStream.get(), mockCsr->storedTaskStreamSize); taskStream.getSpace(mockCsr->storedTaskStreamSize); hwParserCsr.parseCommands(mockCsr->commandStream, 0); hwParserCmdQ.parseCommands(taskStream, 0); { auto queueSemaphores = findAll(hwParserCmdQ.cmdList.begin(), hwParserCmdQ.cmdList.end()); auto expectedQueueSemaphoresCount = 1u; if (UnitTestHelper::isAdditionalMiSemaphoreWaitRequired(pDevice->getHardwareInfo())) { expectedQueueSemaphoresCount += 1; } EXPECT_EQ(expectedQueueSemaphoresCount, queueSemaphores.size()); 
auto semaphoreCmd = genCmdCast(*(queueSemaphores[0]));
        // The first semaphore in the queue's task stream must wait (NOT_EQUAL 1) on node0's context-end address.
        EXPECT_EQ(semaphoreCmd->getCompareOperation(), MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD);
        EXPECT_EQ(1u, semaphoreCmd->getSemaphoreDataDword());
        auto dataAddress = TimestampPacketHelper::getContextEndGpuAddress(*node0.getNode(0));
        EXPECT_EQ(dataAddress, semaphoreCmd->getSemaphoreGraphicsAddress());
    }
    {
        // Exactly one semaphore is expected in the CSR stream, waiting on node1's context-end address.
        auto csrSemaphores = findAll(hwParserCsr.cmdList.begin(), hwParserCsr.cmdList.end());
        EXPECT_EQ(1u, csrSemaphores.size());
        auto semaphoreCmd = genCmdCast(*(csrSemaphores[0]));
        EXPECT_EQ(semaphoreCmd->getCompareOperation(), MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD);
        EXPECT_EQ(1u, semaphoreCmd->getSemaphoreDataDword());
        auto dataAddress = TimestampPacketHelper::getContextEndGpuAddress(*node1.getNode(0));
        EXPECT_EQ(dataAddress, semaphoreCmd->getSemaphoreGraphicsAddress());
    }
    // Dispatch flags captured by the mock CSR for the submission.
    EXPECT_TRUE(mockCsr->passedDispatchFlags.blocking);
    EXPECT_TRUE(mockCsr->passedDispatchFlags.guardCommandBufferWithPipeControl);
    EXPECT_EQ(pDevice->getPreemptionMode(), mockCsr->passedDispatchFlags.preemptionMode);
    // Return value intentionally unused. NOTE(review): presumably called for its side effect of
    // refreshing the queue's blocked state before teardown - confirm.
    cmdQ0->isQueueBlocked();
}

// Builds OS contexts and CSRs from a two-bit mask (0b11) and a one-bit mask (0b10) and checks the
// reported device counts and isMultiOsContextCapable() for each.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, WhenOsContextSupportsMultipleDevicesThenCommandStreamReceiverIsMultiOsContextCapable) {
    uint32_t multiDeviceMask = 0b11;
    uint32_t singleDeviceMask = 0b10;
    std::unique_ptr multiDeviceOsContext(OsContext::create(nullptr, 0u, EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_RCS, EngineUsage::Regular}, PreemptionMode::MidThread, multiDeviceMask)));
    std::unique_ptr singleDeviceOsContext(OsContext::create(nullptr, 0u, EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_RCS, EngineUsage::Regular}, PreemptionMode::MidThread, singleDeviceMask)));
    EXPECT_EQ(2u, multiDeviceOsContext->getNumSupportedDevices());
    EXPECT_EQ(1u, singleDeviceOsContext->getNumSupportedDevices());
    UltCommandStreamReceiver
commandStreamReceiverMulti(*pDevice->getExecutionEnvironment(), pDevice->getRootDeviceIndex(), multiDeviceMask);
    // callBaseIsMultiOsContextCapable is set so the query below is answered by the base implementation
    // rather than by the mock (presumably - the flag is defined outside this view).
    commandStreamReceiverMulti.callBaseIsMultiOsContextCapable = true;
    EXPECT_TRUE(commandStreamReceiverMulti.isMultiOsContextCapable());
    EXPECT_EQ(2u, commandStreamReceiverMulti.deviceBitfield.count());
    UltCommandStreamReceiver commandStreamReceiverSingle(*pDevice->getExecutionEnvironment(), pDevice->getRootDeviceIndex(), singleDeviceMask);
    commandStreamReceiverSingle.callBaseIsMultiOsContextCapable = true;
    EXPECT_FALSE(commandStreamReceiverSingle.isMultiOsContextCapable());
    EXPECT_EQ(1u, commandStreamReceiverSingle.deviceBitfield.count());
}

// XE_HP_CORE only. The platform setting for new-resource implicit flush is expected to default to
// true; with it set to true, the CSR's answer must follow the OS interface's newResourceImplicitFlush value.
HWTEST2_F(CommandStreamReceiverHwTestXeHPAndLater, givenXE_HP_COREDefaultSupportEnabledWhenOsSupportsNewResourceImplicitFlushThenReturnOsSupportValue, IsXeHpCore) {
    MockCsrHw commandStreamReceiver(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    commandStreamReceiver.setupContext(*osContext);
    // Checked before the backup below overrides it, so this verifies the default value.
    EXPECT_TRUE(ImplicitFlushSettings::getSettingForNewResource());
    VariableBackup defaultSettingForNewResourceBackup(&ImplicitFlushSettings::getSettingForNewResource(), true);
    if (commandStreamReceiver.getOSInterface()->newResourceImplicitFlush) {
        EXPECT_TRUE(commandStreamReceiver.checkPlatformSupportsNewResourceImplicitFlush());
    } else {
        EXPECT_FALSE(commandStreamReceiver.checkPlatformSupportsNewResourceImplicitFlush());
    }
}

// XE_HP_CORE only. With the platform setting overridden to false, the CSR must report no support
// for new-resource implicit flush.
HWTEST2_F(CommandStreamReceiverHwTestXeHPAndLater, givenXE_HP_COREDefaultSupportDisabledWhenOsSupportsNewResourceImplicitFlushThenReturnOsSupportValue, IsXeHpCore) {
    MockCsrHw commandStreamReceiver(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    commandStreamReceiver.setupContext(*osContext);
    VariableBackup defaultSettingForNewResourceBackup(&ImplicitFlushSettings::getSettingForNewResource(), false);
    EXPECT_FALSE(commandStreamReceiver.checkPlatformSupportsNewResourceImplicitFlush());
}

HWCMDTEST_F(IGFX_XE_HP_CORE,
CommandStreamReceiverHwTestXeHPAndLater, givenPlatformSupportsImplicitFlushForNewResourceWhenCsrIsMultiContextThenExpectNoSupport) {
    // The platform setting is overridden to true (confirmed by the EXPECT_TRUE below), yet a CSR
    // flagged as multi-OS-context capable must still report no support.
    VariableBackup defaultSettingForNewResourceBackup(&ImplicitFlushSettings::getSettingForNewResource(), true);
    MockCsrHw commandStreamReceiver(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    commandStreamReceiver.setupContext(*osContext);
    commandStreamReceiver.multiOsContextCapable = true;
    EXPECT_TRUE(ImplicitFlushSettings::getSettingForNewResource());
    EXPECT_FALSE(commandStreamReceiver.checkPlatformSupportsNewResourceImplicitFlush());
}

// XE_HP_CORE only. The platform setting for GPU-idle implicit flush is expected to default to true;
// with it set to true, the CSR's answer must follow the OS interface's GPU-idle capability.
HWTEST2_F(CommandStreamReceiverHwTestXeHPAndLater, givenXE_HP_COREDefaultSupportEnabledWhenOsSupportsGpuIdleImplicitFlushThenReturnOsSupportValue, IsXeHpCore) {
    MockCsrHw commandStreamReceiver(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    commandStreamReceiver.setupContext(*osContext);
    // Checked before the backup below overrides it, so this verifies the default value.
    EXPECT_TRUE(ImplicitFlushSettings::getSettingForGpuIdle());
    VariableBackup defaultSettingForGpuIdleBackup(&ImplicitFlushSettings::getSettingForGpuIdle(), true);
    // Branch on the GPU-idle OS capability (not the new-resource one) so the expectation tracks the
    // same feature that checkPlatformSupportsGpuIdleImplicitFlush() is being asked about.
    if (commandStreamReceiver.getOSInterface()->gpuIdleImplicitFlush) {
        EXPECT_TRUE(commandStreamReceiver.checkPlatformSupportsGpuIdleImplicitFlush());
    } else {
        EXPECT_FALSE(commandStreamReceiver.checkPlatformSupportsGpuIdleImplicitFlush());
    }
}

// XE_HP_CORE only. With the platform setting overridden to false, the CSR must report no support
// for GPU-idle implicit flush.
HWTEST2_F(CommandStreamReceiverHwTestXeHPAndLater, givenXE_HP_COREDefaultSupportDisabledWhenOsSupportsGpuIdleImplicitFlushThenReturnOsSupportValue, IsXeHpCore) {
    MockCsrHw commandStreamReceiver(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    commandStreamReceiver.setupContext(*osContext);
    VariableBackup defaultSettingForGpuIdleBackup(&ImplicitFlushSettings::getSettingForGpuIdle(), false);
    EXPECT_FALSE(commandStreamReceiver.checkPlatformSupportsGpuIdleImplicitFlush());
}

HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater,
givenPlatformSupportsImplicitFlushForIdleGpuWhenCsrIsMultiContextThenExpectNoSupport) {
    // The platform setting is overridden to true (confirmed by the EXPECT_TRUE below), yet a CSR
    // flagged as multi-OS-context capable must still report no support.
    VariableBackup defaultSettingForGpuIdleBackup(&ImplicitFlushSettings::getSettingForGpuIdle(), true);
    MockCsrHw commandStreamReceiver(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    commandStreamReceiver.setupContext(*osContext);
    commandStreamReceiver.multiOsContextCapable = true;
    EXPECT_TRUE(ImplicitFlushSettings::getSettingForGpuIdle());
    EXPECT_FALSE(commandStreamReceiver.checkPlatformSupportsGpuIdleImplicitFlush());
}

// Same multi-context setup as the previous test, but direct submission is marked active on the OS
// context and the OS-level gpuIdleImplicitFlush value is overridden to true: support is expected.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenPlatformSupportsImplicitFlushForIdleGpuWhenCsrIsMultiContextAndDirectSubmissionActiveThenExpectSupportTrue) {
    VariableBackup defaultSettingForGpuIdleBackup(&ImplicitFlushSettings::getSettingForGpuIdle(), true);
    VariableBackup backupOsSettingForGpuIdle(&OSInterface::gpuIdleImplicitFlush, true);
    osContext->setDirectSubmissionActive();
    MockCsrHw commandStreamReceiver(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    commandStreamReceiver.setupContext(*osContext);
    commandStreamReceiver.multiOsContextCapable = true;
    EXPECT_TRUE(ImplicitFlushSettings::getSettingForGpuIdle());
    EXPECT_TRUE(commandStreamReceiver.checkPlatformSupportsGpuIdleImplicitFlush());
}

// Creates a root device through UltDeviceFactory{1, 2} with static partitioning, implicit scaling
// and local memory flags enabled, then checks the storage info of the CSR's work partition allocation.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, whenCreatingWorkPartitionAllocationThenItsPropertiesAreCorrect) {
    DebugManagerStateRestore restore{};
    DebugManager.flags.EnableStaticPartitioning.set(1);
    DebugManager.flags.EnableImplicitScaling.set(1);
    DebugManager.flags.EnableLocalMemory.set(1);
    UltDeviceFactory deviceFactory{1, 2};
    MockDevice &rootDevice = *deviceFactory.rootDevices[0];
    CommandStreamReceiver &csr = rootDevice.getGpgpuCommandStreamReceiver();
    StorageInfo workPartitionAllocationStorageInfo = csr.getWorkPartitionAllocation()->storageInfo;
    EXPECT_EQ(rootDevice.getDeviceBitfield(),
workPartitionAllocationStorageInfo.memoryBanks);
    // Page-table visibility must also equal the root device bitfield; page tables are not cloned
    // and the allocation is tile-instanced.
    EXPECT_EQ(rootDevice.getDeviceBitfield(), workPartitionAllocationStorageInfo.pageTablesVisibility);
    EXPECT_FALSE(workPartitionAllocationStorageInfo.cloningOfPageTables);
    EXPECT_TRUE(workPartitionAllocationStorageInfo.tileInstanced);
}

// XEHP only: the per-DSS backed buffer command size must be 0, and programming it with
// usePerDssBackedBuffer set must write nothing into the stream.
HWTEST2_F(CommandStreamReceiverHwTestXeHPAndLater, givenXeHpWhenRayTracingEnabledThenDoNotAddCommandBatchBuffer, IsXEHP) {
    MockCsrHw commandStreamReceiver(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    auto cmdSize = commandStreamReceiver.getCmdSizeForPerDssBackedBuffer(pDevice->getHardwareInfo());
    EXPECT_EQ(0u, cmdSize);
    // The stream is backed by a buffer of cmdSize bytes (expected to be 0), so any write would not fit.
    std::unique_ptr buffer(new char[cmdSize]);
    LinearStream cs(buffer.get(), cmdSize);
    DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags();
    dispatchFlags.usePerDssBackedBuffer = true;
    commandStreamReceiver.programPerDssBackedBuffer(cs, *pDevice, dispatchFlags);
    EXPECT_EQ(0u, cs.getUsed());
}

// Static partitioning enabled with a single active partition: the no-post-sync barrier must be
// exactly one PIPE_CONTROL, both in the size estimate and in the programmed stream.
HWTEST2_F(CommandStreamReceiverHwTestXeHPAndLater, givenStaticPartitionEnabledWhenOnlySinglePartitionUsedThenExpectSinglePipeControlAsBarrier, IsAtLeastXeHpCore) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    MockCsrHw commandStreamReceiver(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    constexpr size_t cmdSize = 256;
    std::unique_ptr buffer(new char[cmdSize]);
    LinearStream cs(buffer.get(), cmdSize);
    commandStreamReceiver.staticWorkPartitioningEnabled = true;
    commandStreamReceiver.activePartitions = 1;
    size_t estimatedCmdSize = commandStreamReceiver.getCmdSizeForStallingNoPostSyncCommands();
    EXPECT_EQ(sizeof(PIPE_CONTROL), estimatedCmdSize);
    commandStreamReceiver.programStallingNoPostSyncCommandsForBarrier(cs);
    EXPECT_EQ(estimatedCmdSize, cs.getUsed());
    // The start of the buffer must decode as a PIPE_CONTROL.
    PIPE_CONTROL *pipeControl = genCmdCast(buffer.get());
    ASSERT_NE(nullptr, pipeControl);
}

HWTEST2_F(CommandStreamReceiverHwTestXeHPAndLater,
givenStaticPartitionDisabledWhenMultiplePartitionsUsedThenExpectSinglePipeControlAsBarrier, IsAtLeastXeHpCore) {
    // Two active partitions but static partitioning disabled: the barrier must still be a single PIPE_CONTROL.
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    MockCsrHw commandStreamReceiver(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    constexpr size_t cmdSize = 256;
    std::unique_ptr buffer(new char[cmdSize]);
    LinearStream cs(buffer.get(), cmdSize);
    commandStreamReceiver.staticWorkPartitioningEnabled = false;
    commandStreamReceiver.activePartitions = 2;
    size_t estimatedCmdSize = commandStreamReceiver.getCmdSizeForStallingNoPostSyncCommands();
    EXPECT_EQ(sizeof(PIPE_CONTROL), estimatedCmdSize);
    commandStreamReceiver.programStallingNoPostSyncCommandsForBarrier(cs);
    EXPECT_EQ(estimatedCmdSize, cs.getUsed());
    PIPE_CONTROL *pipeControl = genCmdCast(buffer.get());
    ASSERT_NE(nullptr, pipeControl);
}

// Static partitioning enabled with two active partitions: the no-post-sync barrier is expected to
// be PIPE_CONTROL + MI_ATOMIC + MI_SEMAPHORE_WAIT + MI_BATCH_BUFFER_START followed by two dwords.
HWTEST2_F(CommandStreamReceiverHwTestXeHPAndLater, givenStaticPartitionEnabledWhenMultiplePartitionsUsedThenExpectImplicitScalingWithoutSelfCleanupBarrier, IsAtLeastXeHpCore) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
    using MI_ATOMIC = typename FamilyType::MI_ATOMIC;
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
    MockCsrHw commandStreamReceiver(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    constexpr size_t cmdSize = 256;
    std::unique_ptr buffer(new char[cmdSize]);
    // The stream is given a mock graphics allocation with a fixed GPU address.
    // NOTE(review): presumably needed because the programmed commands reference GPU addresses
    // inside the stream - confirm.
    MockGraphicsAllocation allocation(buffer.get(), cmdSize);
    allocation.gpuAddress = 0xFF000;
    LinearStream cs(buffer.get(), cmdSize);
    cs.replaceGraphicsAllocation(&allocation);
    commandStreamReceiver.staticWorkPartitioningEnabled = true;
    commandStreamReceiver.activePartitions = 2;
    size_t expectedSize = sizeof(PIPE_CONTROL) + sizeof(MI_ATOMIC) + sizeof(MI_SEMAPHORE_WAIT) + sizeof(MI_BATCH_BUFFER_START) + 2 * sizeof(uint32_t);
    size_t estimatedCmdSize = commandStreamReceiver.getCmdSizeForStallingNoPostSyncCommands();
EXPECT_EQ(expectedSize, estimatedCmdSize); commandStreamReceiver.programStallingNoPostSyncCommandsForBarrier(cs); EXPECT_EQ(estimatedCmdSize, cs.getUsed()); void *cmdBuffer = buffer.get(); size_t offset = 0; PIPE_CONTROL *pipeControl = genCmdCast(cmdBuffer); ASSERT_NE(nullptr, pipeControl); offset += sizeof(PIPE_CONTROL); MI_ATOMIC *miAtomic = genCmdCast(ptrOffset(cmdBuffer, offset)); ASSERT_NE(nullptr, miAtomic); offset += sizeof(MI_ATOMIC); MI_SEMAPHORE_WAIT *miSemaphore = genCmdCast(ptrOffset(cmdBuffer, offset)); ASSERT_NE(nullptr, miSemaphore); offset += sizeof(MI_SEMAPHORE_WAIT); MI_BATCH_BUFFER_START *bbStart = genCmdCast(ptrOffset(cmdBuffer, offset)); ASSERT_NE(nullptr, bbStart); offset += sizeof(MI_BATCH_BUFFER_START); uint32_t *data = reinterpret_cast(ptrOffset(cmdBuffer, offset)); EXPECT_EQ(0u, *data); offset += sizeof(uint32_t); data = reinterpret_cast(ptrOffset(cmdBuffer, offset)); EXPECT_EQ(0u, *data); offset += sizeof(uint32_t); EXPECT_EQ(estimatedCmdSize, offset); } HWTEST2_F(CommandStreamReceiverHwTestXeHPAndLater, givenStaticPartitionEnabledWhenSinglePartitionUsedForPostSyncBarrierThenExpectOnlyPostSyncCommands, IsAtLeastXeHpCore) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; auto &hwInfo = pDevice->getHardwareInfo(); auto commandStreamReceiver = new MockCsrHw(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(commandStreamReceiver); auto &commandStreamCSR = commandStreamReceiver->getCS(); TagNodeBase *tagNode = commandStreamReceiver->getTimestampPacketAllocator()->getTag(); uint64_t gpuAddress = TimestampPacketHelper::getContextEndGpuAddress(*tagNode); TimestampPacketDependencies timestampPacketDependencies; timestampPacketDependencies.barrierNodes.add(tagNode); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.barrierTimestampPacketNodes = ×tampPacketDependencies.barrierNodes; 
commandStreamReceiver->staticWorkPartitioningEnabled = true;
    commandStreamReceiver->activePartitions = 1;
    size_t expectedCmdSize = MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(hwInfo);
    size_t estimatedCmdSize = commandStreamReceiver->getCmdSizeForStallingCommands(dispatchFlags);
    EXPECT_EQ(expectedCmdSize, estimatedCmdSize);
    commandStreamReceiver->programStallingCommandsForBarrier(commandStreamCSR, dispatchFlags);
    EXPECT_EQ(estimatedCmdSize, commandStreamCSR.getUsed());
    parseCommands(commandStreamCSR, 0);
    findHardwareCommands();
    auto cmdItor = cmdList.begin();
    // When the pipe-control workaround is required, an extra PIPE_CONTROL (and, if its size is
    // non-zero, one additional synchronization command) precedes the post-sync PIPE_CONTROL; step over them.
    if (MemorySynchronizationCommands::isPipeControlWArequired(hwInfo)) {
        PIPE_CONTROL *pipeControl = genCmdCast(*cmdItor);
        ASSERT_NE(nullptr, pipeControl);
        cmdItor++;
        if (MemorySynchronizationCommands::getSizeForSingleAdditionalSynchronization(hwInfo) > 0) {
            cmdItor++;
        }
    }
    // The post-sync PIPE_CONTROL: immediate-data write of 0 to the tag's context-end address, with CS stall.
    PIPE_CONTROL *pipeControl = genCmdCast(*cmdItor);
    ASSERT_NE(nullptr, pipeControl);
    EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, pipeControl->getPostSyncOperation());
    EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable());
    EXPECT_EQ(0u, pipeControl->getImmediateData());
    EXPECT_EQ(gpuAddress, UnitTestHelper::getPipeControlPostSyncAddress(*pipeControl));
}

// Two active partitions but static partitioning disabled: a barrier carrying a timestamp packet
// node must consist only of the PIPE_CONTROL-with-post-sync sequence.
HWTEST2_F(CommandStreamReceiverHwTestXeHPAndLater, givenStaticPartitionDisabledWhenMultiplePartitionsUsedForPostSyncBarrierThenExpectOnlyPostSyncCommands, IsAtLeastXeHpCore) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    auto &hwInfo = pDevice->getHardwareInfo();
    auto commandStreamReceiver = new MockCsrHw(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    pDevice->resetCommandStreamReceiver(commandStreamReceiver);
    auto &commandStreamCSR = commandStreamReceiver->getCS();
    TagNodeBase *tagNode = commandStreamReceiver->getTimestampPacketAllocator()->getTag();
    uint64_t gpuAddress = TimestampPacketHelper::getContextEndGpuAddress(*tagNode);
    TimestampPacketDependencies
timestampPacketDependencies; timestampPacketDependencies.barrierNodes.add(tagNode); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.barrierTimestampPacketNodes = ×tampPacketDependencies.barrierNodes; commandStreamReceiver->staticWorkPartitioningEnabled = false; commandStreamReceiver->activePartitions = 2; size_t expectedCmdSize = MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(hwInfo); size_t estimatedCmdSize = commandStreamReceiver->getCmdSizeForStallingCommands(dispatchFlags); EXPECT_EQ(expectedCmdSize, estimatedCmdSize); commandStreamReceiver->programStallingCommandsForBarrier(commandStreamCSR, dispatchFlags); EXPECT_EQ(estimatedCmdSize, commandStreamCSR.getUsed()); parseCommands(commandStreamCSR, 0); findHardwareCommands(); auto cmdItor = cmdList.begin(); if (MemorySynchronizationCommands::isPipeControlWArequired(hwInfo)) { PIPE_CONTROL *pipeControl = genCmdCast(*cmdItor); ASSERT_NE(nullptr, pipeControl); cmdItor++; if (MemorySynchronizationCommands::getSizeForSingleAdditionalSynchronization(hwInfo) > 0) { cmdItor++; } } PIPE_CONTROL *pipeControl = genCmdCast(*cmdItor); ASSERT_NE(nullptr, pipeControl); EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, pipeControl->getPostSyncOperation()); EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); EXPECT_EQ(0u, pipeControl->getImmediateData()); EXPECT_EQ(gpuAddress, UnitTestHelper::getPipeControlPostSyncAddress(*pipeControl)); } HWTEST2_F(CommandStreamReceiverHwTestXeHPAndLater, givenStaticPartitionEnabledWhenMultiplePartitionsUsedThenExpectImplicitScalingPostSyncBarrierWithoutSelfCleanup, IsAtLeastXeHpCore) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; using MI_ATOMIC = typename FamilyType::MI_ATOMIC; using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; auto &hwInfo = pDevice->getHardwareInfo(); auto 
commandStreamReceiver = new MockCsrHw(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(commandStreamReceiver); auto &commandStreamCSR = commandStreamReceiver->getCS(); TagNodeBase *tagNode = commandStreamReceiver->getTimestampPacketAllocator()->getTag(); uint64_t gpuAddress = TimestampPacketHelper::getContextEndGpuAddress(*tagNode); TimestampPacketDependencies timestampPacketDependencies; timestampPacketDependencies.barrierNodes.add(tagNode); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.barrierTimestampPacketNodes = ×tampPacketDependencies.barrierNodes; commandStreamReceiver->staticWorkPartitioningEnabled = true; commandStreamReceiver->activePartitions = 2; size_t expectedSize = MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(hwInfo) + sizeof(MI_ATOMIC) + sizeof(MI_SEMAPHORE_WAIT) + sizeof(MI_BATCH_BUFFER_START) + 2 * sizeof(uint32_t); size_t estimatedCmdSize = commandStreamReceiver->getCmdSizeForStallingCommands(dispatchFlags); EXPECT_EQ(expectedSize, estimatedCmdSize); commandStreamReceiver->programStallingCommandsForBarrier(commandStreamCSR, dispatchFlags); EXPECT_EQ(estimatedCmdSize, commandStreamCSR.getUsed()); EXPECT_EQ(2u, tagNode->getPacketsUsed()); parseCommands(commandStreamCSR, 0); findHardwareCommands(); auto cmdItor = cmdList.begin(); if (MemorySynchronizationCommands::isPipeControlWArequired(hwInfo)) { PIPE_CONTROL *pipeControl = genCmdCast(*cmdItor); ASSERT_NE(nullptr, pipeControl); cmdItor++; if (MemorySynchronizationCommands::getSizeForSingleAdditionalSynchronization(hwInfo) > 0) { cmdItor++; } } PIPE_CONTROL *pipeControl = genCmdCast(*cmdItor); ASSERT_NE(nullptr, pipeControl); EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, pipeControl->getPostSyncOperation()); EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); EXPECT_EQ(0u, 
pipeControl->getImmediateData()); EXPECT_EQ(gpuAddress, UnitTestHelper::getPipeControlPostSyncAddress(*pipeControl)); EXPECT_TRUE(pipeControl->getWorkloadPartitionIdOffsetEnable()); cmdItor++; if (MemorySynchronizationCommands::getSizeForSingleAdditionalSynchronization(hwInfo) > 0) { cmdItor++; } MI_ATOMIC *miAtomic = genCmdCast(*cmdItor); ASSERT_NE(nullptr, miAtomic); cmdItor++; MI_SEMAPHORE_WAIT *miSemaphore = genCmdCast(*cmdItor); ASSERT_NE(nullptr, miSemaphore); cmdItor++; MI_BATCH_BUFFER_START *bbStart = genCmdCast(*cmdItor); ASSERT_NE(nullptr, bbStart); } command_stream_receiver_mt_tests.cpp000066400000000000000000000420311422164147700340440ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/command_stream/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/mocks/mock_csr.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/helpers/gtest_helpers.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" using namespace NEO; struct CommandStreamReceiverMtTest : public ClDeviceFixture, public ::testing::Test { void SetUp() override { ClDeviceFixture::SetUp(); commandStreamReceiver = &pDevice->getGpgpuCommandStreamReceiver(); ASSERT_NE(nullptr, commandStreamReceiver); } void TearDown() override { ClDeviceFixture::TearDown(); } CommandStreamReceiver *commandStreamReceiver; }; HWTEST_F(CommandStreamReceiverMtTest, givenDebugPauseThreadWhenSettingFlagProgressThenFunctionAsksTwiceForConfirmation) { DebugManagerStateRestore restore; DebugManager.flags.PauseOnEnqueue.set(0); testing::internal::CaptureStdout(); int32_t executionStamp = 0; auto mockCSR = new MockCsr(executionStamp, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); uint32_t confirmationCounter = 0; mockCSR->debugConfirmationFunction = [&confirmationCounter, &mockCSR]() { 
if (confirmationCounter == 0) {
            // First confirmation request: the shared state must be "waiting for start".
            {
                std::unique_lock lock{mockCSR->debugPauseStateLock};
                EXPECT_TRUE(DebugPauseState::waitingForUserStartConfirmation == *mockCSR->debugPauseStateAddress);
            }
            confirmationCounter++;
        } else if (confirmationCounter == 1) {
            // Second confirmation request: the shared state must be "waiting for end".
            {
                std::unique_lock lock{mockCSR->debugPauseStateLock};
                EXPECT_TRUE(DebugPauseState::waitingForUserEndConfirmation == *mockCSR->debugPauseStateAddress);
            }
            confirmationCounter++;
        }
    };
    pDevice->resetCommandStreamReceiver(mockCSR);

    // Request the start confirmation by writing the state under the lock ...
    {
        std::unique_lock lock{mockCSR->debugPauseStateLock};
        *mockCSR->debugPauseStateAddress = DebugPauseState::waitingForUserStartConfirmation;
    }
    // ... then poll the state (under the lock) until it becomes hasUserStartConfirmation.
    // NOTE(review): presumably advanced by the CSR's userPauseConfirmation thread - confirm.
    auto currentValue = DebugPauseState::waitingForUserStartConfirmation;
    while (currentValue != DebugPauseState::hasUserStartConfirmation) {
        std::unique_lock lock{mockCSR->debugPauseStateLock};
        currentValue = *mockCSR->debugPauseStateAddress;
    }

    // Same handshake for the end-of-workload confirmation.
    {
        std::unique_lock lock{mockCSR->debugPauseStateLock};
        *mockCSR->debugPauseStateAddress = DebugPauseState::waitingForUserEndConfirmation;
    }
    while (currentValue != DebugPauseState::hasUserEndConfirmation) {
        std::unique_lock lock{mockCSR->debugPauseStateLock};
        currentValue = *mockCSR->debugPauseStateAddress;
    }

    // Both prompts must have been confirmed and printed to stdout.
    EXPECT_EQ(2u, confirmationCounter);
    auto output = testing::internal::GetCapturedStdout();
    EXPECT_TRUE(hasSubstr(output, std::string("Debug break: Press enter to start workload")));
    EXPECT_TRUE(hasSubstr(output, std::string("Debug break: Workload ended, press enter to continue")));
    // Join and release the confirmation thread object before the test ends.
    mockCSR->userPauseConfirmation->join();
    mockCSR->userPauseConfirmation.reset();
}

// PauseOnGpuMode = 0: only the "start workload" prompt is expected, so the confirmation function
// must be invoked exactly once.
HWTEST_F(CommandStreamReceiverMtTest, givenDebugPauseThreadBeforeWalkerOnlyWhenSettingFlagProgressThenFunctionAsksOnceForConfirmation) {
    DebugManagerStateRestore restore;
    DebugManager.flags.PauseOnEnqueue.set(0);
    DebugManager.flags.PauseOnGpuMode.set(0);
    testing::internal::CaptureStdout();
    int32_t executionStamp = 0;
    auto mockCSR = new MockCsr(executionStamp, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
uint32_t confirmationCounter = 0;
    // The confirmation function asserts it has not been called before and that the state is "waiting for start".
    mockCSR->debugConfirmationFunction = [&confirmationCounter, &mockCSR]() {
        EXPECT_EQ(0u, confirmationCounter);
        EXPECT_TRUE(DebugPauseState::waitingForUserStartConfirmation == *mockCSR->debugPauseStateAddress);
        confirmationCounter++;
    };
    pDevice->resetCommandStreamReceiver(mockCSR);

    // Request the start confirmation and poll (under the lock) until it is acknowledged.
    {
        std::unique_lock lock{mockCSR->debugPauseStateLock};
        *mockCSR->debugPauseStateAddress = DebugPauseState::waitingForUserStartConfirmation;
    }
    auto currentValue = DebugPauseState::waitingForUserStartConfirmation;
    while (currentValue != DebugPauseState::hasUserStartConfirmation) {
        std::unique_lock lock{mockCSR->debugPauseStateLock};
        currentValue = *mockCSR->debugPauseStateAddress;
    }
    // The end state is written but not waited on; only one confirmation is expected in this mode.
    {
        std::unique_lock lock{mockCSR->debugPauseStateLock};
        *mockCSR->debugPauseStateAddress = DebugPauseState::waitingForUserEndConfirmation;
    }
    EXPECT_EQ(1u, confirmationCounter);
    auto output = testing::internal::GetCapturedStdout();
    EXPECT_TRUE(hasSubstr(output, std::string("Debug break: Press enter to start workload")));
    EXPECT_FALSE(hasSubstr(output, std::string("Debug break: Workload ended, press enter to continue")));
    mockCSR->userPauseConfirmation->join();
    mockCSR->userPauseConfirmation.reset();
}

// PauseOnGpuMode = 1: only the "workload ended" prompt is expected, so the confirmation function
// must be invoked exactly once, with the state at "waiting for end".
HWTEST_F(CommandStreamReceiverMtTest, givenDebugPauseThreadAfterWalkerOnlyWhenSettingFlagProgressThenFunctionAsksOnceForConfirmation) {
    DebugManagerStateRestore restore;
    DebugManager.flags.PauseOnEnqueue.set(0);
    DebugManager.flags.PauseOnGpuMode.set(1);
    testing::internal::CaptureStdout();
    int32_t executionStamp = 0;
    auto mockCSR = new MockCsr(executionStamp, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    uint32_t confirmationCounter = 0;
    mockCSR->debugConfirmationFunction = [&confirmationCounter, &mockCSR]() {
        EXPECT_EQ(0u, confirmationCounter);
        EXPECT_TRUE(DebugPauseState::waitingForUserEndConfirmation == *mockCSR->debugPauseStateAddress);
        confirmationCounter++;
    };
    pDevice->resetCommandStreamReceiver(mockCSR);
    {
        std::unique_lock
lock{mockCSR->debugPauseStateLock};
        *mockCSR->debugPauseStateAddress = DebugPauseState::waitingForUserEndConfirmation;
    }
    // Poll (under the lock) until the end confirmation is acknowledged.
    auto currentValue = DebugPauseState::waitingForUserEndConfirmation;
    while (currentValue != DebugPauseState::hasUserEndConfirmation) {
        std::unique_lock lock{mockCSR->debugPauseStateLock};
        currentValue = *mockCSR->debugPauseStateAddress;
    }
    // NOTE(review): the state is set back to waitingForUserEndConfirmation here without a further
    // wait; the counter check below still expects a single confirmation - confirm this is intended.
    {
        std::unique_lock lock{mockCSR->debugPauseStateLock};
        *mockCSR->debugPauseStateAddress = DebugPauseState::waitingForUserEndConfirmation;
    }
    EXPECT_EQ(1u, confirmationCounter);
    auto output = testing::internal::GetCapturedStdout();
    EXPECT_FALSE(hasSubstr(output, std::string("Debug break: Press enter to start workload")));
    EXPECT_TRUE(hasSubstr(output, std::string("Debug break: Workload ended, press enter to continue")));
    mockCSR->userPauseConfirmation->join();
    mockCSR->userPauseConfirmation.reset();
}

// PauseOnEnqueue = -2: the start/end confirmation handshake is driven twice, so the confirmation
// function is expected to run four times (start, end, start, end).
HWTEST_F(CommandStreamReceiverMtTest, givenDebugPauseThreadOnEachEnqueueWhenSettingFlagProgressThenFunctionAsksMultipleTimesForConfirmation) {
    DebugManagerStateRestore restore;
    DebugManager.flags.PauseOnEnqueue.set(-2);
    testing::internal::CaptureStdout();
    int32_t executionStamp = 0;
    auto mockCSR = new MockCsr(executionStamp, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    uint32_t confirmationCounter = 0;
    // Each call checks the state expected for its position in the start/end/start/end sequence.
    mockCSR->debugConfirmationFunction = [&confirmationCounter, &mockCSR]() {
        if (confirmationCounter == 0) {
            {
                std::unique_lock lock{mockCSR->debugPauseStateLock};
                EXPECT_TRUE(DebugPauseState::waitingForUserStartConfirmation == *mockCSR->debugPauseStateAddress);
            }
            confirmationCounter++;
        } else if (confirmationCounter == 1) {
            {
                std::unique_lock lock{mockCSR->debugPauseStateLock};
                EXPECT_TRUE(DebugPauseState::waitingForUserEndConfirmation == *mockCSR->debugPauseStateAddress);
            }
            confirmationCounter++;
        } else if (confirmationCounter == 2) {
            {
                std::unique_lock lock{mockCSR->debugPauseStateLock};
                EXPECT_TRUE(DebugPauseState::waitingForUserStartConfirmation ==
*mockCSR->debugPauseStateAddress); } confirmationCounter++; } else if (confirmationCounter == 3) { { std::unique_lock lock{mockCSR->debugPauseStateLock}; EXPECT_TRUE(DebugPauseState::waitingForUserEndConfirmation == *mockCSR->debugPauseStateAddress); } confirmationCounter++; DebugManager.flags.PauseOnEnqueue.set(-1); } }; pDevice->resetCommandStreamReceiver(mockCSR); { std::unique_lock lock{mockCSR->debugPauseStateLock}; *mockCSR->debugPauseStateAddress = DebugPauseState::waitingForUserStartConfirmation; } auto currentValue = DebugPauseState::waitingForUserStartConfirmation; while (currentValue != DebugPauseState::hasUserStartConfirmation) { std::unique_lock lock{mockCSR->debugPauseStateLock}; currentValue = *mockCSR->debugPauseStateAddress; } { std::unique_lock lock{mockCSR->debugPauseStateLock}; *mockCSR->debugPauseStateAddress = DebugPauseState::waitingForUserEndConfirmation; } while (currentValue != DebugPauseState::hasUserEndConfirmation) { std::unique_lock lock{mockCSR->debugPauseStateLock}; currentValue = *mockCSR->debugPauseStateAddress; } { std::unique_lock lock{mockCSR->debugPauseStateLock}; *mockCSR->debugPauseStateAddress = DebugPauseState::waitingForUserStartConfirmation; } while (currentValue != DebugPauseState::hasUserStartConfirmation) { std::unique_lock lock{mockCSR->debugPauseStateLock}; currentValue = *mockCSR->debugPauseStateAddress; } { std::unique_lock lock{mockCSR->debugPauseStateLock}; *mockCSR->debugPauseStateAddress = DebugPauseState::waitingForUserEndConfirmation; } while (currentValue != DebugPauseState::hasUserEndConfirmation) { std::unique_lock lock{mockCSR->debugPauseStateLock}; currentValue = *mockCSR->debugPauseStateAddress; } EXPECT_EQ(4u, confirmationCounter); auto output = testing::internal::GetCapturedStdout(); EXPECT_TRUE(hasSubstr(output, std::string("Debug break: Press enter to start workload"))); EXPECT_TRUE(hasSubstr(output, std::string("Debug break: Workload ended, press enter to continue"))); 
mockCSR->userPauseConfirmation->join(); mockCSR->userPauseConfirmation.reset(); } HWTEST_F(CommandStreamReceiverMtTest, givenDebugPauseThreadOnEachBlitWhenSettingFlagProgressThenFunctionAsksMultipleTimesForConfirmation) { DebugManagerStateRestore restore; DebugManager.flags.PauseOnBlitCopy.set(-2); testing::internal::CaptureStdout(); int32_t executionStamp = 0; auto mockCSR = new MockCsr(executionStamp, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); uint32_t confirmationCounter = 0; mockCSR->debugConfirmationFunction = [&confirmationCounter, &mockCSR]() { if (confirmationCounter == 0) { { std::unique_lock lock{mockCSR->debugPauseStateLock}; EXPECT_TRUE(DebugPauseState::waitingForUserStartConfirmation == *mockCSR->debugPauseStateAddress); } confirmationCounter++; } else if (confirmationCounter == 1) { { std::unique_lock lock{mockCSR->debugPauseStateLock}; EXPECT_TRUE(DebugPauseState::waitingForUserEndConfirmation == *mockCSR->debugPauseStateAddress); } confirmationCounter++; } else if (confirmationCounter == 2) { { std::unique_lock lock{mockCSR->debugPauseStateLock}; EXPECT_TRUE(DebugPauseState::waitingForUserStartConfirmation == *mockCSR->debugPauseStateAddress); } confirmationCounter++; } else if (confirmationCounter == 3) { { std::unique_lock lock{mockCSR->debugPauseStateLock}; EXPECT_TRUE(DebugPauseState::waitingForUserEndConfirmation == *mockCSR->debugPauseStateAddress); } confirmationCounter++; DebugManager.flags.PauseOnBlitCopy.set(-1); } }; pDevice->resetCommandStreamReceiver(mockCSR); { std::unique_lock lock{mockCSR->debugPauseStateLock}; *mockCSR->debugPauseStateAddress = DebugPauseState::waitingForUserStartConfirmation; } auto currentValue = DebugPauseState::waitingForUserStartConfirmation; while (currentValue != DebugPauseState::hasUserStartConfirmation) { std::unique_lock lock{mockCSR->debugPauseStateLock}; currentValue = *mockCSR->debugPauseStateAddress; } { std::unique_lock 
lock{mockCSR->debugPauseStateLock}; *mockCSR->debugPauseStateAddress = DebugPauseState::waitingForUserEndConfirmation; } while (currentValue != DebugPauseState::hasUserEndConfirmation) { std::unique_lock lock{mockCSR->debugPauseStateLock}; currentValue = *mockCSR->debugPauseStateAddress; } { std::unique_lock lock{mockCSR->debugPauseStateLock}; *mockCSR->debugPauseStateAddress = DebugPauseState::waitingForUserStartConfirmation; } while (currentValue != DebugPauseState::hasUserStartConfirmation) { std::unique_lock lock{mockCSR->debugPauseStateLock}; currentValue = *mockCSR->debugPauseStateAddress; } { std::unique_lock lock{mockCSR->debugPauseStateLock}; *mockCSR->debugPauseStateAddress = DebugPauseState::waitingForUserEndConfirmation; } while (currentValue != DebugPauseState::hasUserEndConfirmation) { std::unique_lock lock{mockCSR->debugPauseStateLock}; currentValue = *mockCSR->debugPauseStateAddress; } EXPECT_EQ(4u, confirmationCounter); auto output = testing::internal::GetCapturedStdout(); EXPECT_TRUE(hasSubstr(output, std::string("Debug break: Press enter to start workload"))); EXPECT_TRUE(hasSubstr(output, std::string("Debug break: Workload ended, press enter to continue"))); mockCSR->userPauseConfirmation->join(); mockCSR->userPauseConfirmation.reset(); } HWTEST_F(CommandStreamReceiverMtTest, givenDebugPauseThreadWhenTerminatingAtFirstStageThenFunctionEndsCorrectly) { DebugManagerStateRestore restore; DebugManager.flags.PauseOnEnqueue.set(0); testing::internal::CaptureStdout(); int32_t executionStamp = 0; auto mockCSR = new MockCsr(executionStamp, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); uint32_t confirmationCounter = 0; mockCSR->debugConfirmationFunction = [&confirmationCounter]() { confirmationCounter++; }; pDevice->resetCommandStreamReceiver(mockCSR); { std::unique_lock lock{mockCSR->debugPauseStateLock}; *mockCSR->debugPauseStateAddress = DebugPauseState::terminate; } EXPECT_EQ(0u, confirmationCounter); 
auto output = testing::internal::GetCapturedStdout(); EXPECT_EQ(0u, output.length()); mockCSR->userPauseConfirmation->join(); mockCSR->userPauseConfirmation.reset(); } HWTEST_F(CommandStreamReceiverMtTest, givenDebugPauseThreadWhenTerminatingAtSecondStageThenFunctionEndsCorrectly) { DebugManagerStateRestore restore; DebugManager.flags.PauseOnEnqueue.set(0); testing::internal::CaptureStdout(); int32_t executionStamp = 0; auto mockCSR = new MockCsr(executionStamp, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); uint32_t confirmationCounter = 0; mockCSR->debugConfirmationFunction = [&confirmationCounter]() { confirmationCounter++; }; pDevice->resetCommandStreamReceiver(mockCSR); { std::unique_lock lock{mockCSR->debugPauseStateLock}; *mockCSR->debugPauseStateAddress = DebugPauseState::waitingForUserStartConfirmation; } auto currentValue = DebugPauseState::waitingForUserStartConfirmation; while (currentValue != DebugPauseState::hasUserStartConfirmation) { std::unique_lock lock{mockCSR->debugPauseStateLock}; currentValue = *mockCSR->debugPauseStateAddress; } { std::unique_lock lock{mockCSR->debugPauseStateLock}; *mockCSR->debugPauseStateAddress = DebugPauseState::terminate; } auto output = testing::internal::GetCapturedStdout(); EXPECT_TRUE(hasSubstr(output, std::string("Debug break: Press enter to start workload"))); EXPECT_FALSE(hasSubstr(output, std::string("Debug break: Workload ended, press enter to continue"))); EXPECT_EQ(1u, confirmationCounter); mockCSR->userPauseConfirmation->join(); mockCSR->userPauseConfirmation.reset(); } command_stream_receiver_with_aub_dump_tests.cpp000066400000000000000000001037661422164147700362700ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/command_stream/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/aub_command_stream_receiver_hw.h" #include 
"shared/source/command_stream/command_stream_receiver_with_aub_dump.h" #include "shared/source/command_stream/command_stream_receiver_with_aub_dump.inl" #include "shared/source/command_stream/preemption.h" #include "shared/source/command_stream/tbx_command_stream_receiver_hw.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/helpers/flush_stamp.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/timestamp_packet.h" #include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/source/os_interface/os_context.h" #include "shared/source/utilities/tag_allocator.h" #include "shared/test/common/fixtures/device_fixture.h" #include "shared/test/common/helpers/engine_descriptor_helper.h" #include "shared/test/common/libult/ult_command_stream_receiver.h" #include "shared/test/common/mocks/mock_allocation_properties.h" #include "shared/test/common/mocks/mock_aub_center.h" #include "shared/test/common/mocks/mock_aub_csr.h" #include "shared/test/common/mocks/mock_aub_manager.h" #include "shared/test/common/mocks/mock_execution_environment.h" #include "shared/test/common/mocks/mock_os_context.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/fixtures/mock_aub_center_fixture.h" #include "opencl/source/helpers/dispatch_info.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/mocks/mock_platform.h" using namespace NEO; struct MyMockCsr : UltCommandStreamReceiver { MyMockCsr(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield) : UltCommandStreamReceiver(executionEnvironment, rootDeviceIndex, deviceBitfield) { } SubmissionStatus flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override { flushParametrization.wasCalled = true; flushParametrization.receivedBatchBuffer = &batchBuffer; flushParametrization.receivedEngine = 
osContext->getEngineType(); flushParametrization.receivedAllocationsForResidency = &allocationsForResidency; processResidency(allocationsForResidency, 0u); flushStamp->setStamp(flushParametrization.flushStampToReturn); return SubmissionStatus::SUCCESS; } void makeResident(GraphicsAllocation &gfxAllocation) override { makeResidentParameterization.wasCalled = true; makeResidentParameterization.receivedGfxAllocation = &gfxAllocation; gfxAllocation.updateResidencyTaskCount(1, osContext->getContextId()); } void processResidency(const ResidencyContainer &allocationsForResidency, uint32_t handleId) override { processResidencyParameterization.wasCalled = true; processResidencyParameterization.receivedAllocationsForResidency = &allocationsForResidency; } void makeNonResident(GraphicsAllocation &gfxAllocation) override { if (gfxAllocation.isResident(this->osContext->getContextId())) { makeNonResidentParameterization.wasCalled = true; makeNonResidentParameterization.receivedGfxAllocation = &gfxAllocation; gfxAllocation.releaseResidencyInOsContext(this->osContext->getContextId()); } } AubSubCaptureStatus checkAndActivateAubSubCapture(const std::string &kernelName) override { checkAndActivateAubSubCaptureParameterization.wasCalled = true; checkAndActivateAubSubCaptureParameterization.kernelName = &kernelName; return {false, false}; } bool expectMemory(const void *gfxAddress, const void *srcAddress, size_t length, uint32_t compareOperation) override { expectMemoryParameterization.wasCalled = true; expectMemoryParameterization.gfxAddress = gfxAddress; expectMemoryParameterization.srcAddress = srcAddress; expectMemoryParameterization.length = length; expectMemoryParameterization.compareOperation = compareOperation; return true; } struct FlushParameterization { bool wasCalled = false; FlushStamp flushStampToReturn = 1; BatchBuffer *receivedBatchBuffer = nullptr; aub_stream::EngineType receivedEngine = aub_stream::ENGINE_RCS; ResidencyContainer *receivedAllocationsForResidency = 
nullptr; } flushParametrization; struct MakeResidentParameterization { bool wasCalled = false; GraphicsAllocation *receivedGfxAllocation = nullptr; } makeResidentParameterization; struct ProcessResidencyParameterization { bool wasCalled = false; const ResidencyContainer *receivedAllocationsForResidency = nullptr; } processResidencyParameterization; struct MakeNonResidentParameterization { bool wasCalled = false; GraphicsAllocation *receivedGfxAllocation = nullptr; } makeNonResidentParameterization; struct CheckAndActivateAubSubCaptureParameterization { bool wasCalled = false; const std::string *kernelName = nullptr; } checkAndActivateAubSubCaptureParameterization; struct ExpectMemoryParameterization { bool wasCalled = false; const void *gfxAddress = nullptr; const void *srcAddress = nullptr; size_t length = 0; uint32_t compareOperation = AubMemDump::CmdServicesMemTraceMemoryCompare::CompareOperationValues::CompareEqual; } expectMemoryParameterization; }; template struct MyMockCsrWithAubDump : CommandStreamReceiverWithAUBDump { MyMockCsrWithAubDump(bool createAubCSR, ExecutionEnvironment &executionEnvironment, const DeviceBitfield deviceBitfield) : CommandStreamReceiverWithAUBDump("aubfile", executionEnvironment, 0, deviceBitfield) { this->aubCSR.reset(createAubCSR ? 
new MyMockCsr(executionEnvironment, 0, deviceBitfield) : nullptr); } MyMockCsr &getAubMockCsr() const { return static_cast(*this->aubCSR); } }; struct CommandStreamReceiverWithAubDumpTest : public ::testing::TestWithParam, MockAubCenterFixture, DeviceFixture { void SetUp() override { DeviceFixture::SetUp(); MockAubCenterFixture::SetUp(); setMockAubCenter(pDevice->getRootDeviceEnvironmentRef()); executionEnvironment = pDevice->getExecutionEnvironment(); executionEnvironment->initializeMemoryManager(); memoryManager = executionEnvironment->memoryManager.get(); ASSERT_NE(nullptr, memoryManager); createAubCSR = GetParam(); DeviceBitfield deviceBitfield(1); csrWithAubDump = new MyMockCsrWithAubDump(createAubCSR, *executionEnvironment, deviceBitfield); ASSERT_NE(nullptr, csrWithAubDump); auto engineDescriptor = EngineDescriptorHelper::getDefaultDescriptor({getChosenEngineType(DEFAULT_TEST_PLATFORM::hwInfo), EngineUsage::Regular}, PreemptionHelper::getDefaultPreemptionMode(DEFAULT_TEST_PLATFORM::hwInfo)); auto osContext = executionEnvironment->memoryManager->createAndRegisterOsContext(csrWithAubDump, engineDescriptor); csrWithAubDump->setupContext(*osContext); } void TearDown() override { delete csrWithAubDump; MockAubCenterFixture::TearDown(); DeviceFixture::TearDown(); } ExecutionEnvironment *executionEnvironment; MyMockCsrWithAubDump *csrWithAubDump; MemoryManager *memoryManager; bool createAubCSR; }; struct CommandStreamReceiverWithAubDumpSimpleTest : Test, DeviceFixture { void SetUp() override { DeviceFixture::SetUp(); MockAubCenterFixture::SetUp(); setMockAubCenter(pDevice->getRootDeviceEnvironmentRef()); } void TearDown() override { MockAubCenterFixture::TearDown(); DeviceFixture::TearDown(); } }; HWTEST_F(CommandStreamReceiverWithAubDumpSimpleTest, givenCsrWithAubDumpWhenSettingOsContextThenReplicateItToAubCsr) { ExecutionEnvironment *executionEnvironment = pDevice->getExecutionEnvironment(); executionEnvironment->initializeMemoryManager(); DeviceBitfield 
deviceBitfield(1); CommandStreamReceiverWithAUBDump> csrWithAubDump("aubfile", *executionEnvironment, 0, deviceBitfield); auto hwInfo = executionEnvironment->rootDeviceEnvironments[0]->getHardwareInfo(); MockOsContext osContext(0, EngineDescriptorHelper::getDefaultDescriptor(HwHelper::get(hwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*hwInfo)[0], PreemptionHelper::getDefaultPreemptionMode(*hwInfo))); csrWithAubDump.setupContext(osContext); EXPECT_EQ(&osContext, &csrWithAubDump.getOsContext()); EXPECT_EQ(&osContext, &csrWithAubDump.aubCSR->getOsContext()); } HWTEST_F(CommandStreamReceiverWithAubDumpSimpleTest, givenAubManagerAvailableWhenTbxCsrWithAubDumpIsCreatedThenAubCsrIsNotCreated) { ExecutionEnvironment *executionEnvironment = pDevice->getExecutionEnvironment(); executionEnvironment->initializeMemoryManager(); std::string fileName = "file_name.aub"; MockAubManager *mockManager = new MockAubManager(); auto gmmHelper = executionEnvironment->rootDeviceEnvironments[0]->getGmmHelper(); MockAubCenter *mockAubCenter = new MockAubCenter(defaultHwInfo.get(), *gmmHelper, false, fileName, CommandStreamReceiverType::CSR_TBX_WITH_AUB); mockAubCenter->aubManager = std::unique_ptr(mockManager); executionEnvironment->rootDeviceEnvironments[0]->aubCenter = std::unique_ptr(mockAubCenter); DeviceBitfield deviceBitfield(1); CommandStreamReceiverWithAUBDump> csrWithAubDump("aubfile", *executionEnvironment, 0, deviceBitfield); ASSERT_EQ(nullptr, csrWithAubDump.aubCSR); EXPECT_EQ(CommandStreamReceiverType::CSR_TBX_WITH_AUB, csrWithAubDump.getType()); } HWTEST_F(CommandStreamReceiverWithAubDumpSimpleTest, givenAubManagerAvailableWhenHwCsrWithAubDumpIsCreatedThenAubCsrIsCreated) { ExecutionEnvironment *executionEnvironment = pDevice->getExecutionEnvironment(); executionEnvironment->initializeMemoryManager(); auto gmmHelper = executionEnvironment->rootDeviceEnvironments[0]->getGmmHelper(); std::string fileName = "file_name.aub"; MockAubManager *mockManager = new 
MockAubManager(); MockAubCenter *mockAubCenter = new MockAubCenter(defaultHwInfo.get(), *gmmHelper, false, fileName, CommandStreamReceiverType::CSR_HW_WITH_AUB); mockAubCenter->aubManager = std::unique_ptr(mockManager); executionEnvironment->rootDeviceEnvironments[0]->aubCenter = std::unique_ptr(mockAubCenter); DeviceBitfield deviceBitfield(1); CommandStreamReceiverWithAUBDump> csrWithAubDump("aubfile", *executionEnvironment, 0, deviceBitfield); ASSERT_NE(nullptr, csrWithAubDump.aubCSR); EXPECT_EQ(CommandStreamReceiverType::CSR_HW_WITH_AUB, csrWithAubDump.getType()); } HWTEST_F(CommandStreamReceiverWithAubDumpSimpleTest, givenCsrWithAubDumpWhenWaitingForTaskCountThenAddPollForCompletion) { auto executionEnvironment = pDevice->getExecutionEnvironment(); executionEnvironment->initializeMemoryManager(); auto gmmHelper = executionEnvironment->rootDeviceEnvironments[0]->getGmmHelper(); MockAubCenter *mockAubCenter = new MockAubCenter(defaultHwInfo.get(), *gmmHelper, false, "file_name.aub", CommandStreamReceiverType::CSR_HW_WITH_AUB); mockAubCenter->aubManager = std::unique_ptr(new MockAubManager()); executionEnvironment->rootDeviceEnvironments[0]->aubCenter = std::unique_ptr(mockAubCenter); DeviceBitfield deviceBitfield(1); CommandStreamReceiverWithAUBDump> csrWithAubDump("file_name.aub", *executionEnvironment, 0, deviceBitfield); csrWithAubDump.initializeTagAllocation(); auto mockAubCsr = new MockAubCsr("file_name.aub", false, *executionEnvironment, 0, deviceBitfield); mockAubCsr->initializeTagAllocation(); csrWithAubDump.aubCSR.reset(mockAubCsr); EXPECT_FALSE(mockAubCsr->pollForCompletionCalled); csrWithAubDump.waitForTaskCountWithKmdNotifyFallback(1, 0, false, QueueThrottle::MEDIUM); EXPECT_TRUE(mockAubCsr->pollForCompletionCalled); csrWithAubDump.aubCSR.reset(nullptr); csrWithAubDump.waitForTaskCountWithKmdNotifyFallback(1, 0, false, QueueThrottle::MEDIUM); } HWTEST_F(CommandStreamReceiverWithAubDumpSimpleTest, 
givenCsrWithAubDumpWhenPollForCompletionCalledThenAubCsrPollForCompletionCalled) { auto executionEnvironment = pDevice->getExecutionEnvironment(); executionEnvironment->initializeMemoryManager(); auto gmmHelper = executionEnvironment->rootDeviceEnvironments[0]->getGmmHelper(); MockAubCenter *mockAubCenter = new MockAubCenter(defaultHwInfo.get(), *gmmHelper, false, "file_name.aub", CommandStreamReceiverType::CSR_HW_WITH_AUB); mockAubCenter->aubManager = std::unique_ptr(new MockAubManager()); executionEnvironment->rootDeviceEnvironments[0]->aubCenter = std::unique_ptr(mockAubCenter); DeviceBitfield deviceBitfield(1); CommandStreamReceiverWithAUBDump> csrWithAubDump("file_name.aub", *executionEnvironment, 0, deviceBitfield); csrWithAubDump.initializeTagAllocation(); csrWithAubDump.aubCSR.reset(nullptr); csrWithAubDump.pollForCompletion(); auto mockAubCsr = new MockAubCsr("file_name.aub", false, *executionEnvironment, 0, deviceBitfield); mockAubCsr->initializeTagAllocation(); csrWithAubDump.aubCSR.reset(mockAubCsr); EXPECT_FALSE(mockAubCsr->pollForCompletionCalled); csrWithAubDump.pollForCompletion(); EXPECT_TRUE(mockAubCsr->pollForCompletionCalled); } HWTEST_P(CommandStreamReceiverWithAubDumpTest, givenCommandStreamReceiverWithAubDumpWhenExpectMemoryIsCalledThenBothCommandStreamReceiversAreCalled) { uint32_t compareOperation = AubMemDump::CmdServicesMemTraceMemoryCompare::CompareOperationValues::CompareEqual; uint8_t buffer[0x10000]{}; size_t length = sizeof(buffer); auto gfxAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csrWithAubDump->getRootDeviceIndex(), false, length}, buffer); ASSERT_NE(nullptr, gfxAllocation); csrWithAubDump->makeResidentHostPtrAllocation(gfxAllocation); csrWithAubDump->expectMemory(reinterpret_cast(gfxAllocation->getGpuAddress()), buffer, length, compareOperation); EXPECT_TRUE(csrWithAubDump->expectMemoryParameterization.wasCalled); EXPECT_EQ(reinterpret_cast(gfxAllocation->getGpuAddress()), 
csrWithAubDump->expectMemoryParameterization.gfxAddress); EXPECT_EQ(buffer, csrWithAubDump->expectMemoryParameterization.srcAddress); EXPECT_EQ(length, csrWithAubDump->expectMemoryParameterization.length); EXPECT_EQ(compareOperation, csrWithAubDump->expectMemoryParameterization.compareOperation); if (createAubCSR) { EXPECT_TRUE(csrWithAubDump->getAubMockCsr().expectMemoryParameterization.wasCalled); EXPECT_EQ(reinterpret_cast(gfxAllocation->getGpuAddress()), csrWithAubDump->getAubMockCsr().expectMemoryParameterization.gfxAddress); EXPECT_EQ(buffer, csrWithAubDump->getAubMockCsr().expectMemoryParameterization.srcAddress); EXPECT_EQ(length, csrWithAubDump->getAubMockCsr().expectMemoryParameterization.length); EXPECT_EQ(compareOperation, csrWithAubDump->getAubMockCsr().expectMemoryParameterization.compareOperation); } memoryManager->freeGraphicsMemoryImpl(gfxAllocation); } HWTEST_F(CommandStreamReceiverWithAubDumpSimpleTest, givenCsrWithAubDumpWhenCreatingAubCsrThenInitializeTagAllocation) { auto executionEnvironment = pDevice->getExecutionEnvironment(); executionEnvironment->initializeMemoryManager(); auto gmmHelper = executionEnvironment->rootDeviceEnvironments[0]->getGmmHelper(); MockAubCenter *mockAubCenter = new MockAubCenter(defaultHwInfo.get(), *gmmHelper, false, "file_name.aub", CommandStreamReceiverType::CSR_HW_WITH_AUB); mockAubCenter->aubManager = std::unique_ptr(new MockAubManager()); executionEnvironment->rootDeviceEnvironments[0]->aubCenter = std::unique_ptr(mockAubCenter); uint32_t subDevicesCount = 4; DeviceBitfield deviceBitfield = maxNBitValue(subDevicesCount); CommandStreamReceiverWithAUBDump> csrWithAubDump("file_name.aub", *executionEnvironment, 0, deviceBitfield); EXPECT_NE(nullptr, csrWithAubDump.aubCSR->getTagAllocation()); EXPECT_NE(nullptr, csrWithAubDump.aubCSR->getTagAddress()); auto tagAddressToInitialize = csrWithAubDump.aubCSR->getTagAddress(); for (uint32_t i = 0; i < subDevicesCount; i++) { EXPECT_EQ(std::numeric_limits::max(), 
*tagAddressToInitialize); tagAddressToInitialize = ptrOffset(tagAddressToInitialize, csrWithAubDump.aubCSR->getPostSyncWriteOffset()); } } HWTEST_F(CommandStreamReceiverWithAubDumpSimpleTest, givenAubCsrWithHwWhenAddingCommentThenAddCommentToAubManager) { auto executionEnvironment = pDevice->getExecutionEnvironment(); executionEnvironment->initializeMemoryManager(); auto gmmHelper = executionEnvironment->rootDeviceEnvironments[0]->getGmmHelper(); MockAubCenter *mockAubCenter = new MockAubCenter(defaultHwInfo.get(), *gmmHelper, false, "file_name.aub", CommandStreamReceiverType::CSR_HW_WITH_AUB); auto mockAubManager = new MockAubManager(); mockAubCenter->aubManager.reset(mockAubManager); executionEnvironment->rootDeviceEnvironments[0]->aubCenter = std::unique_ptr(mockAubCenter); EXPECT_FALSE(mockAubManager->addCommentCalled); DeviceBitfield deviceBitfield(1); CommandStreamReceiverWithAUBDump> csrWithAubDump("file_name.aub", *executionEnvironment, 0, deviceBitfield); csrWithAubDump.addAubComment("test"); EXPECT_TRUE(mockAubManager->addCommentCalled); } HWTEST_F(CommandStreamReceiverWithAubDumpSimpleTest, givenAubCsrWithTbxWhenAddingCommentThenDontAddCommentToAubManager) { auto executionEnvironment = pDevice->getExecutionEnvironment(); executionEnvironment->initializeMemoryManager(); auto gmmHelper = executionEnvironment->rootDeviceEnvironments[0]->getGmmHelper(); MockAubCenter *mockAubCenter = new MockAubCenter(defaultHwInfo.get(), *gmmHelper, false, "file_name.aub", CommandStreamReceiverType::CSR_TBX_WITH_AUB); auto mockAubManager = new MockAubManager(); mockAubCenter->aubManager.reset(mockAubManager); executionEnvironment->rootDeviceEnvironments[0]->aubCenter = std::unique_ptr(mockAubCenter); DeviceBitfield deviceBitfield(1); CommandStreamReceiverWithAUBDump> csrWithAubDump("file_name.aub", *executionEnvironment, 0, deviceBitfield); csrWithAubDump.addAubComment("test"); EXPECT_FALSE(mockAubManager->addCommentCalled); } struct CommandStreamReceiverTagTests : public 
::testing::Test { template using AubWithHw = CommandStreamReceiverWithAUBDump>; template using AubWithTbx = CommandStreamReceiverWithAUBDump>; template bool isTimestampPacketNodeReleasable(Args &&...args) { CsrT csr(std::forward(args)...); auto hwInfo = csr.peekExecutionEnvironment().rootDeviceEnvironments[0]->getHardwareInfo(); MockOsContext osContext(0, EngineDescriptorHelper::getDefaultDescriptor(HwHelper::get(hwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*hwInfo)[0], PreemptionHelper::getDefaultPreemptionMode(*hwInfo))); csr.setupContext(osContext); auto allocator = csr.getTimestampPacketAllocator(); auto tag = allocator->getTag(); typename FamilyType::TimestampPacketType zeros[4] = {}; for (uint32_t i = 0; i < TimestampPacketSizeControl::preferredPacketCount; i++) { tag->assignDataToAllTimestamps(i, zeros); } bool canBeReleased = tag->canBeReleased(); allocator->returnTag(tag); return canBeReleased; }; template size_t getPreferredTagPoolSize(Args &&...args) { CsrT csr(std::forward(args)...); return csr.getPreferredTagPoolSize(); }; void SetUp() override { executionEnvironment = platform()->peekExecutionEnvironment(); executionEnvironment->initializeMemoryManager(); auto gmmHelper = executionEnvironment->rootDeviceEnvironments[0]->getGmmHelper(); MockAubManager *mockManager = new MockAubManager(); MockAubCenter *mockAubCenter = new MockAubCenter(defaultHwInfo.get(), *gmmHelper, false, fileName, CommandStreamReceiverType::CSR_HW_WITH_AUB); mockAubCenter->aubManager = std::unique_ptr(mockManager); executionEnvironment->rootDeviceEnvironments[0]->aubCenter = std::unique_ptr(mockAubCenter); } const std::string fileName = "file_name.aub"; ExecutionEnvironment *executionEnvironment = nullptr; }; HWTEST_F(CommandStreamReceiverTagTests, givenCsrTypeWhenCreatingTimestampPacketAllocatorThenSetDefaultCompletionCheckType) { bool result = isTimestampPacketNodeReleasable, FamilyType>(*executionEnvironment, 0, 1); EXPECT_TRUE(result); result = 
isTimestampPacketNodeReleasable, FamilyType>(fileName, false, *executionEnvironment, 0, 1); EXPECT_FALSE(result); result = isTimestampPacketNodeReleasable, FamilyType>(fileName, *executionEnvironment, 0, 1); EXPECT_FALSE(result); result = isTimestampPacketNodeReleasable, FamilyType>(fileName, *executionEnvironment, 0, 1); EXPECT_FALSE(result); } HWTEST_F(CommandStreamReceiverTagTests, givenCsrTypeWhenAskingForTagPoolSizeThenReturnOneForAubTbxMode) { EXPECT_EQ(2048u, getPreferredTagPoolSize>(*executionEnvironment, 0, 1)); EXPECT_EQ(1u, getPreferredTagPoolSize>(fileName, false, *executionEnvironment, 0, 1)); EXPECT_EQ(1u, getPreferredTagPoolSize>(fileName, *executionEnvironment, 0, 1)); EXPECT_EQ(1u, getPreferredTagPoolSize>(fileName, *executionEnvironment, 0, 1)); } using SimulatedCsrTest = ::testing::Test; HWTEST_F(SimulatedCsrTest, givenHwWithAubDumpCsrTypeWhenCreateCommandStreamReceiverThenProperAubCenterIsInitialized) { uint32_t expectedRootDeviceIndex = 10; MockExecutionEnvironment executionEnvironment(defaultHwInfo.get(), true, expectedRootDeviceIndex + 2); executionEnvironment.initializeMemoryManager(); auto rootDeviceEnvironment = static_cast(executionEnvironment.rootDeviceEnvironments[expectedRootDeviceIndex].get()); rootDeviceEnvironment->setHwInfo(defaultHwInfo.get()); EXPECT_EQ(nullptr, executionEnvironment.rootDeviceEnvironments[expectedRootDeviceIndex]->aubCenter.get()); EXPECT_FALSE(rootDeviceEnvironment->initAubCenterCalled); DeviceBitfield deviceBitfield(1); auto csr = std::make_unique>>("", executionEnvironment, expectedRootDeviceIndex, deviceBitfield); EXPECT_TRUE(rootDeviceEnvironment->initAubCenterCalled); EXPECT_NE(nullptr, rootDeviceEnvironment->aubCenter.get()); } HWTEST_F(SimulatedCsrTest, givenTbxWithAubDumpCsrTypeWhenCreateCommandStreamReceiverThenProperAubCenterIsInitialized) { uint32_t expectedRootDeviceIndex = 10; MockExecutionEnvironment executionEnvironment(defaultHwInfo.get(), true, expectedRootDeviceIndex + 2); 
executionEnvironment.initializeMemoryManager(); auto rootDeviceEnvironment = new MockRootDeviceEnvironment(executionEnvironment); executionEnvironment.rootDeviceEnvironments[expectedRootDeviceIndex].reset(rootDeviceEnvironment); rootDeviceEnvironment->setHwInfo(defaultHwInfo.get()); EXPECT_EQ(nullptr, executionEnvironment.rootDeviceEnvironments[expectedRootDeviceIndex]->aubCenter.get()); EXPECT_FALSE(rootDeviceEnvironment->initAubCenterCalled); DeviceBitfield deviceBitfield(1); auto csr = std::make_unique>>("", executionEnvironment, expectedRootDeviceIndex, deviceBitfield); EXPECT_TRUE(rootDeviceEnvironment->initAubCenterCalled); EXPECT_NE(nullptr, rootDeviceEnvironment->aubCenter.get()); } HWTEST_F(CommandStreamReceiverWithAubDumpSimpleTest, givenNullAubManagerAvailableWhenTbxCsrWithAubDumpIsCreatedThenAubCsrIsCreated) { MockAubCenter *mockAubCenter = new MockAubCenter(); ExecutionEnvironment *executionEnvironment = pDevice->getExecutionEnvironment(); executionEnvironment->initializeMemoryManager(); executionEnvironment->rootDeviceEnvironments[0]->aubCenter = std::unique_ptr(mockAubCenter); DeviceBitfield deviceBitfield(1); CommandStreamReceiverWithAUBDump> csrWithAubDump("aubfile", *executionEnvironment, 0, deviceBitfield); EXPECT_NE(nullptr, csrWithAubDump.aubCSR); } HWTEST_F(CommandStreamReceiverWithAubDumpSimpleTest, givenAubManagerNotAvailableWhenHwCsrWithAubDumpIsCreatedThenAubCsrIsCreated) { std::string fileName = "file_name.aub"; MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); executionEnvironment.initializeMemoryManager(); DeviceBitfield deviceBitfield(1); CommandStreamReceiverWithAUBDump> csrWithAubDump("aubfile", executionEnvironment, 0, deviceBitfield); ASSERT_NE(nullptr, csrWithAubDump.aubCSR); } HWTEST_P(CommandStreamReceiverWithAubDumpTest, givenCommandStreamReceiverWithAubDumpWhenCtorIsCalledThenAubCsrIsInitialized) { if (createAubCSR) { EXPECT_NE(nullptr, csrWithAubDump->aubCSR); } else { EXPECT_EQ(nullptr, 
csrWithAubDump->aubCSR); } } HWTEST_P(CommandStreamReceiverWithAubDumpTest, givenCommandStreamReceiverWithAubDumpWhenFlushIsCalledThenBaseCsrFlushStampIsReturned) { GraphicsAllocation *commandBuffer = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csrWithAubDump->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, commandBuffer); LinearStream cs(commandBuffer); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false}; auto engineType = csrWithAubDump->getOsContext().getEngineType(); ResidencyContainer allocationsForResidency; csrWithAubDump->flush(batchBuffer, allocationsForResidency); EXPECT_EQ(csrWithAubDump->obtainCurrentFlushStamp(), csrWithAubDump->flushParametrization.flushStampToReturn); EXPECT_TRUE(csrWithAubDump->flushParametrization.wasCalled); EXPECT_EQ(&batchBuffer, csrWithAubDump->flushParametrization.receivedBatchBuffer); EXPECT_EQ(engineType, csrWithAubDump->flushParametrization.receivedEngine); EXPECT_EQ(&allocationsForResidency, csrWithAubDump->flushParametrization.receivedAllocationsForResidency); if (createAubCSR) { EXPECT_TRUE(csrWithAubDump->getAubMockCsr().flushParametrization.wasCalled); EXPECT_EQ(&batchBuffer, csrWithAubDump->getAubMockCsr().flushParametrization.receivedBatchBuffer); EXPECT_EQ(engineType, csrWithAubDump->getAubMockCsr().flushParametrization.receivedEngine); EXPECT_EQ(&allocationsForResidency, csrWithAubDump->getAubMockCsr().flushParametrization.receivedAllocationsForResidency); } memoryManager->freeGraphicsMemoryImpl(commandBuffer); } HWTEST_P(CommandStreamReceiverWithAubDumpTest, givenCommandStreamReceiverWithAubDumpWhenMakeResidentIsCalledThenBaseCsrMakeResidentIsCalled) { auto gfxAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csrWithAubDump->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, 
gfxAllocation); csrWithAubDump->makeResident(*gfxAllocation); EXPECT_TRUE(csrWithAubDump->makeResidentParameterization.wasCalled); EXPECT_EQ(gfxAllocation, csrWithAubDump->makeResidentParameterization.receivedGfxAllocation); if (createAubCSR) { EXPECT_FALSE(csrWithAubDump->getAubMockCsr().makeResidentParameterization.wasCalled); EXPECT_EQ(nullptr, csrWithAubDump->getAubMockCsr().makeResidentParameterization.receivedGfxAllocation); } memoryManager->freeGraphicsMemoryImpl(gfxAllocation); } HWTEST_P(CommandStreamReceiverWithAubDumpTest, givenCommandStreamReceiverWithAubDumpWhenFlushIsCalledThenBothBaseAndAubCsrProcessResidencyIsCalled) { GraphicsAllocation *commandBuffer = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csrWithAubDump->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, commandBuffer); LinearStream cs(commandBuffer); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false}; auto gfxAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csrWithAubDump->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, gfxAllocation); ResidencyContainer allocationsForResidency = {gfxAllocation}; csrWithAubDump->flush(batchBuffer, allocationsForResidency); EXPECT_EQ(csrWithAubDump->obtainCurrentFlushStamp(), csrWithAubDump->flushParametrization.flushStampToReturn); EXPECT_TRUE(csrWithAubDump->processResidencyParameterization.wasCalled); EXPECT_EQ(&allocationsForResidency, csrWithAubDump->processResidencyParameterization.receivedAllocationsForResidency); if (createAubCSR) { EXPECT_TRUE(csrWithAubDump->getAubMockCsr().processResidencyParameterization.wasCalled); EXPECT_EQ(&allocationsForResidency, csrWithAubDump->getAubMockCsr().processResidencyParameterization.receivedAllocationsForResidency); } memoryManager->freeGraphicsMemoryImpl(gfxAllocation); 
memoryManager->freeGraphicsMemoryImpl(commandBuffer); } HWTEST_P(CommandStreamReceiverWithAubDumpTest, givenCommandStreamReceiverWithAubDumpWhenFlushIsCalledThenLatestSentTaskCountShouldBeUpdatedForAubCsr) { GraphicsAllocation *commandBuffer = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csrWithAubDump->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, commandBuffer); LinearStream cs(commandBuffer); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false}; ResidencyContainer allocationsForResidency; EXPECT_EQ(0u, csrWithAubDump->peekLatestSentTaskCount()); if (createAubCSR) { EXPECT_EQ(0u, csrWithAubDump->getAubMockCsr().peekLatestSentTaskCount()); } csrWithAubDump->setLatestSentTaskCount(1u); csrWithAubDump->flush(batchBuffer, allocationsForResidency); EXPECT_EQ(1u, csrWithAubDump->peekLatestSentTaskCount()); if (createAubCSR) { EXPECT_EQ(csrWithAubDump->peekLatestSentTaskCount(), csrWithAubDump->getAubMockCsr().peekLatestSentTaskCount()); EXPECT_EQ(csrWithAubDump->peekLatestSentTaskCount(), csrWithAubDump->getAubMockCsr().peekLatestFlushedTaskCount()); } memoryManager->freeGraphicsMemoryImpl(commandBuffer); } HWTEST_P(CommandStreamReceiverWithAubDumpTest, givenCommandStreamReceiverWithAubDumpWhenMakeNonResidentIsCalledThenBothBaseAndAubCsrMakeNonResidentIsCalled) { auto gfxAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csrWithAubDump->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, gfxAllocation); csrWithAubDump->makeResident(*gfxAllocation); csrWithAubDump->makeNonResident(*gfxAllocation); EXPECT_TRUE(csrWithAubDump->makeNonResidentParameterization.wasCalled); EXPECT_EQ(gfxAllocation, csrWithAubDump->makeNonResidentParameterization.receivedGfxAllocation); 
EXPECT_FALSE(gfxAllocation->isResident(csrWithAubDump->getOsContext().getContextId())); if (createAubCSR) { EXPECT_TRUE(csrWithAubDump->getAubMockCsr().makeNonResidentParameterization.wasCalled); EXPECT_EQ(gfxAllocation, csrWithAubDump->getAubMockCsr().makeNonResidentParameterization.receivedGfxAllocation); } memoryManager->freeGraphicsMemoryImpl(gfxAllocation); } HWTEST_P(CommandStreamReceiverWithAubDumpTest, givenCommandStreamReceiverWithAubDumpWhenCheckAndActivateAubSubCaptureIsCalledThenBaseCsrCommandStreamReceiverIsCalled) { std::string kernelName = ""; csrWithAubDump->checkAndActivateAubSubCapture(kernelName); EXPECT_TRUE(csrWithAubDump->checkAndActivateAubSubCaptureParameterization.wasCalled); EXPECT_EQ(&kernelName, csrWithAubDump->checkAndActivateAubSubCaptureParameterization.kernelName); if (createAubCSR) { EXPECT_TRUE(csrWithAubDump->getAubMockCsr().checkAndActivateAubSubCaptureParameterization.wasCalled); EXPECT_EQ(&kernelName, csrWithAubDump->getAubMockCsr().checkAndActivateAubSubCaptureParameterization.kernelName); } } HWTEST_P(CommandStreamReceiverWithAubDumpTest, givenCommandStreamReceiverWithAubDumpWhenCreateMemoryManagerIsCalledThenItIsUsedByBothBaseAndAubCsr) { EXPECT_EQ(memoryManager, csrWithAubDump->getMemoryManager()); if (createAubCSR) { EXPECT_EQ(memoryManager, csrWithAubDump->aubCSR->getMemoryManager()); } } static bool createAubCSR[] = { false, true}; INSTANTIATE_TEST_CASE_P( CommandStreamReceiverWithAubDumpTest_Create, CommandStreamReceiverWithAubDumpTest, testing::ValuesIn(createAubCSR)); create_command_stream_receiver_tests.cpp000066400000000000000000000044151422164147700346730ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/command_stream/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/command_stream/command_stream_receiver_with_aub_dump.h" #include 
"shared/source/execution_environment/execution_environment.h" #include "shared/source/memory_manager/os_agnostic_memory_manager.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/execution_environment_helper.h" #include "shared/test/common/helpers/ult_hw_config.h" #include "shared/test/common/helpers/variable_backup.h" #include "shared/test/common/libult/create_command_stream.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/fixtures/mock_aub_center_fixture.h" using namespace NEO; struct CreateCommandStreamReceiverTest : public ::testing::TestWithParam {}; HWTEST_P(CreateCommandStreamReceiverTest, givenCreateCommandStreamWhenCsrIsSetToValidTypeThenTheFuntionReturnsCommandStreamReceiver) { DebugManagerStateRestore stateRestorer; HardwareInfo *hwInfo = nullptr; ExecutionEnvironment *executionEnvironment = getExecutionEnvironmentImpl(hwInfo, 1); ASSERT_NE(nullptr, executionEnvironment->memoryManager.get()); executionEnvironment->incRefInternal(); MockAubCenterFixture::setMockAubCenter(*executionEnvironment->rootDeviceEnvironments[0]); CommandStreamReceiverType csrType = GetParam(); VariableBackup backup(&ultHwConfig); ultHwConfig.useHwCsr = true; DebugManager.flags.SetCommandStreamReceiver.set(csrType); { auto csr = std::unique_ptr(createCommandStream(*executionEnvironment, 0, 1)); if (csrType < CommandStreamReceiverType::CSR_TYPES_NUM) { EXPECT_NE(nullptr, csr.get()); } else { EXPECT_EQ(nullptr, csr.get()); } } executionEnvironment->decRefInternal(); } static CommandStreamReceiverType commandStreamReceiverTypes[] = { CSR_HW, CSR_AUB, CSR_TBX, CSR_HW_WITH_AUB, CSR_TBX_WITH_AUB, CSR_TYPES_NUM}; INSTANTIATE_TEST_CASE_P( CreateCommandStreamReceiverTest_Create, CreateCommandStreamReceiverTest, testing::ValuesIn(commandStreamReceiverTypes)); 
experimental_command_buffer_tests.cpp000066400000000000000000000463431422164147700342250ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/command_stream/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/constants.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/mocks/mock_experimental_command_buffer.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/multi_root_device_fixture.h" #include "opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h" #include "gtest/gtest.h" using namespace NEO; struct ExperimentalCommandBufferTest : public UltCommandStreamReceiverTest { void SetUp() override { dbgRestore.reset(new DebugManagerStateRestore()); DebugManager.flags.EnableExperimentalCommandBuffer.set(1); UltCommandStreamReceiverTest::SetUp(); } std::unique_ptr dbgRestore; }; struct MockExperimentalCommandBufferTest : public UltCommandStreamReceiverTest { void SetUp() override { UltCommandStreamReceiverTest::SetUp(); pDevice->getGpgpuCommandStreamReceiver().setExperimentalCmdBuffer( std::unique_ptr(new MockExperimentalCommandBuffer(&pDevice->getGpgpuCommandStreamReceiver()))); } }; HWTEST_F(MockExperimentalCommandBufferTest, givenEnabledExperimentalCmdBufferWhenCsrIsFlushedThenExpectProperlyFilledExperimentalCmdBuffer) { using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); 
commandStreamReceiver.storeMakeResidentAllocations = true; MockExperimentalCommandBuffer *mockExCmdBuffer = static_cast(commandStreamReceiver.experimentalCmdBuffer.get()); flushTask(commandStreamReceiver); ASSERT_NE(nullptr, mockExCmdBuffer->currentStream.get()); ASSERT_NE(nullptr, mockExCmdBuffer->currentStream->getGraphicsAllocation()); uint64_t exCmdBufferGpuAddr = mockExCmdBuffer->currentStream->getGraphicsAllocation()->getGpuAddress(); EXPECT_TRUE(commandStreamReceiver.isMadeResident(mockExCmdBuffer->currentStream->getGraphicsAllocation())); ASSERT_NE(nullptr, mockExCmdBuffer->experimentalAllocation); uint64_t exAllocationGpuAddr = mockExCmdBuffer->experimentalAllocation->getGpuAddress(); EXPECT_TRUE(commandStreamReceiver.isMadeResident(mockExCmdBuffer->experimentalAllocation)); ASSERT_NE(nullptr, mockExCmdBuffer->timestamps); EXPECT_TRUE(commandStreamReceiver.isMadeResident(mockExCmdBuffer->timestamps)); constexpr uint32_t expectedTsOffset = 2 * sizeof(uint64_t); EXPECT_EQ(expectedTsOffset, mockExCmdBuffer->timestampsOffset); constexpr uint32_t expectedExOffset = 0; EXPECT_EQ(expectedExOffset, mockExCmdBuffer->experimentalAllocationOffset); constexpr uint32_t expectedSemaphoreVal = 1; uintptr_t actualSemaphoreAddr = reinterpret_cast(mockExCmdBuffer->experimentalAllocation->getUnderlyingBuffer()) + mockExCmdBuffer->experimentalAllocationOffset; uint32_t *actualSemaphoreVal = reinterpret_cast(actualSemaphoreAddr); EXPECT_EQ(expectedSemaphoreVal, *actualSemaphoreVal); HardwareParse hwParserCsr; hwParserCsr.parseCommands(commandStreamReceiver.commandStream, 0); GenCmdList bbList = hwParserCsr.getCommandsList(); MI_BATCH_BUFFER_START *bbStart = nullptr; GenCmdList::iterator it = bbList.begin(); ASSERT_NE(bbList.end(), it); bbStart = reinterpret_cast(*it); ASSERT_NE(nullptr, bbStart); EXPECT_EQ(exCmdBufferGpuAddr, bbStart->getBatchBufferStartAddress()); EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH, 
bbStart->getSecondLevelBatchBuffer()); MI_BATCH_BUFFER_END *bbEnd = nullptr; PIPE_CONTROL *pipeControl = nullptr; MI_SEMAPHORE_WAIT *semaphoreCmd = nullptr; HardwareParse hwParserExCmdBuffer; hwParserExCmdBuffer.parseCommands(*mockExCmdBuffer->currentStream, 0); it = hwParserExCmdBuffer.cmdList.begin(); GenCmdList::iterator end = hwParserExCmdBuffer.cmdList.end(); if (MemorySynchronizationCommands::isPipeControlWArequired(pDevice->getHardwareInfo())) { //1st PIPE_CONTROL with CS Stall ASSERT_NE(end, it); pipeControl = genCmdCast(*it); ASSERT_NE(nullptr, pipeControl); EXPECT_EQ(1u, pipeControl->getCommandStreamerStallEnable()); it++; if (UnitTestHelper::isAdditionalSynchronizationRequired()) { it++; } } //2nd PIPE_CONTROL with ts addr uint64_t timeStampAddress = mockExCmdBuffer->timestamps->getGpuAddress(); ASSERT_NE(end, it); pipeControl = genCmdCast(*it); ASSERT_NE(nullptr, pipeControl); EXPECT_EQ(1u, pipeControl->getCommandStreamerStallEnable()); EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, pipeControl->getPostSyncOperation()); EXPECT_EQ(timeStampAddress, NEO::UnitTestHelper::getPipeControlPostSyncAddress(*pipeControl)); if (UnitTestHelper::isAdditionalSynchronizationRequired()) { it++; } //MI_SEMAPHORE_WAIT it++; ASSERT_NE(end, it); semaphoreCmd = genCmdCast(*it); ASSERT_NE(nullptr, semaphoreCmd); EXPECT_EQ(expectedSemaphoreVal, semaphoreCmd->getSemaphoreDataDword()); EXPECT_EQ(exAllocationGpuAddr, semaphoreCmd->getSemaphoreGraphicsAddress()); EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION_SAD_EQUAL_SDD, semaphoreCmd->getCompareOperation()); if (MemorySynchronizationCommands::isPipeControlWArequired(pDevice->getHardwareInfo())) { //3rd PIPE_CONTROL with CS stall it++; ASSERT_NE(end, it); pipeControl = genCmdCast(*it); ASSERT_NE(nullptr, pipeControl); EXPECT_EQ(1u, pipeControl->getCommandStreamerStallEnable()); if (UnitTestHelper::isAdditionalSynchronizationRequired()) { it++; } } //4th PIPE_CONTROL with ts addr timeStampAddress = 
mockExCmdBuffer->timestamps->getGpuAddress() + sizeof(uint64_t); it++; ASSERT_NE(end, it); pipeControl = genCmdCast(*it); ASSERT_NE(nullptr, pipeControl); EXPECT_EQ(1u, pipeControl->getCommandStreamerStallEnable()); EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, pipeControl->getPostSyncOperation()); EXPECT_EQ(timeStampAddress, NEO::UnitTestHelper::getPipeControlPostSyncAddress(*pipeControl)); if (UnitTestHelper::isAdditionalSynchronizationRequired()) { it++; } //BB_END it++; ASSERT_NE(end, it); bbEnd = genCmdCast(*it); ASSERT_NE(nullptr, bbEnd); } HWTEST_F(MockExperimentalCommandBufferTest, givenEnabledExperimentalCmdBufferWhenCsrIsNotFlushedThenExperimentalBufferLinearStreamIsNotCreatedAndCmdBufferCommandsHaveProperlyOffsetedAddresses) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.storeMakeResidentAllocations = true; MockExperimentalCommandBuffer *mockExCmdBuffer = static_cast(commandStreamReceiver.experimentalCmdBuffer.get()); EXPECT_EQ(nullptr, mockExCmdBuffer->currentStream.get()); EXPECT_NE(nullptr, mockExCmdBuffer->experimentalAllocation); EXPECT_FALSE(commandStreamReceiver.isMadeResident(mockExCmdBuffer->experimentalAllocation)); EXPECT_NE(nullptr, mockExCmdBuffer->timestamps); EXPECT_FALSE(commandStreamReceiver.isMadeResident(mockExCmdBuffer->timestamps)); constexpr uint32_t expectedTsOffset = 0; EXPECT_EQ(expectedTsOffset, mockExCmdBuffer->timestampsOffset); constexpr uint32_t expectedExOffset = 0; EXPECT_EQ(expectedExOffset, mockExCmdBuffer->experimentalAllocationOffset); } HWTEST_F(MockExperimentalCommandBufferTest, givenEnabledExperimentalCmdBufferWhenCsrIsFlushedTwiceThenExpectProperlyFilledExperimentalCmdBufferAndTimestampOffset) { using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; auto &commandStreamReceiver = 
pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.storeMakeResidentAllocations = true; MockExperimentalCommandBuffer *mockExCmdBuffer = static_cast(commandStreamReceiver.experimentalCmdBuffer.get()); flushTask(commandStreamReceiver); size_t csrCmdBufferOffset = commandStreamReceiver.commandStream.getUsed(); ASSERT_NE(nullptr, mockExCmdBuffer->currentStream.get()); ASSERT_NE(nullptr, mockExCmdBuffer->currentStream->getGraphicsAllocation()); uint64_t exCmdBufferGpuAddr = mockExCmdBuffer->currentStream->getGraphicsAllocation()->getGpuAddress(); EXPECT_TRUE(commandStreamReceiver.isMadeResident(mockExCmdBuffer->currentStream->getGraphicsAllocation())); ASSERT_NE(nullptr, mockExCmdBuffer->experimentalAllocation); EXPECT_TRUE(commandStreamReceiver.isMadeResident(mockExCmdBuffer->experimentalAllocation)); ASSERT_NE(nullptr, mockExCmdBuffer->timestamps); EXPECT_TRUE(commandStreamReceiver.isMadeResident(mockExCmdBuffer->timestamps)); size_t cmbBufferOffset = mockExCmdBuffer->currentStream->getUsed(); flushTask(commandStreamReceiver); //two pairs of TS constexpr uint32_t expectedTsOffset = 4 * sizeof(uint64_t); EXPECT_EQ(expectedTsOffset, mockExCmdBuffer->timestampsOffset); constexpr uint32_t expectedExOffset = 0; EXPECT_EQ(expectedExOffset, mockExCmdBuffer->experimentalAllocationOffset); HardwareParse hwParserCsr; hwParserCsr.parseCommands(commandStreamReceiver.commandStream, csrCmdBufferOffset); GenCmdList bbList = hwParserCsr.getCommandsList(); MI_BATCH_BUFFER_START *bbStart = nullptr; exCmdBufferGpuAddr += cmbBufferOffset; GenCmdList::iterator it = bbList.begin(); ASSERT_NE(bbList.end(), it); bbStart = reinterpret_cast(*it); ASSERT_NE(nullptr, bbStart); EXPECT_EQ(exCmdBufferGpuAddr, bbStart->getBatchBufferStartAddress()); EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH, bbStart->getSecondLevelBatchBuffer()); PIPE_CONTROL *pipeControl = nullptr; HardwareParse hwParserExCmdBuffer; 
hwParserExCmdBuffer.parseCommands(*mockExCmdBuffer->currentStream, cmbBufferOffset); it = hwParserExCmdBuffer.cmdList.begin(); GenCmdList::iterator end = hwParserExCmdBuffer.cmdList.end(); if (MemorySynchronizationCommands::isPipeControlWArequired(pDevice->getHardwareInfo())) { it++; if (UnitTestHelper::isAdditionalSynchronizationRequired()) { it++; } } //2nd PIPE_CONTROL uint64_t timeStampAddress = mockExCmdBuffer->timestamps->getGpuAddress() + 2 * sizeof(uint64_t); ASSERT_NE(end, it); pipeControl = genCmdCast(*it); ASSERT_NE(nullptr, pipeControl); EXPECT_EQ(1u, pipeControl->getCommandStreamerStallEnable()); EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, pipeControl->getPostSyncOperation()); EXPECT_EQ(timeStampAddress, NEO::UnitTestHelper::getPipeControlPostSyncAddress(*pipeControl)); //omit SEMAPHORE_WAIT and 3rd PIPE_CONTROL if (MemorySynchronizationCommands::isPipeControlWArequired(pDevice->getHardwareInfo())) { it++; if (UnitTestHelper::isAdditionalSynchronizationRequired()) { it++; } } it++; //get 4th PIPE_CONTROL timeStampAddress = mockExCmdBuffer->timestamps->getGpuAddress() + 3 * sizeof(uint64_t); it++; if (UnitTestHelper::isAdditionalSynchronizationRequired()) { it++; } ASSERT_NE(end, it); pipeControl = genCmdCast(*it); ASSERT_NE(nullptr, pipeControl); EXPECT_EQ(1u, pipeControl->getCommandStreamerStallEnable()); EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, pipeControl->getPostSyncOperation()); EXPECT_EQ(timeStampAddress, NEO::UnitTestHelper::getPipeControlPostSyncAddress(*pipeControl)); } HWTEST_F(MockExperimentalCommandBufferTest, givenEnabledExperimentalCmdBufferWhenMemoryManagerAlreadyStoresAllocationThenUseItForLinearSteam) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); auto storage = commandStreamReceiver.getInternalAllocationStorage(); commandStreamReceiver.storeMakeResidentAllocations = true; MemoryManager *memoryManager = commandStreamReceiver.getMemoryManager(); //Make two allocations, since 
CSR will try to reuse it also auto rootDeviceIndex = pDevice->getRootDeviceIndex(); auto allocation = memoryManager->allocateGraphicsMemoryWithProperties({rootDeviceIndex, 3 * MemoryConstants::pageSize64k, AllocationType::COMMAND_BUFFER, pDevice->getDeviceBitfield()}); storage->storeAllocation(std::unique_ptr(allocation), REUSABLE_ALLOCATION); allocation = memoryManager->allocateGraphicsMemoryWithProperties({rootDeviceIndex, 3 * MemoryConstants::pageSize64k, AllocationType::COMMAND_BUFFER, pDevice->getDeviceBitfield()}); storage->storeAllocation(std::unique_ptr(allocation), REUSABLE_ALLOCATION); MockExperimentalCommandBuffer *mockExCmdBuffer = static_cast(commandStreamReceiver.experimentalCmdBuffer.get()); flushTask(commandStreamReceiver); ASSERT_NE(nullptr, mockExCmdBuffer->currentStream.get()); ASSERT_NE(nullptr, mockExCmdBuffer->currentStream->getGraphicsAllocation()); EXPECT_EQ(allocation->getUnderlyingBuffer(), mockExCmdBuffer->currentStream->getGraphicsAllocation()->getUnderlyingBuffer()); EXPECT_TRUE(commandStreamReceiver.isMadeResident(mockExCmdBuffer->currentStream->getGraphicsAllocation())); } HWTEST_F(MockExperimentalCommandBufferTest, givenEnabledExperimentalCmdBufferWhenLinearStreamIsExhaustedThenStoreOldAllocationForReuseAndObtainNewAllocationForLinearStream) { using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.storeMakeResidentAllocations = true; MockExperimentalCommandBuffer *mockExCmdBuffer = static_cast(commandStreamReceiver.experimentalCmdBuffer.get()); flushTask(commandStreamReceiver); size_t csrCmdBufferOffset = commandStreamReceiver.commandStream.getUsed(); ASSERT_NE(nullptr, mockExCmdBuffer->currentStream.get()); ASSERT_NE(nullptr, mockExCmdBuffer->currentStream->getGraphicsAllocation()); uintptr_t oldCmdBufferAddress = reinterpret_cast(mockExCmdBuffer->currentStream->getGraphicsAllocation()); uint64_t oldExCmdBufferGpuAddr = 
mockExCmdBuffer->currentStream->getGraphicsAllocation()->getGpuAddress(); //leave space for single DWORD mockExCmdBuffer->currentStream->getSpace(mockExCmdBuffer->currentStream->getAvailableSpace() - sizeof(uint32_t)); HardwareParse hwParserCsr; hwParserCsr.parseCommands(commandStreamReceiver.commandStream, 0); GenCmdList bbList = hwParserCsr.getCommandsList(); MI_BATCH_BUFFER_START *bbStart = nullptr; GenCmdList::iterator it = bbList.begin(); ASSERT_NE(bbList.end(), it); bbStart = reinterpret_cast(*it); ASSERT_NE(nullptr, bbStart); EXPECT_EQ(oldExCmdBufferGpuAddr, bbStart->getBatchBufferStartAddress()); EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH, bbStart->getSecondLevelBatchBuffer()); flushTask(commandStreamReceiver); ASSERT_NE(nullptr, mockExCmdBuffer->currentStream.get()); ASSERT_NE(nullptr, mockExCmdBuffer->currentStream->getGraphicsAllocation()); EXPECT_TRUE(commandStreamReceiver.isMadeResident(mockExCmdBuffer->currentStream->getGraphicsAllocation())); uintptr_t newCmdBufferAddress = reinterpret_cast(mockExCmdBuffer->currentStream->getGraphicsAllocation()); uint64_t newExCmdBufferGpuAddr = mockExCmdBuffer->currentStream->getGraphicsAllocation()->getGpuAddress(); EXPECT_NE(oldCmdBufferAddress, newCmdBufferAddress); EXPECT_NE(oldExCmdBufferGpuAddr, newExCmdBufferGpuAddr); hwParserCsr.TearDown(); hwParserCsr.parseCommands(commandStreamReceiver.commandStream, csrCmdBufferOffset); bbList = hwParserCsr.getCommandsList(); bbStart = nullptr; it = bbList.begin(); ASSERT_NE(bbList.end(), it); bbStart = reinterpret_cast(*it); ASSERT_NE(nullptr, bbStart); EXPECT_EQ(newExCmdBufferGpuAddr, bbStart->getBatchBufferStartAddress()); EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH, bbStart->getSecondLevelBatchBuffer()); } HWTEST_F(ExperimentalCommandBufferTest, givenEnabledExperimentalCmdBufferWhenCommandStreamReceiverIsCreatedThenExperimentalCmdBufferIsNotNull) { auto &commandStreamReceiver = 
pDevice->getUltCommandStreamReceiver(); EXPECT_NE(nullptr, commandStreamReceiver.experimentalCmdBuffer.get()); } HWTEST_F(ExperimentalCommandBufferTest, givenEnabledExperimentalCmdBufferWhenCommandStreamReceiverIsFlushedThenExpectPrintAfterDtor) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); flushTask(commandStreamReceiver); //forced dtor to get printed timestamps testing::internal::CaptureStdout(); commandStreamReceiver.setExperimentalCmdBuffer(std::move(std::unique_ptr(nullptr))); std::string output = testing::internal::GetCapturedStdout(); EXPECT_STRNE(output.c_str(), ""); } HWTEST_F(ExperimentalCommandBufferTest, givenEnabledExperimentalCmdBufferWhenCommandStreamReceiverIsNotFlushedThenExpectNoPrintAfterDtor) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); //forced dtor to try to get printed timestamps testing::internal::CaptureStdout(); commandStreamReceiver.setExperimentalCmdBuffer(std::move(std::unique_ptr(nullptr))); std::string output = testing::internal::GetCapturedStdout(); EXPECT_STREQ(output.c_str(), ""); } using ExperimentalCommandBufferRootDeviceIndexTest = MultiRootDeviceFixture; TEST_F(ExperimentalCommandBufferRootDeviceIndexTest, GivenExperimentalCommandBufferGraphicsAllocationsThenItHasCorrectRootDeviceIndex) { auto experimentalCommandBuffer = std::make_unique(&device1->getGpgpuCommandStreamReceiver()); ASSERT_NE(nullptr, experimentalCommandBuffer); EXPECT_EQ(expectedRootDeviceIndex, experimentalCommandBuffer->experimentalAllocation->getRootDeviceIndex()); EXPECT_EQ(expectedRootDeviceIndex, experimentalCommandBuffer->timestamps->getRootDeviceIndex()); } compute-runtime-22.14.22890/opencl/test/unit_test/command_stream/get_devices_tests.cpp000066400000000000000000000266501422164147700310400ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include 
"shared/source/execution_environment/execution_environment.h" #include "shared/source/memory_manager/os_agnostic_memory_manager.h" #include "shared/source/os_interface/device_factory.h" #include "shared/source/os_interface/hw_info_config.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/default_hw_info.h" #include "shared/test/common/helpers/ult_hw_config.h" #include "shared/test/common/helpers/variable_backup.h" #include "shared/test/common/libult/create_command_stream.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/mocks/mock_platform.h" namespace NEO { bool operator==(const HardwareInfo &hwInfoIn, const HardwareInfo &hwInfoOut) { bool result = (0 == memcmp(&hwInfoIn.platform, &hwInfoOut.platform, sizeof(PLATFORM))); result &= (hwInfoIn.featureTable.asHash() == hwInfoOut.featureTable.asHash()); result &= (hwInfoIn.workaroundTable.asHash() == hwInfoOut.workaroundTable.asHash()); result &= (hwInfoIn.capabilityTable == hwInfoOut.capabilityTable); return result; } TEST(PrepareDeviceEnvironmentTest, givenPrepareDeviceEnvironmentWhenCsrIsSetToVariousTypesThenFunctionReturnsExpectedValueOfHardwareInfo) { const HardwareInfo *hwInfo = nullptr; VariableBackup backup{&ultHwConfig}; DebugManagerStateRestore stateRestorer; ultHwConfig.useMockedPrepareDeviceEnvironmentsFunc = false; uint32_t expectedDevices = 1; DebugManager.flags.CreateMultipleRootDevices.set(expectedDevices); for (int productFamilyIndex = 0; productFamilyIndex < IGFX_MAX_PRODUCT; productFamilyIndex++) { const char *hwPrefix = hardwarePrefix[productFamilyIndex]; auto hwInfoConfig = hwInfoConfigFactory[productFamilyIndex]; if (hwPrefix == nullptr || hwInfoConfig == nullptr) { continue; } const std::string productFamily(hwPrefix); for (int csrTypes = -1; csrTypes <= CSR_TYPES_NUM; csrTypes++) { CommandStreamReceiverType csrType; if (csrTypes != -1) { csrType = 
static_cast(csrTypes); DebugManager.flags.SetCommandStreamReceiver.set(csrType); } else { csrType = CSR_HW; DebugManager.flags.SetCommandStreamReceiver.set(-1); } DebugManager.flags.ProductFamilyOverride.set(productFamily); platformsImpl->clear(); ExecutionEnvironment *exeEnv = constructPlatform()->peekExecutionEnvironment(); std::string pciPath = "0000:00:02.0"; exeEnv->rootDeviceEnvironments.resize(1u); const auto ret = prepareDeviceEnvironment(*exeEnv, pciPath, 0u); EXPECT_EQ(expectedDevices, exeEnv->rootDeviceEnvironments.size()); for (auto i = 0u; i < expectedDevices; i++) { hwInfo = exeEnv->rootDeviceEnvironments[i]->getHardwareInfo(); switch (csrType) { case CSR_HW: case CSR_HW_WITH_AUB: case CSR_TYPES_NUM: EXPECT_TRUE(ret); EXPECT_NE(nullptr, hwInfo); break; default: EXPECT_FALSE(ret); break; } } } } } struct PrepareDeviceEnvironmentsTest : ::testing::Test { void SetUp() override { ultHwConfig.useMockedPrepareDeviceEnvironmentsFunc = false; } void TearDown() override { } int i = 0; const HardwareInfo *hwInfo = nullptr; VariableBackup backup{&ultHwConfig}; DebugManagerStateRestore stateRestorer; }; HWTEST_F(PrepareDeviceEnvironmentsTest, givenPrepareDeviceEnvironmentsWhenCsrIsSetToVariousTypesThenTheFunctionReturnsTheExpectedValueOfHardwareInfo) { uint32_t expectedDevices = 1; DebugManager.flags.CreateMultipleRootDevices.set(expectedDevices); for (int productFamilyIndex = 0; productFamilyIndex < IGFX_MAX_PRODUCT; productFamilyIndex++) { const char *hwPrefix = hardwarePrefix[productFamilyIndex]; auto hwInfoConfig = hwInfoConfigFactory[productFamilyIndex]; if (hwPrefix == nullptr || hwInfoConfig == nullptr) { continue; } const std::string productFamily(hwPrefix); for (int csrTypes = -1; csrTypes <= CSR_TYPES_NUM; csrTypes++) { CommandStreamReceiverType csrType; if (csrTypes != -1) { csrType = static_cast(csrTypes); DebugManager.flags.SetCommandStreamReceiver.set(csrType); } else { csrType = CSR_HW; DebugManager.flags.SetCommandStreamReceiver.set(-1); } 
DebugManager.flags.ProductFamilyOverride.set(productFamily); platformsImpl->clear(); ExecutionEnvironment *exeEnv = constructPlatform()->peekExecutionEnvironment(); const auto ret = prepareDeviceEnvironments(*exeEnv); EXPECT_EQ(expectedDevices, exeEnv->rootDeviceEnvironments.size()); for (auto i = 0u; i < expectedDevices; i++) { hwInfo = exeEnv->rootDeviceEnvironments[i]->getHardwareInfo(); switch (csrType) { case CSR_HW: case CSR_HW_WITH_AUB: EXPECT_TRUE(ret); EXPECT_NE(nullptr, hwInfo); break; case CSR_AUB: case CSR_TBX: case CSR_TBX_WITH_AUB: { EXPECT_TRUE(ret); EXPECT_NE(nullptr, hwInfo); for (i = 0; i < IGFX_MAX_PRODUCT; i++) { auto hardwareInfo = hardwareInfoTable[i]; if (hardwareInfo == nullptr) continue; if (hardwareInfoTable[i]->platform.eProductFamily == hwInfo->platform.eProductFamily) break; } EXPECT_TRUE(i < IGFX_MAX_PRODUCT); ASSERT_NE(nullptr, hardwarePrefix[i]); HardwareInfo hwInfoFromTable = *hardwareInfoTable[i]; hwInfoFromTable.featureTable = {}; hwInfoFromTable.workaroundTable = {}; hwInfoFromTable.gtSystemInfo = {}; hardwareInfoSetup[hwInfoFromTable.platform.eProductFamily](&hwInfoFromTable, true, 0x0); HwInfoConfig *hwConfig = HwInfoConfig::get(hwInfoFromTable.platform.eProductFamily); hwConfig->configureHardwareCustom(&hwInfoFromTable, nullptr); EXPECT_EQ(0, memcmp(&hwInfoFromTable.platform, &hwInfo->platform, sizeof(PLATFORM))); EXPECT_STREQ(hardwarePrefix[i], productFamily.c_str()); break; } default: break; } } } } } HWTEST_F(PrepareDeviceEnvironmentsTest, givenUpperCaseProductFamilyOverrideFlagSetWhenCreatingDevicesThenFindExpectedPlatform) { std::string hwPrefix; std::string hwPrefixUpperCase; PRODUCT_FAMILY productFamily; for (int productFamilyIndex = 0; productFamilyIndex < IGFX_MAX_PRODUCT; productFamilyIndex++) { if (hardwarePrefix[productFamilyIndex] && hwInfoConfigFactory[productFamilyIndex]) { hwPrefix = hardwarePrefix[productFamilyIndex]; productFamily = static_cast(productFamilyIndex); break; } } EXPECT_NE(0u, hwPrefix.length()); 
hwPrefixUpperCase.resize(hwPrefix.length()); std::transform(hwPrefix.begin(), hwPrefix.end(), hwPrefixUpperCase.begin(), ::toupper); EXPECT_NE(hwPrefix, hwPrefixUpperCase); DebugManager.flags.ProductFamilyOverride.set(hwPrefixUpperCase); DebugManager.flags.SetCommandStreamReceiver.set(CommandStreamReceiverType::CSR_AUB); ExecutionEnvironment *exeEnv = platform()->peekExecutionEnvironment(); bool ret = prepareDeviceEnvironments(*exeEnv); EXPECT_TRUE(ret); EXPECT_EQ(productFamily, exeEnv->rootDeviceEnvironments[0]->getHardwareInfo()->platform.eProductFamily); } HWTEST_F(PrepareDeviceEnvironmentsTest, givenPrepareDeviceEnvironmentsAndUnknownProductFamilyWhenCsrIsSetToValidTypeThenTheFunctionReturnsTheExpectedValueOfHardwareInfo) { uint32_t expectedDevices = 1; DebugManager.flags.CreateMultipleRootDevices.set(expectedDevices); for (int csrTypes = 0; csrTypes <= CSR_TYPES_NUM; csrTypes++) { CommandStreamReceiverType csrType = static_cast(csrTypes); std::string productFamily("unk"); DebugManager.flags.SetCommandStreamReceiver.set(csrType); DebugManager.flags.ProductFamilyOverride.set(productFamily); platformsImpl->clear(); ExecutionEnvironment *exeEnv = constructPlatform()->peekExecutionEnvironment(); auto ret = prepareDeviceEnvironments(*exeEnv); EXPECT_EQ(expectedDevices, exeEnv->rootDeviceEnvironments.size()); for (auto i = 0u; i < expectedDevices; i++) { hwInfo = exeEnv->rootDeviceEnvironments[i]->getHardwareInfo(); switch (csrType) { case CSR_HW: case CSR_HW_WITH_AUB: EXPECT_TRUE(ret); break; case CSR_AUB: case CSR_TBX: case CSR_TBX_WITH_AUB: { EXPECT_TRUE(ret); EXPECT_NE(nullptr, hwInfo); for (i = 0; i < IGFX_MAX_PRODUCT; i++) { auto hardwareInfo = hardwareInfoTable[i]; if (hardwareInfo == nullptr) continue; if (hardwareInfoTable[i]->platform.eProductFamily == hwInfo->platform.eProductFamily) break; } EXPECT_TRUE(i < IGFX_MAX_PRODUCT); ASSERT_NE(nullptr, hardwarePrefix[i]); HardwareInfo baseHwInfo = *defaultHwInfo; baseHwInfo.featureTable = {}; 
baseHwInfo.workaroundTable = {}; baseHwInfo.gtSystemInfo = {}; hardwareInfoSetup[baseHwInfo.platform.eProductFamily](&baseHwInfo, true, 0x0); HwInfoConfig *hwConfig = HwInfoConfig::get(baseHwInfo.platform.eProductFamily); hwConfig->configureHardwareCustom(&baseHwInfo, nullptr); EXPECT_EQ(0, memcmp(&baseHwInfo.platform, &hwInfo->platform, sizeof(PLATFORM))); break; } default: break; } } } } TEST(MultiDeviceTests, givenCreateMultipleRootDevicesAndLimitAmountOfReturnedDevicesFlagWhenClGetDeviceIdsIsCalledThenLowerValueIsReturned) { platformsImpl->clear(); VariableBackup backup(&ultHwConfig); ultHwConfig.useHwCsr = true; ultHwConfig.forceOsAgnosticMemoryManager = false; ultHwConfig.useMockedPrepareDeviceEnvironmentsFunc = false; DebugManagerStateRestore stateRestore; DebugManager.flags.CreateMultipleRootDevices.set(2); DebugManager.flags.LimitAmountOfReturnedDevices.set(1); cl_uint numDevices = 0; auto retVal = clGetDeviceIDs(nullptr, CL_DEVICE_TYPE_GPU, 0, nullptr, &numDevices); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, numDevices); } } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/command_stream/implicit_scaling_ocl_tests.cpp000066400000000000000000000005221422164147700327140ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_container/implicit_scaling.h" #include "shared/test/common/test_macros/test.h" using namespace NEO; TEST(ImplicitScalingApiTests, givenOpenClApiUsedThenSupportEnabled) { EXPECT_TRUE(ImplicitScaling::apiSupport); } compute-runtime-22.14.22890/opencl/test/unit_test/command_stream/submissions_aggregator_tests.cpp000066400000000000000000000737071422164147700333440ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/submissions_aggregator.h" #include "shared/source/helpers/flush_stamp.h" #include "shared/test/common/mocks/mock_csr.h" 
#include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/event/event.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" using namespace NEO; struct MockSubmissionAggregator : public SubmissionAggregator { CommandBufferList &peekCommandBuffersList() { return this->cmdBuffers; } }; TEST(SubmissionsAggregator, givenDefaultSubmissionsAggregatorWhenItIsCreatedThenCreationIsSuccesful) { MockSubmissionAggregator submissionsAggregator; EXPECT_TRUE(submissionsAggregator.peekCommandBuffersList().peekIsEmpty()); } TEST(SubmissionsAggregator, givenCommandBufferWhenItIsPassedToSubmissionsAggregatorThenItIsRecorded) { MockSubmissionAggregator submissionsAggregator; std::unique_ptr device(MockDevice::createWithNewExecutionEnvironment(nullptr)); CommandBuffer *cmdBuffer = new CommandBuffer(*device); submissionsAggregator.recordCommandBuffer(cmdBuffer); EXPECT_FALSE(submissionsAggregator.peekCommandBuffersList().peekIsEmpty()); EXPECT_EQ(cmdBuffer, submissionsAggregator.peekCommandBuffersList().peekHead()); EXPECT_EQ(cmdBuffer, submissionsAggregator.peekCommandBuffersList().peekTail()); EXPECT_EQ(cmdBuffer->surfaces.size(), 0u); //idlist holds the ownership } TEST(SubmissionsAggregator, givenTwoCommandBuffersWhenMergeResourcesIsCalledThenDuplicatesAreEliminated) { MockSubmissionAggregator submissionsAggregator; std::unique_ptr device(MockDevice::createWithNewExecutionEnvironment(nullptr)); CommandBuffer *cmdBuffer = new CommandBuffer(*device); CommandBuffer *cmdBuffer2 = new CommandBuffer(*device); MockGraphicsAllocation alloc1(nullptr, 1); MockGraphicsAllocation alloc2(nullptr, 2); MockGraphicsAllocation alloc3(nullptr, 3); MockGraphicsAllocation alloc4(nullptr, 4); 
MockGraphicsAllocation alloc5(nullptr, 5); MockGraphicsAllocation alloc6(nullptr, 6); cmdBuffer->surfaces.push_back(&alloc1); cmdBuffer->surfaces.push_back(&alloc6); cmdBuffer->surfaces.push_back(&alloc5); cmdBuffer->surfaces.push_back(&alloc3); cmdBuffer->surfaces.push_back(&alloc6); cmdBuffer2->surfaces.push_back(&alloc1); cmdBuffer2->surfaces.push_back(&alloc2); cmdBuffer2->surfaces.push_back(&alloc5); cmdBuffer2->surfaces.push_back(&alloc4); size_t totalUsedSize = 0; size_t totalMemoryBudget = -1; ResourcePackage resourcePackage; submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget, 0u); EXPECT_EQ(0u, totalUsedSize); submissionsAggregator.recordCommandBuffer(cmdBuffer); submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget, 0u); EXPECT_EQ(15u, totalUsedSize); totalUsedSize = 0; resourcePackage.clear(); submissionsAggregator.recordCommandBuffer(cmdBuffer2); EXPECT_EQ(cmdBuffer, submissionsAggregator.peekCommandBuffersList().peekHead()); EXPECT_EQ(cmdBuffer2, submissionsAggregator.peekCommandBuffersList().peekTail()); EXPECT_NE(submissionsAggregator.peekCommandBuffersList().peekHead(), submissionsAggregator.peekCommandBuffersList().peekTail()); EXPECT_EQ(5u, cmdBuffer->surfaces.size()); EXPECT_EQ(4u, cmdBuffer2->surfaces.size()); submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget, 0u); //command buffer 2 is aggregated to command buffer 1 auto primaryBatchInstepctionId = submissionsAggregator.peekCommandBuffersList().peekHead()->inspectionId; EXPECT_EQ(primaryBatchInstepctionId, submissionsAggregator.peekCommandBuffersList().peekHead()->next->inspectionId); EXPECT_EQ(submissionsAggregator.peekCommandBuffersList().peekHead(), cmdBuffer); EXPECT_EQ(6u, resourcePackage.size()); EXPECT_EQ(21u, totalUsedSize); } TEST(SubmissionsAggregator, givenSubmissionAggregatorWhenThreeCommandBuffersAreSubmittedThenTheyAreAggregated) { 
MockSubmissionAggregator submissionsAggregator; std::unique_ptr device(MockDevice::createWithNewExecutionEnvironment(nullptr)); CommandBuffer *cmdBuffer = new CommandBuffer(*device); CommandBuffer *cmdBuffer2 = new CommandBuffer(*device); CommandBuffer *cmdBuffer3 = new CommandBuffer(*device); MockGraphicsAllocation alloc1(nullptr, 1); MockGraphicsAllocation alloc2(nullptr, 2); MockGraphicsAllocation alloc3(nullptr, 3); MockGraphicsAllocation alloc4(nullptr, 4); MockGraphicsAllocation alloc5(nullptr, 5); MockGraphicsAllocation alloc6(nullptr, 6); MockGraphicsAllocation alloc7(nullptr, 7); cmdBuffer->surfaces.push_back(&alloc5); cmdBuffer->surfaces.push_back(&alloc6); cmdBuffer->surfaces.push_back(&alloc5); cmdBuffer->surfaces.push_back(&alloc3); cmdBuffer->surfaces.push_back(&alloc6); cmdBuffer2->surfaces.push_back(&alloc1); cmdBuffer2->surfaces.push_back(&alloc2); cmdBuffer2->surfaces.push_back(&alloc5); cmdBuffer2->surfaces.push_back(&alloc4); cmdBuffer3->surfaces.push_back(&alloc7); cmdBuffer3->surfaces.push_back(&alloc5); size_t totalUsedSize = 0; size_t totalMemoryBudget = -1; ResourcePackage resourcePackage; submissionsAggregator.recordCommandBuffer(cmdBuffer); submissionsAggregator.recordCommandBuffer(cmdBuffer2); submissionsAggregator.recordCommandBuffer(cmdBuffer3); EXPECT_EQ(cmdBuffer, submissionsAggregator.peekCommandBuffersList().peekHead()); EXPECT_EQ(cmdBuffer3, submissionsAggregator.peekCommandBuffersList().peekTail()); EXPECT_EQ(cmdBuffer3->prev, cmdBuffer2); EXPECT_EQ(cmdBuffer2->next, cmdBuffer3); EXPECT_EQ(cmdBuffer->next, cmdBuffer2); EXPECT_EQ(cmdBuffer2->prev, cmdBuffer); EXPECT_NE(submissionsAggregator.peekCommandBuffersList().peekHead(), submissionsAggregator.peekCommandBuffersList().peekTail()); EXPECT_EQ(5u, cmdBuffer->surfaces.size()); EXPECT_EQ(4u, cmdBuffer2->surfaces.size()); EXPECT_EQ(2u, cmdBuffer3->surfaces.size()); submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget, 0u); //command buffer 
3 and 2 is aggregated to command buffer 1 auto primaryBatchInstepctionId = submissionsAggregator.peekCommandBuffersList().peekHead()->inspectionId; EXPECT_EQ(primaryBatchInstepctionId, submissionsAggregator.peekCommandBuffersList().peekHead()->next->inspectionId); EXPECT_EQ(primaryBatchInstepctionId, submissionsAggregator.peekCommandBuffersList().peekHead()->next->next->inspectionId); EXPECT_EQ(submissionsAggregator.peekCommandBuffersList().peekHead(), cmdBuffer); EXPECT_EQ(7u, resourcePackage.size()); EXPECT_EQ(28u, totalUsedSize); } TEST(SubmissionsAggregator, givenMultipleCommandBuffersWhenTheyAreAggreagateWithCertainMemoryLimitThenOnlyThatFitAreAggregated) { MockSubmissionAggregator submissionsAggregator; std::unique_ptr device(MockDevice::createWithNewExecutionEnvironment(nullptr)); CommandBuffer *cmdBuffer = new CommandBuffer(*device); CommandBuffer *cmdBuffer2 = new CommandBuffer(*device); CommandBuffer *cmdBuffer3 = new CommandBuffer(*device); MockGraphicsAllocation alloc1(nullptr, 1); MockGraphicsAllocation alloc2(nullptr, 2); MockGraphicsAllocation alloc3(nullptr, 3); MockGraphicsAllocation alloc4(nullptr, 4); MockGraphicsAllocation alloc5(nullptr, 5); MockGraphicsAllocation alloc6(nullptr, 6); MockGraphicsAllocation alloc7(nullptr, 7); //14 bytes consumed cmdBuffer->surfaces.push_back(&alloc5); cmdBuffer->surfaces.push_back(&alloc6); cmdBuffer->surfaces.push_back(&alloc5); cmdBuffer->surfaces.push_back(&alloc3); cmdBuffer->surfaces.push_back(&alloc6); //12 bytes total , only 7 new cmdBuffer2->surfaces.push_back(&alloc1); cmdBuffer2->surfaces.push_back(&alloc2); cmdBuffer2->surfaces.push_back(&alloc5); cmdBuffer2->surfaces.push_back(&alloc4); //12 bytes total, only 7 new cmdBuffer3->surfaces.push_back(&alloc7); cmdBuffer3->surfaces.push_back(&alloc5); size_t totalUsedSize = 0; size_t totalMemoryBudget = 22; ResourcePackage resourcePackage; submissionsAggregator.recordCommandBuffer(cmdBuffer); submissionsAggregator.recordCommandBuffer(cmdBuffer2); 
submissionsAggregator.recordCommandBuffer(cmdBuffer3); submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget, 0u); //command buffer 2 is aggregated to command buffer 1, comand buffer 3 becomes command buffer 2 EXPECT_EQ(submissionsAggregator.peekCommandBuffersList().peekHead(), cmdBuffer); EXPECT_EQ(submissionsAggregator.peekCommandBuffersList().peekTail(), cmdBuffer3); EXPECT_EQ(cmdBuffer->next, cmdBuffer2); EXPECT_EQ(cmdBuffer3->prev, cmdBuffer2); EXPECT_EQ(cmdBuffer2->inspectionId, cmdBuffer->inspectionId); EXPECT_NE(cmdBuffer3->inspectionId, cmdBuffer2->inspectionId); EXPECT_EQ(0u, cmdBuffer3->inspectionId); EXPECT_EQ(6u, resourcePackage.size()); EXPECT_EQ(21u, totalUsedSize); } TEST(SubmissionsAggregator, givenMultipleCommandBuffersWhenAggregateIsCalledMultipleTimesThenFurtherInspectionAreHandledCorrectly) { MockSubmissionAggregator submissionsAggregator; std::unique_ptr device(MockDevice::createWithNewExecutionEnvironment(nullptr)); CommandBuffer *cmdBuffer = new CommandBuffer(*device); CommandBuffer *cmdBuffer2 = new CommandBuffer(*device); CommandBuffer *cmdBuffer3 = new CommandBuffer(*device); MockGraphicsAllocation alloc1(nullptr, 1); MockGraphicsAllocation alloc2(nullptr, 2); MockGraphicsAllocation alloc3(nullptr, 3); MockGraphicsAllocation alloc4(nullptr, 4); MockGraphicsAllocation alloc5(nullptr, 5); MockGraphicsAllocation alloc6(nullptr, 6); MockGraphicsAllocation alloc7(nullptr, 7); //14 bytes consumed cmdBuffer->surfaces.push_back(&alloc5); cmdBuffer->surfaces.push_back(&alloc6); cmdBuffer->surfaces.push_back(&alloc5); cmdBuffer->surfaces.push_back(&alloc3); cmdBuffer->surfaces.push_back(&alloc6); //12 bytes total , only 7 new cmdBuffer2->surfaces.push_back(&alloc1); cmdBuffer2->surfaces.push_back(&alloc2); cmdBuffer2->surfaces.push_back(&alloc5); cmdBuffer2->surfaces.push_back(&alloc4); //12 bytes total, only 7 new cmdBuffer3->surfaces.push_back(&alloc7); cmdBuffer3->surfaces.push_back(&alloc5); size_t 
totalUsedSize = 0; size_t totalMemoryBudget = 14; ResourcePackage resourcePackage; submissionsAggregator.recordCommandBuffer(cmdBuffer); submissionsAggregator.recordCommandBuffer(cmdBuffer2); submissionsAggregator.recordCommandBuffer(cmdBuffer3); submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget, 0u); //command buffers not aggregated due to too low limit EXPECT_EQ(submissionsAggregator.peekCommandBuffersList().peekHead(), cmdBuffer); EXPECT_EQ(cmdBuffer->next, cmdBuffer2); EXPECT_EQ(submissionsAggregator.peekCommandBuffersList().peekTail(), cmdBuffer3); //budget is now larger we can fit everything totalMemoryBudget = 28; resourcePackage.clear(); totalUsedSize = 0; submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget, 0u); //all cmd buffers are merged to 1 EXPECT_EQ(cmdBuffer3->inspectionId, cmdBuffer2->inspectionId); EXPECT_EQ(cmdBuffer->inspectionId, cmdBuffer2->inspectionId); EXPECT_EQ(submissionsAggregator.peekCommandBuffersList().peekTail(), cmdBuffer3); EXPECT_EQ(submissionsAggregator.peekCommandBuffersList().peekHead(), cmdBuffer); EXPECT_EQ(totalMemoryBudget, totalUsedSize); EXPECT_EQ(7u, resourcePackage.size()); } TEST(SubmissionsAggregator, givenMultipleCommandBuffersWithDifferentGraphicsAllocationsWhenAggregateIsCalledThenResourcePackContainSecondBatchBuffer) { MockSubmissionAggregator submissionsAggregator; std::unique_ptr device(MockDevice::createWithNewExecutionEnvironment(nullptr)); CommandBuffer *cmdBuffer = new CommandBuffer(*device); CommandBuffer *cmdBuffer2 = new CommandBuffer(*device); MockGraphicsAllocation alloc1(nullptr, 1); MockGraphicsAllocation alloc2(nullptr, 2); MockGraphicsAllocation alloc5(nullptr, 5); MockGraphicsAllocation alloc7(nullptr, 7); //5 bytes consumed cmdBuffer->surfaces.push_back(&alloc5); //10 bytes total cmdBuffer2->surfaces.push_back(&alloc1); cmdBuffer2->surfaces.push_back(&alloc2); cmdBuffer2->batchBuffer.commandBufferAllocation = 
&alloc7; size_t totalUsedSize = 0; size_t totalMemoryBudget = 200; ResourcePackage resourcePackage; submissionsAggregator.recordCommandBuffer(cmdBuffer); submissionsAggregator.recordCommandBuffer(cmdBuffer2); submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget, 0u); EXPECT_EQ(4u, resourcePackage.size()); EXPECT_EQ(15u, totalUsedSize); } TEST(SubmissionsAggregator, givenTwoCommandBufferWhereSecondContainsFirstOnResourceListWhenItIsAggregatedThenResourcePackDoesntContainPrimaryBatch) { MockSubmissionAggregator submissionsAggregator; std::unique_ptr device(MockDevice::createWithNewExecutionEnvironment(nullptr)); CommandBuffer *cmdBuffer = new CommandBuffer(*device); CommandBuffer *cmdBuffer2 = new CommandBuffer(*device); MockGraphicsAllocation cmdBufferAllocation1(nullptr, 1); MockGraphicsAllocation cmdBufferAllocation2(nullptr, 2); MockGraphicsAllocation alloc5(nullptr, 5); MockGraphicsAllocation alloc7(nullptr, 7); cmdBuffer->batchBuffer.commandBufferAllocation = &cmdBufferAllocation1; cmdBuffer2->batchBuffer.commandBufferAllocation = &cmdBufferAllocation2; //cmdBuffer2 has commandBufferAllocation on the surface list cmdBuffer2->surfaces.push_back(&cmdBufferAllocation1); cmdBuffer2->surfaces.push_back(&alloc7); cmdBuffer->surfaces.push_back(&alloc5); size_t totalUsedSize = 0; size_t totalMemoryBudget = 200; ResourcePackage resourcePackage; submissionsAggregator.recordCommandBuffer(cmdBuffer); submissionsAggregator.recordCommandBuffer(cmdBuffer2); submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget, 0u); //resource pack shuold have 3 surfaces EXPECT_EQ(3u, resourcePackage.size()); EXPECT_EQ(14u, totalUsedSize); } TEST(SubmissionsAggregator, givenTwoCommandBufferWhereSecondContainsTheFirstCommandBufferGraphicsAllocaitonWhenItIsAggregatedThenResourcePackDoesntContainPrimaryBatch) { MockSubmissionAggregator submissionsAggregator; std::unique_ptr 
device(MockDevice::createWithNewExecutionEnvironment(nullptr)); CommandBuffer *cmdBuffer = new CommandBuffer(*device); CommandBuffer *cmdBuffer2 = new CommandBuffer(*device); MockGraphicsAllocation cmdBufferAllocation1(nullptr, 1); MockGraphicsAllocation alloc5(nullptr, 5); MockGraphicsAllocation alloc7(nullptr, 7); cmdBuffer->batchBuffer.commandBufferAllocation = &cmdBufferAllocation1; cmdBuffer2->batchBuffer.commandBufferAllocation = &cmdBufferAllocation1; //cmdBuffer2 has commandBufferAllocation on the surface list cmdBuffer2->surfaces.push_back(&alloc7); cmdBuffer->surfaces.push_back(&alloc5); size_t totalUsedSize = 0; size_t totalMemoryBudget = 200; ResourcePackage resourcePackage; submissionsAggregator.recordCommandBuffer(cmdBuffer); submissionsAggregator.recordCommandBuffer(cmdBuffer2); submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget, 0u); //resource pack shuold have 3 surfaces EXPECT_EQ(2u, resourcePackage.size()); EXPECT_EQ(12u, totalUsedSize); } TEST(SubmissionsAggregator, givenCommandBuffersRequiringDifferentCoherencySettingWhenAggregateIsCalledThenTheyAreNotAgggregated) { MockSubmissionAggregator submissionsAggregator; std::unique_ptr device(MockDevice::createWithNewExecutionEnvironment(nullptr)); CommandBuffer *cmdBuffer = new CommandBuffer(*device); CommandBuffer *cmdBuffer2 = new CommandBuffer(*device); MockGraphicsAllocation alloc1(nullptr, 1); MockGraphicsAllocation alloc7(nullptr, 7); cmdBuffer->batchBuffer.requiresCoherency = true; cmdBuffer2->batchBuffer.requiresCoherency = false; cmdBuffer->surfaces.push_back(&alloc1); cmdBuffer2->surfaces.push_back(&alloc7); submissionsAggregator.recordCommandBuffer(cmdBuffer); submissionsAggregator.recordCommandBuffer(cmdBuffer2); ResourcePackage resourcePackage; size_t totalUsedSize = 0; size_t totalMemoryBudget = 200; submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget, 0u); EXPECT_EQ(1u, totalUsedSize); 
EXPECT_EQ(1u, resourcePackage.size()); EXPECT_NE(cmdBuffer->inspectionId, cmdBuffer2->inspectionId); EXPECT_EQ(1u, cmdBuffer->inspectionId); } TEST(SubmissionsAggregator, givenCommandBuffersRequiringDifferentThrottleSettingWhenAggregateIsCalledThenTheyAreNotAgggregated) { MockSubmissionAggregator submissionsAggregator; std::unique_ptr device(MockDevice::createWithNewExecutionEnvironment(nullptr)); CommandBuffer *cmdBuffer = new CommandBuffer(*device); CommandBuffer *cmdBuffer2 = new CommandBuffer(*device); MockGraphicsAllocation alloc1(nullptr, 1); MockGraphicsAllocation alloc7(nullptr, 7); cmdBuffer->batchBuffer.throttle = QueueThrottle::LOW; cmdBuffer2->batchBuffer.throttle = QueueThrottle::MEDIUM; cmdBuffer->surfaces.push_back(&alloc1); cmdBuffer2->surfaces.push_back(&alloc7); submissionsAggregator.recordCommandBuffer(cmdBuffer); submissionsAggregator.recordCommandBuffer(cmdBuffer2); ResourcePackage resourcePackage; size_t totalUsedSize = 0; size_t totalMemoryBudget = 200; submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget, 0u); EXPECT_EQ(1u, totalUsedSize); EXPECT_EQ(1u, resourcePackage.size()); EXPECT_NE(cmdBuffer->inspectionId, cmdBuffer2->inspectionId); EXPECT_EQ(1u, cmdBuffer->inspectionId); } TEST(SubmissionsAggregator, givenCommandBuffersRequiringDifferentPrioritySettingWhenAggregateIsCalledThenTheyAreNotAgggregated) { MockSubmissionAggregator submissionsAggregator; std::unique_ptr device(MockDevice::createWithNewExecutionEnvironment(nullptr)); CommandBuffer *cmdBuffer = new CommandBuffer(*device); CommandBuffer *cmdBuffer2 = new CommandBuffer(*device); MockGraphicsAllocation alloc1(nullptr, 1); MockGraphicsAllocation alloc7(nullptr, 7); cmdBuffer->batchBuffer.low_priority = true; cmdBuffer2->batchBuffer.low_priority = false; cmdBuffer->surfaces.push_back(&alloc1); cmdBuffer2->surfaces.push_back(&alloc7); submissionsAggregator.recordCommandBuffer(cmdBuffer); 
submissionsAggregator.recordCommandBuffer(cmdBuffer2); ResourcePackage resourcePackage; size_t totalUsedSize = 0; size_t totalMemoryBudget = 200; submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget, 0u); EXPECT_EQ(1u, totalUsedSize); EXPECT_EQ(1u, resourcePackage.size()); EXPECT_NE(cmdBuffer->inspectionId, cmdBuffer2->inspectionId); EXPECT_EQ(1u, cmdBuffer->inspectionId); } TEST(SubmissionsAggregator, WhenAggregatorIsCreatedThenFlushStampIsNotAllocated) { std::unique_ptr device(MockDevice::createWithNewExecutionEnvironment(nullptr)); CommandBuffer cmdBuffer(*device); EXPECT_EQ(nullptr, cmdBuffer.flushStamp->getStampReference()); } TEST(SubmissionsAggregator, givenMultipleOsContextsWhenAggregatingGraphicsAllocationsThenUseInspectionIdCorrespondingWithOsContextId) { SubmissionAggregator submissionsAggregator; ResourcePackage resourcePackage; const auto totalMemoryBudget = 3u; size_t totalUsedSize = 0; std::unique_ptr device(MockDevice::createWithNewExecutionEnvironment(nullptr)); CommandBuffer *cmdBuffer0 = new CommandBuffer(*device); CommandBuffer *cmdBuffer1 = new CommandBuffer(*device); MockGraphicsAllocation alloc0(nullptr, 1); MockGraphicsAllocation alloc1(nullptr, 1); MockGraphicsAllocation alloc2(nullptr, 1); MockGraphicsAllocation alloc3(nullptr, 1); cmdBuffer0->surfaces.push_back(&alloc0); cmdBuffer0->surfaces.push_back(&alloc1); cmdBuffer1->surfaces.push_back(&alloc2); cmdBuffer1->surfaces.push_back(&alloc3); submissionsAggregator.recordCommandBuffer(cmdBuffer0); submissionsAggregator.recordCommandBuffer(cmdBuffer1); EXPECT_EQ(0u, alloc0.getInspectionId(1u)); EXPECT_EQ(0u, alloc1.getInspectionId(1u)); EXPECT_EQ(0u, alloc2.getInspectionId(1u)); EXPECT_EQ(0u, alloc3.getInspectionId(1u)); submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget, 1u); EXPECT_EQ(1u, alloc0.getInspectionId(1u)); EXPECT_EQ(1u, alloc1.getInspectionId(1u)); EXPECT_EQ(1u, alloc2.getInspectionId(1u)); 
EXPECT_EQ(1u, alloc3.getInspectionId(1u)); } TEST(SubmissionsAggregator, givenMultipleOsContextsWhenAggregatingGraphicsAllocationsThenDoNotUpdateInspectionIdsOfOtherContexts) { SubmissionAggregator submissionsAggregator; ResourcePackage resourcePackage; const auto totalMemoryBudget = 2u; size_t totalUsedSize = 0; std::unique_ptr device(MockDevice::createWithNewExecutionEnvironment(nullptr)); CommandBuffer *cmdBuffer0 = new CommandBuffer(*device); CommandBuffer *cmdBuffer1 = new CommandBuffer(*device); MockGraphicsAllocation alloc0(nullptr, 1); MockGraphicsAllocation alloc1(nullptr, 1); cmdBuffer0->surfaces.push_back(&alloc0); cmdBuffer0->surfaces.push_back(&alloc1); submissionsAggregator.recordCommandBuffer(cmdBuffer0); submissionsAggregator.recordCommandBuffer(cmdBuffer1); submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget, 1u); for (auto osContextId = 0u; osContextId < alloc1.usageInfos.size(); osContextId++) { if (osContextId != 1u) { EXPECT_EQ(0u, alloc0.getInspectionId(osContextId)); } } for (auto osContextId = 0u; osContextId < alloc0.usageInfos.size(); osContextId++) { if (osContextId != 1u) { EXPECT_EQ(0u, alloc0.getInspectionId(osContextId)); } } } TEST(SubmissionsAggregator, givenCommandBuffersRequiringDifferentSliceCountSettingWhenAggregateIsCalledThenTheyAreNotAgggregated) { MockSubmissionAggregator submissionsAggregator; std::unique_ptr device(MockDevice::createWithNewExecutionEnvironment(nullptr)); CommandBuffer *cmdBuffer = new CommandBuffer(*device); CommandBuffer *cmdBuffer2 = new CommandBuffer(*device); MockGraphicsAllocation alloc1(nullptr, 1); MockGraphicsAllocation alloc7(nullptr, 7); cmdBuffer->batchBuffer.sliceCount = 1; cmdBuffer2->batchBuffer.sliceCount = 2; cmdBuffer->surfaces.push_back(&alloc1); cmdBuffer2->surfaces.push_back(&alloc7); submissionsAggregator.recordCommandBuffer(cmdBuffer); submissionsAggregator.recordCommandBuffer(cmdBuffer2); ResourcePackage resourcePackage; size_t 
totalUsedSize = 0; size_t totalMemoryBudget = 200; submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget, 0u); EXPECT_EQ(1u, totalUsedSize); EXPECT_EQ(1u, resourcePackage.size()); EXPECT_NE(cmdBuffer->inspectionId, cmdBuffer2->inspectionId); EXPECT_EQ(1u, cmdBuffer->inspectionId); } struct SubmissionsAggregatorTests : public ::testing::Test { void SetUp() override { device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); context.reset(new MockContext(device.get())); } void overrideCsr(CommandStreamReceiver *newCsr) { device->resetCommandStreamReceiver(newCsr); newCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); } std::unique_ptr device; std::unique_ptr context; }; HWTEST_F(SubmissionsAggregatorTests, givenMultipleQueuesWhenCmdBuffersAreRecordedThenAssignFlushStampObjFromCmdQueue) { MockKernelWithInternals kernel(*device.get()); CommandQueueHw cmdQ1(context.get(), device.get(), 0, false); CommandQueueHw cmdQ2(context.get(), device.get(), 0, false); auto mockCsr = new MockCsrHw2(*device->executionEnvironment, device->getRootDeviceIndex(), device->getDeviceBitfield()); mockCsr->useNewResourceImplicitFlush = false; mockCsr->useGpuIdleImplicitFlush = false; size_t GWS = 1; overrideCsr(mockCsr); auto expectRefCounts = [&](int32_t cmdQRef1, int32_t cmdQRef2) { EXPECT_EQ(cmdQRef1, cmdQ1.flushStamp->getStampReference()->getRefInternalCount()); EXPECT_EQ(cmdQRef2, cmdQ2.flushStamp->getStampReference()->getRefInternalCount()); }; expectRefCounts(1, 1); cmdQ1.enqueueKernel(kernel, 1, nullptr, &GWS, nullptr, 0, nullptr, nullptr); expectRefCounts(2, 1); cmdQ2.enqueueKernel(kernel, 1, nullptr, &GWS, nullptr, 0, nullptr, nullptr); expectRefCounts(2, 2); { auto cmdBuffer = mockCsr->peekSubmissionAggregator()->peekCmdBufferList().removeFrontOne(); EXPECT_EQ(cmdQ1.flushStamp->getStampReference(), cmdBuffer->flushStamp->getStampReference()); } expectRefCounts(1, 2); { auto cmdBuffer = 
mockCsr->peekSubmissionAggregator()->peekCmdBufferList().removeFrontOne(); EXPECT_EQ(cmdQ2.flushStamp->getStampReference(), cmdBuffer->flushStamp->getStampReference()); } expectRefCounts(1, 1); } HWTEST_F(SubmissionsAggregatorTests, givenCmdQueueWhenCmdBufferWithEventIsRecordedThenAssignFlushStampObjForEveryone) { MockKernelWithInternals kernel(*device.get()); CommandQueueHw cmdQ1(context.get(), device.get(), 0, false); auto mockCsr = new MockCsrHw2(*device->executionEnvironment, device->getRootDeviceIndex(), device->getDeviceBitfield()); mockCsr->useNewResourceImplicitFlush = false; mockCsr->useGpuIdleImplicitFlush = false; size_t GWS = 1; overrideCsr(mockCsr); cl_event event1; EXPECT_EQ(1, cmdQ1.flushStamp->getStampReference()->getRefInternalCount()); cmdQ1.enqueueKernel(kernel, 1, nullptr, &GWS, nullptr, 0, nullptr, &event1); EXPECT_EQ(3, cmdQ1.flushStamp->getStampReference()->getRefInternalCount()); EXPECT_EQ(castToObject(event1)->flushStamp->getStampReference(), cmdQ1.flushStamp->getStampReference()); { auto cmdBuffer = mockCsr->peekSubmissionAggregator()->peekCmdBufferList().removeFrontOne(); EXPECT_EQ(cmdQ1.flushStamp->getStampReference(), cmdBuffer->flushStamp->getStampReference()); } EXPECT_EQ(2, cmdQ1.flushStamp->getStampReference()->getRefInternalCount()); castToObject(event1)->release(); EXPECT_EQ(1, cmdQ1.flushStamp->getStampReference()->getRefInternalCount()); } HWTEST_F(SubmissionsAggregatorTests, givenMultipleCmdBuffersWhenFlushThenUpdateAllRelatedFlushStamps) { MockKernelWithInternals kernel(*device.get()); CommandQueueHw cmdQ1(context.get(), device.get(), 0, false); CommandQueueHw cmdQ2(context.get(), device.get(), 0, false); auto mockCsr = new MockCsrHw2(*device->executionEnvironment, device->getRootDeviceIndex(), device->getDeviceBitfield()); mockCsr->useNewResourceImplicitFlush = false; mockCsr->useGpuIdleImplicitFlush = false; size_t GWS = 1; overrideCsr(mockCsr); mockCsr->taskCount = 5; mockCsr->flushStamp->setStamp(5); cl_event event1, 
event2; cmdQ1.enqueueKernel(kernel, 1, nullptr, &GWS, nullptr, 0, nullptr, &event1); cmdQ2.enqueueKernel(kernel, 1, nullptr, &GWS, nullptr, 0, nullptr, &event2); mockCsr->flushBatchedSubmissions(); auto expectedFlushStamp = mockCsr->flushStamp->peekStamp(); EXPECT_EQ(expectedFlushStamp, cmdQ1.flushStamp->peekStamp()); EXPECT_EQ(expectedFlushStamp, cmdQ2.flushStamp->peekStamp()); EXPECT_EQ(expectedFlushStamp, castToObject(event1)->flushStamp->peekStamp()); EXPECT_EQ(expectedFlushStamp, castToObject(event2)->flushStamp->peekStamp()); castToObject(event1)->release(); castToObject(event2)->release(); } HWTEST_F(SubmissionsAggregatorTests, givenMultipleCmdBuffersWhenNotAggregatedDuringFlushThenUpdateAllRelatedFlushStamps) { MockKernelWithInternals kernel(*device.get()); CommandQueueHw cmdQ1(context.get(), device.get(), 0, false); CommandQueueHw cmdQ2(context.get(), device.get(), 0, false); auto mockCsr = new MockCsrHw2(*device->executionEnvironment, device->getRootDeviceIndex(), device->getDeviceBitfield()); size_t GWS = 1; overrideCsr(mockCsr); mockCsr->taskCount = 5; mockCsr->flushStamp->setStamp(5); cl_event event1, event2; cmdQ1.enqueueKernel(kernel, 1, nullptr, &GWS, nullptr, 0, nullptr, &event1); cmdQ2.enqueueKernel(kernel, 1, nullptr, &GWS, nullptr, 0, nullptr, &event2); // dont aggregate mockCsr->peekSubmissionAggregator()->peekCmdBufferList().peekHead()->batchBuffer.low_priority = true; mockCsr->peekSubmissionAggregator()->peekCmdBufferList().peekTail()->batchBuffer.low_priority = false; mockCsr->flushBatchedSubmissions(); EXPECT_EQ(6u, cmdQ1.flushStamp->peekStamp()); EXPECT_EQ(6u, castToObject(event1)->flushStamp->peekStamp()); EXPECT_EQ(7u, cmdQ2.flushStamp->peekStamp()); EXPECT_EQ(7u, castToObject(event2)->flushStamp->peekStamp()); castToObject(event1)->release(); castToObject(event2)->release(); } 
thread_arbitration_policy_helper.cpp000066400000000000000000000016601422164147700340330ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/command_stream/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/thread_arbitration_policy.h" #include "opencl/extensions/public/cl_ext_private.h" #include namespace NEO { int32_t getNewKernelArbitrationPolicy(uint32_t policy) { if (policy == CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_ROUND_ROBIN_INTEL) { return ThreadArbitrationPolicy::RoundRobin; } else if (policy == CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_OLDEST_FIRST_INTEL) { return ThreadArbitrationPolicy::AgeBased; } else if (policy == CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_STALL_BASED_ROUND_ROBIN_INTEL || policy == CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_AFTER_DEPENDENCY_ROUND_ROBIN_INTEL) { return ThreadArbitrationPolicy::RoundRobinAfterDependency; } else { return ThreadArbitrationPolicy::NotPresent; } } } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/command_stream/thread_arbitration_policy_helper.h000066400000000000000000000003111422164147700335470ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include namespace NEO { int32_t getNewKernelArbitrationPolicy(uint32_t policy); } // namespace NEOthread_arbitration_policy_helper_tests.cpp000066400000000000000000000023001422164147700352450ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/command_stream/* * Copyright (C) 2019-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_stream/thread_arbitration_policy.h" #include "gtest/gtest.h" #include "public/cl_ext_private.h" namespace NEO { TEST(ThreadArbitrationPolicy, givenClKrenelExecThreadArbitrationPolicyWhenGetNewKernelArbitrationPolicyIsCalledThenExpectedThreadArbitrationPolicyIsReturned) { int32_t 
retVal = ThreadArbitrationPolicy::getNewKernelArbitrationPolicy(CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_ROUND_ROBIN_INTEL); EXPECT_EQ(retVal, ThreadArbitrationPolicy::RoundRobin); retVal = ThreadArbitrationPolicy::getNewKernelArbitrationPolicy(CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_OLDEST_FIRST_INTEL); EXPECT_EQ(retVal, ThreadArbitrationPolicy::AgeBased); retVal = ThreadArbitrationPolicy::getNewKernelArbitrationPolicy(CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_AFTER_DEPENDENCY_ROUND_ROBIN_INTEL); EXPECT_EQ(retVal, ThreadArbitrationPolicy::RoundRobinAfterDependency); uint32_t randomValue = 0xFFFFu; retVal = ThreadArbitrationPolicy::getNewKernelArbitrationPolicy(randomValue); EXPECT_EQ(retVal, ThreadArbitrationPolicy::NotPresent); } } // namespace NEOcompute-runtime-22.14.22890/opencl/test/unit_test/compiler_interface/000077500000000000000000000000001422164147700254615ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/compiler_interface/CMakeLists.txt000066400000000000000000000005741422164147700302270ustar00rootroot00000000000000# # Copyright (C) 2018-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_compiler_interface ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/cl_compiler_interface_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/default_cl_cache_config_tests.cpp ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_compiler_interface}) compute-runtime-22.14.22890/opencl/test/unit_test/compiler_interface/cl_compiler_interface_tests.cpp000066400000000000000000000062351422164147700337250ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/file_io.h" #include "shared/test/common/helpers/test_files.h" #include "shared/test/common/libult/global_environment.h" #include "shared/test/common/mocks/mock_compiler_interface.h" #include "shared/test/common/test_macros/test.h" #include 
"shared/test/unit_test/helpers/gtest_helpers.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" using namespace NEO; class ClCompilerInterfaceTest : public ClDeviceFixture, public ::testing::Test { public: void SetUp() override { ClDeviceFixture::SetUp(); // create the compiler interface this->pCompilerInterface = new MockCompilerInterface(); bool initRet = pCompilerInterface->initialize(std::make_unique(CompilerCacheConfig{}), true); ASSERT_TRUE(initRet); pDevice->getExecutionEnvironment()->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]->compilerInterface.reset(pCompilerInterface); std::string testFile; testFile.append(clFiles); testFile.append("CopyBuffer_simd16.cl"); pSource = loadDataFromFile( testFile.c_str(), sourceSize); ASSERT_NE(0u, sourceSize); ASSERT_NE(nullptr, pSource); inputArgs.src = ArrayRef(pSource.get(), sourceSize); inputArgs.internalOptions = ArrayRef(pClDevice->peekCompilerExtensions().c_str(), pClDevice->peekCompilerExtensions().size()); } void TearDown() override { pSource.reset(); ClDeviceFixture::TearDown(); } MockCompilerInterface *pCompilerInterface; TranslationInput inputArgs = {IGC::CodeType::oclC, IGC::CodeType::oclGenBin}; std::unique_ptr pSource = nullptr; size_t sourceSize = 0; }; TEST_F(ClCompilerInterfaceTest, WhenBuildIsInvokedThenFclReceivesListOfExtensionsInInternalOptions) { std::string receivedInternalOptions; auto debugVars = NEO::getFclDebugVars(); debugVars.receivedInternalOptionsOutput = &receivedInternalOptions; gEnvironment->fclPushDebugVars(debugVars); TranslationOutput translationOutput = {}; auto err = pCompilerInterface->build(*pDevice, inputArgs, translationOutput); EXPECT_EQ(TranslationOutput::ErrorCode::Success, err); EXPECT_TRUE(hasSubstr(receivedInternalOptions, pClDevice->peekCompilerExtensions())); gEnvironment->fclPopDebugVars(); } TEST_F(ClCompilerInterfaceTest, WhenCompileIsInvokedThenFclReceivesListOfExtensionsInInternalOptions) { std::string receivedInternalOptions; 
MockCompilerDebugVars fclDebugVars; retrieveBinaryKernelFilename(fclDebugVars.fileName, "CopyBuffer_simd16_", ".bc"); fclDebugVars.receivedInternalOptionsOutput = &receivedInternalOptions; gEnvironment->fclPushDebugVars(fclDebugVars); TranslationOutput translationOutput = {}; auto err = pCompilerInterface->compile(*pDevice, inputArgs, translationOutput); EXPECT_EQ(TranslationOutput::ErrorCode::Success, err); EXPECT_TRUE(hasSubstr(receivedInternalOptions, pClDevice->peekCompilerExtensions())); gEnvironment->fclPopDebugVars(); } default_cl_cache_config_tests.cpp000066400000000000000000000021071422164147700341020ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/compiler_interface/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/source/compiler_interface/default_cl_cache_config.h" TEST(CompilerCache, GivenDefaultClCacheConfigThenValuesAreProperlyPopulated) { auto cacheConfig = NEO::getDefaultClCompilerCacheConfig(); EXPECT_STREQ("cl_cache", cacheConfig.cacheDir.c_str()); EXPECT_STREQ(".cl_cache", cacheConfig.cacheFileExtension.c_str()); EXPECT_TRUE(cacheConfig.enabled); } TEST(CompilerCacheTests, GivenExistingConfigWhenLoadingFromCacheThenBinaryIsLoaded) { NEO::CompilerCache cache(NEO::getDefaultClCompilerCacheConfig()); static const char *hash = "SOME_HASH"; std::unique_ptr data(new char[32]); for (size_t i = 0; i < 32; i++) data.get()[i] = static_cast(i); bool ret = cache.cacheBinary(hash, static_cast(data.get()), 32); EXPECT_TRUE(ret); size_t size; auto loadedBin = cache.loadCachedBinary(hash, size); EXPECT_NE(nullptr, loadedBin); EXPECT_NE(0U, size); }compute-runtime-22.14.22890/opencl/test/unit_test/context/000077500000000000000000000000001422164147700233135ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/context/CMakeLists.txt000066400000000000000000000013421422164147700260530ustar00rootroot00000000000000# 
# Copyright (C) 2018-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_context ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/context_get_info_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/context_multi_device_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/context_negative_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/context_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/driver_diagnostics_enqueue_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/driver_diagnostics_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/driver_diagnostics_tests.h ${CMAKE_CURRENT_SOURCE_DIR}/get_supported_image_formats_tests.cpp ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_context}) add_subdirectories() compute-runtime-22.14.22890/opencl/test/unit_test/context/context_get_info_tests.cpp000066400000000000000000000076641422164147700306140ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/context_fixture.h" #include "opencl/test/unit_test/fixtures/platform_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "gtest/gtest.h" using namespace NEO; struct ContextGetInfoTest : public PlatformFixture, public ContextFixture, public ::testing::Test { using ContextFixture::SetUp; using PlatformFixture::SetUp; ContextGetInfoTest() { } void SetUp() override { PlatformFixture::SetUp(); ContextFixture::SetUp(num_devices, devices); } void TearDown() override { ContextFixture::TearDown(); PlatformFixture::TearDown(); } cl_int retVal = CL_SUCCESS; }; TEST_F(ContextGetInfoTest, GivenInvalidParamNameWhenGettingInfoThenInvalidValueErrorIsReturned) { size_t retSize = 0; retVal = pContext->getInfo( 0, 0, nullptr, &retSize); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(ContextGetInfoTest, GivenInvalidParametersWhenGettingContextInfoThenValueSizeRetIsNotUpdated) { size_t retSize = 0x1234; retVal = pContext->getInfo( 0, 0, nullptr, &retSize); EXPECT_EQ(CL_INVALID_VALUE, retVal); 
EXPECT_EQ(0x1234u, retSize); } TEST_F(ContextGetInfoTest, GivenNumDevicesParamNameWhenGettingInfoThenNumberOfDevicesIsReturned) { cl_uint numDevices = 0; size_t retSize = 0; retVal = pContext->getInfo( CL_CONTEXT_NUM_DEVICES, sizeof(cl_uint), &numDevices, nullptr); EXPECT_EQ(this->num_devices, numDevices); EXPECT_EQ(CL_SUCCESS, retVal); retVal = pContext->getInfo( CL_CONTEXT_DEVICES, 0, nullptr, &retSize); // make sure we get the same answer through a different query EXPECT_EQ(numDevices * sizeof(cl_device_id), retSize); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(ContextGetInfoTest, GivenContextDevicesParamNameWhenGettingInfoThenCorrectDeviceIdsAreReturned) { auto devicesReturned = new cl_device_id[this->num_devices]; retVal = pContext->getInfo( CL_CONTEXT_DEVICES, this->num_devices * sizeof(cl_device_id), devicesReturned, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); for (size_t deviceOrdinal = 0; deviceOrdinal < this->num_devices; ++deviceOrdinal) { EXPECT_EQ(devices[deviceOrdinal], devicesReturned[deviceOrdinal]); } delete[] devicesReturned; } TEST_F(ContextGetInfoTest, GivenContextPropertiesWhenGettingInfoThenSuccessIsReturned) { cl_context_properties props; size_t size; auto retVal = pContext->getInfo( CL_CONTEXT_PROPERTIES, sizeof(props), &props, &size); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, size); } TEST_F(ContextGetInfoTest, givenMultipleContextPropertiesWhenTheyAreBeingQueriedThenGetInfoReturnProperProperties) { cl_context_properties properties[] = {CL_CONTEXT_SHOW_DIAGNOSTICS_INTEL, (cl_context_properties)0xff, 0}; constexpr auto propertiesCount = sizeof(properties) / sizeof(cl_context_properties); auto retValue = CL_SUCCESS; auto contextWithProperties = clCreateContext(properties, 1, &this->devices[0], nullptr, nullptr, &retValue); EXPECT_EQ(CL_SUCCESS, retValue); auto pContextWithProperties = castToObject(contextWithProperties); size_t size = 6; cl_context_properties obtainedProperties[propertiesCount] = {0}; auto retVal = 
pContextWithProperties->getInfo( CL_CONTEXT_PROPERTIES, sizeof(properties), obtainedProperties, &size); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(properties), size); for (auto property = 0u; property < propertiesCount; property++) { EXPECT_EQ(obtainedProperties[property], properties[property]); } clReleaseContext(contextWithProperties); } compute-runtime-22.14.22890/opencl/test/unit_test/context/context_multi_device_tests.cpp000066400000000000000000000252331422164147700314630ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/array_count.h" #include "shared/source/os_interface/device_factory.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/variable_backup.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/source/context/context.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/ult_cl_device_factory.h" #include "gtest/gtest.h" using namespace NEO; TEST(ContextMultiDevice, GivenSingleDeviceWhenCreatingContextThenContextIsCreated) { cl_device_id devices[] = { new MockClDevice{MockDevice::createWithNewExecutionEnvironment(nullptr)}}; auto numDevices = static_cast(arrayCount(devices)); auto retVal = CL_SUCCESS; auto pContext = Context::create(nullptr, ClDeviceVector(devices, numDevices), nullptr, nullptr, retVal); ASSERT_NE(nullptr, pContext); auto numDevicesReturned = pContext->getNumDevices(); EXPECT_EQ(numDevices, numDevicesReturned); ClDeviceVector ctxDevices; for (size_t deviceOrdinal = 0; deviceOrdinal < numDevicesReturned; ++deviceOrdinal) { ctxDevices.push_back(pContext->getDevice(deviceOrdinal)); } delete pContext; for (size_t deviceOrdinal = 0; deviceOrdinal < numDevicesReturned; ++deviceOrdinal) { auto pDevice = 
(ClDevice *)devices[deviceOrdinal]; ASSERT_NE(nullptr, pDevice); EXPECT_EQ(pDevice, ctxDevices[deviceOrdinal]); delete pDevice; } } TEST(ContextMultiDevice, GivenMultipleDevicesWhenCreatingContextThenContextIsCreatedForEachDevice) { cl_device_id devices[] = { new MockClDevice{MockDevice::createWithNewExecutionEnvironment(nullptr)}, new MockClDevice{MockDevice::createWithNewExecutionEnvironment(nullptr)}, new MockClDevice{MockDevice::createWithNewExecutionEnvironment(nullptr)}, new MockClDevice{MockDevice::createWithNewExecutionEnvironment(nullptr)}, new MockClDevice{MockDevice::createWithNewExecutionEnvironment(nullptr)}, new MockClDevice{MockDevice::createWithNewExecutionEnvironment(nullptr)}, new MockClDevice{MockDevice::createWithNewExecutionEnvironment(nullptr)}, new MockClDevice{MockDevice::createWithNewExecutionEnvironment(nullptr)}}; auto numDevices = static_cast(arrayCount(devices)); ASSERT_EQ(8u, numDevices); auto retVal = CL_SUCCESS; auto pContext = Context::create(nullptr, ClDeviceVector(devices, numDevices), nullptr, nullptr, retVal); ASSERT_NE(nullptr, pContext); auto numDevicesReturned = pContext->getNumDevices(); EXPECT_EQ(numDevices, numDevicesReturned); ClDeviceVector ctxDevices; for (size_t deviceOrdinal = 0; deviceOrdinal < numDevicesReturned; ++deviceOrdinal) { ctxDevices.push_back(pContext->getDevice(deviceOrdinal)); } delete pContext; for (size_t deviceOrdinal = 0; deviceOrdinal < numDevicesReturned; ++deviceOrdinal) { auto pDevice = (ClDevice *)devices[deviceOrdinal]; ASSERT_NE(nullptr, pDevice); EXPECT_EQ(pDevice, ctxDevices[deviceOrdinal]); delete pDevice; } } TEST(ContextMultiDevice, WhenGettingSubDeviceByIndexFromContextThenCorrectDeviceIsReturned) { DebugManagerStateRestore restorer; VariableBackup createSingleDeviceBackup{&MockDevice::createSingleDevice, false}; VariableBackup createRootDeviceFuncBackup{&DeviceFactory::createRootDeviceFunc}; DebugManager.flags.CreateMultipleSubDevices.set(2); createRootDeviceFuncBackup = 
[](ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex) -> std::unique_ptr { return std::unique_ptr(MockDevice::create(&executionEnvironment, rootDeviceIndex)); }; auto executionEnvironment = new ExecutionEnvironment; auto devices = DeviceFactory::createDevices(*executionEnvironment); auto pRootDevice = std::make_unique(static_cast(devices[0].release())); auto pSubDevice0 = pRootDevice->subDevices[0].get(); auto pSubDevice1 = pRootDevice->subDevices[1].get(); cl_device_id allDevices[3]{}; cl_device_id onlyRootDevices[1]{}; cl_device_id onlySubDevices[2]{}; allDevices[0] = onlyRootDevices[0] = pRootDevice.get(); allDevices[1] = onlySubDevices[0] = pSubDevice0; allDevices[2] = onlySubDevices[1] = pSubDevice1; cl_int retVal; auto pContextWithAllDevices = std::unique_ptr(Context::create(nullptr, ClDeviceVector(allDevices, 3), nullptr, nullptr, retVal)); EXPECT_NE(nullptr, pContextWithAllDevices); auto pContextWithRootDevices = std::unique_ptr(Context::create(nullptr, ClDeviceVector(onlyRootDevices, 1), nullptr, nullptr, retVal)); EXPECT_NE(nullptr, pContextWithRootDevices); auto pContextWithSubDevices = std::unique_ptr(Context::create(nullptr, ClDeviceVector(onlySubDevices, 2), nullptr, nullptr, retVal)); EXPECT_NE(nullptr, pContextWithSubDevices); EXPECT_EQ(pSubDevice0, pContextWithAllDevices->getSubDeviceByIndex(0)); EXPECT_EQ(nullptr, pContextWithRootDevices->getSubDeviceByIndex(0)); EXPECT_EQ(pSubDevice0, pContextWithSubDevices->getSubDeviceByIndex(0)); EXPECT_EQ(pSubDevice1, pContextWithAllDevices->getSubDeviceByIndex(1)); EXPECT_EQ(nullptr, pContextWithRootDevices->getSubDeviceByIndex(1)); EXPECT_EQ(pSubDevice1, pContextWithSubDevices->getSubDeviceByIndex(1)); } TEST(ContextMultiDevice, givenContextWithNonDefaultContextTypeWhenSetupContextTypeThenDoNothing) { UltClDeviceFactory deviceFactory{1, 2}; MockContext context0(deviceFactory.rootDevices[0]); context0.contextType = ContextType::CONTEXT_TYPE_DEFAULT; context0.setupContextType(); 
EXPECT_EQ(ContextType::CONTEXT_TYPE_DEFAULT, context0.peekContextType()); MockContext context1(deviceFactory.rootDevices[0]); context1.contextType = ContextType::CONTEXT_TYPE_SPECIALIZED; context1.setupContextType(); EXPECT_EQ(ContextType::CONTEXT_TYPE_SPECIALIZED, context1.peekContextType()); MockContext context2(deviceFactory.rootDevices[0]); context2.contextType = ContextType::CONTEXT_TYPE_UNRESTRICTIVE; context2.setupContextType(); EXPECT_EQ(ContextType::CONTEXT_TYPE_UNRESTRICTIVE, context2.peekContextType()); MockContext context3(deviceFactory.subDevices[0]); context3.contextType = ContextType::CONTEXT_TYPE_DEFAULT; context3.setupContextType(); EXPECT_EQ(ContextType::CONTEXT_TYPE_SPECIALIZED, context3.peekContextType()); MockContext context4(deviceFactory.subDevices[0]); context4.contextType = ContextType::CONTEXT_TYPE_UNRESTRICTIVE; context4.setupContextType(); EXPECT_EQ(ContextType::CONTEXT_TYPE_UNRESTRICTIVE, context4.peekContextType()); } TEST(ContextMultiDevice, givenRootDeviceWhenCreatingContextThenItHasDefaultType) { UltClDeviceFactory deviceFactory{1, 2}; cl_int retVal = CL_INVALID_CONTEXT; cl_device_id device = deviceFactory.rootDevices[0]; auto context = clUniquePtr(Context::create(nullptr, ClDeviceVector(&device, 1), nullptr, nullptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, context.get()); EXPECT_EQ(ContextType::CONTEXT_TYPE_DEFAULT, context->peekContextType()); } TEST(ContextMultiDevice, givenSubsetOfSubdevicesWhenCreatingContextThenItHasSpecializedType) { UltClDeviceFactory deviceFactory{1, 2}; cl_int retVal = CL_INVALID_CONTEXT; cl_device_id firstSubDevice = deviceFactory.subDevices[0]; cl_device_id secondSubDevice = deviceFactory.subDevices[1]; cl_device_id bothSubDevices[]{firstSubDevice, secondSubDevice}; auto context0 = clUniquePtr(Context::create(nullptr, ClDeviceVector(&firstSubDevice, 1), nullptr, nullptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, context0.get()); 
EXPECT_EQ(ContextType::CONTEXT_TYPE_SPECIALIZED, context0->peekContextType()); retVal = CL_INVALID_CONTEXT; auto context1 = clUniquePtr(Context::create(nullptr, ClDeviceVector(&secondSubDevice, 1), nullptr, nullptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, context1.get()); EXPECT_EQ(ContextType::CONTEXT_TYPE_SPECIALIZED, context1->peekContextType()); retVal = CL_INVALID_CONTEXT; auto context2 = clUniquePtr(Context::create(nullptr, ClDeviceVector(bothSubDevices, 2), nullptr, nullptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, context2.get()); EXPECT_EQ(ContextType::CONTEXT_TYPE_SPECIALIZED, context2->peekContextType()); } TEST(ContextMultiDevice, givenRootDeviceAndSubsetOfSubdevicesWhenCreatingContextThenItHasUnrestrictiveType) { UltClDeviceFactory deviceFactory{1, 2}; cl_int retVal = CL_INVALID_CONTEXT; cl_device_id rootDeviceAndFirstSubDevice[]{deviceFactory.subDevices[0], deviceFactory.rootDevices[0]}; cl_device_id rootDeviceAndSecondSubDevice[]{deviceFactory.subDevices[1], deviceFactory.rootDevices[0]}; cl_device_id rootDeviceAndBothSubDevices[]{deviceFactory.subDevices[0], deviceFactory.subDevices[1], deviceFactory.rootDevices[0]}; auto context0 = clUniquePtr(Context::create(nullptr, ClDeviceVector(rootDeviceAndFirstSubDevice, 2), nullptr, nullptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, context0.get()); EXPECT_EQ(ContextType::CONTEXT_TYPE_UNRESTRICTIVE, context0->peekContextType()); retVal = CL_INVALID_CONTEXT; auto context1 = clUniquePtr(Context::create(nullptr, ClDeviceVector(rootDeviceAndSecondSubDevice, 2), nullptr, nullptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, context1.get()); EXPECT_EQ(ContextType::CONTEXT_TYPE_UNRESTRICTIVE, context1->peekContextType()); retVal = CL_INVALID_CONTEXT; auto context2 = clUniquePtr(Context::create(nullptr, ClDeviceVector(rootDeviceAndBothSubDevices, 3), nullptr, nullptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, context2.get()); 
EXPECT_EQ(ContextType::CONTEXT_TYPE_UNRESTRICTIVE, context2->peekContextType()); } compute-runtime-22.14.22890/opencl/test/unit_test/context/context_negative_tests.cpp000066400000000000000000000053301422164147700306100ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/fixtures/memory_management_fixture.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/context/context.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "CL/cl_gl.h" #include "gtest/gtest.h" #include using namespace NEO; //////////////////////////////////////////////////////////////////////////////// typedef Test ContextFailureInjection; TEST_F(ContextFailureInjection, GivenFailedAllocationInjectionWhenCreatingContextThenOutOfHostMemoryErrorIsReturned) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); cl_device_id deviceID = device.get(); InjectedFunction method = [deviceID](size_t failureIndex) { auto retVal = CL_INVALID_VALUE; auto context = Context::create(nullptr, ClDeviceVector(&deviceID, 1), nullptr, nullptr, retVal); if (MemoryManagement::nonfailingAllocation == failureIndex) { EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, context); } else { EXPECT_EQ(CL_OUT_OF_HOST_MEMORY, retVal) << "for allocation " << failureIndex; EXPECT_EQ(nullptr, context); } delete context; context = nullptr; }; injectFailures(method); } TEST(InvalidPropertyContextTest, GivenInvalidPropertiesWhenContextIsCreatedThenErrorIsReturned) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); cl_device_id deviceID = device.get(); auto pPlatform = NEO::platform(); cl_platform_id pid[1]; pid[0] = pPlatform; cl_context_properties invalidProperties[5] = {CL_CONTEXT_PLATFORM, (cl_context_properties)pid[0], 
CL_CGL_SHAREGROUP_KHR, 0x10000, 0}; cl_context_properties invalidProperties2[5] = {CL_CONTEXT_PLATFORM, (cl_context_properties)pid[0], (cl_context_properties)0xdeadbeef, 0x10000, 0}; cl_int retVal = 0; auto context = Context::create(invalidProperties, ClDeviceVector(&deviceID, 1), nullptr, nullptr, retVal); EXPECT_EQ(CL_INVALID_PROPERTY, retVal); EXPECT_EQ(nullptr, context); delete context; context = Context::create(invalidProperties2, ClDeviceVector(&deviceID, 1), nullptr, nullptr, retVal); EXPECT_EQ(CL_INVALID_PROPERTY, retVal); EXPECT_EQ(nullptr, context); delete context; } compute-runtime-22.14.22890/opencl/test/unit_test/context/context_tests.cpp000066400000000000000000000703451422164147700267360ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/device/device.h" #include "shared/source/helpers/blit_commands_helper.h" #include "shared/source/helpers/local_memory_access_modes.h" #include "shared/source/memory_manager/unified_memory_manager.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/variable_backup.h" #include "shared/test/common/mocks/mock_deferred_deleter.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/context/context.inl" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/sharings/sharing.h" #include "opencl/test/unit_test/fixtures/platform_fixture.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "opencl/test/unit_test/test_macros/test_checks_ocl.h" using namespace NEO; 
class WhiteBoxContext : public Context { public: MemoryManager *getMM() { return this->memoryManager; } const cl_context_properties *getProperties() const { return properties; } size_t getNumProperties() const { return numProperties; } WhiteBoxContext(void(CL_CALLBACK *pfnNotify)(const char *, const void *, size_t, void *), void *userData) : Context(pfnNotify, userData){}; }; struct ContextTest : public PlatformFixture, public ::testing::Test { using PlatformFixture::SetUp; void SetUp() override { PlatformFixture::SetUp(); cl_platform_id platform = pPlatform; properties = new cl_context_properties[3]; properties[0] = CL_CONTEXT_PLATFORM; properties[1] = (cl_context_properties)platform; properties[2] = 0; context = Context::create(properties, ClDeviceVector(devices, num_devices), nullptr, nullptr, retVal); ASSERT_NE(nullptr, context); } void TearDown() override { delete[] properties; delete context; PlatformFixture::TearDown(); } uint32_t getRootDeviceIndex() { return context->getDevice(0)->getRootDeviceIndex(); } cl_int retVal = CL_SUCCESS; WhiteBoxContext *context = nullptr; cl_context_properties *properties = nullptr; }; TEST_F(ContextTest, WhenCreatingContextThenDevicesAllDevicesExist) { for (size_t deviceOrdinal = 0; deviceOrdinal < context->getNumDevices(); ++deviceOrdinal) { EXPECT_NE(nullptr, context->getDevice(deviceOrdinal)); } } TEST_F(ContextTest, WhenCreatingContextThenMemoryManagerForContextIsSet) { EXPECT_NE(nullptr, context->getMM()); } TEST_F(ContextTest, WhenCreatingContextThenPropertiesAreCopied) { auto contextProperties = context->getProperties(); EXPECT_NE(properties, contextProperties); } TEST_F(ContextTest, WhenCreatingContextThenPropertiesAreValid) { auto contextProperties = context->getProperties(); ASSERT_NE(nullptr, contextProperties); EXPECT_EQ(3u, context->getNumProperties()); while (*contextProperties) { switch (*contextProperties) { case CL_CONTEXT_PLATFORM: ++contextProperties; break; default: ASSERT_FALSE(!"Unknown context 
property"); break; } ++contextProperties; } } TEST_F(ContextTest, WhenCreatingContextThenSpecialQueueIsAvailable) { auto specialQ = context->getSpecialQueue(0u); EXPECT_NE(specialQ, nullptr); } TEST_F(ContextTest, WhenSettingSpecialQueueThenQueueIsAvailable) { MockContext context((ClDevice *)devices[0], true); auto specialQ = context.getSpecialQueue(0u); EXPECT_EQ(specialQ, nullptr); auto cmdQ = new MockCommandQueue(&context, (ClDevice *)devices[0], 0, false); context.setSpecialQueue(cmdQ, 0u); specialQ = context.getSpecialQueue(0u); EXPECT_NE(specialQ, nullptr); } TEST_F(ContextTest, givenCmdQueueWithoutContextWhenBeingCreatedNextDeletedThenContextRefCountShouldNeitherBeIncrementedNorNextDecremented) { MockContext context((ClDevice *)devices[0]); EXPECT_EQ(1, context.getRefInternalCount()); auto cmdQ1 = new MockCommandQueue(); EXPECT_EQ(1, context.getRefInternalCount()); delete cmdQ1; EXPECT_EQ(1, context.getRefInternalCount()); auto cmdQ2 = new MockCommandQueue(nullptr, (ClDevice *)devices[0], 0, false); EXPECT_EQ(1, context.getRefInternalCount()); delete cmdQ2; EXPECT_EQ(1, context.getRefInternalCount()); } TEST_F(ContextTest, givenCmdQueueWithContextWhenBeingCreatedNextDeletedThenContextRefCountShouldBeIncrementedNextDecremented) { MockContext context((ClDevice *)devices[0]); EXPECT_EQ(1, context.getRefInternalCount()); auto cmdQ = new MockCommandQueue(&context, (ClDevice *)devices[0], 0, false); EXPECT_EQ(2, context.getRefInternalCount()); delete cmdQ; EXPECT_EQ(1, context.getRefInternalCount()); } TEST_F(ContextTest, givenContextWhenItIsCreatedFromDeviceThenItAddsRefCountToThisDevice) { auto device = castToObject(devices[0]); EXPECT_EQ(2, device->getRefInternalCount()); cl_device_id deviceID = devices[0]; std::unique_ptr context(Context::create(0, ClDeviceVector(&deviceID, 1), nullptr, nullptr, retVal)); EXPECT_EQ(3, device->getRefInternalCount()); context.reset(nullptr); EXPECT_EQ(2, device->getRefInternalCount()); } TEST_F(ContextTest, 
givenContextWhenItIsCreatedFromMultipleDevicesThenItAddsRefCountToThoseDevices) { auto device = castToObject(devices[0]); EXPECT_EQ(2, device->getRefInternalCount()); ClDeviceVector devicesVector; devicesVector.push_back(device); devicesVector.push_back(device); std::unique_ptr context(Context::create(0, devicesVector, nullptr, nullptr, retVal)); EXPECT_EQ(4, device->getRefInternalCount()); context.reset(nullptr); EXPECT_EQ(2, device->getRefInternalCount()); } TEST_F(ContextTest, givenSpecialCmdQueueWithContextWhenBeingCreatedNextAutoDeletedThenContextRefCountShouldNeitherBeIncrementedNorNextDecremented) { MockContext context((ClDevice *)devices[0], true); EXPECT_EQ(1, context.getRefInternalCount()); auto cmdQ = new MockCommandQueue(&context, (ClDevice *)devices[0], 0, false); context.overrideSpecialQueueAndDecrementRefCount(cmdQ, 0u); EXPECT_EQ(1, context.getRefInternalCount()); //special queue is to be deleted implicitly by context } TEST_F(ContextTest, givenSpecialCmdQueueWithContextWhenBeingCreatedNextDeletedThenContextRefCountShouldNeitherBeIncrementedNorNextDecremented) { MockContext context((ClDevice *)devices[0], true); EXPECT_EQ(1, context.getRefInternalCount()); auto cmdQ = new MockCommandQueue(&context, (ClDevice *)devices[0], 0, false); context.overrideSpecialQueueAndDecrementRefCount(cmdQ, 0u); EXPECT_EQ(1, context.getRefInternalCount()); delete cmdQ; EXPECT_EQ(1, context.getRefInternalCount()); context.setSpecialQueue(nullptr, 0u); } TEST_F(ContextTest, GivenInteropSyncParamWhenCreateContextThenSetContextParam) { cl_device_id deviceID = devices[0]; auto pPlatform = NEO::platform(); cl_platform_id pid[1]; pid[0] = pPlatform; cl_context_properties validProperties[5] = {CL_CONTEXT_PLATFORM, (cl_context_properties)pid[0], CL_CONTEXT_INTEROP_USER_SYNC, 1, 0}; cl_int retVal = CL_SUCCESS; auto context = Context::create(validProperties, ClDeviceVector(&deviceID, 1), nullptr, nullptr, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, context); 
EXPECT_TRUE(context->getInteropUserSyncEnabled()); delete context; validProperties[3] = 0; // false context = Context::create(validProperties, ClDeviceVector(&deviceID, 1), nullptr, nullptr, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, context); EXPECT_FALSE(context->getInteropUserSyncEnabled()); delete context; } class MockSharingFunctions : public SharingFunctions { public: uint32_t getId() const override { return sharingId; } static const uint32_t sharingId = 0; }; TEST_F(ContextTest, givenContextWhenSharingTableEmptyThenReturnsNullptr) { MockContext context; context.clearSharingFunctions(); auto *sharingF = context.getSharing(); EXPECT_EQ(sharingF, nullptr); } TEST_F(ContextTest, givenNullptrWhenRegisteringSharingToContextThenAbortExecution) { MockContext context; context.clearSharingFunctions(); EXPECT_THROW(context.registerSharing(nullptr), std::exception); } TEST_F(ContextTest, givenContextWhenSharingTableIsNotEmptyThenReturnsSharingFunctionPointer) { MockContext context; MockSharingFunctions *sharingFunctions = new MockSharingFunctions; context.registerSharing(sharingFunctions); auto *sharingF = context.getSharing(); EXPECT_EQ(sharingF, sharingFunctions); } TEST(Context, givenFtrSvmFalseWhenContextIsCreatedThenSVMAllocsManagerIsNotCreated) { ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); executionEnvironment->prepareRootDeviceEnvironments(1u); auto hwInfo = executionEnvironment->rootDeviceEnvironments[0]->getMutableHardwareInfo(); hwInfo->capabilityTable.ftrSvm = false; auto device = std::make_unique(MockDevice::createWithExecutionEnvironment(hwInfo, executionEnvironment, 0)); cl_device_id clDevice = device.get(); cl_int retVal = CL_SUCCESS; auto context = std::unique_ptr(Context::create(nullptr, ClDeviceVector(&clDevice, 1), nullptr, nullptr, retVal)); ASSERT_NE(nullptr, context); auto svmManager = context->getSVMAllocsManager(); EXPECT_EQ(nullptr, svmManager); } TEST(Context, 
whenCreateContextThenSpecialQueueUsesInternalEngine) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); cl_device_id clDevice = device.get(); cl_int retVal = CL_SUCCESS; auto context = std::unique_ptr(Context::create(nullptr, ClDeviceVector(&clDevice, 1), nullptr, nullptr, retVal)); ASSERT_NE(nullptr, context); EXPECT_EQ(CL_SUCCESS, retVal); auto specialQueueEngine = context->getSpecialQueue(device->getRootDeviceIndex())->getGpgpuEngine(); auto internalEngine = device->getInternalEngine(); EXPECT_EQ(internalEngine.commandStreamReceiver, specialQueueEngine.commandStreamReceiver); } TEST(MultiDeviceContextTest, givenContextWithMultipleDevicesWhenGettingInfoAboutSubDevicesThenCorrectValueIsReturned) { MockSpecializedContext context1; MockUnrestrictiveContext context2; MockDefaultContext context3; EXPECT_EQ(2u, context1.getNumDevices()); EXPECT_TRUE(context1.containsMultipleSubDevices(0)); EXPECT_EQ(3u, context2.getNumDevices()); EXPECT_TRUE(context2.containsMultipleSubDevices(0)); EXPECT_EQ(3u, context3.getNumDevices()); EXPECT_FALSE(context3.containsMultipleSubDevices(0)); EXPECT_FALSE(context3.containsMultipleSubDevices(1)); EXPECT_FALSE(context3.containsMultipleSubDevices(2)); } class ContextWithAsyncDeleterTest : public ::testing::WithParamInterface, public ::testing::Test { public: void SetUp() override { memoryManager = new MockMemoryManager(); device = new MockClDevice{MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())}; deleter = new MockDeferredDeleter(); device->allEngines.clear(); device->injectMemoryManager(memoryManager); device->createEngines(); memoryManager->setDeferredDeleter(deleter); } void TearDown() override { delete device; } Context *context; MockMemoryManager *memoryManager; MockDeferredDeleter *deleter; MockClDevice *device; }; TEST_P(ContextWithAsyncDeleterTest, givenContextWithMemoryManagerWhenAsyncDeleterIsEnabledThenUsesDeletersMethods) { cl_device_id clDevice = device; 
cl_int retVal; ClDeviceVector deviceVector(&clDevice, 1); bool asyncDeleterEnabled = GetParam(); memoryManager->overrideAsyncDeleterFlag(asyncDeleterEnabled); EXPECT_EQ(0, deleter->getClientsNum()); context = Context::create(0, deviceVector, nullptr, nullptr, retVal); if (asyncDeleterEnabled) { EXPECT_EQ(1, deleter->getClientsNum()); } else { EXPECT_EQ(0, deleter->getClientsNum()); } delete context; EXPECT_EQ(0, deleter->getClientsNum()); } INSTANTIATE_TEST_CASE_P(ContextTests, ContextWithAsyncDeleterTest, ::testing::Bool()); TEST(DefaultContext, givenDefaultContextWhenItIsQueriedForTypeThenDefaultTypeIsReturned) { MockContext context; EXPECT_EQ(ContextType::CONTEXT_TYPE_DEFAULT, context.peekContextType()); } TEST(Context, givenContextWhenCheckIfAllocationsAreMultiStorageThenReturnProperValueAccordingToContextType) { MockContext context; EXPECT_TRUE(context.areMultiStorageAllocationsPreferred()); context.contextType = ContextType::CONTEXT_TYPE_SPECIALIZED; EXPECT_FALSE(context.areMultiStorageAllocationsPreferred()); context.contextType = ContextType::CONTEXT_TYPE_UNRESTRICTIVE; EXPECT_TRUE(context.areMultiStorageAllocationsPreferred()); } TEST(Context, givenContextWhenIsDeviceAssociatedIsCalledWithItsDeviceThenTrueIsReturned) { MockContext context; EXPECT_TRUE(context.isDeviceAssociated(*context.getDevice(0))); } TEST(Context, givenContextWhenIsDeviceAssociatedIsCalledWithNotAssociatedDeviceThenFalseIsReturned) { MockContext context0; MockContext context1; EXPECT_FALSE(context0.isDeviceAssociated(*context1.getDevice(0))); EXPECT_FALSE(context1.isDeviceAssociated(*context0.getDevice(0))); } TEST(Context, givenContextWithSingleDevicesWhenGettingDeviceBitfieldForAllocationThenDeviceBitfieldForDeviceIsReturned) { UltClDeviceFactory deviceFactory{1, 3}; auto device = deviceFactory.subDevices[1]; auto expectedDeviceBitfield = device->getDeviceBitfield(); MockContext context(device); EXPECT_EQ(expectedDeviceBitfield.to_ulong(), 
context.getDeviceBitfieldForAllocation(device->getRootDeviceIndex()).to_ulong()); } TEST(Context, givenContextWithMultipleSubDevicesWhenGettingDeviceBitfieldForAllocationThenMergedDeviceBitfieldIsReturned) { UltClDeviceFactory deviceFactory{1, 3}; cl_int retVal; cl_device_id devices[]{deviceFactory.subDevices[0], deviceFactory.subDevices[2]}; ClDeviceVector deviceVector(devices, 2); auto expectedDeviceBitfield = deviceFactory.subDevices[0]->getDeviceBitfield() | deviceFactory.subDevices[2]->getDeviceBitfield(); auto context = Context::create(0, deviceVector, nullptr, nullptr, retVal); EXPECT_NE(nullptr, context); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(expectedDeviceBitfield.to_ulong(), context->getDeviceBitfieldForAllocation(deviceFactory.rootDevices[0]->getRootDeviceIndex()).to_ulong()); context->release(); } TEST(MultiDeviceContextTest, givenContextWithTwoDifferentSubDevicesFromDifferentRootDevicesWhenGettingDeviceBitfieldForAllocationThenSeparatedDeviceBitfieldsAreReturned) { DebugManagerStateRestore restorer; DebugManager.flags.EnableMultiRootDeviceContexts.set(true); UltClDeviceFactory deviceFactory{2, 2}; cl_int retVal; cl_device_id devices[]{deviceFactory.subDevices[1], deviceFactory.subDevices[2]}; ClDeviceVector deviceVector(devices, 2); auto expectedDeviceBitfieldForRootDevice0 = deviceFactory.subDevices[1]->getDeviceBitfield(); auto expectedDeviceBitfieldForRootDevice1 = deviceFactory.subDevices[2]->getDeviceBitfield(); auto context = Context::create(0, deviceVector, nullptr, nullptr, retVal); EXPECT_NE(nullptr, context); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(expectedDeviceBitfieldForRootDevice0.to_ulong(), context->getDeviceBitfieldForAllocation(deviceFactory.rootDevices[0]->getRootDeviceIndex()).to_ulong()); EXPECT_EQ(expectedDeviceBitfieldForRootDevice1.to_ulong(), context->getDeviceBitfieldForAllocation(deviceFactory.rootDevices[1]->getRootDeviceIndex()).to_ulong()); context->release(); } TEST(Context, 
WhenSettingContextDestructorCallbackThenCallOrderIsPreserved) { struct UserDataType { cl_context expectedContext; std::vector &vectorToModify; size_t valueToAdd; }; auto callback = [](cl_context context, void *userData) -> void { auto pUserData = reinterpret_cast(userData); EXPECT_EQ(pUserData->expectedContext, context); pUserData->vectorToModify.push_back(pUserData->valueToAdd); }; auto pContext = new MockContext{}; std::vector callbacksReturnValues; UserDataType userDataArray[]{ {pContext, callbacksReturnValues, 1}, {pContext, callbacksReturnValues, 2}, {pContext, callbacksReturnValues, 3}}; for (auto &userData : userDataArray) { cl_int retVal = clSetContextDestructorCallback(pContext, callback, &userData); ASSERT_EQ(CL_SUCCESS, retVal); } delete pContext; ASSERT_EQ(3u, callbacksReturnValues.size()); EXPECT_EQ(3u, callbacksReturnValues[0]); EXPECT_EQ(2u, callbacksReturnValues[1]); EXPECT_EQ(1u, callbacksReturnValues[2]); } TEST(Context, givenContextAndDevicesWhenIsTileOnlyThenProperValueReturned) { UltClDeviceFactory deviceFactoryWithSubDevices{1, 2}; UltClDeviceFactory deviceFactoryWithMultipleDevices{2, 0}; cl_device_id devices[] = {deviceFactoryWithMultipleDevices.rootDevices[0], deviceFactoryWithMultipleDevices.rootDevices[1]}; MockContext tileOnlyContext(deviceFactoryWithMultipleDevices.rootDevices[0]); MockContext subDevicesContext(deviceFactoryWithSubDevices.rootDevices[0]); MockContext multipleDevicesContext(ClDeviceVector(devices, 2)); EXPECT_TRUE(tileOnlyContext.isSingleDeviceContext()); EXPECT_FALSE(subDevicesContext.isSingleDeviceContext()); EXPECT_FALSE(multipleDevicesContext.isSingleDeviceContext()); } TEST(InvalidExtraPropertiesTests, givenInvalidExtraPropertiesWhenCreatingContextThenContextIsNotCreated) { constexpr cl_context_properties INVALID_PROPERTY_TYPE = (1 << 31); constexpr cl_context_properties INVALID_CONTEXT_FLAG = (1 << 31); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); cl_device_id deviceID = 
device.get(); cl_int retVal = 0; std::unique_ptr context; { cl_context_properties properties[] = {INVALID_PROPERTY_TYPE, INVALID_CONTEXT_FLAG, 0}; context.reset(Context::create(properties, ClDeviceVector(&deviceID, 1), nullptr, nullptr, retVal)); EXPECT_EQ(CL_INVALID_PROPERTY, retVal); EXPECT_EQ(nullptr, context.get()); } } using ContextCreateTests = ::testing::Test; HWCMDTEST_F(IGFX_XE_HP_CORE, ContextCreateTests, givenLocalMemoryAllocationWhenBlitMemoryToAllocationIsCalledThenSuccessIsReturned) { if (is32bit) { GTEST_SKIP(); } DebugManagerStateRestore restore; DebugManager.flags.EnableLocalMemory.set(true); DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast(LocalMemoryAccessMode::Default)); VariableBackup backupHwInfo(defaultHwInfo.get()); defaultHwInfo->capabilityTable.blitterOperationsSupported = true; UltClDeviceFactory deviceFactory{1, 2}; ClDevice *devicesToTest[] = {deviceFactory.rootDevices[0], deviceFactory.subDevices[0], deviceFactory.subDevices[1]}; for (const auto &testedDevice : devicesToTest) { MockContext context(testedDevice); cl_int retVal; auto buffer = std::unique_ptr(Buffer::create(&context, {}, 1, nullptr, retVal)); auto memory = buffer->getGraphicsAllocation(testedDevice->getRootDeviceIndex()); uint8_t hostMemory[1]; auto executionEnv = testedDevice->getExecutionEnvironment(); executionEnv->rootDeviceEnvironments[0]->getMutableHardwareInfo()->capabilityTable.blitterOperationsSupported = false; EXPECT_EQ(BlitOperationResult::Unsupported, BlitHelper::blitMemoryToAllocation(buffer->getContext()->getDevice(0)->getDevice(), memory, buffer->getOffset(), hostMemory, {1, 1, 1})); executionEnv->rootDeviceEnvironments[0]->getMutableHardwareInfo()->capabilityTable.blitterOperationsSupported = true; EXPECT_EQ(BlitOperationResult::Success, BlitHelper::blitMemoryToAllocation(buffer->getContext()->getDevice(0)->getDevice(), memory, buffer->getOffset(), hostMemory, {1, 1, 1})); } } struct AllocationReuseContextTest : ContextTest { void 
addMappedPtr(Buffer &buffer, void *ptr, size_t ptrLength) { auto &handler = context->getMapOperationsStorage().getHandler(&buffer); MemObjSizeArray size{}; MemObjSizeArray offset{}; cl_map_flags mapFlag = CL_MAP_READ; EXPECT_TRUE(handler.add(ptr, ptrLength, mapFlag, size, offset, 0, buffer.getMultiGraphicsAllocation().getDefaultGraphicsAllocation())); } void addSvmPtr(InternalMemoryType type, GraphicsAllocation &allocation) { SvmAllocationData svmEntry{getRootDeviceIndex()}; svmEntry.memoryType = type; svmEntry.size = allocation.getUnderlyingBufferSize(); svmEntry.gpuAllocations.addAllocation(&allocation); if (type != InternalMemoryType::DEVICE_UNIFIED_MEMORY) { svmEntry.cpuAllocation = &allocation; } context->getSVMAllocsManager()->insertSVMAlloc(svmEntry); } }; TEST_F(AllocationReuseContextTest, givenSharedSvmAllocPresentWhenGettingExistingHostPtrAllocThenRetrieveTheAllocation) { REQUIRE_SVM_OR_SKIP(context->getDevice(0)); uint64_t svmPtrGpu = 0x1234; void *svmPtr = reinterpret_cast(svmPtrGpu); MockGraphicsAllocation allocation{svmPtr, svmPtrGpu, 400}; addSvmPtr(InternalMemoryType::SHARED_UNIFIED_MEMORY, allocation); GraphicsAllocation *retrievedAllocation{}; InternalMemoryType retrievedMemoryType{}; bool retrievedCpuCopyStatus = true; retVal = context->tryGetExistingHostPtrAllocation(svmPtr, allocation.getUnderlyingBufferSize(), getRootDeviceIndex(), retrievedAllocation, retrievedMemoryType, retrievedCpuCopyStatus); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(&allocation, retrievedAllocation); EXPECT_EQ(InternalMemoryType::SHARED_UNIFIED_MEMORY, retrievedMemoryType); EXPECT_TRUE(retrievedCpuCopyStatus); } TEST_F(AllocationReuseContextTest, givenHostSvmAllocPresentWhenGettingExistingHostPtrAllocThenRetrieveTheAllocation) { REQUIRE_SVM_OR_SKIP(context->getDevice(0)); uint64_t svmPtrGpu = 0x1234; void *svmPtr = reinterpret_cast(svmPtrGpu); MockGraphicsAllocation allocation{svmPtr, svmPtrGpu, 400}; addSvmPtr(InternalMemoryType::HOST_UNIFIED_MEMORY, allocation); 
GraphicsAllocation *retrievedAllocation{}; InternalMemoryType retrievedMemoryType{}; bool retrievedCpuCopyStatus = true; retVal = context->tryGetExistingHostPtrAllocation(svmPtr, allocation.getUnderlyingBufferSize(), getRootDeviceIndex(), retrievedAllocation, retrievedMemoryType, retrievedCpuCopyStatus); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(&allocation, retrievedAllocation); EXPECT_EQ(InternalMemoryType::HOST_UNIFIED_MEMORY, retrievedMemoryType); EXPECT_TRUE(retrievedCpuCopyStatus); } TEST_F(AllocationReuseContextTest, givenDeviceSvmAllocPresentWhenGettingExistingHostPtrAllocThenRetrieveTheAllocationAndDisallowCpuCopy) { REQUIRE_SVM_OR_SKIP(context->getDevice(0)); uint64_t svmPtrGpu = 0x1234; void *svmPtr = reinterpret_cast(svmPtrGpu); MockGraphicsAllocation allocation{svmPtr, svmPtrGpu, 400}; addSvmPtr(InternalMemoryType::DEVICE_UNIFIED_MEMORY, allocation); GraphicsAllocation *retrievedAllocation{}; InternalMemoryType retrievedMemoryType{}; bool retrievedCpuCopyStatus = true; retVal = context->tryGetExistingHostPtrAllocation(svmPtr, allocation.getUnderlyingBufferSize(), getRootDeviceIndex(), retrievedAllocation, retrievedMemoryType, retrievedCpuCopyStatus); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(&allocation, retrievedAllocation); EXPECT_EQ(InternalMemoryType::DEVICE_UNIFIED_MEMORY, retrievedMemoryType); EXPECT_FALSE(retrievedCpuCopyStatus); } TEST_F(AllocationReuseContextTest, givenHostSvmAllocPresentButRequestingTooBigSizeWhenGettingExistingHostPtrAllocThenReturnError) { REQUIRE_SVM_OR_SKIP(context->getDevice(0)); uint64_t svmPtrGpu = 0x1234; void *svmPtr = reinterpret_cast(svmPtrGpu); MockGraphicsAllocation allocation{svmPtr, svmPtrGpu, 400}; addSvmPtr(InternalMemoryType::HOST_UNIFIED_MEMORY, allocation); size_t ptrSizeToRetrieve = allocation.getUnderlyingBufferSize() + 1; GraphicsAllocation *retrievedAllocation{}; InternalMemoryType retrievedMemoryType{}; bool retrievedCpuCopyStatus = true; retVal = context->tryGetExistingHostPtrAllocation(svmPtr, 
ptrSizeToRetrieve, getRootDeviceIndex(), retrievedAllocation, retrievedMemoryType, retrievedCpuCopyStatus); EXPECT_EQ(CL_INVALID_OPERATION, retVal); } TEST_F(AllocationReuseContextTest, givenHostPtrStoredInMapOperationsStorageWhenGettingExistingHostPtrAllocThenRetrieveTheAllocation) { MockGraphicsAllocation allocation{}; MockBuffer buffer{context, allocation}; void *mappedPtr = reinterpret_cast(0x1234); size_t mappedPtrSize = 10u; addMappedPtr(buffer, mappedPtr, mappedPtrSize); GraphicsAllocation *retrievedAllocation{}; InternalMemoryType retrievedMemoryType{}; bool retrievedCpuCopyStatus = true; retVal = context->tryGetExistingHostPtrAllocation(mappedPtr, mappedPtrSize, getRootDeviceIndex(), retrievedAllocation, retrievedMemoryType, retrievedCpuCopyStatus); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(&allocation, retrievedAllocation); EXPECT_EQ(InternalMemoryType::NOT_SPECIFIED, retrievedMemoryType); EXPECT_TRUE(retrievedCpuCopyStatus); } TEST_F(AllocationReuseContextTest, givenHostPtrNotStoredInMapOperationsStorageWhenGettingExistingHostPtrAllocThenFailToRetrieveTheAllocation) { MockGraphicsAllocation allocation{}; MockBuffer buffer{context, allocation}; void *mappedPtr = reinterpret_cast(0x1234); size_t mappedPtrSize = 10u; addMappedPtr(buffer, mappedPtr, mappedPtrSize); void *differentPtr = reinterpret_cast(0x12345); GraphicsAllocation *retrievedAllocation{}; InternalMemoryType retrievedMemoryType{}; bool retrievedCpuCopyStatus = true; retVal = context->tryGetExistingHostPtrAllocation(differentPtr, mappedPtrSize, getRootDeviceIndex(), retrievedAllocation, retrievedMemoryType, retrievedCpuCopyStatus); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(nullptr, retrievedAllocation); EXPECT_EQ(InternalMemoryType::NOT_SPECIFIED, retrievedMemoryType); EXPECT_TRUE(retrievedCpuCopyStatus); } TEST_F(AllocationReuseContextTest, givenHostPtrStoredInMapOperationsStorageAndRequestedPtrToBigWhenGettingExistingHostPtrAllocThenFailRetrieveTheAllocation) { MockGraphicsAllocation 
allocation{}; MockBuffer buffer{context, allocation}; void *mappedPtr = reinterpret_cast(0x1234); size_t mappedPtrSize = 10u; addMappedPtr(buffer, mappedPtr, mappedPtrSize); size_t ptrSizeToRetrieve = mappedPtrSize + 1; GraphicsAllocation *retrievedAllocation{}; InternalMemoryType retrievedMemoryType{}; bool retrievedCpuCopyStatus = true; retVal = context->tryGetExistingHostPtrAllocation(mappedPtr, ptrSizeToRetrieve, getRootDeviceIndex(), retrievedAllocation, retrievedMemoryType, retrievedCpuCopyStatus); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(nullptr, retrievedAllocation); EXPECT_EQ(InternalMemoryType::NOT_SPECIFIED, retrievedMemoryType); EXPECT_TRUE(retrievedCpuCopyStatus); } compute-runtime-22.14.22890/opencl/test/unit_test/context/driver_diagnostics_enqueue_tests.cpp000066400000000000000000001110151422164147700326510ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/local_work_size.h" #include "shared/source/memory_manager/unified_memory_manager.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "opencl/source/event/user_event.h" #include "opencl/test/unit_test/context/driver_diagnostics_tests.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "opencl/test/unit_test/test_macros/test_checks_ocl.h" using namespace NEO; TEST_F(PerformanceHintEnqueueBufferTest, GivenBlockingReadWhenEnqueueReadBufferIsCallingWithCPUCopyThenContextProvidesProperHint) { buffer->forceDisallowCPUCopy = false; void *ptr = alignedMalloc(2 * MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize); pCmdQ->enqueueReadBuffer( buffer, CL_TRUE, 0, MemoryConstants::cacheLineSize, ptr, nullptr, 0, nullptr, nullptr); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_BUFFER_REQUIRES_COPY_DATA], static_cast(buffer), ptr); EXPECT_TRUE(containsHint(expectedHint, userData)); alignedFree(ptr); } 
TEST_P(PerformanceHintEnqueueReadBufferTest, GivenHostPtrAndSizeAlignmentsWhenEnqueueReadBufferIsCallingThenContextProvidesHintsAboutAlignments) { void *ptr = alignedMalloc(2 * MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize); uintptr_t addressForReadBuffer = (uintptr_t)ptr; size_t sizeForReadBuffer = MemoryConstants::cacheLineSize; if (!alignedAddress) { addressForReadBuffer++; } if (!alignedSize) { sizeForReadBuffer--; } pCmdQ->enqueueReadBuffer(buffer, CL_FALSE, 0, sizeForReadBuffer, (void *)addressForReadBuffer, nullptr, 0, nullptr, nullptr); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_BUFFER_REQUIRES_COPY_DATA], static_cast(buffer), addressForReadBuffer); EXPECT_TRUE(containsHint(expectedHint, userData)); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_BUFFER_DOESNT_MEET_ALIGNMENT_RESTRICTIONS], addressForReadBuffer, sizeForReadBuffer, MemoryConstants::pageSize, MemoryConstants::pageSize); EXPECT_EQ(!(alignedSize && alignedAddress), containsHint(expectedHint, userData)); alignedFree(ptr); } TEST_P(PerformanceHintEnqueueReadBufferTest, GivenHostPtrAndSizeAlignmentsWhenEnqueueReadBufferRectIsCallingThenContextProvidesHintsAboutAlignments) { void *ptr = alignedMalloc(2 * MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize); uintptr_t addressForReadBufferRect = (uintptr_t)ptr; size_t sizeForReadBufferRect = MemoryConstants::cacheLineSize; if (!alignedAddress) { addressForReadBufferRect++; } if (!alignedSize) { sizeForReadBufferRect--; } size_t bufferOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {sizeForReadBufferRect, 1, 1}; pCmdQ->enqueueReadBufferRect( buffer, CL_TRUE, bufferOrigin, hostOrigin, region, 0, 0, 0, 0, (void *)addressForReadBufferRect, 0, nullptr, nullptr); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, 
DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_BUFFER_RECT_REQUIRES_COPY_DATA], static_cast(buffer), addressForReadBufferRect); EXPECT_TRUE(containsHint(expectedHint, userData)); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_BUFFER_RECT_DOESNT_MEET_ALIGNMENT_RESTRICTIONS], addressForReadBufferRect, sizeForReadBufferRect, MemoryConstants::pageSize, MemoryConstants::pageSize); EXPECT_EQ(!(alignedSize && alignedAddress), containsHint(expectedHint, userData)); alignedFree(ptr); } TEST_F(PerformanceHintEnqueueBufferTest, GivenNonBlockingReadAndNotSharedMemWhenEnqueueReadBufferRectIsCallingThenContextProvidesProperHint) { size_t bufferOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {1, 2, 1}; void *ptr = alignedMalloc(2 * MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize); pCmdQ->enqueueReadBufferRect( buffer, CL_FALSE, bufferOrigin, hostOrigin, region, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, ptr, 0, nullptr, nullptr); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_BUFFER_RECT_REQUIRES_COPY_DATA], static_cast(buffer), ptr); EXPECT_TRUE(containsHint(expectedHint, userData)); alignedFree(ptr); } TEST_F(PerformanceHintEnqueueBufferTest, GivenNonBlockingReadAndSharedMemWhenEnqueueReadBufferRectIsCallingThenContextProvidesProperHint) { size_t bufferOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {1, 2, 1}; pCmdQ->enqueueReadBufferRect( buffer, CL_FALSE, bufferOrigin, hostOrigin, region, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, address, 0, nullptr, nullptr); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_BUFFER_RECT_DOESNT_REQUIRES_COPY_DATA], static_cast(buffer), 
address); EXPECT_TRUE(containsHint(expectedHint, userData)); } TEST_F(PerformanceHintEnqueueBufferTest, GivenNonBlockingWriteAndBufferDoesntShareMemWithCPUWhenEnqueueWriteBufferIsCallingWithoutCPUCopyThenContextProvidesRequiedCopyHint) { buffer->forceDisallowCPUCopy = true; void *ptr = alignedMalloc(2 * MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize); pCmdQ->enqueueWriteBuffer( buffer, CL_FALSE, 0, MemoryConstants::cacheLineSize, ptr, nullptr, 0, nullptr, nullptr); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_WRITE_BUFFER_REQUIRES_COPY_DATA], static_cast(buffer)); EXPECT_TRUE(containsHint(expectedHint, userData)); alignedFree(ptr); } TEST_F(PerformanceHintEnqueueBufferTest, GivenNonBlockingWriteAndBufferSharesMemWithCPUWhenEnqueueWriteBufferIsCallingWithoutCPUCopyThenContextProvidesCopyDoenstRequiedHint) { buffer->forceDisallowCPUCopy = true; pCmdQ->enqueueWriteBuffer( buffer, CL_FALSE, 0, MemoryConstants::cacheLineSize, address, nullptr, 0, nullptr, nullptr); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_WRITE_BUFFER_DOESNT_REQUIRE_COPY_DATA], static_cast(buffer), address); EXPECT_TRUE(containsHint(expectedHint, userData)); } TEST_F(PerformanceHintEnqueueBufferTest, GivenBlockingWriteAndBufferDoesntShareMemWithCPUWhenEnqueueWriteBufferIsCallingWithCPUCopyThenContextProvidesRequiedCopyHint) { buffer->forceDisallowCPUCopy = false; void *ptr = alignedMalloc(2 * MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize); pCmdQ->enqueueWriteBuffer( buffer, CL_TRUE, 0, MemoryConstants::cacheLineSize, ptr, nullptr, 0, nullptr, nullptr); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_WRITE_BUFFER_REQUIRES_COPY_DATA], static_cast(buffer)); EXPECT_TRUE(containsHint(expectedHint, userData)); alignedFree(ptr); } TEST_F(PerformanceHintEnqueueBufferTest, 
GivenBlockingWriteAndBufferSharesMemWithCPUWhenEnqueueWriteBufferIsCallingWithCPUCopyThenContextProvidesCopyDoenstRequiedHint) { buffer->forceDisallowCPUCopy = false; pCmdQ->enqueueWriteBuffer( buffer, CL_TRUE, 0, MemoryConstants::cacheLineSize, address, nullptr, 0, nullptr, nullptr); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_WRITE_BUFFER_DOESNT_REQUIRE_COPY_DATA], static_cast(buffer), address); EXPECT_TRUE(containsHint(expectedHint, userData)); } TEST_F(PerformanceHintEnqueueBufferTest, GivenNonBlockingReadAndBufferDoesntShareMemWithCPUWhenEnqueueReadBufferIsCallingWithoutCPUCopyThenContextProvidesRequiedCopyHint) { buffer->forceDisallowCPUCopy = true; void *ptr = alignedMalloc(2 * MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize); pCmdQ->enqueueReadBuffer( buffer, CL_FALSE, 0, MemoryConstants::cacheLineSize, ptr, nullptr, 0, nullptr, nullptr); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_BUFFER_REQUIRES_COPY_DATA], static_cast(buffer), ptr); EXPECT_TRUE(containsHint(expectedHint, userData)); alignedFree(ptr); } TEST_F(PerformanceHintEnqueueBufferTest, GivenNonBlockingReadAndBufferSharesMemWithCPUWhenEnqueueReadBufferIsCallingWithoutCPUCopyThenContextProvidesCopyDoenstRequiedHint) { buffer->forceDisallowCPUCopy = true; pCmdQ->enqueueReadBuffer( buffer, CL_FALSE, 0, MemoryConstants::cacheLineSize, address, nullptr, 0, nullptr, nullptr); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_BUFFER_DOESNT_REQUIRE_COPY_DATA], static_cast(buffer), address); EXPECT_TRUE(containsHint(expectedHint, userData)); } TEST_F(PerformanceHintEnqueueBufferTest, GivenBlockingReadAndBufferDoesntShareMemWithCPUWhenEnqueueReadBufferIsCallingWithCPUCopyThenContextProvidesRequiedCopyHint) { buffer->forceDisallowCPUCopy = false; void *ptr = alignedMalloc(2 * MemoryConstants::cacheLineSize, 
MemoryConstants::cacheLineSize); pCmdQ->enqueueReadBuffer( buffer, CL_TRUE, 0, MemoryConstants::cacheLineSize, ptr, nullptr, 0, nullptr, nullptr); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_BUFFER_REQUIRES_COPY_DATA], static_cast(buffer), ptr); EXPECT_TRUE(containsHint(expectedHint, userData)); alignedFree(ptr); } TEST_F(PerformanceHintEnqueueBufferTest, GivenBlockingReadAndBufferSharesMemWithCPUWhenEnqueueReadBufferIsCallingWithCPUCopyThenContextProvidesCopyDoenstRequiedHint) { buffer->forceDisallowCPUCopy = false; pCmdQ->enqueueReadBuffer( buffer, CL_TRUE, 0, MemoryConstants::cacheLineSize, address, nullptr, 0, nullptr, nullptr); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_BUFFER_DOESNT_REQUIRE_COPY_DATA], static_cast(buffer), address); EXPECT_TRUE(containsHint(expectedHint, userData)); } TEST_F(PerformanceHintEnqueueBufferTest, GivenNonBlockingWriteAndNotSharedMemWhenEnqueueWriteBufferRectIsCallingThenContextProvidesProperHint) { size_t bufferOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {1, 2, 1}; void *ptr = alignedMalloc(2 * MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize); pCmdQ->enqueueWriteBufferRect( buffer, CL_FALSE, bufferOrigin, hostOrigin, region, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, ptr, 0, nullptr, nullptr); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_WRITE_BUFFER_RECT_REQUIRES_COPY_DATA], static_cast(buffer)); EXPECT_TRUE(containsHint(expectedHint, userData)); alignedFree(ptr); } TEST_F(PerformanceHintEnqueueBufferTest, GivenNonBlockingWriteAndSharedMemWhenEnqueueWriteBufferRectIsCallingThenContextProvidesProperHint) { size_t bufferOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {1, 2, 1}; 
pCmdQ->enqueueWriteBufferRect( buffer, CL_FALSE, bufferOrigin, hostOrigin, region, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, address, 0, nullptr, nullptr); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_WRITE_BUFFER_RECT_DOESNT_REQUIRE_COPY_DATA], static_cast(buffer)); EXPECT_TRUE(containsHint(expectedHint, userData)); } TEST_P(PerformanceHintEnqueueReadImageTest, GivenHostPtrAndSizeAlignmentsWhenEnqueueReadImageIsCallingThenContextProvidesHintsAboutAlignments) { REQUIRE_IMAGES_OR_SKIP(defaultHwInfo); size_t hostOrigin[] = {0, 0, 0}; void *ptr = alignedMalloc(2 * MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize); uintptr_t addressForReadImage = (uintptr_t)ptr; size_t sizeForReadImageInPixels = MemoryConstants::cacheLineSize; bool hintWithMisalignment = !(alignedAddress && alignedSize); if (!alignedAddress) { addressForReadImage++; } if (!alignedSize) { sizeForReadImageInPixels--; } size_t region[] = {sizeForReadImageInPixels, 1, 1}; pCmdQ->enqueueReadImage(image, CL_FALSE, hostOrigin, region, 0, 0, (void *)addressForReadImage, nullptr, 0, nullptr, nullptr); size_t sizeForReadImage = sizeForReadImageInPixels * image->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes; ASSERT_EQ(alignedSize, isAligned(sizeForReadImage)); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_IMAGE_DOESNT_MEET_ALIGNMENT_RESTRICTIONS], addressForReadImage, sizeForReadImage, MemoryConstants::pageSize, MemoryConstants::pageSize); EXPECT_EQ(hintWithMisalignment, containsHint(expectedHint, userData)); alignedFree(ptr); } TEST_F(PerformanceHintEnqueueImageTest, GivenNonBlockingWriteWhenEnqueueWriteImageIsCallingThenContextProvidesProperHint) { size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {1, 1, 1}; pCmdQ->enqueueWriteImage( image, CL_FALSE, hostOrigin, region, 
MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, address, nullptr, 0, nullptr, nullptr); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_WRITE_IMAGE_REQUIRES_COPY_DATA], static_cast(image)); EXPECT_TRUE(containsHint(expectedHint, userData)); } TEST_F(PerformanceHintEnqueueImageTest, GivenNonBlockingWriteImageSharesStorageWithDstPtrWhenEnqueueWriteImageIsCallingThenContextProvidesProperHint) { size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {1, 1, 1}; void *ptr = zeroCopyImage->getCpuAddressForMemoryTransfer(); pCmdQ->enqueueWriteImage( zeroCopyImage.get(), CL_FALSE, hostOrigin, region, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, ptr, nullptr, 0, nullptr, nullptr); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_WRITE_IMAGE_DOESNT_REQUIRES_COPY_DATA], static_cast(zeroCopyImage.get())); EXPECT_TRUE(containsHint(expectedHint, userData)); } TEST_F(PerformanceHintEnqueueImageTest, GivenNonBlockingReadImageSharesStorageWithDstPtrWhenEnqueueReadImageIsCallingThenContextProvidesProperHint) { size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {1, 1, 1}; void *ptr = zeroCopyImage->getCpuAddressForMemoryTransfer(); pCmdQ->enqueueReadImage( zeroCopyImage.get(), CL_FALSE, hostOrigin, region, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, ptr, nullptr, 0, nullptr, nullptr); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_IMAGE_DOESNT_REQUIRES_COPY_DATA], static_cast(zeroCopyImage.get())); EXPECT_TRUE(containsHint(expectedHint, userData)); } TEST_P(PerformanceHintEnqueueMapTest, GivenZeroCopyFlagWhenEnqueueMapBufferIsCallingThenContextProvidesProperHint) { Buffer *buffer; void *address; bool zeroCopyBuffer = GetParam(); size_t sizeForBuffer = MemoryConstants::cacheLineSize; if (!zeroCopyBuffer) { sizeForBuffer++; } address = alignedMalloc(2 * 
MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize); buffer = Buffer::create(context, CL_MEM_USE_HOST_PTR, sizeForBuffer, address, retVal); pCmdQ->enqueueMapBuffer(buffer, CL_FALSE, 0, 0, MemoryConstants::cacheLineSize, 0, nullptr, nullptr, retVal); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_MAP_BUFFER_DOESNT_REQUIRE_COPY_DATA], static_cast(buffer)); EXPECT_EQ(zeroCopyBuffer, containsHint(expectedHint, userData)); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_MAP_BUFFER_REQUIRES_COPY_DATA], static_cast(buffer)); EXPECT_EQ(!zeroCopyBuffer, containsHint(expectedHint, userData)); alignedFree(address); delete buffer; } TEST_P(PerformanceHintEnqueueMapTest, GivenZeroCopyFlagAndBlockingEventWhenEnqueueMapBufferIsCallingThenContextProvidesProperHint) { void *address; bool zeroCopyBuffer = GetParam(); UserEvent userEvent(context); cl_event blockedEvent = &userEvent; size_t sizeForBuffer = MemoryConstants::cacheLineSize; if (!zeroCopyBuffer) { sizeForBuffer++; } address = alignedMalloc(2 * MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize); auto buffer = std::unique_ptr(Buffer::create(context, CL_MEM_USE_HOST_PTR, sizeForBuffer, address, retVal)); EXPECT_EQ(buffer->isMemObjZeroCopy(), zeroCopyBuffer); pCmdQ->enqueueMapBuffer(buffer.get(), CL_FALSE, 0, 0, MemoryConstants::cacheLineSize, 1, &blockedEvent, nullptr, retVal); EXPECT_TRUE(pCmdQ->isQueueBlocked()); userEvent.setStatus(CL_COMPLETE); EXPECT_FALSE(pCmdQ->isQueueBlocked()); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_MAP_BUFFER_DOESNT_REQUIRE_COPY_DATA], static_cast(buffer.get())); EXPECT_EQ(zeroCopyBuffer, containsHint(expectedHint, userData)); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_MAP_BUFFER_REQUIRES_COPY_DATA], static_cast(buffer.get())); 
EXPECT_EQ(!zeroCopyBuffer, containsHint(expectedHint, userData)); alignedFree(address); } TEST_P(PerformanceHintEnqueueMapTest, GivenZeroCopyFlagWhenEnqueueMapImageIsCallingThenContextProvidesProperHint) { Image *image; bool isZeroCopyImage; isZeroCopyImage = GetParam(); size_t origin[] = {0, 0, 0}; size_t region[] = {1, 1, 1}; if (isZeroCopyImage) { image = ImageHelper>::create(context); } else { image = ImageHelper>::create(context); } EXPECT_EQ(isZeroCopyImage, image->isMemObjZeroCopy()); pCmdQ->enqueueMapImage( image, CL_FALSE, 0, origin, region, nullptr, nullptr, 0, nullptr, nullptr, retVal); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_MAP_IMAGE_DOESNT_REQUIRE_COPY_DATA], static_cast(image)); EXPECT_EQ(isZeroCopyImage, containsHint(expectedHint, userData)); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_MAP_IMAGE_REQUIRES_COPY_DATA], static_cast(image)); EXPECT_EQ(!isZeroCopyImage, containsHint(expectedHint, userData)); delete image; } TEST_P(PerformanceHintEnqueueMapTest, GivenZeroCopyFlagAndBlockingEventWhenEnqueueMapImageIsCallingThenContextProvidesProperHint) { auto image = std::unique_ptr(ImageHelper>::create(context)); bool isZeroCopyImage = GetParam(); size_t origin[] = {0, 0, 0}; size_t region[] = {1, 1, 1}; if (!isZeroCopyImage) { image.reset(ImageHelper>::create(context)); } EXPECT_EQ(isZeroCopyImage, image->isMemObjZeroCopy()); UserEvent userEvent(context); cl_event blockedEvent = &userEvent; void *mapPtr = pCmdQ->enqueueMapImage( image.get(), CL_FALSE, 0, origin, region, nullptr, nullptr, 1, &blockedEvent, nullptr, retVal); EXPECT_TRUE(pCmdQ->isQueueBlocked()); userEvent.setStatus(CL_COMPLETE); pCmdQ->enqueueUnmapMemObject(image.get(), mapPtr, 0, nullptr, nullptr); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_MAP_IMAGE_DOESNT_REQUIRE_COPY_DATA], static_cast(image.get())); 
EXPECT_EQ(isZeroCopyImage, containsHint(expectedHint, userData)); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_MAP_IMAGE_REQUIRES_COPY_DATA], static_cast(image.get())); EXPECT_EQ(!isZeroCopyImage, containsHint(expectedHint, userData)); } TEST_P(PerformanceHintEnqueueMapTest, GivenZeroCopyFlagWhenEnqueueUnmapIsCallingWithBufferThenContextProvidesProperHint) { Buffer *buffer; void *address; bool zeroCopyBuffer = GetParam(); size_t sizeForBuffer = MemoryConstants::cacheLineSize; if (!zeroCopyBuffer) { sizeForBuffer++; } address = alignedMalloc(2 * MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize); buffer = Buffer::create(context, CL_MEM_USE_HOST_PTR, sizeForBuffer, address, retVal); void *mapPtr = pCmdQ->enqueueMapBuffer(buffer, CL_FALSE, 0, 0, MemoryConstants::cacheLineSize, 0, nullptr, nullptr, retVal); pCmdQ->enqueueUnmapMemObject(buffer, mapPtr, 0, nullptr, nullptr); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_UNMAP_MEM_OBJ_REQUIRES_COPY_DATA], mapPtr, static_cast(buffer)); EXPECT_EQ(!zeroCopyBuffer, containsHint(expectedHint, userData)); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_UNMAP_MEM_OBJ_DOESNT_REQUIRE_COPY_DATA], mapPtr); EXPECT_EQ(zeroCopyBuffer, containsHint(expectedHint, userData)); alignedFree(address); delete buffer; } TEST_P(PerformanceHintEnqueueMapTest, GivenZeroCopyAndBlockedEventFlagWhenEnqueueUnmapIsCallingWithBufferThenContextProvidesProperHint) { void *address; bool zeroCopyBuffer = GetParam(); UserEvent userEvent(context); cl_event blockedEvent = &userEvent; size_t sizeForBuffer = MemoryConstants::cacheLineSize; if (!zeroCopyBuffer) { sizeForBuffer++; } address = alignedMalloc(2 * MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize); auto buffer = std::unique_ptr(Buffer::create(context, CL_MEM_USE_HOST_PTR, sizeForBuffer, address, retVal)); 
EXPECT_EQ(buffer->isMemObjZeroCopy(), zeroCopyBuffer); void *mapPtr = pCmdQ->enqueueMapBuffer(buffer.get(), CL_FALSE, 0, 0, MemoryConstants::cacheLineSize, 1, &blockedEvent, nullptr, retVal); EXPECT_TRUE(pCmdQ->isQueueBlocked()); pCmdQ->enqueueUnmapMemObject(buffer.get(), mapPtr, 0, nullptr, nullptr); userEvent.setStatus(CL_COMPLETE); EXPECT_FALSE(pCmdQ->isQueueBlocked()); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_UNMAP_MEM_OBJ_REQUIRES_COPY_DATA], mapPtr, static_cast(buffer.get())); EXPECT_EQ(!zeroCopyBuffer, containsHint(expectedHint, userData)); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_UNMAP_MEM_OBJ_DOESNT_REQUIRE_COPY_DATA], mapPtr); EXPECT_EQ(zeroCopyBuffer, containsHint(expectedHint, userData)); alignedFree(address); } TEST_P(PerformanceHintEnqueueMapTest, GivenZeroCopyFlagWhenEnqueueUnmapIsCallingWithImageThenContextProvidesProperHint) { Image *image; bool isZeroCopyImage; isZeroCopyImage = GetParam(); size_t origin[] = {0, 0, 0}; size_t region[] = {1, 1, 1}; if (isZeroCopyImage) { image = ImageHelper>::create(context); } else { image = ImageHelper>::create(context); } EXPECT_EQ(isZeroCopyImage, image->isMemObjZeroCopy()); void *mapPtr = pCmdQ->enqueueMapImage(image, CL_FALSE, 0, origin, region, nullptr, nullptr, 0, nullptr, nullptr, retVal); pCmdQ->enqueueUnmapMemObject(image, mapPtr, 0, nullptr, nullptr); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_UNMAP_MEM_OBJ_REQUIRES_COPY_DATA], mapPtr, static_cast(image)); EXPECT_EQ(!isZeroCopyImage, containsHint(expectedHint, userData)); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_UNMAP_MEM_OBJ_DOESNT_REQUIRE_COPY_DATA], mapPtr); EXPECT_EQ(isZeroCopyImage, containsHint(expectedHint, userData)); delete image; } TEST_F(PerformanceHintEnqueueTest, 
GivenSVMPointerWhenEnqueueSVMMapIsCallingThenContextProvidesProperHint) { REQUIRE_SVM_OR_SKIP(pPlatform->getClDevice(0)); void *svmPtr = context->getSVMAllocsManager()->createSVMAlloc(256, {}, context->getRootDeviceIndices(), context->getDeviceBitfields()); pCmdQ->enqueueSVMMap(CL_FALSE, 0, svmPtr, 256, 0, nullptr, nullptr, false); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_SVM_MAP_DOESNT_REQUIRE_COPY_DATA], svmPtr); EXPECT_TRUE(containsHint(expectedHint, userData)); context->getSVMAllocsManager()->freeSVMAlloc(svmPtr); } TEST_F(PerformanceHintEnqueueKernelTest, GivenNullLocalSizeAndEnableComputeWorkSizeNDIsDefaultWhenEnqueueKernelIsCallingThenContextProvidesProperHint) { retVal = pCmdQ->enqueueKernel(kernel, 1, nullptr, globalWorkGroupSize, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); auto localWorkSize = kernel->getLocalWorkSizeValues(); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[NULL_LOCAL_WORKGROUP_SIZE], kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str(), *localWorkSize[0], *localWorkSize[1], *localWorkSize[2]); EXPECT_TRUE(containsHint(expectedHint, userData)); } TEST_F(PerformanceHintEnqueueKernelTest, GivenNullLocalSizeAndEnableComputeWorkSizeNDIsTrueWhenEnqueueKernelIsCallingThenContextProvidesProperHint) { bool isWorkGroupSizeEnabled = DebugManager.flags.EnableComputeWorkSizeND.get(); DebugManager.flags.EnableComputeWorkSizeND.set(true); retVal = pCmdQ->enqueueKernel(kernel, 1, nullptr, globalWorkGroupSize, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); auto localWorkSize = kernel->getLocalWorkSizeValues(); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[NULL_LOCAL_WORKGROUP_SIZE], kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str(), *localWorkSize[0], *localWorkSize[1], *localWorkSize[2]); EXPECT_TRUE(containsHint(expectedHint, 
userData)); DebugManager.flags.EnableComputeWorkSizeND.set(isWorkGroupSizeEnabled); } TEST_F(PerformanceHintEnqueueKernelTest, GivenNullLocalSizeAndEnableComputeWorkSizeNDIsFalseWhenEnqueueKernelIsCallingThenContextProvidesProperHint) { bool isWorkGroupSizeEnabled = DebugManager.flags.EnableComputeWorkSizeND.get(); DebugManager.flags.EnableComputeWorkSizeND.set(false); retVal = pCmdQ->enqueueKernel(kernel, 1, nullptr, globalWorkGroupSize, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); auto localWorkSize = kernel->getLocalWorkSizeValues(); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[NULL_LOCAL_WORKGROUP_SIZE], kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str(), *localWorkSize[0], *localWorkSize[1], *localWorkSize[2]); EXPECT_TRUE(containsHint(expectedHint, userData)); DebugManager.flags.EnableComputeWorkSizeND.set(isWorkGroupSizeEnabled); } TEST_F(PerformanceHintEnqueueKernelTest, GivenNullLocalSizeAndEnableComputeWorkSizeSquaredIsDefaultWhenEnqueueKernelIsCallingThenContextProvidesProperHint) { retVal = pCmdQ->enqueueKernel(kernel, 1, nullptr, globalWorkGroupSize, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); auto localWorkSize = kernel->getLocalWorkSizeValues(); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[NULL_LOCAL_WORKGROUP_SIZE], kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str(), *localWorkSize[0], *localWorkSize[1], *localWorkSize[2]); EXPECT_TRUE(containsHint(expectedHint, userData)); } TEST_F(PerformanceHintEnqueueKernelTest, GivenNullLocalSizeAndEnableComputeWorkSizeSquaredIsTrueWhenEnqueueKernelIsCallingThenContextProvidesProperHint) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableComputeWorkSizeSquared.set(true); DebugManager.flags.EnableComputeWorkSizeND.set(false); retVal = pCmdQ->enqueueKernel(kernel, 1, nullptr, globalWorkGroupSize, nullptr, 0, nullptr, nullptr); 
EXPECT_EQ(CL_SUCCESS, retVal); auto localWorkSize = kernel->getLocalWorkSizeValues(); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[NULL_LOCAL_WORKGROUP_SIZE], kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str(), *localWorkSize[0], *localWorkSize[1], *localWorkSize[2]); EXPECT_TRUE(containsHint(expectedHint, userData)); } TEST_F(PerformanceHintEnqueueKernelTest, GivenNullLocalSizeAndEnableComputeWorkSizeSquaredIsFalseWhenEnqueueKernelIsCallingThenContextProvidesProperHint) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableComputeWorkSizeSquared.set(false); DebugManager.flags.EnableComputeWorkSizeND.set(false); retVal = pCmdQ->enqueueKernel(kernel, 1, nullptr, globalWorkGroupSize, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); auto localWorkSize = kernel->getLocalWorkSizeValues(); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[NULL_LOCAL_WORKGROUP_SIZE], kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str(), *localWorkSize[0], *localWorkSize[1], *localWorkSize[2]); EXPECT_TRUE(containsHint(expectedHint, userData)); } TEST_P(PerformanceHintEnqueueKernelBadSizeTest, GivenBadLocalWorkGroupSizeWhenEnqueueKernelIsCallingThenContextProvidesProperHint) { size_t localWorkGroupSize[3]; int badSizeDimension; uint32_t workDim = globalWorkGroupSize[1] == 1 ? 1 : globalWorkGroupSize[2] == 1 ? 
2 : 3; DispatchInfo dispatchInfo(&pCmdQ->getClDevice(), kernel, workDim, Vec3(globalWorkGroupSize), Vec3(0u, 0u, 0u), Vec3(0u, 0u, 0u)); auto computedLocalWorkgroupSize = computeWorkgroupSize(dispatchInfo); localWorkGroupSize[0] = computedLocalWorkgroupSize.x; localWorkGroupSize[1] = computedLocalWorkgroupSize.y; localWorkGroupSize[2] = computedLocalWorkgroupSize.z; badSizeDimension = GetParam(); if (localWorkGroupSize[badSizeDimension] > 1) { localWorkGroupSize[badSizeDimension] /= 2; } else { localWorkGroupSize[0] /= 2; } retVal = pCmdQ->enqueueKernel(kernel, 3, nullptr, globalWorkGroupSize, localWorkGroupSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[BAD_LOCAL_WORKGROUP_SIZE], localWorkGroupSize[0], localWorkGroupSize[1], localWorkGroupSize[2], kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str(), computedLocalWorkgroupSize.x, computedLocalWorkgroupSize.y, computedLocalWorkgroupSize.z); EXPECT_TRUE(containsHint(expectedHint, userData)); } TEST_F(PerformanceHintEnqueueKernelPrintfTest, GivenKernelWithPrintfWhenEnqueueKernelIsCalledWithWorkDim3ThenContextProvidesProperHint) { size_t preferredWorkGroupSize[3]; auto maxWorkGroupSize = static_cast(pPlatform->getClDevice(0)->getSharedDeviceInfo().maxWorkGroupSize); if (DebugManager.flags.EnableComputeWorkSizeND.get()) { WorkSizeInfo wsInfo(maxWorkGroupSize, 0u, 32u, 0u, ::defaultHwInfo.get(), 32u, 0u, false, false, false); computeWorkgroupSizeND(wsInfo, preferredWorkGroupSize, globalWorkGroupSize, 2); } else computeWorkgroupSize2D(maxWorkGroupSize, preferredWorkGroupSize, globalWorkGroupSize, 32); retVal = pCmdQ->enqueueKernel(kernel, 3, nullptr, globalWorkGroupSize, preferredWorkGroupSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[PRINTF_DETECTED_IN_KERNEL], 
kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str()); EXPECT_TRUE(containsHint(expectedHint, userData)); } TEST_F(PerformanceHintEnqueueTest, GivenKernelWithCoherentPtrWhenEnqueueKernelIsCalledWithWorkDim2ThenContextProvidesProperHint) { size_t preferredWorkGroupSize[3]; size_t globalWorkGroupSize[3] = {1, 1, 1}; auto maxWorkGroupSize = static_cast(pPlatform->getClDevice(0)->getSharedDeviceInfo().maxWorkGroupSize); MockKernelWithInternals mockKernel(*pPlatform->getClDevice(0), context); Kernel::SimpleKernelArgInfo kernelArgInfo; if (DebugManager.flags.EnableComputeWorkSizeND.get()) { WorkSizeInfo wsInfo(maxWorkGroupSize, 0u, 32u, 0u, ::defaultHwInfo.get(), 32u, 0u, false, false, false); computeWorkgroupSizeND(wsInfo, preferredWorkGroupSize, globalWorkGroupSize, 2); } else computeWorkgroupSize2D(maxWorkGroupSize, preferredWorkGroupSize, globalWorkGroupSize, 32); auto buffer = new MockBuffer(); buffer->getGraphicsAllocation(mockRootDeviceIndex)->setCoherent(true); auto clBuffer = (cl_mem)buffer; kernelArgInfo.object = clBuffer; kernelArgInfo.type = Kernel::kernelArgType::BUFFER_OBJ; std::vector kernelArguments; kernelArguments.resize(1); kernelArguments[0] = kernelArgInfo; mockKernel.kernelInfo.kernelDescriptor.payloadMappings.explicitArgs.resize(1); mockKernel.mockKernel->setKernelArguments(kernelArguments); retVal = pCmdQ->enqueueKernel(mockKernel.mockKernel, 2, nullptr, globalWorkGroupSize, preferredWorkGroupSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[KERNEL_REQUIRES_COHERENCY], mockKernel.mockKernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str()); EXPECT_TRUE(containsHint(expectedHint, userData)); delete buffer; } const int validDimensions[] = {0, 1, 2}; INSTANTIATE_TEST_CASE_P( DriverDiagnosticsTests, PerformanceHintEnqueueReadBufferTest, testing::Combine( ::testing::Bool(), ::testing::Bool())); 
INSTANTIATE_TEST_CASE_P( DriverDiagnosticsTests, PerformanceHintEnqueueReadImageTest, testing::Combine( ::testing::Bool(), ::testing::Bool())); INSTANTIATE_TEST_CASE_P( DriverDiagnosticsTests, PerformanceHintEnqueueMapTest, testing::Bool()); INSTANTIATE_TEST_CASE_P( DriverDiagnosticsTests, PerformanceHintEnqueueKernelBadSizeTest, testing::ValuesIn(validDimensions)); compute-runtime-22.14.22890/opencl/test/unit_test/context/driver_diagnostics_tests.cpp000066400000000000000000001307451422164147700311350ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "driver_diagnostics_tests.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/mocks/mock_gmm.h" #include "opencl/source/helpers/cl_memory_properties_helpers.h" #include "opencl/source/mem_obj/mem_obj_helper.h" #include using namespace NEO; bool containsHint(const char *providedHint, char *userData) { for (auto i = 0; i < maxHintCounter; i++) { if (strcmp(providedHint, userData + i * DriverDiagnostics::maxHintStringSize) == 0) { return true; } } return false; } void CL_CALLBACK callbackFunction(const char *providedHint, const void *flags, size_t size, void *userData) { int offset = 0; while (((char *)userData + offset)[0] != 0) { offset += DriverDiagnostics::maxHintStringSize; } strcpy_s((char *)userData + offset, DriverDiagnostics::maxHintStringSize, providedHint); } cl_diagnostics_verbose_level diagnosticsVerboseLevels[] = { CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL}; TEST_P(VerboseLevelTest, GivenVerboseLevelWhenProvidedHintLevelIsSameOrAllThenCallbackFunctionTakesProvidedHint) { cl_device_id deviceID = devices[0]; cl_diagnostics_verbose_level diagnosticsLevel = GetParam(); cl_context_properties validProperties[3] = {CL_CONTEXT_SHOW_DIAGNOSTICS_INTEL, (cl_context_properties)diagnosticsLevel, 0}; retVal = CL_SUCCESS; 
auto context = Context::create(validProperties, ClDeviceVector(&deviceID, 1), callbackFunction, (void *)userData, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, context); for (auto hintLevel : validLevels) { memset(userData, 0, maxHintCounter * DriverDiagnostics::maxHintStringSize); context->providePerformanceHint(hintLevel, hintId); if (hintLevel == diagnosticsLevel || hintLevel == CL_CONTEXT_DIAGNOSTICS_LEVEL_ALL_INTEL) { EXPECT_TRUE(containsHint(expectedHint, userData)); } else { EXPECT_FALSE(containsHint(expectedHint, userData)); } } delete context; } TEST_P(VerboseLevelTest, GivenVerboseLevelAllWhenAnyHintIsProvidedThenCallbackFunctionTakesProvidedHint) { cl_device_id deviceID = devices[0]; cl_diagnostics_verbose_level providedHintLevel = GetParam(); cl_context_properties validProperties[3] = {CL_CONTEXT_SHOW_DIAGNOSTICS_INTEL, CL_CONTEXT_DIAGNOSTICS_LEVEL_ALL_INTEL, 0}; retVal = CL_SUCCESS; auto context = Context::create(validProperties, ClDeviceVector(&deviceID, 1), callbackFunction, (void *)userData, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, context); context->providePerformanceHint(providedHintLevel, hintId); EXPECT_TRUE(containsHint(expectedHint, userData)); delete context; } TEST_P(PerformanceHintBufferTest, GivenHostPtrAndSizeAlignmentsWhenBufferIsCreatingThenContextProvidesHintsAboutAlignmentsAndAllocatingMemory) { uintptr_t addressForBuffer = (uintptr_t)address; size_t sizeForBuffer = MemoryConstants::cacheLineSize; if (!alignedAddress) { addressForBuffer++; } if (!alignedSize) { sizeForBuffer--; } auto flags = CL_MEM_USE_HOST_PTR; if (alignedAddress && alignedSize) { flags |= CL_MEM_FORCE_HOST_MEMORY_INTEL; } buffer = Buffer::create( context, flags, sizeForBuffer, (void *)addressForBuffer, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_BUFFER_DOESNT_MEET_ALIGNMENT_RESTRICTIONS], addressForBuffer, 
sizeForBuffer, MemoryConstants::pageSize, MemoryConstants::pageSize); EXPECT_EQ(!(alignedSize && alignedAddress), containsHint(expectedHint, userData)); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_BUFFER_NEEDS_ALLOCATE_MEMORY], 0); EXPECT_EQ(!(alignedSize && alignedAddress), containsHint(expectedHint, userData)); } TEST_P(PerformanceHintCommandQueueTest, GivenProfilingFlagAndPreemptionFlagWhenCommandQueueIsCreatingThenContextProvidesProperHints) { cl_command_queue_properties properties = 0; if (profilingEnabled) { properties = CL_QUEUE_PROFILING_ENABLE; } cmdQ = clCreateCommandQueue(context, context->getDevice(0), properties, &retVal); ASSERT_NE(nullptr, cmdQ); ASSERT_EQ(CL_SUCCESS, retVal); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[DRIVER_CALLS_INTERNAL_CL_FLUSH], 0); EXPECT_TRUE(containsHint(expectedHint, userData)); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[PROFILING_ENABLED], 0); EXPECT_EQ(profilingEnabled, containsHint(expectedHint, userData)); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[PROFILING_ENABLED_WITH_DISABLED_PREEMPTION], 0); if (context->getDevice(0)->getHardwareInfo().platform.eProductFamily < IGFX_SKYLAKE && preemptionSupported && profilingEnabled) { EXPECT_TRUE(containsHint(expectedHint, userData)); } else { EXPECT_FALSE(containsHint(expectedHint, userData)); } } TEST_P(PerformanceHintCommandQueueTest, GivenEnabledProfilingFlagAndSupportedPreemptionFlagWhenCommandQueueIsCreatingWithPropertiesThenContextProvidesProperHints) { cl_command_queue_properties properties[3] = {0}; if (profilingEnabled) { properties[0] = CL_QUEUE_PROPERTIES; properties[1] = CL_QUEUE_PROFILING_ENABLE; } cmdQ = clCreateCommandQueueWithProperties(context, context->getDevice(0), properties, &retVal); ASSERT_NE(nullptr, cmdQ); ASSERT_EQ(CL_SUCCESS, retVal); snprintf(expectedHint, 
DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[DRIVER_CALLS_INTERNAL_CL_FLUSH], 0); EXPECT_TRUE(containsHint(expectedHint, userData)); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[PROFILING_ENABLED], 0); EXPECT_EQ(profilingEnabled, containsHint(expectedHint, userData)); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[PROFILING_ENABLED_WITH_DISABLED_PREEMPTION], 0); if (context->getDevice(0)->getHardwareInfo().platform.eProductFamily < IGFX_SKYLAKE && preemptionSupported && profilingEnabled) { EXPECT_TRUE(containsHint(expectedHint, userData)); } else { EXPECT_FALSE(containsHint(expectedHint, userData)); } } TEST_F(PerformanceHintTest, GivenAlignedHostPtrWhenSubbufferIsCreatingThenContextProvidesHintAboutSharingMemoryWithParentBuffer) { cl_mem_flags flg = CL_MEM_USE_HOST_PTR; cl_buffer_region region = {0, MemoryConstants::cacheLineSize - 1}; void *address = alignedMalloc(MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize); auto buffer = clCreateBuffer(context, flg, MemoryConstants::cacheLineSize, address, &retVal); EXPECT_NE(nullptr, buffer); EXPECT_EQ(CL_SUCCESS, retVal); auto subBuffer = clCreateSubBuffer(buffer, CL_MEM_READ_WRITE, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &retVal); EXPECT_NE(nullptr, subBuffer); EXPECT_EQ(CL_SUCCESS, retVal); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[SUBBUFFER_SHARES_MEMORY], buffer); EXPECT_TRUE(containsHint(expectedHint, userData)); retVal = clReleaseMemObject(subBuffer); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(buffer); EXPECT_EQ(CL_SUCCESS, retVal); alignedFree(address); } TEST_F(PerformanceHintTest, GivenContextWhenSVMAllocIsCreatingThenContextProvidesHintAboutAlignment) { const ClDeviceInfo &devInfo = pPlatform->getClDevice(0)->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { cl_mem_flags flg = CL_MEM_READ_WRITE; size_t size = 4096; auto 
SVMPtr = clSVMAlloc(context, flg, size, 128); EXPECT_NE(nullptr, SVMPtr); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_SVM_ALLOC_MEETS_ALIGNMENT_RESTRICTIONS], SVMPtr, size); EXPECT_TRUE(containsHint(expectedHint, userData)); clSVMFree(context, SVMPtr); } } TEST_F(PerformanceHintTest, GivenNullContextAndEmptyDispatchinfoAndEnableComputeWorkSizeNDIsDefaultWhenProvideLocalWorkGroupSizeIsCalledThenItDoesntCrash) { DispatchInfo emptyDispatchInfo; provideLocalWorkGroupSizeHints(nullptr, emptyDispatchInfo); } TEST_F(PerformanceHintTest, GivenNullContextAndEmptyDispatchinfoAndEnableComputeWorkSizeNDIsTrueWhenProvideLocalWorkGroupSizeIsCalledThenItDoesntCrash) { bool isWorkGroupSizeEnabled = DebugManager.flags.EnableComputeWorkSizeND.get(); DebugManager.flags.EnableComputeWorkSizeND.set(true); DispatchInfo emptyDispatchInfo; provideLocalWorkGroupSizeHints(nullptr, emptyDispatchInfo); DebugManager.flags.EnableComputeWorkSizeND.set(isWorkGroupSizeEnabled); } TEST_F(PerformanceHintTest, GivenNullContextAndEmptyDispatchinfoAndEnableComputeWorkSizeNDIsFalseWhenProvideLocalWorkGroupSizeIsCalledThenItDoesntCrash) { bool isWorkGroupSizeEnabled = DebugManager.flags.EnableComputeWorkSizeND.get(); DebugManager.flags.EnableComputeWorkSizeND.set(false); DispatchInfo emptyDispatchInfo; provideLocalWorkGroupSizeHints(nullptr, emptyDispatchInfo); DebugManager.flags.EnableComputeWorkSizeND.set(isWorkGroupSizeEnabled); } TEST_F(PerformanceHintTest, GivenNullContextAndEmptyDispatchinfoAndEnableComputeWorkSizeSquaredIsDefaultWhenProvideLocalWorkGroupSizeIsCalledThenItDoesntCrash) { DispatchInfo emptyDispatchInfo; provideLocalWorkGroupSizeHints(nullptr, emptyDispatchInfo); } TEST_F(PerformanceHintTest, GivenNullContextAndEmptyDispatchinfoAndEnableComputeWorkSizeSquaredIsTrueWhenProvideLocalWorkGroupSizeIsCalledThenItDoesntCrash) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableComputeWorkSizeSquared.set(true); 
DebugManager.flags.EnableComputeWorkSizeND.set(false); DispatchInfo emptyDispatchInfo; provideLocalWorkGroupSizeHints(nullptr, emptyDispatchInfo); } TEST_F(PerformanceHintTest, GivenNullContextAndEmptyDispatchinfoAndEnableComputeWorkSizeSquaredIsFalseWhenProvideLocalWorkGroupSizeIsCalledThenItDoesntCrash) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableComputeWorkSizeSquared.set(false); DebugManager.flags.EnableComputeWorkSizeND.set(false); DispatchInfo emptyDispatchInfo; provideLocalWorkGroupSizeHints(nullptr, emptyDispatchInfo); } TEST_F(PerformanceHintTest, GivenNullContextAndInvalidDispatchinfoAndEnableComputeWorkSizeNDIsDefaultWhenProvideLocalWorkGroupSizeIsCalledThenItDoesntCrash) { auto pDevice = castToObject(devices[0]); MockKernelWithInternals mockKernel(*pDevice, context); DispatchInfo invalidDispatchInfo(pDevice, mockKernel, 100, {32, 32, 32}, {1, 1, 1}, {0, 0, 0}); provideLocalWorkGroupSizeHints(context, invalidDispatchInfo); } TEST_F(PerformanceHintTest, GivenNullContextAndInvalidDispatchinfoAndEnableComputeWorkSizeNDIsTrueWhenProvideLocalWorkGroupSizeIsCalledThenItDoesntCrash) { bool isWorkGroupSizeEnabled = DebugManager.flags.EnableComputeWorkSizeND.get(); DebugManager.flags.EnableComputeWorkSizeND.set(true); auto pDevice = castToObject(devices[0]); MockKernelWithInternals mockKernel(*pDevice, context); DispatchInfo invalidDispatchInfo(pDevice, mockKernel, 100, {32, 32, 32}, {1, 1, 1}, {0, 0, 0}); provideLocalWorkGroupSizeHints(context, invalidDispatchInfo); DebugManager.flags.EnableComputeWorkSizeND.set(isWorkGroupSizeEnabled); } TEST_F(PerformanceHintTest, GivenNullContextAndInvalidDispatchinfoAndEnableComputeWorkSizeNDIsFalseWhenProvideLocalWorkGroupSizeIsCalledThenItDoesntCrash) { bool isWorkGroupSizeEnabled = DebugManager.flags.EnableComputeWorkSizeND.get(); DebugManager.flags.EnableComputeWorkSizeND.set(false); auto pDevice = castToObject(devices[0]); MockKernelWithInternals mockKernel(*pDevice, context); DispatchInfo 
invalidDispatchInfo(pDevice, mockKernel, 100, {32, 32, 32}, {1, 1, 1}, {0, 0, 0}); provideLocalWorkGroupSizeHints(context, invalidDispatchInfo); DebugManager.flags.EnableComputeWorkSizeND.set(isWorkGroupSizeEnabled); } TEST_F(PerformanceHintTest, GivenNullContextAndInvalidDispatchinfoAndEnableComputeWorkSizeSquaredIsDefaultWhenProvideLocalWorkGroupSizeIsCalledThenItDoesntCrash) { auto pDevice = castToObject(devices[0]); MockKernelWithInternals mockKernel(*pDevice, context); DispatchInfo invalidDispatchInfo(pDevice, mockKernel, 100, {32, 32, 32}, {1, 1, 1}, {0, 0, 0}); provideLocalWorkGroupSizeHints(context, invalidDispatchInfo); } TEST_F(PerformanceHintTest, GivenNullContextAndInvalidDispatchinfoAndEnableComputeWorkSizeSquaredIsTrueWhenProvideLocalWorkGroupSizeIsCalledThenItDoesntCrash) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableComputeWorkSizeSquared.set(true); DebugManager.flags.EnableComputeWorkSizeND.set(false); auto pDevice = castToObject(devices[0]); MockKernelWithInternals mockKernel(*pDevice, context); DispatchInfo invalidDispatchInfo(pDevice, mockKernel, 100, {32, 32, 32}, {1, 1, 1}, {0, 0, 0}); provideLocalWorkGroupSizeHints(context, invalidDispatchInfo); } TEST_F(PerformanceHintTest, GivenNullContextAndInvalidDispatchinfoAndEnableComputeWorkSizeSquaredIsFalseWhenProvideLocalWorkGroupSizeIsCalledThenItDoesntCrash) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableComputeWorkSizeSquared.set(false); DebugManager.flags.EnableComputeWorkSizeND.set(false); auto pDevice = castToObject(devices[0]); MockKernelWithInternals mockKernel(*pDevice, context); DispatchInfo invalidDispatchInfo(pDevice, mockKernel, 100, {32, 32, 32}, {1, 1, 1}, {0, 0, 0}); provideLocalWorkGroupSizeHints(context, invalidDispatchInfo); } TEST_F(PerformanceHintTest, GivenContextAndDispatchinfoAndEnableComputeWorkSizeSquaredIsDefaultWhenProvideLocalWorkGroupSizeIsCalledThenItDoesntCrash) { auto pDevice = castToObject(devices[0]); MockKernelWithInternals 
mockKernel(*pDevice, context); DispatchInfo invalidDispatchInfo(pDevice, mockKernel, 100, {32, 32, 32}, {1, 1, 1}, {0, 0, 0}); provideLocalWorkGroupSizeHints(context, invalidDispatchInfo); } TEST_F(PerformanceHintTest, GivenContextAndDispatchinfoAndEnableComputeWorkSizeSquaredIsTrueWhenProvideLocalWorkGroupSizeIsCalledThenItDoesntCrash) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableComputeWorkSizeSquared.set(true); DebugManager.flags.EnableComputeWorkSizeND.set(false); auto pDevice = castToObject(devices[0]); MockKernelWithInternals mockKernel(*pDevice, context); DispatchInfo invalidDispatchInfo(pDevice, mockKernel, 2, {32, 32, 1}, {1, 1, 1}, {0, 0, 0}); provideLocalWorkGroupSizeHints(context, invalidDispatchInfo); } TEST_F(PerformanceHintTest, GivenContextAndDispatchinfoAndEnableComputeWorkSizeSquaredIsFalseWhenProvideLocalWorkGroupSizeIsCalledThenItDoesntCrash) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableComputeWorkSizeSquared.set(false); DebugManager.flags.EnableComputeWorkSizeND.set(false); auto pDevice = castToObject(devices[0]); MockKernelWithInternals mockKernel(*pDevice, context); DispatchInfo invalidDispatchInfo(pDevice, mockKernel, 2, {32, 32, 1}, {1, 1, 1}, {0, 0, 0}); provideLocalWorkGroupSizeHints(context, invalidDispatchInfo); } TEST_F(PerformanceHintTest, GivenZeroCopyImageAndContextWhenCreateImageThenContextProvidesHintAboutAlignment) { std::unique_ptr image(ImageHelper::create(context)); EXPECT_TRUE(image->isMemObjZeroCopy()); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_IMAGE_MEETS_ALIGNMENT_RESTRICTIONS], static_cast(image.get())); EXPECT_TRUE(containsHint(expectedHint, userData)); } TEST_F(PerformanceHintTest, GivenNonZeroCopyImageAndContextWhenCreateImageThenContextDoesntProvidesHintAboutAlignment) { std::unique_ptr image(ImageHelper>::create(context)); EXPECT_FALSE(image->isMemObjZeroCopy()); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, 
DriverDiagnostics::hintFormat[CL_IMAGE_MEETS_ALIGNMENT_RESTRICTIONS], static_cast(image.get())); EXPECT_FALSE(containsHint(expectedHint, userData)); } TEST_F(PerformanceHintTest, givenPrintDriverDiagnosticValueWhenContextIsCreatedThenItHasHintLevelSetToThatValue) { DebugManagerStateRestore dbgRestore; auto hintLevel = 1; DebugManager.flags.PrintDriverDiagnostics.set(hintLevel); auto pDevice = castToObject(devices[0]); cl_device_id clDevice = pDevice; auto context = Context::create(nullptr, ClDeviceVector(&clDevice, 1), nullptr, nullptr, retVal); EXPECT_TRUE(!!context->isProvidingPerformanceHints()); auto driverDiagnostics = context->driverDiagnostics; ASSERT_NE(nullptr, driverDiagnostics); EXPECT_TRUE(driverDiagnostics->validFlags(hintLevel)); context->release(); } TEST_F(PerformanceHintTest, givenPrintDriverDiagnosticsDebugModeEnabledWhenHintIsCalledThenDriverProvidedOutputOnCout) { DebugManagerStateRestore dbgRestore; auto hintLevel = 255; DebugManager.flags.PrintDriverDiagnostics.set(hintLevel); auto pDevice = castToObject(devices[0]); cl_device_id clDevice = pDevice; auto context = Context::create(nullptr, ClDeviceVector(&clDevice, 1), nullptr, nullptr, retVal); testing::internal::CaptureStdout(); auto buffer = std::unique_ptr(Buffer::create( context, CL_MEM_READ_ONLY, 4096, nullptr, retVal)); std::string output = testing::internal::GetCapturedStdout(); EXPECT_NE(0u, output.size()); EXPECT_EQ('\n', output[0]); context->release(); } TEST_F(PerformanceHintTest, givenPrintDriverDiagnosticsAndBadHintLevelWhenActionForHintOccursThenNothingIsProvidedToCout) { DebugManagerStateRestore dbgRestore; auto hintLevel = 8; DebugManager.flags.PrintDriverDiagnostics.set(hintLevel); auto pDevice = castToObject(devices[0]); cl_device_id clDevice = pDevice; auto context = Context::create(nullptr, ClDeviceVector(&clDevice, 1), nullptr, nullptr, retVal); testing::internal::CaptureStdout(); auto buffer = Buffer::create( context, CL_MEM_READ_ONLY, 4096, nullptr, retVal); std::string 
output = testing::internal::GetCapturedStdout(); EXPECT_EQ(0u, output.size()); buffer->release(); context->release(); } TEST_F(PerformanceHintTest, givenPrintDriverDiagnosticsDebugModeEnabledWhenContextIsBeingCreatedThenPropertiesPassedToContextAreOverwritten) { DebugManagerStateRestore dbgRestore; auto hintLevel = 1; DebugManager.flags.PrintDriverDiagnostics.set(hintLevel); auto pDevice = castToObject(devices[0]); cl_device_id clDevice = pDevice; cl_context_properties validProperties[3] = {CL_CONTEXT_SHOW_DIAGNOSTICS_INTEL, CL_CONTEXT_DIAGNOSTICS_LEVEL_ALL_INTEL, 0}; auto retValue = CL_SUCCESS; auto context = Context::create(validProperties, ClDeviceVector(&clDevice, 1), callbackFunction, (void *)userData, retVal); EXPECT_EQ(CL_SUCCESS, retValue); auto driverDiagnostics = context->driverDiagnostics; ASSERT_NE(nullptr, driverDiagnostics); EXPECT_TRUE(driverDiagnostics->validFlags(hintLevel)); EXPECT_FALSE(driverDiagnostics->validFlags(2)); context->release(); } TEST_F(PerformanceHintTest, givenPrintDriverDiagnosticsDebugModeEnabledWhenCallFillWithKernelObjsForAuxTranslationOnMemObjectThenContextProvidesProperHint) { DebugManagerStateRestore dbgRestore; DebugManager.flags.PrintDriverDiagnostics.set(1); auto pDevice = castToObject(devices[0]); MockKernelWithInternals mockKernel(*pDevice, context); MockBuffer buffer; cl_mem clMem = &buffer; MockBuffer::setAllocationType(buffer.getGraphicsAllocation(0), pDevice->getRootDeviceEnvironment().getGmmClientContext(), true); mockKernel.kernelInfo.addArgBuffer(0, 0, 0, 0); mockKernel.kernelInfo.addExtendedMetadata(0, "arg0"); mockKernel.mockKernel->initialize(); mockKernel.mockKernel->auxTranslationRequired = true; mockKernel.mockKernel->setArgBuffer(0, sizeof(cl_mem *), &clMem); testing::internal::CaptureStdout(); KernelObjsForAuxTranslation kernelObjects; mockKernel.mockKernel->fillWithKernelObjsForAuxTranslation(kernelObjects); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, 
DriverDiagnostics::hintFormat[KERNEL_ARGUMENT_AUX_TRANSLATION], mockKernel.mockKernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str(), 0, mockKernel.mockKernel->getKernelInfo().getExtendedMetadata(0).argName.c_str()); std::string output = testing::internal::GetCapturedStdout(); EXPECT_NE(0u, output.size()); EXPECT_TRUE(containsHint(expectedHint, userData)); } TEST_F(PerformanceHintTest, givenPrintDriverDiagnosticsDebugModeEnabledWhenCallFillWithKernelObjsForAuxTranslationOnGfxAllocationThenContextProvidesProperHint) { DebugManagerStateRestore dbgRestore; DebugManager.flags.PrintDriverDiagnostics.set(1); auto pDevice = castToObject(devices[0]); MockKernelWithInternals mockKernel(*pDevice, context); char data[128]; void *ptr = &data; MockGraphicsAllocation gfxAllocation(ptr, 128); MockBuffer::setAllocationType(&gfxAllocation, pDevice->getRootDeviceEnvironment().getGmmClientContext(), true); mockKernel.kernelInfo.addExtendedMetadata(0, "arg0"); mockKernel.kernelInfo.addArgBuffer(0, 0, 0, 0); mockKernel.mockKernel->initialize(); mockKernel.mockKernel->auxTranslationRequired = true; mockKernel.mockKernel->setArgSvmAlloc(0, ptr, &gfxAllocation, 0u); testing::internal::CaptureStdout(); KernelObjsForAuxTranslation kernelObjects; mockKernel.mockKernel->fillWithKernelObjsForAuxTranslation(kernelObjects); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[KERNEL_ARGUMENT_AUX_TRANSLATION], mockKernel.mockKernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str(), 0, mockKernel.mockKernel->getKernelInfo().getExtendedMetadata(0).argName.c_str()); std::string output = testing::internal::GetCapturedStdout(); EXPECT_NE(0u, output.size()); EXPECT_TRUE(containsHint(expectedHint, userData)); delete gfxAllocation.getDefaultGmm(); } TEST_F(PerformanceHintTest, givenPrintDriverDiagnosticsDebugModeEnabledWhenCallFillWithKernelObjsForAuxTranslationOnUnifiedMemoryThenContextProvidesProperHint) { 
DebugManagerStateRestore dbgRestore; DebugManager.flags.PrintDriverDiagnostics.set(1); DebugManager.flags.EnableStatelessCompression.set(1); auto pDevice = castToObject(devices[0]); MockKernelWithInternals mockKernel(*pDevice, context); char data[128]; void *ptr = &data; MockGraphicsAllocation gfxAllocation(ptr, 128); MockBuffer::setAllocationType(&gfxAllocation, pDevice->getRootDeviceEnvironment().getGmmClientContext(), true); mockKernel.mockKernel->initialize(); mockKernel.mockKernel->setUnifiedMemoryExecInfo(&gfxAllocation); testing::internal::CaptureStdout(); KernelObjsForAuxTranslation kernelObjects; mockKernel.mockKernel->fillWithKernelObjsForAuxTranslation(kernelObjects); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[KERNEL_ALLOCATION_AUX_TRANSLATION], mockKernel.mockKernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str(), ptr, 128); std::string output = testing::internal::GetCapturedStdout(); EXPECT_NE(0u, output.size()); EXPECT_TRUE(containsHint(expectedHint, userData)); delete gfxAllocation.getDefaultGmm(); } TEST_F(PerformanceHintTest, givenPrintDriverDiagnosticsDebugModeEnabledWhenCallFillWithKernelObjsForAuxTranslationOnAllocationInSvmAllocsManagerThenContextProvidesProperHint) { if (context->getSVMAllocsManager() == nullptr) { return; } DebugManagerStateRestore dbgRestore; DebugManager.flags.PrintDriverDiagnostics.set(1); DebugManager.flags.EnableStatelessCompression.set(1); auto pDevice = castToObject(devices[0]); MockKernelWithInternals mockKernel(*pDevice, context); char data[128]; void *ptr = &data; MockGraphicsAllocation gfxAllocation(ptr, 128); MockBuffer::setAllocationType(&gfxAllocation, pDevice->getRootDeviceEnvironment().getGmmClientContext(), true); mockKernel.mockKernel->initialize(); SvmAllocationData allocData(0); allocData.gpuAllocations.addAllocation(&gfxAllocation); allocData.memoryType = InternalMemoryType::DEVICE_UNIFIED_MEMORY; allocData.device = &pDevice->getDevice(); 
context->getSVMAllocsManager()->insertSVMAlloc(allocData); testing::internal::CaptureStdout(); KernelObjsForAuxTranslation kernelObjects; mockKernel.mockKernel->fillWithKernelObjsForAuxTranslation(kernelObjects); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[KERNEL_ALLOCATION_AUX_TRANSLATION], mockKernel.mockKernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str(), ptr, 128); std::string output = testing::internal::GetCapturedStdout(); EXPECT_NE(0u, output.size()); EXPECT_TRUE(containsHint(expectedHint, userData)); delete gfxAllocation.getDefaultGmm(); } TEST_F(PerformanceHintTest, givenPrintDriverDiagnosticsDebugModeEnabledWhenKernelObjectWithGraphicsAllocationAccessedStatefullyOnlyThenDontReportAnyHint) { DebugManagerStateRestore dbgRestore; DebugManager.flags.PrintDriverDiagnostics.set(1); auto pDevice = castToObject(devices[0]); MockKernelWithInternals mockKernel(*pDevice, context); char data[128]; void *ptr = &data; MockGraphicsAllocation gfxAllocation(ptr, 128); MockBuffer::setAllocationType(&gfxAllocation, pDevice->getRootDeviceEnvironment().getGmmClientContext(), true); mockKernel.kernelInfo.addExtendedMetadata(0, "arg0"); mockKernel.kernelInfo.addArgBuffer(0, 0, 0, 0); mockKernel.kernelInfo.setBufferStateful(0); mockKernel.mockKernel->initialize(); mockKernel.mockKernel->auxTranslationRequired = true; mockKernel.mockKernel->setArgSvmAlloc(0, ptr, &gfxAllocation, 0u); testing::internal::CaptureStdout(); KernelObjsForAuxTranslation kernelObjects; mockKernel.mockKernel->fillWithKernelObjsForAuxTranslation(kernelObjects); std::string output = testing::internal::GetCapturedStdout(); EXPECT_EQ(0u, output.size()); delete gfxAllocation.getDefaultGmm(); } TEST_F(PerformanceHintTest, givenPrintDriverDiagnosticsDebugModeDisabledWhenCallFillWithKernelObjsForAuxTranslationOnGfxAllocationThenDontReportAnyHint) { auto pDevice = castToObject(devices[0]); MockKernelWithInternals mockKernel(*pDevice, context); 
char data[128]; void *ptr = &data; MockGraphicsAllocation gfxAllocation(ptr, 128); MockBuffer::setAllocationType(&gfxAllocation, pDevice->getRootDeviceEnvironment().getGmmClientContext(), true); mockKernel.kernelInfo.addExtendedMetadata(0, "arg0"); mockKernel.kernelInfo.addArgBuffer(0, 0, 0, 0); mockKernel.mockKernel->initialize(); mockKernel.mockKernel->auxTranslationRequired = true; mockKernel.mockKernel->setArgSvmAlloc(0, ptr, &gfxAllocation, 0u); testing::internal::CaptureStdout(); KernelObjsForAuxTranslation kernelObjects; mockKernel.mockKernel->fillWithKernelObjsForAuxTranslation(kernelObjects); std::string output = testing::internal::GetCapturedStdout(); EXPECT_EQ(0u, output.size()); delete gfxAllocation.getDefaultGmm(); } TEST_F(PerformanceHintTest, whenCallingFillWithKernelObjsForAuxTranslationOnNullGfxAllocationThenDontReportAnyHint) { auto pDevice = castToObject(devices[0]); MockKernelWithInternals mockKernel(*pDevice, context); mockKernel.kernelInfo.addExtendedMetadata(0, "arg0"); mockKernel.kernelInfo.addArgBuffer(0, 0, 0, 0); mockKernel.mockKernel->initialize(); mockKernel.mockKernel->setArgSvmAlloc(0, nullptr, nullptr, 0u); testing::internal::CaptureStdout(); KernelObjsForAuxTranslation kernelObjects; mockKernel.mockKernel->fillWithKernelObjsForAuxTranslation(kernelObjects); std::string output = testing::internal::GetCapturedStdout(); EXPECT_EQ(0u, output.size()); } TEST_F(PerformanceHintTest, givenPrintDriverDiagnosticsDebugModeDisabledWhenCallFillWithKernelObjsForAuxTranslationOnUnifiedMemoryThenDontReportAnyHint) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableStatelessCompression.set(1); auto pDevice = castToObject(devices[0]); MockKernelWithInternals mockKernel(*pDevice, context); char data[128]; void *ptr = &data; MockGraphicsAllocation gfxAllocation(ptr, 128); MockBuffer::setAllocationType(&gfxAllocation, pDevice->getRootDeviceEnvironment().getGmmClientContext(), true); mockKernel.mockKernel->initialize(); 
mockKernel.mockKernel->setUnifiedMemoryExecInfo(&gfxAllocation); testing::internal::CaptureStdout(); KernelObjsForAuxTranslation kernelObjects; mockKernel.mockKernel->fillWithKernelObjsForAuxTranslation(kernelObjects); std::string output = testing::internal::GetCapturedStdout(); EXPECT_EQ(0u, output.size()); delete gfxAllocation.getDefaultGmm(); } HWTEST2_F(PerformanceHintTest, given64bitCompressedBufferWhenItsCreatedThenProperPerformanceHintIsProvided, IsAtLeastGen12lp) { cl_int retVal; HardwareInfo hwInfo = context->getDevice(0)->getHardwareInfo(); hwInfo.capabilityTable.ftrRenderCompressedBuffers = true; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); cl_device_id deviceId = device.get(); size_t size = 8192u; cl_context_properties validProperties[3] = {CL_CONTEXT_SHOW_DIAGNOSTICS_INTEL, CL_CONTEXT_DIAGNOSTICS_LEVEL_ALL_INTEL, 0}; auto context = std::unique_ptr(Context::create(validProperties, ClDeviceVector(&deviceId, 1), callbackFunction, static_cast(userData), retVal)); context->isSharedContext = false; auto buffer = std::unique_ptr( Buffer::create(context.get(), ClMemoryPropertiesHelper::createMemoryProperties(0, 0, 0, &context->getDevice(0)->getDevice()), 0, 0, size, static_cast(NULL), retVal)); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[BUFFER_IS_COMPRESSED], buffer.get()); auto compressionSupported = HwHelper::get(hwInfo.platform.eRenderCoreFamily).isBufferSizeSuitableForCompression(size, hwInfo) && HwHelper::compressedBuffersSupported(hwInfo); if (compressionSupported) { EXPECT_TRUE(containsHint(expectedHint, userData)); } else { EXPECT_FALSE(containsHint(expectedHint, userData)); } } TEST_F(PerformanceHintTest, givenUncompressedBufferWhenItsCreatedThenProperPerformanceHintIsProvided) { cl_int retVal; HardwareInfo hwInfo = context->getDevice(0)->getHardwareInfo(); hwInfo.capabilityTable.ftrRenderCompressedBuffers = true; auto device = 
std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); cl_device_id deviceId = device.get(); MemoryProperties memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_READ_WRITE, 0, 0, &context->getDevice(0)->getDevice()); size_t size = 0u; cl_context_properties validProperties[3] = {CL_CONTEXT_SHOW_DIAGNOSTICS_INTEL, CL_CONTEXT_DIAGNOSTICS_LEVEL_ALL_INTEL, 0}; auto context = std::unique_ptr(Context::create(validProperties, ClDeviceVector(&deviceId, 1), callbackFunction, static_cast(userData), retVal)); std::unique_ptr buffer; bool isCompressed = true; if (context->getMemoryManager()) { isCompressed = MemObjHelper::isSuitableForCompression( HwHelper::compressedBuffersSupported(hwInfo), memoryProperties, *context, HwHelper::get(hwInfo.platform.eRenderCoreFamily).isBufferSizeSuitableForCompression(size, hwInfo)) && !is32bit && !context->isSharedContext && (!memoryProperties.flags.useHostPtr || context->getMemoryManager()->isLocalMemorySupported(device->getRootDeviceIndex())) && !memoryProperties.flags.forceHostMemory; buffer = std::unique_ptr(Buffer::create(context.get(), memoryProperties, CL_MEM_READ_WRITE, 0, size, static_cast(NULL), retVal)); } snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[BUFFER_IS_NOT_COMPRESSED], buffer.get()); if (isCompressed || is32bit) { Buffer::provideCompressionHint(false, context.get(), buffer.get()); } EXPECT_TRUE(containsHint(expectedHint, userData)); } HWTEST_F(PerformanceHintTest, givenCompressedImageWhenItsCreatedThenProperPerformanceHintIsProvided) { HardwareInfo hwInfo = context->getDevice(0)->getHardwareInfo(); hwInfo.capabilityTable.ftrRenderCompressedImages = true; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); cl_device_id deviceId = device.get(); cl_context_properties validProperties[3] = {CL_CONTEXT_SHOW_DIAGNOSTICS_INTEL, CL_CONTEXT_DIAGNOSTICS_LEVEL_ALL_INTEL, 0}; auto context = 
std::unique_ptr(Context::create(validProperties, ClDeviceVector(&deviceId, 1), callbackFunction, static_cast(userData), retVal)); const size_t width = 5; const size_t height = 3; const size_t depth = 2; cl_int retVal = CL_SUCCESS; auto const elementSize = 4; char *hostPtr = static_cast(alignedMalloc(width * height * depth * elementSize * 2, 64)); cl_image_format imageFormat; cl_image_desc imageDesc; auto mockBuffer = std::unique_ptr(new MockBuffer()); StorageInfo info; size_t t = 4; auto gmm = new Gmm(device->getGmmClientContext(), static_cast(nullptr), t, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, true, info, true); gmm->isCompressionEnabled = true; auto graphicsAllocation = mockBuffer->getGraphicsAllocation(device->getRootDeviceIndex()); graphicsAllocation->setDefaultGmm(gmm); if (!HwHelperHw::get().checkResourceCompatibility(*graphicsAllocation)) { GTEST_SKIP(); } cl_mem mem = mockBuffer.get(); imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_RGBA; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = mem; imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D_BUFFER; imageDesc.image_width = width; imageDesc.image_height = 0; imageDesc.image_depth = 0; imageDesc.image_array_size = 0; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); auto image = std::unique_ptr(Image::create( context.get(), ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context->getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imageDesc, hostPtr, retVal)); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[IMAGE_IS_COMPRESSED], image.get()); alignedFree(hostPtr); if (HwHelper::compressedImagesSupported(hwInfo)) { EXPECT_TRUE(containsHint(expectedHint, 
userData)); } else { EXPECT_FALSE(containsHint(expectedHint, userData)); } } TEST_F(PerformanceHintTest, givenUncompressedImageWhenItsCreatedThenProperPerformanceHintIsProvided) { HardwareInfo hwInfo = context->getDevice(0)->getHardwareInfo(); hwInfo.capabilityTable.ftrRenderCompressedImages = true; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); cl_device_id deviceId = device.get(); cl_context_properties validProperties[3] = {CL_CONTEXT_SHOW_DIAGNOSTICS_INTEL, CL_CONTEXT_DIAGNOSTICS_LEVEL_ALL_INTEL, 0}; auto context = std::unique_ptr(Context::create(validProperties, ClDeviceVector(&deviceId, 1), callbackFunction, static_cast(userData), retVal)); const size_t width = 5; const size_t height = 3; const size_t depth = 2; cl_int retVal = CL_SUCCESS; auto const elementSize = 4; char *hostPtr = static_cast(alignedMalloc(width * height * depth * elementSize * 2, 64)); cl_image_format imageFormat; cl_image_desc imageDesc; auto mockBuffer = std::unique_ptr(new MockBuffer()); StorageInfo info; size_t t = 4; auto gmm = new Gmm(device->getGmmClientContext(), (const void *)nullptr, t, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, true, info, true); gmm->isCompressionEnabled = false; mockBuffer->getGraphicsAllocation(device->getRootDeviceIndex())->setDefaultGmm(gmm); cl_mem mem = mockBuffer.get(); imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_RGBA; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = mem; imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D_BUFFER; imageDesc.image_width = width; imageDesc.image_height = 0; imageDesc.image_depth = 0; imageDesc.image_array_size = 0; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); auto image = 
std::unique_ptr(Image::create( context.get(), ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context->getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imageDesc, hostPtr, retVal)); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[IMAGE_IS_NOT_COMPRESSED], image.get()); alignedFree(hostPtr); if (HwHelper::compressedImagesSupported(hwInfo)) { EXPECT_TRUE(containsHint(expectedHint, userData)); } else { EXPECT_FALSE(containsHint(expectedHint, userData)); } } TEST_P(PerformanceHintKernelTest, GivenSpillFillWhenKernelIsInitializedThenContextProvidesProperHint) { auto scratchSize = zeroSized ? 0 : 1024; MockKernelWithInternals mockKernel(context->getDevices(), context); mockKernel.kernelInfo.setPerThreadScratchSize(scratchSize, 0); uint32_t computeUnitsForScratch[] = {0x10, 0x20}; auto pClDevice = &mockKernel.mockKernel->getDevice(); auto &deviceInfo = const_cast(pClDevice->getSharedDeviceInfo()); deviceInfo.computeUnitsUsedForScratch = computeUnitsForScratch[pClDevice->getRootDeviceIndex()]; mockKernel.mockKernel->initialize(); auto expectedSize = scratchSize * pClDevice->getSharedDeviceInfo().computeUnitsUsedForScratch * mockKernel.mockKernel->getKernelInfo().getMaxSimdSize(); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[REGISTER_PRESSURE_TOO_HIGH], mockKernel.mockKernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str(), expectedSize); EXPECT_EQ(!zeroSized, containsHint(expectedHint, userData)); } TEST_P(PerformanceHintKernelTest, GivenPrivateSurfaceWhenKernelIsInitializedThenContextProvidesProperHint) { auto pDevice = castToObject(devices[1]); static_cast(pDevice->getMemoryManager())->turnOnFakingBigAllocations(); for (auto isSimtThread : {false, true}) { auto size = zeroSized ? 
0 : 1024; MockKernelWithInternals mockKernel(*pDevice, context); mockKernel.kernelInfo.setPrivateMemory(size, isSimtThread, 8, 16, 0); size *= pDevice->getSharedDeviceInfo().computeUnitsUsedForScratch; size *= isSimtThread ? mockKernel.mockKernel->getKernelInfo().getMaxSimdSize() : 1; mockKernel.mockKernel->initialize(); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[PRIVATE_MEMORY_USAGE_TOO_HIGH], mockKernel.mockKernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str(), size); EXPECT_EQ(!zeroSized, containsHint(expectedHint, userData)); } } INSTANTIATE_TEST_CASE_P( DriverDiagnosticsTests, VerboseLevelTest, testing::ValuesIn(diagnosticsVerboseLevels)); INSTANTIATE_TEST_CASE_P( DriverDiagnosticsTests, PerformanceHintBufferTest, testing::Combine( ::testing::Bool(), ::testing::Bool())); INSTANTIATE_TEST_CASE_P( DriverDiagnosticsTests, PerformanceHintCommandQueueTest, testing::Combine( ::testing::Bool(), ::testing::Bool())); INSTANTIATE_TEST_CASE_P( DriverDiagnosticsTests, PerformanceHintKernelTest, testing::Bool()); TEST(PerformanceHintsDebugVariables, givenDefaultDebugManagerWhenPrintDriverDiagnosticsIsCalledThenMinusOneIsReturned) { EXPECT_EQ(-1, DebugManager.flags.PrintDriverDiagnostics.get()); } TEST(PerformanceHintsTransferTest, givenCommandTypeAndMemoryTransferRequiredWhenAskingForHintThenReturnCorrectValue) { DriverDiagnostics driverDiagnostics(0); const uint32_t numHints = 8; std::tuple commandHints[numHints] = { // commandType, transfer required, transfer not required std::make_tuple(CL_COMMAND_MAP_BUFFER, CL_ENQUEUE_MAP_BUFFER_REQUIRES_COPY_DATA, CL_ENQUEUE_MAP_BUFFER_DOESNT_REQUIRE_COPY_DATA), std::make_tuple(CL_COMMAND_MAP_IMAGE, CL_ENQUEUE_MAP_IMAGE_REQUIRES_COPY_DATA, CL_ENQUEUE_MAP_IMAGE_DOESNT_REQUIRE_COPY_DATA), std::make_tuple(CL_COMMAND_UNMAP_MEM_OBJECT, CL_ENQUEUE_UNMAP_MEM_OBJ_REQUIRES_COPY_DATA, CL_ENQUEUE_UNMAP_MEM_OBJ_DOESNT_REQUIRE_COPY_DATA), std::make_tuple(CL_COMMAND_WRITE_BUFFER, 
CL_ENQUEUE_WRITE_BUFFER_REQUIRES_COPY_DATA, CL_ENQUEUE_WRITE_BUFFER_DOESNT_REQUIRE_COPY_DATA), std::make_tuple(CL_COMMAND_READ_BUFFER, CL_ENQUEUE_READ_BUFFER_REQUIRES_COPY_DATA, CL_ENQUEUE_READ_BUFFER_DOESNT_REQUIRE_COPY_DATA), std::make_tuple(CL_COMMAND_WRITE_BUFFER_RECT, CL_ENQUEUE_WRITE_BUFFER_RECT_REQUIRES_COPY_DATA, CL_ENQUEUE_WRITE_BUFFER_RECT_DOESNT_REQUIRE_COPY_DATA), std::make_tuple(CL_COMMAND_READ_BUFFER_RECT, CL_ENQUEUE_READ_BUFFER_RECT_REQUIRES_COPY_DATA, CL_ENQUEUE_READ_BUFFER_RECT_DOESNT_REQUIRES_COPY_DATA), std::make_tuple(CL_COMMAND_WRITE_IMAGE, CL_ENQUEUE_WRITE_IMAGE_REQUIRES_COPY_DATA, CL_ENQUEUE_WRITE_IMAGE_DOESNT_REQUIRES_COPY_DATA), }; for (uint32_t i = 0; i < numHints; i++) { auto hintWithTransferRequired = driverDiagnostics.obtainHintForTransferOperation(std::get<0>(commandHints[i]), true); auto hintWithoutTransferRequired = driverDiagnostics.obtainHintForTransferOperation(std::get<0>(commandHints[i]), false); EXPECT_EQ(std::get<1>(commandHints[i]), hintWithTransferRequired); EXPECT_EQ(std::get<2>(commandHints[i]), hintWithoutTransferRequired); } EXPECT_THROW(driverDiagnostics.obtainHintForTransferOperation(CL_COMMAND_READ_IMAGE, true), std::exception); // no hint for this scenario EXPECT_EQ(CL_ENQUEUE_READ_IMAGE_DOESNT_REQUIRES_COPY_DATA, driverDiagnostics.obtainHintForTransferOperation(CL_COMMAND_READ_IMAGE, false)); } TEST_F(DriverDiagnosticsTest, givenInvalidCommandTypeWhenAskingForZeroCopyOperatonThenAbort) { cl_device_id deviceId = devices[0]; cl_context_properties validProperties[3] = {CL_CONTEXT_SHOW_DIAGNOSTICS_INTEL, CL_CONTEXT_DIAGNOSTICS_LEVEL_ALL_INTEL, 0}; auto context = std::unique_ptr(Context::create(validProperties, ClDeviceVector(&deviceId, 1), callbackFunction, (void *)userData, retVal)); auto buffer = std::unique_ptr(Buffer::create(context.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); auto address = reinterpret_cast(0x12345); EXPECT_THROW(context->providePerformanceHintForMemoryTransfer(CL_COMMAND_BARRIER, true, 
buffer.get(), address), std::exception); } compute-runtime-22.14.22890/opencl/test/unit_test/context/driver_diagnostics_tests.h000066400000000000000000000243121422164147700305720ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/aligned_memory.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/test_macros/test_checks_shared.h" #include "opencl/source/command_queue/cl_local_work_size.h" #include "opencl/source/context/context.h" #include "opencl/source/kernel/kernel.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/command_queue/command_queue_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/fixtures/platform_fixture.h" #include "opencl/test/unit_test/fixtures/program_fixture.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "gtest/gtest.h" using namespace NEO; const int maxHintCounter = 6; bool containsHint(const char *providedHint, char *userData); void CL_CALLBACK callbackFunction(const char *providedHint, const void *flags, size_t size, void *userData); struct DriverDiagnosticsTest : public PlatformFixture, public ::testing::Test { using PlatformFixture::SetUp; void SetUp() override { PlatformFixture::SetUp(); memset(userData, 0, maxHintCounter * DriverDiagnostics::maxHintStringSize); } void TearDown() override { PlatformFixture::TearDown(); } cl_int retVal = CL_SUCCESS; char userData[maxHintCounter * DriverDiagnostics::maxHintStringSize]{}; char expectedHint[DriverDiagnostics::maxHintStringSize]{}; }; struct VerboseLevelTest : public DriverDiagnosticsTest, public ::testing::WithParamInterface { void SetUp() override { DriverDiagnosticsTest::SetUp(); hintId = CL_BUFFER_NEEDS_ALLOCATE_MEMORY; snprintf(expectedHint, 
DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[hintId], 0); } void TearDown() override { DriverDiagnosticsTest::TearDown(); } std::vector validLevels{ CL_CONTEXT_DIAGNOSTICS_LEVEL_ALL_INTEL, CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL}; PerformanceHints hintId; }; struct PerformanceHintTest : public DriverDiagnosticsTest, public CommandQueueHwFixture { void SetUp() override { DriverDiagnosticsTest::SetUp(); cl_context_properties validProperties[3] = {CL_CONTEXT_SHOW_DIAGNOSTICS_INTEL, CL_CONTEXT_DIAGNOSTICS_LEVEL_ALL_INTEL, 0}; context = Context::create(validProperties, ClDeviceVector(devices, num_devices), callbackFunction, (void *)userData, retVal); EXPECT_EQ(CL_SUCCESS, retVal); } void TearDown() override { CommandQueueHwFixture::TearDown(); DriverDiagnosticsTest::TearDown(); } }; struct PerformanceHintBufferTest : public PerformanceHintTest, public ::testing::WithParamInterface> { void SetUp() override { PerformanceHintTest::SetUp(); std::tie(alignedAddress, alignedSize) = GetParam(); address = alignedMalloc(2 * MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize); } void TearDown() override { delete buffer; alignedFree(address); PerformanceHintTest::TearDown(); } bool alignedSize = false; bool alignedAddress = false; void *address = nullptr; Buffer *buffer = nullptr; }; struct PerformanceHintCommandQueueTest : public PerformanceHintTest, public ::testing::WithParamInterface> { void SetUp() override { PerformanceHintTest::SetUp(); std::tie(profilingEnabled, preemptionSupported) = GetParam(); static_cast(context->getDevice(0))->deviceInfo.preemptionSupported = preemptionSupported; } void TearDown() override { clReleaseCommandQueue(cmdQ); PerformanceHintTest::TearDown(); } cl_command_queue cmdQ = nullptr; bool profilingEnabled = false; bool preemptionSupported = false; }; struct PerformanceHintEnqueueTest : public PerformanceHintTest { void SetUp() 
override { PerformanceHintTest::SetUp(); pCmdQ = createCommandQueue(pPlatform->getClDevice(0)); } void TearDown() override { PerformanceHintTest::TearDown(); } }; struct PerformanceHintEnqueueBufferTest : public PerformanceHintEnqueueTest { void SetUp() override { PerformanceHintEnqueueTest::SetUp(); address = alignedMalloc(2 * MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize); buffer = Buffer::create( context, CL_MEM_USE_HOST_PTR, MemoryConstants::cacheLineSize, address, retVal); } void TearDown() override { delete buffer; alignedFree(address); PerformanceHintEnqueueTest::TearDown(); } void *address = nullptr; Buffer *buffer = nullptr; }; struct PerformanceHintEnqueueReadBufferTest : public PerformanceHintEnqueueBufferTest, public ::testing::WithParamInterface> { void SetUp() override { PerformanceHintEnqueueBufferTest::SetUp(); std::tie(alignedAddress, alignedSize) = GetParam(); } void TearDown() override { PerformanceHintEnqueueBufferTest::TearDown(); } bool alignedSize = false; bool alignedAddress = false; }; struct PerformanceHintEnqueueImageTest : public PerformanceHintEnqueueTest { void SetUp() override { REQUIRE_IMAGES_OR_SKIP(defaultHwInfo); PerformanceHintEnqueueTest::SetUp(); address = alignedMalloc(2 * MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize); image = ImageHelper>::create(context); zeroCopyImage.reset(ImageHelper::create(context)); } void TearDown() override { if (IsSkipped()) { return; } delete image; zeroCopyImage.reset(nullptr); alignedFree(address); PerformanceHintEnqueueTest::TearDown(); } void *address = nullptr; Image *image = nullptr; std::unique_ptr zeroCopyImage; }; struct PerformanceHintEnqueueReadImageTest : public PerformanceHintEnqueueImageTest, public ::testing::WithParamInterface> { void SetUp() override { PerformanceHintEnqueueImageTest::SetUp(); std::tie(alignedAddress, alignedSize) = GetParam(); } void TearDown() override { PerformanceHintEnqueueImageTest::TearDown(); } bool alignedSize = false; 
bool alignedAddress = false; }; struct PerformanceHintEnqueueMapTest : public PerformanceHintEnqueueTest, public ::testing::WithParamInterface { void SetUp() override { PerformanceHintEnqueueTest::SetUp(); } void TearDown() override { PerformanceHintEnqueueTest::TearDown(); } }; struct PerformanceHintEnqueueKernelTest : public PerformanceHintEnqueueTest, public ProgramFixture { void SetUp() override { PerformanceHintEnqueueTest::SetUp(); CreateProgramFromBinary(context, context->getDevices(), "CopyBuffer_simd32"); retVal = pProgram->build(pProgram->getDevices(), nullptr, false); ASSERT_EQ(CL_SUCCESS, retVal); kernel = Kernel::create(pProgram, pProgram->getKernelInfoForKernel("CopyBuffer"), *context->getDevice(0), &retVal); globalWorkGroupSize[0] = globalWorkGroupSize[1] = globalWorkGroupSize[2] = 1; rootDeviceIndex = context->getDevice(0)->getRootDeviceIndex(); } void TearDown() override { delete kernel; ProgramFixture::TearDown(); PerformanceHintEnqueueTest::TearDown(); } MockKernel *kernel = nullptr; uint32_t rootDeviceIndex = std::numeric_limits::max(); size_t globalWorkGroupSize[3]{}; }; struct PerformanceHintEnqueueKernelBadSizeTest : public PerformanceHintEnqueueKernelTest, public ::testing::WithParamInterface { void SetUp() override { PerformanceHintEnqueueKernelTest::SetUp(); globalWorkGroupSize[0] = globalWorkGroupSize[1] = globalWorkGroupSize[2] = 32; } void TearDown() override { PerformanceHintEnqueueKernelTest::TearDown(); } }; struct PerformanceHintEnqueueKernelPrintfTest : public PerformanceHintEnqueueTest, public ProgramFixture { void SetUp() override { PerformanceHintEnqueueTest::SetUp(); CreateProgramFromBinary(context, context->getDevices(), "printf"); retVal = pProgram->build(pProgram->getDevices(), nullptr, false); ASSERT_EQ(CL_SUCCESS, retVal); kernel = Kernel::create(pProgram, pProgram->getKernelInfoForKernel("test"), *context->getDevice(0), &retVal); globalWorkGroupSize[0] = globalWorkGroupSize[1] = globalWorkGroupSize[2] = 1; } void 
TearDown() override { delete kernel; ProgramFixture::TearDown(); PerformanceHintEnqueueTest::TearDown(); } Kernel *kernel = nullptr; size_t globalWorkGroupSize[3]{}; }; struct PerformanceHintKernelTest : public PerformanceHintTest, public ::testing::WithParamInterface { void SetUp() override { DebugManager.flags.CreateMultipleRootDevices.set(2); DebugManager.flags.EnableMultiRootDeviceContexts.set(true); PerformanceHintTest::SetUp(); zeroSized = GetParam(); } void TearDown() override { PerformanceHintTest::TearDown(); } DebugManagerStateRestore restorer; bool zeroSized = false; }; compute-runtime-22.14.22890/opencl/test/unit_test/context/get_supported_image_formats_tests.cpp000066400000000000000000000373521422164147700330340ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/mocks/mock_device.h" #include "opencl/source/helpers/surface_formats.h" #include "opencl/source/mem_obj/image.h" #include "opencl/test/unit_test/fixtures/context_fixture.h" #include "opencl/test/unit_test/fixtures/platform_fixture.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "gtest/gtest.h" #include using namespace NEO; struct GetSupportedImageFormatsTest : public PlatformFixture, public ContextFixture, public ::testing::TestWithParam> { using ContextFixture::SetUp; using PlatformFixture::SetUp; GetSupportedImageFormatsTest() { } void SetUp() override { PlatformFixture::SetUp(); ContextFixture::SetUp(num_devices, devices); } void TearDown() override { ContextFixture::TearDown(); PlatformFixture::TearDown(); } cl_int retVal = CL_SUCCESS; }; TEST_P(GetSupportedImageFormatsTest, WhenGettingNumImageFormatsThenGreaterThanZeroIsReturned) { cl_uint numImageFormats = 0; uint64_t imageFormatsFlags; uint32_t imageFormats; std::tie(imageFormatsFlags, imageFormats) = GetParam(); retVal = pContext->getSupportedImageFormats( 
&castToObject(devices[0])->getDevice(), imageFormatsFlags, imageFormats, 0, nullptr, &numImageFormats); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_GT(numImageFormats, 0u); } TEST_P(GetSupportedImageFormatsTest, WhenRetrievingImageFormatsThenListIsNonEmpty) { cl_uint numImageFormats = 0; uint64_t imageFormatsFlags; uint32_t imageFormats; std::tie(imageFormatsFlags, imageFormats) = GetParam(); retVal = pContext->getSupportedImageFormats( &castToObject(devices[0])->getDevice(), imageFormatsFlags, imageFormats, 0, nullptr, &numImageFormats); EXPECT_GT(numImageFormats, 0u); auto imageFormatList = new cl_image_format[numImageFormats]; memset(imageFormatList, 0, numImageFormats * sizeof(cl_image_format)); retVal = pContext->getSupportedImageFormats( &castToObject(devices[0])->getDevice(), imageFormatsFlags, imageFormats, numImageFormats, imageFormatList, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); for (cl_uint entry = 0; entry < numImageFormats; ++entry) { EXPECT_NE(0u, imageFormatList[entry].image_channel_order); EXPECT_NE(0u, imageFormatList[entry].image_channel_data_type); } retVal = pContext->getSupportedImageFormats( &castToObject(devices[0])->getDevice(), CL_MEM_KERNEL_READ_AND_WRITE, imageFormats, numImageFormats, imageFormatList, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); delete[] imageFormatList; } TEST_P(GetSupportedImageFormatsTest, WhenRetrievingImageFormatsSRGBThenListIsNonEmpty) { cl_uint numImageFormats = 0; uint64_t imageFormatsFlags; uint32_t imageFormats; bool sRGBAFormatFound = false; bool sBGRAFormatFound = false; bool isReadOnly = false; std::tie(imageFormatsFlags, imageFormats) = GetParam(); retVal = pContext->getSupportedImageFormats( &castToObject(devices[0])->getDevice(), imageFormatsFlags, imageFormats, 0, nullptr, &numImageFormats); EXPECT_GT(numImageFormats, 0u); auto imageFormatList = new cl_image_format[numImageFormats]; memset(imageFormatList, 0, numImageFormats * sizeof(cl_image_format)); retVal = pContext->getSupportedImageFormats( 
&castToObject(devices[0])->getDevice(), imageFormatsFlags, imageFormats, numImageFormats, imageFormatList, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); isReadOnly |= (imageFormatsFlags == CL_MEM_READ_ONLY); for (cl_uint entry = 0; entry < numImageFormats; ++entry) { EXPECT_NE(0u, imageFormatList[entry].image_channel_order); EXPECT_NE(0u, imageFormatList[entry].image_channel_data_type); if (imageFormatList[entry].image_channel_order == CL_sRGBA) { sRGBAFormatFound = true; } if (imageFormatList[entry].image_channel_order == CL_sBGRA) { sBGRAFormatFound = true; } } if (isReadOnly && ((&castToObject(devices[0])->getDevice())->getHardwareInfo().capabilityTable.supportsOcl21Features)) { EXPECT_TRUE(sRGBAFormatFound & sBGRAFormatFound); } else { EXPECT_FALSE(sRGBAFormatFound | sBGRAFormatFound); } delete[] imageFormatList; } TEST(ImageFormats, WhenCheckingIsDepthFormatThenCorrectValueReturned) { for (auto &format : SurfaceFormats::readOnly20()) { EXPECT_FALSE(Image::isDepthFormat(format.OCLImageFormat)); } for (auto &format : SurfaceFormats::readOnlyDepth()) { EXPECT_TRUE(Image::isDepthFormat(format.OCLImageFormat)); } } struct PackedYuvExtensionSupportedImageFormatsTest : public ::testing::TestWithParam> { void SetUp() override { device = std::make_unique(new MockDevice()); context = std::unique_ptr(new MockContext(device.get(), true)); } void TearDown() override { } std::unique_ptr device; std::unique_ptr context; cl_int retVal; }; TEST_P(PackedYuvExtensionSupportedImageFormatsTest, WhenRetrievingImageFormatsPackedYUVThenListIsNonEmpty) { cl_uint numImageFormats = 0; uint64_t imageFormatsFlags; uint32_t imageFormats; bool YUYVFormatFound = false; bool UYVYFormatFound = false; bool YVYUFormatFound = false; bool VYUYFormatFound = false; bool isReadOnly = false; std::tie(imageFormatsFlags, imageFormats) = GetParam(); device->deviceInfo.nv12Extension = false; device->deviceInfo.packedYuvExtension = true; retVal = context->getSupportedImageFormats( &device->getDevice(), 
imageFormatsFlags, imageFormats, 0, nullptr, &numImageFormats);
    EXPECT_GT(numImageFormats, 0u);

    auto imageFormatList = new cl_image_format[numImageFormats];
    memset(imageFormatList, 0, numImageFormats * sizeof(cl_image_format));

    retVal = context->getSupportedImageFormats(
        &device->getDevice(),
        imageFormatsFlags,
        imageFormats,
        numImageFormats,
        imageFormatList,
        nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    isReadOnly |= (imageFormatsFlags == CL_MEM_READ_ONLY);

    for (cl_uint entry = 0; entry < numImageFormats; ++entry) {
        EXPECT_NE(0u, imageFormatList[entry].image_channel_order);
        EXPECT_NE(0u, imageFormatList[entry].image_channel_data_type);
        if (imageFormatList[entry].image_channel_order == CL_YUYV_INTEL) {
            YUYVFormatFound = true;
        }
        if (imageFormatList[entry].image_channel_order == CL_UYVY_INTEL) {
            UYVYFormatFound = true;
        }
        if (imageFormatList[entry].image_channel_order == CL_YVYU_INTEL) {
            YVYUFormatFound = true;
        }
        if (imageFormatList[entry].image_channel_order == CL_VYUY_INTEL) {
            VYUYFormatFound = true;
        }
    }

    if (isReadOnly && imageFormats == CL_MEM_OBJECT_IMAGE2D) {
        EXPECT_TRUE(YUYVFormatFound);
        EXPECT_TRUE(UYVYFormatFound);
        EXPECT_TRUE(YVYUFormatFound);
        EXPECT_TRUE(VYUYFormatFound);
    } else {
        EXPECT_FALSE(YUYVFormatFound);
        EXPECT_FALSE(UYVYFormatFound);
        EXPECT_FALSE(YVYUFormatFound);
        EXPECT_FALSE(VYUYFormatFound);
    }

    delete[] imageFormatList;
}

// Parameterized fixture for the NV12 image-format queries below.
// Each test unpacks its parameter with std::tie into (memory flags, image object type).
// NOTE(review): template arguments look stripped in this copy (e.g. "TestWithParam>",
// "std::make_unique(", "std::unique_ptr device") - confirm against the upstream file.
struct NV12ExtensionSupportedImageFormatsTest : public ::testing::TestWithParam> {

    // Creates a fresh mock device and a mock context bound to it for every test.
    void SetUp() override {
        device = std::make_unique(new MockDevice());
        context = std::unique_ptr(new MockContext(device.get(), true));
    }

    // Nothing to release explicitly; both members are smart pointers.
    void TearDown() override {
    }

    std::unique_ptr device;
    std::unique_ptr context;
    // Result of the most recent getSupportedImageFormats call; not initialized here.
    cl_int retVal;
};

// Same fixture under a second name so that it can be instantiated with a
// different set of memory flags (see the INSTANTIATE_TEST_CASE_P calls below).
typedef NV12ExtensionSupportedImageFormatsTest NV12ExtensionUnsupportedImageFormatsTest;

// With nv12Extension enabled and packedYuvExtension disabled, checks the number of
// reported formats and that CL_NV12_INTEL is listed only for CL_MEM_OBJECT_IMAGE2D.
TEST_P(NV12ExtensionSupportedImageFormatsTest, givenNV12ExtensionWhenQueriedForImageFormatsThenNV12FormatIsReturnedOnlyFor2DImages) {
    cl_uint numImageFormats = 0;
    uint64_t imageFormatsFlags;
    uint32_t imageFormats;
    bool Nv12FormatFound = false;

    std::tie(imageFormatsFlags, imageFormats) = GetParam();

    device->deviceInfo.nv12Extension = true;
    device->deviceInfo.packedYuvExtension = false;

    // First call: no output buffer, only the number of available formats is requested.
    retVal = context->getSupportedImageFormats(
        &device->getDevice(),
        imageFormatsFlags,
        imageFormats,
        0,
        nullptr,
        &numImageFormats);

    // NOTE(review): the local is named "...Ocl20..." but is read from supportsOcl21Features;
    // confirm which capability is intended.
    auto supportsOcl20Features = device.get()->getHardwareInfo().capabilityTable.supportsOcl21Features;
    size_t expectedNumReadOnlyFormats = (supportsOcl20Features) ? SurfaceFormats::readOnly20().size() : SurfaceFormats::readOnly12().size();

    // Depth formats are added to the expectation only for read-only 2D / 2D-array images.
    if (Image::isImage2dOr2dArray(imageFormats) && imageFormatsFlags == CL_MEM_READ_ONLY) {
        expectedNumReadOnlyFormats += SurfaceFormats::readOnlyDepth().size();
    }

    // Planar YUV formats are expected on top of the base list for 2D images only.
    if (Image::isImage2d(imageFormats)) {
        if (imageFormatsFlags == CL_MEM_READ_ONLY) {
            EXPECT_EQ(expectedNumReadOnlyFormats + SurfaceFormats::planarYuv().size(), static_cast(numImageFormats));
        }
        if (imageFormatsFlags == CL_MEM_NO_ACCESS_INTEL) {
            EXPECT_EQ(expectedNumReadOnlyFormats + SurfaceFormats::planarYuv().size(), static_cast(numImageFormats));
        }
    } else {
        if (imageFormatsFlags == CL_MEM_READ_ONLY) {
            EXPECT_EQ(expectedNumReadOnlyFormats, static_cast(numImageFormats));
        }
        if (imageFormatsFlags == CL_MEM_NO_ACCESS_INTEL) {
            EXPECT_EQ(expectedNumReadOnlyFormats, static_cast(numImageFormats));
        }
    }

    // Zero-filled buffer: any entry left untouched by the query fails the EXPECT_NE checks below.
    auto imageFormatList = new cl_image_format[numImageFormats];
    memset(imageFormatList, 0, numImageFormats * sizeof(cl_image_format));

    // Second call: fetch the actual list.
    retVal = context->getSupportedImageFormats(
        &device->getDevice(),
        imageFormatsFlags,
        imageFormats,
        numImageFormats,
        imageFormatList,
        nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    for (cl_uint entry = 0; entry < numImageFormats; ++entry) {
        EXPECT_NE(0u, imageFormatList[entry].image_channel_order);
        EXPECT_NE(0u, imageFormatList[entry].image_channel_data_type);
        if (imageFormatList[entry].image_channel_order == CL_NV12_INTEL) {
            Nv12FormatFound = true;
        }
    }

    if (imageFormats == CL_MEM_OBJECT_IMAGE2D) {
        EXPECT_TRUE(Nv12FormatFound);
    } else {
        EXPECT_FALSE(Nv12FormatFound);
    }

    delete[] imageFormatList;
}

// With nv12Extension enabled, checks the format counts for CL_MEM_WRITE_ONLY and
// CL_MEM_READ_WRITE queries and that CL_NV12_INTEL is never part of the returned list.
TEST_P(NV12ExtensionUnsupportedImageFormatsTest, givenNV12ExtensionWhenQueriedForWriteOnlyOrReadWriteImageFormatsThenNV12FormatIsNotReturned) {
    cl_uint numImageFormats = 0;
    uint64_t imageFormatsFlags;
    uint32_t imageFormats;
    bool Nv12FormatFound = false;

    std::tie(imageFormatsFlags, imageFormats) = GetParam();

    device->deviceInfo.nv12Extension = true;

    // Count-only query.
    retVal = context->getSupportedImageFormats(
        &device->getDevice(),
        imageFormatsFlags,
        imageFormats,
        0,
        nullptr,
        &numImageFormats);

    // For 2D / 2D-array images the read-write depth formats are expected in addition
    // to the base write-only or read-write list.
    if (imageFormatsFlags == CL_MEM_WRITE_ONLY) {
        if (!Image::isImage2dOr2dArray(imageFormats)) {
            EXPECT_EQ(SurfaceFormats::writeOnly().size(), static_cast(numImageFormats));
        } else {
            EXPECT_EQ(SurfaceFormats::writeOnly().size() + SurfaceFormats::readWriteDepth().size(), static_cast(numImageFormats));
        }
    }

    if (imageFormatsFlags == CL_MEM_READ_WRITE) {
        if (!Image::isImage2dOr2dArray(imageFormats)) {
            EXPECT_EQ(SurfaceFormats::readWrite().size(), static_cast(numImageFormats));
        } else {
            EXPECT_EQ(SurfaceFormats::readWrite().size() + SurfaceFormats::readWriteDepth().size(), static_cast(numImageFormats));
        }
    }

    auto imageFormatList = new cl_image_format[numImageFormats];
    memset(imageFormatList, 0, numImageFormats * sizeof(cl_image_format));

    retVal = context->getSupportedImageFormats(
        &device->getDevice(),
        imageFormatsFlags,
        imageFormats,
        numImageFormats,
        imageFormatList,
        nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    for (cl_uint entry = 0; entry < numImageFormats; ++entry) {
        EXPECT_NE(0u, imageFormatList[entry].image_channel_order);
        EXPECT_NE(0u, imageFormatList[entry].image_channel_data_type);
        if (imageFormatList[entry].image_channel_order == CL_NV12_INTEL) {
            Nv12FormatFound = true;
        }
    }

    EXPECT_FALSE(Nv12FormatFound);

    delete[] imageFormatList;
}

// Requests one entry fewer than the reported count (when more than one is available)
// and checks that the call still succeeds and every returned entry is filled in.
TEST_P(NV12ExtensionSupportedImageFormatsTest, WhenRetrievingLessImageFormatsThanAvailableThenListIsNonEmpty) {
    cl_uint numImageFormats = 0;
    uint64_t imageFormatsFlags;
    uint32_t imageFormats;
    std::tie(imageFormatsFlags, imageFormats) = GetParam();

    device->deviceInfo.nv12Extension = true;

    retVal = context->getSupportedImageFormats(
        &device->getDevice(),
        imageFormatsFlags,
        imageFormats,
        0,
        nullptr,
        &numImageFormats);
    EXPECT_GT(numImageFormats, 0u);

    // Shrink the request so that the output buffer is smaller than the full list.
    if (numImageFormats > 1)
        numImageFormats--;

    auto imageFormatList = new cl_image_format[numImageFormats];
    memset(imageFormatList, 0, numImageFormats * sizeof(cl_image_format));

    retVal = context->getSupportedImageFormats(
        &device->getDevice(),
        imageFormatsFlags,
        imageFormats,
        numImageFormats,
        imageFormatList,
        nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    for (cl_uint entry = 0; entry < numImageFormats; ++entry) {
        EXPECT_NE(0u, imageFormatList[entry].image_channel_order);
        EXPECT_NE(0u, imageFormatList[entry].image_channel_data_type);
    }

    delete[] imageFormatList;
}

// Parameter tables: memory flags and image object types combined for the generic
// and packed-YUV format tests.
cl_mem_flags GetSupportedImageFormatsFlags[] = {
    CL_MEM_READ_WRITE,
    CL_MEM_WRITE_ONLY,
    CL_MEM_READ_ONLY};

cl_mem_object_type GetSupportedImageFormats[] = {
    CL_MEM_OBJECT_IMAGE1D,
    CL_MEM_OBJECT_IMAGE1D_BUFFER,
    CL_MEM_OBJECT_IMAGE1D_ARRAY,
    CL_MEM_OBJECT_IMAGE2D,
    CL_MEM_OBJECT_IMAGE2D_ARRAY,
    CL_MEM_OBJECT_IMAGE3D};

INSTANTIATE_TEST_CASE_P(
    Context,
    GetSupportedImageFormatsTest,
    ::testing::Combine(
        ::testing::ValuesIn(GetSupportedImageFormatsFlags),
        ::testing::ValuesIn(GetSupportedImageFormats)));

INSTANTIATE_TEST_CASE_P(
    Context,
    PackedYuvExtensionSupportedImageFormatsTest,
    ::testing::Combine(
        ::testing::ValuesIn(GetSupportedImageFormatsFlags),
        ::testing::ValuesIn(GetSupportedImageFormats)));

// Flags used by the NV12ExtensionSupportedImageFormatsTest instantiation.
cl_mem_flags NV12ExtensionSupportedImageFormatsFlags[] = {
    CL_MEM_NO_ACCESS_INTEL,
    CL_MEM_READ_ONLY};

// Flags used by the NV12ExtensionUnsupportedImageFormatsTest instantiation.
cl_mem_flags NV12ExtensionUnsupportedImageFormatsFlags[] = {
    CL_MEM_READ_WRITE,
    CL_MEM_WRITE_ONLY};

// Image types shared by both NV12 instantiations: one non-2D type and the 2D type.
cl_mem_object_type NV12ExtensionSupportedImageFormats[] = {
    CL_MEM_OBJECT_IMAGE1D,
    CL_MEM_OBJECT_IMAGE2D};

INSTANTIATE_TEST_CASE_P(
    Context,
    NV12ExtensionSupportedImageFormatsTest,
    ::testing::Combine(
        ::testing::ValuesIn(NV12ExtensionSupportedImageFormatsFlags),
        ::testing::ValuesIn(NV12ExtensionSupportedImageFormats)));
INSTANTIATE_TEST_CASE_P( Context, NV12ExtensionUnsupportedImageFormatsTest, ::testing::Combine( ::testing::ValuesIn(NV12ExtensionUnsupportedImageFormatsFlags), ::testing::ValuesIn(NV12ExtensionSupportedImageFormats))); compute-runtime-22.14.22890/opencl/test/unit_test/context/gl/000077500000000000000000000000001422164147700237155ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/context/gl/CMakeLists.txt000066400000000000000000000006041422164147700264550ustar00rootroot00000000000000# # Copyright (C) 2018-2021 Intel Corporation # # SPDX-License-Identifier: MIT # if(WIN32) set(IGDRCL_SRCS_tests_context_gl ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/context_gl_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/context_gl_tests.h ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_context_gl}) endif() add_subdirectories() compute-runtime-22.14.22890/opencl/test/unit_test/context/gl/context_gl_tests.cpp000066400000000000000000000022101422164147700300040ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/context/gl/context_gl_tests.h" #include "opencl/source/sharings/gl/gl_sharing.h" #include "opencl/test/unit_test/mocks/mock_platform.h" namespace NEO { TEST_F(GlContextTest, GivenDefaultContextThenGlSharingIsDisabled) { ASSERT_EQ(context->getSharing(), nullptr); } TEST_F(GlContextTest, GivenGlContextParamWhenCreateContextThenInitSharingFunctions) { cl_device_id deviceID = devices[0]; auto pPlatform = NEO::platform(); cl_platform_id pid[1]; pid[0] = pPlatform; cl_context_properties validProperties[5] = {CL_CONTEXT_PLATFORM, (cl_context_properties)pid[0], CL_GL_CONTEXT_KHR, 0x10000, 0}; cl_int retVal = CL_SUCCESS; auto ctx = Context::create(validProperties, ClDeviceVector(&deviceID, 1), nullptr, nullptr, retVal); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, ctx); auto sharing = ctx->getSharing(); ASSERT_NE(nullptr, sharing); 
EXPECT_FALSE(context->getInteropUserSyncEnabled()); delete ctx; } } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/context/gl/context_gl_tests.h000066400000000000000000000042001422164147700274520ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/test/unit_test/fixtures/platform_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "gtest/gtest.h" #include namespace NEO { struct GlContextTest : public PlatformFixture, public ::testing::Test { using PlatformFixture::SetUp; void SetUp() override { PlatformFixture::SetUp(); properties[0] = CL_CONTEXT_PLATFORM; properties[1] = reinterpret_cast(static_cast(pPlatform)); properties[2] = 0; context = Context::create(properties, ClDeviceVector(devices, num_devices), nullptr, nullptr, retVal); ASSERT_NE(nullptr, context); } void TearDown() override { delete context; PlatformFixture::TearDown(); } void testContextCreation(cl_context_properties contextType) { const cl_device_id deviceID = devices[0]; const auto platformId = reinterpret_cast(static_cast(platform())); const cl_context_properties propertiesOneContext[] = {CL_CONTEXT_PLATFORM, platformId, contextType, 0x10000, 0}; auto context = std::unique_ptr(Context::create(propertiesOneContext, ClDeviceVector(&deviceID, 1), nullptr, nullptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, context.get()); EXPECT_FALSE(context->getInteropUserSyncEnabled()); const cl_context_properties propertiesTwoContexts[] = {CL_CONTEXT_PLATFORM, platformId, contextType, 0x10000, contextType, 0x10000, 0}; context = std::unique_ptr(Context::create(propertiesTwoContexts, ClDeviceVector(&deviceID, 1), nullptr, nullptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, context.get()); EXPECT_FALSE(context->getInteropUserSyncEnabled()); } cl_int retVal = CL_SUCCESS; MockContext *context = nullptr; 
cl_context_properties properties[3] = {}; }; } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/context/gl/windows/000077500000000000000000000000001422164147700254075ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/context/gl/windows/CMakeLists.txt000066400000000000000000000005151422164147700301500ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(WIN32) set(IGDRCL_SRCS_tests_context_gl_windows ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/context_gl_tests_windows.cpp ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_context_gl_windows}) endif() compute-runtime-22.14.22890/opencl/test/unit_test/context/gl/windows/context_gl_tests_windows.cpp000066400000000000000000000012261422164147700332560ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/context/gl/context_gl_tests.h" namespace NEO { TEST_F(GlContextTest, GivenClGlContextWhenContextIsCreatedThenSuccess) { testContextCreation(CL_GL_CONTEXT_KHR); } TEST_F(GlContextTest, GivenEglContextWhenContextIsCreatedThenSuccess) { testContextCreation(CL_EGL_DISPLAY_KHR); } TEST_F(GlContextTest, GivenGlxContextWhenContextIsCreatedThenSuccess) { testContextCreation(CL_GLX_DISPLAY_KHR); } TEST_F(GlContextTest, GivenWglContextWhenContextIsCreatedThenSuccess) { testContextCreation(CL_WGL_HDC_KHR); } } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/d3d_sharing/000077500000000000000000000000001422164147700240145ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/d3d_sharing/CMakeLists.txt000066400000000000000000000010001422164147700265430ustar00rootroot00000000000000# # Copyright (C) 2018-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_d3d_sharing ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/d3d9_tests.cpp 
${CMAKE_CURRENT_SOURCE_DIR}/d3d_tests_part1.cpp ${CMAKE_CURRENT_SOURCE_DIR}/d3d_tests_part2.cpp ${CMAKE_CURRENT_SOURCE_DIR}/d3d_aux_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cl_dx_sharing_tests.cpp ) if(WIN32) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_d3d_sharing}) endif() compute-runtime-22.14.22890/opencl/test/unit_test/d3d_sharing/cl_dx_sharing_tests.cpp000066400000000000000000000325661422164147700305620ustar00rootroot00000000000000/* * Copyright (C) 2019-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/utilities/arrayref.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/sharings/d3d/cl_d3d_api.h" #include "opencl/source/sharings/d3d/d3d_buffer.h" #include "opencl/source/sharings/d3d/d3d_sharing.h" #include "opencl/source/sharings/d3d/d3d_surface.h" #include "opencl/source/sharings/d3d/d3d_texture.h" #include "opencl/test/unit_test/fixtures/platform_fixture.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_d3d_objects.h" #include "gtest/gtest.h" static const DXGI_FORMAT DXGIformats[] = { DXGI_FORMAT_R32G32B32A32_TYPELESS, DXGI_FORMAT_R32G32B32A32_FLOAT, DXGI_FORMAT_R32G32B32A32_UINT, DXGI_FORMAT_R32G32B32A32_SINT, DXGI_FORMAT_R32G32B32_TYPELESS, DXGI_FORMAT_R32G32B32_FLOAT, DXGI_FORMAT_R32G32B32_UINT, DXGI_FORMAT_R32G32B32_SINT, DXGI_FORMAT_R16G16B16A16_TYPELESS, DXGI_FORMAT_R16G16B16A16_FLOAT, DXGI_FORMAT_R16G16B16A16_UNORM, DXGI_FORMAT_R16G16B16A16_UINT, DXGI_FORMAT_R16G16B16A16_SNORM, DXGI_FORMAT_R16G16B16A16_SINT, DXGI_FORMAT_R32G32_TYPELESS, DXGI_FORMAT_R32G32_FLOAT, DXGI_FORMAT_R32G32_UINT, DXGI_FORMAT_R32G32_SINT, DXGI_FORMAT_R32G8X24_TYPELESS, DXGI_FORMAT_D32_FLOAT_S8X24_UINT, DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS, DXGI_FORMAT_X32_TYPELESS_G8X24_UINT, DXGI_FORMAT_R10G10B10A2_TYPELESS, DXGI_FORMAT_R10G10B10A2_UNORM, DXGI_FORMAT_R10G10B10A2_UINT, DXGI_FORMAT_R11G11B10_FLOAT, DXGI_FORMAT_R8G8B8A8_TYPELESS, DXGI_FORMAT_R8G8B8A8_UNORM, 
DXGI_FORMAT_R8G8B8A8_UNORM_SRGB, DXGI_FORMAT_R8G8B8A8_UINT, DXGI_FORMAT_R8G8B8A8_SNORM, DXGI_FORMAT_R8G8B8A8_SINT, DXGI_FORMAT_R16G16_TYPELESS, DXGI_FORMAT_R16G16_FLOAT, DXGI_FORMAT_R16G16_UNORM, DXGI_FORMAT_R16G16_UINT, DXGI_FORMAT_R16G16_SNORM, DXGI_FORMAT_R16G16_SINT, DXGI_FORMAT_R32_TYPELESS, DXGI_FORMAT_D32_FLOAT, DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_R32_UINT, DXGI_FORMAT_R32_SINT, DXGI_FORMAT_R24G8_TYPELESS, DXGI_FORMAT_D24_UNORM_S8_UINT, DXGI_FORMAT_R24_UNORM_X8_TYPELESS, DXGI_FORMAT_X24_TYPELESS_G8_UINT, DXGI_FORMAT_R8G8_TYPELESS, DXGI_FORMAT_R8G8_UNORM, DXGI_FORMAT_R8G8_UINT, DXGI_FORMAT_R8G8_SNORM, DXGI_FORMAT_R8G8_SINT, DXGI_FORMAT_R16_TYPELESS, DXGI_FORMAT_R16_FLOAT, DXGI_FORMAT_D16_UNORM, DXGI_FORMAT_R16_UNORM, DXGI_FORMAT_R16_UINT, DXGI_FORMAT_R16_SNORM, DXGI_FORMAT_R16_SINT, DXGI_FORMAT_R8_TYPELESS, DXGI_FORMAT_R8_UNORM, DXGI_FORMAT_R8_UINT, DXGI_FORMAT_R8_SNORM, DXGI_FORMAT_R8_SINT, DXGI_FORMAT_A8_UNORM, DXGI_FORMAT_R1_UNORM, DXGI_FORMAT_R9G9B9E5_SHAREDEXP, DXGI_FORMAT_R8G8_B8G8_UNORM, DXGI_FORMAT_G8R8_G8B8_UNORM, DXGI_FORMAT_BC1_TYPELESS, DXGI_FORMAT_BC1_UNORM, DXGI_FORMAT_BC1_UNORM_SRGB, DXGI_FORMAT_BC2_TYPELESS, DXGI_FORMAT_BC2_UNORM, DXGI_FORMAT_BC2_UNORM_SRGB, DXGI_FORMAT_BC3_TYPELESS, DXGI_FORMAT_BC3_UNORM, DXGI_FORMAT_BC3_UNORM_SRGB, DXGI_FORMAT_BC4_TYPELESS, DXGI_FORMAT_BC4_UNORM, DXGI_FORMAT_BC4_SNORM, DXGI_FORMAT_BC5_TYPELESS, DXGI_FORMAT_BC5_UNORM, DXGI_FORMAT_BC5_SNORM, DXGI_FORMAT_B5G6R5_UNORM, DXGI_FORMAT_B5G5R5A1_UNORM, DXGI_FORMAT_B8G8R8A8_UNORM, DXGI_FORMAT_B8G8R8X8_UNORM, DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM, DXGI_FORMAT_B8G8R8A8_TYPELESS, DXGI_FORMAT_B8G8R8A8_UNORM_SRGB, DXGI_FORMAT_B8G8R8X8_TYPELESS, DXGI_FORMAT_B8G8R8X8_UNORM_SRGB, DXGI_FORMAT_BC6H_TYPELESS, DXGI_FORMAT_BC6H_UF16, DXGI_FORMAT_BC6H_SF16, DXGI_FORMAT_BC7_TYPELESS, DXGI_FORMAT_BC7_UNORM, DXGI_FORMAT_BC7_UNORM_SRGB, DXGI_FORMAT_AYUV, DXGI_FORMAT_Y410, DXGI_FORMAT_Y416, DXGI_FORMAT_NV12, DXGI_FORMAT_P010, DXGI_FORMAT_P016, DXGI_FORMAT_420_OPAQUE, DXGI_FORMAT_YUY2, 
DXGI_FORMAT_Y210, DXGI_FORMAT_Y216, DXGI_FORMAT_NV11, DXGI_FORMAT_AI44, DXGI_FORMAT_IA44, DXGI_FORMAT_P8, DXGI_FORMAT_A8P8, DXGI_FORMAT_B4G4R4A4_UNORM, DXGI_FORMAT_P208, DXGI_FORMAT_V208, DXGI_FORMAT_V408, DXGI_FORMAT_FORCE_UINT}; template struct clIntelSharingFormatQueryDX1X : public PlatformFixture, public ::testing::Test { std::vector retrievedFormats; ArrayRef availableFormats; MockD3DSharingFunctions *mockSharingFcns; MockContext *context; cl_uint numImageFormats; cl_int retVal; size_t retSize; void SetUp() override { PlatformFixture::SetUp(); context = new MockContext(pPlatform->getClDevice(0)); mockSharingFcns = new MockD3DSharingFunctions(); context->setSharingFunctions(mockSharingFcns); mockSharingFcns->checkFormatSupportSetParam1 = true; mockSharingFcns->checkFormatSupportParamsSet.pFormat = D3D11_FORMAT_SUPPORT_BUFFER | D3D11_FORMAT_SUPPORT_TEXTURE2D | D3D11_FORMAT_SUPPORT_TEXTURE3D; availableFormats = ArrayRef(DXGIformats); retrievedFormats.assign(availableFormats.size(), DXGI_FORMAT_UNKNOWN); } void TearDown() override { delete context; PlatformFixture::TearDown(); } }; typedef clIntelSharingFormatQueryDX1X clIntelSharingFormatQueryDX10; typedef clIntelSharingFormatQueryDX1X clIntelSharingFormatQueryDX11; TEST_F(clIntelSharingFormatQueryDX10, givenInvalidContextWhenDX10TextureFormatsRequestedThenInvalidContextError) { retVal = clGetSupportedD3D10TextureFormatsINTEL(NULL, CL_MEM_READ_WRITE, 0, static_cast(retrievedFormats.size()), &retrievedFormats[0], &numImageFormats); EXPECT_EQ(CL_INVALID_CONTEXT, retVal); } TEST_F(clIntelSharingFormatQueryDX10, givenValidParametersWhenRequestedDX10TextureFormatsThenTheResultIsASubsetOfKnownFormatsWithoutUnsupportedPlanars) { retVal = clGetSupportedD3D10TextureFormatsINTEL(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, static_cast(retrievedFormats.size()), &retrievedFormats[0], &numImageFormats); ASSERT_EQ(retVal, CL_SUCCESS); for (cl_uint i = 0; i < numImageFormats; ++i) { 
EXPECT_NE(std::find(availableFormats.begin(), availableFormats.end(), retrievedFormats[i]), availableFormats.end()); } EXPECT_EQ(std::find(retrievedFormats.begin(), retrievedFormats.end(), DXGI_FORMAT_420_OPAQUE), retrievedFormats.end()); EXPECT_EQ(std::find(retrievedFormats.begin(), retrievedFormats.end(), DXGI_FORMAT_NV11), retrievedFormats.end()); EXPECT_EQ(std::find(retrievedFormats.begin(), retrievedFormats.end(), DXGI_FORMAT_P208), retrievedFormats.end()); } TEST_F(clIntelSharingFormatQueryDX10, givenValidParametersWhenRequestedDX10TextureFormatsTwiceThenTheResultsAreTheSame) { retVal = clGetSupportedD3D10TextureFormatsINTEL(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, static_cast(retrievedFormats.size()), &retrievedFormats[0], &numImageFormats); ASSERT_EQ(retVal, CL_SUCCESS); std::vector formatsRetrievedForTheSecondTime(availableFormats.size()); cl_uint anotherNumImageFormats; retVal = clGetSupportedD3D10TextureFormatsINTEL(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, static_cast(formatsRetrievedForTheSecondTime.size()), &formatsRetrievedForTheSecondTime[0], &anotherNumImageFormats); ASSERT_EQ(retVal, CL_SUCCESS); ASSERT_EQ(numImageFormats, anotherNumImageFormats); ASSERT_EQ(memcmp(&retrievedFormats[0], &formatsRetrievedForTheSecondTime[0], numImageFormats * sizeof(DXGI_FORMAT)), 0); } TEST_F(clIntelSharingFormatQueryDX10, givenNullFormatsWhenRequestedDX10TextureFormatsThenNumImageFormatsIsSane) { retVal = clGetSupportedD3D10TextureFormatsINTEL(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 0, nullptr, &numImageFormats); ASSERT_EQ(retVal, CL_SUCCESS); ASSERT_LE(0U, numImageFormats); ASSERT_LE(numImageFormats, static_cast(availableFormats.size())); } TEST_F(clIntelSharingFormatQueryDX10, givenNullPointersWhenRequestedDX10TextureFormatsThenCLSuccessIsReturned) { retVal = clGetSupportedD3D10TextureFormatsINTEL(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, static_cast(retrievedFormats.size()), nullptr, nullptr); ASSERT_EQ(retVal, 
CL_SUCCESS); } TEST_F(clIntelSharingFormatQueryDX11, givenInvalidContextWhenDX11TextureFormatsRequestedThenInvalidContextError) { retVal = clGetSupportedD3D11TextureFormatsINTEL(nullptr, CL_MEM_READ_WRITE, 0, 0, static_cast(retrievedFormats.size()), &retrievedFormats[0], &numImageFormats); EXPECT_EQ(CL_INVALID_CONTEXT, retVal); } TEST_F(clIntelSharingFormatQueryDX11, givenValidParametersWhenRequestedDX11TextureFormatsThenTheResultIsASubsetOfKnownFormats) { retVal = clGetSupportedD3D11TextureFormatsINTEL(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 0, static_cast(retrievedFormats.size()), &retrievedFormats[0], &numImageFormats); ASSERT_EQ(retVal, CL_SUCCESS); for (cl_uint i = 0; i < numImageFormats; ++i) { EXPECT_NE(std::find(availableFormats.begin(), availableFormats.end(), retrievedFormats[i]), availableFormats.end()); } } TEST_F(clIntelSharingFormatQueryDX11, givenNullFormatsWhenRequestedDX11TextureFormatsThenNumImageFormatsIsSane) { retVal = clGetSupportedD3D11TextureFormatsINTEL(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 0, 0, nullptr, &numImageFormats); ASSERT_EQ(retVal, CL_SUCCESS); ASSERT_LE(0U, numImageFormats); ASSERT_LE(numImageFormats, static_cast(availableFormats.size())); } TEST_F(clIntelSharingFormatQueryDX11, givenNullPointersWhenRequestedDX11TextureFormatsThenCLSuccessIsReturned) { retVal = clGetSupportedD3D11TextureFormatsINTEL(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 0, static_cast(retrievedFormats.size()), nullptr, nullptr); ASSERT_EQ(retVal, CL_SUCCESS); } TEST_F(clIntelSharingFormatQueryDX11, givenValidParametersWhenRequestedDX11TextureFormatsTwiceThenTheResultsAreTheSame) { retVal = clGetSupportedD3D11TextureFormatsINTEL(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 0, static_cast(retrievedFormats.size()), &retrievedFormats[0], &numImageFormats); ASSERT_EQ(retVal, CL_SUCCESS); std::vector formatsRetrievedForTheSecondTime(availableFormats.size()); cl_uint anotherNumImageFormats; retVal = 
clGetSupportedD3D11TextureFormatsINTEL(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 0, static_cast(formatsRetrievedForTheSecondTime.size()), &formatsRetrievedForTheSecondTime[0], &anotherNumImageFormats); ASSERT_EQ(retVal, CL_SUCCESS); ASSERT_EQ(numImageFormats, anotherNumImageFormats); ASSERT_EQ(memcmp(&retrievedFormats[0], &formatsRetrievedForTheSecondTime[0], numImageFormats * sizeof(DXGI_FORMAT)), 0); } TEST_F(clIntelSharingFormatQueryDX11, givenValidParametersWhenRequestingDX11TextureFormatsForPlane1ThenPlanarFormatsAreReturned) { retVal = clGetSupportedD3D11TextureFormatsINTEL(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 1, static_cast(retrievedFormats.size()), &retrievedFormats[0], &numImageFormats); EXPECT_EQ(retVal, CL_SUCCESS); std::vector expectedPlanarFormats = {DXGI_FORMAT_NV12, DXGI_FORMAT_P010, DXGI_FORMAT_P016}; EXPECT_EQ(expectedPlanarFormats.size(), numImageFormats); for (auto format : expectedPlanarFormats) { auto found = std::find(retrievedFormats.begin(), retrievedFormats.end(), format); EXPECT_NE(found, retrievedFormats.end()); } } TEST_F(clIntelSharingFormatQueryDX11, givenValidParametersWhenRequestingDX11TextureFormatsForPlane2AndAboveThenZeroFormatsIsReturned) { retVal = clGetSupportedD3D11TextureFormatsINTEL(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 2, static_cast(retrievedFormats.size()), &retrievedFormats[0], &numImageFormats); EXPECT_EQ(retVal, CL_SUCCESS); EXPECT_EQ(0, numImageFormats); retVal = clGetSupportedD3D11TextureFormatsINTEL(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 3, static_cast(retrievedFormats.size()), &retrievedFormats[0], &numImageFormats); EXPECT_EQ(retVal, CL_SUCCESS); EXPECT_EQ(0, numImageFormats); } compute-runtime-22.14.22890/opencl/test/unit_test/d3d_sharing/d3d9_tests.cpp000066400000000000000000002030351422164147700265100ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include 
"shared/source/memory_manager/os_agnostic_memory_manager.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/mocks/mock_gmm.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/api/api.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/os_interface/windows/d3d_sharing_functions.h" #include "opencl/source/sharings/d3d/cl_d3d_api.h" #include "opencl/source/sharings/d3d/d3d_surface.h" #include "opencl/test/unit_test/fixtures/multi_root_device_fixture.h" #include "opencl/test/unit_test/fixtures/platform_fixture.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_d3d_objects.h" #include "opencl/test/unit_test/mocks/mock_platform.h" namespace NEO { template <> uint32_t MockD3DSharingFunctions::getDxgiDescCalled = 0; template <> DXGI_ADAPTER_DESC MockD3DSharingFunctions::mockDxgiDesc = {{0}}; template <> IDXGIAdapter *MockD3DSharingFunctions::getDxgiDescAdapterRequested = nullptr; class MockMM : public OsAgnosticMemoryManager { public: MockMM(const ExecutionEnvironment &executionEnvironment) : OsAgnosticMemoryManager(const_cast(executionEnvironment)){}; GraphicsAllocation *createGraphicsAllocationFromSharedHandle(osHandle handle, const AllocationProperties &properties, bool requireSpecificBitness, bool isHostIpcAllocation) override { auto alloc = OsAgnosticMemoryManager::createGraphicsAllocationFromSharedHandle(handle, properties, requireSpecificBitness, isHostIpcAllocation); alloc->setDefaultGmm(forceGmm); gmmOwnershipPassed = true; return alloc; } GraphicsAllocation *allocateGraphicsMemoryForImage(const AllocationData &allocationData) override { auto gmm = std::make_unique(executionEnvironment.rootDeviceEnvironments[allocationData.rootDeviceIndex]->getGmmClientContext(), *allocationData.imgInfo, StorageInfo{}, false); AllocationProperties 
properties(allocationData.rootDeviceIndex, nullptr, false, AllocationType::SHARED_IMAGE, false, {}); auto alloc = OsAgnosticMemoryManager::createGraphicsAllocationFromSharedHandle(1, properties, false, false); alloc->setDefaultGmm(forceGmm); gmmOwnershipPassed = true; return alloc; } void *lockResourceImpl(GraphicsAllocation &allocation) override { lockResourceCalled++; EXPECT_EQ(expectedLockingAllocation, &allocation); return lockResourceReturnValue; } void unlockResourceImpl(GraphicsAllocation &allocation) override { unlockResourceCalled++; EXPECT_EQ(expectedLockingAllocation, &allocation); } int32_t lockResourceCalled = 0; int32_t unlockResourceCalled = 0; GraphicsAllocation *expectedLockingAllocation = nullptr; void *lockResourceReturnValue = nullptr; Gmm *forceGmm = nullptr; bool gmmOwnershipPassed = false; }; class D3D9Tests : public PlatformFixture, public ::testing::Test { public: typedef typename D3DTypesHelper::D3D9 D3D9; typedef typename D3D9::D3DDevice D3DDevice; typedef typename D3D9::D3DQuery D3DQuery; typedef typename D3D9::D3DQueryDesc D3DQueryDesc; typedef typename D3D9::D3DResource D3DResource; typedef typename D3D9::D3DTexture2dDesc D3DTexture2dDesc; typedef typename D3D9::D3DTexture2d D3DTexture2d; void setupMockGmm() { ImageDescriptor imgDesc = {}; imgDesc.imageHeight = 10; imgDesc.imageWidth = 10; imgDesc.imageDepth = 1; imgDesc.imageType = ImageType::Image2D; auto imgInfo = MockGmm::initImgInfo(imgDesc, 0, nullptr); gmm = MockGmm::queryImgParams(pPlatform->getClDevice(0)->getGmmClientContext(), imgInfo, false).release(); mockGmmResInfo = static_cast(gmm->gmmResourceInfo.get()); memoryManager->forceGmm = gmm; } void SetUp() override { PlatformFixture::SetUp(); memoryManager = std::make_unique(*pPlatform->peekExecutionEnvironment()); context = new MockContext(pPlatform->getClDevice(0)); context->preferD3dSharedResources = true; context->memoryManager = memoryManager.get(); mockSharingFcns = new MockD3DSharingFunctions(); 
context->setSharingFunctions(mockSharingFcns); cmdQ = new MockCommandQueue(context, context->getDevice(0), 0, false); DebugManager.injectFcn = &mockSharingFcns->mockGetDxgiDesc; surfaceInfo.resource = reinterpret_cast(&dummyD3DSurface); mockSharingFcns->mockTexture2dDesc.Format = D3DFMT_R32F; mockSharingFcns->mockTexture2dDesc.Height = 10; mockSharingFcns->mockTexture2dDesc.Width = 10; setupMockGmm(); } void TearDown() override { delete cmdQ; delete context; if (!memoryManager->gmmOwnershipPassed) { delete gmm; } PlatformFixture::TearDown(); } MockD3DSharingFunctions *mockSharingFcns; MockContext *context; MockCommandQueue *cmdQ; DebugManagerStateRestore dbgRestore; char dummyD3DSurface; char dummyD3DSurfaceStaging; cl_dx9_surface_info_khr surfaceInfo = {}; Gmm *gmm = nullptr; MockGmmResourceInfo *mockGmmResInfo = nullptr; std::unique_ptr memoryManager; }; TEST_F(D3D9Tests, givenD3DDeviceParamWhenContextCreationThenSetProperValues) { cl_device_id deviceID = context->getDevice(0); cl_platform_id pid[1] = {pPlatform}; char expectedDevice; cl_context_properties validAdapters[6] = {CL_CONTEXT_ADAPTER_D3D9_KHR, CL_CONTEXT_ADAPTER_D3D9EX_KHR, CL_CONTEXT_ADAPTER_DXVA_KHR, CL_CONTEXT_D3D9_DEVICE_INTEL, CL_CONTEXT_D3D9EX_DEVICE_INTEL, CL_CONTEXT_DXVA_DEVICE_INTEL}; cl_context_properties validProperties[5] = {CL_CONTEXT_PLATFORM, (cl_context_properties)pid[0], CL_CONTEXT_ADAPTER_D3D9_KHR, (cl_context_properties)&expectedDevice, 0}; std::unique_ptr ctx(nullptr); cl_int retVal = CL_SUCCESS; for (int i = 0; i < 6; i++) { validProperties[2] = validAdapters[i]; ctx.reset(Context::create(validProperties, ClDeviceVector(&deviceID, 1), nullptr, nullptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, ctx.get()); EXPECT_NE(nullptr, ctx->getSharing>()); EXPECT_TRUE(reinterpret_cast(&expectedDevice) == ctx->getSharing>()->getDevice()); } } TEST_F(D3D9Tests, WhenGetDeviceIdThenOneCorrectDeviceIsReturned) { cl_device_id expectedDevice = *devices; cl_device_id device = 0; 
cl_uint numDevices = 0; auto retVal = clGetDeviceIDsFromDX9MediaAdapterKHR(platform(), 1, nullptr, nullptr, 1, 1, &device, &numDevices); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(expectedDevice, device); EXPECT_EQ(1u, numDevices); retVal = clGetDeviceIDsFromDX9INTEL(platform(), 1, nullptr, 1, 1, &device, &numDevices); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(expectedDevice, device); EXPECT_EQ(1u, numDevices); } TEST_F(D3D9Tests, WhenCreatingSurfaceThenImagePropertiesAreSetCorrectly) { cl_int retVal; cl_image_format expectedImgFormat = {}; ImagePlane imagePlane = ImagePlane::NO_PLANE; D3DSurface::findImgFormat(mockSharingFcns->mockTexture2dDesc.Format, expectedImgFormat, 0, imagePlane); mockSharingFcns->getTexture2dDescSetParams = true; mockSharingFcns->getTexture2dDescParamsSet.textureDesc = mockSharingFcns->mockTexture2dDesc; auto memObj = clCreateFromDX9MediaSurfaceKHR(context, CL_MEM_READ_WRITE, 0, &surfaceInfo, 0, &retVal); ASSERT_NE(nullptr, memObj); EXPECT_EQ(0u, mockGmmResInfo->getOffsetCalled); auto image = castToObject(memObj); EXPECT_NE(nullptr, image->getSharingHandler()); EXPECT_TRUE(CL_MEM_READ_WRITE == image->getFlags()); EXPECT_TRUE(expectedImgFormat.image_channel_data_type == image->getImageFormat().image_channel_data_type); EXPECT_TRUE(expectedImgFormat.image_channel_order == image->getImageFormat().image_channel_order); EXPECT_TRUE(CL_MEM_OBJECT_IMAGE2D == image->getImageDesc().image_type); EXPECT_EQ(mockSharingFcns->mockTexture2dDesc.Width, image->getImageDesc().image_width); EXPECT_EQ(mockSharingFcns->mockTexture2dDesc.Height, image->getImageDesc().image_height); clReleaseMemObject(memObj); EXPECT_EQ(1u, mockSharingFcns->getTexture2dDescCalled); EXPECT_EQ(1u, mockSharingFcns->updateDeviceCalled); EXPECT_EQ(reinterpret_cast(&dummyD3DSurface), mockSharingFcns->updateDeviceParamsPassed[0].resource); } TEST(D3D9SimpleTests, givenWrongFormatWhenFindIsCalledThenErrorIsReturned) { cl_image_format expectedImgFormat = {}; ImagePlane imagePlane = 
ImagePlane::NO_PLANE; auto status = D3DSurface::findImgFormat(D3DFMT_FORCE_DWORD, expectedImgFormat, 0, imagePlane); EXPECT_EQ(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, status); } TEST_F(D3D9Tests, WhenCreatingSurfaceIntelThenImagePropertiesAreSetCorrectly) { cl_int retVal; cl_image_format expectedImgFormat = {}; ImagePlane imagePlane = ImagePlane::NO_PLANE; D3DSurface::findImgFormat(mockSharingFcns->mockTexture2dDesc.Format, expectedImgFormat, 0, imagePlane); mockSharingFcns->getTexture2dDescSetParams = true; mockSharingFcns->getTexture2dDescParamsSet.textureDesc = mockSharingFcns->mockTexture2dDesc; auto memObj = clCreateFromDX9MediaSurfaceINTEL(context, CL_MEM_READ_WRITE, surfaceInfo.resource, surfaceInfo.shared_handle, 0, &retVal); ASSERT_NE(nullptr, memObj); auto image = castToObject(memObj); EXPECT_NE(nullptr, image->getSharingHandler()); EXPECT_TRUE(CL_MEM_READ_WRITE == image->getFlags()); EXPECT_TRUE(expectedImgFormat.image_channel_data_type == image->getImageFormat().image_channel_data_type); EXPECT_TRUE(expectedImgFormat.image_channel_order == image->getImageFormat().image_channel_order); EXPECT_TRUE(CL_MEM_OBJECT_IMAGE2D == image->getImageDesc().image_type); EXPECT_EQ(mockSharingFcns->mockTexture2dDesc.Width, image->getImageDesc().image_width); EXPECT_EQ(mockSharingFcns->mockTexture2dDesc.Height, image->getImageDesc().image_height); clReleaseMemObject(memObj); EXPECT_EQ(1u, mockSharingFcns->getTexture2dDescCalled); EXPECT_EQ(1u, mockSharingFcns->updateDeviceCalled); EXPECT_EQ(reinterpret_cast(&dummyD3DSurface), mockSharingFcns->updateDeviceParamsPassed[0].resource); } TEST_F(D3D9Tests, givenD3DHandleWhenCreatingSharedSurfaceThenAllocationTypeImageIsSet) { mockSharingFcns->mockTexture2dDesc.Format = (D3DFORMAT)MAKEFOURCC('Y', 'V', '1', '2'); surfaceInfo.shared_handle = reinterpret_cast(1); mockSharingFcns->getTexture2dDescSetParams = true; mockSharingFcns->getTexture2dDescParamsSet.textureDesc = mockSharingFcns->mockTexture2dDesc; auto sharedImg = 
std::unique_ptr(D3DSurface::create(context, &surfaceInfo, CL_MEM_READ_WRITE, 0, 2, nullptr)); ASSERT_NE(nullptr, sharedImg.get()); auto graphicsAllocation = sharedImg->getGraphicsAllocation(context->getDevice(0)->getRootDeviceIndex()); ASSERT_NE(nullptr, graphicsAllocation); EXPECT_EQ(AllocationType::SHARED_IMAGE, graphicsAllocation->getAllocationType()); EXPECT_EQ(1u, mockSharingFcns->getTexture2dDescCalled); } TEST_F(D3D9Tests, givenUPlaneWhenCreateSurfaceThenChangeWidthHeightAndPitch) { mockSharingFcns->mockTexture2dDesc.Format = (D3DFORMAT)MAKEFOURCC('Y', 'V', '1', '2'); surfaceInfo.shared_handle = (HANDLE)1; mockSharingFcns->getTexture2dDescSetParams = true; mockSharingFcns->getTexture2dDescParamsSet.textureDesc = mockSharingFcns->mockTexture2dDesc; auto sharedImg = std::unique_ptr(D3DSurface::create(context, &surfaceInfo, CL_MEM_READ_WRITE, 0, 2, nullptr)); ASSERT_NE(nullptr, sharedImg.get()); EXPECT_EQ(mockSharingFcns->mockTexture2dDesc.Width / 2, sharedImg->getImageDesc().image_width); EXPECT_EQ(mockSharingFcns->mockTexture2dDesc.Height / 2, sharedImg->getImageDesc().image_height); size_t expectedRowPitch = static_cast(mockGmmResInfo->getRenderPitch()) / 2; EXPECT_EQ(expectedRowPitch, sharedImg->getImageDesc().image_row_pitch); EXPECT_EQ(1u, mockSharingFcns->getTexture2dDescCalled); } TEST_F(D3D9Tests, givenVPlaneWhenCreateSurfaceThenChangeWidthHeightAndPitch) { mockSharingFcns->mockTexture2dDesc.Format = (D3DFORMAT)MAKEFOURCC('Y', 'V', '1', '2'); surfaceInfo.shared_handle = (HANDLE)1; mockSharingFcns->getTexture2dDescSetParams = true; mockSharingFcns->getTexture2dDescParamsSet.textureDesc = mockSharingFcns->mockTexture2dDesc; auto sharedImg = std::unique_ptr(D3DSurface::create(context, &surfaceInfo, CL_MEM_READ_WRITE, 0, 1, nullptr)); ASSERT_NE(nullptr, sharedImg.get()); EXPECT_EQ(mockSharingFcns->mockTexture2dDesc.Width / 2, sharedImg->getImageDesc().image_width); EXPECT_EQ(mockSharingFcns->mockTexture2dDesc.Height / 2, 
// Remainder of the test that starts on the preceding physical line: the row pitch is expected
// to be half of the mock GMM render pitch.
// NOTE(review): std::unique_ptr/static_cast appear without template arguments in this copy —
// looks like extraction damage; confirm against upstream.
              sharedImg->getImageDesc().image_height);
    size_t expectedRowPitch = static_cast(mockGmmResInfo->getRenderPitch()) / 2;
    EXPECT_EQ(expectedRowPitch, sharedImg->getImageDesc().image_row_pitch);
    EXPECT_EQ(1u, mockSharingFcns->getTexture2dDescCalled);
}

// NV12 texture, shared handle 1, plane argument 1: width and height are expected to be halved
// while the row pitch equals the full mock GMM render pitch.
TEST_F(D3D9Tests, givenUVPlaneWhenCreateSurfaceThenChangeWidthHeightAndPitch) {
    mockSharingFcns->mockTexture2dDesc.Format = (D3DFORMAT)MAKEFOURCC('N', 'V', '1', '2');
    surfaceInfo.shared_handle = (HANDLE)1;
    mockSharingFcns->getTexture2dDescSetParams = true;
    mockSharingFcns->getTexture2dDescParamsSet.textureDesc = mockSharingFcns->mockTexture2dDesc;
    auto sharedImg = std::unique_ptr(D3DSurface::create(context, &surfaceInfo, CL_MEM_READ_WRITE, 0, 1, nullptr));
    ASSERT_NE(nullptr, sharedImg.get());
    EXPECT_EQ(mockSharingFcns->mockTexture2dDesc.Width / 2, sharedImg->getImageDesc().image_width);
    EXPECT_EQ(mockSharingFcns->mockTexture2dDesc.Height / 2, sharedImg->getImageDesc().image_height);
    size_t expectedRowPitch = static_cast(mockGmmResInfo->getRenderPitch());
    EXPECT_EQ(expectedRowPitch, sharedImg->getImageDesc().image_row_pitch);
    EXPECT_EQ(1u, mockSharingFcns->getTexture2dDescCalled);
}

// NV12 texture, shared handle 1, plane argument 0: width, height and row pitch are expected
// to match the texture / render pitch unchanged. (The test continues on the next physical line.)
TEST_F(D3D9Tests, givenYPlaneWhenCreateSurfaceThenDontChangeWidthHeightAndPitch) {
    mockSharingFcns->mockTexture2dDesc.Format = (D3DFORMAT)MAKEFOURCC('N', 'V', '1', '2');
    surfaceInfo.shared_handle = (HANDLE)1;
    mockSharingFcns->getTexture2dDescSetParams = true;
    mockSharingFcns->getTexture2dDescParamsSet.textureDesc = mockSharingFcns->mockTexture2dDesc;
    auto sharedImg = std::unique_ptr(D3DSurface::create(context, &surfaceInfo, CL_MEM_READ_WRITE, 0, 0, nullptr));
    ASSERT_NE(nullptr, sharedImg.get());
    EXPECT_EQ(mockSharingFcns->mockTexture2dDesc.Width, sharedImg->getImageDesc().image_width);
    EXPECT_EQ(mockSharingFcns->mockTexture2dDesc.Height, sharedImg->getImageDesc().image_height);
    size_t expectedRowPitch = static_cast(mockGmmResInfo->getRenderPitch());
    EXPECT_EQ(expectedRowPitch, sharedImg->getImageDesc().image_row_pitch);
    EXPECT_EQ(1u,
// Final argument and closing brace of the test that starts on the preceding physical line.
// NOTE(review): std::unique_ptr/static_cast appear without template arguments in this copy —
// looks like extraction damage; confirm against upstream.
              mockSharingFcns->getTexture2dDescCalled);
}

// YV12 texture with a zero shared handle, plane argument 2: width and height are expected to
// be halved, but the row pitch is compared against the full (not halved) render pitch.
TEST_F(D3D9Tests, givenUPlaneWhenCreateNonSharedSurfaceThenChangeWidthHeightAndPitch) {
    mockSharingFcns->mockTexture2dDesc.Format = (D3DFORMAT)MAKEFOURCC('Y', 'V', '1', '2');
    surfaceInfo.shared_handle = (HANDLE)0;
    mockSharingFcns->getTexture2dDescSetParams = true;
    mockSharingFcns->getTexture2dDescParamsSet.textureDesc = mockSharingFcns->mockTexture2dDesc;
    auto sharedImg = std::unique_ptr(D3DSurface::create(context, &surfaceInfo, CL_MEM_READ_WRITE, 0, 2, nullptr));
    ASSERT_NE(nullptr, sharedImg.get());
    EXPECT_EQ(mockSharingFcns->mockTexture2dDesc.Width / 2, sharedImg->getImageDesc().image_width);
    EXPECT_EQ(mockSharingFcns->mockTexture2dDesc.Height / 2, sharedImg->getImageDesc().image_height);
    size_t expectedRowPitch = static_cast(mockGmmResInfo->getRenderPitch());
    EXPECT_EQ(expectedRowPitch, sharedImg->getImageDesc().image_row_pitch);
    EXPECT_EQ(1u, mockSharingFcns->getTexture2dDescCalled);
}

// Same as above with plane argument 1.
TEST_F(D3D9Tests, givenVPlaneWhenCreateNonSharedSurfaceThenChangeWidthHeightAndPitch) {
    mockSharingFcns->mockTexture2dDesc.Format = (D3DFORMAT)MAKEFOURCC('Y', 'V', '1', '2');
    surfaceInfo.shared_handle = (HANDLE)0;
    mockSharingFcns->getTexture2dDescSetParams = true;
    mockSharingFcns->getTexture2dDescParamsSet.textureDesc = mockSharingFcns->mockTexture2dDesc;
    auto sharedImg = std::unique_ptr(D3DSurface::create(context, &surfaceInfo, CL_MEM_READ_WRITE, 0, 1, nullptr));
    ASSERT_NE(nullptr, sharedImg.get());
    EXPECT_EQ(mockSharingFcns->mockTexture2dDesc.Width / 2, sharedImg->getImageDesc().image_width);
    EXPECT_EQ(mockSharingFcns->mockTexture2dDesc.Height / 2, sharedImg->getImageDesc().image_height);
    size_t expectedRowPitch = static_cast(mockGmmResInfo->getRenderPitch());
    EXPECT_EQ(expectedRowPitch, sharedImg->getImageDesc().image_row_pitch);
    EXPECT_EQ(1u, mockSharingFcns->getTexture2dDescCalled);
}

// Non-shared (zero handle) variant for the UV plane; the format assignment and the rest of
// the body continue on the next physical line.
TEST_F(D3D9Tests, givenUVPlaneWhenCreateNonSharedSurfaceThenChangeWidthHeightAndPitch) {
    mockSharingFcns->mockTexture2dDesc.Format =
// Remainder of the test that starts on the preceding physical line: NV12 texture, zero shared
// handle, plane argument 1; halved width/height and the full render pitch are expected.
// NOTE(review): std::unique_ptr/static_cast appear without template arguments in this copy —
// looks like extraction damage; confirm against upstream.
        (D3DFORMAT)MAKEFOURCC('N', 'V', '1', '2');
    surfaceInfo.shared_handle = (HANDLE)0;
    mockSharingFcns->getTexture2dDescSetParams = true;
    mockSharingFcns->getTexture2dDescParamsSet.textureDesc = mockSharingFcns->mockTexture2dDesc;
    auto sharedImg = std::unique_ptr(D3DSurface::create(context, &surfaceInfo, CL_MEM_READ_WRITE, 0, 1, nullptr));
    ASSERT_NE(nullptr, sharedImg.get());
    EXPECT_EQ(mockSharingFcns->mockTexture2dDesc.Width / 2, sharedImg->getImageDesc().image_width);
    EXPECT_EQ(mockSharingFcns->mockTexture2dDesc.Height / 2, sharedImg->getImageDesc().image_height);
    size_t expectedRowPitch = static_cast(mockGmmResInfo->getRenderPitch());
    EXPECT_EQ(expectedRowPitch, sharedImg->getImageDesc().image_row_pitch);
    EXPECT_EQ(1u, mockSharingFcns->getTexture2dDescCalled);
}

// NV12 texture, zero shared handle, plane argument 0: dimensions and row pitch are expected
// to be unchanged.
TEST_F(D3D9Tests, givenYPlaneWhenCreateNonSharedSurfaceThenDontChangeWidthHeightAndPitch) {
    mockSharingFcns->mockTexture2dDesc.Format = (D3DFORMAT)MAKEFOURCC('N', 'V', '1', '2');
    surfaceInfo.shared_handle = (HANDLE)0;
    mockSharingFcns->getTexture2dDescSetParams = true;
    mockSharingFcns->getTexture2dDescParamsSet.textureDesc = mockSharingFcns->mockTexture2dDesc;
    auto sharedImg = std::unique_ptr(D3DSurface::create(context, &surfaceInfo, CL_MEM_READ_WRITE, 0, 0, nullptr));
    ASSERT_NE(nullptr, sharedImg.get());
    EXPECT_EQ(mockSharingFcns->mockTexture2dDesc.Width, sharedImg->getImageDesc().image_width);
    EXPECT_EQ(mockSharingFcns->mockTexture2dDesc.Height, sharedImg->getImageDesc().image_height);
    size_t expectedRowPitch = static_cast(mockGmmResInfo->getRenderPitch());
    EXPECT_EQ(expectedRowPitch, sharedImg->getImageDesc().image_row_pitch);
    EXPECT_EQ(1u, mockSharingFcns->getTexture2dDescCalled);
}

// NV12 texture with an out-of-range plane argument; the creation call and its expectations
// follow on the next physical line.
TEST_F(D3D9Tests, givenNV12FormatAndInvalidPlaneWhenSurfaceIsCreatedThenReturnError) {
    cl_int retVal = CL_SUCCESS;
    mockSharingFcns->mockTexture2dDesc.Format = (D3DFORMAT)MAKEFOURCC('N', 'V', '1', '2');
    surfaceInfo.shared_handle = (HANDLE)0;
    mockSharingFcns->getTexture2dDescSetParams = true;
// Remainder of the test that starts on the preceding physical line: plane argument 2 is
// passed and creation is expected to fail with CL_INVALID_VALUE.
    mockSharingFcns->getTexture2dDescParamsSet.textureDesc = mockSharingFcns->mockTexture2dDesc;
    auto img = D3DSurface::create(context, &surfaceInfo, CL_MEM_READ_WRITE, 0, 2, &retVal);
    EXPECT_EQ(nullptr, img);
    EXPECT_EQ(CL_INVALID_VALUE, retVal);
}

// YV12 texture with plane argument 3: creation is expected to return nullptr and
// CL_INVALID_VALUE.
TEST_F(D3D9Tests, givenYV12FormatAndInvalidPlaneWhenSurfaceIsCreatedThenReturnError) {
    cl_int retVal = CL_SUCCESS;
    mockSharingFcns->mockTexture2dDesc.Format = (D3DFORMAT)MAKEFOURCC('Y', 'V', '1', '2');
    surfaceInfo.shared_handle = (HANDLE)0;
    mockSharingFcns->getTexture2dDescSetParams = true;
    mockSharingFcns->getTexture2dDescParamsSet.textureDesc = mockSharingFcns->mockTexture2dDesc;
    auto img = D3DSurface::create(context, &surfaceInfo, CL_MEM_READ_WRITE, 0, 3, &retVal);
    EXPECT_EQ(nullptr, img);
    EXPECT_EQ(CL_INVALID_VALUE, retVal);
}

// D3DFMT_A16B16G16R16 texture with plane argument 1: creation is expected to return nullptr
// and CL_INVALID_VALUE.
TEST_F(D3D9Tests, givenNonPlaneFormatAndNonZeroPlaneWhenSurfaceIsCreatedThenReturnError) {
    cl_int retVal = CL_SUCCESS;
    mockSharingFcns->mockTexture2dDesc.Format = D3DFORMAT::D3DFMT_A16B16G16R16;
    surfaceInfo.shared_handle = (HANDLE)0;
    mockSharingFcns->getTexture2dDescSetParams = true;
    mockSharingFcns->getTexture2dDescParamsSet.textureDesc = mockSharingFcns->mockTexture2dDesc;
    auto img = D3DSurface::create(context, &surfaceInfo, CL_MEM_READ_WRITE, 0, 1, &retVal);
    EXPECT_EQ(nullptr, img);
    EXPECT_EQ(CL_INVALID_VALUE, retVal);
}

// Passing a null resource to clCreateFromDX9MediaSurfaceINTEL is expected to yield nullptr
// and CL_INVALID_DX9_RESOURCE_INTEL.
TEST_F(D3D9Tests, givenNullResourceWhenSurfaceIsCreatedThenReturnError) {
    cl_int retVal = CL_SUCCESS;
    auto img = clCreateFromDX9MediaSurfaceINTEL(context, CL_MEM_READ_WRITE, nullptr, 0, 0, &retVal);
    EXPECT_EQ(nullptr, img);
    EXPECT_EQ(CL_INVALID_DX9_RESOURCE_INTEL, retVal);
}

// Texture description reports Pool = D3DPOOL_SYSTEMMEM; the expectations for the failing
// creation follow on the next physical line.
TEST_F(D3D9Tests, givenNonDefaultPoolWhenSurfaceIsCreatedThenReturnError) {
    cl_int retVal = CL_SUCCESS;
    mockSharingFcns->mockTexture2dDesc.Pool = D3DPOOL_SYSTEMMEM;
    mockSharingFcns->getTexture2dDescSetParams = true;
    mockSharingFcns->getTexture2dDescParamsSet.textureDesc = mockSharingFcns->mockTexture2dDesc;
    auto img = D3DSurface::create(context, &surfaceInfo, CL_MEM_READ_WRITE, 0, 1, &retVal);
// Remainder of the test that starts on the preceding physical line: creation is expected to
// fail with CL_INVALID_DX9_RESOURCE_INTEL.
// NOTE(review): std::unique_ptr/reinterpret_cast appear without template arguments in this
// copy — looks like extraction damage; confirm against upstream.
    EXPECT_EQ(nullptr, img);
    EXPECT_EQ(CL_INVALID_DX9_RESOURCE_INTEL, retVal);
}

// Creates a surface from surfaceInfo, then attempts a second creation from the same
// surfaceInfo while the first is still alive; the second is expected to fail with
// CL_INVALID_DX9_RESOURCE_INTEL.
TEST_F(D3D9Tests, givenAlreadyUsedSurfaceWhenSurfaceIsCreatedThenReturnError) {
    cl_int retVal = CL_SUCCESS;
    surfaceInfo.resource = reinterpret_cast(1);
    mockSharingFcns->getTexture2dDescSetParams = true;
    mockSharingFcns->getTexture2dDescParamsSet.textureDesc = mockSharingFcns->mockTexture2dDesc;
    std::unique_ptr img(D3DSurface::create(context, &surfaceInfo, CL_MEM_READ_WRITE, 0, 0, &retVal));
    EXPECT_NE(nullptr, img.get());
    // The argument is evaluated before reset() releases the first image.
    img.reset(D3DSurface::create(context, &surfaceInfo, CL_MEM_READ_WRITE, 0, 0, &retVal));
    EXPECT_EQ(nullptr, img.get());
    EXPECT_EQ(CL_INVALID_DX9_RESOURCE_INTEL, retVal);
}

// FourCC 'I420' texture: creation is expected to return nullptr and
// CL_INVALID_IMAGE_FORMAT_DESCRIPTOR.
TEST_F(D3D9Tests, givenNotSupportedFormatWhenSurfaceIsCreatedThenReturnError) {
    cl_int retVal = CL_SUCCESS;
    mockSharingFcns->mockTexture2dDesc.Format = (D3DFORMAT)MAKEFOURCC('I', '4', '2', '0');
    surfaceInfo.shared_handle = (HANDLE)0;
    mockSharingFcns->getTexture2dDescSetParams = true;
    mockSharingFcns->getTexture2dDescParamsSet.textureDesc = mockSharingFcns->mockTexture2dDesc;
    auto img = D3DSurface::create(context, &surfaceInfo, CL_MEM_READ_WRITE, 0, 0, &retVal);
    EXPECT_EQ(nullptr, img);
    EXPECT_EQ(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, retVal);
}

// Queries CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR on a surface created with adapter type 5 and
// plane 2. (The query arguments and expectations continue on the next physical line.)
TEST_F(D3D9Tests, GivenMediaSurfaceInfoKhrWhenGetMemObjInfoThenCorrectInfoIsReturned) {
    mockSharingFcns->mockTexture2dDesc.Format = (D3DFORMAT)MAKEFOURCC('Y', 'V', '1', '2');
    cl_dx9_media_adapter_type_khr expectedAdapterType = 5;
    cl_uint expectedPlane = 2;
    cl_dx9_surface_info_khr getSurfaceInfo = {};
    surfaceInfo.shared_handle = (HANDLE)1;
    size_t retSize = 0;
    mockSharingFcns->getTexture2dDescSetParams = true;
    mockSharingFcns->getTexture2dDescParamsSet.textureDesc = mockSharingFcns->mockTexture2dDesc;
    auto sharedImg = std::unique_ptr(D3DSurface::create(context, &surfaceInfo, CL_MEM_READ_WRITE, expectedAdapterType, expectedPlane, nullptr));
    ASSERT_NE(nullptr, sharedImg.get());
    sharedImg->getMemObjectInfo(CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR, sizeof(cl_dx9_surface_info_khr),
// Remainder of the test that starts on the preceding physical line: the returned surface info
// must equal the resource/handle used at creation and retSize must be the struct size.
// NOTE(review): std::unique_ptr appears without template arguments in this copy — looks like
// extraction damage; confirm against upstream.
                                &getSurfaceInfo, &retSize);
    EXPECT_EQ(getSurfaceInfo.resource, surfaceInfo.resource);
    EXPECT_EQ(getSurfaceInfo.shared_handle, surfaceInfo.shared_handle);
    EXPECT_EQ(sizeof(cl_dx9_surface_info_khr), retSize);
    EXPECT_EQ(1u, mockSharingFcns->getTexture2dDescCalled);
}

// Queries CL_MEM_DX9_RESOURCE_INTEL and expects the resource pointer passed at creation,
// with retSize equal to the size of an IDirect3DSurface9 pointer.
TEST_F(D3D9Tests, GivenResourceIntelWhenGetMemObjInfoThenCorrectInfoIsReturned) {
    mockSharingFcns->mockTexture2dDesc.Format = (D3DFORMAT)MAKEFOURCC('Y', 'V', '1', '2');
    cl_dx9_media_adapter_type_khr expectedAdapterType = 5;
    cl_uint expectedPlane = 2;
    cl_dx9_surface_info_khr getSurfaceInfo = {};
    surfaceInfo.shared_handle = (HANDLE)1;
    size_t retSize = 0;
    mockSharingFcns->getTexture2dDescSetParams = true;
    mockSharingFcns->getTexture2dDescParamsSet.textureDesc = mockSharingFcns->mockTexture2dDesc;
    auto sharedImg = std::unique_ptr(D3DSurface::create(context, &surfaceInfo, CL_MEM_READ_WRITE, expectedAdapterType, expectedPlane, nullptr));
    ASSERT_NE(nullptr, sharedImg.get());
    // getSurfaceInfo was already value-initialised above; this second reset is redundant.
    getSurfaceInfo = {};
    sharedImg->getMemObjectInfo(CL_MEM_DX9_RESOURCE_INTEL, sizeof(IDirect3DSurface9 *), &getSurfaceInfo.resource, &retSize);
    EXPECT_EQ(getSurfaceInfo.resource, surfaceInfo.resource);
    EXPECT_EQ(sizeof(IDirect3DSurface9 *), retSize);
    EXPECT_EQ(1u, mockSharingFcns->getTexture2dDescCalled);
}

// Queries CL_MEM_DX9_SHARED_HANDLE_INTEL on a surface created with shared handle 1.
// (The query arguments and expectations continue on the next physical line.)
TEST_F(D3D9Tests, GivenSharedHandleIntelWhenGetMemObjInfoThenCorrectInfoIsReturned) {
    mockSharingFcns->mockTexture2dDesc.Format = (D3DFORMAT)MAKEFOURCC('Y', 'V', '1', '2');
    cl_dx9_media_adapter_type_khr expectedAdapterType = 5;
    cl_uint expectedPlane = 2;
    cl_dx9_surface_info_khr getSurfaceInfo = {};
    surfaceInfo.shared_handle = (HANDLE)1;
    size_t retSize = 0;
    mockSharingFcns->getTexture2dDescSetParams = true;
    mockSharingFcns->getTexture2dDescParamsSet.textureDesc = mockSharingFcns->mockTexture2dDesc;
    auto sharedImg = std::unique_ptr(D3DSurface::create(context, &surfaceInfo, CL_MEM_READ_WRITE, expectedAdapterType, expectedPlane, nullptr));
    ASSERT_NE(nullptr, sharedImg.get());
    sharedImg->getMemObjectInfo(CL_MEM_DX9_SHARED_HANDLE_INTEL,
// Remainder of the test that starts on the preceding physical line: the returned shared handle
// must equal the one used at creation.
// NOTE(review): the size used for the handle query is sizeof(IDirect3DSurface9 *), not
// sizeof(HANDLE) — presumably both are pointer-sized; confirm this is intentional.
// NOTE(review): std::unique_ptr appears without template arguments in this copy — looks like
// extraction damage; confirm against upstream.
                                sizeof(IDirect3DSurface9 *), &getSurfaceInfo.shared_handle, &retSize);
    EXPECT_EQ(getSurfaceInfo.shared_handle, surfaceInfo.shared_handle);
    EXPECT_EQ(sizeof(IDirect3DSurface9 *), retSize);
    EXPECT_EQ(1u, mockSharingFcns->getTexture2dDescCalled);
}

// Queries CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR and expects the adapter type (5) that was passed
// to D3DSurface::create, with retSize equal to sizeof(cl_dx9_media_adapter_type_khr).
TEST_F(D3D9Tests, GivenMediaAdapterTypeKhrWhenGetMemObjInfoThenCorrectInfoIsReturned) {
    mockSharingFcns->mockTexture2dDesc.Format = (D3DFORMAT)MAKEFOURCC('Y', 'V', '1', '2');
    cl_dx9_media_adapter_type_khr adapterType = 0;
    cl_dx9_media_adapter_type_khr expectedAdapterType = 5;
    cl_uint expectedPlane = 2;
    surfaceInfo.shared_handle = (HANDLE)1;
    size_t retSize = 0;
    mockSharingFcns->getTexture2dDescSetParams = true;
    mockSharingFcns->getTexture2dDescParamsSet.textureDesc = mockSharingFcns->mockTexture2dDesc;
    auto sharedImg = std::unique_ptr(D3DSurface::create(context, &surfaceInfo, CL_MEM_READ_WRITE, expectedAdapterType, expectedPlane, nullptr));
    ASSERT_NE(nullptr, sharedImg.get());
    sharedImg->getMemObjectInfo(CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR, sizeof(cl_dx9_media_adapter_type_khr), &adapterType, &retSize);
    EXPECT_EQ(expectedAdapterType, adapterType);
    EXPECT_EQ(sizeof(cl_dx9_media_adapter_type_khr), retSize);
    EXPECT_EQ(1u, mockSharingFcns->getTexture2dDescCalled);
}

// Queries CL_IMAGE_DX9_MEDIA_PLANE_KHR through getImageInfo and expects the plane (2) that was
// passed to D3DSurface::create. (The size check continues on the next physical line.)
TEST_F(D3D9Tests, GivenMediaPlaneKhrWhenGetMemObjInfoThenCorrectInfoIsReturned) {
    mockSharingFcns->mockTexture2dDesc.Format = (D3DFORMAT)MAKEFOURCC('Y', 'V', '1', '2');
    cl_dx9_media_adapter_type_khr expectedAdapterType = 5;
    cl_uint plane = 0;
    cl_uint expectedPlane = 2;
    surfaceInfo.shared_handle = (HANDLE)1;
    size_t retSize = 0;
    mockSharingFcns->getTexture2dDescSetParams = true;
    mockSharingFcns->getTexture2dDescParamsSet.textureDesc = mockSharingFcns->mockTexture2dDesc;
    auto sharedImg = std::unique_ptr(D3DSurface::create(context, &surfaceInfo, CL_MEM_READ_WRITE, expectedAdapterType, expectedPlane, nullptr));
    ASSERT_NE(nullptr, sharedImg.get());
    sharedImg->getImageInfo(CL_IMAGE_DX9_MEDIA_PLANE_KHR, sizeof(cl_uint), &plane, &retSize);
    EXPECT_EQ(expectedPlane, plane);
// Remainder of the test that starts on the preceding physical line: retSize must be
// sizeof(cl_uint) and the texture description must have been fetched once.
// NOTE(review): std::unique_ptr/static_cast appear without template arguments in this copy —
// looks like extraction damage; confirm against upstream.
    EXPECT_EQ(sizeof(cl_uint), retSize);
    EXPECT_EQ(1u, mockSharingFcns->getTexture2dDescCalled);
}

// Queries CL_IMAGE_DX9_PLANE_INTEL through getImageInfo and expects the plane (2) that was
// passed to D3DSurface::create.
TEST_F(D3D9Tests, GivenPlaneIntelWhenGetMemObjInfoThenCorrectInfoIsReturned) {
    mockSharingFcns->mockTexture2dDesc.Format = (D3DFORMAT)MAKEFOURCC('Y', 'V', '1', '2');
    cl_dx9_media_adapter_type_khr expectedAdapterType = 5;
    cl_uint plane = 0;
    cl_uint expectedPlane = 2;
    surfaceInfo.shared_handle = (HANDLE)1;
    size_t retSize = 0;
    mockSharingFcns->getTexture2dDescSetParams = true;
    mockSharingFcns->getTexture2dDescParamsSet.textureDesc = mockSharingFcns->mockTexture2dDesc;
    auto sharedImg = std::unique_ptr(D3DSurface::create(context, &surfaceInfo, CL_MEM_READ_WRITE, expectedAdapterType, expectedPlane, nullptr));
    ASSERT_NE(nullptr, sharedImg.get());
    sharedImg->getImageInfo(CL_IMAGE_DX9_PLANE_INTEL, sizeof(cl_uint), &plane, &retSize);
    EXPECT_EQ(expectedPlane, plane);
    EXPECT_EQ(sizeof(cl_uint), retSize);
    EXPECT_EQ(1u, mockSharingFcns->getTexture2dDescCalled);
}

// Non-null shared handle: the sharing handler is expected to report a shared resource with
// no staging surface, createTexture2d must not be called, and getOffset is expected once
// with array index 0.
TEST_F(D3D9Tests, givenSharedHandleWhenCreateThenDontCreateStagingSurface) {
    surfaceInfo.shared_handle = (HANDLE)1;
    mockSharingFcns->getTexture2dDescSetParams = true;
    mockSharingFcns->getTexture2dDescParamsSet.textureDesc = mockSharingFcns->mockTexture2dDesc;
    auto sharedImg = std::unique_ptr(D3DSurface::create(context, &surfaceInfo, CL_MEM_READ_WRITE, 0, 0, nullptr));
    ASSERT_NE(nullptr, sharedImg.get());
    EXPECT_EQ(1u, mockGmmResInfo->getOffsetCalled);
    EXPECT_EQ(0u, mockGmmResInfo->arrayIndexPassedToGetOffset);
    auto surface = static_cast(sharedImg->getSharingHandler().get());
    EXPECT_TRUE(surface->isSharedResource());
    EXPECT_EQ(nullptr, surface->getResourceStaging());
    EXPECT_EQ(0u, mockSharingFcns->createTexture2dCalled);
    EXPECT_EQ(1u, mockSharingFcns->getTexture2dDescCalled);
    EXPECT_EQ(1u, mockSharingFcns->addRefCalled);
}

// Zero shared handle with Usage = 0 in the texture description; the creation call and
// expectations continue on the next physical line.
TEST_F(D3D9Tests, givenZeroSharedHandleAndLockableFlagWhenCreateThenDontCreateStagingSurface) {
    surfaceInfo.shared_handle = (HANDLE)0;
    mockSharingFcns->mockTexture2dDesc.Usage = 0;
// Remainder of the test that starts on the preceding physical line: the surface is expected
// to be non-shared and lockable, with no staging surface and no createTexture2d call.
// NOTE(review): std::unique_ptr/static_cast appear without template arguments in this copy —
// looks like extraction damage; confirm against upstream.
    mockSharingFcns->getTexture2dDescSetParams = true;
    mockSharingFcns->getTexture2dDescParamsSet.textureDesc = mockSharingFcns->mockTexture2dDesc;
    auto sharedImg = std::unique_ptr(D3DSurface::create(context, &surfaceInfo, CL_MEM_READ_WRITE, 0, 0, nullptr));
    ASSERT_NE(nullptr, sharedImg.get());
    EXPECT_EQ(0u, mockGmmResInfo->getOffsetCalled);
    auto surface = static_cast(sharedImg->getSharingHandler().get());
    EXPECT_FALSE(surface->isSharedResource());
    EXPECT_EQ(nullptr, surface->getResourceStaging());
    EXPECT_TRUE(surface->lockable);
    EXPECT_EQ(0u, mockSharingFcns->createTexture2dCalled);
    EXPECT_EQ(1u, mockSharingFcns->getTexture2dDescCalled);
    EXPECT_EQ(1u, mockSharingFcns->addRefCalled);
}

// Zero shared handle with Usage = USAGE_RENDERTARGET: the surface is expected to be
// non-lockable and to own a staging surface obtained through one createTexture2d call.
TEST_F(D3D9Tests, givenZeroSharedHandleAndNonLockableFlagWhenCreateThenCreateStagingSurface) {
    surfaceInfo.shared_handle = (HANDLE)0;
    mockSharingFcns->mockTexture2dDesc.Usage = D3DResourceFlags::USAGE_RENDERTARGET;
    mockSharingFcns->getTexture2dDescSetParams = true;
    mockSharingFcns->getTexture2dDescParamsSet.textureDesc = mockSharingFcns->mockTexture2dDesc;
    // The mock's createTexture2d hands back the fixture's staging dummy.
    mockSharingFcns->createTexture2dSetParams = true;
    mockSharingFcns->createTexture2dParamsSet.texture = (D3DTexture2d *)&dummyD3DSurfaceStaging;
    auto sharedImg = std::unique_ptr(D3DSurface::create(context, &surfaceInfo, CL_MEM_READ_WRITE, 0, 0, nullptr));
    ASSERT_NE(nullptr, sharedImg.get());
    EXPECT_EQ(0u, mockGmmResInfo->getOffsetCalled);
    auto surface = static_cast(sharedImg->getSharingHandler().get());
    EXPECT_FALSE(surface->isSharedResource());
    EXPECT_NE(nullptr, surface->getResourceStaging());
    EXPECT_FALSE(surface->lockable);
    EXPECT_EQ(1u, mockSharingFcns->createTexture2dCalled);
    EXPECT_EQ(1u, mockSharingFcns->getTexture2dDescCalled);
    EXPECT_EQ(1u, mockSharingFcns->addRefCalled);
}

// Shared-handle surface with interop user sync enabled; acquire/release and the expectations
// continue on the next physical line.
TEST_F(D3D9Tests, GivenSharedResourceSurfaceAndEnabledInteropUserSyncWhenReleasingThenResourcesAreReleased) {
    context->setInteropUserSyncEnabled(true);
    surfaceInfo.shared_handle = (HANDLE)1;
    mockGmmResInfo->cpuBltCalled = 0u;
// Remainder of the test that starts on the preceding physical line (interop user sync
// enabled): acquire + release of a shared surface is expected to perform no lock/unlock,
// no cpuBlt and no flushAndWait, while updateDevice is called once with dummyD3DSurface.
// NOTE(review): std::unique_ptr/reinterpret_cast appear without template arguments in this
// copy — looks like extraction damage; confirm against upstream.
    mockSharingFcns->getTexture2dDescSetParams = true;
    mockSharingFcns->getTexture2dDescParamsSet.textureDesc = mockSharingFcns->mockTexture2dDesc;
    auto sharedImg = std::unique_ptr(D3DSurface::create(context, &surfaceInfo, CL_MEM_READ_WRITE, 0, 0, nullptr));
    ASSERT_NE(nullptr, sharedImg);
    EXPECT_EQ(1u, mockGmmResInfo->getOffsetCalled);
    EXPECT_EQ(0u, mockGmmResInfo->arrayIndexPassedToGetOffset);
    cl_mem clMem = sharedImg.get();

    auto retVal = clEnqueueAcquireDX9MediaSurfacesKHR(cmdQ, 1, &clMem, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    retVal = clEnqueueReleaseDX9MediaSurfacesKHR(cmdQ, 1, &clMem, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    EXPECT_EQ(0, memoryManager->lockResourceCalled);
    EXPECT_EQ(0, memoryManager->unlockResourceCalled);
    EXPECT_EQ(0u, mockGmmResInfo->cpuBltCalled);
    EXPECT_EQ(1u, mockSharingFcns->getTexture2dDescCalled);
    EXPECT_EQ(0u, mockSharingFcns->flushAndWaitCalled);
    EXPECT_EQ(0u, mockSharingFcns->lockRectCalled);
    EXPECT_EQ(0u, mockSharingFcns->unlockRectCalled);
    EXPECT_EQ(0u, mockSharingFcns->getRenderTargetDataCalled);
    EXPECT_EQ(0u, mockSharingFcns->updateSurfaceCalled);
    EXPECT_EQ(1u, mockSharingFcns->updateDeviceCalled);
    EXPECT_EQ(reinterpret_cast(&dummyD3DSurface), mockSharingFcns->updateDeviceParamsPassed[0].resource);
}

// Same scenario with interop user sync disabled, using the KHR acquire/release entry points.
// (The release call and expectations continue on the next physical line.)
TEST_F(D3D9Tests, GivenSharedResourceSurfaceAndDisabledInteropUserSyncWhenReleasingThenResourcesAreReleased) {
    context->setInteropUserSyncEnabled(false);
    surfaceInfo.shared_handle = (HANDLE)1;
    mockGmmResInfo->cpuBltCalled = 0u;
    mockSharingFcns->getTexture2dDescSetParams = true;
    mockSharingFcns->getTexture2dDescParamsSet.textureDesc = mockSharingFcns->mockTexture2dDesc;
    auto sharedImg = std::unique_ptr(D3DSurface::create(context, &surfaceInfo, CL_MEM_READ_WRITE, 0, 0, nullptr));
    ASSERT_NE(nullptr, sharedImg.get());
    EXPECT_EQ(1u, mockGmmResInfo->getOffsetCalled);
    cl_mem clMem = sharedImg.get();
    auto retVal = clEnqueueAcquireDX9MediaSurfacesKHR(cmdQ, 1, &clMem, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
// Remainder of the test that starts on the preceding physical line (interop user sync
// disabled): flushAndWait is expected exactly once; no lock/unlock or cpuBlt is expected.
// NOTE(review): std::unique_ptr/reinterpret_cast appear without template arguments in this
// copy — looks like extraction damage; confirm against upstream.
    retVal = clEnqueueReleaseDX9MediaSurfacesKHR(cmdQ, 1, &clMem, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(0, memoryManager->lockResourceCalled);
    EXPECT_EQ(0, memoryManager->unlockResourceCalled);
    EXPECT_EQ(0u, mockGmmResInfo->cpuBltCalled);
    EXPECT_EQ(1u, mockSharingFcns->getTexture2dDescCalled);
    EXPECT_EQ(1u, mockSharingFcns->flushAndWaitCalled);
    EXPECT_EQ(0u, mockSharingFcns->lockRectCalled);
    EXPECT_EQ(0u, mockSharingFcns->unlockRectCalled);
    EXPECT_EQ(0u, mockSharingFcns->getRenderTargetDataCalled);
    EXPECT_EQ(0u, mockSharingFcns->updateSurfaceCalled);
    EXPECT_EQ(1u, mockSharingFcns->updateDeviceCalled);
    EXPECT_EQ(reinterpret_cast(&dummyD3DSurface), mockSharingFcns->updateDeviceParamsPassed[0].resource);
}

// Same scenario as the previous test, driven through the INTEL acquire/release entry points
// (clEnqueueAcquireDX9ObjectsINTEL / clEnqueueReleaseDX9ObjectsINTEL).
// (The final expectations continue on the next physical line.)
TEST_F(D3D9Tests, GivenSharedResourceSurfaceAndDisabledInteropUserSyncIntelWhenReleasingThenResourcesAreReleased) {
    context->setInteropUserSyncEnabled(false);
    surfaceInfo.shared_handle = (HANDLE)1;
    mockGmmResInfo->cpuBltCalled = 0u;
    mockSharingFcns->getTexture2dDescSetParams = true;
    mockSharingFcns->getTexture2dDescParamsSet.textureDesc = mockSharingFcns->mockTexture2dDesc;
    auto sharedImg = std::unique_ptr(D3DSurface::create(context, &surfaceInfo, CL_MEM_READ_WRITE, 0, 0, nullptr));
    ASSERT_NE(nullptr, sharedImg.get());
    cl_mem clMem = sharedImg.get();

    auto retVal = clEnqueueAcquireDX9ObjectsINTEL(cmdQ, 1, &clMem, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    retVal = clEnqueueReleaseDX9ObjectsINTEL(cmdQ, 1, &clMem, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    EXPECT_EQ(0, memoryManager->lockResourceCalled);
    EXPECT_EQ(0, memoryManager->unlockResourceCalled);
    EXPECT_EQ(0u, mockGmmResInfo->cpuBltCalled);
    EXPECT_EQ(1u, mockSharingFcns->getTexture2dDescCalled);
    EXPECT_EQ(1u, mockSharingFcns->flushAndWaitCalled);
    EXPECT_EQ(0u, mockSharingFcns->lockRectCalled);
    EXPECT_EQ(0u, mockSharingFcns->unlockRectCalled);
    EXPECT_EQ(0u, mockSharingFcns->getRenderTargetDataCalled);
    EXPECT_EQ(0u, mockSharingFcns->updateSurfaceCalled);
    EXPECT_EQ(1u,
// Remainder of the test that starts on the preceding physical line: updateDevice must have
// been called once with dummyD3DSurface.
// NOTE(review): std::unique_ptr/static_cast/reinterpret_cast appear without template
// arguments in this copy — looks like extraction damage; confirm against upstream.
              mockSharingFcns->updateDeviceCalled);
    EXPECT_EQ(reinterpret_cast(&dummyD3DSurface), mockSharingFcns->updateDeviceParamsPassed[0].resource);
}

// Non-shared, lockable surface (zero handle, Usage = 0), KHR entry points. Acquire is
// expected to lock once and issue one cpuBlt whose descriptor matches expectedResCopyBlt
// with Blt.Upload = 1. (The release half continues on the next physical line.)
TEST_F(D3D9Tests, GivenNonSharedResourceSurfaceAndLockableWhenReleasingThenResourcesAreReleased) {
    surfaceInfo.shared_handle = (HANDLE)0;
    mockSharingFcns->mockTexture2dDesc.Usage = 0;
    D3DLOCKED_RECT lockedRect = {10u, (void *)100};

    mockSharingFcns->getTexture2dDescSetParams = true;
    mockSharingFcns->getTexture2dDescParamsSet.textureDesc = mockSharingFcns->mockTexture2dDesc;

    auto sharedImg = std::unique_ptr(D3DSurface::create(context, &surfaceInfo, CL_MEM_READ_WRITE, 0, 0, nullptr));
    ASSERT_NE(nullptr, sharedImg.get());
    auto graphicsAllocation = sharedImg->getGraphicsAllocation(context->getDevice(0)->getRootDeviceIndex());
    EXPECT_EQ(0u, mockGmmResInfo->getOffsetCalled);
    cl_mem clMem = sharedImg.get();
    auto imgHeight = static_cast(sharedImg->getImageDesc().image_height);
    void *returnedLockedRes = (void *)100;

    // Values the mocks hand back from lockRect / lockResource.
    mockSharingFcns->lockRectSetParams = true;
    mockSharingFcns->lockRectParamsSet.lockedRect = lockedRect;
    memoryManager->lockResourceReturnValue = returnedLockedRes;
    memoryManager->expectedLockingAllocation = graphicsAllocation;

    // Build the blit descriptor the acquire path is expected to request.
    GMM_RES_COPY_BLT &requestedResCopyBlt = mockGmmResInfo->requestedResCopyBlt;
    GMM_RES_COPY_BLT expectedResCopyBlt = {};
    mockGmmResInfo->cpuBltCalled = 0u;
    expectedResCopyBlt.Sys.pData = lockedRect.pBits;
    expectedResCopyBlt.Gpu.pData = returnedLockedRes;
    expectedResCopyBlt.Sys.RowPitch = lockedRect.Pitch;
    expectedResCopyBlt.Blt.Upload = 1;
    expectedResCopyBlt.Sys.BufferSize = lockedRect.Pitch * imgHeight;

    auto retVal = clEnqueueAcquireDX9MediaSurfacesKHR(cmdQ, 1, &clMem, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(1, memoryManager->lockResourceCalled);
    EXPECT_EQ(1, memoryManager->unlockResourceCalled);
    EXPECT_TRUE(memcmp(&requestedResCopyBlt, &expectedResCopyBlt, sizeof(GMM_RES_COPY_BLT)) == 0);
    EXPECT_EQ(1u, mockGmmResInfo->cpuBltCalled);
    EXPECT_EQ(1u, mockSharingFcns->lockRectCalled);
// Remainder of the test that starts on the preceding physical line (release half): the
// recorded blit is cleared, the expected direction flipped to Blt.Upload = 0, and after
// release every lock/unlock/cpuBlt counter is expected to be 2. The first lockRect call is
// expected to carry D3DLOCK_READONLY and the second flags 0.
// NOTE(review): std::unique_ptr/reinterpret_cast appear without template arguments in this
// copy — looks like extraction damage; confirm against upstream.
    mockSharingFcns->lockRectParamsSet.lockedRect = lockedRect;
    requestedResCopyBlt = {};
    expectedResCopyBlt.Blt.Upload = 0;
    EXPECT_EQ(1u, mockSharingFcns->unlockRectCalled);

    retVal = clEnqueueReleaseDX9MediaSurfacesKHR(cmdQ, 1, &clMem, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_TRUE(memcmp(&requestedResCopyBlt, &expectedResCopyBlt, sizeof(GMM_RES_COPY_BLT)) == 0);
    EXPECT_EQ(2, memoryManager->lockResourceCalled);
    EXPECT_EQ(2, memoryManager->unlockResourceCalled);
    EXPECT_EQ(2u, mockGmmResInfo->cpuBltCalled);
    EXPECT_EQ(1u, mockSharingFcns->getTexture2dDescCalled);
    EXPECT_EQ(1u, mockSharingFcns->flushAndWaitCalled);
    EXPECT_EQ(2u, mockSharingFcns->lockRectCalled);
    EXPECT_EQ(2u, mockSharingFcns->unlockRectCalled);
    EXPECT_EQ(0u, mockSharingFcns->getRenderTargetDataCalled);
    EXPECT_EQ(0u, mockSharingFcns->updateSurfaceCalled);
    EXPECT_EQ(1u, mockSharingFcns->updateDeviceCalled);

    // Lock/unlock are expected to act directly on the application surface.
    EXPECT_EQ(reinterpret_cast(&dummyD3DSurface), mockSharingFcns->lockRectParamsPassed[0].d3dResource);
    EXPECT_EQ(D3DLOCK_READONLY, mockSharingFcns->lockRectParamsPassed[0].flags);
    EXPECT_EQ(reinterpret_cast(&dummyD3DSurface), mockSharingFcns->lockRectParamsPassed[1].d3dResource);
    EXPECT_EQ(0u, mockSharingFcns->lockRectParamsPassed[1].flags);
    EXPECT_EQ(reinterpret_cast(&dummyD3DSurface), mockSharingFcns->unlockRectParamsPassed[0].d3dResource);
    EXPECT_EQ(reinterpret_cast(&dummyD3DSurface), mockSharingFcns->unlockRectParamsPassed[1].d3dResource);
    EXPECT_EQ(reinterpret_cast(&dummyD3DSurface), mockSharingFcns->updateDeviceParamsPassed[0].resource);
}

// Same lockable, non-shared scenario driven through the INTEL acquire/release entry points.
// (The creation call's arguments and the rest of the body continue on the next physical line.)
TEST_F(D3D9Tests, GivenNonSharedResourceSurfaceAndLockableIntelWhenReleasingThenResourcesAreReleased) {
    surfaceInfo.shared_handle = (HANDLE)0;
    mockSharingFcns->mockTexture2dDesc.Usage = 0;
    D3DLOCKED_RECT lockedRect = {10u, (void *)100};
    mockSharingFcns->getTexture2dDescSetParams = true;
    mockSharingFcns->getTexture2dDescParamsSet.textureDesc = mockSharingFcns->mockTexture2dDesc;
    auto sharedImg = std::unique_ptr(D3DSurface::create(context,
// Middle part of the test that starts on the preceding physical line (lockable, non-shared
// surface, INTEL entry points): sets up mock return values, acquires (expecting an upload
// blit) and releases (expecting a blit with Blt.Upload = 0).
// NOTE(review): static_cast appears without template arguments in this copy — looks like
// extraction damage; confirm against upstream.
                                                        &surfaceInfo, CL_MEM_READ_WRITE, 0, 0, nullptr));
    ASSERT_NE(nullptr, sharedImg.get());
    auto graphicsAllocation = sharedImg->getGraphicsAllocation(context->getDevice(0)->getRootDeviceIndex());
    cl_mem clMem = sharedImg.get();
    auto imgHeight = static_cast(sharedImg->getImageDesc().image_height);
    void *returnedLockedRes = (void *)100;

    // Values the mocks hand back from lockRect / lockResource.
    mockSharingFcns->lockRectSetParams = true;
    mockSharingFcns->lockRectParamsSet.lockedRect = lockedRect;
    memoryManager->lockResourceReturnValue = returnedLockedRes;
    memoryManager->expectedLockingAllocation = graphicsAllocation;

    // Blit descriptor the acquire path is expected to request.
    GMM_RES_COPY_BLT &requestedResCopyBlt = mockGmmResInfo->requestedResCopyBlt;
    GMM_RES_COPY_BLT expectedResCopyBlt = {};
    mockGmmResInfo->cpuBltCalled = 0u;
    expectedResCopyBlt.Sys.pData = lockedRect.pBits;
    expectedResCopyBlt.Gpu.pData = returnedLockedRes;
    expectedResCopyBlt.Sys.RowPitch = lockedRect.Pitch;
    expectedResCopyBlt.Blt.Upload = 1;
    expectedResCopyBlt.Sys.BufferSize = lockedRect.Pitch * imgHeight;

    auto retVal = clEnqueueAcquireDX9ObjectsINTEL(cmdQ, 1, &clMem, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_TRUE(memcmp(&requestedResCopyBlt, &expectedResCopyBlt, sizeof(GMM_RES_COPY_BLT)) == 0);
    EXPECT_EQ(1, memoryManager->lockResourceCalled);
    EXPECT_EQ(1, memoryManager->unlockResourceCalled);
    EXPECT_EQ(1u, mockGmmResInfo->cpuBltCalled);
    EXPECT_EQ(1u, mockSharingFcns->lockRectCalled);

    // Clear the recorded blit and flip the expected direction before releasing.
    mockSharingFcns->lockRectParamsSet.lockedRect = lockedRect;
    requestedResCopyBlt = {};
    expectedResCopyBlt.Blt.Upload = 0;
    EXPECT_EQ(1u, mockSharingFcns->unlockRectCalled);

    retVal = clEnqueueReleaseDX9ObjectsINTEL(cmdQ, 1, &clMem, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_TRUE(memcmp(&requestedResCopyBlt, &expectedResCopyBlt, sizeof(GMM_RES_COPY_BLT)) == 0);
    EXPECT_EQ(2, memoryManager->lockResourceCalled);
    EXPECT_EQ(2, memoryManager->unlockResourceCalled);
    EXPECT_EQ(2u, mockGmmResInfo->cpuBltCalled);
    EXPECT_EQ(1u, mockSharingFcns->getTexture2dDescCalled);
    EXPECT_EQ(1u, mockSharingFcns->flushAndWaitCalled);
// Remainder of the test that starts on an earlier physical line (lockable, INTEL entry
// points): final call counters and the resources passed to lockRect/unlockRect/updateDevice.
// NOTE(review): std::unique_ptr/static_cast/reinterpret_cast appear without template
// arguments in this copy — looks like extraction damage; confirm against upstream.
    EXPECT_EQ(2u, mockSharingFcns->lockRectCalled);
    EXPECT_EQ(2u, mockSharingFcns->unlockRectCalled);
    EXPECT_EQ(0u, mockSharingFcns->getRenderTargetDataCalled);
    EXPECT_EQ(0u, mockSharingFcns->updateSurfaceCalled);
    EXPECT_EQ(1u, mockSharingFcns->updateDeviceCalled);
    EXPECT_EQ(reinterpret_cast(&dummyD3DSurface), mockSharingFcns->lockRectParamsPassed[0].d3dResource);
    EXPECT_EQ(D3DLOCK_READONLY, mockSharingFcns->lockRectParamsPassed[0].flags);
    EXPECT_EQ(reinterpret_cast(&dummyD3DSurface), mockSharingFcns->lockRectParamsPassed[1].d3dResource);
    EXPECT_EQ(0u, mockSharingFcns->lockRectParamsPassed[1].flags);
    EXPECT_EQ(reinterpret_cast(&dummyD3DSurface), mockSharingFcns->unlockRectParamsPassed[0].d3dResource);
    EXPECT_EQ(reinterpret_cast(&dummyD3DSurface), mockSharingFcns->unlockRectParamsPassed[1].d3dResource);
    EXPECT_EQ(reinterpret_cast(&dummyD3DSurface), mockSharingFcns->updateDeviceParamsPassed[0].resource);
}

// Non-shared, non-lockable surface (zero handle, Usage = USAGE_RENDERTARGET), KHR entry
// points. createTexture2d is mocked to return dummyD3DSurfaceStaging.
// (The rest of the setup and all expectations continue on the following physical lines.)
TEST_F(D3D9Tests, GivenNonSharedResourceSurfaceAndNonLockableWhenReleasingThenResourcesAreReleased) {
    surfaceInfo.shared_handle = (HANDLE)0;
    mockSharingFcns->mockTexture2dDesc.Usage = D3DResourceFlags::USAGE_RENDERTARGET;
    D3DLOCKED_RECT lockedRect = {10u, (void *)100};

    mockSharingFcns->getTexture2dDescSetParams = true;
    mockSharingFcns->getTexture2dDescParamsSet.textureDesc = mockSharingFcns->mockTexture2dDesc;

    mockSharingFcns->createTexture2dSetParams = true;
    mockSharingFcns->createTexture2dParamsSet.texture = (D3DTexture2d *)&dummyD3DSurfaceStaging;

    auto sharedImg = std::unique_ptr(D3DSurface::create(context, &surfaceInfo, CL_MEM_READ_WRITE, 0, 0, nullptr));
    ASSERT_NE(nullptr, sharedImg.get());
    auto graphicsAllocation = sharedImg->getGraphicsAllocation(context->getDevice(0)->getRootDeviceIndex());
    EXPECT_EQ(0u, mockGmmResInfo->getOffsetCalled);
    cl_mem clMem = sharedImg.get();
    auto imgHeight = static_cast(sharedImg->getImageDesc().image_height);
    void *returnedLockedRes = (void *)100;
    mockSharingFcns->lockRectSetParams = true;
// Middle part of the test that starts on the preceding physical line (non-lockable,
// non-shared surface): acquire is expected to produce an upload blit, release a blit with
// Blt.Upload = 0; getRenderTargetData and updateSurface are each expected once.
// NOTE(review): unlike the lockable variants, requestedResCopyBlt is not cleared before the
// release call — confirm whether that difference is intentional.
// NOTE(review): reinterpret_cast appears without template arguments in this copy — looks
// like extraction damage; confirm against upstream.
    mockSharingFcns->lockRectParamsSet.lockedRect = lockedRect;
    memoryManager->lockResourceReturnValue = returnedLockedRes;
    memoryManager->expectedLockingAllocation = graphicsAllocation;

    // Blit descriptor the acquire path is expected to request.
    GMM_RES_COPY_BLT &requestedResCopyBlt = mockGmmResInfo->requestedResCopyBlt;
    mockGmmResInfo->cpuBltCalled = 0u;
    GMM_RES_COPY_BLT expectedResCopyBlt = {};
    expectedResCopyBlt.Sys.pData = lockedRect.pBits;
    expectedResCopyBlt.Gpu.pData = returnedLockedRes;
    expectedResCopyBlt.Sys.RowPitch = lockedRect.Pitch;
    expectedResCopyBlt.Blt.Upload = 1;
    expectedResCopyBlt.Sys.BufferSize = lockedRect.Pitch * imgHeight;

    auto retVal = clEnqueueAcquireDX9MediaSurfacesKHR(cmdQ, 1, &clMem, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_TRUE(memcmp(&requestedResCopyBlt, &expectedResCopyBlt, sizeof(GMM_RES_COPY_BLT)) == 0);
    EXPECT_EQ(1, memoryManager->lockResourceCalled);
    EXPECT_EQ(1, memoryManager->unlockResourceCalled);
    EXPECT_EQ(1u, mockGmmResInfo->cpuBltCalled);
    EXPECT_EQ(1u, mockSharingFcns->lockRectCalled);

    mockSharingFcns->lockRectParamsSet.lockedRect = lockedRect;
    expectedResCopyBlt.Blt.Upload = 0;
    EXPECT_EQ(1u, mockSharingFcns->unlockRectCalled);

    retVal = clEnqueueReleaseDX9MediaSurfacesKHR(cmdQ, 1, &clMem, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_TRUE(memcmp(&requestedResCopyBlt, &expectedResCopyBlt, sizeof(GMM_RES_COPY_BLT)) == 0);
    EXPECT_EQ(2, memoryManager->lockResourceCalled);
    EXPECT_EQ(2, memoryManager->unlockResourceCalled);
    EXPECT_EQ(2u, mockGmmResInfo->cpuBltCalled);
    EXPECT_EQ(1u, mockSharingFcns->createTexture2dCalled);
    EXPECT_EQ(1u, mockSharingFcns->getTexture2dDescCalled);
    EXPECT_EQ(1u, mockSharingFcns->flushAndWaitCalled);
    EXPECT_EQ(2u, mockSharingFcns->lockRectCalled);
    EXPECT_EQ(2u, mockSharingFcns->unlockRectCalled);
    EXPECT_EQ(1u, mockSharingFcns->getRenderTargetDataCalled);
    EXPECT_EQ(1u, mockSharingFcns->updateSurfaceCalled);
    EXPECT_EQ(1u, mockSharingFcns->updateDeviceCalled);
    EXPECT_EQ(reinterpret_cast(&dummyD3DSurfaceStaging),
mockSharingFcns->lockRectParamsPassed[0].d3dResource); EXPECT_EQ(D3DLOCK_READONLY, mockSharingFcns->lockRectParamsPassed[0].flags); EXPECT_EQ(reinterpret_cast(&dummyD3DSurfaceStaging), mockSharingFcns->lockRectParamsPassed[1].d3dResource); EXPECT_EQ(0u, mockSharingFcns->lockRectParamsPassed[1].flags); EXPECT_EQ(reinterpret_cast(&dummyD3DSurfaceStaging), mockSharingFcns->unlockRectParamsPassed[0].d3dResource); EXPECT_EQ(reinterpret_cast(&dummyD3DSurfaceStaging), mockSharingFcns->unlockRectParamsPassed[1].d3dResource); EXPECT_EQ(reinterpret_cast(&dummyD3DSurface), mockSharingFcns->getRenderTargetDataParamsPassed[0].renderTarget); EXPECT_EQ(reinterpret_cast(&dummyD3DSurfaceStaging), mockSharingFcns->getRenderTargetDataParamsPassed[0].dstSurface); EXPECT_EQ(reinterpret_cast(&dummyD3DSurfaceStaging), mockSharingFcns->updateSurfaceParamsPassed[0].renderTarget); EXPECT_EQ(reinterpret_cast(&dummyD3DSurface), mockSharingFcns->updateSurfaceParamsPassed[0].dstSurface); EXPECT_EQ(reinterpret_cast(&dummyD3DSurface), mockSharingFcns->updateDeviceParamsPassed[0].resource); } TEST_F(D3D9Tests, givenInvalidClMemObjectPassedOnReleaseListWhenCallIsMadeThenFailureIsReturned) { auto fakeObject = reinterpret_cast(cmdQ); auto retVal = clEnqueueReleaseDX9MediaSurfacesKHR(cmdQ, 1, &fakeObject, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } TEST_F(D3D9Tests, givenResourcesCreatedFromDifferentDevicesWhenAcquireReleaseCalledThenUpdateDevice) { mockSharingFcns->getTexture2dDescSetParams = true; mockSharingFcns->getTexture2dDescParamsSet.textureDesc = mockSharingFcns->mockTexture2dDesc; auto createdResourceDevice = reinterpret_cast(123); mockSharingFcns->setDevice(createdResourceDevice); // create call will pick this device auto sharedImg = std::unique_ptr(D3DSurface::create(context, &surfaceInfo, CL_MEM_READ_WRITE, 0, 0, nullptr)); ASSERT_NE(nullptr, sharedImg.get()); memoryManager->expectedLockingAllocation = 
sharedImg->getGraphicsAllocation(context->getDevice(0)->getRootDeviceIndex());

    mockSharingFcns->setDevice(nullptr); // force device change
    sharedImg->getSharingHandler()->acquire(sharedImg.get(), context->getDevice(0)->getRootDeviceIndex());
    EXPECT_EQ(createdResourceDevice, mockSharingFcns->getDevice());

    mockSharingFcns->setDevice(nullptr); // force device change
    sharedImg->getSharingHandler()->release(sharedImg.get(), context->getDevice(0)->getRootDeviceIndex());
    EXPECT_EQ(createdResourceDevice, mockSharingFcns->getDevice());

    // The texture description is expected to be queried exactly once.
    EXPECT_EQ(1u, mockSharingFcns->getTexture2dDescCalled);
}

// A context is created with CL_CONTEXT_ADAPTER_D3D9_KHR set to 0. Context creation is expected to
// succeed with a sharing object whose device is nullptr, and D3DSurface::create on that context is
// expected to return nullptr with CL_INVALID_CONTEXT.
TEST_F(D3D9Tests, givenNullD3dDeviceWhenContextIsCreatedThenReturnErrorOnSurfaceCreation) {
    cl_device_id deviceID = context->getDevice(0);
    cl_int retVal = CL_SUCCESS;
    cl_context_properties properties[5] = {CL_CONTEXT_PLATFORM, (cl_context_properties)(cl_platform_id)pPlatform,
                                           CL_CONTEXT_ADAPTER_D3D9_KHR, 0, 0};

    std::unique_ptr ctx(Context::create(properties, ClDeviceVector(&deviceID, 1), nullptr, nullptr, retVal));

    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(nullptr, ctx->getSharing>()->getDevice());

    auto img = D3DSurface::create(ctx.get(), nullptr, CL_MEM_READ_WRITE, 0, 0, &retVal);
    EXPECT_EQ(CL_INVALID_CONTEXT, retVal);
    EXPECT_EQ(nullptr, img);
}

// A context is created with only CL_CONTEXT_PLATFORM in its properties, and the sharing lookup on
// it is expected to return nullptr. D3DSurface::create is expected to fail with
// CL_INVALID_CONTEXT both for that context and for a nullptr context.
TEST_F(D3D9Tests, givenInvalidContextWhenSurfaceIsCreatedThenReturnError) {
    cl_device_id deviceID = context->getDevice(0);
    cl_int retVal = CL_SUCCESS;
    cl_context_properties properties[5] = {CL_CONTEXT_PLATFORM, (cl_context_properties)(cl_platform_id)pPlatform, 0};

    std::unique_ptr ctx(Context::create(properties, ClDeviceVector(&deviceID, 1), nullptr, nullptr, retVal));

    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(nullptr, ctx->getSharing>());

    auto img = D3DSurface::create(ctx.get(), nullptr, CL_MEM_READ_WRITE, 0, 0, &retVal);
    EXPECT_EQ(CL_INVALID_CONTEXT, retVal);
    EXPECT_EQ(nullptr, img);

    img = D3DSurface::create(nullptr, nullptr, CL_MEM_READ_WRITE, 0, 0, &retVal);
    EXPECT_EQ(CL_INVALID_CONTEXT, retVal);
    EXPECT_EQ(nullptr, img);
}

TEST_F(D3D9Tests,
givenInvalidFlagsWhenSurfaceIsCreatedThenReturnError) {
    // CL_MEM_USE_HOST_PTR is passed as the flags argument; CL_INVALID_VALUE and a null image are expected.
    cl_int retVal = CL_SUCCESS;
    auto img = clCreateFromDX9MediaSurfaceINTEL(context, CL_MEM_USE_HOST_PTR, surfaceInfo.resource, surfaceInfo.shared_handle, 0, &retVal);
    EXPECT_EQ(CL_INVALID_VALUE, retVal);
    EXPECT_EQ(nullptr, img);
}

// The command queue handle is reinterpreted and passed as the context argument;
// CL_INVALID_CONTEXT and a null image are expected.
TEST_F(D3D9Tests, givenInvalidContextWhenImageIsCreatedThenErrorIsReturned) {
    auto invalidContext = reinterpret_cast(this->cmdQ);
    auto retVal = CL_SUCCESS;
    auto img = clCreateFromDX9MediaSurfaceINTEL(invalidContext, CL_MEM_READ_WRITE, surfaceInfo.resource, surfaceInfo.shared_handle, 0, &retVal);
    EXPECT_EQ(CL_INVALID_CONTEXT, retVal);
    EXPECT_EQ(nullptr, img);
}

// Two surfaces are created from the same surfaceInfo and plane while the first is still alive.
// The first create is expected to succeed; the second is expected to return nullptr with
// CL_INVALID_DX9_RESOURCE_INTEL, with the texture description queried only once.
TEST_F(D3D9Tests, givenTheSameResourceAndPlaneWhenSurfaceIsCreatedThenReturnError) {
    mockSharingFcns->mockTexture2dDesc.Format = (D3DFORMAT)MAKEFOURCC('Y', 'V', '1', '2');
    surfaceInfo.shared_handle = (HANDLE)1;
    cl_int retVal = CL_SUCCESS;
    cl_uint plane = 0;

    mockSharingFcns->getTexture2dDescSetParams = true;
    mockSharingFcns->getTexture2dDescParamsSet.textureDesc = mockSharingFcns->mockTexture2dDesc;

    auto sharedImg = std::unique_ptr(D3DSurface::create(context, &surfaceInfo, CL_MEM_READ_WRITE, 0, plane, &retVal));
    EXPECT_NE(nullptr, sharedImg.get());
    EXPECT_EQ(CL_SUCCESS, retVal);

    auto sharedImg2 = D3DSurface::create(context, &surfaceInfo, CL_MEM_READ_WRITE, 0, plane, &retVal);
    EXPECT_EQ(nullptr, sharedImg2);
    EXPECT_EQ(CL_INVALID_DX9_RESOURCE_INTEL, retVal);

    EXPECT_EQ(1u, mockSharingFcns->getTexture2dDescCalled);
}

// After fillCreateBufferDesc, requestedDesc is expected to still compare byte-equal to a
// value-initialized descriptor.
TEST_F(D3D9Tests, WhenFillingBufferDescThenBufferContentsAreCorrect) {
    D3D9::D3DBufferDesc requestedDesc = {};
    D3D9::D3DBufferDesc expectedDesc = {};

    mockSharingFcns->fillCreateBufferDesc(requestedDesc, 10);
    EXPECT_TRUE(memcmp(&requestedDesc, &expectedDesc, sizeof(D3D9::D3DBufferDesc)) == 0);
}

// After fillCreateTexture2dDesc, requestedDesc is expected to still compare byte-equal to a
// value-initialized descriptor.
TEST_F(D3D9Tests, WhenFillingTexture2dDescThenTextureContentsAreCorrect) {
    D3D9::D3DTexture2dDesc requestedDesc = {};
    D3D9::D3DTexture2dDesc expectedDesc = {};
    D3D9::D3DTexture2dDesc srcDesc = {};
    cl_uint subresource = 4;
mockSharingFcns->fillCreateTexture2dDesc(requestedDesc, &srcDesc, subresource);
    EXPECT_TRUE(memcmp(&requestedDesc, &expectedDesc, sizeof(D3D9::D3DTexture2dDesc)) == 0);
}

// After fillCreateTexture3dDesc, requestedDesc is expected to still compare byte-equal to a
// value-initialized descriptor.
TEST_F(D3D9Tests, WhenFillingTexture3dDescThenTextureContentsAreCorrect) {
    D3D9::D3DTexture3dDesc requestedDesc = {};
    D3D9::D3DTexture3dDesc expectedDesc = {};
    D3D9::D3DTexture3dDesc srcDesc = {};
    cl_uint subresource = 4;

    mockSharingFcns->fillCreateTexture3dDesc(requestedDesc, &srcDesc, subresource);
    EXPECT_TRUE(memcmp(&requestedDesc, &expectedDesc, sizeof(D3D9::D3DTexture3dDesc)) == 0);
}

// The tests below call DX9 sharing entry points with a nullptr platform or command queue and
// check the returned error code.

// nullptr platform -> CL_INVALID_PLATFORM.
TEST_F(D3D9Tests, givenImproperPlatformWhenGettingDeviceIDsFromDX9ThenReturnError) {
    cl_int retVal = CL_SUCCESS;
    retVal = clGetDeviceIDsFromDX9INTEL(nullptr, 1, nullptr, 1, 1, nullptr, nullptr);
    EXPECT_EQ(CL_INVALID_PLATFORM, retVal);
}

// nullptr command queue -> CL_INVALID_COMMAND_QUEUE.
TEST_F(D3D9Tests, givenImproperCommandQueueWhenDX9ObjectsAreAcquiredThenReturnError) {
    cl_int retVal = CL_SUCCESS;
    retVal = clEnqueueAcquireDX9ObjectsINTEL(nullptr, 1, nullptr, 0, nullptr, nullptr);
    EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal);
}

// nullptr command queue -> CL_INVALID_COMMAND_QUEUE.
TEST_F(D3D9Tests, givenImproperCommandQueueWhenDX9ObjectsAreReleasedThenReturnError) {
    cl_int retVal = CL_SUCCESS;
    retVal = clEnqueueReleaseDX9ObjectsINTEL(nullptr, 1, nullptr, 0, nullptr, nullptr);
    EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal);
}

// nullptr platform -> CL_INVALID_PLATFORM.
TEST_F(D3D9Tests, givenImproperPlatformWhenGettingDeviceIDsFromDX9MediaAdapterThenReturnError) {
    cl_int retVal = CL_SUCCESS;
    retVal = clGetDeviceIDsFromDX9MediaAdapterKHR(nullptr, 1, nullptr, nullptr, 1, 1, nullptr, nullptr);
    EXPECT_EQ(CL_INVALID_PLATFORM, retVal);
}

// nullptr command queue -> CL_INVALID_COMMAND_QUEUE.
TEST_F(D3D9Tests, givenImproperCommandQueueWhenDX9MediaSurfacesAreAcquiredThenReturnError) {
    cl_int retVal = CL_SUCCESS;
    retVal = clEnqueueAcquireDX9MediaSurfacesKHR(nullptr, 1, nullptr, 0, nullptr, nullptr);
    EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal);
}

// nullptr command queue -> CL_INVALID_COMMAND_QUEUE.
TEST_F(D3D9Tests, givenImproperCommandQueueWhenDX9MediaSurfacesAreReleasedThenReturnError) {
    cl_int retVal = CL_SUCCESS;
    retVal = clEnqueueReleaseDX9MediaSurfacesKHR(nullptr, 1,
nullptr, 0, nullptr, nullptr);
    EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal);
}

// The tests below call D3D10/D3D11 sharing entry points from the D3D9 fixture with a nullptr
// platform, context or command queue and check the returned error code.

// nullptr platform -> CL_INVALID_PLATFORM.
TEST_F(D3D9Tests, givenImproperPlatformWhenGettingDeviceIDsFromD3D10ThenReturnError) {
    cl_int retVal = CL_SUCCESS;
    retVal = clGetDeviceIDsFromD3D10KHR(nullptr, 0, nullptr, 0, 0, nullptr, nullptr);
    EXPECT_EQ(CL_INVALID_PLATFORM, retVal);
}

// nullptr context -> CL_INVALID_CONTEXT (the returned handle is not inspected).
TEST_F(D3D9Tests, givenImproperContextWhenCreatingFromD3D10BufferThenReturnError) {
    cl_int retVal = CL_SUCCESS;
    clCreateFromD3D10BufferKHR(nullptr, 0, nullptr, &retVal);
    EXPECT_EQ(CL_INVALID_CONTEXT, retVal);
}

// nullptr context -> CL_INVALID_CONTEXT (the returned handle is not inspected).
TEST_F(D3D9Tests, givenImproperContextWhenCreatingFromD3D10Texture2DThenReturnError) {
    cl_int retVal = CL_SUCCESS;
    clCreateFromD3D10Texture2DKHR(nullptr, 0, nullptr, 0u, &retVal);
    EXPECT_EQ(CL_INVALID_CONTEXT, retVal);
}

// nullptr context -> CL_INVALID_CONTEXT (the returned handle is not inspected).
TEST_F(D3D9Tests, givenImproperContextWhenCreatingFromD3D10Texture3DThenReturnError) {
    cl_int retVal = CL_SUCCESS;
    clCreateFromD3D10Texture3DKHR(nullptr, 0, nullptr, 0u, &retVal);
    EXPECT_EQ(CL_INVALID_CONTEXT, retVal);
}

// nullptr command queue -> CL_INVALID_COMMAND_QUEUE.
TEST_F(D3D9Tests, givenImproperCommandQueueWhenD3D10ObjectsAreAcquiredThenReturnError) {
    cl_int retVal = CL_SUCCESS;
    retVal = clEnqueueAcquireD3D10ObjectsKHR(nullptr, 0, nullptr, 0, nullptr, nullptr);
    EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal);
}

// nullptr command queue -> CL_INVALID_COMMAND_QUEUE.
TEST_F(D3D9Tests, givenImproperCommandQueueWhenD3D10ObjectsAreReleasedThenReturnError) {
    cl_int retVal = CL_SUCCESS;
    retVal = clEnqueueReleaseD3D10ObjectsKHR(nullptr, 0, nullptr, 0, nullptr, nullptr);
    EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal);
}

// nullptr platform -> CL_INVALID_PLATFORM.
TEST_F(D3D9Tests, givenImproperPlatformWhenGettingDeviceIDsFromD3D11ThenReturnError) {
    cl_int retVal = CL_SUCCESS;
    retVal = clGetDeviceIDsFromD3D11KHR(nullptr, 0, nullptr, 0, 0, nullptr, nullptr);
    EXPECT_EQ(CL_INVALID_PLATFORM, retVal);
}

// nullptr context -> CL_INVALID_CONTEXT (the returned handle is not inspected).
TEST_F(D3D9Tests, givenImproperContextWhenCreatingFromD3D11BufferThenReturnError) {
    cl_int retVal = CL_SUCCESS;
    clCreateFromD3D11BufferKHR(nullptr, 0, nullptr, &retVal);
    EXPECT_EQ(CL_INVALID_CONTEXT, retVal);
}

TEST_F(D3D9Tests,
givenImproperContextWhenCreatingFromD3D11Texture2DThenReturnError) {
    // nullptr context -> CL_INVALID_CONTEXT (the returned handle is not inspected).
    cl_int retVal = CL_SUCCESS;
    clCreateFromD3D11Texture2DKHR(nullptr, 0, nullptr, 0u, &retVal);
    EXPECT_EQ(CL_INVALID_CONTEXT, retVal);
}

// nullptr context -> CL_INVALID_CONTEXT (the returned handle is not inspected).
TEST_F(D3D9Tests, givenImproperContextWhenCreatingFromD3D11Texture3DThenReturnError) {
    cl_int retVal = CL_SUCCESS;
    clCreateFromD3D11Texture3DKHR(nullptr, 0, nullptr, 0u, &retVal);
    EXPECT_EQ(CL_INVALID_CONTEXT, retVal);
}

// nullptr command queue -> CL_INVALID_COMMAND_QUEUE.
TEST_F(D3D9Tests, givenImproperCommandQueueWhenD3D11ObjectsAreAcquiredThenReturnError) {
    cl_int retVal = CL_SUCCESS;
    retVal = clEnqueueAcquireD3D11ObjectsKHR(nullptr, 0, nullptr, 0, nullptr, nullptr);
    EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal);
}

// nullptr command queue -> CL_INVALID_COMMAND_QUEUE.
TEST_F(D3D9Tests, givenImproperCommandQueueWhenD3D11ObjectsAreReleasedThenReturnError) {
    cl_int retVal = CL_SUCCESS;
    retVal = clEnqueueReleaseD3D11ObjectsKHR(nullptr, 0, nullptr, 0, nullptr, nullptr);
    EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal);
}

namespace D3D9Formats {
// Parameter table for D3D9ImageFormatTests. Tuple fields, in order:
//   0: D3D format or FOURCC code (input)
//   1: plane index (input)
//   2: expected CL channel order (output)
//   3: expected CL channel data type (output)
//   4: expected ImagePlane (output)
// NOTE(review): the tuple's template arguments are missing in this copy of the file — restore them from upstream.
static const std::tuple allImageFormats[] = {
    // input, input, output, output
    std::make_tuple(D3DFMT_R32F, 0, CL_R, CL_FLOAT, ImagePlane::NO_PLANE),
    std::make_tuple(D3DFMT_R16F, 0, CL_R, CL_HALF_FLOAT, ImagePlane::NO_PLANE),
    std::make_tuple(D3DFMT_L16, 0, CL_R, CL_UNORM_INT16, ImagePlane::NO_PLANE),
    std::make_tuple(D3DFMT_A8, 0, CL_A, CL_UNORM_INT8, ImagePlane::NO_PLANE),
    std::make_tuple(D3DFMT_L8, 0, CL_R, CL_UNORM_INT8, ImagePlane::NO_PLANE),
    std::make_tuple(D3DFMT_G32R32F, 0, CL_RG, CL_FLOAT, ImagePlane::NO_PLANE),
    std::make_tuple(D3DFMT_G16R16F, 0, CL_RG, CL_HALF_FLOAT, ImagePlane::NO_PLANE),
    std::make_tuple(D3DFMT_G16R16, 0, CL_RG, CL_UNORM_INT16, ImagePlane::NO_PLANE),
    std::make_tuple(D3DFMT_A8L8, 0, CL_RG, CL_UNORM_INT8, ImagePlane::NO_PLANE),
    std::make_tuple(D3DFMT_A32B32G32R32F, 0, CL_RGBA, CL_FLOAT, ImagePlane::NO_PLANE),
    std::make_tuple(D3DFMT_A16B16G16R16F, 0, CL_RGBA, CL_HALF_FLOAT, ImagePlane::NO_PLANE),
    std::make_tuple(D3DFMT_A16B16G16R16, 0, CL_RGBA, CL_UNORM_INT16, ImagePlane::NO_PLANE),
    std::make_tuple(D3DFMT_A8B8G8R8, 0,
CL_RGBA, CL_UNORM_INT8, ImagePlane::NO_PLANE),
    std::make_tuple(D3DFMT_X8B8G8R8, 0, CL_RGBA, CL_UNORM_INT8, ImagePlane::NO_PLANE),
    std::make_tuple(D3DFMT_A8R8G8B8, 0, CL_BGRA, CL_UNORM_INT8, ImagePlane::NO_PLANE),
    std::make_tuple(D3DFMT_X8R8G8B8, 0, CL_BGRA, CL_UNORM_INT8, ImagePlane::NO_PLANE),
    // NV12: plane 0 -> Y, plane 1 -> UV; plane 2 expects zeroed output and NO_PLANE.
    std::make_tuple(MAKEFOURCC('N', 'V', '1', '2'), 0, CL_R, CL_UNORM_INT8, ImagePlane::PLANE_Y),
    std::make_tuple(MAKEFOURCC('N', 'V', '1', '2'), 1, CL_RG, CL_UNORM_INT8, ImagePlane::PLANE_UV),
    std::make_tuple(MAKEFOURCC('N', 'V', '1', '2'), 2, 0, 0, ImagePlane::NO_PLANE),
    // YV12: plane 0 -> Y, plane 1 -> V, plane 2 -> U; plane 3 expects zeroed output and NO_PLANE.
    std::make_tuple(MAKEFOURCC('Y', 'V', '1', '2'), 0, CL_R, CL_UNORM_INT8, ImagePlane::PLANE_Y),
    std::make_tuple(MAKEFOURCC('Y', 'V', '1', '2'), 1, CL_R, CL_UNORM_INT8, ImagePlane::PLANE_V),
    std::make_tuple(MAKEFOURCC('Y', 'V', '1', '2'), 2, CL_R, CL_UNORM_INT8, ImagePlane::PLANE_U),
    std::make_tuple(MAKEFOURCC('Y', 'V', '1', '2'), 3, 0, 0, ImagePlane::NO_PLANE),
    std::make_tuple(D3DFMT_YUY2, 0, CL_YUYV_INTEL, CL_UNORM_INT8, ImagePlane::NO_PLANE),
    std::make_tuple(D3DFMT_UYVY, 0, CL_UYVY_INTEL, CL_UNORM_INT8, ImagePlane::NO_PLANE),
    std::make_tuple(MAKEFOURCC('Y', 'V', 'Y', 'U'), 0, CL_YVYU_INTEL, CL_UNORM_INT8, ImagePlane::NO_PLANE),
    std::make_tuple(MAKEFOURCC('V', 'Y', 'U', 'Y'), 0, CL_VYUY_INTEL, CL_UNORM_INT8, ImagePlane::NO_PLANE),
    // CL_INVALID_VALUE is used here as a format code; it expects zeroed output and NO_PLANE.
    std::make_tuple(CL_INVALID_VALUE, 0, 0, 0, ImagePlane::NO_PLANE)};
}

// Value-parameterized fixture; each parameter is one row of D3D9Formats::allImageFormats.
struct D3D9ImageFormatTests
    : public ::testing::WithParamInterface>,
      public ::testing::Test {
};

INSTANTIATE_TEST_CASE_P(
    D3D9ImageFormatTests,
    D3D9ImageFormatTests,
    testing::ValuesIn(D3D9Formats::allImageFormats));

// Calls D3DSurface::findImgFormat with the row's format and plane, then compares the resulting
// channel data type, channel order and image plane against the row's expected values.
TEST_P(D3D9ImageFormatTests, WhenGettingImageFormatThenValidFormatDetailsAreReturned) {
    cl_image_format imgFormat = {};
    auto format = std::get<0>(GetParam());
    auto plane = std::get<1>(GetParam());
    ImagePlane imagePlane = ImagePlane::NO_PLANE;
    auto expectedImagePlane = std::get<4>(GetParam());
    auto expectedClChannelType = static_cast(std::get<3>(GetParam()));
    auto expectedClChannelOrder =
static_cast(std::get<2>(GetParam()));

    D3DSurface::findImgFormat((D3DFORMAT)format, imgFormat, plane, imagePlane);

    EXPECT_EQ(imgFormat.image_channel_data_type, expectedClChannelType);
    EXPECT_EQ(imgFormat.image_channel_order, expectedClChannelOrder);
    EXPECT_TRUE(imagePlane == expectedImagePlane);
}

using D3D9MultiRootDeviceTest = MultiRootDeviceFixture;

// Creates a D3DSurface on a MockContext built from device1, with surfaceInfo.shared_handle set
// from the value 0, and expects the resulting graphics allocation to report expectedRootDeviceIndex.
TEST_F(D3D9MultiRootDeviceTest, givenD3DHandleIsNullWhenCreatingSharedSurfaceAndRootDeviceIndexIsSpecifiedThenAllocationHasCorrectRootDeviceIndex) {
    ImageDescriptor imgDesc = {};
    imgDesc.imageHeight = 10;
    imgDesc.imageWidth = 10;
    imgDesc.imageDepth = 1;
    imgDesc.imageType = ImageType::Image2D;
    auto imgInfo = MockGmm::initImgInfo(imgDesc, 0, nullptr);
    // The Gmm is released from its smart pointer and handed to the memory manager through forceGmm.
    auto gmm = MockGmm::queryImgParams(device1->getGmmClientContext(), imgInfo, false).release();

    auto memoryManager = std::make_unique(*device1->executionEnvironment);
    memoryManager->forceGmm = gmm;

    auto mockSharingFcns = new MockD3DSharingFunctions();
    mockSharingFcns->mockTexture2dDesc.Format = D3DFMT_R32F;
    mockSharingFcns->mockTexture2dDesc.Height = 10;
    mockSharingFcns->mockTexture2dDesc.Width = 10;

    cl_dx9_surface_info_khr surfaceInfo = {};
    surfaceInfo.shared_handle = reinterpret_cast(0);

    mockSharingFcns->getTexture2dDescSetParams = true;
    mockSharingFcns->getTexture2dDescParamsSet.textureDesc = mockSharingFcns->mockTexture2dDesc;

    MockContext ctx(device1);
    // NOTE(review): presumably the context takes ownership of mockSharingFcns (no delete here) — confirm.
    ctx.setSharingFunctions(mockSharingFcns);
    ctx.memoryManager = memoryManager.get();

    auto sharedImg = std::unique_ptr(D3DSurface::create(&ctx, &surfaceInfo, CL_MEM_READ_WRITE, 0, 0, nullptr));
    ASSERT_NE(nullptr, sharedImg.get());

    // The allocation is looked up with the root device index of the fixture's `context`, not of the local `ctx`.
    auto graphicsAllocation = sharedImg->getGraphicsAllocation(context->getDevice(0)->getRootDeviceIndex());
    ASSERT_NE(nullptr, graphicsAllocation);

    EXPECT_EQ(expectedRootDeviceIndex, graphicsAllocation->getRootDeviceIndex());
}

// Same scenario as the test above, except that surfaceInfo.shared_handle is set from the value 1.
TEST_F(D3D9MultiRootDeviceTest, givenD3DHandleIsNotNullWhenCreatingSharedSurfaceAndRootDeviceIndexIsSpecifiedThenAllocationHasCorrectRootDeviceIndex) {
    ImageDescriptor imgDesc =
{};
    imgDesc.imageHeight = 10;
    imgDesc.imageWidth = 10;
    imgDesc.imageDepth = 1;
    imgDesc.imageType = ImageType::Image2D;
    auto imgInfo = MockGmm::initImgInfo(imgDesc, 0, nullptr);
    // The Gmm is released from its smart pointer and handed to the memory manager through forceGmm.
    auto gmm = MockGmm::queryImgParams(device1->getGmmClientContext(), imgInfo, false).release();

    auto memoryManager = std::make_unique(*device1->executionEnvironment);
    memoryManager->forceGmm = gmm;

    auto mockSharingFcns = new MockD3DSharingFunctions();
    mockSharingFcns->mockTexture2dDesc.Format = D3DFMT_R32F;
    mockSharingFcns->mockTexture2dDesc.Height = 10;
    mockSharingFcns->mockTexture2dDesc.Width = 10;

    cl_dx9_surface_info_khr surfaceInfo = {};
    surfaceInfo.shared_handle = reinterpret_cast(1);

    mockSharingFcns->getTexture2dDescSetParams = true;
    mockSharingFcns->getTexture2dDescParamsSet.textureDesc = mockSharingFcns->mockTexture2dDesc;

    MockContext ctx(device1);
    ctx.setSharingFunctions(mockSharingFcns);
    ctx.memoryManager = memoryManager.get();

    auto sharedImg = std::unique_ptr(D3DSurface::create(&ctx, &surfaceInfo, CL_MEM_READ_WRITE, 0, 0, nullptr));
    ASSERT_NE(nullptr, sharedImg.get());

    // The allocation is looked up with the root device index of the fixture's `context`, not of the local `ctx`.
    auto graphicsAllocation = sharedImg->getGraphicsAllocation(context->getDevice(0)->getRootDeviceIndex());
    ASSERT_NE(nullptr, graphicsAllocation);

    EXPECT_EQ(expectedRootDeviceIndex, graphicsAllocation->getRootDeviceIndex());
}
// NOTE(review): the text after "namespace NEO" on the next line looks like tar-archive header residue
// marking the start of d3d_aux_tests.cpp — confirm and strip it when the files are split apart.
} // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/d3d_sharing/d3d_aux_tests.cpp000066400000000000000000000242141422164147700272740ustar00rootroot00000000000000
/*
 * Copyright (C) 2019-2022 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "shared/source/memory_manager/os_agnostic_memory_manager.h"
#include "shared/source/os_interface/hw_info_config.h"
#include "shared/source/utilities/arrayref.h"

#include "opencl/source/api/api.h"
#include "opencl/source/mem_obj/image.h"
#include "opencl/source/platform/platform.h"
#include "opencl/source/sharings/d3d/cl_d3d_api.h"
#include "opencl/source/sharings/d3d/d3d_buffer.h"
#include "opencl/source/sharings/d3d/d3d_sharing.h"
#include "opencl/source/sharings/d3d/d3d_surface.h"
#include "opencl/source/sharings/d3d/d3d_texture.h"
#include "opencl/test/unit_test/fixtures/d3d_test_fixture.h"

#include "gtest/gtest.h"

namespace NEO {
// Typed-test fixture for the aux-table mapping / compression tests; it adds nothing to D3DTests.
// NOTE(review): the template parameter list and base-class argument are missing in this copy — restore from upstream.
template class D3DAuxTests : public D3DTests {};
TYPED_TEST_CASE_P(D3DAuxTests);

// 2D texture with MISC_SHARED and the resource info marked unified-aux-translation capable.
// mapAuxGpuVA is expected once when the page table manager is supported (zero times otherwise),
// and the Gmm is expected to be marked as compressed.
TYPED_TEST_P(D3DAuxTests, given2dSharableTextureWithUnifiedAuxFlagsWhenCreatingThenMapAuxTableAndSetAsCompressed) {
    this->mockSharingFcns->mockTexture2dDesc.MiscFlags = D3DResourceFlags::MISC_SHARED;
    this->mockSharingFcns->mockTexture2dDesc.ArraySize = 4;
    this->mockSharingFcns->mockTexture2dDesc.MipLevels = 4;

    mockGmmResInfo->setUnifiedAuxTranslationCapable();

    this->mockSharingFcns->getTexture2dDescSetParams = true;
    this->mockSharingFcns->getTexture2dDescParamsSet.textureDesc = this->mockSharingFcns->mockTexture2dDesc;

    auto image = std::unique_ptr(D3DTexture::create2d(this->context, reinterpret_cast(&this->dummyD3DTexture), CL_MEM_READ_WRITE, 4, nullptr));
    ASSERT_NE(nullptr, image.get());

    const auto &hwInfo = context->getDevice(0)->getHardwareInfo();
    const auto &hwInfoConfig = *HwInfoConfig::get(hwInfo.platform.eProductFamily);
    uint32_t expectedMapAuxGpuVaCalls = hwInfoConfig.isPageTableManagerSupported(hwInfo) ?
1 : 0;

    EXPECT_EQ(expectedMapAuxGpuVaCalls, mockMM->mapAuxGpuVACalled);
    EXPECT_TRUE(gmm->isCompressionEnabled);

    EXPECT_EQ(1u, this->mockSharingFcns->getTexture2dDescCalled);
}

// Same setup as the 2D shared/unified-aux case, but the mock memory manager is told to fail aux
// mapping (mapAuxGpuVaRetValue = false). Compression is then expected to be enabled only when the
// page table manager is not supported.
TYPED_TEST_P(D3DAuxTests, given2dSharableTextureWithUnifiedAuxFlagsWhenFailOnAuxMappingThenDontSetAsCompressed) {
    this->mockSharingFcns->mockTexture2dDesc.MiscFlags = D3DResourceFlags::MISC_SHARED;
    this->mockSharingFcns->mockTexture2dDesc.ArraySize = 4;
    this->mockSharingFcns->mockTexture2dDesc.MipLevels = 4;

    mockGmmResInfo->setUnifiedAuxTranslationCapable();

    this->mockSharingFcns->getTexture2dDescSetParams = true;
    this->mockSharingFcns->getTexture2dDescParamsSet.textureDesc = this->mockSharingFcns->mockTexture2dDesc;

    mockMM->mapAuxGpuVaRetValue = false;
    auto image = std::unique_ptr(D3DTexture::create2d(this->context, reinterpret_cast(&this->dummyD3DTexture), CL_MEM_READ_WRITE, 4, nullptr));
    ASSERT_NE(nullptr, image.get());

    const auto &hwInfo = context->getDevice(0)->getHardwareInfo();
    const auto &hwInfoConfig = *HwInfoConfig::get(hwInfo.platform.eProductFamily);
    uint32_t expectedMapAuxGpuVaCalls = hwInfoConfig.isPageTableManagerSupported(hwInfo) ?
1 : 0;

    EXPECT_EQ(expectedMapAuxGpuVaCalls, mockMM->mapAuxGpuVACalled);
    // Compression is expected to stay enabled only when no aux mapping is attempted (page table manager unsupported).
    EXPECT_EQ(!hwInfoConfig.isPageTableManagerSupported(hwInfo), gmm->isCompressionEnabled);

    EXPECT_EQ(1u, this->mockSharingFcns->getTexture2dDescCalled);
}

// 2D shared texture whose Gmm is not unified-aux-translation capable (asserted up front):
// no aux mapping call and no compression are expected.
TYPED_TEST_P(D3DAuxTests, given2dSharableTextureWithoutUnifiedAuxFlagsWhenCreatingThenDontMapAuxTable) {
    this->mockSharingFcns->mockTexture2dDesc.MiscFlags = D3DResourceFlags::MISC_SHARED;
    this->mockSharingFcns->mockTexture2dDesc.ArraySize = 4;
    this->mockSharingFcns->mockTexture2dDesc.MipLevels = 4;

    EXPECT_FALSE(gmm->unifiedAuxTranslationCapable());

    this->mockSharingFcns->getTexture2dDescSetParams = true;
    this->mockSharingFcns->getTexture2dDescParamsSet.textureDesc = this->mockSharingFcns->mockTexture2dDesc;

    auto image = std::unique_ptr(D3DTexture::create2d(this->context, reinterpret_cast(&this->dummyD3DTexture), CL_MEM_READ_WRITE, 4, nullptr));
    ASSERT_NE(nullptr, image.get());

    EXPECT_EQ(0u, mockMM->mapAuxGpuVACalled);
    EXPECT_FALSE(gmm->isCompressionEnabled);

    EXPECT_EQ(1u, this->mockSharingFcns->getTexture2dDescCalled);
}

// 2D texture without MISC_SHARED set by the test, with the resource info marked unified-aux capable:
// aux mapping is expected once when the page table manager is supported, and compression is expected.
TYPED_TEST_P(D3DAuxTests, given2dNonSharableTextureWithUnifiedAuxFlagsWhenCreatingThenMapAuxTableAndSetCompressed) {
    mockGmmResInfo->setUnifiedAuxTranslationCapable();

    this->mockSharingFcns->getTexture2dDescSetParams = true;
    this->mockSharingFcns->getTexture2dDescParamsSet.textureDesc = this->mockSharingFcns->mockTexture2dDesc;

    // NOTE(review): second call to setUnifiedAuxTranslationCapable() in this test — looks redundant, confirm.
    mockGmmResInfo->setUnifiedAuxTranslationCapable();
    auto image = std::unique_ptr(D3DTexture::create2d(this->context, reinterpret_cast(&this->dummyD3DTexture), CL_MEM_READ_WRITE, 1, nullptr));
    ASSERT_NE(nullptr, image.get());

    const auto &hwInfo = context->getDevice(0)->getHardwareInfo();
    const auto &hwInfoConfig = *HwInfoConfig::get(hwInfo.platform.eProductFamily);
    uint32_t expectedMapAuxGpuVaCalls = hwInfoConfig.isPageTableManagerSupported(hwInfo) ?
1 : 0;

    EXPECT_EQ(expectedMapAuxGpuVaCalls, mockMM->mapAuxGpuVACalled);
    EXPECT_TRUE(gmm->isCompressionEnabled);

    EXPECT_EQ(1u, this->mockSharingFcns->getTexture2dDescCalled);
}

// 3D texture with MISC_SHARED and the resource info marked unified-aux capable: aux mapping is
// expected once when the page table manager is supported, and compression is expected.
TYPED_TEST_P(D3DAuxTests, given3dSharableTextureWithUnifiedAuxFlagsWhenCreatingThenMapAuxTableAndSetAsCompressed) {
    this->mockSharingFcns->mockTexture3dDesc.MiscFlags = D3DResourceFlags::MISC_SHARED;

    mockGmmResInfo->setUnifiedAuxTranslationCapable();

    this->mockSharingFcns->getTexture3dDescSetParams = true;
    this->mockSharingFcns->getTexture3dDescParamsSet.textureDesc = this->mockSharingFcns->mockTexture3dDesc;

    std::unique_ptr image(D3DTexture::create3d(this->context, reinterpret_cast(&this->dummyD3DTexture), CL_MEM_READ_WRITE, 1, nullptr));
    ASSERT_NE(nullptr, image.get());

    const auto &hwInfo = context->getDevice(0)->getHardwareInfo();
    const auto &hwInfoConfig = *HwInfoConfig::get(hwInfo.platform.eProductFamily);
    uint32_t expectedMapAuxGpuVaCalls = hwInfoConfig.isPageTableManagerSupported(hwInfo) ? 1 : 0;

    EXPECT_EQ(expectedMapAuxGpuVaCalls, mockMM->mapAuxGpuVACalled);
    EXPECT_TRUE(gmm->isCompressionEnabled);

    EXPECT_EQ(1u, this->mockSharingFcns->getTexture3dDescCalled);
}

// Same as the 3D shared/unified-aux case, but the mock memory manager is told to fail aux mapping
// (mapAuxGpuVaRetValue = false).
TYPED_TEST_P(D3DAuxTests, given3dSharableTextureWithUnifiedAuxFlagsWhenFailOnAuxMappingThenDontSetAsCompressed) {
    this->mockSharingFcns->mockTexture3dDesc.MiscFlags = D3DResourceFlags::MISC_SHARED;

    mockGmmResInfo->setUnifiedAuxTranslationCapable();

    this->mockSharingFcns->getTexture3dDescSetParams = true;
    this->mockSharingFcns->getTexture3dDescParamsSet.textureDesc = this->mockSharingFcns->mockTexture3dDesc;

    mockMM->mapAuxGpuVaRetValue = false;
    std::unique_ptr image(D3DTexture::create3d(this->context, reinterpret_cast(&this->dummyD3DTexture), CL_MEM_READ_WRITE, 1, nullptr));
    ASSERT_NE(nullptr, image.get());

    const auto &hwInfo = context->getDevice(0)->getHardwareInfo();
    const auto &hwInfoConfig = *HwInfoConfig::get(hwInfo.platform.eProductFamily);
    uint32_t expectedMapAuxGpuVaCalls = hwInfoConfig.isPageTableManagerSupported(hwInfo)
? 1 : 0;

    EXPECT_EQ(expectedMapAuxGpuVaCalls, mockMM->mapAuxGpuVACalled);
    // Compression is expected to stay enabled only when no aux mapping is attempted (page table manager unsupported).
    EXPECT_EQ(!hwInfoConfig.isPageTableManagerSupported(hwInfo), gmm->isCompressionEnabled);

    EXPECT_EQ(1u, this->mockSharingFcns->getTexture3dDescCalled);
}

// 3D shared texture whose Gmm is not unified-aux-translation capable (asserted up front):
// no aux mapping call and no compression are expected.
TYPED_TEST_P(D3DAuxTests, given3dSharableTextureWithoutUnifiedAuxFlagsWhenCreatingThenDontMapAuxTable) {
    this->mockSharingFcns->mockTexture3dDesc.MiscFlags = D3DResourceFlags::MISC_SHARED;

    EXPECT_FALSE(gmm->unifiedAuxTranslationCapable());

    this->mockSharingFcns->getTexture3dDescSetParams = true;
    this->mockSharingFcns->getTexture3dDescParamsSet.textureDesc = this->mockSharingFcns->mockTexture3dDesc;

    std::unique_ptr image(D3DTexture::create3d(this->context, reinterpret_cast(&this->dummyD3DTexture), CL_MEM_READ_WRITE, 1, nullptr));
    ASSERT_NE(nullptr, image.get());

    EXPECT_EQ(0u, mockMM->mapAuxGpuVACalled);
    EXPECT_FALSE(gmm->isCompressionEnabled);

    EXPECT_EQ(1u, this->mockSharingFcns->getTexture3dDescCalled);
}

// 3D texture without MISC_SHARED set by the test, with the resource info marked unified-aux capable:
// aux mapping is expected once when the page table manager is supported, and compression is expected.
TYPED_TEST_P(D3DAuxTests, given3dNonSharableTextureWithUnifiedAuxFlagsWhenCreatingThenMapAuxTableAndSetCompressed) {
    mockGmmResInfo->setUnifiedAuxTranslationCapable();

    this->mockSharingFcns->getTexture3dDescSetParams = true;
    this->mockSharingFcns->getTexture3dDescParamsSet.textureDesc = this->mockSharingFcns->mockTexture3dDesc;

    // NOTE(review): second call to setUnifiedAuxTranslationCapable() in this test — looks redundant, confirm.
    mockGmmResInfo->setUnifiedAuxTranslationCapable();
    std::unique_ptr image(D3DTexture::create3d(this->context, reinterpret_cast(&this->dummyD3DTexture), CL_MEM_READ_WRITE, 1, nullptr));
    ASSERT_NE(nullptr, image.get());

    const auto &hwInfo = context->getDevice(0)->getHardwareInfo();
    const auto &hwInfoConfig = *HwInfoConfig::get(hwInfo.platform.eProductFamily);
    uint32_t expectedMapAuxGpuVaCalls = hwInfoConfig.isPageTableManagerSupported(hwInfo) ?
1 : 0;

    EXPECT_EQ(expectedMapAuxGpuVaCalls, mockMM->mapAuxGpuVACalled);
    EXPECT_TRUE(gmm->isCompressionEnabled);

    EXPECT_EQ(1u, this->mockSharingFcns->getTexture3dDescCalled);
}

// Registers every D3DAuxTests typed test and instantiates the suite over D3DTypes.
REGISTER_TYPED_TEST_CASE_P(D3DAuxTests,
                           given2dSharableTextureWithUnifiedAuxFlagsWhenCreatingThenMapAuxTableAndSetAsCompressed,
                           given2dSharableTextureWithUnifiedAuxFlagsWhenFailOnAuxMappingThenDontSetAsCompressed,
                           given2dSharableTextureWithoutUnifiedAuxFlagsWhenCreatingThenDontMapAuxTable,
                           given2dNonSharableTextureWithUnifiedAuxFlagsWhenCreatingThenMapAuxTableAndSetCompressed,
                           given3dSharableTextureWithUnifiedAuxFlagsWhenCreatingThenMapAuxTableAndSetAsCompressed,
                           given3dSharableTextureWithUnifiedAuxFlagsWhenFailOnAuxMappingThenDontSetAsCompressed,
                           given3dSharableTextureWithoutUnifiedAuxFlagsWhenCreatingThenDontMapAuxTable,
                           given3dNonSharableTextureWithUnifiedAuxFlagsWhenCreatingThenMapAuxTableAndSetCompressed);

INSTANTIATE_TYPED_TEST_CASE_P(D3DSharingTests, D3DAuxTests, D3DTypes);

// NOTE(review): the text after "namespace NEO" on the next line looks like tar-archive header residue
// marking the start of d3d_tests_part1.cpp — confirm and strip it when the files are split apart.
} // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/d3d_sharing/d3d_tests_part1.cpp000066400000000000000000001366241422164147700275370ustar00rootroot00000000000000
/*
 * Copyright (C) 2019-2022 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "shared/source/memory_manager/os_agnostic_memory_manager.h"
#include "shared/source/os_interface/driver_info.h"
#include "shared/source/utilities/arrayref.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/unit_test/helpers/gtest_helpers.h"

#include "opencl/source/api/api.h"
#include "opencl/source/mem_obj/image.h"
#include "opencl/source/platform/platform.h"
#include "opencl/source/sharings/d3d/cl_d3d_api.h"
#include "opencl/source/sharings/d3d/d3d_buffer.h"
#include "opencl/source/sharings/d3d/d3d_sharing.h"
#include "opencl/source/sharings/d3d/d3d_surface.h"
#include "opencl/source/sharings/d3d/d3d_texture.h"
#include "opencl/source/sharings/d3d/enable_d3d.h"
#include "opencl/test/unit_test/fixtures/d3d_test_fixture.h"
#include "opencl/test/unit_test/mocks/mock_buffer.h"
#include "opencl/test/unit_test/mocks/mock_sharing_factory.h"

#include "gtest/gtest.h"

namespace NEO {
TYPED_TEST_CASE_P(D3DTests);

// Queries device IDs three times with different device-set arguments: the preferred-devices and
// all-devices values are expected to return the fixture's device; CL_INVALID_OPERATION passed as
// the device set is expected to yield CL_INVALID_VALUE with no device reported.
TYPED_TEST_P(D3DTests, GivenSpecificDeviceSetWhenGettingDeviceIDsFromD3DThenOnlySelectedDevicesAreReturned) {
    cl_device_id expectedDevice = *this->devices;
    cl_device_id device = 0;
    cl_uint numDevices = 0;

    auto deviceSourceParam = this->pickParam(CL_D3D10_DEVICE_KHR, CL_D3D11_DEVICE_KHR);
    auto deviceSetParam = this->pickParam(CL_PREFERRED_DEVICES_FOR_D3D10_KHR, CL_PREFERRED_DEVICES_FOR_D3D11_KHR);

    cl_int retVal = this->getDeviceIDsFromD3DApi(this->mockSharingFcns, this->pPlatform, deviceSourceParam, &this->dummyD3DBuffer, deviceSetParam, 0, &device, &numDevices);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(expectedDevice, device);
    EXPECT_EQ(1u, numDevices);

    device = 0;
    numDevices = 0;
    deviceSetParam = this->pickParam(CL_ALL_DEVICES_FOR_D3D10_KHR, CL_ALL_DEVICES_FOR_D3D11_KHR);

    retVal = this->getDeviceIDsFromD3DApi(this->mockSharingFcns, this->pPlatform, deviceSourceParam, &this->dummyD3DBuffer, deviceSetParam, 0, &device, &numDevices);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(expectedDevice, device);
    EXPECT_EQ(1u, numDevices);

    device = 0;
    numDevices = 0;

    // CL_INVALID_OPERATION is deliberately used here as an invalid device-set value.
    retVal = this->getDeviceIDsFromD3DApi(this->mockSharingFcns, this->pPlatform, deviceSourceParam, &this->dummyD3DBuffer, CL_INVALID_OPERATION, 0, &device, &numDevices);
    EXPECT_EQ(CL_INVALID_VALUE, retVal);
    EXPECT_NE(expectedDevice, device);
    EXPECT_EQ(0u, numDevices);
}

// Queries device IDs with a device source, a DXGI adapter source and an invalid source value, and
// checks the results together with how often (and with which adapter) the DXGI description was requested.
TYPED_TEST_P(D3DTests, GivenSpecificDeviceSourceWhenGettingDeviceIDsFromD3DThenOnlySelectedDevicesAreReturned) {
    cl_device_id expectedDevice = *this->devices;
    cl_device_id device = 0;
    cl_uint numDevices = 0;

    auto deviceSourceParam = this->pickParam(CL_D3D10_DEVICE_KHR, CL_D3D11_DEVICE_KHR);
    auto deviceSetParam = this->pickParam(CL_ALL_DEVICES_FOR_D3D10_KHR, CL_ALL_DEVICES_FOR_D3D11_KHR);

    cl_int retVal =
this->getDeviceIDsFromD3DApi(this->mockSharingFcns, this->pPlatform, deviceSourceParam, &this->dummyD3DBuffer, deviceSetParam, 0, &device, &numDevices);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(expectedDevice, device);
    EXPECT_EQ(1u, numDevices);
    // With a device source, the DXGI description is expected to be requested once, with a null adapter.
    EXPECT_EQ(1u, this->mockSharingFcns->getDxgiDescCalled);
    EXPECT_EQ(nullptr, this->mockSharingFcns->getDxgiDescAdapterRequested);

    device = 0;
    numDevices = 0;
    deviceSourceParam = this->pickParam(CL_D3D10_DXGI_ADAPTER_KHR, CL_D3D11_DXGI_ADAPTER_KHR);

    retVal = this->getDeviceIDsFromD3DApi(this->mockSharingFcns, this->pPlatform, deviceSourceParam, &this->dummyD3DBuffer, deviceSetParam, 0, &device, &numDevices);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(expectedDevice, device);
    EXPECT_EQ(1u, numDevices);
    // With a DXGI adapter source, a second request is expected, this time with a non-null adapter.
    EXPECT_EQ(2u, this->mockSharingFcns->getDxgiDescCalled);
    EXPECT_NE(nullptr, this->mockSharingFcns->getDxgiDescAdapterRequested);

    device = 0;
    numDevices = 0;

    // CL_INVALID_OPERATION is deliberately used here as an invalid device-source value; no further
    // DXGI description request is expected.
    retVal = this->getDeviceIDsFromD3DApi(this->mockSharingFcns, this->pPlatform, CL_INVALID_OPERATION, &this->dummyD3DBuffer, deviceSetParam, 0, &device, &numDevices);
    EXPECT_EQ(CL_INVALID_VALUE, retVal);
    EXPECT_NE(expectedDevice, device);
    EXPECT_EQ(0u, numDevices);
    EXPECT_EQ(2u, this->mockSharingFcns->getDxgiDescCalled);
}

// The mocked DXGI adapter description reports a vendor ID different from INTEL_VENDOR_ID;
// the query is expected to return CL_DEVICE_NOT_FOUND with no device reported.
TYPED_TEST_P(D3DTests, givenNonIntelVendorWhenGetDeviceIdIsCalledThenReturnError) {
    DXGI_ADAPTER_DESC desc = {{0}};
    desc.VendorId = INTEL_VENDOR_ID + 1u;
    this->mockSharingFcns->mockDxgiDesc = desc;

    cl_device_id device = 0;
    cl_uint numDevices = 0;

    auto deviceSourceParam = this->pickParam(CL_D3D10_DEVICE_KHR, CL_D3D11_DEVICE_KHR);
    auto deviceSetParam = this->pickParam(CL_ALL_DEVICES_FOR_D3D10_KHR, CL_ALL_DEVICES_FOR_D3D11_KHR);

    cl_int retVal = this->getDeviceIDsFromD3DApi(this->mockSharingFcns, this->pPlatform, deviceSourceParam, nullptr, deviceSetParam, 0, &device, &numDevices);
    EXPECT_EQ(CL_DEVICE_NOT_FOUND, retVal);
    EXPECT_TRUE(0 == device);
    EXPECT_EQ(0u, numDevices);
    EXPECT_EQ(1u, this->mockSharingFcns->getDxgiDescCalled);
}

TYPED_TEST_P(D3DTests,
WhenCreatingFromD3DBufferKhrApiThenValidBufferIsReturned) {
    // Creates a buffer through the D3D buffer API and checks that it carries a sharing handler whose
    // resource handle is the dummy D3D buffer, that its flags are CL_MEM_READ_WRITE, and that only
    // the non-NT shared-handle query was used.
    cl_int retVal;

    auto memObj = this->createFromD3DBufferApi(this->context, CL_MEM_READ_WRITE, reinterpret_cast(&this->dummyD3DBuffer), &retVal);
    ASSERT_NE(nullptr, memObj);
    EXPECT_EQ(CL_SUCCESS, retVal);

    auto buffer = castToObject(memObj);
    ASSERT_NE(nullptr, buffer);
    ASSERT_NE(nullptr, buffer->getSharingHandler().get());

    auto bufferObj = static_cast *>(buffer->getSharingHandler().get());

    EXPECT_EQ(reinterpret_cast(&this->dummyD3DBuffer), *bufferObj->getResourceHandler());
    EXPECT_TRUE(buffer->getFlags() == CL_MEM_READ_WRITE);

    clReleaseMemObject(memObj);

    EXPECT_EQ(1u, this->mockSharingFcns->getSharedHandleCalled);
    EXPECT_EQ(0u, this->mockSharingFcns->getSharedNTHandleCalled);
}

// NV12 texture created with subresource 4: the image is expected to use GMM_PLANE_Y and the
// PLANE_Y surface format, with array index 2 passed to the single getOffset call.
TYPED_TEST_P(D3DTests, givenNV12FormatAndEvenPlaneWhen2dCreatedThenSetPlaneParams) {
    this->mockSharingFcns->mockTexture2dDesc.Format = DXGI_FORMAT_NV12;

    this->mockSharingFcns->getTexture2dDescSetParams = true;
    this->mockSharingFcns->getTexture2dDescParamsSet.textureDesc = this->mockSharingFcns->mockTexture2dDesc;

    auto image = std::unique_ptr(D3DTexture::create2d(this->context, reinterpret_cast(&this->dummyD3DTexture), CL_MEM_READ_WRITE, 4, nullptr));
    ASSERT_NE(nullptr, image.get());
    EXPECT_EQ(GMM_PLANE_Y, image->getPlane());

    auto expectedFormat = D3DTexture::findYuvSurfaceFormatInfo(DXGI_FORMAT_NV12, ImagePlane::PLANE_Y, CL_MEM_READ_WRITE);
    EXPECT_TRUE(memcmp(expectedFormat, &image->getSurfaceFormatInfo(), sizeof(SurfaceFormatInfo)) == 0);
    EXPECT_EQ(1u, mockGmmResInfo->getOffsetCalled);
    EXPECT_EQ(2u, mockGmmResInfo->arrayIndexPassedToGetOffset);

    EXPECT_EQ(1u, this->mockSharingFcns->getTexture2dDescCalled);
}

// Shared (MISC_SHARED) NV12 texture created while the mock memory manager's verifyValue is false:
// creation is expected to return nullptr with CL_INVALID_D3D11_RESOURCE_KHR.
TYPED_TEST_P(D3DTests, givenSharedObjectFromInvalidContextWhen2dCreatedThenReturnCorrectCode) {
    this->mockSharingFcns->mockTexture2dDesc.Format = DXGI_FORMAT_NV12;
    this->mockSharingFcns->mockTexture2dDesc.MiscFlags = D3DResourceFlags::MISC_SHARED;

    this->mockSharingFcns->getTexture2dDescSetParams = true;
this->mockSharingFcns->getTexture2dDescParamsSet.textureDesc = this->mockSharingFcns->mockTexture2dDesc;

    cl_int retCode = 0;
    // verifyValue is switched off only for the duration of the create call.
    mockMM.get()->verifyValue = false;
    auto image = std::unique_ptr(D3DTexture::create2d(this->context, reinterpret_cast(&this->dummyD3DTexture), CL_MEM_READ_WRITE, 4, &retCode));
    mockMM.get()->verifyValue = true;
    EXPECT_EQ(nullptr, image.get());
    EXPECT_EQ(retCode, CL_INVALID_D3D11_RESOURCE_KHR);

    EXPECT_EQ(1u, this->mockSharingFcns->getTexture2dDescCalled);
}

// Same as the shared-object case above but with MISC_SHARED_NTHANDLE: creation is expected to
// return nullptr with CL_INVALID_D3D11_RESOURCE_KHR while verifyValue is false.
TYPED_TEST_P(D3DTests, givenSharedObjectFromInvalidContextAndNTHandleWhen2dCreatedThenReturnCorrectCode) {
    this->mockSharingFcns->mockTexture2dDesc.Format = DXGI_FORMAT_NV12;
    this->mockSharingFcns->mockTexture2dDesc.MiscFlags = D3DResourceFlags::MISC_SHARED_NTHANDLE;

    this->mockSharingFcns->getTexture2dDescSetParams = true;
    this->mockSharingFcns->getTexture2dDescParamsSet.textureDesc = this->mockSharingFcns->mockTexture2dDesc;

    cl_int retCode = 0;
    // verifyValue is switched off only for the duration of the create call.
    mockMM.get()->verifyValue = false;
    auto image = std::unique_ptr(D3DTexture::create2d(this->context, reinterpret_cast(&this->dummyD3DTexture), CL_MEM_READ_WRITE, 4, &retCode));
    mockMM.get()->verifyValue = true;
    EXPECT_EQ(nullptr, image.get());
    EXPECT_EQ(retCode, CL_INVALID_D3D11_RESOURCE_KHR);

    EXPECT_EQ(1u, this->mockSharingFcns->getTexture2dDescCalled);
}

// Shared (MISC_SHARED) NV12 texture created while the mock memory manager's failAlloc is true:
// creation is expected to return nullptr with CL_OUT_OF_HOST_MEMORY.
TYPED_TEST_P(D3DTests, givenSharedObjectAndAlocationFailedWhen2dCreatedThenReturnCorrectCode) {
    this->mockSharingFcns->mockTexture2dDesc.Format = DXGI_FORMAT_NV12;
    this->mockSharingFcns->mockTexture2dDesc.MiscFlags = D3DResourceFlags::MISC_SHARED;

    this->mockSharingFcns->getTexture2dDescSetParams = true;
    this->mockSharingFcns->getTexture2dDescParamsSet.textureDesc = this->mockSharingFcns->mockTexture2dDesc;

    cl_int retCode = 0;
    // failAlloc is switched on only for the duration of the create call.
    mockMM.get()->failAlloc = true;
    auto image = std::unique_ptr(D3DTexture::create2d(this->context, reinterpret_cast(&this->dummyD3DTexture), CL_MEM_READ_WRITE, 4, &retCode));
    mockMM.get()->failAlloc = false;
    EXPECT_EQ(nullptr, image.get());
    EXPECT_EQ(retCode,
CL_OUT_OF_HOST_MEMORY); EXPECT_EQ(1u, this->mockSharingFcns->getTexture2dDescCalled); } TYPED_TEST_P(D3DTests, givenSharedObjectAndNTHandleAndAllocationFailedWhen2dCreatedThenReturnCorrectCode) { this->mockSharingFcns->mockTexture2dDesc.Format = DXGI_FORMAT_NV12; this->mockSharingFcns->mockTexture2dDesc.MiscFlags = D3DResourceFlags::MISC_SHARED_NTHANDLE; this->mockSharingFcns->getTexture2dDescSetParams = true; this->mockSharingFcns->getTexture2dDescParamsSet.textureDesc = this->mockSharingFcns->mockTexture2dDesc; cl_int retCode = 0; mockMM.get()->failAlloc = true; auto image = std::unique_ptr(D3DTexture::create2d(this->context, reinterpret_cast(&this->dummyD3DTexture), CL_MEM_READ_WRITE, 4, &retCode)); mockMM.get()->failAlloc = false; EXPECT_EQ(nullptr, image.get()); EXPECT_EQ(retCode, CL_OUT_OF_HOST_MEMORY); EXPECT_EQ(1u, this->mockSharingFcns->getTexture2dDescCalled); } TYPED_TEST_P(D3DTests, givenNV12FormatAndOddPlaneWhen2dCreatedThenSetPlaneParams) { this->mockSharingFcns->mockTexture2dDesc.Format = DXGI_FORMAT_NV12; this->mockSharingFcns->getTexture2dDescSetParams = true; this->mockSharingFcns->getTexture2dDescParamsSet.textureDesc = this->mockSharingFcns->mockTexture2dDesc; auto image = std::unique_ptr(D3DTexture::create2d(this->context, reinterpret_cast(&this->dummyD3DTexture), CL_MEM_READ_WRITE, 7, nullptr)); ASSERT_NE(nullptr, image.get()); EXPECT_EQ(GMM_PLANE_U, image->getPlane()); auto expectedFormat = D3DTexture::findYuvSurfaceFormatInfo(DXGI_FORMAT_NV12, ImagePlane::PLANE_UV, CL_MEM_READ_WRITE); EXPECT_TRUE(memcmp(expectedFormat, &image->getSurfaceFormatInfo(), sizeof(SurfaceFormatInfo)) == 0); EXPECT_EQ(1u, mockGmmResInfo->getOffsetCalled); EXPECT_EQ(3u, mockGmmResInfo->arrayIndexPassedToGetOffset); EXPECT_EQ(1u, this->mockSharingFcns->getTexture2dDescCalled); } TYPED_TEST_P(D3DTests, givenP010FormatAndEvenPlaneWhen2dCreatedThenSetPlaneParams) { this->mockSharingFcns->mockTexture2dDesc.Format = DXGI_FORMAT_P010; 
this->mockSharingFcns->getTexture2dDescSetParams = true; this->mockSharingFcns->getTexture2dDescParamsSet.textureDesc = this->mockSharingFcns->mockTexture2dDesc; auto image = std::unique_ptr(D3DTexture::create2d(this->context, reinterpret_cast(&this->dummyD3DTexture), CL_MEM_READ_WRITE, 4, nullptr)); ASSERT_NE(nullptr, image.get()); auto expectedFormat = D3DTexture::findYuvSurfaceFormatInfo(DXGI_FORMAT_P010, ImagePlane::PLANE_Y, CL_MEM_READ_WRITE); EXPECT_TRUE(memcmp(expectedFormat, &image->getSurfaceFormatInfo(), sizeof(SurfaceFormatInfo)) == 0); EXPECT_EQ(1u, mockGmmResInfo->getOffsetCalled); EXPECT_EQ(2u, mockGmmResInfo->arrayIndexPassedToGetOffset); EXPECT_EQ(1u, this->mockSharingFcns->getTexture2dDescCalled); } TYPED_TEST_P(D3DTests, givenP010FormatAndOddPlaneWhen2dCreatedThenSetPlaneParams) { this->mockSharingFcns->mockTexture2dDesc.Format = DXGI_FORMAT_P010; this->mockSharingFcns->getTexture2dDescSetParams = true; this->mockSharingFcns->getTexture2dDescParamsSet.textureDesc = this->mockSharingFcns->mockTexture2dDesc; auto image = std::unique_ptr(D3DTexture::create2d(this->context, reinterpret_cast(&this->dummyD3DTexture), CL_MEM_READ_WRITE, 7, nullptr)); ASSERT_NE(nullptr, image.get()); auto expectedFormat = D3DTexture::findYuvSurfaceFormatInfo(DXGI_FORMAT_P010, ImagePlane::PLANE_UV, CL_MEM_READ_WRITE); EXPECT_TRUE(memcmp(expectedFormat, &image->getSurfaceFormatInfo(), sizeof(SurfaceFormatInfo)) == 0); EXPECT_EQ(1u, mockGmmResInfo->getOffsetCalled); EXPECT_EQ(3u, mockGmmResInfo->arrayIndexPassedToGetOffset); EXPECT_EQ(1u, this->mockSharingFcns->getTexture2dDescCalled); } TYPED_TEST_P(D3DTests, givenP016FormatAndEvenPlaneWhen2dCreatedThenSetPlaneParams) { this->mockSharingFcns->mockTexture2dDesc.Format = DXGI_FORMAT_P016; this->mockSharingFcns->getTexture2dDescSetParams = true; this->mockSharingFcns->getTexture2dDescParamsSet.textureDesc = this->mockSharingFcns->mockTexture2dDesc; auto image = std::unique_ptr(D3DTexture::create2d(this->context, 
reinterpret_cast(&this->dummyD3DTexture), CL_MEM_READ_WRITE, 4, nullptr)); ASSERT_NE(nullptr, image.get()); auto expectedFormat = D3DTexture::findYuvSurfaceFormatInfo(DXGI_FORMAT_P016, ImagePlane::PLANE_Y, CL_MEM_READ_WRITE); EXPECT_TRUE(memcmp(expectedFormat, &image->getSurfaceFormatInfo(), sizeof(SurfaceFormatInfo)) == 0); EXPECT_EQ(1u, mockGmmResInfo->getOffsetCalled); EXPECT_EQ(2u, mockGmmResInfo->arrayIndexPassedToGetOffset); EXPECT_EQ(1u, this->mockSharingFcns->getTexture2dDescCalled); } TYPED_TEST_P(D3DTests, givenP016FormatAndOddPlaneWhen2dCreatedThenSetPlaneParams) { this->mockSharingFcns->mockTexture2dDesc.Format = DXGI_FORMAT_P016; this->mockSharingFcns->getTexture2dDescSetParams = true; this->mockSharingFcns->getTexture2dDescParamsSet.textureDesc = this->mockSharingFcns->mockTexture2dDesc; auto image = std::unique_ptr(D3DTexture::create2d(this->context, reinterpret_cast(&this->dummyD3DTexture), CL_MEM_READ_WRITE, 7, nullptr)); ASSERT_NE(nullptr, image.get()); auto expectedFormat = D3DTexture::findYuvSurfaceFormatInfo(DXGI_FORMAT_P016, ImagePlane::PLANE_UV, CL_MEM_READ_WRITE); EXPECT_TRUE(memcmp(expectedFormat, &image->getSurfaceFormatInfo(), sizeof(SurfaceFormatInfo)) == 0); EXPECT_EQ(1u, mockGmmResInfo->getOffsetCalled); EXPECT_EQ(3u, mockGmmResInfo->arrayIndexPassedToGetOffset); EXPECT_EQ(1u, this->mockSharingFcns->getTexture2dDescCalled); } TYPED_TEST_P(D3DTests, WhenCreatingFromD3D2dTextureKhrApiThenValidImageIsReturned) { cl_int retVal; this->mockSharingFcns->getTexture2dDescSetParams = true; this->mockSharingFcns->getTexture2dDescParamsSet.textureDesc = this->mockSharingFcns->mockTexture2dDesc; auto memObj = this->createFromD3DTexture2DApi(this->context, CL_MEM_READ_WRITE, reinterpret_cast(&this->dummyD3DTexture), 1, &retVal); ASSERT_NE(nullptr, memObj); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, mockGmmResInfo->getOffsetCalled); EXPECT_EQ(0u, mockGmmResInfo->arrayIndexPassedToGetOffset); auto image = castToObject(memObj); ASSERT_NE(nullptr, 
image); ASSERT_NE(nullptr, image->getSharingHandler().get()); auto textureObj = static_cast *>(image->getSharingHandler().get()); EXPECT_EQ(reinterpret_cast(&this->dummyD3DTexture), *textureObj->getResourceHandler()); EXPECT_TRUE(image->getFlags() == CL_MEM_READ_WRITE); EXPECT_TRUE(image->getImageDesc().image_type == CL_MEM_OBJECT_IMAGE2D); EXPECT_EQ(1u, textureObj->getSubresource()); clReleaseMemObject(memObj); EXPECT_EQ(1u, this->mockSharingFcns->createQueryCalled); EXPECT_EQ(1u, this->mockSharingFcns->getTexture2dDescCalled); EXPECT_EQ(1u, this->mockSharingFcns->getSharedHandleCalled); EXPECT_EQ(0u, this->mockSharingFcns->getSharedNTHandleCalled); } TYPED_TEST_P(D3DTests, WhenCreatingFromD3D3dTextureKhrApiThenValidImageIsReturned) { cl_int retVal; this->mockSharingFcns->getTexture3dDescSetParams = true; this->mockSharingFcns->getTexture3dDescParamsSet.textureDesc = this->mockSharingFcns->mockTexture3dDesc; auto memObj = this->createFromD3DTexture3DApi(this->context, CL_MEM_READ_WRITE, reinterpret_cast(&this->dummyD3DTexture), 1, &retVal); ASSERT_NE(nullptr, memObj); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, mockGmmResInfo->getOffsetCalled); EXPECT_EQ(0u, mockGmmResInfo->arrayIndexPassedToGetOffset); auto image = castToObject(memObj); ASSERT_NE(nullptr, image); ASSERT_NE(nullptr, image->getSharingHandler().get()); auto textureObj = static_cast *>(image->getSharingHandler().get()); EXPECT_EQ(reinterpret_cast(&this->dummyD3DTexture), *textureObj->getResourceHandler()); EXPECT_TRUE(image->getFlags() == CL_MEM_READ_WRITE); EXPECT_TRUE(image->getImageDesc().image_type == CL_MEM_OBJECT_IMAGE3D); EXPECT_EQ(1u, textureObj->getSubresource()); clReleaseMemObject(memObj); EXPECT_EQ(1u, this->mockSharingFcns->createQueryCalled); EXPECT_EQ(1u, this->mockSharingFcns->getTexture3dDescCalled); EXPECT_EQ(1u, this->mockSharingFcns->getSharedHandleCalled); EXPECT_EQ(0u, this->mockSharingFcns->getSharedNTHandleCalled); } TYPED_TEST_P(D3DTests, 
givenSharedResourceFlagWhenCreateBufferThenStagingBufferEqualsPassedBuffer) { std::vector releaseExpectedParams{}; { this->mockSharingFcns->mockBufferDesc.MiscFlags = D3DResourceFlags::MISC_SHARED; this->mockSharingFcns->getBufferDescSetParams = true; this->mockSharingFcns->getBufferDescParamsSet.bufferDesc = this->mockSharingFcns->mockBufferDesc; this->mockSharingFcns->createQuerySetParams = true; this->mockSharingFcns->createQueryParamsSet.query = reinterpret_cast(1); auto buffer = std::unique_ptr(D3DBuffer::create(this->context, reinterpret_cast(&this->dummyD3DBuffer), CL_MEM_READ_WRITE, nullptr)); ASSERT_NE(nullptr, buffer.get()); auto d3dBuffer = static_cast *>(buffer->getSharingHandler().get()); ASSERT_NE(nullptr, d3dBuffer); EXPECT_NE(nullptr, d3dBuffer->getQuery()); EXPECT_TRUE(d3dBuffer->isSharedResource()); EXPECT_EQ(&this->dummyD3DBuffer, d3dBuffer->getResourceStaging()); releaseExpectedParams.push_back(reinterpret_cast(&this->dummyD3DBuffer)); releaseExpectedParams.push_back(reinterpret_cast(d3dBuffer->getQuery())); EXPECT_EQ(0u, this->mockSharingFcns->createBufferCalled); EXPECT_EQ(1u, this->mockSharingFcns->createQueryCalled); EXPECT_EQ(1u, this->mockSharingFcns->getBufferDescCalled); EXPECT_EQ(1u, this->mockSharingFcns->getSharedHandleCalled); EXPECT_EQ(1u, this->mockSharingFcns->addRefCalled); EXPECT_EQ(reinterpret_cast(&this->dummyD3DBuffer), this->mockSharingFcns->getSharedHandleParamsPassed[0].resource); EXPECT_EQ(reinterpret_cast(&this->dummyD3DBuffer), this->mockSharingFcns->addRefParamsPassed[0].resource); } EXPECT_EQ(2u, this->mockSharingFcns->releaseCalled); EXPECT_EQ(releaseExpectedParams[0], this->mockSharingFcns->releaseParamsPassed[0].resource); EXPECT_EQ(releaseExpectedParams[1], this->mockSharingFcns->releaseParamsPassed[1].resource); } TYPED_TEST_P(D3DTests, givenNonSharedResourceFlagWhenCreateBufferThenCreateNewStagingBuffer) { std::vector releaseExpectedParams{}; { this->mockSharingFcns->createBufferSetParams = true; 
this->mockSharingFcns->createBufferParamsSet.buffer = reinterpret_cast(&this->dummyD3DBufferStaging); this->mockSharingFcns->createQuerySetParams = true; this->mockSharingFcns->createQueryParamsSet.query = reinterpret_cast(1); auto buffer = std::unique_ptr(D3DBuffer::create(this->context, reinterpret_cast(&this->dummyD3DBuffer), CL_MEM_READ_WRITE, nullptr)); ASSERT_NE(nullptr, buffer.get()); auto d3dBuffer = static_cast *>(buffer->getSharingHandler().get()); ASSERT_NE(nullptr, d3dBuffer); EXPECT_NE(nullptr, d3dBuffer->getQuery()); EXPECT_FALSE(d3dBuffer->isSharedResource()); EXPECT_EQ(&this->dummyD3DBufferStaging, d3dBuffer->getResourceStaging()); releaseExpectedParams.push_back(reinterpret_cast(&this->dummyD3DBufferStaging)); releaseExpectedParams.push_back(reinterpret_cast(&this->dummyD3DBuffer)); releaseExpectedParams.push_back(reinterpret_cast(d3dBuffer->getQuery())); EXPECT_EQ(1u, this->mockSharingFcns->createBufferCalled); EXPECT_EQ(1u, this->mockSharingFcns->createQueryCalled); EXPECT_EQ(1u, this->mockSharingFcns->getSharedHandleCalled); EXPECT_EQ(1u, this->mockSharingFcns->addRefCalled); EXPECT_EQ(reinterpret_cast(&this->dummyD3DBufferStaging), this->mockSharingFcns->getSharedHandleParamsPassed[0].resource); EXPECT_EQ(reinterpret_cast(&this->dummyD3DBuffer), this->mockSharingFcns->addRefParamsPassed[0].resource); } EXPECT_EQ(3u, this->mockSharingFcns->releaseCalled); EXPECT_EQ(releaseExpectedParams[0], this->mockSharingFcns->releaseParamsPassed[0].resource); EXPECT_EQ(releaseExpectedParams[1], this->mockSharingFcns->releaseParamsPassed[1].resource); EXPECT_EQ(releaseExpectedParams[2], this->mockSharingFcns->releaseParamsPassed[2].resource); } TYPED_TEST_P(D3DTests, givenNonSharedResourceBufferWhenAcquiredThenCopySubregion) { this->context->setInteropUserSyncEnabled(true); this->mockSharingFcns->createBufferSetParams = true; this->mockSharingFcns->createBufferParamsSet.buffer = reinterpret_cast(&this->dummyD3DBufferStaging); auto buffer = 
std::unique_ptr(D3DBuffer::create(this->context, reinterpret_cast(&this->dummyD3DBuffer), CL_MEM_READ_WRITE, nullptr)); ASSERT_NE(nullptr, buffer.get()); cl_mem bufferMem = (cl_mem)buffer.get(); // acquireCount == 0, acquire EXPECT_EQ(0u, buffer->acquireCount); auto retVal = this->enqueueAcquireD3DObjectsApi(this->mockSharingFcns, this->cmdQ, 1, &bufferMem, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, buffer->acquireCount); // acquireCount == 1, don't acquire retVal = this->enqueueAcquireD3DObjectsApi(this->mockSharingFcns, this->cmdQ, 1, &bufferMem, 0, nullptr, nullptr); EXPECT_EQ(this->pickParam(CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR, CL_D3D11_RESOURCE_ALREADY_ACQUIRED_KHR), retVal); EXPECT_EQ(1u, buffer->acquireCount); retVal = this->enqueueReleaseD3DObjectsApi(this->mockSharingFcns, this->cmdQ, 1, &bufferMem, 0, nullptr, nullptr); // acquireCount == 0 EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, buffer->acquireCount); retVal = this->enqueueReleaseD3DObjectsApi(this->mockSharingFcns, this->cmdQ, 1, &bufferMem, 0, nullptr, nullptr); EXPECT_EQ(this->pickParam(CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR, CL_D3D11_RESOURCE_NOT_ACQUIRED_KHR), retVal); EXPECT_EQ(0u, buffer->acquireCount); EXPECT_EQ(1u, this->mockSharingFcns->createBufferCalled); EXPECT_EQ(1u, this->mockSharingFcns->getSharedHandleCalled); EXPECT_EQ(2u, this->mockSharingFcns->getDeviceContextCalled); EXPECT_EQ(2u, this->mockSharingFcns->releaseDeviceContextCalled); EXPECT_EQ(2u, this->mockSharingFcns->copySubresourceRegionCalled); EXPECT_EQ(1u, this->mockSharingFcns->flushAndWaitCalled); EXPECT_EQ(reinterpret_cast(&this->dummyD3DBufferStaging), this->mockSharingFcns->copySubresourceRegionParamsPassed[0].dst); EXPECT_EQ(0u, this->mockSharingFcns->copySubresourceRegionParamsPassed[0].dstSubresource); EXPECT_EQ(reinterpret_cast(&this->dummyD3DBuffer), this->mockSharingFcns->copySubresourceRegionParamsPassed[0].src); EXPECT_EQ(0u, 
this->mockSharingFcns->copySubresourceRegionParamsPassed[0].srcSubresource); EXPECT_EQ(reinterpret_cast(&this->dummyD3DBuffer), this->mockSharingFcns->copySubresourceRegionParamsPassed[1].dst); EXPECT_EQ(0u, this->mockSharingFcns->copySubresourceRegionParamsPassed[1].dstSubresource); EXPECT_EQ(reinterpret_cast(&this->dummyD3DBufferStaging), this->mockSharingFcns->copySubresourceRegionParamsPassed[1].src); EXPECT_EQ(0u, this->mockSharingFcns->copySubresourceRegionParamsPassed[1].srcSubresource); } TYPED_TEST_P(D3DTests, givenSharedResourceBufferWhenAcquiredThenDontCopySubregion) { this->context->setInteropUserSyncEnabled(true); this->mockSharingFcns->mockBufferDesc.MiscFlags = D3DResourceFlags::MISC_SHARED; this->mockSharingFcns->getBufferDescSetParams = true; this->mockSharingFcns->getBufferDescParamsSet.bufferDesc = this->mockSharingFcns->mockBufferDesc; auto buffer = std::unique_ptr(D3DBuffer::create(this->context, reinterpret_cast(&this->dummyD3DBuffer), CL_MEM_READ_WRITE, nullptr)); ASSERT_NE(nullptr, buffer.get()); cl_mem bufferMem = (cl_mem)buffer.get(); auto retVal = this->enqueueAcquireD3DObjectsApi(this->mockSharingFcns, this->cmdQ, 1, &bufferMem, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = this->enqueueReleaseD3DObjectsApi(this->mockSharingFcns, this->cmdQ, 1, &bufferMem, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, this->mockSharingFcns->getBufferDescCalled); EXPECT_EQ(1u, this->mockSharingFcns->getSharedHandleCalled); EXPECT_EQ(1u, this->mockSharingFcns->getDeviceContextCalled); EXPECT_EQ(1u, this->mockSharingFcns->releaseDeviceContextCalled); EXPECT_EQ(0u, this->mockSharingFcns->copySubresourceRegionCalled); EXPECT_EQ(0u, this->mockSharingFcns->flushAndWaitCalled); } TYPED_TEST_P(D3DTests, givenSharedResourceBufferAndInteropUserSyncDisabledWhenAcquiredThenFlushOnAcquire) { this->context->setInteropUserSyncEnabled(false); this->mockSharingFcns->mockBufferDesc.MiscFlags = D3DResourceFlags::MISC_SHARED; 
this->mockSharingFcns->getBufferDescSetParams = true; this->mockSharingFcns->getBufferDescParamsSet.bufferDesc = this->mockSharingFcns->mockBufferDesc; auto buffer = std::unique_ptr(D3DBuffer::create(this->context, reinterpret_cast(&this->dummyD3DBuffer), CL_MEM_READ_WRITE, nullptr)); ASSERT_NE(nullptr, buffer.get()); cl_mem bufferMem = (cl_mem)buffer.get(); auto retVal = this->enqueueAcquireD3DObjectsApi(this->mockSharingFcns, this->cmdQ, 1, &bufferMem, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = this->enqueueReleaseD3DObjectsApi(this->mockSharingFcns, this->cmdQ, 1, &bufferMem, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, this->mockSharingFcns->getBufferDescCalled); EXPECT_EQ(1u, this->mockSharingFcns->getSharedHandleCalled); EXPECT_EQ(1u, this->mockSharingFcns->getDeviceContextCalled); EXPECT_EQ(1u, this->mockSharingFcns->releaseDeviceContextCalled); EXPECT_EQ(0u, this->mockSharingFcns->copySubresourceRegionCalled); EXPECT_EQ(1u, this->mockSharingFcns->flushAndWaitCalled); } TYPED_TEST_P(D3DTests, WhenGettingPreferD3DSharedResourcesThenCorrectValueIsReturned) { auto ctx = std::unique_ptr(new MockContext()); cl_bool retBool = 0; size_t size = 0; auto param = this->pickParam(CL_CONTEXT_D3D10_PREFER_SHARED_RESOURCES_KHR, CL_CONTEXT_D3D11_PREFER_SHARED_RESOURCES_KHR); ctx->preferD3dSharedResources = 1u; auto retVal = ctx->getInfo(param, sizeof(retBool), &retBool, &size); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(cl_bool), size); EXPECT_EQ(1u, retBool); ctx->preferD3dSharedResources = 0u; retVal = ctx->getInfo(param, sizeof(retBool), &retBool, &size); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(cl_bool), size); EXPECT_EQ(0u, retBool); } TYPED_TEST_P(D3DTests, WhenGettingD3DResourceInfoFromMemObjThenCorrectInfoIsReturned) { auto memObj = this->createFromD3DBufferApi(this->context, CL_MEM_READ_WRITE, reinterpret_cast(&this->dummyD3DBuffer), nullptr); ASSERT_NE(nullptr, memObj); auto param = 
this->pickParam(CL_MEM_D3D10_RESOURCE_KHR, CL_MEM_D3D11_RESOURCE_KHR); void *retBuffer = nullptr; size_t retSize = 0; clGetMemObjectInfo(memObj, param, sizeof(D3DBufferObj), &retBuffer, &retSize); EXPECT_EQ(sizeof(D3DBufferObj), retSize); EXPECT_EQ(&this->dummyD3DBuffer, retBuffer); clReleaseMemObject(memObj); } TYPED_TEST_P(D3DTests, WhenGettingD3DSubresourceInfoFromMemObjThenCorrectInfoIsReturned) { cl_int retVal; cl_uint subresource = 1u; auto param = this->pickParam(CL_IMAGE_D3D10_SUBRESOURCE_KHR, CL_IMAGE_D3D11_SUBRESOURCE_KHR); this->mockSharingFcns->getTexture2dDescSetParams = true; this->mockSharingFcns->getTexture2dDescParamsSet.textureDesc = this->mockSharingFcns->mockTexture2dDesc; auto memObj = this->createFromD3DTexture2DApi(this->context, CL_MEM_READ_WRITE, reinterpret_cast(&this->dummyD3DTexture), subresource, &retVal); ASSERT_NE(nullptr, memObj); EXPECT_EQ(CL_SUCCESS, retVal); cl_uint retSubresource = 0; size_t retSize = 0; clGetImageInfo(memObj, param, sizeof(cl_uint), &retSubresource, &retSize); EXPECT_EQ(sizeof(cl_uint), retSize); EXPECT_EQ(subresource, retSubresource); clReleaseMemObject(memObj); EXPECT_EQ(1u, this->mockSharingFcns->getTexture2dDescCalled); } TYPED_TEST_P(D3DTests, givenTheSameD3DBufferWhenNextCreateIsCalledThenFail) { cl_int retVal; EXPECT_EQ(0u, this->mockSharingFcns->getTrackedResourcesVector()->size()); auto memObj = this->createFromD3DBufferApi(this->context, CL_MEM_READ_WRITE, reinterpret_cast(&this->dummyD3DBuffer), &retVal); ASSERT_NE(nullptr, memObj); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, this->mockSharingFcns->getTrackedResourcesVector()->size()); EXPECT_EQ(0u, this->mockSharingFcns->getTrackedResourcesVector()->at(0).second); auto memObj2 = this->createFromD3DBufferApi(this->context, CL_MEM_READ_WRITE, reinterpret_cast(&this->dummyD3DBuffer), &retVal); EXPECT_EQ(nullptr, memObj2); EXPECT_EQ(this->pickParam(CL_INVALID_D3D10_RESOURCE_KHR, CL_INVALID_D3D11_RESOURCE_KHR), retVal); EXPECT_EQ(1u, 
this->mockSharingFcns->getTrackedResourcesVector()->size()); clReleaseMemObject(memObj); EXPECT_EQ(0u, this->mockSharingFcns->getTrackedResourcesVector()->size()); }

// Verifies resource tracking for 2D textures created through the D3D sharing API:
//  * creating a second mem object for the same texture and the same subresource
//    is rejected with CL_INVALID_D3D10/11_RESOURCE_KHR and leaves the tracked list untouched,
//  * creating one for the same texture but a different subresource succeeds and is tracked separately,
//  * releasing each mem object removes exactly one tracked entry.
// NOTE(review): the reinterpret_cast calls below carry no explicit target type in this copy
// of the file; they are left exactly as found - confirm against the original source.
TYPED_TEST_P(D3DTests, givenD3DTextureWithTheSameSubresourceWhenNextCreateIsCalledThenFail) {
    cl_int retVal;
    cl_uint subresource = 1;
    this->mockSharingFcns->getTexture2dDescSetParams = true;
    this->mockSharingFcns->getTexture2dDescParamsSet.textureDesc = this->mockSharingFcns->mockTexture2dDesc;

    // Nothing is tracked before the first creation.
    EXPECT_EQ(0u, this->mockSharingFcns->getTrackedResourcesVector()->size());

    auto memObj = this->createFromD3DTexture2DApi(this->context, CL_MEM_READ_WRITE, reinterpret_cast(&this->dummyD3DTexture), subresource, &retVal);
    ASSERT_NE(nullptr, memObj);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(1u, this->mockSharingFcns->getTrackedResourcesVector()->size());

    // Same texture, same subresource: creation must fail and must not add a tracked entry.
    auto memObj2 = this->createFromD3DTexture2DApi(this->context, CL_MEM_READ_WRITE, reinterpret_cast(&this->dummyD3DTexture), subresource, &retVal);
    EXPECT_EQ(nullptr, memObj2);
    EXPECT_EQ(this->pickParam(CL_INVALID_D3D10_RESOURCE_KHR, CL_INVALID_D3D11_RESOURCE_KHR), retVal);
    EXPECT_EQ(1u, this->mockSharingFcns->getTrackedResourcesVector()->size());
    EXPECT_EQ(1u, this->mockSharingFcns->getTexture2dDescCalled);

    this->mockSharingFcns->getTexture2dDescParamsSet.textureDesc = this->mockSharingFcns->mockTexture2dDesc;
    subresource++;
    // setup new mock for new resource
    this->setupMockGmm();

    // Same texture, different subresource: creation must succeed.
    auto memObj3 = this->createFromD3DTexture2DApi(this->context, CL_MEM_READ_WRITE, reinterpret_cast(&this->dummyD3DTexture), subresource, &retVal);
    // Check the object that was just created (the first object was already verified above);
    // it is released below, so it must not be null.
    ASSERT_NE(nullptr, memObj3);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(2u, this->mockSharingFcns->getTrackedResourcesVector()->size());

    // Each release drops exactly one tracked entry.
    clReleaseMemObject(memObj);
    EXPECT_EQ(1u, this->mockSharingFcns->getTrackedResourcesVector()->size());
    clReleaseMemObject(memObj3);
    EXPECT_EQ(0u, this->mockSharingFcns->getTrackedResourcesVector()->size());

    EXPECT_EQ(2u, this->mockSharingFcns->getTexture2dDescCalled);
}

TYPED_TEST_P(D3DTests,
givenInvalidSubresourceWhenCreateTexture2dIsCalledThenFail) { cl_int retVal; this->mockSharingFcns->mockTexture2dDesc.ArraySize = 4; this->mockSharingFcns->mockTexture2dDesc.MipLevels = 4; cl_uint subresource = 16; this->mockSharingFcns->getTexture2dDescSetParams = true; this->mockSharingFcns->getTexture2dDescParamsSet.textureDesc = this->mockSharingFcns->mockTexture2dDesc; auto memObj = this->createFromD3DTexture2DApi(this->context, CL_MEM_READ_WRITE, reinterpret_cast(&this->dummyD3DTexture), subresource, &retVal); EXPECT_EQ(nullptr, memObj); EXPECT_EQ(CL_INVALID_VALUE, retVal); EXPECT_EQ(1u, this->mockSharingFcns->getTexture2dDescCalled); this->mockSharingFcns->getTexture2dDescParamsSet.textureDesc = this->mockSharingFcns->mockTexture2dDesc; subresource = 20; memObj = this->createFromD3DTexture2DApi(this->context, CL_MEM_READ_WRITE, reinterpret_cast(&this->dummyD3DTexture), subresource, &retVal); EXPECT_EQ(nullptr, memObj); EXPECT_EQ(CL_INVALID_VALUE, retVal); EXPECT_EQ(2u, this->mockSharingFcns->getTexture2dDescCalled); } TYPED_TEST_P(D3DTests, givenInvalidSubresourceWhenCreateTexture3dIsCalledThenFail) { cl_int retVal; this->mockSharingFcns->mockTexture3dDesc.MipLevels = 4; cl_uint subresource = 16; this->mockSharingFcns->getTexture3dDescSetParams = true; this->mockSharingFcns->getTexture3dDescParamsSet.textureDesc = this->mockSharingFcns->mockTexture3dDesc; auto memObj = this->createFromD3DTexture3DApi(this->context, CL_MEM_READ_WRITE, reinterpret_cast(&this->dummyD3DTexture), subresource, &retVal); EXPECT_EQ(nullptr, memObj); EXPECT_EQ(CL_INVALID_VALUE, retVal); EXPECT_EQ(1u, this->mockSharingFcns->getTexture3dDescCalled); this->mockSharingFcns->getTexture3dDescParamsSet.textureDesc = this->mockSharingFcns->mockTexture3dDesc; subresource = 20; memObj = this->createFromD3DTexture3DApi(this->context, CL_MEM_READ_WRITE, reinterpret_cast(&this->dummyD3DTexture), subresource, &retVal); EXPECT_EQ(nullptr, memObj); EXPECT_EQ(CL_INVALID_VALUE, retVal); EXPECT_EQ(2u, 
this->mockSharingFcns->getTexture3dDescCalled); } TYPED_TEST_P(D3DTests, givenPackedFormatWhenLookingForSurfaceFormatWithPackedNotSupportedThenReturnNull) { EXPECT_GT(SurfaceFormats::packed().size(), 0u); for (auto &format : SurfaceFormats::packed()) { auto surfaceFormat = D3DSharing::findSurfaceFormatInfo(format.surfaceFormat.GMMSurfaceFormat, CL_MEM_READ_ONLY, false /* supportsOcl20Features */, false /* packedSupported */); ASSERT_EQ(nullptr, surfaceFormat); } } TYPED_TEST_P(D3DTests, givenPackedFormatWhenLookingForSurfaceFormatWithPackedSupportedThenReturnValidFormat) { EXPECT_GT(SurfaceFormats::packed().size(), 0u); uint32_t counter = 0; for (auto &format : SurfaceFormats::packed()) { auto surfaceFormat = D3DSharing::findSurfaceFormatInfo(format.surfaceFormat.GMMSurfaceFormat, CL_MEM_READ_ONLY, false /* supportsOcl20Features */, true /* packedSupported */); ASSERT_NE(nullptr, surfaceFormat); counter++; EXPECT_EQ(&format, surfaceFormat); } EXPECT_NE(counter, 0U); } TYPED_TEST_P(D3DTests, givenReadonlyFormatWhenLookingForSurfaceFormatThenReturnValidFormat) { EXPECT_GT(SurfaceFormats::readOnly12().size(), 0u); for (auto &format : SurfaceFormats::readOnly12()) { // only RGBA, BGRA, RG, R allowed for D3D if (format.OCLImageFormat.image_channel_order == CL_RGBA || format.OCLImageFormat.image_channel_order == CL_BGRA || format.OCLImageFormat.image_channel_order == CL_RG || format.OCLImageFormat.image_channel_order == CL_R) { auto surfaceFormat = D3DSharing::findSurfaceFormatInfo(format.surfaceFormat.GMMSurfaceFormat, CL_MEM_READ_ONLY, false /* supportsOcl20Features */, true); ASSERT_NE(nullptr, surfaceFormat); EXPECT_EQ(&format, surfaceFormat); } } } TYPED_TEST_P(D3DTests, givenWriteOnlyFormatWhenLookingForSurfaceFormatThenReturnValidFormat) { EXPECT_GT(SurfaceFormats::writeOnly().size(), 0u); for (auto &format : SurfaceFormats::writeOnly()) { // only RGBA, BGRA, RG, R allowed for D3D if (format.OCLImageFormat.image_channel_order == CL_RGBA || 
format.OCLImageFormat.image_channel_order == CL_BGRA || format.OCLImageFormat.image_channel_order == CL_RG || format.OCLImageFormat.image_channel_order == CL_R) { auto surfaceFormat = D3DSharing::findSurfaceFormatInfo(format.surfaceFormat.GMMSurfaceFormat, CL_MEM_WRITE_ONLY, context->getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features, true); ASSERT_NE(nullptr, surfaceFormat); EXPECT_EQ(&format, surfaceFormat); } } } TYPED_TEST_P(D3DTests, givenReadWriteFormatWhenLookingForSurfaceFormatThenReturnValidFormat) { EXPECT_GT(SurfaceFormats::readWrite().size(), 0u); for (auto &format : SurfaceFormats::readWrite()) { // only RGBA, BGRA, RG, R allowed for D3D if (format.OCLImageFormat.image_channel_order == CL_RGBA || format.OCLImageFormat.image_channel_order == CL_BGRA || format.OCLImageFormat.image_channel_order == CL_RG || format.OCLImageFormat.image_channel_order == CL_R) { auto surfaceFormat = D3DSharing::findSurfaceFormatInfo(format.surfaceFormat.GMMSurfaceFormat, CL_MEM_READ_WRITE, context->getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features, true); ASSERT_NE(nullptr, surfaceFormat); EXPECT_EQ(&format, surfaceFormat); } } } REGISTER_TYPED_TEST_CASE_P(D3DTests, GivenSpecificDeviceSetWhenGettingDeviceIDsFromD3DThenOnlySelectedDevicesAreReturned, GivenSpecificDeviceSourceWhenGettingDeviceIDsFromD3DThenOnlySelectedDevicesAreReturned, givenNonIntelVendorWhenGetDeviceIdIsCalledThenReturnError, WhenCreatingFromD3DBufferKhrApiThenValidBufferIsReturned, WhenCreatingFromD3D2dTextureKhrApiThenValidImageIsReturned, WhenCreatingFromD3D3dTextureKhrApiThenValidImageIsReturned, givenSharedResourceFlagWhenCreateBufferThenStagingBufferEqualsPassedBuffer, givenNonSharedResourceFlagWhenCreateBufferThenCreateNewStagingBuffer, givenNonSharedResourceBufferWhenAcquiredThenCopySubregion, givenSharedResourceBufferWhenAcquiredThenDontCopySubregion, givenSharedResourceBufferAndInteropUserSyncDisabledWhenAcquiredThenFlushOnAcquire, 
WhenGettingPreferD3DSharedResourcesThenCorrectValueIsReturned, WhenGettingD3DResourceInfoFromMemObjThenCorrectInfoIsReturned, WhenGettingD3DSubresourceInfoFromMemObjThenCorrectInfoIsReturned, givenTheSameD3DBufferWhenNextCreateIsCalledThenFail, givenD3DTextureWithTheSameSubresourceWhenNextCreateIsCalledThenFail, givenInvalidSubresourceWhenCreateTexture2dIsCalledThenFail, givenInvalidSubresourceWhenCreateTexture3dIsCalledThenFail, givenReadonlyFormatWhenLookingForSurfaceFormatThenReturnValidFormat, givenWriteOnlyFormatWhenLookingForSurfaceFormatThenReturnValidFormat, givenReadWriteFormatWhenLookingForSurfaceFormatThenReturnValidFormat, givenNV12FormatAndEvenPlaneWhen2dCreatedThenSetPlaneParams, givenSharedObjectFromInvalidContextWhen2dCreatedThenReturnCorrectCode, givenSharedObjectFromInvalidContextAndNTHandleWhen2dCreatedThenReturnCorrectCode, givenSharedObjectAndAlocationFailedWhen2dCreatedThenReturnCorrectCode, givenSharedObjectAndNTHandleAndAllocationFailedWhen2dCreatedThenReturnCorrectCode, givenP010FormatAndEvenPlaneWhen2dCreatedThenSetPlaneParams, givenP016FormatAndEvenPlaneWhen2dCreatedThenSetPlaneParams, givenNV12FormatAndOddPlaneWhen2dCreatedThenSetPlaneParams, givenP010FormatAndOddPlaneWhen2dCreatedThenSetPlaneParams, givenP016FormatAndOddPlaneWhen2dCreatedThenSetPlaneParams, givenPackedFormatWhenLookingForSurfaceFormatWithPackedNotSupportedThenReturnNull, givenPackedFormatWhenLookingForSurfaceFormatWithPackedSupportedThenReturnValidFormat); INSTANTIATE_TYPED_TEST_CASE_P(D3DSharingTests, D3DTests, D3DTypes); TEST(D3DSurfaceTest, givenD3DSurfaceWhenInvalidMemObjectIsPassedToValidateUpdateDataThenInvalidMemObjectErrorIsReturned) { class MockD3DSurface : public D3DSurface { public: MockD3DSurface(Context *context, cl_dx9_surface_info_khr *surfaceInfo, D3DTypesHelper::D3D9::D3DTexture2d *surfaceStaging, cl_uint plane, ImagePlane imagePlane, cl_dx9_media_adapter_type_khr adapterType, bool sharedResource, bool lockable) : D3DSurface(context, surfaceInfo, 
surfaceStaging, plane, imagePlane, adapterType, sharedResource, lockable) {} }; MockContext context; cl_dx9_surface_info_khr surfaceInfo = {}; ImagePlane imagePlane = ImagePlane::NO_PLANE; std::unique_ptr surface(new MockD3DSurface(&context, &surfaceInfo, nullptr, 0, imagePlane, 0, false, false)); MockBuffer buffer; UpdateData updateData{context.getDevice(0)->getRootDeviceIndex()}; updateData.memObject = &buffer; auto result = surface->validateUpdateData(updateData); EXPECT_EQ(CL_INVALID_MEM_OBJECT, result); } TEST(D3D9, givenD3D9BuilderAndExtensionEnableTrueWhenGettingExtensionsThenCorrectExtensionsListIsReturned) { auto builderFactory = std::make_unique>(); builderFactory.get()->extensionEnabled = true; EXPECT_TRUE(hasSubstr(builderFactory->getExtensions(nullptr), std::string("cl_intel_dx9_media_sharing"))); EXPECT_TRUE(hasSubstr(builderFactory->getExtensions(nullptr), std::string("cl_khr_dx9_media_sharing"))); } TEST(D3D9, givenD3D9BuilderAndExtensionEnableFalseWhenGettingExtensionsThenDx9MediaSheringExtensionsAreNotReturned) { auto builderFactory = std::make_unique>(); builderFactory.get()->extensionEnabled = false; EXPECT_FALSE(hasSubstr(builderFactory->getExtensions(nullptr), std::string("cl_intel_dx9_media_sharing"))); EXPECT_FALSE(hasSubstr(builderFactory->getExtensions(nullptr), std::string("cl_khr_dx9_media_sharing"))); } TEST(D3D10, givenD3D10BuilderWhenGettingExtensionsThenCorrectExtensionsListIsReturned) { auto builderFactory = std::make_unique>(); EXPECT_TRUE(hasSubstr(builderFactory->getExtensions(nullptr), std::string("cl_khr_d3d10_sharing"))); } TEST(D3D11, givenD3D11BuilderWhenGettingExtensionsThenCorrectExtensionsListIsReturned) { auto builderFactory = std::make_unique>(); EXPECT_TRUE(hasSubstr(builderFactory->getExtensions(nullptr), std::string("cl_khr_d3d11_sharing"))); EXPECT_TRUE(hasSubstr(builderFactory->getExtensions(nullptr), std::string("cl_intel_d3d11_nv12_media_sharing"))); } TEST(D3DSharingFactory, 
givenEnabledFormatQueryAndFactoryWithD3DSharingsWhenGettingExtensionFunctionAddressThenFormatQueryFunctionsAreReturned) { DebugManagerStateRestore restorer; DebugManager.flags.EnableFormatQuery.set(true); SharingFactoryMock sharingFactory; auto function = sharingFactory.getExtensionFunctionAddress("clGetSupportedDX9MediaSurfaceFormatsINTEL"); EXPECT_EQ(reinterpret_cast(clGetSupportedDX9MediaSurfaceFormatsINTEL), function); function = sharingFactory.getExtensionFunctionAddress("clGetSupportedD3D10TextureFormatsINTEL"); EXPECT_EQ(reinterpret_cast(clGetSupportedD3D10TextureFormatsINTEL), function); function = sharingFactory.getExtensionFunctionAddress("clGetSupportedD3D11TextureFormatsINTEL"); EXPECT_EQ(reinterpret_cast(clGetSupportedD3D11TextureFormatsINTEL), function); } TEST(D3D9SharingFactory, givenDriverInfoWhenVerifyExtensionSupportThenExtensionEnableIsSetCorrect) { class MockDriverInfo : public DriverInfo { public: bool getMediaSharingSupport() override { return support; }; bool support = true; }; class MockSharingFactory : public SharingFactory { public: MockSharingFactory() { memcpy_s(savedState, sizeof(savedState), sharingContextBuilder, sizeof(sharingContextBuilder)); } ~MockSharingFactory() { memcpy_s(sharingContextBuilder, sizeof(sharingContextBuilder), savedState, sizeof(savedState)); } void prepare() { for (auto &builder : sharingContextBuilder) { builder = nullptr; } d3d9SharingBuilderFactory = std::make_unique>(); sharingContextBuilder[SharingType::D3D9_SHARING] = d3d9SharingBuilderFactory.get(); } using SharingFactory::sharingContextBuilder; std::unique_ptr> d3d9SharingBuilderFactory; decltype(SharingFactory::sharingContextBuilder) savedState; }; auto driverInfo = std::make_unique(); auto mockSharingFactory = std::make_unique(); mockSharingFactory->prepare(); driverInfo->support = true; mockSharingFactory->verifyExtensionSupport(driverInfo.get()); EXPECT_TRUE(mockSharingFactory->d3d9SharingBuilderFactory->extensionEnabled); driverInfo->support = 
false; mockSharingFactory->verifyExtensionSupport(driverInfo.get()); EXPECT_FALSE(mockSharingFactory->d3d9SharingBuilderFactory->extensionEnabled); } } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/d3d_sharing/d3d_tests_part2.cpp000066400000000000000000001135721422164147700275350ustar00rootroot00000000000000/* * Copyright (C) 2019-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/memory_manager/os_agnostic_memory_manager.h" #include "shared/source/os_interface/os_interface.h" #include "shared/source/utilities/arrayref.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/mocks/mock_wddm.h" #include "opencl/source/api/api.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/platform/platform.h" #include "opencl/source/sharings/d3d/cl_d3d_api.h" #include "opencl/source/sharings/d3d/d3d_buffer.h" #include "opencl/source/sharings/d3d/d3d_sharing.h" #include "opencl/source/sharings/d3d/d3d_surface.h" #include "opencl/source/sharings/d3d/d3d_texture.h" #include "opencl/test/unit_test/fixtures/d3d_test_fixture.h" #include "gtest/gtest.h" namespace NEO { TYPED_TEST_CASE_P(D3DTests); TYPED_TEST_P(D3DTests, givenSharedResourceBufferAndInteropUserSyncEnabledWhenReleaseIsCalledThenDontDoExplicitFinish) { this->context->setInteropUserSyncEnabled(true); this->mockSharingFcns->mockBufferDesc.MiscFlags = D3DResourceFlags::MISC_SHARED; this->mockSharingFcns->getBufferDescSetParams = true; this->mockSharingFcns->getBufferDescParamsSet.bufferDesc = this->mockSharingFcns->mockBufferDesc; class MockCmdQ : public MockCommandQueue { public: MockCmdQ(Context *context, ClDevice *device, const cl_queue_properties *properties) : MockCommandQueue(context, device, properties, false){}; cl_int finish() override { finishCalled++; return CL_SUCCESS; } uint32_t finishCalled = 0; }; auto mockCmdQ = std::unique_ptr(new MockCmdQ(this->context, this->context->getDevice(0), 0)); auto 
buffer = std::unique_ptr(D3DBuffer::create(this->context, (D3DBufferObj *)&this->dummyD3DBuffer, CL_MEM_READ_WRITE, nullptr)); ASSERT_NE(nullptr, buffer.get()); cl_mem bufferMem = (cl_mem)buffer.get(); auto retVal = this->enqueueAcquireD3DObjectsApi(this->mockSharingFcns, mockCmdQ.get(), 1, &bufferMem, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, mockCmdQ->finishCalled); retVal = this->enqueueReleaseD3DObjectsApi(this->mockSharingFcns, mockCmdQ.get(), 1, &bufferMem, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, mockCmdQ->finishCalled); EXPECT_EQ(1u, this->mockSharingFcns->getBufferDescCalled); } TYPED_TEST_P(D3DTests, givenNonSharedResourceBufferAndInteropUserSyncDisabledWhenReleaseIsCalledThenDoExplicitFinishTwice) { this->context->setInteropUserSyncEnabled(false); class MockCmdQ : public MockCommandQueue { public: MockCmdQ(Context *context, ClDevice *device, const cl_queue_properties *properties) : MockCommandQueue(context, device, properties, false){}; cl_int finish() override { finishCalled++; return CL_SUCCESS; } uint32_t finishCalled = 0; }; auto mockCmdQ = std::unique_ptr(new MockCmdQ(this->context, this->context->getDevice(0), 0)); auto buffer = std::unique_ptr(D3DBuffer::create(this->context, (D3DBufferObj *)&this->dummyD3DBuffer, CL_MEM_READ_WRITE, nullptr)); ASSERT_NE(nullptr, buffer.get()); cl_mem bufferMem = (cl_mem)buffer.get(); auto retVal = this->enqueueAcquireD3DObjectsApi(this->mockSharingFcns, mockCmdQ.get(), 1, &bufferMem, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, mockCmdQ->finishCalled); retVal = this->enqueueReleaseD3DObjectsApi(this->mockSharingFcns, mockCmdQ.get(), 1, &bufferMem, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(2u, mockCmdQ->finishCalled); } TYPED_TEST_P(D3DTests, givenSharedResourceBufferAndInteropUserSyncDisabledWhenReleaseIsCalledThenDoExplicitFinishOnce) { this->context->setInteropUserSyncEnabled(false); 
this->mockSharingFcns->mockBufferDesc.MiscFlags = D3DResourceFlags::MISC_SHARED; this->mockSharingFcns->getBufferDescSetParams = true; this->mockSharingFcns->getBufferDescParamsSet.bufferDesc = this->mockSharingFcns->mockBufferDesc; class MockCmdQ : public MockCommandQueue { public: MockCmdQ(Context *context, ClDevice *device, const cl_queue_properties *properties) : MockCommandQueue(context, device, properties, false){}; cl_int finish() override { finishCalled++; return CL_SUCCESS; } uint32_t finishCalled = 0; }; auto mockCmdQ = std::unique_ptr(new MockCmdQ(this->context, this->context->getDevice(0), 0)); auto buffer = std::unique_ptr(D3DBuffer::create(this->context, (D3DBufferObj *)&this->dummyD3DBuffer, CL_MEM_READ_WRITE, nullptr)); ASSERT_NE(nullptr, buffer.get()); cl_mem bufferMem = (cl_mem)buffer.get(); auto retVal = this->enqueueAcquireD3DObjectsApi(this->mockSharingFcns, mockCmdQ.get(), 1, &bufferMem, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, mockCmdQ->finishCalled); retVal = this->enqueueReleaseD3DObjectsApi(this->mockSharingFcns, mockCmdQ.get(), 1, &bufferMem, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, mockCmdQ->finishCalled); EXPECT_EQ(1u, this->mockSharingFcns->getBufferDescCalled); } TYPED_TEST_P(D3DTests, givenNonSharedResourceBufferAndInteropUserSyncEnabledWhenReleaseIsCalledThenDoExplicitFinishOnce) { this->context->setInteropUserSyncEnabled(true); class MockCmdQ : public MockCommandQueue { public: MockCmdQ(Context *context, ClDevice *device, const cl_queue_properties *properties) : MockCommandQueue(context, device, properties, false){}; cl_int finish() override { finishCalled++; return CL_SUCCESS; } uint32_t finishCalled = 0; }; auto mockCmdQ = std::unique_ptr(new MockCmdQ(this->context, this->context->getDevice(0), 0)); auto buffer = std::unique_ptr(D3DBuffer::create(this->context, (D3DBufferObj *)&this->dummyD3DBuffer, CL_MEM_READ_WRITE, nullptr)); ASSERT_NE(nullptr, buffer.get()); cl_mem bufferMem 
= (cl_mem)buffer.get(); auto retVal = this->enqueueAcquireD3DObjectsApi(this->mockSharingFcns, mockCmdQ.get(), 1, &bufferMem, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, mockCmdQ->finishCalled); retVal = this->enqueueReleaseD3DObjectsApi(this->mockSharingFcns, mockCmdQ.get(), 1, &bufferMem, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, mockCmdQ->finishCalled); } TYPED_TEST_P(D3DTests, givenSharedResourceFlagWhenCreate2dTextureThenStagingTextureEqualsPassedTexture) { std::vector releaseExpectedParams{}; { this->mockSharingFcns->mockTexture2dDesc.MiscFlags = D3DResourceFlags::MISC_SHARED; this->mockSharingFcns->mockTexture2dDesc.ArraySize = 4; this->mockSharingFcns->mockTexture2dDesc.MipLevels = 4; this->mockSharingFcns->getTexture2dDescSetParams = true; this->mockSharingFcns->getTexture2dDescParamsSet.textureDesc = this->mockSharingFcns->mockTexture2dDesc; auto image = std::unique_ptr(D3DTexture::create2d(this->context, reinterpret_cast(&this->dummyD3DTexture), CL_MEM_READ_WRITE, 4, nullptr)); ASSERT_NE(nullptr, image.get()); auto d3dTexture = static_cast *>(image->getSharingHandler().get()); ASSERT_NE(nullptr, d3dTexture); EXPECT_TRUE(d3dTexture->isSharedResource()); EXPECT_EQ(&this->dummyD3DTexture, d3dTexture->getResourceStaging()); releaseExpectedParams.push_back(reinterpret_cast(&this->dummyD3DTexture)); releaseExpectedParams.push_back(reinterpret_cast(d3dTexture->getQuery())); EXPECT_EQ(0u, this->mockSharingFcns->createTexture2dCalled); EXPECT_EQ(1u, this->mockSharingFcns->getTexture2dDescCalled); EXPECT_EQ(1u, this->mockSharingFcns->getSharedHandleCalled); EXPECT_EQ(1u, this->mockSharingFcns->addRefCalled); EXPECT_EQ(reinterpret_cast(&this->dummyD3DTexture), this->mockSharingFcns->getSharedHandleParamsPassed[0].resource); EXPECT_EQ(reinterpret_cast(&this->dummyD3DTexture), this->mockSharingFcns->addRefParamsPassed[0].resource); } EXPECT_EQ(2u, this->mockSharingFcns->releaseCalled); 
EXPECT_EQ(releaseExpectedParams[0], this->mockSharingFcns->releaseParamsPassed[0].resource); EXPECT_EQ(releaseExpectedParams[1], this->mockSharingFcns->releaseParamsPassed[1].resource); } TYPED_TEST_P(D3DTests, givenNonSharedResourceFlagWhenCreate2dTextureThenCreateStagingTexture) { std::vector releaseExpectedParams{}; { this->mockSharingFcns->mockTexture2dDesc.MiscFlags = 0; this->mockSharingFcns->getTexture2dDescSetParams = true; this->mockSharingFcns->getTexture2dDescParamsSet.textureDesc = this->mockSharingFcns->mockTexture2dDesc; mockSharingFcns->createTexture2dSetParams = true; mockSharingFcns->createTexture2dParamsSet.texture = reinterpret_cast(&this->dummyD3DTextureStaging); auto image = std::unique_ptr(D3DTexture::create2d(this->context, reinterpret_cast(&this->dummyD3DTexture), CL_MEM_READ_WRITE, 1, nullptr)); ASSERT_NE(nullptr, image.get()); auto d3dTexture = static_cast *>(image->getSharingHandler().get()); ASSERT_NE(nullptr, d3dTexture); EXPECT_FALSE(d3dTexture->isSharedResource()); EXPECT_EQ(&this->dummyD3DTextureStaging, d3dTexture->getResourceStaging()); releaseExpectedParams.push_back(reinterpret_cast(&this->dummyD3DTextureStaging)); releaseExpectedParams.push_back(reinterpret_cast(&this->dummyD3DTexture)); releaseExpectedParams.push_back(reinterpret_cast(d3dTexture->getQuery())); EXPECT_EQ(1u, this->mockSharingFcns->createTexture2dCalled); EXPECT_EQ(1u, this->mockSharingFcns->getTexture2dDescCalled); EXPECT_EQ(1u, this->mockSharingFcns->getSharedHandleCalled); EXPECT_EQ(1u, this->mockSharingFcns->addRefCalled); EXPECT_EQ(reinterpret_cast(&this->dummyD3DTextureStaging), this->mockSharingFcns->getSharedHandleParamsPassed[0].resource); EXPECT_EQ(reinterpret_cast(&this->dummyD3DTexture), this->mockSharingFcns->addRefParamsPassed[0].resource); } EXPECT_EQ(3u, this->mockSharingFcns->releaseCalled); EXPECT_EQ(releaseExpectedParams[0], this->mockSharingFcns->releaseParamsPassed[0].resource); EXPECT_EQ(releaseExpectedParams[1], 
this->mockSharingFcns->releaseParamsPassed[1].resource); EXPECT_EQ(releaseExpectedParams[2], this->mockSharingFcns->releaseParamsPassed[2].resource); } TYPED_TEST_P(D3DTests, givenSharedResourceFlagWhenCreate3dTextureThenStagingTextureEqualsPassedTexture) { std::vector releaseExpectedParams{}; { this->mockSharingFcns->mockTexture3dDesc.MiscFlags = D3DResourceFlags::MISC_SHARED; this->mockSharingFcns->mockTexture3dDesc.MipLevels = 4; this->mockSharingFcns->getTexture3dDescSetParams = true; this->mockSharingFcns->getTexture3dDescParamsSet.textureDesc = this->mockSharingFcns->mockTexture3dDesc; auto image = std::unique_ptr(D3DTexture::create3d(this->context, reinterpret_cast(&this->dummyD3DTexture), CL_MEM_READ_WRITE, 0, nullptr)); ASSERT_NE(nullptr, image.get()); auto d3dTexture = static_cast *>(image->getSharingHandler().get()); ASSERT_NE(nullptr, d3dTexture); EXPECT_TRUE(d3dTexture->isSharedResource()); EXPECT_EQ(&this->dummyD3DTexture, d3dTexture->getResourceStaging()); releaseExpectedParams.push_back(reinterpret_cast(&this->dummyD3DTexture)); releaseExpectedParams.push_back(reinterpret_cast(d3dTexture->getQuery())); EXPECT_EQ(0u, this->mockSharingFcns->createTexture3dCalled); EXPECT_EQ(1u, this->mockSharingFcns->getTexture3dDescCalled); EXPECT_EQ(1u, this->mockSharingFcns->getSharedHandleCalled); EXPECT_EQ(1u, this->mockSharingFcns->addRefCalled); EXPECT_EQ(reinterpret_cast(&this->dummyD3DTexture), this->mockSharingFcns->getSharedHandleParamsPassed[0].resource); EXPECT_EQ(reinterpret_cast(&this->dummyD3DTexture), this->mockSharingFcns->addRefParamsPassed[0].resource); } EXPECT_EQ(2u, this->mockSharingFcns->releaseCalled); EXPECT_EQ(releaseExpectedParams[0], this->mockSharingFcns->releaseParamsPassed[0].resource); EXPECT_EQ(releaseExpectedParams[1], this->mockSharingFcns->releaseParamsPassed[1].resource); } TYPED_TEST_P(D3DTests, givenNonSharedResourceFlagWhenCreate3dTextureThenCreateStagingTexture) { std::vector releaseExpectedParams{}; { 
this->mockSharingFcns->mockTexture3dDesc.MiscFlags = 0; this->mockSharingFcns->getTexture3dDescSetParams = true; this->mockSharingFcns->getTexture3dDescParamsSet.textureDesc = this->mockSharingFcns->mockTexture3dDesc; this->mockSharingFcns->createTexture3dSetParams = true; this->mockSharingFcns->createTexture3dParamsSet.texture = reinterpret_cast(&this->dummyD3DTextureStaging); auto image = std::unique_ptr(D3DTexture::create3d(this->context, reinterpret_cast(&this->dummyD3DTexture), CL_MEM_READ_WRITE, 1, nullptr)); ASSERT_NE(nullptr, image.get()); auto d3dTexture = static_cast *>(image->getSharingHandler().get()); ASSERT_NE(nullptr, d3dTexture); EXPECT_FALSE(d3dTexture->isSharedResource()); EXPECT_EQ(&this->dummyD3DTextureStaging, d3dTexture->getResourceStaging()); releaseExpectedParams.push_back(reinterpret_cast(&this->dummyD3DTextureStaging)); releaseExpectedParams.push_back(reinterpret_cast(&this->dummyD3DTexture)); releaseExpectedParams.push_back(reinterpret_cast(d3dTexture->getQuery())); EXPECT_EQ(1u, this->mockSharingFcns->createTexture3dCalled); EXPECT_EQ(1u, this->mockSharingFcns->getTexture3dDescCalled); EXPECT_EQ(1u, this->mockSharingFcns->getSharedHandleCalled); EXPECT_EQ(1u, this->mockSharingFcns->addRefCalled); EXPECT_EQ(reinterpret_cast(&this->dummyD3DTextureStaging), this->mockSharingFcns->getSharedHandleParamsPassed[0].resource); EXPECT_EQ(reinterpret_cast(&this->dummyD3DTexture), this->mockSharingFcns->addRefParamsPassed[0].resource); } EXPECT_EQ(3u, this->mockSharingFcns->releaseCalled); EXPECT_EQ(releaseExpectedParams[0], this->mockSharingFcns->releaseParamsPassed[0].resource); EXPECT_EQ(releaseExpectedParams[1], this->mockSharingFcns->releaseParamsPassed[1].resource); EXPECT_EQ(releaseExpectedParams[2], this->mockSharingFcns->releaseParamsPassed[2].resource); } TYPED_TEST_P(D3DTests, givenD3DDeviceParamWhenContextCreationThenSetProperValues) { cl_device_id deviceID = this->context->getDevice(0); cl_platform_id pid[1] = {this->pPlatform}; auto 
param = this->pickParam(CL_CONTEXT_D3D10_DEVICE_KHR, CL_CONTEXT_D3D11_DEVICE_KHR); cl_context_properties validProperties[5] = {CL_CONTEXT_PLATFORM, (cl_context_properties)pid[0], param, 0, 0}; cl_int retVal = CL_SUCCESS; auto ctx = std::unique_ptr(Context::create(validProperties, ClDeviceVector(&deviceID, 1), nullptr, nullptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, ctx.get()); EXPECT_EQ(1u, ctx->preferD3dSharedResources); EXPECT_NE(nullptr, ctx->getSharing>()); } TYPED_TEST_P(D3DTests, givenSharedNtHandleFlagWhenCreate2dTextureThenGetNtHandle) { this->mockSharingFcns->mockTexture2dDesc.MiscFlags = D3DResourceFlags::MISC_SHARED_NTHANDLE; this->mockSharingFcns->getTexture2dDescSetParams = true; this->mockSharingFcns->getTexture2dDescParamsSet.textureDesc = this->mockSharingFcns->mockTexture2dDesc; mockSharingFcns->createTexture2dSetParams = true; mockSharingFcns->createTexture2dParamsSet.texture = reinterpret_cast(&this->dummyD3DTextureStaging); auto image = std::unique_ptr(D3DTexture::create2d(this->context, reinterpret_cast(&this->dummyD3DTexture), CL_MEM_READ_WRITE, 1, nullptr)); ASSERT_NE(nullptr, image.get()); auto d3dTexture = static_cast *>(image->getSharingHandler().get()); ASSERT_NE(nullptr, d3dTexture); EXPECT_EQ(1u, this->mockSharingFcns->createTexture2dCalled); EXPECT_EQ(1u, this->mockSharingFcns->getTexture2dDescCalled); EXPECT_EQ(0u, this->mockSharingFcns->getSharedHandleCalled); EXPECT_EQ(1u, this->mockSharingFcns->getSharedNTHandleCalled); } TYPED_TEST_P(D3DTests, givenSharedNtHandleFlagWhenCreate3dTextureThenGetNtHandle) { this->mockSharingFcns->mockTexture3dDesc.MiscFlags = D3DResourceFlags::MISC_SHARED_NTHANDLE; this->mockSharingFcns->getTexture3dDescSetParams = true; this->mockSharingFcns->getTexture3dDescParamsSet.textureDesc = this->mockSharingFcns->mockTexture3dDesc; this->mockSharingFcns->createTexture3dSetParams = true; this->mockSharingFcns->createTexture3dParamsSet.texture = 
reinterpret_cast(&this->dummyD3DTextureStaging); auto image = std::unique_ptr(D3DTexture::create3d(this->context, reinterpret_cast(&this->dummyD3DTexture), CL_MEM_READ_WRITE, 1, nullptr)); ASSERT_NE(nullptr, image.get()); auto d3dTexture = static_cast *>(image->getSharingHandler().get()); ASSERT_NE(nullptr, d3dTexture); EXPECT_EQ(1u, this->mockSharingFcns->createTexture3dCalled); EXPECT_EQ(1u, this->mockSharingFcns->getTexture3dDescCalled); EXPECT_EQ(0u, this->mockSharingFcns->getSharedHandleCalled); EXPECT_EQ(1u, this->mockSharingFcns->getSharedNTHandleCalled); } TYPED_TEST_P(D3DTests, WhenFillingBufferDescThenBufferContentIsCorrect) { D3DBufferDesc requestedDesc = {}; D3DBufferDesc expectedDesc = {}; expectedDesc.ByteWidth = 10; expectedDesc.MiscFlags = D3DResourceFlags::MISC_SHARED; this->mockSharingFcns->fillCreateBufferDesc(requestedDesc, 10); EXPECT_TRUE(memcmp(&requestedDesc, &expectedDesc, sizeof(D3DBufferDesc)) == 0); } TYPED_TEST_P(D3DTests, WhenFillingTexture2dDescThenImageContentIsCorrect) { D3DTexture2dDesc requestedDesc = {}; D3DTexture2dDesc expectedDesc = {}; D3DTexture2dDesc srcDesc = {}; cl_uint subresource = 4; srcDesc.Width = 10; srcDesc.Height = 20; srcDesc.MipLevels = 9; srcDesc.ArraySize = 5; srcDesc.Format = DXGI_FORMAT::DXGI_FORMAT_A8_UNORM; srcDesc.SampleDesc = {8, 9}; srcDesc.BindFlags = 123; srcDesc.CPUAccessFlags = 456; srcDesc.MiscFlags = 789; expectedDesc.Width = srcDesc.Width; expectedDesc.Height = srcDesc.Height; expectedDesc.MipLevels = 1; expectedDesc.ArraySize = 1; expectedDesc.Format = srcDesc.Format; expectedDesc.MiscFlags = D3DResourceFlags::MISC_SHARED; expectedDesc.SampleDesc = srcDesc.SampleDesc; for (uint32_t i = 0u; i < (subresource % srcDesc.MipLevels); i++) { expectedDesc.Width /= 2; expectedDesc.Height /= 2; } this->mockSharingFcns->fillCreateTexture2dDesc(requestedDesc, &srcDesc, subresource); EXPECT_TRUE(memcmp(&requestedDesc, &expectedDesc, sizeof(D3DTexture2dDesc)) == 0); } TYPED_TEST_P(D3DTests, 
WhenFillingTexture3dDescThenImageContentIsCorrect) { D3DTexture3dDesc requestedDesc = {}; D3DTexture3dDesc expectedDesc = {}; D3DTexture3dDesc srcDesc = {}; cl_uint subresource = 4; srcDesc.Width = 10; srcDesc.Height = 20; srcDesc.Depth = 30; srcDesc.MipLevels = 9; srcDesc.Format = DXGI_FORMAT::DXGI_FORMAT_A8_UNORM; srcDesc.BindFlags = 123; srcDesc.CPUAccessFlags = 456; srcDesc.MiscFlags = 789; expectedDesc.Width = srcDesc.Width; expectedDesc.Height = srcDesc.Height; expectedDesc.Depth = srcDesc.Depth; expectedDesc.MipLevels = 1; expectedDesc.Format = srcDesc.Format; expectedDesc.MiscFlags = D3DResourceFlags::MISC_SHARED; for (uint32_t i = 0u; i < (subresource % srcDesc.MipLevels); i++) { expectedDesc.Width /= 2; expectedDesc.Height /= 2; expectedDesc.Depth /= 2; } this->mockSharingFcns->fillCreateTexture3dDesc(requestedDesc, &srcDesc, subresource); EXPECT_TRUE(memcmp(&requestedDesc, &expectedDesc, sizeof(D3DTexture3dDesc)) == 0); } TYPED_TEST_P(D3DTests, givenPlaneWhenFindYuvSurfaceCalledThenReturnValidImgFormat) { const ClSurfaceFormatInfo *surfaceFormat; DXGI_FORMAT testFormat[] = {DXGI_FORMAT::DXGI_FORMAT_NV12, DXGI_FORMAT::DXGI_FORMAT_P010, DXGI_FORMAT::DXGI_FORMAT_P016}; int channelDataType[] = {CL_UNORM_INT8, CL_UNORM_INT16, CL_UNORM_INT16}; for (int n = 0; n < 3; n++) { surfaceFormat = D3DTexture::findYuvSurfaceFormatInfo(testFormat[n], ImagePlane::NO_PLANE, CL_MEM_READ_WRITE); EXPECT_TRUE(surfaceFormat->OCLImageFormat.image_channel_order == CL_RG); EXPECT_TRUE(surfaceFormat->OCLImageFormat.image_channel_data_type == channelDataType[n]); surfaceFormat = D3DTexture::findYuvSurfaceFormatInfo(testFormat[n], ImagePlane::PLANE_U, CL_MEM_READ_WRITE); EXPECT_TRUE(surfaceFormat->OCLImageFormat.image_channel_order == CL_RG); EXPECT_TRUE(surfaceFormat->OCLImageFormat.image_channel_data_type == channelDataType[n]); surfaceFormat = D3DTexture::findYuvSurfaceFormatInfo(testFormat[n], ImagePlane::PLANE_UV, CL_MEM_READ_WRITE); 
EXPECT_TRUE(surfaceFormat->OCLImageFormat.image_channel_order == CL_RG); EXPECT_TRUE(surfaceFormat->OCLImageFormat.image_channel_data_type == channelDataType[n]); surfaceFormat = D3DTexture::findYuvSurfaceFormatInfo(testFormat[n], ImagePlane::PLANE_V, CL_MEM_READ_WRITE); EXPECT_TRUE(surfaceFormat->OCLImageFormat.image_channel_order == CL_RG); EXPECT_TRUE(surfaceFormat->OCLImageFormat.image_channel_data_type == channelDataType[n]); surfaceFormat = D3DTexture::findYuvSurfaceFormatInfo(testFormat[n], ImagePlane::PLANE_Y, CL_MEM_READ_WRITE); EXPECT_TRUE(surfaceFormat->OCLImageFormat.image_channel_order == CL_R); EXPECT_TRUE(surfaceFormat->OCLImageFormat.image_channel_data_type == channelDataType[n]); } } TYPED_TEST_P(D3DTests, GivenForced32BitAddressingWhenCreatingBufferThenBufferHas32BitAllocation) { auto buffer = std::unique_ptr(D3DBuffer::create(this->context, (D3DBufferObj *)&this->dummyD3DBuffer, CL_MEM_READ_WRITE, nullptr)); ASSERT_NE(nullptr, buffer.get()); auto *allocation = buffer->getGraphicsAllocation(rootDeviceIndex); EXPECT_NE(nullptr, allocation); EXPECT_TRUE(allocation->is32BitAllocation()); } TYPED_TEST_P(D3DTests, givenD3DTexture2dWhenOclImageIsCreatedThenSharedImageAllocationTypeIsSet) { this->mockSharingFcns->mockTexture2dDesc.Format = DXGI_FORMAT_P016; this->mockSharingFcns->getTexture2dDescSetParams = true; this->mockSharingFcns->getTexture2dDescParamsSet.textureDesc = this->mockSharingFcns->mockTexture2dDesc; auto image = std::unique_ptr(D3DTexture::create2d(this->context, reinterpret_cast(&this->dummyD3DTexture), CL_MEM_READ_WRITE, 7, nullptr)); ASSERT_NE(nullptr, image.get()); ASSERT_NE(nullptr, image->getGraphicsAllocation(rootDeviceIndex)); EXPECT_EQ(AllocationType::SHARED_IMAGE, image->getGraphicsAllocation(rootDeviceIndex)->getAllocationType()); EXPECT_EQ(1u, this->mockSharingFcns->getTexture2dDescCalled); } TYPED_TEST_P(D3DTests, givenD3DTexture3dWhenOclImageIsCreatedThenSharedImageAllocationTypeIsSet) { 
this->mockSharingFcns->mockTexture3dDesc.MiscFlags = D3DResourceFlags::MISC_SHARED; this->mockSharingFcns->getTexture3dDescSetParams = true; this->mockSharingFcns->getTexture3dDescParamsSet.textureDesc = this->mockSharingFcns->mockTexture3dDesc; this->mockSharingFcns->createTexture3dSetParams = true; this->mockSharingFcns->createTexture3dParamsSet.texture = reinterpret_cast(&this->dummyD3DTextureStaging); auto image = std::unique_ptr(D3DTexture::create3d(this->context, reinterpret_cast(&this->dummyD3DTexture), CL_MEM_READ_WRITE, 1, nullptr)); ASSERT_NE(nullptr, image.get()); ASSERT_NE(nullptr, image->getGraphicsAllocation(rootDeviceIndex)); EXPECT_EQ(AllocationType::SHARED_IMAGE, image->getGraphicsAllocation(rootDeviceIndex)->getAllocationType()); EXPECT_EQ(1u, this->mockSharingFcns->createTexture3dCalled); EXPECT_EQ(1u, this->mockSharingFcns->getTexture3dDescCalled); } TYPED_TEST_P(D3DTests, givenSharedObjectFromInvalidContextWhen3dCreatedThenReturnCorrectCode) { this->mockSharingFcns->mockTexture3dDesc.MiscFlags = D3DResourceFlags::MISC_SHARED; this->mockSharingFcns->getTexture3dDescSetParams = true; this->mockSharingFcns->getTexture3dDescParamsSet.textureDesc = this->mockSharingFcns->mockTexture3dDesc; this->mockSharingFcns->createTexture3dSetParams = true; this->mockSharingFcns->createTexture3dParamsSet.texture = reinterpret_cast(&this->dummyD3DTextureStaging); cl_int retCode = 0; mockMM.get()->verifyValue = false; auto image = std::unique_ptr(D3DTexture::create3d(this->context, reinterpret_cast(&this->dummyD3DTexture), CL_MEM_READ_WRITE, 1, &retCode)); mockMM.get()->verifyValue = true; EXPECT_EQ(nullptr, image.get()); EXPECT_EQ(retCode, CL_INVALID_D3D11_RESOURCE_KHR); EXPECT_EQ(1u, this->mockSharingFcns->createTexture3dCalled); EXPECT_EQ(1u, this->mockSharingFcns->getTexture3dDescCalled); } TYPED_TEST_P(D3DTests, givenSharedObjectFromInvalidContextAndNTHandleWhen3dCreatedThenReturnCorrectCode) { this->mockSharingFcns->mockTexture3dDesc.MiscFlags = 
D3DResourceFlags::MISC_SHARED_NTHANDLE; this->mockSharingFcns->getTexture3dDescSetParams = true; this->mockSharingFcns->getTexture3dDescParamsSet.textureDesc = this->mockSharingFcns->mockTexture3dDesc; this->mockSharingFcns->createTexture3dSetParams = true; this->mockSharingFcns->createTexture3dParamsSet.texture = reinterpret_cast(&this->dummyD3DTextureStaging); cl_int retCode = 0; mockMM.get()->verifyValue = false; auto image = std::unique_ptr(D3DTexture::create3d(this->context, reinterpret_cast(&this->dummyD3DTexture), CL_MEM_READ_WRITE, 1, &retCode)); mockMM.get()->verifyValue = true; EXPECT_EQ(nullptr, image.get()); EXPECT_EQ(retCode, CL_INVALID_D3D11_RESOURCE_KHR); EXPECT_EQ(1u, this->mockSharingFcns->createTexture3dCalled); EXPECT_EQ(1u, this->mockSharingFcns->getTexture3dDescCalled); } TYPED_TEST_P(D3DTests, givenSharedObjectAndAlocationFailedWhen3dCreatedThenReturnCorrectCode) { this->mockSharingFcns->mockTexture3dDesc.MiscFlags = D3DResourceFlags::MISC_SHARED; this->mockSharingFcns->getTexture3dDescSetParams = true; this->mockSharingFcns->getTexture3dDescParamsSet.textureDesc = this->mockSharingFcns->mockTexture3dDesc; this->mockSharingFcns->createTexture3dSetParams = true; this->mockSharingFcns->createTexture3dParamsSet.texture = reinterpret_cast(&this->dummyD3DTextureStaging); cl_int retCode = 0; mockMM.get()->failAlloc = true; auto image = std::unique_ptr(D3DTexture::create3d(this->context, reinterpret_cast(&this->dummyD3DTexture), CL_MEM_READ_WRITE, 1, &retCode)); mockMM.get()->failAlloc = false; EXPECT_EQ(nullptr, image.get()); EXPECT_EQ(retCode, CL_OUT_OF_HOST_MEMORY); EXPECT_EQ(1u, this->mockSharingFcns->createTexture3dCalled); EXPECT_EQ(1u, this->mockSharingFcns->getTexture3dDescCalled); } TYPED_TEST_P(D3DTests, givenSharedObjectAndNTHandleAndAllocationFailedWhen3dCreatedThenReturnCorrectCode) { this->mockSharingFcns->mockTexture3dDesc.MiscFlags = D3DResourceFlags::MISC_SHARED_NTHANDLE; this->mockSharingFcns->getTexture3dDescSetParams = true; 
this->mockSharingFcns->getTexture3dDescParamsSet.textureDesc = this->mockSharingFcns->mockTexture3dDesc; this->mockSharingFcns->createTexture3dSetParams = true; this->mockSharingFcns->createTexture3dParamsSet.texture = reinterpret_cast(&this->dummyD3DTextureStaging); cl_int retCode = 0; mockMM.get()->failAlloc = true; auto image = std::unique_ptr(D3DTexture::create3d(this->context, reinterpret_cast(&this->dummyD3DTexture), CL_MEM_READ_WRITE, 1, &retCode)); mockMM.get()->failAlloc = false; EXPECT_EQ(nullptr, image.get()); EXPECT_EQ(retCode, CL_OUT_OF_HOST_MEMORY); EXPECT_EQ(1u, this->mockSharingFcns->createTexture3dCalled); EXPECT_EQ(1u, this->mockSharingFcns->getTexture3dDescCalled); } TYPED_TEST_P(D3DTests, givenFormatNotSupportedByDxWhenGettingSupportedFormatsThenOnlySupportedFormatsAreReturned) { std::vector unsupportedDXGIformats = { DXGI_FORMAT_BC6H_TYPELESS, DXGI_FORMAT_BC6H_UF16, DXGI_FORMAT_BC6H_SF16, DXGI_FORMAT_BC7_TYPELESS, DXGI_FORMAT_BC7_UNORM, DXGI_FORMAT_BC7_UNORM_SRGB, DXGI_FORMAT_AYUV, DXGI_FORMAT_Y410, DXGI_FORMAT_Y416, DXGI_FORMAT_420_OPAQUE, DXGI_FORMAT_YUY2, DXGI_FORMAT_Y210, DXGI_FORMAT_Y216, DXGI_FORMAT_NV11, DXGI_FORMAT_AI44, DXGI_FORMAT_IA44, DXGI_FORMAT_P8, DXGI_FORMAT_A8P8, DXGI_FORMAT_B4G4R4A4_UNORM, DXGI_FORMAT_P208, DXGI_FORMAT_V208, DXGI_FORMAT_V408, DXGI_FORMAT_FORCE_UINT}; mockSharingFcns->checkFormatSupportSetParam1 = true; mockSharingFcns->checkUnsupportedDXGIformats = true; mockSharingFcns->checkFormatSupportParamsSet.pFormat = D3D11_FORMAT_SUPPORT_BUFFER | D3D11_FORMAT_SUPPORT_TEXTURE2D | D3D11_FORMAT_SUPPORT_TEXTURE3D; mockSharingFcns->unsupportedDXGIformats = unsupportedDXGIformats; std::vector formats; cl_uint numTextureFormats = 0; auto retVal = getSupportedDXTextureFormats(context, CL_MEM_OBJECT_IMAGE3D, 0, 0, nullptr, &numTextureFormats); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(0u, numTextureFormats); formats.resize(numTextureFormats); retVal = getSupportedDXTextureFormats(context, CL_MEM_OBJECT_IMAGE3D, 0, 
static_cast(formats.size()), formats.data(), &numTextureFormats); EXPECT_EQ(CL_SUCCESS, retVal); bool foundUnsupported = false; for (auto format : formats) { auto iter = std::find(unsupportedDXGIformats.begin(), unsupportedDXGIformats.end(), format); if (iter != unsupportedDXGIformats.end()) { foundUnsupported = true; } } EXPECT_FALSE(foundUnsupported); } TYPED_TEST_P(D3DTests, givenUnsupportedFormatWhenCreatingTexture2dThenInvalidImageFormatDescriptorIsReturned) { mockSharingFcns->checkFormatSupportSetParam1 = true; mockSharingFcns->checkUnsupportedDXGIformats = true; mockSharingFcns->checkFormatSupportSetParam0 = true; mockSharingFcns->checkFormatSupportParamsSet.pFormat = D3D11_FORMAT_SUPPORT_BUFFER | D3D11_FORMAT_SUPPORT_TEXTURE2D | D3D11_FORMAT_SUPPORT_TEXTURE3D; mockSharingFcns->checkFormatSupportParamsSet.format = DXGI_FORMAT_R32_FLOAT; mockSharingFcns->unsupportedDXGIformats = {DXGI_FORMAT_R32_FLOAT}; mockSharingFcns->callBaseValidateFormatSupport = true; this->mockSharingFcns->mockTexture2dDesc.Format = DXGI_FORMAT_R32_FLOAT; this->mockSharingFcns->getTexture2dDescSetParams = true; this->mockSharingFcns->getTexture2dDescParamsSet.textureDesc = this->mockSharingFcns->mockTexture2dDesc; cl_int retCode = CL_SUCCESS; auto image = std::unique_ptr(D3DTexture::create2d(this->context, reinterpret_cast(&this->dummyD3DTexture), CL_MEM_READ_WRITE, 0, &retCode)); EXPECT_EQ(nullptr, image.get()); EXPECT_EQ(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, retCode); EXPECT_EQ(1u, this->mockSharingFcns->getTexture2dDescCalled); } TYPED_TEST_P(D3DTests, givenUnsupportedFormatWhenCreatingTexture3dThenInvalidImageFormatDescriptorIsReturned) { mockSharingFcns->checkFormatSupportSetParam1 = true; mockSharingFcns->checkUnsupportedDXGIformats = true; mockSharingFcns->checkFormatSupportSetParam0 = true; mockSharingFcns->checkFormatSupportParamsSet.pFormat = D3D11_FORMAT_SUPPORT_BUFFER | D3D11_FORMAT_SUPPORT_TEXTURE2D | D3D11_FORMAT_SUPPORT_TEXTURE3D; 
mockSharingFcns->checkFormatSupportParamsSet.format = DXGI_FORMAT_R32_FLOAT; mockSharingFcns->unsupportedDXGIformats = {DXGI_FORMAT_R32_FLOAT}; mockSharingFcns->callBaseValidateFormatSupport = true; this->mockSharingFcns->mockTexture3dDesc.Format = DXGI_FORMAT_R32_FLOAT; this->mockSharingFcns->getTexture3dDescSetParams = true; this->mockSharingFcns->getTexture3dDescParamsSet.textureDesc = this->mockSharingFcns->mockTexture3dDesc; cl_int retCode = CL_SUCCESS; auto image = std::unique_ptr(D3DTexture::create3d(this->context, reinterpret_cast(&this->dummyD3DTexture), CL_MEM_READ_WRITE, 0, &retCode)); EXPECT_EQ(nullptr, image.get()); EXPECT_EQ(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, retCode); EXPECT_EQ(1u, this->mockSharingFcns->getTexture3dDescCalled); } REGISTER_TYPED_TEST_CASE_P(D3DTests, givenSharedResourceBufferAndInteropUserSyncEnabledWhenReleaseIsCalledThenDontDoExplicitFinish, givenNonSharedResourceBufferAndInteropUserSyncDisabledWhenReleaseIsCalledThenDoExplicitFinishTwice, givenSharedResourceBufferAndInteropUserSyncDisabledWhenReleaseIsCalledThenDoExplicitFinishOnce, givenNonSharedResourceBufferAndInteropUserSyncEnabledWhenReleaseIsCalledThenDoExplicitFinishOnce, givenSharedResourceFlagWhenCreate2dTextureThenStagingTextureEqualsPassedTexture, givenNonSharedResourceFlagWhenCreate2dTextureThenCreateStagingTexture, givenSharedResourceFlagWhenCreate3dTextureThenStagingTextureEqualsPassedTexture, givenNonSharedResourceFlagWhenCreate3dTextureThenCreateStagingTexture, givenD3DDeviceParamWhenContextCreationThenSetProperValues, givenSharedNtHandleFlagWhenCreate2dTextureThenGetNtHandle, givenSharedNtHandleFlagWhenCreate3dTextureThenGetNtHandle, WhenFillingBufferDescThenBufferContentIsCorrect, WhenFillingTexture2dDescThenImageContentIsCorrect, WhenFillingTexture3dDescThenImageContentIsCorrect, givenPlaneWhenFindYuvSurfaceCalledThenReturnValidImgFormat, GivenForced32BitAddressingWhenCreatingBufferThenBufferHas32BitAllocation, 
givenD3DTexture2dWhenOclImageIsCreatedThenSharedImageAllocationTypeIsSet, givenD3DTexture3dWhenOclImageIsCreatedThenSharedImageAllocationTypeIsSet, givenSharedObjectFromInvalidContextWhen3dCreatedThenReturnCorrectCode, givenSharedObjectFromInvalidContextAndNTHandleWhen3dCreatedThenReturnCorrectCode, givenSharedObjectAndAlocationFailedWhen3dCreatedThenReturnCorrectCode, givenSharedObjectAndNTHandleAndAllocationFailedWhen3dCreatedThenReturnCorrectCode, givenFormatNotSupportedByDxWhenGettingSupportedFormatsThenOnlySupportedFormatsAreReturned, givenUnsupportedFormatWhenCreatingTexture2dThenInvalidImageFormatDescriptorIsReturned, givenUnsupportedFormatWhenCreatingTexture3dThenInvalidImageFormatDescriptorIsReturned); INSTANTIATE_TYPED_TEST_CASE_P(D3DSharingTests, D3DTests, D3DTypes); using D3D10Test = D3DTests; TEST_F(D3D10Test, givenIncompatibleAdapterLuidWhenGettingDeviceIdsThenNoDevicesAreReturned) { cl_device_id deviceID; cl_uint numDevices = 15; static_cast(context->getDevice(0)->getRootDeviceEnvironment().osInterface->getDriverModel()->as())->verifyAdapterLuidReturnValue = false; auto retVal = clGetDeviceIDsFromD3D10KHR(pPlatform, CL_D3D10_DEVICE_KHR, nullptr, CL_ALL_DEVICES_FOR_D3D10_KHR, 1, &deviceID, &numDevices); EXPECT_EQ(CL_DEVICE_NOT_FOUND, retVal); EXPECT_EQ(0, numDevices); } using D3D11Test = D3DTests; TEST_F(D3D11Test, givenIncompatibleAdapterLuidWhenGettingDeviceIdsThenNoDevicesAreReturned) { cl_device_id deviceID; cl_uint numDevices = 15; static_cast(context->getDevice(0)->getRootDeviceEnvironment().osInterface->getDriverModel()->as())->verifyAdapterLuidReturnValue = false; auto retVal = clGetDeviceIDsFromD3D11KHR(pPlatform, CL_D3D11_DEVICE_KHR, nullptr, CL_ALL_DEVICES_FOR_D3D11_KHR, 1, &deviceID, &numDevices); EXPECT_EQ(CL_DEVICE_NOT_FOUND, retVal); EXPECT_EQ(0, numDevices); } TEST(D3D11, GivenPlanarFormatsWhenCallingIsFormatWithPlane1ThenTrueIsReturned) { std::array planarFormats = {DXGI_FORMAT_NV12, DXGI_FORMAT_P010, DXGI_FORMAT_P016, 
DXGI_FORMAT_420_OPAQUE, DXGI_FORMAT_NV11, DXGI_FORMAT_P208}; for (auto format : planarFormats) { EXPECT_TRUE(D3DSharing::isFormatWithPlane1(format)); } } TEST(D3D11, GivenNonPlanarFormatsWhenCallingIsFormatWithPlane1ThenFalseIsReturned) { std::array planarFormats = {DXGI_FORMAT_R32G32B32_FLOAT, DXGI_FORMAT_R32G32B32_UINT, DXGI_FORMAT_R32G32B32_SINT}; for (auto format : planarFormats) { EXPECT_FALSE(D3DSharing::isFormatWithPlane1(format)); } } } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/device/000077500000000000000000000000001422164147700230665ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/device/CMakeLists.txt000066400000000000000000000013721422164147700256310ustar00rootroot00000000000000# # Copyright (C) 2018-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_device ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/device_caps_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/device_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/device_timers_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/get_device_info_size_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/get_device_info_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sub_device_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/get_device_name_tests.cpp ) if(WIN32) list(APPEND IGDRCL_SRCS_tests_device ${CMAKE_CURRENT_SOURCE_DIR}/device_win_timers_tests.cpp ) endif() target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_device}) add_subdirectories() compute-runtime-22.14.22890/opencl/test/unit_test/device/device_caps_tests.cpp000066400000000000000000002257641422164147700273010ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/compiler_interface/oclc_extensions.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/memory_manager/os_agnostic_memory_manager.h" #include 
"shared/source/os_interface/hw_info_config.h" #include "shared/source/os_interface/os_interface.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/hw_helper_tests.h" #include "shared/test/common/helpers/variable_backup.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/mocks/mock_driver_info.h" #include "shared/test/common/mocks/mock_execution_environment.h" #include "shared/test/common/mocks/mock_sip.h" #include "shared/test/unit_test/helpers/gtest_helpers.h" #include "opencl/source/helpers/cl_hw_helper.h" #include "opencl/test/unit_test/fixtures/device_info_fixture.h" #include "opencl/test/unit_test/helpers/raii_hw_helper.h" #include "opencl/test/unit_test/mocks/ult_cl_device_factory.h" #include "opencl/test/unit_test/test_macros/test_checks_ocl.h" #include "driver_version.h" #include "gtest/gtest.h" #include namespace NEO { extern const char *familyName[]; } // namespace NEO using namespace NEO; struct DeviceGetCapsTest : public ::testing::Test { void SetUp() override { MockSipData::clearUseFlags(); backupSipInitType = std::make_unique>(&MockSipData::useMockSip, true); } void TearDown() override { MockSipData::clearUseFlags(); } void verifyOpenclCAllVersions(MockClDevice &clDevice) { EXPECT_FALSE(clDevice.getDeviceInfo().openclCAllVersions.usesDynamicMem()); for (auto &openclCVersion : clDevice.getDeviceInfo().openclCAllVersions) { EXPECT_STREQ("OpenCL C", openclCVersion.name); } auto openclCWithVersionIterator = clDevice.getDeviceInfo().openclCAllVersions.begin(); EXPECT_EQ(CL_MAKE_VERSION(1u, 0u, 0u), openclCWithVersionIterator->version); EXPECT_EQ(CL_MAKE_VERSION(1u, 1u, 0u), (++openclCWithVersionIterator)->version); EXPECT_EQ(CL_MAKE_VERSION(1u, 2u, 0u), (++openclCWithVersionIterator)->version); if (clDevice.getEnabledClVersion() == 30) { EXPECT_EQ(CL_MAKE_VERSION(3u, 0u, 0u), (++openclCWithVersionIterator)->version); } 
        // End of verifyOpenclCAllVersions: one more increment must land exactly on end(),
        // i.e. the list holds no entries beyond those checked above.
        EXPECT_EQ(clDevice.getDeviceInfo().openclCAllVersions.end(), ++openclCWithVersionIterator);
    }

    // Verifies openclCFeatures: static storage, every entry versioned 3.0, and the feature
    // names in the exact order checked below. "__opencl_c_int64" is always first; the remaining
    // groups are expected only when the matching capabilityTable flag of the device is set.
    void verifyOpenclCFeatures(MockClDevice &clDevice) {
        EXPECT_FALSE(clDevice.getDeviceInfo().openclCFeatures.usesDynamicMem());

        for (auto &openclCFeature : clDevice.getDeviceInfo().openclCFeatures) {
            EXPECT_EQ(CL_MAKE_VERSION(3u, 0u, 0u), openclCFeature.version);
        }

        auto &hwInfo = clDevice.getHardwareInfo();
        // The iterator is advanced once per expected entry, so order matters.
        auto openclCFeatureIterator = clDevice.getDeviceInfo().openclCFeatures.begin();

        EXPECT_STREQ("__opencl_c_int64", openclCFeatureIterator->name);

        if (hwInfo.capabilityTable.supportsImages) {
            EXPECT_STREQ("__opencl_c_3d_image_writes", (++openclCFeatureIterator)->name);
            EXPECT_STREQ("__opencl_c_images", (++openclCFeatureIterator)->name);
            EXPECT_STREQ("__opencl_c_read_write_images", (++openclCFeatureIterator)->name);
        }
        if (hwInfo.capabilityTable.supportsOcl21Features) {
            EXPECT_STREQ("__opencl_c_atomic_order_acq_rel", (++openclCFeatureIterator)->name);
            EXPECT_STREQ("__opencl_c_atomic_order_seq_cst", (++openclCFeatureIterator)->name);
            EXPECT_STREQ("__opencl_c_atomic_scope_all_devices", (++openclCFeatureIterator)->name);
            EXPECT_STREQ("__opencl_c_atomic_scope_device", (++openclCFeatureIterator)->name);
            EXPECT_STREQ("__opencl_c_generic_address_space", (++openclCFeatureIterator)->name);
            EXPECT_STREQ("__opencl_c_program_scope_global_variables", (++openclCFeatureIterator)->name);
            EXPECT_STREQ("__opencl_c_work_group_collective_functions", (++openclCFeatureIterator)->name);
            EXPECT_STREQ("__opencl_c_subgroups", (++openclCFeatureIterator)->name);
        }
        if (hwInfo.capabilityTable.supportsDeviceEnqueue) {
            EXPECT_STREQ("__opencl_c_device_enqueue", (++openclCFeatureIterator)->name);
        }
        if (hwInfo.capabilityTable.supportsPipes) {
            EXPECT_STREQ("__opencl_c_pipes", (++openclCFeatureIterator)->name);
        }
        if (hwInfo.capabilityTable.ftrSupportsFP64) {
            EXPECT_STREQ("__opencl_c_fp64", (++openclCFeatureIterator)->name);
        }

        // No entries may remain after the expected ones.
        EXPECT_EQ(clDevice.getDeviceInfo().openclCFeatures.end(), ++openclCFeatureIterator);
    }
std::unique_ptr> backupSipInitType; }; TEST_F(DeviceGetCapsTest, WhenCreatingDeviceThenCapsArePopulatedCorrectly) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); const auto &caps = device->getDeviceInfo(); const auto &sharedCaps = device->getSharedDeviceInfo(); const auto &sysInfo = defaultHwInfo->gtSystemInfo; auto &hwHelper = HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily); EXPECT_NE(nullptr, caps.builtInKernels); std::string strDriverName = caps.name; std::string strDeviceName = device->getClDeviceName(*defaultHwInfo.get()); EXPECT_NE(std::string::npos, strDriverName.find(strDeviceName)); EXPECT_NE(nullptr, caps.name); EXPECT_NE(nullptr, caps.vendor); EXPECT_NE(nullptr, caps.driverVersion); EXPECT_NE(nullptr, caps.profile); EXPECT_STREQ("OpenCL 3.0 NEO ", caps.clVersion); EXPECT_STREQ("OpenCL C 1.2 ", caps.clCVersion); EXPECT_NE(0u, caps.numericClVersion); EXPECT_GT(caps.openclCAllVersions.size(), 0u); EXPECT_GT(caps.openclCFeatures.size(), 0u); EXPECT_EQ(caps.extensionsWithVersion.size(), 0u); EXPECT_STREQ("v2021-06-16-00", caps.latestConformanceVersionPassed); EXPECT_NE(nullptr, caps.spirVersions); EXPECT_NE(nullptr, caps.deviceExtensions); EXPECT_EQ(static_cast(CL_TRUE), caps.deviceAvailable); EXPECT_EQ(static_cast(CL_TRUE), caps.compilerAvailable); EXPECT_EQ(16u, caps.preferredVectorWidthChar); EXPECT_EQ(8u, caps.preferredVectorWidthShort); EXPECT_EQ(4u, caps.preferredVectorWidthInt); EXPECT_EQ(1u, caps.preferredVectorWidthLong); EXPECT_EQ(1u, caps.preferredVectorWidthFloat); EXPECT_EQ(1u, caps.preferredVectorWidthDouble); EXPECT_EQ(8u, caps.preferredVectorWidthHalf); EXPECT_EQ(16u, caps.nativeVectorWidthChar); EXPECT_EQ(8u, caps.nativeVectorWidthShort); EXPECT_EQ(4u, caps.nativeVectorWidthInt); EXPECT_EQ(1u, caps.nativeVectorWidthLong); EXPECT_EQ(1u, caps.nativeVectorWidthFloat); EXPECT_EQ(1u, caps.nativeVectorWidthDouble); EXPECT_EQ(8u, caps.nativeVectorWidthHalf); EXPECT_EQ(1u, 
caps.linkerAvailable); EXPECT_NE(0u, sharedCaps.globalMemCachelineSize); EXPECT_NE(0u, caps.globalMemCacheSize); EXPECT_LT(0u, sharedCaps.globalMemSize); EXPECT_EQ(sharedCaps.maxMemAllocSize, caps.maxConstantBufferSize); EXPECT_STREQ("SPIR-V_1.2 ", sharedCaps.ilVersion); EXPECT_EQ(defaultHwInfo->capabilityTable.supportsIndependentForwardProgress, caps.independentForwardProgress); EXPECT_EQ(static_cast(CL_TRUE), caps.deviceAvailable); EXPECT_EQ(static_cast(CL_READ_WRITE_CACHE), caps.globalMemCacheType); EXPECT_EQ(sysInfo.EUCount, caps.maxComputUnits); EXPECT_LT(0u, caps.maxConstantArgs); EXPECT_LE(128u, sharedCaps.maxReadImageArgs); EXPECT_LE(128u, sharedCaps.maxWriteImageArgs); if (defaultHwInfo->capabilityTable.supportsImages) { EXPECT_EQ(128u, caps.maxReadWriteImageArgs); } else { EXPECT_EQ(0u, caps.maxReadWriteImageArgs); } EXPECT_LE(sharedCaps.maxReadImageArgs * sizeof(cl_mem), sharedCaps.maxParameterSize); EXPECT_LE(sharedCaps.maxWriteImageArgs * sizeof(cl_mem), sharedCaps.maxParameterSize); EXPECT_LE(128u * MB, sharedCaps.maxMemAllocSize); if (!device->areSharedSystemAllocationsAllowed()) { EXPECT_GE((4 * GB) - (8 * KB), sharedCaps.maxMemAllocSize); } EXPECT_LE(65536u, sharedCaps.imageMaxBufferSize); EXPECT_GT(sharedCaps.maxWorkGroupSize, 0u); EXPECT_EQ(sharedCaps.maxWorkItemSizes[0], sharedCaps.maxWorkGroupSize); EXPECT_EQ(sharedCaps.maxWorkItemSizes[1], sharedCaps.maxWorkGroupSize); EXPECT_EQ(sharedCaps.maxWorkItemSizes[2], sharedCaps.maxWorkGroupSize); EXPECT_EQ(hwHelper.getMaxNumSamplers(), sharedCaps.maxSamplers); // Minimum requirements for OpenCL 1.x EXPECT_EQ(static_cast(CL_FP_ROUND_TO_NEAREST), CL_FP_ROUND_TO_NEAREST & caps.singleFpConfig); EXPECT_EQ(static_cast(CL_FP_INF_NAN), CL_FP_INF_NAN & caps.singleFpConfig); cl_device_fp_config singleFpConfig = CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO | CL_FP_ROUND_TO_INF | CL_FP_INF_NAN | CL_FP_FMA | CL_FP_DENORM; EXPECT_EQ(singleFpConfig, caps.singleFpConfig & singleFpConfig); 
EXPECT_EQ(static_cast(CL_EXEC_KERNEL), CL_EXEC_KERNEL & caps.executionCapabilities); EXPECT_EQ(static_cast(CL_QUEUE_PROFILING_ENABLE), CL_QUEUE_PROFILING_ENABLE & caps.queueOnHostProperties); EXPECT_EQ(static_cast(CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE), CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE & caps.queueOnHostProperties); EXPECT_LT(128u, caps.memBaseAddressAlign); EXPECT_LT(0u, caps.minDataTypeAlignSize); EXPECT_EQ(1u, caps.endianLittle); auto expectedDeviceSubgroups = hwHelper.getDeviceSubGroupSizes(); EXPECT_EQ(expectedDeviceSubgroups.size(), sharedCaps.maxSubGroups.size()); for (uint32_t i = 0; i < expectedDeviceSubgroups.size(); i++) { EXPECT_EQ(expectedDeviceSubgroups[i], sharedCaps.maxSubGroups[i]); } auto expectedMaxNumOfSubGroups = device->areOcl21FeaturesEnabled() ? sharedCaps.maxWorkGroupSize / hwHelper.getMinimalSIMDSize() : 0u; EXPECT_EQ(expectedMaxNumOfSubGroups, caps.maxNumOfSubGroups); EXPECT_EQ(0u, caps.maxOnDeviceEvents); EXPECT_EQ(0u, caps.maxOnDeviceQueues); EXPECT_EQ(0u, caps.queueOnDeviceMaxSize); EXPECT_EQ(0u, caps.queueOnDevicePreferredSize); EXPECT_EQ(static_cast(0), caps.queueOnDeviceProperties); if (defaultHwInfo->capabilityTable.supportsPipes) { EXPECT_EQ(16u, caps.maxPipeArgs); EXPECT_EQ(1024u, caps.pipeMaxPacketSize); EXPECT_EQ(1u, caps.pipeMaxActiveReservations); } else { EXPECT_EQ(0u, caps.maxPipeArgs); EXPECT_EQ(0u, caps.pipeMaxPacketSize); EXPECT_EQ(0u, caps.pipeMaxActiveReservations); } EXPECT_EQ(64u, caps.preferredGlobalAtomicAlignment); EXPECT_EQ(64u, caps.preferredLocalAtomicAlignment); EXPECT_EQ(64u, caps.preferredPlatformAtomicAlignment); EXPECT_TRUE(caps.nonUniformWorkGroupSupport); auto expectedPreferredWorkGroupSizeMultiple = hwHelper.isFusedEuDispatchEnabled(*defaultHwInfo, false) ? 
CommonConstants::maximalSimdSize * 2 : CommonConstants::maximalSimdSize; EXPECT_EQ(expectedPreferredWorkGroupSizeMultiple, caps.preferredWorkGroupSizeMultiple); EXPECT_EQ(static_cast(device->getHardwareInfo().capabilityTable.supportsImages), sharedCaps.imageSupport); EXPECT_EQ(16384u, sharedCaps.image2DMaxWidth); EXPECT_EQ(16384u, sharedCaps.image2DMaxHeight); EXPECT_EQ(2048u, sharedCaps.imageMaxArraySize); if (device->getHardwareInfo().capabilityTable.supportsOcl21Features == false && is64bit) { EXPECT_TRUE(sharedCaps.force32BitAddressess); } } HWTEST_F(DeviceGetCapsTest, givenDeviceWhenAskingForSubGroupSizesThenReturnCorrectValues) { auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); auto &hwHelper = HwHelper::get(device->getHardwareInfo().platform.eRenderCoreFamily); auto deviceSubgroups = hwHelper.getDeviceSubGroupSizes(); EXPECT_EQ(3u, deviceSubgroups.size()); EXPECT_EQ(8u, deviceSubgroups[0]); EXPECT_EQ(16u, deviceSubgroups[1]); EXPECT_EQ(32u, deviceSubgroups[2]); } TEST_F(DeviceGetCapsTest, GivenPlatformWhenGettingHwInfoThenImage3dDimensionsAreCorrect) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); const auto &caps = device->getDeviceInfo(); const auto &sharedCaps = device->getSharedDeviceInfo(); if (device->getHardwareInfo().platform.eRenderCoreFamily > IGFX_GEN8_CORE && device->getHardwareInfo().platform.eRenderCoreFamily != IGFX_GEN12LP_CORE) { EXPECT_EQ(16384u, caps.image3DMaxWidth); EXPECT_EQ(16384u, caps.image3DMaxHeight); } else { EXPECT_EQ(2048u, caps.image3DMaxWidth); EXPECT_EQ(2048u, caps.image3DMaxHeight); } EXPECT_EQ(2048u, sharedCaps.image3DMaxDepth); } TEST_F(DeviceGetCapsTest, givenForceOclVersion30WhenCapsAreCreatedThenDeviceReportsOpenCL30) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.ForceOCLVersion.set(30); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); const auto &caps = 
// Continuation of givenForceOclVersion30WhenCapsAreCreatedThenDeviceReportsOpenCL30.
device->getDeviceInfo();
    EXPECT_STREQ("OpenCL 3.0 NEO ", caps.clVersion);
    EXPECT_STREQ("OpenCL C 1.2 ", caps.clCVersion);
    EXPECT_EQ(CL_MAKE_VERSION(3u, 0u, 0u), caps.numericClVersion);
    EXPECT_FALSE(device->ocl21FeaturesEnabled);
    verifyOpenclCAllVersions(*device);
    verifyOpenclCFeatures(*device);
}

// ForceOCLVersion=21: version strings report 2.1 / C 2.0 and OCL 2.1 features are enabled.
TEST_F(DeviceGetCapsTest, givenForceOclVersion21WhenCapsAreCreatedThenDeviceReportsOpenCL21) {
    DebugManagerStateRestore dbgRestorer;
    DebugManager.flags.ForceOCLVersion.set(21);
    auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()));
    const auto &caps = device->getDeviceInfo();
    EXPECT_STREQ("OpenCL 2.1 NEO ", caps.clVersion);
    EXPECT_STREQ("OpenCL C 2.0 ", caps.clCVersion);
    EXPECT_EQ(CL_MAKE_VERSION(2u, 1u, 0u), caps.numericClVersion);
    EXPECT_TRUE(device->ocl21FeaturesEnabled);
    verifyOpenclCAllVersions(*device);
    verifyOpenclCFeatures(*device);
}

// ForceOCLVersion=12: version strings report 1.2 / C 1.2 and OCL 2.1 features are disabled.
TEST_F(DeviceGetCapsTest, givenForceOclVersion12WhenCapsAreCreatedThenDeviceReportsOpenCL12) {
    DebugManagerStateRestore dbgRestorer;
    DebugManager.flags.ForceOCLVersion.set(12);
    auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()));
    const auto &caps = device->getDeviceInfo();
    EXPECT_STREQ("OpenCL 1.2 NEO ", caps.clVersion);
    EXPECT_STREQ("OpenCL C 1.2 ", caps.clCVersion);
    EXPECT_EQ(CL_MAKE_VERSION(1u, 2u, 0u), caps.numericClVersion);
    EXPECT_FALSE(device->ocl21FeaturesEnabled);
    verifyOpenclCAllVersions(*device);
    verifyOpenclCFeatures(*device);
}

// ForceOCL21FeaturesSupport=1 enables OCL 2.1 features even with ForceOCLVersion=12.
TEST_F(DeviceGetCapsTest, givenForceOCL21FeaturesSupportEnabledWhenCapsAreCreatedThenDeviceReportsSupportOfOcl21Features) {
    DebugManagerStateRestore dbgRestorer;
    DebugManager.flags.ForceOCLVersion.set(12);
    DebugManager.flags.ForceOCL21FeaturesSupport.set(1);
    auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()));
    EXPECT_TRUE(device->ocl21FeaturesEnabled);
}

// ForceOCL21FeaturesSupport=0 disables OCL 2.1 features even with ForceOCLVersion=21.
TEST_F(DeviceGetCapsTest, givenForceOCL21FeaturesSupportDisabledWhenCapsAreCreatedThenDeviceReportsNoSupportOfOcl21Features) {
    DebugManagerStateRestore dbgRestorer;
    DebugManager.flags.ForceOCLVersion.set(21);
    DebugManager.flags.ForceOCL21FeaturesSupport.set(0);
    auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()));
    EXPECT_FALSE(device->ocl21FeaturesEnabled);
}

// ForceOCL21FeaturesSupport=1 enables OCL 2.1 features with ForceOCLVersion=30.
TEST_F(DeviceGetCapsTest, givenForceOcl30AndForceOCL21FeaturesSupportEnabledWhenCapsAreCreatedThenDeviceReportsSupportOfOcl21Features) {
    DebugManagerStateRestore dbgRestorer;
    DebugManager.flags.ForceOCLVersion.set(30);
    DebugManager.flags.ForceOCL21FeaturesSupport.set(1);
    auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()));
    EXPECT_TRUE(device->ocl21FeaturesEnabled);
}

// An unrecognised forced version (1) must result in the OpenCL 1.2 values.
TEST_F(DeviceGetCapsTest, givenForceInvalidOclVersionWhenCapsAreCreatedThenDeviceWillDefaultToOpenCL12) {
    DebugManagerStateRestore dbgRestorer;
    DebugManager.flags.ForceOCLVersion.set(1);
    auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()));
    const auto &caps = device->getDeviceInfo();
    EXPECT_STREQ("OpenCL 1.2 NEO ", caps.clVersion);
    EXPECT_STREQ("OpenCL C 1.2 ", caps.clCVersion);
    EXPECT_EQ(CL_MAKE_VERSION(1u, 2u, 0u), caps.numericClVersion);
    EXPECT_FALSE(device->ocl21FeaturesEnabled);
    verifyOpenclCAllVersions(*device);
    verifyOpenclCFeatures(*device);
}

// Force32bitAddressing=true: checks the 32-bit addressing flag and the resulting size limits
// (body continues on the next line of the file).
TEST_F(DeviceGetCapsTest, givenForce32bitAddressingWhenCapsAreCreatedThenDeviceReports32bitAddressingOptimization) {
    DebugManagerStateRestore dbgRestorer;
    {
        DebugManager.flags.Force32bitAddressing.set(true);
        auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()));
        const auto &caps = device->getDeviceInfo();
        const auto &sharedCaps = device->getSharedDeviceInfo();
        const auto memSizePercent = device->getMemoryManager()->getPercentOfGlobalMemoryAvailable(device->getRootDeviceIndex());
        if constexpr (is64bit) {
            // Continuation of givenForce32bitAddressing...: the flag is reported only on 64-bit builds.
            EXPECT_TRUE(sharedCaps.force32BitAddressess);
        } else {
            EXPECT_FALSE(sharedCaps.force32BitAddressess);
        }
        // Upper bound: 4 GB scaled by the memory manager's available-memory percentage.
        auto expectedSize = (cl_ulong)(4 * memSizePercent * GB);
        EXPECT_LE(sharedCaps.globalMemSize, expectedSize);
        EXPECT_LE(sharedCaps.maxMemAllocSize, expectedSize);
        EXPECT_LE(caps.maxConstantBufferSize, expectedSize);
        EXPECT_EQ(sharedCaps.addressBits, 32u);
    }
}

// globalMemSize must already be a multiple of the page size (aligning down changes nothing).
TEST_F(DeviceGetCapsTest, WhenDeviceIsCreatedThenGlobalMemSizeIsAlignedDownToPageSize) {
    auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()));
    const auto &sharedCaps = device->getSharedDeviceInfo();

    auto expectedSize = alignDown(sharedCaps.globalMemSize, MemoryConstants::pageSize);

    EXPECT_EQ(sharedCaps.globalMemSize, expectedSize);
}

// Recomputes the expected globalMemSize from system shared memory and the application address
// space, caps it at 4 GB * percentage when 32-bit addressing applies, and compares.
TEST_F(DeviceGetCapsTest, Given32bitAddressingWhenDeviceIsCreatedThenGlobalMemSizeIsAlignedDownToPageSize) {
    auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()));
    const auto &sharedCaps = device->getSharedDeviceInfo();
    auto pMemManager = device->getMemoryManager();
    auto enabledOcl21Features = device->areOcl21FeaturesEnabled();
    // 32-bit addressing applies on 32-bit builds, on 64-bit builds without OCL 2.1 features,
    // or when forced through the debug flag.
    bool addressing32Bit = is32bit || (is64bit && (enabledOcl21Features == false)) || DebugManager.flags.Force32bitAddressing.get();
    const auto memSizePercent = pMemManager->getPercentOfGlobalMemoryAvailable(device->getRootDeviceIndex());

    cl_ulong sharedMem = (cl_ulong)pMemManager->getSystemSharedMemory(0u);
    cl_ulong maxAppAddrSpace = (cl_ulong)pMemManager->getMaxApplicationAddress() + 1ULL;
    cl_ulong memSize = std::min(sharedMem, maxAppAddrSpace);
    memSize = (cl_ulong)((double)memSize * memSizePercent);
    if (addressing32Bit) {
        memSize = std::min(memSize, (uint64_t)(4 * GB * memSizePercent));
    }
    cl_ulong expectedSize = alignDown(memSize, MemoryConstants::pageSize);

    EXPECT_EQ(sharedCaps.globalMemSize, expectedSize);
}

// Same computation as above, but with EnableLocalMemory=true the local memory size is the basis.
TEST_F(DeviceGetCapsTest, givenDeviceCapsWhenLocalMemoryIsEnabledThenCalculateGlobalMemSizeBasedOnLocalMemory) {
    DebugManagerStateRestore dbgRestorer;
    DebugManager.flags.EnableLocalMemory.set(true);
    auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()));
    const auto &sharedCaps = device->getSharedDeviceInfo();
    auto pMemManager = device->getMemoryManager();
    auto enabledOcl21Features = device->areOcl21FeaturesEnabled();
    bool addressing32Bit = is32bit || (is64bit && (enabledOcl21Features == false)) || DebugManager.flags.Force32bitAddressing.get();
    const auto memSizePercent = pMemManager->getPercentOfGlobalMemoryAvailable(device->getRootDeviceIndex());

    auto localMem = pMemManager->getLocalMemorySize(0u, static_cast(device->getDeviceBitfield().to_ulong()));
    auto maxAppAddrSpace = pMemManager->getMaxApplicationAddress() + 1;
    auto memSize = std::min(localMem, maxAppAddrSpace);
    memSize = static_cast(memSize * memSizePercent);
    if (addressing32Bit) {
        memSize = std::min(memSize, static_cast(4 * GB * memSizePercent));
    }
    cl_ulong expectedSize = alignDown(memSize, MemoryConstants::pageSize);

    EXPECT_EQ(sharedCaps.globalMemSize, expectedSize);
}

// EnableSharedSystemUsmSupport=0: maxMemAllocSize is max(globalMemSize / 2, 128 MB),
// capped by the HW helper's getMaxMemAllocSize().
HWTEST_F(DeviceGetCapsTest, givenGlobalMemSizeAndSharedSystemAllocationsNotSupportedWhenCalculatingMaxAllocSizeThenAdjustToHWCap) {
    DebugManagerStateRestore dbgRestorer;
    DebugManager.flags.EnableSharedSystemUsmSupport.set(0);
    auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()));
    const auto &caps = device->getSharedDeviceInfo();

    uint64_t expectedSize = std::max((caps.globalMemSize / 2), static_cast(128ULL * MemoryConstants::megaByte));
    expectedSize = std::min(expectedSize, HwHelperHw::get().getMaxMemAllocSize());

    EXPECT_EQ(caps.maxMemAllocSize, expectedSize);
}

// EnableSharedSystemUsmSupport=1: maxMemAllocSize equals globalMemSize.
TEST_F(DeviceGetCapsTest, givenGlobalMemSizeAndSharedSystemAllocationsSupportedWhenCalculatingMaxAllocSizeThenEqualsToGlobalMemSize) {
    DebugManagerStateRestore dbgRestorer;
    DebugManager.flags.EnableSharedSystemUsmSupport.set(1);
    auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()));
    const auto &caps = device->getSharedDeviceInfo();

    EXPECT_EQ(caps.maxMemAllocSize, caps.globalMemSize);
}

// The extension string must be non-empty and end with a single space character.
TEST_F(DeviceGetCapsTest, WhenDeviceIsCreatedThenExtensionsStringEndsWithSpace) {
    auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()));
    const auto &caps = device->getDeviceInfo();
    auto len = strlen(caps.deviceExtensions);
    // Fatal: len - 1 below would wrap around for an empty string.
    ASSERT_LT(0U, len);
    EXPECT_EQ(' ', caps.deviceExtensions[len - 1]);
}

// EnableFormatQuery=true with CreateMultipleSubDevices=0: the extension is reported.
TEST_F(DeviceGetCapsTest, givenEnableSharingFormatQuerySetTrueAndDisabledMultipleSubDevicesWhenDeviceCapsAreCreatedThenSharingFormatQueryIsReported) {
    DebugManagerStateRestore dbgRestorer;
    DebugManager.flags.EnableFormatQuery.set(true);
    DebugManager.flags.CreateMultipleSubDevices.set(0);
    auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()));
    const auto &caps = device->getDeviceInfo();
    EXPECT_TRUE(hasSubstr(caps.deviceExtensions, std::string("cl_intel_sharing_format_query ")));
}

// EnableFormatQuery=true with two sub-devices: the root device does not report the extension.
// MockDevice::createSingleDevice is set to false for the duration of the test.
TEST_F(DeviceGetCapsTest, givenEnableSharingFormatQuerySetTrueAndEnabledMultipleSubDevicesWhenDeviceCapsAreCreatedForRootDeviceThenSharingFormatQueryIsNotReported) {
    DebugManagerStateRestore dbgRestorer;
    VariableBackup mockDeviceFlagBackup{&MockDevice::createSingleDevice, false};
    DebugManager.flags.EnableFormatQuery.set(true);
    DebugManager.flags.CreateMultipleSubDevices.set(2);
    auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()));
    const auto &caps = device->getDeviceInfo();
    EXPECT_FALSE(hasSubstr(caps.deviceExtensions, std::string("cl_intel_sharing_format_query ")));
}

// EnableFormatQuery=true with two sub-devices: each sub-device reports the extension
// (body continues on the next line of the file).
TEST_F(DeviceGetCapsTest, givenEnableSharingFormatQuerySetTrueAndEnabledMultipleSubDevicesWhenDeviceCapsAreCreatedForSubDeviceThenSharingFormatQueryIsReported) {
    DebugManagerStateRestore dbgRestorer;
    DebugManager.flags.EnableFormatQuery.set(true);
    DebugManager.flags.CreateMultipleSubDevices.set(2);
    auto rootDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()));
EXPECT_FALSE(hasSubstr(rootDevice->getDeviceInfo().deviceExtensions, std::string("cl_intel_sharing_format_query "))); auto subDevice0 = rootDevice->getSubDevice(0); EXPECT_TRUE(hasSubstr(subDevice0->getDeviceInfo().deviceExtensions, std::string("cl_intel_sharing_format_query "))); auto subDevice1 = rootDevice->getSubDevice(1); EXPECT_TRUE(hasSubstr(subDevice1->getDeviceInfo().deviceExtensions, std::string("cl_intel_sharing_format_query "))); } TEST_F(DeviceGetCapsTest, givenOpenCLVersion20WhenCapsAreCreatedThenDeviceDoesntReportClKhrSubgroupsExtension) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.ForceOCLVersion.set(20); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); const auto &caps = device->getDeviceInfo(); EXPECT_FALSE(hasSubstr(caps.deviceExtensions, std::string("cl_khr_subgroups"))); } TEST_F(DeviceGetCapsTest, givenOpenCLVersion21WhenCapsAreCreatedThenDeviceReportsClIntelSpirvExtensions) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.ForceOCLVersion.set(21); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); const auto &caps = device->getDeviceInfo(); const HardwareInfo *hwInfo = defaultHwInfo.get(); { if (hwInfo->capabilityTable.supportsVme) { EXPECT_TRUE(hasSubstr(caps.deviceExtensions, std::string("cl_intel_spirv_device_side_avc_motion_estimation"))); } else { EXPECT_FALSE(hasSubstr(caps.deviceExtensions, std::string("cl_intel_spirv_device_side_avc_motion_estimation"))); } if (hwInfo->capabilityTable.supportsImages) { EXPECT_TRUE(hasSubstr(caps.deviceExtensions, std::string(std::string("cl_khr_3d_image_writes")))); } else { EXPECT_FALSE(hasSubstr(caps.deviceExtensions, std::string("cl_khr_3d_image_writes"))); } if (hwInfo->capabilityTable.supportsMediaBlock) { EXPECT_TRUE(hasSubstr(caps.deviceExtensions, std::string("cl_intel_spirv_media_block_io"))); } else { EXPECT_FALSE(hasSubstr(caps.deviceExtensions, 
std::string("cl_intel_spirv_media_block_io"))); } EXPECT_TRUE(hasSubstr(caps.deviceExtensions, std::string("cl_intel_spirv_subgroups"))); EXPECT_TRUE(hasSubstr(caps.deviceExtensions, std::string("cl_khr_spirv_no_integer_wrap_decoration"))); } } TEST_F(DeviceGetCapsTest, givenSupportMediaBlockWhenCapsAreCreatedThenDeviceReportsClIntelSpirvMediaBlockIoExtensions) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.ForceOCLVersion.set(21); HardwareInfo hwInfo = *defaultHwInfo; hwInfo.capabilityTable.supportsMediaBlock = true; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); const auto &caps = device->getDeviceInfo(); EXPECT_TRUE(hasSubstr(caps.deviceExtensions, std::string("cl_intel_spirv_media_block_io"))); } TEST_F(DeviceGetCapsTest, givenNotMediaBlockWhenCapsAreCreatedThenDeviceNotReportsClIntelSpirvMediaBlockIoExtensions) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.ForceOCLVersion.set(21); HardwareInfo hwInfo = *defaultHwInfo; hwInfo.capabilityTable.supportsMediaBlock = false; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); const auto &caps = device->getDeviceInfo(); EXPECT_FALSE(hasSubstr(caps.deviceExtensions, std::string("cl_intel_spirv_media_block_io"))); } TEST_F(DeviceGetCapsTest, givenSupportImagesWhenCapsAreCreatedThenDeviceReportsClKhr3dImageWritesExtensions) { HardwareInfo hwInfo = *defaultHwInfo; hwInfo.capabilityTable.supportsImages = true; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); const auto &caps = device->getDeviceInfo(); EXPECT_TRUE(hasSubstr(caps.deviceExtensions, std::string("cl_khr_3d_image_writes"))); } TEST_F(DeviceGetCapsTest, givenNotSupportImagesWhenCapsAreCreatedThenDeviceNotReportsClKhr3dImageWritesExtensions) { HardwareInfo hwInfo = *defaultHwInfo; hwInfo.capabilityTable.supportsImages = false; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); const auto 
&caps = device->getDeviceInfo(); EXPECT_FALSE(hasSubstr(caps.deviceExtensions, std::string("cl_khr_3d_image_writes"))); } TEST_F(DeviceGetCapsTest, givenOpenCLVersion12WhenCapsAreCreatedThenDeviceDoesntReportClIntelSpirvExtensions) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.ForceOCLVersion.set(12); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); const auto &caps = device->getDeviceInfo(); EXPECT_FALSE(hasSubstr(caps.deviceExtensions, std::string("cl_intel_spirv_device_side_avc_motion_estimation"))); EXPECT_FALSE(hasSubstr(caps.deviceExtensions, std::string("cl_intel_spirv_subgroups"))); EXPECT_FALSE(hasSubstr(caps.deviceExtensions, std::string("cl_khr_spirv_no_integer_wrap_decoration"))); } TEST_F(DeviceGetCapsTest, givenEnableNV12setToTrueAndSupportImagesWhenCapsAreCreatedThenDeviceReportsNV12Extension) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.EnableNV12.set(true); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); const auto &caps = device->getDeviceInfo(); if (device->getHardwareInfo().capabilityTable.supportsImages) { EXPECT_TRUE(hasSubstr(caps.deviceExtensions, std::string("cl_intel_planar_yuv"))); EXPECT_TRUE(caps.nv12Extension); } else { EXPECT_FALSE(hasSubstr(caps.deviceExtensions, std::string("cl_intel_planar_yuv"))); } } TEST_F(DeviceGetCapsTest, givenEnablePackedYuvsetToTrueAndSupportImagesWhenCapsAreCreatedThenDeviceReportsPackedYuvExtension) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.EnablePackedYuv.set(true); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); const auto &caps = device->getDeviceInfo(); if (device->getHardwareInfo().capabilityTable.supportsImages) { EXPECT_TRUE(hasSubstr(caps.deviceExtensions, std::string("cl_intel_packed_yuv"))); EXPECT_TRUE(caps.packedYuvExtension); } else { EXPECT_FALSE(hasSubstr(caps.deviceExtensions, 
std::string("cl_intel_packed_yuv"))); } } TEST_F(DeviceGetCapsTest, givenSupportImagesWhenCapsAreCreatedThenDeviceReportsPackedYuvAndNV12Extensions) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.EnablePackedYuv.set(true); DebugManager.flags.EnableNV12.set(true); HardwareInfo hwInfo = *defaultHwInfo; hwInfo.capabilityTable.supportsImages = true; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); const auto &caps = device->getDeviceInfo(); EXPECT_TRUE(hasSubstr(caps.deviceExtensions, std::string("cl_intel_packed_yuv"))); EXPECT_TRUE(hasSubstr(caps.deviceExtensions, std::string("cl_intel_planar_yuv"))); } TEST_F(DeviceGetCapsTest, givenNotSupportImagesWhenCapsAreCreatedThenDeviceNotReportsPackedYuvAndNV12Extensions) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.EnablePackedYuv.set(true); DebugManager.flags.EnableNV12.set(true); HardwareInfo hwInfo = *defaultHwInfo; hwInfo.capabilityTable.supportsImages = false; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); const auto &caps = device->getDeviceInfo(); EXPECT_FALSE(hasSubstr(caps.deviceExtensions, std::string("cl_intel_packed_yuv"))); EXPECT_FALSE(hasSubstr(caps.deviceExtensions, std::string("cl_intel_planar_yuv"))); } TEST_F(DeviceGetCapsTest, givenEnableNV12setToFalseWhenCapsAreCreatedThenDeviceDoesNotReportNV12Extension) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.EnableNV12.set(false); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); const auto &caps = device->getDeviceInfo(); EXPECT_FALSE(hasSubstr(caps.deviceExtensions, std::string("cl_intel_planar_yuv"))); EXPECT_FALSE(caps.nv12Extension); } TEST_F(DeviceGetCapsTest, givenEnablePackedYuvsetToFalseWhenCapsAreCreatedThenDeviceDoesNotReportPackedYuvExtension) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.EnablePackedYuv.set(false); auto device = 
std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); const auto &caps = device->getDeviceInfo(); EXPECT_FALSE(hasSubstr(caps.deviceExtensions, std::string("cl_intel_packed_yuv"))); EXPECT_FALSE(caps.packedYuvExtension); } TEST_F(DeviceGetCapsTest, givenEnableVmeSetToTrueAndDeviceSupportsVmeWhenCapsAreCreatedThenDeviceReportsVmeExtensionAndBuiltins) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.EnableIntelVme.set(1); auto hwInfo = *defaultHwInfo; hwInfo.capabilityTable.supportsVme = true; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); const auto &caps = device->getDeviceInfo(); EXPECT_TRUE(hasSubstr(caps.deviceExtensions, std::string("cl_intel_motion_estimation"))); EXPECT_TRUE(hasSubstr(caps.deviceExtensions, std::string("cl_intel_device_side_avc_motion_estimation"))); EXPECT_TRUE(caps.vmeExtension); EXPECT_TRUE(hasSubstr(caps.builtInKernels, "block_motion_estimate_intel")); } TEST_F(DeviceGetCapsTest, givenEnableVmeSetToTrueAndDeviceDoesNotSupportVmeWhenCapsAreCreatedThenDeviceReportsVmeExtensionAndBuiltins) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.EnableIntelVme.set(1); auto hwInfo = *defaultHwInfo; hwInfo.capabilityTable.supportsVme = false; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); const auto &caps = device->getDeviceInfo(); EXPECT_TRUE(hasSubstr(caps.deviceExtensions, std::string("cl_intel_motion_estimation"))); EXPECT_TRUE(hasSubstr(caps.deviceExtensions, std::string("cl_intel_device_side_avc_motion_estimation"))); EXPECT_TRUE(caps.vmeExtension); EXPECT_TRUE(hasSubstr(caps.builtInKernels, "block_motion_estimate_intel")); } TEST_F(DeviceGetCapsTest, givenEnableVmeSetToFalseAndDeviceDoesNotSupportVmeWhenCapsAreCreatedThenDeviceDoesNotReportVmeExtensionAndBuiltins) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.EnableIntelVme.set(0); auto hwInfo = *defaultHwInfo; hwInfo.capabilityTable.supportsVme = 
false; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); const auto &caps = device->getDeviceInfo(); EXPECT_FALSE(hasSubstr(caps.deviceExtensions, std::string("cl_intel_motion_estimation"))); EXPECT_FALSE(hasSubstr(caps.deviceExtensions, std::string("cl_intel_device_side_avc_motion_estimation"))); EXPECT_FALSE(caps.vmeExtension); EXPECT_FALSE(hasSubstr(caps.builtInKernels, "block_motion_estimate_intel")); } /* EnableIntelVme = 0 with supportsVme = true: VME extensions, caps.vmeExtension and the built-in are all absent even though the capability table claims support. */ TEST_F(DeviceGetCapsTest, givenEnableVmeSetToFalseAndDeviceSupportsVmeWhenCapsAreCreatedThenDeviceDoesNotReportVmeExtensionAndBuiltins) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.EnableIntelVme.set(0); auto hwInfo = *defaultHwInfo; hwInfo.capabilityTable.supportsVme = true; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); const auto &caps = device->getDeviceInfo(); EXPECT_FALSE(hasSubstr(caps.deviceExtensions, std::string("cl_intel_motion_estimation"))); EXPECT_FALSE(hasSubstr(caps.deviceExtensions, std::string("cl_intel_device_side_avc_motion_estimation"))); EXPECT_FALSE(caps.vmeExtension); EXPECT_FALSE(hasSubstr(caps.builtInKernels, "block_motion_estimate_intel")); } /* EnableIntelAdvancedVme = 1 with supportsVme = true: cl_intel_advanced_motion_estimation and both advanced built-in kernels are reported. */ TEST_F(DeviceGetCapsTest, givenEnableAdvancedVmeSetToTrueAndDeviceSupportsVmeWhenCapsAreCreatedThenDeviceReportsAdvancedVmeExtensionAndBuiltins) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.EnableIntelAdvancedVme.set(1); auto hwInfo = *defaultHwInfo; hwInfo.capabilityTable.supportsVme = true; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); const auto &caps = device->getDeviceInfo(); EXPECT_TRUE(hasSubstr(caps.deviceExtensions, std::string("cl_intel_advanced_motion_estimation"))); EXPECT_TRUE(hasSubstr(caps.builtInKernels, "block_advanced_motion_estimate_check_intel")); EXPECT_TRUE(hasSubstr(caps.builtInKernels, "block_advanced_motion_estimate_bidirectional_check_intel")); } /* The next test checks that the two cl_khr_int64_*_atomics extensions are listed exactly when ftrSupportsInteger64BitAtomics is set. NOTE(review): its name mentions Fp64, but the body only checks the int64 atomics extensions. */ TEST_F(DeviceGetCapsTest, 
WhenCheckingFp64ThenResultIsConsistentWithHardwareCapabilities) { auto hwInfo = *defaultHwInfo; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); const auto &caps = device->getDeviceInfo(); /* The searched names carry a trailing space, so only a complete extension token in the space-separated list matches. */ if (hwInfo.capabilityTable.ftrSupportsInteger64BitAtomics) { EXPECT_TRUE(hasSubstr(caps.deviceExtensions, "cl_khr_int64_base_atomics ")); EXPECT_TRUE(hasSubstr(caps.deviceExtensions, "cl_khr_int64_extended_atomics ")); } else { EXPECT_FALSE(hasSubstr(caps.deviceExtensions, "cl_khr_int64_base_atomics ")); EXPECT_FALSE(hasSubstr(caps.deviceExtensions, "cl_khr_int64_extended_atomics ")); } } /* EnableIntelAdvancedVme = 1 with supportsVme = false: the advanced VME extension and both advanced built-ins are still reported. */ TEST_F(DeviceGetCapsTest, givenEnableAdvancedVmeSetToTrueAndDeviceDoesNotSupportVmeWhenCapsAreCreatedThenDeviceReportAdvancedVmeExtensionAndBuiltins) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.EnableIntelAdvancedVme.set(1); auto hwInfo = *defaultHwInfo; hwInfo.capabilityTable.supportsVme = false; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); const auto &caps = device->getDeviceInfo(); EXPECT_TRUE(hasSubstr(caps.deviceExtensions, std::string("cl_intel_advanced_motion_estimation"))); EXPECT_TRUE(hasSubstr(caps.builtInKernels, "block_advanced_motion_estimate_check_intel")); EXPECT_TRUE(hasSubstr(caps.builtInKernels, "block_advanced_motion_estimate_bidirectional_check_intel")); } /* EnableIntelAdvancedVme = 0 with supportsVme = false: the advanced VME extension and built-ins are absent. */ TEST_F(DeviceGetCapsTest, givenEnableAdvancedVmeSetToFalseAndDeviceDoesNotSupportVmeWhenCapsAreCreatedThenDeviceDoesNotReportAdvancedVmeExtensionAndBuiltins) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.EnableIntelAdvancedVme.set(0); auto hwInfo = *defaultHwInfo; hwInfo.capabilityTable.supportsVme = false; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); const auto &caps = device->getDeviceInfo(); EXPECT_FALSE(hasSubstr(caps.deviceExtensions, std::string("cl_intel_advanced_motion_estimation"))); EXPECT_FALSE(hasSubstr(caps.builtInKernels, 
"block_advanced_motion_estimate_check_intel")); EXPECT_FALSE(hasSubstr(caps.builtInKernels, "block_advanced_motion_estimate_bidirectional_check_intel")); } /* EnableIntelAdvancedVme = 0 with supportsVme = true: the advanced VME extension and built-ins are absent. */ TEST_F(DeviceGetCapsTest, givenEnableAdvancedVmeSetToFalseAndDeviceSupportsVmeWhenCapsAreCreatedThenDeviceDoesNotReportAdvancedVmeExtensionAndBuiltins) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.EnableIntelAdvancedVme.set(0); auto hwInfo = *defaultHwInfo; hwInfo.capabilityTable.supportsVme = true; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); const auto &caps = device->getDeviceInfo(); EXPECT_FALSE(hasSubstr(caps.deviceExtensions, std::string("cl_intel_advanced_motion_estimation"))); EXPECT_FALSE(hasSubstr(caps.builtInKernels, "block_advanced_motion_estimate_check_intel")); EXPECT_FALSE(hasSubstr(caps.builtInKernels, "block_advanced_motion_estimate_bidirectional_check_intel")); } /* Conditional check: only when areOcl21FeaturesEnabled() is false does the test assert that deviceEnqueueSupport and pipeSupport are false; otherwise nothing is asserted. */ TEST_F(DeviceGetCapsTest, WhenDeviceDoesNotSupportOcl21FeaturesThenDeviceEnqueueAndPipeAreNotSupported) { UltClDeviceFactory deviceFactory{1, 0}; if (deviceFactory.rootDevices[0]->areOcl21FeaturesEnabled() == false) { EXPECT_FALSE(deviceFactory.rootDevices[0]->getDeviceInfo().deviceEnqueueSupport); EXPECT_FALSE(deviceFactory.rootDevices[0]->getDeviceInfo().pipeSupport); } } /* Walks all four combinations of EnableIntelVme / EnableIntelAdvancedVme and verifies builtInKernelsWithVersion entry by entry: expected names in order, each with version 1.0.0, and no trailing extra entries. */ TEST_F(DeviceGetCapsTest, givenVmeRelatedFlagsSetWhenCapsAreCreatedThenDeviceReportCorrectBuiltins) { DebugManagerStateRestore dbgRestorer; for (auto isVmeEnabled : ::testing::Bool()) { DebugManager.flags.EnableIntelVme.set(isVmeEnabled); for (auto isAdvancedVmeEnabled : ::testing::Bool()) { DebugManager.flags.EnableIntelAdvancedVme.set(isAdvancedVmeEnabled); UltClDeviceFactory deviceFactory{1, 0}; const auto &caps = deviceFactory.rootDevices[0]->getDeviceInfo(); EXPECT_FALSE(caps.builtInKernelsWithVersion.usesDynamicMem()); /* Iterator advanced past each expected entry; it must equal end() once all expected entries are consumed. */ auto builtInKernelWithVersion = caps.builtInKernelsWithVersion.begin(); if (isVmeEnabled) { EXPECT_STREQ("block_motion_estimate_intel", builtInKernelWithVersion->name); 
EXPECT_EQ(CL_MAKE_VERSION(1u, 0u, 0u), builtInKernelWithVersion->version); builtInKernelWithVersion++; } if (isAdvancedVmeEnabled) { EXPECT_STREQ("block_advanced_motion_estimate_check_intel", builtInKernelWithVersion->name); EXPECT_EQ(CL_MAKE_VERSION(1u, 0u, 0u), builtInKernelWithVersion->version); builtInKernelWithVersion++; EXPECT_STREQ("block_advanced_motion_estimate_bidirectional_check_intel", builtInKernelWithVersion->name); EXPECT_EQ(CL_MAKE_VERSION(1u, 0u, 0u), builtInKernelWithVersion->version); builtInKernelWithVersion++; } /* No entries beyond the expected ones. */ EXPECT_EQ(caps.builtInKernelsWithVersion.end(), builtInKernelWithVersion); } } } /* A device created from the default hardware info lists cl_khr_priority_hints. */ TEST_F(DeviceGetCapsTest, WhenDeviceIsCreatedThenPriorityHintsExtensionIsReported) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); const auto &caps = device->getDeviceInfo(); EXPECT_TRUE(hasSubstr(caps.deviceExtensions, std::string("cl_khr_priority_hints"))); } /* A device created from the default hardware info lists cl_khr_create_command_queue. */ TEST_F(DeviceGetCapsTest, WhenDeviceIsCreatedThenCreateCommandQueueExtensionIsReported) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); const auto &caps = device->getDeviceInfo(); EXPECT_TRUE(hasSubstr(caps.deviceExtensions, std::string("cl_khr_create_command_queue"))); } /* A device created from the default hardware info lists cl_khr_throttle_hints. */ TEST_F(DeviceGetCapsTest, WhenDeviceIsCreatedThenThrottleHintsExtensionIsReported) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); const auto &caps = device->getDeviceInfo(); EXPECT_TRUE(hasSubstr(caps.deviceExtensions, std::string("cl_khr_throttle_hints"))); } /* A device created from the default hardware info lists cl_intel_subgroups_char. */ TEST_F(DeviceGetCapsTest, GivenAnyDeviceWhenCheckingExtensionsThenSupportSubgroupsChar) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); const auto &caps = device->getDeviceInfo(); EXPECT_TRUE(hasSubstr(caps.deviceExtensions, std::string("cl_intel_subgroups_char"))); } /* A device created from the default hardware info lists cl_intel_subgroups_long. */ TEST_F(DeviceGetCapsTest, GivenAnyDeviceWhenCheckingExtensionsThenSupportSubgroupsLong) { auto 
device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); const auto &caps = device->getDeviceInfo(); EXPECT_TRUE(hasSubstr(caps.deviceExtensions, std::string("cl_intel_subgroups_long"))); } /* A device created from the default hardware info lists cl_intel_mem_force_host_memory. */ TEST_F(DeviceGetCapsTest, GivenAnyDeviceWhenCheckingExtensionsThenSupportForceHostMemory) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); const auto &caps = device->getDeviceInfo(); EXPECT_TRUE(hasSubstr(caps.deviceExtensions, std::string("cl_intel_mem_force_host_memory"))); } /* ForceOCLVersion = 21: the mipmap extensions are expected exactly when the device's capability table reports supportsImages; cl_intel_unified_shared_memory is expected unconditionally. */ TEST_F(DeviceGetCapsTest, givenAtleastOCL21DeviceThenExposesMipMapAndUnifiedMemoryExtensions) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.ForceOCLVersion.set(21); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); const auto &caps = device->getDeviceInfo(); std::string extensionString = caps.deviceExtensions; if (device->getHardwareInfo().capabilityTable.supportsImages) { EXPECT_TRUE(hasSubstr(extensionString, std::string("cl_khr_mipmap_image"))); EXPECT_TRUE(hasSubstr(extensionString, std::string("cl_khr_mipmap_image_writes"))); } else { EXPECT_EQ(std::string::npos, extensionString.find(std::string("cl_khr_mipmap_image"))); EXPECT_EQ(std::string::npos, extensionString.find(std::string("cl_khr_mipmap_image_writes"))); } EXPECT_TRUE(hasSubstr(caps.deviceExtensions, std::string("cl_intel_unified_shared_memory"))); } /* ForceOCLVersion = 21 with supportsImages forced to true: both mipmap extensions are listed. NOTE(review): "MinMap" in the test name presumably means "MipMap". */ TEST_F(DeviceGetCapsTest, givenSupportImagesWhenCapsAreCreatedThenDeviceReportsMinMapExtensions) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.ForceOCLVersion.set(21); HardwareInfo hwInfo = *defaultHwInfo; hwInfo.capabilityTable.supportsImages = true; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); const auto &caps = device->getDeviceInfo(); EXPECT_TRUE(hasSubstr(caps.deviceExtensions, std::string("cl_khr_mipmap_image"))); EXPECT_TRUE(hasSubstr(caps.deviceExtensions, 
std::string("cl_khr_mipmap_image_writes"))); } /* ForceOCLVersion = 20 with supportsImages = false: neither mipmap extension is listed. NOTE(review): "MinMap" in the test name presumably means "MipMap". */ TEST_F(DeviceGetCapsTest, givenNotSupportImagesWhenCapsAreCreatedThenDeviceNotReportsMinMapExtensions) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.ForceOCLVersion.set(20); HardwareInfo hwInfo = *defaultHwInfo; hwInfo.capabilityTable.supportsImages = false; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); const auto &caps = device->getDeviceInfo(); EXPECT_FALSE(hasSubstr(caps.deviceExtensions, std::string("cl_khr_mipmap_image"))); EXPECT_FALSE(hasSubstr(caps.deviceExtensions, std::string("cl_khr_mipmap_image_writes"))); } /* ForceOCLVersion = 12: the mipmap extensions and cl_intel_unified_shared_memory are all absent. */ TEST_F(DeviceGetCapsTest, givenOCL12DeviceThenDoesNotExposesMipMapAndUnifiedMemoryExtensions) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.ForceOCLVersion.set(12); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); const auto &caps = device->getDeviceInfo(); EXPECT_FALSE(hasSubstr(caps.deviceExtensions, std::string("cl_khr_mipmap_image"))); EXPECT_FALSE(hasSubstr(caps.deviceExtensions, std::string("cl_khr_mipmap_image_writes"))); EXPECT_FALSE(hasSubstr(caps.deviceExtensions, std::string("cl_intel_unified_shared_memory"))); } /* ForceOCLVersion = 20 with supportsImages = true: cl_khr_image2d_from_buffer and cl_khr_depth_images are listed. */ TEST_F(DeviceGetCapsTest, givenSupportImagesWhenCreateExtentionsListThenDeviceReportsImagesExtensions) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.ForceOCLVersion.set(20); HardwareInfo hwInfo = *defaultHwInfo; hwInfo.capabilityTable.supportsImages = true; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); const auto extensions = device->getDeviceInfo().deviceExtensions; EXPECT_TRUE(hasSubstr(extensions, std::string("cl_khr_image2d_from_buffer"))); EXPECT_TRUE(hasSubstr(extensions, std::string("cl_khr_depth_images"))); } /* ForceOCLVersion = 20 with supportsImages = false: the two image extensions are not listed. */ TEST_F(DeviceGetCapsTest, givenNotSupporteImagesWhenCreateExtentionsListThenDeviceNotReportsImagesExtensions) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.ForceOCLVersion.set(20); 
HardwareInfo hwInfo = *defaultHwInfo; hwInfo.capabilityTable.supportsImages = false; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); const auto extensions = device->getDeviceInfo().deviceExtensions; EXPECT_FALSE(hasSubstr(extensions, std::string("cl_khr_image2d_from_buffer"))); EXPECT_FALSE(hasSubstr(extensions, std::string("cl_khr_depth_images"))); } /* caps.hostUnifiedMemory must be the negation of HwHelper::isLocalMemoryEnabled() for the default hardware info. */ TEST_F(DeviceGetCapsTest, givenDeviceWhenGettingHostUnifiedMemoryCapThenItDependsOnLocalMemory) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); const auto &caps = device->getDeviceInfo(); auto &hwHelper = HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily); auto localMemoryEnabled = hwHelper.isLocalMemoryEnabled(*defaultHwInfo); EXPECT_EQ((localMemoryEnabled == false), caps.hostUnifiedMemory); } /* extensionsWithVersion is empty until CL_DEVICE_EXTENSIONS_WITH_VERSION is queried; afterwards every entry has version 1.0.0 and the names joined with a trailing space each reproduce deviceExtensions exactly. */ TEST_F(DeviceGetCapsTest, givenDefaultDeviceWhenQueriedForExtensionsWithVersionThenValuesMatchWithExtensionsString) { UltClDeviceFactory deviceFactory{1, 0}; auto pClDevice = deviceFactory.rootDevices[0]; std::string allExtensions; EXPECT_TRUE(pClDevice->getDeviceInfo().extensionsWithVersion.empty()); /* The query itself (with no output buffer) is what populates the list. */ pClDevice->getDeviceInfo(CL_DEVICE_EXTENSIONS_WITH_VERSION, 0, nullptr, nullptr); EXPECT_FALSE(pClDevice->getDeviceInfo().extensionsWithVersion.empty()); for (auto extensionWithVersion : pClDevice->getDeviceInfo().extensionsWithVersion) { EXPECT_EQ(CL_MAKE_VERSION(1u, 0u, 0u), extensionWithVersion.version); allExtensions += extensionWithVersion.name; allExtensions += " "; } EXPECT_STREQ(pClDevice->deviceExtensions.c_str(), allExtensions.c_str()); } /* Sweeps hardware FP64 support (false/true) against OverrideDefaultFP64Settings in {-1, 0, 1}; -1 means "follow the hardware", 0/1 force the result. For each combination the cl_khr_fp64 extension, doubleFpConfig, the __opencl_c_fp64 feature count and the CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT bit of singleFpConfig must agree with the expected support. */ TEST_F(DeviceGetCapsTest, givenFp64SupportForcedWhenCheckingFp64SupportThenFp64IsCorrectlyReported) { DebugManagerStateRestore dbgRestorer; int32_t overrideDefaultFP64SettingsValues[] = {-1, 0, 1}; auto hwInfo = *defaultHwInfo; for (auto isFp64SupportedByHw : ::testing::Bool()) { hwInfo.capabilityTable.ftrSupportsFP64 = isFp64SupportedByHw; hwInfo.capabilityTable.ftrSupports64BitMath = 
isFp64SupportedByHw; for (auto overrideDefaultFP64Settings : overrideDefaultFP64SettingsValues) { DebugManager.flags.OverrideDefaultFP64Settings.set(overrideDefaultFP64Settings); auto pClDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); auto &caps = pClDevice->getDeviceInfo(); std::string extensionString = pClDevice->getDeviceInfo().deviceExtensions; /* Count how many times the __opencl_c_fp64 feature is listed (expected: exactly 0 or 1). */ size_t fp64FeaturesCount = 0; for (auto &openclCFeature : caps.openclCFeatures) { if (0 == strcmp(openclCFeature.name, "__opencl_c_fp64")) { fp64FeaturesCount++; } } /* -1 defers to the hardware capability; 0 and 1 are used directly as the expected boolean. */ bool expectedFp64Support = ((overrideDefaultFP64Settings == -1) ? isFp64SupportedByHw : overrideDefaultFP64Settings); if (expectedFp64Support) { EXPECT_NE(std::string::npos, extensionString.find(std::string("cl_khr_fp64"))); EXPECT_NE(0u, caps.doubleFpConfig); EXPECT_EQ(1u, fp64FeaturesCount); EXPECT_TRUE(isValueSet(caps.singleFpConfig, CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT)); } else { EXPECT_EQ(std::string::npos, extensionString.find(std::string("cl_khr_fp64"))); EXPECT_EQ(0u, caps.doubleFpConfig); EXPECT_EQ(0u, fp64FeaturesCount); EXPECT_FALSE(isValueSet(caps.singleFpConfig, CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT)); } } } } /* peekCompilerExtensions() must return a reference to the compilerExtensions member itself (address comparison). */ TEST(DeviceGetCaps, WhenPeekingCompilerExtensionsThenCompilerExtensionsAreReturned) { UltClDeviceFactory deviceFactory{1, 0}; auto pClDevice = deviceFactory.rootDevices[0]; EXPECT_EQ(&pClDevice->compilerExtensions, &pClDevice->peekCompilerExtensions()); } /* compilerExtensions must equal convertEnabledExtensionsToCompilerInternalOptions() applied to deviceExtensions with an empty OpenCL C features container. */ TEST(DeviceGetCaps, WhenCheckingCompilerExtensionsThenValueIsCorrect) { UltClDeviceFactory deviceFactory{1, 0}; auto pClDevice = deviceFactory.rootDevices[0]; OpenClCFeaturesContainer emptyOpenClCFeatures; auto expectedCompilerExtensions = convertEnabledExtensionsToCompilerInternalOptions(pClDevice->deviceInfo.deviceExtensions, emptyOpenClCFeatures); EXPECT_STREQ(expectedCompilerExtensions.c_str(), pClDevice->compilerExtensions.c_str()); } /* peekCompilerExtensionsWithFeatures() must return a reference to the compilerExtensionsWithFeatures member itself (address comparison). */ TEST(DeviceGetCaps, WhenPeekingCompilerExtensionsWithFeaturesThenCompilerExtensionsWithFeaturesAreReturned) { 
UltClDeviceFactory deviceFactory{1, 0}; auto pClDevice = deviceFactory.rootDevices[0]; EXPECT_EQ(&pClDevice->compilerExtensionsWithFeatures, &pClDevice->peekCompilerExtensionsWithFeatures()); } /* compilerExtensionsWithFeatures must equal convertEnabledExtensionsToCompilerInternalOptions() applied to deviceExtensions together with the device's openclCFeatures. */ TEST(DeviceGetCaps, WhenCheckingCompilerExtensionsWithFeaturesThenValueIsCorrect) { UltClDeviceFactory deviceFactory{1, 0}; auto pClDevice = deviceFactory.rootDevices[0]; auto expectedCompilerExtensions = convertEnabledExtensionsToCompilerInternalOptions(pClDevice->deviceInfo.deviceExtensions, pClDevice->deviceInfo.openclCFeatures); EXPECT_STREQ(expectedCompilerExtensions.c_str(), pClDevice->compilerExtensionsWithFeatures.c_str()); } /* After dropping the last character of compilerExtensions, the remainder must be a prefix of compilerExtensionsWithFeatures. */ TEST(DeviceGetCaps, WhenComparingCompilerExtensionsAndCompilerExtensionsWithFeaturesThenValuesMatch) { UltClDeviceFactory deviceFactory{1, 0}; auto pClDevice = deviceFactory.rootDevices[0]; auto compilerExtensions = pClDevice->compilerExtensions; auto compilerExtensionsWithFeatures = pClDevice->compilerExtensionsWithFeatures; /* erase(pos) removes everything from pos to the end, i.e. just the final character here. */ compilerExtensions.erase(compilerExtensions.size() - 1); EXPECT_STREQ(compilerExtensions.c_str(), compilerExtensionsWithFeatures.substr(0, compilerExtensions.size()).c_str()); } /* ftrPooledEuEnabled = 0 with a deliberately huge EuCountPerPoolMin: the expected max work-group size is derived from getMaxThreadsForWorkgroupInDSSOrSS() times the minimal SIMD size, rounded down to a power of two and capped at 1024, so the pool minimum must have no influence. */ HWTEST_F(DeviceGetCapsTest, givenDisabledFtrPooledEuWhenCalculatingMaxEuPerSSThenIgnoreEuCountPerPoolMin) { HardwareInfo myHwInfo = *defaultHwInfo; GT_SYSTEM_INFO &mySysInfo = myHwInfo.gtSystemInfo; FeatureTable &mySkuTable = myHwInfo.featureTable; mySysInfo.EUCount = 20; mySysInfo.EuCountPerPoolMin = 99999; mySkuTable.flags.ftrPooledEuEnabled = 0; auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(&myHwInfo)); auto &deviceInfo = device->deviceInfo; auto simdSizeUsed = HwHelperHw::get().getMinimalSIMDSize(); auto hwInfoConfig = HwInfoConfig::get(myHwInfo.platform.eProductFamily); auto expectedMaxWGS = hwInfoConfig->getMaxThreadsForWorkgroupInDSSOrSS(myHwInfo, static_cast(deviceInfo.maxNumEUsPerSubSlice), static_cast(deviceInfo.maxNumEUsPerDualSubSlice)) * simdSizeUsed; expectedMaxWGS = 
std::min(Math::prevPowerOfTwo(expectedMaxWGS), 1024u); EXPECT_EQ(expectedMaxWGS, device->getDeviceInfo().maxWorkGroupSize); } /* ftrPooledEuEnabled = 1: the expectation is built from EuCountPerPoolMin times threads-per-EU times 8, rounded down to a power of two and capped at 1024, so the pool minimum now drives the result. */ HWTEST_F(DeviceGetCapsTest, givenEnabledFtrPooledEuWhenCalculatingMaxEuPerSSThenDontIgnoreEuCountPerPoolMin) { HardwareInfo myHwInfo = *defaultHwInfo; GT_SYSTEM_INFO &mySysInfo = myHwInfo.gtSystemInfo; FeatureTable &mySkuTable = myHwInfo.featureTable; mySysInfo.EUCount = 20; mySysInfo.EuCountPerPoolMin = 99999; mySkuTable.flags.ftrPooledEuEnabled = 1; auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(&myHwInfo)); auto expectedMaxWGS = mySysInfo.EuCountPerPoolMin * (mySysInfo.ThreadCount / mySysInfo.EUCount) * 8; expectedMaxWGS = std::min(Math::prevPowerOfTwo(expectedMaxWGS), 1024u); EXPECT_EQ(expectedMaxWGS, device->getDeviceInfo().maxWorkGroupSize); } /* Runs only when REQUIRE_OCL_21_OR_SKIP lets it through. With UseMaxSimdSizeToDeduceMaxWorkgroupSize set, 24 EUs / 3 subslices / 7 threads per EU must yield maxWorkGroupSize 1024, and maxNumOfSubGroups must equal that size divided by CommonConstants::maximalSimdSize. */ TEST(DeviceGetCaps, givenDebugFlagToUseMaxSimdSizeForWkgCalculationWhenDeviceCapsAreCreatedThen1024WorkgroupSizeIsReturned) { REQUIRE_OCL_21_OR_SKIP(defaultHwInfo); DebugManagerStateRestore dbgRestorer; DebugManager.flags.UseMaxSimdSizeToDeduceMaxWorkgroupSize.set(true); HardwareInfo myHwInfo = *defaultHwInfo; GT_SYSTEM_INFO &mySysInfo = myHwInfo.gtSystemInfo; mySysInfo.EUCount = 24; mySysInfo.SubSliceCount = 3; mySysInfo.ThreadCount = 24 * 7; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&myHwInfo)); EXPECT_EQ(1024u, device->getSharedDeviceInfo().maxWorkGroupSize); EXPECT_EQ(device->getSharedDeviceInfo().maxWorkGroupSize / CommonConstants::maximalSimdSize, device->getDeviceInfo().maxNumOfSubGroups); } /* Runs only when REQUIRE_OCL_21_OR_SKIP lets it through. 32 EUs over 2 subslices with ThreadCount = 32 * minimal SIMD size must report maxWorkGroupSize 1024, and maxNumOfSubGroups must equal that size divided by the minimal SIMD size. */ HWTEST_F(DeviceGetCapsTest, givenDeviceThatHasHighNumberOfExecutionUnitsWhenMaxWorkgroupSizeIsComputedThenItIsLimitedTo1024) { REQUIRE_OCL_21_OR_SKIP(defaultHwInfo); HardwareInfo myHwInfo = *defaultHwInfo; GT_SYSTEM_INFO &mySysInfo = myHwInfo.gtSystemInfo; auto &hwHelper = HwHelper::get(myHwInfo.platform.eRenderCoreFamily); mySysInfo.EUCount = 32; mySysInfo.SubSliceCount = 2; mySysInfo.ThreadCount = 32 * hwHelper.getMinimalSIMDSize(); // 
128 threads per subslice, in simd 8 gives 1024 auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&myHwInfo)); EXPECT_EQ(1024u, device->getSharedDeviceInfo().maxWorkGroupSize); EXPECT_EQ(device->getSharedDeviceInfo().maxWorkGroupSize / hwHelper.getMinimalSIMDSize(), device->getDeviceInfo().maxNumOfSubGroups); } /* Installs a DriverInfoMock carrying a custom device name and version, re-runs initializeCaps(), and expects caps.name and caps.driverVersion to carry exactly those strings. */ TEST_F(DeviceGetCapsTest, givenSystemWithDriverInfoWhenGettingNameAndVersionThenReturnValuesFromDriverInfo) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); const std::string testDeviceName = "testDeviceName"; const std::string testVersion = "testVersion"; DriverInfoMock *driverInfoMock = new DriverInfoMock(); driverInfoMock->setDeviceName(testDeviceName); driverInfoMock->setVersion(testVersion); /* The raw pointer is handed to device->driverInfo via reset(). */ device->driverInfo.reset(driverInfoMock); device->initializeCaps(); const auto &caps = device->getDeviceInfo(); EXPECT_STREQ(testDeviceName.c_str(), caps.name); EXPECT_STREQ(testVersion.c_str(), caps.driverVersion); } /* A DriverInfoMock whose PCI bus info consists solely of InvalidValue fields must not cause cl_khr_pci_bus_info to be listed. */ TEST_F(DeviceGetCapsTest, givenNoPciBusInfoThenPciBusInfoExtensionNotAvailable) { const PhysicalDevicePciBusInfo pciBusInfo(PhysicalDevicePciBusInfo::InvalidValue, PhysicalDevicePciBusInfo::InvalidValue, PhysicalDevicePciBusInfo::InvalidValue, PhysicalDevicePciBusInfo::InvalidValue); DriverInfoMock *driverInfoMock = new DriverInfoMock(); driverInfoMock->setPciBusInfo(pciBusInfo); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); device->driverInfo.reset(driverInfoMock); device->initializeCaps(); const auto &caps = device->getDeviceInfo(); EXPECT_FALSE(hasSubstr(caps.deviceExtensions, "cl_khr_pci_bus_info")); } /* With PCI bus info (1, 2, 3, 4) supplied by the driver-info mock, cl_khr_pci_bus_info is listed and caps.pciBusInfo mirrors domain, bus, device and function. */ TEST_F(DeviceGetCapsTest, givenPciBusInfoThenPciBusInfoExtensionAvailable) { const PhysicalDevicePciBusInfo pciBusInfo(1, 2, 3, 4); DriverInfoMock *driverInfoMock = new DriverInfoMock(); driverInfoMock->setPciBusInfo(pciBusInfo); auto device = 
std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); device->driverInfo.reset(driverInfoMock); device->initializeCaps(); const auto &caps = device->getDeviceInfo(); EXPECT_TRUE(hasSubstr(caps.deviceExtensions, "cl_khr_pci_bus_info")); EXPECT_EQ(caps.pciBusInfo.pci_domain, pciBusInfo.pciDomain); EXPECT_EQ(caps.pciBusInfo.pci_bus, pciBusInfo.pciBus); EXPECT_EQ(caps.pciBusInfo.pci_device, pciBusInfo.pciDevice); EXPECT_EQ(caps.pciBusInfo.pci_function, pciBusInfo.pciFunction); } /* File-level flag set by MyMockHwHelper::getPlanarYuvMaxHeight() so a test can observe that the helper was consulted. */ static bool getPlanarYuvHeightCalled = false; /* HW helper mock: records the call in getPlanarYuvHeightCalled and returns the recognisable dummy value 0x12345. */ template class MyMockHwHelper : public HwHelperHw { public: uint32_t getPlanarYuvMaxHeight() const override { getPlanarYuvHeightCalled = true; return dummyPlanarYuvValue; } uint32_t dummyPlanarYuvValue = 0x12345; }; /* Substitutes MyMockHwHelper through RAIIHwHelperFactory, re-runs initializeCaps(), and checks both that the helper was called and that caps.planarYuvMaxHeight equals the mock's dummy value. The global flag is reset to false after it has been checked. */ HWTEST_F(DeviceGetCapsTest, givenDeviceWhenInitializingCapsThenPlanarYuvHeightIsTakenFromHelper) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); RAIIHwHelperFactory> hwHelperBackup{device->getHardwareInfo().platform.eRenderCoreFamily}; DriverInfoMock *driverInfoMock = new DriverInfoMock(); device->driverInfo.reset(driverInfoMock); device->initializeCaps(); EXPECT_TRUE(getPlanarYuvHeightCalled); getPlanarYuvHeightCalled = false; const auto &caps = device->getDeviceInfo(); EXPECT_EQ(hwHelperBackup.mockHwHelper.dummyPlanarYuvValue, caps.planarYuvMaxHeight); } /* With driverInfo reset to empty and the cached name cleared, initializeCaps() must fall back to getClDeviceName() for caps.name and to the stringified NEO_OCL_DRIVER_VERSION macro for caps.driverVersion. QTR/TOSTR are the usual two-step stringification helpers, defined and undefined locally. */ TEST_F(DeviceGetCapsTest, givenSystemWithNoDriverInfoWhenGettingNameAndVersionThenReturnDefaultValues) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); device->driverInfo.reset(); device->name.clear(); device->initializeCaps(); const auto &caps = device->getDeviceInfo(); std::string tempName = device->getClDeviceName(*defaultHwInfo.get()); #define QTR(a) #a #define TOSTR(b) QTR(b) const std::string expectedVersion = TOSTR(NEO_OCL_DRIVER_VERSION); #undef QTR #undef TOSTR EXPECT_STREQ(tempName.c_str(), caps.name); EXPECT_STREQ(expectedVersion.c_str(), 
caps.driverVersion); } /* Enable64kbpages = -1 makes a freshly constructed OsAgnosticMemoryManager follow capabilityTable.ftr64KBpages (checked for both false and true); 0 forces peek64kbPagesEnabled() to false and 1 forces it to true. A new memory manager is constructed after every change. */ TEST_F(DeviceGetCapsTest, givenFlagEnabled64kbPagesWhenCallConstructorOsAgnosticMemoryManagerThenReturnCorrectValue) { DebugManagerStateRestore dbgRestore; MockExecutionEnvironment executionEnvironment; executionEnvironment.prepareRootDeviceEnvironments(1); auto &capabilityTable = executionEnvironment.rootDeviceEnvironments[0]->getMutableHardwareInfo()->capabilityTable; std::unique_ptr memoryManager; DebugManager.flags.Enable64kbpages.set(-1); capabilityTable.ftr64KBpages = false; memoryManager.reset(new OsAgnosticMemoryManager(executionEnvironment)); EXPECT_FALSE(memoryManager->peek64kbPagesEnabled(0u)); capabilityTable.ftr64KBpages = true; memoryManager.reset(new OsAgnosticMemoryManager(executionEnvironment)); EXPECT_TRUE(memoryManager->peek64kbPagesEnabled(0u)); DebugManager.flags.Enable64kbpages.set(0); // force false memoryManager.reset(new OsAgnosticMemoryManager(executionEnvironment)); EXPECT_FALSE(memoryManager->peek64kbPagesEnabled(0u)); DebugManager.flags.Enable64kbpages.set(1); // force true memoryManager.reset(new OsAgnosticMemoryManager(executionEnvironment)); EXPECT_TRUE(memoryManager->peek64kbPagesEnabled(0u)); } /* The shared device info of a default device must report maxParameterSize == 2048. */ TEST_F(DeviceGetCapsTest, whenDeviceIsCreatedThenMaxParameterSizeIsSetCorrectly) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); const auto &caps = device->getSharedDeviceInfo(); EXPECT_EQ(2048u, caps.maxParameterSize); } /* EnableSharedSystemUsmSupport = 0 gives sharedSystemMemCapabilities == 0 and disallows shared system allocations; with the flag set to 1 a newly created device reports all four access capability bits and allows them. */ TEST_F(DeviceGetCapsTest, givenUnifiedMemorySharedSystemFlagWhenDeviceIsCreatedThenSystemMemoryIsSetCorrectly) { DebugManagerStateRestore restorer; DebugManager.flags.EnableSharedSystemUsmSupport.set(0u); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); EXPECT_EQ(0u, device->getDeviceInfo().sharedSystemMemCapabilities); EXPECT_FALSE(device->areSharedSystemAllocationsAllowed()); DebugManager.flags.EnableSharedSystemUsmSupport.set(1u); device.reset(new 
MockClDevice{MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())}); cl_unified_shared_memory_capabilities_intel expectedProperties = CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL | CL_UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL | CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS_INTEL | CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS_INTEL; EXPECT_EQ(expectedProperties, device->getDeviceInfo().sharedSystemMemCapabilities); EXPECT_TRUE(device->areSharedSystemAllocationsAllowed()); } /* A default device has no debugger attached (getDebugger() is nullptr) and therefore reports debuggerActive == false. */ TEST_F(DeviceGetCapsTest, givenDeviceWithNullSourceLevelDebuggerWhenCapsAreInitializedThenSourceLevelDebuggerActiveIsSetToFalse) { std::unique_ptr device(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); const auto &caps = device->getDeviceInfo(); EXPECT_EQ(nullptr, device->getDebugger()); EXPECT_FALSE(caps.debuggerActive); } /* Conditional check: only when getEnabledClVersion() is 21 does the test assert that the capability table reports supportsPipes == 1; on other versions nothing is asserted. */ TEST_F(DeviceGetCapsTest, givenOcl21DeviceWhenCheckingPipesSupportThenPipesAreSupported) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); if (device->getEnabledClVersion() == 21) { EXPECT_EQ(1u, device->getHardwareInfo().capabilityTable.supportsPipes); } } /* For both values of capabilityTable.supportsDeviceEnqueue the device must report no device-enqueue support: all on-device queue/event limits are zero, queueOnDeviceProperties is zero, and __opencl_c_device_enqueue is not among the OpenCL C features. */ TEST_F(DeviceGetCapsTest, givenCapsDeviceEnqueueWhenCheckingDeviceEnqueueSupportThenNoSupportReported) { auto hwInfo = *defaultHwInfo; for (auto isDeviceEnqueueSupportedByHw : ::testing::Bool()) { hwInfo.capabilityTable.supportsDeviceEnqueue = isDeviceEnqueueSupportedByHw; auto pClDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); auto &caps = pClDevice->getDeviceInfo(); size_t deviceEnqueueFeaturesCount = 0; for (auto &openclCFeature : caps.openclCFeatures) { if (0 == strcmp(openclCFeature.name, "__opencl_c_device_enqueue")) { deviceEnqueueFeaturesCount++; } } EXPECT_EQ(0u, caps.maxOnDeviceEvents); EXPECT_EQ(0u, caps.maxOnDeviceQueues); EXPECT_EQ(0u, caps.queueOnDeviceMaxSize); EXPECT_EQ(0u, caps.queueOnDevicePreferredSize); EXPECT_EQ(static_cast(0), caps.queueOnDeviceProperties); EXPECT_EQ(0u, 
deviceEnqueueFeaturesCount); } } /* Sweeps hardware pipe support (false/true) against ForcePipeSupport in {-1, 0, 1}; -1 follows the hardware, 0/1 force the result. When pipes are expected: arePipesSupported() is true, maxPipeArgs is 16, pipeMaxPacketSize is 1024, pipeMaxActiveReservations is 1 and __opencl_c_pipes is listed once. Otherwise all of these are false or zero. */ TEST_F(DeviceGetCapsTest, givenPipeSupportForcedWhenCheckingPipeSupportThenPipeIsCorrectlyReported) { DebugManagerStateRestore dbgRestorer; int32_t forcePipeSupportValues[] = {-1, 0, 1}; auto hwInfo = *defaultHwInfo; for (auto isPipeSupportedByHw : ::testing::Bool()) { hwInfo.capabilityTable.supportsPipes = isPipeSupportedByHw; for (auto forcePipeSupport : forcePipeSupportValues) { DebugManager.flags.ForcePipeSupport.set(forcePipeSupport); auto pClDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); auto &caps = pClDevice->getDeviceInfo(); size_t pipeFeaturesCount = 0; for (auto &openclCFeature : caps.openclCFeatures) { if (0 == strcmp(openclCFeature.name, "__opencl_c_pipes")) { pipeFeaturesCount++; } } /* -1 defers to the hardware capability; 0 and 1 are used directly as the expected boolean. */ bool expectedPipeSupport = ((forcePipeSupport == -1) ? isPipeSupportedByHw : forcePipeSupport); if (expectedPipeSupport) { EXPECT_TRUE(pClDevice->arePipesSupported()); EXPECT_EQ(16u, caps.maxPipeArgs); EXPECT_EQ(1024u, caps.pipeMaxPacketSize); EXPECT_EQ(1u, caps.pipeMaxActiveReservations); EXPECT_EQ(1u, pipeFeaturesCount); } else { EXPECT_FALSE(pClDevice->arePipesSupported()); EXPECT_EQ(0u, caps.maxPipeArgs); EXPECT_EQ(0u, caps.pipeMaxPacketSize); EXPECT_EQ(0u, caps.pipeMaxActiveReservations); EXPECT_EQ(0u, pipeFeaturesCount); } } } } /* Two phases on the same unique_ptr. With supportsVme and both AVC feature bits cleared, vmeVersion and vmeAvcVersion are 0. With all three set, a freshly created device reports CL_ME_VERSION_ADVANCED_VER_2_INTEL and CL_AVC_ME_VERSION_1_INTEL. In both phases the preemption and texture-sampler caps mirror the capability table. */ TEST(Device_UseCaps, givenCapabilityTableWhenDeviceInitializeCapsThenVmeVersionsAreSetProperly) { HardwareInfo hwInfo = *defaultHwInfo; cl_uint expectedVmeVersion = CL_ME_VERSION_ADVANCED_VER_2_INTEL; cl_uint expectedVmeAvcVersion = CL_AVC_ME_VERSION_1_INTEL; hwInfo.capabilityTable.supportsVme = 0; hwInfo.capabilityTable.ftrSupportsVmeAvcTextureSampler = 0; hwInfo.capabilityTable.ftrSupportsVmeAvcPreemption = 0; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); { auto &caps = device->getDeviceInfo(); auto &sharedCaps = device->getSharedDeviceInfo(); EXPECT_EQ(0u, caps.vmeVersion); EXPECT_EQ(0u, caps.vmeAvcVersion); 
EXPECT_EQ(hwInfo.capabilityTable.ftrSupportsVmeAvcPreemption, sharedCaps.vmeAvcSupportsPreemption); EXPECT_EQ(hwInfo.capabilityTable.ftrSupportsVmeAvcTextureSampler, caps.vmeAvcSupportsTextureSampler); } hwInfo.capabilityTable.supportsVme = 1; hwInfo.capabilityTable.ftrSupportsVmeAvcTextureSampler = 1; hwInfo.capabilityTable.ftrSupportsVmeAvcPreemption = 1; /* Caps are computed at device creation, so a new device is built for the second phase. */ device.reset(new MockClDevice{MockDevice::createWithNewExecutionEnvironment(&hwInfo)}); { auto &caps = device->getDeviceInfo(); auto &sharedCaps = device->getSharedDeviceInfo(); EXPECT_EQ(expectedVmeVersion, caps.vmeVersion); EXPECT_EQ(expectedVmeAvcVersion, caps.vmeAvcVersion); EXPECT_EQ(hwInfo.capabilityTable.ftrSupportsVmeAvcPreemption, sharedCaps.vmeAvcSupportsPreemption); EXPECT_EQ(hwInfo.capabilityTable.ftrSupportsVmeAvcTextureSampler, caps.vmeAvcSupportsTextureSampler); } } /* OverrideSlmSize = -1 leaves localMemSize (expressed in KB) equal to capabilityTable.slmSize; setting the flag to 1 makes a newly created device report 1 KB. The EXPECT_NE guards against the default already being 1, which would make the second check vacuous. */ TEST(Device_UseCaps, givenOverrideSlmSizeWhenWhenInitializeDeviceThenSlmSizeInDeviceInfoIsCorrect) { DebugManagerStateRestore restorer; HardwareInfo hardwareInfo = *defaultHwInfo; uint32_t defaultSlmSize = hardwareInfo.capabilityTable.slmSize; DebugManager.flags.OverrideSlmSize.set(-1); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hardwareInfo)); auto &deviceInfoWithoutForceSlmFlag = device->getSharedDeviceInfo(); EXPECT_EQ(defaultSlmSize, static_cast(deviceInfoWithoutForceSlmFlag.localMemSize / KB)); uint32_t newSlmSize = 1; EXPECT_NE(defaultSlmSize, newSlmSize); DebugManager.flags.OverrideSlmSize.set(newSlmSize); device.reset(new MockClDevice{MockDevice::createWithNewExecutionEnvironment(&hardwareInfo)}); auto &deviceInfoWithForceSlmFlag = device->getSharedDeviceInfo(); EXPECT_EQ(newSlmSize, static_cast(deviceInfoWithForceSlmFlag.localMemSize / KB)); } /* Fixture alias: these tests reuse HwHelperTest and write to its hardwareInfo member. */ typedef HwHelperTest DeviceCapsWithModifiedHwInfoTest; /* With capabilityTable.debuggerSupported = false the created device has no debugger and reports debuggerActive == false. */ TEST_F(DeviceCapsWithModifiedHwInfoTest, givenPlatformWithSourceLevelDebuggerNotSupportedWhenDeviceIsCreatedThenSourceLevelDebuggerActiveIsSetToFalse) { 
hardwareInfo.capabilityTable.debuggerSupported = false; std::unique_ptr device(MockDevice::createWithNewExecutionEnvironment(&hardwareInfo)); const auto &caps = device->getDeviceInfo(); EXPECT_EQ(nullptr, device->getDebugger()); EXPECT_FALSE(caps.debuggerActive); } /* Uses a local MockClDevice subclass whose getQueueFamilyCapabilities() returns a test-controlled value. After clearing deviceInfo and calling initializeCaps(), queueFamilyProperties[0].capabilities must equal that value; this is verified for two different capability masks. */ TEST_F(DeviceGetCapsTest, givenClDeviceWhenInitializingCapsThenUseGetQueueFamilyCapabilitiesMethod) { struct ClDeviceWithCustomQueueCaps : MockClDevice { using MockClDevice::MockClDevice; cl_command_queue_capabilities_intel queueCaps{}; cl_command_queue_capabilities_intel getQueueFamilyCapabilities(EngineGroupType type) override { return queueCaps; } }; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); device->deviceInfo = {}; device->queueCaps = CL_QUEUE_CAPABILITY_FILL_IMAGE_INTEL; device->initializeCaps(); EXPECT_EQ(device->queueCaps, device->getDeviceInfo().queueFamilyProperties[0].capabilities); device->deviceInfo = {}; device->queueCaps = CL_QUEUE_CAPABILITY_MAP_BUFFER_INTEL | CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_INTEL; device->initializeCaps(); EXPECT_EQ(device->queueCaps, device->getDeviceInfo().queueFamilyProperties[0].capabilities); } /* Compute engine group: verify() is called with the expected family name "ccs". The fixture and verify() are defined outside this excerpt. */ HWTEST_F(QueueFamilyNameTest, givenCcsWhenGettingQueueFamilyNameThenReturnProperValue) { verify(EngineGroupType::Compute, "ccs"); } /* RenderCompute engine group: expected family name "rcs". */ HWTEST_F(QueueFamilyNameTest, givenRcsWhenGettingQueueFamilyNameThenReturnProperValue) { verify(EngineGroupType::RenderCompute, "rcs"); } /* Copy engine group: expected family name "bcs". */ HWTEST_F(QueueFamilyNameTest, givenBcsWhenGettingQueueFamilyNameThenReturnProperValue) { verify(EngineGroupType::Copy, "bcs"); } /* The out-of-range MaxEngineGroups value: expected family name is the empty string. */ HWTEST_F(QueueFamilyNameTest, givenInvalidEngineGroupWhenGettingQueueFamilyNameThenReturnEmptyName) { verify(EngineGroupType::MaxEngineGroups, ""); } /* Replaces the ClHwHelper for the default core family with a mock that returns an arbitrary name. A name of CL_QUEUE_FAMILY_MAX_NAME_SIZE_INTEL - 1 characters must be copied out unchanged; a name of CL_QUEUE_FAMILY_MAX_NAME_SIZE_INTEL characters must make getQueueFamilyName() throw. */ HWTEST_F(QueueFamilyNameTest, givenTooBigQueueFamilyNameWhenGettingQueueFamilyNameThenExceptionIsThrown) { struct MockClHwHelper : NEO::ClHwHelperHw { bool getQueueFamilyName(std::string &name, EngineGroupType type) const override { name = familyNameOverride; return true; } 
std::string familyNameOverride = ""; }; MockClHwHelper clHwHelper{}; VariableBackup clHwHelperFactoryBackup{ &NEO::clHwHelperFactory[static_cast(defaultHwInfo->platform.eRenderCoreFamily)]}; clHwHelperFactoryBackup = &clHwHelper; char name[CL_QUEUE_FAMILY_MAX_NAME_SIZE_INTEL] = ""; clHwHelper.familyNameOverride = std::string(CL_QUEUE_FAMILY_MAX_NAME_SIZE_INTEL - 1, 'a'); device->getQueueFamilyName(name, EngineGroupType::MaxEngineGroups); EXPECT_EQ(0, std::strcmp(name, clHwHelper.familyNameOverride.c_str())); clHwHelper.familyNameOverride = std::string(CL_QUEUE_FAMILY_MAX_NAME_SIZE_INTEL, 'a'); EXPECT_ANY_THROW(device->getQueueFamilyName(name, EngineGroupType::MaxEngineGroups)); } HWCMDTEST_F(IGFX_GEN8_CORE, DeviceGetCapsTest, givenSysInfoWhenDeviceCreatedThenMaxWorkGroupCalculatedCorrectly) { HardwareInfo myHwInfo = *defaultHwInfo; GT_SYSTEM_INFO &mySysInfo = myHwInfo.gtSystemInfo; PLATFORM &myPlatform = myHwInfo.platform; mySysInfo.EUCount = 16; mySysInfo.SubSliceCount = 4; mySysInfo.DualSubSliceCount = 2; mySysInfo.ThreadCount = 16 * 8; myPlatform.usRevId = 0x4; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&myHwInfo)); auto minSimd = 8; auto expectedWG = (mySysInfo.ThreadCount / mySysInfo.EUCount) * (mySysInfo.EUCount / mySysInfo.SubSliceCount) * minSimd; EXPECT_EQ(expectedWG, device->sharedDeviceInfo.maxWorkGroupSize); } HWTEST_F(DeviceGetCapsTest, givenDSSDifferentThanZeroWhenDeviceCreatedThenDualSubSliceCountIsDifferentThanSubSliceCount) { HardwareInfo myHwInfo = *defaultHwInfo; GT_SYSTEM_INFO &mySysInfo = myHwInfo.gtSystemInfo; PLATFORM &myPlatform = myHwInfo.platform; mySysInfo.EUCount = 16; mySysInfo.SubSliceCount = 4; mySysInfo.DualSubSliceCount = 2; mySysInfo.ThreadCount = 16 * 8; myPlatform.usRevId = 0x4; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&myHwInfo)); EXPECT_NE(device->sharedDeviceInfo.maxNumEUsPerSubSlice, device->sharedDeviceInfo.maxNumEUsPerDualSubSlice); } 
HWTEST_F(DeviceGetCapsTest, givenDSSCountEqualZeroWhenDeviceCreatedThenMaxEuPerDSSEqualMaxEuPerSS) { HardwareInfo myHwInfo = *defaultHwInfo; GT_SYSTEM_INFO &mySysInfo = myHwInfo.gtSystemInfo; PLATFORM &myPlatform = myHwInfo.platform; mySysInfo.EUCount = 16; mySysInfo.SubSliceCount = 4; mySysInfo.DualSubSliceCount = 0; mySysInfo.ThreadCount = 16 * 8; myPlatform.usRevId = 0x4; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&myHwInfo)); EXPECT_EQ(device->sharedDeviceInfo.maxNumEUsPerSubSlice, device->sharedDeviceInfo.maxNumEUsPerDualSubSlice); } compute-runtime-22.14.22890/opencl/test/unit_test/device/device_tests.cpp000066400000000000000000001223521422164147700262600ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/tbx_command_stream_receiver.h" #include "shared/source/device/device.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/indirect_heap/indirect_heap.h" #include "shared/source/os_interface/os_context.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/engine_descriptor_helper.h" #include "shared/test/common/helpers/ult_hw_config.h" #include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/helpers/variable_backup.h" #include "shared/test/common/libult/ult_command_stream_receiver.h" #include "shared/test/common/mocks/mock_csr.h" #include "shared/test/common/mocks/mock_driver_info.h" #include "shared/test/common/mocks/mock_execution_environment.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "shared/test/common/mocks/mock_os_context.h" #include "shared/test/common/mocks/ult_device_factory.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/common/test_macros/test_checks_shared.h" #include "opencl/source/platform/platform.h" #include 
"opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/helpers/raii_hw_helper.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include using namespace NEO; typedef Test DeviceTest; TEST_F(DeviceTest, givenDeviceWhenGetProductAbbrevThenReturnsHardwarePrefix) { const auto productAbbrev = pDevice->getProductAbbrev(); const auto hwPrefix = hardwarePrefix[pDevice->getHardwareInfo().platform.eProductFamily]; EXPECT_EQ(hwPrefix, productAbbrev); } TEST_F(DeviceTest, WhenDeviceIsCreatedThenCommandStreamReceiverIsNotNull) { EXPECT_NE(nullptr, &pDevice->getGpgpuCommandStreamReceiver()); } TEST_F(DeviceTest, WhenDeviceIsCreatedThenEnabledClVersionMatchesHardwareInfo) { auto version = pClDevice->getEnabledClVersion(); auto version2 = pDevice->getHardwareInfo().capabilityTable.clVersionSupport; EXPECT_EQ(version, version2); } TEST_F(DeviceTest, givenDeviceWhenEngineIsCreatedThenSetInitialValueForTag) { for (auto &engine : pDevice->allEngines) { auto tagAddress = engine.commandStreamReceiver->getTagAddress(); ASSERT_NE(nullptr, const_cast(tagAddress)); EXPECT_EQ(initialHardwareTag, *tagAddress); } } TEST_F(DeviceTest, givenDeviceWhenAskedForSpecificEngineThenReturnIt) { auto hwInfo = *defaultHwInfo; hwInfo.featureTable.flags.ftrCCSNode = true; hwInfo.capabilityTable.blitterOperationsSupported = true; MockClDevice mockClDevice{MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)}; auto &engines = HwHelper::get(hwInfo.platform.eRenderCoreFamily).getGpgpuEngineInstances(hwInfo); for (uint32_t i = 0; i < engines.size(); i++) { auto &deviceEngine = mockClDevice.getEngine(engines[i].first, EngineUsage::Regular); EXPECT_EQ(deviceEngine.osContext->getEngineType(), engines[i].first); EXPECT_EQ(deviceEngine.osContext->isLowPriority(), false); } auto &deviceEngine = mockClDevice.getEngine(hwInfo.capabilityTable.defaultEngineType, EngineUsage::LowPriority); 
EXPECT_EQ(deviceEngine.osContext->getEngineType(), hwInfo.capabilityTable.defaultEngineType); EXPECT_EQ(deviceEngine.osContext->isLowPriority(), true); EXPECT_THROW(mockClDevice.getEngine(aub_stream::ENGINE_VCS, EngineUsage::Regular), std::exception); } TEST_F(DeviceTest, givenDebugVariableToAlwaysChooseEngineZeroWhenNotExistingEngineSelectedThenIndexZeroEngineIsReturned) { DebugManagerStateRestore restore; DebugManager.flags.OverrideInvalidEngineWithDefault.set(true); auto &engines = HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*defaultHwInfo); auto &deviceEngine = pDevice->getEngine(engines[0].first, EngineUsage::Regular); auto ¬ExistingEngine = pDevice->getEngine(aub_stream::ENGINE_VCS, EngineUsage::Regular); EXPECT_EQ(¬ExistingEngine, &deviceEngine); } TEST_F(DeviceTest, WhenDeviceIsCreatedThenOsTimeIsNotNull) { auto pDevice = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr)); OSTime *osTime = pDevice->getOSTime(); ASSERT_NE(nullptr, osTime); } TEST_F(DeviceTest, GivenDebugVariableForcing32BitAllocationsWhenDeviceIsCreatedThenMemoryManagerHasForce32BitFlagSet) { DebugManager.flags.Force32bitAddressing.set(true); auto pDevice = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr)); if constexpr (is64bit) { EXPECT_TRUE(pDevice->getDeviceInfo().force32BitAddressess); EXPECT_TRUE(pDevice->getMemoryManager()->peekForce32BitAllocations()); } else { EXPECT_FALSE(pDevice->getDeviceInfo().force32BitAddressess); EXPECT_FALSE(pDevice->getMemoryManager()->peekForce32BitAllocations()); } DebugManager.flags.Force32bitAddressing.set(false); } TEST_F(DeviceTest, WhenRetainingThenReferenceIsOneAndApiIsUsed) { ASSERT_NE(nullptr, pClDevice); pClDevice->retainApi(); pClDevice->retainApi(); pClDevice->retainApi(); ASSERT_EQ(1, pClDevice->getReference()); ASSERT_FALSE(pClDevice->releaseApi().isUnused()); ASSERT_EQ(1, pClDevice->getReference()); } TEST_F(DeviceTest, 
givenNoPciBusInfoThenIsPciBusInfoValidReturnsFalse) { PhysicalDevicePciBusInfo invalidPciBusInfoList[] = { PhysicalDevicePciBusInfo(0, 1, 2, PhysicalDevicePciBusInfo::InvalidValue), PhysicalDevicePciBusInfo(0, 1, PhysicalDevicePciBusInfo::InvalidValue, 3), PhysicalDevicePciBusInfo(0, PhysicalDevicePciBusInfo::InvalidValue, 2, 3), PhysicalDevicePciBusInfo(PhysicalDevicePciBusInfo::InvalidValue, 1, 2, 3)}; for (auto pciBusInfo : invalidPciBusInfoList) { auto driverInfo = new DriverInfoMock(); driverInfo->setPciBusInfo(pciBusInfo); pClDevice->driverInfo.reset(driverInfo); pClDevice->initializeCaps(); EXPECT_FALSE(pClDevice->isPciBusInfoValid()); } } TEST_F(DeviceTest, givenPciBusInfoThenIsPciBusInfoValidReturnsTrue) { PhysicalDevicePciBusInfo pciBusInfo(0, 1, 2, 3); auto driverInfo = new DriverInfoMock(); driverInfo->setPciBusInfo(pciBusInfo); pClDevice->driverInfo.reset(driverInfo); pClDevice->initializeCaps(); EXPECT_TRUE(pClDevice->isPciBusInfoValid()); } HWTEST_F(DeviceTest, WhenDeviceIsCreatedThenActualEngineTypeIsSameAsDefault) { HardwareInfo hwInfo = *defaultHwInfo; if (hwInfo.capabilityTable.defaultEngineType == aub_stream::EngineType::ENGINE_CCS) { hwInfo.featureTable.flags.ftrCCSNode = true; } auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); auto actualEngineType = device->getDefaultEngine().osContext->getEngineType(); auto defaultEngineType = device->getHardwareInfo().capabilityTable.defaultEngineType; EXPECT_EQ(&device->getDefaultEngine().commandStreamReceiver->getOsContext(), device->getDefaultEngine().osContext); EXPECT_EQ(defaultEngineType, actualEngineType); int defaultCounter = 0; const auto &engines = device->getAllEngines(); for (const auto &engine : engines) { if (engine.osContext->isDefaultContext()) { defaultCounter++; } } EXPECT_EQ(defaultCounter, 1); } HWTEST_F(DeviceTest, givenNoHwCsrTypeAndModifiedDefaultEngineIndexWhenIsSimulationIsCalledThenTrueIsReturned) { EXPECT_FALSE(pDevice->isSimulation()); 
auto csr = TbxCommandStreamReceiver::create("", false, *pDevice->executionEnvironment, 0, 1); pDevice->defaultEngineIndex = 1; pDevice->resetCommandStreamReceiver(csr); EXPECT_TRUE(pDevice->isSimulation()); std::array exptectedEngineTypes = {CommandStreamReceiverType::CSR_HW, CommandStreamReceiverType::CSR_TBX, CommandStreamReceiverType::CSR_HW}; for (uint32_t i = 0u; i < 3u; ++i) { auto engineType = pDevice->allEngines[i].commandStreamReceiver->getType(); EXPECT_EQ(exptectedEngineTypes[i], engineType); } } TEST_F(DeviceTest, givenRootDeviceWithSubDevicesWhenCreatingThenRootDeviceContextIsInitialized) { DebugManagerStateRestore restore{}; DebugManager.flags.DeferOsContextInitialization.set(1); UltDeviceFactory factory(1, 2); MockDevice &device = *factory.rootDevices[0]; EXPECT_TRUE(device.getDefaultEngine().osContext->isInitialized()); } HWTEST_F(DeviceTest, givenDeviceWithoutSubDevicesWhenCreatingContextsThenMemoryManagerDefaultContextIsSetCorrectly) { UltDeviceFactory factory(1, 1); MockDevice &device = *factory.rootDevices[0]; auto rootDeviceIndex = device.getRootDeviceIndex(); MockMemoryManager *memoryManager = static_cast(device.getMemoryManager()); OsContext *defaultOsContextMemoryManager = memoryManager->registeredEngines[memoryManager->defaultEngineIndex[rootDeviceIndex]].osContext; OsContext *defaultOsContextRootDevice = device.getDefaultEngine().osContext; EXPECT_EQ(defaultOsContextRootDevice, defaultOsContextMemoryManager); } HWTEST_F(DeviceTest, givenDeviceWithSubDevicesWhenCreatingContextsThenMemoryManagerDefaultContextIsSetCorrectly) { UltDeviceFactory factory(1, 2); MockDevice &device = *factory.rootDevices[0]; auto rootDeviceIndex = device.getRootDeviceIndex(); MockMemoryManager *memoryManager = static_cast(device.getMemoryManager()); OsContext *defaultOsContextMemoryManager = memoryManager->registeredEngines[memoryManager->defaultEngineIndex[rootDeviceIndex]].osContext; OsContext *defaultOsContextRootDevice = device.getDefaultEngine().osContext; 
EXPECT_EQ(defaultOsContextRootDevice, defaultOsContextMemoryManager); } HWTEST_F(DeviceTest, givenMultiDeviceWhenCreatingContextsThenMemoryManagerDefaultContextIsSetCorrectly) { UltDeviceFactory factory(3, 2); MockDevice &device = *factory.rootDevices[2]; MockMemoryManager *memoryManager = static_cast(device.getMemoryManager()); for (auto &pRootDevice : factory.rootDevices) { OsContext *defaultOsContextMemoryManager = memoryManager->registeredEngines[memoryManager->defaultEngineIndex[pRootDevice->getRootDeviceIndex()]].osContext; OsContext *defaultOsContextRootDevice = pRootDevice->getDefaultEngine().osContext; EXPECT_EQ(defaultOsContextRootDevice, defaultOsContextMemoryManager); } } TEST(DeviceCleanup, givenDeviceWhenItIsDestroyedThenFlushBatchedSubmissionsIsCalled) { auto mockDevice = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockCommandStreamReceiver *csr = new MockCommandStreamReceiver(*mockDevice->getExecutionEnvironment(), mockDevice->getRootDeviceIndex(), mockDevice->getDeviceBitfield()); mockDevice->resetCommandStreamReceiver(csr); int flushedBatchedSubmissionsCalledCount = 0; csr->flushBatchedSubmissionsCallCounter = &flushedBatchedSubmissionsCalledCount; mockDevice.reset(nullptr); EXPECT_EQ(1, flushedBatchedSubmissionsCalledCount); } TEST(DeviceCreation, givenSelectedAubCsrInDebugVarsWhenDeviceIsCreatedThenIsSimulationReturnsTrue) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.SetCommandStreamReceiver.set(CommandStreamReceiverType::CSR_AUB); VariableBackup backup(&ultHwConfig); ultHwConfig.useHwCsr = true; auto mockDevice = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr)); EXPECT_TRUE(mockDevice->isSimulation()); } TEST(DeviceCreation, givenSelectedTbxCsrInDebugVarsWhenDeviceIsCreatedThenIsSimulationReturnsTrue) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.SetCommandStreamReceiver.set(CommandStreamReceiverType::CSR_TBX); VariableBackup backup(&ultHwConfig); 
ultHwConfig.useHwCsr = true; auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr)); EXPECT_TRUE(device->isSimulation()); } TEST(DeviceCreation, givenSelectedTbxWithAubCsrInDebugVarsWhenDeviceIsCreatedThenIsSimulationReturnsTrue) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.SetCommandStreamReceiver.set(CommandStreamReceiverType::CSR_TBX_WITH_AUB); VariableBackup backup(&ultHwConfig); ultHwConfig.useHwCsr = true; auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr)); EXPECT_TRUE(device->isSimulation()); } TEST(DeviceCreation, givenHwWithAubCsrInDebugVarsWhenDeviceIsCreatedThenIsSimulationReturnsFalse) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.SetCommandStreamReceiver.set(CommandStreamReceiverType::CSR_HW_WITH_AUB); auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr)); EXPECT_FALSE(device->isSimulation()); } TEST(DeviceCreation, givenDefaultHwCsrInDebugVarsWhenDeviceIsCreatedThenIsSimulationReturnsFalse) { auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr)); EXPECT_FALSE(device->isSimulation()); } TEST(DeviceCreation, givenDeviceWhenItIsCreatedThenOsContextIsRegistredInMemoryManager) { auto hwInfo = *defaultHwInfo; hwInfo.capabilityTable.blitterOperationsSupported = true; auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); auto memoryManager = device->getMemoryManager(); auto numEnginesForDevice = HwHelper::get(hwInfo.platform.eRenderCoreFamily).getGpgpuEngineInstances(hwInfo).size(); if (device->getNumGenericSubDevices() > 1) { numEnginesForDevice *= device->getNumGenericSubDevices(); numEnginesForDevice += device->allEngines.size(); if (device->getSubDevice(0)->getNumSubDevices() > 0) { numEnginesForDevice += device->getNumSubDevices(); } } else if (device->getNumSubDevices() > 0) { numEnginesForDevice += device->getNumSubDevices(); } EXPECT_EQ(numEnginesForDevice, 
memoryManager->getRegisteredEnginesCount()); } TEST(DeviceCreation, givenMultiRootDeviceWhenTheyAreCreatedThenEachOsContextHasUniqueId) { ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); const size_t numDevices = 2; executionEnvironment->prepareRootDeviceEnvironments(numDevices); for (auto i = 0u; i < numDevices; i++) { executionEnvironment->rootDeviceEnvironments[i]->setHwInfo(defaultHwInfo.get()); executionEnvironment->rootDeviceEnvironments[i]->getMutableHardwareInfo()->capabilityTable.blitterOperationsSupported = true; } auto device1 = std::unique_ptr(Device::create(executionEnvironment, 0u)); auto device2 = std::unique_ptr(Device::create(executionEnvironment, 1u)); MockDevice *devices[] = {device1.get(), device2.get()}; auto ®isteredEngines = executionEnvironment->memoryManager->getRegisteredEngines(); auto &hwInfo = device1->getHardwareInfo(); const auto &numGpgpuEngines = static_cast(HwHelper::get(hwInfo.platform.eRenderCoreFamily).getGpgpuEngineInstances(hwInfo).size()); size_t numExpectedGenericEnginesPerDevice = numGpgpuEngines; size_t numExpectedEngineInstancedEnginesPerDevice = 0; if (device1->getNumSubDevices() > 0) { numExpectedEngineInstancedEnginesPerDevice = device1->getNumSubDevices(); } auto expectedTotalRegisteredEngines = (numExpectedGenericEnginesPerDevice + numExpectedEngineInstancedEnginesPerDevice) * numDevices; EXPECT_EQ(expectedTotalRegisteredEngines, registeredEngines.size()); uint32_t contextId = 0; for (uint32_t i = 0; i < numDevices; i++) { auto device = devices[i]; for (uint32_t j = 0; j < numExpectedEngineInstancedEnginesPerDevice; j++) { auto subDevice = device->getSubDevice(j); auto &engine = subDevice->getEngine(0); EXPECT_EQ(contextId, engine.osContext->getContextId()); EXPECT_EQ(1u, engine.osContext->getDeviceBitfield().to_ulong()); EXPECT_EQ(registeredEngines[contextId].commandStreamReceiver, engine.commandStreamReceiver); contextId++; } for (uint32_t j = 0; j < 
numExpectedGenericEnginesPerDevice; j++) { auto &engine = device->getEngine(j); EXPECT_EQ(contextId, engine.osContext->getContextId()); EXPECT_EQ(1u, engine.osContext->getDeviceBitfield().to_ulong()); EXPECT_EQ(registeredEngines[contextId].commandStreamReceiver, engine.commandStreamReceiver); contextId++; } } EXPECT_EQ(expectedTotalRegisteredEngines, executionEnvironment->memoryManager->getRegisteredEnginesCount()); } TEST(DeviceCreation, givenMultiRootDeviceWhenTheyAreCreatedThenEachDeviceHasSeperateDeviceIndex) { ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); const size_t numDevices = 2; executionEnvironment->prepareRootDeviceEnvironments(numDevices); for (auto i = 0u; i < executionEnvironment->rootDeviceEnvironments.size(); i++) { executionEnvironment->rootDeviceEnvironments[i]->setHwInfo(defaultHwInfo.get()); } auto device = std::unique_ptr(Device::create(executionEnvironment, 0u)); auto device2 = std::unique_ptr(Device::create(executionEnvironment, 1u)); EXPECT_EQ(0u, device->getRootDeviceIndex()); EXPECT_EQ(1u, device2->getRootDeviceIndex()); } TEST(DeviceCreation, givenMultiRootDeviceWhenTheyAreCreatedThenEachDeviceHasSeperateCommandStreamReceiver) { ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); const size_t numDevices = 2; executionEnvironment->prepareRootDeviceEnvironments(numDevices); for (auto i = 0u; i < executionEnvironment->rootDeviceEnvironments.size(); i++) { executionEnvironment->rootDeviceEnvironments[i]->setHwInfo(defaultHwInfo.get()); executionEnvironment->rootDeviceEnvironments[i]->getMutableHardwareInfo()->capabilityTable.blitterOperationsSupported = true; } auto hwInfo = *executionEnvironment->rootDeviceEnvironments[0]->getHardwareInfo(); const auto &numGpgpuEngines = HwHelper::get(hwInfo.platform.eRenderCoreFamily).getGpgpuEngineInstances(hwInfo).size(); auto device1 = std::unique_ptr(Device::create(executionEnvironment, 0u)); auto device2 = 
std::unique_ptr(Device::create(executionEnvironment, 1u)); EXPECT_EQ(numGpgpuEngines, device1->commandStreamReceivers.size()); EXPECT_EQ(numGpgpuEngines, device2->commandStreamReceivers.size()); for (uint32_t i = 0; i < static_cast(numGpgpuEngines); i++) { EXPECT_NE(device2->allEngines[i].commandStreamReceiver, device1->allEngines[i].commandStreamReceiver); } } HWTEST_F(DeviceTest, givenDeviceWhenAskingForDefaultEngineThenReturnValidValue) { ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); executionEnvironment->prepareRootDeviceEnvironments(1u); auto &hwHelper = HwHelperHw::get(); hwHelper.adjustDefaultEngineType(executionEnvironment->rootDeviceEnvironments[0]->getMutableHardwareInfo()); auto device = std::unique_ptr(Device::create(executionEnvironment, 0)); auto osContext = device->getDefaultEngine().osContext; EXPECT_EQ(device->getHardwareInfo().capabilityTable.defaultEngineType, osContext->getEngineType()); EXPECT_FALSE(osContext->isLowPriority()); } HWTEST_F(DeviceTest, givenDebugFlagWhenCreatingRootDeviceWithSubDevicesThenWorkPartitionAllocationIsCreatedForRootDevice) { DebugManagerStateRestore restore{}; DebugManager.flags.EnableImplicitScaling.set(1); { UltDeviceFactory deviceFactory{1, 2}; EXPECT_NE(nullptr, deviceFactory.rootDevices[0]->getDefaultEngine().commandStreamReceiver->getWorkPartitionAllocation()); EXPECT_EQ(nullptr, deviceFactory.subDevices[0]->getDefaultEngine().commandStreamReceiver->getWorkPartitionAllocation()); EXPECT_EQ(nullptr, deviceFactory.subDevices[1]->getDefaultEngine().commandStreamReceiver->getWorkPartitionAllocation()); EXPECT_TRUE(deviceFactory.rootDevices[0]->getDefaultEngine().commandStreamReceiver->isStaticWorkPartitioningEnabled()); EXPECT_FALSE(deviceFactory.subDevices[0]->getDefaultEngine().commandStreamReceiver->isStaticWorkPartitioningEnabled()); EXPECT_FALSE(deviceFactory.subDevices[1]->getDefaultEngine().commandStreamReceiver->isStaticWorkPartitioningEnabled()); } { 
DebugManager.flags.EnableStaticPartitioning.set(0); UltDeviceFactory deviceFactory{1, 2}; EXPECT_EQ(nullptr, deviceFactory.rootDevices[0]->getDefaultEngine().commandStreamReceiver->getWorkPartitionAllocation()); EXPECT_EQ(nullptr, deviceFactory.subDevices[0]->getDefaultEngine().commandStreamReceiver->getWorkPartitionAllocation()); EXPECT_EQ(nullptr, deviceFactory.subDevices[1]->getDefaultEngine().commandStreamReceiver->getWorkPartitionAllocation()); EXPECT_FALSE(deviceFactory.rootDevices[0]->getDefaultEngine().commandStreamReceiver->isStaticWorkPartitioningEnabled()); EXPECT_FALSE(deviceFactory.subDevices[0]->getDefaultEngine().commandStreamReceiver->isStaticWorkPartitioningEnabled()); EXPECT_FALSE(deviceFactory.subDevices[1]->getDefaultEngine().commandStreamReceiver->isStaticWorkPartitioningEnabled()); } { DebugManager.flags.EnableStaticPartitioning.set(1); UltDeviceFactory deviceFactory{1, 2}; EXPECT_NE(nullptr, deviceFactory.rootDevices[0]->getDefaultEngine().commandStreamReceiver->getWorkPartitionAllocation()); EXPECT_EQ(nullptr, deviceFactory.subDevices[0]->getDefaultEngine().commandStreamReceiver->getWorkPartitionAllocation()); EXPECT_EQ(nullptr, deviceFactory.subDevices[1]->getDefaultEngine().commandStreamReceiver->getWorkPartitionAllocation()); EXPECT_TRUE(deviceFactory.rootDevices[0]->getDefaultEngine().commandStreamReceiver->isStaticWorkPartitioningEnabled()); EXPECT_FALSE(deviceFactory.subDevices[0]->getDefaultEngine().commandStreamReceiver->isStaticWorkPartitioningEnabled()); EXPECT_FALSE(deviceFactory.subDevices[1]->getDefaultEngine().commandStreamReceiver->isStaticWorkPartitioningEnabled()); } } HWTEST_F(DeviceTest, givenDebugFlagWhenCreatingRootDeviceWithoutSubDevicesThenWorkPartitionAllocationIsNotCreated) { DebugManagerStateRestore restore{}; DebugManager.flags.EnableImplicitScaling.set(1); { UltDeviceFactory deviceFactory{1, 1}; EXPECT_EQ(nullptr, 
deviceFactory.rootDevices[0]->getDefaultEngine().commandStreamReceiver->getWorkPartitionAllocation()); } { DebugManager.flags.EnableStaticPartitioning.set(0); UltDeviceFactory deviceFactory{1, 1}; EXPECT_EQ(nullptr, deviceFactory.rootDevices[0]->getDefaultEngine().commandStreamReceiver->getWorkPartitionAllocation()); } { DebugManager.flags.EnableStaticPartitioning.set(1); UltDeviceFactory deviceFactory{1, 1}; EXPECT_EQ(nullptr, deviceFactory.rootDevices[0]->getDefaultEngine().commandStreamReceiver->getWorkPartitionAllocation()); } } TEST(DeviceCreation, givenFtrSimulationModeFlagTrueWhenNoOtherSimulationFlagsArePresentThenIsSimulationReturnsTrue) { HardwareInfo hwInfo = *defaultHwInfo; hwInfo.featureTable.flags.ftrSimulationMode = true; bool simulationFromDeviceId = hwInfo.capabilityTable.isSimulation(hwInfo.platform.usDeviceID); EXPECT_FALSE(simulationFromDeviceId); auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); EXPECT_TRUE(device->isSimulation()); } TEST(DeviceCreation, givenDeviceWhenCheckingGpgpuEnginesCountThenNumberGreaterThanZeroIsReturned) { auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr)); auto &hwHelper = HwHelper::get(renderCoreFamily); EXPECT_GT(hwHelper.getGpgpuEngineInstances(device->getHardwareInfo()).size(), 0u); } TEST(DeviceCreation, givenDeviceWhenCheckingParentDeviceThenCorrectValueIsReturned) { UltDeviceFactory deviceFactory{2, 2}; EXPECT_EQ(deviceFactory.rootDevices[0], deviceFactory.rootDevices[0]->getRootDevice()); EXPECT_EQ(deviceFactory.rootDevices[0], deviceFactory.subDevices[0]->getRootDevice()); EXPECT_EQ(deviceFactory.rootDevices[0], deviceFactory.subDevices[1]->getRootDevice()); EXPECT_EQ(deviceFactory.rootDevices[1], deviceFactory.rootDevices[1]->getRootDevice()); EXPECT_EQ(deviceFactory.rootDevices[1], deviceFactory.subDevices[2]->getRootDevice()); EXPECT_EQ(deviceFactory.rootDevices[1], deviceFactory.subDevices[3]->getRootDevice()); } TEST(DeviceCreation, 
givenRootDeviceWithSubDevicesWhenCheckingEngineGroupsThenItHasOneNonEmptyGroup) { UltDeviceFactory deviceFactory{1, 2}; EXPECT_EQ(1u, deviceFactory.rootDevices[0]->getRegularEngineGroups().size()); } TEST(DeviceCreation, whenCheckingEngineGroupsThenGroupsAreUnique) { VariableBackup backupHwInfo(defaultHwInfo.get()); defaultHwInfo->gtSystemInfo.CCSInfo.NumberOfCCSEnabled = 4; for (auto ftrGpGpuMidThreadLevelPreempt : ::testing::Bool()) { defaultHwInfo->featureTable.flags.ftrGpGpuMidThreadLevelPreempt = ftrGpGpuMidThreadLevelPreempt; for (auto blitterOperationsSupported : ::testing::Bool()) { defaultHwInfo->capabilityTable.blitterOperationsSupported = blitterOperationsSupported; for (auto ftrRcsNode : ::testing::Bool()) { defaultHwInfo->featureTable.flags.ftrRcsNode = ftrRcsNode; for (auto ftrCCSNode : ::testing::Bool()) { defaultHwInfo->featureTable.flags.ftrCCSNode = ftrCCSNode; UltDeviceFactory deviceFactory{1, 0}; std::set uniqueEngineGroupTypes; for (auto &engineGroup : deviceFactory.rootDevices[0]->getRegularEngineGroups()) { uniqueEngineGroupTypes.insert(engineGroup.engineGroupType); } EXPECT_EQ(uniqueEngineGroupTypes.size(), deviceFactory.rootDevices[0]->getRegularEngineGroups().size()); } } } } } using DeviceHwTest = ::testing::Test; HWTEST_F(DeviceHwTest, givenHwHelperInputWhenInitializingCsrThenCreatePageTableManagerIfNeeded) { HardwareInfo localHwInfo = *defaultHwInfo; localHwInfo.capabilityTable.ftrRenderCompressedBuffers = false; localHwInfo.capabilityTable.ftrRenderCompressedImages = false; MockExecutionEnvironment executionEnvironment; executionEnvironment.prepareRootDeviceEnvironments(3); executionEnvironment.incRefInternal(); for (auto i = 0u; i < executionEnvironment.rootDeviceEnvironments.size(); i++) { executionEnvironment.rootDeviceEnvironments[i]->setHwInfo(&localHwInfo); } executionEnvironment.initializeMemoryManager(); std::unique_ptr device; device.reset(MockDevice::createWithExecutionEnvironment(&localHwInfo, &executionEnvironment, 0)); 
auto &csr0 = device->getUltCommandStreamReceiver(); EXPECT_FALSE(csr0.createPageTableManagerCalled); auto hwInfo = executionEnvironment.rootDeviceEnvironments[1]->getMutableHardwareInfo(); hwInfo->capabilityTable.ftrRenderCompressedBuffers = true; hwInfo->capabilityTable.ftrRenderCompressedImages = false; device.reset(MockDevice::createWithExecutionEnvironment(&localHwInfo, &executionEnvironment, 1)); auto &csr1 = device->getUltCommandStreamReceiver(); EXPECT_EQ(csr1.needsPageTableManager(), csr1.createPageTableManagerCalled); hwInfo = executionEnvironment.rootDeviceEnvironments[2]->getMutableHardwareInfo(); hwInfo->capabilityTable.ftrRenderCompressedBuffers = false; hwInfo->capabilityTable.ftrRenderCompressedImages = true; device.reset(MockDevice::createWithExecutionEnvironment(&localHwInfo, &executionEnvironment, 2)); auto &csr2 = device->getUltCommandStreamReceiver(); EXPECT_EQ(csr2.needsPageTableManager(), csr2.createPageTableManagerCalled); } HWTEST_F(DeviceHwTest, givenDeviceCreationWhenCsrFailsToCreateGlobalSyncAllocationThenReturnNull) { class MockUltCsrThatFailsToCreateGlobalFenceAllocation : public UltCommandStreamReceiver { public: MockUltCsrThatFailsToCreateGlobalFenceAllocation(ExecutionEnvironment &executionEnvironment, const DeviceBitfield deviceBitfield) : UltCommandStreamReceiver(executionEnvironment, 0, deviceBitfield) {} bool createGlobalFenceAllocation() override { return false; } }; class MockDeviceThatFailsToCreateGlobalFenceAllocation : public MockDevice { public: MockDeviceThatFailsToCreateGlobalFenceAllocation(ExecutionEnvironment *executionEnvironment, uint32_t deviceIndex) : MockDevice(executionEnvironment, deviceIndex) {} std::unique_ptr createCommandStreamReceiver() const override { return std::make_unique(*executionEnvironment, getDeviceBitfield()); } }; auto executionEnvironment = platform()->peekExecutionEnvironment(); auto mockDevice(MockDevice::create(executionEnvironment, 0)); EXPECT_EQ(nullptr, mockDevice); } 
// Verifies that engine groups are listed in the order in which their first engine was added:
// CCS-then-RCS yields {Compute, RenderCompute}, RCS-then-CCS yields {RenderCompute, Compute}.
HWTEST_F(DeviceHwTest, givenBothCcsAndRcsEnginesInDeviceWhenGettingEngineGroupsThenReturnInCorrectOrder) {
    // Helper override with a fixed mapping: RCS -> RenderCompute, any CCS -> Compute.
    // Any other engine type is treated as a test bug.
    struct MyHwHelper : HwHelperHw<FamilyType> {
        EngineGroupType getEngineGroupType(aub_stream::EngineType engineType, EngineUsage engineUsage, const HardwareInfo &hwInfo) const override {
            if (engineType == aub_stream::ENGINE_RCS) {
                return EngineGroupType::RenderCompute;
            }
            if (EngineHelpers::isCcs(engineType)) {
                return EngineGroupType::Compute;
            }
            UNRECOVERABLE_IF(true);
        }
    };
    RAIIHwHelperFactory<MyHwHelper> overrideHwHelper{::defaultHwInfo->platform.eRenderCoreFamily};

    MockOsContext rcsContext(0, EngineDescriptorHelper::getDefaultDescriptor({aub_stream::EngineType::ENGINE_RCS, EngineUsage::Regular}));
    EngineControl rcsEngine{nullptr, &rcsContext};
    MockOsContext ccsContext(1, EngineDescriptorHelper::getDefaultDescriptor({aub_stream::EngineType::ENGINE_CCS, EngineUsage::Regular}));
    EngineControl ccsEngine{nullptr, &ccsContext};
    MockDevice device{};
    ASSERT_EQ(0u, device.getRegularEngineGroups().size());

    // Reference to the device's own container; it reflects every later add/clear.
    auto &engineGroups = device.getRegularEngineGroups();

    // Order 1: CCS first, then RCS.
    device.addEngineToEngineGroup(ccsEngine);
    device.addEngineToEngineGroup(rcsEngine);
    // Stop here if grouping is wrong, rather than indexing past the end below.
    ASSERT_EQ(2u, engineGroups.size());
    EXPECT_EQ(1u, engineGroups[0].engines.size());
    EXPECT_EQ(EngineGroupType::Compute, engineGroups[0].engineGroupType);
    EXPECT_EQ(aub_stream::EngineType::ENGINE_CCS, engineGroups[0].engines[0].getEngineType());
    EXPECT_EQ(1u, engineGroups[1].engines.size());
    EXPECT_EQ(EngineGroupType::RenderCompute, engineGroups[1].engineGroupType);
    EXPECT_EQ(aub_stream::EngineType::ENGINE_RCS, engineGroups[1].engines[0].getEngineType());

    // Order 2: RCS first, then CCS.
    device.getRegularEngineGroups().clear();
    device.addEngineToEngineGroup(rcsEngine);
    device.addEngineToEngineGroup(ccsEngine);
    ASSERT_EQ(2u, engineGroups.size());
    EXPECT_EQ(1u, engineGroups[0].engines.size());
    EXPECT_EQ(EngineGroupType::RenderCompute, engineGroups[0].engineGroupType);
    EXPECT_EQ(aub_stream::EngineType::ENGINE_RCS, engineGroups[0].engines[0].getEngineType());
    EXPECT_EQ(1u, engineGroups[1].engines.size());
EXPECT_EQ(EngineGroupType::Compute, engineGroups[1].engineGroupType); EXPECT_EQ(aub_stream::EngineType::ENGINE_CCS, engineGroups[1].engines[0].getEngineType()); } TEST(DeviceGetEngineTest, givenHwCsrModeWhenGetEngineThenDedicatedForInternalUsageEngineIsReturned) { auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr)); auto &internalEngine = device->getInternalEngine(); auto &defaultEngine = device->getDefaultEngine(); EXPECT_NE(defaultEngine.commandStreamReceiver, internalEngine.commandStreamReceiver); } TEST(DeviceGetEngineTest, whenCreateDeviceThenInternalEngineHasDefaultType) { auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr)); auto internalEngineType = device->getInternalEngine().osContext->getEngineType(); auto defaultEngineType = getChosenEngineType(device->getHardwareInfo()); EXPECT_EQ(defaultEngineType, internalEngineType); } TEST(DeviceGetEngineTest, givenCreatedDeviceWhenRetrievingDefaultEngineThenOsContextHasDefaultFieldSet) { auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr)); auto &defaultEngine = device->getDefaultEngine(); EXPECT_TRUE(defaultEngine.osContext->isDefaultContext()); } TEST(DeviceGetEngineTest, givenVariousIndicesWhenGettingEngineGroupIndexFromEngineGroupTypeThenReturnCorrectResults) { const auto nonEmptyEngineGroup = std::vector{EngineControl{nullptr, nullptr}}; auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr)); auto &engineGroups = device->getRegularEngineGroups(); engineGroups.resize(3); engineGroups[0].engineGroupType = static_cast(4); engineGroups[1].engineGroupType = static_cast(3); engineGroups[2].engineGroupType = static_cast(2); EXPECT_EQ(0u, device->getEngineGroupIndexFromEngineGroupType(static_cast(4u))); EXPECT_EQ(1u, device->getEngineGroupIndexFromEngineGroupType(static_cast(3u))); EXPECT_EQ(2u, device->getEngineGroupIndexFromEngineGroupType(static_cast(2u))); 
EXPECT_ANY_THROW(device->getEngineGroupIndexFromEngineGroupType(static_cast(1u))); EXPECT_ANY_THROW(device->getEngineGroupIndexFromEngineGroupType(static_cast(0u))); } TEST(DeviceGetEngineTest, givenDeferredContextInitializationEnabledWhenCreatingEnginesThenInitializeOnlyOsContextsWhichRequireIt) { DebugManagerStateRestore restore{}; DebugManager.flags.DeferOsContextInitialization.set(1); auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr)); const auto defaultEngineType = getChosenEngineType(device->getHardwareInfo()); EXPECT_NE(0u, device->getAllEngines().size()); for (const EngineControl &engine : device->getAllEngines()) { OsContext *osContext = engine.osContext; const bool isDefaultEngine = defaultEngineType == osContext->getEngineType() && osContext->isRegular(); const bool shouldBeInitialized = osContext->isImmediateContextInitializationEnabled(isDefaultEngine); EXPECT_EQ(shouldBeInitialized, osContext->isInitialized()); } } TEST(DeviceGetEngineTest, givenDeferredContextInitializationDisabledWhenCreatingEnginesThenInitializeAllOsContexts) { DebugManagerStateRestore restore{}; DebugManager.flags.DeferOsContextInitialization.set(0); auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr)); EXPECT_NE(0u, device->getAllEngines().size()); for (const EngineControl &engine : device->getAllEngines()) { EXPECT_TRUE(engine.osContext->isInitialized()); } } TEST(DeviceGetEngineTest, givenNonHwCsrModeWhenGetEngineThenDefaultEngineIsReturned) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.SetCommandStreamReceiver.set(CommandStreamReceiverType::CSR_AUB); VariableBackup backup(&ultHwConfig); ultHwConfig.useHwCsr = true; auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr)); auto &internalEngine = device->getInternalEngine(); auto &defaultEngine = device->getDefaultEngine(); EXPECT_EQ(defaultEngine.commandStreamReceiver, internalEngine.commandStreamReceiver); } using 
QueueFamiliesTests = ::testing::Test; HWTEST_F(QueueFamiliesTests, whenGettingQueueFamilyCapabilitiesAllThenReturnCorrectValue) { const cl_command_queue_capabilities_intel expectedProperties = CL_QUEUE_CAPABILITY_CREATE_SINGLE_QUEUE_EVENTS_INTEL | CL_QUEUE_CAPABILITY_CREATE_CROSS_QUEUE_EVENTS_INTEL | CL_QUEUE_CAPABILITY_SINGLE_QUEUE_EVENT_WAIT_LIST_INTEL | CL_QUEUE_CAPABILITY_CROSS_QUEUE_EVENT_WAIT_LIST_INTEL | CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_INTEL | CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_RECT_INTEL | CL_QUEUE_CAPABILITY_MAP_BUFFER_INTEL | CL_QUEUE_CAPABILITY_FILL_BUFFER_INTEL | CL_QUEUE_CAPABILITY_TRANSFER_IMAGE_INTEL | CL_QUEUE_CAPABILITY_MAP_IMAGE_INTEL | CL_QUEUE_CAPABILITY_FILL_IMAGE_INTEL | CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_IMAGE_INTEL | CL_QUEUE_CAPABILITY_TRANSFER_IMAGE_BUFFER_INTEL | CL_QUEUE_CAPABILITY_MARKER_INTEL | CL_QUEUE_CAPABILITY_BARRIER_INTEL | CL_QUEUE_CAPABILITY_KERNEL_INTEL; EXPECT_EQ(expectedProperties, MockClDevice::getQueueFamilyCapabilitiesAll()); } HWTEST_F(QueueFamiliesTests, givenComputeQueueWhenGettingQueueFamilyCapabilitiesThenReturnDefaultCapabilities) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); EXPECT_EQ(CL_QUEUE_DEFAULT_CAPABILITIES_INTEL, device->getQueueFamilyCapabilities(NEO::EngineGroupType::Compute)); EXPECT_EQ(CL_QUEUE_DEFAULT_CAPABILITIES_INTEL, device->getQueueFamilyCapabilities(NEO::EngineGroupType::RenderCompute)); } HWCMDTEST_F(IGFX_GEN8_CORE, QueueFamiliesTests, givenCopyQueueWhenGettingQueueFamilyCapabilitiesThenDoNotReturnUnsupportedOperations) { const cl_command_queue_capabilities_intel capabilitiesNotSupportedOnBlitter = CL_QUEUE_CAPABILITY_KERNEL_INTEL | CL_QUEUE_CAPABILITY_FILL_BUFFER_INTEL | CL_QUEUE_CAPABILITY_TRANSFER_IMAGE_INTEL | CL_QUEUE_CAPABILITY_FILL_IMAGE_INTEL | CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_IMAGE_INTEL | CL_QUEUE_CAPABILITY_TRANSFER_IMAGE_BUFFER_INTEL | CL_QUEUE_CAPABILITY_CREATE_CROSS_QUEUE_EVENTS_INTEL; const 
cl_command_queue_capabilities_intel expectedBlitterCapabilities = setBits(MockClDevice::getQueueFamilyCapabilitiesAll(), false, capabilitiesNotSupportedOnBlitter); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); EXPECT_EQ(expectedBlitterCapabilities, device->getQueueFamilyCapabilities(NEO::EngineGroupType::Copy)); } TEST(ClDeviceHelperTest, givenNonZeroNumberOfTilesWhenPrepareDeviceEnvironmentsCountCalledThenReturnCorrectValue) { DebugManagerStateRestore stateRestore; FeatureTable skuTable; WorkaroundTable waTable = {}; RuntimeCapabilityTable capTable = {}; GT_SYSTEM_INFO sysInfo = {}; sysInfo.MultiTileArchInfo.IsValid = true; sysInfo.MultiTileArchInfo.TileCount = 3; PLATFORM platform = {}; HardwareInfo hwInfo{&platform, &skuTable, &waTable, &sysInfo, capTable}; DebugManager.flags.CreateMultipleSubDevices.set(0); uint32_t devicesCount = HwHelper::getSubDevicesCount(&hwInfo); EXPECT_EQ(devicesCount, 3u); } TEST(ClDeviceHelperTest, givenZeroNumberOfTilesWhenPrepareDeviceEnvironmentsCountCalledThenReturnCorrectValue) { DebugManagerStateRestore stateRestore; FeatureTable skuTable; WorkaroundTable waTable = {}; RuntimeCapabilityTable capTable = {}; GT_SYSTEM_INFO sysInfo = {}; sysInfo.MultiTileArchInfo.IsValid = true; sysInfo.MultiTileArchInfo.TileCount = 0; PLATFORM platform = {}; HardwareInfo hwInfo{&platform, &skuTable, &waTable, &sysInfo, capTable}; DebugManager.flags.CreateMultipleSubDevices.set(0); uint32_t devicesCount = HwHelper::getSubDevicesCount(&hwInfo); EXPECT_EQ(devicesCount, 1u); } compute-runtime-22.14.22890/opencl/test/unit_test/device/device_timers_tests.cpp000066400000000000000000000135101422164147700276360ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/mocks/mock_ostime.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" 
#include "gtest/gtest.h"

using namespace NEO;

namespace ULT {

// Two device+host timer reads separated by a short sleep must both move forward.
TEST(MockOSTime, WhenSleepingThenDeviceAndHostTimerAreIncreased) {
    cl_ulong deviceTimestamp[2] = {0, 0};
    cl_ulong hostTimestamp[2] = {0, 0};

    // Owned through unique_ptr (as in the sibling tests) instead of a manual delete.
    auto mDev = std::unique_ptr<MockDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
    mDev->setOSTime(new MockOSTime());

    mDev->getDeviceAndHostTimer(
        &deviceTimestamp[0],
        &hostTimestamp[0]);

    std::this_thread::sleep_for(std::chrono::nanoseconds(1000));

    mDev->getDeviceAndHostTimer(
        &deviceTimestamp[1],
        &hostTimestamp[1]);

    EXPECT_LT(deviceTimestamp[0], deviceTimestamp[1]);
    EXPECT_LT(hostTimestamp[0], hostTimestamp[1]);
}

// The host time elapsed between two getDeviceAndHostTimer() calls and between two
// getHostTimer() calls, taken in interleaved order, may differ by at most 0.5% of the
// smaller of the two intervals.
TEST(MockOSTime, WhenGettingTimersThenDiffBetweenQueriesWithinAllowedError) {
    cl_ulong deviceTimestamp[2] = {0, 0};
    cl_ulong hostTimestamp[2] = {0, 0};
    cl_ulong hostOnlyTimestamp[2] = {0, 0};
    cl_ulong observedDiff = 0;
    cl_ulong allowedDiff = 0;
    const float allowedErr = 0.005f;

    auto mDev = std::unique_ptr<MockDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));

    mDev->getDeviceAndHostTimer(
        &deviceTimestamp[0],
        &hostTimestamp[0]);
    mDev->getHostTimer(
        &hostOnlyTimestamp[0]);
    mDev->getDeviceAndHostTimer(
        &deviceTimestamp[1],
        &hostTimestamp[1]);
    mDev->getHostTimer(
        &hostOnlyTimestamp[1]);

    const cl_ulong hostDiff = hostTimestamp[1] - hostTimestamp[0];
    const cl_ulong hostOnlyDiff = hostOnlyTimestamp[1] - hostOnlyTimestamp[0];

    EXPECT_LT(deviceTimestamp[0], deviceTimestamp[1]);
    // Each host-only read was taken after the paired combined read.
    EXPECT_LT(hostTimestamp[0], hostOnlyTimestamp[0]);
    EXPECT_LT(hostTimestamp[1], hostOnlyTimestamp[1]);

    // The tolerance is relative to the smaller interval.
    if (hostOnlyDiff > hostDiff) {
        observedDiff = hostOnlyDiff - hostDiff;
        allowedDiff = static_cast<cl_ulong>(allowedErr * hostDiff);
    } else {
        observedDiff = hostDiff - hostOnlyDiff;
        allowedDiff = static_cast<cl_ulong>(allowedErr * hostOnlyDiff);
    }

    EXPECT_LE(observedDiff, allowedDiff);
}

// Two host timer reads separated by a short sleep must move forward.
TEST(MockOSTime, WhenSleepingThenHostTimerIsIncreased) {
    cl_ulong hostTimestamp[2] = {0, 0};

    auto mDev = std::unique_ptr<MockDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
    mDev->setOSTime(new MockOSTime());

    mDev->getHostTimer(
        &hostTimestamp[0]);

    std::this_thread::sleep_for(std::chrono::nanoseconds(1000));

    mDev->getHostTimer(
        &hostTimestamp[1]);

    EXPECT_LT(hostTimestamp[0], hostTimestamp[1]);
}

// With no OSTime installed the platform host timer resolution is reported as 0.
TEST(MockOSTime, GivenNullWhenSettingOsTimeThenResolutionIsZero) {
    auto mDev = std::unique_ptr<MockDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
    mDev->setOSTime(nullptr);

    const double zeroRes = mDev->getPlatformHostTimerResolution();

    EXPECT_EQ(zeroRes, 0.0);
}

// Default flags: the device timestamp equals the constant CPU time, and so does the host one.
TEST(MockOSTime, givenDeviceTimestampBaseNotEnabledWhenGetDeviceAndHostTimerThenCpuTimestampIsReturned) {
    auto mockDevice = std::unique_ptr<MockDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
    mockDevice->setOSTime(new MockOSTimeWithConstTimestamp());

    uint64_t deviceTS = 0u, hostTS = 0u;
    mockDevice->getDeviceAndHostTimer(&deviceTS, &hostTS);

    EXPECT_EQ(deviceTS, MockDeviceTimeWithConstTimestamp::CPU_TIME_IN_NS);
    EXPECT_EQ(deviceTS, hostTS);
}

// EnableDeviceBasedTimestamps=true: the device timestamp is the constant GPU timestamp
// and no longer equals the host timestamp.
TEST(MockOSTime, givenDeviceTimestampBaseEnabledWhenGetDeviceAndHostTimerThenGpuTimestampIsReturned) {
    DebugManagerStateRestore dbgRestorer;
    DebugManager.flags.EnableDeviceBasedTimestamps.set(true);

    auto mockDevice = std::unique_ptr<MockDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
    mockDevice->setOSTime(new MockOSTimeWithConstTimestamp());

    uint64_t deviceTS = 0u, hostTS = 0u;
    mockDevice->getDeviceAndHostTimer(&deviceTS, &hostTS);

    EXPECT_EQ(deviceTS, MockDeviceTimeWithConstTimestamp::GPU_TIMESTAMP);
    EXPECT_NE(deviceTS, hostTS);
}

// OSTime whose CPU time query always fails.
class FailingMockOSTime : public OSTime {
  public:
    FailingMockOSTime() {
        // NOTE(review): the template argument of make_unique is missing in this copy of the
        // file - confirm which DeviceTime implementation is intended here.
        this->deviceTime = std::make_unique();
    }

    bool getCpuTime(uint64_t *timeStamp) override {
        return false;
    }

    double getHostTimerResolution() const override {
        return 0;
    }

    uint64_t getCpuRawTimestamp() override {
        return 0;
    }
};

// DeviceTime whose combined CPU/GPU time query always fails.
class FailingMockDeviceTime : public DeviceTime {
  public:
    bool getCpuGpuTime(TimeStampData *pGpuCpuTime, OSTime *osTime) override {
        return false;
    }

    double getDynamicDeviceTimerResolution(HardwareInfo const &hwInfo) const override {
        return 1.0;
    }

    uint64_t
getDynamicDeviceTimerClock(HardwareInfo const &hwInfo) const override { return static_cast(1000000000.0 / OSTime::getDeviceTimerResolution(hwInfo)); } }; class MockOSTimeWithFailingDeviceTime : public OSTime { public: MockOSTimeWithFailingDeviceTime() { this->deviceTime = std::make_unique(); } bool getCpuTime(uint64_t *timeStamp) override { return true; } double getHostTimerResolution() const override { return 0; } uint64_t getCpuRawTimestamp() override { return 0; } }; TEST(MockOSTime, givenFailingDeviceTimeWhenGetDeviceAndHostTimerThenFalseIsReturned) { auto mockDevice = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr)); mockDevice->setOSTime(new MockOSTimeWithFailingDeviceTime()); uint64_t deviceTS = 0u, hostTS = 0u; bool retVal = mockDevice->getDeviceAndHostTimer(&deviceTS, &hostTS); EXPECT_FALSE(retVal); EXPECT_EQ(deviceTS, 0u); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/device/device_win_timers_tests.cpp000066400000000000000000000023561422164147700305210ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/execution_environment/root_device_environment.h" #include "shared/test/common/mocks/mock_execution_environment.h" #include "shared/test/common/mocks/mock_ostime.h" #include "shared/test/common/mocks/mock_ostime_win.h" #include "shared/test/common/mocks/mock_wddm.h" #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "gtest/gtest.h" using namespace NEO; namespace ULT { typedef ::testing::Test MockOSTimeWinTest; TEST_F(MockOSTimeWinTest, WhenCreatingTimerThenResolutionIsSetCorrectly) { MockExecutionEnvironment executionEnvironment; RootDeviceEnvironment rootDeviceEnvironment(executionEnvironment); auto wddmMock = new WddmMock(rootDeviceEnvironment); auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr)); wddmMock->init(); 
wddmMock->timestampFrequency = 1000; std::unique_ptr timeWin(new MockOSTimeWin(wddmMock)); double res = 0.0; res = timeWin->getDynamicDeviceTimerResolution(device->getHardwareInfo()); EXPECT_EQ(res, 1e+06); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/device/get_device_info_size_tests.cpp000066400000000000000000000305061422164147700311630ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/get_info.h" #include "shared/source/helpers/string.h" #include "opencl/source/cl_device/cl_device_info_map.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "opencl/test/unit_test/mocks/ult_cl_device_factory.h" #include "gtest/gtest.h" #include namespace NEO { extern const char *latestConformanceVersionPassed; } // namespace NEO using namespace NEO; struct GetDeviceInfoSize : public ::testing::TestWithParam> { void SetUp() override { param = GetParam(); } std::pair param; }; TEST_P(GetDeviceInfoSize, GivenParamWhenGettingDeviceInfoThenSizeIsValid) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); size_t sizeReturned = GetInfo::invalidSourceSize; auto retVal = device->getDeviceInfo( param.first, 0, nullptr, &sizeReturned); if (CL_SUCCESS != retVal) { ASSERT_EQ(CL_SUCCESS, retVal) << " param = " << param.first; } ASSERT_NE(GetInfo::invalidSourceSize, sizeReturned); EXPECT_EQ(param.second, sizeReturned); } std::pair deviceInfoParams2[] = { {CL_DEVICE_ADDRESS_BITS, sizeof(cl_uint)}, {CL_DEVICE_AVAILABLE, sizeof(cl_bool)}, // {CL_DEVICE_BUILT_IN_KERNELS_WITH_VERSION, sizeof(cl_name_version[])}, {CL_DEVICE_COMPILER_AVAILABLE, sizeof(cl_bool)}, {CL_DEVICE_DOUBLE_FP_CONFIG, sizeof(cl_device_fp_config)}, {CL_DEVICE_ENDIAN_LITTLE, sizeof(cl_bool)}, {CL_DEVICE_ERROR_CORRECTION_SUPPORT, sizeof(cl_bool)}, {CL_DEVICE_EXECUTION_CAPABILITIES, 
sizeof(cl_device_exec_capabilities)}, // {CL_DEVICE_EXTENSIONS, sizeof(char[])}, // {CL_DEVICE_EXTENSIONS_WITH_VERSION, sizeof(cl_name_version[])}, {CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, sizeof(cl_ulong)}, {CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, sizeof(cl_device_mem_cache_type)}, {CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, sizeof(cl_uint)}, {CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(cl_ulong)}, {CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE, sizeof(size_t)}, {CL_DEVICE_ILS_WITH_VERSION, sizeof(cl_name_version[1])}, {CL_DEVICE_IL_VERSION, sizeof(char[12])}, {CL_DEVICE_IMAGE_SUPPORT, sizeof(cl_bool)}, {CL_DEVICE_LATEST_CONFORMANCE_VERSION_PASSED, strlen(latestConformanceVersionPassed) + 1}, {CL_DEVICE_LINKER_AVAILABLE, sizeof(cl_bool)}, {CL_DEVICE_LOCAL_MEM_SIZE, sizeof(cl_ulong)}, {CL_DEVICE_LOCAL_MEM_TYPE, sizeof(cl_device_local_mem_type)}, {CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(cl_uint)}, {CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(cl_uint)}, {CL_DEVICE_MAX_CONSTANT_ARGS, sizeof(cl_uint)}, {CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof(cl_ulong)}, {CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE, sizeof(size_t)}, {CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong)}, {CL_DEVICE_MAX_NUM_SUB_GROUPS, sizeof(cl_uint)}, {CL_DEVICE_MAX_ON_DEVICE_EVENTS, sizeof(cl_uint)}, {CL_DEVICE_MAX_ON_DEVICE_QUEUES, sizeof(cl_uint)}, {CL_DEVICE_MAX_PARAMETER_SIZE, sizeof(size_t)}, {CL_DEVICE_MAX_PIPE_ARGS, sizeof(cl_uint)}, {CL_DEVICE_MAX_SAMPLERS, sizeof(cl_uint)}, {CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t)}, {CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(cl_uint)}, {CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t[3])}, {CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof(cl_uint)}, // {CL_DEVICE_NAME, sizeof(char[])}, {CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR, sizeof(cl_uint)}, {CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT, sizeof(cl_uint)}, {CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, sizeof(cl_uint)}, {CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, sizeof(cl_uint)}, {CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT, sizeof(cl_uint)}, {CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, sizeof(cl_uint)}, 
{CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF, sizeof(cl_uint)}, {CL_DEVICE_NUMERIC_VERSION, sizeof(cl_version)}, // {CL_DEVICE_OPENCL_C_ALL_VERSIONS, sizeof(cl_name_version[])}, // {CL_DEVICE_OPENCL_C_FEATURES, sizeof(cl_name_version[])}, // {CL_DEVICE_OPENCL_C_VERSION, sizeof(char[])}, {CL_DEVICE_PARENT_DEVICE, sizeof(cl_device_id)}, {CL_DEVICE_PARTITION_AFFINITY_DOMAIN, sizeof(cl_device_affinity_domain)}, {CL_DEVICE_PARTITION_MAX_SUB_DEVICES, sizeof(cl_uint)}, {CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS, sizeof(cl_uint)}, {CL_DEVICE_PIPE_MAX_PACKET_SIZE, sizeof(cl_uint)}, {CL_DEVICE_PLATFORM, sizeof(cl_platform_id)}, {CL_DEVICE_PREFERRED_GLOBAL_ATOMIC_ALIGNMENT, sizeof(cl_uint)}, {CL_DEVICE_PREFERRED_INTEROP_USER_SYNC, sizeof(cl_bool)}, {CL_DEVICE_PREFERRED_LOCAL_ATOMIC_ALIGNMENT, sizeof(cl_uint)}, {CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT, sizeof(cl_uint)}, {CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, sizeof(cl_uint)}, {CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, sizeof(cl_uint)}, {CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, sizeof(cl_uint)}, {CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, sizeof(cl_uint)}, {CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, sizeof(cl_uint)}, {CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, sizeof(cl_uint)}, {CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF, sizeof(cl_uint)}, {CL_DEVICE_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(size_t)}, {CL_DEVICE_PRINTF_BUFFER_SIZE, sizeof(size_t)}, // {CL_DEVICE_PROFILE, sizeof(char[])}, {CL_DEVICE_PROFILING_TIMER_RESOLUTION, sizeof(size_t)}, {CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE, sizeof(cl_uint)}, {CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE, sizeof(cl_uint)}, {CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES, sizeof(cl_command_queue_properties)}, {CL_DEVICE_QUEUE_ON_HOST_PROPERTIES, sizeof(cl_command_queue_properties)}, {CL_DEVICE_REFERENCE_COUNT, sizeof(cl_uint)}, {CL_DEVICE_SINGLE_FP_CONFIG, sizeof(cl_device_fp_config)}, // {CL_DEVICE_SPIR_VERSIONS, sizeof(char[])}, {CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS, sizeof(cl_bool)}, {CL_DEVICE_SVM_CAPABILITIES, 
sizeof(cl_device_svm_capabilities)}, // {CL_DEVICE_TERMINATE_CAPABILITY_KHR, sizeof(cl_device_terminate_capability_khr)}, {CL_DEVICE_TYPE, sizeof(cl_device_type)}, // {CL_DEVICE_VENDOR, sizeof(char[])}, {CL_DEVICE_VENDOR_ID, sizeof(cl_uint)}, // {CL_DEVICE_VERSION, sizeof(char[])}, // {CL_DRIVER_VERSION, sizeof(char[])}, }; INSTANTIATE_TEST_CASE_P( Device_, GetDeviceInfoSize, testing::ValuesIn(deviceInfoParams2)); struct GetDeviceInfoForImage : public GetDeviceInfoSize {}; TEST_P(GetDeviceInfoForImage, GivenParamWhenGettingDeviceInfoThenSizeIsValid) { auto device = std::make_unique(*MockDevice::createWithNewExecutionEnvironment(nullptr), platform()); if (!device->getSharedDeviceInfo().imageSupport) { GTEST_SKIP(); } size_t sizeReturned = 0; auto retVal = device->getDeviceInfo( param.first, 0, nullptr, &sizeReturned); if (CL_SUCCESS != retVal) { ASSERT_EQ(CL_SUCCESS, retVal) << " param = " << param.first; } ASSERT_NE(0u, sizeReturned); EXPECT_EQ(param.second, sizeReturned); } TEST_P(GetDeviceInfoForImage, whenImageAreNotSupportedThenClSuccessAndSizeofCluintIsReturned) { auto device = std::make_unique(*MockDevice::createWithNewExecutionEnvironment(nullptr), platform()); if (device->getSharedDeviceInfo().imageSupport) { GTEST_SKIP(); } size_t sizeReturned = 0; auto retVal = device->getDeviceInfo( param.first, 0, nullptr, &sizeReturned); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(param.second, sizeReturned); } TEST_P(GetDeviceInfoForImage, givenInfoImageParamsWhenCallGetDeviceInfoForImageThenSizeIsValidAndTrueReturned) { auto device = std::make_unique(*MockDevice::createWithNewExecutionEnvironment(nullptr), platform()); size_t srcSize = 0; size_t retSize = 0; const void *src = nullptr; auto retVal = device->getDeviceInfoForImage( param.first, src, srcSize, retSize); EXPECT_TRUE(retVal); ASSERT_NE(0u, srcSize); EXPECT_EQ(param.second, srcSize); EXPECT_EQ(param.second, retSize); } TEST(GetDeviceInfoForImage, 
givenNotImageParamWhenCallGetDeviceInfoForImageThenSizeIsNotValidAndFalseReturned) { auto device = std::make_unique(*MockDevice::createWithNewExecutionEnvironment(nullptr), platform()); size_t srcSize = 0; size_t retSize = 0; const void *src = nullptr; cl_device_info notImageParam = CL_DEVICE_ADDRESS_BITS; size_t paramSize = sizeof(cl_uint); auto retVal = device->getDeviceInfoForImage( notImageParam, src, srcSize, retSize); EXPECT_FALSE(retVal); EXPECT_EQ(0u, srcSize); EXPECT_NE(paramSize, srcSize); EXPECT_NE(paramSize, retSize); } std::pair deviceInfoImageParams[] = { {CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof(size_t)}, {CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof(size_t)}, {CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof(size_t)}, {CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof(size_t)}, {CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof(size_t)}, {CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT, sizeof(cl_uint)}, {CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, sizeof(size_t)}, {CL_DEVICE_IMAGE_MAX_BUFFER_SIZE, sizeof(size_t)}, {CL_DEVICE_IMAGE_PITCH_ALIGNMENT, sizeof(cl_uint)}, {CL_DEVICE_MAX_READ_IMAGE_ARGS, sizeof(cl_uint)}, {CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS, sizeof(cl_uint)}, {CL_DEVICE_MAX_WRITE_IMAGE_ARGS, sizeof(cl_uint)}, }; INSTANTIATE_TEST_CASE_P( Device_, GetDeviceInfoForImage, testing::ValuesIn(deviceInfoImageParams)); TEST(DeviceInfoTests, givenDefaultDeviceWhenQueriedForDeviceVersionThenProperSizeIsReturned) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); size_t sizeReturned = 0; auto retVal = device->getDeviceInfo( CL_DEVICE_VERSION, 0, nullptr, &sizeReturned); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(16u, sizeReturned); std::unique_ptr deviceVersion(new char[sizeReturned]); retVal = device->getDeviceInfo( CL_DEVICE_VERSION, sizeReturned, deviceVersion.get(), nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(DeviceInfoTests, givenDefaultDeviceWhenQueriedForBuiltInKernelsWithVersionThenProperSizeIsReturned) { UltClDeviceFactory deviceFactory{1, 0}; auto pClDevice = 
deviceFactory.rootDevices[0]; size_t sizeReturned = 0; auto retVal = pClDevice->getDeviceInfo( CL_DEVICE_BUILT_IN_KERNELS_WITH_VERSION, 0, nullptr, &sizeReturned); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pClDevice->getDeviceInfo().builtInKernelsWithVersion.size() * sizeof(cl_name_version), sizeReturned); } TEST(DeviceInfoTests, givenDefaultDeviceWhenQueriedForOpenclCAllVersionsThenProperSizeIsReturned) { UltClDeviceFactory deviceFactory{1, 0}; auto pClDevice = deviceFactory.rootDevices[0]; size_t sizeReturned = 0; auto retVal = pClDevice->getDeviceInfo( CL_DEVICE_OPENCL_C_ALL_VERSIONS, 0, nullptr, &sizeReturned); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pClDevice->getDeviceInfo().openclCAllVersions.size() * sizeof(cl_name_version), sizeReturned); } TEST(DeviceInfoTests, givenDefaultDeviceWhenQueriedForOpenclCFeaturesThenProperSizeIsReturned) { UltClDeviceFactory deviceFactory{1, 0}; auto pClDevice = deviceFactory.rootDevices[0]; size_t sizeReturned = 0; auto retVal = pClDevice->getDeviceInfo( CL_DEVICE_OPENCL_C_FEATURES, 0, nullptr, &sizeReturned); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pClDevice->getDeviceInfo().openclCFeatures.size() * sizeof(cl_name_version), sizeReturned); } TEST(DeviceInfoTests, givenDefaultDeviceWhenQueriedForExtensionsWithVersionThenProperSizeIsReturned) { UltClDeviceFactory deviceFactory{1, 0}; auto pClDevice = deviceFactory.rootDevices[0]; size_t sizeReturned = 0; auto retVal = pClDevice->getDeviceInfo( CL_DEVICE_EXTENSIONS_WITH_VERSION, 0, nullptr, &sizeReturned); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pClDevice->getDeviceInfo().extensionsWithVersion.size() * sizeof(cl_name_version), sizeReturned); } compute-runtime-22.14.22890/opencl/test/unit_test/device/get_device_info_tests.cpp000066400000000000000000001274351422164147700301410ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/get_info.h" #include 
"shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/mocks/mock_driver_info.h" #include "shared/test/common/mocks/mock_os_context.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/cl_device/cl_device_info_map.h" #include "opencl/source/helpers/cl_hw_helper.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/fixtures/device_info_fixture.h" #include "opencl/test/unit_test/helpers/raii_hw_helper.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/ult_cl_device_factory.h" #include "gtest/gtest.h" #include using namespace NEO; TEST(GetDeviceInfo, GivenInvalidParamsWhenGettingDeviceInfoThenInvalidValueErrorIsReturned) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); auto retVal = device->getDeviceInfo( 0, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST(GetDeviceInfo, GivenInvalidParametersWhenGettingDeviceInfoThenValueSizeRetIsNotUpdated) { size_t valueSizeRet = 0x1234; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); auto retVal = device->getDeviceInfo( 0, 0, nullptr, &valueSizeRet); EXPECT_EQ(CL_INVALID_VALUE, retVal); EXPECT_EQ(0x1234u, valueSizeRet); } HWCMDTEST_F(IGFX_GEN8_CORE, GetDeviceInfoMemCapabilitiesTest, GivenValidParametersWhenGetDeviceInfoIsCalledForBdwAndLaterThenClSuccessIsReturned) { std::vector params = { {CL_DEVICE_HOST_MEM_CAPABILITIES_INTEL, (CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL | CL_UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL)}, {CL_DEVICE_DEVICE_MEM_CAPABILITIES_INTEL, (CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL | CL_UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL)}, {CL_DEVICE_SINGLE_DEVICE_SHARED_MEM_CAPABILITIES_INTEL, (CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL | CL_UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL)}, 
{CL_DEVICE_CROSS_DEVICE_SHARED_MEM_CAPABILITIES_INTEL, 0}, {CL_DEVICE_SHARED_SYSTEM_MEM_CAPABILITIES_INTEL, 0}}; check(params); } TEST(GetDeviceInfo, GivenPlanarYuvExtensionDisabledAndSupportImageEnabledWhenGettingPlanarYuvMaxWidthHeightThenInvalidValueErrorIsReturned) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); device->sharedDeviceInfo.imageSupport = true; device->deviceInfo.nv12Extension = false; uint32_t value; auto retVal = device->getDeviceInfo( CL_DEVICE_PLANAR_YUV_MAX_WIDTH_INTEL, 4, &value, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); retVal = device->getDeviceInfo( CL_DEVICE_PLANAR_YUV_MAX_HEIGHT_INTEL, 4, &value, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST(GetDeviceInfo, GivenPlanarYuvExtensionEnabledAndSupportImageEnabledWhenGettingPlanarYuvMaxWidthHeightThenCorrectValuesAreReturned) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); device->sharedDeviceInfo.imageSupport = true; device->deviceInfo.nv12Extension = true; size_t value = 0; auto retVal = device->getDeviceInfo( CL_DEVICE_PLANAR_YUV_MAX_WIDTH_INTEL, sizeof(size_t), &value, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(16384u, value); } TEST(GetDeviceInfo, GivenPlanarYuvExtensionDisabledAndSupportImageDisabledWhenGettingPlanarYuvMaxWidthHeightThenInvalidValueErrorIsReturned) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); device->sharedDeviceInfo.imageSupport = false; device->deviceInfo.nv12Extension = false; uint32_t value; auto retVal = device->getDeviceInfo( CL_DEVICE_PLANAR_YUV_MAX_WIDTH_INTEL, 4, &value, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); retVal = device->getDeviceInfo( CL_DEVICE_PLANAR_YUV_MAX_HEIGHT_INTEL, 4, &value, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST(GetDeviceInfo, GivenPlanarYuvExtensionEnabledAndSupportImageDisabledWhenGettingPlanarYuvMaxWidthHeightThenZeroIsReturned) { auto device = 
std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); device->sharedDeviceInfo.imageSupport = false; device->deviceInfo.nv12Extension = true; size_t value = 0; auto retVal = device->getDeviceInfo( CL_DEVICE_PLANAR_YUV_MAX_WIDTH_INTEL, sizeof(size_t), &value, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, value); retVal = device->getDeviceInfo( CL_DEVICE_PLANAR_YUV_MAX_HEIGHT_INTEL, sizeof(size_t), &value, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, value); } TEST(GetDeviceInfo, GivenImageSupportDisabledWhenGettingImage2dMaxWidthHeightThenZeroIsReturned) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); device->sharedDeviceInfo.imageSupport = false; size_t value = 0; auto retVal = device->getDeviceInfo( CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof(size_t), &value, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, value); retVal = device->getDeviceInfo( CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof(size_t), &value, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, value); } TEST(GetDeviceInfo, GivenImageSupportEnabledWhenGettingImage2dMaxWidthHeightThenCorrectValuesAreReturned) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); device->sharedDeviceInfo.imageSupport = true; size_t value = 0; auto retVal = device->getDeviceInfo( CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof(size_t), &value, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = device->getDeviceInfo( CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof(size_t), &value, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(GetDeviceInfo, GivenImageSupportDisabledWhenGettingImage3dMaxWidthHeightDepthThenZeroIsReturned) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); size_t value = 0; device->sharedDeviceInfo.imageSupport = false; auto retVal = device->getDeviceInfo( CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof(size_t), &value, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, value); retVal = 
device->getDeviceInfo( CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof(size_t), &value, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, value); retVal = device->getDeviceInfo( CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof(size_t), &value, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, value); } TEST(GetDeviceInfo, GivenImageSupportEnabledWhenGettingImage3dMaxWidthHeightDepthThenCorrectValuesAreReturned) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); device->sharedDeviceInfo.imageSupport = true; size_t value = 0; auto retVal = device->getDeviceInfo( CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof(size_t), &value, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = device->getDeviceInfo( CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof(size_t), &value, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = device->getDeviceInfo( CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof(size_t), &value, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(GetDeviceInfo, GivenImageSupportDisabledWhenGettingImageMaxArgsThenZeroIsReturned) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); device->sharedDeviceInfo.imageSupport = false; uint32_t value; auto retVal = device->getDeviceInfo( CL_DEVICE_MAX_READ_IMAGE_ARGS, sizeof(size_t), &value, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, value); retVal = device->getDeviceInfo( CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS, sizeof(size_t), &value, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, value); retVal = device->getDeviceInfo( CL_DEVICE_MAX_WRITE_IMAGE_ARGS, sizeof(size_t), &value, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, value); } TEST(GetDeviceInfo, GivenImageSupportEnabledWhenGettingImageMaxArgsThenCorrectValuesAreReturned) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); device->sharedDeviceInfo.imageSupport = true; size_t value = 0; auto retVal = device->getDeviceInfo( CL_DEVICE_MAX_READ_IMAGE_ARGS, sizeof(cl_uint), &value, nullptr); 
EXPECT_EQ(CL_SUCCESS, retVal); retVal = device->getDeviceInfo( CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS, sizeof(cl_uint), &value, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = device->getDeviceInfo( CL_DEVICE_MAX_WRITE_IMAGE_ARGS, sizeof(cl_uint), &value, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(GetDeviceInfo, GivenImageSupportDisabledWhenGettingImageBaseAddressAlignmentThenZeroIsReturned) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); size_t value = 0; device->sharedDeviceInfo.imageSupport = false; auto retVal = device->getDeviceInfo( CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT, sizeof(cl_uint), &value, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, value); } TEST(GetDeviceInfo, GivenImageSupportEnabledWhenGettingImageBaseAddressAlignmentThenCorrectValuesAreReturned) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); device->sharedDeviceInfo.imageSupport = true; size_t value = 0; auto retVal = device->getDeviceInfo( CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT, sizeof(cl_uint), &value, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(0u, value); } TEST(GetDeviceInfo, GivenImageSupportDisabledWhenGettingImageMaxArraySizeThenZeroIsReturned) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); size_t value = 0; device->sharedDeviceInfo.imageSupport = false; auto retVal = device->getDeviceInfo( CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, sizeof(size_t), &value, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, value); } TEST(GetDeviceInfo, GivenImageSupportEnabledWhenGettingImageMaxArraySizeThenCorrectValuesAreReturned) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); device->sharedDeviceInfo.imageSupport = true; size_t value = 0; auto retVal = device->getDeviceInfo( CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, sizeof(size_t), &value, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(GetDeviceInfo, 
GivenImageSupportDisabledWhenGettingImageMaxBufferSizeThenZeroIsReturned) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); size_t value = 0; device->sharedDeviceInfo.imageSupport = false; auto retVal = device->getDeviceInfo( CL_DEVICE_IMAGE_MAX_BUFFER_SIZE, sizeof(size_t), &value, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, value); } TEST(GetDeviceInfo, GivenImageSupportEnabledWhenGettingImageMaxBufferSizeThenCorrectValuesAreReturned) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); device->sharedDeviceInfo.imageSupport = true; size_t value = 0; auto retVal = device->getDeviceInfo( CL_DEVICE_IMAGE_MAX_BUFFER_SIZE, sizeof(size_t), &value, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(GetDeviceInfo, GivenImageSupportDisabledWhenGettingImagePitchAlignmentThenZeroIsReturned) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); size_t value = 0; device->sharedDeviceInfo.imageSupport = false; auto retVal = device->getDeviceInfo( CL_DEVICE_IMAGE_PITCH_ALIGNMENT, sizeof(cl_uint), &value, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, value); } TEST(GetDeviceInfo, GivenImageSupportEnabledWhenGettingImagePitchAlignmentThenCorrectValuesAreReturned) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); device->sharedDeviceInfo.imageSupport = true; size_t value = 0; auto retVal = device->getDeviceInfo( CL_DEVICE_IMAGE_PITCH_ALIGNMENT, sizeof(cl_uint), &value, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(0u, value); } TEST(GetDeviceInfo, GivenNumSimultaneousInteropsWhenGettingDeviceInfoThenCorrectValueIsReturned) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); device->simultaneousInterops = {0}; cl_uint value = 0; size_t size = 0; auto retVal = device->getDeviceInfo(CL_DEVICE_NUM_SIMULTANEOUS_INTEROPS_INTEL, sizeof(cl_uint), &value, &size); 
EXPECT_EQ(CL_INVALID_VALUE, retVal); EXPECT_EQ(0u, size); EXPECT_EQ(0u, value); device->simultaneousInterops = {1, 2, 3, 0}; retVal = device->getDeviceInfo(CL_DEVICE_NUM_SIMULTANEOUS_INTEROPS_INTEL, sizeof(cl_uint), &value, &size); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(cl_uint), size); EXPECT_EQ(1u, value); } TEST(GetDeviceInfo, GivenSimultaneousInteropsWhenGettingDeviceInfoThenCorrectValueIsReturned) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); device->simultaneousInterops = {0}; cl_uint value[4] = {}; size_t size = 0; auto retVal = device->getDeviceInfo(CL_DEVICE_SIMULTANEOUS_INTEROPS_INTEL, sizeof(cl_uint), &value, &size); EXPECT_EQ(CL_INVALID_VALUE, retVal); device->simultaneousInterops = {1, 2, 3, 0}; retVal = device->getDeviceInfo(CL_DEVICE_SIMULTANEOUS_INTEROPS_INTEL, sizeof(cl_uint) * 4u, &value, &size); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(cl_uint) * 4u, size); EXPECT_TRUE(memcmp(value, &device->simultaneousInterops[0], 4u * sizeof(cl_uint)) == 0); } TEST(GetDeviceInfo, GivenMaxGlobalVariableSizeWhenGettingDeviceInfoThenCorrectValueIsReturned) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); size_t value = 0; size_t size = 0; auto retVal = device->getDeviceInfo(CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE, sizeof(size_t), &value, &size); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(size_t), size); if (device->areOcl21FeaturesEnabled()) { EXPECT_EQ(value, 65536u); } else { EXPECT_EQ(value, 0u); } } TEST(GetDeviceInfo, GivenGlobalVariablePreferredTotalSizeWhenGettingDeviceInfoThenCorrectValueIsReturned) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); size_t value = 0; size_t size = 0; auto retVal = device->getDeviceInfo(CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE, sizeof(size_t), &value, &size); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(size_t), size); if (device->areOcl21FeaturesEnabled()) { EXPECT_EQ(value, 
static_cast(device->getSharedDeviceInfo().maxMemAllocSize)); } else { EXPECT_EQ(value, 0u); } } TEST(GetDeviceInfo, GivenPreferredInteropsWhenGettingDeviceInfoThenCorrectValueIsReturned) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); cl_bool value = 0; size_t size = 0; auto retVal = device->getDeviceInfo(CL_DEVICE_PREFERRED_INTEROP_USER_SYNC, sizeof(cl_bool), &value, &size); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(cl_bool), size); EXPECT_TRUE(value == 1u); } TEST(GetDeviceInfo, WhenQueryingIlsWithVersionThenProperValueIsReturned) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); cl_name_version ilsWithVersion[1]; size_t paramRetSize; const auto retVal = device->getDeviceInfo(CL_DEVICE_ILS_WITH_VERSION, sizeof(ilsWithVersion), &ilsWithVersion, ¶mRetSize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(cl_name_version), paramRetSize); EXPECT_EQ(CL_MAKE_VERSION(1u, 2u, 0u), ilsWithVersion->version); EXPECT_STREQ("SPIR-V", ilsWithVersion->name); } TEST(GetDeviceInfo, WhenQueryingAtomicMemoryCapabilitiesThenProperValueIsReturned) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); cl_device_atomic_capabilities atomicMemoryCapabilities; size_t paramRetSize; const auto retVal = device->getDeviceInfo(CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES, sizeof(cl_device_atomic_capabilities), &atomicMemoryCapabilities, ¶mRetSize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(cl_device_atomic_capabilities), paramRetSize); cl_device_atomic_capabilities expectedCapabilities = CL_DEVICE_ATOMIC_ORDER_RELAXED | CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP; if (device->areOcl21FeaturesSupported()) { expectedCapabilities |= CL_DEVICE_ATOMIC_ORDER_ACQ_REL | CL_DEVICE_ATOMIC_ORDER_SEQ_CST | CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES | CL_DEVICE_ATOMIC_SCOPE_DEVICE; } EXPECT_EQ(expectedCapabilities, atomicMemoryCapabilities); } TEST(GetDeviceInfo, 
WhenQueryingAtomicFenceCapabilitiesThenProperValueIsReturned) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); cl_device_atomic_capabilities atomicFenceCapabilities; size_t paramRetSize; const auto retVal = device->getDeviceInfo(CL_DEVICE_ATOMIC_FENCE_CAPABILITIES, sizeof(cl_device_atomic_capabilities), &atomicFenceCapabilities, ¶mRetSize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(cl_device_atomic_capabilities), paramRetSize); cl_device_atomic_capabilities expectedCapabilities = CL_DEVICE_ATOMIC_ORDER_RELAXED | CL_DEVICE_ATOMIC_ORDER_ACQ_REL | CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP; if (device->areOcl21FeaturesSupported()) { expectedCapabilities |= CL_DEVICE_ATOMIC_ORDER_SEQ_CST | CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES | CL_DEVICE_ATOMIC_SCOPE_DEVICE | CL_DEVICE_ATOMIC_SCOPE_WORK_ITEM; } EXPECT_EQ(expectedCapabilities, atomicFenceCapabilities); } TEST(GetDeviceInfo, WhenQueryingDeviceEnqueueSupportThenProperValueIsReturned) { UltClDeviceFactory deviceFactory{1, 0}; cl_bool deviceEnqueueSupport; size_t paramRetSize; const auto retVal = deviceFactory.rootDevices[0]->getDeviceInfo(CL_DEVICE_DEVICE_ENQUEUE_CAPABILITIES, sizeof(cl_bool), &deviceEnqueueSupport, ¶mRetSize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(cl_bool), paramRetSize); cl_bool expectedDeviceEnqueueSupport = CL_FALSE; EXPECT_EQ(expectedDeviceEnqueueSupport, deviceEnqueueSupport); } TEST(GetDeviceInfo, WhenQueryingDeviceEnqueueCapabilitiesThenFalseIsReturned) { UltClDeviceFactory deviceFactory{1, 0}; cl_device_device_enqueue_capabilities deviceEnqueueCapabilities; size_t paramRetSize; const auto retVal = deviceFactory.rootDevices[0]->getDeviceInfo(CL_DEVICE_DEVICE_ENQUEUE_CAPABILITIES, sizeof(cl_device_device_enqueue_capabilities), &deviceEnqueueCapabilities, ¶mRetSize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(cl_device_device_enqueue_capabilities), paramRetSize); EXPECT_FALSE(deviceEnqueueCapabilities); } TEST(GetDeviceInfo, 
WhenQueryingPipesSupportThenProperValueIsReturned) { UltClDeviceFactory deviceFactory{1, 0}; cl_bool pipesSupport; size_t paramRetSize; const auto retVal = deviceFactory.rootDevices[0]->getDeviceInfo(CL_DEVICE_PIPE_SUPPORT, sizeof(cl_bool), &pipesSupport, ¶mRetSize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(cl_bool), paramRetSize); cl_bool expectedPipesSupport = deviceFactory.rootDevices[0]->arePipesSupported() ? CL_TRUE : CL_FALSE; EXPECT_EQ(expectedPipesSupport, pipesSupport); } TEST(GetDeviceInfo, WhenQueryingNonUniformWorkGroupSupportThenProperValueIsReturned) { UltClDeviceFactory deviceFactory{1, 0}; cl_bool nonUniformGroupSupport; size_t paramRetSize; const auto retVal = deviceFactory.rootDevices[0]->getDeviceInfo(CL_DEVICE_NON_UNIFORM_WORK_GROUP_SUPPORT, sizeof(cl_bool), &nonUniformGroupSupport, ¶mRetSize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(cl_bool), paramRetSize); cl_bool expectedNonUniformGroupSupport = CL_TRUE; EXPECT_EQ(expectedNonUniformGroupSupport, nonUniformGroupSupport); } TEST(GetDeviceInfo, WhenQueryingWorkGroupCollectiveFunctionsSupportThenProperValueIsReturned) { UltClDeviceFactory deviceFactory{1, 0}; cl_bool workGroupCollectiveFunctionsSupport; size_t paramRetSize; const auto retVal = deviceFactory.rootDevices[0]->getDeviceInfo(CL_DEVICE_WORK_GROUP_COLLECTIVE_FUNCTIONS_SUPPORT, sizeof(cl_bool), &workGroupCollectiveFunctionsSupport, ¶mRetSize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(cl_bool), paramRetSize); cl_bool expectedWorkGroupCollectiveFunctionsSupport = deviceFactory.rootDevices[0]->areOcl21FeaturesSupported() ? 
CL_TRUE : CL_FALSE; EXPECT_EQ(expectedWorkGroupCollectiveFunctionsSupport, workGroupCollectiveFunctionsSupport); } TEST(GetDeviceInfo, WhenQueryingGenericAddressSpaceSupportThenProperValueIsReturned) { UltClDeviceFactory deviceFactory{1, 0}; cl_bool genericAddressSpaceSupport; size_t paramRetSize; const auto retVal = deviceFactory.rootDevices[0]->getDeviceInfo(CL_DEVICE_GENERIC_ADDRESS_SPACE_SUPPORT, sizeof(cl_bool), &genericAddressSpaceSupport, ¶mRetSize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(cl_bool), paramRetSize); cl_bool expectedGenericAddressSpaceSupport = deviceFactory.rootDevices[0]->areOcl21FeaturesSupported() ? CL_TRUE : CL_FALSE; EXPECT_EQ(expectedGenericAddressSpaceSupport, genericAddressSpaceSupport); } template class MockHwHelper : public HwHelperHw { public: const EngineInstancesContainer getGpgpuEngineInstances(const HardwareInfo &hwInfo) const override { EngineInstancesContainer result{}; for (int i = 0; i < ccsCount; i++) { result.push_back({aub_stream::ENGINE_CCS, EngineUsage::Regular}); } for (int i = 0; i < bcsCount; i++) { result.push_back({aub_stream::ENGINE_BCS, EngineUsage::Regular}); } return result; } EngineGroupType getEngineGroupType(aub_stream::EngineType engineType, EngineUsage engineUsage, const HardwareInfo &hwInfo) const override { switch (engineType) { case aub_stream::ENGINE_RCS: return EngineGroupType::RenderCompute; case aub_stream::ENGINE_CCS: case aub_stream::ENGINE_CCS1: case aub_stream::ENGINE_CCS2: case aub_stream::ENGINE_CCS3: return EngineGroupType::Compute; case aub_stream::ENGINE_BCS: return EngineGroupType::Copy; default: UNRECOVERABLE_IF(true); } } bool isSubDeviceEngineSupported(const HardwareInfo &hwInfo, const DeviceBitfield &deviceBitfield, aub_stream::EngineType engineType) const override { if ((deviceBitfield.to_ulong() == disableEngineSupportOnSubDevice) && (disabledSubDeviceEngineType == engineType)) { return false; } return true; } static auto overrideHwHelper() { return 
RAIIHwHelperFactory>{::defaultHwInfo->platform.eRenderCoreFamily}; } uint64_t disableEngineSupportOnSubDevice = -1; // disabled by default aub_stream::EngineType disabledSubDeviceEngineType = aub_stream::EngineType::ENGINE_BCS; }; using GetDeviceInfoQueueFamilyTest = ::testing::Test; HWTEST_F(GetDeviceInfoQueueFamilyTest, givenSingleDeviceWhenInitializingCapsThenReturnCorrectFamilies) { auto raiiHwHelper = MockHwHelper::overrideHwHelper(); VariableBackup backupHwInfo(defaultHwInfo.get()); defaultHwInfo->capabilityTable.blitterOperationsSupported = true; UltClDeviceFactory deviceFactory{1, 0}; ClDevice &clDevice = *deviceFactory.rootDevices[0]; size_t paramRetSize{}; cl_int retVal{}; cl_queue_family_properties_intel families[CommonConstants::engineGroupCount]; retVal = clDevice.getDeviceInfo(CL_DEVICE_QUEUE_FAMILY_PROPERTIES_INTEL, sizeof(families), families, ¶mRetSize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(2u, paramRetSize / sizeof(cl_queue_family_properties_intel)); EXPECT_EQ(CL_QUEUE_DEFAULT_CAPABILITIES_INTEL, families[0].capabilities); EXPECT_EQ(3u, families[0].count); EXPECT_EQ(clDevice.getDeviceInfo().queueOnHostProperties, families[0].properties); EXPECT_EQ(clDevice.getQueueFamilyCapabilities(EngineGroupType::Copy), families[1].capabilities); EXPECT_EQ(1u, families[1].count); EXPECT_EQ(clDevice.getDeviceInfo().queueOnHostProperties, families[1].properties); } HWTEST_F(GetDeviceInfoQueueFamilyTest, givenSubDeviceWhenInitializingCapsThenReturnCorrectFamilies) { auto raiiHwHelper = MockHwHelper::overrideHwHelper(); VariableBackup backupHwInfo(defaultHwInfo.get()); defaultHwInfo->capabilityTable.blitterOperationsSupported = true; UltClDeviceFactory deviceFactory{1, 2}; ClDevice &clDevice = *deviceFactory.subDevices[1]; size_t paramRetSize{}; cl_int retVal{}; cl_queue_family_properties_intel families[CommonConstants::engineGroupCount]; retVal = clDevice.getDeviceInfo(CL_DEVICE_QUEUE_FAMILY_PROPERTIES_INTEL, sizeof(families), families, ¶mRetSize); 
EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(2u, paramRetSize / sizeof(cl_queue_family_properties_intel)); EXPECT_EQ(CL_QUEUE_DEFAULT_CAPABILITIES_INTEL, families[0].capabilities); EXPECT_EQ(3u, families[0].count); EXPECT_EQ(clDevice.getDeviceInfo().queueOnHostProperties, families[0].properties); EXPECT_EQ(clDevice.getQueueFamilyCapabilities(EngineGroupType::Copy), families[1].capabilities); EXPECT_EQ(1u, families[1].count); EXPECT_EQ(clDevice.getDeviceInfo().queueOnHostProperties, families[1].properties); } HWTEST_F(GetDeviceInfoQueueFamilyTest, givenSubDeviceWithoutSupportedEngineWhenInitializingCapsThenReturnCorrectFamilies) { constexpr int bcsCount = 1; using MockHwHelperT = MockHwHelper; auto raiiHwHelper = MockHwHelperT::overrideHwHelper(); MockHwHelperT &mockHwHelper = static_cast(raiiHwHelper.mockHwHelper); mockHwHelper.disableEngineSupportOnSubDevice = 0b10; // subdevice 1 mockHwHelper.disabledSubDeviceEngineType = aub_stream::EngineType::ENGINE_BCS; VariableBackup backupHwInfo(defaultHwInfo.get()); defaultHwInfo->capabilityTable.blitterOperationsSupported = true; UltClDeviceFactory deviceFactory{1, 2}; ClDevice &clDevice0 = *deviceFactory.subDevices[0]; ClDevice &clDevice1 = *deviceFactory.subDevices[1]; size_t paramRetSize{}; cl_int retVal{}; // subdevice 0 { cl_queue_family_properties_intel families[CommonConstants::engineGroupCount]; retVal = clDevice0.getDeviceInfo(CL_DEVICE_QUEUE_FAMILY_PROPERTIES_INTEL, sizeof(families), families, ¶mRetSize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(2u, paramRetSize / sizeof(cl_queue_family_properties_intel)); EXPECT_EQ(CL_QUEUE_DEFAULT_CAPABILITIES_INTEL, families[0].capabilities); EXPECT_EQ(3u, families[0].count); EXPECT_EQ(clDevice0.getDeviceInfo().queueOnHostProperties, families[0].properties); EXPECT_EQ(clDevice0.getQueueFamilyCapabilities(EngineGroupType::Copy), families[1].capabilities); EXPECT_EQ(1u, families[1].count); EXPECT_EQ(clDevice0.getDeviceInfo().queueOnHostProperties, families[1].properties); } // 
subdevice 1 { cl_queue_family_properties_intel families[CommonConstants::engineGroupCount]; retVal = clDevice1.getDeviceInfo(CL_DEVICE_QUEUE_FAMILY_PROPERTIES_INTEL, sizeof(families), families, ¶mRetSize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, paramRetSize / sizeof(cl_queue_family_properties_intel)); EXPECT_EQ(CL_QUEUE_DEFAULT_CAPABILITIES_INTEL, families[0].capabilities); EXPECT_EQ(3u, families[0].count); EXPECT_EQ(clDevice1.getDeviceInfo().queueOnHostProperties, families[0].properties); clDevice1.getExecutionEnvironment()->rootDeviceEnvironments[0]->getMutableHardwareInfo()->capabilityTable.blitterOperationsSupported = true; MockContext context(&clDevice1); MockCommandQueue cmdQ(&context, &clDevice1, nullptr, false); EXPECT_EQ(nullptr, cmdQ.getBcsCommandStreamReceiver(aub_stream::EngineType::ENGINE_BCS)); } } HWTEST_F(GetDeviceInfoQueueFamilyTest, givenDeviceRootDeviceWhenInitializingCapsThenReturnDefaultFamily) { UltClDeviceFactory deviceFactory{1, 2}; ClDevice &clDevice = *deviceFactory.rootDevices[0]; size_t paramRetSize{}; cl_int retVal{}; cl_queue_family_properties_intel families[CommonConstants::engineGroupCount]; retVal = clDevice.getDeviceInfo(CL_DEVICE_QUEUE_FAMILY_PROPERTIES_INTEL, sizeof(families), families, ¶mRetSize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, paramRetSize / sizeof(cl_queue_family_properties_intel)); EXPECT_EQ(CL_QUEUE_DEFAULT_CAPABILITIES_INTEL, families[0].capabilities); EXPECT_EQ(1u, families[0].count); EXPECT_EQ(clDevice.getDeviceInfo().queueOnHostProperties, families[0].properties); } struct GetDeviceInfo : public ::testing::TestWithParam { void SetUp() override { param = GetParam(); } cl_device_info param; }; TEST_P(GetDeviceInfo, GivenValidParamsWhenGettingDeviceInfoThenSuccessIsReturned) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); size_t sizeReturned = GetInfo::invalidSourceSize; auto retVal = device->getDeviceInfo( param, 0, nullptr, &sizeReturned); if (CL_SUCCESS != 
retVal) { ASSERT_EQ(CL_SUCCESS, retVal) << " param = " << param; } ASSERT_NE(GetInfo::invalidSourceSize, sizeReturned); auto *object = new char[sizeReturned]; retVal = device->getDeviceInfo( param, sizeReturned, object, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); delete[] object; } // Define new command types to run the parameterized tests cl_device_info deviceInfoParams[] = { CL_DEVICE_ADDRESS_BITS, CL_DEVICE_AVAILABLE, CL_DEVICE_AVC_ME_SUPPORTS_PREEMPTION_INTEL, CL_DEVICE_AVC_ME_SUPPORTS_TEXTURE_SAMPLER_USE_INTEL, CL_DEVICE_AVC_ME_VERSION_INTEL, CL_DEVICE_BUILT_IN_KERNELS, CL_DEVICE_BUILT_IN_KERNELS_WITH_VERSION, CL_DEVICE_COMPILER_AVAILABLE, CL_DEVICE_ILS_WITH_VERSION, CL_DEVICE_IL_VERSION, // NOT_SUPPORTED // CL_DEVICE_TERMINATE_CAPABILITY_KHR, CL_DEVICE_DOUBLE_FP_CONFIG, CL_DEVICE_ENDIAN_LITTLE, CL_DEVICE_ERROR_CORRECTION_SUPPORT, CL_DEVICE_EXECUTION_CAPABILITIES, CL_DEVICE_EXTENSIONS, CL_DEVICE_EXTENSIONS_WITH_VERSION, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, CL_DEVICE_GLOBAL_MEM_SIZE, CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE, CL_DEVICE_HALF_FP_CONFIG, CL_DEVICE_HOST_UNIFIED_MEMORY, CL_DEVICE_IMAGE_SUPPORT, CL_DEVICE_LATEST_CONFORMANCE_VERSION_PASSED, CL_DEVICE_LINKER_AVAILABLE, CL_DEVICE_LOCAL_MEM_SIZE, CL_DEVICE_LOCAL_MEM_TYPE, CL_DEVICE_MAX_CLOCK_FREQUENCY, CL_DEVICE_MAX_COMPUTE_UNITS, CL_DEVICE_MAX_CONSTANT_ARGS, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE, CL_DEVICE_MAX_MEM_ALLOC_SIZE, CL_DEVICE_MAX_NUM_SUB_GROUPS, CL_DEVICE_MAX_ON_DEVICE_EVENTS, CL_DEVICE_MAX_ON_DEVICE_QUEUES, CL_DEVICE_MAX_PARAMETER_SIZE, CL_DEVICE_MAX_PIPE_ARGS, CL_DEVICE_MAX_SAMPLERS, CL_DEVICE_MAX_WORK_GROUP_SIZE, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, CL_DEVICE_MAX_WORK_ITEM_SIZES, CL_DEVICE_MEM_BASE_ADDR_ALIGN, CL_DEVICE_ME_VERSION_INTEL, CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE, CL_DEVICE_NAME, CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR, CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, 
CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT, CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF, CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT, CL_DEVICE_NUMERIC_VERSION, CL_DEVICE_OPENCL_C_ALL_VERSIONS, CL_DEVICE_OPENCL_C_FEATURES, CL_DEVICE_OPENCL_C_VERSION, CL_DEVICE_PARENT_DEVICE, CL_DEVICE_PARTITION_AFFINITY_DOMAIN, CL_DEVICE_PARTITION_MAX_SUB_DEVICES, CL_DEVICE_PARTITION_PROPERTIES, CL_DEVICE_PARTITION_TYPE, CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS, CL_DEVICE_PIPE_MAX_PACKET_SIZE, CL_DEVICE_PLATFORM, CL_DEVICE_PREFERRED_GLOBAL_ATOMIC_ALIGNMENT, CL_DEVICE_PREFERRED_INTEROP_USER_SYNC, CL_DEVICE_PREFERRED_LOCAL_ATOMIC_ALIGNMENT, CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, CL_DEVICE_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, CL_DEVICE_PRINTF_BUFFER_SIZE, CL_DEVICE_PROFILE, CL_DEVICE_PROFILING_TIMER_RESOLUTION, CL_DEVICE_QUEUE_FAMILY_PROPERTIES_INTEL, CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE, CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE, CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES, CL_DEVICE_REFERENCE_COUNT, CL_DEVICE_SINGLE_FP_CONFIG, CL_DEVICE_SPIR_VERSIONS, CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS, CL_DEVICE_SUB_GROUP_SIZES_INTEL, CL_DEVICE_SVM_CAPABILITIES, CL_DEVICE_TYPE, CL_DEVICE_VENDOR, CL_DEVICE_VENDOR_ID, CL_DEVICE_VERSION, CL_DRIVER_VERSION, }; INSTANTIATE_TEST_CASE_P( Device_, GetDeviceInfo, testing::ValuesIn(deviceInfoParams)); TEST(GetDeviceInfoTest, givenDeviceWithSubDevicesWhenGettingNumberOfComputeUnitsThenRootDeviceExposesAllComputeUnits) { UltClDeviceFactory deviceFactory{1, 3}; auto expectedComputeUnitsForSubDevice = deviceFactory.rootDevices[0]->getHardwareInfo().gtSystemInfo.EUCount; uint32_t 
expectedComputeUnitsForRootDevice = 0u; for (const auto &subDevice : deviceFactory.rootDevices[0]->subDevices) { uint32_t numComputeUnits = 0; size_t retSize = 0; auto status = clGetDeviceInfo(subDevice.get(), CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(numComputeUnits), &numComputeUnits, &retSize); EXPECT_EQ(CL_SUCCESS, status); EXPECT_EQ(expectedComputeUnitsForSubDevice, numComputeUnits); EXPECT_EQ(sizeof(numComputeUnits), retSize); expectedComputeUnitsForRootDevice += numComputeUnits; } uint32_t numComputeUnits = 0; size_t retSize = 0; auto status = clGetDeviceInfo(deviceFactory.rootDevices[0], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(numComputeUnits), &numComputeUnits, &retSize); EXPECT_EQ(CL_SUCCESS, status); EXPECT_EQ(expectedComputeUnitsForRootDevice, numComputeUnits); EXPECT_EQ(sizeof(numComputeUnits), retSize); } TEST(GetDeviceInfoTest, givenPciBusInfoWhenGettingPciBusInfoForDeviceThenPciBusInfoIsReturned) { PhysicalDevicePciBusInfo pciBusInfo(0, 1, 2, 3); auto driverInfo = new DriverInfoMock(); driverInfo->setPciBusInfo(pciBusInfo); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); device->driverInfo.reset(driverInfo); device->initializeCaps(); cl_device_pci_bus_info_khr devicePciBusInfo; size_t sizeReturned = 0; auto retVal = device->getDeviceInfo(CL_DEVICE_PCI_BUS_INFO_KHR, 0, nullptr, &sizeReturned); ASSERT_EQ(retVal, CL_SUCCESS); ASSERT_EQ(sizeReturned, sizeof(devicePciBusInfo)); retVal = device->getDeviceInfo(CL_DEVICE_PCI_BUS_INFO_KHR, sizeof(devicePciBusInfo), &devicePciBusInfo, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(devicePciBusInfo.pci_domain, pciBusInfo.pciDomain); EXPECT_EQ(devicePciBusInfo.pci_bus, pciBusInfo.pciBus); EXPECT_EQ(devicePciBusInfo.pci_device, pciBusInfo.pciDevice); EXPECT_EQ(devicePciBusInfo.pci_function, pciBusInfo.pciFunction); } TEST(GetDeviceInfoTest, givenPciBusInfoIsNotAvailableWhenGettingPciBusInfoForDeviceThenInvalidValueIsReturned) { PhysicalDevicePciBusInfo 
pciBusInfo(PhysicalDevicePciBusInfo::InvalidValue, PhysicalDevicePciBusInfo::InvalidValue, PhysicalDevicePciBusInfo::InvalidValue, PhysicalDevicePciBusInfo::InvalidValue); auto driverInfo = new DriverInfoMock(); driverInfo->setPciBusInfo(pciBusInfo); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); device->driverInfo.reset(driverInfo); device->initializeCaps(); auto retVal = device->getDeviceInfo(CL_DEVICE_PCI_BUS_INFO_KHR, 0, nullptr, nullptr); ASSERT_EQ(retVal, CL_INVALID_VALUE); } struct DeviceAttributeQueryTest : public ::testing::TestWithParam { void SetUp() override { param = GetParam(); } void verifyDeviceAttribute(ClDevice &device) { size_t sizeReturned = GetInfo::invalidSourceSize; auto retVal = device.getDeviceInfo( param, 0, nullptr, &sizeReturned); if (CL_SUCCESS != retVal) { ASSERT_EQ(CL_SUCCESS, retVal) << " param = " << param; } ASSERT_NE(GetInfo::invalidSourceSize, sizeReturned); auto object = std::make_unique(sizeReturned); retVal = device.getDeviceInfo( param, sizeReturned, object.get(), nullptr); EXPECT_EQ(CL_SUCCESS, retVal); switch (param) { case CL_DEVICE_IP_VERSION_INTEL: { auto pDeviceIpVersion = reinterpret_cast(object.get()); auto &hwInfo = device.getHardwareInfo(); auto &clHwHelper = NEO::ClHwHelper::get(hwInfo.platform.eRenderCoreFamily); EXPECT_EQ(clHwHelper.getDeviceIpVersion(hwInfo), *pDeviceIpVersion); EXPECT_EQ(sizeof(cl_version), sizeReturned); break; } case CL_DEVICE_ID_INTEL: { auto pDeviceId = reinterpret_cast(object.get()); EXPECT_EQ(device.getHardwareInfo().platform.usDeviceID, *pDeviceId); EXPECT_EQ(sizeof(cl_uint), sizeReturned); break; } case CL_DEVICE_NUM_SLICES_INTEL: { auto pNumSlices = reinterpret_cast(object.get()); const auto >SysInfo = device.getHardwareInfo().gtSystemInfo; EXPECT_EQ(gtSysInfo.SliceCount * std::max(device.getNumGenericSubDevices(), 1u), *pNumSlices); EXPECT_EQ(sizeof(cl_uint), sizeReturned); break; } case CL_DEVICE_NUM_SUB_SLICES_PER_SLICE_INTEL: { auto 
pNumSubslicesPerSlice = reinterpret_cast(object.get()); const auto >SysInfo = device.getHardwareInfo().gtSystemInfo; EXPECT_EQ(gtSysInfo.SubSliceCount / gtSysInfo.SliceCount, *pNumSubslicesPerSlice); EXPECT_EQ(sizeof(cl_uint), sizeReturned); break; } case CL_DEVICE_NUM_EUS_PER_SUB_SLICE_INTEL: { auto pNumEusPerSubslice = reinterpret_cast(object.get()); const auto >SysInfo = device.getHardwareInfo().gtSystemInfo; EXPECT_EQ(gtSysInfo.MaxEuPerSubSlice, *pNumEusPerSubslice); EXPECT_EQ(sizeof(cl_uint), sizeReturned); break; } case CL_DEVICE_NUM_THREADS_PER_EU_INTEL: { auto pNumThreadsPerEu = reinterpret_cast(object.get()); const auto >SysInfo = device.getHardwareInfo().gtSystemInfo; EXPECT_EQ(gtSysInfo.ThreadCount / gtSysInfo.EUCount, *pNumThreadsPerEu); EXPECT_EQ(sizeof(cl_uint), sizeReturned); break; } case CL_DEVICE_FEATURE_CAPABILITIES_INTEL: { auto pCapabilities = reinterpret_cast(object.get()); auto &hwInfo = device.getHardwareInfo(); auto &clHwHelper = ClHwHelper::get(hwInfo.platform.eRenderCoreFamily); EXPECT_EQ(clHwHelper.getSupportedDeviceFeatureCapabilities(), *pCapabilities); EXPECT_EQ(sizeof(cl_device_feature_capabilities_intel), sizeReturned); break; } default: EXPECT_TRUE(false); break; } } cl_device_info param; }; TEST_P(DeviceAttributeQueryTest, givenGetDeviceInfoWhenDeviceAttributeIsQueriedOnClDeviceThenReturnCorrectAttributeValue) { auto pClDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); ASSERT_EQ(0u, pClDevice->getNumGenericSubDevices()); verifyDeviceAttribute(*pClDevice); } TEST_P(DeviceAttributeQueryTest, givenGetDeviceInfoWhenDeviceAttributeIsQueriedOnRootDeviceAndSubDevicesThenReturnCorrectAttributeValues) { DebugManagerStateRestore restorer; DebugManager.flags.CreateMultipleSubDevices.set(2); VariableBackup mockDeviceFlagBackup(&MockDevice::createSingleDevice, false); auto pRootClDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); ASSERT_EQ(2u, pRootClDevice->subDevices.size()); 
verifyDeviceAttribute(*pRootClDevice); for (const auto &pClSubDevice : pRootClDevice->subDevices) { verifyDeviceAttribute(*pClSubDevice); } } cl_device_info deviceAttributeQueryParams[] = { CL_DEVICE_IP_VERSION_INTEL, CL_DEVICE_ID_INTEL, CL_DEVICE_NUM_SLICES_INTEL, CL_DEVICE_NUM_SUB_SLICES_PER_SLICE_INTEL, CL_DEVICE_NUM_EUS_PER_SUB_SLICE_INTEL, CL_DEVICE_NUM_THREADS_PER_EU_INTEL, CL_DEVICE_FEATURE_CAPABILITIES_INTEL}; INSTANTIATE_TEST_CASE_P( Device_, DeviceAttributeQueryTest, testing::ValuesIn(deviceAttributeQueryParams)); compute-runtime-22.14.22890/opencl/test/unit_test/device/get_device_name_tests.cpp000066400000000000000000000030371422164147700301150ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/helpers/default_hw_info.h" #include "shared/test/common/mocks/mock_device.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "gtest/gtest.h" namespace NEO { extern const char *familyName[]; } // namespace NEO using namespace NEO; using DeviceNameTest = ::testing::Test; TEST_F(DeviceNameTest, WhenCallingGetClDeviceNameThenReturnDeviceNameWithDeviceIdAppendedAtTheEnd) { auto clDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); std::string deviceName = "Intel(R) Graphics"; EXPECT_STREQ(deviceName.c_str(), clDevice->device.getDeviceName(*defaultHwInfo.get()).c_str()); std::stringstream clDeviceName; clDeviceName << deviceName; clDeviceName << " [0x" << std::hex << std::setw(4) << std::setfill('0') << defaultHwInfo->platform.usDeviceID << "]"; EXPECT_STREQ(clDeviceName.str().c_str(), clDevice->getClDeviceName(*defaultHwInfo.get()).c_str()); } TEST_F(DeviceNameTest, GivenDeviceWithNameWhenCallingGetClDeviceNameThenReturnCustomDeviceName) { HardwareInfo localHwInfo = *defaultHwInfo; localHwInfo.capabilityTable.deviceName = "Custom Device"; auto clDevice = 
std::make_unique(MockDevice::createWithNewExecutionEnvironment(&localHwInfo)); std::string deviceName = "Custom Device"; EXPECT_STREQ(deviceName.c_str(), clDevice->device.getDeviceName(localHwInfo).c_str()); } compute-runtime-22.14.22890/opencl/test/unit_test/device/gl/000077500000000000000000000000001422164147700234705ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/device/gl/CMakeLists.txt000066400000000000000000000004131422164147700262260ustar00rootroot00000000000000# # Copyright (C) 2018-2021 Intel Corporation # # SPDX-License-Identifier: MIT # if(WIN32) set(IGDRCL_SRCS_tests_device_gl ${CMAKE_CURRENT_SOURCE_DIR}/device_caps_gl_tests.cpp ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_device_gl}) endif() compute-runtime-22.14.22890/opencl/test/unit_test/device/gl/device_caps_gl_tests.cpp000066400000000000000000000022111422164147700303410ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/helpers/basic_math.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/memory_manager/os_agnostic_memory_manager.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/helpers/gtest_helpers.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "gtest/gtest.h" #include using namespace NEO; TEST(Device_GetCaps, givenForceClGlSharingWhenCapsAreCreatedThenDeviceReportsClGlSharingExtension) { DebugManagerStateRestore dbgRestorer; { DebugManager.flags.AddClGlSharing.set(true); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); const auto &caps = device->getDeviceInfo(); EXPECT_TRUE(hasSubstr(caps.deviceExtensions, std::string("cl_khr_gl_sharing "))); DebugManager.flags.AddClGlSharing.set(false); } } 
compute-runtime-22.14.22890/opencl/test/unit_test/device/sub_device_tests.cpp000066400000000000000000001456151422164147700271400ustar00rootroot00000000000000/* * Copyright (C) 2019-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/device/sub_device.h" #include "shared/source/os_interface/device_factory.h" #include "shared/source/os_interface/os_context.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/dispatch_flags_helper.h" #include "shared/test/common/helpers/ult_hw_config.h" #include "shared/test/common/helpers/variable_backup.h" #include "shared/test/common/libult/ult_command_stream_receiver.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "shared/test/common/mocks/ult_device_factory.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "opencl/test/unit_test/mocks/mock_program.h" using namespace NEO; TEST(SubDevicesTest, givenDefaultConfigWhenCreateRootDeviceThenItDoesntContainSubDevices) { auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); EXPECT_EQ(0u, device->getNumGenericSubDevices()); if (device->getNumSubDevices() > 0) { EXPECT_TRUE(device->getSubDevice(0)->isEngineInstanced()); } } TEST(SubDevicesTest, givenCreateMultipleSubDevicesFlagSetWhenCreateRootDeviceThenItsSubdevicesHaveProperRootIdSet) { DebugManagerStateRestore restorer; DebugManager.flags.CreateMultipleSubDevices.set(2); VariableBackup mockDeviceFlagBackup(&MockDevice::createSingleDevice, false); auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); 
EXPECT_EQ(2u, device->getNumSubDevices()); EXPECT_EQ(0u, device->getRootDeviceIndex()); EXPECT_EQ(0u, device->subdevices.at(0)->getRootDeviceIndex()); EXPECT_EQ(0u, device->subdevices.at(0)->getSubDeviceIndex()); EXPECT_EQ(0u, device->subdevices.at(1)->getRootDeviceIndex()); EXPECT_EQ(1u, device->subdevices.at(1)->getSubDeviceIndex()); } TEST(SubDevicesTest, givenCreateMultipleSubDevicesFlagSetWhenCreateRootDeviceThenItContainsSubDevices) { DebugManagerStateRestore restorer; DebugManager.flags.CreateMultipleSubDevices.set(2); VariableBackup mockDeviceFlagBackup(&MockDevice::createSingleDevice, false); auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); EXPECT_EQ(2u, device->getNumSubDevices()); EXPECT_EQ(2u, device->getNumGenericSubDevices()); EXPECT_EQ(0u, device->subdevices.at(0)->getNumGenericSubDevices()); EXPECT_EQ(0u, device->subdevices.at(1)->getNumGenericSubDevices()); } TEST(SubDevicesTest, givenDeviceWithSubDevicesWhenSubDeviceApiRefCountsAreChangedThenChangeIsPropagatedToRootDevice) { DebugManagerStateRestore restorer; DebugManager.flags.CreateMultipleSubDevices.set(2); VariableBackup mockDeviceFlagBackup(&MockDevice::createSingleDevice, false); initPlatform(); auto nonDefaultPlatform = std::make_unique(*platform()->peekExecutionEnvironment()); nonDefaultPlatform->initializeWithNewDevices(); auto device = nonDefaultPlatform->getClDevice(0); auto defaultDevice = platform()->getClDevice(0); auto subDevice = device->getSubDevice(1); auto baseDeviceApiRefCount = device->getRefApiCount(); auto baseDeviceInternalRefCount = device->getRefInternalCount(); auto baseSubDeviceApiRefCount = subDevice->getRefApiCount(); auto baseSubDeviceInternalRefCount = subDevice->getRefInternalCount(); auto baseDefaultDeviceApiRefCount = defaultDevice->getRefApiCount(); auto baseDefaultDeviceInternalRefCount = defaultDevice->getRefInternalCount(); subDevice->retainApi(); EXPECT_EQ(baseDeviceApiRefCount, device->getRefApiCount()); 
EXPECT_EQ(baseDeviceInternalRefCount + 1, device->getRefInternalCount()); EXPECT_EQ(baseSubDeviceApiRefCount + 1, subDevice->getRefApiCount()); EXPECT_EQ(baseSubDeviceInternalRefCount + 1, subDevice->getRefInternalCount()); EXPECT_EQ(baseDefaultDeviceApiRefCount, defaultDevice->getRefApiCount()); EXPECT_EQ(baseDefaultDeviceInternalRefCount, defaultDevice->getRefInternalCount()); subDevice->releaseApi(); EXPECT_EQ(baseDeviceApiRefCount, device->getRefApiCount()); EXPECT_EQ(baseDeviceInternalRefCount, device->getRefInternalCount()); EXPECT_EQ(baseSubDeviceApiRefCount, subDevice->getRefApiCount()); EXPECT_EQ(baseSubDeviceInternalRefCount, subDevice->getRefInternalCount()); EXPECT_EQ(baseDefaultDeviceApiRefCount, defaultDevice->getRefApiCount()); EXPECT_EQ(baseDefaultDeviceInternalRefCount, defaultDevice->getRefInternalCount()); } TEST(SubDevicesTest, givenDeviceWithSubDevicesWhenSubDeviceInternalRefCountsAreChangedThenChangeIsPropagatedToRootDevice) { DebugManagerStateRestore restorer; DebugManager.flags.CreateMultipleSubDevices.set(2); VariableBackup mockDeviceFlagBackup(&MockDevice::createSingleDevice, false); auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); device->incRefInternal(); auto subDevice = device->getSubDevice(0); auto baseDeviceInternalRefCount = device->getRefInternalCount(); auto baseSubDeviceInternalRefCount = subDevice->getRefInternalCount(); subDevice->incRefInternal(); EXPECT_EQ(baseDeviceInternalRefCount + 1, device->getRefInternalCount()); EXPECT_EQ(baseSubDeviceInternalRefCount, subDevice->getRefInternalCount()); device->incRefInternal(); EXPECT_EQ(baseDeviceInternalRefCount + 2, device->getRefInternalCount()); EXPECT_EQ(baseSubDeviceInternalRefCount, subDevice->getRefInternalCount()); subDevice->decRefInternal(); EXPECT_EQ(baseDeviceInternalRefCount + 1, device->getRefInternalCount()); EXPECT_EQ(baseSubDeviceInternalRefCount, subDevice->getRefInternalCount()); device->decRefInternal(); 
EXPECT_EQ(baseDeviceInternalRefCount, device->getRefInternalCount()); EXPECT_EQ(baseSubDeviceInternalRefCount, subDevice->getRefInternalCount()); } TEST(SubDevicesTest, givenClDeviceWithSubDevicesWhenSubDeviceInternalRefCountsAreChangedThenChangeIsPropagatedToRootDevice) { DebugManagerStateRestore restorer; DebugManager.flags.CreateMultipleSubDevices.set(2); VariableBackup mockDeviceFlagBackup(&MockDevice::createSingleDevice, false); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); device->incRefInternal(); auto &subDevice = device->subDevices[0]; auto baseDeviceInternalRefCount = device->getRefInternalCount(); auto baseSubDeviceInternalRefCount = subDevice->getRefInternalCount(); subDevice->incRefInternal(); EXPECT_EQ(baseDeviceInternalRefCount + 1, device->getRefInternalCount()); EXPECT_EQ(baseSubDeviceInternalRefCount, subDevice->getRefInternalCount()); device->incRefInternal(); EXPECT_EQ(baseDeviceInternalRefCount + 2, device->getRefInternalCount()); EXPECT_EQ(baseSubDeviceInternalRefCount, subDevice->getRefInternalCount()); subDevice->decRefInternal(); EXPECT_EQ(baseDeviceInternalRefCount + 1, device->getRefInternalCount()); EXPECT_EQ(baseSubDeviceInternalRefCount, subDevice->getRefInternalCount()); device->decRefInternal(); EXPECT_EQ(baseDeviceInternalRefCount, device->getRefInternalCount()); EXPECT_EQ(baseSubDeviceInternalRefCount, subDevice->getRefInternalCount()); } TEST(SubDevicesTest, givenDeviceWithSubDevicesWhenSubDeviceCreationFailThenWholeDeviceIsDestroyed) { DebugManagerStateRestore restorer; DebugManager.flags.CreateMultipleSubDevices.set(10); MockExecutionEnvironment executionEnvironment; executionEnvironment.prepareRootDeviceEnvironments(1); executionEnvironment.incRefInternal(); executionEnvironment.memoryManager.reset(new FailMemoryManager(10, executionEnvironment)); auto device = Device::create(&executionEnvironment, 0u); EXPECT_EQ(nullptr, device); } TEST(SubDevicesTest, 
givenCreateMultipleRootDevicesFlagsEnabledWhenDevicesAreCreatedThenEachHasUniqueDeviceIndex) { DebugManagerStateRestore restorer; DebugManager.flags.CreateMultipleRootDevices.set(2); VariableBackup backup{&ultHwConfig}; ultHwConfig.useMockedPrepareDeviceEnvironmentsFunc = false; initPlatform(); EXPECT_EQ(0u, platform()->getClDevice(0)->getRootDeviceIndex()); EXPECT_EQ(1u, platform()->getClDevice(1)->getRootDeviceIndex()); } TEST(SubDevicesTest, givenRootDeviceWithSubDevicesWhenOsContextIsCreatedThenItsBitfieldBasesOnSubDevicesCount) { DebugManagerStateRestore restorer; DebugManager.flags.CreateMultipleSubDevices.set(2); VariableBackup mockDeviceFlagBackup(&MockDevice::createSingleDevice, false); auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); EXPECT_EQ(2u, device->getNumSubDevices()); uint32_t rootDeviceBitfield = 0b11; EXPECT_EQ(rootDeviceBitfield, static_cast(device->getDefaultEngine().osContext->getDeviceBitfield().to_ulong())); } TEST(SubDevicesTest, givenSubDeviceWhenOsContextIsCreatedThenItsBitfieldBasesOnSubDeviceId) { DebugManagerStateRestore restorer; DebugManager.flags.CreateMultipleSubDevices.set(2); VariableBackup mockDeviceFlagBackup(&MockDevice::createSingleDevice, false); auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); EXPECT_EQ(2u, device->getNumSubDevices()); auto firstSubDevice = static_cast(device->subdevices.at(0)); auto secondSubDevice = static_cast(device->subdevices.at(1)); uint32_t firstSubDeviceMask = (1u << 0); uint32_t secondSubDeviceMask = (1u << 1); EXPECT_EQ(firstSubDeviceMask, static_cast(firstSubDevice->getDefaultEngine().osContext->getDeviceBitfield().to_ulong())); EXPECT_EQ(secondSubDeviceMask, static_cast(secondSubDevice->getDefaultEngine().osContext->getDeviceBitfield().to_ulong())); } TEST(SubDevicesTest, givenDeviceWithSubDevicesWhenGettingDeviceByIdThenGetCorrectSubDevice) { DebugManagerStateRestore restorer; 
DebugManager.flags.CreateMultipleSubDevices.set(2); VariableBackup mockDeviceFlagBackup(&MockDevice::createSingleDevice, false); auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); EXPECT_EQ(2u, device->getNumSubDevices()); EXPECT_EQ(device->subdevices.at(0), device->getSubDevice(0)); EXPECT_EQ(device->subdevices.at(1), device->getSubDevice(1)); EXPECT_THROW(device->getSubDevice(2), std::exception); } TEST(SubDevicesTest, givenSubDevicesWhenGettingDeviceByIdZeroThenGetThisSubDevice) { DebugManagerStateRestore restorer; DebugManager.flags.CreateMultipleSubDevices.set(2); VariableBackup mockDeviceFlagBackup(&MockDevice::createSingleDevice, false); auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); EXPECT_EQ(2u, device->getNumSubDevices()); auto subDevice = device->subdevices.at(0); if (subDevice->getNumSubDevices() > 0) { EXPECT_ANY_THROW(subDevice->getSubDevice(0)->getSubDevice(0)); } else { EXPECT_ANY_THROW(subDevice->getSubDevice(0)); } } TEST(RootDevicesTest, givenRootDeviceWithoutSubdevicesWhenCreateEnginesThenDeviceCreatesCorrectNumberOfEngines) { auto hwInfo = *defaultHwInfo; hwInfo.capabilityTable.blitterOperationsSupported = true; auto &gpgpuEngines = HwHelper::get(hwInfo.platform.eRenderCoreFamily).getGpgpuEngineInstances(hwInfo); auto executionEnvironment = new MockExecutionEnvironment; executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(&hwInfo); MockDevice device(executionEnvironment, 0); EXPECT_EQ(0u, device.allEngines.size()); device.createEngines(); EXPECT_EQ(gpgpuEngines.size(), device.allEngines.size()); } TEST(RootDevicesTest, givenRootDeviceWithSubdevicesWhenCreateEnginesThenDeviceCreatesSpecialEngine) { DebugManagerStateRestore restorer; DebugManager.flags.CreateMultipleSubDevices.set(2); VariableBackup mockDeviceFlagBackup(&MockDevice::createSingleDevice, false); auto executionEnvironment = new MockExecutionEnvironment; MockDevice 
device(executionEnvironment, 0); EXPECT_EQ(0u, device.allEngines.size()); device.createSubDevices(); device.createEngines(); EXPECT_EQ(2u, device.getNumGenericSubDevices()); EXPECT_EQ(1u, device.allEngines.size()); } TEST(SubDevicesTest, givenRootDeviceWithSubDevicesAndLocalMemoryWhenGettingGlobalMemorySizeThenSubDevicesReturnReducedAmountOfGlobalMemAllocSize) { DebugManagerStateRestore restorer; DebugManager.flags.EnableLocalMemory.set(1); DebugManager.flags.CreateMultipleSubDevices.set(2); DebugManager.flags.HBMSizePerTileInGigabytes.set(1); const uint32_t numSubDevices = 2u; UltDeviceFactory deviceFactory{1, numSubDevices}; auto rootDevice = deviceFactory.rootDevices[0]; auto totalGlobalMemorySize = rootDevice->getGlobalMemorySize(static_cast(rootDevice->getDeviceBitfield().to_ulong())); auto expectedGlobalMemorySize = totalGlobalMemorySize / numSubDevices; for (const auto &subDevice : deviceFactory.subDevices) { auto mockSubDevice = static_cast(subDevice); auto subDeviceBitfield = static_cast(mockSubDevice->getDeviceBitfield().to_ulong()); EXPECT_EQ(expectedGlobalMemorySize, mockSubDevice->getGlobalMemorySize(subDeviceBitfield)); } } TEST(SubDevicesTest, givenRootDeviceWithSubDevicesWithoutLocalMemoryWhenGettingGlobalMemorySizeThenSubDevicesReturnReducedAmountOfGlobalMemAllocSize) { DebugManagerStateRestore restorer; DebugManager.flags.EnableLocalMemory.set(0); DebugManager.flags.CreateMultipleSubDevices.set(2); const uint32_t numSubDevices = 2u; UltDeviceFactory deviceFactory{1, numSubDevices}; auto rootDevice = deviceFactory.rootDevices[0]; auto totalGlobalMemorySize = rootDevice->getGlobalMemorySize(static_cast(rootDevice->getDeviceBitfield().to_ulong())); for (const auto &subDevice : deviceFactory.subDevices) { auto mockSubDevice = static_cast(subDevice); auto subDeviceBitfield = static_cast(mockSubDevice->getDeviceBitfield().to_ulong()); EXPECT_EQ(totalGlobalMemorySize, mockSubDevice->getGlobalMemorySize(subDeviceBitfield)); } } TEST(SubDevicesTest, 
whenCreatingEngineInstancedSubDeviceThenSetCorrectSubdeviceIndex) { class MyRootDevice : public RootDevice { public: using RootDevice::createEngineInstancedSubDevice; using RootDevice::RootDevice; }; auto executionEnvironment = new ExecutionEnvironment(); executionEnvironment->prepareRootDeviceEnvironments(1); executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); DeviceFactory::createMemoryManagerFunc(*executionEnvironment); auto rootDevice = std::unique_ptr(Device::create(executionEnvironment, 0)); auto subDevice = std::unique_ptr(rootDevice->createEngineInstancedSubDevice(1, defaultHwInfo->capabilityTable.defaultEngineType)); ASSERT_NE(nullptr, subDevice.get()); EXPECT_EQ(2u, subDevice->getDeviceBitfield().to_ulong()); } struct EngineInstancedDeviceTests : public ::testing::Test { bool createDevices(uint32_t numGenericSubDevices, uint32_t numCcs) { DebugManager.flags.CreateMultipleSubDevices.set(numGenericSubDevices); auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); auto hwInfo = executionEnvironment->rootDeviceEnvironments[0]->getMutableHardwareInfo(); hwInfo->gtSystemInfo.CCSInfo.NumberOfCCSEnabled = numCcs; hwInfo->featureTable.flags.ftrCCSNode = (numCcs > 0); hwInfo->capabilityTable.blitterOperationsSupported = true; HwHelper::get(hwInfo->platform.eRenderCoreFamily).adjustDefaultEngineType(hwInfo); if (!multiCcsDevice(*hwInfo, numCcs)) { return false; } executionEnvironment->parseAffinityMask(); deviceFactory = std::make_unique(1, numGenericSubDevices, *executionEnvironment.release()); rootDevice = deviceFactory->rootDevices[0]; EXPECT_NE(nullptr, rootDevice); return true; } bool hasRootCsrOnly(MockDevice *device) { return ((device->allEngines.size() == 1) && device->allEngines[0].osContext->isRootDevice()); } bool isEngineInstanced(MockSubDevice *subDevice, aub_stream::EngineType engineType, 
uint32_t subDeviceIndex, DeviceBitfield deviceBitfield) { bool isEngineInstanced = !subDevice->allEngines[0].osContext->isRootDevice(); isEngineInstanced &= subDevice->engineInstanced; isEngineInstanced &= (subDevice->getNumGenericSubDevices() == 0); isEngineInstanced &= (subDevice->getNumSubDevices() == 0); isEngineInstanced &= (engineType == subDevice->engineInstancedType); isEngineInstanced &= (subDeviceIndex == subDevice->getSubDeviceIndex()); isEngineInstanced &= (deviceBitfield == subDevice->getDeviceBitfield()); isEngineInstanced &= (subDevice->getAllEngines().size() == 1); return isEngineInstanced; } template bool hasAllEngines(MockDeviceT *device) { auto &hwInfo = device->getHardwareInfo(); auto gpgpuEngines = HwHelper::get(hwInfo.platform.eRenderCoreFamily).getGpgpuEngineInstances(hwInfo); for (size_t i = 0; i < gpgpuEngines.size(); i++) { if (device->allEngines[i].getEngineType() != gpgpuEngines[i].first) { return false; } } return true; } bool multiCcsDevice(const HardwareInfo &hwInfo, uint32_t expectedNumCcs) { auto gpgpuEngines = HwHelper::get(hwInfo.platform.eRenderCoreFamily).getGpgpuEngineInstances(hwInfo); uint32_t numCcs = 0; for (auto &engine : gpgpuEngines) { if (EngineHelpers::isCcs(engine.first) && (engine.second == EngineUsage::Regular)) { numCcs++; } } return (numCcs == expectedNumCcs); } template bool hasEngineInstancedEngines(MockDeviceT *device, aub_stream::EngineType engineType) { if (device->getAllEngines().size() != 1) { return false; } OsContext *defaultOsContext = device->getDefaultEngine().osContext; EXPECT_EQ(engineType, defaultOsContext->getEngineType()); EXPECT_EQ(EngineUsage::Regular, defaultOsContext->getEngineUsage()); EXPECT_TRUE(defaultOsContext->isDefaultContext()); auto &engine = device->getAllEngines()[0]; EXPECT_EQ(engine.getEngineType(), engineType); EXPECT_TRUE(engine.osContext->isRegular()); return true; } DebugManagerStateRestore restorer; std::unique_ptr deviceFactory; MockDevice *rootDevice = nullptr; }; 
TEST_F(EngineInstancedDeviceTests, givenDebugFlagSetAndMoreThanOneCcsWhenCreatingRootDeviceWithoutGenericSubDevicesThenCreateEngineInstanced) { constexpr uint32_t genericDevicesCount = 1; constexpr uint32_t ccsCount = 2; DebugManager.flags.EngineInstancedSubDevices.set(true); DebugManager.flags.AllowSingleTileEngineInstancedSubDevices.set(true); if (!createDevices(genericDevicesCount, ccsCount)) { GTEST_SKIP(); } auto &hwInfo = rootDevice->getHardwareInfo(); EXPECT_EQ(ccsCount, hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled); EXPECT_EQ(ccsCount, rootDevice->getNumSubDevices()); EXPECT_EQ(0u, rootDevice->getNumGenericSubDevices()); EXPECT_FALSE(hasRootCsrOnly(rootDevice)); EXPECT_TRUE(hasAllEngines(rootDevice)); for (uint32_t i = 0; i < ccsCount; i++) { auto engineType = static_cast(aub_stream::EngineType::ENGINE_CCS + i); auto subDevice = static_cast(rootDevice->getSubDevice(i)); ASSERT_NE(nullptr, subDevice); EXPECT_TRUE(isEngineInstanced(subDevice, engineType, 0, 1)); EXPECT_TRUE(hasEngineInstancedEngines(subDevice, engineType)); } } TEST_F(EngineInstancedDeviceTests, givenDebugFlagNotSetAndMoreThanOneCcsWhenCreatingRootDeviceWithoutGenericSubDevicesThenDontCreateEngineInstanced) { constexpr uint32_t genericDevicesCount = 1; constexpr uint32_t ccsCount = 2; if (!createDevices(genericDevicesCount, ccsCount)) { GTEST_SKIP(); } auto &hwInfo = rootDevice->getHardwareInfo(); EXPECT_EQ(ccsCount, hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled); EXPECT_EQ(0u, rootDevice->getNumSubDevices()); EXPECT_EQ(0u, rootDevice->getNumGenericSubDevices()); EXPECT_FALSE(hasRootCsrOnly(rootDevice)); EXPECT_TRUE(hasAllEngines(rootDevice)); } TEST_F(EngineInstancedDeviceTests, givenDebugFlagSetAndZeroCcsesWhenCreatingRootDeviceWithoutGenericSubDevicesThenCreateEngineInstanced) { constexpr uint32_t genericDevicesCount = 1; constexpr uint32_t ccsCount = 0; EXPECT_TRUE(createDevices(genericDevicesCount, ccsCount)); EXPECT_FALSE(hasRootCsrOnly(rootDevice)); 
EXPECT_TRUE(hasAllEngines(rootDevice)); EXPECT_EQ(0u, rootDevice->getNumGenericSubDevices()); EXPECT_EQ(0u, rootDevice->getNumSubDevices()); EXPECT_FALSE(rootDevice->getNearestGenericSubDevice(0)->isSubDevice()); } TEST_F(EngineInstancedDeviceTests, givenDebugFlagSetAndSingleCcsWhenCreatingRootDeviceWithoutGenericSubDevicesThenDontCreateEngineInstanced) { constexpr uint32_t genericDevicesCount = 1; constexpr uint32_t ccsCount = 1; if (!createDevices(genericDevicesCount, ccsCount)) { GTEST_SKIP(); } EXPECT_FALSE(hasRootCsrOnly(rootDevice)); EXPECT_TRUE(hasAllEngines(rootDevice)); EXPECT_EQ(0u, rootDevice->getNumGenericSubDevices()); EXPECT_EQ(0u, rootDevice->getNumSubDevices()); EXPECT_FALSE(rootDevice->getNearestGenericSubDevice(0)->isSubDevice()); } TEST_F(EngineInstancedDeviceTests, givenDebugFlagSetWhenCreatingRootDeviceWithGenericSubDevicesAndZeroCcsesThenDontCreateEngineInstanced) { constexpr uint32_t genericDevicesCount = 2; constexpr uint32_t ccsCount = 0; EXPECT_TRUE(createDevices(genericDevicesCount, ccsCount)); EXPECT_TRUE(hasRootCsrOnly(rootDevice)); for (uint32_t i = 0; i < genericDevicesCount; i++) { auto subDevice = static_cast(rootDevice->getSubDevice(i)); ASSERT_NE(nullptr, subDevice); EXPECT_FALSE(subDevice->allEngines[0].osContext->isRootDevice()); EXPECT_FALSE(subDevice->engineInstanced); EXPECT_EQ(0u, subDevice->getNumGenericSubDevices()); EXPECT_EQ(0u, subDevice->getNumSubDevices()); EXPECT_EQ(aub_stream::EngineType::NUM_ENGINES, subDevice->engineInstancedType); EXPECT_TRUE(hasAllEngines(subDevice)); } } TEST_F(EngineInstancedDeviceTests, givenDebugFlagSetWhenCreatingRootDeviceWithGenericSubDevicesAndSingleCcsThenDontCreateEngineInstanced) { constexpr uint32_t genericDevicesCount = 2; constexpr uint32_t ccsCount = 1; if (!createDevices(genericDevicesCount, ccsCount)) { GTEST_SKIP(); } EXPECT_TRUE(hasRootCsrOnly(rootDevice)); for (uint32_t i = 0; i < genericDevicesCount; i++) { auto subDevice = static_cast(rootDevice->getSubDevice(i)); 
ASSERT_NE(nullptr, subDevice); EXPECT_FALSE(subDevice->allEngines[0].osContext->isRootDevice()); EXPECT_FALSE(subDevice->engineInstanced); EXPECT_EQ(0u, subDevice->getNumGenericSubDevices()); EXPECT_EQ(0u, subDevice->getNumSubDevices()); EXPECT_EQ(aub_stream::EngineType::NUM_ENGINES, subDevice->engineInstancedType); EXPECT_TRUE(hasAllEngines(subDevice)); } } TEST_F(EngineInstancedDeviceTests, givenDebugFlagSetWhenCreatingRootDeviceWithGenericSubDevicesThenCreateEngineInstanced) { constexpr uint32_t genericDevicesCount = 2; constexpr uint32_t ccsCount = 2; DebugManager.flags.EngineInstancedSubDevices.set(true); if (!createDevices(genericDevicesCount, ccsCount)) { GTEST_SKIP(); } EXPECT_TRUE(hasRootCsrOnly(rootDevice)); for (uint32_t i = 0; i < genericDevicesCount; i++) { auto subDevice = static_cast(rootDevice->getSubDevice(i)); ASSERT_NE(nullptr, subDevice); EXPECT_FALSE(subDevice->allEngines[0].osContext->isRootDevice()); EXPECT_FALSE(subDevice->engineInstanced); EXPECT_EQ(0u, subDevice->getNumGenericSubDevices()); EXPECT_EQ(ccsCount, subDevice->getNumSubDevices()); EXPECT_EQ(aub_stream::EngineType::NUM_ENGINES, subDevice->engineInstancedType); EXPECT_TRUE(hasAllEngines(subDevice)); for (uint32_t j = 0; j < ccsCount; j++) { auto engineType = static_cast(aub_stream::EngineType::ENGINE_CCS + j); auto engineSubDevice = static_cast(subDevice->getSubDevice(j)); ASSERT_NE(nullptr, engineSubDevice); EXPECT_TRUE(isEngineInstanced(engineSubDevice, engineType, subDevice->getSubDeviceIndex(), subDevice->getDeviceBitfield())); EXPECT_TRUE(hasEngineInstancedEngines(engineSubDevice, engineType)); } } } TEST_F(EngineInstancedDeviceTests, givenEngineInstancedSubDeviceWhenEngineCreationFailsThenReturnFalse) { constexpr uint32_t genericDevicesCount = 2; constexpr uint32_t ccsCount = 0; EXPECT_TRUE(createDevices(genericDevicesCount, ccsCount)); auto subDevice = static_cast(rootDevice->getSubDevice(0)); auto &hwInfo = rootDevice->getHardwareInfo(); auto gpgpuEngines = 
HwHelper::get(hwInfo.platform.eRenderCoreFamily).getGpgpuEngineInstances(hwInfo); subDevice->engineInstanced = true; subDevice->failOnCreateEngine = true; subDevice->engineInstancedType = gpgpuEngines[0].first; EXPECT_FALSE(subDevice->createEngines()); } TEST_F(EngineInstancedDeviceTests, givenMultipleSubDevicesWhenCallingGetSubDeviceThenReturnCorrectObject) { constexpr uint32_t genericDevicesCount = 2; constexpr uint32_t ccsCount = 2; DebugManager.flags.EngineInstancedSubDevices.set(true); if (!createDevices(genericDevicesCount, ccsCount)) { GTEST_SKIP(); } auto subDevice0 = rootDevice->subdevices[0]; auto subDevice1 = rootDevice->subdevices[1]; auto subSubDevice00 = subDevice0->getSubDevice(0); auto subSubDevice01 = subDevice0->getSubDevice(1); auto subSubDevice10 = subDevice1->getSubDevice(0); auto subSubDevice11 = subDevice1->getSubDevice(1); { EXPECT_EQ(rootDevice->getSubDevice(0), subDevice0); EXPECT_EQ(rootDevice->getNearestGenericSubDevice(0), subDevice0); EXPECT_EQ(rootDevice->getSubDevice(1), subDevice1); EXPECT_EQ(rootDevice->getNearestGenericSubDevice(1), subDevice1); } { EXPECT_EQ(subDevice0->getNearestGenericSubDevice(0), subDevice0); EXPECT_EQ(subDevice0->getNearestGenericSubDevice(1), subDevice0); EXPECT_EQ(subDevice1->getNearestGenericSubDevice(0), subDevice1); EXPECT_EQ(subDevice1->getNearestGenericSubDevice(1), subDevice1); } { EXPECT_NE(subDevice0, subSubDevice00); EXPECT_NE(subDevice0, subSubDevice01); EXPECT_NE(subDevice1, subSubDevice10); EXPECT_NE(subDevice1, subSubDevice11); } { EXPECT_EQ(subSubDevice00->getNearestGenericSubDevice(0), subDevice0); EXPECT_EQ(subSubDevice01->getNearestGenericSubDevice(0), subDevice0); EXPECT_EQ(subSubDevice10->getNearestGenericSubDevice(0), subDevice1); EXPECT_EQ(subSubDevice11->getNearestGenericSubDevice(0), subDevice1); } { EXPECT_ANY_THROW(subSubDevice00->getSubDevice(0)); EXPECT_ANY_THROW(subSubDevice01->getSubDevice(0)); EXPECT_ANY_THROW(subSubDevice10->getSubDevice(0)); 
EXPECT_ANY_THROW(subSubDevice11->getSubDevice(0)); } } TEST_F(EngineInstancedDeviceTests, givenMultipleClSubDevicesWhenCallingGetSubDeviceThenReturnCorrectObject) { constexpr uint32_t genericDevicesCount = 2; constexpr uint32_t ccsCount = 2; DebugManager.flags.EngineInstancedSubDevices.set(true); if (!createDevices(genericDevicesCount, ccsCount)) { GTEST_SKIP(); } auto subDevice = rootDevice->subdevices[0]; auto subSubDevice = subDevice->getSubDevice(0); auto clRootDevice = std::make_unique(*rootDevice, nullptr); auto clSubDevice = std::make_unique(*subDevice, *clRootDevice, nullptr); auto clSubSubDevice = std::make_unique(*subSubDevice, *clRootDevice, nullptr); EXPECT_EQ(clRootDevice->getSubDevice(0), clRootDevice->getNearestGenericSubDevice(0)); EXPECT_EQ(clSubDevice.get(), clSubDevice->getNearestGenericSubDevice(0)); EXPECT_EQ(clRootDevice->getSubDevice(0), clSubSubDevice->getNearestGenericSubDevice(0)); } TEST_F(EngineInstancedDeviceTests, givenAffinityMaskSetWhenCreatingDevicesThenFilterMaskedDevices) { constexpr uint32_t genericDevicesCount = 3; constexpr uint32_t ccsCount = 4; constexpr uint32_t engineInstancedPerGeneric[3] = {3, 0, 2}; constexpr bool supportedGenericSubDevices[3] = {true, false, true}; constexpr bool supportedEngineDevices[3][4] = {{true, true, true, false}, {false, false, false, false}, {false, false, true, true}}; DebugManager.flags.EngineInstancedSubDevices.set(true); DebugManager.flags.ZE_AFFINITY_MASK.set("0.0.0, 0.0.1, 0.0.2, 0.2.2, 0.2.3, 0.1.5"); if (!createDevices(genericDevicesCount, ccsCount)) { GTEST_SKIP(); } EXPECT_TRUE(hasRootCsrOnly(rootDevice)); for (uint32_t i = 0; i < genericDevicesCount; i++) { if (!supportedGenericSubDevices[i]) { EXPECT_EQ(nullptr, rootDevice->getSubDevice(i)); continue; } auto subDevice = static_cast(rootDevice->getSubDevice(i)); ASSERT_NE(nullptr, subDevice); EXPECT_FALSE(subDevice->allEngines[0].osContext->isRootDevice()); EXPECT_FALSE(subDevice->engineInstanced); 
EXPECT_EQ(engineInstancedPerGeneric[i], subDevice->getNumSubDevices()); EXPECT_EQ(0u, subDevice->getNumGenericSubDevices()); EXPECT_EQ(aub_stream::EngineType::NUM_ENGINES, subDevice->engineInstancedType); EXPECT_TRUE(hasAllEngines(subDevice)); for (uint32_t j = 0; j < ccsCount; j++) { if (!supportedEngineDevices[i][j]) { EXPECT_EQ(nullptr, subDevice->getSubDevice(j)); continue; } auto engineType = static_cast(aub_stream::EngineType::ENGINE_CCS + j); auto engineSubDevice = static_cast(subDevice->getSubDevice(j)); ASSERT_NE(nullptr, engineSubDevice); EXPECT_TRUE(isEngineInstanced(engineSubDevice, engineType, subDevice->getSubDeviceIndex(), subDevice->getDeviceBitfield())); EXPECT_TRUE(hasEngineInstancedEngines(engineSubDevice, engineType)); } } } TEST_F(EngineInstancedDeviceTests, givenAffinityMaskForSingle3rdLevelDeviceWhenCreatingDevicesThenCreate2ndLevelAsEngineInstanced) { constexpr uint32_t genericDevicesCount = 2; constexpr uint32_t ccsCount = 2; constexpr uint32_t create2ndLevelAsEngineInstanced[2] = {false, true}; constexpr uint32_t engineInstanced2ndLevelEngineIndex = 1; DebugManager.flags.EngineInstancedSubDevices.set(true); DebugManager.flags.ZE_AFFINITY_MASK.set("0.0, 0.1.1"); if (!createDevices(genericDevicesCount, ccsCount)) { GTEST_SKIP(); } EXPECT_TRUE(hasRootCsrOnly(rootDevice)); for (uint32_t i = 0; i < genericDevicesCount; i++) { auto subDevice = static_cast(rootDevice->getSubDevice(i)); ASSERT_NE(nullptr, subDevice); EXPECT_FALSE(subDevice->allEngines[0].osContext->isRootDevice()); if (create2ndLevelAsEngineInstanced[i]) { auto engineType = static_cast(aub_stream::EngineType::ENGINE_CCS + engineInstanced2ndLevelEngineIndex); DeviceBitfield deviceBitfield = (1llu << i); EXPECT_TRUE(isEngineInstanced(subDevice, engineType, i, deviceBitfield)); EXPECT_TRUE(hasEngineInstancedEngines(subDevice, engineType)); EXPECT_EQ(0u, subDevice->getNumGenericSubDevices()); EXPECT_EQ(0u, subDevice->getNumSubDevices()); continue; } 
// Remainder of the loop of the test that starts on the previous line: checks
// for a sub-device that is not expected to be engine instanced.
        EXPECT_TRUE(hasAllEngines(subDevice));

        EXPECT_FALSE(subDevice->engineInstanced);
        EXPECT_EQ(aub_stream::EngineType::NUM_ENGINES, subDevice->engineInstancedType);
        EXPECT_EQ(0u, subDevice->getNumGenericSubDevices());
        EXPECT_EQ(ccsCount, subDevice->getNumSubDevices());

        for (uint32_t j = 0; j < ccsCount; j++) {
            auto engineType = static_cast(aub_stream::EngineType::ENGINE_CCS + j);
            auto engineSubDevice = static_cast(subDevice->getSubDevice(j));
            ASSERT_NE(nullptr, engineSubDevice);

            EXPECT_TRUE(isEngineInstanced(engineSubDevice, engineType, subDevice->getSubDeviceIndex(), subDevice->getDeviceBitfield()));
            EXPECT_TRUE(hasEngineInstancedEngines(engineSubDevice, engineType));
        }
    }
}

// With the mask "0.1.1" as the only entry, the test expects the root device
// itself to be engine instanced: no sub-devices, engine index 1, and a device
// bitfield with only bit 1 set.
TEST_F(EngineInstancedDeviceTests, givenAffinityMaskForSingle3rdLevelDeviceOnlyWhenCreatingDevicesThenCreate1stLevelAsEngineInstanced) {
    constexpr uint32_t genericDevicesCount = 2;
    constexpr uint32_t ccsCount = 2;
    constexpr uint32_t genericDeviceIndex = 1;
    constexpr uint32_t engineInstancedEngineIndex = 1;

    DebugManager.flags.ZE_AFFINITY_MASK.set("0.1.1");

    if (!createDevices(genericDevicesCount, ccsCount)) {
        GTEST_SKIP();
    }

    EXPECT_FALSE(hasRootCsrOnly(rootDevice));

    auto engineType = static_cast(aub_stream::EngineType::ENGINE_CCS + engineInstancedEngineIndex);

    DeviceBitfield deviceBitfield = (1llu << genericDeviceIndex);

    EXPECT_FALSE(rootDevice->allEngines[0].osContext->isRootDevice());
    EXPECT_TRUE(rootDevice->engineInstanced);
    EXPECT_TRUE(rootDevice->getNumGenericSubDevices() == 0);
    EXPECT_TRUE(rootDevice->getNumSubDevices() == 0);
    EXPECT_TRUE(engineType == rootDevice->engineInstancedType);
    EXPECT_TRUE(deviceBitfield == rootDevice->getDeviceBitfield());
    EXPECT_EQ(1u, rootDevice->getDeviceBitfield().count());

    EXPECT_TRUE(hasEngineInstancedEngines(rootDevice, engineType));
}

// Same expectations as the test above, for a single generic device and the
// mask "0.0.1, 0.9". (The test body continues on the next line of the file.)
TEST_F(EngineInstancedDeviceTests, givenAffinityMaskForSingle2rdLevelDeviceOnlyWhenCreatingDevicesThenCreate1stLevelAsEngineInstanced) {
    constexpr uint32_t genericDevicesCount = 1;
    constexpr uint32_t ccsCount = 2;
    constexpr
    // Completes the `constexpr` declaration and the test started on the previous line.
    uint32_t genericDeviceIndex = 0;
    constexpr uint32_t engineInstancedEngineIndex = 1;

    DebugManager.flags.ZE_AFFINITY_MASK.set("0.0.1, 0.9");

    if (!createDevices(genericDevicesCount, ccsCount)) {
        GTEST_SKIP();
    }

    EXPECT_FALSE(hasRootCsrOnly(rootDevice));

    auto engineType = static_cast(aub_stream::EngineType::ENGINE_CCS + engineInstancedEngineIndex);

    DeviceBitfield deviceBitfield = (1llu << genericDeviceIndex);

    EXPECT_FALSE(rootDevice->allEngines[0].osContext->isRootDevice());
    EXPECT_TRUE(rootDevice->engineInstanced);
    EXPECT_TRUE(rootDevice->getNumGenericSubDevices() == 0);
    EXPECT_TRUE(rootDevice->getNumSubDevices() == 0);
    EXPECT_TRUE(engineType == rootDevice->engineInstancedType);
    EXPECT_TRUE(deviceBitfield == rootDevice->getDeviceBitfield());
    EXPECT_EQ(1u, rootDevice->getDeviceBitfield().count());

    EXPECT_TRUE(hasEngineInstancedEngines(rootDevice, engineType));
}

// Single generic device, two CCS, both EngineInstancedSubDevices and
// AllowSingleTileEngineInstancedSubDevices set, mask "0.0, 0.4": the root
// device is expected to report isEngineInstanced() and have no sub-devices.
TEST_F(EngineInstancedDeviceTests, givenAffinityMaskForSecondLevelOnSingleTileDeviceWhenCreatingThenEnableAllEngineInstancedDevices) {
    constexpr uint32_t genericDevicesCount = 1;
    constexpr uint32_t ccsCount = 2;

    DebugManager.flags.EngineInstancedSubDevices.set(true);
    DebugManager.flags.AllowSingleTileEngineInstancedSubDevices.set(true);
    DebugManager.flags.ZE_AFFINITY_MASK.set("0.0, 0.4");

    if (!createDevices(genericDevicesCount, ccsCount)) {
        GTEST_SKIP();
    }

    EXPECT_FALSE(hasRootCsrOnly(rootDevice));
    EXPECT_TRUE(rootDevice->isEngineInstanced());
    EXPECT_EQ(0u, rootDevice->getNumGenericSubDevices());
    EXPECT_EQ(0u, rootDevice->getNumSubDevices());
}

// Same flags as above but with a single CCS and mask "0.0".
// (The expectations follow on the next line of the file.)
TEST_F(EngineInstancedDeviceTests, givenAffinityMaskForSecondLevelOnSingleTileDeviceSingleEngineWhenCreatingThenDontEnableEngineInstancedDevices) {
    constexpr uint32_t genericDevicesCount = 1;
    constexpr uint32_t ccsCount = 1;

    DebugManager.flags.EngineInstancedSubDevices.set(true);
    DebugManager.flags.AllowSingleTileEngineInstancedSubDevices.set(true);
    DebugManager.flags.ZE_AFFINITY_MASK.set("0.0");

    if (!createDevices(genericDevicesCount, ccsCount)) {
        GTEST_SKIP();
    }
    // Expectations of the single-engine test started on the previous line:
    // the root device must not be engine instanced and has no sub-devices.
    EXPECT_FALSE(hasRootCsrOnly(rootDevice));
    EXPECT_FALSE(rootDevice->isEngineInstanced());
    EXPECT_EQ(0u, rootDevice->getNumGenericSubDevices());
    EXPECT_EQ(0u, rootDevice->getNumSubDevices());
}

// Mask "0.0" with two CCS but without setting the engine-instancing debug
// flags in this test: the root device must not be engine instanced.
TEST_F(EngineInstancedDeviceTests, givenAffinityMaskForSecondLevelOnSingleTileDeviceWithoutDebugFlagWhenCreatingThenDontEnableAllEngineInstancedDevices) {
    constexpr uint32_t genericDevicesCount = 1;
    constexpr uint32_t ccsCount = 2;

    DebugManager.flags.ZE_AFFINITY_MASK.set("0.0");

    if (!createDevices(genericDevicesCount, ccsCount)) {
        GTEST_SKIP();
    }

    EXPECT_FALSE(hasRootCsrOnly(rootDevice));
    EXPECT_FALSE(rootDevice->isEngineInstanced());
    EXPECT_EQ(0u, rootDevice->getNumGenericSubDevices());
    EXPECT_EQ(0u, rootDevice->getNumSubDevices());
}

// Three generic devices with mask "0.0,0.2": the wrapper built on the root
// device must report two sub-devices whose bitfields are 0b1 and 0b100.
TEST_F(EngineInstancedDeviceTests, givenAffinityMaskWhenCreatingClSubDevicesThenSkipDisabledDevices) {
    constexpr uint32_t genericDevicesCount = 3;
    constexpr uint32_t ccsCount = 1;

    DebugManager.flags.ZE_AFFINITY_MASK.set("0.0,0.2");

    if (!createDevices(genericDevicesCount, ccsCount)) {
        GTEST_SKIP();
    }

    auto clRootDevice = std::make_unique(*rootDevice, nullptr);

    ASSERT_EQ(2u, clRootDevice->getNumSubDevices());
    EXPECT_EQ(0b1u, clRootDevice->getSubDevice(0)->getDeviceBitfield().to_ulong());
    EXPECT_EQ(0b100u, clRootDevice->getSubDevice(1)->getDeviceBitfield().to_ulong());
}

// Programs VFE/CFE state through the default engine's CSR of sub-device 0 and
// inspects the command written into a local buffer.
// (The test body continues on the next line of the file.)
HWTEST2_F(EngineInstancedDeviceTests, givenEngineInstancedDeviceWhenProgrammingCfeStateThenSetSingleSliceDispatch, IsAtLeastXeHpCore) {
    using CFE_STATE = typename FamilyType::CFE_STATE;

    DebugManager.flags.EngineInstancedSubDevices.set(true);

    constexpr uint32_t genericDevicesCount = 1;
    constexpr uint32_t ccsCount = 2;

    DebugManager.flags.AllowSingleTileEngineInstancedSubDevices.set(true);

    if (!createDevices(genericDevicesCount, ccsCount)) {
        GTEST_SKIP();
    }

    auto subDevice = static_cast(rootDevice->getSubDevice(0));
    auto defaultEngine = subDevice->getDefaultEngine();
    EXPECT_TRUE(defaultEngine.osContext->isEngineInstanced());

    // Zero-initialised backing storage for the command stream.
    char buffer[64] = {};
    MockGraphicsAllocation
    // Completes the MockGraphicsAllocation declaration and the CFE state test
    // started on the previous line.
    graphicsAllocation(buffer, sizeof(buffer));
    LinearStream linearStream(&graphicsAllocation, graphicsAllocation.getUnderlyingBuffer(), graphicsAllocation.getUnderlyingBufferSize());

    auto csr = static_cast *>(defaultEngine.commandStreamReceiver);
    auto dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags();

    csr->programVFEState(linearStream, dispatchFlags, 1);

    // The start of the buffer is read back as a CFE_STATE command.
    auto cfeState = reinterpret_cast(buffer);

    EXPECT_TRUE(cfeState->getSingleSliceDispatchCcsMode());
}

// Creates a program for a context holding a sub-device and its two
// sub-devices, then checks that the build info stored for the sub-device
// lists exactly those two sub-devices, in order, as associatedSubDevices.
HWTEST_F(EngineInstancedDeviceTests, givenEngineInstancedDeviceWhenCreatingProgramThenAssignAllSubDevices) {
    constexpr uint32_t genericDevicesCount = 2;
    constexpr uint32_t ccsCount = 2;

    DebugManager.flags.EngineInstancedSubDevices.set(true);

    if (!createDevices(genericDevicesCount, ccsCount)) {
        GTEST_SKIP();
    }

    const char *source = "text";
    size_t sourceSize = strlen(source);

    auto clRootDevice = std::make_unique(*rootDevice, nullptr);
    auto clSubDevice = clRootDevice->getSubDevice(0);
    auto clSubSubDevice0 = clSubDevice->getSubDevice(0);
    auto clSubSubDevice1 = clSubDevice->getSubDevice(1);

    cl_device_id device_ids[] = {clSubDevice, clSubSubDevice0, clSubSubDevice1};
    ClDeviceVector deviceVector{device_ids, 3};
    MockContext context(deviceVector);

    // Initialised to an error code so that success has to be written by Program::create.
    cl_int retVal = CL_INVALID_PROGRAM;
    auto program = std::unique_ptr(Program::create(
        &context,
        1,
        &source,
        &sourceSize,
        retVal));

    ASSERT_NE(nullptr, program.get());
    ASSERT_EQ(CL_SUCCESS, retVal);

    ASSERT_TRUE(program->deviceBuildInfos.find(clSubDevice) != program->deviceBuildInfos.end());

    auto &associatedSubDevices = program->deviceBuildInfos[clSubDevice].associatedSubDevices;
    ASSERT_EQ(2u, associatedSubDevices.size());
    EXPECT_EQ(clSubSubDevice0, associatedSubDevices[0]);
    EXPECT_EQ(clSubSubDevice1, associatedSubDevices[1]);
}

// Round-robin command queue to engine assignment with four CCS.
// (The test body continues on the next line of the file.)
HWTEST_F(EngineInstancedDeviceTests, whenCreateMultipleCommandQueuesThenEnginesAreAssignedUsingRoundRobin) {
    constexpr uint32_t genericDevicesCount = 1;
    constexpr uint32_t ccsCount = 4;
    DebugManagerStateRestore restorer;
    // Remainder of the round-robin test started on the previous line: with
    // EnableCmdQRoundRobindEngineAssign set, queue i is expected to use
    // engine (i % engines.size()) of the default engine's group.
    DebugManager.flags.EnableCmdQRoundRobindEngineAssign.set(1);

    if (!createDevices(genericDevicesCount, ccsCount)) {
        GTEST_SKIP();
    }

    auto &hwInfo = rootDevice->getHardwareInfo();
    EXPECT_EQ(ccsCount, hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled);

    auto clRootDevice = std::make_unique(*rootDevice, nullptr);
    cl_device_id device_ids[] = {clRootDevice.get()};
    ClDeviceVector deviceVector{device_ids, 1};
    MockContext context(deviceVector);

    // 24 queues, so the assignment wraps around the engine list several times.
    std::array>, 24> cmdQs;
    for (auto &cmdQ : cmdQs) {
        cmdQ = std::make_unique>(&context, clRootDevice.get(), nullptr);
    }

    const auto &defaultEngine = clRootDevice->getDefaultEngine();
    const auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily);
    const auto engineGroupType = hwHelper.getEngineGroupType(defaultEngine.getEngineType(), defaultEngine.getEngineUsage(), hwInfo);

    auto defaultEngineGroupIndex = clRootDevice->getDevice().getEngineGroupIndexFromEngineGroupType(engineGroupType);
    auto engines = clRootDevice->getDevice().getRegularEngineGroups()[defaultEngineGroupIndex].engines;

    for (size_t i = 0; i < cmdQs.size(); i++) {
        auto engineIndex = i % engines.size();
        auto expectedCsr = engines[engineIndex].commandStreamReceiver;
        auto csr = &cmdQs[i]->getGpgpuCommandStreamReceiver();

        EXPECT_EQ(csr, expectedCsr);
    }
}

// Round-robin assignment with CmdQRoundRobindEngineAssignBitfield = 0b1101.
// (The test body continues on the next line of the file.)
HWTEST_F(EngineInstancedDeviceTests, givenCmdQRoundRobindEngineAssignBitfieldwWenCreateMultipleCommandQueuesThenEnginesAreAssignedUsingRoundRobinSkippingNotAvailableEngines) {
    constexpr uint32_t genericDevicesCount = 1;
    constexpr uint32_t ccsCount = 4;
    DebugManagerStateRestore restorer;
    DebugManager.flags.EnableCmdQRoundRobindEngineAssign.set(1);
    DebugManager.flags.CmdQRoundRobindEngineAssignBitfield.set(0b1101);

    if (!createDevices(genericDevicesCount, ccsCount)) {
        GTEST_SKIP();
    }

    auto &hwInfo = rootDevice->getHardwareInfo();
    EXPECT_EQ(ccsCount, hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled);

    auto clRootDevice = std::make_unique(*rootDevice, nullptr);
    cl_device_id device_ids[] = {clRootDevice.get()};
    ClDeviceVector
    // Completes the ClDeviceVector declaration and the bitfield test started
    // on the previous line.
    deviceVector{device_ids, 1};
    MockContext context(deviceVector);

    std::array>, 24> cmdQs;
    for (auto &cmdQ : cmdQs) {
        cmdQ = std::make_unique>(&context, clRootDevice.get(), nullptr);
    }

    const auto &defaultEngine = clRootDevice->getDefaultEngine();
    const auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily);
    const auto engineGroupType = hwHelper.getEngineGroupType(defaultEngine.getEngineType(), defaultEngine.getEngineUsage(), hwInfo);

    auto defaultEngineGroupIndex = clRootDevice->getDevice().getEngineGroupIndexFromEngineGroupType(engineGroupType);
    auto engines = clRootDevice->getDevice().getRegularEngineGroups()[defaultEngineGroupIndex].engines;

    // j walks the round-robin sequence; index 1 is stepped over, so the
    // expected engine for consecutive queues never has index 1.
    for (size_t i = 0, j = 0; i < cmdQs.size(); i++, j++) {
        if ((j % engines.size()) == 1) {
            j++;
        }
        auto engineIndex = j % engines.size();
        auto expectedCsr = engines[engineIndex].commandStreamReceiver;
        auto csr = &cmdQs[i]->getGpgpuCommandStreamReceiver();

        EXPECT_EQ(csr, expectedCsr);
    }
}

// Round-robin assignment with CmdQRoundRobindEngineAssignNTo1 = 3.
// (The test body continues on the next line of the file.)
HWTEST_F(EngineInstancedDeviceTests, givenCmdQRoundRobindEngineAssignNTo1wWenCreateMultipleCommandQueuesThenEnginesAreAssignedUsingRoundRobinAndNQueuesShareSameCsr) {
    constexpr uint32_t genericDevicesCount = 1;
    constexpr uint32_t ccsCount = 4;
    DebugManagerStateRestore restorer;
    DebugManager.flags.EnableCmdQRoundRobindEngineAssign.set(1);
    DebugManager.flags.CmdQRoundRobindEngineAssignNTo1.set(3);

    if (!createDevices(genericDevicesCount, ccsCount)) {
        GTEST_SKIP();
    }

    auto &hwInfo = rootDevice->getHardwareInfo();
    EXPECT_EQ(ccsCount, hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled);

    auto clRootDevice = std::make_unique(*rootDevice, nullptr);
    cl_device_id device_ids[] = {clRootDevice.get()};
    ClDeviceVector deviceVector{device_ids, 1};
    MockContext context(deviceVector);

    std::array>, 24> cmdQs;
    for (auto &cmdQ : cmdQs) {
        cmdQ = std::make_unique>(&context, clRootDevice.get(), nullptr);
    }

    const auto &defaultEngine = clRootDevice->getDefaultEngine();
    const auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily);
    const auto
    // Completes the `const auto` declaration and the NTo1 test started on the
    // previous line.
    engineGroupType = hwHelper.getEngineGroupType(defaultEngine.getEngineType(), defaultEngine.getEngineUsage(), hwInfo);

    auto defaultEngineGroupIndex = clRootDevice->getDevice().getEngineGroupIndexFromEngineGroupType(engineGroupType);
    auto engines = clRootDevice->getDevice().getRegularEngineGroups()[defaultEngineGroupIndex].engines;

    // Every three consecutive queues are expected to share one engine.
    for (size_t i = 0, j = 0; i < cmdQs.size(); i++, j++) {
        auto engineIndex = (j / 3) % engines.size();
        auto expectedCsr = engines[engineIndex].commandStreamReceiver;
        auto csr = &cmdQs[i]->getGpgpuCommandStreamReceiver();

        EXPECT_EQ(csr, expectedCsr);
    }
}

// Combination of CmdQRoundRobindEngineAssignNTo1 = 3 and
// CmdQRoundRobindEngineAssignBitfield = 0b1101.
// (The test body continues on the next line of the file.)
HWTEST_F(EngineInstancedDeviceTests, givenCmdQRoundRobindEngineAssignNTo1AndCmdQRoundRobindEngineAssignBitfieldwWenCreateMultipleCommandQueuesThenEnginesAreAssignedProperlyUsingRoundRobin) {
    constexpr uint32_t genericDevicesCount = 1;
    constexpr uint32_t ccsCount = 4;
    DebugManagerStateRestore restorer;
    DebugManager.flags.EnableCmdQRoundRobindEngineAssign.set(1);
    DebugManager.flags.CmdQRoundRobindEngineAssignNTo1.set(3);
    DebugManager.flags.CmdQRoundRobindEngineAssignBitfield.set(0b1101);

    if (!createDevices(genericDevicesCount, ccsCount)) {
        GTEST_SKIP();
    }

    auto &hwInfo = rootDevice->getHardwareInfo();
    EXPECT_EQ(ccsCount, hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled);

    auto clRootDevice = std::make_unique(*rootDevice, nullptr);
    cl_device_id device_ids[] = {clRootDevice.get()};
    ClDeviceVector deviceVector{device_ids, 1};
    MockContext context(deviceVector);

    std::array>, 24> cmdQs;
    for (auto &cmdQ : cmdQs) {
        cmdQ = std::make_unique>(&context, clRootDevice.get(), nullptr);
    }

    const auto &defaultEngine = clRootDevice->getDefaultEngine();
    const auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily);
    const auto engineGroupType = hwHelper.getEngineGroupType(defaultEngine.getEngineType(), defaultEngine.getEngineUsage(), hwInfo);

    auto defaultEngineGroupIndex = clRootDevice->getDevice().getEngineGroupIndexFromEngineGroupType(engineGroupType);
    auto engines =
        // Completes the `auto engines =` initialisation and the test started
        // on the previous line.
        clRootDevice->getDevice().getRegularEngineGroups()[defaultEngineGroupIndex].engines;

    // Groups of three queues per engine; groups that would map to index 1 are stepped over.
    for (size_t i = 0, j = 0; i < cmdQs.size(); i++, j++) {
        while (((j / 3) % engines.size()) == 1) {
            j++;
        }
        auto engineIndex = (j / 3) % engines.size();
        auto expectedCsr = engines[engineIndex].commandStreamReceiver;
        auto csr = &cmdQs[i]->getGpgpuCommandStreamReceiver();

        EXPECT_EQ(csr, expectedCsr);
    }
}

// With EnableCmdQRoundRobindEngineAssign set to 0, every one of the 24 queues
// is expected to use the default engine's command stream receiver.
HWTEST_F(EngineInstancedDeviceTests, givenEnableCmdQRoundRobindEngineAssignDisabledWenCreateMultipleCommandQueuesThenDefaultEngineAssigned) {
    constexpr uint32_t genericDevicesCount = 1;
    constexpr uint32_t ccsCount = 4;
    DebugManagerStateRestore restorer;
    DebugManager.flags.EnableCmdQRoundRobindEngineAssign.set(0);

    if (!createDevices(genericDevicesCount, ccsCount)) {
        GTEST_SKIP();
    }

    auto &hwInfo = rootDevice->getHardwareInfo();
    EXPECT_EQ(ccsCount, hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled);

    auto clRootDevice = std::make_unique(*rootDevice, nullptr);
    cl_device_id device_ids[] = {clRootDevice.get()};
    ClDeviceVector deviceVector{device_ids, 1};
    MockContext context(deviceVector);

    std::array>, 24> cmdQs;
    for (auto &cmdQ : cmdQs) {
        cmdQ = std::make_unique>(&context, clRootDevice.get(), nullptr);
    }

    const auto &defaultEngine = clRootDevice->getDefaultEngine();

    for (auto &cmdQ : cmdQs) {
        auto expectedCsr = defaultEngine.commandStreamReceiver;
        auto csr = &cmdQ->getGpgpuCommandStreamReceiver();

        EXPECT_EQ(csr, expectedCsr);
    }
}

// After initializeRootCommandStreamReceiver(), the CSR of engine index 1 must
// report that direct submission is not enabled.
TEST(SubDevicesTest, whenInitializeRootCsrThenDirectSubmissionIsNotInitialized) {
    auto device = std::make_unique();
    device->initializeRootCommandStreamReceiver();

    auto csr = device->getEngine(1u).commandStreamReceiver;
    EXPECT_FALSE(csr->isDirectSubmissionEnabled());
}

// With CreateMultipleSubDevices = 2, creates the bindless heaps helper on the
// root device environment. (The test body continues on the next line of the file.)
TEST(SubDevicesTest, givenCreateMultipleSubDevicesFlagSetWhenBindlessHeapHelperCreatedThenSubDeviceReturnRootDeviceMember) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.CreateMultipleSubDevices.set(2);
    VariableBackup mockDeviceFlagBackup(&MockDevice::createSingleDevice, false);
    auto device =
std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); device->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]->createBindlessHeapsHelper(device->getMemoryManager(), device->getNumGenericSubDevices() > 1, device->getRootDeviceIndex(), device->getDeviceBitfield()); EXPECT_EQ(device->getBindlessHeapsHelper(), device->subdevices.at(0)->getBindlessHeapsHelper()); } compute-runtime-22.14.22890/opencl/test/unit_test/event/000077500000000000000000000000001422164147700227505ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/event/CMakeLists.txt000066400000000000000000000011361422164147700255110ustar00rootroot00000000000000# # Copyright (C) 2018-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_event ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/async_events_handler_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/event_builder_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/event_callbacks_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/event_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/event_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/event_tracker_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/user_events_tests.cpp ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_event}) compute-runtime-22.14.22890/opencl/test/unit_test/event/async_events_handler_tests.cpp000066400000000000000000000342631422164147700311040ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/wait_status.h" #include "shared/source/helpers/timestamp_packet.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/test_macros/mock_method_macros.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/source/event/async_events_handler.h" #include "opencl/source/event/event.h" #include 
"opencl/source/event/user_event.h" #include "opencl/test/unit_test/mocks/mock_async_event_handler.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" using namespace NEO; using namespace ::testing; class AsyncEventsHandlerTests : public ::testing::Test { public: class MyEvent : public Event { public: MyEvent(Context *ctx, CommandQueue *cmdQueue, cl_command_type cmdType, uint32_t taskLevel, uint32_t taskCount) : Event(ctx, cmdQueue, cmdType, taskLevel, taskCount) { handler.reset(new MockHandler()); } int getExecutionStatus() { //return execution status without updating return executionStatus.load(); } void setTaskStamp(uint32_t taskLevel, uint32_t taskCount) { this->taskLevel.store(taskLevel); this->updateTaskCount(taskCount, 0); } WaitStatus wait(bool blocking, bool quickKmdSleep) override { waitCalled++; handler->allowAsyncProcess.store(false); return waitResult; } uint32_t waitCalled = 0u; WaitStatus waitResult = WaitStatus::Ready; std::unique_ptr handler; }; static void CL_CALLBACK callbackFcn(cl_event e, cl_int status, void *data) { ++(*(int *)data); } void SetUp() override { dbgRestore.reset(new DebugManagerStateRestore()); DebugManager.flags.EnableAsyncEventsHandler.set(false); handler.reset(new MockHandler()); context = make_releaseable(); commandQueue = make_releaseable(context.get(), context->getDevice(0), nullptr, false); *(commandQueue->getGpgpuCommandStreamReceiver().getTagAddress()) = 0; event1 = make_releaseable(context.get(), commandQueue.get(), CL_COMMAND_BARRIER, CompletionStamp::notReady, CompletionStamp::notReady); event2 = make_releaseable(context.get(), commandQueue.get(), CL_COMMAND_BARRIER, CompletionStamp::notReady, CompletionStamp::notReady); event3 = make_releaseable(context.get(), commandQueue.get(), CL_COMMAND_BARRIER, CompletionStamp::notReady, CompletionStamp::notReady); } std::unique_ptr dbgRestore; std::unique_ptr handler; int counter = 0; ReleaseableObjectPtr context; 
ReleaseableObjectPtr commandQueue; ReleaseableObjectPtr event1; ReleaseableObjectPtr event2; ReleaseableObjectPtr event3; }; TEST_F(AsyncEventsHandlerTests, givenEventsWhenListIsProcessedThenUpdateExecutionStatus) { event1->setTaskStamp(0, 0); event2->setTaskStamp(0, 0); handler->registerEvent(event1.get()); handler->registerEvent(event2.get()); EXPECT_EQ(CL_QUEUED, event1->getExecutionStatus()); EXPECT_EQ(CL_QUEUED, event2->getExecutionStatus()); handler->process(); EXPECT_NE(CL_QUEUED, event1->getExecutionStatus()); EXPECT_NE(CL_QUEUED, event2->getExecutionStatus()); EXPECT_TRUE(handler->peekIsListEmpty()); // auto-unregister when no callbacs } TEST_F(AsyncEventsHandlerTests, WhenProcessIsCompletedThenRefInternalCountIsDecremented) { event1->setTaskStamp(CompletionStamp::notReady, 0); handler->registerEvent(event1.get()); EXPECT_EQ(2, event1->getRefInternalCount()); handler->process(); EXPECT_TRUE(handler->peekIsListEmpty()); EXPECT_EQ(1, event1->getRefInternalCount()); } TEST_F(AsyncEventsHandlerTests, givenNotCalledCallbacksWhenListIsProcessedThenDontUnregister) { int submittedCounter(0), completeCounter(0); event1->setTaskStamp(CompletionStamp::notReady, 0); event1->addCallback(&this->callbackFcn, CL_SUBMITTED, &submittedCounter); event1->addCallback(&this->callbackFcn, CL_COMPLETE, &completeCounter); handler->registerEvent(event1.get()); auto expect = [&](int status, int sCounter, int cCounter, bool empty) { EXPECT_EQ(status, event1->getExecutionStatus()); EXPECT_EQ(sCounter, submittedCounter); EXPECT_EQ(cCounter, completeCounter); EXPECT_EQ(empty, handler->peekIsListEmpty()); }; handler->process(); expect(CL_QUEUED, 0, 0, false); event1->setStatus(CL_SUBMITTED); handler->process(); expect(CL_SUBMITTED, 1, 0, false); event1->setStatus(CL_COMPLETE); handler->process(); expect(CL_COMPLETE, 1, 1, true); } TEST_F(AsyncEventsHandlerTests, givenExternallSynchronizedEventWhenListIsProcessedAndEventIsNotInCompleteStateThenDontUnregister) { struct 
ExternallySynchronizedEvent : Event { ExternallySynchronizedEvent(int numUpdatesBeforeCompletion) : Event(nullptr, 0, 0, 0), numUpdatesBeforeCompletion(numUpdatesBeforeCompletion) { } void updateExecutionStatus() override { ++updateCount; if (updateCount == numUpdatesBeforeCompletion) { transitionExecutionStatus(CL_COMPLETE); } } bool isExternallySynchronized() const override { return true; } int updateCount = 0; int numUpdatesBeforeCompletion = 1; }; constexpr int numUpdatesBeforeCompletion = 5; auto *event = new ExternallySynchronizedEvent(numUpdatesBeforeCompletion); handler->registerEvent(event); for (int i = 0; i < numUpdatesBeforeCompletion * 2; ++i) { handler->process(); } EXPECT_EQ(CL_COMPLETE, event->peekExecutionStatus()); EXPECT_EQ(numUpdatesBeforeCompletion, event->updateCount); event->release(); } TEST_F(AsyncEventsHandlerTests, givenDoubleRegisteredEventWhenListIsProcessedAndNoCallbacksToProcessThenUnregister) { event1->setTaskStamp(CompletionStamp::notReady - 1, CompletionStamp::notReady + 1); event1->addCallback(&this->callbackFcn, CL_SUBMITTED, &counter); handler->registerEvent(event1.get()); handler->registerEvent(event1.get()); handler->process(); EXPECT_EQ(CL_SUBMITTED, event1->getExecutionStatus()); EXPECT_EQ(1, counter); EXPECT_TRUE(handler->peekIsListEmpty()); } TEST_F(AsyncEventsHandlerTests, givenEventsNotHandledByHandlderWhenDestructingThenUnreferenceAll) { auto myHandler = new MockHandler(); event1->setTaskStamp(CompletionStamp::notReady, 0); event2->setTaskStamp(CompletionStamp::notReady, 0); event1->addCallback(&this->callbackFcn, CL_SUBMITTED, &counter); event2->addCallback(&this->callbackFcn, CL_SUBMITTED, &counter); myHandler->registerEvent(event1.get()); myHandler->process(); myHandler->registerEvent(event2.get()); EXPECT_FALSE(myHandler->peekIsListEmpty()); EXPECT_FALSE(myHandler->peekIsRegisterListEmpty()); EXPECT_EQ(3, event1->getRefInternalCount()); EXPECT_EQ(3, event2->getRefInternalCount()); delete myHandler; // 1 left because 
of callbacks EXPECT_EQ(2, event1->getRefInternalCount()); EXPECT_EQ(2, event2->getRefInternalCount()); // release callbacks event1->setStatus(CL_SUBMITTED); event2->setStatus(CL_SUBMITTED); } TEST_F(AsyncEventsHandlerTests, givenEventsNotHandledByHandlderWhenAsyncExecutionInterruptedThenUnreferenceAll) { event1->setTaskStamp(CompletionStamp::notReady, 0); event2->setTaskStamp(CompletionStamp::notReady, 0); event1->addCallback(&this->callbackFcn, CL_SUBMITTED, &counter); event2->addCallback(&this->callbackFcn, CL_SUBMITTED, &counter); handler->registerEvent(event1.get()); handler->process(); handler->registerEvent(event2.get()); EXPECT_FALSE(handler->peekIsListEmpty()); EXPECT_FALSE(handler->peekIsRegisterListEmpty()); EXPECT_EQ(3, event1->getRefInternalCount()); EXPECT_EQ(3, event2->getRefInternalCount()); handler->allowAsyncProcess.store(false); MockHandler::asyncProcess(handler.get()); // enter and exit because of allowAsyncProcess == false EXPECT_EQ(2, event1->getRefInternalCount()); EXPECT_EQ(2, event2->getRefInternalCount()); EXPECT_TRUE(handler->peekIsListEmpty()); EXPECT_TRUE(handler->peekIsRegisterListEmpty()); event1->setStatus(CL_SUBMITTED); event2->setStatus(CL_SUBMITTED); } TEST_F(AsyncEventsHandlerTests, WhenHandlerIsCreatedThenThreadIsNotCreatedByDefault) { MockHandler myHandler; EXPECT_EQ(nullptr, myHandler.thread.get()); } TEST_F(AsyncEventsHandlerTests, WhenHandlerIsRegisteredThenThreadIsCreated) { event1->setTaskStamp(CompletionStamp::notReady, 0); EXPECT_FALSE(handler->openThreadCalled); handler->registerEvent(event1.get()); EXPECT_TRUE(handler->openThreadCalled); } TEST_F(AsyncEventsHandlerTests, WhenProcessingAsynchronouslyThenBothThreadsCompelete) { DebugManager.flags.EnableAsyncEventsHandler.set(true); event1->setTaskStamp(CompletionStamp::notReady, CompletionStamp::notReady + 1); event2->setTaskStamp(CompletionStamp::notReady, CompletionStamp::notReady + 1); event1->addCallback(&this->callbackFcn, CL_SUBMITTED, &counter); 
event2->addCallback(&this->callbackFcn, CL_SUBMITTED, &counter); EXPECT_EQ(CL_QUEUED, event1->getExecutionStatus()); EXPECT_EQ(CL_QUEUED, event2->getExecutionStatus()); // unblock to submit event1->taskLevel.store(0); event2->taskLevel.store(0); while (event1->getExecutionStatus() == CL_QUEUED || event2->getExecutionStatus() == CL_QUEUED) { std::this_thread::yield(); } EXPECT_EQ(CL_SUBMITTED, event1->getExecutionStatus()); EXPECT_EQ(CL_SUBMITTED, event2->getExecutionStatus()); context->getAsyncEventsHandler().closeThread(); } TEST_F(AsyncEventsHandlerTests, WhenThreadIsDestructedThenGetThreadReturnsNull) { handler->allowThreadCreating = true; handler->openThread(); // wait for sleep while (handler->transferCounter == 0) { std::this_thread::yield(); } std::unique_lock lock(handler->asyncMtx); lock.unlock(); handler->closeThread(); EXPECT_EQ(nullptr, handler->thread.get()); } TEST_F(AsyncEventsHandlerTests, givenReadyEventWhenCallbackIsAddedThenDontOpenThread) { DebugManager.flags.EnableAsyncEventsHandler.set(true); auto myHandler = new MockHandler(true); context->getAsyncEventsHandlerUniquePtr().reset(myHandler); event1->setTaskStamp(0, 0); event1->addCallback(&this->callbackFcn, CL_SUBMITTED, &counter); EXPECT_EQ(static_cast(&context->getAsyncEventsHandler()), myHandler); EXPECT_FALSE(event1->peekHasCallbacks()); EXPECT_FALSE(myHandler->openThreadCalled); } TEST_F(AsyncEventsHandlerTests, givenUserEventWhenCallbackIsAddedThenDontRegister) { DebugManager.flags.EnableAsyncEventsHandler.set(true); auto myHandler = new MockHandler(true); context->getAsyncEventsHandlerUniquePtr().reset(myHandler); UserEvent userEvent; userEvent.addCallback(&this->callbackFcn, CL_COMPLETE, &counter); EXPECT_TRUE(handler->peekIsListEmpty()); EXPECT_TRUE(handler->peekIsRegisterListEmpty()); EXPECT_TRUE(userEvent.peekHasCallbacks()); userEvent.decRefInternal(); } TEST_F(AsyncEventsHandlerTests, givenRegistredEventsWhenProcessIsCalledThenReturnCandidateWithLowestTaskCount) { int 
        // Completes the `int` declaration and the test started on the previous
        // line: three events with task counts 1, 2, 3 are registered out of
        // order and process() must return event1 (task count 1).
        event1Counter(0),
        event2Counter(0), event3Counter(0);

    event1->setTaskStamp(0, 1);
    event2->setTaskStamp(0, 2);
    event3->setTaskStamp(0, 3);

    event2->addCallback(&this->callbackFcn, CL_COMPLETE, &event2Counter);
    handler->registerEvent(event2.get());
    event1->addCallback(&this->callbackFcn, CL_COMPLETE, &event1Counter);
    handler->registerEvent(event1.get());
    event3->addCallback(&this->callbackFcn, CL_COMPLETE, &event3Counter);
    handler->registerEvent(event3.get());

    auto sleepCandidate = handler->process();
    EXPECT_EQ(event1.get(), sleepCandidate);

    event1->setStatus(CL_COMPLETE);
    event2->setStatus(CL_COMPLETE);
    event3->setStatus(CL_COMPLETE);
}

// event1 has the lower task count but no callback, so process() is expected
// to return event2, the only event with a callback.
TEST_F(AsyncEventsHandlerTests, givenEventWithoutCallbacksWhenProcessedThenDontReturnAsSleepCandidate) {
    event1->setTaskStamp(0, 1);
    event2->setTaskStamp(0, 2);

    handler->registerEvent(event1.get());
    event2->addCallback(&this->callbackFcn, CL_COMPLETE, &counter);
    handler->registerEvent(event2.get());

    auto sleepCandidate = handler->process();
    EXPECT_EQ(event2.get(), sleepCandidate);

    event2->setStatus(CL_COMPLETE);
}

// asyncProcess() on the event's own handler must call wait() once; with the
// default waitResult the status must not be executionAbortedDueToGpuHang.
TEST_F(AsyncEventsHandlerTests, givenNoGpuHangAndSleepCandidateWhenProcessedThenCallWaitWithQuickKmdSleepRequest) {
    event1->setTaskStamp(0, 1);
    event1->addCallback(&this->callbackFcn, CL_COMPLETE, &counter);
    event1->handler->registerEvent(event1.get());
    event1->handler->allowAsyncProcess.store(true);

    MockHandler::asyncProcess(event1->handler.get());

    EXPECT_EQ(1u, event1->waitCalled);
    EXPECT_NE(Event::executionAbortedDueToGpuHang, event1->peekExecutionStatus());

    event1->setStatus(CL_COMPLETE);
}

// Same flow with waitResult = WaitStatus::GpuHang.
// (The expectations follow on the next line of the file.)
TEST_F(AsyncEventsHandlerTests, givenSleepCandidateAndGpuHangWhenProcessedThenCallWaitAndSetExecutionStatusToAbortedDueToGpuHang) {
    event1->setTaskStamp(0, 1);
    event1->addCallback(&this->callbackFcn, CL_COMPLETE, &counter);
    event1->handler->registerEvent(event1.get());
    event1->handler->allowAsyncProcess.store(true);
    event1->waitResult = WaitStatus::GpuHang;

    MockHandler::asyncProcess(event1->handler.get());

    EXPECT_EQ(1u,
event1->waitCalled); EXPECT_EQ(Event::executionAbortedDueToGpuHang, event1->peekExecutionStatus()); event1->setStatus(CL_COMPLETE); } TEST_F(AsyncEventsHandlerTests, WhenReturningThenAsyncProcessWillCallProcessList) { Event *event = new Event(nullptr, CL_COMMAND_NDRANGE_KERNEL, 0, 0); handler->registerEvent(event); handler->allowAsyncProcess.store(false); MockHandler::asyncProcess(handler.get()); EXPECT_TRUE(handler->peekIsListEmpty()); EXPECT_EQ(1, event->getRefInternalCount()); event->release(); } compute-runtime-22.14.22890/opencl/test/unit_test/event/event_builder_tests.cpp000066400000000000000000000420011422164147700275220ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/source/utilities/arrayref.h" #include "shared/test/common/mocks/mock_csr.h" #include "shared/test/common/mocks/mock_device.h" #include "opencl/source/event/event_builder.h" #include "opencl/source/event/user_event.h" #include "opencl/source/helpers/task_information.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_event.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "gtest/gtest.h" namespace NEO { struct SmallEventBuilderEventMock : MockEvent { SmallEventBuilderEventMock(CommandQueue *commandQueue, int param1, float param2) : MockEvent(commandQueue, CL_COMMAND_NDRANGE_KERNEL, 0, 0), constructionParam1(param1), constructionParam2(param2) { } SmallEventBuilderEventMock(CommandQueue *commandQueue) : SmallEventBuilderEventMock(commandQueue, 1, 2.0f) { } void overrideMagic(cl_long newMagic) { this->magic = newMagic; } int constructionParam1 = 1; float constructionParam2 = 2.0f; }; struct SmallEventBuilderMock : EventBuilder { void clear() { EventBuilder::clear(); } void 
clearEvent() { event = nullptr; } ArrayRef getParentEvents() { return this->parentEvents; } }; TEST(EventBuilder, whenCreatingNewEventThenForwardArgumentsToEventConstructor) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); MockContext context(device.get()); MockCommandQueue cmdQ(&context, device.get(), nullptr, false); EventBuilder eventBuilder; EXPECT_EQ(nullptr, eventBuilder.getEvent()); constexpr int constrParam1 = 7; constexpr float constrParam2 = 13.0f; eventBuilder.create(&cmdQ, constrParam1, constrParam2); Event *peekedEvent = eventBuilder.getEvent(); ASSERT_NE(nullptr, peekedEvent); auto finalizedEvent = static_cast(eventBuilder.finalizeAndRelease()); EXPECT_EQ(peekedEvent, peekedEvent); EXPECT_EQ(constrParam1, finalizedEvent->constructionParam1); EXPECT_EQ(constrParam2, finalizedEvent->constructionParam2); finalizedEvent->release(); } TEST(EventBuilder, givenVirtualEventWithCommandThenFinalizeAddChild) { class MockCommandComputeKernel : public CommandComputeKernel { public: using CommandComputeKernel::eventsWaitlist; MockCommandComputeKernel(CommandQueue &commandQueue, std::unique_ptr &kernelOperation, std::vector &surfaces, Kernel *kernel) : CommandComputeKernel(commandQueue, kernelOperation, surfaces, false, false, false, nullptr, PreemptionMode::Disabled, kernel, 0) {} }; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); MockContext context(device.get()); MockCommandQueue cmdQ(&context, device.get(), nullptr, false); MockKernelWithInternals kernel(*device); IndirectHeap *ih1 = nullptr, *ih2 = nullptr, *ih3 = nullptr; cmdQ.allocateHeapMemory(IndirectHeap::Type::DYNAMIC_STATE, 1, ih1); cmdQ.allocateHeapMemory(IndirectHeap::Type::INDIRECT_OBJECT, 1, ih2); cmdQ.allocateHeapMemory(IndirectHeap::Type::SURFACE_STATE, 1, ih3); auto cmdStream = new LinearStream(device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), 1, 
AllocationType::COMMAND_BUFFER, device->getDeviceBitfield()})); std::vector surfaces; auto kernelOperation = std::make_unique(cmdStream, *device->getDefaultEngine().commandStreamReceiver->getInternalAllocationStorage()); kernelOperation->setHeaps(ih1, ih2, ih3); std::unique_ptr command = std::make_unique(cmdQ, kernelOperation, surfaces, kernel); VirtualEvent virtualEvent(&cmdQ); virtualEvent.setCommand(std::move(command)); EventBuilder eventBuilder; EXPECT_EQ(nullptr, eventBuilder.getEvent()); constexpr int constrParam1 = 7; constexpr float constrParam2 = 13.0f; eventBuilder.create(&cmdQ, constrParam1, constrParam2); Event *peekedEvent = eventBuilder.getEvent(); ASSERT_NE(nullptr, peekedEvent); virtualEvent.taskLevel = CL_SUBMITTED; eventBuilder.addParentEvent(&virtualEvent); eventBuilder.finalize(); peekedEvent->release(); } TEST(EventBuilder, givenVirtualEventWithSubmittedCommandAsParentThenFinalizeNotAddChild) { class MockVirtualEvent : public VirtualEvent { public: using VirtualEvent::eventWithoutCommand; using VirtualEvent::submittedCmd; }; class MockCommandComputeKernel : public CommandComputeKernel { public: using CommandComputeKernel::eventsWaitlist; MockCommandComputeKernel(CommandQueue &commandQueue, std::unique_ptr &kernelOperation, std::vector &surfaces, Kernel *kernel) : CommandComputeKernel(commandQueue, kernelOperation, surfaces, false, false, false, nullptr, PreemptionMode::Disabled, kernel, 0) {} }; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); MockContext context(device.get()); MockCommandQueue cmdQ(&context, device.get(), nullptr, false); MockKernelWithInternals kernel(*device); IndirectHeap *ih1 = nullptr, *ih2 = nullptr, *ih3 = nullptr; cmdQ.allocateHeapMemory(IndirectHeap::Type::DYNAMIC_STATE, 1, ih1); cmdQ.allocateHeapMemory(IndirectHeap::Type::INDIRECT_OBJECT, 1, ih2); cmdQ.allocateHeapMemory(IndirectHeap::Type::SURFACE_STATE, 1, ih3); auto cmdStream = new 
LinearStream(device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), 4096, AllocationType::COMMAND_BUFFER, device->getDeviceBitfield()})); std::vector surfaces; auto kernelOperation = std::make_unique(cmdStream, *device->getDefaultEngine().commandStreamReceiver->getInternalAllocationStorage()); kernelOperation->setHeaps(ih1, ih2, ih3); std::unique_ptr command = std::make_unique(cmdQ, kernelOperation, surfaces, kernel); MockVirtualEvent virtualEvent; virtualEvent.eventWithoutCommand = false; virtualEvent.submittedCmd.exchange(command.release()); EventBuilder eventBuilder; EXPECT_EQ(nullptr, eventBuilder.getEvent()); constexpr int constrParam1 = 7; constexpr float constrParam2 = 13.0f; eventBuilder.create(&cmdQ, constrParam1, constrParam2); Event *peekedEvent = eventBuilder.getEvent(); ASSERT_NE(nullptr, peekedEvent); virtualEvent.taskLevel = CL_SUBMITTED; eventBuilder.addParentEvent(&virtualEvent); eventBuilder.finalize(); peekedEvent->release(); } TEST(EventBuilder, whenDestroyingEventBuilderThenImplicitFinalizeIsCalled) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); MockContext context(device.get()); MockCommandQueue cmdQ(&context, device.get(), nullptr, false); SmallEventBuilderEventMock *ev = nullptr; auto parentEvent = new UserEvent; { EventBuilder eventBuilder{}; eventBuilder.create(&cmdQ); eventBuilder.addParentEvent(*parentEvent); ev = static_cast(eventBuilder.getEvent()); ASSERT_NE(nullptr, ev); EXPECT_EQ(0U, ev->peekNumEventsBlockingThis()); } // make sure that finalize was called on EventBuilder's d-tor and parent was added properly EXPECT_EQ(1U, ev->peekNumEventsBlockingThis()); ev->release(); parentEvent->release(); } TEST(EventBuilder, whenFinalizeIsCalledTwiceOnEventBuilderThenSecondRequestIsDropped) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); MockContext context(device.get()); MockCommandQueue 
cmdQ(&context, device.get(), nullptr, false); SmallEventBuilderEventMock *ev = nullptr; EventBuilder eventBuilder{}; eventBuilder.create(&cmdQ); ev = static_cast(eventBuilder.getEvent()); ASSERT_NE(nullptr, ev); eventBuilder.finalize(); auto *falseParentEvent = new UserEvent(); auto *falseChildEvent = new SmallEventBuilderEventMock(&cmdQ); auto numParents = ev->peekNumEventsBlockingThis(); auto numChildren = (ev->peekChildEvents() != nullptr) ? 1U + ev->peekChildEvents()->countSuccessors() : 0; eventBuilder.addParentEvent(*falseParentEvent); eventBuilder.finalize(); // make sure that new parent was not added in second finalize EXPECT_EQ(numParents, ev->peekNumEventsBlockingThis()); EXPECT_EQ(numChildren, (ev->peekChildEvents() != nullptr) ? 1U + ev->peekChildEvents()->countSuccessors() : 0); falseParentEvent->release(); falseChildEvent->release(); ev->release(); } TEST(EventBuilder, whenFinalizeAndReleaseIsCalledThenEventBuilderReleasesReferenceToEvent) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); MockContext context(device.get()); MockCommandQueue cmdQ(&context, device.get(), nullptr, false); EventBuilder eventBuilder; eventBuilder.create(&cmdQ); auto ev = static_cast(eventBuilder.finalizeAndRelease()); ASSERT_NE(nullptr, ev); ASSERT_EQ(nullptr, eventBuilder.getEvent()); ASSERT_EQ(nullptr, eventBuilder.finalizeAndRelease()); ev->release(); } TEST(EventBuilder, whenClearIsCalledThenAllEventsAndReferencesAreDropped) { auto parentEvent = new UserEvent(); SmallEventBuilderMock eventBuilder; eventBuilder.addParentEvent(*parentEvent); eventBuilder.clear(); EXPECT_EQ(0U, eventBuilder.getParentEvents().size()); EXPECT_EQ(nullptr, eventBuilder.getEvent()); parentEvent->release(); } TEST(EventBuilder, whenCParentEventsGetAddedThenTheirReferenceCountGetsIncreasedUntilFinalizeIsCalled) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); MockContext context(device.get()); 
MockCommandQueue cmdQ(&context, device.get(), nullptr, false); UserEvent evParent1; UserEvent evParent2; EXPECT_EQ(1, evParent1.getRefInternalCount()); EXPECT_EQ(1, evParent2.getRefInternalCount()); EventBuilder eventBuilder; eventBuilder.create(&cmdQ); eventBuilder.addParentEvent(evParent1); EXPECT_EQ(2, evParent1.getRefInternalCount()); eventBuilder.addParentEvent(evParent2); EXPECT_EQ(2, evParent2.getRefInternalCount()); auto createdEvent = static_cast(eventBuilder.finalizeAndRelease()); EXPECT_EQ(2U, createdEvent->peekNumEventsBlockingThis()); createdEvent->release(); evParent1.setStatus(CL_COMPLETE); evParent2.setStatus(CL_COMPLETE); EXPECT_EQ(1, evParent1.getRefInternalCount()); EXPECT_EQ(1, evParent2.getRefInternalCount()); } TEST(EventBuilder, whenFinalizeIsCalledWithEmptyEventsListsThenParentAndChildListsAreEmpty) { EventBuilder eventBuilder; eventBuilder.create>(nullptr, CL_COMMAND_MARKER, 0, 0); Event *event = eventBuilder.finalizeAndRelease(); EXPECT_EQ(0U, event->peekNumEventsBlockingThis()); EXPECT_EQ(nullptr, event->peekChildEvents()); event->release(); } TEST(EventBuilder, whenFinalizeIsCalledAndBuildersEventsListAreNotEmptyThenEventsListsAreAddedToEvent) { MockEvent *parentEvent = new MockEvent(); EventBuilder eventBuilder; eventBuilder.create>(nullptr, CL_COMMAND_MARKER, 0, 0); eventBuilder.addParentEvent(*parentEvent); Event *event = eventBuilder.finalizeAndRelease(); EXPECT_EQ(1U, event->peekNumEventsBlockingThis()); ASSERT_NE(nullptr, parentEvent->peekChildEvents()); EXPECT_EQ(event, parentEvent->peekChildEvents()->ref); parentEvent->setStatus(CL_COMPLETE); EXPECT_EQ(0U, event->peekNumEventsBlockingThis()); EXPECT_EQ(nullptr, parentEvent->peekChildEvents()); event->release(); parentEvent->release(); } TEST(EventBuilder, whenFinalizeIsCalledAndParentsListContainsManyEventsFromWhichOnlyFirstOnesAreCompletedThenEventIsNotCompleted) { MockEvent *userEventNotCompleted = new MockEvent(); MockEvent *userEventCompleted = new MockEvent(); 
userEventCompleted->setStatus(CL_COMPLETE); EventBuilder eventBuilder; eventBuilder.create>(nullptr, CL_COMMAND_MARKER, 0, 0); eventBuilder.addParentEvent(*userEventCompleted); eventBuilder.addParentEvent(*userEventNotCompleted); Event *event = eventBuilder.finalizeAndRelease(); EXPECT_FALSE(event->updateStatusAndCheckCompletion()); EXPECT_EQ(1U, event->peekNumEventsBlockingThis()); ASSERT_EQ(nullptr, userEventCompleted->peekChildEvents()); ASSERT_NE(nullptr, userEventNotCompleted->peekChildEvents()); EXPECT_EQ(event, userEventNotCompleted->peekChildEvents()->ref); userEventNotCompleted->setStatus(CL_COMPLETE); EXPECT_EQ(0U, event->peekNumEventsBlockingThis()); EXPECT_EQ(nullptr, userEventNotCompleted->peekChildEvents()); event->release(); userEventCompleted->release(); userEventNotCompleted->release(); } TEST(EventBuilder, whenAddingNullptrAsNewParentEventThenItIsIgnored) { SmallEventBuilderMock eventBuilder; EXPECT_EQ(0U, eventBuilder.getParentEvents().size()); eventBuilder.addParentEvent(nullptr); EXPECT_EQ(0U, eventBuilder.getParentEvents().size()); } TEST(EventBuilder, whenAddingValidEventAsNewParentEventThenItIsProperlyAddedToParentsList) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); MockContext context(device.get()); MockCommandQueue cmdQ(&context, device.get(), nullptr, false); auto event = new SmallEventBuilderEventMock(&cmdQ); SmallEventBuilderMock eventBuilder; eventBuilder.create>(nullptr, CL_COMMAND_MARKER, 0, 0); EXPECT_EQ(0U, eventBuilder.getParentEvents().size()); eventBuilder.addParentEvent(event); EXPECT_EQ(1U, eventBuilder.getParentEvents().size()); event->release(); eventBuilder.finalize(); eventBuilder.getEvent()->release(); } TEST(EventBuilder, whenAddingMultipleEventsAsNewParentsThenOnlyValidOnesAreInsertedIntoParentsList) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); MockContext context(device.get()); MockCommandQueue 
cmdQ(&context, device.get(), nullptr, false); auto event = new SmallEventBuilderEventMock(&cmdQ); auto invalidEvent = new SmallEventBuilderEventMock(&cmdQ); invalidEvent->overrideMagic(0); cl_event eventsList[] = {nullptr, event, invalidEvent}; SmallEventBuilderMock eventBuilder; eventBuilder.create>(nullptr, CL_COMMAND_MARKER, 0, 0); EXPECT_EQ(0U, eventBuilder.getParentEvents().size()); eventBuilder.addParentEvents(ArrayRef(eventsList)); ASSERT_EQ(1U, eventBuilder.getParentEvents().size()); EXPECT_EQ(event, *eventBuilder.getParentEvents().begin()); invalidEvent->release(); event->release(); eventBuilder.finalize(); eventBuilder.getEvent()->release(); } TEST(EventBuilder, WhenAddingParentEventThenDuplicatesAreIgnored) { auto mockDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockContext mockContext; MockCommandQueue mockCommandQueue(&mockContext, mockDevice.get(), nullptr, false); SmallEventBuilderMock *eventBuilder = new SmallEventBuilderMock; eventBuilder->create>(&mockCommandQueue, CL_COMMAND_MARKER, 0, 0); Event *event = eventBuilder->getEvent(); Event *parentEvent = new Event(&mockCommandQueue, CL_COMMAND_NDRANGE_KERNEL, 0, 0); Event *parentEvent2 = new Event(&mockCommandQueue, CL_COMMAND_NDRANGE_KERNEL, 0, 0); Event *parentEvent3 = new Event(&mockCommandQueue, CL_COMMAND_NDRANGE_KERNEL, 0, 0); eventBuilder->addParentEvent(parentEvent); eventBuilder->addParentEvent(parentEvent2); eventBuilder->addParentEvent(parentEvent3); // add duplicate eventBuilder->addParentEvent(parentEvent); auto parents = eventBuilder->getParentEvents(); size_t numberOfParents = parents.size(); EXPECT_EQ(3u, numberOfParents); event->release(); parentEvent->release(); parentEvent2->release(); parentEvent3->release(); eventBuilder->clear(); eventBuilder->clearEvent(); delete eventBuilder; } } // namespace NEO 
compute-runtime-22.14.22890/opencl/test/unit_test/event/event_callbacks_tests.cpp000066400000000000000000000033111422164147700300140ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/source/event/async_events_handler.h" #include "opencl/source/event/user_event.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_event.h" #include using namespace NEO; struct CallbackData { static void CL_CALLBACK callback(cl_event event, cl_int status, void *userData) { uint32_t *nestLevel = (uint32_t *)userData; if (*nestLevel < 4) { (*nestLevel)++; clSetEventCallback(event, CL_COMPLETE, CallbackData::callback, userData); } } }; TEST(EventCallbackTest, GivenUserEventWhenAddingCallbackThenNestedCallbacksCanBeCreated) { MockEvent event(nullptr); uint32_t nestLevel = 0; event.addCallback(CallbackData::callback, CL_COMPLETE, &nestLevel); event.setStatus(CL_COMPLETE); EXPECT_EQ(4u, nestLevel); } TEST(EventCallbackTest, GivenEventWhenAddingCallbackThenNestedCallbacksCanBeCreated) { auto device = std::make_unique(MockClDevice::createWithNewExecutionEnvironment(nullptr)); MockContext context(device.get()); MockCommandQueue queue(&context, context.getDevice(0), nullptr, false); MockEvent event(&queue, CL_COMMAND_MARKER, 0, 0); uint32_t nestLevel = 0; event.addCallback(CallbackData::callback, CL_COMPLETE, &nestLevel); event.setStatus(CL_COMPLETE); context.getAsyncEventsHandler().closeThread(); EXPECT_EQ(4u, nestLevel); } compute-runtime-22.14.22890/opencl/test/unit_test/event/event_fixture.h000066400000000000000000000070211422164147700260100ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include 
"shared/source/command_stream/wait_status.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/ptr_math.h" #include "shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/event/user_event.h" #include "opencl/test/unit_test/command_queue/command_queue_fixture.h" #include "opencl/test/unit_test/command_stream/command_stream_fixture.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/fixtures/hello_world_fixture.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "gtest/gtest.h" using namespace NEO; struct EventTest : public ClDeviceFixture, public CommandQueueFixture, public CommandStreamFixture, public ::testing::Test { using CommandQueueFixture::SetUp; void SetUp() override { ClDeviceFixture::SetUp(); CommandQueueFixture::SetUp(&mockContext, pClDevice, 0); CommandStreamFixture::SetUp(pCmdQ); } void TearDown() override { CommandStreamFixture::TearDown(); CommandQueueFixture::TearDown(); ClDeviceFixture::TearDown(); } MockContext mockContext; }; struct InternalsEventTest : public ClDeviceFixture, public ::testing::Test { InternalsEventTest() { } void SetUp() override { ClDeviceFixture::SetUp(); mockContext = new MockContext(pClDevice); } void TearDown() override { delete mockContext; ClDeviceFixture::TearDown(); } MockContext *mockContext = nullptr; }; struct MyUserEvent : public VirtualEvent { WaitStatus wait(bool blocking, bool quickKmdSleep) override { return VirtualEvent::wait(blocking, quickKmdSleep); }; uint32_t getTaskLevel() override { return VirtualEvent::getTaskLevel(); }; }; struct MyEvent : public Event { MyEvent(CommandQueue *cmdQueue, cl_command_type cmdType, uint32_t taskLevel, uint32_t taskCount) : Event(cmdQueue, cmdType, taskLevel, taskCount) { } TimeStampData getQueueTimeStamp() { return this->queueTimeStamp; }; TimeStampData 
getSubmitTimeStamp() { return this->submitTimeStamp; }; uint64_t getStartTimeStamp() { return this->startTimeStamp; }; uint64_t getEndTimeStamp() { return this->endTimeStamp; }; uint64_t getCompleteTimeStamp() { return this->completeTimeStamp; } uint64_t getGlobalStartTimestamp() const { return this->globalStartTimestamp; } bool getDataCalcStatus() const { return this->dataCalculated; } void calculateProfilingDataInternal(uint64_t contextStartTS, uint64_t contextEndTS, uint64_t *contextCompleteTS, uint64_t globalStartTS) override { if (DebugManager.flags.ReturnRawGpuTimestamps.get()) { globalStartTimestamp = globalStartTS; } Event::calculateProfilingDataInternal(contextStartTS, contextEndTS, contextCompleteTS, globalStartTS); } uint64_t globalStartTimestamp; }; class MockEventTests : public HelloWorldTest { public: void TearDown() override { if (uEvent) { uEvent->setStatus(-1); uEvent.reset(); } HelloWorldFixture::TearDown(); } protected: ReleaseableObjectPtr uEvent = nullptr; }; compute-runtime-22.14.22890/opencl/test/unit_test/event/event_tests.cpp000066400000000000000000002133661422164147700260320ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/command_stream/wait_status.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/source/os_interface/os_interface.h" #include "shared/source/utilities/perf_counter.h" #include "shared/source/utilities/tag_allocator.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/mocks/mock_allocation_properties.h" #include "shared/test/common/mocks/mock_csr.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "shared/test/common/mocks/mock_ostime.h" #include 
"shared/test/common/test_macros/test.h" #include "shared/test/common/test_macros/test_checks_shared.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/helpers/task_information.h" #include "opencl/source/memory_manager/mem_obj_surface.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_event.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_mdi.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "opencl/test/unit_test/os_interface/mock_performance_counters.h" #include "event_fixture.h" #include #include using namespace NEO; TEST(Event, GivenEventWhenCheckingTraitThenEventIsNotCopyable) { EXPECT_FALSE(std::is_move_constructible::value); EXPECT_FALSE(std::is_copy_constructible::value); } TEST(Event, GivenEventWhenCheckingTraitThenEventIsNotAssignable) { EXPECT_FALSE(std::is_move_assignable::value); EXPECT_FALSE(std::is_copy_assignable::value); } TEST(Event, WhenPeekIsCalledThenExecutionIsNotUpdated) { auto mockDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockContext ctx; MockCommandQueue cmdQ(&ctx, mockDevice.get(), 0, false); Event event(&cmdQ, CL_COMMAND_NDRANGE_KERNEL, CompletionStamp::notReady, 0); EXPECT_FALSE(event.peekIsBlocked()); EXPECT_EQ(CL_QUEUED, event.peekExecutionStatus()); event.updateExecutionStatus(); EXPECT_EQ(CL_QUEUED, event.peekExecutionStatus()); } TEST(Event, givenEventThatStatusChangeWhenPeekIsCalledThenEventIsNotUpdated) { auto mockDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockContext ctx; MockCommandQueue cmdQ(&ctx, mockDevice.get(), 0, false); struct mockEvent : public Event { using Event::Event; void updateExecutionStatus() override { callCount++; } 
uint32_t callCount = 0u; }; mockEvent event(&cmdQ, CL_COMMAND_NDRANGE_KERNEL, CompletionStamp::notReady, 0); EXPECT_EQ(0u, event.callCount); event.peekExecutionStatus(); EXPECT_EQ(0u, event.callCount); event.updateEventAndReturnCurrentStatus(); EXPECT_EQ(1u, event.callCount); event.updateEventAndReturnCurrentStatus(); EXPECT_EQ(2u, event.callCount); } TEST(Event, givenEventWithHigherTaskCountWhenLowerTaskCountIsBeingSetThenTaskCountRemainsUnmodifed) { Event *event = new Event(nullptr, CL_COMMAND_NDRANGE_KERNEL, 4, 10); EXPECT_EQ(10u, event->peekTaskCount()); event->updateTaskCount(8, 0); EXPECT_EQ(10u, event->peekTaskCount()); delete event; } TEST(Event, WhenGettingTaskLevelThenCorrectTaskLevelIsReturned) { class TempEvent : public Event { public: TempEvent() : Event(nullptr, CL_COMMAND_NDRANGE_KERNEL, 5, 7){}; uint32_t getTaskLevel() override { return Event::getTaskLevel(); } }; TempEvent event; // taskLevel and getTaskLevel() should give the same result EXPECT_EQ(5u, event.taskLevel); EXPECT_EQ(5u, event.getTaskLevel()); } TEST(Event, WhenGettingTaskCountThenCorrectValueIsReturned) { Event event(nullptr, CL_COMMAND_NDRANGE_KERNEL, 5, 7); EXPECT_EQ(7u, event.getCompletionStamp()); } TEST(Event, WhenGettingEventInfoThenCqIsReturned) { auto mockDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); auto ctx = std::unique_ptr(new MockContext()); auto cmdQ = std::unique_ptr(new MockCommandQueue(ctx.get(), mockDevice.get(), 0, false)); Event *event = new Event(cmdQ.get(), CL_COMMAND_NDRANGE_KERNEL, 1, 5); cl_event clEvent = event; cl_command_queue cmdQResult = nullptr; size_t sizeReturned = 0; auto result = clGetEventInfo(clEvent, CL_EVENT_COMMAND_QUEUE, 0, nullptr, &sizeReturned); EXPECT_EQ(CL_SUCCESS, result); EXPECT_EQ(sizeof(cl_command_queue), sizeReturned); result = clGetEventInfo(clEvent, CL_EVENT_COMMAND_QUEUE, sizeof(cmdQResult), &cmdQResult, &sizeReturned); ASSERT_EQ(CL_SUCCESS, result); EXPECT_EQ(cmdQ.get(), cmdQResult); 
EXPECT_EQ(sizeReturned, sizeof(cmdQResult)); delete event; } TEST(Event, givenBcsCsrSetInEventWhenPeekingBcsTaskCountThenReturnCorrectTaskCount) { HardwareInfo hwInfo = *defaultHwInfo; hwInfo.capabilityTable.blitterOperationsSupported = true; REQUIRE_FULL_BLITTER_OR_SKIP(&hwInfo); auto device = ReleaseableObjectPtr{ new MockClDevice{MockDevice::createWithNewExecutionEnvironment(&hwInfo)}}; MockContext context{device.get()}; MockCommandQueue queue{context}; queue.updateBcsTaskCount(queue.bcsEngines[0]->getEngineType(), 19); Event event{&queue, CL_COMMAND_READ_BUFFER, 0, 0}; EXPECT_EQ(0u, event.peekBcsTaskCountFromCommandQueue()); event.setupBcs(queue.bcsEngines[0]->getEngineType()); EXPECT_EQ(19u, event.peekBcsTaskCountFromCommandQueue()); } TEST(Event, givenCommandQueueWhenEventIsCreatedWithCommandQueueThenCommandQueueInternalRefCountIsIncremented) { auto mockDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockContext ctx; MockCommandQueue cmdQ(&ctx, mockDevice.get(), 0, false); auto intitialRefCount = cmdQ.getRefInternalCount(); Event *event = new Event(&cmdQ, CL_COMMAND_NDRANGE_KERNEL, 4, 10); auto newRefCount = cmdQ.getRefInternalCount(); EXPECT_EQ(intitialRefCount + 1, newRefCount); delete event; auto finalRefCount = cmdQ.getRefInternalCount(); EXPECT_EQ(intitialRefCount, finalRefCount); } TEST(Event, givenCommandQueueWhenEventIsCreatedWithoutCommandQueueThenCommandQueueInternalRefCountIsNotModified) { MockContext ctx; MockCommandQueue cmdQ(&ctx, nullptr, 0, false); auto intitialRefCount = cmdQ.getRefInternalCount(); Event *event = new Event(nullptr, CL_COMMAND_NDRANGE_KERNEL, 4, 10); auto newRefCount = cmdQ.getRefInternalCount(); EXPECT_EQ(intitialRefCount, newRefCount); delete event; auto finalRefCount = cmdQ.getRefInternalCount(); EXPECT_EQ(intitialRefCount, finalRefCount); } TEST(Event, WhenWaitingForEventsThenAllQueuesAreFlushed) { class MockCommandQueueWithFlushCheck : public MockCommandQueue { public: 
MockCommandQueueWithFlushCheck() = delete; MockCommandQueueWithFlushCheck(MockCommandQueueWithFlushCheck &) = delete; MockCommandQueueWithFlushCheck(Context &context, ClDevice *device) : MockCommandQueue(&context, device, nullptr, false) { } cl_int flush() override { flushCounter++; return CL_SUCCESS; } uint32_t flushCounter = 0; }; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockContext context; std::unique_ptr cmdQ1(new MockCommandQueueWithFlushCheck(context, device.get())); std::unique_ptr event1(new Event(cmdQ1.get(), CL_COMMAND_NDRANGE_KERNEL, 4, 10)); std::unique_ptr cmdQ2(new MockCommandQueueWithFlushCheck(context, device.get())); std::unique_ptr event2(new Event(cmdQ2.get(), CL_COMMAND_NDRANGE_KERNEL, 5, 20)); cl_event eventWaitlist[] = {event1.get(), event2.get()}; Event::waitForEvents(2, eventWaitlist); EXPECT_EQ(1u, cmdQ1->flushCounter); EXPECT_EQ(1u, cmdQ2->flushCounter); } TEST(Event, GivenNotReadyEventWhenWaitingForEventsThenQueueIsNotFlushed) { class MockCommandQueueWithFlushCheck : public MockCommandQueue { public: MockCommandQueueWithFlushCheck() = delete; MockCommandQueueWithFlushCheck(MockCommandQueueWithFlushCheck &) = delete; MockCommandQueueWithFlushCheck(Context &context, ClDevice *device) : MockCommandQueue(&context, device, nullptr, false) { } cl_int flush() override { flushCounter++; return CL_SUCCESS; } uint32_t flushCounter = 0; }; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockContext context; std::unique_ptr cmdQ1(new MockCommandQueueWithFlushCheck(context, device.get())); std::unique_ptr event1(new Event(cmdQ1.get(), CL_COMMAND_NDRANGE_KERNEL, CompletionStamp::notReady, 0)); cl_event eventWaitlist[] = {event1.get()}; Event::waitForEvents(1, eventWaitlist); EXPECT_EQ(0u, cmdQ1->flushCounter); } TEST(Event, givenNotReadyEventOnWaitlistWhenCheckingUserEventDependeciesThenTrueIsReturned) { auto event1 = std::make_unique(nullptr, 
CL_COMMAND_NDRANGE_KERNEL, CompletionStamp::notReady, 0); cl_event eventWaitlist[] = {event1.get()}; bool userEventDependencies = Event::checkUserEventDependencies(1, eventWaitlist); EXPECT_TRUE(userEventDependencies); } TEST(Event, givenReadyEventsOnWaitlistWhenCheckingUserEventDependeciesThenFalseIsReturned) { auto event1 = std::make_unique(nullptr, CL_COMMAND_NDRANGE_KERNEL, 5, 0); cl_event eventWaitlist[] = {event1.get()}; bool userEventDependencies = Event::checkUserEventDependencies(1, eventWaitlist); EXPECT_FALSE(userEventDependencies); } TEST_F(EventTest, WhenGettingClEventCommandExecutionStatusThenCorrectSizeIsReturned) { Event event(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 1, 5); cl_int eventStatus = -1; size_t sizeReturned = 0; auto result = clGetEventInfo(&event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(eventStatus), &eventStatus, &sizeReturned); ASSERT_EQ(CL_SUCCESS, result); EXPECT_EQ(sizeReturned, sizeof(eventStatus)); } TEST_F(EventTest, GivenTagCsLessThanTaskCountWhenGettingClEventCommandExecutionStatusThenClSubmittedIsReturned) { uint32_t tagHW = 4; uint32_t taskCount = 5; *pTagMemory = tagHW; Event event(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 3, taskCount); cl_int eventStatus = -1; size_t sizeReturned = 0; auto result = clGetEventInfo(&event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(eventStatus), &eventStatus, &sizeReturned); ASSERT_EQ(CL_SUCCESS, result); // If tagCS < taskCount, we always return submitted (ie. no buffering!) EXPECT_EQ(CL_SUBMITTED, eventStatus); } TEST_F(EventTest, GivenTagCsEqualTaskCountWhenGettingClEventCommandExecutionStatusThenClCompleteIsReturned) { uint32_t tagHW = 5; uint32_t taskCount = 5; *pTagMemory = tagHW; Event event(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 3, taskCount); cl_int eventStatus = -1; size_t sizeReturned = 0; auto result = clGetEventInfo(&event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(eventStatus), &eventStatus, &sizeReturned); ASSERT_EQ(CL_SUCCESS, result); // If tagCS == event.taskCount, the event is completed. 
EXPECT_EQ(CL_COMPLETE, eventStatus); } TEST_F(EventTest, GivenTagCsGreaterThanTaskCountWhenGettingClEventCommandExecutionStatusThenClCompleteIsReturned) { uint32_t tagHW = 6; uint32_t taskCount = 5; *pTagMemory = tagHW; Event event(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 3, taskCount); cl_int eventStatus = -1; size_t sizeReturned = 0; auto result = clGetEventInfo(&event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(eventStatus), &eventStatus, &sizeReturned); ASSERT_EQ(CL_SUCCESS, result); EXPECT_EQ(CL_COMPLETE, eventStatus); } TEST_F(EventTest, WhenGettingClEventCommandExecutionStatusThenEventStatusIsReturned) { Event event(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, CompletionStamp::notReady, CompletionStamp::notReady); cl_int eventStatus = -1; event.setStatus(-1); auto result = clGetEventInfo(&event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(eventStatus), &eventStatus, 0); EXPECT_EQ(CL_SUCCESS, result); EXPECT_EQ(-1, eventStatus); } TEST_F(EventTest, GivenNewEventWhenGettingClEventReferenceCountThenOneIsReturned) { uint32_t tagEvent = 5; Event event(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 3, tagEvent); cl_uint refCount = 0; size_t sizeReturned = 0; auto result = clGetEventInfo(&event, CL_EVENT_REFERENCE_COUNT, sizeof(refCount), &refCount, &sizeReturned); ASSERT_EQ(CL_SUCCESS, result); EXPECT_EQ(sizeof(refCount), sizeReturned); EXPECT_EQ(1u, refCount); } TEST_F(EventTest, GivenRetainedEventWhenGettingClEventReferenceCountThenTwoIsReturned) { uint32_t tagEvent = 5; Event event(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 3, tagEvent); event.retain(); cl_uint refCount = 0; size_t sizeReturned = 0; auto result = clGetEventInfo(&event, CL_EVENT_REFERENCE_COUNT, sizeof(refCount), &refCount, &sizeReturned); ASSERT_EQ(CL_SUCCESS, result); EXPECT_EQ(sizeof(refCount), sizeReturned); EXPECT_EQ(2u, refCount); event.release(); } TEST_F(EventTest, GivenRetainAndReleaseEventWhenGettingClEventReferenceCountThenOneIsReturned) { uint32_t tagEvent = 5; Event *pEvent = new Event(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 3, 
tagEvent); ASSERT_NE(nullptr, pEvent); pEvent->retain(); auto retVal = pEvent->getReference(); EXPECT_EQ(2, retVal); cl_uint refCount = 0; size_t sizeReturned = 0; auto result = clGetEventInfo(pEvent, CL_EVENT_REFERENCE_COUNT, sizeof(refCount), &refCount, &sizeReturned); ASSERT_EQ(CL_SUCCESS, result); EXPECT_EQ(sizeof(refCount), sizeReturned); EXPECT_EQ(2u, refCount); pEvent->release(); retVal = pEvent->getReference(); EXPECT_EQ(1, retVal); delete pEvent; } TEST_F(EventTest, WhenGettingClEventContextThenCorrectValueIsReturned) { uint32_t tagEvent = 5; Event *pEvent = new Event(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 3, tagEvent); ASSERT_NE(nullptr, pEvent); cl_context context; size_t sizeReturned = 0; auto result = clGetEventInfo(pEvent, CL_EVENT_CONTEXT, sizeof(context), &context, &sizeReturned); ASSERT_EQ(CL_SUCCESS, result); EXPECT_EQ(sizeof(context), sizeReturned); cl_context qCtx = (cl_context)&mockContext; EXPECT_EQ(qCtx, context); delete pEvent; } TEST_F(EventTest, GivenInvalidEventWhenGettingEventInfoThenInvalidValueErrorIsReturned) { uint32_t tagEvent = 5; Event event(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 3, tagEvent); cl_int eventStatus = -1; auto result = clGetEventInfo(&event, -1, sizeof(eventStatus), &eventStatus, nullptr); EXPECT_EQ(CL_INVALID_VALUE, result); } TEST_F(EventTest, GivenNonBlockingEventWhenWaitingThenFalseIsReturned) { Event event(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 3, CompletionStamp::notReady); auto result = event.wait(false, false); EXPECT_EQ(WaitStatus::NotReady, result); } struct UpdateEventTest : public ::testing::Test { void SetUp() override { executionEnvironment = platform()->peekExecutionEnvironment(); memoryManager = new MockMemoryManager(*executionEnvironment); hostPtrManager = static_cast(memoryManager->getHostPtrManager()); executionEnvironment->memoryManager.reset(memoryManager); device.reset(new ClDevice{*Device::create(executionEnvironment, 0u), platform()}); context = std::make_unique(device.get()); cl_int retVal = 
CL_OUT_OF_RESOURCES; commandQueue.reset(CommandQueue::create(context.get(), device.get(), nullptr, false, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); } ExecutionEnvironment *executionEnvironment; MockMemoryManager *memoryManager; MockHostPtrManager *hostPtrManager; std::unique_ptr device; std::unique_ptr context; std::unique_ptr commandQueue; }; TEST_F(UpdateEventTest, givenEventContainingCommandQueueWhenItsStatusIsUpdatedToCompletedThenTemporaryAllocationsAreDeleted) { void *ptr = (void *)0x1000; size_t size = 4096; auto temporary = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), false, size, device->getDeviceBitfield()}, ptr); temporary->updateTaskCount(3, commandQueue->getGpgpuCommandStreamReceiver().getOsContext().getContextId()); commandQueue->getGpgpuCommandStreamReceiver().getInternalAllocationStorage()->storeAllocation(std::unique_ptr(temporary), TEMPORARY_ALLOCATION); Event event(commandQueue.get(), CL_COMMAND_NDRANGE_KERNEL, 3, 3); EXPECT_EQ(1u, hostPtrManager->getFragmentCount()); event.updateExecutionStatus(); EXPECT_EQ(0u, hostPtrManager->getFragmentCount()); } TEST_F(InternalsEventTest, GivenSubmitCommandFalseWhenSubmittingCommandsThenRefApiCountAndRefInternalGetHandledCorrectly) { MockCommandQueue cmdQ(mockContext, pClDevice, nullptr, false); MockEvent event(&cmdQ, CL_COMMAND_NDRANGE_KERNEL, 0, 0); auto cmdStream = new LinearStream(pDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties({pDevice->getRootDeviceIndex(), true, 4096, AllocationType::COMMAND_BUFFER, false, pDevice->getDeviceBitfield()})); IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr; cmdQ.allocateHeapMemory(IndirectHeap::Type::DYNAMIC_STATE, 4096u, dsh); cmdQ.allocateHeapMemory(IndirectHeap::Type::INDIRECT_OBJECT, 4096u, ioh); cmdQ.allocateHeapMemory(IndirectHeap::Type::SURFACE_STATE, 4096u, ssh); auto blockedCommandsData = std::make_unique(cmdStream, 
*cmdQ.getGpgpuCommandStreamReceiver().getInternalAllocationStorage()); blockedCommandsData->setHeaps(dsh, ioh, ssh); MockKernelWithInternals mockKernelWithInternals(*pClDevice); auto pKernel = mockKernelWithInternals.mockKernel; auto &csr = cmdQ.getGpgpuCommandStreamReceiver(); std::vector v; MockBuffer buffer; buffer.retain(); auto initialRefCount = buffer.getRefApiCount(); auto initialInternalCount = buffer.getRefInternalCount(); auto bufferSurf = new MemObjSurface(&buffer); EXPECT_EQ(initialInternalCount + 1, buffer.getRefInternalCount()); EXPECT_EQ(initialRefCount, buffer.getRefApiCount()); PreemptionMode preemptionMode = pDevice->getPreemptionMode(); v.push_back(bufferSurf); auto cmd = new CommandComputeKernel(cmdQ, blockedCommandsData, v, false, false, false, nullptr, preemptionMode, pKernel, 1); event.setCommand(std::unique_ptr(cmd)); auto taskLevelBefore = csr.peekTaskLevel(); auto refCount = buffer.getRefApiCount(); auto refInternal = buffer.getRefInternalCount(); event.submitCommand(false); EXPECT_EQ(refCount, buffer.getRefApiCount()); EXPECT_EQ(refInternal - 1, buffer.getRefInternalCount()); auto taskLevelAfter = csr.peekTaskLevel(); EXPECT_EQ(taskLevelBefore + 1, taskLevelAfter); auto graphicsAllocation = buffer.getGraphicsAllocation(pClDevice->getRootDeviceIndex()); EXPECT_FALSE(graphicsAllocation->isResident(csr.getOsContext().getContextId())); } TEST_F(InternalsEventTest, GivenSubmitCommandTrueWhenSubmittingCommandsThenRefApiCountAndRefInternalGetHandledCorrectly) { MockCommandQueue cmdQ(mockContext, pClDevice, nullptr, false); MockEvent event(&cmdQ, CL_COMMAND_NDRANGE_KERNEL, 0, 0); auto cmdStream = new LinearStream(pDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties({pDevice->getRootDeviceIndex(), 4096, AllocationType::COMMAND_BUFFER, pDevice->getDeviceBitfield()})); IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr; cmdQ.allocateHeapMemory(IndirectHeap::Type::DYNAMIC_STATE, 4096u, dsh); 
cmdQ.allocateHeapMemory(IndirectHeap::Type::INDIRECT_OBJECT, 4096u, ioh); cmdQ.allocateHeapMemory(IndirectHeap::Type::SURFACE_STATE, 4096u, ssh); auto blockedCommandsData = std::make_unique(cmdStream, *cmdQ.getGpgpuCommandStreamReceiver().getInternalAllocationStorage()); blockedCommandsData->setHeaps(dsh, ioh, ssh); MockKernelWithInternals mockKernelWithInternals(*pClDevice); auto pKernel = mockKernelWithInternals.mockKernel; auto &csr = cmdQ.getGpgpuCommandStreamReceiver(); std::vector v; NullSurface *surface = new NullSurface; v.push_back(surface); PreemptionMode preemptionMode = pDevice->getPreemptionMode(); auto cmd = new CommandComputeKernel(cmdQ, blockedCommandsData, v, false, false, false, nullptr, preemptionMode, pKernel, 1); event.setCommand(std::unique_ptr(cmd)); auto taskLevelBefore = csr.peekTaskLevel(); event.submitCommand(true); auto taskLevelAfter = csr.peekTaskLevel(); EXPECT_EQ(taskLevelBefore, taskLevelAfter); } TEST_F(InternalsEventTest, givenBlockedKernelWithPrintfWhenSubmittedThenPrintOutput) { MockCommandQueue mockCmdQueue(mockContext, pClDevice, nullptr, false); testing::internal::CaptureStdout(); MockEvent event(&mockCmdQueue, CL_COMMAND_NDRANGE_KERNEL, 0, 0); auto cmdStream = new LinearStream(pDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties({pDevice->getRootDeviceIndex(), 4096, AllocationType::COMMAND_BUFFER, pDevice->getDeviceBitfield()})); IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr; mockCmdQueue.allocateHeapMemory(IndirectHeap::Type::DYNAMIC_STATE, 4096u, dsh); mockCmdQueue.allocateHeapMemory(IndirectHeap::Type::INDIRECT_OBJECT, 4096u, ioh); mockCmdQueue.allocateHeapMemory(IndirectHeap::Type::SURFACE_STATE, 4096u, ssh); auto blockedCommandsData = std::make_unique(cmdStream, *mockCmdQueue.getGpgpuCommandStreamReceiver().getInternalAllocationStorage()); blockedCommandsData->setHeaps(dsh, ioh, ssh); std::string testString = "test"; MockKernelWithInternals mockKernelWithInternals(*pClDevice); auto pKernel = 
mockKernelWithInternals.mockKernel; auto &kernelInfo = mockKernelWithInternals.kernelInfo; kernelInfo.kernelDescriptor.kernelAttributes.binaryFormat = DeviceBinaryFormat::Patchtokens; kernelInfo.setPrintfSurface(sizeof(uintptr_t), 0); kernelInfo.addToPrintfStringsMap(0, testString); uint64_t crossThread[10]; pKernel->setCrossThreadData(&crossThread, sizeof(uint64_t) * 8); MockMultiDispatchInfo multiDispatchInfo(pClDevice, pKernel); std::unique_ptr printfHandler(PrintfHandler::create(multiDispatchInfo, *pClDevice)); printfHandler.get()->prepareDispatch(multiDispatchInfo); auto surface = printfHandler.get()->getSurface(); auto printfSurface = reinterpret_cast(surface->getUnderlyingBuffer()); printfSurface[0] = 8; printfSurface[1] = 0; std::vector v; PreemptionMode preemptionMode = pDevice->getPreemptionMode(); auto cmd = new CommandComputeKernel(mockCmdQueue, blockedCommandsData, v, false, false, false, std::move(printfHandler), preemptionMode, pKernel, 1); event.setCommand(std::unique_ptr(cmd)); event.submitCommand(false); EXPECT_EQ(1u, mockCmdQueue.latestTaskCountWaited); std::string output = testing::internal::GetCapturedStdout(); EXPECT_STREQ("test", output.c_str()); EXPECT_FALSE(surface->isResident(pDevice->getDefaultEngine().osContext->getContextId())); } TEST_F(InternalsEventTest, GivenMapOperationWhenSubmittingCommandsThenTaskLevelIsIncremented) { auto pCmdQ = make_releaseable(mockContext, pClDevice, nullptr, false); MockEvent event(pCmdQ.get(), CL_COMMAND_NDRANGE_KERNEL, 0, 0); auto &csr = pCmdQ->getGpgpuCommandStreamReceiver(); auto buffer = new MockBuffer; MemObjSizeArray size = {{1, 1, 1}}; MemObjOffsetArray offset = {{0, 0, 0}}; event.setCommand(std::unique_ptr(new CommandMapUnmap(MAP, *buffer, size, offset, false, *pCmdQ))); auto taskLevelBefore = csr.peekTaskLevel(); event.submitCommand(false); auto taskLevelAfter = csr.peekTaskLevel(); EXPECT_EQ(taskLevelBefore + 1, taskLevelAfter); buffer->decRefInternal(); } TEST_F(InternalsEventTest, 
GivenMapOperationNonZeroCopyBufferWhenSubmittingCommandsThenTaskLevelIsIncremented) { auto pCmdQ = make_releaseable(mockContext, pClDevice, nullptr, false); MockEvent event(pCmdQ.get(), CL_COMMAND_NDRANGE_KERNEL, 0, 0); auto &csr = pCmdQ->getGpgpuCommandStreamReceiver(); auto buffer = new UnalignedBuffer; MemObjSizeArray size = {{1, 1, 1}}; MemObjOffsetArray offset = {{0, 0, 0}}; event.setCommand(std::unique_ptr(new CommandMapUnmap(MAP, *buffer, size, offset, false, *pCmdQ))); auto taskLevelBefore = csr.peekTaskLevel(); event.submitCommand(false); auto taskLevelAfter = csr.peekTaskLevel(); EXPECT_EQ(taskLevelBefore + 1, taskLevelAfter); buffer->decRefInternal(); } uint32_t commands[] = { CL_COMMAND_NDRANGE_KERNEL, CL_COMMAND_TASK, CL_COMMAND_NATIVE_KERNEL, CL_COMMAND_READ_BUFFER, CL_COMMAND_WRITE_BUFFER, CL_COMMAND_COPY_BUFFER, CL_COMMAND_READ_IMAGE, CL_COMMAND_WRITE_IMAGE, CL_COMMAND_COPY_IMAGE, CL_COMMAND_COPY_IMAGE_TO_BUFFER, CL_COMMAND_COPY_BUFFER_TO_IMAGE, CL_COMMAND_MAP_BUFFER, CL_COMMAND_MAP_IMAGE, CL_COMMAND_UNMAP_MEM_OBJECT, CL_COMMAND_MARKER, CL_COMMAND_ACQUIRE_GL_OBJECTS, CL_COMMAND_RELEASE_GL_OBJECTS, CL_COMMAND_READ_BUFFER_RECT, CL_COMMAND_WRITE_BUFFER_RECT, CL_COMMAND_COPY_BUFFER_RECT, CL_COMMAND_BARRIER, CL_COMMAND_MIGRATE_MEM_OBJECTS, CL_COMMAND_FILL_BUFFER, CL_COMMAND_FILL_IMAGE, CL_COMMAND_SVM_FREE, CL_COMMAND_SVM_MEMCPY, CL_COMMAND_SVM_MEMFILL, CL_COMMAND_SVM_MAP, CL_COMMAND_SVM_UNMAP, }; class InternalsEventProfilingTest : public InternalsEventTest, public ::testing::WithParamInterface { void SetUp() override { InternalsEventTest::SetUp(); } void TearDown() override { InternalsEventTest::TearDown(); } }; TEST_P(InternalsEventProfilingTest, GivenProfilingWhenEventCreatedThenProfilingSet) { const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, 0}; std::unique_ptr pCmdQ(new MockCommandQueue(mockContext, pClDevice, props, false)); std::unique_ptr> event(new MockEvent(pCmdQ.get(), GetParam(), 0, 0)); 
EXPECT_TRUE(event.get()->isProfilingEnabled()); } INSTANTIATE_TEST_CASE_P(InternalsEventProfilingTest, InternalsEventProfilingTest, ::testing::ValuesIn(commands)); TEST_F(InternalsEventTest, GivenProfilingWhenUserEventCreatedThenProfilingNotSet) { const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, 0}; std::unique_ptr pCmdQ(new MockCommandQueue(mockContext, pClDevice, props, false)); std::unique_ptr> event(new MockEvent(pCmdQ.get(), CL_COMMAND_USER, 0, 0)); EXPECT_FALSE(event.get()->isProfilingEnabled()); } TEST_F(InternalsEventTest, givenDeviceTimestampBaseNotEnabledWhenGetEventProfilingInfoThenCpuTimestampIsReturned) { pClDevice->setOSTime(new MockOSTimeWithConstTimestamp()); const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, 0}; MockCommandQueue cmdQ(mockContext, pClDevice, props, false); MockEvent event(&cmdQ, CL_COMMAND_MARKER, 0, 0); event.setCommand(std::unique_ptr(new CommandWithoutKernel(cmdQ))); event.submitCommand(false); uint64_t submitTime = 0ULL; event.getEventProfilingInfo(CL_PROFILING_COMMAND_SUBMIT, sizeof(uint64_t), &submitTime, 0); EXPECT_EQ(submitTime, MockDeviceTimeWithConstTimestamp::CPU_TIME_IN_NS); } TEST_F(InternalsEventTest, givenDeviceTimestampBaseEnabledWhenGetEventProfilingInfoThenGpuTimestampIsReturned) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.EnableDeviceBasedTimestamps.set(true); pClDevice->setOSTime(new MockOSTimeWithConstTimestamp()); const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, 0}; MockCommandQueue cmdQ(mockContext, pClDevice, props, false); MockEvent event(&cmdQ, CL_COMMAND_MARKER, 0, 0); event.queueTimeStamp.GPUTimeStamp = MockDeviceTimeWithConstTimestamp::GPU_TIMESTAMP; event.setCommand(std::unique_ptr(new CommandWithoutKernel(cmdQ))); event.submitCommand(false); uint64_t submitTime = 0ULL; event.getEventProfilingInfo(CL_PROFILING_COMMAND_SUBMIT, sizeof(uint64_t), &submitTime, 0); auto resolution = 
pClDevice->getDevice().getDeviceInfo().profilingTimerResolution; EXPECT_EQ(submitTime, static_cast(MockDeviceTimeWithConstTimestamp::GPU_TIMESTAMP * resolution)); } TEST_F(InternalsEventTest, givenDeviceTimestampBaseEnabledWhenCalculateStartTimestampThenCorrectTimeIsReturned) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.EnableDeviceBasedTimestamps.set(true); const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, 0}; MockCommandQueue cmdQ(mockContext, pClDevice, props, false); MockEvent event(&cmdQ, CL_COMPLETE, 0, 0); HwTimeStamps timestamp{}; timestamp.GlobalStartTS = 2; event.queueTimeStamp.GPUTimeStamp = 1; TagNode timestampNode{}; timestampNode.tagForCpuAccess = ×tamp; event.timeStampNode = ×tampNode; uint64_t start; event.getEventProfilingInfo(CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &start, nullptr); auto resolution = pClDevice->getDevice().getDeviceInfo().profilingTimerResolution; EXPECT_EQ(start, static_cast(timestamp.GlobalStartTS * resolution)); event.timeStampNode = nullptr; } TEST_F(InternalsEventTest, givenDeviceTimestampBaseEnabledAndGlobalStartTSSmallerThanQueueTSWhenCalculateStartTimestampThenCorrectTimeIsReturned) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.EnableDeviceBasedTimestamps.set(true); const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, 0}; MockCommandQueue cmdQ(mockContext, pClDevice, props, false); MockEvent event(&cmdQ, CL_COMPLETE, 0, 0); HwTimeStamps timestamp{}; timestamp.GlobalStartTS = 1; event.queueTimeStamp.GPUTimeStamp = 2; TagNode timestampNode{}; timestampNode.tagForCpuAccess = ×tamp; event.timeStampNode = ×tampNode; uint64_t start = 0u; event.getEventProfilingInfo(CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &start, nullptr); auto &hwHelper = HwHelper::get(pClDevice->getHardwareInfo().platform.eRenderCoreFamily); auto resolution = pClDevice->getDevice().getDeviceInfo().profilingTimerResolution; auto refStartTime = 
static_cast(timestamp.GlobalStartTS * resolution + (1ULL << hwHelper.getGlobalTimeStampBits()) * resolution); EXPECT_EQ(start, refStartTime); event.timeStampNode = nullptr; } TEST_F(InternalsEventTest, givenGpuHangWhenEventWaitReportsHangThenWaititingIsAbortedAndUnfinishedEventsHaveExecutionStatusEqualsToAbortedDueToGpuHang) { MockCommandQueue cmdQ(mockContext, pClDevice, nullptr, false); MockEvent passingEvent(&cmdQ, CL_COMMAND_NDRANGE_KERNEL, 0, 0); passingEvent.waitReturnValue = WaitStatus::Ready; MockEvent hangingEvent(&cmdQ, CL_COMMAND_NDRANGE_KERNEL, 0, 0); hangingEvent.waitReturnValue = WaitStatus::GpuHang; cl_event eventWaitlist[] = {&passingEvent, &hangingEvent}; const auto result = Event::waitForEvents(2, eventWaitlist); EXPECT_EQ(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST, result); EXPECT_NE(Event::executionAbortedDueToGpuHang, passingEvent.peekExecutionStatus()); EXPECT_EQ(Event::executionAbortedDueToGpuHang, hangingEvent.peekExecutionStatus()); } TEST_F(InternalsEventTest, givenPassingEventWhenWaitingForEventsThenWaititingIsSuccessfulAndEventIsNotAborted) { MockCommandQueue cmdQ(mockContext, pClDevice, nullptr, false); MockEvent passingEvent(&cmdQ, CL_COMMAND_NDRANGE_KERNEL, 0, 0); passingEvent.waitReturnValue = WaitStatus::Ready; cl_event eventWaitlist[] = {&passingEvent}; const auto result = Event::waitForEvents(1, eventWaitlist); EXPECT_EQ(CL_SUCCESS, result); EXPECT_NE(Event::executionAbortedDueToGpuHang, passingEvent.peekExecutionStatus()); } TEST_F(InternalsEventTest, GivenProfilingWHENMapOperationTHENTimesSet) { const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, 0}; MockCommandQueue *pCmdQ = new MockCommandQueue(mockContext, pClDevice, props, false); MockEvent *event = new MockEvent(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 0, 0); auto &csr = pCmdQ->getGpgpuCommandStreamReceiver(); UnalignedBuffer buffer; MemObjSizeArray size = {{1, 1, 1}}; MemObjOffsetArray offset = {{0, 0, 0}}; event->setCommand(std::unique_ptr(new 
CommandMapUnmap(MAP, buffer, size, offset, false, *pCmdQ))); auto taskLevelBefore = csr.peekTaskLevel(); event->submitCommand(false); uint64_t submitTime = 0ULL; event->getEventProfilingInfo(CL_PROFILING_COMMAND_SUBMIT, sizeof(uint64_t), &submitTime, 0); EXPECT_NE(0ULL, submitTime); auto taskLevelAfter = csr.peekTaskLevel(); delete event; EXPECT_EQ(taskLevelBefore + 1, taskLevelAfter); delete pCmdQ; } TEST_F(InternalsEventTest, GivenUnMapOperationWhenSubmittingCommandsThenTaskLevelIsIncremented) { const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0}; auto pCmdQ = make_releaseable(mockContext, pClDevice, props, false); MockEvent event(pCmdQ.get(), CL_COMMAND_NDRANGE_KERNEL, 0, 0); auto &csr = pCmdQ->getGpgpuCommandStreamReceiver(); auto buffer = new UnalignedBuffer; MemObjSizeArray size = {{1, 1, 1}}; MemObjOffsetArray offset = {{0, 0, 0}}; event.setCommand(std::unique_ptr(new CommandMapUnmap(UNMAP, *buffer, size, offset, false, *pCmdQ))); auto taskLevelBefore = csr.peekTaskLevel(); event.submitCommand(false); auto taskLevelAfter = csr.peekTaskLevel(); EXPECT_EQ(taskLevelBefore + 1, taskLevelAfter); buffer->decRefInternal(); } TEST_F(InternalsEventTest, givenBlockedMapCommandWhenSubmitIsCalledThenItReleasesMemObjectReference) { const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0}; auto pCmdQ = std::make_unique(mockContext, pClDevice, props, false); MockEvent event(pCmdQ.get(), CL_COMMAND_NDRANGE_KERNEL, 0, 0); auto buffer = new UnalignedBuffer; auto currentBufferRefInternal = buffer->getRefInternalCount(); MemObjSizeArray size = {{1, 1, 1}}; MemObjOffsetArray offset = {{0, 0, 0}}; event.setCommand(std::unique_ptr(new CommandMapUnmap(UNMAP, *buffer, size, offset, false, *pCmdQ))); EXPECT_EQ(currentBufferRefInternal + 1, buffer->getRefInternalCount()); event.submitCommand(false); EXPECT_EQ(currentBufferRefInternal, buffer->getRefInternalCount()); buffer->decRefInternal(); } TEST_F(InternalsEventTest, 
GivenUnMapOperationNonZeroCopyBufferWhenSubmittingCommandsThenTaskLevelIsIncremented) { const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0}; auto pCmdQ = std::make_unique(mockContext, pClDevice, props, false); MockEvent event(pCmdQ.get(), CL_COMMAND_NDRANGE_KERNEL, 0, 0); auto &csr = pCmdQ->getGpgpuCommandStreamReceiver(); auto buffer = new UnalignedBuffer; MemObjSizeArray size = {{1, 1, 1}}; MemObjOffsetArray offset = {{0, 0, 0}}; event.setCommand(std::unique_ptr(new CommandMapUnmap(UNMAP, *buffer, size, offset, false, *pCmdQ))); auto taskLevelBefore = csr.peekTaskLevel(); event.submitCommand(false); auto taskLevelAfter = csr.peekTaskLevel(); EXPECT_EQ(taskLevelBefore + 1, taskLevelAfter); buffer->decRefInternal(); } class MockCommand : public Command { public: using Command::Command; CompletionStamp &submit(uint32_t taskLevel, bool terminated) override { return completionStamp; } }; TEST_F(InternalsEventTest, GivenHangingCommandWhenSubmittingItThenTaskIsAborted) { const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0}; auto cmdQ = std::make_unique(mockContext, pClDevice, props, false); auto command = std::make_unique(*cmdQ); command->completionStamp.taskCount = CompletionStamp::gpuHang; MockEvent event(cmdQ.get(), CL_COMMAND_NDRANGE_KERNEL, 0, 0); event.setCommand(std::move(command)); event.submitCommand(false); EXPECT_EQ(Event::executionAbortedDueToGpuHang, event.peekExecutionStatus()); } HWTEST_F(InternalsEventTest, givenCpuProfilingPathWhenEnqueuedMarkerThenDontUseTimeStampNode) { const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, 0}; MockCommandQueue *pCmdQ = new MockCommandQueue(mockContext, pClDevice, props, false); MockEvent *event = new MockEvent(pCmdQ, CL_COMMAND_MARKER, 0, 0); event->setCPUProfilingPath(true); event->setCommand(std::unique_ptr(new CommandWithoutKernel(*pCmdQ))); event->submitCommand(false); uint64_t submit, start, end; event->getEventProfilingInfo(CL_PROFILING_COMMAND_SUBMIT, 
sizeof(uint64_t), &submit, 0); event->getEventProfilingInfo(CL_PROFILING_COMMAND_START, sizeof(uint64_t), &start, 0); event->getEventProfilingInfo(CL_PROFILING_COMMAND_END, sizeof(uint64_t), &end, 0); EXPECT_LT(0u, submit); EXPECT_LT(submit, start); EXPECT_LT(start, end); delete event; delete pCmdQ; } struct InternalsEventWithPerfCountersTest : public InternalsEventTest, public PerformanceCountersFixture { void SetUp() override { PerformanceCountersFixture::SetUp(); InternalsEventTest::SetUp(); createPerfCounters(); pDevice->setPerfCounters(performanceCountersBase.get()); } void TearDown() override { performanceCountersBase.release(); InternalsEventTest::TearDown(); PerformanceCountersFixture::TearDown(); } }; HWTEST_F(InternalsEventWithPerfCountersTest, givenCpuProfilingPerfCountersPathWhenEnqueuedMarkerThenDontUseTimeStampNodePerfCounterNode) { const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, 0}; MockCommandQueue *pCmdQ = new MockCommandQueue(mockContext, pClDevice, props, false); bool ret = false; ret = pCmdQ->setPerfCountersEnabled(); EXPECT_TRUE(ret); MockEvent *event = new MockEvent(pCmdQ, CL_COMMAND_MARKER, 0, 0); event->setCPUProfilingPath(true); event->setCommand(std::unique_ptr(new CommandWithoutKernel(*pCmdQ))); event->submitCommand(false); uint64_t submit, start, end; event->getEventProfilingInfo(CL_PROFILING_COMMAND_SUBMIT, sizeof(uint64_t), &submit, 0); event->getEventProfilingInfo(CL_PROFILING_COMMAND_START, sizeof(uint64_t), &start, 0); event->getEventProfilingInfo(CL_PROFILING_COMMAND_END, sizeof(uint64_t), &end, 0); EXPECT_LT(0u, submit); EXPECT_LT(submit, start); EXPECT_LT(start, end); delete event; delete pCmdQ; } HWTEST_F(InternalsEventWithPerfCountersTest, givenCpuProfilingPerfCountersPathWhenEnqueuedMarkerThenUseTimeStampNodePerfCounterNode) { const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, 0}; MockCommandQueue *pCmdQ = new MockCommandQueue(mockContext, pClDevice, 
props, false); pCmdQ->setPerfCountersEnabled(); MockEvent *event = new MockEvent(pCmdQ, CL_COMMAND_MARKER, 0, 0); event->setCPUProfilingPath(true); HwPerfCounter *perfCounter = static_cast *>(event->getHwPerfCounterNode())->tagForCpuAccess; ASSERT_NE(nullptr, perfCounter); auto hwTimeStampNode = static_cast *>(event->getHwTimeStampNode()); if (pCmdQ->getTimestampPacketContainer()) { EXPECT_EQ(nullptr, hwTimeStampNode); } else { ASSERT_NE(nullptr, hwTimeStampNode->tagForCpuAccess); } event->setCommand(std::unique_ptr(new CommandWithoutKernel(*pCmdQ))); event->submitCommand(false); uint64_t submit, start, end; event->getEventProfilingInfo(CL_PROFILING_COMMAND_SUBMIT, sizeof(uint64_t), &submit, 0); event->getEventProfilingInfo(CL_PROFILING_COMMAND_START, sizeof(uint64_t), &start, 0); event->getEventProfilingInfo(CL_PROFILING_COMMAND_END, sizeof(uint64_t), &end, 0); EXPECT_LT(0u, submit); EXPECT_LT(submit, start); EXPECT_LT(start, end); delete event; delete pCmdQ; } TEST_F(InternalsEventWithPerfCountersTest, GivenPerfCountersEnabledWhenEventIsCreatedThenProfilingEnabledAndPerfCountersEnabledAreTrue) { const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, 0}; MockCommandQueue *pCmdQ = new MockCommandQueue(mockContext, pClDevice, props, false); pCmdQ->setPerfCountersEnabled(); Event *ev = new Event(pCmdQ, CL_COMMAND_COPY_BUFFER, 3, 0); EXPECT_TRUE(ev->isProfilingEnabled()); EXPECT_TRUE(ev->isPerfCountersEnabled()); delete ev; delete pCmdQ; } TEST(Event, WhenReleasingEventThenEventIsNull) { UserEvent *ue = new UserEvent(); auto autoptr = ue->release(); ASSERT_TRUE(autoptr.isUnused()); } HWTEST_F(EventTest, givenVirtualEventWhenCommandSubmittedThenLockCsrOccurs) { class MockCommandComputeKernel : public CommandComputeKernel { public: using CommandComputeKernel::eventsWaitlist; MockCommandComputeKernel(CommandQueue &commandQueue, std::unique_ptr &kernelOperation, std::vector &surfaces, Kernel *kernel) : CommandComputeKernel(commandQueue, 
kernelOperation, surfaces, false, false, false, nullptr, PreemptionMode::Disabled, kernel, 0) {} }; class MockEvent : public Event { public: using Event::submitCommand; MockEvent(CommandQueue *cmdQueue, cl_command_type cmdType, uint32_t taskLevel, uint32_t taskCount) : Event(cmdQueue, cmdType, taskLevel, taskCount) {} }; MockKernelWithInternals kernel(*pClDevice); IndirectHeap *ih1 = nullptr, *ih2 = nullptr, *ih3 = nullptr; pCmdQ->allocateHeapMemory(IndirectHeap::Type::DYNAMIC_STATE, 1, ih1); pCmdQ->allocateHeapMemory(IndirectHeap::Type::INDIRECT_OBJECT, 1, ih2); pCmdQ->allocateHeapMemory(IndirectHeap::Type::SURFACE_STATE, 1, ih3); auto cmdStream = new LinearStream(pDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties({pDevice->getRootDeviceIndex(), 4096, AllocationType::COMMAND_BUFFER, pDevice->getDeviceBitfield()})); std::vector surfaces; auto kernelOperation = std::make_unique(cmdStream, *pDevice->getDefaultEngine().commandStreamReceiver->getInternalAllocationStorage()); kernelOperation->setHeaps(ih1, ih2, ih3); std::unique_ptr command = std::make_unique(*pCmdQ, kernelOperation, surfaces, kernel); auto virtualEvent = make_releaseable(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, CompletionStamp::notReady, CompletionStamp::notReady); virtualEvent->setCommand(std::move(command)); virtualEvent->submitCommand(false); uint32_t expectedLockCounter = pDevice->getDefaultEngine().commandStreamReceiver->getClearColorAllocation() ? 
3u : 2u; EXPECT_EQ(expectedLockCounter, pDevice->getUltCommandStreamReceiver().recursiveLockCounter); } HWTEST_F(EventTest, givenVirtualEventWhenSubmitCommandEventNotReadyAndEventWithoutCommandThenOneLockCsrNeeded) { class MockEvent : public Event { public: using Event::submitCommand; MockEvent(CommandQueue *cmdQueue, cl_command_type cmdType, uint32_t taskLevel, uint32_t taskCount) : Event(cmdQueue, cmdType, taskLevel, taskCount) {} }; auto virtualEvent = make_releaseable(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, CompletionStamp::notReady, CompletionStamp::notReady); virtualEvent->submitCommand(false); EXPECT_EQ(pDevice->getUltCommandStreamReceiver().recursiveLockCounter, 1u); } HWTEST_F(InternalsEventTest, GivenBufferWithoutZeroCopyWhenMappingOrUnmappingThenFlushPreviousTasksBeforeMappingOrUnmapping) { struct MockNonZeroCopyBuff : UnalignedBuffer { MockNonZeroCopyBuff(int32_t &executionStamp) : executionStamp(executionStamp), dataTransferedStamp(-1) { hostPtr = &dataTransferedStamp; memoryStorage = &executionStamp; size = sizeof(executionStamp); hostPtrMinSize = size; } void setIsZeroCopy() { isZeroCopy = false; } void swapCopyDirection() { std::swap(hostPtr, memoryStorage); } int32_t &executionStamp; int32_t dataTransferedStamp; }; int32_t executionStamp = 0; auto csr = new MockCsr(executionStamp, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(csr); const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0}; auto pCmdQ = make_releaseable(mockContext, pClDevice, props, false); MockNonZeroCopyBuff buffer(executionStamp); MemObjSizeArray size = {{4, 1, 1}}; MemObjOffsetArray offset = {{0, 0, 0}}; auto commandMap = std::unique_ptr(new CommandMapUnmap(MAP, buffer, size, offset, false, *pCmdQ)); EXPECT_EQ(0, executionStamp); EXPECT_EQ(-1, csr->flushTaskStamp); EXPECT_EQ(-1, buffer.dataTransferedStamp); auto latestSentFlushTaskCount = csr->peekLatestSentTaskCount(); commandMap->submit(0, 
false); EXPECT_EQ(1, executionStamp); EXPECT_EQ(0, csr->flushTaskStamp); EXPECT_EQ(1, buffer.dataTransferedStamp); auto latestSentFlushTaskCountAfterSubmit = csr->peekLatestSentTaskCount(); EXPECT_GT(latestSentFlushTaskCountAfterSubmit, latestSentFlushTaskCount); executionStamp = 0; csr->flushTaskStamp = -1; buffer.dataTransferedStamp = -1; buffer.swapCopyDirection(); auto commandUnMap = std::unique_ptr(new CommandMapUnmap(UNMAP, buffer, size, offset, false, *pCmdQ)); EXPECT_EQ(0, executionStamp); EXPECT_EQ(-1, csr->flushTaskStamp); EXPECT_EQ(-1, buffer.dataTransferedStamp); commandUnMap->submit(0, false); EXPECT_EQ(1, executionStamp); EXPECT_EQ(0, csr->flushTaskStamp); EXPECT_EQ(1, buffer.dataTransferedStamp); EXPECT_EQ(nullptr, commandUnMap->getCommandStream()); } TEST(EventCallback, WhenOverridingStatusThenEventUsesNewStatus) { struct ClbFuncTempStruct { static void CL_CALLBACK ClbFuncT(cl_event e, cl_int status, void *retStatus) { *((cl_int *)retStatus) = status; } }; cl_int retStatus = 7; Event::Callback clb(nullptr, ClbFuncTempStruct::ClbFuncT, CL_COMPLETE, &retStatus); EXPECT_EQ(CL_COMPLETE, clb.getCallbackExecutionStatusTarget()); clb.execute(); EXPECT_EQ(CL_COMPLETE, retStatus); retStatus = 7; clb.overrideCallbackExecutionStatusTarget(-1); EXPECT_EQ(-1, clb.getCallbackExecutionStatusTarget()); clb.execute(); EXPECT_EQ(-1, retStatus); } TEST_F(EventTest, WhenSettingCpuTimeStampThenCorrectTimeIsSet) { MyEvent ev(this->pCmdQ, CL_COMMAND_COPY_BUFFER, 3, 0); ev.setProfilingEnabled(true); ev.setQueueTimeStamp(); TimeStampData outtimeStamp = {0, 0}; outtimeStamp = ev.getQueueTimeStamp(); EXPECT_NE(0ULL, outtimeStamp.CPUTimeinNS); EXPECT_EQ(0ULL, outtimeStamp.GPUTimeStamp); ev.setSubmitTimeStamp(); outtimeStamp = ev.getSubmitTimeStamp(); EXPECT_NE(0ULL, outtimeStamp.CPUTimeinNS); EXPECT_EQ(0ULL, outtimeStamp.GPUTimeStamp); ev.setStartTimeStamp(); uint64_t outCPUtimeStamp = ev.getStartTimeStamp(); EXPECT_NE(0ULL, outCPUtimeStamp); ev.setEndTimeStamp(); 
outCPUtimeStamp = ev.getEndTimeStamp(); EXPECT_NE(0ULL, outCPUtimeStamp); outCPUtimeStamp = ev.getCompleteTimeStamp(); EXPECT_NE(0ULL, outCPUtimeStamp); } TEST_F(EventTest, GivenNoQueueWhenSettingCpuTimeStampThenTimesIsNotSet) { MyEvent ev(nullptr, CL_COMMAND_COPY_BUFFER, 3, 0); ev.setQueueTimeStamp(); TimeStampData outtimeStamp = {0, 0}; outtimeStamp = ev.getQueueTimeStamp(); EXPECT_EQ(0ULL, outtimeStamp.CPUTimeinNS); EXPECT_EQ(0ULL, outtimeStamp.GPUTimeStamp); ev.setSubmitTimeStamp(); outtimeStamp = ev.getSubmitTimeStamp(); EXPECT_EQ(0ULL, outtimeStamp.CPUTimeinNS); EXPECT_EQ(0ULL, outtimeStamp.GPUTimeStamp); ev.setStartTimeStamp(); uint64_t outCPUtimeStamp = ev.getStartTimeStamp(); EXPECT_EQ(0ULL, outCPUtimeStamp); ev.setEndTimeStamp(); outCPUtimeStamp = ev.getEndTimeStamp(); EXPECT_EQ(0ULL, outCPUtimeStamp); outCPUtimeStamp = ev.getCompleteTimeStamp(); EXPECT_EQ(0ULL, outCPUtimeStamp); } HWTEST_F(EventTest, WhenGettingHwTimeStampsThenValidPointerIsReturned) { pDevice->getUltCommandStreamReceiver().timestampPacketWriteEnabled = false; auto myCmdQ = std::make_unique>(pCmdQ->getContextPtr(), pClDevice, nullptr); std::unique_ptr event(new Event(myCmdQ.get(), CL_COMMAND_COPY_BUFFER, 0, 0)); ASSERT_NE(nullptr, event); HwTimeStamps *timeStamps = static_cast *>(event->getHwTimeStampNode())->tagForCpuAccess; ASSERT_NE(nullptr, timeStamps); //this should not cause any heap corruptions ASSERT_EQ(0ULL, timeStamps->GlobalStartTS); ASSERT_EQ(0ULL, timeStamps->ContextStartTS); ASSERT_EQ(0ULL, timeStamps->GlobalEndTS); ASSERT_EQ(0ULL, timeStamps->ContextEndTS); ASSERT_EQ(0ULL, timeStamps->GlobalCompleteTS); ASSERT_EQ(0ULL, timeStamps->ContextCompleteTS); HwTimeStamps *timeStamps2 = static_cast *>(event->getHwTimeStampNode())->tagForCpuAccess; ASSERT_EQ(timeStamps, timeStamps2); } HWTEST_F(EventTest, WhenGetHwTimeStampsAllocationThenValidPointerIsReturned) { pDevice->getUltCommandStreamReceiver().timestampPacketWriteEnabled = false; auto myCmdQ = 
std::make_unique>(pCmdQ->getContextPtr(), pClDevice, nullptr); std::unique_ptr event(new Event(myCmdQ.get(), CL_COMMAND_COPY_BUFFER, 0, 0)); ASSERT_NE(nullptr, event); GraphicsAllocation *allocation = event->getHwTimeStampNode()->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation(); ASSERT_NE(nullptr, allocation); void *memoryStorage = allocation->getUnderlyingBuffer(); size_t memoryStorageSize = allocation->getUnderlyingBufferSize(); EXPECT_NE(nullptr, memoryStorage); EXPECT_GT(memoryStorageSize, 0u); } HWTEST_F(EventTest, WhenEventIsCreatedThenHwTimeStampsMemoryIsPlacedInGraphicsAllocation) { pDevice->getUltCommandStreamReceiver().timestampPacketWriteEnabled = false; auto myCmdQ = std::make_unique>(pCmdQ->getContextPtr(), pClDevice, nullptr); std::unique_ptr event(new Event(myCmdQ.get(), CL_COMMAND_COPY_BUFFER, 0, 0)); ASSERT_NE(nullptr, event); HwTimeStamps *timeStamps = static_cast *>(event->getHwTimeStampNode())->tagForCpuAccess; ASSERT_NE(nullptr, timeStamps); GraphicsAllocation *allocation = event->getHwTimeStampNode()->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation(); ASSERT_NE(nullptr, allocation); void *memoryStorage = allocation->getUnderlyingBuffer(); size_t graphicsAllocationSize = allocation->getUnderlyingBufferSize(); EXPECT_GE(timeStamps, memoryStorage); EXPECT_LE(timeStamps + 1, ptrOffset(memoryStorage, graphicsAllocationSize)); } TEST_F(EventTest, GivenNullQueueWhenEventIsCreatedThenProfilingAndPerfCountersAreDisabled) { Event ev(nullptr, CL_COMMAND_COPY_BUFFER, 3, 0); EXPECT_FALSE(ev.isProfilingEnabled()); EXPECT_FALSE(ev.isPerfCountersEnabled()); } TEST_F(EventTest, GivenProfilingDisabledWhenEventIsCreatedThenPerfCountersAreDisabled) { Event ev(pCmdQ, CL_COMMAND_COPY_BUFFER, 3, 0); EXPECT_FALSE(ev.isProfilingEnabled()); EXPECT_FALSE(ev.isPerfCountersEnabled()); } TEST_F(InternalsEventTest, GivenOnlyProfilingEnabledWhenEventIsCreatedThenPerfCountersAreDisabled) { const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 
CL_QUEUE_PROFILING_ENABLE, 0}; MockCommandQueue *pCmdQ = new MockCommandQueue(mockContext, pClDevice, props, false); Event *ev = new Event(pCmdQ, CL_COMMAND_COPY_BUFFER, 3, 0); EXPECT_TRUE(ev->isProfilingEnabled()); EXPECT_FALSE(ev->isPerfCountersEnabled()); delete ev; delete pCmdQ; } TEST_F(EventTest, GivenClSubmittedWhenpeekIsSubmittedThenTrueIsReturned) { Event ev(this->pCmdQ, CL_COMMAND_COPY_BUFFER, 3, 0); int32_t executionStatusSnapshot = CL_SUBMITTED; bool executionStatus = ev.peekIsSubmitted(executionStatusSnapshot); EXPECT_EQ(true, executionStatus); } TEST_F(EventTest, GivenCompletedEventWhenQueryingExecutionStatusAfterFlushThenCsrIsNotFlushed) { cl_int ret; Event ev(this->pCmdQ, CL_COMMAND_COPY_BUFFER, 3, 3); auto &csr = this->pCmdQ->getGpgpuCommandStreamReceiver(); *csr.getTagAddress() = 3; auto previousTaskLevel = csr.peekTaskLevel(); EXPECT_GT(3u, previousTaskLevel); ret = clFlush(this->pCmdQ); ASSERT_EQ(CL_SUCCESS, ret); cl_int execState; ret = clGetEventInfo(&ev, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(execState), &execState, nullptr); ASSERT_EQ(CL_SUCCESS, ret); EXPECT_EQ(previousTaskLevel, csr.peekTaskLevel()); } HWTEST_F(EventTest, GivenEventCreatedOnMapBufferWithoutCommandWhenSubmittingCommandThenTaskCountIsNotUpdated) { MockEvent ev(this->pCmdQ, CL_COMMAND_MAP_BUFFER, CompletionStamp::notReady, CompletionStamp::notReady); EXPECT_EQ(CompletionStamp::notReady, ev.peekTaskCount()); ev.submitCommand(false); EXPECT_EQ(0u, ev.peekTaskCount()); } HWTEST_F(EventTest, GivenEventCreatedOnMapImageWithoutCommandWhenSubmittingCommandThenTaskCountIsNotUpdated) { MockEvent ev(this->pCmdQ, CL_COMMAND_MAP_IMAGE, CompletionStamp::notReady, CompletionStamp::notReady); EXPECT_EQ(CompletionStamp::notReady, ev.peekTaskCount()); ev.submitCommand(false); EXPECT_EQ(0u, ev.peekTaskCount()); } TEST_F(EventTest, givenCmdQueueWithoutProfilingWhenIsCpuProfilingIsCalledThenFalseIsReturned) { MockEvent ev(this->pCmdQ, CL_COMMAND_MAP_IMAGE, CompletionStamp::notReady, 
CompletionStamp::notReady); bool cpuProfiling = ev.isCPUProfilingPath() != 0; EXPECT_FALSE(cpuProfiling); } TEST_F(EventTest, givenOutEventWhenBlockingEnqueueHandledOnCpuThenUpdateTaskCountAndFlushStampFromCmdQ) { std::unique_ptr image(ImageHelper::create(&mockContext)); EXPECT_TRUE(image->mappingOnCpuAllowed()); pCmdQ->flushStamp->setStamp(10); pCmdQ->taskCount = 11; size_t origin[3] = {0, 0, 0}; size_t region[3] = {1, 1, 1}; cl_int retVal; cl_event clEvent; pCmdQ->enqueueMapImage(image.get(), CL_TRUE, CL_MAP_READ, origin, region, nullptr, nullptr, 0, nullptr, &clEvent, retVal); auto eventObj = castToObject(clEvent); EXPECT_EQ(pCmdQ->taskCount, eventObj->peekTaskCount()); EXPECT_EQ(pCmdQ->flushStamp->peekStamp(), eventObj->flushStamp->peekStamp()); eventObj->release(); } TEST_F(EventTest, givenCmdQueueWithProfilingWhenIsCpuProfilingIsCalledThenTrueIsReturned) { const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, 0}; std::unique_ptr pCmdQ(new MockCommandQueue(&mockContext, pClDevice, props, false)); MockEvent ev(pCmdQ.get(), CL_COMMAND_MAP_IMAGE, CompletionStamp::notReady, CompletionStamp::notReady); bool cpuProfiling = ev.isCPUProfilingPath() != 0; EXPECT_TRUE(cpuProfiling); } TEST(EventCallback, GivenEventWithCallbacksOnWhenPeekingHasCallbacksThenReturnTrue) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableAsyncEventsHandler.set(false); struct ClbFuncTempStruct { static void CL_CALLBACK ClbFuncT(cl_event, cl_int, void *) { } }; struct SmallMockEvent : Event { SmallMockEvent() : Event(nullptr, CL_COMMAND_COPY_BUFFER, 0, 0) { this->parentCount = 1; // block event } }; { SmallMockEvent ev; EXPECT_FALSE(ev.peekHasCallbacks()); } { SmallMockEvent ev; ev.addCallback(ClbFuncTempStruct::ClbFuncT, CL_SUBMITTED, nullptr); EXPECT_TRUE(ev.peekHasCallbacks()); ev.decRefInternal(); } { SmallMockEvent ev; ev.addCallback(ClbFuncTempStruct::ClbFuncT, CL_RUNNING, nullptr); EXPECT_TRUE(ev.peekHasCallbacks()); ev.decRefInternal(); } { 
SmallMockEvent ev; ev.addCallback(ClbFuncTempStruct::ClbFuncT, CL_COMPLETE, nullptr); EXPECT_TRUE(ev.peekHasCallbacks()); ev.decRefInternal(); } { SmallMockEvent ev; ev.addCallback(ClbFuncTempStruct::ClbFuncT, CL_SUBMITTED, nullptr); ev.addCallback(ClbFuncTempStruct::ClbFuncT, CL_COMPLETE, nullptr); EXPECT_TRUE(ev.peekHasCallbacks()); ev.decRefInternal(); ev.decRefInternal(); } { SmallMockEvent ev; ev.addCallback(ClbFuncTempStruct::ClbFuncT, CL_RUNNING, nullptr); ev.addCallback(ClbFuncTempStruct::ClbFuncT, CL_COMPLETE, nullptr); EXPECT_TRUE(ev.peekHasCallbacks()); ev.decRefInternal(); ev.decRefInternal(); } { SmallMockEvent ev; ev.addCallback(ClbFuncTempStruct::ClbFuncT, CL_SUBMITTED, nullptr); ev.addCallback(ClbFuncTempStruct::ClbFuncT, CL_RUNNING, nullptr); ev.addCallback(ClbFuncTempStruct::ClbFuncT, CL_COMPLETE, nullptr); EXPECT_TRUE(ev.peekHasCallbacks()); ev.decRefInternal(); ev.decRefInternal(); ev.decRefInternal(); } } TEST_F(EventTest, GivenNotCompletedEventWhenAddingChildThenNumEventsBlockingThisIsGreaterThanZero) { VirtualEvent virtualEvent(pCmdQ, &mockContext); { Event event(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 0, 0); event.addChild(virtualEvent); EXPECT_NE(0U, virtualEvent.peekNumEventsBlockingThis()); } } TEST(Event, whenCreatingRegularEventsThenExternalSynchronizationIsNotRequired) { Event *event = new Event(nullptr, 0, 0, 0); EXPECT_FALSE(event->isExternallySynchronized()); event->release(); UserEvent *userEvent = new UserEvent(); EXPECT_FALSE(userEvent->isExternallySynchronized()); userEvent->release(); VirtualEvent *virtualEvent = new VirtualEvent(); EXPECT_FALSE(virtualEvent->isExternallySynchronized()); virtualEvent->release(); } HWTEST_F(EventTest, givenEventWithNotReadyTaskLevelWhenUnblockedThenGetTaskLevelFromCsrIfGreaterThanParent) { uint32_t initialTaskLevel = 10; Event parentEventWithGreaterTaskLevel(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, initialTaskLevel + 5, 0); Event parentEventWithLowerTaskLevel(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 
initialTaskLevel - 5, 0); Event childEvent0(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, CompletionStamp::notReady, CompletionStamp::notReady); Event childEvent1(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, CompletionStamp::notReady, CompletionStamp::notReady); auto &csr = reinterpret_cast &>(pCmdQ->getGpgpuCommandStreamReceiver()); csr.taskLevel = initialTaskLevel; parentEventWithGreaterTaskLevel.addChild(childEvent0); parentEventWithLowerTaskLevel.addChild(childEvent1); parentEventWithGreaterTaskLevel.setStatus(CL_COMPLETE); parentEventWithLowerTaskLevel.setStatus(CL_COMPLETE); EXPECT_EQ(parentEventWithGreaterTaskLevel.getTaskLevel() + 1, childEvent0.getTaskLevel()); EXPECT_EQ(csr.taskLevel, childEvent1.getTaskLevel()); } TEST_F(EventTest, GivenCompletedEventWhenAddingChildThenNumEventsBlockingThisIsZero) { VirtualEvent virtualEvent(pCmdQ, &mockContext); { Event event(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 0, 0); event.setStatus(CL_COMPLETE); event.addChild(virtualEvent); EXPECT_EQ(0U, virtualEvent.peekNumEventsBlockingThis()); } } template struct TestEventCsr : public UltCommandStreamReceiver { TestEventCsr(const ExecutionEnvironment &executionEnvironment, const DeviceBitfield deviceBitfield) : UltCommandStreamReceiver(const_cast(executionEnvironment), 0, deviceBitfield) {} WaitStatus waitForCompletionWithTimeout(const WaitParams ¶ms, uint32_t taskCountToWait) override { waitForCompletionWithTimeoutCalled++; waitForCompletionWithTimeoutParamsPassed.push_back({params.enableTimeout, params.waitTimeout, taskCountToWait}); return waitForCompletionWithTimeoutResult; } struct WaitForCompletionWithTimeoutParams { bool enableTimeout = false; int64_t timeoutMs{}; uint32_t taskCountToWait{}; }; uint32_t waitForCompletionWithTimeoutCalled = 0u; WaitStatus waitForCompletionWithTimeoutResult = WaitStatus::Ready; StackVec waitForCompletionWithTimeoutParamsPassed{}; }; HWTEST_F(EventTest, givenQuickKmdSleepRequestWhenWaitIsCalledThenPassRequestToWaitingFunction) { HardwareInfo localHwInfo = 
pDevice->getHardwareInfo(); localHwInfo.capabilityTable.kmdNotifyProperties.enableKmdNotify = true; localHwInfo.capabilityTable.kmdNotifyProperties.enableQuickKmdSleep = true; localHwInfo.capabilityTable.kmdNotifyProperties.delayQuickKmdSleepMicroseconds = 1; localHwInfo.capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds = 2; pDevice->executionEnvironment->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]->setHwInfo(&localHwInfo); auto csr = new TestEventCsr(*pDevice->executionEnvironment, pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(csr); Event event(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 0, 0); event.updateCompletionStamp(1u, 0, 1u, 1u); const auto result = event.wait(true, true); EXPECT_EQ(WaitStatus::Ready, result); EXPECT_EQ(1u, csr->waitForCompletionWithTimeoutCalled); EXPECT_EQ(localHwInfo.capabilityTable.kmdNotifyProperties.delayQuickKmdSleepMicroseconds, csr->waitForCompletionWithTimeoutParamsPassed[0].timeoutMs); } HWTEST_F(EventTest, givenNonQuickKmdSleepRequestWhenWaitIsCalledThenPassRequestToWaitingFunction) { HardwareInfo localHwInfo = pDevice->getHardwareInfo(); localHwInfo.capabilityTable.kmdNotifyProperties.enableKmdNotify = true; localHwInfo.capabilityTable.kmdNotifyProperties.enableQuickKmdSleep = true; localHwInfo.capabilityTable.kmdNotifyProperties.enableQuickKmdSleepForSporadicWaits = false; localHwInfo.capabilityTable.kmdNotifyProperties.delayQuickKmdSleepMicroseconds = 1; localHwInfo.capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds = 2; pDevice->executionEnvironment->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]->setHwInfo(&localHwInfo); auto csr = new TestEventCsr(*pDevice->executionEnvironment, pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(csr); Event event(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 0, 0); event.updateCompletionStamp(1u, 0, 1u, 1u); const auto result = event.wait(true, false); EXPECT_EQ(WaitStatus::Ready, result); EXPECT_EQ(1u, 
csr->waitForCompletionWithTimeoutCalled); EXPECT_EQ(localHwInfo.capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds, csr->waitForCompletionWithTimeoutParamsPassed[0].timeoutMs); } HWTEST_F(EventTest, givenGpuHangWhenWaitIsCalledThenPassRequestToWaitingFunctionAndReturnGpuHang) { auto csr = new TestEventCsr(*pDevice->executionEnvironment, pDevice->getDeviceBitfield()); csr->waitForCompletionWithTimeoutResult = WaitStatus::GpuHang; pDevice->resetCommandStreamReceiver(csr); Event event(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 0, 0); const auto waitStatus = event.wait(true, false); EXPECT_EQ(WaitStatus::GpuHang, waitStatus); EXPECT_EQ(1u, csr->waitForCompletionWithTimeoutCalled); } HWTEST_F(InternalsEventTest, givenCommandWhenSubmitCalledThenUpdateFlushStamp) { auto pCmdQ = std::unique_ptr(new MockCommandQueue(mockContext, pClDevice, 0, false)); MockEvent *event = new MockEvent(pCmdQ.get(), CL_COMMAND_MARKER, 0, 0); auto &csr = pDevice->getUltCommandStreamReceiver(); csr.flushStamp->setStamp(5); FlushStamp expectedFlushStamp = 0; EXPECT_EQ(expectedFlushStamp, event->flushStamp->peekStamp()); event->setCommand(std::unique_ptr(new CommandWithoutKernel(*pCmdQ))); event->submitCommand(false); EXPECT_EQ(csr.flushStamp->peekStamp(), event->flushStamp->peekStamp()); delete event; } HWTEST_F(InternalsEventTest, givenAbortedCommandWhenSubmitCalledThenDontUpdateFlushStamp) { auto pCmdQ = std::unique_ptr(new MockCommandQueue(mockContext, pClDevice, 0, false)); MockEvent *event = new MockEvent(pCmdQ.get(), CL_COMMAND_MARKER, 0, 0); auto &csr = pDevice->getUltCommandStreamReceiver(); csr.flushStamp->setStamp(5); MockKernelWithInternals mockKernelWithInternals(*pClDevice); auto pKernel = mockKernelWithInternals.mockKernel; auto cmdStream = new LinearStream(pDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties({pDevice->getRootDeviceIndex(), 4096, AllocationType::COMMAND_BUFFER, pDevice->getDeviceBitfield()})); IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = 
nullptr; pCmdQ->allocateHeapMemory(IndirectHeap::Type::DYNAMIC_STATE, 4096u, dsh); pCmdQ->allocateHeapMemory(IndirectHeap::Type::INDIRECT_OBJECT, 4096u, ioh); pCmdQ->allocateHeapMemory(IndirectHeap::Type::SURFACE_STATE, 4096u, ssh); auto blockedCommandsData = std::make_unique(cmdStream, *pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage()); blockedCommandsData->setHeaps(dsh, ioh, ssh); PreemptionMode preemptionMode = pDevice->getPreemptionMode(); std::vector v; auto cmd = new CommandComputeKernel(*pCmdQ, blockedCommandsData, v, false, false, false, nullptr, preemptionMode, pKernel, 1); event->setCommand(std::unique_ptr(cmd)); FlushStamp expectedFlushStamp = 0; EXPECT_EQ(expectedFlushStamp, event->flushStamp->peekStamp()); event->submitCommand(true); EXPECT_EQ(expectedFlushStamp, event->flushStamp->peekStamp()); delete event; } TEST(EventLockerTests, givenEventWhenEventLockerIsUsedThenOwnershipIsAutomaticallyReleased) { Event ev(nullptr, CL_COMMAND_COPY_BUFFER, 3, 0); { TakeOwnershipWrapper locker(ev); EXPECT_TRUE(ev.hasOwnership()); } EXPECT_FALSE(ev.hasOwnership()); } TEST(EventLockerTests, givenEventWhenEventLockerIsUsedAndUnlockedThenOwnershipIsReleased) { Event ev(nullptr, CL_COMMAND_COPY_BUFFER, 3, 0); { TakeOwnershipWrapper locker(ev); locker.unlock(); EXPECT_FALSE(ev.hasOwnership()); } EXPECT_FALSE(ev.hasOwnership()); } TEST(EventLockerTests, givenEventWhenEventLockerIsUsedAndlockedThenOwnershipIsAcquiredAgain) { Event ev(nullptr, CL_COMMAND_COPY_BUFFER, 3, 0); { TakeOwnershipWrapper locker(ev); locker.unlock(); locker.lock(); EXPECT_TRUE(ev.hasOwnership()); } EXPECT_FALSE(ev.hasOwnership()); } TEST(EventLockerTests, givenEventWhenEventLockerIsLockedTwiceThenOwnershipIsReleaseAfterLeavingTheScope) { Event ev(nullptr, CL_COMMAND_COPY_BUFFER, 3, 0); { TakeOwnershipWrapper locker(ev); locker.lock(); EXPECT_TRUE(ev.hasOwnership()); } EXPECT_FALSE(ev.hasOwnership()); } TEST(EventsDebug, givenEventWhenTrackingOfParentsIsOnThenTrackParents) { 
DebugManagerStateRestore stateRestore; DebugManager.flags.TrackParentEvents.set(true); Event event(nullptr, CL_COMMAND_NDRANGE_KERNEL, 0, 0); Event event2(nullptr, CL_COMMAND_NDRANGE_KERNEL, 0, 0); auto &parentEvents = event.getParentEvents(); auto &parentEvents2 = event2.getParentEvents(); EXPECT_EQ(0u, parentEvents.size()); EXPECT_EQ(0u, parentEvents2.size()); event.addChild(event2); EXPECT_EQ(0u, parentEvents.size()); EXPECT_EQ(1u, parentEvents2.size()); EXPECT_EQ(&event, parentEvents2.at(0)); event.setStatus(CL_COMPLETE); } TEST(EventsDebug, givenEventWhenTrackingOfParentsIsOffThenDoNotTrackParents) { DebugManagerStateRestore stateRestore; DebugManager.flags.TrackParentEvents.set(false); Event event(nullptr, CL_COMMAND_NDRANGE_KERNEL, 0, 0); Event event2(nullptr, CL_COMMAND_NDRANGE_KERNEL, 0, 0); auto &parentEvents = event.getParentEvents(); auto &parentEvents2 = event2.getParentEvents(); EXPECT_EQ(0u, parentEvents.size()); EXPECT_EQ(0u, parentEvents2.size()); event.addChild(event2); EXPECT_EQ(0u, parentEvents.size()); EXPECT_EQ(0u, parentEvents2.size()); event.setStatus(CL_COMPLETE); } TEST(CommandQueue, givenTimestampPacketWritesDisabledAndQueueHasTimestampPacketContainerThenCreateTheContainerForEvent) { DebugManagerStateRestore stateRestore; DebugManager.flags.EnableTimestampPacket.set(0); MockContext context{}; MockCommandQueue queue{&context, context.getDevice(0), nullptr, false}; ASSERT_FALSE(queue.getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()); ASSERT_EQ(nullptr, queue.timestampPacketContainer.get()); queue.timestampPacketContainer = std::make_unique(); MockEvent event{&queue, CL_COMMAND_MARKER, 0, 0}; EXPECT_NE(nullptr, event.timestampPacketContainer); } compute-runtime-22.14.22890/opencl/test/unit_test/event/event_tracker_tests.cpp000066400000000000000000000634411422164147700275420ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include 
"shared/source/helpers/file_io.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "opencl/source/event/event.h" #include "opencl/source/event/event_tracker.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "event_fixture.h" #include #include struct ClonedStream : std::stringstream { ClonedStream(std::string &clonedOutput) : clonedOutput(clonedOutput) { } ~ClonedStream() override { clonedOutput = this->str(); } std::string &clonedOutput; }; class EventsTrackerMock : public EventsTracker { public: std::unique_ptr createDumpStream(const std::string &filename) override { return std::make_unique(streamMock); } void overrideGlobal() { originGlobal.swap(EventsTracker::globalEvTracker); EventsTracker::globalEvTracker = std::unique_ptr{new EventsTrackerMock()}; } void restoreGlobal() { EventsTrackerMock::shutdownGlobalEvTracker(); EventsTracker::globalEvTracker.swap(originGlobal); } static void shutdownGlobalEvTracker() { EventsTracker::globalEvTracker.reset(); } IFList *getList() { return &trackedEvents; } std::string streamMock; std::unique_ptr originGlobal; }; TEST(EventsTracker, whenCallingGetEventsTrackerThenGetGlobalEventsTrackerInstance) { auto &evTracker1 = EventsTracker::getEventsTracker(); auto &evTracker2 = EventsTracker::getEventsTracker(); EXPECT_EQ(&evTracker1, &evTracker2); EventsTrackerMock::shutdownGlobalEvTracker(); } TEST(EventsTracker, whenCallLabelFunctionThenGetStringWithProperEventId) { UserEvent uEvent; std::unordered_map map; map[&uEvent] = 0; EXPECT_STREQ("e0", EventsTracker::label(&uEvent, map).c_str()); } TEST(EventsTracker, whenCallLabelFunctionWhenEventIsNotInMapThenGetStringWithoutId) { UserEvent uEvent; std::unordered_map map; EXPECT_STREQ("e", EventsTracker::label(&uEvent, map).c_str()); } TEST(EventsTracker, whenCallLabelFunctionThenGetStringWithProperCmdqId) { MockCommandQueue cmdq; std::string expect = "cq" + std::to_string(reinterpret_cast(&cmdq)); EXPECT_STREQ(expect.c_str(), 
EventsTracker::label(&cmdq).c_str()); } TEST(EventsTracker, givenNullptrCmdqThenNotDumping) { MockCommandQueue *cmdq_ptr = nullptr; std::stringstream stream; std::set dumped; EventsTracker::dumpQueue(cmdq_ptr, stream, dumped); EXPECT_STREQ("", stream.str().c_str()); } TEST(EventsTracker, givenAlreadyDumpedCmdqThenNotDumping) { MockCommandQueue cmdq; std::stringstream stream; std::set dumped; dumped.insert(&cmdq); EventsTracker::dumpQueue(&cmdq, stream, dumped); EXPECT_STREQ("", stream.str().c_str()); } TEST(EventsTracker, givenCmqdWithTaskCountAndLevelNotReadyThenDumpingCmdqWithNotReadyLabels) { MockCommandQueue cmdq; cmdq.taskCount = CompletionStamp::notReady; cmdq.taskLevel = CompletionStamp::notReady; std::stringstream stream; std::set dumped; EventsTracker::dumpQueue(&cmdq, stream, dumped); std::stringstream expected; expected << EventsTracker::label(&cmdq) << "[label=\"{------CmdQueue, ptr=" << &cmdq << "------|task count=NOT_READY, level=NOT_READY}\",color=blue];\n"; EXPECT_STREQ(expected.str().c_str(), stream.str().c_str()); } TEST(EventsTracker, whenCallDumpQueueThenDumpingCmdqWithProperCountTaskAndLevelValues) { MockCommandQueue cmdq; cmdq.taskCount = 3; cmdq.taskLevel = 1; std::stringstream stream; std::set dumped; EventsTracker::dumpQueue(&cmdq, stream, dumped); std::stringstream expected; expected << EventsTracker::label(&cmdq) << "[label=\"{------CmdQueue, ptr=" << &cmdq << "------|task count=3, level=1}\",color=blue];\n"; EXPECT_STREQ(expected.str().c_str(), stream.str().c_str()); } TEST(EventsTracker, whenCallDumpEdgeThenGetStringWithProperLabelOfDumpedEdge) { UserEvent uEvent1; UserEvent uEvent2; std::stringstream stream; std::unordered_map map; map[&uEvent1] = 0; map[&uEvent2] = 1; EventsTracker::dumpEdge(&uEvent1, &uEvent2, stream, map); EXPECT_STREQ("e0->e1;\n", stream.str().c_str()); } TEST(EventsTracker, givenEventWithTaskLevelAndCountNotReadyThenDumpingNodeWithNotReadyLabels) { UserEvent uEvent; uEvent.taskLevel = CompletionStamp::notReady; 
uEvent.updateTaskCount(CompletionStamp::notReady, 0); std::stringstream stream; std::unordered_map map; map[&uEvent] = 0; EventsTracker::dumpNode(&uEvent, stream, map); std::stringstream expected; expected << "e0[label=\"{------USER_EVENT ptr=" << &uEvent << "------||CL_QUEUED|task count=NOT_READY, level=NOT_READY|CALLBACKS=FALSE}\",color=red];\n"; EXPECT_STREQ(expected.str().c_str(), stream.str().c_str()); } TEST(EventsTracker, whenCallDumpNodeFunctionThenDumpingNodeWithProperTaskLevelAndCountValues) { UserEvent uEvent; uEvent.taskLevel = 1; uEvent.updateTaskCount(1, 0); std::stringstream stream; std::unordered_map map; map[&uEvent] = 0; EventsTracker::dumpNode(&uEvent, stream, map); std::stringstream expected; expected << "e0[label=\"{------USER_EVENT ptr=" << &uEvent << "------||CL_QUEUED|task count=1, level=1|CALLBACKS=FALSE}\",color=red];\n"; EXPECT_STREQ(expected.str().c_str(), stream.str().c_str()); } TEST(EventsTracker, givenNullptrEventThenNotDumpingNode) { UserEvent *uEvent = nullptr; std::stringstream stream; std::unordered_map map; map[uEvent] = 0; EventsTracker::dumpNode(uEvent, stream, map); EXPECT_STREQ("eNULL[label=\"{ptr=nullptr}\",color=red];\n", stream.str().c_str()); } TEST(EventsTracker, givenEventAndUserEventThenDumpingNodeWithProperLabels) { UserEvent uEvent; Event event(nullptr, CL_COMMAND_NDRANGE_KERNEL, CompletionStamp::notReady, CompletionStamp::notReady); std::stringstream stream; std::unordered_map map; map[&uEvent] = 0; map[&event] = 1; EventsTracker::dumpNode(&uEvent, stream, map); std::stringstream expecteduEvent; expecteduEvent << "e0[label=\"{------USER_EVENT ptr=" << &uEvent << "------||CL_QUEUED|task count=NOT_READY, level=NOT_READY|CALLBACKS=FALSE}\",color=red];\n"; EXPECT_STREQ(expecteduEvent.str().c_str(), stream.str().c_str()); stream.str(std::string()); EventsTracker::dumpNode(&event, stream, map); std::stringstream expectedEvent; expectedEvent << "e1[label=\"{-----------EVENT ptr=" << &event << 
"------|CL_COMMAND_NDRANGE_KERNEL|CL_QUEUED|task count=NOT_READY, level=NOT_READY|CALLBACKS=FALSE}\",color=red];\n"; EXPECT_STREQ(expectedEvent.str().c_str(), stream.str().c_str()); } TEST(EventsTracker, givenCmdqAndItsVirtualEventThenDumpingWithProperLabels) { MockContext ctx; MockCommandQueue cmdq; VirtualEvent vEvent(&cmdq, &ctx); vEvent.setCurrentCmdQVirtualEvent(true); vEvent.updateTaskCount(1, 0); std::stringstream stream; std::unordered_map map; map[&vEvent] = 0; EventsTracker::dumpNode(&vEvent, stream, map); std::stringstream expected; expected << "e0[label=\"{---------V_EVENT ptr=" << &vEvent << "------|CMD_UNKNOWN:" << (cl_command_type)-1 << "|CL_QUEUED|task count=1, level=NOT_READY|CALLBACKS=FALSE}\",color=red];\n" << EventsTracker::label(&cmdq) << "->e0[label=\"VIRTUAL_EVENT\"];\n"; EXPECT_STREQ(expected.str().c_str(), stream.str().c_str()); } TEST(EventsTracker, givenEventWithCallbackThenDumpingWithProperLabel) { Event::Callback::ClbFuncT func = [](cl_event ev, cl_int i, void *data) {}; UserEvent uEvent; uEvent.addCallback(func, 0, nullptr); std::stringstream stream; std::unordered_map map; map[&uEvent] = 0; EventsTracker::dumpNode(&uEvent, stream, map); std::stringstream expected; expected << "e0[label=\"{------USER_EVENT ptr=" << &uEvent << "------||CL_QUEUED|task count=NOT_READY, level=NOT_READY|CALLBACKS=TRUE}\",color=red];\n"; EXPECT_STREQ(expected.str().c_str(), stream.str().c_str()); } TEST(EventsTracker, givenSubmittedEventThenDumpingWithProperLabel) { Event event(nullptr, CL_COMMAND_NDRANGE_KERNEL, CompletionStamp::notReady, CompletionStamp::notReady); std::stringstream stream; std::unordered_map map; map[&event] = 0; std::stringstream expected; event.setStatus(CL_SUBMITTED); EventsTracker::dumpNode(&event, stream, map); expected << "e0[label=\"{-----------EVENT ptr=" << &event << "------|CL_COMMAND_NDRANGE_KERNEL|CL_SUBMITTED|task count=NOT_READY, level=NOT_READY|CALLBACKS=FALSE}\",color=yellow];\n"; EXPECT_STREQ(expected.str().c_str(), 
stream.str().c_str()); } TEST(EventsTracker, givenSubmittedUserEventThenDumpingWithProperLabel) { UserEvent uEvent; std::stringstream stream; std::unordered_map map; map[&uEvent] = 0; std::stringstream expected; uEvent.setStatus(CL_SUBMITTED); EventsTracker::dumpNode(&uEvent, stream, map); expected << "e0[label=\"{------USER_EVENT ptr=" << &uEvent << "------||CL_SUBMITTED|task count=NOT_READY, level=NOT_READY|CALLBACKS=FALSE}\",color=red];\n"; EXPECT_STREQ(expected.str().c_str(), stream.str().c_str()); } TEST(EventsTracker, givenUserEventWithUnproperStatusThenDumpingWithProperLabel) { UserEvent uEvent; std::stringstream stream; std::unordered_map map; map[&uEvent] = 0; std::stringstream expected; uEvent.setStatus(-1); EventsTracker::dumpNode(&uEvent, stream, map); expected << "e0[label=\"{------USER_EVENT ptr=" << &uEvent << "------||ABORTED|task count=NOT_READY, level=NOT_READY|CALLBACKS=FALSE}\",color=green];\n"; EXPECT_STREQ(expected.str().c_str(), stream.str().c_str()); } TEST(EventsTracker, givenRunningEventThenDumpingWithProperLabel) { UserEvent uEvent; std::stringstream stream; std::unordered_map map; map[&uEvent] = 0; std::stringstream expected; uEvent.setStatus(CL_RUNNING); EventsTracker::dumpNode(&uEvent, stream, map); expected << "e0[label=\"{------USER_EVENT ptr=" << &uEvent << "------||CL_RUNNING|task count=NOT_READY, level=NOT_READY|CALLBACKS=FALSE}\",color=red];\n"; EXPECT_STREQ(expected.str().c_str(), stream.str().c_str()); } TEST(EventsTracker, givenQueuedEventThenDumpingWithProperLabel) { UserEvent uEvent; std::stringstream stream; std::unordered_map map; map[&uEvent] = 0; std::stringstream expected; uEvent.setStatus(CL_QUEUED); EventsTracker::dumpNode(&uEvent, stream, map); expected << "e0[label=\"{------USER_EVENT ptr=" << &uEvent << "------||CL_QUEUED|task count=NOT_READY, level=NOT_READY|CALLBACKS=FALSE}\",color=red];\n"; EXPECT_STREQ(expected.str().c_str(), stream.str().c_str()); } TEST(EventsTracker, 
givenCompleteEventThenDumpingWithProperLabel) { UserEvent uEvent; std::stringstream stream; std::unordered_map map; map[&uEvent] = 0; std::stringstream expected; uEvent.setStatus(CL_COMPLETE); EventsTracker::dumpNode(&uEvent, stream, map); expected << "e0[label=\"{------USER_EVENT ptr=" << &uEvent << "------||CL_COMPLETE|task count=NOT_READY, level=0|CALLBACKS=FALSE}\",color=green];\n"; EXPECT_STREQ(expected.str().c_str(), stream.str().c_str()); } TEST(EventsTracker, givenNullptrEventThenNotDumpingGraph) { Event *ev = nullptr; std::stringstream stream; std::unordered_map map; map[ev] = 0; std::set dumpedCmdQs; std::set dumpedEvents; EventsTracker::dumpGraph(ev, stream, dumpedCmdQs, dumpedEvents, map); EXPECT_STREQ("", stream.str().c_str()); } TEST(EventsTracker, givenAlreadyDumpedEventThenNotDumpingGraph) { UserEvent uEvent; std::stringstream stream; std::unordered_map map; map[&uEvent] = 0; std::set dumpedCmdQs; std::set dumpedEvents; dumpedEvents.insert(&uEvent); EventsTracker::dumpGraph(&uEvent, stream, dumpedCmdQs, dumpedEvents, map); EXPECT_STREQ("", stream.str().c_str()); } TEST(EventsTracker, givenCmdqAndItsVirtualEventThenDumpingProperGraph) { MockContext ctx; MockCommandQueue cmdq; VirtualEvent vEvent(&cmdq, &ctx); vEvent.setCurrentCmdQVirtualEvent(true); vEvent.updateTaskCount(1, 0); std::stringstream stream; std::unordered_map map; map[&vEvent] = 0; std::set dumpedCmdQs; std::set dumpedEvents; EventsTracker::dumpGraph(&vEvent, stream, dumpedCmdQs, dumpedEvents, map); std::stringstream expected; expected << EventsTracker::label(&cmdq) << "[label=\"{------CmdQueue, ptr=" << &cmdq << "------|task count=0, level=0}\",color=blue];\ne0[label=\"{---------V_EVENT ptr=" << &vEvent << "------|CMD_UNKNOWN:4294967295|CL_QUEUED|task count=1, level=NOT_READY|CALLBACKS=FALSE}\",color=red];\n" << EventsTracker::label(&cmdq) << "->e0[label=\"VIRTUAL_EVENT\"];\n"; EXPECT_STREQ(expected.str().c_str(), stream.str().c_str()); } TEST(EventsTracker, 
givenTwoEventsWithCommonParentEventThenDumpingProperGraph) { UserEvent uEvent, uEventChild1, uEventChild2; uEvent.addChild(uEventChild1); uEvent.addChild(uEventChild2); std::stringstream stream; std::unordered_map map; map[&uEvent] = 0; map[&uEventChild1] = 1; map[&uEventChild2] = 2; std::set dumpedCmdQs; std::set dumpedEvents; EventsTracker::dumpGraph(&uEvent, stream, dumpedCmdQs, dumpedEvents, map); std::stringstream expected; expected << "e0[label=\"{------USER_EVENT ptr=" << &uEvent << "------||CL_QUEUED|task count=NOT_READY, level=NOT_READY|CALLBACKS=FALSE}\",color=red];\ne2[label=\"{------USER_EVENT ptr=" << &uEventChild2 << "------||CL_QUEUED|task count=NOT_READY, level=NOT_READY|CALLBACKS=FALSE}\",color=red];\ne0->e2;\ne1[label=\"{------USER_EVENT ptr=" << &uEventChild1 << "------||CL_QUEUED|task count=NOT_READY, level=NOT_READY|CALLBACKS=FALSE}\",color=red];\ne0->e1;\n"; EXPECT_STREQ(expected.str().c_str(), stream.str().c_str()); uEventChild1.updateCompletionStamp(0, 0, 0, 0); uEventChild2.updateCompletionStamp(0, 0, 0, 0); uEvent.updateCompletionStamp(0, 0, 0, 0); uEvent.setStatus(0); } TEST(EventsTracker, whenCalingCreateDumpStreamThenGettingValidFstreamInstance) { std::string testFileName("test_files\\EventsTracker_testfile.gv"); std::shared_ptr stream = EventsTracker::getEventsTracker().createDumpStream(testFileName); EXPECT_TRUE(stream->good()); static_cast(stream.get())->close(); remove(testFileName.c_str()); EventsTrackerMock::shutdownGlobalEvTracker(); } TEST(EventsTracker, whenDeletingEventTwoTimesThenDeletingIsProper) { UserEvent uEvent1; EventsTrackerMock evTrackerMock; std::stringstream expected; evTrackerMock.getList()->pushFrontOne(*new TrackedEvent{&uEvent1, 1}); evTrackerMock.getList()->pushFrontOne(*new TrackedEvent{&uEvent1, -2}); evTrackerMock.getList()->pushFrontOne(*new TrackedEvent{&uEvent1, -3}); evTrackerMock.dump(); expected << "digraph events_registry_" << &evTrackerMock << " {\nnode [shape=record]\n//pragma: 
somePragmaData\n\n}\n"; EXPECT_STREQ(expected.str().c_str(), evTrackerMock.streamMock.c_str()); } TEST(EventsTracker, givenTwoEventsWithSamePtrWhenFirstOneIsDeletedThenDumpingFirstProperly) { UserEvent uEvent; EventsTrackerMock evTrackerMock; std::stringstream expected; evTrackerMock.getList()->pushFrontOne(*new TrackedEvent{&uEvent, 2}); evTrackerMock.getList()->pushFrontOne(*new TrackedEvent{&uEvent, -1}); evTrackerMock.dump(); expected << "digraph events_registry_" << &evTrackerMock << " {\nnode [shape=record]\n//pragma: somePragmaData\ne2[label=\"{------USER_EVENT ptr=" << &uEvent << "------||CL_QUEUED|task count=NOT_READY, level=NOT_READY|CALLBACKS=FALSE}\",color=red];\n\n}\n"; EXPECT_STREQ(expected.str().c_str(), evTrackerMock.streamMock.c_str()); } TEST(EventsTracker, whenNotifyCreationOfEventThenEventIsDumped) { Event event(nullptr, CL_COMMAND_USER, CompletionStamp::notReady, CompletionStamp::notReady); EventsTrackerMock evTrackerMock; std::stringstream expected; evTrackerMock.notifyCreation(&event); expected << "digraph events_registry_" << &evTrackerMock << " {\nnode [shape=record]\n//pragma: somePragmaData\n\n}\n"; EXPECT_STREQ(expected.str().c_str(), evTrackerMock.streamMock.c_str()); } TEST(EventsTracker, whenNotifyTransitionedExecutionStatusOfEventThenEventIsDumpedWithProperDescription) { UserEvent uEvent; EventsTrackerMock evTrackerMock; evTrackerMock.notifyCreation(&uEvent); evTrackerMock.notifyTransitionedExecutionStatus(); std::stringstream expected; expected << "digraph events_registry_" << &evTrackerMock << " {\nnode [shape=record]\n//pragma: somePragmaData\ne0[label=\"{------USER_EVENT ptr=" << &uEvent << "------||CL_QUEUED|task count=NOT_READY, level=NOT_READY|CALLBACKS=FALSE}\",color=red];\n\n}\n"; EXPECT_STREQ(expected.str().c_str(), evTrackerMock.streamMock.c_str()); } TEST(EventsTracker, whenNotifyDestructionOfEventThenEventIsDumped) { UserEvent *uEvent = new UserEvent(); EventsTrackerMock evTrackerMock; 
evTrackerMock.notifyCreation(uEvent); evTrackerMock.notifyDestruction(uEvent); delete uEvent; std::stringstream stream; stream << "digraph events_registry_" << &evTrackerMock << " {\nnode [shape=record]\n//pragma: somePragmaData\n\n}\n"; EXPECT_STREQ(stream.str().c_str(), evTrackerMock.streamMock.c_str()); } TEST(EventsTracker, givenSeveralEventsWhenOneIsCompleteThenDumpingWithProperLabels) { UserEvent *uEvent1 = new UserEvent(); UserEvent *uEvent2 = new UserEvent(); UserEvent *uEvent3 = new UserEvent(); EventsTrackerMock evTrackerMock; evTrackerMock.notifyCreation(uEvent1); evTrackerMock.notifyCreation(uEvent2); evTrackerMock.notifyCreation(uEvent3); uEvent2->setStatus(CL_COMPLETE); evTrackerMock.notifyTransitionedExecutionStatus(); evTrackerMock.notifyDestruction(uEvent2); delete uEvent2; std::stringstream stream; stream << "digraph events_registry_" << &evTrackerMock << " {\nnode [shape=record]\n//pragma: somePragmaData\ne2[label=\"{------USER_EVENT ptr=" << uEvent3 << "------||CL_QUEUED|task count=NOT_READY, level=NOT_READY|CALLBACKS=FALSE}\",color=red];\ne0[label=\"{------USER_EVENT ptr=" << uEvent1 << "------||CL_QUEUED|task count=NOT_READY, level=NOT_READY|CALLBACKS=FALSE}\",color=red];\n\n}\n"; EXPECT_STREQ(stream.str().c_str(), evTrackerMock.streamMock.c_str()); delete uEvent1; delete uEvent3; } TEST(EventsTracker, givenEventsWithDependenciesBetweenThemThenDumpingProperGraph) { EventsTrackerMock evTrackerMock; UserEvent uEvent1; evTrackerMock.notifyCreation(&uEvent1); evTrackerMock.dump(); std::stringstream expected; expected << "digraph events_registry_" << &evTrackerMock << " {\nnode [shape=record]\n//pragma: somePragmaData\ne0[label=\"{------USER_EVENT ptr=" << &uEvent1 << "------||CL_QUEUED|task count=NOT_READY, level=NOT_READY|CALLBACKS=FALSE}\",color=red];\n\n}\n"; EXPECT_STREQ(expected.str().c_str(), evTrackerMock.streamMock.c_str()); UserEvent uEvent2; evTrackerMock.notifyCreation(&uEvent2); evTrackerMock.dump(); expected.str(std::string()); 
expected << "digraph events_registry_" << &evTrackerMock << " {\nnode [shape=record]\n//pragma: somePragmaData\ne1[label=\"{------USER_EVENT ptr=" << &uEvent2 << "------||CL_QUEUED|task count=NOT_READY, level=NOT_READY|CALLBACKS=FALSE}\",color=red];\ne0[label=\"{------USER_EVENT ptr=" << &uEvent1 << "------||CL_QUEUED|task count=NOT_READY, level=NOT_READY|CALLBACKS=FALSE}\",color=red];\n\n}\n"; EXPECT_STREQ(expected.str().c_str(), evTrackerMock.streamMock.c_str()); UserEvent uEventChild1; evTrackerMock.notifyCreation(&uEventChild1); uEvent1.addChild(uEventChild1); evTrackerMock.dump(); expected.str(std::string()); expected << "digraph events_registry_" << &evTrackerMock << " {\nnode [shape=record]\n//pragma: somePragmaData\ne1[label=\"{------USER_EVENT ptr=" << &uEvent2 << "------||CL_QUEUED|task count=NOT_READY, level=NOT_READY|CALLBACKS=FALSE}\",color=red];\ne0[label=\"{------USER_EVENT ptr=" << &uEvent1 << "------||CL_QUEUED|task count=NOT_READY, level=NOT_READY|CALLBACKS=FALSE}\",color=red];\ne2[label=\"{------USER_EVENT ptr=" << &uEventChild1 << "------||CL_QUEUED|task count=NOT_READY, level=NOT_READY|CALLBACKS=FALSE}\",color=red];\ne0->e2;\n\n}\n"; EXPECT_STREQ(expected.str().c_str(), evTrackerMock.streamMock.c_str()); UserEvent uEventChild2; evTrackerMock.notifyCreation(&uEventChild2); uEvent1.addChild(uEventChild2); evTrackerMock.dump(); expected.str(std::string()); expected << "digraph events_registry_" << &evTrackerMock << " {\nnode [shape=record]\n//pragma: somePragmaData\ne1[label=\"{------USER_EVENT ptr=" << &uEvent2 << "------||CL_QUEUED|task count=NOT_READY, level=NOT_READY|CALLBACKS=FALSE}\",color=red];\ne0[label=\"{------USER_EVENT ptr=" << &uEvent1 << "------||CL_QUEUED|task count=NOT_READY, level=NOT_READY|CALLBACKS=FALSE}\",color=red];\ne3[label=\"{------USER_EVENT ptr=" << &uEventChild2 << "------||CL_QUEUED|task count=NOT_READY, level=NOT_READY|CALLBACKS=FALSE}\",color=red];\ne0->e3;\ne2[label=\"{------USER_EVENT ptr=" << &uEventChild1 << 
"------||CL_QUEUED|task count=NOT_READY, level=NOT_READY|CALLBACKS=FALSE}\",color=red];\ne0->e2;\n\n}\n"; EXPECT_STREQ(expected.str().c_str(), evTrackerMock.streamMock.c_str()); uEvent2.addChild(uEvent1); evTrackerMock.dump(); expected.str(std::string()); expected << "digraph events_registry_" << &evTrackerMock << " {\nnode [shape=record]\n//pragma: somePragmaData\ne1[label=\"{------USER_EVENT ptr=" << &uEvent2 << "------||CL_QUEUED|task count=NOT_READY, level=NOT_READY|CALLBACKS=FALSE}\",color=red];\ne0[label=\"{------USER_EVENT ptr=" << &uEvent1 << "------||CL_QUEUED|task count=NOT_READY, level=NOT_READY|CALLBACKS=FALSE}\",color=red];\ne3[label=\"{------USER_EVENT ptr=" << &uEventChild2 << "------||CL_QUEUED|task count=NOT_READY, level=NOT_READY|CALLBACKS=FALSE}\",color=red];\ne0->e3;\ne2[label=\"{------USER_EVENT ptr=" << &uEventChild1 << "------||CL_QUEUED|task count=NOT_READY, level=NOT_READY|CALLBACKS=FALSE}\",color=red];\ne0->e2;\ne1->e0;\n\n}\n"; EXPECT_STREQ(expected.str().c_str(), evTrackerMock.streamMock.c_str()); uEventChild1.updateCompletionStamp(0, 0, 0, 0); uEventChild2.updateCompletionStamp(0, 0, 0, 0); uEvent2.updateCompletionStamp(0, 0, 0, 0); uEvent1.updateCompletionStamp(0, 0, 0, 0); uEvent2.setStatus(0); uEvent1.setStatus(0); } TEST(EventsTracker, whenEventsDebugEnableFlagIsTrueAndCreateOrChangeStatusOrDestroyEventThenDumpingGraph) { DebugManagerStateRestore dbRestore; DebugManager.flags.EventsTrackerEnable.set(true); EventsTrackerMock evTrackerMock; evTrackerMock.overrideGlobal(); Event *ev = new Event(nullptr, CL_COMMAND_NDRANGE_KERNEL, CompletionStamp::notReady, CompletionStamp::notReady); std::stringstream expected; expected << "digraph events_registry_" << &EventsTracker::getEventsTracker() << " {\nnode [shape=record]\n//pragma: somePragmaData\n\n}\n"; EXPECT_STREQ(expected.str().c_str(), static_cast(&EventsTracker::getEventsTracker())->streamMock.c_str()); ev->setStatus(1); expected.str(std::string()); expected << "digraph 
events_registry_" << &EventsTracker::getEventsTracker() << " {\nnode [shape=record]\n//pragma: somePragmaData\ne0[label=\"{-----------EVENT ptr=" << ev << "------|CL_COMMAND_NDRANGE_KERNEL|CL_RUNNING|task count=NOT_READY, level=NOT_READY|CALLBACKS=FALSE}\",color=red];\n\n}\n"; EXPECT_STREQ(expected.str().c_str(), static_cast(&EventsTracker::getEventsTracker())->streamMock.c_str()); delete ev; expected.str(std::string()); expected << "digraph events_registry_" << &EventsTracker::getEventsTracker() << " {\nnode [shape=record]\n//pragma: somePragmaData\n\n}\n"; EXPECT_STREQ(expected.str().c_str(), static_cast(&EventsTracker::getEventsTracker())->streamMock.c_str()); evTrackerMock.restoreGlobal(); } TEST(EventsTracker, givenEventsFromDifferentThreadsThenDumpingProperly) { class EventsTrackerMockMT : public EventsTrackerMock { public: TrackedEvent *getNodes() override { auto TrackedEventsMock = std::shared_ptr>{new IFList}; return TrackedEventsMock->detachNodes(); } std::shared_ptr> *TrackedEventsMock; }; auto evTrackerMockMT = std::shared_ptr{new EventsTrackerMockMT()}; UserEvent uEvent1; UserEvent uEvent2; evTrackerMockMT->getList()->pushFrontOne(*new TrackedEvent{&uEvent1, 2}); evTrackerMockMT->getList()->pushFrontOne(*new TrackedEvent{&uEvent2, 3}); evTrackerMockMT->dump(); std::stringstream expected; expected << "digraph events_registry_" << evTrackerMockMT << " {\nnode [shape=record]\n//pragma: somePragmaData\n\n}\n"; EXPECT_STREQ(expected.str().c_str(), evTrackerMockMT->streamMock.c_str()); } compute-runtime-22.14.22890/opencl/test/unit_test/event/user_events_tests.cpp000066400000000000000000001141611422164147700272440ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/wait_status.h" #include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/os_interface/os_context.h" 
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/libult/ult_command_stream_receiver.h"
#include "shared/test/common/mocks/mock_allocation_properties.h"
#include "shared/test/common/test_macros/test_checks_shared.h"

#include "opencl/test/unit_test/command_queue/enqueue_fixture.h"
#include "opencl/test/unit_test/mocks/mock_event.h"

#include "event_fixture.h"

using namespace NEO;

// Queries CL_EVENT_COMMAND_QUEUE and CL_EVENT_COMMAND_TYPE on a default-constructed
// user event: the reported queue must be null and the command type CL_COMMAND_USER.
TEST(UserEvent, GivenUserEventWhenGettingEventCommandTypeThenClCommandUserIsReturned) {
    UserEvent uEvent;
    size_t retSize = 0;
    cl_int retValue;

    retValue = clGetEventInfo(&uEvent, CL_EVENT_COMMAND_QUEUE, 0, nullptr, &retSize);
    ASSERT_EQ(CL_SUCCESS, retValue);
    EXPECT_EQ(sizeof(cl_command_queue), retSize);

    // Seed with a recognisable non-null value so the check proves the query overwrote it.
    auto cmdQueue = reinterpret_cast<cl_command_queue>(static_cast<uintptr_t>(0xdeadbeaf));
    retValue = clGetEventInfo(&uEvent, CL_EVENT_COMMAND_QUEUE, retSize, &cmdQueue, nullptr);
    ASSERT_EQ(CL_SUCCESS, retValue);
    EXPECT_EQ(nullptr, cmdQueue);

    retValue = clGetEventInfo(&uEvent, CL_EVENT_COMMAND_TYPE, 0, nullptr, &retSize);
    ASSERT_EQ(CL_SUCCESS, retValue);
    EXPECT_EQ(sizeof(cl_command_type), retSize);

    // Start from a different command type so a query that writes nothing is detected.
    auto cmdType = CL_COMMAND_SVM_UNMAP;
    retValue = clGetEventInfo(&uEvent, CL_EVENT_COMMAND_TYPE, retSize, &cmdType, nullptr);
    ASSERT_EQ(CL_SUCCESS, retValue);
    EXPECT_EQ(CL_COMMAND_USER, cmdType);
}

// A user event constructed with a context must report that same context.
TEST(UserEvent, WhenGettingEventContextThenCorrectContextIsReturned) {
    MockContext mc;
    cl_context dummyContext = &mc;
    UserEvent uEvent(&mc);
    size_t retSize = 0;
    cl_int retValue;

    retValue = clGetEventInfo(&uEvent, CL_EVENT_CONTEXT, 0, nullptr, &retSize);
    ASSERT_EQ(CL_SUCCESS, retValue);
    EXPECT_EQ(sizeof(cl_context), retSize);

    cl_context context = nullptr;
    retValue = clGetEventInfo(&uEvent, CL_EVENT_CONTEXT, retSize, &context, nullptr);
    ASSERT_EQ(CL_SUCCESS, retValue);
    ASSERT_EQ(context, dummyContext);
}

// A user event constructed without a context must report a null context.
TEST(UserEvent, GivenInitialStatusOfUserEventWhenGettingEventContextThenNullIsReturned) {
    UserEvent uEvent;

    // Non-null seed: avoids reading an uninitialised handle and proves the query wrote null.
    auto context = reinterpret_cast<cl_context>(static_cast<uintptr_t>(0xdeadbeaf));
    auto retValue = clGetEventInfo(&uEvent, CL_EVENT_CONTEXT, sizeof(cl_context), &context, nullptr);
    ASSERT_EQ(CL_SUCCESS, retValue);
    ASSERT_EQ(context, nullptr);
}

// Through the API, a freshly created user event reports CL_SUBMITTED.
TEST(UserEvent, GivenInitialStatusOfUserEventWhenGettingCommandExecutionStatusThenClSubmittedIsReturned) {
    UserEvent uEvent;
    size_t retSize = 0;
    cl_int retValue;

    retValue = clGetEventInfo(&uEvent, CL_EVENT_COMMAND_EXECUTION_STATUS, 0, nullptr, &retSize);
    ASSERT_EQ(CL_SUCCESS, retValue);
    EXPECT_EQ(sizeof(cl_int), retSize);

    auto cmdStatus = CL_COMPLETE;
    retValue = clGetEventInfo(&uEvent, CL_EVENT_COMMAND_EXECUTION_STATUS, retSize, &cmdStatus, nullptr);
    ASSERT_EQ(CL_SUCCESS, retValue);
    EXPECT_EQ(CL_SUBMITTED, cmdStatus);
}

// Internally (peekExecutionStatus) the same fresh event is still CL_QUEUED.
TEST(UserEvent, givenUserEventWhenItIsQueriedForExecutionStatusThenClQueueIsReturned) {
    UserEvent uEvent;
    EXPECT_EQ(CL_QUEUED, uEvent.peekExecutionStatus());
}

TEST(UserEvent, givenUserEventWhenItIsCreatedThenItIsInInitialState) {
    UserEvent uEvent;
    EXPECT_TRUE(uEvent.isInitialEventStatus());
}

TEST(UserEvent, givenUserEventWhenItIsCreatedAndSetThenItIsNotInInitialState) {
    UserEvent uEvent;
    uEvent.setStatus(CL_COMPLETE);
    EXPECT_FALSE(uEvent.isInitialEventStatus());
}

// A stack-constructed user event must report a reference count of one.
TEST(UserEvent, GivenUserEventWhenGettingEventReferenceCountThenOneIsReturned) {
    UserEvent uEvent;
    size_t retSize = 0;
    cl_int retValue;

    retValue = clGetEventInfo(&uEvent, CL_EVENT_REFERENCE_COUNT, 0, nullptr, &retSize);
    ASSERT_EQ(CL_SUCCESS, retValue);
    EXPECT_EQ(sizeof(cl_uint), retSize);

    auto refCount = 100;
    retValue = clGetEventInfo(&uEvent, CL_EVENT_REFERENCE_COUNT, retSize, &refCount, nullptr);
    ASSERT_EQ(CL_SUCCESS, retValue);
    EXPECT_EQ(1, refCount);
}

// After setStatus(CL_COMPLETE) the API must report CL_COMPLETE.
TEST(UserEvent, GivenSetCompleteStatusWhenGettingEventCommandExecutionStatusThenClCompleteIsReturned) {
    UserEvent uEvent;
    uEvent.setStatus(CL_COMPLETE);
    size_t retSize = 0;
    cl_int retValue;

    retValue = clGetEventInfo(&uEvent, CL_EVENT_COMMAND_EXECUTION_STATUS, 0, nullptr, &retSize);
    ASSERT_EQ(CL_SUCCESS, retValue);
    EXPECT_EQ(sizeof(cl_int), retSize);

    // Seed with a status other than the expected one; seeding with CL_COMPLETE would let
    // the test pass even if the query never wrote to the output.
    auto cmdStatus = CL_QUEUED;
    retValue = clGetEventInfo(&uEvent, CL_EVENT_COMMAND_EXECUTION_STATUS, retSize, &cmdStatus, nullptr);
    ASSERT_EQ(CL_SUCCESS, retValue);
    EXPECT_EQ(CL_COMPLETE, cmdStatus);
}
TEST(UserEvent, GivenInitialUserEventWhenGettingCommandsThenNullIsReturned) { UserEvent uEvent; EXPECT_EQ(nullptr, uEvent.peekCommand()); } TEST(UserEvent, GivenInitialUserEventStateWhenCheckingReadyForSubmissionThenFalseIsReturned) { UserEvent uEvent; EXPECT_FALSE(uEvent.isReadyForSubmission()); } TEST(UserEvent, GivenUserEventWhenGettingTaskLevelThenZeroIsReturned) { MyUserEvent uEvent; EXPECT_EQ(0U, uEvent.getTaskLevel()); EXPECT_EQ(WaitStatus::NotReady, uEvent.wait(false, false)); } TEST(UserEvent, WhenSettingStatusThenReadyForSubmissionisTrue) { UserEvent uEvent; uEvent.setStatus(0); EXPECT_TRUE(uEvent.isReadyForSubmission()); } TEST(UserEvent, givenUserEventWhenStatusIsCompletedThenReturnZeroTaskLevel) { UserEvent uEvent; uEvent.setStatus(CL_QUEUED); EXPECT_EQ(CompletionStamp::notReady, uEvent.getTaskLevel()); uEvent.setStatus(CL_SUBMITTED); EXPECT_EQ(CompletionStamp::notReady, uEvent.getTaskLevel()); uEvent.setStatus(CL_RUNNING); EXPECT_EQ(CompletionStamp::notReady, uEvent.getTaskLevel()); uEvent.setStatus(CL_COMPLETE); EXPECT_EQ(0u, uEvent.getTaskLevel()); } typedef HelloWorldTest EventTests; TEST_F(MockEventTests, GivenBlockedUserEventWhenEnqueueingNdRangeWithoutReturnEventThenDoNotSubmitToCsr) { uEvent = make_releaseable(); cl_event userEvent = uEvent.get(); cl_event *eventWaitList = &userEvent; auto &csr = pCmdQ->getGpgpuCommandStreamReceiver(); auto taskCount = csr.peekTaskCount(); //call NDR auto retVal = callOneWorkItemNDRKernel(eventWaitList, 1); auto taskCountAfter = csr.peekTaskCount(); //queue should be in blocked state at this moment, task level should be inherited from user event EXPECT_EQ(CompletionStamp::notReady, pCmdQ->taskLevel); //queue should be in blocked state at this moment, task count should be inherited from user event EXPECT_EQ(CompletionStamp::notReady, pCmdQ->taskCount); //queue should be in blocked state EXPECT_EQ(pCmdQ->isQueueBlocked(), true); //and virtual event should be created ASSERT_NE(nullptr, pCmdQ->virtualEvent); 
//check if kernel was in fact not submitted EXPECT_EQ(taskCountAfter, taskCount); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(MockEventTests, GivenBlockedUserEventWhenEnqueueingNdRangeWithReturnEventThenDoNotSubmitToCsr) { uEvent = make_releaseable(); cl_event userEvent = uEvent.get(); cl_event retEvent = nullptr; cl_event *eventWaitList = &userEvent; auto &csr = pCmdQ->getGpgpuCommandStreamReceiver(); auto taskCount = csr.peekTaskCount(); //call NDR auto retVal = callOneWorkItemNDRKernel(eventWaitList, 1, &retEvent); auto taskCountAfter = csr.peekTaskCount(); //queue should be in blocked state at this moment, task level should be inherited from user event EXPECT_EQ(CompletionStamp::notReady, pCmdQ->taskLevel); //queue should be in blocked state at this moment, task count should be inherited from user event EXPECT_EQ(CompletionStamp::notReady, pCmdQ->taskCount); //queue should be in blocked state EXPECT_EQ(pCmdQ->isQueueBlocked(), true); //and virtual event should be created ASSERT_NE(nullptr, pCmdQ->virtualEvent); //that matches the retEvent EXPECT_EQ(retEvent, pCmdQ->virtualEvent); //check if kernel was in fact not submitted EXPECT_EQ(taskCountAfter, taskCount); //and if normal event inherited status from user event Event *returnEvent = castToObject(retEvent); EXPECT_EQ(returnEvent->taskLevel, CompletionStamp::notReady); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseEvent(retEvent); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(MockEventTests, WhenAddingChildEventThenConnectionIsCreatedAndCountOnReturnEventIsInjected) { uEvent = make_releaseable(); cl_event userEvent = uEvent.get(); cl_event retEvent = nullptr; cl_event *eventWaitList = &userEvent; //call NDR callOneWorkItemNDRKernel(eventWaitList, 1, &retEvent); //check if dependency count is increased Event *returnEvent = castToObject(retEvent); EXPECT_EQ(1U, returnEvent->peekNumEventsBlockingThis()); //check if user event knows his childs EXPECT_TRUE(uEvent->peekHasChildEvents()); //make sure that proper event is set 
as child Event *childEvent = pCmdQ->virtualEvent; EXPECT_EQ(childEvent, uEvent->peekChildEvents()->ref); auto retVal = clReleaseEvent(retEvent); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(EventTests, givenNormalEventThatHasParentUserEventWhenUserEventIsUnblockedThenChildEventIsCompleteIfGpuCompletedProcessing) { UserEvent uEvent; Event event(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 0, 0); uEvent.addChild(event); EXPECT_FALSE(event.updateStatusAndCheckCompletion()); EXPECT_EQ(CL_QUEUED, event.peekExecutionStatus()); uEvent.setStatus(CL_COMPLETE); EXPECT_EQ(CL_COMPLETE, event.peekExecutionStatus()); } TEST_F(MockEventTests, WhenAddingTwoChildEventsThenConnectionIsCreatedAndCountOnReturnEventIsInjected) { uEvent = make_releaseable(); auto uEvent2 = make_releaseable(); cl_event retEvent = nullptr; cl_event eventWaitList[] = {uEvent.get(), uEvent2.get()}; int sizeOfWaitList = sizeof(eventWaitList) / sizeof(cl_event); //call NDR callOneWorkItemNDRKernel(eventWaitList, sizeOfWaitList, &retEvent); //check if dependency count is increased Event *returnEvent = castToObject(retEvent); ASSERT_EQ(2U, returnEvent->peekNumEventsBlockingThis()); //check if user event knows his childs EXPECT_TRUE(uEvent->peekHasChildEvents()); //check if user event knows his childs EXPECT_TRUE(uEvent2->peekHasChildEvents()); //make sure that proper event is set as child Event *childEvent = pCmdQ->virtualEvent; EXPECT_EQ(childEvent, uEvent->peekChildEvents()->ref); EXPECT_FALSE(childEvent->isReadyForSubmission()); //make sure that proper event is set as child EXPECT_EQ(childEvent, uEvent2->peekChildEvents()->ref); //signal one user event, child event after this operation isn't ready for submission uEvent->setStatus(0); //check if user event knows his children EXPECT_FALSE(uEvent->peekHasChildEvents()); EXPECT_EQ(1U, returnEvent->peekNumEventsBlockingThis()); EXPECT_FALSE(returnEvent->isReadyForSubmission()); auto retVal = clReleaseEvent(retEvent); EXPECT_EQ(CL_SUCCESS, retVal); uEvent2->setStatus(-1); } 
TEST_F(MockEventTests, GivenTwoUserEvenstWhenCountOnNdr1IsInjectedThenItIsPropagatedToNdr2viaVirtualEvent) { uEvent = make_releaseable(context); auto uEvent2 = make_releaseable(context); cl_event eventWaitList[] = {uEvent.get(), uEvent2.get()}; int sizeOfWaitList = sizeof(eventWaitList) / sizeof(cl_event); //call NDR, no return Event auto retVal = callOneWorkItemNDRKernel(eventWaitList, sizeOfWaitList, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); //check if dependency count is increased Event *returnEvent1 = castToObject(pCmdQ->virtualEvent); ASSERT_EQ(2U, returnEvent1->peekNumEventsBlockingThis()); //check if user event knows his childs EXPECT_TRUE(uEvent->peekHasChildEvents()); //check if user event knows his childs EXPECT_TRUE(uEvent2->peekHasChildEvents()); //make sure that proper event is set as child Event *childEvent = pCmdQ->virtualEvent; EXPECT_EQ(childEvent, uEvent->peekChildEvents()->ref); //make sure that proper event is set as child EXPECT_EQ(childEvent, uEvent2->peekChildEvents()->ref); //call NDR, no events, Virtual Event mustn't leak and will be bind to previous Virtual Event retVal = callOneWorkItemNDRKernel(); EXPECT_EQ(CL_SUCCESS, retVal); //queue must be in blocked state EXPECT_EQ(pCmdQ->isQueueBlocked(), true); //check if virtual event2 is a child of virtual event 1 VirtualEvent *returnEvent2 = castToObject(pCmdQ->virtualEvent); ASSERT_TRUE(returnEvent1->peekHasChildEvents()); EXPECT_EQ(returnEvent2, returnEvent1->peekChildEvents()->ref); //now signal both parents and see if all childs are notified uEvent->setStatus(CL_COMPLETE); uEvent2->setStatus(CL_COMPLETE); //queue shoud be in unblocked state EXPECT_EQ(pCmdQ->isQueueBlocked(), false); //finish returns immidieatly retVal = clFinish(pCmdQ); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(EventTests, givenQueueThatIsBlockedByUserEventWhenIsQueueBlockedIsCalledThenVirtualEventOnlyQueriesForExecutionStatus) { struct mockEvent : public Event { using Event::Event; void updateExecutionStatus() override { 
updateExecutionStatusCalled = true; } bool updateExecutionStatusCalled = false; }; mockEvent mockedVirtualEvent(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, CompletionStamp::notReady, 0); pCmdQ->virtualEvent = &mockedVirtualEvent; EXPECT_TRUE(pCmdQ->isQueueBlocked()); EXPECT_FALSE(mockedVirtualEvent.updateExecutionStatusCalled); pCmdQ->virtualEvent = nullptr; } TEST_F(MockEventTests, GivenUserEventSignalingWhenFinishThenExecutionIsNotBlocked) { uEvent = make_releaseable(context); auto uEvent2 = make_releaseable(context); cl_event eventWaitList[] = {uEvent.get(), uEvent2.get()}; int sizeOfWaitList = sizeof(eventWaitList) / sizeof(cl_event); //call NDR, no return Event auto retVal = callOneWorkItemNDRKernel(eventWaitList, sizeOfWaitList, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); uEvent->setStatus(0); uEvent2->setStatus(0); retVal = clFinish(pCmdQ); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(MockEventTests, WhenCompletingUserEventThenStatusPropagatedToNormalEvent) { uEvent = make_releaseable(); cl_event retEvent = nullptr; cl_event eventWaitList[] = {uEvent.get()}; int sizeOfWaitList = sizeof(eventWaitList) / sizeof(cl_event); //call NDR callOneWorkItemNDRKernel(eventWaitList, sizeOfWaitList, &retEvent); //set user event status uEvent->setStatus(CL_COMPLETE); //wait for returned event auto retVal = clWaitForEvents(1, &retEvent); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseEvent(retEvent); EXPECT_EQ(CL_SUCCESS, retVal); } HWTEST_F(EventTests, WhenSignalingThenUserEventObtainsProperTaskLevel) { UserEvent uEvent(context); auto &csr = pDevice->getUltCommandStreamReceiver(); auto taskLevel = csr.peekTaskLevel(); csr.taskCount = 3; uEvent.setStatus(CL_COMPLETE); EXPECT_EQ(taskLevel, uEvent.taskLevel); csr.taskLevel = 2; csr.taskCount = 5; uEvent.setStatus(CL_COMPLETE); //even though csr taskLevel has changed, user event taskLevel should remain constant EXPECT_EQ(0u, uEvent.taskLevel); } TEST_F(MockEventTests, GivenUserEventWhenSettingStatusCompleteThenTaskLevelIsUpdatedCorrectly) { 
uEvent = make_releaseable(context); auto &csr = pCmdQ->getGpgpuCommandStreamReceiver(); auto taskLevel = csr.peekTaskLevel(); cl_event retEvent = nullptr; cl_event eventWaitList[] = {uEvent.get()}; int sizeOfWaitList = sizeof(eventWaitList) / sizeof(cl_event); //call NDR retVal = callOneWorkItemNDRKernel(eventWaitList, sizeOfWaitList, &retEvent); EXPECT_EQ(CL_SUCCESS, retVal); //check if dependency count is increased Event *returnEvent = castToObject(retEvent); EXPECT_EQ(CompletionStamp::notReady, returnEvent->taskLevel); EXPECT_EQ(CompletionStamp::notReady, returnEvent->peekTaskCount()); //now set user event for complete status, this triggers update of childs. uEvent->setStatus(CL_COMPLETE); //child event should have the same taskLevel as parentEvent, as parent event is top of the tree and doesn't have any commands. EXPECT_EQ(returnEvent->taskLevel, taskLevel); EXPECT_EQ(csr.peekTaskCount(), returnEvent->peekTaskCount()); retVal = clReleaseEvent(retEvent); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(MockEventTests, GivenCompleteParentWhenWaitingForEventsThenChildrenAreComplete) { uEvent = make_releaseable(context); cl_event retEvent = nullptr; cl_event eventWaitList[] = {uEvent.get()}; int sizeOfWaitList = sizeof(eventWaitList) / sizeof(cl_event); //call NDR retVal = callOneWorkItemNDRKernel(eventWaitList, sizeOfWaitList, &retEvent); EXPECT_EQ(CL_SUCCESS, retVal); //check if dependency count is increased Event *returnEvent = castToObject(retEvent); EXPECT_EQ(CompletionStamp::notReady, returnEvent->taskLevel); //now set user event for complete status, this triggers update of childs. 
uEvent->setStatus(CL_COMPLETE); retVal = clWaitForEvents(1, &retEvent); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseEvent(retEvent); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(EventTests, WhenStatusIsAbortedWhenWaitingForEventsThenErrorIsReturned) { UserEvent uEvent(context); cl_event eventWaitList[] = {&uEvent}; int sizeOfWaitList = sizeof(eventWaitList) / sizeof(cl_event); //negative values indicate abortion uEvent.setStatus(-1); retVal = clWaitForEvents(sizeOfWaitList, eventWaitList); EXPECT_EQ(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST, retVal); } TEST_F(MockEventTests, GivenAbortedUserEventWhenEnqueingNdrThenDoNotFlushToCsr) { uEvent = make_releaseable(context); cl_event eventWaitList[] = {uEvent.get()}; int sizeOfWaitList = sizeof(eventWaitList) / sizeof(cl_event); cl_event retEvent = nullptr; auto &csr = pCmdQ->getGpgpuCommandStreamReceiver(); auto taskCount = csr.peekTaskCount(); //call NDR retVal = callOneWorkItemNDRKernel(eventWaitList, sizeOfWaitList, &retEvent); EXPECT_EQ(CL_SUCCESS, retVal); //negative values indicate abortion uEvent->setStatus(-1); auto taskCountAfter = csr.peekTaskCount(); EXPECT_EQ(taskCount, taskCountAfter); Event *pChildEvent = (Event *)retEvent; EXPECT_EQ(CompletionStamp::notReady, pChildEvent->getTaskLevel()); cl_int eventStatus = 0; retVal = clGetEventInfo(retEvent, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &eventStatus, NULL); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(-1, eventStatus); retVal = clReleaseEvent(retEvent); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(MockEventTests, givenDebugVariableWhenStatusIsQueriedThenNoFlushHappens) { DebugManagerStateRestore restorer; DebugManager.flags.SkipFlushingEventsOnGetStatusCalls.set(1); DebugManager.flags.PerformImplicitFlushForNewResource.set(0); DebugManager.flags.PerformImplicitFlushForIdleGpu.set(0); auto &csr = pCmdQ->getGpgpuCommandStreamReceiver(); csr.overrideDispatchPolicy(DispatchMode::BatchedDispatch); csr.postInitFlagsSetup(); cl_event retEvent = nullptr; auto 
latestFlushed = csr.peekLatestFlushedTaskCount(); retVal = callOneWorkItemNDRKernel(nullptr, 0u, &retEvent); EXPECT_EQ(CL_SUCCESS, retVal); cl_int eventStatus = 0; retVal = clGetEventInfo(retEvent, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &eventStatus, NULL); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(csr.peekLatestFlushedTaskCount(), latestFlushed); retVal = clReleaseEvent(retEvent); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(MockEventTests, GivenAbortedParentWhenDestroyingChildEventThenDoNotProcessBlockedCommands) { uEvent = make_releaseable(context); cl_event eventWaitList[] = {uEvent.get()}; int sizeOfWaitList = sizeof(eventWaitList) / sizeof(cl_event); cl_event retEvent = nullptr; auto &csr = pCmdQ->getGpgpuCommandStreamReceiver(); auto taskCount = csr.peekTaskCount(); //call NDR retVal = callOneWorkItemNDRKernel(eventWaitList, sizeOfWaitList, &retEvent); EXPECT_EQ(CL_SUCCESS, retVal); //call second NDR to create Virtual Event retVal = callOneWorkItemNDRKernel(&retEvent, 1, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); //negative values indicate abortion uEvent->setStatus(-1); auto taskCountAfter = csr.peekTaskCount(); EXPECT_EQ(taskCount, taskCountAfter); Event *pChildEvent = (Event *)retEvent; EXPECT_EQ(CompletionStamp::notReady, pChildEvent->taskLevel); cl_int eventStatus = 0; retVal = clGetEventInfo(retEvent, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &eventStatus, NULL); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(-1, eventStatus); retVal = clReleaseEvent(retEvent); EXPECT_EQ(CL_SUCCESS, retVal); taskCountAfter = csr.peekTaskCount(); EXPECT_EQ(taskCount, taskCountAfter); } TEST_F(MockEventTests, GivenAbortedUserEventWhenWaitingForEventThenErrorIsReturned) { uEvent = make_releaseable(context); cl_event eventWaitList[] = {uEvent.get()}; int sizeOfWaitList = sizeof(eventWaitList) / sizeof(cl_event); cl_event retEvent = nullptr; //call NDR retVal = callOneWorkItemNDRKernel(eventWaitList, sizeOfWaitList, &retEvent); EXPECT_EQ(CL_SUCCESS, retVal); 
//negative values indicate abortion uEvent->setStatus(-1); eventWaitList[0] = retEvent; retVal = clWaitForEvents(sizeOfWaitList, eventWaitList); EXPECT_EQ(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST, retVal); retVal = clReleaseEvent(retEvent); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(MockEventTests, GivenAbortedUserEventAndTwoInputsWhenWaitingForEventThenErrorIsReturned) { uEvent = make_releaseable(context); auto uEvent2 = make_releaseable(context); cl_event eventWaitList[] = {uEvent.get(), uEvent2.get()}; int sizeOfWaitList = sizeof(eventWaitList) / sizeof(cl_event); cl_event retEvent = nullptr; //call NDR retVal = callOneWorkItemNDRKernel(eventWaitList, sizeOfWaitList, &retEvent); EXPECT_EQ(CL_SUCCESS, retVal); //negative values indicate abortion uEvent->setStatus(-1); eventWaitList[0] = retEvent; retVal = clWaitForEvents(sizeOfWaitList, eventWaitList); EXPECT_EQ(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST, retVal); uEvent2->setStatus(-1); retVal = clReleaseEvent(retEvent); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(MockEventTests, GivenAbortedQueueWhenFinishingThenSuccessIsReturned) { uEvent = make_releaseable(context); auto &csr = pCmdQ->getGpgpuCommandStreamReceiver(); auto taskLevel = csr.peekTaskLevel(); cl_event eventWaitList[] = {uEvent.get()}; int sizeOfWaitList = sizeof(eventWaitList) / sizeof(cl_event); //call NDR retVal = callOneWorkItemNDRKernel(eventWaitList, sizeOfWaitList); EXPECT_EQ(CL_SUCCESS, retVal); //negative values indicate abortion uEvent->setStatus(-1); //make sure we didn't asked CSR for task level for this event, as it is aborted EXPECT_NE(taskLevel, uEvent->taskLevel); retVal = clFinish(pCmdQ); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(MockEventTests, GivenUserEventWhenEnqueingThenDependantPacketIsRegistered) { uEvent = make_releaseable(context); cl_event eventWaitList[] = {uEvent.get()}; int sizeOfWaitList = sizeof(eventWaitList) / sizeof(cl_event); //call NDR retVal = callOneWorkItemNDRKernel(eventWaitList, sizeOfWaitList); //virtual 
event should register for this command packet ASSERT_NE(nullptr, pCmdQ->virtualEvent); EXPECT_NE(nullptr, pCmdQ->virtualEvent->peekCommand()); EXPECT_FALSE(pCmdQ->virtualEvent->peekIsCmdSubmitted()); } TEST_F(MockEventTests, GivenUserEventWhenEnqueingThenCommandPacketContainsValidCommandStream) { uEvent = make_releaseable(context); cl_event eventWaitList[] = {uEvent.get()}; int sizeOfWaitList = sizeof(eventWaitList) / sizeof(cl_event); //call NDR retVal = callOneWorkItemNDRKernel(eventWaitList, sizeOfWaitList); //virtual event should register for this command packet ASSERT_NE(nullptr, pCmdQ->virtualEvent); auto cmd = static_cast(pCmdQ->virtualEvent->peekCommand()); EXPECT_NE(0u, cmd->getCommandStream()->getUsed()); } TEST_F(MockEventTests, WhenStatusIsSetThenBlockedPacketsAreSent) { uEvent = make_releaseable(context); cl_event eventWaitList[] = {uEvent.get()}; auto &csr = pCmdQ->getGpgpuCommandStreamReceiver(); int sizeOfWaitList = sizeof(eventWaitList) / sizeof(cl_event); //call NDR retVal = callOneWorkItemNDRKernel(eventWaitList, sizeOfWaitList); EXPECT_EQ(CL_SUCCESS, retVal); //task level untouched as queue blocked by user event EXPECT_EQ(csr.peekTaskLevel(), 0u); //virtual event have stored command packet Event *childEvent = pCmdQ->virtualEvent; EXPECT_NE(nullptr, childEvent); EXPECT_NE(nullptr, childEvent->peekCommand()); EXPECT_FALSE(childEvent->isReadyForSubmission()); EXPECT_NE(nullptr, childEvent->peekCommand()); //signal the input user event uEvent->setStatus(0); EXPECT_EQ(csr.peekTaskLevel(), 1u); } TEST_F(MockEventTests, WhenFinishingThenVirtualEventIsNullAndReleaseEventReturnsSuccess) { uEvent = make_releaseable(context); cl_event eventWaitList[] = {uEvent.get()}; int sizeOfWaitList = sizeof(eventWaitList) / sizeof(cl_event); cl_event retEvent; //call NDR retVal = callOneWorkItemNDRKernel(eventWaitList, sizeOfWaitList, &retEvent); EXPECT_EQ(CL_SUCCESS, retVal); uEvent->setStatus(0); //call finish multiple times retVal |= clFinish(pCmdQ); retVal |= 
clFinish(pCmdQ); retVal |= clFinish(pCmdQ); EXPECT_EQ(CL_SUCCESS, retVal); //Virtual Event is gone, but retEvent still lives. EXPECT_EQ(nullptr, pCmdQ->virtualEvent); retVal = clReleaseEvent(retEvent); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(MockEventTests, givenBlockedQueueThenCommandStreamDoesNotChangeWhileEnqueueAndAfterSignaling) { uEvent = make_releaseable(context); cl_event eventWaitList[] = {uEvent.get()}; int sizeOfWaitList = sizeof(eventWaitList) / sizeof(cl_event); cl_event retEvent; auto &cs = pCmdQ->getCS(1024); auto used = cs.getSpace(0); //call NDR retVal = callOneWorkItemNDRKernel(eventWaitList, sizeOfWaitList, &retEvent); EXPECT_EQ(CL_SUCCESS, retVal); auto used2 = cs.getSpace(0); EXPECT_EQ(used2, used); uEvent->setStatus(CL_COMPLETE); auto used3 = cs.getSpace(0); //call finish multiple times retVal |= clFinish(pCmdQ); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(used3, used); retVal = clReleaseEvent(retEvent); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(EventTests, givenUserEventThatHasCallbackAndBlockQueueWhenQueueIsQueriedForBlockedThenCallBackIsCalled) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableAsyncEventsHandler.set(false); struct EV : UserEvent { EV(Context *ctx) : UserEvent(ctx) { } void updateExecutionStatus() override { updated++; } int updated = 0; }; auto event1 = MockEventBuilder::createAndFinalize(&pCmdQ->getContext()); struct E2Clb { static void CL_CALLBACK SignalEv2(cl_event e, cl_int status, void *data) { bool *called = (bool *)data; *called = true; } }; cl_event eventWaitList[] = {event1}; int sizeOfWaitList = sizeof(eventWaitList) / sizeof(cl_event); cl_event retEvent; //call NDR retVal = callOneWorkItemNDRKernel(eventWaitList, sizeOfWaitList, &retEvent); ASSERT_EQ(retVal, CL_SUCCESS); bool callbackCalled = false; retVal = clSetEventCallback(event1, CL_COMPLETE, E2Clb::SignalEv2, &callbackCalled); ASSERT_EQ(retVal, CL_SUCCESS); EXPECT_EQ(1, event1->updated); EXPECT_TRUE(pCmdQ->isQueueBlocked()); 
event1->setStatus(CL_COMPLETE); // Must wait for event that depend on callback event to ensure callback is called. Event::waitForEvents(1, &retEvent); EXPECT_TRUE(callbackCalled); clReleaseEvent(retEvent); event1->release(); } TEST_F(EventTests, GivenEventCallbackWithWaitWhenWaitingForEventsThenSuccessIsReturned) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableAsyncEventsHandler.set(false); UserEvent event1; struct E2Clb { static void CL_CALLBACK SignalEv2(cl_event e, cl_int status, void *data) { UserEvent *event2 = static_cast(data); event2->setStatus(CL_COMPLETE); } }; cl_event retEvent; //call NDR retVal = callOneWorkItemNDRKernel(nullptr, 0, &retEvent); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clWaitForEvents(1, &retEvent); EXPECT_EQ(CL_SUCCESS, retVal); clSetEventCallback(retEvent, CL_COMPLETE, E2Clb::SignalEv2, &event1); cl_event events[] = {&event1}; auto result = UserEvent::waitForEvents(sizeof(events) / sizeof(events[0]), events); EXPECT_EQ(result, CL_SUCCESS); retVal = clReleaseEvent(retEvent); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(EventTests, GivenEventCallbackWithoutWaitWhenWaitingForEventsThenSuccessIsReturned) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableAsyncEventsHandler.set(false); UserEvent event1(context); struct E2Clb { static void CL_CALLBACK SignalEv2(cl_event e, cl_int status, void *data) { UserEvent *event2 = static_cast(data); event2->setStatus(CL_COMPLETE); } }; cl_event retEvent; //call NDR retVal = callOneWorkItemNDRKernel(nullptr, 0, &retEvent); EXPECT_EQ(CL_SUCCESS, retVal); clSetEventCallback(retEvent, CL_COMPLETE, E2Clb::SignalEv2, &event1); cl_event events[] = {&event1}; auto result = UserEvent::waitForEvents(sizeof(events) / sizeof(events[0]), events); EXPECT_EQ(result, CL_SUCCESS); retVal = clReleaseEvent(retEvent); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(MockEventTests, GivenEnqueueReadImageWhenWaitingforEventThenSuccessIsReturned) { REQUIRE_IMAGES_OR_SKIP(defaultHwInfo); cl_event retEvent; 
uEvent = make_releaseable(context); cl_event eventWaitList[] = {uEvent.get()}; auto image = clUniquePtr(Image2dHelper<>::create(this->context)); ASSERT_NE(nullptr, image); auto retVal = EnqueueReadImageHelper<>::enqueueReadImage(pCmdQ, image.get(), false, EnqueueReadImageTraits::origin, EnqueueReadImageTraits::region, EnqueueReadImageTraits::rowPitch, EnqueueReadImageTraits::slicePitch, EnqueueReadImageTraits::hostPtr, EnqueueReadImageTraits::mapAllocation, 1, eventWaitList, &retEvent); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clSetUserEventStatus(uEvent.get(), CL_COMPLETE); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clWaitForEvents(1, &retEvent); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseEvent(retEvent); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(EventTests, WhenWaitingForEventsThenTemporaryAllocationsAreDestroyed) { auto &csr = pCmdQ->getGpgpuCommandStreamReceiver(); auto memoryManager = pCmdQ->getDevice().getMemoryManager(); EXPECT_TRUE(csr.getTemporaryAllocations().peekIsEmpty()); GraphicsAllocation *temporaryAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr.getRootDeviceIndex(), MemoryConstants::pageSize}); csr.getInternalAllocationStorage()->storeAllocation(std::unique_ptr(temporaryAllocation), TEMPORARY_ALLOCATION); EXPECT_EQ(temporaryAllocation, csr.getTemporaryAllocations().peekHead()); temporaryAllocation->updateTaskCount(10, csr.getOsContext().getContextId()); Event event(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 3, 11); cl_event eventWaitList[] = {&event}; event.waitForEvents(1, eventWaitList); EXPECT_TRUE(csr.getTemporaryAllocations().peekIsEmpty()); } TEST_F(EventTest, WhenUserEventIsCreatedThenWaitIsNonBlocking) { UserEvent event; auto result = event.wait(false, false); EXPECT_EQ(WaitStatus::NotReady, result); } TEST_F(EventTest, GivenSingleUserEventWhenWaitingForEventsThenSuccessIsReturned) { UserEvent event1; event1.setStatus(CL_COMPLETE); cl_event events[] = {&event1}; auto result = 
UserEvent::waitForEvents(sizeof(events) / sizeof(events[0]), events); EXPECT_EQ(result, CL_SUCCESS); } TEST_F(EventTest, GivenMultipleOutOfOrderCallbacksWhenWaitingForEventsThenSuccessIsReturned) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableAsyncEventsHandler.set(false); UserEvent event1; struct E2Clb { static void CL_CALLBACK SignalEv2(cl_event e, cl_int status, void *data) { UserEvent *event2 = static_cast(data); event2->setStatus(CL_COMPLETE); } }; UserEvent event2; event2.addCallback(E2Clb::SignalEv2, CL_COMPLETE, &event1); event2.setStatus(CL_COMPLETE); cl_event events[] = {&event1, &event2}; auto result = UserEvent::waitForEvents(sizeof(events) / sizeof(events[0]), events); EXPECT_EQ(result, CL_SUCCESS); } TEST_F(EventTests, WhenCalbackWasRegisteredOnCallbackThenExecutionPassesCorrectExecutionStatus) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableAsyncEventsHandler.set(false); struct HelperClb { static void CL_CALLBACK SetClbStatus(cl_event e, cl_int status, void *data) { cl_int *ret = static_cast(data); *ret = status; } }; cl_event retEvent; retVal = callOneWorkItemNDRKernel(nullptr, 0, &retEvent); ASSERT_EQ(CL_SUCCESS, retVal); cl_int submittedClbExecStatus = -1; cl_int runningClbExecStatus = -1; cl_int completeClbExecStatus = -1; retVal = clSetEventCallback(retEvent, CL_SUBMITTED, HelperClb::SetClbStatus, &submittedClbExecStatus); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clSetEventCallback(retEvent, CL_RUNNING, HelperClb::SetClbStatus, &runningClbExecStatus); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clSetEventCallback(retEvent, CL_COMPLETE, HelperClb::SetClbStatus, &completeClbExecStatus); ASSERT_EQ(CL_SUCCESS, retVal); auto result = UserEvent::waitForEvents(1, &retEvent); ASSERT_EQ(result, CL_SUCCESS); EXPECT_EQ(CL_SUBMITTED, submittedClbExecStatus); EXPECT_EQ(CL_RUNNING, runningClbExecStatus); EXPECT_EQ(CL_COMPLETE, completeClbExecStatus); clReleaseEvent(retEvent); } TEST_F(EventTests, 
GivenMultipleEventsWhenEventsAreCompletedThenCorrectNumberOfBlockingEventsIsReported) { UserEvent uEvent1(context); UserEvent uEvent2(context); UserEvent uEvent3(context); EXPECT_EQ(0U, uEvent1.peekNumEventsBlockingThis()); EXPECT_EQ(0U, uEvent2.peekNumEventsBlockingThis()); EXPECT_EQ(0U, uEvent3.peekNumEventsBlockingThis()); cl_event eventWaitList[] = {&uEvent1, &uEvent2, &uEvent3}; int sizeOfWaitList = sizeof(eventWaitList) / sizeof(cl_event); cl_event retClEvent; retVal = callOneWorkItemNDRKernel(eventWaitList, sizeOfWaitList, &retClEvent); Event *retEvent = (Event *)retClEvent; ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, retEvent); EXPECT_EQ(3U, retEvent->peekNumEventsBlockingThis()); retVal = clSetUserEventStatus(&uEvent1, CL_COMPLETE); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(2U, retEvent->peekNumEventsBlockingThis()); retVal = clSetUserEventStatus(&uEvent2, CL_COMPLETE); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1U, retEvent->peekNumEventsBlockingThis()); retVal = clSetUserEventStatus(&uEvent3, CL_COMPLETE); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0U, retEvent->peekNumEventsBlockingThis()); retVal |= clFinish(pCmdQ); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseEvent(retEvent); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(EventTests, WhenPassingBlockedUserEventToEnqueueNdRangeThenCommandQueueIsNotRetained) { auto userEvent = clCreateUserEvent(pContext, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); auto uEvent = (UserEvent *)userEvent; ASSERT_NE(nullptr, uEvent); auto cmdQueue = uEvent->getCommandQueue(); ASSERT_EQ(nullptr, cmdQueue); auto intitialRefCount = pCmdQ->getRefInternalCount(); auto retVal = callOneWorkItemNDRKernel(&userEvent, 1); ASSERT_EQ(CL_SUCCESS, retVal); cmdQueue = uEvent->getCommandQueue(); ASSERT_EQ(nullptr, cmdQueue); // Virtual event add refference to cmq queue. 
EXPECT_EQ(intitialRefCount + 1, pCmdQ->getRefInternalCount()); retVal = clSetUserEventStatus(userEvent, CL_COMPLETE); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clReleaseEvent(userEvent); ASSERT_EQ(CL_SUCCESS, retVal); pCmdQ->isQueueBlocked(); // VirtualEvent should be freed, so refCount should equal initial value EXPECT_EQ(intitialRefCount, pCmdQ->getRefInternalCount()); } TEST_F(EventTests, givenUserEventWhenSetStatusIsDoneThenDeviceMutextisAcquired) { struct mockedEvent : public UserEvent { using UserEvent::UserEvent; bool setStatus(cl_int status) override { auto commandStreamReceiverOwnership = ctx->getDevice(0)->getDefaultEngine().commandStreamReceiver->obtainUniqueOwnership(); mutexProperlyAcquired = commandStreamReceiverOwnership.owns_lock(); return true; } bool mutexProperlyAcquired = false; }; mockedEvent mockEvent(this->context); clSetUserEventStatus(&mockEvent, CL_COMPLETE); EXPECT_TRUE(mockEvent.mutexProperlyAcquired); } compute-runtime-22.14.22890/opencl/test/unit_test/execution_environment/000077500000000000000000000000001422164147700262565ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/execution_environment/CMakeLists.txt000066400000000000000000000004771422164147700310260ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_execution_environment ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/execution_environment_tests.cpp ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_execution_environment}) execution_environment_tests.cpp000066400000000000000000000426341422164147700345650ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/execution_environment/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/ail/ail_configuration.h" #include "shared/source/aub/aub_center.h" #include "shared/source/built_ins/built_ins.h" #include 
"shared/source/command_stream/preemption.h" #include "shared/source/compiler_interface/compiler_interface.h" #include "shared/source/device/device.h" #include "shared/source/direct_submission/direct_submission_controller.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/memory_manager/os_agnostic_memory_manager.h" #include "shared/source/os_interface/device_factory.h" #include "shared/source/os_interface/os_interface.h" #include "shared/source/source_level_debugger/source_level_debugger.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/mocks/mock_execution_environment.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "shared/test/common/mocks/mock_memory_operations_handler.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/utilities/destructor_counted.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/test/unit_test/mocks/mock_async_event_handler.h" #include "opencl/test/unit_test/mocks/mock_cl_execution_environment.h" #include "opencl/test/unit_test/mocks/mock_platform.h" using namespace NEO; TEST(ExecutionEnvironment, givenDefaultConstructorWhenItIsCalledThenExecutionEnvironmentHasInitialRefCountZero) { ExecutionEnvironment environment; EXPECT_EQ(0, environment.getRefInternalCount()); EXPECT_EQ(0, environment.getRefApiCount()); } TEST(ExecutionEnvironment, givenPlatformWhenItIsConstructedThenItCretesExecutionEnvironmentWithOneRefCountInternal) { auto executionEnvironment = new ExecutionEnvironment(); EXPECT_EQ(0, executionEnvironment->getRefInternalCount()); std::unique_ptr platform(new Platform(*executionEnvironment)); EXPECT_EQ(executionEnvironment, platform->peekExecutionEnvironment()); EXPECT_EQ(1, executionEnvironment->getRefInternalCount()); } 
TEST(ExecutionEnvironment, givenPlatformAndExecutionEnvironmentWithRefCountsWhenPlatformIsDestroyedThenExecutionEnvironmentIsNotDeleted) { auto executionEnvironment = new ExecutionEnvironment(); std::unique_ptr platform(new Platform(*executionEnvironment)); executionEnvironment->incRefInternal(); platform.reset(); EXPECT_EQ(1, executionEnvironment->getRefInternalCount()); executionEnvironment->decRefInternal(); } TEST(ExecutionEnvironment, WhenCreatingDevicesThenThoseDevicesAddRefcountsToExecutionEnvironment) { auto executionEnvironment = new ExecutionEnvironment(); auto expectedRefCounts = executionEnvironment->getRefInternalCount(); auto devices = DeviceFactory::createDevices(*executionEnvironment); EXPECT_LE(0u, devices[0]->getNumSubDevices()); if (devices[0]->getNumSubDevices() > 1) { expectedRefCounts++; } expectedRefCounts += std::max(devices[0]->getNumSubDevices(), 1u); EXPECT_EQ(expectedRefCounts, executionEnvironment->getRefInternalCount()); } TEST(ExecutionEnvironment, givenDeviceThatHaveRefferencesAfterPlatformIsDestroyedThenDeviceIsStillUsable) { DebugManagerStateRestore restorer; DebugManager.flags.CreateMultipleSubDevices.set(1); auto executionEnvironment = new ExecutionEnvironment(); std::unique_ptr platform(new Platform(*executionEnvironment)); platform->initialize(DeviceFactory::createDevices(*executionEnvironment)); auto device = platform->getClDevice(0); EXPECT_EQ(1, device->getRefInternalCount()); device->incRefInternal(); platform.reset(nullptr); EXPECT_EQ(1, device->getRefInternalCount()); int32_t expectedRefCount = 1 + device->getNumSubDevices(); EXPECT_EQ(expectedRefCount, executionEnvironment->getRefInternalCount()); device->decRefInternal(); } TEST(ExecutionEnvironment, givenPlatformWhenItIsCreatedThenItCreatesMemoryManagerInExecutionEnvironment) { auto executionEnvironment = new ExecutionEnvironment(); Platform platform(*executionEnvironment); prepareDeviceEnvironments(*executionEnvironment); 
platform.initialize(DeviceFactory::createDevices(*executionEnvironment)); EXPECT_NE(nullptr, executionEnvironment->memoryManager); } TEST(ExecutionEnvironment, givenMemoryManagerIsNotInitializedInExecutionEnvironmentWhenCreatingDevicesThenEmptyDeviceVectorIsReturned) { class FailedInitializeMemoryManagerExecutionEnvironment : public MockExecutionEnvironment { bool initializeMemoryManager() override { return false; } }; auto executionEnvironment = std::make_unique(); prepareDeviceEnvironments(*executionEnvironment); auto devices = DeviceFactory::createDevices(*executionEnvironment); EXPECT_TRUE(devices.empty()); } TEST(ExecutionEnvironment, givenDeviceWhenItIsDestroyedThenMemoryManagerIsStillAvailable) { ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); executionEnvironment->initializeMemoryManager(); std::unique_ptr device(Device::create(executionEnvironment, 0u)); device.reset(nullptr); EXPECT_NE(nullptr, executionEnvironment->memoryManager); } TEST(RootDeviceEnvironment, givenExecutionEnvironmentWhenInitializeAubCenterIsCalledThenItIsReceivesCorrectInputParams) { MockExecutionEnvironment executionEnvironment; executionEnvironment.rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); auto rootDeviceEnvironment = static_cast(executionEnvironment.rootDeviceEnvironments[0].get()); rootDeviceEnvironment->initAubCenter(true, "test.aub", CommandStreamReceiverType::CSR_AUB); EXPECT_TRUE(rootDeviceEnvironment->initAubCenterCalled); EXPECT_TRUE(rootDeviceEnvironment->localMemoryEnabledReceived); EXPECT_STREQ(rootDeviceEnvironment->aubFileNameReceived.c_str(), "test.aub"); } TEST(RootDeviceEnvironment, givenUseAubStreamFalseWhenGetAubManagerIsCalledThenReturnNull) { DebugManagerStateRestore dbgRestore; DebugManager.flags.UseAubStream.set(false); ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); auto rootDeviceEnvironment = executionEnvironment->rootDeviceEnvironments[0].get(); 
rootDeviceEnvironment->initAubCenter(false, "", CommandStreamReceiverType::CSR_AUB); auto aubManager = rootDeviceEnvironment->aubCenter->getAubManager(); EXPECT_EQ(nullptr, aubManager); } TEST(RootDeviceEnvironment, givenExecutionEnvironmentWhenInitializeAubCenterIsCalledThenItIsInitalizedOnce) { ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); auto rootDeviceEnvironment = executionEnvironment->rootDeviceEnvironments[0].get(); rootDeviceEnvironment->initAubCenter(false, "", CommandStreamReceiverType::CSR_AUB); auto currentAubCenter = rootDeviceEnvironment->aubCenter.get(); EXPECT_NE(nullptr, currentAubCenter); auto currentAubStreamProvider = currentAubCenter->getStreamProvider(); EXPECT_NE(nullptr, currentAubStreamProvider); auto currentAubFileStream = currentAubStreamProvider->getStream(); EXPECT_NE(nullptr, currentAubFileStream); rootDeviceEnvironment->initAubCenter(false, "", CommandStreamReceiverType::CSR_AUB); EXPECT_EQ(currentAubCenter, rootDeviceEnvironment->aubCenter.get()); EXPECT_EQ(currentAubStreamProvider, rootDeviceEnvironment->aubCenter->getStreamProvider()); EXPECT_EQ(currentAubFileStream, rootDeviceEnvironment->aubCenter->getStreamProvider()->getStream()); } TEST(ExecutionEnvironment, givenExecutionEnvironmentWhenInitializeMemoryManagerIsCalledThenLocalMemorySupportedInMemoryManagerHasCorrectValue) { const HardwareInfo *hwInfo = defaultHwInfo.get(); auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(hwInfo)); auto executionEnvironment = device->getExecutionEnvironment(); auto enableLocalMemory = HwHelper::get(hwInfo->platform.eRenderCoreFamily).getEnableLocalMemory(*hwInfo); executionEnvironment->initializeMemoryManager(); EXPECT_EQ(enableLocalMemory, executionEnvironment->memoryManager->isLocalMemorySupported(device->getRootDeviceIndex())); } TEST(ExecutionEnvironment, givenEnableDirectSubmissionControllerSetWhenInitializeDirectSubmissionControllerThenNotNull) { DebugManagerStateRestore 
restorer; DebugManager.flags.EnableDirectSubmissionController.set(1); auto controller = platform()->peekExecutionEnvironment()->initializeDirectSubmissionController(); EXPECT_NE(controller, nullptr); } TEST(ExecutionEnvironment, givenSetCsrFlagSetWhenInitializeDirectSubmissionControllerThenNull) { DebugManagerStateRestore restorer; DebugManager.flags.SetCommandStreamReceiver.set(1); auto controller = platform()->peekExecutionEnvironment()->initializeDirectSubmissionController(); EXPECT_EQ(controller, nullptr); } TEST(ExecutionEnvironment, givenEnableDirectSubmissionControllerSetZeroWhenInitializeDirectSubmissionControllerThenNull) { DebugManagerStateRestore restorer; DebugManager.flags.EnableDirectSubmissionController.set(0); auto controller = platform()->peekExecutionEnvironment()->initializeDirectSubmissionController(); EXPECT_EQ(controller, nullptr); } TEST(ExecutionEnvironment, givenExecutionEnvironmentWhenInitializeMemoryManagerIsCalledThenItIsInitalized) { ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); executionEnvironment->initializeMemoryManager(); EXPECT_NE(nullptr, executionEnvironment->memoryManager); } static_assert(sizeof(ExecutionEnvironment) == sizeof(std::unique_ptr) + sizeof(std::vector) + sizeof(std::unique_ptr) + sizeof(std::unique_ptr) + sizeof(bool) + (is64bit ? 
23 : 15), "New members detected in ExecutionEnvironment, please ensure that destruction sequence of objects is correct"); TEST(ExecutionEnvironment, givenExecutionEnvironmentWithVariousMembersWhenItIsDestroyedThenDeleteSequenceIsSpecified) { uint32_t destructorId = 0u; struct MemoryMangerMock : public DestructorCounted { MemoryMangerMock(uint32_t &destructorId, ExecutionEnvironment &executionEnvironment) : DestructorCounted(destructorId, executionEnvironment) { callBaseAllocateGraphicsMemoryForNonSvmHostPtr = false; callBasePopulateOsHandles = false; } }; struct DirectSubmissionControllerMock : public DestructorCounted { DirectSubmissionControllerMock(uint32_t &destructorId) : DestructorCounted(destructorId) {} }; struct GmmHelperMock : public DestructorCounted { GmmHelperMock(uint32_t &destructorId, const HardwareInfo *hwInfo) : DestructorCounted(destructorId, nullptr, hwInfo) {} }; struct OsInterfaceMock : public DestructorCounted { OsInterfaceMock(uint32_t &destructorId) : DestructorCounted(destructorId) {} }; struct MemoryOperationsHandlerMock : public DestructorCounted { MemoryOperationsHandlerMock(uint32_t &destructorId) : DestructorCounted(destructorId) {} }; struct AubCenterMock : public DestructorCounted { AubCenterMock(uint32_t &destructorId, const GmmHelper &gmmHelper) : DestructorCounted(destructorId, defaultHwInfo.get(), gmmHelper, false, "", CommandStreamReceiverType::CSR_AUB) {} }; struct CompilerInterfaceMock : public DestructorCounted { CompilerInterfaceMock(uint32_t &destructorId) : DestructorCounted(destructorId) {} }; struct BuiltinsMock : public DestructorCounted { BuiltinsMock(uint32_t &destructorId) : DestructorCounted(destructorId) {} }; struct SourceLevelDebuggerMock : public DestructorCounted { SourceLevelDebuggerMock(uint32_t &destructorId) : DestructorCounted(destructorId, nullptr) {} }; auto gmmHelper = new GmmHelperMock(destructorId, defaultHwInfo.get()); auto executionEnvironment = std::make_unique(); 
executionEnvironment->prepareRootDeviceEnvironments(1); executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); executionEnvironment->rootDeviceEnvironments[0]->gmmHelper = std::unique_ptr(gmmHelper); executionEnvironment->rootDeviceEnvironments[0]->osInterface = std::make_unique(destructorId); executionEnvironment->rootDeviceEnvironments[0]->memoryOperationsInterface = std::make_unique(destructorId); executionEnvironment->memoryManager = std::make_unique(destructorId, *executionEnvironment); executionEnvironment->rootDeviceEnvironments[0]->aubCenter = std::make_unique(destructorId, *gmmHelper); executionEnvironment->rootDeviceEnvironments[0]->builtins = std::make_unique(destructorId); executionEnvironment->rootDeviceEnvironments[0]->compilerInterface = std::make_unique(destructorId); executionEnvironment->rootDeviceEnvironments[0]->debugger = std::make_unique(destructorId); executionEnvironment->directSubmissionController = std::make_unique(destructorId); executionEnvironment.reset(nullptr); EXPECT_EQ(9u, destructorId); } TEST(ExecutionEnvironment, givenMultipleRootDevicesWhenTheyAreCreatedThenReuseMemoryManager) { ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); executionEnvironment->prepareRootDeviceEnvironments(2); for (auto i = 0u; i < executionEnvironment->rootDeviceEnvironments.size(); i++) { executionEnvironment->rootDeviceEnvironments[i]->setHwInfo(defaultHwInfo.get()); } std::unique_ptr device(Device::create(executionEnvironment, 0u)); auto &commandStreamReceiver = device->getGpgpuCommandStreamReceiver(); auto memoryManager = device->getMemoryManager(); std::unique_ptr device2(Device::create(executionEnvironment, 1u)); EXPECT_NE(&commandStreamReceiver, &device2->getGpgpuCommandStreamReceiver()); EXPECT_EQ(memoryManager, device2->getMemoryManager()); } TEST(ExecutionEnvironment, givenUnproperSetCsrFlagValueWhenInitializingMemoryManagerThenCreateDefaultMemoryManager) { DebugManagerStateRestore 
restorer; DebugManager.flags.SetCommandStreamReceiver.set(10); auto executionEnvironment = std::make_unique(defaultHwInfo.get()); executionEnvironment->initializeMemoryManager(); EXPECT_NE(nullptr, executionEnvironment->memoryManager); } TEST(ExecutionEnvironment, whenCalculateMaxOsContexCountThenGlobalVariableHasProperValue) { DebugManagerStateRestore restore; VariableBackup osContextCountBackup(&MemoryManager::maxOsContextCount, 0); uint32_t numRootDevices = 17u; uint32_t expectedOsContextCount = 0u; uint32_t expectedOsContextCountForCcs = 0u; { DebugManager.flags.EngineInstancedSubDevices.set(false); MockExecutionEnvironment executionEnvironment(nullptr, true, numRootDevices); for (const auto &rootDeviceEnvironment : executionEnvironment.rootDeviceEnvironments) { auto hwInfo = rootDeviceEnvironment->getHardwareInfo(); auto &hwHelper = HwHelper::get(hwInfo->platform.eRenderCoreFamily); auto osContextCount = hwHelper.getGpgpuEngineInstances(*hwInfo).size(); auto subDevicesCount = HwHelper::getSubDevicesCount(hwInfo); bool hasRootCsr = subDevicesCount > 1; auto ccsCount = hwInfo->gtSystemInfo.CCSInfo.NumberOfCCSEnabled; expectedOsContextCount += static_cast(osContextCount * subDevicesCount + hasRootCsr); if (ccsCount > 1) { expectedOsContextCountForCcs += ccsCount * subDevicesCount; } } EXPECT_EQ(expectedOsContextCount, MemoryManager::maxOsContextCount); } { DebugManager.flags.EngineInstancedSubDevices.set(true); MockExecutionEnvironment executionEnvironment(nullptr, true, numRootDevices); EXPECT_EQ(expectedOsContextCount + expectedOsContextCountForCcs, MemoryManager::maxOsContextCount); } } TEST(ClExecutionEnvironment, WhenExecutionEnvironmentIsDeletedThenAsyncEventHandlerThreadIsDestroyed) { auto executionEnvironment = new MockClExecutionEnvironment(); MockHandler *mockAsyncHandler = new MockHandler(); executionEnvironment->asyncEventsHandler.reset(mockAsyncHandler); EXPECT_EQ(mockAsyncHandler, executionEnvironment->getAsyncEventsHandler()); 
mockAsyncHandler->openThread(); delete executionEnvironment; EXPECT_TRUE(MockAsyncEventHandlerGlobals::destructorCalled); } TEST(ClExecutionEnvironment, WhenExecutionEnvironmentIsCreatedThenAsyncEventHandlerIsCreated) { auto executionEnvironment = std::make_unique(); EXPECT_NE(nullptr, executionEnvironment->getAsyncEventsHandler()); }compute-runtime-22.14.22890/opencl/test/unit_test/fixtures/000077500000000000000000000000001422164147700235005ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/fixtures/CMakeLists.txt000066400000000000000000000037641422164147700262520ustar00rootroot00000000000000# # Copyright (C) 2018-2022 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_fixtures ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/buffer_enqueue_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/cl_preemption_fixture.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cl_preemption_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/d3d_test_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/dispatch_flags_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/device_info_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/device_instrumentation_fixture.cpp ${CMAKE_CURRENT_SOURCE_DIR}/device_instrumentation_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_handler_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/hello_world_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/hello_world_kernel_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/image_fixture.cpp ${CMAKE_CURRENT_SOURCE_DIR}/image_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/kernel_arg_fixture.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_arg_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/kernel_data_fixture.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_data_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/kernel_work_group_info_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/media_kernel_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/multi_root_device_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/multi_tile_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/one_mip_level_image_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/platform_fixture.cpp 
${CMAKE_CURRENT_SOURCE_DIR}/platform_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/run_kernel_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/scenario_test_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/simple_arg_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/simple_arg_kernel_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/two_walker_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/ult_command_stream_receiver_fixture.h ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_fixtures}) set_property(GLOBAL PROPERTY IGDRCL_SRCS_tests_fixtures ${IGDRCL_SRCS_tests_fixtures}) add_subdirectories() compute-runtime-22.14.22890/opencl/test/unit_test/fixtures/buffer_enqueue_fixture.h000066400000000000000000000056231422164147700304250ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/hw_info.h" #include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/test/common/mocks/mock_csr.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "opencl/test/unit_test/helpers/cl_execution_environment_helper.h" #include "opencl/test/unit_test/helpers/cl_hw_parse.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" using namespace NEO; struct BufferEnqueueFixture : public ClHardwareParse, public ::testing::Test { BufferEnqueueFixture(void) : buffer(nullptr) { } void SetUp() override { executionEnvironment = getClExecutionEnvironmentImpl(hwInfo, 1); } void TearDown() override { buffer.reset(nullptr); } template void initializeFixture() { EnvironmentWithCsrWrapper environment; environment.setCsrType>(); memoryManager = new MockMemoryManager(*executionEnvironment); executionEnvironment->memoryManager.reset(memoryManager); device = std::make_unique(MockDevice::create(executionEnvironment, 0)); context = std::make_unique(device.get()); 
bufferMemory = std::make_unique(alignUp(bufferSizeInDwords, sizeof(uint32_t))); cl_int retVal = 0; buffer.reset(Buffer::create(context.get(), CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, bufferSizeInDwords, reinterpret_cast(bufferMemory.get()), retVal)); EXPECT_EQ(CL_SUCCESS, retVal); } protected: const size_t bufferSizeInDwords = 64; HardwareInfo hardwareInfo; HardwareInfo *hwInfo = nullptr; ExecutionEnvironment *executionEnvironment; cl_queue_properties properties = {}; std::unique_ptr bufferMemory; std::unique_ptr device; std::unique_ptr context; std::unique_ptr buffer; MockMemoryManager *memoryManager = nullptr; }; struct EnqueueReadWriteBufferRectDispatch : public BufferEnqueueFixture { void SetUp() override { BufferEnqueueFixture::SetUp(); } void TearDown() override { BufferEnqueueFixture::TearDown(); } uint32_t memory[64] = {0}; size_t bufferOrigin[3] = {0, 0, 0}; size_t hostOrigin[3] = {1, 1, 1}; size_t region[3] = {1, 2, 1}; size_t bufferRowPitch = 4; size_t bufferSlicePitch = bufferSizeInDwords; size_t hostRowPitch = 5; size_t hostSlicePitch = 15; }; compute-runtime-22.14.22890/opencl/test/unit_test/fixtures/buffer_fixture.cpp000066400000000000000000000010701422164147700272210ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" using NEO::Context; // clang-format off static char bufferMemory[] = { 0x00, 0x10, 0x20, 0x30, 0x01, 0x11, 0x21, 0x31, 0x02, 0x12, 0x22, 0x32, 0x03, 0x13, 0x23, 0x33, }; // clang-format on void *BufferDefaults::hostPtr = bufferMemory; const size_t BufferDefaults::sizeInBytes = sizeof(bufferMemory); Context *BufferDefaults::context = nullptr; compute-runtime-22.14.22890/opencl/test/unit_test/fixtures/buffer_fixture.h000066400000000000000000000030311422164147700266650ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * 
SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "CL/cl.h" #include #include struct BufferDefaults { enum { flags = CL_MEM_READ_WRITE }; static const size_t sizeInBytes; static void *hostPtr; static NEO::Context *context; }; template struct BufferUseHostPtr : public BaseClass { enum { flags = BaseClass::flags | CL_MEM_USE_HOST_PTR }; }; template struct BufferReadOnly : public BaseClass { enum { flags = BaseClass::flags | CL_MEM_READ_ONLY }; }; template struct BufferWriteOnly : public BaseClass { enum { flags = BaseClass::flags | CL_MEM_WRITE_ONLY }; }; template struct BufferHelper { using Buffer = NEO::Buffer; using Context = NEO::Context; using MockContext = NEO::MockContext; static Buffer *create(Context *context = Traits::context) { auto retVal = CL_SUCCESS; auto hostPtr = Traits::flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR) ? Traits::hostPtr : nullptr; auto buffer = Buffer::create( context ? 
context : std::shared_ptr(new MockContext).get(), Traits::flags, Traits::sizeInBytes, hostPtr, retVal); assert(buffer != nullptr); return buffer; } }; compute-runtime-22.14.22890/opencl/test/unit_test/fixtures/built_in_fixture.cpp000066400000000000000000000026451422164147700275660ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/built_in_fixture.h" #include "shared/source/built_ins/built_ins.h" #include "shared/source/device/device.h" #include "shared/test/common/helpers/kernel_binary_helper.h" #include "shared/test/common/helpers/test_files.h" #include "shared/test/common/libult/global_environment.h" #include "shared/test/common/test_macros/test_checks_shared.h" using namespace NEO; void BuiltInFixture::SetUp(Device *pDevice) { // create an instance of the builtins pBuiltIns = pDevice->getBuiltIns(); pBuiltIns->setCacheingEnableState(false); // set mock compiler to return expected kernel... 
MockCompilerDebugVars fclDebugVars; MockCompilerDebugVars igcDebugVars; std::string builtInsFileName; if (TestChecks::supportsImages(pDevice->getHardwareInfo())) { builtInsFileName = KernelBinaryHelper::BUILT_INS_WITH_IMAGES; } else { builtInsFileName = KernelBinaryHelper::BUILT_INS; } retrieveBinaryKernelFilename(fclDebugVars.fileName, builtInsFileName + "_", ".bc"); retrieveBinaryKernelFilename(igcDebugVars.fileName, builtInsFileName + "_", ".gen"); gEnvironment->fclPushDebugVars(fclDebugVars); gEnvironment->igcPushDebugVars(igcDebugVars); } void BuiltInFixture::TearDown() { gEnvironment->igcPopDebugVars(); gEnvironment->fclPopDebugVars(); } compute-runtime-22.14.22890/opencl/test/unit_test/fixtures/built_in_fixture.h000066400000000000000000000004631422164147700272270ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once namespace NEO { class BuiltIns; class Device; } // namespace NEO class BuiltInFixture { public: void SetUp(NEO::Device *pDevice); void TearDown(); NEO::BuiltIns *pBuiltIns = nullptr; }; compute-runtime-22.14.22890/opencl/test/unit_test/fixtures/cl_device_fixture.cpp000066400000000000000000000025561422164147700276770ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "shared/source/built_ins/sip.h" #include "gtest/gtest.h" namespace NEO { void ClDeviceFixture::SetUp() { hardwareInfo = *defaultHwInfo; SetUpImpl(&hardwareInfo); } void ClDeviceFixture::SetUpImpl(const NEO::HardwareInfo *hardwareInfo) { pDevice = MockClDevice::createWithNewExecutionEnvironment(hardwareInfo, rootDeviceIndex); ASSERT_NE(nullptr, pDevice); pClExecutionEnvironment = static_cast(pDevice->getExecutionEnvironment()); pClDevice = new MockClDevice{pDevice}; ASSERT_NE(nullptr, pClDevice); auto &commandStreamReceiver = pDevice->getGpgpuCommandStreamReceiver(); pTagMemory = 
commandStreamReceiver.getTagAddress(); ASSERT_NE(nullptr, const_cast(pTagMemory)); this->osContext = pDevice->getDefaultEngine().osContext; } void ClDeviceFixture::TearDown() { delete pClDevice; pClDevice = nullptr; pDevice = nullptr; } MockDevice *ClDeviceFixture::createWithUsDeviceId(unsigned short usDeviceId) { hardwareInfo = *defaultHwInfo; hardwareInfo.platform.usDeviceID = usDeviceId; return MockDevice::createWithNewExecutionEnvironment(&hardwareInfo, rootDeviceIndex); } } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/fixtures/cl_device_fixture.h000066400000000000000000000014331422164147700273350ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/test/common/mocks/mock_device.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" namespace NEO { struct HardwareInfo; struct ClDeviceFixture { void SetUp(); void SetUpImpl(const NEO::HardwareInfo *hardwareInfo); void TearDown(); MockDevice *createWithUsDeviceId(unsigned short usDeviceId); MockDevice *pDevice = nullptr; MockClDevice *pClDevice = nullptr; volatile uint32_t *pTagMemory = nullptr; HardwareInfo hardwareInfo = {}; PLATFORM platformHelper = {}; OsContext *osContext = nullptr; const uint32_t rootDeviceIndex = 0u; MockClExecutionEnvironment *pClExecutionEnvironment = nullptr; }; } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/fixtures/cl_preemption_fixture.cpp000066400000000000000000000070151422164147700306150ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/cl_preemption_fixture.h" #include "shared/source/command_stream/preemption.h" #include "shared/source/helpers/hw_info.h" #include "shared/test/common/cmd_parse/hw_parse.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/test_macros/test.h" #include 
"opencl/source/command_queue/enqueue_common.h" #include "opencl/source/command_queue/enqueue_kernel.h" #include "opencl/source/command_queue/enqueue_marker.h" #include "opencl/source/helpers/dispatch_info.h" #include "opencl/test/unit_test/fixtures/hello_world_fixture.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "gtest/gtest.h" using namespace NEO; DevicePreemptionTests::DevicePreemptionTests() = default; DevicePreemptionTests::~DevicePreemptionTests() = default; void DevicePreemptionTests::SetUp() { if (dbgRestore == nullptr) { dbgRestore.reset(new DebugManagerStateRestore()); } const cl_queue_properties properties[3] = {CL_QUEUE_PROPERTIES, 0, 0}; kernelInfo = std::make_unique(); device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr, rootDeviceIndex)); context.reset(new MockContext(device.get())); cmdQ.reset(new MockCommandQueue(context.get(), device.get(), properties, false)); program = std::make_unique(toClDeviceVector(*device)); kernel.reset(new MockKernel(program.get(), *kernelInfo, *device)); dispatchInfo.reset(new DispatchInfo(device.get(), kernel.get(), 1, Vec3(1, 1, 1), Vec3(1, 1, 1), Vec3(0, 0, 0))); ASSERT_NE(nullptr, device); ASSERT_NE(nullptr, context); ASSERT_NE(nullptr, cmdQ); waTable = &device->getRootDeviceEnvironment().getMutableHardwareInfo()->workaroundTable; } void DevicePreemptionTests::TearDown() { dbgRestore.reset(); kernel.reset(); kernelInfo.reset(); dispatchInfo.reset(); cmdQ.reset(); context.reset(); device.reset(); } void ThreadGroupPreemptionEnqueueKernelTest::SetUp() { dbgRestore.reset(new DebugManagerStateRestore()); DebugManager.flags.ForcePreemptionMode.set(static_cast(PreemptionMode::ThreadGroup)); globalHwInfo = defaultHwInfo.get(); originalPreemptionMode = globalHwInfo->capabilityTable.defaultPreemptionMode; globalHwInfo->capabilityTable.defaultPreemptionMode = 
PreemptionMode::ThreadGroup; HelloWorldFixture::SetUp(); pDevice->setPreemptionMode(PreemptionMode::ThreadGroup); } void ThreadGroupPreemptionEnqueueKernelTest::TearDown() { globalHwInfo->capabilityTable.defaultPreemptionMode = originalPreemptionMode; HelloWorldFixture::TearDown(); } void MidThreadPreemptionEnqueueKernelTest::SetUp() { dbgRestore.reset(new DebugManagerStateRestore()); DebugManager.flags.ForcePreemptionMode.set(static_cast(PreemptionMode::MidThread)); globalHwInfo = defaultHwInfo.get(); originalPreemptionMode = globalHwInfo->capabilityTable.defaultPreemptionMode; globalHwInfo->capabilityTable.defaultPreemptionMode = PreemptionMode::MidThread; HelloWorldFixture::SetUp(); pDevice->setPreemptionMode(PreemptionMode::MidThread); } void MidThreadPreemptionEnqueueKernelTest::TearDown() { globalHwInfo->capabilityTable.defaultPreemptionMode = originalPreemptionMode; HelloWorldFixture::TearDown(); } compute-runtime-22.14.22890/opencl/test/unit_test/fixtures/cl_preemption_fixture.h000066400000000000000000000040171422164147700302610ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/hello_world_fixture.h" #include "gtest/gtest.h" #include namespace NEO { class DispatchInfo; class MockCommandQueue; class MockContext; class MockDevice; class MockKernel; class MockProgram; struct KernelInfo; struct WorkaroundTable; using PreemptionEnqueueKernelFixture = HelloWorldFixture; using PreemptionEnqueueKernelTest = Test; } // namespace NEO class DevicePreemptionTests : public ::testing::Test { public: void SetUp() override; void TearDown() override; DevicePreemptionTests(); ~DevicePreemptionTests() override; NEO::PreemptionMode preemptionMode; NEO::WorkaroundTable *waTable = nullptr; std::unique_ptr dispatchInfo; std::unique_ptr kernel; 
std::unique_ptr cmdQ; std::unique_ptr device; std::unique_ptr context; std::unique_ptr dbgRestore; std::unique_ptr program; std::unique_ptr kernelInfo; const uint32_t rootDeviceIndex = 0u; }; struct ThreadGroupPreemptionEnqueueKernelTest : NEO::PreemptionEnqueueKernelTest { void SetUp() override; void TearDown() override; NEO::HardwareInfo *globalHwInfo = nullptr; NEO::PreemptionMode originalPreemptionMode = NEO::PreemptionMode::Initial; std::unique_ptr dbgRestore; }; struct MidThreadPreemptionEnqueueKernelTest : NEO::PreemptionEnqueueKernelTest { void SetUp() override; void TearDown() override; NEO::HardwareInfo *globalHwInfo = nullptr; NEO::PreemptionMode originalPreemptionMode = NEO::PreemptionMode::Initial; std::unique_ptr dbgRestore; }; compute-runtime-22.14.22890/opencl/test/unit_test/fixtures/context_fixture.cpp000066400000000000000000000013121422164147700274330ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/context_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "gtest/gtest.h" namespace NEO { void ContextFixture::SetUp(cl_uint numDevices, cl_device_id *pDeviceList) { auto retVal = CL_SUCCESS; pContext = Context::create(nullptr, ClDeviceVector(pDeviceList, numDevices), nullptr, nullptr, retVal); ASSERT_NE(nullptr, pContext); ASSERT_EQ(CL_SUCCESS, retVal); } void ContextFixture::TearDown() { if (pContext != nullptr) { pContext->release(); } } } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/fixtures/context_fixture.h000066400000000000000000000005241422164147700271040ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "CL/cl.h" namespace NEO { class MockContext; class ContextFixture { protected: void SetUp(cl_uint numDevices, cl_device_id *pDeviceList); void TearDown(); MockContext *pContext = nullptr; }; } // namespace NEO 
compute-runtime-22.14.22890/opencl/test/unit_test/fixtures/d3d_test_fixture.h000066400000000000000000000247371422164147700271450ustar00rootroot00000000000000/* * Copyright (C) 2019-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/mocks/mock_gmm.h" #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/platform_fixture.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_d3d_objects.h" namespace NEO { template <> uint32_t MockD3DSharingFunctions::getDxgiDescCalled = 0; template <> uint32_t MockD3DSharingFunctions::getDxgiDescCalled = 0; template <> DXGI_ADAPTER_DESC MockD3DSharingFunctions::mockDxgiDesc = {{0}}; template <> DXGI_ADAPTER_DESC MockD3DSharingFunctions::mockDxgiDesc = {{0}}; template <> IDXGIAdapter *MockD3DSharingFunctions::getDxgiDescAdapterRequested = nullptr; template <> IDXGIAdapter *MockD3DSharingFunctions::getDxgiDescAdapterRequested = nullptr; template class D3DTests : public PlatformFixture, public ::testing::Test { public: typedef typename T::D3DDevice D3DDevice; typedef typename T::D3DQuery D3DQuery; typedef typename T::D3DQueryDesc D3DQueryDesc; typedef typename T::D3DResource D3DResource; typedef typename T::D3DBufferDesc D3DBufferDesc; typedef typename T::D3DBufferObj D3DBufferObj; typedef typename T::D3DTexture2dDesc D3DTexture2dDesc; typedef typename T::D3DTexture3dDesc D3DTexture3dDesc; typedef typename T::D3DTexture2d D3DTexture2d; typedef typename T::D3DTexture3d D3DTexture3d; class MockMM : public OsAgnosticMemoryManager { public: using OsAgnosticMemoryManager::OsAgnosticMemoryManager; bool failAlloc = false; GraphicsAllocation *createGraphicsAllocationFromSharedHandle(osHandle handle, const AllocationProperties &properties, bool requireSpecificBitness, bool isHostIpcAllocation) override 
{ if (failAlloc) { return nullptr; } auto alloc = OsAgnosticMemoryManager::createGraphicsAllocationFromSharedHandle(handle, properties, requireSpecificBitness, isHostIpcAllocation); alloc->setDefaultGmm(forceGmm); gmmOwnershipPassed = true; return alloc; } GraphicsAllocation *createGraphicsAllocationFromNTHandle(void *handle, uint32_t rootDeviceIndex, AllocationType allocType) override { if (failAlloc) { return nullptr; } AllocationProperties properties(rootDeviceIndex, true, 0, AllocationType::INTERNAL_HOST_MEMORY, false, false, 0); auto alloc = OsAgnosticMemoryManager::createGraphicsAllocationFromSharedHandle(toOsHandle(handle), properties, false, false); alloc->setDefaultGmm(forceGmm); gmmOwnershipPassed = true; return alloc; } bool verifyValue = true; bool verifyHandle(osHandle handle, uint32_t rootDeviceIndex, bool) { return verifyValue; } bool mapAuxGpuVA(GraphicsAllocation *graphicsAllocation) override { mapAuxGpuVACalled++; return mapAuxGpuVaRetValue; } Gmm *forceGmm = nullptr; bool gmmOwnershipPassed = false; uint32_t mapAuxGpuVACalled = 0u; bool mapAuxGpuVaRetValue = true; }; void setupMockGmm() { ImageDescriptor imgDesc = {}; imgDesc.imageHeight = 4; imgDesc.imageWidth = 4; imgDesc.imageDepth = 1; imgDesc.imageType = ImageType::Image2D; auto imgInfo = MockGmm::initImgInfo(imgDesc, 0, nullptr); gmm = MockGmm::queryImgParams(pPlatform->peekExecutionEnvironment()->rootDeviceEnvironments[0]->getGmmClientContext(), imgInfo, false).release(); mockGmmResInfo = static_cast(gmm->gmmResourceInfo.get()); mockMM->forceGmm = gmm; } void SetUp() override { VariableBackup backup(&ultHwConfig); ultHwConfig.useMockedPrepareDeviceEnvironmentsFunc = false; PlatformFixture::SetUp(); rootDeviceIndex = pPlatform->getClDevice(0)->getRootDeviceIndex(); context = new MockContext(pPlatform->getClDevice(0)); context->preferD3dSharedResources = true; mockMM = std::make_unique(*context->getDevice(0)->getExecutionEnvironment()); mockSharingFcns = new MockD3DSharingFunctions(); 
mockSharingFcns->checkFormatSupportSetParam1 = true; mockSharingFcns->checkFormatSupportParamsSet.pFormat = D3D11_FORMAT_SUPPORT_BUFFER | D3D11_FORMAT_SUPPORT_TEXTURE2D | D3D11_FORMAT_SUPPORT_TEXTURE3D; context->setSharingFunctions(mockSharingFcns); context->memoryManager = mockMM.get(); cmdQ = new MockCommandQueue(context, context->getDevice(0), 0, false); DebugManager.injectFcn = &mockSharingFcns->mockGetDxgiDesc; mockSharingFcns->mockTexture2dDesc.ArraySize = 1; mockSharingFcns->mockTexture2dDesc.MipLevels = 4; mockSharingFcns->mockTexture3dDesc.MipLevels = 4; setupMockGmm(); if (context->getSharing>()) { ASSERT_EQ(0u, d3dMode); d3dMode = 10; } if (context->getSharing>()) { ASSERT_EQ(0u, d3dMode); d3dMode = 11; } ASSERT_NE(0u, d3dMode); } void TearDown() override { delete cmdQ; delete context; if (!mockMM->gmmOwnershipPassed) { delete gmm; } PlatformFixture::TearDown(); } cl_int pickParam(cl_int d3d10, cl_int d3d11) { if (d3dMode == 10u) { return d3d10; } if (d3dMode == 11u) { return d3d11; } EXPECT_TRUE(false); return 0; } cl_mem createFromD3DBufferApi(cl_context context, cl_mem_flags flags, ID3D10Buffer *resource, cl_int *errcodeRet) { return clCreateFromD3D10BufferKHR(context, flags, resource, errcodeRet); } cl_mem createFromD3DBufferApi(cl_context context, cl_mem_flags flags, ID3D11Buffer *resource, cl_int *errcodeRet) { return clCreateFromD3D11BufferKHR(context, flags, resource, errcodeRet); } cl_mem createFromD3DTexture2DApi(cl_context context, cl_mem_flags flags, ID3D10Texture2D *resource, UINT subresource, cl_int *errcodeRet) { return clCreateFromD3D10Texture2DKHR(context, flags, resource, subresource, errcodeRet); } cl_mem createFromD3DTexture2DApi(cl_context context, cl_mem_flags flags, ID3D11Texture2D *resource, UINT subresource, cl_int *errcodeRet) { return clCreateFromD3D11Texture2DKHR(context, flags, resource, subresource, errcodeRet); } cl_mem createFromD3DTexture3DApi(cl_context context, cl_mem_flags flags, ID3D10Texture3D *resource, UINT 
subresource, cl_int *errcodeRet) { return clCreateFromD3D10Texture3DKHR(context, flags, resource, subresource, errcodeRet); } cl_mem createFromD3DTexture3DApi(cl_context context, cl_mem_flags flags, ID3D11Texture3D *resource, UINT subresource, cl_int *errcodeRet) { return clCreateFromD3D11Texture3DKHR(context, flags, resource, subresource, errcodeRet); } cl_int enqueueAcquireD3DObjectsApi(MockD3DSharingFunctions *mockFcns, cl_command_queue commandQueue, cl_uint numObjects, const cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { return clEnqueueAcquireD3D10ObjectsKHR(commandQueue, numObjects, memObjects, numEventsInWaitList, eventWaitList, event); } cl_int enqueueAcquireD3DObjectsApi(MockD3DSharingFunctions *mockFcns, cl_command_queue commandQueue, cl_uint numObjects, const cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { return clEnqueueAcquireD3D11ObjectsKHR(commandQueue, numObjects, memObjects, numEventsInWaitList, eventWaitList, event); } cl_int enqueueReleaseD3DObjectsApi(MockD3DSharingFunctions *mockFcns, cl_command_queue commandQueue, cl_uint numObjects, const cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { return clEnqueueReleaseD3D10ObjectsKHR(commandQueue, numObjects, memObjects, numEventsInWaitList, eventWaitList, event); } cl_int enqueueReleaseD3DObjectsApi(MockD3DSharingFunctions *mockFcns, cl_command_queue commandQueue, cl_uint numObjects, const cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { return clEnqueueReleaseD3D11ObjectsKHR(commandQueue, numObjects, memObjects, numEventsInWaitList, eventWaitList, event); } cl_int getDeviceIDsFromD3DApi(MockD3DSharingFunctions *mockFcns, cl_platform_id platform, cl_d3d10_device_source_khr d3dDeviceSource, void *d3dObject, cl_d3d10_device_set_khr d3dDeviceSet, cl_uint numEntries, cl_device_id *devices, cl_uint 
*numDevices) { return clGetDeviceIDsFromD3D10KHR(platform, d3dDeviceSource, d3dObject, d3dDeviceSet, numEntries, devices, numDevices); } cl_int getDeviceIDsFromD3DApi(MockD3DSharingFunctions *mockFcns, cl_platform_id platform, cl_d3d10_device_source_khr d3dDeviceSource, void *d3dObject, cl_d3d10_device_set_khr d3dDeviceSet, cl_uint numEntries, cl_device_id *devices, cl_uint *numDevices) { return clGetDeviceIDsFromD3D11KHR(platform, d3dDeviceSource, d3dObject, d3dDeviceSet, numEntries, devices, numDevices); } MockD3DSharingFunctions *mockSharingFcns; MockContext *context; MockCommandQueue *cmdQ; char dummyD3DBuffer; char dummyD3DBufferStaging; char dummyD3DTexture; char dummyD3DTextureStaging; Gmm *gmm = nullptr; MockGmmResourceInfo *mockGmmResInfo = nullptr; DebugManagerStateRestore dbgRestore; std::unique_ptr mockMM; uint8_t d3dMode = 0; uint32_t rootDeviceIndex = 0; }; typedef ::testing::Types D3DTypes; } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/fixtures/device_info_fixture.h000066400000000000000000000034271422164147700276770ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/test/common/mocks/mock_device.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "gtest/gtest.h" namespace NEO { struct GetDeviceInfoMemCapabilitiesTest : ::testing::Test { struct TestParams { cl_uint paramName; cl_unified_shared_memory_capabilities_intel expectedCapabilities; }; void check(std::vector ¶ms) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); for (auto ¶m : params) { cl_unified_shared_memory_capabilities_intel unifiedSharedMemoryCapabilities{}; size_t paramRetSize; const auto retVal = device->getDeviceInfo(param.paramName, sizeof(cl_unified_shared_memory_capabilities_intel), &unifiedSharedMemoryCapabilities, ¶mRetSize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(param.expectedCapabilities, 
unifiedSharedMemoryCapabilities); EXPECT_EQ(sizeof(cl_unified_shared_memory_capabilities_intel), paramRetSize); } } }; struct QueueFamilyNameTest : ::testing::Test { void SetUp() override { device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); } void verify(EngineGroupType type, const char *expectedName) { char name[CL_QUEUE_FAMILY_MAX_NAME_SIZE_INTEL]; device->getQueueFamilyName(name, type); EXPECT_EQ(0, std::strcmp(name, expectedName)); } std::unique_ptr device = {}; }; } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/fixtures/device_instrumentation_fixture.cpp000066400000000000000000000013671422164147700325430ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/device_instrumentation_fixture.h" #include "shared/test/common/helpers/execution_environment_helper.h" #include "shared/test/common/mocks/mock_device.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/test/unit_test/mocks/mock_platform.h" namespace NEO { void DeviceInstrumentationFixture::SetUp(bool instrumentation) { ExecutionEnvironment *executionEnvironment = getExecutionEnvironmentImpl(hwInfo, 1); hwInfo->capabilityTable.instrumentationEnabled = instrumentation; device = std::make_unique(*Device::create(executionEnvironment, 0), platform()); } } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/fixtures/device_instrumentation_fixture.h000066400000000000000000000005521422164147700322030ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include namespace NEO { class ClDevice; class Device; struct HardwareInfo; struct DeviceInstrumentationFixture { void SetUp(bool instrumentation); std::unique_ptr device = nullptr; HardwareInfo *hwInfo = nullptr; }; } // namespace NEO 
compute-runtime-22.14.22890/opencl/test/unit_test/fixtures/dispatch_flags_fixture.h000066400000000000000000000026171422164147700304000ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/os_interface/os_context.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/mocks/mock_csr.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/mocks/mock_execution_environment.h" #include "shared/test/common/test_macros/test_checks_shared.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_context.h" namespace NEO { template struct DispatchFlagsTestsBase : public ::testing::Test { template void SetUpImpl() { HardwareInfo hwInfo = *defaultHwInfo; if (setupBlitter) { hwInfo.capabilityTable.blitterOperationsSupported = true; } environmentWrapper.setCsrType(); device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); context = std::make_unique(device.get()); } EnvironmentWithCsrWrapper environmentWrapper; std::unique_ptr device; std::unique_ptr context; DebugManagerStateRestore restore; }; using DispatchFlagsTests = DispatchFlagsTestsBase; using DispatchFlagsBlitTests = DispatchFlagsTestsBase; } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/fixtures/enqueue_handler_fixture.h000066400000000000000000000011621422164147700305630ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "gtest/gtest.h" class EnqueueHandlerTest : public NEO::ClDeviceFixture, public testing::Test { public: void SetUp() override { ClDeviceFixture::SetUp(); context = new NEO::MockContext(pClDevice); } void TearDown() override { context->decRefInternal(); 
ClDeviceFixture::TearDown(); } NEO::MockContext *context; }; compute-runtime-22.14.22890/opencl/test/unit_test/fixtures/hello_world_fixture.h000066400000000000000000000113611422164147700277330ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/command_queue/command_queue_fixture.h" #include "opencl/test/unit_test/command_stream/command_stream_fixture.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "opencl/test/unit_test/fixtures/hello_world_kernel_fixture.h" #include "opencl/test/unit_test/indirect_heap/indirect_heap_fixture.h" namespace NEO { // Factory used to pick various ingredients for use in aggregate tests struct HelloWorldFixtureFactory { typedef NEO::IndirectHeapFixture IndirectHeapFixture; typedef NEO::CommandStreamFixture CommandStreamFixture; typedef NEO::CommandQueueHwFixture CommandQueueFixture; typedef NEO::HelloWorldKernelFixture KernelFixture; }; // Instantiates a fixture based on the supplied fixture factory. // Used by most tests for integration testing with command queues. 
template struct HelloWorldFixture : public FixtureFactory::IndirectHeapFixture, public FixtureFactory::CommandStreamFixture, public FixtureFactory::CommandQueueFixture, public FixtureFactory::KernelFixture, public ClDeviceFixture { typedef typename FixtureFactory::IndirectHeapFixture IndirectHeapFixture; typedef typename FixtureFactory::CommandStreamFixture CommandStreamFixture; typedef typename FixtureFactory::CommandQueueFixture CommandQueueFixture; typedef typename FixtureFactory::KernelFixture KernelFixture; using CommandQueueFixture::pCmdQ; using CommandQueueFixture::SetUp; using CommandStreamFixture::pCS; using CommandStreamFixture::SetUp; using HelloWorldKernelFixture::SetUp; using IndirectHeapFixture::SetUp; using KernelFixture::pKernel; public: void SetUp() override { ClDeviceFixture::SetUp(); ASSERT_NE(nullptr, pClDevice); CommandQueueFixture::SetUp(pClDevice, 0); ASSERT_NE(nullptr, pCmdQ); CommandStreamFixture::SetUp(pCmdQ); ASSERT_NE(nullptr, pCS); IndirectHeapFixture::SetUp(pCmdQ); KernelFixture::SetUp(pClDevice, kernelFilename, kernelName); ASSERT_NE(nullptr, pKernel); auto retVal = CL_INVALID_VALUE; BufferDefaults::context = new MockContext(pClDevice); destBuffer = Buffer::create( BufferDefaults::context, CL_MEM_READ_WRITE, sizeUserMemory, nullptr, retVal); srcBuffer = Buffer::create( BufferDefaults::context, CL_MEM_READ_WRITE, sizeUserMemory, nullptr, retVal); pDestMemory = destBuffer->getCpuAddressForMapping(); pSrcMemory = srcBuffer->getCpuAddressForMapping(); memset(pDestMemory, destPattern, sizeUserMemory); memset(pSrcMemory, srcPattern, sizeUserMemory); pKernel->setArg(0, srcBuffer); pKernel->setArg(1, destBuffer); } void TearDown() override { pCmdQ->flush(); srcBuffer->release(); destBuffer->release(); KernelFixture::TearDown(); IndirectHeapFixture::TearDown(); CommandStreamFixture::TearDown(); CommandQueueFixture::TearDown(); BufferDefaults::context->release(); ClDeviceFixture::TearDown(); } Buffer *srcBuffer = nullptr; Buffer *destBuffer = 
nullptr; void *pSrcMemory = nullptr; void *pDestMemory = nullptr; size_t sizeUserMemory = 128 * sizeof(float); const char *kernelFilename = "CopyBuffer_simd"; const char *kernelName = "CopyBuffer"; const int srcPattern = 85; const int destPattern = 170; cl_int callOneWorkItemNDRKernel(cl_event *eventWaitList = nullptr, cl_int waitListSize = 0, cl_event *returnEvent = nullptr) { cl_uint workDim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {1, 1, 1}; size_t localWorkSize[3] = {1, 1, 1}; return pCmdQ->enqueueKernel( pKernel, workDim, globalWorkOffset, globalWorkSize, localWorkSize, waitListSize, eventWaitList, returnEvent); } }; template struct HelloWorldTest : Test> { }; template struct HelloWorldTestWithParam : HelloWorldFixture { }; } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/fixtures/hello_world_kernel_fixture.h000066400000000000000000000060671422164147700313020ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/device/device.h" #include "shared/source/helpers/file_io.h" #include "shared/test/common/helpers/test_files.h" #include "opencl/source/kernel/kernel.h" #include "opencl/source/platform/platform.h" #include "opencl/source/program/program.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/fixtures/program_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "CL/cl.h" #include "gtest/gtest.h" namespace NEO { class Kernel; class Program; struct HelloWorldKernelFixture : public ProgramFixture { using ProgramFixture::SetUp; void SetUp(ClDevice *pDevice, const char *kernelFilenameStr, const char *kernelNameStr) { SetUp(pDevice, kernelFilenameStr, kernelNameStr, nullptr); } void SetUp(ClDevice *pDevice, const char *kernelFilenameStr, const char 
*kernelNameStr, const char *options) { ProgramFixture::SetUp(); pTestFilename = new std::string(kernelFilenameStr); pKernelName = new std::string(kernelNameStr); if (strstr(kernelFilenameStr, "_simd") != nullptr) { pTestFilename->append(std::to_string(simd)); } auto deviceVector = toClDeviceVector(*pDevice); pContext = Context::create(nullptr, deviceVector, nullptr, nullptr, retVal); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, pContext); if (options) { std::string optionsToProgram(options); CreateProgramFromBinary( pContext, deviceVector, *pTestFilename, optionsToProgram); } else { CreateProgramFromBinary( pContext, deviceVector, *pTestFilename); } ASSERT_NE(nullptr, pProgram); retVal = pProgram->build( pProgram->getDevices(), nullptr, false); ASSERT_EQ(CL_SUCCESS, retVal); // create a kernel pMultiDeviceKernel = MultiDeviceKernel::create( pProgram, pProgram->getKernelInfosForKernel(pKernelName->c_str()), &retVal); pKernel = static_cast(pMultiDeviceKernel->getKernel(pDevice->getRootDeviceIndex())); EXPECT_NE(nullptr, pKernel); EXPECT_EQ(CL_SUCCESS, retVal); } void TearDown() override { delete pKernelName; delete pTestFilename; pMultiDeviceKernel->release(); pContext->release(); ProgramFixture::TearDown(); } std::string *pTestFilename = nullptr; std::string *pKernelName = nullptr; cl_uint simd = 32; cl_int retVal = CL_SUCCESS; MockMultiDeviceKernel *pMultiDeviceKernel = nullptr; MockKernel *pKernel = nullptr; MockContext *pContext = nullptr; }; } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/fixtures/image_fixture.cpp000066400000000000000000000034351422164147700270410ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" using NEO::MockContext; static const size_t imageWidth = 7; static const size_t imageHeight = 9; static const size_t imageDepth = 11; static const size_t 
imageArray = imageDepth; const cl_image_format Image1dDefaults::imageFormat = { CL_R, CL_FLOAT}; const cl_image_format LuminanceImage::imageFormat = { CL_LUMINANCE, CL_FLOAT}; const cl_image_desc Image1dDefaults::imageDesc = { CL_MEM_OBJECT_IMAGE1D, imageWidth, 1, 1, 1, 0, 0, 0, 0, {nullptr}}; const cl_image_desc Image1dBufferDefaults::imageDesc = { CL_MEM_OBJECT_IMAGE1D_BUFFER, imageWidth, 1, 1, 1, 0, 0, 0, 0, {nullptr}}; const cl_image_desc Image2dDefaults::imageDesc = { CL_MEM_OBJECT_IMAGE2D, imageWidth, imageHeight, 1, 1, 0, 0, 0, 0, {nullptr}}; const cl_image_desc Image3dDefaults::imageDesc = { CL_MEM_OBJECT_IMAGE3D, imageWidth, imageHeight, imageDepth, 1, 0, 0, 0, 0, {nullptr}}; const cl_image_desc Image2dArrayDefaults::imageDesc = { CL_MEM_OBJECT_IMAGE2D_ARRAY, imageWidth, imageHeight, 0, imageArray, 0, 0, 0, 0, {nullptr}}; const cl_image_desc Image1dArrayDefaults::imageDesc = { CL_MEM_OBJECT_IMAGE1D_ARRAY, imageWidth, 0, 0, imageArray, 0, 0, 0, 0, {nullptr}}; static float imageMemory[imageWidth * imageHeight * imageDepth] = {}; void *Image1dDefaults::hostPtr = imageMemory; void *ImageWithoutHostPtr::hostPtr = nullptr; NEO::Context *Image1dDefaults::context = nullptr; compute-runtime-22.14.22890/opencl/test/unit_test/fixtures/image_fixture.h000066400000000000000000000111271422164147700265030ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/helpers/hw_info.h" #include "shared/test/common/helpers/default_hw_info.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/helpers/cl_memory_properties_helpers.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include 
"CL/cl.h" #include #include struct Image1dDefaults { enum { flags = 0 }; static const cl_image_format imageFormat; static const cl_image_desc imageDesc; static void *hostPtr; static NEO::Context *context; }; struct Image1dBufferDefaults : public Image1dDefaults { static const cl_image_desc imageDesc; }; struct Image2dDefaults : public Image1dDefaults { static const cl_image_desc imageDesc; }; struct Image3dDefaults : public Image2dDefaults { static const cl_image_desc imageDesc; }; struct Image2dArrayDefaults : public Image2dDefaults { static const cl_image_desc imageDesc; }; struct Image1dArrayDefaults : public Image2dDefaults { static const cl_image_desc imageDesc; }; struct LuminanceImage : public Image2dDefaults { static const cl_image_format imageFormat; }; struct ImageWithoutHostPtr : public Image1dDefaults { enum { flags = 0 }; static void *hostPtr; }; template struct ImageUseHostPtr : public BaseClass { enum { flags = BaseClass::flags | CL_MEM_USE_HOST_PTR }; }; template struct ImageReadOnly : public BaseClass { enum { flags = BaseClass::flags | CL_MEM_READ_ONLY }; }; template struct ImageWriteOnly : public BaseClass { enum { flags = BaseClass::flags | CL_MEM_WRITE_ONLY }; }; template struct ImageHelper { using Context = NEO::Context; using Image = NEO::Image; using MockContext = NEO::MockContext; static Image *create(Context *context = Traits::context, const cl_image_desc *imgDesc = &Traits::imageDesc, const cl_image_format *imgFormat = &Traits::imageFormat) { auto retVal = CL_INVALID_VALUE; auto surfaceFormat = Image::getSurfaceFormatFromTable(Traits::flags, imgFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); auto image = Image::create( context, NEO::ClMemoryPropertiesHelper::createMemoryProperties(Traits::flags, 0, 0, &context->getDevice(0)->getDevice()), Traits::flags, 0, surfaceFormat, imgDesc, Traits::hostPtr, retVal); return image; } }; template struct Image1dHelper : public ImageHelper { }; template struct 
Image1dBufferHelper : public ImageHelper { }; template struct Image2dHelper : public ImageHelper { }; template struct Image3dHelper : public ImageHelper { }; template struct Image2dArrayHelper : public ImageHelper { }; template struct Image1dArrayHelper : public ImageHelper { }; struct ImageClearColorFixture : ::testing::Test { using MockContext = NEO::MockContext; using Image = NEO::Image; template void setUpImpl() { hardwareInfo.capabilityTable.ftrRenderCompressedImages = true; NEO::platformsImpl->clear(); NEO::constructPlatform()->peekExecutionEnvironment()->prepareRootDeviceEnvironments(1u); NEO::platform()->peekExecutionEnvironment()->rootDeviceEnvironments[0]->setHwInfo(&hardwareInfo); NEO::platform()->peekExecutionEnvironment()->rootDeviceEnvironments[0]->initGmm(); } template typename FamilyType::RENDER_SURFACE_STATE getSurfaceState() { using AUXILIARY_SURFACE_MODE = typename FamilyType::RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE; auto surfaceState = FamilyType::cmdInitRenderSurfaceState; surfaceState.setAuxiliarySurfaceMode(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_CCS_E); return surfaceState; } NEO::HardwareInfo hardwareInfo = *NEO::defaultHwInfo; MockContext context; std::unique_ptr image; }; compute-runtime-22.14.22890/opencl/test/unit_test/fixtures/kernel_arg_fixture.cpp000066400000000000000000000063631422164147700300730ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/kernel_arg_fixture.h" #include "shared/source/helpers/api_specific_config.h" #include "shared/source/program/kernel_info.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_image.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_program.h" using namespace NEO; KernelImageArgTest::~KernelImageArgTest() = default; void 
KernelImageArgTest::SetUp() { pKernelInfo = std::make_unique(); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1; pKernelInfo->heapInfo.SurfaceStateHeapSize = sizeof(surfaceStateHeap); pKernelInfo->heapInfo.pSsh = surfaceStateHeap; constexpr int numImages = 5; pKernelInfo->addArgImage(0, 0); auto &img0 = pKernelInfo->argAsImg(0); img0.metadataPayload.imgWidth = 0x4; img0.metadataPayload.flatBaseOffset = 0x8; img0.metadataPayload.flatWidth = 0x10; img0.metadataPayload.flatHeight = 0x18; img0.metadataPayload.flatPitch = 0x24; img0.metadataPayload.numSamples = 0x3c; img0.metadataPayload.numMipLevels = offsetNumMipLevelsImage0; pKernelInfo->addArgImage(1, 0); pKernelInfo->argAsImg(1).metadataPayload.imgHeight = 0xc; pKernelInfo->addArgImmediate(2, sizeof(void *), 0x20); pKernelInfo->addArgImage(3, 0); pKernelInfo->addArgImage(4, 0x20); pKernelInfo->addExtendedDeviceSideEnqueueDescriptor(4, 0); pKernelInfo->kernelDescriptor.kernelAttributes.bufferAddressingMode = ApiSpecificConfig::getBindlessConfiguration() ? KernelDescriptor::AddressingMode::BindlessAndStateless : KernelDescriptor::AddressingMode::BindfulAndStateless; pKernelInfo->kernelDescriptor.kernelAttributes.imageAddressingMode = ApiSpecificConfig::getBindlessConfiguration() ? 
KernelDescriptor::AddressingMode::Bindless : KernelDescriptor::AddressingMode::Bindful; ClDeviceFixture::SetUp(); context.reset(new MockContext(pClDevice)); program = std::make_unique(context.get(), false, toClDeviceVector(*pClDevice)); int32_t retVal = CL_INVALID_VALUE; pMultiDeviceKernel.reset(MultiDeviceKernel::create(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), &retVal)); pKernel = static_cast(pMultiDeviceKernel->getKernel(rootDeviceIndex)); ASSERT_EQ(CL_SUCCESS, retVal); pKernel->setKernelArgHandler(0, &Kernel::setArgImage); pKernel->setKernelArgHandler(1, &Kernel::setArgImage); pKernel->setKernelArgHandler(2, &Kernel::setArgImmediate); pKernel->setKernelArgHandler(3, &Kernel::setArgImage); pKernel->setKernelArgHandler(4, &Kernel::setArgImage); uint32_t crossThreadData[numImages * 0x20] = {}; crossThreadData[0x20 / sizeof(uint32_t)] = 0x12344321; pKernel->setCrossThreadData(crossThreadData, sizeof(crossThreadData)); image.reset(Image2dHelper<>::create(context.get())); ASSERT_NE(nullptr, image); } void KernelImageArgTest::TearDown() { image.reset(); pMultiDeviceKernel.reset(); program.reset(); context.reset(); ClDeviceFixture::TearDown(); } compute-runtime-22.14.22890/opencl/test/unit_test/fixtures/kernel_arg_fixture.h000066400000000000000000000021631422164147700275320ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include namespace NEO { class MockContext; class MockKernel; class MultiDeviceKernel; class MockProgram; class Image; class MockKernelInfo; struct KernelInfo; } // namespace NEO namespace iOpenCL { struct SKernelBinaryHeaderCommon; } class KernelImageArgTest : public Test { public: KernelImageArgTest() { } ~KernelImageArgTest() override; protected: void SetUp() override; void TearDown() override; cl_int retVal = 0; 
std::unique_ptr kernelHeader; std::unique_ptr context; std::unique_ptr program; std::unique_ptr pKernelInfo; std::unique_ptr pMultiDeviceKernel; NEO::MockKernel *pKernel; std::unique_ptr image; char surfaceStateHeap[0x80]; uint32_t offsetNumMipLevelsImage0 = 0x40; }; compute-runtime-22.14.22890/opencl/test/unit_test/fixtures/kernel_data_fixture.cpp000066400000000000000000000121131422164147700302210ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/kernel_data_fixture.h" #include "shared/source/device_binary_format/patchtokens_decoder.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/string.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "shared/source/program/program_info_from_patchtokens.h" void KernelDataTest::buildAndDecode() { cl_int error = CL_SUCCESS; kernelBinaryHeader.CheckSum = checkSum; kernelBinaryHeader.DynamicStateHeapSize = dshSize; kernelBinaryHeader.GeneralStateHeapSize = gshSize; kernelBinaryHeader.KernelHeapSize = kernelHeapSize; kernelBinaryHeader.KernelNameSize = kernelNameSize; kernelBinaryHeader.KernelUnpaddedSize = kernelUnpaddedSize; kernelBinaryHeader.PatchListSize = patchListSize + sizeof(SPatchDataParameterStream); kernelBinaryHeader.ShaderHashCode = shaderHashCode; kernelBinaryHeader.SurfaceStateHeapSize = sshSize; kernelDataSize = sizeof(SKernelBinaryHeaderCommon) + kernelNameSize + sshSize + dshSize + gshSize + kernelHeapSize + patchListSize; kernelDataSize += sizeof(SPatchDataParameterStream); pKernelData = static_cast(alignedMalloc(kernelDataSize, MemoryConstants::cacheLineSize)); ASSERT_NE(nullptr, pKernelData); // kernel blob pCurPtr = pKernelData; // kernel header // first clear it because sizeof() > sum of sizeof(fields). 
this is due to packing memset(pCurPtr, 0, sizeof(SKernelBinaryHeaderCommon)); *(SKernelBinaryHeaderCommon *)pCurPtr = kernelBinaryHeader; pCurPtr += sizeof(SKernelBinaryHeaderCommon); // kernel name memset(pCurPtr, 0, kernelNameSize); strcpy_s(pCurPtr, strlen(kernelName.c_str()) + 1, kernelName.c_str()); pCurPtr += kernelNameSize; // kernel heap memcpy_s(pCurPtr, kernelHeapSize, pKernelHeap, kernelHeapSize); pCurPtr += kernelHeapSize; // general state heap memcpy_s(pCurPtr, gshSize, pGsh, gshSize); pCurPtr += gshSize; // dynamic state heap memcpy_s(pCurPtr, dshSize, pDsh, dshSize); pCurPtr += dshSize; // surface state heap memcpy_s(pCurPtr, sshSize, pSsh, sshSize); pCurPtr += sshSize; // patch list memcpy_s(pCurPtr, patchListSize, pPatchList, patchListSize); pCurPtr += patchListSize; // add a data stream member iOpenCL::SPatchDataParameterStream dataParameterStream; dataParameterStream.Token = PATCH_TOKEN_DATA_PARAMETER_STREAM; dataParameterStream.Size = sizeof(SPatchDataParameterStream); dataParameterStream.DataParameterStreamSize = 0x40; memcpy_s(pCurPtr, sizeof(SPatchDataParameterStream), &dataParameterStream, sizeof(SPatchDataParameterStream)); pCurPtr += sizeof(SPatchDataParameterStream); // now build a program with this kernel data iOpenCL::SProgramBinaryHeader header = {}; NEO::PatchTokenBinary::ProgramFromPatchtokens programFromPatchtokens; programFromPatchtokens.decodeStatus = DecodeError::Success; programFromPatchtokens.header = &header; programFromPatchtokens.kernels.resize(1); auto &kernelFromPatchtokens = *programFromPatchtokens.kernels.rbegin(); auto kernelBlob = ArrayRef(reinterpret_cast(pKernelData), kernelDataSize); bool decodeSuccess = NEO::PatchTokenBinary::decodeKernelFromPatchtokensBlob(kernelBlob, kernelFromPatchtokens); EXPECT_TRUE(decodeSuccess); ProgramInfo programInfo; NEO::populateProgramInfo(programInfo, programFromPatchtokens); error = program->processProgramInfo(programInfo, *pContext->getDevice(0)); EXPECT_EQ(CL_SUCCESS, error); // 
extract the kernel info pKernelInfo = program->Program::getKernelInfo(kernelName.c_str(), rootDeviceIndex); // validate name EXPECT_STREQ(pKernelInfo->kernelDescriptor.kernelMetadata.kernelName.c_str(), kernelName.c_str()); // validate each heap if (pKernelHeap != nullptr) { EXPECT_EQ(0, memcmp(pKernelInfo->heapInfo.pKernelHeap, pKernelHeap, kernelHeapSize)); } if (pGsh != nullptr) { EXPECT_EQ(0, memcmp(pKernelInfo->heapInfo.pGsh, pGsh, gshSize)); } if (pDsh != nullptr) { EXPECT_EQ(0, memcmp(pKernelInfo->heapInfo.pDsh, pDsh, dshSize)); } if (pSsh != nullptr) { EXPECT_EQ(0, memcmp(pKernelInfo->heapInfo.pSsh, pSsh, sshSize)); } if (kernelHeapSize) { auto kernelAllocation = pKernelInfo->getGraphicsAllocation(); UNRECOVERABLE_IF(kernelAllocation == nullptr); auto &device = pContext->getDevice(0)->getDevice(); auto &hwHelper = NEO::HwHelper::get(device.getHardwareInfo().platform.eRenderCoreFamily); size_t isaPadding = hwHelper.getPaddingForISAAllocation(); EXPECT_EQ(kernelAllocation->getUnderlyingBufferSize(), kernelHeapSize + isaPadding); auto kernelIsa = kernelAllocation->getUnderlyingBuffer(); EXPECT_EQ(0, memcmp(kernelIsa, pKernelInfo->heapInfo.pKernelHeap, kernelHeapSize)); } else { EXPECT_EQ(nullptr, pKernelInfo->getGraphicsAllocation()); } } compute-runtime-22.14.22890/opencl/test/unit_test/fixtures/kernel_data_fixture.h000066400000000000000000000050601422164147700276710ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/program/kernel_info.h" #include "shared/test/common/mocks/mock_kernel_info.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "gtest/gtest.h" using namespace NEO; using namespace iOpenCL; class KernelDataTest : public testing::Test { public: KernelDataTest() { 
memset(&kernelBinaryHeader, 0x00, sizeof(SKernelBinaryHeaderCommon)); pCurPtr = nullptr; pKernelData = nullptr; kernelName = "test"; pDsh = nullptr; pGsh = nullptr; pKernelHeap = nullptr; pSsh = nullptr; pPatchList = nullptr; kernelDataSize = 0; kernelNameSize = (uint32_t)alignUp(strlen(kernelName.c_str()) + 1, sizeof(uint32_t)); dshSize = 0; gshSize = 0; kernelHeapSize = 0; sshSize = 0; patchListSize = 0; checkSum = 0; shaderHashCode = 0; kernelUnpaddedSize = 0; pKernelInfo = nullptr; } void buildAndDecode(); protected: void SetUp() override { kernelBinaryHeader.KernelNameSize = kernelNameSize; pContext = new MockContext; rootDeviceIndex = pContext->getDevice(0)->getRootDeviceIndex(); program = std::make_unique(pContext, false, toClDeviceVector(*pContext->getDevice(0))); } void TearDown() override { if (pKernelInfo->kernelAllocation) { pContext->getDevice(0)->getMemoryManager()->freeGraphicsMemory(pKernelInfo->kernelAllocation); const_cast(pKernelInfo)->kernelAllocation = nullptr; } program.reset(); delete pContext; alignedFree(pKernelData); } char *pCurPtr; char *pKernelData; SKernelBinaryHeaderCommon kernelBinaryHeader; std::string kernelName; void *pDsh; void *pGsh; void *pKernelHeap; void *pSsh; void *pPatchList; uint32_t kernelDataSize; uint32_t kernelNameSize; uint32_t dshSize; uint32_t gshSize; uint32_t kernelHeapSize; uint32_t sshSize; uint32_t patchListSize; uint32_t checkSum; uint64_t shaderHashCode; uint32_t kernelUnpaddedSize; std::unique_ptr program; MockContext *pContext; const KernelInfo *pKernelInfo; uint32_t rootDeviceIndex = std::numeric_limits::max(); }; compute-runtime-22.14.22890/opencl/test/unit_test/fixtures/kernel_work_group_info_fixture.h000066400000000000000000000042601422164147700321720ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/file_io.h" #include "shared/test/common/helpers/kernel_binary_helper.h" #include 
"shared/test/common/helpers/test_files.h" #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/api/cl_api_tests.h" using namespace NEO; struct clGetKernelWorkGroupInfoTest : public ApiFixture<>, public ::testing::Test { typedef ApiFixture BaseClass; void SetUp() override { BaseClass::SetUp(); std::unique_ptr pSource = nullptr; size_t sourceSize = 0; std::string testFile; kbHelper = new KernelBinaryHelper("CopyBuffer_simd16", false); testFile.append(clFiles); testFile.append("CopyBuffer_simd16.cl"); ASSERT_EQ(true, fileExists(testFile)); pSource = loadDataFromFile( testFile.c_str(), sourceSize); ASSERT_NE(0u, sourceSize); ASSERT_NE(nullptr, pSource); const char *sources[1] = {pSource.get()}; pProgram = clCreateProgramWithSource( pContext, 1, sources, &sourceSize, &retVal); EXPECT_NE(nullptr, pProgram); ASSERT_EQ(CL_SUCCESS, retVal); pSource.reset(); retVal = clBuildProgram( pProgram, 1, &testedClDevice, nullptr, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); kernel = clCreateKernel(pProgram, "CopyBuffer", &retVal); ASSERT_EQ(CL_SUCCESS, retVal); } void TearDown() override { retVal = clReleaseKernel(kernel); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); delete kbHelper; BaseClass::TearDown(); } cl_program pProgram = nullptr; cl_kernel kernel = nullptr; KernelBinaryHelper *kbHelper; }; struct clGetKernelWorkGroupInfoTests : public clGetKernelWorkGroupInfoTest, public ::testing::WithParamInterface { }; compute-runtime-22.14.22890/opencl/test/unit_test/fixtures/media_kernel_fixture.h000066400000000000000000000055761422164147700300530ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/test/common/helpers/default_hw_info.h" #include "opencl/test/unit_test/command_queue/enqueue_fixture.h" #include "opencl/test/unit_test/fixtures/hello_world_fixture.h" #include 
"opencl/test/unit_test/helpers/cl_hw_parse.h" namespace NEO { template struct MediaKernelFixture : public HelloWorldFixture, public ClHardwareParse, public ::testing::Test { typedef HelloWorldFixture Parent; using Parent::pCmdBuffer; using Parent::pCmdQ; using Parent::pContext; using Parent::pCS; using Parent::pDevice; using Parent::pKernel; using Parent::pProgram; using Parent::retVal; MediaKernelFixture() {} template void enqueueRegularKernel() { auto retVal = EnqueueKernelHelper<>::enqueueKernel( pCmdQ, pKernel); ASSERT_EQ(CL_SUCCESS, retVal); parseCommands(*pCmdQ); itorWalker1 = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), itorWalker1); } template void enqueueVmeKernel() { auto retVal = EnqueueKernelHelper<>::enqueueKernel( pCmdQ, pVmeKernel); ASSERT_EQ(CL_SUCCESS, retVal); parseCommands(*pCmdQ); itorWalker1 = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), itorWalker1); } void SetUp() override { skipVmeTest = !defaultHwInfo->capabilityTable.supportsVme; if (skipVmeTest) { GTEST_SKIP(); } Parent::kernelFilename = "vme_kernels"; Parent::kernelName = "non_vme_kernel"; Parent::SetUp(); ClHardwareParse::SetUp(); ASSERT_NE(nullptr, pKernel); ASSERT_EQ(false, pKernel->isVmeKernel()); cl_int retVal; // create the VME kernel pMultiDeviceVmeKernel = MultiDeviceKernel::create( pProgram, pProgram->getKernelInfosForKernel("device_side_block_motion_estimate_intel"), &retVal); pVmeKernel = pMultiDeviceVmeKernel->getKernel(pDevice->getRootDeviceIndex()); ASSERT_NE(nullptr, pVmeKernel); ASSERT_EQ(true, pVmeKernel->isVmeKernel()); } void TearDown() override { if (skipVmeTest) { return; } pMultiDeviceVmeKernel->release(); ClHardwareParse::TearDown(); Parent::TearDown(); } GenCmdList::iterator itorWalker1; GenCmdList::iterator itorWalker2; MultiDeviceKernel *pMultiDeviceVmeKernel = nullptr; Kernel *pVmeKernel = nullptr; bool skipVmeTest = false; }; } // namespace NEO 
compute-runtime-22.14.22890/opencl/test/unit_test/fixtures/multi_root_device_fixture.h000066400000000000000000000042561422164147700311420ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/os_interface/device_factory.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/variable_backup.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_context.h" namespace NEO { class MultiRootDeviceFixture : public ::testing::Test { public: void SetUp() override { deviceFactory = std::make_unique(3, 0); device1 = deviceFactory->rootDevices[1]; device2 = deviceFactory->rootDevices[2]; cl_device_id devices[] = {device1, device2}; context.reset(new MockContext(ClDeviceVector(devices, 2), false)); mockMemoryManager = static_cast(device1->getMemoryManager()); ASSERT_EQ(mockMemoryManager, device1->getMemoryManager()); } const uint32_t expectedRootDeviceIndex = 1; std::unique_ptr deviceFactory; MockClDevice *device1 = nullptr; MockClDevice *device2 = nullptr; std::unique_ptr context; MockMemoryManager *mockMemoryManager; }; class MultiRootDeviceWithSubDevicesFixture : public ::testing::Test { public: void SetUp() override { deviceFactory = std::make_unique(3, 2); device1 = deviceFactory->rootDevices[1]; device2 = deviceFactory->rootDevices[2]; cl_device_id devices[] = {device1, device2, deviceFactory->subDevices[2]}; context.reset(new MockContext(ClDeviceVector(devices, 3), false)); mockMemoryManager = static_cast(device1->getMemoryManager()); ASSERT_EQ(mockMemoryManager, device1->getMemoryManager()); } const uint32_t expectedRootDeviceIndex = 1; std::unique_ptr deviceFactory; MockClDevice *device1 = nullptr; MockClDevice *device2 = nullptr; std::unique_ptr context; 
MockMemoryManager *mockMemoryManager; }; }; // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/fixtures/multi_tile_fixture.h000066400000000000000000000024231422164147700275670ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/execution_environment/root_device_environment.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/default_hw_info.h" #include "shared/test/common/helpers/ult_hw_config.h" #include "shared/test/common/helpers/variable_backup.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/mocks/mock_platform.h" struct MultiTileFixture : public ::testing::Test { void SetUp() override { ultHwConfig.useMockedPrepareDeviceEnvironmentsFunc = false; ultHwConfig.useHwCsr = true; ultHwConfig.forceOsAgnosticMemoryManager = false; DebugManager.flags.CreateMultipleSubDevices.set(requiredDeviceCount); DebugManager.flags.DeferOsContextInitialization.set(0); platformsImpl->clear(); constructPlatform(); initPlatform(); }; protected: VariableBackup backup{&ultHwConfig}; DebugManagerStateRestore stateRestore; cl_uint requiredDeviceCount = 2u; }; struct FourTileFixture : public MultiTileFixture { FourTileFixture() : MultiTileFixture() { requiredDeviceCount = 4; } }; compute-runtime-22.14.22890/opencl/test/unit_test/fixtures/one_mip_level_image_fixture.h000066400000000000000000000040631422164147700314010ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/test/common/test_macros/test.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" namespace NEO { struct OneMipLevelImageFixture { template struct CommandQueueHwMock : CommandQueueHw { 
OneMipLevelImageFixture &fixture; CommandQueueHwMock(OneMipLevelImageFixture &fixture) : CommandQueueHw(&fixture.context, fixture.context.getDevice(0), nullptr, false), fixture(fixture) {} void enqueueHandlerHook(const unsigned int commandType, const MultiDispatchInfo &multiDispatchInfo) override { fixture.builtinOpsParamsCaptured = true; fixture.usedBuiltinOpsParams = multiDispatchInfo.peekBuiltinOpParams(); } }; void SetUp() { REQUIRE_IMAGES_OR_SKIP(defaultHwInfo); cl_image_desc imageDesc = Image3dDefaults::imageDesc; this->region[0] = imageDesc.image_width; this->region[1] = imageDesc.image_height; this->region[2] = imageDesc.image_depth; this->image.reset(createImage()); } void TearDown() { } Image *createImage() { cl_image_desc imageDesc = Image3dDefaults::imageDesc; imageDesc.num_mip_levels = 1; return ImageHelper::create(&context, &imageDesc); } Buffer *createBuffer() { return BufferHelper<>::create(&context); } template std::unique_ptr createQueue() { return std::unique_ptr(new CommandQueueHwMock(*this)); } MockContext context; std::unique_ptr image; size_t origin[4] = {0, 0, 0, 0xdeadbeef}; size_t region[4] = {0, 0, 0, 0}; void *cpuPtr = Image3dDefaults::hostPtr; BuiltinOpParams usedBuiltinOpsParams; bool builtinOpsParamsCaptured; }; } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/fixtures/platform_fixture.cpp000066400000000000000000000020561422164147700276010ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/platform_fixture.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "gtest/gtest.h" namespace NEO { void PlatformFixture::SetUp() { pPlatform = constructPlatform(); ASSERT_EQ(0u, pPlatform->getNumDevices()); // setup platform / context bool isInitialized = initPlatform(); ASSERT_EQ(true, isInitialized); num_devices = static_cast(pPlatform->getNumDevices()); 
ASSERT_GT(num_devices, 0u); auto allDev = pPlatform->getClDevices(); ASSERT_NE(nullptr, allDev); devices = new cl_device_id[num_devices]; for (cl_uint deviceOrdinal = 0; deviceOrdinal < num_devices; ++deviceOrdinal) { auto device = allDev[deviceOrdinal]; ASSERT_NE(nullptr, device); devices[deviceOrdinal] = device; } } void PlatformFixture::TearDown() { platformsImpl->clear(); delete[] devices; } } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/fixtures/platform_fixture.h000066400000000000000000000006311422164147700272430ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/api/cl_types.h" #include "opencl/source/platform/platform.h" namespace NEO { class PlatformFixture { protected: void SetUp(); void TearDown(); Platform *pPlatform = nullptr; cl_uint num_devices = 0u; cl_device_id *devices = nullptr; }; } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/fixtures/program_fixture.cpp000066400000000000000000000046061422164147700274270ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/program_fixture.h" #include "opencl/source/program/create.inl" #include "opencl/test/unit_test/mocks/mock_program.h" namespace NEO { void ProgramFixture::CreateProgramWithSource(Context *pContext, const std::string &sourceFileName) { Cleanup(); cl_int retVal = CL_SUCCESS; std::string testFile; testFile.append(clFiles); testFile.append(sourceFileName); ASSERT_EQ(true, fileExists(testFile)); knownSource = loadDataFromFile( testFile.c_str(), knownSourceSize); ASSERT_NE(0u, knownSourceSize); ASSERT_NE(nullptr, knownSource); const char *sources[1] = {knownSource.get()}; pProgram = Program::create( pContext, 1, sources, &knownSourceSize, retVal); ASSERT_NE(nullptr, pProgram); ASSERT_EQ(CL_SUCCESS, retVal); } void 
ProgramFixture::CreateProgramFromBinary(Context *pContext, const ClDeviceVector &deviceVector, const std::string &binaryFileName, cl_int &retVal, const std::string &options) { retVal = CL_SUCCESS; std::string testFile; retrieveBinaryKernelFilename(testFile, binaryFileName + "_", ".bin", options); knownSource = loadDataFromFile( testFile.c_str(), knownSourceSize); ASSERT_NE(0u, knownSourceSize); ASSERT_NE(nullptr, knownSource); pProgram = Program::create( pContext, deviceVector, &knownSourceSize, (const unsigned char **)&knownSource, nullptr, retVal); } void ProgramFixture::CreateProgramFromBinary(Context *pContext, const ClDeviceVector &deviceVector, const std::string &binaryFileName, const std::string &options) { Cleanup(); cl_int retVal = CL_SUCCESS; CreateProgramFromBinary( pContext, deviceVector, binaryFileName, retVal, options); ASSERT_NE(nullptr, pProgram); ASSERT_EQ(CL_SUCCESS, retVal); } } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/fixtures/program_fixture.h000066400000000000000000000026251422164147700270730ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/file_io.h" #include "shared/test/common/helpers/test_files.h" #include "opencl/source/program/program.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "gtest/gtest.h" namespace NEO { class ProgramFixture { public: void CreateProgramFromBinary(Context *pContext, const ClDeviceVector &deviceVector, const std::string &binaryFileName, cl_int &retVal, const std::string &options = ""); void CreateProgramFromBinary(Context *pContext, const ClDeviceVector &deviceVector, const std::string &binaryFileName, const std::string &options = ""); void CreateProgramWithSource(Context *pContext, const std::string &sourceFileName); protected: virtual void SetUp() { } virtual void TearDown() { Cleanup(); } void Cleanup() { if (pProgram != nullptr) { pProgram->release(); } 
knownSource.reset(); } MockProgram *pProgram = nullptr; std::unique_ptr knownSource; size_t knownSourceSize = 0u; }; } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/fixtures/run_kernel_fixture.h000066400000000000000000000024171422164147700275670ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/test/common/helpers/test_files.h" #include "shared/test/common/libult/global_environment.h" namespace NEO { struct CommandQueueHwFixture; struct CommandStreamFixture; // helper functions to enforce MockCompiler input files inline void overwriteBuiltInBinaryName( const std::string &filename, bool appendOptionsToFileName = false) { // set mock compiler to return expected kernel... MockCompilerDebugVars fclDebugVars; MockCompilerDebugVars igcDebugVars; retrieveBinaryKernelFilename(fclDebugVars.fileName, filename + "_", ".bc"); fclDebugVars.appendOptionsToFileName = appendOptionsToFileName; retrieveBinaryKernelFilename(igcDebugVars.fileName, filename + "_", ".gen"); igcDebugVars.appendOptionsToFileName = appendOptionsToFileName; gEnvironment->fclPushDebugVars(fclDebugVars); gEnvironment->igcPushDebugVars(igcDebugVars); } inline void restoreBuiltInBinaryName() { gEnvironment->igcPopDebugVars(); gEnvironment->fclPopDebugVars(); } struct RunKernelFixtureFactory { typedef NEO::CommandStreamFixture CommandStreamFixture; typedef NEO::CommandQueueHwFixture CommandQueueFixture; }; } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/fixtures/scenario_test_fixture.h000066400000000000000000000033041422164147700302610ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/platform_fixture.h" #include 
"opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "gtest/gtest.h" using namespace NEO; class ScenarioTest : public ::testing::Test, public PlatformFixture { using PlatformFixture::SetUp; protected: void SetUp() override { DebugManager.flags.EnableTimestampPacket.set(false); PlatformFixture::SetUp(); auto pDevice = pPlatform->getClDevice(0); ASSERT_NE(nullptr, pDevice); cl_device_id clDevice = pDevice; context = Context::create(nullptr, ClDeviceVector(&clDevice, 1), nullptr, nullptr, retVal); commandQueue = new MockCommandQueue(context, pDevice, 0, false); program = new MockProgram(context, false, toClDeviceVector(*pDevice)); kernelInternals = new MockKernelWithInternals(*pDevice, context); kernel = kernelInternals->mockKernel; ASSERT_NE(nullptr, kernel); } void TearDown() override { delete kernelInternals; delete commandQueue; context->release(); program->release(); PlatformFixture::TearDown(); } cl_int retVal; DebugManagerStateRestore dbgRestorer; MockCommandQueue *commandQueue = nullptr; MockContext *context = nullptr; MockKernelWithInternals *kernelInternals = nullptr; MockKernel *kernel = nullptr; MockProgram *program = nullptr; }; compute-runtime-22.14.22890/opencl/test/unit_test/fixtures/simple_arg_fixture.h000066400000000000000000000012751422164147700275460ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/test/common/fixtures/memory_management_fixture.h" #include "opencl/test/unit_test/command_queue/command_queue_fixture.h" #include "opencl/test/unit_test/command_stream/command_stream_fixture.h" #include "opencl/test/unit_test/fixtures/simple_arg_kernel_fixture.h" namespace NEO { struct SimpleArgFixtureFactory { typedef NEO::CommandStreamFixture CommandStreamFixture; typedef NEO::CommandQueueHwFixture CommandQueueFixture; typedef 
NEO::SimpleArgKernelFixture KernelFixture; }; } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/fixtures/simple_arg_kernel_fixture.h000066400000000000000000000250541422164147700311070ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/device/device.h" #include "shared/source/helpers/array_count.h" #include "shared/source/helpers/file_io.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "opencl/source/kernel/kernel.h" #include "opencl/source/program/program.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/fixtures/program_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "CL/cl.h" #include "compiler_options.h" #include "gtest/gtest.h" #include namespace NEO { class Kernel; class Program; template inline const char *type_name(T &) { return "unknown"; } template <> inline const char *type_name(char &) { return "char"; } template <> inline const char *type_name(int &) { return "int"; } template <> inline const char *type_name(float &) { return "float"; } template <> inline const char *type_name(short &) { return "short"; } template <> inline const char *type_name(unsigned char &) { return "unsigned char"; } template <> inline const char *type_name(unsigned int &) { return "unsigned int"; } template <> inline const char *type_name(unsigned short &) { return "unsigned short"; } class SimpleArgKernelFixture : public ProgramFixture { public: using ProgramFixture::SetUp; protected: virtual void SetUp(ClDevice *pDevice) { ProgramFixture::SetUp(); std::string testFile; int forTheName = 0; testFile.append("simple_arg_"); testFile.append(type_name(forTheName)); auto pos = testFile.find(" "); if (pos != (size_t)-1) { testFile.replace(pos, 1, "_"); } auto 
deviceVector = toClDeviceVector(*pDevice); pContext = Context::create(nullptr, deviceVector, nullptr, nullptr, retVal); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, pContext); CreateProgramFromBinary( pContext, deviceVector, testFile); ASSERT_NE(nullptr, pProgram); retVal = pProgram->build( pProgram->getDevices(), nullptr, false); ASSERT_EQ(CL_SUCCESS, retVal); // create a kernel pKernel = Kernel::create( pProgram, pProgram->getKernelInfoForKernel("SimpleArg"), *pDevice, &retVal); ASSERT_NE(nullptr, pKernel); ASSERT_EQ(CL_SUCCESS, retVal); } void TearDown() override { if (pKernel) { delete pKernel; pKernel = nullptr; } pContext->release(); ProgramFixture::TearDown(); } cl_int retVal = CL_SUCCESS; Kernel *pKernel = nullptr; MockContext *pContext = nullptr; }; class SimpleArgNonUniformKernelFixture : public ProgramFixture { public: using ProgramFixture::SetUp; protected: void SetUp(ClDevice *device, Context *context) { ProgramFixture::SetUp(); CreateProgramFromBinary( context, context->getDevices(), "simple_nonuniform", "-cl-std=CL2.0"); ASSERT_NE(nullptr, pProgram); retVal = pProgram->build( pProgram->getDevices(), "-cl-std=CL2.0", false); ASSERT_EQ(CL_SUCCESS, retVal); kernel = Kernel::create( pProgram, pProgram->getKernelInfoForKernel("simpleNonUniform"), *device, &retVal); ASSERT_NE(nullptr, kernel); ASSERT_EQ(CL_SUCCESS, retVal); } void TearDown() override { if (kernel) { delete kernel; kernel = nullptr; } ProgramFixture::TearDown(); } cl_int retVal = CL_SUCCESS; Kernel *kernel = nullptr; }; class SimpleKernelFixture : public ProgramFixture { public: using ProgramFixture::SetUp; protected: void SetUp(ClDevice *device, Context *context) { ProgramFixture::SetUp(); std::string programName("simple_kernels"); CreateProgramFromBinary( context, toClDeviceVector(*device), programName); ASSERT_NE(nullptr, pProgram); retVal = pProgram->build( pProgram->getDevices(), nullptr, false); ASSERT_EQ(CL_SUCCESS, retVal); for (size_t i = 0; i < maxKernelsCount; i++) { if ((1 
<< i) & kernelIds) { std::string kernelName("simple_kernel_"); kernelName.append(std::to_string(i)); kernels[i].reset(Kernel::create( pProgram, pProgram->getKernelInfoForKernel(kernelName.c_str()), *device, &retVal)); ASSERT_NE(nullptr, kernels[i]); ASSERT_EQ(CL_SUCCESS, retVal); } } } void TearDown() override { for (size_t i = 0; i < maxKernelsCount; i++) { if (kernels[i]) { kernels[i].reset(nullptr); } } ProgramFixture::TearDown(); } uint32_t kernelIds = 0; static constexpr size_t maxKernelsCount = std::numeric_limits::digits; cl_int retVal = CL_SUCCESS; std::array, maxKernelsCount> kernels; }; class SimpleKernelStatelessFixture : public ProgramFixture { public: DebugManagerStateRestore restorer; using ProgramFixture::SetUp; protected: void SetUp(ClDevice *device, Context *context) { ProgramFixture::SetUp(); DebugManager.flags.DisableStatelessToStatefulOptimization.set(true); DebugManager.flags.EnableStatelessToStatefulBufferOffsetOpt.set(false); CreateProgramFromBinary( context, toClDeviceVector(*device), "stateless_kernel"); ASSERT_NE(nullptr, pProgram); retVal = pProgram->build( pProgram->getDevices(), CompilerOptions::greaterThan4gbBuffersRequired.data(), false); ASSERT_EQ(CL_SUCCESS, retVal); kernel.reset(Kernel::create( pProgram, pProgram->getKernelInfoForKernel("statelessKernel"), *device, &retVal)); ASSERT_NE(nullptr, kernel); ASSERT_EQ(CL_SUCCESS, retVal); } void TearDown() override { ProgramFixture::TearDown(); } std::unique_ptr kernel = nullptr; cl_int retVal = CL_SUCCESS; }; class StatelessCopyKernelFixture : public ProgramFixture { public: DebugManagerStateRestore restorer; using ProgramFixture::SetUp; protected: void SetUp(ClDevice *device, Context *context) { ProgramFixture::SetUp(); DebugManager.flags.DisableStatelessToStatefulOptimization.set(true); DebugManager.flags.EnableStatelessToStatefulBufferOffsetOpt.set(false); CreateProgramFromBinary( context, toClDeviceVector(*device), "stateless_copy_buffer"); ASSERT_NE(nullptr, pProgram); retVal = 
pProgram->build( pProgram->getDevices(), CompilerOptions::greaterThan4gbBuffersRequired.data(), false); ASSERT_EQ(CL_SUCCESS, retVal); multiDeviceKernel.reset(MultiDeviceKernel::create( pProgram, pProgram->getKernelInfosForKernel("StatelessCopyBuffer"), &retVal)); kernel = static_cast(multiDeviceKernel->getKernel(device->getRootDeviceIndex())); ASSERT_NE(nullptr, kernel); ASSERT_EQ(CL_SUCCESS, retVal); } void TearDown() override { ProgramFixture::TearDown(); } std::unique_ptr multiDeviceKernel = nullptr; MockKernel *kernel = nullptr; cl_int retVal = CL_SUCCESS; }; class StatelessKernelWithIndirectAccessFixture : public ProgramFixture { public: DebugManagerStateRestore restorer; using ProgramFixture::SetUp; protected: void SetUp(ClDevice *device, Context *context) { ProgramFixture::SetUp(); DebugManager.flags.DisableStatelessToStatefulOptimization.set(true); DebugManager.flags.EnableStatelessToStatefulBufferOffsetOpt.set(false); CreateProgramFromBinary( context, toClDeviceVector(*device), "indirect_access_kernel"); ASSERT_NE(nullptr, pProgram); retVal = pProgram->build( pProgram->getDevices(), CompilerOptions::greaterThan4gbBuffersRequired.data(), false); ASSERT_EQ(CL_SUCCESS, retVal); multiDeviceKernel.reset(MultiDeviceKernel::create( pProgram, pProgram->getKernelInfosForKernel("testIndirect"), &retVal)); ASSERT_NE(nullptr, multiDeviceKernel); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(multiDeviceKernel->getKernel(device->getRootDeviceIndex())->getKernelInfo().hasIndirectStatelessAccess); } void TearDown() override { ProgramFixture::TearDown(); } std::unique_ptr multiDeviceKernel = nullptr; cl_int retVal = CL_SUCCESS; }; class BindlessKernelFixture : public ProgramFixture { public: using ProgramFixture::SetUp; void SetUp(ClDevice *device, Context *context) { ProgramFixture::SetUp(); this->deviceCl = device; this->contextCl = context; } void TearDown() override { ProgramFixture::TearDown(); } void createKernel(const std::string &programName, const std::string 
&kernelName) { DebugManager.flags.UseBindlessMode.set(1); CreateProgramFromBinary( contextCl, contextCl->getDevices(), programName); ASSERT_NE(nullptr, pProgram); retVal = pProgram->build( pProgram->getDevices(), nullptr, false); ASSERT_EQ(CL_SUCCESS, retVal); kernel.reset(Kernel::create( pProgram, pProgram->getKernelInfoForKernel(kernelName.c_str()), *deviceCl, &retVal)); ASSERT_NE(nullptr, kernel); ASSERT_EQ(CL_SUCCESS, retVal); } DebugManagerStateRestore restorer; std::unique_ptr kernel = nullptr; cl_int retVal = CL_SUCCESS; ClDevice *deviceCl = nullptr; Context *contextCl = nullptr; }; } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/fixtures/two_walker_fixture.h000066400000000000000000000034351422164147700276020ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/test/unit_test/command_queue/enqueue_fixture.h" #include "opencl/test/unit_test/helpers/cl_hw_parse.h" #include "hello_world_fixture.h" namespace NEO { // Generates two back-to-back walkers using the same kernel for testing purposes template struct TwoWalkerTest : public HelloWorldTest, public ClHardwareParse { typedef HelloWorldTest Parent; using Parent::pCmdBuffer; using Parent::pCmdQ; using Parent::pCS; using Parent::pKernel; template void enqueueTwoKernels() { auto retVal = EnqueueKernelHelper<>::enqueueKernel( pCmdQ, pKernel); ASSERT_EQ(CL_SUCCESS, retVal); // We have to parse after each enqueue* because // the CSR CS may insert commands in between parseCommands(*pCmdQ); retVal = EnqueueKernelHelper<>::enqueueKernel( pCmdQ, pKernel); ASSERT_EQ(CL_SUCCESS, retVal); parseCommands(*pCmdQ); itorWalker1 = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), itorWalker1); itorWalker2 = itorWalker1; ++itorWalker2; itorWalker2 = find(itorWalker2, cmdList.end()); ASSERT_NE(cmdList.end(), itorWalker2); } void SetUp() override { Parent::SetUp(); ClHardwareParse::SetUp(); } void TearDown() 
override { ClHardwareParse::TearDown(); Parent::TearDown(); } GenCmdList::iterator itorWalker1; GenCmdList::iterator itorWalker2; }; } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h000066400000000000000000000152261422164147700331660ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/command_stream/linear_stream.h" #include "shared/source/command_stream/preemption.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/cache_policy.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/libult/ult_command_stream_receiver.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/helpers/cl_hw_parse.h" namespace NEO { struct UltCommandStreamReceiverTest : public ClDeviceFixture, public ClHardwareParse, ::testing::Test { void SetUp() override { ClDeviceFixture::SetUp(); ClHardwareParse::SetUp(); size_t sizeStream = 512; size_t alignmentStream = 0x1000; cmdBuffer = alignedMalloc(sizeStream, alignmentStream); dshBuffer = alignedMalloc(sizeStream, alignmentStream); iohBuffer = alignedMalloc(sizeStream, alignmentStream); sshBuffer = alignedMalloc(sizeStream, alignmentStream); ASSERT_NE(nullptr, cmdBuffer); ASSERT_NE(nullptr, dshBuffer); ASSERT_NE(nullptr, iohBuffer); ASSERT_NE(nullptr, sshBuffer); commandStream.replaceBuffer(cmdBuffer, sizeStream); auto graphicsAllocation = new MockGraphicsAllocation(cmdBuffer, sizeStream); commandStream.replaceGraphicsAllocation(graphicsAllocation); dsh.replaceBuffer(dshBuffer, sizeStream); graphicsAllocation = new 
MockGraphicsAllocation(dshBuffer, sizeStream); dsh.replaceGraphicsAllocation(graphicsAllocation); ioh.replaceBuffer(iohBuffer, sizeStream); graphicsAllocation = new MockGraphicsAllocation(iohBuffer, sizeStream); ioh.replaceGraphicsAllocation(graphicsAllocation); ssh.replaceBuffer(sshBuffer, sizeStream); graphicsAllocation = new MockGraphicsAllocation(sshBuffer, sizeStream); ssh.replaceGraphicsAllocation(graphicsAllocation); flushTaskFlags.threadArbitrationPolicy = NEO::HwHelper::get(hardwareInfo.platform.eRenderCoreFamily).getDefaultThreadArbitrationPolicy(); pDevice->getGpgpuCommandStreamReceiver().setupContext(*pDevice->getDefaultEngine().osContext); } void TearDown() override { pDevice->getGpgpuCommandStreamReceiver().flushBatchedSubmissions(); delete dsh.getGraphicsAllocation(); delete ioh.getGraphicsAllocation(); delete ssh.getGraphicsAllocation(); delete commandStream.getGraphicsAllocation(); alignedFree(sshBuffer); alignedFree(iohBuffer); alignedFree(dshBuffer); alignedFree(cmdBuffer); ClHardwareParse::TearDown(); ClDeviceFixture::TearDown(); } template CompletionStamp flushTask(CommandStreamReceiverType &commandStreamReceiver, bool block = false, size_t startOffset = 0, bool requiresCoherency = false, bool lowPriority = false) { flushTaskFlags.blocking = block; flushTaskFlags.requiresCoherency = requiresCoherency; flushTaskFlags.lowPriority = lowPriority; flushTaskFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); return commandStreamReceiver.flushTask( commandStream, startOffset, &dsh, &ioh, &ssh, taskLevel, flushTaskFlags, *pDevice); } template void flushSmallTask(CommandStreamReceiverType &commandStreamReceiver, size_t startOffset = 0) { return commandStreamReceiver.flushSmallTask( commandStream, startOffset); } template void configureCSRHeapStatesToNonDirty() { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.dshState.updateAndCheck(&dsh); 
commandStreamReceiver.iohState.updateAndCheck(&ioh); commandStreamReceiver.sshState.updateAndCheck(&ssh); } template void configureCSRtoNonDirtyState(bool isL1CacheEnabled) { bool slmUsed = false; if (DebugManager.flags.ForceSLML3Config.get()) { slmUsed = true; } uint32_t L3Config = PreambleHelper::getL3Config(*defaultHwInfo, slmUsed); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.isPreambleSent = true; commandStreamReceiver.isEnginePrologueSent = true; commandStreamReceiver.lastPreemptionMode = pDevice->getPreemptionMode(); commandStreamReceiver.setMediaVFEStateDirty(false); auto gmmHelper = pDevice->getGmmHelper(); auto &hwHelper = HwHelper::get(defaultHwInfo->platform.eDisplayCoreFamily); auto mocsIndex = hwHelper.getMocsIndex(*gmmHelper, true, isL1CacheEnabled); commandStreamReceiver.latestSentStatelessMocsConfig = mocsIndex; commandStreamReceiver.lastSentL3Config = L3Config; configureCSRHeapStatesToNonDirty(); commandStreamReceiver.taskLevel = taskLevel; commandStreamReceiver.lastMediaSamplerConfig = 0; commandStreamReceiver.lastSentUseGlobalAtomics = false; commandStreamReceiver.streamProperties.stateComputeMode.setProperties(0, GrfConfig::DefaultGrfNumber, hwHelper.getDefaultThreadArbitrationPolicy(), *defaultHwInfo); } template UltCommandStreamReceiver &getUltCommandStreamReceiver() { return reinterpret_cast &>(pDevice->getGpgpuCommandStreamReceiver()); } DispatchFlags flushTaskFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); uint32_t taskLevel = 42; LinearStream commandStream; IndirectHeap dsh = {nullptr}; IndirectHeap ioh = {nullptr}; IndirectHeap ssh = {nullptr}; void *cmdBuffer = nullptr; void *dshBuffer = nullptr; void *iohBuffer = nullptr; void *sshBuffer = nullptr; uint32_t latestSentDcFlushTaskCount; uint32_t latestSentNonDcFlushTaskCount; uint32_t dcFlushRequiredTaskCount; }; } // namespace NEO 
compute-runtime-22.14.22890/opencl/test/unit_test/gen11/000077500000000000000000000000001422164147700225425ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/gen11/CMakeLists.txt000066400000000000000000000022171422164147700253040ustar00rootroot00000000000000#
# Copyright (C) 2019-2022 Intel Corporation
#
# SPDX-License-Identifier: MIT
#

# Gen11 unit-test sources; only added to igdrcl_tests when TESTS_GEN11 is set.
if(TESTS_GEN11)
  set(IGDRCL_SRCS_tests_gen11
      ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
      ${CMAKE_CURRENT_SOURCE_DIR}/coherency_tests_gen11.cpp
      ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_hw_tests_gen11.cpp
      ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_kernel_gen11.cpp
      ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_media_kernel_gen11.cpp
      ${CMAKE_CURRENT_SOURCE_DIR}/hw_helper_tests_gen11.cpp
      ${CMAKE_CURRENT_SOURCE_DIR}/image_tests_gen11.cpp
      ${CMAKE_CURRENT_SOURCE_DIR}/kernel_tests_gen11.cpp
      ${CMAKE_CURRENT_SOURCE_DIR}/sampler_tests_gen11.cpp
      ${CMAKE_CURRENT_SOURCE_DIR}/tbx_command_stream_receiver_tests_gen11.cpp
      ${CMAKE_CURRENT_SOURCE_DIR}/test_device_caps_gen11.cpp
      ${CMAKE_CURRENT_SOURCE_DIR}/test_platform_caps_gen11.cpp
      ${CMAKE_CURRENT_SOURCE_DIR}/test_sample_gen11.cpp
  )

  # Append whatever the NEO_CORE_TESTS_GEN11 global property holds to the same target.
  get_property(NEO_CORE_TESTS_GEN11 GLOBAL PROPERTY NEO_CORE_TESTS_GEN11)
  list(APPEND IGDRCL_SRCS_tests_gen11 ${NEO_CORE_TESTS_GEN11})

  target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen11})
  add_subdirectories()
endif()
compute-runtime-22.14.22890/opencl/test/unit_test/gen11/coherency_tests_gen11.cpp000066400000000000000000000141311422164147700274420ustar00rootroot00000000000000/*
 * Copyright (C) 2019-2022 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "shared/source/command_stream/command_stream_receiver_hw.h"
#include "shared/source/gen11/reg_configs.h"
#include "shared/source/helpers/hw_helper.h"
#include "shared/test/common/cmd_parse/hw_parse.h"
#include "shared/test/common/helpers/dispatch_flags_helper.h"
#include "shared/test/common/mocks/mock_allocation_properties.h"
#include "shared/test/common/mocks/mock_device.h"
#include "shared/test/common/test_macros/test.h"

using namespace NEO;

// Base fixture: creates a MockDevice and installs a CSR subclass (myCsr) that
// exposes commandStream and streamProperties to the tests.
struct Gen11CoherencyRequirements : public ::testing::Test {
    typedef typename ICLFamily::MI_LOAD_REGISTER_IMM MI_LOAD_REGISTER_IMM;

    struct myCsr : public CommandStreamReceiverHw {
        using CommandStreamReceiver::commandStream;
        using CommandStreamReceiver::streamProperties;
        myCsr(ExecutionEnvironment &executionEnvironment) : CommandStreamReceiverHw(executionEnvironment, 0, 1){};
        CsrSizeRequestFlags *getCsrRequestFlags() { return &csrSizeRequestFlags; }
    };

    // Forces the isCoherencyRequired stream property (dirty flag and value) and
    // mirrors the requested value into the dispatch flags.
    void overrideCoherencyRequest(bool requestChanged, bool requireCoherency) {
        csr->streamProperties.stateComputeMode.isCoherencyRequired.isDirty = requestChanged;
        csr->streamProperties.stateComputeMode.isCoherencyRequired.value = requireCoherency;
        flags.requiresCoherency = requireCoherency;
    }

    void SetUp() override {
        device.reset(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()));
        csr = new myCsr(*device->executionEnvironment);
        // The raw pointer is handed to the device; this fixture never deletes csr itself.
        device->resetCommandStreamReceiver(csr);
    }

    myCsr *csr = nullptr;
    std::unique_ptr device;
    DispatchFlags flags = DispatchFlagsHelper::createDefaultDispatchFlags();
};

// stateComputeMode.isDirty() follows the dirty flag passed to the override,
// regardless of the coherency value.
GEN11TEST_F(Gen11CoherencyRequirements, GivenSettingsWhenCoherencyRequestedThenProgrammingIsCorrect) {
    overrideCoherencyRequest(false, false);
    EXPECT_FALSE(csr->streamProperties.stateComputeMode.isDirty());

    overrideCoherencyRequest(false, true);
    EXPECT_FALSE(csr->streamProperties.stateComputeMode.isDirty());

    overrideCoherencyRequest(true, true);
    EXPECT_TRUE(csr->streamProperties.stateComputeMode.isDirty());

    overrideCoherencyRequest(true, false);
    EXPECT_TRUE(csr->streamProperties.stateComputeMode.isDirty());
}

// Each programComputeMode() call is expected to append exactly one
// MI_LOAD_REGISTER_IMM targeting gen11HdcModeRegister::address; the command is
// compared byte-for-byte against a locally built expectation.
GEN11TEST_F(Gen11CoherencyRequirements, GivenSettingsWhenCoherencyRequestedThenHdcModeCmdValuesAreCorrect) {
    auto lriSize = sizeof(MI_LOAD_REGISTER_IMM);
    char buff[MemoryConstants::pageSize];
    LinearStream stream(buff, MemoryConstants::pageSize);

    auto expectedCmd = FamilyType::cmdInitLoadRegisterImm;
    expectedCmd.setRegisterOffset(gen11HdcModeRegister::address);
    expectedCmd.setDataDword(DwordBuilder::build(gen11HdcModeRegister::forceNonCoherentEnableBit, true));

    overrideCoherencyRequest(true, false);
    csr->programComputeMode(stream, flags, *defaultHwInfo);
    EXPECT_EQ(lriSize, stream.getUsed());

    auto cmd = reinterpret_cast(stream.getCpuBase());
    EXPECT_TRUE(memcmp(&expectedCmd, cmd, lriSize) == 0);

    overrideCoherencyRequest(true, true);
    csr->programComputeMode(stream, flags, *defaultHwInfo);
    EXPECT_EQ(lriSize * 2, stream.getUsed());

    // The second command sits directly after the first one in the stream.
    cmd = reinterpret_cast(ptrOffset(stream.getCpuBase(), lriSize));
    expectedCmd.setDataDword(DwordBuilder::build(gen11HdcModeRegister::forceNonCoherentEnableBit, true, false));
    EXPECT_TRUE(memcmp(&expectedCmd, cmd, lriSize) == 0);
}

// Fixture for flush-level checks: flushTask() records the CSR stream offset
// before each flush so findMmio() only inspects commands from the latest flush.
struct Gen11CoherencyProgramingTest : public Gen11CoherencyRequirements {

    void SetUp() override {
        Gen11CoherencyRequirements::SetUp();
        startOffset = csr->commandStream.getUsed();
    }

    // Flushes one task with the requested coherency setting; a single temporary
    // page-sized allocation serves as the command stream and all three heaps.
    void flushTask(bool coherencyRequired) {
        flags.requiresCoherency = coherencyRequired;

        auto graphicAlloc = csr->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize});
        IndirectHeap stream(graphicAlloc);

        startOffset = csr->commandStream.getUsed();
        csr->flushTask(stream, 0, &stream, &stream, &stream, 0, flags, *device);

        csr->getMemoryManager()->freeGraphicsMemory(graphicAlloc);
    };

    // Parses the CSR stream from startOffset and checks whether a command with the
    // given register offset is present; more than one match also fails the test.
    void findMmio(bool expectToBeProgrammed, uint32_t registerAddress) {
        HardwareParse hwParser;
        hwParser.parseCommands(csr->commandStream, startOffset);
        bool foundOne = false;

        for (auto it = hwParser.cmdList.begin(); it != hwParser.cmdList.end(); it++) {
            auto cmd = genCmdCast(*it);
            if (cmd && cmd->getRegisterOffset() == registerAddress) {
                EXPECT_FALSE(foundOne);
                foundOne = true;
            }
        }
        EXPECT_EQ(expectToBeProgrammed, foundOne);
    };

    // Convenience overload for gen11HdcModeRegister::address.
    void findMmio(bool expectToBeProgrammed) {
        findMmio(expectToBeProgrammed, gen11HdcModeRegister::address);
    }

    size_t startOffset;
};
// The first flush programs the HDC mode MMIO whether or not coherency is requested.
GEN11TEST_F(Gen11CoherencyProgramingTest, givenCsrWhenFlushFirstTaskWithoutCoherencyRequiredThenProgramMmio) {
    flushTask(false);
    findMmio(true);
}

GEN11TEST_F(Gen11CoherencyProgramingTest, givenCsrWhenFlushFirstTaskWithCoherencyRequiredThenProgramMmio) {
    flushTask(true);
    findMmio(true);
}

// A second flush with an unchanged coherency request must not reprogram the MMIO.
GEN11TEST_F(Gen11CoherencyProgramingTest, givenCsrWithFlushedFirstTaskWithCoherencyRequiredWhenFlushNextTaskWithoutChangingCoherencyRequirementThenDoNotProgramMmio) {
    flushTask(true);
    flushTask(true);
    findMmio(false);
}

GEN11TEST_F(Gen11CoherencyProgramingTest, givenCsrWithFlushedFirstTaskWithoutCoherencyRequiredWhenFlushNextTaskWithoutChangingCoherencyRequirementThenDoNotProgramMmio) {
    flushTask(false);
    flushTask(false);
    findMmio(false);
}

// A second flush with a changed coherency request must reprogram the MMIO.
GEN11TEST_F(Gen11CoherencyProgramingTest, givenCsrWithFlushedFirstTaskWithCoherencyRequiredWhenFlushNextTaskWithChangingCoherencyRequirementThenProgramMmio) {
    flushTask(true);
    flushTask(false);
    findMmio(true);
}

GEN11TEST_F(Gen11CoherencyProgramingTest, givenCsrWithFlushedFirstTaskWithoutCoherencyRequiredWhenFlushNextTaskWithChangingCoherencyRequirementThenProgramMmio) {
    flushTask(false);
    flushTask(true);
    findMmio(true);
}
compute-runtime-22.14.22890/opencl/test/unit_test/gen11/command_stream_receiver_hw_tests_gen11.cpp000066400000000000000000000042201422164147700330340ustar00rootroot00000000000000/*
 * Copyright (C) 2019-2021 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/command_stream/linear_stream.h"
#include "shared/test/common/cmd_parse/hw_parse.h"
#include "shared/test/common/mocks/mock_csr.h"
#include "shared/test/common/test_macros/test.h"

#include "opencl/source/command_queue/command_queue_hw.h"
#include "opencl/test/unit_test/fixtures/cl_device_fixture.h"
#include "opencl/test/unit_test/mocks/mock_command_queue.h"
#include "opencl/test/unit_test/mocks/mock_context.h"
#include "opencl/test/unit_test/mocks/mock_event.h"
#include "opencl/test/unit_test/mocks/mock_kernel.h"

#include "gtest/gtest.h"
#include "reg_configs_common.h"

using namespace NEO;

// The shared test implementations (the *Impl helpers called below) come from this .inl.
#include "opencl/test/unit_test/command_stream/command_stream_receiver_hw_tests.inl"

using CommandStreamReceiverHwTestGen11 = CommandStreamReceiverHwTest;

GEN11TEST_F(CommandStreamReceiverHwTestGen11, GivenKernelWithSlmWhenPreviousNOSLML3WasSentThenProgramL3WithSLML3Config) {
    givenKernelWithSlmWhenPreviousNOSLML3WasSentThenProgramL3WithSLML3ConfigImpl();
}

GEN11TEST_F(CommandStreamReceiverHwTestGen11, GivenBlockedKernelWithSlmWhenPreviousNOSLML3WasSentThenProgramL3WithSLML3ConfigAfterUnblocking) {
    givenBlockedKernelWithSlmWhenPreviousNOSLML3WasSentThenProgramL3WithSLML3ConfigAfterUnblockingImpl();
}

// The family's default-initialized MI_SEMAPHORE_WAIT must use memory-poll mode.
GEN11TEST_F(CommandStreamReceiverHwTestGen11, whenProgrammingMiSemaphoreWaitThenSetRegisterPollModeMemoryPoll) {
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
    MI_SEMAPHORE_WAIT miSemaphoreWait = FamilyType::cmdInitMiSemaphoreWait;
    EXPECT_EQ(MI_SEMAPHORE_WAIT::REGISTER_POLL_MODE::REGISTER_POLL_MODE_MEMORY_POLL, miSemaphoreWait.getRegisterPollMode());
}

// After getClearColorAllocation() the mock's clearColorAllocation member is still nullptr.
GEN11TEST_F(CommandStreamReceiverHwTestGen11, givenCommandStreamReceiverWhenGetClearColorAllocationIsCalledThenNothingHappens) {
    MockCsrHw commandStreamReceiver(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    commandStreamReceiver.getClearColorAllocation();
    EXPECT_EQ(nullptr, commandStreamReceiver.clearColorAllocation);
}
compute-runtime-22.14.22890/opencl/test/unit_test/gen11/ehl/000077500000000000000000000000001422164147700233125ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/gen11/ehl/CMakeLists.txt000066400000000000000000000014321422164147700260520ustar00rootroot00000000000000#
# Copyright (C) 2019-2022 Intel Corporation
#
# SPDX-License-Identifier: MIT
#

# EHL unit-test sources; only added when TESTS_EHL is set.
if(TESTS_EHL)
  # The exclude list is both appended to the global IGDRCL_SRCS_tests_excludes
  # property and compiled as part of the EHL test sources.
  set(IGDRCL_SRCS_tests_gen11_ehl_excludes
      ${CMAKE_CURRENT_SOURCE_DIR}/excludes_ocl_ehl.cpp
  )
  set_property(GLOBAL APPEND PROPERTY IGDRCL_SRCS_tests_excludes ${IGDRCL_SRCS_tests_gen11_ehl_excludes})

  set(IGDRCL_SRCS_tests_gen11_ehl
      ${IGDRCL_SRCS_tests_gen11_ehl_excludes}
      ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
      ${CMAKE_CURRENT_SOURCE_DIR}/test_device_caps_ehl.cpp
      ${CMAKE_CURRENT_SOURCE_DIR}/test_hw_info_config_ehl.cpp
  )
  target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen11_ehl})
  add_subdirectories()
  neo_copy_test_files_with_revision(copy_test_files_ehl_0 ehl 0)
  add_dependencies(copy_test_files_per_product copy_test_files_ehl_0)
endif()
compute-runtime-22.14.22890/opencl/test/unit_test/gen11/ehl/excludes_ocl_ehl.cpp000066400000000000000000000004511422164147700273170ustar00rootroot00000000000000/*
 * Copyright (C) 2022 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "shared/test/common/test_macros/test.h"

// Excludes this DeviceFactoryTest case for the IGFX_ELKHARTLAKE product.
HWTEST_EXCLUDE_PRODUCT(DeviceFactoryTest, givenInvalidHwConfigStringWhenPrepareDeviceEnvironmentsForProductFamilyOverrideThenThrowsException, IGFX_ELKHARTLAKE);
compute-runtime-22.14.22890/opencl/test/unit_test/gen11/ehl/linux/000077500000000000000000000000001422164147700244515ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/gen11/ehl/linux/CMakeLists.txt000066400000000000000000000005331422164147700272120ustar00rootroot00000000000000#
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
#

set(IGDRCL_SRCS_tests_gen11_ehl_linux
    ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
    ${CMAKE_CURRENT_SOURCE_DIR}/hw_info_config_tests_ehl.cpp
)
# The Linux-specific tests and the dll subdirectory are only added on UNIX.
if(UNIX)
  target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen11_ehl_linux})
  add_subdirectory(dll)
endif()
compute-runtime-22.14.22890/opencl/test/unit_test/gen11/ehl/linux/dll/000077500000000000000000000000001422164147700252245ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/gen11/ehl/linux/dll/CMakeLists.txt000066400000000000000000000004751422164147700277720ustar00rootroot00000000000000#
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
#

# These sources go into a different target: igdrcl_linux_dll_tests.
set(IGDRCL_SRCS_linux_dll_tests_gen11_ehl
    ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
    ${CMAKE_CURRENT_SOURCE_DIR}/device_id_tests_ehl.cpp
)
target_sources(igdrcl_linux_dll_tests PRIVATE ${IGDRCL_SRCS_linux_dll_tests_gen11_ehl})
compute-runtime-22.14.22890/opencl/test/unit_test/gen11/ehl/linux/dll/device_id_tests_ehl.cpp000066400000000000000000000020471422164147700317200ustar00rootroot00000000000000/*
 * Copyright (C) 2019-2022 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "shared/test/common/fixtures/linux/device_id_fixture.h"

using namespace NEO;

// Passes nine EHL device-id descriptors, all pointing at EHL_HW_CONFIG, to the
// fixture's testImpl().
TEST_F(DeviceIdTests, GivenEhlSpportedDeviceIdWhenCheckingHwSetupThenItIsCorrect) {
    std::array expectedDescriptors = {{
        {0x4500, &EHL_HW_CONFIG::hwInfo, &EHL_HW_CONFIG::setupHardwareInfo},
        {0x4541, &EHL_HW_CONFIG::hwInfo, &EHL_HW_CONFIG::setupHardwareInfo},
        {0x4551, &EHL_HW_CONFIG::hwInfo, &EHL_HW_CONFIG::setupHardwareInfo},
        {0x4571, &EHL_HW_CONFIG::hwInfo, &EHL_HW_CONFIG::setupHardwareInfo},
        {0x4555, &EHL_HW_CONFIG::hwInfo, &EHL_HW_CONFIG::setupHardwareInfo},
        {0x4E51, &EHL_HW_CONFIG::hwInfo, &EHL_HW_CONFIG::setupHardwareInfo},
        {0x4E61, &EHL_HW_CONFIG::hwInfo, &EHL_HW_CONFIG::setupHardwareInfo},
        {0x4E71, &EHL_HW_CONFIG::hwInfo, &EHL_HW_CONFIG::setupHardwareInfo},
        {0x4E55, &EHL_HW_CONFIG::hwInfo, &EHL_HW_CONFIG::setupHardwareInfo},
    }};

    testImpl(expectedDescriptors);
}
compute-runtime-22.14.22890/opencl/test/unit_test/gen11/ehl/linux/hw_info_config_tests_ehl.cpp000066400000000000000000000057671422164147700322240ustar00rootroot00000000000000/*
 * Copyright (C) 2019-2022 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "shared/test/unit_test/helpers/gtest_helpers.h"

#include "opencl/test/unit_test/os_interface/linux/hw_info_config_linux_tests.h"

using namespace NEO;

// Linux hw-info fixture preset with an EHL device id and a sub-slice value of 8
// on the mocked drm.
struct HwInfoConfigTestLinuxEhl : HwInfoConfigTestLinux {
    void SetUp() override {
        HwInfoConfigTestLinux::SetUp();

        drm->storedDeviceID = IEHL_1x4x8_SUPERSKU_DEVICE_A0_ID;
        drm->storedSSVal = 8;
    }
};
EHLTEST_F(HwInfoConfigTestLinuxEhl, GivenEhlThenHwInfoIsCorrect) { auto hwInfoConfig = HwInfoConfigHw::get(); int ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ((unsigned short)drm->storedDeviceID, outHwInfo.platform.usDeviceID); EXPECT_EQ((unsigned short)drm->storedDeviceRevID, outHwInfo.platform.usRevId); EXPECT_EQ((uint32_t)drm->storedEUVal, outHwInfo.gtSystemInfo.EUCount); EXPECT_EQ((uint32_t)drm->storedSSVal, outHwInfo.gtSystemInfo.SubSliceCount); EXPECT_EQ(1u, outHwInfo.gtSystemInfo.SliceCount); } EHLTEST_F(HwInfoConfigTestLinuxEhl, GivenInvalidDeviceIdWhenConfiguringHwInfoThenNegativeOneReturned) { auto hwInfoConfig = HwInfoConfigHw::get(); drm->storedRetValForDeviceID = -1; int ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-1, ret); drm->storedRetValForDeviceID = 0; drm->storedRetValForDeviceRevID = -1; ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-1, ret); drm->storedRetValForDeviceRevID = 0; drm->failRetTopology = true; drm->storedRetValForEUVal = -1; ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-1, ret); drm->storedRetValForEUVal = 0; drm->storedRetValForSSVal = -1; ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-1, ret); } template class EhlHwInfoTests : public ::testing::Test {}; TEST(EhlHwInfoTests, WhenGtIsSetupThenGtSystemInfoIsCorrect) { HardwareInfo hwInfo = *defaultHwInfo; auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); DrmMock drm(*executionEnvironment->rootDeviceEnvironments[0]); GT_SYSTEM_INFO >SystemInfo = hwInfo.gtSystemInfo; DeviceDescriptor device = {0, &hwInfo, &EHL_HW_CONFIG::setupHardwareInfo}; int ret = drm.setupHardwareInfo(&device, false); EXPECT_EQ(ret, 0); 
EXPECT_GT(gtSystemInfo.EUCount, 0u); EXPECT_GT(gtSystemInfo.ThreadCount, 0u); EXPECT_GT(gtSystemInfo.SliceCount, 0u); EXPECT_GT(gtSystemInfo.SubSliceCount, 0u); EXPECT_GT(gtSystemInfo.DualSubSliceCount, 0u); EXPECT_GT_VAL(gtSystemInfo.L3CacheSizeInKb, 0u); EXPECT_EQ(gtSystemInfo.CsrSizeInMb, 8u); EXPECT_FALSE(gtSystemInfo.IsDynamicallyPopulated); } compute-runtime-22.14.22890/opencl/test/unit_test/gen11/ehl/test_device_caps_ehl.cpp000066400000000000000000000016671422164147700301640ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" using namespace NEO; using EhlTest = Test; EHLTEST_F(EhlTest, givenDeviceIdWhenAskingForSimulationThenReturnValidValue) { unsigned short ehlSimulationIds[2] = { IEHL_1x4x8_SUPERSKU_DEVICE_A0_ID, 0, // default, non-simulation }; for (auto id : ehlSimulationIds) { auto mockDevice = std::unique_ptr(createWithUsDeviceId(id)); EXPECT_NE(nullptr, mockDevice); if (id == 0) { EXPECT_FALSE(mockDevice->isSimulation()); } else { EXPECT_TRUE(mockDevice->isSimulation()); } } } EHLTEST_F(EhlTest, givenEhlWhenSlmSizeIsRequiredThenReturnCorrectValue) { EXPECT_EQ(64u, pDevice->getHardwareInfo().capabilityTable.slmSize); } compute-runtime-22.14.22890/opencl/test/unit_test/gen11/ehl/test_hw_info_config_ehl.cpp000066400000000000000000000057041422164147700306710ustar00rootroot00000000000000/* * Copyright (C) 2019-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/helpers/default_hw_info.h" #include "shared/test/common/test_macros/test.h" using namespace NEO; using EhlHwInfo = ::testing::Test; EHLTEST_F(EhlHwInfo, givenHwInfoConfigStringThenAfterSetupResultingVmeIsDisabled) { HardwareInfo hwInfo = *defaultHwInfo; hardwareInfoSetup[productFamily](&hwInfo, false, 0x100040008); EXPECT_FALSE(hwInfo.capabilityTable.ftrSupportsVmeAvcTextureSampler); 
EXPECT_FALSE(hwInfo.capabilityTable.ftrSupportsVmeAvcPreemption); EXPECT_FALSE(hwInfo.capabilityTable.supportsVme); } EHLTEST_F(EhlHwInfo, givenBoolWhenCallEhlHardwareInfoSetupThenFeatureTableAndWorkaroundTableAreSetCorrect) { bool boolValue[]{ true, false}; HardwareInfo hwInfo = *defaultHwInfo; GT_SYSTEM_INFO >SystemInfo = hwInfo.gtSystemInfo; FeatureTable &featureTable = hwInfo.featureTable; WorkaroundTable &workaroundTable = hwInfo.workaroundTable; for (auto setParamBool : boolValue) { gtSystemInfo = {0}; featureTable = {}; workaroundTable = {}; hardwareInfoSetup[productFamily](&hwInfo, setParamBool, 0x100040008); EXPECT_EQ(setParamBool, featureTable.flags.ftrL3IACoherency); EXPECT_EQ(setParamBool, featureTable.flags.ftrPPGTT); EXPECT_EQ(setParamBool, featureTable.flags.ftrSVM); EXPECT_EQ(setParamBool, featureTable.flags.ftrIA32eGfxPTEs); EXPECT_EQ(setParamBool, featureTable.flags.ftrStandardMipTailFormat); EXPECT_EQ(setParamBool, featureTable.flags.ftrDisplayYTiling); EXPECT_EQ(setParamBool, featureTable.flags.ftrTranslationTable); EXPECT_EQ(setParamBool, featureTable.flags.ftrUserModeTranslationTable); EXPECT_EQ(setParamBool, featureTable.flags.ftrTileMappedResource); EXPECT_EQ(setParamBool, featureTable.flags.ftrEnableGuC); EXPECT_EQ(setParamBool, featureTable.flags.ftrFbc); EXPECT_EQ(setParamBool, featureTable.flags.ftrFbc2AddressTranslation); EXPECT_EQ(setParamBool, featureTable.flags.ftrFbcBlitterTracking); EXPECT_EQ(setParamBool, featureTable.flags.ftrFbcCpuTracking); EXPECT_EQ(setParamBool, featureTable.flags.ftrTileY); EXPECT_EQ(setParamBool, featureTable.flags.ftrAstcHdr2D); EXPECT_EQ(setParamBool, featureTable.flags.ftrAstcLdr2D); EXPECT_EQ(setParamBool, featureTable.flags.ftr3dMidBatchPreempt); EXPECT_EQ(setParamBool, featureTable.flags.ftrGpGpuMidBatchPreempt); EXPECT_EQ(setParamBool, featureTable.flags.ftrGpGpuMidThreadLevelPreempt); EXPECT_EQ(setParamBool, featureTable.flags.ftrGpGpuThreadGroupLevelPreempt); EXPECT_EQ(setParamBool, 
featureTable.flags.ftrPerCtxtPreemptionGranularityControl); EXPECT_EQ(setParamBool, workaroundTable.flags.wa4kAlignUVOffsetNV12LinearSurface); EXPECT_EQ(setParamBool, workaroundTable.flags.waReportPerfCountUseGlobalContextID); } } compute-runtime-22.14.22890/opencl/test/unit_test/gen11/enqueue_kernel_gen11.cpp000066400000000000000000000047371422164147700272630ustar00rootroot00000000000000/* * Copyright (C) 2019-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/helpers/cl_hw_parse.h" #include "opencl/test/unit_test/helpers/static_size3.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "reg_configs_common.h" namespace NEO { using Gen11EnqueueTest = Test; GEN11TEST_F(Gen11EnqueueTest, givenKernelRequiringIndependentForwardProgressWhenKernelIsSubmittedThenDefaultPolicyIsProgrammed) { MockContext mc; CommandQueueHw cmdQ{&mc, pClDevice, 0, false}; SPatchExecutionEnvironment sPatchExecEnv = {}; sPatchExecEnv.SubgroupIndependentForwardProgressRequired = true; MockKernelWithInternals mockKernel(*pClDevice, sPatchExecEnv); cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, StatickSize3<1, 1, 1>(), nullptr, 0, nullptr, nullptr); ClHardwareParse hwParser; hwParser.parseCommands(cmdQ); auto cmd = findMmioCmd(hwParser.cmdList.begin(), hwParser.cmdList.end(), RowChickenReg4::address); ASSERT_NE(nullptr, cmd); EXPECT_EQ(RowChickenReg4::regDataForArbitrationPolicy[HwHelperHw::get().getDefaultThreadArbitrationPolicy()], cmd->getDataDword()); EXPECT_EQ(1U, countMmio(hwParser.cmdList.begin(), hwParser.cmdList.end(), RowChickenReg4::address)); } GEN11TEST_F(Gen11EnqueueTest, givenKernelNotRequiringIndependentForwardProgressWhenKernelIsSubmittedThenAgeBasedPolicyIsProgrammed) { MockContext mc; CommandQueueHw cmdQ{&mc, pClDevice, 0, false}; 
SPatchExecutionEnvironment sPatchExecEnv = {}; sPatchExecEnv.SubgroupIndependentForwardProgressRequired = false; MockKernelWithInternals mockKernel(*pClDevice, sPatchExecEnv); cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, StatickSize3<1, 1, 1>(), nullptr, 0, nullptr, nullptr); ClHardwareParse hwParser; hwParser.parseCommands(cmdQ); auto cmd = findMmioCmd(hwParser.cmdList.begin(), hwParser.cmdList.end(), RowChickenReg4::address); ASSERT_NE(nullptr, cmd); EXPECT_EQ(RowChickenReg4::regDataForArbitrationPolicy[ThreadArbitrationPolicy::AgeBased], cmd->getDataDword()); EXPECT_EQ(1U, countMmio(hwParser.cmdList.begin(), hwParser.cmdList.end(), RowChickenReg4::address)); } } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/gen11/enqueue_media_kernel_gen11.cpp000066400000000000000000000135041422164147700304120ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/pipeline_select_helper.h" #include "shared/source/helpers/preamble.h" #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/media_kernel_fixture.h" using namespace NEO; typedef MediaKernelFixture MediaKernelTest; auto expectedMask = pipelineSelectEnablePipelineSelectMaskBits | pipelineSelectMediaSamplerDopClockGateMaskBits | pipelineSelectMediaSamplerPowerClockGateMaskBits; GEN11TEST_F(MediaKernelTest, givenGen11CsrWhenEnqueueBlockedVmeKernelFirstTimeThenProgramPipelineSelectionAndMediaSampler) { typedef typename ICLFamily::PIPELINE_SELECT PIPELINE_SELECT; cl_uint workDim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {1, 1, 1}; UserEvent userEvent(context); cl_event blockedEvent = &userEvent; auto retVal = pCmdQ->enqueueKernel( pVmeKernel, workDim, globalWorkOffset, globalWorkSize, nullptr, 1, &blockedEvent, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); userEvent.setStatus(CL_COMPLETE); parseCommands(*pCmdQ); ASSERT_NE(cmdPipelineSelect, 
nullptr); auto *pCmd = genCmdCast(cmdPipelineSelect); auto expectedPipelineSelection = PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU; EXPECT_EQ(expectedMask, pCmd->getMaskBits()); EXPECT_EQ(expectedPipelineSelection, pCmd->getPipelineSelection()); EXPECT_FALSE(pCmd->getMediaSamplerDopClockGateEnable()); } GEN11TEST_F(MediaKernelTest, givenGen11CsrWhenEnqueueBlockedNonVmeKernelFirstTimeThenProgramPipelineSelectionAndMediaSampler) { typedef typename ICLFamily::PIPELINE_SELECT PIPELINE_SELECT; cl_uint workDim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {1, 1, 1}; UserEvent userEvent(context); cl_event blockedEvent = &userEvent; auto retVal = pCmdQ->enqueueKernel( pKernel, workDim, globalWorkOffset, globalWorkSize, nullptr, 1, &blockedEvent, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); userEvent.setStatus(CL_COMPLETE); parseCommands(*pCmdQ); ASSERT_NE(cmdPipelineSelect, nullptr); auto *pCmd = genCmdCast(cmdPipelineSelect); auto expectedPipelineSelection = PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU; EXPECT_EQ(expectedMask, pCmd->getMaskBits()); EXPECT_EQ(expectedPipelineSelection, pCmd->getPipelineSelection()); EXPECT_TRUE(pCmd->getMediaSamplerDopClockGateEnable()); } GEN11TEST_F(MediaKernelTest, givenGen11CsrWhenEnqueueVmeKernelFirstTimeThenProgramPipelineSelectionAndMediaSampler) { typedef typename ICLFamily::PIPELINE_SELECT PIPELINE_SELECT; enqueueVmeKernel(); auto numCommands = getCommandsList().size(); EXPECT_EQ(1u, numCommands); auto pCmd = getCommand(); auto expectedPipelineSelection = PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU; EXPECT_EQ(expectedMask, pCmd->getMaskBits()); EXPECT_EQ(expectedPipelineSelection, pCmd->getPipelineSelection()); EXPECT_FALSE(pCmd->getMediaSamplerDopClockGateEnable()); EXPECT_EQ(1u, pCmd->getMediaSamplerPowerClockGateDisable()); } GEN11TEST_F(MediaKernelTest, givenGen11CsrWhenEnqueueNonVmeKernelFirstTimeThenProgramPipelineSelectionAndMediaSampler) { typedef typename ICLFamily::PIPELINE_SELECT PIPELINE_SELECT; 
enqueueRegularKernel(); auto numCommands = getCommandsList().size(); EXPECT_EQ(1u, numCommands); auto pCmd = getCommand(); auto expectedPipelineSelection = PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU; EXPECT_EQ(expectedMask, pCmd->getMaskBits()); EXPECT_EQ(expectedPipelineSelection, pCmd->getPipelineSelection()); EXPECT_TRUE(pCmd->getMediaSamplerDopClockGateEnable()); EXPECT_EQ(0u, pCmd->getMediaSamplerPowerClockGateDisable()); } GEN11TEST_F(MediaKernelTest, givenGen11CsrWhenEnqueueVmeKernelTwiceThenProgramPipelineSelectOnce) { typedef typename ICLFamily::PIPELINE_SELECT PIPELINE_SELECT; enqueueVmeKernel(); auto numCommands = getCommandsList().size(); EXPECT_EQ(1u, numCommands); } GEN11TEST_F(MediaKernelTest, givenGen11CsrWhenEnqueueNonVmeKernelTwiceThenProgramPipelineSelectOnce) { typedef typename ICLFamily::PIPELINE_SELECT PIPELINE_SELECT; enqueueVmeKernel(); auto numCommands = getCommandsList().size(); EXPECT_EQ(1u, numCommands); } GEN11TEST_F(MediaKernelTest, givenGen11CsrWhenEnqueueVmeKernelAfterNonVmeKernelThenProgramPipelineSelectionAndMediaSamplerTwice) { typedef typename ICLFamily::PIPELINE_SELECT PIPELINE_SELECT; enqueueRegularKernel(); enqueueVmeKernel(); auto commands = getCommandsList(); EXPECT_EQ(2u, commands.size()); auto pCmd = static_cast(commands.back()); EXPECT_EQ(expectedMask, pCmd->getMaskBits()); EXPECT_FALSE(pCmd->getMediaSamplerDopClockGateEnable()); EXPECT_EQ(1u, pCmd->getMediaSamplerPowerClockGateDisable()); } GEN11TEST_F(MediaKernelTest, givenGen11CsrWhenEnqueueNonVmeKernelAfterVmeKernelThenProgramProgramPipelineSelectionAndMediaSamplerTwice) { typedef typename ICLFamily::PIPELINE_SELECT PIPELINE_SELECT; enqueueVmeKernel(); enqueueRegularKernel(); auto commands = getCommandsList(); EXPECT_EQ(2u, commands.size()); auto pCmd = static_cast(commands.back()); EXPECT_EQ(expectedMask, pCmd->getMaskBits()); EXPECT_TRUE(pCmd->getMediaSamplerDopClockGateEnable()); EXPECT_EQ(0u, pCmd->getMediaSamplerPowerClockGateDisable()); } 
compute-runtime-22.14.22890/opencl/test/unit_test/gen11/hw_helper_tests_gen11.cpp000066400000000000000000000051211422164147700274370ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "shared/test/common/helpers/hw_helper_tests.h" #include "shared/test/unit_test/helpers/get_gpgpu_engines_tests.inl" #include "opencl/source/helpers/cl_hw_helper.h" #include "opencl/test/unit_test/mocks/mock_cl_hw_helper.h" using HwHelperTestGen11 = HwHelperTest; GEN11TEST_F(HwHelperTestGen11, WhenGettingMaxBarriersPerSliceThenCorrectSizeIsReturned) { auto &helper = HwHelper::get(renderCoreFamily); EXPECT_EQ(32u, helper.getMaxBarrierRegisterPerSlice()); } GEN11TEST_F(HwHelperTestGen11, WhenGettingPitchAlignmentForImageThenCorrectValueIsReturned) { auto &helper = HwHelper::get(renderCoreFamily); EXPECT_EQ(4u, helper.getPitchAlignmentForImage(&hardwareInfo)); } GEN11TEST_F(HwHelperTestGen11, WhenAdjustingDefaultEngineTypeThenEngineTypeIsSet) { auto engineType = hardwareInfo.capabilityTable.defaultEngineType; auto &helper = HwHelper::get(renderCoreFamily); helper.adjustDefaultEngineType(&hardwareInfo); EXPECT_EQ(engineType, hardwareInfo.capabilityTable.defaultEngineType); } GEN11TEST_F(HwHelperTestGen11, whenGetGpgpuEnginesThenReturnThreeRcsEngines) { whenGetGpgpuEnginesThenReturnTwoRcsEngines(pDevice->getHardwareInfo()); EXPECT_EQ(3u, pDevice->allEngines.size()); } GEN11TEST_F(HwHelperTestGen11, WhenGettingDeviceIpVersionThenMakeCorrectDeviceIpVersion) { EXPECT_EQ(ClHwHelperMock::makeDeviceIpVersion(11, 0, 0), ClHwHelper::get(renderCoreFamily).getDeviceIpVersion(*defaultHwInfo)); } GEN11TEST_F(HwHelperTestGen11, WhenGettingSupportedDeviceFeatureCapabilitiesThenReturnCorrectValue) { EXPECT_EQ(0u, ClHwHelper::get(renderCoreFamily).getSupportedDeviceFeatureCapabilities()); } using MemorySynchronizatiopCommandsTestsGen11 = ::testing::Test; 
GEN11TEST_F(MemorySynchronizatiopCommandsTestsGen11, WhenProgrammingCacheFlushThenExpectConstantCacheFieldSet) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; std::unique_ptr buffer(new uint8_t[128]); LinearStream stream(buffer.get(), 128); MemorySynchronizationCommands::addFullCacheFlush(stream, *defaultHwInfo); PIPE_CONTROL *pipeControl = genCmdCast(buffer.get()); ASSERT_NE(nullptr, pipeControl); EXPECT_TRUE(pipeControl->getConstantCacheInvalidationEnable()); } GEN11TEST_F(MemorySynchronizatiopCommandsTestsGen11, givenGen11WhenCallIsPackedSupportedThenReturnTrue) { auto &helper = HwHelper::get(renderCoreFamily); EXPECT_TRUE(helper.packedFormatsSupported()); } compute-runtime-22.14.22890/opencl/test/unit_test/gen11/icllp/000077500000000000000000000000001422164147700236455ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/gen11/icllp/CMakeLists.txt000066400000000000000000000016701422164147700264110ustar00rootroot00000000000000# # Copyright (C) 2019-2022 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_ICLLP) set(IGDRCL_SRCS_tests_gen11_icllp_excludes ${CMAKE_CURRENT_SOURCE_DIR}/excludes_ocl_icllp.cpp ) set_property(GLOBAL APPEND PROPERTY IGDRCL_SRCS_tests_excludes ${IGDRCL_SRCS_tests_gen11_icllp_excludes}) set(IGDRCL_SRCS_tests_gen11_icllp ${IGDRCL_SRCS_tests_gen11_icllp_excludes} ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/test_hw_info_config_icllp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_device_caps_icllp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_media_kernel_icllp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_program_media_sampler_icllp.cpp ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen11_icllp}) add_subdirectories() neo_copy_test_files_with_revision(copy_test_files_icllp_0 icllp 0) add_dependencies(copy_test_files_per_product copy_test_files_icllp_0) endif() 
compute-runtime-22.14.22890/opencl/test/unit_test/gen11/icllp/excludes_ocl_icllp.cpp000066400000000000000000000011141422164147700302020ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" HWTEST_EXCLUDE_PRODUCT(HwInfoConfigTest, givenHwInfoConfigWhenAskedIfAdditionalMediaSamplerProgrammingIsRequiredThenFalseIsReturned, IGFX_ICELAKE_LP) HWTEST_EXCLUDE_PRODUCT(HwInfoConfigTest, givenHwInfoConfigWhenAskedIfInitialFlagsProgrammingIsRequiredThenFalseIsReturned, IGFX_ICELAKE_LP) HWTEST_EXCLUDE_PRODUCT(HwInfoConfigTest, givenHwInfoConfigWhenAskedIfReturnedCmdSizeForMediaSamplerAdjustmentIsRequiredThenFalseIsReturned, IGFX_ICELAKE_LP) compute-runtime-22.14.22890/opencl/test/unit_test/gen11/icllp/linux/000077500000000000000000000000001422164147700250045ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/gen11/icllp/linux/CMakeLists.txt000066400000000000000000000005411422164147700275440ustar00rootroot00000000000000# # Copyright (C) 2019-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_gen11_icllp_linux ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/hw_info_config_tests_icllp.cpp ) if(UNIX) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen11_icllp_linux}) add_subdirectory(dll) endif() compute-runtime-22.14.22890/opencl/test/unit_test/gen11/icllp/linux/dll/000077500000000000000000000000001422164147700255575ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/gen11/icllp/linux/dll/CMakeLists.txt000066400000000000000000000005031422164147700303150ustar00rootroot00000000000000# # Copyright (C) 2019-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_linux_dll_tests_gen11_icllp ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/device_id_tests_icllp.cpp ) target_sources(igdrcl_linux_dll_tests PRIVATE 
${IGDRCL_SRCS_linux_dll_tests_gen11_icllp}) compute-runtime-22.14.22890/opencl/test/unit_test/gen11/icllp/linux/dll/device_id_tests_icllp.cpp000066400000000000000000000016661422164147700326140ustar00rootroot00000000000000/* * Copyright (C) 2019-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/fixtures/linux/device_id_fixture.h" using namespace NEO; TEST_F(DeviceIdTests, GivenIcllpSupportedDeviceIdThenHardwareInfoIsCorrect) { std::array expectedDescriptors = {{ {0xFF05, &ICLLP_1x4x8::hwInfo, &ICLLP_1x4x8::setupHardwareInfo}, {0x8A56, &ICLLP_1x4x8::hwInfo, &ICLLP_1x4x8::setupHardwareInfo}, {0x8A58, &ICLLP_1x4x8::hwInfo, &ICLLP_1x4x8::setupHardwareInfo}, {0x8A5C, &ICLLP_1x6x8::hwInfo, &ICLLP_1x6x8::setupHardwareInfo}, {0x8A5A, &ICLLP_1x6x8::hwInfo, &ICLLP_1x6x8::setupHardwareInfo}, {0x8A50, &ICLLP_1x8x8::hwInfo, &ICLLP_1x8x8::setupHardwareInfo}, {0x8A52, &ICLLP_1x8x8::hwInfo, &ICLLP_1x8x8::setupHardwareInfo}, {0x8A51, &ICLLP_1x8x8::hwInfo, &ICLLP_1x8x8::setupHardwareInfo}, }}; testImpl(expectedDescriptors); } compute-runtime-22.14.22890/opencl/test/unit_test/gen11/icllp/linux/hw_info_config_tests_icllp.cpp000066400000000000000000000063521422164147700331010ustar00rootroot00000000000000/* * Copyright (C) 2019-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/unit_test/helpers/gtest_helpers.h" #include "opencl/test/unit_test/os_interface/linux/hw_info_config_linux_tests.h" using namespace NEO; struct HwInfoConfigTestLinuxIcllp : HwInfoConfigTestLinux { void SetUp() override { HwInfoConfigTestLinux::SetUp(); drm->storedDeviceID = IICL_LP_GT1_MOB_DEVICE_F0_ID; } }; ICLLPTEST_F(HwInfoConfigTestLinuxIcllp, GivenIcllpThenHwInfoIsCorrect) { auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ((unsigned short)drm->storedDeviceID, outHwInfo.platform.usDeviceID); EXPECT_EQ((unsigned 
short)drm->storedDeviceRevID, outHwInfo.platform.usRevId); EXPECT_EQ((uint32_t)drm->storedEUVal, outHwInfo.gtSystemInfo.EUCount); EXPECT_EQ((uint32_t)drm->storedSSVal, outHwInfo.gtSystemInfo.SubSliceCount); EXPECT_EQ(1u, outHwInfo.gtSystemInfo.SliceCount); EXPECT_EQ(aub_stream::ENGINE_RCS, outHwInfo.capabilityTable.defaultEngineType); EXPECT_FALSE(outHwInfo.featureTable.flags.ftrTileY); } ICLLPTEST_F(HwInfoConfigTestLinuxIcllp, GivenInvalidDeviceIdWhenConfiguringHwInfoThenNegativeOneReturned) { auto hwInfoConfig = HwInfoConfig::get(productFamily); drm->storedRetValForDeviceID = -1; int ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-1, ret); drm->storedRetValForDeviceID = 0; drm->storedRetValForDeviceRevID = -1; ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-1, ret); drm->storedRetValForDeviceRevID = 0; drm->failRetTopology = true; drm->storedRetValForEUVal = -1; ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-1, ret); drm->storedRetValForEUVal = 0; drm->storedRetValForSSVal = -1; ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-1, ret); } template class IcllpHwInfoTests : public ::testing::Test {}; typedef ::testing::Types icllpTestTypes; TYPED_TEST_CASE(IcllpHwInfoTests, icllpTestTypes); TYPED_TEST(IcllpHwInfoTests, WhenGettingSystemInfoThenParamsAreValid) { HardwareInfo hwInfo = *defaultHwInfo; auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); DrmMock drm(*executionEnvironment->rootDeviceEnvironments[0]); executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); GT_SYSTEM_INFO >SystemInfo = hwInfo.gtSystemInfo; DeviceDescriptor device = {0, &hwInfo, &TypeParam::setupHardwareInfo}; int ret = drm.setupHardwareInfo(&device, false); EXPECT_EQ(ret, 0); EXPECT_GT(gtSystemInfo.EUCount, 0u); EXPECT_GT(gtSystemInfo.ThreadCount, 0u); 
EXPECT_GT(gtSystemInfo.SliceCount, 0u); EXPECT_GT(gtSystemInfo.SubSliceCount, 0u); EXPECT_GT(gtSystemInfo.DualSubSliceCount, 0u); EXPECT_GT_VAL(gtSystemInfo.L3CacheSizeInKb, 0u); EXPECT_EQ(gtSystemInfo.CsrSizeInMb, 5u); EXPECT_FALSE(gtSystemInfo.IsDynamicallyPopulated); } compute-runtime-22.14.22890/opencl/test/unit_test/gen11/icllp/test_device_caps_icllp.cpp000066400000000000000000000037111422164147700310420ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" using namespace NEO; using IcllpTest = Test; ICLLPTEST_F(IcllpTest, givenIcllpWhenSlmSizeIsRequiredThenReturnCorrectValue) { EXPECT_EQ(64u, pDevice->getHardwareInfo().capabilityTable.slmSize); } ICLLPTEST_F(IcllpTest, givenIclLpWhenCheckFtrSupportsInteger64BitAtomicsThenReturnFalse) { EXPECT_TRUE(pDevice->getHardwareInfo().capabilityTable.ftrSupportsInteger64BitAtomics); } ICLLPTEST_F(IcllpTest, WhenGettingPlatformFamilyThenIcelakeIsReported) { EXPECT_EQ(IGFX_ICELAKE_LP, pDevice->getHardwareInfo().platform.eProductFamily); } ICLLPTEST_F(IcllpTest, WhenCheckingExtensionStringThenFp64IsNotSupported) { const auto &caps = pClDevice->getDeviceInfo(); std::string extensionString = caps.deviceExtensions; EXPECT_EQ(std::string::npos, extensionString.find(std::string("cl_khr_fp64"))); EXPECT_EQ(0u, caps.doubleFpConfig); } ICLLPTEST_F(IcllpTest, WhenCheckingCapsThenCorrectlyRoundedDivideSqrtIsNotSupported) { const auto &caps = pClDevice->getDeviceInfo(); EXPECT_EQ(0u, caps.singleFpConfig & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT); } ICLLPTEST_F(IcllpTest, WhenCheckingSimulationCapThenResultIsCorrect) { unsigned short iclLpSimulationIds[2] = { IICL_LP_GT1_MOB_DEVICE_F0_ID, 0, // default, non-simulation }; NEO::MockDevice *mockDevice = nullptr; for (auto id : iclLpSimulationIds) { mockDevice = createWithUsDeviceId(id); ASSERT_NE(mockDevice, nullptr); if 
(id == 0) EXPECT_FALSE(mockDevice->isSimulation()); else EXPECT_TRUE(mockDevice->isSimulation()); delete mockDevice; } } ICLLPTEST_F(IcllpTest, GivenICLLPWhenCheckftr64KBpagesThenFalse) { EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.ftr64KBpages); } compute-runtime-22.14.22890/opencl/test/unit_test/gen11/icllp/test_hw_info_config_icllp.cpp000066400000000000000000000104211422164147700315470ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/hw_info_config.h" #include "shared/test/common/helpers/default_hw_info.h" #include "shared/test/common/test_macros/test.h" using namespace NEO; using IcllpHwInfoConfig = ::testing::Test; ICLLPTEST_F(IcllpHwInfoConfig, givenInvalidSystemInfoWhenSettingHardwareInfoThenExpectThrow) { HardwareInfo hwInfo = *defaultHwInfo; GT_SYSTEM_INFO >SystemInfo = hwInfo.gtSystemInfo; uint64_t config = 0xdeadbeef; gtSystemInfo = {0}; EXPECT_ANY_THROW(hardwareInfoSetup[productFamily](&hwInfo, false, config)); EXPECT_EQ(0u, gtSystemInfo.SliceCount); EXPECT_EQ(0u, gtSystemInfo.SubSliceCount); EXPECT_EQ(0u, gtSystemInfo.DualSubSliceCount); EXPECT_EQ(0u, gtSystemInfo.EUCount); } ICLLPTEST_F(IcllpHwInfoConfig, givenHwInfoConfigWhenAskedIfAdditionalMediaSamplerProgrammingIsRequiredThenTrueIsReturned) { const auto &hwInfoConfig = *HwInfoConfig::get(defaultHwInfo->platform.eProductFamily); EXPECT_TRUE(hwInfoConfig.isAdditionalMediaSamplerProgrammingRequired()); } ICLLPTEST_F(IcllpHwInfoConfig, givenHwInfoConfigWhenAskedIfInitialFlagsProgrammingIsRequiredThenTrueIsReturned) { const auto &hwInfoConfig = *HwInfoConfig::get(defaultHwInfo->platform.eProductFamily); EXPECT_TRUE(hwInfoConfig.isInitialFlagsProgrammingRequired()); } ICLLPTEST_F(IcllpHwInfoConfig, givenHwInfoConfigWhenAskedIfReturnedCmdSizeForMediaSamplerAdjustmentIsRequiredThenTrueIsReturned) { const auto &hwInfoConfig = *HwInfoConfig::get(defaultHwInfo->platform.eProductFamily); 
EXPECT_TRUE(hwInfoConfig.isReturnedCmdSizeForMediaSamplerAdjustmentRequired()); } using IcllpHwInfo = ::testing::Test; ICLLPTEST_F(IcllpHwInfo, givenBoolWhenCallIcllpHardwareInfoSetupThenFeatureTableAndWorkaroundTableAreSetCorrect) { uint64_t configs[] = { 0x100080008, 0x100040008, 0x100060008}; bool boolValue[]{ true, false}; HardwareInfo hwInfo = *defaultHwInfo; GT_SYSTEM_INFO >SystemInfo = hwInfo.gtSystemInfo; FeatureTable &featureTable = hwInfo.featureTable; WorkaroundTable &workaroundTable = hwInfo.workaroundTable; for (auto config : configs) { for (auto setParamBool : boolValue) { gtSystemInfo = {0}; featureTable = {}; workaroundTable = {}; hardwareInfoSetup[productFamily](&hwInfo, setParamBool, config); EXPECT_EQ(setParamBool, featureTable.flags.ftrL3IACoherency); EXPECT_EQ(setParamBool, featureTable.flags.ftrPPGTT); EXPECT_EQ(setParamBool, featureTable.flags.ftrSVM); EXPECT_EQ(setParamBool, featureTable.flags.ftrIA32eGfxPTEs); EXPECT_EQ(setParamBool, featureTable.flags.ftrStandardMipTailFormat); EXPECT_EQ(setParamBool, featureTable.flags.ftrDisplayYTiling); EXPECT_EQ(setParamBool, featureTable.flags.ftrTranslationTable); EXPECT_EQ(setParamBool, featureTable.flags.ftrUserModeTranslationTable); EXPECT_EQ(setParamBool, featureTable.flags.ftrTileMappedResource); EXPECT_EQ(setParamBool, featureTable.flags.ftrEnableGuC); EXPECT_EQ(setParamBool, featureTable.flags.ftrFbc); EXPECT_EQ(setParamBool, featureTable.flags.ftrFbc2AddressTranslation); EXPECT_EQ(setParamBool, featureTable.flags.ftrFbcBlitterTracking); EXPECT_EQ(setParamBool, featureTable.flags.ftrFbcCpuTracking); EXPECT_EQ(setParamBool, featureTable.flags.ftrTileY); EXPECT_EQ(setParamBool, featureTable.flags.ftrAstcHdr2D); EXPECT_EQ(setParamBool, featureTable.flags.ftrAstcLdr2D); EXPECT_EQ(setParamBool, featureTable.flags.ftr3dMidBatchPreempt); EXPECT_EQ(setParamBool, featureTable.flags.ftrGpGpuMidBatchPreempt); EXPECT_EQ(setParamBool, featureTable.flags.ftrGpGpuMidThreadLevelPreempt); 
EXPECT_EQ(setParamBool, featureTable.flags.ftrGpGpuThreadGroupLevelPreempt); EXPECT_EQ(setParamBool, featureTable.flags.ftrPerCtxtPreemptionGranularityControl); EXPECT_EQ(setParamBool, workaroundTable.flags.wa4kAlignUVOffsetNV12LinearSurface); EXPECT_EQ(setParamBool, workaroundTable.flags.waReportPerfCountUseGlobalContextID); } } } compute-runtime-22.14.22890/opencl/test/unit_test/gen11/icllp/test_media_kernel_icllp.cpp000066400000000000000000000055721422164147700312230ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/pipeline_select_helper.h" #include "shared/source/helpers/preamble.h" #include "shared/test/common/libult/ult_command_stream_receiver.h" #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/media_kernel_fixture.h" using namespace NEO; typedef MediaKernelFixture MediaKernelTest; ICLLPTEST_F(MediaKernelTest, givenIcllpDefaultThenLastVmeSubsliceConfigIsFalse) { auto csr = static_cast *>(&pDevice->getGpgpuCommandStreamReceiver()); EXPECT_FALSE(csr->lastVmeSubslicesConfig); } ICLLPTEST_F(MediaKernelTest, givenIcllpCSRWhenEnqueueVmeKernelThenVmeSubslicesConfigChangesToTrue) { auto csr = static_cast *>(&pDevice->getGpgpuCommandStreamReceiver()); enqueueVmeKernel(); EXPECT_TRUE(csr->lastVmeSubslicesConfig); } ICLLPTEST_F(MediaKernelTest, givenIcllpCSRWhenEnqueueRegularKernelAfterVmeKernelThenVmeSubslicesConfigChangesToFalse) { auto csr = static_cast *>(&pDevice->getGpgpuCommandStreamReceiver()); enqueueVmeKernel(); enqueueRegularKernel(); EXPECT_FALSE(csr->lastVmeSubslicesConfig); } ICLLPTEST_F(MediaKernelTest, givenIcllpCSRWhenEnqueueRegularKernelThenVmeSubslicesConfigDoesntChangeToTrue) { auto csr = static_cast *>(&pDevice->getGpgpuCommandStreamReceiver()); enqueueRegularKernel(); EXPECT_FALSE(csr->lastVmeSubslicesConfig); } ICLLPTEST_F(MediaKernelTest, 
givenIcllpCSRWhenEnqueueRegularKernelAfterRegularKernelThenVmeSubslicesConfigDoesntChangeToTrue) { auto csr = static_cast *>(&pDevice->getGpgpuCommandStreamReceiver()); enqueueRegularKernel(); enqueueRegularKernel(); EXPECT_FALSE(csr->lastVmeSubslicesConfig); } ICLLPTEST_F(MediaKernelTest, givenIcllpCSRWhenEnqueueVmeKernelAfterRegularKernelThenVmeSubslicesConfigChangesToTrue) { auto csr = static_cast *>(&pDevice->getGpgpuCommandStreamReceiver()); enqueueRegularKernel(); enqueueVmeKernel(); EXPECT_TRUE(csr->lastVmeSubslicesConfig); } ICLLPTEST_F(MediaKernelTest, WhenProgrammingVmeThenCmdSizeIsCorrect) { typedef typename FamilyType::MI_LOAD_REGISTER_IMM MI_LOAD_REGISTER_IMM; typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; auto csr = static_cast *>(&pDevice->getGpgpuCommandStreamReceiver()); size_t programVmeCmdSize = sizeof(MI_LOAD_REGISTER_IMM) + 2 * sizeof(PIPE_CONTROL); EXPECT_EQ(0u, csr->getCmdSizeForMediaSampler(false)); EXPECT_EQ(programVmeCmdSize, csr->getCmdSizeForMediaSampler(true)); } compute-runtime-22.14.22890/opencl/test/unit_test/gen11/icllp/test_program_media_sampler_icllp.cpp000066400000000000000000000201321422164147700331220ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver_hw.h" #include "shared/source/gen11/reg_configs.h" #include "shared/source/helpers/hw_helper.h" #include "shared/test/common/cmd_parse/hw_parse.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/dispatch_flags_helper.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/test_macros/test.h" using namespace NEO; struct Gen11MediaSamplerProgramingTest : public ::testing::Test { typedef typename ICLFamily::MI_LOAD_REGISTER_IMM MI_LOAD_REGISTER_IMM; typedef typename ICLFamily::PIPE_CONTROL PIPE_CONTROL; struct myCsr : public CommandStreamReceiverHw { using 
CommandStreamReceiver::commandStream; using CommandStreamReceiverHw::programMediaSampler; myCsr(ExecutionEnvironment &executionEnvironment) : CommandStreamReceiverHw(executionEnvironment, 0, 1){}; void overrideLastVmeSubliceConfig(bool value) { lastVmeSubslicesConfig = value; } }; void overrideMediaRequest(bool lastVmeConfig, bool mediaSamplerRequired) { csr->overrideLastVmeSubliceConfig(lastVmeConfig); flags.pipelineSelectArgs.mediaSamplerRequired = mediaSamplerRequired; } void SetUp() override { device.reset(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); csr = new myCsr(*device->executionEnvironment); device->resetCommandStreamReceiver(csr); stream.reset(new LinearStream(buff, MemoryConstants::pageSize)); } void programMediaSampler() { csr->programMediaSampler(*stream, flags); } size_t getCmdSize() { return csr->getCmdSizeForMediaSampler(flags.pipelineSelectArgs.mediaSamplerRequired); } myCsr *csr = nullptr; std::unique_ptr device; DispatchFlags flags = DispatchFlagsHelper::createDefaultDispatchFlags(); char buff[MemoryConstants::pageSize]; std::unique_ptr stream; }; template void setFlushAllCaches(PIPE_CONTROL &pc) { pc.setDcFlushEnable(true); pc.setRenderTargetCacheFlushEnable(true); pc.setInstructionCacheInvalidateEnable(true); pc.setTextureCacheInvalidationEnable(true); pc.setPipeControlFlushEnable(true); pc.setVfCacheInvalidationEnable(true); pc.setConstantCacheInvalidationEnable(true); pc.setStateCacheInvalidationEnable(true); } ICLLPTEST_F(Gen11MediaSamplerProgramingTest, givenVmeEnableSubsliceDisabledWhenPowerClockStateRegisterEnableThenExpectCorrectCmdValues) { uint32_t programVmeCmdSize = sizeof(MI_LOAD_REGISTER_IMM) + 2 * sizeof(PIPE_CONTROL); overrideMediaRequest(false, true); size_t estimatedCmdSize = getCmdSize(); EXPECT_EQ(programVmeCmdSize, estimatedCmdSize); auto expectedMiLrCmd = FamilyType::cmdInitLoadRegisterImm; expectedMiLrCmd.setRegisterOffset(gen11PowerClockStateRegister::address); auto expectedRegValue = 
(device->getHardwareInfo().gtSystemInfo.SubSliceCount / 2) << gen11PowerClockStateRegister::subSliceCountShift; expectedRegValue |= (gen11PowerClockStateRegister::vmeSliceCount << gen11PowerClockStateRegister::sliceCountShift); expectedRegValue |= (device->getHardwareInfo().gtSystemInfo.MaxEuPerSubSlice << gen11PowerClockStateRegister::minEuCountShift); expectedRegValue |= (device->getHardwareInfo().gtSystemInfo.MaxEuPerSubSlice << gen11PowerClockStateRegister::maxEuCountShift); expectedRegValue |= gen11PowerClockStateRegister::enabledValue; expectedMiLrCmd.setDataDword(expectedRegValue); programMediaSampler(); ASSERT_EQ(programVmeCmdSize, stream->getUsed()); auto expectedPipeControlCmd = FamilyType::cmdInitPipeControl; expectedPipeControlCmd.setCommandStreamerStallEnable(0x1); setFlushAllCaches(expectedPipeControlCmd); auto pipeControlCmd = genCmdCast(stream->getCpuBase()); ASSERT_NE(nullptr, pipeControlCmd); EXPECT_EQ(0, memcmp(&expectedPipeControlCmd, pipeControlCmd, sizeof(PIPE_CONTROL))); size_t cmdOffset = sizeof(PIPE_CONTROL); auto miLrCmd = genCmdCast(ptrOffset(stream->getCpuBase(), cmdOffset)); ASSERT_NE(nullptr, miLrCmd); EXPECT_EQ(0, memcmp(&expectedMiLrCmd, miLrCmd, sizeof(MI_LOAD_REGISTER_IMM))); cmdOffset += sizeof(MI_LOAD_REGISTER_IMM); expectedPipeControlCmd = FamilyType::cmdInitPipeControl; expectedPipeControlCmd.setCommandStreamerStallEnable(0x1); pipeControlCmd = genCmdCast(ptrOffset(stream->getCpuBase(), cmdOffset)); ASSERT_NE(nullptr, pipeControlCmd); EXPECT_EQ(0, memcmp(&expectedPipeControlCmd, pipeControlCmd, sizeof(PIPE_CONTROL))); } ICLLPTEST_F(Gen11MediaSamplerProgramingTest, givenVmeEnableSubsliceEnabledWhenPowerClockStateRegisterDisableThenExpectCorrectCmdValues) { constexpr uint32_t programVmeCmdSize = sizeof(MI_LOAD_REGISTER_IMM) + 3 * sizeof(PIPE_CONTROL); overrideMediaRequest(true, false); size_t estimatedCmdSize = getCmdSize(); EXPECT_EQ(programVmeCmdSize, estimatedCmdSize); programMediaSampler(); auto expectedMiLrCmd = 
FamilyType::cmdInitLoadRegisterImm; expectedMiLrCmd.setRegisterOffset(gen11PowerClockStateRegister::address); auto expectedRegValue = (device->getHardwareInfo().gtSystemInfo.SubSliceCount / 2) << gen11PowerClockStateRegister::subSliceCountShift; expectedRegValue |= ((device->getHardwareInfo().gtSystemInfo.SliceCount * 2) << gen11PowerClockStateRegister::sliceCountShift); expectedRegValue |= (device->getHardwareInfo().gtSystemInfo.MaxEuPerSubSlice << gen11PowerClockStateRegister::minEuCountShift); expectedRegValue |= (device->getHardwareInfo().gtSystemInfo.MaxEuPerSubSlice << gen11PowerClockStateRegister::maxEuCountShift); expectedRegValue |= gen11PowerClockStateRegister::disabledValue; expectedMiLrCmd.setDataDword(expectedRegValue); ASSERT_EQ(programVmeCmdSize, stream->getUsed()); auto expectedPipeControlCmd = FamilyType::cmdInitPipeControl; expectedPipeControlCmd.setCommandStreamerStallEnable(0x1); setFlushAllCaches(expectedPipeControlCmd); expectedPipeControlCmd.setGenericMediaStateClear(true); auto pipeControlCmd = genCmdCast(stream->getCpuBase()); ASSERT_NE(nullptr, pipeControlCmd); EXPECT_EQ(0, memcmp(&expectedPipeControlCmd, pipeControlCmd, sizeof(PIPE_CONTROL))); size_t cmdOffset = sizeof(PIPE_CONTROL); pipeControlCmd = genCmdCast(ptrOffset(stream->getCpuBase(), cmdOffset)); ASSERT_NE(nullptr, pipeControlCmd); expectedPipeControlCmd = FamilyType::cmdInitPipeControl; expectedPipeControlCmd.setCommandStreamerStallEnable(0x1); EXPECT_EQ(0, memcmp(&expectedPipeControlCmd, pipeControlCmd, sizeof(PIPE_CONTROL))); cmdOffset += sizeof(PIPE_CONTROL); auto miLrCmd = genCmdCast(ptrOffset(stream->getCpuBase(), cmdOffset)); ASSERT_NE(nullptr, miLrCmd); EXPECT_EQ(0, memcmp(&expectedMiLrCmd, miLrCmd, sizeof(MI_LOAD_REGISTER_IMM))); cmdOffset += sizeof(MI_LOAD_REGISTER_IMM); pipeControlCmd = genCmdCast(ptrOffset(stream->getCpuBase(), cmdOffset)); ASSERT_NE(nullptr, pipeControlCmd); EXPECT_EQ(0, memcmp(&expectedPipeControlCmd, pipeControlCmd, sizeof(PIPE_CONTROL))); } 
ICLLPTEST_F(Gen11MediaSamplerProgramingTest, givenVmeEnableSubsliceEnabledWhenPowerClockStateRegisterEnabledThenExpectNoCmds) { constexpr uint32_t programVmeCmdSize = 0; overrideMediaRequest(true, true); size_t estimatedCmdSize = getCmdSize(); EXPECT_EQ(programVmeCmdSize, estimatedCmdSize); programMediaSampler(); EXPECT_EQ(programVmeCmdSize, stream->getUsed()); } ICLLPTEST_F(Gen11MediaSamplerProgramingTest, givenVmeEnableSubsliceDisabledWhenPowerClockStateRegisterDisableThenExpectNoCmds) { constexpr uint32_t programVmeCmdSize = 0; overrideMediaRequest(false, false); size_t estimatedCmdSize = getCmdSize(); EXPECT_EQ(programVmeCmdSize, estimatedCmdSize); programMediaSampler(); EXPECT_EQ(programVmeCmdSize, stream->getUsed()); } compute-runtime-22.14.22890/opencl/test/unit_test/gen11/image_tests_gen11.cpp000066400000000000000000000076051422164147700265550ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/image/image_surface_state.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/mem_obj/image.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" using namespace NEO; typedef ICLFamily::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; struct AppendSurfaceStateParamsTest : public ::testing::Test { void SetUp() override { surfaceState = ICLFamily::cmdInitRenderSurfaceState; EXPECT_EQ(RENDER_SURFACE_STATE::SAMPLE_TAP_DISCARD_DISABLE_DISABLE, surfaceState.getSampleTapDiscardDisable()); imageDesc.image_width = 32; imageDesc.image_height = 0; imageDesc.image_depth = 0; imageDesc.image_array_size = 1; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = NULL; imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D; flags = CL_MEM_READ_WRITE; } void createImage() { auto surfaceFormat = Image::getSurfaceFormatFromTable( flags, 
&imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); EXPECT_NE(nullptr, surfaceFormat); image.reset(Image::create( &context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context.getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal)); } cl_int retVal = CL_SUCCESS; MockContext context; std::unique_ptr image; cl_image_format imageFormat; cl_image_desc imageDesc; cl_mem_flags flags; RENDER_SURFACE_STATE surfaceState; }; GEN11TEST_F(AppendSurfaceStateParamsTest, givenImageFormatWithoutAlphaChannelWhenAppendSurfaceStateParamsThenTapDiscardConfigDoesntChange) { imageFormat.image_channel_data_type = CL_FLOAT; imageFormat.image_channel_order = CL_R; createImage(); auto imageHw = static_cast *>(image.get()); imageHw->appendSurfaceStateParams(&surfaceState, context.getDevice(0)->getRootDeviceIndex(), false); bool tapDiscardConfigChanged = RENDER_SURFACE_STATE::SAMPLE_TAP_DISCARD_DISABLE_DISABLE != surfaceState.getSampleTapDiscardDisable(); EXPECT_FALSE(tapDiscardConfigChanged); } GEN11TEST_F(AppendSurfaceStateParamsTest, givenImageFormatWithAlphaChannelWhenAppendSurfaceStateParamsThenTapDiscardConfigChanges) { imageFormat.image_channel_data_type = CL_FLOAT; imageFormat.image_channel_order = CL_RGBA; createImage(); auto imageHw = static_cast *>(image.get()); imageHw->appendSurfaceStateParams(&surfaceState, context.getDevice(0)->getRootDeviceIndex(), false); bool tapDiscardConfigChanged = RENDER_SURFACE_STATE::SAMPLE_TAP_DISCARD_DISABLE_DISABLE != surfaceState.getSampleTapDiscardDisable(); EXPECT_TRUE(tapDiscardConfigChanged); } typedef ::testing::Test gen11ImageTests; GEN11TEST_F(gen11ImageTests, givenImageForGen11WhenClearColorParametersAreSetThenSurfaceStateIsNotModified) { typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; MockContext context; auto image = std::unique_ptr(ImageHelper::create(&context)); auto surfaceStateBefore = FamilyType::cmdInitRenderSurfaceState; 
auto surfaceStateAfter = FamilyType::cmdInitRenderSurfaceState; auto imageHw = static_cast *>(image.get()); EXPECT_EQ(0, memcmp(&surfaceStateBefore, &surfaceStateAfter, sizeof(RENDER_SURFACE_STATE))); EncodeSurfaceState::setClearColorParams(&surfaceStateAfter, imageHw->getGraphicsAllocation(context.getDevice(0)->getRootDeviceIndex())->getDefaultGmm()); EXPECT_EQ(0, memcmp(&surfaceStateBefore, &surfaceStateAfter, sizeof(RENDER_SURFACE_STATE))); } compute-runtime-22.14.22890/opencl/test/unit_test/gen11/kernel_tests_gen11.cpp000066400000000000000000000022071422164147700267440ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" using namespace NEO; using Gen11KernelTest = Test; GEN11TEST_F(Gen11KernelTest, givenKernelWhenCanTransformImagesIsCalledThenReturnsTrue) { MockKernelWithInternals mockKernel(*pClDevice); auto retVal = mockKernel.mockKernel->Kernel::canTransformImages(); EXPECT_TRUE(retVal); } GEN11TEST_F(Gen11KernelTest, givenBuiltinKernelWhenCanTransformImagesIsCalledThenReturnsFalse) { MockKernelWithInternals mockKernel(*pClDevice); mockKernel.mockKernel->isBuiltIn = true; auto retVal = mockKernel.mockKernel->Kernel::canTransformImages(); EXPECT_FALSE(retVal); } GEN11TEST_F(Gen11KernelTest, GivenKernelWhenNotRunningOnGen12lpThenWaDisableRccRhwoOptimizationIsNotRequired) { MockKernelWithInternals kernel(*pClDevice); EXPECT_FALSE(kernel.mockKernel->requiresWaDisableRccRhwoOptimization()); } compute-runtime-22.14.22890/opencl/test/unit_test/gen11/lkf/000077500000000000000000000000001422164147700233165ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/gen11/lkf/CMakeLists.txt000066400000000000000000000010341422164147700260540ustar00rootroot00000000000000# 
# Copyright (C) 2019-2021 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_LKF) set(IGDRCL_SRCS_tests_gen11_lkf ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/test_hw_info_config_lkf.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_device_caps_lkf.cpp ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen11_lkf}) add_subdirectories() neo_copy_test_files_with_revision(copy_test_files_lkf_0 lkf 0) add_dependencies(copy_test_files_per_product copy_test_files_lkf_0) endif() compute-runtime-22.14.22890/opencl/test/unit_test/gen11/lkf/linux/000077500000000000000000000000001422164147700244555ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/gen11/lkf/linux/CMakeLists.txt000066400000000000000000000005331422164147700272160ustar00rootroot00000000000000# # Copyright (C) 2019-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_gen11_lkf_linux ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/hw_info_config_tests_lkf.cpp ) if(UNIX) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen11_lkf_linux}) add_subdirectory(dll) endif() compute-runtime-22.14.22890/opencl/test/unit_test/gen11/lkf/linux/dll/000077500000000000000000000000001422164147700252305ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/gen11/lkf/linux/dll/CMakeLists.txt000066400000000000000000000004751422164147700277760ustar00rootroot00000000000000# # Copyright (C) 2019-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_linux_dll_tests_gen11_lkf ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/device_id_tests_lkf.cpp ) target_sources(igdrcl_linux_dll_tests PRIVATE ${IGDRCL_SRCS_linux_dll_tests_gen11_lkf}) compute-runtime-22.14.22890/opencl/test/unit_test/gen11/lkf/linux/dll/device_id_tests_lkf.cpp000066400000000000000000000006571422164147700317350ustar00rootroot00000000000000/* * Copyright (C) 2019-2022 Intel Corporation * * 
SPDX-License-Identifier: MIT * */ #include "shared/test/common/fixtures/linux/device_id_fixture.h" using namespace NEO; TEST_F(DeviceIdTests, GivenLkfSupportedDeviceIdThenHardwareInfoIsCorrect) { std::array expectedDescriptors = {{ {0x9840, &LKF_1x8x8::hwInfo, &LKF_1x8x8::setupHardwareInfo}, }}; testImpl(expectedDescriptors); } compute-runtime-22.14.22890/opencl/test/unit_test/gen11/lkf/linux/hw_info_config_tests_lkf.cpp000066400000000000000000000060651422164147700322240ustar00rootroot00000000000000/* * Copyright (C) 2019-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/unit_test/helpers/gtest_helpers.h" #include "opencl/test/unit_test/os_interface/linux/hw_info_config_linux_tests.h" using namespace NEO; struct HwInfoConfigTestLinuxLkf : HwInfoConfigTestLinux { void SetUp() override { HwInfoConfigTestLinux::SetUp(); drm->storedDeviceID = ILKF_1x8x8_DESK_DEVICE_F0_ID; drm->storedSSVal = 8; } }; LKFTEST_F(HwInfoConfigTestLinuxLkf, configureHwInfoLkf) { auto hwInfoConfig = HwInfoConfigHw::get(); int ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ((unsigned short)drm->storedDeviceID, outHwInfo.platform.usDeviceID); EXPECT_EQ((unsigned short)drm->storedDeviceRevID, outHwInfo.platform.usRevId); EXPECT_EQ((uint32_t)drm->storedEUVal, outHwInfo.gtSystemInfo.EUCount); EXPECT_EQ((uint32_t)drm->storedSSVal, outHwInfo.gtSystemInfo.SubSliceCount); EXPECT_EQ(1u, outHwInfo.gtSystemInfo.SliceCount); EXPECT_FALSE(outHwInfo.featureTable.flags.ftrTileY); } LKFTEST_F(HwInfoConfigTestLinuxLkf, negative) { auto hwInfoConfig = HwInfoConfigHw::get(); drm->storedRetValForDeviceID = -1; int ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-1, ret); drm->storedRetValForDeviceID = 0; drm->storedRetValForDeviceRevID = -1; ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-1, ret); drm->storedRetValForDeviceRevID = 0; 
drm->failRetTopology = true; drm->storedRetValForEUVal = -1; ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-1, ret); drm->storedRetValForEUVal = 0; drm->storedRetValForSSVal = -1; ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-1, ret); } template class LkfHwInfoTests : public ::testing::Test {}; typedef ::testing::Types lkfTestTypes; TYPED_TEST_CASE(LkfHwInfoTests, lkfTestTypes); TYPED_TEST(LkfHwInfoTests, gtSetupIsCorrect) { HardwareInfo hwInfo = *defaultHwInfo; auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); DrmMock drm(*executionEnvironment->rootDeviceEnvironments[0]); GT_SYSTEM_INFO >SystemInfo = hwInfo.gtSystemInfo; DeviceDescriptor device = {0, &hwInfo, &TypeParam::setupHardwareInfo}; int ret = drm.setupHardwareInfo(&device, false); EXPECT_EQ(ret, 0); EXPECT_GT(gtSystemInfo.EUCount, 0u); EXPECT_GT(gtSystemInfo.ThreadCount, 0u); EXPECT_GT(gtSystemInfo.SliceCount, 0u); EXPECT_GT(gtSystemInfo.SubSliceCount, 0u); EXPECT_GT(gtSystemInfo.DualSubSliceCount, 0u); EXPECT_GT_VAL(gtSystemInfo.L3CacheSizeInKb, 0u); EXPECT_EQ(gtSystemInfo.CsrSizeInMb, 8u); EXPECT_FALSE(gtSystemInfo.IsDynamicallyPopulated); } compute-runtime-22.14.22890/opencl/test/unit_test/gen11/lkf/test_device_caps_lkf.cpp000066400000000000000000000032371422164147700301670ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" using namespace NEO; using LkfTest = Test; LKFTEST_F(LkfTest, givenLkfWhenSlmSizeIsRequiredThenReturnCorrectValue) { EXPECT_EQ(64u, pDevice->getHardwareInfo().capabilityTable.slmSize); } LKFTEST_F(LkfTest, givenLkfWhenCheckedSvmSupportThenNoSvmIsReported) { const auto &caps = 
pClDevice->getDeviceInfo(); EXPECT_EQ(caps.svmCapabilities, 0u); } LKFTEST_F(LkfTest, givenLkfWhenDoublePrecissionIsCheckedThenFalseIsReturned) { EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.ftrSupportsFP64); EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.ftrSupports64BitMath); } LKFTEST_F(LkfTest, givenLkfWhenExtensionStringIsCheckedThenFP64IsNotReported) { const auto &caps = pClDevice->getDeviceInfo(); std::string extensionString = caps.deviceExtensions; EXPECT_EQ(std::string::npos, extensionString.find(std::string("cl_khr_fp64"))); EXPECT_EQ(0u, caps.doubleFpConfig); } LKFTEST_F(LkfTest, WhenCheckingIsSimulationThenTrueReturnedOnlyForSimulationId) { unsigned short lkfSimulationIds[2] = { ILKF_1x8x8_DESK_DEVICE_F0_ID, 0, // default, non-simulation }; NEO::MockDevice *mockDevice = nullptr; for (auto id : lkfSimulationIds) { mockDevice = createWithUsDeviceId(id); ASSERT_NE(mockDevice, nullptr); if (id == 0) EXPECT_FALSE(mockDevice->isSimulation()); else EXPECT_TRUE(mockDevice->isSimulation()); delete mockDevice; } } compute-runtime-22.14.22890/opencl/test/unit_test/gen11/lkf/test_hw_info_config_lkf.cpp000066400000000000000000000070771422164147700307060ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/helpers/default_hw_info.h" #include "shared/test/common/test_macros/test.h" using namespace NEO; TEST(LkfHwInfoConfig, givenInvalidSystemInfoWhenSettingHardwareInfoThenExpectThrow) { if (IGFX_LAKEFIELD != productFamily) { return; } HardwareInfo hwInfo = *defaultHwInfo; GT_SYSTEM_INFO >SystemInfo = hwInfo.gtSystemInfo; uint64_t config = 0xdeadbeef; gtSystemInfo = {0}; EXPECT_ANY_THROW(hardwareInfoSetup[productFamily](&hwInfo, false, config)); EXPECT_EQ(0u, gtSystemInfo.SliceCount); EXPECT_EQ(0u, gtSystemInfo.SubSliceCount); EXPECT_EQ(0u, gtSystemInfo.DualSubSliceCount); EXPECT_EQ(0u, gtSystemInfo.EUCount); } using LkfHwInfo = ::testing::Test; 
LKFTEST_F(LkfHwInfo, givenHwInfoConfigStringThenAfterSetupResultingVmeIsDisabled) { HardwareInfo hwInfo = *defaultHwInfo; uint64_t config = 0x100080008; hardwareInfoSetup[productFamily](&hwInfo, false, config); EXPECT_FALSE(hwInfo.capabilityTable.ftrSupportsVmeAvcTextureSampler); EXPECT_FALSE(hwInfo.capabilityTable.ftrSupportsVmeAvcPreemption); EXPECT_FALSE(hwInfo.capabilityTable.supportsVme); } LKFTEST_F(LkfHwInfo, givenBoolWhenCallLkfHardwareInfoSetupThenFeatureTableAndWorkaroundTableAreSetCorrect) { bool boolValue[]{ true, false}; HardwareInfo hwInfo = *defaultHwInfo; GT_SYSTEM_INFO >SystemInfo = hwInfo.gtSystemInfo; FeatureTable &featureTable = hwInfo.featureTable; WorkaroundTable &workaroundTable = hwInfo.workaroundTable; uint64_t config = 0x100080008; for (auto setParamBool : boolValue) { gtSystemInfo = {0}; featureTable = {}; workaroundTable = {}; hardwareInfoSetup[productFamily](&hwInfo, setParamBool, config); EXPECT_EQ(setParamBool, featureTable.flags.ftrL3IACoherency); EXPECT_EQ(setParamBool, featureTable.flags.ftrPPGTT); EXPECT_EQ(setParamBool, featureTable.flags.ftrSVM); EXPECT_EQ(setParamBool, featureTable.flags.ftrIA32eGfxPTEs); EXPECT_EQ(setParamBool, featureTable.flags.ftrStandardMipTailFormat); EXPECT_EQ(setParamBool, featureTable.flags.ftrDisplayYTiling); EXPECT_EQ(setParamBool, featureTable.flags.ftrTranslationTable); EXPECT_EQ(setParamBool, featureTable.flags.ftrUserModeTranslationTable); EXPECT_EQ(setParamBool, featureTable.flags.ftrTileMappedResource); EXPECT_EQ(setParamBool, featureTable.flags.ftrEnableGuC); EXPECT_EQ(setParamBool, featureTable.flags.ftrFbc); EXPECT_EQ(setParamBool, featureTable.flags.ftrFbc2AddressTranslation); EXPECT_EQ(setParamBool, featureTable.flags.ftrFbcBlitterTracking); EXPECT_EQ(setParamBool, featureTable.flags.ftrFbcCpuTracking); EXPECT_EQ(setParamBool, featureTable.flags.ftrTileY); EXPECT_EQ(setParamBool, featureTable.flags.ftrAstcHdr2D); EXPECT_EQ(setParamBool, featureTable.flags.ftrAstcLdr2D); 
EXPECT_EQ(setParamBool, featureTable.flags.ftr3dMidBatchPreempt); EXPECT_EQ(setParamBool, featureTable.flags.ftrGpGpuMidBatchPreempt); EXPECT_EQ(setParamBool, featureTable.flags.ftrGpGpuMidThreadLevelPreempt); EXPECT_EQ(setParamBool, featureTable.flags.ftrGpGpuThreadGroupLevelPreempt); EXPECT_EQ(setParamBool, featureTable.flags.ftrPerCtxtPreemptionGranularityControl); EXPECT_EQ(setParamBool, workaroundTable.flags.wa4kAlignUVOffsetNV12LinearSurface); EXPECT_EQ(setParamBool, workaroundTable.flags.waReportPerfCountUseGlobalContextID); } } compute-runtime-22.14.22890/opencl/test/unit_test/gen11/sampler_tests_gen11.cpp000066400000000000000000000017241422164147700271320ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/hw_info_config.h" #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include using namespace NEO; typedef Test Gen11SamplerTest; GEN11TEST_F(Gen11SamplerTest, WhenAppendingSamplerStateParamsThenStateIsNotChanged) { typedef typename FamilyType::SAMPLER_STATE SAMPLER_STATE; auto stateWithoutAppendedParams = FamilyType::cmdInitSamplerState; auto stateWithAppendedParams = FamilyType::cmdInitSamplerState; EXPECT_TRUE(memcmp(&stateWithoutAppendedParams, &stateWithAppendedParams, sizeof(SAMPLER_STATE)) == 0); HwInfoConfig::get(defaultHwInfo->platform.eProductFamily)->adjustSamplerState(&stateWithAppendedParams, *defaultHwInfo); EXPECT_TRUE(memcmp(&stateWithoutAppendedParams, &stateWithAppendedParams, sizeof(SAMPLER_STATE)) == 0); } compute-runtime-22.14.22890/opencl/test/unit_test/gen11/tbx_command_stream_receiver_tests_gen11.cpp000066400000000000000000000021361422164147700332170ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/tbx_command_stream_receiver_hw.h" #include 
"shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" using namespace NEO; using Gen11TbxCommandStreamReceiverTests = Test; GEN11TEST_F(Gen11TbxCommandStreamReceiverTests, whenAskedForPollForCompletionParametersThenReturnCorrectValues) { class MyMockTbxHw : public TbxCommandStreamReceiverHw { public: MyMockTbxHw(ExecutionEnvironment &executionEnvironment) : TbxCommandStreamReceiverHw(executionEnvironment, 0, 1) {} using TbxCommandStreamReceiverHw::getpollNotEqualValueForPollForCompletion; using TbxCommandStreamReceiverHw::getMaskAndValueForPollForCompletion; }; MyMockTbxHw myMockTbxHw(*pDevice->executionEnvironment); EXPECT_EQ(0x80u, myMockTbxHw.getMaskAndValueForPollForCompletion()); EXPECT_TRUE(myMockTbxHw.getpollNotEqualValueForPollForCompletion()); } compute-runtime-22.14.22890/opencl/test/unit_test/gen11/test_device_caps_gen11.cpp000066400000000000000000000111261422164147700275460ustar00rootroot00000000000000/* * Copyright (C) 2019-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/hw_helper.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/helpers/gtest_helpers.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" using namespace NEO; typedef Test Gen11DeviceCaps; GEN11TEST_F(Gen11DeviceCaps, GivenDefaultWhenCheckingPreemptionModeThenMidThreadIsReturned) { EXPECT_TRUE(PreemptionMode::MidThread == pDevice->getHardwareInfo().capabilityTable.defaultPreemptionMode); } GEN11TEST_F(Gen11DeviceCaps, WhenCheckingProfilingTimerResolutionThenCorrectResolutionIsReturned) { const auto &caps = pDevice->getDeviceInfo(); EXPECT_EQ(83u, caps.outProfilingTimerResolution); } GEN11TEST_F(Gen11DeviceCaps, GivenWhenGettingKmdNotifyPropertiesThenItIsDisabled) { EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.enableKmdNotify); EXPECT_EQ(0, 
pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds); EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.enableQuickKmdSleep); EXPECT_EQ(0, pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayQuickKmdSleepMicroseconds); EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.enableQuickKmdSleepForSporadicWaits); EXPECT_EQ(0, pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayQuickKmdSleepForSporadicWaitsMicroseconds); EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.enableQuickKmdSleepForDirectSubmission); EXPECT_EQ(0, pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayQuickKmdSleepForDirectSubmissionMicroseconds); } GEN11TEST_F(Gen11DeviceCaps, WhenCheckingCompressionThenItIsDisabled) { EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.ftrRenderCompressedBuffers); EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.ftrRenderCompressedImages); } GEN11TEST_F(Gen11DeviceCaps, givenHwInfoWhenRequestedComputeUnitsUsedForScratchThenReturnValidValue) { const auto &hwInfo = pDevice->getHardwareInfo(); auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); uint32_t expectedValue = hwInfo.gtSystemInfo.MaxSubSlicesSupported * hwInfo.gtSystemInfo.MaxEuPerSubSlice * 8; EXPECT_EQ(expectedValue, hwHelper.getComputeUnitsUsedForScratch(&hwInfo)); EXPECT_EQ(expectedValue, pDevice->getDeviceInfo().computeUnitsUsedForScratch); } GEN11TEST_F(Gen11DeviceCaps, givenHwInfoWhenRequestedMaxFrontEndThreadsThenReturnValidValue) { const auto &hwInfo = pDevice->getHardwareInfo(); EXPECT_EQ(HwHelper::getMaxThreadsForVfe(hwInfo), pDevice->getDeviceInfo().maxFrontEndThreads); } GEN11TEST_F(Gen11DeviceCaps, givenGen11WhenCheckSupportCacheFlushAfterWalkerThenFalse) { EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.supportCacheFlushAfterWalker); } GEN11TEST_F(Gen11DeviceCaps, 
givenGen11WhenCheckBlitterOperationsSupportThenReturnFalse) { EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.blitterOperationsSupported); } GEN11TEST_F(Gen11DeviceCaps, givenGen11WhenCheckingImageSupportThenReturnTrue) { EXPECT_TRUE(pDevice->getHardwareInfo().capabilityTable.supportsImages); } GEN11TEST_F(Gen11DeviceCaps, givenGen11WhenCheckingMediaBlockSupportThenReturnTrue) { EXPECT_TRUE(pDevice->getHardwareInfo().capabilityTable.supportsMediaBlock); } GEN11TEST_F(Gen11DeviceCaps, givenGen11WhenCheckingCoherencySupportThenReturnFalse) { EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.ftrSupportsCoherency); } GEN11TEST_F(Gen11DeviceCaps, givenGen11WhenCheckExtensionsThenSubgroupLocalBlockIOIsSupported) { const auto &caps = pClDevice->getDeviceInfo(); EXPECT_TRUE(hasSubstr(caps.deviceExtensions, std::string("cl_intel_subgroup_local_block_io"))); } GEN11TEST_F(Gen11DeviceCaps, givenGen11WhenCheckExtensionsThenDeviceProperlyReportsClKhrSubgroupsExtension) { const auto &caps = pClDevice->getDeviceInfo(); if (pClDevice->areOcl21FeaturesEnabled()) { EXPECT_TRUE(hasSubstr(caps.deviceExtensions, std::string("cl_khr_subgroups"))); } else { EXPECT_FALSE(hasSubstr(caps.deviceExtensions, std::string("cl_khr_subgroups"))); } } GEN11TEST_F(Gen11DeviceCaps, givenGen11WhenCheckingCapsThenDeviceDoesProperlyReportsIndependentForwardProgress) { const auto &caps = pClDevice->getDeviceInfo(); if (pClDevice->areOcl21FeaturesEnabled()) { EXPECT_TRUE(caps.independentForwardProgress != 0); } else { EXPECT_FALSE(caps.independentForwardProgress != 0); } } compute-runtime-22.14.22890/opencl/test/unit_test/gen11/test_platform_caps_gen11.cpp000066400000000000000000000012401422164147700301270ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/platform_fixture.h" using namespace NEO; struct Gen11PlatformCaps : public 
PlatformFixture, public ::testing::Test { void SetUp() override { PlatformFixture::SetUp(); } void TearDown() override { PlatformFixture::TearDown(); } }; GEN11TEST_F(Gen11PlatformCaps, WhenCheckingExtensionStringThenFp64IsNotSupported) { const auto &caps = pPlatform->getPlatformInfo(); EXPECT_EQ(std::string::npos, caps.extensions.find(std::string("cl_khr_fp64"))); } compute-runtime-22.14.22890/opencl/test/unit_test/gen11/test_sample_gen11.cpp000066400000000000000000000006451422164147700265660ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" using namespace NEO; typedef Test Gen11OnlyTeset; GEN11TEST_F(Gen11OnlyTeset, WhenGettingRenderCoreFamilyThenGen11CoreIsReturned) { EXPECT_EQ(IGFX_GEN11_CORE, pDevice->getRenderCoreFamily()); } compute-runtime-22.14.22890/opencl/test/unit_test/gen11/windows/000077500000000000000000000000001422164147700242345ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/gen11/windows/CMakeLists.txt000066400000000000000000000005011422164147700267700ustar00rootroot00000000000000# # Copyright (C) 2019-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_gen11_windows ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/gmm_callbacks_tests_gen11.cpp ) if(WIN32) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen11_windows}) endif() compute-runtime-22.14.22890/opencl/test/unit_test/gen11/windows/gmm_callbacks_tests_gen11.cpp000066400000000000000000000012021422164147700317270ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/windows/gmm_callbacks.h" #include "shared/test/common/test_macros/test.h" using namespace NEO; typedef ::testing::Test Gen11GmmCallbacksTests; GEN11TEST_F(Gen11GmmCallbacksTests, 
GivenDefaultWhenNotifyingAubCaptureThenDeviceCallbackIsNotSupported) { EXPECT_EQ(0, DeviceCallbacks::notifyAubCapture(nullptr, 0, 0, false)); } GEN11TEST_F(Gen11GmmCallbacksTests, GivenDefaultWhenWritingL3AddressThenTtCallbackIsNotSupported) { EXPECT_EQ(0, TTCallbacks::writeL3Address(nullptr, 1, 2)); } compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/000077500000000000000000000000001422164147700230775ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/CMakeLists.txt000066400000000000000000000031531422164147700256410ustar00rootroot00000000000000# # Copyright (C) 2019-2022 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_GEN12LP) set(IGDRCL_SRCS_tests_gen12lp_excludes ${CMAKE_CURRENT_SOURCE_DIR}/excludes_ocl_gen12lp.cpp ) set_property(GLOBAL APPEND PROPERTY IGDRCL_SRCS_tests_excludes ${IGDRCL_SRCS_tests_gen12lp_excludes}) set(IGDRCL_SRCS_tests_gen12lp ${IGDRCL_SRCS_tests_gen12lp_excludes} ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/buffer_tests_gen12lp.inl ${CMAKE_CURRENT_SOURCE_DIR}/coherency_tests_gen12lp.inl ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_hw_tests_gen12lp.inl ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_media_kernel_gen12lp.inl ${CMAKE_CURRENT_SOURCE_DIR}/gen12lp_tests_wrapper.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gpgpu_walker_tests_gen12lp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/hw_helper_tests_gen12lp.inl ${CMAKE_CURRENT_SOURCE_DIR}/image_tests_gen12lp.inl ${CMAKE_CURRENT_SOURCE_DIR}/kernel_tests_gen12lp.inl ${CMAKE_CURRENT_SOURCE_DIR}/profiling_tests_gen12lp.inl ${CMAKE_CURRENT_SOURCE_DIR}/sampler_tests_gen12lp.inl ${CMAKE_CURRENT_SOURCE_DIR}/tbx_command_stream_receiver_tests_gen12lp.inl ${CMAKE_CURRENT_SOURCE_DIR}/test_device_caps_gen12lp.inl ${CMAKE_CURRENT_SOURCE_DIR}/test_platform_caps_gen12lp.inl ${CMAKE_CURRENT_SOURCE_DIR}/test_sample_gen12lp.inl ) get_property(NEO_CORE_TESTS_GEN12LP GLOBAL PROPERTY NEO_CORE_TESTS_GEN12LP) list(APPEND IGDRCL_SRCS_tests_gen12lp 
${NEO_CORE_TESTS_GEN12LP}) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen12lp}) add_subdirectories() endif() compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/adlp/000077500000000000000000000000001422164147700240175ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/adlp/CMakeLists.txt000066400000000000000000000016311422164147700265600ustar00rootroot00000000000000# # Copyright (C) 2021-2022 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_ADLP) set(IGDRCL_SRCS_tests_gen12lp_adlp_excludes ${CMAKE_CURRENT_SOURCE_DIR}/excludes_ocl_adlp.cpp ) set_property(GLOBAL APPEND PROPERTY IGDRCL_SRCS_tests_excludes ${IGDRCL_SRCS_tests_gen12lp_adlp_excludes}) set(IGDRCL_SRCS_tests_gen12lp_adlp ${IGDRCL_SRCS_tests_gen12lp_adlp_excludes} ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/test_device_caps_adlp.cpp ) get_property(NEO_CORE_TESTS_GEN12LP_ADLP GLOBAL PROPERTY NEO_CORE_TESTS_GEN12LP_ADLP) list(APPEND IGDRCL_SRCS_tests_gen12lp_adlp ${NEO_CORE_TESTS_GEN12LP_ADLP}) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen12lp_adlp}) add_subdirectories() neo_copy_test_files_with_revision(copy_test_files_adlp_0 adlp 0) add_dependencies(copy_test_files_per_product copy_test_files_adlp_0) endif() compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/adlp/excludes_ocl_adlp.cpp000066400000000000000000000011101422164147700301650ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" HWTEST_EXCLUDE_PRODUCT(BufferSetSurfaceTests, givenBufferSetSurfaceThatMemoryIsUnalignedToCachelineButReadOnlyThenL3CacheShouldBeStillOn, IGFX_ALDERLAKE_P) HWTEST_EXCLUDE_PRODUCT(BufferSetSurfaceTests, givenAlignedCacheableReadOnlyBufferThenChoseOclBufferPolicy, IGFX_ALDERLAKE_P); HWTEST_EXCLUDE_PRODUCT(DeviceFactoryTest, 
givenInvalidHwConfigStringWhenPrepareDeviceEnvironmentsForProductFamilyOverrideThenThrowsException, IGFX_ALDERLAKE_P); compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/adlp/linux/000077500000000000000000000000001422164147700251565ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/adlp/linux/CMakeLists.txt000066400000000000000000000005311422164147700277150ustar00rootroot00000000000000# # Copyright (C) 2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_gen12_adlp_linux ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/hw_info_config_tests_adlp.cpp ) if(UNIX) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen12_adlp_linux}) add_subdirectory(dll) endif() compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/adlp/linux/dll/000077500000000000000000000000001422164147700257315ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/adlp/linux/dll/CMakeLists.txt000066400000000000000000000005161422164147700304730ustar00rootroot00000000000000# # Copyright (C) 2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_linux_dll_tests_gen12_adlp ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}device_id_tests_adlp.cpp ) target_sources(igdrcl_linux_dll_tests PRIVATE ${IGDRCL_SRCS_linux_dll_tests_gen12_adlp}) compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/adlp/linux/dll/device_id_tests_adlp.cpp000066400000000000000000000032241422164147700325730ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/fixtures/linux/device_id_fixture.h" using namespace NEO; TEST_F(DeviceIdTests, GivenAdlpSupportedDeviceIdThenDeviceDescriptorTableExists) { std::array expectedDescriptors = {{ {0x46A0, &ADLP_CONFIG::hwInfo, &ADLP_CONFIG::setupHardwareInfo}, {0x46B0, &ADLP_CONFIG::hwInfo, 
&ADLP_CONFIG::setupHardwareInfo}, {0x46A1, &ADLP_CONFIG::hwInfo, &ADLP_CONFIG::setupHardwareInfo}, {0x46A2, &ADLP_CONFIG::hwInfo, &ADLP_CONFIG::setupHardwareInfo}, {0x46A3, &ADLP_CONFIG::hwInfo, &ADLP_CONFIG::setupHardwareInfo}, {0x46A6, &ADLP_CONFIG::hwInfo, &ADLP_CONFIG::setupHardwareInfo}, {0x46A8, &ADLP_CONFIG::hwInfo, &ADLP_CONFIG::setupHardwareInfo}, {0x46AA, &ADLP_CONFIG::hwInfo, &ADLP_CONFIG::setupHardwareInfo}, {0x462A, &ADLP_CONFIG::hwInfo, &ADLP_CONFIG::setupHardwareInfo}, {0x4626, &ADLP_CONFIG::hwInfo, &ADLP_CONFIG::setupHardwareInfo}, {0x4628, &ADLP_CONFIG::hwInfo, &ADLP_CONFIG::setupHardwareInfo}, {0x46B1, &ADLP_CONFIG::hwInfo, &ADLP_CONFIG::setupHardwareInfo}, {0x46B2, &ADLP_CONFIG::hwInfo, &ADLP_CONFIG::setupHardwareInfo}, {0x46B3, &ADLP_CONFIG::hwInfo, &ADLP_CONFIG::setupHardwareInfo}, {0x46C0, &ADLP_CONFIG::hwInfo, &ADLP_CONFIG::setupHardwareInfo}, {0x46C1, &ADLP_CONFIG::hwInfo, &ADLP_CONFIG::setupHardwareInfo}, {0x46C2, &ADLP_CONFIG::hwInfo, &ADLP_CONFIG::setupHardwareInfo}, {0x46C3, &ADLP_CONFIG::hwInfo, &ADLP_CONFIG::setupHardwareInfo}, }}; testImpl(expectedDescriptors); } compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/adlp/linux/hw_info_config_tests_adlp.cpp000066400000000000000000000157641422164147700330770ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/os_interface.h" #include "shared/test/common/helpers/default_hw_info.h" #include "shared/test/common/libult/linux/drm_mock.h" #include "shared/test/unit_test/helpers/gtest_helpers.h" #include "opencl/test/unit_test/os_interface/linux/hw_info_config_linux_tests.h" using namespace NEO; struct HwInfoConfigTestLinuxAdlp : HwInfoConfigTestLinux { void SetUp() override { HwInfoConfigTestLinux::SetUp(); drm = new DrmMock(*executionEnvironment->rootDeviceEnvironments[0]); osInterface->setDriverModel(std::unique_ptr(drm)); drm->storedDeviceID = IGFX_ALDERLAKE_P; } }; 
ADLPTEST_F(HwInfoConfigTestLinuxAdlp, WhenConfiguringHwInfoThenInfoIsSetCorrectly) { auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ(static_cast(drm->storedDeviceID), outHwInfo.platform.usDeviceID); EXPECT_EQ(static_cast(drm->storedDeviceRevID), outHwInfo.platform.usRevId); EXPECT_EQ(static_cast(drm->storedEUVal), outHwInfo.gtSystemInfo.EUCount); EXPECT_EQ(static_cast(drm->storedSSVal), outHwInfo.gtSystemInfo.SubSliceCount); EXPECT_EQ(1u, outHwInfo.gtSystemInfo.SliceCount); EXPECT_FALSE(outHwInfo.featureTable.flags.ftrTileY); } ADLPTEST_F(HwInfoConfigTestLinuxAdlp, GivenInvalidDeviceIdWhenConfiguringHwInfoThenErrorIsReturned) { auto hwInfoConfig = HwInfoConfig::get(productFamily); drm->storedRetValForDeviceID = -1; int ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-1, ret); drm->storedRetValForDeviceID = 0; drm->storedRetValForDeviceRevID = -1; ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-1, ret); drm->storedRetValForDeviceRevID = 0; drm->failRetTopology = true; drm->storedRetValForEUVal = -1; ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-1, ret); drm->storedRetValForEUVal = 0; drm->storedRetValForSSVal = -1; ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-1, ret); } template using AdlpConfigHwInfoTests = ::testing::Test; using adlpConfigTestTypes = ::testing::Types; TYPED_TEST_CASE(AdlpConfigHwInfoTests, adlpConfigTestTypes); TYPED_TEST(AdlpConfigHwInfoTests, givenAdlpConfigWhenSetupHardwareInfoThenGtSystemInfoAndWaAndFtrTablesAreSetCorrect) { HardwareInfo hwInfo = *defaultHwInfo; auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); DrmMock 
drm(*executionEnvironment->rootDeviceEnvironments[0]); GT_SYSTEM_INFO >SystemInfo = hwInfo.gtSystemInfo; auto &featureTable = hwInfo.featureTable; auto &workaroundTable = hwInfo.workaroundTable; DeviceDescriptor device = {0, &hwInfo, &TypeParam::setupHardwareInfo}; int ret = drm.setupHardwareInfo(&device, false); EXPECT_EQ(ret, 0); EXPECT_EQ(8u, gtSystemInfo.CsrSizeInMb); EXPECT_FALSE(gtSystemInfo.IsL3HashModeEnabled); EXPECT_FALSE(gtSystemInfo.IsDynamicallyPopulated); EXPECT_FALSE(featureTable.flags.ftrL3IACoherency); EXPECT_FALSE(featureTable.flags.ftrPPGTT); EXPECT_FALSE(featureTable.flags.ftrSVM); EXPECT_FALSE(featureTable.flags.ftrIA32eGfxPTEs); EXPECT_FALSE(featureTable.flags.ftrStandardMipTailFormat); EXPECT_FALSE(featureTable.flags.ftrTranslationTable); EXPECT_FALSE(featureTable.flags.ftrUserModeTranslationTable); EXPECT_FALSE(featureTable.flags.ftrTileMappedResource); EXPECT_FALSE(featureTable.flags.ftrEnableGuC); EXPECT_FALSE(featureTable.flags.ftrFbc); EXPECT_FALSE(featureTable.flags.ftrFbc2AddressTranslation); EXPECT_FALSE(featureTable.flags.ftrFbcBlitterTracking); EXPECT_FALSE(featureTable.flags.ftrFbcCpuTracking); EXPECT_FALSE(featureTable.flags.ftrTileY); EXPECT_FALSE(featureTable.flags.ftrAstcHdr2D); EXPECT_FALSE(featureTable.flags.ftrAstcLdr2D); EXPECT_FALSE(featureTable.flags.ftr3dMidBatchPreempt); EXPECT_FALSE(featureTable.flags.ftrGpGpuMidBatchPreempt); EXPECT_FALSE(featureTable.flags.ftrGpGpuThreadGroupLevelPreempt); EXPECT_FALSE(featureTable.flags.ftrPerCtxtPreemptionGranularityControl); EXPECT_FALSE(workaroundTable.flags.wa4kAlignUVOffsetNV12LinearSurface); EXPECT_FALSE(workaroundTable.flags.waEnablePreemptionGranularityControlByUMD); EXPECT_FALSE(workaroundTable.flags.waUntypedBufferCompression); ret = drm.setupHardwareInfo(&device, true); EXPECT_EQ(ret, 0); EXPECT_EQ(8u, gtSystemInfo.CsrSizeInMb); EXPECT_FALSE(gtSystemInfo.IsL3HashModeEnabled); EXPECT_FALSE(gtSystemInfo.IsDynamicallyPopulated); 
EXPECT_TRUE(featureTable.flags.ftrL3IACoherency); EXPECT_TRUE(featureTable.flags.ftrPPGTT); EXPECT_TRUE(featureTable.flags.ftrSVM); EXPECT_TRUE(featureTable.flags.ftrIA32eGfxPTEs); EXPECT_TRUE(featureTable.flags.ftrStandardMipTailFormat); EXPECT_TRUE(featureTable.flags.ftrTranslationTable); EXPECT_TRUE(featureTable.flags.ftrUserModeTranslationTable); EXPECT_TRUE(featureTable.flags.ftrTileMappedResource); EXPECT_TRUE(featureTable.flags.ftrEnableGuC); EXPECT_TRUE(featureTable.flags.ftrFbc); EXPECT_TRUE(featureTable.flags.ftrFbc2AddressTranslation); EXPECT_TRUE(featureTable.flags.ftrFbcBlitterTracking); EXPECT_TRUE(featureTable.flags.ftrFbcCpuTracking); EXPECT_FALSE(featureTable.flags.ftrTileY); EXPECT_TRUE(featureTable.flags.ftrAstcHdr2D); EXPECT_TRUE(featureTable.flags.ftrAstcLdr2D); EXPECT_TRUE(featureTable.flags.ftr3dMidBatchPreempt); EXPECT_TRUE(featureTable.flags.ftrGpGpuMidBatchPreempt); EXPECT_TRUE(featureTable.flags.ftrGpGpuThreadGroupLevelPreempt); EXPECT_TRUE(featureTable.flags.ftrPerCtxtPreemptionGranularityControl); EXPECT_TRUE(workaroundTable.flags.wa4kAlignUVOffsetNV12LinearSurface); EXPECT_TRUE(workaroundTable.flags.waEnablePreemptionGranularityControlByUMD); EXPECT_TRUE(workaroundTable.flags.waUntypedBufferCompression); } TYPED_TEST(AdlpConfigHwInfoTests, givenSliceCountZeroWhenSetupHardwareInfoThenNotZeroValuesSetInGtSystemInfo) { HardwareInfo hwInfo = {}; hwInfo.gtSystemInfo = {0}; TypeParam::setupHardwareInfo(&hwInfo, false); EXPECT_NE(0u, hwInfo.gtSystemInfo.SliceCount); EXPECT_NE(0u, hwInfo.gtSystemInfo.SubSliceCount); EXPECT_NE(0u, hwInfo.gtSystemInfo.EUCount); EXPECT_NE(0u, hwInfo.gtSystemInfo.MaxEuPerSubSlice); EXPECT_NE(0u, hwInfo.gtSystemInfo.MaxSlicesSupported); EXPECT_NE(0u, hwInfo.gtSystemInfo.MaxSubSlicesSupported); EXPECT_NE(0u, hwInfo.gtSystemInfo.L3BankCount); EXPECT_TRUE(hwInfo.gtSystemInfo.CCSInfo.IsValid); EXPECT_NE(0u, hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled); } 
compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/adlp/test_device_caps_adlp.cpp000066400000000000000000000024131422164147700310270ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" using namespace NEO; using AdlpUsDeviceIdTest = Test; ADLPTEST_F(AdlpUsDeviceIdTest, GivenNonZeroIdThenIsSimulationIsTrue) { unsigned short simulationIds[] = { 0, // default, non-simulation }; for (auto id : simulationIds) { auto mockDevice = std::unique_ptr(createWithUsDeviceId(id)); ASSERT_NE(mockDevice.get(), nullptr); if (id == 0) { EXPECT_FALSE(mockDevice->isSimulation()); } else { EXPECT_TRUE(mockDevice->isSimulation()); } } } ADLPTEST_F(AdlpUsDeviceIdTest, givenADLPWhenCheckFtrSupportsInteger64BitAtomicsThenReturnFalse) { EXPECT_TRUE(pDevice->getHardwareInfo().capabilityTable.ftrSupportsInteger64BitAtomics); } ADLPTEST_F(AdlpUsDeviceIdTest, givenAdlpWhenRequestedVmeFlagsThenReturnFalse) { EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.supportsVme); EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.ftrSupportsVmeAvcTextureSampler); EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.ftrSupportsVmeAvcPreemption); } compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/adls/000077500000000000000000000000001422164147700240225ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/adls/CMakeLists.txt000066400000000000000000000013621422164147700265640ustar00rootroot00000000000000# # Copyright (C) 2020-2022 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_ADLS) set(IGDRCL_SRCS_tests_gen12lp_adls_excludes ${CMAKE_CURRENT_SOURCE_DIR}/excludes_ocl_adls.cpp ) set_property(GLOBAL APPEND PROPERTY IGDRCL_SRCS_tests_excludes ${IGDRCL_SRCS_tests_gen12lp_adls_excludes}) set(IGDRCL_SRCS_tests_gen12lp_adls ${IGDRCL_SRCS_tests_gen12lp_adls_excludes} 
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/test_device_caps_adls.cpp ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen12lp_adls}) add_subdirectories() neo_copy_test_files_with_revision(copy_test_files_adls_0 adls 0) add_dependencies(copy_test_files_per_product copy_test_files_adls_0) endif() compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/adls/excludes_ocl_adls.cpp000066400000000000000000000004561422164147700302070ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" HWTEST_EXCLUDE_PRODUCT(DeviceFactoryTest, givenInvalidHwConfigStringWhenPrepareDeviceEnvironmentsForProductFamilyOverrideThenThrowsException, IGFX_ALDERLAKE_S); compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/adls/linux/000077500000000000000000000000001422164147700251615ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/adls/linux/CMakeLists.txt000066400000000000000000000005311422164147700277200ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_gen12_adls_linux ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/hw_info_config_tests_adls.cpp ) if(UNIX) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen12_adls_linux}) add_subdirectory(dll) endif() compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/adls/linux/dll/000077500000000000000000000000001422164147700257345ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/adls/linux/dll/CMakeLists.txt000066400000000000000000000004731422164147700305000ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_linux_dll_tests_gen12_adls ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/device_id_tests_adls.cpp ) 
target_sources(igdrcl_linux_dll_tests PRIVATE ${IGDRCL_SRCS_linux_dll_tests_gen12_adls}) compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/adls/linux/dll/device_id_tests_adls.cpp000066400000000000000000000025571422164147700326110ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/fixtures/linux/device_id_fixture.h" using namespace NEO; TEST_F(DeviceIdTests, GivenAdlsSupportedDeviceIdThenHardwareInfoIsCorrect) { std::array expectedDescriptors = {{ {0x4680, &ADLS_HW_CONFIG::hwInfo, &ADLS_HW_CONFIG::setupHardwareInfo}, {0x4682, &ADLS_HW_CONFIG::hwInfo, &ADLS_HW_CONFIG::setupHardwareInfo}, {0x4688, &ADLS_HW_CONFIG::hwInfo, &ADLS_HW_CONFIG::setupHardwareInfo}, {0x468A, &ADLS_HW_CONFIG::hwInfo, &ADLS_HW_CONFIG::setupHardwareInfo}, {0x4690, &ADLS_HW_CONFIG::hwInfo, &ADLS_HW_CONFIG::setupHardwareInfo}, {0x4692, &ADLS_HW_CONFIG::hwInfo, &ADLS_HW_CONFIG::setupHardwareInfo}, {0x4693, &ADLS_HW_CONFIG::hwInfo, &ADLS_HW_CONFIG::setupHardwareInfo}, {0xA780, &ADLS_HW_CONFIG::hwInfo, &ADLS_HW_CONFIG::setupHardwareInfo}, {0xA781, &ADLS_HW_CONFIG::hwInfo, &ADLS_HW_CONFIG::setupHardwareInfo}, {0xA782, &ADLS_HW_CONFIG::hwInfo, &ADLS_HW_CONFIG::setupHardwareInfo}, {0xA783, &ADLS_HW_CONFIG::hwInfo, &ADLS_HW_CONFIG::setupHardwareInfo}, {0xA788, &ADLS_HW_CONFIG::hwInfo, &ADLS_HW_CONFIG::setupHardwareInfo}, {0xA789, &ADLS_HW_CONFIG::hwInfo, &ADLS_HW_CONFIG::setupHardwareInfo}, }}; testImpl(expectedDescriptors); } compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/adls/linux/hw_info_config_tests_adls.cpp000066400000000000000000000065351422164147700331010ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/os_interface.h" #include "shared/test/common/libult/linux/drm_mock.h" #include "shared/test/unit_test/helpers/gtest_helpers.h" #include 
"opencl/test/unit_test/os_interface/linux/hw_info_config_linux_tests.h" using namespace NEO; struct HwInfoConfigTestLinuxAdls : HwInfoConfigTestLinux { void SetUp() override { HwInfoConfigTestLinux::SetUp(); drm = new DrmMock(*executionEnvironment->rootDeviceEnvironments[0]); osInterface->setDriverModel(std::unique_ptr(drm)); drm->storedDeviceID = IGFX_ALDERLAKE_S; } }; ADLSTEST_F(HwInfoConfigTestLinuxAdls, WhenConfiguringHwInfoThenConfigIsCorrect) { auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ(static_cast(drm->storedDeviceID), outHwInfo.platform.usDeviceID); EXPECT_EQ(static_cast(drm->storedDeviceRevID), outHwInfo.platform.usRevId); EXPECT_EQ(static_cast(drm->storedEUVal), outHwInfo.gtSystemInfo.EUCount); EXPECT_EQ(static_cast(drm->storedSSVal), outHwInfo.gtSystemInfo.SubSliceCount); EXPECT_EQ(1u, outHwInfo.gtSystemInfo.SliceCount); EXPECT_FALSE(outHwInfo.featureTable.flags.ftrTileY); } ADLSTEST_F(HwInfoConfigTestLinuxAdls, GivenIncorrectDataWhenConfiguringHwInfoThenErrorIsReturned) { auto hwInfoConfig = HwInfoConfig::get(productFamily); drm->storedRetValForDeviceID = -1; int ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-1, ret); drm->storedRetValForDeviceID = 0; drm->storedRetValForDeviceRevID = -1; ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-1, ret); drm->storedRetValForDeviceRevID = 0; drm->failRetTopology = true; drm->storedRetValForEUVal = -1; ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-1, ret); drm->storedRetValForEUVal = 0; drm->storedRetValForSSVal = -1; ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-1, ret); } TEST(AdlsHwInfoTests, WhenSettingUpHwInfoThenConfigIsCorrect) { HardwareInfo hwInfo = *defaultHwInfo; auto executionEnvironment = std::make_unique(); 
executionEnvironment->prepareRootDeviceEnvironments(1); executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); DrmMock drm(*executionEnvironment->rootDeviceEnvironments[0]); GT_SYSTEM_INFO >SystemInfo = hwInfo.gtSystemInfo; DeviceDescriptor device = {0, &hwInfo, &ADLS_HW_CONFIG::setupHardwareInfo}; int ret = drm.setupHardwareInfo(&device, false); EXPECT_EQ(ret, 0); EXPECT_GT(gtSystemInfo.EUCount, 0u); EXPECT_GT(gtSystemInfo.ThreadCount, 0u); EXPECT_GT(gtSystemInfo.SliceCount, 0u); EXPECT_GT(gtSystemInfo.SubSliceCount, 0u); EXPECT_GT(gtSystemInfo.DualSubSliceCount, 0u); EXPECT_GT_VAL(gtSystemInfo.L3CacheSizeInKb, 0u); EXPECT_EQ(gtSystemInfo.CsrSizeInMb, 8u); EXPECT_FALSE(gtSystemInfo.IsDynamicallyPopulated); EXPECT_GT(gtSystemInfo.DualSubSliceCount, 0u); EXPECT_GT(gtSystemInfo.MaxDualSubSlicesSupported, 0u); } compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/adls/test_device_caps_adls.cpp000066400000000000000000000023431422164147700310370ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" using namespace NEO; using AdlsUsDeviceIdTest = Test; ADLSTEST_F(AdlsUsDeviceIdTest, WhenCheckingIsSimulationThenTrueReturnedOnlyForSimulationId) { unsigned short adlsSimulationIds[1] = { 0, // default, non-simulation }; NEO::MockDevice *mockDevice = nullptr; for (auto id : adlsSimulationIds) { mockDevice = createWithUsDeviceId(id); ASSERT_NE(mockDevice, nullptr); EXPECT_FALSE(mockDevice->isSimulation()); delete mockDevice; } } ADLSTEST_F(AdlsUsDeviceIdTest, givenAdlsWhenCheckFtrSupportsInteger64BitAtomicsThenReturnFalse) { EXPECT_TRUE(pDevice->getHardwareInfo().capabilityTable.ftrSupportsInteger64BitAtomics); } ADLSTEST_F(AdlsUsDeviceIdTest, givenAdlsWhenRequestedVmeFlagsThenReturnFalse) { EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.supportsVme); 
EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.ftrSupportsVmeAvcTextureSampler); EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.ftrSupportsVmeAvcPreemption); } compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/buffer_tests_gen12lp.inl000066400000000000000000000151741422164147700276360ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/device/device.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" using namespace NEO; struct BufferTestsTgllp : ::testing::Test { void SetUp() override { context = std::make_unique(); device = context->getDevice(0); } std::unique_ptr context{}; ClDevice *device{}; cl_int retVal = CL_SUCCESS; }; GEN12LPTEST_F(BufferTestsTgllp, givenBufferNotReadonlyWhenProgrammingSurfaceStateThenUseL3) { auto buffer = std::unique_ptr(Buffer::create( context.get(), CL_MEM_READ_WRITE, MemoryConstants::pageSize, nullptr, retVal)); ASSERT_EQ(CL_SUCCESS, retVal); typename FamilyType::RENDER_SURFACE_STATE surfaceState = {}; buffer->setArgStateful(&surfaceState, false, false, false, false, device->getDevice(), false, false); const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); const auto actualMocs = surfaceState.getMemoryObjectControlState(); EXPECT_EQ(expectedMocs, actualMocs); } GEN12LPTEST_F(BufferTestsTgllp, givenBufferReadonlyWhenProgrammingSurfaceStateThenUseL3) { auto buffer = std::unique_ptr(Buffer::create( context.get(), CL_MEM_READ_WRITE, MemoryConstants::pageSize, 
nullptr, retVal)); ASSERT_EQ(CL_SUCCESS, retVal); typename FamilyType::RENDER_SURFACE_STATE surfaceState = {}; buffer->setArgStateful(&surfaceState, false, false, false, true, context->getDevice(0)->getDevice(), false, false); const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); const auto actualMocs = surfaceState.getMemoryObjectControlState(); EXPECT_EQ(expectedMocs, actualMocs); } GEN12LPTEST_F(BufferTestsTgllp, givenConstantSurfaceWhenProgrammingSurfaceStateThenUseL3) { auto buffer = std::unique_ptr(Buffer::create( context.get(), CL_MEM_READ_WRITE, MemoryConstants::pageSize, nullptr, retVal)); ASSERT_EQ(CL_SUCCESS, retVal); buffer->getGraphicsAllocation(0)->setAllocationType(AllocationType::CONSTANT_SURFACE); typename FamilyType::RENDER_SURFACE_STATE surfaceState = {}; buffer->setArgStateful(&surfaceState, false, false, false, false, context->getDevice(0)->getDevice(), false, false); const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); const auto actualMocs = surfaceState.getMemoryObjectControlState(); EXPECT_EQ(expectedMocs, actualMocs); } GEN12LPTEST_F(BufferTestsTgllp, givenL1ForceEnabledWhenProgrammingSurfaceStateThenUseL3) { DebugManagerStateRestore restore{}; DebugManager.flags.ForceL1Caching.set(1); auto buffer = std::unique_ptr(Buffer::create( context.get(), CL_MEM_READ_WRITE, MemoryConstants::pageSize, nullptr, retVal)); ASSERT_EQ(CL_SUCCESS, retVal); typename FamilyType::RENDER_SURFACE_STATE surfaceState = {}; buffer->setArgStateful(&surfaceState, false, false, false, false, device->getDevice(), false, false); const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); const auto actualMocs = surfaceState.getMemoryObjectControlState(); EXPECT_EQ(expectedMocs, actualMocs); } GEN12LPTEST_F(BufferTestsTgllp, givenBufferReadonlyAndL1ForceEnabledWhenProgrammingSurfaceStateThenUseL1) { DebugManagerStateRestore restore{}; 
DebugManager.flags.ForceL1Caching.set(1); auto buffer = std::unique_ptr(Buffer::create( context.get(), CL_MEM_READ_ONLY, MemoryConstants::pageSize, nullptr, retVal)); ASSERT_EQ(CL_SUCCESS, retVal); typename FamilyType::RENDER_SURFACE_STATE surfaceState = {}; buffer->setArgStateful(&surfaceState, false, false, false, false, device->getDevice(), false, false); const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST); const auto actualMocs = surfaceState.getMemoryObjectControlState(); EXPECT_EQ(expectedMocs, actualMocs); } GEN12LPTEST_F(BufferTestsTgllp, givenBufferReadonlyL1ForceDisabledWhenProgrammingSurfaceStateThenUseL3) { DebugManagerStateRestore restore{}; DebugManager.flags.ForceL1Caching.set(0); auto buffer = std::unique_ptr(Buffer::create( context.get(), CL_MEM_READ_WRITE, MemoryConstants::pageSize, nullptr, retVal)); ASSERT_EQ(CL_SUCCESS, retVal); typename FamilyType::RENDER_SURFACE_STATE surfaceState = {}; buffer->setArgStateful(&surfaceState, false, false, false, true, device->getDevice(), false, false); const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); const auto actualMocs = surfaceState.getMemoryObjectControlState(); EXPECT_EQ(expectedMocs, actualMocs); } using Gen12lpCreateBufferTest = ::testing::Test; GEN12LPTEST_F(Gen12lpCreateBufferTest, WhenCreatingBufferWithCopyHostPtrThenDontUseBlitOperation) { uint32_t hostPtr = 0; auto rootDeviceIndex = 1u; auto hwInfo = *defaultHwInfo; hwInfo.capabilityTable.blitterOperationsSupported = true; EXPECT_FALSE(HwInfoConfig::get(hwInfo.platform.eProductFamily)->isBlitterFullySupported(hwInfo)); std::unique_ptr newDevice = std::make_unique(MockClDevice::createWithNewExecutionEnvironment(&hwInfo, rootDeviceIndex)); std::unique_ptr newBcsMockContext = std::make_unique(newDevice.get()); auto bcsCsr = static_cast *>(newBcsMockContext->bcsCsr.get()); 
static_cast(newDevice->getExecutionEnvironment()->memoryManager.get())->enable64kbpages[rootDeviceIndex] = true; static_cast(newDevice->getExecutionEnvironment()->memoryManager.get())->localMemorySupported[rootDeviceIndex] = true; EXPECT_EQ(0u, bcsCsr->blitBufferCalled); cl_int retVal = 0; auto bufferForBlt = clUniquePtr(Buffer::create(newBcsMockContext.get(), CL_MEM_COPY_HOST_PTR, sizeof(hostPtr), &hostPtr, retVal)); EXPECT_EQ(0u, bcsCsr->blitBufferCalled); } compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/coherency_tests_gen12lp.inl000066400000000000000000000356741422164147700303530ustar00rootroot00000000000000/* * Copyright (C) 2019-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver_hw.h" #include "shared/source/helpers/ptr_math.h" #include "shared/test/common/cmd_parse/hw_parse.h" #include "shared/test/common/helpers/dispatch_flags_helper.h" #include "shared/test/common/libult/gen12lp/special_ult_helper_gen12lp.h" #include "shared/test/common/mocks/mock_allocation_properties.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/test_macros/test.h" using namespace NEO; struct Gen12LpCoherencyRequirements : public ::testing::Test { using STATE_COMPUTE_MODE = typename TGLLPFamily::STATE_COMPUTE_MODE; using PIPE_CONTROL = typename TGLLPFamily::PIPE_CONTROL; using PIPELINE_SELECT = typename TGLLPFamily::PIPELINE_SELECT; struct myCsr : public CommandStreamReceiverHw { using CommandStreamReceiver::commandStream; using CommandStreamReceiver::streamProperties; myCsr(ExecutionEnvironment &executionEnvironment) : CommandStreamReceiverHw(executionEnvironment, 0, 1){}; CsrSizeRequestFlags *getCsrRequestFlags() { return &csrSizeRequestFlags; } }; void makeResidentSharedAlloc() { csr->getResidencyAllocations().push_back(alloc); } void overrideCoherencyRequest(bool reqestChanged, bool requireCoherency, bool hasSharedHandles) { 
csr->getCsrRequestFlags()->hasSharedHandles = hasSharedHandles; flags.requiresCoherency = requireCoherency; csr->streamProperties.stateComputeMode.isCoherencyRequired.value = requireCoherency; csr->streamProperties.stateComputeMode.isCoherencyRequired.isDirty = reqestChanged; if (hasSharedHandles) { makeResidentSharedAlloc(); } } void SetUp() override { device.reset(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); csr = new myCsr(*device->executionEnvironment); device->resetCommandStreamReceiver(csr); AllocationProperties properties(device->getRootDeviceIndex(), false, MemoryConstants::pageSize, AllocationType::SHARED_BUFFER, false, {}); alloc = device->getMemoryManager()->createGraphicsAllocationFromSharedHandle(static_cast(123), properties, false, false); } void TearDown() override { device->getMemoryManager()->freeGraphicsMemory(alloc); } myCsr *csr = nullptr; std::unique_ptr device; DispatchFlags flags = DispatchFlagsHelper::createDefaultDispatchFlags(); GraphicsAllocation *alloc = nullptr; }; GEN12LPTEST_F(Gen12LpCoherencyRequirements, GivenNoSharedHandlesWhenGettingCmdSizeThenSizeIsCorrect) { auto cmdsSize = sizeof(STATE_COMPUTE_MODE); const auto &hwInfoConfig = *HwInfoConfig::get(device->getHardwareInfo().platform.eProductFamily); if (hwInfoConfig.is3DPipelineSelectWARequired()) { cmdsSize += 2 * sizeof(PIPELINE_SELECT); if (SpecialUltHelperGen12lp::isPipeControlWArequired(device->getHardwareInfo().platform.eProductFamily)) { cmdsSize += 2 * sizeof(PIPE_CONTROL); } } overrideCoherencyRequest(false, false, false); EXPECT_FALSE(csr->streamProperties.stateComputeMode.isDirty()); overrideCoherencyRequest(false, true, false); EXPECT_FALSE(csr->streamProperties.stateComputeMode.isDirty()); overrideCoherencyRequest(true, true, false); auto retSize = csr->getCmdSizeForComputeMode(); EXPECT_TRUE(csr->streamProperties.stateComputeMode.isDirty()); EXPECT_EQ(cmdsSize, retSize); overrideCoherencyRequest(true, false, false); retSize = 
csr->getCmdSizeForComputeMode(); EXPECT_TRUE(csr->streamProperties.stateComputeMode.isDirty()); EXPECT_EQ(cmdsSize, retSize); } GEN12LPTEST_F(Gen12LpCoherencyRequirements, GivenSharedHandlesWhenGettingCmdSizeThenSizeIsCorrect) { auto cmdsSize = sizeof(STATE_COMPUTE_MODE) + sizeof(PIPE_CONTROL); const auto &hwInfoConfig = *HwInfoConfig::get(device->getHardwareInfo().platform.eProductFamily); if (hwInfoConfig.is3DPipelineSelectWARequired()) { cmdsSize += 2 * sizeof(PIPELINE_SELECT); if (SpecialUltHelperGen12lp::isPipeControlWArequired(device->getHardwareInfo().platform.eProductFamily)) { cmdsSize += 2 * sizeof(PIPE_CONTROL); } } overrideCoherencyRequest(false, false, true); EXPECT_FALSE(csr->streamProperties.stateComputeMode.isDirty()); overrideCoherencyRequest(false, true, true); EXPECT_FALSE(csr->streamProperties.stateComputeMode.isDirty()); overrideCoherencyRequest(true, true, true); auto retSize = csr->getCmdSizeForComputeMode(); EXPECT_TRUE(csr->streamProperties.stateComputeMode.isDirty()); EXPECT_EQ(cmdsSize, retSize); overrideCoherencyRequest(true, false, true); retSize = csr->getCmdSizeForComputeMode(); EXPECT_TRUE(csr->streamProperties.stateComputeMode.isDirty()); EXPECT_EQ(cmdsSize, retSize); } GEN12LPTEST_F(Gen12LpCoherencyRequirements, GivenNoSharedHandlesThenCoherencyCmdValuesAreCorrect) { auto cmdsSize = sizeof(STATE_COMPUTE_MODE); auto cmdsSizeWABeginOffset = 0; const auto &hwInfoConfig = *HwInfoConfig::get(device->getHardwareInfo().platform.eProductFamily); if (hwInfoConfig.is3DPipelineSelectWARequired()) { cmdsSizeWABeginOffset += sizeof(PIPELINE_SELECT); cmdsSize += sizeof(PIPELINE_SELECT); if (SpecialUltHelperGen12lp::isPipeControlWArequired(device->getHardwareInfo().platform.eProductFamily)) { cmdsSizeWABeginOffset += sizeof(PIPE_CONTROL); cmdsSize += sizeof(PIPE_CONTROL); } } cmdsSize += cmdsSizeWABeginOffset; char buff[1024]; LinearStream stream(buff, 1024); auto expectedScmCmd = FamilyType::cmdInitStateComputeMode; 
expectedScmCmd.setForceNonCoherent(STATE_COMPUTE_MODE::FORCE_NON_COHERENT_FORCE_GPU_NON_COHERENT); expectedScmCmd.setMaskBits(FamilyType::stateComputeModeForceNonCoherentMask); overrideCoherencyRequest(true, false, false); csr->programComputeMode(stream, flags, *defaultHwInfo); EXPECT_EQ(cmdsSize, stream.getUsed()); auto scmCmd = reinterpret_cast(stream.getCpuBase()); EXPECT_TRUE(memcmp(&expectedScmCmd, scmCmd + cmdsSizeWABeginOffset, sizeof(STATE_COMPUTE_MODE)) == 0); auto startOffset = stream.getUsed(); overrideCoherencyRequest(true, true, false); csr->programComputeMode(stream, flags, *defaultHwInfo); EXPECT_EQ(cmdsSize * 2, stream.getUsed()); expectedScmCmd.setForceNonCoherent(STATE_COMPUTE_MODE::FORCE_NON_COHERENT_FORCE_DISABLED); expectedScmCmd.setMaskBits(FamilyType::stateComputeModeForceNonCoherentMask); scmCmd = reinterpret_cast(ptrOffset(stream.getCpuBase(), startOffset)); EXPECT_TRUE(memcmp(&expectedScmCmd, scmCmd + cmdsSizeWABeginOffset, sizeof(STATE_COMPUTE_MODE)) == 0); } GEN12LPTEST_F(Gen12LpCoherencyRequirements, GivenSharedHandlesThenCoherencyCmdValuesAreCorrect) { auto cmdsSize = sizeof(STATE_COMPUTE_MODE) + sizeof(PIPE_CONTROL); auto cmdsSizeWABeginOffset = 0; const auto &hwInfoConfig = *HwInfoConfig::get(device->getHardwareInfo().platform.eProductFamily); if (hwInfoConfig.is3DPipelineSelectWARequired()) { cmdsSizeWABeginOffset += sizeof(PIPELINE_SELECT); cmdsSize += sizeof(PIPELINE_SELECT); if (SpecialUltHelperGen12lp::isPipeControlWArequired(device->getHardwareInfo().platform.eProductFamily)) { cmdsSizeWABeginOffset += sizeof(PIPE_CONTROL); cmdsSize += sizeof(PIPE_CONTROL); } } cmdsSize += cmdsSizeWABeginOffset; char buff[1024]; LinearStream stream(buff, 1024); auto expectedScmCmd = FamilyType::cmdInitStateComputeMode; expectedScmCmd.setForceNonCoherent(STATE_COMPUTE_MODE::FORCE_NON_COHERENT_FORCE_GPU_NON_COHERENT); expectedScmCmd.setMaskBits(FamilyType::stateComputeModeForceNonCoherentMask); auto expectedPcCmd = FamilyType::cmdInitPipeControl; 
overrideCoherencyRequest(true, false, true); csr->programComputeMode(stream, flags, *defaultHwInfo); EXPECT_EQ(cmdsSize, stream.getUsed()); auto scmCmd = reinterpret_cast(stream.getCpuBase()); EXPECT_TRUE(memcmp(&expectedScmCmd, scmCmd + cmdsSizeWABeginOffset, sizeof(STATE_COMPUTE_MODE)) == 0); auto pcCmd = reinterpret_cast(ptrOffset(stream.getCpuBase(), sizeof(STATE_COMPUTE_MODE))); EXPECT_TRUE(memcmp(&expectedPcCmd, pcCmd + cmdsSizeWABeginOffset, sizeof(PIPE_CONTROL)) == 0); auto startOffset = stream.getUsed(); overrideCoherencyRequest(true, true, true); csr->programComputeMode(stream, flags, *defaultHwInfo); EXPECT_EQ(cmdsSize * 2, stream.getUsed()); expectedScmCmd.setForceNonCoherent(STATE_COMPUTE_MODE::FORCE_NON_COHERENT_FORCE_DISABLED); expectedScmCmd.setMaskBits(FamilyType::stateComputeModeForceNonCoherentMask); scmCmd = reinterpret_cast(ptrOffset(stream.getCpuBase(), startOffset)); EXPECT_TRUE(memcmp(&expectedScmCmd, scmCmd + cmdsSizeWABeginOffset, sizeof(STATE_COMPUTE_MODE)) == 0); pcCmd = reinterpret_cast(ptrOffset(stream.getCpuBase(), startOffset + sizeof(STATE_COMPUTE_MODE))); EXPECT_TRUE(memcmp(&expectedPcCmd, pcCmd + cmdsSizeWABeginOffset, sizeof(PIPE_CONTROL)) == 0); } GEN12LPTEST_F(Gen12LpCoherencyRequirements, givenCoherencyRequirementWithoutSharedHandlesWhenFlushTaskCalledThenProgramCmdOnlyIfChanged) { auto startOffset = csr->commandStream.getUsed(); auto graphicAlloc = csr->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); IndirectHeap stream(graphicAlloc); auto flushTask = [&](bool coherencyRequired) { flags.requiresCoherency = coherencyRequired; startOffset = csr->commandStream.getUsed(); csr->flushTask(stream, 0, &stream, &stream, &stream, 0, flags, *device); }; auto findCmd = [&](bool expectToBeProgrammed, bool expectCoherent, bool expectPipeControl) { HardwareParse hwParser; hwParser.parseCommands(csr->commandStream, startOffset); bool foundOne = false; 
STATE_COMPUTE_MODE::FORCE_NON_COHERENT expectedCoherentValue = STATE_COMPUTE_MODE::FORCE_NON_COHERENT_FORCE_GPU_NON_COHERENT; uint32_t expectedCoherentMask = FamilyType::stateComputeModeForceNonCoherentMask; for (auto it = hwParser.cmdList.begin(); it != hwParser.cmdList.end(); it++) { auto cmd = genCmdCast(*it); if (cmd) { EXPECT_EQ(expectedCoherentValue, cmd->getForceNonCoherent()); EXPECT_EQ(expectedCoherentMask, cmd->getMaskBits()); EXPECT_FALSE(foundOne); foundOne = true; auto pc = genCmdCast(*(++it)); if (!expectPipeControl && !SpecialUltHelperGen12lp::isPipeControlWArequired(device->getHardwareInfo().platform.eProductFamily)) { EXPECT_EQ(nullptr, pc); } else { EXPECT_NE(nullptr, pc); } } } EXPECT_EQ(expectToBeProgrammed, foundOne); }; auto hwInfo = device->getHardwareInfo(); flushTask(false); if (MemorySynchronizationCommands::isPipeControlPriorToPipelineSelectWArequired(hwInfo)) { findCmd(true, false, true); // first time } else { findCmd(true, false, false); // first time } flushTask(false); findCmd(false, false, false); // not changed csr->getMemoryManager()->freeGraphicsMemory(graphicAlloc); } GEN12LPTEST_F(Gen12LpCoherencyRequirements, givenSharedHandlesWhenFlushTaskCalledThenProgramPipeControlWhenNeeded) { auto startOffset = csr->commandStream.getUsed(); auto graphicsAlloc = csr->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); IndirectHeap stream(graphicsAlloc); auto flushTask = [&](bool coherencyRequired) { flags.requiresCoherency = coherencyRequired; makeResidentSharedAlloc(); startOffset = csr->commandStream.getUsed(); csr->flushTask(stream, 0, &stream, &stream, &stream, 0, flags, *device); }; auto flushTaskAndFindCmds = [&](bool expectCoherent, bool valueChanged) { flushTask(expectCoherent); HardwareParse hwParser; hwParser.parseCommands(csr->commandStream, startOffset); bool foundOne = false; STATE_COMPUTE_MODE::FORCE_NON_COHERENT expectedCoherentValue = 
STATE_COMPUTE_MODE::FORCE_NON_COHERENT_FORCE_GPU_NON_COHERENT; uint32_t expectedCoherentMask = FamilyType::stateComputeModeForceNonCoherentMask; for (auto it = hwParser.cmdList.begin(); it != hwParser.cmdList.end(); it++) { auto cmd = genCmdCast(*it); if (cmd) { EXPECT_EQ(expectedCoherentValue, cmd->getForceNonCoherent()); EXPECT_EQ(expectedCoherentMask, cmd->getMaskBits()); EXPECT_FALSE(foundOne); foundOne = true; auto pc = genCmdCast(*(++it)); EXPECT_NE(nullptr, pc); } } EXPECT_EQ(valueChanged, foundOne); }; flushTaskAndFindCmds(false, true); // first time flushTaskAndFindCmds(false, false); // not changed csr->getMemoryManager()->freeGraphicsMemory(graphicsAlloc); } GEN12LPTEST_F(Gen12LpCoherencyRequirements, givenFlushWithoutSharedHandlesWhenPreviouslyUsedThenProgramPcAndSCM) { auto graphicAlloc = csr->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); IndirectHeap stream(graphicAlloc); makeResidentSharedAlloc(); csr->flushTask(stream, 0, &stream, &stream, &stream, 0, flags, *device); EXPECT_TRUE(csr->getCsrRequestFlags()->hasSharedHandles); auto startOffset = csr->commandStream.getUsed(); csr->streamProperties.stateComputeMode.isCoherencyRequired.set(true); csr->flushTask(stream, 0, &stream, &stream, &stream, 0, flags, *device); EXPECT_TRUE(csr->getCsrRequestFlags()->hasSharedHandles); HardwareParse hwParser; hwParser.parseCommands(csr->commandStream, startOffset); STATE_COMPUTE_MODE::FORCE_NON_COHERENT expectedCoherentValue = STATE_COMPUTE_MODE::FORCE_NON_COHERENT_FORCE_GPU_NON_COHERENT; uint32_t expectedCoherentMask = FamilyType::stateComputeModeForceNonCoherentMask; bool foundOne = false; for (auto it = hwParser.cmdList.begin(); it != hwParser.cmdList.end(); it++) { auto cmd = genCmdCast(*it); if (cmd) { EXPECT_EQ(expectedCoherentValue, cmd->getForceNonCoherent()); EXPECT_EQ(expectedCoherentMask, cmd->getMaskBits()); EXPECT_FALSE(foundOne); foundOne = true; auto pc = 
genCmdCast(*(++it)); EXPECT_NE(nullptr, pc); } } EXPECT_TRUE(foundOne); csr->getMemoryManager()->freeGraphicsMemory(graphicAlloc); } command_stream_receiver_hw_tests_gen12lp.inl000066400000000000000000000124551422164147700336600ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/command_stream/linear_stream.h" #include "shared/test/common/cmd_parse/hw_parse.h" #include "shared/test/common/libult/gen12lp/special_ult_helper_gen12lp.h" #include "shared/test/common/mocks/mock_csr.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_event.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "gtest/gtest.h" #include "reg_configs_common.h" using namespace NEO; #include "opencl/test/unit_test/command_stream/command_stream_receiver_hw_tests.inl" using CommandStreamReceiverHwTestGen12lp = CommandStreamReceiverHwTest; GEN12LPTEST_F(CommandStreamReceiverHwTestGen12lp, givenPreambleSentWhenL3ConfigRequestChangedThenDontProgramL3Register) { size_t GWS = 1; MockContext ctx(pClDevice); MockKernelWithInternals kernel(*pClDevice); CommandQueueHw commandQueue(&ctx, pClDevice, 0, false); auto commandStreamReceiver = new MockCsrHw(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(commandStreamReceiver); auto &commandStreamCSR = commandStreamReceiver->getCS(); commandStreamReceiver->isPreambleSent = true; 
commandStreamReceiver->lastSentL3Config = 0; commandQueue.enqueueKernel(kernel, 1, nullptr, &GWS, nullptr, 0, nullptr, nullptr); parseCommands(commandStreamCSR, 0); auto itorCmd = findMmio(cmdList.begin(), cmdList.end(), L3CNTLRegisterOffset::registerOffset); ASSERT_EQ(cmdList.end(), itorCmd); } GEN12LPTEST_F(CommandStreamReceiverHwTestGen12lp, whenProgrammingMiSemaphoreWaitThenSetRegisterPollModeMemoryPoll) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; MI_SEMAPHORE_WAIT miSemaphoreWait = FamilyType::cmdInitMiSemaphoreWait; EXPECT_EQ(MI_SEMAPHORE_WAIT::REGISTER_POLL_MODE::REGISTER_POLL_MODE_MEMORY_POLL, miSemaphoreWait.getRegisterPollMode()); } using CommandStreamReceiverFlushTaskTests = UltCommandStreamReceiverTest; GEN12LPTEST_F(UltCommandStreamReceiverTest, givenStateBaseAddressWhenItIsRequiredThenThereIsPipeControlPriorToItWithTextureCacheFlushAndHdc) { using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); configureCSRtoNonDirtyState(false); ioh.replaceBuffer(ptrOffset(ioh.getCpuBase(), +1u), ioh.getMaxAvailableSpace() + MemoryConstants::pageSize * 3); flushTask(commandStreamReceiver); parseCommands(commandStreamReceiver.commandStream, 0); auto stateBaseAddressItor = find(cmdList.begin(), cmdList.end()); auto pipeControlItor = find(cmdList.begin(), stateBaseAddressItor); EXPECT_NE(stateBaseAddressItor, pipeControlItor); auto pipeControlCmd = reinterpret_cast(*pipeControlItor); EXPECT_TRUE(pipeControlCmd->getTextureCacheInvalidationEnable()); EXPECT_TRUE(pipeControlCmd->getDcFlushEnable()); EXPECT_TRUE(pipeControlCmd->getHdcPipelineFlush()); } using UltCommandStreamReceiverTestGen12Lp = UltCommandStreamReceiverTest; GEN12LPTEST_F(UltCommandStreamReceiverTestGen12Lp, givenDebugEnablingCacheFlushWhenAddingPipeControlWithoutCacheFlushThenOverrideRequestAndEnableCacheFlushFlags) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; DebugManagerStateRestore 
dbgRestorer; DebugManager.flags.FlushAllCaches.set(true); char buff[sizeof(PIPE_CONTROL) * 3]; LinearStream stream(buff, sizeof(PIPE_CONTROL) * 3); PipeControlArgs args; MemorySynchronizationCommands::addPipeControl(stream, args); parseCommands(stream, 0); PIPE_CONTROL *pipeControl = getCommand(); ASSERT_NE(nullptr, pipeControl); // WA pipeControl added if (cmdList.size() == 2) { pipeControl++; } EXPECT_TRUE(pipeControl->getHdcPipelineFlush()); } GEN12LPTEST_F(UltCommandStreamReceiverTestGen12Lp, givenDebugDisablingCacheFlushWhenAddingPipeControlWithCacheFlushThenOverrideRequestAndDisableCacheFlushFlags) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; DebugManagerStateRestore dbgRestorer; DebugManager.flags.DoNotFlushCaches.set(true); char buff[sizeof(PIPE_CONTROL) * 3]; LinearStream stream(buff, sizeof(PIPE_CONTROL) * 3); PipeControlArgs args; args.dcFlushEnable = true; args.hdcPipelineFlush = true; MemorySynchronizationCommands::addPipeControl(stream, args); parseCommands(stream, 0); PIPE_CONTROL *pipeControl = getCommand(); ASSERT_NE(nullptr, pipeControl); // WA pipeControl added if (cmdList.size() == 2) { pipeControl++; } EXPECT_FALSE(pipeControl->getHdcPipelineFlush()); } compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/dg1/000077500000000000000000000000001422164147700235525ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/dg1/CMakeLists.txt000066400000000000000000000016171422164147700263170ustar00rootroot00000000000000# # Copyright (C) 2020-2022 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_DG1) set(IGDRCL_SRCS_tests_gen12lp_dg1_excludes ${CMAKE_CURRENT_SOURCE_DIR}/excludes_ocl_dg1.cpp ) set_property(GLOBAL APPEND PROPERTY IGDRCL_SRCS_tests_excludes ${IGDRCL_SRCS_tests_gen12lp_dg1_excludes}) set(IGDRCL_SRCS_tests_dg1 ${IGDRCL_SRCS_tests_gen12lp_dg1_excludes} ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/hw_helper_tests_dg1.cpp 
${CMAKE_CURRENT_SOURCE_DIR}/hw_info_tests_dg1.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_device_caps_dg1.cpp ${CMAKE_CURRENT_SOURCE_DIR}/execution_environment_tests_dg1.cpp ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_dg1}) add_subdirectories() neo_copy_test_files_with_revision(copy_test_files_dg1_0 dg1 0) add_dependencies(copy_test_files_per_product copy_test_files_dg1_0) endif() compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/dg1/excludes_ocl_dg1.cpp000066400000000000000000000022131422164147700274600ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" HWTEST_EXCLUDE_PRODUCT(BufferSetSurfaceTests, givenBufferSetSurfaceThatMemoryIsUnalignedToCachelineButReadOnlyThenL3CacheShouldBeStillOn, IGFX_DG1) HWTEST_EXCLUDE_PRODUCT(BufferSetSurfaceTests, givenAlignedCacheableReadOnlyBufferThenChoseOclBufferPolicy, IGFX_DG1); HWTEST_EXCLUDE_PRODUCT(HwInfoConfigTest, givenHwInfoConfigWhenAskedIfPipeControlWAIsRequiredThenFalseIsReturned, IGFX_DG1); HWTEST_EXCLUDE_PRODUCT(HwInfoConfigTest, givenHwInfoConfigWhenAskedIfImagePitchAlignmentWAIsRequiredThenFalseIsReturned, IGFX_DG1); HWTEST_EXCLUDE_PRODUCT(HwInfoConfigTest, givenHwInfoConfigWhenAskedIfForceEmuInt32DivRemSPWAIsRequiredThenFalseIsReturned, IGFX_DG1); HWTEST_EXCLUDE_PRODUCT(HwInfoConfigTest, givenHwInfoConfigWhenAskedIf3DPipelineSelectWAIsRequiredThenFalseIsReturned, IGFX_DG1); HWTEST_EXCLUDE_PRODUCT(HwInfoConfigTest, givenHwInfoConfigWhenAskedIfStorageInfoAdjustmentIsRequiredThenFalseIsReturned, IGFX_DG1); HWTEST_EXCLUDE_PRODUCT(HwInfoConfigTest, whenOverrideGfxPartitionLayoutForWslThenReturnFalse, IGFX_DG1); compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/dg1/execution_environment_tests_dg1.cpp000066400000000000000000000024111422164147700326600ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include 
"shared/source/ail/ail_configuration.h" #include "shared/test/common/helpers/variable_backup.h" #include "shared/test/common/mocks/mock_execution_environment.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/utilities/destructor_counted.h" namespace NEO { using RootDeviceEnvironmentTests = ::testing::Test; HWTEST2_F(RootDeviceEnvironmentTests, givenRootDeviceEnvironmentWhenAILInitProcessExecutableNameReturnsFailedThenInitAilConfigurationReturnsFail, IsDG1) { MockExecutionEnvironment executionEnvironment; executionEnvironment.rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); auto rootDeviceEnvironment = static_cast(executionEnvironment.rootDeviceEnvironments[0].get()); class AILDG1 : public AILConfigurationHw { public: bool initProcessExecutableName() override { return false; } }; VariableBackup ailConfiguration(&ailConfigurationTable[productFamily]); AILDG1 ailDg1; ailConfigurationTable[productFamily] = &ailDg1; EXPECT_EQ(false, rootDeviceEnvironment->initAilConfiguration()); } } // namespace NEOcompute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/dg1/hw_helper_tests_dg1.cpp000066400000000000000000000065421422164147700302170ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/hw_info_config.h" #include "shared/test/common/helpers/hw_helper_tests.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "opencl/source/helpers/cl_hw_helper.h" using HwHelperTestDg1 = HwHelperTest; DG1TEST_F(HwHelperTestDg1, givenDg1SteppingA0WhenAdjustDefaultEngineTypeCalledThenRcsIsReturned) { auto &helper = HwHelper::get(renderCoreFamily); const auto &hwInfoConfig = *HwInfoConfig::get(productFamily); hardwareInfo.featureTable.flags.ftrCCSNode = true; hardwareInfo.platform.usRevId = hwInfoConfig.getHwRevIdFromStepping(REVISION_A0, hardwareInfo); helper.adjustDefaultEngineType(&hardwareInfo); 
EXPECT_EQ(aub_stream::ENGINE_RCS, hardwareInfo.capabilityTable.defaultEngineType); } DG1TEST_F(HwHelperTestDg1, givenDg1SteppingBWhenAdjustDefaultEngineTypeCalledThenRcsIsReturned) { auto &helper = HwHelper::get(renderCoreFamily); const auto &hwInfoConfig = *HwInfoConfig::get(productFamily); hardwareInfo.featureTable.flags.ftrCCSNode = true; hardwareInfo.platform.usRevId = hwInfoConfig.getHwRevIdFromStepping(REVISION_B, hardwareInfo); helper.adjustDefaultEngineType(&hardwareInfo); EXPECT_EQ(aub_stream::ENGINE_RCS, hardwareInfo.capabilityTable.defaultEngineType); } DG1TEST_F(HwHelperTestDg1, givenDg1AndVariousSteppingsWhenGettingIsWorkaroundRequiredThenCorrectValueIsReturned) { const auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); const auto &hwInfoConfig = *HwInfoConfig::get(hardwareInfo.platform.eProductFamily); uint32_t steppings[] = { REVISION_A0, REVISION_B, CommonConstants::invalidStepping}; for (auto stepping : steppings) { hardwareInfo.platform.usRevId = hwInfoConfig.getHwRevIdFromStepping(stepping, hardwareInfo); switch (stepping) { case REVISION_A0: EXPECT_TRUE(hwHelper.isWorkaroundRequired(REVISION_A0, REVISION_B, hardwareInfo)); [[fallthrough]]; default: EXPECT_FALSE(hwHelper.isWorkaroundRequired(REVISION_B, REVISION_A0, hardwareInfo)); EXPECT_FALSE(hwHelper.isWorkaroundRequired(REVISION_A0, REVISION_D, hardwareInfo)); } } } DG1TEST_F(HwHelperTestDg1, givenBufferAllocationTypeWhenSetExtraAllocationDataIsCalledThenIsLockableIsSet) { auto &hwHelper = HwHelper::get(renderCoreFamily); AllocationData allocData{}; allocData.flags.useSystemMemory = true; AllocationProperties allocProperties(0, 1, AllocationType::BUFFER, {}); allocData.storageInfo.isLockable = false; allocProperties.flags.shareable = false; hwHelper.setExtraAllocationData(allocData, allocProperties, *defaultHwInfo); EXPECT_TRUE(allocData.storageInfo.isLockable); } DG1TEST_F(HwHelperTestDg1, 
givenBufferAllocationTypeWhenSetExtraAllocationDataIsCalledWithShareableSetThenIsLockableIsFalse) { auto &hwHelper = HwHelper::get(renderCoreFamily); AllocationData allocData{}; allocData.flags.useSystemMemory = true; AllocationProperties allocProperties(0, 1, AllocationType::BUFFER, {}); allocData.storageInfo.isLockable = false; allocProperties.flags.shareable = true; hwHelper.setExtraAllocationData(allocData, allocProperties, *defaultHwInfo); EXPECT_FALSE(allocData.storageInfo.isLockable); } compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/dg1/hw_info_tests_dg1.cpp000066400000000000000000000020441422164147700276640ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/hw_info.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/default_hw_info.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/helpers/gtest_helpers.h" using namespace NEO; template class Dg1HwInfoTests : public ::testing::Test {}; typedef ::testing::Types dg1TestTypes; TYPED_TEST_CASE(Dg1HwInfoTests, dg1TestTypes); TYPED_TEST(Dg1HwInfoTests, WhenSetupHardwareInfoWithSetupFeatureTableFlagTrueOrFalseIsCalledThenFeatureTableHasCorrectValueOfLocalMemoryFeature) { HardwareInfo hwInfo = *defaultHwInfo; FeatureTable &featureTable = hwInfo.featureTable; EXPECT_FALSE(featureTable.flags.ftrLocalMemory); TypeParam::setupHardwareInfo(&hwInfo, false); EXPECT_FALSE(featureTable.flags.ftrLocalMemory); TypeParam::setupHardwareInfo(&hwInfo, true); EXPECT_TRUE(featureTable.flags.ftrLocalMemory); }compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/dg1/linux/000077500000000000000000000000001422164147700247115ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/dg1/linux/CMakeLists.txt000066400000000000000000000005261422164147700274540ustar00rootroot00000000000000# # Copyright (C) 2020 Intel 
Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_gen12_dg1_linux ${CMAKE_CURRENT_SOURCE_DIR}/hw_info_config_tests_dg1.cpp ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) if(UNIX) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen12_dg1_linux}) add_subdirectory(dll) endif() compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/dg1/linux/dll/000077500000000000000000000000001422164147700254645ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/dg1/linux/dll/CMakeLists.txt000066400000000000000000000004701422164147700302250ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_linux_dll_tests_gen12_dg1 ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/device_id_tests_dg1.cpp ) target_sources(igdrcl_linux_dll_tests PRIVATE ${IGDRCL_SRCS_linux_dll_tests_gen12_dg1}) compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/dg1/linux/dll/device_id_tests_dg1.cpp000066400000000000000000000012051422164147700320560ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/fixtures/linux/device_id_fixture.h" using namespace NEO; TEST_F(DeviceIdTests, GivenDg1SupportedDeviceIdThenHardwareInfoIsCorrect) { std::array expectedDescriptors = {{ {0x4905, &DG1_CONFIG::hwInfo, &DG1_CONFIG::setupHardwareInfo}, {0x4906, &DG1_CONFIG::hwInfo, &DG1_CONFIG::setupHardwareInfo}, {0x4907, &DG1_CONFIG::hwInfo, &DG1_CONFIG::setupHardwareInfo}, {0x4908, &DG1_CONFIG::hwInfo, &DG1_CONFIG::setupHardwareInfo}, }}; testImpl(expectedDescriptors); } compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/dg1/linux/hw_info_config_tests_dg1.cpp000066400000000000000000000025071422164147700323540ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/preemption.h" #include 
"shared/source/os_interface/hw_info_config.h" #include "shared/source/os_interface/os_interface.h" #include "shared/test/common/helpers/hw_helper_tests.h" using HwHelperTestGen12Lp = HwHelperTest; DG1TEST_F(HwHelperTestGen12Lp, GivenDG1WhenConfigureHardwareCustomThenMTPIsNotSet) { HwInfoConfig *hwInfoConfig = HwInfoConfig::get(hardwareInfo.platform.eProductFamily); OSInterface osIface; hardwareInfo.capabilityTable.defaultPreemptionMode = PreemptionMode::ThreadGroup; PreemptionHelper::adjustDefaultPreemptionMode(hardwareInfo.capabilityTable, true, true, true); hwInfoConfig->configureHardwareCustom(&hardwareInfo, &osIface); EXPECT_FALSE(hardwareInfo.featureTable.flags.ftrGpGpuMidThreadLevelPreempt); } DG1TEST_F(HwHelperTestGen12Lp, GivenDG1WhenConfigureHardwareCustomThenKmdNotifyIsEnabled) { HwInfoConfig *hwInfoConfig = HwInfoConfig::get(hardwareInfo.platform.eProductFamily); OSInterface osIface; hwInfoConfig->configureHardwareCustom(&hardwareInfo, &osIface); EXPECT_TRUE(hardwareInfo.capabilityTable.kmdNotifyProperties.enableKmdNotify); EXPECT_EQ(300ll, hardwareInfo.capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds); } compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/dg1/test_device_caps_dg1.cpp000066400000000000000000000036431422164147700303230ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/driver_info.h" #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" using namespace NEO; using Dg1DeviceCaps = Test; DG1TEST_F(Dg1DeviceCaps, givenDg1WhenCheckSupportCacheFlushAfterWalkerThenFalse) { EXPECT_TRUE(pDevice->getHardwareInfo().capabilityTable.supportCacheFlushAfterWalker); } DG1TEST_F(Dg1DeviceCaps, givenDG1WhenCheckftr64KBpagesThenTrue) { EXPECT_TRUE(pDevice->getHardwareInfo().capabilityTable.ftr64KBpages); } DG1TEST_F(Dg1DeviceCaps, givenDG1WhenRequestedVmeFlagsThenReturnFalse) { 
EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.supportsVme); EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.ftrSupportsVmeAvcTextureSampler); EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.ftrSupportsVmeAvcPreemption); } DG1TEST_F(Dg1DeviceCaps, givenDg1hpWhenInitializeCapsThenVmeIsNotSupported) { pClDevice->driverInfo.reset(); pClDevice->name.clear(); pClDevice->initializeCaps(); cl_uint expectedVmeAvcVersion = CL_AVC_ME_VERSION_0_INTEL; cl_uint expectedVmeVersion = CL_ME_VERSION_LEGACY_INTEL; EXPECT_EQ(expectedVmeVersion, pClDevice->getDeviceInfo().vmeVersion); EXPECT_EQ(expectedVmeAvcVersion, pClDevice->getDeviceInfo().vmeAvcVersion); EXPECT_FALSE(pClDevice->getDeviceInfo().vmeAvcSupportsTextureSampler); EXPECT_FALSE(pDevice->getDeviceInfo().vmeAvcSupportsPreemption); } DG1TEST_F(Dg1DeviceCaps, givenDg1WhenCheckFtrSupportsInteger64BitAtomicsThenReturnTrue) { EXPECT_TRUE(pDevice->getHardwareInfo().capabilityTable.ftrSupportsInteger64BitAtomics); } DG1TEST_F(Dg1DeviceCaps, givenDg1WhenCheckGpuAdressSpaceThenReturn47bits) { EXPECT_EQ(MemoryConstants::max64BitAppAddress, pDevice->getHardwareInfo().capabilityTable.gpuAddressSpace); } compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/enqueue_media_kernel_gen12lp.inl000066400000000000000000000163261422164147700313110ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/media_kernel_fixture.h" using namespace NEO; typedef MediaKernelFixture MediaKernelTest; GEN12LPTEST_F(MediaKernelTest, givenGen12LpCsrWhenEnqueueBlockedVmeKernelFirstTimeThenProgramPipelineSelectionAndMediaSampler) { typedef typename TGLLPFamily::PIPELINE_SELECT PIPELINE_SELECT; cl_uint workDim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {1, 1, 1}; UserEvent userEvent(context); cl_event blockedEvent = &userEvent; auto retVal = 
pCmdQ->enqueueKernel( pVmeKernel, workDim, globalWorkOffset, globalWorkSize, nullptr, 1, &blockedEvent, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); userEvent.setStatus(CL_COMPLETE); parseCommands(*pCmdQ); ASSERT_NE(cmdPipelineSelect, nullptr); auto *pCmd = genCmdCast(cmdPipelineSelect); auto expectedMask = pipelineSelectEnablePipelineSelectMaskBits | pipelineSelectMediaSamplerDopClockGateMaskBits; auto expectedPipelineSelection = PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU; EXPECT_EQ(expectedMask, pCmd->getMaskBits()); EXPECT_EQ(expectedPipelineSelection, pCmd->getPipelineSelection()); EXPECT_FALSE(pCmd->getMediaSamplerDopClockGateEnable()); } GEN12LPTEST_F(MediaKernelTest, givenGen12LpCsrWhenEnqueueBlockedNonVmeKernelFirstTimeThenProgramPipelineSelectionAndMediaSampler) { typedef typename TGLLPFamily::PIPELINE_SELECT PIPELINE_SELECT; cl_uint workDim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {1, 1, 1}; UserEvent userEvent(context); cl_event blockedEvent = &userEvent; auto retVal = pCmdQ->enqueueKernel( pKernel, workDim, globalWorkOffset, globalWorkSize, nullptr, 1, &blockedEvent, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); userEvent.setStatus(CL_COMPLETE); parseCommands(*pCmdQ); ASSERT_NE(cmdPipelineSelect, nullptr); auto *pCmd = genCmdCast(cmdPipelineSelect); auto expectedMask = pipelineSelectEnablePipelineSelectMaskBits | pipelineSelectMediaSamplerDopClockGateMaskBits; auto expectedPipelineSelection = PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU; EXPECT_EQ(expectedMask, pCmd->getMaskBits()); EXPECT_EQ(expectedPipelineSelection, pCmd->getPipelineSelection()); EXPECT_TRUE(pCmd->getMediaSamplerDopClockGateEnable()); } GEN12LPTEST_F(MediaKernelTest, givenGen12LpCsrWhenEnqueueVmeKernelFirstTimeThenProgramPipelineSelectionAndMediaSampler) { typedef typename TGLLPFamily::PIPELINE_SELECT PIPELINE_SELECT; enqueueVmeKernel(); auto numCommands = getCommandsList().size(); EXPECT_EQ(1u, numCommands); auto pCmd = getCommand(); auto expectedMask = 
pipelineSelectEnablePipelineSelectMaskBits | pipelineSelectMediaSamplerDopClockGateMaskBits; auto expectedPipelineSelection = PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU; EXPECT_EQ(expectedMask, pCmd->getMaskBits()); EXPECT_EQ(expectedPipelineSelection, pCmd->getPipelineSelection()); EXPECT_FALSE(pCmd->getMediaSamplerDopClockGateEnable()); } GEN12LPTEST_F(MediaKernelTest, givenGen12LpCsrWhenEnqueueNonVmeKernelFirstTimeThenProgramPipelineSelectionAndMediaSampler) { typedef typename TGLLPFamily::PIPELINE_SELECT PIPELINE_SELECT; enqueueRegularKernel(); auto numCommands = getCommandsList().size(); EXPECT_EQ(1u, numCommands); auto pCmd = getCommand(); auto expectedMask = pipelineSelectEnablePipelineSelectMaskBits | pipelineSelectMediaSamplerDopClockGateMaskBits; auto expectedPipelineSelection = PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU; EXPECT_EQ(expectedMask, pCmd->getMaskBits()); EXPECT_EQ(expectedPipelineSelection, pCmd->getPipelineSelection()); EXPECT_TRUE(pCmd->getMediaSamplerDopClockGateEnable()); } GEN12LPTEST_F(MediaKernelTest, givenGen12LpCsrWhenEnqueueVmeKernelTwiceThenProgramPipelineSelectOnce) { typedef typename TGLLPFamily::PIPELINE_SELECT PIPELINE_SELECT; enqueueVmeKernel(); auto numCommands = getCommandsList().size(); EXPECT_EQ(1u, numCommands); } GEN12LPTEST_F(MediaKernelTest, givenGen12LpCsrWhenEnqueueNonVmeKernelTwiceThenProgramPipelineSelectOnce) { typedef typename TGLLPFamily::PIPELINE_SELECT PIPELINE_SELECT; enqueueVmeKernel(); auto numCommands = getCommandsList().size(); EXPECT_EQ(1u, numCommands); } GEN12LPTEST_F(MediaKernelTest, givenGen12LpCsrWhenEnqueueVmeKernelAfterNonVmeKernelThenProgramPipelineSelectionAndMediaSamplerTwice) { typedef typename TGLLPFamily::PIPELINE_SELECT PIPELINE_SELECT; enqueueRegularKernel(); enqueueVmeKernel(); auto commands = getCommandsList(); EXPECT_EQ(2u, commands.size()); auto pCmd = static_cast(commands.back()); auto expectedMask = pipelineSelectEnablePipelineSelectMaskBits | 
pipelineSelectMediaSamplerDopClockGateMaskBits; EXPECT_EQ(expectedMask, pCmd->getMaskBits()); EXPECT_FALSE(pCmd->getMediaSamplerDopClockGateEnable()); } GEN12LPTEST_F(MediaKernelTest, givenGen12LpCsrWhenEnqueueNonVmeKernelAfterVmeKernelThenProgramProgramPipelineSelectionAndMediaSamplerTwice) { typedef typename TGLLPFamily::PIPELINE_SELECT PIPELINE_SELECT; enqueueVmeKernel(); enqueueRegularKernel(); auto commands = getCommandsList(); EXPECT_EQ(2u, commands.size()); auto pCmd = static_cast(commands.back()); auto expectedMask = pipelineSelectEnablePipelineSelectMaskBits | pipelineSelectMediaSamplerDopClockGateMaskBits; EXPECT_EQ(expectedMask, pCmd->getMaskBits()); EXPECT_TRUE(pCmd->getMediaSamplerDopClockGateEnable()); } GEN12LPTEST_F(MediaKernelTest, givenGen12LpCsrWhenEnqueueVmeKernelThenVmeSubslicesConfigDoesntChangeToFalse) { auto csr = static_cast *>(&pDevice->getGpgpuCommandStreamReceiver()); csr->lastVmeSubslicesConfig = true; enqueueVmeKernel(); EXPECT_TRUE(csr->lastVmeSubslicesConfig); } GEN12LPTEST_F(MediaKernelTest, givenGen12LpCsrWhenEnqueueVmeKernelThenVmeSubslicesConfigDoesntChangeToTrue) { auto csr = static_cast *>(&pDevice->getGpgpuCommandStreamReceiver()); csr->lastVmeSubslicesConfig = false; enqueueVmeKernel(); EXPECT_FALSE(csr->lastVmeSubslicesConfig); } GEN12LPTEST_F(MediaKernelTest, GivenGen12lpWhenGettingCmdSizeForMediaSamplerThenZeroIsReturned) { auto csr = static_cast *>(&pDevice->getGpgpuCommandStreamReceiver()); csr->lastVmeSubslicesConfig = false; EXPECT_EQ(0u, csr->getCmdSizeForMediaSampler(false)); EXPECT_EQ(0u, csr->getCmdSizeForMediaSampler(true)); csr->lastVmeSubslicesConfig = true; EXPECT_EQ(0u, csr->getCmdSizeForMediaSampler(false)); EXPECT_EQ(0u, csr->getCmdSizeForMediaSampler(true)); } compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/excludes_ocl_gen12lp.cpp000066400000000000000000000025451422164147700276120ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ 
#include "shared/test/common/test_macros/test.h" HWTEST_EXCLUDE_PRODUCT(ProfilingTests, GivenCommandQueueBlockedWithProfilingWhenWalkerIsDispatchedThenMiStoreRegisterMemIsPresentInCS, IGFX_GEN12LP_CORE); HWTEST_EXCLUDE_PRODUCT(ProfilingTests, GivenCommandQueueWithProflingWhenWalkerIsDispatchedThenMiStoreRegisterMemIsPresentInCS, IGFX_GEN12LP_CORE); HWTEST_EXCLUDE_PRODUCT(EventProfilingTest, givenEventWhenCompleteIsZeroThenCalcProfilingDataSetsEndTimestampInCompleteTimestampAndDoesntCallOsTimeMethods, IGFX_GEN12LP_CORE); HWTEST_EXCLUDE_PRODUCT(EventProfilingTests, givenRawTimestampsDebugModeWhenDataIsQueriedThenRawDataIsReturned, IGFX_GEN12LP_CORE); HWTEST_EXCLUDE_PRODUCT(EventProfilingTest, givenRawTimestampsDebugModeWhenStartTimeStampLTQueueTimeStampThenIncreaseStartTimeStamp, IGFX_GEN12LP_CORE); HWTEST_EXCLUDE_PRODUCT(ProfilingWithPerfCountersTests, GivenCommandQueueWithProfilingPerfCountersWhenWalkerIsDispatchedThenRegisterStoresArePresentInCS, IGFX_GEN12LP_CORE); HWTEST_EXCLUDE_PRODUCT(ProfilingCommandsTest, givenKernelWhenProfilingCommandStartIsTakenThenTimeStampAddressIsProgrammedCorrectly, IGFX_GEN12LP_CORE); HWTEST_EXCLUDE_PRODUCT(ProfilingCommandsTest, givenKernelWhenProfilingCommandStartIsNotTakenThenTimeStampAddressIsProgrammedCorrectly, IGFX_GEN12LP_CORE); compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/gen12lp_tests_wrapper.cpp000066400000000000000000000017701422164147700300420ustar00rootroot00000000000000/* * Copyright (C) 2019-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/gen12lp/buffer_tests_gen12lp.inl" #include "opencl/test/unit_test/gen12lp/coherency_tests_gen12lp.inl" #include "opencl/test/unit_test/gen12lp/command_stream_receiver_hw_tests_gen12lp.inl" #include "opencl/test/unit_test/gen12lp/enqueue_media_kernel_gen12lp.inl" #include "opencl/test/unit_test/gen12lp/hw_helper_tests_gen12lp.inl" #include "opencl/test/unit_test/gen12lp/image_tests_gen12lp.inl" #include 
"opencl/test/unit_test/gen12lp/kernel_tests_gen12lp.inl" #include "opencl/test/unit_test/gen12lp/profiling_tests_gen12lp.inl" #include "opencl/test/unit_test/gen12lp/sampler_tests_gen12lp.inl" #include "opencl/test/unit_test/gen12lp/tbx_command_stream_receiver_tests_gen12lp.inl" #include "opencl/test/unit_test/gen12lp/test_device_caps_gen12lp.inl" #include "opencl/test/unit_test/gen12lp/test_platform_caps_gen12lp.inl" #include "opencl/test/unit_test/gen12lp/test_sample_gen12lp.inl" compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/gpgpu_walker_tests_gen12lp.cpp000066400000000000000000000145231422164147700310510ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen12lp/hw_info.h" #include "shared/test/common/cmd_parse/hw_parse.h" #include "shared/test/common/fixtures/linear_stream_fixture.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/command_queue/gpgpu_walker.h" #include "opencl/source/command_queue/hardware_interface.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" namespace NEO { struct GpgpuWalkerTests : public ::testing::Test { void SetUp() override { } void TearDown() override { } }; GEN12LPTEST_F(GpgpuWalkerTests, givenMiStoreRegMemWhenAdjustMiStoreRegMemModeThenMmioRemapEnableIsSet) { using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM; MI_STORE_REGISTER_MEM cmd = FamilyType::cmdInitStoreRegisterMem; GpgpuWalkerHelper::adjustMiStoreRegMemMode(&cmd); EXPECT_EQ(true, cmd.getMmioRemapEnable()); } class MockKernelWithApplicableWa : public MockKernel { public: MockKernelWithApplicableWa(Program *program, const KernelInfo &kernelInfos, ClDevice &clDeviceArg) : MockKernel(program, kernelInfos, clDeviceArg) {} bool requiresWaDisableRccRhwoOptimization() const override { return waApplicable; } 
bool waApplicable = false; }; struct HardwareInterfaceTests : public ClDeviceFixture, public LinearStreamFixture, public ::testing::Test { void SetUp() override { ClDeviceFixture::SetUp(); LinearStreamFixture::SetUp(); pContext = new NEO::MockContext(pClDevice); pCommandQueue = new MockCommandQueue(pContext, pClDevice, nullptr, false); pProgram = new MockProgram(pContext, false, toClDeviceVector(*pClDevice)); auto kernelInfos = MockKernel::toKernelInfoContainer(pProgram->mockKernelInfo, rootDeviceIndex); pMultiDeviceKernel = MockMultiDeviceKernel::create(pProgram, kernelInfos); pKernel = static_cast(pMultiDeviceKernel->getKernel(rootDeviceIndex)); } void TearDown() override { pMultiDeviceKernel->release(); pProgram->release(); pCommandQueue->release(); pContext->release(); LinearStreamFixture::TearDown(); ClDeviceFixture::TearDown(); } CommandQueue *pCommandQueue = nullptr; Context *pContext = nullptr; MockProgram *pProgram = nullptr; MockKernelWithApplicableWa *pKernel = nullptr; MultiDeviceKernel *pMultiDeviceKernel = nullptr; }; GEN12LPTEST_F(HardwareInterfaceTests, GivenKernelWithApplicableWaDisableRccRhwoOptimizationWhenDispatchWorkaroundsIsCalledThenWorkaroundIsApplied) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; bool enableWa = true; pKernel->waApplicable = true; HardwareInterface::dispatchWorkarounds(&linearStream, *pCommandQueue, *pKernel, enableWa); size_t expectedUsedForEnableWa = (sizeof(PIPE_CONTROL) + sizeof(MI_LOAD_REGISTER_IMM)); ASSERT_EQ(expectedUsedForEnableWa, linearStream.getUsed()); HardwareParse hwParse; hwParse.parseCommands(linearStream); auto itorPipeCtrl = find(hwParse.cmdList.begin(), hwParse.cmdList.end()); ASSERT_NE(hwParse.cmdList.end(), itorPipeCtrl); auto pipeControl = genCmdCast(*itorPipeCtrl); ASSERT_NE(nullptr, pipeControl); EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); auto itorLri = find(hwParse.cmdList.begin(), 
hwParse.cmdList.end()); ASSERT_NE(hwParse.cmdList.end(), itorLri); auto lriCmd = genCmdCast(*itorLri); ASSERT_NE(nullptr, lriCmd); EXPECT_EQ(0x7010u, lriCmd->getRegisterOffset()); EXPECT_EQ(0x40004000u, lriCmd->getDataDword()); enableWa = false; HardwareInterface::dispatchWorkarounds(&linearStream, *pCommandQueue, *pKernel, enableWa); size_t expectedUsedForDisableWa = 2 * (sizeof(PIPE_CONTROL) + sizeof(MI_LOAD_REGISTER_IMM)); ASSERT_EQ(expectedUsedForDisableWa, linearStream.getUsed()); hwParse.TearDown(); hwParse.parseCommands(linearStream, expectedUsedForEnableWa); itorPipeCtrl = find(hwParse.cmdList.begin(), hwParse.cmdList.end()); ASSERT_NE(hwParse.cmdList.end(), itorPipeCtrl); pipeControl = genCmdCast(*itorPipeCtrl); ASSERT_NE(nullptr, pipeControl); EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); itorLri = find(hwParse.cmdList.begin(), hwParse.cmdList.end()); ASSERT_NE(hwParse.cmdList.end(), itorLri); lriCmd = genCmdCast(*itorLri); ASSERT_NE(nullptr, lriCmd); EXPECT_EQ(0x7010u, lriCmd->getRegisterOffset()); EXPECT_EQ(0x40000000u, lriCmd->getDataDword()); } GEN12LPTEST_F(HardwareInterfaceTests, GivenKernelWithoutApplicableWaDisableRccRhwoOptimizationWhenDispatchWorkaroundsIsCalledThenWorkaroundIsApplied) { bool enableWa = true; pKernel->waApplicable = false; HardwareInterface::dispatchWorkarounds(&linearStream, *pCommandQueue, *pKernel, enableWa); EXPECT_EQ(0u, linearStream.getUsed()); enableWa = false; HardwareInterface::dispatchWorkarounds(&linearStream, *pCommandQueue, *pKernel, enableWa); EXPECT_EQ(0u, linearStream.getUsed()); } GEN12LPTEST_F(HardwareInterfaceTests, GivenKernelWithApplicableWaDisableRccRhwoOptimizationWhenCalculatingCommandsSizeThenAppropriateSizeIsReturned) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; pKernel->waApplicable = true; auto cmdSize = GpgpuWalkerHelper::getSizeForWaDisableRccRhwoOptimization(pKernel); size_t expectedSize = 2 * 
(sizeof(PIPE_CONTROL) + sizeof(MI_LOAD_REGISTER_IMM)); EXPECT_EQ(expectedSize, cmdSize); } GEN12LPTEST_F(HardwareInterfaceTests, GivenKernelWithoutApplicableWaDisableRccRhwoOptimizationWhenCalculatingCommandsSizeThenZeroIsReturned) { pKernel->waApplicable = false; auto cmdSize = GpgpuWalkerHelper::getSizeForWaDisableRccRhwoOptimization(pKernel); EXPECT_EQ(0u, cmdSize); } } // namespace NEOcompute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/hw_helper_tests_gen12lp.inl000066400000000000000000000472361422164147700303460ustar00rootroot00000000000000/* * Copyright (C) 2019-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/hw_info_config.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/hw_helper_tests.h" #include "shared/test/common/libult/gen12lp/special_ult_helper_gen12lp.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "opencl/source/helpers/cl_hw_helper.h" #include "opencl/test/unit_test/mocks/mock_cl_hw_helper.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "engine_node.h" using HwHelperTestGen12Lp = HwHelperTest; GEN12LPTEST_F(HwHelperTestGen12Lp, givenTglLpThenAuxTranslationIsRequired) { auto &clHwHelper = ClHwHelper::get(renderCoreFamily); for (auto accessedUsingStatelessAddressingMode : {true, false}) { KernelInfo kernelInfo{}; ArgDescriptor arg; arg.as(true).accessedUsingStatelessAddressingMode = accessedUsingStatelessAddressingMode; kernelInfo.kernelDescriptor.payloadMappings.explicitArgs.push_back(std::move(arg)); EXPECT_EQ(accessedUsingStatelessAddressingMode, clHwHelper.requiresAuxResolves(kernelInfo, hardwareInfo)); } } GEN12LPTEST_F(HwHelperTestGen12Lp, WhenGettingMaxBarriersPerSliceThenCorrectSizeIsReturned) { auto &helper = HwHelper::get(renderCoreFamily); EXPECT_EQ(32u, helper.getMaxBarrierRegisterPerSlice()); } GEN12LPTEST_F(HwHelperTestGen12Lp, 
WhenGettingPitchAlignmentForImageThenCorrectValueIsReturned) { auto &helper = HwHelper::get(renderCoreFamily); auto stepping = hardwareInfo.platform.usRevId; if (SpecialUltHelperGen12lp::shouldPerformimagePitchAlignment(hardwareInfo.platform.eProductFamily) && stepping == 0) { EXPECT_EQ(64u, helper.getPitchAlignmentForImage(&hardwareInfo)); } else { EXPECT_EQ(4u, helper.getPitchAlignmentForImage(&hardwareInfo)); } } GEN12LPTEST_F(HwHelperTestGen12Lp, WhenAdjustingDefaultEngineTypeThenRcsIsSet) { hardwareInfo.featureTable.flags.ftrCCSNode = false; auto &helper = HwHelper::get(renderCoreFamily); helper.adjustDefaultEngineType(&hardwareInfo); EXPECT_EQ(aub_stream::ENGINE_RCS, hardwareInfo.capabilityTable.defaultEngineType); } GEN12LPTEST_F(HwHelperTestGen12Lp, givenDifferentSizesOfAllocationWhenCheckingCompressionPreferenceThenReturnCorrectValue) { auto &helper = HwHelper::get(renderCoreFamily); const size_t sizesToCheck[] = {128, 256, 512, 1023, 1024, 1025}; for (size_t size : sizesToCheck) { EXPECT_FALSE(helper.isBufferSizeSuitableForCompression(size, *defaultHwInfo)); } } GEN12LPTEST_F(HwHelperTestGen12Lp, givenFtrCcsNodeNotSetAndBcsInfoSetWhenGetGpgpuEnginesThenReturnThreeRcsEnginesAndOneBcsEngine) { HardwareInfo hwInfo = *defaultHwInfo; hwInfo.featureTable.flags.ftrCCSNode = false; hwInfo.featureTable.ftrBcsInfo = 1; hwInfo.capabilityTable.blitterOperationsSupported = true; hwInfo.capabilityTable.defaultEngineType = aub_stream::ENGINE_RCS; auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); EXPECT_EQ(4u, device->allEngines.size()); auto &engines = HwHelperHw::get().getGpgpuEngineInstances(hwInfo); EXPECT_EQ(4u, engines.size()); EXPECT_EQ(aub_stream::ENGINE_RCS, engines[0].first); EXPECT_EQ(aub_stream::ENGINE_RCS, engines[1].first); EXPECT_EQ(aub_stream::ENGINE_RCS, engines[2].first); EXPECT_EQ(aub_stream::ENGINE_BCS, engines[3].first); } GEN12LPTEST_F(HwHelperTestGen12Lp, 
givenFtrCcsNodeNotSetAndCcsDefualtEngineWhenGetGpgpuEnginesThenReturnTwoRcsEnginesAndOneCcs) { HardwareInfo hwInfo = *defaultHwInfo; hwInfo.featureTable.flags.ftrCCSNode = false; hwInfo.featureTable.ftrBcsInfo = 0; hwInfo.capabilityTable.defaultEngineType = aub_stream::ENGINE_CCS; auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); EXPECT_EQ(3u, device->allEngines.size()); auto &engines = HwHelperHw::get().getGpgpuEngineInstances(hwInfo); EXPECT_EQ(3u, engines.size()); EXPECT_EQ(aub_stream::ENGINE_RCS, engines[0].first); EXPECT_EQ(aub_stream::ENGINE_RCS, engines[1].first); EXPECT_EQ(aub_stream::ENGINE_CCS, engines[2].first); } GEN12LPTEST_F(HwHelperTestGen12Lp, givenFtrCcsNodeNotSetWhenGetGpgpuEnginesThenReturnThreeRcsEngines) { HardwareInfo hwInfo = *defaultHwInfo; hwInfo.featureTable.flags.ftrCCSNode = false; hwInfo.featureTable.ftrBcsInfo = 0; hwInfo.capabilityTable.defaultEngineType = aub_stream::ENGINE_RCS; auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); EXPECT_EQ(3u, device->allEngines.size()); auto &engines = HwHelperHw::get().getGpgpuEngineInstances(hwInfo); EXPECT_EQ(3u, engines.size()); EXPECT_EQ(aub_stream::ENGINE_RCS, engines[0].first); EXPECT_EQ(aub_stream::ENGINE_RCS, engines[1].first); EXPECT_EQ(aub_stream::ENGINE_RCS, engines[2].first); } GEN12LPTEST_F(HwHelperTestGen12Lp, givenFtrCcsNodeSetWhenGetGpgpuEnginesThenReturnTwoRcsAndCcsEngines) { HardwareInfo hwInfo = *defaultHwInfo; hwInfo.featureTable.flags.ftrCCSNode = true; hwInfo.featureTable.ftrBcsInfo = 0; hwInfo.capabilityTable.defaultEngineType = aub_stream::ENGINE_CCS; auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); EXPECT_EQ(4u, device->allEngines.size()); auto &engines = HwHelperHw::get().getGpgpuEngineInstances(hwInfo); EXPECT_EQ(4u, engines.size()); EXPECT_EQ(aub_stream::ENGINE_CCS, engines[0].first); EXPECT_EQ(aub_stream::ENGINE_RCS, engines[1].first); 
EXPECT_EQ(aub_stream::ENGINE_RCS, engines[2].first); EXPECT_EQ(aub_stream::ENGINE_CCS, engines[3].first); } GEN12LPTEST_F(HwHelperTestGen12Lp, givenFtrCcsNodeSetFtrGpGpuMidThreadLevelPreemptSetWhenGetGpgpuEnginesThenReturn2RcsAndCcsEngines) { HardwareInfo hwInfo = *defaultHwInfo; hwInfo.featureTable.flags.ftrCCSNode = true; hwInfo.featureTable.ftrBcsInfo = 0; hwInfo.featureTable.flags.ftrGpGpuMidThreadLevelPreempt = true; hwInfo.capabilityTable.defaultEngineType = aub_stream::ENGINE_CCS; auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); EXPECT_EQ(3u, device->allEngines.size()); auto &engines = HwHelperHw::get().getGpgpuEngineInstances(hwInfo); EXPECT_EQ(3u, engines.size()); EXPECT_EQ(aub_stream::ENGINE_RCS, engines[0].first); EXPECT_EQ(aub_stream::ENGINE_RCS, engines[1].first); EXPECT_EQ(aub_stream::ENGINE_CCS, engines[2].first); } GEN12LPTEST_F(HwHelperTestGen12Lp, givenFtrCcsNodeSetFtrGpGpuMidThreadLevelPreemptNotSetWhenGetGpgpuEnginesThenReturn2RcsAnd2CcsEngines) { HardwareInfo hwInfo = *defaultHwInfo; hwInfo.featureTable.flags.ftrCCSNode = true; hwInfo.featureTable.ftrBcsInfo = 0; hwInfo.featureTable.flags.ftrGpGpuMidThreadLevelPreempt = false; hwInfo.capabilityTable.defaultEngineType = aub_stream::ENGINE_CCS; auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); EXPECT_EQ(4u, device->allEngines.size()); auto &engines = HwHelperHw::get().getGpgpuEngineInstances(hwInfo); EXPECT_EQ(4u, engines.size()); EXPECT_EQ(aub_stream::ENGINE_CCS, engines[0].first); EXPECT_EQ(aub_stream::ENGINE_RCS, engines[1].first); EXPECT_EQ(aub_stream::ENGINE_RCS, engines[2].first); EXPECT_EQ(aub_stream::ENGINE_CCS, engines[3].first); } GEN12LPTEST_F(HwHelperTestGen12Lp, givenFtrCcsNodeSetAndDefaultRcsWhenGetGpgpuEnginesThenReturnAppropriateNumberOfRcsEngines) { HardwareInfo hwInfo = *defaultHwInfo; hwInfo.featureTable.flags.ftrCCSNode = true; hwInfo.featureTable.ftrBcsInfo = 0; 
hwInfo.capabilityTable.defaultEngineType = aub_stream::ENGINE_RCS; auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); EXPECT_EQ(3u, device->allEngines.size()); auto &engines = HwHelperHw::get().getGpgpuEngineInstances(hwInfo); EXPECT_EQ(3u, engines.size()); EXPECT_EQ(aub_stream::ENGINE_RCS, engines[0].first); EXPECT_EQ(aub_stream::ENGINE_RCS, engines[1].first); EXPECT_EQ(aub_stream::ENGINE_RCS, engines[2].first); } GEN12LPTEST_F(HwHelperTestGen12Lp, givenTgllpWhenIsFusedEuDispatchEnabledIsCalledThenResultIsCorrect) { DebugManagerStateRestore restorer; auto &helper = HwHelper::get(renderCoreFamily); auto &waTable = hardwareInfo.workaroundTable; std::tuple testParams[]{ {true, false, -1}, {false, true, -1}, {true, false, 0}, {true, true, 0}, {false, false, 1}, {false, true, 1}}; for (auto &[expectedResult, wa, debugKey] : testParams) { waTable.flags.waDisableFusedThreadScheduling = wa; DebugManager.flags.CFEFusedEUDispatch.set(debugKey); EXPECT_EQ(expectedResult, helper.isFusedEuDispatchEnabled(hardwareInfo, false)); } } class HwHelperTestsGen12LpBuffer : public ::testing::Test { public: void SetUp() override { ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); device = std::make_unique(Device::create(executionEnvironment, rootDeviceIndex)); context = std::make_unique(device.get(), true); context->contextType = ContextType::CONTEXT_TYPE_UNRESTRICTIVE; } const uint32_t rootDeviceIndex = 0u; cl_int retVal = CL_SUCCESS; std::unique_ptr device; std::unique_ptr context; std::unique_ptr buffer; }; GEN12LPTEST_F(HwHelperTestsGen12LpBuffer, givenCompressedBufferThenCheckResourceCompatibilityReturnsFalse) { auto &helper = HwHelper::get(renderCoreFamily); buffer.reset(Buffer::create(context.get(), 0, MemoryConstants::cacheLineSize, nullptr, retVal)); MockBuffer::setAllocationType(buffer->getGraphicsAllocation(rootDeviceIndex), context->getDevice(0)->getRootDeviceEnvironment().getGmmClientContext(), true); 
EXPECT_FALSE(helper.checkResourceCompatibility(*buffer->getGraphicsAllocation(rootDeviceIndex))); } GEN12LPTEST_F(HwHelperTestsGen12LpBuffer, givenBufferThenCheckResourceCompatibilityReturnsTrue) { auto &helper = HwHelper::get(renderCoreFamily); buffer.reset(Buffer::create(context.get(), 0, MemoryConstants::cacheLineSize, nullptr, retVal)); buffer->getGraphicsAllocation(rootDeviceIndex)->setAllocationType(AllocationType::BUFFER); EXPECT_TRUE(helper.checkResourceCompatibility(*buffer->getGraphicsAllocation(rootDeviceIndex))); } using LriHelperTestsGen12Lp = ::testing::Test; GEN12LPTEST_F(LriHelperTestsGen12Lp, whenProgrammingLriCommandThenExpectMmioRemapEnable) { using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; std::unique_ptr buffer(new uint8_t[128]); LinearStream stream(buffer.get(), 128); uint32_t address = 0x8888; uint32_t data = 0x1234; auto expectedLri = FamilyType::cmdInitLoadRegisterImm; EXPECT_TRUE(expectedLri.getMmioRemapEnable()); expectedLri.setRegisterOffset(address); expectedLri.setDataDword(data); expectedLri.setMmioRemapEnable(false); LriHelper::program(&stream, address, data, false); MI_LOAD_REGISTER_IMM *lri = genCmdCast(buffer.get()); ASSERT_NE(nullptr, lri); EXPECT_EQ(sizeof(MI_LOAD_REGISTER_IMM), stream.getUsed()); EXPECT_EQ(lri, stream.getCpuBase()); EXPECT_TRUE(memcmp(lri, &expectedLri, sizeof(MI_LOAD_REGISTER_IMM)) == 0); } using MemorySynchronizatiopCommandsTests = ::testing::Test; GEN12LPTEST_F(MemorySynchronizatiopCommandsTests, whenSettingCacheFlushExtraFieldsThenExpectHdcFlushSet) { PipeControlArgs args; args.constantCacheInvalidationEnable = true; MemorySynchronizationCommands::setCacheFlushExtraProperties(args); EXPECT_TRUE(args.hdcPipelineFlush); EXPECT_FALSE(args.constantCacheInvalidationEnable); } GEN12LPTEST_F(HwHelperTestGen12Lp, givenGen12WhenCallIsPackedSupportedThenReturnTrue) { auto &helper = HwHelper::get(renderCoreFamily); EXPECT_TRUE(helper.packedFormatsSupported()); } 
GEN12LPTEST_F(HwHelperTestGen12Lp, whenRequestingMocsThenProperMocsIndicesAreBeingReturned) { auto &helper = HwHelper::get(renderCoreFamily); auto gmmHelper = this->pDevice->getGmmHelper(); const auto mocsNoCache = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED) >> 1; const auto mocsL3 = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER) >> 1; EXPECT_EQ(mocsNoCache, helper.getMocsIndex(*gmmHelper, false, false)); EXPECT_EQ(mocsNoCache, helper.getMocsIndex(*gmmHelper, false, true)); EXPECT_EQ(mocsL3, helper.getMocsIndex(*gmmHelper, true, false)); EXPECT_EQ(mocsL3, helper.getMocsIndex(*gmmHelper, true, true)); } GEN12LPTEST_F(HwHelperTestGen12Lp, givenL1ForceEnabledWhenRequestingMocsThenProperMocsIndicesAreBeingReturned) { DebugManagerStateRestore restore{}; DebugManager.flags.ForceL1Caching.set(1); auto &helper = HwHelper::get(renderCoreFamily); auto gmmHelper = this->pDevice->getGmmHelper(); const auto mocsNoCache = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED) >> 1; const auto mocsL3 = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER) >> 1; const auto mocsL1 = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST) >> 1; EXPECT_EQ(mocsNoCache, helper.getMocsIndex(*gmmHelper, false, false)); EXPECT_EQ(mocsNoCache, helper.getMocsIndex(*gmmHelper, false, true)); EXPECT_EQ(mocsL3, helper.getMocsIndex(*gmmHelper, true, false)); EXPECT_EQ(mocsL1, helper.getMocsIndex(*gmmHelper, true, true)); } GEN12LPTEST_F(HwHelperTestGen12Lp, givenL1ForceDisabledWhenRequestingMocsThenProperMocsIndicesAreBeingReturned) { DebugManagerStateRestore restore{}; DebugManager.flags.ForceL1Caching.set(0); auto &helper = HwHelper::get(renderCoreFamily); auto gmmHelper = this->pDevice->getGmmHelper(); const auto mocsNoCache = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED) >> 1; const auto mocsL3 = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER) >> 1; EXPECT_EQ(mocsNoCache, helper.getMocsIndex(*gmmHelper, false, false)); 
EXPECT_EQ(mocsNoCache, helper.getMocsIndex(*gmmHelper, false, true)); EXPECT_EQ(mocsL3, helper.getMocsIndex(*gmmHelper, true, false)); EXPECT_EQ(mocsL3, helper.getMocsIndex(*gmmHelper, true, true)); } GEN12LPTEST_F(HwHelperTestGen12Lp, givenAllocationTypeWithCpuAccessRequiredWhenCpuAccessIsDisallowedThenSystemMemoryIsRequested) { DebugManagerStateRestore restore; DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast(LocalMemoryAccessMode::CpuAccessDisallowed)); const AllocationType allocationTypesToUseSystemMemory[] = { AllocationType::COMMAND_BUFFER, AllocationType::CONSTANT_SURFACE, AllocationType::GLOBAL_SURFACE, AllocationType::INTERNAL_HEAP, AllocationType::LINEAR_STREAM, AllocationType::PIPE, AllocationType::PRINTF_SURFACE, AllocationType::TIMESTAMP_PACKET_TAG_BUFFER, AllocationType::RING_BUFFER, AllocationType::SEMAPHORE_BUFFER}; MockMemoryManager mockMemoryManager; for (auto allocationType : allocationTypesToUseSystemMemory) { AllocationData allocData{}; AllocationProperties properties(mockRootDeviceIndex, true, 10, allocationType, false, mockDeviceBitfield); mockMemoryManager.getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_TRUE(allocData.flags.requiresCpuAccess); EXPECT_TRUE(allocData.flags.useSystemMemory); } AllocationData allocData{}; AllocationProperties properties(mockRootDeviceIndex, true, 10, AllocationType::BUFFER, false, mockDeviceBitfield); mockMemoryManager.getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_FALSE(allocData.flags.requiresCpuAccess); EXPECT_FALSE(allocData.flags.useSystemMemory); } HWTEST2_F(HwHelperTestGen12Lp, givenRevisionEnumThenProperValueForIsWorkaroundRequiredIsReturned, IsRKL) { std::vector steppings; HardwareInfo hardwareInfo = *defaultHwInfo; steppings.push_back(0x0); //A0 steppings.push_back(0x4); //B0 steppings.push_back(0x5); //undefined for (auto stepping : steppings) 
{ hardwareInfo.platform.usRevId = stepping; HwHelper &hwHelper = HwHelper::get(renderCoreFamily); if (stepping == 0x0) { EXPECT_TRUE(hwHelper.isWorkaroundRequired(REVISION_A0, REVISION_B, hardwareInfo)); EXPECT_FALSE(hwHelper.isWorkaroundRequired(REVISION_B, REVISION_A0, hardwareInfo)); } else if (stepping == 0x1 || stepping == 0x5) { EXPECT_FALSE(hwHelper.isWorkaroundRequired(REVISION_A0, REVISION_D, hardwareInfo)); } } } HWTEST2_F(HwHelperTestGen12Lp, givenRevisionEnumThenProperValueForIsWorkaroundRequiredIsReturned, IsADLS) { std::vector steppings; HardwareInfo hardwareInfo = *defaultHwInfo; steppings.push_back(0x0); //A0 steppings.push_back(0x4); //B0 steppings.push_back(0x5); //undefined for (auto stepping : steppings) { hardwareInfo.platform.usRevId = stepping; HwHelper &hwHelper = HwHelper::get(renderCoreFamily); if (stepping == 0x0) { EXPECT_TRUE(hwHelper.isWorkaroundRequired(REVISION_A0, REVISION_B, hardwareInfo)); EXPECT_FALSE(hwHelper.isWorkaroundRequired(REVISION_B, REVISION_A0, hardwareInfo)); } else if (stepping == 0x4 || stepping == 0x5) { EXPECT_FALSE(hwHelper.isWorkaroundRequired(REVISION_A0, REVISION_D, hardwareInfo)); EXPECT_FALSE(hwHelper.isWorkaroundRequired(REVISION_A0, REVISION_B, hardwareInfo)); EXPECT_FALSE(hwHelper.isWorkaroundRequired(REVISION_B, REVISION_A0, hardwareInfo)); } } } HWTEST2_F(HwHelperTestGen12Lp, WhenGettingDeviceIpVersionThenMakeCorrectDeviceIpVersion, IsTGLLP) { EXPECT_EQ(ClHwHelperMock::makeDeviceIpVersion(12, 0, 0), ClHwHelper::get(renderCoreFamily).getDeviceIpVersion(*defaultHwInfo)); } HWTEST2_F(HwHelperTestGen12Lp, WhenGettingDeviceIpVersionThenMakeCorrectDeviceIpVersion, IsRKL) { EXPECT_EQ(ClHwHelperMock::makeDeviceIpVersion(12, 0, 0), ClHwHelper::get(renderCoreFamily).getDeviceIpVersion(*defaultHwInfo)); } HWTEST2_F(HwHelperTestGen12Lp, WhenGettingDeviceIpVersionThenMakeCorrectDeviceIpVersion, IsADLS) { EXPECT_EQ(ClHwHelperMock::makeDeviceIpVersion(12, 0, 0), 
ClHwHelper::get(renderCoreFamily).getDeviceIpVersion(*defaultHwInfo)); } GEN12LPTEST_F(HwHelperTestGen12Lp, WhenGettingSupportedDeviceFeatureCapabilitiesThenReturnCorrectValue) { cl_device_feature_capabilities_intel expectedCapabilities = CL_DEVICE_FEATURE_FLAG_DP4A_INTEL; EXPECT_EQ(expectedCapabilities, ClHwHelper::get(renderCoreFamily).getSupportedDeviceFeatureCapabilities()); } GEN12LPTEST_F(HwHelperTestGen12Lp, givenLocalMemoryFeatureDisabledWhenIsLocalMemoryEnabledIsCalledThenTrueIsReturned) { hardwareInfo.featureTable.flags.ftrLocalMemory = true; auto &helper = reinterpret_cast &>(HwHelperHw::get()); EXPECT_TRUE(helper.isLocalMemoryEnabled(hardwareInfo)); } GEN12LPTEST_F(HwHelperTestGen12Lp, givenLocalMemoryFeatureEnabledWhenIsLocalMemoryEnabledIsCalledThenFalseIsReturned) { hardwareInfo.featureTable.flags.ftrLocalMemory = false; auto &helper = reinterpret_cast &>(HwHelperHw::get()); EXPECT_FALSE(helper.isLocalMemoryEnabled(hardwareInfo)); } compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/image_tests_gen12lp.inl000066400000000000000000000211571422164147700274450ustar00rootroot00000000000000/* * Copyright (C) 2019-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gmm_helper/client_context/gmm_client_context.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/image/image_surface_state.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/test/common/helpers/variable_backup.h" #include "shared/test/common/libult/gen12lp/special_ult_helper_gen12lp.h" #include "shared/test/common/mocks/mock_allocation_properties.h" #include "shared/test/common/mocks/mock_gmm.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include using namespace NEO; typedef ::testing::Test gen12LpImageTests; GEN12LPTEST_F(gen12LpImageTests, 
WhenAppendingSurfaceStateParamsThenSurfaceStateDoesNotChange) { typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; MockContext context; auto image = std::unique_ptr(ImageHelper::create(&context)); auto surfaceStateBefore = FamilyType::cmdInitRenderSurfaceState; auto surfaceStateAfter = FamilyType::cmdInitRenderSurfaceState; auto imageHw = static_cast *>(image.get()); EXPECT_EQ(0, memcmp(&surfaceStateBefore, &surfaceStateAfter, sizeof(RENDER_SURFACE_STATE))); imageHw->appendSurfaceStateParams(&surfaceStateAfter, context.getDevice(0)->getRootDeviceIndex(), false); EXPECT_EQ(0, memcmp(&surfaceStateBefore, &surfaceStateAfter, sizeof(RENDER_SURFACE_STATE))); } GEN12LPTEST_F(ImageClearColorFixture, givenImageForGen12LpWhenClearColorParametersAreSetThenClearColorSurfaceInSurfaceStateIsSet) { this->setUpImpl(); auto surfaceState = this->getSurfaceState(); surfaceState.setSurfaceBaseAddress(0xABCDEF1000); EXPECT_EQ(false, surfaceState.getClearValueAddressEnable()); EXPECT_EQ(0u, surfaceState.getClearColorAddress()); EXPECT_EQ(0u, surfaceState.getClearColorAddressHigh()); std::unique_ptr> imageHw(static_cast *>(ImageHelper::create(&context))); auto gmm = imageHw->getGraphicsAllocation(context.getDevice(0)->getRootDeviceIndex())->getDefaultGmm(); gmm->gmmResourceInfo->getResourceFlags()->Gpu.IndirectClearColor = 1; EncodeSurfaceState::setClearColorParams(&surfaceState, gmm); EXPECT_EQ(true, surfaceState.getClearValueAddressEnable()); EXPECT_NE(0u, surfaceState.getClearColorAddress()); EXPECT_NE(0u, surfaceState.getClearColorAddressHigh()); } GEN12LPTEST_F(ImageClearColorFixture, givenImageForGen12LpWhenCanonicalAddresForClearColorIsUsedThenItsConvertedToNonCanonicalForm) { this->setUpImpl(); auto surfaceState = this->getSurfaceState(); uint64_t canonicalAddress = 0xffffABCDABCDE000; EXPECT_THROW(surfaceState.setClearColorAddressHigh(static_cast(canonicalAddress >> 32)), std::exception); surfaceState.setSurfaceBaseAddress(canonicalAddress); 
std::unique_ptr> imageHw(static_cast *>(ImageHelper::create(&context))); auto gmm = imageHw->getGraphicsAllocation(context.getDevice(0)->getRootDeviceIndex())->getDefaultGmm(); gmm->gmmResourceInfo->getResourceFlags()->Gpu.IndirectClearColor = 1; EXPECT_NO_THROW(EncodeSurfaceState::setClearColorParams(&surfaceState, gmm)); uint64_t nonCanonicalAddress = ((static_cast(surfaceState.getClearColorAddressHigh()) << 32) | surfaceState.getClearColorAddress()); EXPECT_EQ(GmmHelper::decanonize(canonicalAddress), nonCanonicalAddress); } GEN12LPTEST_F(ImageClearColorFixture, givenMcsAllocationWhenSetArgIsCalledWithUnifiedAuxCapabilityAndMCSThenProgramAuxFieldsForCcs) { this->setUpImpl(); using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using AUXILIARY_SURFACE_MODE = typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE; using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE; std::unique_ptr context(new MockContext()); McsSurfaceInfo msi = {10, 20, 3}; auto mcsAlloc = context->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{context->getDevice(0)->getRootDeviceIndex(), MemoryConstants::pageSize}); cl_image_desc imgDesc = Image2dDefaults::imageDesc; imgDesc.num_samples = 8; std::unique_ptr image(Image2dHelper<>::create(context.get(), &imgDesc)); auto surfaceState = FamilyType::cmdInitRenderSurfaceState; auto imageHw = static_cast *>(image.get()); mcsAlloc->setDefaultGmm(new Gmm(context->getDevice(0)->getGmmClientContext(), nullptr, 1, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true)); surfaceState.setSurfaceBaseAddress(0xABCDEF1000); imageHw->setMcsSurfaceInfo(msi); imageHw->setMcsAllocation(mcsAlloc); auto mockResource = static_cast(mcsAlloc->getDefaultGmm()->gmmResourceInfo.get()); mockResource->setUnifiedAuxTranslationCapable(); mockResource->setMultisampleControlSurface(); EXPECT_EQ(0u, surfaceState.getAuxiliarySurfaceBaseAddress()); imageHw->setAuxParamsForMultisamples(&surfaceState); EXPECT_NE(0u, 
surfaceState.getAuxiliarySurfaceBaseAddress()); EXPECT_EQ(surfaceState.getAuxiliarySurfaceMode(), AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_MCS_LCE); } GEN12LPTEST_F(gen12LpImageTests, givenCompressionThenSurfaceStateParamsAreSetForCompression) { MockContext context; using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; cl_image_desc imgDesc = Image2dDefaults::imageDesc; imgDesc.num_samples = 8; std::unique_ptr image(Image2dHelper<>::create(&context, &imgDesc)); auto surfaceState = FamilyType::cmdInitRenderSurfaceState; auto imageHw = static_cast *>(image.get()); imageHw->getGraphicsAllocation(context.getDevice(0)->getRootDeviceIndex())->getDefaultGmm()->gmmResourceInfo->getResourceFlags()->Info.RenderCompressed = true; EncodeSurfaceState::setImageAuxParamsForCCS(&surfaceState, imageHw->getGraphicsAllocation(context.getDevice(0)->getRootDeviceIndex())->getDefaultGmm()); EXPECT_FALSE(surfaceState.getMemoryCompressionEnable()); EXPECT_EQ(surfaceState.getAuxiliarySurfaceMode(), RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_CCS_E); } GEN12LPTEST_F(gen12LpImageTests, givenNoCompressionWhenProgramingImageSurfaceStateThenCompressionIsDisabled) { MockContext context; using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; cl_image_desc imgDesc = Image2dDefaults::imageDesc; std::unique_ptr image(Image2dHelper<>::create(&context, &imgDesc)); auto surfaceState = FamilyType::cmdInitRenderSurfaceState; surfaceState.setMemoryCompressionEnable(true); surfaceState.setAuxiliarySurfaceMode(RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_CCS_E); auto imageHw = static_cast *>(image.get()); imageHw->getGraphicsAllocation(context.getDevice(0)->getRootDeviceIndex())->getDefaultGmm()->isCompressionEnabled = false; imageHw->setImageArg(&surfaceState, false, 0, 0, false); EXPECT_FALSE(surfaceState.getMemoryCompressionEnable()); EXPECT_EQ(surfaceState.getAuxiliarySurfaceMode(), 
RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE); } GEN12LPTEST_F(gen12LpImageTests, givenMediaCompressionThenSurfaceStateParamsAreSetForMediaCompression) { MockContext context; using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; cl_image_desc imgDesc = Image2dDefaults::imageDesc; imgDesc.num_samples = 8; std::unique_ptr image(Image2dHelper<>::create(&context, &imgDesc)); auto surfaceState = FamilyType::cmdInitRenderSurfaceState; auto imageHw = static_cast *>(image.get()); imageHw->getGraphicsAllocation(context.getDevice(0)->getRootDeviceIndex())->getDefaultGmm()->gmmResourceInfo->getResourceFlags()->Info.MediaCompressed = true; surfaceState.setAuxiliarySurfaceMode(RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_CCS_E); EncodeSurfaceState::setImageAuxParamsForCCS(&surfaceState, imageHw->getGraphicsAllocation(context.getDevice(0)->getRootDeviceIndex())->getDefaultGmm()); EXPECT_TRUE(surfaceState.getMemoryCompressionEnable()); EXPECT_EQ(surfaceState.getAuxiliarySurfaceMode(), RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE); } compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/kernel_tests_gen12lp.inl000066400000000000000000000044231422164147700276400ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" using namespace NEO; using Gen12LpKernelTest = Test; GEN12LPTEST_F(Gen12LpKernelTest, givenKernelWhenCanTransformImagesIsCalledThenReturnsFalse) { MockKernelWithInternals mockKernel(*pClDevice); auto retVal = mockKernel.mockKernel->Kernel::canTransformImages(); EXPECT_FALSE(retVal); } GEN12LPTEST_F(Gen12LpKernelTest, 
GivenKernelWhenNotUsingSharedObjArgsThenWaDisableRccRhwoOptimizationIsNotRequired) { MockKernelWithInternals kernel(*pClDevice); EXPECT_FALSE(kernel.mockKernel->requiresWaDisableRccRhwoOptimization()); } GEN12LPTEST_F(Gen12LpKernelTest, GivenKernelWhenAtLeastOneArgIsMediaCompressedThenWaDisableRccRhwoOptimizationIsRequired) { MockKernelWithInternals kernel(*pClDevice); kernel.kernelInfo.kernelDescriptor.payloadMappings.explicitArgs.resize(3); kernel.kernelInfo.addArgBuffer(0); kernel.kernelInfo.addArgImmediate(1); kernel.kernelInfo.addArgBuffer(2); kernel.mockKernel->initialize(); MockBuffer buffer; auto allocation = buffer.getGraphicsAllocation(pClDevice->getRootDeviceIndex()); auto gmm1 = new MockGmm(pDevice->getGmmClientContext()); allocation->setGmm(gmm1, 0); cl_mem clMem = &buffer; kernel.mockKernel->setArgBuffer(0, sizeof(cl_mem *), &clMem); uint32_t immediateArg = 0; kernel.mockKernel->setArgImmediate(1, sizeof(uint32_t), &immediateArg); MockBuffer bufferMediaCompressed; bufferMediaCompressed.setSharingHandler(new SharingHandler()); allocation = bufferMediaCompressed.getGraphicsAllocation(pClDevice->getRootDeviceIndex()); auto gmm2 = new MockGmm(pDevice->getGmmClientContext()); allocation->setGmm(gmm2, 0); allocation->getGmm(0)->gmmResourceInfo->getResourceFlags()->Info.MediaCompressed = 1; cl_mem clMem2 = &bufferMediaCompressed; kernel.mockKernel->setArgBuffer(2, sizeof(cl_mem *), &clMem2); EXPECT_TRUE(kernel.mockKernel->requiresWaDisableRccRhwoOptimization()); } compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/linux/000077500000000000000000000000001422164147700242365ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/linux/CMakeLists.txt000066400000000000000000000005321422164147700267760ustar00rootroot00000000000000# # Copyright (C) 2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_gen12lp_linux ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt 
${CMAKE_CURRENT_SOURCE_DIR}/hw_info_config_tests_gen12lp.cpp ) if(NOT WIN32) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen12lp_linux}) add_subdirectories() endif() compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/linux/hw_info_config_tests_gen12lp.cpp000066400000000000000000000040421422164147700324720ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/hw_helper.h" #include "shared/source/os_interface/os_interface.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/default_hw_info.h" #include "opencl/extensions/public/cl_ext_private.h" #include "opencl/test/unit_test/os_interface/linux/hw_info_config_linux_tests.h" using namespace NEO; using HwInfoConfigTestLinuxGen12lp = HwInfoConfigTestLinux; GEN12LPTEST_F(HwInfoConfigTestLinuxGen12lp, givenGen12LpProductWhenAdjustPlatformForProductFamilyCalledThenOverrideWithCorrectFamily) { auto hwInfoConfig = HwInfoConfig::get(productFamily); PLATFORM *testPlatform = &outHwInfo.platform; testPlatform->eDisplayCoreFamily = IGFX_GEN11_CORE; testPlatform->eRenderCoreFamily = IGFX_GEN11_CORE; hwInfoConfig->adjustPlatformForProductFamily(&outHwInfo); EXPECT_EQ(IGFX_GEN12LP_CORE, testPlatform->eRenderCoreFamily); EXPECT_EQ(IGFX_GEN12LP_CORE, testPlatform->eDisplayCoreFamily); } GEN12LPTEST_F(HwInfoConfigTestLinuxGen12lp, givenCompressionFtrEnabledWhenAskingForPageTableManagerThenReturnCorrectValue) { const auto &hwInfoConfig = *HwInfoConfig::get(productFamily); outHwInfo.capabilityTable.ftrRenderCompressedBuffers = false; outHwInfo.capabilityTable.ftrRenderCompressedImages = false; EXPECT_FALSE(hwInfoConfig.isPageTableManagerSupported(outHwInfo)); outHwInfo.capabilityTable.ftrRenderCompressedBuffers = true; outHwInfo.capabilityTable.ftrRenderCompressedImages = false; EXPECT_TRUE(hwInfoConfig.isPageTableManagerSupported(outHwInfo)); 
outHwInfo.capabilityTable.ftrRenderCompressedBuffers = false; outHwInfo.capabilityTable.ftrRenderCompressedImages = true; EXPECT_TRUE(hwInfoConfig.isPageTableManagerSupported(outHwInfo)); outHwInfo.capabilityTable.ftrRenderCompressedBuffers = true; outHwInfo.capabilityTable.ftrRenderCompressedImages = true; EXPECT_TRUE(hwInfoConfig.isPageTableManagerSupported(outHwInfo)); } compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/profiling_tests_gen12lp.inl000066400000000000000000000131421422164147700303470ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/command_queue/enqueue_common.h" #include "opencl/test/unit_test/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" using namespace NEO; struct ProfilingTestsGen12LP : public CommandEnqueueFixture, public ::testing::Test { void SetUp() override { CommandEnqueueFixture::SetUp(CL_QUEUE_PROFILING_ENABLE); mockKernelWithInternals = std::make_unique(*pClDevice, nullptr); } void TearDown() override { mockKernelWithInternals.reset(); CommandEnqueueFixture::TearDown(); } std::unique_ptr mockKernelWithInternals; }; GEN12LPTEST_F(ProfilingTestsGen12LP, GivenCommandQueueWithProflingWhenWalkerIsDispatchedThenTwoPIPECONTROLSWithOPERATION_WRITE_TIMESTAMPArePresentInCS) { typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; size_t globalOffsets[3] = {0, 0, 0}; size_t workItems[3] = {1, 1, 1}; uint32_t dimensions = 1; cl_event event; static_cast *>(pCmdQ)->enqueueKernel( *mockKernelWithInternals, dimensions, globalOffsets, workItems, nullptr, 0, nullptr, &event); parseCommands(*pCmdQ); uint32_t writeCounter = 0u; // Find GPGPU_WALKER auto itorPC = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), itorPC); //auto itorPC = 
find(itorGPGPUWalkerCmd, cmdList.end()); while (itorPC != cmdList.end()) { auto pPipeControl = genCmdCast(*itorPC); ASSERT_NE(nullptr, pPipeControl); if (PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP == pPipeControl->getPostSyncOperation()) { ++writeCounter; } ++itorPC; itorPC = find(itorPC, cmdList.end()); } EXPECT_EQ(writeCounter, 2u); clReleaseEvent(event); } template struct MockTagNode : public TagNode { public: using TagNode::tagForCpuAccess; using TagNode::gfxAllocation; MockTagNode() { gfxAllocation = nullptr; tagForCpuAccess = nullptr; } }; class MyDeviceTime : public DeviceTime { double getDynamicDeviceTimerResolution(HardwareInfo const &hwInfo) const override { EXPECT_FALSE(true); return 1.0; } uint64_t getDynamicDeviceTimerClock(HardwareInfo const &hwInfo) const override { EXPECT_FALSE(true); return 0; } bool getCpuGpuTime(TimeStampData *pGpuCpuTime, OSTime *) override { EXPECT_FALSE(true); return false; } }; class MyOSTime : public OSTime { public: static int instanceNum; MyOSTime() { instanceNum++; this->deviceTime.reset(new MyDeviceTime()); } bool getCpuTime(uint64_t *timeStamp) override { EXPECT_FALSE(true); return false; }; double getHostTimerResolution() const override { EXPECT_FALSE(true); return 0; } uint64_t getCpuRawTimestamp() override { EXPECT_FALSE(true); return 0; } }; int MyOSTime::instanceNum = 0; GEN12LPTEST_F(ProfilingTestsGen12LP, givenRawTimestampsDebugModeWhenDataIsQueriedThenRawDataIsReturnedGen12Lp) { DebugManagerStateRestore stateRestore; DebugManager.flags.ReturnRawGpuTimestamps.set(1); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MyOSTime::instanceNum = 0; device->setOSTime(new MyOSTime()); EXPECT_EQ(1, MyOSTime::instanceNum); MockContext context; cl_command_queue_properties props[5] = {0, 0, 0, 0, 0}; MockCommandQueue cmdQ(&context, device.get(), props, false); cmdQ.setProfilingEnabled(); cmdQ.device = device.get(); HwTimeStamps timestamp; timestamp.GlobalStartTS = 10; 
timestamp.ContextStartTS = 20; timestamp.GlobalEndTS = 80; timestamp.ContextEndTS = 56; timestamp.GlobalCompleteTS = 0; timestamp.ContextCompleteTS = 70; MockTagNode timestampNode; timestampNode.tagForCpuAccess = ×tamp; MockEvent event(&cmdQ, CL_COMPLETE, 0, 0); cl_event clEvent = &event; event.queueTimeStamp.CPUTimeinNS = 1; event.queueTimeStamp.GPUTimeStamp = 2; event.submitTimeStamp.CPUTimeinNS = 3; event.submitTimeStamp.GPUTimeStamp = 4; event.setCPUProfilingPath(false); event.timeStampNode = ×tampNode; event.calcProfilingData(); cl_ulong queued, submited, start, end, complete; clGetEventProfilingInfo(clEvent, CL_PROFILING_COMMAND_QUEUED, sizeof(cl_ulong), &queued, nullptr); clGetEventProfilingInfo(clEvent, CL_PROFILING_COMMAND_SUBMIT, sizeof(cl_ulong), &submited, nullptr); clGetEventProfilingInfo(clEvent, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &start, nullptr); clGetEventProfilingInfo(clEvent, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &end, nullptr); clGetEventProfilingInfo(clEvent, CL_PROFILING_COMMAND_COMPLETE, sizeof(cl_ulong), &complete, nullptr); EXPECT_EQ(timestamp.GlobalEndTS, complete); EXPECT_EQ(timestamp.GlobalEndTS, end); EXPECT_EQ(timestamp.GlobalStartTS, start); EXPECT_EQ(event.submitTimeStamp.GPUTimeStamp, submited); EXPECT_EQ(event.queueTimeStamp.GPUTimeStamp, queued); event.timeStampNode = nullptr; } compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/rkl/000077500000000000000000000000001422164147700236675ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/rkl/CMakeLists.txt000066400000000000000000000014431422164147700264310ustar00rootroot00000000000000# # Copyright (C) 2020-2022 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_RKL) set(IGDRCL_SRCS_tests_gen12lp_rkl_excludes ${CMAKE_CURRENT_SOURCE_DIR}/excludes_ocl_rkl.cpp ) set_property(GLOBAL APPEND PROPERTY IGDRCL_SRCS_tests_excludes ${IGDRCL_SRCS_tests_gen12lp_rkl_excludes}) set(IGDRCL_SRCS_tests_gen12lp_rkl 
${IGDRCL_SRCS_tests_gen12lp_rkl_excludes} ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/test_hw_helper_rkl.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_hw_info_config_rkl.cpp ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen12lp_rkl}) add_subdirectories() neo_copy_test_files_with_revision(copy_test_files_rkl_0 rkl 0) add_dependencies(copy_test_files_per_product copy_test_files_rkl_0) endif() compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/rkl/excludes_ocl_rkl.cpp000066400000000000000000000015321422164147700277150ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" HWTEST_EXCLUDE_PRODUCT(DeviceFactoryTest, givenInvalidHwConfigStringWhenPrepareDeviceEnvironmentsForProductFamilyOverrideThenThrowsException, IGFX_ROCKETLAKE); HWTEST_EXCLUDE_PRODUCT(BufferSetSurfaceTests, givenBufferSetSurfaceThatMemoryIsUnalignedToCachelineButReadOnlyThenL3CacheShouldBeStillOn, IGFX_ROCKETLAKE) HWTEST_EXCLUDE_PRODUCT(BufferSetSurfaceTests, givenAlignedCacheableReadOnlyBufferThenChoseOclBufferPolicy, IGFX_ROCKETLAKE); HWTEST_EXCLUDE_PRODUCT(HwInfoConfigTest, givenHwInfoConfigWhenAskedIfForceEmuInt32DivRemSPWAIsRequiredThenFalseIsReturned, IGFX_ROCKETLAKE); HWTEST_EXCLUDE_PRODUCT(HwInfoConfigTest, givenHwInfoConfigWhenAskedIf3DPipelineSelectWAIsRequiredThenFalseIsReturned, IGFX_ROCKETLAKE); compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/rkl/linux/000077500000000000000000000000001422164147700250265ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/rkl/linux/CMakeLists.txt000066400000000000000000000005331422164147700275670ustar00rootroot00000000000000# # Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_gen12_rkl_linux ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/hw_info_config_tests_rkl.cpp ) if(UNIX) 
target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen12_rkl_linux}) add_subdirectory(dll) endif() compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/rkl/linux/dll/000077500000000000000000000000001422164147700256015ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/rkl/linux/dll/CMakeLists.txt000066400000000000000000000004751422164147700303470ustar00rootroot00000000000000# # Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_linux_dll_tests_gen12_rkl ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/device_id_tests_rkl.cpp ) target_sources(igdrcl_linux_dll_tests PRIVATE ${IGDRCL_SRCS_linux_dll_tests_gen12_rkl}) compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/rkl/linux/dll/device_id_tests_rkl.cpp000066400000000000000000000020551422164147700323140ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/fixtures/linux/device_id_fixture.h" using namespace NEO; TEST_F(DeviceIdTests, GivenRklSupportedDeviceIdThenHardwareInfoIsCorrect) { std::array expectedDescriptors = {{{0x4C80, &RKL_HW_CONFIG::hwInfo, &RKL_HW_CONFIG::setupHardwareInfo}, {0x4C8A, &RKL_HW_CONFIG::hwInfo, &RKL_HW_CONFIG::setupHardwareInfo}, {0x4C8B, &RKL_HW_CONFIG::hwInfo, &RKL_HW_CONFIG::setupHardwareInfo}, {0x4C8C, &RKL_HW_CONFIG::hwInfo, &RKL_HW_CONFIG::setupHardwareInfo}, {0x4C90, &RKL_HW_CONFIG::hwInfo, &RKL_HW_CONFIG::setupHardwareInfo}, {0x4C9A, &RKL_HW_CONFIG::hwInfo, &RKL_HW_CONFIG::setupHardwareInfo}}}; testImpl(expectedDescriptors); } compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/rkl/linux/hw_info_config_tests_rkl.cpp000066400000000000000000000064301422164147700326050ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/os_interface.h" #include 
"shared/test/common/libult/linux/drm_mock.h" #include "shared/test/unit_test/helpers/gtest_helpers.h" #include "opencl/test/unit_test/os_interface/linux/hw_info_config_linux_tests.h" using namespace NEO; struct HwInfoConfigTestLinuxRkl : HwInfoConfigTestLinux { void SetUp() override { HwInfoConfigTestLinux::SetUp(); drm = new DrmMock(*executionEnvironment->rootDeviceEnvironments[0]); osInterface->setDriverModel(std::unique_ptr(drm)); drm->storedDeviceID = 0x4C8A; } }; RKLTEST_F(HwInfoConfigTestLinuxRkl, WhenConfiguringHwInfoThenConfigIsCorrect) { auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ((unsigned short)drm->storedDeviceID, outHwInfo.platform.usDeviceID); EXPECT_EQ((unsigned short)drm->storedDeviceRevID, outHwInfo.platform.usRevId); EXPECT_EQ((uint32_t)drm->storedEUVal, outHwInfo.gtSystemInfo.EUCount); EXPECT_EQ((uint32_t)drm->storedSSVal, outHwInfo.gtSystemInfo.SubSliceCount); EXPECT_EQ(1u, outHwInfo.gtSystemInfo.SliceCount); EXPECT_FALSE(outHwInfo.featureTable.flags.ftrTileY); } RKLTEST_F(HwInfoConfigTestLinuxRkl, GivenIncorrectDataWhenConfiguringHwInfoThenErrorIsReturned) { auto hwInfoConfig = HwInfoConfig::get(productFamily); drm->storedRetValForDeviceID = -1; int ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-1, ret); drm->storedRetValForDeviceID = 0; drm->storedRetValForDeviceRevID = -1; ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-1, ret); drm->storedRetValForDeviceRevID = 0; drm->failRetTopology = true; drm->storedRetValForEUVal = -1; ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-1, ret); drm->storedRetValForEUVal = 0; drm->storedRetValForSSVal = -1; ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-1, ret); } TEST(RklHwInfoTests, WhenSettingUpHwInfoThenConfigIsCorrect) { 
HardwareInfo hwInfo = *defaultHwInfo; auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); DrmMock drm(*executionEnvironment->rootDeviceEnvironments[0]); GT_SYSTEM_INFO >SystemInfo = hwInfo.gtSystemInfo; DeviceDescriptor device = {0, &hwInfo, &RKL_HW_CONFIG::setupHardwareInfo}; int ret = drm.setupHardwareInfo(&device, false); EXPECT_EQ(ret, 0); EXPECT_GT(gtSystemInfo.EUCount, 0u); EXPECT_GT(gtSystemInfo.ThreadCount, 0u); EXPECT_GT(gtSystemInfo.SliceCount, 0u); EXPECT_GT(gtSystemInfo.SubSliceCount, 0u); EXPECT_GT(gtSystemInfo.DualSubSliceCount, 0u); EXPECT_GT_VAL(gtSystemInfo.L3CacheSizeInKb, 0u); EXPECT_EQ(gtSystemInfo.CsrSizeInMb, 8u); EXPECT_FALSE(gtSystemInfo.IsDynamicallyPopulated); EXPECT_GT(gtSystemInfo.DualSubSliceCount, 0u); EXPECT_GT(gtSystemInfo.MaxDualSubSlicesSupported, 0u); } compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/rkl/test_hw_helper_rkl.cpp000066400000000000000000000031611422164147700302600ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/hw_info_config.h" #include "shared/test/common/helpers/hw_helper_tests.h" using HwHelperTestRkl = HwHelperTest; RKLTEST_F(HwHelperTestRkl, givenRklSteppingA0WhenAdjustDefaultEngineTypeCalledThenRcsIsReturned) { const auto &hwInfoConfig = *HwInfoConfig::get(hardwareInfo.platform.eProductFamily); hardwareInfo.featureTable.flags.ftrCCSNode = true; hardwareInfo.platform.usRevId = hwInfoConfig.getHwRevIdFromStepping(REVISION_A0, hardwareInfo); auto &helper = HwHelper::get(renderCoreFamily); helper.adjustDefaultEngineType(&hardwareInfo); EXPECT_EQ(aub_stream::ENGINE_RCS, hardwareInfo.capabilityTable.defaultEngineType); } RKLTEST_F(HwHelperTestRkl, givenRklSteppingBWhenAdjustDefaultEngineTypeCalledThenRcsIsReturned) { const auto &hwInfoConfig = 
*HwInfoConfig::get(hardwareInfo.platform.eProductFamily); hardwareInfo.featureTable.flags.ftrCCSNode = true; hardwareInfo.platform.usRevId = hwInfoConfig.getHwRevIdFromStepping(REVISION_B, hardwareInfo); auto &helper = HwHelper::get(renderCoreFamily); helper.adjustDefaultEngineType(&hardwareInfo); EXPECT_EQ(aub_stream::ENGINE_RCS, hardwareInfo.capabilityTable.defaultEngineType); } RKLTEST_F(HwHelperTestRkl, givenRklWhenRequestedVmeFlagsThenReturnFalse) { EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.supportsVme); EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.ftrSupportsVmeAvcTextureSampler); EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.ftrSupportsVmeAvcPreemption); } compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/rkl/test_hw_info_config_rkl.cpp000066400000000000000000000030541422164147700312620ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/compiler_hw_info_config.h" #include "shared/source/os_interface/hw_info_config.h" #include "shared/test/common/helpers/default_hw_info.h" #include "shared/test/common/test_macros/test.h" #include using namespace NEO; using RklHwInfoConfig = ::testing::Test; RKLTEST_F(RklHwInfoConfig, givenA0OrBSteppingAndRklPlatformWhenAskingIfWAIsRequiredThenReturnTrue) { auto hwInfoConfig = HwInfoConfig::get(productFamily); std::array, 3> revisions = { {{REVISION_A0, true}, {REVISION_B, true}, {REVISION_C, false}}}; for (const auto &[revision, paramBool] : revisions) { auto hwInfo = *defaultHwInfo; hwInfo.platform.usRevId = hwInfoConfig->getHwRevIdFromStepping(revision, hwInfo); hwInfoConfig->configureHardwareCustom(&hwInfo, nullptr); EXPECT_EQ(paramBool, hwInfoConfig->isForceEmuInt32DivRemSPWARequired(hwInfo)); } } RKLTEST_F(RklHwInfoConfig, givenHwInfoConfigWhenAskedIf3DPipelineSelectWAIsRequiredThenTrueIsReturned) { const auto &hwInfoConfig = *HwInfoConfig::get(defaultHwInfo->platform.eProductFamily); 
EXPECT_TRUE(hwInfoConfig.is3DPipelineSelectWARequired()); } using CompilerHwInfoConfigHelperTestsRkl = ::testing::Test; RKLTEST_F(CompilerHwInfoConfigHelperTestsRkl, givenRklWhenIsForceEmuInt32DivRemSPRequiredIsCalledThenReturnsTrue) { EXPECT_TRUE(CompilerHwInfoConfig::get(productFamily)->isForceEmuInt32DivRemSPRequired()); } compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/sampler_tests_gen12lp.inl000066400000000000000000000051521422164147700300230ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/os_interface/hw_info_config.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include using namespace NEO; typedef Test Gen12LpSamplerTest; HWTEST2_F(Gen12LpSamplerTest, givenTglLpSamplerWhenUsingDefaultFilteringAndAppendSamplerStateParamsThenDisableLowQualityFilter, IsTGLLP) { EXPECT_FALSE(DebugManager.flags.ForceSamplerLowFilteringPrecision.get()); typedef typename FamilyType::SAMPLER_STATE SAMPLER_STATE; auto state = FamilyType::cmdInitSamplerState; EXPECT_EQ(SAMPLER_STATE::LOW_QUALITY_FILTER_DISABLE, state.getLowQualityFilter()); HwInfoConfig::get(defaultHwInfo->platform.eProductFamily)->adjustSamplerState(&state, *defaultHwInfo); EXPECT_EQ(SAMPLER_STATE::LOW_QUALITY_FILTER_DISABLE, state.getLowQualityFilter()); } HWTEST2_F(Gen12LpSamplerTest, givenTglLpSamplerWhenForcingLowQualityFilteringAndAppendSamplerStateParamsThenEnableLowQualityFilter, IsTGLLP) { DebugManagerStateRestore dbgRestore; DebugManager.flags.ForceSamplerLowFilteringPrecision.set(true); EXPECT_TRUE(DebugManager.flags.ForceSamplerLowFilteringPrecision.get()); typedef typename FamilyType::SAMPLER_STATE SAMPLER_STATE; auto state = FamilyType::cmdInitSamplerState; EXPECT_EQ(SAMPLER_STATE::LOW_QUALITY_FILTER_DISABLE, 
state.getLowQualityFilter()); HwInfoConfig::get(defaultHwInfo->platform.eProductFamily)->adjustSamplerState(&state, *defaultHwInfo); EXPECT_EQ(SAMPLER_STATE::LOW_QUALITY_FILTER_ENABLE, state.getLowQualityFilter()); } GEN12LPTEST_F(Gen12LpSamplerTest, GivenDefaultWhenGettingLowLowQualityFilterStateThenItIsDisabled) { typedef typename FamilyType::SAMPLER_STATE SAMPLER_STATE; auto state = FamilyType::cmdInitSamplerState; EXPECT_EQ(SAMPLER_STATE::LOW_QUALITY_FILTER_DISABLE, state.getLowQualityFilter()); } GEN12LPTEST_F(Gen12LpSamplerTest, givenGen12LpSamplerWhenProgrammingLowQualityCubeCornerModeThenTheModeChangesAppropriately) { typedef typename FamilyType::SAMPLER_STATE SAMPLER_STATE; auto state = FamilyType::cmdInitSamplerState; EXPECT_EQ(SAMPLER_STATE::LOW_QUALITY_CUBE_CORNER_MODE_ENABLE, state.getLowQualityCubeCornerMode()); state.setLowQualityCubeCornerMode(SAMPLER_STATE::LOW_QUALITY_CUBE_CORNER_MODE_DISABLE); EXPECT_EQ(SAMPLER_STATE::LOW_QUALITY_CUBE_CORNER_MODE_DISABLE, state.getLowQualityCubeCornerMode()); } tbx_command_stream_receiver_tests_gen12lp.inl000066400000000000000000000047071422164147700340400ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/aub_mem_dump/page_table_entry_bits.h" #include "shared/source/command_stream/tbx_command_stream_receiver_hw.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" using namespace NEO; using Gen12LPTbxCommandStreamReceiverTests = Test; GEN12LPTEST_F(Gen12LPTbxCommandStreamReceiverTests, givenNullPtrGraphicsAlloctionWhenGetPPGTTAdditionalBitsIsCalledThenAppropriateValueIsReturned) { auto tbxCsr = std::make_unique>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); GraphicsAllocation *allocation = nullptr; auto 
bits = tbxCsr->getPPGTTAdditionalBits(allocation); constexpr uint64_t expectedBits = BIT(PageTableEntry::presentBit) | BIT(PageTableEntry::writableBit); EXPECT_EQ(expectedBits, bits); } GEN12LPTEST_F(Gen12LPTbxCommandStreamReceiverTests, givenGraphicsAlloctionWithLocalMemoryPoolWhenGetPPGTTAdditionalBitsIsCalledThenAppropriateValueIsReturned) { auto tbxCsr = std::make_unique>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); MockGraphicsAllocation allocation(nullptr, 0); allocation.overrideMemoryPool(MemoryPool::LocalMemory); auto bits = tbxCsr->getPPGTTAdditionalBits(&allocation); constexpr uint64_t expectedBits = BIT(PageTableEntry::presentBit) | BIT(PageTableEntry::writableBit) | BIT(PageTableEntry::localMemoryBit); EXPECT_EQ(expectedBits, bits); } GEN12LPTEST_F(Gen12LPTbxCommandStreamReceiverTests, whenAskedForPollForCompletionParametersThenReturnCorrectValues) { class MyMockTbxHw : public TbxCommandStreamReceiverHw { public: MyMockTbxHw(ExecutionEnvironment &executionEnvironment) : TbxCommandStreamReceiverHw(executionEnvironment, 0, 1) {} using TbxCommandStreamReceiverHw::getpollNotEqualValueForPollForCompletion; using TbxCommandStreamReceiverHw::getMaskAndValueForPollForCompletion; }; MyMockTbxHw myMockTbxHw(*pDevice->executionEnvironment); EXPECT_EQ(0x80u, myMockTbxHw.getMaskAndValueForPollForCompletion()); EXPECT_TRUE(myMockTbxHw.getpollNotEqualValueForPollForCompletion()); } compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/test_device_caps_gen12lp.inl000066400000000000000000000142101422164147700304350ustar00rootroot00000000000000/* * Copyright (C) 2019-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/hw_helper.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/helpers/gtest_helpers.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" using namespace NEO; typedef Test Gen12LpDeviceCaps; HWTEST2_F(Gen12LpDeviceCaps, 
WhenCheckingExtensionStringThenFp64IsNotSupported, IsTGLLP) { const auto &caps = pClDevice->getDeviceInfo(); std::string extensionString = caps.deviceExtensions; EXPECT_EQ(std::string::npos, extensionString.find(std::string("cl_khr_fp64"))); EXPECT_EQ(0u, caps.doubleFpConfig); } HWTEST2_F(Gen12LpDeviceCaps, givenGen12lpWhenCheckExtensionsThenSubgroupLocalBlockIOIsSupported, IsTGLLP) { const auto &caps = pClDevice->getDeviceInfo(); EXPECT_TRUE(hasSubstr(caps.deviceExtensions, std::string("cl_intel_subgroup_local_block_io"))); } HWTEST2_F(Gen12LpDeviceCaps, givenGen12lpWhenCheckExtensionsThenDeviceDoesNotReportClKhrSubgroupsExtension, IsTGLLP) { const auto &caps = pClDevice->getDeviceInfo(); EXPECT_FALSE(hasSubstr(caps.deviceExtensions, std::string("cl_khr_subgroups"))); } HWTEST2_F(Gen12LpDeviceCaps, givenGen12lpWhenCheckingCapsThenDeviceDoesNotSupportIndependentForwardProgress, IsTGLLP) { const auto &caps = pClDevice->getDeviceInfo(); EXPECT_FALSE(caps.independentForwardProgress); } HWTEST2_F(Gen12LpDeviceCaps, WhenCheckingCapsThenCorrectlyRoundedDivideSqrtIsNotSupported, IsTGLLP) { const auto &caps = pClDevice->getDeviceInfo(); EXPECT_EQ(0u, caps.singleFpConfig & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT); } GEN12LPTEST_F(Gen12LpDeviceCaps, GivenDefaultWhenCheckingPreemptionModeThenMidThreadIsReported) { EXPECT_EQ(PreemptionMode::MidThread, pDevice->getHardwareInfo().capabilityTable.defaultPreemptionMode); } GEN12LPTEST_F(Gen12LpDeviceCaps, WhenCheckingCapsThenProfilingTimerResolutionIs83) { const auto &caps = pClDevice->getSharedDeviceInfo(); EXPECT_EQ(83u, caps.outProfilingTimerResolution); } GEN12LPTEST_F(Gen12LpDeviceCaps, WhenCheckingCapsThenKmdNotifyMechanismIsCorrectlyReported) { EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.enableKmdNotify); EXPECT_EQ(0, pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds); 
EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.enableQuickKmdSleep); EXPECT_EQ(0, pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayQuickKmdSleepMicroseconds); EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.enableQuickKmdSleepForSporadicWaits); EXPECT_EQ(0, pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayQuickKmdSleepForSporadicWaitsMicroseconds); EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.enableQuickKmdSleepForDirectSubmission); EXPECT_EQ(0, pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayQuickKmdSleepForDirectSubmissionMicroseconds); } GEN12LPTEST_F(Gen12LpDeviceCaps, WhenCheckingCapsThenCompressionIsDisabled) { EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.ftrRenderCompressedBuffers); EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.ftrRenderCompressedImages); } GEN12LPTEST_F(Gen12LpDeviceCaps, givenHwInfoWhenRequestedComputeUnitsUsedForScratchThenReturnValidValue) { const auto &hwInfo = pDevice->getHardwareInfo(); auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); uint32_t expectedValue = hwInfo.gtSystemInfo.MaxSubSlicesSupported * hwInfo.gtSystemInfo.MaxEuPerSubSlice * 8; EXPECT_EQ(expectedValue, hwHelper.getComputeUnitsUsedForScratch(&hwInfo)); EXPECT_EQ(expectedValue, pDevice->getDeviceInfo().computeUnitsUsedForScratch); } GEN12LPTEST_F(Gen12LpDeviceCaps, givenHwInfoWhenSlmSizeIsRequiredThenReturnCorrectValue) { EXPECT_EQ(64u, pDevice->getHardwareInfo().capabilityTable.slmSize); } GEN12LPTEST_F(Gen12LpDeviceCaps, givenGen12LpWhenCheckBlitterOperationsSupportThenReturnFalse) { EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.blitterOperationsSupported); } GEN12LPTEST_F(Gen12LpDeviceCaps, givenGen12LpWhenCheckingImageSupportThenReturnTrue) { EXPECT_TRUE(pDevice->getHardwareInfo().capabilityTable.supportsImages); } GEN12LPTEST_F(Gen12LpDeviceCaps, 
givenGen12LpWhenCheckingMediaBlockSupportThenReturnTrue) { EXPECT_TRUE(pDevice->getHardwareInfo().capabilityTable.supportsMediaBlock); } GEN12LPTEST_F(Gen12LpDeviceCaps, givenGen12LpWhenCheckingCoherencySupportThenReturnFalse) { EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.ftrSupportsCoherency); } HWTEST2_F(Gen12LpDeviceCaps, givenTglLpWhenCheckSupportCacheFlushAfterWalkerThenFalse, IsTGLLP) { EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.supportCacheFlushAfterWalker); } GEN12LPTEST_F(Gen12LpDeviceCaps, givenGen12LpDeviceWhenCheckingDeviceEnqueueSupportThenFalseIsReturned) { EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.supportsDeviceEnqueue); } GEN12LPTEST_F(Gen12LpDeviceCaps, givenGen12LpDeviceWhenCheckingPipesSupportThenFalseIsReturned) { EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.supportsPipes); } using TglLpUsDeviceIdTest = Test; HWTEST2_F(TglLpUsDeviceIdTest, WhenCheckingSimulationCapThenResultIsCorrect, IsTGLLP) { unsigned short tglLpSimulationIds[2] = { 0xFF20, 0, // default, non-simulation }; NEO::MockDevice *mockDevice = nullptr; for (auto id : tglLpSimulationIds) { mockDevice = createWithUsDeviceId(id); ASSERT_NE(mockDevice, nullptr); if (id == 0) EXPECT_FALSE(mockDevice->isSimulation()); else EXPECT_TRUE(mockDevice->isSimulation()); delete mockDevice; } } HWTEST2_F(TglLpUsDeviceIdTest, GivenTGLLPWhenCheckftr64KBpagesThenTrue, IsTGLLP) { EXPECT_TRUE(pDevice->getHardwareInfo().capabilityTable.ftr64KBpages); } HWTEST2_F(TglLpUsDeviceIdTest, givenGen12lpWhenCheckFtrSupportsInteger64BitAtomicsThenReturnTrue, IsTGLLP) { EXPECT_TRUE(pDevice->getHardwareInfo().capabilityTable.ftrSupportsInteger64BitAtomics); } compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/test_platform_caps_gen12lp.inl000066400000000000000000000012531422164147700310250ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include 
"opencl/test/unit_test/fixtures/platform_fixture.h" using namespace NEO; struct Gen12LpPlatformCaps : public PlatformFixture, public ::testing::Test { void SetUp() override { PlatformFixture::SetUp(); } void TearDown() override { PlatformFixture::TearDown(); } }; HWTEST2_F(Gen12LpPlatformCaps, WhenCheckingExtensionStringThenFp64IsNotSupported, IsTGLLP) { const auto &caps = pPlatform->getPlatformInfo(); EXPECT_EQ(std::string::npos, caps.extensions.find(std::string("cl_khr_fp64"))); } compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/test_sample_gen12lp.inl000066400000000000000000000014341422164147700274550ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" using namespace NEO; typedef Test TigerlakeLpOnlyTest; HWTEST2_F(TigerlakeLpOnlyTest, WhenGettingHardwareInfoThenProductFamilyIsTigerlakeLp, IsTGLLP) { EXPECT_EQ(IGFX_TIGERLAKE_LP, pDevice->getHardwareInfo().platform.eProductFamily); } typedef Test Gen12LpOnlyTeset; GEN12LPTEST_F(Gen12LpOnlyTeset, WhenGettingRenderCoreFamilyThenGen12lpCoreIsReturned) { EXPECT_NE(IGFX_GEN9_CORE, pDevice->getRenderCoreFamily()); EXPECT_NE(IGFX_GEN11_CORE, pDevice->getRenderCoreFamily()); EXPECT_EQ(IGFX_GEN12LP_CORE, pDevice->getRenderCoreFamily()); } compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/tgllp/000077500000000000000000000000001422164147700242215ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/tgllp/CMakeLists.txt000066400000000000000000000015641422164147700267670ustar00rootroot00000000000000# # Copyright (C) 2019-2022 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_TGLLP) set(IGDRCL_SRCS_tests_gen12lp_tgllp_excludes ${CMAKE_CURRENT_SOURCE_DIR}/excludes_ocl_tgllp.cpp ) set_property(GLOBAL APPEND PROPERTY IGDRCL_SRCS_tests_excludes ${IGDRCL_SRCS_tests_gen12lp_tgllp_excludes}) 
set(IGDRCL_SRCS_tests_gen12lp_tgllp ${IGDRCL_SRCS_tests_gen12lp_tgllp_excludes} ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/kernel_tests_tgllp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_hw_helper_tgllp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_hw_info_config_tgllp.cpp ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen12lp_tgllp}) add_subdirectories() neo_copy_test_files_with_revision(copy_test_files_tgllp_0 tgllp 0) add_dependencies(copy_test_files_per_product copy_test_files_tgllp_0) endif() compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/tgllp/buffer_tests_tgllp.cpp000066400000000000000000000106111422164147700306210ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/device/device.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/mocks/mock_context.h" using namespace NEO; struct BufferTestsTgllp : ::testing::Test { void SetUp() override { context = std::make_unique(); device = context->getDevice(0); } std::unique_ptr context{}; ClDevice *device{}; cl_int retVal = CL_SUCCESS; }; GEN12LPTEST_F(BufferTestsTgllp, givenBufferNotReadonlyWhenProgrammingSurfaceStateThenUseL3) { auto buffer = std::unique_ptr(Buffer::create( context.get(), CL_MEM_READ_WRITE, MemoryConstants::pageSize, nullptr, retVal)); ASSERT_EQ(CL_SUCCESS, retVal); typename FamilyType::RENDER_SURFACE_STATE surfaceState = {}; buffer->setArgStateful(&surfaceState, false, false, false, false, device->getDevice(), false, 1u); const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); const auto actualMocs = surfaceState.getMemoryObjectControlState(); EXPECT_EQ(expectedMocs, actualMocs); } GEN12LPTEST_F(BufferTestsTgllp, 
givenBufferReadonlyWhenProgrammingSurfaceStateThenUseL1) { auto buffer = std::unique_ptr(Buffer::create( context.get(), CL_MEM_READ_WRITE, MemoryConstants::pageSize, nullptr, retVal)); ASSERT_EQ(CL_SUCCESS, retVal); typename FamilyType::RENDER_SURFACE_STATE surfaceState = {}; buffer->setArgStateful(&surfaceState, false, false, false, true, context->getDevice(0)->getDevice(), false, 1u); const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST); const auto actualMocs = surfaceState.getMemoryObjectControlState(); EXPECT_EQ(expectedMocs, actualMocs); } GEN12LPTEST_F(BufferTestsTgllp, givenConstantSurfaceWhenProgrammingSurfaceStateThenUseL1) { auto buffer = std::unique_ptr(Buffer::create( context.get(), CL_MEM_READ_WRITE, MemoryConstants::pageSize, nullptr, retVal)); ASSERT_EQ(CL_SUCCESS, retVal); buffer->getGraphicsAllocation(0)->setAllocationType(AllocationType::CONSTANT_SURFACE); typename FamilyType::RENDER_SURFACE_STATE surfaceState = {}; buffer->setArgStateful(&surfaceState, false, false, false, false, context->getDevice(0)->getDevice(), false, 1u); const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST); const auto actualMocs = surfaceState.getMemoryObjectControlState(); EXPECT_EQ(expectedMocs, actualMocs); } GEN12LPTEST_F(BufferTestsTgllp, givenL1ForceEnabledWhenProgrammingSurfaceStateThenUseL1) { DebugManagerStateRestore restore{}; DebugManager.flags.ForceL1Caching.set(1); auto buffer = std::unique_ptr(Buffer::create( context.get(), CL_MEM_READ_WRITE, MemoryConstants::pageSize, nullptr, retVal)); ASSERT_EQ(CL_SUCCESS, retVal); typename FamilyType::RENDER_SURFACE_STATE surfaceState = {}; buffer->setArgStateful(&surfaceState, false, false, false, false, device->getDevice(), false, 1u); const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST); const auto actualMocs = surfaceState.getMemoryObjectControlState(); EXPECT_EQ(expectedMocs, actualMocs); } 
GEN12LPTEST_F(BufferTestsTgllp, givenBufferReadonlyL1ForceDisabledWhenProgrammingSurfaceStateThenUseL3) { DebugManagerStateRestore restore{}; DebugManager.flags.ForceL1Caching.set(0); auto buffer = std::unique_ptr(Buffer::create( context.get(), CL_MEM_READ_WRITE, MemoryConstants::pageSize, nullptr, retVal)); ASSERT_EQ(CL_SUCCESS, retVal); typename FamilyType::RENDER_SURFACE_STATE surfaceState = {}; buffer->setArgStateful(&surfaceState, false, false, false, true, device->getDevice(), false, 1u); const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); const auto actualMocs = surfaceState.getMemoryObjectControlState(); EXPECT_EQ(expectedMocs, actualMocs); } compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/tgllp/excludes_ocl_tgllp.cpp000066400000000000000000000017241422164147700306040ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" HWTEST_EXCLUDE_PRODUCT(BufferSetSurfaceTests, givenBufferSetSurfaceThatMemoryIsUnalignedToCachelineButReadOnlyThenL3CacheShouldBeStillOn, IGFX_TIGERLAKE_LP) HWTEST_EXCLUDE_PRODUCT(BufferSetSurfaceTests, givenAlignedCacheableReadOnlyBufferThenChoseOclBufferPolicy, IGFX_TIGERLAKE_LP); HWTEST_EXCLUDE_PRODUCT(HwInfoConfigTest, givenHwInfoConfigWhenAskedIfPipeControlWAIsRequiredThenFalseIsReturned, IGFX_TIGERLAKE_LP); HWTEST_EXCLUDE_PRODUCT(HwInfoConfigTest, givenHwInfoConfigWhenAskedIfImagePitchAlignmentWAIsRequiredThenFalseIsReturned, IGFX_TIGERLAKE_LP); HWTEST_EXCLUDE_PRODUCT(HwInfoConfigTest, givenHwInfoConfigWhenAskedIfForceEmuInt32DivRemSPWAIsRequiredThenFalseIsReturned, IGFX_TIGERLAKE_LP); HWTEST_EXCLUDE_PRODUCT(HwInfoConfigTest, givenHwInfoConfigWhenAskedIf3DPipelineSelectWAIsRequiredThenFalseIsReturned, IGFX_TIGERLAKE_LP); 
compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/tgllp/kernel_tests_tgllp.cpp000066400000000000000000000033711422164147700306350ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/hw_info_config.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" using namespace NEO; using KernelTgllpTests = ::testing::Test; TGLLPTEST_F(KernelTgllpTests, GivenUseOffsetToSkipSetFFIDGPWorkaroundActiveWhenSettingKernelStartOffsetThenAdditionalOffsetIsSet) { const uint64_t defaultKernelStartOffset = 0; const uint64_t additionalOffsetDueToFfid = 0x1234; auto hwInfo = *defaultHwInfo; const auto &hwInfoConfig = *HwInfoConfig::get(hwInfo.platform.eProductFamily); unsigned short steppings[] = {REVISION_A0, REVISION_A1}; for (auto stepping : steppings) { hwInfo.platform.usRevId = hwInfoConfig.getHwRevIdFromStepping(stepping, hwInfo); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); MockKernelWithInternals mockKernelWithInternals{*device}; mockKernelWithInternals.kernelInfo.kernelDescriptor.entryPoints.skipSetFFIDGP = additionalOffsetDueToFfid; for (auto isCcsUsed : ::testing::Bool()) { uint64_t kernelStartOffset = mockKernelWithInternals.mockKernel->getKernelStartOffset(false, false, isCcsUsed); if (stepping == REVISION_A0 && isCcsUsed) { EXPECT_EQ(defaultKernelStartOffset + additionalOffsetDueToFfid, kernelStartOffset); } else { EXPECT_EQ(defaultKernelStartOffset, kernelStartOffset); } } } } 
compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/tgllp/linux/000077500000000000000000000000001422164147700253605ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/tgllp/linux/CMakeLists.txt000066400000000000000000000005441422164147700301230ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_gen12lp_tgllp_linux ${CMAKE_CURRENT_SOURCE_DIR}/hw_info_config_tests_tgllp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) if(UNIX) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen12lp_tgllp_linux}) add_subdirectories() endif() compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/tgllp/linux/dll/000077500000000000000000000000001422164147700261335ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/tgllp/linux/dll/CMakeLists.txt000066400000000000000000000005071422164147700306750ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_linux_dll_tests_gen12lp_tgllp ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/device_id_tests_tgllp.cpp ) target_sources(igdrcl_linux_dll_tests PRIVATE ${IGDRCL_SRCS_linux_dll_tests_gen12lp_tgllp}) compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/tgllp/linux/dll/device_id_tests_tgllp.cpp000066400000000000000000000017041422164147700332000ustar00rootroot00000000000000/* * Copyright (C) 2019-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/fixtures/linux/device_id_fixture.h" using namespace NEO; TEST_F(DeviceIdTests, GivenTgllpSupportedDeviceIdThenHardwareInfoIsCorrect) { std::array expectedDescriptors = {{ {0xFF20, &TGLLP_1x6x16::hwInfo, &TGLLP_1x6x16::setupHardwareInfo}, {0x9A49, &TGLLP_1x6x16::hwInfo, &TGLLP_1x6x16::setupHardwareInfo}, {0x9A40, &TGLLP_1x6x16::hwInfo, &TGLLP_1x6x16::setupHardwareInfo}, {0x9A59, &TGLLP_1x6x16::hwInfo, 
&TGLLP_1x6x16::setupHardwareInfo}, {0x9A60, &TGLLP_1x2x16::hwInfo, &TGLLP_1x2x16::setupHardwareInfo}, {0x9A68, &TGLLP_1x2x16::hwInfo, &TGLLP_1x2x16::setupHardwareInfo}, {0x9A70, &TGLLP_1x2x16::hwInfo, &TGLLP_1x2x16::setupHardwareInfo}, {0x9A78, &TGLLP_1x2x16::hwInfo, &TGLLP_1x2x16::setupHardwareInfo}, }}; testImpl(expectedDescriptors); } compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/tgllp/linux/hw_info_config_tests_tgllp.cpp000066400000000000000000000100511422164147700334630ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/preemption.h" #include "shared/source/os_interface/hw_info_config.h" #include "shared/source/os_interface/os_interface.h" #include "shared/test/common/helpers/hw_helper_tests.h" #include "shared/test/common/libult/linux/drm_mock.h" #include "shared/test/unit_test/helpers/gtest_helpers.h" #include "opencl/test/unit_test/os_interface/linux/hw_info_config_linux_tests.h" using namespace NEO; struct HwInfoConfigTestLinuxTgllp : HwInfoConfigTestLinux { void SetUp() override { HwInfoConfigTestLinux::SetUp(); drm = new DrmMock(*executionEnvironment->rootDeviceEnvironments[0]); osInterface->setDriverModel(std::unique_ptr(drm)); drm->storedDeviceID = 0xFF20; } }; TGLLPTEST_F(HwInfoConfigTestLinuxTgllp, GivenTGLLPWhenConfigureHardwareCustomThenMTPIsNotSet) { auto hwInfoConfig = HwInfoConfig::get(productFamily); pInHwInfo.capabilityTable.defaultPreemptionMode = PreemptionMode::ThreadGroup; PreemptionHelper::adjustDefaultPreemptionMode(pInHwInfo.capabilityTable, true, true, true); int ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_FALSE(outHwInfo.featureTable.flags.ftrGpGpuMidThreadLevelPreempt); } TGLLPTEST_F(HwInfoConfigTestLinuxTgllp, configureHwInfo) { auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); 
EXPECT_EQ(0, ret); EXPECT_EQ((unsigned short)drm->storedDeviceID, outHwInfo.platform.usDeviceID); EXPECT_EQ((unsigned short)drm->storedDeviceRevID, outHwInfo.platform.usRevId); EXPECT_EQ((uint32_t)drm->storedEUVal, outHwInfo.gtSystemInfo.EUCount); EXPECT_EQ((uint32_t)drm->storedSSVal, outHwInfo.gtSystemInfo.SubSliceCount); EXPECT_EQ(1u, outHwInfo.gtSystemInfo.SliceCount); EXPECT_FALSE(outHwInfo.featureTable.flags.ftrTileY); } TGLLPTEST_F(HwInfoConfigTestLinuxTgllp, negative) { auto hwInfoConfig = HwInfoConfig::get(productFamily); drm->storedRetValForDeviceID = -1; int ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-1, ret); drm->storedRetValForDeviceID = 0; drm->storedRetValForDeviceRevID = -1; ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-1, ret); drm->storedRetValForDeviceRevID = 0; drm->failRetTopology = true; drm->storedRetValForEUVal = -1; ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-1, ret); drm->storedRetValForEUVal = 0; drm->storedRetValForSSVal = -1; ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-1, ret); } template class TgllpHwInfoTests : public ::testing::Test {}; typedef ::testing::Types tgllpTestTypes; TYPED_TEST_CASE(TgllpHwInfoTests, tgllpTestTypes); TYPED_TEST(TgllpHwInfoTests, gtSetupIsCorrect) { HardwareInfo hwInfo = *defaultHwInfo; auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); DrmMock drm(*executionEnvironment->rootDeviceEnvironments[0]); GT_SYSTEM_INFO >SystemInfo = hwInfo.gtSystemInfo; DeviceDescriptor device = {0, &hwInfo, &TypeParam::setupHardwareInfo}; int ret = drm.setupHardwareInfo(&device, false); EXPECT_EQ(ret, 0); EXPECT_GT(gtSystemInfo.EUCount, 0u); EXPECT_GT(gtSystemInfo.ThreadCount, 0u); EXPECT_GT(gtSystemInfo.SliceCount, 0u); 
EXPECT_GT(gtSystemInfo.SubSliceCount, 0u); EXPECT_GT(gtSystemInfo.DualSubSliceCount, 0u); EXPECT_GT_VAL(gtSystemInfo.L3CacheSizeInKb, 0u); EXPECT_EQ(gtSystemInfo.CsrSizeInMb, 8u); EXPECT_FALSE(gtSystemInfo.IsDynamicallyPopulated); EXPECT_GT(gtSystemInfo.DualSubSliceCount, 0u); EXPECT_GT(gtSystemInfo.MaxDualSubSlicesSupported, 0u); } compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/tgllp/test_hw_helper_tgllp.cpp000066400000000000000000000057441422164147700311550ustar00rootroot00000000000000/* * Copyright (C) 2019-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/hw_info_config.h" #include "shared/test/common/helpers/hw_helper_tests.h" using HwHelperTestGen12Lp = HwHelperTest; TGLLPTEST_F(HwHelperTestGen12Lp, givenTgllpSteppingA0WhenAdjustDefaultEngineTypeCalledThenRcsIsReturned) { const auto &hwInfoConfig = *HwInfoConfig::get(hardwareInfo.platform.eProductFamily); hardwareInfo.featureTable.flags.ftrCCSNode = true; hardwareInfo.platform.usRevId = hwInfoConfig.getHwRevIdFromStepping(REVISION_A0, hardwareInfo); auto &helper = HwHelper::get(renderCoreFamily); helper.adjustDefaultEngineType(&hardwareInfo); EXPECT_EQ(aub_stream::ENGINE_RCS, hardwareInfo.capabilityTable.defaultEngineType); } TGLLPTEST_F(HwHelperTestGen12Lp, givenTgllpSteppingBWhenAdjustDefaultEngineTypeCalledThenRcsIsReturned) { const auto &hwInfoConfig = *HwInfoConfig::get(hardwareInfo.platform.eProductFamily); hardwareInfo.featureTable.flags.ftrCCSNode = true; hardwareInfo.platform.usRevId = hwInfoConfig.getHwRevIdFromStepping(REVISION_A1, hardwareInfo); auto &helper = HwHelper::get(renderCoreFamily); helper.adjustDefaultEngineType(&hardwareInfo); EXPECT_EQ(aub_stream::ENGINE_RCS, hardwareInfo.capabilityTable.defaultEngineType); } TGLLPTEST_F(HwHelperTestGen12Lp, givenTgllWhenWaForDefaultEngineIsNotAppliedThenCcsIsReturned) { const auto &hwInfoConfig = *HwInfoConfig::get(hardwareInfo.platform.eProductFamily); 
hardwareInfo.featureTable.flags.ftrCCSNode = true; hardwareInfo.platform.usRevId = hwInfoConfig.getHwRevIdFromStepping(REVISION_A0, hardwareInfo); hardwareInfo.platform.eProductFamily = IGFX_UNKNOWN; auto &helper = HwHelper::get(renderCoreFamily); helper.adjustDefaultEngineType(&hardwareInfo); EXPECT_EQ(aub_stream::ENGINE_RCS, hardwareInfo.capabilityTable.defaultEngineType); } TGLLPTEST_F(HwHelperTestGen12Lp, givenTgllpAndVariousSteppingsWhenGettingIsWorkaroundRequiredThenCorrectValueIsReturned) { auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); const auto &hwInfoConfig = *HwInfoConfig::get(hardwareInfo.platform.eProductFamily); uint32_t steppings[] = { REVISION_A0, REVISION_B, REVISION_C, CommonConstants::invalidStepping}; for (auto stepping : steppings) { hardwareInfo.platform.usRevId = hwInfoConfig.getHwRevIdFromStepping(stepping, hardwareInfo); switch (stepping) { case REVISION_A0: EXPECT_TRUE(hwHelper.isWorkaroundRequired(REVISION_A0, REVISION_B, hardwareInfo)); [[fallthrough]]; case REVISION_B: EXPECT_TRUE(hwHelper.isWorkaroundRequired(REVISION_A0, REVISION_C, hardwareInfo)); [[fallthrough]]; default: EXPECT_FALSE(hwHelper.isWorkaroundRequired(REVISION_A0, REVISION_D, hardwareInfo)); EXPECT_FALSE(hwHelper.isWorkaroundRequired(REVISION_B, REVISION_A0, hardwareInfo)); } } } compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/tgllp/test_hw_info_config_tgllp.cpp000066400000000000000000000217531422164147700321540ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/device_factory.h" #include "shared/source/os_interface/hw_info_config.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/mocks/mock_execution_environment.h" #include "shared/test/common/test_macros/test.h" using namespace NEO; using TgllpHwInfoConfig = ::testing::Test; TGLLPTEST_F(TgllpHwInfoConfig, 
givenHwInfoErrorneousConfigStringThenThrow) { HardwareInfo hwInfo = *defaultHwInfo; GT_SYSTEM_INFO >SystemInfo = hwInfo.gtSystemInfo; uint64_t config = 0xdeadbeef; gtSystemInfo = {0}; EXPECT_ANY_THROW(hardwareInfoSetup[productFamily](&hwInfo, false, config)); EXPECT_EQ(0u, gtSystemInfo.SliceCount); EXPECT_EQ(0u, gtSystemInfo.SubSliceCount); EXPECT_EQ(0u, gtSystemInfo.DualSubSliceCount); EXPECT_EQ(0u, gtSystemInfo.EUCount); } TGLLPTEST_F(TgllpHwInfoConfig, whenUsingCorrectConfigValueThenCorrectHwInfoIsReturned) { HardwareInfo hwInfo = *defaultHwInfo; GT_SYSTEM_INFO >SystemInfo = hwInfo.gtSystemInfo; uint64_t config = 0x100060010; gtSystemInfo = {0}; hardwareInfoSetup[productFamily](&hwInfo, false, config); EXPECT_EQ(1u, gtSystemInfo.SliceCount); EXPECT_EQ(6u, gtSystemInfo.DualSubSliceCount); config = 0x100020010; gtSystemInfo = {0}; hardwareInfoSetup[productFamily](&hwInfo, false, config); EXPECT_EQ(1u, gtSystemInfo.SliceCount); EXPECT_EQ(2u, gtSystemInfo.DualSubSliceCount); } TGLLPTEST_F(TgllpHwInfoConfig, givenA0SteppingAndTgllpPlatformWhenAskingIfWAIsRequiredThenReturnTrue) { auto hwInfoConfig = HwInfoConfig::get(productFamily); std::array, 3> revisions = { {{REVISION_A0, true}, {REVISION_B, false}, {REVISION_C, false}}}; for (const auto &[revision, paramBool] : revisions) { auto hwInfo = *defaultHwInfo; hwInfo.platform.usRevId = hwInfoConfig->getHwRevIdFromStepping(revision, hwInfo); hwInfoConfig->configureHardwareCustom(&hwInfo, nullptr); EXPECT_EQ(paramBool, hwInfoConfig->pipeControlWARequired(hwInfo)); EXPECT_EQ(paramBool, hwInfoConfig->imagePitchAlignmentWARequired(hwInfo)); EXPECT_EQ(paramBool, hwInfoConfig->isForceEmuInt32DivRemSPWARequired(hwInfo)); } } TGLLPTEST_F(TgllpHwInfoConfig, givenHwInfoConfigWhenAskedIf3DPipelineSelectWAIsRequiredThenTrueIsReturned) { const auto &hwInfoConfig = *HwInfoConfig::get(defaultHwInfo->platform.eProductFamily); EXPECT_TRUE(hwInfoConfig.is3DPipelineSelectWARequired()); } using TgllpHwInfo = ::testing::Test; 
TGLLPTEST_F(TgllpHwInfo, givenBoolWhenCallTgllpHardwareInfoSetupThenFeatureTableAndWorkaroundTableAreSetCorrect) { static bool boolValue[]{ true, false}; HardwareInfo hwInfo = *defaultHwInfo; GT_SYSTEM_INFO >SystemInfo = hwInfo.gtSystemInfo; FeatureTable &featureTable = hwInfo.featureTable; WorkaroundTable &workaroundTable = hwInfo.workaroundTable; uint64_t configs[] = { 0x100060010, 0x100020010}; for (auto &config : configs) { for (auto setParamBool : boolValue) { gtSystemInfo = {0}; featureTable = {}; workaroundTable = {}; hardwareInfoSetup[productFamily](&hwInfo, setParamBool, config); EXPECT_EQ(setParamBool, featureTable.flags.ftrL3IACoherency); EXPECT_EQ(setParamBool, featureTable.flags.ftrPPGTT); EXPECT_EQ(setParamBool, featureTable.flags.ftrSVM); EXPECT_EQ(setParamBool, featureTable.flags.ftrIA32eGfxPTEs); EXPECT_EQ(setParamBool, featureTable.flags.ftrStandardMipTailFormat); EXPECT_EQ(setParamBool, featureTable.flags.ftrTranslationTable); EXPECT_EQ(setParamBool, featureTable.flags.ftrUserModeTranslationTable); EXPECT_EQ(setParamBool, featureTable.flags.ftrTileMappedResource); EXPECT_EQ(setParamBool, featureTable.flags.ftrEnableGuC); EXPECT_EQ(setParamBool, featureTable.flags.ftrFbc); EXPECT_EQ(setParamBool, featureTable.flags.ftrFbc2AddressTranslation); EXPECT_EQ(setParamBool, featureTable.flags.ftrFbcBlitterTracking); EXPECT_EQ(setParamBool, featureTable.flags.ftrFbcCpuTracking); EXPECT_EQ(setParamBool, featureTable.flags.ftrTileY); EXPECT_EQ(setParamBool, featureTable.flags.ftrAstcHdr2D); EXPECT_EQ(setParamBool, featureTable.flags.ftrAstcLdr2D); EXPECT_EQ(setParamBool, featureTable.flags.ftr3dMidBatchPreempt); EXPECT_EQ(setParamBool, featureTable.flags.ftrGpGpuMidBatchPreempt); EXPECT_EQ(setParamBool, featureTable.flags.ftrGpGpuThreadGroupLevelPreempt); EXPECT_EQ(setParamBool, featureTable.flags.ftrPerCtxtPreemptionGranularityControl); EXPECT_EQ(setParamBool, workaroundTable.flags.wa4kAlignUVOffsetNV12LinearSurface); EXPECT_EQ(setParamBool, 
workaroundTable.flags.waEnablePreemptionGranularityControlByUMD); EXPECT_EQ(setParamBool, workaroundTable.flags.waUntypedBufferCompression); } } } TGLLPTEST_F(TgllpHwInfo, givenHwInfoConfigStringThenAfterSetupResultingVmeIsDisabled) { HardwareInfo hwInfo = *defaultHwInfo; uint64_t config = 0x100060010; hardwareInfoSetup[productFamily](&hwInfo, false, config); EXPECT_FALSE(hwInfo.capabilityTable.ftrSupportsVmeAvcTextureSampler); EXPECT_FALSE(hwInfo.capabilityTable.ftrSupportsVmeAvcPreemption); EXPECT_FALSE(hwInfo.capabilityTable.supportsVme); } TGLLPTEST_F(TgllpHwInfo, givenSetCommandStreamReceiverInAubModeForTgllpProductFamilyWhenPrepareDeviceEnvironmentsForProductFamilyOverrideIsCalledThenAubCenterIsInitializedCorrectly) { DebugManagerStateRestore stateRestore; DebugManager.flags.SetCommandStreamReceiver.set(1); DebugManager.flags.ProductFamilyOverride.set("tgllp"); MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); bool success = DeviceFactory::prepareDeviceEnvironmentsForProductFamilyOverride(executionEnvironment); ASSERT_TRUE(success); auto rootDeviceEnvironment = static_cast(executionEnvironment.rootDeviceEnvironments[0].get()); EXPECT_TRUE(rootDeviceEnvironment->initAubCenterCalled); EXPECT_FALSE(rootDeviceEnvironment->localMemoryEnabledReceived); } TGLLPTEST_F(TgllpHwInfo, givenSetCommandStreamReceiverInAubModeWithOverrideGpuAddressSpaceWhenPrepareDeviceEnvironmentsForProductFamilyOverrideIsCalledThenAubManagerIsInitializedWithCorrectGpuAddressSpace) { DebugManagerStateRestore stateRestore; DebugManager.flags.SetCommandStreamReceiver.set(1); DebugManager.flags.ProductFamilyOverride.set("tgllp"); DebugManager.flags.OverrideGpuAddressSpace.set(48); MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); bool success = DeviceFactory::prepareDeviceEnvironmentsForProductFamilyOverride(executionEnvironment); ASSERT_TRUE(success); auto rootDeviceEnvironment = static_cast(executionEnvironment.rootDeviceEnvironments[0].get()); auto 
mockAubManager = static_cast(rootDeviceEnvironment->aubCenter->getAubManager()); EXPECT_EQ(MemoryConstants::max48BitAddress, mockAubManager->mockAubManagerParams.gpuAddressSpace); } TGLLPTEST_F(TgllpHwInfo, givenSetCommandStreamReceiverInAubModeWhenPrepareDeviceEnvironmentsForProductFamilyOverrideIsCalledThenAllRootDeviceEnvironmentMembersAreInitialized) { DebugManagerStateRestore stateRestore; auto requiredDeviceCount = 2u; DebugManager.flags.CreateMultipleRootDevices.set(requiredDeviceCount); DebugManager.flags.SetCommandStreamReceiver.set(1); DebugManager.flags.ProductFamilyOverride.set("tgllp"); MockExecutionEnvironment executionEnvironment(defaultHwInfo.get(), true, requiredDeviceCount); bool success = DeviceFactory::prepareDeviceEnvironmentsForProductFamilyOverride(executionEnvironment); ASSERT_TRUE(success); std::set memoryOperationHandlers; for (auto rootDeviceIndex = 0u; rootDeviceIndex < requiredDeviceCount; rootDeviceIndex++) { auto rootDeviceEnvironment = static_cast(executionEnvironment.rootDeviceEnvironments[rootDeviceIndex].get()); EXPECT_TRUE(rootDeviceEnvironment->initAubCenterCalled); EXPECT_FALSE(rootDeviceEnvironment->localMemoryEnabledReceived); auto memoryOperationInterface = rootDeviceEnvironment->memoryOperationsInterface.get(); EXPECT_NE(nullptr, memoryOperationInterface); EXPECT_EQ(memoryOperationHandlers.end(), memoryOperationHandlers.find(memoryOperationInterface)); memoryOperationHandlers.insert(memoryOperationInterface); } } TGLLPTEST_F(TgllpHwInfo, givenTgllpWhenObtainingBlitterPreferenceThenReturnFalse) { const auto &hwInfoConfig = *HwInfoConfig::get(defaultHwInfo->platform.eProductFamily); const auto &hardwareInfo = *defaultHwInfo; EXPECT_FALSE(hwInfoConfig.obtainBlitterPreference(hardwareInfo)); } 
compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/windows/000077500000000000000000000000001422164147700245715ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/windows/CMakeLists.txt000066400000000000000000000007261422164147700273360ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_gen12lp_windows ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/gmm_callbacks_tests_gen12lp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/hw_info_config_tests_gen12lp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/wddm_tests_gen12lp.cpp ) if(WIN32) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen12lp_windows}) add_subdirectories() endif() compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/windows/gmm_callbacks_tests_gen12lp.cpp000066400000000000000000000121161422164147700326270ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/aub_command_stream_receiver_hw.h" #include "shared/source/command_stream/command_stream_receiver_with_aub_dump.h" #include "shared/source/command_stream/linear_stream.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/windows/gmm_callbacks.h" #include "shared/source/os_interface/windows/wddm_device_command_stream.h" #include "shared/test/common/cmd_parse/hw_parse.h" #include "shared/test/common/fixtures/gmm_callbacks_fixture.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/default_hw_info.h" #include "shared/test/common/libult/ult_command_stream_receiver.h" #include "shared/test/common/test_macros/test.h" using namespace NEO; using Gen12LpGmmCallbacksTests = ::Test; template struct MockAubCsrToTestNotifyAubCapture : public AUBCommandStreamReceiverHw { using 
AUBCommandStreamReceiverHw::AUBCommandStreamReceiverHw; using AUBCommandStreamReceiverHw::externalAllocations; }; GEN12LPTEST_F(Gen12LpGmmCallbacksTests, givenCsrWithoutAubDumpWhenNotifyAubCaptureCallbackIsCalledThenDoNothing) { auto csr = std::make_unique>(*executionEnvironment, 0, 1); uint64_t address = 0xFEDCBA9876543210; size_t size = 1024; auto res = DeviceCallbacks::notifyAubCapture(csr.get(), address, size, true); EXPECT_EQ(1, res); } GEN12LPTEST_F(Gen12LpGmmCallbacksTests, givenWddmCsrWhenWriteL3CalledThenWriteTwoMmio) { using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; UltCommandStreamReceiver csr(*executionEnvironment, 0, 1); uint8_t buffer[128] = {}; csr.commandStream.replaceBuffer(buffer, 128); uint64_t address = 0x00234564002BCDEC; uint64_t value = 0xFEDCBA987654321C; auto res = TTCallbacks::writeL3Address(&csr, value, address); EXPECT_EQ(1, res); EXPECT_EQ(2 * sizeof(MI_LOAD_REGISTER_IMM), csr.commandStream.getUsed()); HardwareParse hwParse; hwParse.parseCommands(csr.commandStream, 0); EXPECT_EQ(2u, hwParse.cmdList.size()); auto cmd = genCmdCast(*hwParse.cmdList.begin()); ASSERT_NE(nullptr, cmd); EXPECT_EQ(address & 0xFFFFFFFF, cmd->getRegisterOffset()); EXPECT_EQ(value & 0xFFFFFFFF, cmd->getDataDword()); cmd = genCmdCast(*(++hwParse.cmdList.begin())); ASSERT_NE(nullptr, cmd); EXPECT_EQ(address >> 32, cmd->getRegisterOffset()); EXPECT_EQ(value >> 32, cmd->getDataDword()); } GEN12LPTEST_F(Gen12LpGmmCallbacksTests, givenCcsEnabledhenWriteL3CalledThenSetRemapBit) { typedef typename FamilyType::MI_LOAD_REGISTER_IMM MI_LOAD_REGISTER_IMM; HardwareInfo localHwInfo = *defaultHwInfo; localHwInfo.featureTable.flags.ftrCCSNode = true; ExecutionEnvironment executionEnvironment; executionEnvironment.prepareRootDeviceEnvironments(1u); executionEnvironment.rootDeviceEnvironments[0]->setHwInfo(&localHwInfo); executionEnvironment.initializeMemoryManager(); UltCommandStreamReceiver csr(executionEnvironment, 0, 1); uint8_t buffer[128] = {}; 
csr.commandStream.replaceBuffer(buffer, 128); auto res = TTCallbacks::writeL3Address(&csr, 1, 1); EXPECT_EQ(1, res); HardwareParse hwParse; hwParse.parseCommands(csr.commandStream, 0); EXPECT_EQ(2u, hwParse.cmdList.size()); auto cmd = genCmdCast(*hwParse.cmdList.begin()); ASSERT_NE(nullptr, cmd); EXPECT_TRUE(cmd->getMmioRemapEnable()); cmd = genCmdCast(*(++hwParse.cmdList.begin())); ASSERT_NE(nullptr, cmd); EXPECT_TRUE(cmd->getMmioRemapEnable()); } GEN12LPTEST_F(Gen12LpGmmCallbacksTests, givenCcsDisabledhenWriteL3CalledThenSetRemapBitToTrue) { typedef typename FamilyType::MI_LOAD_REGISTER_IMM MI_LOAD_REGISTER_IMM; HardwareInfo localHwInfo = *defaultHwInfo; localHwInfo.featureTable.flags.ftrCCSNode = false; ExecutionEnvironment executionEnvironment; executionEnvironment.prepareRootDeviceEnvironments(1u); executionEnvironment.rootDeviceEnvironments[0]->setHwInfo(&localHwInfo); executionEnvironment.initializeMemoryManager(); UltCommandStreamReceiver csr(executionEnvironment, 0, 1); uint8_t buffer[128] = {}; csr.commandStream.replaceBuffer(buffer, 128); auto res = TTCallbacks::writeL3Address(&csr, 1, 1); EXPECT_EQ(1, res); HardwareParse hwParse; hwParse.parseCommands(csr.commandStream, 0); EXPECT_EQ(2u, hwParse.cmdList.size()); auto cmd = genCmdCast(*hwParse.cmdList.begin()); ASSERT_NE(nullptr, cmd); EXPECT_TRUE(cmd->getMmioRemapEnable()); cmd = genCmdCast(*(++hwParse.cmdList.begin())); ASSERT_NE(nullptr, cmd); EXPECT_TRUE(cmd->getMmioRemapEnable()); } compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/windows/hw_info_config_tests_gen12lp.cpp000066400000000000000000000073631422164147700330360ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/os_interface.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/libult/gen12lp/special_ult_helper_gen12lp.h" #include 
"opencl/test/unit_test/os_interface/windows/hw_info_config_win_tests.h" using namespace NEO; using HwInfoConfigTestWindowsGen12lp = HwInfoConfigTestWindows; GEN12LPTEST_F(HwInfoConfigTestWindowsGen12lp, givenE2ECSetByKmdWhenConfiguringHwThenAdjustInternalImageFlag) { FeatureTable &localFeatureTable = outHwInfo.featureTable; auto hwInfoConfig = HwInfoConfig::get(productFamily); localFeatureTable.flags.ftrE2ECompression = true; hwInfoConfig->configureHardwareCustom(&outHwInfo, nullptr); EXPECT_TRUE(outHwInfo.capabilityTable.ftrRenderCompressedBuffers); EXPECT_TRUE(outHwInfo.capabilityTable.ftrRenderCompressedImages); localFeatureTable.flags.ftrE2ECompression = false; hwInfoConfig->configureHardwareCustom(&outHwInfo, nullptr); EXPECT_FALSE(outHwInfo.capabilityTable.ftrRenderCompressedBuffers); EXPECT_FALSE(outHwInfo.capabilityTable.ftrRenderCompressedImages); } GEN12LPTEST_F(HwInfoConfigTestWindowsGen12lp, givenGen12LpProductWhenAdjustPlatformForProductFamilyCalledThenOverrideWithCorrectFamily) { auto hwInfoConfig = HwInfoConfig::get(productFamily); PLATFORM *testPlatform = &outHwInfo.platform; testPlatform->eDisplayCoreFamily = IGFX_GEN11_CORE; testPlatform->eRenderCoreFamily = IGFX_GEN11_CORE; hwInfoConfig->adjustPlatformForProductFamily(&outHwInfo); EXPECT_EQ(IGFX_GEN12LP_CORE, testPlatform->eRenderCoreFamily); EXPECT_EQ(IGFX_GEN12LP_CORE, testPlatform->eDisplayCoreFamily); } GEN12LPTEST_F(HwInfoConfigTestWindowsGen12lp, givenCompressionFtrEnabledWhenAskingForPageTableManagerThenReturnCorrectValue) { const auto &hwInfoConfig = *HwInfoConfig::get(productFamily); outHwInfo.capabilityTable.ftrRenderCompressedBuffers = false; outHwInfo.capabilityTable.ftrRenderCompressedImages = false; EXPECT_FALSE(hwInfoConfig.isPageTableManagerSupported(outHwInfo)); outHwInfo.capabilityTable.ftrRenderCompressedBuffers = true; outHwInfo.capabilityTable.ftrRenderCompressedImages = false; EXPECT_TRUE(hwInfoConfig.isPageTableManagerSupported(outHwInfo)); 
outHwInfo.capabilityTable.ftrRenderCompressedBuffers = false; outHwInfo.capabilityTable.ftrRenderCompressedImages = true; EXPECT_TRUE(hwInfoConfig.isPageTableManagerSupported(outHwInfo)); outHwInfo.capabilityTable.ftrRenderCompressedBuffers = true; outHwInfo.capabilityTable.ftrRenderCompressedImages = true; EXPECT_TRUE(hwInfoConfig.isPageTableManagerSupported(outHwInfo)); } GEN12LPTEST_F(HwInfoConfigTestWindowsGen12lp, givenGen12LpSkuWhenGettingCapabilityCoherencyFlagThenExpectValidValue) { auto &hwInfoConfig = *HwInfoConfig::get(productFamily); bool coherency = false; hwInfoConfig.setCapabilityCoherencyFlag(outHwInfo, coherency); const bool checkDone = SpecialUltHelperGen12lp::additionalCoherencyCheck(outHwInfo.platform.eProductFamily, coherency); if (checkDone) { EXPECT_FALSE(coherency); return; } if (SpecialUltHelperGen12lp::isAdditionalCapabilityCoherencyFlagSettingRequired(outHwInfo.platform.eProductFamily)) { outHwInfo.platform.usRevId = hwInfoConfig.getHwRevIdFromStepping(REVISION_A1, outHwInfo); hwInfoConfig.setCapabilityCoherencyFlag(outHwInfo, coherency); EXPECT_TRUE(coherency); outHwInfo.platform.usRevId = hwInfoConfig.getHwRevIdFromStepping(REVISION_A0, outHwInfo); hwInfoConfig.setCapabilityCoherencyFlag(outHwInfo, coherency); EXPECT_FALSE(coherency); } else { EXPECT_TRUE(coherency); } } compute-runtime-22.14.22890/opencl/test/unit_test/gen12lp/windows/wddm_tests_gen12lp.cpp000066400000000000000000000064511422164147700310100ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/preemption.h" #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/os_interface/os_interface.h" #include "shared/source/os_interface/windows/gdi_interface.h" #include "shared/test/common/mocks/mock_execution_environment.h" #include "shared/test/common/mocks/mock_wddm.h" #include "shared/test/common/mocks/windows/mock_gmm_memory_base.h" 
#include "shared/test/common/os_interface/windows/gdi_dll_fixture.h" #include "shared/test/common/test_macros/test.h" using namespace NEO; struct Gen12LpWddmTest : public GdiDllFixture, ::testing::Test { void SetUp() override { GdiDllFixture::SetUp(); executionEnvironment = std::make_unique(); rootDeviceEnvironment = executionEnvironment->rootDeviceEnvironments[0].get(); rootDeviceEnvironment->initGmm(); wddm = static_cast(Wddm::createWddm(nullptr, *rootDeviceEnvironment)); gmmMemory = new MockGmmMemoryBase(rootDeviceEnvironment->getGmmClientContext()); wddm->gmmMemory.reset(gmmMemory); } void TearDown() override { GdiDllFixture::TearDown(); } std::unique_ptr executionEnvironment; RootDeviceEnvironment *rootDeviceEnvironment = nullptr; WddmMock *wddm = nullptr; MockGmmMemoryBase *gmmMemory = nullptr; }; GEN12LPTEST_F(Gen12LpWddmTest, whenConfigureDeviceAddressSpaceThenObtainMinAddress) { uintptr_t minAddress = 0x12345u; EXPECT_NE(NEO::windowsMinAddress, minAddress); gmmMemory->getInternalGpuVaRangeLimitResult = minAddress; wddm->init(); EXPECT_EQ(minAddress, wddm->getWddmMinAddress()); EXPECT_EQ(1u, gmmMemory->getInternalGpuVaRangeLimitCalled); } using Gen12LpWddmHwInfoTest = ::testing::Test; GEN12LPTEST_F(Gen12LpWddmHwInfoTest, givenIncorrectProductFamiliyWhenInitCalledThenOverride) { HardwareInfo localHwInfo = *defaultHwInfo; localHwInfo.platform.eRenderCoreFamily = GFXCORE_FAMILY::IGFX_UNKNOWN_CORE; localHwInfo.platform.eDisplayCoreFamily = GFXCORE_FAMILY::IGFX_UNKNOWN_CORE; std::unique_ptr mockGdiDll(setAdapterInfo(&localHwInfo.platform, &localHwInfo.gtSystemInfo, localHwInfo.capabilityTable.gpuAddressSpace)); auto executionEnvironment = std::make_unique(); auto rootDeviceEnvironment = executionEnvironment->rootDeviceEnvironments[0].get(); rootDeviceEnvironment->osInterface = std::make_unique(); auto localWddm = std::unique_ptr(Wddm::createWddm(nullptr, *rootDeviceEnvironment)); localWddm->init(); auto newHwInfo = rootDeviceEnvironment->getHardwareInfo(); 
EXPECT_EQ(GFXCORE_FAMILY::IGFX_GEN12LP_CORE, newHwInfo->platform.eRenderCoreFamily); EXPECT_EQ(GFXCORE_FAMILY::IGFX_GEN12LP_CORE, newHwInfo->platform.eDisplayCoreFamily); // reset mock gdi globals localHwInfo = *defaultHwInfo; mockGdiDll.reset(setAdapterInfo(&localHwInfo.platform, &localHwInfo.gtSystemInfo, localHwInfo.capabilityTable.gpuAddressSpace)); } compute-runtime-22.14.22890/opencl/test/unit_test/gen8/000077500000000000000000000000001422164147700224705ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/gen8/CMakeLists.txt000066400000000000000000000016131422164147700252310ustar00rootroot00000000000000# # Copyright (C) 2018-2021 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_GEN8) set(IGDRCL_SRCS_tests_gen8 ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_hw_tests_gen8.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cl_hw_helper_tests_gen8.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_media_kernel_gen8.cpp ${CMAKE_CURRENT_SOURCE_DIR}/image_tests_gen8.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_tests_gen8.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_cl_device_caps_gen8.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_platform_caps_gen8.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_preemption_gen8.cpp ) get_property(NEO_CORE_TESTS_GEN8 GLOBAL PROPERTY NEO_CORE_TESTS_GEN8) list(APPEND IGDRCL_SRCS_tests_gen8 ${NEO_CORE_TESTS_GEN8}) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen8}) add_subdirectories() endif() compute-runtime-22.14.22890/opencl/test/unit_test/gen8/bdw/000077500000000000000000000000001422164147700232445ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/gen8/bdw/CMakeLists.txt000066400000000000000000000004061422164147700260040ustar00rootroot00000000000000# # Copyright (C) 2018-2021 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_BDW) add_subdirectories() neo_copy_test_files_with_revision(copy_test_files_bdw_0 bdw 0) add_dependencies(copy_test_files_per_product 
copy_test_files_bdw_0) endif() compute-runtime-22.14.22890/opencl/test/unit_test/gen8/bdw/linux/000077500000000000000000000000001422164147700244035ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/gen8/bdw/linux/CMakeLists.txt000066400000000000000000000005311422164147700271420ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_gen8_bdw_linux ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/hw_info_config_tests_bdw.cpp ) if(UNIX) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen8_bdw_linux}) add_subdirectory(dll) endif() compute-runtime-22.14.22890/opencl/test/unit_test/gen8/bdw/linux/dll/000077500000000000000000000000001422164147700251565ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/gen8/bdw/linux/dll/CMakeLists.txt000066400000000000000000000004731422164147700277220ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_linux_dll_tests_gen8_bdw ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/device_id_tests_bdw.cpp ) target_sources(igdrcl_linux_dll_tests PRIVATE ${IGDRCL_SRCS_linux_dll_tests_gen8_bdw}) compute-runtime-22.14.22890/opencl/test/unit_test/gen8/bdw/linux/dll/device_id_tests_bdw.cpp000066400000000000000000000026751422164147700316650ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/fixtures/linux/device_id_fixture.h" using namespace NEO; TEST_F(DeviceIdTests, GivenBdwSupportedDeviceIdThenHardwareInfoIsCorrect) { std::array expectedDescriptors = {{ {0x1602, &BDW_1x2x6::hwInfo, &BDW_1x2x6::setupHardwareInfo}, {0x160A, &BDW_1x2x6::hwInfo, &BDW_1x2x6::setupHardwareInfo}, {0x1606, &BDW_1x2x6::hwInfo, &BDW_1x2x6::setupHardwareInfo}, {0x160E, &BDW_1x2x6::hwInfo, &BDW_1x2x6::setupHardwareInfo}, {0x160D, 
&BDW_1x2x6::hwInfo, &BDW_1x2x6::setupHardwareInfo}, {0x1612, &BDW_1x3x8::hwInfo, &BDW_1x3x8::setupHardwareInfo}, {0x161A, &BDW_1x3x8::hwInfo, &BDW_1x3x8::setupHardwareInfo}, {0x1616, &BDW_1x3x8::hwInfo, &BDW_1x3x8::setupHardwareInfo}, {0x161E, &BDW_1x3x8::hwInfo, &BDW_1x3x8::setupHardwareInfo}, {0x161D, &BDW_1x3x8::hwInfo, &BDW_1x3x8::setupHardwareInfo}, {0x1622, &BDW_2x3x8::hwInfo, &BDW_2x3x8::setupHardwareInfo}, {0x162A, &BDW_2x3x8::hwInfo, &BDW_2x3x8::setupHardwareInfo}, {0x1626, &BDW_2x3x8::hwInfo, &BDW_2x3x8::setupHardwareInfo}, {0x162B, &BDW_2x3x8::hwInfo, &BDW_2x3x8::setupHardwareInfo}, {0x162E, &BDW_2x3x8::hwInfo, &BDW_2x3x8::setupHardwareInfo}, {0x162D, &BDW_2x3x8::hwInfo, &BDW_2x3x8::setupHardwareInfo}, }}; testImpl(expectedDescriptors); } compute-runtime-22.14.22890/opencl/test/unit_test/gen8/bdw/linux/hw_info_config_tests_bdw.cpp000066400000000000000000000143241422164147700321470ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/unit_test/helpers/gtest_helpers.h" #include "opencl/test/unit_test/os_interface/linux/hw_info_config_linux_tests.h" using namespace NEO; struct HwInfoConfigTestLinuxBdw : HwInfoConfigTestLinux { void SetUp() override { HwInfoConfigTestLinux::SetUp(); drm->storedDeviceID = 0x1616; } }; BDWTEST_F(HwInfoConfigTestLinuxBdw, WhenConfiguringHwInfoThenInformationIsCorrect) { auto hwInfoConfig = HwInfoConfig::get(productFamily); drm->storedSSVal = 3; int ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ((unsigned short)drm->storedDeviceID, outHwInfo.platform.usDeviceID); EXPECT_EQ((unsigned short)drm->storedDeviceRevID, outHwInfo.platform.usRevId); EXPECT_EQ((uint32_t)drm->storedEUVal, outHwInfo.gtSystemInfo.EUCount); EXPECT_EQ((uint32_t)drm->storedSSVal, outHwInfo.gtSystemInfo.SubSliceCount); EXPECT_EQ(1u, outHwInfo.gtSystemInfo.SliceCount); EXPECT_EQ(aub_stream::ENGINE_RCS, 
outHwInfo.capabilityTable.defaultEngineType); drm->storedDeviceID = 0x1602; ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ((unsigned short)drm->storedDeviceID, outHwInfo.platform.usDeviceID); EXPECT_EQ((unsigned short)drm->storedDeviceRevID, outHwInfo.platform.usRevId); EXPECT_EQ((uint32_t)drm->storedEUVal, outHwInfo.gtSystemInfo.EUCount); EXPECT_EQ((uint32_t)drm->storedSSVal, outHwInfo.gtSystemInfo.SubSliceCount); EXPECT_EQ(aub_stream::ENGINE_RCS, outHwInfo.capabilityTable.defaultEngineType); drm->storedDeviceID = 0x1626; drm->storedSSVal = 6; ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ((unsigned short)drm->storedDeviceID, outHwInfo.platform.usDeviceID); EXPECT_EQ((unsigned short)drm->storedDeviceRevID, outHwInfo.platform.usRevId); EXPECT_EQ((uint32_t)drm->storedEUVal, outHwInfo.gtSystemInfo.EUCount); EXPECT_EQ((uint32_t)drm->storedSSVal, outHwInfo.gtSystemInfo.SubSliceCount); EXPECT_EQ(2u, outHwInfo.gtSystemInfo.SliceCount); EXPECT_EQ(aub_stream::ENGINE_RCS, outHwInfo.capabilityTable.defaultEngineType); } BDWTEST_F(HwInfoConfigTestLinuxBdw, GivenUnknownDevIdWhenConfiguringHwInfoThenErrorIsReturned) { drm->storedDeviceID = 0; auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-1, ret); } BDWTEST_F(HwInfoConfigTestLinuxBdw, GivenFailedIoctlDevIdWhenConfiguringHwInfoThenErrorIsReturned) { drm->storedRetValForDeviceID = -2; auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-2, ret); } BDWTEST_F(HwInfoConfigTestLinuxBdw, GivenFailedIoctlDevRevIdWhenConfiguringHwInfoThenErrorIsReturned) { drm->storedRetValForDeviceRevID = -3; auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); 
EXPECT_EQ(-3, ret); } BDWTEST_F(HwInfoConfigTestLinuxBdw, GivenFailedIoctlEuCountWhenConfiguringHwInfoThenErrorIsReturned) { drm->failRetTopology = true; drm->storedRetValForEUVal = -4; auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-4, ret); } BDWTEST_F(HwInfoConfigTestLinuxBdw, GivenFailedIoctlSsCountWhenConfiguringHwInfoThenErrorIsReturned) { drm->failRetTopology = true; drm->storedRetValForSSVal = -5; auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-5, ret); } BDWTEST_F(HwInfoConfigTestLinuxBdw, GivenWaFlagsWhenConfiguringHwInfoThenInformationIsCorrect) { auto hwInfoConfig = HwInfoConfig::get(productFamily); drm->storedDeviceRevID = 0; int ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); } BDWTEST_F(HwInfoConfigTestLinuxBdw, WhenConfiguringHwInfoThenEdramInformationIsCorrect) { auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ_VAL(0u, outHwInfo.gtSystemInfo.EdramSizeInKb); EXPECT_EQ(0u, outHwInfo.featureTable.flags.ftrEDram); drm->storedDeviceID = 0x1622; ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ_VAL((128u * 1024u), outHwInfo.gtSystemInfo.EdramSizeInKb); EXPECT_EQ(1u, outHwInfo.featureTable.flags.ftrEDram); drm->storedDeviceID = 0x162A; ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ_VAL((128u * 1024u), outHwInfo.gtSystemInfo.EdramSizeInKb); EXPECT_EQ(1u, outHwInfo.featureTable.flags.ftrEDram); } template class BdwHwInfoTests : public ::testing::Test { }; typedef ::testing::Types bdwTestTypes; TYPED_TEST_CASE(BdwHwInfoTests, bdwTestTypes); TYPED_TEST(BdwHwInfoTests, 
WhenGtIsSetupThenGtSystemInfoIsCorrect) { HardwareInfo hwInfo = *defaultHwInfo; auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); DrmMock drm(*executionEnvironment->rootDeviceEnvironments[0]); GT_SYSTEM_INFO >SystemInfo = hwInfo.gtSystemInfo; DeviceDescriptor device = {0, &hwInfo, &TypeParam::setupHardwareInfo}; int ret = drm.setupHardwareInfo(&device, false); EXPECT_EQ(ret, 0); EXPECT_GT(gtSystemInfo.EUCount, 0u); EXPECT_GT(gtSystemInfo.ThreadCount, 0u); EXPECT_GT(gtSystemInfo.SliceCount, 0u); EXPECT_GT(gtSystemInfo.SubSliceCount, 0u); EXPECT_GT(gtSystemInfo.DualSubSliceCount, 0u); EXPECT_GT_VAL(gtSystemInfo.L3CacheSizeInKb, 0u); EXPECT_EQ(gtSystemInfo.CsrSizeInMb, 8u); EXPECT_FALSE(gtSystemInfo.IsDynamicallyPopulated); } compute-runtime-22.14.22890/opencl/test/unit_test/gen8/cl_hw_helper_tests_gen8.cpp000066400000000000000000000014251422164147700277740ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/helpers/default_hw_info.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/helpers/cl_hw_helper.h" #include "opencl/test/unit_test/mocks/mock_cl_hw_helper.h" using namespace NEO; using ClHwHelperTestGen8 = ::testing::Test; GEN8TEST_F(ClHwHelperTestGen8, WhenGettingDeviceIpVersionThenMakeCorrectDeviceIpVersion) { EXPECT_EQ(ClHwHelperMock::makeDeviceIpVersion(8, 0, 0), ClHwHelper::get(renderCoreFamily).getDeviceIpVersion(*defaultHwInfo)); } GEN8TEST_F(ClHwHelperTestGen8, WhenGettingSupportedDeviceFeatureCapabilitiesThenReturnCorrectValue) { EXPECT_EQ(0u, ClHwHelper::get(renderCoreFamily).getSupportedDeviceFeatureCapabilities()); } compute-runtime-22.14.22890/opencl/test/unit_test/gen8/command_stream_receiver_hw_tests_gen8.cpp000066400000000000000000000047351422164147700327230ustar00rootroot00000000000000/* * Copyright (C) 
2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/command_stream/linear_stream.h" #include "shared/test/common/cmd_parse/hw_parse.h" #include "shared/test/common/helpers/dispatch_flags_helper.h" #include "shared/test/common/mocks/mock_csr.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_event.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "gtest/gtest.h" #include "reg_configs_common.h" using namespace NEO; #include "opencl/test/unit_test/command_stream/command_stream_receiver_hw_tests.inl" using CommandStreamReceiverHwTestGen8 = CommandStreamReceiverHwTest; GEN8TEST_F(CommandStreamReceiverHwTestGen8, GivenKernelWithSlmWhenPreviousNOSLML3WasSentThenProgramL3WithSLML3Config) { givenKernelWithSlmWhenPreviousNOSLML3WasSentThenProgramL3WithSLML3ConfigImpl(); } GEN8TEST_F(CommandStreamReceiverHwTestGen8, GivenBlockedKernelWithSlmWhenPreviousNOSLML3WasSentOnThenProgramL3WithSLML3ConfigAfterUnblocking) { givenBlockedKernelWithSlmWhenPreviousNOSLML3WasSentThenProgramL3WithSLML3ConfigAfterUnblockingImpl(); } GEN8TEST_F(CommandStreamReceiverHwTestGen8, GivenChangedL3ConfigWhenL3IsProgrammedThenClearSLMWorkAroundIsAdded) { MockCsrHw2 csr(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); csr.csrSizeRequestFlags.l3ConfigChanged = true; csr.isPreambleSent = true; size_t bufferSize = 2 * sizeof(typename FamilyType::MI_LOAD_REGISTER_IMM) + sizeof(typename FamilyType::PIPE_CONTROL); void *buffer = alignedMalloc(bufferSize, 64); LinearStream stream(buffer, bufferSize); uint32_t l3Config = 0x12345678; csr.programL3(stream, l3Config); 
this->parseCommands(stream); typename FamilyType::PIPE_CONTROL *pc = getCommand(); ASSERT_NE(nullptr, pc); EXPECT_TRUE(pc->getProtectedMemoryDisable() != 0); typename FamilyType::MI_LOAD_REGISTER_IMM *lri = getCommand(); ASSERT_NE(nullptr, lri); EXPECT_EQ(l3Config, lri->getDataDword()); alignedFree(buffer); } compute-runtime-22.14.22890/opencl/test/unit_test/gen8/enqueue_media_kernel_gen8.cpp000066400000000000000000000040451422164147700302660ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/libult/ult_command_stream_receiver.h" #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/media_kernel_fixture.h" using namespace NEO; typedef MediaKernelFixture MediaKernelTest; GEN8TEST_F(MediaKernelTest, givenGen8CSRWhenEnqueueVmeKernelThenProgramOnlyPipelineSelection) { typedef typename BDWFamily::PIPELINE_SELECT PIPELINE_SELECT; enqueueVmeKernel(); auto pCmd = getCommand(); auto expectedMask = pipelineSelectEnablePipelineSelectMaskBits; auto expectedPipelineSelection = PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU; EXPECT_EQ(expectedMask, pCmd->getMaskBits()); EXPECT_EQ(expectedPipelineSelection, pCmd->getPipelineSelection()); } GEN8TEST_F(MediaKernelTest, givenGen8CsrWhenEnqueueVmeKernelThenVmeSubslicesConfigDoesntChangeToFalse) { auto csr = static_cast *>(&pDevice->getGpgpuCommandStreamReceiver()); csr->lastVmeSubslicesConfig = true; enqueueVmeKernel(); EXPECT_TRUE(csr->lastVmeSubslicesConfig); } GEN8TEST_F(MediaKernelTest, givenGen8CsrWhenEnqueueVmeKernelThenVmeSubslicesConfigDoesntChangeToTrue) { auto csr = static_cast *>(&pDevice->getGpgpuCommandStreamReceiver()); csr->lastVmeSubslicesConfig = false; enqueueVmeKernel(); EXPECT_FALSE(csr->lastVmeSubslicesConfig); } GEN8TEST_F(MediaKernelTest, GivenGen8WhenGettingCmdSizeForMediaSamplerThenZeroIsReturned) { auto csr = static_cast *>(&pDevice->getGpgpuCommandStreamReceiver()); 
csr->lastVmeSubslicesConfig = false; EXPECT_EQ(0u, csr->getCmdSizeForMediaSampler(false)); EXPECT_EQ(0u, csr->getCmdSizeForMediaSampler(true)); csr->lastVmeSubslicesConfig = true; EXPECT_EQ(0u, csr->getCmdSizeForMediaSampler(false)); EXPECT_EQ(0u, csr->getCmdSizeForMediaSampler(true)); } compute-runtime-22.14.22890/opencl/test/unit_test/gen8/image_tests_gen8.cpp000066400000000000000000000021301422164147700264150ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" using namespace NEO; typedef ::testing::Test gen8ImageTests; GEN8TEST_F(gen8ImageTests, WhenAppendingSurfaceStateParamsThenSurfaceStateDoesNotChange) { typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; MockContext context; auto image = std::unique_ptr(ImageHelper::create(&context)); auto surfaceStateBefore = FamilyType::cmdInitRenderSurfaceState; auto surfaceStateAfter = FamilyType::cmdInitRenderSurfaceState; auto imageHw = static_cast *>(image.get()); EXPECT_EQ(0, memcmp(&surfaceStateBefore, &surfaceStateAfter, sizeof(RENDER_SURFACE_STATE))); imageHw->appendSurfaceStateParams(&surfaceStateAfter, context.getDevice(0)->getRootDeviceIndex(), false); EXPECT_EQ(0, memcmp(&surfaceStateBefore, &surfaceStateAfter, sizeof(RENDER_SURFACE_STATE))); } compute-runtime-22.14.22890/opencl/test/unit_test/gen8/kernel_tests_gen8.cpp000066400000000000000000000011601422164147700266150ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" using namespace NEO; using Gen8KernelTest = Test; 
GEN8TEST_F(Gen8KernelTest, givenKernelWhenCanTransformImagesIsCalledThenReturnsFalse) { MockKernelWithInternals mockKernel(*pClDevice); auto retVal = mockKernel.mockKernel->Kernel::canTransformImages(); EXPECT_FALSE(retVal); } compute-runtime-22.14.22890/opencl/test/unit_test/gen8/test_cl_device_caps_gen8.cpp000066400000000000000000000035041422164147700301010ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/helpers/gtest_helpers.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" using namespace NEO; using Gen8ClDeviceCaps = Test; GEN8TEST_F(Gen8ClDeviceCaps, WhenCheckingExtensionStringThenFp64IsSupported) { const auto &caps = pClDevice->getDeviceInfo(); std::string extensionString = caps.deviceExtensions; EXPECT_NE(std::string::npos, extensionString.find(std::string("cl_khr_fp64"))); EXPECT_NE(0u, caps.doubleFpConfig); } GEN8TEST_F(Gen8ClDeviceCaps, WhenGettingDeviceInfoThenCorrectlyRoundedDivideSqrtIsEnabled) { const auto &caps = pClDevice->getDeviceInfo(); EXPECT_NE(0u, caps.singleFpConfig & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT); } GEN8TEST_F(Gen8ClDeviceCaps, givenGen8WhenCheckExtensionsThenDeviceProperlyReportsClKhrSubgroupsExtension) { const auto &caps = pClDevice->getDeviceInfo(); if (pClDevice->areOcl21FeaturesEnabled()) { EXPECT_TRUE(hasSubstr(caps.deviceExtensions, std::string("cl_khr_subgroups"))); } else { EXPECT_FALSE(hasSubstr(caps.deviceExtensions, std::string("cl_khr_subgroups"))); } } GEN8TEST_F(Gen8ClDeviceCaps, givenGen8WhenCheckingCapsThenDeviceDoesProperlyReportsIndependentForwardProgress) { const auto &caps = pClDevice->getDeviceInfo(); if (pClDevice->areOcl21FeaturesEnabled()) { EXPECT_TRUE(caps.independentForwardProgress != 0); } else { EXPECT_FALSE(caps.independentForwardProgress != 0); } } GEN8TEST_F(Gen8ClDeviceCaps, WhenCheckingImage3dDimensionsThenCapsAreSetCorrectly) { const auto 
&caps = pClDevice->getDeviceInfo(); EXPECT_EQ(2048u, caps.image3DMaxWidth); EXPECT_EQ(2048u, caps.image3DMaxHeight); } compute-runtime-22.14.22890/opencl/test/unit_test/gen8/test_platform_caps_gen8.cpp000066400000000000000000000007551422164147700300150ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/platform_fixture.h" using namespace NEO; using Gen8PlatformCaps = Test; GEN8TEST_F(Gen8PlatformCaps, WhenCheckingExtensionStringThenFp64IsSupported) { const auto &caps = pPlatform->getPlatformInfo(); EXPECT_NE(std::string::npos, caps.extensions.find(std::string("cl_khr_fp64"))); } compute-runtime-22.14.22890/opencl/test/unit_test/gen8/test_preemption_gen8.cpp000066400000000000000000000115611422164147700273420ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/preemption.h" #include "shared/test/common/cmd_parse/hw_parse.h" #include "shared/test/common/libult/ult_command_stream_receiver.h" #include "shared/test/common/mocks/mock_command_stream_receiver.h" #include "opencl/source/event/user_event.h" #include "opencl/source/helpers/cl_preemption_helper.h" #include "opencl/test/unit_test/fixtures/cl_preemption_fixture.h" using namespace NEO; using Gen8PreemptionEnqueueKernelTest = PreemptionEnqueueKernelTest; using Gen8ClPreemptionTests = DevicePreemptionTests; GEN8TEST_F(Gen8ClPreemptionTests, GivenEmptyFlagsWhenSettingPreemptionLevelFlagsThenThreadGroupPreemptionIsAllowed) { PreemptionFlags flags = PreemptionHelper::createPreemptionLevelFlags(device->getDevice(), &kernel->getDescriptor()); EXPECT_TRUE(PreemptionHelper::allowThreadGroupPreemption(flags)); } GEN8TEST_F(Gen8PreemptionEnqueueKernelTest, givenSecondEnqueueWithTheSamePreemptionRequestThenDontReprogram) { pDevice->setPreemptionMode(PreemptionMode::ThreadGroup); 
auto &csr = pDevice->getUltCommandStreamReceiver(); csr.getMemoryManager()->setForce32BitAllocations(false); csr.setMediaVFEStateDirty(false); size_t off[3] = {0, 0, 0}; size_t gws[3] = {1, 1, 1}; MockKernelWithInternals mockKernel(*pClDevice); HardwareParse hwParser; pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr); hwParser.parseCommands(csr.commandStream); auto offset = csr.commandStream.getUsed(); pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr); pCmdQ->flush(); hwParser.parseCommands(csr.commandStream, offset); size_t numMmiosFound = countMmio(hwParser.cmdList.begin(), hwParser.cmdList.end(), 0x2248u); EXPECT_EQ(1U, numMmiosFound); } GEN8TEST_F(Gen8PreemptionEnqueueKernelTest, givenValidKernelForPreemptionWhenEnqueueKernelCalledThenPassDevicePreemptionMode) { pDevice->setPreemptionMode(PreemptionMode::ThreadGroup); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(mockCsr); MockKernelWithInternals mockKernel(*pClDevice); PreemptionFlags flags = {}; MultiDispatchInfo multiDispatch(mockKernel.mockKernel); EXPECT_EQ(PreemptionMode::ThreadGroup, ClPreemptionHelper::taskPreemptionMode(*pDevice, multiDispatch)); size_t gws[3] = {1, 0, 0}; pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); pCmdQ->flush(); EXPECT_EQ(1, mockCsr->flushCalledCount); EXPECT_EQ(PreemptionMode::ThreadGroup, mockCsr->passedDispatchFlags.preemptionMode); } GEN8TEST_F(Gen8PreemptionEnqueueKernelTest, givenValidKernelForPreemptionWhenEnqueueKernelCalledAndBlockedThenPassDevicePreemptionMode) { pDevice->setPreemptionMode(PreemptionMode::ThreadGroup); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(mockCsr); MockKernelWithInternals mockKernel(*pClDevice); 
PreemptionFlags flags = PreemptionHelper::createPreemptionLevelFlags(*pDevice, &mockKernel.mockKernel->getDescriptor()); EXPECT_EQ(PreemptionMode::ThreadGroup, PreemptionHelper::taskPreemptionMode(pDevice->getPreemptionMode(), flags)); UserEvent userEventObj; cl_event userEvent = &userEventObj; size_t gws[3] = {1, 0, 0}; pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 1, &userEvent, nullptr); pCmdQ->flush(); EXPECT_EQ(0, mockCsr->flushCalledCount); userEventObj.setStatus(CL_COMPLETE); pCmdQ->flush(); EXPECT_EQ(1, mockCsr->flushCalledCount); EXPECT_EQ(PreemptionMode::ThreadGroup, mockCsr->passedDispatchFlags.preemptionMode); } GEN8TEST_F(Gen8PreemptionEnqueueKernelTest, givenDisabledPreemptionWhenEnqueueKernelCalledThenPassDisabledPreemptionMode) { pDevice->setPreemptionMode(PreemptionMode::Disabled); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(mockCsr); MockKernelWithInternals mockKernel(*pClDevice); PreemptionFlags flags = PreemptionHelper::createPreemptionLevelFlags(*pDevice, &mockKernel.mockKernel->getDescriptor()); EXPECT_EQ(PreemptionMode::Disabled, PreemptionHelper::taskPreemptionMode(pDevice->getPreemptionMode(), flags)); size_t gws[3] = {1, 0, 0}; pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); pCmdQ->flush(); EXPECT_EQ(1, mockCsr->flushCalledCount); EXPECT_EQ(PreemptionMode::Disabled, mockCsr->passedDispatchFlags.preemptionMode); } compute-runtime-22.14.22890/opencl/test/unit_test/gen9/000077500000000000000000000000001422164147700224715ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/gen9/CMakeLists.txt000066400000000000000000000023441422164147700252340ustar00rootroot00000000000000# # Copyright (C) 2018-2022 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_GEN9) set(IGDRCL_SRCS_tests_gen9 ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt 
${CMAKE_CURRENT_SOURCE_DIR}/cl_get_platform_ids_tests_gen9.cpp ${CMAKE_CURRENT_SOURCE_DIR}/coherency_tests_gen9.cpp ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_hw_tests_gen9.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_kernel_gen9.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_media_kernel_gen9.cpp ${CMAKE_CURRENT_SOURCE_DIR}/hw_helper_tests_gen9.cpp ${CMAKE_CURRENT_SOURCE_DIR}/image_tests_gen9.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_tests_gen9.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sampler_tests_gen9.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sip_tests_gen9.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_device_caps_gen9.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_platform_caps_gen9.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_preemption_gen9.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_sample_gen9.cpp ) get_property(NEO_CORE_TESTS_GEN9 GLOBAL PROPERTY NEO_CORE_TESTS_GEN9) list(APPEND IGDRCL_SRCS_tests_gen9 ${NEO_CORE_TESTS_GEN9}) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen9}) add_subdirectories() endif() compute-runtime-22.14.22890/opencl/test/unit_test/gen9/bxt/000077500000000000000000000000001422164147700232665ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/gen9/bxt/CMakeLists.txt000066400000000000000000000011211422164147700260210ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_BXT) set(IGDRCL_SRCS_tests_gen9_bxt ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/device_tests_bxt.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_device_caps_bxt.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_hw_info_config_bxt.cpp ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen9_bxt}) add_subdirectories() neo_copy_test_files_with_revision(copy_test_files_bxt_0 bxt 0) add_dependencies(copy_test_files_per_product copy_test_files_bxt_0) endif() compute-runtime-22.14.22890/opencl/test/unit_test/gen9/bxt/device_tests_bxt.cpp000066400000000000000000000007221422164147700273310ustar00rootroot00000000000000/* * 
Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" using namespace NEO; typedef Test DeviceTest; BXTTEST_F(DeviceTest, givenBxtDeviceWhenAskedForProflingTimerResolutionThen52IsReturned) { auto resolution = pDevice->getProfilingTimerResolution(); EXPECT_DOUBLE_EQ(52.083, resolution); } compute-runtime-22.14.22890/opencl/test/unit_test/gen9/bxt/linux/000077500000000000000000000000001422164147700244255ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/gen9/bxt/linux/CMakeLists.txt000066400000000000000000000005251422164147700271670ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_gen9_bxt_linux ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/hw_info_config_tests.cpp ) if(UNIX) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen9_bxt_linux}) add_subdirectory(dll) endif() compute-runtime-22.14.22890/opencl/test/unit_test/gen9/bxt/linux/dll/000077500000000000000000000000001422164147700252005ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/gen9/bxt/linux/dll/CMakeLists.txt000066400000000000000000000004671422164147700277470ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_linux_dll_tests_gen9_bxt ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/device_id_tests.cpp ) target_sources(igdrcl_linux_dll_tests PRIVATE ${IGDRCL_SRCS_linux_dll_tests_gen9_bxt}) compute-runtime-22.14.22890/opencl/test/unit_test/gen9/bxt/linux/dll/device_id_tests.cpp000066400000000000000000000016221422164147700310420ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/fixtures/linux/device_id_fixture.h" using 
namespace NEO; TEST_F(DeviceIdTests, GivenBxtSupportedDeviceIdThenHardwareInfoIsCorrect) { std::array expectedDescriptors = {{ {0x9906, &BXT_1x3x6::hwInfo, &BXT_1x3x6::setupHardwareInfo}, {0x9907, &BXT_1x3x6::hwInfo, &BXT_1x3x6::setupHardwareInfo}, {0x0A84, &BXT_1x3x6::hwInfo, &BXT_1x3x6::setupHardwareInfo}, {0x5A84, &BXT_1x3x6::hwInfo, &BXT_1x3x6::setupHardwareInfo}, {0x5A85, &BXT_1x2x6::hwInfo, &BXT_1x2x6::setupHardwareInfo}, {0x1A85, &BXT_1x2x6::hwInfo, &BXT_1x2x6::setupHardwareInfo}, {0x1A84, &BXT_1x3x6::hwInfo, &BXT_1x3x6::setupHardwareInfo}, {0x9908, &BXT_1x3x6::hwInfo, &BXT_1x3x6::setupHardwareInfo}, }}; testImpl(expectedDescriptors); } compute-runtime-22.14.22890/opencl/test/unit_test/gen9/bxt/linux/hw_info_config_tests.cpp000066400000000000000000000230701422164147700313330ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/helpers/default_hw_info.h" #include "shared/test/unit_test/helpers/gtest_helpers.h" #include "opencl/test/unit_test/os_interface/linux/hw_info_config_linux_tests.h" using namespace NEO; struct HwInfoConfigTestLinuxBxt : HwInfoConfigTestLinux { void SetUp() override { HwInfoConfigTestLinux::SetUp(); drm->storedDeviceID = 0x5A84; drm->storedEUVal = 18; drm->storedHasPooledEU = 1; drm->storedMinEUinPool = 3; } }; BXTTEST_F(HwInfoConfigTestLinuxBxt, WhenConfiguringHwInfoThenConfigIsCorrect) { drm->storedDeviceRevID = 0; auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ((unsigned short)drm->storedDeviceID, outHwInfo.platform.usDeviceID); EXPECT_EQ((unsigned short)drm->storedDeviceRevID, outHwInfo.platform.usRevId); EXPECT_EQ((uint32_t)drm->storedEUVal, outHwInfo.gtSystemInfo.EUCount); EXPECT_EQ((uint32_t)drm->storedSSVal, outHwInfo.gtSystemInfo.SubSliceCount); EXPECT_EQ((unsigned int)drm->storedHasPooledEU, 
outHwInfo.featureTable.flags.ftrPooledEuEnabled); EXPECT_EQ((uint32_t)drm->storedMinEUinPool, outHwInfo.gtSystemInfo.EuCountPerPoolMin); EXPECT_EQ((outHwInfo.gtSystemInfo.EUCount - outHwInfo.gtSystemInfo.EuCountPerPoolMin), outHwInfo.gtSystemInfo.EuCountPerPoolMax); EXPECT_EQ(0u, outHwInfo.featureTable.flags.ftrGttCacheInvalidation); EXPECT_EQ(aub_stream::ENGINE_RCS, outHwInfo.capabilityTable.defaultEngineType); //constant sysInfo/ftr flags EXPECT_TRUE(outHwInfo.gtSystemInfo.VEBoxInfo.IsValid); drm->storedDeviceID = 0x5A85; drm->storedMinEUinPool = 6; drm->storedDeviceRevID = 4; ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ((unsigned short)drm->storedDeviceID, outHwInfo.platform.usDeviceID); EXPECT_EQ((unsigned short)drm->storedDeviceRevID, outHwInfo.platform.usRevId); EXPECT_EQ((uint32_t)drm->storedEUVal, outHwInfo.gtSystemInfo.EUCount); EXPECT_EQ((uint32_t)drm->storedSSVal, outHwInfo.gtSystemInfo.SubSliceCount); EXPECT_EQ((unsigned int)drm->storedHasPooledEU, outHwInfo.featureTable.flags.ftrPooledEuEnabled); EXPECT_EQ((uint32_t)drm->storedMinEUinPool, outHwInfo.gtSystemInfo.EuCountPerPoolMin); EXPECT_EQ((outHwInfo.gtSystemInfo.EUCount - outHwInfo.gtSystemInfo.EuCountPerPoolMin), outHwInfo.gtSystemInfo.EuCountPerPoolMax); EXPECT_EQ(aub_stream::ENGINE_RCS, outHwInfo.capabilityTable.defaultEngineType); drm->storedDeviceID = 0x5A85; drm->storedMinEUinPool = 9; ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ((unsigned short)drm->storedDeviceID, outHwInfo.platform.usDeviceID); EXPECT_EQ((unsigned short)drm->storedDeviceRevID, outHwInfo.platform.usRevId); EXPECT_EQ((uint32_t)drm->storedEUVal, outHwInfo.gtSystemInfo.EUCount); EXPECT_EQ((uint32_t)drm->storedSSVal, outHwInfo.gtSystemInfo.SubSliceCount); EXPECT_EQ((unsigned int)drm->storedHasPooledEU, outHwInfo.featureTable.flags.ftrPooledEuEnabled); EXPECT_EQ((uint32_t)drm->storedMinEUinPool, 
outHwInfo.gtSystemInfo.EuCountPerPoolMin); EXPECT_EQ((outHwInfo.gtSystemInfo.EUCount - outHwInfo.gtSystemInfo.EuCountPerPoolMin), outHwInfo.gtSystemInfo.EuCountPerPoolMax); EXPECT_EQ(aub_stream::ENGINE_RCS, outHwInfo.capabilityTable.defaultEngineType); auto &outKmdNotifyProperties = outHwInfo.capabilityTable.kmdNotifyProperties; EXPECT_TRUE(outKmdNotifyProperties.enableKmdNotify); EXPECT_EQ(50000, outKmdNotifyProperties.delayKmdNotifyMicroseconds); EXPECT_TRUE(outKmdNotifyProperties.enableQuickKmdSleep); EXPECT_EQ(5000, outKmdNotifyProperties.delayQuickKmdSleepMicroseconds); EXPECT_TRUE(outKmdNotifyProperties.enableQuickKmdSleepForSporadicWaits); EXPECT_EQ(200000, outKmdNotifyProperties.delayQuickKmdSleepForSporadicWaitsMicroseconds); } BXTTEST_F(HwInfoConfigTestLinuxBxt, GivenUnknownDevIdWhenConfiguringHwInfoThenErrorIsReturned) { drm->storedDeviceID = 0; auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-1, ret); } BXTTEST_F(HwInfoConfigTestLinuxBxt, GivenFailedIoctlDevIdWhenConfiguringHwInfoThenErrorIsReturned) { drm->storedRetValForDeviceID = -2; auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-2, ret); } BXTTEST_F(HwInfoConfigTestLinuxBxt, GivenFailedIoctlDevRevIdWhenConfiguringHwInfoThenErrorIsReturned) { drm->storedRetValForDeviceRevID = -3; auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-3, ret); } BXTTEST_F(HwInfoConfigTestLinuxBxt, GivenFailedIoctlEuCountWhenConfiguringHwInfoThenErrorIsReturned) { drm->failRetTopology = true; drm->storedRetValForEUVal = -4; auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-4, ret); } BXTTEST_F(HwInfoConfigTestLinuxBxt, 
GivenFailingEnabledPoolWhenConfiguringHwInfoThenZeroIsReturned) { drm->storedRetValForPooledEU = -1; auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ(0u, outHwInfo.featureTable.flags.ftrPooledEuEnabled); EXPECT_EQ(0u, outHwInfo.gtSystemInfo.EuCountPerPoolMin); EXPECT_EQ(0u, outHwInfo.gtSystemInfo.EuCountPerPoolMax); } BXTTEST_F(HwInfoConfigTestLinuxBxt, GivenDisabledEnabledPoolWhenConfiguringHwInfoThenZeroIsReturned) { drm->storedHasPooledEU = 0; auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ(0u, outHwInfo.featureTable.flags.ftrPooledEuEnabled); EXPECT_EQ(0u, outHwInfo.gtSystemInfo.EuCountPerPoolMin); EXPECT_EQ(0u, outHwInfo.gtSystemInfo.EuCountPerPoolMax); } BXTTEST_F(HwInfoConfigTestLinuxBxt, GivenFailingMinEuInPoolWhenConfiguringHwInfoThenZeroIsReturned) { drm->storedRetValForMinEUinPool = -1; drm->storedSSVal = 3; auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ(1u, outHwInfo.featureTable.flags.ftrPooledEuEnabled); EXPECT_EQ(9u, outHwInfo.gtSystemInfo.EuCountPerPoolMin); EXPECT_EQ((outHwInfo.gtSystemInfo.EUCount - outHwInfo.gtSystemInfo.EuCountPerPoolMin), outHwInfo.gtSystemInfo.EuCountPerPoolMax); drm->storedSSVal = 2; ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ(1u, outHwInfo.featureTable.flags.ftrPooledEuEnabled); EXPECT_EQ(3u, outHwInfo.gtSystemInfo.EuCountPerPoolMin); EXPECT_EQ((outHwInfo.gtSystemInfo.EUCount - outHwInfo.gtSystemInfo.EuCountPerPoolMin), outHwInfo.gtSystemInfo.EuCountPerPoolMax); } BXTTEST_F(HwInfoConfigTestLinuxBxt, GivenInvalidMinEuInPoolWhenConfiguringHwInfoThenZeroIsReturned) { drm->storedMinEUinPool = 4; drm->storedSSVal = 3; auto 
hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ(1u, outHwInfo.featureTable.flags.ftrPooledEuEnabled); EXPECT_EQ(9u, outHwInfo.gtSystemInfo.EuCountPerPoolMin); EXPECT_EQ((outHwInfo.gtSystemInfo.EUCount - outHwInfo.gtSystemInfo.EuCountPerPoolMin), outHwInfo.gtSystemInfo.EuCountPerPoolMax); drm->storedSSVal = 2; ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ(1u, outHwInfo.featureTable.flags.ftrPooledEuEnabled); EXPECT_EQ(3u, outHwInfo.gtSystemInfo.EuCountPerPoolMin); EXPECT_EQ((outHwInfo.gtSystemInfo.EUCount - outHwInfo.gtSystemInfo.EuCountPerPoolMin), outHwInfo.gtSystemInfo.EuCountPerPoolMax); } BXTTEST_F(HwInfoConfigTestLinuxBxt, WhenConfiguringHwInfoWaFlagsThenZeroIsReturned) { auto hwInfoConfig = HwInfoConfig::get(productFamily); drm->storedDeviceRevID = 0; int ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); } template class BxtHwInfoTests : public ::testing::Test { }; typedef ::testing::Types bxtTestTypes; TYPED_TEST_CASE(BxtHwInfoTests, bxtTestTypes); TYPED_TEST(BxtHwInfoTests, WhenConfiguringHwInfoThenConfigIsCorrect) { HardwareInfo hwInfo = *defaultHwInfo; auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); DrmMock drm(*executionEnvironment->rootDeviceEnvironments[0]); GT_SYSTEM_INFO >SystemInfo = hwInfo.gtSystemInfo; DeviceDescriptor device = {0, &hwInfo, &TypeParam::setupHardwareInfo}; int ret = drm.setupHardwareInfo(&device, false); EXPECT_EQ(ret, 0); EXPECT_GT(gtSystemInfo.EUCount, 0u); EXPECT_GT(gtSystemInfo.ThreadCount, 0u); EXPECT_GT(gtSystemInfo.SliceCount, 0u); EXPECT_GT(gtSystemInfo.SubSliceCount, 0u); EXPECT_GT_VAL(gtSystemInfo.L3CacheSizeInKb, 0u); EXPECT_EQ(gtSystemInfo.CsrSizeInMb, 8u); 
EXPECT_FALSE(gtSystemInfo.IsDynamicallyPopulated); } compute-runtime-22.14.22890/opencl/test/unit_test/gen9/bxt/test_device_caps_bxt.cpp000066400000000000000000000040171422164147700301550ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" using namespace NEO; typedef Test BxtDeviceCaps; BXTTEST_F(BxtDeviceCaps, WhenCheckingProfilingTimerResolutionThenCorrectResolutionIsReturned) { const auto &caps = pDevice->getDeviceInfo(); EXPECT_EQ(52u, caps.outProfilingTimerResolution); } BXTTEST_F(BxtDeviceCaps, givenBxtDeviceWhenAskedFor32BitSupportThenCorrectValuesAreReturned) { const auto &sharedCaps = pDevice->getDeviceInfo(); auto memoryManager = pDevice->getMemoryManager(); if constexpr (is64bit) { EXPECT_TRUE(memoryManager->peekForce32BitAllocations()); EXPECT_TRUE(sharedCaps.force32BitAddressess); } else { EXPECT_FALSE(memoryManager->peekForce32BitAllocations()); EXPECT_FALSE(sharedCaps.force32BitAddressess); } } BXTTEST_F(BxtDeviceCaps, WhenCheckingCapabilitiesThenSvmIsNotSupported) { const auto &caps = pClDevice->getDeviceInfo(); EXPECT_EQ(0u, caps.svmCapabilities); } BXTTEST_F(BxtDeviceCaps, WhenCheckftr64KBpagesThenFalse) { EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.ftr64KBpages); } BXTTEST_F(BxtDeviceCaps, WhenCheckFtrSupportsInteger64BitAtomicsThenReturnFalse) { EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.ftrSupportsInteger64BitAtomics); } typedef Test BxtUsDeviceIdTest; BXTTEST_F(BxtUsDeviceIdTest, WhenCheckingIsSimulationThenTrueReturnedOnlyForSimulationId) { unsigned short bxtSimulationIds[3] = { 0x9906, 0x9907, 0, // default, non-simulation }; NEO::MockDevice *mockDevice = nullptr; for (auto id : bxtSimulationIds) { mockDevice = createWithUsDeviceId(id); ASSERT_NE(mockDevice, nullptr); if (id == 0) EXPECT_FALSE(mockDevice->isSimulation()); else 
EXPECT_TRUE(mockDevice->isSimulation()); delete mockDevice; } } compute-runtime-22.14.22890/opencl/test/unit_test/gen9/bxt/test_hw_info_config_bxt.cpp000066400000000000000000000102561422164147700306700ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/helpers/default_hw_info.h" #include "shared/test/common/test_macros/test.h" using namespace NEO; TEST(BxtHwInfoConfig, givenInvalidSystemInfoWhenSettingHardwareInfoThenExpectThrow) { if (IGFX_BROXTON != productFamily) { return; } HardwareInfo hwInfo = *defaultHwInfo; GT_SYSTEM_INFO >SystemInfo = hwInfo.gtSystemInfo; uint64_t config = 0xdeadbeef; gtSystemInfo = {0}; EXPECT_ANY_THROW(hardwareInfoSetup[productFamily](&hwInfo, false, config)); EXPECT_EQ(0u, gtSystemInfo.SliceCount); EXPECT_EQ(0u, gtSystemInfo.SubSliceCount); EXPECT_EQ(0u, gtSystemInfo.DualSubSliceCount); EXPECT_EQ(0u, gtSystemInfo.EUCount); } using BxtHwInfo = ::testing::Test; BXTTEST_F(BxtHwInfo, givenBoolWhenCallBxtHardwareInfoSetupThenFeatureTableAndWorkaroundTableAreSetCorrect) { uint64_t configs[] = { 0x100020006, 0x100030006}; bool boolValue[]{ true, false}; HardwareInfo hwInfo = *defaultHwInfo; GT_SYSTEM_INFO >SystemInfo = hwInfo.gtSystemInfo; FeatureTable &featureTable = hwInfo.featureTable; WorkaroundTable &workaroundTable = hwInfo.workaroundTable; PLATFORM &platform = hwInfo.platform; for (auto &config : configs) { for (auto setParamBool : boolValue) { gtSystemInfo = {0}; featureTable = {}; workaroundTable = {}; platform.usRevId = 9; hardwareInfoSetup[productFamily](&hwInfo, setParamBool, config); EXPECT_EQ(setParamBool, featureTable.flags.ftrGpGpuMidBatchPreempt); EXPECT_EQ(setParamBool, featureTable.flags.ftrGpGpuThreadGroupLevelPreempt); EXPECT_EQ(setParamBool, featureTable.flags.ftrL3IACoherency); EXPECT_EQ(setParamBool, featureTable.flags.ftrULT); EXPECT_EQ(setParamBool, featureTable.flags.ftrGpGpuMidThreadLevelPreempt); EXPECT_EQ(setParamBool, 
featureTable.flags.ftr3dMidBatchPreempt); EXPECT_EQ(setParamBool, featureTable.flags.ftr3dObjectLevelPreempt); EXPECT_EQ(setParamBool, featureTable.flags.ftrPerCtxtPreemptionGranularityControl); EXPECT_EQ(setParamBool, featureTable.flags.ftrLCIA); EXPECT_EQ(setParamBool, featureTable.flags.ftrPPGTT); EXPECT_EQ(setParamBool, featureTable.flags.ftrIA32eGfxPTEs); EXPECT_EQ(setParamBool, featureTable.flags.ftrDisplayYTiling); EXPECT_EQ(setParamBool, featureTable.flags.ftrTranslationTable); EXPECT_EQ(setParamBool, featureTable.flags.ftrUserModeTranslationTable); EXPECT_EQ(setParamBool, featureTable.flags.ftrEnableGuC); EXPECT_EQ(setParamBool, featureTable.flags.ftrFbc); EXPECT_EQ(setParamBool, featureTable.flags.ftrFbc2AddressTranslation); EXPECT_EQ(setParamBool, featureTable.flags.ftrFbcBlitterTracking); EXPECT_EQ(setParamBool, featureTable.flags.ftrFbcCpuTracking); EXPECT_EQ(setParamBool, featureTable.flags.ftrTileY); EXPECT_EQ(setParamBool, featureTable.flags.ftrGttCacheInvalidation); EXPECT_EQ(setParamBool, workaroundTable.flags.waLLCCachingUnsupported); EXPECT_EQ(setParamBool, workaroundTable.flags.waMsaa8xTileYDepthPitchAlignment); EXPECT_EQ(setParamBool, workaroundTable.flags.waFbcLinearSurfaceStride); EXPECT_EQ(setParamBool, workaroundTable.flags.wa4kAlignUVOffsetNV12LinearSurface); EXPECT_EQ(setParamBool, workaroundTable.flags.waEnablePreemptionGranularityControlByUMD); EXPECT_EQ(setParamBool, workaroundTable.flags.waSendMIFLUSHBeforeVFE); EXPECT_EQ(setParamBool, workaroundTable.flags.waForcePcBbFullCfgRestore); EXPECT_EQ(setParamBool, workaroundTable.flags.waReportPerfCountUseGlobalContextID); EXPECT_EQ(setParamBool, workaroundTable.flags.waSamplerCacheFlushBetweenRedescribedSurfaceReads); platform.usRevId = 1; featureTable = {}; hardwareInfoSetup[productFamily](&hwInfo, true, config); EXPECT_EQ(false, featureTable.flags.ftrGttCacheInvalidation); } } } 
compute-runtime-22.14.22890/opencl/test/unit_test/gen9/bxt/windows/000077500000000000000000000000001422164147700247605ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/gen9/bxt/windows/CMakeLists.txt000066400000000000000000000005131422164147700275170ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_gen9_bxt_windows ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/test_device_caps_bxt_windows.cpp ) if(WIN32) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen9_bxt_windows}) endif() compute-runtime-22.14.22890/opencl/test/unit_test/gen9/bxt/windows/test_device_caps_bxt_windows.cpp000066400000000000000000000024121422164147700334160ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" using namespace NEO; typedef Test BxtDeviceCapsWindows; BXTTEST_F(BxtDeviceCapsWindows, GivenWhenGettingKmdNotifyPropertiesThenItIsDisabled) { EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.enableKmdNotify); EXPECT_EQ(0, pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds); EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.enableQuickKmdSleep); EXPECT_EQ(0, pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayQuickKmdSleepMicroseconds); EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.enableQuickKmdSleepForSporadicWaits); EXPECT_EQ(0, pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayQuickKmdSleepForSporadicWaitsMicroseconds); EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.enableQuickKmdSleepForDirectSubmission); EXPECT_EQ(0, 
pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayQuickKmdSleepForDirectSubmissionMicroseconds); } compute-runtime-22.14.22890/opencl/test/unit_test/gen9/cfl/000077500000000000000000000000001422164147700232355ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/gen9/cfl/CMakeLists.txt000066400000000000000000000010321422164147700257710ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_CFL) set(IGDRCL_SRCS_tests_gen9_cfl ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/test_device_caps_cfl.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_hw_info_config_cfl.cpp ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen9_cfl}) add_subdirectories() neo_copy_test_files_with_revision(copy_test_files_cfl_9 cfl 9) add_dependencies(copy_test_files_per_product copy_test_files_cfl_9) endif() compute-runtime-22.14.22890/opencl/test/unit_test/gen9/cfl/linux/000077500000000000000000000000001422164147700243745ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/gen9/cfl/linux/CMakeLists.txt000066400000000000000000000005311422164147700271330ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_gen9_cfl_linux ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/hw_info_config_tests_cfl.cpp ) if(UNIX) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen9_cfl_linux}) add_subdirectory(dll) endif() compute-runtime-22.14.22890/opencl/test/unit_test/gen9/cfl/linux/dll/000077500000000000000000000000001422164147700251475ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/gen9/cfl/linux/dll/CMakeLists.txt000066400000000000000000000004731422164147700277130ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_linux_dll_tests_gen9_cfl 
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/device_id_tests_cfl.cpp ) target_sources(igdrcl_linux_dll_tests PRIVATE ${IGDRCL_SRCS_linux_dll_tests_gen9_cfl}) compute-runtime-22.14.22890/opencl/test/unit_test/gen9/cfl/linux/dll/device_id_tests_cfl.cpp000066400000000000000000000063001422164147700316330ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/fixtures/linux/device_id_fixture.h" using namespace NEO; TEST_F(DeviceIdTests, GivenCflSupportedDeviceIdThenHardwareInfoIsCorrect) { std::array expectedDescriptors = {{ {0x3E90, &CFL_1x2x6::hwInfo, &CFL_1x2x6::setupHardwareInfo}, {0x3E93, &CFL_1x2x6::hwInfo, &CFL_1x2x6::setupHardwareInfo}, {0x3EA4, &CFL_1x2x6::hwInfo, &CFL_1x2x6::setupHardwareInfo}, {0x3E99, &CFL_1x2x6::hwInfo, &CFL_1x2x6::setupHardwareInfo}, {0x3EA1, &CFL_1x2x6::hwInfo, &CFL_1x2x6::setupHardwareInfo}, {0x3E92, &CFL_1x3x8::hwInfo, &CFL_1x3x8::setupHardwareInfo}, {0x3E9B, &CFL_1x3x8::hwInfo, &CFL_1x3x8::setupHardwareInfo}, {0x3E94, &CFL_1x3x8::hwInfo, &CFL_1x3x8::setupHardwareInfo}, {0x3E91, &CFL_1x3x8::hwInfo, &CFL_1x3x8::setupHardwareInfo}, {0x3E96, &CFL_1x3x8::hwInfo, &CFL_1x3x8::setupHardwareInfo}, {0x3E9A, &CFL_1x3x8::hwInfo, &CFL_1x3x8::setupHardwareInfo}, {0x3EA3, &CFL_1x3x8::hwInfo, &CFL_1x3x8::setupHardwareInfo}, {0x3EA9, &CFL_1x3x8::hwInfo, &CFL_1x3x8::setupHardwareInfo}, {0x3EA0, &CFL_1x3x8::hwInfo, &CFL_1x3x8::setupHardwareInfo}, {0x3E98, &CFL_1x3x8::hwInfo, &CFL_1x3x8::setupHardwareInfo}, {0x3E95, &CFL_2x3x8::hwInfo, &CFL_2x3x8::setupHardwareInfo}, {0x3EA6, &CFL_2x3x8::hwInfo, &CFL_2x3x8::setupHardwareInfo}, {0x3EA7, &CFL_2x3x8::hwInfo, &CFL_2x3x8::setupHardwareInfo}, {0x3EA8, &CFL_2x3x8::hwInfo, &CFL_2x3x8::setupHardwareInfo}, {0x3EA5, &CFL_2x3x8::hwInfo, &CFL_2x3x8::setupHardwareInfo}, {0x3EA2, &CFL_2x3x8::hwInfo, &CFL_2x3x8::setupHardwareInfo}, {0x9B21, &CFL_1x2x6::hwInfo, &CFL_1x2x6::setupHardwareInfo}, {0x9BAA, 
&CFL_1x2x6::hwInfo, &CFL_1x2x6::setupHardwareInfo}, {0x9BAB, &CFL_1x2x6::hwInfo, &CFL_1x2x6::setupHardwareInfo}, {0x9BAC, &CFL_1x2x6::hwInfo, &CFL_1x2x6::setupHardwareInfo}, {0x9BA0, &CFL_1x2x6::hwInfo, &CFL_1x2x6::setupHardwareInfo}, {0x9BA5, &CFL_1x2x6::hwInfo, &CFL_1x2x6::setupHardwareInfo}, {0x9BA8, &CFL_1x2x6::hwInfo, &CFL_1x2x6::setupHardwareInfo}, {0x9BA4, &CFL_1x2x6::hwInfo, &CFL_1x2x6::setupHardwareInfo}, {0x9BA2, &CFL_1x2x6::hwInfo, &CFL_1x2x6::setupHardwareInfo}, {0x9B41, &CFL_1x3x8::hwInfo, &CFL_1x3x8::setupHardwareInfo}, {0x9BCA, &CFL_1x3x8::hwInfo, &CFL_1x3x8::setupHardwareInfo}, {0x9BCB, &CFL_1x3x8::hwInfo, &CFL_1x3x8::setupHardwareInfo}, {0x9BCC, &CFL_1x3x8::hwInfo, &CFL_1x3x8::setupHardwareInfo}, {0x9BC0, &CFL_1x3x8::hwInfo, &CFL_1x3x8::setupHardwareInfo}, {0x9BC5, &CFL_1x3x8::hwInfo, &CFL_1x3x8::setupHardwareInfo}, {0x9BC8, &CFL_1x3x8::hwInfo, &CFL_1x3x8::setupHardwareInfo}, {0x9BC4, &CFL_1x3x8::hwInfo, &CFL_1x3x8::setupHardwareInfo}, {0x9BC2, &CFL_1x3x8::hwInfo, &CFL_1x3x8::setupHardwareInfo}, {0x9BC6, &CFL_1x3x8::hwInfo, &CFL_1x3x8::setupHardwareInfo}, {0x9BE6, &CFL_1x3x8::hwInfo, &CFL_1x3x8::setupHardwareInfo}, {0x9BF6, &CFL_1x3x8::hwInfo, &CFL_1x3x8::setupHardwareInfo}, }}; testImpl(expectedDescriptors); } compute-runtime-22.14.22890/opencl/test/unit_test/gen9/cfl/linux/hw_info_config_tests_cfl.cpp000066400000000000000000000162111422164147700321250ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/execution_environment/execution_environment.h" #include "shared/test/unit_test/helpers/gtest_helpers.h" #include "opencl/test/unit_test/os_interface/linux/hw_info_config_linux_tests.h" using namespace NEO; struct HwInfoConfigTestLinuxCfl : HwInfoConfigTestLinux { void SetUp() override { HwInfoConfigTestLinux::SetUp(); drm->storedDeviceID = 0x3E92; } }; CFLTEST_F(HwInfoConfigTestLinuxCfl, WhenConfiguringHwInfoThenInformationIsCorrect) { auto hwInfoConfig = 
HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ((unsigned short)drm->storedDeviceID, outHwInfo.platform.usDeviceID); EXPECT_EQ((unsigned short)drm->storedDeviceRevID, outHwInfo.platform.usRevId); EXPECT_EQ((uint32_t)drm->storedEUVal, outHwInfo.gtSystemInfo.EUCount); EXPECT_EQ((uint32_t)drm->storedSSVal, outHwInfo.gtSystemInfo.SubSliceCount); EXPECT_EQ(aub_stream::ENGINE_RCS, outHwInfo.capabilityTable.defaultEngineType); //constant sysInfo/ftr flags EXPECT_EQ(1u, outHwInfo.gtSystemInfo.VEBoxInfo.Instances.Bits.VEBox0Enabled); EXPECT_TRUE(outHwInfo.gtSystemInfo.VEBoxInfo.IsValid); drm->storedDeviceID = 0x3E90; drm->storedSSVal = 3; ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ((unsigned short)drm->storedDeviceID, outHwInfo.platform.usDeviceID); EXPECT_EQ((unsigned short)drm->storedDeviceRevID, outHwInfo.platform.usRevId); EXPECT_EQ((uint32_t)drm->storedEUVal, outHwInfo.gtSystemInfo.EUCount); EXPECT_EQ((uint32_t)drm->storedSSVal, outHwInfo.gtSystemInfo.SubSliceCount); EXPECT_EQ(1u, outHwInfo.gtSystemInfo.SliceCount); EXPECT_EQ(aub_stream::ENGINE_RCS, outHwInfo.capabilityTable.defaultEngineType); drm->storedDeviceID = 0x3EA5; drm->storedSSVal = 6; ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ((unsigned short)drm->storedDeviceID, outHwInfo.platform.usDeviceID); EXPECT_EQ((unsigned short)drm->storedDeviceRevID, outHwInfo.platform.usRevId); EXPECT_EQ((uint32_t)drm->storedEUVal, outHwInfo.gtSystemInfo.EUCount); EXPECT_EQ((uint32_t)drm->storedSSVal, outHwInfo.gtSystemInfo.SubSliceCount); EXPECT_EQ(2u, outHwInfo.gtSystemInfo.SliceCount); EXPECT_EQ(aub_stream::ENGINE_RCS, outHwInfo.capabilityTable.defaultEngineType); auto &outKmdNotifyProperties = outHwInfo.capabilityTable.kmdNotifyProperties; EXPECT_TRUE(outKmdNotifyProperties.enableKmdNotify); EXPECT_EQ(50000, 
outKmdNotifyProperties.delayKmdNotifyMicroseconds); EXPECT_TRUE(outKmdNotifyProperties.enableQuickKmdSleep); EXPECT_EQ(5000, outKmdNotifyProperties.delayQuickKmdSleepMicroseconds); EXPECT_TRUE(outKmdNotifyProperties.enableQuickKmdSleepForSporadicWaits); EXPECT_EQ(200000, outKmdNotifyProperties.delayQuickKmdSleepForSporadicWaitsMicroseconds); EXPECT_FALSE(outKmdNotifyProperties.enableQuickKmdSleepForDirectSubmission); EXPECT_EQ(0, outKmdNotifyProperties.delayQuickKmdSleepForDirectSubmissionMicroseconds); } CFLTEST_F(HwInfoConfigTestLinuxCfl, GivenUnknownDevIdWhenConfiguringHwInfoThenErrorIsReturned) { drm->storedDeviceID = 0; auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-1, ret); } CFLTEST_F(HwInfoConfigTestLinuxCfl, GivenFailedIoctlDevIdWhenConfiguringHwInfoThenErrorIsReturned) { drm->storedRetValForDeviceID = -2; auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-2, ret); } CFLTEST_F(HwInfoConfigTestLinuxCfl, GivenFailedIoctlDevRevIdWhenConfiguringHwInfoThenErrorIsReturned) { drm->storedRetValForDeviceRevID = -3; auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-3, ret); } CFLTEST_F(HwInfoConfigTestLinuxCfl, GivenFailedIoctlEuCountWhenConfiguringHwInfoThenErrorIsReturned) { drm->storedRetValForEUVal = -4; drm->failRetTopology = true; auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-4, ret); } CFLTEST_F(HwInfoConfigTestLinuxCfl, GivenFailedIoctlSsCountWhenConfiguringHwInfoThenErrorIsReturned) { drm->storedRetValForSSVal = -5; drm->failRetTopology = true; auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, 
osInterface); EXPECT_EQ(-5, ret); } CFLTEST_F(HwInfoConfigTestLinuxCfl, GivenWaFlagsWhenConfiguringHwInfoThenInformationIsCorrect) { auto hwInfoConfig = HwInfoConfig::get(productFamily); drm->storedDeviceRevID = 0; int ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); } CFLTEST_F(HwInfoConfigTestLinuxCfl, WhenConfiguringHwInfoThenEdramInformationIsCorrect) { auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ_VAL(0u, outHwInfo.gtSystemInfo.EdramSizeInKb); EXPECT_EQ(0u, outHwInfo.featureTable.flags.ftrEDram); drm->storedDeviceID = 0x3EA8; ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ_VAL((64u * 1024u), outHwInfo.gtSystemInfo.EdramSizeInKb); EXPECT_EQ(1u, outHwInfo.featureTable.flags.ftrEDram); drm->storedDeviceID = 0x3EA6; ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ_VAL((64u * 1024u), outHwInfo.gtSystemInfo.EdramSizeInKb); EXPECT_EQ(1u, outHwInfo.featureTable.flags.ftrEDram); } template class CflHwInfoTests : public ::testing::Test { }; typedef ::testing::Types cflTestTypes; TYPED_TEST_CASE(CflHwInfoTests, cflTestTypes); TYPED_TEST(CflHwInfoTests, WhenGtIsSetupThenGtSystemInfoIsCorrect) { HardwareInfo hwInfo = *defaultHwInfo; auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); DrmMock drm(*executionEnvironment->rootDeviceEnvironments[0]); GT_SYSTEM_INFO >SystemInfo = hwInfo.gtSystemInfo; DeviceDescriptor device = {0, &hwInfo, &TypeParam::setupHardwareInfo}; int ret = drm.setupHardwareInfo(&device, false); EXPECT_EQ(ret, 0); EXPECT_GT(gtSystemInfo.EUCount, 0u); EXPECT_GT(gtSystemInfo.ThreadCount, 0u); EXPECT_GT(gtSystemInfo.SliceCount, 0u); 
EXPECT_GT(gtSystemInfo.SubSliceCount, 0u); EXPECT_GT(gtSystemInfo.DualSubSliceCount, 0u); EXPECT_GT_VAL(gtSystemInfo.L3CacheSizeInKb, 0u); EXPECT_EQ(gtSystemInfo.CsrSizeInMb, 8u); EXPECT_FALSE(gtSystemInfo.IsDynamicallyPopulated); } compute-runtime-22.14.22890/opencl/test/unit_test/gen9/cfl/test_device_caps_cfl.cpp000066400000000000000000000011271422164147700300720ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" using namespace NEO; typedef Test CflDeviceCaps; CFLTEST_F(CflDeviceCaps, GivenCFLWhenCheckftr64KBpagesThenTrue) { EXPECT_TRUE(pDevice->getHardwareInfo().capabilityTable.ftr64KBpages); } CFLTEST_F(CflDeviceCaps, givenCflWhenCheckFtrSupportsInteger64BitAtomicsThenReturnTrue) { EXPECT_TRUE(pDevice->getHardwareInfo().capabilityTable.ftrSupportsInteger64BitAtomics); } compute-runtime-22.14.22890/opencl/test/unit_test/gen9/cfl/test_hw_info_config_cfl.cpp000066400000000000000000000073631422164147700306130ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/helpers/default_hw_info.h" #include "shared/test/common/test_macros/test.h" using namespace NEO; TEST(CflHwInfoConfig, GivenIncorrectDataWhenConfiguringHwInfoThenErrorIsReturned) { if (IGFX_COFFEELAKE != productFamily) { return; } HardwareInfo hwInfo = *defaultHwInfo; GT_SYSTEM_INFO >SystemInfo = hwInfo.gtSystemInfo; uint64_t config = 0xdeadbeef; gtSystemInfo = {0}; EXPECT_ANY_THROW(hardwareInfoSetup[productFamily](&hwInfo, false, config)); EXPECT_EQ(0u, gtSystemInfo.SliceCount); EXPECT_EQ(0u, gtSystemInfo.SubSliceCount); EXPECT_EQ(0u, gtSystemInfo.DualSubSliceCount); EXPECT_EQ(0u, gtSystemInfo.EUCount); } using CflHwInfo = ::testing::Test; CFLTEST_F(CflHwInfo, givenBoolWhenCallCflHardwareInfoSetupThenFeatureTableAndWorkaroundTableAreSetCorrect) { 
uint64_t configs[] = { 0x100030008, 0x200030008, 0x300030008, 0x100020006, 0x100030006}; bool boolValue[]{ true, false}; HardwareInfo hwInfo = *defaultHwInfo; GT_SYSTEM_INFO >SystemInfo = hwInfo.gtSystemInfo; FeatureTable &featureTable = hwInfo.featureTable; WorkaroundTable &workaroundTable = hwInfo.workaroundTable; for (auto &config : configs) { for (auto setParamBool : boolValue) { gtSystemInfo = {0}; featureTable = {}; workaroundTable = {}; hardwareInfoSetup[productFamily](&hwInfo, setParamBool, config); EXPECT_EQ(setParamBool, featureTable.flags.ftrGpGpuMidBatchPreempt); EXPECT_EQ(setParamBool, featureTable.flags.ftrGpGpuThreadGroupLevelPreempt); EXPECT_EQ(setParamBool, featureTable.flags.ftrL3IACoherency); EXPECT_EQ(setParamBool, featureTable.flags.ftrGpGpuMidThreadLevelPreempt); EXPECT_EQ(setParamBool, featureTable.flags.ftr3dMidBatchPreempt); EXPECT_EQ(setParamBool, featureTable.flags.ftr3dObjectLevelPreempt); EXPECT_EQ(setParamBool, featureTable.flags.ftrPerCtxtPreemptionGranularityControl); EXPECT_EQ(setParamBool, featureTable.flags.ftrPPGTT); EXPECT_EQ(setParamBool, featureTable.flags.ftrSVM); EXPECT_EQ(setParamBool, featureTable.flags.ftrIA32eGfxPTEs); EXPECT_EQ(setParamBool, featureTable.flags.ftrDisplayYTiling); EXPECT_EQ(setParamBool, featureTable.flags.ftrTranslationTable); EXPECT_EQ(setParamBool, featureTable.flags.ftrUserModeTranslationTable); EXPECT_EQ(setParamBool, featureTable.flags.ftrEnableGuC); EXPECT_EQ(setParamBool, featureTable.flags.ftrFbc); EXPECT_EQ(setParamBool, featureTable.flags.ftrFbc2AddressTranslation); EXPECT_EQ(setParamBool, featureTable.flags.ftrFbcBlitterTracking); EXPECT_EQ(setParamBool, featureTable.flags.ftrFbcCpuTracking); EXPECT_EQ(setParamBool, featureTable.flags.ftrTileY); EXPECT_EQ(setParamBool, workaroundTable.flags.waEnablePreemptionGranularityControlByUMD); EXPECT_EQ(setParamBool, workaroundTable.flags.waSendMIFLUSHBeforeVFE); EXPECT_EQ(setParamBool, workaroundTable.flags.waReportPerfCountUseGlobalContextID); 
EXPECT_EQ(setParamBool, workaroundTable.flags.waMsaa8xTileYDepthPitchAlignment); EXPECT_EQ(setParamBool, workaroundTable.flags.waLosslessCompressionSurfaceStride); EXPECT_EQ(setParamBool, workaroundTable.flags.waFbcLinearSurfaceStride); EXPECT_EQ(setParamBool, workaroundTable.flags.wa4kAlignUVOffsetNV12LinearSurface); EXPECT_EQ(setParamBool, workaroundTable.flags.waSamplerCacheFlushBetweenRedescribedSurfaceReads); } } } compute-runtime-22.14.22890/opencl/test/unit_test/gen9/cfl/windows/000077500000000000000000000000001422164147700247275ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/gen9/cfl/windows/CMakeLists.txt000066400000000000000000000005131422164147700274660ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_gen9_cfl_windows ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/test_device_caps_cfl_windows.cpp ) if(WIN32) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen9_cfl_windows}) endif() compute-runtime-22.14.22890/opencl/test/unit_test/gen9/cfl/windows/test_device_caps_cfl_windows.cpp000066400000000000000000000024121422164147700333340ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" using namespace NEO; typedef Test CflDeviceCapsWindows; CFLTEST_F(CflDeviceCapsWindows, GivenWhenGettingKmdNotifyPropertiesThenItIsDisabled) { EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.enableKmdNotify); EXPECT_EQ(0, pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds); EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.enableQuickKmdSleep); EXPECT_EQ(0, pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayQuickKmdSleepMicroseconds); 
EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.enableQuickKmdSleepForSporadicWaits); EXPECT_EQ(0, pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayQuickKmdSleepForSporadicWaitsMicroseconds); EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.enableQuickKmdSleepForDirectSubmission); EXPECT_EQ(0, pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayQuickKmdSleepForDirectSubmissionMicroseconds); } compute-runtime-22.14.22890/opencl/test/unit_test/gen9/cl_get_platform_ids_tests_gen9.cpp000066400000000000000000000047541422164147700313530ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/device/root_device.h" #include "shared/source/os_interface/device_factory.h" #include "shared/source/os_interface/hw_info_config.h" #include "opencl/test/unit_test/api/cl_api_tests.h" using namespace NEO; typedef api_tests clGetPlatformIDsTests; TEST(clGetPlatformIDsMultiPlatformTest, whenCreateDevicesWithDifferentProductFamilyThenClGetPlatformIdsCreatesMultiplePlatformsProperlySorted) { if ((HwInfoConfig::get(IGFX_SKYLAKE) == nullptr) || (HwInfoConfig::get(IGFX_KABYLAKE) == nullptr)) { GTEST_SKIP(); } DebugManagerStateRestore restorer; const size_t numRootDevices = 2u; DebugManager.flags.CreateMultipleRootDevices.set(numRootDevices); VariableBackup createFuncBackup{&DeviceFactory::createRootDeviceFunc}; DeviceFactory::createRootDeviceFunc = [](ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex) -> std::unique_ptr { auto device = std::unique_ptr(Device::create(&executionEnvironment, rootDeviceIndex)); auto hwInfo = device->getRootDeviceEnvironment().getMutableHardwareInfo(); if (rootDeviceIndex == 0) { hwInfo->platform.eProductFamily = IGFX_SKYLAKE; } else { hwInfo->platform.eProductFamily = IGFX_KABYLAKE; } return device; }; platformsImpl->clear(); cl_int retVal = CL_SUCCESS; cl_platform_id 
platformsRet[2]; cl_uint numPlatforms = 0; retVal = clGetPlatformIDs(0, nullptr, &numPlatforms); EXPECT_EQ(2u, numPlatforms); EXPECT_EQ(CL_SUCCESS, retVal); numPlatforms = 0u; retVal = clGetPlatformIDs(2u, platformsRet, &numPlatforms); EXPECT_EQ(2u, numPlatforms); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, platformsRet[0]); auto platform0 = castToObject(platformsRet[0]); EXPECT_EQ(1u, platform0->getNumDevices()); EXPECT_EQ(IGFX_KABYLAKE, platform0->getClDevice(0)->getHardwareInfo().platform.eProductFamily); EXPECT_EQ(1u, platform0->getClDevice(0)->getRootDeviceIndex()); EXPECT_NE(nullptr, platformsRet[1]); auto platform1 = castToObject(platformsRet[1]); EXPECT_EQ(1u, platform1->getNumDevices()); EXPECT_EQ(IGFX_SKYLAKE, platform1->getClDevice(0)->getHardwareInfo().platform.eProductFamily); EXPECT_EQ(0u, platform1->getClDevice(0)->getRootDeviceIndex()); platformsImpl->clear(); } compute-runtime-22.14.22890/opencl/test/unit_test/gen9/coherency_tests_gen9.cpp000066400000000000000000000020561422164147700273230ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/helpers/default_hw_info.h" #include "shared/test/common/helpers/dispatch_flags_helper.h" #include "shared/test/common/libult/ult_command_stream_receiver.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/mocks/ult_device_factory.h" #include "shared/test/common/test_macros/test.h" using namespace NEO; typedef ::testing::Test Gen9CoherencyRequirements; GEN9TEST_F(Gen9CoherencyRequirements, WhenMemoryManagerIsInitializedThenNoCoherencyProgramming) { UltDeviceFactory deviceFactory{1, 0}; LinearStream stream; DispatchFlags flags = DispatchFlagsHelper::createDefaultDispatchFlags(); auto &csr = deviceFactory.rootDevices[0]->getUltCommandStreamReceiver(); csr.programComputeMode(stream, flags, *defaultHwInfo); EXPECT_EQ(0u, stream.getUsed()); flags.requiresCoherency = true; 
csr.programComputeMode(stream, flags, *defaultHwInfo); EXPECT_EQ(0u, stream.getUsed()); } compute-runtime-22.14.22890/opencl/test/unit_test/gen9/command_stream_receiver_hw_tests_gen9.cpp000066400000000000000000000052211422164147700327140ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/built_ins/built_ins.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/command_stream/linear_stream.h" #include "shared/test/common/cmd_parse/hw_parse.h" #include "shared/test/common/helpers/dispatch_flags_helper.h" #include "shared/test/common/mocks/mock_csr.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/event/user_event.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_event.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "gtest/gtest.h" #include "reg_configs_common.h" using namespace NEO; #include "opencl/test/unit_test/command_stream/command_stream_receiver_hw_tests.inl" using CommandStreamReceiverHwTestGen9 = CommandStreamReceiverHwTest; GEN9TEST_F(UltCommandStreamReceiverTest, whenPreambleIsProgrammedThenStateSipCmdIsNotPresentInPreambleCmdStream) { using STATE_SIP = typename FamilyType::STATE_SIP; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.isPreambleSent = false; pDevice->setPreemptionMode(PreemptionMode::Disabled); pDevice->setDebuggerActive(true); uint32_t newL3Config; auto cmdSizePreamble = commandStreamReceiver.getRequiredCmdSizeForPreamble(*pDevice); StackVec preambleBuffer; 
preambleBuffer.resize(cmdSizePreamble); LinearStream preambleStream(&*preambleBuffer.begin(), preambleBuffer.size()); commandStreamReceiver.programPreamble(preambleStream, *pDevice, newL3Config); this->parseCommands(preambleStream); auto itorStateSip = find(this->cmdList.begin(), this->cmdList.end()); EXPECT_EQ(this->cmdList.end(), itorStateSip); pDevice->setDebuggerActive(false); } GEN9TEST_F(CommandStreamReceiverHwTestGen9, GivenKernelWithSlmWhenPreviousNOSLML3WasSentThenProgramL3WithSLML3Config) { givenKernelWithSlmWhenPreviousNOSLML3WasSentThenProgramL3WithSLML3ConfigImpl(); } GEN9TEST_F(CommandStreamReceiverHwTestGen9, GivenBlockedKernelWithSlmWhenPreviousNOSLML3WasSentOnThenProgramL3WithSLML3ConfigAfterUnblocking) { givenBlockedKernelWithSlmWhenPreviousNOSLML3WasSentThenProgramL3WithSLML3ConfigAfterUnblockingImpl(); } compute-runtime-22.14.22890/opencl/test/unit_test/gen9/enqueue_kernel_gen9.cpp000066400000000000000000000050231422164147700271260ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen9/reg_configs.h" #include "shared/test/common/fixtures/memory_management_fixture.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/helpers/cl_hw_parse.h" #include "opencl/test/unit_test/helpers/static_size3.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" namespace NEO { using Gen9EnqueueTest = Test; GEN9TEST_F(Gen9EnqueueTest, givenKernelRequiringIndependentForwardProgressWhenKernelIsSubmittedThenRoundRobinPolicyIsProgrammed) { MockContext mc; CommandQueueHw cmdQ{&mc, pClDevice, 0, false}; SPatchExecutionEnvironment sPatchExecEnv = {}; sPatchExecEnv.SubgroupIndependentForwardProgressRequired = true; MockKernelWithInternals mockKernel(*pClDevice, sPatchExecEnv); cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, 
StatickSize3<1, 1, 1>(), nullptr, 0, nullptr, nullptr); ClHardwareParse hwParser; hwParser.parseCommands(cmdQ); auto cmd = findMmioCmd(hwParser.cmdList.begin(), hwParser.cmdList.end(), DebugControlReg2::address); ASSERT_NE(nullptr, cmd); EXPECT_EQ(DebugControlReg2::getRegData(HwHelperHw::get().getDefaultThreadArbitrationPolicy()), cmd->getDataDword()); EXPECT_EQ(1U, countMmio(hwParser.cmdList.begin(), hwParser.cmdList.end(), DebugControlReg2::address)); } GEN9TEST_F(Gen9EnqueueTest, givenKernelNotRequiringIndependentForwardProgressWhenKernelIsSubmittedThenAgeBasedPolicyIsProgrammed) { MockContext mc; CommandQueueHw cmdQ{&mc, pClDevice, 0, false}; SPatchExecutionEnvironment sPatchExecEnv = {}; sPatchExecEnv.SubgroupIndependentForwardProgressRequired = false; MockKernelWithInternals mockKernel(*pClDevice, sPatchExecEnv); cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, StatickSize3<1, 1, 1>(), nullptr, 0, nullptr, nullptr); ClHardwareParse hwParser; hwParser.parseCommands(cmdQ); auto cmd = findMmioCmd(hwParser.cmdList.begin(), hwParser.cmdList.end(), DebugControlReg2::address); ASSERT_NE(nullptr, cmd); EXPECT_EQ(DebugControlReg2::getRegData(ThreadArbitrationPolicy::AgeBased), cmd->getDataDword()); EXPECT_EQ(1U, countMmio(hwParser.cmdList.begin(), hwParser.cmdList.end(), DebugControlReg2::address)); } } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/gen9/enqueue_media_kernel_gen9.cpp000066400000000000000000000163311422164147700302710ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/preamble.h" #include "shared/test/common/libult/ult_command_stream_receiver.h" #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/media_kernel_fixture.h" using namespace NEO; typedef MediaKernelFixture MediaKernelTest; GEN9TEST_F(MediaKernelTest, 
givenGen9CsrWhenEnqueueBlockedVmeKernelFirstTimeThenProgramPipelineSelectionAndMediaSampler) { typedef typename SKLFamily::PIPELINE_SELECT PIPELINE_SELECT; cl_uint workDim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {1, 1, 1}; UserEvent userEvent(context); cl_event blockedEvent = &userEvent; auto retVal = pCmdQ->enqueueKernel( pVmeKernel, workDim, globalWorkOffset, globalWorkSize, nullptr, 1, &blockedEvent, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); userEvent.setStatus(CL_COMPLETE); parseCommands(*pCmdQ); ASSERT_NE(cmdPipelineSelect, nullptr); auto *pCmd = genCmdCast(cmdPipelineSelect); auto expectedMask = pipelineSelectEnablePipelineSelectMaskBits | pipelineSelectMediaSamplerDopClockGateMaskBits; auto expectedPipelineSelection = PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU; EXPECT_EQ(expectedMask, pCmd->getMaskBits()); EXPECT_EQ(expectedPipelineSelection, pCmd->getPipelineSelection()); EXPECT_FALSE(pCmd->getMediaSamplerDopClockGateEnable()); } GEN9TEST_F(MediaKernelTest, givenGen9CsrWhenEnqueueBlockedNonVmeKernelFirstTimeThenProgramPipelineSelectionAndMediaSampler) { typedef typename SKLFamily::PIPELINE_SELECT PIPELINE_SELECT; cl_uint workDim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {1, 1, 1}; UserEvent userEvent(context); cl_event blockedEvent = &userEvent; auto retVal = pCmdQ->enqueueKernel( pKernel, workDim, globalWorkOffset, globalWorkSize, nullptr, 1, &blockedEvent, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); userEvent.setStatus(CL_COMPLETE); parseCommands(*pCmdQ); ASSERT_NE(cmdPipelineSelect, nullptr); auto *pCmd = genCmdCast(cmdPipelineSelect); auto expectedMask = pipelineSelectEnablePipelineSelectMaskBits | pipelineSelectMediaSamplerDopClockGateMaskBits; auto expectedPipelineSelection = PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU; EXPECT_EQ(expectedMask, pCmd->getMaskBits()); EXPECT_EQ(expectedPipelineSelection, pCmd->getPipelineSelection()); EXPECT_TRUE(pCmd->getMediaSamplerDopClockGateEnable()); } 
// Gen9 checks of how PIPELINE_SELECT is emitted when VME and regular (non-VME) kernels are enqueued.
//
// NOTE(review): the explicit template arguments used below (<SKLFamily>, <PIPELINE_SELECT>,
// <UltCommandStreamReceiver<SKLFamily> *>) are spelled out from the local PIPELINE_SELECT alias and
// from this file's ult_command_stream_receiver.h include - confirm against MediaKernelFixture.

// A single VME enqueue must yield exactly one PIPELINE_SELECT that selects the GPGPU pipeline,
// carries both mask bits and has the media sampler DOP clock gate enable bit cleared.
GEN9TEST_F(MediaKernelTest, givenGen9CsrWhenEnqueueVmeKernelFirstTimeThenProgramPipelineSelectionAndMediaSampler) {
    using PIPELINE_SELECT = typename SKLFamily::PIPELINE_SELECT;

    enqueueVmeKernel<SKLFamily>();

    EXPECT_EQ(1u, getCommandsList<PIPELINE_SELECT>().size());

    auto pCmd = getCommand<PIPELINE_SELECT>();
    const auto expectedMask = pipelineSelectEnablePipelineSelectMaskBits | pipelineSelectMediaSamplerDopClockGateMaskBits;
    const auto expectedPipelineSelection = PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU;
    EXPECT_EQ(expectedMask, pCmd->getMaskBits());
    EXPECT_EQ(expectedPipelineSelection, pCmd->getPipelineSelection());
    EXPECT_FALSE(pCmd->getMediaSamplerDopClockGateEnable());
}

// Same as above for a regular kernel, except that the DOP clock gate enable bit is expected to be set.
GEN9TEST_F(MediaKernelTest, givenGen9CsrWhenEnqueueNonVmeKernelFirstTimeThenProgramPipelineSelectionAndMediaSampler) {
    using PIPELINE_SELECT = typename SKLFamily::PIPELINE_SELECT;

    enqueueRegularKernel<SKLFamily>();

    EXPECT_EQ(1u, getCommandsList<PIPELINE_SELECT>().size());

    auto pCmd = getCommand<PIPELINE_SELECT>();
    const auto expectedMask = pipelineSelectEnablePipelineSelectMaskBits | pipelineSelectMediaSamplerDopClockGateMaskBits;
    const auto expectedPipelineSelection = PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU;
    EXPECT_EQ(expectedMask, pCmd->getMaskBits());
    EXPECT_EQ(expectedPipelineSelection, pCmd->getPipelineSelection());
    EXPECT_TRUE(pCmd->getMediaSamplerDopClockGateEnable());
}

// Two consecutive VME enqueues must not add a second PIPELINE_SELECT.
GEN9TEST_F(MediaKernelTest, givenGen9CsrWhenEnqueueVmeKernelTwiceThenProgramPipelineSelectOnce) {
    using PIPELINE_SELECT = typename SKLFamily::PIPELINE_SELECT;

    // The kernel has to be submitted twice; with a single enqueue the "once" expectation
    // would hold trivially and the test would not cover the repeated-submission path.
    enqueueVmeKernel<SKLFamily>();
    enqueueVmeKernel<SKLFamily>();

    EXPECT_EQ(1u, getCommandsList<PIPELINE_SELECT>().size());
}

// Two consecutive regular (non-VME) enqueues must not add a second PIPELINE_SELECT.
GEN9TEST_F(MediaKernelTest, givenGen9CsrWhenEnqueueNonVmeKernelTwiceThenProgramPipelineSelectOnce) {
    using PIPELINE_SELECT = typename SKLFamily::PIPELINE_SELECT;

    // Use the regular kernel here - enqueueing the VME kernel would only repeat the test above.
    enqueueRegularKernel<SKLFamily>();
    enqueueRegularKernel<SKLFamily>();

    EXPECT_EQ(1u, getCommandsList<PIPELINE_SELECT>().size());
}

// Switching from a regular kernel to a VME kernel must program PIPELINE_SELECT a second time,
// and the latest command is expected to have the DOP clock gate enable bit cleared.
GEN9TEST_F(MediaKernelTest, givenGen9CsrWhenEnqueueVmeKernelAfterNonVmeKernelThenProgramPipelineSelectionAndMediaSamplerTwice) {
    using PIPELINE_SELECT = typename SKLFamily::PIPELINE_SELECT;

    enqueueRegularKernel<SKLFamily>();
    enqueueVmeKernel<SKLFamily>();

    auto commands = getCommandsList<PIPELINE_SELECT>();
    EXPECT_EQ(2u, commands.size());

    // Only the most recent PIPELINE_SELECT is inspected.
    auto pCmd = static_cast<PIPELINE_SELECT *>(commands.back());
    const auto expectedMask = pipelineSelectEnablePipelineSelectMaskBits | pipelineSelectMediaSamplerDopClockGateMaskBits;
    EXPECT_EQ(expectedMask, pCmd->getMaskBits());
    EXPECT_FALSE(pCmd->getMediaSamplerDopClockGateEnable());
}

// Switching from a VME kernel to a regular kernel must program PIPELINE_SELECT a second time,
// and the latest command is expected to have the DOP clock gate enable bit set.
GEN9TEST_F(MediaKernelTest, givenGen9CsrWhenEnqueueNonVmeKernelAfterVmeKernelThenProgramProgramPipelineSelectionAndMediaSamplerTwice) {
    using PIPELINE_SELECT = typename SKLFamily::PIPELINE_SELECT;

    enqueueVmeKernel<SKLFamily>();
    enqueueRegularKernel<SKLFamily>();

    auto commands = getCommandsList<PIPELINE_SELECT>();
    EXPECT_EQ(2u, commands.size());

    // Only the most recent PIPELINE_SELECT is inspected.
    auto pCmd = static_cast<PIPELINE_SELECT *>(commands.back());
    const auto expectedMask = pipelineSelectEnablePipelineSelectMaskBits | pipelineSelectMediaSamplerDopClockGateMaskBits;
    EXPECT_EQ(expectedMask, pCmd->getMaskBits());
    EXPECT_TRUE(pCmd->getMediaSamplerDopClockGateEnable());
}

// A VME enqueue must leave lastVmeSubslicesConfig untouched when it was preset to true.
GEN9TEST_F(MediaKernelTest, givenGen9CsrWhenEnqueueVmeKernelThenVmeSubslicesConfigDoesntChangeToFalse) {
    auto csr = static_cast<UltCommandStreamReceiver<SKLFamily> *>(&pCmdQ->getGpgpuCommandStreamReceiver());
    csr->lastVmeSubslicesConfig = true;

    enqueueVmeKernel<SKLFamily>();

    EXPECT_TRUE(csr->lastVmeSubslicesConfig);
}

// A VME enqueue must leave lastVmeSubslicesConfig untouched when it was preset to false.
GEN9TEST_F(MediaKernelTest, givenGen9CsrWhenEnqueueVmeKernelThenVmeSubslicesConfigDoesntChangeToTrue) {
    auto csr = static_cast<UltCommandStreamReceiver<SKLFamily> *>(&pCmdQ->getGpgpuCommandStreamReceiver());
    csr->lastVmeSubslicesConfig = false;

    enqueueVmeKernel<SKLFamily>();

    EXPECT_FALSE(csr->lastVmeSubslicesConfig);
}

// getCmdSizeForMediaSampler must report zero for every combination of the stored
// lastVmeSubslicesConfig value and the requested media sampler flag.
GEN9TEST_F(MediaKernelTest, GivenGen9WhenGettingCmdSizeForMediaSamplerThenZeroIsReturned) {
    auto csr = static_cast<UltCommandStreamReceiver<SKLFamily> *>(&pCmdQ->getGpgpuCommandStreamReceiver());

    csr->lastVmeSubslicesConfig = false;
    EXPECT_EQ(0u, csr->getCmdSizeForMediaSampler(false));
    EXPECT_EQ(0u, csr->getCmdSizeForMediaSampler(true));

    csr->lastVmeSubslicesConfig = true;
    EXPECT_EQ(0u, csr->getCmdSizeForMediaSampler(false));
    EXPECT_EQ(0u, csr->getCmdSizeForMediaSampler(true));
}
compute-runtime-22.14.22890/opencl/test/unit_test/gen9/glk/000077500000000000000000000000001422164147700232465ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/gen9/glk/CMakeLists.txt000066400000000000000000000010321422164147700260020ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_GLK) set(IGDRCL_SRCS_tests_gen9_glk ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/test_device_caps_glk.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_hw_info_config_glk.cpp ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen9_glk}) add_subdirectories() neo_copy_test_files_with_revision(copy_test_files_glk_0 glk 0) add_dependencies(copy_test_files_per_product copy_test_files_glk_0) endif() compute-runtime-22.14.22890/opencl/test/unit_test/gen9/glk/linux/000077500000000000000000000000001422164147700244055ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/gen9/glk/linux/CMakeLists.txt000066400000000000000000000005311422164147700271440ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_gen9_glk_linux ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/hw_info_config_tests_glk.cpp ) if(UNIX) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen9_glk_linux}) add_subdirectory(dll) endif() compute-runtime-22.14.22890/opencl/test/unit_test/gen9/glk/linux/dll/000077500000000000000000000000001422164147700251605ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/gen9/glk/linux/dll/CMakeLists.txt000066400000000000000000000004731422164147700277240ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_linux_dll_tests_gen9_glk ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/device_id_tests_glk.cpp ) 
target_sources(igdrcl_linux_dll_tests PRIVATE ${IGDRCL_SRCS_linux_dll_tests_gen9_glk}) compute-runtime-22.14.22890/opencl/test/unit_test/gen9/glk/linux/dll/device_id_tests_glk.cpp000066400000000000000000000007641422164147700316650ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/fixtures/linux/device_id_fixture.h" using namespace NEO; TEST_F(DeviceIdTests, GivenGlkSupportedDeviceIdThenHardwareInfoIsCorrect) { std::array expectedDescriptors = {{ {0x3184, &GLK_1x3x6::hwInfo, &GLK_1x3x6::setupHardwareInfo}, {0x3185, &GLK_1x2x6::hwInfo, &GLK_1x2x6::setupHardwareInfo}, }}; testImpl(expectedDescriptors); } compute-runtime-22.14.22890/opencl/test/unit_test/gen9/glk/linux/hw_info_config_tests_glk.cpp000066400000000000000000000217631422164147700321570ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/unit_test/helpers/gtest_helpers.h" #include "opencl/test/unit_test/os_interface/linux/hw_info_config_linux_tests.h" using namespace NEO; struct HwInfoConfigTestLinuxGlk : HwInfoConfigTestLinux { void SetUp() override { HwInfoConfigTestLinux::SetUp(); drm->storedDeviceID = 0x3185; drm->storedEUVal = 18; drm->storedHasPooledEU = 1; drm->storedMinEUinPool = 3; } }; GLKTEST_F(HwInfoConfigTestLinuxGlk, WhenConfiguringHwInfoThenInformationIsCorrect) { auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ((unsigned short)drm->storedDeviceID, outHwInfo.platform.usDeviceID); EXPECT_EQ((unsigned short)drm->storedDeviceRevID, outHwInfo.platform.usRevId); EXPECT_EQ((uint32_t)drm->storedEUVal, outHwInfo.gtSystemInfo.EUCount); EXPECT_EQ((uint32_t)drm->storedSSVal, outHwInfo.gtSystemInfo.SubSliceCount); EXPECT_EQ(aub_stream::ENGINE_RCS, outHwInfo.capabilityTable.defaultEngineType); //constant sysInfo/ftr flags 
EXPECT_EQ(1u, outHwInfo.gtSystemInfo.VEBoxInfo.Instances.Bits.VEBox0Enabled); EXPECT_TRUE(outHwInfo.gtSystemInfo.VEBoxInfo.IsValid); drm->storedDeviceID = 0x3184; drm->storedMinEUinPool = 6; ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ((unsigned short)drm->storedDeviceID, outHwInfo.platform.usDeviceID); EXPECT_EQ((unsigned short)drm->storedDeviceRevID, outHwInfo.platform.usRevId); EXPECT_EQ((uint32_t)drm->storedEUVal, outHwInfo.gtSystemInfo.EUCount); EXPECT_EQ((uint32_t)drm->storedSSVal, outHwInfo.gtSystemInfo.SubSliceCount); EXPECT_EQ((unsigned int)drm->storedHasPooledEU, outHwInfo.featureTable.flags.ftrPooledEuEnabled); EXPECT_EQ((uint32_t)drm->storedMinEUinPool, outHwInfo.gtSystemInfo.EuCountPerPoolMin); EXPECT_EQ((outHwInfo.gtSystemInfo.EUCount - outHwInfo.gtSystemInfo.EuCountPerPoolMin), outHwInfo.gtSystemInfo.EuCountPerPoolMax); EXPECT_EQ(aub_stream::ENGINE_RCS, outHwInfo.capabilityTable.defaultEngineType); drm->storedDeviceID = 0x3185; drm->storedMinEUinPool = 9; ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ((unsigned short)drm->storedDeviceID, outHwInfo.platform.usDeviceID); EXPECT_EQ((unsigned short)drm->storedDeviceRevID, outHwInfo.platform.usRevId); EXPECT_EQ((uint32_t)drm->storedEUVal, outHwInfo.gtSystemInfo.EUCount); EXPECT_EQ((uint32_t)drm->storedSSVal, outHwInfo.gtSystemInfo.SubSliceCount); EXPECT_EQ((unsigned int)drm->storedHasPooledEU, outHwInfo.featureTable.flags.ftrPooledEuEnabled); EXPECT_EQ((uint32_t)drm->storedMinEUinPool, outHwInfo.gtSystemInfo.EuCountPerPoolMin); EXPECT_EQ((outHwInfo.gtSystemInfo.EUCount - outHwInfo.gtSystemInfo.EuCountPerPoolMin), outHwInfo.gtSystemInfo.EuCountPerPoolMax); EXPECT_EQ(aub_stream::ENGINE_RCS, outHwInfo.capabilityTable.defaultEngineType); auto &outKmdNotifyProperties = outHwInfo.capabilityTable.kmdNotifyProperties; EXPECT_TRUE(outKmdNotifyProperties.enableKmdNotify); EXPECT_EQ(50000, 
outKmdNotifyProperties.delayKmdNotifyMicroseconds); EXPECT_TRUE(outKmdNotifyProperties.enableQuickKmdSleep); EXPECT_EQ(5000, outKmdNotifyProperties.delayQuickKmdSleepMicroseconds); EXPECT_TRUE(outKmdNotifyProperties.enableQuickKmdSleepForSporadicWaits); EXPECT_EQ(200000, outKmdNotifyProperties.delayQuickKmdSleepForSporadicWaitsMicroseconds); EXPECT_FALSE(outKmdNotifyProperties.enableQuickKmdSleepForDirectSubmission); EXPECT_EQ(0, outKmdNotifyProperties.delayQuickKmdSleepForDirectSubmissionMicroseconds); } GLKTEST_F(HwInfoConfigTestLinuxGlk, GivenInvalidInputWhenConfiguringHwInfoThenErrorIsReturned) { auto hwInfoConfig = HwInfoConfig::get(productFamily); drm->storedRetValForDeviceID = -1; int ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-1, ret); drm->storedRetValForDeviceID = 0; drm->storedRetValForDeviceRevID = -1; ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-1, ret); drm->storedRetValForDeviceRevID = 0; drm->failRetTopology = true; drm->storedRetValForEUVal = -1; ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-1, ret); drm->storedRetValForEUVal = 0; drm->storedRetValForSSVal = -1; ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-1, ret); } GLKTEST_F(HwInfoConfigTestLinuxGlk, GivenFailingEnabledPoolWhenConfiguringHwInfoThenZeroIsSet) { drm->storedRetValForPooledEU = -1; auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ(0u, outHwInfo.featureTable.flags.ftrPooledEuEnabled); EXPECT_EQ(0u, outHwInfo.gtSystemInfo.EuCountPerPoolMin); EXPECT_EQ(0u, outHwInfo.gtSystemInfo.EuCountPerPoolMax); } GLKTEST_F(HwInfoConfigTestLinuxGlk, GivenDisabledEnabledPoolWhenConfiguringHwInfoThenZeroIsSet) { drm->storedHasPooledEU = 0; auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = 
hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ(0u, outHwInfo.featureTable.flags.ftrPooledEuEnabled); EXPECT_EQ(0u, outHwInfo.gtSystemInfo.EuCountPerPoolMin); EXPECT_EQ(0u, outHwInfo.gtSystemInfo.EuCountPerPoolMax); } GLKTEST_F(HwInfoConfigTestLinuxGlk, GivenFailingMinEuInPoolWhenConfiguringHwInfoThenCorrectValueSet) { drm->storedRetValForMinEUinPool = -1; drm->storedSSVal = 3; auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ(1u, outHwInfo.featureTable.flags.ftrPooledEuEnabled); EXPECT_EQ(9u, outHwInfo.gtSystemInfo.EuCountPerPoolMin); EXPECT_EQ((outHwInfo.gtSystemInfo.EUCount - outHwInfo.gtSystemInfo.EuCountPerPoolMin), outHwInfo.gtSystemInfo.EuCountPerPoolMax); drm->storedSSVal = 2; ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ(1u, outHwInfo.featureTable.flags.ftrPooledEuEnabled); EXPECT_EQ(3u, outHwInfo.gtSystemInfo.EuCountPerPoolMin); EXPECT_EQ((outHwInfo.gtSystemInfo.EUCount - outHwInfo.gtSystemInfo.EuCountPerPoolMin), outHwInfo.gtSystemInfo.EuCountPerPoolMax); } GLKTEST_F(HwInfoConfigTestLinuxGlk, GivenInvalidMinEuInPoolWhenConfiguringHwInfoThenCorrectValueSet) { drm->storedMinEUinPool = 4; drm->storedSSVal = 3; auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ(1u, outHwInfo.featureTable.flags.ftrPooledEuEnabled); EXPECT_EQ(9u, outHwInfo.gtSystemInfo.EuCountPerPoolMin); EXPECT_EQ((outHwInfo.gtSystemInfo.EUCount - outHwInfo.gtSystemInfo.EuCountPerPoolMin), outHwInfo.gtSystemInfo.EuCountPerPoolMax); drm->storedSSVal = 2; ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ(1u, outHwInfo.featureTable.flags.ftrPooledEuEnabled); EXPECT_EQ(3u, 
outHwInfo.gtSystemInfo.EuCountPerPoolMin); EXPECT_EQ((outHwInfo.gtSystemInfo.EUCount - outHwInfo.gtSystemInfo.EuCountPerPoolMin), outHwInfo.gtSystemInfo.EuCountPerPoolMax); } GLKTEST_F(HwInfoConfigTestLinuxGlk, GivenWaFlagsWhenConfiguringHwInfoThenInformationIsCorrect) { auto hwInfoConfig = HwInfoConfig::get(productFamily); drm->storedDeviceRevID = 0; int ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); } template class GlkHwInfoTests : public ::testing::Test { }; typedef ::testing::Types glkTestTypes; TYPED_TEST_CASE(GlkHwInfoTests, glkTestTypes); TYPED_TEST(GlkHwInfoTests, WhenGtIsSetupThenGtSystemInfoIsCorrect) { HardwareInfo hwInfo = *defaultHwInfo; auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); DrmMock drm(*executionEnvironment->rootDeviceEnvironments[0]); GT_SYSTEM_INFO >SystemInfo = hwInfo.gtSystemInfo; DeviceDescriptor device = {0, &hwInfo, &TypeParam::setupHardwareInfo}; int ret = drm.setupHardwareInfo(&device, false); EXPECT_EQ(ret, 0); EXPECT_GT(gtSystemInfo.EUCount, 0u); EXPECT_GT(gtSystemInfo.ThreadCount, 0u); EXPECT_GT(gtSystemInfo.SliceCount, 0u); EXPECT_GT(gtSystemInfo.SubSliceCount, 0u); EXPECT_GT(gtSystemInfo.DualSubSliceCount, 0u); EXPECT_GT_VAL(gtSystemInfo.L3CacheSizeInKb, 0u); EXPECT_EQ(gtSystemInfo.CsrSizeInMb, 8u); EXPECT_FALSE(gtSystemInfo.IsDynamicallyPopulated); } compute-runtime-22.14.22890/opencl/test/unit_test/gen9/glk/test_device_caps_glk.cpp000066400000000000000000000036251422164147700301210ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" using namespace NEO; typedef Test Gen9DeviceCaps; GLKTEST_F(Gen9DeviceCaps, 
WhenCheckingProfilingTimerResolutionThenCorrectResolutionIsReturned) { const auto &caps = pDevice->getDeviceInfo(); EXPECT_EQ(52u, caps.outProfilingTimerResolution); } GLKTEST_F(Gen9DeviceCaps, givenGlkDeviceWhenAskedForDoubleSupportThenTrueIsReturned) { EXPECT_TRUE(pDevice->getHardwareInfo().capabilityTable.ftrSupportsFP64); } GLKTEST_F(Gen9DeviceCaps, GlkIs32BitOsAllocatorAvailable) { const auto &caps = pDevice->getDeviceInfo(); auto memoryManager = pDevice->getMemoryManager(); if constexpr (is64bit) { EXPECT_TRUE(memoryManager->peekForce32BitAllocations()); EXPECT_TRUE(caps.force32BitAddressess); } else { EXPECT_FALSE(memoryManager->peekForce32BitAllocations()); EXPECT_FALSE(caps.force32BitAddressess); } } typedef Test GlkUsDeviceIdTest; GLKTEST_F(GlkUsDeviceIdTest, WhenCheckingIsSimulationThenTrueReturnedOnlyForSimulationId) { unsigned short glkSimulationIds[3] = { 0x3184, 0x3185, 0, // default, non-simulation }; NEO::MockDevice *mockDevice = nullptr; for (auto id : glkSimulationIds) { mockDevice = createWithUsDeviceId(id); ASSERT_NE(mockDevice, nullptr); EXPECT_FALSE(mockDevice->isSimulation()); delete mockDevice; } } GLKTEST_F(GlkUsDeviceIdTest, GivenGLKWhenCheckftr64KBpagesThenFalse) { EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.ftr64KBpages); } GLKTEST_F(GlkUsDeviceIdTest, givenGlkWhenCheckFtrSupportsInteger64BitAtomicsThenReturnFalse) { EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.ftrSupportsInteger64BitAtomics); } compute-runtime-22.14.22890/opencl/test/unit_test/gen9/glk/test_hw_info_config_glk.cpp000066400000000000000000000072731422164147700306350ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/helpers/default_hw_info.h" #include "shared/test/common/test_macros/test.h" using namespace NEO; TEST(GlkHwInfoConfig, GivenIncorrectDataWhenConfiguringHwInfoThenErrorIsReturned) { if (IGFX_GEMINILAKE != productFamily) { return; } HardwareInfo 
hwInfo = *defaultHwInfo; GT_SYSTEM_INFO >SystemInfo = hwInfo.gtSystemInfo; gtSystemInfo = {0}; uint64_t config = 0xdeadbeef; gtSystemInfo = {0}; EXPECT_ANY_THROW(hardwareInfoSetup[productFamily](&hwInfo, false, config)); EXPECT_EQ(0u, gtSystemInfo.SliceCount); EXPECT_EQ(0u, gtSystemInfo.SubSliceCount); EXPECT_EQ(0u, gtSystemInfo.DualSubSliceCount); EXPECT_EQ(0u, gtSystemInfo.EUCount); } using GlkHwInfo = ::testing::Test; GLKTEST_F(GlkHwInfo, givenBoolWhenCallGlkHardwareInfoSetupThenFeatureTableAndWorkaroundTableAreSetCorrect) { uint64_t configs[] = { 0x100020006, 0x100030006}; bool boolValue[]{ true, false}; HardwareInfo hwInfo = *defaultHwInfo; GT_SYSTEM_INFO >SystemInfo = hwInfo.gtSystemInfo; FeatureTable &featureTable = hwInfo.featureTable; WorkaroundTable &workaroundTable = hwInfo.workaroundTable; for (auto &config : configs) { for (auto setParamBool : boolValue) { gtSystemInfo = {0}; featureTable = {}; workaroundTable = {}; hardwareInfoSetup[productFamily](&hwInfo, setParamBool, config); EXPECT_EQ(setParamBool, featureTable.flags.ftrGpGpuMidBatchPreempt); EXPECT_EQ(setParamBool, featureTable.flags.ftrGpGpuThreadGroupLevelPreempt); EXPECT_EQ(setParamBool, featureTable.flags.ftrL3IACoherency); EXPECT_EQ(setParamBool, featureTable.flags.ftrGpGpuMidThreadLevelPreempt); EXPECT_EQ(setParamBool, featureTable.flags.ftr3dMidBatchPreempt); EXPECT_EQ(setParamBool, featureTable.flags.ftr3dObjectLevelPreempt); EXPECT_EQ(setParamBool, featureTable.flags.ftrPerCtxtPreemptionGranularityControl); EXPECT_EQ(setParamBool, featureTable.flags.ftrLCIA); EXPECT_EQ(setParamBool, featureTable.flags.ftrPPGTT); EXPECT_EQ(setParamBool, featureTable.flags.ftrIA32eGfxPTEs); EXPECT_EQ(setParamBool, featureTable.flags.ftrTranslationTable); EXPECT_EQ(setParamBool, featureTable.flags.ftrUserModeTranslationTable); EXPECT_EQ(setParamBool, featureTable.flags.ftrEnableGuC); EXPECT_EQ(setParamBool, featureTable.flags.ftrTileMappedResource); EXPECT_EQ(setParamBool, featureTable.flags.ftrULT); 
EXPECT_EQ(setParamBool, featureTable.flags.ftrAstcHdr2D); EXPECT_EQ(setParamBool, featureTable.flags.ftrAstcLdr2D); EXPECT_EQ(setParamBool, featureTable.flags.ftrTileY); EXPECT_EQ(setParamBool, workaroundTable.flags.waLLCCachingUnsupported); EXPECT_EQ(setParamBool, workaroundTable.flags.waMsaa8xTileYDepthPitchAlignment); EXPECT_EQ(setParamBool, workaroundTable.flags.waFbcLinearSurfaceStride); EXPECT_EQ(setParamBool, workaroundTable.flags.wa4kAlignUVOffsetNV12LinearSurface); EXPECT_EQ(setParamBool, workaroundTable.flags.waEnablePreemptionGranularityControlByUMD); EXPECT_EQ(setParamBool, workaroundTable.flags.waSendMIFLUSHBeforeVFE); EXPECT_EQ(setParamBool, workaroundTable.flags.waForcePcBbFullCfgRestore); EXPECT_EQ(setParamBool, workaroundTable.flags.waReportPerfCountUseGlobalContextID); EXPECT_EQ(setParamBool, workaroundTable.flags.waSamplerCacheFlushBetweenRedescribedSurfaceReads); } } } compute-runtime-22.14.22890/opencl/test/unit_test/gen9/glk/windows/000077500000000000000000000000001422164147700247405ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/gen9/glk/windows/CMakeLists.txt000066400000000000000000000005131422164147700274770ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_gen9_glk_windows ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/test_device_caps_glk_windows.cpp ) if(WIN32) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen9_glk_windows}) endif() compute-runtime-22.14.22890/opencl/test/unit_test/gen9/glk/windows/test_device_caps_glk_windows.cpp000066400000000000000000000024301422164147700333560ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" using namespace NEO; typedef Test GlkDeviceCapsWindows; GLKTEST_F(GlkDeviceCapsWindows, 
WhenCheckingKmdNotifyPropertiesThenKmdNotifyIsEnabledCorrectly) { EXPECT_TRUE(pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.enableKmdNotify); EXPECT_EQ(30000, pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds); EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.enableQuickKmdSleep); EXPECT_EQ(0, pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayQuickKmdSleepMicroseconds); EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.enableQuickKmdSleepForSporadicWaits); EXPECT_EQ(0, pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayQuickKmdSleepForSporadicWaitsMicroseconds); EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.enableQuickKmdSleepForDirectSubmission); EXPECT_EQ(0, pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayQuickKmdSleepForDirectSubmissionMicroseconds); } compute-runtime-22.14.22890/opencl/test/unit_test/gen9/hw_helper_tests_gen9.cpp000066400000000000000000000054631422164147700273260ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "shared/test/common/helpers/hw_helper_tests.h" #include "shared/test/unit_test/helpers/get_gpgpu_engines_tests.inl" #include "opencl/source/helpers/cl_hw_helper.h" #include "opencl/test/unit_test/mocks/mock_cl_hw_helper.h" using HwHelperTestGen9 = HwHelperTest; GEN9TEST_F(HwHelperTestGen9, WhenGettingMaxBarriersPerSliceThenCorrectSizeIsReturned) { auto &helper = HwHelper::get(renderCoreFamily); EXPECT_EQ(32u, helper.getMaxBarrierRegisterPerSlice()); } GEN9TEST_F(HwHelperTestGen9, givenGen9WhenCallIsPackedSupportedThenReturnFalse) { auto &helper = HwHelper::get(renderCoreFamily); EXPECT_FALSE(helper.packedFormatsSupported()); } GEN9TEST_F(HwHelperTestGen9, WhenGettingPitchAlignmentForImageThenCorrectValueIsReturned) { 
auto &helper = HwHelper::get(renderCoreFamily); EXPECT_EQ(4u, helper.getPitchAlignmentForImage(&hardwareInfo)); } GEN9TEST_F(HwHelperTestGen9, WhenAdjustingDefaultEngineTypeThenEngineTypeIsSet) { auto engineType = hardwareInfo.capabilityTable.defaultEngineType; auto &helper = HwHelper::get(renderCoreFamily); helper.adjustDefaultEngineType(&hardwareInfo); EXPECT_EQ(engineType, hardwareInfo.capabilityTable.defaultEngineType); } GEN9TEST_F(HwHelperTestGen9, givenDebuggingActiveWhenSipKernelTypeIsQueriedThenDbgCsrLocalTypeIsReturned) { auto &helper = HwHelper::get(renderCoreFamily); auto sipType = helper.getSipKernelType(true); EXPECT_EQ(SipKernelType::DbgCsrLocal, sipType); } GEN9TEST_F(HwHelperTestGen9, whenGetGpgpuEnginesThenReturnThreeRcsEngines) { whenGetGpgpuEnginesThenReturnTwoRcsEngines(pDevice->getHardwareInfo()); EXPECT_EQ(3u, pDevice->allEngines.size()); } GEN9TEST_F(HwHelperTestGen9, WhenGettingDeviceIpVersionThenMakeCorrectDeviceIpVersion) { EXPECT_EQ(ClHwHelperMock::makeDeviceIpVersion(9, 0, 0), ClHwHelper::get(renderCoreFamily).getDeviceIpVersion(*defaultHwInfo)); } GEN9TEST_F(HwHelperTestGen9, WhenGettingSupportedDeviceFeatureCapabilitiesThenReturnCorrectValue) { EXPECT_EQ(0u, ClHwHelper::get(renderCoreFamily).getSupportedDeviceFeatureCapabilities()); } using MemorySynchronizatiopCommandsTestsGen9 = ::testing::Test; GEN9TEST_F(MemorySynchronizatiopCommandsTestsGen9, WhenProgrammingCacheFlushThenExpectConstantCacheFieldSet) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; std::unique_ptr buffer(new uint8_t[128]); LinearStream stream(buffer.get(), 128); MemorySynchronizationCommands::addFullCacheFlush(stream, *defaultHwInfo); PIPE_CONTROL *pipeControl = genCmdCast(buffer.get()); ASSERT_NE(nullptr, pipeControl); EXPECT_TRUE(pipeControl->getConstantCacheInvalidationEnable()); } compute-runtime-22.14.22890/opencl/test/unit_test/gen9/image_tests_gen9.cpp000066400000000000000000000021731422164147700264260ustar00rootroot00000000000000/* * Copyright (C) 
2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" using namespace NEO; typedef ::testing::Test gen9ImageTests; GEN9TEST_F(gen9ImageTests, appendSurfaceSWhenAppendingSurfaceStateParamsThenSurfaceStateDoesNotChangetateParamsDoesNothing) { typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; MockContext context; auto image = std::unique_ptr(ImageHelper::create(&context)); auto surfaceStateBefore = FamilyType::cmdInitRenderSurfaceState; auto surfaceStateAfter = FamilyType::cmdInitRenderSurfaceState; auto imageHw = static_cast *>(image.get()); EXPECT_EQ(0, memcmp(&surfaceStateBefore, &surfaceStateAfter, sizeof(RENDER_SURFACE_STATE))); imageHw->appendSurfaceStateParams(&surfaceStateAfter, context.getDevice(0)->getRootDeviceIndex(), false); EXPECT_EQ(0, memcmp(&surfaceStateBefore, &surfaceStateAfter, sizeof(RENDER_SURFACE_STATE))); } compute-runtime-22.14.22890/opencl/test/unit_test/gen9/kbl/000077500000000000000000000000001422164147700232415ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/gen9/kbl/CMakeLists.txt000066400000000000000000000010321422164147700257750ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_KBL) set(IGDRCL_SRCS_tests_gen9_kbl ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/test_device_caps_kbl.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_hw_info_config_kbl.cpp ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen9_kbl}) add_subdirectories() neo_copy_test_files_with_revision(copy_test_files_kbl_9 kbl 9) add_dependencies(copy_test_files_per_product copy_test_files_kbl_9) endif() 
compute-runtime-22.14.22890/opencl/test/unit_test/gen9/kbl/linux/000077500000000000000000000000001422164147700244005ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/gen9/kbl/linux/CMakeLists.txt000066400000000000000000000005311422164147700271370ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_gen9_kbl_linux ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/hw_info_config_tests_kbl.cpp ) if(UNIX) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen9_kbl_linux}) add_subdirectory(dll) endif() compute-runtime-22.14.22890/opencl/test/unit_test/gen9/kbl/linux/dll/000077500000000000000000000000001422164147700251535ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/gen9/kbl/linux/dll/CMakeLists.txt000066400000000000000000000004731422164147700277170ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_linux_dll_tests_gen9_kbl ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/device_id_tests_kbl.cpp ) target_sources(igdrcl_linux_dll_tests PRIVATE ${IGDRCL_SRCS_linux_dll_tests_gen9_kbl}) compute-runtime-22.14.22890/opencl/test/unit_test/gen9/kbl/linux/dll/device_id_tests_kbl.cpp000066400000000000000000000041611422164147700316460ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/fixtures/linux/device_id_fixture.h" using namespace NEO; TEST_F(DeviceIdTests, GivenKblSupportedDeviceIdThenHardwareInfoIsCorrect) { std::array expectedDescriptors = {{ {0x5902, &KBL_1x2x6::hwInfo, &KBL_1x2x6::setupHardwareInfo}, {0x590B, &KBL_1x2x6::hwInfo, &KBL_1x2x6::setupHardwareInfo}, {0x590A, &KBL_1x2x6::hwInfo, &KBL_1x2x6::setupHardwareInfo}, {0x5906, &KBL_1x2x6::hwInfo, &KBL_1x2x6::setupHardwareInfo}, {0x590E, &KBL_1x3x6::hwInfo, 
&KBL_1x3x6::setupHardwareInfo}, {0x5908, &KBL_1x2x6::hwInfo, &KBL_1x2x6::setupHardwareInfo}, {0x5913, &KBL_1x3x6::hwInfo, &KBL_1x3x6::setupHardwareInfo}, {0x5915, &KBL_1x2x6::hwInfo, &KBL_1x2x6::setupHardwareInfo}, {0x5912, &KBL_1x3x8::hwInfo, &KBL_1x3x8::setupHardwareInfo}, {0x591B, &KBL_1x3x8::hwInfo, &KBL_1x3x8::setupHardwareInfo}, {0x5917, &KBL_1x3x8::hwInfo, &KBL_1x3x8::setupHardwareInfo}, {0x591A, &KBL_1x3x8::hwInfo, &KBL_1x3x8::setupHardwareInfo}, {0x5916, &KBL_1x3x8::hwInfo, &KBL_1x3x8::setupHardwareInfo}, {0x591E, &KBL_1x3x8::hwInfo, &KBL_1x3x8::setupHardwareInfo}, {0x591D, &KBL_1x3x8::hwInfo, &KBL_1x3x8::setupHardwareInfo}, {0x591C, &KBL_1x3x8::hwInfo, &KBL_1x3x8::setupHardwareInfo}, {0x5921, &KBL_1x3x8::hwInfo, &KBL_1x3x8::setupHardwareInfo}, {0x5926, &KBL_2x3x8::hwInfo, &KBL_2x3x8::setupHardwareInfo}, {0x5927, &KBL_2x3x8::hwInfo, &KBL_2x3x8::setupHardwareInfo}, {0x592B, &KBL_2x3x8::hwInfo, &KBL_2x3x8::setupHardwareInfo}, {0x592A, &KBL_2x3x8::hwInfo, &KBL_2x3x8::setupHardwareInfo}, {0x5923, &KBL_2x3x8::hwInfo, &KBL_2x3x8::setupHardwareInfo}, {0x5932, &KBL_3x3x8::hwInfo, &KBL_3x3x8::setupHardwareInfo}, {0x593B, &KBL_3x3x8::hwInfo, &KBL_3x3x8::setupHardwareInfo}, {0x593A, &KBL_3x3x8::hwInfo, &KBL_3x3x8::setupHardwareInfo}, {0x593D, &KBL_3x3x8::hwInfo, &KBL_3x3x8::setupHardwareInfo}, }}; testImpl(expectedDescriptors); } compute-runtime-22.14.22890/opencl/test/unit_test/gen9/kbl/linux/hw_info_config_tests_kbl.cpp000066400000000000000000000216731422164147700321450ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/execution_environment/execution_environment.h" #include "shared/test/unit_test/helpers/gtest_helpers.h" #include "opencl/test/unit_test/os_interface/linux/hw_info_config_linux_tests.h" using namespace NEO; struct HwInfoConfigTestLinuxKbl : HwInfoConfigTestLinux { void SetUp() override { HwInfoConfigTestLinux::SetUp(); drm->storedDeviceID = 0x5912; } }; 
KBLTEST_F(HwInfoConfigTestLinuxKbl, WhenConfiguringHwInfoThenInformationIsCorrect) { auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ((unsigned short)drm->storedDeviceID, outHwInfo.platform.usDeviceID); EXPECT_EQ((unsigned short)drm->storedDeviceRevID, outHwInfo.platform.usRevId); EXPECT_EQ((uint32_t)drm->storedEUVal, outHwInfo.gtSystemInfo.EUCount); EXPECT_EQ((uint32_t)drm->storedSSVal, outHwInfo.gtSystemInfo.SubSliceCount); EXPECT_EQ(aub_stream::ENGINE_RCS, outHwInfo.capabilityTable.defaultEngineType); //constant sysInfo/ftr flags EXPECT_EQ(1u, outHwInfo.gtSystemInfo.VEBoxInfo.Instances.Bits.VEBox0Enabled); EXPECT_TRUE(outHwInfo.gtSystemInfo.VEBoxInfo.IsValid); drm->storedDeviceID = 0x5906; drm->storedSSVal = 3; ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ((unsigned short)drm->storedDeviceID, outHwInfo.platform.usDeviceID); EXPECT_EQ((unsigned short)drm->storedDeviceRevID, outHwInfo.platform.usRevId); EXPECT_EQ((uint32_t)drm->storedEUVal, outHwInfo.gtSystemInfo.EUCount); EXPECT_EQ((uint32_t)drm->storedSSVal, outHwInfo.gtSystemInfo.SubSliceCount); EXPECT_EQ(1u, outHwInfo.gtSystemInfo.SliceCount); EXPECT_EQ(aub_stream::ENGINE_RCS, outHwInfo.capabilityTable.defaultEngineType); drm->storedDeviceID = 0x5915; ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ((unsigned short)drm->storedDeviceID, outHwInfo.platform.usDeviceID); EXPECT_EQ((unsigned short)drm->storedDeviceRevID, outHwInfo.platform.usRevId); EXPECT_EQ((uint32_t)drm->storedEUVal, outHwInfo.gtSystemInfo.EUCount); EXPECT_EQ((uint32_t)drm->storedSSVal, outHwInfo.gtSystemInfo.SubSliceCount); EXPECT_EQ(aub_stream::ENGINE_RCS, outHwInfo.capabilityTable.defaultEngineType); drm->storedDeviceID = 0x5923; drm->storedSSVal = 6; ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, 
osInterface); EXPECT_EQ(0, ret); EXPECT_EQ((unsigned short)drm->storedDeviceID, outHwInfo.platform.usDeviceID); EXPECT_EQ((unsigned short)drm->storedDeviceRevID, outHwInfo.platform.usRevId); EXPECT_EQ((uint32_t)drm->storedEUVal, outHwInfo.gtSystemInfo.EUCount); EXPECT_EQ((uint32_t)drm->storedSSVal, outHwInfo.gtSystemInfo.SubSliceCount); EXPECT_EQ(2u, outHwInfo.gtSystemInfo.SliceCount); EXPECT_EQ(aub_stream::ENGINE_RCS, outHwInfo.capabilityTable.defaultEngineType); drm->storedDeviceID = 0x593B; drm->storedSSVal = 6; ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ((unsigned short)drm->storedDeviceID, outHwInfo.platform.usDeviceID); EXPECT_EQ((unsigned short)drm->storedDeviceRevID, outHwInfo.platform.usRevId); EXPECT_EQ((uint32_t)drm->storedEUVal, outHwInfo.gtSystemInfo.EUCount); EXPECT_EQ((uint32_t)drm->storedSSVal, outHwInfo.gtSystemInfo.SubSliceCount); EXPECT_EQ(2u, outHwInfo.gtSystemInfo.SliceCount); EXPECT_EQ(aub_stream::ENGINE_RCS, outHwInfo.capabilityTable.defaultEngineType); auto &outKmdNotifyProperties = outHwInfo.capabilityTable.kmdNotifyProperties; EXPECT_TRUE(outKmdNotifyProperties.enableKmdNotify); EXPECT_EQ(50000, outKmdNotifyProperties.delayKmdNotifyMicroseconds); EXPECT_TRUE(outKmdNotifyProperties.enableQuickKmdSleep); EXPECT_EQ(5000, outKmdNotifyProperties.delayQuickKmdSleepMicroseconds); EXPECT_TRUE(outKmdNotifyProperties.enableQuickKmdSleepForSporadicWaits); EXPECT_EQ(200000, outKmdNotifyProperties.delayQuickKmdSleepForSporadicWaitsMicroseconds); EXPECT_FALSE(outKmdNotifyProperties.enableQuickKmdSleepForDirectSubmission); EXPECT_EQ(0, outKmdNotifyProperties.delayQuickKmdSleepForDirectSubmissionMicroseconds); } KBLTEST_F(HwInfoConfigTestLinuxKbl, GivenUnknownDevIdWhenConfiguringHwInfoThenErrorIsReturned) { drm->storedDeviceID = 0; auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-1, ret); } 
KBLTEST_F(HwInfoConfigTestLinuxKbl, GivenFailedIoctlDevIdWhenConfiguringHwInfoThenErrorIsReturned) { drm->storedRetValForDeviceID = -2; auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-2, ret); } KBLTEST_F(HwInfoConfigTestLinuxKbl, GivenFailedIoctlDevRevIdWhenConfiguringHwInfoThenErrorIsReturned) { drm->storedRetValForDeviceRevID = -3; auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-3, ret); } KBLTEST_F(HwInfoConfigTestLinuxKbl, GivenFailedIoctlEuCountWhenConfiguringHwInfoThenErrorIsReturned) { drm->failRetTopology = true; drm->storedRetValForEUVal = -4; auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-4, ret); } KBLTEST_F(HwInfoConfigTestLinuxKbl, GivenFailedIoctlSsCountWhenConfiguringHwInfoThenErrorIsReturned) { drm->failRetTopology = true; drm->storedRetValForSSVal = -5; auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-5, ret); } KBLTEST_F(HwInfoConfigTestLinuxKbl, GivenWaFlagsWhenConfiguringHwInfoThenInformationIsCorrect) { auto hwInfoConfig = HwInfoConfig::get(productFamily); drm->storedDeviceRevID = 0; int ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); drm->storedDeviceRevID = 7; ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ(0u, outHwInfo.workaroundTable.flags.waDisableLSQCROPERFforOCL); EXPECT_EQ(0u, outHwInfo.workaroundTable.flags.waEncryptedEdramOnlyPartials); drm->storedDeviceRevID = 9; ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ(0u, outHwInfo.workaroundTable.flags.waDisableLSQCROPERFforOCL); EXPECT_EQ(0u, 
outHwInfo.workaroundTable.flags.waEncryptedEdramOnlyPartials); EXPECT_EQ(0u, outHwInfo.workaroundTable.flags.waForcePcBbFullCfgRestore); } KBLTEST_F(HwInfoConfigTestLinuxKbl, WhenConfiguringHwInfoThenEdramInformationIsCorrect) { auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ_VAL(0u, outHwInfo.gtSystemInfo.EdramSizeInKb); EXPECT_EQ(0u, outHwInfo.featureTable.flags.ftrEDram); drm->storedDeviceID = 0x5927; ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ_VAL((64u * 1024u), outHwInfo.gtSystemInfo.EdramSizeInKb); EXPECT_EQ(1u, outHwInfo.featureTable.flags.ftrEDram); drm->storedDeviceID = 0x5926; ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ_VAL((64u * 1024u), outHwInfo.gtSystemInfo.EdramSizeInKb); EXPECT_EQ(1u, outHwInfo.featureTable.flags.ftrEDram); } template class KblHwInfoTests : public ::testing::Test { }; typedef ::testing::Types kblTestTypes; TYPED_TEST_CASE(KblHwInfoTests, kblTestTypes); TYPED_TEST(KblHwInfoTests, WhenGtIsSetupThenGtSystemInfoIsCorrect) { HardwareInfo hwInfo = *defaultHwInfo; auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); DrmMock drm(*executionEnvironment->rootDeviceEnvironments[0]); GT_SYSTEM_INFO >SystemInfo = hwInfo.gtSystemInfo; DeviceDescriptor device = {0, &hwInfo, &TypeParam::setupHardwareInfo}; int ret = drm.setupHardwareInfo(&device, false); EXPECT_EQ(ret, 0); EXPECT_GT(gtSystemInfo.EUCount, 0u); EXPECT_GT(gtSystemInfo.ThreadCount, 0u); EXPECT_GT(gtSystemInfo.SliceCount, 0u); EXPECT_GT(gtSystemInfo.SubSliceCount, 0u); EXPECT_GT(gtSystemInfo.DualSubSliceCount, 0u); EXPECT_GT_VAL(gtSystemInfo.L3CacheSizeInKb, 0u); EXPECT_EQ(gtSystemInfo.CsrSizeInMb, 8u); 
EXPECT_FALSE(gtSystemInfo.IsDynamicallyPopulated); } compute-runtime-22.14.22890/opencl/test/unit_test/gen9/kbl/test_device_caps_kbl.cpp000066400000000000000000000011271422164147700301020ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" using namespace NEO; typedef Test KblDeviceCaps; KBLTEST_F(KblDeviceCaps, GivenKBLWhenCheckftr64KBpagesThenTrue) { EXPECT_TRUE(pDevice->getHardwareInfo().capabilityTable.ftr64KBpages); } KBLTEST_F(KblDeviceCaps, givenKblWhenCheckFtrSupportsInteger64BitAtomicsThenReturnTrue) { EXPECT_TRUE(pDevice->getHardwareInfo().capabilityTable.ftrSupportsInteger64BitAtomics); } compute-runtime-22.14.22890/opencl/test/unit_test/gen9/kbl/test_hw_info_config_kbl.cpp000066400000000000000000000106451422164147700306200ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/helpers/default_hw_info.h" #include "shared/test/common/test_macros/test.h" using namespace NEO; TEST(KblHwInfoConfig, GivenIncorrectDataWhenConfiguringHwInfoThenErrorIsReturned) { if (IGFX_KABYLAKE != productFamily) { return; } HardwareInfo hwInfo = *defaultHwInfo; GT_SYSTEM_INFO >SystemInfo = hwInfo.gtSystemInfo; uint64_t config = 0xdeadbeef; gtSystemInfo = {0}; EXPECT_ANY_THROW(hardwareInfoSetup[productFamily](&hwInfo, false, config)); EXPECT_EQ(0u, gtSystemInfo.SliceCount); EXPECT_EQ(0u, gtSystemInfo.SubSliceCount); EXPECT_EQ(0u, gtSystemInfo.DualSubSliceCount); EXPECT_EQ(0u, gtSystemInfo.EUCount); } using KblHwInfo = ::testing::Test; KBLTEST_F(KblHwInfo, givenBoolWhenCallKblHardwareInfoSetupThenFeatureTableAndWorkaroundTableAreSetCorrect) { uint64_t configs[] = { 0x100030008, 0x200030008, 0x300030008, 0x100020006, 0x100030006}; bool boolValue[]{ true, false}; HardwareInfo hwInfo = *defaultHwInfo; GT_SYSTEM_INFO 
>SystemInfo = hwInfo.gtSystemInfo; FeatureTable &featureTable = hwInfo.featureTable; WorkaroundTable &workaroundTable = hwInfo.workaroundTable; PLATFORM &platform = hwInfo.platform; for (auto &config : configs) { for (auto setParamBool : boolValue) { gtSystemInfo = {0}; featureTable = {}; workaroundTable = {}; platform.usRevId = 9; hardwareInfoSetup[productFamily](&hwInfo, setParamBool, config); EXPECT_EQ(setParamBool, featureTable.flags.ftrGpGpuMidBatchPreempt); EXPECT_EQ(setParamBool, featureTable.flags.ftrGpGpuThreadGroupLevelPreempt); EXPECT_EQ(setParamBool, featureTable.flags.ftrL3IACoherency); EXPECT_EQ(setParamBool, featureTable.flags.ftrGpGpuMidThreadLevelPreempt); EXPECT_EQ(setParamBool, featureTable.flags.ftr3dMidBatchPreempt); EXPECT_EQ(setParamBool, featureTable.flags.ftr3dObjectLevelPreempt); EXPECT_EQ(setParamBool, featureTable.flags.ftrPerCtxtPreemptionGranularityControl); EXPECT_EQ(setParamBool, featureTable.flags.ftrPPGTT); EXPECT_EQ(setParamBool, featureTable.flags.ftrSVM); EXPECT_EQ(setParamBool, featureTable.flags.ftrIA32eGfxPTEs); EXPECT_EQ(setParamBool, featureTable.flags.ftrDisplayYTiling); EXPECT_EQ(setParamBool, featureTable.flags.ftrTranslationTable); EXPECT_EQ(setParamBool, featureTable.flags.ftrUserModeTranslationTable); EXPECT_EQ(setParamBool, featureTable.flags.ftrEnableGuC); EXPECT_EQ(setParamBool, featureTable.flags.ftrFbc); EXPECT_EQ(setParamBool, featureTable.flags.ftrFbc2AddressTranslation); EXPECT_EQ(setParamBool, featureTable.flags.ftrFbcBlitterTracking); EXPECT_EQ(setParamBool, featureTable.flags.ftrFbcCpuTracking); EXPECT_EQ(setParamBool, featureTable.flags.ftrTileY); EXPECT_EQ(setParamBool, workaroundTable.flags.waEnablePreemptionGranularityControlByUMD); EXPECT_EQ(setParamBool, workaroundTable.flags.waSendMIFLUSHBeforeVFE); EXPECT_EQ(setParamBool, workaroundTable.flags.waReportPerfCountUseGlobalContextID); EXPECT_EQ(setParamBool, workaroundTable.flags.waMsaa8xTileYDepthPitchAlignment); EXPECT_EQ(setParamBool, 
workaroundTable.flags.waLosslessCompressionSurfaceStride); EXPECT_EQ(setParamBool, workaroundTable.flags.waFbcLinearSurfaceStride); EXPECT_EQ(setParamBool, workaroundTable.flags.wa4kAlignUVOffsetNV12LinearSurface); EXPECT_EQ(setParamBool, workaroundTable.flags.waSamplerCacheFlushBetweenRedescribedSurfaceReads); EXPECT_EQ(false, workaroundTable.flags.waDisableLSQCROPERFforOCL); EXPECT_EQ(false, workaroundTable.flags.waEncryptedEdramOnlyPartials); EXPECT_EQ(false, workaroundTable.flags.waForcePcBbFullCfgRestore); platform.usRevId = 1; workaroundTable = {}; hardwareInfoSetup[productFamily](&hwInfo, true, config); EXPECT_EQ(true, workaroundTable.flags.waDisableLSQCROPERFforOCL); EXPECT_EQ(true, workaroundTable.flags.waEncryptedEdramOnlyPartials); EXPECT_EQ(true, workaroundTable.flags.waForcePcBbFullCfgRestore); } } } compute-runtime-22.14.22890/opencl/test/unit_test/gen9/kbl/windows/000077500000000000000000000000001422164147700247335ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/gen9/kbl/windows/CMakeLists.txt000066400000000000000000000005131422164147700274720ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_gen9_kbl_windows ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/test_device_caps_kbl_windows.cpp ) if(WIN32) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen9_kbl_windows}) endif() compute-runtime-22.14.22890/opencl/test/unit_test/gen9/kbl/windows/test_device_caps_kbl_windows.cpp000066400000000000000000000024121422164147700333440ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" using namespace NEO; typedef Test KblDeviceCapsWindows; KBLTEST_F(KblDeviceCapsWindows, GivenWhenGettingKmdNotifyPropertiesThenItIsDisabled) { 
EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.enableKmdNotify); EXPECT_EQ(0, pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds); EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.enableQuickKmdSleep); EXPECT_EQ(0, pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayQuickKmdSleepMicroseconds); EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.enableQuickKmdSleepForSporadicWaits); EXPECT_EQ(0, pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayQuickKmdSleepForSporadicWaitsMicroseconds); EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.enableQuickKmdSleepForDirectSubmission); EXPECT_EQ(0, pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayQuickKmdSleepForDirectSubmissionMicroseconds); } compute-runtime-22.14.22890/opencl/test/unit_test/gen9/kernel_tests_gen9.cpp000066400000000000000000000016731422164147700266300ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" using namespace NEO; using Gen9KernelTest = Test; GEN9TEST_F(Gen9KernelTest, givenKernelWhenCanTransformImagesIsCalledThenReturnsTrue) { MockKernelWithInternals mockKernel(*pClDevice); auto retVal = mockKernel.mockKernel->Kernel::canTransformImages(); EXPECT_TRUE(retVal); } GEN9TEST_F(Gen9KernelTest, givenBuiltinKernelWhenCanTransformImagesIsCalledThenReturnsFalse) { MockKernelWithInternals mockKernel(*pClDevice); mockKernel.mockKernel->isBuiltIn = true; auto retVal = mockKernel.mockKernel->Kernel::canTransformImages(); EXPECT_FALSE(retVal); } 
compute-runtime-22.14.22890/opencl/test/unit_test/gen9/sampler_tests_gen9.cpp000066400000000000000000000017201422164147700270040ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/hw_info_config.h" #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include using namespace NEO; typedef Test Gen9SamplerTest; GEN9TEST_F(Gen9SamplerTest, WhenAppendingSamplerStateParamsThenStateIsNotChanged) { typedef typename FamilyType::SAMPLER_STATE SAMPLER_STATE; auto stateWithoutAppendedParams = FamilyType::cmdInitSamplerState; auto stateWithAppendedParams = FamilyType::cmdInitSamplerState; EXPECT_TRUE(memcmp(&stateWithoutAppendedParams, &stateWithAppendedParams, sizeof(SAMPLER_STATE)) == 0); HwInfoConfig::get(defaultHwInfo->platform.eProductFamily)->adjustSamplerState(&stateWithAppendedParams, *defaultHwInfo); EXPECT_TRUE(memcmp(&stateWithoutAppendedParams, &stateWithAppendedParams, sizeof(SAMPLER_STATE)) == 0); } compute-runtime-22.14.22890/opencl/test/unit_test/gen9/sip_tests_gen9.cpp000066400000000000000000000027661422164147700261470ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/built_ins/built_ins.h" #include "shared/source/built_ins/sip.h" #include "shared/test/common/helpers/test_files.h" #include "shared/test/common/libult/global_environment.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/test_macros/test.h" #include "gtest/gtest.h" using namespace NEO; namespace SipKernelTests { typedef ::testing::Test gen9SipTests; GEN9TEST_F(gen9SipTests, givenDebugCsrSipKernelWithLocalMemoryWhenAskedForDebugSurfaceBtiAndSizeThenBtiIsZeroAndSizeGreaterThanZero) { auto mockDevice = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr)); EXPECT_NE(nullptr, mockDevice); auto &builtins = 
*mockDevice->getBuiltIns(); auto &sipKernel = builtins.getSipKernel(SipKernelType::DbgCsrLocal, *mockDevice); EXPECT_NE(nullptr, &sipKernel); EXPECT_EQ(SipKernelType::DbgCsrLocal, sipKernel.getType()); } GEN9TEST_F(gen9SipTests, givenDebuggingActiveWhenSipTypeIsQueriedThenDbgCsrLocalIsReturned) { auto mockDevice = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr)); EXPECT_NE(nullptr, mockDevice); mockDevice->isDebuggerActiveParentCall = false; mockDevice->isDebuggerActiveReturn = true; auto sipType = SipKernel::getSipKernelType(*mockDevice); EXPECT_EQ(SipKernelType::DbgCsrLocal, sipType); } } // namespace SipKernelTests compute-runtime-22.14.22890/opencl/test/unit_test/gen9/skl/000077500000000000000000000000001422164147700232625ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/gen9/skl/CMakeLists.txt000066400000000000000000000014361422164147700260260ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_SKL) set(IGDRCL_SRCS_tests_gen9_skl ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/device_tests_skl.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_device_caps_skl.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_hw_info_config_skl.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_sample_skl.cpp ) get_property(NEO_CORE_TESTS_GEN9_SKL GLOBAL PROPERTY NEO_CORE_TESTS_GEN9_SKL) list(APPEND IGDRCL_SRCS_tests_gen9_skl ${NEO_CORE_TESTS_GEN9_SKL}) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen9_skl}) add_subdirectories() neo_copy_test_files_with_revision(copy_test_files_skl_9 skl 9) add_dependencies(copy_test_files_per_product copy_test_files_skl_9) endif() compute-runtime-22.14.22890/opencl/test/unit_test/gen9/skl/device_tests_skl.cpp000066400000000000000000000007221422164147700273210ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include 
"opencl/test/unit_test/fixtures/cl_device_fixture.h" using namespace NEO; typedef Test DeviceTest; SKLTEST_F(DeviceTest, givenSklDeviceWhenAskedForProflingTimerResolutionThen83IsReturned) { auto resolution = pDevice->getProfilingTimerResolution(); EXPECT_DOUBLE_EQ(83.333, resolution); } compute-runtime-22.14.22890/opencl/test/unit_test/gen9/skl/linux/000077500000000000000000000000001422164147700244215ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/gen9/skl/linux/CMakeLists.txt000066400000000000000000000005311422164147700271600ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_gen9_skl_linux ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/hw_info_config_tests_skl.cpp ) if(UNIX) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen9_skl_linux}) add_subdirectory(dll) endif() compute-runtime-22.14.22890/opencl/test/unit_test/gen9/skl/linux/dll/000077500000000000000000000000001422164147700251745ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/gen9/skl/linux/dll/CMakeLists.txt000066400000000000000000000004731422164147700277400ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_linux_dll_tests_gen9_skl ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/device_id_tests_skl.cpp ) target_sources(igdrcl_linux_dll_tests PRIVATE ${IGDRCL_SRCS_linux_dll_tests_gen9_skl}) compute-runtime-22.14.22890/opencl/test/unit_test/gen9/skl/linux/dll/device_id_tests_skl.cpp000066400000000000000000000041611422164147700317100ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/fixtures/linux/device_id_fixture.h" using namespace NEO; TEST_F(DeviceIdTests, GivenSklSupportedDeviceIdThenHardwareInfoIsCorrect) { std::array expectedDescriptors = {{ 
{0x1902, &SKL_1x2x6::hwInfo, &SKL_1x2x6::setupHardwareInfo}, {0x190B, &SKL_1x2x6::hwInfo, &SKL_1x2x6::setupHardwareInfo}, {0x190A, &SKL_1x2x6::hwInfo, &SKL_1x2x6::setupHardwareInfo}, {0x1906, &SKL_1x2x6::hwInfo, &SKL_1x2x6::setupHardwareInfo}, {0x190E, &SKL_1x2x6::hwInfo, &SKL_1x2x6::setupHardwareInfo}, {0x1917, &SKL_1x3x6::hwInfo, &SKL_1x3x6::setupHardwareInfo}, {0x1913, &SKL_1x3x6::hwInfo, &SKL_1x3x6::setupHardwareInfo}, {0x1915, &SKL_1x3x6::hwInfo, &SKL_1x3x6::setupHardwareInfo}, {0x1912, &SKL_1x3x8::hwInfo, &SKL_1x3x8::setupHardwareInfo}, {0x191B, &SKL_1x3x8::hwInfo, &SKL_1x3x8::setupHardwareInfo}, {0x191A, &SKL_1x3x8::hwInfo, &SKL_1x3x8::setupHardwareInfo}, {0x1916, &SKL_1x3x8::hwInfo, &SKL_1x3x8::setupHardwareInfo}, {0x191E, &SKL_1x3x8::hwInfo, &SKL_1x3x8::setupHardwareInfo}, {0x191D, &SKL_1x3x8::hwInfo, &SKL_1x3x8::setupHardwareInfo}, {0x1921, &SKL_1x3x8::hwInfo, &SKL_1x3x8::setupHardwareInfo}, {0x9905, &SKL_1x3x8::hwInfo, &SKL_1x3x8::setupHardwareInfo}, {0x192B, &SKL_2x3x8::hwInfo, &SKL_2x3x8::setupHardwareInfo}, {0x192D, &SKL_2x3x8::hwInfo, &SKL_2x3x8::setupHardwareInfo}, {0x192A, &SKL_2x3x8::hwInfo, &SKL_2x3x8::setupHardwareInfo}, {0x1923, &SKL_2x3x8::hwInfo, &SKL_2x3x8::setupHardwareInfo}, {0x1926, &SKL_2x3x8::hwInfo, &SKL_2x3x8::setupHardwareInfo}, {0x1927, &SKL_2x3x8::hwInfo, &SKL_2x3x8::setupHardwareInfo}, {0x1932, &SKL_3x3x8::hwInfo, &SKL_3x3x8::setupHardwareInfo}, {0x193B, &SKL_3x3x8::hwInfo, &SKL_3x3x8::setupHardwareInfo}, {0x193A, &SKL_3x3x8::hwInfo, &SKL_3x3x8::setupHardwareInfo}, {0x193D, &SKL_3x3x8::hwInfo, &SKL_3x3x8::setupHardwareInfo}, }}; testImpl(expectedDescriptors); } compute-runtime-22.14.22890/opencl/test/unit_test/gen9/skl/linux/hw_info_config_tests_skl.cpp000066400000000000000000000253331422164147700322040ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/unit_test/helpers/gtest_helpers.h" #include 
"opencl/test/unit_test/os_interface/linux/hw_info_config_linux_tests.h" using namespace NEO; struct HwInfoConfigTestLinuxSkl : HwInfoConfigTestLinux { void SetUp() override { HwInfoConfigTestLinux::SetUp(); drm->storedDeviceID = 0x0902; } }; SKLTEST_F(HwInfoConfigTestLinuxSkl, WhenConfiguringHwInfoThenInformationIsCorrect) { auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ((unsigned short)drm->storedDeviceID, outHwInfo.platform.usDeviceID); EXPECT_EQ((unsigned short)drm->storedDeviceRevID, outHwInfo.platform.usRevId); EXPECT_EQ((uint32_t)drm->storedEUVal, outHwInfo.gtSystemInfo.EUCount); EXPECT_EQ((uint32_t)drm->storedSSVal, outHwInfo.gtSystemInfo.SubSliceCount); EXPECT_EQ(aub_stream::ENGINE_RCS, outHwInfo.capabilityTable.defaultEngineType); //constant sysInfo/ftr flags EXPECT_EQ(1u, outHwInfo.gtSystemInfo.VEBoxInfo.Instances.Bits.VEBox0Enabled); EXPECT_EQ(1u, outHwInfo.gtSystemInfo.VDBoxInfo.Instances.Bits.VDBox0Enabled); EXPECT_TRUE(outHwInfo.gtSystemInfo.VEBoxInfo.IsValid); EXPECT_TRUE(outHwInfo.gtSystemInfo.VDBoxInfo.IsValid); drm->storedDeviceID = 0x1902; drm->storedSSVal = 3; ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ((unsigned short)drm->storedDeviceID, outHwInfo.platform.usDeviceID); EXPECT_EQ((unsigned short)drm->storedDeviceRevID, outHwInfo.platform.usRevId); EXPECT_EQ((uint32_t)drm->storedEUVal, outHwInfo.gtSystemInfo.EUCount); EXPECT_EQ((uint32_t)drm->storedSSVal, outHwInfo.gtSystemInfo.SubSliceCount); EXPECT_EQ(1u, outHwInfo.gtSystemInfo.SliceCount); EXPECT_EQ(aub_stream::ENGINE_RCS, outHwInfo.capabilityTable.defaultEngineType); drm->storedDeviceID = 0x1917; ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ((unsigned short)drm->storedDeviceID, outHwInfo.platform.usDeviceID); EXPECT_EQ((unsigned 
short)drm->storedDeviceRevID, outHwInfo.platform.usRevId); EXPECT_EQ((uint32_t)drm->storedEUVal, outHwInfo.gtSystemInfo.EUCount); EXPECT_EQ((uint32_t)drm->storedSSVal, outHwInfo.gtSystemInfo.SubSliceCount); EXPECT_EQ(aub_stream::ENGINE_RCS, outHwInfo.capabilityTable.defaultEngineType); drm->storedDeviceID = 0x0903; ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ((unsigned short)drm->storedDeviceID, outHwInfo.platform.usDeviceID); EXPECT_EQ((unsigned short)drm->storedDeviceRevID, outHwInfo.platform.usRevId); EXPECT_EQ((uint32_t)drm->storedEUVal, outHwInfo.gtSystemInfo.EUCount); EXPECT_EQ((uint32_t)drm->storedSSVal, outHwInfo.gtSystemInfo.SubSliceCount); EXPECT_EQ(aub_stream::ENGINE_RCS, outHwInfo.capabilityTable.defaultEngineType); drm->storedDeviceID = 0x0904; drm->storedSSVal = 6; ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ((unsigned short)drm->storedDeviceID, outHwInfo.platform.usDeviceID); EXPECT_EQ((unsigned short)drm->storedDeviceRevID, outHwInfo.platform.usRevId); EXPECT_EQ((uint32_t)drm->storedEUVal, outHwInfo.gtSystemInfo.EUCount); EXPECT_EQ((uint32_t)drm->storedSSVal, outHwInfo.gtSystemInfo.SubSliceCount); EXPECT_EQ(1u, outHwInfo.gtSystemInfo.SliceCount); EXPECT_EQ(aub_stream::ENGINE_RCS, outHwInfo.capabilityTable.defaultEngineType); auto &outKmdNotifyProperties = outHwInfo.capabilityTable.kmdNotifyProperties; EXPECT_TRUE(outKmdNotifyProperties.enableKmdNotify); EXPECT_EQ(50000, outKmdNotifyProperties.delayKmdNotifyMicroseconds); EXPECT_TRUE(outKmdNotifyProperties.enableQuickKmdSleep); EXPECT_EQ(5000, outKmdNotifyProperties.delayQuickKmdSleepMicroseconds); EXPECT_TRUE(outKmdNotifyProperties.enableQuickKmdSleepForSporadicWaits); EXPECT_EQ(200000, outKmdNotifyProperties.delayQuickKmdSleepForSporadicWaitsMicroseconds); EXPECT_FALSE(outKmdNotifyProperties.enableQuickKmdSleepForDirectSubmission); EXPECT_EQ(0, 
outKmdNotifyProperties.delayQuickKmdSleepForDirectSubmissionMicroseconds); } SKLTEST_F(HwInfoConfigTestLinuxSkl, GivenUnknownDevIdWhenConfiguringHwInfoThenErrorIsReturned) { drm->storedDeviceID = 0; auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-1, ret); } SKLTEST_F(HwInfoConfigTestLinuxSkl, GivenFailedIoctlDevIdWhenConfiguringHwInfoThenErrorIsReturned) { drm->storedRetValForDeviceID = -2; auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-2, ret); } SKLTEST_F(HwInfoConfigTestLinuxSkl, GivenFailedIoctlDevRevIdWhenConfiguringHwInfoThenErrorIsReturned) { drm->storedRetValForDeviceRevID = -3; auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-3, ret); } SKLTEST_F(HwInfoConfigTestLinuxSkl, GivenFailedIoctlEuCountWhenConfiguringHwInfoThenErrorIsReturned) { drm->storedRetValForEUVal = -4; drm->failRetTopology = true; auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-4, ret); } SKLTEST_F(HwInfoConfigTestLinuxSkl, GivenFailedIoctlSsCountWhenConfiguringHwInfoThenErrorIsReturned) { drm->storedRetValForSSVal = -5; drm->failRetTopology = true; auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-5, ret); } SKLTEST_F(HwInfoConfigTestLinuxSkl, GivenWaFlagsWhenConfiguringHwInfoThenInformationIsCorrect) { auto hwInfoConfig = HwInfoConfig::get(productFamily); drm->storedDeviceRevID = 1; int ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); drm->storedDeviceRevID = 0; ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); 
EXPECT_EQ(0u, outHwInfo.workaroundTable.flags.waCompressedResourceRequiresConstVA21); drm->storedDeviceRevID = 5; ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ(0u, outHwInfo.workaroundTable.flags.waCompressedResourceRequiresConstVA21); EXPECT_EQ(0u, outHwInfo.workaroundTable.flags.waModifyVFEStateAfterGPGPUPreemption); EXPECT_EQ(0u, outHwInfo.workaroundTable.flags.waDisablePerCtxtPreemptionGranularityControl); drm->storedDeviceRevID = 6; ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ(0u, outHwInfo.workaroundTable.flags.waCompressedResourceRequiresConstVA21); EXPECT_EQ(0u, outHwInfo.workaroundTable.flags.waModifyVFEStateAfterGPGPUPreemption); EXPECT_EQ(0u, outHwInfo.workaroundTable.flags.waDisablePerCtxtPreemptionGranularityControl); EXPECT_EQ(0u, outHwInfo.workaroundTable.flags.waCSRUncachable); } SKLTEST_F(HwInfoConfigTestLinuxSkl, WhenConfiguringHwInfoThenEdramInformationIsCorrect) { auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ_VAL(0u, outHwInfo.gtSystemInfo.EdramSizeInKb); EXPECT_EQ(0u, outHwInfo.featureTable.flags.ftrEDram); drm->storedDeviceID = 0x1926; ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ_VAL((64u * 1024u), outHwInfo.gtSystemInfo.EdramSizeInKb); EXPECT_EQ(1u, outHwInfo.featureTable.flags.ftrEDram); drm->storedDeviceID = 0x1927; ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ_VAL((64u * 1024u), outHwInfo.gtSystemInfo.EdramSizeInKb); EXPECT_EQ(1u, outHwInfo.featureTable.flags.ftrEDram); drm->storedDeviceID = 0x192D; ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ_VAL((64u * 1024u), outHwInfo.gtSystemInfo.EdramSizeInKb); EXPECT_EQ(1u, 
outHwInfo.featureTable.flags.ftrEDram); drm->storedDeviceID = 0x193B; ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ_VAL((128u * 1024u), outHwInfo.gtSystemInfo.EdramSizeInKb); EXPECT_EQ(1u, outHwInfo.featureTable.flags.ftrEDram); drm->storedDeviceID = 0x193D; ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ_VAL((128u * 1024u), outHwInfo.gtSystemInfo.EdramSizeInKb); EXPECT_EQ(1u, outHwInfo.featureTable.flags.ftrEDram); } template class SklHwInfoTests : public ::testing::Test { }; typedef ::testing::Types sklTestTypes; TYPED_TEST_CASE(SklHwInfoTests, sklTestTypes); TYPED_TEST(SklHwInfoTests, WhenGtIsSetupThenGtSystemInfoIsCorrect) { HardwareInfo hwInfo = *defaultHwInfo; auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); DrmMock drm(*executionEnvironment->rootDeviceEnvironments[0]); GT_SYSTEM_INFO >SystemInfo = hwInfo.gtSystemInfo; DeviceDescriptor device = {0, &hwInfo, &TypeParam::setupHardwareInfo}; int ret = drm.setupHardwareInfo(&device, false); EXPECT_EQ(ret, 0); EXPECT_GT(gtSystemInfo.EUCount, 0u); EXPECT_GT(gtSystemInfo.ThreadCount, 0u); EXPECT_GT(gtSystemInfo.SliceCount, 0u); EXPECT_GT(gtSystemInfo.SubSliceCount, 0u); EXPECT_GT(gtSystemInfo.DualSubSliceCount, 0u); EXPECT_GT_VAL(gtSystemInfo.L3CacheSizeInKb, 0u); EXPECT_EQ(gtSystemInfo.CsrSizeInMb, 8u); EXPECT_FALSE(gtSystemInfo.IsDynamicallyPopulated); } TYPED_TEST(SklHwInfoTests, givenGTSystemInfoTypeWhenConfigureHardwareCustomThenSliceCountDontChange) { HardwareInfo hwInfo = *defaultHwInfo; auto osInterface = std::unique_ptr(new OSInterface()); GT_SYSTEM_INFO >SystemInfo = hwInfo.gtSystemInfo; TypeParam::setupHardwareInfo(&hwInfo, false); auto sliceCount = gtSystemInfo.SliceCount; HwInfoConfig *hwConfig = HwInfoConfig::get(PRODUCT_FAMILY::IGFX_SKYLAKE); 
hwConfig->configureHardwareCustom(&hwInfo, osInterface.get()); EXPECT_EQ(gtSystemInfo.SliceCount, sliceCount); } compute-runtime-22.14.22890/opencl/test/unit_test/gen9/skl/test_device_caps_skl.cpp000066400000000000000000000042131422164147700301430ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" using namespace NEO; typedef Test SklDeviceCaps; SKLTEST_F(SklDeviceCaps, WhenCheckingProfilingTimerResolutionThenCorrectResolutionIsReturned) { const auto &caps = pDevice->getDeviceInfo(); EXPECT_EQ(83u, caps.outProfilingTimerResolution); } SKLTEST_F(SklDeviceCaps, givenSklDeviceWhenAskedFor32BitSupportThenFalseIsReturned) { const auto &sharedCaps = pDevice->getDeviceInfo(); auto memoryManager = pDevice->getMemoryManager(); EXPECT_FALSE(memoryManager->peekForce32BitAllocations()); EXPECT_FALSE(sharedCaps.force32BitAddressess); } SKLTEST_F(SklDeviceCaps, WhenCheckingCapabilitiesThenSvmIsEnabled) { const auto &caps = pClDevice->getDeviceInfo(); cl_device_svm_capabilities expectedCaps = (CL_DEVICE_SVM_COARSE_GRAIN_BUFFER | CL_DEVICE_SVM_FINE_GRAIN_BUFFER | CL_DEVICE_SVM_ATOMICS); EXPECT_EQ(expectedCaps, caps.svmCapabilities); } typedef Test SklUsDeviceIdTest; SKLTEST_F(SklUsDeviceIdTest, WhenCheckingIsSimulationThenTrueReturnedOnlyForSimulationId) { unsigned short sklSimulationIds[6] = { 0x0900, 0x0901, 0x0902, 0x0903, 0x0904, 0, // default, non-simulation }; NEO::MockDevice *mockDevice = nullptr; for (auto id : sklSimulationIds) { mockDevice = createWithUsDeviceId(id); ASSERT_NE(mockDevice, nullptr); if (id == 0) EXPECT_FALSE(mockDevice->isSimulation()); else EXPECT_TRUE(mockDevice->isSimulation()); delete mockDevice; } } SKLTEST_F(SklUsDeviceIdTest, GivenSKLWhenCheckftr64KBpagesThenTrue) { EXPECT_TRUE(pDevice->getHardwareInfo().capabilityTable.ftr64KBpages); } SKLTEST_F(SklUsDeviceIdTest, 
givenSklWhenCheckFtrSupportsInteger64BitAtomicsThenReturnTrue) { EXPECT_TRUE(pDevice->getHardwareInfo().capabilityTable.ftrSupportsInteger64BitAtomics); } compute-runtime-22.14.22890/opencl/test/unit_test/gen9/skl/test_hw_info_config_skl.cpp000066400000000000000000000117731422164147700306650ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/helpers/default_hw_info.h" #include "shared/test/common/test_macros/test.h" using namespace NEO; TEST(SklHwInfoConfig, GivenIncorrectDataWhenConfiguringHwInfoThenErrorIsReturned) { if (IGFX_SKYLAKE != productFamily) { return; } HardwareInfo hwInfo = *defaultHwInfo; GT_SYSTEM_INFO >SystemInfo = hwInfo.gtSystemInfo; gtSystemInfo = {0}; uint64_t config = 0xdeadbeef; EXPECT_ANY_THROW(hardwareInfoSetup[productFamily](&hwInfo, false, config)); EXPECT_EQ(0u, gtSystemInfo.SliceCount); EXPECT_EQ(0u, gtSystemInfo.SubSliceCount); EXPECT_EQ(0u, gtSystemInfo.DualSubSliceCount); EXPECT_EQ(0u, gtSystemInfo.EUCount); } using SklHwInfo = ::testing::Test; SKLTEST_F(SklHwInfo, givenBoolWhenCallSklHardwareInfoSetupThenFeatureTableAndWorkaroundTableAreSetCorrect) { uint64_t configs[] = { 0x100030008, 0x200030008, 0x300030008, 0x100020006, 0x100030006}; bool boolValue[]{ true, false}; HardwareInfo hwInfo = *defaultHwInfo; GT_SYSTEM_INFO >SystemInfo = hwInfo.gtSystemInfo; FeatureTable &featureTable = hwInfo.featureTable; WorkaroundTable &workaroundTable = hwInfo.workaroundTable; PLATFORM &pPlatform = hwInfo.platform; for (auto &config : configs) { for (auto setParamBool : boolValue) { gtSystemInfo = {0}; featureTable = {}; workaroundTable = {}; pPlatform.usRevId = 9; hardwareInfoSetup[productFamily](&hwInfo, setParamBool, config); EXPECT_EQ(setParamBool, featureTable.flags.ftrGpGpuMidBatchPreempt); EXPECT_EQ(setParamBool, featureTable.flags.ftrGpGpuThreadGroupLevelPreempt); EXPECT_EQ(setParamBool, featureTable.flags.ftrL3IACoherency); EXPECT_EQ(setParamBool, 
featureTable.flags.ftrGpGpuMidThreadLevelPreempt); EXPECT_EQ(setParamBool, featureTable.flags.ftr3dMidBatchPreempt); EXPECT_EQ(setParamBool, featureTable.flags.ftr3dObjectLevelPreempt); EXPECT_EQ(setParamBool, featureTable.flags.ftrPerCtxtPreemptionGranularityControl); EXPECT_EQ(setParamBool, featureTable.flags.ftrPPGTT); EXPECT_EQ(setParamBool, featureTable.flags.ftrSVM); EXPECT_EQ(setParamBool, featureTable.flags.ftrIA32eGfxPTEs); EXPECT_EQ(setParamBool, featureTable.flags.ftrDisplayYTiling); EXPECT_EQ(setParamBool, featureTable.flags.ftrTranslationTable); EXPECT_EQ(setParamBool, featureTable.flags.ftrUserModeTranslationTable); EXPECT_EQ(setParamBool, featureTable.flags.ftrEnableGuC); EXPECT_EQ(setParamBool, featureTable.flags.ftrFbc); EXPECT_EQ(setParamBool, featureTable.flags.ftrFbc2AddressTranslation); EXPECT_EQ(setParamBool, featureTable.flags.ftrFbcBlitterTracking); EXPECT_EQ(setParamBool, featureTable.flags.ftrFbcCpuTracking); EXPECT_EQ(setParamBool, featureTable.flags.ftrTileY); EXPECT_EQ(setParamBool, workaroundTable.flags.waEnablePreemptionGranularityControlByUMD); EXPECT_EQ(setParamBool, workaroundTable.flags.waSendMIFLUSHBeforeVFE); EXPECT_EQ(setParamBool, workaroundTable.flags.waReportPerfCountUseGlobalContextID); EXPECT_EQ(setParamBool, workaroundTable.flags.waDisableLSQCROPERFforOCL); EXPECT_EQ(setParamBool, workaroundTable.flags.waMsaa8xTileYDepthPitchAlignment); EXPECT_EQ(setParamBool, workaroundTable.flags.waLosslessCompressionSurfaceStride); EXPECT_EQ(setParamBool, workaroundTable.flags.waFbcLinearSurfaceStride); EXPECT_EQ(setParamBool, workaroundTable.flags.wa4kAlignUVOffsetNV12LinearSurface); EXPECT_EQ(setParamBool, workaroundTable.flags.waEncryptedEdramOnlyPartials); EXPECT_EQ(setParamBool, workaroundTable.flags.waDisableEdramForDisplayRT); EXPECT_EQ(setParamBool, workaroundTable.flags.waForcePcBbFullCfgRestore); EXPECT_EQ(setParamBool, workaroundTable.flags.waSamplerCacheFlushBetweenRedescribedSurfaceReads); EXPECT_EQ(false, 
workaroundTable.flags.waCompressedResourceRequiresConstVA21); EXPECT_EQ(false, workaroundTable.flags.waDisablePerCtxtPreemptionGranularityControl); EXPECT_EQ(false, workaroundTable.flags.waModifyVFEStateAfterGPGPUPreemption); EXPECT_EQ(false, workaroundTable.flags.waCSRUncachable); pPlatform.usRevId = 1; workaroundTable = {}; featureTable = {}; hardwareInfoSetup[productFamily](&hwInfo, true, config); EXPECT_EQ(true, workaroundTable.flags.waCompressedResourceRequiresConstVA21); EXPECT_EQ(true, workaroundTable.flags.waDisablePerCtxtPreemptionGranularityControl); EXPECT_EQ(true, workaroundTable.flags.waModifyVFEStateAfterGPGPUPreemption); EXPECT_EQ(true, workaroundTable.flags.waCSRUncachable); } } } compute-runtime-22.14.22890/opencl/test/unit_test/gen9/skl/test_sample_skl.cpp000066400000000000000000000006611422164147700271620ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" using namespace NEO; typedef Test SkylakeOnlyTest; SKLTEST_F(SkylakeOnlyTest, WhenGettingProductFamilyThenSkylakeIsReturned) { EXPECT_EQ(IGFX_SKYLAKE, pDevice->getHardwareInfo().platform.eProductFamily); } compute-runtime-22.14.22890/opencl/test/unit_test/gen9/skl/windows/000077500000000000000000000000001422164147700247545ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/gen9/skl/windows/CMakeLists.txt000066400000000000000000000005131422164147700275130ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_gen9_skl_windows ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/test_device_caps_skl_windows.cpp ) if(WIN32) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen9_skl_windows}) endif() 
compute-runtime-22.14.22890/opencl/test/unit_test/gen9/skl/windows/test_device_caps_skl_windows.cpp000066400000000000000000000024121422164147700334060ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" using namespace NEO; typedef Test SklDeviceCapsWindows; SKLTEST_F(SklDeviceCapsWindows, GivenWhenGettingKmdNotifyPropertiesThenItIsDisabled) { EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.enableKmdNotify); EXPECT_EQ(0, pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds); EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.enableQuickKmdSleep); EXPECT_EQ(0, pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayQuickKmdSleepMicroseconds); EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.enableQuickKmdSleepForSporadicWaits); EXPECT_EQ(0, pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayQuickKmdSleepForSporadicWaitsMicroseconds); EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.enableQuickKmdSleepForDirectSubmission); EXPECT_EQ(0, pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayQuickKmdSleepForDirectSubmissionMicroseconds); } compute-runtime-22.14.22890/opencl/test/unit_test/gen9/test_device_caps_gen9.cpp000066400000000000000000000077131422164147700274330ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/hw_helper.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/helpers/gtest_helpers.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" using namespace NEO; typedef Test Gen9DeviceCaps; GEN9TEST_F(Gen9DeviceCaps, WhenCheckingExtensionStringThenFp64CorrectlyReported) 
{ const auto &caps = pClDevice->getDeviceInfo(); std::string extensionString = caps.deviceExtensions; if (pDevice->getHardwareInfo().capabilityTable.ftrSupportsFP64) { EXPECT_NE(std::string::npos, extensionString.find(std::string("cl_khr_fp64"))); EXPECT_NE(0u, caps.doubleFpConfig); } else { EXPECT_EQ(std::string::npos, extensionString.find(std::string("cl_khr_fp64"))); EXPECT_EQ(0u, caps.doubleFpConfig); } } GEN9TEST_F(Gen9DeviceCaps, givenGen9WhenCheckExtensionsThenDeviceProperlyReportsClKhrSubgroupsExtension) { const auto &caps = pClDevice->getDeviceInfo(); if (pClDevice->areOcl21FeaturesEnabled()) { EXPECT_TRUE(hasSubstr(caps.deviceExtensions, std::string("cl_khr_subgroups"))); } else { EXPECT_FALSE(hasSubstr(caps.deviceExtensions, std::string("cl_khr_subgroups"))); } } GEN9TEST_F(Gen9DeviceCaps, givenGen9WhenCheckingCapsThenDeviceDoesProperlyReportsIndependentForwardProgress) { const auto &caps = pClDevice->getDeviceInfo(); if (pClDevice->areOcl21FeaturesEnabled()) { EXPECT_TRUE(caps.independentForwardProgress != 0); } else { EXPECT_FALSE(caps.independentForwardProgress != 0); } } GEN9TEST_F(Gen9DeviceCaps, WhenGettingDeviceInfoThenCorrectlyRoundedDivideSqrtIsEnabled) { const auto &caps = pClDevice->getDeviceInfo(); EXPECT_NE(0u, caps.singleFpConfig & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT); } GEN9TEST_F(Gen9DeviceCaps, GivenDefaultWhenCheckingPreemptionModeThenMidThreadIsSupported) { EXPECT_EQ(PreemptionMode::MidThread, pDevice->getHardwareInfo().capabilityTable.defaultPreemptionMode); } GEN9TEST_F(Gen9DeviceCaps, WhenCheckingCompressionThenItIsDisabled) { EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.ftrRenderCompressedBuffers); EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.ftrRenderCompressedImages); } GEN9TEST_F(Gen9DeviceCaps, givenHwInfoWhenRequestedComputeUnitsUsedForScratchThenReturnValidValue) { const auto &hwInfo = pDevice->getHardwareInfo(); auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); uint32_t expectedValue = 
hwInfo.gtSystemInfo.MaxSubSlicesSupported * hwInfo.gtSystemInfo.MaxEuPerSubSlice * hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount; EXPECT_EQ(expectedValue, hwHelper.getComputeUnitsUsedForScratch(&hwInfo)); EXPECT_EQ(expectedValue, pDevice->getDeviceInfo().computeUnitsUsedForScratch); } GEN9TEST_F(Gen9DeviceCaps, givenHwInfoWhenRequestedMaxFrontEndThreadsThenReturnValidValue) { const auto &hwInfo = pDevice->getHardwareInfo(); EXPECT_EQ(HwHelper::getMaxThreadsForVfe(hwInfo), pDevice->getDeviceInfo().maxFrontEndThreads); } GEN9TEST_F(Gen9DeviceCaps, givenHwInfoWhenSlmSizeIsRequiredThenReturnCorrectValue) { EXPECT_EQ(64u, pDevice->getHardwareInfo().capabilityTable.slmSize); } GEN9TEST_F(Gen9DeviceCaps, givenGen9WhenCheckSupportCacheFlushAfterWalkerThenFalse) { EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.supportCacheFlushAfterWalker); } GEN9TEST_F(Gen9DeviceCaps, givenGen9WhenCheckBlitterOperationsSupportThenReturnFalse) { EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.blitterOperationsSupported); } GEN9TEST_F(Gen9DeviceCaps, givenGen9WhenCheckingImageSupportThenReturnTrue) { EXPECT_TRUE(pDevice->getHardwareInfo().capabilityTable.supportsImages); } GEN9TEST_F(Gen9DeviceCaps, givenGen9WhenCheckingMediaBlockSupportThenReturnTrue) { EXPECT_TRUE(pDevice->getHardwareInfo().capabilityTable.supportsMediaBlock); } compute-runtime-22.14.22890/opencl/test/unit_test/gen9/test_platform_caps_gen9.cpp000066400000000000000000000016531422164147700300150ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/test/unit_test/fixtures/platform_fixture.h" using namespace NEO; struct Gen9PlatformCaps : public PlatformFixture, public ::testing::Test { void SetUp() override { PlatformFixture::SetUp(); } void TearDown() override { PlatformFixture::TearDown(); } }; 
GEN9TEST_F(Gen9PlatformCaps, GivenPlatformWhenCheckingExtensionStringThenFp64SupportIsCorrectlyReported) { const auto &caps = pPlatform->getPlatformInfo(); if (pPlatform->getClDevice(0)->getHardwareInfo().capabilityTable.ftrSupportsFP64) { EXPECT_NE(std::string::npos, caps.extensions.find(std::string("cl_khr_fp64"))); } else { EXPECT_EQ(std::string::npos, caps.extensions.find(std::string("cl_khr_fp64"))); } } compute-runtime-22.14.22890/opencl/test/unit_test/gen9/test_preemption_gen9.cpp000066400000000000000000000374461422164147700273560ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/preemption.h" #include "shared/test/common/cmd_parse/hw_parse.h" #include "shared/test/common/mocks/mock_command_stream_receiver.h" #include "opencl/source/helpers/cl_preemption_helper.h" #include "opencl/test/unit_test/command_queue/enqueue_fixture.h" #include "opencl/test/unit_test/fixtures/cl_preemption_fixture.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" namespace NEO { template <> void HardwareParse::findCsrBaseAddress() { typedef typename GEN9::GPGPU_CSR_BASE_ADDRESS GPGPU_CSR_BASE_ADDRESS; itorGpgpuCsrBaseAddress = find(cmdList.begin(), itorWalker); if (itorGpgpuCsrBaseAddress != itorWalker) { cmdGpgpuCsrBaseAddress = *itorGpgpuCsrBaseAddress; } } } // namespace NEO using namespace NEO; using Gen9PreemptionEnqueueKernelTest = PreemptionEnqueueKernelTest; using Gen9MidThreadPreemptionEnqueueKernelTest = MidThreadPreemptionEnqueueKernelTest; using Gen9ThreadGroupPreemptionEnqueueKernelTest = ThreadGroupPreemptionEnqueueKernelTest; GEN9TEST_F(Gen9ThreadGroupPreemptionEnqueueKernelTest, givenSecondEnqueueWithTheSamePreemptionRequestThenDontReprogramThreadGroupNoWa) { pDevice->setPreemptionMode(PreemptionMode::ThreadGroup); pDevice->getRootDeviceEnvironment().getMutableHardwareInfo()->workaroundTable.flags.waModifyVFEStateAfterGPGPUPreemption = false; auto &csr = 
pDevice->getUltCommandStreamReceiver(); csr.getMemoryManager()->setForce32BitAllocations(false); csr.setMediaVFEStateDirty(false); auto csrSurface = csr.getPreemptionAllocation(); EXPECT_EQ(nullptr, csrSurface); size_t off[3] = {0, 0, 0}; size_t gws[3] = {1, 1, 1}; MockKernelWithInternals mockKernel(*pClDevice); HardwareParse hwParserCsr; HardwareParse hwParserCmdQ; pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr); hwParserCsr.parseCommands(csr.commandStream); hwParserCmdQ.parseCommands(pCmdQ->getCS(1024)); auto offsetCsr = csr.commandStream.getUsed(); auto offsetCmdQ = pCmdQ->getCS(1024).getUsed(); pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr); pCmdQ->flush(); hwParserCsr.parseCommands(csr.commandStream, offsetCsr); hwParserCmdQ.parseCommands(pCmdQ->getCS(1024), offsetCmdQ); EXPECT_EQ(1U, countMmio(hwParserCsr.cmdList.begin(), hwParserCsr.cmdList.end(), 0x2580u)); EXPECT_EQ(0U, countMmio(hwParserCsr.cmdList.begin(), hwParserCsr.cmdList.end(), 0x2600u)); EXPECT_EQ(0U, countMmio(hwParserCmdQ.cmdList.begin(), hwParserCmdQ.cmdList.end(), 0x2580u)); EXPECT_EQ(0U, countMmio(hwParserCmdQ.cmdList.begin(), hwParserCmdQ.cmdList.end(), 0x2600u)); } GEN9TEST_F(Gen9ThreadGroupPreemptionEnqueueKernelTest, givenSecondEnqueueWithTheSamePreemptionRequestThenDontReprogramThreadGroupWa) { pDevice->setPreemptionMode(PreemptionMode::ThreadGroup); pDevice->getRootDeviceEnvironment().getMutableHardwareInfo()->workaroundTable.flags.waModifyVFEStateAfterGPGPUPreemption = true; auto &csr = pDevice->getUltCommandStreamReceiver(); csr.getMemoryManager()->setForce32BitAllocations(false); csr.setMediaVFEStateDirty(false); auto csrSurface = csr.getPreemptionAllocation(); EXPECT_EQ(nullptr, csrSurface); HardwareParse hwCsrParser; HardwareParse hwCmdQParser; size_t off[3] = {0, 0, 0}; size_t gws[3] = {1, 1, 1}; MockKernelWithInternals mockKernel(*pClDevice); pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, 
nullptr, 0, nullptr, nullptr); hwCsrParser.parseCommands(csr.commandStream); hwCsrParser.findHardwareCommands(); hwCmdQParser.parseCommands(pCmdQ->getCS(1024)); hwCmdQParser.findHardwareCommands(); auto offsetCsr = csr.commandStream.getUsed(); auto offsetCmdQ = pCmdQ->getCS(1024).getUsed(); bool foundOne = false; for (auto it : hwCsrParser.lriList) { auto cmd = genCmdCast(it); if (cmd->getRegisterOffset() == 0x2580u) { EXPECT_FALSE(foundOne); foundOne = true; } } EXPECT_TRUE(foundOne); hwCsrParser.cmdList.clear(); hwCsrParser.lriList.clear(); int foundWaLri = 0; int foundWaLriBegin = 0; int foundWaLriEnd = 0; for (auto it : hwCmdQParser.lriList) { auto cmd = genCmdCast(it); if (cmd->getRegisterOffset() == 0x2600u) { foundWaLri++; if (cmd->getDataDword() == 0xFFFFFFFF) { foundWaLriBegin++; } if (cmd->getDataDword() == 0x0) { foundWaLriEnd++; } } } EXPECT_EQ(2, foundWaLri); EXPECT_EQ(1, foundWaLriBegin); EXPECT_EQ(1, foundWaLriEnd); hwCmdQParser.cmdList.clear(); hwCmdQParser.lriList.clear(); pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr); hwCsrParser.parseCommands(csr.commandStream, offsetCsr); hwCsrParser.findHardwareCommands(); hwCmdQParser.parseCommands(pCmdQ->getCS(1024), offsetCmdQ); hwCmdQParser.findHardwareCommands(); for (auto it : hwCsrParser.lriList) { auto cmd = genCmdCast(it); EXPECT_FALSE(cmd->getRegisterOffset() == 0x2580u); } foundWaLri = 0; foundWaLriBegin = 0; foundWaLriEnd = 0; for (auto it : hwCmdQParser.lriList) { auto cmd = genCmdCast(it); if (cmd->getRegisterOffset() == 0x2600u) { foundWaLri++; if (cmd->getDataDword() == 0xFFFFFFFF) { foundWaLriBegin++; } if (cmd->getDataDword() == 0x0) { foundWaLriEnd++; } } } EXPECT_EQ(2, foundWaLri); EXPECT_EQ(1, foundWaLriBegin); EXPECT_EQ(1, foundWaLriEnd); } GEN9TEST_F(Gen9PreemptionEnqueueKernelTest, givenValidKernelForPreemptionWhenEnqueueKernelCalledThenPassDevicePreemptionModeThreadGroup) { pDevice->setPreemptionMode(PreemptionMode::ThreadGroup); auto mockCsr = 
new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(mockCsr); MockKernelWithInternals mockKernel(*pClDevice); MultiDispatchInfo multiDispatch(mockKernel.mockKernel); EXPECT_EQ(PreemptionMode::ThreadGroup, ClPreemptionHelper::taskPreemptionMode(*pDevice, multiDispatch)); size_t gws[3] = {1, 0, 0}; pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); pCmdQ->flush(); EXPECT_EQ(1, mockCsr->flushCalledCount); EXPECT_EQ(PreemptionMode::ThreadGroup, mockCsr->passedDispatchFlags.preemptionMode); } GEN9TEST_F(Gen9PreemptionEnqueueKernelTest, givenValidKernelForPreemptionWhenEnqueueKernelCalledAndBlockedThenPassDevicePreemptionModeThreadGroup) { pDevice->setPreemptionMode(PreemptionMode::ThreadGroup); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(mockCsr); MockKernelWithInternals mockKernel(*pClDevice); MultiDispatchInfo multiDispatch(mockKernel.mockKernel); EXPECT_EQ(PreemptionMode::ThreadGroup, ClPreemptionHelper::taskPreemptionMode(*pDevice, multiDispatch)); UserEvent userEventObj; cl_event userEvent = &userEventObj; size_t gws[3] = {1, 0, 0}; pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 1, &userEvent, nullptr); pCmdQ->flush(); EXPECT_EQ(0, mockCsr->flushCalledCount); userEventObj.setStatus(CL_COMPLETE); pCmdQ->flush(); EXPECT_EQ(1, mockCsr->flushCalledCount); EXPECT_EQ(PreemptionMode::ThreadGroup, mockCsr->passedDispatchFlags.preemptionMode); } GEN9TEST_F(Gen9MidThreadPreemptionEnqueueKernelTest, givenSecondEnqueueWithTheSamePreemptionRequestThenDontReprogramMidThreadNoWa) { typedef typename FamilyType::MI_LOAD_REGISTER_IMM MI_LOAD_REGISTER_IMM; typedef typename FamilyType::GPGPU_CSR_BASE_ADDRESS GPGPU_CSR_BASE_ADDRESS; 
pDevice->getRootDeviceEnvironment().getMutableHardwareInfo()->workaroundTable.flags.waModifyVFEStateAfterGPGPUPreemption = false; auto &csr = pDevice->getUltCommandStreamReceiver(); csr.getMemoryManager()->setForce32BitAllocations(false); csr.setMediaVFEStateDirty(false); auto csrSurface = csr.getPreemptionAllocation(); ASSERT_NE(nullptr, csrSurface); HardwareParse hwCsrParser; HardwareParse hwCmdQParser; size_t off[3] = {0, 0, 0}; size_t gws[3] = {1, 1, 1}; MockKernelWithInternals mockKernel(*pClDevice); pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr); hwCsrParser.parseCommands(csr.commandStream); hwCsrParser.findHardwareCommands(); hwCmdQParser.parseCommands(pCmdQ->getCS(1024)); hwCmdQParser.findHardwareCommands(); auto offsetCsr = csr.commandStream.getUsed(); auto offsetCmdQ = pCmdQ->getCS(1024).getUsed(); bool foundOneLri = false; for (auto it : hwCsrParser.lriList) { auto cmdLri = genCmdCast(it); if (cmdLri->getRegisterOffset() == 0x2580u) { EXPECT_FALSE(foundOneLri); foundOneLri = true; } } EXPECT_TRUE(foundOneLri); bool foundWaLri = false; for (auto it : hwCmdQParser.lriList) { auto cmdLri = genCmdCast(it); if (cmdLri->getRegisterOffset() == 0x2600u) { foundWaLri = true; } } EXPECT_FALSE(foundWaLri); hwCsrParser.findCsrBaseAddress(); ASSERT_NE(nullptr, hwCsrParser.cmdGpgpuCsrBaseAddress); auto cmdCsr = genCmdCast(hwCsrParser.cmdGpgpuCsrBaseAddress); ASSERT_NE(nullptr, cmdCsr); EXPECT_EQ(csrSurface->getGpuAddressToPatch(), cmdCsr->getGpgpuCsrBaseAddress()); hwCsrParser.cmdList.clear(); hwCsrParser.lriList.clear(); hwCsrParser.cmdGpgpuCsrBaseAddress = nullptr; hwCmdQParser.cmdList.clear(); hwCmdQParser.lriList.clear(); pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr); hwCsrParser.parseCommands(csr.commandStream, offsetCsr); hwCsrParser.findHardwareCommands(); hwCmdQParser.parseCommands(pCmdQ->getCS(1024), offsetCmdQ); hwCmdQParser.findHardwareCommands(); for (auto it : 
hwCsrParser.lriList) { auto cmd = genCmdCast(it); EXPECT_FALSE(cmd->getRegisterOffset() == 0x2580u); } hwCsrParser.findCsrBaseAddress(); EXPECT_EQ(nullptr, hwCsrParser.cmdGpgpuCsrBaseAddress); for (auto it : hwCmdQParser.lriList) { auto cmd = genCmdCast(it); EXPECT_FALSE(cmd->getRegisterOffset() == 0x2600u); } } GEN9TEST_F(Gen9MidThreadPreemptionEnqueueKernelTest, givenSecondEnqueueWithTheSamePreemptionRequestThenDontReprogramMidThreadWa) { typedef typename FamilyType::MI_LOAD_REGISTER_IMM MI_LOAD_REGISTER_IMM; typedef typename FamilyType::GPGPU_CSR_BASE_ADDRESS GPGPU_CSR_BASE_ADDRESS; pDevice->getRootDeviceEnvironment().getMutableHardwareInfo()->workaroundTable.flags.waModifyVFEStateAfterGPGPUPreemption = true; auto &csr = pDevice->getUltCommandStreamReceiver(); csr.getMemoryManager()->setForce32BitAllocations(false); csr.setMediaVFEStateDirty(false); auto csrSurface = csr.getPreemptionAllocation(); ASSERT_NE(nullptr, csrSurface); HardwareParse hwCsrParser; HardwareParse hwCmdQParser; size_t off[3] = {0, 0, 0}; size_t gws[3] = {1, 1, 1}; MockKernelWithInternals mockKernel(*pClDevice); pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr); hwCsrParser.parseCommands(csr.commandStream); hwCsrParser.findHardwareCommands(); hwCmdQParser.parseCommands(pCmdQ->getCS(1024)); hwCmdQParser.findHardwareCommands(); auto offsetCsr = csr.commandStream.getUsed(); auto offsetCmdQ = pCmdQ->getCS(1024).getUsed(); bool foundOneLri = false; for (auto it : hwCsrParser.lriList) { auto cmdLri = genCmdCast(it); if (cmdLri->getRegisterOffset() == 0x2580u) { EXPECT_FALSE(foundOneLri); foundOneLri = true; } } EXPECT_TRUE(foundOneLri); int foundWaLri = 0; int foundWaLriBegin = 0; int foundWaLriEnd = 0; for (auto it : hwCmdQParser.lriList) { auto cmdLri = genCmdCast(it); if (cmdLri->getRegisterOffset() == 0x2600u) { foundWaLri++; if (cmdLri->getDataDword() == 0xFFFFFFFF) { foundWaLriBegin++; } if (cmdLri->getDataDword() == 0x0) { foundWaLriEnd++; } } } 
EXPECT_EQ(2, foundWaLri); EXPECT_EQ(1, foundWaLriBegin); EXPECT_EQ(1, foundWaLriEnd); hwCsrParser.findCsrBaseAddress(); ASSERT_NE(nullptr, hwCsrParser.cmdGpgpuCsrBaseAddress); auto cmdCsr = genCmdCast(hwCsrParser.cmdGpgpuCsrBaseAddress); ASSERT_NE(nullptr, cmdCsr); EXPECT_EQ(csrSurface->getGpuAddressToPatch(), cmdCsr->getGpgpuCsrBaseAddress()); hwCsrParser.cmdList.clear(); hwCsrParser.lriList.clear(); hwCsrParser.cmdGpgpuCsrBaseAddress = nullptr; hwCmdQParser.cmdList.clear(); hwCmdQParser.lriList.clear(); pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr); hwCsrParser.parseCommands(csr.commandStream, offsetCsr); hwCsrParser.findHardwareCommands(); hwCmdQParser.parseCommands(pCmdQ->getCS(1024), offsetCmdQ); hwCmdQParser.findHardwareCommands(); for (auto it : hwCsrParser.lriList) { auto cmd = genCmdCast(it); EXPECT_FALSE(cmd->getRegisterOffset() == 0x2580u); } hwCsrParser.findCsrBaseAddress(); EXPECT_EQ(nullptr, hwCsrParser.cmdGpgpuCsrBaseAddress); foundWaLri = 0; foundWaLriBegin = 0; foundWaLriEnd = 0; for (auto it : hwCmdQParser.lriList) { auto cmd = genCmdCast(it); if (cmd->getRegisterOffset() == 0x2600u) { foundWaLri++; if (cmd->getDataDword() == 0xFFFFFFFF) { foundWaLriBegin++; } if (cmd->getDataDword() == 0x0) { foundWaLriEnd++; } } } EXPECT_EQ(2, foundWaLri); EXPECT_EQ(1, foundWaLriBegin); EXPECT_EQ(1, foundWaLriEnd); } GEN9TEST_F(Gen9PreemptionEnqueueKernelTest, givenDisabledPreemptionWhenEnqueueKernelCalledThenPassDisabledPreemptionMode) { pDevice->setPreemptionMode(PreemptionMode::Disabled); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(mockCsr); MockKernelWithInternals mockKernel(*pClDevice); MultiDispatchInfo multiDispatch(mockKernel.mockKernel); EXPECT_EQ(PreemptionMode::Disabled, ClPreemptionHelper::taskPreemptionMode(*pDevice, multiDispatch)); size_t gws[3] = {1, 0, 0}; 
pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); pCmdQ->flush(); EXPECT_EQ(1, mockCsr->flushCalledCount); EXPECT_EQ(PreemptionMode::Disabled, mockCsr->passedDispatchFlags.preemptionMode); } compute-runtime-22.14.22890/opencl/test/unit_test/gen9/test_sample_gen9.cpp000066400000000000000000000006361422164147700264440ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" using namespace NEO; typedef Test Gen9OnlyTest; GEN9TEST_F(Gen9OnlyTest, WhenGettingRenderCoreFamilyThenGen9CoreIsReturned) { EXPECT_EQ(IGFX_GEN9_CORE, pDevice->getRenderCoreFamily()); } compute-runtime-22.14.22890/opencl/test/unit_test/gen9/windows/000077500000000000000000000000001422164147700241635ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/gen9/windows/CMakeLists.txt000066400000000000000000000004761422164147700267320ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_gen9_windows ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/gmm_callbacks_tests_gen9.cpp ) if(WIN32) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen9_windows}) endif() compute-runtime-22.14.22890/opencl/test/unit_test/gen9/windows/gmm_callbacks_tests_gen9.cpp000066400000000000000000000011751422164147700316160ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/windows/gmm_callbacks.h" #include "shared/test/common/test_macros/test.h" using namespace NEO; typedef ::testing::Test Gen9GmmCallbacksTests; GEN9TEST_F(Gen9GmmCallbacksTests, GivenDefaultWhenNotifyingAubCaptureThenDeviceCallbackIsNotSupported) { EXPECT_EQ(0, DeviceCallbacks::notifyAubCapture(nullptr, 0, 0, false)); } 
GEN9TEST_F(Gen9GmmCallbacksTests, GivenDefaultWhenWritingL3AddressThenTtCallbackIsNotSupported) { EXPECT_EQ(0, TTCallbacks::writeL3Address(nullptr, 1, 2)); } compute-runtime-22.14.22890/opencl/test/unit_test/gen_common/000077500000000000000000000000001422164147700237505ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/gen_common/CMakeLists.txt000066400000000000000000000007161422164147700265140ustar00rootroot00000000000000# # Copyright (C) 2018-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_gen_common ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/exclude_tests/exclude_test_declare.cpp ${CMAKE_CURRENT_SOURCE_DIR}/exclude_tests/exclude_test_exclude.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gen_commands_common_validation.h ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen_common}) add_subdirectories() compute-runtime-22.14.22890/opencl/test/unit_test/gen_common/exclude_tests/000077500000000000000000000000001422164147700266235ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/gen_common/exclude_tests/exclude_test_declare.cpp000066400000000000000000000021031422164147700334720ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/hw_info.h" #include "shared/test/common/test_macros/test.h" #include template struct ExcludeTest : ::testing::Test { void SetUp() override { EXPECT_NE(prohibitedValue, ::productFamily); } void TearDown() override { EXPECT_NE(prohibitedValue, ::productFamily); } }; using ExcludeTestBdw = ExcludeTest; HWCMDTEST_F(IGFX_GEN8_CORE, ExcludeTestBdw, givenHwCmdTestWhenBdwExcludedThenDontRunOnBdw) { EXPECT_NE(IGFX_BROADWELL, ::productFamily); } HWTEST_F(ExcludeTestBdw, givenHwTestWhenBdwExcludedThenDontRunOnBdw) { EXPECT_NE(IGFX_BROADWELL, ::productFamily); } using ExcludeTestSkl = ExcludeTest; HWCMDTEST_F(IGFX_GEN8_CORE, ExcludeTestSkl, 
givenHwCmdTestWhenSklExcludedThenDontRunOnSkl) { EXPECT_NE(IGFX_SKYLAKE, ::productFamily); } HWTEST_F(ExcludeTestSkl, givenHwTestWhenSklExcludedThenDontRunOnSkl) { EXPECT_NE(IGFX_SKYLAKE, ::productFamily); } compute-runtime-22.14.22890/opencl/test/unit_test/gen_common/exclude_tests/exclude_test_exclude.cpp000066400000000000000000000010371422164147700335310ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" HWTEST_EXCLUDE_PRODUCT(ExcludeTestBdw, givenHwCmdTestWhenBdwExcludedThenDontRunOnBdw, IGFX_BROADWELL); HWTEST_EXCLUDE_PRODUCT(ExcludeTestBdw, givenHwTestWhenBdwExcludedThenDontRunOnBdw, IGFX_BROADWELL); HWTEST_EXCLUDE_PRODUCT(ExcludeTestSkl, givenHwCmdTestWhenSklExcludedThenDontRunOnSkl, IGFX_SKYLAKE); HWTEST_EXCLUDE_PRODUCT(ExcludeTestSkl, givenHwTestWhenSklExcludedThenDontRunOnSkl, IGFX_SKYLAKE); compute-runtime-22.14.22890/opencl/test/unit_test/gen_common/gen_commands_common_validation.h000066400000000000000000000115421422164147700323400ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/constants.h" #include "shared/source/indirect_heap/indirect_heap.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "shared/test/common/helpers/unit_test_helper.h" #include "gtest/gtest.h" #include namespace NEO { template void validateStateBaseAddress(uint64_t indirectObjectHeapBase, uint64_t instructionHeapBaseAddress, IndirectHeap *pDSH, IndirectHeap *pIOH, IndirectHeap *pSSH, GenCmdList::iterator &startCommand, GenCmdList::iterator &endCommand, GenCmdList &cmdList, uint64_t expectedGeneralStateHeapBaseAddress) { typedef typename FamilyType::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; // All state should be programmed before walker auto itorCmd = find(startCommand, endCommand); ASSERT_NE(endCommand, itorCmd); auto *cmd = (STATE_BASE_ADDRESS 
*)*itorCmd; // Verify all addresses are getting programmed EXPECT_TRUE(cmd->getDynamicStateBaseAddressModifyEnable()); EXPECT_TRUE(cmd->getGeneralStateBaseAddressModifyEnable()); EXPECT_TRUE(cmd->getSurfaceStateBaseAddressModifyEnable()); EXPECT_TRUE(cmd->getIndirectObjectBaseAddressModifyEnable()); EXPECT_TRUE(cmd->getInstructionBaseAddressModifyEnable()); EXPECT_EQ(pDSH->getGraphicsAllocation()->getGpuAddress(), cmd->getDynamicStateBaseAddress()); // Stateless accesses require GSH.base to be 0. EXPECT_EQ(expectedGeneralStateHeapBaseAddress, cmd->getGeneralStateBaseAddress()); EXPECT_EQ(pSSH->getGraphicsAllocation()->getGpuAddress(), cmd->getSurfaceStateBaseAddress()); EXPECT_EQ(pIOH->getGraphicsAllocation()->getGpuBaseAddress(), cmd->getIndirectObjectBaseAddress()); EXPECT_EQ(instructionHeapBaseAddress, cmd->getInstructionBaseAddress()); // Verify all sizes are getting programmed EXPECT_TRUE(cmd->getDynamicStateBufferSizeModifyEnable()); EXPECT_TRUE(cmd->getGeneralStateBufferSizeModifyEnable()); EXPECT_TRUE(cmd->getIndirectObjectBufferSizeModifyEnable()); EXPECT_TRUE(cmd->getInstructionBufferSizeModifyEnable()); EXPECT_EQ(pDSH->getMaxAvailableSpace(), cmd->getDynamicStateBufferSize() * MemoryConstants::pageSize); EXPECT_NE(0u, cmd->getGeneralStateBufferSize()); EXPECT_EQ(MemoryConstants::sizeOf4GBinPageEntities, cmd->getIndirectObjectBufferSize()); EXPECT_EQ(MemoryConstants::sizeOf4GBinPageEntities, cmd->getInstructionBufferSize()); // Generically validate this command FamilyType::PARSE::template validateCommand(cmdList.begin(), itorCmd); } template void validateL3Programming(GenCmdList &cmdList, GenCmdList::iterator &itorWalker) { typedef typename FamilyType::PARSE PARSE; typedef typename PARSE::MI_LOAD_REGISTER_IMM MI_LOAD_REGISTER_IMM; auto itorCmd = findMmio(cmdList.begin(), itorWalker, L3CNTLRegisterOffset::registerOffset); if (UnitTestHelper::isL3ConfigProgrammable()) { // All state should be programmed before walker ASSERT_NE(itorWalker, itorCmd); auto 
*cmd = genCmdCast(*itorCmd); ASSERT_NE(nullptr, cmd); auto registerOffset = L3CNTLRegisterOffset::registerOffset; EXPECT_EQ(registerOffset, cmd->getRegisterOffset()); auto l3Cntlreg = cmd->getDataDword(); auto numURBWays = (l3Cntlreg >> 1) & 0x7f; auto L3ClientPool = (l3Cntlreg >> 25) & 0x7f; EXPECT_NE(0u, numURBWays); EXPECT_NE(0u, L3ClientPool); } else { ASSERT_EQ(itorWalker, itorCmd); } } template void validateMediaVFEState(const HardwareInfo *hwInfo, void *cmdMediaVfeState, GenCmdList &cmdList, GenCmdList::iterator itorMediaVfeState) { typedef typename FamilyType::MEDIA_VFE_STATE MEDIA_VFE_STATE; auto *cmd = (MEDIA_VFE_STATE *)cmdMediaVfeState; ASSERT_NE(nullptr, cmd); uint32_t threadPerEU = (hwInfo->gtSystemInfo.ThreadCount / hwInfo->gtSystemInfo.EUCount) + hwInfo->capabilityTable.extraQuantityThreadsPerEU; uint32_t expected = hwInfo->gtSystemInfo.EUCount * threadPerEU; EXPECT_EQ(expected, cmd->getMaximumNumberOfThreads()); EXPECT_NE(0u, cmd->getNumberOfUrbEntries()); EXPECT_NE(0u, cmd->getUrbEntryAllocationSize()); EXPECT_EQ(0u, cmd->getScratchSpaceBasePointer()); EXPECT_EQ(0u, cmd->getPerThreadScratchSpace()); EXPECT_EQ(0u, cmd->getStackSize()); // Generically validate this command FamilyType::PARSE::template validateCommand(cmdList.begin(), itorMediaVfeState); } } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/gmm_helper/000077500000000000000000000000001422164147700237465ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/gmm_helper/CMakeLists.txt000066400000000000000000000004631422164147700265110ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_gmm_helper ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/gmm_helper_tests.cpp ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gmm_helper}) add_subdirectories() 
compute-runtime-22.14.22890/opencl/test/unit_test/gmm_helper/gmm_helper_tests.cpp000066400000000000000000002323051422164147700300200ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/gmm_helper/cache_settings_helper.h" #include "shared/source/gmm_helper/client_context/gmm_client_context.h" #include "shared/source/gmm_helper/gmm.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/helpers/ptr_math.h" #include "shared/source/memory_manager/os_agnostic_memory_manager.h" #include "shared/source/os_interface/device_factory.h" #include "shared/source/sku_info/operations/sku_info_transfer.h" #include "shared/test/common/fixtures/mock_execution_environment_gmm_fixture.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/variable_backup.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/mocks/mock_execution_environment.h" #include "shared/test/common/mocks/mock_gmm.h" #include "shared/test/common/mocks/mock_gmm_client_context.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/helpers/gmm_types_converter.h" #include "opencl/source/mem_obj/image.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "GL/gl.h" #include "GL/glext.h" #include "igfxfmid.h" using MockExecutionEnvironmentGmmFixtureTest = Test; using namespace ::testing; namespace NEO { extern GMM_INIT_IN_ARGS passedInputArgs; extern SKU_FEATURE_TABLE passedFtrTable; extern WA_TABLE passedWaTable; extern bool copyInputArgs; struct GmmTests : public MockExecutionEnvironmentGmmFixtureTest 
{ void SetUp() override { MockExecutionEnvironmentGmmFixture::SetUp(); rootDeviceEnvironment = executionEnvironment->rootDeviceEnvironments[0].get(); localPlatformDevice = rootDeviceEnvironment->getMutableHardwareInfo(); } RootDeviceEnvironment *rootDeviceEnvironment = nullptr; HardwareInfo *localPlatformDevice = nullptr; }; TEST(GmmGlTests, givenGmmWhenAskedforCubeFaceIndexThenProperValueIsReturned) { std::vector> v = {{__GMM_CUBE_FACE_NEG_X, GL_TEXTURE_CUBE_MAP_NEGATIVE_X}, {__GMM_CUBE_FACE_POS_X, GL_TEXTURE_CUBE_MAP_POSITIVE_X}, {__GMM_CUBE_FACE_NEG_Y, GL_TEXTURE_CUBE_MAP_NEGATIVE_Y}, {__GMM_CUBE_FACE_POS_Y, GL_TEXTURE_CUBE_MAP_POSITIVE_Y}, {__GMM_CUBE_FACE_NEG_Z, GL_TEXTURE_CUBE_MAP_NEGATIVE_Z}, {__GMM_CUBE_FACE_POS_Z, GL_TEXTURE_CUBE_MAP_POSITIVE_Z}}; uint32_t maxVal = 0; for (auto p : v) { EXPECT_TRUE(p.first == GmmTypesConverter::getCubeFaceIndex(p.second)); maxVal = std::max(maxVal, p.second); } maxVal++; EXPECT_TRUE(__GMM_NO_CUBE_MAP == GmmTypesConverter::getCubeFaceIndex(maxVal)); } TEST_F(GmmTests, WhenGmmIsCreatedThenAllResourceAreCreated) { std::unique_ptr mm(new MemoryManagerCreate(false, false, *executionEnvironment)); void *pSysMem = mm->allocateSystemMemory(4096, 4096); std::unique_ptr gmm(new Gmm(getGmmClientContext(), pSysMem, 4096, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true)); ASSERT_TRUE(gmm->gmmResourceInfo.get() != nullptr); void *pGmmSysMem = gmm->gmmResourceInfo->getSystemMemPointer(); EXPECT_EQ(gmm->resourceParams.Flags.Gpu.NoRestriction, 0u); EXPECT_TRUE(pSysMem == pGmmSysMem); mm->freeSystemMemory(pSysMem); } TEST_F(GmmTests, GivenUncacheableWhenGmmIsCreatedThenAllResourceAreCreated) { std::unique_ptr mm(new MemoryManagerCreate(false, false, *executionEnvironment)); void *pSysMem = mm->allocateSystemMemory(4096, 4096); std::unique_ptr gmm(new Gmm(getGmmClientContext(), pSysMem, 4096, 0, GMM_RESOURCE_USAGE_OCL_BUFFER_CSR_UC, false, {}, true)); ASSERT_TRUE(gmm->gmmResourceInfo.get() != nullptr); void *pGmmSysMem = 
gmm->gmmResourceInfo->getSystemMemPointer(); EXPECT_EQ(gmm->resourceParams.Flags.Gpu.NoRestriction, 0u); EXPECT_TRUE(pSysMem == pGmmSysMem); EXPECT_EQ(GMM_RESOURCE_USAGE_OCL_BUFFER_CSR_UC, gmm->resourceParams.Usage); mm->freeSystemMemory(pSysMem); } TEST_F(GmmTests, givenHostPointerWithHighestBitSetWhenGmmIsCreatedThenItHasTheSameAddress) { uintptr_t addressWithHighestBitSet = 0xffff0000; auto address = reinterpret_cast(addressWithHighestBitSet); auto expectedAddress = castToUint64(address); std::unique_ptr gmm(new Gmm(getGmmClientContext(), address, 4096, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true)); EXPECT_EQ(gmm->resourceParams.pExistingSysMem, expectedAddress); } TEST_F(GmmTests, GivenBufferSizeLargerThenMaxPitchWhenAskedForGmmCreationThenGmmResourceIsCreatedWithNoRestrictionsFlag) { auto maxSize = static_cast(GmmHelper::maxPossiblePitch); MemoryManager *mm = new MemoryManagerCreate(false, false, *executionEnvironment); void *pSysMem = mm->allocateSystemMemory(4096, 4096); auto gmmRes = new Gmm(getGmmClientContext(), pSysMem, maxSize, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true); ASSERT_TRUE(gmmRes->gmmResourceInfo.get() != nullptr); EXPECT_EQ(gmmRes->resourceParams.Flags.Gpu.NoRestriction, 1u); mm->freeSystemMemory(pSysMem); delete gmmRes; delete mm; } TEST_F(GmmTests, givenGmmCreatedFromExistingGmmThenHelperDoesNotReleaseParentGmm) { auto size = 4096u; void *incomingPtr = (void *)0x1000; auto gmmRes = new Gmm(getGmmClientContext(), incomingPtr, size, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true); auto gmmRes2 = new Gmm(getGmmClientContext(), gmmRes->gmmResourceInfo->peekGmmResourceInfo()); // copy is being made EXPECT_NE(gmmRes2->gmmResourceInfo->peekHandle(), gmmRes->gmmResourceInfo->peekGmmResourceInfo()); auto allocationSize = gmmRes->gmmResourceInfo->getSizeAllocation(); EXPECT_NE(0u, allocationSize); EXPECT_EQ(allocationSize, gmmRes2->gmmResourceInfo->getSizeAllocation()); // now delete parent GMM and query child, this shouldn't fail 
delete gmmRes; EXPECT_EQ(allocationSize, gmmRes2->gmmResourceInfo->getSizeAllocation()); delete gmmRes2; } TEST_F(GmmTests, GivenInvalidImageSizeWhenQueryingImgParamsThenImageInfoReturnsSizeZero) { ImageDescriptor imgDesc = {}; imgDesc.imageType = ImageType::Image1D; auto imgInfo = MockGmm::initImgInfo(imgDesc, 0, nullptr); auto queryGmm = MockGmm::queryImgParams(getGmmClientContext(), imgInfo, false); EXPECT_EQ(imgInfo.size, 0u); } TEST_F(GmmTests, GivenInvalidImageTypeWhenQueryingImgParamsThenExceptionIsThrown) { ImageDescriptor imgDesc = {}; imgDesc.imageWidth = 10; imgDesc.imageType = ImageType::Invalid; auto imgInfo = MockGmm::initImgInfo(imgDesc, 0, nullptr); EXPECT_THROW(MockGmm::queryImgParams(getGmmClientContext(), imgInfo, false), std::exception); } TEST_F(GmmTests, WhenQueryingImgParamsThenCorrectValuesAreReturned) { const HardwareInfo *hwinfo = defaultHwInfo.get(); ImageDescriptor imgDesc = {}; imgDesc.imageType = ImageType::Image3D; imgDesc.imageWidth = 17; imgDesc.imageHeight = 17; imgDesc.imageDepth = 17; size_t pixelSize = 4; size_t minSize = imgDesc.imageWidth * imgDesc.imageHeight * imgDesc.imageDepth * pixelSize; auto imgInfo = MockGmm::initImgInfo(imgDesc, 0, nullptr); auto queryGmm = MockGmm::queryImgParams(getGmmClientContext(), imgInfo, false); EXPECT_GT(imgInfo.size, minSize); EXPECT_GT(imgInfo.rowPitch, 0u); EXPECT_GT(imgInfo.slicePitch, 0u); if (hwinfo->platform.eRenderCoreFamily == IGFX_GEN8_CORE) { EXPECT_EQ(imgInfo.qPitch, 0u); } else { EXPECT_GT(imgInfo.qPitch, 0u); } auto &hwHelper = HwHelper::get(hwinfo->platform.eRenderCoreFamily); EXPECT_EQ(queryGmm->resourceParams.Type, GMM_RESOURCE_TYPE::RESOURCE_3D); EXPECT_EQ(queryGmm->resourceParams.NoGfxMemory, 1u); EXPECT_EQ(queryGmm->resourceParams.Usage, GMM_RESOURCE_USAGE_TYPE::GMM_RESOURCE_USAGE_OCL_IMAGE); EXPECT_EQ(static_cast(queryGmm->resourceParams.Format), static_cast(GMM_RESOURCE_FORMAT::GMM_FORMAT_R8G8B8A8_UNORM)); EXPECT_EQ(queryGmm->resourceParams.Flags.Gpu.Texture, 1u); 
EXPECT_EQ(queryGmm->resourceParams.BaseWidth64, 17u); EXPECT_EQ(queryGmm->resourceParams.BaseHeight, 17u); EXPECT_EQ(queryGmm->resourceParams.Depth, 17u); EXPECT_EQ(queryGmm->resourceParams.ArraySize, 1u); EXPECT_EQ(!!queryGmm->resourceParams.Flags.Wa.__ForceOtherHVALIGN4, hwHelper.hvAlign4Required()); } TEST_F(GmmTests, givenWidthWhenCreatingResourceThenSetWidth64Field) { const void *dummyPtr = reinterpret_cast(0x123); size_t allocationSize = std::numeric_limits::max(); Gmm gmm(getGmmClientContext(), dummyPtr, allocationSize, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true); EXPECT_EQ(static_cast(allocationSize), gmm.resourceParams.BaseWidth64); } TEST_F(GmmTests, givenNullptrWhenGmmConstructorIsCalledThenNoGfxMemoryIsProperlySet) { void *pSysMem = nullptr; std::unique_ptr gmm(new Gmm(getGmmClientContext(), pSysMem, 4096, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true)); EXPECT_EQ(gmm->resourceParams.NoGfxMemory, 1u); } HWTEST_F(GmmTests, givenGmmWithForceLocalMemThenNonLocalIsSetToFalse) { void *pSysMem = nullptr; std::unique_ptr gmm(new Gmm(getGmmClientContext(), pSysMem, 4096, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true)); EXPECT_EQ(gmm->resourceParams.Flags.Info.NonLocalOnly, 0u); } TEST_F(GmmTests, givenPtrWhenGmmConstructorIsCalledThenNoGfxMemoryIsProperlySet) { void *pSysMem = reinterpret_cast(0x1111); std::unique_ptr gmm(new Gmm(getGmmClientContext(), pSysMem, 4096, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true)); EXPECT_EQ(gmm->resourceParams.NoGfxMemory, 0u); } TEST_F(GmmTests, given2DimageFromBufferParametersWhenGmmResourceIsCreatedThenItHasDesiredPitchAndSize) { ImageDescriptor imgDesc = {}; imgDesc.imageType = ImageType::Image2D; imgDesc.imageWidth = 329; imgDesc.imageHeight = 349; imgDesc.imageDepth = 1; imgDesc.imageRowPitch = 5312; imgDesc.fromParent = true; SurfaceFormatInfo surfaceFormat = {GMM_FORMAT_R32G32B32A32_FLOAT_TYPE, (GFX3DSTATE_SURFACEFORMAT)0, 0, 4, 4, 16}; auto imgInfo = MockGmm::initImgInfo(imgDesc, 0, &surfaceFormat); 
auto queryGmm = MockGmm::queryImgParams(getGmmClientContext(), imgInfo, false); auto renderSize = queryGmm->gmmResourceInfo->getSizeAllocation(); size_t expectedSize = imgDesc.imageRowPitch * imgDesc.imageHeight; EXPECT_GE(renderSize, expectedSize); EXPECT_EQ(imgDesc.imageRowPitch, queryGmm->gmmResourceInfo->getRenderPitch()); } TEST_F(GmmTests, given2DimageFromBufferParametersWhenGmmResourceIsCreatedAndPitchIsOverridenThenItHasDesiredPitchAndSize) { ImageDescriptor imgDesc = {}; imgDesc.imageType = ImageType::Image2D; imgDesc.imageWidth = 329; imgDesc.imageHeight = 349; imgDesc.imageDepth = 1; imgDesc.imageRowPitch = 5376; imgDesc.fromParent = true; SurfaceFormatInfo surfaceFormat = {GMM_FORMAT_R32G32B32A32_FLOAT_TYPE, (GFX3DSTATE_SURFACEFORMAT)0, 0, 4, 4, 16}; auto imgInfo = MockGmm::initImgInfo(imgDesc, 0, &surfaceFormat); EXPECT_EQ(imgInfo.imgDesc.imageRowPitch, imgDesc.imageRowPitch); auto queryGmm = MockGmm::queryImgParams(getGmmClientContext(), imgInfo, false); auto renderSize = queryGmm->gmmResourceInfo->getSizeAllocation(); size_t expectedSize = imgDesc.imageRowPitch * imgDesc.imageHeight; EXPECT_GE(renderSize, expectedSize); EXPECT_EQ(imgDesc.imageRowPitch, queryGmm->gmmResourceInfo->getRenderPitch()); } TEST_F(GmmTests, givenPlanarFormatsWhenQueryingImageParamsThenUvOffsetIsQueried) { ImageDescriptor imgDesc = {}; imgDesc.imageType = ImageType::Image2D; imgDesc.imageHeight = 4; imgDesc.imageWidth = 4; imgDesc.imageDepth = 1; SurfaceFormatInfo surfaceFormatNV12 = {GMM_FORMAT_NV12, GFX3DSTATE_SURFACEFORMAT_PLANAR_420_8, 0, 1, 1, 1}; SurfaceFormatInfo surfaceFormatP010 = {GMM_FORMAT_P010, GFX3DSTATE_SURFACEFORMAT_PLANAR_420_8, 0, 1, 2, 2}; auto imgInfo = MockGmm::initImgInfo(imgDesc, 0, &surfaceFormatNV12); imgInfo.yOffsetForUVPlane = 0; MockGmm::queryImgParams(getGmmClientContext(), imgInfo, false); EXPECT_NE(0u, imgInfo.yOffsetForUVPlane); imgInfo = MockGmm::initImgInfo(imgDesc, 0, &surfaceFormatP010); imgInfo.yOffsetForUVPlane = 0; 
MockGmm::queryImgParams(getGmmClientContext(), imgInfo, false); EXPECT_NE(0u, imgInfo.yOffsetForUVPlane); } TEST_F(GmmTests, givenTilingModeSetToTileYWhenHwSupportsTilingThenTileYFlagIsSet) { ImageDescriptor imgDesc = {}; imgDesc.imageType = ImageType::Image2D; imgDesc.imageWidth = 4; imgDesc.imageHeight = 4; imgDesc.imageDepth = 1; auto imgInfo = MockGmm::initImgInfo(imgDesc, 0, nullptr); imgInfo.linearStorage = false; auto gmm = std::make_unique(getGmmClientContext(), imgInfo, StorageInfo{}, false); EXPECT_EQ(gmm->resourceParams.Flags.Info.Linear, 0u); EXPECT_EQ(gmm->resourceParams.Flags.Info.TiledY, 0u); } TEST_F(GmmTests, givenTilingModeSetToNonTiledWhenCreatingGmmThenLinearFlagIsSet) { ImageDescriptor imgDesc = {}; imgDesc.imageType = ImageType::Image2D; imgDesc.imageWidth = 4; imgDesc.imageHeight = 4; imgDesc.imageDepth = 1; auto imgInfo = MockGmm::initImgInfo(imgDesc, 0, nullptr); imgInfo.linearStorage = true; auto gmm = std::make_unique(getGmmClientContext(), imgInfo, StorageInfo{}, false); EXPECT_EQ(gmm->resourceParams.Flags.Info.Linear, 1u); EXPECT_EQ(gmm->resourceParams.Flags.Info.TiledY, 0u); } TEST_F(GmmTests, givenZeroRowPitchWhenQueryImgFromBufferParamsThenCalculate) { MockGraphicsAllocation bufferAllocation(nullptr, 4096); ImageDescriptor imgDesc = {}; imgDesc.imageType = ImageType::Image2D; imgDesc.imageWidth = 5; imgDesc.imageHeight = 5; imgDesc.imageRowPitch = 0; auto imgInfo = MockGmm::initImgInfo(imgDesc, 0, nullptr); size_t expectedRowPitch = imgDesc.imageWidth * imgInfo.surfaceFormat->ImageElementSizeInBytes; GmmTypesConverter::queryImgFromBufferParams(imgInfo, &bufferAllocation); EXPECT_EQ(imgInfo.rowPitch, expectedRowPitch); } TEST_F(GmmTests, givenNonZeroRowPitchWhenQueryImgFromBufferParamsThenUseUserValue) { MockGraphicsAllocation bufferAllocation(nullptr, 4096); ImageDescriptor imgDesc = {}; imgDesc.imageType = ImageType::Image2D; imgDesc.imageWidth = 5; imgDesc.imageHeight = 5; imgDesc.imageRowPitch = 123; size_t expectedRowPitch = 
imgDesc.imageRowPitch; auto imgInfo = MockGmm::initImgInfo(imgDesc, 0, nullptr); GmmTypesConverter::queryImgFromBufferParams(imgInfo, &bufferAllocation); EXPECT_EQ(imgInfo.rowPitch, expectedRowPitch); } using GmmCanonizeTests = GmmTests; TEST_F(GmmCanonizeTests, WhenCanonizingThenCorrectAddressIsReturned) { auto hwInfo = *defaultHwInfo; // 48 bit - canonize to 48 bit hwInfo.capabilityTable.gpuAddressSpace = maxNBitValue(48); // 0x0000FFFFFFFFFFFF; auto gmmHelper = std::make_unique(nullptr, &hwInfo); uint64_t testAddr1 = 0x7777777777777777; uint64_t goodAddr1 = 0x0000777777777777; EXPECT_EQ(GmmHelper::canonize(testAddr1), goodAddr1); uint64_t testAddr2 = 0x7FFFFFFFFFFFFFFF; uint64_t goodAddr2 = 0xFFFFFFFFFFFFFFFF; EXPECT_EQ(GmmHelper::canonize(testAddr2), goodAddr2); // 36 bit - also canonize to 48 bit hwInfo.capabilityTable.gpuAddressSpace = maxNBitValue(36); // 0x0000000FFFFFFFFF; gmmHelper = std::make_unique(nullptr, &hwInfo); EXPECT_EQ(GmmHelper::canonize(testAddr1), goodAddr1); EXPECT_EQ(GmmHelper::canonize(testAddr2), goodAddr2); } TEST_F(GmmCanonizeTests, WhenDecanonizingThenCorrectAddressIsReturned) { auto hwInfo = *defaultHwInfo; // 48 bit - decanonize to 48 bit hwInfo.capabilityTable.gpuAddressSpace = maxNBitValue(48); // 0x0000FFFFFFFFFFFF; auto gmmHelper = std::make_unique(nullptr, &hwInfo); uint64_t testAddr1 = 0x7777777777777777; uint64_t goodAddr1 = 0x0000777777777777; EXPECT_EQ(GmmHelper::decanonize(testAddr1), goodAddr1); uint64_t testAddr2 = 0x7FFFFFFFFFFFFFFF; uint64_t goodAddr2 = 0x0000FFFFFFFFFFFF; EXPECT_EQ(GmmHelper::decanonize(testAddr2), goodAddr2); // 36 bit - also decanonize to 48 bit hwInfo.capabilityTable.gpuAddressSpace = maxNBitValue(36); // 0x0000000FFFFFFFFF; gmmHelper = std::make_unique(nullptr, &hwInfo); EXPECT_EQ(GmmHelper::decanonize(testAddr1), goodAddr1); EXPECT_EQ(GmmHelper::decanonize(testAddr2), goodAddr2); } TEST_F(GmmCanonizeTests, WhenCheckingIsValidCanonicalGpuAddressThenOnlyValidAddressesReturnTrue) { auto hwInfo = 
*defaultHwInfo; // 48 bit hwInfo.capabilityTable.gpuAddressSpace = maxNBitValue(48); // 0x0000FFFFFFFFFFFF; auto gmmHelper = std::make_unique(nullptr, &hwInfo); uint64_t testAddr1 = 0x0000400000000000; EXPECT_TRUE(gmmHelper->isValidCanonicalGpuAddress(testAddr1)); uint64_t testAddr2 = 0x00000f5670000000; EXPECT_TRUE(gmmHelper->isValidCanonicalGpuAddress(testAddr2)); uint64_t testAddr3 = 0x0000800000000000; EXPECT_FALSE(gmmHelper->isValidCanonicalGpuAddress(testAddr3)); uint64_t testAddr4 = 0xff00ffff00000000; EXPECT_FALSE(gmmHelper->isValidCanonicalGpuAddress(testAddr4)); // 36 bit hwInfo.capabilityTable.gpuAddressSpace = maxNBitValue(36); // 0x0000000FFFFFFFFF; gmmHelper = std::make_unique(nullptr, &hwInfo); uint64_t testAddr5 = 0x0000000400000000; EXPECT_TRUE(gmmHelper->isValidCanonicalGpuAddress(testAddr5)); uint64_t testAddr6 = 0x00000004ff000000; EXPECT_TRUE(gmmHelper->isValidCanonicalGpuAddress(testAddr6)); uint64_t testAddr7 = 0x0000000800000000; EXPECT_TRUE(gmmHelper->isValidCanonicalGpuAddress(testAddr7)); uint64_t testAddr8 = 0xff00000400000000; EXPECT_FALSE(gmmHelper->isValidCanonicalGpuAddress(testAddr8)); // 57 bit hwInfo.capabilityTable.gpuAddressSpace = maxNBitValue(57); // 0x01FFFFFFFFFFFFFFF; gmmHelper = std::make_unique(nullptr, &hwInfo); uint64_t testAddr9 = 0x0080000000000000; EXPECT_TRUE(gmmHelper->isValidCanonicalGpuAddress(testAddr9)); uint64_t testAddr10 = 0x00000004ff000000; EXPECT_TRUE(gmmHelper->isValidCanonicalGpuAddress(testAddr10)); uint64_t testAddr11 = 0x0000000800000000; EXPECT_TRUE(gmmHelper->isValidCanonicalGpuAddress(testAddr11)); uint64_t testAddr12 = 0xfe00fff400000000; EXPECT_FALSE(gmmHelper->isValidCanonicalGpuAddress(testAddr12)); uint64_t testAddr13 = 0xfe008ff400000000; EXPECT_FALSE(gmmHelper->isValidCanonicalGpuAddress(testAddr13)); } TEST_F(GmmTests, givenMipmapedInputWhenAskedForHalingThenNonDefaultValueIsReturned) { ImageDescriptor imgDesc = {}; imgDesc.imageType = ImageType::Image2D; imgDesc.imageWidth = 60; 
imgDesc.imageHeight = 40; imgDesc.imageDepth = 1; int mipLevel = 5; auto imgInfo = MockGmm::initImgInfo(imgDesc, mipLevel, nullptr); auto queryGmm = MockGmm::queryImgParams(getGmmClientContext(), imgInfo, false); EXPECT_EQ(static_cast(queryGmm->resourceParams.MaxLod), mipLevel); } TEST_F(GmmTests, givenNumSamplesWhenAskedForMultisamplesCountThenReturnValue) { uint32_t numSamples[5][2] = {{0, 0}, {2, 1}, {4, 2}, {8, 3}, {16, 4}}; //{given, expected} for (int i = 0; i < 5; i++) { auto result = GmmTypesConverter::getRenderMultisamplesCount(numSamples[i][0]); EXPECT_EQ(numSamples[i][1], result); } } struct GmmMediaCompressedTests : public GmmTests { void SetUp() override { GmmTests::SetUp(); StorageInfo info; gmm = std::make_unique(getGmmClientContext(), nullptr, 4, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, true, info, true); flags = gmm->gmmResourceInfo->getResourceFlags(); flags->Gpu.CCS = true; flags->Gpu.UnifiedAuxSurface = true; } std::unique_ptr gmm; GMM_RESOURCE_FLAG *flags; }; TEST_F(GmmMediaCompressedTests, givenMediaCompressedThenUnifiedAuxTranslationCapableIsTrue) { flags->Info.MediaCompressed = true; flags->Info.RenderCompressed = false; EXPECT_TRUE(gmm->unifiedAuxTranslationCapable()); } TEST_F(GmmMediaCompressedTests, givenRenderCompressedThenUnifiedAuxTranslationCapableIsTrue) { flags->Info.MediaCompressed = false; flags->Info.RenderCompressed = true; EXPECT_TRUE(gmm->unifiedAuxTranslationCapable()); } TEST_F(GmmMediaCompressedTests, givenMediaAndRenderCompressedThenUnifiedAuxTranslationCapableThrowsException) { flags->Info.MediaCompressed = true; flags->Info.RenderCompressed = true; EXPECT_THROW(gmm->unifiedAuxTranslationCapable(), std::exception); } TEST_F(GmmMediaCompressedTests, givenNotMediaAndNotRenderCompressedThenUnifiedAuxTranslationCapableIsFalse) { flags->Info.MediaCompressed = false; flags->Info.RenderCompressed = false; EXPECT_FALSE(gmm->unifiedAuxTranslationCapable()); } namespace GmmTestConst { static const cl_mem_object_type imgTypes[6] = { 
CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE1D_BUFFER, CL_MEM_OBJECT_IMAGE2D, CL_MEM_OBJECT_IMAGE2D_ARRAY, CL_MEM_OBJECT_IMAGE3D}; } // namespace GmmTestConst TEST_F(GmmTests, WhenConvertingPlanesThenCorrectPlaneIsReturned) { std::vector> v = {{ImagePlane::NO_PLANE, GMM_YUV_PLANE::GMM_NO_PLANE}, {ImagePlane::PLANE_Y, GMM_YUV_PLANE::GMM_PLANE_Y}, {ImagePlane::PLANE_U, GMM_YUV_PLANE::GMM_PLANE_U}, {ImagePlane::PLANE_UV, GMM_YUV_PLANE::GMM_PLANE_U}, {ImagePlane::PLANE_V, GMM_YUV_PLANE::GMM_PLANE_V}}; for (auto p : v) { EXPECT_TRUE(p.second == GmmTypesConverter::convertPlane(p.first)); } } class GmmImgTest : public GmmTests, public ::testing::WithParamInterface {}; INSTANTIATE_TEST_CASE_P( GmmImgTests, GmmImgTest, testing::ValuesIn(GmmTestConst::imgTypes)); TEST_P(GmmImgTest, WhenUpdatingImgInfoAndDescThenInformationIsCorrect) { struct MyMockGmmResourceInfo : MockGmmResourceInfo { MyMockGmmResourceInfo(GMM_RESCREATE_PARAMS *resourceCreateParams) : MockGmmResourceInfo(resourceCreateParams) {} GMM_STATUS getOffset(GMM_REQ_OFFSET_INFO &reqOffsetInfo) override { givenReqInfo[getOffsetCalled] = reqOffsetInfo; getOffsetCalled++; return MockGmmResourceInfo::getOffset(reqOffsetInfo); } uint32_t getOffsetCalled = 0u; GMM_REQ_OFFSET_INFO givenReqInfo[2] = {}; }; ImageInfo updateImgInfo = {}; updateImgInfo.plane = GMM_YUV_PLANE::GMM_PLANE_U; uint32_t expectCalls = 1u; GMM_REQ_OFFSET_INFO expectedReqInfo[2] = {}; expectedReqInfo[0].ReqLock = 1; expectedReqInfo[1].ReqRender = 1; expectedReqInfo[1].Plane = updateImgInfo.plane; ImageDescriptor imgDesc = {}; imgDesc.imageType = Image::convertType(GetParam()); imgDesc.imageWidth = 60; imgDesc.imageHeight = 1; imgDesc.imageDepth = 1; imgDesc.imageArraySize = 1; cl_uint arrayIndex = 0; if (imgDesc.imageType == ImageType::Image2D || imgDesc.imageType == ImageType::Image2DArray || imgDesc.imageType == ImageType::Image3D) { imgDesc.imageHeight = 40; } if (imgDesc.imageType == ImageType::Image3D) { 
imgDesc.imageDepth = 5;
        expectCalls = 2u;
        expectedReqInfo[0].Slice = 1;
    }

    // Array images: a second getOffset call is expected, carrying the requested array index.
    if (imgDesc.imageType == ImageType::Image2DArray ||
        imgDesc.imageType == ImageType::Image1DArray) {
        imgDesc.imageArraySize = 5;
        expectCalls = 2u;
        arrayIndex = 2;
        expectedReqInfo[0].ArrayIndex = 1;
        expectedReqInfo[1].ArrayIndex = arrayIndex;
    }

    auto imgInfo = MockGmm::initImgInfo(imgDesc, 0, nullptr);
    auto queryGmm = MockGmm::queryImgParams(getGmmClientContext(), imgInfo, false);

    // Ownership of the raw mock is handed to queryGmm via reset().
    auto mockResInfo = new MyMockGmmResourceInfo(&queryGmm->resourceParams);
    queryGmm->gmmResourceInfo.reset(mockResInfo);

    queryGmm->updateImgInfoAndDesc(updateImgInfo, arrayIndex);
    EXPECT_EQ(expectCalls, mockResInfo->getOffsetCalled);

    EXPECT_EQ(imgDesc.imageWidth, updateImgInfo.imgDesc.imageWidth);
    EXPECT_EQ(imgDesc.imageHeight, updateImgInfo.imgDesc.imageHeight);
    EXPECT_EQ(imgDesc.imageDepth, updateImgInfo.imgDesc.imageDepth);
    EXPECT_EQ(imgDesc.imageArraySize, updateImgInfo.imgDesc.imageArraySize);
    EXPECT_GT(updateImgInfo.imgDesc.imageRowPitch, 0u);
    EXPECT_GT(updateImgInfo.imgDesc.imageSlicePitch, 0u);

    // With a single call only the render request is issued; with two calls
    // the lock request comes first and the render request second.
    if (expectCalls == 1) {
        EXPECT_TRUE(memcmp(&expectedReqInfo[1], &mockResInfo->givenReqInfo[0], sizeof(GMM_REQ_OFFSET_INFO)) == 0);
    } else if (expectCalls == 2u) {
        EXPECT_TRUE(memcmp(&expectedReqInfo[0], &mockResInfo->givenReqInfo[0], sizeof(GMM_REQ_OFFSET_INFO)) == 0);
        EXPECT_TRUE(memcmp(&expectedReqInfo[1], &mockResInfo->givenReqInfo[1], sizeof(GMM_REQ_OFFSET_INFO)) == 0);
    } else {
        EXPECT_TRUE(false);
    }
}

// updateOffsetsInImgInfo must query Gmm for render offsets and store them in
// ImageInfo. Uses the parameterized fixture through TEST_F, so GetParam() is
// not available here.
TEST_F(GmmImgTest, givenImgInfoWhenUpdatingOffsetsThenGmmIsCalledToGetOffsets) {
    // Values the mock hands back from getOffset.
    struct GmmGetOffsetOutput {
        uint32_t Offset;
        uint32_t XOffset;
        uint32_t YOffset;
    };

    // Mock that validates the incoming request and returns canned offsets.
    struct MyMockGmmResourceInfo : MockGmmResourceInfo {
        MyMockGmmResourceInfo(GMM_RESCREATE_PARAMS *resourceCreateParams) : MockGmmResourceInfo(resourceCreateParams) {}

        GMM_STATUS getOffset(GMM_REQ_OFFSET_INFO &reqOffsetInfo) override {
            EXPECT_EQ(1u, reqOffsetInfo.ReqRender);
            EXPECT_EQ(0u, reqOffsetInfo.Slice);
            EXPECT_EQ(expectedArrayIndex,
reqOffsetInfo.ArrayIndex); EXPECT_EQ(expectedGmmPlane, reqOffsetInfo.Plane); reqOffsetInfo.Render.Offset = gmmGetOffsetOutput.Offset; reqOffsetInfo.Render.XOffset = gmmGetOffsetOutput.XOffset; reqOffsetInfo.Render.YOffset = gmmGetOffsetOutput.YOffset; return GMM_SUCCESS; } uint32_t getBitsPerPixel() override { return gmmGetBitsPerPixelOutput; } cl_uint expectedArrayIndex; GMM_YUV_PLANE_ENUM expectedGmmPlane; GmmGetOffsetOutput gmmGetOffsetOutput; uint32_t gmmGetBitsPerPixelOutput; }; ImageDescriptor imgDesc = {}; imgDesc.imageType = ImageType::Image2DArray; imgDesc.imageWidth = 60; imgDesc.imageHeight = 1; imgDesc.imageDepth = 1; imgDesc.imageArraySize = 10; ImageInfo imgInfo = MockGmm::initImgInfo(imgDesc, 0, nullptr); std::unique_ptr gmm = MockGmm::queryImgParams(getGmmClientContext(), imgInfo, false); MyMockGmmResourceInfo *mockGmmResourceInfo = new MyMockGmmResourceInfo(&gmm->resourceParams); gmm->gmmResourceInfo.reset(mockGmmResourceInfo); mockGmmResourceInfo->expectedArrayIndex = 7; mockGmmResourceInfo->expectedGmmPlane = imgInfo.plane; mockGmmResourceInfo->gmmGetOffsetOutput = {10, 111, 120}; mockGmmResourceInfo->gmmGetBitsPerPixelOutput = 24; gmm->updateOffsetsInImgInfo(imgInfo, mockGmmResourceInfo->expectedArrayIndex); EXPECT_EQ(mockGmmResourceInfo->gmmGetOffsetOutput.Offset, imgInfo.offset); const auto expectedXOffset = mockGmmResourceInfo->gmmGetOffsetOutput.XOffset / (mockGmmResourceInfo->gmmGetBitsPerPixelOutput / 8); EXPECT_EQ(expectedXOffset, imgInfo.xOffset); EXPECT_EQ(mockGmmResourceInfo->gmmGetOffsetOutput.YOffset, imgInfo.yOffset); } TEST_F(GmmTests, GivenPlaneWhenCopyingResourceBltThenResourceIsCopiedCorrectly) { ImageDescriptor imgDesc = {}; imgDesc.imageType = ImageType::Image3D; imgDesc.imageWidth = 17; imgDesc.imageHeight = 17; imgDesc.imageDepth = 17; auto imgInfo = MockGmm::initImgInfo(imgDesc, 0, nullptr); auto gmm = MockGmm::queryImgParams(getGmmClientContext(), imgInfo, false); auto mockResInfo = static_cast(gmm->gmmResourceInfo.get()); 
GMM_RES_COPY_BLT &requestedCpuBlt = mockResInfo->requestedResCopyBlt;
    mockResInfo->cpuBltCalled = 0u;
    GMM_RES_COPY_BLT expectedCpuBlt = {};
    // Dummy one-byte "buffers"; only their addresses are used.
    char sys(0), gpu(0);
    uint32_t pitch = 300;
    uint32_t height = 400;
    uint8_t upload = 1u;

    expectedCpuBlt.Sys.pData = &sys;
    expectedCpuBlt.Gpu.pData = &gpu;
    expectedCpuBlt.Sys.RowPitch = pitch;
    expectedCpuBlt.Blt.Upload = upload;
    expectedCpuBlt.Sys.BufferSize = pitch * height;

    // plane Y
    auto retVal = gmm->resourceCopyBlt(&sys, &gpu, pitch, height, upload, ImagePlane::PLANE_Y);
    EXPECT_EQ(1u, retVal);
    EXPECT_TRUE(memcmp(&expectedCpuBlt, &requestedCpuBlt, sizeof(GMM_RES_COPY_BLT)) == 0);
    EXPECT_EQ(1u, mockResInfo->cpuBltCalled);

    // no-plane
    retVal = gmm->resourceCopyBlt(&sys, &gpu, pitch, height, upload, ImagePlane::NO_PLANE);
    EXPECT_EQ(1u, retVal);
    EXPECT_TRUE(memcmp(&expectedCpuBlt, &requestedCpuBlt, sizeof(GMM_RES_COPY_BLT)) == 0);
    EXPECT_EQ(2u, mockResInfo->cpuBltCalled);

    // plane UV
    expectedCpuBlt.Sys.pData = ptrOffset(&sys, height * pitch * 2u);
    retVal = gmm->resourceCopyBlt(&sys, &gpu, pitch, height, upload, ImagePlane::PLANE_UV);
    EXPECT_EQ(1u, retVal);
    EXPECT_TRUE(memcmp(&expectedCpuBlt, &requestedCpuBlt, sizeof(GMM_RES_COPY_BLT)) == 0);
    EXPECT_EQ(3u, mockResInfo->cpuBltCalled);

    // plane V
    expectedCpuBlt.Sys.pData = ptrOffset(&sys, height * pitch * 2u);
    expectedCpuBlt.Sys.RowPitch = pitch / 2;
    expectedCpuBlt.Sys.BufferSize = expectedCpuBlt.Sys.RowPitch * height;
    retVal = gmm->resourceCopyBlt(&sys, &gpu, pitch, height, upload, ImagePlane::PLANE_V);
    EXPECT_EQ(1u, retVal);
    EXPECT_TRUE(memcmp(&expectedCpuBlt, &requestedCpuBlt, sizeof(GMM_RES_COPY_BLT)) == 0);
    EXPECT_EQ(4u, mockResInfo->cpuBltCalled);

    // plane U
    expectedCpuBlt.Sys.pData = ptrOffset(&sys, height * pitch * 2u + height * pitch / 2u);
    expectedCpuBlt.Sys.RowPitch = pitch / 2;
    expectedCpuBlt.Sys.BufferSize = expectedCpuBlt.Sys.RowPitch * height;
    retVal = gmm->resourceCopyBlt(&sys, &gpu, pitch, height, upload, ImagePlane::PLANE_U);
    EXPECT_EQ(1u, retVal);
EXPECT_TRUE(memcmp(&expectedCpuBlt, &requestedCpuBlt, sizeof(GMM_RES_COPY_BLT)) == 0);
    EXPECT_EQ(5u, mockResInfo->cpuBltCalled);
}

// setUnifiedAuxTranslationCapable on the mock sets CCS, UnifiedAuxSurface and
// RenderCompressed; Gmm must then report aux-translation capability.
TEST_F(GmmTests, givenAllValidFlagsWhenAskedForUnifiedAuxTranslationCapabilityThenReturnTrue) {
    auto gmm = std::unique_ptr<Gmm>(new Gmm(getGmmClientContext(), nullptr, 1, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true));
    auto mockResource = reinterpret_cast<MockGmmResourceInfo *>(gmm->gmmResourceInfo.get());

    mockResource->setUnifiedAuxTranslationCapable();
    EXPECT_EQ(1u, mockResource->mockResourceCreateParams.Flags.Gpu.CCS);
    EXPECT_EQ(1u, mockResource->mockResourceCreateParams.Flags.Gpu.UnifiedAuxSurface);
    EXPECT_EQ(1u, mockResource->mockResourceCreateParams.Flags.Info.RenderCompressed);

    EXPECT_TRUE(gmm->unifiedAuxTranslationCapable());
}

// The alignment constructor argument must be forwarded to BaseAlignment.
TEST_F(GmmTests, givenAlignmentValueWhenConstructingGmmThenSetAlignmentInResourceCreateObject) {
    const uint32_t alignment = 8096;
    Gmm gmm{getGmmClientContext(), nullptr, 1, alignment, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true};

    EXPECT_EQ(alignment, gmm.resourceParams.BaseAlignment);
}

// Clearing any one of the three required flags must make the capability query fail.
TEST_F(GmmTests, givenInvalidFlagsSetWhenAskedForUnifiedAuxTranslationCapabilityThenReturnFalse) {
    auto gmm = std::unique_ptr<Gmm>(new Gmm(getGmmClientContext(), nullptr, 1, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true));
    auto mockResource = reinterpret_cast<MockGmmResourceInfo *>(gmm->gmmResourceInfo.get());

    mockResource->mockResourceCreateParams.Flags.Gpu.CCS = 0;
    mockResource->mockResourceCreateParams.Flags.Gpu.UnifiedAuxSurface = 1;
    mockResource->mockResourceCreateParams.Flags.Info.RenderCompressed = 1;
    EXPECT_FALSE(gmm->unifiedAuxTranslationCapable()); // CCS == 0

    mockResource->mockResourceCreateParams.Flags.Gpu.CCS = 1;
    mockResource->mockResourceCreateParams.Flags.Gpu.UnifiedAuxSurface = 0;
    EXPECT_FALSE(gmm->unifiedAuxTranslationCapable()); // UnifiedAuxSurface == 0

    mockResource->mockResourceCreateParams.Flags.Gpu.UnifiedAuxSurface = 1;
    mockResource->mockResourceCreateParams.Flags.Info.RenderCompressed = 0;
    EXPECT_FALSE(gmm->unifiedAuxTranslationCapable());
// RenderCompressed == 0 } TEST_F(GmmTests, whenLargePagesAreImplicitlyAllowedThenEnableOptimizationPadding) { size_t allocationSize = 128; Gmm gmm(getGmmClientContext(), nullptr, allocationSize, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true); EXPECT_FALSE(gmm.resourceParams.Flags.Info.NoOptimizationPadding); } TEST_F(GmmTests, whenLargePagesAreExplicitlyAllowedAndUserPtrIsNullThenAllowOptimizationPadding) { size_t allocationSize = 128; bool allowLargePages = true; Gmm gmm(getGmmClientContext(), nullptr, allocationSize, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, allowLargePages); EXPECT_FALSE(gmm.resourceParams.Flags.Info.NoOptimizationPadding); } TEST_F(GmmTests, whenLargePagesAreExplicitlyDisallowedButUserPtrIsNotNullThenAllowOptimizationPadding) { const void *dummyPtr = reinterpret_cast(0x123); size_t allocationSize = 128; bool allowLargePages = false; Gmm gmm(getGmmClientContext(), dummyPtr, allocationSize, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, allowLargePages); EXPECT_FALSE(gmm.resourceParams.Flags.Info.NoOptimizationPadding); } TEST_F(GmmTests, whenLargePagesAreExplicitlyDisallowedAndUserPtrIsNullThenDisableOptimizationPadding) { size_t allocationSize = 128; bool allowLargePages = false; Gmm gmm(getGmmClientContext(), nullptr, allocationSize, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, allowLargePages); EXPECT_TRUE(gmm.resourceParams.Flags.Info.NoOptimizationPadding); } TEST_F(GmmTests, givenSizeIsMisallignedTo64kbWhenForceDisablingLargePagesThenSizeIsPreserved) { const void *dummyPtr = reinterpret_cast(0x123); size_t allocationSize = 256U; bool allowLargePages = false; Gmm gmm(getGmmClientContext(), dummyPtr, allocationSize, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, allowLargePages); EXPECT_EQ(allocationSize, gmm.resourceParams.BaseWidth64); } TEST_F(GmmTests, givenSizeIsAllignedTo64kbWhenForceDisablingLargePagesThenSizeIsAlteredToBreak64kbAlignment) { size_t allocationSize = MemoryConstants::pageSize64k; bool allowLargePages = false; Gmm 
gmm(getGmmClientContext(), nullptr, allocationSize, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, allowLargePages); EXPECT_EQ(allocationSize + MemoryConstants::pageSize, gmm.resourceParams.BaseWidth64); } TEST(GmmTest, givenHwInfoWhenDeviceIsCreatedThenSetThisHwInfoToGmmHelper) { std::unique_ptr device(MockDevice::createWithNewExecutionEnvironment(nullptr)); EXPECT_EQ(&device->getHardwareInfo(), device->getGmmHelper()->getHardwareInfo()); } TEST(GmmTest, givenAllocationTypeWhenGettingUsageTypeThenReturnCorrectValue) { const auto hwInfoConfig = HwInfoConfig::get(defaultHwInfo->platform.eProductFamily); for (uint32_t i = 0; i < static_cast(AllocationType::COUNT); i++) { auto allocationType = static_cast(i); for (auto forceUncached : {true, false}) { auto usage = CacheSettingsHelper::getGmmUsageType(allocationType, forceUncached, *defaultHwInfo); auto expectedUsage = GMM_RESOURCE_USAGE_UNKNOWN; switch (allocationType) { case AllocationType::CONSTANT_SURFACE: expectedUsage = forceUncached ? GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED : GMM_RESOURCE_USAGE_OCL_BUFFER_CONST; break; case AllocationType::IMAGE: expectedUsage = forceUncached ? GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED : GMM_RESOURCE_USAGE_OCL_IMAGE; break; case AllocationType::PREEMPTION: expectedUsage = forceUncached ? GMM_RESOURCE_USAGE_OCL_BUFFER_CSR_UC : GMM_RESOURCE_USAGE_OCL_BUFFER; break; case AllocationType::INTERNAL_HEAP: case AllocationType::LINEAR_STREAM: expectedUsage = forceUncached ? GMM_RESOURCE_USAGE_OCL_SYSTEM_MEMORY_BUFFER_CACHELINE_MISALIGNED : GMM_RESOURCE_USAGE_OCL_STATE_HEAP_BUFFER; break; case AllocationType::GPU_TIMESTAMP_DEVICE_BUFFER: case AllocationType::TIMESTAMP_PACKET_TAG_BUFFER: expectedUsage = (forceUncached || hwInfoConfig->isDcFlushAllowed()) ? GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED : GMM_RESOURCE_USAGE_OCL_BUFFER; break; default: expectedUsage = forceUncached ? 
GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED : GMM_RESOURCE_USAGE_OCL_BUFFER; break; } EXPECT_EQ(expectedUsage, usage); } } } TEST(GmmTest, givenForceAllResourcesUncachedFlagSetWhenGettingUsageTypeThenReturnUncached) { DebugManagerStateRestore restore; DebugManager.flags.ForceAllResourcesUncached.set(true); for (uint32_t i = 0; i < static_cast(AllocationType::COUNT); i++) { auto allocationType = static_cast(i); auto usage = CacheSettingsHelper::getGmmUsageType(allocationType, false, *defaultHwInfo); auto expectedUsage = GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED; if (allocationType == AllocationType::PREEMPTION) { expectedUsage = GMM_RESOURCE_USAGE_OCL_BUFFER_CSR_UC; } else if ((allocationType == AllocationType::INTERNAL_HEAP) || (allocationType == AllocationType::LINEAR_STREAM)) { expectedUsage = GMM_RESOURCE_USAGE_OCL_SYSTEM_MEMORY_BUFFER_CACHELINE_MISALIGNED; } EXPECT_EQ(expectedUsage, usage); } } TEST(GmmTest, givenInternalHeapOrLinearStreamWhenDebugFlagIsSetThenReturnUncachedType) { DebugManagerStateRestore restore; DebugManager.flags.DisableCachingForHeaps.set(true); auto usage = CacheSettingsHelper::getGmmUsageType(AllocationType::INTERNAL_HEAP, false, *defaultHwInfo); EXPECT_EQ(GMM_RESOURCE_USAGE_OCL_SYSTEM_MEMORY_BUFFER_CACHELINE_MISALIGNED, usage); usage = CacheSettingsHelper::getGmmUsageType(AllocationType::LINEAR_STREAM, false, *defaultHwInfo); EXPECT_EQ(GMM_RESOURCE_USAGE_OCL_SYSTEM_MEMORY_BUFFER_CACHELINE_MISALIGNED, usage); } TEST(GmmTest, givenConstSurfaceWhenDebugFlagIsSetThenReturnUncachedType) { DebugManagerStateRestore restore; DebugManager.flags.ForceL1Caching.set(false); EXPECT_EQ(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED, CacheSettingsHelper::getGmmUsageType(AllocationType::CONSTANT_SURFACE, false, *defaultHwInfo)); } TEST(GmmTest, givenAllocationForStatefulAccessWhenDebugFlagIsSetThenReturnUncachedType) { DebugManagerStateRestore restore; DebugManager.flags.DisableCachingForStatefulBufferAccess.set(true); for (auto 
allocType : {AllocationType::BUFFER,
                           AllocationType::BUFFER_HOST_MEMORY,
                           AllocationType::EXTERNAL_HOST_PTR,
                           AllocationType::FILL_PATTERN,
                           AllocationType::INTERNAL_HOST_MEMORY,
                           AllocationType::MAP_ALLOCATION,
                           AllocationType::SHARED_BUFFER,
                           AllocationType::SVM_CPU,
                           AllocationType::SVM_GPU,
                           AllocationType::SVM_ZERO_COPY,
                           AllocationType::UNIFIED_SHARED_MEMORY}) {

        EXPECT_EQ(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED, CacheSettingsHelper::getGmmUsageType(allocType, false, *defaultHwInfo));
    }
}

// With ForceAllResourcesUncached set, all three Gmm creation paths exercised
// here (buffer ctor, image query, copy from existing resource info) must end
// up with GMM_RESOURCE_USAGE_SURFACE_UNCACHED.
TEST_F(GmmTests, whenGmmIsCreatedAndForceAllResourcesUncachedIsSetThenResourceUsageIsSetToUncachedSurface) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.ForceAllResourcesUncached = true;

    auto size = 4096u;
    void *incomingPtr = (void *)0x1000;
    auto gmm1 = std::make_unique<Gmm>(getGmmClientContext(), incomingPtr, size, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, StorageInfo{}, true);
    EXPECT_EQ(GMM_RESOURCE_USAGE_SURFACE_UNCACHED, gmm1->resourceParams.Usage);

    ImageDescriptor imgDesc = {};
    imgDesc.imageType = ImageType::Image1D;
    auto imgInfo = MockGmm::initImgInfo(imgDesc, 0, nullptr);
    auto gmm2 = MockGmm::queryImgParams(getGmmClientContext(), imgInfo, false);
    EXPECT_EQ(GMM_RESOURCE_USAGE_SURFACE_UNCACHED, gmm2->resourceParams.Usage);

    auto gmm3 = std::make_unique<Gmm>(getGmmClientContext(), gmm1->gmmResourceInfo->peekGmmResourceInfo());
    EXPECT_EQ(GMM_RESOURCE_USAGE_SURFACE_UNCACHED, gmm3->resourceParams.Usage);
}

// A GmmResourceInfo built from another one's raw GMM_RESOURCE_INFO must hold
// its own, distinct resource handle.
TEST_F(GmmTests, whenResourceIsCreatedThenHandleItsOwnership) {
    // Exposes the protected resourceInfo member for inspection.
    struct MyMockResourecInfo : public GmmResourceInfo {
        using GmmResourceInfo::resourceInfo;

        MyMockResourecInfo(GmmClientContext *clientContext, GMM_RESCREATE_PARAMS *inputParams) : GmmResourceInfo(clientContext, inputParams){};
        MyMockResourecInfo(GmmClientContext *clientContext, GMM_RESOURCE_INFO *inputGmmResourceInfo) : GmmResourceInfo(clientContext, inputGmmResourceInfo){};
    };

    GMM_RESCREATE_PARAMS gmmParams = {};
    gmmParams.Type = RESOURCE_BUFFER;
    gmmParams.Format = GMM_FORMAT_GENERIC_8BIT;
gmmParams.BaseWidth64 = 1; gmmParams.BaseHeight = 1; gmmParams.Depth = 1; gmmParams.Flags.Info.Linear = 1; gmmParams.Flags.Info.Cacheable = 1; gmmParams.Flags.Gpu.Texture = 1; gmmParams.Usage = GMM_RESOURCE_USAGE_OCL_BUFFER; MyMockResourecInfo myMockResourceInfo1(getGmmClientContext(), &gmmParams); EXPECT_NE(nullptr, myMockResourceInfo1.resourceInfo.get()); MyMockResourecInfo myMockResourceInfo2(getGmmClientContext(), myMockResourceInfo1.resourceInfo.get()); EXPECT_NE(nullptr, myMockResourceInfo2.resourceInfo.get()); EXPECT_NE(myMockResourceInfo1.resourceInfo.get(), myMockResourceInfo2.resourceInfo.get()); } using GmmEnvironmentTest = MockExecutionEnvironmentGmmFixtureTest; TEST_F(GmmEnvironmentTest, givenGmmWithNotSetMCSInResourceInfoGpuFlagsWhenCallHasMultisampleControlSurfaceThenReturnFalse) { auto gmm = std::unique_ptr(new Gmm(getGmmClientContext(), nullptr, 1, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true)); EXPECT_FALSE(gmm->hasMultisampleControlSurface()); } TEST_F(GmmEnvironmentTest, givenGmmWithSetMCSInResourceInfoGpuFlagsWhenCallhasMultisampleControlSurfaceThenReturnTrue) { auto gmm = std::unique_ptr(new Gmm(getGmmClientContext(), nullptr, 1, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true)); auto mockResource = reinterpret_cast(gmm->gmmResourceInfo.get()); mockResource->setMultisampleControlSurface(); EXPECT_TRUE(gmm->hasMultisampleControlSurface()); } TEST_F(GmmEnvironmentTest, whenGmmHelperIsInitializedThenClientContextIsSet) { ASSERT_NE(nullptr, getGmmHelper()); EXPECT_NE(nullptr, getGmmClientContext()->getHandle()); } struct GmmHelperTests : MockExecutionEnvironmentGmmFixtureTest { using MockExecutionEnvironmentGmmFixture::executionEnvironment; }; TEST_F(GmmHelperTests, givenValidGmmFunctionsWhenCreateGmmHelperWithInitializedOsInterfaceThenProperParametersArePassed) { std::unique_ptr gmmHelper; DeviceFactory::prepareDeviceEnvironments(*executionEnvironment); VariableBackup passedInputArgsBackup(&passedInputArgs); VariableBackup 
passedFtrTableBackup(&passedFtrTable); VariableBackup passedWaTableBackup(&passedWaTable); VariableBackup copyInputArgsBackup(©InputArgs, true); auto hwInfo = defaultHwInfo.get(); SKU_FEATURE_TABLE expectedFtrTable = {}; WA_TABLE expectedWaTable = {}; SkuInfoTransfer::transferFtrTableForGmm(&expectedFtrTable, &hwInfo->featureTable); SkuInfoTransfer::transferWaTableForGmm(&expectedWaTable, &hwInfo->workaroundTable); gmmHelper.reset(new GmmHelper(executionEnvironment->rootDeviceEnvironments[0]->osInterface.get(), hwInfo)); EXPECT_EQ(0, memcmp(&hwInfo->platform, &passedInputArgs.Platform, sizeof(PLATFORM))); EXPECT_EQ(&hwInfo->gtSystemInfo, passedInputArgs.pGtSysInfo); EXPECT_EQ(0, memcmp(&expectedFtrTable, &passedFtrTable, sizeof(SKU_FEATURE_TABLE))); EXPECT_EQ(0, memcmp(&expectedWaTable, &passedWaTable, sizeof(WA_TABLE))); EXPECT_EQ(GMM_CLIENT::GMM_OCL_VISTA, passedInputArgs.ClientType); } TEST(GmmHelperTest, givenValidGmmFunctionsWhenCreateGmmHelperWithoutOsInterfaceThenInitializationDoesntCrashAndProperParametersArePassed) { std::unique_ptr gmmHelper; VariableBackup passedInputArgsBackup(&passedInputArgs); VariableBackup passedFtrTableBackup(&passedFtrTable); VariableBackup passedWaTableBackup(&passedWaTable); VariableBackup copyInputArgsBackup(©InputArgs, true); auto hwInfo = defaultHwInfo.get(); SKU_FEATURE_TABLE expectedFtrTable = {}; WA_TABLE expectedWaTable = {}; SkuInfoTransfer::transferFtrTableForGmm(&expectedFtrTable, &hwInfo->featureTable); SkuInfoTransfer::transferWaTableForGmm(&expectedWaTable, &hwInfo->workaroundTable); gmmHelper.reset(new GmmHelper(nullptr, hwInfo)); EXPECT_EQ(0, memcmp(&hwInfo->platform, &passedInputArgs.Platform, sizeof(PLATFORM))); EXPECT_EQ(&hwInfo->gtSystemInfo, passedInputArgs.pGtSysInfo); EXPECT_EQ(0, memcmp(&expectedFtrTable, &passedFtrTable, sizeof(SKU_FEATURE_TABLE))); EXPECT_EQ(0, memcmp(&expectedWaTable, &passedWaTable, sizeof(WA_TABLE))); EXPECT_EQ(GMM_CLIENT::GMM_OCL_VISTA, passedInputArgs.ClientType); } 
// After forceAllResourcesUncached() every usage must report MOCS 0.
TEST(GmmHelperTest, givenGmmHelperAndL3CacheDisabledForDebugThenCorrectMOCSIsReturned) {
    // Temporarily install GmmClientContext::create as the context factory; restored at the end.
    decltype(GmmHelper::createGmmContextWrapperFunc) createGmmContextSave = GmmHelper::createGmmContextWrapperFunc;
    GmmHelper::createGmmContextWrapperFunc = GmmClientContext::create;

    std::unique_ptr<GmmHelper> gmmHelper;
    auto hwInfo = defaultHwInfo.get();
    gmmHelper.reset(new GmmHelper(nullptr, hwInfo));

    EXPECT_EQ(0u, gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED));
    EXPECT_EQ(2u, gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_STATE_HEAP_BUFFER));
    EXPECT_EQ(4u, gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_IMAGE));
    EXPECT_EQ(4u, gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_IMAGE_FROM_BUFFER));
    EXPECT_EQ(8u, gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST));
    EXPECT_EQ(16u, gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER));
    EXPECT_EQ(32u, gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_INLINE_CONST_HDC));

    gmmHelper->forceAllResourcesUncached();

    EXPECT_EQ(0u, gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED));
    EXPECT_EQ(0u, gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_STATE_HEAP_BUFFER));
    EXPECT_EQ(0u, gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_IMAGE));
    EXPECT_EQ(0u, gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_IMAGE_FROM_BUFFER));
    EXPECT_EQ(0u, gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST));
    EXPECT_EQ(0u, gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER));
    EXPECT_EQ(0u, gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_INLINE_CONST_HDC));
    GmmHelper::createGmmContextWrapperFunc = createGmmContextSave;
}

// Same expectation, but triggered through the ForceAllResourcesUncached debug
// flag instead of the explicit helper call.
TEST(GmmHelperTest, givenGmmHelperAndForceAllResourcesUncachedDebugVariableSetThenCorrectMOCSIsReturned) {
    decltype(GmmHelper::createGmmContextWrapperFunc) createGmmContextSave = GmmHelper::createGmmContextWrapperFunc;
    GmmHelper::createGmmContextWrapperFunc = GmmClientContext::create;

    std::unique_ptr<GmmHelper> gmmHelper;
    auto hwInfo = defaultHwInfo.get();
    gmmHelper.reset(new GmmHelper(nullptr, hwInfo));

    EXPECT_EQ(0u,
gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED));
    EXPECT_EQ(2u, gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_STATE_HEAP_BUFFER));
    EXPECT_EQ(4u, gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_IMAGE));
    EXPECT_EQ(4u, gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_IMAGE_FROM_BUFFER));
    EXPECT_EQ(8u, gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST));
    EXPECT_EQ(16u, gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER));
    EXPECT_EQ(32u, gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_INLINE_CONST_HDC));

    // From here on the debug flag is active; every usage is expected to report MOCS 0.
    DebugManagerStateRestore restore;
    DebugManager.flags.ForceAllResourcesUncached.set(true);

    EXPECT_EQ(0u, gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED));
    EXPECT_EQ(0u, gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_STATE_HEAP_BUFFER));
    EXPECT_EQ(0u, gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_IMAGE));
    EXPECT_EQ(0u, gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_IMAGE_FROM_BUFFER));
    EXPECT_EQ(0u, gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST));
    EXPECT_EQ(0u, gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER));
    EXPECT_EQ(0u, gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_INLINE_CONST_HDC));
    GmmHelper::createGmmContextWrapperFunc = createGmmContextSave;
}

// Fixture for compression tests: enables render compression for images and
// buffers plus local memory on a mutable copy of the default HardwareInfo,
// and prepares a 2x2 2D image description that is eligible for compression.
struct GmmCompressionTests : public MockExecutionEnvironmentGmmFixtureTest {
    void SetUp() override {
        MockExecutionEnvironmentGmmFixtureTest::SetUp();
        executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get());
        localPlatformDevice = executionEnvironment->rootDeviceEnvironments[0]->getMutableHardwareInfo();
        localPlatformDevice->capabilityTable.ftrRenderCompressedImages = true;
        localPlatformDevice->capabilityTable.ftrRenderCompressedBuffers = true;
        localPlatformDevice->featureTable.flags.ftrLocalMemory = true;

        setupImgInfo();
    }

    // Builds imgInfo from a 2x2 Image2D descriptor with local memory requested.
    void setupImgInfo() {
        imgDesc.imageType = ImageType::Image2D;
        imgDesc.imageWidth = 2;
        imgDesc.imageHeight = 2;
        imgInfo = MockGmm::initImgInfo(imgDesc, 0, nullptr);
        imgInfo.useLocalMemory = true;

        // allowed for render compression:
        imgInfo.plane =
GMM_YUV_PLANE::GMM_NO_PLANE; } HardwareInfo *localPlatformDevice = nullptr; ImageDescriptor imgDesc = {}; ImageInfo imgInfo = {}; }; TEST_F(GmmCompressionTests, givenEnabledAndNotPreferredE2ECWhenApplyingForBuffersThenDontSetValidFlags) { std::unique_ptr gmm(new Gmm(getGmmClientContext(), nullptr, 1, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true)); gmm->resourceParams = {}; localPlatformDevice->capabilityTable.ftrRenderCompressedBuffers = true; gmm->applyAuxFlagsForBuffer(false); EXPECT_EQ(0u, gmm->resourceParams.Flags.Info.RenderCompressed); EXPECT_EQ(0u, gmm->resourceParams.Flags.Gpu.CCS); EXPECT_EQ(0u, gmm->resourceParams.Flags.Gpu.UnifiedAuxSurface); EXPECT_FALSE(gmm->isCompressionEnabled); } TEST_F(GmmCompressionTests, givenDisabledAndPreferredE2ECWhenApplyingForBuffersThenDontSetValidFlags) { std::unique_ptr gmm(new Gmm(getGmmClientContext(), nullptr, 1, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true)); gmm->resourceParams = {}; localPlatformDevice->capabilityTable.ftrRenderCompressedBuffers = false; gmm->applyAuxFlagsForBuffer(true); EXPECT_EQ(0u, gmm->resourceParams.Flags.Info.RenderCompressed); EXPECT_EQ(0u, gmm->resourceParams.Flags.Gpu.CCS); EXPECT_EQ(0u, gmm->resourceParams.Flags.Gpu.UnifiedAuxSurface); EXPECT_FALSE(gmm->isCompressionEnabled); } HWTEST_F(GmmCompressionTests, givenAllValidInputsWhenQueryingThenSetAppropriateFlags) { EXPECT_TRUE(localPlatformDevice->capabilityTable.ftrRenderCompressedImages); EXPECT_TRUE(imgInfo.surfaceFormat->GMMSurfaceFormat != GMM_RESOURCE_FORMAT::GMM_FORMAT_NV12); EXPECT_TRUE(imgInfo.plane == GMM_YUV_PLANE_ENUM::GMM_NO_PLANE); auto queryGmm = MockGmm::queryImgParams(getGmmClientContext(), imgInfo, true); auto resourceFormat = queryGmm->gmmResourceInfo->getResourceFormat(); auto compressionFormat = getGmmClientContext()->getSurfaceStateCompressionFormat(resourceFormat); EXPECT_GT(compressionFormat, 0u); EXPECT_EQ(0u, queryGmm->resourceParams.Flags.Info.TiledY); EXPECT_EQ(0u, 
queryGmm->resourceParams.Flags.Info.Linear);
    EXPECT_EQ(1u, queryGmm->resourceParams.Flags.Info.RenderCompressed);
    EXPECT_EQ(1u, queryGmm->resourceParams.Flags.Gpu.CCS);
    EXPECT_EQ(1u, queryGmm->resourceParams.Flags.Gpu.UnifiedAuxSurface);
    EXPECT_EQ(1u, queryGmm->resourceParams.Flags.Gpu.IndirectClearColor);
    EXPECT_TRUE(queryGmm->isCompressionEnabled);
}

// Same inputs as the positive case except useLocalMemory=false: compression
// flags must stay cleared.
TEST_F(GmmCompressionTests, givenAllValidInputsAndNoLocalMemoryRequestWhenQueryingThenRenderCompressionFlagsAreNotSet) {
    EXPECT_TRUE(localPlatformDevice->capabilityTable.ftrRenderCompressedImages);
    EXPECT_TRUE(imgInfo.surfaceFormat->GMMSurfaceFormat != GMM_RESOURCE_FORMAT::GMM_FORMAT_NV12);
    EXPECT_TRUE(imgInfo.plane == GMM_YUV_PLANE_ENUM::GMM_NO_PLANE);

    imgInfo.useLocalMemory = false;

    auto queryGmm = MockGmm::queryImgParams(getGmmClientContext(), imgInfo, true);
    auto resourceFormat = queryGmm->gmmResourceInfo->getResourceFormat();
    auto compressionFormat = getGmmClientContext()->getSurfaceStateCompressionFormat(resourceFormat);
    EXPECT_GT(compressionFormat, 0u);

    EXPECT_EQ(0u, queryGmm->resourceParams.Flags.Info.RenderCompressed);
    EXPECT_EQ(0u, queryGmm->resourceParams.Flags.Gpu.CCS);
    EXPECT_EQ(0u, queryGmm->resourceParams.Flags.Gpu.UnifiedAuxSurface);
    EXPECT_EQ(0u, queryGmm->resourceParams.Flags.Gpu.IndirectClearColor);
    EXPECT_FALSE(queryGmm->isCompressionEnabled);
}

// Platform capability disabled: compression flags must stay cleared even
// though compression is preferred.
TEST_F(GmmCompressionTests, givenNotAllowedRenderCompressionWhenQueryingThenSetAppropriateFlags) {
    localPlatformDevice->capabilityTable.ftrRenderCompressedImages = false;
    auto queryGmm = MockGmm::queryImgParams(getGmmClientContext(), imgInfo, true);

    EXPECT_EQ(0u, queryGmm->resourceParams.Flags.Info.Linear);
    EXPECT_EQ(0u, queryGmm->resourceParams.Flags.Info.RenderCompressed);
    EXPECT_EQ(0u, queryGmm->resourceParams.Flags.Gpu.CCS);
    EXPECT_EQ(0u, queryGmm->resourceParams.Flags.Gpu.UnifiedAuxSurface);
    EXPECT_EQ(0u, queryGmm->resourceParams.Flags.Gpu.IndirectClearColor);
    EXPECT_FALSE(queryGmm->isCompressionEnabled);
}

// The RenderCompressedImagesEnabled debug flag is expected to override the
// platform capability in both directions.
HWTEST_F(GmmCompressionTests,
givenNotAllowedCompressionAndEnabledDebugFlagWhenQueryingThenSetAppropriateFlags) {
    DebugManagerStateRestore restore;

    // Debug flag on, capability off -> compression enabled.
    DebugManager.flags.RenderCompressedImagesEnabled.set(1);
    localPlatformDevice->capabilityTable.ftrRenderCompressedImages = false;

    auto queryGmm = MockGmm::queryImgParams(getGmmClientContext(), imgInfo, true);

    EXPECT_EQ(0u, queryGmm->resourceParams.Flags.Info.Linear);
    EXPECT_EQ(1u, queryGmm->resourceParams.Flags.Info.RenderCompressed);
    EXPECT_EQ(1u, queryGmm->resourceParams.Flags.Gpu.CCS);
    EXPECT_EQ(1u, queryGmm->resourceParams.Flags.Gpu.UnifiedAuxSurface);
    EXPECT_EQ(1u, queryGmm->resourceParams.Flags.Gpu.IndirectClearColor);
    EXPECT_TRUE(queryGmm->isCompressionEnabled);

    // Debug flag off, capability on -> compression disabled.
    DebugManager.flags.RenderCompressedImagesEnabled.set(0);
    localPlatformDevice->capabilityTable.ftrRenderCompressedImages = true;

    queryGmm = MockGmm::queryImgParams(getGmmClientContext(), imgInfo, true);

    EXPECT_EQ(0u, queryGmm->resourceParams.Flags.Info.RenderCompressed);
    EXPECT_EQ(0u, queryGmm->resourceParams.Flags.Gpu.CCS);
    EXPECT_EQ(0u, queryGmm->resourceParams.Flags.Gpu.UnifiedAuxSurface);
    EXPECT_EQ(0u, queryGmm->resourceParams.Flags.Gpu.IndirectClearColor);
    EXPECT_FALSE(queryGmm->isCompressionEnabled);
}

// Compression not preferred by the caller -> never enabled.
TEST_F(GmmCompressionTests, givenNotPreferredCompressionFlagWhenQueryingThenDisallow) {
    auto queryGmm = MockGmm::queryImgParams(getGmmClientContext(), imgInfo, false);

    EXPECT_FALSE(queryGmm->isCompressionEnabled);
}

// NV12 surfaces must not get compression even when it is preferred.
TEST_F(GmmCompressionTests, givenNV12FormatWhenQueryingThenDisallow) {
    imgInfo.surfaceFormat = &SurfaceFormats::planarYuv()[0].surfaceFormat;
    EXPECT_TRUE(imgInfo.surfaceFormat->GMMSurfaceFormat == GMM_RESOURCE_FORMAT::GMM_FORMAT_NV12);
    auto queryGmm = MockGmm::queryImgParams(getGmmClientContext(), imgInfo, true);
    auto resourceFormat = queryGmm->gmmResourceInfo->getResourceFormat();
    auto compressionFormat = getGmmClientContext()->getSurfaceStateCompressionFormat(resourceFormat);
    EXPECT_GT(compressionFormat, 0u);

    EXPECT_FALSE(queryGmm->isCompressionEnabled);
}
TEST_F(GmmCompressionTests, givenInvalidCompressionFormatAndFlatCcsFtrSetWhenQueryingThenDisallowOnGmmFlatCcsFormat) { auto mockGmmClient = static_cast(getGmmClientContext()); imgInfo.surfaceFormat = &SurfaceFormats::readOnlyDepth()[2].surfaceFormat; localPlatformDevice->featureTable.flags.ftrFlatPhysCCS = true; uint8_t validFormat = static_cast(GMM_E2ECOMP_FORMAT::GMM_E2ECOMP_FORMAT_INVALID); uint8_t invalidFormat = static_cast(GMM_FLATCCS_FORMAT::GMM_FLATCCS_FORMAT_INVALID); mockGmmClient->compressionFormatToReturn = invalidFormat; auto queryGmm = MockGmm::queryImgParams(getGmmClientContext(), imgInfo, true); auto resourceFormat = queryGmm->gmmResourceInfo->getResourceFormat(); auto compressionFormat = getGmmClientContext()->getSurfaceStateCompressionFormat(resourceFormat); EXPECT_EQ(compressionFormat, invalidFormat); EXPECT_FALSE(queryGmm->isCompressionEnabled); mockGmmClient->compressionFormatToReturn = validFormat; queryGmm = MockGmm::queryImgParams(getGmmClientContext(), imgInfo, true); EXPECT_TRUE(queryGmm->isCompressionEnabled); } TEST_F(GmmCompressionTests, givenInvalidCompressionFormatAndFlatCcsFtrNotSetWhenQueryingThenDisallowOnGmmE2CCompFormat) { auto mockGmmClient = static_cast(getGmmClientContext()); imgInfo.surfaceFormat = &SurfaceFormats::readOnlyDepth()[2].surfaceFormat; localPlatformDevice->featureTable.flags.ftrFlatPhysCCS = false; uint8_t invalidFormat = static_cast(GMM_E2ECOMP_FORMAT::GMM_E2ECOMP_FORMAT_INVALID); uint8_t validFormat = static_cast(GMM_FLATCCS_FORMAT::GMM_FLATCCS_FORMAT_INVALID); mockGmmClient->compressionFormatToReturn = invalidFormat; auto queryGmm = MockGmm::queryImgParams(getGmmClientContext(), imgInfo, true); auto resourceFormat = queryGmm->gmmResourceInfo->getResourceFormat(); auto compressionFormat = getGmmClientContext()->getSurfaceStateCompressionFormat(resourceFormat); EXPECT_EQ(compressionFormat, invalidFormat); EXPECT_FALSE(queryGmm->isCompressionEnabled); mockGmmClient->compressionFormatToReturn = validFormat; 
queryGmm = MockGmm::queryImgParams(getGmmClientContext(), imgInfo, true); EXPECT_TRUE(queryGmm->isCompressionEnabled); } TEST_F(GmmCompressionTests, givenPlaneFormatWhenQueryingThenDisallow) { GMM_YUV_PLANE gmmPlane[4] = {GMM_YUV_PLANE::GMM_NO_PLANE, GMM_YUV_PLANE::GMM_PLANE_U, GMM_YUV_PLANE::GMM_PLANE_V, GMM_YUV_PLANE::GMM_PLANE_Y}; for (auto &plane : gmmPlane) { imgInfo.plane = plane; auto queryGmm = MockGmm::queryImgParams(getGmmClientContext(), imgInfo, true); EXPECT_EQ(queryGmm->isCompressionEnabled, plane == GMM_YUV_PLANE::GMM_NO_PLANE); } } TEST_F(GmmCompressionTests, givenPackedYuvFormatWhenQueryingThenDisallow) { for (auto &surfaceFormat : SurfaceFormats::packedYuv()) { imgInfo.surfaceFormat = &surfaceFormat.surfaceFormat; auto queryGmm = MockGmm::queryImgParams(getGmmClientContext(), imgInfo, true); EXPECT_FALSE(queryGmm->isCompressionEnabled); } } HWTEST_F(GmmCompressionTests, whenConstructedWithPreferCompressionFlagThenApplyAuxFlags) { Gmm gmm1(getGmmClientContext(), nullptr, 1, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true); EXPECT_EQ(0u, gmm1.resourceParams.Flags.Info.RenderCompressed); Gmm gmm2(getGmmClientContext(), nullptr, 1, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true); EXPECT_EQ(0u, gmm2.resourceParams.Flags.Info.RenderCompressed); Gmm gmm3(getGmmClientContext(), nullptr, 1, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, true, {}, true); EXPECT_EQ(1u, gmm3.resourceParams.Flags.Info.RenderCompressed); } TEST_F(GmmCompressionTests, givenMediaCompressedImageApplyAuxFlagsForImageThenSetFlagsToCompressed) { MockGmm gmm(getGmmClientContext(), nullptr, 1, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true); gmm.resourceParams.Flags.Info.MediaCompressed = true; gmm.resourceParams.Flags.Info.RenderCompressed = false; gmm.setupImageResourceParams(imgInfo, true); EXPECT_TRUE(gmm.isCompressionEnabled); } TEST_F(GmmCompressionTests, givenRenderCompressedImageApplyAuxFlagsForImageThenSetFlagsToCompressed) { MockGmm gmm(getGmmClientContext(), nullptr, 1, 0, 
GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true); gmm.resourceParams.Flags.Info.MediaCompressed = false; gmm.resourceParams.Flags.Info.RenderCompressed = true; gmm.setupImageResourceParams(imgInfo, true); EXPECT_TRUE(gmm.isCompressionEnabled); } HWTEST_F(GmmCompressionTests, givenEnabledAndPreferredE2ECWhenApplyingForBuffersThenSetValidFlags) { std::unique_ptr gmm(new Gmm(getGmmClientContext(), nullptr, 1, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true)); gmm->resourceParams = {}; localPlatformDevice->capabilityTable.ftrRenderCompressedBuffers = true; gmm->applyAuxFlagsForBuffer(true); EXPECT_EQ(1u, gmm->resourceParams.Flags.Info.RenderCompressed); EXPECT_EQ(1u, gmm->resourceParams.Flags.Gpu.CCS); EXPECT_EQ(1u, gmm->resourceParams.Flags.Gpu.UnifiedAuxSurface); EXPECT_TRUE(gmm->isCompressionEnabled); } HWTEST_F(GmmCompressionTests, givenDisabledE2ECAndEnabledDebugFlagWhenApplyingForBuffersThenSetValidFlags) { DebugManagerStateRestore restore; Gmm gmm(getGmmClientContext(), nullptr, 1, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true); gmm.resourceParams = {}; DebugManager.flags.RenderCompressedBuffersEnabled.set(1); localPlatformDevice->capabilityTable.ftrRenderCompressedBuffers = false; gmm.applyAuxFlagsForBuffer(true); EXPECT_EQ(1u, gmm.resourceParams.Flags.Info.RenderCompressed); EXPECT_EQ(1u, gmm.resourceParams.Flags.Gpu.CCS); EXPECT_EQ(1u, gmm.resourceParams.Flags.Gpu.UnifiedAuxSurface); EXPECT_TRUE(gmm.isCompressionEnabled); gmm.resourceParams = {}; gmm.isCompressionEnabled = false; DebugManager.flags.RenderCompressedBuffersEnabled.set(0); localPlatformDevice->capabilityTable.ftrRenderCompressedBuffers = true; gmm.applyAuxFlagsForBuffer(true); EXPECT_EQ(0u, gmm.resourceParams.Flags.Info.RenderCompressed); EXPECT_EQ(0u, gmm.resourceParams.Flags.Gpu.CCS); EXPECT_EQ(0u, gmm.resourceParams.Flags.Gpu.UnifiedAuxSurface); EXPECT_FALSE(gmm.isCompressionEnabled); } struct GmmLocalMemoryTests : public ::testing::Test, MockExecutionEnvironmentGmmFixture { 
GmmLocalMemoryTests() { localPlatformDevice = *defaultHwInfo; localPlatformDevice.featureTable.flags.ftrLocalMemory = true; } void SetUp() override { MockExecutionEnvironmentGmmFixture::SetUp(); executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(&localPlatformDevice); } HardwareInfo localPlatformDevice{}; }; struct MultiTileGmmTests : GmmLocalMemoryTests { MultiTileGmmTests() { localPlatformDevice.featureTable.flags.ftrMultiTileArch = true; localPlatformDevice.gtSystemInfo.MultiTileArchInfo.TileMask = customTileMask; } uint8_t customTileMask = 0xD; }; TEST_F(GmmLocalMemoryTests, givenFtrLocalMemoryWhenUseSystemMemoryIsTrueThenNonLocalOnlyFlagIsSetAndLocalOnlyCleared) { auto gmm = std::make_unique(getGmmClientContext(), nullptr, 1, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, StorageInfo{}, true); EXPECT_EQ(1u, gmm->resourceParams.Flags.Info.NonLocalOnly); EXPECT_EQ(0u, gmm->resourceParams.Flags.Info.LocalOnly); } TEST_F(GmmLocalMemoryTests, givenFtrLocalMemoryWhenUsingLocalMemoryAndAllocationIsLockableThenAllFlagsAreCleared) { StorageInfo storageInfo{}; storageInfo.isLockable = true; storageInfo.memoryBanks.set(1); storageInfo.systemMemoryPlacement = false; auto gmm = std::make_unique(getGmmClientContext(), nullptr, 1, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, storageInfo, true); EXPECT_EQ(0u, gmm->resourceParams.Flags.Info.NonLocalOnly); EXPECT_EQ(0u, gmm->resourceParams.Flags.Info.LocalOnly); EXPECT_EQ(0u, gmm->resourceParams.Flags.Info.NotLockable); } TEST_F(GmmLocalMemoryTests, givenFtrLocalMemoryWhenUsingLocalMemoryFalseAndAllocationIsNotLockableThenNotLockableFlagsIsSetAndLocalAndNonLocalOnlyAreNotSet) { StorageInfo storageInfo{}; storageInfo.isLockable = false; storageInfo.memoryBanks.set(1); storageInfo.systemMemoryPlacement = false; auto gmm = std::make_unique(getGmmClientContext(), nullptr, 1, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, storageInfo, true); EXPECT_EQ(0u, gmm->resourceParams.Flags.Info.NonLocalOnly); EXPECT_EQ(0u, 
gmm->resourceParams.Flags.Info.LocalOnly); EXPECT_EQ(1u, gmm->resourceParams.Flags.Info.NotLockable); } TEST_F(GmmLocalMemoryTests, givenLocalMemoryAndNotLockableAllocationAndStorageInfoWithLocalOnlyRequiredWhenPreparingFlagsForGmmThenNotLockableAndLocalOnlyIsSet) { StorageInfo storageInfo{}; storageInfo.localOnlyRequired = true; storageInfo.isLockable = false; storageInfo.memoryBanks.set(1); storageInfo.systemMemoryPlacement = false; auto gmm = std::make_unique(getGmmClientContext(), nullptr, 1, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, storageInfo, true); EXPECT_EQ(1u, gmm->resourceParams.Flags.Info.LocalOnly); EXPECT_EQ(1u, gmm->resourceParams.Flags.Info.NotLockable); } TEST_F(GmmLocalMemoryTests, givenLocalMemoryAndStorageInfoWithLocalOnlyRequiredWhenPreparingFlagsForGmmThenNotLockableAndLocalOnlyAreSet) { StorageInfo storageInfo{}; storageInfo.localOnlyRequired = true; storageInfo.isLockable = false; storageInfo.memoryBanks.set(1); storageInfo.systemMemoryPlacement = false; DebugManagerStateRestore restorer; for (auto csrMode = static_cast(CommandStreamReceiverType::CSR_HW); csrMode < static_cast(CommandStreamReceiverType::CSR_TYPES_NUM); csrMode++) { DebugManager.flags.SetCommandStreamReceiver.set(csrMode); auto gmm = std::make_unique(getGmmClientContext(), nullptr, 1, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, storageInfo, true); EXPECT_EQ(1u, gmm->resourceParams.Flags.Info.LocalOnly); EXPECT_EQ(1u, gmm->resourceParams.Flags.Info.NotLockable); } } TEST_F(GmmLocalMemoryTests, givenSystemMemoryAndStorageInfoWithLocalOnlyRequiredWhenPreparingFlagsForGmmThenLocalOnlyIsNotSet) { StorageInfo storageInfo{}; storageInfo.localOnlyRequired = true; auto gmm = std::make_unique(getGmmClientContext(), nullptr, 1, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, storageInfo, true); EXPECT_EQ(1u, gmm->resourceParams.Flags.Info.NonLocalOnly); EXPECT_EQ(0u, gmm->resourceParams.Flags.Info.LocalOnly); } TEST_F(GmmLocalMemoryTests, 
givenLocalMemoryAndStorageInfoWithoutLocalOnlyRequiredWhenPreparingFlagsForGmmThenLocalOnlyIsNotSet) { StorageInfo storageInfo{}; storageInfo.localOnlyRequired = false; auto gmm = std::make_unique(getGmmClientContext(), nullptr, 1, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, storageInfo, true); EXPECT_EQ(1u, gmm->resourceParams.Flags.Info.NonLocalOnly); EXPECT_EQ(0u, gmm->resourceParams.Flags.Info.LocalOnly); } TEST_F(GmmLocalMemoryTests, givenFtrLocalMemoryAndCompressionEnabledWhenUsingLocalMemoryAndAllocationIsNotLockableThenNotLockableAndLocalOnlyFlagsAreSetAndNonLocalOnlyIsNotSet) { DebugManagerStateRestore restorer; DebugManager.flags.RenderCompressedBuffersEnabled.set(1); StorageInfo storageInfo{}; storageInfo.isLockable = false; storageInfo.systemMemoryPlacement = false; storageInfo.memoryBanks.set(1); auto gmm = std::make_unique(getGmmClientContext(), nullptr, 1, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, true, storageInfo, true); EXPECT_TRUE(gmm->isCompressionEnabled); EXPECT_EQ(0u, gmm->resourceParams.Flags.Info.NonLocalOnly); EXPECT_EQ(1u, gmm->resourceParams.Flags.Info.LocalOnly); EXPECT_EQ(1u, gmm->resourceParams.Flags.Info.NotLockable); } TEST_F(GmmLocalMemoryTests, givenFtrLocalMemoryWhenUseSystemMemoryIsFalseAndAllocationIsNotLockableThenLocalAndNonLocalOnlyAndNotLockableFlagsAreNotSet) { DebugManagerStateRestore restorer; for (auto csrMode = static_cast(CommandStreamReceiverType::CSR_HW); csrMode < static_cast(CommandStreamReceiverType::CSR_TYPES_NUM); csrMode++) { DebugManager.flags.SetCommandStreamReceiver.set(csrMode); StorageInfo storageInfo{}; storageInfo.memoryBanks.set(1); storageInfo.systemMemoryPlacement = false; storageInfo.isLockable = false; auto gmm = std::make_unique(getGmmClientContext(), nullptr, 1, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, storageInfo, true); EXPECT_EQ(0u, gmm->resourceParams.Flags.Info.NonLocalOnly); EXPECT_EQ(0u, gmm->resourceParams.Flags.Info.LocalOnly); EXPECT_EQ(1u, gmm->resourceParams.Flags.Info.NotLockable); } } 
TEST_F(GmmLocalMemoryTests, givenUseLocalMemoryInImageInfoTrueWhenGmmIsCreatedThenLocalAndNonLocalOnlyFlagIsNotSetAndNotLockableIsSet) { ImageInfo imgInfo = {}; cl_image_desc desc = {0}; desc.image_type = CL_MEM_OBJECT_IMAGE1D; desc.image_width = 1; cl_image_format imageFormat = {CL_R, CL_UNSIGNED_INT8}; cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, defaultHwInfo->capabilityTable.supportsOcl21Features); imgInfo.imgDesc = Image::convertDescriptor(desc); imgInfo.surfaceFormat = &surfaceFormat->surfaceFormat; imgInfo.useLocalMemory = true; StorageInfo storageInfo = {}; storageInfo.systemMemoryPlacement = false; storageInfo.memoryBanks.set(1); auto gmm = std::make_unique(getGmmClientContext(), imgInfo, storageInfo, false); EXPECT_EQ(0u, gmm->resourceParams.Flags.Info.NonLocalOnly); EXPECT_EQ(0u, gmm->resourceParams.Flags.Info.LocalOnly); EXPECT_EQ(1u, gmm->resourceParams.Flags.Info.NotLockable); } TEST_F(GmmLocalMemoryTests, givenUseCompressionAndLocalMemoryInImageInfoTrueWhenGmmIsCreatedThenNonLocalOnlyFlagIsNotSetAndNotLockableAndLocalOnlyIsSet) { DebugManagerStateRestore restorer; DebugManager.flags.RenderCompressedImagesEnabled.set(1); ImageInfo imgInfo = {}; cl_image_desc desc = {0}; desc.image_type = CL_MEM_OBJECT_IMAGE1D; desc.image_width = 1; cl_image_format imageFormat = {CL_R, CL_UNSIGNED_INT8}; cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, defaultHwInfo->capabilityTable.supportsOcl21Features); imgInfo.imgDesc = Image::convertDescriptor(desc); imgInfo.surfaceFormat = &surfaceFormat->surfaceFormat; imgInfo.useLocalMemory = true; StorageInfo storageInfo = {}; storageInfo.memoryBanks.set(1); storageInfo.systemMemoryPlacement = false; auto gmm = std::make_unique(getGmmClientContext(), imgInfo, storageInfo, true); EXPECT_TRUE(gmm->isCompressionEnabled); EXPECT_EQ(0u, 
gmm->resourceParams.Flags.Info.NonLocalOnly); EXPECT_EQ(1u, gmm->resourceParams.Flags.Info.LocalOnly); EXPECT_EQ(1u, gmm->resourceParams.Flags.Info.NotLockable); } TEST_F(GmmLocalMemoryTests, givenUseLocalMemoryInImageInfoFalseWhenGmmIsCreatedThenLocalOnlyNotSet) { ImageInfo imgInfo = {}; cl_image_desc desc = {0}; desc.image_type = CL_MEM_OBJECT_IMAGE1D; desc.image_width = 1; cl_image_format imageFormat = {CL_R, CL_UNSIGNED_INT8}; cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, defaultHwInfo->capabilityTable.supportsOcl21Features); imgInfo.imgDesc = Image::convertDescriptor(desc); imgInfo.surfaceFormat = &surfaceFormat->surfaceFormat; imgInfo.useLocalMemory = false; auto gmm = std::make_unique(getGmmClientContext(), imgInfo, StorageInfo{}, false); EXPECT_EQ(1u, gmm->resourceParams.Flags.Info.NonLocalOnly); EXPECT_EQ(0u, gmm->resourceParams.Flags.Info.LocalOnly); } TEST_F(MultiTileGmmTests, whenCreateGmmWithImageInfoThenEnableMultiTileArch) { ImageInfo imgInfo = {}; cl_image_desc desc = {0}; desc.image_type = CL_MEM_OBJECT_IMAGE1D; desc.image_width = 1; cl_image_format imageFormat = {CL_R, CL_UNSIGNED_INT8}; cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, defaultHwInfo->capabilityTable.supportsOcl21Features); imgInfo.imgDesc = Image::convertDescriptor(desc); imgInfo.surfaceFormat = &surfaceFormat->surfaceFormat; imgInfo.useLocalMemory = false; auto gmm = std::make_unique(getGmmClientContext(), imgInfo, StorageInfo{}, false); EXPECT_EQ(1u, gmm->resourceParams.Flags.Info.NonLocalOnly); EXPECT_EQ(1u, gmm->resourceParams.MultiTileArch.Enable); imgInfo.useLocalMemory = true; gmm = std::make_unique(getGmmClientContext(), imgInfo, StorageInfo{}, false); EXPECT_EQ(1u, gmm->resourceParams.Flags.Info.NonLocalOnly); EXPECT_EQ(1u, gmm->resourceParams.MultiTileArch.Enable); } TEST_F(MultiTileGmmTests, 
givenMultiTileAllocationWhenGmmIsCreatedWithEmptyMemporyBanksThenMultitileArchIsEnabled) { StorageInfo storageInfo; storageInfo.memoryBanks = 0; Gmm gmm(getGmmClientContext(), nullptr, 1, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, storageInfo, true); EXPECT_EQ(1u, gmm.resourceParams.MultiTileArch.Enable); EXPECT_EQ(0u, gmm.resourceParams.MultiTileArch.TileInstanced); } TEST_F(MultiTileGmmTests, givenMultiTileAllocationWithoutCloningWhenGmmIsCreatedThenSetMinimumOneTile) { StorageInfo storageInfo; storageInfo.memoryBanks = 1; storageInfo.cloningOfPageTables = false; storageInfo.systemMemoryPlacement = false; Gmm gmm(getGmmClientContext(), nullptr, 1, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, storageInfo, true); EXPECT_EQ(1u, gmm.resourceParams.MultiTileArch.Enable); EXPECT_EQ(1u, gmm.resourceParams.MultiTileArch.GpuVaMappingSet); EXPECT_EQ(1u, gmm.resourceParams.MultiTileArch.LocalMemPreferredSet); EXPECT_EQ(1u, gmm.resourceParams.MultiTileArch.LocalMemEligibilitySet); EXPECT_EQ(0u, gmm.resourceParams.MultiTileArch.TileInstanced); } TEST_F(MultiTileGmmTests, givenMultiTileWhenGmmIsCreatedWithNonLocalMemoryThenMultitileArchIsPropertlyFilled) { StorageInfo storageInfo; Gmm gmm(getGmmClientContext(), nullptr, 1, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, storageInfo, true); EXPECT_EQ(1u, gmm.resourceParams.MultiTileArch.Enable); EXPECT_EQ(customTileMask, gmm.resourceParams.MultiTileArch.GpuVaMappingSet); EXPECT_EQ(0u, gmm.resourceParams.MultiTileArch.LocalMemPreferredSet); EXPECT_EQ(0u, gmm.resourceParams.MultiTileArch.LocalMemEligibilitySet); EXPECT_EQ(0u, gmm.resourceParams.MultiTileArch.TileInstanced); } TEST_F(MultiTileGmmTests, givenMultiTileWhenGmmIsCreatedWithSpecificMemoryBanksThenMultitileArchIsEnabled) { StorageInfo storageInfo; storageInfo.systemMemoryPlacement = false; storageInfo.memoryBanks = 1u; storageInfo.cloningOfPageTables = false; Gmm gmm(getGmmClientContext(), nullptr, 1, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, storageInfo, true); EXPECT_EQ(1u, 
gmm.resourceParams.MultiTileArch.Enable); EXPECT_EQ(storageInfo.memoryBanks, gmm.resourceParams.MultiTileArch.LocalMemPreferredSet); EXPECT_EQ(storageInfo.memoryBanks, gmm.resourceParams.MultiTileArch.GpuVaMappingSet); EXPECT_EQ(storageInfo.memoryBanks, gmm.resourceParams.MultiTileArch.LocalMemEligibilitySet); EXPECT_EQ(0u, gmm.resourceParams.MultiTileArch.TileInstanced); } TEST_F(MultiTileGmmTests, givenMultiTileWhenGmmIsCreatedWithCloningEnabledThenGpuVaMappingDependsOnPageTablesVisibityBitfield) { StorageInfo storageInfo; storageInfo.memoryBanks = 2u; storageInfo.cloningOfPageTables = true; storageInfo.systemMemoryPlacement = false; storageInfo.pageTablesVisibility = 3u; Gmm gmm(getGmmClientContext(), nullptr, 1, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, storageInfo, true); EXPECT_EQ(1u, gmm.resourceParams.MultiTileArch.Enable); EXPECT_EQ(storageInfo.memoryBanks, gmm.resourceParams.MultiTileArch.LocalMemPreferredSet); EXPECT_EQ(storageInfo.pageTablesVisibility, gmm.resourceParams.MultiTileArch.GpuVaMappingSet); EXPECT_EQ(storageInfo.memoryBanks, gmm.resourceParams.MultiTileArch.LocalMemEligibilitySet); EXPECT_EQ(0u, gmm.resourceParams.MultiTileArch.TileInstanced); } TEST_F(MultiTileGmmTests, whenAllocationIsTileInstancedWithoutClonningPageTablesThenResourceParamsHaveTileInstancedEnabled) { StorageInfo storageInfo; storageInfo.cloningOfPageTables = false; storageInfo.tileInstanced = true; storageInfo.memoryBanks = 2u; storageInfo.systemMemoryPlacement = false; Gmm gmm(getGmmClientContext(), nullptr, 1, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, storageInfo, true); EXPECT_EQ(1u, gmm.resourceParams.MultiTileArch.Enable); EXPECT_EQ(1u, gmm.resourceParams.MultiTileArch.TileInstanced); } TEST_F(MultiTileGmmTests, whenAllocationIsTileInstancedWithClonningPageTablesThenResourceParamsHaveTileInstancedDisabled) { StorageInfo storageInfo; storageInfo.cloningOfPageTables = true; storageInfo.tileInstanced = true; Gmm gmm(getGmmClientContext(), nullptr, 1, 0, 
GMM_RESOURCE_USAGE_OCL_BUFFER, false, storageInfo, true); EXPECT_EQ(1u, gmm.resourceParams.MultiTileArch.Enable); EXPECT_EQ(0u, gmm.resourceParams.MultiTileArch.TileInstanced); } } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/gtpin/000077500000000000000000000000001422164147700227505ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/gtpin/CMakeLists.txt000066400000000000000000000013431422164147700255110ustar00rootroot00000000000000# # Copyright (C) 2018-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_gtpin ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/gtpin_tests.cpp ) macro(macro_for_each_core_type) foreach(BRANCH_DIR ${BRANCH_DIR_LIST}) if(EXISTS ${NEO_SOURCE_DIR}/opencl/source${BRANCH_DIR}${CORE_TYPE_LOWER}/gtpin_setup_${CORE_TYPE_LOWER}.cpp) list(APPEND IGDRCL_SRCS_tests_gtpin ${NEO_SOURCE_DIR}/opencl/source${BRANCH_DIR}${CORE_TYPE_LOWER}/gtpin_setup_${CORE_TYPE_LOWER}.cpp) endif() endforeach() endmacro() apply_macro_for_each_core_type("TESTED") if(NOT DISABLED_GTPIN_SUPPORT) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gtpin}) endif() add_subdirectories() compute-runtime-22.14.22890/opencl/test/unit_test/gtpin/gtpin_tests.cpp000066400000000000000000003365361422164147700260370ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/device/device.h" #include "shared/source/device_binary_format/patchtokens_decoder.h" #include "shared/source/helpers/basic_math.h" #include "shared/source/helpers/file_io.h" #include "shared/source/helpers/hash.h" #include "shared/source/memory_manager/surface.h" #include "shared/source/memory_manager/unified_memory_manager.h" #include "shared/source/os_interface/os_context.h" #include "shared/test/common/fixtures/memory_management_fixture.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include 
"shared/test/common/helpers/kernel_binary_helper.h" #include "shared/test/common/helpers/test_files.h" #include "shared/test/common/helpers/variable_backup.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/device_binary_format/patchtokens_tests.h" #include "shared/test/unit_test/page_fault_manager/mock_cpu_page_fault_manager.h" #include "opencl/source/api/api.h" #include "opencl/source/context/context.h" #include "opencl/source/gtpin/gtpin_defs.h" #include "opencl/source/gtpin/gtpin_helpers.h" #include "opencl/source/gtpin/gtpin_hw_helper.h" #include "opencl/source/gtpin/gtpin_init.h" #include "opencl/source/gtpin/gtpin_notify.h" #include "opencl/source/helpers/cl_validators.h" #include "opencl/source/kernel/kernel.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/program/create.inl" #include "opencl/test/unit_test/fixtures/context_fixture.h" #include "opencl/test/unit_test/fixtures/platform_fixture.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "opencl/test/unit_test/program/program_tests.h" #include "opencl/test/unit_test/test_macros/test_checks_ocl.h" #include "gtest/gtest.h" #include #include using namespace NEO; using namespace gtpin; namespace NEO { extern std::deque kernelExecQueue; extern GTPinHwHelper *gtpinHwHelperFactory[IGFX_MAX_CORE]; } // namespace NEO namespace ULT { int ContextCreateCallbackCount = 0; int ContextDestroyCallbackCount = 0; int KernelCreateCallbackCount = 0; int KernelSubmitCallbackCount = 0; int CommandBufferCreateCallbackCount = 0; int CommandBufferCompleteCallbackCount = 0; uint32_t kernelOffset = 0; bool returnNullResource = false; 
context_handle_t currContext = nullptr; std::deque kernelResources; platform_info_t platformInfo; void OnContextCreate(context_handle_t context, platform_info_t *platformInfo, igc_init_t **igcInit) { ULT::platformInfo.gen_version = platformInfo->gen_version; currContext = context; kernelResources.clear(); ContextCreateCallbackCount++; *igcInit = reinterpret_cast(0x1234); } void OnContextDestroy(context_handle_t context) { currContext = nullptr; EXPECT_EQ(0u, kernelResources.size()); kernelResources.clear(); ContextDestroyCallbackCount++; } void OnKernelCreate(context_handle_t context, const instrument_params_in_t *paramsIn, instrument_params_out_t *paramsOut) { paramsOut->inst_kernel_binary = const_cast(paramsIn->orig_kernel_binary); paramsOut->inst_kernel_size = paramsIn->orig_kernel_size; paramsOut->kernel_id = paramsIn->igc_hash_id; KernelCreateCallbackCount++; } void OnKernelSubmit(command_buffer_handle_t cb, uint64_t kernelId, uint32_t *entryOffset, resource_handle_t *resource) { resource_handle_t currResource = nullptr; ASSERT_NE(nullptr, currContext); if (!returnNullResource) { GTPIN_DI_STATUS st = gtpinCreateBuffer(currContext, (uint32_t)256, &currResource); EXPECT_EQ(GTPIN_DI_SUCCESS, st); EXPECT_NE(nullptr, currResource); uint8_t *bufAddress = nullptr; st = gtpinMapBuffer(currContext, currResource, &bufAddress); EXPECT_EQ(GTPIN_DI_SUCCESS, st); EXPECT_NE(nullptr, bufAddress); } *entryOffset = kernelOffset; *resource = currResource; kernelResources.push_back(currResource); KernelSubmitCallbackCount++; } void OnCommandBufferCreate(context_handle_t context, command_buffer_handle_t cb) { CommandBufferCreateCallbackCount++; } void OnCommandBufferComplete(command_buffer_handle_t cb) { ASSERT_NE(nullptr, currContext); resource_handle_t currResource = kernelResources[0]; EXPECT_NE(nullptr, currResource); GTPIN_DI_STATUS st = gtpinUnmapBuffer(currContext, currResource); EXPECT_EQ(GTPIN_DI_SUCCESS, st); st = gtpinFreeBuffer(currContext, currResource); 
EXPECT_EQ(GTPIN_DI_SUCCESS, st); kernelResources.pop_front(); CommandBufferCompleteCallbackCount++; } class MockMemoryManagerWithFailures : public OsAgnosticMemoryManager { public: using OsAgnosticMemoryManager::pageFaultManager; MockMemoryManagerWithFailures(ExecutionEnvironment &executionEnvironment) : OsAgnosticMemoryManager(executionEnvironment){}; GraphicsAllocation *allocateGraphicsMemoryInDevicePool(const AllocationData &allocationData, AllocationStatus &status) override { if (failAllAllocationsInDevicePool) { failAllAllocationsInDevicePool = false; return nullptr; } return OsAgnosticMemoryManager::allocateGraphicsMemoryInDevicePool(allocationData, status); } bool failAllAllocationsInDevicePool = false; }; struct MockResidentTestsPageFaultManager : public MockPageFaultManager { void moveAllocationToGpuDomain(void *ptr) override { moveAllocationToGpuDomainCalledTimes++; migratedAddress = ptr; } uint32_t moveAllocationToGpuDomainCalledTimes = 0; void *migratedAddress = nullptr; }; class GTPinFixture : public ContextFixture, public MemoryManagementFixture { using ContextFixture::SetUp; public: void SetUp() override { DebugManager.flags.GTPinAllocateBufferInSharedMemory.set(false); SetUpImpl(); } void SetUpImpl() { platformsImpl->clear(); MemoryManagementFixture::SetUp(); constructPlatform(); pPlatform = platform(); auto executionEnvironment = pPlatform->peekExecutionEnvironment(); executionEnvironment->prepareRootDeviceEnvironments(1); executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); memoryManager = new MockMemoryManagerWithFailures(*executionEnvironment); memoryManager->pageFaultManager.reset(new MockResidentTestsPageFaultManager()); executionEnvironment->memoryManager.reset(memoryManager); initPlatform(); pDevice = pPlatform->getClDevice(0); rootDeviceIndex = pDevice->getRootDeviceIndex(); cl_device_id device = pDevice; ContextFixture::SetUp(1, &device); driverServices.bufferAllocate = nullptr; driverServices.bufferDeallocate = 
nullptr; driverServices.bufferMap = nullptr; driverServices.bufferUnMap = nullptr; gtpinCallbacks.onContextCreate = nullptr; gtpinCallbacks.onContextDestroy = nullptr; gtpinCallbacks.onKernelCreate = nullptr; gtpinCallbacks.onKernelSubmit = nullptr; gtpinCallbacks.onCommandBufferCreate = nullptr; gtpinCallbacks.onCommandBufferComplete = nullptr; NEO::isGTPinInitialized = false; kernelOffset = 0; } void TearDown() override { ContextFixture::TearDown(); platformsImpl->clear(); MemoryManagementFixture::TearDown(); NEO::isGTPinInitialized = false; } Platform *pPlatform = nullptr; ClDevice *pDevice = nullptr; cl_int retVal = CL_SUCCESS; GTPIN_DI_STATUS retFromGtPin = GTPIN_DI_SUCCESS; driver_services_t driverServices; gtpin::ocl::gtpin_events_t gtpinCallbacks; MockMemoryManagerWithFailures *memoryManager = nullptr; uint32_t rootDeviceIndex = std::numeric_limits::max(); DebugManagerStateRestore restore; }; typedef Test GTPinTests; TEST_F(GTPinTests, givenInvalidArgumentsThenGTPinInitFails) { bool isInitialized = false; retFromGtPin = GTPin_Init(nullptr, nullptr, nullptr); EXPECT_EQ(GTPIN_DI_ERROR_INVALID_ARGUMENT, retFromGtPin); isInitialized = gtpinIsGTPinInitialized(); EXPECT_FALSE(isInitialized); retFromGtPin = GTPin_Init(>pinCallbacks, nullptr, nullptr); EXPECT_EQ(GTPIN_DI_ERROR_INVALID_ARGUMENT, retFromGtPin); isInitialized = gtpinIsGTPinInitialized(); EXPECT_FALSE(isInitialized); retFromGtPin = GTPin_Init(nullptr, &driverServices, nullptr); EXPECT_EQ(GTPIN_DI_ERROR_INVALID_ARGUMENT, retFromGtPin); isInitialized = gtpinIsGTPinInitialized(); EXPECT_FALSE(isInitialized); } TEST_F(GTPinTests, givenIncompleteArgumentsThenGTPinInitFails) { interface_version_t ver; ver.common = 0; ver.specific = 0; retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, &ver); EXPECT_EQ(GTPIN_DI_ERROR_INVALID_ARGUMENT, retFromGtPin); gtpinCallbacks.onContextCreate = OnContextCreate; retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); 
EXPECT_EQ(GTPIN_DI_ERROR_INVALID_ARGUMENT, retFromGtPin); gtpinCallbacks.onContextDestroy = OnContextDestroy; retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); EXPECT_EQ(GTPIN_DI_ERROR_INVALID_ARGUMENT, retFromGtPin); gtpinCallbacks.onKernelCreate = OnKernelCreate; retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); EXPECT_EQ(GTPIN_DI_ERROR_INVALID_ARGUMENT, retFromGtPin); gtpinCallbacks.onKernelSubmit = OnKernelSubmit; retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); EXPECT_EQ(GTPIN_DI_ERROR_INVALID_ARGUMENT, retFromGtPin); gtpinCallbacks.onCommandBufferCreate = OnCommandBufferCreate; retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); EXPECT_EQ(GTPIN_DI_ERROR_INVALID_ARGUMENT, retFromGtPin); } TEST_F(GTPinTests, givenInvalidArgumentsWhenVersionArgumentIsProvidedThenGTPinInitReturnsDriverVersion) { interface_version_t ver; ver.common = 0; ver.specific = 0; retFromGtPin = GTPin_Init(nullptr, nullptr, &ver); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); EXPECT_EQ(gtpin::ocl::GTPIN_OCL_INTERFACE_VERSION, ver.specific); EXPECT_EQ(gtpin::GTPIN_COMMON_INTERFACE_VERSION, ver.common); retFromGtPin = GTPin_Init(>pinCallbacks, nullptr, &ver); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); EXPECT_EQ(gtpin::ocl::GTPIN_OCL_INTERFACE_VERSION, ver.specific); EXPECT_EQ(gtpin::GTPIN_COMMON_INTERFACE_VERSION, ver.common); retFromGtPin = GTPin_Init(nullptr, &driverServices, &ver); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); EXPECT_EQ(gtpin::ocl::GTPIN_OCL_INTERFACE_VERSION, ver.specific); EXPECT_EQ(gtpin::GTPIN_COMMON_INTERFACE_VERSION, ver.common); } TEST_F(GTPinTests, givenValidAndCompleteArgumentsThenGTPinInitSucceeds) { bool isInitialized = false; gtpinCallbacks.onContextCreate = OnContextCreate; gtpinCallbacks.onContextDestroy = OnContextDestroy; gtpinCallbacks.onKernelCreate = OnKernelCreate; gtpinCallbacks.onKernelSubmit = OnKernelSubmit; gtpinCallbacks.onCommandBufferCreate = OnCommandBufferCreate; 
gtpinCallbacks.onCommandBufferComplete = OnCommandBufferComplete; retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); EXPECT_EQ(&NEO::gtpinCreateBuffer, driverServices.bufferAllocate); EXPECT_EQ(&NEO::gtpinFreeBuffer, driverServices.bufferDeallocate); EXPECT_EQ(&NEO::gtpinMapBuffer, driverServices.bufferMap); EXPECT_EQ(&NEO::gtpinUnmapBuffer, driverServices.bufferUnMap); isInitialized = gtpinIsGTPinInitialized(); EXPECT_TRUE(isInitialized); } TEST_F(GTPinTests, givenValidAndCompleteArgumentsWhenGTPinIsAlreadyInitializedThenGTPinInitFails) { gtpinCallbacks.onContextCreate = OnContextCreate; gtpinCallbacks.onContextDestroy = OnContextDestroy; gtpinCallbacks.onKernelCreate = OnKernelCreate; gtpinCallbacks.onKernelSubmit = OnKernelSubmit; gtpinCallbacks.onCommandBufferCreate = OnCommandBufferCreate; gtpinCallbacks.onCommandBufferComplete = OnCommandBufferComplete; retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); EXPECT_EQ(&NEO::gtpinCreateBuffer, driverServices.bufferAllocate); EXPECT_EQ(&NEO::gtpinFreeBuffer, driverServices.bufferDeallocate); EXPECT_EQ(&NEO::gtpinMapBuffer, driverServices.bufferMap); EXPECT_EQ(&NEO::gtpinUnmapBuffer, driverServices.bufferUnMap); retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); EXPECT_EQ(GTPIN_DI_ERROR_INSTANCE_ALREADY_CREATED, retFromGtPin); } TEST_F(GTPinTests, givenInvalidArgumentsThenBufferAllocateFails) { resource_handle_t res; uint32_t buffSize = 400u; gtpinCallbacks.onContextCreate = OnContextCreate; gtpinCallbacks.onContextDestroy = OnContextDestroy; gtpinCallbacks.onKernelCreate = OnKernelCreate; gtpinCallbacks.onKernelSubmit = OnKernelSubmit; gtpinCallbacks.onCommandBufferCreate = OnCommandBufferCreate; gtpinCallbacks.onCommandBufferComplete = OnCommandBufferComplete; retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); 
ASSERT_EQ(&NEO::gtpinCreateBuffer, driverServices.bufferAllocate); EXPECT_EQ(&NEO::gtpinFreeBuffer, driverServices.bufferDeallocate); EXPECT_EQ(&NEO::gtpinMapBuffer, driverServices.bufferMap); EXPECT_EQ(&NEO::gtpinUnmapBuffer, driverServices.bufferUnMap); retFromGtPin = (*driverServices.bufferAllocate)(nullptr, buffSize, &res); EXPECT_NE(GTPIN_DI_SUCCESS, retFromGtPin); cl_context ctxt = (cl_context)((Context *)pContext); retFromGtPin = (*driverServices.bufferAllocate)((gtpin::context_handle_t)ctxt, buffSize, nullptr); EXPECT_NE(GTPIN_DI_SUCCESS, retFromGtPin); } TEST_F(GTPinTests, givenInvalidArgumentsThenBufferDeallocateFails) { gtpinCallbacks.onContextCreate = OnContextCreate; gtpinCallbacks.onContextDestroy = OnContextDestroy; gtpinCallbacks.onKernelCreate = OnKernelCreate; gtpinCallbacks.onKernelSubmit = OnKernelSubmit; gtpinCallbacks.onCommandBufferCreate = OnCommandBufferCreate; gtpinCallbacks.onCommandBufferComplete = OnCommandBufferComplete; retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); EXPECT_EQ(&NEO::gtpinCreateBuffer, driverServices.bufferAllocate); ASSERT_EQ(&NEO::gtpinFreeBuffer, driverServices.bufferDeallocate); EXPECT_EQ(&NEO::gtpinMapBuffer, driverServices.bufferMap); EXPECT_EQ(&NEO::gtpinUnmapBuffer, driverServices.bufferUnMap); retFromGtPin = (*driverServices.bufferDeallocate)(nullptr, nullptr); EXPECT_NE(GTPIN_DI_SUCCESS, retFromGtPin); cl_context ctxt = (cl_context)((Context *)pContext); retFromGtPin = (*driverServices.bufferDeallocate)((gtpin::context_handle_t)ctxt, nullptr); EXPECT_NE(GTPIN_DI_SUCCESS, retFromGtPin); retFromGtPin = (*driverServices.bufferDeallocate)((gtpin::context_handle_t)ctxt, (gtpin::resource_handle_t)ctxt); EXPECT_NE(GTPIN_DI_SUCCESS, retFromGtPin); } TEST_F(GTPinTests, givenInvalidArgumentsThenBufferMapFails) { gtpinCallbacks.onContextCreate = OnContextCreate; gtpinCallbacks.onContextDestroy = OnContextDestroy; gtpinCallbacks.onKernelCreate = 
OnKernelCreate; gtpinCallbacks.onKernelSubmit = OnKernelSubmit; gtpinCallbacks.onCommandBufferCreate = OnCommandBufferCreate; gtpinCallbacks.onCommandBufferComplete = OnCommandBufferComplete; retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); EXPECT_EQ(&NEO::gtpinCreateBuffer, driverServices.bufferAllocate); EXPECT_EQ(&NEO::gtpinFreeBuffer, driverServices.bufferDeallocate); ASSERT_EQ(&NEO::gtpinMapBuffer, driverServices.bufferMap); EXPECT_EQ(&NEO::gtpinUnmapBuffer, driverServices.bufferUnMap); uint8_t *mappedPtr; retFromGtPin = (*driverServices.bufferMap)(nullptr, nullptr, &mappedPtr); EXPECT_NE(GTPIN_DI_SUCCESS, retFromGtPin); cl_context ctxt = (cl_context)((Context *)pContext); retFromGtPin = (*driverServices.bufferMap)((gtpin::context_handle_t)ctxt, nullptr, &mappedPtr); EXPECT_NE(GTPIN_DI_SUCCESS, retFromGtPin); retFromGtPin = (*driverServices.bufferMap)((gtpin::context_handle_t)ctxt, (gtpin::resource_handle_t)ctxt, &mappedPtr); EXPECT_NE(GTPIN_DI_SUCCESS, retFromGtPin); } TEST_F(GTPinTests, givenInvalidArgumentsThenBufferUnMapFails) { gtpinCallbacks.onContextCreate = OnContextCreate; gtpinCallbacks.onContextDestroy = OnContextDestroy; gtpinCallbacks.onKernelCreate = OnKernelCreate; gtpinCallbacks.onKernelSubmit = OnKernelSubmit; gtpinCallbacks.onCommandBufferCreate = OnCommandBufferCreate; gtpinCallbacks.onCommandBufferComplete = OnCommandBufferComplete; retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); EXPECT_EQ(&NEO::gtpinCreateBuffer, driverServices.bufferAllocate); EXPECT_EQ(&NEO::gtpinFreeBuffer, driverServices.bufferDeallocate); EXPECT_EQ(&NEO::gtpinMapBuffer, driverServices.bufferMap); ASSERT_EQ(&NEO::gtpinUnmapBuffer, driverServices.bufferUnMap); retFromGtPin = (*driverServices.bufferUnMap)(nullptr, nullptr); EXPECT_NE(GTPIN_DI_SUCCESS, retFromGtPin); cl_context ctxt = (cl_context)((Context *)pContext); retFromGtPin = 
(*driverServices.bufferUnMap)((gtpin::context_handle_t)ctxt, nullptr); EXPECT_NE(GTPIN_DI_SUCCESS, retFromGtPin); retFromGtPin = (*driverServices.bufferUnMap)((gtpin::context_handle_t)ctxt, (gtpin::resource_handle_t)ctxt); EXPECT_NE(GTPIN_DI_SUCCESS, retFromGtPin); } TEST_F(GTPinTests, givenValidRequestForHugeMemoryAllocationThenBufferAllocateFails) { InjectedFunction allocBufferFunc = [this](size_t failureIndex) { resource_handle_t res; cl_context ctxt = (cl_context)((Context *)pContext); uint32_t hugeSize = 400u; // Will be handled as huge memory allocation retFromGtPin = (*driverServices.bufferAllocate)((gtpin::context_handle_t)ctxt, hugeSize, &res); if (MemoryManagement::nonfailingAllocation != failureIndex) { EXPECT_EQ(GTPIN_DI_ERROR_ALLOCATION_FAILED, retFromGtPin); } else { EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); EXPECT_NE(nullptr, res); retFromGtPin = (*driverServices.bufferDeallocate)((gtpin::context_handle_t)ctxt, res); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); } }; gtpinCallbacks.onContextCreate = OnContextCreate; gtpinCallbacks.onContextDestroy = OnContextDestroy; gtpinCallbacks.onKernelCreate = OnKernelCreate; gtpinCallbacks.onKernelSubmit = OnKernelSubmit; gtpinCallbacks.onCommandBufferCreate = OnCommandBufferCreate; gtpinCallbacks.onCommandBufferComplete = OnCommandBufferComplete; retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); ASSERT_EQ(&NEO::gtpinCreateBuffer, driverServices.bufferAllocate); ASSERT_EQ(&NEO::gtpinFreeBuffer, driverServices.bufferDeallocate); EXPECT_EQ(&NEO::gtpinMapBuffer, driverServices.bufferMap); EXPECT_EQ(&NEO::gtpinUnmapBuffer, driverServices.bufferUnMap); injectFailures(allocBufferFunc); } TEST_F(GTPinTests, givenValidRequestForMemoryAllocationThenBufferAllocateAndDeallocateSucceeds) { resource_handle_t res; uint32_t buffSize = 400u; gtpinCallbacks.onContextCreate = OnContextCreate; gtpinCallbacks.onContextDestroy = OnContextDestroy; 
gtpinCallbacks.onKernelCreate = OnKernelCreate; gtpinCallbacks.onKernelSubmit = OnKernelSubmit; gtpinCallbacks.onCommandBufferCreate = OnCommandBufferCreate; gtpinCallbacks.onCommandBufferComplete = OnCommandBufferComplete; retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); ASSERT_EQ(&NEO::gtpinCreateBuffer, driverServices.bufferAllocate); ASSERT_EQ(&NEO::gtpinFreeBuffer, driverServices.bufferDeallocate); EXPECT_EQ(&NEO::gtpinMapBuffer, driverServices.bufferMap); EXPECT_EQ(&NEO::gtpinUnmapBuffer, driverServices.bufferUnMap); cl_context ctxt = (cl_context)((Context *)pContext); retFromGtPin = (*driverServices.bufferAllocate)((gtpin::context_handle_t)ctxt, buffSize, &res); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); EXPECT_NE(nullptr, res); retFromGtPin = (*driverServices.bufferDeallocate)((gtpin::context_handle_t)ctxt, res); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); } TEST_F(GTPinTests, givenValidArgumentsForBufferMapWhenCallSequenceIsCorrectThenBufferMapSucceeds) { resource_handle_t res; uint32_t buffSize = 400u; gtpinCallbacks.onContextCreate = OnContextCreate; gtpinCallbacks.onContextDestroy = OnContextDestroy; gtpinCallbacks.onKernelCreate = OnKernelCreate; gtpinCallbacks.onKernelSubmit = OnKernelSubmit; gtpinCallbacks.onCommandBufferCreate = OnCommandBufferCreate; gtpinCallbacks.onCommandBufferComplete = OnCommandBufferComplete; retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); ASSERT_EQ(&NEO::gtpinCreateBuffer, driverServices.bufferAllocate); ASSERT_EQ(&NEO::gtpinFreeBuffer, driverServices.bufferDeallocate); ASSERT_EQ(&NEO::gtpinMapBuffer, driverServices.bufferMap); EXPECT_EQ(&NEO::gtpinUnmapBuffer, driverServices.bufferUnMap); cl_context ctxt = (cl_context)((Context *)pContext); retFromGtPin = (*driverServices.bufferAllocate)((gtpin::context_handle_t)ctxt, buffSize, &res); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); EXPECT_NE(nullptr, res); 
uint8_t *mappedPtr = nullptr; retFromGtPin = (*driverServices.bufferMap)((gtpin::context_handle_t)ctxt, res, &mappedPtr); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); EXPECT_NE(nullptr, mappedPtr); retFromGtPin = (*driverServices.bufferDeallocate)((gtpin::context_handle_t)ctxt, res); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); } TEST_F(GTPinTests, givenMissingReturnArgumentForBufferMapWhenCallSequenceIsCorrectThenBufferMapFails) { resource_handle_t res; uint32_t buffSize = 400u; gtpinCallbacks.onContextCreate = OnContextCreate; gtpinCallbacks.onContextDestroy = OnContextDestroy; gtpinCallbacks.onKernelCreate = OnKernelCreate; gtpinCallbacks.onKernelSubmit = OnKernelSubmit; gtpinCallbacks.onCommandBufferCreate = OnCommandBufferCreate; gtpinCallbacks.onCommandBufferComplete = OnCommandBufferComplete; retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); ASSERT_EQ(&NEO::gtpinCreateBuffer, driverServices.bufferAllocate); ASSERT_EQ(&NEO::gtpinFreeBuffer, driverServices.bufferDeallocate); ASSERT_EQ(&NEO::gtpinMapBuffer, driverServices.bufferMap); EXPECT_EQ(&NEO::gtpinUnmapBuffer, driverServices.bufferUnMap); cl_context ctxt = (cl_context)((Context *)pContext); retFromGtPin = (*driverServices.bufferAllocate)((gtpin::context_handle_t)ctxt, buffSize, &res); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); EXPECT_NE(nullptr, res); retFromGtPin = (*driverServices.bufferMap)((gtpin::context_handle_t)ctxt, res, nullptr); EXPECT_NE(GTPIN_DI_SUCCESS, retFromGtPin); retFromGtPin = (*driverServices.bufferDeallocate)((gtpin::context_handle_t)ctxt, res); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); } TEST_F(GTPinTests, givenValidArgumentsForBufferUnMapWhenCallSequenceIsCorrectThenBufferUnMapSucceeds) { resource_handle_t res; uint32_t buffSize = 400u; gtpinCallbacks.onContextCreate = OnContextCreate; gtpinCallbacks.onContextDestroy = OnContextDestroy; gtpinCallbacks.onKernelCreate = OnKernelCreate; gtpinCallbacks.onKernelSubmit = 
OnKernelSubmit; gtpinCallbacks.onCommandBufferCreate = OnCommandBufferCreate; gtpinCallbacks.onCommandBufferComplete = OnCommandBufferComplete; retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); ASSERT_EQ(&NEO::gtpinCreateBuffer, driverServices.bufferAllocate); ASSERT_EQ(&NEO::gtpinFreeBuffer, driverServices.bufferDeallocate); ASSERT_EQ(&NEO::gtpinMapBuffer, driverServices.bufferMap); ASSERT_EQ(&NEO::gtpinUnmapBuffer, driverServices.bufferUnMap); cl_context ctxt = (cl_context)((Context *)pContext); retFromGtPin = (*driverServices.bufferAllocate)((gtpin::context_handle_t)ctxt, buffSize, &res); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); EXPECT_NE(nullptr, res); uint8_t *mappedPtr = nullptr; retFromGtPin = (*driverServices.bufferMap)((gtpin::context_handle_t)ctxt, res, &mappedPtr); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); EXPECT_NE(nullptr, mappedPtr); retFromGtPin = (*driverServices.bufferUnMap)((gtpin::context_handle_t)ctxt, res); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); retFromGtPin = (*driverServices.bufferDeallocate)((gtpin::context_handle_t)ctxt, res); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); } TEST_F(GTPinTests, givenUninitializedGTPinInterfaceThenGTPinContextCallbackIsNotCalled) { int prevCount = ContextCreateCallbackCount; cl_device_id device = (cl_device_id)pDevice; auto context = clCreateContext(nullptr, 1, &device, nullptr, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, context); EXPECT_EQ(ContextCreateCallbackCount, prevCount); prevCount = ContextDestroyCallbackCount; retVal = clReleaseContext(context); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(ContextDestroyCallbackCount, prevCount); } TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenContextCreationArgumentsAreInvalidThenGTPinContextCallbackIsNotCalled) { gtpinCallbacks.onContextCreate = OnContextCreate; gtpinCallbacks.onContextDestroy = OnContextDestroy; gtpinCallbacks.onKernelCreate = OnKernelCreate; 
gtpinCallbacks.onKernelSubmit = OnKernelSubmit; gtpinCallbacks.onCommandBufferCreate = OnCommandBufferCreate; gtpinCallbacks.onCommandBufferComplete = OnCommandBufferComplete; retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); int prevCount = ContextCreateCallbackCount; cl_device_id device = (cl_device_id)pDevice; cl_context_properties invalidProperties[3] = {CL_CONTEXT_PLATFORM, (cl_context_properties) nullptr, 0}; auto context = clCreateContext(invalidProperties, 1, &device, nullptr, nullptr, &retVal); EXPECT_EQ(CL_INVALID_PLATFORM, retVal); EXPECT_EQ(nullptr, context); EXPECT_EQ(ContextCreateCallbackCount, prevCount); context = clCreateContextFromType(invalidProperties, CL_DEVICE_TYPE_GPU, nullptr, nullptr, &retVal); EXPECT_EQ(CL_INVALID_PLATFORM, retVal); EXPECT_EQ(nullptr, context); EXPECT_EQ(ContextCreateCallbackCount, prevCount); } TEST_F(GTPinTests, givenInitializedGTPinInterfaceThenGTPinContextCallbackIsCalled) { gtpinCallbacks.onContextCreate = OnContextCreate; gtpinCallbacks.onContextDestroy = OnContextDestroy; gtpinCallbacks.onKernelCreate = OnKernelCreate; gtpinCallbacks.onKernelSubmit = OnKernelSubmit; gtpinCallbacks.onCommandBufferCreate = OnCommandBufferCreate; gtpinCallbacks.onCommandBufferComplete = OnCommandBufferComplete; retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); int prevCount = ContextCreateCallbackCount; cl_device_id device = (cl_device_id)pDevice; auto context = clCreateContext(nullptr, 1, &device, nullptr, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, context); EXPECT_EQ(ContextCreateCallbackCount, prevCount + 1); prevCount = ContextDestroyCallbackCount; retVal = clReleaseContext(context); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(ContextDestroyCallbackCount, prevCount + 1); prevCount = ContextCreateCallbackCount; context = clCreateContextFromType(nullptr, CL_DEVICE_TYPE_GPU, nullptr, nullptr, 
&retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, context); EXPECT_EQ(ContextCreateCallbackCount, prevCount + 1); prevCount = ContextDestroyCallbackCount; retVal = clReleaseContext(context); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(ContextDestroyCallbackCount, prevCount + 1); } TEST_F(GTPinTests, givenUninitializedGTPinInterfaceThenGTPinKernelCreateCallbackIsNotCalled) { cl_kernel kernel = nullptr; cl_program pProgram = nullptr; cl_device_id device = (cl_device_id)pDevice; size_t sourceSize = 0; std::string testFile; KernelBinaryHelper kbHelper("CopyBuffer_simd16", false); testFile.append(clFiles); testFile.append("CopyBuffer_simd16.cl"); auto pSource = loadDataFromFile(testFile.c_str(), sourceSize); EXPECT_NE(0u, sourceSize); EXPECT_NE(nullptr, pSource); const char *sources[1] = {pSource.get()}; pProgram = clCreateProgramWithSource( (cl_context)((Context *)pContext), 1, sources, &sourceSize, &retVal); ASSERT_NE(nullptr, pProgram); retVal = clBuildProgram( pProgram, 1, &device, nullptr, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); int prevCount = KernelCreateCallbackCount; kernel = clCreateKernel(pProgram, "CopyBuffer", &retVal); EXPECT_NE(nullptr, kernel); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(prevCount, KernelCreateCallbackCount); // Cleanup retVal = clReleaseKernel(kernel); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenContextIsCreatedThenCorrectVersionIsSet) { gtpinCallbacks.onContextCreate = OnContextCreate; gtpinCallbacks.onContextDestroy = OnContextDestroy; gtpinCallbacks.onKernelCreate = OnKernelCreate; gtpinCallbacks.onKernelSubmit = OnKernelSubmit; gtpinCallbacks.onCommandBufferCreate = OnCommandBufferCreate; gtpinCallbacks.onCommandBufferComplete = OnCommandBufferComplete; retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); cl_device_id device = static_cast(pDevice); 
cl_context context = nullptr; context = clCreateContext(nullptr, 1, &device, nullptr, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, context); GFXCORE_FAMILY genFamily = pDevice->getHardwareInfo().platform.eRenderCoreFamily; GTPinHwHelper >pinHelper = GTPinHwHelper::get(genFamily); EXPECT_EQ(ULT::platformInfo.gen_version, static_cast(gtpinHelper.getGenVersion())); retVal = clReleaseContext(context); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenKernelIsExecutedThenGTPinCallbacksAreCalled) { gtpinCallbacks.onContextCreate = OnContextCreate; gtpinCallbacks.onContextDestroy = OnContextDestroy; gtpinCallbacks.onKernelCreate = OnKernelCreate; gtpinCallbacks.onKernelSubmit = OnKernelSubmit; gtpinCallbacks.onCommandBufferCreate = OnCommandBufferCreate; gtpinCallbacks.onCommandBufferComplete = OnCommandBufferComplete; retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); cl_kernel kernel1 = nullptr; cl_kernel kernel2 = nullptr; cl_program pProgram = nullptr; cl_device_id device = (cl_device_id)pDevice; size_t sourceSize = 0; std::string testFile; cl_command_queue cmdQ = nullptr; cl_queue_properties properties = 0; cl_context context = nullptr; KernelBinaryHelper kbHelper("CopyBuffer_simd16", false); testFile.append(clFiles); testFile.append("CopyBuffer_simd16.cl"); auto pSource = loadDataFromFile(testFile.c_str(), sourceSize); EXPECT_NE(0u, sourceSize); EXPECT_NE(nullptr, pSource); context = clCreateContext(nullptr, 1, &device, nullptr, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, context); cmdQ = clCreateCommandQueue(context, device, properties, &retVal); ASSERT_NE(nullptr, cmdQ); EXPECT_EQ(CL_SUCCESS, retVal); const char *sources[1] = {pSource.get()}; pProgram = clCreateProgramWithSource( context, 1, sources, &sourceSize, &retVal); ASSERT_NE(nullptr, pProgram); retVal = clBuildProgram( pProgram, 1, &device, nullptr, nullptr, 
nullptr); EXPECT_EQ(CL_SUCCESS, retVal); // Create and submit first instance of "CopyBuffer" kernel int prevCount11 = KernelCreateCallbackCount; kernel1 = clCreateKernel(pProgram, "CopyBuffer", &retVal); EXPECT_NE(nullptr, kernel1); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(prevCount11 + 1, KernelCreateCallbackCount); MultiDeviceKernel *pMultiDeviceKernel1 = static_cast(kernel1); Kernel *pKernel1 = pMultiDeviceKernel1->getKernel(rootDeviceIndex); const KernelInfo &kInfo1 = pKernel1->getKernelInfo(); uint64_t gtpinKernelId1 = pKernel1->getKernelId(); EXPECT_EQ(kInfo1.shaderHashCode, gtpinKernelId1); constexpr size_t n = 256; auto buff10 = clCreateBuffer(context, 0, n * sizeof(unsigned int), nullptr, nullptr); auto buff11 = clCreateBuffer(context, 0, n * sizeof(unsigned int), nullptr, nullptr); retVal = clSetKernelArg(pMultiDeviceKernel1, 0, sizeof(cl_mem), &buff10); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clSetKernelArg(pMultiDeviceKernel1, 1, sizeof(cl_mem), &buff11); EXPECT_EQ(CL_SUCCESS, retVal); int prevCount12 = KernelSubmitCallbackCount; int prevCount13 = CommandBufferCreateCallbackCount; int prevCount14 = CommandBufferCompleteCallbackCount; cl_uint workDim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {n, 1, 1}; size_t localWorkSize[3] = {1, 1, 1}; retVal = clEnqueueNDRangeKernel(cmdQ, pMultiDeviceKernel1, workDim, globalWorkOffset, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(prevCount12 + 1, KernelSubmitCallbackCount); EXPECT_EQ(prevCount13 + 1, CommandBufferCreateCallbackCount); // Create and submit second instance of "CopyBuffer" kernel int prevCount21 = KernelCreateCallbackCount; kernel2 = clCreateKernel(pProgram, "CopyBuffer", &retVal); EXPECT_NE(nullptr, kernel2); EXPECT_EQ(CL_SUCCESS, retVal); // Verify that GT-Pin Kernel Create callback is not called multiple times for the same kernel EXPECT_EQ(prevCount21, KernelCreateCallbackCount); MultiDeviceKernel *pMultiDeviceKernel2 = 
static_cast(kernel2); Kernel *pKernel2 = pMultiDeviceKernel2->getKernel(rootDeviceIndex); const KernelInfo &kInfo2 = pKernel2->getKernelInfo(); uint64_t gtpinKernelId2 = pKernel2->getKernelId(); EXPECT_EQ(kInfo2.shaderHashCode, gtpinKernelId2); auto buff20 = clCreateBuffer(context, 0, n * sizeof(unsigned int), nullptr, nullptr); auto buff21 = clCreateBuffer(context, 0, n * sizeof(unsigned int), nullptr, nullptr); retVal = clSetKernelArg(pMultiDeviceKernel2, 0, sizeof(cl_mem), &buff20); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clSetKernelArg(pMultiDeviceKernel2, 1, sizeof(cl_mem), &buff21); EXPECT_EQ(CL_SUCCESS, retVal); int prevCount22 = KernelSubmitCallbackCount; int prevCount23 = CommandBufferCreateCallbackCount; int prevCount24 = CommandBufferCompleteCallbackCount; retVal = clEnqueueNDRangeKernel(cmdQ, pMultiDeviceKernel2, workDim, globalWorkOffset, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(prevCount22 + 1, KernelSubmitCallbackCount); EXPECT_EQ(prevCount23 + 1, CommandBufferCreateCallbackCount); retVal = clFinish(cmdQ); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(prevCount14 + 2, CommandBufferCompleteCallbackCount); EXPECT_EQ(prevCount24 + 2, CommandBufferCompleteCallbackCount); // Cleanup retVal = clReleaseKernel(kernel1); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseKernel(kernel2); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); pSource.reset(); retVal = clReleaseMemObject(buff10); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(buff11); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(buff20); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(buff21); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseCommandQueue(cmdQ); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseContext(context); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenKernelINTELIsExecutedThenGTPinCallbacksAreCalled) { 
gtpinCallbacks.onContextCreate = OnContextCreate; gtpinCallbacks.onContextDestroy = OnContextDestroy; gtpinCallbacks.onKernelCreate = OnKernelCreate; gtpinCallbacks.onKernelSubmit = OnKernelSubmit; gtpinCallbacks.onCommandBufferCreate = OnCommandBufferCreate; gtpinCallbacks.onCommandBufferComplete = OnCommandBufferComplete; retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); cl_kernel kernel1 = nullptr; cl_kernel kernel2 = nullptr; cl_program pProgram = nullptr; cl_device_id device = (cl_device_id)pDevice; size_t sourceSize = 0; std::string testFile; cl_command_queue cmdQ = nullptr; cl_queue_properties properties = 0; cl_context context = nullptr; KernelBinaryHelper kbHelper("CopyBuffer_simd16", false); testFile.append(clFiles); testFile.append("CopyBuffer_simd16.cl"); auto pSource = loadDataFromFile(testFile.c_str(), sourceSize); EXPECT_NE(0u, sourceSize); EXPECT_NE(nullptr, pSource); context = clCreateContext(nullptr, 1, &device, nullptr, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, context); cmdQ = clCreateCommandQueue(context, device, properties, &retVal); ASSERT_NE(nullptr, cmdQ); EXPECT_EQ(CL_SUCCESS, retVal); const char *sources[1] = {pSource.get()}; pProgram = clCreateProgramWithSource( context, 1, sources, &sourceSize, &retVal); ASSERT_NE(nullptr, pProgram); retVal = clBuildProgram( pProgram, 1, &device, nullptr, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); // Create and submit first instance of "CopyBuffer" kernel int prevCount11 = KernelCreateCallbackCount; kernel1 = clCreateKernel(pProgram, "CopyBuffer", &retVal); EXPECT_NE(nullptr, kernel1); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(prevCount11 + 1, KernelCreateCallbackCount); MultiDeviceKernel *pMultiDeviceKernel1 = static_cast(kernel1); Kernel *pKernel1 = pMultiDeviceKernel1->getKernel(rootDeviceIndex); const KernelInfo &kInfo1 = pKernel1->getKernelInfo(); uint64_t gtpinKernelId1 = pKernel1->getKernelId(); 
EXPECT_EQ(kInfo1.shaderHashCode, gtpinKernelId1); cl_uint workDim = 1; size_t localWorkSize[3] = {1, 1, 1}; CommandQueue *commandQueue = nullptr; WithCastToInternal(cmdQ, &commandQueue); size_t n = 100; auto buff10 = clCreateBuffer(context, 0, n * sizeof(unsigned int), nullptr, nullptr); auto buff11 = clCreateBuffer(context, 0, n * sizeof(unsigned int), nullptr, nullptr); retVal = clSetKernelArg(pMultiDeviceKernel1, 0, sizeof(cl_mem), &buff10); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clSetKernelArg(pMultiDeviceKernel1, 1, sizeof(cl_mem), &buff11); EXPECT_EQ(CL_SUCCESS, retVal); int prevCount12 = KernelSubmitCallbackCount; int prevCount13 = CommandBufferCreateCallbackCount; int prevCount14 = CommandBufferCompleteCallbackCount; size_t globalWorkOffset[3] = {0, 0, 0}; size_t workgroupCount[3] = {n, 1, 1}; retVal = clEnqueueNDCountKernelINTEL(cmdQ, pMultiDeviceKernel1, workDim, globalWorkOffset, workgroupCount, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(prevCount12 + 1, KernelSubmitCallbackCount); EXPECT_EQ(prevCount13 + 1, CommandBufferCreateCallbackCount); // Create and submit second instance of "CopyBuffer" kernel int prevCount21 = KernelCreateCallbackCount; kernel2 = clCreateKernel(pProgram, "CopyBuffer", &retVal); EXPECT_NE(nullptr, kernel2); EXPECT_EQ(CL_SUCCESS, retVal); // Verify that GT-Pin Kernel Create callback is not called multiple times for the same kernel EXPECT_EQ(prevCount21, KernelCreateCallbackCount); MultiDeviceKernel *pMultiDeviceKernel2 = static_cast(kernel2); Kernel *pKernel2 = pMultiDeviceKernel2->getKernel(rootDeviceIndex); const KernelInfo &kInfo2 = pKernel2->getKernelInfo(); uint64_t gtpinKernelId2 = pKernel2->getKernelId(); EXPECT_EQ(kInfo2.shaderHashCode, gtpinKernelId2); auto buff20 = clCreateBuffer(context, 0, n * sizeof(unsigned int), nullptr, nullptr); auto buff21 = clCreateBuffer(context, 0, n * sizeof(unsigned int), nullptr, nullptr); retVal = clSetKernelArg(pMultiDeviceKernel2, 0, sizeof(cl_mem), 
&buff20); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clSetKernelArg(pMultiDeviceKernel2, 1, sizeof(cl_mem), &buff21); EXPECT_EQ(CL_SUCCESS, retVal); int prevCount22 = KernelSubmitCallbackCount; int prevCount23 = CommandBufferCreateCallbackCount; int prevCount24 = CommandBufferCompleteCallbackCount; retVal = clEnqueueNDCountKernelINTEL(cmdQ, pMultiDeviceKernel2, workDim, globalWorkOffset, workgroupCount, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(prevCount22 + 1, KernelSubmitCallbackCount); EXPECT_EQ(prevCount23 + 1, CommandBufferCreateCallbackCount); retVal = clFinish(cmdQ); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(prevCount14 + 2, CommandBufferCompleteCallbackCount); EXPECT_EQ(prevCount24 + 2, CommandBufferCompleteCallbackCount); // Cleanup retVal = clReleaseKernel(kernel1); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseKernel(kernel2); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); pSource.reset(); retVal = clReleaseMemObject(buff10); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(buff11); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(buff20); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(buff21); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseCommandQueue(cmdQ); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseContext(context); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenKernelWithoutSSHIsUsedThenKernelCreateCallbacksIsNotCalled) { gtpinCallbacks.onContextCreate = OnContextCreate; gtpinCallbacks.onContextDestroy = OnContextDestroy; gtpinCallbacks.onKernelCreate = OnKernelCreate; gtpinCallbacks.onKernelSubmit = OnKernelSubmit; gtpinCallbacks.onCommandBufferCreate = OnCommandBufferCreate; gtpinCallbacks.onCommandBufferComplete = OnCommandBufferComplete; retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); cl_device_id device = 
(cl_device_id)pDevice; cl_context context = clCreateContext(nullptr, 1, &device, nullptr, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, context); auto pContext = castToObject(context); auto rootDeviceIndex = pDevice->getRootDeviceIndex(); char binary[1024] = {1, 2, 3, 4, 5, 6, 7, 8, 9, '\0'}; size_t binSize = 10; MockProgram *pProgram = Program::createBuiltInFromGenBinary(pContext, pContext->getDevices(), &binary[0], binSize, &retVal); ASSERT_NE(nullptr, pProgram); EXPECT_EQ(CL_SUCCESS, retVal); PatchTokensTestData::ValidProgramWithKernel programTokens; pProgram->buildInfos[rootDeviceIndex].unpackedDeviceBinary = makeCopy(reinterpret_cast(programTokens.storage.data()), programTokens.storage.size()); pProgram->buildInfos[rootDeviceIndex].unpackedDeviceBinarySize = programTokens.storage.size(); retVal = pProgram->processGenBinary(*pContext->getDevice(0)); EXPECT_EQ(CL_SUCCESS, retVal); int prevCount = KernelCreateCallbackCount; cl_kernel kernel = clCreateKernel(pProgram, std::string(programTokens.kernels[0].name.begin(), programTokens.kernels[0].name.size()).c_str(), &retVal); EXPECT_NE(nullptr, kernel); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(prevCount, KernelCreateCallbackCount); retVal = clReleaseKernel(kernel); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseContext(context); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenKernelWithoutSSHIsUsedThenGTPinSubmitKernelCallbackIsNotCalled) { gtpinCallbacks.onContextCreate = OnContextCreate; gtpinCallbacks.onContextDestroy = OnContextDestroy; gtpinCallbacks.onKernelCreate = OnKernelCreate; gtpinCallbacks.onKernelSubmit = OnKernelSubmit; gtpinCallbacks.onCommandBufferCreate = OnCommandBufferCreate; gtpinCallbacks.onCommandBufferComplete = OnCommandBufferComplete; retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); cl_kernel kernel = 
nullptr; cl_program pProgram = nullptr; cl_device_id device = (cl_device_id)pDevice; size_t sourceSize = 0; std::string testFile; cl_command_queue cmdQ = nullptr; cl_queue_properties properties = 0; cl_context context = nullptr; KernelBinaryHelper kbHelper("CopyBuffer_simd16", false); testFile.append(clFiles); testFile.append("CopyBuffer_simd16.cl"); auto pSource = loadDataFromFile(testFile.c_str(), sourceSize); EXPECT_NE(0u, sourceSize); EXPECT_NE(nullptr, pSource); context = clCreateContext(nullptr, 1, &device, nullptr, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, context); cmdQ = clCreateCommandQueue(context, device, properties, &retVal); ASSERT_NE(nullptr, cmdQ); EXPECT_EQ(CL_SUCCESS, retVal); const char *sources[1] = {pSource.get()}; pProgram = clCreateProgramWithSource( context, 1, sources, &sourceSize, &retVal); ASSERT_NE(nullptr, pProgram); retVal = clBuildProgram( pProgram, 1, &device, nullptr, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); int prevCount1 = KernelCreateCallbackCount; kernel = clCreateKernel(pProgram, "CopyBuffer", &retVal); EXPECT_NE(nullptr, kernel); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(prevCount1 + 1, KernelCreateCallbackCount); MultiDeviceKernel *pMultiDeviceKernel = static_cast(kernel); Kernel *pKernel = pMultiDeviceKernel->getKernel(rootDeviceIndex); const KernelInfo &kInfo = pKernel->getKernelInfo(); uint64_t gtpinKernelId = pKernel->getKernelId(); EXPECT_EQ(kInfo.shaderHashCode, gtpinKernelId); constexpr size_t n = 256; auto buff0 = clCreateBuffer(context, 0, n * sizeof(unsigned int), nullptr, nullptr); auto buff1 = clCreateBuffer(context, 0, n * sizeof(unsigned int), nullptr, nullptr); retVal = clSetKernelArg(pMultiDeviceKernel, 0, sizeof(cl_mem), &buff0); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clSetKernelArg(pMultiDeviceKernel, 1, sizeof(cl_mem), &buff1); EXPECT_EQ(CL_SUCCESS, retVal); // Verify that when SSH is removed then during kernel execution // GT-Pin Kernel Submit, Command Buffer Create and 
Command Buffer Complete callbacks are not called. pKernel->resizeSurfaceStateHeap(nullptr, 0, 0, 0); int prevCount2 = KernelSubmitCallbackCount; int prevCount3 = CommandBufferCreateCallbackCount; int prevCount4 = CommandBufferCompleteCallbackCount; cl_uint workDim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {n, 1, 1}; size_t localWorkSize[3] = {1, 1, 1}; retVal = clEnqueueNDRangeKernel(cmdQ, pMultiDeviceKernel, workDim, globalWorkOffset, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(prevCount2, KernelSubmitCallbackCount); EXPECT_EQ(prevCount3, CommandBufferCreateCallbackCount); retVal = clFinish(cmdQ); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(prevCount4, CommandBufferCompleteCallbackCount); // Cleanup retVal = clReleaseKernel(kernel); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); pSource.reset(); retVal = clReleaseMemObject(buff0); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(buff1); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseCommandQueue(cmdQ); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseContext(context); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenBlockedKernelWithoutSSHIsUsedThenGTPinSubmitKernelCallbackIsNotCalled) { gtpinCallbacks.onContextCreate = OnContextCreate; gtpinCallbacks.onContextDestroy = OnContextDestroy; gtpinCallbacks.onKernelCreate = OnKernelCreate; gtpinCallbacks.onKernelSubmit = OnKernelSubmit; gtpinCallbacks.onCommandBufferCreate = OnCommandBufferCreate; gtpinCallbacks.onCommandBufferComplete = OnCommandBufferComplete; retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); cl_kernel kernel = nullptr; cl_program pProgram = nullptr; cl_device_id device = (cl_device_id)pDevice; size_t sourceSize = 0; std::string testFile; cl_command_queue cmdQ = nullptr; cl_queue_properties properties = 0; 
cl_context context = nullptr; KernelBinaryHelper kbHelper("CopyBuffer_simd16", false); testFile.append(clFiles); testFile.append("CopyBuffer_simd16.cl"); auto pSource = loadDataFromFile(testFile.c_str(), sourceSize); EXPECT_NE(0u, sourceSize); EXPECT_NE(nullptr, pSource); context = clCreateContext(nullptr, 1, &device, nullptr, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, context); cmdQ = clCreateCommandQueue(context, device, properties, &retVal); ASSERT_NE(nullptr, cmdQ); EXPECT_EQ(CL_SUCCESS, retVal); const char *sources[1] = {pSource.get()}; pProgram = clCreateProgramWithSource( context, 1, sources, &sourceSize, &retVal); ASSERT_NE(nullptr, pProgram); retVal = clBuildProgram( pProgram, 1, &device, nullptr, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); int prevCount1 = KernelCreateCallbackCount; kernel = clCreateKernel(pProgram, "CopyBuffer", &retVal); EXPECT_NE(nullptr, kernel); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(prevCount1 + 1, KernelCreateCallbackCount); MultiDeviceKernel *pMultiDeviceKernel = static_cast(kernel); Kernel *pKernel = pMultiDeviceKernel->getKernel(rootDeviceIndex); const KernelInfo &kInfo = pKernel->getKernelInfo(); uint64_t gtpinKernelId = pKernel->getKernelId(); EXPECT_EQ(kInfo.shaderHashCode, gtpinKernelId); constexpr size_t n = 256; auto buff0 = clCreateBuffer(context, 0, n * sizeof(unsigned int), nullptr, nullptr); auto buff1 = clCreateBuffer(context, 0, n * sizeof(unsigned int), nullptr, nullptr); retVal = clSetKernelArg(pMultiDeviceKernel, 0, sizeof(cl_mem), &buff0); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clSetKernelArg(pMultiDeviceKernel, 1, sizeof(cl_mem), &buff1); EXPECT_EQ(CL_SUCCESS, retVal); // Verify that when SSH is removed then during kernel execution // GT-Pin Kernel Submit, Command Buffer Create and Command Buffer Complete callbacks are not called. 
pKernel->resizeSurfaceStateHeap(nullptr, 0, 0, 0); cl_event userEvent = clCreateUserEvent(context, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); int prevCount2 = KernelSubmitCallbackCount; int prevCount3 = CommandBufferCreateCallbackCount; int prevCount4 = CommandBufferCompleteCallbackCount; cl_uint workDim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {n, 1, 1}; size_t localWorkSize[3] = {1, 1, 1}; retVal = clEnqueueNDRangeKernel(cmdQ, pMultiDeviceKernel, workDim, globalWorkOffset, globalWorkSize, localWorkSize, 1, &userEvent, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(prevCount2, KernelSubmitCallbackCount); EXPECT_EQ(prevCount3, CommandBufferCreateCallbackCount); retVal = clSetUserEventStatus(userEvent, CL_COMPLETE); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clFinish(cmdQ); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(prevCount4, CommandBufferCompleteCallbackCount); // Cleanup retVal = clReleaseKernel(kernel); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); pSource.reset(); retVal = clReleaseMemObject(buff0); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(buff1); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseEvent(userEvent); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseCommandQueue(cmdQ); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseContext(context); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenTheSameKerneIsExecutedTwiceThenGTPinCreateKernelCallbackIsCalledOnce) { gtpinCallbacks.onContextCreate = OnContextCreate; gtpinCallbacks.onContextDestroy = OnContextDestroy; gtpinCallbacks.onKernelCreate = OnKernelCreate; gtpinCallbacks.onKernelSubmit = OnKernelSubmit; gtpinCallbacks.onCommandBufferCreate = OnCommandBufferCreate; gtpinCallbacks.onCommandBufferComplete = OnCommandBufferComplete; retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); cl_kernel kernel1 = nullptr; 
cl_kernel kernel2 = nullptr; cl_program pProgram = nullptr; cl_device_id device = (cl_device_id)pDevice; size_t sourceSize = 0; std::string testFile; cl_command_queue cmdQ = nullptr; cl_queue_properties properties = 0; cl_context context = nullptr; KernelBinaryHelper kbHelper("CopyBuffer_simd16", false); testFile.append(clFiles); testFile.append("CopyBuffer_simd16.cl"); auto pSource = loadDataFromFile(testFile.c_str(), sourceSize); EXPECT_NE(0u, sourceSize); EXPECT_NE(nullptr, pSource); context = clCreateContext(nullptr, 1, &device, nullptr, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, context); cmdQ = clCreateCommandQueue(context, device, properties, &retVal); ASSERT_NE(nullptr, cmdQ); EXPECT_EQ(CL_SUCCESS, retVal); const char *sources[1] = {pSource.get()}; pProgram = clCreateProgramWithSource( context, 1, sources, &sourceSize, &retVal); ASSERT_NE(nullptr, pProgram); retVal = clBuildProgram( pProgram, 1, &device, nullptr, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); // Kernel "CopyBuffer" - called for the first time int prevCount11 = KernelCreateCallbackCount; kernel1 = clCreateKernel(pProgram, "CopyBuffer", &retVal); EXPECT_NE(nullptr, kernel1); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(prevCount11 + 1, KernelCreateCallbackCount); MultiDeviceKernel *pMultiDeviceKernel1 = static_cast(kernel1); Kernel *pKernel1 = pMultiDeviceKernel1->getKernel(rootDeviceIndex); const KernelInfo &kInfo1 = pKernel1->getKernelInfo(); uint64_t gtpinKernelId1 = pKernel1->getKernelId(); EXPECT_EQ(kInfo1.shaderHashCode, gtpinKernelId1); constexpr size_t n = 256; auto buff10 = clCreateBuffer(context, 0, n * sizeof(unsigned int), nullptr, nullptr); auto buff11 = clCreateBuffer(context, 0, n * sizeof(unsigned int), nullptr, nullptr); retVal = clSetKernelArg(pMultiDeviceKernel1, 0, sizeof(cl_mem), &buff10); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clSetKernelArg(pMultiDeviceKernel1, 1, sizeof(cl_mem), &buff11); EXPECT_EQ(CL_SUCCESS, retVal); cl_event userEvent = 
clCreateUserEvent(context, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); int prevCount12 = KernelSubmitCallbackCount; int prevCount13 = CommandBufferCreateCallbackCount; int prevCount14 = CommandBufferCompleteCallbackCount; cl_uint workDim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {n, 1, 1}; size_t localWorkSize[3] = {1, 1, 1}; retVal = clEnqueueNDRangeKernel(cmdQ, pMultiDeviceKernel1, workDim, globalWorkOffset, globalWorkSize, localWorkSize, 1, &userEvent, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(prevCount12 + 1, KernelSubmitCallbackCount); EXPECT_EQ(prevCount13 + 1, CommandBufferCreateCallbackCount); EXPECT_EQ(prevCount14, CommandBufferCompleteCallbackCount); // The same kernel "CopyBuffer" - called second time int prevCount21 = KernelCreateCallbackCount; kernel2 = clCreateKernel(pProgram, "CopyBuffer", &retVal); EXPECT_NE(nullptr, kernel2); EXPECT_EQ(CL_SUCCESS, retVal); // Verify that Kernel Create callback was not called now EXPECT_EQ(prevCount21, KernelCreateCallbackCount); MultiDeviceKernel *pMultiDeviceKernel2 = static_cast(kernel2); Kernel *pKernel2 = pMultiDeviceKernel2->getKernel(rootDeviceIndex); const KernelInfo &kInfo2 = pKernel2->getKernelInfo(); uint64_t gtpinKernelId2 = pKernel2->getKernelId(); EXPECT_EQ(kInfo2.shaderHashCode, gtpinKernelId2); auto buff20 = clCreateBuffer(context, 0, n * sizeof(unsigned int), nullptr, nullptr); auto buff21 = clCreateBuffer(context, 0, n * sizeof(unsigned int), nullptr, nullptr); retVal = clSetKernelArg(pMultiDeviceKernel2, 0, sizeof(cl_mem), &buff20); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clSetKernelArg(pMultiDeviceKernel2, 1, sizeof(cl_mem), &buff21); EXPECT_EQ(CL_SUCCESS, retVal); int prevCount22 = KernelSubmitCallbackCount; int prevCount23 = CommandBufferCreateCallbackCount; int prevCount24 = CommandBufferCompleteCallbackCount; EXPECT_EQ(prevCount14, prevCount24); retVal = clEnqueueNDRangeKernel(cmdQ, pMultiDeviceKernel2, workDim, globalWorkOffset, globalWorkSize, 
localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(prevCount22 + 1, KernelSubmitCallbackCount); EXPECT_EQ(prevCount23 + 1, CommandBufferCreateCallbackCount); EXPECT_EQ(prevCount14, CommandBufferCompleteCallbackCount); EXPECT_EQ(prevCount24, CommandBufferCompleteCallbackCount); EXPECT_EQ(prevCount14, prevCount24); clSetUserEventStatus(userEvent, CL_COMPLETE); retVal = clFinish(cmdQ); EXPECT_EQ(CL_SUCCESS, retVal); // Verify that both kernel instances were completed EXPECT_EQ(prevCount14 + 2, CommandBufferCompleteCallbackCount); EXPECT_EQ(prevCount24 + 2, CommandBufferCompleteCallbackCount); // Cleanup retVal = clReleaseKernel(kernel1); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseKernel(kernel2); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); pSource.reset(); retVal = clReleaseMemObject(buff10); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(buff11); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(buff20); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(buff21); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseCommandQueue(cmdQ); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseEvent(userEvent); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseContext(context); EXPECT_EQ(CL_SUCCESS, retVal); }

// Submits the same kernel to GT-Pin three times. For the first submission the
// test sets returnNullResource so the recorded GT-Pin resource is nullptr; the
// remaining two submissions get real resources. The test checks that the null
// entry is never marked resident and never lands on the residency list, while the
// non-null entries do.
TEST_F(GTPinTests, givenMultipleKernelSubmissionsWhenOneOfGtpinSurfacesIsNullThenOnlyNonNullSurfacesAreMadeResident) {
    // Register the full set of GT-Pin callbacks before any OpenCL object is created
    gtpinCallbacks.onContextCreate = OnContextCreate;
    gtpinCallbacks.onContextDestroy = OnContextDestroy;
    gtpinCallbacks.onKernelCreate = OnKernelCreate;
    gtpinCallbacks.onKernelSubmit = OnKernelSubmit;
    gtpinCallbacks.onCommandBufferCreate = OnCommandBufferCreate;
    gtpinCallbacks.onCommandBufferComplete = OnCommandBufferComplete;
    retFromGtPin = GTPin_Init(&gtpinCallbacks, &driverServices, nullptr);
    EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin);

    cl_kernel kernel1 = nullptr;
    cl_program pProgram = nullptr;
    cl_device_id device = (cl_device_id)pDevice;
    size_t sourceSize = 0;
    std::string testFile;
    cl_command_queue cmdQ = nullptr;
    cl_queue_properties properties = 0;
    cl_context context = nullptr;

    // Load the kernel source from the test files directory
    KernelBinaryHelper kbHelper("CopyBuffer_simd16", false);
    testFile.append(clFiles);
    testFile.append("CopyBuffer_simd16.cl");
    auto pSource = loadDataFromFile(testFile.c_str(), sourceSize);
    EXPECT_NE(0u, sourceSize);
    EXPECT_NE(nullptr, pSource);

    context = clCreateContext(nullptr, 1, &device, nullptr, nullptr, &retVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_NE(nullptr, context);

    cmdQ = clCreateCommandQueue(context, device, properties, &retVal);
    ASSERT_NE(nullptr, cmdQ);
    EXPECT_EQ(CL_SUCCESS, retVal);

    const char *sources[1] = {pSource.get()};
    pProgram = clCreateProgramWithSource(
        context,
        1,
        sources,
        &sourceSize,
        &retVal);
    ASSERT_NE(nullptr, pProgram);

    retVal = clBuildProgram(
        pProgram,
        1,
        &device,
        nullptr,
        nullptr,
        nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    kernel1 = clCreateKernel(pProgram, "CopyBuffer", &retVal);
    // The handle is dereferenced right below, so stop here instead of crashing on nullptr
    ASSERT_NE(nullptr, kernel1);
    EXPECT_EQ(CL_SUCCESS, retVal);

    MultiDeviceKernel *pMultiDeviceKernel1 = static_cast<MultiDeviceKernel *>(kernel1);
    Kernel *pKernel1 = pMultiDeviceKernel1->getKernel(rootDeviceIndex);

    // Submission #0: GT-Pin resource is forced to be null
    returnNullResource = true;
    auto pCmdQueue = castToObject<CommandQueue>(cmdQ);
    gtpinNotifyKernelSubmit(pMultiDeviceKernel1, pCmdQueue);
    EXPECT_EQ(nullptr, kernelExecQueue[0].gtpinResource);

    CommandStreamReceiver &csr = pCmdQueue->getGpgpuCommandStreamReceiver();
    gtpinNotifyMakeResident(pKernel1, &csr);
    EXPECT_FALSE(kernelExecQueue[0].isResourceResident);

    std::vector<Surface *> residencyVector;
    gtpinNotifyUpdateResidencyList(pKernel1, &residencyVector);
    EXPECT_EQ(0u, residencyVector.size());

    returnNullResource = false;

    // Submission #1: real resource, made resident directly
    gtpinNotifyKernelSubmit(pMultiDeviceKernel1, pCmdQueue);
    EXPECT_NE(nullptr, kernelExecQueue[1].gtpinResource);
    gtpinNotifyMakeResident(pKernel1, &csr);
    EXPECT_TRUE(kernelExecQueue[1].isResourceResident);
    cl_mem gtpinBuffer1 = kernelExecQueue[1].gtpinResource;

    // Submission #2: real resource, added through the residency list
    gtpinNotifyKernelSubmit(pMultiDeviceKernel1, pCmdQueue);
    EXPECT_NE(nullptr, kernelExecQueue[2].gtpinResource);
    gtpinNotifyUpdateResidencyList(pKernel1, &residencyVector);
    EXPECT_EQ(1u, residencyVector.size());
    EXPECT_TRUE(kernelExecQueue[2].isResourceResident);
    EXPECT_FALSE(kernelExecQueue[0].isResourceResident);

    // The surface placed on the residency list is deleted by the test itself
    GeneralSurface *pSurf = static_cast<GeneralSurface *>(residencyVector[0]);
    delete pSurf;
    residencyVector.clear();

    cl_mem gtpinBuffer2 = kernelExecQueue[2].gtpinResource;

    // Unmap and free the two GT-Pin buffers obtained above
    gtpinUnmapBuffer(reinterpret_cast<context_handle_t>(context), reinterpret_cast<resource_handle_t>(gtpinBuffer1));
    gtpinFreeBuffer(reinterpret_cast<context_handle_t>(context), reinterpret_cast<resource_handle_t>(gtpinBuffer1));
    gtpinUnmapBuffer(reinterpret_cast<context_handle_t>(context), reinterpret_cast<resource_handle_t>(gtpinBuffer2));
    gtpinFreeBuffer(reinterpret_cast<context_handle_t>(context), reinterpret_cast<resource_handle_t>(gtpinBuffer2));

    retVal = clFinish(cmdQ);
    EXPECT_EQ(CL_SUCCESS, retVal);

    // Cleanup
    returnNullResource = false;
    kernelResources.clear();
    retVal = clReleaseKernel(kernel1);
    EXPECT_EQ(CL_SUCCESS, retVal);

    retVal = clReleaseProgram(pProgram);
    EXPECT_EQ(CL_SUCCESS, retVal);

    pSource.reset();

    retVal = clReleaseCommandQueue(cmdQ);
    EXPECT_EQ(CL_SUCCESS, retVal);

    retVal = clReleaseContext(context);
    EXPECT_EQ(CL_SUCCESS, retVal);
}

TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenKernelIsCreatedThenAllKernelSubmitRelatedNotificationsAreCalled) { gtpinCallbacks.onContextCreate = OnContextCreate; gtpinCallbacks.onContextDestroy = OnContextDestroy; gtpinCallbacks.onKernelCreate = OnKernelCreate; gtpinCallbacks.onKernelSubmit = OnKernelSubmit; gtpinCallbacks.onCommandBufferCreate = OnCommandBufferCreate; gtpinCallbacks.onCommandBufferComplete = OnCommandBufferComplete; retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); kernelExecQueue.clear(); cl_kernel kernel = nullptr; cl_program pProgram = nullptr; cl_device_id device = (cl_device_id)pDevice; size_t sourceSize = 0; std::string testFile; cl_command_queue cmdQ = nullptr; cl_queue_properties properties = 0; cl_context context = nullptr; KernelBinaryHelper kbHelper("CopyBuffer_simd16", false);
testFile.append(clFiles); testFile.append("CopyBuffer_simd16.cl"); auto pSource = loadDataFromFile(testFile.c_str(), sourceSize); EXPECT_NE(0u, sourceSize); EXPECT_NE(nullptr, pSource); context = clCreateContext(nullptr, 1, &device, nullptr, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, context); cmdQ = clCreateCommandQueue(context, device, properties, &retVal); ASSERT_NE(nullptr, cmdQ); EXPECT_EQ(CL_SUCCESS, retVal); const char *sources[1] = {pSource.get()}; pProgram = clCreateProgramWithSource( context, 1, sources, &sourceSize, &retVal); ASSERT_NE(nullptr, pProgram); retVal = clBuildProgram( pProgram, 1, &device, nullptr, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); // Create kernel int prevCount1 = KernelCreateCallbackCount; kernel = clCreateKernel(pProgram, "CopyBuffer", &retVal); ASSERT_NE(nullptr, kernel); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(prevCount1 + 1, KernelCreateCallbackCount); // Simulate that created kernel was sent for execution auto pMultiDeviceKernel = castToObject(kernel); auto pKernel = pMultiDeviceKernel->getKernel(rootDeviceIndex); auto pCmdQueue = castToObject(cmdQ); ASSERT_NE(nullptr, pKernel); EXPECT_EQ(0u, kernelExecQueue.size()); EXPECT_EQ(0u, kernelResources.size()); int prevCount2 = CommandBufferCreateCallbackCount; int prevCount3 = KernelSubmitCallbackCount; gtpinNotifyKernelSubmit(kernel, pCmdQueue); EXPECT_EQ(prevCount2 + 1, CommandBufferCreateCallbackCount); EXPECT_EQ(prevCount3 + 1, KernelSubmitCallbackCount); EXPECT_EQ(1u, kernelExecQueue.size()); EXPECT_EQ(1u, kernelResources.size()); EXPECT_EQ(pKernel, kernelExecQueue[0].pKernel); EXPECT_EQ(kernelResources[0], (resource_handle_t)kernelExecQueue[0].gtpinResource); EXPECT_EQ(pCmdQueue, kernelExecQueue[0].pCommandQueue); EXPECT_FALSE(kernelExecQueue[0].isTaskCountValid); EXPECT_FALSE(kernelExecQueue[0].isResourceResident); // Verify that if kernel unknown to GT-Pin is about to be flushed // then its residency vector does not obtain GT-Pin resource 
std::vector residencyVector; EXPECT_EQ(0u, residencyVector.size()); gtpinNotifyUpdateResidencyList(nullptr, &residencyVector); EXPECT_EQ(0u, residencyVector.size()); EXPECT_FALSE(kernelExecQueue[0].isResourceResident); // Verify that if kernel known to GT-Pin is about to be flushed // then its residency vector obtains GT-Pin resource EXPECT_EQ(0u, residencyVector.size()); gtpinNotifyUpdateResidencyList(pKernel, &residencyVector); EXPECT_EQ(1u, residencyVector.size()); GeneralSurface *pSurf = (GeneralSurface *)residencyVector[0]; delete pSurf; residencyVector.clear(); EXPECT_TRUE(kernelExecQueue[0].isResourceResident); kernelExecQueue[0].isResourceResident = false; // Create second kernel ... cl_kernel kernel2 = clCreateKernel(pProgram, "CopyBuffer", &retVal); ASSERT_NE(nullptr, kernel2); EXPECT_EQ(CL_SUCCESS, retVal); // ... and simulate that it was sent for execution auto pMultiDeviceKernel2 = castToObject(kernel2); auto pKernel2 = pMultiDeviceKernel2->getKernel(rootDeviceIndex); ASSERT_NE(nullptr, pKernel2); EXPECT_EQ(1u, kernelExecQueue.size()); EXPECT_EQ(1u, kernelResources.size()); int prevCount22 = CommandBufferCreateCallbackCount; int prevCount23 = KernelSubmitCallbackCount; gtpinNotifyKernelSubmit(kernel2, pCmdQueue); EXPECT_EQ(prevCount22 + 1, CommandBufferCreateCallbackCount); EXPECT_EQ(prevCount23 + 1, KernelSubmitCallbackCount); EXPECT_EQ(2u, kernelExecQueue.size()); EXPECT_EQ(2u, kernelResources.size()); EXPECT_EQ(pKernel2, kernelExecQueue[1].pKernel); EXPECT_EQ(kernelResources[1], (resource_handle_t)kernelExecQueue[1].gtpinResource); EXPECT_EQ(pCmdQueue, kernelExecQueue[1].pCommandQueue); EXPECT_FALSE(kernelExecQueue[1].isTaskCountValid); EXPECT_FALSE(kernelExecQueue[1].isResourceResident); // Verify that correct GT-Pin resource is made resident cl_mem gtpinBuffer0 = kernelExecQueue[0].gtpinResource; auto pBuffer0 = castToObject(gtpinBuffer0); GraphicsAllocation *pGfxAlloc0 = pBuffer0->getGraphicsAllocation(pDevice->getRootDeviceIndex()); cl_mem 
gtpinBuffer1 = kernelExecQueue[1].gtpinResource; auto pBuffer1 = castToObject(gtpinBuffer1); GraphicsAllocation *pGfxAlloc1 = pBuffer1->getGraphicsAllocation(pDevice->getRootDeviceIndex()); CommandStreamReceiver &csr = pCmdQueue->getGpgpuCommandStreamReceiver(); EXPECT_FALSE(pGfxAlloc0->isResident(csr.getOsContext().getContextId())); EXPECT_FALSE(pGfxAlloc1->isResident(csr.getOsContext().getContextId())); gtpinNotifyMakeResident(pKernel, &csr); EXPECT_TRUE(pGfxAlloc0->isResident(csr.getOsContext().getContextId())); EXPECT_FALSE(pGfxAlloc1->isResident(csr.getOsContext().getContextId())); // Cancel information about second submitted kernel kernelExecQueue.pop_back(); EXPECT_EQ(1u, kernelExecQueue.size()); kernelResources.pop_back(); EXPECT_EQ(1u, kernelResources.size()); gtpinUnmapBuffer((context_handle_t)context, (resource_handle_t)gtpinBuffer1); gtpinFreeBuffer((context_handle_t)context, (resource_handle_t)gtpinBuffer1); retVal = clReleaseKernel(kernel2); EXPECT_EQ(CL_SUCCESS, retVal); // Verify that if flush occurs on another queue then our kernel is not flushed to CSR uint32_t taskCount = 11; gtpinNotifyPreFlushTask(nullptr); EXPECT_EQ(1u, kernelExecQueue.size()); EXPECT_FALSE(kernelExecQueue[0].isTaskCountValid); gtpinNotifyFlushTask(taskCount); EXPECT_EQ(1u, kernelExecQueue.size()); EXPECT_FALSE(kernelExecQueue[0].isTaskCountValid); // Verify that if flush occurs on current queue then our kernel is flushed to CSR gtpinNotifyPreFlushTask(pCmdQueue); EXPECT_EQ(1u, kernelExecQueue.size()); EXPECT_FALSE(kernelExecQueue[0].isTaskCountValid); gtpinNotifyFlushTask(taskCount); EXPECT_EQ(1u, kernelExecQueue.size()); EXPECT_TRUE(kernelExecQueue[0].isTaskCountValid); EXPECT_EQ(taskCount, kernelExecQueue[0].taskCount); // Verify that if previous task was completed then it does not affect our kernel uint32_t taskCompleted = taskCount - 1; int prevCount4 = CommandBufferCompleteCallbackCount; gtpinNotifyTaskCompletion(taskCompleted); EXPECT_EQ(1u, kernelExecQueue.size()); 
EXPECT_EQ(1u, kernelResources.size()); EXPECT_EQ(prevCount4, CommandBufferCompleteCallbackCount); // Verify that if current task was completed then it is our kernel gtpinNotifyTaskCompletion(taskCompleted + 1); EXPECT_EQ(0u, kernelExecQueue.size()); EXPECT_EQ(0u, kernelResources.size()); EXPECT_EQ(prevCount4 + 1, CommandBufferCompleteCallbackCount); // Cleanup retVal = clReleaseKernel(kernel); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); pSource.reset(); retVal = clReleaseCommandQueue(cmdQ); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseContext(context); EXPECT_EQ(CL_SUCCESS, retVal); }

// Submits one kernel to GT-Pin twice in a row and checks that each submission
// gets its own entry in kernelExecQueue / kernelResources, and that the two
// GT-Pin buffers become resident one at a time, in submission order - first via
// gtpinNotifyMakeResident and then via gtpinNotifyUpdateResidencyList.
TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenOneKernelIsSubmittedSeveralTimesThenCorrectBuffersAreMadeResident) {
    // Register the full set of GT-Pin callbacks before any OpenCL object is created
    gtpinCallbacks.onContextCreate = OnContextCreate;
    gtpinCallbacks.onContextDestroy = OnContextDestroy;
    gtpinCallbacks.onKernelCreate = OnKernelCreate;
    gtpinCallbacks.onKernelSubmit = OnKernelSubmit;
    gtpinCallbacks.onCommandBufferCreate = OnCommandBufferCreate;
    gtpinCallbacks.onCommandBufferComplete = OnCommandBufferComplete;
    retFromGtPin = GTPin_Init(&gtpinCallbacks, &driverServices, nullptr);
    EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin);
    // Start from an empty execution queue so the index-based checks below are valid
    kernelExecQueue.clear();

    cl_kernel kernel = nullptr;
    cl_program pProgram = nullptr;
    cl_device_id device = (cl_device_id)pDevice;
    size_t sourceSize = 0;
    std::string testFile;
    cl_command_queue cmdQ = nullptr;
    cl_queue_properties properties = 0;
    cl_context context = nullptr;

    // Load the kernel source from the test files directory
    KernelBinaryHelper kbHelper("CopyBuffer_simd16", false);
    testFile.append(clFiles);
    testFile.append("CopyBuffer_simd16.cl");
    auto pSource = loadDataFromFile(testFile.c_str(), sourceSize);
    EXPECT_NE(0u, sourceSize);
    EXPECT_NE(nullptr, pSource);

    context = clCreateContext(nullptr, 1, &device, nullptr, nullptr, &retVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_NE(nullptr, context);

    cmdQ = clCreateCommandQueue(context, device, properties, &retVal);
    ASSERT_NE(nullptr, cmdQ);
    EXPECT_EQ(CL_SUCCESS, retVal);

    const char *sources[1] = {pSource.get()};
    pProgram = clCreateProgramWithSource(
        context,
        1,
        sources,
        &sourceSize,
        &retVal);
    ASSERT_NE(nullptr, pProgram);

    retVal = clBuildProgram(
        pProgram,
        1,
        &device,
        nullptr,
        nullptr,
        nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    // Create kernel
    int prevCount1 = KernelCreateCallbackCount;
    kernel = clCreateKernel(pProgram, "CopyBuffer", &retVal);
    ASSERT_NE(nullptr, kernel);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(prevCount1 + 1, KernelCreateCallbackCount);

    // Simulate that created kernel was sent for execution two times in a row
    auto pMultiDeviceKernel = castToObject<MultiDeviceKernel>(kernel);
    auto pKernel = pMultiDeviceKernel->getKernel(rootDeviceIndex);
    auto pCmdQueue = castToObject<CommandQueue>(cmdQ);
    ASSERT_NE(nullptr, pKernel);
    EXPECT_EQ(0u, kernelExecQueue.size());
    EXPECT_EQ(0u, kernelResources.size());
    int prevCount2 = CommandBufferCreateCallbackCount;
    int prevCount3 = KernelSubmitCallbackCount;

    // First kernel submission
    gtpinNotifyKernelSubmit(kernel, pCmdQueue);
    EXPECT_EQ(prevCount2 + 1, CommandBufferCreateCallbackCount);
    EXPECT_EQ(prevCount3 + 1, KernelSubmitCallbackCount);
    EXPECT_EQ(1u, kernelExecQueue.size());
    EXPECT_EQ(1u, kernelResources.size());
    EXPECT_EQ(pKernel, kernelExecQueue[0].pKernel);
    EXPECT_EQ(kernelResources[0], (resource_handle_t)kernelExecQueue[0].gtpinResource);
    EXPECT_EQ(pCmdQueue, kernelExecQueue[0].pCommandQueue);
    EXPECT_FALSE(kernelExecQueue[0].isTaskCountValid);
    EXPECT_FALSE(kernelExecQueue[0].isResourceResident);

    // Second kernel submission
    gtpinNotifyKernelSubmit(kernel, pCmdQueue);
    EXPECT_EQ(prevCount2 + 2, CommandBufferCreateCallbackCount);
    EXPECT_EQ(prevCount3 + 2, KernelSubmitCallbackCount);
    EXPECT_EQ(2u, kernelExecQueue.size());
    EXPECT_EQ(2u, kernelResources.size());
    EXPECT_EQ(pKernel, kernelExecQueue[0].pKernel);
    EXPECT_EQ(kernelResources[0], (resource_handle_t)kernelExecQueue[0].gtpinResource);
    EXPECT_EQ(pCmdQueue, kernelExecQueue[0].pCommandQueue);
    EXPECT_FALSE(kernelExecQueue[0].isTaskCountValid);
    EXPECT_FALSE(kernelExecQueue[0].isResourceResident);
    EXPECT_EQ(pKernel, kernelExecQueue[1].pKernel);
    EXPECT_EQ(kernelResources[1], (resource_handle_t)kernelExecQueue[1].gtpinResource);
    EXPECT_EQ(pCmdQueue, kernelExecQueue[1].pCommandQueue);
    EXPECT_FALSE(kernelExecQueue[1].isTaskCountValid);
    EXPECT_FALSE(kernelExecQueue[1].isResourceResident);

    // Verify that correct GT-Pin resource is made resident.
    // This simulates enqueuing non-blocked kernels
    cl_mem gtpinBuffer0 = kernelExecQueue[0].gtpinResource;
    auto pBuffer0 = castToObject<Buffer>(gtpinBuffer0);
    GraphicsAllocation *pGfxAlloc0 = pBuffer0->getGraphicsAllocation(pDevice->getRootDeviceIndex());
    cl_mem gtpinBuffer1 = kernelExecQueue[1].gtpinResource;
    auto pBuffer1 = castToObject<Buffer>(gtpinBuffer1);
    GraphicsAllocation *pGfxAlloc1 = pBuffer1->getGraphicsAllocation(pDevice->getRootDeviceIndex());
    CommandStreamReceiver &csr = pCmdQueue->getGpgpuCommandStreamReceiver();

    // Make resident resource of first submitted kernel
    EXPECT_FALSE(pGfxAlloc0->isResident(csr.getOsContext().getContextId()));
    EXPECT_FALSE(pGfxAlloc1->isResident(csr.getOsContext().getContextId()));
    gtpinNotifyMakeResident(pKernel, &csr);
    EXPECT_TRUE(pGfxAlloc0->isResident(csr.getOsContext().getContextId()));
    EXPECT_FALSE(pGfxAlloc1->isResident(csr.getOsContext().getContextId()));

    // Make resident resource of second submitted kernel
    gtpinNotifyMakeResident(pKernel, &csr);
    EXPECT_TRUE(pGfxAlloc0->isResident(csr.getOsContext().getContextId()));
    EXPECT_TRUE(pGfxAlloc1->isResident(csr.getOsContext().getContextId()));

    // Verify that correct GT-Pin resource is added to residency list.
    // This simulates enqueuing blocked kernels
    kernelExecQueue[0].isResourceResident = false;
    kernelExecQueue[1].isResourceResident = false;
    pGfxAlloc0->releaseResidencyInOsContext(csr.getOsContext().getContextId());
    pGfxAlloc1->releaseResidencyInOsContext(csr.getOsContext().getContextId());
    EXPECT_FALSE(pGfxAlloc0->isResident(csr.getOsContext().getContextId()));
    EXPECT_FALSE(pGfxAlloc1->isResident(csr.getOsContext().getContextId()));
    std::vector<Surface *> residencyVector;
    EXPECT_EQ(0u, residencyVector.size());

    // Add to residency list resource of first submitted kernel
    gtpinNotifyUpdateResidencyList(pKernel, &residencyVector);
    EXPECT_EQ(1u, residencyVector.size());

    // Make resident first resource on residency list
    GeneralSurface *pSurf1 = (GeneralSurface *)residencyVector[0];
    pSurf1->makeResident(csr);
    EXPECT_TRUE(pGfxAlloc0->isResident(csr.getOsContext().getContextId()));
    EXPECT_FALSE(pGfxAlloc1->isResident(csr.getOsContext().getContextId()));

    // Add to residency list resource of second submitted kernel
    gtpinNotifyUpdateResidencyList(pKernel, &residencyVector);
    EXPECT_EQ(2u, residencyVector.size());

    // Make resident second resource on residency list
    GeneralSurface *pSurf2 = (GeneralSurface *)residencyVector[1];
    pSurf2->makeResident(csr);
    EXPECT_TRUE(pGfxAlloc0->isResident(csr.getOsContext().getContextId()));
    EXPECT_TRUE(pGfxAlloc1->isResident(csr.getOsContext().getContextId()));

    // Cleanup
    delete pSurf1;
    delete pSurf2;
    residencyVector.clear();

    // Drop the bookkeeping of the second submission, then release its GT-Pin buffer
    kernelExecQueue.pop_back();
    EXPECT_EQ(1u, kernelExecQueue.size());
    kernelResources.pop_back();
    EXPECT_EQ(1u, kernelResources.size());
    gtpinUnmapBuffer((context_handle_t)context, (resource_handle_t)gtpinBuffer1);
    gtpinFreeBuffer((context_handle_t)context, (resource_handle_t)gtpinBuffer1);

    // Same for the first submission
    kernelExecQueue.pop_back();
    EXPECT_EQ(0u, kernelExecQueue.size());
    kernelResources.pop_back();
    EXPECT_EQ(0u, kernelResources.size());
    gtpinUnmapBuffer((context_handle_t)context, (resource_handle_t)gtpinBuffer0);
    gtpinFreeBuffer((context_handle_t)context, (resource_handle_t)gtpinBuffer0);

    retVal = clReleaseKernel(kernel);
    EXPECT_EQ(CL_SUCCESS, retVal);

    retVal = clReleaseProgram(pProgram);
    EXPECT_EQ(CL_SUCCESS, retVal);

    pSource.reset();

    retVal = clReleaseCommandQueue(cmdQ);
    EXPECT_EQ(CL_SUCCESS, retVal);

    retVal = clReleaseContext(context);
    EXPECT_EQ(CL_SUCCESS, retVal);
}

TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenLowMemoryConditionOccursThenKernelCreationFails) { InjectedFunction allocBufferFunc = [this](size_t failureIndex) { cl_device_id device = (cl_device_id)pDevice; cl_context context = clCreateContext(nullptr, 1, &device, nullptr, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, context); auto pContext = castToObject(context); char binary[1024] = {1, 2, 3, 4, 5, 6, 7, 8, 9, '\0'}; size_t binSize = 10; MockProgram *pProgram = Program::createBuiltInFromGenBinary(pContext, pContext->getDevices(), &binary[0], binSize, &retVal); ASSERT_NE(nullptr, pProgram); EXPECT_EQ(CL_SUCCESS, retVal); PatchTokensTestData::ValidProgramWithKernel programTokens; auto rootDeviceIndex = pDevice->getRootDeviceIndex(); pProgram->buildInfos[rootDeviceIndex].unpackedDeviceBinary = makeCopy(programTokens.storage.data(), programTokens.storage.size()); pProgram->buildInfos[rootDeviceIndex].unpackedDeviceBinarySize = programTokens.storage.size(); retVal = pProgram->processGenBinary(*pDevice); if (retVal == CL_OUT_OF_HOST_MEMORY) { auto nonFailingAlloc = MemoryManagement::nonfailingAllocation; EXPECT_NE(nonFailingAlloc, failureIndex); } else { EXPECT_EQ(CL_SUCCESS, retVal); // Create kernels from program cl_kernel kernels[2] = {0}; cl_uint numCreatedKernels = 0; if (MemoryManagement::nonfailingAllocation != failureIndex) { memoryManager->failAllAllocationsInDevicePool = true; } retVal = clCreateKernelsInProgram(pProgram, 2, kernels, &numCreatedKernels); if (MemoryManagement::nonfailingAllocation != failureIndex) { if (retVal != CL_SUCCESS) { EXPECT_EQ(nullptr,
kernels[0]); EXPECT_EQ(1u, numCreatedKernels); } clReleaseKernel(kernels[0]); } else { EXPECT_NE(nullptr, kernels[0]); EXPECT_EQ(1u, numCreatedKernels); clReleaseKernel(kernels[0]); } } clReleaseProgram(pProgram); clReleaseContext(context); }; gtpinCallbacks.onContextCreate = OnContextCreate; gtpinCallbacks.onContextDestroy = OnContextDestroy; gtpinCallbacks.onKernelCreate = OnKernelCreate; gtpinCallbacks.onKernelSubmit = OnKernelSubmit; gtpinCallbacks.onCommandBufferCreate = OnCommandBufferCreate; gtpinCallbacks.onCommandBufferComplete = OnCommandBufferComplete; retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); ASSERT_EQ(&NEO::gtpinCreateBuffer, driverServices.bufferAllocate); ASSERT_EQ(&NEO::gtpinFreeBuffer, driverServices.bufferDeallocate); EXPECT_EQ(&NEO::gtpinMapBuffer, driverServices.bufferMap); EXPECT_EQ(&NEO::gtpinUnmapBuffer, driverServices.bufferUnMap); injectFailures(allocBufferFunc); } TEST_F(GTPinTests, givenKernelWithSSHThenVerifyThatSSHResizeWorksWell) { cl_kernel kernel = nullptr; cl_program pProgram = nullptr; cl_device_id device = (cl_device_id)pDevice; size_t sourceSize = 0; std::string testFile; cl_context context = nullptr; KernelBinaryHelper kbHelper("CopyBuffer_simd16", false); testFile.append(clFiles); testFile.append("CopyBuffer_simd16.cl"); auto pSource = loadDataFromFile(testFile.c_str(), sourceSize); EXPECT_NE(0u, sourceSize); EXPECT_NE(nullptr, pSource); context = clCreateContext(nullptr, 1, &device, nullptr, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, context); const char *sources[1] = {pSource.get()}; pProgram = clCreateProgramWithSource( context, 1, sources, &sourceSize, &retVal); ASSERT_NE(nullptr, pProgram); retVal = clBuildProgram( pProgram, 1, &device, nullptr, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); // Create kernel kernel = clCreateKernel(pProgram, "CopyBuffer", &retVal); ASSERT_NE(nullptr, kernel); EXPECT_EQ(CL_SUCCESS, retVal); 
auto pMultiDeviceKernel = castToObject(kernel); auto pKernel = pMultiDeviceKernel->getKernel(rootDeviceIndex); ASSERT_NE(nullptr, pKernel); size_t numBTS1 = pKernel->getNumberOfBindingTableStates(); EXPECT_LE(2u, numBTS1); size_t sizeSurfaceStates1 = pKernel->getSurfaceStateHeapSize(); EXPECT_NE(0u, sizeSurfaceStates1); size_t offsetBTS1 = pKernel->getBindingTableOffset(); EXPECT_NE(0u, offsetBTS1); GFXCORE_FAMILY genFamily = pDevice->getHardwareInfo().platform.eRenderCoreFamily; GTPinHwHelper >pinHelper = GTPinHwHelper::get(genFamily); void *pSS1 = gtpinHelper.getSurfaceState(pKernel, 0); EXPECT_NE(nullptr, pSS1); // Enlarge SSH by one SURFACE STATE element bool surfaceAdded = gtpinHelper.addSurfaceState(pKernel); EXPECT_TRUE(surfaceAdded); size_t numBTS2 = pKernel->getNumberOfBindingTableStates(); EXPECT_EQ(numBTS1 + 1, numBTS2); size_t sizeSurfaceStates2 = pKernel->getSurfaceStateHeapSize(); EXPECT_GT(sizeSurfaceStates2, sizeSurfaceStates1); size_t offsetBTS2 = pKernel->getBindingTableOffset(); EXPECT_GT(offsetBTS2, offsetBTS1); void *pSS2 = gtpinHelper.getSurfaceState(pKernel, 0); EXPECT_NE(pSS2, pSS1); pSS2 = gtpinHelper.getSurfaceState(pKernel, numBTS2); EXPECT_EQ(nullptr, pSS2); // Remove kernel's SSH pKernel->resizeSurfaceStateHeap(nullptr, 0, 0, 0); // Try to enlarge SSH once again, this time the operation must fail surfaceAdded = gtpinHelper.addSurfaceState(pKernel); EXPECT_FALSE(surfaceAdded); size_t numBTS3 = pKernel->getNumberOfBindingTableStates(); EXPECT_EQ(0u, numBTS3); size_t sizeSurfaceStates3 = pKernel->getSurfaceStateHeapSize(); EXPECT_EQ(0u, sizeSurfaceStates3); size_t offsetBTS3 = pKernel->getBindingTableOffset(); EXPECT_EQ(0u, offsetBTS3); void *pSS3 = gtpinHelper.getSurfaceState(pKernel, 0); EXPECT_EQ(nullptr, pSS3); // Cleanup retVal = clReleaseKernel(kernel); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseContext(context); EXPECT_EQ(CL_SUCCESS, retVal); } 
TEST_F(GTPinTests, givenKernelThenVerifyThatKernelCodeSubstitutionWorksWell) { cl_kernel kernel = nullptr; cl_program pProgram = nullptr; cl_device_id device = (cl_device_id)pDevice; size_t sourceSize = 0; std::string testFile; cl_context context = nullptr; KernelBinaryHelper kbHelper("CopyBuffer_simd16", false); testFile.append(clFiles); testFile.append("CopyBuffer_simd16.cl"); auto pSource = loadDataFromFile(testFile.c_str(), sourceSize); EXPECT_NE(0u, sourceSize); EXPECT_NE(nullptr, pSource); context = clCreateContext(nullptr, 1, &device, nullptr, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, context); const char *sources[1] = {pSource.get()}; pProgram = clCreateProgramWithSource( context, 1, sources, &sourceSize, &retVal); ASSERT_NE(nullptr, pProgram); retVal = clBuildProgram( pProgram, 1, &device, nullptr, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); // Create kernel kernel = clCreateKernel(pProgram, "CopyBuffer", &retVal); ASSERT_NE(nullptr, kernel); EXPECT_EQ(CL_SUCCESS, retVal); auto pMultiDeviceKernel = castToObject(kernel); auto pKernel = pMultiDeviceKernel->getKernel(rootDeviceIndex); ASSERT_NE(nullptr, pKernel); bool isKernelCodeSubstituted = pKernel->isKernelHeapSubstituted(); EXPECT_FALSE(isKernelCodeSubstituted); // Substitute new kernel code constexpr size_t newCodeSize = 64; uint8_t newCode[newCodeSize] = {0x0, 0x1, 0x2, 0x3, 0x4}; pKernel->substituteKernelHeap(&newCode[0], newCodeSize); // Verify that substitution went properly isKernelCodeSubstituted = pKernel->isKernelHeapSubstituted(); EXPECT_TRUE(isKernelCodeSubstituted); uint8_t *pBin2 = reinterpret_cast(const_cast(pKernel->getKernelHeap())); EXPECT_EQ(pBin2, &newCode[0]); auto kernelIsa = pKernel->getKernelInfo().kernelAllocation->getUnderlyingBuffer(); EXPECT_EQ(0, memcmp(kernelIsa, newCode, newCodeSize)); // Cleanup retVal = clReleaseKernel(kernel); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); retVal = 
clReleaseContext(context); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(GTPinTests, WhenGettingGtPinHwHelperThenValidPointerIsReturned) { GFXCORE_FAMILY genFamily = pDevice->getHardwareInfo().platform.eRenderCoreFamily; GTPinHwHelper *pGTPinHelper = >PinHwHelper::get(genFamily); EXPECT_NE(nullptr, pGTPinHelper); } TEST(GTPinOfflineTests, givenGtPinInDisabledStateWhenCallbacksFromEnqueuePathAreCalledThenNothingHappens) { ASSERT_FALSE(gtpinIsGTPinInitialized()); auto dummyKernel = reinterpret_cast(0x1000); auto dummyQueue = reinterpret_cast(0x1000); uint32_t dummyCompletedTask = 0u; //now call gtpin function with dummy data, this must not crash gtpinNotifyKernelSubmit(dummyKernel, dummyQueue); gtpinNotifyPreFlushTask(dummyQueue); gtpinNotifyTaskCompletion(dummyCompletedTask); gtpinNotifyFlushTask(dummyCompletedTask); EXPECT_FALSE(gtpinIsGTPinInitialized()); } TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenOnKernelSubitIsCalledThenCorrectOffsetisSetInKernel) { gtpinCallbacks.onContextCreate = OnContextCreate; gtpinCallbacks.onContextDestroy = OnContextDestroy; gtpinCallbacks.onKernelCreate = OnKernelCreate; gtpinCallbacks.onKernelSubmit = OnKernelSubmit; gtpinCallbacks.onCommandBufferCreate = OnCommandBufferCreate; gtpinCallbacks.onCommandBufferComplete = OnCommandBufferComplete; retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); VariableBackup returnNullResourceBckp(&returnNullResource); VariableBackup kernelOffsetBckp(&kernelOffset); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); char surfaceStateHeap[0x80]; std::unique_ptr context(new MockContext(pDevice)); EXPECT_EQ(CL_SUCCESS, retVal); auto pKernelInfo = std::make_unique(); pKernelInfo->heapInfo.pSsh = surfaceStateHeap; pKernelInfo->heapInfo.SurfaceStateHeapSize = sizeof(surfaceStateHeap); auto pProgramm = std::make_unique(context.get(), false, toClDeviceVector(*pDevice)); std::unique_ptr cmdQ(new MockCommandQueue(context.get(), pDevice, nullptr, false)); std::unique_ptr 
pMultiDeviceKernel(MockMultiDeviceKernel::create(pProgramm.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex))); auto pKernel = static_cast(pMultiDeviceKernel->getKernel(rootDeviceIndex)); pKernel->setSshLocal(nullptr, sizeof(surfaceStateHeap)); kernelOffset = 0x1234; EXPECT_NE(pKernel->getStartOffset(), kernelOffset); returnNullResource = true; cl_context ctxt = context.get(); currContext = (gtpin::context_handle_t)ctxt; gtpinNotifyKernelSubmit(pMultiDeviceKernel.get(), cmdQ.get()); EXPECT_EQ(pKernel->getStartOffset(), kernelOffset); EXPECT_EQ(CL_SUCCESS, retVal); kernelResources.clear(); } TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenOnContextCreateIsCalledThenGtpinInitIsSet) { gtpinCallbacks.onContextCreate = OnContextCreate; gtpinCallbacks.onContextDestroy = OnContextDestroy; gtpinCallbacks.onKernelCreate = OnKernelCreate; gtpinCallbacks.onKernelSubmit = OnKernelSubmit; gtpinCallbacks.onCommandBufferCreate = OnCommandBufferCreate; gtpinCallbacks.onCommandBufferComplete = OnCommandBufferComplete; retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); auto context = std::make_unique(); gtpinNotifyContextCreate(context.get()); EXPECT_NE(gtpinGetIgcInit(), nullptr); } TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenOnKernelCreateIsCalledWithNullptrThenCallIsIgnored) { gtpinCallbacks.onContextCreate = OnContextCreate; gtpinCallbacks.onContextDestroy = OnContextDestroy; gtpinCallbacks.onKernelCreate = OnKernelCreate; gtpinCallbacks.onKernelSubmit = OnKernelSubmit; gtpinCallbacks.onCommandBufferCreate = OnCommandBufferCreate; gtpinCallbacks.onCommandBufferComplete = OnCommandBufferComplete; retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); auto prevCreateCount = KernelCreateCallbackCount; gtpinNotifyKernelCreate(nullptr); EXPECT_EQ(prevCreateCount, KernelCreateCallbackCount); } TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenKernelDoesNotHaveDebugDataThenPassNullPtrToOnKernelCreate) { static void 
*debugDataPtr = nullptr; static size_t debugDataSize = 0; gtpinCallbacks.onContextCreate = OnContextCreate; gtpinCallbacks.onContextDestroy = OnContextDestroy; gtpinCallbacks.onKernelCreate = [](context_handle_t context, const instrument_params_in_t *paramsIn, instrument_params_out_t *paramsOut) { paramsOut->inst_kernel_binary = const_cast(paramsIn->orig_kernel_binary); paramsOut->inst_kernel_size = paramsIn->orig_kernel_size; paramsOut->kernel_id = paramsIn->igc_hash_id; debugDataPtr = const_cast(paramsIn->debug_data); debugDataSize = paramsIn->debug_data_size; }; gtpinCallbacks.onKernelSubmit = [](command_buffer_handle_t cb, uint64_t kernelId, uint32_t *entryOffset, resource_handle_t *resource) {}; gtpinCallbacks.onCommandBufferCreate = OnCommandBufferCreate; gtpinCallbacks.onCommandBufferComplete = OnCommandBufferComplete; retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); MockKernelWithInternals mockKernel(*pDevice); mockKernel.kernelInfo.kernelDescriptor.external.debugData.reset(); mockKernel.kernelInfo.createKernelAllocation(pDevice->getDevice(), false); gtpinNotifyKernelCreate(static_cast(mockKernel.mockKernel->getMultiDeviceKernel())); EXPECT_EQ(debugDataPtr, nullptr); EXPECT_EQ(debugDataSize, 0u); pDevice->getMemoryManager()->freeGraphicsMemory(mockKernel.kernelInfo.kernelAllocation); } TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenKernelHasDebugDataThenCorrectDebugDataIsSet) { static void *debugDataPtr = nullptr; static size_t debugDataSize = 0; void *dummyDebugData = reinterpret_cast(0x123456); size_t dummyDebugDataSize = 0x2245; gtpinCallbacks.onContextCreate = OnContextCreate; gtpinCallbacks.onContextDestroy = OnContextDestroy; gtpinCallbacks.onKernelCreate = [](context_handle_t context, const instrument_params_in_t *paramsIn, instrument_params_out_t *paramsOut) { paramsOut->inst_kernel_binary = const_cast(paramsIn->orig_kernel_binary); paramsOut->inst_kernel_size = paramsIn->orig_kernel_size; paramsOut->kernel_id = 
paramsIn->igc_hash_id; debugDataPtr = const_cast(paramsIn->debug_data); debugDataSize = paramsIn->debug_data_size; }; gtpinCallbacks.onKernelSubmit = [](command_buffer_handle_t cb, uint64_t kernelId, uint32_t *entryOffset, resource_handle_t *resource) {}; gtpinCallbacks.onCommandBufferCreate = OnCommandBufferCreate; gtpinCallbacks.onCommandBufferComplete = OnCommandBufferComplete; retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); MockKernelWithInternals mockKernel(*pDevice); mockKernel.kernelInfo.kernelDescriptor.external.debugData.reset(new DebugData()); mockKernel.kernelInfo.debugData.vIsa = reinterpret_cast(dummyDebugData); mockKernel.kernelInfo.debugData.vIsaSize = static_cast(dummyDebugDataSize); mockKernel.kernelInfo.createKernelAllocation(pDevice->getDevice(), false); gtpinNotifyKernelCreate(static_cast(mockKernel.mockKernel->getMultiDeviceKernel())); EXPECT_EQ(debugDataPtr, dummyDebugData); EXPECT_EQ(debugDataSize, dummyDebugDataSize); pDevice->getMemoryManager()->freeGraphicsMemory(mockKernel.kernelInfo.kernelAllocation); } HWTEST_F(GTPinTests, givenGtPinInitializedWhenSubmittingKernelCommandThenFlushedTaskCountIsNotified) { auto mockCmdQ = std::make_unique>(pContext, pDevice, nullptr); auto onKernelSubmitFnc = [](command_buffer_handle_t cb, uint64_t kernelId, uint32_t *entryOffset, resource_handle_t *resource) { return; }; gtpinCallbacks.onContextCreate = OnContextCreate; gtpinCallbacks.onContextDestroy = OnContextDestroy; gtpinCallbacks.onKernelCreate = OnKernelCreate; gtpinCallbacks.onKernelSubmit = onKernelSubmitFnc; gtpinCallbacks.onCommandBufferCreate = OnCommandBufferCreate; gtpinCallbacks.onCommandBufferComplete = OnCommandBufferComplete; retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); IndirectHeap *ih1 = nullptr, *ih2 = nullptr, *ih3 = nullptr; mockCmdQ->allocateHeapMemory(IndirectHeap::Type::DYNAMIC_STATE, 128, ih1); 
mockCmdQ->allocateHeapMemory(IndirectHeap::Type::INDIRECT_OBJECT, 128, ih2); mockCmdQ->allocateHeapMemory(IndirectHeap::Type::SURFACE_STATE, 128, ih3); PreemptionMode preemptionMode = pDevice->getPreemptionMode(); auto cmdStream = new LinearStream(pDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties({pDevice->getRootDeviceIndex(), 128, AllocationType::COMMAND_BUFFER, pDevice->getDeviceBitfield()})); std::vector surfaces; auto kernelOperation = std::make_unique(cmdStream, *mockCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage()); MockKernelWithInternals kernel(*pDevice); kernelOperation->setHeaps(ih1, ih2, ih3); bool flushDC = false; bool slmUsed = false; bool ndRangeKernel = false; gtpinNotifyKernelSubmit(kernel.mockMultiDeviceKernel, mockCmdQ.get()); std::unique_ptr command(new CommandComputeKernel(*mockCmdQ, kernelOperation, surfaces, flushDC, slmUsed, ndRangeKernel, nullptr, preemptionMode, kernel, 1)); CompletionStamp stamp = command->submit(20, false); ASSERT_EQ(1u, kernelExecQueue.size()); EXPECT_TRUE(kernelExecQueue[0].isTaskCountValid); EXPECT_EQ(kernelExecQueue[0].taskCount, stamp.taskCount); } class GTPinFixtureWithLocalMemory : public GTPinFixture { public: void SetUp() override { DebugManager.flags.EnableLocalMemory.set(true); DebugManager.flags.GTPinAllocateBufferInSharedMemory.set(true); GTPinFixture::SetUpImpl(); } void TearDown() override { GTPinFixture::TearDown(); } DebugManagerStateRestore restore; }; using GTPinTestsWithLocalMemory = Test; TEST_F(GTPinTestsWithLocalMemory, whenPlatformHasNoSvmSupportThenGtPinBufferCantBeAllocatedInSharedMemory) { DebugManager.flags.GTPinAllocateBufferInSharedMemory.set(-1); GTPinHwHelper >pinHelper = GTPinHwHelper::get(pDevice->getHardwareInfo().platform.eRenderCoreFamily); auto canUseSharedAllocation = gtpinHelper.canUseSharedAllocation(pDevice->getHardwareInfo()); if (!pDevice->getHardwareInfo().capabilityTable.ftrSvm) { EXPECT_FALSE(canUseSharedAllocation); } } 
HWTEST_F(GTPinTestsWithLocalMemory, givenGtPinWithSupportForSharedAllocationWhenGtPinHelperFunctionsAreCalledThenCheckIfSharedAllocationCabBeUsed) { GTPinHwHelper >pinHelper = GTPinHwHelper::get(pDevice->getHardwareInfo().platform.eRenderCoreFamily); if (!gtpinHelper.canUseSharedAllocation(pDevice->getHardwareInfo())) { GTEST_SKIP(); } class MockGTPinHwHelperHw : public GTPinHwHelperHw { public: bool canUseSharedAllocation(const HardwareInfo &hwInfo) const override { canUseSharedAllocationCalled = true; return true; } mutable bool canUseSharedAllocationCalled = false; }; const auto family = pDevice->getHardwareInfo().platform.eRenderCoreFamily; MockGTPinHwHelperHw mockGTPinHwHelperHw; VariableBackup gtpinHwHelperBackup{>pinHwHelperFactory[family], &mockGTPinHwHelperHw}; resource_handle_t resource = nullptr; cl_context ctxt = (cl_context)((Context *)pContext); mockGTPinHwHelperHw.canUseSharedAllocationCalled = false; gtpinCreateBuffer((gtpin::context_handle_t)ctxt, 256, &resource); EXPECT_TRUE(mockGTPinHwHelperHw.canUseSharedAllocationCalled); mockGTPinHwHelperHw.canUseSharedAllocationCalled = false; uint8_t *address = nullptr; gtpinMapBuffer((gtpin::context_handle_t)ctxt, resource, &address); EXPECT_TRUE(mockGTPinHwHelperHw.canUseSharedAllocationCalled); mockGTPinHwHelperHw.canUseSharedAllocationCalled = false; gtpinUnmapBuffer((gtpin::context_handle_t)ctxt, resource); EXPECT_TRUE(mockGTPinHwHelperHw.canUseSharedAllocationCalled); mockGTPinHwHelperHw.canUseSharedAllocationCalled = false; gtpinFreeBuffer((gtpin::context_handle_t)ctxt, resource); EXPECT_TRUE(mockGTPinHwHelperHw.canUseSharedAllocationCalled); } HWTEST_F(GTPinTestsWithLocalMemory, givenGtPinCanUseSharedAllocationWhenGtPinBufferIsCreatedThenAllocateBufferInSharedMemory) { GTPinHwHelper >pinHelper = GTPinHwHelper::get(pDevice->getHardwareInfo().platform.eRenderCoreFamily); if (!gtpinHelper.canUseSharedAllocation(pDevice->getHardwareInfo())) { GTEST_SKIP(); } resource_handle_t resource = nullptr; 
cl_context ctxt = (cl_context)((Context *)pContext); GTPIN_DI_STATUS status = GTPIN_DI_SUCCESS; status = gtpinCreateBuffer((gtpin::context_handle_t)ctxt, 256, &resource); EXPECT_EQ(GTPIN_DI_SUCCESS, status); EXPECT_NE(nullptr, resource); auto allocData = reinterpret_cast(resource); auto cpuAllocation = allocData->cpuAllocation; ASSERT_NE(nullptr, cpuAllocation); EXPECT_NE(AllocationType::UNIFIED_SHARED_MEMORY, cpuAllocation->getAllocationType()); auto gpuAllocation = allocData->gpuAllocations.getGraphicsAllocation(pDevice->getRootDeviceIndex()); ASSERT_NE(nullptr, gpuAllocation); EXPECT_NE(AllocationType::UNIFIED_SHARED_MEMORY, gpuAllocation->getAllocationType()); uint8_t *address = nullptr; status = gtpinMapBuffer((gtpin::context_handle_t)ctxt, resource, &address); EXPECT_EQ(GTPIN_DI_SUCCESS, status); EXPECT_EQ(allocData->cpuAllocation->getUnderlyingBuffer(), address); status = gtpinUnmapBuffer((gtpin::context_handle_t)ctxt, resource); EXPECT_EQ(GTPIN_DI_SUCCESS, status); status = gtpinFreeBuffer((gtpin::context_handle_t)ctxt, resource); EXPECT_EQ(GTPIN_DI_SUCCESS, status); } HWTEST_F(GTPinTestsWithLocalMemory, givenGtPinCanUseSharedAllocationWhenGtPinBufferIsAllocatedInSharedMemoryThenSetSurfaceStateForTheBufferAndMakeItResident) { GTPinHwHelper >pinHelper = GTPinHwHelper::get(pDevice->getHardwareInfo().platform.eRenderCoreFamily); if (!gtpinHelper.canUseSharedAllocation(pDevice->getHardwareInfo())) { GTEST_SKIP(); } gtpinCallbacks.onContextCreate = OnContextCreate; gtpinCallbacks.onContextDestroy = OnContextDestroy; gtpinCallbacks.onKernelCreate = OnKernelCreate; gtpinCallbacks.onKernelSubmit = OnKernelSubmit; gtpinCallbacks.onCommandBufferCreate = OnCommandBufferCreate; gtpinCallbacks.onCommandBufferComplete = OnCommandBufferComplete; GTPIN_DI_STATUS status = GTPin_Init(>pinCallbacks, &driverServices, nullptr); EXPECT_EQ(GTPIN_DI_SUCCESS, status); cl_kernel kernel = nullptr; cl_program pProgram = nullptr; cl_device_id device = (cl_device_id)pDevice; size_t 
sourceSize = 0; std::string testFile; cl_command_queue cmdQ = nullptr; cl_queue_properties properties = 0; cl_context context = nullptr; KernelBinaryHelper kbHelper("CopyBuffer_simd16", false); testFile.append(clFiles); testFile.append("CopyBuffer_simd16.cl"); auto pSource = loadDataFromFile(testFile.c_str(), sourceSize); EXPECT_NE(0u, sourceSize); EXPECT_NE(nullptr, pSource); context = clCreateContext(nullptr, 1, &device, nullptr, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, context); cmdQ = clCreateCommandQueue(context, device, properties, &retVal); ASSERT_NE(nullptr, cmdQ); EXPECT_EQ(CL_SUCCESS, retVal); const char *sources[1] = {pSource.get()}; pProgram = clCreateProgramWithSource( context, 1, sources, &sourceSize, &retVal); ASSERT_NE(nullptr, pProgram); retVal = clBuildProgram( pProgram, 1, &device, nullptr, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); kernel = clCreateKernel(pProgram, "CopyBuffer", &retVal); EXPECT_NE(nullptr, kernel); EXPECT_EQ(CL_SUCCESS, retVal); auto pMultiDeviceKernel = static_cast(kernel); auto pKernel = pMultiDeviceKernel->getKernel(rootDeviceIndex); auto pCmdQueue = castToObject(cmdQ); auto &csr = pCmdQueue->getGpgpuCommandStreamReceiver(); using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; constexpr size_t renderSurfaceSize = sizeof(RENDER_SURFACE_STATE); size_t gtpinBTI = pKernel->getNumberOfBindingTableStates() - 1; void *pSurfaceState = gtpinHelper.getSurfaceState(pKernel, gtpinBTI); EXPECT_NE(nullptr, pSurfaceState); RENDER_SURFACE_STATE *surfaceState = reinterpret_cast(pSurfaceState); memset(pSurfaceState, 0, renderSurfaceSize); gtpinNotifyKernelSubmit(kernel, pCmdQueue); auto allocData = reinterpret_cast(kernelExecQueue[0].gtpinResource); EXPECT_NE(nullptr, allocData); auto gpuAllocation = allocData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex); EXPECT_NE(nullptr, gpuAllocation); RENDER_SURFACE_STATE expectedSurfaceState; memset(&expectedSurfaceState, 0, 
renderSurfaceSize); { void *addressToPatch = gpuAllocation->getUnderlyingBuffer(); size_t sizeToPatch = gpuAllocation->getUnderlyingBufferSize(); Buffer::setSurfaceState(&pDevice->getDevice(), &expectedSurfaceState, false, false, sizeToPatch, addressToPatch, 0, gpuAllocation, 0, 0, pKernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, pContext->getNumDevices()); } EXPECT_EQ(0, memcmp(&expectedSurfaceState, surfaceState, renderSurfaceSize)); EXPECT_FALSE(gpuAllocation->isResident(csr.getOsContext().getContextId())); gtpinNotifyMakeResident(pKernel, &csr); EXPECT_TRUE(gpuAllocation->isResident(csr.getOsContext().getContextId())); kernelExecQueue[0].isTaskCountValid = true; gtpinNotifyTaskCompletion(kernelExecQueue[0].taskCount); retVal = clReleaseKernel(kernel); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseCommandQueue(cmdQ); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseContext(context); EXPECT_EQ(CL_SUCCESS, retVal); } HWTEST_F(GTPinTestsWithLocalMemory, givenGtPinCanUseSharedAllocationWhenGtpinNotifyKernelSubmitThenMoveToAllocationDomainCalled) { GTPinHwHelper >pinHelper = GTPinHwHelper::get(pDevice->getHardwareInfo().platform.eRenderCoreFamily); if (!gtpinHelper.canUseSharedAllocation(pDevice->getHardwareInfo())) { GTEST_SKIP(); } class MockGTPinHwHelperHw : public GTPinHwHelperHw { public: void *getSurfaceState(Kernel *pKernel, size_t bti) override { return data; } uint8_t data[128]; }; struct MockResidentTestsPageFaultManager : public MockPageFaultManager { void moveAllocationToGpuDomain(void *ptr) override { moveAllocationToGpuDomainCalledTimes++; migratedAddress = ptr; } uint32_t moveAllocationToGpuDomainCalledTimes = 0; void *migratedAddress = nullptr; }; static std::unique_ptr allocDataHandle; static std::unique_ptr mockGAHandle; const auto family = pDevice->getHardwareInfo().platform.eRenderCoreFamily; MockGTPinHwHelperHw mockGTPinHwHelperHw; 
VariableBackup gtpinHwHelperBackup{>pinHwHelperFactory[family], &mockGTPinHwHelperHw}; gtpinCallbacks.onContextCreate = OnContextCreate; gtpinCallbacks.onContextDestroy = OnContextDestroy; gtpinCallbacks.onKernelCreate = OnKernelCreate; gtpinCallbacks.onKernelSubmit = [](command_buffer_handle_t cb, uint64_t kernelId, uint32_t *entryOffset, resource_handle_t *resource) { auto allocData = std::make_unique(0); auto mockGA = std::make_unique(); allocData->gpuAllocations.addAllocation(mockGA.get()); *resource = reinterpret_cast(allocData.get()); allocDataHandle = std::move(allocData); mockGAHandle = std::move(mockGA); }; gtpinCallbacks.onCommandBufferCreate = OnCommandBufferCreate; gtpinCallbacks.onCommandBufferComplete = OnCommandBufferComplete; GTPIN_DI_STATUS status = GTPin_Init(>pinCallbacks, &driverServices, nullptr); EXPECT_EQ(GTPIN_DI_SUCCESS, status); MockKernelWithInternals mockkernel(*pDevice); MockCommandQueue mockCmdQueue; cl_context ctxt = (cl_context)((Context *)pContext); currContext = (gtpin::context_handle_t)(ctxt); mockCmdQueue.device = pDevice; gtpinNotifyKernelSubmit(mockkernel.mockMultiDeviceKernel, &mockCmdQueue); EXPECT_EQ(reinterpret_cast(pDevice->getExecutionEnvironment()->memoryManager->getPageFaultManager())->moveAllocationToGpuDomainCalledTimes, 1u); mockCmdQueue.device = nullptr; mockGAHandle.reset(); allocDataHandle.reset(); } TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenGtpinRemoveCommandQueueIsCalledThenAllKernelsFromCmdQueueAreRemoved) { gtpinCallbacks.onContextCreate = OnContextCreate; gtpinCallbacks.onContextDestroy = OnContextDestroy; gtpinCallbacks.onKernelCreate = OnKernelCreate; gtpinCallbacks.onKernelSubmit = OnKernelSubmit; gtpinCallbacks.onCommandBufferCreate = OnCommandBufferCreate; gtpinCallbacks.onCommandBufferComplete = OnCommandBufferComplete; retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); kernelExecQueue.clear(); CommandQueue *cmdQ1 = 
reinterpret_cast(1); CommandQueue *cmdQ2 = reinterpret_cast(2); Kernel *kernel1 = reinterpret_cast(1); Kernel *kernel2 = reinterpret_cast(2); Kernel *kernel3 = reinterpret_cast(3); Kernel *kernel4 = reinterpret_cast(4); gtpinkexec_t kExec; kExec.pKernel = kernel1; kExec.pCommandQueue = cmdQ1; kernelExecQueue.push_back(kExec); kExec.pKernel = kernel2; kExec.pCommandQueue = cmdQ1; kernelExecQueue.push_back(kExec); kExec.pKernel = kernel3; kExec.pCommandQueue = cmdQ2; kernelExecQueue.push_back(kExec); kExec.pKernel = kernel4; kExec.pCommandQueue = cmdQ2; kernelExecQueue.push_back(kExec); EXPECT_EQ(4u, kernelExecQueue.size()); gtpinRemoveCommandQueue(cmdQ1); EXPECT_EQ(2u, kernelExecQueue.size()); gtpinRemoveCommandQueue(cmdQ2); EXPECT_EQ(0u, kernelExecQueue.size()); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/helpers/000077500000000000000000000000001422164147700232715ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/helpers/CMakeLists.txt000066400000000000000000000061201422164147700260300ustar00rootroot00000000000000# # Copyright (C) 2018-2022 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_helpers ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/api_specific_config_ocl_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/base_object_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cl_helper_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cl_hw_parse.h ${CMAKE_CURRENT_SOURCE_DIR}/cmd_buffer_validator.h ${CMAKE_CURRENT_SOURCE_DIR}/cmd_buffer_validator_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/dispatch_info_builder_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/dispatch_info_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/get_info_status_mapper_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/hardware_commands_helper_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/hardware_commands_helper_tests.h ${CMAKE_CURRENT_SOURCE_DIR}/heap_assigner_ocl_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/hw_helper_default_tests.cpp 
${CMAKE_CURRENT_SOURCE_DIR}/hw_helper_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/hw_helper_tests_dg2_or_below.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kmd_notify_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/memory_management_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/memory_properties_helpers_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mipmap_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/per_thread_data_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/queue_helpers_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/raii_hw_helper.h ${CMAKE_CURRENT_SOURCE_DIR}/ray_tracing_helper_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sampler_helpers_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/task_information_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/timestamp_packet_1_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/timestamp_packet_2_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/timestamp_packet_tests.h ${CMAKE_CURRENT_SOURCE_DIR}/transfer_properties_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ult_limits.h ${CMAKE_CURRENT_SOURCE_DIR}/validator_tests.cpp ${NEO_SHARED_TEST_DIRECTORY}/common/helpers/aligned_memory_tests.cpp ${NEO_SHARED_TEST_DIRECTORY}/common/helpers/debug_manager_state_restore.h ) if(MSVC OR COMPILER_SUPPORTS_SSE42) list(APPEND IGDRCL_SRCS_tests_helpers ${CMAKE_CURRENT_SOURCE_DIR}/uint16_sse4_tests.cpp ) endif() if(TESTS_XEHP_AND_LATER) list(APPEND IGDRCL_SRCS_tests_helpers ${CMAKE_CURRENT_SOURCE_DIR}/aub_helper_hw_tests_xehp_and_later.cpp ${CMAKE_CURRENT_SOURCE_DIR}/engine_node_helper_tests_xehp_and_later.cpp ${CMAKE_CURRENT_SOURCE_DIR}/hw_helper_tests_xehp_and_later.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_preamble_xehp_and_later.cpp ) endif() if(TESTS_DG2_AND_LATER) list(APPEND IGDRCL_SRCS_tests_helpers ${CMAKE_CURRENT_SOURCE_DIR}/hw_helper_tests_dg2_and_later.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_preamble_dg2_and_later.cpp ) endif() if(TESTS_PVC_AND_LATER) list(APPEND IGDRCL_SRCS_tests_helpers ${CMAKE_CURRENT_SOURCE_DIR}/hw_helper_tests_pvc_and_later.cpp ) endif() get_property(NEO_CORE_PREAMBLE_TESTS GLOBAL PROPERTY NEO_CORE_PREAMBLE_TESTS) 
target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_helpers}) add_subdirectories() compute-runtime-22.14.22890/opencl/test/unit_test/helpers/api_specific_config_ocl_tests.cpp000066400000000000000000000023111422164147700320140ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/api_specific_config.h" #include "opencl/source/os_interface/ocl_reg_path.h" #include "gtest/gtest.h" namespace NEO { TEST(ApiSpecificConfigOclTests, WhenGettingApiTypeThenCorrectTypeIsReturned) { EXPECT_EQ(ApiSpecificConfig::OCL, ApiSpecificConfig::getApiType()); } TEST(ApiSpecificConfigOclTests, WhenGettingAUBPrefixByApiTypeOCLIsReturned) { EXPECT_EQ(0, strcmp("ocl_", ApiSpecificConfig::getAubPrefixForSpecificApi().c_str())); } TEST(ApiSpecificConfigOclTests, WhenGettingNameOCLIsReturned) { EXPECT_EQ(0, strcmp("ocl", ApiSpecificConfig::getName().c_str())); } TEST(ApiSpecificConfigOclTests, WhenCheckingIfStatelessCompressionIsSupportedThenReturnTrue) { EXPECT_TRUE(ApiSpecificConfig::isStatelessCompressionSupported()); } TEST(ApiSpecificConfigOclTests, givenMaxAllocSizeWhenGettingReducedMaxAllocSizeThenReturnHalfOfThat) { EXPECT_EQ(512u, ApiSpecificConfig::getReducedMaxAllocSize(1024)); } TEST(ApiSpecificConfigOclTests, WhenGettingRegistryPathThenOclRegistryPathIsReturned) { EXPECT_STREQ(oclRegPath, ApiSpecificConfig::getRegistryPath()); } } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/helpers/aub_helper_hw_tests_xehp_and_later.cpp000066400000000000000000000027731422164147700330710ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/aub/aub_helper.h" #include "shared/source/aub_mem_dump/aub_mem_dump.h" #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" using namespace NEO; using AubHelperHwTestXeHPAndLater = Test; HWCMDTEST_F(IGFX_XE_HP_CORE, 
AubHelperHwTestXeHPAndLater, givenAubHelperWhenGetDataHintForPml4EntryIsCalledThenTracePpgttLevel4IsReturned) { AubHelperHw aubHelper(true); EXPECT_EQ(AubMemDump::DataTypeHintValues::TracePpgttLevel4, aubHelper.getDataHintForPml4Entry()); } HWCMDTEST_F(IGFX_XE_HP_CORE, AubHelperHwTestXeHPAndLater, givenAubHelperWhenGetDataHintForPml4EntryIsCalledThenTracePpgttLevel3IsReturned) { AubHelperHw aubHelper(true); EXPECT_EQ(AubMemDump::DataTypeHintValues::TracePpgttLevel3, aubHelper.getDataHintForPdpEntry()); } HWCMDTEST_F(IGFX_XE_HP_CORE, AubHelperHwTestXeHPAndLater, givenAubHelperWhenGetDataHintForPml4EntryIsCalledThenTracePpgttLevel2IsReturned) { AubHelperHw aubHelper(true); EXPECT_EQ(AubMemDump::DataTypeHintValues::TracePpgttLevel2, aubHelper.getDataHintForPdEntry()); } HWCMDTEST_F(IGFX_XE_HP_CORE, AubHelperHwTestXeHPAndLater, givenAubHelperWhenGetDataHintForPml4EntryIsCalledThenTracePpgttLevel1IsReturned) { AubHelperHw aubHelper(true); EXPECT_EQ(AubMemDump::DataTypeHintValues::TracePpgttLevel1, aubHelper.getDataHintForPtEntry()); } compute-runtime-22.14.22890/opencl/test/unit_test/helpers/base_object_tests.cpp000066400000000000000000000276731422164147700274760ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/accelerators/intel_accelerator.h" #include "opencl/source/api/api.h" #include "opencl/source/api/cl_types.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/context/context.h" #include "opencl/source/helpers/base_object.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/mem_obj/mem_obj.h" #include "opencl/source/platform/platform.h" #include "opencl/source/program/program.h" #include "opencl/source/sampler/sampler.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include 
"opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "gtest/gtest.h" namespace NEO { typedef struct _cl_object_for_test2 *cl_object_for_test2; struct _cl_object_for_test2 : public ClDispatch { }; template <> struct OpenCLObjectMapper<_cl_object_for_test2> { typedef struct ObjectForTest2 DerivedType; }; template <> struct OpenCLObjectMapper { typedef _cl_object_for_test2 BaseType; }; struct ObjectForTest2 : public NEO::BaseObject<_cl_object_for_test2> { static const cl_ulong objectMagic = 0x13650a12b79ce4dfLL; }; template struct BaseObjectTests : public ::testing::Test { }; template class MockObjectBase : public OclObject { public: using OclObject::OclObject; void setInvalidMagic() { validMagic = this->magic; this->magic = 0x0101010101010101LL; } void setInvalidIcdDispath() { this->dispatch.icdDispatch = reinterpret_cast(this); } void setValidMagic() { this->magic = validMagic; } bool isObjectValid() const { return this->isValid(); } cl_ulong validMagic; }; template class MockObject : public MockObjectBase {}; template <> class MockObject : public MockObjectBase { public: void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly, const Device &device, bool useGlobalAtomics, bool areMultipleSubDevicesInContext) override {} }; template <> class MockObject : public MockObjectBase { public: MockObject() : MockObjectBase(nullptr, false, toClDeviceVector(*(new MockClDevice(new MockDevice())))), device(this->clDevices[0]) {} private: std::unique_ptr device; }; typedef ::testing::Types< MockPlatform, IntelAccelerator, //Context, //Program, //Kernel, //Sampler //others... 
MockCommandQueue> BaseObjectTypes; typedef ::testing::Types< MockPlatform, IntelAccelerator, Context, Program, Buffer, MockCommandQueue> BaseObjectTypesForCastInvalidMagicTest; TYPED_TEST_CASE(BaseObjectTests, BaseObjectTypes); // "typedef" BaseObjectTests template to use with different TypeParams for testing template using BaseObjectWithDefaultCtorTests = BaseObjectTests; TYPED_TEST_CASE(BaseObjectWithDefaultCtorTests, BaseObjectTypesForCastInvalidMagicTest); TYPED_TEST(BaseObjectWithDefaultCtorTests, GivenInvalidMagicWhenCastingToObjectThenNullptrIsReturned) { MockObject *object = new MockObject; EXPECT_TRUE(object->isObjectValid()); object->setInvalidMagic(); EXPECT_FALSE(object->isObjectValid()); auto objectCasted = castToObject(object); EXPECT_EQ(nullptr, objectCasted); object->setValidMagic(); delete object; } TYPED_TEST(BaseObjectWithDefaultCtorTests, whenCastToObjectWithInvalidIcdDispatchThenReturnsNullptr) { auto object = std::make_unique>(); object->setInvalidIcdDispath(); auto objectCasted = castToObject(object.get()); EXPECT_EQ(nullptr, objectCasted); } TYPED_TEST(BaseObjectTests, WhenRetainingAndReleasingThenObjectReferenceIsUpdated) { TypeParam *object = new TypeParam; object->retain(); EXPECT_EQ(2, object->getReference()); object->release(); EXPECT_EQ(1, object->getReference()); object->release(); // MemoryLeakListener will detect a leak // if release doesn't delete memory. 
} TYPED_TEST(BaseObjectTests, GivenNullWhenCastingToObjectThenNullptrIsReturned) { typename TypeParam::BaseType *handle = nullptr; auto object = castToObject(handle); EXPECT_EQ(nullptr, object); } TYPED_TEST(BaseObjectTests, WhenCastingToFromBaseTypeThenCorrectTypeIsDerived) { TypeParam object; typename TypeParam::BaseType *baseObject = &object; auto objectNew = castToObject(baseObject); EXPECT_EQ(&object, objectNew); } TYPED_TEST(BaseObjectTests, WhenCastingToSameTypeThenSameObjectIsReturned) { TypeParam object; auto objectNew = castToObject(&object); EXPECT_EQ(&object, objectNew); } TYPED_TEST(BaseObjectTests, WhenCastingToDifferentTypeThenNullptrIsReturned) { TypeParam object; typename TypeParam::BaseType *baseObject = &object; auto notOriginalType = reinterpret_cast(baseObject); auto invalidObject = castToObject(notOriginalType); EXPECT_EQ(nullptr, invalidObject); } TYPED_TEST(BaseObjectTests, WhenCastingToDispatchTableThenEntriesAreCorrect) { TypeParam objectDrv; // Automatic downcasting to _cl_type *. 
typename TypeParam::BaseType *objectCL = &objectDrv; sharingFactory.fillGlobalDispatchTable(); // Common runtime casts to generic type assuming // the dispatch table is the first ptr in the structure auto genericObject = reinterpret_cast(objectCL); EXPECT_EQ(globalDispatchTable.icdDispatch, genericObject->dispatch.icdDispatch); EXPECT_EQ(globalDispatchTable.crtDispatch, genericObject->dispatch.crtDispatch); EXPECT_EQ(reinterpret_cast(clGetKernelArgInfo), genericObject->dispatch.crtDispatch->clGetKernelArgInfo); EXPECT_EQ(reinterpret_cast(clGetImageParamsINTEL), genericObject->dispatch.crtDispatch->clGetImageParamsINTEL); EXPECT_EQ(reinterpret_cast(clCreateAcceleratorINTEL), genericObject->dispatch.crtDispatch->clCreateAcceleratorINTEL); EXPECT_EQ(reinterpret_cast(clGetAcceleratorInfoINTEL), genericObject->dispatch.crtDispatch->clGetAcceleratorInfoINTEL); EXPECT_EQ(reinterpret_cast(clRetainAcceleratorINTEL), genericObject->dispatch.crtDispatch->clRetainAcceleratorINTEL); EXPECT_EQ(reinterpret_cast(clReleaseAcceleratorINTEL), genericObject->dispatch.crtDispatch->clReleaseAcceleratorINTEL); EXPECT_EQ(reinterpret_cast(clCreatePerfCountersCommandQueueINTEL), genericObject->dispatch.crtDispatch->clCreatePerfCountersCommandQueueINTEL); EXPECT_EQ(reinterpret_cast(clSetPerformanceConfigurationINTEL), genericObject->dispatch.crtDispatch->clSetPerformanceConfigurationINTEL); // Check empty placeholder dispatch table entries are null EXPECT_EQ(nullptr, genericObject->dispatch.crtDispatch->placeholder12); EXPECT_EQ(nullptr, genericObject->dispatch.crtDispatch->placeholder13); EXPECT_EQ(nullptr, genericObject->dispatch.crtDispatch->placeholder18); EXPECT_EQ(nullptr, genericObject->dispatch.crtDispatch->placeholder19); EXPECT_EQ(nullptr, genericObject->dispatch.crtDispatch->placeholder20); EXPECT_EQ(nullptr, genericObject->dispatch.crtDispatch->placeholder21); } TEST(BaseObjectTests, WhenSettingSharedContextFlagThenItIsSetCorrectly) { MockContext newContext; //cast to cl_context 
cl_context clContext = &newContext; EXPECT_FALSE(newContext.isSharedContext); clContext->isSharedContext = true; EXPECT_TRUE(newContext.isSharedContext); } TYPED_TEST(BaseObjectTests, WhenTakingAndReleasingOwnershipThenOwnershipCountIsUpdated) { TypeParam obj; EXPECT_FALSE(obj.hasOwnership()); obj.takeOwnership(); EXPECT_TRUE(obj.hasOwnership()); obj.takeOwnership(); EXPECT_TRUE(obj.hasOwnership()); obj.takeOwnership(); EXPECT_TRUE(obj.hasOwnership()); obj.releaseOwnership(); EXPECT_TRUE(obj.hasOwnership()); obj.releaseOwnership(); EXPECT_TRUE(obj.hasOwnership()); obj.releaseOwnership(); EXPECT_FALSE(obj.hasOwnership()); } TEST(CastToBuffer, WhenCastingFromMemObjThenBehavesAsExpected) { MockContext context; auto buffer = BufferHelper<>::create(&context); MemObj *memObj = buffer; cl_mem clObj = buffer; EXPECT_EQ(buffer, castToObject(clObj)); EXPECT_EQ(memObj, castToObject(clObj)); EXPECT_EQ(nullptr, castToObject(clObj)); buffer->release(); } TEST(CastToImage, WhenCastingFromMemObjThenBehavesAsExpected) { MockContext context; auto image = Image2dHelper<>::create(&context); MemObj *memObj = image; cl_mem clObj = image; EXPECT_EQ(image, castToObject(clObj)); EXPECT_EQ(memObj, castToObject(clObj)); EXPECT_EQ(nullptr, castToObject(clObj)); image->release(); } extern std::thread::id tempThreadID; class MockBuffer : public MockBufferStorage, public Buffer { using MockBufferStorage::device; public: MockBuffer() : MockBufferStorage(), Buffer(nullptr, ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_USE_HOST_PTR, 0, 0, MockBufferStorage::device.get()), CL_MEM_USE_HOST_PTR, 0, sizeof(data), &data, &data, GraphicsAllocationHelper::toMultiGraphicsAllocation(&mockGfxAllocation), true, false, false) { } void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly, const Device &device, bool useGlobalAtomics, bool areMultipleSubDevicesInContext) override { } }; TEST(BaseObjectTest, 
WhenUsingOwnershipWrapperThenOwnershipIsSetCorrectly) { MockBuffer buffer; { TakeOwnershipWrapper bufferOwnership(buffer, false); EXPECT_FALSE(buffer.hasOwnership()); } { TakeOwnershipWrapper bufferOwnership(buffer, true); EXPECT_TRUE(buffer.hasOwnership()); bufferOwnership.unlock(); EXPECT_FALSE(buffer.hasOwnership()); } } TYPED_TEST(BaseObjectTests, WhenObjectIsCreatedThenNumWaitersIsZero) { TypeParam *object = new TypeParam; EXPECT_EQ(0U, object->getCond().peekNumWaiters()); object->release(); } TYPED_TEST(BaseObjectTests, WhenConvertingToInternalObjectThenRefApiCountIsSetToZero) { class ObjectForTest : public NEO::MemObj { public: ObjectForTest() : MemObj(nullptr, 0, {}, 0, 0, 0u, nullptr, nullptr, 0, false, false, false) { } void convertToInternalObject(void) { NEO::BaseObject<_cl_mem>::convertToInternalObject(); } }; ObjectForTest *object = new ObjectForTest; EXPECT_EQ(1, object->getRefApiCount()); EXPECT_EQ(1, object->getRefInternalCount()); object->convertToInternalObject(); EXPECT_EQ(0, object->getRefApiCount()); EXPECT_EQ(1, object->getRefInternalCount()); object->decRefInternal(); } TYPED_TEST(BaseObjectTests, GivenNullptrWhenCastingToObjectOrAbortThenExceptionIsThrown) { EXPECT_ANY_THROW(castToObjectOrAbort(nullptr)); } TYPED_TEST(BaseObjectTests, WhenCastingToObjectOrAbortThenCorrectTypeIsDerived) { TypeParam object; typename TypeParam::BaseType *baseObject = &object; auto objectNew = castToObjectOrAbort(baseObject); EXPECT_EQ(&object, objectNew); } TYPED_TEST(BaseObjectTests, WhenCastToOrAbortWithDifferentTypeThenExceptionIsThrown) { TypeParam object; typename TypeParam::BaseType *baseObject = &object; auto notOriginalType = reinterpret_cast(baseObject); EXPECT_ANY_THROW(castToObjectOrAbort(notOriginalType)); } } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/helpers/cl_execution_environment_helper.cpp000066400000000000000000000022501422164147700324400ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * 
SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/helpers/cl_execution_environment_helper.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/os_interface/device_factory.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "opencl/test/unit_test/mocks/mock_platform.h" namespace NEO { ExecutionEnvironment *getClExecutionEnvironmentImpl(HardwareInfo *&hwInfo, uint32_t rootDeviceEnvironments) { ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); executionEnvironment->prepareRootDeviceEnvironments(rootDeviceEnvironments); DebugManagerStateRestore restorer; DebugManager.flags.CreateMultipleRootDevices.set(rootDeviceEnvironments); hwInfo = nullptr; DeviceFactory::prepareDeviceEnvironments(*executionEnvironment); hwInfo = executionEnvironment->rootDeviceEnvironments[0]->getMutableHardwareInfo(); executionEnvironment->initializeMemoryManager(); return executionEnvironment; } } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/helpers/cl_execution_environment_helper.h000066400000000000000000000004761422164147700321150ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include namespace NEO { class ExecutionEnvironment; struct HardwareInfo; ExecutionEnvironment *getClExecutionEnvironmentImpl(HardwareInfo *&hwInfo, uint32_t rootDeviceEnvironments); } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/helpers/cl_helper_tests.cpp000066400000000000000000000056431422164147700271640ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/helpers/cl_helper.h" #include "gtest/gtest.h" #include TEST(ClHelper, whenCallGetStringWithCmdTypeFunctionThenGetProperCmdTypeAsString) { std::array expected = 
{{"CL_COMMAND_NDRANGE_KERNEL", "CL_COMMAND_TASK", "CL_COMMAND_NATIVE_KERNEL", "CL_COMMAND_READ_BUFFER", "CL_COMMAND_WRITE_BUFFER", "CL_COMMAND_COPY_BUFFER", "CL_COMMAND_READ_IMAGE", "CL_COMMAND_WRITE_IMAGE", "CL_COMMAND_COPY_IMAGE", "CL_COMMAND_COPY_IMAGE_TO_BUFFER", "CL_COMMAND_COPY_BUFFER_TO_IMAGE", "CL_COMMAND_MAP_BUFFER", "CL_COMMAND_MAP_IMAGE", "CL_COMMAND_UNMAP_MEM_OBJECT", "CL_COMMAND_MARKER", "CL_COMMAND_ACQUIRE_GL_OBJECTS", "CL_COMMAND_RELEASE_GL_OBJECTS", "CL_COMMAND_READ_BUFFER_RECT", "CL_COMMAND_WRITE_BUFFER_RECT", "CL_COMMAND_COPY_BUFFER_RECT", "CL_COMMAND_USER", "CL_COMMAND_BARRIER", "CL_COMMAND_MIGRATE_MEM_OBJECTS", "CL_COMMAND_FILL_BUFFER", "CL_COMMAND_FILL_IMAGE", "CL_COMMAND_SVM_FREE", "CL_COMMAND_SVM_MEMCPY", "CL_COMMAND_SVM_MEMFILL", "CL_COMMAND_SVM_MAP", "CL_COMMAND_SVM_UNMAP", "CL_COMMAND_SVM_MIGRATE_MEM"}}; for (int i = CL_COMMAND_NDRANGE_KERNEL; i <= CL_COMMAND_SVM_MIGRATE_MEM; i++) { EXPECT_STREQ(expected[i - CL_COMMAND_NDRANGE_KERNEL].c_str(), NEO::cmdTypetoString(i).c_str()); } std::stringstream stream; stream << "CMD_UNKNOWN:" << (cl_command_type)-1; EXPECT_STREQ(stream.str().c_str(), NEO::cmdTypetoString(-1).c_str()); EXPECT_STREQ("CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR", NEO::cmdTypetoString(CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR).c_str()); }compute-runtime-22.14.22890/opencl/test/unit_test/helpers/cl_hw_parse.h000066400000000000000000000012271422164147700257320ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/test/common/cmd_parse/hw_parse.h" #include "opencl/source/command_queue/command_queue.h" namespace NEO { struct ClHardwareParse : HardwareParse { using HardwareParse::parseCommands; template void parseCommands(NEO::CommandQueue &commandQueue) { auto &commandStreamReceiver = commandQueue.getGpgpuCommandStreamReceiver(); auto &commandStream = commandQueue.getCS(1024); return HardwareParse::parseCommands(commandStreamReceiver, commandStream); } }; } 
// namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/helpers/cmd_buffer_validator.h000066400000000000000000000333361422164147700276130ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/utilities/stackvec.h" #include "shared/test/common/cmd_parse/hw_parse.h" #include #include #include namespace NEO { struct CmdValidator { CmdValidator() { } virtual ~CmdValidator() = default; virtual bool operator()(GenCmdList::iterator it, size_t numInSection, const std::string &member, std::string &outFailReason) = 0; }; template struct CmdValidatorWithStaticStorage : CmdValidator { static ChildT *get() { static ChildT val; return &val; } }; template struct GenericCmdValidator : CmdValidatorWithStaticStorage> { bool operator()(GenCmdList::iterator it, size_t numInSection, const std::string &member, std::string &outFailReason) override { auto cmd = genCmdCast(*it); UNRECOVERABLE_IF(cmd == nullptr); if (Expected != (cmd->*Getter)()) { outFailReason = member + " - expected: " + std::to_string(Expected) + ", got: " + std::to_string((cmd->*Getter)()); return false; } return true; } }; struct NamedValidator { NamedValidator(CmdValidator *validator) : NamedValidator(validator, "Unspecified") { } NamedValidator(CmdValidator *validator, const char *name) : validator(validator), name(name) { } CmdValidator *validator; const char *name; }; #define EXPECT_MEMBER(TYPE, FUNC, EXPECTED) \ NamedValidator { GenericCmdValidator, &TYPE::FUNC, EXPECTED>::get(), #FUNC } using Expects = std::vector; struct MatchCmd { MatchCmd(int amount, bool matchesAny) : amount(amount), matchesAny(matchesAny) { } MatchCmd(int amount) : MatchCmd(amount, false) { } virtual ~MatchCmd() = default; virtual bool matches(GenCmdList::iterator it) const = 0; virtual bool validates(GenCmdList::iterator it, std::string &outReason) const = 0; virtual const char *getName() const = 0; virtual void 
capture(GenCmdList::iterator it) = 0; int getExpectedCount() const { return amount; } bool getMatchesAny() const { return matchesAny; } protected: int amount = 0; bool matchesAny = false; }; constexpr int32_t AnyNumber = -1; constexpr int32_t AtLeastOne = -2; inline std::string countToString(int32_t count) { if (count == AnyNumber) { return "AnyNumber"; } else if (count == AtLeastOne) { return "AtLeastOne"; } else { return std::to_string(count); } } inline bool notPreciseNumber(int32_t count) { return (count == AnyNumber) || (count == AtLeastOne); } struct MatchAnyCmd : MatchCmd { MatchAnyCmd(int amount) : MatchCmd(amount, true) { if (amount > 0) { captured.reserve(amount); } } bool matches(GenCmdList::iterator it) const override { return true; } bool validates(GenCmdList::iterator it, std::string &outReason) const override { return true; } void capture(GenCmdList::iterator it) override { captured.push_back(*it); } const char *getName() const override { return "AnyCommand"; } protected: StackVec captured; }; template struct MatchHwCmd : MatchCmd { MatchHwCmd(int amount) : MatchCmd(amount) { if (amount > 0) { captured.reserve(amount); } } MatchHwCmd(int amount, Expects &&validators) : MatchHwCmd(amount) { this->validators.swap(validators); } bool matches(GenCmdList::iterator it) const override { return nullptr != genCmdCast(*it); } bool validates(GenCmdList::iterator it, std::string &outReason) const override { for (auto &v : validators) { if (false == (*v.validator)(it, captured.size(), v.name, outReason)) { return false; } } return true; } void capture(GenCmdList::iterator it) override { UNRECOVERABLE_IF(false == matches(it)); UNRECOVERABLE_IF(captured.size() == static_cast(amount)); captured.push_back(genCmdCast(*it)); } const char *getName() const override { CmdType cmd; cmd.init(); return HardwareParse::getCommandName(&cmd); } protected: StackVec captured; Expects validators; }; template inline bool expectCmdBuff(GenCmdList::iterator begin, GenCmdList::iterator 
end, std::vector &&expectedCmdBuffMatchers, std::string *outReason = nullptr) { if (expectedCmdBuffMatchers.size() == 0) { return begin == end; } bool failed = false; std::string failReason; auto it = begin; int cmdNum = 0; size_t currentMatcher = 0; int currentMatcherCount = 0; StackVec, 32> matchedCommandNames; auto matchedCommandsString = [&]() -> std::string { if (matchedCommandNames.size() == 0) { return "EMPTY"; } std::string ret = ""; for (size_t i = 0; i < matchedCommandNames.size(); ++i) { if (matchedCommandNames[i].second) { ret += std::to_string(i) + ":ANY(" + matchedCommandNames[i].first + ") "; } else { ret += std::to_string(i) + ":" + matchedCommandNames[i].first + " "; } } return ret; }; while (it != end) { if (currentMatcher < expectedCmdBuffMatchers.size()) { auto currentMatcherExpectedCount = expectedCmdBuffMatchers[currentMatcher]->getExpectedCount(); if (expectedCmdBuffMatchers[currentMatcher]->getMatchesAny() && ((currentMatcherExpectedCount == AnyNumber) || ((currentMatcherExpectedCount == AtLeastOne) && (currentMatcherCount > 0)))) { if (expectedCmdBuffMatchers.size() > currentMatcher + 1) { // eat as many as possible but proceed to next matcher when possible if (expectedCmdBuffMatchers[currentMatcher + 1]->matches(it)) { ++currentMatcher; currentMatcherCount = 0; } } } else if ((notPreciseNumber(expectedCmdBuffMatchers[currentMatcher]->getExpectedCount())) && (false == expectedCmdBuffMatchers[currentMatcher]->matches(it))) { // proceed to next matcher if not matched if ((expectedCmdBuffMatchers[currentMatcher]->getExpectedCount() == AtLeastOne) && (currentMatcherCount < 1)) { failed = true; failReason = "Unmatched cmd#" + std::to_string(cmdNum) + ":" + HardwareParse::getCommandName(*it) + " - expected " + std::string(expectedCmdBuffMatchers[currentMatcher]->getName()) + "(" + countToString(expectedCmdBuffMatchers[currentMatcher]->getExpectedCount()) + " - " + std::to_string(currentMatcherCount) + ") after : " + matchedCommandsString(); 
break; } ++currentMatcher; currentMatcherCount = 0; } while ((currentMatcher < expectedCmdBuffMatchers.size()) && expectedCmdBuffMatchers[currentMatcher]->getExpectedCount() == 0) { if (expectedCmdBuffMatchers[currentMatcher]->matches(it)) { failed = true; failReason = "Unmatched cmd#" + std::to_string(cmdNum) + " - expected anything but " + std::string(expectedCmdBuffMatchers[currentMatcher]->getName()) + "(" + countToString(expectedCmdBuffMatchers[currentMatcher]->getExpectedCount()) + " - " + std::to_string(currentMatcherCount) + ") after : " + matchedCommandsString(); break; } ++currentMatcher; currentMatcherCount = 0; } } if (currentMatcher >= expectedCmdBuffMatchers.size()) { failed = true; std::string unmatchedCommands; while (it != end) { unmatchedCommands += std::to_string(cmdNum) + ":" + HardwareParse::getCommandName(*it) + " "; ++it; ++cmdNum; } failReason = "Unexpected commands at the end of the command buffer : " + unmatchedCommands + ", AFTER : " + matchedCommandsString(); break; } if (false == expectedCmdBuffMatchers[currentMatcher]->matches(it)) { failed = true; failReason = "Unmatched cmd#" + std::to_string(cmdNum) + ":" + HardwareParse::getCommandName(*it) + " - expected " + std::string(expectedCmdBuffMatchers[currentMatcher]->getName()) + "(" + countToString(expectedCmdBuffMatchers[currentMatcher]->getExpectedCount()) + " - " + std::to_string(currentMatcherCount) + ") after : " + matchedCommandsString(); break; } if (false == expectedCmdBuffMatchers[currentMatcher]->validates(it, failReason)) { failReason = "cmd#" + std::to_string(cmdNum) + " (" + HardwareParse::getCommandName(*it) + ") failed validation - reason : " + failReason + " after : " + matchedCommandsString(); failed = true; break; } matchedCommandNames.push_back(std::make_pair(HardwareParse::getCommandName(*it), expectedCmdBuffMatchers[currentMatcher]->getMatchesAny())); ++currentMatcherCount; if (currentMatcherCount == expectedCmdBuffMatchers[currentMatcher]->getExpectedCount()) { 
++currentMatcher; currentMatcherCount = 0; } ++cmdNum; ++it; } if (failed == false) { while ((currentMatcher < expectedCmdBuffMatchers.size()) && ((expectedCmdBuffMatchers[currentMatcher]->getExpectedCount() == 0) || (expectedCmdBuffMatchers[currentMatcher]->getExpectedCount() == AnyNumber))) { ++currentMatcher; currentMatcherCount = 0; } if (currentMatcher == expectedCmdBuffMatchers.size()) { // no more matchers } else if (currentMatcher + 1 == expectedCmdBuffMatchers.size()) { // last matcher auto currentMatcherExpectedCount = expectedCmdBuffMatchers[currentMatcher]->getExpectedCount(); if ((currentMatcherExpectedCount == AtLeastOne) && (currentMatcherCount < 1)) { failReason = "Unexpected command buffer end at cmd#" + std::to_string(cmdNum) + " - expected " + expectedCmdBuffMatchers[currentMatcher]->getName() + "(" + countToString(currentMatcherExpectedCount) + " - " + std::to_string(currentMatcherCount) + ") after : " + matchedCommandsString(); failed = true; } if ((false == notPreciseNumber(currentMatcherExpectedCount)) && (currentMatcherExpectedCount != currentMatcherCount)) { failReason = "Unexpected command buffer end at cmd#" + std::to_string(cmdNum) + " - expected " + expectedCmdBuffMatchers[currentMatcher]->getName() + "(" + countToString(currentMatcherExpectedCount) + " - " + std::to_string(currentMatcherCount) + ") after : " + matchedCommandsString(); failed = true; } } else { // many matchers left std::string expectedMatchers = ""; int32_t currentMatcherExpectedCount = expectedCmdBuffMatchers[currentMatcher]->getExpectedCount(); expectedMatchers = expectedCmdBuffMatchers[currentMatcher]->getName() + std::string("(") + countToString(currentMatcherExpectedCount) + " - " + std::to_string(currentMatcherCount) + "), "; ++currentMatcher; while (currentMatcher < expectedCmdBuffMatchers.size()) { currentMatcherExpectedCount = expectedCmdBuffMatchers[currentMatcher]->getExpectedCount(); expectedMatchers += expectedCmdBuffMatchers[currentMatcher]->getName() + 
std::string("(") + countToString(currentMatcherExpectedCount) + " - 0), "; ++currentMatcher; } failReason = "Unexpected command buffer end at cmd#" + std::to_string(cmdNum) + " - expected " + expectedMatchers + " after : " + matchedCommandsString(); failed = true; } } else { if ((it != end) && (++it != end)) { ++cmdNum; failReason += "\n Unconsumed commands after failed one : "; while (it != end) { failReason += std::to_string(cmdNum) + ":" + HardwareParse::getCommandName(*it) + " "; ++cmdNum; ++it; } } } if (failed) { if (outReason != nullptr) { failReason += "\n Note : Input command buffer was : "; it = begin; cmdNum = 0; while (it != end) { failReason += std::to_string(cmdNum) + ":" + HardwareParse::getCommandName(*it) + " "; ++cmdNum; ++it; } *outReason = failReason; } } for (auto *matcher : expectedCmdBuffMatchers) { delete matcher; } return (failed == false); } template inline bool expectCmdBuff(NEO::LinearStream &commandStream, size_t startOffset, std::vector &&expectedCmdBuffMatchers, std::string *outReason = nullptr) { HardwareParse hwParser; hwParser.parseCommands(commandStream, startOffset); return expectCmdBuff(hwParser.cmdList.begin(), hwParser.cmdList.end(), std::move(expectedCmdBuffMatchers), outReason); } } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/helpers/cmd_buffer_validator_tests.cpp000066400000000000000000000360441422164147700313670ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/cmd_parse/hw_parse.h" #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/helpers/cmd_buffer_validator.h" using HwParseTest = ::testing::Test; using namespace NEO; HWTEST_F(HwParseTest, WhenEmptyBufferThenDontExpectCommands) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; bool cmdBuffOk = false; GenCmdList::iterator beg, end; end = beg; cmdBuffOk = expectCmdBuff(beg, end, std::vector{}); EXPECT_TRUE(cmdBuffOk); 
cmdBuffOk = expectCmdBuff(beg, end, std::vector{ new MatchHwCmd(0), }); EXPECT_TRUE(cmdBuffOk); cmdBuffOk = expectCmdBuff(beg, end, std::vector{ new MatchHwCmd(AnyNumber), }); EXPECT_TRUE(cmdBuffOk); cmdBuffOk = expectCmdBuff(beg, end, std::vector{ new MatchHwCmd(AtLeastOne), }); EXPECT_FALSE(cmdBuffOk); cmdBuffOk = expectCmdBuff(beg, end, std::vector{ new MatchHwCmd(1), }); EXPECT_FALSE(cmdBuffOk); } HWTEST_F(HwParseTest, WhenExpectingAnyCommandThenAllCommandsAreValidAsLongAsTheCountMatches) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; bool cmdBuffOk = false; char buffer[8192]; LinearStream stream{alignUp(buffer, 4096), 4096}; *stream.getSpaceForCmd() = FamilyType::cmdInitPipeControl; *stream.getSpaceForCmd() = FamilyType::cmdInitStateBaseAddress; *stream.getSpaceForCmd() = FamilyType::cmdInitPipeControl; cmdBuffOk = expectCmdBuff(stream, 0, std::vector{ new MatchAnyCmd(1), new MatchAnyCmd(1), new MatchAnyCmd(1)}); EXPECT_TRUE(cmdBuffOk); cmdBuffOk = expectCmdBuff(stream, 0, std::vector{ new MatchAnyCmd(AtLeastOne), }); EXPECT_TRUE(cmdBuffOk); cmdBuffOk = expectCmdBuff(stream, 0, std::vector{ new MatchAnyCmd(AnyNumber), }); EXPECT_TRUE(cmdBuffOk); cmdBuffOk = expectCmdBuff(stream, 0, std::vector{ new MatchAnyCmd(1), new MatchAnyCmd(1), new MatchAnyCmd(1), new MatchAnyCmd(1)}); EXPECT_FALSE(cmdBuffOk); cmdBuffOk = expectCmdBuff(stream, 0, std::vector{ new MatchAnyCmd(1), new MatchAnyCmd(1), new MatchAnyCmd(1), new MatchAnyCmd(AtLeastOne)}); EXPECT_FALSE(cmdBuffOk); cmdBuffOk = expectCmdBuff(stream, 0, std::vector{ new MatchAnyCmd(1), new MatchAnyCmd(1), new MatchAnyCmd(1), new MatchAnyCmd(AnyNumber)}); EXPECT_TRUE(cmdBuffOk); cmdBuffOk = expectCmdBuff(stream, 0, std::vector{ new MatchAnyCmd(AtLeastOne), new MatchAnyCmd(1)}); EXPECT_FALSE(cmdBuffOk); cmdBuffOk = expectCmdBuff(stream, 0, std::vector{ new MatchAnyCmd(AnyNumber), new MatchAnyCmd(1)}); EXPECT_FALSE(cmdBuffOk); cmdBuffOk = 
expectCmdBuff(stream, 0, std::vector{ new MatchAnyCmd(1), new MatchAnyCmd(1), }); EXPECT_FALSE(cmdBuffOk); cmdBuffOk = expectCmdBuff(stream, 0, std::vector{ new MatchAnyCmd(1)}); EXPECT_FALSE(cmdBuffOk); } HWTEST_F(HwParseTest, WhenExpectingSpecificSetOfCommandsThenNoOtherCommandBufferIsValid) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; bool cmdBuffOk = false; char buffer[8192]; LinearStream stream{alignUp(buffer, 4096), 4096}; *stream.getSpaceForCmd() = FamilyType::cmdInitPipeControl; *stream.getSpaceForCmd() = FamilyType::cmdInitStateBaseAddress; *stream.getSpaceForCmd() = FamilyType::cmdInitPipeControl; cmdBuffOk = expectCmdBuff(stream, 0, std::vector{ new MatchHwCmd(1), new MatchHwCmd(1), new MatchHwCmd(1)}); EXPECT_TRUE(cmdBuffOk); cmdBuffOk = expectCmdBuff(stream, 0, std::vector{ new MatchHwCmd(1), new MatchHwCmd(1), new MatchHwCmd(1), new MatchHwCmd(1)}); EXPECT_FALSE(cmdBuffOk); cmdBuffOk = expectCmdBuff(stream, 0, std::vector{ new MatchHwCmd(1), new MatchHwCmd(1), }); EXPECT_FALSE(cmdBuffOk); cmdBuffOk = expectCmdBuff(stream, 0, std::vector{ new MatchHwCmd(1), new MatchHwCmd(1), new MatchHwCmd(1), new MatchHwCmd(1)}); EXPECT_FALSE(cmdBuffOk); cmdBuffOk = expectCmdBuff(stream, 0, std::vector{ new MatchHwCmd(1), new MatchHwCmd(1), new MatchHwCmd(1)}); EXPECT_FALSE(cmdBuffOk); cmdBuffOk = expectCmdBuff(stream, 0, std::vector{ new MatchHwCmd(1), new MatchHwCmd(1), new MatchHwCmd(1)}); EXPECT_FALSE(cmdBuffOk); } HWTEST_F(HwParseTest, WhenExpectingAnyNumberOfCommandsThenOnlyTypeOfCommandMatters) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; bool cmdBuffOk = false; char buffer[8192]; LinearStream stream{alignUp(buffer, 4096), 4096}; *stream.getSpaceForCmd() = FamilyType::cmdInitPipeControl; *stream.getSpaceForCmd() = FamilyType::cmdInitStateBaseAddress; *stream.getSpaceForCmd() = 
FamilyType::cmdInitStateBaseAddress; *stream.getSpaceForCmd() = FamilyType::cmdInitStateBaseAddress; *stream.getSpaceForCmd() = FamilyType::cmdInitStateBaseAddress; *stream.getSpaceForCmd() = FamilyType::cmdInitPipeControl; cmdBuffOk = expectCmdBuff(stream, 0, std::vector{ new MatchHwCmd(1), new MatchHwCmd(1), new MatchHwCmd(1), new MatchHwCmd(1), new MatchHwCmd(1), new MatchHwCmd(1)}); EXPECT_TRUE(cmdBuffOk); cmdBuffOk = expectCmdBuff(stream, 0, std::vector{ new MatchHwCmd(1), new MatchHwCmd(1), new MatchHwCmd(1)}); EXPECT_FALSE(cmdBuffOk); cmdBuffOk = expectCmdBuff(stream, 0, std::vector{ new MatchHwCmd(1), new MatchHwCmd(AnyNumber), new MatchHwCmd(1)}); EXPECT_TRUE(cmdBuffOk); cmdBuffOk = expectCmdBuff(stream, 0, std::vector{ new MatchHwCmd(AnyNumber), new MatchHwCmd(AnyNumber), new MatchHwCmd(AnyNumber)}); EXPECT_TRUE(cmdBuffOk); cmdBuffOk = expectCmdBuff(stream, 0, std::vector{ new MatchHwCmd(AtLeastOne), new MatchHwCmd(AtLeastOne), new MatchHwCmd(AtLeastOne)}); EXPECT_TRUE(cmdBuffOk); cmdBuffOk = expectCmdBuff(stream, 0, std::vector{ new MatchHwCmd(AtLeastOne), new MatchHwCmd(AtLeastOne), new MatchHwCmd(AtLeastOne), new MatchHwCmd(AtLeastOne), new MatchHwCmd(AtLeastOne), }); EXPECT_FALSE(cmdBuffOk); cmdBuffOk = expectCmdBuff(stream, 0, std::vector{ new MatchHwCmd(AnyNumber), new MatchHwCmd(0), new MatchHwCmd(AnyNumber), new MatchHwCmd(AnyNumber), new MatchHwCmd(0)}); EXPECT_TRUE(cmdBuffOk); } HWTEST_F(HwParseTest, WhenCommandMemberValidatorFailsThenCommandBufferValidationFails) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; bool cmdBuffOk = false; char buffer[8192]; LinearStream stream{alignUp(buffer, 4096), 4096}; *stream.getSpaceForCmd() = FamilyType::cmdInitPipeControl; auto sba = stream.getSpaceForCmd(); *sba = FamilyType::cmdInitStateBaseAddress; sba->setGeneralStateBaseAddressModifyEnable(true); *stream.getSpaceForCmd() = FamilyType::cmdInitPipeControl; cmdBuffOk = 
expectCmdBuff(stream, 0, std::vector{ new MatchHwCmd(AnyNumber), new MatchHwCmd(AnyNumber, Expects{EXPECT_MEMBER(STATE_BASE_ADDRESS, getGeneralStateBaseAddressModifyEnable, true)}), new MatchHwCmd(AnyNumber)}); EXPECT_TRUE(cmdBuffOk); cmdBuffOk = expectCmdBuff(stream, 0, std::vector{ new MatchHwCmd(AnyNumber), new MatchHwCmd(AnyNumber, Expects{EXPECT_MEMBER(STATE_BASE_ADDRESS, getGeneralStateBaseAddressModifyEnable, false)}), new MatchHwCmd(AnyNumber)}); EXPECT_FALSE(cmdBuffOk); } compute-runtime-22.14.22890/opencl/test/unit_test/helpers/dispatch_info_builder_tests.cpp000066400000000000000000001352661422164147700315540ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/source/helpers/dispatch_info_builder.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/fixtures/context_fixture.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" namespace NEO { using namespace SplitDispatch; class DispatchInfoBuilderFixture : public ContextFixture, public ClDeviceFixture { using ContextFixture::SetUp; public: DispatchInfoBuilderFixture() {} void clearCrossThreadData() { memset(pCrossThreadData, 0, sizeof(pCrossThreadData)); } protected: void SetUp() { ClDeviceFixture::SetUp(); cl_device_id device = pClDevice; ContextFixture::SetUp(1, &device); pKernelInfo = std::make_unique(); pKernelInfo->kernelDescriptor.kernelAttributes.bufferAddressingMode = KernelDescriptor::Stateless; pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32; pKernelInfo->kernelDescriptor.kernelAttributes.numGrfRequired = GrfConfig::DefaultGrfNumber; pKernelInfo->setPerThreadScratchSize(1024, 0); pKernelInfo->setPrintfSurface(sizeof(uintptr_t), 0); pKernelInfo->addArgBuffer(0, 0x10, sizeof(void *)); pKernelInfo->addArgBuffer(1, 0x30, sizeof(void *)); 
pKernelInfo->addArgBuffer(2, 0x50, sizeof(void *)); pProgram = new MockProgram(pContext, false, toClDeviceVector(*pClDevice)); pKernel = new MockKernel(pProgram, *pKernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); pKernel->setCrossThreadData(pCrossThreadData, sizeof(pCrossThreadData)); pKernel->setKernelArgHandler(0, &Kernel::setArgBuffer); pKernel->slmTotalSize = 128; pKernel->isBuiltIn = true; } void TearDown() { delete pKernel; delete pProgram; ContextFixture::TearDown(); ClDeviceFixture::TearDown(); } std::unique_ptr pKernelInfo; MockProgram *pProgram = nullptr; MockKernel *pKernel = nullptr; char pCrossThreadData[128]; }; typedef Test DispatchInfoBuilderTest; template class DispatchInfoBuilderMock : DispatchInfoBuilder { public: using DispatchInfoBuilder::DispatchInfoBuilder; void pushSplit(const DispatchInfo &dispatchInfo, MultiDispatchInfo &outMdi) { DispatchInfoBuilder::pushSplit(dispatchInfo, outMdi); } }; TEST_F(DispatchInfoBuilderTest, Given1dWhenSplittingMultiDispatchInfoThenMultiDispatchInfo) { MultiDispatchInfo multiDispatchInfo; auto diBuilder = new DispatchInfoBuilderMock(*pClDevice); ASSERT_NE(nullptr, diBuilder); DispatchInfo dispatchInfo; diBuilder->pushSplit(dispatchInfo, multiDispatchInfo); EXPECT_TRUE(multiDispatchInfo.empty()); delete diBuilder; } TEST_F(DispatchInfoBuilderTest, WhenGettingDimensionThenCorrectDimensionIsReturned) { MultiDispatchInfo mdi1D, mdi2D, mdi3D; DispatchInfoBuilder *diBuilder1D = new DispatchInfoBuilder(*pClDevice); ASSERT_NE(nullptr, diBuilder1D); DispatchInfoBuilder *diBuilder2D = new DispatchInfoBuilder(*pClDevice); ASSERT_NE(nullptr, diBuilder2D); DispatchInfoBuilder *diBuilder3D = new DispatchInfoBuilder(*pClDevice); ASSERT_NE(nullptr, diBuilder3D); diBuilder1D->setDispatchGeometry(Vec3(1, 0, 0), Vec3(0, 0, 0), Vec3(0, 0, 0)); diBuilder1D->bake(mdi1D); for (auto &dispatchInfo : mdi1D) { EXPECT_EQ(1u, dispatchInfo.getDim()); } diBuilder2D->setDispatchGeometry(Vec3(1, 2, 0), Vec3(0, 0, 0), 
Vec3(0, 0, 0)); diBuilder2D->bake(mdi2D); for (auto &dispatchInfo : mdi2D) { EXPECT_EQ(2u, dispatchInfo.getDim()); } diBuilder3D->setDispatchGeometry(Vec3(1, 2, 3), Vec3(0, 0, 0), Vec3(0, 0, 0)); diBuilder3D->bake(mdi3D); for (auto &dispatchInfo : mdi3D) { EXPECT_EQ(3u, dispatchInfo.getDim()); } delete diBuilder3D; delete diBuilder2D; delete diBuilder1D; } TEST_F(DispatchInfoBuilderTest, WhenGettingGwsThenCorrectValuesAreReturned) { DispatchInfoBuilder *diBuilder = new DispatchInfoBuilder(*pClDevice); ASSERT_NE(nullptr, diBuilder); MultiDispatchInfo mdi0, mdi1, mdi2, mdi3; diBuilder->setDispatchGeometry(Vec3(0, 0, 0), Vec3(0, 0, 0), Vec3(0, 0, 0)); diBuilder->bake(mdi0); EXPECT_TRUE(mdi0.empty()); diBuilder->setDispatchGeometry(Vec3(1, 0, 0), Vec3(0, 0, 0), Vec3(0, 0, 0)); diBuilder->bake(mdi1); for (auto &dispatchInfo : mdi1) { EXPECT_EQ(1u, dispatchInfo.getGWS().x); EXPECT_EQ(1u, dispatchInfo.getGWS().y); EXPECT_EQ(1u, dispatchInfo.getGWS().z); EXPECT_EQ(1u, dispatchInfo.getActualWorkgroupSize().x); EXPECT_EQ(1u, dispatchInfo.getActualWorkgroupSize().y); EXPECT_EQ(1u, dispatchInfo.getActualWorkgroupSize().z); } diBuilder->setDispatchGeometry(Vec3(1, 2, 0), Vec3(0, 0, 0), Vec3(0, 0, 0)); diBuilder->bake(mdi2); for (auto &dispatchInfo : mdi2) { EXPECT_EQ(1u, dispatchInfo.getGWS().x); EXPECT_EQ(2u, dispatchInfo.getGWS().y); EXPECT_EQ(1u, dispatchInfo.getGWS().z); EXPECT_EQ(1u, dispatchInfo.getActualWorkgroupSize().x); EXPECT_EQ(2u, dispatchInfo.getActualWorkgroupSize().y); EXPECT_EQ(1u, dispatchInfo.getActualWorkgroupSize().z); } diBuilder->setDispatchGeometry(Vec3(1, 2, 3), Vec3(0, 0, 0), Vec3(0, 0, 0)); diBuilder->bake(mdi3); for (auto &dispatchInfo : mdi3) { EXPECT_EQ(1u, dispatchInfo.getGWS().x); EXPECT_EQ(2u, dispatchInfo.getGWS().y); EXPECT_EQ(3u, dispatchInfo.getGWS().z); EXPECT_EQ(1u, dispatchInfo.getActualWorkgroupSize().x); EXPECT_EQ(2u, dispatchInfo.getActualWorkgroupSize().y); EXPECT_EQ(3u, dispatchInfo.getActualWorkgroupSize().z); } delete diBuilder; } 
// Bakes geometries whose second argument (the enqueued local work size) is set
// in one, two and three dimensions and checks that both
// getEnqueuedWorkgroupSize() and getLocalWorkgroupSize() report it, with 1 in
// every component that was passed as 0.
TEST_F(DispatchInfoBuilderTest, WhenGettingElwsThenCorrectValuesAreReturned) {
    // Automatic storage: the builder is released on every exit path.
    DispatchInfoBuilder diBuilder(*pClDevice);

    MultiDispatchInfo mdi0, mdi1, mdi2, mdi3;

    // Zero global size: nothing is expected to be baked.
    diBuilder.setDispatchGeometry(Vec3(0, 0, 0), Vec3(1, 1, 1), Vec3(0, 0, 0));
    diBuilder.bake(mdi0);
    EXPECT_TRUE(mdi0.empty());

    diBuilder.setDispatchGeometry(Vec3(1, 0, 0), Vec3(1, 0, 0), Vec3(0, 0, 0));
    diBuilder.bake(mdi1);
    for (auto &dispatchInfo : mdi1) {
        EXPECT_EQ(1u, dispatchInfo.getEnqueuedWorkgroupSize().x);
        EXPECT_EQ(1u, dispatchInfo.getEnqueuedWorkgroupSize().y);
        EXPECT_EQ(1u, dispatchInfo.getEnqueuedWorkgroupSize().z);
        EXPECT_EQ(1u, dispatchInfo.getLocalWorkgroupSize().x);
        EXPECT_EQ(1u, dispatchInfo.getLocalWorkgroupSize().y);
        EXPECT_EQ(1u, dispatchInfo.getLocalWorkgroupSize().z);
    }

    diBuilder.setDispatchGeometry(Vec3(1, 1, 0), Vec3(1, 2, 0), Vec3(0, 0, 0));
    diBuilder.bake(mdi2);
    for (auto &dispatchInfo : mdi2) {
        EXPECT_EQ(1u, dispatchInfo.getEnqueuedWorkgroupSize().x);
        EXPECT_EQ(2u, dispatchInfo.getEnqueuedWorkgroupSize().y);
        EXPECT_EQ(1u, dispatchInfo.getEnqueuedWorkgroupSize().z);
        EXPECT_EQ(1u, dispatchInfo.getLocalWorkgroupSize().x);
        EXPECT_EQ(2u, dispatchInfo.getLocalWorkgroupSize().y);
        EXPECT_EQ(1u, dispatchInfo.getLocalWorkgroupSize().z);
    }

    diBuilder.setDispatchGeometry(Vec3(1, 1, 1), Vec3(1, 2, 3), Vec3(0, 0, 0));
    diBuilder.bake(mdi3);
    for (auto &dispatchInfo : mdi3) {
        EXPECT_EQ(1u, dispatchInfo.getEnqueuedWorkgroupSize().x);
        EXPECT_EQ(2u, dispatchInfo.getEnqueuedWorkgroupSize().y);
        EXPECT_EQ(3u, dispatchInfo.getEnqueuedWorkgroupSize().z);
        EXPECT_EQ(1u, dispatchInfo.getLocalWorkgroupSize().x);
        EXPECT_EQ(2u, dispatchInfo.getLocalWorkgroupSize().y);
        EXPECT_EQ(3u, dispatchInfo.getLocalWorkgroupSize().z);
    }
}

// With a kernel attached and no enqueued local size (second argument all zero),
// checks the local work-group size reported for 4, 4x4 and 4x4x4 global sizes.
// NOTE(review): the template argument list of std::make_unique looks truncated
// in this copy of the file - confirm against the upstream source.
TEST_F(DispatchInfoBuilderTest, WhenGettingLwsThenCorrectValuesAreReturned) {
    auto diBuilder = std::make_unique>(*pClDevice);
    ASSERT_NE(nullptr, diBuilder);

    MultiDispatchInfo mdi0, mdi1, mdi2, mdi3;

    // Zero global size: nothing is expected to be baked.
    diBuilder->setKernel(pKernel);
    diBuilder->setDispatchGeometry(Vec3(0, 0, 0), Vec3(0, 0, 0), Vec3(0, 0, 0));
    diBuilder->bake(mdi0);
    EXPECT_TRUE(mdi0.empty());

    diBuilder->setKernel(pKernel);
    diBuilder->setDispatchGeometry(Vec3(4, 0, 0), Vec3(0, 0, 0), Vec3(0, 0, 0));
    diBuilder->bake(mdi1);
    for (auto &dispatchInfo : mdi1) {
        EXPECT_EQ(4u, dispatchInfo.getLocalWorkgroupSize().x);
        EXPECT_EQ(1u, dispatchInfo.getLocalWorkgroupSize().y);
        EXPECT_EQ(1u, dispatchInfo.getLocalWorkgroupSize().z);
    }

    diBuilder->setKernel(pKernel);
    diBuilder->setDispatchGeometry(Vec3(4, 4, 0), Vec3(0, 0, 0), Vec3(0, 0, 0));
    diBuilder->bake(mdi2);
    for (auto &dispatchInfo : mdi2) {
        EXPECT_EQ(4u, dispatchInfo.getLocalWorkgroupSize().x);
        EXPECT_EQ(4u, dispatchInfo.getLocalWorkgroupSize().y);
        EXPECT_EQ(1u, dispatchInfo.getLocalWorkgroupSize().z);
    }

    diBuilder->setKernel(pKernel);
    diBuilder->setDispatchGeometry(Vec3(4, 4, 4), Vec3(0, 0, 0), Vec3(0, 0, 0));
    diBuilder->bake(mdi3);
    for (auto &dispatchInfo : mdi3) {
        EXPECT_EQ(4u, dispatchInfo.getLocalWorkgroupSize().y);
        EXPECT_EQ(4u, dispatchInfo.getLocalWorkgroupSize().z);
        EXPECT_EQ(4u, dispatchInfo.getLocalWorkgroupSize().x);
    }
}

// Checks that every dispatch baked from a builder carrying pKernel still refers
// to that kernel and that the kernel is flagged as built-in (the fixture's
// SetUp sets pKernel->isBuiltIn = true).
TEST_F(DispatchInfoBuilderTest, GivenNoSplitWhenCheckingIfBuiltinThenReturnTrue) {
    // Automatic storage instead of new/delete: the fatal ASSERT_EQ below returns
    // from the test body, which previously skipped the trailing delete and
    // leaked the builder.
    DispatchInfoBuilder diBuilder(*pClDevice);

    diBuilder.setKernel(pKernel);
    diBuilder.setDispatchGeometry(Vec3(256, 256, 256), Vec3(16, 16, 16), Vec3(0, 0, 0));

    MultiDispatchInfo multiDispatchInfo;
    diBuilder.bake(multiDispatchInfo);

    for (auto &dispatchInfo : multiDispatchInfo) {
        // Fatal: the next line dereferences the kernel pointer.
        ASSERT_EQ(pKernel, dispatchInfo.getKernel());
        EXPECT_TRUE(dispatchInfo.getKernel()->isBuiltIn);
    }
}

TEST_F(DispatchInfoBuilderTest, GivenSplitWhenCheckingIfBuiltinThenReturnTrue) { DispatchInfoBuilder *diBuilder1D = new DispatchInfoBuilder(*pClDevice); ASSERT_NE(nullptr, diBuilder1D); DispatchInfoBuilder *diBuilder2D = new DispatchInfoBuilder(*pClDevice); ASSERT_NE(nullptr, diBuilder2D);
DispatchInfoBuilder *diBuilder3D = new DispatchInfoBuilder(*pClDevice); ASSERT_NE(nullptr, diBuilder3D); // 1D diBuilder1D->setKernel(RegionCoordX::Left, pKernel); diBuilder1D->setDispatchGeometry(RegionCoordX::Left, Vec3(256, 0, 0), Vec3(16, 0, 0), Vec3(0, 0, 0)); MultiDispatchInfo mdi1D; diBuilder1D->bake(mdi1D); for (auto &dispatchInfo : mdi1D) { EXPECT_EQ(pKernel, dispatchInfo.getKernel()); EXPECT_TRUE(dispatchInfo.getKernel()->isBuiltIn); } //2D diBuilder2D->setKernel(RegionCoordX::Left, RegionCoordY::Bottom, pKernel); diBuilder2D->setDispatchGeometry(RegionCoordX::Left, RegionCoordY::Bottom, Vec3(256, 256, 0), Vec3(16, 16, 0), Vec3(0, 0, 0)); MultiDispatchInfo mdi2D; diBuilder2D->bake(mdi2D); for (auto &dispatchInfo : mdi2D) { EXPECT_EQ(pKernel, dispatchInfo.getKernel()); EXPECT_TRUE(dispatchInfo.getKernel()->isBuiltIn); } //3D diBuilder3D->setKernel(RegionCoordX::Right, RegionCoordY::Bottom, RegionCoordZ::Back, pKernel); diBuilder3D->setDispatchGeometry(RegionCoordX::Right, RegionCoordY::Bottom, RegionCoordZ::Back, Vec3(256, 256, 256), Vec3(16, 16, 16), Vec3(0, 0, 0)); MultiDispatchInfo mdi3D; diBuilder3D->bake(mdi3D); for (auto &dispatchInfo : mdi3D) { EXPECT_EQ(pKernel, dispatchInfo.getKernel()); EXPECT_TRUE(dispatchInfo.getKernel()->isBuiltIn); } delete diBuilder3D; delete diBuilder2D; delete diBuilder1D; } TEST_F(DispatchInfoBuilderTest, GivenNoSplitWhenGettingWalkerInfoThenCorrectValuesAreReturned) { DispatchInfoBuilder *diBuilder1D = new DispatchInfoBuilder(*pClDevice); ASSERT_NE(nullptr, diBuilder1D); DispatchInfoBuilder *diBuilder2D = new DispatchInfoBuilder(*pClDevice); ASSERT_NE(nullptr, diBuilder2D); DispatchInfoBuilder *diBuilder3D = new DispatchInfoBuilder(*pClDevice); ASSERT_NE(nullptr, diBuilder3D); // 1D diBuilder1D->setKernel(pKernel); diBuilder1D->setDispatchGeometry(Vec3(256, 0, 0), Vec3(16, 0, 0), Vec3(0, 0, 0)); MultiDispatchInfo mdi1D; diBuilder1D->bake(mdi1D); EXPECT_EQ(1u, mdi1D.size()); const DispatchInfo *di1D = mdi1D.begin(); 
EXPECT_EQ(pKernel, di1D->getKernel()); EXPECT_EQ(256u, di1D->getGWS().x); EXPECT_EQ(1u, di1D->getGWS().y); EXPECT_EQ(1u, di1D->getGWS().z); EXPECT_EQ(16u, di1D->getEnqueuedWorkgroupSize().x); EXPECT_EQ(1u, di1D->getEnqueuedWorkgroupSize().y); EXPECT_EQ(1u, di1D->getEnqueuedWorkgroupSize().z); EXPECT_EQ(0u, di1D->getOffset().x); EXPECT_EQ(0u, di1D->getOffset().y); EXPECT_EQ(0u, di1D->getOffset().z); EXPECT_EQ(16u, di1D->getLocalWorkgroupSize().x); EXPECT_EQ(1u, di1D->getLocalWorkgroupSize().y); EXPECT_EQ(1u, di1D->getLocalWorkgroupSize().z); EXPECT_EQ(256u, di1D->getActualWorkgroupSize().x); EXPECT_EQ(1u, di1D->getActualWorkgroupSize().y); EXPECT_EQ(1u, di1D->getActualWorkgroupSize().z); EXPECT_EQ(16u, di1D->getTotalNumberOfWorkgroups().x); EXPECT_EQ(1u, di1D->getTotalNumberOfWorkgroups().y); EXPECT_EQ(1u, di1D->getTotalNumberOfWorkgroups().z); EXPECT_EQ(16u, di1D->getNumberOfWorkgroups().x); EXPECT_EQ(1u, di1D->getNumberOfWorkgroups().y); EXPECT_EQ(1u, di1D->getNumberOfWorkgroups().z); EXPECT_EQ(0u, di1D->getStartOfWorkgroups().x); EXPECT_EQ(0u, di1D->getStartOfWorkgroups().y); EXPECT_EQ(0u, di1D->getStartOfWorkgroups().z); // 2D diBuilder2D->setKernel(pKernel); diBuilder2D->setDispatchGeometry(Vec3(256, 256, 0), Vec3(16, 16, 0), Vec3(0, 0, 0)); MultiDispatchInfo mdi2D; diBuilder2D->bake(mdi2D); EXPECT_EQ(1u, mdi2D.size()); const DispatchInfo *di2D = mdi2D.begin(); EXPECT_EQ(pKernel, di2D->getKernel()); EXPECT_EQ(256u, di2D->getGWS().x); EXPECT_EQ(256u, di2D->getGWS().y); EXPECT_EQ(1u, di2D->getGWS().z); EXPECT_EQ(16u, di2D->getEnqueuedWorkgroupSize().x); EXPECT_EQ(16u, di2D->getEnqueuedWorkgroupSize().y); EXPECT_EQ(1u, di2D->getEnqueuedWorkgroupSize().z); EXPECT_EQ(0u, di2D->getOffset().x); EXPECT_EQ(0u, di2D->getOffset().y); EXPECT_EQ(0u, di2D->getOffset().z); EXPECT_EQ(16u, di2D->getLocalWorkgroupSize().x); EXPECT_EQ(16u, di2D->getLocalWorkgroupSize().y); EXPECT_EQ(1u, di2D->getLocalWorkgroupSize().z); EXPECT_EQ(16u, di2D->getTotalNumberOfWorkgroups().x); 
EXPECT_EQ(16u, di2D->getTotalNumberOfWorkgroups().y); EXPECT_EQ(1u, di2D->getTotalNumberOfWorkgroups().z); EXPECT_EQ(16u, di2D->getNumberOfWorkgroups().x); EXPECT_EQ(16u, di2D->getNumberOfWorkgroups().y); EXPECT_EQ(1u, di2D->getNumberOfWorkgroups().z); EXPECT_EQ(0u, di2D->getStartOfWorkgroups().x); EXPECT_EQ(0u, di2D->getStartOfWorkgroups().y); EXPECT_EQ(0u, di2D->getStartOfWorkgroups().z); // 3D diBuilder3D->setKernel(pKernel); diBuilder3D->setDispatchGeometry(Vec3(256, 256, 256), Vec3(16, 16, 16), Vec3(0, 0, 0)); MultiDispatchInfo mdi3D; diBuilder3D->bake(mdi3D); EXPECT_EQ(1u, mdi3D.size()); const DispatchInfo *di3D = mdi3D.begin(); EXPECT_EQ(pKernel, di3D->getKernel()); EXPECT_EQ(256u, di3D->getGWS().x); EXPECT_EQ(256u, di3D->getGWS().y); EXPECT_EQ(256u, di3D->getGWS().z); EXPECT_EQ(16u, di3D->getEnqueuedWorkgroupSize().x); EXPECT_EQ(16u, di3D->getEnqueuedWorkgroupSize().y); EXPECT_EQ(16u, di3D->getEnqueuedWorkgroupSize().z); EXPECT_EQ(0u, di3D->getOffset().x); EXPECT_EQ(0u, di3D->getOffset().y); EXPECT_EQ(0u, di3D->getOffset().z); EXPECT_EQ(16u, di3D->getLocalWorkgroupSize().x); EXPECT_EQ(16u, di3D->getLocalWorkgroupSize().y); EXPECT_EQ(16u, di3D->getLocalWorkgroupSize().z); EXPECT_EQ(16u, di3D->getTotalNumberOfWorkgroups().x); EXPECT_EQ(16u, di3D->getTotalNumberOfWorkgroups().y); EXPECT_EQ(16u, di3D->getTotalNumberOfWorkgroups().z); EXPECT_EQ(16u, di3D->getNumberOfWorkgroups().x); EXPECT_EQ(16u, di3D->getNumberOfWorkgroups().y); EXPECT_EQ(16u, di3D->getNumberOfWorkgroups().z); EXPECT_EQ(0u, di3D->getStartOfWorkgroups().x); EXPECT_EQ(0u, di3D->getStartOfWorkgroups().y); EXPECT_EQ(0u, di3D->getStartOfWorkgroups().z); delete diBuilder3D; delete diBuilder2D; delete diBuilder1D; } TEST_F(DispatchInfoBuilderTest, GivenSplitWhenGettingWalkerInfoThenCorrectValuesAreReturned) { DispatchInfoBuilder *diBuilder1D = new DispatchInfoBuilder(*pClDevice); ASSERT_NE(nullptr, diBuilder1D); DispatchInfoBuilder *diBuilder2D = new DispatchInfoBuilder(*pClDevice); 
ASSERT_NE(nullptr, diBuilder2D); DispatchInfoBuilder *diBuilder3D = new DispatchInfoBuilder(*pClDevice); ASSERT_NE(nullptr, diBuilder3D); // 1D diBuilder1D->setKernel(pKernel); diBuilder1D->setDispatchGeometry(Vec3(256, 0, 0), Vec3(15, 0, 0), Vec3(0, 0, 0)); MultiDispatchInfo mdi1D; diBuilder1D->bake(mdi1D); EXPECT_EQ(2u, mdi1D.size()); auto dispatchId = 0; for (auto &dispatchInfo : mdi1D) { EXPECT_EQ(pKernel, dispatchInfo.getKernel()); EXPECT_EQ(256u, dispatchInfo.getGWS().x); EXPECT_EQ(1u, dispatchInfo.getGWS().y); EXPECT_EQ(1u, dispatchInfo.getGWS().z); EXPECT_EQ(15u, dispatchInfo.getEnqueuedWorkgroupSize().x); EXPECT_EQ(1u, dispatchInfo.getEnqueuedWorkgroupSize().y); EXPECT_EQ(1u, dispatchInfo.getEnqueuedWorkgroupSize().z); EXPECT_EQ(0u, dispatchInfo.getOffset().x); EXPECT_EQ(0u, dispatchInfo.getOffset().y); EXPECT_EQ(0u, dispatchInfo.getOffset().z); EXPECT_EQ(18u, dispatchInfo.getTotalNumberOfWorkgroups().x); EXPECT_EQ(1u, dispatchInfo.getTotalNumberOfWorkgroups().y); EXPECT_EQ(1u, dispatchInfo.getTotalNumberOfWorkgroups().z); switch (dispatchId) { case 0: EXPECT_EQ(255u, dispatchInfo.getActualWorkgroupSize().x); EXPECT_EQ(1u, dispatchInfo.getActualWorkgroupSize().y); EXPECT_EQ(1u, dispatchInfo.getActualWorkgroupSize().z); EXPECT_EQ(15u, dispatchInfo.getLocalWorkgroupSize().x); EXPECT_EQ(1u, dispatchInfo.getLocalWorkgroupSize().y); EXPECT_EQ(1u, dispatchInfo.getLocalWorkgroupSize().z); EXPECT_EQ(17u, dispatchInfo.getNumberOfWorkgroups().x); EXPECT_EQ(1u, dispatchInfo.getNumberOfWorkgroups().y); EXPECT_EQ(1u, dispatchInfo.getNumberOfWorkgroups().z); EXPECT_EQ(0u, dispatchInfo.getStartOfWorkgroups().x); EXPECT_EQ(0u, dispatchInfo.getStartOfWorkgroups().y); EXPECT_EQ(0u, dispatchInfo.getStartOfWorkgroups().z); break; case 1: EXPECT_EQ(1u, dispatchInfo.getActualWorkgroupSize().x); EXPECT_EQ(1u, dispatchInfo.getActualWorkgroupSize().y); EXPECT_EQ(1u, dispatchInfo.getActualWorkgroupSize().z); EXPECT_EQ(1u, dispatchInfo.getLocalWorkgroupSize().x); EXPECT_EQ(1u, 
dispatchInfo.getLocalWorkgroupSize().y); EXPECT_EQ(1u, dispatchInfo.getLocalWorkgroupSize().z); EXPECT_EQ(18u, dispatchInfo.getNumberOfWorkgroups().x); EXPECT_EQ(1u, dispatchInfo.getNumberOfWorkgroups().y); EXPECT_EQ(1u, dispatchInfo.getNumberOfWorkgroups().z); EXPECT_EQ(17u, dispatchInfo.getStartOfWorkgroups().x); EXPECT_EQ(0u, dispatchInfo.getStartOfWorkgroups().y); EXPECT_EQ(0u, dispatchInfo.getStartOfWorkgroups().z); break; } dispatchId++; } //2D diBuilder2D->setKernel(pKernel); diBuilder2D->setDispatchGeometry(Vec3(256, 256, 0), Vec3(15, 15, 0), Vec3(0, 0, 0)); MultiDispatchInfo mdi2D; diBuilder2D->bake(mdi2D); EXPECT_EQ(4u, mdi2D.size()); dispatchId = 0; for (auto &dispatchInfo : mdi2D) { EXPECT_EQ(pKernel, dispatchInfo.getKernel()); EXPECT_EQ(256u, dispatchInfo.getGWS().x); EXPECT_EQ(256u, dispatchInfo.getGWS().y); EXPECT_EQ(1u, dispatchInfo.getGWS().z); EXPECT_EQ(15u, dispatchInfo.getEnqueuedWorkgroupSize().x); EXPECT_EQ(15u, dispatchInfo.getEnqueuedWorkgroupSize().y); EXPECT_EQ(1u, dispatchInfo.getEnqueuedWorkgroupSize().z); EXPECT_EQ(0u, dispatchInfo.getOffset().x); EXPECT_EQ(0u, dispatchInfo.getOffset().y); EXPECT_EQ(0u, dispatchInfo.getOffset().z); EXPECT_EQ(18u, dispatchInfo.getTotalNumberOfWorkgroups().x); EXPECT_EQ(18u, dispatchInfo.getTotalNumberOfWorkgroups().y); EXPECT_EQ(1u, dispatchInfo.getTotalNumberOfWorkgroups().z); switch (dispatchId) { case 0: EXPECT_EQ(255u, dispatchInfo.getActualWorkgroupSize().x); EXPECT_EQ(255u, dispatchInfo.getActualWorkgroupSize().y); EXPECT_EQ(1u, dispatchInfo.getActualWorkgroupSize().z); EXPECT_EQ(15u, dispatchInfo.getLocalWorkgroupSize().x); EXPECT_EQ(15u, dispatchInfo.getLocalWorkgroupSize().y); EXPECT_EQ(1u, dispatchInfo.getLocalWorkgroupSize().z); EXPECT_EQ(17u, dispatchInfo.getNumberOfWorkgroups().x); EXPECT_EQ(17u, dispatchInfo.getNumberOfWorkgroups().y); EXPECT_EQ(1u, dispatchInfo.getNumberOfWorkgroups().z); EXPECT_EQ(0u, dispatchInfo.getStartOfWorkgroups().x); EXPECT_EQ(0u, 
dispatchInfo.getStartOfWorkgroups().y); EXPECT_EQ(0u, dispatchInfo.getStartOfWorkgroups().z); break; case 1: EXPECT_EQ(1u, dispatchInfo.getActualWorkgroupSize().x); EXPECT_EQ(255u, dispatchInfo.getActualWorkgroupSize().y); EXPECT_EQ(1u, dispatchInfo.getActualWorkgroupSize().z); EXPECT_EQ(1u, dispatchInfo.getLocalWorkgroupSize().x); EXPECT_EQ(15u, dispatchInfo.getLocalWorkgroupSize().y); EXPECT_EQ(1u, dispatchInfo.getLocalWorkgroupSize().z); EXPECT_EQ(18u, dispatchInfo.getNumberOfWorkgroups().x); EXPECT_EQ(17u, dispatchInfo.getNumberOfWorkgroups().y); EXPECT_EQ(1u, dispatchInfo.getNumberOfWorkgroups().z); EXPECT_EQ(17u, dispatchInfo.getStartOfWorkgroups().x); EXPECT_EQ(0u, dispatchInfo.getStartOfWorkgroups().y); EXPECT_EQ(0u, dispatchInfo.getStartOfWorkgroups().z); break; case 2: EXPECT_EQ(255u, dispatchInfo.getActualWorkgroupSize().x); EXPECT_EQ(1u, dispatchInfo.getActualWorkgroupSize().y); EXPECT_EQ(1u, dispatchInfo.getActualWorkgroupSize().z); EXPECT_EQ(15u, dispatchInfo.getLocalWorkgroupSize().x); EXPECT_EQ(1u, dispatchInfo.getLocalWorkgroupSize().y); EXPECT_EQ(1u, dispatchInfo.getLocalWorkgroupSize().z); EXPECT_EQ(17u, dispatchInfo.getNumberOfWorkgroups().x); EXPECT_EQ(18u, dispatchInfo.getNumberOfWorkgroups().y); EXPECT_EQ(1u, dispatchInfo.getNumberOfWorkgroups().z); EXPECT_EQ(0u, dispatchInfo.getStartOfWorkgroups().x); EXPECT_EQ(17u, dispatchInfo.getStartOfWorkgroups().y); EXPECT_EQ(0u, dispatchInfo.getStartOfWorkgroups().z); break; case 3: EXPECT_EQ(1u, dispatchInfo.getActualWorkgroupSize().x); EXPECT_EQ(1u, dispatchInfo.getActualWorkgroupSize().y); EXPECT_EQ(1u, dispatchInfo.getActualWorkgroupSize().z); EXPECT_EQ(1u, dispatchInfo.getLocalWorkgroupSize().x); EXPECT_EQ(1u, dispatchInfo.getLocalWorkgroupSize().y); EXPECT_EQ(1u, dispatchInfo.getLocalWorkgroupSize().z); EXPECT_EQ(18u, dispatchInfo.getNumberOfWorkgroups().x); EXPECT_EQ(18u, dispatchInfo.getNumberOfWorkgroups().y); EXPECT_EQ(1u, dispatchInfo.getNumberOfWorkgroups().z); EXPECT_EQ(17u, 
dispatchInfo.getStartOfWorkgroups().x); EXPECT_EQ(17u, dispatchInfo.getStartOfWorkgroups().y); EXPECT_EQ(0u, dispatchInfo.getStartOfWorkgroups().z); break; } dispatchId++; } //3D diBuilder3D->setKernel(pKernel); diBuilder3D->setDispatchGeometry(Vec3(256, 256, 256), Vec3(15, 15, 15), Vec3(0, 0, 0)); MultiDispatchInfo mdi3D; diBuilder3D->bake(mdi3D); EXPECT_EQ(8u, mdi3D.size()); dispatchId = 0; for (auto &dispatchInfo : mdi3D) { EXPECT_EQ(pKernel, dispatchInfo.getKernel()); EXPECT_EQ(256u, dispatchInfo.getGWS().x); EXPECT_EQ(256u, dispatchInfo.getGWS().y); EXPECT_EQ(256u, dispatchInfo.getGWS().z); EXPECT_EQ(15u, dispatchInfo.getEnqueuedWorkgroupSize().x); EXPECT_EQ(15u, dispatchInfo.getEnqueuedWorkgroupSize().y); EXPECT_EQ(15u, dispatchInfo.getEnqueuedWorkgroupSize().z); EXPECT_EQ(0u, dispatchInfo.getOffset().x); EXPECT_EQ(0u, dispatchInfo.getOffset().y); EXPECT_EQ(0u, dispatchInfo.getOffset().z); EXPECT_EQ(18u, dispatchInfo.getTotalNumberOfWorkgroups().x); EXPECT_EQ(18u, dispatchInfo.getTotalNumberOfWorkgroups().y); EXPECT_EQ(18u, dispatchInfo.getTotalNumberOfWorkgroups().z); switch (dispatchId) { case 0: EXPECT_EQ(255u, dispatchInfo.getActualWorkgroupSize().x); EXPECT_EQ(255u, dispatchInfo.getActualWorkgroupSize().y); EXPECT_EQ(255u, dispatchInfo.getActualWorkgroupSize().z); EXPECT_EQ(15u, dispatchInfo.getLocalWorkgroupSize().x); EXPECT_EQ(15u, dispatchInfo.getLocalWorkgroupSize().y); EXPECT_EQ(15u, dispatchInfo.getLocalWorkgroupSize().z); EXPECT_EQ(17u, dispatchInfo.getNumberOfWorkgroups().x); EXPECT_EQ(17u, dispatchInfo.getNumberOfWorkgroups().y); EXPECT_EQ(17u, dispatchInfo.getNumberOfWorkgroups().z); EXPECT_EQ(0u, dispatchInfo.getStartOfWorkgroups().x); EXPECT_EQ(0u, dispatchInfo.getStartOfWorkgroups().y); EXPECT_EQ(0u, dispatchInfo.getStartOfWorkgroups().z); break; case 1: EXPECT_EQ(1u, dispatchInfo.getActualWorkgroupSize().x); EXPECT_EQ(255u, dispatchInfo.getActualWorkgroupSize().y); EXPECT_EQ(255u, dispatchInfo.getActualWorkgroupSize().z); EXPECT_EQ(1u, 
dispatchInfo.getLocalWorkgroupSize().x); EXPECT_EQ(15u, dispatchInfo.getLocalWorkgroupSize().y); EXPECT_EQ(15u, dispatchInfo.getLocalWorkgroupSize().z); EXPECT_EQ(18u, dispatchInfo.getNumberOfWorkgroups().x); EXPECT_EQ(17u, dispatchInfo.getNumberOfWorkgroups().y); EXPECT_EQ(17u, dispatchInfo.getNumberOfWorkgroups().z); EXPECT_EQ(17u, dispatchInfo.getStartOfWorkgroups().x); EXPECT_EQ(0u, dispatchInfo.getStartOfWorkgroups().y); EXPECT_EQ(0u, dispatchInfo.getStartOfWorkgroups().z); break; case 2: EXPECT_EQ(255u, dispatchInfo.getActualWorkgroupSize().x); EXPECT_EQ(1u, dispatchInfo.getActualWorkgroupSize().y); EXPECT_EQ(255u, dispatchInfo.getActualWorkgroupSize().z); EXPECT_EQ(15u, dispatchInfo.getLocalWorkgroupSize().x); EXPECT_EQ(1u, dispatchInfo.getLocalWorkgroupSize().y); EXPECT_EQ(15u, dispatchInfo.getLocalWorkgroupSize().z); EXPECT_EQ(17u, dispatchInfo.getNumberOfWorkgroups().x); EXPECT_EQ(18u, dispatchInfo.getNumberOfWorkgroups().y); EXPECT_EQ(17u, dispatchInfo.getNumberOfWorkgroups().z); EXPECT_EQ(0u, dispatchInfo.getStartOfWorkgroups().x); EXPECT_EQ(17u, dispatchInfo.getStartOfWorkgroups().y); EXPECT_EQ(0u, dispatchInfo.getStartOfWorkgroups().z); break; case 3: EXPECT_EQ(1u, dispatchInfo.getActualWorkgroupSize().x); EXPECT_EQ(1u, dispatchInfo.getActualWorkgroupSize().y); EXPECT_EQ(255u, dispatchInfo.getActualWorkgroupSize().z); EXPECT_EQ(1u, dispatchInfo.getLocalWorkgroupSize().x); EXPECT_EQ(1u, dispatchInfo.getLocalWorkgroupSize().y); EXPECT_EQ(15u, dispatchInfo.getLocalWorkgroupSize().z); EXPECT_EQ(18u, dispatchInfo.getNumberOfWorkgroups().x); EXPECT_EQ(18u, dispatchInfo.getNumberOfWorkgroups().y); EXPECT_EQ(17u, dispatchInfo.getNumberOfWorkgroups().z); EXPECT_EQ(17u, dispatchInfo.getStartOfWorkgroups().x); EXPECT_EQ(17u, dispatchInfo.getStartOfWorkgroups().y); EXPECT_EQ(0u, dispatchInfo.getStartOfWorkgroups().z); break; case 4: EXPECT_EQ(255u, dispatchInfo.getActualWorkgroupSize().x); EXPECT_EQ(255u, dispatchInfo.getActualWorkgroupSize().y); EXPECT_EQ(1u, 
dispatchInfo.getActualWorkgroupSize().z); EXPECT_EQ(15u, dispatchInfo.getLocalWorkgroupSize().x); EXPECT_EQ(15u, dispatchInfo.getLocalWorkgroupSize().y); EXPECT_EQ(1u, dispatchInfo.getLocalWorkgroupSize().z); EXPECT_EQ(17u, dispatchInfo.getNumberOfWorkgroups().x); EXPECT_EQ(17u, dispatchInfo.getNumberOfWorkgroups().y); EXPECT_EQ(18u, dispatchInfo.getNumberOfWorkgroups().z); EXPECT_EQ(0u, dispatchInfo.getStartOfWorkgroups().x); EXPECT_EQ(0u, dispatchInfo.getStartOfWorkgroups().y); EXPECT_EQ(17u, dispatchInfo.getStartOfWorkgroups().z); break; case 5: EXPECT_EQ(1u, dispatchInfo.getActualWorkgroupSize().x); EXPECT_EQ(255u, dispatchInfo.getActualWorkgroupSize().y); EXPECT_EQ(1u, dispatchInfo.getActualWorkgroupSize().z); EXPECT_EQ(1u, dispatchInfo.getLocalWorkgroupSize().x); EXPECT_EQ(15u, dispatchInfo.getLocalWorkgroupSize().y); EXPECT_EQ(1u, dispatchInfo.getLocalWorkgroupSize().z); EXPECT_EQ(18u, dispatchInfo.getNumberOfWorkgroups().x); EXPECT_EQ(17u, dispatchInfo.getNumberOfWorkgroups().y); EXPECT_EQ(18u, dispatchInfo.getNumberOfWorkgroups().z); EXPECT_EQ(17u, dispatchInfo.getStartOfWorkgroups().x); EXPECT_EQ(0u, dispatchInfo.getStartOfWorkgroups().y); EXPECT_EQ(17u, dispatchInfo.getStartOfWorkgroups().z); break; case 6: EXPECT_EQ(255u, dispatchInfo.getActualWorkgroupSize().x); EXPECT_EQ(1u, dispatchInfo.getActualWorkgroupSize().y); EXPECT_EQ(1u, dispatchInfo.getActualWorkgroupSize().z); EXPECT_EQ(15u, dispatchInfo.getLocalWorkgroupSize().x); EXPECT_EQ(1u, dispatchInfo.getLocalWorkgroupSize().y); EXPECT_EQ(1u, dispatchInfo.getLocalWorkgroupSize().z); EXPECT_EQ(17u, dispatchInfo.getNumberOfWorkgroups().x); EXPECT_EQ(18u, dispatchInfo.getNumberOfWorkgroups().y); EXPECT_EQ(18u, dispatchInfo.getNumberOfWorkgroups().z); EXPECT_EQ(0u, dispatchInfo.getStartOfWorkgroups().x); EXPECT_EQ(17u, dispatchInfo.getStartOfWorkgroups().y); EXPECT_EQ(17u, dispatchInfo.getStartOfWorkgroups().z); break; case 7: EXPECT_EQ(1u, dispatchInfo.getActualWorkgroupSize().x); EXPECT_EQ(1u, 
dispatchInfo.getActualWorkgroupSize().y); EXPECT_EQ(1u, dispatchInfo.getActualWorkgroupSize().z); EXPECT_EQ(1u, dispatchInfo.getLocalWorkgroupSize().x); EXPECT_EQ(1u, dispatchInfo.getLocalWorkgroupSize().y); EXPECT_EQ(1u, dispatchInfo.getLocalWorkgroupSize().z); EXPECT_EQ(18u, dispatchInfo.getNumberOfWorkgroups().x); EXPECT_EQ(18u, dispatchInfo.getNumberOfWorkgroups().y); EXPECT_EQ(18u, dispatchInfo.getNumberOfWorkgroups().z); EXPECT_EQ(17u, dispatchInfo.getStartOfWorkgroups().x); EXPECT_EQ(17u, dispatchInfo.getStartOfWorkgroups().y); EXPECT_EQ(17u, dispatchInfo.getStartOfWorkgroups().z); break; } dispatchId++; } delete diBuilder3D; delete diBuilder2D; delete diBuilder1D; } TEST_F(DispatchInfoBuilderTest, GivenSplit1dWhenSettingDispatchGeometryThenMdiSizeIsCorrect) { DispatchInfoBuilder *diBuilder = new DispatchInfoBuilder(*pClDevice); ASSERT_NE(nullptr, diBuilder); diBuilder->setDispatchGeometry(Vec3(0, 0, 0), Vec3(2, 0, 0), Vec3(0, 0, 0)); MultiDispatchInfo mdiSize0; diBuilder->bake(mdiSize0); EXPECT_EQ(0u, mdiSize0.size()); diBuilder->setDispatchGeometry(Vec3(2, 0, 0), Vec3(2, 0, 0), Vec3(0, 0, 0)); MultiDispatchInfo mdiSize1; diBuilder->bake(mdiSize1); EXPECT_EQ(1u, mdiSize1.size()); diBuilder->setDispatchGeometry(Vec3(3, 0, 0), Vec3(2, 0, 0), Vec3(0, 0, 0)); MultiDispatchInfo mdiSize2; diBuilder->bake(mdiSize2); EXPECT_EQ(2u, mdiSize2.size()); delete diBuilder; } TEST_F(DispatchInfoBuilderTest, GivenSplit2dWhenSettingDispatchGeometryThenMdiSizeIsCorrect) { DispatchInfoBuilder *diBuilder = new DispatchInfoBuilder(*pClDevice); ASSERT_NE(nullptr, diBuilder); diBuilder->setDispatchGeometry(Vec3(0, 0, 0), Vec3(2, 2, 0), Vec3(0, 0, 0)); MultiDispatchInfo mdiSize00; diBuilder->bake(mdiSize00); EXPECT_EQ(0u, mdiSize00.size()); diBuilder->setDispatchGeometry(Vec3(2, 2, 0), Vec3(2, 2, 0), Vec3(0, 0, 0)); MultiDispatchInfo mdiSize11; diBuilder->bake(mdiSize11); EXPECT_EQ(1u, mdiSize11.size()); diBuilder->setDispatchGeometry(Vec3(3, 2, 0), Vec3(2, 2, 0), Vec3(0, 0, 0)); 
MultiDispatchInfo mdiSize21; diBuilder->bake(mdiSize21); EXPECT_EQ(2u, mdiSize21.size()); diBuilder->setDispatchGeometry(Vec3(2, 3, 0), Vec3(2, 2, 0), Vec3(0, 0, 0)); MultiDispatchInfo mdiSize12; diBuilder->bake(mdiSize12); EXPECT_EQ(2u, mdiSize12.size()); diBuilder->setDispatchGeometry(Vec3(3, 3, 0), Vec3(2, 2, 0), Vec3(0, 0, 0)); MultiDispatchInfo mdiSize22; diBuilder->bake(mdiSize22); EXPECT_EQ(4u, mdiSize22.size()); delete diBuilder; } TEST_F(DispatchInfoBuilderTest, GivenSplit3dWhenSettingDispatchGeometryThenMdiSizeIsCorrect) { DispatchInfoBuilder *diBuilder = new DispatchInfoBuilder(*pClDevice); ASSERT_NE(nullptr, diBuilder); diBuilder->setDispatchGeometry(Vec3(0, 0, 0), Vec3(2, 2, 2), Vec3(0, 0, 0)); MultiDispatchInfo mdiSize000; diBuilder->bake(mdiSize000); EXPECT_EQ(0u, mdiSize000.size()); diBuilder->setDispatchGeometry(Vec3(2, 2, 2), Vec3(2, 2, 2), Vec3(0, 0, 0)); MultiDispatchInfo mdiSize111; diBuilder->bake(mdiSize111); EXPECT_EQ(1u, mdiSize111.size()); diBuilder->setDispatchGeometry(Vec3(3, 2, 2), Vec3(2, 2, 2), Vec3(0, 0, 0)); MultiDispatchInfo mdiSize211; diBuilder->bake(mdiSize211); EXPECT_EQ(2u, mdiSize211.size()); diBuilder->setDispatchGeometry(Vec3(2, 3, 2), Vec3(2, 2, 2), Vec3(0, 0, 0)); MultiDispatchInfo mdiSize121; diBuilder->bake(mdiSize121); EXPECT_EQ(2u, mdiSize121.size()); diBuilder->setDispatchGeometry(Vec3(2, 2, 3), Vec3(2, 2, 2), Vec3(0, 0, 0)); MultiDispatchInfo mdiSize112; diBuilder->bake(mdiSize112); EXPECT_EQ(2u, mdiSize112.size()); diBuilder->setDispatchGeometry(Vec3(3, 3, 2), Vec3(2, 2, 2), Vec3(0, 0, 0)); MultiDispatchInfo mdiSize221; diBuilder->bake(mdiSize221); EXPECT_EQ(4u, mdiSize221.size()); diBuilder->setDispatchGeometry(Vec3(3, 2, 3), Vec3(2, 2, 2), Vec3(0, 0, 0)); MultiDispatchInfo mdiSize212; diBuilder->bake(mdiSize212); EXPECT_EQ(4u, mdiSize212.size()); diBuilder->setDispatchGeometry(Vec3(2, 3, 3), Vec3(2, 2, 2), Vec3(0, 0, 0)); MultiDispatchInfo mdiSize122; diBuilder->bake(mdiSize122); EXPECT_EQ(4u, mdiSize122.size()); 
diBuilder->setDispatchGeometry(Vec3(3, 3, 3), Vec3(2, 2, 2), Vec3(0, 0, 0)); MultiDispatchInfo mdiSize222; diBuilder->bake(mdiSize222); EXPECT_EQ(8u, mdiSize222.size()); delete diBuilder; } TEST_F(DispatchInfoBuilderTest, WhenSettingKernelArgThenAddressesAreCorrect) { Buffer *buffer = new MockBuffer(); auto val = (cl_mem)buffer; auto pVal = &val; DispatchInfoBuilder *diBuilder = new DispatchInfoBuilder(*pClDevice); ASSERT_NE(nullptr, diBuilder); diBuilder->setKernel(pKernel); diBuilder->setDispatchGeometry(Vec3(256, 256, 256), Vec3(16, 16, 16), Vec3(0, 0, 0)); MultiDispatchInfo multiDispatchInfo; diBuilder->bake(multiDispatchInfo); clearCrossThreadData(); EXPECT_EQ(CL_SUCCESS, diBuilder->setArg(0, sizeof(cl_mem *), pVal)); char data[128]; void *svmPtr = &data; EXPECT_EQ(CL_SUCCESS, diBuilder->setArgSvm(1, sizeof(svmPtr), svmPtr, nullptr, 0u)); MockGraphicsAllocation svmAlloc(svmPtr, 128); EXPECT_EQ(CL_SUCCESS, diBuilder->setArgSvmAlloc(2, svmPtr, &svmAlloc)); for (auto &dispatchInfo : multiDispatchInfo) { auto crossthreadOffset0 = pKernelInfo->argAsPtr(0).stateless; EXPECT_EQ(buffer->getCpuAddress(), *reinterpret_cast((dispatchInfo.getKernel()->getCrossThreadData() + crossthreadOffset0))); auto crossthreadOffset1 = pKernelInfo->argAsPtr(1).stateless; EXPECT_EQ(svmPtr, *(reinterpret_cast(dispatchInfo.getKernel()->getCrossThreadData() + crossthreadOffset1))); auto crossthreadOffset2 = pKernelInfo->argAsPtr(2).stateless; EXPECT_EQ(svmPtr, *(reinterpret_cast(dispatchInfo.getKernel()->getCrossThreadData() + crossthreadOffset2))); } delete buffer; delete diBuilder; } TEST_F(DispatchInfoBuilderTest, GivenSplitWhenSettingKernelArgThenAddressesAreCorrect) { DispatchInfoBuilder builder1D(*pClDevice); DispatchInfoBuilder builder2D(*pClDevice); DispatchInfoBuilder builder3D(*pClDevice); Buffer *buffer = new MockBuffer(); auto val = (cl_mem)buffer; auto pVal = &val; char data[128]; void *svmPtr = &data; builder1D.setKernel(pKernel); builder2D.setKernel(pKernel); 
builder3D.setKernel(pKernel); Vec3 GWS(256, 256, 256); Vec3 ELWS(16, 16, 16); Vec3 offset(0, 0, 0); builder1D.setDispatchGeometry(SplitDispatch::RegionCoordX::Left, GWS, ELWS, offset); builder2D.setDispatchGeometry(SplitDispatch::RegionCoordX::Left, SplitDispatch::RegionCoordY::Top, GWS, ELWS, offset); builder3D.setDispatchGeometry(SplitDispatch::RegionCoordX::Left, SplitDispatch::RegionCoordY::Top, SplitDispatch::RegionCoordZ::Front, GWS, ELWS, offset); MultiDispatchInfo mdi1D; MultiDispatchInfo mdi2D; MultiDispatchInfo mdi3D; builder1D.bake(mdi1D); builder1D.bake(mdi2D); builder1D.bake(mdi3D); //Set arg clearCrossThreadData(); builder1D.setArg(SplitDispatch::RegionCoordX::Left, static_cast(0), sizeof(cl_mem *), pVal); for (auto &dispatchInfo : mdi1D) { EXPECT_EQ(buffer->getCpuAddress(), *reinterpret_cast((dispatchInfo.getKernel()->getCrossThreadData() + 0x10))); } clearCrossThreadData(); builder2D.setArg(SplitDispatch::RegionCoordX::Left, SplitDispatch::RegionCoordY::Top, static_cast(0), sizeof(cl_mem *), pVal); for (auto &dispatchInfo : mdi2D) { EXPECT_EQ(buffer->getCpuAddress(), *reinterpret_cast((dispatchInfo.getKernel()->getCrossThreadData() + 0x10))); } clearCrossThreadData(); builder3D.setArg(SplitDispatch::RegionCoordX::Left, SplitDispatch::RegionCoordY::Top, SplitDispatch::RegionCoordZ::Front, static_cast(0), sizeof(cl_mem *), pVal); for (auto &dispatchInfo : mdi3D) { EXPECT_EQ(buffer->getCpuAddress(), *reinterpret_cast((dispatchInfo.getKernel()->getCrossThreadData() + 0x10))); } //Set arg SVM clearCrossThreadData(); builder1D.setArgSvm(SplitDispatch::RegionCoordX::Left, 1, sizeof(svmPtr), svmPtr, nullptr, 0u); for (auto &dispatchInfo : mdi1D) { EXPECT_EQ(svmPtr, *(reinterpret_cast(dispatchInfo.getKernel()->getCrossThreadData() + 0x30))); } clearCrossThreadData(); builder2D.setArgSvm(SplitDispatch::RegionCoordX::Left, SplitDispatch::RegionCoordY::Top, 1, sizeof(svmPtr), svmPtr, nullptr, 0u); for (auto &dispatchInfo : mdi2D) { EXPECT_EQ(svmPtr, 
*(reinterpret_cast(dispatchInfo.getKernel()->getCrossThreadData() + 0x30))); } clearCrossThreadData(); builder3D.setArgSvm(SplitDispatch::RegionCoordX::Left, SplitDispatch::RegionCoordY::Top, SplitDispatch::RegionCoordZ::Front, 1, sizeof(svmPtr), svmPtr, nullptr, 0u); for (auto &dispatchInfo : mdi3D) { EXPECT_EQ(svmPtr, *(reinterpret_cast(dispatchInfo.getKernel()->getCrossThreadData() + 0x30))); } delete buffer; } TEST_F(DispatchInfoBuilderTest, GivenInvalidInputWhenSettingKernelArgThenInvalidMemObjectErrorIsReturned) { char *buffer = new char[sizeof(Buffer)]; auto val = (cl_mem)buffer; auto pVal = &val; DispatchInfoBuilder *diBuilder = new DispatchInfoBuilder(*pClDevice); ASSERT_NE(nullptr, diBuilder); diBuilder->setKernel(pKernel); diBuilder->setDispatchGeometry(Vec3(256, 256, 256), Vec3(16, 16, 16), Vec3(0, 0, 0)); MultiDispatchInfo multiDispatchInfo; diBuilder->bake(multiDispatchInfo); EXPECT_EQ(CL_INVALID_MEM_OBJECT, diBuilder->setArg(0, sizeof(cl_mem *), pVal)); EXPECT_EQ(CL_SUCCESS, diBuilder->setArgSvm(1, sizeof(void *), nullptr, nullptr, 0u)); delete diBuilder; delete[] buffer; } TEST_F(DispatchInfoBuilderTest, GivenNullKernelWhenSettingKernelArgThenSuccessIsReturned) { Buffer *buffer = new MockBuffer(); auto val = (cl_mem)buffer; auto pVal = &val; char data[128]; void *svmPtr = &data; MockGraphicsAllocation svmAlloc(svmPtr, 128); DispatchInfoBuilder *diBuilder = new DispatchInfoBuilder(*pClDevice); ASSERT_NE(nullptr, diBuilder); diBuilder->setDispatchGeometry(Vec3(256, 256, 256), Vec3(16, 16, 16), Vec3(0, 0, 0)); MultiDispatchInfo multiDispatchInfo; diBuilder->bake(multiDispatchInfo); EXPECT_EQ(CL_SUCCESS, diBuilder->setArg(0, sizeof(cl_mem *), pVal)); EXPECT_EQ(CL_SUCCESS, diBuilder->setArgSvm(1, sizeof(svmPtr), svmPtr, nullptr, 0u)); EXPECT_EQ(CL_SUCCESS, diBuilder->setArgSvmAlloc(2, svmPtr, &svmAlloc)); delete diBuilder; delete buffer; } TEST_F(DispatchInfoBuilderTest, WhenDimensionIsNotSetThenProperDimensionIsReturned) { MultiDispatchInfo mdi; auto 
diBuilder = std::make_unique>(*pClDevice); ASSERT_NE(nullptr, diBuilder); diBuilder->setDispatchGeometry(0u, Vec3(128, 4, 1), Vec3(0, 0, 0), Vec3(0, 0, 0)); diBuilder->bake(mdi); for (auto &dispatchInfo : mdi) { EXPECT_EQ(2u, dispatchInfo.getDim()); } } TEST_F(DispatchInfoBuilderTest, WhengDimensionIsNotMatchingGWSThenDimensionPassedAsArgumentIsReturned) { MultiDispatchInfo mdi; auto diBuilder = std::make_unique>(*pClDevice); ASSERT_NE(nullptr, diBuilder); diBuilder->setDispatchGeometry(2u, Vec3(128, 1, 1), Vec3(0, 0, 0), Vec3(0, 0, 0)); diBuilder->bake(mdi); for (auto &dispatchInfo : mdi) { EXPECT_EQ(2u, dispatchInfo.getDim()); } } } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/helpers/dispatch_info_tests.cpp000066400000000000000000000314771422164147700300450ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/source/helpers/dispatch_info.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/fixtures/context_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "gtest/gtest.h" #include using namespace NEO; class DispatchInfoFixture : public ContextFixture, public ClDeviceFixture { using ContextFixture::SetUp; public: DispatchInfoFixture() {} protected: void SetUp() { ClDeviceFixture::SetUp(); cl_device_id device = pClDevice; ContextFixture::SetUp(1, &device); pKernelInfo = std::make_unique(); pKernelInfo->setPerThreadScratchSize(1024, 0); pKernelInfo->setPrintfSurface(sizeof(uintptr_t), 0); pProgram = new MockProgram(pContext, false, toClDeviceVector(*pClDevice)); pKernel = new MockKernel(pProgram, *pKernelInfo, *pClDevice); pKernel->slmTotalSize = 128; } void TearDown() { delete pKernel; delete pProgram; ContextFixture::TearDown(); ClDeviceFixture::TearDown(); } std::unique_ptr pKernelInfo; 
MockProgram *pProgram = nullptr; MockKernel *pKernel = nullptr; }; typedef Test DispatchInfoTest; TEST_F(DispatchInfoTest, GivenNoGeometryWhenDispatchInfoIsCreatedThenValuesAreSetCorrectly) { std::unique_ptr dispatchInfo(new DispatchInfo); EXPECT_EQ(nullptr, dispatchInfo->getKernel()); EXPECT_EQ(0u, dispatchInfo->getRequiredScratchSize()); EXPECT_FALSE(dispatchInfo->usesSlm()); EXPECT_FALSE(dispatchInfo->usesStatelessPrintfSurface()); EXPECT_EQ(0u, dispatchInfo->getDim()); Vec3 vecZero({0, 0, 0}); EXPECT_EQ(vecZero, dispatchInfo->getGWS()); EXPECT_EQ(vecZero, dispatchInfo->getEnqueuedWorkgroupSize()); EXPECT_EQ(vecZero, dispatchInfo->getOffset()); EXPECT_EQ(vecZero, dispatchInfo->getActualWorkgroupSize()); EXPECT_EQ(vecZero, dispatchInfo->getLocalWorkgroupSize()); EXPECT_EQ(vecZero, dispatchInfo->getTotalNumberOfWorkgroups()); EXPECT_EQ(vecZero, dispatchInfo->getNumberOfWorkgroups()); EXPECT_EQ(vecZero, dispatchInfo->getStartOfWorkgroups()); } TEST_F(DispatchInfoTest, GivenUserGeometryWhenDispatchInfoIsCreatedThenValuesAreSetCorrectly) { Vec3 gws({256, 256, 256}); Vec3 elws({16, 16, 16}); Vec3 offset({1, 2, 3}); std::unique_ptr dispatchInfo(new DispatchInfo(pClDevice, pKernel, 3, gws, elws, offset)); EXPECT_NE(nullptr, dispatchInfo->getKernel()); EXPECT_EQ(1024u, dispatchInfo->getRequiredScratchSize()); EXPECT_TRUE(dispatchInfo->usesSlm()); EXPECT_TRUE(dispatchInfo->usesStatelessPrintfSurface()); EXPECT_EQ(3u, dispatchInfo->getDim()); EXPECT_EQ(gws, dispatchInfo->getGWS()); EXPECT_EQ(elws, dispatchInfo->getEnqueuedWorkgroupSize()); EXPECT_EQ(offset, dispatchInfo->getOffset()); Vec3 vecZero({0, 0, 0}); EXPECT_EQ(vecZero, dispatchInfo->getActualWorkgroupSize()); EXPECT_EQ(vecZero, dispatchInfo->getLocalWorkgroupSize()); EXPECT_EQ(vecZero, dispatchInfo->getTotalNumberOfWorkgroups()); EXPECT_EQ(vecZero, dispatchInfo->getNumberOfWorkgroups()); EXPECT_EQ(vecZero, dispatchInfo->getStartOfWorkgroups()); dispatchInfo->setKernel(nullptr); EXPECT_EQ(nullptr, 
dispatchInfo->getKernel()); } TEST_F(DispatchInfoTest, GivenFullGeometryWhenDispatchInfoIsCreatedThenValuesAreSetCorrectly) { Vec3 gws({256, 256, 256}); Vec3 elws({32, 32, 32}); Vec3 offset({1, 2, 3}); Vec3 agws({256, 256, 256}); Vec3 lws({32, 32, 32}); Vec3 twgs({8, 8, 8}); Vec3 nwgs({8, 8, 8}); Vec3 swgs({0, 0, 0}); std::unique_ptr dispatchInfo(new DispatchInfo(pClDevice, pKernel, 3, gws, elws, offset, agws, lws, twgs, nwgs, swgs)); EXPECT_NE(nullptr, dispatchInfo->getKernel()); EXPECT_EQ(1024u, dispatchInfo->getRequiredScratchSize()); EXPECT_TRUE(dispatchInfo->usesSlm()); EXPECT_TRUE(dispatchInfo->usesStatelessPrintfSurface()); EXPECT_EQ(3u, dispatchInfo->getDim()); EXPECT_EQ(gws, dispatchInfo->getGWS()); EXPECT_EQ(elws, dispatchInfo->getEnqueuedWorkgroupSize()); EXPECT_EQ(offset, dispatchInfo->getOffset()); EXPECT_EQ(agws, dispatchInfo->getActualWorkgroupSize()); EXPECT_EQ(lws, dispatchInfo->getEnqueuedWorkgroupSize()); EXPECT_EQ(twgs, dispatchInfo->getTotalNumberOfWorkgroups()); EXPECT_EQ(nwgs, dispatchInfo->getNumberOfWorkgroups()); EXPECT_EQ(swgs, dispatchInfo->getStartOfWorkgroups()); dispatchInfo->setKernel(nullptr); EXPECT_EQ(nullptr, dispatchInfo->getKernel()); } TEST_F(DispatchInfoTest, WhenMultiDispatchInfoIsCreatedThenItIsNonCopyable) { EXPECT_FALSE(std::is_move_constructible::value); EXPECT_FALSE(std::is_copy_constructible::value); } TEST_F(DispatchInfoTest, WhenMultiDispatchInfoIsCreatedThenItIsNonAssignable) { EXPECT_FALSE(std::is_move_assignable::value); EXPECT_FALSE(std::is_copy_assignable::value); } TEST_F(DispatchInfoTest, WhenMultiDispatchInfoIsCreatedThenItIsEmpty) { MultiDispatchInfo multiDispatchInfo; EXPECT_TRUE(multiDispatchInfo.empty()); EXPECT_EQ(0u, multiDispatchInfo.getRequiredScratchSize()); EXPECT_FALSE(multiDispatchInfo.usesSlm()); EXPECT_FALSE(multiDispatchInfo.usesStatelessPrintfSurface()); EXPECT_EQ(0u, multiDispatchInfo.getRedescribedSurfaces().size()); } TEST_F(DispatchInfoTest, 
GivenRedescribedSurfacesWhenCreatingMultiDispatchInfoThenRedescribedSurfacesSizeisOne) { MultiDispatchInfo multiDispatchInfo; auto image = std::unique_ptr(Image2dHelper<>::create(pContext)); ASSERT_NE(nullptr, image); auto imageRedescribed = image->redescribe(); multiDispatchInfo.pushRedescribedMemObj(std::unique_ptr(imageRedescribed)); EXPECT_EQ(1u, multiDispatchInfo.getRedescribedSurfaces().size()); } TEST_F(DispatchInfoTest, GivenNoGeometryWhenMultiDispatchInfoIsCreatedThenValuesAreSetCorrectly) { DispatchInfo dispatchInfo; MultiDispatchInfo multiDispatchInfo; multiDispatchInfo.push(dispatchInfo); EXPECT_FALSE(multiDispatchInfo.empty()); EXPECT_EQ(0u, multiDispatchInfo.getRequiredScratchSize()); EXPECT_FALSE(multiDispatchInfo.usesSlm()); EXPECT_FALSE(multiDispatchInfo.usesStatelessPrintfSurface()); } TEST_F(DispatchInfoTest, GivenUserGeometryWhenMultiDispatchInfoIsCreatedThenValuesAreSetCorrectly) { Vec3 gws({256, 256, 256}); Vec3 elws({16, 16, 16}); Vec3 offset({1, 2, 3}); DispatchInfo dispatchInfo(pClDevice, pKernel, 3, gws, elws, offset); MultiDispatchInfo multiDispatchInfo; multiDispatchInfo.push(dispatchInfo); EXPECT_FALSE(multiDispatchInfo.empty()); EXPECT_EQ(1024u, multiDispatchInfo.getRequiredScratchSize()); EXPECT_TRUE(multiDispatchInfo.usesSlm()); EXPECT_TRUE(multiDispatchInfo.usesStatelessPrintfSurface()); EXPECT_NE(nullptr, multiDispatchInfo.begin()->getKernel()); EXPECT_EQ(gws, multiDispatchInfo.begin()->getGWS()); EXPECT_EQ(elws, multiDispatchInfo.begin()->getEnqueuedWorkgroupSize()); EXPECT_EQ(offset, multiDispatchInfo.begin()->getOffset()); Vec3 vecZero({0, 0, 0}); EXPECT_EQ(vecZero, multiDispatchInfo.begin()->getLocalWorkgroupSize()); EXPECT_EQ(vecZero, multiDispatchInfo.begin()->getTotalNumberOfWorkgroups()); EXPECT_EQ(vecZero, multiDispatchInfo.begin()->getNumberOfWorkgroups()); EXPECT_EQ(vecZero, multiDispatchInfo.begin()->getStartOfWorkgroups()); } TEST_F(DispatchInfoTest, 
GivenFullGeometryWhenMultiDispatchInfoIsCreatedThenValuesAreSetCorrectly) { Vec3 gws({256, 256, 256}); Vec3 elws({32, 32, 32}); Vec3 offset({1, 2, 3}); Vec3 agws({256, 256, 256}); Vec3 lws({32, 32, 32}); Vec3 twgs({8, 8, 8}); Vec3 nwgs({8, 8, 8}); Vec3 swgs({0, 0, 0}); DispatchInfo dispatchInfo(pClDevice, pKernel, 3, gws, elws, offset, agws, lws, twgs, nwgs, swgs); MultiDispatchInfo multiDispatchInfo; multiDispatchInfo.push(dispatchInfo); EXPECT_FALSE(multiDispatchInfo.empty()); EXPECT_EQ(1024u, multiDispatchInfo.getRequiredScratchSize()); EXPECT_TRUE(multiDispatchInfo.usesSlm()); EXPECT_TRUE(multiDispatchInfo.usesStatelessPrintfSurface()); EXPECT_NE(nullptr, multiDispatchInfo.begin()->getKernel()); EXPECT_EQ(gws, multiDispatchInfo.begin()->getGWS()); EXPECT_EQ(elws, multiDispatchInfo.begin()->getEnqueuedWorkgroupSize()); EXPECT_EQ(offset, multiDispatchInfo.begin()->getOffset()); EXPECT_EQ(agws, multiDispatchInfo.begin()->getActualWorkgroupSize()); EXPECT_EQ(lws, multiDispatchInfo.begin()->getLocalWorkgroupSize()); EXPECT_EQ(twgs, multiDispatchInfo.begin()->getTotalNumberOfWorkgroups()); EXPECT_EQ(nwgs, multiDispatchInfo.begin()->getNumberOfWorkgroups()); EXPECT_EQ(swgs, multiDispatchInfo.begin()->getStartOfWorkgroups()); } TEST_F(DispatchInfoTest, WhenSettingValuesInDispatchInfoThenThoseValuesAreSet) { DispatchInfo dispatchInfo; Vec3 gws({256, 256, 256}); Vec3 elws({16, 16, 16}); Vec3 offset({1, 2, 3}); Vec3 agws({256, 256, 256}); Vec3 lws({4, 4, 4}); Vec3 twgs({64, 64, 64}); Vec3 nwgs({64, 64, 64}); Vec3 swgs({8, 8, 8}); dispatchInfo.setGWS(gws); dispatchInfo.setEnqueuedWorkgroupSize(elws); dispatchInfo.setOffsets(offset); dispatchInfo.setActualGlobalWorkgroupSize(agws); dispatchInfo.setLWS(lws); dispatchInfo.setTotalNumberOfWorkgroups(twgs); dispatchInfo.setNumberOfWorkgroups(nwgs); dispatchInfo.setStartOfWorkgroups(swgs); EXPECT_EQ(gws, dispatchInfo.getGWS()); EXPECT_EQ(elws, dispatchInfo.getEnqueuedWorkgroupSize()); EXPECT_EQ(offset, dispatchInfo.getOffset()); 
EXPECT_EQ(agws, dispatchInfo.getActualWorkgroupSize()); EXPECT_EQ(lws, dispatchInfo.getLocalWorkgroupSize()); EXPECT_EQ(twgs, dispatchInfo.getTotalNumberOfWorkgroups()); EXPECT_EQ(nwgs, dispatchInfo.getNumberOfWorkgroups()); EXPECT_EQ(swgs, dispatchInfo.getStartOfWorkgroups()); } TEST_F(DispatchInfoTest, givenKernelWhenMultiDispatchInfoIsCreatedThenQueryMainKernel) { std::unique_ptr baseKernel(MockKernel::create(*pDevice, pProgram)); std::unique_ptr builtInKernel(MockKernel::create(*pDevice, pProgram)); builtInKernel->isBuiltIn = true; DispatchInfo baseDispatchInfo(pClDevice, baseKernel.get(), 1, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}); DispatchInfo builtInDispatchInfo(pClDevice, builtInKernel.get(), 1, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}); { MultiDispatchInfo multiDispatchInfo(baseKernel.get()); multiDispatchInfo.push(builtInDispatchInfo); EXPECT_EQ(baseKernel.get(), multiDispatchInfo.peekMainKernel()); // dont pick builtin kernel multiDispatchInfo.push(baseDispatchInfo); EXPECT_EQ(baseKernel.get(), multiDispatchInfo.peekMainKernel()); } { MultiDispatchInfo multiDispatchInfo; EXPECT_EQ(nullptr, multiDispatchInfo.peekMainKernel()); multiDispatchInfo.push(builtInDispatchInfo); EXPECT_EQ(builtInKernel.get(), multiDispatchInfo.peekMainKernel()); } { MultiDispatchInfo multiDispatchInfo; multiDispatchInfo.push(baseDispatchInfo); multiDispatchInfo.push(builtInDispatchInfo); std::reverse_iterator rend = multiDispatchInfo.rend(); std::reverse_iterator crend = multiDispatchInfo.crend(); std::reverse_iterator rbegin = multiDispatchInfo.rbegin(); std::reverse_iterator crbegin = multiDispatchInfo.crbegin(); EXPECT_EQ(rbegin.base(), multiDispatchInfo.end()); EXPECT_EQ(crbegin.base(), multiDispatchInfo.end()); EXPECT_EQ(rend.base(), multiDispatchInfo.begin()); EXPECT_EQ(crend.base(), multiDispatchInfo.begin()); } } TEST(DispatchInfoBasicTests, givenDispatchInfoWhenCreatedThenDefaultValueOfPartitionIsFalse) { DispatchInfo dispatchInfo; EXPECT_FALSE(dispatchInfo.peekCanBePartitioned()); } 
TEST(DispatchInfoBasicTests, givenDispatchInfoWhenSetCanBePartitionIsCalledThenStateIsChangedAccordingly) { DispatchInfo dispatchInfo; dispatchInfo.setCanBePartitioned(true); EXPECT_TRUE(dispatchInfo.peekCanBePartitioned()); } TEST(DispatchInfoBasicTests, givenDispatchInfoWithoutKernelWhenGettingSizeForPrivateScratchThenZeroIsReturned) { DispatchInfo dispatchInfo; EXPECT_EQ(nullptr, dispatchInfo.getKernel()); EXPECT_EQ(0u, dispatchInfo.getRequiredPrivateScratchSize()); } engine_node_helper_tests_xehp_and_later.cpp000066400000000000000000000352031422164147700340110ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/helpers/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/engine_node_helper.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" using namespace NEO; using EngineNodeHelperTestsXeHPAndLater = ::Test; HWCMDTEST_F(IGFX_XE_HP_CORE, EngineNodeHelperTestsXeHPAndLater, WhenGetBcsEngineTypeIsCalledThenBcsEngineIsReturned) { const auto hwInfo = pDevice->getHardwareInfo(); auto &selectorCopyEngine = pDevice->getNearestGenericSubDevice(0)->getSelectorCopyEngine(); EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS, EngineHelpers::getBcsEngineType(hwInfo, {}, selectorCopyEngine, false)); } HWTEST2_F(EngineNodeHelperTestsXeHPAndLater, givenDebugVariableSetWhenAskingForEngineTypeThenReturnTheSameAsVariableIndex, IsAtLeastXeHpCore) { DebugManagerStateRestore restore; DeviceBitfield deviceBitfield = 0b11; const auto hwInfo = pDevice->getHardwareInfo(); auto &selectorCopyEngine = pDevice->getNearestGenericSubDevice(0)->getSelectorCopyEngine(); for (int32_t i = 0; i <= 9; i++) { DebugManager.flags.ForceBcsEngineIndex.set(i); if (i == 0) { EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS, EngineHelpers::getBcsEngineType(hwInfo, deviceBitfield, selectorCopyEngine, 
false)); } else if (i <= 8) { EXPECT_EQ(static_cast(aub_stream::EngineType::ENGINE_BCS1 + i - 1), EngineHelpers::getBcsEngineType(hwInfo, deviceBitfield, selectorCopyEngine, false)); } else { EXPECT_ANY_THROW(EngineHelpers::getBcsEngineType(hwInfo, deviceBitfield, selectorCopyEngine, false)); } } } HWTEST2_F(EngineNodeHelperTestsXeHPAndLater, givenForceBCSForInternalCopyEngineWhenGetBcsEngineTypeForInternalEngineThenForcedTypeIsReturned, IsAtLeastXeHpCore) { DebugManagerStateRestore restore; DebugManager.flags.ForceBCSForInternalCopyEngine.set(0u); auto hwInfo = *pDevice->getRootDeviceEnvironment().getMutableHardwareInfo(); hwInfo.featureTable.ftrBcsInfo = 0xff; auto &selectorCopyEngine = pDevice->getNearestGenericSubDevice(0)->getSelectorCopyEngine(); DeviceBitfield deviceBitfield = 0xff; { DebugManager.flags.ForceBCSForInternalCopyEngine.set(0u); auto engineType = EngineHelpers::getBcsEngineType(hwInfo, deviceBitfield, selectorCopyEngine, true); EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS, engineType); } { DebugManager.flags.ForceBCSForInternalCopyEngine.set(3u); auto engineType = EngineHelpers::getBcsEngineType(hwInfo, deviceBitfield, selectorCopyEngine, true); EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS3, engineType); } } HWTEST2_F(EngineNodeHelperTestsXeHPAndLater, givenEnableCmdQRoundRobindBcsEngineAssignWhenSelectLinkCopyEngineThenRoundRobinOverAllAvailableLinkedCopyEngines, IsAtLeastXeHpCore) { DebugManagerStateRestore restore; DebugManager.flags.EnableCmdQRoundRobindBcsEngineAssign.set(1u); DeviceBitfield deviceBitfield = 0b10; auto hwInfo = *pDevice->getRootDeviceEnvironment().getMutableHardwareInfo(); hwInfo.featureTable.ftrBcsInfo.set(7); auto &selectorCopyEngine = pDevice->getNearestGenericSubDevice(0)->getSelectorCopyEngine(); int32_t expectedEngineType = aub_stream::EngineType::ENGINE_BCS1; for (int32_t i = 0; i <= 20; i++) { while (!HwHelper::get(hwInfo.platform.eRenderCoreFamily).isSubDeviceEngineSupported(hwInfo, deviceBitfield, 
static_cast(expectedEngineType)) || !hwInfo.featureTable.ftrBcsInfo.test(expectedEngineType - aub_stream::EngineType::ENGINE_BCS1 + 1)) { expectedEngineType++; if (static_cast(expectedEngineType) > aub_stream::EngineType::ENGINE_BCS8) { expectedEngineType = aub_stream::EngineType::ENGINE_BCS1; } } auto engineType = EngineHelpers::selectLinkCopyEngine(hwInfo, deviceBitfield, selectorCopyEngine.selector); EXPECT_EQ(engineType, static_cast(expectedEngineType)); expectedEngineType++; if (static_cast(expectedEngineType) > aub_stream::EngineType::ENGINE_BCS8) { expectedEngineType = aub_stream::EngineType::ENGINE_BCS1; } } } HWTEST2_F(EngineNodeHelperTestsXeHPAndLater, givenEnableCmdQRoundRobindBcsEngineAssignAndMainCopyEngineIncludedWhenSelectLinkCopyEngineThenRoundRobinOverAllAvailableLinkedCopyEnginesAndMainCopyEngine, IsAtLeastXeHpCore) { DebugManagerStateRestore restore; DebugManager.flags.EnableCmdQRoundRobindBcsEngineAssign.set(1u); DebugManager.flags.EnableCmdQRoundRobindBcsEngineAssignStartingValue.set(0); DeviceBitfield deviceBitfield = 0b10; auto hwInfo = *pDevice->getRootDeviceEnvironment().getMutableHardwareInfo(); hwInfo.featureTable.ftrBcsInfo = 0x17f; auto &selectorCopyEngine = pDevice->getNearestGenericSubDevice(0)->getSelectorCopyEngine(); int32_t expectedEngineType = aub_stream::EngineType::ENGINE_BCS; for (int32_t i = 0; i <= 20; i++) { while (!HwHelper::get(hwInfo.platform.eRenderCoreFamily).isSubDeviceEngineSupported(hwInfo, deviceBitfield, static_cast(expectedEngineType)) || !hwInfo.featureTable.ftrBcsInfo.test(expectedEngineType == aub_stream::EngineType::ENGINE_BCS ? 
0 : expectedEngineType - aub_stream::EngineType::ENGINE_BCS1 + 1)) { if (expectedEngineType == aub_stream::EngineType::ENGINE_BCS) { expectedEngineType = aub_stream::EngineType::ENGINE_BCS1; } else { expectedEngineType++; } if (static_cast(expectedEngineType) > aub_stream::EngineType::ENGINE_BCS8) { expectedEngineType = aub_stream::EngineType::ENGINE_BCS; } } auto engineType = EngineHelpers::selectLinkCopyEngine(hwInfo, deviceBitfield, selectorCopyEngine.selector); EXPECT_EQ(engineType, static_cast(expectedEngineType)); if (expectedEngineType == aub_stream::EngineType::ENGINE_BCS) { expectedEngineType = aub_stream::EngineType::ENGINE_BCS1; } else { expectedEngineType++; } if (static_cast(expectedEngineType) > aub_stream::EngineType::ENGINE_BCS8) { expectedEngineType = aub_stream::EngineType::ENGINE_BCS; } } } HWTEST2_F(EngineNodeHelperTestsXeHPAndLater, givenEnableCmdQRoundRobindBcsEngineAssignAndMainCopyEngineIncludedAndLimitSetWhenSelectLinkCopyEngineThenRoundRobinOverAllAvailableLinkedCopyEnginesAndMainCopyEngine, IsAtLeastXeHpCore) { DebugManagerStateRestore restore; DebugManager.flags.EnableCmdQRoundRobindBcsEngineAssign.set(1u); DebugManager.flags.EnableCmdQRoundRobindBcsEngineAssignStartingValue.set(0); DebugManager.flags.EnableCmdQRoundRobindBcsEngineAssignLimit.set(6); DeviceBitfield deviceBitfield = 0b10; auto hwInfo = *pDevice->getRootDeviceEnvironment().getMutableHardwareInfo(); hwInfo.featureTable.ftrBcsInfo = 0x17f; auto &selectorCopyEngine = pDevice->getNearestGenericSubDevice(0)->getSelectorCopyEngine(); int32_t expectedEngineType = aub_stream::EngineType::ENGINE_BCS; for (int32_t i = 0; i <= 20; i++) { while (!HwHelper::get(hwInfo.platform.eRenderCoreFamily).isSubDeviceEngineSupported(hwInfo, deviceBitfield, static_cast(expectedEngineType)) || !hwInfo.featureTable.ftrBcsInfo.test(expectedEngineType == aub_stream::EngineType::ENGINE_BCS ? 
0 : expectedEngineType - aub_stream::EngineType::ENGINE_BCS1 + 1)) { if (expectedEngineType == aub_stream::EngineType::ENGINE_BCS) { expectedEngineType = aub_stream::EngineType::ENGINE_BCS1; } else { expectedEngineType++; } if (static_cast(expectedEngineType) > aub_stream::EngineType::ENGINE_BCS5) { expectedEngineType = aub_stream::EngineType::ENGINE_BCS; } } auto engineType = EngineHelpers::selectLinkCopyEngine(hwInfo, deviceBitfield, selectorCopyEngine.selector); EXPECT_EQ(engineType, static_cast(expectedEngineType)); if (expectedEngineType == aub_stream::EngineType::ENGINE_BCS) { expectedEngineType = aub_stream::EngineType::ENGINE_BCS1; } else { expectedEngineType++; } if (static_cast(expectedEngineType) > aub_stream::EngineType::ENGINE_BCS5) { expectedEngineType = aub_stream::EngineType::ENGINE_BCS; } } } HWTEST2_F(EngineNodeHelperTestsXeHPAndLater, givenEnableCmdQRoundRobindBcsEngineAssignAndLimitSetWhenSelectLinkCopyEngineThenRoundRobinOverAllAvailableLinkedCopyEnginesAndMainCopyEngine, IsAtLeastXeHpCore) { DebugManagerStateRestore restore; DebugManager.flags.EnableCmdQRoundRobindBcsEngineAssign.set(1u); DebugManager.flags.EnableCmdQRoundRobindBcsEngineAssignLimit.set(6); DeviceBitfield deviceBitfield = 0b10; auto hwInfo = *pDevice->getRootDeviceEnvironment().getMutableHardwareInfo(); hwInfo.featureTable.ftrBcsInfo = 0x17f; auto &selectorCopyEngine = pDevice->getNearestGenericSubDevice(0)->getSelectorCopyEngine(); int32_t expectedEngineType = aub_stream::EngineType::ENGINE_BCS1; for (int32_t i = 0; i <= 20; i++) { while (!HwHelper::get(hwInfo.platform.eRenderCoreFamily).isSubDeviceEngineSupported(hwInfo, deviceBitfield, static_cast(expectedEngineType)) || !hwInfo.featureTable.ftrBcsInfo.test(expectedEngineType == aub_stream::EngineType::ENGINE_BCS ? 
0 : expectedEngineType - aub_stream::EngineType::ENGINE_BCS1 + 1)) { if (expectedEngineType == aub_stream::EngineType::ENGINE_BCS) { expectedEngineType = aub_stream::EngineType::ENGINE_BCS1; } else { expectedEngineType++; } if (static_cast(expectedEngineType) > aub_stream::EngineType::ENGINE_BCS6) { expectedEngineType = aub_stream::EngineType::ENGINE_BCS1; } } auto engineType = EngineHelpers::selectLinkCopyEngine(hwInfo, deviceBitfield, selectorCopyEngine.selector); EXPECT_EQ(engineType, static_cast(expectedEngineType)); if (expectedEngineType == aub_stream::EngineType::ENGINE_BCS) { expectedEngineType = aub_stream::EngineType::ENGINE_BCS1; } else { expectedEngineType++; } if (static_cast(expectedEngineType) > aub_stream::EngineType::ENGINE_BCS6) { expectedEngineType = aub_stream::EngineType::ENGINE_BCS1; } } } HWTEST2_F(EngineNodeHelperTestsXeHPAndLater, givenEnableCmdQRoundRobindBcsEngineAssignAndStartOffsetIncludedWhenSelectLinkCopyEngineThenRoundRobinOverAllAvailableLinkedCopyEngines, IsAtLeastXeHpCore) { DebugManagerStateRestore restore; DebugManager.flags.EnableCmdQRoundRobindBcsEngineAssign.set(1u); DebugManager.flags.EnableCmdQRoundRobindBcsEngineAssignStartingValue.set(2); DebugManager.flags.EnableCmdQRoundRobindBcsEngineAssignLimit.set(5); DeviceBitfield deviceBitfield = 0b10; auto hwInfo = *pDevice->getRootDeviceEnvironment().getMutableHardwareInfo(); hwInfo.featureTable.ftrBcsInfo = 0x17f; auto &selectorCopyEngine = pDevice->getNearestGenericSubDevice(0)->getSelectorCopyEngine(); int32_t expectedEngineType = aub_stream::EngineType::ENGINE_BCS3; for (int32_t i = 0; i <= 20; i++) { while (!HwHelper::get(hwInfo.platform.eRenderCoreFamily).isSubDeviceEngineSupported(hwInfo, deviceBitfield, static_cast(expectedEngineType)) || !hwInfo.featureTable.ftrBcsInfo.test(expectedEngineType == aub_stream::EngineType::ENGINE_BCS ? 
0 : expectedEngineType - aub_stream::EngineType::ENGINE_BCS1 + 1)) { if (expectedEngineType == aub_stream::EngineType::ENGINE_BCS) { expectedEngineType = aub_stream::EngineType::ENGINE_BCS1; } else { expectedEngineType++; } if (static_cast(expectedEngineType) > aub_stream::EngineType::ENGINE_BCS7) { expectedEngineType = aub_stream::EngineType::ENGINE_BCS3; } } auto engineType = EngineHelpers::selectLinkCopyEngine(hwInfo, deviceBitfield, selectorCopyEngine.selector); EXPECT_EQ(engineType, static_cast(expectedEngineType)); if (expectedEngineType == aub_stream::EngineType::ENGINE_BCS) { expectedEngineType = aub_stream::EngineType::ENGINE_BCS1; } else { expectedEngineType++; } if (static_cast(expectedEngineType) > aub_stream::EngineType::ENGINE_BCS7) { expectedEngineType = aub_stream::EngineType::ENGINE_BCS3; } } }compute-runtime-22.14.22890/opencl/test/unit_test/helpers/get_info_status_mapper_tests.cpp000066400000000000000000000016131422164147700317610ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/helpers/get_info_status_mapper.h" #include "gtest/gtest.h" TEST(getInfoStatusMapper, GivenValidGetInfoStatusWhenTranslatingThenExpectedClCodeIsReturned) { auto getInfoStatus = changeGetInfoStatusToCLResultType(GetInfoStatus::SUCCESS); EXPECT_EQ(CL_SUCCESS, getInfoStatus); getInfoStatus = changeGetInfoStatusToCLResultType(GetInfoStatus::INVALID_CONTEXT); EXPECT_EQ(CL_INVALID_CONTEXT, getInfoStatus); getInfoStatus = changeGetInfoStatusToCLResultType(GetInfoStatus::INVALID_VALUE); EXPECT_EQ(CL_INVALID_VALUE, getInfoStatus); } TEST(getInfoStatusMapper, GivenInvalidGetInfoStatusWhenTranslatingThenClInvalidValueIsReturned) { auto getInfoStatus = changeGetInfoStatusToCLResultType(static_cast(1)); EXPECT_EQ(CL_INVALID_VALUE, getInfoStatus); } 
compute-runtime-22.14.22890/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp000066400000000000000000002151171422164147700322430ustar00rootroot00000000000000/* * Copyright (C) 2019-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/helpers/hardware_commands_helper_tests.h" #include "shared/source/command_container/command_encoder.h" #include "shared/source/helpers/basic_math.h" #include "shared/source/helpers/engine_node_helper.h" #include "shared/source/memory_manager/unified_memory_manager.h" #include "shared/source/os_interface/os_context.h" #include "shared/test/common/cmd_parse/hw_parse.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/mocks/mock_allocation_properties.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "shared/test/common/test_macros/test_checks_shared.h" #include "shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/source/api/api.h" #include "opencl/source/built_ins/builtins_dispatch_builder.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/test/unit_test/fixtures/hello_world_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include using namespace NEO; void HardwareCommandsTest::SetUp() { ClDeviceFixture::SetUp(); ASSERT_NE(nullptr, pClDevice); cl_device_id device = pClDevice; ContextFixture::SetUp(1, &device); ASSERT_NE(nullptr, pContext); BuiltInFixture::SetUp(pDevice); ASSERT_NE(nullptr, pBuiltIns); mockKernelWithInternal = std::make_unique(*pClDevice, pContext); } void HardwareCommandsTest::TearDown() { mockKernelWithInternal.reset(nullptr); BuiltInFixture::TearDown(); ContextFixture::TearDown(); ClDeviceFixture::TearDown(); } void HardwareCommandsTest::addSpaceForSingleKernelArg() { kernelArguments.resize(1); kernelArguments[0] = 
kernelArgInfo; mockKernelWithInternal->kernelInfo.addArgBuffer(0, 0, sizeof(uintptr_t)); mockKernelWithInternal->mockKernel->setKernelArguments(kernelArguments); mockKernelWithInternal->mockKernel->kernelArgRequiresCacheFlush.resize(1); } HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, WhenProgramInterfaceDescriptorDataIsCreatedThenOnlyRequiredSpaceOnIndirectHeapIsAllocated) { CommandQueueHw cmdQ(pContext, pClDevice, 0, false); std::unique_ptr srcImage(Image2dHelper<>::create(pContext)); ASSERT_NE(nullptr, srcImage.get()); std::unique_ptr dstImage(Image2dHelper<>::create(pContext)); ASSERT_NE(nullptr, dstImage.get()); auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyImageToImage3d, cmdQ.getClDevice()); ASSERT_NE(nullptr, &builder); BuiltinOpParams dc; dc.srcMemObj = srcImage.get(); dc.dstMemObj = dstImage.get(); dc.srcOffset = {0, 0, 0}; dc.dstOffset = {0, 0, 0}; dc.size = {1, 1, 1}; MultiDispatchInfo multiDispatchInfo(dc); builder.buildDispatchInfos(multiDispatchInfo); EXPECT_NE(0u, multiDispatchInfo.size()); auto kernel = multiDispatchInfo.begin()->getKernel(); ASSERT_NE(nullptr, kernel); typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; auto &indirectHeap = cmdQ.getIndirectHeap(IndirectHeap::Type::DYNAMIC_STATE, 8192); auto usedIndirectHeapBefore = indirectHeap.getUsed(); indirectHeap.getSpace(sizeof(INTERFACE_DESCRIPTOR_DATA)); size_t crossThreadDataSize = kernel->getCrossThreadDataSize(); HardwareCommandsHelper::sendInterfaceDescriptorData( indirectHeap, 0, 0, crossThreadDataSize, 64, 0, 0, 0, 1, *kernel, 0, pDevice->getPreemptionMode(), nullptr, *pDevice); auto usedIndirectHeapAfter = indirectHeap.getUsed(); EXPECT_EQ(sizeof(INTERFACE_DESCRIPTOR_DATA), usedIndirectHeapAfter - usedIndirectHeapBefore); } HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, WhenMediaInterfaceDescriptorIsCreatedThenOnlyRequiredSpaceInCommandStreamIsAllocated) { CommandQueueHw cmdQ(nullptr, pClDevice, 0, 
false); typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; typedef typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD MEDIA_INTERFACE_DESCRIPTOR_LOAD; typedef typename FamilyType::MEDIA_STATE_FLUSH MEDIA_STATE_FLUSH; auto &commandStream = cmdQ.getCS(1024); auto usedBefore = commandStream.getUsed(); HardwareCommandsHelper::sendMediaInterfaceDescriptorLoad(commandStream, 0, sizeof(INTERFACE_DESCRIPTOR_DATA)); auto usedAfter = commandStream.getUsed(); EXPECT_EQ(sizeof(MEDIA_INTERFACE_DESCRIPTOR_LOAD) + sizeof(MEDIA_STATE_FLUSH), usedAfter - usedBefore); } HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, WhenMediaStateFlushIsCreatedThenOnlyRequiredSpaceInCommandStreamIsAllocated) { CommandQueueHw cmdQ(nullptr, pClDevice, 0, false); typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; typedef typename FamilyType::MEDIA_STATE_FLUSH MEDIA_STATE_FLUSH; auto &commandStream = cmdQ.getCS(1024); auto usedBefore = commandStream.getUsed(); HardwareCommandsHelper::sendMediaStateFlush(commandStream, sizeof(INTERFACE_DESCRIPTOR_DATA)); auto usedAfter = commandStream.getUsed(); EXPECT_EQ(sizeof(MEDIA_STATE_FLUSH), usedAfter - usedBefore); } HWTEST_F(HardwareCommandsTest, WhenCrossThreadDataIsCreatedThenOnlyRequiredSpaceOnIndirectHeapIsAllocated) { REQUIRE_IMAGES_OR_SKIP(defaultHwInfo); CommandQueueHw cmdQ(pContext, pClDevice, 0, false); std::unique_ptr srcImage(Image2dHelper<>::create(pContext)); ASSERT_NE(nullptr, srcImage.get()); std::unique_ptr dstImage(Image2dHelper<>::create(pContext)); ASSERT_NE(nullptr, dstImage.get()); auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyImageToImage3d, cmdQ.getClDevice()); ASSERT_NE(nullptr, &builder); BuiltinOpParams dc; dc.srcMemObj = srcImage.get(); dc.dstMemObj = dstImage.get(); dc.srcOffset = {0, 0, 0}; dc.dstOffset = {0, 0, 0}; dc.size = {1, 1, 1}; MultiDispatchInfo multiDispatchInfo(dc); builder.buildDispatchInfos(multiDispatchInfo); 
EXPECT_NE(0u, multiDispatchInfo.size()); auto kernel = multiDispatchInfo.begin()->getKernel(); ASSERT_NE(nullptr, kernel); auto &indirectHeap = cmdQ.getIndirectHeap(IndirectHeap::Type::DYNAMIC_STATE, 8192); auto usedBefore = indirectHeap.getUsed(); auto sizeCrossThreadData = kernel->getCrossThreadDataSize(); HardwareCommandsHelper::sendCrossThreadData( indirectHeap, *kernel, false, nullptr, sizeCrossThreadData); auto usedAfter = indirectHeap.getUsed(); EXPECT_EQ(kernel->getCrossThreadDataSize(), usedAfter - usedBefore); } HWTEST_F(HardwareCommandsTest, givenSendCrossThreadDataWhenWhenAddPatchInfoCommentsForAUBDumpIsNotSetThenAddPatchInfoDataOffsetsAreNotMoved) { CommandQueueHw cmdQ(pContext, pClDevice, 0, false); MockContext context; MockProgram program(&context, false, toClDeviceVector(*pClDevice)); auto kernelInfo = std::make_unique(); std::unique_ptr kernel(new MockKernel(&program, *kernelInfo, *pClDevice)); auto &indirectHeap = cmdQ.getIndirectHeap(IndirectHeap::Type::INDIRECT_OBJECT, 8192); PatchInfoData patchInfoData = {0xaaaaaaaa, 0, PatchInfoAllocationType::KernelArg, 0xbbbbbbbb, 0, PatchInfoAllocationType::IndirectObjectHeap}; kernel->getPatchInfoDataList().push_back(patchInfoData); auto sizeCrossThreadData = kernel->getCrossThreadDataSize(); HardwareCommandsHelper::sendCrossThreadData( indirectHeap, *kernel, false, nullptr, sizeCrossThreadData); ASSERT_EQ(1u, kernel->getPatchInfoDataList().size()); EXPECT_EQ(0xaaaaaaaa, kernel->getPatchInfoDataList()[0].sourceAllocation); EXPECT_EQ(0u, kernel->getPatchInfoDataList()[0].sourceAllocationOffset); EXPECT_EQ(PatchInfoAllocationType::KernelArg, kernel->getPatchInfoDataList()[0].sourceType); EXPECT_EQ(0xbbbbbbbb, kernel->getPatchInfoDataList()[0].targetAllocation); EXPECT_EQ(0u, kernel->getPatchInfoDataList()[0].targetAllocationOffset); EXPECT_EQ(PatchInfoAllocationType::IndirectObjectHeap, kernel->getPatchInfoDataList()[0].targetType); } HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, 
givenIndirectHeapNotAllocatedFromInternalPoolWhenSendCrossThreadDataIsCalledThenOffsetZeroIsReturned) {
    // Heap backed by a plain page-sized allocation; the IndirectHeap is
    // constructed with `false` as its second argument.
    auto nonInternalAllocation = pDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize});
    IndirectHeap indirectHeap(nonInternalAllocation, false);

    auto sizeCrossThreadData = mockKernelWithInternal->mockKernel->getCrossThreadDataSize();
    auto offset = HardwareCommandsHelper::sendCrossThreadData(
        indirectHeap,
        *mockKernelWithInternal->mockKernel,
        false,
        nullptr,
        sizeCrossThreadData);
    EXPECT_EQ(0u, offset);

    // The allocation was obtained directly from the memory manager, so the
    // test releases it itself.
    pDevice->getMemoryManager()->freeGraphicsMemory(nonInternalAllocation);
}

// Same scenario as the test above, but the heap is backed by an
// AllocationType::INTERNAL_HEAP allocation and the IndirectHeap is constructed
// with `true`; the returned offset is expected to equal the allocation's
// getGpuAddressToPatch() value.
HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenIndirectHeapAllocatedFromInternalPoolWhenSendCrossThreadDataIsCalledThenHeapBaseOffsetIsReturned) {
    auto internalAllocation = pDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties(pDevice->getRootDeviceIndex(),
                                                                                                                          true, MemoryConstants::pageSize,
                                                                                                                          AllocationType::INTERNAL_HEAP,
                                                                                                                          pDevice->getDeviceBitfield()));
    IndirectHeap indirectHeap(internalAllocation, true);
    auto expectedOffset = internalAllocation->getGpuAddressToPatch();

    auto sizeCrossThreadData = mockKernelWithInternal->mockKernel->getCrossThreadDataSize();
    auto offset = HardwareCommandsHelper::sendCrossThreadData(
        indirectHeap,
        *mockKernelWithInternal->mockKernel,
        false,
        nullptr,
        sizeCrossThreadData);
    EXPECT_EQ(expectedOffset, offset);

    // Released manually for the same reason as in the previous test.
    pDevice->getMemoryManager()->freeGraphicsMemory(internalAllocation);
}

HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenSendCrossThreadDataWhenWhenAddPatchInfoCommentsForAUBDumpIsSetThenAddPatchInfoDataOffsetsAreMoved) {
    // dbgRestore is declared before the flag is changed; presumably it puts
    // the debug flags back when the test ends (helper not visible here).
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.AddPatchInfoCommentsForAUBDump.set(true);

    CommandQueueHw cmdQ(pContext, pClDevice, 0, false);

    MockContext context;

    MockProgram program(&context, false, toClDeviceVector(*pClDevice));
    auto kernelInfo = std::make_unique();

    std::unique_ptr kernel(new
MockKernel(&program, *kernelInfo, *pClDevice)); auto &indirectHeap = cmdQ.getIndirectHeap(IndirectHeap::Type::INDIRECT_OBJECT, 8192); indirectHeap.getSpace(128u); PatchInfoData patchInfoData1 = {0xaaaaaaaa, 0, PatchInfoAllocationType::KernelArg, 0xbbbbbbbb, 0, PatchInfoAllocationType::IndirectObjectHeap}; PatchInfoData patchInfoData2 = {0xcccccccc, 0, PatchInfoAllocationType::IndirectObjectHeap, 0xdddddddd, 0, PatchInfoAllocationType::Default}; kernel->getPatchInfoDataList().push_back(patchInfoData1); kernel->getPatchInfoDataList().push_back(patchInfoData2); auto sizeCrossThreadData = kernel->getCrossThreadDataSize(); auto offsetCrossThreadData = HardwareCommandsHelper::sendCrossThreadData( indirectHeap, *kernel, false, nullptr, sizeCrossThreadData); ASSERT_NE(0u, offsetCrossThreadData); EXPECT_EQ(128u, offsetCrossThreadData); ASSERT_EQ(2u, kernel->getPatchInfoDataList().size()); EXPECT_EQ(0xaaaaaaaa, kernel->getPatchInfoDataList()[0].sourceAllocation); EXPECT_EQ(0u, kernel->getPatchInfoDataList()[0].sourceAllocationOffset); EXPECT_EQ(PatchInfoAllocationType::KernelArg, kernel->getPatchInfoDataList()[0].sourceType); EXPECT_NE(0xbbbbbbbb, kernel->getPatchInfoDataList()[0].targetAllocation); EXPECT_EQ(indirectHeap.getGraphicsAllocation()->getGpuAddress(), kernel->getPatchInfoDataList()[0].targetAllocation); EXPECT_NE(0u, kernel->getPatchInfoDataList()[0].targetAllocationOffset); EXPECT_EQ(offsetCrossThreadData, kernel->getPatchInfoDataList()[0].targetAllocationOffset); EXPECT_EQ(PatchInfoAllocationType::IndirectObjectHeap, kernel->getPatchInfoDataList()[0].targetType); } HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, WhenAllocatingIndirectStateResourceThenCorrectSizeIsAllocated) { using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER; CommandQueueHw cmdQ(pContext, pClDevice, 0, false); std::unique_ptr srcImage(Image2dHelper<>::create(pContext)); ASSERT_NE(nullptr, srcImage.get()); 
std::unique_ptr dstImage(Image2dHelper<>::create(pContext)); ASSERT_NE(nullptr, dstImage.get()); auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyImageToImage3d, cmdQ.getClDevice()); ASSERT_NE(nullptr, &builder); BuiltinOpParams dc; dc.srcMemObj = srcImage.get(); dc.dstMemObj = dstImage.get(); dc.srcOffset = {0, 0, 0}; dc.dstOffset = {0, 0, 0}; dc.size = {1, 1, 1}; MultiDispatchInfo multiDispatchInfo(dc); builder.buildDispatchInfos(multiDispatchInfo); EXPECT_NE(0u, multiDispatchInfo.size()); auto kernel = multiDispatchInfo.begin()->getKernel(); ASSERT_NE(nullptr, kernel); const size_t localWorkSize = 256; const size_t localWorkSizes[3]{localWorkSize, 1, 1}; auto &commandStream = cmdQ.getCS(1024); auto pWalkerCmd = static_cast(commandStream.getSpace(sizeof(GPGPU_WALKER))); *pWalkerCmd = FamilyType::cmdInitGpgpuWalker; auto &dsh = cmdQ.getIndirectHeap(IndirectHeap::Type::DYNAMIC_STATE, 8192); auto &ioh = cmdQ.getIndirectHeap(IndirectHeap::Type::INDIRECT_OBJECT, 8192); auto &ssh = cmdQ.getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 8192); auto usedBeforeCS = commandStream.getUsed(); auto usedBeforeDSH = dsh.getUsed(); auto usedBeforeIOH = ioh.getUsed(); auto usedBeforeSSH = ssh.getUsed(); dsh.align(EncodeStates::alignInterfaceDescriptorData); size_t IDToffset = dsh.getUsed(); dsh.getSpace(sizeof(INTERFACE_DESCRIPTOR_DATA)); HardwareCommandsHelper::sendMediaInterfaceDescriptorLoad( commandStream, IDToffset, sizeof(INTERFACE_DESCRIPTOR_DATA)); uint32_t interfaceDescriptorIndex = 0; auto isCcsUsed = EngineHelpers::isCcs(cmdQ.getGpgpuEngine().osContext->getEngineType()); auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(*kernel); HardwareCommandsHelper::sendIndirectState( commandStream, dsh, ioh, ssh, *kernel, kernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed), kernel->getKernelInfo().getMaxSimdSize(), localWorkSizes, IDToffset, interfaceDescriptorIndex, pDevice->getPreemptionMode(), pWalkerCmd, 
nullptr, true, *pDevice); // It's okay these are EXPECT_GE as they're only going to be used for // estimation purposes to avoid OOM. auto usedAfterDSH = dsh.getUsed(); auto usedAfterIOH = ioh.getUsed(); auto usedAfterSSH = ssh.getUsed(); auto sizeRequiredDSH = HardwareCommandsHelper::getSizeRequiredDSH(*kernel); auto sizeRequiredIOH = HardwareCommandsHelper::getSizeRequiredIOH(*kernel, localWorkSize); auto sizeRequiredSSH = HardwareCommandsHelper::getSizeRequiredSSH(*kernel); EXPECT_GE(sizeRequiredDSH, usedAfterDSH - usedBeforeDSH); EXPECT_GE(sizeRequiredIOH, usedAfterIOH - usedBeforeIOH); EXPECT_GE(sizeRequiredSSH, usedAfterSSH - usedBeforeSSH); auto usedAfterCS = commandStream.getUsed(); EXPECT_GE(HardwareCommandsHelper::getSizeRequiredCS(), usedAfterCS - usedBeforeCS); } HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenKernelWithFourBindingTableEntriesWhenIndirectStateIsEmittedThenInterfaceDescriptorContainsCorrectBindingTableEntryCount) { using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER; CommandQueueHw cmdQ(pContext, pClDevice, 0, false); auto &commandStream = cmdQ.getCS(1024); auto pWalkerCmd = static_cast(commandStream.getSpace(sizeof(GPGPU_WALKER))); *pWalkerCmd = FamilyType::cmdInitGpgpuWalker; auto expectedBindingTableCount = 3u; mockKernelWithInternal->mockKernel->numberOfBindingTableStates = expectedBindingTableCount; auto &dsh = cmdQ.getIndirectHeap(IndirectHeap::Type::DYNAMIC_STATE, 8192); auto &ioh = cmdQ.getIndirectHeap(IndirectHeap::Type::INDIRECT_OBJECT, 8192); auto &ssh = cmdQ.getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 8192); const size_t localWorkSize = 256; const size_t localWorkSizes[3]{localWorkSize, 1, 1}; uint32_t interfaceDescriptorIndex = 0; auto isCcsUsed = EngineHelpers::isCcs(cmdQ.getGpgpuEngine().osContext->getEngineType()); auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel); 
HardwareCommandsHelper::sendIndirectState( commandStream, dsh, ioh, ssh, *mockKernelWithInternal->mockKernel, mockKernelWithInternal->mockKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed), mockKernelWithInternal->mockKernel->getKernelInfo().getMaxSimdSize(), localWorkSizes, 0, interfaceDescriptorIndex, pDevice->getPreemptionMode(), pWalkerCmd, nullptr, true, *pDevice); auto interfaceDescriptor = reinterpret_cast(dsh.getCpuBase()); if (EncodeSurfaceState::doBindingTablePrefetch()) { EXPECT_EQ(expectedBindingTableCount, interfaceDescriptor->getBindingTableEntryCount()); } else { EXPECT_EQ(0u, interfaceDescriptor->getBindingTableEntryCount()); } } HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenKernelWith100BindingTableEntriesWhenIndirectStateIsEmittedThenInterfaceDescriptorHas31BindingTableEntriesSet) { using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER; CommandQueueHw cmdQ(pContext, pClDevice, 0, false); auto &commandStream = cmdQ.getCS(1024); auto pWalkerCmd = static_cast(commandStream.getSpace(sizeof(GPGPU_WALKER))); *pWalkerCmd = FamilyType::cmdInitGpgpuWalker; auto expectedBindingTableCount = 100u; mockKernelWithInternal->mockKernel->numberOfBindingTableStates = expectedBindingTableCount; auto &dsh = cmdQ.getIndirectHeap(IndirectHeap::Type::DYNAMIC_STATE, 8192); auto &ioh = cmdQ.getIndirectHeap(IndirectHeap::Type::INDIRECT_OBJECT, 8192); auto &ssh = cmdQ.getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 8192); const size_t localWorkSize = 256; const size_t localWorkSizes[3]{localWorkSize, 1, 1}; uint32_t interfaceDescriptorIndex = 0; auto isCcsUsed = EngineHelpers::isCcs(cmdQ.getGpgpuEngine().osContext->getEngineType()); auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel); HardwareCommandsHelper::sendIndirectState( commandStream, dsh, ioh, ssh, *mockKernelWithInternal->mockKernel, 
mockKernelWithInternal->mockKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed), mockKernelWithInternal->mockKernel->getKernelInfo().getMaxSimdSize(), localWorkSizes, 0, interfaceDescriptorIndex, pDevice->getPreemptionMode(), pWalkerCmd, nullptr, true, *pDevice); auto interfaceDescriptor = reinterpret_cast(dsh.getCpuBase()); if (EncodeSurfaceState::doBindingTablePrefetch()) { EXPECT_EQ(31u, interfaceDescriptor->getBindingTableEntryCount()); } else { EXPECT_EQ(0u, interfaceDescriptor->getBindingTableEntryCount()); } } HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, whenSendingIndirectStateThenKernelsWalkOrderIsTakenIntoAccount) { using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER; CommandQueueHw cmdQ(pContext, pClDevice, 0, false); std::unique_ptr img(Image2dHelper<>::create(pContext)); auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyImageToImage3d, cmdQ.getClDevice()); BuiltinOpParams dc; dc.srcMemObj = img.get(); dc.dstMemObj = img.get(); dc.size = {1, 1, 1}; MultiDispatchInfo multiDispatchInfo(dc); builder.buildDispatchInfos(multiDispatchInfo); ASSERT_NE(0u, multiDispatchInfo.size()); auto kernel = multiDispatchInfo.begin()->getKernel(); ASSERT_NE(nullptr, kernel); const size_t localWorkSizeX = 2; const size_t localWorkSizeY = 3; const size_t localWorkSizeZ = 4; const size_t localWorkSizes[3]{localWorkSizeX, localWorkSizeY, localWorkSizeZ}; auto &commandStream = cmdQ.getCS(1024); auto pWalkerCmd = static_cast(commandStream.getSpace(sizeof(GPGPU_WALKER))); *pWalkerCmd = FamilyType::cmdInitGpgpuWalker; auto &dsh = cmdQ.getIndirectHeap(IndirectHeap::Type::DYNAMIC_STATE, 8192); auto &ioh = cmdQ.getIndirectHeap(IndirectHeap::Type::INDIRECT_OBJECT, 8192); auto &ssh = cmdQ.getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 8192); dsh.align(EncodeStates::alignInterfaceDescriptorData); size_t IDToffset = dsh.getUsed(); 
dsh.getSpace(sizeof(INTERFACE_DESCRIPTOR_DATA)); KernelInfo modifiedKernelInfo = {}; modifiedKernelInfo.kernelDescriptor.kernelAttributes.workgroupWalkOrder[0] = 2; modifiedKernelInfo.kernelDescriptor.kernelAttributes.workgroupWalkOrder[1] = 1; modifiedKernelInfo.kernelDescriptor.kernelAttributes.workgroupWalkOrder[2] = 0; modifiedKernelInfo.kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[0] = 2; modifiedKernelInfo.kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[1] = 1; modifiedKernelInfo.kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[2] = 0; modifiedKernelInfo.kernelDescriptor.kernelAttributes.simdSize = 16; modifiedKernelInfo.kernelDescriptor.kernelAttributes.numLocalIdChannels = 3; MockKernel mockKernel(kernel->getProgram(), modifiedKernelInfo, *pClDevice); uint32_t interfaceDescriptorIndex = 0; auto isCcsUsed = EngineHelpers::isCcs(cmdQ.getGpgpuEngine().osContext->getEngineType()); auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(mockKernel); HardwareCommandsHelper::sendIndirectState( commandStream, dsh, ioh, ssh, mockKernel, mockKernel.getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed), modifiedKernelInfo.getMaxSimdSize(), localWorkSizes, IDToffset, interfaceDescriptorIndex, pDevice->getPreemptionMode(), pWalkerCmd, nullptr, true, *pDevice); constexpr uint32_t grfSize = sizeof(typename FamilyType::GRF); size_t localWorkSize = localWorkSizeX * localWorkSizeY * localWorkSizeZ; auto numChannels = modifiedKernelInfo.kernelDescriptor.kernelAttributes.numLocalIdChannels; size_t expectedIohSize = PerThreadDataHelper::getPerThreadDataSizeTotal(modifiedKernelInfo.getMaxSimdSize(), grfSize, numChannels, localWorkSize); ASSERT_LE(expectedIohSize, ioh.getUsed()); auto expectedLocalIds = alignedMalloc(expectedIohSize, 64); generateLocalIDs(expectedLocalIds, modifiedKernelInfo.getMaxSimdSize(), std::array{{localWorkSizeX, localWorkSizeY, localWorkSizeZ}}, 
std::array{{modifiedKernelInfo.kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[0], modifiedKernelInfo.kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[1], modifiedKernelInfo.kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[2]}}, false, grfSize); EXPECT_EQ(0, memcmp(expectedLocalIds, ioh.getCpuBase(), expectedIohSize)); alignedFree(expectedLocalIds); } HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, WhenSendingIndirectStateThenBindingTableStatesPointersAreCorrect) { typedef typename FamilyType::BINDING_TABLE_STATE BINDING_TABLE_STATE; typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER; CommandQueueHw cmdQ(pContext, pClDevice, 0, false); std::unique_ptr dstImage(Image2dHelper<>::create(pContext)); ASSERT_NE(nullptr, dstImage.get()); auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToImage3d, cmdQ.getClDevice()); ASSERT_NE(nullptr, &builder); BuiltinOpParams dc; dc.srcPtr = nullptr; dc.dstMemObj = dstImage.get(); dc.dstOffset = {0, 0, 0}; dc.size = {1, 1, 1}; dc.dstRowPitch = 0; dc.dstSlicePitch = 0; MultiDispatchInfo multiDispatchInfo(dc); builder.buildDispatchInfos(multiDispatchInfo); EXPECT_NE(0u, multiDispatchInfo.size()); auto kernel = multiDispatchInfo.begin()->getKernel(); ASSERT_NE(nullptr, kernel); const size_t localWorkSizes[3]{256, 1, 1}; auto &commandStream = cmdQ.getCS(1024); auto pWalkerCmd = static_cast(commandStream.getSpace(sizeof(GPGPU_WALKER))); *pWalkerCmd = FamilyType::cmdInitGpgpuWalker; auto &dsh = cmdQ.getIndirectHeap(IndirectHeap::Type::DYNAMIC_STATE, 8192); auto &ioh = cmdQ.getIndirectHeap(IndirectHeap::Type::INDIRECT_OBJECT, 8192); auto &ssh = cmdQ.getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 8192); auto sshUsed = ssh.getUsed(); // Obtain where the pointers will be stored const auto &kernelInfo = kernel->getKernelInfo(); auto numSurfaceStates = 
kernelInfo.kernelDescriptor.payloadMappings.bindingTable.numEntries; EXPECT_EQ(2u, numSurfaceStates); size_t bindingTableStateSize = numSurfaceStates * sizeof(RENDER_SURFACE_STATE); uint32_t *bindingTableStatesPointers = reinterpret_cast( reinterpret_cast(ssh.getCpuBase()) + ssh.getUsed() + bindingTableStateSize); for (auto i = 0u; i < numSurfaceStates; i++) { *(&bindingTableStatesPointers[i]) = 0xDEADBEEF; } uint32_t interfaceDescriptorIndex = 0; auto isCcsUsed = EngineHelpers::isCcs(cmdQ.getGpgpuEngine().osContext->getEngineType()); auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(*kernel); HardwareCommandsHelper::sendIndirectState( commandStream, dsh, ioh, ssh, *kernel, kernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed), kernel->getKernelInfo().getMaxSimdSize(), localWorkSizes, 0, interfaceDescriptorIndex, pDevice->getPreemptionMode(), pWalkerCmd, nullptr, true, *pDevice); EXPECT_EQ(sshUsed + 0x00000000u, *(&bindingTableStatesPointers[0])); EXPECT_EQ(sshUsed + 0x00000040u, *(&bindingTableStatesPointers[1])); } HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, WhenGettingBindingTableStateThenSurfaceStatePointersAreCorrect) { using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER; // define kernel info auto pKernelInfo = std::make_unique(); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32; // define patch offsets for global, constant, private, event pool and default device queue surfaces pKernelInfo->setGlobalVariablesSurface(8, 0, 0); pKernelInfo->setGlobalConstantsSurface(8, 8, 64); pKernelInfo->setPrivateMemory(32, false, 8, 16, 128); pKernelInfo->setDeviceSideEnqueueEventPoolSurface(8, 24, 192); pKernelInfo->setDeviceSideEnqueueDefaultQueueSurface(8, 32, 256); // create program with valid context MockContext context; MockProgram program(&context, false, toClDeviceVector(*pClDevice)); // setup global memory char globalBuffer[16]; 
GraphicsAllocation gfxGlobalAlloc(0, AllocationType::UNKNOWN, globalBuffer, castToUint64(globalBuffer), 0llu, sizeof(globalBuffer), MemoryPool::MemoryNull, MemoryManager::maxOsContextCount); program.setGlobalSurface(&gfxGlobalAlloc); // setup constant memory char constBuffer[16]; GraphicsAllocation gfxConstAlloc(0, AllocationType::UNKNOWN, constBuffer, castToUint64(constBuffer), 0llu, sizeof(constBuffer), MemoryPool::MemoryNull, MemoryManager::maxOsContextCount); program.setConstantSurface(&gfxConstAlloc); // create kernel MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice); // setup surface state heap constexpr uint32_t numSurfaces = 5; constexpr uint32_t sshSize = numSurfaces * sizeof(typename FamilyType::RENDER_SURFACE_STATE) + numSurfaces * sizeof(typename FamilyType::BINDING_TABLE_STATE); unsigned char *surfaceStateHeap = reinterpret_cast(alignedMalloc(sshSize, sizeof(typename FamilyType::RENDER_SURFACE_STATE))); uint32_t btiOffset = static_cast(numSurfaces * sizeof(typename FamilyType::RENDER_SURFACE_STATE)); auto bti = reinterpret_cast(surfaceStateHeap + btiOffset); for (uint32_t i = 0; i < numSurfaces; ++i) { bti[i].setSurfaceStatePointer(i * sizeof(typename FamilyType::RENDER_SURFACE_STATE)); } pKernelInfo->heapInfo.pSsh = surfaceStateHeap; pKernelInfo->heapInfo.SurfaceStateHeapSize = sshSize; // setup kernel heap uint32_t kernelIsa[32]; pKernelInfo->heapInfo.pKernelHeap = kernelIsa; pKernelInfo->heapInfo.KernelHeapSize = sizeof(kernelIsa); pKernelInfo->setBindingTable(btiOffset, 5); pKernelInfo->setLocalIds({1, 1, 1}); // initialize kernel ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); // setup cross thread data char pCrossThreadData[64]; pKernel->setCrossThreadData(pCrossThreadData, sizeof(pCrossThreadData)); // try with different offsets to surface state base address for (uint32_t ssbaOffset : {0U, (uint32_t)sizeof(typename FamilyType::RENDER_SURFACE_STATE)}) { CommandQueueHw cmdQ(nullptr, pClDevice, 0, false); auto &commandStream = 
cmdQ.getCS(1024); auto pWalkerCmd = static_cast(commandStream.getSpace(sizeof(GPGPU_WALKER))); *pWalkerCmd = FamilyType::cmdInitGpgpuWalker; auto &dsh = cmdQ.getIndirectHeap(IndirectHeap::Type::DYNAMIC_STATE, 8192); auto &ioh = cmdQ.getIndirectHeap(IndirectHeap::Type::INDIRECT_OBJECT, 8192); auto &ssh = cmdQ.getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 8192); // Initialize binding table state pointers with pattern EXPECT_EQ(numSurfaces, pKernel->getNumberOfBindingTableStates()); const size_t localWorkSizes[3]{256, 1, 1}; dsh.getSpace(sizeof(INTERFACE_DESCRIPTOR_DATA)); ssh.getSpace(ssbaOffset); // offset local ssh from surface state base address uint32_t localSshOffset = static_cast(ssh.getUsed()); // push surfaces states and binding table to given ssh heap uint32_t interfaceDescriptorIndex = 0; auto isCcsUsed = EngineHelpers::isCcs(cmdQ.getGpgpuEngine().osContext->getEngineType()); auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(*pKernel); HardwareCommandsHelper::sendIndirectState( commandStream, dsh, ioh, ssh, *pKernel, pKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed), pKernel->getKernelInfo().getMaxSimdSize(), localWorkSizes, 0, interfaceDescriptorIndex, pDevice->getPreemptionMode(), pWalkerCmd, nullptr, true, *pDevice); bti = reinterpret_cast(reinterpret_cast(ssh.getCpuBase()) + localSshOffset + btiOffset); for (uint32_t i = 0; i < numSurfaces; ++i) { uint32_t expected = localSshOffset + i * sizeof(typename FamilyType::RENDER_SURFACE_STATE); EXPECT_EQ(expected, bti[i].getSurfaceStatePointer()); } program.setGlobalSurface(nullptr); program.setConstantSurface(nullptr); //exhaust space to trigger reload ssh.getSpace(ssh.getAvailableSpace()); dsh.getSpace(dsh.getAvailableSpace()); } alignedFree(surfaceStateHeap); delete pKernel; } HWTEST_F(HardwareCommandsTest, GivenBuffersNotRequiringSshWhenSettingBindingTableStatesForKernelThenSshIsNotUsed) { // define kernel info auto pKernelInfo = std::make_unique(); 
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1; // create program with valid context MockContext context; MockProgram program(&context, false, toClDeviceVector(*pClDevice)); // create kernel MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice); // setup surface state heap char surfaceStateHeap[256]; pKernelInfo->heapInfo.pSsh = surfaceStateHeap; pKernelInfo->heapInfo.SurfaceStateHeapSize = sizeof(surfaceStateHeap); pKernelInfo->addArgBuffer(0, 0, 0, 0); pKernelInfo->setAddressQualifier(0, KernelArgMetadata::AddrGlobal); // initialize kernel ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); CommandQueueHw cmdQ(nullptr, pClDevice, 0, false); auto &ssh = cmdQ.getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 8192); ssh.align(8); auto usedBefore = ssh.getUsed(); // Initialize binding table state pointers with pattern auto numSurfaceStates = pKernel->getNumberOfBindingTableStates(); EXPECT_EQ(0u, numSurfaceStates); // set binding table states auto dstBindingTablePointer = pushBindingTableAndSurfaceStates(ssh, *pKernel); EXPECT_EQ(0u, dstBindingTablePointer); auto usedAfter = ssh.getUsed(); EXPECT_EQ(usedBefore, usedAfter); ssh.align(8); EXPECT_EQ(usedAfter, ssh.getUsed()); delete pKernel; } HWTEST_F(HardwareCommandsTest, GivenZeroSurfaceStatesWhenSettingBindingTableStatesThenPointerIsZero) { // define kernel info auto pKernelInfo = std::make_unique(); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1; // create program with valid context MockContext context; MockProgram program(&context, false, toClDeviceVector(*pClDevice)); // create kernel MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice); // setup surface state heap char surfaceStateHeap[256]; pKernelInfo->heapInfo.pSsh = surfaceStateHeap; pKernelInfo->heapInfo.SurfaceStateHeapSize = sizeof(surfaceStateHeap); // initialize kernel ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); CommandQueueHw cmdQ(nullptr, pClDevice, 0, false); auto &ssh = 
cmdQ.getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 8192); // Initialize binding table state pointers with pattern auto numSurfaceStates = pKernel->getNumberOfBindingTableStates(); EXPECT_EQ(0u, numSurfaceStates); auto dstBindingTablePointer = pushBindingTableAndSurfaceStates(ssh, *pKernel); EXPECT_EQ(0u, dstBindingTablePointer); dstBindingTablePointer = pushBindingTableAndSurfaceStates(ssh, *pKernel); EXPECT_EQ(0u, dstBindingTablePointer); pKernelInfo->setBindingTable(64, 0); dstBindingTablePointer = pushBindingTableAndSurfaceStates(ssh, *pKernel); EXPECT_EQ(0u, dstBindingTablePointer); delete pKernel; } HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, GivenKernelWithInvalidSamplerStateArrayWhenSendIndirectStateIsCalledThenInterfaceDescriptorIsNotPopulated) { using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER; CommandQueueHw cmdQ(pContext, pClDevice, 0, false); auto &commandStream = cmdQ.getCS(1024); auto pWalkerCmd = static_cast(commandStream.getSpace(sizeof(GPGPU_WALKER))); *pWalkerCmd = FamilyType::cmdInitGpgpuWalker; auto &dsh = cmdQ.getIndirectHeap(IndirectHeap::Type::DYNAMIC_STATE, 8192); auto &ioh = cmdQ.getIndirectHeap(IndirectHeap::Type::INDIRECT_OBJECT, 8192); auto &ssh = cmdQ.getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 8192); const size_t localWorkSize = 256; const size_t localWorkSizes[3]{localWorkSize, 1, 1}; uint32_t interfaceDescriptorIndex = 0; auto isCcsUsed = EngineHelpers::isCcs(cmdQ.getGpgpuEngine().osContext->getEngineType()); auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel); //Undefined Offset, Defined BorderColorOffset mockKernelWithInternal->kernelInfo.setSamplerTable(0, 2, undefined); HardwareCommandsHelper::sendIndirectState( commandStream, dsh, ioh, ssh, *mockKernelWithInternal->mockKernel, mockKernelWithInternal->mockKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed), 
mockKernelWithInternal->mockKernel->getKernelInfo().getMaxSimdSize(), localWorkSizes, 0, interfaceDescriptorIndex, pDevice->getPreemptionMode(), pWalkerCmd, nullptr, true, *pDevice); auto interfaceDescriptor = reinterpret_cast(dsh.getCpuBase()); EXPECT_EQ(0U, interfaceDescriptor->getSamplerStatePointer()); EXPECT_EQ(0U, interfaceDescriptor->getSamplerCount()); //Defined Offset, Undefined BorderColorOffset mockKernelWithInternal->kernelInfo.setSamplerTable(undefined, 2, 0); HardwareCommandsHelper::sendIndirectState( commandStream, dsh, ioh, ssh, *mockKernelWithInternal->mockKernel, mockKernelWithInternal->mockKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed), mockKernelWithInternal->mockKernel->getKernelInfo().getMaxSimdSize(), localWorkSizes, 0, interfaceDescriptorIndex, pDevice->getPreemptionMode(), pWalkerCmd, nullptr, true, *pDevice); interfaceDescriptor = reinterpret_cast(dsh.getCpuBase()); EXPECT_EQ(0U, interfaceDescriptor->getSamplerStatePointer()); EXPECT_EQ(0U, interfaceDescriptor->getSamplerCount()); } HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, GivenKernelWithSamplersWhenIndirectStateIsProgrammedThenBorderColorIsCorrectlyCopiedToDshAndSamplerStatesAreProgrammedWithPointer) { typedef typename FamilyType::BINDING_TABLE_STATE BINDING_TABLE_STATE; typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; typedef typename FamilyType::SAMPLER_STATE SAMPLER_STATE; using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER; CommandQueueHw cmdQ(nullptr, pClDevice, 0, false); const size_t localWorkSizes[3]{1, 1, 1}; auto &commandStream = cmdQ.getCS(1024); auto pWalkerCmd = static_cast(commandStream.getSpace(sizeof(GPGPU_WALKER))); *pWalkerCmd = FamilyType::cmdInitGpgpuWalker; auto &dsh = cmdQ.getIndirectHeap(IndirectHeap::Type::DYNAMIC_STATE, 8192); auto &ioh = cmdQ.getIndirectHeap(IndirectHeap::Type::INDIRECT_OBJECT, 8192); auto &ssh = 
cmdQ.getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 8192); const uint32_t samplerTableOffset = 64; const uint32_t samplerStateSize = sizeof(SAMPLER_STATE) * 2; mockKernelWithInternal->kernelInfo.setSamplerTable(0, 2, static_cast(samplerTableOffset)); const uint32_t mockDshSize = (samplerTableOffset + samplerStateSize) * 4; char *mockDsh = new char[mockDshSize]; memset(mockDsh, 6, samplerTableOffset); memset(mockDsh + samplerTableOffset, 8, samplerTableOffset); mockKernelWithInternal->kernelInfo.heapInfo.pDsh = mockDsh; mockKernelWithInternal->kernelInfo.heapInfo.DynamicStateHeapSize = mockDshSize; uint64_t interfaceDescriptorTableOffset = dsh.getUsed(); dsh.getSpace(sizeof(INTERFACE_DESCRIPTOR_DATA)); dsh.getSpace(4); char *initialDshPointer = static_cast(dsh.getCpuBase()) + dsh.getUsed(); char *borderColorPointer = alignUp(initialDshPointer, 64); uint32_t borderColorOffset = static_cast(borderColorPointer - static_cast(dsh.getCpuBase())); SAMPLER_STATE *pSamplerState = reinterpret_cast(mockDsh + samplerTableOffset); for (uint32_t i = 0; i < 2; i++) { pSamplerState[i].setIndirectStatePointer(0); } mockKernelWithInternal->mockKernel->setCrossThreadData(mockKernelWithInternal->crossThreadData, sizeof(mockKernelWithInternal->crossThreadData)); mockKernelWithInternal->mockKernel->setSshLocal(mockKernelWithInternal->sshLocal, sizeof(mockKernelWithInternal->sshLocal)); uint32_t interfaceDescriptorIndex = 0; auto isCcsUsed = EngineHelpers::isCcs(cmdQ.getGpgpuEngine().osContext->getEngineType()); auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel); HardwareCommandsHelper::sendIndirectState( commandStream, dsh, ioh, ssh, *mockKernelWithInternal->mockKernel, mockKernelWithInternal->mockKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed), 8, localWorkSizes, interfaceDescriptorTableOffset, interfaceDescriptorIndex, pDevice->getPreemptionMode(), pWalkerCmd, nullptr, true, *pDevice); bool isMemorySame = 
memcmp(borderColorPointer, mockDsh, samplerTableOffset) == 0; EXPECT_TRUE(isMemorySame); SAMPLER_STATE *pSamplerStatesCopied = reinterpret_cast(borderColorPointer + samplerTableOffset); for (uint32_t i = 0; i < 2; i++) { EXPECT_EQ(pSamplerState[i].getNonNormalizedCoordinateEnable(), pSamplerStatesCopied[i].getNonNormalizedCoordinateEnable()); EXPECT_EQ(pSamplerState[i].getTcxAddressControlMode(), pSamplerStatesCopied[i].getTcxAddressControlMode()); EXPECT_EQ(pSamplerState[i].getTcyAddressControlMode(), pSamplerStatesCopied[i].getTcyAddressControlMode()); EXPECT_EQ(pSamplerState[i].getTczAddressControlMode(), pSamplerStatesCopied[i].getTczAddressControlMode()); EXPECT_EQ(pSamplerState[i].getMinModeFilter(), pSamplerStatesCopied[i].getMinModeFilter()); EXPECT_EQ(pSamplerState[i].getMagModeFilter(), pSamplerStatesCopied[i].getMagModeFilter()); EXPECT_EQ(pSamplerState[i].getMipModeFilter(), pSamplerStatesCopied[i].getMipModeFilter()); EXPECT_EQ(pSamplerState[i].getUAddressMinFilterRoundingEnable(), pSamplerStatesCopied[i].getUAddressMinFilterRoundingEnable()); EXPECT_EQ(pSamplerState[i].getUAddressMagFilterRoundingEnable(), pSamplerStatesCopied[i].getUAddressMagFilterRoundingEnable()); EXPECT_EQ(pSamplerState[i].getVAddressMinFilterRoundingEnable(), pSamplerStatesCopied[i].getVAddressMinFilterRoundingEnable()); EXPECT_EQ(pSamplerState[i].getVAddressMagFilterRoundingEnable(), pSamplerStatesCopied[i].getVAddressMagFilterRoundingEnable()); EXPECT_EQ(pSamplerState[i].getRAddressMagFilterRoundingEnable(), pSamplerStatesCopied[i].getRAddressMagFilterRoundingEnable()); EXPECT_EQ(pSamplerState[i].getRAddressMinFilterRoundingEnable(), pSamplerStatesCopied[i].getRAddressMinFilterRoundingEnable()); EXPECT_EQ(pSamplerState[i].getLodAlgorithm(), pSamplerStatesCopied[i].getLodAlgorithm()); EXPECT_EQ(pSamplerState[i].getTextureLodBias(), pSamplerStatesCopied[i].getTextureLodBias()); EXPECT_EQ(pSamplerState[i].getLodPreclampMode(), pSamplerStatesCopied[i].getLodPreclampMode()); 
EXPECT_EQ(pSamplerState[i].getTextureBorderColorMode(), pSamplerStatesCopied[i].getTextureBorderColorMode()); EXPECT_EQ(pSamplerState[i].getSamplerDisable(), pSamplerStatesCopied[i].getSamplerDisable()); EXPECT_EQ(pSamplerState[i].getCubeSurfaceControlMode(), pSamplerStatesCopied[i].getCubeSurfaceControlMode()); EXPECT_EQ(pSamplerState[i].getShadowFunction(), pSamplerStatesCopied[i].getShadowFunction()); EXPECT_EQ(pSamplerState[i].getChromakeyMode(), pSamplerStatesCopied[i].getChromakeyMode()); EXPECT_EQ(pSamplerState[i].getChromakeyIndex(), pSamplerStatesCopied[i].getChromakeyIndex()); EXPECT_EQ(pSamplerState[i].getChromakeyEnable(), pSamplerStatesCopied[i].getChromakeyEnable()); EXPECT_EQ(pSamplerState[i].getMaxLod(), pSamplerStatesCopied[i].getMaxLod()); EXPECT_EQ(pSamplerState[i].getMinLod(), pSamplerStatesCopied[i].getMinLod()); EXPECT_EQ(pSamplerState[i].getLodClampMagnificationMode(), pSamplerStatesCopied[i].getLodClampMagnificationMode()); EXPECT_EQ(borderColorOffset, pSamplerStatesCopied[i].getIndirectStatePointer()); } delete[] mockDsh; } HWTEST_F(HardwareCommandsTest, givenEnabledPassInlineDataWhenKernelAllowsInlineThenReturnTrue) { DebugManagerStateRestore restore; DebugManager.flags.EnablePassInlineData.set(1u); uint32_t crossThreadData[8]; mockKernelWithInternal->kernelInfo.kernelDescriptor.kernelAttributes.flags.passInlineData = true; mockKernelWithInternal->mockKernel->setCrossThreadData(crossThreadData, sizeof(crossThreadData)); EXPECT_TRUE(HardwareCommandsHelper::inlineDataProgrammingRequired(*mockKernelWithInternal->mockKernel)); } HWTEST_F(HardwareCommandsTest, givenNoDebugSettingsWhenDefaultModeIsExcercisedThenWeFollowKernelSettingForInlineProgramming) { mockKernelWithInternal->kernelInfo.kernelDescriptor.kernelAttributes.flags.passInlineData = true; EXPECT_TRUE(HardwareCommandsHelper::inlineDataProgrammingRequired(*mockKernelWithInternal->mockKernel)); } HWTEST_F(HardwareCommandsTest, 
givenDisabledPassInlineDataWhenKernelAllowsInlineThenReturnFalse) { DebugManagerStateRestore restore; DebugManager.flags.EnablePassInlineData.set(0u); mockKernelWithInternal->kernelInfo.kernelDescriptor.kernelAttributes.flags.passInlineData = true; EXPECT_FALSE(HardwareCommandsHelper::inlineDataProgrammingRequired(*mockKernelWithInternal->mockKernel)); } HWTEST_F(HardwareCommandsTest, givenEnabledPassInlineDataWhenKernelDisallowsInlineThenReturnFalse) { DebugManagerStateRestore restore; DebugManager.flags.EnablePassInlineData.set(1u); uint32_t crossThreadData[8]; mockKernelWithInternal->kernelInfo.kernelDescriptor.kernelAttributes.flags.passInlineData = false; mockKernelWithInternal->mockKernel->setCrossThreadData(crossThreadData, sizeof(crossThreadData)); EXPECT_FALSE(HardwareCommandsHelper::inlineDataProgrammingRequired(*mockKernelWithInternal->mockKernel)); } HWTEST_F(HardwareCommandsTest, whenNumLocalIdsIsBiggerThanZeroThenExpectLocalIdsInUseIsTrue) { mockKernelWithInternal->kernelInfo.kernelDescriptor.kernelAttributes.numLocalIdChannels = 1; EXPECT_TRUE(HardwareCommandsHelper::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel)); } HWTEST_F(HardwareCommandsTest, whenNumLocalIdsIsZeroThenExpectLocalIdsInUseIsFalse) { mockKernelWithInternal->kernelInfo.kernelDescriptor.kernelAttributes.numLocalIdChannels = 0; EXPECT_FALSE(HardwareCommandsHelper::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel)); } HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenCacheFlushAfterWalkerEnabledWhenProgramGlobalSurfacePresentThenExpectCacheFlushCommand) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using MEDIA_STATE_FLUSH = typename FamilyType::MEDIA_STATE_FLUSH; using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD; DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableCacheFlushAfterWalker.set(1); CommandQueueHw cmdQ(nullptr, pClDevice, 0, false); auto &commandStream = cmdQ.getCS(1024); 
MockGraphicsAllocation globalAllocation; mockKernelWithInternal->mockProgram->setGlobalSurface(&globalAllocation); Kernel::CacheFlushAllocationsVec allocs; mockKernelWithInternal->mockKernel->getAllocationsForCacheFlush(allocs); EXPECT_NE(allocs.end(), std::find(allocs.begin(), allocs.end(), &globalAllocation)); size_t expectedSize = sizeof(PIPE_CONTROL); size_t actualSize = HardwareCommandsHelper::getSizeRequiredForCacheFlush(cmdQ, mockKernelWithInternal->mockKernel, 0U); EXPECT_EQ(expectedSize, actualSize); HardwareCommandsHelper::programCacheFlushAfterWalkerCommand(&commandStream, cmdQ, mockKernelWithInternal->mockKernel, 0U); HardwareParse hwParse; hwParse.parseCommands(commandStream); PIPE_CONTROL *pipeControl = hwParse.getCommand(); ASSERT_NE(nullptr, pipeControl); EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); EXPECT_TRUE(pipeControl->getDcFlushEnable()); mockKernelWithInternal->mockProgram->setGlobalSurface(nullptr); } HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenCacheFlushAfterWalkerEnabledWhenSvmAllocationsSetAsCacheFlushRequiringThenExpectCacheFlushCommand) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using MEDIA_STATE_FLUSH = typename FamilyType::MEDIA_STATE_FLUSH; using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD; DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableCacheFlushAfterWalker.set(1); CommandQueueHw cmdQ(nullptr, pClDevice, 0, false); auto &commandStream = cmdQ.getCS(1024); char buff[MemoryConstants::pageSize * 2]; MockGraphicsAllocation svmAllocation1{alignUp(buff, MemoryConstants::pageSize), MemoryConstants::pageSize}; mockKernelWithInternal->mockKernel->kernelSvmGfxAllocations.push_back(&svmAllocation1); MockGraphicsAllocation svmAllocation2{alignUp(buff, MemoryConstants::pageSize), MemoryConstants::pageSize}; svmAllocation2.setFlushL3Required(false); mockKernelWithInternal->mockKernel->kernelSvmGfxAllocations.push_back(&svmAllocation2); 
mockKernelWithInternal->mockKernel->svmAllocationsRequireCacheFlush = true; Kernel::CacheFlushAllocationsVec allocs; mockKernelWithInternal->mockKernel->getAllocationsForCacheFlush(allocs); EXPECT_NE(allocs.end(), std::find(allocs.begin(), allocs.end(), &svmAllocation1)); EXPECT_EQ(allocs.end(), std::find(allocs.begin(), allocs.end(), &svmAllocation2)); size_t expectedSize = sizeof(PIPE_CONTROL); size_t actualSize = HardwareCommandsHelper::getSizeRequiredForCacheFlush(cmdQ, mockKernelWithInternal->mockKernel, 0U); EXPECT_EQ(expectedSize, actualSize); HardwareCommandsHelper::programCacheFlushAfterWalkerCommand(&commandStream, cmdQ, mockKernelWithInternal->mockKernel, 0U); HardwareParse hwParse; hwParse.parseCommands(commandStream); PIPE_CONTROL *pipeControl = hwParse.getCommand(); ASSERT_NE(nullptr, pipeControl); EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); EXPECT_TRUE(pipeControl->getDcFlushEnable()); } HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenCacheFlushAfterWalkerEnabledWhenKernelArgIsSetAsCacheFlushRequiredThenExpectCacheFlushCommand) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using MEDIA_STATE_FLUSH = typename FamilyType::MEDIA_STATE_FLUSH; using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD; DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableCacheFlushAfterWalker.set(1); CommandQueueHw cmdQ(nullptr, pClDevice, 0, false); auto &commandStream = cmdQ.getCS(1024); addSpaceForSingleKernelArg(); MockGraphicsAllocation cacheRequiringAllocation; mockKernelWithInternal->mockKernel->kernelArgRequiresCacheFlush.resize(2); mockKernelWithInternal->mockKernel->kernelArgRequiresCacheFlush[0] = &cacheRequiringAllocation; Kernel::CacheFlushAllocationsVec allocs; mockKernelWithInternal->mockKernel->getAllocationsForCacheFlush(allocs); EXPECT_NE(allocs.end(), std::find(allocs.begin(), allocs.end(), &cacheRequiringAllocation)); size_t expectedSize = sizeof(PIPE_CONTROL); size_t 
actualSize = HardwareCommandsHelper::getSizeRequiredForCacheFlush(cmdQ, mockKernelWithInternal->mockKernel, 0U); EXPECT_EQ(expectedSize, actualSize); HardwareCommandsHelper::programCacheFlushAfterWalkerCommand(&commandStream, cmdQ, mockKernelWithInternal->mockKernel, 0U); HardwareParse hwParse; hwParse.parseCommands(commandStream); PIPE_CONTROL *pipeControl = hwParse.getCommand(); ASSERT_NE(nullptr, pipeControl); EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); EXPECT_TRUE(pipeControl->getDcFlushEnable()); } HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenCacheFlushAfterWalkerDisabledWhenGettingRequiredCacheFlushSizeThenReturnZero) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableCacheFlushAfterWalker.set(0); CommandQueueHw cmdQ(nullptr, pClDevice, 0, false); size_t expectedSize = 0U; size_t actualSize = HardwareCommandsHelper::getSizeRequiredForCacheFlush(cmdQ, mockKernelWithInternal->mockKernel, 0U); EXPECT_EQ(expectedSize, actualSize); } TEST_F(HardwareCommandsTest, givenCacheFlushAfterWalkerEnabledWhenPlatformNotSupportFlushThenExpectNoCacheAllocationForFlush) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableCacheFlushAfterWalker.set(-1); hardwareInfo.capabilityTable.supportCacheFlushAfterWalker = false; StackVec allocationsForCacheFlush; mockKernelWithInternal->mockKernel->getAllocationsForCacheFlush(allocationsForCacheFlush); EXPECT_EQ(0U, allocationsForCacheFlush.size()); } using KernelCacheFlushTests = Test>; HWTEST_F(KernelCacheFlushTests, givenLocallyUncachedBufferWhenGettingAllocationsForFlushThenEmptyVectorIsReturned) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableCacheFlushAfterWalker.set(-1); DebugManager.flags.CreateMultipleSubDevices.set(2); auto context = std::make_unique(); auto kernel = std::unique_ptr(Kernel::create(pProgram, pProgram->getKernelInfoForKernel("CopyBuffer"), *context->getDevice(0), &retVal)); 
cl_mem_properties_intel bufferPropertiesUncachedResource[] = {CL_MEM_FLAGS_INTEL, CL_MEM_LOCALLY_UNCACHED_RESOURCE, 0}; auto bufferLocallyUncached = clCreateBufferWithPropertiesINTEL(context.get(), bufferPropertiesUncachedResource, 0, 1, nullptr, nullptr); kernel->setArg(0, sizeof(bufferLocallyUncached), &bufferLocallyUncached); using CacheFlushAllocationsVec = StackVec; CacheFlushAllocationsVec cacheFlushVec; kernel->getAllocationsForCacheFlush(cacheFlushVec); EXPECT_EQ(0u, cacheFlushVec.size()); auto bufferRegular = clCreateBufferWithPropertiesINTEL(context.get(), nullptr, 0, 1, nullptr, nullptr); kernel->setArg(1, sizeof(bufferRegular), &bufferRegular); kernel->getAllocationsForCacheFlush(cacheFlushVec); size_t expectedCacheFlushVecSize = (hardwareInfo.capabilityTable.supportCacheFlushAfterWalker ? 1u : 0u); EXPECT_EQ(expectedCacheFlushVecSize, cacheFlushVec.size()); clReleaseMemObject(bufferLocallyUncached); clReleaseMemObject(bufferRegular); } struct HardwareCommandsImplicitArgsTests : Test { void SetUp() override { ClDeviceFixture::SetUp(); indirectHeapAllocation = pDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); expectedImplicitArgs.numWorkDim = 3; expectedImplicitArgs.simdWidth = 32; expectedImplicitArgs.localSizeX = 2; expectedImplicitArgs.localSizeY = 3; expectedImplicitArgs.localSizeZ = 4; expectedImplicitArgs.globalOffsetX = 1; expectedImplicitArgs.globalOffsetY = 2; expectedImplicitArgs.globalOffsetZ = 3; expectedImplicitArgs.groupCountX = 2; expectedImplicitArgs.groupCountY = 1; expectedImplicitArgs.groupCountZ = 3; } void TearDown() override { pDevice->getMemoryManager()->freeGraphicsMemory(indirectHeapAllocation); ClDeviceFixture::TearDown(); } template void dispatchKernelWithImplicitArgs() { expectedImplicitArgs.globalSizeX = expectedImplicitArgs.localSizeX * expectedImplicitArgs.groupCountX; expectedImplicitArgs.globalSizeY = 
expectedImplicitArgs.localSizeY * expectedImplicitArgs.groupCountY; expectedImplicitArgs.globalSizeZ = expectedImplicitArgs.localSizeZ * expectedImplicitArgs.groupCountZ; IndirectHeap indirectHeap(indirectHeapAllocation, false); auto pKernelInfo = std::make_unique(); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = expectedImplicitArgs.simdWidth; UnitTestHelper::adjustKernelDescriptorForImplicitArgs(pKernelInfo->kernelDescriptor); pKernelInfo->kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[0] = workgroupDimOrder[0]; pKernelInfo->kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[1] = workgroupDimOrder[1]; pKernelInfo->kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[2] = workgroupDimOrder[2]; MockContext context(pClDevice); MockProgram program(&context, false, toClDeviceVector(*pClDevice)); MockKernel kernel(&program, *pKernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); auto pImplicitArgs = kernel.getImplicitArgs(); ASSERT_NE(nullptr, pImplicitArgs); kernel.setCrossThreadData(nullptr, sizeof(uint64_t)); kernel.setWorkDim(expectedImplicitArgs.numWorkDim); kernel.setLocalWorkSizeValues(expectedImplicitArgs.localSizeX, expectedImplicitArgs.localSizeY, expectedImplicitArgs.localSizeZ); kernel.setGlobalWorkSizeValues(static_cast(expectedImplicitArgs.globalSizeX), static_cast(expectedImplicitArgs.globalSizeY), static_cast(expectedImplicitArgs.globalSizeZ)); kernel.setGlobalWorkOffsetValues(static_cast(expectedImplicitArgs.globalOffsetX), static_cast(expectedImplicitArgs.globalOffsetY), static_cast(expectedImplicitArgs.globalOffsetZ)); kernel.setNumWorkGroupsValues(expectedImplicitArgs.groupCountX, expectedImplicitArgs.groupCountY, expectedImplicitArgs.groupCountZ); implicitArgsProgrammingSize = ImplicitArgsHelper::getSizeForImplicitArgsPatching(pImplicitArgs, kernel.getDescriptor(), pDevice->getHardwareInfo()); auto sizeCrossThreadData = kernel.getCrossThreadDataSize(); HardwareCommandsHelper::sendCrossThreadData( 
indirectHeap, kernel, false, nullptr, sizeCrossThreadData); EXPECT_LE(implicitArgsProgrammingSize, indirectHeap.getUsed()); if (FamilyType::supportsCmdSet(IGFX_XE_HP_CORE)) { expectedImplicitArgs.localIdTablePtr = indirectHeapAllocation->getGpuAddress(); } } ImplicitArgs expectedImplicitArgs = {sizeof(ImplicitArgs)}; GraphicsAllocation *indirectHeapAllocation = nullptr; std::array workgroupDimOrder{0, 1, 2}; uint32_t implicitArgsProgrammingSize = 0u; }; HWCMDTEST_F(IGFX_XE_HP_CORE, HardwareCommandsImplicitArgsTests, givenXeHpAndLaterPlatformWhenSendingIndirectStateForKernelWithImplicitArgsThenImplicitArgsAreSentToIndirectHeapWithLocalIds) { dispatchKernelWithImplicitArgs(); auto localIdsProgrammingSize = implicitArgsProgrammingSize - sizeof(ImplicitArgs); auto implicitArgsInIndirectData = ptrOffset(indirectHeapAllocation->getUnderlyingBuffer(), localIdsProgrammingSize); EXPECT_EQ(0, memcmp(implicitArgsInIndirectData, &expectedImplicitArgs, sizeof(ImplicitArgs))); } HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsImplicitArgsTests, givenPreXeHpPlatformWhenSendingIndirectStateForKernelWithImplicitArgsThenImplicitArgsAreSentToIndirectHeapWithoutLocalIds) { dispatchKernelWithImplicitArgs(); auto implicitArgsInIndirectData = indirectHeapAllocation->getUnderlyingBuffer(); EXPECT_EQ(0, memcmp(implicitArgsInIndirectData, &expectedImplicitArgs, sizeof(ImplicitArgs))); auto crossThreadDataInIndirectData = ptrOffset(indirectHeapAllocation->getUnderlyingBuffer(), 0x80); auto programmedImplicitArgsGpuVA = reinterpret_cast(crossThreadDataInIndirectData)[0]; EXPECT_EQ(indirectHeapAllocation->getGpuAddress(), programmedImplicitArgsGpuVA); } HWCMDTEST_F(IGFX_XE_HP_CORE, HardwareCommandsImplicitArgsTests, givenKernelWithImplicitArgsAndRuntimeLocalIdsGenerationWhenSendingIndirectStateThenLocalIdsAreGeneratedAndCorrectlyProgrammedInCrossThreadData) { DebugManagerStateRestore restorer; DebugManager.flags.EnableHwGenerationLocalIds.set(0); workgroupDimOrder[0] = 2; workgroupDimOrder[1] = 1; 
workgroupDimOrder[2] = 0; std::array localSize{2, 3, 4}; size_t totalLocalSize = localSize[0] * localSize[1] * localSize[2]; expectedImplicitArgs.localSizeX = localSize[0]; expectedImplicitArgs.localSizeY = localSize[1]; expectedImplicitArgs.localSizeZ = localSize[2]; dispatchKernelWithImplicitArgs(); auto grfSize = ImplicitArgsHelper::getGrfSize(expectedImplicitArgs.simdWidth, sizeof(typename FamilyType::GRF)); auto expectedLocalIds = alignedMalloc(implicitArgsProgrammingSize - sizeof(ImplicitArgs), MemoryConstants::cacheLineSize); generateLocalIDs(expectedLocalIds, expectedImplicitArgs.simdWidth, localSize, workgroupDimOrder, false, grfSize); auto localIdsProgrammingSize = implicitArgsProgrammingSize - sizeof(ImplicitArgs); size_t sizeForLocalIds = PerThreadDataHelper::getPerThreadDataSizeTotal(expectedImplicitArgs.simdWidth, grfSize, 3u, totalLocalSize); EXPECT_EQ(0, memcmp(expectedLocalIds, indirectHeapAllocation->getUnderlyingBuffer(), sizeForLocalIds)); alignedFree(expectedLocalIds); auto implicitArgsInIndirectData = ptrOffset(indirectHeapAllocation->getUnderlyingBuffer(), localIdsProgrammingSize); EXPECT_EQ(0, memcmp(implicitArgsInIndirectData, &expectedImplicitArgs, sizeof(ImplicitArgs))); } HWCMDTEST_F(IGFX_XE_HP_CORE, HardwareCommandsImplicitArgsTests, givenKernelWithImplicitArgsAndHwLocalIdsGenerationWhenSendingIndirectStateThenLocalIdsAreGeneratedAndCorrectlyProgrammedInCrossThreadData) { DebugManagerStateRestore restorer; DebugManager.flags.EnableHwGenerationLocalIds.set(1); workgroupDimOrder[0] = 2; workgroupDimOrder[1] = 1; workgroupDimOrder[2] = 0; std::array expectedDimOrder = {0, 2, 1}; std::array localSize{2, 3, 4}; size_t totalLocalSize = localSize[0] * localSize[1] * localSize[2]; expectedImplicitArgs.localSizeX = localSize[0]; expectedImplicitArgs.localSizeY = localSize[1]; expectedImplicitArgs.localSizeZ = localSize[2]; dispatchKernelWithImplicitArgs(); auto grfSize = ImplicitArgsHelper::getGrfSize(expectedImplicitArgs.simdWidth, 
sizeof(typename FamilyType::GRF)); auto expectedLocalIds = alignedMalloc(implicitArgsProgrammingSize - sizeof(ImplicitArgs), MemoryConstants::cacheLineSize); generateLocalIDs(expectedLocalIds, expectedImplicitArgs.simdWidth, localSize, expectedDimOrder, false, grfSize); auto localIdsProgrammingSize = implicitArgsProgrammingSize - sizeof(ImplicitArgs); size_t sizeForLocalIds = PerThreadDataHelper::getPerThreadDataSizeTotal(expectedImplicitArgs.simdWidth, grfSize, 3u, totalLocalSize); EXPECT_EQ(0, memcmp(expectedLocalIds, indirectHeapAllocation->getUnderlyingBuffer(), sizeForLocalIds)); alignedFree(expectedLocalIds); auto implicitArgsInIndirectData = ptrOffset(indirectHeapAllocation->getUnderlyingBuffer(), localIdsProgrammingSize); EXPECT_EQ(0, memcmp(implicitArgsInIndirectData, &expectedImplicitArgs, sizeof(ImplicitArgs))); } HWCMDTEST_F(IGFX_XE_HP_CORE, HardwareCommandsImplicitArgsTests, givenKernelWithImplicitArgsWhenSendingIndirectStateWithSimd1ThenLocalIdsAreGeneratedCorrectly) { workgroupDimOrder[0] = 2; workgroupDimOrder[1] = 1; workgroupDimOrder[2] = 0; expectedImplicitArgs.simdWidth = 1; expectedImplicitArgs.localSizeX = 2; expectedImplicitArgs.localSizeY = 2; expectedImplicitArgs.localSizeZ = 1; dispatchKernelWithImplicitArgs(); uint16_t expectedLocalIds[][3] = {{0, 0, 0}, {0, 1, 0}, {0, 0, 1}, {0, 1, 1}}; EXPECT_EQ(0, memcmp(expectedLocalIds, indirectHeapAllocation->getUnderlyingBuffer(), sizeof(expectedLocalIds))); auto localIdsProgrammingSize = implicitArgsProgrammingSize - sizeof(ImplicitArgs); EXPECT_EQ(alignUp(sizeof(expectedLocalIds), MemoryConstants::cacheLineSize), localIdsProgrammingSize); auto implicitArgsInIndirectData = ptrOffset(indirectHeapAllocation->getUnderlyingBuffer(), localIdsProgrammingSize); EXPECT_EQ(0, memcmp(implicitArgsInIndirectData, &expectedImplicitArgs, sizeof(ImplicitArgs))); } using HardwareCommandsTestXeHpAndLater = HardwareCommandsTest; HWCMDTEST_F(IGFX_XE_HP_CORE, HardwareCommandsTestXeHpAndLater, 
givenIndirectHeapNotAllocatedFromInternalPoolWhenSendCrossThreadDataIsCalledThenOffsetZeroIsReturned) { auto nonInternalAllocation = pDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); IndirectHeap indirectHeap(nonInternalAllocation, false); auto expectedOffset = is64bit ? 0u : indirectHeap.getHeapGpuBase(); auto sizeCrossThreadData = mockKernelWithInternal->mockKernel->getCrossThreadDataSize(); auto offset = HardwareCommandsHelper::sendCrossThreadData( indirectHeap, *mockKernelWithInternal->mockKernel, false, nullptr, sizeCrossThreadData); EXPECT_EQ(expectedOffset, offset); pDevice->getMemoryManager()->freeGraphicsMemory(nonInternalAllocation); } HWCMDTEST_F(IGFX_XE_HP_CORE, HardwareCommandsTestXeHpAndLater, givenIndirectHeapAllocatedFromInternalPoolWhenSendCrossThreadDataIsCalledThenHeapBaseOffsetIsReturned) { auto internalAllocation = pDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties(pDevice->getRootDeviceIndex(), true, MemoryConstants::pageSize, AllocationType::INTERNAL_HEAP, pDevice->getDeviceBitfield())); IndirectHeap indirectHeap(internalAllocation, true); auto expectedOffset = is64bit ? 
internalAllocation->getGpuAddressToPatch() : 0u; auto sizeCrossThreadData = mockKernelWithInternal->mockKernel->getCrossThreadDataSize(); auto offset = HardwareCommandsHelper::sendCrossThreadData( indirectHeap, *mockKernelWithInternal->mockKernel, false, nullptr, sizeCrossThreadData); EXPECT_EQ(expectedOffset, offset); pDevice->getMemoryManager()->freeGraphicsMemory(internalAllocation); } HWCMDTEST_F(IGFX_XE_HP_CORE, HardwareCommandsTestXeHpAndLater, givenSendCrossThreadDataWhenWhenAddPatchInfoCommentsForAUBDumpIsSetThenAddPatchInfoDataOffsetsAreMoved) { DebugManagerStateRestore dbgRestore; DebugManager.flags.AddPatchInfoCommentsForAUBDump.set(true); CommandQueueHw cmdQ(pContext, pClDevice, 0, false); MockContext context; MockProgram program(&context, false, toClDeviceVector(*pClDevice)); auto kernelInfo = std::make_unique(); std::unique_ptr kernel(new MockKernel(&program, *kernelInfo, *pClDevice)); auto &indirectHeap = cmdQ.getIndirectHeap(IndirectHeap::Type::INDIRECT_OBJECT, 8192); indirectHeap.getSpace(128u); PatchInfoData patchInfoData1 = {0xaaaaaaaa, 0, PatchInfoAllocationType::KernelArg, 0xbbbbbbbb, 0, PatchInfoAllocationType::IndirectObjectHeap}; PatchInfoData patchInfoData2 = {0xcccccccc, 0, PatchInfoAllocationType::IndirectObjectHeap, 0xdddddddd, 0, PatchInfoAllocationType::Default}; kernel->getPatchInfoDataList().push_back(patchInfoData1); kernel->getPatchInfoDataList().push_back(patchInfoData2); auto sizeCrossThreadData = kernel->getCrossThreadDataSize(); auto offsetCrossThreadData = HardwareCommandsHelper::sendCrossThreadData( indirectHeap, *kernel, false, nullptr, sizeCrossThreadData); auto expectedOffsetRelativeToIohBase = 128u; auto iohBaseAddress = is64bit ? 
0u : indirectHeap.getHeapGpuBase(); ASSERT_NE(0u, offsetCrossThreadData); EXPECT_EQ(iohBaseAddress + expectedOffsetRelativeToIohBase, offsetCrossThreadData); ASSERT_EQ(2u, kernel->getPatchInfoDataList().size()); EXPECT_EQ(0xaaaaaaaa, kernel->getPatchInfoDataList()[0].sourceAllocation); EXPECT_EQ(0u, kernel->getPatchInfoDataList()[0].sourceAllocationOffset); EXPECT_EQ(PatchInfoAllocationType::KernelArg, kernel->getPatchInfoDataList()[0].sourceType); EXPECT_NE(0xbbbbbbbb, kernel->getPatchInfoDataList()[0].targetAllocation); EXPECT_EQ(indirectHeap.getGraphicsAllocation()->getGpuAddress(), kernel->getPatchInfoDataList()[0].targetAllocation); EXPECT_NE(0u, kernel->getPatchInfoDataList()[0].targetAllocationOffset); EXPECT_EQ(expectedOffsetRelativeToIohBase, kernel->getPatchInfoDataList()[0].targetAllocationOffset); EXPECT_EQ(PatchInfoAllocationType::IndirectObjectHeap, kernel->getPatchInfoDataList()[0].targetType); } HWCMDTEST_F(IGFX_XE_HP_CORE, HardwareCommandsTestXeHpAndLater, whenGetSizeRequiredForCacheFlushIsCalledThenExceptionIsThrown) { CommandQueueHw cmdQ(pContext, pClDevice, 0, false); EXPECT_ANY_THROW(HardwareCommandsHelper::getSizeRequiredForCacheFlush(cmdQ, nullptr, 0)); } compute-runtime-22.14.22890/opencl/test/unit_test/helpers/hardware_commands_helper_tests.h000066400000000000000000000037301422164147700317040ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/built_ins/built_ins.h" #include "shared/source/command_container/command_encoder.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/kernel/kernel.h" #include "opencl/test/unit_test/fixtures/built_in_fixture.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/fixtures/context_fixture.h" #include 
"opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include using namespace NEO; struct HardwareCommandsTest : ClDeviceFixture, ContextFixture, BuiltInFixture, ::testing::Test { using BuiltInFixture::SetUp; using ContextFixture::SetUp; void SetUp() override; void TearDown() override; void addSpaceForSingleKernelArg(); size_t sizeRequiredCS; size_t sizeRequiredISH; std::unique_ptr mockKernelWithInternal; Kernel::SimpleKernelArgInfo kernelArgInfo = {}; std::vector kernelArguments; template size_t pushBindingTableAndSurfaceStates(IndirectHeap &dstHeap, const Kernel &srcKernel) { return EncodeSurfaceState::pushBindingTableAndSurfaceStates(dstHeap, srcKernel.getKernelInfo().kernelDescriptor.payloadMappings.bindingTable.numEntries, srcKernel.getSurfaceStateHeap(), srcKernel.getSurfaceStateHeapSize(), srcKernel.getNumberOfBindingTableStates(), srcKernel.getBindingTableOffset()); } }; compute-runtime-22.14.22890/opencl/test/unit_test/helpers/heap_assigner_ocl_tests.cpp000066400000000000000000000013601422164147700306640ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/heap_assigner.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" namespace NEO { using AlocationHelperTests = Test; HWTEST_F(AlocationHelperTests, givenLinearStreamTypeWhenUseExternalAllocatorForSshAndDshDisabledThenUse32BitIsFalse) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.UseExternalAllocatorForSshAndDsh.set(true); HeapAssigner heapAssigner = {}; EXPECT_FALSE(heapAssigner.use32BitHeap(AllocationType::LINEAR_STREAM)); } } // namespace 
NEOcompute-runtime-22.14.22890/opencl/test/unit_test/helpers/hw_helper_default_tests.cpp000066400000000000000000000017771422164147700307140ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/hw_info.h" #include "shared/test/common/helpers/hw_helper_tests.h" HWCMDTEST_F(IGFX_GEN8_CORE, HwHelperTest, givenHwHelperWhenAskedForHvAlign4RequiredThenReturnTrue) { auto &hwHelper = HwHelper::get(pDevice->getHardwareInfo().platform.eRenderCoreFamily); EXPECT_TRUE(hwHelper.hvAlign4Required()); } HWCMDTEST_F(IGFX_GEN8_CORE, HwHelperTest, givenHwHelperWhenGettingBindlessSurfaceExtendedMessageDescriptorValueThenCorrectValueIsReturned) { auto &hwHelper = HwHelper::get(pDevice->getHardwareInfo().platform.eRenderCoreFamily); auto value = hwHelper.getBindlessSurfaceExtendedMessageDescriptorValue(0x200); typename FamilyType::DataPortBindlessSurfaceExtendedMessageDescriptor messageExtDescriptor = {}; messageExtDescriptor.setBindlessSurfaceOffset(0x200); EXPECT_EQ(messageExtDescriptor.getBindlessSurfaceOffsetToPatch(), value); EXPECT_EQ(0x200u << 6, value); } compute-runtime-22.14.22890/opencl/test/unit_test/helpers/hw_helper_tests.cpp000066400000000000000000002204121422164147700271750ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/helpers/hw_helper_tests.h" #include "shared/source/gmm_helper/gmm.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/gmm_helper/resource_info.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/pipe_control_args.h" #include "shared/source/helpers/string.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "shared/source/os_interface/hw_info_config.h" #include "shared/source/os_interface/os_interface.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include 
"shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/helpers/variable_backup.h" #include "shared/test/common/mocks/mock_gmm.h" #include "shared/test/common/test_macros/test_checks_shared.h" #include "opencl/source/helpers/cl_hw_helper.h" #include "opencl/source/helpers/dispatch_info.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/mem_obj/image.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "test_traits_common.h" #include #include #include #include using namespace NEO; TEST(HwHelperSimpleTest, givenDebugVariableWhenAskingForCompressionThenReturnCorrectValue) { DebugManagerStateRestore restore; HardwareInfo localHwInfo = *defaultHwInfo; // debug variable not set localHwInfo.capabilityTable.ftrRenderCompressedBuffers = false; localHwInfo.capabilityTable.ftrRenderCompressedImages = false; EXPECT_FALSE(HwHelper::compressedBuffersSupported(localHwInfo)); EXPECT_FALSE(HwHelper::compressedImagesSupported(localHwInfo)); localHwInfo.capabilityTable.ftrRenderCompressedBuffers = true; localHwInfo.capabilityTable.ftrRenderCompressedImages = true; EXPECT_TRUE(HwHelper::compressedBuffersSupported(localHwInfo)); EXPECT_TRUE(HwHelper::compressedImagesSupported(localHwInfo)); // debug variable set DebugManager.flags.RenderCompressedBuffersEnabled.set(1); DebugManager.flags.RenderCompressedImagesEnabled.set(1); localHwInfo.capabilityTable.ftrRenderCompressedBuffers = false; localHwInfo.capabilityTable.ftrRenderCompressedImages = false; EXPECT_TRUE(HwHelper::compressedBuffersSupported(localHwInfo)); EXPECT_TRUE(HwHelper::compressedImagesSupported(localHwInfo)); DebugManager.flags.RenderCompressedBuffersEnabled.set(0); DebugManager.flags.RenderCompressedImagesEnabled.set(0); localHwInfo.capabilityTable.ftrRenderCompressedBuffers = true; 
localHwInfo.capabilityTable.ftrRenderCompressedImages = true; EXPECT_FALSE(HwHelper::compressedBuffersSupported(localHwInfo)); EXPECT_FALSE(HwHelper::compressedImagesSupported(localHwInfo)); } TEST_F(HwHelperTest, WhenGettingHelperThenValidHelperReturned) { auto &helper = HwHelper::get(renderCoreFamily); EXPECT_NE(nullptr, &helper); } HWTEST_F(HwHelperTest, givenHwHelperWhenAskingForTimestampPacketAlignmentThenReturnFourCachelines) { auto &helper = HwHelper::get(renderCoreFamily); constexpr auto expectedAlignment = MemoryConstants::cacheLineSize * 4; EXPECT_EQ(expectedAlignment, helper.getTimestampPacketAllocatorAlignment()); } HWTEST_F(HwHelperTest, givenHwHelperWhenGettingISAPaddingThenCorrectValueIsReturned) { auto &hwHelper = HwHelper::get(pDevice->getHardwareInfo().platform.eRenderCoreFamily); EXPECT_EQ(hwHelper.getPaddingForISAAllocation(), 512u); } HWTEST_F(HwHelperTest, WhenSettingRenderSurfaceStateForBufferThenL1CachePolicyIsSet) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE; class mockHwHelperHw : public HwHelperHw { public: bool called = false; using HwHelperHw::HwHelperHw; mockHwHelperHw() {} void setL1CachePolicy(bool useL1Cache, typename FamilyType::RENDER_SURFACE_STATE *surfaceState, const HardwareInfo *hwInfo) override { HwHelperHw::setL1CachePolicy(useL1Cache, surfaceState, hwInfo); called = true; } }; mockHwHelperHw helper; void *stateBuffer = alignedMalloc(sizeof(RENDER_SURFACE_STATE), sizeof(RENDER_SURFACE_STATE)); ASSERT_NE(nullptr, stateBuffer); memset(stateBuffer, 0, sizeof(RENDER_SURFACE_STATE)); RENDER_SURFACE_STATE state = FamilyType::cmdInitRenderSurfaceState; auto surfaceState = reinterpret_cast(stateBuffer); *surfaceState = state; auto &rootDeviceEnvironment = pDevice->getRootDeviceEnvironment(); size_t size = 0x1000; uint64_t addr = 0x2000; size_t offset = 0x1000; uint32_t pitch = 0x40; SURFACE_TYPE type = 
RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_BUFFER; helper.setRenderSurfaceStateForBuffer(rootDeviceEnvironment, stateBuffer, size, addr, offset, pitch, nullptr, false, type, false, false); ASSERT_EQ(helper.called, true); helper.called = false; helper.setRenderSurfaceStateForBuffer(rootDeviceEnvironment, stateBuffer, size, addr, offset, pitch, nullptr, false, type, false, true); ASSERT_EQ(helper.called, true); alignedFree(stateBuffer); } HWTEST_F(HwHelperTest, WhenGettingBindingTableStateSurfaceStatePointerThenCorrectPointerIsReturned) { using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE; BINDING_TABLE_STATE bindingTableState[4]; bindingTableState[2].getRawData(0) = 0x00123456; auto &helper = HwHelper::get(renderCoreFamily); auto pointer = helper.getBindingTableStateSurfaceStatePointer(bindingTableState, 2); EXPECT_EQ(0x00123456u, pointer); } HWTEST_F(HwHelperTest, WhenGettingBindingTableStateSizeThenCorrectSizeIsReturned) { using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE; auto &helper = HwHelper::get(renderCoreFamily); auto pointer = helper.getBindingTableStateSize(); EXPECT_EQ(sizeof(BINDING_TABLE_STATE), pointer); } TEST_F(HwHelperTest, WhenGettingBindingTableStateAlignementThenCorrectSizeIsReturned) { auto &helper = HwHelper::get(renderCoreFamily); EXPECT_NE(0u, helper.getBindingTableStateAlignement()); } HWTEST_F(HwHelperTest, WhenGettingInterfaceDescriptorDataSizeThenCorrectSizeIsReturned) { using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; auto &helper = HwHelper::get(renderCoreFamily); EXPECT_EQ(sizeof(INTERFACE_DESCRIPTOR_DATA), helper.getInterfaceDescriptorDataSize()); } TEST_F(HwHelperTest, givenDebuggingInactiveWhenSipKernelTypeIsQueriedThenCsrTypeIsReturned) { auto &helper = HwHelper::get(renderCoreFamily); EXPECT_NE(nullptr, &helper); auto sipType = helper.getSipKernelType(false); EXPECT_EQ(SipKernelType::Csr, sipType); } TEST_F(HwHelperTest, 
givenEngineTypeRcsWhenCsTraitsAreQueiredThenCorrectNameInTraitsIsReturned) {
    auto &hwHelper = HwHelper::get(renderCoreFamily);
    EXPECT_NE(nullptr, &hwHelper);

    // The traits of the RCS engine must carry the name "RCS".
    auto &rcsTraits = hwHelper.getCsTraits(aub_stream::ENGINE_RCS);
    EXPECT_STREQ("RCS", rcsTraits.name);
}

using isTglLpOrBelow = IsAtMostProduct;

// On the products selected by isTglLpOrBelow no threads-per-EU configs are expected.
HWTEST2_F(HwHelperTest, givenHwHelperWhenGettingThreadsPerEUConfigsThenNoConfigsAreReturned, isTglLpOrBelow) {
    auto &hwHelper = HwHelper::get(renderCoreFamily);

    auto &threadConfigs = hwHelper.getThreadsPerEUConfigs();
    EXPECT_EQ(0U, threadConfigs.size());
}

// getGpuTimeStampInNS must equal the timestamp multiplied by the frequency.
HWCMDTEST_F(IGFX_GEN8_CORE, HwHelperTest, givenHwHelperWhenGetGpuTimeStampInNSIsCalledThenCorrectValueIsReturned) {
    auto &hwHelper = HwHelper::get(renderCoreFamily);
    auto timeStamp = 0x00ff'ffff'ffff;
    auto frequency = 123456.0;

    auto expectedNs = static_cast(timeStamp * frequency);
    EXPECT_EQ(expectedNs, hwHelper.getGpuTimeStampInNS(timeStamp, frequency));
}

// Non-masked builds must set only the requested bit on top of the initial value.
TEST(DwordBuilderTest, WhenSettingNonMaskedBitsThenOnlySelectedBitAreSet) {
    // expect non-masked bit 2
    uint32_t expectedDword = (1 << 2);
    uint32_t builtDword = DwordBuilder::build(2, false, true, 0); // set 2nd bit
    EXPECT_EQ(expectedDword, builtDword);

    // expect non-masked bits 2 and 3
    expectedDword |= (1 << 3);
    builtDword = DwordBuilder::build(3, false, true, builtDword); // set 3rd bit with init value
    EXPECT_EQ(expectedDword, builtDword);
}

// Masked builds must additionally set the matching bit in the upper 16 bits.
TEST(DwordBuilderTest, WhenSettingMaskedBitsThenOnlySelectedBitAreSet) {
    // expect masked bit 2
    uint32_t expectedDword = (1 << 2) | (1 << (2 + 16));
    uint32_t builtDword = DwordBuilder::build(2, true, true, 0); // set 2nd bit (masked)
    EXPECT_EQ(expectedDword, builtDword);

    // expect masked bits 2 and 3
    expectedDword |= (1 << 3) | (1 << (3 + 16));
    builtDword = DwordBuilder::build(3, true, true, builtDword); // set 3rd bit (masked) with init value
    EXPECT_EQ(expectedDword, builtDword);
}

TEST(DwordBuilderTest, GivenDifferentBitValuesWhenSettingMaskedBitsThenOnlySelectedBitAreSet) {
    // expect only mask bit
    uint32_t expectedDword = 1 << (2 + 16);
    auto dword =
DwordBuilder::build(2, true, false, 0); EXPECT_EQ(expectedDword, dword); // expect masked bits 3 expectedDword = (1 << 3); expectedDword |= (1 << (3 + 16)); dword = DwordBuilder::build(3, true, true, 0); EXPECT_EQ(expectedDword, dword); } using LriHelperTests = ::testing::Test; HWTEST_F(LriHelperTests, givenAddressAndOffsetWhenHelperIsUsedThenProgramCmdStream) { using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; std::unique_ptr buffer(new uint8_t[128]); LinearStream stream(buffer.get(), 128); uint32_t address = 0x8888; uint32_t data = 0x1234; auto expectedLri = FamilyType::cmdInitLoadRegisterImm; expectedLri.setRegisterOffset(address); expectedLri.setDataDword(data); LriHelper::program(&stream, address, data, false); auto lri = genCmdCast(stream.getCpuBase()); ASSERT_NE(nullptr, lri); EXPECT_EQ(sizeof(MI_LOAD_REGISTER_IMM), stream.getUsed()); EXPECT_EQ(address, lri->getRegisterOffset()); EXPECT_EQ(data, lri->getDataDword()); } using PipeControlHelperTests = ::testing::Test; HWTEST_F(PipeControlHelperTests, givenPostSyncWriteTimestampModeWhenHelperIsUsedThenProperFieldsAreProgrammed) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; std::unique_ptr buffer(new uint8_t[128]); LinearStream stream(buffer.get(), 128); uint64_t address = 0x1234567887654321; uint64_t immediateData = 0x1234; auto expectedPipeControl = FamilyType::cmdInitPipeControl; expectedPipeControl.setCommandStreamerStallEnable(true); expectedPipeControl.setPostSyncOperation(PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP); expectedPipeControl.setAddress(static_cast(address & 0x0000FFFFFFFFULL)); expectedPipeControl.setAddressHigh(static_cast(address >> 32)); HardwareInfo hardwareInfo = *defaultHwInfo; PipeControlArgs args; MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( stream, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, address, immediateData, hardwareInfo, args); auto additionalPcSize = 
MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(hardwareInfo) - sizeof(PIPE_CONTROL); auto pipeControlLocationSize = additionalPcSize - MemorySynchronizationCommands::getSizeForSingleAdditionalSynchronization(hardwareInfo); auto pipeControl = genCmdCast(ptrOffset(stream.getCpuBase(), pipeControlLocationSize)); ASSERT_NE(nullptr, pipeControl); EXPECT_EQ(sizeof(PIPE_CONTROL) + additionalPcSize, stream.getUsed()); EXPECT_TRUE(memcmp(pipeControl, &expectedPipeControl, sizeof(PIPE_CONTROL)) == 0); } HWTEST_F(PipeControlHelperTests, givenHwHelperwhenAskingForDcFlushThenReturnTrue) { EXPECT_TRUE(MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo)); } HWTEST_F(PipeControlHelperTests, givenDcFlushNotAllowedWhenProgrammingPipeControlThenDontSetDcFlush) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; std::unique_ptr buffer(new uint8_t[128]); LinearStream stream(buffer.get(), 128); PipeControlArgs args; args.dcFlushEnable = true; MemorySynchronizationCommands::addPipeControl(stream, args); auto pipeControl = genCmdCast(stream.getCpuBase()); ASSERT_NE(nullptr, pipeControl); EXPECT_TRUE(pipeControl->getDcFlushEnable()); } HWTEST_F(PipeControlHelperTests, givenPostSyncWriteImmediateDataModeWhenHelperIsUsedThenProperFieldsAreProgrammed) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; std::unique_ptr buffer(new uint8_t[128]); LinearStream stream(buffer.get(), 128); uint64_t address = 0x1234567887654321; uint64_t immediateData = 0x1234; auto expectedPipeControl = FamilyType::cmdInitPipeControl; expectedPipeControl.setCommandStreamerStallEnable(true); expectedPipeControl.setPostSyncOperation(PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA); expectedPipeControl.setAddress(static_cast(address & 0x0000FFFFFFFFULL)); expectedPipeControl.setAddressHigh(static_cast(address >> 32)); expectedPipeControl.setImmediateData(immediateData); HardwareInfo hardwareInfo = *defaultHwInfo; PipeControlArgs args; 
MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation(
        stream, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, address, immediateData, hardwareInfo, args);

    // Size programmed in addition to the single PIPE_CONTROL command.
    auto additionalPcSize = MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(hardwareInfo) - sizeof(PIPE_CONTROL);
    // Offset of the PIPE_CONTROL inside the stream: the extra size minus one additional synchronization.
    auto pipeControlLocationSize = additionalPcSize - MemorySynchronizationCommands::getSizeForSingleAdditionalSynchronization(hardwareInfo);
    auto pipeControl = genCmdCast(ptrOffset(stream.getCpuBase(), pipeControlLocationSize));
    ASSERT_NE(nullptr, pipeControl);

    EXPECT_EQ(sizeof(PIPE_CONTROL) + additionalPcSize, stream.getUsed());
    // The programmed command must match the reference command byte for byte.
    EXPECT_TRUE(memcmp(pipeControl, &expectedPipeControl, sizeof(PIPE_CONTROL)) == 0);
}

// Same scenario as the write-immediate-data test, with args.notifyEnable set:
// the reference command additionally has notify enable set.
HWTEST_F(PipeControlHelperTests, givenNotifyEnableArgumentIsTrueWhenHelperIsUsedThenNotifyEnableFlagIsTrue) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    std::unique_ptr buffer(new uint8_t[128]);

    LinearStream stream(buffer.get(), 128);
    uint64_t address = 0x1234567887654321;
    uint64_t immediateData = 0x1234;

    // Reference command built field by field.
    auto expectedPipeControl = FamilyType::cmdInitPipeControl;
    expectedPipeControl.setCommandStreamerStallEnable(true);
    expectedPipeControl.setPostSyncOperation(PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA);
    expectedPipeControl.setAddress(static_cast(address & 0x0000FFFFFFFFULL));
    expectedPipeControl.setAddressHigh(static_cast(address >> 32));
    expectedPipeControl.setImmediateData(immediateData);
    expectedPipeControl.setNotifyEnable(true);

    HardwareInfo hardwareInfo = *defaultHwInfo;
    PipeControlArgs args;
    args.notifyEnable = true;
    MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation(
        stream, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, address, immediateData, hardwareInfo, args);

    // Size programmed in addition to the single PIPE_CONTROL command.
    auto additionalPcSize = MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(hardwareInfo) - sizeof(PIPE_CONTROL);
    auto pipeControlLocationSize = additionalPcSize -
MemorySynchronizationCommands::getSizeForSingleAdditionalSynchronization(hardwareInfo); auto pipeControl = genCmdCast(ptrOffset(stream.getCpuBase(), pipeControlLocationSize)); ASSERT_NE(nullptr, pipeControl); EXPECT_EQ(sizeof(PIPE_CONTROL) + additionalPcSize, stream.getUsed()); EXPECT_TRUE(memcmp(pipeControl, &expectedPipeControl, sizeof(PIPE_CONTROL)) == 0); } HWTEST_F(PipeControlHelperTests, WhenIsDcFlushAllowedIsCalledThenCorrectResultIsReturned) { auto &hwInfoConfig = *HwInfoConfig::get(defaultHwInfo->platform.eProductFamily); EXPECT_FALSE(MemorySynchronizationCommands::getDcFlushEnable(false, *defaultHwInfo)); EXPECT_EQ(hwInfoConfig.isDcFlushAllowed(), MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo)); } TEST(HwInfoTest, givenHwInfoWhenChosenEngineTypeQueriedThenDefaultIsReturned) { HardwareInfo hwInfo = *defaultHwInfo; hwInfo.capabilityTable.defaultEngineType = aub_stream::ENGINE_RCS; auto engineType = getChosenEngineType(hwInfo); EXPECT_EQ(aub_stream::ENGINE_RCS, engineType); } TEST(HwInfoTest, givenNodeOrdinalSetWhenChosenEngineTypeQueriedThenSetValueIsReturned) { DebugManagerStateRestore dbgRestore; DebugManager.flags.NodeOrdinal.set(aub_stream::ENGINE_VECS); HardwareInfo hwInfo = *defaultHwInfo; hwInfo.capabilityTable.defaultEngineType = aub_stream::ENGINE_RCS; auto engineType = getChosenEngineType(hwInfo); EXPECT_EQ(aub_stream::ENGINE_VECS, engineType); } HWTEST_F(HwHelperTest, givenCreatedSurfaceStateBufferWhenNoAllocationProvidedThenUseArgumentsasInput) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE; auto &rootDeviceEnvironment = pDevice->getRootDeviceEnvironment(); auto gmmHelper = pDevice->getGmmHelper(); void *stateBuffer = alignedMalloc(sizeof(RENDER_SURFACE_STATE), sizeof(RENDER_SURFACE_STATE)); ASSERT_NE(nullptr, stateBuffer); memset(stateBuffer, 0, sizeof(RENDER_SURFACE_STATE)); auto &helper = HwHelper::get(renderCoreFamily); 
EXPECT_EQ(sizeof(RENDER_SURFACE_STATE), helper.getRenderSurfaceStateSize());

    // Case 1: size 0x1000 at address 0x2000 with a 0x1000 offset.
    size_t size = 0x1000;
    SURFACE_STATE_BUFFER_LENGTH length;
    // The expected width/height/depth are read back from the encoded (size - 1).
    length.Length = static_cast(size - 1);
    uint64_t addr = 0x2000;
    size_t offset = 0x1000;
    uint32_t pitch = 0x40;
    SURFACE_TYPE type = RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_BUFFER;

    helper.setRenderSurfaceStateForBuffer(rootDeviceEnvironment, stateBuffer, size, addr, offset, pitch, nullptr, false, type, true, false);

    RENDER_SURFACE_STATE *state = reinterpret_cast(stateBuffer);
    EXPECT_EQ(length.SurfaceState.Depth + 1u, state->getDepth());
    EXPECT_EQ(length.SurfaceState.Width + 1u, state->getWidth());
    EXPECT_EQ(length.SurfaceState.Height + 1u, state->getHeight());
    EXPECT_EQ(pitch, state->getSurfacePitch());
    // The programmed base address is expected to include the offset.
    addr += offset;
    EXPECT_EQ(addr, state->getSurfaceBaseAddress());
    EXPECT_EQ(type, state->getSurfaceType());
    EXPECT_EQ(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER), state->getMemoryObjectControlState());

    // Case 2: size 0x1003 (not a multiple of 4), writable buffer: the
    // cacheline-misaligned MOCS is expected and the length uses alignUp(size, 4).
    memset(stateBuffer, 0, sizeof(RENDER_SURFACE_STATE));
    size = 0x1003;
    length.Length = static_cast(alignUp(size, 4) - 1);
    bool isReadOnly = false;
    helper.setRenderSurfaceStateForBuffer(rootDeviceEnvironment, stateBuffer, size, addr, 0, pitch, nullptr, isReadOnly, type, true, false);
    EXPECT_EQ(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED), state->getMemoryObjectControlState());
    EXPECT_EQ(length.SurfaceState.Depth + 1u, state->getDepth());
    EXPECT_EQ(length.SurfaceState.Width + 1u, state->getWidth());
    EXPECT_EQ(length.SurfaceState.Height + 1u, state->getHeight());

    // Case 3: size 0x1000 at address 0x2001: the cacheline-misaligned MOCS
    // is expected again.
    memset(stateBuffer, 0, sizeof(RENDER_SURFACE_STATE));
    size = 0x1000;
    addr = 0x2001;
    length.Length = static_cast(size - 1);
    helper.setRenderSurfaceStateForBuffer(rootDeviceEnvironment, stateBuffer, size, addr, 0, pitch, nullptr, isReadOnly, type, true, false);
    EXPECT_EQ(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED), state->getMemoryObjectControlState());
    EXPECT_EQ(length.SurfaceState.Depth + 1u, state->getDepth());
EXPECT_EQ(length.SurfaceState.Width + 1u, state->getWidth());
    EXPECT_EQ(length.SurfaceState.Height + 1u, state->getHeight());
    EXPECT_EQ(addr, state->getSurfaceBaseAddress());

    // Case 4: size 0x1005 with isReadOnly == true: the regular buffer MOCS
    // is expected for this read-only case.
    memset(stateBuffer, 0, sizeof(RENDER_SURFACE_STATE));
    size = 0x1005;
    length.Length = static_cast(alignUp(size, 4) - 1);
    isReadOnly = true;
    helper.setRenderSurfaceStateForBuffer(rootDeviceEnvironment, stateBuffer, size, addr, 0, pitch, nullptr, isReadOnly, type, true, false);
    EXPECT_EQ(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER), state->getMemoryObjectControlState());
    EXPECT_EQ(length.SurfaceState.Depth + 1u, state->getDepth());
    EXPECT_EQ(length.SurfaceState.Width + 1u, state->getWidth());
    EXPECT_EQ(length.SurfaceState.Height + 1u, state->getHeight());
    EXPECT_EQ(addr, state->getSurfaceBaseAddress());

    alignedFree(stateBuffer);
}

// Programs a buffer surface state with a graphics allocation supplied; the
// assertions (continued below) compare the state against the allocation's
// GPU address and size.
HWTEST_F(HwHelperTest, givenCreatedSurfaceStateBufferWhenAllocationProvidedThenUseAllocationAsInput) {
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
    using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE;
    using AUXILIARY_SURFACE_MODE = typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE;

    auto &rootDeviceEnvironment = pDevice->getRootDeviceEnvironment();

    void *stateBuffer = alignedMalloc(sizeof(RENDER_SURFACE_STATE), sizeof(RENDER_SURFACE_STATE));
    ASSERT_NE(nullptr, stateBuffer);
    RENDER_SURFACE_STATE *state = reinterpret_cast(stateBuffer);

    memset(stateBuffer, 0, sizeof(RENDER_SURFACE_STATE));
    auto &helper = HwHelper::get(renderCoreFamily);

    size_t size = 0x1000;
    SURFACE_STATE_BUFFER_LENGTH length;

    uint64_t addr = 0x2000;
    uint32_t pitch = 0;

    // The allocation deliberately uses a GPU address (0x4000) different from addr (0x2000).
    void *cpuAddr = reinterpret_cast(0x4000);
    uint64_t gpuAddr = 0x4000u;
    size_t allocSize = size;
    length.Length = static_cast(allocSize - 1);
    GraphicsAllocation allocation(0, AllocationType::UNKNOWN, cpuAddr, gpuAddr, 0u, allocSize, MemoryPool::MemoryNull, 0u);
    // The Gmm is heap-allocated here and deleted manually at the end of the test.
    allocation.setDefaultGmm(new Gmm(pDevice->getGmmClientContext(), allocation.getUnderlyingBuffer(), allocation.getUnderlyingBufferSize(),
0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true));
    SURFACE_TYPE type = RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_BUFFER;
    helper.setRenderSurfaceStateForBuffer(rootDeviceEnvironment, stateBuffer, size, addr, 0, pitch, &allocation, false, type, true, false);
    EXPECT_EQ(length.SurfaceState.Depth + 1u, state->getDepth());
    EXPECT_EQ(length.SurfaceState.Width + 1u, state->getWidth());
    EXPECT_EQ(length.SurfaceState.Height + 1u, state->getHeight());
    // pitch is 0 here, so the getter is expected to return 1.
    EXPECT_EQ(pitch, state->getSurfacePitch() - 1u);
    // The base address must come from the allocation (gpuAddr), not from addr.
    EXPECT_EQ(gpuAddr, state->getSurfaceBaseAddress());

    EXPECT_EQ(UnitTestHelper::getCoherencyTypeSupported(RENDER_SURFACE_STATE::COHERENCY_TYPE_IA_COHERENT), state->getCoherencyType());
    EXPECT_EQ(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE, state->getAuxiliarySurfaceMode());

    // Release the Gmm created with new above, then the state buffer.
    delete allocation.getDefaultGmm();
    alignedFree(stateBuffer);
}

// Programs a buffer surface state for a BUFFER allocation whose Gmm has
// compression enabled; the assertions follow in the continuation below.
HWTEST_F(HwHelperTest, givenCreatedSurfaceStateBufferWhenGmmAndAllocationCompressionEnabledAnNonAuxDisabledThenSetCoherencyToGpuAndAuxModeToCompression) {
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
    using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE;
    using AUXILIARY_SURFACE_MODE = typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE;

    auto &rootDeviceEnvironment = pDevice->getRootDeviceEnvironment();

    void *stateBuffer = alignedMalloc(sizeof(RENDER_SURFACE_STATE), sizeof(RENDER_SURFACE_STATE));
    ASSERT_NE(nullptr, stateBuffer);
    RENDER_SURFACE_STATE *state = reinterpret_cast(stateBuffer);

    memset(stateBuffer, 0, sizeof(RENDER_SURFACE_STATE));
    auto &helper = HwHelper::get(renderCoreFamily);

    size_t size = 0x1000;
    uint64_t addr = 0x2000;
    uint32_t pitch = 0;

    void *cpuAddr = reinterpret_cast(0x4000);
    uint64_t gpuAddr = 0x4000u;
    size_t allocSize = size;
    GraphicsAllocation allocation(0, AllocationType::BUFFER, cpuAddr, gpuAddr, 0u, allocSize, MemoryPool::MemoryNull, 0u);
    // The Gmm is heap-allocated here and deleted manually at the end of the test.
    allocation.setDefaultGmm(new Gmm(rootDeviceEnvironment.getGmmClientContext(), allocation.getUnderlyingBuffer(),
allocation.getUnderlyingBufferSize(), 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true));
    // Compression is switched on directly on the Gmm.
    allocation.getDefaultGmm()->isCompressionEnabled = true;
    SURFACE_TYPE type = RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_BUFFER;
    helper.setRenderSurfaceStateForBuffer(rootDeviceEnvironment, stateBuffer, size, addr, 0, pitch, &allocation, false, type, false, false);
    // With a compressed Gmm: GPU-coherent state with aux mode enabled.
    EXPECT_EQ(RENDER_SURFACE_STATE::COHERENCY_TYPE_GPU_COHERENT, state->getCoherencyType());
    EXPECT_TRUE(EncodeSurfaceState::isAuxModeEnabled(state, allocation.getDefaultGmm()));

    // Release the Gmm created with new above, then the state buffer.
    delete allocation.getDefaultGmm();
    alignedFree(stateBuffer);
}

// Same setup, but isCompressionEnabled is never set on the Gmm; the
// assertions follow in the continuation below.
HWTEST_F(HwHelperTest, givenCreatedSurfaceStateBufferWhenGmmCompressionDisabledAndAllocationEnabledAnNonAuxDisabledThenSetCoherencyToIaAndAuxModeToNone) {
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
    using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE;
    using AUXILIARY_SURFACE_MODE = typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE;

    auto &rootDeviceEnvironment = pDevice->getRootDeviceEnvironment();

    void *stateBuffer = alignedMalloc(sizeof(RENDER_SURFACE_STATE), sizeof(RENDER_SURFACE_STATE));
    ASSERT_NE(nullptr, stateBuffer);
    RENDER_SURFACE_STATE *state = reinterpret_cast(stateBuffer);

    memset(stateBuffer, 0, sizeof(RENDER_SURFACE_STATE));
    auto &helper = HwHelper::get(renderCoreFamily);

    size_t size = 0x1000;
    uint64_t addr = 0x2000;
    uint32_t pitch = 0;

    void *cpuAddr = reinterpret_cast(0x4000);
    uint64_t gpuAddr = 0x4000u;
    size_t allocSize = size;
    GraphicsAllocation allocation(0, AllocationType::BUFFER, cpuAddr, gpuAddr, 0u, allocSize, MemoryPool::MemoryNull, 1);
    // The Gmm is heap-allocated here and deleted manually at the end of the test.
    allocation.setDefaultGmm(new Gmm(rootDeviceEnvironment.getGmmClientContext(), allocation.getUnderlyingBuffer(), allocation.getUnderlyingBufferSize(), 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true));
    SURFACE_TYPE type = RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_BUFFER;
    helper.setRenderSurfaceStateForBuffer(rootDeviceEnvironment, stateBuffer, size, addr, 0, pitch, &allocation,
false, type, false, false); EXPECT_EQ(UnitTestHelper::getCoherencyTypeSupported(RENDER_SURFACE_STATE::COHERENCY_TYPE_IA_COHERENT), state->getCoherencyType()); EXPECT_EQ(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE, state->getAuxiliarySurfaceMode()); delete allocation.getDefaultGmm(); alignedFree(stateBuffer); } HWTEST_F(HwHelperTest, givenOverrideMocsIndexForScratchSpaceWhenSurfaceStateIsProgrammedForScratchSpaceThenOverrideMocsIndexWithCorrectValue) { DebugManagerStateRestore restore; DebugManager.flags.OverrideMocsIndexForScratchSpace.set(1); using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE; auto &rootDeviceEnvironment = pDevice->getRootDeviceEnvironment(); void *stateBuffer = alignedMalloc(sizeof(RENDER_SURFACE_STATE), sizeof(RENDER_SURFACE_STATE)); ASSERT_NE(nullptr, stateBuffer); RENDER_SURFACE_STATE *state = reinterpret_cast(stateBuffer); memset(stateBuffer, 0, sizeof(RENDER_SURFACE_STATE)); auto &helper = HwHelper::get(renderCoreFamily); size_t size = 0x1000; uint64_t addr = 0x2000; uint32_t pitch = 0; void *cpuAddr = reinterpret_cast(0x4000); uint64_t gpuAddr = 0x4000u; size_t allocSize = size; GraphicsAllocation allocation(0, AllocationType::BUFFER, cpuAddr, gpuAddr, 0u, allocSize, MemoryPool::MemoryNull, 1); allocation.setDefaultGmm(new Gmm(rootDeviceEnvironment.getGmmClientContext(), allocation.getUnderlyingBuffer(), allocation.getUnderlyingBufferSize(), 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true)); SURFACE_TYPE type = RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_BUFFER; helper.setRenderSurfaceStateForBuffer(rootDeviceEnvironment, stateBuffer, size, addr, 0, pitch, &allocation, false, type, false, false); auto mocsProgrammed = state->getMemoryObjectControlState() >> 1; EXPECT_EQ(1u, mocsProgrammed); delete allocation.getDefaultGmm(); alignedFree(stateBuffer); } HWTEST_F(HwHelperTest, 
givenCreatedSurfaceStateBufferWhenGmmAndAllocationCompressionEnabledAnNonAuxEnabledThenSetCoherencyToIaAndAuxModeToNone) {
    // The Gmm has compression enabled, but the surface state is programmed with
    // the tenth argument set to true (the sibling compression tests pass false);
    // IA-coherent state with AUX_NONE is expected.
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
    using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE;
    using AUXILIARY_SURFACE_MODE = typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE;

    auto &rootDeviceEnvironment = pDevice->getRootDeviceEnvironment();

    void *stateBuffer = alignedMalloc(sizeof(RENDER_SURFACE_STATE), sizeof(RENDER_SURFACE_STATE));
    ASSERT_NE(nullptr, stateBuffer);
    RENDER_SURFACE_STATE *state = reinterpret_cast(stateBuffer);

    memset(stateBuffer, 0, sizeof(RENDER_SURFACE_STATE));
    auto &helper = HwHelper::get(renderCoreFamily);

    size_t size = 0x1000;
    uint64_t addr = 0x2000;
    uint32_t pitch = 0;

    void *cpuAddr = reinterpret_cast(0x4000);
    uint64_t gpuAddr = 0x4000u;
    size_t allocSize = size;
    GraphicsAllocation allocation(0, AllocationType::BUFFER, cpuAddr, gpuAddr, 0u, allocSize, MemoryPool::MemoryNull, 1u);
    // The Gmm is heap-allocated here and deleted manually at the end of the test.
    allocation.setDefaultGmm(new Gmm(rootDeviceEnvironment.getGmmClientContext(), allocation.getUnderlyingBuffer(), allocation.getUnderlyingBufferSize(), 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true));
    allocation.getDefaultGmm()->isCompressionEnabled = true;
    SURFACE_TYPE type = RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_BUFFER;
    helper.setRenderSurfaceStateForBuffer(rootDeviceEnvironment, stateBuffer, size, addr, 0, pitch, &allocation, false, type, true, false);
    EXPECT_EQ(UnitTestHelper::getCoherencyTypeSupported(RENDER_SURFACE_STATE::COHERENCY_TYPE_IA_COHERENT), state->getCoherencyType());
    EXPECT_EQ(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE, state->getAuxiliarySurfaceMode());

    delete allocation.getDefaultGmm();
    alignedFree(stateBuffer);
}

// Disabled test: times setRenderSurfaceStateForBuffer against a plain
// memcpy_s of a surface state and prints the collected durations.
HWTEST_F(HwHelperTest, DISABLED_profilingCreationOfRenderSurfaceStateVsMemcpyOfCachelineAlignedBuffer) {
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
    using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE;
constexpr uint32_t maxLoop = 1000u;

    // Two time points (start, end) are stored per iteration, hence maxLoop * 2.
    // NOTE(review): the element types of the vectors below are missing in this copy - confirm against the upstream file.
    std::vector> timesCreate;
    timesCreate.reserve(maxLoop * 2);

    std::vector> timesMemCpy;
    timesMemCpy.reserve(maxLoop * 2);

    std::vector nanoDurationCreate;
    nanoDurationCreate.reserve(maxLoop);

    std::vector nanoDurationCpy;
    nanoDurationCpy.reserve(maxLoop);

    std::vector surfaceStates;
    surfaceStates.reserve(maxLoop);

    std::vector copyBuffers;
    copyBuffers.reserve(maxLoop);

    // Allocate one zeroed surface state buffer and one copy target per iteration.
    for (uint32_t i = 0; i < maxLoop; ++i) {
        void *stateBuffer = alignedMalloc(sizeof(RENDER_SURFACE_STATE), sizeof(RENDER_SURFACE_STATE));
        ASSERT_NE(nullptr, stateBuffer);
        memset(stateBuffer, 0, sizeof(RENDER_SURFACE_STATE));
        surfaceStates.push_back(stateBuffer);

        void *copyBuffer = alignedMalloc(sizeof(RENDER_SURFACE_STATE), sizeof(RENDER_SURFACE_STATE));
        ASSERT_NE(nullptr, copyBuffer);
        copyBuffers.push_back(copyBuffer);
    }

    auto &rootDeviceEnvironment = pDevice->getRootDeviceEnvironment();
    auto &helper = HwHelper::get(renderCoreFamily);
    size_t size = 0x1000;
    uint64_t addr = 0x2000;
    uint32_t pitch = 0;
    SURFACE_TYPE type = RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_BUFFER;

    // Time the surface state programming call.
    for (uint32_t i = 0; i < maxLoop; ++i) {
        auto t1 = std::chrono::high_resolution_clock::now();
        helper.setRenderSurfaceStateForBuffer(rootDeviceEnvironment, surfaceStates[i], size, addr, 0, pitch, nullptr, false, type, true, false);
        auto t2 = std::chrono::high_resolution_clock::now();
        timesCreate.push_back(t1);
        timesCreate.push_back(t2);
    }

    // Time a plain copy of each programmed surface state.
    for (uint32_t i = 0; i < maxLoop; ++i) {
        auto t1 = std::chrono::high_resolution_clock::now();
        memcpy_s(copyBuffers[i], sizeof(RENDER_SURFACE_STATE), surfaceStates[i], sizeof(RENDER_SURFACE_STATE));
        auto t2 = std::chrono::high_resolution_clock::now();
        timesMemCpy.push_back(t1);
        timesMemCpy.push_back(t2);
    }

    // Convert each (start, end) pair into a nanosecond count.
    for (uint32_t i = 0; i < maxLoop; ++i) {
        std::chrono::duration delta = timesCreate[i * 2 + 1] - timesCreate[i * 2];
        std::chrono::nanoseconds duration = std::chrono::duration_cast(delta);
        nanoDurationCreate.push_back(duration.count());

        delta = timesMemCpy[i * 2 + 1] -
timesMemCpy[i * 2]; duration = std::chrono::duration_cast(delta); nanoDurationCpy.push_back(duration.count()); } sort(nanoDurationCreate.begin(), nanoDurationCreate.end()); sort(nanoDurationCpy.begin(), nanoDurationCpy.end()); double averageCreate = std::accumulate(nanoDurationCreate.begin(), nanoDurationCreate.end(), 0.0) / nanoDurationCreate.size(); double averageCpy = std::accumulate(nanoDurationCpy.begin(), nanoDurationCpy.end(), 0.0) / nanoDurationCpy.size(); size_t middleCreate = nanoDurationCreate.size() / 2; size_t middleCpy = nanoDurationCpy.size() / 2; std::cout << "Creation average: " << averageCreate << " median: " << nanoDurationCreate[middleCreate]; std::cout << " min: " << nanoDurationCreate[0] << " max: " << nanoDurationCreate[nanoDurationCreate.size() - 1] << std::endl; std::cout << "Copy average: " << averageCpy << " median: " << nanoDurationCpy[middleCpy]; std::cout << " min: " << nanoDurationCpy[0] << " max: " << nanoDurationCpy[nanoDurationCpy.size() - 1] << std::endl; for (uint32_t i = 0; i < maxLoop; i++) { std::cout << "#" << (i + 1) << " Create: " << nanoDurationCreate[i] << " Copy: " << nanoDurationCpy[i] << std::endl; } for (uint32_t i = 0; i < maxLoop; ++i) { alignedFree(surfaceStates[i]); alignedFree(copyBuffers[i]); } } HWTEST_F(HwHelperTest, WhenTestingIfL3ConfigProgrammableThenCorrectValueIsReturned) { bool PreambleHelperL3Config; bool isL3Programmable; const HardwareInfo &hwInfo = *defaultHwInfo; PreambleHelperL3Config = PreambleHelper::isL3Configurable(*defaultHwInfo); isL3Programmable = HwHelperHw::get().isL3Configurable(hwInfo); EXPECT_EQ(PreambleHelperL3Config, isL3Programmable); } TEST(HwHelperCacheFlushTest, givenEnableCacheFlushFlagIsEnableWhenPlatformDoesNotSupportThenOverrideAndReturnSupportTrue) { DebugManagerStateRestore restore; DebugManager.flags.EnableCacheFlushAfterWalker.set(1); HardwareInfo localHwInfo = *defaultHwInfo; localHwInfo.capabilityTable.supportCacheFlushAfterWalker = false; auto device = 
std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(&localHwInfo));
    EXPECT_TRUE(HwHelper::cacheFlushAfterWalkerSupported(device->getHardwareInfo()));
}

// EnableCacheFlushAfterWalker == 0 overrides a platform that reports support.
TEST(HwHelperCacheFlushTest, givenEnableCacheFlushFlagIsDisableWhenPlatformSupportsThenOverrideAndReturnSupportFalse) {
    DebugManagerStateRestore flagsRestore;
    DebugManager.flags.EnableCacheFlushAfterWalker.set(0);

    HardwareInfo hwInfo = *defaultHwInfo;
    hwInfo.capabilityTable.supportCacheFlushAfterWalker = true;

    auto mockDevice = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(&hwInfo));
    EXPECT_FALSE(HwHelper::cacheFlushAfterWalkerSupported(mockDevice->getHardwareInfo()));
}

// EnableCacheFlushAfterWalker == -1 defers to the platform setting (unsupported here).
TEST(HwHelperCacheFlushTest, givenEnableCacheFlushFlagIsReadPlatformSettingWhenPlatformDoesNotSupportThenReturnSupportFalse) {
    DebugManagerStateRestore flagsRestore;
    DebugManager.flags.EnableCacheFlushAfterWalker.set(-1);

    HardwareInfo hwInfo = *defaultHwInfo;
    hwInfo.capabilityTable.supportCacheFlushAfterWalker = false;

    auto mockDevice = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(&hwInfo));
    EXPECT_FALSE(HwHelper::cacheFlushAfterWalkerSupported(mockDevice->getHardwareInfo()));
}

// EnableCacheFlushAfterWalker == -1 defers to the platform setting (supported here).
TEST(HwHelperCacheFlushTest, givenEnableCacheFlushFlagIsReadPlatformSettingWhenPlatformSupportsThenReturnSupportTrue) {
    DebugManagerStateRestore flagsRestore;
    DebugManager.flags.EnableCacheFlushAfterWalker.set(-1);

    HardwareInfo hwInfo = *defaultHwInfo;
    hwInfo.capabilityTable.supportCacheFlushAfterWalker = true;

    auto mockDevice = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(&hwInfo));
    EXPECT_TRUE(HwHelper::cacheFlushAfterWalkerSupported(mockDevice->getHardwareInfo()));
}

// The helper must report 36 global timestamp bits.
HWCMDTEST_F(IGFX_GEN8_CORE, HwHelperTest, givenHwHelperWhenGettingGlobalTimeStampBitsThenCorrectValueIsReturned) {
    auto &hwHelper = HwHelper::get(renderCoreFamily);
    EXPECT_EQ(hwHelper.getGlobalTimeStampBits(), 36U);
}

TEST_F(HwHelperTest, givenEnableLocalMemoryDebugVarAndOsEnableLocalMemoryWhenSetThenGetEnableLocalMemoryReturnsCorrectValue) {
DebugManagerStateRestore dbgRestore; VariableBackup orgOsEnableLocalMemory(&OSInterface::osEnableLocalMemory); auto &helper = HwHelper::get(renderCoreFamily); DebugManager.flags.EnableLocalMemory.set(0); EXPECT_FALSE(helper.getEnableLocalMemory(hardwareInfo)); DebugManager.flags.EnableLocalMemory.set(1); EXPECT_TRUE(helper.getEnableLocalMemory(hardwareInfo)); DebugManager.flags.EnableLocalMemory.set(-1); OSInterface::osEnableLocalMemory = false; EXPECT_FALSE(helper.getEnableLocalMemory(hardwareInfo)); OSInterface::osEnableLocalMemory = true; EXPECT_EQ(helper.isLocalMemoryEnabled(hardwareInfo), helper.getEnableLocalMemory(hardwareInfo)); } TEST_F(HwHelperTest, givenAUBDumpForceAllToLocalMemoryDebugVarWhenSetThenGetEnableLocalMemoryReturnsCorrectValue) { DebugManagerStateRestore dbgRestore; std::unique_ptr device(MockDevice::createWithNewExecutionEnvironment(&hardwareInfo)); auto &helper = HwHelper::get(renderCoreFamily); DebugManager.flags.AUBDumpForceAllToLocalMemory.set(true); EXPECT_TRUE(helper.getEnableLocalMemory(hardwareInfo)); } HWCMDTEST_F(IGFX_GEN8_CORE, HwHelperTest, givenVariousCachesRequestThenCorrectMocsIndexesAreReturned) { auto &helper = HwHelper::get(renderCoreFamily); auto gmmHelper = this->pDevice->getGmmHelper(); auto expectedMocsForL3off = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED) >> 1; auto expectedMocsForL3on = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER) >> 1; auto expectedMocsForL3andL1on = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST) >> 1; auto mocsIndex = helper.getMocsIndex(*gmmHelper, false, true); EXPECT_EQ(expectedMocsForL3off, mocsIndex); mocsIndex = helper.getMocsIndex(*gmmHelper, true, false); EXPECT_EQ(expectedMocsForL3on, mocsIndex); mocsIndex = helper.getMocsIndex(*gmmHelper, true, true); if (mocsIndex != expectedMocsForL3andL1on) { EXPECT_EQ(expectedMocsForL3on, mocsIndex); } else { EXPECT_EQ(expectedMocsForL3andL1on, mocsIndex); } } HWTEST_F(HwHelperTest, 
whenQueryingMaxNumSamplersThenReturnSixteen) {
    auto &hwHelper = HwHelper::get(renderCoreFamily);
    EXPECT_EQ(16u, hwHelper.getMaxNumSamplers());
}

// requiresAuxResolves is false for a kernel with only a value argument and
// becomes true once a pointer argument accessed statelessly is added.
HWTEST_F(HwHelperTest, givenKernelInfoWhenCheckingRequiresAuxResolvesThenCorrectValuesAreReturned) {
    auto &clHwHelper = ClHwHelper::get(renderCoreFamily);
    HardwareInfo localHwInfo = *defaultHwInfo;
    KernelInfo kernelInfo{};
    auto &explicitArgs = kernelInfo.kernelDescriptor.payloadMappings.explicitArgs;

    ArgDescriptor valueArg(ArgDescriptor::ArgType::ArgTValue);
    explicitArgs.push_back(valueArg);
    EXPECT_FALSE(clHwHelper.requiresAuxResolves(kernelInfo, localHwInfo));

    ArgDescriptor pointerArg(ArgDescriptor::ArgType::ArgTPointer);
    pointerArg.as().accessedUsingStatelessAddressingMode = true;
    explicitArgs.push_back(pointerArg);
    EXPECT_TRUE(clHwHelper.requiresAuxResolves(kernelInfo, localHwInfo));
}

// Walks getAuxTranslationMode through the default mode and the forced modes
// for both values of blitterOperationsSupported.
HWTEST_F(HwHelperTest, givenDebugVariableSetWhenAskingForAuxTranslationModeThenReturnCorrectValue) {
    DebugManagerStateRestore restore;

    HardwareInfo hwInfo = *defaultHwInfo;
    hwInfo.capabilityTable.blitterOperationsSupported = true;

    EXPECT_EQ(UnitTestHelper::requiredAuxTranslationMode, HwHelperHw::getAuxTranslationMode(hwInfo));

    // When the default is Blit, removing blitter support must fall back to Builtin.
    if (HwHelperHw::getAuxTranslationMode(hwInfo) == AuxTranslationMode::Blit) {
        hwInfo.capabilityTable.blitterOperationsSupported = false;

        EXPECT_EQ(AuxTranslationMode::Builtin, HwHelperHw::getAuxTranslationMode(hwInfo));
    }

    DebugManager.flags.ForceAuxTranslationMode.set(static_cast(AuxTranslationMode::None));
    EXPECT_EQ(AuxTranslationMode::None, HwHelperHw::getAuxTranslationMode(hwInfo));

    // Forcing Blit without blitter support still yields Builtin.
    hwInfo.capabilityTable.blitterOperationsSupported = false;
    DebugManager.flags.ForceAuxTranslationMode.set(static_cast(AuxTranslationMode::Blit));
    EXPECT_EQ(AuxTranslationMode::Builtin, HwHelperHw::getAuxTranslationMode(hwInfo));

    hwInfo.capabilityTable.blitterOperationsSupported = true;
    DebugManager.flags.ForceAuxTranslationMode.set(static_cast(AuxTranslationMode::Blit));
EXPECT_EQ(AuxTranslationMode::Blit, HwHelperHw::getAuxTranslationMode(hwInfo)); DebugManager.flags.ForceAuxTranslationMode.set(static_cast(AuxTranslationMode::Builtin)); EXPECT_EQ(AuxTranslationMode::Builtin, HwHelperHw::getAuxTranslationMode(hwInfo)); } HWTEST_F(HwHelperTest, givenDebugFlagWhenCheckingIfBufferIsSuitableForCompressionThenReturnCorrectValue) { DebugManagerStateRestore restore; auto &helper = HwHelper::get(renderCoreFamily); DebugManager.flags.OverrideBufferSuitableForRenderCompression.set(0); EXPECT_FALSE(helper.isBufferSizeSuitableForCompression(0, *defaultHwInfo)); EXPECT_FALSE(helper.isBufferSizeSuitableForCompression(KB, *defaultHwInfo)); EXPECT_FALSE(helper.isBufferSizeSuitableForCompression(KB + 1, *defaultHwInfo)); DebugManager.flags.OverrideBufferSuitableForRenderCompression.set(1); EXPECT_TRUE(helper.isBufferSizeSuitableForCompression(0, *defaultHwInfo)); EXPECT_TRUE(helper.isBufferSizeSuitableForCompression(KB, *defaultHwInfo)); EXPECT_TRUE(helper.isBufferSizeSuitableForCompression(KB + 1, *defaultHwInfo)); } HWTEST_F(HwHelperTest, givenHwHelperWhenAskingForTilingSupportThenReturnValidValue) { bool tilingSupported = UnitTestHelper::tiledImagesSupported; const uint32_t numImageTypes = 6; const cl_mem_object_type imgTypes[numImageTypes] = {CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE1D_BUFFER, CL_MEM_OBJECT_IMAGE2D, CL_MEM_OBJECT_IMAGE2D_ARRAY, CL_MEM_OBJECT_IMAGE3D}; cl_image_desc imgDesc = {}; MockContext context; cl_int retVal = CL_SUCCESS; auto buffer = std::unique_ptr(Buffer::create(&context, 0, 1, nullptr, retVal)); auto &helper = HwHelper::get(renderCoreFamily); for (uint32_t i = 0; i < numImageTypes; i++) { imgDesc.image_type = imgTypes[i]; imgDesc.buffer = nullptr; bool allowedType = imgTypes[i] == (CL_MEM_OBJECT_IMAGE2D) || (imgTypes[i] == CL_MEM_OBJECT_IMAGE3D) || (imgTypes[i] == CL_MEM_OBJECT_IMAGE2D_ARRAY); // non shared context, dont force linear storage EXPECT_EQ((tilingSupported & allowedType), 
helper.tilingAllowed(false, Image::isImage1d(imgDesc), false)); { DebugManagerStateRestore restore; DebugManager.flags.ForceLinearImages.set(true); // non shared context, dont force linear storage + debug flag EXPECT_FALSE(helper.tilingAllowed(false, Image::isImage1d(imgDesc), false)); } // shared context, dont force linear storage EXPECT_FALSE(helper.tilingAllowed(true, Image::isImage1d(imgDesc), false)); // non shared context, force linear storage EXPECT_FALSE(helper.tilingAllowed(false, Image::isImage1d(imgDesc), true)); // non shared context, dont force linear storage + create from buffer imgDesc.buffer = buffer.get(); EXPECT_FALSE(helper.tilingAllowed(false, Image::isImage1d(imgDesc), false)); } } HWTEST_F(HwHelperTest, WhenIsBankOverrideRequiredIsCalledThenFalseIsReturned) { auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); EXPECT_FALSE(hwHelper.isBankOverrideRequired(hardwareInfo)); } HWCMDTEST_F(IGFX_GEN8_CORE, HwHelperTest, GivenVariousValuesWhenCallingGetBarriersCountFromHasBarrierThenCorrectValueIsReturned) { auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); EXPECT_EQ(0u, hwHelper.getBarriersCountFromHasBarriers(0u)); EXPECT_EQ(1u, hwHelper.getBarriersCountFromHasBarriers(1u)); } HWCMDTEST_F(IGFX_GEN8_CORE, HwHelperTest, GivenVariousValuesWhenCallingCalculateAvailableThreadCountThenCorrectValueIsReturned) { auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); auto result = hwHelper.calculateAvailableThreadCount( hardwareInfo.platform.eProductFamily, 0, hardwareInfo.gtSystemInfo.EUCount, hardwareInfo.gtSystemInfo.ThreadCount / hardwareInfo.gtSystemInfo.EUCount); EXPECT_EQ(hardwareInfo.gtSystemInfo.ThreadCount, result); } HWTEST_F(HwHelperTest, givenDefaultHwHelperHwWhenIsOffsetToSkipSetFFIDGPWARequiredCalledThenFalseIsReturned) { if (hardwareInfo.platform.eRenderCoreFamily == IGFX_GEN12LP_CORE) { GTEST_SKIP(); } auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); 
EXPECT_FALSE(hwHelper.isOffsetToSkipSetFFIDGPWARequired(hardwareInfo)); } HWTEST_F(HwHelperTest, givenDefaultHwHelperHwWhenIsForceDefaultRCSEngineWARequiredCalledThenFalseIsReturned) { if (hardwareInfo.platform.eRenderCoreFamily == IGFX_GEN12LP_CORE) { GTEST_SKIP(); } EXPECT_FALSE(HwHelperHw::isForceDefaultRCSEngineWARequired(hardwareInfo)); } HWCMDTEST_F(IGFX_GEN8_CORE, HwHelperTest, givenDefaultHwHelperHwWhenIsWorkaroundRequiredCalledThenFalseIsReturned) { if (hardwareInfo.platform.eRenderCoreFamily == IGFX_GEN12LP_CORE) { GTEST_SKIP(); } auto &helper = HwHelper::get(renderCoreFamily); EXPECT_FALSE(helper.isWorkaroundRequired(REVISION_A0, REVISION_B, hardwareInfo)); } HWTEST_F(HwHelperTest, givenDefaultHwHelperHwWhenMinimalSIMDSizeIsQueriedThen8IsReturned) { auto &helper = HwHelper::get(renderCoreFamily); EXPECT_EQ(8u, helper.getMinimalSIMDSize()); } HWTEST_F(HwHelperTest, givenLockableAllocationWhenGettingIsBlitCopyRequiredForLocalMemoryThenCorrectValuesAreReturned) { DebugManagerStateRestore restore{}; auto &helper = HwHelper::get(renderCoreFamily); const auto &hwInfoConfig = *HwInfoConfig::get(productFamily); HardwareInfo hwInfo = *defaultHwInfo; hwInfo.capabilityTable.blitterOperationsSupported = true; MockGraphicsAllocation graphicsAllocation; graphicsAllocation.setAllocationType(AllocationType::BUFFER_HOST_MEMORY); EXPECT_TRUE(GraphicsAllocation::isLockable(graphicsAllocation.getAllocationType())); graphicsAllocation.overrideMemoryPool(MemoryPool::LocalMemory); auto expectedDefaultValue = (hwInfoConfig.getLocalMemoryAccessMode(hwInfo) == LocalMemoryAccessMode::CpuAccessDisallowed); EXPECT_EQ(expectedDefaultValue, helper.isBlitCopyRequiredForLocalMemory(hwInfo, graphicsAllocation)); DebugManager.flags.ForceLocalMemoryAccessMode.set(0); EXPECT_FALSE(helper.isBlitCopyRequiredForLocalMemory(hwInfo, graphicsAllocation)); DebugManager.flags.ForceLocalMemoryAccessMode.set(1); EXPECT_FALSE(helper.isBlitCopyRequiredForLocalMemory(hwInfo, graphicsAllocation)); 
DebugManager.flags.ForceLocalMemoryAccessMode.set(3); EXPECT_TRUE(helper.isBlitCopyRequiredForLocalMemory(hwInfo, graphicsAllocation)); hwInfo.capabilityTable.blitterOperationsSupported = false; EXPECT_TRUE(helper.isBlitCopyRequiredForLocalMemory(hwInfo, graphicsAllocation)); graphicsAllocation.overrideMemoryPool(MemoryPool::System64KBPages); EXPECT_FALSE(helper.isBlitCopyRequiredForLocalMemory(hwInfo, graphicsAllocation)); hwInfo.capabilityTable.blitterOperationsSupported = true; EXPECT_FALSE(helper.isBlitCopyRequiredForLocalMemory(hwInfo, graphicsAllocation)); } HWTEST_F(HwHelperTest, givenNotLockableAllocationWhenGettingIsBlitCopyRequiredForLocalMemoryThenCorrectValuesAreReturned) { DebugManagerStateRestore restore{}; auto &helper = HwHelper::get(renderCoreFamily); HardwareInfo hwInfo = *defaultHwInfo; hwInfo.capabilityTable.blitterOperationsSupported = true; MockGraphicsAllocation graphicsAllocation; graphicsAllocation.setAllocationType(AllocationType::SVM_GPU); EXPECT_FALSE(GraphicsAllocation::isLockable(graphicsAllocation.getAllocationType())); graphicsAllocation.overrideMemoryPool(MemoryPool::LocalMemory); MockGmm mockGmm(pDevice->getGmmClientContext(), nullptr, 100, 100, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true); mockGmm.resourceParams.Flags.Info.NotLockable = true; graphicsAllocation.setDefaultGmm(&mockGmm); EXPECT_TRUE(helper.isBlitCopyRequiredForLocalMemory(hwInfo, graphicsAllocation)); DebugManager.flags.ForceLocalMemoryAccessMode.set(0); EXPECT_TRUE(helper.isBlitCopyRequiredForLocalMemory(hwInfo, graphicsAllocation)); DebugManager.flags.ForceLocalMemoryAccessMode.set(1); EXPECT_TRUE(helper.isBlitCopyRequiredForLocalMemory(hwInfo, graphicsAllocation)); DebugManager.flags.ForceLocalMemoryAccessMode.set(3); EXPECT_TRUE(helper.isBlitCopyRequiredForLocalMemory(hwInfo, graphicsAllocation)); hwInfo.capabilityTable.blitterOperationsSupported = false; EXPECT_TRUE(helper.isBlitCopyRequiredForLocalMemory(hwInfo, graphicsAllocation)); 
graphicsAllocation.overrideMemoryPool(MemoryPool::System64KBPages); EXPECT_FALSE(helper.isBlitCopyRequiredForLocalMemory(hwInfo, graphicsAllocation)); hwInfo.capabilityTable.blitterOperationsSupported = true; EXPECT_FALSE(helper.isBlitCopyRequiredForLocalMemory(hwInfo, graphicsAllocation)); } HWTEST2_F(HwHelperTest, givenDefaultHwHelperHwWhenGettingIsBlitCopyRequiredForLocalMemoryThenFalseIsReturned, IsAtMostGen11) { auto &helper = HwHelper::get(renderCoreFamily); MockGraphicsAllocation graphicsAllocation; graphicsAllocation.overrideMemoryPool(MemoryPool::LocalMemory); graphicsAllocation.setAllocationType(AllocationType::BUFFER_HOST_MEMORY); EXPECT_FALSE(helper.isBlitCopyRequiredForLocalMemory(*defaultHwInfo, graphicsAllocation)); } HWCMDTEST_F(IGFX_GEN8_CORE, HwHelperTest, WhenIsFusedEuDispatchEnabledIsCalledThenFalseIsReturned) { if (hardwareInfo.platform.eRenderCoreFamily == IGFX_GEN12LP_CORE) { GTEST_SKIP(); } auto &helper = HwHelper::get(renderCoreFamily); EXPECT_FALSE(helper.isFusedEuDispatchEnabled(hardwareInfo, false)); } HWTEST_F(PipeControlHelperTests, WhenGettingPipeControSizeForCacheFlushThenReturnCorrectValue) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; size_t actualSize = MemorySynchronizationCommands::getSizeForFullCacheFlush(); EXPECT_EQ(sizeof(PIPE_CONTROL), actualSize); } HWTEST_F(PipeControlHelperTests, WhenProgrammingCacheFlushThenExpectBasicFieldsSet) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; std::unique_ptr buffer(new uint8_t[128]); LinearStream stream(buffer.get(), 128); MemorySynchronizationCommands::addFullCacheFlush(stream, *defaultHwInfo); PIPE_CONTROL *pipeControl = genCmdCast(buffer.get()); ASSERT_NE(nullptr, pipeControl); EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); EXPECT_EQ(MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo), pipeControl->getDcFlushEnable()); EXPECT_TRUE(pipeControl->getRenderTargetCacheFlushEnable()); 
EXPECT_TRUE(pipeControl->getInstructionCacheInvalidateEnable()); EXPECT_TRUE(pipeControl->getTextureCacheInvalidationEnable()); EXPECT_TRUE(pipeControl->getPipeControlFlushEnable()); EXPECT_TRUE(pipeControl->getStateCacheInvalidationEnable()); EXPECT_TRUE(pipeControl->getTlbInvalidate()); } using HwInfoConfigCommonTest = ::testing::Test; HWTEST2_F(HwInfoConfigCommonTest, givenBlitterPreferenceWhenEnablingBlitterOperationsSupportThenHonorThePreference, IsAtLeastGen12lp) { HardwareInfo hardwareInfo = *defaultHwInfo; auto hwInfoConfig = HwInfoConfig::get(hardwareInfo.platform.eProductFamily); hwInfoConfig->configureHardwareCustom(&hardwareInfo, nullptr); const auto expectedBlitterSupport = hwInfoConfig->obtainBlitterPreference(hardwareInfo); EXPECT_EQ(expectedBlitterSupport, hardwareInfo.capabilityTable.blitterOperationsSupported); } HWTEST_F(HwHelperTest, givenHwHelperWhenAskingForIsaSystemMemoryPlacementThenReturnFalseIfLocalMemorySupported) { DebugManagerStateRestore restorer; HwHelper &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); hardwareInfo.featureTable.flags.ftrLocalMemory = true; auto localMemoryEnabled = hwHelper.getEnableLocalMemory(hardwareInfo); EXPECT_NE(localMemoryEnabled, hwHelper.useSystemMemoryPlacementForISA(hardwareInfo)); hardwareInfo.featureTable.flags.ftrLocalMemory = false; localMemoryEnabled = hwHelper.getEnableLocalMemory(hardwareInfo); EXPECT_NE(localMemoryEnabled, hwHelper.useSystemMemoryPlacementForISA(hardwareInfo)); DebugManager.flags.EnableLocalMemory.set(true); hardwareInfo.featureTable.flags.ftrLocalMemory = false; localMemoryEnabled = hwHelper.getEnableLocalMemory(hardwareInfo); EXPECT_NE(localMemoryEnabled, hwHelper.useSystemMemoryPlacementForISA(hardwareInfo)); DebugManager.flags.EnableLocalMemory.set(false); hardwareInfo.featureTable.flags.ftrLocalMemory = true; localMemoryEnabled = hwHelper.getEnableLocalMemory(hardwareInfo); EXPECT_NE(localMemoryEnabled, 
hwHelper.useSystemMemoryPlacementForISA(hardwareInfo)); } TEST_F(HwHelperTest, givenInvalidEngineTypeWhenGettingEngineGroupTypeThenThrow) { HwHelper &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); EXPECT_ANY_THROW(hwHelper.getEngineGroupType(aub_stream::EngineType::NUM_ENGINES, EngineUsage::Regular, hardwareInfo)); EXPECT_ANY_THROW(hwHelper.getEngineGroupType(aub_stream::EngineType::ENGINE_VECS, EngineUsage::Regular, hardwareInfo)); } HWTEST2_F(HwInfoConfigCommonTest, givenDebugFlagSetWhenEnablingBlitterOperationsSupportThenHonorTheFlag, IsAtLeastGen12lp) { DebugManagerStateRestore restore{}; HardwareInfo hardwareInfo = *defaultHwInfo; auto hwInfoConfig = HwInfoConfig::get(hardwareInfo.platform.eProductFamily); DebugManager.flags.EnableBlitterOperationsSupport.set(1); hwInfoConfig->configureHardwareCustom(&hardwareInfo, nullptr); EXPECT_TRUE(hardwareInfo.capabilityTable.blitterOperationsSupported); DebugManager.flags.EnableBlitterOperationsSupport.set(0); hwInfoConfig->configureHardwareCustom(&hardwareInfo, nullptr); EXPECT_FALSE(hardwareInfo.capabilityTable.blitterOperationsSupported); } HWCMDTEST_F(IGFX_GEN8_CORE, HwHelperTest, GivenVariousValuesWhenAlignSlmSizeIsCalledThenCorrectValueIsReturned) { if (::renderCoreFamily == IGFX_GEN8_CORE) { EXPECT_EQ(0u, HwHelperHw::get().alignSlmSize(0)); EXPECT_EQ(4096u, HwHelperHw::get().alignSlmSize(1)); EXPECT_EQ(4096u, HwHelperHw::get().alignSlmSize(1024)); EXPECT_EQ(4096u, HwHelperHw::get().alignSlmSize(1025)); EXPECT_EQ(4096u, HwHelperHw::get().alignSlmSize(2048)); EXPECT_EQ(4096u, HwHelperHw::get().alignSlmSize(2049)); EXPECT_EQ(4096u, HwHelperHw::get().alignSlmSize(4096)); EXPECT_EQ(8192u, HwHelperHw::get().alignSlmSize(4097)); EXPECT_EQ(8192u, HwHelperHw::get().alignSlmSize(8192)); EXPECT_EQ(16384u, HwHelperHw::get().alignSlmSize(8193)); EXPECT_EQ(16384u, HwHelperHw::get().alignSlmSize(12288)); EXPECT_EQ(16384u, HwHelperHw::get().alignSlmSize(16384)); EXPECT_EQ(32768u, 
HwHelperHw::get().alignSlmSize(16385)); EXPECT_EQ(32768u, HwHelperHw::get().alignSlmSize(24576)); EXPECT_EQ(32768u, HwHelperHw::get().alignSlmSize(32768)); EXPECT_EQ(65536u, HwHelperHw::get().alignSlmSize(32769)); EXPECT_EQ(65536u, HwHelperHw::get().alignSlmSize(49152)); EXPECT_EQ(65536u, HwHelperHw::get().alignSlmSize(65535)); EXPECT_EQ(65536u, HwHelperHw::get().alignSlmSize(65536)); } else { EXPECT_EQ(0u, HwHelperHw::get().alignSlmSize(0)); EXPECT_EQ(1024u, HwHelperHw::get().alignSlmSize(1)); EXPECT_EQ(1024u, HwHelperHw::get().alignSlmSize(1024)); EXPECT_EQ(2048u, HwHelperHw::get().alignSlmSize(1025)); EXPECT_EQ(2048u, HwHelperHw::get().alignSlmSize(2048)); EXPECT_EQ(4096u, HwHelperHw::get().alignSlmSize(2049)); EXPECT_EQ(4096u, HwHelperHw::get().alignSlmSize(4096)); EXPECT_EQ(8192u, HwHelperHw::get().alignSlmSize(4097)); EXPECT_EQ(8192u, HwHelperHw::get().alignSlmSize(8192)); EXPECT_EQ(16384u, HwHelperHw::get().alignSlmSize(8193)); EXPECT_EQ(16384u, HwHelperHw::get().alignSlmSize(16384)); EXPECT_EQ(32768u, HwHelperHw::get().alignSlmSize(16385)); EXPECT_EQ(32768u, HwHelperHw::get().alignSlmSize(32768)); EXPECT_EQ(65536u, HwHelperHw::get().alignSlmSize(32769)); EXPECT_EQ(65536u, HwHelperHw::get().alignSlmSize(65536)); } } HWCMDTEST_F(IGFX_GEN8_CORE, HwHelperTest, GivenVariousValuesWhenComputeSlmSizeIsCalledThenCorrectValueIsReturned) { auto hwInfo = *defaultHwInfo; if (::renderCoreFamily == IGFX_GEN8_CORE) { EXPECT_EQ(0u, HwHelperHw::get().computeSlmValues(hwInfo, 0)); EXPECT_EQ(1u, HwHelperHw::get().computeSlmValues(hwInfo, 1)); EXPECT_EQ(1u, HwHelperHw::get().computeSlmValues(hwInfo, 1024)); EXPECT_EQ(1u, HwHelperHw::get().computeSlmValues(hwInfo, 1025)); EXPECT_EQ(1u, HwHelperHw::get().computeSlmValues(hwInfo, 2048)); EXPECT_EQ(1u, HwHelperHw::get().computeSlmValues(hwInfo, 2049)); EXPECT_EQ(1u, HwHelperHw::get().computeSlmValues(hwInfo, 4096)); EXPECT_EQ(2u, HwHelperHw::get().computeSlmValues(hwInfo, 4097)); EXPECT_EQ(2u, 
HwHelperHw::get().computeSlmValues(hwInfo, 8192)); EXPECT_EQ(4u, HwHelperHw::get().computeSlmValues(hwInfo, 8193)); EXPECT_EQ(4u, HwHelperHw::get().computeSlmValues(hwInfo, 12288)); EXPECT_EQ(4u, HwHelperHw::get().computeSlmValues(hwInfo, 16384)); EXPECT_EQ(8u, HwHelperHw::get().computeSlmValues(hwInfo, 16385)); EXPECT_EQ(8u, HwHelperHw::get().computeSlmValues(hwInfo, 24576)); EXPECT_EQ(8u, HwHelperHw::get().computeSlmValues(hwInfo, 32768)); EXPECT_EQ(16u, HwHelperHw::get().computeSlmValues(hwInfo, 32769)); EXPECT_EQ(16u, HwHelperHw::get().computeSlmValues(hwInfo, 49152)); EXPECT_EQ(16u, HwHelperHw::get().computeSlmValues(hwInfo, 65535)); EXPECT_EQ(16u, HwHelperHw::get().computeSlmValues(hwInfo, 65536)); } else { EXPECT_EQ(0u, HwHelperHw::get().computeSlmValues(hwInfo, 0)); EXPECT_EQ(1u, HwHelperHw::get().computeSlmValues(hwInfo, 1)); EXPECT_EQ(1u, HwHelperHw::get().computeSlmValues(hwInfo, 1024)); EXPECT_EQ(2u, HwHelperHw::get().computeSlmValues(hwInfo, 1025)); EXPECT_EQ(2u, HwHelperHw::get().computeSlmValues(hwInfo, 2048)); EXPECT_EQ(3u, HwHelperHw::get().computeSlmValues(hwInfo, 2049)); EXPECT_EQ(3u, HwHelperHw::get().computeSlmValues(hwInfo, 4096)); EXPECT_EQ(4u, HwHelperHw::get().computeSlmValues(hwInfo, 4097)); EXPECT_EQ(4u, HwHelperHw::get().computeSlmValues(hwInfo, 8192)); EXPECT_EQ(5u, HwHelperHw::get().computeSlmValues(hwInfo, 8193)); EXPECT_EQ(5u, HwHelperHw::get().computeSlmValues(hwInfo, 16384)); EXPECT_EQ(6u, HwHelperHw::get().computeSlmValues(hwInfo, 16385)); EXPECT_EQ(6u, HwHelperHw::get().computeSlmValues(hwInfo, 32768)); EXPECT_EQ(7u, HwHelperHw::get().computeSlmValues(hwInfo, 32769)); EXPECT_EQ(7u, HwHelperHw::get().computeSlmValues(hwInfo, 65536)); } } HWTEST_F(HwHelperTest, GivenZeroSlmSizeWhenComputeSlmSizeIsCalledThenCorrectValueIsReturned) { using SHARED_LOCAL_MEMORY_SIZE = typename FamilyType::INTERFACE_DESCRIPTOR_DATA::SHARED_LOCAL_MEMORY_SIZE; auto hwInfo = *defaultHwInfo; auto receivedSlmSize = 
static_cast(HwHelperHw::get().computeSlmValues(hwInfo, 0)); EXPECT_EQ(SHARED_LOCAL_MEMORY_SIZE::SHARED_LOCAL_MEMORY_SIZE_ENCODES_0K, receivedSlmSize); } HWTEST2_F(HwHelperTest, givenHwHelperWhenCheckingSipWaThenFalseIsReturned, isTglLpOrBelow) { auto &helper = HwHelper::get(renderCoreFamily); EXPECT_FALSE(helper.isSipWANeeded(*defaultHwInfo)); } HWCMDTEST_F(IGFX_GEN8_CORE, HwHelperTest, givenHwHelperWhenGettingPlanarYuvHeightThenHelperReturnsCorrectValue) { auto &helper = HwHelper::get(renderCoreFamily); EXPECT_EQ(helper.getPlanarYuvMaxHeight(), 16352u); } TEST_F(HwHelperTest, WhenGettingIsCpuImageTransferPreferredThenFalseIsReturned) { REQUIRE_IMAGES_OR_SKIP(defaultHwInfo); auto &hwHelper = HwHelper::get(renderCoreFamily); EXPECT_FALSE(hwHelper.isCpuImageTransferPreferred(*defaultHwInfo)); } TEST_F(HwHelperTest, whenFtrGpGpuMidThreadLevelPreemptFeatureDisabledThenFalseIsReturned) { HwHelper &hwHelper = HwHelper::get(renderCoreFamily); FeatureTable featureTable = {}; featureTable.flags.ftrGpGpuMidThreadLevelPreempt = false; bool result = hwHelper.isAdditionalFeatureFlagRequired(&featureTable); EXPECT_FALSE(result); } HWTEST_F(HwHelperTest, whenGettingDefaultRevisionIdThenCorrectValueIsReturned) { auto &hwHelper = HwHelper::get(renderCoreFamily); auto revisionId = hwHelper.getDefaultRevisionId(*defaultHwInfo); if ((defaultHwInfo->platform.eRenderCoreFamily == IGFX_GEN9_CORE) && (strcmp(defaultHwInfo->capabilityTable.platformType, "core") == 0)) { EXPECT_EQ(9u, revisionId); } else { EXPECT_EQ(0u, revisionId); } } HWTEST_F(HwHelperTest, whenGettingNumberOfCacheRegionsThenReturnZero) { auto &hwHelper = HwHelper::get(renderCoreFamily); EXPECT_EQ(0u, hwHelper.getNumCacheRegions()); } HWCMDTEST_F(IGFX_GEN8_CORE, HwHelperTest, whenCheckingForSmallKernelPreferenceThenFalseIsReturned) { auto &hwHelper = HwHelper::get(renderCoreFamily); EXPECT_FALSE(hwHelper.preferSmallWorkgroupSizeForKernel(0u, this->pDevice->getHardwareInfo())); 
EXPECT_FALSE(hwHelper.preferSmallWorkgroupSizeForKernel(20000u, this->pDevice->getHardwareInfo())); } TEST_F(HwHelperTest, givenGenHelperWhenKernelArgumentIsNotPureStatefulThenRequireNonAuxMode) { auto &clHwHelper = ClHwHelper::get(renderCoreFamily); for (auto isPureStateful : {false, true}) { ArgDescPointer argAsPtr{}; argAsPtr.accessedUsingStatelessAddressingMode = !isPureStateful; EXPECT_EQ(!argAsPtr.isPureStateful(), clHwHelper.requiresNonAuxMode(argAsPtr, *defaultHwInfo)); } } HWTEST_F(HwHelperTest, whenSetCompressedFlagThenProperFlagSet) { auto &hwHelper = HwHelper::get(renderCoreFamily); auto gmm = std::make_unique(pDevice->getGmmClientContext()); gmm->resourceParams.Flags.Info.RenderCompressed = 0; hwHelper.applyRenderCompressionFlag(*gmm, 1); EXPECT_EQ(1u, gmm->resourceParams.Flags.Info.RenderCompressed); hwHelper.applyRenderCompressionFlag(*gmm, 0); EXPECT_EQ(0u, gmm->resourceParams.Flags.Info.RenderCompressed); } HWTEST_F(HwHelperTest, whenAdjustPreemptionSurfaceSizeIsCalledThenCsrSizeDoesntChange) { auto &hwHelper = HwHelper::get(renderCoreFamily); size_t csrSize = 1024; size_t oldCsrSize = csrSize; hwHelper.adjustPreemptionSurfaceSize(csrSize); EXPECT_EQ(oldCsrSize, csrSize); } HWTEST_F(HwHelperTest, whenSetSipKernelDataIsCalledThenSipKernelDataDoesntChange) { auto &hwHelper = HwHelper::get(renderCoreFamily); uint32_t *sipKernelBinary = nullptr; uint32_t *oldSipKernelBinary = sipKernelBinary; size_t kernelBinarySize = 1024; size_t oldKernelBinarySize = kernelBinarySize; hwHelper.setSipKernelData(sipKernelBinary, kernelBinarySize); EXPECT_EQ(oldKernelBinarySize, kernelBinarySize); EXPECT_EQ(oldSipKernelBinary, sipKernelBinary); } HWTEST_F(HwHelperTest, whenIsSipKernelAsHexadecimalArrayPreferredIsCalledThenReturnFalse) { auto &hwHelper = HwHelper::get(renderCoreFamily); EXPECT_FALSE(hwHelper.isSipKernelAsHexadecimalArrayPreferred()); } using isXeHpCoreOrBelow = IsAtMostProduct; HWTEST2_F(HwHelperTest, 
givenXeHPAndBelowPlatformWhenCheckingIfUnTypedDataPortCacheFlushRequiredThenReturnFalse, isXeHpCoreOrBelow) { const auto &hwHelper = HwHelper::get(renderCoreFamily); EXPECT_FALSE(hwHelper.unTypedDataPortCacheFlushRequired()); } HWTEST2_F(HwHelperTest, givenXeHPAndBelowPlatformPlatformWhenCheckingIfEngineTypeRemappingIsRequiredThenReturnFalse, isXeHpCoreOrBelow) { const auto &hwHelper = HwHelper::get(renderCoreFamily); EXPECT_FALSE(hwHelper.isEngineTypeRemappingToHwSpecificRequired()); } HWTEST2_F(HwHelperTest, givenAtMostGen12lpPlatformiWhenCheckingIfScratchSpaceSurfaceStateAccessibleThenFalseIsReturned, IsAtMostGen12lp) { const auto &hwHelper = HwHelper::get(renderCoreFamily); EXPECT_FALSE(hwHelper.isScratchSpaceSurfaceStateAccessible()); } HWTEST2_F(HwHelperTest, givenAtLeastXeHpPlatformWhenCheckingIfScratchSpaceSurfaceStateAccessibleTheniTrueIsReturned, IsAtLeastXeHpCore) { const auto &hwHelper = HwHelper::get(renderCoreFamily); EXPECT_TRUE(hwHelper.isScratchSpaceSurfaceStateAccessible()); } HWTEST_F(HwHelperTest, givenGetRenderSurfaceStateBaseAddressCalledThenCorrectValueIsReturned) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; RENDER_SURFACE_STATE renderSurfaceState; uint64_t expectedBaseAddress = 0x1122334455667788; renderSurfaceState.setSurfaceBaseAddress(expectedBaseAddress); const auto &hwHelper = HwHelper::get(renderCoreFamily); EXPECT_EQ(expectedBaseAddress, hwHelper.getRenderSurfaceStateBaseAddress(&renderSurfaceState)); } HWTEST_F(HwHelperTest, givenGetRenderSurfaceStatePitchCalledThenCorrectValueIsReturned) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; RENDER_SURFACE_STATE renderSurfaceState; uint32_t expectedPitch = 0x400; renderSurfaceState.setSurfacePitch(expectedPitch); const auto &hwHelper = HwHelper::get(renderCoreFamily); EXPECT_EQ(expectedPitch, hwHelper.getRenderSurfaceStatePitch(&renderSurfaceState)); } HWCMDTEST_F(IGFX_GEN8_CORE, HwHelperTest, 
givenCLImageFormatsWhenCallingIsFormatRedescribableThenCorrectValueReturned) { static const cl_image_format redescribeFormats[] = { {CL_R, CL_UNSIGNED_INT8}, {CL_R, CL_UNSIGNED_INT16}, {CL_R, CL_UNSIGNED_INT32}, {CL_RG, CL_UNSIGNED_INT32}, {CL_RGBA, CL_UNSIGNED_INT32}, }; MockContext context; auto &clHwHelper = ClHwHelper::get(context.getDevice(0)->getHardwareInfo().platform.eRenderCoreFamily); const ArrayRef formats = SurfaceFormats::readWrite(); for (const auto &format : formats) { const cl_image_format oclFormat = format.OCLImageFormat; bool expectedResult = true; for (const auto &nonRedescribableFormat : redescribeFormats) { expectedResult &= (memcmp(&oclFormat, &nonRedescribableFormat, sizeof(cl_image_format)) != 0); } EXPECT_EQ(expectedResult, clHwHelper.isFormatRedescribable(oclFormat)); } } TEST(HwHelperTests, whenBlitterSupportIsDisabledThenDontExposeAnyBcsEngine) { auto hwInfo = *defaultHwInfo; hwInfo.capabilityTable.blitterOperationsSupported = false; hwInfo.featureTable.ftrBcsInfo.set(0); const auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); auto engineUsageTypes = hwHelper.getGpgpuEngineInstances(hwInfo); for (auto &engineUsageType : engineUsageTypes) { EXPECT_FALSE(EngineHelpers::isBcs(engineUsageType.first)); } } using NotXeHPOrDG2 = AreNotGfxCores; HWTEST2_F(HwHelperTest, givenNotAtsOrDg2WhenDisableL3ForDebugCalledThenFalseIsReturned, NotXeHPOrDG2) { const auto &hwHelper = HwHelper::get(renderCoreFamily); EXPECT_FALSE(hwHelper.disableL3CacheForDebug(*defaultHwInfo)); } using ATSOrDG2 = IsWithinGfxCore; HWTEST2_F(HwHelperTest, givenAtsOrDg2WhenDisableL3ForDebugCalledThenTrueIsReturned, ATSOrDG2) { const auto &hwHelper = HwHelper::get(renderCoreFamily); EXPECT_TRUE(hwHelper.disableL3CacheForDebug(*defaultHwInfo)); } HWTEST_F(HwHelperTest, givenHwHelperWhenGettingIfRevisionSpecificBinaryBuiltinIsRequiredThenFalseIsReturned) { auto &hwHelper = NEO::HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily); 
EXPECT_FALSE(hwHelper.isRevisionSpecificBinaryBuiltinRequired()); } HWTEST2_F(HwHelperTest, givenDG2HwHelperWhenGettingIsPlatformFlushTaskEnabledThenTrueIsReturned, IsDG2) { auto &hwHelper = NEO::HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily); EXPECT_TRUE(hwHelper.isPlatformFlushTaskEnabled(*defaultHwInfo)); } HWTEST2_F(HwHelperTest, givenPvcHwHelperWhenGettingIsPlatformFlushTaskEnabledThenTrueIsReturned, IsPVC) { auto &hwHelper = NEO::HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily); EXPECT_TRUE(hwHelper.isPlatformFlushTaskEnabled(*defaultHwInfo)); } HWTEST2_F(HwHelperTest, givenAtMostGen12lpHwHelperWhenGettingIsPlatformFlushTaskEnabledThenFalseIsReturned, IsAtMostGen12lp) { auto &hwHelper = NEO::HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily); EXPECT_FALSE(hwHelper.isPlatformFlushTaskEnabled(*defaultHwInfo)); } struct CoherentWANotNeeded { template static constexpr bool isMatched() { if (productFamily == IGFX_BROADWELL) return false; return !TestTraits::get()>::forceGpuNonCoherent; } }; HWTEST2_F(HwHelperTest, givenHwInfoConfigWhenCheckingForceNonGpuCoherencyWAThenPassedValueReturned, CoherentWANotNeeded) { const auto &hwHelper = HwHelper::get(renderCoreFamily); EXPECT_TRUE(hwHelper.forceNonGpuCoherencyWA(true)); EXPECT_FALSE(hwHelper.forceNonGpuCoherencyWA(false)); } struct ForceNonCoherentMode { template static constexpr bool isMatched() { if (productFamily == IGFX_BROADWELL) return false; return TestTraits::get()>::forceGpuNonCoherent; } }; HWTEST2_F(HwHelperTest, givenHwInfoConfigWhenCheckingForceNonGpuCoherencyWAThenFalseIsReturned, ForceNonCoherentMode) { const auto &hwHelper = HwHelper::get(renderCoreFamily); EXPECT_FALSE(hwHelper.forceNonGpuCoherencyWA(true)); EXPECT_FALSE(hwHelper.forceNonGpuCoherencyWA(false)); } HWTEST_F(HwHelperTest, GivenHwInfoWhenGetBatchBufferEndSizeCalledThenCorrectSizeReturned) { const auto &hwHelper = HwHelper::get(renderCoreFamily); EXPECT_EQ(hwHelper.getBatchBufferEndSize(), sizeof(typename 
FamilyType::MI_BATCH_BUFFER_END)); } HWTEST_F(HwHelperTest, GivenHwInfoWhenGetBatchBufferEndReferenceCalledThenCorrectPtrReturned) { const auto &hwHelper = HwHelper::get(renderCoreFamily); EXPECT_EQ(hwHelper.getBatchBufferEndReference(), reinterpret_cast(&FamilyType::cmdInitBatchBufferEnd)); } HWTEST_F(HwHelperTest, givenHwHelperWhenPassingCopyEngineTypeThenItsCopyOnly) { EXPECT_TRUE(EngineHelper::isCopyOnlyEngineType(EngineGroupType::Copy)); } HWTEST_F(HwHelperTest, givenHwHelperWhenPassingLinkedCopyEngineTypeThenItsCopyOnly) { EXPECT_TRUE(EngineHelper::isCopyOnlyEngineType(EngineGroupType::LinkedCopy)); } HWTEST_F(HwHelperTest, givenHwHelperWhenPassingComputeEngineTypeThenItsNotCopyOnly) { EXPECT_FALSE(EngineHelper::isCopyOnlyEngineType(EngineGroupType::Compute)); } compute-runtime-22.14.22890/opencl/test/unit_test/helpers/hw_helper_tests_dg2_and_later.cpp000066400000000000000000000167661422164147700317610ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/pipe_control_args.h" #include "shared/source/os_interface/hw_info_config.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/hw_helper_tests.h" #include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" using PipeControlHelperTestsDg2AndLater = ::testing::Test; using HwHelperTestsDg2AndLater = Test; HWTEST2_F(PipeControlHelperTestsDg2AndLater, WhenAddingPipeControlWAThenCorrectCommandsAreProgrammed, IsAtLeastXeHpgCore) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; uint8_t buffer[128]; uint64_t address = 0x1234567887654321; HardwareInfo hardwareInfo = *defaultHwInfo; bool 
requiresMemorySynchronization = (MemorySynchronizationCommands::getSizeForAdditonalSynchronization(hardwareInfo) > 0) ? true : false; for (auto ftrLocalMemory : ::testing::Bool()) { LinearStream stream(buffer, 128); hardwareInfo.featureTable.flags.ftrLocalMemory = ftrLocalMemory; MemorySynchronizationCommands::addPipeControlWA(stream, address, hardwareInfo); if (MemorySynchronizationCommands::isPipeControlWArequired(hardwareInfo) == false) { EXPECT_EQ(0u, stream.getUsed()); continue; } GenCmdList cmdList; FamilyType::PARSE::parseCommandBuffer(cmdList, stream.getCpuBase(), stream.getUsed()); EXPECT_EQ(requiresMemorySynchronization ? 2u : 1u, cmdList.size()); PIPE_CONTROL expectedPipeControl = FamilyType::cmdInitPipeControl; expectedPipeControl.setCommandStreamerStallEnable(true); UnitTestHelper::setPipeControlHdcPipelineFlush(expectedPipeControl, true); expectedPipeControl.setUnTypedDataPortCacheFlush(true); auto it = cmdList.begin(); auto pPipeControl = genCmdCast(*it); ASSERT_NE(nullptr, pPipeControl); EXPECT_TRUE(memcmp(&expectedPipeControl, pPipeControl, sizeof(PIPE_CONTROL)) == 0); if (requiresMemorySynchronization) { if (UnitTestHelper::isAdditionalMiSemaphoreWaitRequired(hardwareInfo)) { MI_SEMAPHORE_WAIT expectedMiSemaphoreWait; EncodeSempahore::programMiSemaphoreWait(&expectedMiSemaphoreWait, address, EncodeSempahore::invalidHardwareTag, MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD, false); auto pMiSemaphoreWait = genCmdCast(*(++it)); ASSERT_NE(nullptr, pMiSemaphoreWait); EXPECT_TRUE(memcmp(&expectedMiSemaphoreWait, pMiSemaphoreWait, sizeof(MI_SEMAPHORE_WAIT)) == 0); } } } } HWTEST2_F(PipeControlHelperTestsDg2AndLater, WhenSettingExtraPipeControlPropertiesThenCorrectValuesAreSet, IsAtLeastXeHpgCore) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; for (auto ftrLocalMemory : ::testing::Bool()) { HardwareInfo hardwareInfo = *defaultHwInfo; hardwareInfo.featureTable.flags.ftrLocalMemory = ftrLocalMemory; PipeControlArgs 
args; MemorySynchronizationCommands::setPostSyncExtraProperties(args, hardwareInfo); if (ftrLocalMemory) { EXPECT_TRUE(args.hdcPipelineFlush); EXPECT_TRUE(args.unTypedDataPortCacheFlush); } else { EXPECT_FALSE(args.hdcPipelineFlush); EXPECT_FALSE(args.unTypedDataPortCacheFlush); } } } HWTEST2_F(PipeControlHelperTestsDg2AndLater, whenSettingCacheFlushExtraFieldsThenExpectHdcAndUnTypedDataPortFlushSet, IsAtLeastXeHpgCore) { PipeControlArgs args; MemorySynchronizationCommands::setCacheFlushExtraProperties(args); EXPECT_TRUE(args.hdcPipelineFlush); EXPECT_TRUE(args.unTypedDataPortCacheFlush); } HWTEST2_F(PipeControlHelperTestsDg2AndLater, givenRequestedCacheFlushesWhenProgrammingPipeControlThenFlushHdcAndUnTypedDataPortCache, IsAtLeastXeHpgCore) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; uint32_t buffer[sizeof(PIPE_CONTROL) * 2] = {}; LinearStream stream(buffer, sizeof(buffer)); PipeControlArgs args; args.hdcPipelineFlush = true; args.unTypedDataPortCacheFlush = true; args.compressionControlSurfaceCcsFlush = true; MemorySynchronizationCommands::addPipeControl(stream, args); auto pipeControl = reinterpret_cast(buffer); EXPECT_TRUE(UnitTestHelper::getPipeControlHdcPipelineFlush(*pipeControl)); EXPECT_TRUE(pipeControl->getUnTypedDataPortCacheFlush()); EXPECT_TRUE(pipeControl->getCompressionControlSurfaceCcsFlush()); } HWTEST2_F(PipeControlHelperTestsDg2AndLater, givenDebugVariableSetWhenProgrammingPipeControlThenFlushHdcAndUnTypedDataPortCache, IsAtLeastXeHpgCore) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; DebugManagerStateRestore restore; DebugManager.flags.FlushAllCaches.set(true); uint32_t buffer[sizeof(PIPE_CONTROL) * 2] = {}; LinearStream stream(buffer, sizeof(buffer)); PipeControlArgs args; MemorySynchronizationCommands::addPipeControl(stream, args); auto pipeControl = reinterpret_cast(buffer); EXPECT_TRUE(UnitTestHelper::getPipeControlHdcPipelineFlush(*pipeControl)); EXPECT_TRUE(pipeControl->getUnTypedDataPortCacheFlush()); 
EXPECT_TRUE(pipeControl->getCompressionControlSurfaceCcsFlush()); } HWTEST2_F(PipeControlHelperTestsDg2AndLater, givenDebugDisableCacheFlushWhenProgrammingPipeControlWithCacheFlushThenExpectDebugOverrideFlushHdcAndUnTypedDataPortCache, IsAtLeastXeHpgCore) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; DebugManagerStateRestore restore; DebugManager.flags.DoNotFlushCaches.set(true); uint32_t buffer[sizeof(PIPE_CONTROL) * 2] = {}; LinearStream stream(buffer, sizeof(buffer)); PipeControlArgs args; args.hdcPipelineFlush = true; args.unTypedDataPortCacheFlush = true; args.compressionControlSurfaceCcsFlush = true; MemorySynchronizationCommands::addPipeControl(stream, args); auto pipeControl = reinterpret_cast(buffer); EXPECT_FALSE(UnitTestHelper::getPipeControlHdcPipelineFlush(*pipeControl)); EXPECT_FALSE(pipeControl->getUnTypedDataPortCacheFlush()); EXPECT_FALSE(pipeControl->getCompressionControlSurfaceCcsFlush()); } HWTEST2_F(HwHelperTestsDg2AndLater, givenXeHPGAndLaterPlatformWhenCheckingIfUnTypedDataPortCacheFlushRequiredThenReturnTrue, IsAtLeastXeHpgCore) { auto &hwHelper = HwHelper::get(renderCoreFamily); EXPECT_TRUE(hwHelper.unTypedDataPortCacheFlushRequired()); } using HwInfoConfigTestDg2AndLater = ::testing::Test; HWTEST2_F(HwInfoConfigTestDg2AndLater, givenDg2AndLaterPlatformWhenAskedIfHeapInLocalMemThenTrueIsReturned, IsAtLeastXeHpgCore) { const auto &hwInfoConfig = *HwInfoConfig::get(defaultHwInfo->platform.eProductFamily); EXPECT_TRUE(hwInfoConfig.heapInLocalMem(*defaultHwInfo)); } compute-runtime-22.14.22890/opencl/test/unit_test/helpers/hw_helper_tests_dg2_or_below.cpp000066400000000000000000000011551422164147700316220ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/hw_helper.h" #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" using namespace NEO; using HwHelperDg2OrBelowTests = Test; using 
isDG2OrBelow = IsAtMostProduct; HWTEST2_F(HwHelperDg2OrBelowTests, WhenGettingIsKmdMigrationSupportedThenFalseIsReturned, isDG2OrBelow) { auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); EXPECT_FALSE(hwHelper.isKmdMigrationSupported(hardwareInfo)); } compute-runtime-22.14.22890/opencl/test/unit_test/helpers/hw_helper_tests_pvc_and_later.cpp000066400000000000000000000236631422164147700320670ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/engine_node_helper.h" #include "shared/source/os_interface/hw_info_config.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/hw_helper_tests.h" #include "shared/test/common/test_macros/test.h" using HwHelperTestPvcAndLater = HwHelperTest; HWTEST2_F(HwHelperTestPvcAndLater, givenVariousCachesRequestsThenProperMocsIndexesAreBeingReturned, IsAtLeastXeHpcCore) { DebugManagerStateRestore restore; auto &helper = HwHelper::get(renderCoreFamily); auto gmmHelper = this->pDevice->getRootDeviceEnvironment().getGmmHelper(); auto expectedMocsForL3off = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED) >> 1; auto expectedMocsForL3on = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER) >> 1; auto mocsIndex = helper.getMocsIndex(*gmmHelper, false, true); EXPECT_EQ(expectedMocsForL3off, mocsIndex); mocsIndex = helper.getMocsIndex(*gmmHelper, true, false); EXPECT_EQ(expectedMocsForL3on, mocsIndex); mocsIndex = helper.getMocsIndex(*gmmHelper, true, true); EXPECT_EQ(expectedMocsForL3on, mocsIndex); } HWTEST2_F(HwHelperTestPvcAndLater, givenRenderEngineWhenRemapCalledThenUseCccs, IsAtLeastXeHpcCore) { hardwareInfo.featureTable.flags.ftrCCSNode = false; auto &helper = HwHelper::get(renderCoreFamily); helper.adjustDefaultEngineType(&hardwareInfo); EXPECT_EQ(aub_stream::EngineType::ENGINE_CCCS, 
EngineHelpers::remapEngineTypeToHwSpecific(aub_stream::EngineType::ENGINE_RCS, hardwareInfo)); EXPECT_EQ(aub_stream::EngineType::ENGINE_CCCS, EngineHelpers::remapEngineTypeToHwSpecific(aub_stream::EngineType::ENGINE_CCCS, hardwareInfo)); EXPECT_EQ(aub_stream::EngineType::ENGINE_CCS, EngineHelpers::remapEngineTypeToHwSpecific(aub_stream::EngineType::ENGINE_CCS, hardwareInfo)); EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS, EngineHelpers::remapEngineTypeToHwSpecific(aub_stream::EngineType::ENGINE_BCS, hardwareInfo)); } HWTEST2_F(HwHelperTestPvcAndLater, GivenVariousValuesWhenCallingCalculateAvailableThreadCountThenCorrectValueIsReturned, IsAtLeastXeHpcCore) { struct TestInput { uint32_t grfCount; uint32_t expectedThreadCountPerEu; }; std::vector grfTestInputs = { {64, 16}, {96, 10}, {128, 8}, {160, 6}, {192, 5}, {256, 4}, }; auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); for (auto &testInput : grfTestInputs) { auto expected = testInput.expectedThreadCountPerEu * hardwareInfo.gtSystemInfo.EUCount; auto result = hwHelper.calculateAvailableThreadCount( hardwareInfo.platform.eProductFamily, testInput.grfCount, hardwareInfo.gtSystemInfo.EUCount, hardwareInfo.gtSystemInfo.ThreadCount / hardwareInfo.gtSystemInfo.EUCount); EXPECT_EQ(expected, result); } } HWTEST2_F(HwHelperTestPvcAndLater, GivenVariousValuesWhenCallingGetBarriersCountFromHasBarrierThenCorrectValueIsReturned, IsAtLeastXeHpcCore) { auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); EXPECT_EQ(0u, hwHelper.getBarriersCountFromHasBarriers(0u)); EXPECT_EQ(1u, hwHelper.getBarriersCountFromHasBarriers(1u)); EXPECT_EQ(2u, hwHelper.getBarriersCountFromHasBarriers(2u)); EXPECT_EQ(4u, hwHelper.getBarriersCountFromHasBarriers(3u)); EXPECT_EQ(8u, hwHelper.getBarriersCountFromHasBarriers(4u)); EXPECT_EQ(16u, hwHelper.getBarriersCountFromHasBarriers(5u)); EXPECT_EQ(24u, hwHelper.getBarriersCountFromHasBarriers(6u)); EXPECT_EQ(32u, hwHelper.getBarriersCountFromHasBarriers(7u)); 
} HWTEST2_F(HwHelperTestPvcAndLater, givenHwHelperWhenCheckIsUpdateTaskCountFromWaitSupportedThenReturnsTrue, IsAtLeastXeHpcCore) { auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); EXPECT_TRUE(hwHelper.isUpdateTaskCountFromWaitSupported()); } HWTEST2_F(HwHelperTestPvcAndLater, givenCooperativeContextSupportedWhenGetEngineInstancesThenReturnCorrectAmountOfCooperativeCcs, IsAtLeastXeHpcCore) { HardwareInfo hwInfo = *defaultHwInfo; hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled = 2; hwInfo.featureTable.flags.ftrCCSNode = true; auto &hwHelper = HwHelperHw::get(); auto &hwInfoConfig = *HwInfoConfig::get(hwInfo.platform.eProductFamily); uint32_t revisions[] = {REVISION_A0, REVISION_B}; for (auto &revision : revisions) { auto hwRevId = hwInfoConfig.getHwRevIdFromStepping(revision, hwInfo); if (hwRevId == CommonConstants::invalidStepping) { continue; } hwInfo.platform.usRevId = hwRevId; auto engineInstances = hwHelper.getGpgpuEngineInstances(hwInfo); size_t ccsCount = 0u; size_t cooperativeCcsCount = 0u; for (auto &engineInstance : engineInstances) { if (EngineHelpers::isCcs(engineInstance.first)) { if (engineInstance.second == EngineUsage::Regular) { ccsCount++; } else if (engineInstance.second == EngineUsage::Cooperative) { cooperativeCcsCount++; } } } EXPECT_EQ(2u, ccsCount); if (hwHelper.isCooperativeEngineSupported(hwInfo)) { EXPECT_EQ(ccsCount, cooperativeCcsCount); } else { EXPECT_EQ(0u, cooperativeCcsCount); } } } HWTEST2_F(HwHelperTestPvcAndLater, givenComputeEngineAndCooperativeUsageWhenGetEngineGroupTypeIsCalledThenCooperativeComputeGroupTypeIsReturned, IsAtLeastXeHpcCore) { auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); auto hwInfo = *::defaultHwInfo; hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled = 4; aub_stream::EngineType engineTypes[] = {aub_stream::EngineType::ENGINE_CCS, aub_stream::EngineType::ENGINE_CCS1, aub_stream::EngineType::ENGINE_CCS2, aub_stream::EngineType::ENGINE_CCS3}; EngineUsage 
engineUsages[] = {EngineUsage::Regular, EngineUsage::LowPriority, EngineUsage::Internal, EngineUsage::Cooperative}; for (auto engineType : engineTypes) { for (auto engineUsage : engineUsages) { if (engineUsage == EngineUsage::Cooperative) { EXPECT_EQ(EngineGroupType::CooperativeCompute, hwHelper.getEngineGroupType(engineType, engineUsage, hwInfo)); } else { EXPECT_EQ(EngineGroupType::Compute, hwHelper.getEngineGroupType(engineType, engineUsage, hwInfo)); } } } } HWTEST2_F(HwHelperTestPvcAndLater, givenPVCAndLaterPlatformWhenCheckingIfEngineTypeRemappingIsRequiredThenReturnTrue, IsAtLeastXeHpcCore) { const auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); EXPECT_TRUE(hwHelper.isEngineTypeRemappingToHwSpecificRequired()); } HWTEST2_F(HwHelperTestPvcAndLater, WhenIsRcsAvailableIsCalledThenCorrectValueIsReturned, IsAtLeastXeHpcCore) { auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); auto hwInfo = *::defaultHwInfo; aub_stream::EngineType defaultEngineTypes[] = {aub_stream::EngineType::ENGINE_RCS, aub_stream::EngineType::ENGINE_CCCS, aub_stream::EngineType::ENGINE_BCS, aub_stream::EngineType::ENGINE_BCS2, aub_stream::EngineType::ENGINE_CCS, aub_stream::EngineType::ENGINE_CCS2}; for (auto defaultEngineType : defaultEngineTypes) { hwInfo.capabilityTable.defaultEngineType = defaultEngineType; for (auto ftrRcsNode : ::testing::Bool()) { hwInfo.featureTable.flags.ftrRcsNode = ftrRcsNode; if (ftrRcsNode || (defaultEngineType == aub_stream::EngineType::ENGINE_RCS) || (defaultEngineType == aub_stream::EngineType::ENGINE_CCCS)) { EXPECT_TRUE(hwHelper.isRcsAvailable(hwInfo)); } else { EXPECT_FALSE(hwHelper.isRcsAvailable(hwInfo)); } } } } HWTEST2_F(HwHelperTestPvcAndLater, WhenIsCooperativeDispatchSupportedThenCorrectValueIsReturned, IsAtLeastXeHpcCore) { struct MockHwHelper : NEO::HwHelperHw { bool isCooperativeEngineSupported(const HardwareInfo &hwInfo) const override { return isCooperativeEngineSupportedValue; } bool 
isRcsAvailable(const HardwareInfo &hwInfo) const override { return isRcsAvailableValue; } bool isCooperativeEngineSupportedValue = true; bool isRcsAvailableValue = true; }; MockHwHelper hwHelper{}; auto hwInfo = *::defaultHwInfo; for (auto isCooperativeEngineSupported : ::testing::Bool()) { hwHelper.isCooperativeEngineSupportedValue = isCooperativeEngineSupported; for (auto isRcsAvailable : ::testing::Bool()) { hwHelper.isRcsAvailableValue = isRcsAvailable; for (auto engineGroupType : {EngineGroupType::RenderCompute, EngineGroupType::Compute, EngineGroupType::CooperativeCompute}) { auto isCooperativeDispatchSupported = hwHelper.isCooperativeDispatchSupported(engineGroupType, hwInfo); if (isCooperativeEngineSupported) { switch (engineGroupType) { case EngineGroupType::RenderCompute: EXPECT_FALSE(isCooperativeDispatchSupported); break; case EngineGroupType::Compute: EXPECT_EQ(!isRcsAvailable, isCooperativeDispatchSupported); break; default: // EngineGroupType::CooperativeCompute EXPECT_TRUE(isCooperativeDispatchSupported); } } else { EXPECT_TRUE(isCooperativeDispatchSupported); } } } } } compute-runtime-22.14.22890/opencl/test/unit_test/helpers/hw_helper_tests_xehp_and_later.cpp000066400000000000000000000542771422164147700322500ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_container/command_encoder.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/engine_node_helper.h" #include "shared/source/helpers/pipe_control_args.h" #include "shared/source/os_interface/hw_info_config.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/hw_helper_tests.h" #include "shared/test/common/helpers/ult_hw_helper.h" #include "shared/test/common/helpers/unit_test_helper.h" #include "opencl/source/command_queue/gpgpu_walker.h" #include 
"opencl/source/helpers/cl_hw_helper.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "engine_node.h" using HwHelperTestXeHPAndLater = Test; HWCMDTEST_F(IGFX_XE_HP_CORE, HwHelperTestXeHPAndLater, WhenGettingMaxBarriersPerSliceThen32IsReturned) { auto &helper = HwHelper::get(renderCoreFamily); EXPECT_EQ(32u, helper.getMaxBarrierRegisterPerSlice()); } HWCMDTEST_F(IGFX_XE_HP_CORE, HwHelperTestXeHPAndLater, givenHwHelperWhenGetGpuTimeStampInNSIsCalledThenOnlyLow32BitsFromTimeStampAreUsedAndCorrectValueIsReturned) { auto &helper = HwHelper::get(renderCoreFamily); auto timeStamp = 0x00ff'ffff'ffff; auto frequency = 123456.0; auto result = static_cast((timeStamp & 0xffff'ffff) * frequency); EXPECT_EQ(result, helper.getGpuTimeStampInNS(timeStamp, frequency)); } HWCMDTEST_F(IGFX_XE_HP_CORE, HwHelperTestXeHPAndLater, GivenNoCcsNodeThenDefaultEngineTypeIsRcs) { hardwareInfo.featureTable.flags.ftrCCSNode = false; auto &helper = HwHelper::get(renderCoreFamily); helper.adjustDefaultEngineType(&hardwareInfo); auto expectedEngine = EngineHelpers::remapEngineTypeToHwSpecific(aub_stream::EngineType::ENGINE_RCS, hardwareInfo); EXPECT_EQ(expectedEngine, hardwareInfo.capabilityTable.defaultEngineType); } HWCMDTEST_F(IGFX_XE_HP_CORE, HwHelperTestXeHPAndLater, GiveCcsNodeThenDefaultEngineTypeIsCcs) { hardwareInfo.featureTable.flags.ftrCCSNode = true; auto &helper = HwHelper::get(renderCoreFamily); helper.adjustDefaultEngineType(&hardwareInfo); EXPECT_EQ(aub_stream::ENGINE_CCS, hardwareInfo.capabilityTable.defaultEngineType); } HWCMDTEST_F(IGFX_XE_HP_CORE, HwHelperTestXeHPAndLater, givenXeHPAndLaterPlatformWithLocalMemoryFeatureWhenIsLocalMemoryEnabledIsCalledThenTrueIsReturned) { hardwareInfo.featureTable.flags.ftrLocalMemory = true; auto &helper = reinterpret_cast &>(HwHelperHw::get()); EXPECT_TRUE(helper.isLocalMemoryEnabled(hardwareInfo)); } 
HWCMDTEST_F(IGFX_XE_HP_CORE, HwHelperTestXeHPAndLater, givenXeHPAndLaterPlatformWithoutLocalMemoryFeatureWhenIsLocalMemoryEnabledIsCalledThenFalseIsReturned) { hardwareInfo.featureTable.flags.ftrLocalMemory = false; auto &helper = reinterpret_cast &>(HwHelperHw::get()); EXPECT_FALSE(helper.isLocalMemoryEnabled(hardwareInfo)); } HWCMDTEST_F(IGFX_XE_HP_CORE, HwHelperTestXeHPAndLater, givenXeHPAndLaterPlatformWhenCheckingIfHvAlign4IsRequiredThenReturnFalse) { auto &helper = HwHelperHw::get(); EXPECT_FALSE(helper.hvAlign4Required()); } HWCMDTEST_F(IGFX_XE_HP_CORE, HwHelperTestXeHPAndLater, givenXeHPAndLaterPlatformWhenCheckTimestampPacketWriteThenReturnTrue) { auto &hwHelper = HwHelperHw::get(); EXPECT_TRUE(hwHelper.timestampPacketWriteSupported()); } HWCMDTEST_F(IGFX_XE_HP_CORE, HwHelperTestXeHPAndLater, givenXeHPAndLaterPlatformWhenCheckAssignEngineRoundRobinSupportedThenReturnTrue) { auto &hwHelper = HwHelperHw::get(); EXPECT_TRUE(hwHelper.isAssignEngineRoundRobinSupported()); } HWCMDTEST_F(IGFX_XE_HP_CORE, HwHelperTestXeHPAndLater, givenAllFlagsSetWhenGetGpgpuEnginesThenReturnThreeRcsEnginesFourCcsEnginesAndOneBcsEngine) { HardwareInfo hwInfo = *defaultHwInfo; hwInfo.featureTable.flags.ftrCCSNode = true; hwInfo.featureTable.ftrBcsInfo = 1; hwInfo.featureTable.flags.ftrRcsNode = true; hwInfo.capabilityTable.blitterOperationsSupported = true; hwInfo.capabilityTable.defaultEngineType = aub_stream::ENGINE_CCS; hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled = 4; auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); EXPECT_EQ(9u, device->allEngines.size()); auto &engines = HwHelperHw::get().getGpgpuEngineInstances(hwInfo); EXPECT_EQ(9u, engines.size()); EXPECT_EQ(aub_stream::ENGINE_CCS, engines[0].first); EXPECT_EQ(aub_stream::ENGINE_CCS1, engines[1].first); EXPECT_EQ(aub_stream::ENGINE_CCS2, engines[2].first); EXPECT_EQ(aub_stream::ENGINE_CCS3, engines[3].first); EXPECT_EQ(aub_stream::ENGINE_RCS, engines[4].first); 
EXPECT_EQ(hwInfo.capabilityTable.defaultEngineType, engines[5].first); // low priority EXPECT_EQ(EngineUsage::LowPriority, engines[5].second); EXPECT_EQ(hwInfo.capabilityTable.defaultEngineType, engines[6].first); // internal EXPECT_EQ(EngineUsage::Internal, engines[6].second); EXPECT_EQ(aub_stream::ENGINE_BCS, engines[7].first); EXPECT_EQ(aub_stream::ENGINE_BCS, engines[8].first); } HWCMDTEST_F(IGFX_XE_HP_CORE, HwHelperTestXeHPAndLater, givenBcsDisabledWhenGetGpgpuEnginesThenReturnThreeRcsEnginesFourCcsEngines) { HardwareInfo hwInfo = *defaultHwInfo; hwInfo.featureTable.flags.ftrCCSNode = true; hwInfo.featureTable.ftrBcsInfo = 0; hwInfo.capabilityTable.defaultEngineType = aub_stream::ENGINE_CCS; hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled = 4; auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); EXPECT_EQ(7u, device->allEngines.size()); auto &engines = HwHelperHw::get().getGpgpuEngineInstances(hwInfo); EXPECT_EQ(7u, engines.size()); EXPECT_EQ(aub_stream::ENGINE_CCS, engines[0].first); EXPECT_EQ(aub_stream::ENGINE_CCS1, engines[1].first); EXPECT_EQ(aub_stream::ENGINE_CCS2, engines[2].first); EXPECT_EQ(aub_stream::ENGINE_CCS3, engines[3].first); EXPECT_EQ(aub_stream::ENGINE_RCS, engines[4].first); EXPECT_EQ(hwInfo.capabilityTable.defaultEngineType, engines[5].first); // low priority EXPECT_EQ(aub_stream::ENGINE_CCS, engines[6].first); } HWCMDTEST_F(IGFX_XE_HP_CORE, HwHelperTestXeHPAndLater, givenCcsDisabledWhenGetGpgpuEnginesThenReturnRcsAndOneBcsEngine) { HardwareInfo hwInfo = *defaultHwInfo; hwInfo.featureTable.flags.ftrCCSNode = false; hwInfo.featureTable.ftrBcsInfo = 1; hwInfo.capabilityTable.blitterOperationsSupported = true; hwInfo.capabilityTable.defaultEngineType = aub_stream::ENGINE_RCS; hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled = 0; auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); EXPECT_EQ(5u, device->allEngines.size()); auto &engines = 
HwHelperHw::get().getGpgpuEngineInstances(hwInfo); EXPECT_EQ(5u, engines.size()); EXPECT_EQ(aub_stream::ENGINE_RCS, engines[0].first); EXPECT_EQ(aub_stream::ENGINE_RCS, engines[1].first); EXPECT_EQ(aub_stream::ENGINE_RCS, engines[2].first); EXPECT_EQ(aub_stream::ENGINE_BCS, engines[3].first); EXPECT_EQ(aub_stream::ENGINE_BCS, engines[4].first); } HWCMDTEST_F(IGFX_XE_HP_CORE, HwHelperTestXeHPAndLater, givenCcsDisabledAndNumberOfCcsEnabledWhenGetGpgpuEnginesThenReturnRcsAndOneBcsEngine) { HardwareInfo hwInfo = *defaultHwInfo; hwInfo.featureTable.flags.ftrCCSNode = false; hwInfo.featureTable.ftrBcsInfo = 1; hwInfo.capabilityTable.defaultEngineType = aub_stream::ENGINE_RCS; hwInfo.capabilityTable.blitterOperationsSupported = true; hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled = 4; auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); EXPECT_EQ(5u, device->allEngines.size()); auto &engines = HwHelperHw::get().getGpgpuEngineInstances(hwInfo); EXPECT_EQ(5u, engines.size()); EXPECT_EQ(aub_stream::ENGINE_RCS, engines[0].first); EXPECT_EQ(aub_stream::ENGINE_RCS, engines[1].first); EXPECT_EQ(aub_stream::ENGINE_RCS, engines[2].first); EXPECT_EQ(aub_stream::ENGINE_BCS, engines[3].first); EXPECT_EQ(aub_stream::ENGINE_BCS, engines[4].first); } HWCMDTEST_F(IGFX_XE_HP_CORE, HwHelperTestXeHPAndLater, givenVariousCachesRequestProperMOCSIndexesAreBeingReturned) { DebugManagerStateRestore restore; auto &helper = HwHelper::get(renderCoreFamily); auto gmmHelper = this->pDevice->getRootDeviceEnvironment().getGmmHelper(); auto expectedMocsForL3off = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED) >> 1; auto expectedMocsForL3on = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER) >> 1; auto expectedMocsForL3andL1on = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST) >> 1; auto mocsIndex = helper.getMocsIndex(*gmmHelper, false, true); EXPECT_EQ(expectedMocsForL3off, mocsIndex); mocsIndex = helper.getMocsIndex(*gmmHelper, true, 
false); EXPECT_EQ(expectedMocsForL3andL1on, mocsIndex); mocsIndex = helper.getMocsIndex(*gmmHelper, true, true); EXPECT_EQ(expectedMocsForL3andL1on, mocsIndex); DebugManager.flags.ForceL1Caching.set(0u); mocsIndex = helper.getMocsIndex(*gmmHelper, true, false); EXPECT_EQ(expectedMocsForL3on, mocsIndex); mocsIndex = helper.getMocsIndex(*gmmHelper, true, true); expectedMocsForL3andL1on = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST) >> 1; EXPECT_EQ(expectedMocsForL3andL1on, mocsIndex); } HWCMDTEST_F(IGFX_XE_HP_CORE, HwHelperTestXeHPAndLater, givenStoreRegMemCommandWhenAdjustingThenSetRemapEnabled) { typename FamilyType::MI_STORE_REGISTER_MEM_CMD storeRegMem = {}; storeRegMem.setMmioRemapEnable(false); GpgpuWalkerHelper::adjustMiStoreRegMemMode(&storeRegMem); EXPECT_TRUE(storeRegMem.getMmioRemapEnable()); } using PipeControlHelperTestsXeHPAndLater = ::testing::Test; HWCMDTEST_F(IGFX_XE_HP_CORE, PipeControlHelperTestsXeHPAndLater, WhenAddingPipeControlWAThenCorrectCommandsAreProgrammed) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; uint8_t buffer[128]; uint64_t address = 0x1234567887654321; HardwareInfo hardwareInfo = *defaultHwInfo; bool requiresMemorySynchronization = (MemorySynchronizationCommands::getSizeForAdditonalSynchronization(hardwareInfo) > 0) ? true : false; for (auto ftrLocalMemory : ::testing::Bool()) { LinearStream stream(buffer, 128); hardwareInfo.featureTable.flags.ftrLocalMemory = ftrLocalMemory; MemorySynchronizationCommands::addPipeControlWA(stream, address, hardwareInfo); if (MemorySynchronizationCommands::isPipeControlWArequired(hardwareInfo) == false) { EXPECT_EQ(0u, stream.getUsed()); continue; } GenCmdList cmdList; FamilyType::PARSE::parseCommandBuffer(cmdList, stream.getCpuBase(), stream.getUsed()); EXPECT_EQ(requiresMemorySynchronization ? 
2u : 1u, cmdList.size()); PIPE_CONTROL expectedPipeControl = FamilyType::cmdInitPipeControl; expectedPipeControl.setCommandStreamerStallEnable(true); UnitTestHelper::setPipeControlHdcPipelineFlush(expectedPipeControl, true); auto it = cmdList.begin(); auto pPipeControl = genCmdCast(*it); ASSERT_NE(nullptr, pPipeControl); EXPECT_TRUE(memcmp(&expectedPipeControl, pPipeControl, sizeof(PIPE_CONTROL)) == 0); if (requiresMemorySynchronization) { if (UnitTestHelper::isAdditionalMiSemaphoreWaitRequired(hardwareInfo)) { MI_SEMAPHORE_WAIT expectedMiSemaphoreWait; EncodeSempahore::programMiSemaphoreWait(&expectedMiSemaphoreWait, address, EncodeSempahore::invalidHardwareTag, MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD, false); auto pMiSemaphoreWait = genCmdCast(*(++it)); ASSERT_NE(nullptr, pMiSemaphoreWait); EXPECT_TRUE(memcmp(&expectedMiSemaphoreWait, pMiSemaphoreWait, sizeof(MI_SEMAPHORE_WAIT)) == 0); } } } } HWCMDTEST_F(IGFX_XE_HP_CORE, PipeControlHelperTestsXeHPAndLater, WhenGettingSizeForAdditionalSynchronizationThenCorrectValueIsReturned) { HardwareInfo hardwareInfo = *defaultHwInfo; EXPECT_EQ(0u, UltMemorySynchronizationCommands::getSizeForAdditonalSynchronization(hardwareInfo)); } HWCMDTEST_F(IGFX_XE_HP_CORE, PipeControlHelperTestsXeHPAndLater, WhenSettingExtraPipeControlPropertiesThenCorrectValuesAreSet) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; for (auto ftrLocalMemory : ::testing::Bool()) { HardwareInfo hardwareInfo = *defaultHwInfo; hardwareInfo.featureTable.flags.ftrLocalMemory = ftrLocalMemory; PipeControlArgs args; MemorySynchronizationCommands::setPostSyncExtraProperties(args, hardwareInfo); if (ftrLocalMemory) { EXPECT_TRUE(args.hdcPipelineFlush); } else { EXPECT_FALSE(args.hdcPipelineFlush); } } } HWCMDTEST_F(IGFX_XE_HP_CORE, PipeControlHelperTestsXeHPAndLater, whenSettingCacheFlushExtraFieldsThenExpectHdcFlushSet) { PipeControlArgs args; MemorySynchronizationCommands::setCacheFlushExtraProperties(args); 
EXPECT_TRUE(args.hdcPipelineFlush); } HWCMDTEST_F(IGFX_XE_HP_CORE, PipeControlHelperTestsXeHPAndLater, givenRequestedCacheFlushesWhenProgrammingPipeControlThenFlushHdc) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; uint32_t buffer[sizeof(PIPE_CONTROL) * 2] = {}; LinearStream stream(buffer, sizeof(buffer)); PipeControlArgs args; args.hdcPipelineFlush = true; args.compressionControlSurfaceCcsFlush = true; MemorySynchronizationCommands::addPipeControl(stream, args); auto pipeControl = reinterpret_cast(buffer); EXPECT_TRUE(UnitTestHelper::getPipeControlHdcPipelineFlush(*pipeControl)); EXPECT_TRUE(pipeControl->getCompressionControlSurfaceCcsFlush()); } HWCMDTEST_F(IGFX_XE_HP_CORE, PipeControlHelperTestsXeHPAndLater, givenDebugVariableSetWhenProgrammingPipeControlThenFlushHdc) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; DebugManagerStateRestore restore; DebugManager.flags.FlushAllCaches.set(true); uint32_t buffer[sizeof(PIPE_CONTROL) * 2] = {}; LinearStream stream(buffer, sizeof(buffer)); PipeControlArgs args; MemorySynchronizationCommands::addPipeControl(stream, args); auto pipeControl = reinterpret_cast(buffer); EXPECT_TRUE(UnitTestHelper::getPipeControlHdcPipelineFlush(*pipeControl)); EXPECT_TRUE(pipeControl->getCompressionControlSurfaceCcsFlush()); } HWCMDTEST_F(IGFX_XE_HP_CORE, PipeControlHelperTestsXeHPAndLater, givenDebugDisableCacheFlushWhenProgrammingPipeControlWithCacheFlushThenExpectDebugOverrideFlushHdc) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; DebugManagerStateRestore restore; DebugManager.flags.DoNotFlushCaches.set(true); uint32_t buffer[sizeof(PIPE_CONTROL) * 2] = {}; LinearStream stream(buffer, sizeof(buffer)); PipeControlArgs args; args.hdcPipelineFlush = true; args.compressionControlSurfaceCcsFlush = true; MemorySynchronizationCommands::addPipeControl(stream, args); auto pipeControl = reinterpret_cast(buffer); EXPECT_FALSE(UnitTestHelper::getPipeControlHdcPipelineFlush(*pipeControl)); 
EXPECT_FALSE(pipeControl->getCompressionControlSurfaceCcsFlush()); } HWCMDTEST_F(IGFX_XE_HP_CORE, HwHelperTestXeHPAndLater, givenHwHelperXeCoreWhenGettingGlobalTimeStampBitsThen32IsReturned) { auto &helper = HwHelper::get(renderCoreFamily); EXPECT_EQ(helper.getGlobalTimeStampBits(), 32U); } HWCMDTEST_F(IGFX_XE_HP_CORE, HwHelperTestXeHPAndLater, givenHwHelperWhenGettingPlanarYuvHeightThenHelperReturnsCorrectValue) { auto &helper = HwHelper::get(renderCoreFamily); EXPECT_EQ(helper.getPlanarYuvMaxHeight(), 16128u); } HWCMDTEST_F(IGFX_XE_HP_CORE, HwHelperTestXeHPAndLater, WhenIsPipeControlWArequiredIsCalledThenCorrectValueIsReturned) { auto hwInfo = pDevice->getHardwareInfo(); for (auto ftrLocalMemory : ::testing::Bool()) { hwInfo.featureTable.flags.ftrLocalMemory = ftrLocalMemory; EXPECT_EQ(UnitTestHelper::isPipeControlWArequired(hwInfo), MemorySynchronizationCommands::isPipeControlWArequired(hwInfo)); } } HWCMDTEST_F(IGFX_XE_HP_CORE, HwHelperTestXeHPAndLater, whenGettingPreferenceForSmallKernelsThenCertainThresholdIsTested) { DebugManagerStateRestore restorer; auto &hwInfo = pDevice->getHardwareInfo(); auto &helper = HwHelper::get(renderCoreFamily); if (HwInfoConfig::get(hwInfo.platform.eProductFamily)->getSteppingFromHwRevId(hwInfo) >= REVISION_B) { EXPECT_FALSE(helper.preferSmallWorkgroupSizeForKernel(512u, hwInfo)); EXPECT_FALSE(helper.preferSmallWorkgroupSizeForKernel(10000u, hwInfo)); EXPECT_FALSE(helper.preferSmallWorkgroupSizeForKernel(2047u, hwInfo)); EXPECT_FALSE(helper.preferSmallWorkgroupSizeForKernel(2048u, hwInfo)); } else { EXPECT_TRUE(helper.preferSmallWorkgroupSizeForKernel(512u, hwInfo)); EXPECT_FALSE(helper.preferSmallWorkgroupSizeForKernel(10000u, hwInfo)); EXPECT_TRUE(helper.preferSmallWorkgroupSizeForKernel(2047u, hwInfo)); EXPECT_FALSE(helper.preferSmallWorkgroupSizeForKernel(2048u, hwInfo)); DebugManager.flags.OverrideKernelSizeLimitForSmallDispatch.set(1u); EXPECT_FALSE(helper.preferSmallWorkgroupSizeForKernel(1u, hwInfo)); 
EXPECT_TRUE(helper.preferSmallWorkgroupSizeForKernel(0u, hwInfo)); DebugManager.flags.OverrideKernelSizeLimitForSmallDispatch.set(0u); EXPECT_FALSE(helper.preferSmallWorkgroupSizeForKernel(1u, hwInfo)); EXPECT_FALSE(helper.preferSmallWorkgroupSizeForKernel(0u, hwInfo)); } } HWCMDTEST_F(IGFX_XE_HP_CORE, HwHelperTestXeHPAndLater, givenHwHelperWhenGettingBindlessSurfaceExtendedMessageDescriptorValueThenCorrectValueIsReturned) { auto &hwHelper = HwHelper::get(pDevice->getHardwareInfo().platform.eRenderCoreFamily); auto value = hwHelper.getBindlessSurfaceExtendedMessageDescriptorValue(0x200); typename FamilyType::DataPortBindlessSurfaceExtendedMessageDescriptor messageExtDescriptor = {}; messageExtDescriptor.setBindlessSurfaceOffset(0x200); EXPECT_EQ(messageExtDescriptor.getBindlessSurfaceOffsetToPatch(), value); EXPECT_EQ(0x200u, value); } HWCMDTEST_F(IGFX_XE_HP_CORE, PipeControlHelperTestsXeHPAndLater, givenPostSyncPipeControlWhenSettingWorkloadPartitionFlagThenExpectPipeControlFlagSet) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using POST_SYNC_OPERATION = typename FamilyType::PIPE_CONTROL::POST_SYNC_OPERATION; uint8_t buffer[128] = {}; LinearStream stream(buffer, sizeof(buffer)); HardwareInfo hardwareInfo = *defaultHwInfo; uint64_t gpuAddress = 0xBADA550; uint64_t data = 0xABCDEF; PipeControlArgs args; args.workloadPartitionOffset = true; MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( stream, POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, gpuAddress, data, hardwareInfo, args); GenCmdList cmdList; FamilyType::PARSE::parseCommandBuffer(cmdList, stream.getCpuBase(), stream.getUsed()); auto pipeControls = findAll(cmdList.begin(), cmdList.end()); bool foundPostSyncPipeControl = false; for (size_t i = 0; i < pipeControls.size(); i++) { auto pipeControl = reinterpret_cast(*pipeControls[i]); if (pipeControl->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) { 
EXPECT_EQ(gpuAddress, NEO::UnitTestHelper::getPipeControlPostSyncAddress(*pipeControl)); EXPECT_EQ(data, pipeControl->getImmediateData()); EXPECT_TRUE(pipeControl->getWorkloadPartitionIdOffsetEnable()); foundPostSyncPipeControl = true; break; } } EXPECT_TRUE(foundPostSyncPipeControl); } using HwInfoConfigTestXeHpAndLater = ::testing::Test; HWCMDTEST_F(IGFX_XE_HP_CORE, HwInfoConfigTestXeHpAndLater, givenXeHPAndLaterPlatformWhenCheckNewResidencyModelSupportedThenReturnTrue) { auto hwInfoConfig = HwInfoConfig::get(productFamily); EXPECT_TRUE(hwInfoConfig->isNewResidencyModelSupported()); } HWCMDTEST_F(IGFX_XE_HP_CORE, HwInfoConfigTestXeHpAndLater, whenCapabilityCoherencyFlagSetTrueThenOverrideToFalse) { auto &hwInfoConfig = *HwInfoConfig::get(productFamily); bool coherency = true; hwInfoConfig.setCapabilityCoherencyFlag(*defaultHwInfo, coherency); EXPECT_FALSE(coherency); } HWCMDTEST_F(IGFX_XE_HP_CORE, HwInfoConfigTestXeHpAndLater, givenXeHPAndLaterPlatformWhenAskedIfTile64With3DSurfaceOnBCSIsSupportedThenFalseIsReturned) { const auto &hwInfoConfig = *HwInfoConfig::get(productFamily); const auto &hwInfo = *defaultHwInfo; EXPECT_FALSE(hwInfoConfig.isTile64With3DSurfaceOnBCSSupported(hwInfo)); } HWCMDTEST_F(IGFX_XE_HP_CORE, HwInfoConfigTestXeHpAndLater, givenCLImageFormatsWhenCallingIsFormatRedescribableThenFalseIsReturned) { static const cl_image_format redescribeFormats[] = { {CL_R, CL_UNSIGNED_INT8}, {CL_R, CL_UNSIGNED_INT16}, {CL_R, CL_UNSIGNED_INT32}, {CL_RG, CL_UNSIGNED_INT32}, {CL_RGBA, CL_UNSIGNED_INT32}, }; MockContext context; auto &clHwHelper = ClHwHelper::get(context.getDevice(0)->getHardwareInfo().platform.eRenderCoreFamily); for (const auto &format : redescribeFormats) { EXPECT_EQ(false, clHwHelper.isFormatRedescribable(format)); } } compute-runtime-22.14.22890/opencl/test/unit_test/helpers/kmd_notify_tests.cpp000066400000000000000000000634231422164147700273720ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * 
SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/wait_status.h" #include "shared/source/os_interface/os_context.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "gtest/gtest.h" using namespace NEO; struct KmdNotifyTests : public ::testing::Test { void SetUp() override { device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); hwInfo = device->getRootDeviceEnvironment().getMutableHardwareInfo(); cmdQ.reset(new MockCommandQueue(&context, device.get(), nullptr, false)); *device->getDefaultEngine().commandStreamReceiver->getTagAddress() = taskCountToWait; cmdQ->getGpgpuCommandStreamReceiver().waitForFlushStamp(flushStampToWait); overrideKmdNotifyParams(true, 2, true, 1, false, 0, false, 0); } void overrideKmdNotifyParams(bool kmdNotifyEnable, int64_t kmdNotifyDelay, bool quickKmdSleepEnable, int64_t quickKmdSleepDelay, bool quickKmdSleepEnableForSporadicWaits, int64_t quickKmdSleepDelayForSporadicWaits, bool quickKmdSleepEnableForDirectSubmission, int64_t quickKmdSleepDelayForDirectSubmission) { auto &properties = hwInfo->capabilityTable.kmdNotifyProperties; properties.enableKmdNotify = kmdNotifyEnable; properties.delayKmdNotifyMicroseconds = kmdNotifyDelay; properties.enableQuickKmdSleep = quickKmdSleepEnable; properties.delayQuickKmdSleepMicroseconds = quickKmdSleepDelay; properties.enableQuickKmdSleepForSporadicWaits = quickKmdSleepEnableForSporadicWaits; properties.delayQuickKmdSleepForSporadicWaitsMicroseconds = quickKmdSleepDelayForSporadicWaits; properties.enableQuickKmdSleepForDirectSubmission = quickKmdSleepEnableForDirectSubmission; 
properties.delayQuickKmdSleepForDirectSubmissionMicroseconds = quickKmdSleepDelayForDirectSubmission; } class MockKmdNotifyHelper : public KmdNotifyHelper { public: using KmdNotifyHelper::acLineConnected; using KmdNotifyHelper::getMicrosecondsSinceEpoch; using KmdNotifyHelper::lastWaitForCompletionTimestampUs; using KmdNotifyHelper::properties; MockKmdNotifyHelper() = delete; MockKmdNotifyHelper(const KmdNotifyProperties *newProperties) : KmdNotifyHelper(newProperties){}; void updateLastWaitForCompletionTimestamp() override { KmdNotifyHelper::updateLastWaitForCompletionTimestamp(); updateLastWaitForCompletionTimestampCalled++; } void updateAcLineStatus() override { KmdNotifyHelper::updateAcLineStatus(); updateAcLineStatusCalled++; } uint32_t updateLastWaitForCompletionTimestampCalled = 0u; uint32_t updateAcLineStatusCalled = 0u; }; template class MockKmdNotifyCsr : public UltCommandStreamReceiver { public: MockKmdNotifyCsr(const ExecutionEnvironment &executionEnvironment, const DeviceBitfield deviceBitfield) : UltCommandStreamReceiver(const_cast(executionEnvironment), 0, deviceBitfield) {} bool waitForFlushStamp(FlushStamp &flushStampToWait) override { waitForFlushStampCalled++; waitForFlushStampParamsPassed.push_back({flushStampToWait}); return waitForFlushStampResult; } struct WaitForFlushStampParams { FlushStamp flushStampToWait{}; }; uint32_t waitForFlushStampCalled = 0u; bool waitForFlushStampResult = true; StackVec waitForFlushStampParamsPassed{}; WaitStatus waitForCompletionWithTimeout(const WaitParams ¶ms, uint32_t taskCountToWait) override { waitForCompletionWithTimeoutCalled++; waitForCompletionWithTimeoutParamsPassed.push_back({params.enableTimeout, params.waitTimeout, taskCountToWait}); return waitForCompletionWithTimeoutResult; } struct WaitForCompletionWithTimeoutParams { bool enableTimeout{}; int64_t timeoutMs{}; uint32_t taskCountToWait{}; }; uint32_t waitForCompletionWithTimeoutCalled = 0u; WaitStatus waitForCompletionWithTimeoutResult = 
WaitStatus::Ready; StackVec waitForCompletionWithTimeoutParamsPassed{}; }; template MockKmdNotifyCsr *createMockCsr() { auto csr = new MockKmdNotifyCsr(*device->executionEnvironment, device->getDeviceBitfield()); device->resetCommandStreamReceiver(csr); mockKmdNotifyHelper = new MockKmdNotifyHelper(&device->getHardwareInfo().capabilityTable.kmdNotifyProperties); csr->resetKmdNotifyHelper(mockKmdNotifyHelper); return csr; } MockKmdNotifyHelper *mockKmdNotifyHelper = nullptr; HardwareInfo *hwInfo = nullptr; MockContext context; std::unique_ptr device; std::unique_ptr cmdQ; FlushStamp flushStampToWait = 1000; uint32_t taskCountToWait = 5; }; HWTEST_F(KmdNotifyTests, givenTaskCountWhenWaitUntilCompletionCalledThenAlwaysTryCpuPolling) { auto csr = createMockCsr(); cmdQ->waitUntilComplete(taskCountToWait, {}, flushStampToWait, false); EXPECT_EQ(1u, csr->waitForCompletionWithTimeoutCalled); EXPECT_EQ(true, csr->waitForCompletionWithTimeoutParamsPassed[0].enableTimeout); EXPECT_EQ(2, csr->waitForCompletionWithTimeoutParamsPassed[0].timeoutMs); EXPECT_EQ(taskCountToWait, csr->waitForCompletionWithTimeoutParamsPassed[0].taskCountToWait); } HWTEST_F(KmdNotifyTests, givenTaskCountAndKmdNotifyDisabledWhenWaitUntilCompletionCalledThenTryCpuPollingWithoutTimeout) { overrideKmdNotifyParams(false, 0, false, 0, false, 0, false, 0); auto csr = createMockCsr(); cmdQ->waitUntilComplete(taskCountToWait, {}, flushStampToWait, false); EXPECT_EQ(0u, csr->waitForFlushStampCalled); EXPECT_EQ(1u, csr->waitForCompletionWithTimeoutCalled); EXPECT_EQ(false, csr->waitForCompletionWithTimeoutParamsPassed[0].enableTimeout); EXPECT_EQ(0, csr->waitForCompletionWithTimeoutParamsPassed[0].timeoutMs); EXPECT_EQ(taskCountToWait, csr->waitForCompletionWithTimeoutParamsPassed[0].taskCountToWait); } HWTEST_F(KmdNotifyTests, givenNotReadyTaskCountWhenWaitUntilCompletionCalledThenTryCpuPollingAndKmdWait) { auto csr = createMockCsr(); *csr->getTagAddress() = taskCountToWait - 1; 
csr->waitForCompletionWithTimeoutResult = WaitStatus::NotReady; //we have unrecoverable for this case, this will throw. EXPECT_THROW(cmdQ->waitUntilComplete(taskCountToWait, {}, flushStampToWait, false), std::exception); EXPECT_EQ(1u, csr->waitForFlushStampCalled); EXPECT_EQ(flushStampToWait, csr->waitForFlushStampParamsPassed[0].flushStampToWait); EXPECT_EQ(2u, csr->waitForCompletionWithTimeoutCalled); EXPECT_EQ(true, csr->waitForCompletionWithTimeoutParamsPassed[0].enableTimeout); EXPECT_EQ(2, csr->waitForCompletionWithTimeoutParamsPassed[0].timeoutMs); EXPECT_EQ(taskCountToWait, csr->waitForCompletionWithTimeoutParamsPassed[0].taskCountToWait); EXPECT_EQ(false, csr->waitForCompletionWithTimeoutParamsPassed[1].enableTimeout); EXPECT_EQ(0, csr->waitForCompletionWithTimeoutParamsPassed[1].timeoutMs); EXPECT_EQ(taskCountToWait, csr->waitForCompletionWithTimeoutParamsPassed[1].taskCountToWait); } HWTEST_F(KmdNotifyTests, givenReadyTaskCountWhenWaitUntilCompletionCalledThenTryCpuPollingAndDontCallKmdWait) { auto csr = createMockCsr(); cmdQ->waitUntilComplete(taskCountToWait, {}, flushStampToWait, false); EXPECT_EQ(0u, csr->waitForFlushStampCalled); EXPECT_EQ(1u, csr->waitForCompletionWithTimeoutCalled); EXPECT_EQ(true, csr->waitForCompletionWithTimeoutParamsPassed[0].enableTimeout); EXPECT_EQ(2, csr->waitForCompletionWithTimeoutParamsPassed[0].timeoutMs); EXPECT_EQ(taskCountToWait, csr->waitForCompletionWithTimeoutParamsPassed[0].taskCountToWait); } HWTEST_F(KmdNotifyTests, givenDefaultArgumentWhenWaitUntilCompleteIsCalledThenDisableQuickKmdSleep) { auto csr = createMockCsr(); auto expectedTimeout = device->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds; cmdQ->waitUntilComplete(taskCountToWait, {}, flushStampToWait, false); EXPECT_EQ(1u, csr->waitForCompletionWithTimeoutCalled); EXPECT_EQ(true, csr->waitForCompletionWithTimeoutParamsPassed[0].enableTimeout); EXPECT_EQ(expectedTimeout, 
csr->waitForCompletionWithTimeoutParamsPassed[0].timeoutMs); EXPECT_EQ(taskCountToWait, csr->waitForCompletionWithTimeoutParamsPassed[0].taskCountToWait); } HWTEST_F(KmdNotifyTests, givenEnabledQuickSleepWhenWaitUntilCompleteIsCalledThenChangeDelayValue) { auto csr = createMockCsr(); auto expectedTimeout = device->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayQuickKmdSleepMicroseconds; cmdQ->waitUntilComplete(taskCountToWait, {}, flushStampToWait, true); EXPECT_EQ(1u, csr->waitForCompletionWithTimeoutCalled); EXPECT_EQ(true, csr->waitForCompletionWithTimeoutParamsPassed[0].enableTimeout); EXPECT_EQ(expectedTimeout, csr->waitForCompletionWithTimeoutParamsPassed[0].timeoutMs); EXPECT_EQ(taskCountToWait, csr->waitForCompletionWithTimeoutParamsPassed[0].taskCountToWait); } HWTEST_F(KmdNotifyTests, givenDisabledQuickSleepWhenWaitUntilCompleteWithQuickSleepRequestIsCalledThenUseBaseDelayValue) { overrideKmdNotifyParams(true, 1, false, 0, false, 0, false, 0); auto csr = createMockCsr(); auto expectedTimeout = device->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds; cmdQ->waitUntilComplete(taskCountToWait, {}, flushStampToWait, true); EXPECT_EQ(1u, csr->waitForCompletionWithTimeoutCalled); EXPECT_EQ(true, csr->waitForCompletionWithTimeoutParamsPassed[0].enableTimeout); EXPECT_EQ(expectedTimeout, csr->waitForCompletionWithTimeoutParamsPassed[0].timeoutMs); EXPECT_EQ(taskCountToWait, csr->waitForCompletionWithTimeoutParamsPassed[0].taskCountToWait); } HWTEST_F(KmdNotifyTests, givenNotReadyTaskCountWhenPollForCompletionCalledThenTimeout) { *device->getDefaultEngine().commandStreamReceiver->getTagAddress() = taskCountToWait - 1; auto success = device->getUltCommandStreamReceiver().waitForCompletionWithTimeout(true, 1, taskCountToWait); EXPECT_NE(NEO::WaitStatus::Ready, success); } HWTEST_F(KmdNotifyTests, givenZeroFlushStampWhenWaitIsCalledThenDisableTimeout) { auto csr = createMockCsr(); 
EXPECT_TRUE(device->getHardwareInfo().capabilityTable.kmdNotifyProperties.enableKmdNotify); csr->waitForTaskCountWithKmdNotifyFallback(taskCountToWait, 0, false, QueueThrottle::MEDIUM); EXPECT_EQ(0u, csr->waitForFlushStampCalled); EXPECT_EQ(1u, csr->waitForCompletionWithTimeoutCalled); EXPECT_EQ(false, csr->waitForCompletionWithTimeoutParamsPassed[0].enableTimeout); EXPECT_EQ(taskCountToWait, csr->waitForCompletionWithTimeoutParamsPassed[0].taskCountToWait); } HWTEST_F(KmdNotifyTests, givenNonQuickSleepRequestWhenItsSporadicWaitThenOverrideQuickSleepRequest) { overrideKmdNotifyParams(true, 3, true, 2, true, 1, false, 0); auto csr = createMockCsr(); auto expectedDelay = device->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayQuickKmdSleepMicroseconds; int64_t timeSinceLastWait = mockKmdNotifyHelper->properties->delayQuickKmdSleepForSporadicWaitsMicroseconds + 1; mockKmdNotifyHelper->lastWaitForCompletionTimestampUs = mockKmdNotifyHelper->getMicrosecondsSinceEpoch() - timeSinceLastWait; csr->waitForTaskCountWithKmdNotifyFallback(taskCountToWait, 1, false, QueueThrottle::MEDIUM); EXPECT_EQ(1u, csr->waitForCompletionWithTimeoutCalled); EXPECT_EQ(expectedDelay, csr->waitForCompletionWithTimeoutParamsPassed[0].timeoutMs); } HWTEST_F(KmdNotifyTests, givenNonQuickSleepRequestWhenItsNotSporadicWaitThenOverrideQuickSleepRequest) { overrideKmdNotifyParams(true, 3, true, 2, true, 9999999, false, 0); auto csr = createMockCsr(); auto expectedDelay = device->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds; csr->waitForTaskCountWithKmdNotifyFallback(taskCountToWait, 1, false, QueueThrottle::MEDIUM); EXPECT_EQ(1u, csr->waitForCompletionWithTimeoutCalled); EXPECT_EQ(expectedDelay, csr->waitForCompletionWithTimeoutParamsPassed[0].timeoutMs); } HWTEST_F(KmdNotifyTests, givenKmdNotifyDisabledWhenPowerSavingModeIsRequestedThenTimeoutIsEnabled) { overrideKmdNotifyParams(false, 3, false, 2, false, 9999999, false, 0); auto csr = createMockCsr(); 
csr->waitForTaskCountWithKmdNotifyFallback(taskCountToWait, 1, false, QueueThrottle::LOW); EXPECT_EQ(1u, csr->waitForCompletionWithTimeoutCalled); EXPECT_EQ(true, csr->waitForCompletionWithTimeoutParamsPassed[0].enableTimeout); EXPECT_EQ(1, csr->waitForCompletionWithTimeoutParamsPassed[0].timeoutMs); } HWTEST_F(KmdNotifyTests, givenKmdNotifyDisabledWhenQueueHasPowerSavingModeAndCallWaitThenTimeoutIsEnabled) { overrideKmdNotifyParams(false, 3, false, 2, false, 9999999, false, 0); auto csr = createMockCsr(); cmdQ->throttle = QueueThrottle::LOW; cmdQ->waitUntilComplete(1, {}, 1, false); EXPECT_EQ(1u, csr->waitForCompletionWithTimeoutCalled); EXPECT_EQ(true, csr->waitForCompletionWithTimeoutParamsPassed[0].enableTimeout); EXPECT_EQ(1, csr->waitForCompletionWithTimeoutParamsPassed[0].timeoutMs); } HWTEST_F(KmdNotifyTests, givenKmdNotifyDisabledWhenQueueHasPowerSavingModButThereIsNoFlushStampeAndCallWaitThenTimeoutIsDisabled) { overrideKmdNotifyParams(false, 3, false, 2, false, 9999999, false, 0); auto csr = createMockCsr(); cmdQ->throttle = QueueThrottle::LOW; cmdQ->waitUntilComplete(1, {}, 0, false); EXPECT_EQ(1u, csr->waitForCompletionWithTimeoutCalled); EXPECT_EQ(false, csr->waitForCompletionWithTimeoutParamsPassed[0].enableTimeout); EXPECT_EQ(0, csr->waitForCompletionWithTimeoutParamsPassed[0].timeoutMs); } HWTEST_F(KmdNotifyTests, givenQuickSleepRequestWhenItsSporadicWaitOptimizationIsDisabledThenDontOverrideQuickSleepRequest) { overrideKmdNotifyParams(true, 3, true, 2, false, 0, false, 0); auto csr = createMockCsr(); auto expectedDelay = device->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayQuickKmdSleepMicroseconds; csr->waitForTaskCountWithKmdNotifyFallback(taskCountToWait, 1, true, QueueThrottle::MEDIUM); EXPECT_EQ(1u, csr->waitForCompletionWithTimeoutCalled); EXPECT_EQ(expectedDelay, csr->waitForCompletionWithTimeoutParamsPassed[0].timeoutMs); } HWTEST_F(KmdNotifyTests, givenTaskCountEqualToHwTagWhenWaitCalledThenDontMultiplyTimeout) { auto csr = 
createMockCsr(); *csr->getTagAddress() = taskCountToWait; auto expectedTimeout = device->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds; csr->waitForTaskCountWithKmdNotifyFallback(taskCountToWait, 1, false, QueueThrottle::MEDIUM); EXPECT_EQ(1u, csr->waitForCompletionWithTimeoutCalled); EXPECT_EQ(true, csr->waitForCompletionWithTimeoutParamsPassed[0].enableTimeout); EXPECT_EQ(expectedTimeout, csr->waitForCompletionWithTimeoutParamsPassed[0].timeoutMs); } HWTEST_F(KmdNotifyTests, givenTaskCountLowerThanHwTagWhenWaitCalledThenDontMultiplyTimeout) { auto csr = createMockCsr(); *csr->getTagAddress() = taskCountToWait + 5; auto expectedTimeout = device->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds; csr->waitForTaskCountWithKmdNotifyFallback(taskCountToWait, 1, false, QueueThrottle::MEDIUM); EXPECT_EQ(1u, csr->waitForCompletionWithTimeoutCalled); EXPECT_EQ(true, csr->waitForCompletionWithTimeoutParamsPassed[0].enableTimeout); EXPECT_EQ(expectedTimeout, csr->waitForCompletionWithTimeoutParamsPassed[0].timeoutMs); } HWTEST_F(KmdNotifyTests, givenDefaultCommandStreamReceiverWhenWaitCalledThenUpdateWaitTimestamp) { overrideKmdNotifyParams(true, 3, true, 2, true, 1, false, 0); auto csr = createMockCsr(); EXPECT_NE(0, mockKmdNotifyHelper->lastWaitForCompletionTimestampUs.load()); EXPECT_EQ(1u, mockKmdNotifyHelper->updateLastWaitForCompletionTimestampCalled); csr->waitForTaskCountWithKmdNotifyFallback(0, 0, false, QueueThrottle::MEDIUM); EXPECT_EQ(2u, mockKmdNotifyHelper->updateLastWaitForCompletionTimestampCalled); } HWTEST_F(KmdNotifyTests, givenDefaultCommandStreamReceiverWithDisabledSporadicWaitOptimizationWhenWaitCalledThenDontUpdateWaitTimestamp) { overrideKmdNotifyParams(true, 3, true, 2, false, 0, false, 0); auto csr = createMockCsr(); EXPECT_EQ(0, mockKmdNotifyHelper->lastWaitForCompletionTimestampUs.load()); csr->waitForTaskCountWithKmdNotifyFallback(0, 0, false, QueueThrottle::MEDIUM); EXPECT_EQ(0u, 
mockKmdNotifyHelper->updateLastWaitForCompletionTimestampCalled); } HWTEST_F(KmdNotifyTests, givenNewHelperWhenItsSetToCsrThenUpdateAcLineStatus) { auto helper = new MockKmdNotifyHelper(&(hwInfo->capabilityTable.kmdNotifyProperties)); EXPECT_EQ(0u, helper->updateAcLineStatusCalled); auto csr = createMockCsr(); csr->resetKmdNotifyHelper(helper); EXPECT_EQ(1u, helper->updateAcLineStatusCalled); } TEST_F(KmdNotifyTests, givenTaskCountDiffLowerThanMinimumToCheckAcLineWhenObtainingTimeoutPropertiesThenDontCheck) { hwInfo->capabilityTable.kmdNotifyProperties.enableKmdNotify = false; MockKmdNotifyHelper helper(&(hwInfo->capabilityTable.kmdNotifyProperties)); uint32_t hwTag = 9; uint32_t taskCountToWait = 10; EXPECT_TRUE(taskCountToWait - hwTag < KmdNotifyConstants::minimumTaskCountDiffToCheckAcLine); EXPECT_EQ(10u, KmdNotifyConstants::minimumTaskCountDiffToCheckAcLine); helper.obtainTimeoutParams(false, hwTag, taskCountToWait, 1, QueueThrottle::MEDIUM, true, false); EXPECT_EQ(0u, helper.updateAcLineStatusCalled); } TEST_F(KmdNotifyTests, givenTaskCountDiffGreaterThanMinimumToCheckAcLineAndDisabledKmdNotifyWhenObtainingTimeoutPropertiesThenCheck) { hwInfo->capabilityTable.kmdNotifyProperties.enableKmdNotify = false; MockKmdNotifyHelper helper(&(hwInfo->capabilityTable.kmdNotifyProperties)); uint32_t hwTag = 10; uint32_t taskCountToWait = 21; EXPECT_TRUE(taskCountToWait - hwTag > KmdNotifyConstants::minimumTaskCountDiffToCheckAcLine); EXPECT_EQ(10u, KmdNotifyConstants::minimumTaskCountDiffToCheckAcLine); helper.obtainTimeoutParams(false, hwTag, taskCountToWait, 1, QueueThrottle::MEDIUM, true, false); EXPECT_EQ(1u, helper.updateAcLineStatusCalled); } TEST_F(KmdNotifyTests, givenKmdWaitModeNotActiveWhenObtainTimeoutParamsThenFalseIsReturned) { MockKmdNotifyHelper helper(&(hwInfo->capabilityTable.kmdNotifyProperties)); auto params = helper.obtainTimeoutParams(false, 1, 1, 1, QueueThrottle::MEDIUM, false, false); EXPECT_FALSE(params.enableTimeout); 
EXPECT_FALSE(params.waitTimeout); } TEST_F(KmdNotifyTests, givenHighThrottleWhenObtainTimeoutParamsThenIndefinitelyPollSetToTrue) { MockKmdNotifyHelper helper(&(hwInfo->capabilityTable.kmdNotifyProperties)); auto params = helper.obtainTimeoutParams(false, 1, 1, 1, QueueThrottle::HIGH, false, false); EXPECT_FALSE(params.enableTimeout); EXPECT_FALSE(params.waitTimeout); EXPECT_TRUE(params.indefinitelyPoll); } TEST_F(KmdNotifyTests, givenTaskCountDiffGreaterThanMinimumToCheckAcLineAndEnabledKmdNotifyWhenObtainingTimeoutPropertiesThenDontCheck) { hwInfo->capabilityTable.kmdNotifyProperties.enableKmdNotify = true; MockKmdNotifyHelper helper(&(hwInfo->capabilityTable.kmdNotifyProperties)); uint32_t hwTag = 10; uint32_t taskCountToWait = 21; EXPECT_TRUE(taskCountToWait - hwTag > KmdNotifyConstants::minimumTaskCountDiffToCheckAcLine); EXPECT_EQ(10u, KmdNotifyConstants::minimumTaskCountDiffToCheckAcLine); helper.obtainTimeoutParams(false, hwTag, taskCountToWait, 1, QueueThrottle::MEDIUM, true, false); EXPECT_EQ(0u, helper.updateAcLineStatusCalled); } TEST_F(KmdNotifyTests, givenDisabledKmdNotifyMechanismWhenAcLineIsDisconnectedThenForceEnableTimeout) { hwInfo->capabilityTable.kmdNotifyProperties.enableKmdNotify = false; MockKmdNotifyHelper helper(&(hwInfo->capabilityTable.kmdNotifyProperties)); helper.acLineConnected = false; auto params = helper.obtainTimeoutParams(false, 1, 2, 2, QueueThrottle::MEDIUM, true, false); EXPECT_TRUE(params.enableTimeout); EXPECT_EQ(KmdNotifyConstants::timeoutInMicrosecondsForDisconnectedAcLine, params.waitTimeout); EXPECT_EQ(10000, KmdNotifyConstants::timeoutInMicrosecondsForDisconnectedAcLine); } TEST_F(KmdNotifyTests, givenEnabledKmdNotifyMechanismWhenAcLineIsDisconnectedThenDontChangeTimeoutValue) { hwInfo->capabilityTable.kmdNotifyProperties.enableKmdNotify = true; hwInfo->capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds = 5; MockKmdNotifyHelper helper(&(hwInfo->capabilityTable.kmdNotifyProperties)); helper.acLineConnected = 
false; auto params = helper.obtainTimeoutParams(false, 1, 2, 2, QueueThrottle::MEDIUM, true, false); EXPECT_TRUE(params.enableTimeout); EXPECT_EQ(hwInfo->capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds, params.waitTimeout); } TEST_F(KmdNotifyTests, givenDisabledKmdNotifyMechanismAndFlushStampIsZeroWhenAcLineIsDisconnectedThenDontForceEnableTimeout) { hwInfo->capabilityTable.kmdNotifyProperties.enableKmdNotify = false; MockKmdNotifyHelper helper(&(hwInfo->capabilityTable.kmdNotifyProperties)); helper.acLineConnected = false; FlushStamp flushStampToWait = 0; auto params = helper.obtainTimeoutParams(false, 1, 2, flushStampToWait, QueueThrottle::MEDIUM, true, false); EXPECT_FALSE(params.enableTimeout); } TEST_F(KmdNotifyTests, givenDisabledKmdNotifyMechanismWhenPowerSavingModeIsSetThenKmdNotifyMechanismIsUsedAndReturnsShortestWaitingTimePossible) { DebugManagerStateRestore stateRestore; DebugManager.flags.PowerSavingMode.set(1u); hwInfo->capabilityTable.kmdNotifyProperties.enableKmdNotify = false; MockKmdNotifyHelper helper(&(hwInfo->capabilityTable.kmdNotifyProperties)); helper.acLineConnected = false; FlushStamp flushStampToWait = 1; auto params = helper.obtainTimeoutParams(false, 1, 2, flushStampToWait, QueueThrottle::MEDIUM, true, false); EXPECT_TRUE(params.enableTimeout); EXPECT_EQ(1, params.waitTimeout); } TEST_F(KmdNotifyTests, givenDisabledKmdNotifyMechanismWhenPowerSavingModeIsRequestedThenKmdNotifyMechanismIsUsedAndReturnsShortestWaitingTimePossible) { hwInfo->capabilityTable.kmdNotifyProperties.enableKmdNotify = false; MockKmdNotifyHelper helper(&(hwInfo->capabilityTable.kmdNotifyProperties)); FlushStamp flushStampToWait = 1; auto params = helper.obtainTimeoutParams(false, 1, 2, flushStampToWait, QueueThrottle::LOW, true, false); EXPECT_TRUE(params.enableTimeout); EXPECT_EQ(1, params.waitTimeout); } TEST_F(KmdNotifyTests, givenEnabledKmdNotifyMechanismWhenPowerSavingModeIsSetAndNoFlushStampProvidedWhenParametersAreObtainedThenFalseIsReturned) 
{ DebugManagerStateRestore stateRestore; DebugManager.flags.PowerSavingMode.set(1u); hwInfo->capabilityTable.kmdNotifyProperties.enableKmdNotify = true; MockKmdNotifyHelper helper(&(hwInfo->capabilityTable.kmdNotifyProperties)); helper.acLineConnected = false; FlushStamp flushStampToWait = 0; auto params = helper.obtainTimeoutParams(false, 1, 2, flushStampToWait, QueueThrottle::MEDIUM, true, false); EXPECT_FALSE(params.enableTimeout); EXPECT_EQ(0, params.waitTimeout); } TEST_F(KmdNotifyTests, givenEnabledKmdDirectSubmissionNotifyMechanismWhenDirectSubmissionIsEnabledThenSelectDelayTimeoutForDirectSubmission) { overrideKmdNotifyParams(true, 150, false, 0, false, 0, true, 20); MockKmdNotifyHelper helper(&(hwInfo->capabilityTable.kmdNotifyProperties)); constexpr int64_t expectedTimeout = 20; constexpr bool directSubmission = true; FlushStamp flushStampToWait = 1; auto params = helper.obtainTimeoutParams(false, 1, 2, flushStampToWait, QueueThrottle::MEDIUM, true, directSubmission); EXPECT_TRUE(params.enableTimeout); EXPECT_EQ(expectedTimeout, params.waitTimeout); } TEST_F(KmdNotifyTests, givenEnabledKmdDirectSubmissionNotifyMechanismWhenDirectSubmissionIsDisabledThenSelectBaseDelayTimeout) { overrideKmdNotifyParams(true, 150, false, 0, false, 0, true, 20); MockKmdNotifyHelper helper(&(hwInfo->capabilityTable.kmdNotifyProperties)); constexpr int64_t expectedTimeout = 150; constexpr bool directSubmission = false; FlushStamp flushStampToWait = 1; auto params = helper.obtainTimeoutParams(false, 1, 2, flushStampToWait, QueueThrottle::MEDIUM, true, directSubmission); EXPECT_TRUE(params.enableTimeout); EXPECT_EQ(expectedTimeout, params.waitTimeout); } TEST_F(KmdNotifyTests, givenDisabledKmdDirectSubmissionNotifyMechanismWhenDirectSubmissionIsEnabledThenSelectBaseDelayTimeout) { overrideKmdNotifyParams(true, 150, false, 0, false, 0, false, 20); MockKmdNotifyHelper helper(&(hwInfo->capabilityTable.kmdNotifyProperties)); constexpr int64_t expectedTimeout = 150; constexpr bool 
directSubmission = true; FlushStamp flushStampToWait = 1; auto params = helper.obtainTimeoutParams(false, 1, 2, flushStampToWait, QueueThrottle::MEDIUM, true, directSubmission); EXPECT_TRUE(params.enableTimeout); EXPECT_EQ(expectedTimeout, params.waitTimeout); }compute-runtime-22.14.22890/opencl/test/unit_test/helpers/linux/000077500000000000000000000000001422164147700244305ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/helpers/linux/CMakeLists.txt000066400000000000000000000006171422164147700271740ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(UNIX) set(IGDRCL_SRCS_tests_helpers_linux ${CMAKE_CURRENT_SOURCE_DIR}/kmd_notify_linux_tests.cpp ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_helpers_linux}) add_subdirectories() set_property(GLOBAL PROPERTY IGDRCL_SRCS_tests_helpers_linux ${IGDRCL_SRCS_tests_helpers_linux}) endif() compute-runtime-22.14.22890/opencl/test/unit_test/helpers/linux/kmd_notify_linux_tests.cpp000066400000000000000000000016421422164147700317430ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/kmd_notify_properties.h" #include "shared/test/common/helpers/default_hw_info.h" #include "shared/test/common/test_macros/test.h" namespace NEO { class MockKmdNotifyHelper : public KmdNotifyHelper { public: using KmdNotifyHelper::getBaseTimeout; MockKmdNotifyHelper(const KmdNotifyProperties *newProperties) : KmdNotifyHelper(newProperties){}; }; TEST(KmdNotifyLinuxTests, givenTaskCountDiffGreaterThanOneWhenBaseTimeoutRequestedThenMultiply) { auto localProperties = (defaultHwInfo->capabilityTable.kmdNotifyProperties); localProperties.delayKmdNotifyMicroseconds = 10; const int64_t multiplier = 10; MockKmdNotifyHelper helper(&localProperties); EXPECT_EQ(localProperties.delayKmdNotifyMicroseconds * multiplier, helper.getBaseTimeout(multiplier)); } } // namespace 
NEO compute-runtime-22.14.22890/opencl/test/unit_test/helpers/memory_management_tests.cpp000066400000000000000000000141261422164147700307270ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/fixtures/memory_management_fixture.h" #include "shared/test/common/helpers/memory_management.h" #include "gtest/gtest.h" using MemoryManagement::AllocationEvent; using MemoryManagement::eventsAllocated; using MemoryManagement::eventsDeallocated; using MemoryManagement::failingAllocation; using MemoryManagement::indexAllocation; using MemoryManagement::indexDeallocation; using MemoryManagement::numAllocations; TEST(allocation, GivenFailingAllocationNegativeOneWhenCreatingAllocationThenAllocationIsCreatedSuccesfully) { ASSERT_EQ(failingAllocation, static_cast(-1)); auto ptr = new (std::nothrow) char; EXPECT_NE(nullptr, ptr); delete ptr; } TEST(allocation, GivenFailingAllocationOneWhenCreatingAllocationsThenOnlyOneAllocationIsCreatedSuccesfully) { MemoryManagement::detailedAllocationLoggingActive = true; ASSERT_EQ(static_cast(-1), failingAllocation); auto previousAllocations = numAllocations.load(); MemoryManagement::indexAllocation = 0; failingAllocation = 1; auto ptr1 = new (std::nothrow) char; auto ptr2 = new (std::nothrow) char; delete ptr1; delete ptr2; auto currentAllocations = numAllocations.load(); failingAllocation = -1; EXPECT_NE(nullptr, ptr1); EXPECT_EQ(nullptr, ptr2); EXPECT_EQ(previousAllocations, currentAllocations); MemoryManagement::detailedAllocationLoggingActive = false; } struct MemoryManagementTest : public MemoryManagementFixture, public ::testing::Test { void SetUp() override { MemoryManagementFixture::SetUp(); } void TearDown() override { MemoryManagementFixture::TearDown(); } }; TEST_F(MemoryManagementTest, GivenFailingAllocationOneWhenCreatingAllocationsThenOnlyOneAllocationIsCreatedSuccesfully) { setFailingAllocation(1); auto ptr1 = new (std::nothrow) char; auto 
ptr2 = new (std::nothrow) char; delete ptr1; delete ptr2; clearFailingAllocation(); EXPECT_NE(nullptr, ptr1); EXPECT_EQ(nullptr, ptr2); } TEST_F(MemoryManagementTest, GivenNoFailingAllocationWhenCreatingAllocationThenMemoryIsNotLeaked) { auto indexAllocationTop = indexAllocation.load(); auto indexDellocationTop = indexDeallocation.load(); EXPECT_EQ(static_cast(-1), MemoryManagement::enumerateLeak(indexAllocationTop, indexDellocationTop, false, false)); } TEST_F(MemoryManagementTest, GivenOneFailingAllocationWhenCreatingAllocationThenMemoryIsLeaked) { size_t sizeBuffer = 10; auto ptr = new (std::nothrow) char[sizeBuffer]; auto indexAllocationTop = indexAllocation.load(); auto indexDeallocationTop = indexDeallocation.load(); auto leakIndex = MemoryManagement::enumerateLeak(indexAllocationTop, indexDeallocationTop, false, false); ASSERT_NE(static_cast(-1), leakIndex); EXPECT_EQ(ptr, eventsAllocated[leakIndex].address); EXPECT_EQ(sizeBuffer, eventsAllocated[leakIndex].size); // Not expecting any more failures EXPECT_EQ(static_cast(-1), MemoryManagement::enumerateLeak(indexAllocationTop, indexDeallocationTop, false, false)); delete[] ptr; } TEST_F(MemoryManagementTest, GivenFourEventsWhenCreatingAllocationThenMemoryIsLeakedOnce) { size_t sizeBuffer = 10; delete new (std::nothrow) char; auto ptr = new (std::nothrow) char[sizeBuffer]; delete new (std::nothrow) char; auto indexAllocationTop = indexAllocation.load(); auto indexDeallocationTop = indexDeallocation.load(); auto leakIndex = MemoryManagement::enumerateLeak(indexAllocationTop, indexDeallocationTop, false, false); ASSERT_NE(static_cast(-1), leakIndex); EXPECT_EQ(ptr, eventsAllocated[leakIndex].address); EXPECT_EQ(sizeBuffer, eventsAllocated[leakIndex].size); // Not expecting any more failures EXPECT_EQ(static_cast(-1), MemoryManagement::enumerateLeak(indexAllocationTop, indexDeallocationTop, false, false)); delete[] ptr; } TEST_F(MemoryManagementTest, 
GivenTwoFailingAllocationsWhenCreatingAllocationThenMemoryIsLeaked) { size_t sizeBuffer = 10; auto ptr1 = new (std::nothrow) char[sizeBuffer]; auto ptr2 = new (std::nothrow) char[sizeBuffer]; auto indexAllocationTop = indexAllocation.load(); auto indexDeallocationTop = indexDeallocation.load(); auto leakIndex1 = MemoryManagement::enumerateLeak(indexAllocationTop, indexDeallocationTop, false, false); auto leakIndex2 = MemoryManagement::enumerateLeak(indexAllocationTop, indexDeallocationTop, false, false); ASSERT_NE(static_cast(-1), leakIndex1); EXPECT_EQ(ptr1, eventsAllocated[leakIndex1].address); EXPECT_EQ(sizeBuffer, eventsAllocated[leakIndex1].size); ASSERT_NE(static_cast(-1), leakIndex2); EXPECT_EQ(ptr2, eventsAllocated[leakIndex2].address); EXPECT_EQ(sizeBuffer, eventsAllocated[leakIndex2].size); // Not expecting any more failures EXPECT_EQ(static_cast(-1), MemoryManagement::enumerateLeak(indexAllocationTop, indexDeallocationTop, false, false)); delete[] ptr1; delete[] ptr2; } TEST_F(MemoryManagementTest, WhenDeletingNullPtrThenLeakIsNotReported) { char *ptr = nullptr; delete ptr; } TEST_F(MemoryManagementTest, WhenPointerIsDeletedThenAllocationShouldbeVisible) { size_t sizeBuffer = 10; auto index = MemoryManagement::indexAllocation.load(); auto ptr = new (std::nothrow) char[sizeBuffer]; EXPECT_EQ(ptr, eventsAllocated[index].address); EXPECT_EQ(sizeBuffer, eventsAllocated[index].size); index = MemoryManagement::indexDeallocation; auto ptrCopy = ptr; delete[] ptr; EXPECT_EQ(ptrCopy, eventsDeallocated[index].address); } #if ENABLE_ME_FOR_LEAK_TESTING TEST_F(MemoryManagementTest, GivenEnableForLeakTestingThenDetectLeak) { // Useful reference : MemoryManagement::onAllocationEvent MemoryManagement::breakOnAllocationEvent = 1; MemoryManagement::breakOnDeallocationEvent = 0; delete new char; new char; MemoryManagement::breakOnAllocationEvent = -1; MemoryManagement::breakOnDeallocationEvent = -1; } #endif 
compute-runtime-22.14.22890/opencl/test/unit_test/helpers/memory_properties_helpers_tests.cpp000066400000000000000000000626341422164147700325400ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/memory_properties_helpers.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "shared/test/common/mocks/ult_device_factory.h" #include "opencl/source/helpers/cl_memory_properties_helpers.h" #include "opencl/source/mem_obj/mem_obj_helper.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "CL/cl_ext_intel.h" #include "gtest/gtest.h" #include "memory_properties_flags.h" using namespace NEO; TEST(MemoryProperties, givenResource48BitMemoryPropertySetWhenGetAllocationPropertiesCalledThenSetAllocationPropertyToo) { UltDeviceFactory deviceFactory{1, 0}; MemoryProperties memoryProperties{}; memoryProperties.pDevice = deviceFactory.rootDevices[0]; memoryProperties.flags.resource48Bit = true; DeviceBitfield deviceBitfield{0xf}; HardwareInfo hwInfo(*defaultHwInfo); auto allocationProperties = MemoryPropertiesHelper::getAllocationProperties(0, memoryProperties, true, 0, AllocationType::BUFFER, false, hwInfo, deviceBitfield, false); EXPECT_EQ(1u, allocationProperties.flags.resource48Bit); } TEST(MemoryProperties, givenValidPropertiesWhenCreateMemoryPropertiesThenTrueIsReturned) { UltDeviceFactory deviceFactory{1, 0}; auto pDevice = deviceFactory.rootDevices[0]; MemoryProperties properties; properties = ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_READ_WRITE, 0, 0, pDevice); EXPECT_TRUE(properties.flags.readWrite); properties = ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_WRITE_ONLY, 0, 0, pDevice); EXPECT_TRUE(properties.flags.writeOnly); properties = 
ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_READ_ONLY, 0, 0, pDevice); EXPECT_TRUE(properties.flags.readOnly); properties = ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_USE_HOST_PTR, 0, 0, pDevice); EXPECT_TRUE(properties.flags.useHostPtr); properties = ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_ALLOC_HOST_PTR, 0, 0, pDevice); EXPECT_TRUE(properties.flags.allocHostPtr); properties = ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_COPY_HOST_PTR, 0, 0, pDevice); EXPECT_TRUE(properties.flags.copyHostPtr); properties = ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_HOST_WRITE_ONLY, 0, 0, pDevice); EXPECT_TRUE(properties.flags.hostWriteOnly); properties = ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_HOST_READ_ONLY, 0, 0, pDevice); EXPECT_TRUE(properties.flags.hostReadOnly); properties = ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_HOST_NO_ACCESS, 0, 0, pDevice); EXPECT_TRUE(properties.flags.hostNoAccess); properties = ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_KERNEL_READ_AND_WRITE, 0, 0, pDevice); EXPECT_TRUE(properties.flags.kernelReadAndWrite); properties = ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL, 0, 0, pDevice); EXPECT_TRUE(properties.flags.accessFlagsUnrestricted); properties = ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_NO_ACCESS_INTEL, 0, 0, pDevice); EXPECT_TRUE(properties.flags.noAccess); properties = ClMemoryPropertiesHelper::createMemoryProperties(0, CL_MEM_LOCALLY_UNCACHED_RESOURCE, 0, pDevice); EXPECT_TRUE(properties.flags.locallyUncachedResource); properties = ClMemoryPropertiesHelper::createMemoryProperties(0, CL_MEM_LOCALLY_UNCACHED_SURFACE_STATE_RESOURCE, 0, pDevice); EXPECT_TRUE(properties.flags.locallyUncachedInSurfaceState); properties = ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_FORCE_HOST_MEMORY_INTEL, 0, 0, pDevice); EXPECT_TRUE(properties.flags.forceHostMemory); properties = 
ClMemoryPropertiesHelper::createMemoryProperties(0, 0, CL_MEM_ALLOC_WRITE_COMBINED_INTEL, pDevice); EXPECT_TRUE(properties.allocFlags.allocWriteCombined); properties = ClMemoryPropertiesHelper::createMemoryProperties(0, 0, CL_MEM_ALLOC_INITIAL_PLACEMENT_DEVICE_INTEL, pDevice); EXPECT_TRUE(properties.allocFlags.usmInitialPlacementGpu); properties = ClMemoryPropertiesHelper::createMemoryProperties(0, 0, CL_MEM_ALLOC_INITIAL_PLACEMENT_HOST_INTEL, pDevice); EXPECT_TRUE(properties.allocFlags.usmInitialPlacementCpu); properties = ClMemoryPropertiesHelper::createMemoryProperties(0, CL_MEM_48BIT_RESOURCE_INTEL, 0, pDevice); EXPECT_TRUE(properties.flags.resource48Bit); } TEST(MemoryProperties, givenClMemForceLinearStorageFlagWhenCreateMemoryPropertiesThenReturnProperValue) { UltDeviceFactory deviceFactory{1, 0}; auto pDevice = deviceFactory.rootDevices[0]; MemoryProperties memoryProperties; cl_mem_flags flags = 0; cl_mem_flags_intel flagsIntel = 0; flags |= CL_MEM_FORCE_LINEAR_STORAGE_INTEL; flagsIntel = 0; memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(flags, flagsIntel, 0, pDevice); EXPECT_TRUE(memoryProperties.flags.forceLinearStorage); flags = 0; flagsIntel |= CL_MEM_FORCE_LINEAR_STORAGE_INTEL; memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(flags, flagsIntel, 0, pDevice); EXPECT_TRUE(memoryProperties.flags.forceLinearStorage); flags |= CL_MEM_FORCE_LINEAR_STORAGE_INTEL; flagsIntel |= CL_MEM_FORCE_LINEAR_STORAGE_INTEL; memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(flags, flagsIntel, 0, pDevice); EXPECT_TRUE(memoryProperties.flags.forceLinearStorage); flags = 0; flagsIntel = 0; memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(flags, flagsIntel, 0, pDevice); EXPECT_FALSE(memoryProperties.flags.forceLinearStorage); } TEST(MemoryProperties, givenClAllowUnrestrictedSizeFlagWhenCreateMemoryPropertiesThenReturnProperValue) { UltDeviceFactory deviceFactory{1, 0}; auto pDevice = 
deviceFactory.rootDevices[0]; MemoryProperties memoryProperties; cl_mem_flags flags = 0; cl_mem_flags_intel flagsIntel = 0; flags |= CL_MEM_ALLOW_UNRESTRICTED_SIZE_INTEL; flagsIntel = 0; memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(flags, flagsIntel, 0, pDevice); EXPECT_TRUE(memoryProperties.flags.allowUnrestrictedSize); flags = 0; flagsIntel |= CL_MEM_ALLOW_UNRESTRICTED_SIZE_INTEL; memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(flags, flagsIntel, 0, pDevice); EXPECT_TRUE(memoryProperties.flags.allowUnrestrictedSize); flags |= CL_MEM_ALLOW_UNRESTRICTED_SIZE_INTEL; flagsIntel |= CL_MEM_ALLOW_UNRESTRICTED_SIZE_INTEL; memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(flags, flagsIntel, 0, pDevice); EXPECT_TRUE(memoryProperties.flags.allowUnrestrictedSize); flags = 0; flagsIntel = 0; memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(flags, flagsIntel, 0, pDevice); EXPECT_FALSE(memoryProperties.flags.allowUnrestrictedSize); } TEST(MemoryProperties, givenClCompressedHintFlagWhenCreateMemoryPropertiesThenReturnProperValue) { MemoryProperties memoryProperties; UltDeviceFactory deviceFactory{1, 0}; auto pDevice = deviceFactory.rootDevices[0]; cl_mem_flags flags = CL_MEM_COMPRESSED_HINT_INTEL; cl_mem_flags_intel flagsIntel = 0; memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(flags, flagsIntel, 0, pDevice); EXPECT_TRUE(memoryProperties.flags.compressedHint); flags = 0; flagsIntel |= CL_MEM_COMPRESSED_HINT_INTEL; memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(flags, flagsIntel, 0, pDevice); EXPECT_TRUE(memoryProperties.flags.compressedHint); flags |= CL_MEM_COMPRESSED_HINT_INTEL; flagsIntel |= CL_MEM_COMPRESSED_HINT_INTEL; memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(flags, flagsIntel, 0, pDevice); EXPECT_TRUE(memoryProperties.flags.compressedHint); flags = 0; flagsIntel = 0; memoryProperties = 
ClMemoryPropertiesHelper::createMemoryProperties(flags, flagsIntel, 0, pDevice); EXPECT_FALSE(memoryProperties.flags.compressedHint); } TEST(MemoryProperties, givenClUncompressedHintFlagWhenCreateMemoryPropertiesThenReturnProperValue) { MemoryProperties memoryProperties; UltDeviceFactory deviceFactory{1, 0}; auto pDevice = deviceFactory.rootDevices[0]; cl_mem_flags flags = CL_MEM_UNCOMPRESSED_HINT_INTEL; cl_mem_flags_intel flagsIntel = 0; memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(flags, flagsIntel, 0, pDevice); EXPECT_TRUE(memoryProperties.flags.uncompressedHint); flags = 0; flagsIntel |= CL_MEM_UNCOMPRESSED_HINT_INTEL; memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(flags, flagsIntel, 0, pDevice); EXPECT_TRUE(memoryProperties.flags.uncompressedHint); flags |= CL_MEM_UNCOMPRESSED_HINT_INTEL; flagsIntel |= CL_MEM_UNCOMPRESSED_HINT_INTEL; memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(flags, flagsIntel, 0, pDevice); EXPECT_TRUE(memoryProperties.flags.uncompressedHint); flags = 0; flagsIntel = 0; memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(flags, flagsIntel, 0, pDevice); EXPECT_FALSE(memoryProperties.flags.uncompressedHint); } struct MemoryPropertiesHelperTests : ::testing::Test { MockContext context; MemoryProperties memoryProperties; cl_mem_flags flags = 0; cl_mem_flags_intel flagsIntel = 0; cl_mem_alloc_flags_intel allocflags = 0; }; TEST_F(MemoryPropertiesHelperTests, givenNullPropertiesWhenParsingMemoryPropertiesThenTrueIsReturned) { EXPECT_TRUE(ClMemoryPropertiesHelper::parseMemoryProperties(nullptr, memoryProperties, flags, flagsIntel, allocflags, MemoryPropertiesHelper::ObjType::UNKNOWN, context)); } TEST_F(MemoryPropertiesHelperTests, givenEmptyPropertiesWhenParsingMemoryPropertiesThenTrueIsReturned) { cl_mem_properties_intel properties[] = {0}; EXPECT_TRUE(ClMemoryPropertiesHelper::parseMemoryProperties(properties, memoryProperties, flags, flagsIntel, allocflags, 
MemoryPropertiesHelper::ObjType::UNKNOWN, context)); EXPECT_TRUE(ClMemoryPropertiesHelper::parseMemoryProperties(properties, memoryProperties, flags, flagsIntel, allocflags, MemoryPropertiesHelper::ObjType::BUFFER, context)); EXPECT_TRUE(ClMemoryPropertiesHelper::parseMemoryProperties(properties, memoryProperties, flags, flagsIntel, allocflags, MemoryPropertiesHelper::ObjType::IMAGE, context)); } TEST_F(MemoryPropertiesHelperTests, givenValidPropertiesWhenParsingMemoryPropertiesThenTrueIsReturned) { cl_mem_properties_intel properties[] = { CL_MEM_FLAGS, CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR | CL_MEM_USE_HOST_PTR | CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS | CL_MEM_COMPRESSED_HINT_INTEL | CL_MEM_UNCOMPRESSED_HINT_INTEL, CL_MEM_FLAGS_INTEL, CL_MEM_LOCALLY_UNCACHED_RESOURCE | CL_MEM_LOCALLY_UNCACHED_SURFACE_STATE_RESOURCE | CL_MEM_COMPRESSED_HINT_INTEL | CL_MEM_UNCOMPRESSED_HINT_INTEL, CL_MEM_ALLOC_FLAGS_INTEL, CL_MEM_ALLOC_WRITE_COMBINED_INTEL, CL_MEM_ALLOC_DEFAULT_INTEL, 0}; EXPECT_TRUE(ClMemoryPropertiesHelper::parseMemoryProperties(properties, memoryProperties, flags, flagsIntel, allocflags, MemoryPropertiesHelper::ObjType::UNKNOWN, context)); } TEST_F(MemoryPropertiesHelperTests, givenValidPropertiesWhenParsingMemoryPropertiesForBufferThenTrueIsReturned) { cl_mem_properties_intel properties[] = { CL_MEM_FLAGS, MemObjHelper::validFlagsForBuffer, CL_MEM_FLAGS_INTEL, MemObjHelper::validFlagsForBufferIntel, 0}; EXPECT_TRUE(ClMemoryPropertiesHelper::parseMemoryProperties(properties, memoryProperties, flags, flagsIntel, allocflags, MemoryPropertiesHelper::ObjType::BUFFER, context)); } TEST_F(MemoryPropertiesHelperTests, givenValidPropertiesWhenParsingMemoryPropertiesForImageThenTrueIsReturned) { cl_mem_properties_intel properties[] = { CL_MEM_FLAGS, MemObjHelper::validFlagsForImage, CL_MEM_FLAGS_INTEL, MemObjHelper::validFlagsForImageIntel, 0}; 
EXPECT_TRUE(ClMemoryPropertiesHelper::parseMemoryProperties(properties, memoryProperties, flags, flagsIntel, allocflags, MemoryPropertiesHelper::ObjType::IMAGE, context)); } TEST_F(MemoryPropertiesHelperTests, givenInvalidPropertiesWhenParsingMemoryPropertiesThenFalseIsReturned) { cl_mem_properties_intel properties[] = { (1 << 30), CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR | CL_MEM_USE_HOST_PTR, 0}; EXPECT_FALSE(ClMemoryPropertiesHelper::parseMemoryProperties(properties, memoryProperties, flags, flagsIntel, allocflags, MemoryPropertiesHelper::ObjType::UNKNOWN, context)); EXPECT_FALSE(ClMemoryPropertiesHelper::parseMemoryProperties(properties, memoryProperties, flags, flagsIntel, allocflags, MemoryPropertiesHelper::ObjType::BUFFER, context)); EXPECT_FALSE(ClMemoryPropertiesHelper::parseMemoryProperties(properties, memoryProperties, flags, flagsIntel, allocflags, MemoryPropertiesHelper::ObjType::IMAGE, context)); } TEST_F(MemoryPropertiesHelperTests, givenInvalidPropertiesWhenParsingMemoryPropertiesForImageThenFalseIsReturned) { cl_mem_properties_intel properties[] = { CL_MEM_FLAGS, MemObjHelper::validFlagsForBuffer, CL_MEM_FLAGS_INTEL, MemObjHelper::validFlagsForBufferIntel, 0}; EXPECT_FALSE(ClMemoryPropertiesHelper::parseMemoryProperties(properties, memoryProperties, flags, flagsIntel, allocflags, MemoryPropertiesHelper::ObjType::IMAGE, context)); } TEST_F(MemoryPropertiesHelperTests, givenInvalidFlagsWhenParsingMemoryPropertiesForImageThenFalseIsReturned) { cl_mem_properties_intel properties[] = { CL_MEM_FLAGS, (1 << 30), CL_MEM_FLAGS_INTEL, MemObjHelper::validFlagsForImageIntel, 0}; EXPECT_FALSE(ClMemoryPropertiesHelper::parseMemoryProperties(properties, memoryProperties, flags, flagsIntel, allocflags, MemoryPropertiesHelper::ObjType::IMAGE, context)); } TEST_F(MemoryPropertiesHelperTests, givenInvalidFlagsIntelWhenParsingMemoryPropertiesForImageThenFalseIsReturned) { cl_mem_properties_intel properties[] = { CL_MEM_FLAGS, MemObjHelper::validFlagsForImage, 
CL_MEM_FLAGS_INTEL, (1 << 30), 0}; EXPECT_FALSE(ClMemoryPropertiesHelper::parseMemoryProperties(properties, memoryProperties, flags, flagsIntel, allocflags, MemoryPropertiesHelper::ObjType::IMAGE, context)); } TEST_F(MemoryPropertiesHelperTests, givenInvalidPropertiesWhenParsingMemoryPropertiesForBufferThenFalseIsReturned) { cl_mem_properties_intel properties[] = { CL_MEM_FLAGS, MemObjHelper::validFlagsForImage, CL_MEM_FLAGS_INTEL, MemObjHelper::validFlagsForImageIntel, 0}; EXPECT_FALSE(ClMemoryPropertiesHelper::parseMemoryProperties(properties, memoryProperties, flags, flagsIntel, allocflags, MemoryPropertiesHelper::ObjType::BUFFER, context)); } TEST_F(MemoryPropertiesHelperTests, givenInvalidFlagsWhenParsingMemoryPropertiesForBufferThenFalseIsReturned) { cl_mem_properties_intel properties[] = { CL_MEM_FLAGS, (1 << 30), CL_MEM_FLAGS_INTEL, MemObjHelper::validFlagsForBufferIntel, 0}; EXPECT_FALSE(ClMemoryPropertiesHelper::parseMemoryProperties(properties, memoryProperties, flags, flagsIntel, allocflags, MemoryPropertiesHelper::ObjType::BUFFER, context)); } TEST_F(MemoryPropertiesHelperTests, givenInvalidFlagsIntelWhenParsingMemoryPropertiesForBufferThenFalseIsReturned) { cl_mem_properties_intel properties[] = { CL_MEM_FLAGS, MemObjHelper::validFlagsForBuffer, CL_MEM_FLAGS_INTEL, (1 << 30), 0}; EXPECT_FALSE(ClMemoryPropertiesHelper::parseMemoryProperties(properties, memoryProperties, flags, flagsIntel, allocflags, MemoryPropertiesHelper::ObjType::BUFFER, context)); } TEST_F(MemoryPropertiesHelperTests, givenDifferentParametersWhenCallingFillCachePolicyInPropertiesThenFlushL3FlagsAreCorrectlySet) { AllocationProperties allocationProperties{mockRootDeviceIndex, 0, AllocationType::BUFFER, mockDeviceBitfield}; for (auto uncached : ::testing::Bool()) { for (auto readOnly : ::testing::Bool()) { for (auto deviceOnlyVisibilty : ::testing::Bool()) { if (uncached || readOnly || deviceOnlyVisibilty) { allocationProperties.flags.flushL3RequiredForRead = true; 
allocationProperties.flags.flushL3RequiredForWrite = true; MemoryPropertiesHelper::fillCachePolicyInProperties(allocationProperties, uncached, readOnly, deviceOnlyVisibilty, 0); EXPECT_FALSE(allocationProperties.flags.flushL3RequiredForRead); EXPECT_FALSE(allocationProperties.flags.flushL3RequiredForWrite); } else { allocationProperties.flags.flushL3RequiredForRead = false; allocationProperties.flags.flushL3RequiredForWrite = false; MemoryPropertiesHelper::fillCachePolicyInProperties(allocationProperties, uncached, readOnly, deviceOnlyVisibilty, 0); EXPECT_TRUE(allocationProperties.flags.flushL3RequiredForRead); EXPECT_TRUE(allocationProperties.flags.flushL3RequiredForWrite); } } } } } TEST_F(MemoryPropertiesHelperTests, givenMemFlagsWithFlagsAndPropertiesWhenParsingMemoryPropertiesThenTheyAreCorrectlyParsed) { struct TestInput { cl_mem_flags flagsParameter; cl_mem_properties_intel flagsProperties; cl_mem_flags expectedResult; }; TestInput testInputs[] = { {0b0, 0b0, 0b0}, {0b0, 0b1010, 0b1010}, {0b1010, 0b0, 0b1010}, {0b1010, 0b101, 0b1111}, {0b1010, 0b1010, 0b1010}, {0b1111, 0b1111, 0b1111}}; for (auto &testInput : testInputs) { flags = testInput.flagsParameter; cl_mem_properties_intel properties[] = { CL_MEM_FLAGS, testInput.flagsProperties, 0}; EXPECT_TRUE(ClMemoryPropertiesHelper::parseMemoryProperties(properties, memoryProperties, flags, flagsIntel, allocflags, MemoryPropertiesHelper::ObjType::UNKNOWN, context)); EXPECT_EQ(testInput.expectedResult, flags); } } TEST_F(MemoryPropertiesHelperTests, WhenAdjustingDeviceBitfieldThenCorrectBitfieldIsReturned) { UltClDeviceFactory deviceFactory{2, 4}; auto memoryPropertiesRootDevice0 = ClMemoryPropertiesHelper::createMemoryProperties(0, 0, 0, &deviceFactory.rootDevices[0]->getDevice()); auto memoryPropertiesRootDevice0Tile0 = ClMemoryPropertiesHelper::createMemoryProperties(0, 0, 0, &deviceFactory.subDevices[0]->getDevice()); auto memoryPropertiesRootDevice0Tile1 = ClMemoryPropertiesHelper::createMemoryProperties(0, 
0, 0, &deviceFactory.subDevices[1]->getDevice()); auto memoryPropertiesRootDevice1 = ClMemoryPropertiesHelper::createMemoryProperties(0, 0, 0, &deviceFactory.rootDevices[1]->getDevice()); auto memoryPropertiesRootDevice1Tile0 = ClMemoryPropertiesHelper::createMemoryProperties(0, 0, 0, &deviceFactory.subDevices[4]->getDevice()); auto memoryPropertiesRootDevice1Tile1 = ClMemoryPropertiesHelper::createMemoryProperties(0, 0, 0, &deviceFactory.subDevices[5]->getDevice()); DeviceBitfield devicesInContextBitfield0001{0b1}; DeviceBitfield devicesInContextBitfield0101{0b101}; DeviceBitfield devicesInContextBitfield1010{0b1010}; DeviceBitfield devicesInContextBitfield1111{0b1111}; MemoryProperties memoryPropertiesToProcess[] = { memoryPropertiesRootDevice0, memoryPropertiesRootDevice0Tile0, memoryPropertiesRootDevice0Tile1, memoryPropertiesRootDevice1, memoryPropertiesRootDevice1Tile0, memoryPropertiesRootDevice1Tile1}; DeviceBitfield devicesInContextBitfields[] = {devicesInContextBitfield0001, devicesInContextBitfield0101, devicesInContextBitfield1010, devicesInContextBitfield1111}; uint32_t rootDevicesToProcess[] = {0, 1, 2}; EXPECT_EQ(0b1u, MemoryPropertiesHelper::adjustDeviceBitfield(0, memoryPropertiesRootDevice0Tile0, devicesInContextBitfield1111).to_ulong()); EXPECT_EQ(0b10u, MemoryPropertiesHelper::adjustDeviceBitfield(0, memoryPropertiesRootDevice0Tile1, devicesInContextBitfield1111).to_ulong()); EXPECT_EQ(0b1111u, MemoryPropertiesHelper::adjustDeviceBitfield(1, memoryPropertiesRootDevice0Tile0, devicesInContextBitfield1111).to_ulong()); EXPECT_EQ(0b1111u, MemoryPropertiesHelper::adjustDeviceBitfield(1, memoryPropertiesRootDevice0Tile1, devicesInContextBitfield1111).to_ulong()); EXPECT_EQ(0b101u, MemoryPropertiesHelper::adjustDeviceBitfield(0, memoryPropertiesRootDevice0, devicesInContextBitfield0101).to_ulong()); EXPECT_EQ(0b1010u, MemoryPropertiesHelper::adjustDeviceBitfield(0, memoryPropertiesRootDevice0, devicesInContextBitfield1010).to_ulong()); 
EXPECT_EQ(0b1111u, MemoryPropertiesHelper::adjustDeviceBitfield(0, memoryPropertiesRootDevice0, devicesInContextBitfield1111).to_ulong()); for (auto processedRootDevice : rootDevicesToProcess) { for (auto devicesInContextBitfield : devicesInContextBitfields) { for (auto &memoryProperties : memoryPropertiesToProcess) { auto expectedDeviceBitfield = devicesInContextBitfield; if (processedRootDevice == memoryProperties.pDevice->getRootDeviceIndex()) { expectedDeviceBitfield &= memoryProperties.pDevice->getDeviceBitfield(); } auto adjustedDeviceBitfield = MemoryPropertiesHelper::adjustDeviceBitfield( processedRootDevice, memoryProperties, devicesInContextBitfield); EXPECT_EQ(expectedDeviceBitfield, adjustedDeviceBitfield); } } } } TEST_F(MemoryPropertiesHelperTests, WhenCallingGetInitialPlacementThenCorrectValueIsReturned) { MemoryProperties memoryProperties{}; EXPECT_EQ(GraphicsAllocation::UsmInitialPlacement::CPU, MemoryPropertiesHelper::getUSMInitialPlacement(memoryProperties)); memoryProperties.allocFlags.usmInitialPlacementCpu = false; memoryProperties.allocFlags.usmInitialPlacementGpu = false; EXPECT_EQ(GraphicsAllocation::UsmInitialPlacement::CPU, MemoryPropertiesHelper::getUSMInitialPlacement(memoryProperties)); memoryProperties.allocFlags.usmInitialPlacementCpu = false; memoryProperties.allocFlags.usmInitialPlacementGpu = true; EXPECT_EQ(GraphicsAllocation::UsmInitialPlacement::GPU, MemoryPropertiesHelper::getUSMInitialPlacement(memoryProperties)); memoryProperties.allocFlags.usmInitialPlacementCpu = true; memoryProperties.allocFlags.usmInitialPlacementGpu = false; EXPECT_EQ(GraphicsAllocation::UsmInitialPlacement::CPU, MemoryPropertiesHelper::getUSMInitialPlacement(memoryProperties)); memoryProperties.allocFlags.usmInitialPlacementCpu = true; memoryProperties.allocFlags.usmInitialPlacementGpu = true; EXPECT_EQ(GraphicsAllocation::UsmInitialPlacement::CPU, MemoryPropertiesHelper::getUSMInitialPlacement(memoryProperties)); } TEST_F(MemoryPropertiesHelperTests, 
givenUsmInitialPlacementSetWhenCallingHasInitialPlacementCpuThenCorrectValueIsReturned) { DebugManagerStateRestore restorer; MemoryProperties memoryProperties{}; for (auto intialPlacement : {-1, 0, 1}) { DebugManager.flags.UsmInitialPlacement.set(intialPlacement); if (intialPlacement == 1) { EXPECT_EQ(GraphicsAllocation::UsmInitialPlacement::GPU, MemoryPropertiesHelper::getUSMInitialPlacement(memoryProperties)); } else { EXPECT_EQ(GraphicsAllocation::UsmInitialPlacement::CPU, MemoryPropertiesHelper::getUSMInitialPlacement(memoryProperties)); } } } TEST_F(MemoryPropertiesHelperTests, WhenCallingSetInitialPlacementThenCorrectValueIsSetInAllocationProperties) { AllocationProperties allocationProperties{mockRootDeviceIndex, 0, AllocationType::UNIFIED_SHARED_MEMORY, mockDeviceBitfield}; for (auto initialPlacement : {GraphicsAllocation::UsmInitialPlacement::CPU, GraphicsAllocation::UsmInitialPlacement::GPU}) { MemoryPropertiesHelper::setUSMInitialPlacement(allocationProperties, initialPlacement); EXPECT_EQ(initialPlacement, allocationProperties.usmInitialPlacement); } } compute-runtime-22.14.22890/opencl/test/unit_test/helpers/mipmap_tests.cpp000066400000000000000000000165631422164147700265150ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/mocks/mock_gmm.h" #include "opencl/source/helpers/mipmap.h" #include "opencl/source/mem_obj/image.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_image.h" #include "gtest/gtest.h" using namespace NEO; constexpr size_t testOrigin[]{2, 3, 5, 7}; typedef ::testing::TestWithParam> MipLevelTest; TEST_P(MipLevelTest, givenMemObjectTypeThenProperMipLevelIsReturned) { auto pair = GetParam(); EXPECT_EQ(static_cast(pair.second), findMipLevel(pair.first, testOrigin)); } INSTANTIATE_TEST_CASE_P(MipLevel, MipLevelTest, ::testing::Values(std::make_pair(CL_MEM_OBJECT_IMAGE1D, testOrigin[1]), 
std::make_pair(CL_MEM_OBJECT_IMAGE1D_ARRAY, testOrigin[2]), std::make_pair(CL_MEM_OBJECT_IMAGE2D, testOrigin[2]), std::make_pair(CL_MEM_OBJECT_IMAGE2D_ARRAY, testOrigin[3]), std::make_pair(CL_MEM_OBJECT_IMAGE3D, testOrigin[3]), std::make_pair(CL_MEM_OBJECT_BUFFER, 0U), std::make_pair(CL_MEM_OBJECT_IMAGE1D_BUFFER, 0U), std::make_pair(CL_MEM_OBJECT_PIPE, 0U))); typedef ::testing::TestWithParam> MipLevelOriginIdxTest; TEST_P(MipLevelOriginIdxTest, givenMemObjectTypeWhenGettingMipLevelOriginIdxThenCorrectMipLevelIsReturned) { auto pair = GetParam(); EXPECT_EQ(static_cast(pair.second), getMipLevelOriginIdx(pair.first)); } INSTANTIATE_TEST_CASE_P(MipLevelOriginIdx, MipLevelOriginIdxTest, ::testing::Values(std::make_pair(CL_MEM_OBJECT_IMAGE1D, 1U), std::make_pair(CL_MEM_OBJECT_IMAGE1D_ARRAY, 2U), std::make_pair(CL_MEM_OBJECT_IMAGE2D, 2U), std::make_pair(CL_MEM_OBJECT_IMAGE2D_ARRAY, 3U), std::make_pair(CL_MEM_OBJECT_IMAGE3D, 3U), std::make_pair(CL_MEM_OBJECT_IMAGE1D_BUFFER, 0U), std::make_pair(CL_MEM_OBJECT_BUFFER, static_cast(-1)), std::make_pair(CL_MEM_OBJECT_PIPE, static_cast(-1)))); TEST(MipmapHelper, givenClImageDescWithoutMipLevelsWhenIsMipMappedIsCalledThenFalseIsReturned) { cl_image_desc desc = {}; desc.num_mip_levels = 0; EXPECT_FALSE(NEO::isMipMapped(desc)); desc.num_mip_levels = 1; EXPECT_FALSE(NEO::isMipMapped(desc)); } TEST(MipmapHelper, givenClImageDescWithMipLevelsWhenIsMipMappedIsCalledThenTrueIsReturned) { cl_image_desc desc = {}; desc.num_mip_levels = 2; EXPECT_TRUE(NEO::isMipMapped(desc)); } TEST(MipmapHelper, givenBufferWhenIsMipMappedIsCalledThenFalseIsReturned) { MockBuffer buffer; EXPECT_FALSE(NEO::isMipMapped(&buffer)); } struct MockImage : MockImageBase { MockImage() : MockImageBase() { surfaceFormatInfo.surfaceFormat.ImageElementSizeInBytes = 4u; } }; TEST(MipmapHelper, givenImageWithoutMipLevelsWhenIsMipMappedIsCalledThenFalseIsReturned) { MockImage image; image.imageDesc.num_mip_levels = 0; EXPECT_FALSE(NEO::isMipMapped(&image)); 
image.imageDesc.num_mip_levels = 1; EXPECT_FALSE(NEO::isMipMapped(&image)); } TEST(MipmapHelper, givenImageWithMipLevelsWhenIsMipMappedIsCalledThenTrueIsReturned) { MockImage image; image.imageDesc.num_mip_levels = 2; EXPECT_TRUE(NEO::isMipMapped(&image)); } TEST(MipmapHelper, givenImageWithoutMipLevelsWhenGetMipOffsetIsCalledThenZeroIsReturned) { MockImage image; image.imageDesc.num_mip_levels = 1; auto offset = getMipOffset(&image, testOrigin); EXPECT_EQ(0U, offset); } using myTuple = std::tuple, uint32_t, uint32_t>; using MipOffsetTest = ::testing::TestWithParam; TEST_P(MipOffsetTest, givenImageWithMipLevelsWhenGetMipOffsetIsCalledThenProperOffsetIsReturned) { std::array origin; uint32_t expectedOffset; cl_mem_object_type imageType; std::tie(origin, expectedOffset, imageType) = GetParam(); MockImage image; image.imageDesc.num_mip_levels = 16; image.imageDesc.image_type = imageType; image.imageDesc.image_width = 11; image.imageDesc.image_height = 13; image.imageDesc.image_depth = 17; auto offset = getMipOffset(&image, origin.data()); EXPECT_EQ(expectedOffset, offset); } constexpr myTuple testOrigins[]{myTuple({{2, 3, 5, 7}}, 812u, CL_MEM_OBJECT_IMAGE3D), myTuple({{2, 3, 5, 2}}, 592u, CL_MEM_OBJECT_IMAGE3D), myTuple({{2, 3, 5, 1}}, 572u, CL_MEM_OBJECT_IMAGE3D), myTuple({{2, 3, 5, 0}}, 0u, CL_MEM_OBJECT_IMAGE3D), myTuple({{2, 3, 5, 7}}, 812u, CL_MEM_OBJECT_IMAGE2D_ARRAY), myTuple({{2, 3, 5, 2}}, 592u, CL_MEM_OBJECT_IMAGE2D_ARRAY), myTuple({{2, 3, 5, 1}}, 572u, CL_MEM_OBJECT_IMAGE2D_ARRAY), myTuple({{2, 3, 5, 0}}, 0u, CL_MEM_OBJECT_IMAGE2D_ARRAY), myTuple({{2, 3, 5, 0}}, 724u, CL_MEM_OBJECT_IMAGE2D), myTuple({{2, 3, 2, 0}}, 592u, CL_MEM_OBJECT_IMAGE2D), myTuple({{2, 3, 1, 0}}, 572u, CL_MEM_OBJECT_IMAGE2D), myTuple({{2, 3, 0, 0}}, 0u, CL_MEM_OBJECT_IMAGE2D), myTuple({{2, 3, 5, 0}}, 724u, CL_MEM_OBJECT_IMAGE1D_ARRAY), myTuple({{2, 3, 2, 0}}, 592u, CL_MEM_OBJECT_IMAGE1D_ARRAY), myTuple({{2, 3, 1, 0}}, 572u, CL_MEM_OBJECT_IMAGE1D_ARRAY), myTuple({{2, 3, 0, 0}}, 0u, 
CL_MEM_OBJECT_IMAGE1D_ARRAY), myTuple({{2, 3, 0, 0}}, 56u, CL_MEM_OBJECT_IMAGE1D), myTuple({{2, 2, 0, 0}}, 52u, CL_MEM_OBJECT_IMAGE1D), myTuple({{2, 1, 0, 0}}, 44u, CL_MEM_OBJECT_IMAGE1D), myTuple({{2, 0, 0, 0}}, 0u, CL_MEM_OBJECT_IMAGE1D)}; INSTANTIATE_TEST_CASE_P(MipMapOffset, MipOffsetTest, ::testing::ValuesIn(testOrigins)); compute-runtime-22.14.22890/opencl/test/unit_test/helpers/per_thread_data_tests.cpp000066400000000000000000000270661422164147700303400ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/linear_stream.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/local_id_gen.h" #include "shared/source/helpers/per_thread_data.h" #include "shared/source/program/kernel_info.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "patch_shared.h" using namespace NEO; template struct PerThreadDataTests : public ClDeviceFixture, ::testing::Test { void SetUp() override { ClDeviceFixture::SetUp(); kernelInfo.setLocalIds({localIdX, localIdY, localIdZ}); kernelInfo.kernelDescriptor.kernelAttributes.flags.usesFlattenedLocalIds = flattenedId; kernelInfo.kernelDescriptor.kernelAttributes.flags.perThreadDataUnusedGrfIsPresent = !(localIdX || localIdY || localIdZ || flattenedId); numChannels = kernelInfo.kernelDescriptor.kernelAttributes.numLocalIdChannels; simd = 32; kernelInfo.kernelDescriptor.kernelAttributes.simdSize = simd; kernelInfo.heapInfo.pKernelHeap = kernelIsa; kernelInfo.heapInfo.KernelHeapSize = sizeof(kernelIsa); grfSize = 32; indirectHeapMemorySize = 4096; indirectHeapMemory = reinterpret_cast(alignedMalloc(indirectHeapMemorySize, 32)); ASSERT_TRUE(isAligned<32>(indirectHeapMemory)); } void TearDown() override { 
alignedFree(indirectHeapMemory); ClDeviceFixture::TearDown(); } const std::array workgroupWalkOrder = {{0, 1, 2}}; uint32_t simd; uint32_t grfSize; uint32_t numChannels; uint32_t kernelIsa[32]; uint8_t *indirectHeapMemory; size_t indirectHeapMemorySize; SKernelBinaryHeaderCommon kernelHeader; MockKernelInfo kernelInfo; }; typedef PerThreadDataTests<> PerThreadDataXYZTests; HWTEST_F(PerThreadDataXYZTests, WhenGettingLocalIdSizePerThreadThenCorrectValueIsReturned) { EXPECT_EQ(3 * 2 * grfSize, PerThreadDataHelper::getLocalIdSizePerThread(simd, grfSize, numChannels)); } HWTEST_F(PerThreadDataXYZTests, WhenGettingPerThreadDataSizeTotalThenCorrectValueIsReturned) { size_t localWorkSize = 256; EXPECT_EQ(256 * 3 * 2 * grfSize / 32, PerThreadDataHelper::getPerThreadDataSizeTotal(simd, grfSize, numChannels, localWorkSize)); } HWTEST_F(PerThreadDataXYZTests, Given256x1x1WhenSendingPerThreadDataThenCorrectAmountOfIndirectHeapIsConsumed) { MockGraphicsAllocation gfxAllocation(indirectHeapMemory, indirectHeapMemorySize); LinearStream indirectHeap(&gfxAllocation); const std::array localWorkSizes = {{256, 1, 1}}; size_t localWorkSize = localWorkSizes[0] * localWorkSizes[1] * localWorkSizes[2]; auto offsetPerThreadData = PerThreadDataHelper::sendPerThreadData( indirectHeap, simd, grfSize, numChannels, localWorkSizes, workgroupWalkOrder, false); auto expectedPerThreadDataSizeTotal = PerThreadDataHelper::getPerThreadDataSizeTotal(simd, grfSize, numChannels, localWorkSize); size_t sizeConsumed = indirectHeap.getUsed() - offsetPerThreadData; EXPECT_EQ(expectedPerThreadDataSizeTotal, sizeConsumed); } HWTEST_F(PerThreadDataXYZTests, Given2x4x8WhenSendingPerThreadDataThenCorrectAmountOfIndirectHeapIsConsumed) { MockGraphicsAllocation gfxAllocation(indirectHeapMemory, indirectHeapMemorySize); LinearStream indirectHeap(&gfxAllocation); const std::array localWorkSizes = {{2, 4, 8}}; auto offsetPerThreadData = PerThreadDataHelper::sendPerThreadData( indirectHeap, simd, grfSize, numChannels, 
localWorkSizes, workgroupWalkOrder, false); size_t sizeConsumed = indirectHeap.getUsed() - offsetPerThreadData; EXPECT_EQ(64u * (3u * 2u * 4u * 8u) / 32u, sizeConsumed); } HWTEST_F(PerThreadDataXYZTests, GivenDifferentSimdWhenGettingThreadPayloadSizeThenCorrectSizeIsReturned) { kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 32; uint32_t size = PerThreadDataHelper::getThreadPayloadSize(kernelInfo.kernelDescriptor, grfSize); EXPECT_EQ(grfSize * 2u * 3u, size); kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 16; size = PerThreadDataHelper::getThreadPayloadSize(kernelInfo.kernelDescriptor, grfSize); EXPECT_EQ(grfSize * 3u, size); kernelInfo.kernelDescriptor.kernelAttributes.flags.perThreadDataHeaderIsPresent = true; size = PerThreadDataHelper::getThreadPayloadSize(kernelInfo.kernelDescriptor, grfSize); EXPECT_EQ(grfSize * 4u, size); kernelInfo.kernelDescriptor.kernelAttributes.flags.perThreadDataUnusedGrfIsPresent = true; size = PerThreadDataHelper::getThreadPayloadSize(kernelInfo.kernelDescriptor, grfSize); EXPECT_EQ(grfSize * 5u, size); } typedef PerThreadDataTests PerThreadDataNoIdsTests; HWTEST_F(PerThreadDataNoIdsTests, givenZeroChannelsWhenPassedToGetLocalIdSizePerThreadThenSizeOfOneGrfIsReturned) { EXPECT_EQ(32u, PerThreadDataHelper::getLocalIdSizePerThread(simd, grfSize, numChannels)); } HWTEST_F(PerThreadDataNoIdsTests, givenZeroChannelsAndHighWkgSizeWhenGetPerThreadDataSizeTotalIsCalledThenReturnedSizeContainsUnusedGrfPerEachThread) { size_t localWorkSize = 256u; auto threadCount = localWorkSize / simd; auto expectedSize = threadCount * grfSize; EXPECT_EQ(expectedSize, PerThreadDataHelper::getPerThreadDataSizeTotal(simd, grfSize, numChannels, localWorkSize)); } HWTEST_F(PerThreadDataNoIdsTests, GivenThreadPaylodDataWithoutLocalIdsWhenSendingPerThreadDataThenIndirectHeapMemoryIsNotConsumed) { uint8_t fillValue = 0xcc; memset(indirectHeapMemory, fillValue, indirectHeapMemorySize); MockGraphicsAllocation gfxAllocation(indirectHeapMemory, 
indirectHeapMemorySize); LinearStream indirectHeap(&gfxAllocation); const std::array localWorkSizes = {{256, 1, 1}}; auto offsetPerThreadData = PerThreadDataHelper::sendPerThreadData( indirectHeap, simd, grfSize, numChannels, localWorkSizes, workgroupWalkOrder, false); size_t sizeConsumed = indirectHeap.getUsed() - offsetPerThreadData; EXPECT_EQ(0u, sizeConsumed); size_t i = 0; while (i < indirectHeapMemorySize) { ASSERT_EQ(fillValue, indirectHeapMemory[i]) << "for index " << i; ++i; } } HWTEST_F(PerThreadDataNoIdsTests, GivenSimdWhenGettingThreadPayloadSizeThenCorrectValueIsReturned) { kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 32; uint32_t size = PerThreadDataHelper::getThreadPayloadSize(kernelInfo.kernelDescriptor, grfSize); EXPECT_EQ(grfSize, size); kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 16; size = PerThreadDataHelper::getThreadPayloadSize(kernelInfo.kernelDescriptor, grfSize); EXPECT_EQ(grfSize, size); kernelInfo.kernelDescriptor.kernelAttributes.flags.perThreadDataHeaderIsPresent = true; size = PerThreadDataHelper::getThreadPayloadSize(kernelInfo.kernelDescriptor, grfSize); EXPECT_EQ(grfSize * 2u, size); } typedef PerThreadDataTests PerThreadDataFlattenedIdsTests; HWTEST_F(PerThreadDataFlattenedIdsTests, GivenSimdWhenGettingThreadPayloadSizeThenCorrectValueIsReturned) { kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 32; uint32_t size = PerThreadDataHelper::getThreadPayloadSize(kernelInfo.kernelDescriptor, grfSize); EXPECT_EQ(grfSize * 2u, size); kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 16; size = PerThreadDataHelper::getThreadPayloadSize(kernelInfo.kernelDescriptor, grfSize); EXPECT_EQ(grfSize, size); kernelInfo.kernelDescriptor.kernelAttributes.flags.perThreadDataHeaderIsPresent = true; size = PerThreadDataHelper::getThreadPayloadSize(kernelInfo.kernelDescriptor, grfSize); EXPECT_EQ(grfSize * 2u, size); kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 32; size = 
PerThreadDataHelper::getThreadPayloadSize(kernelInfo.kernelDescriptor, grfSize); EXPECT_EQ(grfSize * 3u, size); } TEST(PerThreadDataTest, WhenSettingLocalIdsInPerThreadDataThenIdsAreSetInCorrectOrder) { uint32_t simd = 8; uint32_t grfSize = 32; uint32_t numChannels = 3; uint32_t localWorkSize = 24; const std::array localWorkSizes = {{24, 1, 1}}; const std::array workgroupWalkOrder = {{0, 1, 2}}; auto sizePerThreadDataTotal = PerThreadDataHelper::getPerThreadDataSizeTotal(simd, grfSize, numChannels, localWorkSize); auto sizeOverSizedBuffer = sizePerThreadDataTotal * 4; auto buffer = static_cast(alignedMalloc(sizeOverSizedBuffer, 16)); memset(buffer, 0, sizeOverSizedBuffer); // Setup reference filled with zeros auto reference = static_cast(alignedMalloc(sizePerThreadDataTotal, 16)); memset(reference, 0, sizePerThreadDataTotal); LinearStream stream(buffer, sizeOverSizedBuffer / 2); PerThreadDataHelper::sendPerThreadData( stream, simd, grfSize, numChannels, localWorkSizes, workgroupWalkOrder, false); // Check if buffer overrun happend, only first sizePerThreadDataTotal bytes can be overwriten, following should be same as reference. 
for (auto i = sizePerThreadDataTotal; i < sizeOverSizedBuffer; i += sizePerThreadDataTotal) { int result = memcmp(buffer + i, reference, sizePerThreadDataTotal); EXPECT_EQ(0, result); } alignedFree(buffer); alignedFree(reference); } TEST(PerThreadDataTest, givenSimdEqualOneWhenSettingLocalIdsInPerThreadDataThenIdsAreSetInCorrectOrder) { uint32_t simd = 1; uint32_t grfSize = 32; uint32_t numChannels = 3; uint32_t localWorkSize = 24; const std::array localWorkSizes = {{3, 4, 2}}; const std::array workgroupWalkOrder = {{0, 1, 2}}; auto sizePerThreadDataTotal = PerThreadDataHelper::getPerThreadDataSizeTotal(simd, grfSize, numChannels, localWorkSize); auto sizeOverSizedBuffer = sizePerThreadDataTotal * 4; auto buffer = static_cast(alignedMalloc(sizeOverSizedBuffer, 16)); memset(buffer, 0, sizeOverSizedBuffer); // Setup reference filled with zeros auto reference = static_cast(alignedMalloc(sizePerThreadDataTotal, 16)); memset(reference, 0, sizePerThreadDataTotal); LinearStream stream(buffer, sizeOverSizedBuffer / 2); PerThreadDataHelper::sendPerThreadData( stream, simd, grfSize, numChannels, localWorkSizes, workgroupWalkOrder, false); auto bufferPtr = buffer; for (uint16_t i = 0; i < localWorkSizes[2]; i++) { for (uint16_t j = 0; j < localWorkSizes[1]; j++) { for (uint16_t k = 0; k < localWorkSizes[0]; k++) { uint16_t ids[] = {k, j, i}; int result = memcmp(bufferPtr, ids, sizeof(uint16_t) * 3); EXPECT_EQ(0, result); bufferPtr += grfSize; } } } // Check if buffer overrun happend, only first sizePerThreadDataTotal bytes can be overwriten, following should be same as reference. 
for (auto i = sizePerThreadDataTotal; i < sizeOverSizedBuffer; i += sizePerThreadDataTotal) { int result = memcmp(buffer + i, reference, sizePerThreadDataTotal); EXPECT_EQ(0, result); } alignedFree(buffer); alignedFree(reference); } compute-runtime-22.14.22890/opencl/test/unit_test/helpers/queue_helpers_tests.cpp000066400000000000000000000045111422164147700300660ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/helpers/queue_helpers.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_event.h" #include "gtest/gtest.h" using namespace NEO; TEST(QueueHelpersTest, givenCommandQueueWithoutVirtualEventWhenReleaseQueueIsCalledThenCmdQInternalRefCountIsNotDecremented) { cl_int retVal = CL_SUCCESS; MockCommandQueue *cmdQ = new MockCommandQueue; EXPECT_EQ(1, cmdQ->getRefInternalCount()); EXPECT_EQ(1, cmdQ->getRefInternalCount()); cmdQ->incRefInternal(); EXPECT_EQ(2, cmdQ->getRefInternalCount()); releaseQueue(cmdQ, retVal); EXPECT_EQ(1, cmdQ->getRefInternalCount()); cmdQ->decRefInternal(); } TEST(QueueHelpersTest, givenPropertyListWithPropertyOfValueZeroWhenGettingPropertyValueThenCorrectValueIsReturned) { cl_queue_properties propertyName1 = 0xA; cl_queue_properties propertyName2 = 0xB; cl_queue_properties properties[] = {propertyName1, 0, propertyName2, 0, 0}; int testedPropertyValues[] = {-1, 0, 1}; for (auto property1Value : testedPropertyValues) { properties[1] = property1Value; for (auto property2Value : testedPropertyValues) { properties[3] = property2Value; EXPECT_EQ(property1Value, getCmdQueueProperties(properties, propertyName1)); EXPECT_EQ(property2Value, getCmdQueueProperties(properties, propertyName2)); } } } TEST(QueueHelpersTest, givenPropertiesWhenGettingPropertyValuesThenReturnCorrectFoundPropertyValue) { cl_queue_properties nonExistantProperty = 0xCC; 
cl_queue_properties properties[] = { 0xAA, 3, 0xBB, 0, 0}; bool foundProperty = false; EXPECT_EQ(properties[1], getCmdQueueProperties(properties, properties[0], &foundProperty)); EXPECT_TRUE(foundProperty); foundProperty = false; EXPECT_EQ(properties[3], getCmdQueueProperties(properties, properties[2], &foundProperty)); EXPECT_TRUE(foundProperty); foundProperty = false; EXPECT_EQ(0u, getCmdQueueProperties(properties, nonExistantProperty, &foundProperty)); EXPECT_FALSE(foundProperty); } compute-runtime-22.14.22890/opencl/test/unit_test/helpers/raii_hw_helper.h000066400000000000000000000013071422164147700264240ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/hw_helper.h" namespace NEO { extern HwHelper *hwHelperFactory[IGFX_MAX_CORE]; template class RAIIHwHelperFactory { public: GFXCORE_FAMILY gfxCoreFamily; HwHelper *hwHelper; MockHelper mockHwHelper; RAIIHwHelperFactory(GFXCORE_FAMILY gfxCoreFamily) { this->gfxCoreFamily = gfxCoreFamily; hwHelper = hwHelperFactory[this->gfxCoreFamily]; hwHelperFactory[this->gfxCoreFamily] = &mockHwHelper; } ~RAIIHwHelperFactory() { hwHelperFactory[this->gfxCoreFamily] = hwHelper; } }; } // namespace NEOcompute-runtime-22.14.22890/opencl/test/unit_test/helpers/ray_tracing_helper_tests.cpp000066400000000000000000000071701422164147700310650ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/constants.h" #include "shared/source/helpers/ray_tracing_helper.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_context.h" using namespace NEO; TEST(RayTracingHelperTests, whenGetMemoryBackedFifoSizeToPatchIsCalledCorrectValueIsReturned) { size_t fifoSize = 
RayTracingHelper::getMemoryBackedFifoSizeToPatch(); size_t expectedSize = RayTracingHelper::memoryBackedFifoSizePerDss == 0 ? 0 : Math::log2(RayTracingHelper::memoryBackedFifoSizePerDss / KB) - 1; EXPECT_EQ(expectedSize, fifoSize); } TEST(RayTracingHelperTests, whenMemoryBackedFifoSizeIsRequestedThenCorrectValueIsReturned) { MockDevice device; size_t size = RayTracingHelper::getTotalMemoryBackedFifoSize(device); size_t expectedSize = device.getHardwareInfo().gtSystemInfo.DualSubSliceCount * RayTracingHelper::memoryBackedFifoSizePerDss; EXPECT_EQ(expectedSize, size); } TEST(RayTracingHelperTests, whenGlobalDispatchSizeIsRequestedThenCorrectValueIsReturned) { MockClDevice device{new MockDevice}; MockContext context(&device); uint32_t maxBvhLevel = 2; uint32_t extraBytesPerThread = 20; uint32_t extraBytesGlobal = 100; size_t expectedSize = alignUp(RayTracingHelper::getRtGlobalsSize(), MemoryConstants::cacheLineSize) + alignUp((RayTracingHelper::hitInfoSize + RayTracingHelper::bvhStackSize * maxBvhLevel + extraBytesPerThread), MemoryConstants::cacheLineSize) * context.getDevice(0)->getHardwareInfo().gtSystemInfo.DualSubSliceCount * RayTracingHelper::stackDssMultiplier + extraBytesGlobal; EXPECT_EQ(expectedSize, RayTracingHelper::getDispatchGlobalSize(device.getDevice(), maxBvhLevel, extraBytesPerThread, extraBytesGlobal)); } TEST(RayTracingHelperTests, whenNumRtStacksPerDssIsRequestedThenCorrectValueIsReturned) { MockDevice device; uint32_t numDssRtStacks = RayTracingHelper::getNumRtStacksPerDss(device); uint32_t expectedValue = device.getHardwareInfo().gtSystemInfo.DualSubSliceCount ? 
static_cast(RayTracingHelper::getNumRtStacks(device) / device.getHardwareInfo().gtSystemInfo.DualSubSliceCount + 0.5) : RayTracingHelper::stackDssMultiplier; EXPECT_EQ(expectedValue, numDssRtStacks); } TEST(RayTracingHelperTests, whenNumDssIsRequestedThenCorrectValueIsReturned) { MockDevice device; EXPECT_EQ(device.getHardwareInfo().gtSystemInfo.DualSubSliceCount, RayTracingHelper::getNumDss(device)); } TEST(RayTracingHelperTests, whenStackSizePerRayIsRequestedThenCorrectValueIsReturned) { EXPECT_EQ(RayTracingHelper::hitInfoSize, RayTracingHelper::getStackSizePerRay(0, 0)); uint32_t maxBvhLevel = 1234; uint32_t extraBytesLocal = 5678; uint32_t expectedValue = alignUp((RayTracingHelper::hitInfoSize + RayTracingHelper::bvhStackSize * maxBvhLevel + extraBytesLocal), MemoryConstants::cacheLineSize); EXPECT_EQ(RayTracingHelper::getStackSizePerRay(maxBvhLevel, extraBytesLocal), expectedValue); } TEST(RayTracingHelperTests, whenGetMemoryBackedFifoSizeToPatchIsCalledThenCorrectValueIsReturned) { EXPECT_EQ(2u, RayTracingHelper::getMemoryBackedFifoSizeToPatch()); } compute-runtime-22.14.22890/opencl/test/unit_test/helpers/sampler_helpers_tests.cpp000066400000000000000000000015211422164147700304030ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/helpers/sampler_helpers.h" #include "gtest/gtest.h" TEST(SamplerHelpers, WhenGettingAddrModeEnumsThenCorrectValuesAreReturned) { EXPECT_EQ(CLK_ADDRESS_REPEAT, GetAddrModeEnum(CL_ADDRESS_REPEAT)); EXPECT_EQ(CLK_ADDRESS_CLAMP_TO_EDGE, GetAddrModeEnum(CL_ADDRESS_CLAMP_TO_EDGE)); EXPECT_EQ(CLK_ADDRESS_CLAMP, GetAddrModeEnum(CL_ADDRESS_CLAMP)); EXPECT_EQ(CLK_ADDRESS_NONE, GetAddrModeEnum(CL_ADDRESS_NONE)); EXPECT_EQ(CLK_ADDRESS_MIRRORED_REPEAT, GetAddrModeEnum(CL_ADDRESS_MIRRORED_REPEAT)); } TEST(SamplerHelpers, WhenGettingNormCoordsEnumsThenCorrectValuesAreReturned) { EXPECT_EQ(CLK_NORMALIZED_COORDS_TRUE, GetNormCoordsEnum(true)); 
EXPECT_EQ(CLK_NORMALIZED_COORDS_FALSE, GetNormCoordsEnum(false)); } compute-runtime-22.14.22890/opencl/test/unit_test/helpers/static_size3.h000066400000000000000000000004171422164147700260500ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once template struct StatickSize3 { operator const size_t *() { static const size_t v[] = {X, Y, Z}; return v; } }; compute-runtime-22.14.22890/opencl/test/unit_test/helpers/task_information_tests.cpp000066400000000000000000000605171422164147700305770ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/test/common/mocks/mock_csr.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/event/user_event.h" #include "opencl/source/helpers/task_information.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/fixtures/dispatch_flags_fixture.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include #include using namespace NEO; TEST(CommandTest, GivenNoTerminateFlagWhenSubmittingMapUnmapThenCsrIsFlushed) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); std::unique_ptr cmdQ(new MockCommandQueue(nullptr, device.get(), nullptr, false)); MockCommandStreamReceiver csr(*device->getExecutionEnvironment(), device->getRootDeviceIndex(), device->getDeviceBitfield()); MockBuffer buffer; auto initialTaskCount = csr.peekTaskCount(); MemObjSizeArray size = {{1, 1, 1}}; MemObjOffsetArray offset = {{0, 0, 0}}; std::unique_ptr command(new CommandMapUnmap(MapOperationType::MAP, buffer, size, offset, false, *cmdQ)); CompletionStamp completionStamp = command->submit(20, false); auto expectedTaskCount = initialTaskCount 
+ 1; EXPECT_EQ(expectedTaskCount, completionStamp.taskCount); } TEST(CommandTest, GivenTerminateFlagWhenSubmittingMapUnmapThenFlushIsAborted) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); std::unique_ptr cmdQ(new MockCommandQueue(nullptr, device.get(), nullptr, false)); MockCommandStreamReceiver csr(*device->getExecutionEnvironment(), device->getRootDeviceIndex(), device->getDeviceBitfield()); MockBuffer buffer; auto initialTaskCount = csr.peekTaskCount(); MemObjSizeArray size = {{1, 1, 1}}; MemObjOffsetArray offset = {{0, 0, 0}}; std::unique_ptr command(new CommandMapUnmap(MapOperationType::MAP, buffer, size, offset, false, *cmdQ)); CompletionStamp completionStamp = command->submit(20, true); auto submitTaskCount = csr.peekTaskCount(); EXPECT_EQ(initialTaskCount, submitTaskCount); auto expectedTaskCount = 0u; EXPECT_EQ(expectedTaskCount, completionStamp.taskCount); } TEST(CommandTest, GivenNoTerminateFlagWhenSubmittingMarkerThenCsrIsNotFlushed) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); std::unique_ptr cmdQ(new MockCommandQueue(nullptr, device.get(), nullptr, false)); MockCommandStreamReceiver csr(*device->getExecutionEnvironment(), device->getRootDeviceIndex(), device->getDeviceBitfield()); MockBuffer buffer; auto initialTaskCount = csr.peekTaskCount(); std::unique_ptr command(new CommandWithoutKernel(*cmdQ)); CompletionStamp completionStamp = command->submit(20, false); EXPECT_EQ(initialTaskCount, completionStamp.taskCount); EXPECT_EQ(initialTaskCount, csr.peekTaskCount()); } TEST(CommandTest, GivenTerminateFlagWhenSubmittingMarkerThenFlushIsAborted) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); std::unique_ptr cmdQ(new MockCommandQueue(nullptr, device.get(), nullptr, false)); MockCommandStreamReceiver csr(*device->getExecutionEnvironment(), device->getRootDeviceIndex(), device->getDeviceBitfield()); MockBuffer buffer; auto 
initialTaskCount = csr.peekTaskCount(); std::unique_ptr command(new CommandWithoutKernel(*cmdQ)); CompletionStamp completionStamp = command->submit(20, true); auto submitTaskCount = csr.peekTaskCount(); EXPECT_EQ(initialTaskCount, submitTaskCount); auto expectedTaskCount = 0u; EXPECT_EQ(expectedTaskCount, completionStamp.taskCount); } TEST(CommandTest, GivenGpuHangWhenSubmittingMapUnmapCommandsThenReturnedCompletionStampIndicatesGpuHang) { for (const auto operationType : {MapOperationType::MAP, MapOperationType::UNMAP}) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); std::unique_ptr cmdQ(new MockCommandQueue(nullptr, device.get(), nullptr, false)); cmdQ->waitUntilCompleteReturnValue = WaitStatus::GpuHang; MemObjSizeArray size = {{1, 1, 1}}; MemObjOffsetArray offset = {{0, 0, 0}}; MockBuffer buffer; buffer.isZeroCopy = false; buffer.callBaseTransferDataToHostPtr = false; buffer.callBaseTransferDataFromHostPtr = false; std::unique_ptr command(new CommandMapUnmap(operationType, buffer, size, offset, false, *cmdQ)); CompletionStamp completionStamp = command->submit(20, false); EXPECT_EQ(1, cmdQ->waitUntilCompleteCalledCount); EXPECT_EQ(CompletionStamp::gpuHang, completionStamp.taskCount); EXPECT_EQ(0, buffer.transferDataToHostPtrCalledCount); EXPECT_EQ(0, buffer.transferDataFromHostPtrCalledCount); } } TEST(CommandTest, GivenNoGpuHangWhenSubmittingMapUnmapCommandsThenReturnedCompletionStampDoesNotIndicateGpuHang) { constexpr size_t operationTypesCount{2}; constexpr static std::array operationTypes{MapOperationType::MAP, MapOperationType::UNMAP}; constexpr static std::array, operationTypesCount> expectedCallsCounts = { std::pair{1, 0}, std::pair{0, 1}}; for (auto i = 0u; i < operationTypesCount; ++i) { const auto operationType = operationTypes[i]; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); std::unique_ptr cmdQ(new MockCommandQueue(nullptr, device.get(), nullptr, false)); 
cmdQ->waitUntilCompleteReturnValue = WaitStatus::Ready; MemObjSizeArray size = {{1, 1, 1}}; MemObjOffsetArray offset = {{0, 0, 0}}; MockBuffer buffer; buffer.isZeroCopy = false; buffer.callBaseTransferDataToHostPtr = false; buffer.callBaseTransferDataFromHostPtr = false; std::unique_ptr command(new CommandMapUnmap(operationType, buffer, size, offset, false, *cmdQ)); CompletionStamp completionStamp = command->submit(20, false); EXPECT_EQ(1, cmdQ->waitUntilCompleteCalledCount); EXPECT_NE(CompletionStamp::gpuHang, completionStamp.taskCount); const auto &[expectedTransferDataToHostPtrCalledCount, expectedTransferDataFromHostPtrCalledCount] = expectedCallsCounts[i]; EXPECT_EQ(expectedTransferDataToHostPtrCalledCount, buffer.transferDataToHostPtrCalledCount); EXPECT_EQ(expectedTransferDataFromHostPtrCalledCount, buffer.transferDataFromHostPtrCalledCount); } } TEST(CommandTest, givenWaitlistRequestWhenCommandComputeKernelIsCreatedThenMakeLocalCopyOfWaitlist) { class MockCommandComputeKernel : public CommandComputeKernel { public: using CommandComputeKernel::eventsWaitlist; MockCommandComputeKernel(CommandQueue &commandQueue, std::unique_ptr &kernelOperation, std::vector &surfaces, Kernel *kernel) : CommandComputeKernel(commandQueue, kernelOperation, surfaces, false, false, false, nullptr, PreemptionMode::Disabled, kernel, 0) {} }; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); MockCommandQueue cmdQ(nullptr, device.get(), nullptr, false); MockKernelWithInternals kernel(*device); IndirectHeap *ih1 = nullptr, *ih2 = nullptr, *ih3 = nullptr; cmdQ.allocateHeapMemory(IndirectHeap::Type::DYNAMIC_STATE, 1, ih1); cmdQ.allocateHeapMemory(IndirectHeap::Type::INDIRECT_OBJECT, 1, ih2); cmdQ.allocateHeapMemory(IndirectHeap::Type::SURFACE_STATE, 1, ih3); auto cmdStream = new LinearStream(device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), 1, AllocationType::COMMAND_BUFFER, 
device->getDeviceBitfield()})); std::vector surfaces; auto kernelOperation = std::make_unique(cmdStream, *device->getDefaultEngine().commandStreamReceiver->getInternalAllocationStorage()); kernelOperation->setHeaps(ih1, ih2, ih3); UserEvent event1, event2, event3; cl_event waitlist[] = {&event1, &event2}; EventsRequest eventsRequest(2, waitlist, nullptr); MockCommandComputeKernel command(cmdQ, kernelOperation, surfaces, kernel); event1.incRefInternal(); event2.incRefInternal(); command.setEventsRequest(eventsRequest); waitlist[1] = &event3; EXPECT_EQ(static_cast(&event1), command.eventsWaitlist[0]); EXPECT_EQ(static_cast(&event2), command.eventsWaitlist[1]); } TEST(KernelOperationDestruction, givenKernelOperationWhenItIsDestructedThenAllAllocationsAreStoredInInternalStorageForReuse) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); MockCommandQueue cmdQ(nullptr, device.get(), nullptr, false); InternalAllocationStorage &allocationStorage = *device->getDefaultEngine().commandStreamReceiver->getInternalAllocationStorage(); auto &allocationsForReuse = allocationStorage.getAllocationsForReuse(); IndirectHeap *ih1 = nullptr, *ih2 = nullptr, *ih3 = nullptr; cmdQ.allocateHeapMemory(IndirectHeap::Type::DYNAMIC_STATE, 1, ih1); cmdQ.allocateHeapMemory(IndirectHeap::Type::INDIRECT_OBJECT, 1, ih2); cmdQ.allocateHeapMemory(IndirectHeap::Type::SURFACE_STATE, 1, ih3); auto cmdStream = new LinearStream(device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), 1, AllocationType::COMMAND_BUFFER, device->getDeviceBitfield()})); auto &heapAllocation1 = *ih1->getGraphicsAllocation(); auto &heapAllocation2 = *ih2->getGraphicsAllocation(); auto &heapAllocation3 = *ih3->getGraphicsAllocation(); auto &cmdStreamAllocation = *cmdStream->getGraphicsAllocation(); auto kernelOperation = std::make_unique(cmdStream, allocationStorage); kernelOperation->setHeaps(ih1, ih2, ih3); 
EXPECT_TRUE(allocationsForReuse.peekIsEmpty()); kernelOperation.reset(); EXPECT_TRUE(allocationsForReuse.peekContains(cmdStreamAllocation)); EXPECT_TRUE(allocationsForReuse.peekContains(heapAllocation1)); EXPECT_TRUE(allocationsForReuse.peekContains(heapAllocation2)); EXPECT_TRUE(allocationsForReuse.peekContains(heapAllocation3)); } template class MockCsr1 : public CommandStreamReceiverHw { public: CompletionStamp flushTask(LinearStream &commandStream, size_t commandStreamStart, const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh, uint32_t taskLevel, DispatchFlags &dispatchFlags, Device &device) override { passedDispatchFlags = dispatchFlags; return CompletionStamp(); } MockCsr1(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield) : CommandStreamReceiverHw::CommandStreamReceiverHw(executionEnvironment, rootDeviceIndex, deviceBitfield) {} DispatchFlags passedDispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); using CommandStreamReceiver::timestampPacketWriteEnabled; }; HWTEST_F(DispatchFlagsTests, givenCommandMapUnmapWhenSubmitThenPassCorrectDispatchFlags) { using CsrType = MockCsr1; SetUpImpl(); auto mockCmdQ = std::make_unique>(context.get(), device.get(), nullptr); auto mockCsr = static_cast(&mockCmdQ->getGpgpuCommandStreamReceiver()); MockBuffer buffer; MemObjSizeArray size = {{1, 1, 1}}; MemObjOffsetArray offset = {{0, 0, 0}}; std::unique_ptr command(new CommandMapUnmap(MapOperationType::MAP, buffer, size, offset, false, *mockCmdQ)); command->submit(20, false); PreemptionFlags flags = {}; PreemptionMode devicePreemption = mockCmdQ->getDevice().getPreemptionMode(); EXPECT_EQ(mockCmdQ->flushStamp->getStampReference(), mockCsr->passedDispatchFlags.flushStampReference); EXPECT_EQ(mockCmdQ->getThrottle(), mockCsr->passedDispatchFlags.throttle); EXPECT_EQ(PreemptionHelper::taskPreemptionMode(devicePreemption, flags), mockCsr->passedDispatchFlags.preemptionMode); 
EXPECT_EQ(GrfConfig::NotApplicable, mockCsr->passedDispatchFlags.numGrfRequired); EXPECT_EQ(L3CachingSettings::NotApplicable, mockCsr->passedDispatchFlags.l3CacheSettings); EXPECT_EQ(GrfConfig::NotApplicable, mockCsr->passedDispatchFlags.numGrfRequired); EXPECT_TRUE(mockCsr->passedDispatchFlags.blocking); EXPECT_TRUE(mockCsr->passedDispatchFlags.dcFlush); EXPECT_FALSE(mockCsr->passedDispatchFlags.useSLM); EXPECT_TRUE(mockCsr->passedDispatchFlags.guardCommandBufferWithPipeControl); EXPECT_FALSE(mockCsr->passedDispatchFlags.gsba32BitRequired); EXPECT_FALSE(mockCsr->passedDispatchFlags.requiresCoherency); EXPECT_EQ(mockCmdQ->getPriority() == QueuePriority::LOW, mockCsr->passedDispatchFlags.lowPriority); EXPECT_FALSE(mockCsr->passedDispatchFlags.implicitFlush); EXPECT_EQ(mockCmdQ->getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled(), mockCsr->passedDispatchFlags.outOfOrderExecutionAllowed); EXPECT_FALSE(mockCsr->passedDispatchFlags.epilogueRequired); } HWTEST_F(DispatchFlagsTests, givenCommandComputeKernelWhenSubmitThenPassCorrectDispatchFlags) { using CsrType = MockCsr1; SetUpImpl(); auto mockCmdQ = std::make_unique>(context.get(), device.get(), nullptr); auto mockCsr = static_cast(&mockCmdQ->getGpgpuCommandStreamReceiver()); IndirectHeap *ih1 = nullptr, *ih2 = nullptr, *ih3 = nullptr; mockCmdQ->allocateHeapMemory(IndirectHeap::Type::DYNAMIC_STATE, 1, ih1); mockCmdQ->allocateHeapMemory(IndirectHeap::Type::INDIRECT_OBJECT, 1, ih2); mockCmdQ->allocateHeapMemory(IndirectHeap::Type::SURFACE_STATE, 1, ih3); PreemptionMode preemptionMode = device->getPreemptionMode(); auto cmdStream = new LinearStream(device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), 1, AllocationType::COMMAND_BUFFER, device->getDeviceBitfield()})); std::vector surfaces; auto kernelOperation = std::make_unique(cmdStream, *mockCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage()); MockKernelWithInternals kernel(*device); 
kernelOperation->setHeaps(ih1, ih2, ih3); bool flushDC = false; bool slmUsed = false; bool ndRangeKernel = false; bool requiresCoherency = false; for (auto &surface : surfaces) { requiresCoherency |= surface->IsCoherent; } std::unique_ptr command(new CommandComputeKernel(*mockCmdQ, kernelOperation, surfaces, flushDC, slmUsed, ndRangeKernel, nullptr, preemptionMode, kernel, 1)); command->submit(20, false); EXPECT_FALSE(mockCsr->passedDispatchFlags.pipelineSelectArgs.specialPipelineSelectMode); EXPECT_EQ(kernel.mockKernel->isVmeKernel(), mockCsr->passedDispatchFlags.pipelineSelectArgs.mediaSamplerRequired); EXPECT_EQ(mockCmdQ->flushStamp->getStampReference(), mockCsr->passedDispatchFlags.flushStampReference); EXPECT_EQ(mockCmdQ->getThrottle(), mockCsr->passedDispatchFlags.throttle); EXPECT_EQ(preemptionMode, mockCsr->passedDispatchFlags.preemptionMode); EXPECT_EQ(kernel.mockKernel->getKernelInfo().kernelDescriptor.kernelAttributes.numGrfRequired, mockCsr->passedDispatchFlags.numGrfRequired); EXPECT_EQ(L3CachingSettings::l3CacheOn, mockCsr->passedDispatchFlags.l3CacheSettings); EXPECT_TRUE(mockCsr->passedDispatchFlags.blocking); EXPECT_EQ(flushDC, mockCsr->passedDispatchFlags.dcFlush); EXPECT_EQ(slmUsed, mockCsr->passedDispatchFlags.useSLM); EXPECT_TRUE(mockCsr->passedDispatchFlags.guardCommandBufferWithPipeControl); EXPECT_EQ(ndRangeKernel, mockCsr->passedDispatchFlags.gsba32BitRequired); EXPECT_EQ(requiresCoherency, mockCsr->passedDispatchFlags.requiresCoherency); EXPECT_EQ(mockCmdQ->getPriority() == QueuePriority::LOW, mockCsr->passedDispatchFlags.lowPriority); EXPECT_FALSE(mockCsr->passedDispatchFlags.implicitFlush); EXPECT_EQ(mockCmdQ->getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled(), mockCsr->passedDispatchFlags.outOfOrderExecutionAllowed); EXPECT_FALSE(mockCsr->passedDispatchFlags.epilogueRequired); } HWTEST_F(DispatchFlagsTests, givenClCommandCopyImageWhenSubmitThenFlushTextureCacheHasProperValue) { using CsrType = MockCsr1; SetUpImpl(); auto 
mockCmdQ = std::make_unique>(context.get(), device.get(), nullptr); auto mockCsr = static_cast(&mockCmdQ->getGpgpuCommandStreamReceiver()); IndirectHeap *ih1 = nullptr, *ih2 = nullptr, *ih3 = nullptr; mockCmdQ->allocateHeapMemory(IndirectHeap::Type::DYNAMIC_STATE, 1, ih1); mockCmdQ->allocateHeapMemory(IndirectHeap::Type::INDIRECT_OBJECT, 1, ih2); mockCmdQ->allocateHeapMemory(IndirectHeap::Type::SURFACE_STATE, 1, ih3); PreemptionMode preemptionMode = device->getPreemptionMode(); auto cmdStream = new LinearStream(device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), 1, AllocationType::COMMAND_BUFFER, device->getDeviceBitfield()})); std::vector surfaces; auto kernelOperation = std::make_unique(cmdStream, *mockCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage()); MockKernelWithInternals kernel(*device); kernelOperation->setHeaps(ih1, ih2, ih3); bool flushDC = false; bool slmUsed = false; uint32_t commandType = CL_COMMAND_COPY_IMAGE; bool requiresCoherency = false; for (auto &surface : surfaces) { requiresCoherency |= surface->IsCoherent; } std::unique_ptr command(new CommandComputeKernel(*mockCmdQ, kernelOperation, surfaces, flushDC, slmUsed, commandType, nullptr, preemptionMode, kernel, 1)); command->submit(20, false); EXPECT_FALSE(mockCsr->passedDispatchFlags.pipelineSelectArgs.specialPipelineSelectMode); EXPECT_EQ(kernel.mockKernel->isVmeKernel(), mockCsr->passedDispatchFlags.pipelineSelectArgs.mediaSamplerRequired); EXPECT_EQ(mockCmdQ->flushStamp->getStampReference(), mockCsr->passedDispatchFlags.flushStampReference); EXPECT_EQ(mockCmdQ->getThrottle(), mockCsr->passedDispatchFlags.throttle); EXPECT_EQ(preemptionMode, mockCsr->passedDispatchFlags.preemptionMode); EXPECT_EQ(kernel.mockKernel->getKernelInfo().kernelDescriptor.kernelAttributes.numGrfRequired, mockCsr->passedDispatchFlags.numGrfRequired); EXPECT_EQ(L3CachingSettings::l3CacheOn, mockCsr->passedDispatchFlags.l3CacheSettings); 
EXPECT_TRUE(mockCsr->passedDispatchFlags.blocking); EXPECT_EQ(flushDC, mockCsr->passedDispatchFlags.dcFlush); EXPECT_EQ(mockCmdQ->isTextureCacheFlushNeeded(commandType), mockCsr->passedDispatchFlags.textureCacheFlush); EXPECT_EQ(slmUsed, mockCsr->passedDispatchFlags.useSLM); EXPECT_TRUE(mockCsr->passedDispatchFlags.guardCommandBufferWithPipeControl); EXPECT_FALSE(mockCsr->passedDispatchFlags.gsba32BitRequired); EXPECT_EQ(requiresCoherency, mockCsr->passedDispatchFlags.requiresCoherency); EXPECT_EQ(mockCmdQ->getPriority() == QueuePriority::LOW, mockCsr->passedDispatchFlags.lowPriority); EXPECT_FALSE(mockCsr->passedDispatchFlags.implicitFlush); EXPECT_EQ(mockCmdQ->getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled(), mockCsr->passedDispatchFlags.outOfOrderExecutionAllowed); EXPECT_FALSE(mockCsr->passedDispatchFlags.epilogueRequired); } HWTEST_F(DispatchFlagsTests, givenCommandWithoutKernelWhenSubmitThenPassCorrectDispatchFlags) { using CsrType = MockCsr1; SetUpImpl(); auto mockCmdQ = std::make_unique>(context.get(), device.get(), nullptr); auto mockCsr = static_cast(&mockCmdQ->getGpgpuCommandStreamReceiver()); mockCsr->timestampPacketWriteEnabled = true; mockCmdQ->timestampPacketContainer = std::make_unique(); IndirectHeap *ih1 = nullptr, *ih2 = nullptr, *ih3 = nullptr; TimestampPacketDependencies timestampPacketDependencies; mockCmdQ->allocateHeapMemory(IndirectHeap::Type::DYNAMIC_STATE, 1, ih1); mockCmdQ->allocateHeapMemory(IndirectHeap::Type::INDIRECT_OBJECT, 1, ih2); mockCmdQ->allocateHeapMemory(IndirectHeap::Type::SURFACE_STATE, 1, ih3); auto cmdStream = new LinearStream(device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), 1, AllocationType::COMMAND_BUFFER, device->getDeviceBitfield()})); auto kernelOperation = std::make_unique(cmdStream, *mockCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage()); kernelOperation->setHeaps(ih1, ih2, ih3); std::unique_ptr command(new 
CommandWithoutKernel(*mockCmdQ, kernelOperation)); command->setTimestampPacketNode(*mockCmdQ->timestampPacketContainer, std::move(timestampPacketDependencies)); command->submit(20, false); EXPECT_EQ(mockCmdQ->flushStamp->getStampReference(), mockCsr->passedDispatchFlags.flushStampReference); EXPECT_EQ(mockCmdQ->getThrottle(), mockCsr->passedDispatchFlags.throttle); EXPECT_EQ(mockCmdQ->getDevice().getPreemptionMode(), mockCsr->passedDispatchFlags.preemptionMode); EXPECT_EQ(GrfConfig::NotApplicable, mockCsr->passedDispatchFlags.numGrfRequired); EXPECT_EQ(L3CachingSettings::NotApplicable, mockCsr->passedDispatchFlags.l3CacheSettings); EXPECT_TRUE(mockCsr->passedDispatchFlags.blocking); EXPECT_FALSE(mockCsr->passedDispatchFlags.dcFlush); EXPECT_FALSE(mockCsr->passedDispatchFlags.useSLM); EXPECT_TRUE(mockCsr->passedDispatchFlags.guardCommandBufferWithPipeControl); EXPECT_FALSE(mockCsr->passedDispatchFlags.gsba32BitRequired); EXPECT_FALSE(mockCsr->passedDispatchFlags.requiresCoherency); EXPECT_EQ(mockCmdQ->getPriority() == QueuePriority::LOW, mockCsr->passedDispatchFlags.lowPriority); EXPECT_FALSE(mockCsr->passedDispatchFlags.implicitFlush); EXPECT_EQ(mockCmdQ->getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled(), mockCsr->passedDispatchFlags.outOfOrderExecutionAllowed); EXPECT_FALSE(mockCsr->passedDispatchFlags.epilogueRequired); } HWTEST_F(DispatchFlagsTests, givenCommandComputeKernelWhenSubmitThenPassCorrectDispatchHints) { using CsrType = MockCsr1; SetUpImpl(); auto mockCmdQ = std::make_unique>(context.get(), device.get(), nullptr); auto mockCsr = static_cast(&mockCmdQ->getGpgpuCommandStreamReceiver()); IndirectHeap *ih1 = nullptr, *ih2 = nullptr, *ih3 = nullptr; mockCmdQ->allocateHeapMemory(IndirectHeap::Type::DYNAMIC_STATE, 1, ih1); mockCmdQ->allocateHeapMemory(IndirectHeap::Type::INDIRECT_OBJECT, 1, ih2); mockCmdQ->allocateHeapMemory(IndirectHeap::Type::SURFACE_STATE, 1, ih3); mockCmdQ->dispatchHints = 1234; PreemptionMode preemptionMode = 
device->getPreemptionMode(); auto cmdStream = new LinearStream(device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), 1, AllocationType::COMMAND_BUFFER, device->getDeviceBitfield()})); std::vector surfaces; auto kernelOperation = std::make_unique(cmdStream, *mockCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage()); MockKernelWithInternals kernel(*device); kernelOperation->setHeaps(ih1, ih2, ih3); bool flushDC = false; bool slmUsed = false; bool ndRangeKernel = false; bool requiresCoherency = false; for (auto &surface : surfaces) { requiresCoherency |= surface->IsCoherent; } std::unique_ptr command(new CommandComputeKernel(*mockCmdQ, kernelOperation, surfaces, flushDC, slmUsed, ndRangeKernel, nullptr, preemptionMode, kernel, 1)); command->submit(20, false); EXPECT_TRUE(mockCsr->passedDispatchFlags.epilogueRequired); EXPECT_EQ(1234u, mockCsr->passedDispatchFlags.engineHints); EXPECT_EQ(kernel.mockKernel->getThreadArbitrationPolicy(), mockCsr->passedDispatchFlags.threadArbitrationPolicy); } compute-runtime-22.14.22890/opencl/test/unit_test/helpers/test_preamble_dg2_and_later.cpp000066400000000000000000000063361422164147700314000ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/stream_properties.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/os_interface/hw_info_config.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/preamble/preamble_fixture.h" #include "opencl/source/helpers/hardware_commands_helper.h" using namespace NEO; using PreambleCfeStateDg2AndLater = PreambleFixture; using IsDG2AndLater = IsAtLeastXeHpgCore; HWTEST2_F(PreambleCfeStateDg2AndLater, whenprogramVFEStateIsCalledWithProperAdditionalKernelExecInfoThenProperStateIsSet, IsDG2AndLater) { using CFE_STATE 
= typename FamilyType::CFE_STATE; HardwareInfo hwInfo = *defaultHwInfo; const auto &hwInfoConfig = *HwInfoConfig::get(productFamily); hwInfo.platform.usRevId = hwInfoConfig.getHwRevIdFromStepping(REVISION_B, hwInfo); if (!hwInfoConfig.isDisableOverdispatchAvailable(hwInfo)) { GTEST_SKIP(); } auto pVfeCmd = PreambleHelper::getSpaceForVfeState(&linearStream, hwInfo, EngineGroupType::RenderCompute); StreamProperties properties{}; properties.frontEndState.disableOverdispatch.value = 1; PreambleHelper::programVfeState(pVfeCmd, hwInfo, 0u, 0, 0, properties); parseCommands(linearStream); auto cfeStateIt = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), cfeStateIt); auto cfeState = reinterpret_cast(*cfeStateIt); EXPECT_TRUE(cfeState->getComputeOverdispatchDisable()); cmdList.clear(); pVfeCmd = PreambleHelper::getSpaceForVfeState(&linearStream, hwInfo, EngineGroupType::RenderCompute); properties.frontEndState.disableOverdispatch.value = 0; PreambleHelper::programVfeState(pVfeCmd, hwInfo, 0u, 0, 0, properties); parseCommands(linearStream); cfeStateIt = find(cmdList.begin(), cmdList.end()); cfeStateIt++; ASSERT_NE(cmdList.end(), cfeStateIt); cfeState = reinterpret_cast(*cfeStateIt); EXPECT_FALSE(cfeState->getComputeOverdispatchDisable()); } HWTEST2_F(PreambleCfeStateDg2AndLater, givenSetDebugFlagWhenPreambleCfeStateIsProgrammedThenCFEStateParamsHaveSetValue, IsDG2AndLater) { using CFE_STATE = typename FamilyType::CFE_STATE; uint32_t expectedValue1 = 1u; DebugManagerStateRestore dbgRestore; DebugManager.flags.CFEComputeOverdispatchDisable.set(expectedValue1); uint64_t expectedAddress = 1 << CFE_STATE::SCRATCHSPACEBUFFER_BIT_SHIFT; auto pVfeCmd = PreambleHelper::getSpaceForVfeState(&linearStream, *defaultHwInfo, EngineGroupType::RenderCompute); StreamProperties emptyProperties{}; PreambleHelper::programVfeState(pVfeCmd, *defaultHwInfo, 0u, expectedAddress, 16u, emptyProperties); parseCommands(linearStream); auto cfeStateIt = find(cmdList.begin(), cmdList.end()); 
ASSERT_NE(cmdList.end(), cfeStateIt); auto cfeState = reinterpret_cast(*cfeStateIt); EXPECT_EQ(expectedValue1, cfeState->getComputeOverdispatchDisable()); } compute-runtime-22.14.22890/opencl/test/unit_test/helpers/test_preamble_xehp_and_later.cpp000066400000000000000000000720701422164147700316660ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/stream_properties.h" #include "shared/source/gmm_helper/client_context/gmm_client_context.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/state_base_address.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/unit_test/preamble/preamble_fixture.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "reg_configs_common.h" #include "test_traits_common.h" using namespace NEO; using ThreadArbitrationXeHPAndLater = PreambleFixture; using Platforms = IsWithinGfxCore; HWTEST2_F(ThreadArbitrationXeHPAndLater, whenGetDefaultThreadArbitrationPolicyIsCalledThenCorrectPolicyIsReturned, Platforms) { EXPECT_EQ(ThreadArbitrationPolicy::AgeBased, HwHelperHw::get().getDefaultThreadArbitrationPolicy()); } using ProgramPipelineXeHPAndLater = PreambleFixture; HWCMDTEST_F(IGFX_XE_HP_CORE, ProgramPipelineXeHPAndLater, whenCleanStateInPreambleIsSetAndProgramPipelineSelectIsCalledThenExtraPipelineSelectAndTwoExtraPipeControlsAdded) { typedef typename FamilyType::PIPELINE_SELECT PIPELINE_SELECT; typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; DebugManagerStateRestore stateRestore; DebugManager.flags.CleanStateInPreamble.set(true); LinearStream &cs = linearStream; PipelineSelectArgs pipelineArgs; auto hwInfo = pDevice->getHardwareInfo(); PreambleHelper::programPipelineSelect(&cs, 
pipelineArgs, hwInfo); parseCommands(cs, 0); auto numPipeControl = getCommandsList().size(); EXPECT_EQ(2u, numPipeControl); auto numPipelineSelect = getCommandsList().size(); EXPECT_EQ(2u, numPipelineSelect); } HWCMDTEST_F(IGFX_XE_HP_CORE, ProgramPipelineXeHPAndLater, givenDebugVariableWhenProgramPipelineSelectIsCalledThenItHasProperFieldsSet) { typedef typename FamilyType::PIPELINE_SELECT PIPELINE_SELECT; DebugManagerStateRestore stateRestore; DebugManager.flags.OverrideSystolicPipelineSelect.set(1); LinearStream &cs = linearStream; PipelineSelectArgs pipelineArgs; auto hwInfo = pDevice->getHardwareInfo(); PreambleHelper::programPipelineSelect(&cs, pipelineArgs, hwInfo); parseCommands(linearStream); auto itorCmd = find(cmdList.begin(), cmdList.end()); ASSERT_NE(itorCmd, cmdList.end()); auto cmd = genCmdCast(*itorCmd); EXPECT_TRUE(cmd->getSystolicModeEnable()); } using PreemptionWatermarkXeHPAndLater = PreambleFixture; HWCMDTEST_F(IGFX_XE_HP_CORE, PreemptionWatermarkXeHPAndLater, givenPreambleThenPreambleWorkAroundsIsNotProgrammed) { PreambleHelper::programGenSpecificPreambleWorkArounds(&linearStream, *defaultHwInfo); parseCommands(linearStream); auto cmd = findMmioCmd(cmdList.begin(), cmdList.end(), FfSliceCsChknReg2::address); ASSERT_EQ(nullptr, cmd); MockDevice mockDevice; mockDevice.setDebuggerActive(false); size_t expectedSize = PreemptionHelper::getRequiredPreambleSize(mockDevice); EXPECT_EQ(expectedSize, PreambleHelper::getAdditionalCommandsSize(mockDevice)); mockDevice.setDebuggerActive(true); expectedSize += PreambleHelper::getKernelDebuggingCommandsSize(mockDevice.isDebuggerActive()); EXPECT_EQ(expectedSize, PreambleHelper::getAdditionalCommandsSize(mockDevice)); } struct KernelCommandsXeHPAndLater : public PreambleVfeState { void SetUp() override { PreambleVfeState::SetUp(); pDevice->incRefInternal(); pClDevice = new MockClDevice{pDevice}; ASSERT_NE(nullptr, pClDevice); program = std::make_unique(toClDeviceVector(*pClDevice)); } void TearDown() override 
{ pClDevice->decRefInternal(); PreambleVfeState::TearDown(); } MockClDevice *pClDevice = nullptr; std::unique_ptr program; KernelInfo kernelInfo; }; HWCMDTEST_F(IGFX_XE_HP_CORE, KernelCommandsXeHPAndLater, whenKernelSizeIsRequiredThenReturnZero) { MockKernel kernel(program.get(), kernelInfo, *pClDevice); size_t expectedSize = 0; size_t actualSize = HardwareCommandsHelper::getSizeRequiredCS(); EXPECT_EQ(expectedSize, actualSize); } HWCMDTEST_F(IGFX_XE_HP_CORE, KernelCommandsXeHPAndLater, whenPipeControlForWaIsRequiredThenReturnFalse) { auto &hwInfo = pDevice->getHardwareInfo(); EXPECT_EQ(UnitTestHelper::isPipeControlWArequired(hwInfo), MemorySynchronizationCommands::isPipeControlWArequired(hwInfo)); } HWCMDTEST_F(IGFX_XE_HP_CORE, KernelCommandsXeHPAndLater, whenMediaInterfaceDescriptorLoadIsRequiredThenDoNotProgramNonExistingCommand) { size_t expectedSize = 0; EXPECT_EQ(expectedSize, linearStream.getUsed()); HardwareCommandsHelper::sendMediaInterfaceDescriptorLoad(linearStream, 0, 0); EXPECT_EQ(expectedSize, linearStream.getUsed()); } HWCMDTEST_F(IGFX_XE_HP_CORE, KernelCommandsXeHPAndLater, whenMediaStateFlushIsRequiredThenDoNotProgramNonExistingCommand) { size_t expectedSize = 0; EXPECT_EQ(expectedSize, linearStream.getUsed()); HardwareCommandsHelper::sendMediaStateFlush(linearStream, 0); EXPECT_EQ(expectedSize, linearStream.getUsed()); } using PreambleCfeStateXeHPAndLater = PreambleFixture; HWCMDTEST_F(IGFX_XE_HP_CORE, PreambleCfeStateXeHPAndLater, givenScratchEnabledWhenPreambleCfeStateIsProgrammedThenCheckMaxThreadsAddressFieldsAreProgrammed) { using CFE_STATE = typename FamilyType::CFE_STATE; uint64_t expectedAddress = 1 << CFE_STATE::SCRATCHSPACEBUFFER_BIT_SHIFT; uint32_t expectedMaxThreads = HwHelper::getMaxThreadsForVfe(*defaultHwInfo); auto pVfeCmd = PreambleHelper::getSpaceForVfeState(&linearStream, *defaultHwInfo, EngineGroupType::RenderCompute); StreamProperties emptyProperties{}; PreambleHelper::programVfeState(pVfeCmd, *defaultHwInfo, 0u, 
expectedAddress, expectedMaxThreads, emptyProperties); parseCommands(linearStream); auto cfeStateIt = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), cfeStateIt); auto cfeState = reinterpret_cast(*cfeStateIt); EXPECT_EQ(expectedMaxThreads, cfeState->getMaximumNumberOfThreads()); uint64_t address = cfeState->getScratchSpaceBuffer(); EXPECT_EQ(expectedAddress, address); } HWTEST2_F(PreambleCfeStateXeHPAndLater, givenNotSetDebugFlagWhenPreambleCfeStateIsProgrammedThenCFEStateParamsHaveNotSetValue, IsAtLeastXeHpCore) { using CFE_STATE = typename FamilyType::CFE_STATE; auto cfeState = reinterpret_cast(linearStream.getSpace(sizeof(CFE_STATE))); *cfeState = FamilyType::cmdInitCfeState; [[maybe_unused]] uint32_t numberOfWalkers = 0u; [[maybe_unused]] uint32_t fusedEuDispach = 0u; if constexpr (TestTraits::numberOfWalkersInCfeStateSupported) { numberOfWalkers = cfeState->getNumberOfWalkers(); } if constexpr (TestTraits::fusedEuDispatchSupported) { fusedEuDispach = cfeState->getFusedEuDispatch(); } uint32_t overDispatchControl = static_cast(cfeState->getOverDispatchControl()); uint64_t expectedAddress = 1 << CFE_STATE::SCRATCHSPACEBUFFER_BIT_SHIFT; uint32_t expectedMaxThreads = HwHelper::getMaxThreadsForVfe(*defaultHwInfo); auto pVfeCmd = PreambleHelper::getSpaceForVfeState(&linearStream, *defaultHwInfo, EngineGroupType::RenderCompute); StreamProperties emptyProperties{}; PreambleHelper::programVfeState(pVfeCmd, *defaultHwInfo, 0u, expectedAddress, expectedMaxThreads, emptyProperties); uint32_t maximumNumberOfThreads = cfeState->getMaximumNumberOfThreads(); if constexpr (TestTraits::numberOfWalkersInCfeStateSupported) { EXPECT_EQ(numberOfWalkers, cfeState->getNumberOfWalkers()); } if constexpr (TestTraits::fusedEuDispatchSupported) { EXPECT_EQ(fusedEuDispach, cfeState->getFusedEuDispatch()); } EXPECT_NE(expectedMaxThreads, maximumNumberOfThreads); EXPECT_EQ(overDispatchControl, static_cast(cfeState->getOverDispatchControl())); } 
HWTEST2_F(PreambleCfeStateXeHPAndLater, givenSetDebugFlagWhenPreambleCfeStateIsProgrammedThenCFEStateParamsHaveSetValue, IsAtLeastXeHpCore) { using CFE_STATE = typename FamilyType::CFE_STATE; uint32_t expectedValue1 = 1u; uint32_t expectedValue2 = 2u; DebugManagerStateRestore dbgRestore; DebugManager.flags.CFEFusedEUDispatch.set(expectedValue1); DebugManager.flags.CFEOverDispatchControl.set(expectedValue1); DebugManager.flags.CFESingleSliceDispatchCCSMode.set(expectedValue1); DebugManager.flags.CFELargeGRFThreadAdjustDisable.set(expectedValue1); DebugManager.flags.CFENumberOfWalkers.set(expectedValue2); DebugManager.flags.CFEMaximumNumberOfThreads.set(expectedValue2); uint64_t expectedAddress = 1 << CFE_STATE::SCRATCHSPACEBUFFER_BIT_SHIFT; auto pVfeCmd = PreambleHelper::getSpaceForVfeState(&linearStream, *defaultHwInfo, EngineGroupType::RenderCompute); StreamProperties emptyProperties{}; PreambleHelper::programVfeState(pVfeCmd, *defaultHwInfo, 0u, expectedAddress, 16u, emptyProperties); parseCommands(linearStream); auto cfeStateIt = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), cfeStateIt); auto cfeState = reinterpret_cast(*cfeStateIt); EXPECT_EQ(expectedValue1, static_cast(cfeState->getOverDispatchControl())); EXPECT_EQ(expectedValue1, cfeState->getLargeGRFThreadAdjustDisable()); if constexpr (TestTraits::numberOfWalkersInCfeStateSupported) { EXPECT_EQ(expectedValue2, cfeState->getNumberOfWalkers()); } EXPECT_EQ(expectedValue2, cfeState->getMaximumNumberOfThreads()); } using XeHpCommandStreamReceiverFlushTaskTests = UltCommandStreamReceiverTest; HWCMDTEST_F(IGFX_XE_HP_CORE, XeHpCommandStreamReceiverFlushTaskTests, whenFlushingCommandStreamReceiverThenExpectStateBaseAddressEqualsIndirectObjectBaseAddress) { using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); flushTask(commandStreamReceiver); HardwareParse hwParserCsr; 
hwParserCsr.parseCommands(commandStreamReceiver.commandStream, 0); hwParserCsr.findHardwareCommands(); ASSERT_NE(nullptr, hwParserCsr.cmdStateBaseAddress); auto stateBaseAddress = static_cast(hwParserCsr.cmdStateBaseAddress); if constexpr (is64bit) { EXPECT_EQ(commandStreamReceiver.getMemoryManager()->getInternalHeapBaseAddress(commandStreamReceiver.rootDeviceIndex, ioh.getGraphicsAllocation()->isAllocatedInLocalMemoryPool()), stateBaseAddress->getGeneralStateBaseAddress()); } else { EXPECT_EQ(0u, stateBaseAddress->getGeneralStateBaseAddress()); } } HWCMDTEST_F(IGFX_XE_HP_CORE, XeHpCommandStreamReceiverFlushTaskTests, whenFlushCalledThenStateBaseAddressHasAllCachesOn) { using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); auto gmmHelper = pDevice->getRootDeviceEnvironment().getGmmHelper(); flushTask(commandStreamReceiver); HardwareParse hwParserCsr; hwParserCsr.parseCommands(commandStreamReceiver.commandStream, 0); hwParserCsr.findHardwareCommands(); ASSERT_NE(nullptr, hwParserCsr.cmdStateBaseAddress); auto stateBaseAddress = static_cast(hwParserCsr.cmdStateBaseAddress); auto expectedMocsForStateless = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST); auto expectedMocsForHeap = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_STATE_HEAP_BUFFER); EXPECT_EQ(expectedMocsForHeap, stateBaseAddress->getSurfaceStateMemoryObjectControlState()); EXPECT_EQ(expectedMocsForHeap, stateBaseAddress->getDynamicStateMemoryObjectControlState()); EXPECT_EQ(expectedMocsForHeap, stateBaseAddress->getGeneralStateMemoryObjectControlState()); EXPECT_EQ(expectedMocsForHeap, stateBaseAddress->getInstructionMemoryObjectControlState()); EXPECT_EQ(expectedMocsForHeap, stateBaseAddress->getBindlessSurfaceStateMemoryObjectControlState()); EXPECT_EQ(expectedMocsForHeap, stateBaseAddress->getBindlessSamplerStateMemoryObjectControlState()); EXPECT_EQ(expectedMocsForStateless, 
stateBaseAddress->getStatelessDataPortAccessMemoryObjectControlState()); } HWCMDTEST_F(IGFX_XE_HP_CORE, XeHpCommandStreamReceiverFlushTaskTests, whenFlushCalledThenStateBaseAddressHasAllCachesOffWhenDebugFlagIsPresent) { DebugManagerStateRestore restorer; DebugManager.flags.DisableCachingForHeaps.set(1); using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); auto gmmHelper = pDevice->getRootDeviceEnvironment().getGmmHelper(); flushTask(commandStreamReceiver); HardwareParse hwParserCsr; hwParserCsr.parseCommands(commandStreamReceiver.commandStream, 0); hwParserCsr.findHardwareCommands(); ASSERT_NE(nullptr, hwParserCsr.cmdStateBaseAddress); auto stateBaseAddress = static_cast(hwParserCsr.cmdStateBaseAddress); auto expectedMocsForHeap = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_SYSTEM_MEMORY_BUFFER_CACHELINE_MISALIGNED); EXPECT_EQ(expectedMocsForHeap, stateBaseAddress->getSurfaceStateMemoryObjectControlState()); EXPECT_EQ(expectedMocsForHeap, stateBaseAddress->getDynamicStateMemoryObjectControlState()); EXPECT_EQ(expectedMocsForHeap, stateBaseAddress->getGeneralStateMemoryObjectControlState()); EXPECT_EQ(expectedMocsForHeap, stateBaseAddress->getInstructionMemoryObjectControlState()); EXPECT_EQ(expectedMocsForHeap, stateBaseAddress->getBindlessSurfaceStateMemoryObjectControlState()); EXPECT_EQ(expectedMocsForHeap, stateBaseAddress->getBindlessSamplerStateMemoryObjectControlState()); } HWCMDTEST_F(IGFX_XE_HP_CORE, XeHpCommandStreamReceiverFlushTaskTests, givenL3ToL1DebugFlagWhenStatelessMocsIsProgrammedThenItHasL1CachingOn) { DebugManagerStateRestore restore; DebugManager.flags.ForceL1Caching.set(1u); using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); flushTask(commandStreamReceiver); HardwareParse hwParserCsr; hwParserCsr.parseCommands(commandStreamReceiver.commandStream, 0); 
hwParserCsr.findHardwareCommands(); ASSERT_NE(nullptr, hwParserCsr.cmdStateBaseAddress); auto stateBaseAddress = static_cast(hwParserCsr.cmdStateBaseAddress); auto expectedMocs = pDevice->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST); EXPECT_EQ(expectedMocs, stateBaseAddress->getStatelessDataPortAccessMemoryObjectControlState()); } HWCMDTEST_F(IGFX_XE_HP_CORE, XeHpCommandStreamReceiverFlushTaskTests, givenForceL1CachingDebugFlagDisabledWhenStatelessMocsIsProgrammedThenItHasL3CachingOn) { DebugManagerStateRestore restore; DebugManager.flags.ForceL1Caching.set(0u); using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); flushTask(commandStreamReceiver); HardwareParse hwParserCsr; hwParserCsr.parseCommands(commandStreamReceiver.commandStream, 0); hwParserCsr.findHardwareCommands(); ASSERT_NE(nullptr, hwParserCsr.cmdStateBaseAddress); auto stateBaseAddress = static_cast(hwParserCsr.cmdStateBaseAddress); auto expectedMocs = pDevice->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); EXPECT_EQ(expectedMocs, stateBaseAddress->getStatelessDataPortAccessMemoryObjectControlState()); } HWCMDTEST_F(IGFX_XE_HP_CORE, XeHpCommandStreamReceiverFlushTaskTests, whenFlushingCommandStreamReceiverThenExpectBindlessBaseAddressEqualSurfaceStateBaseAddress) { using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); flushTask(commandStreamReceiver); HardwareParse hwParserCsr; hwParserCsr.parseCommands(commandStreamReceiver.commandStream, 0); hwParserCsr.findHardwareCommands(); ASSERT_NE(nullptr, hwParserCsr.cmdStateBaseAddress); auto stateBaseAddress = static_cast(hwParserCsr.cmdStateBaseAddress); auto surfaceStateBaseAddress = ssh.getHeapGpuBase(); EXPECT_EQ(surfaceStateBaseAddress, stateBaseAddress->getBindlessSurfaceStateBaseAddress()); 
EXPECT_EQ(surfaceStateBaseAddress, stateBaseAddress->getSurfaceStateBaseAddress()); uint32_t bindlessSurfaceSize = static_cast(ssh.getMaxAvailableSpace() / sizeof(RENDER_SURFACE_STATE)) - 1; EXPECT_EQ(bindlessSurfaceSize, stateBaseAddress->getBindlessSurfaceStateSize()); } HWCMDTEST_F(IGFX_XE_HP_CORE, XeHpCommandStreamReceiverFlushTaskTests, whenFlushingCommandStreamReceiverThenSetBindlessSamplerStateBaseAddressModifyEnable) { using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); flushTask(commandStreamReceiver); HardwareParse hwParserCsr; hwParserCsr.parseCommands(commandStreamReceiver.commandStream, 0); hwParserCsr.findHardwareCommands(); ASSERT_NE(nullptr, hwParserCsr.cmdStateBaseAddress); auto stateBaseAddress = static_cast(hwParserCsr.cmdStateBaseAddress); EXPECT_TRUE(stateBaseAddress->getBindlessSamplerStateBaseAddressModifyEnable()); EXPECT_EQ(0u, stateBaseAddress->getBindlessSamplerStateBaseAddress()); EXPECT_EQ(0u, stateBaseAddress->getBindlessSamplerStateBufferSize()); } HWCMDTEST_F(IGFX_XE_HP_CORE, XeHpCommandStreamReceiverFlushTaskTests, givenMultEngineQueueFalseWhenFlushingCommandStreamReceiverThenSetPartialWriteFieldsTrue) { using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); flushTask(commandStreamReceiver); HardwareParse hwParserCsr; hwParserCsr.parseCommands(commandStreamReceiver.commandStream, 0); hwParserCsr.findHardwareCommands(); ASSERT_NE(nullptr, hwParserCsr.cmdStateBaseAddress); auto stateBaseAddress = static_cast(hwParserCsr.cmdStateBaseAddress); EXPECT_TRUE(stateBaseAddress->getDisableSupportForMultiGpuAtomicsForStatelessAccesses()); EXPECT_TRUE(stateBaseAddress->getDisableSupportForMultiGpuPartialWritesForStatelessMessages()); } struct MultiGpuGlobalAtomicsTest : public 
XeHpCommandStreamReceiverFlushTaskTests, public ::testing::WithParamInterface> { }; HWCMDTEST_P(IGFX_XE_HP_CORE, MultiGpuGlobalAtomicsTest, givenFlushingCommandStreamReceiverThenDisableSupportForMultiGpuAtomicsForStatelessAccessesIsSetCorrectly) { bool isMultiOsContextCapable, useGlobalAtomics, areMultipleSubDevicesInContext, enableMultiGpuAtomicsOptimization; std::tie(isMultiOsContextCapable, useGlobalAtomics, areMultipleSubDevicesInContext, enableMultiGpuAtomicsOptimization) = GetParam(); DebugManagerStateRestore stateRestore; DebugManager.flags.EnableMultiGpuAtomicsOptimization.set(enableMultiGpuAtomicsOptimization); using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.multiOsContextCapable = isMultiOsContextCapable; flushTaskFlags.useGlobalAtomics = useGlobalAtomics; flushTaskFlags.areMultipleSubDevicesInContext = areMultipleSubDevicesInContext; flushTask(commandStreamReceiver, false, 0, false, false); HardwareParse hwParserCsr; hwParserCsr.parseCommands(commandStreamReceiver.commandStream, 0); hwParserCsr.findHardwareCommands(); ASSERT_NE(nullptr, hwParserCsr.cmdStateBaseAddress); auto stateBaseAddress = static_cast(hwParserCsr.cmdStateBaseAddress); auto enabled = isMultiOsContextCapable; if (enableMultiGpuAtomicsOptimization) { enabled = useGlobalAtomics && (enabled || areMultipleSubDevicesInContext); } EXPECT_EQ(!enabled, stateBaseAddress->getDisableSupportForMultiGpuAtomicsForStatelessAccesses()); } INSTANTIATE_TEST_CASE_P(MultiGpuGlobalAtomics, MultiGpuGlobalAtomicsTest, ::testing::Combine( ::testing::Bool(), ::testing::Bool(), ::testing::Bool(), ::testing::Bool())); HWCMDTEST_F(IGFX_XE_HP_CORE, XeHpCommandStreamReceiverFlushTaskTests, givenDebugKeysThatOverrideMultiGpuSettingWhenStateBaseAddressIsProgrammedThenValuesMatch) { DebugManagerStateRestore restorer; using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; auto 
&commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); DebugManager.flags.ForceMultiGpuAtomics.set(0); DebugManager.flags.ForceMultiGpuPartialWrites.set(0); flushTask(commandStreamReceiver); HardwareParse hwParserCsr; hwParserCsr.parseCommands(commandStreamReceiver.commandStream, 0); hwParserCsr.findHardwareCommands(); ASSERT_NE(nullptr, hwParserCsr.cmdStateBaseAddress); auto stateBaseAddress = static_cast(hwParserCsr.cmdStateBaseAddress); EXPECT_EQ(0u, stateBaseAddress->getDisableSupportForMultiGpuAtomicsForStatelessAccesses()); EXPECT_EQ(0u, stateBaseAddress->getDisableSupportForMultiGpuPartialWritesForStatelessMessages()); } HWCMDTEST_F(IGFX_XE_HP_CORE, XeHpCommandStreamReceiverFlushTaskTests, givenMultEngineQueueTrueWhenFlushingCommandStreamReceiverThenSetPartialWriteFieldsFalse) { using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.multiOsContextCapable = true; flushTask(commandStreamReceiver); HardwareParse hwParserCsr; hwParserCsr.parseCommands(commandStreamReceiver.commandStream, 0); hwParserCsr.findHardwareCommands(); ASSERT_NE(nullptr, hwParserCsr.cmdStateBaseAddress); auto stateBaseAddress = static_cast(hwParserCsr.cmdStateBaseAddress); EXPECT_TRUE(stateBaseAddress->getDisableSupportForMultiGpuAtomicsForStatelessAccesses()); EXPECT_FALSE(stateBaseAddress->getDisableSupportForMultiGpuPartialWritesForStatelessMessages()); } using StateBaseAddressXeHPAndLaterTests = XeHpCommandStreamReceiverFlushTaskTests; struct CompressionParamsSupportedMatcher { template static constexpr bool isMatched() { if constexpr (HwMapper::GfxProduct::supportsCmdSet(IGFX_XE_HP_CORE)) { return TestTraits::get()>::surfaceStateCompressionParamsSupported; } return false; } }; HWTEST2_F(StateBaseAddressXeHPAndLaterTests, givenMemoryCompressionEnabledWhenAppendingSbaThenEnableStatelessCompressionForAllStatelessAccesses, CompressionParamsSupportedMatcher) { auto 
memoryManager = pDevice->getExecutionEnvironment()->memoryManager.get(); AllocationProperties properties(pDevice->getRootDeviceIndex(), 1, AllocationType::BUFFER, pDevice->getDeviceBitfield()); auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(properties); IndirectHeap indirectHeap(allocation, 1); for (auto memoryCompressionState : {MemoryCompressionState::NotApplicable, MemoryCompressionState::Disabled, MemoryCompressionState::Enabled}) { auto sbaCmd = FamilyType::cmdInitStateBaseAddress; StateBaseAddressHelper::appendStateBaseAddressParameters(&sbaCmd, &indirectHeap, true, 0, pDevice->getRootDeviceEnvironment().getGmmHelper(), false, memoryCompressionState, true, false, 1u); if (memoryCompressionState == MemoryCompressionState::Enabled) { EXPECT_EQ(FamilyType::STATE_BASE_ADDRESS::ENABLE_MEMORY_COMPRESSION_FOR_ALL_STATELESS_ACCESSES_ENABLED, sbaCmd.getEnableMemoryCompressionForAllStatelessAccesses()); } else { EXPECT_EQ(FamilyType::STATE_BASE_ADDRESS::ENABLE_MEMORY_COMPRESSION_FOR_ALL_STATELESS_ACCESSES_DISABLED, sbaCmd.getEnableMemoryCompressionForAllStatelessAccesses()); } } memoryManager->freeGraphicsMemory(allocation); } HWCMDTEST_F(IGFX_XE_HP_CORE, StateBaseAddressXeHPAndLaterTests, givenNonZeroInternalHeapBaseAddressWhenSettingIsDisabledThenExpectCommandValueZero) { auto memoryManager = pDevice->getExecutionEnvironment()->memoryManager.get(); AllocationProperties properties(pDevice->getRootDeviceIndex(), 1, AllocationType::BUFFER, pDevice->getDeviceBitfield()); auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(properties); IndirectHeap indirectHeap(allocation, 1); auto sbaCmd = FamilyType::cmdInitStateBaseAddress; uint64_t ihba = 0x80010000ull; StateBaseAddressHelper::appendStateBaseAddressParameters(&sbaCmd, &indirectHeap, false, ihba, pDevice->getRootDeviceEnvironment().getGmmHelper(), false, MemoryCompressionState::NotApplicable, true, false, 1u); EXPECT_EQ(0ull, sbaCmd.getGeneralStateBaseAddress()); 
memoryManager->freeGraphicsMemory(allocation); } using RenderSurfaceStateXeHPAndLaterTests = XeHpCommandStreamReceiverFlushTaskTests; HWCMDTEST_F(IGFX_XE_HP_CORE, RenderSurfaceStateXeHPAndLaterTests, givenSpecificProductFamilyWhenAppendingRssThenProgramGpuCoherency) { auto memoryManager = pDevice->getExecutionEnvironment()->memoryManager.get(); size_t allocationSize = MemoryConstants::pageSize; AllocationProperties properties(pDevice->getRootDeviceIndex(), allocationSize, AllocationType::BUFFER, pDevice->getDeviceBitfield()); auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(properties); auto rssCmd = FamilyType::cmdInitRenderSurfaceState; MockContext context(pClDevice); auto multiGraphicsAllocation = MultiGraphicsAllocation(pClDevice->getRootDeviceIndex()); multiGraphicsAllocation.addAllocation(allocation); std::unique_ptr> buffer(static_cast *>( BufferHw::create(&context, {}, 0, 0, allocationSize, nullptr, nullptr, multiGraphicsAllocation, false, false, false))); NEO::EncodeSurfaceStateArgs args; args.outMemory = &rssCmd; args.graphicsAddress = allocation->getGpuAddress(); args.size = allocation->getUnderlyingBufferSize(); args.mocs = buffer->getMocsValue(false, false, pClDevice->getRootDeviceIndex()); args.numAvailableDevices = pClDevice->getNumGenericSubDevices(); args.allocation = allocation; args.gmmHelper = pClDevice->getGmmHelper(); args.areMultipleSubDevicesInContext = true; EncodeSurfaceState::encodeBuffer(args); EXPECT_EQ(FamilyType::RENDER_SURFACE_STATE::COHERENCY_TYPE_GPU_COHERENT, rssCmd.getCoherencyType()); } using PipelineSelectTest = ::testing::Test; HWCMDTEST_F(IGFX_XE_HP_CORE, PipelineSelectTest, whenCallingIsSpecialPipelineSelectModeChangedThenReturnCorrectValue) { using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT; bool oldPipelineSelectSpecialMode = true; bool newPipelineSelectSpecialMode = false; auto result = PreambleHelper::isSpecialPipelineSelectModeChanged(oldPipelineSelectSpecialMode, 
newPipelineSelectSpecialMode, *defaultHwInfo); EXPECT_TRUE(result); } HWCMDTEST_F(IGFX_XE_HP_CORE, PipelineSelectTest, WhenProgramPipelineSelectThenProperMaskIsSet) { using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT; PIPELINE_SELECT cmd = FamilyType::cmdInitPipelineSelect; LinearStream pipelineSelectStream(&cmd, sizeof(cmd)); PreambleHelper::programPipelineSelect(&pipelineSelectStream, {}, *defaultHwInfo); auto expectedMask = pipelineSelectEnablePipelineSelectMaskBits; if constexpr (FamilyType::isUsingMediaSamplerDopClockGate) { expectedMask |= pipelineSelectMediaSamplerDopClockGateMaskBits; } if (PreambleHelper::isSystolicModeConfigurable(*defaultHwInfo)) { expectedMask |= pipelineSelectSystolicModeEnableMaskBits; } EXPECT_EQ(expectedMask, cmd.getMaskBits()); }compute-runtime-22.14.22890/opencl/test/unit_test/helpers/timestamp_packet_1_tests.cpp000066400000000000000000002661221422164147700310020ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/timestamp_packet.h" #include "shared/source/utilities/tag_allocator.h" #include "shared/test/common/cmd_parse/hw_parse.h" #include "shared/test/common/helpers/dispatch_flags_helper.h" #include "shared/test/common/mocks/mock_csr.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/mocks/mock_execution_environment.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "shared/test/common/mocks/mock_timestamp_container.h" #include "shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/source/command_queue/gpgpu_walker.h" #include "opencl/source/command_queue/hardware_interface.h" #include "opencl/source/event/user_event.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/helpers/timestamp_packet_tests.h" #include "opencl/test/unit_test/mocks/mock_mdi.h" #include 
"opencl/test/unit_test/mocks/mock_platform.h" using namespace NEO; HWTEST_F(TimestampPacketTests, givenTagNodeWhenSemaphoreIsProgrammedThenUseGpuAddress) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; TimestampPackets tag; MockTagNode mockNode; mockNode.tagForCpuAccess = &tag; mockNode.gpuAddress = 0x1230000; auto &cmdStream = mockCmdQ->getCS(0); TimestampPacketHelper::programSemaphore(cmdStream, mockNode); HardwareParse hwParser; hwParser.parseCommands(cmdStream, 0); auto it = hwParser.cmdList.begin(); verifySemaphore(genCmdCast(*it++), &mockNode, 0); } HWTEST_F(TimestampPacketTests, givenTagNodeWithPacketsUsed2WhenSemaphoreIsProgrammedThenUseGpuAddress) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; TimestampPackets tag; MockTagNode mockNode; mockNode.tagForCpuAccess = &tag; mockNode.gpuAddress = 0x1230000; mockNode.setPacketsUsed(2); auto &cmdStream = mockCmdQ->getCS(0); TimestampPacketHelper::programSemaphore(cmdStream, mockNode); HardwareParse hwParser; hwParser.parseCommands(cmdStream, 0); auto it = hwParser.cmdList.begin(); for (uint32_t packetId = 0; packetId < mockNode.getPacketsUsed(); packetId++) { verifySemaphore(genCmdCast(*it++), &mockNode, packetId); } } TEST_F(TimestampPacketTests, givenTagNodeWhatAskingForGpuAddressesThenReturnCorrectValue) { TimestampPackets tag; MockTagNode mockNode; mockNode.tagForCpuAccess = &tag; mockNode.gpuAddress = 0x1230000; auto expectedEndAddress = mockNode.getGpuAddress() + (2 * sizeof(uint32_t)); EXPECT_EQ(expectedEndAddress, TimestampPacketHelper::getContextEndGpuAddress(mockNode)); } TEST_F(TimestampPacketSimpleTests, givenTimestampPacketContainerWhenMovedThenMoveAllNodes) { EXPECT_TRUE(std::is_move_constructible::value); EXPECT_TRUE(std::is_move_assignable::value); EXPECT_FALSE(std::is_copy_assignable::value); EXPECT_FALSE(std::is_copy_constructible::value); struct MockTagNode : public TagNode> { void returnTag() override { returnCalls++; } using TagNode>::refCount; uint32_t 
returnCalls = 0; }; MockTagNode node0; MockTagNode node1; { TimestampPacketContainer timestampPacketContainer0; TimestampPacketContainer timestampPacketContainer1; timestampPacketContainer0.add(&node0); timestampPacketContainer0.add(&node1); timestampPacketContainer1 = std::move(timestampPacketContainer0); EXPECT_EQ(0u, node0.returnCalls); EXPECT_EQ(0u, node1.returnCalls); EXPECT_EQ(2u, timestampPacketContainer1.peekNodes().size()); EXPECT_EQ(&node0, timestampPacketContainer1.peekNodes()[0]); EXPECT_EQ(&node1, timestampPacketContainer1.peekNodes()[1]); } EXPECT_EQ(1u, node0.returnCalls); EXPECT_EQ(1u, node1.returnCalls); } HWTEST_F(TimestampPacketSimpleTests, whenNewTagIsTakenThenReinitialize) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MockMemoryManager memoryManager(executionEnvironment); MockTagAllocator allocator(0, &memoryManager, 1); using MockNode = TagNode; auto firstNode = static_cast(allocator.getTag()); auto i = 0u; for (auto &packet : firstNode->tagForCpuAccess->packets) { packet.contextStart = i++; packet.globalStart = i++; packet.contextEnd = i++; packet.globalEnd = i++; } setTagToReadyState(firstNode); allocator.returnTag(firstNode); auto secondNode = allocator.getTag(); EXPECT_EQ(secondNode, firstNode); for (const auto &packet : firstNode->tagForCpuAccess->packets) { EXPECT_EQ(1u, packet.contextStart); EXPECT_EQ(1u, packet.globalStart); EXPECT_EQ(1u, packet.contextEnd); EXPECT_EQ(1u, packet.globalEnd); } EXPECT_EQ(1u, firstNode->getPacketsUsed()); } TEST_F(TimestampPacketSimpleTests, whenObjectIsCreatedThenInitializeAllStamps) { MockTimestampPacketStorage timestampPacketStorage; EXPECT_EQ(TimestampPacketSizeControl::preferredPacketCount * sizeof(timestampPacketStorage.packets[0]), sizeof(timestampPacketStorage.packets)); for (const auto &packet : timestampPacketStorage.packets) { EXPECT_EQ(1u, packet.contextStart); EXPECT_EQ(1u, packet.globalStart); EXPECT_EQ(1u, packet.contextEnd); EXPECT_EQ(1u, packet.globalEnd); } } 
HWTEST_F(TimestampPacketTests, givenCommandStreamReceiverHwWhenObtainingPreferredTagPoolSizeThenReturnCorrectValue) { OsContext &osContext = *executionEnvironment->memoryManager->getRegisteredEngines()[0].osContext; CommandStreamReceiverHw csr(*executionEnvironment, 0, osContext.getDeviceBitfield()); EXPECT_EQ(2048u, csr.getPreferredTagPoolSize()); } HWTEST_F(TimestampPacketTests, givenDebugFlagSetWhenCreatingAllocatorThenUseCorrectSize) { OsContext &osContext = *executionEnvironment->memoryManager->getRegisteredEngines()[0].osContext; { CommandStreamReceiverHw csr(*executionEnvironment, 0, osContext.getDeviceBitfield()); csr.setupContext(osContext); auto allocator = csr.getTimestampPacketAllocator(); auto tag = allocator->getTag(); auto size = tag->getSinglePacketSize(); EXPECT_EQ(4u * sizeof(typename FamilyType::TimestampPacketType), size); } { DebugManager.flags.OverrideTimestampPacketSize.set(4); CommandStreamReceiverHw csr(*executionEnvironment, 0, osContext.getDeviceBitfield()); csr.setupContext(osContext); auto allocator = csr.getTimestampPacketAllocator(); auto tag = allocator->getTag(); auto size = tag->getSinglePacketSize(); EXPECT_EQ(4u * sizeof(uint32_t), size); } { DebugManager.flags.OverrideTimestampPacketSize.set(8); CommandStreamReceiverHw csr(*executionEnvironment, 0, osContext.getDeviceBitfield()); csr.setupContext(osContext); auto allocator = csr.getTimestampPacketAllocator(); auto tag = allocator->getTag(); auto size = tag->getSinglePacketSize(); EXPECT_EQ(4u * sizeof(uint64_t), size); } { DebugManager.flags.OverrideTimestampPacketSize.set(-1); CommandStreamReceiverHw csr(*executionEnvironment, 0, osContext.getDeviceBitfield()); csr.setupContext(osContext); DebugManager.flags.OverrideTimestampPacketSize.set(12); EXPECT_ANY_THROW(csr.getTimestampPacketAllocator()); } } HWCMDTEST_F(IGFX_XE_HP_CORE, TimestampPacketTests, givenInvalidDebugFlagSetWhenCreatingCsrThenExceptionIsThrown) { OsContext &osContext = 
*executionEnvironment->memoryManager->getRegisteredEngines()[0].osContext; DebugManager.flags.OverrideTimestampPacketSize.set(12); EXPECT_ANY_THROW(CommandStreamReceiverHw csr(*executionEnvironment, 0, osContext.getDeviceBitfield())); } HWTEST_F(TimestampPacketTests, givenTagAlignmentWhenCreatingAllocatorThenGpuAddressIsAligned) { auto csr = executionEnvironment->memoryManager->getRegisteredEngines()[0].commandStreamReceiver; auto &hwHelper = HwHelper::get(device->getHardwareInfo().platform.eRenderCoreFamily); auto allocator = csr->getTimestampPacketAllocator(); auto tag1 = allocator->getTag(); auto tag2 = allocator->getTag(); EXPECT_TRUE(isAligned(tag1->getGpuAddress(), hwHelper.getTimestampPacketAllocatorAlignment())); EXPECT_TRUE(isAligned(tag2->getGpuAddress(), hwHelper.getTimestampPacketAllocatorAlignment())); } HWTEST_F(TimestampPacketTests, givenDebugFlagSetWhenCreatingTimestampPacketAllocatorThenDisableReusingAndLimitPoolSize) { DebugManagerStateRestore restore; DebugManager.flags.DisableTimestampPacketOptimizations.set(true); OsContext &osContext = *executionEnvironment->memoryManager->getRegisteredEngines()[0].osContext; CommandStreamReceiverHw csr(*executionEnvironment, 0, osContext.getDeviceBitfield()); csr.setupContext(osContext); EXPECT_EQ(1u, csr.getPreferredTagPoolSize()); auto tag = csr.getTimestampPacketAllocator()->getTag(); setTagToReadyState(tag); EXPECT_FALSE(tag->canBeReleased()); } HWCMDTEST_F(IGFX_GEN8_CORE, TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEstimatingStreamSizeThenAddPipeControl) { MockKernelWithInternals kernel2(*device); MockMultiDispatchInfo multiDispatchInfo(device.get(), std::vector({kernel->mockKernel, kernel2.mockKernel})); auto mockCmdQHw = std::make_unique>(context, device.get(), nullptr); device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = false; getCommandStream(*mockCmdQHw, CsrDependencies(), false, false, false, multiDispatchInfo, nullptr, 0, false, false); auto sizeWithDisabled = 
mockCmdQHw->requestedCmdStreamSize; device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; getCommandStream(*mockCmdQHw, CsrDependencies(), false, false, false, multiDispatchInfo, nullptr, 0, false, false); auto sizeWithEnabled = mockCmdQHw->requestedCmdStreamSize; auto extendedSize = sizeWithDisabled + sizeof(typename FamilyType::PIPE_CONTROL); EXPECT_EQ(sizeWithEnabled, extendedSize); } HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledAndOoqWhenEstimatingStreamSizeThenDontAddAdditionalSize) { MockMultiDispatchInfo multiDispatchInfo(device.get(), std::vector({kernel->mockKernel})); auto mockCmdQHw = std::make_unique>(context, device.get(), nullptr); mockCmdQHw->setOoqEnabled(); device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = false; getCommandStream(*mockCmdQHw, CsrDependencies(), false, false, false, multiDispatchInfo, nullptr, 0, false, false); auto sizeWithDisabled = mockCmdQHw->requestedCmdStreamSize; device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; MockTimestampPacketContainer timestamp1(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1); MockTimestampPacketContainer timestamp2(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 2); MockTimestampPacketContainer timestamp3(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 3); MockTimestampPacketContainer timestamp4(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 4); MockTimestampPacketContainer timestamp5(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 5); Event event1(mockCmdQHw.get(), 0, 0, 0); event1.addTimestampPacketNodes(timestamp1); Event event2(mockCmdQHw.get(), 0, 0, 0); event2.addTimestampPacketNodes(timestamp2); Event event3(mockCmdQHw.get(), 0, 0, 0); event3.addTimestampPacketNodes(timestamp3); Event event4(mockCmdQHw.get(), 0, 0, 0); event4.addTimestampPacketNodes(timestamp4); Event event5(mockCmdQHw.get(), 0, 
0, 0); event5.addTimestampPacketNodes(timestamp5); const cl_uint numEventsOnWaitlist = 5; cl_event waitlist[] = {&event1, &event2, &event3, &event4, &event5}; EventsRequest eventsRequest(numEventsOnWaitlist, waitlist, nullptr); CsrDependencies csrDeps; eventsRequest.fillCsrDependenciesForTimestampPacketContainer( csrDeps, device->getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OnCsr); getCommandStream(*mockCmdQHw, csrDeps, false, false, false, multiDispatchInfo, nullptr, 0, false, false); auto sizeWithEnabled = mockCmdQHw->requestedCmdStreamSize; size_t sizeForNodeDependency = 0; for (auto timestampPacketContainer : csrDeps.timestampPacketContainer) { for (auto &node : timestampPacketContainer->peekNodes()) { sizeForNodeDependency += TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependency(*node); } } size_t extendedSize = sizeWithDisabled + EnqueueOperation::getSizeRequiredForTimestampPacketWrite() + sizeForNodeDependency; EXPECT_EQ(sizeWithEnabled, extendedSize); } HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEstimatingStreamSizeWithWaitlistThenAddSizeForSemaphores) { MockKernelWithInternals kernel2(*device); MockMultiDispatchInfo multiDispatchInfo(device.get(), std::vector({kernel->mockKernel, kernel2.mockKernel})); auto mockCmdQHw = std::make_unique>(context, device.get(), nullptr); device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = false; getCommandStream(*mockCmdQHw, CsrDependencies(), false, false, false, multiDispatchInfo, nullptr, 0, false, false); auto sizeWithDisabled = mockCmdQHw->requestedCmdStreamSize; device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; MockTimestampPacketContainer timestamp1(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1); MockTimestampPacketContainer timestamp2(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 2); MockTimestampPacketContainer 
timestamp3(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 3); MockTimestampPacketContainer timestamp4(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 4); MockTimestampPacketContainer timestamp5(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 5); Event event1(mockCmdQHw.get(), 0, 0, 0); event1.addTimestampPacketNodes(timestamp1); Event event2(mockCmdQHw.get(), 0, 0, 0); event2.addTimestampPacketNodes(timestamp2); Event event3(mockCmdQHw.get(), 0, 0, 0); event3.addTimestampPacketNodes(timestamp3); Event event4(mockCmdQHw.get(), 0, 0, 0); event4.addTimestampPacketNodes(timestamp4); Event event5(mockCmdQHw.get(), 0, 0, 0); event5.addTimestampPacketNodes(timestamp5); const cl_uint numEventsOnWaitlist = 5; cl_event waitlist[] = {&event1, &event2, &event3, &event4, &event5}; EventsRequest eventsRequest(numEventsOnWaitlist, waitlist, nullptr); CsrDependencies csrDeps; eventsRequest.fillCsrDependenciesForTimestampPacketContainer(csrDeps, device->getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OnCsr); getCommandStream(*mockCmdQHw, csrDeps, false, false, false, multiDispatchInfo, nullptr, 0, false, false); auto sizeWithEnabled = mockCmdQHw->requestedCmdStreamSize; size_t sizeForNodeDependency = 0; for (auto timestampPacketContainer : csrDeps.timestampPacketContainer) { for (auto &node : timestampPacketContainer->peekNodes()) { sizeForNodeDependency += TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependency(*node); } } size_t extendedSize = sizeWithDisabled + EnqueueOperation::getSizeRequiredForTimestampPacketWrite() + sizeForNodeDependency; EXPECT_EQ(sizeWithEnabled, extendedSize); } HWTEST_F(TimestampPacketTests, givenEventsRequestWithEventsWithoutTimestampsWhenComputingCsrDependenciesThenDoNotAddThem) { device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = false; Event eventWithoutTimestampContainer1(mockCmdQ, 0, 0, 0); Event 
eventWithoutTimestampContainer2(mockCmdQ, 0, 0, 0); Event eventWithoutTimestampContainer3(mockCmdQ, 0, 0, 0); Event eventWithoutTimestampContainer4(mockCmdQ, 0, 0, 0); Event eventWithoutTimestampContainer5(mockCmdQ, 0, 0, 0); const cl_uint numEventsOnWaitlist = 5; cl_event waitlist[] = {&eventWithoutTimestampContainer1, &eventWithoutTimestampContainer2, &eventWithoutTimestampContainer3, &eventWithoutTimestampContainer4, &eventWithoutTimestampContainer5}; EventsRequest eventsRequest(numEventsOnWaitlist, waitlist, nullptr); CsrDependencies csrDepsEmpty; eventsRequest.fillCsrDependenciesForTimestampPacketContainer(csrDepsEmpty, device->getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OnCsr); EXPECT_EQ(0u, csrDepsEmpty.timestampPacketContainer.size()); device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; MockTimestampPacketContainer timestamp1(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1); MockTimestampPacketContainer timestamp2(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 2); MockTimestampPacketContainer timestamp3(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 3); MockTimestampPacketContainer timestamp4(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 4); MockTimestampPacketContainer timestamp5(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 5); Event event1(mockCmdQ, 0, 0, 0); event1.addTimestampPacketNodes(timestamp1); Event eventWithEmptyTimestampContainer2(mockCmdQ, 0, 0, 0); // event2 does not have timestamp Event event3(mockCmdQ, 0, 0, 0); event3.addTimestampPacketNodes(timestamp3); Event eventWithEmptyTimestampContainer4(mockCmdQ, 0, 0, 0); // event4 does not have timestamp Event event5(mockCmdQ, 0, 0, 0); event5.addTimestampPacketNodes(timestamp5); cl_event waitlist2[] = {&event1, &eventWithEmptyTimestampContainer2, &event3, &eventWithEmptyTimestampContainer4, &event5}; EventsRequest 
eventsRequest2(numEventsOnWaitlist, waitlist2, nullptr); CsrDependencies csrDepsSize3; eventsRequest2.fillCsrDependenciesForTimestampPacketContainer(csrDepsSize3, device->getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OnCsr); EXPECT_EQ(3u, csrDepsSize3.timestampPacketContainer.size()); size_t sizeForNodeDependency = 0; for (auto timestampPacketContainer : csrDepsSize3.timestampPacketContainer) { for (auto &node : timestampPacketContainer->peekNodes()) { sizeForNodeDependency += TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependency(*node); } } size_t expectedSize = sizeForNodeDependency; EXPECT_EQ(expectedSize, TimestampPacketHelper::getRequiredCmdStreamSize(csrDepsSize3)); } HWTEST_F(TimestampPacketTests, whenEstimatingSizeForNodeDependencyThenReturnCorrectValue) { TimestampPackets tag; MockTagNode mockNode; mockNode.tagForCpuAccess = &tag; mockNode.gpuAddress = 0x1230000; size_t sizeForNodeDependency = 0; sizeForNodeDependency += TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependency(mockNode); size_t expectedSize = mockNode.getPacketsUsed() * sizeof(typename FamilyType::MI_SEMAPHORE_WAIT); EXPECT_EQ(expectedSize, sizeForNodeDependency); } HWCMDTEST_F(IGFX_GEN8_CORE, TimestampPacketTests, givenTimestampPacketWhenDispatchingGpuWalkerThenAddTwoPcForLastWalker) { using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; MockTimestampPacketContainer timestampPacket(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 2); MockKernelWithInternals kernel2(*device); MockMultiDispatchInfo multiDispatchInfo(device.get(), std::vector({kernel->mockKernel, kernel2.mockKernel})); auto &cmdStream = mockCmdQ->getCS(0); device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; HardwareInterface::dispatchWalker( *mockCmdQ, multiDispatchInfo, CsrDependencies(), nullptr, nullptr, nullptr, nullptr, ×tampPacket, CL_COMMAND_NDRANGE_KERNEL); HardwareParse 
hwParser; hwParser.parseCommands(cmdStream, 0); uint32_t walkersFound = 0; for (auto it = hwParser.cmdList.begin(); it != hwParser.cmdList.end(); it++) { if (genCmdCast(*it)) { if (MemorySynchronizationCommands::isPipeControlWArequired(device->getHardwareInfo())) { auto pipeControl = genCmdCast(*++it); EXPECT_NE(nullptr, pipeControl); } auto pipeControl = genCmdCast(*++it); EXPECT_NE(nullptr, pipeControl); auto expectedAddress = TimestampPacketHelper::getContextEndGpuAddress(*timestampPacket.getNode(walkersFound)); EXPECT_EQ(1u, pipeControl->getCommandStreamerStallEnable()); EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, pipeControl->getPostSyncOperation()); EXPECT_EQ(0u, pipeControl->getImmediateData()); EXPECT_EQ(expectedAddress, NEO::UnitTestHelper::getPipeControlPostSyncAddress(*pipeControl)); walkersFound++; } } EXPECT_EQ(2u, walkersFound); } HWCMDTEST_F(IGFX_GEN8_CORE, TimestampPacketTests, givenTimestampPacketDisabledWhenDispatchingGpuWalkerThenDontAddPipeControls) { MockTimestampPacketContainer timestampPacket(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1); MockMultiDispatchInfo multiDispatchInfo(device.get(), kernel->mockKernel); auto &cmdStream = mockCmdQ->getCS(0); device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = false; HardwareInterface::dispatchWalker( *mockCmdQ, multiDispatchInfo, CsrDependencies(), nullptr, nullptr, nullptr, nullptr, ×tampPacket, CL_COMMAND_NDRANGE_KERNEL); HardwareParse hwParser; hwParser.parseCommands(cmdStream, 0); auto cmdItor = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); EXPECT_EQ(hwParser.cmdList.end(), cmdItor); } HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEnqueueingThenObtainNewStampAndPassToEvent) { auto &csr = device->getUltCommandStreamReceiver(); csr.timestampPacketWriteEnabled = true; auto mockTagAllocator = new MockTagAllocator<>(device->getRootDeviceIndex(), executionEnvironment->memoryManager.get()); 
csr.timestampPacketAllocator.reset(mockTagAllocator); auto cmdQ = std::make_unique>(context, device.get(), nullptr); cl_event event1, event2; // obtain first node for cmdQ and event1 cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, &event1); auto node1 = cmdQ->timestampPacketContainer->peekNodes().at(0); EXPECT_NE(nullptr, node1); EXPECT_EQ(node1, cmdQ->timestampPacketContainer->peekNodes().at(0)); // obtain new node for cmdQ and event2 cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, &event2); auto node2 = cmdQ->timestampPacketContainer->peekNodes().at(0); EXPECT_NE(nullptr, node2); EXPECT_EQ(node2, cmdQ->timestampPacketContainer->peekNodes().at(0)); EXPECT_EQ(0u, mockTagAllocator->returnedToFreePoolNodes.size()); // nothing returned. event1 owns previous node EXPECT_EQ(1u, mockTagAllocator->releaseReferenceNodes.size()); // cmdQ released first node EXPECT_EQ(node1, mockTagAllocator->releaseReferenceNodes.at(0)); EXPECT_NE(node1, node2); setTagToReadyState(node1); setTagToReadyState(node2); clReleaseEvent(event2); EXPECT_EQ(0u, mockTagAllocator->returnedToFreePoolNodes.size()); // nothing returned. 
cmdQ owns node2 EXPECT_EQ(2u, mockTagAllocator->releaseReferenceNodes.size()); // event2 released node2 EXPECT_EQ(node2, mockTagAllocator->releaseReferenceNodes.at(1)); clReleaseEvent(event1); EXPECT_EQ(0u, mockTagAllocator->returnedToFreePoolNodes.size()); EXPECT_EQ(3u, mockTagAllocator->releaseReferenceNodes.size()); // event1 released node1 EXPECT_EQ(node1, mockTagAllocator->releaseReferenceNodes.at(2)); { TimestampPacketContainer release; cmdQ->deferredTimestampPackets->swapNodes(release); } EXPECT_EQ(1u, mockTagAllocator->returnedToFreePoolNodes.size()); // removed last reference on node1 EXPECT_EQ(node1, mockTagAllocator->returnedToFreePoolNodes.at(0)); cmdQ.reset(nullptr); EXPECT_EQ(2u, mockTagAllocator->returnedToFreePoolNodes.size()); // removed last reference on node2 EXPECT_EQ(node2, mockTagAllocator->returnedToFreePoolNodes.at(1)); EXPECT_EQ(5u, mockTagAllocator->releaseReferenceNodes.size()); // cmdQ released node2 EXPECT_EQ(node2, mockTagAllocator->releaseReferenceNodes.at(4)); } HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEnqueueingThenWriteWalkerStamp) { using GPGPU_WALKER = typename FamilyType::WALKER_TYPE; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; auto cmdQ = std::make_unique>(context, device.get(), nullptr); cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_EQ(1u, cmdQ->timestampPacketContainer->peekNodes().size()); HardwareParse hwParser; hwParser.parseCommands(cmdQ->getCS(0), 0); bool walkerFound = false; for (auto it = hwParser.cmdList.begin(); it != hwParser.cmdList.end(); it++) { if (genCmdCast(*it)) { if (MemorySynchronizationCommands::isPipeControlWArequired(device->getHardwareInfo())) { auto pipeControl = genCmdCast(*++it); EXPECT_NE(nullptr, pipeControl); } walkerFound = true; it = find(++it, hwParser.cmdList.end()); ASSERT_NE(hwParser.cmdList.end(), it); auto pipeControl = 
genCmdCast(*it); ASSERT_NE(nullptr, pipeControl); EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, pipeControl->getPostSyncOperation()); } } EXPECT_TRUE(walkerFound); } HWTEST_F(TimestampPacketTests, givenEventsRequestWhenEstimatingStreamSizeForCsrThenAddSizeForSemaphores) { auto device2 = std::make_unique(Device::create(executionEnvironment, 0u)); MockContext context2(device2.get()); auto cmdQ2 = std::make_unique>(&context2, device2.get(), nullptr); MockTimestampPacketContainer timestamp1(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1); MockTimestampPacketContainer timestamp2(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 2); MockTimestampPacketContainer timestamp3(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 3); MockTimestampPacketContainer timestamp4(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 4); MockTimestampPacketContainer timestamp5(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 5); auto &csr = device->getUltCommandStreamReceiver(); auto &csr2 = device2->getUltCommandStreamReceiver(); csr2.timestampPacketWriteEnabled = true; Event event1(cmdQ2.get(), 0, 0, 0); event1.addTimestampPacketNodes(timestamp1); Event event2(cmdQ2.get(), 0, 0, 0); event2.addTimestampPacketNodes(timestamp2); Event event3(cmdQ2.get(), 0, 0, 0); event3.addTimestampPacketNodes(timestamp3); Event event4(cmdQ2.get(), 0, 0, 0); event4.addTimestampPacketNodes(timestamp4); Event event5(cmdQ2.get(), 0, 0, 0); event5.addTimestampPacketNodes(timestamp5); const cl_uint numEventsOnWaitlist = 5; cl_event waitlist[] = {&event1, &event2, &event3, &event4, &event5}; EventsRequest eventsRequest(numEventsOnWaitlist, waitlist, nullptr); DispatchFlags flags = DispatchFlagsHelper::createDefaultDispatchFlags(); auto sizeWithoutEvents = csr.getRequiredCmdStreamSize(flags, device->getDevice()); 
eventsRequest.fillCsrDependenciesForTimestampPacketContainer(flags.csrDependencies, csr, NEO::CsrDependencies::DependenciesType::OutOfCsr); auto sizeWithEvents = csr.getRequiredCmdStreamSize(flags, device->getDevice()); size_t sizeForNodeDependency = 0; for (auto timestampPacketContainer : flags.csrDependencies.timestampPacketContainer) { for (auto &node : timestampPacketContainer->peekNodes()) { sizeForNodeDependency += TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependency(*node); } } size_t extendedSize = sizeWithoutEvents + sizeForNodeDependency; EXPECT_EQ(sizeWithEvents, extendedSize); } HWTEST_F(TimestampPacketTests, givenEventsRequestWhenEstimatingStreamSizeForDifferentCsrFromSameDeviceThenAddSizeForSemaphores) { // Create second (LOW_PRIORITY) queue on the same device cl_queue_properties props[] = {CL_QUEUE_PRIORITY_KHR, CL_QUEUE_PRIORITY_LOW_KHR, 0}; auto cmdQ2 = std::make_unique>(context, device.get(), props); MockTimestampPacketContainer timestamp1(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1); MockTimestampPacketContainer timestamp2(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 2); MockTimestampPacketContainer timestamp3(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 3); MockTimestampPacketContainer timestamp4(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 4); MockTimestampPacketContainer timestamp5(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 5); auto &csr = device->getUltCommandStreamReceiver(); auto &csr2 = cmdQ2->getUltCommandStreamReceiver(); csr2.timestampPacketWriteEnabled = true; Event event1(cmdQ2.get(), 0, 0, 0); event1.addTimestampPacketNodes(timestamp1); Event event2(cmdQ2.get(), 0, 0, 0); event2.addTimestampPacketNodes(timestamp2); Event event3(cmdQ2.get(), 0, 0, 0); event3.addTimestampPacketNodes(timestamp3); Event event4(cmdQ2.get(), 0, 0, 0); event4.addTimestampPacketNodes(timestamp4); Event 
event5(cmdQ2.get(), 0, 0, 0); event5.addTimestampPacketNodes(timestamp5); const cl_uint numEventsOnWaitlist = 5; cl_event waitlist[] = {&event1, &event2, &event3, &event4, &event5}; EventsRequest eventsRequest(numEventsOnWaitlist, waitlist, nullptr); DispatchFlags flags = DispatchFlagsHelper::createDefaultDispatchFlags(); auto sizeWithoutEvents = csr.getRequiredCmdStreamSize(flags, device->getDevice()); eventsRequest.fillCsrDependenciesForTimestampPacketContainer(flags.csrDependencies, csr, NEO::CsrDependencies::DependenciesType::OutOfCsr); auto sizeWithEvents = csr.getRequiredCmdStreamSize(flags, device->getDevice()); size_t sizeForNodeDependency = 0; for (auto timestampPacketContainer : flags.csrDependencies.timestampPacketContainer) { for (auto &node : timestampPacketContainer->peekNodes()) { sizeForNodeDependency += TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependency(*node); } } size_t extendedSize = sizeWithoutEvents + sizeForNodeDependency; EXPECT_EQ(sizeWithEvents, extendedSize); } HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEnqueueingThenProgramSemaphoresOnCsrStream) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; auto device2 = std::make_unique(Device::create(executionEnvironment, 0u)); device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; device2->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; MockContext context2(device2.get()); auto cmdQ1 = std::make_unique>(context, device.get(), nullptr); auto cmdQ2 = std::make_unique>(&context2, device2.get(), nullptr); const cl_uint eventsOnWaitlist = 6; MockTimestampPacketContainer timestamp3(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1); MockTimestampPacketContainer timestamp4(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1); MockTimestampPacketContainer timestamp5(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1); 
MockTimestampPacketContainer timestamp6(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 2); UserEvent event1; event1.setStatus(CL_COMPLETE); UserEvent event2; event2.setStatus(CL_COMPLETE); Event event3(cmdQ1.get(), 0, 0, 0); event3.addTimestampPacketNodes(timestamp3); Event event4(cmdQ2.get(), 0, 0, 0); event4.addTimestampPacketNodes(timestamp4); Event event5(cmdQ1.get(), 0, 0, 0); event5.addTimestampPacketNodes(timestamp5); Event event6(cmdQ2.get(), 0, 0, 0); event6.addTimestampPacketNodes(timestamp6); cl_event waitlist[] = {&event1, &event2, &event3, &event4, &event5, &event6}; cmdQ1->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, eventsOnWaitlist, waitlist, nullptr); auto &cmdStream = device->getUltCommandStreamReceiver().commandStream; HardwareParse hwParser; hwParser.parseCommands(cmdStream, 0); auto it = hwParser.cmdList.begin(); verifySemaphore(genCmdCast(*it++), timestamp4.getNode(0), 0); verifySemaphore(genCmdCast(*it++), timestamp6.getNode(0), 0); verifySemaphore(genCmdCast(*it++), timestamp6.getNode(1), 0); while (it != hwParser.cmdList.end()) { EXPECT_EQ(nullptr, genCmdCast(*it)); it++; } } HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEnqueueingThenTrackOwnershipUntilQueueIsCompleted) { device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; auto cmdQ = std::make_unique>(context, device.get(), nullptr); TimestampPacketContainer *timestampPacketContainer = cmdQ->timestampPacketContainer.get(); TimestampPacketContainer *deferredTimestampPackets = cmdQ->deferredTimestampPackets.get(); uint64_t latestNode = 0; { cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); latestNode = timestampPacketContainer->peekNodes()[0]->getGpuAddress(); EXPECT_EQ(0u, deferredTimestampPackets->peekNodes().size()); } { cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_EQ(1u, deferredTimestampPackets->peekNodes().size()); 
EXPECT_EQ(latestNode, deferredTimestampPackets->peekNodes().at(0u)->getGpuAddress()); latestNode = timestampPacketContainer->peekNodes()[0]->getGpuAddress(); } { cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_EQ(2u, deferredTimestampPackets->peekNodes().size()); EXPECT_EQ(latestNode, deferredTimestampPackets->peekNodes().at(1u)->getGpuAddress()); latestNode = timestampPacketContainer->peekNodes()[0]->getGpuAddress(); } cmdQ->flush(); EXPECT_EQ(2u, deferredTimestampPackets->peekNodes().size()); cmdQ->finish(); EXPECT_EQ(0u, deferredTimestampPackets->peekNodes().size()); } HWTEST_F(TimestampPacketTests, givenTimestampWaitEnabledWhenEnqueueWithEventThenEventHasCorrectTimestampsToCheckForCompletion) { DebugManagerStateRestore restorer; DebugManager.flags.UpdateTaskCountFromWait.set(3); DebugManager.flags.EnableTimestampWait.set(1); auto &csr = device->getUltCommandStreamReceiver(); csr.timestampPacketWriteEnabled = true; csr.callBaseWaitForCompletionWithTimeout = false; *csr.getTagAddress() = 0u; auto cmdQ = std::make_unique>(context, device.get(), nullptr); cl_event clEvent1; cl_event clEvent2; TimestampPacketContainer *deferredTimestampPackets = cmdQ->deferredTimestampPackets.get(); TimestampPacketContainer *timestampPacketContainer = cmdQ->timestampPacketContainer.get(); cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, &clEvent1); cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, &clEvent2); cmdQ->flush(); Event &event1 = static_cast(*clEvent1); Event &event2 = static_cast(*clEvent2); EXPECT_EQ(1u, deferredTimestampPackets->peekNodes().size()); EXPECT_EQ(1u, timestampPacketContainer->peekNodes().size()); EXPECT_FALSE(csr.downloadAllocationCalled); EXPECT_FALSE(event1.isCompleted()); EXPECT_FALSE(event2.isCompleted()); EXPECT_TRUE(csr.downloadAllocationCalled); csr.downloadAllocationCalled = false; typename FamilyType::TimestampPacketType timestampData[] = {2, 2, 
2, 2}; for (uint32_t i = 0; i < deferredTimestampPackets->peekNodes()[0]->getPacketsUsed(); i++) { deferredTimestampPackets->peekNodes()[0]->assignDataToAllTimestamps(i, timestampData); } EXPECT_TRUE(event1.isCompleted()); EXPECT_FALSE(event2.isCompleted()); EXPECT_TRUE(csr.downloadAllocationCalled); csr.downloadAllocationCalled = false; for (uint32_t i = 0; i < deferredTimestampPackets->peekNodes()[0]->getPacketsUsed(); i++) { timestampPacketContainer->peekNodes()[0]->assignDataToAllTimestamps(i, timestampData); } EXPECT_TRUE(event1.isCompleted()); EXPECT_TRUE(event2.isCompleted()); EXPECT_TRUE(csr.downloadAllocationCalled); csr.downloadAllocationCalled = false; cmdQ->finish(); EXPECT_TRUE(event1.isCompleted()); EXPECT_TRUE(event2.isCompleted()); EXPECT_EQ(csr.waitForCompletionWithTimeoutTaskCountCalled, 0u); EXPECT_TRUE(csr.downloadAllocationCalled); clReleaseEvent(clEvent1); clReleaseEvent(clEvent2); *csr.getTagAddress() = csr.peekTaskCount(); } HWTEST_F(TimestampPacketTests, givenEnableTimestampWaitWhenFinishWithoutEnqueueThenDoNotWaitOnTimestamp) { DebugManagerStateRestore restorer; DebugManager.flags.UpdateTaskCountFromWait.set(3); DebugManager.flags.EnableTimestampWait.set(1); auto &csr = device->getUltCommandStreamReceiver(); csr.timestampPacketWriteEnabled = true; csr.callBaseWaitForCompletionWithTimeout = false; auto cmdQ = std::make_unique>(context, device.get(), nullptr); TimestampPacketContainer *deferredTimestampPackets = cmdQ->deferredTimestampPackets.get(); TimestampPacketContainer *timestampPacketContainer = cmdQ->timestampPacketContainer.get(); EXPECT_EQ(0u, deferredTimestampPackets->peekNodes().size()); EXPECT_EQ(0u, timestampPacketContainer->peekNodes().size()); cmdQ->finish(); EXPECT_EQ(csr.waitForCompletionWithTimeoutTaskCountCalled, 1u); } HWTEST_F(TimestampPacketTests, givenEnableTimestampWaitWhenFinishThenWaitOnTimestamp) { DebugManagerStateRestore restorer; DebugManager.flags.UpdateTaskCountFromWait.set(3); 
DebugManager.flags.EnableTimestampWait.set(1); auto &csr = device->getUltCommandStreamReceiver(); csr.timestampPacketWriteEnabled = true; csr.callBaseWaitForCompletionWithTimeout = false; auto cmdQ = std::make_unique>(context, device.get(), nullptr); TimestampPacketContainer *deferredTimestampPackets = cmdQ->deferredTimestampPackets.get(); TimestampPacketContainer *timestampPacketContainer = cmdQ->timestampPacketContainer.get(); cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); cmdQ->flush(); EXPECT_EQ(1u, deferredTimestampPackets->peekNodes().size()); EXPECT_EQ(1u, timestampPacketContainer->peekNodes().size()); typename FamilyType::TimestampPacketType timestampData[] = {2, 2, 2, 2}; for (uint32_t i = 0; i < deferredTimestampPackets->peekNodes()[0]->getPacketsUsed(); i++) { timestampPacketContainer->peekNodes()[0]->assignDataToAllTimestamps(i, timestampData); } cmdQ->finish(); EXPECT_EQ(csr.waitForCompletionWithTimeoutTaskCountCalled, 0u); } HWTEST_F(TimestampPacketTests, givenOOQAndEnableTimestampWaitWhenFinishThenWaitOnTimestamp) { DebugManagerStateRestore restorer; DebugManager.flags.UpdateTaskCountFromWait.set(3); DebugManager.flags.EnableTimestampWait.set(1); auto &csr = device->getUltCommandStreamReceiver(); csr.timestampPacketWriteEnabled = true; csr.callBaseWaitForCompletionWithTimeout = false; cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0}; auto cmdQ = std::make_unique>(context, device.get(), props); TimestampPacketContainer *deferredTimestampPackets = cmdQ->deferredTimestampPackets.get(); TimestampPacketContainer *timestampPacketContainer = cmdQ->timestampPacketContainer.get(); cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); cmdQ->flush(); EXPECT_EQ(1u, 
deferredTimestampPackets->peekNodes().size()); EXPECT_EQ(1u, timestampPacketContainer->peekNodes().size()); typename FamilyType::TimestampPacketType timestampData[] = {2, 2, 2, 2}; for (uint32_t i = 0; i < deferredTimestampPackets->peekNodes()[0]->getPacketsUsed(); i++) { deferredTimestampPackets->peekNodes()[0]->assignDataToAllTimestamps(i, timestampData); timestampPacketContainer->peekNodes()[0]->assignDataToAllTimestamps(i, timestampData); } cmdQ->finish(); EXPECT_EQ(csr.waitForCompletionWithTimeoutTaskCountCalled, 0u); cmdQ.reset(); } namespace CpuIntrinsicsTests { extern std::atomic pauseCounter; extern volatile uint32_t *pauseAddress; extern uint32_t pauseValue; extern uint32_t pauseOffset; extern std::function setupPauseAddress; } // namespace CpuIntrinsicsTests HWTEST_F(TimestampPacketTests, givenEnableTimestampWaitWhenFinishThenCallWaitUtils) { DebugManagerStateRestore restorer; DebugManager.flags.UpdateTaskCountFromWait.set(3); DebugManager.flags.EnableTimestampWait.set(1); device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0}; auto cmdQ = std::make_unique>(context, device.get(), props); TimestampPacketContainer *deferredTimestampPackets = cmdQ->deferredTimestampPackets.get(); TimestampPacketContainer *timestampPacketContainer = cmdQ->timestampPacketContainer.get(); cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); cmdQ->flush(); EXPECT_EQ(1u, deferredTimestampPackets->peekNodes().size()); EXPECT_EQ(1u, timestampPacketContainer->peekNodes().size()); VariableBackup backupPauseAddress(&CpuIntrinsicsTests::pauseAddress); VariableBackup backupPauseValue(&CpuIntrinsicsTests::pauseValue); VariableBackup backupPauseOffset(&CpuIntrinsicsTests::pauseOffset); VariableBackup> 
backupSetupPauseAddress(&CpuIntrinsicsTests::setupPauseAddress); deferredTimestampPackets->peekNodes()[0]->setPacketsUsed(1u); timestampPacketContainer->peekNodes()[0]->setPacketsUsed(1u); CpuIntrinsicsTests::pauseAddress = reinterpret_cast(const_cast(timestampPacketContainer->peekNodes()[0]->getContextEndAddress(0u))); CpuIntrinsicsTests::pauseValue = 2u; CpuIntrinsicsTests::setupPauseAddress = [&]() { CpuIntrinsicsTests::pauseAddress = reinterpret_cast(const_cast(deferredTimestampPackets->peekNodes()[0]->getContextEndAddress(0u))); }; CpuIntrinsicsTests::pauseCounter = 0u; EXPECT_FALSE(device->getUltCommandStreamReceiver().downloadAllocationCalled); cmdQ->finish(); EXPECT_EQ(2u, CpuIntrinsicsTests::pauseCounter); EXPECT_TRUE(device->getUltCommandStreamReceiver().downloadAllocationCalled); cmdQ.reset(); } HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEnqueueingToOoqThenMoveToDeferredList) { device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; auto cmdQ = std::make_unique>(context, device.get(), nullptr); cmdQ->setOoqEnabled(); TimestampPacketContainer *deferredTimestampPackets = cmdQ->deferredTimestampPackets.get(); cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_EQ(0u, deferredTimestampPackets->peekNodes().size()); cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_EQ(1u, deferredTimestampPackets->peekNodes().size()); } HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEnqueueingBlockingThenTrackOwnershipUntilQueueIsCompleted) { DebugManager.flags.MakeEachEnqueueBlocking.set(true); device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; auto cmdQ = std::make_unique>(context, device.get(), nullptr); TimestampPacketContainer *deferredTimestampPackets = cmdQ->deferredTimestampPackets.get(); cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_EQ(0u, 
deferredTimestampPackets->peekNodes().size()); cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_EQ(0u, deferredTimestampPackets->peekNodes().size()); } HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEnqueueingOnDifferentRootDeviceThenDontProgramSemaphoresOnCsrStream) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; auto device2 = std::make_unique(Device::create(executionEnvironment, 1u)); device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; device2->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; MockContext context2(device2.get()); auto cmdQ1 = std::make_unique>(context, device.get(), nullptr); auto cmdQ2 = std::make_unique>(&context2, device2.get(), nullptr); const cl_uint eventsOnWaitlist = 6; MockTimestampPacketContainer timestamp3(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1); MockTimestampPacketContainer timestamp4(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1); MockTimestampPacketContainer timestamp5(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1); MockTimestampPacketContainer timestamp6(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 2); UserEvent event1; event1.setStatus(CL_COMPLETE); UserEvent event2; event2.setStatus(CL_COMPLETE); Event event3(cmdQ1.get(), 0, 0, 0); event3.addTimestampPacketNodes(timestamp3); Event event4(cmdQ2.get(), 0, 0, 0); event4.addTimestampPacketNodes(timestamp4); Event event5(cmdQ1.get(), 0, 0, 0); event5.addTimestampPacketNodes(timestamp5); Event event6(cmdQ2.get(), 0, 0, 0); event6.addTimestampPacketNodes(timestamp6); cl_event waitlist[] = {&event1, &event2, &event3, &event4, &event5, &event6}; cmdQ1->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, eventsOnWaitlist, waitlist, nullptr); auto &cmdStream = device->getUltCommandStreamReceiver().commandStream; HardwareParse hwParser; 
hwParser.parseCommands(cmdStream, 0); auto it = hwParser.cmdList.begin(); while (it != hwParser.cmdList.end()) { auto semaphoreWait = genCmdCast(*it); if (semaphoreWait) { EXPECT_TRUE(UnitTestHelper::isAdditionalMiSemaphoreWait(*semaphoreWait)); } it++; } } HWTEST_F(TimestampPacketTests, givenAllDependencyTypesModeWhenFillingFromDifferentCsrsThenPushEverything) { auto device2 = std::make_unique(Device::create(executionEnvironment, 0u)); auto &csr1 = device->getUltCommandStreamReceiver(); auto &csr2 = device2->getUltCommandStreamReceiver(); csr1.timestampPacketWriteEnabled = true; csr2.timestampPacketWriteEnabled = true; MockContext context2(device2.get()); auto cmdQ1 = std::make_unique>(context, device.get(), nullptr); auto cmdQ2 = std::make_unique>(&context2, device2.get(), nullptr); const cl_uint eventsOnWaitlist = 2; MockTimestampPacketContainer timestamp1(*csr1.getTimestampPacketAllocator(), 1); MockTimestampPacketContainer timestamp2(*csr2.getTimestampPacketAllocator(), 1); Event event1(cmdQ1.get(), 0, 0, 0); event1.addTimestampPacketNodes(timestamp1); Event event2(cmdQ2.get(), 0, 0, 0); event2.addTimestampPacketNodes(timestamp2); cl_event waitlist[] = {&event1, &event2}; EventsRequest eventsRequest(eventsOnWaitlist, waitlist, nullptr); CsrDependencies csrDependencies; eventsRequest.fillCsrDependenciesForTimestampPacketContainer(csrDependencies, csr1, CsrDependencies::DependenciesType::All); EXPECT_EQ(static_cast(eventsOnWaitlist), csrDependencies.timestampPacketContainer.size()); } HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledOnDifferentCSRsFromOneDeviceWhenEnqueueingThenProgramSemaphoresOnCsrStream) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; auto cmdQ1 = std::make_unique>(context, device.get(), nullptr); // Create second (LOW_PRIORITY) queue on the same device cl_queue_properties props[] = {CL_QUEUE_PRIORITY_KHR, CL_QUEUE_PRIORITY_LOW_KHR, 0}; 
auto cmdQ2 = std::make_unique>(context, device.get(), props); cmdQ2->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; const cl_uint eventsOnWaitlist = 6; MockTimestampPacketContainer timestamp3(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1); MockTimestampPacketContainer timestamp4(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1); MockTimestampPacketContainer timestamp5(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1); MockTimestampPacketContainer timestamp6(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 2); UserEvent event1; event1.setStatus(CL_COMPLETE); UserEvent event2; event2.setStatus(CL_COMPLETE); Event event3(cmdQ1.get(), 0, 0, 0); event3.addTimestampPacketNodes(timestamp3); Event event4(cmdQ2.get(), 0, 0, 0); event4.addTimestampPacketNodes(timestamp4); Event event5(cmdQ1.get(), 0, 0, 0); event5.addTimestampPacketNodes(timestamp5); Event event6(cmdQ2.get(), 0, 0, 0); event6.addTimestampPacketNodes(timestamp6); cl_event waitlist[] = {&event1, &event2, &event3, &event4, &event5, &event6}; cmdQ1->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, eventsOnWaitlist, waitlist, nullptr); auto &cmdStream = device->getUltCommandStreamReceiver().commandStream; HardwareParse hwParser; hwParser.parseCommands(cmdStream, 0); auto it = hwParser.cmdList.begin(); verifySemaphore(genCmdCast(*it++), timestamp4.getNode(0), 0); verifySemaphore(genCmdCast(*it++), timestamp6.getNode(0), 0); verifySemaphore(genCmdCast(*it++), timestamp6.getNode(1), 0); while (it != hwParser.cmdList.end()) { EXPECT_EQ(nullptr, genCmdCast(*it)); it++; } } HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEnqueueingBlockedThenProgramSemaphoresOnCsrStreamOnFlush) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; auto device2 = std::make_unique(Device::create(executionEnvironment, 0u)); 
device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; device2->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; auto context2 = new MockContext(device2.get()); auto cmdQ1 = clUniquePtr(new MockCommandQueueHw(context, device.get(), nullptr)); auto cmdQ2 = new MockCommandQueueHw(context2, device2.get(), nullptr); MockTimestampPacketContainer timestamp0(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1); MockTimestampPacketContainer timestamp1(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1); UserEvent userEvent; Event event0(cmdQ1.get(), 0, 0, 0); event0.addTimestampPacketNodes(timestamp0); Event event1(cmdQ2, 0, 0, 0); event1.addTimestampPacketNodes(timestamp1); cl_event waitlist[] = {&userEvent, &event0, &event1}; cmdQ1->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 3, waitlist, nullptr); auto &cmdStream = device->getUltCommandStreamReceiver().commandStream; EXPECT_EQ(0u, cmdStream.getUsed()); userEvent.setStatus(CL_COMPLETE); cmdQ1->isQueueBlocked(); cmdQ2->isQueueBlocked(); HardwareParse hwParser; hwParser.parseCommands(cmdStream, 0); auto it = hwParser.cmdList.begin(); verifySemaphore(genCmdCast(*it++), timestamp1.getNode(0), 0); while (it != hwParser.cmdList.end()) { EXPECT_EQ(nullptr, genCmdCast(*it)); it++; } cmdQ2->release(); context2->release(); } HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledOnDifferentCSRsFromOneDeviceWhenEnqueueingBlockedThenProgramSemaphoresOnCsrStreamOnFlush) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; auto device2 = std::unique_ptr(Device::create(executionEnvironment, 1u)); device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; auto cmdQ1 = clUniquePtr(new MockCommandQueueHw(context, device.get(), nullptr)); // Create second (LOW_PRIORITY) queue on the same device cl_queue_properties props[] = {CL_QUEUE_PRIORITY_KHR, CL_QUEUE_PRIORITY_LOW_KHR, 0}; auto cmdQ2 = clUniquePtr(new 
MockCommandQueueHw(context, device.get(), props)); cmdQ2->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; MockTimestampPacketContainer timestamp0(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1); MockTimestampPacketContainer timestamp1(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1); UserEvent userEvent; Event event0(cmdQ1.get(), 0, 0, 0); event0.addTimestampPacketNodes(timestamp0); Event event1(cmdQ2.get(), 0, 0, 0); event1.addTimestampPacketNodes(timestamp1); cl_event waitlist[] = {&userEvent, &event0, &event1}; cmdQ1->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 3, waitlist, nullptr); auto &cmdStream = device->getUltCommandStreamReceiver().commandStream; EXPECT_EQ(0u, cmdStream.getUsed()); userEvent.setStatus(CL_COMPLETE); HardwareParse hwParser; hwParser.parseCommands(cmdStream, 0); auto it = hwParser.cmdList.begin(); verifySemaphore(genCmdCast(*it++), timestamp1.getNode(0), 0); while (it != hwParser.cmdList.end()) { EXPECT_EQ(nullptr, genCmdCast(*it)); it++; } cmdQ2->isQueueBlocked(); cmdQ1->isQueueBlocked(); } HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenDispatchingThenProgramSemaphoresForWaitlist) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; using WALKER = typename FamilyType::WALKER_TYPE; auto device2 = std::make_unique(Device::create(executionEnvironment, 0u)); device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; device2->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; MockContext context2(device2.get()); MockMultiDispatchInfo multiDispatchInfo(device.get(), std::vector({kernel->mockKernel})); MockCommandQueue mockCmdQ2(&context2, device2.get(), nullptr, false); auto &cmdStream = mockCmdQ->getCS(0); const cl_uint eventsOnWaitlist = 6; MockTimestampPacketContainer timestamp3(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1); MockTimestampPacketContainer 
timestamp4(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1); MockTimestampPacketContainer timestamp5(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 2); MockTimestampPacketContainer timestamp6(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1); MockTimestampPacketContainer timestamp7(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1); UserEvent event1; UserEvent event2; Event event3(mockCmdQ, 0, 0, 0); event3.addTimestampPacketNodes(timestamp3); Event event4(&mockCmdQ2, 0, 0, 0); event4.addTimestampPacketNodes(timestamp4); Event event5(mockCmdQ, 0, 0, 0); event5.addTimestampPacketNodes(timestamp5); Event event6(&mockCmdQ2, 0, 0, 0); event6.addTimestampPacketNodes(timestamp6); cl_event waitlist[] = {&event1, &event2, &event3, &event4, &event5, &event6}; EventsRequest eventsRequest(eventsOnWaitlist, waitlist, nullptr); CsrDependencies csrDeps; eventsRequest.fillCsrDependenciesForTimestampPacketContainer(csrDeps, mockCmdQ->getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OnCsr); HardwareInterface::dispatchWalker( *mockCmdQ, multiDispatchInfo, csrDeps, nullptr, nullptr, nullptr, nullptr, ×tamp7, CL_COMMAND_NDRANGE_KERNEL); HardwareParse hwParser; hwParser.parseCommands(cmdStream, 0); uint32_t semaphoresFound = 0; uint32_t walkersFound = 0; for (auto it = hwParser.cmdList.begin(); it != hwParser.cmdList.end(); it++) { auto semaphoreCmd = genCmdCast(*it); if (semaphoreCmd) { semaphoresFound++; if (semaphoresFound == 1) { verifySemaphore(semaphoreCmd, timestamp3.getNode(0), 0); } else if (semaphoresFound == 2) { verifySemaphore(semaphoreCmd, timestamp5.getNode(0), 0); } else if (semaphoresFound == 3) { verifySemaphore(semaphoreCmd, timestamp5.getNode(1), 0); } } if (genCmdCast(*it)) { walkersFound++; EXPECT_EQ(3u, semaphoresFound); // semaphores from events programmed before walker } } EXPECT_EQ(1u, walkersFound); EXPECT_EQ(3u, semaphoresFound); // total 
number of semaphores found in cmdList } HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledOnDifferentCSRsFromOneDeviceWhenDispatchingThenProgramSemaphoresForWaitlist) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; using WALKER = typename FamilyType::WALKER_TYPE; device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; MockMultiDispatchInfo multiDispatchInfo(device.get(), std::vector({kernel->mockKernel})); // Create second (LOW_PRIORITY) queue on the same device cl_queue_properties props[] = {CL_QUEUE_PRIORITY_KHR, CL_QUEUE_PRIORITY_LOW_KHR, 0}; auto mockCmdQ2 = std::make_unique>(context, device.get(), props); mockCmdQ2->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; auto &cmdStream = mockCmdQ->getCS(0); const cl_uint eventsOnWaitlist = 6; MockTimestampPacketContainer timestamp3(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1); MockTimestampPacketContainer timestamp4(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1); MockTimestampPacketContainer timestamp5(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 2); MockTimestampPacketContainer timestamp6(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1); MockTimestampPacketContainer timestamp7(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1); UserEvent event1; UserEvent event2; Event event3(mockCmdQ, 0, 0, 0); event3.addTimestampPacketNodes(timestamp3); Event event4(mockCmdQ2.get(), 0, 0, 0); event4.addTimestampPacketNodes(timestamp4); Event event5(mockCmdQ, 0, 0, 0); event5.addTimestampPacketNodes(timestamp5); Event event6(mockCmdQ2.get(), 0, 0, 0); event6.addTimestampPacketNodes(timestamp6); cl_event waitlist[] = {&event1, &event2, &event3, &event4, &event5, &event6}; EventsRequest eventsRequest(eventsOnWaitlist, waitlist, nullptr); CsrDependencies csrDeps; eventsRequest.fillCsrDependenciesForTimestampPacketContainer(csrDeps, 
mockCmdQ->getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OnCsr); HardwareInterface::dispatchWalker( *mockCmdQ, multiDispatchInfo, csrDeps, nullptr, nullptr, nullptr, nullptr, ×tamp7, CL_COMMAND_NDRANGE_KERNEL); HardwareParse hwParser; hwParser.parseCommands(cmdStream, 0); uint32_t semaphoresFound = 0; uint32_t walkersFound = 0; for (auto it = hwParser.cmdList.begin(); it != hwParser.cmdList.end(); it++) { auto semaphoreCmd = genCmdCast(*it); if (semaphoreCmd) { semaphoresFound++; if (semaphoresFound == 1) { verifySemaphore(semaphoreCmd, timestamp3.getNode(0), 0); } else if (semaphoresFound == 2) { verifySemaphore(semaphoreCmd, timestamp5.getNode(0), 0); } else if (semaphoresFound == 3) { verifySemaphore(semaphoreCmd, timestamp5.getNode(1), 0); } } if (genCmdCast(*it)) { walkersFound++; EXPECT_EQ(3u, semaphoresFound); // semaphores from events programmed before walker } } EXPECT_EQ(1u, walkersFound); EXPECT_EQ(3u, semaphoresFound); // total number of semaphores found in cmdList } HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledAndDependenciesResolvedViaPipeControlsIfPreviousOperationIsBlitThenStillProgramSemaphores) { DebugManagerStateRestore restorer; DebugManager.flags.ResolveDependenciesViaPipeControls.set(1); using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; using WALKER = typename FamilyType::WALKER_TYPE; device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; MockMultiDispatchInfo multiDispatchInfo(device.get(), std::vector({kernel->mockKernel})); auto &cmdStream = mockCmdQ->getCS(0); mockCmdQ->updateLatestSentEnqueueType(NEO::EnqueueProperties::Operation::Blit); const cl_uint eventsOnWaitlist = 1; MockTimestampPacketContainer timestamp(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1); Event event(mockCmdQ, 0, 0, 0); event.addTimestampPacketNodes(timestamp); cl_event waitlist[] = {&event}; EventsRequest eventsRequest(eventsOnWaitlist, waitlist, nullptr); 
CsrDependencies csrDeps; eventsRequest.fillCsrDependenciesForTimestampPacketContainer(csrDeps, mockCmdQ->getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OnCsr); HardwareInterface::dispatchWalker( *mockCmdQ, multiDispatchInfo, csrDeps, nullptr, nullptr, nullptr, nullptr, nullptr, CL_COMMAND_NDRANGE_KERNEL); HardwareParse hwParser; hwParser.parseCommands(cmdStream, 0); uint32_t semaphoresFound = 0; for (auto it = hwParser.cmdList.begin(); it != hwParser.cmdList.end(); it++) { auto semaphoreCmd = genCmdCast(*it); if (semaphoreCmd) { semaphoresFound++; } } EXPECT_EQ(1u, semaphoresFound); // total number of semaphores found in cmdList } HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledAndDependenciesResolvedViaPipeControlsIfPreviousOperationIsGPUKernelThenDoNotProgramSemaphores) { DebugManagerStateRestore restorer; DebugManager.flags.ResolveDependenciesViaPipeControls.set(1); using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; using WALKER = typename FamilyType::WALKER_TYPE; device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; MockMultiDispatchInfo multiDispatchInfo(device.get(), std::vector({kernel->mockKernel})); auto &cmdStream = mockCmdQ->getCS(0); mockCmdQ->updateLatestSentEnqueueType(NEO::EnqueueProperties::Operation::GpuKernel); const cl_uint eventsOnWaitlist = 1; MockTimestampPacketContainer timestamp(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1); Event event(mockCmdQ, 0, 0, 0); event.addTimestampPacketNodes(timestamp); cl_event waitlist[] = {&event}; EventsRequest eventsRequest(eventsOnWaitlist, waitlist, nullptr); CsrDependencies csrDeps; eventsRequest.fillCsrDependenciesForTimestampPacketContainer(csrDeps, mockCmdQ->getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OnCsr); HardwareInterface::dispatchWalker( *mockCmdQ, multiDispatchInfo, csrDeps, nullptr, nullptr, nullptr, nullptr, nullptr, CL_COMMAND_NDRANGE_KERNEL); HardwareParse hwParser; 
hwParser.parseCommands(cmdStream, 0); uint32_t semaphoresFound = 0; for (auto it = hwParser.cmdList.begin(); it != hwParser.cmdList.end(); it++) { auto semaphoreCmd = genCmdCast(*it); if (semaphoreCmd) { semaphoresFound++; } } EXPECT_EQ(0u, semaphoresFound); } HWTEST_F(TimestampPacketTests, givenAlreadyAssignedNodeWhenEnqueueingNonBlockedThenMakeItResident) { auto mockTagAllocator = new MockTagAllocator<>(device->getRootDeviceIndex(), executionEnvironment->memoryManager.get(), 1); auto &csr = device->getUltCommandStreamReceiver(); csr.timestampPacketAllocator.reset(mockTagAllocator); csr.timestampPacketWriteEnabled = true; auto cmdQ = std::make_unique>(context, device.get(), nullptr); TimestampPacketContainer previousNodes; cmdQ->obtainNewTimestampPacketNodes(1, previousNodes, false, cmdQ->getGpgpuCommandStreamReceiver()); auto firstNode = cmdQ->timestampPacketContainer->peekNodes().at(0); csr.storeMakeResidentAllocations = true; csr.timestampPacketWriteEnabled = true; cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); auto secondNode = cmdQ->timestampPacketContainer->peekNodes().at(0); EXPECT_NE(firstNode->getBaseGraphicsAllocation(), secondNode->getBaseGraphicsAllocation()); EXPECT_TRUE(csr.isMadeResident(firstNode->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation(), csr.taskCount)); } HWTEST_F(TimestampPacketTests, givenAlreadyAssignedNodeWhenEnqueueingBlockedThenMakeItResident) { auto mockTagAllocator = new MockTagAllocator<>(device->getRootDeviceIndex(), executionEnvironment->memoryManager.get(), 1); auto &csr = device->getUltCommandStreamReceiver(); csr.timestampPacketAllocator.reset(mockTagAllocator); csr.timestampPacketWriteEnabled = true; auto cmdQ = clUniquePtr(new MockCommandQueueHw(context, device.get(), nullptr)); TimestampPacketContainer previousNodes; cmdQ->obtainNewTimestampPacketNodes(1, previousNodes, false, cmdQ->getGpgpuCommandStreamReceiver()); auto firstNode = 
cmdQ->timestampPacketContainer->peekNodes().at(0); csr.storeMakeResidentAllocations = true; csr.timestampPacketWriteEnabled = true; UserEvent userEvent; cl_event clEvent = &userEvent; cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 1, &clEvent, nullptr); auto secondNode = cmdQ->timestampPacketContainer->peekNodes().at(0); EXPECT_NE(firstNode->getBaseGraphicsAllocation(), secondNode->getBaseGraphicsAllocation()); EXPECT_FALSE(csr.isMadeResident(firstNode->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation(), csr.taskCount)); userEvent.setStatus(CL_COMPLETE); EXPECT_TRUE(csr.isMadeResident(firstNode->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation(), csr.taskCount)); cmdQ->isQueueBlocked(); } HWTEST_F(TimestampPacketTests, givenAlreadyAssignedNodeWhenEnqueueingThenKeepDependencyOnPreviousNodeIfItsNotReady) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; MockTimestampPacketContainer firstNode(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 0); MockCommandQueueHw cmdQ(context, device.get(), nullptr); TimestampPacketContainer previousNodes; cmdQ.obtainNewTimestampPacketNodes(2, previousNodes, false, cmdQ.getGpgpuCommandStreamReceiver()); firstNode.add(cmdQ.timestampPacketContainer->peekNodes().at(0)); firstNode.add(cmdQ.timestampPacketContainer->peekNodes().at(1)); auto firstTag0 = firstNode.getNode(0); auto firstTag1 = firstNode.getNode(1); cmdQ.enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); HardwareParse hwParser; hwParser.parseCommands(*cmdQ.commandStream, 0); auto it = hwParser.cmdList.begin(); verifySemaphore(genCmdCast(*it), firstTag0, 0); verifySemaphore(genCmdCast(*++it), firstTag1, 0); it++; while (it != hwParser.cmdList.end()) { auto semaphoreWait = genCmdCast(*it); if (semaphoreWait) { EXPECT_TRUE(UnitTestHelper::isAdditionalMiSemaphoreWait(*semaphoreWait)); } it++; } } 
HWTEST_F(TimestampPacketTests, givenAlreadyAssignedNodeWhenEnqueueingToOoqThenDontKeepDependencyOnPreviousNodeIfItsNotReady) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; cl_queue_properties properties[] = {CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0}; MockCommandQueueHw cmdQ(context, device.get(), properties); TimestampPacketContainer previousNodes; cmdQ.obtainNewTimestampPacketNodes(1, previousNodes, false, cmdQ.getGpgpuCommandStreamReceiver()); cmdQ.enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); HardwareParse hwParser; hwParser.parseCommands(*cmdQ.commandStream, 0); uint32_t semaphoresFound = 0; for (auto it = hwParser.cmdList.begin(); it != hwParser.cmdList.end(); it++) { if (genCmdCast(*it)) { semaphoresFound++; } } uint32_t expectedSemaphoresCount = (UnitTestHelper::isAdditionalMiSemaphoreWaitRequired(device->getHardwareInfo()) ? 1 : 0); EXPECT_EQ(expectedSemaphoresCount, semaphoresFound); } HWTEST_F(TimestampPacketTests, givenAlreadyAssignedNodeWhenEnqueueingWithOmitTimestampPacketDependenciesThenDontKeepDependencyOnPreviousNodeIfItsNotReady) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; DebugManagerStateRestore restore; DebugManager.flags.OmitTimestampPacketDependencies.set(true); MockCommandQueueHw cmdQ(context, device.get(), nullptr); TimestampPacketContainer previousNodes; cmdQ.obtainNewTimestampPacketNodes(1, previousNodes, false, cmdQ.getGpgpuCommandStreamReceiver()); cmdQ.enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); HardwareParse hwParser; hwParser.parseCommands(*cmdQ.commandStream, 0); uint32_t semaphoresFound = 0; for (auto it = hwParser.cmdList.begin(); it != hwParser.cmdList.end(); it++) { if (genCmdCast(*it)) { semaphoresFound++; } } uint32_t expectedSemaphoresCount = 
(UnitTestHelper::isAdditionalMiSemaphoreWaitRequired(device->getHardwareInfo()) ? 1 : 0); EXPECT_EQ(expectedSemaphoresCount, semaphoresFound); } HWTEST_F(TimestampPacketTests, givenEventsWaitlistFromDifferentDevicesWhenEnqueueingThenMakeAllTimestampsResident) { MockTagAllocator> tagAllocator(device->getRootDeviceIndex(), executionEnvironment->memoryManager.get(), 1, 1, sizeof(TimestampPackets), false, device->getDeviceBitfield()); auto device2 = std::make_unique(Device::create(executionEnvironment, 0u)); auto &ultCsr = device->getUltCommandStreamReceiver(); ultCsr.timestampPacketWriteEnabled = true; ultCsr.storeMakeResidentAllocations = true; device2->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; MockContext context2(device2.get()); auto cmdQ1 = std::make_unique>(context, device.get(), nullptr); auto cmdQ2 = std::make_unique>(&context2, device2.get(), nullptr); MockTimestampPacketContainer node1(*ultCsr.getTimestampPacketAllocator(), 0); MockTimestampPacketContainer node2(*ultCsr.getTimestampPacketAllocator(), 0); auto tagNode1 = tagAllocator.getTag(); node1.add(tagNode1); auto tagNode2 = tagAllocator.getTag(); node2.add(tagNode2); Event event0(cmdQ1.get(), 0, 0, 0); event0.addTimestampPacketNodes(node1); Event event1(cmdQ2.get(), 0, 0, 0); event1.addTimestampPacketNodes(node2); cl_event waitlist[] = {&event0, &event1}; cmdQ1->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 2, waitlist, nullptr); EXPECT_NE(tagNode1->getBaseGraphicsAllocation(), tagNode2->getBaseGraphicsAllocation()); EXPECT_TRUE(ultCsr.isMadeResident(tagNode1->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation(), ultCsr.taskCount)); EXPECT_TRUE(ultCsr.isMadeResident(tagNode2->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation(), ultCsr.taskCount)); } HWTEST_F(TimestampPacketTests, givenEventsWaitlistFromDifferentCSRsWhenEnqueueingThenMakeAllTimestampsResident) { MockTagAllocator> tagAllocator(device->getRootDeviceIndex(), 
executionEnvironment->memoryManager.get(), 1, 1, sizeof(TimestampPackets), false, device->getDeviceBitfield()); auto &ultCsr = device->getUltCommandStreamReceiver(); ultCsr.timestampPacketWriteEnabled = true; ultCsr.storeMakeResidentAllocations = true; auto cmdQ1 = std::make_unique>(context, device.get(), nullptr); // Create second (LOW_PRIORITY) queue on the same device cl_queue_properties props[] = {CL_QUEUE_PRIORITY_KHR, CL_QUEUE_PRIORITY_LOW_KHR, 0}; auto cmdQ2 = std::make_unique>(context, device.get(), props); cmdQ2->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; MockTimestampPacketContainer node1(*ultCsr.getTimestampPacketAllocator(), 0); MockTimestampPacketContainer node2(*ultCsr.getTimestampPacketAllocator(), 0); auto tagNode1 = tagAllocator.getTag(); node1.add(tagNode1); auto tagNode2 = tagAllocator.getTag(); node2.add(tagNode2); Event event0(cmdQ1.get(), 0, 0, 0); event0.addTimestampPacketNodes(node1); Event event1(cmdQ2.get(), 0, 0, 0); event1.addTimestampPacketNodes(node2); cl_event waitlist[] = {&event0, &event1}; cmdQ1->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 2, waitlist, nullptr); EXPECT_NE(tagNode1->getBaseGraphicsAllocation(), tagNode2->getBaseGraphicsAllocation()); EXPECT_TRUE(ultCsr.isMadeResident(tagNode1->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation(), ultCsr.taskCount)); EXPECT_TRUE(ultCsr.isMadeResident(tagNode2->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation(), ultCsr.taskCount)); } HWTEST_F(TimestampPacketTests, givenTimestampPacketWhenEnqueueingNonBlockedThenMakeItResident) { auto &csr = device->getUltCommandStreamReceiver(); csr.timestampPacketWriteEnabled = true; csr.storeMakeResidentAllocations = true; MockKernelWithInternals mockKernel(*device, context); MockCommandQueueHw cmdQ(context, device.get(), nullptr); cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); auto timestampPacketNode = cmdQ.timestampPacketContainer->peekNodes().at(0); 
EXPECT_TRUE(csr.isMadeResident(timestampPacketNode->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation(), csr.taskCount)); } HWTEST_F(TimestampPacketTests, givenTimestampPacketWhenEnqueueingBlockedThenMakeItResidentOnSubmit) { auto &csr = device->getUltCommandStreamReceiver(); csr.timestampPacketWriteEnabled = true; MockKernelWithInternals mockKernel(*device, context); auto cmdQ = clUniquePtr(new MockCommandQueueHw(context, device.get(), nullptr)); csr.storeMakeResidentAllocations = true; UserEvent userEvent; cl_event clEvent = &userEvent; cmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 1, &clEvent, nullptr); auto timestampPacketNode = cmdQ->timestampPacketContainer->peekNodes().at(0); EXPECT_FALSE(csr.isMadeResident(timestampPacketNode->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation(), csr.taskCount)); userEvent.setStatus(CL_COMPLETE); EXPECT_TRUE(csr.isMadeResident(timestampPacketNode->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation(), csr.taskCount)); cmdQ->isQueueBlocked(); } HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEnqueueingBlockedThenVirtualEventIncrementsRefInternalAndDecrementsAfterCompleteEvent) { auto &csr = device->getUltCommandStreamReceiver(); csr.timestampPacketWriteEnabled = true; MockKernelWithInternals mockKernelWithInternals(*device, context); auto mockKernel = mockKernelWithInternals.mockKernel; auto cmdQ = clUniquePtr(new MockCommandQueueHw(context, device.get(), nullptr)); UserEvent userEvent; cl_event waitlist = &userEvent; auto internalCount = userEvent.getRefInternalCount(); cmdQ->enqueueKernel(mockKernel, 1, nullptr, gws, nullptr, 1, &waitlist, nullptr); EXPECT_EQ(internalCount + 1, userEvent.getRefInternalCount()); userEvent.setStatus(CL_COMPLETE); cmdQ->isQueueBlocked(); EXPECT_EQ(internalCount, mockKernel->getRefInternalCount()); } TEST_F(TimestampPacketTests, givenDispatchSizeWhenAskingForNewTimestampsThenObtainEnoughTags) { size_t dispatchSize = 3; 
mockCmdQ->timestampPacketContainer = std::make_unique(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 0); EXPECT_EQ(0u, mockCmdQ->timestampPacketContainer->peekNodes().size()); TimestampPacketContainer previousNodes; mockCmdQ->obtainNewTimestampPacketNodes(dispatchSize, previousNodes, false, mockCmdQ->getGpgpuCommandStreamReceiver()); EXPECT_EQ(dispatchSize, mockCmdQ->timestampPacketContainer->peekNodes().size()); } HWTEST_F(TimestampPacketTests, givenWaitlistAndOutputEventWhenEnqueueingWithoutKernelThenInheritTimestampPacketsWithoutSubmitting) { device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; auto cmdQ = clUniquePtr(new MockCommandQueueHw(context, device.get(), nullptr)); MockKernelWithInternals mockKernel(*device, context); cmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); // obtain first TimestampPackets TimestampPacketContainer cmdQNodes; cmdQNodes.assignAndIncrementNodesRefCounts(*cmdQ->timestampPacketContainer); MockTimestampPacketContainer node1(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1); MockTimestampPacketContainer node2(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1); Event event0(cmdQ.get(), 0, 0, 0); event0.addTimestampPacketNodes(node1); Event event1(cmdQ.get(), 0, 0, 0); event1.addTimestampPacketNodes(node2); UserEvent userEvent; Event eventWithoutContainer(nullptr, 0, 0, 0); uint32_t numEventsWithContainer = 2; uint32_t numEventsOnWaitlist = numEventsWithContainer + 2; // UserEvent + eventWithoutContainer cl_event waitlist[] = {&event0, &event1, &userEvent, &eventWithoutContainer}; cl_event clOutEvent; cmdQ->enqueueMarkerWithWaitList(numEventsOnWaitlist, waitlist, &clOutEvent); auto outEvent = castToObject(clOutEvent); EXPECT_EQ(cmdQ->timestampPacketContainer->peekNodes().at(0), cmdQNodes.peekNodes().at(0)); // no new nodes obtained EXPECT_EQ(1u, cmdQ->timestampPacketContainer->peekNodes().size()); auto 
&eventsNodes = outEvent->getTimestampPacketNodes()->peekNodes(); EXPECT_EQ(numEventsWithContainer + 1, eventsNodes.size()); // numEventsWithContainer + command queue EXPECT_EQ(cmdQNodes.peekNodes().at(0), eventsNodes.at(0)); EXPECT_EQ(event0.getTimestampPacketNodes()->peekNodes().at(0), eventsNodes.at(1)); EXPECT_EQ(event1.getTimestampPacketNodes()->peekNodes().at(0), eventsNodes.at(2)); clReleaseEvent(clOutEvent); userEvent.setStatus(CL_COMPLETE); cmdQ->isQueueBlocked(); } HWTEST_F(TimestampPacketTests, givenBlockedEnqueueWithoutKernelWhenSubmittingThenDispatchBlockedCommands) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; auto mockCsr = new MockCsrHw2(*device->getExecutionEnvironment(), device->getRootDeviceIndex(), device->getDeviceBitfield()); device->resetCommandStreamReceiver(mockCsr); mockCsr->timestampPacketWriteEnabled = true; mockCsr->storeFlushedTaskStream = true; auto cmdQ0 = clUniquePtr(new MockCommandQueueHw(context, device.get(), nullptr)); auto &secondEngine = device->getEngine(getChosenEngineType(device->getHardwareInfo()), EngineUsage::LowPriority); static_cast *>(secondEngine.commandStreamReceiver)->timestampPacketWriteEnabled = true; auto cmdQ1 = clUniquePtr(new MockCommandQueueHw(context, device.get(), nullptr)); cmdQ1->gpgpuEngine = &secondEngine; cmdQ1->timestampPacketContainer = std::make_unique(); EXPECT_NE(&cmdQ0->getGpgpuCommandStreamReceiver(), &cmdQ1->getGpgpuCommandStreamReceiver()); MockTimestampPacketContainer node0(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1); MockTimestampPacketContainer node1(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1); Event event0(cmdQ0.get(), 0, 0, 0); // on the same CSR event0.addTimestampPacketNodes(node0); Event event1(cmdQ1.get(), 0, 0, 0); // on different CSR event1.addTimestampPacketNodes(node1); uint32_t numEventsOnWaitlist = 3; uint32_t commands[] = {CL_COMMAND_MARKER, CL_COMMAND_BARRIER}; for (int i = 0; i < 2; i++) { 
UserEvent userEvent; cl_event waitlist[] = {&event0, &event1, &userEvent}; if (commands[i] == CL_COMMAND_MARKER) { cmdQ0->enqueueMarkerWithWaitList(numEventsOnWaitlist, waitlist, nullptr); } else if (commands[i] == CL_COMMAND_BARRIER) { cmdQ0->enqueueBarrierWithWaitList(numEventsOnWaitlist, waitlist, nullptr); } else { EXPECT_TRUE(false); } auto initialCsrStreamOffset = mockCsr->commandStream.getUsed(); userEvent.setStatus(CL_COMPLETE); HardwareParse hwParserCsr; HardwareParse hwParserCmdQ; LinearStream taskStream(mockCsr->storedTaskStream.get(), mockCsr->storedTaskStreamSize); taskStream.getSpace(mockCsr->storedTaskStreamSize); hwParserCsr.parseCommands(mockCsr->commandStream, initialCsrStreamOffset); hwParserCmdQ.parseCommands(taskStream, 0); auto queueSemaphores = findAll(hwParserCmdQ.cmdList.begin(), hwParserCmdQ.cmdList.end()); auto expectedQueueSemaphoresCount = 1u; if (UnitTestHelper::isAdditionalMiSemaphoreWaitRequired(device->getHardwareInfo())) { expectedQueueSemaphoresCount += 1; } EXPECT_EQ(expectedQueueSemaphoresCount, queueSemaphores.size()); verifySemaphore(genCmdCast(*(queueSemaphores[0])), node0.getNode(0), 0); auto csrSemaphores = findAll(hwParserCsr.cmdList.begin(), hwParserCsr.cmdList.end()); EXPECT_EQ(1u, csrSemaphores.size()); verifySemaphore(genCmdCast(*(csrSemaphores[0])), node1.getNode(0), 0); EXPECT_TRUE(mockCsr->passedDispatchFlags.blocking); EXPECT_TRUE(mockCsr->passedDispatchFlags.guardCommandBufferWithPipeControl); EXPECT_EQ(device->getPreemptionMode(), mockCsr->passedDispatchFlags.preemptionMode); cmdQ0->isQueueBlocked(); } } HWTEST_F(TimestampPacketTests, givenWaitlistAndOutputEventWhenEnqueueingMarkerWithoutKernelThenInheritTimestampPacketsAndProgramSemaphores) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; auto device2 = std::make_unique(Device::create(executionEnvironment, 0u)); device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; 
device2->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; MockContext context2(device2.get()); auto cmdQ = clUniquePtr(new MockCommandQueueHw(context, device.get(), nullptr)); auto cmdQ2 = std::make_unique>(&context2, device2.get(), nullptr); MockTimestampPacketContainer node1(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1); MockTimestampPacketContainer node2(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1); Event event0(cmdQ.get(), 0, 0, 0); event0.addTimestampPacketNodes(node1); Event event1(cmdQ2.get(), 0, 0, 0); event1.addTimestampPacketNodes(node2); uint32_t numEventsOnWaitlist = 2; cl_event waitlist[] = {&event0, &event1}; cmdQ->enqueueMarkerWithWaitList(numEventsOnWaitlist, waitlist, nullptr); HardwareParse hwParserCsr; HardwareParse hwParserCmdQ; hwParserCsr.parseCommands(device->getUltCommandStreamReceiver().commandStream, 0); hwParserCmdQ.parseCommands(*cmdQ->commandStream, 0); auto csrSemaphores = findAll(hwParserCsr.cmdList.begin(), hwParserCsr.cmdList.end()); EXPECT_EQ(1u, csrSemaphores.size()); verifySemaphore(genCmdCast(*(csrSemaphores[0])), node2.getNode(0), 0); auto queueSemaphores = findAll(hwParserCmdQ.cmdList.begin(), hwParserCmdQ.cmdList.end()); auto expectedQueueSemaphoresCount = 1u; if (UnitTestHelper::isAdditionalMiSemaphoreWaitRequired(device->getHardwareInfo())) { expectedQueueSemaphoresCount += 1; } EXPECT_EQ(expectedQueueSemaphoresCount, queueSemaphores.size()); verifySemaphore(genCmdCast(*(queueSemaphores[0])), node1.getNode(0), 0); } HWTEST_F(TimestampPacketTests, givenWaitlistAndOutputEventWhenEnqueueingBarrierWithoutKernelThenInheritTimestampPacketsAndProgramSemaphores) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; auto device2 = std::make_unique(Device::create(executionEnvironment, 0u)); device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; device2->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; 
MockContext context2(device2.get()); auto cmdQ = clUniquePtr(new MockCommandQueueHw(context, device.get(), nullptr)); auto cmdQ2 = std::make_unique>(&context2, device2.get(), nullptr); MockTimestampPacketContainer node1(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1); MockTimestampPacketContainer node2(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1); Event event0(cmdQ.get(), 0, 0, 0); event0.addTimestampPacketNodes(node1); Event event1(cmdQ2.get(), 0, 0, 0); event1.addTimestampPacketNodes(node2); uint32_t numEventsOnWaitlist = 2; cl_event waitlist[] = {&event0, &event1}; cmdQ->enqueueBarrierWithWaitList(numEventsOnWaitlist, waitlist, nullptr); HardwareParse hwParserCsr; HardwareParse hwParserCmdQ; hwParserCsr.parseCommands(device->getUltCommandStreamReceiver().commandStream, 0); hwParserCmdQ.parseCommands(*cmdQ->commandStream, 0); auto csrSemaphores = findAll(hwParserCsr.cmdList.begin(), hwParserCsr.cmdList.end()); EXPECT_EQ(1u, csrSemaphores.size()); verifySemaphore(genCmdCast(*(csrSemaphores[0])), node2.getNode(0), 0); auto queueSemaphores = findAll(hwParserCmdQ.cmdList.begin(), hwParserCmdQ.cmdList.end()); auto expectedQueueSemaphoresCount = 1u; if (UnitTestHelper::isAdditionalMiSemaphoreWaitRequired(device->getHardwareInfo())) { expectedQueueSemaphoresCount += 1; } EXPECT_EQ(expectedQueueSemaphoresCount, queueSemaphores.size()); verifySemaphore(genCmdCast(*(queueSemaphores[0])), node1.getNode(0), 0); } compute-runtime-22.14.22890/opencl/test/unit_test/helpers/timestamp_packet_2_tests.cpp000066400000000000000000000374741422164147700310110ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/array_count.h" #include "shared/test/common/cmd_parse/hw_parse.h" #include "shared/test/common/mocks/mock_command_stream_receiver.h" #include "shared/test/common/mocks/mock_timestamp_container.h" #include 
"shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/source/event/user_event.h" #include "opencl/test/unit_test/helpers/timestamp_packet_tests.h" #include "opencl/test/unit_test/mocks/mock_event.h" using namespace NEO; HWTEST_F(TimestampPacketTests, givenEmptyWaitlistAndNoOutputEventWhenEnqueueingMarkerThenDoNothing) { auto &csr = device->getUltCommandStreamReceiver(); csr.timestampPacketWriteEnabled = true; auto cmdQ = clUniquePtr(new MockCommandQueueHw(context, device.get(), nullptr)); cmdQ->enqueueMarkerWithWaitList(0, nullptr, nullptr); EXPECT_EQ(0u, cmdQ->timestampPacketContainer->peekNodes().size()); EXPECT_FALSE(csr.stallingCommandsOnNextFlushRequired); } HWTEST_F(TimestampPacketTests, givenEmptyWaitlistAndEventWhenEnqueueingMarkerWithProfilingEnabledThenObtainNewNode) { auto &csr = device->getUltCommandStreamReceiver(); csr.timestampPacketWriteEnabled = true; auto cmdQ = clUniquePtr(new MockCommandQueueHw(context, device.get(), nullptr)); cmdQ->setProfilingEnabled(); cl_event event; cmdQ->enqueueMarkerWithWaitList(0, nullptr, &event); EXPECT_EQ(1u, cmdQ->timestampPacketContainer->peekNodes().size()); clReleaseEvent(event); } template class MockCommandStreamReceiverHW : public UltCommandStreamReceiver { public: MockCommandStreamReceiverHW(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield) : UltCommandStreamReceiver::UltCommandStreamReceiver(executionEnvironment, rootDeviceIndex, deviceBitfield) {} CompletionStamp flushTask( LinearStream &commandStream, size_t commandStreamStart, const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh, uint32_t taskLevel, DispatchFlags &dispatchFlags, Device &device) override { stream = &commandStream; return UltCommandStreamReceiver::flushTask( commandStream, commandStreamStart, dsh, ioh, ssh, taskLevel, dispatchFlags, device); } LinearStream *stream = nullptr; }; HWTEST_F(TimestampPacketTests, 
givenEmptyWaitlistAndEventWhenMarkerProfilingEnabledThenPipeControlAddedBeforeWritingTimestamp) { using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; auto commandStreamReceiver = std::make_unique>(*device->getExecutionEnvironment(), device->getRootDeviceIndex(), device->getDeviceBitfield()); auto commandStreamReceiverPtr = commandStreamReceiver.get(); commandStreamReceiver->timestampPacketWriteEnabled = true; device->resetCommandStreamReceiver(commandStreamReceiver.release()); auto cmdQ = clUniquePtr(new MockCommandQueueHw(context, device.get(), nullptr)); cmdQ->setProfilingEnabled(); cl_event event; cmdQ->enqueueMarkerWithWaitList(0, nullptr, &event); HardwareParse hwParser; hwParser.parseCommands(*(commandStreamReceiverPtr->stream), 0); GenCmdList storeRegMemList = hwParser.getCommandsList(); EXPECT_EQ(4u, storeRegMemList.size()); auto storeRegMemIt = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); EXPECT_NE(storeRegMemIt, hwParser.cmdList.end()); auto pipeControlIt = find(hwParser.cmdList.begin(), storeRegMemIt); EXPECT_NE(storeRegMemIt, pipeControlIt); EXPECT_NE(hwParser.cmdList.end(), pipeControlIt); clReleaseEvent(event); } HWCMDTEST_F(IGFX_XE_HP_CORE, TimestampPacketTests, givenEmptyWaitlistAndEventWhenMarkerProfilingEnabledOnMultiTileCommandQueueThenCrossTileBarrierAddedBeforeWritingTimestamp) { using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; using MI_ATOMIC = typename FamilyType::MI_ATOMIC; using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; auto commandStreamReceiver = std::make_unique>(*device->getExecutionEnvironment(), device->getRootDeviceIndex(), device->getDeviceBitfield()); auto commandStreamReceiverPtr = commandStreamReceiver.get(); commandStreamReceiver->timestampPacketWriteEnabled = true; 
commandStreamReceiver->activePartitions = 2; commandStreamReceiver->activePartitionsConfig = 2; commandStreamReceiver->staticWorkPartitioningEnabled = true; device->resetCommandStreamReceiver(commandStreamReceiver.release()); *ptrOffset(commandStreamReceiverPtr->tagAddress, commandStreamReceiverPtr->postSyncWriteOffset) = *commandStreamReceiverPtr->tagAddress; auto cmdQ = clUniquePtr(new MockCommandQueueHw(context, device.get(), nullptr)); cmdQ->setProfilingEnabled(); cl_event event; cmdQ->enqueueMarkerWithWaitList(0, nullptr, &event); HardwareParse hwParser; hwParser.parseCommands(*(commandStreamReceiverPtr->stream), 0); GenCmdList storeRegMemList = hwParser.getCommandsList(); EXPECT_EQ(4u, storeRegMemList.size()); auto storeRegMemIt = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); EXPECT_NE(storeRegMemIt, hwParser.cmdList.end()); GenCmdList::reverse_iterator rItorStoreRegMemIt(storeRegMemIt); auto pipeControlIt = reverse_find(rItorStoreRegMemIt, hwParser.cmdList.rbegin()); auto pipeControl = genCmdCast(*pipeControlIt); EXPECT_NE(nullptr, pipeControl); GenCmdList::iterator cmdIt = pipeControlIt.base(); auto miAtomic = genCmdCast(*cmdIt); EXPECT_NE(nullptr, miAtomic); cmdIt++; auto miSemaphore = genCmdCast(*cmdIt); EXPECT_NE(nullptr, miSemaphore); cmdIt++; auto bbStart = genCmdCast(*cmdIt); EXPECT_NE(nullptr, bbStart); clReleaseEvent(event); } HWTEST_F(TimestampPacketTests, givenWithWaitlistAndEventWhenMarkerProfilingEnabledThenPipeControllNotAddedBeforeWritingTimestamp) { auto commandStreamReceiver = std::make_unique>(*device->getExecutionEnvironment(), device->getRootDeviceIndex(), device->getDeviceBitfield()); auto commandStreamReceiverPtr = commandStreamReceiver.get(); commandStreamReceiver->timestampPacketWriteEnabled = true; device->resetCommandStreamReceiver(commandStreamReceiver.release()); auto cmdQ = clUniquePtr(new MockCommandQueueHw(context, device.get(), nullptr)); cmdQ->setProfilingEnabled(); cl_event event; MockEvent events[] = { 
{cmdQ.get(), CL_COMMAND_READ_BUFFER, 0, 0}, {cmdQ.get(), CL_COMMAND_READ_BUFFER, 0, 0}, {cmdQ.get(), CL_COMMAND_READ_BUFFER, 0, 0}, }; const cl_event waitList[] = {events, events + 1, events + 2}; const cl_uint waitListSize = static_cast(arrayCount(waitList)); cmdQ->enqueueMarkerWithWaitList(waitListSize, waitList, &event); HardwareParse hwParser; hwParser.parseCommands(*(commandStreamReceiverPtr->stream), 0); auto storeRegMemIt = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); EXPECT_NE(storeRegMemIt, hwParser.cmdList.end()); auto pipeControlIt = find(hwParser.cmdList.begin(), storeRegMemIt); EXPECT_EQ(storeRegMemIt, pipeControlIt); clReleaseEvent(event); } HWTEST_F(TimestampPacketTests, whenEnqueueingBarrierThenRequestPipeControlOnCsrFlush) { auto &csr = device->getUltCommandStreamReceiver(); csr.timestampPacketWriteEnabled = true; EXPECT_FALSE(csr.stallingCommandsOnNextFlushRequired); MockCommandQueueHw cmdQ(context, device.get(), nullptr); MockKernelWithInternals mockKernel(*device, context); cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); // obtain first TimestampPackets TimestampPacketContainer cmdQNodes; cmdQNodes.assignAndIncrementNodesRefCounts(*cmdQ.timestampPacketContainer); cmdQ.enqueueBarrierWithWaitList(0, nullptr, nullptr); EXPECT_EQ(cmdQ.timestampPacketContainer->peekNodes().at(0), cmdQNodes.peekNodes().at(0)); // dont obtain new node EXPECT_EQ(1u, cmdQ.timestampPacketContainer->peekNodes().size()); EXPECT_TRUE(csr.stallingCommandsOnNextFlushRequired); } HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteDisabledWhenEnqueueingBarrierThenDontRequestPipeControlOnCsrFlush) { auto &csr = device->getUltCommandStreamReceiver(); csr.timestampPacketWriteEnabled = false; EXPECT_FALSE(csr.stallingCommandsOnNextFlushRequired); MockCommandQueueHw cmdQ(context, device.get(), nullptr); cmdQ.enqueueBarrierWithWaitList(0, nullptr, nullptr); EXPECT_FALSE(csr.stallingCommandsOnNextFlushRequired); } 
HWTEST_F(TimestampPacketTests, givenBlockedQueueWhenEnqueueingBarrierThenRequestPipeControlOnCsrFlush) { auto &csr = device->getUltCommandStreamReceiver(); csr.timestampPacketWriteEnabled = true; EXPECT_FALSE(csr.stallingCommandsOnNextFlushRequired); MockCommandQueueHw cmdQ(context, device.get(), nullptr); auto userEvent = make_releaseable(); cl_event waitlist[] = {userEvent.get()}; cmdQ.enqueueBarrierWithWaitList(1, waitlist, nullptr); EXPECT_TRUE(csr.stallingCommandsOnNextFlushRequired); userEvent->setStatus(CL_COMPLETE); } HWTEST_F(TimestampPacketTests, givenPipeControlRequestWhenEstimatingCsrStreamSizeThenAddSizeForPipeControl) { auto &csr = device->getUltCommandStreamReceiver(); DispatchFlags flags = DispatchFlagsHelper::createDefaultDispatchFlags(); csr.stallingCommandsOnNextFlushRequired = false; auto sizeWithoutPcRequest = device->getUltCommandStreamReceiver().getRequiredCmdStreamSize(flags, device->getDevice()); csr.stallingCommandsOnNextFlushRequired = true; auto sizeWithPcRequest = device->getUltCommandStreamReceiver().getRequiredCmdStreamSize(flags, device->getDevice()); size_t extendedSize = sizeWithoutPcRequest + sizeof(typename FamilyType::PIPE_CONTROL); EXPECT_EQ(sizeWithPcRequest, extendedSize); } HWTEST_F(TimestampPacketTests, givenPipeControlRequestWithBarrierWriteWhenEstimatingCsrStreamSizeThenAddSizeForPipeControlForWrite) { auto &csr = device->getUltCommandStreamReceiver(); DispatchFlags flags = DispatchFlagsHelper::createDefaultDispatchFlags(); TimestampPacketContainer barrierTimestampPacketNode; barrierTimestampPacketNode.add(csr.getTimestampPacketAllocator()->getTag()); flags.barrierTimestampPacketNodes = &barrierTimestampPacketNode; csr.stallingCommandsOnNextFlushRequired = false; auto sizeWithoutPcRequest = device->getUltCommandStreamReceiver().getRequiredCmdStreamSize(flags, device->getDevice()); csr.stallingCommandsOnNextFlushRequired = true; auto sizeWithPcRequest = device->getUltCommandStreamReceiver().getRequiredCmdStreamSize(flags, 
device->getDevice()); size_t extendedSize = sizeWithoutPcRequest + MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(device->getHardwareInfo()); EXPECT_EQ(sizeWithPcRequest, extendedSize); } HWTEST_F(TimestampPacketTests, givenInstructionCacheRequesWhenSizeIsEstimatedThenPipeControlIsAdded) { auto &csr = device->getUltCommandStreamReceiver(); DispatchFlags flags = DispatchFlagsHelper::createDefaultDispatchFlags(); csr.requiresInstructionCacheFlush = false; auto sizeWithoutPcRequest = device->getUltCommandStreamReceiver().getRequiredCmdStreamSize(flags, device->getDevice()); csr.requiresInstructionCacheFlush = true; auto sizeWithPcRequest = device->getUltCommandStreamReceiver().getRequiredCmdStreamSize(flags, device->getDevice()); size_t extendedSize = sizeWithoutPcRequest + sizeof(typename FamilyType::PIPE_CONTROL); EXPECT_EQ(sizeWithPcRequest, extendedSize); } HWTEST_F(TimestampPacketTests, givenPipeControlRequestWhenFlushingThenProgramPipeControlAndResetRequestFlag) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; auto &csr = device->getUltCommandStreamReceiver(); csr.stallingCommandsOnNextFlushRequired = true; csr.timestampPacketWriteEnabled = true; MockCommandQueueHw cmdQ(context, device.get(), nullptr); MockKernelWithInternals mockKernel(*device, context); cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_FALSE(csr.stallingCommandsOnNextFlushRequired); HardwareParse hwParser; hwParser.parsePipeControl = true; hwParser.parseCommands(csr.commandStream, 0); hwParser.findHardwareCommands(); auto secondEnqueueOffset = csr.commandStream.getUsed(); auto pipeControl = genCmdCast(*hwParser.pipeControlList.begin()); ASSERT_NE(nullptr, pipeControl); EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_NO_WRITE, pipeControl->getPostSyncOperation()); EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, 
nullptr, nullptr); EXPECT_EQ(secondEnqueueOffset, csr.commandStream.getUsed()); // nothing programmed when flag is not set } HWTEST_F(TimestampPacketTests, givenKernelWhichDoesntRequireFlushWhenEnqueueingKernelThenOneNodeIsCreated) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableCacheFlushAfterWalker.set(false); auto &csr = device->getUltCommandStreamReceiver(); csr.timestampPacketWriteEnabled = true; auto mockTagAllocator = new MockTagAllocator<>(device->getRootDeviceIndex(), executionEnvironment->memoryManager.get()); csr.timestampPacketAllocator.reset(mockTagAllocator); auto cmdQ = std::make_unique>(context, device.get(), nullptr); // obtain first node for cmdQ and event1 cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); auto size = cmdQ->timestampPacketContainer->peekNodes().size(); EXPECT_EQ(size, 1u); } HWTEST_F(TimestampPacketTests, givenKernelWhichRequiresFlushWhenEnqueueingKernelThenTwoNodesAreCreated) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableCacheFlushAfterWalker.set(true); auto &csr = device->getUltCommandStreamReceiver(); csr.timestampPacketWriteEnabled = true; auto mockTagAllocator = new MockTagAllocator<>(device->getRootDeviceIndex(), executionEnvironment->memoryManager.get()); csr.timestampPacketAllocator.reset(mockTagAllocator); auto cmdQ = std::make_unique>(context, device.get(), nullptr); kernel->mockKernel->svmAllocationsRequireCacheFlush = true; // obtain first node for cmdQ and event1 cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); auto node1 = cmdQ->timestampPacketContainer->peekNodes().at(0); auto node2 = cmdQ->timestampPacketContainer->peekNodes().at(1); auto size = cmdQ->timestampPacketContainer->peekNodes().size(); EXPECT_EQ(size, 2u); EXPECT_NE(nullptr, node1); EXPECT_NE(nullptr, node2); EXPECT_NE(node1, node2); } 
compute-runtime-22.14.22890/opencl/test/unit_test/helpers/timestamp_packet_tests.h000066400000000000000000000062511422164147700302220ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_platform.h" using namespace NEO; struct TimestampPacketSimpleTests : public ::testing::Test { class MockTimestampPacketStorage : public TimestampPackets { public: using TimestampPackets::packets; }; template void setTagToReadyState(TagNodeBase *tagNode) { auto packetsUsed = tagNode->getPacketsUsed(); tagNode->initialize(); typename FamilyType::TimestampPacketType zeros[4] = {}; for (uint32_t i = 0; i < TimestampPacketSizeControl::preferredPacketCount; i++) { tagNode->assignDataToAllTimestamps(i, zeros); } tagNode->setPacketsUsed(packetsUsed); } const size_t gws[3] = {1, 1, 1}; }; struct TimestampPacketTests : public TimestampPacketSimpleTests { struct MockTagNode : public TagNode> { using TagNode>::gpuAddress; }; void SetUp() override { DebugManager.flags.EnableTimestampPacket.set(1); executionEnvironment = platform()->peekExecutionEnvironment(); executionEnvironment->prepareRootDeviceEnvironments(2); for (auto i = 0u; i < executionEnvironment->rootDeviceEnvironments.size(); i++) { executionEnvironment->rootDeviceEnvironments[i]->setHwInfo(defaultHwInfo.get()); } device = std::make_unique(Device::create(executionEnvironment, 0u)); context = new MockContext(device.get()); kernel = std::make_unique(*device, context); mockCmdQ = new MockCommandQueue(context, device.get(), nullptr, 
false); } void TearDown() override { mockCmdQ->release(); context->release(); } template void verifySemaphore(MI_SEMAPHORE_WAIT *semaphoreCmd, TagNodeBase *timestampPacketNode, uint32_t packetId) { EXPECT_NE(nullptr, semaphoreCmd); EXPECT_EQ(semaphoreCmd->getCompareOperation(), MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD); EXPECT_EQ(1u, semaphoreCmd->getSemaphoreDataDword()); uint64_t compareOffset = packetId * TimestampPackets::getSinglePacketSize(); auto dataAddress = TimestampPacketHelper::getContextEndGpuAddress(*timestampPacketNode) + compareOffset; EXPECT_EQ(dataAddress, semaphoreCmd->getSemaphoreGraphicsAddress()); }; ExecutionEnvironment *executionEnvironment; std::unique_ptr device; MockContext *context; std::unique_ptr kernel; MockCommandQueue *mockCmdQ; DebugManagerStateRestore restorer; }; compute-runtime-22.14.22890/opencl/test/unit_test/helpers/transfer_properties_tests.cpp000066400000000000000000000135141422164147700313230ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/mocks/mock_execution_environment.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/helpers/properties_helper.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "gtest/gtest.h" using namespace NEO; TEST(TransferPropertiesTest, givenTransferPropertiesCreatedWhenDefaultDebugSettingThenLockPtrIsNotSet) { MockBuffer buffer; const uint32_t rootDeviceIndex = buffer.mockGfxAllocation.getRootDeviceIndex(); size_t offset = 0; size_t size = 4096u; TransferProperties transferProperties(&buffer, CL_COMMAND_MAP_BUFFER, 0, false, &offset, &size, nullptr, true, rootDeviceIndex); EXPECT_EQ(nullptr, transferProperties.lockedPtr); } TEST(TransferPropertiesTest, 
givenAllocationInNonSystemPoolWhenTransferPropertiesAreCreatedForMapBufferAndCpuTransferIsRequestedThenLockPtrIsSet) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MemoryManagerCreate memoryManager(false, true, executionEnvironment); MockContext context; context.memoryManager = &memoryManager; cl_int retVal; std::unique_ptr buffer(Buffer::create(&context, 0, 1, nullptr, retVal)); static_cast(buffer->getGraphicsAllocation(context.getDevice(0)->getRootDeviceIndex()))->overrideMemoryPool(MemoryPool::SystemCpuInaccessible); size_t offset = 0; size_t size = 4096u; TransferProperties transferProperties(buffer.get(), CL_COMMAND_MAP_BUFFER, 0, false, &offset, &size, nullptr, true, context.getDevice(0)->getRootDeviceIndex()); EXPECT_NE(nullptr, transferProperties.lockedPtr); } TEST(TransferPropertiesTest, givenAllocationInNonSystemPoolWhenTransferPropertiesAreCreatedForMapBufferAndCpuTransferIsNotRequestedThenLockPtrIsNotSet) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MemoryManagerCreate memoryManager(false, true, executionEnvironment); MockContext context; context.memoryManager = &memoryManager; cl_int retVal; std::unique_ptr buffer(Buffer::create(&context, 0, 1, nullptr, retVal)); static_cast(buffer->getGraphicsAllocation(context.getDevice(0)->getRootDeviceIndex()))->overrideMemoryPool(MemoryPool::SystemCpuInaccessible); size_t offset = 0; size_t size = 4096u; TransferProperties transferProperties(buffer.get(), CL_COMMAND_MAP_BUFFER, 0, false, &offset, &size, nullptr, false, context.getDevice(0)->getRootDeviceIndex()); EXPECT_EQ(nullptr, transferProperties.lockedPtr); } TEST(TransferPropertiesTest, givenAllocationInSystemPoolWhenTransferPropertiesAreCreatedForMapBufferThenLockPtrIsNotSet) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MemoryManagerCreate memoryManager(false, true, executionEnvironment); MockContext context; context.memoryManager = &memoryManager; cl_int retVal; std::unique_ptr 
buffer(Buffer::create(&context, 0, 1, nullptr, retVal)); static_cast(buffer->getGraphicsAllocation(context.getDevice(0)->getRootDeviceIndex()))->overrideMemoryPool(MemoryPool::System4KBPages); size_t offset = 0; size_t size = 4096u; TransferProperties transferProperties(buffer.get(), CL_COMMAND_MAP_BUFFER, 0, false, &offset, &size, nullptr, true, context.getDevice(0)->getRootDeviceIndex()); EXPECT_EQ(nullptr, transferProperties.lockedPtr); } TEST(TransferPropertiesTest, givenTransferPropertiesWhenLockedPtrIsSetThenItIsReturnedForReadWrite) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MemoryManagerCreate memoryManager(false, true, executionEnvironment); MockContext context; context.memoryManager = &memoryManager; cl_int retVal; std::unique_ptr buffer(Buffer::create(&context, 0, 1, nullptr, retVal)); static_cast(buffer->getGraphicsAllocation(context.getDevice(0)->getRootDeviceIndex()))->overrideMemoryPool(MemoryPool::SystemCpuInaccessible); size_t offset = 0; size_t size = 4096u; TransferProperties transferProperties(buffer.get(), CL_COMMAND_MAP_BUFFER, 0, false, &offset, &size, nullptr, true, context.getDevice(0)->getRootDeviceIndex()); ASSERT_NE(nullptr, transferProperties.lockedPtr); EXPECT_EQ(transferProperties.lockedPtr, transferProperties.getCpuPtrForReadWrite()); } TEST(TransferPropertiesTest, givenTransferPropertiesWhenLockedPtrIsNotSetThenItIsNotReturnedForReadWrite) { MockBuffer buffer; const uint32_t rootDeviceIndex = buffer.mockGfxAllocation.getRootDeviceIndex(); size_t offset = 0; size_t size = 4096u; TransferProperties transferProperties(&buffer, CL_COMMAND_MAP_BUFFER, 0, false, &offset, &size, nullptr, true, rootDeviceIndex); ASSERT_EQ(nullptr, transferProperties.lockedPtr); EXPECT_NE(transferProperties.lockedPtr, transferProperties.getCpuPtrForReadWrite()); } TEST(TransferPropertiesTest, givenTransferPropertiesWhenLockedPtrIsSetThenLockedPtrWithMemObjOffsetIsReturnedForReadWrite) { MockBuffer buffer; const uint32_t 
rootDeviceIndex = buffer.mockGfxAllocation.getRootDeviceIndex(); void *lockedPtr = reinterpret_cast(0x1000); auto memObjOffset = MemoryConstants::cacheLineSize; buffer.offset = memObjOffset; size_t offset = 0; size_t size = 4096u; TransferProperties transferProperties(&buffer, CL_COMMAND_MAP_BUFFER, 0, false, &offset, &size, nullptr, true, rootDeviceIndex); transferProperties.lockedPtr = lockedPtr; auto expectedPtr = ptrOffset(lockedPtr, memObjOffset); EXPECT_EQ(expectedPtr, transferProperties.getCpuPtrForReadWrite()); } compute-runtime-22.14.22890/opencl/test/unit_test/helpers/uint16_sse4_tests.cpp000066400000000000000000000077521422164147700273160ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/uint16_sse4.h" #include "gtest/gtest.h" using namespace NEO; TEST(Uint16Sse4, GivenMaskWhenCastingToBoolThenTrueIsReturned) { EXPECT_TRUE(static_cast(NEO::uint16x8_t::mask())); } TEST(Uint16Sse4, GivenZeroWhenCastingToBoolThenFalseIsReturned) { EXPECT_FALSE(static_cast(NEO::uint16x8_t::zero())); } TEST(Uint16Sse4, WhenConjoiningMaskAndZeroThenBooleanResultIsCorrect) { EXPECT_TRUE(NEO::uint16x8_t::mask() && NEO::uint16x8_t::mask()); EXPECT_FALSE(NEO::uint16x8_t::mask() && NEO::uint16x8_t::zero()); EXPECT_FALSE(NEO::uint16x8_t::zero() && NEO::uint16x8_t::mask()); EXPECT_FALSE(NEO::uint16x8_t::zero() && NEO::uint16x8_t::zero()); } TEST(Uint16Sse4, GivenOneWhenCreatingThenInstancesAreSame) { auto one = NEO::uint16x8_t::one(); NEO::uint16x8_t alsoOne(one.value); EXPECT_EQ(0, memcmp(&alsoOne, &one, sizeof(NEO::uint16x8_t))); } TEST(Uint16Sse4, GivenValueWhenCreatingThenConstructorIsReplicated) { NEO::uint16x8_t allSevens(7u); for (int i = 0; i < NEO::uint16x8_t::numChannels; ++i) { EXPECT_EQ(7u, allSevens.get(i)); } } ALIGNAS(32) static const uint16_t laneValues[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 
19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}; TEST(Uint16Sse4, GivenArrayWhenCreatingThenConstructorIsReplicated) { NEO::uint16x8_t lanes(laneValues); for (int i = 0; i < NEO::uint16x8_t::numChannels; ++i) { EXPECT_EQ(static_cast(i), lanes.get(i)); } } TEST(Uint16Sse4, WhenLoadingThenValuesAreSetCorrectly) { NEO::uint16x8_t lanes; lanes.load(laneValues); for (int i = 0; i < NEO::uint16x8_t::numChannels; ++i) { EXPECT_EQ(static_cast(i), lanes.get(i)); } } TEST(Uint16Sse4, WhenLoadingUnalignedThenValuesAreSetCorrectly) { NEO::uint16x8_t lanes; lanes.loadUnaligned(laneValues + 1); for (int i = 0; i < NEO::uint16x8_t::numChannels; ++i) { EXPECT_EQ(static_cast(i + 1), lanes.get(i)); } } TEST(Uint16Sse4, WhenStoringThenValuesAreSetCorrectly) { uint16_t *alignedMemory = reinterpret_cast(alignedMalloc(1024, 32)); NEO::uint16x8_t lanes(laneValues); lanes.store(alignedMemory); for (int i = 0; i < NEO::uint16x8_t::numChannels; ++i) { EXPECT_EQ(static_cast(i), alignedMemory[i]); } alignedFree(alignedMemory); } TEST(Uint16Sse4, WhenStoringUnalignedThenValuesAreSetCorrectly) { uint16_t *alignedMemory = reinterpret_cast(alignedMalloc(1024, 32)); NEO::uint16x8_t lanes(laneValues); lanes.storeUnaligned(alignedMemory + 1); for (int i = 0; i < NEO::uint16x8_t::numChannels; ++i) { EXPECT_EQ(static_cast(i), (alignedMemory + 1)[i]); } alignedFree(alignedMemory); } TEST(Uint16Sse4, WhenDecrementingThenValuesAreSetCorrectly) { NEO::uint16x8_t result(laneValues); result -= NEO::uint16x8_t::one(); for (int i = 0; i < NEO::uint16x8_t::numChannels; ++i) { EXPECT_EQ(static_cast(i - 1), result.get(i)); } } TEST(Uint16Sse4, WhenIncrementingThenValuesAreSetCorrectly) { NEO::uint16x8_t result(laneValues); result += NEO::uint16x8_t::one(); for (int i = 0; i < NEO::uint16x8_t::numChannels; ++i) { EXPECT_EQ(static_cast(i + 1), result.get(i)); } } TEST(Uint16Sse4, WhenBlendingThenValuesAreSetCorrectly) { NEO::uint16x8_t a(NEO::uint16x8_t::one()); NEO::uint16x8_t b(NEO::uint16x8_t::zero()); 
NEO::uint16x8_t c; // c = mask ? a : b c = blend(a, b, NEO::uint16x8_t::mask()); for (int i = 0; i < NEO::uint16x8_t::numChannels; ++i) { EXPECT_EQ(a.get(i), c.get(i)); } // c = mask ? a : b c = blend(a, b, NEO::uint16x8_t::zero()); for (int i = 0; i < NEO::uint16x8_t::numChannels; ++i) { EXPECT_EQ(b.get(i), c.get(i)); } } compute-runtime-22.14.22890/opencl/test/unit_test/helpers/ult_limits.h000066400000000000000000000003131422164147700256240ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include namespace NEO { constexpr uint32_t maxRootDeviceCount = 3u; } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/helpers/validator_tests.cpp000066400000000000000000000224751422164147700272160ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/ptr_math.h" #include "shared/source/helpers/validators.h" #include "opencl/source/api/cl_types.h" #include "opencl/source/helpers/base_object.h" #include "opencl/source/helpers/cl_validators.h" #include "opencl/source/helpers/error_mappers.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "gtest/gtest.h" using namespace NEO; template struct ValidatorFixture : public ::testing::Test { }; TYPED_TEST_CASE_P(ValidatorFixture); TYPED_TEST_P(ValidatorFixture, GivenNullPtrWhenValidatingThenSuccessIsReturned) { TypeParam object = nullptr; cl_int rv = NullObjectErrorMapper::retVal; EXPECT_EQ(rv, validateObjects(object)); } TYPED_TEST_P(ValidatorFixture, GivenRandomMemoryWhenValidatingThenSuccessIsReturned) { // 6*uint64_t to satisfy memory requirements // we need 2 before object (dispatchTable) // and 4 of object (magic) uint64_t randomMemory[6] = { 0xdeadbeef, }; TypeParam object = (TypeParam)(randomMemory + 2); cl_int rv = 
InvalidObjectErrorMapper::retVal; EXPECT_EQ(rv, validateObjects(object)); } REGISTER_TYPED_TEST_CASE_P( ValidatorFixture, GivenNullPtrWhenValidatingThenSuccessIsReturned, GivenRandomMemoryWhenValidatingThenSuccessIsReturned); // Define new command types to run the parameterized tests typedef ::testing::Types< cl_command_queue, cl_context, cl_device_id, cl_event, cl_kernel, cl_mem, cl_platform_id, cl_program, uint64_t /*cl_queue_properties*/ *, cl_sampler> ValidatorParams; INSTANTIATE_TYPED_TEST_CASE_P(Validator, ValidatorFixture, ValidatorParams); TEST(GenericValidator, GivenNullCtxAndNullCqWhenValidatingThenInvalidContextIsReturned) { cl_context context = nullptr; cl_command_queue command_queue = nullptr; EXPECT_EQ(CL_INVALID_CONTEXT, validateObjects(context, command_queue)); } TEST(UserPointer, GivenNullPtrWhenValidatingThenInvalidValueIsReturned) { void *ptr = nullptr; EXPECT_EQ(CL_INVALID_VALUE, validateObjects(ptr)); } TEST(UserPointer, GivenNonNullPtrWhenValidatingThenSuccessIsReturned) { void *ptr = ptrGarbage; EXPECT_EQ(CL_SUCCESS, validateObjects(ptr)); } TEST(EventWaitList, GivenZeroEventsAndNonEmptyListWhenValidatingThenInvalidEventWaitListIsReturned) { cl_event eventList = (cl_event)ptrGarbage; EXPECT_EQ(CL_INVALID_EVENT_WAIT_LIST, validateObjects(EventWaitList(0, &eventList))); } TEST(EventWaitList, GivenZeroEventsAndEmptyListWhenValidatingThenSuccessIsReturned) { EXPECT_EQ(CL_SUCCESS, validateObjects(EventWaitList(0, nullptr))); } TEST(EventWaitList, GivenNonZeroEventsAndEmptyListWhenValidatingThenInvalidEventWaitListIsReturned) { EXPECT_EQ(CL_INVALID_EVENT_WAIT_LIST, validateObjects(EventWaitList(1, nullptr))); } TEST(EventWaitList, GivenNonZeroEventsAndIncorrectListWhenValidatingThenInvalidEventWaitListIsReturned) { cl_event eventList = (cl_event)ptrGarbage; EXPECT_EQ(CL_INVALID_EVENT_WAIT_LIST, validateObjects(EventWaitList(1, &eventList))); } TEST(DeviceList, GivenZeroDevicesAndIncorrectListWhenValidatingThenInvalidValueIsReturned) { cl_device_id 
devList = (cl_device_id)ptrGarbage; EXPECT_EQ(CL_INVALID_VALUE, validateObjects(DeviceList(0, &devList))); } TEST(DeviceList, GivenZeroDevicesAndEmptyListWhenValidatingThenSuccessIsReturned) { EXPECT_EQ(CL_SUCCESS, validateObjects(DeviceList(0, nullptr))); } TEST(DeviceList, GivenNonZeroDevicesAndEmptyListWhenValidatingThenInvalidValueIsReturned) { EXPECT_EQ(CL_INVALID_VALUE, validateObjects(DeviceList(1, nullptr))); } TEST(DeviceList, GivenNonZeroDevicesAndInvalidListWhenValidatingThenInvalidValueIsReturned) { cl_device_id devList = (cl_device_id)ptrGarbage; EXPECT_EQ(CL_INVALID_DEVICE, validateObjects(DeviceList(1, &devList))); } TEST(MemObjList, GivenZeroMemObjsAndIncorrectListWhenValidatingThenInvalidValueIsReturned) { cl_mem memList = static_cast(ptrGarbage); EXPECT_EQ(CL_INVALID_VALUE, validateObjects(MemObjList(0, &memList))); } TEST(MemObjList, GivenZeroMemObjsAndNullPtrWhenValidatingThenSuccessIsReturned) { EXPECT_EQ(CL_SUCCESS, validateObjects(MemObjList(0, nullptr))); } TEST(MemObjList, GivenNonZeroMemObjsAndNullPtrWhenValidatingThenInvalidValueIsReturned) { EXPECT_EQ(CL_INVALID_VALUE, validateObjects(MemObjList(1, nullptr))); } TEST(MemObjList, GivenNonZeroMemObjsAndIncorrectListWhenValidatingThenInvalidMemObjIsReturned) { cl_mem memList = static_cast(ptrGarbage); EXPECT_EQ(CL_INVALID_MEM_OBJECT, validateObjects(MemObjList(1, &memList))); } TEST(MemObjList, GivenNonZeroMemObjsAndNonNullPtrWhenValidatingBufferThenSuccessIsReturned) { std::unique_ptr buffer(new MockBuffer()); cl_mem memList = static_cast(buffer.get()); EXPECT_EQ(CL_SUCCESS, validateObjects(MemObjList(1, &memList))); } TEST(NonZeroBufferSizeValidator, GivenSizeZeroWhenValidatingBufferThenInvalidBufferSizeIsReturned) { auto bsv = (NonZeroBufferSize)0; EXPECT_EQ(CL_INVALID_BUFFER_SIZE, validateObjects(bsv)); } TEST(NonZeroBufferSizeValidator, GivenNonZeroSizeWhenValidatingBufferThenSuccessIsReturned) { auto bsv = (NonZeroBufferSize)~0; EXPECT_EQ(CL_SUCCESS, validateObjects(bsv)); } 
TEST(Platform, givenNullPlatformThenReturnInvalidPlatform) { cl_platform_id platform = nullptr; EXPECT_EQ(CL_INVALID_PLATFORM, validateObjects(platform)); } TEST(Platform, GivenValidPlatformWhenValidatingThenSuccessIsReturned) { MockPlatform platform; cl_platform_id clPlatformId = &platform; EXPECT_EQ(CL_SUCCESS, validateObjects(clPlatformId)); } TEST(ValidatorBool, GivenBoolFlagWhenValidatingObjectThenCorrectValueIsReturned) { EXPECT_EQ(CL_INVALID_VALUE, validateObject(false)); EXPECT_EQ(CL_INVALID_VALUE, validateObjects(false, true)); EXPECT_EQ(CL_SUCCESS, validateObject(true)); } typedef ::testing::TestWithParam PatternSizeValid; TEST_P(PatternSizeValid, GivenValidPatternSizeWhenValidatingThenSuccessIsReturned) { auto psv = (PatternSize)GetParam(); EXPECT_EQ(CL_SUCCESS, validateObjects(psv)); } INSTANTIATE_TEST_CASE_P(PatternSize, PatternSizeValid, ::testing::Values(1, 2, 4, 8, 16, 32, 64, 128)); typedef ::testing::TestWithParam PatternSizeInvalid; TEST_P(PatternSizeInvalid, GivenInvalidPatternSizeWhenValidatingThenInvalidValueIsReturned) { auto psv = (PatternSize)GetParam(); EXPECT_EQ(CL_INVALID_VALUE, validateObjects(psv)); } INSTANTIATE_TEST_CASE_P(PatternSize, PatternSizeInvalid, ::testing::Values(0, 3, 5, 256, 512, 1024)); TEST(WithCastToInternal, GivenNullPtrWhenCastingThenNullPtrIsReturned) { Context *pContext = nullptr; cl_context context = nullptr; auto ret = WithCastToInternal(context, &pContext); EXPECT_EQ(ret, nullptr); } TEST(WithCastToInternal, GivenNonNullPtrWhenCastingThenNonNullPtrIsReturned) { Context *pContext = nullptr; auto temp = std::unique_ptr(new MockContext()); cl_context context = temp.get(); auto ret = WithCastToInternal(context, &pContext); EXPECT_NE(ret, nullptr); } TEST(validateYuvOperation, GivenValidateYuvOperationWhenValidOriginAndRegionThenReturnSuccess) { size_t origin[3] = {8, 0, 0}; size_t region[3] = {8, 0, 0}; auto ret = validateYuvOperation(origin, region); EXPECT_EQ(CL_SUCCESS, ret); } TEST(validateYuvOperation, 
GivenValidateYuvOperationWhenInvalidOriginThenReturnFailure) { size_t origin[3] = {1, 0, 0}; size_t region[3] = {8, 0, 0}; auto ret = validateYuvOperation(origin, region); EXPECT_EQ(CL_INVALID_VALUE, ret); } TEST(validateYuvOperation, GivenValidateYuvOperationWhenInvalidRegionThenReturnFailure) { size_t origin[3] = {8, 0, 0}; size_t region[3] = {1, 0, 0}; auto ret = validateYuvOperation(origin, region); EXPECT_EQ(CL_INVALID_VALUE, ret); } TEST(validateYuvOperation, GivenValidateYuvOperationWhenNullOriginThenReturnFailure) { size_t *origin = nullptr; size_t region[3] = {1, 0, 0}; auto ret = validateYuvOperation(origin, region); EXPECT_EQ(CL_INVALID_VALUE, ret); } TEST(validateYuvOperation, GivenValidateYuvOperationWhenNullRegionThenReturnFailure) { size_t origin[3] = {8, 0, 0}; size_t *region = nullptr; auto ret = validateYuvOperation(origin, region); EXPECT_EQ(CL_INVALID_VALUE, ret); } TEST(areNotNullptr, WhenGivenAllNonNullParamsThenReturnsTrue) { int a = 0; int b = 0; int c = 0; EXPECT_TRUE(areNotNullptr(&a)); EXPECT_TRUE(areNotNullptr(&a, &b)); EXPECT_TRUE(areNotNullptr(&a, &b, &c)); } TEST(areNotNullptr, WhenGivenAllNullParamsThenReturnsFalse) { int *a = nullptr; int *b = nullptr; int *c = nullptr; EXPECT_FALSE(areNotNullptr(a)); EXPECT_FALSE(areNotNullptr(a, b)); EXPECT_FALSE(areNotNullptr(a, b, c)); } TEST(areNotNullptr, WhenGivenNullParameterAmongNonNullParamsThenReturnsFalse) { int *a = nullptr; int b = 0; int c = 0; EXPECT_FALSE(areNotNullptr(a)); EXPECT_FALSE(areNotNullptr(a, &b)); EXPECT_FALSE(areNotNullptr(&b, a)); EXPECT_FALSE(areNotNullptr(a, &b, &c)); EXPECT_FALSE(areNotNullptr(&b, a, &c)); } compute-runtime-22.14.22890/opencl/test/unit_test/helpers/windows/000077500000000000000000000000001422164147700247635ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/helpers/windows/CMakeLists.txt000066400000000000000000000007171422164147700275300ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel Corporation # # 
SPDX-License-Identifier: MIT # if(WIN32) set(IGDRCL_SRCS_tests_helpers_windows ${CMAKE_CURRENT_SOURCE_DIR}/kmd_notify_windows_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gl_helper_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_function.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_function.cpp ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_helpers_windows}) add_subdirectories() endif() compute-runtime-22.14.22890/opencl/test/unit_test/helpers/windows/gl_helper_tests.cpp000066400000000000000000000036331422164147700306570ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/os_library.h" #include "shared/source/os_interface/windows/windows_wrapper.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/helpers/windows/gl_helper.h" #include "opencl/test/unit_test/helpers/windows/mock_function.h" #include "gtest/gtest.h" #include typedef const char *(__cdecl *funcType)(); namespace NEO { class glFunctionHelperMock : public glFunctionHelper { public: glFunctionHelperMock(OsLibrary *glLibrary, const std::string &functionName) : glFunctionHelper(glLibrary, functionName) {} using glFunctionHelper::glFunctionPtr; }; TEST(glFunctionHelper, whenCreateGlFunctionHelperThenSetGlFunctionPtrToLoadAnotherFunctions) { std::unique_ptr glLibrary(OsLibrary::load("mock_opengl32.dll")); EXPECT_TRUE(glLibrary->isLoaded()); glFunctionHelperMock loader(glLibrary.get(), "mockLoader"); funcType function1 = ConvertibleProcAddr{loader.glFunctionPtr("realFunction")}; funcType function2 = loader["realFunction"]; EXPECT_STREQ(function1(), function2()); } TEST(glFunctionHelper, givenNonExistingFunctionNameWhenCreateGlFunctionHelperThenNullptr) { std::unique_ptr glLibrary(OsLibrary::load("mock_opengl32.dll")); EXPECT_TRUE(glLibrary->isLoaded()); glFunctionHelper loader(glLibrary.get(), "mockLoader"); funcType function = loader["nonExistingFunction"]; EXPECT_EQ(nullptr, function); } 
TEST(glFunctionHelper, givenRealFunctionNameWhenCreateGlFunctionHelperThenGetPointerToAppropriateFunction) { std::unique_ptr glLibrary(OsLibrary::load("mock_opengl32.dll")); EXPECT_TRUE(glLibrary->isLoaded()); glFunctionHelper loader(glLibrary.get(), "mockLoader"); funcType function = loader["realFunction"]; EXPECT_STREQ(realFunction(), function()); } }; // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/helpers/windows/kmd_notify_windows_tests.cpp000066400000000000000000000051201422164147700326240ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/kmd_notify_properties.h" #include "shared/source/os_interface/windows/sys_calls.h" #include "shared/test/common/helpers/default_hw_info.h" #include "shared/test/common/helpers/variable_backup.h" #include "shared/test/common/test_macros/test.h" namespace NEO { namespace SysCalls { extern BOOL systemPowerStatusRetVal; extern BYTE systemPowerStatusACLineStatusOverride; } // namespace SysCalls class MockKmdNotifyHelper : public KmdNotifyHelper { public: using KmdNotifyHelper::acLineConnected; using KmdNotifyHelper::getBaseTimeout; using KmdNotifyHelper::updateAcLineStatus; MockKmdNotifyHelper(const KmdNotifyProperties *newProperties) : KmdNotifyHelper(newProperties){}; }; TEST(KmdNotifyWindowsTests, whenGetSystemPowerStatusReturnSuccessThenUpdateAcLineStatus) { auto properties = &(defaultHwInfo->capabilityTable.kmdNotifyProperties); MockKmdNotifyHelper helper(properties); EXPECT_TRUE(helper.acLineConnected); VariableBackup systemPowerStatusRetValBkp(&SysCalls::systemPowerStatusRetVal); VariableBackup systemPowerStatusACLineStatusOverrideBkp(&SysCalls::systemPowerStatusACLineStatusOverride); systemPowerStatusRetValBkp = 1; systemPowerStatusACLineStatusOverrideBkp = 0; helper.updateAcLineStatus(); EXPECT_FALSE(helper.acLineConnected); systemPowerStatusACLineStatusOverrideBkp = 1; helper.updateAcLineStatus(); 
EXPECT_TRUE(helper.acLineConnected); } TEST(KmdNotifyWindowsTests, whenGetSystemPowerStatusReturnErrorThenDontUpdateAcLineStatus) { auto properties = &(defaultHwInfo->capabilityTable.kmdNotifyProperties); MockKmdNotifyHelper helper(properties); EXPECT_TRUE(helper.acLineConnected); VariableBackup systemPowerStatusRetValBkp(&SysCalls::systemPowerStatusRetVal); VariableBackup systemPowerStatusACLineStatusOverrideBkp(&SysCalls::systemPowerStatusACLineStatusOverride); systemPowerStatusRetValBkp = 0; systemPowerStatusACLineStatusOverrideBkp = 0; helper.updateAcLineStatus(); EXPECT_TRUE(helper.acLineConnected); } TEST(KmdNotifyWindowsTests, givenTaskCountDiffGreaterThanOneWhenBaseTimeoutRequestedThenDontMultiply) { auto localProperties = (defaultHwInfo->capabilityTable.kmdNotifyProperties); localProperties.delayKmdNotifyMicroseconds = 10; const int64_t multiplier = 10; MockKmdNotifyHelper helper(&localProperties); EXPECT_EQ(localProperties.delayKmdNotifyMicroseconds, helper.getBaseTimeout(multiplier)); } } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/helpers/windows/mock_function.cpp000066400000000000000000000002461422164147700303270ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "mock_function.h" const char *realFunction() { return "value"; } compute-runtime-22.14.22890/opencl/test/unit_test/helpers/windows/mock_function.h000066400000000000000000000001671422164147700277760ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ const char *realFunction(); compute-runtime-22.14.22890/opencl/test/unit_test/igdrcl_tests_pch.cpp000066400000000000000000000002171422164147700256530ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/igdrcl_tests_pch.h" 
compute-runtime-22.14.22890/opencl/test/unit_test/igdrcl_tests_pch.h000066400000000000000000000015471422164147700253270ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gmm_helper/gmm_lib.h" #include "shared/source/helpers/abort.h" #include "shared/source/helpers/completion_stamp.h" #include "shared/source/helpers/constants.h" #include "shared/source/helpers/debug_helpers.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/helpers/kmd_notify_properties.h" #include "shared/source/helpers/ptr_math.h" #include "shared/source/sku_info/sku_info_base.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "shared/test/common/test_macros/test.h" #include "hw_cmds.h" #include "third_party/opencl_headers/CL/cl.h" #include #include #include #include #include #include #include #include #include #include compute-runtime-22.14.22890/opencl/test/unit_test/indirect_heap/000077500000000000000000000000001422164147700244255ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/indirect_heap/indirect_heap_fixture.cpp000066400000000000000000000010411422164147700314710ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/indirect_heap/indirect_heap_fixture.h" #include "opencl/source/command_queue/command_queue.h" namespace NEO { void IndirectHeapFixture::SetUp(CommandQueue *pCmdQ) { pDSH = &pCmdQ->getIndirectHeap(IndirectHeap::Type::DYNAMIC_STATE, 8192); pSSH = &pCmdQ->getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 4096); pIOH = &pCmdQ->getIndirectHeap(IndirectHeap::Type::INDIRECT_OBJECT, 4096); } } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/indirect_heap/indirect_heap_fixture.h000066400000000000000000000006621422164147700311460ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * 
SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/indirect_heap/indirect_heap.h" namespace NEO { class CommandQueue; struct IndirectHeapFixture { virtual void SetUp(CommandQueue *pCmdQ); virtual void TearDown() { } IndirectHeap *pDSH = nullptr; IndirectHeap *pIOH = nullptr; IndirectHeap *pSSH = nullptr; }; } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/kernel/000077500000000000000000000000001422164147700231075ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/kernel/CMakeLists.txt000066400000000000000000000033661422164147700256570ustar00rootroot00000000000000# # Copyright (C) 2018-2022 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_kernel ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/cache_flush_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/clone_kernel_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/image_transformer_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_accelerator_arg_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_arg_buffer_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_arg_buffer_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/kernel_arg_buffer_fixture.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_arg_info_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_arg_pipe_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_arg_svm_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_cache_flush_requirements_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_info_cl_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_image_arg_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_immediate_arg_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_is_patched_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_slm_arg_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_slm_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_transformable_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/debug_kernel_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/substitute_kernel_heap_tests.cpp ) if(TESTS_XEHP_AND_LATER) list(APPEND 
IGDRCL_SRCS_tests_kernel ${CMAKE_CURRENT_SOURCE_DIR}/cache_flush_xehp_and_later_tests.inl ) endif() if(TESTS_PVC_AND_LATER) list(APPEND IGDRCL_SRCS_tests_kernel ${CMAKE_CURRENT_SOURCE_DIR}/kernel_tests_pvc_and_later.cpp ) endif() target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_kernel}) add_subdirectories() compute-runtime-22.14.22890/opencl/test/unit_test/kernel/cache_flush_tests.inl000066400000000000000000000610171422164147700273060ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/l3_range.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/memory_manager/unified_memory_manager.h" #include "shared/source/os_interface/hw_info_config.h" #include "shared/test/common/cmd_parse/hw_parse.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/mocks/mock_allocation_properties.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/command_queue/gpgpu_walker.h" #include "opencl/source/command_queue/resource_barrier.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/test/unit_test/fixtures/hello_world_fixture.h" #include "opencl/test/unit_test/helpers/cmd_buffer_validator.h" #include "opencl/test/unit_test/helpers/hardware_commands_helper_tests.h" #include "opencl/test/unit_test/helpers/static_size3.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" using namespace NEO; template struct L3ControlPolicy : CmdValidator { L3ControlPolicy(typename FamilyType::L3_FLUSH_ADDRESS_RANGE::L3_FLUSH_EVICTION_POLICY expectedPolicy, bool isA0Stepping) : expectedPolicy(expectedPolicy), isA0Stepping(isA0Stepping) { } bool operator()(GenCmdList::iterator it, size_t numInScetion, const std::string &member, std::string &outReason) override { using L3_CONTROL = typename 
FamilyType::L3_CONTROL; auto l3ControlAddress = genCmdCast(*it)->getL3FlushAddressRange(); if (l3ControlAddress.getL3FlushEvictionPolicy(isA0Stepping) != expectedPolicy) { outReason = "Invalid L3_FLUSH_EVICTION_POLICY - expected: " + std::to_string(expectedPolicy) + ", got :" + std::to_string(l3ControlAddress.getL3FlushEvictionPolicy(isA0Stepping)); return false; } l3RangesParsed.push_back(L3Range::fromAddressMask(l3ControlAddress.getAddress(isA0Stepping), l3ControlAddress.getAddressMask(isA0Stepping))); return true; } L3RangesVec l3RangesParsed; typename FamilyType::L3_FLUSH_ADDRESS_RANGE::L3_FLUSH_EVICTION_POLICY expectedPolicy; bool isA0Stepping; }; template class GivenCacheFlushAfterWalkerEnabledWhenSvmAllocationsSetAsCacheFlushRequiringThenExpectCacheFlushCommand : public HardwareCommandsTest { public: void TestBodyImpl() { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using L3_CONTROL_WITHOUT_POST_SYNC = typename FamilyType::L3_CONTROL; DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableCacheFlushAfterWalker.set(1); CommandQueueHw cmdQ(nullptr, pClDevice, 0, false); auto &commandStream = cmdQ.getCS(1024); void *allocPtr = reinterpret_cast(static_cast(6 * MemoryConstants::pageSize)); MockGraphicsAllocation svmAllocation{allocPtr, MemoryConstants::pageSize * 2}; svmAllocation.setFlushL3Required(true); this->mockKernelWithInternal->mockKernel->kernelSvmGfxAllocations.push_back(&svmAllocation); this->mockKernelWithInternal->mockKernel->svmAllocationsRequireCacheFlush = true; size_t expectedSize = sizeof(PIPE_CONTROL) + sizeof(L3_CONTROL_WITHOUT_POST_SYNC); size_t actualSize = HardwareCommandsHelper::getSizeRequiredForCacheFlush(cmdQ, this->mockKernelWithInternal->mockKernel, 0U); EXPECT_EQ(expectedSize, actualSize); HardwareCommandsHelper::programCacheFlushAfterWalkerCommand(&commandStream, cmdQ, this->mockKernelWithInternal->mockKernel, 0U); std::string err; auto cmdBuffOk = expectCmdBuff(cmdQ.getCS(0), 0, std::vector({ new MatchHwCmd(1, 
Expects{EXPECT_MEMBER(PIPE_CONTROL, getCommandStreamerStallEnable, true), EXPECT_MEMBER(PIPE_CONTROL, getDcFlushEnable, false)}), new MatchHwCmd(AtLeastOne), }), &err); EXPECT_TRUE(cmdBuffOk) << err; } }; template class GivenCacheFlushAfterWalkerEnabledAndProperSteppingIsSetWhenKernelArgIsSetAsCacheFlushRequiredAndA0SteppingIsDisabledThenExpectCacheFlushCommand : public HardwareCommandsTest { public: void TestBodyImpl(bool isA0Stepping) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using L3_FLUSH_ADDRESS_RANGE = typename FamilyType::L3_FLUSH_ADDRESS_RANGE; using L3_CONTROL_WITHOUT_POST_SYNC = typename FamilyType::L3_CONTROL; DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableCacheFlushAfterWalker.set(1); const auto &hwInfoConfig = *HwInfoConfig::get(hardwareInfo.platform.eProductFamily); auto stepping = (isA0Stepping ? REVISION_A0 : REVISION_A1); hardwareInfo.platform.usRevId = hwInfoConfig.getHwRevIdFromStepping(stepping, hardwareInfo); pDevice->executionEnvironment->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]->setHwInfo(&hardwareInfo); CommandQueueHw cmdQ(nullptr, pClDevice, 0, false); auto &commandStream = cmdQ.getCS(1024); addSpaceForSingleKernelArg(); this->mockKernelWithInternal->mockKernel->kernelArgRequiresCacheFlush.resize(2); void *allocPtr = reinterpret_cast(static_cast(6 * MemoryConstants::pageSize)); MockGraphicsAllocation cacheRequiringAllocation{allocPtr, MemoryConstants::pageSize * 7}; this->mockKernelWithInternal->mockKernel->kernelArgRequiresCacheFlush[0] = &cacheRequiringAllocation; L3RangesVec rangesExpected; coverRangeExact(cacheRequiringAllocation.getGpuAddress(), cacheRequiringAllocation.getUnderlyingBufferSize(), rangesExpected, FamilyType::L3_FLUSH_ADDRESS_RANGE::L3_FLUSH_EVICTION_POLICY_FLUSH_L3_WITH_EVICTION); size_t expectedSize = sizeof(PIPE_CONTROL) + rangesExpected.size() * sizeof(L3_CONTROL_WITHOUT_POST_SYNC); size_t actualSize = HardwareCommandsHelper::getSizeRequiredForCacheFlush(cmdQ, 
this->mockKernelWithInternal->mockKernel, 0U); EXPECT_EQ(expectedSize, actualSize); HardwareCommandsHelper::programCacheFlushAfterWalkerCommand(&commandStream, cmdQ, this->mockKernelWithInternal->mockKernel, 0U); L3ControlPolicy validateL3ControlPolicy{L3_FLUSH_ADDRESS_RANGE::L3_FLUSH_EVICTION_POLICY_FLUSH_L3_WITH_EVICTION, isA0Stepping}; std::string err; auto cmdBuffOk = expectCmdBuff(cmdQ.getCS(0), 0, std::vector{ new MatchHwCmd(1, Expects{EXPECT_MEMBER(PIPE_CONTROL, getCommandStreamerStallEnable, true), EXPECT_MEMBER(PIPE_CONTROL, getDcFlushEnable, false)}), new MatchHwCmd(AtLeastOne, {&validateL3ControlPolicy}), }, &err); EXPECT_TRUE(cmdBuffOk) << err; EXPECT_EQ(rangesExpected, validateL3ControlPolicy.l3RangesParsed); } }; template class GivenCacheFlushAfterWalkerEnabledWhenProgramGlobalSurfacePresentThenExpectCacheFlushCommand : public HardwareCommandsTest { public: void TestBodyImpl() { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using L3_CONTROL_WITHOUT_POST_SYNC = typename FamilyType::L3_CONTROL; DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableCacheFlushAfterWalker.set(1); CommandQueueHw cmdQ(nullptr, pClDevice, 0, false); auto &commandStream = cmdQ.getCS(1024); void *allocPtr = reinterpret_cast(static_cast(6 * MemoryConstants::pageSize)); MockGraphicsAllocation globalAllocation{allocPtr, MemoryConstants::pageSize * 2}; this->mockKernelWithInternal->mockProgram->setGlobalSurface(&globalAllocation); size_t expectedSize = sizeof(PIPE_CONTROL) + sizeof(L3_CONTROL_WITHOUT_POST_SYNC); size_t actualSize = HardwareCommandsHelper::getSizeRequiredForCacheFlush(cmdQ, this->mockKernelWithInternal->mockKernel, 0U); EXPECT_EQ(expectedSize, actualSize); HardwareCommandsHelper::programCacheFlushAfterWalkerCommand(&commandStream, cmdQ, this->mockKernelWithInternal->mockKernel, 0U); std::string err; auto cmdBuffOk = expectCmdBuff(cmdQ.getCS(0), 0, std::vector{ new MatchHwCmd(1, Expects{EXPECT_MEMBER(PIPE_CONTROL, getCommandStreamerStallEnable, 
true), EXPECT_MEMBER(PIPE_CONTROL, getDcFlushEnable, false)}), new MatchHwCmd(AtLeastOne)}, &err); EXPECT_TRUE(cmdBuffOk) << err; this->mockKernelWithInternal->mockProgram->setGlobalSurface(nullptr); } }; template class GivenCacheFlushAfterWalkerEnabledWhenProgramGlobalSurfacePresentAndPostSyncRequiredThenExpectProperCacheFlushCommand : public HardwareCommandsTest { public: void TestBodyImpl() { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using L3_CONTROL_WITH_POST_SYNC = typename FamilyType::L3_CONTROL; DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableCacheFlushAfterWalker.set(1); CommandQueueHw cmdQ(nullptr, pClDevice, 0, false); auto &commandStream = cmdQ.getCS(1024); void *allocPtr = reinterpret_cast(static_cast(6 * MemoryConstants::pageSize)); MockGraphicsAllocation globalAllocation{allocPtr, MemoryConstants::pageSize * 2}; this->mockKernelWithInternal->mockProgram->setGlobalSurface(&globalAllocation); constexpr uint64_t postSyncAddress = 1024; size_t expectedSize = sizeof(PIPE_CONTROL) + sizeof(L3_CONTROL_WITH_POST_SYNC); size_t actualSize = HardwareCommandsHelper::getSizeRequiredForCacheFlush(cmdQ, this->mockKernelWithInternal->mockKernel, postSyncAddress); EXPECT_EQ(expectedSize, actualSize); HardwareCommandsHelper::programCacheFlushAfterWalkerCommand(&commandStream, cmdQ, this->mockKernelWithInternal->mockKernel, postSyncAddress); std::string err; auto cmdBuffOk = expectCmdBuff(cmdQ.getCS(0), 0, std::vector{ new MatchHwCmd(1, Expects{EXPECT_MEMBER(PIPE_CONTROL, getCommandStreamerStallEnable, true), EXPECT_MEMBER(PIPE_CONTROL, getDcFlushEnable, false)}), new MatchHwCmd(1, Expects{EXPECT_MEMBER(L3_CONTROL_WITH_POST_SYNC, getPostSyncAddress, postSyncAddress), EXPECT_MEMBER(L3_CONTROL_WITH_POST_SYNC, getPostSyncImmediateData, 0)})}, &err); EXPECT_TRUE(cmdBuffOk) << err; this->mockKernelWithInternal->mockProgram->setGlobalSurface(nullptr); } }; using EnqueueKernelFixture = HelloWorldFixture; using EnqueueKernelTest = Test; template 
class GivenCacheFlushAfterWalkerEnabledAndProperSteppingIsSetWhenAllocationRequiresCacheFlushThenFlushCommandPresentAfterWalker : public EnqueueKernelTest { public: void TestBodyImpl(bool isA0Stepping) { using WALKER = typename FamilyType::WALKER_TYPE; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using L3_FLUSH_ADDRESS_RANGE = typename FamilyType::L3_FLUSH_ADDRESS_RANGE; using L3_CONTROL_WITHOUT_POST_SYNC = typename FamilyType::L3_CONTROL; DebugManagerStateRestore restore; DebugManager.flags.EnableCacheFlushAfterWalker.set(1); DebugManager.flags.EnableTimestampPacket.set(0); const auto &hwInfoConfig = *HwInfoConfig::get(hardwareInfo.platform.eProductFamily); auto stepping = (isA0Stepping ? REVISION_A0 : REVISION_A1); hardwareInfo.platform.usRevId = hwInfoConfig.getHwRevIdFromStepping(stepping, hardwareInfo); pDevice->executionEnvironment->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]->setHwInfo(&hardwareInfo); MockKernelWithInternals mockKernel(*pClDevice, context, true); mockKernel.mockKernel->svmAllocationsRequireCacheFlush = false; auto cmdQ = std::make_unique>(context, pClDevice, nullptr); cmdQ->getUltCommandStreamReceiver().timestampPacketWriteEnabled = false; auto memoryManager = pDevice->getUltCommandStreamReceiver().getMemoryManager(); SVMAllocsManager svmManager(memoryManager, false); void *svm = svmManager.createSVMAlloc(MemoryConstants::pageSize * 5, {}, context->getRootDeviceIndices(), context->getDeviceBitfields()); ASSERT_NE(nullptr, svm); auto svmData = svmManager.getSVMAlloc(svm); ASSERT_NE(nullptr, svmData); auto svmAllocation = svmData->gpuAllocations.getGraphicsAllocation(pDevice->getRootDeviceIndex()); ASSERT_NE(nullptr, svmAllocation); svmAllocation->setFlushL3Required(true); mockKernel.kernelInfo.kernelAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties(pDevice->getRootDeviceIndex(), true, MemoryConstants::pageSize, AllocationType::INTERNAL_HEAP)); 
mockKernel.mockKernel->kernelArgRequiresCacheFlush.resize(1); mockKernel.mockKernel->setArgSvmAlloc(0, svm, svmAllocation, 0u); cmdQ->getUltCommandStreamReceiver().timestampPacketWriteEnabled = false; cmdQ->enqueueKernel(mockKernel, 1, nullptr, StatickSize3<16, 1, 1>(), StatickSize3<16, 1, 1>(), 0, nullptr, nullptr); L3RangesVec rangesExpected; coverRangeExact(svmAllocation->getGpuAddress(), svmAllocation->getUnderlyingBufferSize(), rangesExpected, FamilyType::L3_FLUSH_ADDRESS_RANGE::L3_FLUSH_EVICTION_POLICY_FLUSH_L3_WITH_EVICTION); L3ControlPolicy validateL3ControlPolicy{L3_FLUSH_ADDRESS_RANGE::L3_FLUSH_EVICTION_POLICY_FLUSH_L3_WITH_EVICTION, isA0Stepping}; std::string err; auto cmdBuffOk = expectCmdBuff(cmdQ->getCS(0), 0, std::vector{new MatchAnyCmd(AnyNumber), new MatchHwCmd(1), new MatchAnyCmd(AnyNumber), new MatchHwCmd(1, Expects{EXPECT_MEMBER(PIPE_CONTROL, getCommandStreamerStallEnable, true), EXPECT_MEMBER(PIPE_CONTROL, getDcFlushEnable, false)}), new MatchHwCmd(AtLeastOne, Expects{&validateL3ControlPolicy}), new MatchAnyCmd(AnyNumber)}, &err); EXPECT_TRUE(cmdBuffOk) << err; EXPECT_EQ(rangesExpected, validateL3ControlPolicy.l3RangesParsed); memoryManager->freeGraphicsMemory(mockKernel.kernelInfo.kernelAllocation); svmManager.freeSVMAlloc(svm); } }; template class GivenCacheFlushAfterWalkerAndTimestampPacketsEnabledWhenAllocationRequiresCacheFlushThenFlushCommandPresentAfterWalker : public EnqueueKernelTest { public: void TestBodyImpl() { using WALKER = typename FamilyType::WALKER_TYPE; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using L3_FLUSH_ADDRESS_RANGE = typename FamilyType::L3_FLUSH_ADDRESS_RANGE; using L3_CONTROL_WITH_POST_SYNC = typename FamilyType::L3_CONTROL; DebugManagerStateRestore restore; DebugManager.flags.EnableCacheFlushAfterWalker.set(1); DebugManager.flags.EnableTimestampPacket.set(1); MockKernelWithInternals mockKernel(*pDevice, context, true); mockKernel.mockKernel->svmAllocationsRequireCacheFlush = false; auto cmdQ = 
std::make_unique>(context, pClDevice, nullptr); auto memoryManager = pDevice->getUltCommandStreamReceiver().getMemoryManager(); SVMAllocsManager svmManager(memoryManager, false); void *svm = svmManager.createSVMAlloc(MemoryConstants::pageSize * 5, {}, context->getRootDeviceIndices(), context->getDeviceBitfields()); ASSERT_NE(nullptr, svm); auto svmAllocation = svmManager.getSVMAlloc(svm)->gpuAllocations.getGraphicsAllocation(pDevice->getRootDeviceIndex()); svmAllocation->setFlushL3Required(true); mockKernel.kernelInfo.kernelAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties(pDevice->getRootDeviceIndex(), true, MemoryConstants::pageSize, AllocationType::INTERNAL_HEAP)); mockKernel.mockKernel->kernelArgRequiresCacheFlush.resize(1); mockKernel.mockKernel->setArgSvmAlloc(0, svm, svmAllocation, 0); cmdQ->enqueueKernel(mockKernel, 1, nullptr, StatickSize3<16, 1, 1>(), StatickSize3<16, 1, 1>(), 0, nullptr, nullptr); L3RangesVec rangesExpected; coverRangeExact(svmAllocation->getGpuAddress(), svmAllocation->getUnderlyingBufferSize(), rangesExpected, FamilyType::L3_FLUSH_ADDRESS_RANGE::L3_FLUSH_EVICTION_POLICY_FLUSH_L3_WITH_EVICTION); L3ControlPolicy validateL3ControlPolicy{L3_FLUSH_ADDRESS_RANGE::L3_FLUSH_EVICTION_POLICY_FLUSH_L3_WITH_EVICTION}; std::string err; auto cmdBuffOk = expectCmdBuff(cmdQ->getCS(0), 0, std::vector{new MatchAnyCmd(AnyNumber), new MatchHwCmd(1), new MatchAnyCmd(AnyNumber), new MatchHwCmd(1, Expects{EXPECT_MEMBER(PIPE_CONTROL, getCommandStreamerStallEnable, true), EXPECT_MEMBER(PIPE_CONTROL, getDcFlushEnable, false)}), new MatchAnyCmd(AnyNumber), new MatchHwCmd(AtLeastOne, Expects{&validateL3ControlPolicy}), new MatchAnyCmd(AnyNumber)}, &err); EXPECT_TRUE(cmdBuffOk) << err; auto expectedRangeWithPostSync = rangesExpected[rangesExpected.size() - 1]; auto l3ParsedRangeWithPostSync = validateL3ControlPolicy.l3RangesParsed[validateL3ControlPolicy.l3RangesParsed.size() - 1]; EXPECT_EQ(expectedRangeWithPostSync, 
l3ParsedRangeWithPostSync); memoryManager->freeGraphicsMemory(mockKernel.kernelInfo.kernelAllocation); svmManager.freeSVMAlloc(svm); } }; template class GivenCacheFlushAfterWalkerDisabledAndProperSteppingIsSetWhenAllocationRequiresCacheFlushThenFlushCommandNotPresentAfterWalker : public EnqueueKernelTest { public: void TestBodyImpl(bool isA0Stepping) { using WALKER = typename FamilyType::WALKER_TYPE; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using L3_FLUSH_ADDRESS_RANGE = typename FamilyType::L3_FLUSH_ADDRESS_RANGE; using L3_CONTROL_BASE = typename FamilyType::L3_CONTROL_BASE; DebugManagerStateRestore restore; DebugManager.flags.EnableCacheFlushAfterWalker.set(0); MockKernelWithInternals mockKernel(*pClDevice, context, true); mockKernel.mockKernel->svmAllocationsRequireCacheFlush = false; auto cmdQ = std::make_unique>(context, pClDevice, nullptr); auto memoryManager = pDevice->getUltCommandStreamReceiver().getMemoryManager(); SVMAllocsManager svmManager(memoryManager, false); void *svm = svmManager.createSVMAlloc(MemoryConstants::pageSize * 5, {}, context->getRootDeviceIndices(), context->getDeviceBitfields()); ASSERT_NE(nullptr, svm); auto svmData = svmManager.getSVMAlloc(svm); ASSERT_NE(nullptr, svmData); auto svmAllocation = svmData->gpuAllocations.getGraphicsAllocation(pDevice->getRootDeviceIndex()); ASSERT_NE(nullptr, svmAllocation); svmAllocation->setFlushL3Required(true); mockKernel.kernelInfo.kernelAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties(pDevice->getRootDeviceIndex(), true, MemoryConstants::pageSize, AllocationType::INTERNAL_HEAP)); mockKernel.mockKernel->kernelArgRequiresCacheFlush.resize(1); mockKernel.mockKernel->setArgSvmAlloc(0, svm, svmAllocation, 0u); cmdQ->enqueueKernel(mockKernel, 1, nullptr, StatickSize3<16, 1, 1>(), StatickSize3<16, 1, 1>(), 0, nullptr, nullptr); L3ControlPolicy validateL3ControlPolicy{L3_FLUSH_ADDRESS_RANGE::L3_FLUSH_EVICTION_POLICY_FLUSH_L3_WITH_EVICTION, 
isA0Stepping};

        std::string err;
        auto cmdBuffOk = expectCmdBuff(cmdQ->getCS(0), 0,
                                       std::vector{
                                           new MatchAnyCmd(AnyNumber),
                                           new MatchHwCmd(1),
                                           new MatchAnyCmd(AnyNumber),
                                           new MatchHwCmd(0),
                                           new MatchAnyCmd(AnyNumber),
                                       },
                                       &err);
        EXPECT_TRUE(cmdBuffOk) << err;

        memoryManager->freeGraphicsMemory(mockKernel.kernelInfo.kernelAllocation);
        svmManager.freeSVMAlloc(svm);
    }
};

/*
 * Test body: cache flush programmed for resource-barrier surfaces.
 *
 * Builds two cl_resource_barrier_descriptor_intel entries - one pointing at a page-sized SVM
 * allocation, one at a page-sized Buffer - wraps them in a BarrierCommand and hands its surfaces
 * to processDispatchForCacheFlush(). The command stream must then contain at least one instance
 * of the hardware command named by the matcher.
 *
 * NOTE(review): the matcher's template arguments are missing from this text; presumably it
 * matches L3_CONTROL_WITHOUT_POST_SYNC, the only alias declared here - confirm upstream.
 * NOTE(review): neither `svm` nor `retVal` / `buffer` is checked before use, so an allocation
 * failure would surface as a crash further down rather than as a test failure.
 */
template class GivenCacheResourceSurfacesWhenprocessingCacheFlushThenExpectProperCacheFlushCommand : public EnqueueKernelTest {
  public:
    void TestBodyImpl() {
        using L3_CONTROL_WITHOUT_POST_SYNC = typename FamilyType::L3_CONTROL;

        MockCommandQueueHw cmdQ(context, pClDevice, 0);
        auto &commandStream = cmdQ.getCS(1024);

        cl_resource_barrier_descriptor_intel descriptor{};
        cl_resource_barrier_descriptor_intel descriptor2{};

        /* The SVM manager is borrowed from the queue's context, not owned by this test. */
        SVMAllocsManager *svmManager = cmdQ.getContext().getSVMAllocsManager();
        void *svm = svmManager->createSVMAlloc(MemoryConstants::pageSize, {}, context->getRootDeviceIndices(), context->getDeviceBitfields());

        auto retVal = CL_INVALID_VALUE;
        size_t bufferSize = MemoryConstants::pageSize;
        std::unique_ptr buffer(Buffer::create(
            context,
            CL_MEM_READ_WRITE,
            bufferSize,
            nullptr,
            retVal));

        descriptor.svm_allocation_pointer = svm;
        descriptor2.mem_object = buffer.get();

        const cl_resource_barrier_descriptor_intel descriptors[] = {descriptor, descriptor2};
        BarrierCommand bCmd(&cmdQ, descriptors, 2);
        CsrDependencies csrDeps;

        cmdQ.processDispatchForCacheFlush(bCmd.surfacePtrs.begin(), bCmd.numSurfaces, &commandStream, csrDeps);

        std::string err;
        auto cmdBuffOk = expectCmdBuff(cmdQ.getCS(0), 0,
                                       std::vector{
                                           new MatchHwCmd(AtLeastOne)},
                                       &err);
        EXPECT_TRUE(cmdBuffOk) << err;

        svmManager->freeSVMAlloc(svm);
    }
};
compute-runtime-22.14.22890/opencl/test/unit_test/kernel/cache_flush_xehp_and_later_tests.inl000066400000000000000000001033351422164147700323430ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */
#pragma once

#include "shared/source/helpers/l3_range.h"
#include "shared/source/memory_manager/unified_memory_manager.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/mocks/mock_allocation_properties.h"
#include "shared/test/common/test_macros/test.h"

#include "opencl/test/unit_test/fixtures/hello_world_fixture.h"
#include "opencl/test/unit_test/helpers/cmd_buffer_validator.h"
#include "opencl/test/unit_test/helpers/hardware_commands_helper_tests.h"
#include "opencl/test/unit_test/helpers/static_size3.h"
#include "opencl/test/unit_test/mocks/mock_command_queue.h"

using namespace NEO;

/*
 * Command validator used by the tests in this file.
 *
 * When invoked on a command it walks the range entries stored directly behind the command
 * header, rejects the command if any entry carries an eviction policy other than
 * `expectedPolicy`, and appends every accepted entry to `l3RangesParsed` so the test can
 * compare the parsed ranges with the ones it expected.
 *
 * NOTE(review): the template parameter list and the template arguments of CmdValidator,
 * GenCmdList, genCmdCast and reinterpret_cast are missing from this text - restore upstream.
 */
template struct L3ControlPolicy : CmdValidator {
    /* Stores the policy every parsed range entry must report. */
    L3ControlPolicy(typename FamilyType::L3_FLUSH_ADDRESS_RANGE::L3_FLUSH_EVICTION_POLICY expectedPolicy)
        : expectedPolicy(expectedPolicy) {
    }

    /*
     * Validates the command at `it`.
     *
     * it           - iterator to the command under validation.
     * numInScetion - not used by this validator (name kept as spelled in the interface here).
     * member       - not used by this validator.
     * outReason    - receives a description of the first policy mismatch.
     *
     * Returns true when every range entry matches `expectedPolicy`, false on the first mismatch.
     * Entries parsed before a mismatch remain in `l3RangesParsed`.
     */
    bool operator()(typename GenCmdList::iterator it, size_t numInScetion, const std::string &member, std::string &outReason) override {
        using L3_FLUSH_ADDRESS_RANGE = typename FamilyType::L3_FLUSH_ADDRESS_RANGE;
        using L3_CONTROL = typename FamilyType::L3_CONTROL;

        /* NOTE(review): the cast result is dereferenced without a null check - presumably the
           validator is only attached to matchers for this command type; confirm at call sites. */
        auto l3Control = genCmdCast(*it);
        /* Number of trailing range entries derived from the command's length field.
           NOTE(review): assumes a fixed part of 3 and 2 per range, in getLength() units - TODO confirm. */
        auto flushRangesCount = (l3Control->getLength() - 3) / 2;
        /* Step over the command header; the range entries start immediately after it. */
        l3Control++;
        auto l3Ranges = reinterpret_cast(l3Control);
        for (uint32_t i = 0; i < flushRangesCount; i++) {
            if (l3Ranges->getL3FlushEvictionPolicy() != expectedPolicy) {
                outReason = "Invalid L3_FLUSH_EVICTION_POLICY - expected: " + std::to_string(expectedPolicy) + ", got :" + std::to_string(l3Ranges->getL3FlushEvictionPolicy());
                return false;
            }
            l3RangesParsed.push_back(L3Range::fromAddressMask(l3Ranges->getAddress(), l3Ranges->getAddressMask()));
            l3Ranges++;
        }

        return true;
    }

    /* Ranges collected from every command this validator accepted, in stream order. */
    L3RangesVec l3RangesParsed;
    /* Policy each range entry is compared against. */
    typename FamilyType::L3_FLUSH_ADDRESS_RANGE::L3_FLUSH_EVICTION_POLICY expectedPolicy;
};

template class GivenCacheFlushAfterWalkerEnabledWhenSvmAllocationsSetAsCacheFlushRequiringThenExpectCorrectCommandSize : public HardwareCommandsTest {
  public:
    void TestBodyImpl() {
        using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER;
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using L3_CONTROL = typename FamilyType::L3_CONTROL; using L3_FLUSH_ADDRESS_RANGE = typename FamilyType::L3_FLUSH_ADDRESS_RANGE; DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableCacheFlushAfterWalker.set(1); CommandQueueHw cmdQ(nullptr, pClDevice, 0, false); void *allocPtr = reinterpret_cast(static_cast(6 * MemoryConstants::pageSize)); MockGraphicsAllocation svmAllocation{allocPtr, MemoryConstants::pageSize * 2}; svmAllocation.setFlushL3Required(true); this->mockKernelWithInternal->mockKernel->kernelSvmGfxAllocations.push_back(&svmAllocation); this->mockKernelWithInternal->mockKernel->svmAllocationsRequireCacheFlush = true; StackVec allocationsForCacheFlush; this->mockKernelWithInternal->mockKernel->getAllocationsForCacheFlush(allocationsForCacheFlush); StackVec subranges; for (GraphicsAllocation *alloc : allocationsForCacheFlush) { coverRangeExact(alloc->getGpuAddress(), alloc->getUnderlyingBufferSize(), subranges, FamilyType::L3_FLUSH_ADDRESS_RANGE::L3_FLUSH_EVICTION_POLICY_FLUSH_L3_WITH_EVICTION); } size_t expectedSize = sizeof(COMPUTE_WALKER) + sizeof(PIPE_CONTROL); DispatchInfo di; size_t actualSize = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, cmdQ, this->mockKernelWithInternal->mockKernel, di); EXPECT_EQ(expectedSize, actualSize); } }; template class GivenCacheFlushAfterWalkerEnabledWhenSvmAllocationsSetAsCacheFlushRequiringThenExpectCacheFlushCommand : public HardwareCommandsTest { public: void TestBodyImpl() { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using L3_CONTROL = typename FamilyType::L3_CONTROL; DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableCacheFlushAfterWalker.set(1); CommandQueueHw cmdQ(nullptr, pClDevice, 0, false); auto &commandStream = cmdQ.getCS(1024); void *allocPtr = reinterpret_cast(static_cast(6 * MemoryConstants::pageSize)); MockGraphicsAllocation svmAllocation{allocPtr, MemoryConstants::pageSize * 2}; 
svmAllocation.setFlushL3Required(true); this->mockKernelWithInternal->mockKernel->kernelSvmGfxAllocations.push_back(&svmAllocation); this->mockKernelWithInternal->mockKernel->svmAllocationsRequireCacheFlush = true; HardwareCommandsHelper::programCacheFlushAfterWalkerCommand(&commandStream, cmdQ, this->mockKernelWithInternal->mockKernel, 0U); std::string err; auto cmdBuffOk = expectCmdBuff(cmdQ.getCS(0), 0, std::vector({ new MatchHwCmd(1, Expects{EXPECT_MEMBER(PIPE_CONTROL, getCommandStreamerStallEnable, true), EXPECT_MEMBER(PIPE_CONTROL, getDcFlushEnable, false)}), new MatchHwCmd(1, Expects{EXPECT_MEMBER(L3_CONTROL, getPostSyncOperation, L3_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_NO_WRITE)}), }), &err); EXPECT_TRUE(cmdBuffOk) << err; } }; template class GivenCacheFlushAfterWalkerEnabledWhenKernelArgIsSetAsCacheFlushRequiredThenExpectCacheFlushCommand : public HardwareCommandsTest { public: void TestBodyImpl() { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using L3_FLUSH_ADDRESS_RANGE = typename FamilyType::L3_FLUSH_ADDRESS_RANGE; using L3_CONTROL = typename FamilyType::L3_CONTROL; DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableCacheFlushAfterWalker.set(1); CommandQueueHw cmdQ(nullptr, pClDevice, 0, false); auto &commandStream = cmdQ.getCS(1024); addSpaceForSingleKernelArg(); this->mockKernelWithInternal->mockKernel->kernelArgRequiresCacheFlush.resize(2); void *allocPtr = reinterpret_cast(static_cast(6 * MemoryConstants::pageSize)); MockGraphicsAllocation cacheRequiringAllocation{allocPtr, MemoryConstants::pageSize * 7}; this->mockKernelWithInternal->mockKernel->kernelArgRequiresCacheFlush[0] = &cacheRequiringAllocation; L3RangesVec rangesExpected; coverRangeExact(cacheRequiringAllocation.getGpuAddress(), cacheRequiringAllocation.getUnderlyingBufferSize(), rangesExpected, FamilyType::L3_FLUSH_ADDRESS_RANGE::L3_FLUSH_EVICTION_POLICY_FLUSH_L3_WITH_EVICTION); HardwareCommandsHelper::programCacheFlushAfterWalkerCommand(&commandStream, 
cmdQ, this->mockKernelWithInternal->mockKernel, 0U);

        L3ControlPolicy validateL3ControlPolicy{L3_FLUSH_ADDRESS_RANGE::L3_FLUSH_EVICTION_POLICY_FLUSH_L3_WITH_EVICTION};

        std::string err;
        auto cmdBuffOk = expectCmdBuff(cmdQ.getCS(0), 0,
                                       std::vector{
                                           new MatchHwCmd(1, Expects{EXPECT_MEMBER(PIPE_CONTROL, getCommandStreamerStallEnable, true), EXPECT_MEMBER(PIPE_CONTROL, getDcFlushEnable, false)}),
                                           new MatchHwCmd(1, Expects{EXPECT_MEMBER(L3_CONTROL, getPostSyncOperation, L3_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_NO_WRITE), &validateL3ControlPolicy}),
                                       },
                                       &err);
        EXPECT_TRUE(cmdBuffOk) << err;

        EXPECT_EQ(rangesExpected, validateL3ControlPolicy.l3RangesParsed);
    }
};

/*
 * Test body: nothing to flush means nothing is programmed.
 *
 * With EnableCacheFlushAfterWalker set to 1 but no global surface, SVM allocation or kernel
 * argument registered for flushing in this body, getSizeRequiredForCacheFlush() must report 0
 * and programCacheFlushAfterWalkerCommand() must leave the command stream empty
 * (getUsed() == 0).
 *
 * NOTE(review): addSpaceForSingleKernelArg() is a fixture helper not visible here; presumably
 * it adds one kernel argument slot without marking it as flush-requiring - confirm.
 * NOTE(review): programCacheFlushAfterWalkerCommand is called with a trailing "0U, 0U" (five
 * arguments) while the other calls in this file pass four - verify against the helper's
 * current signature.
 */
template class GivenCacheFlushAfterWalkerEnabledWhenNoGlobalSurfaceSvmAllocationKernelArgRequireCacheFlushThenExpectNoCacheFlushCommand : public HardwareCommandsTest {
  public:
    void TestBodyImpl() {
        DebugManagerStateRestore dbgRestore;
        DebugManager.flags.EnableCacheFlushAfterWalker.set(1);

        CommandQueueHw cmdQ(nullptr, pClDevice, 0, false);
        auto &commandStream = cmdQ.getCS(1024);
        addSpaceForSingleKernelArg();

        /* Size query first: no flush-requiring allocation, so no space is needed. */
        size_t actualSize = HardwareCommandsHelper::getSizeRequiredForCacheFlush(cmdQ, this->mockKernelWithInternal->mockKernel, 0U);
        EXPECT_EQ(0U, actualSize);

        /* Programming must be a no-op as well. */
        HardwareCommandsHelper::programCacheFlushAfterWalkerCommand(&commandStream, cmdQ, this->mockKernelWithInternal->mockKernel, 0U, 0U);
        EXPECT_EQ(0U, commandStream.getUsed());
    }
};

template class GivenCacheFlushAfterWalkerEnabledWhenProgramGlobalSurfacePresentThenExpectCacheFlushCommand : public HardwareCommandsTest {
  public:
    void TestBodyImpl() {
        using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
        using L3_CONTROL = typename FamilyType::L3_CONTROL;
        DebugManagerStateRestore dbgRestore;
        DebugManager.flags.EnableCacheFlushAfterWalker.set(1);

        CommandQueueHw cmdQ(nullptr, pClDevice, 0, false);
        auto &commandStream = cmdQ.getCS(1024);
        void *allocPtr = reinterpret_cast(static_cast(6 * MemoryConstants::pageSize));
        MockGraphicsAllocation
globalAllocation{allocPtr, MemoryConstants::pageSize * 2}; this->mockKernelWithInternal->mockProgram->setGlobalSurface(&globalAllocation); HardwareCommandsHelper::programCacheFlushAfterWalkerCommand(&commandStream, cmdQ, this->mockKernelWithInternal->mockKernel, 0U); std::string err; auto cmdBuffOk = expectCmdBuff(cmdQ.getCS(0), 0, std::vector{ new MatchHwCmd(1, Expects{EXPECT_MEMBER(PIPE_CONTROL, getCommandStreamerStallEnable, true), EXPECT_MEMBER(PIPE_CONTROL, getDcFlushEnable, false)}), new MatchHwCmd(1, Expects{EXPECT_MEMBER(L3_CONTROL, getPostSyncOperation, L3_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_NO_WRITE)})}, &err); EXPECT_TRUE(cmdBuffOk) << err; this->mockKernelWithInternal->mockProgram->setGlobalSurface(nullptr); } }; template class GivenCacheFlushAfterWalkerEnabledWhenProgramGlobalSurfacePresentAndPostSyncRequiredThenExpectProperCacheFlushCommand : public HardwareCommandsTest { public: void TestBodyImpl() { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using L3_CONTROL = typename FamilyType::L3_CONTROL; DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableCacheFlushAfterWalker.set(1); CommandQueueHw cmdQ(nullptr, pClDevice, 0, false); auto &commandStream = cmdQ.getCS(1024); void *allocPtr = reinterpret_cast(static_cast(6 * MemoryConstants::pageSize)); MockGraphicsAllocation globalAllocation{allocPtr, MemoryConstants::pageSize * 2}; this->mockKernelWithInternal->mockProgram->setGlobalSurface(&globalAllocation); constexpr uint64_t postSyncAddress = 1024; HardwareCommandsHelper::programCacheFlushAfterWalkerCommand(&commandStream, cmdQ, this->mockKernelWithInternal->mockKernel, postSyncAddress); std::string err; auto cmdBuffOk = expectCmdBuff(cmdQ.getCS(0), 0, std::vector{ new MatchHwCmd(1, Expects{EXPECT_MEMBER(PIPE_CONTROL, getCommandStreamerStallEnable, true), EXPECT_MEMBER(PIPE_CONTROL, getDcFlushEnable, false)}), new MatchHwCmd(1, Expects{EXPECT_MEMBER(L3_CONTROL, getPostSyncAddress, postSyncAddress), 
EXPECT_MEMBER(L3_CONTROL, getPostSyncImmediateData, 0), EXPECT_MEMBER(L3_CONTROL, getPostSyncOperation, L3_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA)})}, &err); EXPECT_TRUE(cmdBuffOk) << err; this->mockKernelWithInternal->mockProgram->setGlobalSurface(nullptr); } }; using EnqueueKernelFixture = HelloWorldFixture; using EnqueueKernelTest = Test; template class GivenCacheFlushAfterWalkerEnabledWhenAllocationRequiresCacheFlushThenFlushCommandPresentAfterWalker : public EnqueueKernelTest { public: void TestBodyImpl() { using WALKER = typename FamilyType::WALKER_TYPE; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using L3_FLUSH_ADDRESS_RANGE = typename FamilyType::L3_FLUSH_ADDRESS_RANGE; using L3_CONTROL = typename FamilyType::L3_CONTROL; DebugManagerStateRestore restore; DebugManager.flags.EnableCacheFlushAfterWalker.set(1); DebugManager.flags.EnableTimestampPacket.set(0); MockKernelWithInternals mockKernel(*pClDevice, context, true); mockKernel.mockKernel->svmAllocationsRequireCacheFlush = false; auto cmdQ = std::make_unique>(context, pClDevice, nullptr); cmdQ->getUltCommandStreamReceiver().timestampPacketWriteEnabled = false; auto memoryManager = pDevice->getUltCommandStreamReceiver().getMemoryManager(); SVMAllocsManager svmManager(memoryManager, false); void *svm = svmManager.createSVMAlloc(MemoryConstants::pageSize * 5, {}, context->getRootDeviceIndices(), context->getDeviceBitfields()); ASSERT_NE(nullptr, svm); auto svmAllocation = svmManager.getSVMAlloc(svm)->gpuAllocations.getGraphicsAllocation(pDevice->getRootDeviceIndex()); svmAllocation->setFlushL3Required(true); mockKernel.kernelInfo.kernelAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties(pDevice->getRootDeviceIndex(), true, MemoryConstants::pageSize, AllocationType::INTERNAL_HEAP, pDevice->getDeviceBitfield())); mockKernel.mockKernel->kernelArgRequiresCacheFlush.resize(1); mockKernel.mockKernel->setArgSvmAlloc(0, svm, 
svmAllocation, 0u); cmdQ->enqueueKernel(mockKernel, 1, nullptr, StatickSize3<16, 1, 1>(), StatickSize3<16, 1, 1>(), 0, nullptr, nullptr); L3RangesVec rangesExpected; coverRangeExact(svmAllocation->getGpuAddress(), svmAllocation->getUnderlyingBufferSize(), rangesExpected, FamilyType::L3_FLUSH_ADDRESS_RANGE::L3_FLUSH_EVICTION_POLICY_FLUSH_L3_WITH_EVICTION); L3ControlPolicy validateL3ControlPolicy{L3_FLUSH_ADDRESS_RANGE::L3_FLUSH_EVICTION_POLICY_FLUSH_L3_WITH_EVICTION}; std::string err; auto cmdBuffOk = expectCmdBuff(cmdQ->getCS(0), 0, std::vector{new MatchAnyCmd(AnyNumber), new MatchHwCmd(1), new MatchAnyCmd(AnyNumber), new MatchHwCmd(1, Expects{EXPECT_MEMBER(PIPE_CONTROL, getCommandStreamerStallEnable, true), EXPECT_MEMBER(PIPE_CONTROL, getDcFlushEnable, false)}), new MatchHwCmd(1, Expects{EXPECT_MEMBER(L3_CONTROL, getPostSyncOperation, L3_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_NO_WRITE), &validateL3ControlPolicy}), new MatchAnyCmd(AnyNumber)}, &err); EXPECT_TRUE(cmdBuffOk) << err; EXPECT_EQ(rangesExpected, validateL3ControlPolicy.l3RangesParsed); memoryManager->freeGraphicsMemory(mockKernel.kernelInfo.kernelAllocation); svmManager.freeSVMAlloc(svm); } }; template class GivenCacheFlushAfterWalkerAndTimestampPacketsEnabledWhenAllocationRequiresCacheFlushThenFlushCommandPresentAfterWalker : public EnqueueKernelTest { public: void TestBodyImpl() { using WALKER = typename FamilyType::WALKER_TYPE; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using L3_FLUSH_ADDRESS_RANGE = typename FamilyType::L3_FLUSH_ADDRESS_RANGE; using L3_CONTROL = typename FamilyType::L3_CONTROL; DebugManagerStateRestore restore; DebugManager.flags.EnableCacheFlushAfterWalker.set(1); DebugManager.flags.EnableTimestampPacket.set(1); MockKernelWithInternals mockKernel(*pClDevice, context, true); mockKernel.mockKernel->svmAllocationsRequireCacheFlush = false; auto cmdQ = std::make_unique>(context, pClDevice, nullptr); auto memoryManager = 
pDevice->getUltCommandStreamReceiver().getMemoryManager(); SVMAllocsManager svmManager(memoryManager, false); void *svm = svmManager.createSVMAlloc(MemoryConstants::pageSize * 5, {}, context->getRootDeviceIndices(), context->getDeviceBitfields()); ASSERT_NE(nullptr, svm); auto svmAllocation = svmManager.getSVMAlloc(svm)->gpuAllocations.getGraphicsAllocation(pDevice->getRootDeviceIndex()); svmAllocation->setFlushL3Required(true); mockKernel.kernelInfo.kernelAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties(pDevice->getRootDeviceIndex(), true, MemoryConstants::pageSize, AllocationType::INTERNAL_HEAP, pDevice->getDeviceBitfield())); mockKernel.mockKernel->kernelArgRequiresCacheFlush.resize(1); mockKernel.mockKernel->setArgSvmAlloc(0, svm, svmAllocation, 0u); cmdQ->enqueueKernel(mockKernel, 1, nullptr, StatickSize3<16, 1, 1>(), StatickSize3<16, 1, 1>(), 0, nullptr, nullptr); auto &nodes = cmdQ->timestampPacketContainer->peekNodes(); EXPECT_FALSE(nodes[nodes.size() - 1]->isProfilingCapable()); L3RangesVec rangesExpected; coverRangeExact(svmAllocation->getGpuAddress(), svmAllocation->getUnderlyingBufferSize(), rangesExpected, FamilyType::L3_FLUSH_ADDRESS_RANGE::L3_FLUSH_EVICTION_POLICY_FLUSH_L3_WITH_EVICTION); L3ControlPolicy validateL3ControlPolicy{L3_FLUSH_ADDRESS_RANGE::L3_FLUSH_EVICTION_POLICY_FLUSH_L3_WITH_EVICTION}; std::string err; auto cmdBuffOk = expectCmdBuff(cmdQ->getCS(0), 0, std::vector{new MatchAnyCmd(AnyNumber), new MatchHwCmd(1), new MatchAnyCmd(AnyNumber), new MatchHwCmd(1, Expects{EXPECT_MEMBER(PIPE_CONTROL, getCommandStreamerStallEnable, true), EXPECT_MEMBER(PIPE_CONTROL, getDcFlushEnable, false)}), new MatchAnyCmd(AnyNumber), new MatchHwCmd(1, Expects{EXPECT_MEMBER(L3_CONTROL, getPostSyncOperation, L3_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA), &validateL3ControlPolicy}), new MatchAnyCmd(AnyNumber)}, &err); EXPECT_TRUE(cmdBuffOk) << err; EXPECT_EQ(rangesExpected, 
validateL3ControlPolicy.l3RangesParsed); memoryManager->freeGraphicsMemory(mockKernel.kernelInfo.kernelAllocation); svmManager.freeSVMAlloc(svm); } }; template class GivenCacheFlushAfterWalkerDisabledWhenAllocationRequiresCacheFlushThenFlushCommandNotPresentAfterWalker : public EnqueueKernelTest { public: void TestBodyImpl() { using WALKER = typename FamilyType::WALKER_TYPE; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using L3_FLUSH_ADDRESS_RANGE = typename FamilyType::L3_FLUSH_ADDRESS_RANGE; using L3_CONTROL = typename FamilyType::L3_CONTROL; DebugManagerStateRestore restore; DebugManager.flags.EnableCacheFlushAfterWalker.set(0); MockKernelWithInternals mockKernel(*pClDevice, context, true); mockKernel.mockKernel->svmAllocationsRequireCacheFlush = false; auto cmdQ = std::make_unique>(context, pClDevice, nullptr); auto memoryManager = pDevice->getUltCommandStreamReceiver().getMemoryManager(); SVMAllocsManager svmManager(memoryManager, false); void *svm = svmManager.createSVMAlloc(MemoryConstants::pageSize * 5, {}, context->getRootDeviceIndices(), context->getDeviceBitfields()); ASSERT_NE(nullptr, svm); auto svmAllocation = svmManager.getSVMAlloc(svm)->gpuAllocations.getGraphicsAllocation(pDevice->getRootDeviceIndex()); svmAllocation->setFlushL3Required(true); mockKernel.kernelInfo.kernelAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties(pDevice->getRootDeviceIndex(), true, MemoryConstants::pageSize, AllocationType::INTERNAL_HEAP, pDevice->getDeviceBitfield())); mockKernel.mockKernel->kernelArgRequiresCacheFlush.resize(1); mockKernel.mockKernel->setArgSvmAlloc(0, svm, svmAllocation, 0u); cmdQ->enqueueKernel(mockKernel, 1, nullptr, StatickSize3<16, 1, 1>(), StatickSize3<16, 1, 1>(), 0, nullptr, nullptr); L3ControlPolicy validateL3ControlPolicy{L3_FLUSH_ADDRESS_RANGE::L3_FLUSH_EVICTION_POLICY_FLUSH_L3_WITH_EVICTION}; std::string err; auto cmdBuffOk = expectCmdBuff(cmdQ->getCS(0), 0, std::vector{ new 
MatchAnyCmd(AnyNumber), new MatchHwCmd(1), new MatchAnyCmd(AnyNumber), new MatchHwCmd(0), new MatchAnyCmd(AnyNumber), }, &err); EXPECT_TRUE(cmdBuffOk) << err; memoryManager->freeGraphicsMemory(mockKernel.kernelInfo.kernelAllocation); svmManager.freeSVMAlloc(svm); } }; template class GivenCacheFlushAfterWalkerEnabledWhenMoreThan126AllocationRangesRequiresCacheFlushThenAtLeatsTwoFlushCommandPresentAfterWalker : public EnqueueKernelTest { public: void TestBodyImpl() { using WALKER = typename FamilyType::WALKER_TYPE; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using L3_FLUSH_ADDRESS_RANGE = typename FamilyType::L3_FLUSH_ADDRESS_RANGE; using L3_CONTROL = typename FamilyType::L3_CONTROL; DebugManagerStateRestore restore; DebugManager.flags.EnableCacheFlushAfterWalker.set(1); DebugManager.flags.EnableTimestampPacket.set(0); MockKernelWithInternals mockKernel(*pClDevice, context, true); mockKernel.mockKernel->svmAllocationsRequireCacheFlush = false; auto cmdQ = std::make_unique>(context, pClDevice, nullptr); cmdQ->getUltCommandStreamReceiver().timestampPacketWriteEnabled = false; auto memoryManager = pDevice->getUltCommandStreamReceiver().getMemoryManager(); SVMAllocsManager svmManager(memoryManager, false); mockKernel.mockKernel->kernelArgRequiresCacheFlush.resize(maxFlushSubrangeCount + 1); std::vector svmAllocs; for (uint32_t i = 0; i < maxFlushSubrangeCount + 1; i++) { void *svm = svmManager.createSVMAlloc(MemoryConstants::pageSize, {}, context->getRootDeviceIndices(), context->getDeviceBitfields()); auto svmAllocation = svmManager.getSVMAlloc(svm)->gpuAllocations.getGraphicsAllocation(pDevice->getRootDeviceIndex()); svmAllocation->setFlushL3Required(true); mockKernel.mockKernel->addAllocationToCacheFlushVector(i, svmAllocation); svmAllocs.push_back(svm); } cmdQ->enqueueKernel(mockKernel, 1, nullptr, StatickSize3<16, 1, 1>(), StatickSize3<16, 1, 1>(), 0, nullptr, nullptr); std::string err; auto cmdBuffOk = expectCmdBuff(cmdQ->getCS(0), 0, std::vector{new 
MatchAnyCmd(AnyNumber), new MatchHwCmd(1), new MatchAnyCmd(AnyNumber), new MatchHwCmd(1, Expects{EXPECT_MEMBER(PIPE_CONTROL, getCommandStreamerStallEnable, true), EXPECT_MEMBER(PIPE_CONTROL, getDcFlushEnable, false)}), new MatchHwCmd(1), new MatchHwCmd(AtLeastOne), new MatchAnyCmd(AnyNumber)}, &err); EXPECT_TRUE(cmdBuffOk) << err; for (void *svm : svmAllocs) { svmManager.freeSVMAlloc(svm); } } }; template class GivenCacheFlushAfterWalkerAndTimestampPacketsEnabledWhenMoreThan126AllocationRangesRequiresCacheFlushThenExpectFlushWithOutPostSyncAndThenWithPostSync : public EnqueueKernelTest { public: void TestBodyImpl() { using WALKER = typename FamilyType::WALKER_TYPE; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using L3_FLUSH_ADDRESS_RANGE = typename FamilyType::L3_FLUSH_ADDRESS_RANGE; using L3_CONTROL = typename FamilyType::L3_CONTROL; DebugManagerStateRestore restore; DebugManager.flags.EnableCacheFlushAfterWalker.set(1); DebugManager.flags.EnableTimestampPacket.set(1); MockKernelWithInternals mockKernel(*pClDevice, context, true); mockKernel.mockKernel->svmAllocationsRequireCacheFlush = false; auto cmdQ = std::make_unique>(context, pClDevice, nullptr); cmdQ->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; auto memoryManager = pDevice->getUltCommandStreamReceiver().getMemoryManager(); SVMAllocsManager svmManager(memoryManager, false); mockKernel.mockKernel->kernelArgRequiresCacheFlush.resize(maxFlushSubrangeCount + 1); std::vector svmAllocs; for (uint32_t i = 0; i < maxFlushSubrangeCount + 1; i++) { void *svm = svmManager.createSVMAlloc(MemoryConstants::pageSize, {}, context->getRootDeviceIndices(), context->getDeviceBitfields()); auto svmAllocation = svmManager.getSVMAlloc(svm)->gpuAllocations.getGraphicsAllocation(pDevice->getRootDeviceIndex()); svmAllocation->setFlushL3Required(true); mockKernel.mockKernel->addAllocationToCacheFlushVector(i, svmAllocation); svmAllocs.push_back(svm); } cmdQ->enqueueKernel(mockKernel, 1, nullptr, 
StatickSize3<16, 1, 1>(), StatickSize3<16, 1, 1>(), 0, nullptr, nullptr); auto &nodes = cmdQ->timestampPacketContainer->peekNodes(); EXPECT_FALSE(nodes[1]->isProfilingCapable()); auto timestampPacketNode = nodes[1]; auto timestampPacketGpuAddress = TimestampPacketHelper::getContextEndGpuAddress(*timestampPacketNode); std::string err; auto cmdBuffOk = expectCmdBuff(cmdQ->getCS(0), 0, std::vector{new MatchAnyCmd(AnyNumber), new MatchHwCmd(1), new MatchAnyCmd(AnyNumber), new MatchHwCmd(1, Expects{EXPECT_MEMBER(PIPE_CONTROL, getCommandStreamerStallEnable, true), EXPECT_MEMBER(PIPE_CONTROL, getDcFlushEnable, false)}), new MatchHwCmd(1, Expects{EXPECT_MEMBER(L3_CONTROL, getPostSyncOperation, L3_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_NO_WRITE)}), new MatchHwCmd(1, Expects{EXPECT_MEMBER(L3_CONTROL, getPostSyncOperation, L3_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA)}), new MatchAnyCmd(AnyNumber)}, &err); HardwareParse hwParser; hwParser.parseCommands(cmdQ->getCS(0), 0); bool postSyncWriteFound = false; for (auto &cmd : hwParser.cmdList) { if (auto l3ControlCmd = genCmdCast(cmd)) { if (l3ControlCmd->getPostSyncOperation() == L3_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) { EXPECT_EQ(timestampPacketGpuAddress, l3ControlCmd->getPostSyncAddress()); postSyncWriteFound = true; } } } EXPECT_TRUE(postSyncWriteFound); EXPECT_TRUE(cmdBuffOk) << err; for (void *svm : svmAllocs) { svmManager.freeSVMAlloc(svm); } } }; template class GivenCacheFlushAfterWalkerEnabledWhen126AllocationRangesRequiresCacheFlushThenExpectOneFlush : public EnqueueKernelTest { public: void TestBodyImpl() { using WALKER = typename FamilyType::WALKER_TYPE; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using L3_FLUSH_ADDRESS_RANGE = typename FamilyType::L3_FLUSH_ADDRESS_RANGE; using L3_CONTROL = typename FamilyType::L3_CONTROL; DebugManagerStateRestore restore; DebugManager.flags.EnableCacheFlushAfterWalker.set(1); 
DebugManager.flags.EnableTimestampPacket.set(0); MockKernelWithInternals mockKernel(*pClDevice, context, true); mockKernel.mockKernel->svmAllocationsRequireCacheFlush = false; auto cmdQ = std::make_unique>(context, pClDevice, nullptr); cmdQ->getUltCommandStreamReceiver().timestampPacketWriteEnabled = false; auto memoryManager = pDevice->getUltCommandStreamReceiver().getMemoryManager(); SVMAllocsManager svmManager(memoryManager, false); mockKernel.mockKernel->kernelArgRequiresCacheFlush.resize(maxFlushSubrangeCount); std::vector svmAllocs; for (uint32_t i = 0; i < maxFlushSubrangeCount; i++) { void *svm = svmManager.createSVMAlloc(MemoryConstants::pageSize, {}, context->getRootDeviceIndices(), context->getDeviceBitfields()); auto svmAllocation = svmManager.getSVMAlloc(svm)->gpuAllocations.getGraphicsAllocation(pDevice->getRootDeviceIndex()); svmAllocation->setFlushL3Required(true); mockKernel.mockKernel->addAllocationToCacheFlushVector(i, svmAllocation); svmAllocs.push_back(svm); } cmdQ->enqueueKernel(mockKernel, 1, nullptr, StatickSize3<16, 1, 1>(), StatickSize3<16, 1, 1>(), 0, nullptr, nullptr); std::string err; auto cmdBuffOk = expectCmdBuff(cmdQ->getCS(0), 0, std::vector{new MatchAnyCmd(AnyNumber), new MatchHwCmd(1), new MatchAnyCmd(AnyNumber), new MatchHwCmd(1, Expects{EXPECT_MEMBER(PIPE_CONTROL, getCommandStreamerStallEnable, true), EXPECT_MEMBER(PIPE_CONTROL, getDcFlushEnable, false)}), new MatchHwCmd(1), new MatchHwCmd(0), new MatchAnyCmd(AnyNumber)}, &err); EXPECT_TRUE(cmdBuffOk) << err; for (void *svm : svmAllocs) { svmManager.freeSVMAlloc(svm); } } };compute-runtime-22.14.22890/opencl/test/unit_test/kernel/clone_kernel_tests.cpp000066400000000000000000001116221422164147700275000ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/memory_manager/unified_memory_manager.h" #include "shared/test/common/test_macros/test.h" #include 
"shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/source/accelerators/intel_accelerator.h" #include "opencl/source/accelerators/intel_motion_estimation.h" #include "opencl/source/helpers/sampler_helpers.h" #include "opencl/source/kernel/kernel.h" #include "opencl/source/mem_obj/pipe.h" #include "opencl/test/unit_test/fixtures/context_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/fixtures/multi_root_device_fixture.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_pipe.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "opencl/test/unit_test/mocks/mock_sampler.h" #include "opencl/test/unit_test/test_macros/test_checks_ocl.h" #include "CL/cl.h" #include "gtest/gtest.h" #include using namespace NEO; class CloneKernelTest : public MultiRootDeviceWithSubDevicesFixture { public: CloneKernelTest() { } protected: void SetUp() override { MultiRootDeviceWithSubDevicesFixture::SetUp(); pProgram = std::make_unique(context.get(), false, context->getDevices()); // define kernel info pKernelInfo = std::make_unique(); pKernelInfo->kernelDescriptor.payloadMappings.explicitArgs.resize(1); pKernelInfo->kernelDescriptor.payloadMappings.explicitArgsExtendedDescriptors.resize(1); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1; pKernelInfo->kernelDescriptor.kernelAttributes.crossThreadDataSize = 72; pKernelInfo->setPrivateMemory(0x10, false, 8, 64, 64); pKernelInfo->heapInfo.SurfaceStateHeapSize = sizeof(surfaceStateHeap); pKernelInfo->heapInfo.pSsh = surfaceStateHeap; KernelInfoContainer kernelInfos; kernelInfos.resize(3); kernelInfos[0] = kernelInfos[1] = kernelInfos[2] = pKernelInfo.get(); KernelVectorType sourceKernels; sourceKernels.resize(3); KernelVectorType clonedKernels; clonedKernels.resize(3); for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) { 
pSourceKernel[rootDeviceIndex] = new MockKernel(pProgram.get(), *pKernelInfo, *deviceFactory->rootDevices[rootDeviceIndex]); ASSERT_EQ(CL_SUCCESS, pSourceKernel[rootDeviceIndex]->initialize()); sourceKernels[rootDeviceIndex] = pSourceKernel[rootDeviceIndex]; pClonedKernel[rootDeviceIndex] = new MockKernel(pProgram.get(), *pKernelInfo, *deviceFactory->rootDevices[rootDeviceIndex]); ASSERT_EQ(CL_SUCCESS, pClonedKernel[rootDeviceIndex]->initialize()); clonedKernels[rootDeviceIndex] = pClonedKernel[rootDeviceIndex]; } pSourceMultiDeviceKernel = std::make_unique(sourceKernels, kernelInfos); pClonedMultiDeviceKernel = std::make_unique(clonedKernels, kernelInfos); } void TearDown() override { MultiRootDeviceWithSubDevicesFixture::TearDown(); } cl_int retVal = CL_SUCCESS; std::unique_ptr pProgram; std::unique_ptr pSourceMultiDeviceKernel; std::unique_ptr pClonedMultiDeviceKernel; MockKernel *pSourceKernel[3] = {nullptr}; MockKernel *pClonedKernel[3] = {nullptr}; std::unique_ptr pKernelInfo; char surfaceStateHeap[128]; }; TEST_F(CloneKernelTest, GivenKernelWithPrivateSurfaceWhenCloningKernelThenClonedKernelProgramItsOwnPrivateSurfaceAddress) { for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) { auto pSourcePrivateSurface = pSourceKernel[rootDeviceIndex]->privateSurface; auto pClonedPrivateSurface = pClonedKernel[rootDeviceIndex]->privateSurface; EXPECT_NE(nullptr, pSourcePrivateSurface); EXPECT_NE(nullptr, pClonedPrivateSurface); EXPECT_NE(pClonedPrivateSurface, pSourcePrivateSurface); { auto pSourcePrivateSurfPatchedAddress = reinterpret_cast(ptrOffset(pSourceKernel[rootDeviceIndex]->getCrossThreadData(), 64)); auto pClonedPrivateSurfPatchedAddress = reinterpret_cast(ptrOffset(pClonedKernel[rootDeviceIndex]->getCrossThreadData(), 64)); EXPECT_EQ(pSourcePrivateSurface->getGpuAddressToPatch(), *pSourcePrivateSurfPatchedAddress); EXPECT_EQ(pClonedPrivateSurface->getGpuAddressToPatch(), *pClonedPrivateSurfPatchedAddress); } retVal = 
pClonedKernel[rootDeviceIndex]->cloneKernel(pSourceKernel[rootDeviceIndex]); EXPECT_EQ(CL_SUCCESS, retVal); auto pClonedPrivateSurface2 = pClonedKernel[rootDeviceIndex]->privateSurface; EXPECT_EQ(pClonedPrivateSurface, pClonedPrivateSurface2); { auto pClonedPrivateSurfPatchedAddress = reinterpret_cast(ptrOffset(pClonedKernel[rootDeviceIndex]->getCrossThreadData(), 64)); EXPECT_EQ(pClonedPrivateSurface->getGpuAddressToPatch(), *pClonedPrivateSurfPatchedAddress); } } } TEST_F(CloneKernelTest, GivenUnsetArgWhenCloningKernelThenKernelInfoIsCorrect) { pKernelInfo->addArgBuffer(0); for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) { EXPECT_EQ(1u, pSourceKernel[rootDeviceIndex]->getKernelArguments().size()); EXPECT_EQ(Kernel::NONE_OBJ, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).type); EXPECT_EQ(nullptr, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).object); EXPECT_EQ(nullptr, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).value); EXPECT_EQ(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size); EXPECT_EQ(0u, pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum()); EXPECT_FALSE(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched); EXPECT_EQ(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId); } retVal = pClonedMultiDeviceKernel->cloneKernel(pSourceMultiDeviceKernel.get()); EXPECT_EQ(CL_SUCCESS, retVal); for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) { EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArguments().size(), pClonedKernel[rootDeviceIndex]->getKernelArguments().size()); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).type, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).type); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).object, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).object); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).value, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).value); 
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).size); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum(), pClonedKernel[rootDeviceIndex]->getPatchedArgumentsNum()); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId); } } TEST_F(CloneKernelTest, GivenArgLocalWhenCloningKernelThenKernelInfoIsCorrect) { const size_t slmSize = 0x800; pKernelInfo->addArgLocal(0, 0, 1); for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) { pSourceKernel[rootDeviceIndex]->setKernelArgHandler(0, &Kernel::setArgLocal); pClonedKernel[rootDeviceIndex]->setKernelArgHandler(0, &Kernel::setArgLocal); } retVal = pSourceMultiDeviceKernel->setArg(0, slmSize, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) { EXPECT_EQ(1u, pSourceKernel[rootDeviceIndex]->getKernelArguments().size()); EXPECT_EQ(Kernel::SLM_OBJ, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).type); EXPECT_NE(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size); EXPECT_EQ(1u, pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum()); EXPECT_TRUE(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched); EXPECT_EQ(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId); } retVal = pClonedMultiDeviceKernel->cloneKernel(pSourceMultiDeviceKernel.get()); EXPECT_EQ(CL_SUCCESS, retVal); for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) { EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArguments().size(), pClonedKernel[rootDeviceIndex]->getKernelArguments().size()); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).type, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).type); 
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).object, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).object); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).value, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).value); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).size); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum(), pClonedKernel[rootDeviceIndex]->getPatchedArgumentsNum()); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId); EXPECT_EQ(alignUp(slmSize, 1024), pClonedKernel[rootDeviceIndex]->slmTotalSize); } } TEST_F(CloneKernelTest, GivenArgBufferWhenCloningKernelThenKernelInfoIsCorrect) { pKernelInfo->addArgBuffer(0, 0x20, sizeof(void *)); auto buffer = clUniquePtr(Buffer::create(context.get(), 0, MemoryConstants::pageSize, nullptr, retVal)); cl_mem memObj = buffer.get(); for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) { pSourceKernel[rootDeviceIndex]->setKernelArgHandler(0, &Kernel::setArgBuffer); pClonedKernel[rootDeviceIndex]->setKernelArgHandler(0, &Kernel::setArgBuffer); } retVal = pSourceMultiDeviceKernel->setArg(0, sizeof(cl_mem), &memObj); ASSERT_EQ(CL_SUCCESS, retVal); for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) { EXPECT_EQ(1u, pSourceKernel[rootDeviceIndex]->getKernelArguments().size()); EXPECT_EQ(Kernel::BUFFER_OBJ, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).type); EXPECT_NE(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size); EXPECT_EQ(1u, pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum()); EXPECT_TRUE(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched); EXPECT_EQ(0u, 
pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId); } retVal = pClonedMultiDeviceKernel->cloneKernel(pSourceMultiDeviceKernel.get()); EXPECT_EQ(CL_SUCCESS, retVal); for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) { EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArguments().size(), pClonedKernel[rootDeviceIndex]->getKernelArguments().size()); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).type, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).type); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).object, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).object); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).value, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).value); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).size); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum(), pClonedKernel[rootDeviceIndex]->getPatchedArgumentsNum()); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId); auto pKernelArg = (cl_mem *)(pClonedKernel[rootDeviceIndex]->getCrossThreadData() + pClonedKernel[rootDeviceIndex]->getKernelInfo().getArgDescriptorAt(0).as().stateless); EXPECT_EQ(buffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddressToPatch(), reinterpret_cast(*pKernelArg)); } } TEST_F(CloneKernelTest, GivenArgPipeWhenCloningKernelThenKernelInfoIsCorrect) { pKernelInfo->addArgPipe(0, 0x20, sizeof(void *)); auto pipe = clUniquePtr(Pipe::create(context.get(), 0, 1, 20, nullptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); cl_mem memObj = pipe.get(); auto rootDeviceIndex = *context->getRootDeviceIndices().begin(); pSourceKernel[rootDeviceIndex]->setKernelArgHandler(0, &Kernel::setArgPipe); 
pClonedKernel[rootDeviceIndex]->setKernelArgHandler(0, &Kernel::setArgPipe); retVal = pSourceKernel[rootDeviceIndex]->setArg(0, sizeof(cl_mem), &memObj); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, pSourceKernel[rootDeviceIndex]->getKernelArguments().size()); EXPECT_EQ(Kernel::PIPE_OBJ, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).type); EXPECT_NE(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size); EXPECT_EQ(1u, pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum()); EXPECT_TRUE(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched); EXPECT_EQ(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId); retVal = pClonedMultiDeviceKernel->cloneKernel(pSourceMultiDeviceKernel.get()); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArguments().size(), pClonedKernel[rootDeviceIndex]->getKernelArguments().size()); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).type, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).type); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).object, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).object); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).value, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).value); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).size); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum(), pClonedKernel[rootDeviceIndex]->getPatchedArgumentsNum()); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId); auto pKernelArg = (cl_mem *)(pClonedKernel[rootDeviceIndex]->getCrossThreadData() + pClonedKernel[rootDeviceIndex]->getKernelInfo().getArgDescriptorAt(0).as().stateless); 
EXPECT_EQ(pipe->getGraphicsAllocation(rootDeviceIndex)->getGpuAddressToPatch(), reinterpret_cast(*pKernelArg)); } TEST_F(CloneKernelTest, GivenArgImageWhenCloningKernelThenKernelInfoIsCorrect) { pKernelInfo->addArgImage(0, 0x20); auto &metaPayload = pKernelInfo->argAsImg(0).metadataPayload; metaPayload.imgWidth = 0x4; metaPayload.imgHeight = 0x8; metaPayload.imgDepth = 0xc; pKernelInfo->addExtendedDeviceSideEnqueueDescriptor(0, 0); auto image = std::unique_ptr(Image2dHelper<>::create(context.get())); ASSERT_NE(nullptr, image); auto rootDeviceIndex = *context->getRootDeviceIndices().begin(); size_t imageWidth = image->getImageDesc().image_width; size_t imageHeight = image->getImageDesc().image_height; size_t imageDepth = image->getImageDesc().image_depth; cl_mem memObj = image.get(); pSourceKernel[rootDeviceIndex]->setKernelArgHandler(0, &Kernel::setArgImage); pClonedKernel[rootDeviceIndex]->setKernelArgHandler(0, &Kernel::setArgImage); retVal = pSourceKernel[rootDeviceIndex]->setArg(0, sizeof(cl_mem), &memObj); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, pSourceKernel[rootDeviceIndex]->getKernelArguments().size()); EXPECT_EQ(Kernel::IMAGE_OBJ, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).type); EXPECT_NE(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size); EXPECT_EQ(1u, pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum()); EXPECT_TRUE(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched); EXPECT_EQ(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId); retVal = pClonedMultiDeviceKernel->cloneKernel(pSourceMultiDeviceKernel.get()); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArguments().size(), pClonedKernel[rootDeviceIndex]->getKernelArguments().size()); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).type, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).type); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).object, 
pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).object); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).value, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).value); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).size); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum(), pClonedKernel[rootDeviceIndex]->getPatchedArgumentsNum()); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId); auto crossThreadData = reinterpret_cast(pClonedKernel[rootDeviceIndex]->getCrossThreadData()); auto &clonedArg = pClonedKernel[rootDeviceIndex]->getKernelInfo().getArgDescriptorAt(0).as(); EXPECT_EQ(clonedArg.bindful, *crossThreadData); auto pImgWidth = ptrOffset(crossThreadData, clonedArg.metadataPayload.imgWidth); EXPECT_EQ(imageWidth, *pImgWidth); auto pImgHeight = ptrOffset(crossThreadData, clonedArg.metadataPayload.imgHeight); EXPECT_EQ(imageHeight, *pImgHeight); auto pImgDepth = ptrOffset(crossThreadData, clonedArg.metadataPayload.imgDepth); EXPECT_EQ(imageDepth, *pImgDepth); } TEST_F(CloneKernelTest, GivenArgAcceleratorWhenCloningKernelThenKernelInfoIsCorrect) { pKernelInfo->addArgAccelerator(0, undefined, 0x4, 0x14, 0x1c, 0xc); cl_motion_estimation_desc_intel desc = { CL_ME_MB_TYPE_4x4_INTEL, CL_ME_SUBPIXEL_MODE_QPEL_INTEL, CL_ME_SAD_ADJUST_MODE_HAAR_INTEL, CL_ME_SEARCH_PATH_RADIUS_16_12_INTEL}; cl_accelerator_intel accelerator = VmeAccelerator::create( context.get(), CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL, sizeof(desc), &desc, retVal); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, accelerator); auto rootDeviceIndex = *context->getRootDeviceIndices().begin(); pSourceKernel[rootDeviceIndex]->setKernelArgHandler(0, &Kernel::setArgAccelerator); 
pClonedKernel[rootDeviceIndex]->setKernelArgHandler(0, &Kernel::setArgAccelerator); retVal = pSourceKernel[rootDeviceIndex]->setArg(0, sizeof(cl_accelerator_intel), &accelerator); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, pSourceKernel[rootDeviceIndex]->getKernelArguments().size()); EXPECT_EQ(Kernel::ACCELERATOR_OBJ, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).type); EXPECT_NE(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size); EXPECT_EQ(1u, pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum()); EXPECT_TRUE(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched); EXPECT_EQ(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId); retVal = pClonedMultiDeviceKernel->cloneKernel(pSourceMultiDeviceKernel.get()); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArguments().size(), pClonedKernel[rootDeviceIndex]->getKernelArguments().size()); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).type, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).type); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).object, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).object); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).value, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).value); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).size); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum(), pClonedKernel[rootDeviceIndex]->getPatchedArgumentsNum()); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId); auto crossThreadData = reinterpret_cast(pClonedKernel[rootDeviceIndex]->getCrossThreadData()); 
ASSERT_TRUE(pClonedKernel[rootDeviceIndex]->getKernelInfo().getArgDescriptorAt(0).getExtendedTypeInfo().hasVmeExtendedDescriptor); const auto clonedArgDescVme = reinterpret_cast(pClonedKernel[rootDeviceIndex]->getKernelInfo().kernelDescriptor.payloadMappings.explicitArgsExtendedDescriptors[0].get()); uint32_t *pMbBlockType = ptrOffset(crossThreadData, clonedArgDescVme->mbBlockType); EXPECT_EQ(desc.mb_block_type, *pMbBlockType); uint32_t *pSubpixelMode = ptrOffset(crossThreadData, clonedArgDescVme->subpixelMode); EXPECT_EQ(desc.subpixel_mode, *pSubpixelMode); uint32_t *pSadAdjustMode = ptrOffset(crossThreadData, clonedArgDescVme->sadAdjustMode); EXPECT_EQ(desc.sad_adjust_mode, *pSadAdjustMode); uint32_t *pSearchPathType = ptrOffset(crossThreadData, clonedArgDescVme->searchPathType); EXPECT_EQ(desc.search_path_type, *pSearchPathType); retVal = clReleaseAcceleratorINTEL(accelerator); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(CloneKernelTest, GivenArgSamplerWhenCloningKernelThenKernelInfoIsCorrect) { auto sampler = clUniquePtr(new MockSampler(context.get(), true, (cl_addressing_mode)CL_ADDRESS_MIRRORED_REPEAT, (cl_filter_mode)CL_FILTER_NEAREST)); pKernelInfo->addArgSampler(0, 0x20, 0x8, 0x10, 0x4); pKernelInfo->addExtendedDeviceSideEnqueueDescriptor(0, 0); cl_sampler samplerObj = sampler.get(); auto rootDeviceIndex = *context->getRootDeviceIndices().begin(); pSourceKernel[rootDeviceIndex]->setKernelArgHandler(0, &Kernel::setArgSampler); pClonedKernel[rootDeviceIndex]->setKernelArgHandler(0, &Kernel::setArgSampler); retVal = pSourceKernel[rootDeviceIndex]->setArg(0, sizeof(cl_sampler), &samplerObj); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, pSourceKernel[rootDeviceIndex]->getKernelArguments().size()); EXPECT_EQ(Kernel::SAMPLER_OBJ, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).type); EXPECT_NE(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size); EXPECT_EQ(1u, pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum()); 
EXPECT_TRUE(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched); EXPECT_EQ(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId); retVal = pClonedMultiDeviceKernel->cloneKernel(pSourceMultiDeviceKernel.get()); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArguments().size(), pClonedKernel[rootDeviceIndex]->getKernelArguments().size()); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).type, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).type); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).object, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).object); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).value, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).value); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).size); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum(), pClonedKernel[rootDeviceIndex]->getPatchedArgumentsNum()); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId); auto crossThreadData = reinterpret_cast(pClonedKernel[rootDeviceIndex]->getCrossThreadData()); const auto &clonedArg = pClonedKernel[rootDeviceIndex]->getKernelInfo().getArgDescriptorAt(0).as(); EXPECT_EQ(SAMPLER_OBJECT_ID_SHIFT + clonedArg.bindful, *crossThreadData); auto pSnapWa = ptrOffset(crossThreadData, clonedArg.metadataPayload.samplerSnapWa); EXPECT_EQ(sampler->getSnapWaValue(), *pSnapWa); auto pAddressingMode = ptrOffset(crossThreadData, clonedArg.metadataPayload.samplerAddressingMode); EXPECT_EQ(GetAddrModeEnum(sampler->addressingMode), *pAddressingMode); auto pNormalizedCoords = ptrOffset(crossThreadData, clonedArg.metadataPayload.samplerNormalizedCoords); 
EXPECT_EQ(GetNormCoordsEnum(sampler->normalizedCoordinates), *pNormalizedCoords); EXPECT_EQ(3, sampler->getRefInternalCount()); } TEST_F(CloneKernelTest, GivenArgSvmWhenCloningKernelThenKernelInfoIsCorrect) { char *svmPtr = new char[256]; pKernelInfo->addArgBuffer(0, 0x20, sizeof(void *)); for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) { retVal = pSourceKernel[rootDeviceIndex]->setArgSvm(0, 256, svmPtr, nullptr, 0u); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, pSourceKernel[rootDeviceIndex]->getKernelArguments().size()); EXPECT_EQ(Kernel::SVM_OBJ, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).type); EXPECT_NE(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size); EXPECT_EQ(1u, pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum()); EXPECT_TRUE(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched); EXPECT_EQ(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId); } retVal = pClonedMultiDeviceKernel->cloneKernel(pSourceMultiDeviceKernel.get()); EXPECT_EQ(CL_SUCCESS, retVal); for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) { EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArguments().size(), pClonedKernel[rootDeviceIndex]->getKernelArguments().size()); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).type, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).type); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).object, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).object); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).value, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).value); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).size); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum(), pClonedKernel[rootDeviceIndex]->getPatchedArgumentsNum()); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched, 
pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId); auto pKernelArg = (void **)(pClonedKernel[rootDeviceIndex]->getCrossThreadData() + pClonedKernel[rootDeviceIndex]->getKernelInfo().getArgDescriptorAt(0).as().stateless); EXPECT_EQ(svmPtr, *pKernelArg); } delete[] svmPtr; } TEST_F(CloneKernelTest, GivenArgSvmAllocWhenCloningKernelThenKernelInfoIsCorrect) { pKernelInfo->addArgBuffer(0, 0x20, sizeof(void *)); char memory[100] = {}; MultiGraphicsAllocation multiGraphicsAllocation(3); for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) { auto svmAlloc = new MockGraphicsAllocation(rootDeviceIndex, memory, 100); multiGraphicsAllocation.addAllocation(svmAlloc); } retVal = pSourceMultiDeviceKernel->setArgSvmAlloc(0, memory, &multiGraphicsAllocation, 1u); ASSERT_EQ(CL_SUCCESS, retVal); for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) { EXPECT_EQ(1u, pSourceKernel[rootDeviceIndex]->getKernelArguments().size()); EXPECT_EQ(multiGraphicsAllocation.getGraphicsAllocation(rootDeviceIndex), pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).object); EXPECT_EQ(Kernel::SVM_ALLOC_OBJ, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).type); EXPECT_NE(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size); EXPECT_EQ(1u, pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum()); EXPECT_TRUE(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched); EXPECT_EQ(1u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId); } retVal = pClonedMultiDeviceKernel->cloneKernel(pSourceMultiDeviceKernel.get()); EXPECT_EQ(CL_SUCCESS, retVal); for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) { EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArguments().size(), pClonedKernel[rootDeviceIndex]->getKernelArguments().size()); 
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).type, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).type); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).object, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).object); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).value, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).value); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).size); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum(), pClonedKernel[rootDeviceIndex]->getPatchedArgumentsNum()); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId); auto pKernelArg = (void **)(pClonedKernel[rootDeviceIndex]->getCrossThreadData() + pClonedKernel[rootDeviceIndex]->getKernelInfo().getArgDescriptorAt(0).as().stateless); EXPECT_EQ(memory, *pKernelArg); delete multiGraphicsAllocation.getGraphicsAllocation(rootDeviceIndex); } } TEST_F(CloneKernelTest, GivenArgImmediateWhenCloningKernelThenKernelInfoIsCorrect) { pKernelInfo->addArgImmediate(0, sizeof(void *), 0x20); using TypeParam = unsigned long; auto value = (TypeParam)0xAA55AA55UL; retVal = pSourceMultiDeviceKernel->setArg(0, sizeof(TypeParam), &value); ASSERT_EQ(CL_SUCCESS, retVal); for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) { EXPECT_EQ(1u, pSourceKernel[rootDeviceIndex]->getKernelArguments().size()); EXPECT_EQ(Kernel::NONE_OBJ, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).type); EXPECT_NE(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size); EXPECT_EQ(1u, pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum()); EXPECT_TRUE(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched); EXPECT_EQ(0u, 
pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId); } retVal = pClonedMultiDeviceKernel->cloneKernel(pSourceMultiDeviceKernel.get()); EXPECT_EQ(CL_SUCCESS, retVal); for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) { EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArguments().size(), pClonedKernel[rootDeviceIndex]->getKernelArguments().size()); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).type, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).type); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).object, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).object); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).value, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).value); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).size); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum(), pClonedKernel[rootDeviceIndex]->getPatchedArgumentsNum()); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId); auto pKernelArg = (TypeParam *)(pClonedKernel[rootDeviceIndex]->getCrossThreadData() + pClonedKernel[rootDeviceIndex]->getKernelInfo().getArgDescriptorAt(0).as().elements[0].offset); EXPECT_EQ(value, *pKernelArg); } } TEST_F(CloneKernelTest, GivenExecInfoWhenCloningKernelThenSvmAllocationIsCorrect) { REQUIRE_SVM_OR_SKIP(device1); void *ptrSVM = context->getSVMAllocsManager()->createSVMAlloc(256, {}, context->getRootDeviceIndices(), context->getDeviceBitfields()); ASSERT_NE(nullptr, ptrSVM); auto svmData = context->getSVMAllocsManager()->getSVMAlloc(ptrSVM); ASSERT_NE(nullptr, svmData); auto &pSvmAllocs = svmData->gpuAllocations; pSourceMultiDeviceKernel->setSvmKernelExecInfo(pSvmAllocs); for (auto 
&rootDeviceIndex : this->context->getRootDeviceIndices()) { EXPECT_EQ(1u, pSourceKernel[rootDeviceIndex]->kernelSvmGfxAllocations.size()); EXPECT_NE(nullptr, pSourceKernel[rootDeviceIndex]->kernelSvmGfxAllocations.at(0)); EXPECT_EQ(pSvmAllocs.getGraphicsAllocation(rootDeviceIndex), pSourceKernel[rootDeviceIndex]->kernelSvmGfxAllocations.at(0)); } retVal = pClonedMultiDeviceKernel->cloneKernel(pSourceMultiDeviceKernel.get()); EXPECT_EQ(CL_SUCCESS, retVal); for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) { EXPECT_EQ(pSourceKernel[rootDeviceIndex]->kernelSvmGfxAllocations.size(), pClonedKernel[rootDeviceIndex]->kernelSvmGfxAllocations.size()); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->kernelSvmGfxAllocations.at(0), pClonedKernel[rootDeviceIndex]->kernelSvmGfxAllocations.at(0)); } context->getSVMAllocsManager()->freeSVMAlloc(ptrSVM); } TEST_F(CloneKernelTest, GivenUnifiedMemoryExecInfoWhenCloningKernelThenUnifiedMemoryAllocationIsCorrect) { REQUIRE_SVM_OR_SKIP(device1); void *ptrSVM = context->getSVMAllocsManager()->createSVMAlloc(256, {}, context->getRootDeviceIndices(), context->getDeviceBitfields()); ASSERT_NE(nullptr, ptrSVM); auto svmData = context->getSVMAllocsManager()->getSVMAlloc(ptrSVM); ASSERT_NE(nullptr, svmData); auto &pSvmAllocs = svmData->gpuAllocations; pSourceMultiDeviceKernel->setUnifiedMemoryExecInfo(pSvmAllocs); for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) { EXPECT_EQ(1u, pSourceKernel[rootDeviceIndex]->kernelUnifiedMemoryGfxAllocations.size()); EXPECT_NE(nullptr, pSourceKernel[rootDeviceIndex]->kernelUnifiedMemoryGfxAllocations.at(0)); EXPECT_EQ(pSvmAllocs.getGraphicsAllocation(rootDeviceIndex), pSourceKernel[rootDeviceIndex]->kernelUnifiedMemoryGfxAllocations.at(0)); } retVal = pClonedMultiDeviceKernel->cloneKernel(pSourceMultiDeviceKernel.get()); EXPECT_EQ(CL_SUCCESS, retVal); for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) { 
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->kernelUnifiedMemoryGfxAllocations.size(), pClonedKernel[rootDeviceIndex]->kernelUnifiedMemoryGfxAllocations.size()); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->kernelUnifiedMemoryGfxAllocations.at(0), pClonedKernel[rootDeviceIndex]->kernelUnifiedMemoryGfxAllocations.at(0)); } context->getSVMAllocsManager()->freeSVMAlloc(ptrSVM); } TEST_F(CloneKernelTest, givenBuiltinSourceKernelWhenCloningThenSetBuiltinFlagToClonedKernel) { for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) { pSourceKernel[rootDeviceIndex]->isBuiltIn = true; } pClonedMultiDeviceKernel->cloneKernel(pSourceMultiDeviceKernel.get()); for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) { EXPECT_TRUE(pClonedKernel[rootDeviceIndex]->isBuiltIn); } } compute-runtime-22.14.22890/opencl/test/unit_test/kernel/debug_kernel_tests.cpp000066400000000000000000000042671422164147700274740ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include using namespace NEO; TEST(DebugKernelTest, givenKernelCompiledForDebuggingWhenGetPerThreadSystemThreadSurfaceSizeIsCalledThenCorrectValueIsReturned) { auto device = std::make_unique(new MockDevice); MockProgram program(toClDeviceVector(*device)); program.enableKernelDebug(); std::unique_ptr kernel(MockKernel::create(device->getDevice(), &program)); EXPECT_EQ(MockDebugKernel::perThreadSystemThreadSurfaceSize, kernel->getPerThreadSystemThreadSurfaceSize()); } TEST(DebugKernelTest, givenKernelCompiledForDebuggingWhenQueryingIsKernelDebugEnabledThenTrueIsReturned) { auto device = std::make_unique(new MockDevice); MockProgram program(toClDeviceVector(*device)); 
program.enableKernelDebug(); std::unique_ptr kernel(MockKernel::create(device->getDevice(), &program)); kernel->initialize(); EXPECT_TRUE(kernel->isKernelDebugEnabled()); } TEST(DebugKernelTest, givenKernelWithoutDebugFlagWhenQueryingIsKernelDebugEnabledThenFalseIsReturned) { auto device = std::make_unique(new MockDevice); MockProgram program(toClDeviceVector(*device)); program.enableKernelDebug(); std::unique_ptr kernel(MockKernel::create(device->getDevice(), &program)); kernel->initialize(); EXPECT_FALSE(kernel->isKernelDebugEnabled()); } TEST(DebugKernelTest, givenKernelWithoutDebugFlagWhenGetPerThreadSystemThreadSurfaceSizeIsCalledThenZeroIsReturned) { auto device = std::make_unique(new MockDevice); MockProgram program(toClDeviceVector(*device)); program.enableKernelDebug(); std::unique_ptr kernel(MockKernel::create(device->getDevice(), &program)); EXPECT_EQ(0u, kernel->getPerThreadSystemThreadSurfaceSize()); } compute-runtime-22.14.22890/opencl/test/unit_test/kernel/gl/000077500000000000000000000000001422164147700235115ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/kernel/gl/kernel_arg_buffer_tests.cpp000066400000000000000000000046131422164147700311050ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/fixtures/memory_management_fixture.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/kernel/kernel.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/sharings/gl/gl_buffer.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/fixtures/context_fixture.h" #include "opencl/test/unit_test/kernel/kernel_arg_buffer_fixture.h" #include "opencl/test/unit_test/mocks/gl/mock_gl_sharing.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include 
"opencl/test/unit_test/mocks/mock_program.h" #include "CL/cl.h" #include "config.h" #include "gtest/gtest.h" #include using namespace NEO; typedef Test KernelArgBufferTest; TEST_F(KernelArgBufferTest, givenSharedBufferWhenSetArgIsCalledThenReportSharedObjUsage) { MockGlSharing glSharing; glSharing.uploadDataToBufferInfo(1, 0); pContext->setSharingFunctions(new GlSharingFunctionsMock()); auto sharedBuffer = GlBuffer::createSharedGlBuffer(pContext, CL_MEM_READ_WRITE, 1); auto nonSharedBuffer = new MockBuffer; auto sharedMem = static_cast(sharedBuffer); auto nonSharedMem = static_cast(nonSharedBuffer); EXPECT_FALSE(pKernel->isUsingSharedObjArgs()); this->pKernel->setArg(0, sizeof(cl_mem *), &nonSharedMem); EXPECT_FALSE(pKernel->isUsingSharedObjArgs()); this->pKernel->setArg(0, sizeof(cl_mem *), &sharedMem); EXPECT_TRUE(pKernel->isUsingSharedObjArgs()); delete nonSharedBuffer; delete sharedBuffer; } HWTEST_F(KernelArgBufferTest, givenSharedBufferWhenSetArgStatefulIsCalledThenBufferSurfaceShouldBeUsed) { MockGlSharing glSharing; glSharing.uploadDataToBufferInfo(1, 0); pContext->setSharingFunctions(new GlSharingFunctionsMock()); auto sharedBuffer = GlBuffer::createSharedGlBuffer(pContext, CL_MEM_READ_WRITE, 1); using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; RENDER_SURFACE_STATE surfaceState = {}; sharedBuffer->setArgStateful(&surfaceState); auto surfType = surfaceState.getSurfaceType(); EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_BUFFER, surfType); delete sharedBuffer; } compute-runtime-22.14.22890/opencl/test/unit_test/kernel/image_transformer_tests.cpp000066400000000000000000000202261422164147700305430ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/program/kernel_info.h" #include "shared/test/common/mocks/mock_kernel_info.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/kernel/image_transformer.h" #include 
"opencl/test/unit_test/fixtures/image_fixture.h" using namespace NEO; class ImageTransformerTest : public ::testing::Test { public: void SetUp() override { using SimpleKernelArgInfo = Kernel::SimpleKernelArgInfo; pKernelInfo = std::make_unique(); pKernelInfo->addArgImage(0, firstImageOffset, iOpenCL::IMAGE_MEMORY_OBJECT_2D, true); pKernelInfo->addArgImage(1, secondImageOffset, iOpenCL::IMAGE_MEMORY_OBJECT_2D, true); image1.reset(Image3dHelper<>::create(&context)); image2.reset(Image3dHelper<>::create(&context)); SimpleKernelArgInfo imageArg1; SimpleKernelArgInfo imageArg2; clImage1 = static_cast(image2.get()); clImage2 = static_cast(image2.get()); imageArg1.value = &clImage1; imageArg1.object = clImage1; imageArg2.value = &clImage2; imageArg2.object = clImage2; kernelArguments.push_back(imageArg1); kernelArguments.push_back(imageArg2); } const int firstImageOffset = 0x20; const int secondImageOffset = 0x40; std::unique_ptr pKernelInfo; ImageTransformer imageTransformer; MockContext context; std::unique_ptr image1; std::unique_ptr image2; cl_mem clImage1; cl_mem clImage2; char ssh[0x80]; std::vector kernelArguments; }; TEST_F(ImageTransformerTest, givenImageTransformerWhenRegisterImage3dThenTransformerHasRegisteredImages3d) { bool retVal; retVal = imageTransformer.hasRegisteredImages3d(); EXPECT_FALSE(retVal); imageTransformer.registerImage3d(0); retVal = imageTransformer.hasRegisteredImages3d(); EXPECT_TRUE(retVal); } TEST_F(ImageTransformerTest, givenImageTransformerWhenTransformToImage2dArrayThenTransformerDidTransform) { bool retVal; retVal = imageTransformer.didTransform(); EXPECT_FALSE(retVal); imageTransformer.transformImagesTo2dArray(*pKernelInfo, kernelArguments, nullptr); retVal = imageTransformer.didTransform(); EXPECT_TRUE(retVal); } TEST_F(ImageTransformerTest, givenImageTransformerWhenTransformToImage3dThenTransformerDidNotTransform) { bool retVal; retVal = imageTransformer.didTransform(); EXPECT_FALSE(retVal); 
imageTransformer.transformImagesTo2dArray(*pKernelInfo, kernelArguments, nullptr); imageTransformer.transformImagesTo3d(*pKernelInfo, kernelArguments, nullptr); retVal = imageTransformer.didTransform(); EXPECT_FALSE(retVal); } HWTEST_F(ImageTransformerTest, givenImageTransformerWhenTransformToImage2dArrayThenTransformOnlyRegisteredImages) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE; auto firstSurfaceState = reinterpret_cast(ptrOffset(ssh, firstImageOffset)); auto secondSurfaceState = reinterpret_cast(ptrOffset(ssh, secondImageOffset)); firstSurfaceState->setSurfaceType(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_NULL); secondSurfaceState->setSurfaceType(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_NULL); firstSurfaceState->setSurfaceArray(false); secondSurfaceState->setSurfaceArray(false); imageTransformer.registerImage3d(1); imageTransformer.transformImagesTo2dArray(*pKernelInfo, kernelArguments, ssh); EXPECT_EQ(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_NULL, firstSurfaceState->getSurfaceType()); EXPECT_FALSE(firstSurfaceState->getSurfaceArray()); EXPECT_EQ(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_2D, secondSurfaceState->getSurfaceType()); EXPECT_TRUE(secondSurfaceState->getSurfaceArray()); } HWTEST_F(ImageTransformerTest, givenImageTransformerWhenTransformToImage2dArrayThenTransformOnlyTransformableImages) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE; pKernelInfo->argAt(1).getExtendedTypeInfo().isTransformable = false; auto firstSurfaceState = reinterpret_cast(ptrOffset(ssh, firstImageOffset)); auto secondSurfaceState = reinterpret_cast(ptrOffset(ssh, secondImageOffset)); firstSurfaceState->setSurfaceType(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_NULL); secondSurfaceState->setSurfaceType(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_NULL); firstSurfaceState->setSurfaceArray(false); 
secondSurfaceState->setSurfaceArray(false); imageTransformer.registerImage3d(0); imageTransformer.registerImage3d(1); imageTransformer.transformImagesTo2dArray(*pKernelInfo, kernelArguments, ssh); EXPECT_EQ(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_2D, firstSurfaceState->getSurfaceType()); EXPECT_TRUE(firstSurfaceState->getSurfaceArray()); EXPECT_EQ(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_NULL, secondSurfaceState->getSurfaceType()); EXPECT_FALSE(secondSurfaceState->getSurfaceArray()); } HWTEST_F(ImageTransformerTest, givenImageTransformerWhenTransformToImage3dThenTransformAllRegisteredImages) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE; pKernelInfo->argAt(1).getExtendedTypeInfo().isTransformable = false; auto firstSurfaceState = reinterpret_cast(ptrOffset(ssh, firstImageOffset)); auto secondSurfaceState = reinterpret_cast(ptrOffset(ssh, secondImageOffset)); firstSurfaceState->setSurfaceType(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_NULL); secondSurfaceState->setSurfaceType(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_NULL); firstSurfaceState->setSurfaceArray(true); secondSurfaceState->setSurfaceArray(true); imageTransformer.registerImage3d(0); imageTransformer.registerImage3d(1); imageTransformer.transformImagesTo3d(*pKernelInfo, kernelArguments, ssh); EXPECT_EQ(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_3D, firstSurfaceState->getSurfaceType()); EXPECT_FALSE(firstSurfaceState->getSurfaceArray()); EXPECT_EQ(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_3D, secondSurfaceState->getSurfaceType()); EXPECT_FALSE(secondSurfaceState->getSurfaceArray()); } HWTEST_F(ImageTransformerTest, givenImageTransformerWhenTransformToImage3dThenTransformOnlyRegisteredImages) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE; pKernelInfo->argAt(1).getExtendedTypeInfo().isTransformable = false; auto firstSurfaceState = 
reinterpret_cast(ptrOffset(ssh, firstImageOffset)); auto secondSurfaceState = reinterpret_cast(ptrOffset(ssh, secondImageOffset)); firstSurfaceState->setSurfaceType(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_NULL); secondSurfaceState->setSurfaceType(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_NULL); firstSurfaceState->setSurfaceArray(true); secondSurfaceState->setSurfaceArray(true); imageTransformer.registerImage3d(1); imageTransformer.transformImagesTo3d(*pKernelInfo, kernelArguments, ssh); EXPECT_EQ(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_NULL, firstSurfaceState->getSurfaceType()); EXPECT_TRUE(firstSurfaceState->getSurfaceArray()); EXPECT_EQ(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_3D, secondSurfaceState->getSurfaceType()); EXPECT_FALSE(secondSurfaceState->getSurfaceArray()); } class MockImageTransformer : public ImageTransformer { public: using ImageTransformer::argIndexes; }; TEST(ImageTransformerRegisterImageTest, givenImageTransformerWhenRegisterTheSameImageTwiceThenAppendOnlyOne) { MockImageTransformer transformer; EXPECT_EQ(0u, transformer.argIndexes.size()); transformer.registerImage3d(0); EXPECT_EQ(1u, transformer.argIndexes.size()); transformer.registerImage3d(0); EXPECT_EQ(1u, transformer.argIndexes.size()); transformer.registerImage3d(1); EXPECT_EQ(2u, transformer.argIndexes.size()); } compute-runtime-22.14.22890/opencl/test/unit_test/kernel/kernel_accelerator_arg_tests.cpp000066400000000000000000000120131422164147700315070ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/source/accelerators/intel_accelerator.h" #include "opencl/source/accelerators/intel_motion_estimation.h" #include "opencl/source/kernel/kernel.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/fixtures/context_fixture.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include 
"opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "CL/cl.h" #include "gtest/gtest.h" #include using namespace NEO; class KernelArgAcceleratorFixture : public ContextFixture, public ClDeviceFixture { using ContextFixture::SetUp; public: KernelArgAcceleratorFixture() { } protected: void SetUp() { desc = { CL_ME_MB_TYPE_4x4_INTEL, CL_ME_SUBPIXEL_MODE_QPEL_INTEL, CL_ME_SAD_ADJUST_MODE_HAAR_INTEL, CL_ME_SEARCH_PATH_RADIUS_16_12_INTEL}; ClDeviceFixture::SetUp(); cl_device_id device = pClDevice; ContextFixture::SetUp(1, &device); pKernelInfo = std::make_unique(); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1; pKernelInfo->addArgAccelerator(0, 0x20, 0x04, 0x14, 0x1c, 0x0c); pProgram = new MockProgram(pContext, false, toClDeviceVector(*pClDevice)); pKernel = new MockKernel(pProgram, *pKernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); pKernel->setKernelArgHandler(0, &Kernel::setArgAccelerator); pCrossThreadData[0x04] = desc.mb_block_type; pCrossThreadData[0x0c] = desc.subpixel_mode; pCrossThreadData[0x14] = desc.sad_adjust_mode; pCrossThreadData[0x1c] = desc.sad_adjust_mode; pKernel->setCrossThreadData(&pCrossThreadData[0], sizeof(pCrossThreadData)); } void TearDown() { delete pKernel; delete pProgram; ContextFixture::TearDown(); ClDeviceFixture::TearDown(); } cl_motion_estimation_desc_intel desc; MockProgram *pProgram = nullptr; MockKernel *pKernel = nullptr; std::unique_ptr pKernelInfo; char pCrossThreadData[64]; }; typedef Test KernelArgAcceleratorTest; TEST_F(KernelArgAcceleratorTest, WhenCreatingVmeAcceleratorThenCorrectKernelArgsAreSet) { cl_int status; cl_accelerator_intel accelerator = VmeAccelerator::create( pContext, CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL, sizeof(desc), &desc, status); ASSERT_EQ(CL_SUCCESS, status); ASSERT_NE(nullptr, accelerator); status = this->pKernel->setArg(0, sizeof(cl_accelerator_intel), 
&accelerator); ASSERT_EQ(CL_SUCCESS, status); char *crossThreadData = pKernel->getCrossThreadData(); const auto vmeDescriptor = reinterpret_cast(pKernelInfo->kernelDescriptor.payloadMappings.explicitArgsExtendedDescriptors[0].get()); uint32_t *pMbBlockType = ptrOffset(reinterpret_cast(crossThreadData), vmeDescriptor->mbBlockType); EXPECT_EQ(desc.mb_block_type, *pMbBlockType); uint32_t *pSubpixelMode = ptrOffset(reinterpret_cast(crossThreadData), vmeDescriptor->subpixelMode); EXPECT_EQ(desc.subpixel_mode, *pSubpixelMode); uint32_t *pSadAdjustMode = ptrOffset(reinterpret_cast(crossThreadData), vmeDescriptor->sadAdjustMode); EXPECT_EQ(desc.sad_adjust_mode, *pSadAdjustMode); uint32_t *pSearchPathType = ptrOffset(reinterpret_cast(crossThreadData), vmeDescriptor->searchPathType); EXPECT_EQ(desc.search_path_type, *pSearchPathType); status = clReleaseAcceleratorINTEL(accelerator); EXPECT_EQ(CL_SUCCESS, status); } TEST_F(KernelArgAcceleratorTest, GivenNullWhenSettingKernelArgThenInvalidArgValueErrorIsReturned) { cl_int status; status = this->pKernel->setArg(0, sizeof(cl_accelerator_intel), nullptr); ASSERT_EQ(CL_INVALID_ARG_VALUE, status); } TEST_F(KernelArgAcceleratorTest, GivenInvalidSizeWhenSettingKernelArgThenInvalidArgSizeErrorIsReturned) { cl_int status; cl_accelerator_intel accelerator = VmeAccelerator::create( pContext, CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL, sizeof(desc), &desc, status); ASSERT_EQ(CL_SUCCESS, status); ASSERT_NE(nullptr, accelerator); status = this->pKernel->setArg(0, sizeof(cl_accelerator_intel) - 1, accelerator); ASSERT_EQ(CL_INVALID_ARG_SIZE, status); status = clReleaseAcceleratorINTEL(accelerator); EXPECT_EQ(CL_SUCCESS, status); } TEST_F(KernelArgAcceleratorTest, GivenInvalidAcceleratorWhenSettingKernelArgThenInvalidArgValueErrorIsReturned) { cl_int status; const void *notAnAccelerator = static_cast(pKernel); status = this->pKernel->setArg(0, sizeof(cl_accelerator_intel), notAnAccelerator); ASSERT_EQ(CL_INVALID_ARG_VALUE, status); } 
compute-runtime-22.14.22890/opencl/test/unit_test/kernel/kernel_arg_buffer_fixture.cpp000066400000000000000000000036141422164147700310270ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "kernel_arg_buffer_fixture.h" #include "shared/source/helpers/api_specific_config.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/kernel/kernel.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/fixtures/context_fixture.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "CL/cl.h" #include "gtest/gtest.h" #include using namespace NEO; void KernelArgBufferFixture::SetUp() { ClDeviceFixture::SetUp(); cl_device_id device = pClDevice; ContextFixture::SetUp(1, &device); // define kernel info pKernelInfo = std::make_unique(); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1; pKernelInfo->heapInfo.pSsh = pSshLocal; pKernelInfo->heapInfo.SurfaceStateHeapSize = sizeof(pSshLocal); pKernelInfo->addArgBuffer(0, 0x30, sizeof(void *), 0x0); pKernelInfo->kernelDescriptor.kernelAttributes.bufferAddressingMode = ApiSpecificConfig::getBindlessConfiguration() ? 
KernelDescriptor::AddressingMode::BindlessAndStateless : KernelDescriptor::AddressingMode::BindfulAndStateless; pProgram = new MockProgram(pContext, false, toClDeviceVector(*pClDevice)); pKernel = new MockKernel(pProgram, *pKernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); pKernel->setCrossThreadData(pCrossThreadData, sizeof(pCrossThreadData)); pKernel->setKernelArgHandler(0, &Kernel::setArgBuffer); } void KernelArgBufferFixture::TearDown() { delete pKernel; delete pProgram; ContextFixture::TearDown(); ClDeviceFixture::TearDown(); } compute-runtime-22.14.22890/opencl/test/unit_test/kernel/kernel_arg_buffer_fixture.h000066400000000000000000000020311422164147700304640ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/test/common/test_macros/test.h" #include "opencl/source/kernel/kernel.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/fixtures/context_fixture.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "CL/cl.h" #include "gtest/gtest.h" #include using namespace NEO; class KernelArgBufferFixture : public ContextFixture, public ClDeviceFixture { using ContextFixture::SetUp; public: void SetUp(); void TearDown(); cl_int retVal = CL_SUCCESS; MockProgram *pProgram = nullptr; MockKernel *pKernel = nullptr; std::unique_ptr pKernelInfo = nullptr; char pSshLocal[64]{}; char pCrossThreadData[64]{}; }; compute-runtime-22.14.22890/opencl/test/unit_test/kernel/kernel_arg_buffer_tests.cpp000066400000000000000000000751221422164147700305060ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include 
"shared/source/memory_manager/unified_memory_manager.h" #include "shared/source/unified_memory/unified_memory.h" #include "shared/test/common/fixtures/memory_management_fixture.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/kernel/kernel.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/fixtures/context_fixture.h" #include "opencl/test/unit_test/kernel/kernel_arg_buffer_fixture.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "opencl/test/unit_test/mocks/ult_cl_device_factory.h" #include "CL/cl.h" #include "gtest/gtest.h" #include "hw_cmds.h" #include using namespace NEO; struct KernelArgBufferTest : public Test { struct AllocationTypeHelper { AllocationType allocationType; bool compressed; }; }; TEST_F(KernelArgBufferTest, GivenValidBufferWhenSettingKernelArgThenBufferAddressIsCorrect) { Buffer *buffer = new MockBuffer(); auto val = (cl_mem)buffer; auto pVal = &val; auto retVal = this->pKernel->setArg(0, sizeof(cl_mem *), pVal); EXPECT_EQ(CL_SUCCESS, retVal); auto pKernelArg = (cl_mem **)(this->pKernel->getCrossThreadData() + this->pKernelInfo->argAsPtr(0).stateless); EXPECT_EQ(buffer->getCpuAddress(), *pKernelArg); delete buffer; } struct MultiDeviceKernelArgBufferTest : public ::testing::Test { void SetUp() override { ClDeviceVector devicesForContext; devicesForContext.push_back(deviceFactory.rootDevices[1]); devicesForContext.push_back(deviceFactory.subDevices[4]); devicesForContext.push_back(deviceFactory.subDevices[5]); pContext = std::make_unique(devicesForContext); kernelInfos.resize(3); kernelInfos[0] = nullptr; pKernelInfosStorage[0] = std::make_unique(); pKernelInfosStorage[1] = std::make_unique(); 
kernelInfos[1] = pKernelInfosStorage[0].get(); kernelInfos[2] = pKernelInfosStorage[1].get(); auto &hwHelper = HwHelper::get(renderCoreFamily); for (auto i = 0u; i < 2; i++) { pKernelInfosStorage[i]->heapInfo.pSsh = pSshLocal[i]; pKernelInfosStorage[i]->heapInfo.SurfaceStateHeapSize = sizeof(pSshLocal[i]); pKernelInfosStorage[i]->kernelDescriptor.kernelAttributes.simdSize = hwHelper.getMinimalSIMDSize(); auto crossThreadDataPointer = &pCrossThreadData[i]; memcpy_s(ptrOffset(&pCrossThreadData[i], i * sizeof(void *)), sizeof(void *), &crossThreadDataPointer, sizeof(void *)); pKernelInfosStorage[i]->crossThreadData = pCrossThreadData[i]; pKernelInfosStorage[i]->addArgBuffer(0, static_cast(i * sizeof(void *)), sizeof(void *)); pKernelInfosStorage[i]->setCrossThreadDataSize(static_cast((i + 1) * sizeof(void *))); } auto retVal = CL_INVALID_PROGRAM; pBuffer = std::unique_ptr(Buffer::create(pContext.get(), 0u, MemoryConstants::pageSize, nullptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, pBuffer); pProgram = std::make_unique(pContext.get(), false, pContext->getDevices()); } void TearDown() override { for (auto i = 0u; i < 2; i++) { pKernelInfosStorage[i]->crossThreadData = nullptr; } } UltClDeviceFactory deviceFactory{3, 2}; std::unique_ptr pContext; std::unique_ptr pKernelInfosStorage[2]; char pCrossThreadData[2][64]{}; char pSshLocal[2][64]{}; KernelInfoContainer kernelInfos; std::unique_ptr pBuffer; std::unique_ptr pProgram; }; TEST_F(MultiDeviceKernelArgBufferTest, GivenValidBufferWhenSettingKernelArgThenBufferAddressIsCorrect) { int32_t retVal = CL_INVALID_VALUE; auto pMultiDeviceKernel = std::unique_ptr(MultiDeviceKernel::create(pProgram.get(), kernelInfos, &retVal)); EXPECT_EQ(CL_SUCCESS, retVal); cl_mem val = pBuffer.get(); auto pVal = &val; retVal = pMultiDeviceKernel->setArg(0, sizeof(cl_mem *), pVal); EXPECT_EQ(CL_SUCCESS, retVal); for (auto &rootDeviceIndex : pContext->getRootDeviceIndices()) { auto pKernel = 
static_cast(pMultiDeviceKernel->getKernel(rootDeviceIndex)); auto pKernelArg = reinterpret_cast(pKernel->getCrossThreadData() + kernelInfos[rootDeviceIndex]->getArgDescriptorAt(0).as().stateless); EXPECT_EQ(pBuffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddressToPatch(), *pKernelArg); } } TEST_F(KernelArgBufferTest, GivenSvmPtrStatelessWhenSettingKernelArgThenArgumentsAreSetCorrectly) { Buffer *buffer = new MockBuffer(); auto val = (cl_mem)buffer; auto pVal = &val; pKernelInfo->kernelDescriptor.kernelAttributes.bufferAddressingMode = KernelDescriptor::Stateless; auto retVal = this->pKernel->setArg(0, sizeof(cl_mem *), pVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_FALSE(pKernel->requiresCoherency()); delete buffer; } HWTEST_F(KernelArgBufferTest, GivenSvmPtrStatefulWhenSettingKernelArgThenArgumentsAreSetCorrectly) { Buffer *buffer = new MockBuffer(); auto val = (cl_mem)buffer; auto pVal = &val; auto retVal = this->pKernel->setArg(0, sizeof(cl_mem *), pVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_FALSE(pKernel->requiresCoherency()); EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize()); typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->argAsPtr(0).bindful)); auto surfaceAddress = surfaceState->getSurfaceBaseAddress(); EXPECT_EQ(buffer->getGraphicsAllocation(mockRootDeviceIndex)->getGpuAddress(), surfaceAddress); delete buffer; } HWTEST_F(MultiDeviceKernelArgBufferTest, GivenSvmPtrStatefulWhenSettingKernelArgThenArgumentsAreSetCorrectly) { cl_mem val = pBuffer.get(); auto pVal = &val; int32_t retVal = CL_INVALID_VALUE; for (auto &kernelInfo : pKernelInfosStorage) { kernelInfo->argAsPtr(0).bindful = 0; } auto pMultiDeviceKernel = std::unique_ptr(MultiDeviceKernel::create(pProgram.get(), kernelInfos, &retVal)); EXPECT_EQ(CL_SUCCESS, retVal); retVal = pMultiDeviceKernel->setArg(0, sizeof(cl_mem *), pVal); EXPECT_EQ(CL_SUCCESS, retVal); for (auto 
&rootDeviceIndex : pContext->getRootDeviceIndices()) { auto pKernel = pMultiDeviceKernel->getKernel(rootDeviceIndex); EXPECT_FALSE(pKernel->requiresCoherency()); EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize()); typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( ptrOffset(pKernel->getSurfaceStateHeap(), kernelInfos[rootDeviceIndex]->getArgDescriptorAt(0).as().bindful)); auto surfaceAddress = surfaceState->getSurfaceBaseAddress(); EXPECT_EQ(pBuffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress(), surfaceAddress); } } HWTEST_F(KernelArgBufferTest, GivenBufferFromSvmPtrWhenSettingKernelArgThenArgumentsAreSetCorrectly) { Buffer *buffer = new MockBuffer(); buffer->getGraphicsAllocation(mockRootDeviceIndex)->setCoherent(true); auto val = (cl_mem)buffer; auto pVal = &val; auto retVal = this->pKernel->setArg(0, sizeof(cl_mem *), pVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(pKernel->requiresCoherency()); delete buffer; } TEST_F(KernelArgBufferTest, GivenInvalidBufferWhenSettingKernelArgThenInvalidMemObjectErrorIsReturned) { char *ptr = new char[sizeof(Buffer)]; auto val = (cl_mem *)ptr; auto pVal = &val; auto retVal = this->pKernel->setArg(0, sizeof(cl_mem *), pVal); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); delete[] ptr; } TEST_F(KernelArgBufferTest, GivenNullPtrWhenSettingKernelArgThenKernelArgIsNull) { auto val = (cl_mem *)nullptr; auto pVal = &val; this->pKernel->setArg(0, sizeof(cl_mem *), pVal); auto pKernelArg = (cl_mem **)(this->pKernel->getCrossThreadData() + this->pKernelInfo->argAsPtr(0).stateless); EXPECT_EQ(nullptr, *pKernelArg); } TEST_F(MultiDeviceKernelArgBufferTest, GivenNullPtrWhenSettingKernelArgThenKernelArgIsNull) { int32_t retVal = CL_INVALID_VALUE; auto pMultiDeviceKernel = std::unique_ptr(MultiDeviceKernel::create(pProgram.get(), kernelInfos, &retVal)); EXPECT_EQ(CL_SUCCESS, retVal); auto val = nullptr; auto pVal = &val; pMultiDeviceKernel->setArg(0, sizeof(cl_mem *), pVal); 
for (auto &rootDeviceIndex : pContext->getRootDeviceIndices()) { auto pKernel = static_cast(pMultiDeviceKernel->getKernel(rootDeviceIndex)); auto pKernelArg = reinterpret_cast(pKernel->getCrossThreadData() + kernelInfos[rootDeviceIndex]->getArgDescriptorAt(0).as().stateless); EXPECT_EQ(nullptr, *pKernelArg); } } TEST_F(KernelArgBufferTest, given32BitDeviceWhenArgPtrPassedIsNullThenOnly4BytesAreBeingPatched) { auto val = (cl_mem *)nullptr; auto pVal = &val; auto &argAsPtr = pKernelInfo->argAsPtr(0); argAsPtr.pointerSize = 4; auto pKernelArg64bit = (uint64_t *)(this->pKernel->getCrossThreadData() + argAsPtr.stateless); auto pKernelArg32bit = (uint32_t *)pKernelArg64bit; *pKernelArg64bit = 0xffffffffffffffff; this->pKernel->setArg(0, sizeof(cl_mem *), pVal); uint64_t expValue = 0u; EXPECT_EQ(0u, *pKernelArg32bit); EXPECT_NE(expValue, *pKernelArg64bit); } TEST_F(KernelArgBufferTest, given32BitDeviceWhenArgPassedIsNullThenOnly4BytesAreBeingPatched) { auto pVal = nullptr; auto &argAsPtr = pKernelInfo->argAsPtr(0); argAsPtr.pointerSize = 4; auto pKernelArg64bit = (uint64_t *)(this->pKernel->getCrossThreadData() + argAsPtr.stateless); auto pKernelArg32bit = (uint32_t *)pKernelArg64bit; *pKernelArg64bit = 0xffffffffffffffff; this->pKernel->setArg(0, sizeof(cl_mem *), pVal); uint64_t expValue = 0u; EXPECT_EQ(0u, *pKernelArg32bit); EXPECT_NE(expValue, *pKernelArg64bit); } TEST_F(KernelArgBufferTest, givenWritableBufferWhenSettingAsArgThenDoNotExpectAllocationInCacheFlushVector) { auto buffer = std::make_unique(); buffer->mockGfxAllocation.setMemObjectsAllocationWithWritableFlags(true); buffer->mockGfxAllocation.setFlushL3Required(false); auto val = static_cast(buffer.get()); auto pVal = &val; auto retVal = pKernel->setArg(0, sizeof(cl_mem *), pVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(nullptr, pKernel->kernelArgRequiresCacheFlush[0]); } TEST_F(KernelArgBufferTest, givenCacheFlushBufferWhenSettingAsArgThenExpectAllocationInCacheFlushVector) { auto buffer = 
std::make_unique(); buffer->mockGfxAllocation.setMemObjectsAllocationWithWritableFlags(false); buffer->mockGfxAllocation.setFlushL3Required(true); auto val = static_cast(buffer.get()); auto pVal = &val; auto retVal = pKernel->setArg(0, sizeof(cl_mem *), pVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(&buffer->mockGfxAllocation, pKernel->kernelArgRequiresCacheFlush[0]); } TEST_F(KernelArgBufferTest, givenNoCacheFlushBufferWhenSettingAsArgThenNotExpectAllocationInCacheFlushVector) { auto buffer = std::make_unique(); buffer->mockGfxAllocation.setMemObjectsAllocationWithWritableFlags(false); buffer->mockGfxAllocation.setFlushL3Required(false); auto val = static_cast(buffer.get()); auto pVal = &val; auto retVal = pKernel->setArg(0, sizeof(cl_mem *), pVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(nullptr, pKernel->kernelArgRequiresCacheFlush[0]); } TEST_F(KernelArgBufferTest, givenBufferWhenHasDirectStatelessAccessToHostMemoryIsCalledThenReturnFalse) { MockBuffer buffer; buffer.getGraphicsAllocation(mockRootDeviceIndex)->setAllocationType(AllocationType::BUFFER); auto val = (cl_mem)&buffer; auto pVal = &val; for (auto pureStatefulBufferAccess : {false, true}) { pKernelInfo->setBufferStateful(0, pureStatefulBufferAccess); auto retVal = pKernel->setArg(0, sizeof(cl_mem *), pVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_FALSE(pKernel->hasDirectStatelessAccessToHostMemory()); } } TEST_F(KernelArgBufferTest, givenSharedBufferWhenHasDirectStatelessAccessToSharedBufferIsCalledThenReturnCorrectValue) { MockBuffer buffer; buffer.getGraphicsAllocation(mockRootDeviceIndex)->setAllocationType(AllocationType::SHARED_BUFFER); auto val = (cl_mem)&buffer; auto pVal = &val; for (auto pureStatefulBufferAccess : {false, true}) { pKernelInfo->setBufferStateful(0, pureStatefulBufferAccess); auto retVal = pKernel->setArg(0, sizeof(cl_mem *), pVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(!pureStatefulBufferAccess, pKernel->hasDirectStatelessAccessToSharedBuffer()); } } 
TEST_F(KernelArgBufferTest, givenBufferInHostMemoryWhenHasDirectStatelessAccessToHostMemoryIsCalledThenReturnCorrectValue) { MockBuffer buffer; buffer.getGraphicsAllocation(mockRootDeviceIndex)->setAllocationType(AllocationType::BUFFER_HOST_MEMORY); auto val = (cl_mem)&buffer; auto pVal = &val; for (auto pureStatefulBufferAccess : {false, true}) { pKernelInfo->setBufferStateful(0, pureStatefulBufferAccess); auto retVal = pKernel->setArg(0, sizeof(cl_mem *), pVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(!pureStatefulBufferAccess, pKernel->hasDirectStatelessAccessToHostMemory()); } } TEST_F(KernelArgBufferTest, givenGfxAllocationWhenHasDirectStatelessAccessToHostMemoryIsCalledThenReturnFalse) { char data[128]; void *ptr = &data; MockGraphicsAllocation gfxAllocation(ptr, 128); gfxAllocation.setAllocationType(AllocationType::BUFFER); for (auto pureStatefulBufferAccess : {false, true}) { pKernelInfo->setBufferStateful(0, pureStatefulBufferAccess); auto retVal = pKernel->setArgSvmAlloc(0, ptr, &gfxAllocation, 0u); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_FALSE(pKernel->hasDirectStatelessAccessToHostMemory()); } } TEST_F(KernelArgBufferTest, givenGfxAllocationInHostMemoryWhenHasDirectStatelessAccessToHostMemoryIsCalledThenReturnCorrectValue) { char data[128]; void *ptr = &data; MockGraphicsAllocation gfxAllocation(ptr, 128); gfxAllocation.setAllocationType(AllocationType::BUFFER_HOST_MEMORY); for (auto pureStatefulBufferAccess : {false, true}) { pKernelInfo->setBufferStateful(0, pureStatefulBufferAccess); auto retVal = pKernel->setArgSvmAlloc(0, ptr, &gfxAllocation, 0u); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(!pureStatefulBufferAccess, pKernel->hasDirectStatelessAccessToHostMemory()); } } TEST_F(KernelArgBufferTest, givenInvalidKernelObjWhenHasDirectStatelessAccessToHostMemoryIsCalledThenReturnFalse) { KernelInfo kernelInfo; MockKernel emptyKernel(pProgram, kernelInfo, *pClDevice); EXPECT_FALSE(emptyKernel.hasDirectStatelessAccessToHostMemory()); 
pKernel->kernelArguments.at(0).type = Kernel::NONE_OBJ; EXPECT_FALSE(pKernel->hasDirectStatelessAccessToHostMemory()); pKernel->kernelArguments.at(0).type = Kernel::BUFFER_OBJ; EXPECT_FALSE(pKernel->hasDirectStatelessAccessToHostMemory()); pKernel->kernelArguments.at(0).type = Kernel::SVM_ALLOC_OBJ; EXPECT_FALSE(pKernel->hasDirectStatelessAccessToHostMemory()); } TEST_F(KernelArgBufferTest, givenKernelWithIndirectStatelessAccessWhenHasIndirectStatelessAccessToHostMemoryIsCalledThenReturnTrueForHostMemoryAllocations) { KernelInfo kernelInfo; EXPECT_FALSE(kernelInfo.hasIndirectStatelessAccess); MockKernel kernelWithNoIndirectStatelessAccess(pProgram, kernelInfo, *pClDevice); EXPECT_FALSE(kernelWithNoIndirectStatelessAccess.hasIndirectStatelessAccessToHostMemory()); kernelInfo.hasIndirectStatelessAccess = true; MockKernel kernelWithNoIndirectHostAllocations(pProgram, kernelInfo, *pClDevice); EXPECT_FALSE(kernelWithNoIndirectHostAllocations.hasIndirectStatelessAccessToHostMemory()); const auto allocationTypes = {AllocationType::BUFFER, AllocationType::BUFFER_HOST_MEMORY}; MockKernel kernelWithIndirectUnifiedMemoryAllocation(pProgram, kernelInfo, *pClDevice); MockGraphicsAllocation gfxAllocation; for (const auto type : allocationTypes) { gfxAllocation.setAllocationType(type); kernelWithIndirectUnifiedMemoryAllocation.setUnifiedMemoryExecInfo(&gfxAllocation); if (type == AllocationType::BUFFER_HOST_MEMORY) { EXPECT_TRUE(kernelWithIndirectUnifiedMemoryAllocation.hasIndirectStatelessAccessToHostMemory()); } else { EXPECT_FALSE(kernelWithIndirectUnifiedMemoryAllocation.hasIndirectStatelessAccessToHostMemory()); } kernelWithIndirectUnifiedMemoryAllocation.clearUnifiedMemoryExecInfo(); } } TEST_F(KernelArgBufferTest, givenKernelExecInfoWithIndirectStatelessAccessWhenHasIndirectStatelessAccessToHostMemoryIsCalledThenReturnTrueForHostMemoryAllocations) { KernelInfo kernelInfo; kernelInfo.hasIndirectStatelessAccess = true; MockKernel mockKernel(pProgram, kernelInfo, *pClDevice); 
EXPECT_FALSE(mockKernel.unifiedMemoryControls.indirectHostAllocationsAllowed); EXPECT_FALSE(mockKernel.hasIndirectStatelessAccessToHostMemory()); auto svmAllocationsManager = mockKernel.getContext().getSVMAllocsManager(); if (svmAllocationsManager == nullptr) { return; } mockKernel.unifiedMemoryControls.indirectHostAllocationsAllowed = true; EXPECT_FALSE(mockKernel.hasIndirectStatelessAccessToHostMemory()); auto deviceProperties = SVMAllocsManager::UnifiedMemoryProperties(InternalMemoryType::DEVICE_UNIFIED_MEMORY, mockKernel.getContext().getRootDeviceIndices(), mockKernel.getContext().getDeviceBitfields()); deviceProperties.device = &pClDevice->getDevice(); auto unifiedDeviceMemoryAllocation = svmAllocationsManager->createUnifiedMemoryAllocation(4096u, deviceProperties); EXPECT_FALSE(mockKernel.hasIndirectStatelessAccessToHostMemory()); auto hostProperties = SVMAllocsManager::UnifiedMemoryProperties(InternalMemoryType::HOST_UNIFIED_MEMORY, mockKernel.getContext().getRootDeviceIndices(), mockKernel.getContext().getDeviceBitfields()); auto unifiedHostMemoryAllocation = svmAllocationsManager->createUnifiedMemoryAllocation(4096u, hostProperties); EXPECT_TRUE(mockKernel.hasIndirectStatelessAccessToHostMemory()); svmAllocationsManager->freeSVMAlloc(unifiedDeviceMemoryAllocation); svmAllocationsManager->freeSVMAlloc(unifiedHostMemoryAllocation); } TEST_F(KernelArgBufferTest, whenSettingAuxTranslationRequiredThenIsAuxTranslationRequiredReturnsCorrectValue) { for (auto auxTranslationRequired : {false, true}) { pKernel->setAuxTranslationRequired(auxTranslationRequired); EXPECT_EQ(auxTranslationRequired, pKernel->isAuxTranslationRequired()); } } TEST_F(KernelArgBufferTest, givenSetArgBufferOnKernelWithDirectStatelessAccessToSharedBufferWhenUpdateAuxTranslationRequiredIsCalledThenIsAuxTranslationRequiredShouldReturnTrue) { DebugManagerStateRestore debugRestorer; DebugManager.flags.EnableStatelessCompression.set(1); MockBuffer buffer; 
buffer.getGraphicsAllocation(mockRootDeviceIndex)->setAllocationType(AllocationType::SHARED_BUFFER); auto val = (cl_mem)&buffer; auto pVal = &val; auto retVal = pKernel->setArg(0, sizeof(cl_mem *), pVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(pKernel->hasDirectStatelessAccessToSharedBuffer()); EXPECT_FALSE(pKernel->isAuxTranslationRequired()); pKernel->updateAuxTranslationRequired(); EXPECT_TRUE(pKernel->isAuxTranslationRequired()); } TEST_F(KernelArgBufferTest, givenSetArgBufferOnKernelWithDirectStatelessAccessToHostMemoryWhenUpdateAuxTranslationRequiredIsCalledThenIsAuxTranslationRequiredShouldReturnTrue) { DebugManagerStateRestore debugRestorer; DebugManager.flags.EnableStatelessCompression.set(1); MockBuffer buffer; buffer.getGraphicsAllocation(mockRootDeviceIndex)->setAllocationType(AllocationType::BUFFER_HOST_MEMORY); auto val = (cl_mem)&buffer; auto pVal = &val; auto retVal = pKernel->setArg(0, sizeof(cl_mem *), pVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(pKernel->hasDirectStatelessAccessToHostMemory()); EXPECT_FALSE(pKernel->isAuxTranslationRequired()); pKernel->updateAuxTranslationRequired(); EXPECT_TRUE(pKernel->isAuxTranslationRequired()); } TEST_F(KernelArgBufferTest, givenSetArgBufferOnKernelWithNoDirectStatelessAccessToHostMemoryWhenUpdateAuxTranslationRequiredIsCalledThenIsAuxTranslationRequiredShouldReturnFalse) { DebugManagerStateRestore debugRestorer; DebugManager.flags.EnableStatelessCompression.set(1); MockBuffer buffer; auto val = (cl_mem)&buffer; auto pVal = &val; auto retVal = pKernel->setArg(0, sizeof(cl_mem *), pVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_FALSE(pKernel->hasDirectStatelessAccessToHostMemory()); EXPECT_FALSE(pKernel->isAuxTranslationRequired()); pKernel->updateAuxTranslationRequired(); EXPECT_FALSE(pKernel->isAuxTranslationRequired()); } TEST_F(KernelArgBufferTest, givenSetArgSvmAllocOnKernelWithDirectStatelessAccessToHostMemoryWhenUpdateAuxTranslationRequiredIsCalledThenIsAuxTranslationRequiredShouldReturnTrue) { 
DebugManagerStateRestore debugRestorer; DebugManager.flags.EnableStatelessCompression.set(1); char data[128]; void *ptr = &data; MockGraphicsAllocation gfxAllocation(ptr, 128); gfxAllocation.setAllocationType(AllocationType::BUFFER_HOST_MEMORY); auto retVal = pKernel->setArgSvmAlloc(0, ptr, &gfxAllocation, 0u); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(pKernel->hasDirectStatelessAccessToHostMemory()); EXPECT_FALSE(pKernel->isAuxTranslationRequired()); pKernel->updateAuxTranslationRequired(); EXPECT_TRUE(pKernel->isAuxTranslationRequired()); } TEST_F(KernelArgBufferTest, givenSetArgSvmAllocOnKernelWithNoDirectStatelessAccessToHostMemoryWhenUpdateAuxTranslationRequiredIsCalledThenIsAuxTranslationRequiredShouldReturnFalse) { DebugManagerStateRestore debugRestorer; DebugManager.flags.EnableStatelessCompression.set(1); char data[128]; void *ptr = &data; MockGraphicsAllocation gfxAllocation(ptr, 128); auto retVal = pKernel->setArgSvmAlloc(0, ptr, &gfxAllocation, 0u); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_FALSE(pKernel->hasDirectStatelessAccessToHostMemory()); EXPECT_FALSE(pKernel->isAuxTranslationRequired()); pKernel->updateAuxTranslationRequired(); EXPECT_FALSE(pKernel->isAuxTranslationRequired()); } TEST_F(KernelArgBufferTest, givenSetUnifiedMemoryExecInfoOnKernelWithNoIndirectStatelessAccessWhenUpdateAuxTranslationRequiredIsCalledThenIsAuxTranslationRequiredShouldReturnFalse) { DebugManagerStateRestore debugRestorer; DebugManager.flags.EnableStatelessCompression.set(1); pKernelInfo->hasIndirectStatelessAccess = false; MockGraphicsAllocation gfxAllocation; gfxAllocation.setAllocationType(AllocationType::BUFFER_HOST_MEMORY); pKernel->setUnifiedMemoryExecInfo(&gfxAllocation); EXPECT_FALSE(pKernel->hasIndirectStatelessAccessToHostMemory()); EXPECT_FALSE(pKernel->isAuxTranslationRequired()); pKernel->updateAuxTranslationRequired(); EXPECT_FALSE(pKernel->isAuxTranslationRequired()); } TEST_F(KernelArgBufferTest, 
givenSetUnifiedMemoryExecInfoOnKernelWithIndirectStatelessAccessWhenUpdateAuxTranslationRequiredIsCalledThenIsAuxTranslationRequiredShouldReturnTrueForHostMemoryAllocation) { DebugManagerStateRestore debugRestorer; DebugManager.flags.EnableStatelessCompression.set(1); pKernelInfo->hasIndirectStatelessAccess = true; const auto allocationTypes = {AllocationType::BUFFER, AllocationType::BUFFER_HOST_MEMORY}; MockGraphicsAllocation gfxAllocation; for (const auto type : allocationTypes) { gfxAllocation.setAllocationType(type); pKernel->setUnifiedMemoryExecInfo(&gfxAllocation); if (type == AllocationType::BUFFER_HOST_MEMORY) { EXPECT_TRUE(pKernel->hasIndirectStatelessAccessToHostMemory()); } else { EXPECT_FALSE(pKernel->hasIndirectStatelessAccessToHostMemory()); } EXPECT_FALSE(pKernel->isAuxTranslationRequired()); pKernel->updateAuxTranslationRequired(); if (type == AllocationType::BUFFER_HOST_MEMORY) { EXPECT_TRUE(pKernel->isAuxTranslationRequired()); } else { EXPECT_FALSE(pKernel->isAuxTranslationRequired()); } pKernel->clearUnifiedMemoryExecInfo(); pKernel->setAuxTranslationRequired(false); } } TEST_F(KernelArgBufferTest, givenSetUnifiedMemoryExecInfoOnKernelWithIndirectStatelessAccessWhenFillWithKernelObjsForAuxTranslationIsCalledThenSetKernelObjectsForAuxTranslation) { DebugManagerStateRestore debugRestorer; DebugManager.flags.EnableStatelessCompression.set(1); pKernelInfo->hasIndirectStatelessAccess = true; constexpr std::array allocationTypes = {{{AllocationType::BUFFER, false}, {AllocationType::BUFFER, true}, {AllocationType::BUFFER_HOST_MEMORY, false}, {AllocationType::SVM_GPU, true}}}; auto gmm = std::make_unique(pDevice->getRootDeviceEnvironment().getGmmClientContext(), nullptr, 0, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, StorageInfo{}, true); MockGraphicsAllocation gfxAllocation; gfxAllocation.setDefaultGmm(gmm.get()); for (const auto type : allocationTypes) { gfxAllocation.setAllocationType(type.allocationType); 
pKernel->setUnifiedMemoryExecInfo(&gfxAllocation); gmm->isCompressionEnabled = type.compressed; KernelObjsForAuxTranslation kernelObjsForAuxTranslation; pKernel->fillWithKernelObjsForAuxTranslation(kernelObjsForAuxTranslation); if (type.compressed) { EXPECT_EQ(1u, kernelObjsForAuxTranslation.size()); auto kernelObj = *kernelObjsForAuxTranslation.find({KernelObjForAuxTranslation::Type::GFX_ALLOC, &gfxAllocation}); EXPECT_NE(nullptr, kernelObj.object); EXPECT_EQ(KernelObjForAuxTranslation::Type::GFX_ALLOC, kernelObj.type); kernelObjsForAuxTranslation.erase(kernelObj); } else { EXPECT_EQ(0u, kernelObjsForAuxTranslation.size()); } pKernel->clearUnifiedMemoryExecInfo(); pKernel->setAuxTranslationRequired(false); } } TEST_F(KernelArgBufferTest, givenSVMAllocsManagerWithCompressedSVMAllocationsWhenFillWithKernelObjsForAuxTranslationIsCalledThenSetKernelObjectsForAuxTranslation) { if (pContext->getSVMAllocsManager() == nullptr) { return; } DebugManagerStateRestore debugRestorer; DebugManager.flags.EnableStatelessCompression.set(1); constexpr std::array allocationTypes = {{{AllocationType::BUFFER, false}, {AllocationType::BUFFER, true}, {AllocationType::BUFFER_HOST_MEMORY, false}, {AllocationType::SVM_GPU, true}}}; auto gmm = std::make_unique(pDevice->getRootDeviceEnvironment().getGmmClientContext(), nullptr, 0, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, StorageInfo{}, true); MockGraphicsAllocation gfxAllocation; gfxAllocation.setDefaultGmm(gmm.get()); SvmAllocationData allocData(0); allocData.gpuAllocations.addAllocation(&gfxAllocation); allocData.device = &pClDevice->getDevice(); for (const auto type : allocationTypes) { gfxAllocation.setAllocationType(type.allocationType); gmm->isCompressionEnabled = type.compressed; pContext->getSVMAllocsManager()->insertSVMAlloc(allocData); KernelObjsForAuxTranslation kernelObjsForAuxTranslation; pKernel->fillWithKernelObjsForAuxTranslation(kernelObjsForAuxTranslation); if (type.compressed) { EXPECT_EQ(1u, 
kernelObjsForAuxTranslation.size()); auto kernelObj = *kernelObjsForAuxTranslation.find({KernelObjForAuxTranslation::Type::GFX_ALLOC, &gfxAllocation}); EXPECT_NE(nullptr, kernelObj.object); EXPECT_EQ(KernelObjForAuxTranslation::Type::GFX_ALLOC, kernelObj.type); kernelObjsForAuxTranslation.erase(kernelObj); } else { EXPECT_EQ(0u, kernelObjsForAuxTranslation.size()); } pContext->getSVMAllocsManager()->removeSVMAlloc(allocData); } } class KernelArgBufferFixtureBindless : public KernelArgBufferFixture { public: void SetUp() { DebugManager.flags.UseBindlessMode.set(1); KernelArgBufferFixture::SetUp(); pBuffer = new MockBuffer(); ASSERT_NE(nullptr, pBuffer); pKernelInfo->argAsPtr(0).bindless = bindlessOffset; pKernelInfo->argAsPtr(0).stateless = undefined; pKernelInfo->argAsPtr(0).bindful = undefined; } void TearDown() { delete pBuffer; KernelArgBufferFixture::TearDown(); } DebugManagerStateRestore restorer; MockBuffer *pBuffer; const CrossThreadDataOffset bindlessOffset = 0x10; }; typedef Test KernelArgBufferTestBindless; HWTEST_F(KernelArgBufferTestBindless, givenUsedBindlessBuffersWhenPatchingSurfaceStateOffsetsThenCorrectOffsetIsPatchedInCrossThreadData) { using DataPortBindlessSurfaceExtendedMessageDescriptor = typename FamilyType::DataPortBindlessSurfaceExtendedMessageDescriptor; auto patchLocation = reinterpret_cast(ptrOffset(pKernel->getCrossThreadData(), bindlessOffset)); *patchLocation = 0xdead; cl_mem memObj = pBuffer; retVal = pKernel->setArg(0, sizeof(memObj), &memObj); EXPECT_NE(0xdeadu, *patchLocation); } compute-runtime-22.14.22890/opencl/test/unit_test/kernel/kernel_arg_info_tests.cpp000066400000000000000000000132441422164147700301650ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/fixtures/memory_management_fixture.h" #include "shared/test/common/helpers/kernel_binary_helper.h" #include "shared/test/common/test_macros/test.h" #include 
"opencl/source/kernel/kernel.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "opencl/test/unit_test/program/program_tests.h" #include "opencl/test/unit_test/program/program_with_source.h" using namespace NEO; class KernelArgInfoTest : public ProgramFromSourceTest { public: KernelArgInfoTest() { } ~KernelArgInfoTest() override = default; protected: void SetUp() override { kbHelper = new KernelBinaryHelper("copybuffer", true); ProgramFromSourceTest::SetUp(); ASSERT_NE(nullptr, pProgram); ASSERT_EQ(CL_SUCCESS, retVal); retVal = pProgram->build( pProgram->getDevices(), nullptr, false); ASSERT_EQ(CL_SUCCESS, retVal); // create a kernel pKernel = Kernel::create( pProgram, pProgram->getKernelInfoForKernel(kernelName), *pPlatform->getClDevice(0), &retVal); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, pKernel); } void TearDown() override { delete pKernel; pKernel = nullptr; ProgramFromSourceTest::TearDown(); delete kbHelper; } template void queryArgInfo(cl_kernel_arg_info paramName, T ¶mValue) { size_t paramValueSize = 0; size_t param_value_size_ret = 0; // get size retVal = pKernel->getArgInfo( 0, paramName, paramValueSize, nullptr, ¶m_value_size_ret); EXPECT_NE(0u, param_value_size_ret); ASSERT_EQ(CL_SUCCESS, retVal); // get the name paramValueSize = param_value_size_ret; retVal = pKernel->getArgInfo( 0, paramName, paramValueSize, ¶mValue, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); } Kernel *pKernel = nullptr; cl_int retVal = CL_SUCCESS; KernelBinaryHelper *kbHelper = nullptr; }; TEST_F(KernelArgInfoTest, GivenNullWhenGettingKernelInfoThenNullIsReturned) { auto kernelInfo = this->pProgram->getKernelInfo(nullptr, 0); EXPECT_EQ(nullptr, kernelInfo); } TEST_F(KernelArgInfoTest, GivenInvalidParametersWhenGettingKernelArgInfoThenValueSizeRetIsNotUpdated) { size_t paramValueSizeRet = 0x1234; retVal = pKernel->getArgInfo( 0, 0, 0, nullptr, 
¶mValueSizeRet); EXPECT_EQ(CL_INVALID_VALUE, retVal); EXPECT_EQ(0x1234u, paramValueSizeRet); } TEST_F(KernelArgInfoTest, GivenKernelArgAccessQualifierWhenQueryingArgInfoThenKernelArgAcessNoneIsReturned) { cl_kernel_arg_access_qualifier param_value = 0; queryArgInfo(CL_KERNEL_ARG_ACCESS_QUALIFIER, param_value); EXPECT_EQ(static_cast(CL_KERNEL_ARG_ACCESS_NONE), param_value); } TEST_F(KernelArgInfoTest, GivenKernelArgAddressQualifierWhenQueryingArgInfoThenKernelArgAddressGlobalIsReturned) { cl_kernel_arg_address_qualifier param_value = 0; queryArgInfo(CL_KERNEL_ARG_ADDRESS_QUALIFIER, param_value); EXPECT_EQ(static_cast(CL_KERNEL_ARG_ADDRESS_GLOBAL), param_value); } TEST_F(KernelArgInfoTest, GivenKernelArgTypeQualifierWhenQueryingArgInfoThenKernelArgTypeNoneIsReturned) { cl_kernel_arg_type_qualifier param_value = 0; queryArgInfo(CL_KERNEL_ARG_TYPE_QUALIFIER, param_value); EXPECT_EQ(static_cast(CL_KERNEL_ARG_TYPE_NONE), param_value); } TEST_F(KernelArgInfoTest, GivenParamWhenGettingKernelTypeNameThenCorrectValueIsReturned) { cl_kernel_arg_info paramName = CL_KERNEL_ARG_TYPE_NAME; char *paramValue = nullptr; size_t paramValueSize = 0; size_t paramValueSizeRet = 0; // get size retVal = pKernel->getArgInfo( 0, paramName, paramValueSize, nullptr, ¶mValueSizeRet); EXPECT_NE(0u, paramValueSizeRet); ASSERT_EQ(CL_SUCCESS, retVal); // allocate space for name paramValue = new char[paramValueSizeRet]; // get the name paramValueSize = paramValueSizeRet; retVal = pKernel->getArgInfo( 0, paramName, paramValueSize, paramValue, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); const char expectedString[] = "uint*"; auto result = strncmp(paramValue, expectedString, sizeof(expectedString)); EXPECT_EQ(0, result); delete[] paramValue; } TEST_F(KernelArgInfoTest, GivenParamWhenGettingKernelArgNameThenCorrectValueIsReturned) { cl_kernel_arg_info paramName = CL_KERNEL_ARG_NAME; char *paramValue = nullptr; size_t paramValueSize = 0; size_t paramValueSizeRet = 0; // get size retVal = 
pKernel->getArgInfo( 0, paramName, paramValueSize, nullptr, ¶mValueSizeRet); EXPECT_NE(0u, paramValueSizeRet); ASSERT_EQ(CL_SUCCESS, retVal); // allocate space for name paramValue = new char[paramValueSizeRet]; // get the name paramValueSize = paramValueSizeRet; retVal = pKernel->getArgInfo( 0, paramName, paramValueSize, paramValue, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0, strcmp(paramValue, "src")); delete[] paramValue; } compute-runtime-22.14.22890/opencl/test/unit_test/kernel/kernel_arg_pipe_tests.cpp000066400000000000000000000130211422164147700301600ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/source/kernel/kernel.h" #include "opencl/source/mem_obj/pipe.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/fixtures/context_fixture.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_pipe.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "CL/cl.h" #include "gtest/gtest.h" #include using namespace NEO; class KernelArgPipeFixture : public ContextFixture, public ClDeviceFixture { using ContextFixture::SetUp; public: KernelArgPipeFixture() { } protected: void SetUp() { ClDeviceFixture::SetUp(); cl_device_id device = pClDevice; ContextFixture::SetUp(1, &device); // define kernel info pKernelInfo = std::make_unique(); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1; pKernelInfo->heapInfo.pSsh = pSshLocal; pKernelInfo->heapInfo.SurfaceStateHeapSize = sizeof(pSshLocal); pKernelInfo->addArgPipe(0, 0x30, sizeof(void *)); pProgram = new MockProgram(pContext, false, toClDeviceVector(*pClDevice)); pKernel = new MockKernel(pProgram, *pKernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); 
pKernel->setCrossThreadData(pCrossThreadData, sizeof(pCrossThreadData)); pKernel->setKernelArgHandler(0, &Kernel::setArgPipe); } void TearDown() { delete pKernel; delete pProgram; ContextFixture::TearDown(); ClDeviceFixture::TearDown(); } cl_int retVal = CL_SUCCESS; MockProgram *pProgram = nullptr; MockKernel *pKernel = nullptr; std::unique_ptr pKernelInfo; SKernelBinaryHeaderCommon kernelHeader; char pSshLocal[64]; char pCrossThreadData[64]; }; typedef Test KernelArgPipeTest; TEST_F(KernelArgPipeTest, GivenValidPipeWhenSettingKernelArgThenPipeAddressIsCorrect) { Pipe *pipe = new MockPipe(pContext); auto val = (cl_mem)pipe; auto pVal = &val; auto retVal = this->pKernel->setArg(0, sizeof(cl_mem *), pVal); EXPECT_EQ(CL_SUCCESS, retVal); auto pKernelArg = (cl_mem **)(this->pKernel->getCrossThreadData() + this->pKernelInfo->argAsPtr(0).stateless); EXPECT_EQ(pipe->getCpuAddress(), *pKernelArg); delete pipe; } TEST_F(KernelArgPipeTest, GivenSvmPtrStatelessWhenSettingKernelArgThenArgumentsAreSetCorrectly) { Pipe *pipe = new MockPipe(pContext); auto val = (cl_mem)pipe; auto pVal = &val; auto retVal = this->pKernel->setArg(0, sizeof(cl_mem *), pVal); EXPECT_EQ(CL_SUCCESS, retVal); delete pipe; } HWTEST_F(KernelArgPipeTest, GivenSvmPtrStatefulWhenSettingKernelArgThenArgumentsAreSetCorrectly) { Pipe *pipe = new MockPipe(pContext); auto val = (cl_mem)pipe; auto pVal = &val; pKernelInfo->argAsPtr(0).bindful = 0; auto retVal = this->pKernel->setArg(0, sizeof(cl_mem *), pVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize()); typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->argAsPtr(0).bindful)); void *surfaceAddress = reinterpret_cast(surfaceState->getSurfaceBaseAddress()); EXPECT_EQ(pipe->getCpuAddress(), surfaceAddress); delete pipe; } TEST_F(KernelArgPipeTest, 
GivenInvalidPipeWhenSettingKernelArgThenInvalidMemObjectErrorIsReturned) { char *ptr = new char[sizeof(Pipe)]; auto val = (cl_mem *)ptr; auto pVal = &val; auto retVal = this->pKernel->setArg(0, sizeof(cl_mem *), pVal); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); delete[] ptr; } TEST_F(KernelArgPipeTest, GivenBufferWhenSettingKernelArgThenInvalidArgValueErrorIsReturned) { Buffer *buffer = new MockBuffer(); auto val = (cl_mem)buffer; auto pVal = &val; auto retVal = this->pKernel->setArg(0, sizeof(cl_mem *), pVal); EXPECT_EQ(CL_INVALID_ARG_VALUE, retVal); delete buffer; } TEST_F(KernelArgPipeTest, GivenPipeFromDifferentContextWhenSettingKernelArgThenInvalidMemObjectErrorIsReturned) { MockContext newContext; Pipe *pipe = new MockPipe(&newContext); auto val = (cl_mem)pipe; auto pVal = &val; auto retVal = this->pKernel->setArg(0, sizeof(cl_mem *), pVal); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); delete pipe; } TEST_F(KernelArgPipeTest, GivenInvalidSizeWhenSettingKernelArgThenInvalidArgSizeErrorIsReturned) { Pipe *pipe = new MockPipe(pContext); auto val = (cl_mem *)pipe; auto pVal = &val; auto retVal = this->pKernel->setArg(0, 1, pVal); EXPECT_EQ(CL_INVALID_ARG_SIZE, retVal); delete pipe; } TEST_F(KernelArgPipeTest, GivenPtrToNullWhenSettingKernelArgThenInvalidMemObjectErrorIsReturned) { auto val = (cl_mem *)nullptr; auto pVal = &val; auto retVal = this->pKernel->setArg(0, sizeof(cl_mem *), pVal); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } TEST_F(KernelArgPipeTest, GivenNullWhenSettingKernelArgThenInvalidMemObjectErrorIsReturned) { auto pVal = nullptr; auto retVal = this->pKernel->setArg(0, sizeof(cl_mem *), pVal); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } compute-runtime-22.14.22890/opencl/test/unit_test/kernel/kernel_arg_svm_tests.cpp000066400000000000000000000500421422164147700300340ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include 
"opencl/source/kernel/kernel.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/fixtures/context_fixture.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "CL/cl.h" #include "gtest/gtest.h" #include using namespace NEO; class KernelArgSvmFixture_ : public ContextFixture, public ClDeviceFixture { using ContextFixture::SetUp; public: KernelArgSvmFixture_() { } protected: void SetUp() { ClDeviceFixture::SetUp(); cl_device_id device = pClDevice; ContextFixture::SetUp(1, &device); // define kernel info pKernelInfo = std::make_unique(); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1; pKernelInfo->heapInfo.pSsh = pSshLocal; pKernelInfo->heapInfo.SurfaceStateHeapSize = sizeof(pSshLocal); pKernelInfo->addArgBuffer(0, 0x30, sizeof(void *)); pProgram = new MockProgram(pContext, false, toClDeviceVector(*pClDevice)); pKernel = new MockKernel(pProgram, *pKernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); pKernel->setCrossThreadData(pCrossThreadData, sizeof(pCrossThreadData)); } void TearDown() { delete pKernel; delete pProgram; ContextFixture::TearDown(); ClDeviceFixture::TearDown(); } cl_int retVal = CL_SUCCESS; MockProgram *pProgram = nullptr; MockKernel *pKernel = nullptr; std::unique_ptr pKernelInfo; SKernelBinaryHeaderCommon kernelHeader; char pSshLocal[64]; char pCrossThreadData[64]; }; typedef Test KernelArgSvmTest; TEST_F(KernelArgSvmTest, GivenValidSvmPtrWhenSettingKernelArgThenSvmPtrIsCorrect) { char *svmPtr = new char[256]; auto retVal = pKernel->setArgSvm(0, 256, svmPtr, nullptr, 0u); EXPECT_EQ(CL_SUCCESS, retVal); auto pKernelArg = (void **)(pKernel->getCrossThreadData() + pKernelInfo->argAsPtr(0).stateless); EXPECT_EQ(svmPtr, *pKernelArg); delete[] svmPtr; } 
HWTEST_F(KernelArgSvmTest, GivenSvmPtrStatefulWhenSettingKernelArgThenArgumentsAreSetCorrectly) { char *svmPtr = new char[256]; pKernelInfo->argAsPtr(0).bindful = 0; auto retVal = pKernel->setArgSvm(0, 256, svmPtr, nullptr, 0u); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize()); typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->argAsPtr(0).bindful)); void *surfaceAddress = reinterpret_cast(surfaceState->getSurfaceBaseAddress()); EXPECT_EQ(svmPtr, surfaceAddress); delete[] svmPtr; } TEST_F(KernelArgSvmTest, GivenValidSvmAllocWhenSettingKernelArgThenArgumentsAreSetCorrectly) { char *svmPtr = new char[256]; MockGraphicsAllocation svmAlloc(svmPtr, 256); auto retVal = pKernel->setArgSvmAlloc(0, svmPtr, &svmAlloc, 0u); EXPECT_EQ(CL_SUCCESS, retVal); auto pKernelArg = (void **)(pKernel->getCrossThreadData() + pKernelInfo->argAsPtr(0).stateless); EXPECT_EQ(svmPtr, *pKernelArg); delete[] svmPtr; } TEST_F(KernelArgSvmTest, GivenSvmAllocWithUncacheableWhenSettingKernelArgThenKernelHasUncacheableArgs) { auto svmPtr = std::make_unique(256); MockGraphicsAllocation svmAlloc(svmPtr.get(), 256); svmAlloc.setUncacheable(true); auto retVal = pKernel->setArgSvmAlloc(0, svmPtr.get(), &svmAlloc, 0u); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(pKernel->hasUncacheableStatelessArgs()); } TEST_F(KernelArgSvmTest, GivenSvmAllocWithoutUncacheableAndKenelWithUncachebleArgWhenSettingKernelArgThenKernelDoesNotHaveUncacheableArgs) { auto svmPtr = std::make_unique(256); MockGraphicsAllocation svmAlloc(svmPtr.get(), 256); svmAlloc.setUncacheable(true); auto retVal = pKernel->setArgSvmAlloc(0, svmPtr.get(), &svmAlloc, 0u); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(pKernel->hasUncacheableStatelessArgs()); svmAlloc.setUncacheable(false); pKernel->kernelArguments[0].isStatelessUncacheable = true; retVal = pKernel->setArgSvmAlloc(0, svmPtr.get(), &svmAlloc, 
0u); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_FALSE(pKernel->hasUncacheableStatelessArgs()); } HWTEST_F(KernelArgSvmTest, GivenValidSvmAllocStatefulWhenSettingKernelArgThenArgumentsAreSetCorrectly) { char *svmPtr = new char[256]; MockGraphicsAllocation svmAlloc(svmPtr, 256); pKernelInfo->argAsPtr(0).bindful = 0; auto retVal = pKernel->setArgSvmAlloc(0, svmPtr, &svmAlloc, 0u); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize()); typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->argAsPtr(0).bindful)); void *surfaceAddress = reinterpret_cast(surfaceState->getSurfaceBaseAddress()); EXPECT_EQ(svmPtr, surfaceAddress); delete[] svmPtr; } HWTEST_F(KernelArgSvmTest, givenOffsetedSvmPointerWhenSetArgSvmAllocIsCalledThenProperSvmAddressIsPatched) { std::unique_ptr svmPtr(new char[256]); auto offsetedPtr = svmPtr.get() + 4; MockGraphicsAllocation svmAlloc(svmPtr.get(), 256); pKernelInfo->argAsPtr(0).bindful = 0; pKernel->setArgSvmAlloc(0, offsetedPtr, &svmAlloc, 0u); typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->argAsPtr(0).bindful)); void *surfaceAddress = reinterpret_cast(surfaceState->getSurfaceBaseAddress()); EXPECT_EQ(offsetedPtr, surfaceAddress); } HWTEST_F(KernelArgSvmTest, givenDeviceSupportingSharedSystemAllocationsWhenSetArgSvmIsCalledWithSurfaceStateThenSizeIsMaxAndAddressIsProgrammed) { this->pClDevice->deviceInfo.sharedSystemMemCapabilities = CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL | CL_UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL | CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS_INTEL | CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS_INTEL; auto systemPointer = reinterpret_cast(0xfeedbac); pKernelInfo->argAsPtr(0).bindful = 0; pKernel->setArgSvmAlloc(0, systemPointer, nullptr, 0u); typedef typename 
FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->argAsPtr(0).bindful)); void *surfaceAddress = reinterpret_cast(surfaceState->getSurfaceBaseAddress()); EXPECT_EQ(systemPointer, surfaceAddress); EXPECT_EQ(128u, surfaceState->getWidth()); EXPECT_EQ(2048u, surfaceState->getDepth()); EXPECT_EQ(16384u, surfaceState->getHeight()); } TEST_F(KernelArgSvmTest, WhenSettingKernelArgImmediateThenInvalidArgValueErrorIsReturned) { auto retVal = pKernel->setArgImmediate(0, 256, nullptr); EXPECT_EQ(CL_INVALID_ARG_VALUE, retVal); } HWTEST_F(KernelArgSvmTest, WhenPatchingWithImplicitSurfaceThenPatchIsApplied) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; constexpr size_t rendSurfSize = sizeof(RENDER_SURFACE_STATE); std::vector svmPtr; svmPtr.resize(256); pKernel->setCrossThreadData(nullptr, sizeof(void *)); pKernel->setSshLocal(nullptr, rendSurfSize); { MockGraphicsAllocation svmAlloc(svmPtr.data(), svmPtr.size()); pKernelInfo->setGlobalVariablesSurface(sizeof(void *), 0, 0); constexpr size_t patchOffset = 16; void *ptrToPatch = svmPtr.data() + patchOffset; ASSERT_GE(pKernel->getCrossThreadDataSize(), sizeof(void *)); *reinterpret_cast(pKernel->getCrossThreadData()) = 0U; ASSERT_GE(pKernel->getSurfaceStateHeapSize(), rendSurfSize); RENDER_SURFACE_STATE *surfState = reinterpret_cast(pKernel->getSurfaceStateHeap()); memset(surfState, 0, rendSurfSize); pKernel->patchWithImplicitSurface(ptrToPatch, svmAlloc, pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.globalVariablesSurfaceAddress); // verify cross thread data was properly patched EXPECT_EQ(ptrToPatch, *reinterpret_cast(pKernel->getCrossThreadData())); // create surface state for comparison RENDER_SURFACE_STATE expectedSurfaceState; memset(&expectedSurfaceState, 0, rendSurfSize); { void *addressToPatch = svmAlloc.getUnderlyingBuffer(); size_t sizeToPatch = svmAlloc.getUnderlyingBufferSize(); 
Buffer::setSurfaceState(pDevice, &expectedSurfaceState, false, false, sizeToPatch, addressToPatch, 0, &svmAlloc, 0, 0, false, false); } // verify ssh was properly patched EXPECT_EQ(0, memcmp(&expectedSurfaceState, surfState, rendSurfSize)); // when cross thread and ssh data is not available then should not do anything pKernel->setCrossThreadData(nullptr, 0); pKernel->setSshLocal(nullptr, 0); pKernel->patchWithImplicitSurface(ptrToPatch, svmAlloc, pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.globalVariablesSurfaceAddress); } } TEST_F(KernelArgSvmTest, WhenPatchingBufferOffsetThenPatchIsApplied) { std::vector svmPtr; svmPtr.resize(256); pKernel->setCrossThreadData(nullptr, sizeof(uint32_t)); { constexpr uint32_t initVal = 7U; constexpr uint32_t svmOffset = 13U; MockGraphicsAllocation svmAlloc(svmPtr.data(), 256); uint32_t *expectedPatchPtr = reinterpret_cast(pKernel->getCrossThreadData()); ArgDescPointer arg; void *returnedPtr = nullptr; arg.bufferOffset = undefined; *expectedPatchPtr = initVal; returnedPtr = pKernel->patchBufferOffset(arg, svmPtr.data(), &svmAlloc); EXPECT_EQ(svmPtr.data(), returnedPtr); EXPECT_EQ(initVal, *expectedPatchPtr); arg.bufferOffset = undefined; *expectedPatchPtr = initVal; returnedPtr = pKernel->patchBufferOffset(arg, svmPtr.data(), nullptr); EXPECT_EQ(svmPtr.data(), returnedPtr); EXPECT_EQ(initVal, *expectedPatchPtr); arg.bufferOffset = 0U; *expectedPatchPtr = initVal; returnedPtr = pKernel->patchBufferOffset(arg, svmPtr.data(), &svmAlloc); EXPECT_EQ(svmPtr.data(), returnedPtr); EXPECT_EQ(0U, *expectedPatchPtr); arg.bufferOffset = 0U; *expectedPatchPtr = initVal; returnedPtr = pKernel->patchBufferOffset(arg, svmPtr.data() + svmOffset, nullptr); void *expectedPtr = alignDown(svmPtr.data() + svmOffset, 4); // expecting to see DWORD alignment restriction in offset uint32_t expectedOffset = static_cast(ptrDiff(svmPtr.data() + svmOffset, expectedPtr)); EXPECT_EQ(expectedPtr, returnedPtr); EXPECT_EQ(expectedOffset, 
*expectedPatchPtr); arg.bufferOffset = 0U; *expectedPatchPtr = initVal; returnedPtr = pKernel->patchBufferOffset(arg, svmPtr.data() + svmOffset, &svmAlloc); EXPECT_EQ(svmPtr.data(), returnedPtr); EXPECT_EQ(svmOffset, *expectedPatchPtr); } } template class KernelArgSvmTestTyped : public KernelArgSvmTest { }; struct SetArgHandlerSetArgSvm { static void setArg(Kernel &kernel, uint32_t argNum, void *ptrToPatch, size_t allocSize, GraphicsAllocation &alloc) { kernel.setArgSvm(argNum, allocSize, ptrToPatch, &alloc, 0u); } static constexpr bool supportsOffsets() { return true; } }; struct SetArgHandlerSetArgSvmAlloc { static void setArg(Kernel &kernel, uint32_t argNum, void *ptrToPatch, size_t allocSize, GraphicsAllocation &alloc) { kernel.setArgSvmAlloc(argNum, ptrToPatch, &alloc, 0u); } static constexpr bool supportsOffsets() { return true; } }; struct SetArgHandlerSetArgBuffer { static void setArg(Kernel &kernel, uint32_t argNum, void *ptrToPatch, size_t allocSize, GraphicsAllocation &alloc) { MockBuffer mb{alloc}; cl_mem memObj = &mb; kernel.setArgBuffer(argNum, sizeof(cl_mem), &memObj); } static constexpr bool supportsOffsets() { return false; } }; using SetArgHandlers = ::testing::Types; TYPED_TEST_CASE(KernelArgSvmTestTyped, SetArgHandlers); HWTEST_TYPED_TEST(KernelArgSvmTestTyped, GivenBufferKernelArgWhenBufferOffsetIsNeededThenSetArgSetsIt) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; constexpr size_t rendSurfSize = sizeof(RENDER_SURFACE_STATE); auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); uint32_t svmSize = MemoryConstants::pageSize; char *svmPtr = reinterpret_cast(alignedMalloc(svmSize, MemoryConstants::pageSize)); auto &arg = this->pKernelInfo->argAsPtr(0); arg.bindful = 0; arg.stateless = 0; arg.pointerSize = sizeof(void *); arg.bufferOffset = arg.pointerSize; this->pKernel->setCrossThreadData(nullptr, arg.bufferOffset + sizeof(uint32_t)); this->pKernel->setSshLocal(nullptr, 
rendSurfSize); { MockGraphicsAllocation svmAlloc(svmPtr, svmSize); constexpr size_t patchOffset = 16; void *ptrToPatch = svmPtr + patchOffset; size_t sizeToPatch = svmSize - patchOffset; ASSERT_GE(this->pKernel->getCrossThreadDataSize(), arg.bufferOffset + sizeof(uint32_t)); void **expectedPointerPatchPtr = reinterpret_cast(this->pKernel->getCrossThreadData()); uint32_t *expectedOffsetPatchPtr = reinterpret_cast(ptrOffset(this->pKernel->getCrossThreadData(), arg.bufferOffset)); *expectedPointerPatchPtr = reinterpret_cast(0U); *expectedOffsetPatchPtr = 0U; ASSERT_GE(this->pKernel->getSurfaceStateHeapSize(), rendSurfSize); RENDER_SURFACE_STATE *surfState = reinterpret_cast(this->pKernel->getSurfaceStateHeap()); memset(surfState, 0, rendSurfSize); TypeParam::setArg(*this->pKernel, 0U, ptrToPatch, sizeToPatch, svmAlloc); // surface state for comparison RENDER_SURFACE_STATE expectedSurfaceState; memset(&expectedSurfaceState, 0, rendSurfSize); if (TypeParam::supportsOffsets()) { // setArgSvm, setArgSvmAlloc EXPECT_EQ(ptrToPatch, *expectedPointerPatchPtr); EXPECT_EQ(patchOffset, *expectedOffsetPatchPtr); } else { // setArgBuffer EXPECT_EQ(svmAlloc.getUnderlyingBuffer(), *expectedPointerPatchPtr); EXPECT_EQ(0U, *expectedOffsetPatchPtr); } Buffer::setSurfaceState(device.get(), &expectedSurfaceState, false, false, svmAlloc.getUnderlyingBufferSize(), svmAlloc.getUnderlyingBuffer(), 0, &svmAlloc, 0, 0, false, false); // verify ssh was properly patched int32_t cmpResult = memcmp(&expectedSurfaceState, surfState, rendSurfSize); EXPECT_EQ(0, cmpResult); } alignedFree(svmPtr); } TEST_F(KernelArgSvmTest, givenWritableSvmAllocationWhenSettingAsArgThenDoNotExpectAllocationInCacheFlushVector) { size_t svmSize = 4096; void *svmPtr = alignedMalloc(svmSize, MemoryConstants::pageSize); MockGraphicsAllocation svmAlloc(svmPtr, svmSize); svmAlloc.setMemObjectsAllocationWithWritableFlags(true); svmAlloc.setFlushL3Required(false); auto retVal = pKernel->setArgSvmAlloc(0, svmPtr, &svmAlloc, 
0u); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(nullptr, pKernel->kernelArgRequiresCacheFlush[0]); alignedFree(svmPtr); } TEST_F(KernelArgSvmTest, givenCacheFlushSvmAllocationWhenSettingAsArgThenExpectAllocationInCacheFlushVector) { size_t svmSize = 4096; void *svmPtr = alignedMalloc(svmSize, MemoryConstants::pageSize); MockGraphicsAllocation svmAlloc(svmPtr, svmSize); svmAlloc.setMemObjectsAllocationWithWritableFlags(false); svmAlloc.setFlushL3Required(true); auto retVal = pKernel->setArgSvmAlloc(0, svmPtr, &svmAlloc, 0u); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(&svmAlloc, pKernel->kernelArgRequiresCacheFlush[0]); alignedFree(svmPtr); } TEST_F(KernelArgSvmTest, givenNoCacheFlushSvmAllocationWhenSettingAsArgThenNotExpectAllocationInCacheFlushVector) { size_t svmSize = 4096; void *svmPtr = alignedMalloc(svmSize, MemoryConstants::pageSize); MockGraphicsAllocation svmAlloc(svmPtr, svmSize); svmAlloc.setMemObjectsAllocationWithWritableFlags(false); svmAlloc.setFlushL3Required(false); auto retVal = pKernel->setArgSvmAlloc(0, svmPtr, &svmAlloc, 0u); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(nullptr, pKernel->kernelArgRequiresCacheFlush[0]); alignedFree(svmPtr); } TEST_F(KernelArgSvmTest, givenWritableSvmAllocationWhenSettingKernelExecInfoThenDoNotExpectSvmFlushFlagTrue) { size_t svmSize = 4096; void *svmPtr = alignedMalloc(svmSize, MemoryConstants::pageSize); MockGraphicsAllocation svmAlloc(svmPtr, svmSize); svmAlloc.setMemObjectsAllocationWithWritableFlags(true); svmAlloc.setFlushL3Required(false); pKernel->setSvmKernelExecInfo(&svmAlloc); EXPECT_FALSE(pKernel->svmAllocationsRequireCacheFlush); alignedFree(svmPtr); } TEST_F(KernelArgSvmTest, givenCacheFlushSvmAllocationWhenSettingKernelExecInfoThenExpectSvmFlushFlagTrue) { size_t svmSize = 4096; void *svmPtr = alignedMalloc(svmSize, MemoryConstants::pageSize); MockGraphicsAllocation svmAlloc(svmPtr, svmSize); svmAlloc.setMemObjectsAllocationWithWritableFlags(false); svmAlloc.setFlushL3Required(true); 
pKernel->setSvmKernelExecInfo(&svmAlloc); EXPECT_TRUE(pKernel->svmAllocationsRequireCacheFlush); alignedFree(svmPtr); } TEST_F(KernelArgSvmTest, givenNoCacheFlushReadOnlySvmAllocationWhenSettingKernelExecInfoThenExpectSvmFlushFlagFalse) { size_t svmSize = 4096; void *svmPtr = alignedMalloc(svmSize, MemoryConstants::pageSize); MockGraphicsAllocation svmAlloc(svmPtr, svmSize); svmAlloc.setMemObjectsAllocationWithWritableFlags(false); svmAlloc.setFlushL3Required(false); pKernel->setSvmKernelExecInfo(&svmAlloc); EXPECT_FALSE(pKernel->svmAllocationsRequireCacheFlush); alignedFree(svmPtr); } TEST_F(KernelArgSvmTest, givenCpuAddressIsNullWhenGpuAddressIsValidThenExpectSvmArgUseGpuAddress) { char svmPtr[256]; pKernelInfo->argAsPtr(0).bufferOffset = 0u; MockGraphicsAllocation svmAlloc(nullptr, reinterpret_cast(svmPtr), 256); auto retVal = pKernel->setArgSvmAlloc(0, svmPtr, &svmAlloc, 0u); EXPECT_EQ(CL_SUCCESS, retVal); auto pKernelArg = (void **)(pKernel->getCrossThreadData() + pKernelInfo->argAsPtr(0).stateless); EXPECT_EQ(svmPtr, *pKernelArg); } TEST_F(KernelArgSvmTest, givenCpuAddressIsNullWhenGpuAddressIsValidThenPatchBufferOffsetWithGpuAddress) { std::vector svmPtr; svmPtr.resize(256); pKernel->setCrossThreadData(nullptr, sizeof(uint32_t)); constexpr uint32_t initVal = 7U; MockGraphicsAllocation svmAlloc(nullptr, reinterpret_cast(svmPtr.data()), 256); uint32_t *expectedPatchPtr = reinterpret_cast(pKernel->getCrossThreadData()); ArgDescPointer arg; void *returnedPtr = nullptr; arg.bufferOffset = 0U; *expectedPatchPtr = initVal; returnedPtr = pKernel->patchBufferOffset(arg, svmPtr.data(), &svmAlloc); EXPECT_EQ(svmPtr.data(), returnedPtr); EXPECT_EQ(0U, *expectedPatchPtr); } compute-runtime-22.14.22890/opencl/test/unit_test/kernel/kernel_cache_flush_requirements_tests.cpp000066400000000000000000000360601422164147700334510ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include 
"shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/variable_backup.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/fixtures/context_fixture.h" #include "opencl/test/unit_test/fixtures/platform_fixture.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_program.h" namespace NEO { class KernelWithCacheFlushTests : public PlatformFixture, public testing::TestWithParam> { public: void SetUp() override { } void TearDown() override { } void initializePlatform() { PlatformFixture::SetUp(); } void clearPlatform() { PlatformFixture::TearDown(); } }; TEST_F(KernelWithCacheFlushTests, givenDeviceWhichDoesntRequireCacheFlushWhenCheckIfKernelRequireFlushThenReturnedFalse) { initializePlatform(); auto device = pPlatform->getClDevice(0); auto mockKernel = std::make_unique(*device); MockContext mockContext(device); MockCommandQueue queue(mockContext); bool flushRequired = mockKernel->mockKernel->Kernel::requiresCacheFlushCommand(queue); EXPECT_FALSE(flushRequired); clearPlatform(); } TEST_F(KernelWithCacheFlushTests, givenQueueWhichDoesntRequireCacheFlushWhenCheckIfKernelRequireFlushThenReturnedFalse) { initializePlatform(); DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableCacheFlushAfterWalker.set(1); auto device = pPlatform->getClDevice(0); auto mockKernel = std::make_unique(*device); MockContext mockContext(device); MockCommandQueue queue(mockContext); bool flushRequired = mockKernel->mockKernel->Kernel::requiresCacheFlushCommand(queue); EXPECT_FALSE(flushRequired); clearPlatform(); } TEST_F(KernelWithCacheFlushTests, givenCacheFlushForAllQueuesDisabledWhenCheckIfKernelRequireFlushThenReturnedFalse) { 
initializePlatform(); DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableCacheFlushAfterWalker.set(1); DebugManager.flags.EnableCacheFlushAfterWalkerForAllQueues.set(0); auto device = pPlatform->getClDevice(0); auto mockKernel = std::make_unique(*device); MockContext mockContext(device); MockCommandQueue queue(mockContext); bool flushRequired = mockKernel->mockKernel->Kernel::requiresCacheFlushCommand(queue); EXPECT_FALSE(flushRequired); clearPlatform(); } HWTEST_F(KernelWithCacheFlushTests, givenCacheFlushForMultiEngineEnabledWhenCheckIfKernelRequireFlushThenReturnedFalse) { initializePlatform(); DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableCacheFlushAfterWalker.set(1); auto device = pPlatform->getClDevice(0); auto mockKernel = std::make_unique(*device); MockContext mockContext(device); auto cmdQ = std::make_unique>(&mockContext, device, nullptr); cmdQ->requiresCacheFlushAfterWalker = true; auto &ultCsr = static_cast &>(cmdQ->getGpgpuCommandStreamReceiver()); ultCsr.multiOsContextCapable = true; bool flushRequired = mockKernel->mockKernel->Kernel::requiresCacheFlushCommand(*cmdQ.get()); EXPECT_FALSE(flushRequired); clearPlatform(); } HWTEST_F(KernelWithCacheFlushTests, givenCacheFlushForSingleDeviceProgramWhenCheckIfKernelRequireFlushThenReturnedFalse) { DebugManagerStateRestore dbgRestore; DebugManager.flags.CreateMultipleSubDevices.set(1); initializePlatform(); DebugManager.flags.EnableCacheFlushAfterWalker.set(1); auto device = pPlatform->getClDevice(0); auto mockKernel = std::make_unique(*device); MockContext mockContext(device); auto cmdQ = std::make_unique>(&mockContext, device, nullptr); auto &ultCsr = static_cast &>(cmdQ->getGpgpuCommandStreamReceiver()); ultCsr.multiOsContextCapable = false; cmdQ->requiresCacheFlushAfterWalker = true; bool flushRequired = mockKernel->mockKernel->Kernel::requiresCacheFlushCommand(*cmdQ.get()); EXPECT_FALSE(flushRequired); clearPlatform(); } HWTEST_F(KernelWithCacheFlushTests, 
givenCacheFlushForDefaultTypeContextWhenCheckIfKernelRequireFlushThenReturnedFalse) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableCacheFlushAfterWalker.set(1); uint32_t numDevices = 2; DebugManager.flags.CreateMultipleSubDevices.set(numDevices); initializePlatform(); auto device = pPlatform->getClDevice(0); auto mockKernel = std::make_unique(*device); MockContext mockContext(device); auto cmdQ = std::make_unique>(&mockContext, device, nullptr); cmdQ->requiresCacheFlushAfterWalker = true; auto &ultCsr = static_cast &>(cmdQ->getGpgpuCommandStreamReceiver()); ultCsr.multiOsContextCapable = false; bool flushRequired = mockKernel->mockKernel->Kernel::requiresCacheFlushCommand(*cmdQ.get()); EXPECT_FALSE(flushRequired); clearPlatform(); } HWTEST_F(KernelWithCacheFlushTests, givenCacheFlushWithNullGlobalSurfaceWhenCheckIfKernelRequireFlushThenReturnedFalse) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableCacheFlushAfterWalker.set(1); uint32_t numDevices = 2; DebugManager.flags.CreateMultipleSubDevices.set(numDevices); initializePlatform(); auto device = pPlatform->getClDevice(0); auto mockKernel = std::make_unique(*device); MockContext mockContext(device); mockContext.contextType = ContextType::CONTEXT_TYPE_SPECIALIZED; auto cmdQ = std::make_unique>(&mockContext, device, nullptr); cmdQ->requiresCacheFlushAfterWalker = true; auto &ultCsr = static_cast &>(cmdQ->getGpgpuCommandStreamReceiver()); ultCsr.multiOsContextCapable = false; bool flushRequired = mockKernel->mockKernel->Kernel::requiresCacheFlushCommand(*cmdQ.get()); EXPECT_FALSE(flushRequired); clearPlatform(); } HWTEST_F(KernelWithCacheFlushTests, givenCacheFlushWithGlobalSurfaceWhenCheckIfKernelRequireFlushThenReturnedTrue) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableCacheFlushAfterWalker.set(1); uint32_t numDevices = 2; DebugManager.flags.CreateMultipleSubDevices.set(numDevices); initializePlatform(); auto device = pPlatform->getClDevice(0); auto mockKernel = 
std::make_unique(*device); MockContext mockContext(device); mockContext.contextType = ContextType::CONTEXT_TYPE_SPECIALIZED; void *allocPtr = reinterpret_cast(static_cast(6 * MemoryConstants::pageSize)); MockGraphicsAllocation globalAllocation{allocPtr, MemoryConstants::pageSize * 2}; mockKernel->mockProgram->setGlobalSurface(&globalAllocation); auto cmdQ = std::make_unique>(&mockContext, device, nullptr); cmdQ->requiresCacheFlushAfterWalker = true; auto &ultCsr = static_cast &>(cmdQ->getGpgpuCommandStreamReceiver()); ultCsr.multiOsContextCapable = false; bool flushRequired = mockKernel->mockKernel->Kernel::requiresCacheFlushCommand(*cmdQ.get()); EXPECT_TRUE(flushRequired); mockKernel->mockProgram->setGlobalSurface(nullptr); clearPlatform(); } HWTEST2_F(KernelWithCacheFlushTests, givenCacheFlushRequiredWhenEstimatingThenAddRequiredCommands, IsAtLeastXeHpCore) { DebugManagerStateRestore dbgRestore; DebugManager.flags.CreateMultipleSubDevices.set(2); initializePlatform(); if (!pPlatform->getClDevice(0)->getHardwareInfo().capabilityTable.supportCacheFlushAfterWalker) { clearPlatform(); GTEST_SKIP(); } auto device = pPlatform->getClDevice(0); auto mockKernel = std::make_unique(*device); MockContext mockContext(device); mockContext.contextType = ContextType::CONTEXT_TYPE_SPECIALIZED; auto cmdQ = std::make_unique>(&mockContext, device, nullptr); CsrDependencies csrDeps; DispatchInfo dispatchInfo; MultiDispatchInfo multiDispatchInfo(mockKernel->mockKernel); dispatchInfo.setKernel(mockKernel->mockKernel); dispatchInfo.setNumberOfWorkgroups({1, 1, 1}); dispatchInfo.setTotalNumberOfWorkgroups({1, 1, 1}); multiDispatchInfo.push(dispatchInfo); size_t initialSize = 0; size_t sizeWithCacheFlush = 0; size_t expectedDiff = sizeof(typename FamilyType::PIPE_CONTROL); if constexpr (FamilyType::isUsingL3Control) { expectedDiff += sizeof(typename FamilyType::L3_CONTROL) + sizeof(typename FamilyType::L3_FLUSH_ADDRESS_RANGE); } { 
EXPECT_FALSE(mockKernel->mockKernel->Kernel::requiresCacheFlushCommand(*cmdQ)); initialSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, csrDeps, false, false, false, *cmdQ, multiDispatchInfo, false, false); } { DebugManager.flags.EnableCacheFlushAfterWalker.set(1); void *allocPtr = reinterpret_cast(static_cast(6 * MemoryConstants::pageSize)); MockGraphicsAllocation globalAllocation{allocPtr, MemoryConstants::pageSize * 2}; mockKernel->mockProgram->setGlobalSurface(&globalAllocation); cmdQ->requiresCacheFlushAfterWalker = true; auto &ultCsr = static_cast &>(cmdQ->getGpgpuCommandStreamReceiver()); ultCsr.multiOsContextCapable = false; EXPECT_TRUE(mockKernel->mockKernel->Kernel::requiresCacheFlushCommand(*cmdQ)); sizeWithCacheFlush = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, csrDeps, false, false, false, *cmdQ, multiDispatchInfo, false, false); } EXPECT_EQ(initialSize + expectedDiff, sizeWithCacheFlush); mockKernel->mockProgram->setGlobalSurface(nullptr); clearPlatform(); } HWTEST_F(KernelWithCacheFlushTests, givenCacheFlushWithAllocationsRequireCacheFlushFlagOnWhenCheckIfKernelRequireFlushThenReturnedTrue) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableCacheFlushAfterWalker.set(1); uint32_t numDevices = 2; DebugManager.flags.CreateMultipleSubDevices.set(numDevices); initializePlatform(); auto device = pPlatform->getClDevice(0); auto mockKernel = std::make_unique(*device); MockContext mockContext(device); mockContext.contextType = ContextType::CONTEXT_TYPE_SPECIALIZED; auto cmdQ = std::make_unique>(&mockContext, device, nullptr); cmdQ->requiresCacheFlushAfterWalker = true; auto &ultCsr = static_cast &>(cmdQ->getGpgpuCommandStreamReceiver()); ultCsr.multiOsContextCapable = false; mockKernel->mockKernel->svmAllocationsRequireCacheFlush = true; bool flushRequired = mockKernel->mockKernel->Kernel::requiresCacheFlushCommand(*cmdQ.get()); EXPECT_TRUE(flushRequired); clearPlatform(); } 
HWTEST_F(KernelWithCacheFlushTests, givenCacheFlushWithAllocationsWhichRequireCacheFlushWhenCheckIfKernelRequireFlushThenReturnedTrue) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableCacheFlushAfterWalker.set(1); uint32_t numDevices = 2; DebugManager.flags.CreateMultipleSubDevices.set(numDevices); initializePlatform(); auto device = pPlatform->getClDevice(0); auto mockKernel = std::make_unique(*device); MockContext mockContext(device); mockContext.contextType = ContextType::CONTEXT_TYPE_SPECIALIZED; auto cmdQ = std::make_unique>(&mockContext, device, nullptr); cmdQ->requiresCacheFlushAfterWalker = true; auto &ultCsr = static_cast &>(cmdQ->getGpgpuCommandStreamReceiver()); ultCsr.multiOsContextCapable = false; mockKernel->mockKernel->svmAllocationsRequireCacheFlush = false; mockKernel->mockKernel->kernelArgRequiresCacheFlush.resize(2); MockGraphicsAllocation cacheRequiringAllocation; mockKernel->mockKernel->kernelArgRequiresCacheFlush[1] = &cacheRequiringAllocation; bool flushRequired = mockKernel->mockKernel->Kernel::requiresCacheFlushCommand(*cmdQ.get()); EXPECT_TRUE(flushRequired); clearPlatform(); } HWTEST_F(KernelWithCacheFlushTests, givenEnableCacheFlushAfterWalkerForAllQueuesFlagSetWhenCheckIfKernelRequierFlushThenTrueIsAlwaysReturned) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableCacheFlushAfterWalker.set(1); DebugManager.flags.EnableCacheFlushAfterWalkerForAllQueues.set(1); MockGraphicsAllocation cacheRequiringAllocation; for (auto isMultiEngine : ::testing::Bool()) { for (auto isMultiDevice : ::testing::Bool()) { for (auto isDefaultContext : ::testing::Bool()) { for (auto svmAllocationRequiresCacheFlush : ::testing::Bool()) { for (auto kernelArgRequiresCacheFlush : ::testing::Bool()) { auto deviceCount = (isMultiDevice ? 2 : 0); auto contextType = (isDefaultContext ? ContextType::CONTEXT_TYPE_DEFAULT : ContextType::CONTEXT_TYPE_SPECIALIZED); GraphicsAllocation *kernelArg = (kernelArgRequiresCacheFlush ? 
&cacheRequiringAllocation : nullptr); DebugManager.flags.CreateMultipleSubDevices.set(deviceCount); initializePlatform(); auto device = pPlatform->getClDevice(0); MockContext mockContext(device); mockContext.contextType = contextType; auto cmdQ = std::make_unique>(&mockContext, device, nullptr); cmdQ->requiresCacheFlushAfterWalker = true; auto &ultCsr = static_cast &>(cmdQ->getGpgpuCommandStreamReceiver()); ultCsr.multiOsContextCapable = isMultiEngine; auto mockKernel = std::make_unique(*device); mockKernel->mockKernel->svmAllocationsRequireCacheFlush = svmAllocationRequiresCacheFlush; mockKernel->mockKernel->kernelArgRequiresCacheFlush.resize(1); mockKernel->mockKernel->kernelArgRequiresCacheFlush[0] = kernelArg; auto flushRequired = mockKernel->mockKernel->Kernel::requiresCacheFlushCommand(*cmdQ.get()); EXPECT_TRUE(flushRequired); clearPlatform(); } } } } } } } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/kernel/kernel_image_arg_tests.cpp000066400000000000000000000403061422164147700303130ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/ptr_math.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/mocks/mock_allocation_properties.h" #include "shared/test/common/mocks/mock_csr.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/helpers/cl_memory_properties_helpers.h" #include "opencl/source/kernel/kernel.h" #include "opencl/test/unit_test/fixtures/kernel_arg_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_image.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "gtest/gtest.h" #include "hw_cmds.h" using namespace NEO; TEST_F(KernelImageArgTest, 
GivenKernelWithImageArgsWhenCheckingDifferentScenariosThenBehaviourIsCorrect) { size_t imageWidth = image->getImageDesc().image_width; size_t imageHeight = image->getImageDesc().image_height; size_t imageDepth = image->getImageDesc().image_depth; uint32_t objectId = pKernelInfo->argAsImg(4).bindful; cl_mem memObj = image.get(); pKernel->setArg(0, sizeof(memObj), &memObj); pKernel->setArg(1, sizeof(memObj), &memObj); pKernel->setArg(3, sizeof(memObj), &memObj); pKernel->setArg(4, sizeof(memObj), &memObj); auto crossThreadData = reinterpret_cast(pKernel->getCrossThreadData()); auto imgWidthOffset = ptrOffset(crossThreadData, 0x4); EXPECT_EQ(imageWidth, *imgWidthOffset); auto imgHeightOffset = ptrOffset(crossThreadData, 0xc); EXPECT_EQ(imageHeight, *imgHeightOffset); auto dummyOffset = ptrOffset(crossThreadData, 0x20); EXPECT_EQ(0x12344321u, *dummyOffset); auto imgDepthOffset = ptrOffset(crossThreadData, 0x30); EXPECT_EQ(imageDepth, *imgDepthOffset); EXPECT_EQ(objectId, *crossThreadData); } TEST_F(KernelImageArgTest, givenKernelWithFlatImageTokensWhenArgIsSetThenPatchAllParams) { size_t imageWidth = image->getImageDesc().image_width; size_t imageHeight = image->getImageDesc().image_height; size_t imageRowPitch = image->getImageDesc().image_row_pitch; uint64_t imageBaseAddress = image->getGraphicsAllocation(context->getDevice(0)->getRootDeviceIndex())->getGpuAddress(); cl_mem memObj = image.get(); pKernel->setArg(0, sizeof(memObj), &memObj); auto crossThreadData = reinterpret_cast(pKernel->getCrossThreadData()); auto pixelSize = image->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes; const auto &metadata = pKernel->getKernelInfo().getArgDescriptorAt(0).as().metadataPayload; auto offsetFlatBaseOffset = ptrOffset(crossThreadData, metadata.flatBaseOffset); EXPECT_EQ(imageBaseAddress, *reinterpret_cast(offsetFlatBaseOffset)); auto offsetFlatWidth = ptrOffset(crossThreadData, metadata.flatWidth); EXPECT_EQ(static_cast((imageWidth * pixelSize) - 1), 
*offsetFlatWidth); auto offsetFlatHeight = ptrOffset(crossThreadData, metadata.flatHeight); EXPECT_EQ(static_cast((imageHeight * pixelSize) - 1), *offsetFlatHeight); auto offsetFlatPitch = ptrOffset(crossThreadData, metadata.flatPitch); EXPECT_EQ(imageRowPitch - 1, *offsetFlatPitch); } TEST_F(KernelImageArgTest, givenKernelWithValidOffsetNumMipLevelsWhenImageArgIsSetThenCrossthreadDataIsProperlyPatched) { MockImageBase image; image.imageDesc.num_mip_levels = 7U; cl_mem imageObj = ℑ pKernel->setArg(0, sizeof(imageObj), &imageObj); auto crossThreadData = reinterpret_cast(pKernel->getCrossThreadData()); auto patchedNumMipLevels = ptrOffset(crossThreadData, offsetNumMipLevelsImage0); EXPECT_EQ(7U, *patchedNumMipLevels); } TEST_F(KernelImageArgTest, givenImageWithNumSamplesWhenSetArgIsCalledThenPatchNumSamplesInfo) { cl_image_format imgFormat = {CL_RGBA, CL_UNORM_INT8}; cl_image_desc imgDesc = {}; imgDesc.num_samples = 16; imgDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imgDesc.image_width = 5; imgDesc.image_height = 5; auto memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(0, 0, 0, pDevice); auto surfaceFormat = Image::getSurfaceFormatFromTable(0, &imgFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); auto sampleImg = Image::create(context.get(), memoryProperties, 0, 0, surfaceFormat, &imgDesc, nullptr, retVal); EXPECT_EQ(CL_SUCCESS, retVal); cl_mem memObj = sampleImg; pKernel->setArg(0, sizeof(memObj), &memObj); auto crossThreadData = reinterpret_cast(pKernel->getCrossThreadData()); auto patchedNumSamples = ptrOffset(crossThreadData, 0x3c); EXPECT_EQ(16u, *patchedNumSamples); sampleImg->release(); } TEST_F(KernelImageArgTest, givenImageWithWriteOnlyAccessAndReadOnlyArgWhenCheckCorrectImageAccessQualifierIsCalledThenRetValNotValid) { cl_image_format imgFormat = {CL_RGBA, CL_UNORM_INT8}; cl_image_desc imgDesc = {}; imgDesc.image_type = CL_MEM_OBJECT_IMAGE2D; cl_mem_flags flags = CL_MEM_WRITE_ONLY; imgDesc.image_width = 
5; imgDesc.image_height = 5; auto surfaceFormat = Image::getSurfaceFormatFromTable( 0, &imgFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); std::unique_ptr img( Image::create(context.get(), ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context->getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imgDesc, nullptr, retVal)); pKernelInfo->setAccessQualifier(0, KernelArgMetadata::AccessReadOnly); cl_mem memObj = img.get(); retVal = pKernel->checkCorrectImageAccessQualifier(0, sizeof(memObj), &memObj); EXPECT_EQ(retVal, CL_INVALID_ARG_VALUE); retVal = clSetKernelArg( pMultiDeviceKernel.get(), 0, sizeof(memObj), &memObj); EXPECT_EQ(retVal, CL_INVALID_ARG_VALUE); retVal = clSetKernelArg( pMultiDeviceKernel.get(), 0, sizeof(memObj), &memObj); EXPECT_EQ(retVal, CL_INVALID_ARG_VALUE); retVal = clSetKernelArg( pMultiDeviceKernel.get(), 1000, sizeof(memObj), &memObj); EXPECT_EQ(retVal, CL_INVALID_ARG_INDEX); } TEST_F(KernelImageArgTest, givenInvalidImageWhenSettingArgImageThenInvalidArgValueErrorIsReturned) { cl_mem memObj = reinterpret_cast(pKernel); retVal = pKernel->setArg(0, memObj, 0u); EXPECT_EQ(retVal, CL_INVALID_ARG_VALUE); } TEST_F(KernelImageArgTest, givenImageWithReadOnlyAccessAndWriteOnlyArgWhenCheckCorrectImageAccessQualifierIsCalledThenReturnsInvalidArgValue) { cl_image_format imgFormat = {CL_RGBA, CL_UNORM_INT8}; cl_image_desc imgDesc = {}; imgDesc.image_type = CL_MEM_OBJECT_IMAGE2D; cl_mem_flags flags = CL_MEM_READ_ONLY; imgDesc.image_width = 5; imgDesc.image_height = 5; auto surfaceFormat = Image::getSurfaceFormatFromTable( 0, &imgFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); std::unique_ptr img( Image::create(context.get(), ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context->getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imgDesc, nullptr, retVal)); pKernelInfo->setAccessQualifier(0, NEO::KernelArgMetadata::AccessWriteOnly); cl_mem 
memObj = img.get(); retVal = pKernel->checkCorrectImageAccessQualifier(0, sizeof(memObj), &memObj); EXPECT_EQ(retVal, CL_INVALID_ARG_VALUE); Image *image = NULL; memObj = image; retVal = pKernel->checkCorrectImageAccessQualifier(0, sizeof(memObj), &memObj); EXPECT_EQ(retVal, CL_INVALID_ARG_VALUE); } TEST_F(KernelImageArgTest, givenImageWithReadOnlyAccessAndReadOnlyArgWhenCheckCorrectImageAccessQualifierIsCalledThenRetValNotValid) { cl_image_format imgFormat = {CL_RGBA, CL_UNORM_INT8}; cl_image_desc imgDesc = {}; imgDesc.image_type = CL_MEM_OBJECT_IMAGE2D; cl_mem_flags flags = CL_MEM_READ_ONLY; imgDesc.image_width = 5; imgDesc.image_height = 5; auto surfaceFormat = Image::getSurfaceFormatFromTable( 0, &imgFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); std::unique_ptr img( Image::create(context.get(), ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context->getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imgDesc, nullptr, retVal)); pKernelInfo->setAccessQualifier(0, NEO::KernelArgMetadata::AccessReadOnly); cl_mem memObj = img.get(); retVal = pKernel->checkCorrectImageAccessQualifier(0, sizeof(memObj), &memObj); EXPECT_EQ(retVal, CL_SUCCESS); } TEST_F(KernelImageArgTest, givenImageWithWriteOnlyAccessAndWriteOnlyArgWhenCheckCorrectImageAccessQualifierIsCalledThenRetValNotValid) { cl_image_format imgFormat = {CL_RGBA, CL_UNORM_INT8}; cl_image_desc imgDesc = {}; imgDesc.image_type = CL_MEM_OBJECT_IMAGE2D; cl_mem_flags flags = CL_MEM_WRITE_ONLY; imgDesc.image_width = 5; imgDesc.image_height = 5; auto surfaceFormat = Image::getSurfaceFormatFromTable( 0, &imgFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); std::unique_ptr img( Image::create(context.get(), ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context->getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imgDesc, nullptr, retVal)); pKernelInfo->setAccessQualifier(0, 
NEO::KernelArgMetadata::AccessWriteOnly); cl_mem memObj = img.get(); retVal = pKernel->checkCorrectImageAccessQualifier(0, sizeof(memObj), &memObj); EXPECT_EQ(retVal, CL_SUCCESS); } HWTEST_F(KernelImageArgTest, givenImgWithMcsAllocWhenMakeResidentThenMakeMcsAllocationResident) { int32_t execStamp = 0; cl_image_format imgFormat = {CL_RGBA, CL_UNORM_INT8}; cl_image_desc imgDesc = {}; imgDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imgDesc.image_width = 5; imgDesc.image_height = 5; auto memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(0, 0, 0, pDevice); auto surfaceFormat = Image::getSurfaceFormatFromTable(0, &imgFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); auto img = Image::create(context.get(), memoryProperties, 0, 0, surfaceFormat, &imgDesc, nullptr, retVal); EXPECT_EQ(CL_SUCCESS, retVal); auto mcsAlloc = context->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); img->setMcsAllocation(mcsAlloc); cl_mem memObj = img; pKernel->setArg(0, sizeof(memObj), &memObj); std::unique_ptr> csr(new MockCsr(execStamp, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield())); csr->setupContext(*pDevice->getDefaultEngine().osContext); pKernel->makeResident(*csr.get()); EXPECT_TRUE(csr->isMadeResident(mcsAlloc)); csr->makeSurfacePackNonResident(csr->getResidencyAllocations()); EXPECT_TRUE(csr->isMadeNonResident(mcsAlloc)); delete img; } TEST_F(KernelImageArgTest, givenKernelWithSettedArgWhenUnSetCalledThenArgIsUnsetAndArgCountIsDecreased) { cl_image_format imgFormat = {CL_RGBA, CL_UNORM_INT8}; cl_image_desc imgDesc = {}; imgDesc.image_type = CL_MEM_OBJECT_IMAGE2D; cl_mem_flags flags = CL_MEM_WRITE_ONLY; imgDesc.image_width = 5; imgDesc.image_height = 5; auto surfaceFormat = Image::getSurfaceFormatFromTable( 0, &imgFormat, 
context->getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); std::unique_ptr img( Image::create(context.get(), ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context->getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imgDesc, nullptr, retVal)); cl_mem memObj = img.get(); retVal = pKernel->setArg(0, sizeof(memObj), &memObj); EXPECT_EQ(1u, pKernel->getPatchedArgumentsNum()); EXPECT_TRUE(pKernel->getKernelArguments()[0].isPatched); pKernel->unsetArg(0); EXPECT_EQ(0u, pKernel->getPatchedArgumentsNum()); EXPECT_FALSE(pKernel->getKernelArguments()[0].isPatched); } TEST_F(KernelImageArgTest, givenNullKernelWhenClSetKernelArgCalledThenInvalidKernelCodeReturned) { cl_mem memObj = NULL; retVal = clSetKernelArg( NULL, 1000, sizeof(memObj), &memObj); EXPECT_EQ(retVal, CL_INVALID_KERNEL); } class MockSharingHandler : public SharingHandler { public: void synchronizeObject(UpdateData &updateData) override { updateData.synchronizationStatus = ACQUIRE_SUCCESFUL; } }; TEST_F(KernelImageArgTest, givenKernelWithSharedImageWhenSetArgCalledThenUsingSharedObjArgsShouldBeTrue) { cl_image_format imgFormat = {CL_RGBA, CL_UNORM_INT8}; cl_image_desc imgDesc = {}; imgDesc.image_type = CL_MEM_OBJECT_IMAGE2D; cl_mem_flags flags = CL_MEM_WRITE_ONLY; imgDesc.image_width = 5; imgDesc.image_height = 5; auto surfaceFormat = Image::getSurfaceFormatFromTable( 0, &imgFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); std::unique_ptr img( Image::create(context.get(), ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context->getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imgDesc, nullptr, retVal)); cl_mem memObj = img.get(); MockSharingHandler *mockSharingHandler = new MockSharingHandler; img->setSharingHandler(mockSharingHandler); retVal = pKernel->setArg(0, sizeof(memObj), &memObj); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, pKernel->getPatchedArgumentsNum()); 
EXPECT_TRUE(pKernel->getKernelArguments()[0].isPatched); EXPECT_TRUE(pKernel->isUsingSharedObjArgs()); } TEST_F(KernelImageArgTest, givenWritableImageWhenSettingAsArgThenDoNotExpectAllocationInCacheFlushVector) { MockImageBase image; image.graphicsAllocation->setMemObjectsAllocationWithWritableFlags(true); image.graphicsAllocation->setFlushL3Required(false); cl_mem imageObj = ℑ pKernel->setArg(0, sizeof(imageObj), &imageObj); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(nullptr, pKernel->kernelArgRequiresCacheFlush[0]); } TEST_F(KernelImageArgTest, givenCacheFlushImageWhenSettingAsArgThenExpectAllocationInCacheFlushVector) { MockImageBase image; image.graphicsAllocation->setMemObjectsAllocationWithWritableFlags(false); image.graphicsAllocation->setFlushL3Required(true); cl_mem imageObj = ℑ pKernel->setArg(0, sizeof(imageObj), &imageObj); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(image.graphicsAllocation, pKernel->kernelArgRequiresCacheFlush[0]); } TEST_F(KernelImageArgTest, givenNoCacheFlushImageWhenSettingAsArgThenExpectAllocationInCacheFlushVector) { MockImageBase image; image.graphicsAllocation->setMemObjectsAllocationWithWritableFlags(false); image.graphicsAllocation->setFlushL3Required(false); cl_mem imageObj = ℑ pKernel->setArg(0, sizeof(imageObj), &imageObj); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(nullptr, pKernel->kernelArgRequiresCacheFlush[0]); } class KernelImageArgTestBindless : public KernelImageArgTest { public: void SetUp() override { DebugManager.flags.UseBindlessMode.set(1); KernelImageArgTest::SetUp(); auto &img = pKernelInfo->argAsImg(0); img.bindful = undefined; img.bindless = bindlessOffset; } void TearDown() override { KernelImageArgTest::TearDown(); } DebugManagerStateRestore restorer; const CrossThreadDataOffset bindlessOffset = 0x10; }; HWTEST_F(KernelImageArgTestBindless, givenUsedBindlessImagesWhenPatchingSurfaceStateOffsetsThenCorrectOffsetIsPatchedInCrossThreadData) { using DataPortBindlessSurfaceExtendedMessageDescriptor = typename 
FamilyType::DataPortBindlessSurfaceExtendedMessageDescriptor; auto patchLocation = reinterpret_cast(ptrOffset(pKernel->getCrossThreadData(), bindlessOffset)); *patchLocation = 0xdead; cl_mem memObj = image.get(); pKernel->setArg(0, sizeof(memObj), &memObj); EXPECT_NE(0xdeadu, *patchLocation); }compute-runtime-22.14.22890/opencl/test/unit_test/kernel/kernel_immediate_arg_tests.cpp000066400000000000000000000410221422164147700311630ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/source/kernel/kernel.h" #include "opencl/test/unit_test/fixtures/multi_root_device_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "CL/cl.h" #include "gtest/gtest.h" using namespace NEO; template class KernelArgImmediateTest : public MultiRootDeviceWithSubDevicesFixture { public: protected: void SetUp() override { MultiRootDeviceWithSubDevicesFixture::SetUp(); program = std::make_unique(context.get(), false, context->getDevices()); KernelInfoContainer kernelInfos; kernelInfos.resize(3); KernelVectorType kernels; kernels.resize(3); for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) { memset(&pCrossThreadData[rootDeviceIndex], 0xfe, sizeof(pCrossThreadData[rootDeviceIndex])); // define kernel info this->pKernelInfo = std::make_unique(); this->pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1; this->pKernelInfo->addArgImmediate(0, sizeof(T), 0x50); this->pKernelInfo->addArgImmediate(1, sizeof(T), 0x40); this->pKernelInfo->addArgImmediate(2, sizeof(T), 0x30); this->pKernelInfo->addArgImmediate(3, sizeof(T), 0x20); this->pKernelInfo->argAsVal(3).elements.push_back(ArgDescValue::Element{0x28, sizeof(T), 0}); this->pKernelInfo->argAsVal(3).elements.push_back(ArgDescValue::Element{0x38, sizeof(T), 0}); 
kernelInfos[rootDeviceIndex] = this->pKernelInfo.get(); } for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) { pKernel[rootDeviceIndex] = new MockKernel(program.get(), *pKernelInfo, *deviceFactory->rootDevices[rootDeviceIndex]); kernels[rootDeviceIndex] = pKernel[rootDeviceIndex]; ASSERT_EQ(CL_SUCCESS, pKernel[rootDeviceIndex]->initialize()); } pMultiDeviceKernel = std::make_unique(kernels, kernelInfos); for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) { pKernel[rootDeviceIndex]->setCrossThreadData(&pCrossThreadData[rootDeviceIndex], sizeof(pCrossThreadData[rootDeviceIndex])); } } void TearDown() override { MultiRootDeviceWithSubDevicesFixture::TearDown(); } cl_int retVal = CL_SUCCESS; std::unique_ptr program; std::unique_ptr pMultiDeviceKernel; MockKernel *pKernel[3] = {nullptr}; std::unique_ptr pKernelInfo; char pCrossThreadData[3][0x60]; }; typedef ::testing::Types< char, float, int, short, long, unsigned char, unsigned int, unsigned short, unsigned long> KernelArgImmediateTypes; TYPED_TEST_CASE(KernelArgImmediateTest, KernelArgImmediateTypes); TYPED_TEST(KernelArgImmediateTest, WhenSettingKernelArgThenArgIsSetCorrectly) { auto val = (TypeParam)0xaaaaaaaaULL; auto pVal = &val; this->pMultiDeviceKernel->setArg(0, sizeof(TypeParam), pVal); for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) { auto pKernel = this->pMultiDeviceKernel->getKernel(rootDeviceIndex); auto pKernelArg = (TypeParam *)(pKernel->getCrossThreadData() + this->pKernelInfo->argAsVal(0).elements[0].offset); EXPECT_EQ(val, *pKernelArg); } } TYPED_TEST(KernelArgImmediateTest, GivenInvalidIndexWhenSettingKernelArgThenInvalidArgIndexErrorIsReturned) { auto val = (TypeParam)0U; auto pVal = &val; auto ret = this->pMultiDeviceKernel->setArg((uint32_t)-1, sizeof(TypeParam), pVal); EXPECT_EQ(ret, CL_INVALID_ARG_INDEX); } TYPED_TEST(KernelArgImmediateTest, GivenMultipleArgumentsWhenSettingKernelArgThenEachArgIsSetCorrectly) { auto val = 
(TypeParam)0xaaaaaaaaULL; auto pVal = &val; this->pMultiDeviceKernel->setArg(0, sizeof(TypeParam), pVal); for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) { auto pKernel = this->pMultiDeviceKernel->getKernel(rootDeviceIndex); auto pKernelArg = (TypeParam *)(pKernel->getCrossThreadData() + this->pKernelInfo->argAsVal(0).elements[0].offset); EXPECT_EQ(val, *pKernelArg); } val = (TypeParam)0xbbbbbbbbULL; this->pMultiDeviceKernel->setArg(1, sizeof(TypeParam), &val); for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) { auto pKernel = this->pMultiDeviceKernel->getKernel(rootDeviceIndex); auto pKernelArg = (TypeParam *)(pKernel->getCrossThreadData() + this->pKernelInfo->argAsVal(1).elements[0].offset); EXPECT_EQ(val, *pKernelArg); } val = (TypeParam)0xccccccccULL; this->pMultiDeviceKernel->setArg(2, sizeof(TypeParam), &val); for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) { auto pKernel = this->pMultiDeviceKernel->getKernel(rootDeviceIndex); auto pKernelArg = (TypeParam *)(pKernel->getCrossThreadData() + this->pKernelInfo->argAsVal(2).elements[0].offset); EXPECT_EQ(val, *pKernelArg); } } TYPED_TEST(KernelArgImmediateTest, GivenCrossThreadDataOverwritesWhenSettingKernelArgThenArgsAreSetCorrectly) { TypeParam val = (TypeParam)0xaaaaaaaaULL; TypeParam *pVal = &val; this->pMultiDeviceKernel->setArg(0, sizeof(TypeParam), pVal); for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) { auto pKernel = this->pMultiDeviceKernel->getKernel(rootDeviceIndex); TypeParam *pKernelArg = (TypeParam *)(pKernel->getCrossThreadData() + this->pKernelInfo->argAsVal(0).elements[0].offset); EXPECT_EQ(val, *pKernelArg); } val = (TypeParam)0xbbbbbbbbULL; this->pMultiDeviceKernel->setArg(1, sizeof(TypeParam), &val); for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) { auto pKernel = this->pMultiDeviceKernel->getKernel(rootDeviceIndex); auto pKernelArg = (TypeParam *)(pKernel->getCrossThreadData() + 
this->pKernelInfo->argAsVal(1).elements[0].offset); EXPECT_EQ(val, *pKernelArg); } val = (TypeParam)0xccccccccULL; this->pMultiDeviceKernel->setArg(0, sizeof(TypeParam), &val); for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) { auto pKernel = this->pMultiDeviceKernel->getKernel(rootDeviceIndex); auto pKernelArg = (TypeParam *)(pKernel->getCrossThreadData() + this->pKernelInfo->argAsVal(0).elements[0].offset); EXPECT_EQ(val, *pKernelArg); } } TYPED_TEST(KernelArgImmediateTest, GivenMultipleStructElementsWhenSettingKernelArgThenArgsAreSetCorrectly) { struct ImmediateStruct { TypeParam a; unsigned char unused[3]; // want to force a gap, ideally unpadded TypeParam b; } immediateStruct; immediateStruct.a = (TypeParam)0xaaaaaaaaULL; immediateStruct.b = (TypeParam)0xbbbbbbbbULL; immediateStruct.unused[0] = 0xfe; immediateStruct.unused[1] = 0xfe; immediateStruct.unused[2] = 0xfe; auto &elements = this->pKernelInfo->argAsVal(3).elements; elements[0].sourceOffset = offsetof(struct ImmediateStruct, a); elements[1].sourceOffset = offsetof(struct ImmediateStruct, b); this->pMultiDeviceKernel->setArg(3, sizeof(immediateStruct), &immediateStruct); for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) { auto pKernel = this->pMultiDeviceKernel->getKernel(rootDeviceIndex); auto pCrossthreadA = (TypeParam *)(pKernel->getCrossThreadData() + elements[0].offset); EXPECT_EQ(immediateStruct.a, *pCrossthreadA); auto pCrossthreadB = (TypeParam *)(pKernel->getCrossThreadData() + elements[1].offset); EXPECT_EQ(immediateStruct.b, *pCrossthreadB); } } TYPED_TEST(KernelArgImmediateTest, givenTooLargePatchSizeWhenSettingArgThenDontReadMemoryBeyondLimit) { for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) { auto pKernel = this->pMultiDeviceKernel->getKernel(rootDeviceIndex); TypeParam memory[2]; std::memset(&memory[0], 0xaa, sizeof(TypeParam)); std::memset(&memory[1], 0xbb, sizeof(TypeParam)); const auto destinationMemoryAddress = 
pKernel->getCrossThreadData() + this->pKernelInfo->argAsVal(0).elements[0].offset; const auto memoryBeyondLimitAddress = destinationMemoryAddress + sizeof(TypeParam); const auto memoryBeyondLimitBefore = *reinterpret_cast(memoryBeyondLimitAddress); this->pKernelInfo->argAsVal(0).elements[0].size = sizeof(TypeParam) + 1; auto retVal = pKernel->setArg(0, sizeof(TypeParam), &memory[0]); const auto memoryBeyondLimitAfter = *reinterpret_cast(memoryBeyondLimitAddress); EXPECT_EQ(memoryBeyondLimitBefore, memoryBeyondLimitAfter); EXPECT_EQ(memory[0], *reinterpret_cast(destinationMemoryAddress)); EXPECT_EQ(CL_SUCCESS, retVal); } } TYPED_TEST(KernelArgImmediateTest, givenNotTooLargePatchSizeWhenSettingArgThenDontReadMemoryBeyondLimit) { for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) { auto pKernel = this->pMultiDeviceKernel->getKernel(rootDeviceIndex); TypeParam memory[2]; std::memset(&memory[0], 0xaa, sizeof(TypeParam)); std::memset(&memory[1], 0xbb, sizeof(TypeParam)); const auto destinationMemoryAddress = pKernel->getCrossThreadData() + this->pKernelInfo->argAsVal(0).elements[0].offset; const auto memoryBeyondLimitAddress = destinationMemoryAddress + sizeof(TypeParam); const auto memoryBeyondLimitBefore = *reinterpret_cast(memoryBeyondLimitAddress); this->pKernelInfo->argAsVal(0).elements[0].size = sizeof(TypeParam); auto retVal = pKernel->setArg(0, sizeof(TypeParam), &memory[0]); const auto memoryBeyondLimitAfter = *reinterpret_cast(memoryBeyondLimitAddress); EXPECT_EQ(memoryBeyondLimitBefore, memoryBeyondLimitAfter); EXPECT_EQ(memory[0], *reinterpret_cast(destinationMemoryAddress)); EXPECT_EQ(CL_SUCCESS, retVal); } } TYPED_TEST(KernelArgImmediateTest, givenMulitplePatchesAndFirstPatchSizeTooLargeWhenSettingArgThenDontReadMemoryBeyondLimit) { if (sizeof(TypeParam) == 1) return; // multiple patch chars don't make sense for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) { auto pKernel = 
this->pMultiDeviceKernel->getKernel(rootDeviceIndex); TypeParam memory[2]; std::memset(&memory[0], 0xaa, sizeof(TypeParam)); std::memset(&memory[1], 0xbb, sizeof(TypeParam)); auto &elements = this->pKernelInfo->argAsVal(3).elements; const auto destinationMemoryAddress1 = pKernel->getCrossThreadData() + elements[2].offset; const auto destinationMemoryAddress2 = pKernel->getCrossThreadData() + elements[1].offset; const auto memoryBeyondLimitAddress1 = destinationMemoryAddress1 + sizeof(TypeParam); const auto memoryBeyondLimitAddress2 = destinationMemoryAddress2 + sizeof(TypeParam) / 2; const std::vector memoryBeyondLimitBefore1(memoryBeyondLimitAddress1, memoryBeyondLimitAddress1 + sizeof(TypeParam)); const std::vector memoryBeyondLimitBefore2(memoryBeyondLimitAddress2, memoryBeyondLimitAddress2 + sizeof(TypeParam) / 2); elements[2].sourceOffset = 0; elements[1].sourceOffset = sizeof(TypeParam) / 2; elements[2].size = sizeof(TypeParam); elements[1].size = sizeof(TypeParam) / 2; auto retVal = pKernel->setArg(3, sizeof(TypeParam), &memory[0]); EXPECT_EQ(0, std::memcmp(memoryBeyondLimitBefore1.data(), memoryBeyondLimitAddress1, sizeof(TypeParam))); EXPECT_EQ(0, std::memcmp(memoryBeyondLimitBefore2.data(), memoryBeyondLimitAddress2, sizeof(TypeParam) / 2)); EXPECT_EQ(0, std::memcmp(&memory[0], destinationMemoryAddress1, sizeof(TypeParam))); EXPECT_EQ(0, std::memcmp(&memory[0], destinationMemoryAddress2, sizeof(TypeParam) / 2)); EXPECT_EQ(CL_SUCCESS, retVal); } } TYPED_TEST(KernelArgImmediateTest, givenMulitplePatchesAndSecondPatchSizeTooLargeWhenSettingArgThenDontReadMemoryBeyondLimit) { if (sizeof(TypeParam) == 1) return; // multiple patch chars don't make sense for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) { auto pKernel = this->pMultiDeviceKernel->getKernel(rootDeviceIndex); TypeParam memory[2]; std::memset(&memory[0], 0xaa, sizeof(TypeParam)); std::memset(&memory[1], 0xbb, sizeof(TypeParam)); auto &elements = 
this->pKernelInfo->argAsVal(3).elements; const auto destinationMemoryAddress1 = pKernel->getCrossThreadData() + elements[2].offset; const auto destinationMemoryAddress2 = pKernel->getCrossThreadData() + elements[1].offset; const auto memoryBeyondLimitAddress1 = destinationMemoryAddress1 + sizeof(TypeParam) / 2; const auto memoryBeyondLimitAddress2 = destinationMemoryAddress2 + sizeof(TypeParam) / 2; const std::vector memoryBeyondLimitBefore1(memoryBeyondLimitAddress1, memoryBeyondLimitAddress1 + sizeof(TypeParam) / 2); const std::vector memoryBeyondLimitBefore2(memoryBeyondLimitAddress2, memoryBeyondLimitAddress2 + sizeof(TypeParam) / 2); elements[0].size = 0; elements[2].sourceOffset = 0; elements[1].sourceOffset = sizeof(TypeParam) / 2; elements[2].size = sizeof(TypeParam) / 2; elements[1].size = sizeof(TypeParam); auto retVal = pKernel->setArg(3, sizeof(TypeParam), &memory[0]); EXPECT_EQ(0, std::memcmp(memoryBeyondLimitBefore1.data(), memoryBeyondLimitAddress1, sizeof(TypeParam) / 2)); EXPECT_EQ(0, std::memcmp(memoryBeyondLimitBefore2.data(), memoryBeyondLimitAddress2, sizeof(TypeParam) / 2)); EXPECT_EQ(0, std::memcmp(&memory[0], destinationMemoryAddress1, sizeof(TypeParam) / 2)); EXPECT_EQ(0, std::memcmp(&memory[0], destinationMemoryAddress2, sizeof(TypeParam) / 2)); EXPECT_EQ(CL_SUCCESS, retVal); } } TYPED_TEST(KernelArgImmediateTest, givenMultiplePatchesAndOneSourceOffsetBeyondArgumentWhenSettingArgThenDontCopyThisPatch) { for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) { auto pKernel = this->pMultiDeviceKernel->getKernel(rootDeviceIndex); TypeParam memory[2]; std::memset(&memory[0], 0xaa, sizeof(TypeParam)); std::memset(&memory[1], 0xbb, sizeof(TypeParam)); auto &elements = this->pKernelInfo->argAsVal(3).elements; const auto destinationMemoryAddress1 = pKernel->getCrossThreadData() + elements[1].offset; const auto destinationMemoryAddress2 = pKernel->getCrossThreadData() + elements[2].offset; const auto memoryBeyondLimitAddress1 = 
destinationMemoryAddress1 + sizeof(TypeParam); const auto memoryBeyondLimitAddress2 = destinationMemoryAddress2; const std::vector memoryBeyondLimitBefore1(memoryBeyondLimitAddress1, memoryBeyondLimitAddress1 + sizeof(TypeParam)); const std::vector memoryBeyondLimitBefore2(memoryBeyondLimitAddress2, memoryBeyondLimitAddress2 + sizeof(TypeParam)); elements[0].size = 0; elements[1].sourceOffset = 0; elements[1].size = sizeof(TypeParam); elements[2].sourceOffset = sizeof(TypeParam); elements[2].size = 1; auto retVal = pKernel->setArg(3, sizeof(TypeParam), &memory[0]); EXPECT_EQ(0, std::memcmp(memoryBeyondLimitBefore1.data(), memoryBeyondLimitAddress1, memoryBeyondLimitBefore1.size())); EXPECT_EQ(0, std::memcmp(memoryBeyondLimitBefore2.data(), memoryBeyondLimitAddress2, memoryBeyondLimitBefore2.size())); EXPECT_EQ(0, std::memcmp(&memory[0], destinationMemoryAddress1, sizeof(TypeParam))); EXPECT_EQ(CL_SUCCESS, retVal); } } compute-runtime-22.14.22890/opencl/test/unit_test/kernel/kernel_info_cl_tests.cpp000066400000000000000000000063701422164147700300140ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/kernel/kernel_info_cl.h" #include "gtest/gtest.h" TEST(AsClConvertersTest, whenConvertingAccessQualifiersThenProperEnumValuesAreReturned) { using namespace NEO::KernelArgMetadata; EXPECT_EQ(static_cast(CL_KERNEL_ARG_ACCESS_NONE), NEO::asClKernelArgAccessQualifier(AccessNone)); EXPECT_EQ(static_cast(CL_KERNEL_ARG_ACCESS_READ_ONLY), NEO::asClKernelArgAccessQualifier(AccessReadOnly)); EXPECT_EQ(static_cast(CL_KERNEL_ARG_ACCESS_WRITE_ONLY), NEO::asClKernelArgAccessQualifier(AccessWriteOnly)); EXPECT_EQ(static_cast(CL_KERNEL_ARG_ACCESS_READ_WRITE), NEO::asClKernelArgAccessQualifier(AccessReadWrite)); EXPECT_EQ(0U, NEO::asClKernelArgAccessQualifier(AccessUnknown)); } TEST(AsClConvertersTest, whenConvertingAddressQualifiersThenProperEnumValuesAreReturned) { using namespace 
NEO::KernelArgMetadata; EXPECT_EQ(static_cast(CL_KERNEL_ARG_ADDRESS_GLOBAL), NEO::asClKernelArgAddressQualifier(AddrGlobal)); EXPECT_EQ(static_cast(CL_KERNEL_ARG_ADDRESS_LOCAL), NEO::asClKernelArgAddressQualifier(AddrLocal)); EXPECT_EQ(static_cast(CL_KERNEL_ARG_ADDRESS_PRIVATE), NEO::asClKernelArgAddressQualifier(AddrPrivate)); EXPECT_EQ(static_cast(CL_KERNEL_ARG_ADDRESS_CONSTANT), NEO::asClKernelArgAddressQualifier(AddrConstant)); EXPECT_EQ(0U, NEO::asClKernelArgAddressQualifier(AddrUnknown)); } TEST(AsClConvertersTest, whenConvertingTypeQualifiersThenProperBitfieldsAreSet) { using namespace NEO::KernelArgMetadata; TypeQualifiers typeQualifiers = {}; typeQualifiers.constQual = true; EXPECT_EQ(static_cast(CL_KERNEL_ARG_TYPE_CONST), NEO::asClKernelArgTypeQualifier(typeQualifiers)); typeQualifiers = {}; typeQualifiers.volatileQual = true; EXPECT_EQ(static_cast(CL_KERNEL_ARG_TYPE_VOLATILE), NEO::asClKernelArgTypeQualifier(typeQualifiers)); typeQualifiers = {}; typeQualifiers.restrictQual = true; EXPECT_EQ(static_cast(CL_KERNEL_ARG_TYPE_RESTRICT), NEO::asClKernelArgTypeQualifier(typeQualifiers)); typeQualifiers = {}; typeQualifiers.pipeQual = true; EXPECT_EQ(static_cast(CL_KERNEL_ARG_TYPE_PIPE), NEO::asClKernelArgTypeQualifier(typeQualifiers)); typeQualifiers = {}; typeQualifiers.constQual = true; typeQualifiers.volatileQual = true; EXPECT_EQ(static_cast(CL_KERNEL_ARG_TYPE_CONST | CL_KERNEL_ARG_TYPE_VOLATILE), NEO::asClKernelArgTypeQualifier(typeQualifiers)); typeQualifiers = {}; typeQualifiers.constQual = true; typeQualifiers.volatileQual = true; typeQualifiers.pipeQual = true; typeQualifiers.restrictQual = true; EXPECT_EQ(static_cast(CL_KERNEL_ARG_TYPE_CONST | CL_KERNEL_ARG_TYPE_VOLATILE | CL_KERNEL_ARG_TYPE_RESTRICT | CL_KERNEL_ARG_TYPE_PIPE), NEO::asClKernelArgTypeQualifier(typeQualifiers)); } compute-runtime-22.14.22890/opencl/test/unit_test/kernel/kernel_is_patched_tests.cpp000066400000000000000000000066441422164147700305120ustar00rootroot00000000000000/* * 
Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/mocks/mock_device.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "gtest/gtest.h" using namespace NEO; class PatchedKernelTest : public ::testing::Test { public: void SetUp() override { device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get(), rootDeviceIndex)); context.reset(new MockContext(device.get())); program.reset(Program::createBuiltInFromSource("FillBufferBytes", context.get(), context->getDevices(), &retVal)); EXPECT_EQ(CL_SUCCESS, retVal); program->build(program->getDevices(), nullptr, false); kernel.reset(Kernel::create(program.get(), program->getKernelInfoForKernel("FillBufferBytes"), *device, &retVal)); EXPECT_EQ(CL_SUCCESS, retVal); } void TearDown() override { context.reset(); } const uint32_t rootDeviceIndex = 0u; std::unique_ptr context; std::unique_ptr device; std::unique_ptr program; std::unique_ptr kernel; cl_int retVal = CL_SUCCESS; }; TEST_F(PatchedKernelTest, givenKernelWithoutPatchedArgsWhenIsPatchedIsCalledThenReturnsFalse) { EXPECT_FALSE(kernel->isPatched()); } TEST_F(PatchedKernelTest, givenKernelWithAllArgsSetWithBufferWhenIsPatchedIsCalledThenReturnsTrue) { auto buffer = clCreateBuffer(context.get(), CL_MEM_READ_ONLY, sizeof(int), nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); auto argsNum = kernel->getKernelArgsNumber(); for (uint32_t i = 0; i < argsNum; i++) { kernel->setArg(i, buffer); } EXPECT_TRUE(kernel->isPatched()); clReleaseMemObject(buffer); } TEST_F(PatchedKernelTest, givenKernelWithoutAllArgsSetWhenIsPatchedIsCalledThenReturnsFalse) { auto buffer = clCreateBuffer(context.get(), CL_MEM_READ_ONLY, sizeof(int), nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); auto argsNum = kernel->getKernelArgsNumber(); for (uint32_t i = 0; i < argsNum; i++) { 
kernel->setArg(0, buffer); } EXPECT_FALSE(kernel->isPatched()); clReleaseMemObject(buffer); } TEST_F(PatchedKernelTest, givenArgSvmAllocWhenArgIsSetThenArgIsPatched) { EXPECT_FALSE(kernel->getKernelArguments()[0].isPatched); kernel->setArgSvmAlloc(0, nullptr, nullptr, 0u); EXPECT_TRUE(kernel->getKernelArguments()[0].isPatched); } TEST_F(PatchedKernelTest, givenArgSvmWhenArgIsSetThenArgIsPatched) { uint32_t size = sizeof(int); EXPECT_FALSE(kernel->getKernelArguments()[0].isPatched); kernel->setArgSvm(0, size, nullptr, nullptr, 0); EXPECT_TRUE(kernel->getKernelArguments()[0].isPatched); } TEST_F(PatchedKernelTest, givenKernelWithOneArgumentToPatchWhichIsNonzeroIndexedWhenThatArgumentIsSetThenKernelIsPatched) { uint32_t size = sizeof(int); MockKernelWithInternals mockKernel(*device.get(), context.get()); mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.numArgsToPatch = 1; mockKernel.kernelInfo.addArgBuffer(1, 0); kernel.reset(mockKernel.mockKernel); kernel->initialize(); EXPECT_FALSE(kernel->Kernel::isPatched()); kernel->setArgSvm(1, size, nullptr, nullptr, 0u); EXPECT_TRUE(kernel->Kernel::isPatched()); kernel.release(); } compute-runtime-22.14.22890/opencl/test/unit_test/kernel/kernel_slm_arg_tests.cpp000066400000000000000000000077771422164147700300430ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/basic_math.h" #include "shared/source/helpers/ptr_math.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/kernel/kernel.h" #include "opencl/test/unit_test/fixtures/multi_root_device_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "gtest/gtest.h" using namespace NEO; class KernelSlmArgTest : public MultiRootDeviceWithSubDevicesFixture { protected: void SetUp() override { 
MultiRootDeviceWithSubDevicesFixture::SetUp(); program = std::make_unique(context.get(), false, context->getDevices()); pKernelInfo = std::make_unique(); KernelVectorType kernels; kernels.resize(3); KernelInfoContainer kernelInfos; kernelInfos.resize(3); kernelInfos[0] = kernelInfos[1] = kernelInfos[2] = pKernelInfo.get(); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1; pKernelInfo->addArgLocal(0, 0x10, 0x1); pKernelInfo->addArgBuffer(1, 0x20, sizeof(void *)); pKernelInfo->addArgLocal(2, 0x30, 0x10); pKernelInfo->kernelDescriptor.kernelAttributes.slmInlineSize = 3 * KB; for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) { pKernel[rootDeviceIndex] = new MockKernel(program.get(), *pKernelInfo, *deviceFactory->rootDevices[rootDeviceIndex]); kernels[rootDeviceIndex] = pKernel[rootDeviceIndex]; ASSERT_EQ(CL_SUCCESS, pKernel[rootDeviceIndex]->initialize()); } pMultiDeviceKernel = std::make_unique(kernels, kernelInfos); for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) { crossThreadData[rootDeviceIndex][0x20 / sizeof(uint32_t)] = 0x12344321; pKernel[rootDeviceIndex]->setCrossThreadData(&crossThreadData[rootDeviceIndex], sizeof(crossThreadData[rootDeviceIndex])); } } void TearDown() override { MultiRootDeviceWithSubDevicesFixture::TearDown(); } cl_int retVal = CL_SUCCESS; std::unique_ptr program; MockKernel *pKernel[3] = {nullptr}; std::unique_ptr pMultiDeviceKernel; std::unique_ptr pKernelInfo; static const size_t slmSize0 = 0x200; static const size_t slmSize2 = 0x30; uint32_t crossThreadData[3][0x40]{}; }; TEST_F(KernelSlmArgTest, WhenSettingSizeThenAlignmentOfHigherSlmArgsIsUpdated) { pMultiDeviceKernel->setArg(0, slmSize0, nullptr); pMultiDeviceKernel->setArg(2, slmSize2, nullptr); for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) { auto crossThreadData = reinterpret_cast(pKernel[rootDeviceIndex]->getCrossThreadData()); auto slmOffset = ptrOffset(crossThreadData, 0x10); EXPECT_EQ(0u, *slmOffset); 
slmOffset = ptrOffset(crossThreadData, 0x20); EXPECT_EQ(0x12344321u, *slmOffset); slmOffset = ptrOffset(crossThreadData, 0x30); EXPECT_EQ(0x200u, *slmOffset); EXPECT_EQ(4 * KB, pKernel[rootDeviceIndex]->slmTotalSize); } } TEST_F(KernelSlmArgTest, GivenReverseOrderWhenSettingSizeThenAlignmentOfHigherSlmArgsIsUpdated) { pMultiDeviceKernel->setArg(2, slmSize2, nullptr); pMultiDeviceKernel->setArg(0, slmSize0, nullptr); for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) { auto crossThreadData = reinterpret_cast(pKernel[rootDeviceIndex]->getCrossThreadData()); auto slmOffset = ptrOffset(crossThreadData, 0x10); EXPECT_EQ(0u, *slmOffset); slmOffset = ptrOffset(crossThreadData, 0x20); EXPECT_EQ(0x12344321u, *slmOffset); slmOffset = ptrOffset(crossThreadData, 0x30); EXPECT_EQ(0x200u, *slmOffset); EXPECT_EQ(4 * KB, pKernel[rootDeviceIndex]->slmTotalSize); } } compute-runtime-22.14.22890/opencl/test/unit_test/kernel/kernel_slm_tests.cpp000066400000000000000000000152071422164147700271750ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/program/kernel_info.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/kernel/kernel.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_program.h" using namespace NEO; struct KernelSLMAndBarrierTest : public ClDeviceFixture, public ::testing::TestWithParam { void SetUp() override { ClDeviceFixture::SetUp(); program = std::make_unique(toClDeviceVector(*pClDevice)); kernelInfo.setCrossThreadDataSize(sizeof(crossThreadData)); kernelInfo.setLocalIds({1, 1, 1}); kernelInfo.heapInfo.pKernelHeap = kernelIsa; 
kernelInfo.heapInfo.KernelHeapSize = sizeof(kernelIsa); kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 32; } void TearDown() override { ClDeviceFixture::TearDown(); } uint32_t simd; uint32_t numChannels; std::unique_ptr program; SKernelBinaryHeaderCommon kernelHeader; MockKernelInfo kernelInfo; uint32_t kernelIsa[32]; uint32_t crossThreadData[32]; uint32_t perThreadData[8]; }; static uint32_t slmSizeInKb[] = {1, 4, 8, 16, 32, 64}; HWCMDTEST_P(IGFX_GEN8_CORE, KernelSLMAndBarrierTest, GivenStaticSlmSizeWhenProgrammingSlmThenProgrammingIsCorrect) { ASSERT_NE(nullptr, pClDevice); CommandQueueHw cmdQ(nullptr, pClDevice, 0, false); typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; // define kernel info kernelInfo.kernelDescriptor.kernelAttributes.barrierCount = 1; kernelInfo.kernelDescriptor.kernelAttributes.slmInlineSize = GetParam() * KB; MockKernel kernel(program.get(), kernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); // After creating Mock Kernel now create Indirect Heap auto &indirectHeap = cmdQ.getIndirectHeap(IndirectHeap::Type::DYNAMIC_STATE, 8192); uint64_t interfaceDescriptorOffset = indirectHeap.getUsed(); size_t offsetInterfaceDescriptorData = HardwareCommandsHelper::sendInterfaceDescriptorData( indirectHeap, interfaceDescriptorOffset, 0, sizeof(crossThreadData), sizeof(perThreadData), 0, 0, 0, 1, kernel, 4u, pDevice->getPreemptionMode(), nullptr, *pDevice); // add the heap base + offset uint32_t *pIdData = (uint32_t *)indirectHeap.getCpuBase() + offsetInterfaceDescriptorData; INTERFACE_DESCRIPTOR_DATA *pSrcIDData = (INTERFACE_DESCRIPTOR_DATA *)pIdData; uint32_t ExpectedSLMSize = 0; if (::renderCoreFamily == IGFX_GEN8_CORE) { if (kernelInfo.kernelDescriptor.kernelAttributes.slmInlineSize <= (4 * 1024)) { ExpectedSLMSize = 1; } else if (kernelInfo.kernelDescriptor.kernelAttributes.slmInlineSize <= (8 * 1024)) { ExpectedSLMSize = 2; } else if (kernelInfo.kernelDescriptor.kernelAttributes.slmInlineSize 
<= (16 * 1024)) { ExpectedSLMSize = 4; } else if (kernelInfo.kernelDescriptor.kernelAttributes.slmInlineSize <= (32 * 1024)) { ExpectedSLMSize = 8; } else if (kernelInfo.kernelDescriptor.kernelAttributes.slmInlineSize <= (64 * 1024)) { ExpectedSLMSize = 16; } } else { if (kernelInfo.kernelDescriptor.kernelAttributes.slmInlineSize <= (1 * 1024)) // its a power of "2" +1 for example 1 is 2^0 ( 0+1); 2 is 2^1 is (1+1) etc. { ExpectedSLMSize = 1; } else if (kernelInfo.kernelDescriptor.kernelAttributes.slmInlineSize <= (2 * 1024)) { ExpectedSLMSize = 2; } else if (kernelInfo.kernelDescriptor.kernelAttributes.slmInlineSize <= (4 * 1024)) { ExpectedSLMSize = 3; } else if (kernelInfo.kernelDescriptor.kernelAttributes.slmInlineSize <= (8 * 1024)) { ExpectedSLMSize = 4; } else if (kernelInfo.kernelDescriptor.kernelAttributes.slmInlineSize <= (16 * 1024)) { ExpectedSLMSize = 5; } else if (kernelInfo.kernelDescriptor.kernelAttributes.slmInlineSize <= (32 * 1024)) { ExpectedSLMSize = 6; } else if (kernelInfo.kernelDescriptor.kernelAttributes.slmInlineSize <= (64 * 1024)) { ExpectedSLMSize = 7; } } ASSERT_GT(ExpectedSLMSize, 0u); EXPECT_EQ(ExpectedSLMSize, pSrcIDData->getSharedLocalMemorySize()); EXPECT_EQ(kernelInfo.kernelDescriptor.kernelAttributes.usesBarriers(), pSrcIDData->getBarrierEnable()); EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::DENORM_MODE_SETBYKERNEL, pSrcIDData->getDenormMode()); if (EncodeSurfaceState::doBindingTablePrefetch()) { EXPECT_EQ(4u, pSrcIDData->getBindingTableEntryCount()); } else { EXPECT_EQ(0u, pSrcIDData->getBindingTableEntryCount()); } } INSTANTIATE_TEST_CASE_P( SlmSizes, KernelSLMAndBarrierTest, testing::ValuesIn(slmSizeInKb)); HWTEST_F(KernelSLMAndBarrierTest, GivenInterfaceDescriptorProgrammedWhenOverrideSlmAllocationSizeIsSetThenSlmSizeIsOverwritten) { using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; uint32_t expectedSlmSize = 5; DebugManagerStateRestore dbgRestore; 
DebugManager.flags.OverrideSlmAllocationSize.set(expectedSlmSize); kernelInfo.kernelDescriptor.kernelAttributes.slmInlineSize = 0; MockKernel kernel(program.get(), kernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); CommandQueueHw cmdQ(nullptr, pClDevice, 0, false); auto &indirectHeap = cmdQ.getIndirectHeap(IndirectHeap::Type::DYNAMIC_STATE, 8192); uint64_t interfaceDescriptorOffset = indirectHeap.getUsed(); INTERFACE_DESCRIPTOR_DATA interfaceDescriptorData; HardwareCommandsHelper::sendInterfaceDescriptorData( indirectHeap, interfaceDescriptorOffset, 0, sizeof(crossThreadData), sizeof(perThreadData), 0, 0, 0, 1, kernel, 4u, pDevice->getPreemptionMode(), &interfaceDescriptorData, *pDevice); auto pInterfaceDescriptor = HardwareCommandsHelper::getInterfaceDescriptor(indirectHeap, interfaceDescriptorOffset, &interfaceDescriptorData); EXPECT_EQ(expectedSlmSize, pInterfaceDescriptor->getSharedLocalMemorySize()); } compute-runtime-22.14.22890/opencl/test/unit_test/kernel/kernel_tests.cpp000066400000000000000000004365421422164147700263330ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver_hw.h" #include "shared/source/command_stream/wait_status.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/flush_stamp.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/surface_format_info.h" #include "shared/source/memory_manager/allocations_list.h" #include "shared/source/memory_manager/os_agnostic_memory_manager.h" #include "shared/source/memory_manager/unified_memory_manager.h" #include "shared/source/os_interface/hw_info_config.h" #include "shared/source/os_interface/os_context.h" #include "shared/test/common/fixtures/memory_management_fixture.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" 
#include "shared/test/common/libult/ult_command_stream_receiver.h" #include "shared/test/common/mocks/mock_allocation_properties.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "shared/test/common/mocks/mock_timestamp_container.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/helpers/gtest_helpers.h" #include "shared/test/unit_test/page_fault_manager/mock_cpu_page_fault_manager.h" #include "shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/source/built_ins/builtins_dispatch_builder.h" #include "opencl/source/helpers/cl_hw_helper.h" #include "opencl/source/helpers/cl_memory_properties_helpers.h" #include "opencl/source/kernel/kernel.h" #include "opencl/source/mem_obj/image.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/fixtures/multi_root_device_fixture.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "opencl/test/unit_test/program/program_from_binary.h" #include "opencl/test/unit_test/program/program_tests.h" #include "opencl/test/unit_test/test_macros/test_checks_ocl.h" #include using namespace NEO; using KernelTest = ::testing::Test; class KernelTests : public ProgramFromBinaryFixture { public: ~KernelTests() override = default; protected: void SetUp() override { ProgramFromBinaryFixture::SetUp("CopyBuffer_simd32", "CopyBuffer"); ASSERT_NE(nullptr, pProgram); ASSERT_EQ(CL_SUCCESS, retVal); retVal = pProgram->build( pProgram->getDevices(), nullptr, false); ASSERT_EQ(CL_SUCCESS, retVal); // create a kernel pKernel = Kernel::create( pProgram, pProgram->getKernelInfoForKernel(kernelName), *pClDevice, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, pKernel); } void TearDown() override { 
delete pKernel; pKernel = nullptr; knownSource.reset(); ProgramFromBinaryFixture::TearDown(); } MockKernel *pKernel = nullptr; cl_int retVal = CL_SUCCESS; }; TEST(KernelTest, WhenKernelIsCreatedThenCorrectMembersAreMemObjects) { EXPECT_TRUE(Kernel::isMemObj(Kernel::BUFFER_OBJ)); EXPECT_TRUE(Kernel::isMemObj(Kernel::IMAGE_OBJ)); EXPECT_TRUE(Kernel::isMemObj(Kernel::PIPE_OBJ)); EXPECT_FALSE(Kernel::isMemObj(Kernel::SAMPLER_OBJ)); EXPECT_FALSE(Kernel::isMemObj(Kernel::ACCELERATOR_OBJ)); EXPECT_FALSE(Kernel::isMemObj(Kernel::NONE_OBJ)); EXPECT_FALSE(Kernel::isMemObj(Kernel::SVM_ALLOC_OBJ)); } TEST_F(KernelTests, WhenKernelIsCreatedThenKernelHeapIsCorrect) { EXPECT_EQ(pKernel->getKernelInfo().heapInfo.pKernelHeap, pKernel->getKernelHeap()); EXPECT_EQ(pKernel->getKernelInfo().heapInfo.KernelHeapSize, pKernel->getKernelHeapSize()); } TEST_F(KernelTests, GivenInvalidParamNameWhenGettingInfoThenInvalidValueErrorIsReturned) { size_t paramValueSizeRet = 0; // get size retVal = pKernel->getInfo( 0, 0, nullptr, ¶mValueSizeRet); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(KernelTests, GivenInvalidParametersWhenGettingInfoThenValueSizeRetIsNotUpdated) { size_t paramValueSizeRet = 0x1234; // get size retVal = pKernel->getInfo( 0, 0, nullptr, ¶mValueSizeRet); EXPECT_EQ(CL_INVALID_VALUE, retVal); EXPECT_EQ(0x1234u, paramValueSizeRet); } TEST_F(KernelTests, GivenKernelFunctionNameWhenGettingInfoThenKernelFunctionNameIsReturned) { cl_kernel_info paramName = CL_KERNEL_FUNCTION_NAME; size_t paramValueSize = 0; char *paramValue = nullptr; size_t paramValueSizeRet = 0; // get size retVal = pKernel->getInfo( paramName, paramValueSize, nullptr, ¶mValueSizeRet); EXPECT_NE(0u, paramValueSizeRet); ASSERT_EQ(CL_SUCCESS, retVal); // allocate space for name paramValue = new char[paramValueSizeRet]; // get the name paramValueSize = paramValueSizeRet; retVal = pKernel->getInfo( paramName, paramValueSize, paramValue, nullptr); EXPECT_NE(nullptr, paramValue); EXPECT_EQ(0, strcmp(paramValue, 
kernelName)); EXPECT_EQ(CL_SUCCESS, retVal); delete[] paramValue; } TEST_F(KernelTests, GivenKernelBinaryProgramIntelWhenGettingInfoThenKernelBinaryIsReturned) { cl_kernel_info paramName = CL_KERNEL_BINARY_PROGRAM_INTEL; size_t paramValueSize = 0; char *paramValue = nullptr; size_t paramValueSizeRet = 0; const char *pKernelData = reinterpret_cast(pKernel->getKernelHeap()); EXPECT_NE(nullptr, pKernelData); // get size of kernel binary retVal = pKernel->getInfo( paramName, paramValueSize, nullptr, ¶mValueSizeRet); EXPECT_NE(0u, paramValueSizeRet); ASSERT_EQ(CL_SUCCESS, retVal); // allocate space for kernel binary paramValue = new char[paramValueSizeRet]; // get kernel binary paramValueSize = paramValueSizeRet; retVal = pKernel->getInfo( paramName, paramValueSize, paramValue, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, paramValue); EXPECT_EQ(0, memcmp(paramValue, pKernelData, paramValueSize)); delete[] paramValue; } TEST_F(KernelTests, givenBinaryWhenItIsQueriedForGpuAddressThenAbsoluteAddressIsReturned) { cl_kernel_info paramName = CL_KERNEL_BINARY_GPU_ADDRESS_INTEL; uint64_t paramValue = 0llu; size_t paramValueSize = sizeof(paramValue); size_t paramValueSizeRet = 0; retVal = pKernel->getInfo( paramName, paramValueSize, ¶mValue, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); auto expectedGpuAddress = GmmHelper::decanonize(pKernel->getKernelInfo().kernelAllocation->getGpuAddress()); EXPECT_EQ(expectedGpuAddress, paramValue); EXPECT_EQ(paramValueSize, paramValueSizeRet); } TEST_F(KernelTests, GivenKernelNumArgsWhenGettingInfoThenNumberOfKernelArgsIsReturned) { cl_kernel_info paramName = CL_KERNEL_NUM_ARGS; size_t paramValueSize = sizeof(cl_uint); cl_uint paramValue = 0; size_t paramValueSizeRet = 0; // get size retVal = pKernel->getInfo( paramName, paramValueSize, ¶mValue, ¶mValueSizeRet); EXPECT_EQ(sizeof(cl_uint), paramValueSizeRet); EXPECT_EQ(2u, paramValue); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(KernelTests, 
GivenKernelProgramWhenGettingInfoThenProgramIsReturned) { cl_kernel_info paramName = CL_KERNEL_PROGRAM; size_t paramValueSize = sizeof(cl_program); cl_program paramValue = 0; size_t paramValueSizeRet = 0; cl_program prog = pProgram; // get size retVal = pKernel->getInfo( paramName, paramValueSize, ¶mValue, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(cl_program), paramValueSizeRet); EXPECT_EQ(prog, paramValue); } TEST_F(KernelTests, GivenKernelContextWhenGettingInfoThenKernelContextIsReturned) { cl_kernel_info paramName = CL_KERNEL_CONTEXT; cl_context paramValue = 0; size_t paramValueSize = sizeof(paramValue); size_t paramValueSizeRet = 0; cl_context context = pContext; // get size retVal = pKernel->getInfo( paramName, paramValueSize, ¶mValue, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(paramValueSize, paramValueSizeRet); EXPECT_EQ(context, paramValue); } TEST_F(KernelTests, GivenKernelWorkGroupSizeWhenGettingWorkGroupInfoThenWorkGroupSizeIsReturned) { cl_kernel_info paramName = CL_KERNEL_WORK_GROUP_SIZE; size_t paramValue = 0; size_t paramValueSize = sizeof(paramValue); size_t paramValueSizeRet = 0; auto kernelMaxWorkGroupSize = pDevice->getDeviceInfo().maxWorkGroupSize - 1; pKernel->maxKernelWorkGroupSize = static_cast(kernelMaxWorkGroupSize); retVal = pKernel->getWorkGroupInfo( paramName, paramValueSize, ¶mValue, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(paramValueSize, paramValueSizeRet); EXPECT_EQ(kernelMaxWorkGroupSize, paramValue); } TEST_F(KernelTests, GivenKernelCompileWorkGroupSizeWhenGettingWorkGroupInfoThenCompileWorkGroupSizeIsReturned) { cl_kernel_info paramName = CL_KERNEL_COMPILE_WORK_GROUP_SIZE; size_t paramValue[3]; size_t paramValueSize = sizeof(paramValue); size_t paramValueSizeRet = 0; retVal = pKernel->getWorkGroupInfo( paramName, paramValueSize, ¶mValue, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(paramValueSize, paramValueSizeRet); } TEST_F(KernelTests, 
GivenRequiredDisabledEUFusionFlagWhenGettingPrefferedWorkGroupSizeMultipleThenCorectValueIsReturned) { KernelInfo kernelInfo = {}; kernelInfo.kernelDescriptor.kernelAttributes.flags.requiresDisabledEUFusion = true; MockKernel kernel(pProgram, kernelInfo, *pClDevice); auto &hwHelper = HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily); bool fusedDispatchEnabled = hwHelper.isFusedEuDispatchEnabled(*defaultHwInfo, true); auto expectedValue = kernelInfo.getMaxSimdSize() * (fusedDispatchEnabled ? 2 : 1); cl_kernel_info paramName = CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE; size_t paramValue; size_t paramValueSize = sizeof(paramValue); size_t paramValueSizeRet = 0; retVal = kernel.getWorkGroupInfo( paramName, paramValueSize, ¶mValue, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(paramValueSize, paramValueSizeRet); EXPECT_EQ(expectedValue, paramValue); } TEST_F(KernelTests, GivenCFEFusedEUDispatchEnabledAndRequiredDisabledUEFusionWhenGettingPrefferedWorkGroupSizeMultipleThenCorectValueIsReturned) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.CFEFusedEUDispatch.set(0); KernelInfo kernelInfo = {}; kernelInfo.kernelDescriptor.kernelAttributes.flags.requiresDisabledEUFusion = true; MockKernel kernel(pProgram, kernelInfo, *pClDevice); auto &hwHelper = HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily); bool fusedDispatchEnabled = hwHelper.isFusedEuDispatchEnabled(*defaultHwInfo, true); auto expectedValue = kernelInfo.getMaxSimdSize() * (fusedDispatchEnabled ? 
2 : 1); cl_kernel_info paramName = CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE; size_t paramValue; size_t paramValueSize = sizeof(paramValue); size_t paramValueSizeRet = 0; retVal = kernel.getWorkGroupInfo( paramName, paramValueSize, ¶mValue, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(paramValueSize, paramValueSizeRet); EXPECT_EQ(expectedValue, paramValue); } TEST_F(KernelTests, GivenInvalidParamNameWhenGettingWorkGroupInfoThenInvalidValueErrorIsReturned) { size_t paramValueSizeRet = 0x1234u; retVal = pKernel->getWorkGroupInfo( 0, 0, nullptr, ¶mValueSizeRet); EXPECT_EQ(CL_INVALID_VALUE, retVal); EXPECT_EQ(0x1234u, paramValueSizeRet); } TEST_F(KernelTests, WhenIsSingleSubdevicePreferredIsCalledThenCorrectValuesAreReturned) { std::unique_ptr pKernel{MockKernel::create(pClDevice->getDevice(), pProgram)}; for (auto usesSyncBuffer : ::testing::Bool()) { pKernel->getAllocatedKernelInfo()->kernelDescriptor.kernelAttributes.flags.usesSyncBuffer = usesSyncBuffer; for (auto singleSubdevicePreferredInCurrentEnqueue : ::testing::Bool()) { pKernel->singleSubdevicePreferredInCurrentEnqueue = singleSubdevicePreferredInCurrentEnqueue; EXPECT_EQ(usesSyncBuffer, pKernel->usesSyncBuffer()); auto expectedSingleSubdevicePreferredInCurrentEnqueue = singleSubdevicePreferredInCurrentEnqueue || usesSyncBuffer; EXPECT_EQ(expectedSingleSubdevicePreferredInCurrentEnqueue, pKernel->isSingleSubdevicePreferred()); } } } class KernelFromBinaryTest : public ProgramSimpleFixture { public: void SetUp() override { ProgramSimpleFixture::SetUp(); } void TearDown() override { ProgramSimpleFixture::TearDown(); } }; typedef Test KernelFromBinaryTests; TEST_F(KernelFromBinaryTests, GivenKernelNumArgsWhenGettingInfoThenNumberOfKernelArgsIsReturned) { CreateProgramFromBinary(pContext, pContext->getDevices(), "kernel_num_args"); ASSERT_NE(nullptr, pProgram); retVal = pProgram->build( pProgram->getDevices(), nullptr, false); ASSERT_EQ(CL_SUCCESS, retVal); auto &kernelInfo = 
pProgram->getKernelInfoForKernel("test"); // create a kernel auto pKernel = Kernel::create( pProgram, kernelInfo, *pClDevice, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); cl_uint paramValue = 0; size_t paramValueSizeRet = 0; // get size retVal = pKernel->getInfo( CL_KERNEL_NUM_ARGS, sizeof(cl_uint), ¶mValue, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(cl_uint), paramValueSizeRet); EXPECT_EQ(3u, paramValue); delete pKernel; } TEST_F(KernelFromBinaryTests, WhenRegularKernelIsCreatedThenItIsNotBuiltIn) { CreateProgramFromBinary(pContext, pContext->getDevices(), "simple_kernels"); ASSERT_NE(nullptr, pProgram); retVal = pProgram->build( pProgram->getDevices(), nullptr, false); ASSERT_EQ(CL_SUCCESS, retVal); auto &kernelInfo = pProgram->getKernelInfoForKernel("simple_kernel_0"); // create a kernel auto pKernel = Kernel::create( pProgram, kernelInfo, *pClDevice, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, pKernel); // get builtIn property bool isBuiltIn = pKernel->isBuiltIn; EXPECT_FALSE(isBuiltIn); delete pKernel; } TEST_F(KernelFromBinaryTests, givenArgumentDeclaredAsConstantWhenKernelIsCreatedThenArgumentIsMarkedAsReadOnly) { CreateProgramFromBinary(pContext, pContext->getDevices(), "simple_kernels"); ASSERT_NE(nullptr, pProgram); retVal = pProgram->build( pProgram->getDevices(), nullptr, false); ASSERT_EQ(CL_SUCCESS, retVal); auto pKernelInfo = pProgram->getKernelInfo("simple_kernel_6", rootDeviceIndex); EXPECT_TRUE(pKernelInfo->getArgDescriptorAt(1).isReadOnly()); pKernelInfo = pProgram->getKernelInfo("simple_kernel_1", rootDeviceIndex); EXPECT_TRUE(pKernelInfo->getArgDescriptorAt(0).isReadOnly()); } typedef Test KernelPrivateSurfaceTest; typedef Test KernelGlobalSurfaceTest; typedef Test KernelConstantSurfaceTest; class CommandStreamReceiverMock : public CommandStreamReceiver { typedef CommandStreamReceiver BaseClass; public: using CommandStreamReceiver::executionEnvironment; using BaseClass::CommandStreamReceiver; TagAllocatorBase 
*getTimestampPacketAllocator() override { return nullptr; } void flushTagUpdate() override{}; void flushNonKernelTask(GraphicsAllocation *eventAlloc, uint64_t immediateGpuAddress, uint64_t immediateData, PipeControlArgs &args, bool isWaitOnEvents, bool startOfDispatch, bool endOfDispatch) override{}; void updateTagFromWait() override{}; bool isUpdateTagFromWaitEnabled() override { return false; }; bool isMultiOsContextCapable() const override { return false; } MemoryCompressionState getMemoryCompressionState(bool auxTranslationRequired, const HardwareInfo &hwInfo) const override { return MemoryCompressionState::NotApplicable; } CommandStreamReceiverMock() : BaseClass(*(new ExecutionEnvironment), 0, 1) { this->mockExecutionEnvironment.reset(&this->executionEnvironment); executionEnvironment.prepareRootDeviceEnvironments(1); executionEnvironment.rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); executionEnvironment.initializeMemoryManager(); } void makeResident(GraphicsAllocation &graphicsAllocation) override { residency[graphicsAllocation.getUnderlyingBuffer()] = graphicsAllocation.getUnderlyingBufferSize(); if (passResidencyCallToBaseClass) { CommandStreamReceiver::makeResident(graphicsAllocation); } } void makeNonResident(GraphicsAllocation &graphicsAllocation) override { residency.erase(graphicsAllocation.getUnderlyingBuffer()); if (passResidencyCallToBaseClass) { CommandStreamReceiver::makeNonResident(graphicsAllocation); } } NEO::SubmissionStatus flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override { return NEO::SubmissionStatus::SUCCESS; } WaitStatus waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool quickKmdSleep, QueueThrottle throttle) override { return WaitStatus::Ready; } uint32_t flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override { return taskCount; }; CompletionStamp flushTask( 
LinearStream &commandStream, size_t commandStreamStart, const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh, uint32_t taskLevel, DispatchFlags &dispatchFlags, Device &device) override { CompletionStamp cs = {}; return cs; } bool flushBatchedSubmissions() override { return true; } CommandStreamReceiverType getType() override { return CommandStreamReceiverType::CSR_HW; } void programHardwareContext(LinearStream &cmdStream) override {} size_t getCmdsSizeForHardwareContext() const override { return 0; } void programComputeBarrierCommand(LinearStream &cmdStream) override { } size_t getCmdsSizeForComputeBarrierCommand() const override { return 0; } GraphicsAllocation *getClearColorAllocation() override { return nullptr; } bool createPreemptionAllocation() override { return createPreemptionAllocationReturn; } void postInitFlagsSetup() override {} std::map residency; std::unique_ptr mockExecutionEnvironment; bool passResidencyCallToBaseClass = true; bool createPreemptionAllocationReturn = true; }; TEST_F(KernelPrivateSurfaceTest, WhenChangingResidencyThenCsrResidencySizeIsUpdated) { ASSERT_NE(nullptr, pDevice); auto pKernelInfo = std::make_unique(); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32; pKernelInfo->setPrivateMemory(112, false, 8, 40, 64); pKernelInfo->setCrossThreadDataSize(64); // create kernel MockContext context; MockProgram program(&context, false, toClDeviceVector(*pClDevice)); MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); // Test it auto executionEnvironment = pDevice->getExecutionEnvironment(); std::unique_ptr csr(new CommandStreamReceiverMock(*executionEnvironment, 0, 1)); csr->setupContext(*pDevice->getDefaultEngine().osContext); csr->residency.clear(); EXPECT_EQ(0u, csr->residency.size()); pKernel->makeResident(*csr.get()); EXPECT_EQ(1u, csr->residency.size()); csr->makeSurfacePackNonResident(csr->getResidencyAllocations()); EXPECT_EQ(0u, 
csr->residency.size()); delete pKernel; } TEST_F(KernelPrivateSurfaceTest, givenKernelWithPrivateSurfaceThatIsInUseByGpuWhenKernelIsBeingDestroyedThenAllocationIsAddedToDeferredFreeList) { auto pKernelInfo = std::make_unique(); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32; pKernelInfo->setPrivateMemory(112, false, 8, 40, 64); pKernelInfo->setCrossThreadDataSize(64); MockContext context; MockProgram program(&context, false, toClDeviceVector(*pClDevice)); std::unique_ptr pKernel(new MockKernel(&program, *pKernelInfo, *pClDevice)); pKernel->initialize(); auto &csr = pDevice->getGpgpuCommandStreamReceiver(); auto privateSurface = pKernel->privateSurface; auto tagAddress = csr.getTagAddress(); privateSurface->updateTaskCount(*tagAddress + 1, csr.getOsContext().getContextId()); EXPECT_TRUE(csr.getTemporaryAllocations().peekIsEmpty()); pKernel.reset(nullptr); EXPECT_FALSE(csr.getTemporaryAllocations().peekIsEmpty()); EXPECT_EQ(csr.getTemporaryAllocations().peekHead(), privateSurface); } TEST_F(KernelPrivateSurfaceTest, WhenPrivateSurfaceAllocationFailsThenOutOfResourcesErrorIsReturned) { ASSERT_NE(nullptr, pDevice); auto pKernelInfo = std::make_unique(); pKernelInfo->setPrivateMemory(112, false, 8, 40, 64); pKernelInfo->setCrossThreadDataSize(64); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32; // create kernel MockContext context; MockProgram program(&context, false, toClDeviceVector(*pClDevice)); MemoryManagementFixture::InjectedFunction method = [&](size_t failureIndex) { MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice); if (MemoryManagement::nonfailingAllocation == failureIndex) { EXPECT_EQ(CL_SUCCESS, pKernel->initialize()); } else { EXPECT_EQ(CL_OUT_OF_RESOURCES, pKernel->initialize()); } delete pKernel; }; auto f = new MemoryManagementFixture(); f->SetUp(); f->injectFailures(method); f->TearDown(); delete f; } TEST_F(KernelPrivateSurfaceTest, 
given32BitDeviceWhenKernelIsCreatedThenPrivateSurfaceIs32BitAllocation) { if constexpr (is64bit) { pDevice->getMemoryManager()->setForce32BitAllocations(true); auto pKernelInfo = std::make_unique(); pKernelInfo->setPrivateMemory(112, false, 8, 40, 64); pKernelInfo->setCrossThreadDataSize(64); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32; // create kernel MockContext context; MockProgram program(&context, false, toClDeviceVector(*pClDevice)); MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); EXPECT_TRUE(pKernel->privateSurface->is32BitAllocation()); delete pKernel; } } HWTEST_F(KernelPrivateSurfaceTest, givenStatefulKernelWhenKernelIsCreatedThenPrivateMemorySurfaceStateIsPatchedWithCpuAddress) { // define kernel info auto pKernelInfo = std::make_unique(); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32; pKernelInfo->setPrivateMemory(16, false, 8, 0, 0); MockContext context; MockProgram program(&context, false, toClDeviceVector(*pClDevice)); // create kernel MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice); // setup surface state heap char surfaceStateHeap[0x80]; pKernelInfo->heapInfo.pSsh = surfaceStateHeap; pKernelInfo->heapInfo.SurfaceStateHeapSize = sizeof(surfaceStateHeap); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize()); auto bufferAddress = pKernel->privateSurface->getGpuAddress(); typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.privateMemoryAddress.bindful)); auto surfaceAddress = surfaceState->getSurfaceBaseAddress(); EXPECT_EQ(bufferAddress, surfaceAddress); delete pKernel; } TEST_F(KernelPrivateSurfaceTest, givenStatelessKernelWhenKernelIsCreatedThenPrivateMemorySurfaceStateIsNotPatched) { // define kernel info auto pKernelInfo 
= std::make_unique(); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32; pKernelInfo->kernelDescriptor.kernelAttributes.bufferAddressingMode = KernelDescriptor::Stateless; // setup global memory char buffer[16]; MockGraphicsAllocation gfxAlloc(buffer, sizeof(buffer)); MockContext context(pClDevice); MockProgram program(&context, false, toClDeviceVector(*pClDevice)); program.setConstantSurface(&gfxAlloc); // create kernel MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize()); EXPECT_EQ(nullptr, pKernel->getSurfaceStateHeap()); program.setConstantSurface(nullptr); delete pKernel; } TEST_F(KernelPrivateSurfaceTest, givenNullDataParameterStreamWhenGettingConstantBufferSizeThenZeroIsReturned) { auto pKernelInfo = std::make_unique(); EXPECT_EQ(0u, pKernelInfo->getConstantBufferSize()); } TEST_F(KernelPrivateSurfaceTest, givenNonNullDataParameterStreamWhenGettingConstantBufferSizeThenCorrectSizeIsReturned) { auto pKernelInfo = std::make_unique(); pKernelInfo->setCrossThreadDataSize(64); EXPECT_EQ(64u, pKernelInfo->getConstantBufferSize()); } TEST_F(KernelPrivateSurfaceTest, GivenKernelWhenPrivateSurfaceTooBigAndGpuPointerSize4ThenReturnOutOfResources) { auto pKernelInfo = std::make_unique(); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32; pKernelInfo->setPrivateMemory(std::numeric_limits::max(), false, 0, 0, 0); MockContext context; MockProgram program(&context, false, toClDeviceVector(*pClDevice)); std::unique_ptr pKernel(new MockKernel(&program, *pKernelInfo, *pClDevice)); pKernelInfo->kernelDescriptor.kernelAttributes.gpuPointerSize = 4; pDevice->getMemoryManager()->setForce32BitAllocations(false); if (pDevice->getDeviceInfo().computeUnitsUsedForScratch == 0) pDevice->deviceInfo.computeUnitsUsedForScratch = 120; EXPECT_EQ(CL_OUT_OF_RESOURCES, pKernel->initialize()); } TEST_F(KernelPrivateSurfaceTest, 
GivenKernelWhenPrivateSurfaceTooBigAndGpuPointerSize4And32BitAllocationsThenReturnOutOfResources) { auto pKernelInfo = std::make_unique(); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32; pKernelInfo->setPrivateMemory(std::numeric_limits::max(), false, 0, 0, 0); MockContext context; MockProgram program(&context, false, toClDeviceVector(*pClDevice)); std::unique_ptr pKernel(new MockKernel(&program, *pKernelInfo, *pClDevice)); pKernelInfo->kernelDescriptor.kernelAttributes.gpuPointerSize = 4; pDevice->getMemoryManager()->setForce32BitAllocations(true); if (pDevice->getDeviceInfo().computeUnitsUsedForScratch == 0) pDevice->deviceInfo.computeUnitsUsedForScratch = 120; EXPECT_EQ(CL_OUT_OF_RESOURCES, pKernel->initialize()); } TEST_F(KernelPrivateSurfaceTest, GivenKernelWhenPrivateSurfaceTooBigAndGpuPointerSize8And32BitAllocationsThenReturnOutOfResources) { auto pKernelInfo = std::make_unique(); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32; pKernelInfo->setPrivateMemory(std::numeric_limits::max(), false, 0, 0, 0); MockContext context; MockProgram program(&context, false, toClDeviceVector(*pClDevice)); std::unique_ptr pKernel(new MockKernel(&program, *pKernelInfo, *pClDevice)); pKernelInfo->kernelDescriptor.kernelAttributes.gpuPointerSize = 8; pDevice->getMemoryManager()->setForce32BitAllocations(true); if (pDevice->getDeviceInfo().computeUnitsUsedForScratch == 0) pDevice->deviceInfo.computeUnitsUsedForScratch = 120; EXPECT_EQ(CL_OUT_OF_RESOURCES, pKernel->initialize()); } TEST_F(KernelGlobalSurfaceTest, givenBuiltInKernelWhenKernelIsCreatedThenGlobalSurfaceIsPatchedWithCpuAddress) { auto pKernelInfo = std::make_unique(); pKernelInfo->setGlobalVariablesSurface(8, 0); pKernelInfo->setCrossThreadDataSize(16); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32; char buffer[16]; GraphicsAllocation gfxAlloc(0, AllocationType::UNKNOWN, buffer, (uint64_t)buffer - 8u, 8, static_cast(1u), MemoryPool::MemoryNull); uint64_t bufferAddress = 
(uint64_t)gfxAlloc.getUnderlyingBuffer(); // create kernel MockContext context; MockProgram program(&context, false, toClDeviceVector(*pClDevice)); program.setGlobalSurface(&gfxAlloc); MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice); pKernel->isBuiltIn = true; ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); EXPECT_EQ(bufferAddress, *(uint64_t *)pKernel->getCrossThreadData()); program.setGlobalSurface(nullptr); delete pKernel; } TEST_F(KernelGlobalSurfaceTest, givenNDRangeKernelWhenKernelIsCreatedThenGlobalSurfaceIsPatchedWithBaseAddressOffset) { auto pKernelInfo = std::make_unique(); pKernelInfo->setGlobalVariablesSurface(8, 0); pKernelInfo->setCrossThreadDataSize(16); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32; char buffer[16]; GraphicsAllocation gfxAlloc(0, AllocationType::UNKNOWN, buffer, (uint64_t)buffer - 8u, 8, MemoryPool::MemoryNull, 0u); uint64_t bufferAddress = gfxAlloc.getGpuAddress(); // create kernel MockProgram program(toClDeviceVector(*pClDevice)); program.setGlobalSurface(&gfxAlloc); MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); EXPECT_EQ(bufferAddress, *(uint64_t *)pKernel->getCrossThreadData()); program.setGlobalSurface(nullptr); delete pKernel; } HWTEST_F(KernelGlobalSurfaceTest, givenStatefulKernelWhenKernelIsCreatedThenGlobalMemorySurfaceStateIsPatchedWithCpuAddress) { // define kernel info auto pKernelInfo = std::make_unique(); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32; // setup global memory pKernelInfo->setGlobalVariablesSurface(8, 0, 0); char buffer[16]; MockGraphicsAllocation gfxAlloc(buffer, sizeof(buffer)); auto bufferAddress = gfxAlloc.getGpuAddress(); MockContext context; MockProgram program(&context, false, toClDeviceVector(*pClDevice)); program.setGlobalSurface(&gfxAlloc); // create kernel MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice); // setup surface state heap char 
surfaceStateHeap[0x80]; pKernelInfo->heapInfo.pSsh = surfaceStateHeap; pKernelInfo->heapInfo.SurfaceStateHeapSize = sizeof(surfaceStateHeap); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize()); typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.globalVariablesSurfaceAddress.bindful)); auto surfaceAddress = surfaceState->getSurfaceBaseAddress(); EXPECT_EQ(bufferAddress, surfaceAddress); program.setGlobalSurface(nullptr); delete pKernel; } TEST_F(KernelGlobalSurfaceTest, givenStatelessKernelWhenKernelIsCreatedThenGlobalMemorySurfaceStateIsNotPatched) { // define kernel info auto pKernelInfo = std::make_unique(); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32; pKernelInfo->kernelDescriptor.kernelAttributes.bufferAddressingMode = KernelDescriptor::Stateless; // setup global memory char buffer[16]; MockGraphicsAllocation gfxAlloc(buffer, sizeof(buffer)); MockProgram program(toClDeviceVector(*pClDevice)); program.setGlobalSurface(&gfxAlloc); // create kernel MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize()); EXPECT_EQ(nullptr, pKernel->getSurfaceStateHeap()); program.setGlobalSurface(nullptr); delete pKernel; } TEST_F(KernelConstantSurfaceTest, givenBuiltInKernelWhenKernelIsCreatedThenConstantSurfaceIsPatchedWithCpuAddress) { auto pKernelInfo = std::make_unique(); pKernelInfo->setGlobalConstantsSurface(8, 0); pKernelInfo->setCrossThreadDataSize(16); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32; char buffer[16]; GraphicsAllocation gfxAlloc(0, AllocationType::UNKNOWN, buffer, (uint64_t)buffer - 8u, 8, static_cast(1u), MemoryPool::MemoryNull); uint64_t bufferAddress = (uint64_t)gfxAlloc.getUnderlyingBuffer(); // create kernel 
MockProgram program(toClDeviceVector(*pClDevice)); program.setConstantSurface(&gfxAlloc); MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice); pKernel->isBuiltIn = true; ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); EXPECT_EQ(bufferAddress, *(uint64_t *)pKernel->getCrossThreadData()); program.setConstantSurface(nullptr); delete pKernel; } TEST_F(KernelConstantSurfaceTest, givenNDRangeKernelWhenKernelIsCreatedThenConstantSurfaceIsPatchedWithBaseAddressOffset) { auto pKernelInfo = std::make_unique(); pKernelInfo->setGlobalConstantsSurface(8, 0); pKernelInfo->setCrossThreadDataSize(16); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32; char buffer[16]; GraphicsAllocation gfxAlloc(0, AllocationType::UNKNOWN, buffer, (uint64_t)buffer - 8u, 8, MemoryPool::MemoryNull, 0u); uint64_t bufferAddress = gfxAlloc.getGpuAddress(); // create kernel MockProgram program(toClDeviceVector(*pClDevice)); program.setConstantSurface(&gfxAlloc); MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); EXPECT_EQ(bufferAddress, *(uint64_t *)pKernel->getCrossThreadData()); program.setConstantSurface(nullptr); delete pKernel; } HWTEST_F(KernelConstantSurfaceTest, givenStatefulKernelWhenKernelIsCreatedThenConstantMemorySurfaceStateIsPatchedWithCpuAddress) { auto pKernelInfo = std::make_unique(); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32; // setup constant memory pKernelInfo->setGlobalConstantsSurface(8, 0, 0); char buffer[16]; MockGraphicsAllocation gfxAlloc(buffer, sizeof(buffer)); auto bufferAddress = gfxAlloc.getGpuAddress(); MockContext context; MockProgram program(&context, false, toClDeviceVector(*pClDevice)); program.setConstantSurface(&gfxAlloc); // create kernel MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice); // setup surface state heap char surfaceStateHeap[0x80]; pKernelInfo->heapInfo.pSsh = surfaceStateHeap; 
pKernelInfo->heapInfo.SurfaceStateHeapSize = sizeof(surfaceStateHeap); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize()); typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.globalConstantsSurfaceAddress.bindful)); auto surfaceAddress = surfaceState->getSurfaceBaseAddress(); EXPECT_EQ(bufferAddress, surfaceAddress); program.setConstantSurface(nullptr); delete pKernel; } TEST_F(KernelConstantSurfaceTest, givenStatelessKernelWhenKernelIsCreatedThenConstantMemorySurfaceStateIsNotPatched) { // define kernel info auto pKernelInfo = std::make_unique(); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32; pKernelInfo->kernelDescriptor.kernelAttributes.bufferAddressingMode = KernelDescriptor::Stateless; // setup global memory char buffer[16]; MockGraphicsAllocation gfxAlloc(buffer, sizeof(buffer)); MockProgram program(toClDeviceVector(*pClDevice)); program.setConstantSurface(&gfxAlloc); // create kernel MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize()); EXPECT_EQ(nullptr, pKernel->getSurfaceStateHeap()); program.setConstantSurface(nullptr); delete pKernel; } typedef Test KernelResidencyTest; HWTEST_F(KernelResidencyTest, givenKernelWhenMakeResidentIsCalledThenKernelIsaIsMadeResident) { ASSERT_NE(nullptr, pDevice); char pCrossThreadData[64]; // define kernel info auto pKernelInfo = std::make_unique(); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.storeMakeResidentAllocations = true; auto memoryManager = commandStreamReceiver.getMemoryManager(); pKernelInfo->kernelAllocation = 
memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); // setup kernel arg offsets pKernelInfo->addArgBuffer(0, 0x10); pKernelInfo->addArgBuffer(1, 0x20); pKernelInfo->addArgBuffer(2, 0x30); MockProgram program(toClDeviceVector(*pClDevice)); MockContext ctx; program.setContext(&ctx); std::unique_ptr pKernel(new MockKernel(&program, *pKernelInfo, *pClDevice)); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); pKernel->setCrossThreadData(pCrossThreadData, sizeof(pCrossThreadData)); EXPECT_EQ(0u, commandStreamReceiver.makeResidentAllocations.size()); pKernel->makeResident(pDevice->getGpgpuCommandStreamReceiver()); EXPECT_EQ(1u, commandStreamReceiver.makeResidentAllocations.size()); EXPECT_TRUE(commandStreamReceiver.isMadeResident(pKernel->getKernelInfo().getGraphicsAllocation())); memoryManager->freeGraphicsMemory(pKernelInfo->kernelAllocation); } HWTEST_F(KernelResidencyTest, givenKernelWhenMakeResidentIsCalledThenExportedFunctionsIsaAllocationIsMadeResident) { auto pKernelInfo = std::make_unique(); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.storeMakeResidentAllocations = true; auto memoryManager = commandStreamReceiver.getMemoryManager(); pKernelInfo->kernelAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); MockProgram program(toClDeviceVector(*pClDevice)); auto exportedFunctionsSurface = std::make_unique(); program.buildInfos[pDevice->getRootDeviceIndex()].exportedFunctionsSurface = exportedFunctionsSurface.get(); MockContext ctx; program.setContext(&ctx); std::unique_ptr pKernel(new MockKernel(&program, *pKernelInfo, *pClDevice)); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); EXPECT_EQ(0u, commandStreamReceiver.makeResidentAllocations.size()); 
pKernel->makeResident(pDevice->getGpgpuCommandStreamReceiver()); EXPECT_TRUE(commandStreamReceiver.isMadeResident(program.buildInfos[pDevice->getRootDeviceIndex()].exportedFunctionsSurface)); // check getResidency as well std::vector residencySurfaces; pKernel->getResidency(residencySurfaces); std::unique_ptr mockCsrExecEnv = std::make_unique(); mockCsrExecEnv->prepareRootDeviceEnvironments(1); mockCsrExecEnv->rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); mockCsrExecEnv->initializeMemoryManager(); { CommandStreamReceiverMock csrMock(*mockCsrExecEnv.get(), 0, 1); csrMock.passResidencyCallToBaseClass = false; for (const auto &s : residencySurfaces) { s->makeResident(csrMock); delete s; } EXPECT_EQ(1U, csrMock.residency.count(exportedFunctionsSurface->getUnderlyingBuffer())); } memoryManager->freeGraphicsMemory(pKernelInfo->kernelAllocation); } HWTEST_F(KernelResidencyTest, givenKernelWhenMakeResidentIsCalledThenGlobalBufferIsMadeResident) { auto pKernelInfo = std::make_unique(); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.storeMakeResidentAllocations = true; auto memoryManager = commandStreamReceiver.getMemoryManager(); pKernelInfo->kernelAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); MockProgram program(toClDeviceVector(*pClDevice)); MockContext ctx; program.setContext(&ctx); program.buildInfos[pDevice->getRootDeviceIndex()].globalSurface = new MockGraphicsAllocation(); std::unique_ptr pKernel(new MockKernel(&program, *pKernelInfo, *pClDevice)); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); EXPECT_EQ(0u, commandStreamReceiver.makeResidentAllocations.size()); pKernel->makeResident(pDevice->getGpgpuCommandStreamReceiver()); EXPECT_TRUE(commandStreamReceiver.isMadeResident(program.buildInfos[pDevice->getRootDeviceIndex()].globalSurface)); 
std::vector residencySurfaces; pKernel->getResidency(residencySurfaces); std::unique_ptr mockCsrExecEnv = std::make_unique(); mockCsrExecEnv->prepareRootDeviceEnvironments(1); mockCsrExecEnv->rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); mockCsrExecEnv->initializeMemoryManager(); { CommandStreamReceiverMock csrMock(*mockCsrExecEnv.get(), 0, 1); csrMock.passResidencyCallToBaseClass = false; for (const auto &s : residencySurfaces) { s->makeResident(csrMock); delete s; } EXPECT_EQ(1U, csrMock.residency.count(program.buildInfos[pDevice->getRootDeviceIndex()].globalSurface->getUnderlyingBuffer())); } memoryManager->freeGraphicsMemory(pKernelInfo->kernelAllocation); } HWTEST_F(KernelResidencyTest, givenKernelWhenItUsesIndirectUnifiedMemoryDeviceAllocationThenTheyAreMadeResident) { MockKernelWithInternals mockKernel(*this->pClDevice); auto &commandStreamReceiver = this->pDevice->getUltCommandStreamReceiver(); auto svmAllocationsManager = mockKernel.mockContext->getSVMAllocsManager(); auto properties = SVMAllocsManager::UnifiedMemoryProperties(InternalMemoryType::DEVICE_UNIFIED_MEMORY, mockKernel.mockContext->getRootDeviceIndices(), mockKernel.mockContext->getDeviceBitfields()); properties.device = pDevice; auto unifiedMemoryAllocation = svmAllocationsManager->createUnifiedMemoryAllocation(4096u, properties); mockKernel.mockKernel->makeResident(this->pDevice->getGpgpuCommandStreamReceiver()); EXPECT_EQ(0u, commandStreamReceiver.getResidencyAllocations().size()); mockKernel.mockKernel->setUnifiedMemoryProperty(CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL, true); mockKernel.mockKernel->makeResident(this->pDevice->getGpgpuCommandStreamReceiver()); EXPECT_EQ(1u, commandStreamReceiver.getResidencyAllocations().size()); EXPECT_EQ(commandStreamReceiver.getResidencyAllocations()[0]->getGpuAddress(), castToUint64(unifiedMemoryAllocation)); mockKernel.mockKernel->setUnifiedMemoryProperty(CL_KERNEL_EXEC_INFO_SVM_PTRS, true); 
svmAllocationsManager->freeSVMAlloc(unifiedMemoryAllocation); } HWTEST_F(KernelResidencyTest, givenKernelUsingIndirectHostMemoryWhenMakeResidentIsCalledThenOnlyHostAllocationsAreMadeResident) { MockKernelWithInternals mockKernel(*this->pClDevice); auto &commandStreamReceiver = this->pDevice->getUltCommandStreamReceiver(); auto svmAllocationsManager = mockKernel.mockContext->getSVMAllocsManager(); auto deviceProperties = SVMAllocsManager::UnifiedMemoryProperties(InternalMemoryType::DEVICE_UNIFIED_MEMORY, mockKernel.mockContext->getRootDeviceIndices(), mockKernel.mockContext->getDeviceBitfields()); deviceProperties.device = pDevice; auto hostProperties = SVMAllocsManager::UnifiedMemoryProperties(InternalMemoryType::HOST_UNIFIED_MEMORY, mockKernel.mockContext->getRootDeviceIndices(), mockKernel.mockContext->getDeviceBitfields()); auto unifiedDeviceMemoryAllocation = svmAllocationsManager->createUnifiedMemoryAllocation(4096u, deviceProperties); auto unifiedHostMemoryAllocation = svmAllocationsManager->createUnifiedMemoryAllocation(4096u, hostProperties); mockKernel.mockKernel->makeResident(this->pDevice->getGpgpuCommandStreamReceiver()); EXPECT_EQ(0u, commandStreamReceiver.getResidencyAllocations().size()); mockKernel.mockKernel->setUnifiedMemoryProperty(CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL, true); mockKernel.mockKernel->makeResident(this->pDevice->getGpgpuCommandStreamReceiver()); EXPECT_EQ(1u, commandStreamReceiver.getResidencyAllocations().size()); EXPECT_EQ(commandStreamReceiver.getResidencyAllocations()[0]->getGpuAddress(), castToUint64(unifiedHostMemoryAllocation)); svmAllocationsManager->freeSVMAlloc(unifiedDeviceMemoryAllocation); svmAllocationsManager->freeSVMAlloc(unifiedHostMemoryAllocation); } HWTEST_F(KernelResidencyTest, givenKernelUsingIndirectSharedMemoryWhenMakeResidentIsCalledThenOnlySharedAllocationsAreMadeResident) { MockKernelWithInternals mockKernel(*this->pClDevice); auto &commandStreamReceiver = 
this->pDevice->getUltCommandStreamReceiver(); auto svmAllocationsManager = mockKernel.mockContext->getSVMAllocsManager(); auto sharedProperties = SVMAllocsManager::UnifiedMemoryProperties(InternalMemoryType::SHARED_UNIFIED_MEMORY, mockKernel.mockContext->getRootDeviceIndices(), mockKernel.mockContext->getDeviceBitfields()); auto hostProperties = SVMAllocsManager::UnifiedMemoryProperties(InternalMemoryType::HOST_UNIFIED_MEMORY, mockKernel.mockContext->getRootDeviceIndices(), mockKernel.mockContext->getDeviceBitfields()); auto unifiedSharedMemoryAllocation = svmAllocationsManager->createSharedUnifiedMemoryAllocation(4096u, sharedProperties, mockKernel.mockContext->getSpecialQueue(pDevice->getRootDeviceIndex())); auto unifiedHostMemoryAllocation = svmAllocationsManager->createUnifiedMemoryAllocation(4096u, hostProperties); mockKernel.mockKernel->makeResident(this->pDevice->getGpgpuCommandStreamReceiver()); EXPECT_EQ(0u, commandStreamReceiver.getResidencyAllocations().size()); mockKernel.mockKernel->setUnifiedMemoryProperty(CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL, true); mockKernel.mockKernel->makeResident(this->pDevice->getGpgpuCommandStreamReceiver()); EXPECT_EQ(1u, commandStreamReceiver.getResidencyAllocations().size()); EXPECT_EQ(commandStreamReceiver.getResidencyAllocations()[0]->getGpuAddress(), castToUint64(unifiedSharedMemoryAllocation)); svmAllocationsManager->freeSVMAlloc(unifiedSharedMemoryAllocation); svmAllocationsManager->freeSVMAlloc(unifiedHostMemoryAllocation); } HWTEST_F(KernelResidencyTest, givenDeviceUnifiedMemoryAndPageFaultManagerWhenMakeResidentIsCalledThenAllocationIsNotDecommited) { auto mockPageFaultManager = new MockPageFaultManager(); static_cast(this->pDevice->getExecutionEnvironment()->memoryManager.get())->pageFaultManager.reset(mockPageFaultManager); MockKernelWithInternals mockKernel(*this->pClDevice); auto &commandStreamReceiver = this->pDevice->getUltCommandStreamReceiver(); auto svmAllocationsManager = 
mockKernel.mockContext->getSVMAllocsManager(); auto deviceProperties = SVMAllocsManager::UnifiedMemoryProperties(InternalMemoryType::DEVICE_UNIFIED_MEMORY, mockKernel.mockContext->getRootDeviceIndices(), mockKernel.mockContext->getDeviceBitfields()); deviceProperties.device = pDevice; auto unifiedMemoryAllocation = svmAllocationsManager->createUnifiedMemoryAllocation(4096u, deviceProperties); auto unifiedMemoryGraphicsAllocation = svmAllocationsManager->getSVMAlloc(unifiedMemoryAllocation); EXPECT_EQ(0u, mockKernel.mockKernel->kernelUnifiedMemoryGfxAllocations.size()); mockKernel.mockKernel->setUnifiedMemoryExecInfo(unifiedMemoryGraphicsAllocation->gpuAllocations.getGraphicsAllocation(pDevice->getRootDeviceIndex())); EXPECT_EQ(1u, mockKernel.mockKernel->kernelUnifiedMemoryGfxAllocations.size()); mockKernel.mockKernel->makeResident(commandStreamReceiver); EXPECT_EQ(mockPageFaultManager->allowMemoryAccessCalled, 0); EXPECT_EQ(mockPageFaultManager->protectMemoryCalled, 0); EXPECT_EQ(mockPageFaultManager->transferToCpuCalled, 0); EXPECT_EQ(mockPageFaultManager->transferToGpuCalled, 0); mockKernel.mockKernel->clearUnifiedMemoryExecInfo(); EXPECT_EQ(0u, mockKernel.mockKernel->kernelUnifiedMemoryGfxAllocations.size()); svmAllocationsManager->freeSVMAlloc(unifiedMemoryAllocation); } HWTEST_F(KernelResidencyTest, givenSharedUnifiedMemoryAndPageFaultManagerWhenMakeResidentIsCalledThenAllocationIsDecommited) { auto mockPageFaultManager = new MockPageFaultManager(); static_cast(this->pDevice->getExecutionEnvironment()->memoryManager.get())->pageFaultManager.reset(mockPageFaultManager); MockKernelWithInternals mockKernel(*this->pClDevice); auto &commandStreamReceiver = this->pDevice->getUltCommandStreamReceiver(); auto svmAllocationsManager = mockKernel.mockContext->getSVMAllocsManager(); auto sharedProperties = SVMAllocsManager::UnifiedMemoryProperties(InternalMemoryType::SHARED_UNIFIED_MEMORY, mockKernel.mockContext->getRootDeviceIndices(), 
mockKernel.mockContext->getDeviceBitfields()); auto unifiedMemoryAllocation = svmAllocationsManager->createSharedUnifiedMemoryAllocation(4096u, sharedProperties, mockKernel.mockContext->getSpecialQueue(pDevice->getRootDeviceIndex())); auto unifiedMemoryGraphicsAllocation = svmAllocationsManager->getSVMAlloc(unifiedMemoryAllocation); mockPageFaultManager->insertAllocation(unifiedMemoryAllocation, 4096u, svmAllocationsManager, mockKernel.mockContext->getSpecialQueue(pDevice->getRootDeviceIndex()), {}); EXPECT_EQ(mockPageFaultManager->transferToCpuCalled, 0); EXPECT_EQ(0u, mockKernel.mockKernel->kernelUnifiedMemoryGfxAllocations.size()); mockKernel.mockKernel->setUnifiedMemoryExecInfo(unifiedMemoryGraphicsAllocation->gpuAllocations.getGraphicsAllocation(pDevice->getRootDeviceIndex())); EXPECT_EQ(1u, mockKernel.mockKernel->kernelUnifiedMemoryGfxAllocations.size()); mockKernel.mockKernel->makeResident(commandStreamReceiver); EXPECT_EQ(mockPageFaultManager->allowMemoryAccessCalled, 0); EXPECT_EQ(mockPageFaultManager->protectMemoryCalled, 1); EXPECT_EQ(mockPageFaultManager->transferToCpuCalled, 0); EXPECT_EQ(mockPageFaultManager->transferToGpuCalled, 1); EXPECT_EQ(mockPageFaultManager->protectedMemoryAccessAddress, unifiedMemoryAllocation); EXPECT_EQ(mockPageFaultManager->protectedSize, 4096u); EXPECT_EQ(mockPageFaultManager->transferToGpuAddress, unifiedMemoryAllocation); mockKernel.mockKernel->clearUnifiedMemoryExecInfo(); EXPECT_EQ(0u, mockKernel.mockKernel->kernelUnifiedMemoryGfxAllocations.size()); svmAllocationsManager->freeSVMAlloc(unifiedMemoryAllocation); } HWTEST_F(KernelResidencyTest, givenSharedUnifiedMemoryAndNotRequiredMemSyncWhenMakeResidentIsCalledThenAllocationIsNotDecommited) { auto mockPageFaultManager = new MockPageFaultManager(); static_cast(this->pDevice->getExecutionEnvironment()->memoryManager.get())->pageFaultManager.reset(mockPageFaultManager); MockKernelWithInternals mockKernel(*this->pClDevice, nullptr, true); auto &commandStreamReceiver = 
this->pDevice->getUltCommandStreamReceiver(); auto svmAllocationsManager = mockKernel.mockContext->getSVMAllocsManager(); auto sharedProperties = SVMAllocsManager::UnifiedMemoryProperties(InternalMemoryType::SHARED_UNIFIED_MEMORY, mockKernel.mockContext->getRootDeviceIndices(), mockKernel.mockContext->getDeviceBitfields()); auto unifiedMemoryAllocation = svmAllocationsManager->createSharedUnifiedMemoryAllocation(4096u, sharedProperties, mockKernel.mockContext->getSpecialQueue(pDevice->getRootDeviceIndex())); auto unifiedMemoryGraphicsAllocation = svmAllocationsManager->getSVMAlloc(unifiedMemoryAllocation); mockPageFaultManager->insertAllocation(unifiedMemoryAllocation, 4096u, svmAllocationsManager, mockKernel.mockContext->getSpecialQueue(pDevice->getRootDeviceIndex()), {}); EXPECT_EQ(mockPageFaultManager->transferToCpuCalled, 0); auto gpuAllocation = unifiedMemoryGraphicsAllocation->gpuAllocations.getGraphicsAllocation(pDevice->getRootDeviceIndex()); mockKernel.mockKernel->kernelArguments[0] = {Kernel::kernelArgType::SVM_ALLOC_OBJ, gpuAllocation, unifiedMemoryAllocation, 4096u, gpuAllocation, sizeof(uintptr_t)}; mockKernel.mockKernel->setUnifiedMemorySyncRequirement(false); mockKernel.mockKernel->makeResident(commandStreamReceiver); EXPECT_EQ(mockPageFaultManager->allowMemoryAccessCalled, 0); EXPECT_EQ(mockPageFaultManager->protectMemoryCalled, 0); EXPECT_EQ(mockPageFaultManager->transferToCpuCalled, 0); EXPECT_EQ(mockPageFaultManager->transferToGpuCalled, 0); EXPECT_EQ(0u, mockKernel.mockKernel->kernelUnifiedMemoryGfxAllocations.size()); svmAllocationsManager->freeSVMAlloc(unifiedMemoryAllocation); } HWTEST_F(KernelResidencyTest, givenSharedUnifiedMemoryRequiredMemSyncWhenMakeResidentIsCalledThenAllocationIsDecommited) { auto mockPageFaultManager = new MockPageFaultManager(); static_cast(this->pDevice->getExecutionEnvironment()->memoryManager.get())->pageFaultManager.reset(mockPageFaultManager); MockKernelWithInternals mockKernel(*this->pClDevice, nullptr, true); 
auto &commandStreamReceiver = this->pDevice->getUltCommandStreamReceiver(); auto svmAllocationsManager = mockKernel.mockContext->getSVMAllocsManager(); auto sharedProperties = SVMAllocsManager::UnifiedMemoryProperties(InternalMemoryType::SHARED_UNIFIED_MEMORY, mockKernel.mockContext->getRootDeviceIndices(), mockKernel.mockContext->getDeviceBitfields()); auto unifiedMemoryAllocation = svmAllocationsManager->createSharedUnifiedMemoryAllocation(4096u, sharedProperties, mockKernel.mockContext->getSpecialQueue(pDevice->getRootDeviceIndex())); auto unifiedMemoryGraphicsAllocation = svmAllocationsManager->getSVMAlloc(unifiedMemoryAllocation); mockPageFaultManager->insertAllocation(unifiedMemoryAllocation, 4096u, svmAllocationsManager, mockKernel.mockContext->getSpecialQueue(pDevice->getRootDeviceIndex()), {}); auto gpuAllocation = unifiedMemoryGraphicsAllocation->gpuAllocations.getGraphicsAllocation(pDevice->getRootDeviceIndex()); EXPECT_EQ(mockPageFaultManager->transferToCpuCalled, 0); mockKernel.mockKernel->kernelArguments[0] = {Kernel::kernelArgType::SVM_ALLOC_OBJ, gpuAllocation, unifiedMemoryAllocation, 4096u, gpuAllocation, sizeof(uintptr_t)}; mockKernel.mockKernel->setUnifiedMemorySyncRequirement(true); mockKernel.mockKernel->makeResident(commandStreamReceiver); EXPECT_EQ(mockPageFaultManager->allowMemoryAccessCalled, 0); EXPECT_EQ(mockPageFaultManager->protectMemoryCalled, 1); EXPECT_EQ(mockPageFaultManager->transferToCpuCalled, 0); EXPECT_EQ(mockPageFaultManager->transferToGpuCalled, 1); EXPECT_EQ(0u, mockKernel.mockKernel->kernelUnifiedMemoryGfxAllocations.size()); svmAllocationsManager->freeSVMAlloc(unifiedMemoryAllocation); } HWTEST_F(KernelResidencyTest, givenSharedUnifiedMemoryAllocPageFaultManagerAndIndirectAllocsAllowedWhenMakeResidentIsCalledThenAllocationIsDecommited) { auto mockPageFaultManager = new MockPageFaultManager(); static_cast(this->pDevice->getExecutionEnvironment()->memoryManager.get())->pageFaultManager.reset(mockPageFaultManager); 
MockKernelWithInternals mockKernel(*this->pClDevice); auto &commandStreamReceiver = this->pDevice->getUltCommandStreamReceiver(); auto svmAllocationsManager = mockKernel.mockContext->getSVMAllocsManager(); auto sharedProperties = SVMAllocsManager::UnifiedMemoryProperties(InternalMemoryType::SHARED_UNIFIED_MEMORY, mockKernel.mockContext->getRootDeviceIndices(), mockKernel.mockContext->getDeviceBitfields()); auto unifiedMemoryAllocation = svmAllocationsManager->createSharedUnifiedMemoryAllocation(4096u, sharedProperties, mockKernel.mockContext->getSpecialQueue(pDevice->getRootDeviceIndex())); mockPageFaultManager->insertAllocation(unifiedMemoryAllocation, 4096u, svmAllocationsManager, mockKernel.mockContext->getSpecialQueue(pDevice->getRootDeviceIndex()), {}); EXPECT_EQ(mockPageFaultManager->transferToCpuCalled, 0); mockKernel.mockKernel->unifiedMemoryControls.indirectSharedAllocationsAllowed = true; mockKernel.mockKernel->makeResident(commandStreamReceiver); EXPECT_EQ(mockPageFaultManager->allowMemoryAccessCalled, 0); EXPECT_EQ(mockPageFaultManager->protectMemoryCalled, 1); EXPECT_EQ(mockPageFaultManager->transferToCpuCalled, 0); EXPECT_EQ(mockPageFaultManager->transferToGpuCalled, 1); EXPECT_EQ(mockPageFaultManager->protectedMemoryAccessAddress, unifiedMemoryAllocation); EXPECT_EQ(mockPageFaultManager->protectedSize, 4096u); EXPECT_EQ(mockPageFaultManager->transferToGpuAddress, unifiedMemoryAllocation); mockKernel.mockKernel->clearUnifiedMemoryExecInfo(); EXPECT_EQ(0u, mockKernel.mockKernel->kernelUnifiedMemoryGfxAllocations.size()); svmAllocationsManager->freeSVMAlloc(unifiedMemoryAllocation); } HWTEST_F(KernelResidencyTest, givenKernelWhenSetKernelExecInfoWithUnifiedMemoryIsCalledThenAllocationIsStoredWithinKernel) { MockKernelWithInternals mockKernel(*this->pClDevice); auto &commandStreamReceiver = this->pDevice->getUltCommandStreamReceiver(); auto svmAllocationsManager = mockKernel.mockContext->getSVMAllocsManager(); auto deviceProperties = 
SVMAllocsManager::UnifiedMemoryProperties(InternalMemoryType::DEVICE_UNIFIED_MEMORY, mockKernel.mockContext->getRootDeviceIndices(), mockKernel.mockContext->getDeviceBitfields()); deviceProperties.device = pDevice; auto unifiedMemoryAllocation = svmAllocationsManager->createUnifiedMemoryAllocation(4096u, deviceProperties); auto unifiedMemoryGraphicsAllocation = svmAllocationsManager->getSVMAlloc(unifiedMemoryAllocation); EXPECT_EQ(0u, mockKernel.mockKernel->kernelUnifiedMemoryGfxAllocations.size()); mockKernel.mockKernel->setUnifiedMemoryExecInfo(unifiedMemoryGraphicsAllocation->gpuAllocations.getGraphicsAllocation(pDevice->getRootDeviceIndex())); EXPECT_EQ(1u, mockKernel.mockKernel->kernelUnifiedMemoryGfxAllocations.size()); EXPECT_EQ(mockKernel.mockKernel->kernelUnifiedMemoryGfxAllocations[0]->getGpuAddress(), castToUint64(unifiedMemoryAllocation)); mockKernel.mockKernel->makeResident(this->pDevice->getGpgpuCommandStreamReceiver()); EXPECT_EQ(1u, commandStreamReceiver.getResidencyAllocations().size()); EXPECT_EQ(commandStreamReceiver.getResidencyAllocations()[0]->getGpuAddress(), castToUint64(unifiedMemoryAllocation)); mockKernel.mockKernel->clearUnifiedMemoryExecInfo(); EXPECT_EQ(0u, mockKernel.mockKernel->kernelUnifiedMemoryGfxAllocations.size()); svmAllocationsManager->freeSVMAlloc(unifiedMemoryAllocation); } HWTEST_F(KernelResidencyTest, givenKernelWhenclSetKernelExecInfoWithUnifiedMemoryIsCalledThenAllocationIsStoredWithinKernel) { REQUIRE_SVM_OR_SKIP(pClDevice); MockKernelWithInternals mockKernel(*this->pClDevice); auto svmAllocationsManager = mockKernel.mockContext->getSVMAllocsManager(); auto deviceProperties = SVMAllocsManager::UnifiedMemoryProperties(InternalMemoryType::DEVICE_UNIFIED_MEMORY, mockKernel.mockContext->getRootDeviceIndices(), mockKernel.mockContext->getDeviceBitfields()); deviceProperties.device = pDevice; auto unifiedMemoryAllocation = svmAllocationsManager->createUnifiedMemoryAllocation(4096u, deviceProperties); auto 
unifiedMemoryAllocation2 = svmAllocationsManager->createUnifiedMemoryAllocation(4096u, deviceProperties); auto status = clSetKernelExecInfo(mockKernel.mockMultiDeviceKernel, CL_KERNEL_EXEC_INFO_USM_PTRS_INTEL, sizeof(unifiedMemoryAllocation), &unifiedMemoryAllocation); EXPECT_EQ(CL_SUCCESS, status); EXPECT_EQ(1u, mockKernel.mockKernel->kernelUnifiedMemoryGfxAllocations.size()); EXPECT_EQ(mockKernel.mockKernel->kernelUnifiedMemoryGfxAllocations[0]->getGpuAddress(), castToUint64(unifiedMemoryAllocation)); status = clSetKernelExecInfo(mockKernel.mockMultiDeviceKernel, CL_KERNEL_EXEC_INFO_USM_PTRS_INTEL, sizeof(unifiedMemoryAllocation), &unifiedMemoryAllocation2); EXPECT_EQ(CL_SUCCESS, status); EXPECT_EQ(1u, mockKernel.mockKernel->kernelUnifiedMemoryGfxAllocations.size()); EXPECT_EQ(mockKernel.mockKernel->kernelUnifiedMemoryGfxAllocations[0]->getGpuAddress(), castToUint64(unifiedMemoryAllocation2)); svmAllocationsManager->freeSVMAlloc(unifiedMemoryAllocation); svmAllocationsManager->freeSVMAlloc(unifiedMemoryAllocation2); } HWTEST_F(KernelResidencyTest, givenKernelWhenclSetKernelExecInfoWithUnifiedMemoryDevicePropertyAndDisableIndirectAccessNotSetThenKernelControlIsChanged) { DebugManagerStateRestore restorer; DebugManager.flags.DisableIndirectAccess.set(0); REQUIRE_SVM_OR_SKIP(pClDevice); MockKernelWithInternals mockKernel(*this->pClDevice); cl_bool enableIndirectDeviceAccess = CL_TRUE; auto status = clSetKernelExecInfo(mockKernel.mockMultiDeviceKernel, CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL, sizeof(cl_bool), &enableIndirectDeviceAccess); EXPECT_EQ(CL_SUCCESS, status); EXPECT_TRUE(mockKernel.mockKernel->unifiedMemoryControls.indirectDeviceAllocationsAllowed); enableIndirectDeviceAccess = CL_FALSE; status = clSetKernelExecInfo(mockKernel.mockMultiDeviceKernel, CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL, sizeof(cl_bool), &enableIndirectDeviceAccess); EXPECT_EQ(CL_SUCCESS, status); 
EXPECT_FALSE(mockKernel.mockKernel->unifiedMemoryControls.indirectDeviceAllocationsAllowed); } HWTEST_F(KernelResidencyTest, givenKernelWhenclSetKernelExecInfoWithUnifiedMemoryDevicePropertyAndDisableIndirectAccessSetThenKernelControlIsNotSet) { DebugManagerStateRestore restorer; DebugManager.flags.DisableIndirectAccess.set(1); REQUIRE_SVM_OR_SKIP(pClDevice); MockKernelWithInternals mockKernel(*this->pClDevice); cl_bool enableIndirectDeviceAccess = CL_TRUE; auto status = clSetKernelExecInfo(mockKernel.mockMultiDeviceKernel, CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL, sizeof(cl_bool), &enableIndirectDeviceAccess); EXPECT_EQ(CL_SUCCESS, status); EXPECT_FALSE(mockKernel.mockKernel->unifiedMemoryControls.indirectDeviceAllocationsAllowed); } HWTEST_F(KernelResidencyTest, givenKernelWhenclSetKernelExecInfoWithUnifiedMemoryDevicePropertyAndDisableIndirectAccessNotSetAndNoIndirectAccessInKernelThenKernelControlIsNotSet) { DebugManagerStateRestore restorer; DebugManager.flags.DisableIndirectAccess.set(0); REQUIRE_SVM_OR_SKIP(pClDevice); MockKernelWithInternals mockKernel(*this->pClDevice); mockKernel.mockKernel->kernelHasIndirectAccess = false; cl_bool enableIndirectDeviceAccess = CL_TRUE; auto status = clSetKernelExecInfo(mockKernel.mockMultiDeviceKernel, CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL, sizeof(cl_bool), &enableIndirectDeviceAccess); EXPECT_EQ(CL_SUCCESS, status); EXPECT_FALSE(mockKernel.mockKernel->unifiedMemoryControls.indirectDeviceAllocationsAllowed); } HWTEST_F(KernelResidencyTest, givenKernelWhenclSetKernelExecInfoWithUnifiedMemoryDevicePropertyIsCalledThenKernelControlIsChanged) { REQUIRE_SVM_OR_SKIP(pClDevice); MockKernelWithInternals mockKernel(*this->pClDevice); cl_bool enableIndirectDeviceAccess = CL_TRUE; auto status = clSetKernelExecInfo(mockKernel.mockMultiDeviceKernel, CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL, sizeof(cl_bool), &enableIndirectDeviceAccess); EXPECT_EQ(CL_SUCCESS, status); 
EXPECT_TRUE(mockKernel.mockKernel->unifiedMemoryControls.indirectDeviceAllocationsAllowed); enableIndirectDeviceAccess = CL_FALSE; status = clSetKernelExecInfo(mockKernel.mockMultiDeviceKernel, CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL, sizeof(cl_bool), &enableIndirectDeviceAccess); EXPECT_EQ(CL_SUCCESS, status); EXPECT_FALSE(mockKernel.mockKernel->unifiedMemoryControls.indirectDeviceAllocationsAllowed); } HWTEST_F(KernelResidencyTest, givenKernelWhenclSetKernelExecInfoWithUnifiedMemoryHostPropertyAndDisableIndirectAccessNotSetThenKernelControlIsChanged) { DebugManagerStateRestore restorer; DebugManager.flags.DisableIndirectAccess.set(0); REQUIRE_SVM_OR_SKIP(pClDevice); MockKernelWithInternals mockKernel(*this->pClDevice); cl_bool enableIndirectHostAccess = CL_TRUE; auto status = clSetKernelExecInfo(mockKernel.mockMultiDeviceKernel, CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL, sizeof(cl_bool), &enableIndirectHostAccess); EXPECT_EQ(CL_SUCCESS, status); EXPECT_TRUE(mockKernel.mockKernel->unifiedMemoryControls.indirectHostAllocationsAllowed); enableIndirectHostAccess = CL_FALSE; status = clSetKernelExecInfo(mockKernel.mockMultiDeviceKernel, CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL, sizeof(cl_bool), &enableIndirectHostAccess); EXPECT_EQ(CL_SUCCESS, status); EXPECT_FALSE(mockKernel.mockKernel->unifiedMemoryControls.indirectHostAllocationsAllowed); } HWTEST_F(KernelResidencyTest, givenKernelWhenclSetKernelExecInfoWithUnifiedMemoryHostPropertyAndDisableIndirectAccessSetThenKernelControlIsNotSet) { DebugManagerStateRestore restorer; DebugManager.flags.DisableIndirectAccess.set(1); REQUIRE_SVM_OR_SKIP(pClDevice); MockKernelWithInternals mockKernel(*this->pClDevice); cl_bool enableIndirectHostAccess = CL_TRUE; auto status = clSetKernelExecInfo(mockKernel.mockMultiDeviceKernel, CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL, sizeof(cl_bool), &enableIndirectHostAccess); EXPECT_EQ(CL_SUCCESS, status); 
EXPECT_FALSE(mockKernel.mockKernel->unifiedMemoryControls.indirectHostAllocationsAllowed); } HWTEST_F(KernelResidencyTest, givenKernelWhenclSetKernelExecInfoWithUnifiedMemoryHostPropertyAndDisableIndirectAccessNotSetAndNoIndirectAccessInKernelThenKernelControlIsNotSet) { DebugManagerStateRestore restorer; DebugManager.flags.DisableIndirectAccess.set(0); REQUIRE_SVM_OR_SKIP(pClDevice); MockKernelWithInternals mockKernel(*this->pClDevice); mockKernel.mockKernel->kernelHasIndirectAccess = false; cl_bool enableIndirectHostAccess = CL_TRUE; auto status = clSetKernelExecInfo(mockKernel.mockMultiDeviceKernel, CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL, sizeof(cl_bool), &enableIndirectHostAccess); EXPECT_EQ(CL_SUCCESS, status); EXPECT_FALSE(mockKernel.mockKernel->unifiedMemoryControls.indirectHostAllocationsAllowed); } HWTEST_F(KernelResidencyTest, givenKernelWhenclSetKernelExecInfoWithUnifiedMemoryHostPropertyIsCalledThenKernelControlIsChanged) { REQUIRE_SVM_OR_SKIP(pClDevice); MockKernelWithInternals mockKernel(*this->pClDevice); cl_bool enableIndirectHostAccess = CL_TRUE; auto status = clSetKernelExecInfo(mockKernel.mockMultiDeviceKernel, CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL, sizeof(cl_bool), &enableIndirectHostAccess); EXPECT_EQ(CL_SUCCESS, status); EXPECT_TRUE(mockKernel.mockKernel->unifiedMemoryControls.indirectHostAllocationsAllowed); enableIndirectHostAccess = CL_FALSE; status = clSetKernelExecInfo(mockKernel.mockMultiDeviceKernel, CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL, sizeof(cl_bool), &enableIndirectHostAccess); EXPECT_EQ(CL_SUCCESS, status); EXPECT_FALSE(mockKernel.mockKernel->unifiedMemoryControls.indirectHostAllocationsAllowed); } HWTEST_F(KernelResidencyTest, givenKernelWhenclSetKernelExecInfoWithUnifiedMemorySharedPropertyAndDisableIndirectAccessNotSetThenKernelControlIsChanged) { DebugManagerStateRestore restorer; DebugManager.flags.DisableIndirectAccess.set(0); REQUIRE_SVM_OR_SKIP(pClDevice); MockKernelWithInternals 
mockKernel(*this->pClDevice); cl_bool enableIndirectSharedAccess = CL_TRUE; auto status = clSetKernelExecInfo(mockKernel.mockMultiDeviceKernel, CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL, sizeof(cl_bool), &enableIndirectSharedAccess); EXPECT_EQ(CL_SUCCESS, status); EXPECT_TRUE(mockKernel.mockKernel->unifiedMemoryControls.indirectSharedAllocationsAllowed); enableIndirectSharedAccess = CL_FALSE; status = clSetKernelExecInfo(mockKernel.mockMultiDeviceKernel, CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL, sizeof(cl_bool), &enableIndirectSharedAccess); EXPECT_EQ(CL_SUCCESS, status); EXPECT_FALSE(mockKernel.mockKernel->unifiedMemoryControls.indirectSharedAllocationsAllowed); } HWTEST_F(KernelResidencyTest, givenKernelWhenclSetKernelExecInfoWithUnifiedMemorySharedPropertyAndDisableIndirectAccessSetThenKernelControlIsNotSet) { DebugManagerStateRestore restorer; DebugManager.flags.DisableIndirectAccess.set(1); REQUIRE_SVM_OR_SKIP(pClDevice); MockKernelWithInternals mockKernel(*this->pClDevice); cl_bool enableIndirectSharedAccess = CL_TRUE; auto status = clSetKernelExecInfo(mockKernel.mockMultiDeviceKernel, CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL, sizeof(cl_bool), &enableIndirectSharedAccess); EXPECT_EQ(CL_SUCCESS, status); EXPECT_FALSE(mockKernel.mockKernel->unifiedMemoryControls.indirectSharedAllocationsAllowed); } HWTEST_F(KernelResidencyTest, givenKernelWhenclSetKernelExecInfoWithUnifiedMemorySharedPropertyAndDisableIndirectAccessNotSetAndNoIndirectAccessInKernelThenKernelControlIsNotSet) { DebugManagerStateRestore restorer; DebugManager.flags.DisableIndirectAccess.set(0); REQUIRE_SVM_OR_SKIP(pClDevice); MockKernelWithInternals mockKernel(*this->pClDevice); mockKernel.mockKernel->kernelHasIndirectAccess = false; cl_bool enableIndirectSharedAccess = CL_TRUE; auto status = clSetKernelExecInfo(mockKernel.mockMultiDeviceKernel, CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL, sizeof(cl_bool), &enableIndirectSharedAccess); EXPECT_EQ(CL_SUCCESS, status); 
EXPECT_FALSE(mockKernel.mockKernel->unifiedMemoryControls.indirectSharedAllocationsAllowed); } HWTEST_F(KernelResidencyTest, givenKernelWhenclSetKernelExecInfoWithUnifiedMemorySharedPropertyIsCalledThenKernelControlIsChanged) { REQUIRE_SVM_OR_SKIP(pClDevice); MockKernelWithInternals mockKernel(*this->pClDevice); cl_bool enableIndirectSharedAccess = CL_TRUE; auto status = clSetKernelExecInfo(mockKernel.mockMultiDeviceKernel, CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL, sizeof(cl_bool), &enableIndirectSharedAccess); EXPECT_EQ(CL_SUCCESS, status); EXPECT_TRUE(mockKernel.mockKernel->unifiedMemoryControls.indirectSharedAllocationsAllowed); enableIndirectSharedAccess = CL_FALSE; status = clSetKernelExecInfo(mockKernel.mockMultiDeviceKernel, CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL, sizeof(cl_bool), &enableIndirectSharedAccess); EXPECT_EQ(CL_SUCCESS, status); EXPECT_FALSE(mockKernel.mockKernel->unifiedMemoryControls.indirectSharedAllocationsAllowed); } HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgLoadNorKernelArgStoreNorKernelArgAtomicThenKernelHasIndirectAccessIsSetToFalse) { auto pKernelInfo = std::make_unique(); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1; pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgLoad = false; pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgStore = false; pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic = false; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.storeMakeResidentAllocations = true; auto memoryManager = commandStreamReceiver.getMemoryManager(); pKernelInfo->kernelAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); MockProgram program(toClDeviceVector(*pClDevice)); MockContext ctx; program.setContext(&ctx); program.buildInfos[pDevice->getRootDeviceIndex()].globalSurface = new MockGraphicsAllocation(); 
std::unique_ptr pKernel(new MockKernel(&program, *pKernelInfo, *pClDevice)); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); EXPECT_FALSE(pKernel->getHasIndirectAccess()); memoryManager->freeGraphicsMemory(pKernelInfo->kernelAllocation); } HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgLoadThenKernelHasIndirectAccessIsSetToTrue) { auto pKernelInfo = std::make_unique(); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1; pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgLoad = true; pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgStore = false; pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic = false; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.storeMakeResidentAllocations = true; auto memoryManager = commandStreamReceiver.getMemoryManager(); pKernelInfo->kernelAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); MockProgram program(toClDeviceVector(*pClDevice)); MockContext ctx; program.setContext(&ctx); program.buildInfos[pDevice->getRootDeviceIndex()].globalSurface = new MockGraphicsAllocation(); std::unique_ptr pKernel(new MockKernel(&program, *pKernelInfo, *pClDevice)); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); EXPECT_TRUE(pKernel->getHasIndirectAccess()); memoryManager->freeGraphicsMemory(pKernelInfo->kernelAllocation); } HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgStoreThenKernelHasIndirectAccessIsSetToTrue) { auto pKernelInfo = std::make_unique(); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1; pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgLoad = false; pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgStore = true; pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic = false; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); 
commandStreamReceiver.storeMakeResidentAllocations = true; auto memoryManager = commandStreamReceiver.getMemoryManager(); pKernelInfo->kernelAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); MockProgram program(toClDeviceVector(*pClDevice)); MockContext ctx; program.setContext(&ctx); program.buildInfos[pDevice->getRootDeviceIndex()].globalSurface = new MockGraphicsAllocation(); std::unique_ptr pKernel(new MockKernel(&program, *pKernelInfo, *pClDevice)); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); EXPECT_TRUE(pKernel->getHasIndirectAccess()); memoryManager->freeGraphicsMemory(pKernelInfo->kernelAllocation); } HWTEST_F(KernelResidencyTest, givenKernelWithNoKernelArgAtomicThenKernelHasIndirectAccessIsSetToTrue) { auto pKernelInfo = std::make_unique(); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1; pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgLoad = false; pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgStore = false; pKernelInfo->kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic = true; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.storeMakeResidentAllocations = true; auto memoryManager = commandStreamReceiver.getMemoryManager(); pKernelInfo->kernelAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); MockProgram program(toClDeviceVector(*pClDevice)); MockContext ctx; program.setContext(&ctx); program.buildInfos[pDevice->getRootDeviceIndex()].globalSurface = new MockGraphicsAllocation(); std::unique_ptr pKernel(new MockKernel(&program, *pKernelInfo, *pClDevice)); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); EXPECT_TRUE(pKernel->getHasIndirectAccess()); memoryManager->freeGraphicsMemory(pKernelInfo->kernelAllocation); } TEST(KernelConfigTests, 
givenTwoKernelConfigsWhenCompareThenResultsAreCorrect) {
    // KernelConfig equality is checked against gws, lws and offsets: configs built from the
    // same values compare equal, and changing any single component must break equality.
    // NOTE(review): template argument lists (Vec3, static_cast, getUltCommandStreamReceiver)
    // look stripped in this copy of the file - confirm against upstream.
    Vec3 lws{1, 1, 1};
    Vec3 gws{1, 1, 1};
    Vec3 offsets{1, 1, 1};
    MockKernel::KernelConfig config{gws, lws, offsets};
    MockKernel::KernelConfig config2{gws, lws, offsets};
    EXPECT_TRUE(config == config2);

    // Only offsets differ.
    config2.offsets.z = 2;
    EXPECT_FALSE(config == config2);

    // Only lws differs.
    config2.lws.z = 2;
    config2.offsets.z = 1;
    EXPECT_FALSE(config == config2);

    // Only gws differs.
    config2.lws.z = 1;
    config2.gws.z = 2;
    EXPECT_FALSE(config == config2);
}

// Walks performKernelTuning() through the states this test expects when EnableKernelTunning
// is set to 2: no entry -> STANDARD_TUNNING_IN_PROGRESS -> SUBDEVICE_TUNNING_IN_PROGRESS ->
// TUNNING_DONE. Every map lookup that is dereferenced afterwards is guarded by ASSERT_NE so
// a missing entry fails the test instead of dereferencing end().
HWTEST_F(KernelResidencyTest, givenEnableFullKernelTuningWhenPerformTunningThenKernelConfigDataIsTracked) {
    using TimestampPacketType = typename FamilyType::TimestampPacketType;
    DebugManagerStateRestore restorer;
    DebugManager.flags.EnableKernelTunning.set(2u);

    auto &commandStreamReceiver = this->pDevice->getUltCommandStreamReceiver();
    MockKernelWithInternals mockKernel(*this->pClDevice);
    auto &kernel = *mockKernel.mockKernel;
    auto &submissions = kernel.kernelSubmissionMap;

    Vec3 lws{1, 1, 1};
    Vec3 gws{1, 1, 1};
    Vec3 offsets{1, 1, 1};
    MockKernel::KernelConfig config{gws, lws, offsets};

    MockTimestampPacketContainer container(*commandStreamReceiver.getTimestampPacketAllocator(), 1);
    MockTimestampPacketContainer subdeviceContainer(*commandStreamReceiver.getTimestampPacketAllocator(), 2);

    // Nothing is tracked before the first tuning call.
    auto result = submissions.find(config);
    EXPECT_EQ(result, submissions.end());

    // First call with the single-node container: entry appears, standard tuning in progress.
    kernel.performKernelTuning(commandStreamReceiver, lws, gws, offsets, &container);
    result = submissions.find(config);
    ASSERT_NE(result, submissions.end());
    EXPECT_EQ(result->second.status, MockKernel::TunningStatus::STANDARD_TUNNING_IN_PROGRESS);
    EXPECT_FALSE(kernel.singleSubdevicePreferredInCurrentEnqueue);

    // Second call with the two-node container: subdevice tuning in progress.
    kernel.performKernelTuning(commandStreamReceiver, lws, gws, offsets, &subdeviceContainer);
    result = submissions.find(config);
    ASSERT_NE(result, submissions.end());
    EXPECT_EQ(result->second.status, MockKernel::TunningStatus::SUBDEVICE_TUNNING_IN_PROGRESS);
    EXPECT_TRUE(kernel.singleSubdevicePreferredInCurrentEnqueue);

    // Further calls keep the status until timestamp data has been assigned to the nodes.
    kernel.performKernelTuning(commandStreamReceiver, lws, gws, offsets, &container);
    result = submissions.find(config);
    ASSERT_NE(result, submissions.end());
    EXPECT_EQ(result->second.status, MockKernel::TunningStatus::SUBDEVICE_TUNNING_IN_PROGRESS);
    EXPECT_FALSE(kernel.singleSubdevicePreferredInCurrentEnqueue);

    // Assign timestamp data to the standard container's node.
    TimestampPacketType data[4] = {static_cast(container.getNode(0u)->getContextStartValue(0)), static_cast(container.getNode(0u)->getGlobalStartValue(0)), 2, 2};
    container.getNode(0u)->assignDataToAllTimestamps(0, data);

    kernel.performKernelTuning(commandStreamReceiver, lws, gws, offsets, &container);
    result = submissions.find(config);
    ASSERT_NE(result, submissions.end());
    EXPECT_EQ(result->second.status, MockKernel::TunningStatus::SUBDEVICE_TUNNING_IN_PROGRESS);
    EXPECT_FALSE(kernel.singleSubdevicePreferredInCurrentEnqueue);

    // Assign data to the first subdevice node only: both timestamp sets are still held.
    data[0] = static_cast(subdeviceContainer.getNode(0u)->getContextStartValue(0));
    data[1] = static_cast(subdeviceContainer.getNode(0u)->getGlobalStartValue(0));
    data[2] = 2;
    data[3] = 2;
    subdeviceContainer.getNode(0u)->assignDataToAllTimestamps(0, data);

    kernel.performKernelTuning(commandStreamReceiver, lws, gws, offsets, &container);
    result = submissions.find(config);
    ASSERT_NE(result, submissions.end());
    EXPECT_NE(result->second.kernelStandardTimestamps.get(), nullptr);
    EXPECT_NE(result->second.kernelSubdeviceTimestamps.get(), nullptr);
    EXPECT_EQ(result->second.status, MockKernel::TunningStatus::SUBDEVICE_TUNNING_IN_PROGRESS);
    EXPECT_FALSE(kernel.singleSubdevicePreferredInCurrentEnqueue);

    // Assign data to the second subdevice node: tuning is expected to finish and the
    // stored timestamp containers to be released.
    data[0] = static_cast(subdeviceContainer.getNode(1u)->getContextStartValue(0));
    data[1] = static_cast(subdeviceContainer.getNode(1u)->getGlobalStartValue(0));
    data[2] = 2;
    data[3] = 2;
    subdeviceContainer.getNode(1u)->assignDataToAllTimestamps(0, data);

    kernel.performKernelTuning(commandStreamReceiver, lws, gws, offsets, &container);
    result = submissions.find(config);
    ASSERT_NE(result, submissions.end());
    EXPECT_EQ(result->second.kernelStandardTimestamps.get(), nullptr);
    EXPECT_EQ(result->second.kernelSubdeviceTimestamps.get(), nullptr);
    EXPECT_EQ(result->second.status, MockKernel::TunningStatus::TUNNING_DONE);
    EXPECT_EQ(result->second.singleSubdevicePreferred, kernel.singleSubdevicePreferredInCurrentEnqueue);

    // Once done, later calls reuse the stored decision.
    kernel.performKernelTuning(commandStreamReceiver, lws, gws, offsets, &container);
    result = submissions.find(config);
    ASSERT_NE(result, submissions.end());
    EXPECT_EQ(result->second.status, MockKernel::TunningStatus::TUNNING_DONE);
    EXPECT_EQ(result->second.singleSubdevicePreferred, kernel.singleSubdevicePreferredInCurrentEnqueue);
}

// With EnableKernelTunning set to 1 the test expects no per-config entry to be tracked.
HWTEST_F(KernelResidencyTest, givenSimpleKernelTunningAndNoAtomicsWhenPerformTunningThenSingleSubdeviceIsPreferred) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.EnableKernelTunning.set(1u);

    auto &commandStreamReceiver = this->pDevice->getUltCommandStreamReceiver();
    MockKernelWithInternals mockKernel(*this->pClDevice);

    Vec3 lws{1, 1, 1};
    Vec3 gws{1, 1, 1};
    Vec3 offsets{1, 1, 1};
    MockKernel::KernelConfig config{gws, lws, offsets};

    MockTimestampPacketContainer container(*commandStreamReceiver.getTimestampPacketAllocator(), 1);

    auto result = mockKernel.mockKernel->kernelSubmissionMap.find(config);
    EXPECT_EQ(result, mockKernel.mockKernel->kernelSubmissionMap.end());

    mockKernel.mockKernel->performKernelTuning(commandStreamReceiver, lws, gws, offsets,
&container);

    // Simple tuning must not create a per-config entry in the submission map.
    result = mockKernel.mockKernel->kernelSubmissionMap.find(config);
    EXPECT_EQ(result, mockKernel.mockKernel->kernelSubmissionMap.end());

    // The single-subdevice preference is expected to be the opposite of useGlobalAtomics.
    EXPECT_NE(mockKernel.mockKernel->isSingleSubdevicePreferred(), mockKernel.mockKernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics);
}

// Kernel whose three arguments are all images (argument 0 flagged as a media image):
// usesOnlyImages() is false before initialize() and true afterwards.
TEST(KernelImageDetectionTests, givenKernelWithImagesOnlyWhenItIsAskedIfItHasImagesOnlyThenTrueIsReturned) {
    auto pKernelInfo = std::make_unique();
    pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;

    pKernelInfo->addArgImage(0);
    pKernelInfo->argAt(0).getExtendedTypeInfo().isMediaImage = true;
    pKernelInfo->addArgImage(1);
    pKernelInfo->addArgImage(2);

    const auto rootDeviceIndex = 0u;
    auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get(), rootDeviceIndex));
    auto context = clUniquePtr(new MockContext(device.get()));
    auto program = clUniquePtr(new MockProgram(context.get(), false, toClDeviceVector(*device)));
    auto kernel = std::make_unique(program.get(), *pKernelInfo, *device);

    // The flag is only evaluated by initialize().
    EXPECT_FALSE(kernel->usesOnlyImages());
    kernel->initialize();
    EXPECT_TRUE(kernel->usesOnlyImages());
    EXPECT_TRUE(kernel->usesImages());
}

// Kernel mixing image arguments (0 and 2) with a buffer argument (1): it uses images,
// but not only images.
TEST(KernelImageDetectionTests, givenKernelWithImagesAndBuffersWhenItIsAskedIfItHasImagesOnlyThenFalseIsReturned) {
    auto pKernelInfo = std::make_unique();
    pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;

    pKernelInfo->addArgImage(0);
    pKernelInfo->argAt(0).getExtendedTypeInfo().isMediaImage = true;
    pKernelInfo->addArgBuffer(1);
    pKernelInfo->addArgImage(2);

    const auto rootDeviceIndex = 0u;
    auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get(), rootDeviceIndex));
    auto context = clUniquePtr(new MockContext(device.get()));
    auto program = clUniquePtr(new MockProgram(context.get(), false, toClDeviceVector(*device)));
    auto kernel = std::make_unique(program.get(), *pKernelInfo, *device);

    EXPECT_FALSE(kernel->usesOnlyImages());
    kernel->initialize();
EXPECT_FALSE(kernel->usesOnlyImages());
    EXPECT_TRUE(kernel->usesImages());
}

// Kernel with a single buffer argument: neither usesOnlyImages() nor usesImages() is set,
// before or after initialize().
TEST(KernelImageDetectionTests, givenKernelWithNoImagesWhenItIsAskedIfItHasImagesOnlyThenFalseIsReturned) {
    // NOTE(review): template argument lists on make_unique/unique_ptr look stripped in
    // this copy of the file - confirm against upstream.
    auto pKernelInfo = std::make_unique();
    pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;

    pKernelInfo->addArgBuffer(0);

    const auto rootDeviceIndex = 0u;
    auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get(), rootDeviceIndex));
    auto context = clUniquePtr(new MockContext(device.get()));
    auto program = clUniquePtr(new MockProgram(context.get(), false, toClDeviceVector(*device)));
    auto kernel = std::make_unique(program.get(), *pKernelInfo, *device);

    EXPECT_FALSE(kernel->usesOnlyImages());
    kernel->initialize();
    EXPECT_FALSE(kernel->usesOnlyImages());
    EXPECT_FALSE(kernel->usesImages());
}

// Creates an NV12 image and a Y-plane image derived from it, binds the plane as a kernel
// argument and makes the kernel resident.
HWTEST_F(KernelResidencyTest, WhenMakingArgsResidentThenImageFromImageCheckIsCorrect) {
    ASSERT_NE(nullptr, pDevice);

    // create NV12 image
    // Each line comment sits on its own line so it cannot swallow the statements after it.
    cl_mem_flags flags = CL_MEM_READ_ONLY | CL_MEM_HOST_NO_ACCESS;
    cl_image_format imageFormat;
    imageFormat.image_channel_data_type = CL_UNORM_INT8;
    imageFormat.image_channel_order = CL_NV12_INTEL;
    auto surfaceFormat = Image::getSurfaceFormatFromTable(
        flags, &imageFormat, pClDevice->getHardwareInfo().capabilityTable.supportsOcl21Features);

    cl_image_desc imageDesc = {};
    imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D;
    imageDesc.image_width = 16;
    imageDesc.image_height = 16;
    imageDesc.image_depth = 1;

    // Start from an error value so a create() that never writes retVal is noticeable.
    cl_int retVal = CL_INVALID_VALUE;
    MockContext context;
    std::unique_ptr imageNV12(
        Image::create(&context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context.getDevice(0)->getDevice()),
                      flags, 0, surfaceFormat, &imageDesc, nullptr, retVal));
    // Fail here instead of dereferencing a null image below.
    ASSERT_NE(nullptr, imageNV12.get());
    EXPECT_EQ(imageNV12->getMediaPlaneType(), 0u);

    // create Y plane
    imageFormat.image_channel_order = CL_R;
    flags = CL_MEM_READ_ONLY;
    surfaceFormat = Image::getSurfaceFormatFromTable(
        flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features);
imageDesc.image_width = 0; imageDesc.image_height = 0; imageDesc.image_depth = 0; imageDesc.mem_object = imageNV12.get(); std::unique_ptr imageY( Image::create(&context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context.getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal)); EXPECT_EQ(imageY->getMediaPlaneType(), 0u); auto pKernelInfo = std::make_unique(); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1; pKernelInfo->addArgImage(0); auto program = std::make_unique(toClDeviceVector(*pClDevice)); program->setContext(&context); std::unique_ptr pKernel(new MockKernel(program.get(), *pKernelInfo, *pClDevice)); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); pKernel->storeKernelArg(0, Kernel::IMAGE_OBJ, (cl_mem)imageY.get(), NULL, 0); pKernel->makeResident(pDevice->getGpgpuCommandStreamReceiver()); EXPECT_FALSE(imageNV12->isImageFromImage()); EXPECT_TRUE(imageY->isImageFromImage()); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); EXPECT_EQ(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushBefore, commandStreamReceiver.samplerCacheFlushRequired); } struct KernelExecutionEnvironmentTest : public Test { void SetUp() override { ClDeviceFixture::SetUp(); program = std::make_unique(toClDeviceVector(*pClDevice)); pKernelInfo = std::make_unique(); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32; pKernel = new MockKernel(program.get(), *pKernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); } void TearDown() override { delete pKernel; ClDeviceFixture::TearDown(); } MockKernel *pKernel; std::unique_ptr program; std::unique_ptr pKernelInfo; SPatchExecutionEnvironment executionEnvironment = {}; }; TEST_F(KernelExecutionEnvironmentTest, GivenCompiledWorkGroupSizeIsZeroWhenGettingMaxRequiredWorkGroupSizeThenMaxWorkGroupSizeIsCorrect) { auto maxWorkGroupSize = static_cast(pDevice->getDeviceInfo().maxWorkGroupSize); auto oldRequiredWorkGroupSizeX = 
this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0]; auto oldRequiredWorkGroupSizeY = this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1]; auto oldRequiredWorkGroupSizeZ = this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2]; this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0] = 0; this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1] = 0; this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2] = 0; EXPECT_EQ(maxWorkGroupSize, this->pKernelInfo->getMaxRequiredWorkGroupSize(maxWorkGroupSize)); this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0] = oldRequiredWorkGroupSizeX; this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1] = oldRequiredWorkGroupSizeY; this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2] = oldRequiredWorkGroupSizeZ; } TEST_F(KernelExecutionEnvironmentTest, GivenCompiledWorkGroupSizeLowerThanMaxWorkGroupSizeWhenGettingMaxRequiredWorkGroupSizeThenMaxWorkGroupSizeIsCorrect) { auto maxWorkGroupSize = static_cast(pDevice->getDeviceInfo().maxWorkGroupSize); auto oldRequiredWorkGroupSizeX = this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0]; auto oldRequiredWorkGroupSizeY = this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1]; auto oldRequiredWorkGroupSizeZ = this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2]; this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0] = static_cast(maxWorkGroupSize / 2); this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1] = 1; this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2] = 1; EXPECT_EQ(maxWorkGroupSize / 2, this->pKernelInfo->getMaxRequiredWorkGroupSize(maxWorkGroupSize)); 
this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0] = oldRequiredWorkGroupSizeX; this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1] = oldRequiredWorkGroupSizeY; this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2] = oldRequiredWorkGroupSizeZ; } TEST_F(KernelExecutionEnvironmentTest, GivenCompiledWorkGroupSizeIsGreaterThanMaxWorkGroupSizeWhenGettingMaxRequiredWorkGroupSizeThenMaxWorkGroupSizeIsCorrect) { auto maxWorkGroupSize = static_cast(pDevice->getDeviceInfo().maxWorkGroupSize); auto oldRequiredWorkGroupSizeX = this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0]; auto oldRequiredWorkGroupSizeY = this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1]; auto oldRequiredWorkGroupSizeZ = this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2]; this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0] = static_cast(maxWorkGroupSize); this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1] = static_cast(maxWorkGroupSize); this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2] = static_cast(maxWorkGroupSize); EXPECT_EQ(maxWorkGroupSize, this->pKernelInfo->getMaxRequiredWorkGroupSize(maxWorkGroupSize)); this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0] = oldRequiredWorkGroupSizeX; this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1] = oldRequiredWorkGroupSizeY; this->pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2] = oldRequiredWorkGroupSizeZ; } struct KernelCrossThreadTests : Test { KernelCrossThreadTests() { } void SetUp() override { ClDeviceFixture::SetUp(); program = std::make_unique(toClDeviceVector(*pClDevice)); pKernelInfo = std::make_unique(); pKernelInfo->setCrossThreadDataSize(64); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32; } void TearDown() override { 
ClDeviceFixture::TearDown();
    }

    // NOTE(review): template argument lists (unique_ptr, static_cast, undefined) look
    // stripped in this copy of the file - confirm against upstream.
    std::unique_ptr program;
    std::unique_ptr pKernelInfo;
    SPatchExecutionEnvironment executionEnvironment = {};
};

// All three localWorkSize2 offsets set: the kernel reports localWorkSize2 as patchable.
TEST_F(KernelCrossThreadTests, WhenLocalWorkSize2OffsetsAreValidThenIsLocalWorkSize2PatchableReturnsTrue) {
    auto &localWorkSize2 = pKernelInfo->kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize2;
    localWorkSize2[0] = 0;
    localWorkSize2[1] = 4;
    localWorkSize2[2] = 8;

    MockKernel kernel(program.get(), *pKernelInfo, *pClDevice);
    EXPECT_TRUE(kernel.isLocalWorkSize2Patchable());
}

// Every combination with at least one undefined offset must NOT be patchable.
// NOTE(review): the test name ends in "ReturnsTrue" although the body expects false; the
// name is kept unchanged so existing test filters keep working.
TEST_F(KernelCrossThreadTests, WhenNotAllLocalWorkSize2OffsetsAreValidThenIsLocalWorkSize2PatchableReturnsTrue) {
    MockKernel kernel(program.get(), *pKernelInfo, *pClDevice);
    auto &localWorkSize2 = pKernelInfo->kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize2;

    for (auto ele0 : {true, false}) {
        for (auto ele1 : {true, false}) {
            for (auto ele2 : {true, false}) {
                if (ele0 && ele1 && ele2) {
                    // The all-valid combination is covered by the previous test.
                    continue;
                }
                localWorkSize2[0] = ele0 ? 0 : undefined;
                localWorkSize2[1] = ele1 ? 4 : undefined;
                localWorkSize2[2] = ele2 ? 8 : undefined;
                EXPECT_FALSE(kernel.isLocalWorkSize2Patchable());
            }
        }
    }
}

// maxWorkGroupSize implicit arg at offset 12: after initialize() the patch pointer refers
// to that offset in cross-thread data and holds the device's max work-group size.
TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenEnqueuedMaxWorkGroupSizeIsCorrect) {
    pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.maxWorkGroupSize = 12;

    MockKernel kernel(program.get(), *pKernelInfo, *pClDevice);
    ASSERT_EQ(CL_SUCCESS, kernel.initialize());

    EXPECT_NE(nullptr, kernel.maxWorkGroupSizeForCrossThreadData);
    EXPECT_NE(&Kernel::dummyPatchLocation, kernel.maxWorkGroupSizeForCrossThreadData);
    EXPECT_EQ(static_cast(kernel.getCrossThreadData() + pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.maxWorkGroupSize), static_cast(kernel.maxWorkGroupSizeForCrossThreadData));
    EXPECT_EQ(pDevice->getDeviceInfo().maxWorkGroupSize, *kernel.maxWorkGroupSizeForCrossThreadData);
    EXPECT_EQ(pDevice->getDeviceInfo().maxWorkGroupSize, kernel.maxKernelWorkGroupSize);
}

// simdSize implicit arg at offset 16: the patch pointer refers to that offset and holds
// the kernel's max SIMD size.
TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenDataParameterSimdSizeIsCorrect) {
    pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.simdSize = 16;
    pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 16;
    MockKernel kernel(program.get(), *pKernelInfo, *pClDevice);
    ASSERT_EQ(CL_SUCCESS, kernel.initialize());

    EXPECT_NE(nullptr, kernel.dataParameterSimdSize);
    EXPECT_NE(&Kernel::dummyPatchLocation, kernel.dataParameterSimdSize);
    EXPECT_EQ(static_cast(kernel.getCrossThreadData() + pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.simdSize), static_cast(kernel.dataParameterSimdSize));
    EXPECT_EQ_VAL(pKernelInfo->getMaxSimdSize(), *kernel.dataParameterSimdSize);
}

// deviceSideEnqueueParentEvent implicit arg at offset 16: the patch pointer refers to that
// offset and its value is expected to equal `undefined`.
TEST_F(KernelCrossThreadTests, GivenParentEventOffsetWhenKernelIsInitializedThenParentEventIsInitiatedWithUndefined) {
    pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueParentEvent = 16;
    MockKernel kernel(program.get(), *pKernelInfo, *pClDevice);
    ASSERT_EQ(CL_SUCCESS, kernel.initialize());

    EXPECT_NE(nullptr, kernel.parentEventOffset);
    EXPECT_NE(&Kernel::dummyPatchLocation, kernel.parentEventOffset);
    EXPECT_EQ(static_cast(kernel.getCrossThreadData() + pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueParentEvent), static_cast(kernel.parentEventOffset));
    EXPECT_EQ(undefined, *kernel.parentEventOffset);
}

// Creating a kernel adds one reference to the program; destroying it gives it back.
TEST_F(KernelCrossThreadTests, WhenAddingKernelThenProgramRefCountIsIncremented) {
    auto refCount = program->getReference();
    {
        // The scope bounds the kernel's lifetime, replacing the manual new/delete pair.
        MockKernel kernel(program.get(), *pKernelInfo, *pClDevice);
        auto refCount2 = program->getReference();
        EXPECT_EQ(refCount2, refCount + 1);
    }
    auto refCount3 = program->getReference();
    EXPECT_EQ(refCount, refCount3);
}

// slmInlineSize from the kernel descriptor is visible as slmTotalSize right after
// construction (initialize() is not called here).
TEST_F(KernelCrossThreadTests, GivenSlmStatisSizeWhenCreatingKernelThenSlmTotalSizeIsSet) {
    pKernelInfo->kernelDescriptor.kernelAttributes.slmInlineSize = 1024;

    MockKernel kernel(program.get(), *pKernelInfo, *pClDevice);

    EXPECT_EQ(1024u, kernel.slmTotalSize);
}

// With private memory configured, the value read from the start of cross-thread data
// must equal the private surface's GPU address to patch.
TEST_F(KernelCrossThreadTests, givenKernelWithPrivateMemoryWhenItIsCreatedThenCurbeIsPatchedProperly) {
    pKernelInfo->setPrivateMemory(1, false, 8, 0);

    MockKernel kernel(program.get(), *pKernelInfo, *pClDevice);
    ASSERT_EQ(CL_SUCCESS, kernel.initialize());

    auto privateSurface = kernel.privateSurface;
    // Fail here instead of dereferencing a missing surface below.
    ASSERT_NE(nullptr, privateSurface);

    const auto privateAddress = static_cast<uintptr_t>(privateSurface->getGpuAddressToPatch());
    const auto *ptrCurbe = reinterpret_cast<const uint64_t *>(kernel.getCrossThreadData());
    const auto privateAddressFromCurbe = static_cast<uintptr_t>(*ptrCurbe);

    EXPECT_EQ(privateAddressFromCurbe, privateAddress);
}

// preferredWkgMultiple implicit arg at offset 8: the 32-bit value at that offset must
// equal the kernel's max SIMD size.
TEST_F(KernelCrossThreadTests, givenKernelWithPreferredWkgMultipleWhenItIsCreatedThenCurbeIsPatchedProperly) {
    pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.preferredWkgMultiple = 8;
    MockKernel kernel(program.get(), *pKernelInfo, *pClDevice);
    ASSERT_EQ(CL_SUCCESS, kernel.initialize());

    const auto *preferredWkgMultipleOffset = reinterpret_cast<const uint32_t *>(ptrOffset(kernel.getCrossThreadData(), 8));

    EXPECT_EQ(pKernelInfo->getMaxSimdSize(), *preferredWkgMultipleOffset);
}
// NOTE(review): template argument lists appear to have been stripped from this
// text (e.g. `std::make_unique(`, `reinterpret_cast(5)`, `std::vector> receivedArgs`).
// The tokens below are reproduced as found — confirm against the upstream file.

// Expects the border color offset to be 0 on a fresh MockKernelInfo and 3 after
// setSamplerTable(3, 1, 0).
TEST(KernelInfoTest, WhenPatchingBorderColorOffsetThenPatchIsAppliedCorrectly) {
    MockKernelInfo info;
    EXPECT_EQ(0u, info.getBorderColorOffset());
    info.setSamplerTable(3, 1, 0);
    EXPECT_EQ(3u, info.getBorderColorOffset());
}

// Expects getArgNumByName to return the index registered via addExtendedMetadata,
// and -1 for names that were never registered or after the metadata is cleared.
TEST(KernelInfoTest, GivenArgNameWhenGettingArgNumberByNameThenCorrectValueIsReturned) {
    MockKernelInfo info;
    EXPECT_EQ(-1, info.getArgNumByName(""));
    info.addExtendedMetadata(0, "arg1");
    EXPECT_EQ(-1, info.getArgNumByName(""));
    EXPECT_EQ(-1, info.getArgNumByName("arg2"));
    EXPECT_EQ(0, info.getArgNumByName("arg1"));
    info.addExtendedMetadata(1, "arg2");
    EXPECT_EQ(0, info.getArgNumByName("arg1"));
    EXPECT_EQ(1, info.getArgNumByName("arg2"));
    info.kernelDescriptor.explicitArgsExtendedMetadata.clear();
    EXPECT_EQ(-1, info.getArgNumByName("arg1"));
}

// Expects the kernel allocation size to equal KernelHeapSize plus the padding
// reported by HwHelper::getPaddingForISAAllocation().
TEST(KernelInfoTest, givenHwHelperWhenCreatingKernelAllocationThenCorrectPaddingIsAdded) {
    std::unique_ptr clDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get(), mockRootDeviceIndex));
    std::unique_ptr context = std::make_unique(clDevice.get());
    std::unique_ptr mockKernel = std::make_unique(*clDevice, context.get());
    uint32_t kernelHeap = 0;
    mockKernel->kernelInfo.heapInfo.KernelHeapSize = 1;
    mockKernel->kernelInfo.heapInfo.pKernelHeap = &kernelHeap;
    mockKernel->kernelInfo.createKernelAllocation(clDevice->getDevice(), false);
    auto graphicsAllocation = mockKernel->kernelInfo.getGraphicsAllocation();
    auto &hwHelper = NEO::HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily);
    size_t isaPadding = hwHelper.getPaddingForISAAllocation();
    EXPECT_EQ(graphicsAllocation->getUnderlyingBufferSize(), mockKernel->kernelInfo.heapInfo.KernelHeapSize + isaPadding);
    // The test frees the allocation it created explicitly.
    clDevice->getMemoryManager()->freeGraphicsMemory(mockKernel->kernelInfo.getGraphicsAllocation());
}

// Installs a recording builtin dispatch builder on the kernel info and checks,
// for four (return value, error code) combinations, what setArg returns, how the
// patched-argument count changes, and that every call reached the builder.
TEST(KernelTest, WhenSettingKernelArgThenBuiltinDispatchInfoBuilderIsUsed) {
    // Records every (index, size, value) it receives and replies with the
    // configured return value and error code.
    struct MockBuiltinDispatchBuilder : BuiltinDispatchInfoBuilder {
        using BuiltinDispatchInfoBuilder::BuiltinDispatchInfoBuilder;
        bool setExplicitArg(uint32_t argIndex, size_t argSize, const void *argVal, cl_int &err) const override {
            receivedArgs.push_back(std::make_tuple(argIndex, argSize, argVal));
            err = errToReturn;
            return valueToReturn;
        }
        bool valueToReturn = false;
        cl_int errToReturn = CL_SUCCESS;
        // mutable because setExplicitArg is const but still has to record calls.
        mutable std::vector> receivedArgs;
    };
    auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())));
    MockKernelWithInternals kernel(*device);
    kernel.mockKernel->initialize();
    kernel.mockKernel->kernelArguments.resize(2);
    MockBuiltinDispatchBuilder mockBuilder(*device->getBuiltIns(), *device);
    kernel.kernelInfo.builtinDispatchBuilder = &mockBuilder;

    // Case 1: builder returns false / CL_SUCCESS -> CL_SUCCESS, one arg counted as patched.
    mockBuilder.valueToReturn = false;
    mockBuilder.errToReturn = CL_SUCCESS;
    EXPECT_EQ(0u, kernel.mockKernel->getPatchedArgumentsNum());
    auto ret = kernel.mockKernel->setArg(1, 3, reinterpret_cast(5));
    EXPECT_EQ(CL_SUCCESS, ret);
    EXPECT_EQ(1u, kernel.mockKernel->getPatchedArgumentsNum());

    // Case 2: builder returns false / CL_INVALID_ARG_SIZE -> the builder's error is returned.
    mockBuilder.valueToReturn = false;
    mockBuilder.errToReturn = CL_INVALID_ARG_SIZE;
    ret = kernel.mockKernel->setArg(7, 11, reinterpret_cast(13));
    EXPECT_EQ(CL_INVALID_ARG_SIZE, ret);
    EXPECT_EQ(1u, kernel.mockKernel->getPatchedArgumentsNum());

    // Case 3: builder returns true / CL_SUCCESS with index 17 -> CL_INVALID_ARG_INDEX expected.
    mockBuilder.valueToReturn = true;
    mockBuilder.errToReturn = CL_SUCCESS;
    ret = kernel.mockKernel->setArg(17, 19, reinterpret_cast(23));
    EXPECT_EQ(CL_INVALID_ARG_INDEX, ret);
    EXPECT_EQ(1u, kernel.mockKernel->getPatchedArgumentsNum());

    // Case 4: builder returns true / CL_INVALID_ARG_SIZE with index 29 -> CL_INVALID_ARG_INDEX expected.
    mockBuilder.valueToReturn = true;
    mockBuilder.errToReturn = CL_INVALID_ARG_SIZE;
    ret = kernel.mockKernel->setArg(29, 31, reinterpret_cast(37));
    EXPECT_EQ(CL_INVALID_ARG_INDEX, ret);
    EXPECT_EQ(1u, kernel.mockKernel->getPatchedArgumentsNum());

    // All four calls must have been forwarded to the builder with their original arguments.
    ASSERT_EQ(4U, mockBuilder.receivedArgs.size());
    EXPECT_EQ(1U, std::get<0>(mockBuilder.receivedArgs[0]));
    EXPECT_EQ(3U, std::get<1>(mockBuilder.receivedArgs[0]));
    EXPECT_EQ(reinterpret_cast(5), std::get<2>(mockBuilder.receivedArgs[0]));
    EXPECT_EQ(7U, std::get<0>(mockBuilder.receivedArgs[1]));
    EXPECT_EQ(11U, std::get<1>(mockBuilder.receivedArgs[1]));
    EXPECT_EQ(reinterpret_cast(13), std::get<2>(mockBuilder.receivedArgs[1]));
    EXPECT_EQ(17U, std::get<0>(mockBuilder.receivedArgs[2]));
    EXPECT_EQ(19U, std::get<1>(mockBuilder.receivedArgs[2]));
    EXPECT_EQ(reinterpret_cast(23), std::get<2>(mockBuilder.receivedArgs[2]));
    EXPECT_EQ(29U, std::get<0>(mockBuilder.receivedArgs[3]));
    EXPECT_EQ(31U, std::get<1>(mockBuilder.receivedArgs[3]));
    EXPECT_EQ(reinterpret_cast(37), std::get<2>(mockBuilder.receivedArgs[3]));
}

// With UseMaxSimdSizeToDeduceMaxWorkgroupSize set, expects CL_KERNEL_WORK_GROUP_SIZE
// to be 1024 / 512 / 256 for SIMD 32 / 16 / 8 on the hardware description built below.
HWTEST_F(KernelTest, givenKernelWhenDebugFlagToUseMaxSimdForCalculationsIsUsedThenMaxWorkgroupSizeIsSimdSizeDependant) {
    DebugManagerStateRestore dbgStateRestore;
    DebugManager.flags.UseMaxSimdSizeToDeduceMaxWorkgroupSize.set(true);
    HardwareInfo myHwInfo = *defaultHwInfo;
    GT_SYSTEM_INFO &mySysInfo = myHwInfo.gtSystemInfo;
    mySysInfo.EUCount = 24;
    mySysInfo.SubSliceCount = 3;
    mySysInfo.DualSubSliceCount = 3;
    mySysInfo.ThreadCount = 24 * 7;
    auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&myHwInfo));
    MockKernelWithInternals kernel(*device);
    size_t maxKernelWkgSize;
    kernel.kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 32;
    kernel.mockKernel->getWorkGroupInfo(CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &maxKernelWkgSize, nullptr);
    EXPECT_EQ(1024u, maxKernelWkgSize);
    kernel.kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 16;
    kernel.mockKernel->getWorkGroupInfo(CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &maxKernelWkgSize, nullptr);
    EXPECT_EQ(512u, maxKernelWkgSize);
    kernel.kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 8;
    kernel.mockKernel->getWorkGroupInfo(CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &maxKernelWkgSize, nullptr);
    EXPECT_EQ(256u, maxKernelWkgSize);
}

// gpuPointerSize == 4 -> is32Bit() is expected to be true.
TEST(KernelTest, givenKernelWithKernelInfoWith32bitPointerSizeThenReport32bit) {
    KernelInfo info;
    info.kernelDescriptor.kernelAttributes.gpuPointerSize = 4;
    const auto rootDeviceIndex = 0u;
    auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr, rootDeviceIndex));
    MockContext context;
    MockProgram program(&context, false, toClDeviceVector(*device));
    std::unique_ptr kernel(new MockKernel(&program, info, *device));
    EXPECT_TRUE(kernel->is32Bit());
}

// gpuPointerSize == 8 -> is32Bit() is expected to be false.
TEST(KernelTest, givenKernelWithKernelInfoWith64bitPointerSizeThenReport64bit) {
    KernelInfo info;
    info.kernelDescriptor.kernelAttributes.gpuPointerSize = 8;
    const auto rootDeviceIndex = 0u;
    auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr, rootDeviceIndex));
    MockContext context;
    MockProgram program(&context, false, toClDeviceVector(*device));
    std::unique_ptr kernel(new MockKernel(&program, info, *device));
    EXPECT_FALSE(kernel->is32Bit());
}

// Program is constructed with `true` as its second argument (built-in, per the test
// name); even with a stateless pointer arg and RenderCompressedBuffersEnabled set,
// auxTranslationRequired is expected to stay false after initialize().
TEST(KernelTest, givenBuiltInProgramWhenCallingInitializeThenAuxTranslationRequiredIsFalse) {
    DebugManagerStateRestore restore;
    DebugManager.flags.RenderCompressedBuffersEnabled.set(1);
    KernelInfo info{};
    ArgDescriptor argDescriptorPointer(ArgDescriptor::ArgType::ArgTPointer);
    argDescriptorPointer.as().accessedUsingStatelessAddressingMode = true;
    info.kernelDescriptor.payloadMappings.explicitArgs.push_back(argDescriptorPointer);
    const auto rootDeviceIndex = 0u;
    auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr, rootDeviceIndex));
    MockContext context(device.get());
    MockProgram program(&context, true, toClDeviceVector(*device));
    MockKernel kernel(&program, info, *device);
    kernel.initialize();
    EXPECT_FALSE(kernel.auxTranslationRequired);
}

// Re-initializes the same kernel under several combinations of
// ftrRenderCompressedBuffers, buffer statefulness and ForceAuxTranslationEnabled,
// checking isAuxTranslationRequired() after each step.
TEST(KernelTest, givenFtrRenderCompressedBuffersWhenInitializingArgsWithNonStatefulAccessThenMarkKernelForAuxTranslation) {
    DebugManagerStateRestore restore;
    DebugManager.flags.ForceAuxTranslationEnabled.set(1);
    auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr));
    auto hwInfo = device->getRootDeviceEnvironment().getMutableHardwareInfo();
    auto &capabilityTable = hwInfo->capabilityTable;
    auto context = clUniquePtr(new MockContext(device.get()));
    context->contextType = ContextType::CONTEXT_TYPE_UNRESTRICTIVE;
    MockKernelWithInternals kernel(*device, context.get());
    kernel.kernelInfo.kernelDescriptor.kernelAttributes.crossThreadDataSize = 0;
    kernel.kernelInfo.addArgBuffer(0);
    kernel.kernelInfo.addExtendedMetadata(0, "", "char *");

    // Feature off, stateful buffer -> not required.
    capabilityTable.ftrRenderCompressedBuffers = false;
    kernel.kernelInfo.setBufferStateful(0);
    kernel.mockKernel->initialize();
    EXPECT_FALSE(kernel.mockKernel->isAuxTranslationRequired());

    // Feature off, setBufferStateful(0, false) -> still not required.
    kernel.kernelInfo.setBufferStateful(0, false);
    kernel.mockKernel->initialize();
    EXPECT_FALSE(kernel.mockKernel->isAuxTranslationRequired());

    // Feature on -> result must match what ClHwHelper::requiresAuxResolves reports.
    capabilityTable.ftrRenderCompressedBuffers = true;
    kernel.mockKernel->initialize();
    EXPECT_EQ(ClHwHelper::get(hwInfo->platform.eRenderCoreFamily).requiresAuxResolves(kernel.kernelInfo, *hwInfo), kernel.mockKernel->isAuxTranslationRequired());

    // Flag set to -1 -> same expectation as above.
    DebugManager.flags.ForceAuxTranslationEnabled.set(-1);
    kernel.mockKernel->initialize();
    EXPECT_EQ(ClHwHelper::get(hwInfo->platform.eRenderCoreFamily).requiresAuxResolves(kernel.kernelInfo, *hwInfo), kernel.mockKernel->isAuxTranslationRequired());

    // Flag set to 0 -> not required.
    DebugManager.flags.ForceAuxTranslationEnabled.set(0);
    kernel.mockKernel->initialize();
    EXPECT_FALSE(kernel.mockKernel->isAuxTranslationRequired());
}

// With ForceAuxTranslationEnabled = 1, expects the context's
// getResolvesRequiredInKernels() to mirror ClHwHelper::requiresAuxResolves.
TEST(KernelTest, WhenAuxTranslationIsRequiredThenKernelSetsRequiredResolvesInContext) {
    DebugManagerStateRestore restore;
    DebugManager.flags.ForceAuxTranslationEnabled.set(1);
    auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr));
    auto hwInfo = device->getRootDeviceEnvironment().getMutableHardwareInfo();
    hwInfo->capabilityTable.ftrRenderCompressedBuffers = true;
    auto context = clUniquePtr(new MockContext(device.get()));
    context->contextType = ContextType::CONTEXT_TYPE_UNRESTRICTIVE;
    MockKernelWithInternals kernel(*device, context.get());
    kernel.kernelInfo.addArgBuffer(0);
    kernel.kernelInfo.addExtendedMetadata(0, "", "char *");
    kernel.mockKernel->initialize();
    if (ClHwHelper::get(device->getHardwareInfo().platform.eRenderCoreFamily).requiresAuxResolves(kernel.kernelInfo, *hwInfo)) {
        EXPECT_TRUE(context->getResolvesRequiredInKernels());
    } else {
        EXPECT_FALSE(context->getResolvesRequiredInKernels());
    }
}

// With ForceAuxTranslationEnabled = 0 and a stateful buffer arg, expects the
// context not to report resolves as required.
TEST(KernelTest, WhenAuxTranslationIsNotRequiredThenKernelDoesNotSetRequiredResolvesInContext) {
    DebugManagerStateRestore restore;
    DebugManager.flags.ForceAuxTranslationEnabled.set(0);
    auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr));
    auto hwInfo = device->getRootDeviceEnvironment().getMutableHardwareInfo();
    hwInfo->capabilityTable.ftrRenderCompressedBuffers = true;
    auto context = clUniquePtr(new MockContext(device.get()));
    context->contextType = ContextType::CONTEXT_TYPE_UNRESTRICTIVE;
    MockKernelWithInternals kernel(*device, context.get());
    kernel.kernelInfo.addArgBuffer(0);
    kernel.kernelInfo.addExtendedMetadata(0, "", "char *");
    kernel.kernelInfo.setBufferStateful(0);
    kernel.mockKernel->initialize();
    EXPECT_FALSE(context->getResolvesRequiredInKernels());
}

// With RenderCompressedBuffersEnabled = 1 and the local hwInfo feature bit cleared,
// expects isAuxTranslationRequired() to mirror ClHwHelper::requiresAuxResolves.
TEST(KernelTest, givenDebugVariableSetWhenKernelHasStatefulBufferAccessThenMarkKernelForAuxTranslation) {
    DebugManagerStateRestore restore;
    DebugManager.flags.RenderCompressedBuffersEnabled.set(1);
    HardwareInfo localHwInfo = *defaultHwInfo;
    auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&localHwInfo));
    auto context = clUniquePtr(new MockContext(device.get()));
    MockKernelWithInternals kernel(*device, context.get());
    kernel.kernelInfo.addArgBuffer(0);
    kernel.kernelInfo.addExtendedMetadata(0, "", "char *");
    localHwInfo.capabilityTable.ftrRenderCompressedBuffers = false;
    kernel.mockKernel->initialize();
    if (ClHwHelper::get(localHwInfo.platform.eRenderCoreFamily).requiresAuxResolves(kernel.kernelInfo, localHwInfo)) {
        EXPECT_TRUE(kernel.mockKernel->isAuxTranslationRequired());
    } else {
        EXPECT_FALSE(kernel.mockKernel->isAuxTranslationRequired());
    }
}

// Opener of the next test; its name and body continue on the following source line.
TEST(KernelTest,
givenKernelWithPairArgumentWhenItIsInitializedThenPatchImmediateIsUsedAsArgHandler) {
    // (The `TEST(KernelTest,` opener of this test sits at the end of the previous source line.)
    // NOTE(review): template argument lists appear stripped in this text
    // (e.g. `std::make_unique(`, `reinterpret_cast(0x1)`) — confirm against upstream.
    // An argument whose extended-metadata type is "pair" is expected to be handled by
    // Kernel::setArgImmediate after initialize().
    HardwareInfo localHwInfo = *defaultHwInfo;
    auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&localHwInfo));
    auto context = clUniquePtr(new MockContext(device.get()));
    MockKernelWithInternals kernel(*device, context.get());
    kernel.kernelInfo.addExtendedMetadata(0, "", "pair");
    kernel.kernelInfo.kernelDescriptor.payloadMappings.explicitArgs.resize(1);
    kernel.mockKernel->initialize();
    EXPECT_EQ(&Kernel::setArgImmediate, kernel.mockKernel->kernelArgHandlers[0]);
}

// Pre-fills slot 0 with a non-null sentinel, then expects a nullptr allocation to
// overwrite it with nullptr.
TEST(KernelTest, whenNullAllocationThenAssignNullPointerToCacheFlushVector) {
    auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())));
    MockKernelWithInternals kernel(*device);
    kernel.mockKernel->kernelArgRequiresCacheFlush.resize(1);
    kernel.mockKernel->kernelArgRequiresCacheFlush[0] = reinterpret_cast(0x1);
    kernel.mockKernel->addAllocationToCacheFlushVector(0, nullptr);
    EXPECT_EQ(nullptr, kernel.mockKernel->kernelArgRequiresCacheFlush[0]);
}

// SIMD 8 kernel: initialize() is expected to fail with CL_INVALID_KERNEL only when
// the platform's minimal SIMD size is greater than 8, and to succeed otherwise.
TEST(KernelTest, givenKernelCompiledWithSimdSizeLowerThanExpectedWhenInitializingThenReturnError) {
    auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())));
    auto minSimd = HwHelper::get(device->getHardwareInfo().platform.eRenderCoreFamily).getMinimalSIMDSize();
    MockKernelWithInternals kernel(*device);
    kernel.kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 8;
    cl_int retVal = kernel.mockKernel->initialize();
    if (minSimd > 8) {
        EXPECT_EQ(CL_INVALID_KERNEL, retVal);
    } else {
        EXPECT_EQ(CL_SUCCESS, retVal);
    }
}

// SIMD 1 kernel: initialize() is expected to succeed.
// NOTE(review): the test name says "ThenReturnError" but the assertion expects
// CL_SUCCESS — confirm which one is intended.
TEST(KernelTest, givenKernelCompiledWithSimdOneWhenInitializingThenReturnError) {
    auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())));
    MockKernelWithInternals kernel(*device);
    kernel.kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 1;
    cl_int retVal = kernel.mockKernel->initialize();
    EXPECT_EQ(CL_SUCCESS, retVal);
}

// Allocation flagged non-writable with L3 flush required -> its pointer is expected
// to be stored in the cache-flush vector.
TEST(KernelTest, whenAllocationRequiringCacheFlushThenAssignAllocationPointerToCacheFlushVector) {
    MockGraphicsAllocation mockAllocation;
    auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())));
    MockKernelWithInternals kernel(*device);
    kernel.mockKernel->kernelArgRequiresCacheFlush.resize(1);
    mockAllocation.setMemObjectsAllocationWithWritableFlags(false);
    mockAllocation.setFlushL3Required(true);
    kernel.mockKernel->addAllocationToCacheFlushVector(0, &mockAllocation);
    EXPECT_EQ(&mockAllocation, kernel.mockKernel->kernelArgRequiresCacheFlush[0]);
}

// With svmAllocationsRequireCacheFlush set and EnableCacheFlushAfterWalker on,
// requiresCacheFlushCommand() is expected to be true for both values of the
// queue's requiresCacheFlushAfterWalker flag.
TEST(KernelTest, whenKernelRequireCacheFlushAfterWalkerThenRequireCacheFlushAfterWalker) {
    MockGraphicsAllocation mockAllocation;
    auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())));
    MockKernelWithInternals kernel(*device);
    kernel.mockKernel->svmAllocationsRequireCacheFlush = true;
    MockCommandQueue queue;
    DebugManagerStateRestore debugRestore;
    DebugManager.flags.EnableCacheFlushAfterWalker.set(true);
    queue.requiresCacheFlushAfterWalker = true;
    EXPECT_TRUE(kernel.mockKernel->requiresCacheFlushCommand(queue));
    queue.requiresCacheFlushAfterWalker = false;
    EXPECT_TRUE(kernel.mockKernel->requiresCacheFlushCommand(queue));
}

// Allocation flagged writable with no L3 flush required -> slot is expected to stay nullptr.
TEST(KernelTest, whenAllocationWriteableThenDoNotAssignAllocationPointerToCacheFlushVector) {
    MockGraphicsAllocation mockAllocation;
    auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())));
    MockKernelWithInternals kernel(*device);
    kernel.mockKernel->kernelArgRequiresCacheFlush.resize(1);
    mockAllocation.setMemObjectsAllocationWithWritableFlags(true);
    mockAllocation.setFlushL3Required(false);
    kernel.mockKernel->addAllocationToCacheFlushVector(0, &mockAllocation);
    EXPECT_EQ(nullptr, kernel.mockKernel->kernelArgRequiresCacheFlush[0]);
}

// Opener of the next test; its name and body continue on the following source line.
TEST(KernelTest,
whenAllocationReadOnlyNonFlushRequiredThenAssignNullPointerToCacheFlushVector) {
    // (The `TEST(KernelTest,` opener of this test sits at the end of the previous source line.)
    // NOTE(review): template argument lists appear stripped in this text
    // (e.g. `reinterpret_cast(0x1)`, `std::make_unique(*device)`) — confirm against upstream.
    // Allocation flagged non-writable with no L3 flush required -> the pre-filled
    // sentinel in slot 0 is expected to be replaced by nullptr.
    MockGraphicsAllocation mockAllocation;
    auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())));
    MockKernelWithInternals kernel(*device);
    kernel.mockKernel->kernelArgRequiresCacheFlush.resize(1);
    kernel.mockKernel->kernelArgRequiresCacheFlush[0] = reinterpret_cast(0x1);
    mockAllocation.setMemObjectsAllocationWithWritableFlags(false);
    mockAllocation.setFlushL3Required(false);
    kernel.mockKernel->addAllocationToCacheFlushVector(0, &mockAllocation);
    EXPECT_EQ(nullptr, kernel.mockKernel->kernelArgRequiresCacheFlush[0]);
}

// Gives the kernel a private surface and destroys the kernel wrapper while the
// test holds an extra internal reference on the execution environment. There are no
// explicit assertions; the test passes if teardown completes.
TEST(KernelTest, givenKernelUsesPrivateMemoryWhenDeviceReleasedBeforeKernelThenKernelUsesMemoryManagerFromEnvironment) {
    auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())));
    auto executionEnvironment = device->getExecutionEnvironment();
    auto mockKernel = std::make_unique(*device);
    GraphicsAllocation *privateSurface = device->getExecutionEnvironment()->memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), MemoryConstants::pageSize});
    mockKernel->mockKernel->setPrivateSurface(privateSurface, 10);
    executionEnvironment->incRefInternal();
    mockKernel.reset(nullptr);
    executionEnvironment->decRefInternal();
}

// Two buffer args, both marked stateful -> allBufferArgsStateful expected true.
TEST(KernelTest, givenAllArgumentsAreStatefulBuffersWhenInitializingThenAllBufferArgsStatefulIsTrue) {
    auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())));
    MockKernelWithInternals kernel{*device};
    kernel.kernelInfo.addArgBuffer(0);
    kernel.kernelInfo.setBufferStateful(0);
    kernel.kernelInfo.addArgBuffer(1);
    kernel.kernelInfo.setBufferStateful(1);
    kernel.mockKernel->initialize();
    EXPECT_TRUE(kernel.mockKernel->allBufferArgsStateful);
}

// Two buffer args, only the first marked stateful -> allBufferArgsStateful expected false.
TEST(KernelTest, givenAllArgumentsAreBuffersButNotAllAreStatefulWhenInitializingThenAllBufferArgsStatefulIsFalse) {
    auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())));
    MockKernelWithInternals kernel{*device};
    kernel.kernelInfo.addArgBuffer(0);
    kernel.kernelInfo.setBufferStateful(0);
    kernel.kernelInfo.addArgBuffer(1);
    kernel.mockKernel->initialize();
    EXPECT_FALSE(kernel.mockKernel->allBufferArgsStateful);
}

// One image arg plus one stateful buffer arg -> allBufferArgsStateful expected true.
TEST(KernelTest, givenNotAllArgumentsAreBuffersButAllBuffersAreStatefulWhenInitializingThenAllBufferArgsStatefulIsTrue) {
    auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())));
    MockKernelWithInternals kernel{*device};
    kernel.kernelInfo.addArgImage(0);
    kernel.kernelInfo.addArgBuffer(1);
    kernel.kernelInfo.setBufferStateful(1);
    kernel.mockKernel->initialize();
    EXPECT_TRUE(kernel.mockKernel->allBufferArgsStateful);
}

// Scratch sizes 512 (slot 0) and 1024 (slot 1) are set; getPrivateScratchSize()
// is expected to return 1024.
TEST(KernelTest, givenKernelRequiringPrivateScratchSpaceWhenGettingSizeForPrivateScratchSpaceThenCorrectSizeIsReturned) {
    auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())));
    MockKernelWithInternals mockKernel(*device);
    mockKernel.kernelInfo.setPerThreadScratchSize(512u, 0);
    mockKernel.kernelInfo.setPerThreadScratchSize(1024u, 1);
    EXPECT_EQ(1024u, mockKernel.mockKernel->getPrivateScratchSize());
}

// No scratch size configured -> getPrivateScratchSize() is expected to return 0.
TEST(KernelTest, givenKernelWithoutMediaVfeStateSlot1WhenGettingSizeForPrivateScratchSpaceThenCorrectSizeIsReturned) {
    auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())));
    MockKernelWithInternals mockKernel(*device);
    EXPECT_EQ(0u, mockKernel.mockKernel->getPrivateScratchSize());
}

// AddPatchInfoCommentsForAUBDump on, buffer arg at offset 0 -> one patch-info entry
// is expected after patchWithImplicitSurface().
TEST(KernelTest, givenKernelWithPatchInfoCollectionEnabledWhenPatchWithImplicitSurfaceCalledThenPatchInfoDataIsCollected) {
    DebugManagerStateRestore restore;
    DebugManager.flags.AddPatchInfoCommentsForAUBDump.set(true);
    auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())));
    MockKernelWithInternals kernel(*device);
    MockGraphicsAllocation mockAllocation;
    kernel.kernelInfo.addArgBuffer(0, 0, sizeof(void *));
    uint64_t crossThreadData = 0;
    EXPECT_EQ(0u, kernel.mockKernel->getPatchInfoDataList().size());
    kernel.mockKernel->patchWithImplicitSurface(&crossThreadData, mockAllocation, kernel.kernelInfo.argAsPtr(0));
    EXPECT_EQ(1u, kernel.mockKernel->getPatchInfoDataList().size());
}

// Buffer arg registered with `undefined` as its offset -> no patch-info entry expected.
// NOTE(review): unlike the neighbouring "Enabled" tests, this one does not set
// AddPatchInfoCommentsForAUBDump although its name says collection is enabled — confirm intent.
TEST(KernelTest, givenKernelWithPatchInfoCollecitonEnabledAndArgumentWithInvalidCrossThreadDataOffsetWhenPatchWithImplicitSurfaceCalledThenPatchInfoDataIsNotCollected) {
    auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())));
    MockKernelWithInternals kernel(*device);
    MockGraphicsAllocation mockAllocation;
    kernel.kernelInfo.addArgBuffer(0, undefined, sizeof(void *));
    uint64_t crossThreadData = 0;
    kernel.mockKernel->patchWithImplicitSurface(&crossThreadData, mockAllocation, kernel.kernelInfo.argAsPtr(0));
    EXPECT_EQ(0u, kernel.mockKernel->getPatchInfoDataList().size());
}

// Same setup and expectations as the "CollectionEnabled" test above:
// flag on, offset 0 -> one patch-info entry expected.
TEST(KernelTest, givenKernelWithPatchInfoCollectionEnabledAndValidArgumentWhenPatchWithImplicitSurfaceCalledThenPatchInfoDataIsCollected) {
    DebugManagerStateRestore restore;
    DebugManager.flags.AddPatchInfoCommentsForAUBDump.set(true);
    auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())));
    MockKernelWithInternals kernel(*device);
    MockGraphicsAllocation mockAllocation;
    kernel.kernelInfo.addArgBuffer(0, 0, sizeof(void *));
    uint64_t crossThreadData = 0;
    EXPECT_EQ(0u, kernel.mockKernel->getPatchInfoDataList().size());
    kernel.mockKernel->patchWithImplicitSurface(&crossThreadData, mockAllocation, kernel.kernelInfo.argAsPtr(0));
    EXPECT_EQ(1u, kernel.mockKernel->getPatchInfoDataList().size());
}

// Debug flag left untouched -> the patch-info list is expected to stay empty.
TEST(KernelTest, givenKernelWithPatchInfoCollectionDisabledWhenPatchWithImplicitSurfaceCalledThenPatchInfoDataIsNotCollected) {
    auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())));
    MockKernelWithInternals kernel(*device);
    MockGraphicsAllocation mockAllocation;
    kernel.kernelInfo.addArgBuffer(0, 0, sizeof(void *));
    uint64_t crossThreadData = 0;
    EXPECT_EQ(0u, kernel.mockKernel->getPatchInfoDataList().size());
    kernel.mockKernel->patchWithImplicitSurface(&crossThreadData, mockAllocation, kernel.kernelInfo.argAsPtr(0));
    EXPECT_EQ(0u, kernel.mockKernel->getPatchInfoDataList().size());
}

// A freshly created mock kernel is expected to report stateless writes as used.
TEST(KernelTest, givenDefaultKernelWhenItIsCreatedThenItReportsStatelessWrites) {
    auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())));
    MockKernelWithInternals kernel(*device);
    EXPECT_TRUE(kernel.mockKernel->areStatelessWritesUsed());
}

// Skipped when the platform helper reports no support for kernel thread arbitration
// policy. Otherwise the three named policies are expected to be accepted and the
// value 0 rejected with CL_INVALID_VALUE.
TEST(KernelTest, givenPolicyWhensetKernelThreadArbitrationPolicyThenExpectedClValueIsReturned) {
    auto &hwHelper = NEO::ClHwHelper::get(defaultHwInfo->platform.eRenderCoreFamily);
    if (!hwHelper.isSupportedKernelThreadArbitrationPolicy()) {
        GTEST_SKIP();
    }
    auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()));
    MockKernelWithInternals kernel(*device);
    EXPECT_EQ(CL_SUCCESS, kernel.mockKernel->setKernelThreadArbitrationPolicy(CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_ROUND_ROBIN_INTEL));
    EXPECT_EQ(CL_SUCCESS, kernel.mockKernel->setKernelThreadArbitrationPolicy(CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_OLDEST_FIRST_INTEL));
    EXPECT_EQ(CL_SUCCESS, kernel.mockKernel->setKernelThreadArbitrationPolicy(CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_AFTER_DEPENDENCY_ROUND_ROBIN_INTEL));
    uint32_t notExistPolicy = 0;
    EXPECT_EQ(CL_INVALID_VALUE, kernel.mockKernel->setKernelThreadArbitrationPolicy(notExistPolicy));
}

// Walks executionType through Default -> Concurrent -> Default and checks that an
// invalid value (-1) returns CL_INVALID_VALUE without changing the current type.
TEST(KernelTest, GivenDifferentValuesWhenSetKernelExecutionTypeIsCalledThenCorrectValueIsSet) {
    auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())));
    MockKernelWithInternals mockKernelWithInternals(*device);
    auto &kernel = *mockKernelWithInternals.mockKernel;
    cl_int retVal;
    EXPECT_EQ(KernelExecutionType::Default, kernel.executionType);
    retVal = kernel.setKernelExecutionType(-1);
    EXPECT_EQ(CL_INVALID_VALUE, retVal);
    EXPECT_EQ(KernelExecutionType::Default, kernel.executionType);
    retVal = kernel.setKernelExecutionType(CL_KERNEL_EXEC_INFO_CONCURRENT_TYPE_INTEL);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(KernelExecutionType::Concurrent, kernel.executionType);
    retVal = kernel.setKernelExecutionType(-1);
    EXPECT_EQ(CL_INVALID_VALUE, retVal);
    EXPECT_EQ(KernelExecutionType::Concurrent, kernel.executionType);
    retVal = kernel.setKernelExecutionType(CL_KERNEL_EXEC_INFO_DEFAULT_TYPE_INTEL);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(KernelExecutionType::Default, kernel.executionType);
}

// getKernelStartOffset(false, true, false) with start offset 128 and
// skipPerThreadDataLoad 128: expects allocation address + 256.
TEST(KernelTest, givenKernelLocalIdGenerationByRuntimeFalseWhenGettingStartOffsetThenOffsetToSkipPerThreadDataLoadIsAdded) {
    auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())));
    MockKernelWithInternals mockKernel(*device);
    mockKernel.kernelInfo.setLocalIds({0, 0, 0});
    mockKernel.kernelInfo.kernelDescriptor.entryPoints.skipPerThreadDataLoad = 128;
    mockKernel.kernelInfo.createKernelAllocation(device->getDevice(), false);
    auto allocationOffset = mockKernel.kernelInfo.getGraphicsAllocation()->getGpuAddressToPatch();
    mockKernel.mockKernel->setStartOffset(128);
    auto offset = mockKernel.mockKernel->getKernelStartOffset(false, true, false);
    EXPECT_EQ(allocationOffset + 256u, offset);
    device->getMemoryManager()->freeGraphicsMemory(mockKernel.kernelInfo.getGraphicsAllocation());
}

// getKernelStartOffset(true, true, false): expects allocation address + 128
// (the skipPerThreadDataLoad value is not added).
TEST(KernelTest, givenKernelLocalIdGenerationByRuntimeTrueAndLocalIdsUsedWhenGettingStartOffsetThenOffsetToSkipPerThreadDataLoadIsNotAdded) {
    auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())));
    MockKernelWithInternals mockKernel(*device);
    mockKernel.kernelInfo.setLocalIds({0, 0, 0});
    mockKernel.kernelInfo.kernelDescriptor.entryPoints.skipPerThreadDataLoad = 128;
    mockKernel.kernelInfo.createKernelAllocation(device->getDevice(), false);
    auto allocationOffset = mockKernel.kernelInfo.getGraphicsAllocation()->getGpuAddressToPatch();
    mockKernel.mockKernel->setStartOffset(128);
    auto offset = mockKernel.mockKernel->getKernelStartOffset(true, true, false);
    EXPECT_EQ(allocationOffset + 128u, offset);
    device->getMemoryManager()->freeGraphicsMemory(mockKernel.kernelInfo.getGraphicsAllocation());
}

// getKernelStartOffset(false, false, false): expects allocation address + 128
// (the skipPerThreadDataLoad value is not added).
TEST(KernelTest, givenKernelLocalIdGenerationByRuntimeFalseAndLocalIdsNotUsedWhenGettingStartOffsetThenOffsetToSkipPerThreadDataLoadIsNotAdded) {
    auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())));
    MockKernelWithInternals mockKernel(*device);
    mockKernel.kernelInfo.setLocalIds({0, 0, 0});
    mockKernel.kernelInfo.kernelDescriptor.entryPoints.skipPerThreadDataLoad = 128;
    mockKernel.kernelInfo.createKernelAllocation(device->getDevice(), false);
    auto allocationOffset = mockKernel.kernelInfo.getGraphicsAllocation()->getGpuAddressToPatch();
    mockKernel.mockKernel->setStartOffset(128);
    auto offset = mockKernel.mockKernel->getKernelStartOffset(false, false, false);
    EXPECT_EQ(allocationOffset + 128u, offset);
    device->getMemoryManager()->freeGraphicsMemory(mockKernel.kernelInfo.getGraphicsAllocation());
}

// Kernel built with SubgroupIndependentForwardProgressRequired set: its
// threadArbitrationPolicy is expected to equal HwHelper's default policy.
TEST(KernelTest, whenKernelIsInitializedThenThreadArbitrationPolicyIsSetToDefaultValue) {
    UltClDeviceFactory deviceFactory{1, 0};
    SPatchExecutionEnvironment sPatchExecEnv = {};
    sPatchExecEnv.SubgroupIndependentForwardProgressRequired = true;
    MockKernelWithInternals mockKernelWithInternals{*deviceFactory.rootDevices[0], sPatchExecEnv};
    auto &mockKernel = *mockKernelWithInternals.mockKernel;
    auto &hwHelper = HwHelper::get(deviceFactory.rootDevices[0]->getHardwareInfo().platform.eRenderCoreFamily);
    EXPECT_EQ(hwHelper.getDefaultThreadArbitrationPolicy(), mockKernel.threadArbitrationPolicy);
}

// Opener of the next test; its name and body continue on the following source line.
TEST(KernelTest,
givenKernelWhenSettingAdditinalKernelExecInfoThenCorrectValueIsSet) { UltClDeviceFactory deviceFactory{1, 0}; MockKernelWithInternals mockKernelWithInternals{*deviceFactory.rootDevices[0]}; mockKernelWithInternals.kernelInfo.kernelDescriptor.kernelAttributes.flags.requiresSubgroupIndependentForwardProgress = true; EXPECT_TRUE(mockKernelWithInternals.kernelInfo.requiresSubgroupIndependentForwardProgress()); auto &mockKernel = *mockKernelWithInternals.mockKernel; mockKernel.setAdditionalKernelExecInfo(123u); EXPECT_EQ(123u, mockKernel.getAdditionalKernelExecInfo()); mockKernel.setAdditionalKernelExecInfo(AdditionalKernelExecInfo::NotApplicable); EXPECT_EQ(AdditionalKernelExecInfo::NotApplicable, mockKernel.getAdditionalKernelExecInfo()); } using KernelMultiRootDeviceTest = MultiRootDeviceFixture; TEST_F(KernelMultiRootDeviceTest, givenKernelWithPrivateSurfaceWhenInitializeThenPrivateSurfacesHaveCorrectRootDeviceIndex) { auto pKernelInfo = std::make_unique(); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1; pKernelInfo->setPrivateMemory(112, false, 8, 40, 64); KernelInfoContainer kernelInfos; kernelInfos.resize(deviceFactory->rootDevices.size()); for (auto &rootDeviceIndex : context->getRootDeviceIndices()) { kernelInfos[rootDeviceIndex] = pKernelInfo.get(); } MockProgram program(context.get(), false, context->getDevices()); int32_t retVal = CL_INVALID_VALUE; auto pMultiDeviceKernel = std::unique_ptr(MultiDeviceKernel::create(&program, kernelInfos, &retVal)); EXPECT_EQ(CL_SUCCESS, retVal); for (auto &rootDeviceIndex : context->getRootDeviceIndices()) { auto kernel = static_cast(pMultiDeviceKernel->getKernel(rootDeviceIndex)); auto privateSurface = kernel->privateSurface; ASSERT_NE(nullptr, privateSurface); EXPECT_EQ(rootDeviceIndex, privateSurface->getRootDeviceIndex()); } } TEST(KernelCreateTest, whenInitFailedThenReturnNull) { struct MockProgram { ClDeviceVector getDevices() { ClDeviceVector deviceVector; deviceVector.push_back(&mDevice); return 
deviceVector; } void getSource(std::string &) {} MockClDevice mDevice{new MockDevice}; } mockProgram; struct MockKernel { MockKernel(MockProgram *, const KernelInfo &, ClDevice &) {} int initialize() { return -1; }; }; KernelInfo info; info.kernelDescriptor.kernelAttributes.gpuPointerSize = 8; auto ret = Kernel::create(&mockProgram, info, mockProgram.mDevice, nullptr); EXPECT_EQ(nullptr, ret); } TEST(MultiDeviceKernelCreateTest, whenInitFailedThenReturnNullAndPropagateErrorCode) { MockContext context; auto pKernelInfo = std::make_unique(); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 0; KernelInfoContainer kernelInfos; kernelInfos.push_back(pKernelInfo.get()); MockProgram program(&context, false, context.getDevices()); int32_t retVal = CL_SUCCESS; auto pMultiDeviceKernel = MultiDeviceKernel::create(&program, kernelInfos, &retVal); EXPECT_EQ(nullptr, pMultiDeviceKernel); EXPECT_EQ(CL_INVALID_KERNEL, retVal); } TEST(ArgTypeTraits, GivenDefaultInitializedArgTypeMetadataThenAddressSpaceIsGlobal) { ArgTypeTraits metadata; EXPECT_EQ(NEO::KernelArgMetadata::AddrGlobal, metadata.addressQualifier); } TEST_F(KernelTests, givenKernelWithSimdGreaterThan1WhenKernelCreatedThenMaxWorgGroupSizeEqualDeviceProperty) { auto pKernelInfo = std::make_unique(); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32; std::unique_ptr pKernel(new MockKernel(pProgram, *pKernelInfo, *pClDevice)); auto kernelMaxWorkGroupSize = pDevice->getDeviceInfo().maxWorkGroupSize; EXPECT_EQ(pKernel->getMaxKernelWorkGroupSize(), kernelMaxWorkGroupSize); } TEST_F(KernelTests, givenKernelWithSimdEqual1WhenKernelCreatedThenMaxWorgGroupSizeExualMaxHwThreadsPerWG) { auto pKernelInfo = std::make_unique(); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1; std::unique_ptr pKernel(new MockKernel(pProgram, *pKernelInfo, *pClDevice)); auto deviceMaxWorkGroupSize = pDevice->getDeviceInfo().maxWorkGroupSize; auto deviceInfo = pClDevice->getDevice().getDeviceInfo(); auto &hwInfoConfig = 
*HwInfoConfig::get(pKernel->getHardwareInfo().platform.eProductFamily); auto maxThreadsPerWG = hwInfoConfig.getMaxThreadsForWorkgroupInDSSOrSS(pKernel->getHardwareInfo(), static_cast(deviceInfo.maxNumEUsPerSubSlice), static_cast(deviceInfo.maxNumEUsPerDualSubSlice)); EXPECT_LT(pKernel->getMaxKernelWorkGroupSize(), deviceMaxWorkGroupSize); EXPECT_EQ(pKernel->getMaxKernelWorkGroupSize(), maxThreadsPerWG); } struct KernelLargeGrfTests : Test { void SetUp() override { ClDeviceFixture::SetUp(); program = std::make_unique(toClDeviceVector(*pClDevice)); pKernelInfo = std::make_unique(); pKernelInfo->kernelDescriptor.kernelAttributes.crossThreadDataSize = 64; } void TearDown() override { ClDeviceFixture::TearDown(); } std::unique_ptr program; std::unique_ptr pKernelInfo; SPatchExecutionEnvironment executionEnvironment = {}; }; HWTEST_F(KernelLargeGrfTests, GivenLargeGrfAndSimdSizeWhenGettingMaxWorkGroupSizeThenCorrectValueReturned) { pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 16; pKernelInfo->kernelDescriptor.kernelAttributes.crossThreadDataSize = 4; pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.maxWorkGroupSize = 0; { MockKernel kernel(program.get(), *pKernelInfo, *pClDevice); pKernelInfo->kernelDescriptor.kernelAttributes.numGrfRequired = GrfConfig::LargeGrfNumber - 1; EXPECT_EQ(CL_SUCCESS, kernel.initialize()); EXPECT_EQ(pDevice->getDeviceInfo().maxWorkGroupSize, *kernel.maxWorkGroupSizeForCrossThreadData); EXPECT_EQ(pDevice->getDeviceInfo().maxWorkGroupSize, kernel.maxKernelWorkGroupSize); } { MockKernel kernel(program.get(), *pKernelInfo, *pClDevice); pKernelInfo->kernelDescriptor.kernelAttributes.numGrfRequired = GrfConfig::LargeGrfNumber; EXPECT_EQ(CL_SUCCESS, kernel.initialize()); EXPECT_EQ(pDevice->getDeviceInfo().maxWorkGroupSize >> 1, *kernel.maxWorkGroupSizeForCrossThreadData); EXPECT_EQ(pDevice->getDeviceInfo().maxWorkGroupSize >> 1, kernel.maxKernelWorkGroupSize); } { MockKernel kernel(program.get(), *pKernelInfo, *pClDevice); 
pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32; pKernelInfo->kernelDescriptor.kernelAttributes.numGrfRequired = GrfConfig::LargeGrfNumber; EXPECT_EQ(CL_SUCCESS, kernel.initialize()); EXPECT_EQ(pDevice->getDeviceInfo().maxWorkGroupSize, *kernel.maxWorkGroupSizeForCrossThreadData); EXPECT_EQ(pDevice->getDeviceInfo().maxWorkGroupSize, kernel.maxKernelWorkGroupSize); } } HWTEST2_F(KernelConstantSurfaceTest, givenKernelWithConstantSurfaceWhenKernelIsCreatedThenConstantMemorySurfaceStateIsPatchedWithMocs, IsAtLeastXeHpCore) { auto pKernelInfo = std::make_unique(); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32; pKernelInfo->setGlobalConstantsSurface(8, 0, 0); char buffer[MemoryConstants::pageSize64k]; GraphicsAllocation gfxAlloc(0, AllocationType::CONSTANT_SURFACE, buffer, MemoryConstants::pageSize64k, static_cast(8), MemoryPool::MemoryNull, MemoryManager::maxOsContextCount); MockContext context(pClDevice); MockProgram program(&context, false, toClDeviceVector(*pClDevice)); program.setConstantSurface(&gfxAlloc); // create kernel std::unique_ptr pKernel(new MockKernel(&program, *pKernelInfo, *pClDevice)); // setup surface state heap char surfaceStateHeap[0x80]; pKernelInfo->heapInfo.SurfaceStateHeapSize = sizeof(surfaceStateHeap); pKernelInfo->heapInfo.pSsh = surfaceStateHeap; ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.globalConstantsSurfaceAddress.bindful)); auto actualMocs = surfaceState->getMemoryObjectControlState(); const auto expectedMocs = context.getDevice(0)->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST); EXPECT_EQ(expectedMocs, actualMocs); program.setConstantSurface(nullptr); } using KernelImplicitArgsTest = Test; TEST_F(KernelImplicitArgsTest, 
WhenKernelRequiresImplicitArgsThenImplicitArgsStructIsCreatedAndProperlyInitialized) {
    // (The `TEST_F(KernelImplicitArgsTest,` opener of this test sits at the end of the
    // previous source line.)
    // NOTE(review): template argument lists appear stripped in this text
    // (e.g. `std::make_unique()`) — confirm against upstream.
    // With requiresImplicitArgs false, getImplicitArgs() is expected to be nullptr;
    // with it true, the struct must match {sizeof(ImplicitArgs), 0, 0, 32} byte for byte.
    auto pKernelInfo = std::make_unique();
    pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
    pKernelInfo->kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs = false;
    MockContext context(pClDevice);
    MockProgram program(&context, false, toClDeviceVector(*pClDevice));
    {
        MockKernel kernel(&program, *pKernelInfo, *pClDevice);
        ASSERT_EQ(CL_SUCCESS, kernel.initialize());
        EXPECT_EQ(nullptr, kernel.getImplicitArgs());
    }
    pKernelInfo->kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs = true;
    {
        MockKernel kernel(&program, *pKernelInfo, *pClDevice);
        ASSERT_EQ(CL_SUCCESS, kernel.initialize());
        auto pImplicitArgs = kernel.getImplicitArgs();
        ASSERT_NE(nullptr, pImplicitArgs);
        ImplicitArgs expectedImplicitArgs = {sizeof(ImplicitArgs), 0, 0, 32};
        EXPECT_EQ(0, memcmp(&expectedImplicitArgs, pImplicitArgs, sizeof(ImplicitArgs)));
    }
}

// Sets work dimension, local/global sizes, global offsets and group counts on the
// kernel and expects its ImplicitArgs to match the hand-built expected struct
// byte for byte.
TEST_F(KernelImplicitArgsTest, givenKernelWithImplicitArgsWhenSettingKernelParamsThenImplicitArgsAreProperlySet) {
    auto pKernelInfo = std::make_unique();
    pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
    pKernelInfo->kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs = true;
    MockContext context(pClDevice);
    MockProgram program(&context, false, toClDeviceVector(*pClDevice));
    MockKernel kernel(&program, *pKernelInfo, *pClDevice);
    ASSERT_EQ(CL_SUCCESS, kernel.initialize());
    auto pImplicitArgs = kernel.getImplicitArgs();
    ASSERT_NE(nullptr, pImplicitArgs);

    // Expected contents after the setter calls below.
    ImplicitArgs expectedImplicitArgs = {sizeof(ImplicitArgs)};
    expectedImplicitArgs.numWorkDim = 3;
    expectedImplicitArgs.simdWidth = 32;
    expectedImplicitArgs.localSizeX = 4;
    expectedImplicitArgs.localSizeY = 5;
    expectedImplicitArgs.localSizeZ = 6;
    expectedImplicitArgs.globalSizeX = 7;
    expectedImplicitArgs.globalSizeY = 8;
    expectedImplicitArgs.globalSizeZ = 9;
    expectedImplicitArgs.globalOffsetX = 1;
    expectedImplicitArgs.globalOffsetY = 2;
    expectedImplicitArgs.globalOffsetZ = 3;
    expectedImplicitArgs.groupCountX = 3;
    expectedImplicitArgs.groupCountY = 2;
    expectedImplicitArgs.groupCountZ = 1;

    kernel.setWorkDim(3);
    kernel.setLocalWorkSizeValues(4, 5, 6);
    kernel.setGlobalWorkSizeValues(7, 8, 9);
    kernel.setGlobalWorkOffsetValues(1, 2, 3);
    kernel.setNumWorkGroupsValues(3, 2, 1);
    // pImplicitArgs was fetched before the setters ran; the comparison relies on it
    // still pointing at the kernel's live struct.
    EXPECT_EQ(0, memcmp(&expectedImplicitArgs, pImplicitArgs, sizeof(ImplicitArgs)));
}

// Configures `kernel`, clones it into `kernel2`, and expects kernel2's
// ImplicitArgs to match the same expected struct byte for byte.
TEST_F(KernelImplicitArgsTest, givenKernelWithImplicitArgsWhenCloneKernelThenImplicitArgsAreCopied) {
    auto pKernelInfo = std::make_unique();
    pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32;
    pKernelInfo->kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs = true;
    MockContext context(pClDevice);
    MockProgram program(&context, false, toClDeviceVector(*pClDevice));
    MockKernel kernel(&program, *pKernelInfo, *pClDevice);
    MockKernel kernel2(&program, *pKernelInfo, *pClDevice);
    ASSERT_EQ(CL_SUCCESS, kernel.initialize());
    ASSERT_EQ(CL_SUCCESS, kernel2.initialize());

    // Expected contents of the clone's implicit args.
    ImplicitArgs expectedImplicitArgs = {sizeof(ImplicitArgs)};
    expectedImplicitArgs.numWorkDim = 3;
    expectedImplicitArgs.simdWidth = 32;
    expectedImplicitArgs.localSizeX = 4;
    expectedImplicitArgs.localSizeY = 5;
    expectedImplicitArgs.localSizeZ = 6;
    expectedImplicitArgs.globalSizeX = 7;
    expectedImplicitArgs.globalSizeY = 8;
    expectedImplicitArgs.globalSizeZ = 9;
    expectedImplicitArgs.globalOffsetX = 1;
    expectedImplicitArgs.globalOffsetY = 2;
    expectedImplicitArgs.globalOffsetZ = 3;
    expectedImplicitArgs.groupCountX = 3;
    expectedImplicitArgs.groupCountY = 2;
    expectedImplicitArgs.groupCountZ = 1;

    kernel.setWorkDim(3);
    kernel.setLocalWorkSizeValues(4, 5, 6);
    kernel.setGlobalWorkSizeValues(7, 8, 9);
    kernel.setGlobalWorkOffsetValues(1, 2, 3);
    kernel.setNumWorkGroupsValues(3, 2, 1);
    ASSERT_EQ(CL_SUCCESS, kernel2.cloneKernel(&kernel));
    auto pImplicitArgs = kernel2.getImplicitArgs();
    ASSERT_NE(nullptr, pImplicitArgs);
    EXPECT_EQ(0, memcmp(&expectedImplicitArgs, pImplicitArgs, sizeof(ImplicitArgs)));
}

// Opener of the next test; its name and body continue on the following source line.
TEST_F(KernelImplicitArgsTest,
givenKernelWithoutImplicitArgsWhenSettingKernelParamsThenImplicitArgsAreNotSet) { auto pKernelInfo = std::make_unique(); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32; pKernelInfo->kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs = false; MockContext context(pClDevice); MockProgram program(&context, false, toClDeviceVector(*pClDevice)); MockKernel kernel(&program, *pKernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); EXPECT_EQ(nullptr, kernel.getImplicitArgs()); kernel.setWorkDim(3); kernel.setLocalWorkSizeValues(4, 5, 6); kernel.setGlobalWorkSizeValues(7, 8, 9); kernel.setGlobalWorkOffsetValues(1, 2, 3); kernel.setNumWorkGroupsValues(3, 2, 1); EXPECT_EQ(nullptr, kernel.getImplicitArgs()); } compute-runtime-22.14.22890/opencl/test/unit_test/kernel/kernel_tests_pvc_and_later.cpp000066400000000000000000000015021422164147700311740ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" using namespace NEO; using KernelTestPvcAndLater = ::testing::Test; using isAtLeastPvc = IsAtLeastGfxCore; HWTEST2_F(KernelTestPvcAndLater, givenPolicyWhenSetKernelThreadArbitrationPolicyThenExpectedClValueIsReturned, isAtLeastPvc) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); MockKernelWithInternals kernel(*device); EXPECT_EQ(CL_SUCCESS, kernel.mockKernel->setKernelThreadArbitrationPolicy(CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_STALL_BASED_ROUND_ROBIN_INTEL)); } compute-runtime-22.14.22890/opencl/test/unit_test/kernel/kernel_transformable_tests.cpp000066400000000000000000000347441422164147700312500ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/program/kernel_info.h" #include 
"shared/test/common/test_macros/test.h" #include "opencl/source/sampler/sampler.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_sampler.h" #include using namespace NEO; class KernelTransformableTest : public ::testing::Test { public: void SetUp() override { context = std::make_unique(deviceFactory.rootDevices[rootDeviceIndex]); pKernelInfo = std::make_unique(); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1; pKernelInfo->heapInfo.pSsh = surfaceStateHeap; pKernelInfo->heapInfo.SurfaceStateHeapSize = sizeof(surfaceStateHeap); pKernelInfo->addArgSampler(0, 0); pKernelInfo->addArgSampler(1, 0); pKernelInfo->addArgImage(2, firstImageOffset); pKernelInfo->addArgImage(3, secondImageOffset); pKernelInfo->kernelDescriptor.kernelAttributes.numArgsToPatch = 4; program = std::make_unique(context.get(), false, toClDeviceVector(*context->getDevice(0))); pKernel.reset(new MockKernel(program.get(), *pKernelInfo, *deviceFactory.rootDevices[rootDeviceIndex])); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); pKernel->setKernelArgHandler(0, &Kernel::setArgSampler); pKernel->setKernelArgHandler(1, &Kernel::setArgSampler); pKernel->setKernelArgHandler(2, &Kernel::setArgImage); pKernel->setKernelArgHandler(3, &Kernel::setArgImage); } Sampler *createTransformableSampler() { return new MockSampler(nullptr, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST); } Sampler *createNonTransformableSampler() { return new MockSampler(nullptr, CL_TRUE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST); } const int firstImageOffset = 0x20; const int secondImageOffset = 0x40; cl_int retVal = CL_SUCCESS; UltClDeviceFactory deviceFactory{2, 0}; std::unique_ptr context; std::unique_ptr program; std::unique_ptr sampler; std::unique_ptr pKernelInfo; std::unique_ptr pKernel; 
std::unique_ptr image; SKernelBinaryHeaderCommon kernelHeader; char surfaceStateHeap[0x80]; const uint32_t rootDeviceIndex = 1; }; HWTEST_F(KernelTransformableTest, givenKernelThatCannotTranformImagesWithTwoTransformableImagesAndTwoTransformableSamplersWhenAllArgsAreSetThenImagesAreNotTransformed) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE; image.reset(Image3dHelper<>::create(context.get())); sampler.reset(createTransformableSampler()); cl_mem clImage = image.get(); cl_sampler clSampler = sampler.get(); pKernelInfo->argAt(2).getExtendedTypeInfo().isTransformable = true; pKernelInfo->argAt(3).getExtendedTypeInfo().isTransformable = true; pKernel->canKernelTransformImages = false; pKernel->setArg(0, sizeof(clSampler), &clSampler); pKernel->setArg(1, sizeof(clSampler), &clSampler); pKernel->setArg(2, sizeof(clImage), &clImage); pKernel->setArg(3, sizeof(clImage), &clImage); auto ssh = pKernel->getSurfaceStateHeap(); auto firstSurfaceState = reinterpret_cast(ptrOffset(ssh, firstImageOffset)); EXPECT_EQ(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_3D, firstSurfaceState->getSurfaceType()); EXPECT_FALSE(firstSurfaceState->getSurfaceArray()); auto secondSurfaceState = reinterpret_cast(ptrOffset(ssh, secondImageOffset)); EXPECT_EQ(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_3D, secondSurfaceState->getSurfaceType()); EXPECT_FALSE(secondSurfaceState->getSurfaceArray()); } HWTEST_F(KernelTransformableTest, givenKernelWithTwoTransformableImagesAndTwoTransformableSamplersWhenAllArgsAreSetThenImagesAreTransformed) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE; image.reset(Image3dHelper<>::create(context.get())); sampler.reset(createTransformableSampler()); cl_mem clImage = image.get(); cl_sampler clSampler = sampler.get(); pKernelInfo->argAt(2).getExtendedTypeInfo().isTransformable = true; 
pKernelInfo->argAt(3).getExtendedTypeInfo().isTransformable = true; pKernel->setArg(0, sizeof(clSampler), &clSampler); pKernel->setArg(1, sizeof(clSampler), &clSampler); pKernel->setArg(2, sizeof(clImage), &clImage); pKernel->setArg(3, sizeof(clImage), &clImage); auto ssh = pKernel->getSurfaceStateHeap(); auto firstSurfaceState = reinterpret_cast(ptrOffset(ssh, firstImageOffset)); EXPECT_EQ(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_2D, firstSurfaceState->getSurfaceType()); EXPECT_TRUE(firstSurfaceState->getSurfaceArray()); auto secondSurfaceState = reinterpret_cast(ptrOffset(ssh, secondImageOffset)); EXPECT_EQ(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_2D, secondSurfaceState->getSurfaceType()); EXPECT_TRUE(secondSurfaceState->getSurfaceArray()); } HWTEST_F(KernelTransformableTest, givenKernelWithTwoTransformableImagesAndTwoTransformableSamplersWhenAnyArgIsResetThenImagesAreTransformedAgain) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE; image.reset(Image3dHelper<>::create(context.get())); sampler.reset(createTransformableSampler()); cl_mem clImage = image.get(); cl_sampler clSampler = sampler.get(); pKernelInfo->argAt(2).getExtendedTypeInfo().isTransformable = true; pKernelInfo->argAt(3).getExtendedTypeInfo().isTransformable = true; pKernel->setArg(0, sizeof(clSampler), &clSampler); pKernel->setArg(1, sizeof(clSampler), &clSampler); pKernel->setArg(2, sizeof(clImage), &clImage); pKernel->setArg(3, sizeof(clImage), &clImage); auto ssh = pKernel->getSurfaceStateHeap(); auto firstSurfaceState = reinterpret_cast(ptrOffset(ssh, firstImageOffset)); auto secondSurfaceState = reinterpret_cast(ptrOffset(ssh, secondImageOffset)); firstSurfaceState->setSurfaceType(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_NULL); secondSurfaceState->setSurfaceType(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_NULL); pKernelInfo->argAt(3).getExtendedTypeInfo().isTransformable = false; pKernel->setArg(3, sizeof(clImage), &clImage); 
EXPECT_EQ(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_2D, firstSurfaceState->getSurfaceType()); EXPECT_TRUE(firstSurfaceState->getSurfaceArray()); EXPECT_EQ(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_3D, secondSurfaceState->getSurfaceType()); EXPECT_FALSE(secondSurfaceState->getSurfaceArray()); } HWTEST_F(KernelTransformableTest, givenKernelWithOneTransformableImageAndTwoTransformableSamplersWhenAnyArgIsResetThenOnlyOneImageIsTransformed) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE; image.reset(Image3dHelper<>::create(context.get())); sampler.reset(createTransformableSampler()); cl_mem clImage = image.get(); cl_sampler clSampler = sampler.get(); pKernelInfo->argAt(2).getExtendedTypeInfo().isTransformable = true; pKernelInfo->argAt(3).getExtendedTypeInfo().isTransformable = false; pKernel->setArg(0, sizeof(clSampler), &clSampler); pKernel->setArg(1, sizeof(clSampler), &clSampler); pKernel->setArg(2, sizeof(clImage), &clImage); pKernel->setArg(3, sizeof(clImage), &clImage); auto ssh = pKernel->getSurfaceStateHeap(); auto firstSurfaceState = reinterpret_cast(ptrOffset(ssh, firstImageOffset)); auto secondSurfaceState = reinterpret_cast(ptrOffset(ssh, secondImageOffset)); EXPECT_EQ(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_2D, firstSurfaceState->getSurfaceType()); EXPECT_TRUE(firstSurfaceState->getSurfaceArray()); EXPECT_EQ(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_3D, secondSurfaceState->getSurfaceType()); EXPECT_FALSE(secondSurfaceState->getSurfaceArray()); } HWTEST_F(KernelTransformableTest, givenKernelWithImages2dAndTwoTransformableSamplersWhenAnyArgIsResetThenImagesAreNotTransformed) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE; image.reset(Image2dHelper<>::create(context.get())); sampler.reset(createTransformableSampler()); cl_mem clImage = image.get(); cl_sampler clSampler = sampler.get(); 
pKernelInfo->argAt(2).getExtendedTypeInfo().isTransformable = true; pKernelInfo->argAt(3).getExtendedTypeInfo().isTransformable = true; auto ssh = pKernel->getSurfaceStateHeap(); auto firstSurfaceState = reinterpret_cast(ptrOffset(ssh, firstImageOffset)); auto secondSurfaceState = reinterpret_cast(ptrOffset(ssh, secondImageOffset)); pKernel->setArg(0, sizeof(clSampler), &clSampler); pKernel->setArg(1, sizeof(clSampler), &clSampler); pKernel->setArg(2, sizeof(clImage), &clImage); pKernel->setArg(3, sizeof(clImage), &clImage); EXPECT_EQ(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_2D, firstSurfaceState->getSurfaceType()); EXPECT_FALSE(firstSurfaceState->getSurfaceArray()); EXPECT_EQ(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_2D, secondSurfaceState->getSurfaceType()); EXPECT_FALSE(secondSurfaceState->getSurfaceArray()); } HWTEST_F(KernelTransformableTest, givenKernelWithTwoTransformableImagesAndTwoTransformableSamplersWhenChangeSamplerToNontransformableThenImagesAreTransformedTo3d) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE; image.reset(Image3dHelper<>::create(context.get())); sampler.reset(createTransformableSampler()); cl_mem clImage = image.get(); cl_sampler clSampler = sampler.get(); pKernelInfo->argAt(2).getExtendedTypeInfo().isTransformable = true; pKernelInfo->argAt(3).getExtendedTypeInfo().isTransformable = true; pKernel->setArg(0, sizeof(clSampler), &clSampler); pKernel->setArg(1, sizeof(clSampler), &clSampler); pKernel->setArg(2, sizeof(clImage), &clImage); pKernel->setArg(3, sizeof(clImage), &clImage); auto ssh = pKernel->getSurfaceStateHeap(); auto firstSurfaceState = reinterpret_cast(ptrOffset(ssh, firstImageOffset)); auto secondSurfaceState = reinterpret_cast(ptrOffset(ssh, secondImageOffset)); std::unique_ptr sampler2(createNonTransformableSampler()); cl_sampler clSampler2 = sampler2.get(); pKernel->setArg(1, sizeof(clSampler2), &clSampler2); 
EXPECT_EQ(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_3D, firstSurfaceState->getSurfaceType()); EXPECT_FALSE(firstSurfaceState->getSurfaceArray()); EXPECT_EQ(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_3D, secondSurfaceState->getSurfaceType()); EXPECT_FALSE(secondSurfaceState->getSurfaceArray()); pKernel.reset(); } HWTEST_F(KernelTransformableTest, givenKernelWithNonTransformableSamplersWhenResetSamplerWithNontransformableThenImagesNotChangedAgain) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE; image.reset(Image3dHelper<>::create(context.get())); sampler.reset(createNonTransformableSampler()); cl_mem clImage = image.get(); cl_sampler clSampler = sampler.get(); pKernelInfo->argAt(2).getExtendedTypeInfo().isTransformable = true; pKernelInfo->argAt(3).getExtendedTypeInfo().isTransformable = true; pKernel->setArg(0, sizeof(clSampler), &clSampler); pKernel->setArg(1, sizeof(clSampler), &clSampler); pKernel->setArg(2, sizeof(clImage), &clImage); pKernel->setArg(3, sizeof(clImage), &clImage); auto ssh = pKernel->getSurfaceStateHeap(); auto firstSurfaceState = reinterpret_cast(ptrOffset(ssh, firstImageOffset)); auto secondSurfaceState = reinterpret_cast(ptrOffset(ssh, secondImageOffset)); firstSurfaceState->setSurfaceType(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_NULL); secondSurfaceState->setSurfaceType(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_NULL); pKernel->setArg(0, sizeof(clSampler), &clSampler); EXPECT_EQ(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_NULL, firstSurfaceState->getSurfaceType()); EXPECT_FALSE(firstSurfaceState->getSurfaceArray()); EXPECT_EQ(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_NULL, secondSurfaceState->getSurfaceType()); EXPECT_FALSE(secondSurfaceState->getSurfaceArray()); } HWTEST_F(KernelTransformableTest, givenKernelWithoutSamplersAndTransformableImagesWhenResolveKernelThenImagesAreTransformed) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using SURFACE_TYPE = typename 
RENDER_SURFACE_STATE::SURFACE_TYPE; image.reset(Image3dHelper<>::create(context.get())); cl_mem clImage = image.get(); pKernelInfo->kernelDescriptor.payloadMappings.explicitArgs.clear(); pKernelInfo->addArgImage(0, 0); pKernelInfo->addArgImage(1, 0); pKernelInfo->addArgImage(2, firstImageOffset); pKernelInfo->argAt(2).getExtendedTypeInfo().isTransformable = true; pKernelInfo->addArgImage(3, secondImageOffset); pKernelInfo->argAt(3).getExtendedTypeInfo().isTransformable = true; pKernel->setKernelArgHandler(0, &Kernel::setArgImage); pKernel->setKernelArgHandler(1, &Kernel::setArgImage); pKernel->setArg(0, sizeof(clImage), &clImage); pKernel->setArg(1, sizeof(clImage), &clImage); pKernel->setArg(2, sizeof(clImage), &clImage); pKernel->setArg(3, sizeof(clImage), &clImage); auto ssh = pKernel->getSurfaceStateHeap(); auto firstSurfaceState = reinterpret_cast(ptrOffset(ssh, firstImageOffset)); auto secondSurfaceState = reinterpret_cast(ptrOffset(ssh, secondImageOffset)); EXPECT_EQ(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_2D, firstSurfaceState->getSurfaceType()); EXPECT_TRUE(firstSurfaceState->getSurfaceArray()); EXPECT_EQ(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_2D, secondSurfaceState->getSurfaceType()); EXPECT_TRUE(secondSurfaceState->getSurfaceArray()); } compute-runtime-22.14.22890/opencl/test/unit_test/kernel/substitute_kernel_heap_tests.cpp000066400000000000000000000144451422164147700316150ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/source/memory_manager/os_agnostic_memory_manager.h" #include "shared/source/os_interface/os_context.h" #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" using namespace NEO; typedef Test KernelSubstituteTest; TEST_F(KernelSubstituteTest, 
givenKernelWhenSubstituteKernelHeapWithGreaterSizeThenAllocatesNewKernelAllocation) { MockKernelWithInternals kernel(*pClDevice); const size_t initialHeapSize = 0x40; kernel.kernelInfo.heapInfo.KernelHeapSize = initialHeapSize; EXPECT_EQ(nullptr, kernel.kernelInfo.kernelAllocation); kernel.kernelInfo.createKernelAllocation(*pDevice, false); auto firstAllocation = kernel.kernelInfo.kernelAllocation; EXPECT_NE(nullptr, firstAllocation); auto firstAllocationSize = firstAllocation->getUnderlyingBufferSize(); size_t isaPadding = HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily).getPaddingForISAAllocation(); EXPECT_EQ(firstAllocationSize, initialHeapSize + isaPadding); auto firstAllocationId = static_cast(firstAllocation)->id; const size_t newHeapSize = initialHeapSize + 1; char newHeap[newHeapSize]; kernel.mockKernel->substituteKernelHeap(newHeap, newHeapSize); auto secondAllocation = kernel.kernelInfo.kernelAllocation; EXPECT_NE(nullptr, secondAllocation); auto secondAllocationSize = secondAllocation->getUnderlyingBufferSize(); EXPECT_NE(secondAllocationSize, initialHeapSize + isaPadding); EXPECT_EQ(secondAllocationSize, newHeapSize + isaPadding); auto secondAllocationId = static_cast(secondAllocation)->id; EXPECT_NE(firstAllocationId, secondAllocationId); pDevice->getMemoryManager()->checkGpuUsageAndDestroyGraphicsAllocations(secondAllocation); } TEST_F(KernelSubstituteTest, givenKernelWhenSubstituteKernelHeapWithSameSizeThenDoesNotAllocateNewKernelAllocation) { MockKernelWithInternals kernel(*pClDevice); const size_t initialHeapSize = 0x40; kernel.kernelInfo.heapInfo.KernelHeapSize = initialHeapSize; EXPECT_EQ(nullptr, kernel.kernelInfo.kernelAllocation); kernel.kernelInfo.createKernelAllocation(*pDevice, false); auto firstAllocation = kernel.kernelInfo.kernelAllocation; EXPECT_NE(nullptr, firstAllocation); auto firstAllocationSize = firstAllocation->getUnderlyingBufferSize(); size_t isaPadding = 
HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily).getPaddingForISAAllocation(); EXPECT_EQ(firstAllocationSize, initialHeapSize + isaPadding); auto firstAllocationId = static_cast(firstAllocation)->id; const size_t newHeapSize = initialHeapSize; char newHeap[newHeapSize]; kernel.mockKernel->substituteKernelHeap(newHeap, newHeapSize); auto secondAllocation = kernel.kernelInfo.kernelAllocation; EXPECT_NE(nullptr, secondAllocation); auto secondAllocationSize = secondAllocation->getUnderlyingBufferSize(); EXPECT_EQ(secondAllocationSize, initialHeapSize + isaPadding); auto secondAllocationId = static_cast(secondAllocation)->id; EXPECT_EQ(firstAllocationId, secondAllocationId); pDevice->getMemoryManager()->checkGpuUsageAndDestroyGraphicsAllocations(secondAllocation); } TEST_F(KernelSubstituteTest, givenKernelWhenSubstituteKernelHeapWithSmallerSizeThenDoesNotAllocateNewKernelAllocation) { MockKernelWithInternals kernel(*pClDevice); const size_t initialHeapSize = 0x40; kernel.kernelInfo.heapInfo.KernelHeapSize = initialHeapSize; EXPECT_EQ(nullptr, kernel.kernelInfo.kernelAllocation); kernel.kernelInfo.createKernelAllocation(*pDevice, false); auto firstAllocation = kernel.kernelInfo.kernelAllocation; EXPECT_NE(nullptr, firstAllocation); auto firstAllocationSize = firstAllocation->getUnderlyingBufferSize(); size_t isaPadding = HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily).getPaddingForISAAllocation(); EXPECT_EQ(firstAllocationSize, initialHeapSize + isaPadding); auto firstAllocationId = static_cast(firstAllocation)->id; const size_t newHeapSize = initialHeapSize - 1; char newHeap[newHeapSize]; kernel.mockKernel->substituteKernelHeap(newHeap, newHeapSize); auto secondAllocation = kernel.kernelInfo.kernelAllocation; EXPECT_NE(nullptr, secondAllocation); auto secondAllocationSize = secondAllocation->getUnderlyingBufferSize(); EXPECT_EQ(secondAllocationSize, initialHeapSize + isaPadding); auto secondAllocationId = static_cast(secondAllocation)->id; 
EXPECT_EQ(firstAllocationId, secondAllocationId); pDevice->getMemoryManager()->checkGpuUsageAndDestroyGraphicsAllocations(secondAllocation); } TEST_F(KernelSubstituteTest, givenKernelWithUsedKernelAllocationWhenSubstituteKernelHeapAndAllocateNewMemoryThenStoreOldAllocationOnTemporaryList) { MockKernelWithInternals kernel(*pClDevice); auto memoryManager = pDevice->getMemoryManager(); auto &commandStreamReceiver = pDevice->getGpgpuCommandStreamReceiver(); const size_t initialHeapSize = 0x40; kernel.kernelInfo.heapInfo.KernelHeapSize = initialHeapSize; kernel.kernelInfo.createKernelAllocation(*pDevice, false); auto firstAllocation = kernel.kernelInfo.kernelAllocation; uint32_t notReadyTaskCount = *commandStreamReceiver.getTagAddress() + 1u; firstAllocation->updateTaskCount(notReadyTaskCount, commandStreamReceiver.getOsContext().getContextId()); const size_t newHeapSize = initialHeapSize + 1; char newHeap[newHeapSize]; EXPECT_TRUE(commandStreamReceiver.getTemporaryAllocations().peekIsEmpty()); kernel.mockKernel->substituteKernelHeap(newHeap, newHeapSize); auto secondAllocation = kernel.kernelInfo.kernelAllocation; EXPECT_FALSE(commandStreamReceiver.getTemporaryAllocations().peekIsEmpty()); EXPECT_EQ(commandStreamReceiver.getTemporaryAllocations().peekHead(), firstAllocation); memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(secondAllocation); commandStreamReceiver.getInternalAllocationStorage()->cleanAllocationList(notReadyTaskCount, TEMPORARY_ALLOCATION); } compute-runtime-22.14.22890/opencl/test/unit_test/libult/000077500000000000000000000000001422164147700231225ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/libult/CMakeLists.txt000066400000000000000000000077231422164147700256730ustar00rootroot00000000000000# # Copyright (C) 2018-2022 Intel Corporation # # SPDX-License-Identifier: MIT # macro(macro_for_each_core_type) foreach(BRANCH_DIR ${BRANCH_DIR_LIST}) if(EXISTS 
${NEO_SOURCE_DIR}/opencl/source${BRANCH_DIR}${CORE_TYPE_LOWER}/enable_family_full_ocl_${CORE_TYPE_LOWER}.cpp) list(APPEND IGDRCL_SRCS_ENABLE_TESTED_HW ${NEO_SOURCE_DIR}/opencl/source${BRANCH_DIR}${CORE_TYPE_LOWER}/enable_family_full_ocl_${CORE_TYPE_LOWER}.cpp ) endif() endforeach() endmacro() apply_macro_for_each_core_type("TESTED") set(IGDRCL_SRCS_LIB_ULT ${NEO_SOURCE_DIR}/opencl/source/compiler_interface/default_cache_config.cpp ${NEO_SOURCE_DIR}/opencl/source/dll/debug_manager.cpp ${NEO_SOURCE_DIR}/opencl/source/helpers/api_specific_config_ocl.cpp ${NEO_SOURCE_DIR}/opencl/source/helpers/implicit_scaling_ocl.cpp ${NEO_SOURCE_DIR}/opencl/source/memory_manager/compression_selector_ocl.cpp ${NEO_SOURCE_DIR}/opencl/source/memory_manager/cpu_page_fault_manager_memory_sync.cpp ${NEO_SOURCE_DIR}/shared/source/helpers/allow_deferred_deleter.cpp ${NEO_SOURCE_DIR}/opencl/test/unit_test/helpers/cl_execution_environment_helper.cpp ${NEO_SOURCE_DIR}/opencl/test/unit_test/helpers/cl_execution_environment_helper.h ) add_library(igdrcl_libult OBJECT EXCLUDE_FROM_ALL ${IGDRCL_SRCS_LIB_ULT} ${IGDRCL_SRCS_ENABLE_TESTED_HW} ) set(IGDRCL_SRCS_LIB_ULT_ENV ${CMAKE_CURRENT_SOURCE_DIR}/command_queue_ult.cpp ${NEO_SOURCE_DIR}/shared/test/common/helpers/custom_event_listener.h ${NEO_SOURCE_DIR}/opencl/test/unit_test/main.cpp ${NEO_SOURCE_DIR}/opencl/test/unit_test/command_queue/command_queue_fixture.cpp ${NEO_SOURCE_DIR}/opencl/test/unit_test/command_queue/command_queue_fixture.h ${NEO_SOURCE_DIR}/opencl/test/unit_test/fixtures/built_in_fixture.cpp ${NEO_SOURCE_DIR}/opencl/test/unit_test/fixtures/built_in_fixture.h ${NEO_SOURCE_DIR}/opencl/test/unit_test/fixtures/buffer_fixture.cpp ${NEO_SOURCE_DIR}/opencl/test/unit_test/fixtures/buffer_fixture.h ${NEO_SOURCE_DIR}/opencl/test/unit_test/fixtures/cl_device_fixture.cpp ${NEO_SOURCE_DIR}/opencl/test/unit_test/fixtures/cl_device_fixture.h ${NEO_SOURCE_DIR}/opencl/test/unit_test/fixtures/context_fixture.cpp 
${NEO_SOURCE_DIR}/opencl/test/unit_test/fixtures/context_fixture.h ${NEO_SOURCE_DIR}/opencl/test/unit_test/fixtures/program_fixture.cpp ${NEO_SOURCE_DIR}/opencl/test/unit_test/fixtures/program_fixture.h ${NEO_SOURCE_DIR}/opencl/test/unit_test/indirect_heap/indirect_heap_fixture.cpp ${NEO_SOURCE_DIR}/opencl/test/unit_test/indirect_heap/indirect_heap_fixture.h ${NEO_SOURCE_DIR}/opencl/test/unit_test/ult_config_listener.cpp ${NEO_SOURCE_DIR}/opencl/test/unit_test/ult_config_listener.h ) add_library(igdrcl_libult_env OBJECT EXCLUDE_FROM_ALL ${IGDRCL_SRCS_LIB_ULT_ENV} ) target_include_directories(igdrcl_libult_env PRIVATE $ ) set_property(GLOBAL PROPERTY IGDRCL_SRCS_ENABLE_TESTED_HW ${IGDRCL_SRCS_ENABLE_TESTED_HW}) set_property(GLOBAL PROPERTY IGDRCL_SRCS_ENABLE_TESTED_HW_LINUX ${IGDRCL_SRCS_ENABLE_TESTED_HW_LINUX}) set_property(GLOBAL PROPERTY IGDRCL_SRCS_LIB_ULT ${IGDRCL_SRCS_LIB_ULT}) set_property(GLOBAL PROPERTY IGDRCL_SRCS_LIB_ULT_ENV ${IGDRCL_SRCS_LIB_ULT_ENV}) foreach(target_name igdrcl_libult igdrcl_libult_env) set_target_properties(${target_name} PROPERTIES POSITION_INDEPENDENT_CODE ON) set_target_properties(${target_name} PROPERTIES FOLDER ${OPENCL_TEST_PROJECTS_FOLDER}) set_property(TARGET ${target_name} APPEND_STRING PROPERTY COMPILE_FLAGS ${ASAN_FLAGS} ${TSAN_FLAGS}) target_include_directories(${target_name} PRIVATE $) target_compile_definitions(${target_name} PRIVATE $) create_project_source_tree(${target_name}) endforeach() add_subdirectories() compute-runtime-22.14.22890/opencl/test/unit_test/libult/command_queue_ult.cpp000066400000000000000000000012021422164147700273270ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/helpers/ult_hw_config.h" #include "opencl/source/command_queue/command_queue.h" namespace NEO { bool CommandQueue::isAssignEngineRoundRobinEnabled() { auto assignEngineRoundRobin = false; if 
(DebugManager.flags.EnableCmdQRoundRobindEngineAssign.get() != -1) { assignEngineRoundRobin = DebugManager.flags.EnableCmdQRoundRobindEngineAssign.get(); } return assignEngineRoundRobin; } bool CommandQueue::isTimestampWaitEnabled() { return ultHwConfig.useWaitForTimestamps; } } // namespace NEOcompute-runtime-22.14.22890/opencl/test/unit_test/linux/000077500000000000000000000000001422164147700227665ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/linux/CMakeLists.txt000066400000000000000000000046041422164147700255320ustar00rootroot00000000000000# # Copyright (C) 2018-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_linux_dll_tests ) set(target_name linux_dll_tests) add_executable(igdrcl_${target_name} $ $ $ $ $ $ ${CMAKE_CURRENT_SOURCE_DIR}/drm_null_device_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/drm_wrap.h ${CMAKE_CURRENT_SOURCE_DIR}/main_linux_dll.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_os_layer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_os_layer.h ${CMAKE_CURRENT_SOURCE_DIR}/os_interface_linux_tests.cpp ${NEO_SHARED_DIRECTORY}/os_interface/linux/sys_calls_linux.cpp ${NEO_SHARED_DIRECTORY}/dll/create_memory_manager_drm.cpp ${NEO_SHARED_DIRECTORY}/dll/direct_submission_controller_enabled.cpp ${NEO_SHARED_DIRECTORY}/dll/linux/drm_neo_create.cpp ${NEO_SHARED_DIRECTORY}/dll/linux/options_linux.cpp ${NEO_SHARED_DIRECTORY}/dll/linux/os_interface.cpp ${NEO_SOURCE_DIR}/opencl/source/dll/command_queue_dll.cpp ${NEO_SOURCE_DIR}/opencl/source/os_interface/linux/platform_teardown_linux.cpp ${NEO_SOURCE_DIR}/opencl/test/unit_test/linux${BRANCH_DIR_SUFFIX}drm_other_requests.cpp ) if(NEO__LIBVA_FOUND) target_sources(igdrcl_${target_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/va_tests.cpp) endif() target_link_libraries(igdrcl_${target_name} ${NEO_MOCKABLE_LIB_NAME} ${NEO_SHARED_MOCKABLE_LIB_NAME} igdrcl_mocks ${NEO_EXTRA_LIBS} gmock-gtest) set_property(TARGET igdrcl_${target_name} APPEND_STRING PROPERTY COMPILE_FLAGS ${ASAN_FLAGS}) 
target_include_directories(igdrcl_${target_name} PRIVATE ${NEO_SHARED_TEST_DIRECTORY}/common/test_macros/header${BRANCH_DIR_SUFFIX} ${NEO_SHARED_DIRECTORY}/dll/linux/devices${BRANCH_DIR_SUFFIX} ${NEO_SOURCE_DIR}/opencl/test/unit_test/gen_common${BRANCH_DIR_SUFFIX} ${NEO_SHARED_TEST_DIRECTORY}/common/test_configuration/unit_tests ) add_dependencies(unit_tests igdrcl_${target_name}) add_subdirectories() compute-runtime-22.14.22890/opencl/test/unit_test/linux/drm_null_device_tests.cpp000066400000000000000000000052621422164147700300540ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/helpers/register_offsets.h" #include "shared/source/os_interface/linux/drm_null_device.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/linux/drm_wrap.h" #include "opencl/test/unit_test/linux/mock_os_layer.h" #include using namespace NEO; extern const DeviceDescriptor NEO::deviceDescriptorTable[]; class DrmNullDeviceTestsFixture { public: void SetUp() { if (deviceDescriptorTable[0].deviceId == 0) { GTEST_SKIP(); } // Create nullDevice drm DebugManager.flags.EnableNullHardware.set(true); executionEnvironment.prepareRootDeviceEnvironments(1); drmNullDevice = DrmWrap::createDrm(*executionEnvironment.rootDeviceEnvironments[0]); ASSERT_NE(drmNullDevice, nullptr); } void TearDown() { } std::unique_ptr drmNullDevice; ExecutionEnvironment executionEnvironment; protected: DebugManagerStateRestore dbgRestorer; }; typedef Test DrmNullDeviceTests; TEST_F(DrmNullDeviceTests, GIVENdrmNullDeviceWHENcallGetDeviceIdTHENreturnProperDeviceId) { int deviceIdQueried = 0; int ret = drmNullDevice->getDeviceID(deviceIdQueried); EXPECT_EQ(0, ret); EXPECT_EQ(deviceId, deviceIdQueried); } TEST_F(DrmNullDeviceTests, 
GIVENdrmNullDeviceWHENcallIoctlTHENalwaysSuccess) { EXPECT_EQ(drmNullDevice->ioctl(0, nullptr), 0); } TEST_F(DrmNullDeviceTests, GIVENdrmNullDeviceWHENregReadOtherThenTimestampReadTHENalwaysSuccess) { struct drm_i915_reg_read arg; arg.offset = 0; ASSERT_EQ(drmNullDevice->ioctl(DRM_IOCTL_I915_REG_READ, &arg), 0); } TEST_F(DrmNullDeviceTests, GIVENdrmNullDeviceWHENgetGpuTimestamp32bOr64bTHENerror) { struct drm_i915_reg_read arg; arg.offset = REG_GLOBAL_TIMESTAMP_LDW; ASSERT_EQ(drmNullDevice->ioctl(DRM_IOCTL_I915_REG_READ, &arg), -1); arg.offset = REG_GLOBAL_TIMESTAMP_UN; ASSERT_EQ(drmNullDevice->ioctl(DRM_IOCTL_I915_REG_READ, &arg), -1); } TEST_F(DrmNullDeviceTests, GIVENdrmNullDeviceWHENgetGpuTimestamp36bTHENproperValues) { struct drm_i915_reg_read arg; arg.offset = REG_GLOBAL_TIMESTAMP_LDW | 1; ASSERT_EQ(drmNullDevice->ioctl(DRM_IOCTL_I915_REG_READ, &arg), 0); EXPECT_EQ(arg.val, 1000ULL); ASSERT_EQ(drmNullDevice->ioctl(DRM_IOCTL_I915_REG_READ, &arg), 0); EXPECT_EQ(arg.val, 2000ULL); ASSERT_EQ(drmNullDevice->ioctl(DRM_IOCTL_I915_REG_READ, &arg), 0); EXPECT_EQ(arg.val, 3000ULL); } compute-runtime-22.14.22890/opencl/test/unit_test/linux/drm_other_requests.cpp000066400000000000000000000002631422164147700274110ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include int drmOtherRequests(unsigned long int request, ...) 
{ return 0; } compute-runtime-22.14.22890/opencl/test/unit_test/linux/drm_wrap.h000066400000000000000000000015661422164147700247620ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/os_interface/linux/drm_neo.h" #include "shared/source/os_interface/linux/hw_device_id.h" #include "shared/source/os_interface/os_interface.h" #include "drm/i915_drm.h" class DrmWrap : public NEO::Drm { public: using Drm::virtualMemoryIds; static std::unique_ptr createDrm(RootDeviceEnvironment &rootDeviceEnvironment) { auto hwDeviceIds = OSInterface::discoverDevices(rootDeviceEnvironment.executionEnvironment); if (!hwDeviceIds.empty()) { return std::unique_ptr{NEO::Drm::create(std::unique_ptr(hwDeviceIds[0].release()->as()), rootDeviceEnvironment)}; } return nullptr; } }; compute-runtime-22.14.22890/opencl/test/unit_test/linux/main_linux_dll.cpp000066400000000000000000001113031422164147700264670ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/direct_submission/direct_submission_controller.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/basic_math.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/os_interface/driver_info.h" #include "shared/source/os_interface/linux/allocator_helper.h" #include "shared/source/os_interface/linux/sys_calls.h" #include "shared/source/os_interface/os_interface.h" #include "shared/test/common/helpers/custom_event_listener.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/default_hw_info.inl" #include "shared/test/common/helpers/ult_hw_config.inl" #include "shared/test/common/helpers/variable_backup.h" 
#include "shared/test/common/libult/signal_utils.h" #include "shared/test/common/mocks/mock_execution_environment.h" #include "shared/test/common/os_interface/linux/device_command_stream_fixture.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/helpers/gtest_helpers.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/linux/drm_wrap.h" #include "opencl/test/unit_test/linux/mock_os_layer.h" #include "gtest/gtest.h" #include "os_inc.h" #include #include namespace Os { extern const char *dxcoreDllName; } namespace NEO { void __attribute__((destructor)) platformsDestructor(); extern const DeviceDescriptor deviceDescriptorTable[]; } // namespace NEO NEO::OsLibrary *setAdapterInfo(const PLATFORM *platform, const GT_SYSTEM_INFO *gtSystemInfo, uint64_t gpuAddressSpace) { return nullptr; } using namespace NEO; class DrmTestsFixture { public: void SetUp() { if (deviceDescriptorTable[0].deviceId == 0) { GTEST_SKIP(); } executionEnvironment.prepareRootDeviceEnvironments(1); rootDeviceEnvironment = executionEnvironment.rootDeviceEnvironments[0].get(); } void TearDown() { } ExecutionEnvironment executionEnvironment; RootDeviceEnvironment *rootDeviceEnvironment = nullptr; }; typedef Test DrmTests; void initializeTestedDevice() { for (uint32_t i = 0; deviceDescriptorTable[i].deviceId != 0; i++) { if (defaultHwInfo->platform.eProductFamily == deviceDescriptorTable[i].pHwInfo->platform.eProductFamily) { deviceId = deviceDescriptorTable[i].deviceId; break; } } } int openRetVal = 0; std::string lastOpenedPath; int testOpen(const char *fullPath, int, ...) { return openRetVal; }; int openCounter = 1; int openWithCounter(const char *fullPath, int, ...) 
{ if (openCounter > 0) { if (fullPath) { lastOpenedPath = fullPath; } openCounter--; return 1023; // valid file descriptor for ULT } return -1; }; struct DrmSimpleTests : public ::testing::Test { void SetUp() override { if (deviceDescriptorTable[0].deviceId == 0) { GTEST_SKIP(); } } }; TEST_F(DrmSimpleTests, GivenTwoOpenableDevicesWhenDiscoverDevicesThenCreateTwoHwDeviceIds) { VariableBackup backupOpenFull(&openFull); openFull = openWithCounter; openCounter = 2; ExecutionEnvironment executionEnvironment; auto hwDeviceIds = OSInterface::discoverDevices(executionEnvironment); EXPECT_EQ(2u, hwDeviceIds.size()); } TEST_F(DrmSimpleTests, GivenSelectedNotExistingDeviceUsingForceDeviceIdFlagWhenGetDeviceFdThenFail) { DebugManagerStateRestore stateRestore; DebugManager.flags.ForceDeviceId.set("invalid"); openFull = nullptr; // open shouldn't be called ExecutionEnvironment executionEnvironment; auto hwDeviceIds = OSInterface::discoverDevices(executionEnvironment); EXPECT_TRUE(hwDeviceIds.empty()); } TEST_F(DrmSimpleTests, GivenSelectedExistingDeviceUsingForceDeviceIdFlagWhenGetDeviceFdThenReturnFd) { DebugManagerStateRestore stateRestore; DebugManager.flags.ForceDeviceId.set("0000:00:02.0"); VariableBackup backupOpenFull(&openFull); openFull = openWithCounter; openCounter = 10; ExecutionEnvironment executionEnvironment; auto hwDeviceIds = OSInterface::discoverDevices(executionEnvironment); EXPECT_EQ(1u, hwDeviceIds.size()); EXPECT_NE(nullptr, hwDeviceIds[0].get()); EXPECT_STREQ("/dev/dri/by-path/platform-4010000000.pcie-pci-0000:00:02.0-render", lastOpenedPath.c_str()); EXPECT_EQ(9, openCounter); // only one opened file } TEST_F(DrmSimpleTests, GivenSelectedNotExistingDeviceUsingFilterBdfWhenGetDeviceFdThenFail) { DebugManagerStateRestore stateRestore; DebugManager.flags.FilterBdfPath.set("invalid"); VariableBackup backupOpenFull(&openFull); openFull = nullptr; // open shouldn't be called ExecutionEnvironment executionEnvironment; auto hwDeviceIds = 
OSInterface::discoverDevices(executionEnvironment); EXPECT_TRUE(hwDeviceIds.empty()); } TEST_F(DrmSimpleTests, GivenSelectedExistingDeviceUsingFilterBdfWhenGetDeviceFdThenReturnFd) { DebugManagerStateRestore stateRestore; DebugManager.flags.FilterBdfPath.set("0000:00:02.0"); VariableBackup backupOpenFull(&openFull); openFull = openWithCounter; openCounter = 10; ExecutionEnvironment executionEnvironment; auto hwDeviceIds = OSInterface::discoverDevices(executionEnvironment); EXPECT_EQ(1u, hwDeviceIds.size()); EXPECT_NE(nullptr, hwDeviceIds[0].get()); EXPECT_STREQ("/dev/dri/by-path/platform-4010000000.pcie-pci-0000:00:02.0-render", lastOpenedPath.c_str()); EXPECT_EQ(9, openCounter); // only one opened file } TEST_F(DrmSimpleTests, GivenSelectedExistingDeviceWhenOpenDirSuccedsThenHwDeviceIdsHaveProperPciPaths) { VariableBackup backupOpenFull(&openFull); VariableBackup backupOpenDir(&failOnOpenDir, false); VariableBackup backupEntryIndex(&entryIndex, 0u); openFull = openWithCounter; ExecutionEnvironment executionEnvironment; entryIndex = 0; openCounter = 1; auto hwDeviceIds = OSInterface::discoverDevices(executionEnvironment); EXPECT_EQ(1u, hwDeviceIds.size()); EXPECT_NE(nullptr, hwDeviceIds[0].get()); EXPECT_STREQ("0000:00:03.1", hwDeviceIds[0]->as()->getPciPath()); entryIndex = 0; openCounter = 2; hwDeviceIds = OSInterface::discoverDevices(executionEnvironment); EXPECT_EQ(2u, hwDeviceIds.size()); EXPECT_NE(nullptr, hwDeviceIds[0].get()); EXPECT_STREQ("0000:00:03.1", hwDeviceIds[0]->as()->getPciPath()); EXPECT_NE(nullptr, hwDeviceIds[1].get()); EXPECT_STREQ("0000:00:02.0", hwDeviceIds[1]->as()->getPciPath()); } TEST_F(DrmSimpleTests, GivenSelectedExistingDeviceWhenOpenDirFailsThenRetryOpeningRenderDevices) { VariableBackup backupOpenFull(&openFull); VariableBackup backupOpenDir(&failOnOpenDir, true); VariableBackup backupReadlink(&readLinkCalledTimes, 0); openFull = openWithCounter; openCounter = 1; ExecutionEnvironment executionEnvironment; auto hwDeviceIds = 
OSInterface::discoverDevices(executionEnvironment); EXPECT_STREQ("/dev/dri/renderD128", lastOpenedPath.c_str()); EXPECT_EQ(1u, hwDeviceIds.size()); EXPECT_NE(nullptr, hwDeviceIds[0].get()); EXPECT_STREQ("0000:00:02.0", hwDeviceIds[0]->as()->getPciPath()); openCounter = 2; hwDeviceIds = OSInterface::discoverDevices(executionEnvironment); EXPECT_STREQ("/dev/dri/renderD129", lastOpenedPath.c_str()); EXPECT_EQ(2u, hwDeviceIds.size()); EXPECT_NE(nullptr, hwDeviceIds[0].get()); EXPECT_STREQ("0000:00:02.0", hwDeviceIds[0]->as()->getPciPath()); EXPECT_NE(nullptr, hwDeviceIds[1].get()); EXPECT_STREQ("0000:00:03.0", hwDeviceIds[1]->as()->getPciPath()); } TEST_F(DrmSimpleTests, givenPrintIoctlEntriesWhenCallIoctlThenIoctlIsPrinted) { ::testing::internal::CaptureStdout(); auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); auto drm = DrmWrap::createDrm(*executionEnvironment->rootDeviceEnvironments[0]); DebugManagerStateRestore restorer; DebugManager.flags.PrintIoctlEntries.set(true); uint32_t contextId = 1u; drm->destroyDrmContext(contextId); std::string output = ::testing::internal::GetCapturedStdout(); EXPECT_STREQ(output.c_str(), "IOCTL DRM_IOCTL_I915_GEM_CONTEXT_DESTROY called\nIOCTL DRM_IOCTL_I915_GEM_CONTEXT_DESTROY returns 0\n"); } struct DrmFailedIoctlTests : public ::testing::Test { void SetUp() override { if (deviceDescriptorTable[0].deviceId == 0) { GTEST_SKIP(); } } }; TEST_F(DrmFailedIoctlTests, givenPrintIoctlEntriesWhenCallFailedIoctlThenExpectedIoctlIsPrinted) { ::testing::internal::CaptureStdout(); auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); auto drm = DrmWrap::createDrm(*executionEnvironment->rootDeviceEnvironments[0]); DebugManagerStateRestore restorer; DebugManager.flags.PrintIoctlEntries.set(true); uint32_t contextId = 1u; uint32_t vmId = 100u; drm->queryVmId(contextId, vmId); std::string output = ::testing::internal::GetCapturedStdout(); 
EXPECT_STREQ(output.c_str(), "IOCTL DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM called\nIOCTL DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM returns -1, errno 9(Bad file descriptor)\n"); } TEST_F(DrmSimpleTests, givenPrintIoctlTimesWhenCallIoctlThenStatisticsAreGathered) { struct DrmMock : public Drm { using Drm::ioctlStatistics; }; constexpr long long initialMin = std::numeric_limits::max(); constexpr long long initialMax = 0; auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); auto drm = static_cast(DrmWrap::createDrm(*executionEnvironment->rootDeviceEnvironments[0]).release()); DebugManagerStateRestore restorer; DebugManager.flags.PrintIoctlTimes.set(true); EXPECT_TRUE(drm->ioctlStatistics.empty()); int euTotal = 0u; uint32_t contextId = 1u; drm->getEuTotal(euTotal); EXPECT_EQ(1u, drm->ioctlStatistics.size()); drm->getEuTotal(euTotal); EXPECT_EQ(1u, drm->ioctlStatistics.size()); drm->setLowPriorityContextParam(contextId); EXPECT_EQ(2u, drm->ioctlStatistics.size()); auto euTotalData = drm->ioctlStatistics.find(DRM_IOCTL_I915_GETPARAM); ASSERT_TRUE(euTotalData != drm->ioctlStatistics.end()); EXPECT_EQ(static_cast(DRM_IOCTL_I915_GETPARAM), euTotalData->first); EXPECT_EQ(2u, euTotalData->second.count); EXPECT_NE(0, euTotalData->second.totalTime); EXPECT_NE(initialMin, euTotalData->second.minTime); EXPECT_NE(initialMax, euTotalData->second.minTime); EXPECT_NE(initialMin, euTotalData->second.maxTime); EXPECT_NE(initialMax, euTotalData->second.maxTime); auto firstTime = euTotalData->second.totalTime; auto lowPriorityData = drm->ioctlStatistics.find(DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM); ASSERT_TRUE(lowPriorityData != drm->ioctlStatistics.end()); EXPECT_EQ(static_cast(DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM), lowPriorityData->first); EXPECT_EQ(1u, lowPriorityData->second.count); EXPECT_NE(0, lowPriorityData->second.totalTime); EXPECT_NE(initialMin, lowPriorityData->second.minTime); EXPECT_NE(initialMax, lowPriorityData->second.minTime); 
EXPECT_NE(initialMin, lowPriorityData->second.maxTime); EXPECT_NE(initialMax, lowPriorityData->second.maxTime); drm->getEuTotal(euTotal); EXPECT_EQ(drm->ioctlStatistics.size(), 2u); euTotalData = drm->ioctlStatistics.find(DRM_IOCTL_I915_GETPARAM); ASSERT_TRUE(euTotalData != drm->ioctlStatistics.end()); EXPECT_EQ(static_cast(DRM_IOCTL_I915_GETPARAM), euTotalData->first); EXPECT_EQ(3u, euTotalData->second.count); EXPECT_NE(0u, euTotalData->second.totalTime); auto secondTime = euTotalData->second.totalTime; EXPECT_GT(secondTime, firstTime); lowPriorityData = drm->ioctlStatistics.find(DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM); ASSERT_TRUE(lowPriorityData != drm->ioctlStatistics.end()); EXPECT_EQ(static_cast(DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM), lowPriorityData->first); EXPECT_EQ(1u, lowPriorityData->second.count); EXPECT_NE(0, lowPriorityData->second.totalTime); drm->destroyDrmContext(contextId); EXPECT_EQ(3u, drm->ioctlStatistics.size()); euTotalData = drm->ioctlStatistics.find(DRM_IOCTL_I915_GETPARAM); ASSERT_TRUE(euTotalData != drm->ioctlStatistics.end()); EXPECT_EQ(static_cast(DRM_IOCTL_I915_GETPARAM), euTotalData->first); EXPECT_EQ(3u, euTotalData->second.count); EXPECT_NE(0, euTotalData->second.totalTime); lowPriorityData = drm->ioctlStatistics.find(DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM); ASSERT_TRUE(lowPriorityData != drm->ioctlStatistics.end()); EXPECT_EQ(static_cast(DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM), lowPriorityData->first); EXPECT_EQ(1u, lowPriorityData->second.count); EXPECT_NE(0, lowPriorityData->second.totalTime); auto destroyData = drm->ioctlStatistics.find(DRM_IOCTL_I915_GEM_CONTEXT_DESTROY); ASSERT_TRUE(destroyData != drm->ioctlStatistics.end()); EXPECT_EQ(static_cast(DRM_IOCTL_I915_GEM_CONTEXT_DESTROY), destroyData->first); EXPECT_EQ(1u, destroyData->second.count); EXPECT_NE(0, destroyData->second.totalTime); ::testing::internal::CaptureStdout(); delete drm; std::string output = ::testing::internal::GetCapturedStdout(); EXPECT_STRNE("", output.c_str()); 
std::string_view requestString("Request"); std::string_view totalTimeString("Total time(ns)"); std::string_view countString("Count"); std::string_view avgTimeString("Avg time per ioctl"); std::string_view minString("Min"); std::string_view maxString("Max"); std::size_t position = output.find(requestString); EXPECT_NE(std::string::npos, position); position += requestString.size(); position = output.find(totalTimeString, position); EXPECT_NE(std::string::npos, position); position += totalTimeString.size(); position = output.find(countString, position); EXPECT_NE(std::string::npos, position); position += countString.size(); position = output.find(avgTimeString, position); EXPECT_NE(std::string::npos, position); position += avgTimeString.size(); position = output.find(minString, position); EXPECT_NE(std::string::npos, position); position += minString.size(); position = output.find(maxString, position); EXPECT_NE(std::string::npos, position); } TEST_F(DrmSimpleTests, GivenSelectedNonExistingDeviceWhenOpenDirFailsThenRetryOpeningRenderDevicesAndNoDevicesAreCreated) { VariableBackup backupOpenFull(&openFull); VariableBackup backupOpenDir(&failOnOpenDir, true); openFull = openWithCounter; openCounter = 0; ExecutionEnvironment executionEnvironment; auto hwDeviceIds = OSInterface::discoverDevices(executionEnvironment); EXPECT_EQ(0u, hwDeviceIds.size()); } TEST_F(DrmSimpleTests, GivenFailingOpenDirAndMultipleAvailableDevicesWhenCreateMultipleRootDevicesFlagIsSetThenTheFlagIsRespected) { DebugManagerStateRestore stateRestore; VariableBackup backupOpenFull(&openFull); VariableBackup backupOpenDir(&failOnOpenDir, true); VariableBackup backupReadlink(&readLinkCalledTimes, 0); openFull = openWithCounter; ExecutionEnvironment executionEnvironment; const uint32_t requestedNumRootDevices = 2u; DebugManager.flags.CreateMultipleRootDevices.set(requestedNumRootDevices); openCounter = 4; auto hwDeviceIds = OSInterface::discoverDevices(executionEnvironment); 
EXPECT_STREQ("/dev/dri/renderD129", lastOpenedPath.c_str()); EXPECT_EQ(requestedNumRootDevices, hwDeviceIds.size()); EXPECT_NE(nullptr, hwDeviceIds[0].get()); EXPECT_STREQ("0000:00:02.0", hwDeviceIds[0]->as()->getPciPath()); EXPECT_NE(nullptr, hwDeviceIds[1].get()); EXPECT_STREQ("0000:00:03.0", hwDeviceIds[1]->as()->getPciPath()); } TEST_F(DrmSimpleTests, GivenMultipleAvailableDevicesWhenCreateMultipleRootDevicesFlagIsSetThenTheFlagIsRespected) { DebugManagerStateRestore stateRestore; VariableBackup backupOpenFull(&openFull); openFull = openWithCounter; ExecutionEnvironment executionEnvironment; const uint32_t requestedNumRootDevices = 2u; DebugManager.flags.CreateMultipleRootDevices.set(requestedNumRootDevices); openCounter = 4; auto hwDeviceIds = OSInterface::discoverDevices(executionEnvironment); EXPECT_STREQ("/dev/dri/by-path/platform-4010000000.pcie-pci-0000:00:02.0-render", lastOpenedPath.c_str()); EXPECT_EQ(requestedNumRootDevices, hwDeviceIds.size()); EXPECT_NE(nullptr, hwDeviceIds[0].get()); EXPECT_STREQ("0000:00:03.1", hwDeviceIds[0]->as()->getPciPath()); EXPECT_NE(nullptr, hwDeviceIds[1].get()); EXPECT_STREQ("0000:00:02.0", hwDeviceIds[1]->as()->getPciPath()); } TEST_F(DrmTests, GivenSelectedIncorectDeviceByDeviceIdWhenGetDeviceFdThenFail) { DebugManagerStateRestore stateRestore; DebugManager.flags.FilterDeviceId.set("invalid"); auto drm1 = DrmWrap::createDrm(*rootDeviceEnvironment); EXPECT_EQ(drm1, nullptr); } TEST_F(DrmTests, GivenSelectedCorrectDeviceByDeviceIdWhenGetDeviceFdThenSucceed) { DebugManagerStateRestore stateRestore; std::stringstream deviceIdStr; deviceIdStr << std::hex << deviceId; DebugManager.flags.FilterDeviceId.set(deviceIdStr.str()); auto drm1 = DrmWrap::createDrm(*rootDeviceEnvironment); EXPECT_NE(drm1, nullptr); } TEST_F(DrmSimpleTests, givenUseVmBindFlagWhenOverrideBindSupportThenReturnProperValue) { DebugManagerStateRestore dbgRestorer; bool useVmBind = false; DebugManager.flags.UseVmBind.set(1); 
Drm::overrideBindSupport(useVmBind); EXPECT_TRUE(useVmBind); DebugManager.flags.UseVmBind.set(0); Drm::overrideBindSupport(useVmBind); EXPECT_FALSE(useVmBind); DebugManager.flags.UseVmBind.set(-1); Drm::overrideBindSupport(useVmBind); EXPECT_FALSE(useVmBind); } TEST_F(DrmTests, GivenErrorCodeWhenCreatingDrmThenDrmCreatedOnlyWithSpecificErrors) { auto drm = DrmWrap::createDrm(*rootDeviceEnvironment); EXPECT_NE(drm, nullptr); drm_i915_getparam_t getParam; int lDeviceId; VariableBackup backupIoctlCnt(&ioctlCnt); VariableBackup backupIoctlSeq(&ioctlSeq[0]); ioctlCnt = 0; ioctlSeq[0] = -1; errno = EINTR; // check if device works, although there was EINTR error from KMD getParam.param = I915_PARAM_CHIPSET_ID; getParam.value = &lDeviceId; auto ret = drm->ioctl(DRM_IOCTL_I915_GETPARAM, &getParam); EXPECT_EQ(0, ret); EXPECT_EQ(deviceId, lDeviceId); ioctlCnt = 0; ioctlSeq[0] = -1; errno = EAGAIN; // check if device works, although there was EAGAIN error from KMD getParam.param = I915_PARAM_CHIPSET_ID; getParam.value = &lDeviceId; ret = drm->ioctl(DRM_IOCTL_I915_GETPARAM, &getParam); EXPECT_EQ(0, ret); EXPECT_EQ(deviceId, lDeviceId); ioctlCnt = 0; ioctlSeq[0] = -1; errno = EBUSY; // check if device works, although there was EBUSY error from KMD getParam.param = I915_PARAM_CHIPSET_ID; getParam.value = &lDeviceId; ret = drm->ioctl(DRM_IOCTL_I915_GETPARAM, &getParam); EXPECT_EQ(0, ret); EXPECT_EQ(deviceId, lDeviceId); ioctlCnt = 0; ioctlSeq[0] = -1; errno = 0; // we failed with any other error code getParam.param = I915_PARAM_CHIPSET_ID; getParam.value = &lDeviceId; ret = drm->ioctl(DRM_IOCTL_I915_GETPARAM, &getParam); EXPECT_EQ(-1, ret); EXPECT_EQ(deviceId, lDeviceId); } TEST_F(DrmTests, WhenCreatingTwiceThenDifferentDrmReturned) { auto drm1 = DrmWrap::createDrm(*rootDeviceEnvironment); EXPECT_NE(drm1, nullptr); auto drm2 = DrmWrap::createDrm(*rootDeviceEnvironment); EXPECT_NE(drm2, nullptr); EXPECT_NE(drm1, drm2); } TEST_F(DrmTests, WhenDriDeviceFoundThenDrmCreatedOnFallback) 
{ VariableBackup backupHaveDri(&haveDri); haveDri = 1; auto drm = DrmWrap::createDrm(*rootDeviceEnvironment); EXPECT_NE(drm, nullptr); } TEST_F(DrmTests, GivenNoDeviceWhenCreatingDrmThenNullIsReturned) { VariableBackup backupHaveDri(&haveDri); haveDri = -1; auto drm = DrmWrap::createDrm(*rootDeviceEnvironment); EXPECT_EQ(drm, nullptr); } TEST_F(DrmTests, GivenUnknownDeviceWhenCreatingDrmThenNullIsReturned) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.PrintDebugMessages.set(true); VariableBackup backupDeviceId(&deviceId); deviceId = -1; ::testing::internal::CaptureStderr(); ::testing::internal::CaptureStdout(); auto drm = DrmWrap::createDrm(*rootDeviceEnvironment); EXPECT_EQ(drm, nullptr); std::string errStr = ::testing::internal::GetCapturedStderr(); EXPECT_TRUE(hasSubstr(errStr, std::string("FATAL: Unknown device: deviceId: ffffffff, revisionId: 0000"))); ::testing::internal::GetCapturedStdout(); } TEST_F(DrmTests, GivenNoSoftPinWhenCreatingDrmThenNullIsReturned) { VariableBackup backupHaveSoftPin(&haveSoftPin); haveSoftPin = 0; auto drm = DrmWrap::createDrm(*rootDeviceEnvironment); EXPECT_EQ(drm, nullptr); } TEST_F(DrmTests, WhenCantFindDeviceIdThenDrmIsNotCreated) { VariableBackup backupFailOnDeviceId(&failOnDeviceId); failOnDeviceId = -1; auto drm = DrmWrap::createDrm(*rootDeviceEnvironment); EXPECT_EQ(drm, nullptr); } TEST_F(DrmTests, WhenCantQueryEuCountThenDrmIsNotCreated) { VariableBackup backupfailOnEuTotal(&failOnEuTotal); failOnEuTotal = -1; auto drm = DrmWrap::createDrm(*rootDeviceEnvironment); EXPECT_EQ(drm, nullptr); } TEST_F(DrmTests, WhenCantQuerySubsliceCountThenDrmIsNotCreated) { VariableBackup backupfailOnSubsliceTotal(&failOnSubsliceTotal); failOnSubsliceTotal = -1; auto drm = DrmWrap::createDrm(*rootDeviceEnvironment); EXPECT_EQ(drm, nullptr); } TEST_F(DrmTests, WhenCantQueryRevisionIdThenDrmIsNotCreated) { VariableBackup backupFailOnRevisionId(&failOnRevisionId); failOnRevisionId = -1; auto drm = 
DrmWrap::createDrm(*rootDeviceEnvironment); EXPECT_EQ(drm, nullptr); } TEST_F(DrmTests, WhenCantQuerySoftPinSupportThenDrmIsNotCreated) { VariableBackup backupFailOnSoftPin(&failOnSoftPin); failOnSoftPin = -1; auto drm = DrmWrap::createDrm(*rootDeviceEnvironment); EXPECT_EQ(drm, nullptr); } TEST_F(DrmTests, GivenFailOnParamBoostWhenCreatingDrmThenDrmIsCreated) { VariableBackup backupFailOnParamBoost(&failOnParamBoost); failOnParamBoost = -1; auto drm = DrmWrap::createDrm(*rootDeviceEnvironment); //non-fatal error - issue warning only EXPECT_NE(drm, nullptr); } TEST_F(DrmTests, GivenFailOnContextCreateWhenCreatingDrmThenDrmIsCreated) { VariableBackup backupFailOnContextCreate(&failOnContextCreate); auto drm = DrmWrap::createDrm(*rootDeviceEnvironment); EXPECT_NE(drm, nullptr); failOnContextCreate = -1; EXPECT_THROW(drm->createDrmContext(1, false, false), std::exception); EXPECT_FALSE(drm->isPreemptionSupported()); failOnContextCreate = 0; } TEST_F(DrmTests, GivenFailOnSetPriorityWhenCreatingDrmThenDrmIsCreated) { VariableBackup backupFailOnSetPriority(&failOnSetPriority); auto drm = DrmWrap::createDrm(*rootDeviceEnvironment); EXPECT_NE(drm, nullptr); failOnSetPriority = -1; auto drmContext = drm->createDrmContext(1, false, false); EXPECT_THROW(drm->setLowPriorityContextParam(drmContext), std::exception); EXPECT_FALSE(drm->isPreemptionSupported()); failOnSetPriority = 0; } TEST_F(DrmTests, WhenCantQueryDrmVersionThenDrmIsNotCreated) { VariableBackup backupFailOnDrmVersion(&failOnDrmVersion); failOnDrmVersion = -1; auto drm = DrmWrap::createDrm(*rootDeviceEnvironment); EXPECT_EQ(drm, nullptr); failOnDrmVersion = 0; } TEST_F(DrmTests, GivenInvalidDrmVersionNameWhenCreatingDrmThenNullIsReturned) { VariableBackup backupFailOnDrmVersion(&failOnDrmVersion); strcpy(providedDrmVersion, "NA"); auto drm = DrmWrap::createDrm(*rootDeviceEnvironment); EXPECT_EQ(drm, nullptr); failOnDrmVersion = 0; strcpy(providedDrmVersion, "i915"); } TEST_F(DrmTests, 
whenDrmIsCreatedThenSetMemoryRegionsDoesntFailAndDrmObjectIsReturned) { DebugManagerStateRestore restore; DebugManager.flags.EnableLocalMemory.set(1); auto drm = DrmWrap::createDrm(*rootDeviceEnvironment); EXPECT_NE(drm, nullptr); } TEST(AllocatorHelper, givenExpectedSizeToReserveWhenGetSizeToReserveCalledThenExpectedValueReturned) { EXPECT_EQ((maxNBitValue(47) + 1) / 4, NEO::getSizeToReserve()); } TEST(DrmMemoryManagerCreate, whenCallCreateMemoryManagerThenDrmMemoryManagerIsCreated) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); auto drm = new DrmMockSuccess(fakeFd, *executionEnvironment.rootDeviceEnvironments[0]); drm->setupIoctlHelper(defaultHwInfo->platform.eProductFamily); executionEnvironment.rootDeviceEnvironments[0]->osInterface = std::make_unique(); executionEnvironment.rootDeviceEnvironments[0]->osInterface->setDriverModel(std::unique_ptr(drm)); auto drmMemoryManager = MemoryManager::createMemoryManager(executionEnvironment); EXPECT_NE(nullptr, drmMemoryManager.get()); executionEnvironment.memoryManager = std::move(drmMemoryManager); } TEST(DrmMemoryManagerCreate, givenEnableHostPtrValidationSetToZeroWhenCreateDrmMemoryManagerThenHostPtrValidationIsDisabled) { DebugManagerStateRestore restorer; DebugManager.flags.EnableHostPtrValidation.set(0); DebugManager.flags.EnableGemCloseWorker.set(0); VariableBackup backup(&ultHwConfig); ultHwConfig.forceOsAgnosticMemoryManager = false; MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); auto drm = new DrmMockSuccess(fakeFd, *executionEnvironment.rootDeviceEnvironments[0]); drm->setupIoctlHelper(defaultHwInfo->platform.eProductFamily); executionEnvironment.rootDeviceEnvironments[0]->osInterface = std::make_unique(); executionEnvironment.rootDeviceEnvironments[0]->osInterface->setDriverModel(std::unique_ptr(drm)); auto drmMemoryManager = MemoryManager::createMemoryManager(executionEnvironment); EXPECT_NE(nullptr, drmMemoryManager.get()); 
EXPECT_FALSE(static_cast(drmMemoryManager.get())->isValidateHostMemoryEnabled()); executionEnvironment.memoryManager = std::move(drmMemoryManager); } TEST(OsInterfaceTests, givenOsInterfaceWhenEnableLocalMemoryIsSpecifiedThenItIsSetToTrueOn64Bit) { EXPECT_TRUE(OSInterface::osEnableLocalMemory); } TEST_F(DrmTests, whenDrmIsCreatedWithMultipleSubDevicesThenCreateMultipleVirtualMemoryAddressSpaces) { DebugManagerStateRestore restore; DebugManager.flags.CreateMultipleSubDevices.set(2); auto drm = DrmWrap::createDrm(*rootDeviceEnvironment); EXPECT_NE(drm, nullptr); if (drm->isPerContextVMRequired()) { GTEST_SKIP(); } auto numSubDevices = HwHelper::getSubDevicesCount(rootDeviceEnvironment->getHardwareInfo()); for (auto id = 0u; id < numSubDevices; id++) { EXPECT_EQ(id + 1, drm->getVirtualMemoryAddressSpace(id)); } } TEST_F(DrmTests, givenDebuggingEnabledWhenDrmIsCreatedThenPerContextVMIsTrueGetVirtualMemoryAddressSpaceReturnsZeroAndVMsAreNotCreated) { DebugManagerStateRestore restore; DebugManager.flags.CreateMultipleSubDevices.set(2); DebugManager.flags.UseVmBind.set(1); rootDeviceEnvironment->executionEnvironment.setDebuggingEnabled(); auto drm = DrmWrap::createDrm(*rootDeviceEnvironment); ASSERT_NE(drm, nullptr); if (drm->isVmBindAvailable()) { EXPECT_TRUE(drm->isPerContextVMRequired()); auto numSubDevices = HwHelper::getSubDevicesCount(rootDeviceEnvironment->getHardwareInfo()); for (auto id = 0u; id < numSubDevices; id++) { EXPECT_EQ(0u, drm->getVirtualMemoryAddressSpace(id)); } EXPECT_EQ(0u, static_cast(drm.get())->virtualMemoryIds.size()); } } TEST_F(DrmTests, givenEnabledDebuggingAndVmBindNotAvailableWhenDrmIsCreatedThenPerContextVMIsFalseVMsAreCreatedAndDebugMessageIsPrinted) { DebugManagerStateRestore restore; ::testing::internal::CaptureStderr(); ::testing::internal::CaptureStdout(); DebugManager.flags.CreateMultipleSubDevices.set(2); DebugManager.flags.UseVmBind.set(0); DebugManager.flags.PrintDebugMessages.set(true); 
rootDeviceEnvironment->executionEnvironment.setDebuggingEnabled(); auto drm = DrmWrap::createDrm(*rootDeviceEnvironment); EXPECT_NE(drm, nullptr); if (drm->isPerContextVMRequired()) { ::testing::internal::GetCapturedStdout(); ::testing::internal::GetCapturedStderr(); GTEST_SKIP(); } auto numSubDevices = HwHelper::getSubDevicesCount(rootDeviceEnvironment->getHardwareInfo()); for (auto id = 0u; id < numSubDevices; id++) { EXPECT_NE(0u, drm->getVirtualMemoryAddressSpace(id)); } EXPECT_NE(0u, static_cast(drm.get())->virtualMemoryIds.size()); DebugManager.flags.PrintDebugMessages.set(false); ::testing::internal::GetCapturedStdout(); std::string errStr = ::testing::internal::GetCapturedStderr(); EXPECT_TRUE(hasSubstr(errStr, std::string("WARNING: Debugging not supported\n"))); } TEST_F(DrmTests, givenDrmIsCreatedWhenCreateVirtualMemoryFailsThenReturnVirtualMemoryIdZeroAndPrintDebugMessage) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.PrintDebugMessages.set(true); VariableBackup backupFailOnVirtualMemoryCreate(&failOnVirtualMemoryCreate); failOnVirtualMemoryCreate = -1; ::testing::internal::CaptureStderr(); ::testing::internal::CaptureStdout(); auto drm = DrmWrap::createDrm(*rootDeviceEnvironment); EXPECT_NE(drm, nullptr); EXPECT_EQ(0u, drm->getVirtualMemoryAddressSpace(0)); EXPECT_EQ(0u, static_cast(drm.get())->virtualMemoryIds.size()); std::string errStr = ::testing::internal::GetCapturedStderr(); if (!drm->isPerContextVMRequired()) { EXPECT_TRUE(hasSubstr(errStr, std::string("INFO: Device doesn't support GEM Virtual Memory"))); } ::testing::internal::GetCapturedStdout(); } TEST(SysCalls, WhenSysCallsPollCalledThenCallIsRedirectedToOs) { struct pollfd pollFd; pollFd.fd = 0; pollFd.events = 0; auto result = NEO::SysCalls::poll(&pollFd, 1, 0); EXPECT_LE(0, result); } TEST(SysCalls, WhenSysCallsFstatCalledThenCallIsRedirectedToOs) { struct stat st = {}; auto result = NEO::SysCalls::fstat(0, &st); EXPECT_EQ(0, result); } int main(int argc, char **argv) { bool 
useDefaultListener = false; bool enableAlarm = true; ::testing::InitGoogleTest(&argc, argv); // parse remaining args assuming they're mine for (int i = 1; i < argc; ++i) { if (!strcmp("--disable_default_listener", argv[i])) { useDefaultListener = false; } else if (!strcmp("--enable_default_listener", argv[i])) { useDefaultListener = true; } else if (!strcmp("--disable_alarm", argv[i])) { enableAlarm = false; } } if (useDefaultListener == false) { auto &listeners = ::testing::UnitTest::GetInstance()->listeners(); auto defaultListener = listeners.default_result_printer(); auto customEventListener = new CCustomEventListener(defaultListener); listeners.Release(defaultListener); listeners.Append(customEventListener); } defaultHwInfo = std::make_unique(); *defaultHwInfo = DEFAULT_TEST_PLATFORM::hwInfo; initializeTestedDevice(); Os::dxcoreDllName = ""; int sigOut = setAlarm(enableAlarm); if (sigOut != 0) return sigOut; auto retVal = RUN_ALL_TESTS(); return retVal; } TEST_F(DrmTests, whenCreateDrmIsCalledThenProperHwInfoIsSetup) { auto oldHwInfo = rootDeviceEnvironment->getMutableHardwareInfo(); *oldHwInfo = {}; auto drm = DrmWrap::createDrm(*rootDeviceEnvironment); EXPECT_NE(drm, nullptr); auto currentHwInfo = rootDeviceEnvironment->getHardwareInfo(); EXPECT_NE(IGFX_UNKNOWN, currentHwInfo->platform.eProductFamily); EXPECT_NE(IGFX_UNKNOWN_CORE, currentHwInfo->platform.eRenderCoreFamily); EXPECT_LT(0u, currentHwInfo->gtSystemInfo.EUCount); EXPECT_LT(0u, currentHwInfo->gtSystemInfo.SubSliceCount); } TEST(DirectSubmissionControllerTest, whenCheckDirectSubmissionControllerSupportThenReturnsTrue) { EXPECT_TRUE(DirectSubmissionController::isSupported()); } TEST(CommandQueueTest, whenCheckEngineRoundRobinAssignThenReturnsFalse) { EXPECT_FALSE(CommandQueue::isAssignEngineRoundRobinEnabled()); } TEST(CommandQueueTest, whenCheckEngineTimestampWaitEnabledThenReturnsTrue) { EXPECT_TRUE(CommandQueue::isTimestampWaitEnabled()); } TEST(CommandQueueTest, 
givenEnableCmdQRoundRobindEngineAssignSetWhenCheckEngineRoundRobinAssignThenReturnsTrue) { DebugManagerStateRestore restorer; DebugManager.flags.EnableCmdQRoundRobindEngineAssign.set(1); EXPECT_TRUE(CommandQueue::isAssignEngineRoundRobinEnabled()); } TEST(CommandQueueTest, givenEnableCmdQRoundRobindEngineAssignSetZeroWhenCheckEngineRoundRobinAssignThenReturnsTrue) { DebugManagerStateRestore restorer; DebugManager.flags.EnableCmdQRoundRobindEngineAssign.set(0); EXPECT_FALSE(CommandQueue::isAssignEngineRoundRobinEnabled()); } TEST(PlatformsDestructor, whenGlobalPlatformsDestructorIsCalledThenGlobalPlatformsAreDestroyed) { EXPECT_NE(nullptr, platformsImpl); platformsDestructor(); EXPECT_EQ(nullptr, platformsImpl); platformsImpl = new std::vector>; } TEST_F(DrmTests, givenValidPciPathThenPciBusInfoIsAvailable) { VariableBackup backupOpenFull(&openFull); VariableBackup backupOpenDir(&failOnOpenDir, false); VariableBackup backupEntryIndex(&entryIndex, 0u); openFull = openWithCounter; entryIndex = 1; openCounter = 2; auto drm = DrmWrap::createDrm(*rootDeviceEnvironment); ASSERT_NE(drm, nullptr); EXPECT_EQ(drm->getPciBusInfo().pciDomain, 0u); EXPECT_EQ(drm->getPciBusInfo().pciBus, 0u); EXPECT_EQ(drm->getPciBusInfo().pciDevice, 3u); EXPECT_EQ(drm->getPciBusInfo().pciFunction, 1u); entryIndex = 2; openCounter = 1; drm = DrmWrap::createDrm(*rootDeviceEnvironment); ASSERT_NE(drm, nullptr); EXPECT_EQ(drm->getPciBusInfo().pciDomain, 0u); EXPECT_EQ(drm->getPciBusInfo().pciBus, 0u); EXPECT_EQ(drm->getPciBusInfo().pciDevice, 2u); EXPECT_EQ(drm->getPciBusInfo().pciFunction, 0u); uint32_t referenceData[4][4] = { {0x0a00, 0x00, 0x03, 0x1}, {0x0000, 0xb3, 0x03, 0x1}, {0x0000, 0x00, 0xb3, 0x1}, {0x0000, 0x00, 0x03, 0xa}}; for (uint32_t idx = 7; idx < 11; idx++) { entryIndex = idx; openCounter = 1; drm = DrmWrap::createDrm(*rootDeviceEnvironment); ASSERT_NE(drm, nullptr); EXPECT_EQ(drm->getPciBusInfo().pciDomain, referenceData[idx - 7][0]); EXPECT_EQ(drm->getPciBusInfo().pciBus, 
referenceData[idx - 7][1]); EXPECT_EQ(drm->getPciBusInfo().pciDevice, referenceData[idx - 7][2]); EXPECT_EQ(drm->getPciBusInfo().pciFunction, referenceData[idx - 7][3]); } } TEST_F(DrmTests, givenInValidPciPathThenNothingIsReturned) { VariableBackup backupOpenFull(&openFull); VariableBackup backupOpenDir(&failOnOpenDir, false); VariableBackup backupEntryIndex(&entryIndex, 0u); openFull = openWithCounter; entryIndex = 11; openCounter = 1; auto hwDeviceIds = OSInterface::discoverDevices(executionEnvironment); EXPECT_TRUE(hwDeviceIds.empty()); entryIndex = 12; openCounter = 1; hwDeviceIds = OSInterface::discoverDevices(executionEnvironment); EXPECT_TRUE(hwDeviceIds.empty()); entryIndex = 13; openCounter = 1; hwDeviceIds = OSInterface::discoverDevices(executionEnvironment); EXPECT_TRUE(hwDeviceIds.empty()); } compute-runtime-22.14.22890/opencl/test/unit_test/linux/mock_os_layer.cpp000066400000000000000000000215031422164147700263210ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "mock_os_layer.h" #include "shared/source/helpers/string.h" #include #include #include #include #include int (*c_open)(const char *pathname, int flags, ...) = nullptr; int (*openFull)(const char *pathname, int flags, ...) 
= nullptr; int fakeFd = 1023; int haveDri = 0; // index of dri to serve, -1 - none int deviceId = NEO::deviceDescriptorTable[0].deviceId; // default supported DeviceID int haveSoftPin = 1; int havePreemption = I915_SCHEDULER_CAP_ENABLED | I915_SCHEDULER_CAP_PRIORITY | I915_SCHEDULER_CAP_PREEMPTION; int vmId = 0; int failOnDeviceId = 0; int failOnEuTotal = 0; int failOnSubsliceTotal = 0; int failOnRevisionId = 0; int failOnSoftPin = 0; int failOnParamBoost = 0; int failOnSetParamSseu = 0; int failOnGetParamSseu = 0; int failOnContextCreate = 0; int failOnVirtualMemoryCreate = 0; int failOnSetPriority = 0; int failOnPreemption = 0; int failOnDrmVersion = 0; int accessCalledTimes = 0; int readLinkCalledTimes = 0; int fstatCalledTimes = 0; char providedDrmVersion[5] = {'i', '9', '1', '5', '\0'}; uint64_t gpuTimestamp = 0; int ioctlSeq[8] = {0, 0, 0, 0, 0, 0, 0, 0}; size_t ioctlCnt = 0; int fstat(int fd, struct stat *buf) { ++fstatCalledTimes; buf->st_rdev = 0x0; return 0; } int access(const char *pathname, int mode) { ++accessCalledTimes; return 0; } ssize_t readlink(const char *path, char *buf, size_t bufsiz) { ++readLinkCalledTimes; if (readLinkCalledTimes % 2 == 1) { return -1; } constexpr size_t sizeofPath = sizeof("../../devices/pci0000:4a/0000:4a:02.0/0000:4b:00.0/0000:4c:01.0/0000:00:03.0/drm/renderD128"); strcpy_s(buf, sizeofPath, "../../devices/pci0000:4a/0000:4a:02.0/0000:4b:00.0/0000:4c:01.0/0000:00:03.0/drm/renderD128"); return sizeofPath; } int open(const char *pathname, int flags, ...) 
{ if (openFull != nullptr) { return openFull(pathname, flags); } if (c_open == nullptr) { c_open = (int (*)(const char *, int, ...))dlsym(RTLD_NEXT, "open"); } if (strncmp("/dev/dri/", pathname, 9) == 0) { if (haveDri >= 0) { return fakeFd; } else { return -1; } } return c_open(pathname, flags); } bool failOnOpenDir = false; DIR *validDir = reinterpret_cast(0xc001); DIR *opendir(const char *name) { if (failOnOpenDir) { return nullptr; } return validDir; } int closedir(DIR *dirp) { return 0u; } struct dirent entries[] = { {0, 0, 0, 0, "."}, {0, 0, 0, 0, "pci-0000:00:03.1-render"}, {0, 0, 0, 0, "platform-4010000000.pcie-pci-0000:00:02.0-render"}, {0, 0, 0, 0, "pci-0000:test1-render"}, {0, 0, 0, 0, "pci-0000:test2-render"}, {0, 0, 0, 0, "pci-0000:1234-render"}, {0, 0, 0, 0, "pci-0000:3:0.0-render"}, {0, 0, 0, 0, "pci-0a00:00:03.1-render"}, {0, 0, 0, 0, "pci-0000:b3:03.1-render"}, {0, 0, 0, 0, "pci-0000:00:b3.1-render"}, {0, 0, 0, 0, "pci-0000:00:03.a-render"}, {0, 0, 0, 0, "pci-0000:00:03.a-render-12"}, {0, 0, 0, 0, "pcii0000:00:03.a-render"}, {0, 0, 0, 0, "pcii-render"}, }; uint32_t entryIndex = 0u; const uint32_t numEntries = sizeof(entries) / sizeof(entries[0]); struct dirent *readdir(DIR *dir) { if (entryIndex >= numEntries) { entryIndex = 0; return nullptr; } return &entries[entryIndex++]; } int drmGetParam(drm_i915_getparam_t *param) { assert(param); int ret = 0; switch (param->param) { case I915_PARAM_CHIPSET_ID: *param->value = deviceId; ret = failOnDeviceId; break; case I915_PARAM_EU_TOTAL: *param->value = 3; ret = failOnEuTotal; break; case I915_PARAM_SUBSLICE_TOTAL: *param->value = 1; ret = failOnSubsliceTotal; break; case I915_PARAM_REVISION: *param->value = 0x0; ret = failOnRevisionId; break; case I915_PARAM_HAS_EXEC_SOFTPIN: *param->value = haveSoftPin; ret = failOnSoftPin; break; #if defined(I915_PARAM_HAS_SCHEDULER) case I915_PARAM_HAS_SCHEDULER: *param->value = havePreemption; ret = failOnPreemption; break; #endif default: ret = -1; break; } return 
ret; } int drmSetContextParam(drm_i915_gem_context_param *param) { assert(param); int ret = 0; switch (param->param) { case I915_CONTEXT_PRIVATE_PARAM_BOOST: ret = failOnParamBoost; break; case I915_CONTEXT_PARAM_VM: break; #if defined(I915_PARAM_HAS_SCHEDULER) case I915_CONTEXT_PARAM_PRIORITY: ret = failOnSetPriority; break; #endif case I915_CONTEXT_PARAM_SSEU: if (param->size == sizeof(struct drm_i915_gem_context_param_sseu) && param->value != 0 && param->ctx_id == 0) { ret = failOnSetParamSseu; } else { ret = -1; } break; default: ret = -1; break; } return ret; } int drmGetContextParam(drm_i915_gem_context_param *param) { int ret = 0; switch (param->param) { case I915_CONTEXT_PARAM_SSEU: if (param->size == sizeof(struct drm_i915_gem_context_param_sseu) && param->value != 0 && param->ctx_id == 0) { ret = failOnGetParamSseu; } else { ret = -1; } break; default: ret = -1; break; } return ret; } int drmContextCreate(drm_i915_gem_context_create_ext *create) { assert(create); create->ctx_id = 1; return failOnContextCreate; } int drmContextDestroy(drm_i915_gem_context_destroy *destroy) { assert(destroy); if (destroy->ctx_id == 1) return 0; else return -1; } int drmVirtualMemoryCreate(drm_i915_gem_vm_control *control) { assert(control); control->vm_id = ++vmId; return failOnVirtualMemoryCreate; } int drmVirtualMemoryDestroy(drm_i915_gem_vm_control *control) { assert(control); vmId--; return (control->vm_id > 0) ? 
0 : -1; } int drmVersion(drm_version_t *version) { strcpy(version->name, providedDrmVersion); return failOnDrmVersion; } int drmQueryItem(drm_i915_query *query) { auto queryItemArg = reinterpret_cast(query->items_ptr); if (queryItemArg->length == 0) { if (queryItemArg->query_id == DRM_I915_QUERY_TOPOLOGY_INFO) { queryItemArg->length = sizeof(drm_i915_query_topology_info) + 1; return 0; } } else { if (queryItemArg->query_id == DRM_I915_QUERY_TOPOLOGY_INFO) { auto topologyArg = reinterpret_cast(queryItemArg->data_ptr); topologyArg->max_slices = 1; topologyArg->max_subslices = 1; topologyArg->max_eus_per_subslice = 3; topologyArg->data[0] = 0xFF; return failOnEuTotal || failOnSubsliceTotal; } } return drmOtherRequests(DRM_IOCTL_I915_QUERY, query); } int ioctl(int fd, unsigned long int request, ...) throw() { int res; va_list vl; va_start(vl, request); if (fd == fakeFd) { res = ioctlSeq[ioctlCnt % (sizeof(ioctlSeq) / sizeof(int))]; ioctlCnt++; if (res == 0) { switch (request) { case DRM_IOCTL_I915_GETPARAM: res = drmGetParam(va_arg(vl, drm_i915_getparam_t *)); break; case DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM: res = drmSetContextParam(va_arg(vl, drm_i915_gem_context_param *)); break; case DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM: res = drmGetContextParam(va_arg(vl, drm_i915_gem_context_param *)); break; case DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT: res = drmContextCreate(va_arg(vl, drm_i915_gem_context_create_ext *)); break; case DRM_IOCTL_I915_GEM_CONTEXT_DESTROY: res = drmContextDestroy(va_arg(vl, drm_i915_gem_context_destroy *)); break; case DRM_IOCTL_I915_GEM_VM_CREATE: res = drmVirtualMemoryCreate(va_arg(vl, drm_i915_gem_vm_control *)); break; case DRM_IOCTL_I915_GEM_VM_DESTROY: res = drmVirtualMemoryDestroy(va_arg(vl, drm_i915_gem_vm_control *)); break; case DRM_IOCTL_VERSION: res = drmVersion(va_arg(vl, drm_version_t *)); break; case DRM_IOCTL_I915_QUERY: res = drmQueryItem(va_arg(vl, drm_i915_query *)); break; default: res = drmOtherRequests(request, vl); break; } } 
va_end(vl); return res; } va_end(vl); return -1; } compute-runtime-22.14.22890/opencl/test/unit_test/linux/mock_os_layer.h000066400000000000000000000023411422164147700257650ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/os_interface/linux/drm_neo.h" #include "drm/i915_drm.h" #include #include #include #include #include #include #include extern int (*c_open)(const char *pathname, int flags, ...); extern int (*openFull)(const char *pathname, int flags, ...); extern int drmOtherRequests(unsigned long int request, ...); extern int fakeFd; extern int haveDri; // index of dri to serve, -1 - none extern int deviceId; // known DeviceID extern int haveSoftPin; extern int vmId; extern int failOnDeviceId; extern int failOnEuTotal; extern int failOnSubsliceTotal; extern int failOnRevisionId; extern int failOnSoftPin; extern int failOnParamBoost; extern int failOnContextCreate; extern int failOnVirtualMemoryCreate; extern int failOnSetPriority; extern int failOnPreemption; extern int havePreemption; extern int failOnDrmVersion; extern char providedDrmVersion[5]; extern int ioctlSeq[8]; extern size_t ioctlCnt; extern bool failOnOpenDir; extern uint32_t entryIndex; extern int accessCalledTimes; extern int readLinkCalledTimes; extern int fstatCalledTimes; compute-runtime-22.14.22890/opencl/test/unit_test/linux/os_interface_linux_tests.cpp000066400000000000000000000043001422164147700305710ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/os_interface/os_interface.h" #include "shared/test/common/helpers/default_hw_info.h" #include "shared/test/common/helpers/variable_backup.h" #include "shared/test/common/libult/linux/drm_mock.h" #include "shared/test/common/mocks/mock_execution_environment.h" #include "shared/test/common/test_macros/test.h" 
#include "opencl/test/unit_test/linux/mock_os_layer.h" namespace NEO { extern GMM_INIT_IN_ARGS passedInputArgs; extern bool copyInputArgs; TEST(OsInterfaceTest, whenOsInterfaceSetupsGmmInputArgsThenFileDescriptorIsSetWithValueOfAdapterBdf) { MockExecutionEnvironment executionEnvironment; auto rootDeviceEnvironment = executionEnvironment.rootDeviceEnvironments[0].get(); auto osInterface = new OSInterface(); rootDeviceEnvironment->osInterface.reset(osInterface); auto drm = new DrmMock(fakeFd, *rootDeviceEnvironment); drm->setPciPath("0000:01:23.4"); EXPECT_EQ(0, drm->queryAdapterBDF()); osInterface->setDriverModel(std::unique_ptr(drm)); GMM_INIT_IN_ARGS gmmInputArgs = {}; EXPECT_EQ(0u, gmmInputArgs.FileDescriptor); osInterface->getDriverModel()->setGmmInputArgs(&gmmInputArgs); EXPECT_NE(0u, gmmInputArgs.FileDescriptor); ADAPTER_BDF expectedAdapterBDF{}; expectedAdapterBDF.Bus = 0x1; expectedAdapterBDF.Device = 0x23; expectedAdapterBDF.Function = 0x4; EXPECT_EQ(expectedAdapterBDF.Data, gmmInputArgs.FileDescriptor); EXPECT_EQ(GMM_CLIENT::GMM_OCL_VISTA, gmmInputArgs.ClientType); } TEST(GmmHelperTest, whenCreateGmmHelperWithoutOsInterfaceThenPassedFileDescriptorIsZeroed) { std::unique_ptr gmmHelper; VariableBackup passedInputArgsBackup(&passedInputArgs); VariableBackup copyInputArgsBackup(©InputArgs, true); uint32_t expectedFileDescriptor = 0u; gmmHelper.reset(new GmmHelper(nullptr, defaultHwInfo.get())); EXPECT_EQ(expectedFileDescriptor, passedInputArgs.FileDescriptor); EXPECT_EQ(GMM_CLIENT::GMM_OCL_VISTA, passedInputArgs.ClientType); } } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/linux/va_tests.cpp000066400000000000000000000044711422164147700253300ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/helpers/variable_backup.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/sharings/va/va_device.h" #include 
"opencl/source/sharings/va/va_sharing_functions.h" #include "opencl/test/unit_test/linux/mock_os_layer.h" #include using namespace NEO; TEST(VaTests, whenLibvaSo2IsNotInstalledThenFail) { VariableBackup dlopenBackup(&VASharingFunctions::fdlopen); VariableBackup dlcloseBackup(&VASharingFunctions::fdlclose); VariableBackup dlsymBackup(&VASharingFunctions::fdlsym); VASharingFunctions::fdlopen = [&](const char *filename, int flag) -> void * { if (!strncmp(filename, "libva.so.2", 10)) { return (void *)0xdeadbeef; } else return 0; }; VASharingFunctions::fdlclose = [&](void *handle) -> int { return 0; }; VASharingFunctions::fdlsym = [&](void *handle, const char *symbol) -> void * { return nullptr; }; VADisplay vaDisplay = nullptr; VASharingFunctions va(vaDisplay); EXPECT_EQ(true, va.isVaLibraryAvailable()); } TEST(VaTests, givenVADeviceWhenGetDeviceFromVAIsCalledThenProperSyscallsAreUsed) { VariableBackup backupAccessCalledTimes(&accessCalledTimes); VariableBackup backupReadLinkCalledTimes(&readLinkCalledTimes); VariableBackup backupFstatCalledTimes(&fstatCalledTimes); accessCalledTimes = 0; readLinkCalledTimes = 0; fstatCalledTimes = 0; auto vaDisplay = std::make_unique(); vaDisplay->vadpy_magic = 0x56414430; auto contextPtr = std::make_unique(); auto drmState = std::make_unique(); vaDisplay->pDriverContext = contextPtr.get(); contextPtr->drm_state = drmState.get(); *static_cast(contextPtr->drm_state) = 1; VADevice vaDevice{}; auto clDevice = vaDevice.getDeviceFromVA(nullptr, vaDisplay.get()); EXPECT_EQ(clDevice, nullptr); EXPECT_EQ(accessCalledTimes, 1); EXPECT_EQ(readLinkCalledTimes, 1); EXPECT_EQ(fstatCalledTimes, 1); } compute-runtime-22.14.22890/opencl/test/unit_test/lsan_suppressions.txt000066400000000000000000000000771422164147700261660ustar00rootroot00000000000000leak:OsAgnosticMemoryManager_pleaseDetectLeak_Test::TestBody() compute-runtime-22.14.22890/opencl/test/unit_test/main.cpp000066400000000000000000000353511422164147700232660ustar00rootroot00000000000000/* 
namespace NEO {
// Globals defined in other translation units and consumed by main() below.
extern const char *hardwarePrefix[];                            // per-product name, indexed by product family; entries may be nullptr
extern const HardwareInfo *hardwareInfoTable[IGFX_MAX_PRODUCT]; // per-product hardware description
extern const char *executionName;                               // used to build the NEO_<name>_DISABLE_TEST_ALARM variable name
extern bool useMockGmm;                                         // selects the mock GMM context factory in main()
extern TestMode testMode;
extern const char *executionDirectorySuffix; // appended to the product prefix when building the execution directory

// Id of the short-lived thread started by applyWorkarounds().
std::thread::id tempThreadID;

namespace PagaFaultManagerTestConfig {
// Set to true by the --disable_pagefaulting_tests command-line option.
bool disabled = false;
}
} // namespace NEO
GFXCORE_FAMILY renderCoreFamily; extern std::string lastTest; bool generateRandomInput = false; void applyWorkarounds() { platformsImpl = new std::vector>; platformsImpl->reserve(1); { std::ofstream f; const std::string fileName("_tmp_"); f.open(fileName, std::ofstream::binary); f.close(); } { std::mutex mtx; std::unique_lock stateLock(mtx); } { std::stringstream ss("1"); int val; ss >> val; } { class BaseClass { public: int method(int param) { return 1; } }; class MockClass : public BaseClass { public: MOCK_METHOD1(method, int(int param)); }; ::testing::NiceMock mockObj; EXPECT_CALL(mockObj, method(::testing::_)) .Times(1); mockObj.method(2); } //intialize rand srand(static_cast(time(nullptr))); //Create at least on thread to prevent false memory leaks in tests using threads std::thread t([&]() { }); tempThreadID = t.get_id(); t.join(); //Create FileLogger to prevent false memory leaks { NEO::FileLoggerInstance(); } } void initializeTestHelpers(TestMode currentTestmode) { MockSipData::mockSipKernel.reset(new MockSipKernel()); if (currentTestmode == TestMode::AubTests || currentTestmode == TestMode::AubTestsWithTbx) { MockSipData::useMockSip = false; } } void cleanTestHelpers() { delete platformsImpl; } std::string getRunPath(char *argv0) { std::string res(argv0); auto pos = res.rfind(fSeparator); if (pos != std::string::npos) res = res.substr(0, pos); if (res == "." || pos == std::string::npos) { char *cwd; #if defined(__linux__) cwd = getcwd(nullptr, 0); #else cwd = _getcwd(nullptr, 0); #endif res = cwd; free(cwd); } return res; } int main(int argc, char **argv) { int retVal = 0; bool useDefaultListener = false; bool enableAbrt = true; bool enableAlarm = true; bool enableSegv = true; bool setupFeatureTableAndWorkaroundTable = testMode == TestMode::AubTests ? 
true : false; bool showTestStats = false; applyWorkarounds(); #if defined(__linux__) if (getenv("IGDRCL_TEST_SELF_EXEC") == nullptr) { std::string wd = getRunPath(argv[0]); char *ldLibraryPath = getenv("LD_LIBRARY_PATH"); if (ldLibraryPath == nullptr) { setenv("LD_LIBRARY_PATH", wd.c_str(), 1); } else { std::string ldLibraryPathConcat = wd + ":" + std::string(ldLibraryPath); setenv("LD_LIBRARY_PATH", ldLibraryPathConcat.c_str(), 1); } setenv("IGDRCL_TEST_SELF_EXEC", wd.c_str(), 1); execv(argv[0], argv); printf("FATAL ERROR: cannot self-exec test: %s!, errno: %d\n", argv[0], errno); return -1; } #endif { std::string envVar = std::string("NEO_") + executionName + "_DISABLE_TEST_ALARM"; char *envValue = getenv(envVar.c_str()); if (envValue != nullptr) { enableAlarm = false; } } ::testing::InitGoogleMock(&argc, argv); HardwareInfo hwInfoForTests = DEFAULT_TEST_PLATFORM::hwInfo; uint32_t euPerSubSlice = 0; uint32_t sliceCount = 0; uint32_t subSlicePerSliceCount = 0; int32_t revId = -1; int dieRecovery = 0; for (int i = 1; i < argc; ++i) { if (!strcmp("--disable_default_listener", argv[i])) { useDefaultListener = false; } else if (!strcmp("--enable_default_listener", argv[i])) { useDefaultListener = true; } else if (!strcmp("--disable_alarm", argv[i])) { enableAlarm = false; } else if (!strcmp("--show_test_stats", argv[i])) { showTestStats = true; } else if (!strcmp("--disable_pagefaulting_tests", argv[i])) { //disable tests which raise page fault signal during execution NEO::PagaFaultManagerTestConfig::disabled = true; } else if (!strcmp("--tbx", argv[i])) { if (testMode == TestMode::AubTests) { testMode = TestMode::AubTestsWithTbx; } initialHardwareTag = 0; } else if (!strcmp("--rev_id", argv[i])) { ++i; if (i < argc) { revId = atoi(argv[i]); } } else if (!strcmp("--product", argv[i])) { ++i; if (i < argc) { if (::isdigit(argv[i][0])) { int productValue = atoi(argv[i]); if (productValue > 0 && productValue < IGFX_MAX_PRODUCT && hardwarePrefix[productValue] != nullptr) 
{ productFamily = static_cast(productValue); } else { productFamily = IGFX_UNKNOWN; } } else { productFamily = IGFX_UNKNOWN; for (int j = 0; j < IGFX_MAX_PRODUCT; j++) { if (hardwarePrefix[j] == nullptr) continue; if (strcmp(hardwarePrefix[j], argv[i]) == 0) { productFamily = static_cast(j); break; } } } if (productFamily == IGFX_UNKNOWN) { std::cout << "unknown or unsupported product family has been set: " << argv[i] << std::endl; return -1; } else { std::cout << "product family: " << hardwarePrefix[productFamily] << " (" << productFamily << ")" << std::endl; } hwInfoForTests = *hardwareInfoTable[productFamily]; } } else if (!strcmp("--slices", argv[i])) { ++i; if (i < argc) { sliceCount = atoi(argv[i]); } } else if (!strcmp("--subslices", argv[i])) { ++i; if (i < argc) { subSlicePerSliceCount = atoi(argv[i]); } } else if (!strcmp("--eu_per_ss", argv[i])) { ++i; if (i < argc) { euPerSubSlice = atoi(argv[i]); } } else if (!strcmp("--die_recovery", argv[i])) { ++i; if (i < argc) { dieRecovery = atoi(argv[i]) ? 
1 : 0; } } else if (!strcmp("--generate_random_inputs", argv[i])) { generateRandomInput = true; } else if (!strcmp("--read-config", argv[i]) && (testMode == TestMode::AubTests || testMode == TestMode::AubTestsWithTbx)) { if (DebugManager.registryReadAvailable()) { DebugManager.setReaderImpl(SettingsReader::create(oclRegPath)); DebugManager.injectSettingsFromReader(); } } else if (!strcmp("--dump_buffer_format", argv[i]) && testMode == TestMode::AubTests) { ++i; std::string dumpBufferFormat(argv[i]); std::transform(dumpBufferFormat.begin(), dumpBufferFormat.end(), dumpBufferFormat.begin(), ::toupper); DebugManager.flags.AUBDumpBufferFormat.set(dumpBufferFormat); } else if (!strcmp("--dump_image_format", argv[i]) && testMode == TestMode::AubTests) { ++i; std::string dumpImageFormat(argv[i]); std::transform(dumpImageFormat.begin(), dumpImageFormat.end(), dumpImageFormat.begin(), ::toupper); DebugManager.flags.AUBDumpImageFormat.set(dumpImageFormat); } } if (showTestStats) { std::cout << getTestStats() << std::endl; return 0; } productFamily = hwInfoForTests.platform.eProductFamily; renderCoreFamily = hwInfoForTests.platform.eRenderCoreFamily; uint32_t threadsPerEu = hwInfoConfigFactory[productFamily]->threadsPerEu; PLATFORM &platform = hwInfoForTests.platform; if (revId != -1) { platform.usRevId = revId; } else { revId = platform.usRevId; } uint64_t hwInfoConfig = defaultHardwareInfoConfigTable[productFamily]; setHwInfoValuesFromConfig(hwInfoConfig, hwInfoForTests); // set Gt and FeatureTable to initial state hardwareInfoSetup[productFamily](&hwInfoForTests, setupFeatureTableAndWorkaroundTable, hwInfoConfig); GT_SYSTEM_INFO >SystemInfo = hwInfoForTests.gtSystemInfo; // and adjust dynamic values if not secified sliceCount = sliceCount > 0 ? sliceCount : gtSystemInfo.SliceCount; subSlicePerSliceCount = subSlicePerSliceCount > 0 ? subSlicePerSliceCount : (gtSystemInfo.SubSliceCount / sliceCount); euPerSubSlice = euPerSubSlice > 0 ? 
euPerSubSlice : gtSystemInfo.MaxEuPerSubSlice; // clang-format off gtSystemInfo.SliceCount = sliceCount; gtSystemInfo.SubSliceCount = gtSystemInfo.SliceCount * subSlicePerSliceCount; gtSystemInfo.EUCount = gtSystemInfo.SubSliceCount * euPerSubSlice - dieRecovery; gtSystemInfo.ThreadCount = gtSystemInfo.EUCount * threadsPerEu; gtSystemInfo.MaxEuPerSubSlice = std::max(gtSystemInfo.MaxEuPerSubSlice, euPerSubSlice); gtSystemInfo.MaxSlicesSupported = std::max(gtSystemInfo.MaxSlicesSupported, gtSystemInfo.SliceCount); gtSystemInfo.MaxSubSlicesSupported = std::max(gtSystemInfo.MaxSubSlicesSupported, gtSystemInfo.SubSliceCount); gtSystemInfo.IsDynamicallyPopulated = false; // clang-format on binaryNameSuffix.append(familyName[hwInfoForTests.platform.eRenderCoreFamily]); binaryNameSuffix.append(hwInfoForTests.capabilityTable.platformType); std::string nBinaryKernelFiles = getRunPath(argv[0]); nBinaryKernelFiles.append("/"); nBinaryKernelFiles.append(binaryNameSuffix); nBinaryKernelFiles.append("/"); nBinaryKernelFiles.append(std::to_string(revId)); nBinaryKernelFiles.append("/"); nBinaryKernelFiles.append(testFiles); testFiles = nBinaryKernelFiles; std::string nClFiles = getRunPath(argv[0]); nClFiles.append("/"); nClFiles.append(hardwarePrefix[productFamily]); nClFiles.append("/"); nClFiles.append(std::to_string(revId)); nClFiles.append("/"); nClFiles.append(clFiles); clFiles = nClFiles; std::string executionDirectory(hardwarePrefix[productFamily]); executionDirectory += NEO::executionDirectorySuffix; // _aub for aub_tests, empty otherwise executionDirectory += "/"; executionDirectory += std::to_string(revId); #ifdef WIN32 #include if (_chdir(executionDirectory.c_str())) { std::cout << "chdir into " << executionDirectory << " directory failed.\nThis might cause test failures." 
<< std::endl; } #elif defined(__linux__) #include if (chdir(executionDirectory.c_str()) != 0) { std::cout << "chdir into " << executionDirectory << " directory failed.\nThis might cause test failures." << std::endl; } #endif defaultHwInfo = std::make_unique(); *defaultHwInfo = hwInfoForTests; auto &listeners = ::testing::UnitTest::GetInstance()->listeners(); if (useDefaultListener == false) { auto defaultListener = listeners.default_result_printer(); auto customEventListener = new CCustomEventListener(defaultListener, hardwarePrefix[productFamily]); listeners.Release(defaultListener); listeners.Append(customEventListener); } listeners.Append(new MemoryLeakListener); listeners.Append(new UltConfigListener); gEnvironment = reinterpret_cast(::testing::AddGlobalTestEnvironment(new TestEnvironment)); MockCompilerDebugVars fclDebugVars; MockCompilerDebugVars igcDebugVars; std::string builtInsFileName; if (TestChecks::supportsImages(defaultHwInfo)) { builtInsFileName = KernelBinaryHelper::BUILT_INS_WITH_IMAGES; } else { builtInsFileName = KernelBinaryHelper::BUILT_INS; } retrieveBinaryKernelFilename(fclDebugVars.fileName, builtInsFileName + "_", ".bc"); retrieveBinaryKernelFilename(igcDebugVars.fileName, builtInsFileName + "_", ".gen"); gEnvironment->setMockFileNames(fclDebugVars.fileName, igcDebugVars.fileName); gEnvironment->setDefaultDebugVars(fclDebugVars, igcDebugVars, hwInfoForTests); int sigOut = setAlarm(enableAlarm); if (sigOut != 0) { return sigOut; } sigOut = setSegv(enableSegv); if (sigOut != 0) { return sigOut; } sigOut = setAbrt(enableAbrt); if (sigOut != 0) { return sigOut; } if (useMockGmm) { GmmHelper::createGmmContextWrapperFunc = GmmClientContext::create; } else { GmmInterface::initialize(nullptr, nullptr); } initializeTestHelpers(testMode); retVal = RUN_ALL_TESTS(); cleanTestHelpers(); return retVal; } 
# Sources of the mem_obj unit tests that are always built.
set(IGDRCL_SRCS_tests_mem_obj
    ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
    ${CMAKE_CURRENT_SOURCE_DIR}/buffer_pin_tests.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/buffer_set_arg_tests.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/buffer_tests.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/buffer_bcs_tests.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/create_image_format_tests.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/create_image_in_local_memory_tests.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/destructor_callback_tests.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/get_mem_object_info_subbuffer_tests.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/get_mem_object_info_tests.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/image_from_subbuffer_tests.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/image1d_tests.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/image2d_from_buffer_tests.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/image2d_tests.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/image3d_tests.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/image_array_size_tests.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/image_compression_fixture.h
    ${CMAKE_CURRENT_SOURCE_DIR}/image_format_tests.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/image_redescribe_tests.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/image_release_mapped_ptr_tests.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/image_set_arg_tests.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/image_snorm_tests.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/image_tests.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/image_tests_tgllp_and_later.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/image_tiled_tests.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/image_transfer_tests.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/image_validate_tests.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/map_operations_handler_tests.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/mem_obj_destruction_tests.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/mem_obj_tests.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/mem_obj_helper_tests.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/nv12_image_tests.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/packed_yuv_image_tests.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/pipe_tests.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/sub_buffer_tests.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/zero_copy_tests.cpp
)

# Extra test sources appended only when TESTS_XEHP_AND_LATER is set.
if(TESTS_XEHP_AND_LATER)
  list(APPEND IGDRCL_SRCS_tests_mem_obj
       ${CMAKE_CURRENT_SOURCE_DIR}/buffer_tests_xehp_and_later.cpp
       ${CMAKE_CURRENT_SOURCE_DIR}/image_tests_xehp_and_later.cpp
  )
endif()

# Extra test sources appended only when TESTS_PVC_AND_LATER is set.
if(TESTS_PVC_AND_LATER)
  list(APPEND IGDRCL_SRCS_tests_mem_obj
       ${CMAKE_CURRENT_SOURCE_DIR}/buffer_tests_pvc_and_later.cpp
       ${CMAKE_CURRENT_SOURCE_DIR}/image_tests_pvc_and_later.cpp
  )
endif()

# Attach the collected sources to the igdrcl_tests target.
target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_mem_obj})
# add_subdirectories() is a project-defined command; NOTE(review): presumably it adds every child directory - confirm.
add_subdirectories()
// Fixture for buffer tests that go through the blitter (BCS) engine.
// NOTE(review): in this copy of the file the template parameter/argument lists
// (after `template`, `static_cast`, `std::unique_ptr`, `std::make_unique`, ...) appear to
// have been stripped; the code below is reproduced verbatim and only comments were added.
struct BcsBufferTests : public ::testing::Test {
    // Command stream receiver whose wait hooks verify their arguments and count calls.
    template 
    class MyMockCsr : public UltCommandStreamReceiver {
      public:
        using UltCommandStreamReceiver::UltCommandStreamReceiver;

        // Expects to be asked for the latest flushed task count with flush stamp 0,
        // no quick KMD sleep, MEDIUM throttle and a single active partition.
        WaitStatus waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait,
                                                         bool useQuickKmdSleep, QueueThrottle throttle) override {
            EXPECT_EQ(this->latestFlushedTaskCount, taskCountToWait);
            EXPECT_EQ(0u, flushStampToWait);
            EXPECT_FALSE(useQuickKmdSleep);
            EXPECT_EQ(throttle, QueueThrottle::MEDIUM);
            EXPECT_EQ(1u, this->activePartitions);
            waitForTaskCountWithKmdNotifyFallbackCalled++;

            return WaitStatus::Ready;
        }

        // Expects exactly one preceding waitForTaskCountWithKmdNotifyFallback() call.
        WaitStatus waitForTaskCountAndCleanTemporaryAllocationList(uint32_t requiredTaskCount) override {
            EXPECT_EQ(1u, waitForTaskCountWithKmdNotifyFallbackCalled);
            EXPECT_EQ(this->latestFlushedTaskCount, requiredTaskCount);
            waitForTaskCountAndCleanAllocationListCalled++;

            return WaitStatus::Ready;
        }

        uint32_t waitForTaskCountAndCleanAllocationListCalled = 0;
        uint32_t waitForTaskCountWithKmdNotifyFallbackCalled = 0;
        CommandStreamReceiver *gpgpuCsr = nullptr;
    };

    // Skips on 32-bit builds, when the SVM check fails, or when the product does not fully
    // support the blitter; otherwise enables the blitter-related debug flags and creates the
    // device, context and command queue, then caches the first BCS engine's CSR in bcsCsr.
    template 
    void SetUpT() {
        if (is32bit) {
            GTEST_SKIP();
        }
        REQUIRE_SVM_OR_SKIP(defaultHwInfo);
        DebugManager.flags.EnableTimestampPacket.set(1);
        DebugManager.flags.EnableBlitterForEnqueueOperations.set(1);
        DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(1);
        DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(1);
        auto hwInfo = *defaultHwInfo;
        hwInfo.capabilityTable.blitterOperationsSupported = true;
        device = std::make_unique(MockClDevice::createWithNewExecutionEnvironment(&hwInfo));
        if (!HwInfoConfig::get(defaultHwInfo->platform.eProductFamily)->isBlitterFullySupported(device->getHardwareInfo())) {
            GTEST_SKIP();
        }
        bcsMockContext = std::make_unique(device.get());
        commandQueue.reset(new MockCommandQueueHw(bcsMockContext.get(), device.get(), nullptr));
        bcsCsr = static_cast *>(commandQueue.get())->bcsEngines[0]->commandStreamReceiver;
    }

    template 
    void TearDownT() {}

    // Declared here; the definition is outside this part of the file.
    template 
    void waitForCacheFlushFromBcsTest(MockCommandQueueHw &commandQueue);

    DebugManagerStateRestore restore;

    std::unique_ptr bcsOsContext;
    std::unique_ptr device;
    std::unique_ptr bcsMockContext;
    std::unique_ptr commandQueue;
    // NOTE(review): has no initializer and stays unset when SetUpT() skips early.
    CommandStreamReceiver *bcsCsr;
    uint32_t hostPtr = 0;
    cl_int retVal = CL_SUCCESS;
};
static_cast(newDevice->getExecutionEnvironment()->memoryManager.get())->localMemorySupported[rootDeviceIndex] = true; EXPECT_EQ(0u, bcsCsr->blitBufferCalled); auto bufferForBlt = clUniquePtr(Buffer::create(newBcsMockContext.get(), CL_MEM_COPY_HOST_PTR, 2000, &hostPtr, retVal)); EXPECT_EQ(1u, bcsCsr->blitBufferCalled); }

// Plain ::testing::Test alias: the case below builds its own device and context
// instead of relying on the BcsBufferTests fixture.
using NoBcsBufferTests = ::testing::Test;

// With blitterOperationsSupported cleared in the hardware info, creating a
// CL_MEM_COPY_HOST_PTR buffer must leave the context's BCS CSR blitBufferCalled at 0.
HWTEST_F(NoBcsBufferTests, givenProductWithNoFullyBlitterSupportWhenCreatingBufferWithCopyHostPtrThenDontUseBlitOperation) {
    uint32_t hostPtr = 0;
    auto rootDeviceIndex = 1u;
    auto hwInfo = *defaultHwInfo;
    hwInfo.capabilityTable.blitterOperationsSupported = false;

    // Precondition of the scenario: the product must report no full blitter support.
    EXPECT_FALSE(HwInfoConfig::get(hwInfo.platform.eProductFamily)->isBlitterFullySupported(hwInfo));
    std::unique_ptr newDevice = std::make_unique(MockClDevice::createWithNewExecutionEnvironment(&hwInfo, rootDeviceIndex));
    std::unique_ptr newBcsMockContext = std::make_unique(newDevice.get());
    auto bcsCsr = static_cast *>(newBcsMockContext->bcsCsr.get());
    // Same memory-manager setup as the blit-positive tests, so only the blitter capability differs.
    static_cast(newDevice->getExecutionEnvironment()->memoryManager.get())->enable64kbpages[rootDeviceIndex] = true;
    static_cast(newDevice->getExecutionEnvironment()->memoryManager.get())->localMemorySupported[rootDeviceIndex] = true;

    EXPECT_EQ(0u, bcsCsr->blitBufferCalled);
    cl_int retVal = 0;
    auto bufferForBlt = clUniquePtr(Buffer::create(newBcsMockContext.get(), CL_MEM_COPY_HOST_PTR, sizeof(hostPtr), &hostPtr, retVal));
    // Counter is unchanged: the initial copy did not go through blitBuffer.
    EXPECT_EQ(0u, bcsCsr->blitBufferCalled);
}

HWTEST_TEMPLATED_F(BcsBufferTests, givenBcsSupportedWhenEnqueueBufferOperationIsCalledThenUseBcsCsr) { DebugManager.flags.EnableBlitterForEnqueueOperations.set(0); auto mockCmdQueue = static_cast *>(commandQueue.get()); auto bcsEngine = mockCmdQueue->bcsEngines[0]; auto bcsCsr = static_cast *>(bcsEngine->commandStreamReceiver); auto bufferForBlt0 = clUniquePtr(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); auto bufferForBlt1 = clUniquePtr(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal));
bufferForBlt0->forceDisallowCPUCopy = true; bufferForBlt1->forceDisallowCPUCopy = true; size_t bufferOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {1, 2, 1}; DebugManager.flags.EnableBlitterForEnqueueOperations.set(0); mockCmdQueue->clearBcsEngines(); mockCmdQueue->clearBcsStates(); commandQueue->enqueueWriteBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); commandQueue->enqueueReadBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); commandQueue->enqueueCopyBuffer(bufferForBlt0.get(), bufferForBlt1.get(), 0, 1, 1, 0, nullptr, nullptr); commandQueue->enqueueReadBufferRect(bufferForBlt0.get(), CL_TRUE, bufferOrigin, hostOrigin, region, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, &hostPtr, 0, nullptr, nullptr); commandQueue->enqueueWriteBufferRect(bufferForBlt0.get(), CL_TRUE, bufferOrigin, hostOrigin, region, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, &hostPtr, 0, nullptr, nullptr); commandQueue->enqueueCopyBufferRect(bufferForBlt0.get(), bufferForBlt1.get(), bufferOrigin, hostOrigin, region, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, 0, nullptr, nullptr); DebugManager.flags.EnableBlitterForEnqueueOperations.set(1); mockCmdQueue->clearBcsEngines(); mockCmdQueue->clearBcsStates(); commandQueue->enqueueWriteBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); commandQueue->enqueueReadBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); commandQueue->enqueueCopyBuffer(bufferForBlt0.get(), bufferForBlt1.get(), 0, 1, 1, 0, nullptr, nullptr); commandQueue->enqueueReadBufferRect(bufferForBlt0.get(), CL_TRUE, bufferOrigin, hostOrigin, region, MemoryConstants::cacheLineSize, 
MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, &hostPtr, 0, nullptr, nullptr); commandQueue->enqueueWriteBufferRect(bufferForBlt0.get(), CL_TRUE, bufferOrigin, hostOrigin, region, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, &hostPtr, 0, nullptr, nullptr); commandQueue->enqueueCopyBufferRect(bufferForBlt0.get(), bufferForBlt1.get(), bufferOrigin, hostOrigin, region, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, 0, nullptr, nullptr); DebugManager.flags.EnableBlitterForEnqueueOperations.set(0); mockCmdQueue->bcsEngines[0] = bcsEngine; mockCmdQueue->clearBcsStates(); commandQueue->enqueueWriteBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); commandQueue->enqueueReadBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); commandQueue->enqueueCopyBuffer(bufferForBlt0.get(), bufferForBlt1.get(), 0, 1, 1, 0, nullptr, nullptr); commandQueue->enqueueReadBufferRect(bufferForBlt0.get(), CL_TRUE, bufferOrigin, hostOrigin, region, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, &hostPtr, 0, nullptr, nullptr); commandQueue->enqueueWriteBufferRect(bufferForBlt0.get(), CL_TRUE, bufferOrigin, hostOrigin, region, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, &hostPtr, 0, nullptr, nullptr); commandQueue->enqueueCopyBufferRect(bufferForBlt0.get(), bufferForBlt1.get(), bufferOrigin, hostOrigin, region, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, 0, nullptr, nullptr); commandQueue->enqueueSVMMemcpy(CL_TRUE, bufferForBlt0.get(), bufferForBlt1.get(), 1, 0, nullptr, nullptr); 
DebugManager.flags.EnableBlitterForEnqueueOperations.set(-1); mockCmdQueue->bcsEngines[0] = bcsEngine; mockCmdQueue->clearBcsStates(); commandQueue->enqueueWriteBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); commandQueue->enqueueReadBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); commandQueue->enqueueCopyBuffer(bufferForBlt0.get(), bufferForBlt1.get(), 0, 1, 1, 0, nullptr, nullptr); commandQueue->enqueueReadBufferRect(bufferForBlt0.get(), CL_TRUE, bufferOrigin, hostOrigin, region, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, &hostPtr, 0, nullptr, nullptr); commandQueue->enqueueWriteBufferRect(bufferForBlt0.get(), CL_TRUE, bufferOrigin, hostOrigin, region, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, &hostPtr, 0, nullptr, nullptr); commandQueue->enqueueCopyBufferRect(bufferForBlt0.get(), bufferForBlt1.get(), bufferOrigin, hostOrigin, region, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, 0, nullptr, nullptr); commandQueue->enqueueSVMMemcpy(CL_TRUE, bufferForBlt0.get(), bufferForBlt1.get(), 1, 0, nullptr, nullptr); EXPECT_EQ(7u, bcsCsr->blitBufferCalled); DebugManager.flags.EnableBlitterForEnqueueOperations.set(1); mockCmdQueue->bcsEngines[0] = bcsEngine; mockCmdQueue->clearBcsStates(); commandQueue->enqueueWriteBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(8u, bcsCsr->blitBufferCalled); commandQueue->enqueueReadBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(9u, bcsCsr->blitBufferCalled); commandQueue->enqueueCopyBuffer(bufferForBlt0.get(), bufferForBlt1.get(), 0, 1, 1, 0, nullptr, nullptr); EXPECT_EQ(10u, bcsCsr->blitBufferCalled); 
commandQueue->enqueueReadBufferRect(bufferForBlt0.get(), CL_TRUE, bufferOrigin, hostOrigin, region, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, &hostPtr, 0, nullptr, nullptr); EXPECT_EQ(11u, bcsCsr->blitBufferCalled); commandQueue->enqueueWriteBufferRect(bufferForBlt0.get(), CL_TRUE, bufferOrigin, hostOrigin, region, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, &hostPtr, 0, nullptr, nullptr); EXPECT_EQ(12u, bcsCsr->blitBufferCalled); commandQueue->enqueueCopyBufferRect(bufferForBlt0.get(), bufferForBlt1.get(), bufferOrigin, hostOrigin, region, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, 0, nullptr, nullptr); EXPECT_EQ(13u, bcsCsr->blitBufferCalled); commandQueue->enqueueSVMMemcpy(CL_TRUE, bufferForBlt0.get(), bufferForBlt1.get(), 1, 0, nullptr, nullptr); EXPECT_EQ(14u, bcsCsr->blitBufferCalled); } HWTEST_TEMPLATED_F(BcsBufferTests, givenDebugFlagSetWhenDispatchingBlitCommandsThenPrintDispatchDetails) { DebugManager.flags.PrintBlitDispatchDetails.set(true); uint32_t maxBlitWidth = static_cast(BlitterConstants::maxBlitWidth); uint32_t copySize = maxBlitWidth + 5; auto myHostPtr = std::make_unique(copySize); uint64_t hostPtrAddr = castToUint64(myHostPtr.get()); auto bufferForBlt = clUniquePtr(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, copySize, nullptr, retVal)); bufferForBlt->forceDisallowCPUCopy = true; uint64_t bufferGpuAddr = bufferForBlt->getGraphicsAllocation(0)->getGpuAddress(); testing::internal::CaptureStdout(); commandQueue->enqueueWriteBuffer(bufferForBlt.get(), CL_TRUE, 0, copySize, myHostPtr.get(), nullptr, 0, nullptr, nullptr); std::string output = testing::internal::GetCapturedStdout(); EXPECT_NE(0u, output.size()); char expectedStr[512] = {}; snprintf(expectedStr, 512, "\nBlit dispatch with 
AuxTranslationDirection %u \ \nBlit command. width: %u, height: %u, srcAddr: %#" SCNx64 ", dstAddr: %#" SCNx64 " \ \nBlit command. width: %u, height: %u, srcAddr: %#" SCNx64 ", dstAddr: %#" SCNx64 " ", static_cast(AuxTranslationDirection::None), maxBlitWidth, 1, hostPtrAddr, bufferGpuAddr, (copySize - maxBlitWidth), 1, ptrOffset(hostPtrAddr, maxBlitWidth), ptrOffset(bufferGpuAddr, maxBlitWidth)); EXPECT_TRUE(hasSubstr(output, std::string(expectedStr))); } HWTEST_TEMPLATED_F(BcsBufferTests, givenBcsSupportedWhenQueueIsBlockedThenDispatchBlitWhenUnblocked) { auto bcsCsr = static_cast *>(this->bcsCsr); auto bufferForBlt0 = clUniquePtr(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); auto bufferForBlt1 = clUniquePtr(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); bufferForBlt0->forceDisallowCPUCopy = true; bufferForBlt1->forceDisallowCPUCopy = true; UserEvent userEvent(bcsMockContext.get()); cl_event waitlist = &userEvent; size_t bufferOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {1, 2, 1}; commandQueue->enqueueWriteBuffer(bufferForBlt0.get(), CL_FALSE, 0, 1, &hostPtr, nullptr, 1, &waitlist, nullptr); commandQueue->enqueueReadBuffer(bufferForBlt1.get(), CL_FALSE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); commandQueue->enqueueCopyBuffer(bufferForBlt0.get(), bufferForBlt1.get(), 0, 1, 1, 0, nullptr, nullptr); commandQueue->enqueueReadBufferRect(bufferForBlt0.get(), CL_FALSE, bufferOrigin, hostOrigin, region, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, &hostPtr, 0, nullptr, nullptr); commandQueue->enqueueWriteBufferRect(bufferForBlt0.get(), CL_FALSE, bufferOrigin, hostOrigin, region, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, &hostPtr, 0, nullptr, nullptr); commandQueue->enqueueCopyBufferRect(bufferForBlt0.get(), 
bufferForBlt1.get(), bufferOrigin, hostOrigin, region, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, 0, nullptr, nullptr); commandQueue->enqueueSVMMemcpy(CL_FALSE, bufferForBlt0.get(), bufferForBlt1.get(), 1, 0, nullptr, nullptr); EXPECT_EQ(0u, bcsCsr->blitBufferCalled); userEvent.setStatus(CL_COMPLETE); EXPECT_EQ(7u, bcsCsr->blitBufferCalled); commandQueue->enqueueWriteBuffer(bufferForBlt0.get(), CL_FALSE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(8u, bcsCsr->blitBufferCalled); commandQueue->enqueueReadBuffer(bufferForBlt0.get(), CL_FALSE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(9u, bcsCsr->blitBufferCalled); commandQueue->enqueueCopyBuffer(bufferForBlt0.get(), bufferForBlt1.get(), 0, 1, 1, 0, nullptr, nullptr); EXPECT_EQ(10u, bcsCsr->blitBufferCalled); commandQueue->enqueueReadBufferRect(bufferForBlt0.get(), CL_FALSE, bufferOrigin, hostOrigin, region, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, &hostPtr, 0, nullptr, nullptr); EXPECT_EQ(11u, bcsCsr->blitBufferCalled); commandQueue->enqueueWriteBufferRect(bufferForBlt0.get(), CL_FALSE, bufferOrigin, hostOrigin, region, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, &hostPtr, 0, nullptr, nullptr); EXPECT_EQ(12u, bcsCsr->blitBufferCalled); commandQueue->enqueueCopyBufferRect(bufferForBlt0.get(), bufferForBlt1.get(), bufferOrigin, hostOrigin, region, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, 0, nullptr, nullptr); EXPECT_EQ(13u, bcsCsr->blitBufferCalled); commandQueue->enqueueSVMMemcpy(CL_FALSE, bufferForBlt0.get(), bufferForBlt1.get(), 1, 0, nullptr, nullptr); } HWTEST_TEMPLATED_F(BcsBufferTests, givenBuffersWhenCopyBufferCalledThenUseBcs) { using XY_COPY_BLT 
= typename FamilyType::XY_COPY_BLT; auto cmdQ = clUniquePtr(new MockCommandQueueHw(bcsMockContext.get(), device.get(), nullptr)); auto bufferForBlt0 = clUniquePtr(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); auto bufferForBlt1 = clUniquePtr(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); bufferForBlt0->forceDisallowCPUCopy = true; bufferForBlt1->forceDisallowCPUCopy = true; cmdQ->enqueueCopyBuffer(bufferForBlt0.get(), bufferForBlt1.get(), 0, 0, 1, 0, nullptr, nullptr); HardwareParse hwParser; hwParser.parseCommands(bcsCsr->getCS(0)); auto commandItor = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); EXPECT_NE(hwParser.cmdList.end(), commandItor); auto copyBltCmd = genCmdCast(*commandItor); EXPECT_EQ(bufferForBlt0->getGraphicsAllocation(device->getRootDeviceIndex())->getGpuAddress(), copyBltCmd->getSourceBaseAddress()); EXPECT_EQ(bufferForBlt1->getGraphicsAllocation(device->getRootDeviceIndex())->getGpuAddress(), copyBltCmd->getDestinationBaseAddress()); } HWTEST_TEMPLATED_F(BcsBufferTests, givenBuffersWhenCopyBufferRectCalledThenUseBcs) { using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT; auto cmdQ = clUniquePtr(new MockCommandQueueHw(bcsMockContext.get(), device.get(), nullptr)); auto bufferForBlt0 = clUniquePtr(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); auto bufferForBlt1 = clUniquePtr(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); bufferForBlt0->forceDisallowCPUCopy = true; bufferForBlt1->forceDisallowCPUCopy = true; size_t bufferOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {1, 2, 1}; cmdQ->enqueueCopyBufferRect(bufferForBlt0.get(), bufferForBlt1.get(), bufferOrigin, hostOrigin, region, 0, 0, 0, 0, 0, nullptr, nullptr); HardwareParse hwParser; hwParser.parseCommands(bcsCsr->getCS(0)); auto commandItor = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); EXPECT_NE(hwParser.cmdList.end(), 
commandItor); auto copyBltCmd = genCmdCast(*commandItor); EXPECT_EQ(bufferForBlt0->getGraphicsAllocation(device->getRootDeviceIndex())->getGpuAddress(), copyBltCmd->getSourceBaseAddress()); EXPECT_EQ(bufferForBlt1->getGraphicsAllocation(device->getRootDeviceIndex())->getGpuAddress(), copyBltCmd->getDestinationBaseAddress()); }

// Issues a blocking one-byte enqueueSVMMemcpy from an SVM allocation into a locally owned
// destination, then parses the BCS command stream and checks that the located blit command
// uses the SVM pointer as its source address and the destination pointer as its destination.
HWTEST_TEMPLATED_F(BcsBufferTests, givenDstHostPtrWhenEnqueueSVMMemcpyThenEnqueuReadBufferIsCalledAndBcs) {
    using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT;

    auto cmdQ = clUniquePtr(new MockCommandQueueHw(bcsMockContext.get(), device.get(), nullptr));
    auto pDstSVM = std::make_unique(1);
    // Source comes from the context's SVM allocs manager and is freed explicitly at the end of the test.
    auto pSrcSVM = bcsMockContext->getSVMAllocsManager()->createSVMAlloc(1, {}, bcsMockContext->getRootDeviceIndices(), bcsMockContext->getDeviceBitfields());

    cmdQ->enqueueSVMMemcpy(true, pDstSVM.get(), pSrcSVM, 1, 0, nullptr, nullptr);

    // Inspect what was programmed into the BCS CSR's command stream.
    HardwareParse hwParser;
    hwParser.parseCommands(bcsCsr->getCS(0));
    auto commandItor = find(hwParser.cmdList.begin(), hwParser.cmdList.end());
    // NOTE(review): commandItor is dereferenced below even if this non-fatal check fails;
    // confirm whether a fatal assertion is intended here.
    EXPECT_NE(hwParser.cmdList.end(), commandItor);
    auto copyBltCmd = genCmdCast(*commandItor);

    EXPECT_EQ(pSrcSVM, reinterpret_cast(copyBltCmd->getSourceBaseAddress()));
    EXPECT_EQ(pDstSVM.get(), reinterpret_cast(copyBltCmd->getDestinationBaseAddress()));

    bcsMockContext->getSVMAllocsManager()->freeSVMAlloc(pSrcSVM);
}

HWTEST_TEMPLATED_F(BcsBufferTests, givenSrcHostPtrWhenEnqueueSVMMemcpyThenEnqueuWriteBufferIsCalledAndBcs) { using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT; auto cmdQ = clUniquePtr(new MockCommandQueueHw(bcsMockContext.get(), device.get(), nullptr)); auto pSrcSVM = std::make_unique(1); auto pDstSVM = bcsMockContext->getSVMAllocsManager()->createSVMAlloc(1, {}, bcsMockContext->getRootDeviceIndices(), bcsMockContext->getDeviceBitfields()); cmdQ->enqueueSVMMemcpy(true, pDstSVM, pSrcSVM.get(), 1, 0, nullptr, nullptr); HardwareParse hwParser; hwParser.parseCommands(bcsCsr->getCS(0)); auto commandItor = find(hwParser.cmdList.begin(), hwParser.cmdList.end());
EXPECT_NE(hwParser.cmdList.end(), commandItor); auto copyBltCmd = genCmdCast(*commandItor); EXPECT_EQ(pSrcSVM.get(), reinterpret_cast(copyBltCmd->getSourceBaseAddress())); EXPECT_EQ(pDstSVM, reinterpret_cast(copyBltCmd->getDestinationBaseAddress())); bcsMockContext->getSVMAllocsManager()->freeSVMAlloc(pDstSVM); } HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockedBlitEnqueueWhenUnblockingThenMakeResidentAllTimestampPackets) { auto bcsCsr = static_cast *>(this->bcsCsr); bcsCsr->storeMakeResidentAllocations = true; auto mockCmdQ = static_cast *>(commandQueue.get()); auto bufferForBlt = clUniquePtr(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); bufferForBlt->forceDisallowCPUCopy = true; TimestampPacketContainer previousTimestampPackets; mockCmdQ->obtainNewTimestampPacketNodes(1, previousTimestampPackets, false, *bcsCsr); auto dependencyFromPreviousEnqueue = mockCmdQ->timestampPacketContainer->peekNodes()[0]; auto event = make_releaseable(mockCmdQ, CL_COMMAND_READ_BUFFER, 0, 0); MockTimestampPacketContainer eventDependencyContainer(*bcsCsr->getTimestampPacketAllocator(), 1); auto eventDependency = eventDependencyContainer.getNode(0); event->addTimestampPacketNodes(eventDependencyContainer); auto userEvent = make_releaseable(bcsMockContext.get()); cl_event waitlist[] = {userEvent.get(), event.get()}; commandQueue->enqueueReadBuffer(bufferForBlt.get(), CL_FALSE, 0, 1, &hostPtr, nullptr, 2, waitlist, nullptr); auto outputDependency = mockCmdQ->timestampPacketContainer->peekNodes()[0]; EXPECT_NE(outputDependency, dependencyFromPreviousEnqueue); EXPECT_FALSE(bcsCsr->isMadeResident(dependencyFromPreviousEnqueue->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation())); EXPECT_FALSE(bcsCsr->isMadeResident(outputDependency->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation())); EXPECT_FALSE(bcsCsr->isMadeResident(eventDependency->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation())); userEvent->setStatus(CL_COMPLETE); 
EXPECT_TRUE(bcsCsr->isMadeResident(dependencyFromPreviousEnqueue->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation(), bcsCsr->taskCount)); EXPECT_TRUE(bcsCsr->isMadeResident(outputDependency->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation(), bcsCsr->taskCount)); EXPECT_TRUE(bcsCsr->isMadeResident(eventDependency->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation(), bcsCsr->taskCount)); }

// With DisableZeroCopyForBuffers set, a CL_MEM_USE_HOST_PTR buffer is expected to have a map
// allocation; both clEnqueueMapBuffer and clEnqueueUnmapMemObject must then record that
// allocation in the mock queue's kernelParams.transferAllocation.
HWTEST_TEMPLATED_F(BcsBufferTests, givenMapAllocationWhenEnqueueingReadOrWriteBufferThenStoreMapAllocationInDispatchParameters) {
    DebugManager.flags.DisableZeroCopyForBuffers.set(true);
    auto mockCmdQ = static_cast *>(commandQueue.get());

    uint8_t hostPtr[64] = {};
    auto bufferForBlt = clUniquePtr(Buffer::create(bcsMockContext.get(), CL_MEM_USE_HOST_PTR, 1, hostPtr, retVal));
    bufferForBlt->forceDisallowCPUCopy = true;
    auto mapAllocation = bufferForBlt->getMapAllocation(device->getRootDeviceIndex());
    EXPECT_NE(nullptr, mapAllocation);

    // Reset the recorded allocation so the check below can only pass if the map call sets it.
    mockCmdQ->kernelParams.transferAllocation = nullptr;
    auto mapPtr = clEnqueueMapBuffer(mockCmdQ, bufferForBlt.get(), true, 0, 0, 1, 0, nullptr, nullptr, &retVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(mapAllocation, mockCmdQ->kernelParams.transferAllocation);

    // Same reset-and-check sequence for the unmap call.
    mockCmdQ->kernelParams.transferAllocation = nullptr;
    retVal = clEnqueueUnmapMemObject(mockCmdQ, bufferForBlt.get(), mapPtr, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(mapAllocation, mockCmdQ->kernelParams.transferAllocation);
}

HWTEST_TEMPLATED_F(BcsBufferTests, givenWriteBufferEnqueueWithGpgpuSubmissionWhenProgrammingCommandStreamThenDoNotAddSemaphoreWaitOnGpgpu) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; auto cmdQ = clUniquePtr(new MockCommandQueueHw(bcsMockContext.get(), device.get(), nullptr)); auto queueCsr = cmdQ->gpgpuEngine->commandStreamReceiver; auto initialTaskCount = queueCsr->peekTaskCount(); cl_int retVal = CL_SUCCESS; auto buffer = clUniquePtr(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr,
retVal)); buffer->forceDisallowCPUCopy = true; void *hostPtr = reinterpret_cast(0x12340000); cmdQ->enqueueWriteBuffer(buffer.get(), true, 0, 1, hostPtr, nullptr, 0, nullptr, nullptr); HardwareParse hwParser; hwParser.parseCommands(*cmdQ->peekCommandStream()); uint32_t semaphoresCount = 0; for (auto &cmd : hwParser.cmdList) { if (auto semaphoreCmd = genCmdCast(cmd)) { if (UnitTestHelper::isAdditionalMiSemaphoreWait(*semaphoreCmd)) { continue; } semaphoresCount++; } } EXPECT_EQ(0u, semaphoresCount); EXPECT_EQ(initialTaskCount + 1, queueCsr->peekTaskCount()); } HWTEST_TEMPLATED_F(BcsBufferTests, givenReadBufferEnqueueWithGpgpuSubmissionWhenProgrammingCommandStreamThenDoNotAddSemaphoreWaitOnGpgpu) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; auto cmdQ = clUniquePtr(new MockCommandQueueHw(bcsMockContext.get(), device.get(), nullptr)); auto queueCsr = cmdQ->gpgpuEngine->commandStreamReceiver; auto initialTaskCount = queueCsr->peekTaskCount(); cl_int retVal = CL_SUCCESS; auto buffer = clUniquePtr(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); buffer->forceDisallowCPUCopy = true; void *hostPtr = reinterpret_cast(0x12340000); cmdQ->enqueueWriteBuffer(buffer.get(), true, 0, 1, hostPtr, nullptr, 0, nullptr, nullptr); HardwareParse hwParser; hwParser.parseCommands(*cmdQ->peekCommandStream()); uint32_t semaphoresCount = 0; for (auto &cmd : hwParser.cmdList) { if (auto semaphoreCmd = genCmdCast(cmd)) { if (UnitTestHelper::isAdditionalMiSemaphoreWait(*semaphoreCmd)) { continue; } semaphoresCount++; } } EXPECT_EQ(0u, semaphoresCount); EXPECT_EQ(initialTaskCount + 1, queueCsr->peekTaskCount()); } template void BcsBufferTests::waitForCacheFlushFromBcsTest(MockCommandQueueHw &commandQueue) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; bool isCacheFlushForBcsRequired = commandQueue.isCacheFlushForBcsRequired(); auto bcsCsr = static_cast *>(this->bcsCsr); 
cl_int retVal = CL_SUCCESS; auto buffer = clUniquePtr(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); buffer->forceDisallowCPUCopy = true; void *hostPtr = reinterpret_cast(0x12340000); commandQueue.enqueueWriteBuffer(buffer.get(), true, 0, 1, hostPtr, nullptr, 0, nullptr, nullptr); HardwareParse hwParserGpGpu; HardwareParse hwParserBcs; hwParserGpGpu.parseCommands(*commandQueue.peekCommandStream()); hwParserBcs.parseCommands(bcsCsr->commandStream); auto gpgpuPipeControls = findAll(hwParserGpGpu.cmdList.begin(), hwParserGpGpu.cmdList.end()); uint64_t cacheFlushWriteAddress = 0; for (auto &pipeControl : gpgpuPipeControls) { auto pipeControlCmd = genCmdCast(*pipeControl); cacheFlushWriteAddress = NEO::UnitTestHelper::getPipeControlPostSyncAddress(*pipeControlCmd); if (cacheFlushWriteAddress != 0) { EXPECT_EQ(MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo), pipeControlCmd->getDcFlushEnable()); EXPECT_TRUE(pipeControlCmd->getCommandStreamerStallEnable()); EXPECT_EQ(isCacheFlushForBcsRequired, 0u == pipeControlCmd->getImmediateData()); break; } } auto bcsSemaphores = findAll(hwParserBcs.cmdList.begin(), hwParserBcs.cmdList.end()); size_t additionalSemaphores = UnitTestHelper::isAdditionalMiSemaphoreWaitRequired(device->getHardwareInfo()) ? 
2 : 0; if (isCacheFlushForBcsRequired) { EXPECT_NE(0u, cacheFlushWriteAddress); EXPECT_EQ(1u + additionalSemaphores, bcsSemaphores.size()); auto semaphoreCmd = genCmdCast(*bcsSemaphores[0]); EXPECT_EQ(cacheFlushWriteAddress, semaphoreCmd->getSemaphoreGraphicsAddress()); } else { EXPECT_EQ(additionalSemaphores, bcsSemaphores.size()); } }

// Runs the shared waitForCacheFlushFromBcsTest checks on a fresh mock queue whose
// overrideIsCacheFlushForBcsRequired is enabled with returnValue = true
// (presumably making isCacheFlushForBcsRequired() report true — confirm in the mock).
HWTEST_TEMPLATED_F(BcsBufferTests, givenCommandQueueWithCacheFlushRequirementWhenProgrammingCmdBufferThenWaitForCacheFlushFromBcs) {
    auto cmdQ = clUniquePtr(new MockCommandQueueHw(bcsMockContext.get(), device.get(), nullptr));
    cmdQ->overrideIsCacheFlushForBcsRequired.enabled = true;
    cmdQ->overrideIsCacheFlushForBcsRequired.returnValue = true;
    waitForCacheFlushFromBcsTest(*cmdQ);
}

// Counterpart of the case above with returnValue = false, driving the shared helper
// with the override reporting no cache-flush requirement.
// NOTE(review): the test name still ends in "ThenWaitForCacheFlushFromBcs" although this is the
// no-requirement variant — confirm whether the name should say that no wait is expected.
HWTEST_TEMPLATED_F(BcsBufferTests, givenCommandQueueWithoutCacheFlushRequirementWhenProgrammingCmdBufferThenWaitForCacheFlushFromBcs) {
    auto cmdQ = clUniquePtr(new MockCommandQueueHw(bcsMockContext.get(), device.get(), nullptr));
    cmdQ->overrideIsCacheFlushForBcsRequired.enabled = true;
    cmdQ->overrideIsCacheFlushForBcsRequired.returnValue = false;
    waitForCacheFlushFromBcsTest(*cmdQ);
}

HWTEST_TEMPLATED_F(BcsBufferTests, givenPipeControlRequestWhenDispatchingBlitEnqueueThenWaitPipeControlOnBcsEngine) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; auto cmdQ = clUniquePtr(new MockCommandQueueHw(bcsMockContext.get(), device.get(), nullptr)); auto bcsCsr = static_cast *>(this->bcsCsr); auto queueCsr = static_cast *>(cmdQ->gpgpuEngine->commandStreamReceiver); queueCsr->stallingCommandsOnNextFlushRequired = true; cl_int retVal = CL_SUCCESS; auto buffer = clUniquePtr(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); buffer->forceDisallowCPUCopy = true; void *hostPtr = reinterpret_cast(0x12340000); cmdQ->enqueueWriteBuffer(buffer.get(), true, 0, 1, hostPtr, nullptr, 0, nullptr, nullptr); HardwareParse hwParser; hwParser.parseCommands(queueCsr->commandStream);
uint64_t pipeControlWriteAddress = 0; for (auto &cmd : hwParser.cmdList) { if (auto pipeControlCmd = genCmdCast(cmd)) { if (pipeControlCmd->getPostSyncOperation() != PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) { continue; } EXPECT_TRUE(pipeControlCmd->getCommandStreamerStallEnable()); pipeControlWriteAddress = NEO::UnitTestHelper::getPipeControlPostSyncAddress(*pipeControlCmd); break; } } EXPECT_NE(0u, pipeControlWriteAddress); HardwareParse bcsHwParser; bcsHwParser.parseCommands(bcsCsr->commandStream); auto semaphores = findAll(bcsHwParser.cmdList.begin(), bcsHwParser.cmdList.end()); if (cmdQ->isCacheFlushForBcsRequired()) { EXPECT_EQ(UnitTestHelper::isAdditionalMiSemaphoreWaitRequired(device->getHardwareInfo()) ? 4u : 2u, semaphores.size()); EXPECT_EQ(pipeControlWriteAddress, genCmdCast(*(semaphores[1]))->getSemaphoreGraphicsAddress()); } else { EXPECT_EQ(UnitTestHelper::isAdditionalMiSemaphoreWaitRequired(device->getHardwareInfo()) ? 3u : 1u, semaphores.size()); EXPECT_EQ(pipeControlWriteAddress, genCmdCast(*(semaphores[0]))->getSemaphoreGraphicsAddress()); } } HWTEST_TEMPLATED_F(BcsBufferTests, givenBarrierWhenReleasingMultipleBlockedEnqueuesThenProgramBarrierOnce) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; auto cmdQ = clUniquePtr(new MockCommandQueueHw(bcsMockContext.get(), device.get(), nullptr)); cl_int retVal = CL_SUCCESS; auto buffer = clUniquePtr(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); buffer->forceDisallowCPUCopy = true; void *hostPtr = reinterpret_cast(0x12340000); UserEvent userEvent0, userEvent1; cl_event waitlist0[] = {&userEvent0}; cl_event waitlist1[] = {&userEvent1}; cmdQ->enqueueBarrierWithWaitList(0, nullptr, nullptr); cmdQ->enqueueWriteBuffer(buffer.get(), false, 0, 1, hostPtr, nullptr, 1, waitlist0, nullptr); cmdQ->enqueueWriteBuffer(buffer.get(), false, 0, 1, hostPtr, nullptr, 1, waitlist1, nullptr); auto pipeControlLookup = [](LinearStream &stream, size_t 
offset) { HardwareParse hwParser; hwParser.parseCommands(stream, offset); bool stallingPipeControlFound = false; for (auto &cmd : hwParser.cmdList) { if (auto pipeControlCmd = genCmdCast(cmd)) { if (pipeControlCmd->getPostSyncOperation() != PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) { continue; } stallingPipeControlFound = true; EXPECT_TRUE(pipeControlCmd->getCommandStreamerStallEnable()); EXPECT_EQ(MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo), pipeControlCmd->getDcFlushEnable()); break; } } return stallingPipeControlFound; }; auto &csrStream = cmdQ->getGpgpuCommandStreamReceiver().getCS(0); EXPECT_TRUE(cmdQ->getGpgpuCommandStreamReceiver().isStallingCommandsOnNextFlushRequired()); userEvent0.setStatus(CL_COMPLETE); EXPECT_FALSE(cmdQ->getGpgpuCommandStreamReceiver().isStallingCommandsOnNextFlushRequired()); EXPECT_TRUE(pipeControlLookup(csrStream, 0)); auto csrOffset = csrStream.getUsed(); userEvent1.setStatus(CL_COMPLETE); EXPECT_FALSE(pipeControlLookup(csrStream, csrOffset)); cmdQ->isQueueBlocked(); } HWTEST_TEMPLATED_F(BcsBufferTests, givenPipeControlRequestWhenDispatchingBlockedBlitEnqueueThenWaitPipeControlOnBcsEngine) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; auto cmdQ = clUniquePtr(new MockCommandQueueHw(bcsMockContext.get(), device.get(), nullptr)); auto bcsCsr = static_cast *>(this->bcsCsr); auto queueCsr = static_cast *>(cmdQ->gpgpuEngine->commandStreamReceiver); queueCsr->stallingCommandsOnNextFlushRequired = true; cl_int retVal = CL_SUCCESS; auto buffer = clUniquePtr(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); buffer->forceDisallowCPUCopy = true; void *hostPtr = reinterpret_cast(0x12340000); UserEvent userEvent; cl_event waitlist = &userEvent; cmdQ->enqueueWriteBuffer(buffer.get(), false, 0, 1, hostPtr, nullptr, 1, &waitlist, nullptr); userEvent.setStatus(CL_COMPLETE); 
HardwareParse bcsHwParser; bcsHwParser.parseCommands(bcsCsr->commandStream); auto semaphores = findAll(bcsHwParser.cmdList.begin(), bcsHwParser.cmdList.end()); if (cmdQ->isCacheFlushForBcsRequired()) { EXPECT_EQ(UnitTestHelper::isAdditionalMiSemaphoreWaitRequired(device->getHardwareInfo()) ? 4u : 2u, semaphores.size()); } else { EXPECT_EQ(UnitTestHelper::isAdditionalMiSemaphoreWaitRequired(device->getHardwareInfo()) ? 3u : 1u, semaphores.size()); } cmdQ->isQueueBlocked(); }

// Checks that EnqueueOperation::getTotalSizeRequiredCS returns the same size for read, write and
// copy buffer commands: TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependencyWithBlitEnqueue(),
// plus the post-sync pipe-control size when the queue reports that a cache flush for BCS is required.
HWTEST_TEMPLATED_F(BcsBufferTests, givenBufferOperationWithoutKernelWhenEstimatingCommandsSizeThenReturnCorrectValue) {
    auto cmdQ = clUniquePtr(new MockCommandQueueHw(bcsMockContext.get(), device.get(), nullptr));
    // Empty dependencies and dispatch info: only the command type varies between the three queries.
    CsrDependencies csrDependencies;
    MultiDispatchInfo multiDispatchInfo;

    auto &hwInfo = cmdQ->getDevice().getHardwareInfo();

    auto readBufferCmdsSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_READ_BUFFER, csrDependencies, false, false,
                                                                       true, *cmdQ, multiDispatchInfo, false, false);
    auto writeBufferCmdsSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_WRITE_BUFFER, csrDependencies, false, false,
                                                                        true, *cmdQ, multiDispatchInfo, false, false);
    auto copyBufferCmdsSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_COPY_BUFFER, csrDependencies, false, false,
                                                                       true, *cmdQ, multiDispatchInfo, false, false);
    auto expectedSize = TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependencyWithBlitEnqueue();

    // The expected size grows by one post-sync pipe control only when the queue needs the cache flush.
    if (cmdQ->isCacheFlushForBcsRequired()) {
        expectedSize += MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(hwInfo);
    }

    EXPECT_EQ(expectedSize, readBufferCmdsSize);
    EXPECT_EQ(expectedSize, writeBufferCmdsSize);
    EXPECT_EQ(expectedSize, copyBufferCmdsSize);
}

HWTEST_TEMPLATED_F(BcsBufferTests, givenOutputTimestampPacketWhenBlitCalledThenProgramMiFlushDwWithDataWrite) { using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW; auto csr = static_cast *>(this->bcsCsr); auto cmdQ = clUniquePtr(new MockCommandQueueHw(bcsMockContext.get(), device.get(),
nullptr)); cl_int retVal = CL_SUCCESS; auto buffer = clUniquePtr(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); buffer->forceDisallowCPUCopy = true; void *hostPtr = reinterpret_cast(0x12340000); cmdQ->enqueueWriteBuffer(buffer.get(), true, 0, 1, hostPtr, nullptr, 0, nullptr, nullptr); auto outputTimestampPacket = cmdQ->timestampPacketContainer->peekNodes()[0]; auto timestampPacketGpuWriteAddress = TimestampPacketHelper::getContextEndGpuAddress(*outputTimestampPacket); HardwareParse hwParser; hwParser.parseCommands(csr->commandStream); uint32_t miFlushDwCmdsWithOutputCount = 0; bool blitCmdFound = false; for (auto &cmd : hwParser.cmdList) { if (auto miFlushDwCmd = genCmdCast(cmd)) { if (miFlushDwCmd->getDestinationAddress() == 0) { continue; } EXPECT_EQ(miFlushDwCmdsWithOutputCount == 0, timestampPacketGpuWriteAddress == miFlushDwCmd->getDestinationAddress()); EXPECT_EQ(miFlushDwCmdsWithOutputCount == 0, 0u == miFlushDwCmd->getImmediateData()); miFlushDwCmdsWithOutputCount++; } else if (genCmdCast(cmd)) { blitCmdFound = true; EXPECT_EQ(0u, miFlushDwCmdsWithOutputCount); } } EXPECT_EQ(2u, miFlushDwCmdsWithOutputCount); // TimestampPacket + taskCount EXPECT_TRUE(blitCmdFound); } HWTEST_TEMPLATED_F(BcsBufferTests, givenInputAndOutputTimestampPacketWhenBlitCalledThenMakeThemResident) { auto bcsCsr = static_cast *>(this->bcsCsr); auto cmdQ = clUniquePtr(new MockCommandQueueHw(bcsMockContext.get(), device.get(), nullptr)); cl_int retVal = CL_SUCCESS; auto memoryManager = bcsCsr->getMemoryManager(); bcsCsr->timestampPacketAllocator = std::make_unique>>(device->getRootDeviceIndex(), memoryManager, 1, MemoryConstants::cacheLineSize, sizeof(TimestampPackets), false, device->getDeviceBitfield()); auto buffer = clUniquePtr(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); buffer->forceDisallowCPUCopy = true; void *hostPtr = reinterpret_cast(0x12340000); // first enqueue to create IOQ dependency 
cmdQ->enqueueWriteBuffer(buffer.get(), true, 0, 1, hostPtr, nullptr, 0, nullptr, nullptr); auto inputTimestampPacketAllocation = cmdQ->timestampPacketContainer->peekNodes().at(0)->getBaseGraphicsAllocation(); cmdQ->enqueueWriteBuffer(buffer.get(), true, 0, 1, hostPtr, nullptr, 0, nullptr, nullptr); auto outputTimestampPacketAllocation = cmdQ->timestampPacketContainer->peekNodes().at(0)->getBaseGraphicsAllocation(); EXPECT_NE(outputTimestampPacketAllocation, inputTimestampPacketAllocation); EXPECT_EQ(cmdQ->taskCount, inputTimestampPacketAllocation->getDefaultGraphicsAllocation()->getTaskCount(bcsCsr->getOsContext().getContextId())); EXPECT_EQ(cmdQ->taskCount, outputTimestampPacketAllocation->getDefaultGraphicsAllocation()->getTaskCount(bcsCsr->getOsContext().getContextId())); } HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingWriteBufferWhenUsingBcsThenCallWait) { auto myMockCsr = new MyMockCsr(*device->getExecutionEnvironment(), device->getRootDeviceIndex(), device->getDeviceBitfield()); myMockCsr->taskCount = 1234; myMockCsr->initializeTagAllocation(); myMockCsr->setupContext(*bcsMockContext->bcsOsContext); bcsMockContext->bcsCsr.reset(myMockCsr); EngineControl bcsEngineControl = {myMockCsr, bcsMockContext->bcsOsContext.get()}; auto cmdQ = clUniquePtr(new MockCommandQueueHw(bcsMockContext.get(), device.get(), nullptr)); cmdQ->clearBcsEngines(); cmdQ->bcsEngines[0] = &bcsEngineControl; auto &gpgpuCsr = cmdQ->getGpgpuCommandStreamReceiver(); myMockCsr->gpgpuCsr = &gpgpuCsr; cl_int retVal = CL_SUCCESS; auto buffer = clUniquePtr(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); buffer->forceDisallowCPUCopy = true; void *hostPtr = reinterpret_cast(0x12340000); cmdQ->enqueueWriteBuffer(buffer.get(), false, 0, 1, hostPtr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(0u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled); EXPECT_TRUE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty()); 
EXPECT_FALSE(myMockCsr->getTemporaryAllocations().peekIsEmpty()); bool tempAllocationFound = false; auto tempAllocation = myMockCsr->getTemporaryAllocations().peekHead(); while (tempAllocation) { if (tempAllocation->getUnderlyingBuffer() == hostPtr) { tempAllocationFound = true; break; } tempAllocation = tempAllocation->next; } EXPECT_TRUE(tempAllocationFound); cmdQ->enqueueWriteBuffer(buffer.get(), true, 0, 1, hostPtr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(1u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled); } HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingReadBufferRectWhenUsingBcsThenCallWait) { auto myMockCsr = new MyMockCsr(*device->getExecutionEnvironment(), device->getRootDeviceIndex(), device->getDeviceBitfield()); myMockCsr->taskCount = 1234; myMockCsr->initializeTagAllocation(); myMockCsr->setupContext(*bcsMockContext->bcsOsContext); bcsMockContext->bcsCsr.reset(myMockCsr); EngineControl bcsEngineControl = {myMockCsr, bcsMockContext->bcsOsContext.get()}; auto cmdQ = clUniquePtr(new MockCommandQueueHw(bcsMockContext.get(), device.get(), nullptr)); cmdQ->clearBcsEngines(); cmdQ->bcsEngines[0] = &bcsEngineControl; auto &gpgpuCsr = cmdQ->getGpgpuCommandStreamReceiver(); myMockCsr->gpgpuCsr = &gpgpuCsr; cl_int retVal = CL_SUCCESS; auto buffer = clUniquePtr(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); buffer->forceDisallowCPUCopy = true; void *hostPtr = reinterpret_cast(0x12340000); size_t bufferOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {1, 2, 1}; cmdQ->enqueueReadBufferRect(buffer.get(), false, bufferOrigin, hostOrigin, region, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, hostPtr, 0, nullptr, nullptr); EXPECT_EQ(0u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled); EXPECT_TRUE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty()); EXPECT_FALSE(myMockCsr->getTemporaryAllocations().peekIsEmpty()); 
bool tempAllocationFound = false; auto tempAllocation = myMockCsr->getTemporaryAllocations().peekHead(); while (tempAllocation) { if (tempAllocation->getUnderlyingBuffer() == hostPtr) { tempAllocationFound = true; break; } tempAllocation = tempAllocation->next; } EXPECT_TRUE(tempAllocationFound); cmdQ->enqueueReadBufferRect(buffer.get(), true, bufferOrigin, hostOrigin, region, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, hostPtr, 0, nullptr, nullptr); EXPECT_EQ(1u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled); } HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingWriteBufferRectWhenUsingBcsThenCallWait) { auto myMockCsr = new MyMockCsr(*device->getExecutionEnvironment(), device->getRootDeviceIndex(), device->getDeviceBitfield()); myMockCsr->taskCount = 1234; myMockCsr->initializeTagAllocation(); myMockCsr->setupContext(*bcsMockContext->bcsOsContext); bcsMockContext->bcsCsr.reset(myMockCsr); EngineControl bcsEngineControl = {myMockCsr, bcsMockContext->bcsOsContext.get()}; auto cmdQ = clUniquePtr(new MockCommandQueueHw(bcsMockContext.get(), device.get(), nullptr)); cmdQ->clearBcsEngines(); cmdQ->bcsEngines[0] = &bcsEngineControl; auto &gpgpuCsr = cmdQ->getGpgpuCommandStreamReceiver(); myMockCsr->gpgpuCsr = &gpgpuCsr; cl_int retVal = CL_SUCCESS; auto buffer = clUniquePtr(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); buffer->forceDisallowCPUCopy = true; void *hostPtr = reinterpret_cast(0x12340000); size_t bufferOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {1, 2, 1}; cmdQ->enqueueWriteBufferRect(buffer.get(), false, bufferOrigin, hostOrigin, region, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, hostPtr, 0, nullptr, nullptr); EXPECT_EQ(0u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled); 
EXPECT_TRUE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty()); EXPECT_FALSE(myMockCsr->getTemporaryAllocations().peekIsEmpty()); bool tempAllocationFound = false; auto tempAllocation = myMockCsr->getTemporaryAllocations().peekHead(); while (tempAllocation) { if (tempAllocation->getUnderlyingBuffer() == hostPtr) { tempAllocationFound = true; break; } tempAllocation = tempAllocation->next; } EXPECT_TRUE(tempAllocationFound); cmdQ->enqueueWriteBufferRect(buffer.get(), true, bufferOrigin, hostOrigin, region, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, hostPtr, 0, nullptr, nullptr); EXPECT_EQ(1u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled); } HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingReadBufferWhenUsingBcsThenCallWait) { auto myMockCsr = new MyMockCsr(*device->getExecutionEnvironment(), device->getRootDeviceIndex(), device->getDeviceBitfield()); myMockCsr->taskCount = 1234; myMockCsr->initializeTagAllocation(); myMockCsr->setupContext(*bcsMockContext->bcsOsContext); bcsMockContext->bcsCsr.reset(myMockCsr); EngineControl bcsEngineControl = {myMockCsr, bcsMockContext->bcsOsContext.get()}; auto cmdQ = clUniquePtr(new MockCommandQueueHw(bcsMockContext.get(), device.get(), nullptr)); cmdQ->clearBcsEngines(); cmdQ->bcsEngines[0] = &bcsEngineControl; auto &gpgpuCsr = cmdQ->getGpgpuCommandStreamReceiver(); myMockCsr->gpgpuCsr = &gpgpuCsr; cl_int retVal = CL_SUCCESS; auto buffer = clUniquePtr(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); buffer->forceDisallowCPUCopy = true; void *hostPtr = reinterpret_cast(0x12340000); cmdQ->enqueueReadBuffer(buffer.get(), false, 0, 1, hostPtr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(0u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled); EXPECT_TRUE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty()); EXPECT_FALSE(myMockCsr->getTemporaryAllocations().peekIsEmpty()); bool tempAllocationFound = false; auto 
tempAllocation = myMockCsr->getTemporaryAllocations().peekHead(); while (tempAllocation) { if (tempAllocation->getUnderlyingBuffer() == hostPtr) { tempAllocationFound = true; break; } tempAllocation = tempAllocation->next; } EXPECT_TRUE(tempAllocationFound); cmdQ->enqueueReadBuffer(buffer.get(), true, 0, 1, hostPtr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(1u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled); } HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingSVMMemcpyAndEnqueuReadBufferIsCalledWhenUsingBcsThenCallWait) { auto myMockCsr = new MyMockCsr(*device->getExecutionEnvironment(), device->getRootDeviceIndex(), device->getDeviceBitfield()); myMockCsr->taskCount = 1234; myMockCsr->initializeTagAllocation(); myMockCsr->setupContext(*bcsMockContext->bcsOsContext); bcsMockContext->bcsCsr.reset(myMockCsr); EngineControl bcsEngineControl = {myMockCsr, bcsMockContext->bcsOsContext.get()}; auto cmdQ = clUniquePtr(new MockCommandQueueHw(bcsMockContext.get(), device.get(), nullptr)); cmdQ->clearBcsEngines(); cmdQ->bcsEngines[0] = &bcsEngineControl; auto &gpgpuCsr = cmdQ->getGpgpuCommandStreamReceiver(); myMockCsr->gpgpuCsr = &gpgpuCsr; auto pDstSVM = std::make_unique(256); auto pSrcSVM = bcsMockContext->getSVMAllocsManager()->createSVMAlloc(256, {}, bcsMockContext->getRootDeviceIndices(), bcsMockContext->getDeviceBitfields()); cmdQ->enqueueSVMMemcpy(false, pDstSVM.get(), pSrcSVM, 256, 0, nullptr, nullptr); EXPECT_EQ(0u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled); EXPECT_TRUE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty()); EXPECT_FALSE(myMockCsr->getTemporaryAllocations().peekIsEmpty()); auto tempAlloc = myMockCsr->getTemporaryAllocations().peekHead(); EXPECT_EQ(0u, tempAlloc->countSuccessors()); EXPECT_EQ(pDstSVM.get(), reinterpret_cast(tempAlloc->getGpuAddress())); cmdQ->enqueueSVMMemcpy(true, pDstSVM.get(), pSrcSVM, 256, 0, nullptr, nullptr); EXPECT_EQ(1u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled); 
bcsMockContext->getSVMAllocsManager()->freeSVMAlloc(pSrcSVM); } HWTEST_TEMPLATED_F(BcsBufferTests, givenSrcHostPtrBlockingEnqueueSVMMemcpyAndEnqueuWriteBufferIsCalledWhenUsingBcsThenCallWait) { auto myMockCsr = new MyMockCsr(*device->getExecutionEnvironment(), device->getRootDeviceIndex(), device->getDeviceBitfield()); myMockCsr->taskCount = 1234; myMockCsr->initializeTagAllocation(); myMockCsr->setupContext(*bcsMockContext->bcsOsContext); bcsMockContext->bcsCsr.reset(myMockCsr); EngineControl bcsEngineControl = {myMockCsr, bcsMockContext->bcsOsContext.get()}; auto cmdQ = clUniquePtr(new MockCommandQueueHw(bcsMockContext.get(), device.get(), nullptr)); cmdQ->clearBcsEngines(); cmdQ->bcsEngines[0] = &bcsEngineControl; auto &gpgpuCsr = cmdQ->getGpgpuCommandStreamReceiver(); myMockCsr->gpgpuCsr = &gpgpuCsr; auto pSrcSVM = std::make_unique(256); auto pDstSVM = bcsMockContext->getSVMAllocsManager()->createSVMAlloc(256, {}, bcsMockContext->getRootDeviceIndices(), bcsMockContext->getDeviceBitfields()); cmdQ->enqueueSVMMemcpy(false, pDstSVM, pSrcSVM.get(), 256, 0, nullptr, nullptr); EXPECT_EQ(0u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled); EXPECT_TRUE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty()); EXPECT_FALSE(myMockCsr->getTemporaryAllocations().peekIsEmpty()); auto tempAlloc = myMockCsr->getTemporaryAllocations().peekHead(); EXPECT_EQ(0u, tempAlloc->countSuccessors()); EXPECT_EQ(pSrcSVM.get(), reinterpret_cast(tempAlloc->getGpuAddress())); cmdQ->enqueueSVMMemcpy(true, pDstSVM, pSrcSVM.get(), 256, 0, nullptr, nullptr); EXPECT_EQ(1u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled); bcsMockContext->getSVMAllocsManager()->freeSVMAlloc(pDstSVM); } HWTEST_TEMPLATED_F(BcsBufferTests, givenDstHostPtrAndSrcHostPtrBlockingEnqueueSVMMemcpyAndEnqueuWriteBufferIsCalledWhenUsingBcsThenCallWait) { auto myMockCsr = new MyMockCsr(*device->getExecutionEnvironment(), device->getRootDeviceIndex(), device->getDeviceBitfield()); myMockCsr->taskCount = 1234; 
myMockCsr->initializeTagAllocation(); myMockCsr->setupContext(*bcsMockContext->bcsOsContext); bcsMockContext->bcsCsr.reset(myMockCsr); EngineControl bcsEngineControl = {myMockCsr, bcsMockContext->bcsOsContext.get()}; auto cmdQ = clUniquePtr(new MockCommandQueueHw(bcsMockContext.get(), device.get(), nullptr)); cmdQ->clearBcsEngines(); cmdQ->bcsEngines[0] = &bcsEngineControl; auto &gpgpuCsr = cmdQ->getGpgpuCommandStreamReceiver(); myMockCsr->gpgpuCsr = &gpgpuCsr; auto pSrcSVM = std::make_unique(256); auto pDstSVM = std::make_unique(256); cmdQ->enqueueSVMMemcpy(false, pDstSVM.get(), pSrcSVM.get(), 256, 0, nullptr, nullptr); EXPECT_EQ(0u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled); EXPECT_TRUE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty()); EXPECT_FALSE(myMockCsr->getTemporaryAllocations().peekIsEmpty()); auto tempAlloc = myMockCsr->getTemporaryAllocations().peekHead(); EXPECT_EQ(1u, tempAlloc->countSuccessors()); EXPECT_EQ(pSrcSVM.get(), reinterpret_cast(tempAlloc->getGpuAddress())); EXPECT_EQ(pDstSVM.get(), reinterpret_cast(tempAlloc->next->getGpuAddress())); cmdQ->enqueueSVMMemcpy(true, pDstSVM.get(), pSrcSVM.get(), 256, 0, nullptr, nullptr); EXPECT_EQ(1u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled); } HWTEST_TEMPLATED_F(BcsBufferTests, givenSvmToSvmCopyWhenEnqueueSVMMemcpyThenSvmMemcpyCommandIsCalledAndBcs) { using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT; auto cmdQ = clUniquePtr(new MockCommandQueueHw(bcsMockContext.get(), device.get(), nullptr)); auto pDstSVM = bcsMockContext->getSVMAllocsManager()->createSVMAlloc(256, {}, bcsMockContext->getRootDeviceIndices(), bcsMockContext->getDeviceBitfields()); auto pSrcSVM = bcsMockContext->getSVMAllocsManager()->createSVMAlloc(256, {}, bcsMockContext->getRootDeviceIndices(), bcsMockContext->getDeviceBitfields()); cmdQ->enqueueSVMMemcpy(false, pDstSVM, pSrcSVM, 256, 0, nullptr, nullptr); HardwareParse hwParser; hwParser.parseCommands(bcsCsr->getCS(0)); auto commandItor = 
find(hwParser.cmdList.begin(), hwParser.cmdList.end()); EXPECT_NE(hwParser.cmdList.end(), commandItor); auto copyBltCmd = genCmdCast(*commandItor); EXPECT_EQ(pSrcSVM, reinterpret_cast(copyBltCmd->getSourceBaseAddress())); EXPECT_EQ(pDstSVM, reinterpret_cast(copyBltCmd->getDestinationBaseAddress())); bcsMockContext->getSVMAllocsManager()->freeSVMAlloc(pDstSVM); bcsMockContext->getSVMAllocsManager()->freeSVMAlloc(pSrcSVM); } HWTEST_TEMPLATED_F(BcsBufferTests, givenSvmToSvmCopyTypeWhenEnqueueNonBlockingSVMMemcpyThenSvmMemcpyCommandIsEnqueuedWhenUsingBcsThenCallWait) { auto myMockCsr = new MyMockCsr(*device->getExecutionEnvironment(), device->getRootDeviceIndex(), device->getDeviceBitfield()); myMockCsr->taskCount = 1234; myMockCsr->initializeTagAllocation(); myMockCsr->setupContext(*bcsMockContext->bcsOsContext); bcsMockContext->bcsCsr.reset(myMockCsr); EngineControl bcsEngineControl = {myMockCsr, bcsMockContext->bcsOsContext.get()}; auto cmdQ = clUniquePtr(new MockCommandQueueHw(bcsMockContext.get(), device.get(), nullptr)); cmdQ->clearBcsEngines(); cmdQ->bcsEngines[0] = &bcsEngineControl; auto &gpgpuCsr = cmdQ->getGpgpuCommandStreamReceiver(); myMockCsr->gpgpuCsr = &gpgpuCsr; auto pDstSVM = bcsMockContext->getSVMAllocsManager()->createSVMAlloc(256, {}, bcsMockContext->getRootDeviceIndices(), bcsMockContext->getDeviceBitfields()); auto pSrcSVM = bcsMockContext->getSVMAllocsManager()->createSVMAlloc(256, {}, bcsMockContext->getRootDeviceIndices(), bcsMockContext->getDeviceBitfields()); cmdQ->enqueueSVMMemcpy(false, pDstSVM, pSrcSVM, 256, 0, nullptr, nullptr); EXPECT_EQ(0u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled); EXPECT_TRUE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty()); EXPECT_TRUE(myMockCsr->getTemporaryAllocations().peekIsEmpty()); cmdQ->enqueueSVMMemcpy(true, pDstSVM, pSrcSVM, 256, 0, nullptr, nullptr); EXPECT_EQ(1u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled); bcsMockContext->getSVMAllocsManager()->freeSVMAlloc(pDstSVM); 
bcsMockContext->getSVMAllocsManager()->freeSVMAlloc(pSrcSVM); } struct BcsSvmTests : public BcsBufferTests { template void SetUpT() { if (is32bit) { GTEST_SKIP(); } REQUIRE_SVM_OR_SKIP(defaultHwInfo); BcsBufferTests::SetUpT(); if (IsSkipped()) { GTEST_SKIP(); } deviceMemAlloc = clDeviceMemAllocINTEL(bcsMockContext.get(), device.get(), nullptr, allocSize, 0u, &retVal); ASSERT_NE(nullptr, deviceMemAlloc); ASSERT_EQ(CL_SUCCESS, retVal); allocation.push_back(deviceMemAlloc); retVal = CL_SUCCESS; hostMemAlloc = clHostMemAllocINTEL(bcsMockContext.get(), nullptr, allocSize, 0u, &retVal); ASSERT_NE(nullptr, hostMemAlloc); ASSERT_EQ(CL_SUCCESS, retVal); allocation.push_back(hostMemAlloc); sharedMemAlloc = clSharedMemAllocINTEL(bcsMockContext.get(), device.get(), nullptr, allocSize, 0u, &retVal); ASSERT_NE(nullptr, sharedMemAlloc); ASSERT_EQ(CL_SUCCESS, retVal); allocation.push_back(sharedMemAlloc); } template void TearDownT() { if (IsSkipped()) { return; } BcsBufferTests::TearDownT(); clMemFreeINTEL(bcsMockContext.get(), sharedMemAlloc); clMemFreeINTEL(bcsMockContext.get(), hostMemAlloc); clMemFreeINTEL(bcsMockContext.get(), deviceMemAlloc); } size_t allocSize = 4096u; std::vector offset{0, 1, 2, 4, 8, 16, 32}; std::vector allocation; void *hostMemAlloc = nullptr; void *deviceMemAlloc = nullptr; void *sharedMemAlloc = nullptr; cl_int retVal = CL_SUCCESS; }; HWTEST_TEMPLATED_F(BcsSvmTests, givenSVMMAllocationWithOffsetWhenUsingBcsThenProperValuesAreSet) { DebugManager.flags.EnableBlitterOperationsSupport.set(1); for (auto srcPtr : allocation) { for (auto dstPtr : allocation) { for (auto srcOff : offset) { for (auto dstOff : offset) { auto pSrcPtr = srcPtr; auto pDstPtr = dstPtr; auto srcOffset = srcOff; auto dstOffset = dstOff; pSrcPtr = ptrOffset(pSrcPtr, srcOffset); pDstPtr = ptrOffset(pDstPtr, dstOffset); auto dstSvmData = bcsMockContext.get()->getSVMAllocsManager()->getSVMAlloc(pDstPtr); auto srcSvmData = bcsMockContext.get()->getSVMAllocsManager()->getSVMAlloc(pSrcPtr); 
auto srcGpuAllocation = srcSvmData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex()); auto dstGpuAllocation = dstSvmData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex()); BuiltinOpParams builtinOpParams = {}; builtinOpParams.size = {allocSize, 0, 0}; builtinOpParams.srcPtr = const_cast(alignDown(pSrcPtr, 4)); builtinOpParams.srcSvmAlloc = srcGpuAllocation; builtinOpParams.srcOffset = {ptrDiff(pSrcPtr, builtinOpParams.srcPtr), 0, 0}; builtinOpParams.dstPtr = alignDown(pDstPtr, 4); builtinOpParams.dstSvmAlloc = dstGpuAllocation; builtinOpParams.dstOffset = {ptrDiff(pDstPtr, builtinOpParams.dstPtr), 0, 0}; auto bcsCsr = static_cast *>(this->bcsCsr); auto blitProperties = ClBlitProperties::constructProperties(BlitterConstants::BlitDirection::BufferToBuffer, *bcsCsr, builtinOpParams); EXPECT_EQ(srcOffset, blitProperties.srcOffset.x); EXPECT_EQ(dstOffset, blitProperties.dstOffset.x); EXPECT_EQ(dstGpuAllocation, blitProperties.dstAllocation); EXPECT_EQ(srcGpuAllocation, blitProperties.srcAllocation); EXPECT_EQ(dstGpuAllocation->getGpuAddress(), blitProperties.dstGpuAddress); EXPECT_EQ(srcGpuAllocation->getGpuAddress(), blitProperties.srcGpuAddress); EXPECT_EQ(pDstPtr, reinterpret_cast(blitProperties.dstGpuAddress + blitProperties.dstOffset.x)); EXPECT_EQ(pSrcPtr, reinterpret_cast(blitProperties.srcGpuAddress + blitProperties.srcOffset.x)); } } } } } HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockedEnqueueWhenUsingBcsThenWaitForValidTaskCountOnBlockingCall) { auto myMockCsr = new MyMockCsr(*device->getExecutionEnvironment(), device->getRootDeviceIndex(), device->getDeviceBitfield()); myMockCsr->taskCount = 1234; myMockCsr->initializeTagAllocation(); myMockCsr->setupContext(*bcsMockContext->bcsOsContext); bcsMockContext->bcsCsr.reset(myMockCsr); EngineControl bcsEngineControl = {myMockCsr, bcsMockContext->bcsOsContext.get()}; auto cmdQ = clUniquePtr(new MockCommandQueueHw(bcsMockContext.get(), device.get(), nullptr)); 
cmdQ->clearBcsEngines(); cmdQ->bcsEngines[0] = &bcsEngineControl; auto &gpgpuCsr = cmdQ->getGpgpuCommandStreamReceiver(); myMockCsr->gpgpuCsr = &gpgpuCsr; cl_int retVal = CL_SUCCESS; auto buffer = clUniquePtr(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); buffer->forceDisallowCPUCopy = true; void *hostPtr = reinterpret_cast(0x12340000); UserEvent userEvent; cl_event waitlist = &userEvent; cmdQ->enqueueWriteBuffer(buffer.get(), false, 0, 1, hostPtr, nullptr, 1, &waitlist, nullptr); userEvent.setStatus(CL_COMPLETE); EXPECT_EQ(0u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled); cmdQ->finish(); EXPECT_EQ(1u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled); } HWTEST_TEMPLATED_F(BcsBufferTests, givenDebugFlagSetToOneWhenEnqueueingCopyLocalBufferToLocalBufferThenUseBlitter) { auto bcsCsr = static_cast *>(this->bcsCsr); MockGraphicsAllocation srcGraphicsAllocation{}; MockGraphicsAllocation dstGraphicsAllocation{}; MockBuffer srcMemObj{srcGraphicsAllocation}; MockBuffer dstMemObj{dstGraphicsAllocation}; srcGraphicsAllocation.memoryPool = MemoryPool::LocalMemory; dstGraphicsAllocation.memoryPool = MemoryPool::LocalMemory; const bool preferBlitterHw = ClHwHelper::get(::defaultHwInfo->platform.eRenderCoreFamily).preferBlitterForLocalToLocalTransfers(); uint32_t expectedBlitBufferCalled = 0; DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(-1); EXPECT_EQ(expectedBlitBufferCalled, bcsCsr->blitBufferCalled); commandQueue->enqueueCopyBuffer(&srcMemObj, &dstMemObj, 0, 1, 1, 0, nullptr, nullptr); if (preferBlitterHw) { expectedBlitBufferCalled++; } EXPECT_EQ(expectedBlitBufferCalled, bcsCsr->blitBufferCalled); DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(0); EXPECT_EQ(expectedBlitBufferCalled, bcsCsr->blitBufferCalled); commandQueue->enqueueCopyBuffer(&srcMemObj, &dstMemObj, 0, 1, 1, 0, nullptr, nullptr); EXPECT_EQ(expectedBlitBufferCalled, bcsCsr->blitBufferCalled); 
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(1); EXPECT_EQ(expectedBlitBufferCalled, bcsCsr->blitBufferCalled); commandQueue->enqueueCopyBuffer(&srcMemObj, &dstMemObj, 0, 1, 1, 0, nullptr, nullptr); expectedBlitBufferCalled++; EXPECT_EQ(expectedBlitBufferCalled, bcsCsr->blitBufferCalled); } HWTEST_TEMPLATED_F(BcsBufferTests, givenBcsQueueWhenEnqueueingCopyBufferToBufferThenUseBlitterRegardlessOfPreference) { REQUIRE_BLITTER_OR_SKIP(&device->getDevice().getHardwareInfo()); cl_command_queue_properties properties[] = { CL_QUEUE_FAMILY_INTEL, device->getDevice().getEngineGroupIndexFromEngineGroupType(EngineGroupType::Copy), CL_QUEUE_INDEX_INTEL, 0, 0, }; MockCommandQueueHw queue(bcsMockContext.get(), device.get(), properties); auto bcsCsr = static_cast *>(this->bcsCsr); MockGraphicsAllocation srcGraphicsAllocation{}; MockGraphicsAllocation dstGraphicsAllocation{}; MockBuffer srcMemObj{srcGraphicsAllocation}; MockBuffer dstMemObj{dstGraphicsAllocation}; srcGraphicsAllocation.memoryPool = MemoryPool::LocalMemory; dstGraphicsAllocation.memoryPool = MemoryPool::LocalMemory; DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(-1); EXPECT_EQ(0u, bcsCsr->blitBufferCalled); queue.enqueueCopyBuffer(&srcMemObj, &dstMemObj, 0, 1, 1, 0, nullptr, nullptr); EXPECT_EQ(1u, bcsCsr->blitBufferCalled); DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(0); EXPECT_EQ(1u, bcsCsr->blitBufferCalled); queue.enqueueCopyBuffer(&srcMemObj, &dstMemObj, 0, 1, 1, 0, nullptr, nullptr); EXPECT_EQ(2u, bcsCsr->blitBufferCalled); DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(1); EXPECT_EQ(2u, bcsCsr->blitBufferCalled); queue.enqueueCopyBuffer(&srcMemObj, &dstMemObj, 0, 1, 1, 0, nullptr, nullptr); EXPECT_EQ(3u, bcsCsr->blitBufferCalled); } compute-runtime-22.14.22890/opencl/test/unit_test/mem_obj/buffer_pin_tests.cpp000066400000000000000000000075771422164147700273240ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * 
SPDX-License-Identifier: MIT * */ #include "shared/source/memory_manager/os_agnostic_memory_manager.h" #include "shared/test/common/fixtures/memory_management_fixture.h" #include "shared/test/common/mocks/mock_execution_environment.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/fixtures/platform_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "gtest/gtest.h" using namespace NEO; class TestedMemoryManager : public OsAgnosticMemoryManager { public: using OsAgnosticMemoryManager::OsAgnosticMemoryManager; GraphicsAllocation *allocateGraphicsMemoryWithAlignment(const AllocationData &allocationData) override { EXPECT_NE(0u, expectedSize); if (expectedSize == allocationData.size) { EXPECT_TRUE(allocationData.flags.forcePin); allocCount++; } return OsAgnosticMemoryManager::allocateGraphicsMemoryWithAlignment(allocationData); }; GraphicsAllocation *allocateGraphicsMemory64kb(const AllocationData &allocationData) override { return nullptr; }; GraphicsAllocation *allocateGraphicsMemoryWithHostPtr(const AllocationData &properties) override { EXPECT_NE(0u, HPExpectedSize); if (HPExpectedSize == properties.size) { EXPECT_TRUE(properties.flags.forcePin); HPAllocCount++; } return OsAgnosticMemoryManager::allocateGraphicsMemoryWithHostPtr(properties); } GraphicsAllocation *allocateGraphicsMemoryForNonSvmHostPtr(const AllocationData &properties) override { EXPECT_NE(0u, HPExpectedSize); if (HPExpectedSize == properties.size) { EXPECT_TRUE(properties.flags.forcePin); HPAllocCount++; } return OsAgnosticMemoryManager::allocateGraphicsMemoryForNonSvmHostPtr(properties); } size_t expectedSize = 0; uint32_t allocCount = 0; size_t HPExpectedSize = 0; uint32_t HPAllocCount = 0; }; TEST(BufferTests, WhenBufferIsCreatedThenPinIsSet) { MockExecutionEnvironment 
executionEnvironment(defaultHwInfo.get()); std::unique_ptr mm(new MemoryManagerCreate(false, false, executionEnvironment)); if (mm->isLimitedGPU(0)) { GTEST_SKIP(); } { MockContext context; auto size = MemoryConstants::pageSize * 32; auto retVal = CL_INVALID_OPERATION; mm->expectedSize = size; mm->HPExpectedSize = 0u; context.memoryManager = mm.get(); auto buffer = Buffer::create( &context, 0, size, nullptr, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, mm->allocCount); delete buffer; } } TEST(BufferTests, GivenHostPtrWhenBufferIsCreatedThenPinIsSet) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); std::unique_ptr mm(new TestedMemoryManager(executionEnvironment)); if (mm->isLimitedGPU(0)) { GTEST_SKIP(); } { MockContext context; auto retVal = CL_INVALID_OPERATION; auto size = MemoryConstants::pageSize * 32; mm->expectedSize = 0u; mm->HPExpectedSize = size; context.memoryManager = mm.get(); // memory must be aligned to use zero-copy void *bff = alignedMalloc(size, MemoryConstants::pageSize); auto buffer = Buffer::create( &context, CL_MEM_USE_HOST_PTR | CL_MEM_FORCE_HOST_MEMORY_INTEL, size, bff, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, mm->HPAllocCount); delete buffer; alignedFree(bff); } } compute-runtime-22.14.22890/opencl/test/unit_test/mem_obj/buffer_set_arg_tests.cpp000066400000000000000000000327051422164147700301510ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gmm_helper/gmm.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/ptr_math.h" #include "shared/source/memory_manager/surface.h" #include "shared/source/memory_manager/unified_memory_manager.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/kernel/kernel.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include 
"opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/fixtures/context_fixture.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "opencl/test/unit_test/test_macros/test_checks_ocl.h" #include "gtest/gtest.h" using namespace NEO; class BufferSetArgTest : public ContextFixture, public ClDeviceFixture, public testing::Test { using ContextFixture::SetUp; public: BufferSetArgTest() {} protected: void SetUp() override { ClDeviceFixture::SetUp(); cl_device_id device = pClDevice; ContextFixture::SetUp(1, &device); pKernelInfo = std::make_unique(); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1; constexpr uint32_t sizeOfPointer = sizeof(void *); pKernelInfo->addArgBuffer(0, 0x10, sizeOfPointer); pKernelInfo->addArgBuffer(1, 0x20, sizeOfPointer); pKernelInfo->addArgBuffer(2, 0x30, sizeOfPointer); pKernelInfo->heapInfo.pSsh = surfaceStateHeap; pKernelInfo->heapInfo.SurfaceStateHeapSize = sizeof(surfaceStateHeap); pProgram = new MockProgram(pContext, false, toClDeviceVector(*pClDevice)); retVal = CL_INVALID_VALUE; pMultiDeviceKernel = MultiDeviceKernel::create(pProgram, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), &retVal); pKernel = static_cast(pMultiDeviceKernel->getKernel(rootDeviceIndex)); ASSERT_NE(nullptr, pKernel); ASSERT_EQ(CL_SUCCESS, retVal); pKernel->setCrossThreadData(pCrossThreadData, sizeof(pCrossThreadData)); pKernel->setKernelArgHandler(1, &Kernel::setArgBuffer); pKernel->setKernelArgHandler(2, &Kernel::setArgBuffer); pKernel->setKernelArgHandler(0, &Kernel::setArgBuffer); BufferDefaults::context = new MockContext(pClDevice); buffer = BufferHelper<>::create(BufferDefaults::context); } void TearDown() override { delete buffer; delete BufferDefaults::context; delete pMultiDeviceKernel; delete pProgram; ContextFixture::TearDown(); ClDeviceFixture::TearDown(); } cl_int retVal = CL_SUCCESS; MockProgram *pProgram; MultiDeviceKernel 
*pMultiDeviceKernel = nullptr; MockKernel *pKernel = nullptr; std::unique_ptr pKernelInfo; SKernelBinaryHeaderCommon kernelHeader; char surfaceStateHeap[0x80]; char pCrossThreadData[64]; Buffer *buffer = nullptr; }; TEST_F(BufferSetArgTest, WhenSettingKernelArgBufferThenGpuAddressIsSet) { auto pKernelArg = (void **)(pKernel->getCrossThreadData() + pKernelInfo->argAsPtr(0).stateless); buffer->setArgStateless(pKernelArg, pKernelInfo->argAsPtr(0).pointerSize, pClDevice->getRootDeviceIndex(), false); EXPECT_EQ(reinterpret_cast(buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddress()), *pKernelArg); } TEST_F(BufferSetArgTest, givenInvalidSizeWhenSettingKernelArgBufferThenReturnClInvalidArgSize) { cl_mem arg = buffer; cl_int err = pKernel->setArgBuffer(0, sizeof(cl_mem) + 1, arg); EXPECT_EQ(CL_INVALID_ARG_SIZE, err); } HWTEST_F(BufferSetArgTest, givenSetArgBufferWhenNullArgStatefulThenProgramNullSurfaceState) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using SURFACE_FORMAT = typename RENDER_SURFACE_STATE::SURFACE_FORMAT; pKernelInfo->argAsPtr(0).bindful = 0; cl_int ret = pKernel->setArgBuffer(0, sizeof(cl_mem), nullptr); EXPECT_EQ(CL_SUCCESS, ret); auto surfaceState = reinterpret_cast(ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->argAsPtr(0).bindful)); auto surfaceFormat = surfaceState->getSurfaceType(); auto surfacetype = surfaceState->getSurfaceFormat(); EXPECT_EQ(surfaceFormat, RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_NULL); EXPECT_EQ(surfacetype, SURFACE_FORMAT::SURFACE_FORMAT_RAW); } HWTEST_F(BufferSetArgTest, givenSetKernelArgOnReadOnlyBufferThatIsMisalingedWhenSurfaceStateIsSetThenCachingIsOn) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; pKernelInfo->setAddressQualifier(0, KernelArgMetadata::AddrConstant); pKernelInfo->argAsPtr(0).bindful = 0; auto graphicsAllocation = buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex()); 
graphicsAllocation->setSize(graphicsAllocation->getUnderlyingBufferSize() - 1); cl_mem clMemBuffer = buffer; cl_int ret = pKernel->setArgBuffer(0, sizeof(cl_mem), &clMemBuffer); EXPECT_EQ(CL_SUCCESS, ret); auto surfaceState = reinterpret_cast(ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->argAsPtr(0).bindful)); auto mocs = surfaceState->getMemoryObjectControlState(); auto gmmHelper = pDevice->getGmmHelper(); auto expectedMocs = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); auto expectedMocs2 = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST); EXPECT_TRUE(expectedMocs == mocs || expectedMocs2 == mocs); } HWTEST_F(BufferSetArgTest, givenSetArgBufferWithNullArgStatelessThenDontProgramNullSurfaceState) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using SURFACE_FORMAT = typename RENDER_SURFACE_STATE::SURFACE_FORMAT; char sshOriginal[sizeof(surfaceStateHeap)]; memcpy(sshOriginal, surfaceStateHeap, sizeof(surfaceStateHeap)); pKernelInfo->kernelDescriptor.kernelAttributes.bufferAddressingMode = KernelDescriptor::Stateless; cl_int ret = pKernel->setArgBuffer(0, sizeof(cl_mem), nullptr); EXPECT_EQ(CL_SUCCESS, ret); EXPECT_EQ(memcmp(sshOriginal, surfaceStateHeap, sizeof(surfaceStateHeap)), 0); } HWTEST_F(BufferSetArgTest, givenNonPureStatefulArgWhenCompressedBufferIsSetThenSetNonAuxMode) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; pKernelInfo->argAsPtr(0).bindful = 0; auto graphicsAllocation = buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex()); graphicsAllocation->setDefaultGmm(new Gmm(pDevice->getGmmClientContext(), graphicsAllocation->getUnderlyingBuffer(), buffer->getSize(), 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true)); graphicsAllocation->getDefaultGmm()->isCompressionEnabled = true; cl_mem clMem = buffer; cl_int ret = pKernel->setArgBuffer(0, sizeof(cl_mem), &clMem); EXPECT_EQ(CL_SUCCESS, ret); auto surfaceState = reinterpret_cast(ptrOffset(pKernel->getSurfaceStateHeap(), 
pKernelInfo->argAsPtr(0).bindful)); EXPECT_TRUE(RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE == surfaceState->getAuxiliarySurfaceMode()); pKernelInfo->setBufferStateful(0); ret = pKernel->setArgBuffer(0, sizeof(cl_mem), &clMem); EXPECT_EQ(CL_SUCCESS, ret); EXPECT_TRUE(EncodeSurfaceState::isAuxModeEnabled(surfaceState, graphicsAllocation->getDefaultGmm())); } TEST_F(BufferSetArgTest, Given32BitAddressingWhenSettingArgStatelessThenGpuAddressIsSetCorrectly) { auto pKernelArg = (void **)(pKernel->getCrossThreadData() + pKernelInfo->argAsPtr(0).stateless); auto gpuBase = buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddress() >> 2; buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->setGpuBaseAddress(gpuBase); buffer->setArgStateless(pKernelArg, pKernelInfo->argAsPtr(0).pointerSize, pClDevice->getRootDeviceIndex(), true); EXPECT_EQ(reinterpret_cast(buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddress() - gpuBase), *pKernelArg); } TEST_F(BufferSetArgTest, givenBufferWhenOffsetedSubbufferIsPassedToSetKernelArgThenCorrectGpuVAIsPatched) { cl_buffer_region region; region.origin = 0xc0; region.size = 32; cl_int error = 0; auto subBuffer = buffer->createSubBuffer(buffer->getFlags(), buffer->getFlagsIntel(), ®ion, error); ASSERT_NE(nullptr, subBuffer); EXPECT_EQ(ptrOffset(buffer->getCpuAddress(), region.origin), subBuffer->getCpuAddress()); auto pKernelArg = (void **)(pKernel->getCrossThreadData() + pKernelInfo->argAsPtr(0).stateless); subBuffer->setArgStateless(pKernelArg, pKernelInfo->argAsPtr(0).pointerSize, pClDevice->getRootDeviceIndex(), false); EXPECT_EQ(reinterpret_cast(subBuffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddress() + region.origin), *pKernelArg); delete subBuffer; } TEST_F(BufferSetArgTest, givenCurbeTokenThatSizeIs4BytesWhenStatelessArgIsPatchedThenOnly4BytesArePatchedInCurbe) { auto pKernelArg = (void **)(pKernel->getCrossThreadData() + 
pKernelInfo->argAsPtr(0).stateless); //fill 8 bytes with 0xffffffffffffffff; uint64_t fillValue = -1; uint64_t *pointer64bytes = (uint64_t *)pKernelArg; *pointer64bytes = fillValue; constexpr uint32_t sizeOf4Bytes = sizeof(uint32_t); pKernelInfo->argAsPtr(0).pointerSize = sizeOf4Bytes; buffer->setArgStateless(pKernelArg, sizeOf4Bytes, pClDevice->getRootDeviceIndex(), false); //make sure only 4 bytes are patched auto bufferAddress = buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddress(); uint32_t address32bits = static_cast(bufferAddress); uint64_t curbeValue = *pointer64bytes; uint32_t higherPart = curbeValue >> 32; uint32_t lowerPart = (curbeValue & 0xffffffff); EXPECT_EQ(0xffffffff, higherPart); EXPECT_EQ(address32bits, lowerPart); } TEST_F(BufferSetArgTest, WhenSettingKernelArgThenAddressToPatchIsSetCorrectlyAndSurfacesSet) { cl_mem memObj = buffer; retVal = clSetKernelArg( pMultiDeviceKernel, 0, sizeof(memObj), &memObj); ASSERT_EQ(CL_SUCCESS, retVal); auto pKernelArg = (void **)(pKernel->getCrossThreadData() + pKernelInfo->argAsPtr(0).stateless); EXPECT_EQ(reinterpret_cast(buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddressToPatch()), *pKernelArg); std::vector surfaces; pKernel->getResidency(surfaces); EXPECT_EQ(1u, surfaces.size()); for (auto &surface : surfaces) { delete surface; } } TEST_F(BufferSetArgTest, GivenSvmPointerWhenSettingKernelArgThenAddressToPatchIsSetCorrectlyAndSurfacesSet) { REQUIRE_SVM_OR_SKIP(pDevice); void *ptrSVM = pContext->getSVMAllocsManager()->createSVMAlloc(256, {}, pContext->getRootDeviceIndices(), pContext->getDeviceBitfields()); EXPECT_NE(nullptr, ptrSVM); auto svmData = pContext->getSVMAllocsManager()->getSVMAlloc(ptrSVM); ASSERT_NE(nullptr, svmData); GraphicsAllocation *pSvmAlloc = svmData->gpuAllocations.getGraphicsAllocation(pDevice->getRootDeviceIndex()); EXPECT_NE(nullptr, pSvmAlloc); retVal = pKernel->setArgSvmAlloc( 0, ptrSVM, pSvmAlloc, 0u); ASSERT_EQ(CL_SUCCESS, retVal); 
auto pKernelArg = (void **)(pKernel->getCrossThreadData() + pKernelInfo->argAsPtr(0).stateless); EXPECT_EQ(ptrSVM, *pKernelArg); std::vector surfaces; pKernel->getResidency(surfaces); EXPECT_EQ(1u, surfaces.size()); for (auto &surface : surfaces) { delete surface; } pContext->getSVMAllocsManager()->freeSVMAlloc(ptrSVM); } TEST_F(BufferSetArgTest, WhenGettingKernelArgThenBufferIsReturned) { cl_mem memObj = buffer; retVal = pKernel->setArg( 0, sizeof(memObj), &memObj); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(memObj, pKernel->getKernelArg(0)); } TEST_F(BufferSetArgTest, givenKernelArgBufferWhenAddPathInfoDataIsSetThenPatchInfoDataIsCollected) { DebugManagerStateRestore dbgRestore; DebugManager.flags.AddPatchInfoCommentsForAUBDump.set(true); cl_mem memObj = buffer; retVal = pKernel->setArg( 0, sizeof(memObj), &memObj); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(1u, pKernel->getPatchInfoDataList().size()); EXPECT_EQ(PatchInfoAllocationType::KernelArg, pKernel->getPatchInfoDataList()[0].sourceType); EXPECT_EQ(PatchInfoAllocationType::IndirectObjectHeap, pKernel->getPatchInfoDataList()[0].targetType); EXPECT_EQ(buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddressToPatch(), pKernel->getPatchInfoDataList()[0].sourceAllocation); EXPECT_EQ(reinterpret_cast(pKernel->getCrossThreadData()), pKernel->getPatchInfoDataList()[0].targetAllocation); EXPECT_EQ(0u, pKernel->getPatchInfoDataList()[0].sourceAllocationOffset); } TEST_F(BufferSetArgTest, givenKernelArgBufferWhenAddPathInfoDataIsNotSetThenPatchInfoDataIsNotCollected) { cl_mem memObj = buffer; retVal = pKernel->setArg( 0, sizeof(memObj), &memObj); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, pKernel->getPatchInfoDataList().size()); } compute-runtime-22.14.22890/opencl/test/unit_test/mem_obj/buffer_tests.cpp000066400000000000000000002604101422164147700264410ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include 
"shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/array_count.h" #include "shared/source/helpers/compiler_hw_info_config.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/memory_manager/memory_operations_handler.h" #include "shared/source/memory_manager/unified_memory_manager.h" #include "shared/test/common/fixtures/memory_management_fixture.h" #include "shared/test/common/helpers/ult_hw_config.h" #include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/mocks/mock_allocation_properties.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/mocks/mock_execution_environment.h" #include "shared/test/common/mocks/mock_gmm.h" #include "shared/test/common/mocks/ult_device_factory.h" #include "shared/test/common/test_macros/test.h" #include "opencl/extensions/public/cl_ext_private.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/mem_obj/mem_obj_helper.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/fixtures/multi_root_device_fixture.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" using namespace NEO; static const unsigned int g_scTestBufferSizeInBytes = 16; TEST(Buffer, giveBufferWhenAskedForPtrOffsetForMappingThenReturnCorrectValue) { MockContext ctx; cl_int retVal; std::unique_ptr buffer(Buffer::create(&ctx, 0, 1, nullptr, retVal)); MemObjOffsetArray offset = {{4, 5, 6}}; auto retOffset = buffer->calculateOffsetForMapping(offset); EXPECT_EQ(offset[0], retOffset); } TEST(Buffer, giveBufferCreateWithHostPtrButWithoutProperFlagsWhenCreatedThenErrorIsReturned) { MockContext ctx; cl_int retVal; auto hostPtr = reinterpret_cast(0x1774); std::unique_ptr buffer(Buffer::create(&ctx, CL_MEM_READ_WRITE, 1, hostPtr, retVal)); EXPECT_EQ(retVal, CL_INVALID_HOST_PTR); } TEST(Buffer, givenBufferWhenAskedForPtrLengthThenReturnCorrectValue) 
{ MockContext ctx; cl_int retVal; std::unique_ptr buffer(Buffer::create(&ctx, 0, 1, nullptr, retVal)); MemObjSizeArray size = {{4, 5, 6}}; auto retOffset = buffer->calculateMappedPtrLength(size); EXPECT_EQ(size[0], retOffset); } TEST(Buffer, whenBufferAllocatedInLocalMemoryThenCpuCopyIsDisallowed) { MockGraphicsAllocation allocation{}; MockBuffer buffer(allocation); UltDeviceFactory factory{1, 0}; auto &device = *factory.rootDevices[0]; allocation.memoryPool = MemoryPool::LocalMemory; EXPECT_FALSE(buffer.isReadWriteOnCpuAllowed(device)); allocation.memoryPool = MemoryPool::System4KBPages; EXPECT_TRUE(buffer.isReadWriteOnCpuAllowed(device)); } TEST(Buffer, givenReadOnlySetOfInputFlagsWhenPassedToisReadOnlyMemoryPermittedByFlagsThenTrueIsReturned) { class MockBuffer : public Buffer { public: using Buffer::isReadOnlyMemoryPermittedByFlags; }; UltDeviceFactory deviceFactory{1, 0}; auto pDevice = deviceFactory.rootDevices[0]; cl_mem_flags flags = CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY; MemoryProperties memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, pDevice); EXPECT_TRUE(MockBuffer::isReadOnlyMemoryPermittedByFlags(memoryProperties)); flags = CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY; memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, pDevice); EXPECT_TRUE(MockBuffer::isReadOnlyMemoryPermittedByFlags(memoryProperties)); } TEST(TestBufferRectCheck, givenSmallerDstBufferWhenCallBufferRectPitchSetThenCorrectValidationIsDone) { auto srcBuffer = std::make_unique(); ASSERT_NE(nullptr, srcBuffer); srcBuffer->size = 500; size_t originBuffer[] = {0, 0, 0}; size_t region[] = {10, 20, 1}; size_t srcRowPitch = 20u; size_t srcSlicePitch = 0u; size_t dstRowPitch = 10u; size_t dstSlicePitch = 0u; auto retVal = srcBuffer->bufferRectPitchSet(originBuffer, region, srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, true); EXPECT_TRUE(retVal); auto dstBuffer = std::make_unique(); ASSERT_NE(nullptr, dstBuffer); 
dstBuffer->size = 200; EXPECT_GT(srcBuffer->size, dstBuffer->size); retVal = dstBuffer->bufferRectPitchSet(originBuffer, region, srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, false); EXPECT_TRUE(retVal); retVal = dstBuffer->bufferRectPitchSet(originBuffer, region, srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, true); EXPECT_FALSE(retVal); } TEST(TestBufferRectCheck, givenInvalidSrcPitchWhenCallBufferRectPitchSetThenReturnFalse) { auto buffer = std::make_unique(); ASSERT_NE(nullptr, buffer); buffer->size = 200; size_t originBuffer[] = {0, 0, 0}; size_t region[] = {3, 1, 1}; size_t srcRowPitch = 10u; size_t srcSlicePitch = 10u; size_t dstRowPitch = 3u; size_t dstSlicePitch = 10u; auto retVal = buffer->bufferRectPitchSet(originBuffer, region, srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, true); EXPECT_FALSE(retVal); } TEST(TestBufferRectCheck, givenInvalidDstPitchWhenCallBufferRectPitchSetThenReturnFalse) { auto buffer = std::make_unique(); ASSERT_NE(nullptr, buffer); buffer->size = 200; size_t originBuffer[] = {0, 0, 0}; size_t region[] = {3, 1, 1}; size_t srcRowPitch = 3u; size_t srcSlicePitch = 10u; size_t dstRowPitch = 10u; size_t dstSlicePitch = 10u; auto retVal = buffer->bufferRectPitchSet(originBuffer, region, srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, true); EXPECT_FALSE(retVal); } TEST(TestBufferRectCheck, givenInvalidDstAndSrcPitchWhenCallBufferRectPitchSetThenReturnFalse) { auto buffer = std::make_unique(); ASSERT_NE(nullptr, buffer); buffer->size = 200; size_t originBuffer[] = {0, 0, 0}; size_t region[] = {3, 2, 1}; size_t srcRowPitch = 10u; size_t srcSlicePitch = 10u; size_t dstRowPitch = 10u; size_t dstSlicePitch = 10u; auto retVal = buffer->bufferRectPitchSet(originBuffer, region, srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, true); EXPECT_FALSE(retVal); } TEST(TestBufferRectCheck, givenCorrectDstAndSrcPitchWhenCallBufferRectPitchSetThenReturnTrue) { auto buffer = std::make_unique(); ASSERT_NE(nullptr, 
buffer); buffer->size = 200; size_t originBuffer[] = {0, 0, 0}; size_t region[] = {3, 1, 1}; size_t srcRowPitch = 10u; size_t srcSlicePitch = 10u; size_t dstRowPitch = 10u; size_t dstSlicePitch = 10u; auto retVal = buffer->bufferRectPitchSet(originBuffer, region, srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, true); EXPECT_TRUE(retVal); } class BufferReadOnlyTest : public testing::TestWithParam { }; TEST_P(BufferReadOnlyTest, givenNonReadOnlySetOfInputFlagsWhenPassedToisReadOnlyMemoryPermittedByFlagsThenFalseIsReturned) { class MockBuffer : public Buffer { public: using Buffer::isReadOnlyMemoryPermittedByFlags; }; UltDeviceFactory deviceFactory{1, 0}; auto pDevice = deviceFactory.rootDevices[0]; cl_mem_flags flags = GetParam() | CL_MEM_USE_HOST_PTR; MemoryProperties memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, pDevice); EXPECT_FALSE(MockBuffer::isReadOnlyMemoryPermittedByFlags(memoryProperties)); } static cl_mem_flags nonReadOnlyFlags[] = { CL_MEM_READ_WRITE | CL_MEM_HOST_READ_ONLY, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY, CL_MEM_HOST_READ_ONLY, CL_MEM_HOST_WRITE_ONLY, CL_MEM_HOST_NO_ACCESS, CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY, CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY, 0}; INSTANTIATE_TEST_CASE_P( nonReadOnlyFlags, BufferReadOnlyTest, testing::ValuesIn(nonReadOnlyFlags)); TEST(Buffer, givenReadOnlyHostPtrMemoryWhenBufferIsCreatedWithReadOnlyFlagsThenBufferHasAllocatedNewMemoryStorageAndBufferIsNotZeroCopy) { void *memory = alignedMalloc(MemoryConstants::pageSize, MemoryConstants::pageSize); ASSERT_NE(nullptr, memory); memset(memory, 0xAA, MemoryConstants::pageSize); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockContext ctx(device.get()); // First fail simulates error for read only memory allocation auto memoryManager = std::make_unique(*device->getExecutionEnvironment()); memoryManager->returnNullptr = true; 
memoryManager->returnBaseAllocateGraphicsMemoryInDevicePool = true; cl_int retVal; cl_mem_flags flags = CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR; ctx.memoryManager = memoryManager.get(); std::unique_ptr buffer(Buffer::create(&ctx, flags, MemoryConstants::pageSize, (void *)memory, retVal)); ctx.memoryManager = device->getMemoryManager(); EXPECT_FALSE(buffer->isMemObjZeroCopy()); void *memoryStorage = buffer->getCpuAddressForMemoryTransfer(); EXPECT_NE((void *)memory, memoryStorage); EXPECT_EQ(0, memcmp(buffer->getCpuAddressForMemoryTransfer(), memory, MemoryConstants::pageSize)); alignedFree(memory); } TEST(Buffer, givenReadOnlyHostPtrMemoryWhenBufferIsCreatedWithReadOnlyFlagsAndSecondAllocationFailsThenNullptrIsReturned) { void *memory = alignedMalloc(MemoryConstants::pageSize, MemoryConstants::pageSize); ASSERT_NE(nullptr, memory); memset(memory, 0xAA, MemoryConstants::pageSize); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockContext ctx(device.get()); // First fail simulates error for read only memory allocation // Second fail returns nullptr auto memoryManager = std::make_unique(*device->getExecutionEnvironment()); cl_int retVal; cl_mem_flags flags = CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR; ctx.memoryManager = memoryManager.get(); std::unique_ptr buffer(Buffer::create(&ctx, flags, MemoryConstants::pageSize, (void *)memory, retVal)); ctx.memoryManager = device->getMemoryManager(); EXPECT_EQ(nullptr, buffer.get()); alignedFree(memory); } TEST(Buffer, givenReadOnlyHostPtrMemoryWhenBufferIsCreatedWithKernelWriteFlagThenBufferAllocationFailsAndReturnsNullptr) { void *memory = alignedMalloc(MemoryConstants::pageSize, MemoryConstants::pageSize); ASSERT_NE(nullptr, memory); memset(memory, 0xAA, MemoryConstants::pageSize); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockContext ctx(device.get()); // First fail simulates error for read only 
memory allocation auto memoryManager = std::make_unique(*device->getExecutionEnvironment()); cl_int retVal; cl_mem_flags flags = CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR; ctx.memoryManager = memoryManager.get(); std::unique_ptr buffer(Buffer::create(&ctx, flags, MemoryConstants::pageSize, (void *)memory, retVal)); ctx.memoryManager = device->getMemoryManager(); EXPECT_EQ(nullptr, buffer.get()); alignedFree(memory); } TEST(Buffer, givenNullPtrWhenBufferIsCreatedWithKernelReadOnlyFlagsThenBufferAllocationFailsAndReturnsNullptr) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockContext ctx(device.get()); // First fail simulates error for read only memory allocation auto memoryManager = std::make_unique(*device->getExecutionEnvironment()); cl_int retVal; cl_mem_flags flags = CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY; ctx.memoryManager = memoryManager.get(); std::unique_ptr buffer(Buffer::create(&ctx, flags, MemoryConstants::pageSize, nullptr, retVal)); ctx.memoryManager = device->getMemoryManager(); EXPECT_EQ(nullptr, buffer.get()); } TEST(Buffer, givenNullptrPassedToBufferCreateWhenAllocationIsNotSystemMemoryPoolThenBufferIsNotZeroCopy) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockContext ctx(device.get()); auto memoryManager = std::make_unique(*device->getExecutionEnvironment()); memoryManager->returnAllocateNonSystemGraphicsMemoryInDevicePool = true; cl_int retVal = 0; cl_mem_flags flags = CL_MEM_READ_WRITE; ctx.memoryManager = memoryManager.get(); std::unique_ptr buffer(Buffer::create(&ctx, flags, MemoryConstants::pageSize, nullptr, retVal)); ctx.memoryManager = device->getMemoryManager(); ASSERT_NE(nullptr, buffer.get()); EXPECT_FALSE(buffer->isMemObjZeroCopy()); } TEST(Buffer, givenNullptrPassedToBufferCreateWhenAllocationIsNotSystemMemoryPoolThenAllocationIsNotAddedToHostPtrManager) { auto device = 
std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockContext ctx(device.get()); auto memoryManager = std::make_unique(*device->getExecutionEnvironment()); memoryManager->returnAllocateNonSystemGraphicsMemoryInDevicePool = true; cl_int retVal = 0; cl_mem_flags flags = CL_MEM_READ_WRITE; auto hostPtrManager = static_cast(memoryManager->getHostPtrManager()); auto hostPtrAllocationCountBefore = hostPtrManager->getFragmentCount(); ctx.memoryManager = memoryManager.get(); std::unique_ptr buffer(Buffer::create(&ctx, flags, MemoryConstants::pageSize, nullptr, retVal)); ctx.memoryManager = device->getMemoryManager(); ASSERT_NE(nullptr, buffer.get()); auto hostPtrAllocationCountAfter = hostPtrManager->getFragmentCount(); EXPECT_EQ(hostPtrAllocationCountBefore, hostPtrAllocationCountAfter); } TEST(Buffer, givenNullptrPassedToBufferCreateWhenNoSharedContextOrCompressedBuffersThenBuffersAllocationTypeIsBufferOrBufferHostMemory) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockContext ctx(device.get()); cl_int retVal = 0; cl_mem_flags flags = CL_MEM_READ_WRITE; std::unique_ptr buffer(Buffer::create(&ctx, flags, MemoryConstants::pageSize, nullptr, retVal)); ASSERT_NE(nullptr, buffer.get()); if (MemoryPool::isSystemMemoryPool(buffer->getGraphicsAllocation(device->getRootDeviceIndex())->getMemoryPool())) { EXPECT_EQ(AllocationType::BUFFER_HOST_MEMORY, buffer->getGraphicsAllocation(device->getRootDeviceIndex())->getAllocationType()); } else { EXPECT_EQ(AllocationType::BUFFER, buffer->getGraphicsAllocation(device->getRootDeviceIndex())->getAllocationType()); } } TEST(Buffer, givenHostPtrPassedToBufferCreateWhenMemUseHostPtrFlagisSetAndBufferIsNotZeroCopyThenCreateMapAllocationWithHostPtr) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockContext ctx(device.get()); cl_int retVal = 0; cl_mem_flags flags = CL_MEM_USE_HOST_PTR; auto size = MemoryConstants::pageSize; void *ptr 
= (void *)alignedMalloc(size * 2, MemoryConstants::pageSize); auto ptrOffset = 1; void *offsetedPtr = (void *)((uintptr_t)ptr + ptrOffset); std::unique_ptr buffer(Buffer::create(&ctx, flags, MemoryConstants::pageSize, offsetedPtr, retVal)); ASSERT_NE(nullptr, buffer.get()); auto mapAllocation = buffer->getMapAllocation(device.get()->getRootDeviceIndex()); EXPECT_NE(nullptr, mapAllocation); EXPECT_EQ(offsetedPtr, mapAllocation->getUnderlyingBuffer()); EXPECT_EQ(AllocationType::MAP_ALLOCATION, mapAllocation->getAllocationType()); alignedFree(ptr); } TEST(Buffer, givenAlignedHostPtrPassedToBufferCreateWhenNoSharedContextOrCompressedBuffersThenBuffersAllocationTypeIsBufferHostMemory) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockContext ctx(device.get()); cl_int retVal = 0; cl_mem_flags flags = CL_MEM_USE_HOST_PTR; void *hostPtr = reinterpret_cast(0x3000); std::unique_ptr buffer(Buffer::create(&ctx, flags, MemoryConstants::pageSize, hostPtr, retVal)); ASSERT_NE(nullptr, buffer.get()); EXPECT_EQ(AllocationType::BUFFER_HOST_MEMORY, buffer->getMultiGraphicsAllocation().getAllocationType()); } TEST(Buffer, givenAllocHostPtrFlagPassedToBufferCreateWhenNoSharedContextOrCompressedBuffersThenBuffersAllocationTypeIsBufferHostMemory) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockContext ctx(device.get()); cl_int retVal = 0; cl_mem_flags flags = CL_MEM_ALLOC_HOST_PTR; std::unique_ptr buffer(Buffer::create(&ctx, flags, MemoryConstants::pageSize, nullptr, retVal)); ASSERT_NE(nullptr, buffer.get()); EXPECT_EQ(AllocationType::BUFFER_HOST_MEMORY, buffer->getMultiGraphicsAllocation().getAllocationType()); } TEST(Buffer, givenCompressedBuffersEnabledWhenAllocationTypeIsQueriedThenBufferCompressedTypeIsReturnedIn64Bit) { MockContext context; MemoryProperties memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(0, 0, 0, &context.getDevice(0)->getDevice()); context.contextType = 
ContextType::CONTEXT_TYPE_UNRESTRICTIVE; context.isSharedContext = false; bool compressionEnabled = MemObjHelper::isSuitableForCompression(true, memoryProperties, context, true); EXPECT_TRUE(compressionEnabled); auto type = MockPublicAccessBuffer::getGraphicsAllocationTypeAndCompressionPreference(memoryProperties, context, compressionEnabled, false); EXPECT_TRUE(compressionEnabled); EXPECT_EQ(AllocationType::BUFFER, type); } TEST(Buffer, givenCompressedBuffersDisabledLocalMemoryEnabledWhenAllocationTypeIsQueriedThenBufferTypeIsReturnedIn64Bit) { MockContext context; MemoryProperties memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(0, 0, 0, &context.getDevice(0)->getDevice()); context.contextType = ContextType::CONTEXT_TYPE_UNRESTRICTIVE; context.isSharedContext = false; bool compressionEnabled = MemObjHelper::isSuitableForCompression(false, memoryProperties, context, true); EXPECT_FALSE(compressionEnabled); auto type = MockPublicAccessBuffer::getGraphicsAllocationTypeAndCompressionPreference(memoryProperties, context, compressionEnabled, true); EXPECT_FALSE(compressionEnabled); EXPECT_EQ(AllocationType::BUFFER, type); } TEST(Buffer, givenSharedContextWhenAllocationTypeIsQueriedThenBufferHostMemoryTypeIsReturned) { MockContext context; MemoryProperties memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(0, 0, 0, &context.getDevice(0)->getDevice()); context.contextType = ContextType::CONTEXT_TYPE_UNRESTRICTIVE; context.isSharedContext = true; bool compressionEnabled = MemObjHelper::isSuitableForCompression(false, memoryProperties, context, true); EXPECT_FALSE(compressionEnabled); auto type = MockPublicAccessBuffer::getGraphicsAllocationTypeAndCompressionPreference(memoryProperties, context, compressionEnabled, false); EXPECT_FALSE(compressionEnabled); EXPECT_EQ(AllocationType::BUFFER_HOST_MEMORY, type); } TEST(Buffer, givenSharedContextAndCompressedBuffersEnabledWhenAllocationTypeIsQueriedThenBufferHostMemoryTypeIsReturned) { 
MockContext context; MemoryProperties memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(0, 0, 0, &context.getDevice(0)->getDevice()); context.contextType = ContextType::CONTEXT_TYPE_UNRESTRICTIVE; context.isSharedContext = true; bool compressionEnabled = MemObjHelper::isSuitableForCompression(true, memoryProperties, context, true); EXPECT_TRUE(compressionEnabled); auto type = MockPublicAccessBuffer::getGraphicsAllocationTypeAndCompressionPreference(memoryProperties, context, compressionEnabled, false); EXPECT_FALSE(compressionEnabled); EXPECT_EQ(AllocationType::BUFFER_HOST_MEMORY, type); } TEST(Buffer, givenUseHostPtrFlagAndLocalMemoryDisabledWhenAllocationTypeIsQueriedThenBufferHostMemoryTypeIsReturned) { cl_mem_flags flags = CL_MEM_USE_HOST_PTR; MockContext context; MemoryProperties memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context.getDevice(0)->getDevice()); context.contextType = ContextType::CONTEXT_TYPE_UNRESTRICTIVE; context.isSharedContext = false; bool compressionEnabled = MemObjHelper::isSuitableForCompression(false, memoryProperties, context, true); EXPECT_FALSE(compressionEnabled); auto type = MockPublicAccessBuffer::getGraphicsAllocationTypeAndCompressionPreference(memoryProperties, context, compressionEnabled, false); EXPECT_FALSE(compressionEnabled); EXPECT_EQ(AllocationType::BUFFER_HOST_MEMORY, type); } TEST(Buffer, givenUseHostPtrFlagAndLocalMemoryEnabledWhenAllocationTypeIsQueriedThenBufferTypeIsReturned) { cl_mem_flags flags = CL_MEM_USE_HOST_PTR; MockContext context; MemoryProperties memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context.getDevice(0)->getDevice()); context.contextType = ContextType::CONTEXT_TYPE_UNRESTRICTIVE; context.isSharedContext = false; bool compressionEnabled = MemObjHelper::isSuitableForCompression(false, memoryProperties, context, true); EXPECT_FALSE(compressionEnabled); auto type = 
MockPublicAccessBuffer::getGraphicsAllocationTypeAndCompressionPreference(memoryProperties, context, compressionEnabled, true); EXPECT_FALSE(compressionEnabled); EXPECT_EQ(AllocationType::BUFFER, type); } TEST(Buffer, givenAllocHostPtrFlagWhenAllocationTypeIsQueriedThenBufferTypeIsReturned) { cl_mem_flags flags = CL_MEM_ALLOC_HOST_PTR; MockContext context; MemoryProperties memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context.getDevice(0)->getDevice()); context.contextType = ContextType::CONTEXT_TYPE_UNRESTRICTIVE; context.isSharedContext = false; bool compressionEnabled = MemObjHelper::isSuitableForCompression(false, memoryProperties, context, true); EXPECT_FALSE(compressionEnabled); auto type = MockPublicAccessBuffer::getGraphicsAllocationTypeAndCompressionPreference(memoryProperties, context, compressionEnabled, false); EXPECT_FALSE(compressionEnabled); EXPECT_EQ(AllocationType::BUFFER, type); } TEST(Buffer, givenUseHostPtrFlagAndLocalMemoryDisabledAndCompressedBuffersEnabledWhenAllocationTypeIsQueriedThenBufferMemoryTypeIsReturned) { cl_mem_flags flags = CL_MEM_USE_HOST_PTR; MockContext context; MemoryProperties memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context.getDevice(0)->getDevice()); context.contextType = ContextType::CONTEXT_TYPE_UNRESTRICTIVE; context.isSharedContext = false; bool compressionEnabled = MemObjHelper::isSuitableForCompression(true, memoryProperties, context, true); EXPECT_TRUE(compressionEnabled); auto type = MockPublicAccessBuffer::getGraphicsAllocationTypeAndCompressionPreference(memoryProperties, context, compressionEnabled, false); EXPECT_FALSE(compressionEnabled); EXPECT_EQ(AllocationType::BUFFER_HOST_MEMORY, type); } TEST(Buffer, givenUseHostPtrFlagAndLocalMemoryEnabledAndCompressedBuffersEnabledWhenAllocationTypeIsQueriedThenBufferMemoryTypeIsReturned) { cl_mem_flags flags = CL_MEM_USE_HOST_PTR; MockContext context; MemoryProperties memoryProperties = 
ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context.getDevice(0)->getDevice()); context.contextType = ContextType::CONTEXT_TYPE_UNRESTRICTIVE; context.isSharedContext = false; bool compressionEnabled = MemObjHelper::isSuitableForCompression(true, memoryProperties, context, true); EXPECT_TRUE(compressionEnabled); auto type = MockPublicAccessBuffer::getGraphicsAllocationTypeAndCompressionPreference(memoryProperties, context, compressionEnabled, true); EXPECT_TRUE(compressionEnabled); EXPECT_EQ(AllocationType::BUFFER, type); } TEST(Buffer, givenUseHostPointerFlagAndForceSharedPhysicalStorageWhenLocalMemoryIsEnabledThenBufferHostMemoryTypeIsReturned) { cl_mem_flags flags = CL_MEM_USE_HOST_PTR | CL_MEM_FORCE_HOST_MEMORY_INTEL; MockContext context; MemoryProperties memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context.getDevice(0)->getDevice()); context.contextType = ContextType::CONTEXT_TYPE_UNRESTRICTIVE; context.isSharedContext = false; bool compressionEnabled = MemObjHelper::isSuitableForCompression(true, memoryProperties, context, true); EXPECT_TRUE(compressionEnabled); auto type = MockPublicAccessBuffer::getGraphicsAllocationTypeAndCompressionPreference(memoryProperties, context, compressionEnabled, true); EXPECT_FALSE(compressionEnabled); EXPECT_EQ(AllocationType::BUFFER_HOST_MEMORY, type); } TEST(Buffer, givenAllocHostPtrFlagAndCompressedBuffersEnabledWhenAllocationTypeIsQueriedThenBufferCompressedTypeIsReturned) { cl_mem_flags flags = CL_MEM_ALLOC_HOST_PTR; MockContext context; MemoryProperties memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context.getDevice(0)->getDevice()); context.contextType = ContextType::CONTEXT_TYPE_UNRESTRICTIVE; context.isSharedContext = false; bool compressionEnabled = MemObjHelper::isSuitableForCompression(true, memoryProperties, context, true); EXPECT_TRUE(compressionEnabled); auto type = 
MockPublicAccessBuffer::getGraphicsAllocationTypeAndCompressionPreference(memoryProperties, context, compressionEnabled, false); EXPECT_TRUE(compressionEnabled); EXPECT_EQ(AllocationType::BUFFER, type); } TEST(Buffer, givenZeroFlagsNoSharedContextAndCompressedBuffersDisabledWhenAllocationTypeIsQueriedThenBufferTypeIsReturned) { MockContext context; MemoryProperties memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(0, 0, 0, &context.getDevice(0)->getDevice()); context.contextType = ContextType::CONTEXT_TYPE_UNRESTRICTIVE; context.isSharedContext = false; bool compressionEnabled = MemObjHelper::isSuitableForCompression(false, memoryProperties, context, true); EXPECT_FALSE(compressionEnabled); auto type = MockPublicAccessBuffer::getGraphicsAllocationTypeAndCompressionPreference(memoryProperties, context, compressionEnabled, false); EXPECT_FALSE(compressionEnabled); EXPECT_EQ(AllocationType::BUFFER, type); } TEST(Buffer, givenClMemCopyHostPointerPassedToBufferCreateWhenAllocationIsNotInSystemMemoryPoolThenAllocationIsWrittenByEnqueueWriteBuffer) { ExecutionEnvironment *executionEnvironment = MockClDevice::prepareExecutionEnvironment(defaultHwInfo.get(), 0u); auto *memoryManager = new MockMemoryManagerFailFirstAllocation(*executionEnvironment); executionEnvironment->memoryManager.reset(memoryManager); memoryManager->returnBaseAllocateGraphicsMemoryInDevicePool = true; auto device = std::make_unique(MockDevice::create(executionEnvironment, 0)); MockContext ctx(device.get()); cl_int retVal = 0; cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR; char memory[] = {1, 2, 3, 4, 5, 6, 7, 8}; auto taskCount = device->getGpgpuCommandStreamReceiver().peekLatestFlushedTaskCount(); memoryManager->returnAllocateNonSystemGraphicsMemoryInDevicePool = true; std::unique_ptr buffer(Buffer::create(&ctx, flags, sizeof(memory), memory, retVal)); ASSERT_NE(nullptr, buffer.get()); auto taskCountSent = 
device->getGpgpuCommandStreamReceiver().peekLatestFlushedTaskCount(); if constexpr (is64bit) { EXPECT_LT(taskCount, taskCountSent); } } struct CompressedBuffersTests : public ::testing::Test { void SetUp() override { ExecutionEnvironment *executionEnvironment = MockDevice::prepareExecutionEnvironment(defaultHwInfo.get(), 0u); for (auto &rootDeviceEnvironment : executionEnvironment->rootDeviceEnvironments) { rootDeviceEnvironment->initGmm(); } SetUp(executionEnvironment); } void SetUp(ExecutionEnvironment *executionEnvironment) { hwInfo = executionEnvironment->rootDeviceEnvironments[0]->getMutableHardwareInfo(); device = std::make_unique(MockDevice::create(executionEnvironment, 0u)); context = std::make_unique(device.get(), true); context->contextType = ContextType::CONTEXT_TYPE_UNRESTRICTIVE; } cl_int retVal = CL_SUCCESS; HardwareInfo *hwInfo = nullptr; std::unique_ptr device; std::unique_ptr context; std::unique_ptr buffer; uint32_t hostPtr[2048]; size_t bufferSize = sizeof(hostPtr); }; TEST_F(CompressedBuffersTests, givenBufferCompressedAllocationAndZeroCopyHostPtrWhenCheckingMemoryPropertiesThenUseHostPtrAndDontAllocateStorage) { hwInfo->capabilityTable.ftrRenderCompressedBuffers = false; void *cacheAlignedHostPtr = alignedMalloc(MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize); buffer.reset(Buffer::create(context.get(), CL_MEM_FORCE_HOST_MEMORY_INTEL | CL_MEM_USE_HOST_PTR, MemoryConstants::cacheLineSize, cacheAlignedHostPtr, retVal)); auto allocation = buffer->getGraphicsAllocation(device->getRootDeviceIndex()); EXPECT_EQ(cacheAlignedHostPtr, allocation->getUnderlyingBuffer()); EXPECT_TRUE(buffer->isMemObjZeroCopy()); EXPECT_EQ(allocation->getAllocationType(), AllocationType::BUFFER_HOST_MEMORY); uint32_t pattern[2] = {0, 0}; pattern[0] = 0xdeadbeef; pattern[1] = 0xdeadbeef; static_assert(sizeof(pattern) <= MemoryConstants::cacheLineSize, "Incorrect pattern size"); uint32_t *dest = reinterpret_cast(cacheAlignedHostPtr); for (size_t i = 0; i < 
arrayCount(pattern); i++) { dest[i] = pattern[i]; } hwInfo->capabilityTable.ftrRenderCompressedBuffers = true; buffer.reset(Buffer::create(context.get(), CL_MEM_FORCE_HOST_MEMORY_INTEL | CL_MEM_USE_HOST_PTR, MemoryConstants::cacheLineSize, cacheAlignedHostPtr, retVal)); allocation = buffer->getGraphicsAllocation(device->getRootDeviceIndex()); EXPECT_EQ(cacheAlignedHostPtr, allocation->getUnderlyingBuffer()); EXPECT_TRUE(buffer->isMemObjZeroCopy()); EXPECT_EQ(allocation->getAllocationType(), AllocationType::BUFFER_HOST_MEMORY); EXPECT_EQ(0, memcmp(allocation->getUnderlyingBuffer(), &pattern[0], sizeof(pattern))); alignedFree(cacheAlignedHostPtr); } TEST_F(CompressedBuffersTests, givenAllocationCreatedWithForceSharedPhysicalMemoryWhenItIsCreatedThenItIsZeroCopy) { buffer.reset(Buffer::create(context.get(), CL_MEM_FORCE_HOST_MEMORY_INTEL, 1u, nullptr, retVal)); EXPECT_EQ(buffer->getGraphicsAllocation(device->getRootDeviceIndex())->getAllocationType(), AllocationType::BUFFER_HOST_MEMORY); EXPECT_TRUE(buffer->isMemObjZeroCopy()); EXPECT_EQ(1u, buffer->getSize()); } TEST_F(CompressedBuffersTests, givenCompressedBuffersAndAllocationCreatedWithForceSharedPhysicalMemoryWhenItIsCreatedThenItIsZeroCopy) { hwInfo->capabilityTable.ftrRenderCompressedBuffers = true; buffer.reset(Buffer::create(context.get(), CL_MEM_FORCE_HOST_MEMORY_INTEL, 1u, nullptr, retVal)); EXPECT_EQ(buffer->getGraphicsAllocation(device->getRootDeviceIndex())->getAllocationType(), AllocationType::BUFFER_HOST_MEMORY); EXPECT_TRUE(buffer->isMemObjZeroCopy()); EXPECT_EQ(1u, buffer->getSize()); } TEST_F(CompressedBuffersTests, givenBufferNotCompressedAllocationAndNoHostPtrWhenCheckingMemoryPropertiesThenForceDisableZeroCopy) { hwInfo->capabilityTable.ftrRenderCompressedBuffers = false; buffer.reset(Buffer::create(context.get(), 0, bufferSize, nullptr, retVal)); auto allocation = buffer->getGraphicsAllocation(device->getRootDeviceIndex()); EXPECT_TRUE(buffer->isMemObjZeroCopy()); if 
(MemoryPool::isSystemMemoryPool(allocation->getMemoryPool())) { EXPECT_EQ(allocation->getAllocationType(), AllocationType::BUFFER_HOST_MEMORY); } else { EXPECT_EQ(allocation->getAllocationType(), AllocationType::BUFFER); } auto memoryManager = static_cast(device->getExecutionEnvironment()->memoryManager.get()); hwInfo->capabilityTable.ftrRenderCompressedBuffers = true; buffer.reset(Buffer::create(context.get(), 0, bufferSize, nullptr, retVal)); allocation = buffer->getGraphicsAllocation(device->getRootDeviceIndex()); if (HwHelper::get(context->getDevice(0)->getHardwareInfo().platform.eRenderCoreFamily).isBufferSizeSuitableForCompression(bufferSize, *hwInfo)) { EXPECT_FALSE(buffer->isMemObjZeroCopy()); EXPECT_EQ(allocation->getAllocationType(), AllocationType::BUFFER); EXPECT_EQ(!memoryManager->allocate32BitGraphicsMemoryImplCalled, allocation->isCompressionEnabled()); } else { EXPECT_TRUE(buffer->isMemObjZeroCopy()); EXPECT_EQ(allocation->getAllocationType(), AllocationType::BUFFER_HOST_MEMORY); } } TEST_F(CompressedBuffersTests, givenBufferCompressedAllocationWhenSharedContextIsUsedThenForceDisableCompression) { hwInfo->capabilityTable.ftrRenderCompressedBuffers = true; context->isSharedContext = false; auto memoryManager = static_cast(device->getExecutionEnvironment()->memoryManager.get()); buffer.reset(Buffer::create(context.get(), CL_MEM_READ_WRITE, bufferSize, nullptr, retVal)); auto graphicsAllocation = buffer->getGraphicsAllocation(context->getDevice(0)->getRootDeviceIndex()); if (HwHelper::get(context->getDevice(0)->getHardwareInfo().platform.eRenderCoreFamily).isBufferSizeSuitableForCompression(bufferSize, *hwInfo)) { EXPECT_EQ(graphicsAllocation->getAllocationType(), AllocationType::BUFFER); EXPECT_EQ(!memoryManager->allocate32BitGraphicsMemoryImplCalled, graphicsAllocation->isCompressionEnabled()); } else { EXPECT_EQ(graphicsAllocation->getAllocationType(), AllocationType::BUFFER_HOST_MEMORY); } context->isSharedContext = true; 
buffer.reset(Buffer::create(context.get(), CL_MEM_USE_HOST_PTR, bufferSize, hostPtr, retVal)); graphicsAllocation = buffer->getGraphicsAllocation(context->getDevice(0)->getRootDeviceIndex()); EXPECT_EQ(graphicsAllocation->getAllocationType(), AllocationType::BUFFER_HOST_MEMORY); } TEST_F(CompressedBuffersTests, givenDebugVariableSetWhenHwFlagIsNotSetThenSelectOptionFromDebugFlag) { DebugManagerStateRestore restore; auto memoryManager = static_cast(device->getExecutionEnvironment()->memoryManager.get()); hwInfo->capabilityTable.ftrRenderCompressedBuffers = false; DebugManager.flags.RenderCompressedBuffersEnabled.set(1); buffer.reset(Buffer::create(context.get(), 0, bufferSize, nullptr, retVal)); auto graphicsAllocation = buffer->getGraphicsAllocation(context->getDevice(0)->getRootDeviceIndex()); if (HwHelper::get(context->getDevice(0)->getHardwareInfo().platform.eRenderCoreFamily).isBufferSizeSuitableForCompression(bufferSize, *hwInfo)) { EXPECT_EQ(graphicsAllocation->getAllocationType(), AllocationType::BUFFER); EXPECT_EQ(!memoryManager->allocate32BitGraphicsMemoryImplCalled, graphicsAllocation->isCompressionEnabled()); } else { EXPECT_EQ(graphicsAllocation->getAllocationType(), AllocationType::BUFFER_HOST_MEMORY); } DebugManager.flags.RenderCompressedBuffersEnabled.set(0); buffer.reset(Buffer::create(context.get(), 0, bufferSize, nullptr, retVal)); graphicsAllocation = buffer->getGraphicsAllocation(context->getDevice(0)->getRootDeviceIndex()); EXPECT_FALSE(graphicsAllocation->isCompressionEnabled()); } struct CompressedBuffersSvmTests : public CompressedBuffersTests { void SetUp() override { ExecutionEnvironment *executionEnvironment = MockDevice::prepareExecutionEnvironment(defaultHwInfo.get(), 0u); for (auto &rootDeviceEnvironment : executionEnvironment->rootDeviceEnvironments) { rootDeviceEnvironment->initGmm(); } executionEnvironment->prepareRootDeviceEnvironments(1u); hwInfo = executionEnvironment->rootDeviceEnvironments[0]->getMutableHardwareInfo(); 
hwInfo->capabilityTable.gpuAddressSpace = MemoryConstants::max48BitAddress; CompressedBuffersTests::SetUp(executionEnvironment); } }; TEST_F(CompressedBuffersSvmTests, givenSvmAllocationWhenCreatingBufferThenForceDisableCompression) { hwInfo->capabilityTable.ftrRenderCompressedBuffers = true; auto svmPtr = context->getSVMAllocsManager()->createSVMAlloc(sizeof(uint32_t), {}, context->getRootDeviceIndices(), context->getDeviceBitfields()); auto expectedAllocationType = context->getSVMAllocsManager()->getSVMAlloc(svmPtr)->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex())->getAllocationType(); buffer.reset(Buffer::create(context.get(), CL_MEM_USE_HOST_PTR, sizeof(uint32_t), svmPtr, retVal)); EXPECT_EQ(expectedAllocationType, buffer->getGraphicsAllocation(device->getRootDeviceIndex())->getAllocationType()); buffer.reset(nullptr); context->getSVMAllocsManager()->freeSVMAlloc(svmPtr); } struct CompressedBuffersCopyHostMemoryTests : public CompressedBuffersTests { void SetUp() override { CompressedBuffersTests::SetUp(); device->injectMemoryManager(new MockMemoryManager(true, false, *device->getExecutionEnvironment())); context->memoryManager = device->getMemoryManager(); mockCmdQ = new MockCommandQueue(); context->setSpecialQueue(mockCmdQ, device->getRootDeviceIndex()); } MockCommandQueue *mockCmdQ = nullptr; }; TEST_F(CompressedBuffersCopyHostMemoryTests, givenCompressedBufferWhenCopyFromHostPtrIsRequiredThenCallWriteBuffer) { if (is32bit) { return; } hwInfo->capabilityTable.ftrRenderCompressedBuffers = true; buffer.reset(Buffer::create(context.get(), CL_MEM_COPY_HOST_PTR, bufferSize, hostPtr, retVal)); auto graphicsAllocation = buffer->getGraphicsAllocation(context->getDevice(0)->getRootDeviceIndex()); if (HwHelper::get(context->getDevice(0)->getHardwareInfo().platform.eRenderCoreFamily).isBufferSizeSuitableForCompression(bufferSize, *hwInfo)) { EXPECT_TRUE(graphicsAllocation->isCompressionEnabled()); EXPECT_EQ(1u, mockCmdQ->writeBufferCounter); 
EXPECT_TRUE(mockCmdQ->writeBufferBlocking); EXPECT_EQ(0u, mockCmdQ->writeBufferOffset); EXPECT_EQ(bufferSize, mockCmdQ->writeBufferSize); EXPECT_EQ(hostPtr, mockCmdQ->writeBufferPtr); } else { EXPECT_EQ(graphicsAllocation->getAllocationType(), AllocationType::BUFFER_HOST_MEMORY); EXPECT_EQ(0u, mockCmdQ->writeBufferCounter); EXPECT_FALSE(mockCmdQ->writeBufferBlocking); EXPECT_EQ(0u, mockCmdQ->writeBufferOffset); EXPECT_EQ(0u, mockCmdQ->writeBufferSize); EXPECT_EQ(nullptr, mockCmdQ->writeBufferPtr); } EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(CompressedBuffersCopyHostMemoryTests, givenBufferCreateWhenMemoryTransferWithEnqueueWriteBufferThenMapAllocationIsReused) { cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR; auto &capabilityTable = device->getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable; capabilityTable.blitterOperationsSupported = false; static_cast(context->memoryManager)->forceCompressed = true; std::unique_ptr buffer(Buffer::create(context.get(), flags, bufferSize, hostPtr, retVal)); EXPECT_NE(nullptr, mockCmdQ->writeMapAllocation); EXPECT_EQ(buffer->getMapAllocation(device.get()->getRootDeviceIndex()), mockCmdQ->writeMapAllocation); } TEST_F(CompressedBuffersCopyHostMemoryTests, givenNonCompressedBufferWhenCopyFromHostPtrIsRequiredThenDontCallWriteBuffer) { hwInfo->capabilityTable.ftrRenderCompressedBuffers = false; buffer.reset(Buffer::create(context.get(), CL_MEM_COPY_HOST_PTR, sizeof(uint32_t), &hostPtr, retVal)); EXPECT_FALSE(buffer->getGraphicsAllocation(0)->isCompressionEnabled()); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, mockCmdQ->writeBufferCounter); } TEST_F(CompressedBuffersCopyHostMemoryTests, givenCompressedBufferWhenWriteBufferFailsThenReturnErrorCode) { if (is32bit || !HwHelper::get(context->getDevice(0)->getHardwareInfo().platform.eRenderCoreFamily).isBufferSizeSuitableForCompression(bufferSize, *hwInfo)) { return; } hwInfo->capabilityTable.ftrRenderCompressedBuffers = true; mockCmdQ->writeBufferRetValue = 
CL_INVALID_VALUE; buffer.reset(Buffer::create(context.get(), CL_MEM_COPY_HOST_PTR, bufferSize, hostPtr, retVal)); EXPECT_EQ(CL_OUT_OF_RESOURCES, retVal); EXPECT_EQ(nullptr, buffer.get()); } class BufferTest : public ClDeviceFixture, public testing::TestWithParam { public: BufferTest() { } protected: void SetUp() override { flags = GetParam(); ClDeviceFixture::SetUp(); context.reset(new MockContext(pClDevice)); } void TearDown() override { context.reset(); ClDeviceFixture::TearDown(); } cl_int retVal = CL_SUCCESS; std::unique_ptr context; MemoryManager *contextMemoryManager; cl_mem_flags flags = 0; unsigned char pHostPtr[g_scTestBufferSizeInBytes]; }; typedef BufferTest NoHostPtr; TEST_P(NoHostPtr, GivenValidFlagsWhenCreatingBufferThenBufferIsCreated) { auto buffer = Buffer::create( context.get(), flags, g_scTestBufferSizeInBytes, nullptr, retVal); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, buffer); auto address = buffer->getCpuAddress(); EXPECT_NE(nullptr, address); delete buffer; } TEST_P(NoHostPtr, GivenNoHostPtrWhenHwBufferCreationFailsThenReturnNullptr) { BufferFactoryFuncs BufferFuncsBackup[IGFX_MAX_CORE]; for (uint32_t i = 0; i < IGFX_MAX_CORE; i++) { BufferFuncsBackup[i] = bufferFactory[i]; bufferFactory[i].createBufferFunction = [](Context *, MemoryProperties, cl_mem_flags, cl_mem_flags_intel, size_t, void *, void *, MultiGraphicsAllocation, bool, bool, bool) -> NEO::Buffer * { return nullptr; }; } auto buffer = Buffer::create( context.get(), flags, g_scTestBufferSizeInBytes, nullptr, retVal); EXPECT_EQ(nullptr, buffer); for (uint32_t i = 0; i < IGFX_MAX_CORE; i++) { bufferFactory[i] = BufferFuncsBackup[i]; } } TEST_P(NoHostPtr, GivenNoHostPtrWhenCreatingBufferWithMemUseHostPtrThenInvalidHostPtrErrorIsReturned) { auto buffer = Buffer::create( context.get(), flags | CL_MEM_USE_HOST_PTR, g_scTestBufferSizeInBytes, nullptr, retVal); EXPECT_EQ(CL_INVALID_HOST_PTR, retVal); EXPECT_EQ(nullptr, buffer); delete buffer; } TEST_P(NoHostPtr, 
GivenNoHostPtrWhenCreatingBufferWithMemCopyHostPtrThenInvalidHostPtrErrorIsReturned) { auto buffer = Buffer::create( context.get(), flags | CL_MEM_COPY_HOST_PTR, g_scTestBufferSizeInBytes, nullptr, retVal); EXPECT_EQ(CL_INVALID_HOST_PTR, retVal); EXPECT_EQ(nullptr, buffer); delete buffer; } TEST_P(NoHostPtr, WhenGettingAllocationTypeThenCorrectBufferTypeIsReturned) { auto buffer = Buffer::create( context.get(), flags, g_scTestBufferSizeInBytes, nullptr, retVal); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, buffer); auto allocation = buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex()); if (MemoryPool::isSystemMemoryPool(allocation->getMemoryPool())) { EXPECT_EQ(allocation->getAllocationType(), AllocationType::BUFFER_HOST_MEMORY); } else { EXPECT_EQ(allocation->getAllocationType(), AllocationType::BUFFER); } auto isBufferWritable = !(flags & (CL_MEM_READ_ONLY | CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS)); EXPECT_EQ(isBufferWritable, allocation->isMemObjectsAllocationWithWritableFlags()); delete buffer; } // Parameterized test that tests buffer creation with all flags // that should be valid with a nullptr host ptr cl_mem_flags NoHostPtrFlags[] = { CL_MEM_READ_WRITE, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY, CL_MEM_HOST_READ_ONLY, CL_MEM_HOST_WRITE_ONLY, CL_MEM_HOST_NO_ACCESS}; INSTANTIATE_TEST_CASE_P( BufferTest_Create, NoHostPtr, testing::ValuesIn(NoHostPtrFlags)); struct ValidHostPtr : public BufferTest, public MemoryManagementFixture { typedef BufferTest BaseClass; using BufferTest::SetUp; using MemoryManagementFixture::SetUp; ValidHostPtr() { } void SetUp() override { MemoryManagementFixture::SetUp(); BaseClass::SetUp(); ASSERT_NE(nullptr, pDevice); } void TearDown() override { delete buffer; BaseClass::TearDown(); MemoryManagementFixture::TearDown(); } Buffer *createBuffer() { return Buffer::create( context.get(), flags, g_scTestBufferSizeInBytes, pHostPtr, retVal); } cl_int retVal = CL_INVALID_VALUE; Buffer *buffer = nullptr; }; 
// A freshly created buffer must not be resident in the default engine's context.
TEST_P(ValidHostPtr, WhenBufferIsCreatedThenItIsNotResident) {
    buffer = createBuffer();
    ASSERT_NE(nullptr, buffer);

    EXPECT_FALSE(buffer->getGraphicsAllocation(pDevice->getRootDeviceIndex())->isResident(pDevice->getDefaultEngine().osContext->getContextId()));
}

// Zero-copy USE_HOST_PTR buffers expose the caller's pointer; every other
// combination gets its own storage, and COPY_HOST_PTR storage holds a copy.
TEST_P(ValidHostPtr, WhenBufferIsCreatedThenAddressMatechesOnlyForHostPtr) {
    buffer = createBuffer();
    ASSERT_NE(nullptr, buffer);

    auto address = buffer->getCpuAddress();
    EXPECT_NE(nullptr, address);
    if (flags & CL_MEM_USE_HOST_PTR && buffer->isMemObjZeroCopy()) {
        // Buffer should use host ptr
        EXPECT_EQ(pHostPtr, address);
        EXPECT_EQ(pHostPtr, buffer->getHostPtr());
    } else {
        // Buffer should have a different ptr
        EXPECT_NE(pHostPtr, address);
    }

    if (flags & CL_MEM_COPY_HOST_PTR) {
        // Buffer should contain a copy of host memory.
        // Compare the whole buffer: sizeof(g_scTestBufferSizeInBytes) would
        // only cover the size of the constant itself.
        EXPECT_EQ(0, memcmp(pHostPtr, address, g_scTestBufferSizeInBytes));
        EXPECT_EQ(nullptr, buffer->getHostPtr());
    }
}

// The buffer must report exactly the size it was created with.
TEST_P(ValidHostPtr, WhenGettingBufferSizeThenSizeIsCorrect) {
    buffer = createBuffer();
    ASSERT_NE(nullptr, buffer);

    EXPECT_EQ(g_scTestBufferSizeInBytes, buffer->getSize());
}

// A sub-buffer created with zero flags over the whole parent must succeed for
// every parent flag combination.
TEST_P(ValidHostPtr, givenValidHostPtrParentFlagsWhenSubBufferIsCreatedWithZeroFlagsThenItCreatesSuccesfuly) {
    auto retVal = CL_SUCCESS;
    auto clBuffer = clCreateBuffer(context.get(),
                                   flags,
                                   g_scTestBufferSizeInBytes,
                                   pHostPtr,
                                   &retVal);

    ASSERT_NE(nullptr, clBuffer);

    cl_buffer_region region = {0, g_scTestBufferSizeInBytes};

    auto subBuffer = clCreateSubBuffer(clBuffer,
                                       0,
                                       CL_BUFFER_CREATE_TYPE_REGION,
                                       &region,
                                       &retVal);
    EXPECT_EQ(CL_SUCCESS, retVal);

    retVal = clReleaseMemObject(subBuffer);
    EXPECT_EQ(CL_SUCCESS, retVal);
    retVal = clReleaseMemObject(clBuffer);
    EXPECT_EQ(CL_SUCCESS, retVal);
}

// A sub-buffer that repeats the parent's access flags must also succeed.
TEST_P(ValidHostPtr, givenValidHostPtrParentFlagsWhenSubBufferIsCreatedWithParentFlagsThenItIsCreatedSuccesfuly) {
    auto retVal = CL_SUCCESS;
    auto clBuffer = clCreateBuffer(context.get(),
                                   flags,
                                   g_scTestBufferSizeInBytes,
                                   pHostPtr,
                                   &retVal);

    ASSERT_NE(nullptr, clBuffer);
    cl_buffer_region region = {0,
g_scTestBufferSizeInBytes}; const cl_mem_flags allValidFlags = CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS; cl_mem_flags unionFlags = flags & allValidFlags; auto subBuffer = clCreateSubBuffer(clBuffer, unionFlags, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, subBuffer); retVal = clReleaseMemObject(subBuffer); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(clBuffer); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_P(ValidHostPtr, givenValidHostPtrParentFlagsWhenSubBufferIsCreatedWithInvalidParentFlagsThenCreationFails) { auto retVal = CL_SUCCESS; cl_mem_flags invalidFlags = 0; if (flags & CL_MEM_READ_ONLY) { invalidFlags |= CL_MEM_WRITE_ONLY; } if (flags & CL_MEM_WRITE_ONLY) { invalidFlags |= CL_MEM_READ_ONLY; } if (flags & CL_MEM_HOST_NO_ACCESS) { invalidFlags |= CL_MEM_HOST_READ_ONLY; } if (flags & CL_MEM_HOST_READ_ONLY) { invalidFlags |= CL_MEM_HOST_WRITE_ONLY; } if (flags & CL_MEM_HOST_WRITE_ONLY) { invalidFlags |= CL_MEM_HOST_READ_ONLY; } if (invalidFlags == 0) { return; } auto clBuffer = clCreateBuffer(context.get(), flags, g_scTestBufferSizeInBytes, pHostPtr, &retVal); ASSERT_NE(nullptr, clBuffer); cl_buffer_region region = {0, g_scTestBufferSizeInBytes}; auto subBuffer = clCreateSubBuffer(clBuffer, invalidFlags, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &retVal); EXPECT_NE(CL_SUCCESS, retVal); EXPECT_EQ(nullptr, subBuffer); retVal = clReleaseMemObject(clBuffer); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_P(ValidHostPtr, GivenFailedAllocationWhenCreatingBufferThenBufferIsNotCreated) { InjectedFunction method = [this](size_t failureIndex) { delete buffer; buffer = nullptr; // System under test buffer = createBuffer(); if (MemoryManagement::nonfailingAllocation == failureIndex) { EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); } else { EXPECT_EQ(nullptr, buffer); }; }; injectFailures(method); } TEST_P(ValidHostPtr, 
GivenSvmHostPtrWhenCreatingBufferThenBufferIsCreatedCorrectly) { const ClDeviceInfo &devInfo = pClDevice->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { auto ptr = context->getSVMAllocsManager()->createSVMAlloc(64, {}, context->getRootDeviceIndices(), context->getDeviceBitfields()); auto bufferSvm = Buffer::create(context.get(), CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, 64, ptr, retVal); EXPECT_NE(nullptr, bufferSvm); EXPECT_TRUE(bufferSvm->isMemObjWithHostPtrSVM()); auto svmData = context->getSVMAllocsManager()->getSVMAlloc(ptr); ASSERT_NE(nullptr, svmData); EXPECT_EQ(svmData->gpuAllocations.getGraphicsAllocation(pDevice->getRootDeviceIndex()), bufferSvm->getGraphicsAllocation(pDevice->getRootDeviceIndex())); EXPECT_EQ(CL_SUCCESS, retVal); delete bufferSvm; context->getSVMAllocsManager()->freeSVMAlloc(ptr); } } // Parameterized test that tests buffer creation with all flags that should be // valid with a valid host ptr cl_mem_flags ValidHostPtrFlags[] = { 0 | CL_MEM_USE_HOST_PTR, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, CL_MEM_HOST_READ_ONLY | CL_MEM_USE_HOST_PTR, CL_MEM_HOST_WRITE_ONLY | CL_MEM_USE_HOST_PTR, CL_MEM_HOST_NO_ACCESS | CL_MEM_USE_HOST_PTR, 0 | CL_MEM_COPY_HOST_PTR, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, CL_MEM_HOST_READ_ONLY | CL_MEM_COPY_HOST_PTR, CL_MEM_HOST_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, CL_MEM_HOST_NO_ACCESS | CL_MEM_COPY_HOST_PTR}; INSTANTIATE_TEST_CASE_P( BufferTest_Create, ValidHostPtr, testing::ValuesIn(ValidHostPtrFlags)); class BufferCalculateHostPtrSize : public testing::TestWithParam> { public: BufferCalculateHostPtrSize(){}; protected: void SetUp() override { std::tie(origin[0], origin[1], origin[2], region[0], region[1], region[2], rowPitch, slicePitch, hostPtrSize) = GetParam(); } void TearDown() override { } size_t origin[3]; size_t region[3]; size_t rowPitch; 
size_t slicePitch; size_t hostPtrSize; }; /* origin, region, rowPitch, slicePitch, hostPtrSize*/ static std::tuple Inputs[] = {std::make_tuple(0, 0, 0, 1, 1, 1, 10, 1, 1), std::make_tuple(0, 0, 0, 7, 1, 1, 10, 1, 7), std::make_tuple(0, 0, 0, 7, 3, 1, 10, 1, 27), std::make_tuple(0, 0, 0, 7, 1, 3, 10, 10, 27), std::make_tuple(0, 0, 0, 7, 2, 3, 10, 20, 57), std::make_tuple(0, 0, 0, 7, 1, 3, 10, 30, 67), std::make_tuple(0, 0, 0, 7, 2, 3, 10, 30, 77), std::make_tuple(9, 0, 0, 1, 1, 1, 10, 1, 10), std::make_tuple(0, 2, 0, 7, 3, 1, 10, 1, 27 + 20), std::make_tuple(0, 0, 1, 7, 1, 3, 10, 10, 27 + 10), std::make_tuple(0, 2, 1, 7, 2, 3, 10, 20, 57 + 40), std::make_tuple(1, 1, 1, 7, 1, 3, 10, 30, 67 + 41), std::make_tuple(2, 0, 2, 7, 2, 3, 10, 30, 77 + 62)}; TEST_P(BufferCalculateHostPtrSize, WhenCalculatingHostPtrSizeThenItIsCorrect) { size_t calculatedSize = Buffer::calculateHostPtrSize(origin, region, rowPitch, slicePitch); EXPECT_EQ(hostPtrSize, calculatedSize); } INSTANTIATE_TEST_CASE_P( BufferCalculateHostPtrSizes, BufferCalculateHostPtrSize, testing::ValuesIn(Inputs)); TEST(Buffers64on32Tests, given32BitBufferCreatedWithUseHostPtrFlagThatIsZeroCopyWhenAskedForStorageThenHostPtrIsReturned) { DebugManagerStateRestore dbgRestorer; { DebugManager.flags.Force32bitAddressing.set(true); MockContext context; auto size = MemoryConstants::pageSize; void *ptr = (void *)0x1000; auto ptrOffset = MemoryConstants::cacheLineSize; uintptr_t offsetedPtr = (uintptr_t)ptr + ptrOffset; auto retVal = CL_SUCCESS; auto buffer = Buffer::create( &context, CL_MEM_USE_HOST_PTR, size, (void *)offsetedPtr, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(buffer->isMemObjZeroCopy()); EXPECT_EQ((void *)offsetedPtr, buffer->getCpuAddressForMapping()); EXPECT_EQ((void *)offsetedPtr, buffer->getCpuAddressForMemoryTransfer()); delete buffer; DebugManager.flags.Force32bitAddressing.set(false); } } TEST(Buffers64on32Tests, 
given32BitBufferCreatedWithAllocHostPtrFlagThatIsZeroCopyWhenAskedForStorageThenStorageIsEqualToMemoryStorage) {
    DebugManagerStateRestore dbgRestorer;
    {
        DebugManager.flags.Force32bitAddressing.set(true);
        MockContext context;
        auto size = MemoryConstants::pageSize;
        auto retVal = CL_SUCCESS;

        auto buffer = Buffer::create(
            &context,
            CL_MEM_ALLOC_HOST_PTR,
            size,
            nullptr,
            retVal);
        EXPECT_EQ(CL_SUCCESS, retVal);

        EXPECT_TRUE(buffer->isMemObjZeroCopy());
        EXPECT_EQ(buffer->getCpuAddress(), buffer->getCpuAddressForMapping());
        EXPECT_EQ(buffer->getCpuAddress(), buffer->getCpuAddressForMemoryTransfer());

        delete buffer;
        DebugManager.flags.Force32bitAddressing.set(false);
    }
}

// A host pointer one byte past a page boundary cannot be zero-copy: mapping
// must return the caller's pointer, transfers must use the buffer's storage.
TEST(Buffers64on32Tests, given32BitBufferThatIsCreatedWithUseHostPtrButIsNotZeroCopyThenProperPointersAreReturned) {
    DebugManagerStateRestore dbgRestorer;
    {
        DebugManager.flags.Force32bitAddressing.set(true);
        MockContext context;

        auto size = MemoryConstants::pageSize;
        void *basePtr = (void *)alignedMalloc(size * 2, MemoryConstants::pageSize);
        auto misalignment = 1;
        uintptr_t misalignedAddress = (uintptr_t)basePtr + misalignment;
        auto retVal = CL_SUCCESS;

        auto buffer = Buffer::create(&context, CL_MEM_USE_HOST_PTR, size, (void *)misalignedAddress, retVal);
        EXPECT_EQ(CL_SUCCESS, retVal);

        EXPECT_FALSE(buffer->isMemObjZeroCopy());
        EXPECT_EQ((void *)misalignedAddress, buffer->getCpuAddressForMapping());
        EXPECT_EQ(buffer->getCpuAddress(), buffer->getCpuAddressForMemoryTransfer());

        delete buffer;
        DebugManager.flags.Force32bitAddressing.set(false);
        alignedFree(basePtr);
    }
}

// A buffer created through createSharedBuffer must expose the sharing handler
// it was given.
TEST(SharedBuffersTest, whenBuffersIsCreatedWithSharingHandlerThenItIsSharedBuffer) {
    MockContext context;
    auto memoryManager = context.getDevice(0)->getMemoryManager();
    auto handler = new SharingHandler();
    auto graphicsAlloaction = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{context.getDevice(0)->getRootDeviceIndex(), MemoryConstants::pageSize});
    auto buffer = Buffer::createSharedBuffer(&context, CL_MEM_READ_ONLY, handler,
GraphicsAllocationHelper::toMultiGraphicsAllocation(graphicsAlloaction)); ASSERT_NE(nullptr, buffer); EXPECT_EQ(handler, buffer->peekSharingHandler()); buffer->release(); } class BufferTests : public ::testing::Test { protected: void SetUp() override { device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); } void TearDown() override { } std::unique_ptr device; }; typedef BufferTests BufferSetSurfaceTests; HWCMDTEST_F(IGFX_GEN8_CORE, BufferSetSurfaceTests, givenBufferSetSurfaceThatMemoryPtrAndSizeIsAlignedToCachelineThenL3CacheShouldBeOn) { auto size = MemoryConstants::pageSize; auto ptr = (void *)alignedMalloc(size * 2, MemoryConstants::pageSize); using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; RENDER_SURFACE_STATE surfaceState = {}; Buffer::setSurfaceState(device.get(), &surfaceState, false, false, size, ptr, 0, nullptr, 0, 0, false, false); auto mocs = surfaceState.getMemoryObjectControlState(); auto gmmHelper = device->getGmmHelper(); EXPECT_EQ(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER), mocs); alignedFree(ptr); } HWTEST_F(BufferSetSurfaceTests, givenDebugVariableToDisableCachingForStatefulBufferThenL3CacheShouldBeOff) { DebugManagerStateRestore restore; DebugManager.flags.DisableCachingForStatefulBufferAccess.set(true); auto size = MemoryConstants::pageSize; auto ptr = (void *)alignedMalloc(size * 2, MemoryConstants::pageSize); using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; RENDER_SURFACE_STATE surfaceState = {}; Buffer::setSurfaceState(device.get(), &surfaceState, false, false, size, ptr, 0, nullptr, 0, 0, false, false); auto mocs = surfaceState.getMemoryObjectControlState(); auto gmmHelper = device->getGmmHelper(); EXPECT_EQ(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED), mocs); alignedFree(ptr); DebugManager.flags.DisableCachingForStatefulBufferAccess.set(false); } HWTEST_F(BufferSetSurfaceTests, 
givenBufferSetSurfaceThatMemoryPtrIsUnalignedToCachelineThenL3CacheShouldBeOff) {

    auto size = MemoryConstants::pageSize;
    auto ptr = alignedMalloc(size * 2, MemoryConstants::pageSize);
    auto ptrOffset = 1;
    auto offsetedPtr = (void *)((uintptr_t)ptr + ptrOffset);

    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
    RENDER_SURFACE_STATE surfaceState = {};

    Buffer::setSurfaceState(device.get(), &surfaceState, false, false, size, offsetedPtr, 0, nullptr, 0, 0, false, false);

    auto mocs = surfaceState.getMemoryObjectControlState();
    auto gmmHelper = device->getGmmHelper();
    EXPECT_EQ(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED), mocs);

    alignedFree(ptr);
}

// A size one byte past a page must select the CACHELINE_MISALIGNED MOCS even
// though the pointer itself is aligned.
HWTEST_F(BufferSetSurfaceTests, givenBufferSetSurfaceThatMemorySizeIsUnalignedToCachelineThenL3CacheShouldBeOff) {
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;

    auto size = MemoryConstants::pageSize;
    auto ptr = alignedMalloc(size * 2, MemoryConstants::pageSize);
    auto sizeOffset = 1;
    auto unalignedSize = size + sizeOffset;

    RENDER_SURFACE_STATE surfaceState = {};
    Buffer::setSurfaceState(device.get(), &surfaceState, false, false, unalignedSize, ptr, 0, nullptr, 0, 0, false, false);

    auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED);
    auto actualMocs = surfaceState.getMemoryObjectControlState();
    EXPECT_EQ(expectedMocs, actualMocs);

    alignedFree(ptr);
}

// Same misaligned size, but created with CL_MEM_READ_ONLY: the regular
// OCL_BUFFER MOCS is expected.
HWTEST_F(BufferSetSurfaceTests, givenBufferSetSurfaceThatMemoryIsUnalignedToCachelineButReadOnlyThenL3CacheShouldBeStillOn) {

    auto size = MemoryConstants::pageSize;
    auto ptr = alignedMalloc(size * 2, MemoryConstants::pageSize);
    auto sizeOffset = 1;
    auto offsetedSize = size + sizeOffset;

    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
    RENDER_SURFACE_STATE surfaceState = {};

    Buffer::setSurfaceState(device.get(), &surfaceState, false, false, offsetedSize, ptr, 0, nullptr, CL_MEM_READ_ONLY, 0, false, false);

    auto mocs =
surfaceState.getMemoryObjectControlState(); auto gmmHelper = device->getGmmHelper(); EXPECT_EQ(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER), mocs); alignedFree(ptr); } HWTEST_F(BufferSetSurfaceTests, givenBufferSetSurfaceThatMemorySizeIsUnalignedThenSurfaceSizeShouldBeAlignedToFour) { auto size = MemoryConstants::pageSize; auto ptr = alignedMalloc(size * 2, MemoryConstants::pageSize); auto sizeOffset = 1; auto offsetedSize = size + sizeOffset; using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; RENDER_SURFACE_STATE surfaceState = {}; Buffer::setSurfaceState(device.get(), &surfaceState, false, false, offsetedSize, ptr, 0, nullptr, 0, 0, false, false); auto width = surfaceState.getWidth(); EXPECT_EQ(alignUp(width, 4), width); alignedFree(ptr); } HWTEST_F(BufferSetSurfaceTests, givenBufferSetSurfaceWhenOffsetIsSpecifiedForSvmAllocationThenSetSurfaceAddressWithOffsetedPointer) { auto size = 2 * MemoryConstants::pageSize; auto ptr = alignedMalloc(size, MemoryConstants::pageSize); auto offset = 4; MockGraphicsAllocation svmAlloc(ptr, size); using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; RENDER_SURFACE_STATE surfaceState = {}; Buffer::setSurfaceState(device.get(), &surfaceState, false, false, size, ptr, offset, &svmAlloc, 0, 0, false, false); auto baseAddress = surfaceState.getSurfaceBaseAddress(); EXPECT_EQ(svmAlloc.getGpuAddress() + offset, baseAddress); alignedFree(ptr); } HWTEST_F(BufferSetSurfaceTests, givenBufferSetSurfaceThatMemoryPtrIsNotNullThenBufferSurfaceShouldBeUsed) { auto size = MemoryConstants::pageSize; auto ptr = alignedMalloc(size * 2, MemoryConstants::pageSize); using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; RENDER_SURFACE_STATE surfaceState = {}; Buffer::setSurfaceState(device.get(), &surfaceState, false, false, size, ptr, 0, nullptr, 0, 0, false, false); auto surfType = surfaceState.getSurfaceType(); EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_BUFFER, surfType); 
alignedFree(ptr);
}

// A null pointer with zero size must program a NULL surface type.
HWTEST_F(BufferSetSurfaceTests, givenBufferSetSurfaceThatMemoryPtrIsNullThenNullSurfaceShouldBeUsed) {
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;

    RENDER_SURFACE_STATE surfaceState = {};
    Buffer::setSurfaceState(device.get(), &surfaceState, false, false, 0, nullptr, 0, nullptr, 0, 0, false, false);

    auto programmedType = surfaceState.getSurfaceType();
    EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_NULL, programmedType);
}

// With 32-bit addressing forced, setArgStateful must program the allocation's
// GPU address as the surface base address.
HWTEST_F(BufferSetSurfaceTests, givenBufferSetSurfaceThatAddressIsForcedTo32bitWhenSetArgStatefulIsCalledThenSurfaceBaseAddressIsPopulatedWithGpuAddress) {
    DebugManagerStateRestore dbgRestorer;
    {
        DebugManager.flags.Force32bitAddressing.set(true);
        MockContext context;
        auto rootDeviceIndex = context.getDevice(0)->getRootDeviceIndex();
        auto size = MemoryConstants::pageSize;
        auto ptr = (void *)alignedMalloc(size * 2, MemoryConstants::pageSize);
        auto retVal = CL_SUCCESS;

        auto buffer = Buffer::create(
            &context,
            CL_MEM_USE_HOST_PTR,
            size,
            ptr,
            retVal);
        EXPECT_EQ(CL_SUCCESS, retVal);

        EXPECT_TRUE(is64bit ?
buffer->getGraphicsAllocation(rootDeviceIndex)->is32BitAllocation() : true); using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; RENDER_SURFACE_STATE surfaceState = {}; buffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice(), false, false); auto surfBaseAddress = surfaceState.getSurfaceBaseAddress(); auto bufferAddress = buffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress(); EXPECT_EQ(bufferAddress, surfBaseAddress); delete buffer; alignedFree(ptr); DebugManager.flags.Force32bitAddressing.set(false); } } HWTEST_F(BufferSetSurfaceTests, givenBufferWithOffsetWhenSetArgStatefulIsCalledThenSurfaceBaseAddressIsProperlyOffseted) { MockContext context; auto rootDeviceIndex = context.getDevice(0)->getRootDeviceIndex(); auto size = MemoryConstants::pageSize; auto ptr = (void *)alignedMalloc(size * 2, MemoryConstants::pageSize); auto retVal = CL_SUCCESS; auto buffer = Buffer::create( &context, CL_MEM_USE_HOST_PTR, size, ptr, retVal); EXPECT_EQ(CL_SUCCESS, retVal); cl_buffer_region region = {4, 8}; retVal = -1; auto subBuffer = buffer->createSubBuffer(CL_MEM_READ_WRITE, 0, ®ion, retVal); ASSERT_NE(nullptr, subBuffer); ASSERT_EQ(CL_SUCCESS, retVal); using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; RENDER_SURFACE_STATE surfaceState = {}; subBuffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice(), false, false); auto surfBaseAddress = surfaceState.getSurfaceBaseAddress(); auto bufferAddress = buffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress(); EXPECT_EQ(bufferAddress + region.origin, surfBaseAddress); subBuffer->release(); delete buffer; alignedFree(ptr); DebugManager.flags.Force32bitAddressing.set(false); } HWTEST_F(BufferSetSurfaceTests, givenBufferWhenSetArgStatefulWithL3ChacheDisabledIsCalledThenL3CacheShouldBeOffAndSizeIsAlignedTo512) { MockContext context; auto size = 128; auto retVal = CL_SUCCESS; auto buffer = 
std::unique_ptr(Buffer::create( &context, CL_MEM_READ_WRITE, size, nullptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; RENDER_SURFACE_STATE surfaceState = {}; buffer->setArgStateful(&surfaceState, false, true, true, false, context.getDevice(0)->getDevice(), false, false); auto mocs = surfaceState.getMemoryObjectControlState(); auto gmmHelper = device->getGmmHelper(); EXPECT_EQ(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED), mocs); EXPECT_EQ(128u, surfaceState.getWidth()); EXPECT_EQ(4u, surfaceState.getHeight()); } HWTEST_F(BufferSetSurfaceTests, givenBufferThatIsMisalignedButIsAReadOnlyArgumentWhenSurfaceStateIsSetThenL3IsOn) { MockContext context; auto rootDeviceIndex = context.getDevice(0)->getRootDeviceIndex(); auto size = 128; auto retVal = CL_SUCCESS; auto buffer = std::unique_ptr(Buffer::create( &context, CL_MEM_READ_WRITE, size, nullptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; RENDER_SURFACE_STATE surfaceState = {}; buffer->getGraphicsAllocation(rootDeviceIndex)->setSize(127); buffer->setArgStateful(&surfaceState, false, false, false, true, context.getDevice(0)->getDevice(), false, false); auto mocs = surfaceState.getMemoryObjectControlState(); auto gmmHelper = device->getGmmHelper(); auto expectedMocs = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); auto expectedMocs2 = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST); EXPECT_TRUE(expectedMocs == mocs || expectedMocs2 == mocs); } HWTEST_F(BufferSetSurfaceTests, givenAlignedCacheableReadOnlyBufferThenChoseOclBufferPolicy) { MockContext context; const auto size = MemoryConstants::pageSize; const auto ptr = (void *)alignedMalloc(size * 2, MemoryConstants::pageSize); const auto flags = CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY; auto retVal = CL_SUCCESS; auto buffer = std::unique_ptr(Buffer::create( &context, flags, size, ptr, retVal)); 
EXPECT_EQ(CL_SUCCESS, retVal); typename FamilyType::RENDER_SURFACE_STATE surfaceState = {}; buffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice(), false, false); const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); const auto actualMocs = surfaceState.getMemoryObjectControlState(); EXPECT_EQ(expectedMocs, actualMocs); alignedFree(ptr); } HWCMDTEST_F(IGFX_GEN8_CORE, BufferSetSurfaceTests, givenAlignedCacheableNonReadOnlyBufferThenChooseOclBufferPolicy) { MockContext context; const auto size = MemoryConstants::pageSize; const auto ptr = (void *)alignedMalloc(size * 2, MemoryConstants::pageSize); const auto flags = CL_MEM_USE_HOST_PTR; auto retVal = CL_SUCCESS; auto buffer = std::unique_ptr(Buffer::create( &context, flags, size, ptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); typename FamilyType::RENDER_SURFACE_STATE surfaceState = {}; buffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice(), false, false); const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); const auto actualMocs = surfaceState.getMemoryObjectControlState(); EXPECT_EQ(expectedMocs, actualMocs); alignedFree(ptr); } HWTEST_F(BufferSetSurfaceTests, givenCompressedGmmResourceWhenSurfaceStateIsProgrammedThenSetAuxParams) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using AUXILIARY_SURFACE_MODE = typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE; RENDER_SURFACE_STATE surfaceState = {}; MockContext context; auto rootDeviceIndex = context.getDevice(0)->getRootDeviceIndex(); auto retVal = CL_SUCCESS; std::unique_ptr buffer(Buffer::create(&context, CL_MEM_READ_WRITE, 1, nullptr, retVal)); auto graphicsAllocation = buffer->getGraphicsAllocation(rootDeviceIndex); auto gmm = new Gmm(context.getDevice(0)->getGmmClientContext(), nullptr, 1, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true); 
graphicsAllocation->setDefaultGmm(gmm); gmm->isCompressionEnabled = true; buffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice(), false, false); EXPECT_EQ(0u, surfaceState.getAuxiliarySurfaceBaseAddress()); EXPECT_TRUE(EncodeSurfaceState::isAuxModeEnabled(&surfaceState, gmm)); EXPECT_TRUE(RENDER_SURFACE_STATE::COHERENCY_TYPE_GPU_COHERENT == surfaceState.getCoherencyType()); } HWTEST_F(BufferSetSurfaceTests, givenNonCompressedGmmResourceWhenSurfaceStateIsProgrammedThenDontSetAuxParams) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using AUXILIARY_SURFACE_MODE = typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE; RENDER_SURFACE_STATE surfaceState = {}; MockContext context; auto rootDeviceIndex = context.getDevice(0)->getRootDeviceIndex(); auto retVal = CL_SUCCESS; std::unique_ptr buffer(Buffer::create(&context, CL_MEM_READ_WRITE, 1, nullptr, retVal)); auto gmm = new Gmm(context.getDevice(0)->getGmmClientContext(), nullptr, 1, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true); buffer->getGraphicsAllocation(rootDeviceIndex)->setDefaultGmm(gmm); gmm->isCompressionEnabled = false; buffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice(), false, false); EXPECT_EQ(0u, surfaceState.getAuxiliarySurfaceBaseAddress()); EXPECT_TRUE(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE == surfaceState.getAuxiliarySurfaceMode()); EXPECT_TRUE(UnitTestHelper::getCoherencyTypeSupported(RENDER_SURFACE_STATE::COHERENCY_TYPE_IA_COHERENT) == surfaceState.getCoherencyType()); } HWTEST_F(BufferSetSurfaceTests, givenMisalignedPointerWhenSurfaceStateIsProgrammedThenBaseAddressAndLengthAreAlignedToDword) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using AUXILIARY_SURFACE_MODE = typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE; RENDER_SURFACE_STATE surfaceState = {}; MockContext context; uintptr_t ptr = 0xfffff000; void *svmPtr = 
reinterpret_cast(ptr); Buffer::setSurfaceState(device.get(), &surfaceState, false, false, 5, svmPtr, 0, nullptr, 0, 0, false, false); EXPECT_EQ(castToUint64(svmPtr), surfaceState.getSurfaceBaseAddress()); SURFACE_STATE_BUFFER_LENGTH length = {}; length.SurfaceState.Width = surfaceState.getWidth() - 1; length.SurfaceState.Height = surfaceState.getHeight() - 1; length.SurfaceState.Depth = surfaceState.getDepth() - 1; EXPECT_EQ(alignUp(5u, 4u), length.Length + 1); } HWTEST_F(BufferSetSurfaceTests, givenBufferThatIsMisalignedWhenSurfaceStateIsBeingProgrammedThenL3CacheIsOff) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; RENDER_SURFACE_STATE surfaceState = {}; MockContext context; void *svmPtr = reinterpret_cast(0x1005); Buffer::setSurfaceState(device.get(), &surfaceState, false, false, 5, svmPtr, 0, nullptr, 0, 0, false, false); EXPECT_EQ(0u, surfaceState.getMemoryObjectControlState()); } using BufferHwFromDeviceTests = BufferTests; HWTEST_F(BufferHwFromDeviceTests, givenMultiGraphicsAllocationWhenCreateBufferHwFromDeviceThenMultiGraphicsAllocationInBufferIsProperlySet) { auto size = 2 * MemoryConstants::pageSize; auto ptr = alignedMalloc(size, MemoryConstants::pageSize); MockGraphicsAllocation svmAlloc(ptr, size); auto multiGraphicsAllocation = MultiGraphicsAllocation(device.get()->getRootDeviceIndex()); multiGraphicsAllocation.addAllocation(&svmAlloc); auto buffer = std::unique_ptr(Buffer::createBufferHwFromDevice(device.get(), 0, 0, size, ptr, ptr, multiGraphicsAllocation, 0, true, false, false)); EXPECT_EQ(device.get()->getRootDeviceIndex(), 0u); EXPECT_EQ(buffer->getMultiGraphicsAllocation().getGraphicsAllocations().size(), multiGraphicsAllocation.getGraphicsAllocations().size()); EXPECT_EQ(buffer->getMultiGraphicsAllocation().getGraphicsAllocation(device.get()->getRootDeviceIndex()), multiGraphicsAllocation.getGraphicsAllocation(device.get()->getRootDeviceIndex())); alignedFree(ptr); } class BufferL3CacheTests : public 
::testing::TestWithParam { public: void SetUp() override { hostPtr = reinterpret_cast(GetParam()); } MockContext ctx; const size_t region[3] = {3, 3, 1}; const size_t origin[3] = {0, 0, 0}; void *hostPtr; }; HWTEST_P(BufferL3CacheTests, givenMisalignedAndAlignedBufferWhenClEnqueueWriteImageThenL3CacheIsOn) { const auto &compilerHwInfoConfig = *CompilerHwInfoConfig::get(defaultHwInfo->platform.eProductFamily); if (compilerHwInfoConfig.isForceToStatelessRequired() || !ctx.getDevice(0)->getHardwareInfo().capabilityTable.supportsImages) { GTEST_SKIP(); } using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; CommandQueueHw cmdQ(&ctx, ctx.getDevice(0), nullptr, false); auto surfaceState = reinterpret_cast(cmdQ.getGpgpuCommandStreamReceiver().getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0).getSpace(0)); cl_image_format imageFormat; cl_image_desc imageDesc; imageFormat.image_channel_order = CL_RGBA; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_width = 3; imageDesc.image_height = 3; imageDesc.image_depth = 1; imageDesc.image_array_size = 1; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = nullptr; auto image = clCreateImage(&ctx, CL_MEM_READ_WRITE, &imageFormat, &imageDesc, nullptr, nullptr); clEnqueueWriteImage(&cmdQ, image, false, origin, region, 0, 0, hostPtr, 0, nullptr, nullptr); auto expect = ctx.getDevice(0)->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); auto expect2 = ctx.getDevice(0)->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST); EXPECT_NE(NULL, surfaceState->getMemoryObjectControlState()); EXPECT_TRUE(expect == surfaceState->getMemoryObjectControlState() || expect2 == surfaceState->getMemoryObjectControlState()); clReleaseMemObject(image); } HWTEST_P(BufferL3CacheTests, givenMisalignedAndAlignedBufferWhenClEnqueueWriteBufferRectThenL3CacheIsOn) { const 
auto &compilerHwInfoConfig = *CompilerHwInfoConfig::get(defaultHwInfo->platform.eProductFamily); if (compilerHwInfoConfig.isForceToStatelessRequired()) { GTEST_SKIP(); } using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; CommandQueueHw cmdQ(&ctx, ctx.getDevice(0), nullptr, false); auto surfaceState = reinterpret_cast(cmdQ.getGpgpuCommandStreamReceiver().getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0).getSpace(0)); auto buffer = clCreateBuffer(&ctx, CL_MEM_READ_WRITE, 36, nullptr, nullptr); clEnqueueWriteBufferRect(&cmdQ, buffer, false, origin, origin, region, 0, 0, 0, 0, hostPtr, 0, nullptr, nullptr); auto expect = ctx.getDevice(0)->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); auto expect2 = ctx.getDevice(0)->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST); EXPECT_NE(NULL, surfaceState->getMemoryObjectControlState()); EXPECT_TRUE(expect == surfaceState->getMemoryObjectControlState() || expect2 == surfaceState->getMemoryObjectControlState()); clReleaseMemObject(buffer); } static uint64_t pointers[] = { 0x1005, 0x2000}; INSTANTIATE_TEST_CASE_P( pointers, BufferL3CacheTests, testing::ValuesIn(pointers)); struct BufferUnmapTest : public ClDeviceFixture, public ::testing::Test { void SetUp() override { ClDeviceFixture::SetUp(); } void TearDown() override { ClDeviceFixture::TearDown(); } }; HWTEST_F(BufferUnmapTest, givenBufferWithSharingHandlerWhenUnmappingThenUseNonBlockingEnqueueWriteBuffer) { MockContext context(pClDevice); MockCommandQueueHw cmdQ(&context, pClDevice, nullptr); auto retVal = CL_SUCCESS; std::unique_ptr buffer(Buffer::create(&context, CL_MEM_ALLOC_HOST_PTR, 123, nullptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); buffer->setSharingHandler(new SharingHandler()); EXPECT_NE(nullptr, buffer->peekSharingHandler()); auto gfxAllocation = buffer->getGraphicsAllocation(pDevice->getRootDeviceIndex()); for (auto handleId = 0u; handleId < gfxAllocation->getNumGmms(); handleId++) { gfxAllocation->setGmm(new 
MockGmm(pDevice->getGmmClientContext()), handleId); } auto mappedPtr = clEnqueueMapBuffer(&cmdQ, buffer.get(), CL_TRUE, CL_MAP_WRITE, 0, 1, 0, nullptr, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, cmdQ.EnqueueWriteBufferCounter); retVal = clEnqueueUnmapMemObject(&cmdQ, buffer.get(), mappedPtr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, cmdQ.EnqueueWriteBufferCounter); EXPECT_FALSE(cmdQ.blockingWriteBuffer); } HWTEST_F(BufferUnmapTest, givenBufferWithoutSharingHandlerWhenUnmappingThenDontUseEnqueueWriteBuffer) { MockContext context(pClDevice); MockCommandQueueHw cmdQ(&context, pClDevice, nullptr); auto retVal = CL_SUCCESS; std::unique_ptr buffer(Buffer::create(&context, CL_MEM_ALLOC_HOST_PTR, 123, nullptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(nullptr, buffer->peekSharingHandler()); auto mappedPtr = clEnqueueMapBuffer(&cmdQ, buffer.get(), CL_TRUE, CL_MAP_READ, 0, 1, 0, nullptr, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clEnqueueUnmapMemObject(&cmdQ, buffer.get(), mappedPtr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, cmdQ.EnqueueWriteBufferCounter); } using BufferTransferTests = BufferUnmapTest; TEST_F(BufferTransferTests, givenBufferWhenTransferToHostPtrCalledThenCopyRequestedSizeAndOffsetOnly) { MockContext context(pClDevice); auto retVal = CL_SUCCESS; const size_t bufferSize = 100; size_t ignoredParam = 123; MemObjOffsetArray copyOffset = {{20, ignoredParam, ignoredParam}}; MemObjSizeArray copySize = {{10, ignoredParam, ignoredParam}}; uint8_t hostPtr[bufferSize] = {}; uint8_t expectedHostPtr[bufferSize] = {}; std::unique_ptr buffer(Buffer::create(&context, CL_MEM_USE_HOST_PTR, bufferSize, hostPtr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); auto srcPtr = buffer->getCpuAddress(); EXPECT_NE(srcPtr, hostPtr); memset(srcPtr, 123, bufferSize); memset(ptrOffset(expectedHostPtr, copyOffset[0]), 123, copySize[0]); buffer->transferDataToHostPtr(copySize, copyOffset); 
EXPECT_TRUE(memcmp(hostPtr, expectedHostPtr, copySize[0]) == 0); } TEST_F(BufferTransferTests, givenBufferWhenTransferFromHostPtrCalledThenCopyRequestedSizeAndOffsetOnly) { MockContext context(pClDevice); auto retVal = CL_SUCCESS; const size_t bufferSize = 100; size_t ignoredParam = 123; MemObjOffsetArray copyOffset = {{20, ignoredParam, ignoredParam}}; MemObjSizeArray copySize = {{10, ignoredParam, ignoredParam}}; uint8_t hostPtr[bufferSize] = {}; uint8_t expectedBufferMemory[bufferSize] = {}; std::unique_ptr buffer(Buffer::create(&context, CL_MEM_USE_HOST_PTR, bufferSize, hostPtr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(buffer->getCpuAddress(), hostPtr); memset(hostPtr, 123, bufferSize); memset(ptrOffset(expectedBufferMemory, copyOffset[0]), 123, copySize[0]); buffer->transferDataFromHostPtr(copySize, copyOffset); EXPECT_TRUE(memcmp(expectedBufferMemory, buffer->getCpuAddress(), copySize[0]) == 0); } using MultiRootDeviceBufferTest = MultiRootDeviceFixture; TEST_F(MultiRootDeviceBufferTest, WhenCleanAllGraphicsAllocationsCalledThenGraphicsAllocationsAreProperlyRemovedAccordingToIsParentObjectFlag) { AllocationInfoType allocationInfo; allocationInfo.resize(3u); allocationInfo[1u] = {}; allocationInfo[1u].memory = mockMemoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{1u, MemoryConstants::pageSize}); bool isParentObject = true; Buffer::cleanAllGraphicsAllocations(*context, *context->getMemoryManager(), allocationInfo, isParentObject); EXPECT_EQ(mockMemoryManager->freeGraphicsMemoryCalled, 0u); isParentObject = false; Buffer::cleanAllGraphicsAllocations(*context, *context->getMemoryManager(), allocationInfo, isParentObject); EXPECT_EQ(mockMemoryManager->freeGraphicsMemoryCalled, 1u); } TEST_F(MultiRootDeviceBufferTest, WhenBufferIsCreatedThenBufferGraphicsAllocationHasCorrectRootDeviceIndex) { cl_int retVal = 0; cl_mem_flags flags = CL_MEM_READ_WRITE; std::unique_ptr buffer(Buffer::create(context.get(), flags, 
MemoryConstants::pageSize, nullptr, retVal)); auto graphicsAllocation = buffer->getGraphicsAllocation(expectedRootDeviceIndex); ASSERT_NE(nullptr, graphicsAllocation); EXPECT_EQ(expectedRootDeviceIndex, graphicsAllocation->getRootDeviceIndex()); } TEST_F(MultiRootDeviceBufferTest, WhenBufferIsCreatedThenBufferMultiGraphicsAllocationIsCreatedInSystemMemoryPool) { cl_int retVal = 0; std::unique_ptr buffer1(Buffer::create(context.get(), 0, MemoryConstants::pageSize, nullptr, retVal)); EXPECT_TRUE(MemoryPool::isSystemMemoryPool(buffer1->getMultiGraphicsAllocation().getGraphicsAllocation(1u)->getMemoryPool())); EXPECT_TRUE(MemoryPool::isSystemMemoryPool(buffer1->getMultiGraphicsAllocation().getGraphicsAllocation(2u)->getMemoryPool())); } TEST(MultiRootDeviceBufferTest2, WhenBufferIsCreatedThenSecondAndSubsequentAllocationsAreCreatedFromExisitingStorage) { cl_int retVal = 0; MockDefaultContext context; auto memoryManager = static_cast(context.getMemoryManager()); memoryManager->createGraphicsAllocationFromExistingStorageCalled = 0u; memoryManager->allocationsFromExistingStorage.clear(); std::unique_ptr buffer(Buffer::create(&context, 0, MemoryConstants::pageSize, nullptr, retVal)); EXPECT_EQ(3u, context.getRootDeviceIndices().size()); EXPECT_NE(nullptr, buffer->getMultiGraphicsAllocation().getGraphicsAllocation(0u)); EXPECT_NE(nullptr, buffer->getMultiGraphicsAllocation().getGraphicsAllocation(1u)); EXPECT_NE(nullptr, buffer->getMultiGraphicsAllocation().getGraphicsAllocation(2u)); EXPECT_EQ(2u, memoryManager->createGraphicsAllocationFromExistingStorageCalled); EXPECT_EQ(memoryManager->allocationsFromExistingStorage[0], buffer->getMultiGraphicsAllocation().getGraphicsAllocation(1u)); EXPECT_EQ(memoryManager->allocationsFromExistingStorage[1], buffer->getMultiGraphicsAllocation().getGraphicsAllocation(2u)); } TEST_F(MultiRootDeviceBufferTest, givenBufferWhenGetSurfaceSizeCalledWithoutAlignSizeForAuxTranslationThenCorrectValueReturned) { cl_int retVal = 0; cl_mem_flags 
flags = CL_MEM_READ_WRITE; uint32_t size = 0x131; std::unique_ptr buffer(Buffer::create(context.get(), flags, size, nullptr, retVal)); auto surfaceSize = buffer->getSurfaceSize(false, expectedRootDeviceIndex); EXPECT_EQ(surfaceSize, alignUp(size, 4)); } TEST_F(MultiRootDeviceBufferTest, givenBufferWhenGetSurfaceSizeCalledWithAlignSizeForAuxTranslationThenCorrectValueReturned) { cl_int retVal = 0; cl_mem_flags flags = CL_MEM_READ_WRITE; uint32_t size = 0x131; std::unique_ptr buffer(Buffer::create(context.get(), flags, size, nullptr, retVal)); auto surfaceSize = buffer->getSurfaceSize(true, expectedRootDeviceIndex); EXPECT_EQ(surfaceSize, alignUp(size, 512)); } TEST_F(MultiRootDeviceBufferTest, givenNullptrGraphicsAllocationForRootDeviceIndexWhenGettingBufferAddressThenHostPtrReturned) { cl_int retVal = 0; cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR; char *hostPtr[MemoryConstants::pageSize]{}; std::unique_ptr buffer(Buffer::create(context.get(), flags, MemoryConstants::pageSize, hostPtr, retVal)); auto address = buffer->getBufferAddress(expectedRootDeviceIndex); auto graphicsAllocation = buffer->getGraphicsAllocation(expectedRootDeviceIndex); EXPECT_EQ(graphicsAllocation->getGpuAddress(), address); address = buffer->getBufferAddress(0); EXPECT_EQ(reinterpret_cast(buffer->getHostPtr()), address); } compute-runtime-22.14.22890/opencl/test/unit_test/mem_obj/buffer_tests_pvc_and_later.cpp000066400000000000000000000124511422164147700313220ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/extensions/public/cl_ext_private.h" #include 
"opencl/source/cl_device/cl_device.h" #include "opencl/source/helpers/cl_memory_properties_helpers.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_platform.h" using namespace NEO; using PvcAndLaterBufferTests = ::testing::Test; HWTEST2_F(PvcAndLaterBufferTests, WhenAllocatingBufferThenGpuAddressIsFromHeapExtended, IsAtLeastXeHpcCore) { if (is32bit || defaultHwInfo->capabilityTable.gpuAddressSpace != maxNBitValue(57)) { GTEST_SKIP(); } DebugManagerStateRestore restore; DebugManager.flags.EnableLocalMemory.set(true); initPlatform(); MockContext context(platform()->getClDevice(0)); size_t size = 0x1000; auto retVal = CL_SUCCESS; auto buffer = std::unique_ptr( Buffer::create( &context, CL_MEM_READ_WRITE, size, nullptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); auto graphicsAllocation = buffer->getGraphicsAllocation(context.getDevice(0)->getRootDeviceIndex()); ASSERT_NE(nullptr, graphicsAllocation); auto gpuAddress = GmmHelper::decanonize(graphicsAllocation->getGpuAddress()); auto extendedHeapBase = context.memoryManager->getGfxPartition(0)->getHeapBase(HeapIndex::HEAP_EXTENDED); auto extendedHeapLimit = context.memoryManager->getGfxPartition(0)->getHeapLimit(HeapIndex::HEAP_EXTENDED); EXPECT_EQ(extendedHeapBase, maxNBitValue(57 - 1) + 1); EXPECT_EQ(extendedHeapLimit, extendedHeapBase + maxNBitValue(48)); EXPECT_GT(gpuAddress, extendedHeapBase); EXPECT_LT(gpuAddress, extendedHeapLimit); } HWTEST2_F(PvcAndLaterBufferTests, WhenAllocatingRtBufferThenGpuAddressFromHeapStandard64Kb, IsAtLeastXeHpcCore) { if (is32bit || defaultHwInfo->capabilityTable.gpuAddressSpace != maxNBitValue(57)) { GTEST_SKIP(); } DebugManagerStateRestore restore; DebugManager.flags.EnableLocalMemory.set(true); initPlatform(); MockContext context(platform()->getClDevice(0)); auto retVal = CL_SUCCESS; std::unique_ptr rtBuffer; 
rtBuffer.reset(Buffer::create(&context, ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_READ_WRITE, CL_MEM_48BIT_RESOURCE_INTEL, 0, &context.getDevice(0)->getDevice()), CL_MEM_READ_WRITE, CL_MEM_48BIT_RESOURCE_INTEL, MemoryConstants::pageSize, nullptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, rtBuffer); EXPECT_TRUE(isValueSet(rtBuffer->getFlagsIntel(), CL_MEM_48BIT_RESOURCE_INTEL)); auto graphicsAllocation = rtBuffer->getGraphicsAllocation(context.getDevice(0)->getRootDeviceIndex()); ASSERT_NE(nullptr, graphicsAllocation); auto gpuAddress = GmmHelper::decanonize(graphicsAllocation->getGpuAddress()); auto standard64KbHeapBase = context.memoryManager->getGfxPartition(0)->getHeapBase(HeapIndex::HEAP_STANDARD64KB); auto standard64KbHeapLimit = context.memoryManager->getGfxPartition(0)->getHeapLimit(HeapIndex::HEAP_STANDARD64KB); EXPECT_GT(gpuAddress, standard64KbHeapBase); EXPECT_LT(gpuAddress, standard64KbHeapLimit); } HWTEST2_F(PvcAndLaterBufferTests, givenCompressedBufferInSystemAndBlitterSupportedWhenCreatingBufferThenDoNotUseBlitterLogicForLocalMem, IsAtLeastXeHpcCore) { DebugManagerStateRestore restore; VariableBackup backupHwInfo(defaultHwInfo.get()); VariableBackup blitMemoryToAllocationFuncBackup{ &BlitHelperFunctions::blitMemoryToAllocation}; DebugManager.flags.RenderCompressedBuffersEnabled.set(true); defaultHwInfo->capabilityTable.blitterOperationsSupported = true; UltClDeviceFactory deviceFactory{1, 0}; auto pDevice = deviceFactory.rootDevices[0]; auto pMockContext = std::make_unique(pDevice); static_cast(pDevice->getExecutionEnvironment()->memoryManager.get())->enable64kbpages[0] = true; static_cast(pDevice->getExecutionEnvironment()->memoryManager.get())->localMemorySupported[0] = false; blitMemoryToAllocationFuncBackup = [](const NEO::Device &device, NEO::GraphicsAllocation *memory, size_t offset, const void *hostPtr, Vec3 size) -> NEO::BlitOperationResult { ADD_FAILURE(); return BlitOperationResult::Fail; }; cl_mem_flags 
flags = CL_MEM_COPY_HOST_PTR | CL_MEM_COMPRESSED_HINT_INTEL; uint32_t hostPtr = 0; cl_int retVal = CL_SUCCESS; auto bufferForBlt = clUniquePtr(Buffer::create(pMockContext.get(), flags, 2000, &hostPtr, retVal)); } compute-runtime-22.14.22890/opencl/test/unit_test/mem_obj/buffer_tests_xehp_and_later.cpp000066400000000000000000000257531422164147700315070ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_container/command_encoder.h" #include "shared/source/gmm_helper/client_context/gmm_client_context.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/mocks/mock_gmm.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/helpers/cl_memory_properties_helpers.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include using namespace NEO; using XeHPAndLaterBufferTests = ::testing::Test; HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterBufferTests, givenDebugFlagSetWhenProgramingSurfaceStateThenForceCompressionFormat) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; DebugManagerStateRestore restorer; uint32_t compressionFormat = 3; MockContext context; auto gmmContext = context.getDevice(0)->getGmmHelper()->getClientContext(); uint32_t defaultCompressionFormat = gmmContext->getSurfaceStateCompressionFormat(GMM_RESOURCE_FORMAT::GMM_FORMAT_GENERIC_8BIT); auto retVal = CL_SUCCESS; auto gmm = new Gmm(context.getDevice(0)->getGmmHelper()->getClientContext(), nullptr, 1, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true); gmm->isCompressionEnabled = true; auto buffer = std::unique_ptr(Buffer::create(&context, CL_MEM_READ_WRITE, 1, nullptr, retVal)); buffer->getGraphicsAllocation(0)->setGmm(gmm, 0); RENDER_SURFACE_STATE surfaceState = FamilyType::cmdInitRenderSurfaceState; { 
buffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice(), false, false); EXPECT_EQ(defaultCompressionFormat, surfaceState.getCompressionFormat()); } { DebugManager.flags.ForceBufferCompressionFormat.set(compressionFormat); buffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice(), false, false); EXPECT_EQ(compressionFormat, surfaceState.getCompressionFormat()); } } HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterBufferTests, givenBufferAllocationInDeviceMemoryWhenStatelessCompressionIsEnabledThenSetSurfaceStateWithCompressionSettings) { DebugManagerStateRestore restorer; DebugManager.flags.EnableLocalMemory.set(1); DebugManager.flags.EnableStatelessCompressionWithUnifiedMemory.set(1); using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using AUXILIARY_SURFACE_MODE = typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE; MockContext context; size_t size = 0x1000; auto retVal = CL_SUCCESS; auto buffer = std::unique_ptr( Buffer::create( &context, CL_MEM_READ_WRITE, size, nullptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); auto &device = context.getDevice(0)->getDevice(); auto allocation = buffer->getGraphicsAllocation(device.getRootDeviceIndex()); auto gmm = new MockGmm(device.getGmmClientContext()); gmm->isCompressionEnabled = true; allocation->setDefaultGmm(gmm); EXPECT_TRUE(!MemoryPool::isSystemMemoryPool(allocation->getMemoryPool())); RENDER_SURFACE_STATE surfaceState = FamilyType::cmdInitRenderSurfaceState; buffer->setArgStateful(&surfaceState, false, false, false, false, device, false, false); EXPECT_EQ(RENDER_SURFACE_STATE::COHERENCY_TYPE_GPU_COHERENT, surfaceState.getCoherencyType()); EXPECT_TRUE(EncodeSurfaceState::isAuxModeEnabled(&surfaceState, allocation->getDefaultGmm())); EXPECT_EQ(static_cast(DebugManager.flags.FormatForStatelessCompressionWithUnifiedMemory.get()), surfaceState.getCompressionFormat()); } HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterBufferTests, 
givenBufferAllocationInHostMemoryWhenStatelessCompressionIsEnabledThenDontSetSurfaceStateWithCompressionSettings) { DebugManagerStateRestore restorer; DebugManager.flags.EnableStatelessCompressionWithUnifiedMemory.set(1); using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using AUXILIARY_SURFACE_MODE = typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE; MockContext context; size_t size = 0x1000; auto retVal = CL_SUCCESS; auto buffer = std::unique_ptr( Buffer::create( &context, CL_MEM_READ_WRITE, size, nullptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(MemoryPool::isSystemMemoryPool(buffer->getGraphicsAllocation(context.getDevice(0)->getRootDeviceIndex())->getMemoryPool())); RENDER_SURFACE_STATE surfaceState = FamilyType::cmdInitRenderSurfaceState; buffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice(), false, false); EXPECT_EQ(RENDER_SURFACE_STATE::COHERENCY_TYPE_GPU_COHERENT, surfaceState.getCoherencyType()); EXPECT_EQ(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE, surfaceState.getAuxiliarySurfaceMode()); EXPECT_EQ(0u, surfaceState.getCompressionFormat()); } HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterBufferTests, givenBufferAllocationWithoutGraphicsAllocationWhenStatelessCompressionIsEnabledThenDontSetSurfaceStateWithCompressionSettings) { DebugManagerStateRestore restorer; DebugManager.flags.EnableStatelessCompressionWithUnifiedMemory.set(1); using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using AUXILIARY_SURFACE_MODE = typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE; MockContext context; cl_float srcMemory[] = {1.0f, 2.0f, 3.0f, 4.0f}; std::unique_ptr buffer(Buffer::createBufferHw( &context, ClMemoryPropertiesHelper::createMemoryProperties(0, 0, 0, &context.getDevice(0)->getDevice()), 0, 0, sizeof(srcMemory), srcMemory, srcMemory, 0, false, false, false)); ASSERT_NE(nullptr, buffer); RENDER_SURFACE_STATE surfaceState = 
FamilyType::cmdInitRenderSurfaceState; buffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice(), false, false); EXPECT_EQ(RENDER_SURFACE_STATE::COHERENCY_TYPE_GPU_COHERENT, surfaceState.getCoherencyType()); EXPECT_EQ(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE, surfaceState.getAuxiliarySurfaceMode()); EXPECT_EQ(0u, surfaceState.getCompressionFormat()); } HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterBufferTests, givenDebugVariableForcingL1CachingWhenBufferSurfaceStateIsSetThenItIsCachedInL1) { DebugManagerStateRestore restorer; DebugManager.flags.ForceL1Caching.set(1u); MockContext context; const auto size = MemoryConstants::pageSize; const auto flags = CL_MEM_READ_WRITE; auto retVal = CL_SUCCESS; auto buffer = std::unique_ptr(Buffer::create( &context, flags, size, nullptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); typename FamilyType::RENDER_SURFACE_STATE surfaceState = {}; buffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice(), false, false); const auto expectedMocs = context.getDevice(0)->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST); const auto actualMocs = surfaceState.getMemoryObjectControlState(); EXPECT_EQ(expectedMocs, actualMocs); } HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterBufferTests, givenDebugVariableForcingL1CachingDisabledWhenBufferSurfaceStateIsSetThenItIsCachedInL3) { DebugManagerStateRestore restorer; DebugManager.flags.ForceL1Caching.set(0u); MockContext context; const auto size = MemoryConstants::pageSize; const auto flags = CL_MEM_READ_WRITE; auto retVal = CL_SUCCESS; auto buffer = std::unique_ptr(Buffer::create( &context, flags, size, nullptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); typename FamilyType::RENDER_SURFACE_STATE surfaceState = {}; buffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice(), false, false); const auto expectedMocs = 
context.getDevice(0)->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); const auto actualMocs = surfaceState.getMemoryObjectControlState(); EXPECT_EQ(expectedMocs, actualMocs); } HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterBufferTests, givenBufferWhenArgumentIsConstAndAuxModeIsOnThenL3DisabledPolicyIsChoosen) { MockContext context; const auto size = MemoryConstants::pageSize; const auto flags = CL_MEM_READ_ONLY; auto retVal = CL_SUCCESS; auto buffer = std::unique_ptr(Buffer::create( &context, flags, size, nullptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); typename FamilyType::RENDER_SURFACE_STATE surfaceState = {}; buffer->setArgStateful(&surfaceState, true, true, false, false, context.getDevice(0)->getDevice(), false, false); const auto expectedMocs = context.getDevice(0)->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED); const auto actualMocs = surfaceState.getMemoryObjectControlState(); EXPECT_EQ(expectedMocs, actualMocs); } HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterBufferTests, givenBufferSetSurfaceThatMemoryPtrAndSizeIsAlignedToCachelineThenL1CacheShouldBeOn) { MockContext context; auto size = MemoryConstants::pageSize; auto ptr = (void *)alignedMalloc(size * 2, MemoryConstants::pageSize); using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; RENDER_SURFACE_STATE surfaceState = {}; auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); Buffer::setSurfaceState(device.get(), &surfaceState, false, false, size, ptr, 0, nullptr, 0, 0, false, false); auto mocs = surfaceState.getMemoryObjectControlState(); auto gmmHelper = device.get()->getGmmHelper(); EXPECT_EQ(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST), mocs); alignedFree(ptr); } HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterBufferTests, givenAlignedCacheableNonReadOnlyBufferThenChooseOclBufferPolicy) { MockContext context; const auto size = MemoryConstants::pageSize; const auto ptr = (void *)alignedMalloc(size * 2, 
MemoryConstants::pageSize); const auto flags = CL_MEM_USE_HOST_PTR; auto retVal = CL_SUCCESS; auto buffer = std::unique_ptr(Buffer::create( &context, flags, size, ptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); typename FamilyType::RENDER_SURFACE_STATE surfaceState = {}; buffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice(), false, false); const auto expectedMocs = context.getDevice(0)->getDevice().getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST); const auto actualMocs = surfaceState.getMemoryObjectControlState(); EXPECT_EQ(expectedMocs, actualMocs); alignedFree(ptr); } compute-runtime-22.14.22890/opencl/test/unit_test/mem_obj/create_image_format_tests.cpp000066400000000000000000000073331422164147700311500ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/fixtures/memory_management_fixture.h" #include "opencl/source/helpers/cl_memory_properties_helpers.h" #include "opencl/source/helpers/surface_formats.h" #include "opencl/source/mem_obj/image.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "gtest/gtest.h" using namespace NEO; static const unsigned int testImageDimensions = 32; template class CreateImageFormatTest : public testing::TestWithParam { public: CreateImageFormatTest() : flags(_flags) { } protected: void SetUp() override { indexImageFormat = GetParam(); ArrayRef surfaceFormatTable = SurfaceFormats::surfaceFormats(flags, defaultHwInfo->capabilityTable.supportsOcl21Features); ASSERT_GT(surfaceFormatTable.size(), indexImageFormat); surfaceFormat = &surfaceFormatTable[indexImageFormat]; // clang-format off imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_width = testImageDimensions; imageDesc.image_height = testImageDimensions; imageDesc.image_depth = 1; imageDesc.image_array_size = 1; imageDesc.image_row_pitch = 0; 
imageDesc.image_slice_pitch = 0; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = NULL; // clang-format on } void TearDown() override { } const ClSurfaceFormatInfo *surfaceFormat; size_t indexImageFormat; cl_image_format imageFormat; cl_image_desc imageDesc; cl_int retVal = CL_SUCCESS; MockContext context; cl_mem_flags flags; }; typedef CreateImageFormatTest ReadWriteFormatTest; TEST_P(ReadWriteFormatTest, GivenValidFormatWhenCreatingImageThenImageIsCreated) { auto image = Image::create( &context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context.getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, image); delete image; } static const size_t zero = 0; INSTANTIATE_TEST_CASE_P( CreateImage, ReadWriteFormatTest, testing::Range(zero, SurfaceFormats::readWrite().size())); typedef CreateImageFormatTest ReadOnlyFormatTest; TEST_P(ReadOnlyFormatTest, GivenValidReadOnlyFormatWhenCreatingImageThenImageIsCreated) { auto image = Image::create( &context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context.getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, image); delete image; } INSTANTIATE_TEST_CASE_P( CreateImage, ReadOnlyFormatTest, testing::Range(zero, SurfaceFormats::readOnly12().size())); typedef CreateImageFormatTest WriteOnlyFormatTest; TEST_P(WriteOnlyFormatTest, GivenValidWriteOnlyFormatWhenCreatingImageThenImageIsCreated) { auto image = Image::create( &context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context.getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, image); delete image; } INSTANTIATE_TEST_CASE_P( CreateImage, WriteOnlyFormatTest, testing::Range(zero, SurfaceFormats::writeOnly().size())); 
compute-runtime-22.14.22890/opencl/test/unit_test/mem_obj/create_image_in_local_memory_tests.cpp000066400000000000000000000074701422164147700330320ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gmm_helper/gmm.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/memory_manager/memory_pool.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/helpers/cl_memory_properties_helpers.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_platform.h" using namespace NEO; class ImageInLocalMemoryTest : public testing::Test { public: ImageInLocalMemoryTest() = default; protected: void SetUp() override { HardwareInfo inputPlatformDevice = *defaultHwInfo; inputPlatformDevice.featureTable.flags.ftrLocalMemory = true; platformsImpl->clear(); auto executionEnvironment = constructPlatform()->peekExecutionEnvironment(); executionEnvironment->prepareRootDeviceEnvironments(1u); executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(&inputPlatformDevice); executionEnvironment->rootDeviceEnvironments[0]->initGmm(); mockMemoryManager = new MockMemoryManagerFailFirstAllocation(true, *executionEnvironment); executionEnvironment->memoryManager.reset(mockMemoryManager); mockMemoryManager->returnBaseAllocateGraphicsMemoryInDevicePool = true; device = std::make_unique(MockDevice::create(executionEnvironment, 0)); context = std::make_unique(device.get()); imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D; imageDesc.image_width = 1; imageDesc.image_height = 1; imageDesc.image_row_pitch = sizeof(memory); imageFormat.image_channel_data_type = CL_UNSIGNED_INT8; 
imageFormat.image_channel_order = CL_R; } void TearDown() override {} MockMemoryManagerFailFirstAllocation *mockMemoryManager = nullptr; cl_image_desc imageDesc{}; cl_image_format imageFormat = {}; std::unique_ptr device; std::unique_ptr context; char memory[10]; }; TEST_F(ImageInLocalMemoryTest, givenImageWithoutHostPtrWhenLocalMemoryIsEnabledThenImageAllocationIsInLocalMemoryAndGpuAddressIsInStandard64KHeap) { cl_int retVal = 0; cl_mem_flags flags = CL_MEM_READ_WRITE; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); std::unique_ptr image(Image::create( context.get(), ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context->getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imageDesc, memory, retVal)); ASSERT_NE(nullptr, image); auto imgGfxAlloc = image->getGraphicsAllocation(device->getRootDeviceIndex()); ASSERT_NE(nullptr, imgGfxAlloc); EXPECT_EQ(MemoryPool::LocalMemory, imgGfxAlloc->getMemoryPool()); EXPECT_LE(imageDesc.image_width * surfaceFormat->surfaceFormat.ImageElementSizeInBytes, imgGfxAlloc->getUnderlyingBufferSize()); EXPECT_EQ(AllocationType::IMAGE, imgGfxAlloc->getAllocationType()); EXPECT_EQ(0u, imgGfxAlloc->getDefaultGmm()->resourceParams.Flags.Info.NonLocalOnly); EXPECT_LT(GmmHelper::canonize(mockMemoryManager->getGfxPartition(imgGfxAlloc->getRootDeviceIndex())->getHeapBase(HeapIndex::HEAP_STANDARD64KB)), imgGfxAlloc->getGpuAddress()); EXPECT_GT(GmmHelper::canonize(mockMemoryManager->getGfxPartition(imgGfxAlloc->getRootDeviceIndex())->getHeapLimit(HeapIndex::HEAP_STANDARD64KB)), imgGfxAlloc->getGpuAddress()); EXPECT_EQ(0llu, imgGfxAlloc->getGpuBaseAddress()); } compute-runtime-22.14.22890/opencl/test/unit_test/mem_obj/destructor_callback_tests.cpp000066400000000000000000000062351422164147700312050ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include 
"shared/test/common/fixtures/memory_management_fixture.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "gtest/gtest.h" using namespace NEO; class DestructorCallbackFixture : public MemoryManagementFixture { public: DestructorCallbackFixture() { } void SetUp() override { MemoryManagementFixture::SetUp(); BufferDefaults::context = new MockContext; } void TearDown() override { delete BufferDefaults::context; platformsImpl->clear(); MemoryManagementFixture::TearDown(); } protected: cl_int retVal = CL_SUCCESS; }; typedef Test DestructorCallbackTest; static std::vector calls(32); void CL_CALLBACK callBack1(cl_mem memObj, void *userData) { calls.push_back(1); } void CL_CALLBACK callBack2(cl_mem memObj, void *userData) { calls.push_back(2); } void CL_CALLBACK callBack3(cl_mem memObj, void *userData) { calls.push_back(3); } TEST_F(DestructorCallbackTest, WhenSettingDestructorCallbackThenCallOrderIsPreserved) { auto buffer = BufferHelper>::create(); auto address = buffer->getCpuAddress(); EXPECT_NE(nullptr, address); calls.clear(); retVal = buffer->setDestructorCallback(callBack1, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); retVal = buffer->setDestructorCallback(callBack2, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); retVal = buffer->setDestructorCallback(callBack3, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); delete buffer; ASSERT_EQ(3u, calls.size()); EXPECT_EQ(3, calls[0]); EXPECT_EQ(2, calls[1]); EXPECT_EQ(1, calls[2]); calls.clear(); } TEST_F(DestructorCallbackTest, GivenInjectedFailureWhenSettingDestructorCallbackThenFailingAllocationIsNotRegistered) { std::shared_ptr context(new MockContext); InjectedFunction method = [this, context](size_t failureIndex) { char hostPtr[42]; auto buffer = Buffer::create( context.get(), CL_MEM_USE_HOST_PTR, sizeof(hostPtr), hostPtr, retVal); // if failures are injected into Buffer::create, we ignore them // we are only interested in 
setDestructorCallback if (retVal == CL_SUCCESS && buffer != nullptr) { auto address = buffer->getCpuAddress(); EXPECT_NE(nullptr, address); calls.clear(); retVal = buffer->setDestructorCallback(callBack1, nullptr); if (MemoryManagement::nonfailingAllocation == failureIndex) { EXPECT_EQ(CL_SUCCESS, retVal); } else { EXPECT_EQ(CL_OUT_OF_HOST_MEMORY, retVal) << "for allocation " << failureIndex; } delete buffer; if (MemoryManagement::nonfailingAllocation == failureIndex) { EXPECT_EQ(1u, calls.size()); } else { EXPECT_EQ(0u, calls.size()); } } }; injectFailures(method); } compute-runtime-22.14.22890/opencl/test/unit_test/mem_obj/get_mem_object_info_subbuffer_tests.cpp000066400000000000000000000106071422164147700332120ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/ptr_math.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "gtest/gtest.h" using namespace NEO; struct GetMemObjectSubBufferInfo : public ::testing::Test { GetMemObjectSubBufferInfo() { } void SetUp() override { bufferStorage = alignedMalloc(4096, MemoryConstants::preferredAlignment); region.origin = 4; region.size = 12; } void TearDown() override { delete subBuffer; delete buffer; alignedFree(bufferStorage); } void createBuffer(cl_mem_flags flags = CL_MEM_READ_WRITE) { auto retVal = CL_INVALID_VALUE; buffer = Buffer::create(&context, flags, bufferSize, nullptr, retVal); ASSERT_NE(nullptr, buffer); } void createSubBuffer(cl_mem_flags flags = CL_MEM_READ_WRITE) { cl_int retVal; subBuffer = buffer->createSubBuffer(flags, 0, ®ion, retVal); ASSERT_NE(nullptr, subBuffer); } void createHostPtrBuffer(cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR) { auto retVal = CL_INVALID_VALUE; buffer = Buffer::create(&context, flags, bufferSize, bufferStorage, retVal); ASSERT_NE(nullptr, buffer); } MockContext 
context; Buffer *buffer = nullptr; Buffer *subBuffer = nullptr; void *bufferStorage; static const size_t bufferSize = 256; cl_buffer_region region; cl_int retVal; size_t sizeReturned = 0; }; TEST_F(GetMemObjectSubBufferInfo, GivenMemAssociatedMemobjectWhenGettingMembojectInfoThenCorrectValueIsReturned) { createBuffer(); createSubBuffer(); cl_mem object = nullptr; retVal = subBuffer->getMemObjectInfo(CL_MEM_ASSOCIATED_MEMOBJECT, 0, nullptr, &sizeReturned); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(object), sizeReturned); retVal = subBuffer->getMemObjectInfo(CL_MEM_ASSOCIATED_MEMOBJECT, sizeof(object), &object, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); cl_mem clBuffer = (cl_mem)buffer; EXPECT_EQ(clBuffer, object); } TEST_F(GetMemObjectSubBufferInfo, GivenMemOffsetWhenGettingMembojectInfoThenCorrectValueIsReturned) { createBuffer(); createSubBuffer(); size_t offset = 0; retVal = subBuffer->getMemObjectInfo(CL_MEM_OFFSET, 0, nullptr, &sizeReturned); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(offset), sizeReturned); retVal = subBuffer->getMemObjectInfo(CL_MEM_OFFSET, sizeof(offset), &offset, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(region.origin, offset); } TEST_F(GetMemObjectSubBufferInfo, GivenMemFlagsWhenGettingMembojectInfoThenCorrectValueIsReturned) { createBuffer(); createSubBuffer(); cl_mem_flags flags = 0; retVal = subBuffer->getMemObjectInfo(CL_MEM_FLAGS, 0, nullptr, &sizeReturned); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(flags), sizeReturned); retVal = subBuffer->getMemObjectInfo(CL_MEM_FLAGS, sizeof(flags), &flags, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(static_cast(CL_MEM_READ_WRITE), flags); } TEST_F(GetMemObjectSubBufferInfo, GivenMemAssociatedMemobjectAndReadOnlyBufferWhenGettingMembojectInfoThenCorrectValueIsReturned) { createBuffer(CL_MEM_READ_ONLY); createSubBuffer(0); cl_mem_flags flags = 0; retVal = subBuffer->getMemObjectInfo(CL_MEM_FLAGS, 0, nullptr, &sizeReturned); EXPECT_EQ(CL_SUCCESS, retVal); 
EXPECT_EQ(sizeof(flags), sizeReturned); retVal = subBuffer->getMemObjectInfo( CL_MEM_FLAGS, sizeof(flags), &flags, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(static_cast(0), flags); } TEST_F(GetMemObjectSubBufferInfo, GivenMemHostPtrWhenGettingMembojectInfoThenCorrectValueIsReturned) { createHostPtrBuffer(); createSubBuffer(); void *hostPtr = 0; retVal = subBuffer->getMemObjectInfo(CL_MEM_HOST_PTR, 0, nullptr, &sizeReturned); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(hostPtr), sizeReturned); retVal = subBuffer->getMemObjectInfo(CL_MEM_HOST_PTR, sizeof(hostPtr), &hostPtr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); auto expected = ptrOffset(this->bufferStorage, region.origin); EXPECT_EQ(expected, hostPtr); } compute-runtime-22.14.22890/opencl/test/unit_test/mem_obj/get_mem_object_info_tests.cpp000066400000000000000000000313571422164147700311540ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/ptr_math.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/mocks/mock_gmm.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/fixtures/platform_fixture.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "gtest/gtest.h" #include using namespace NEO; class GetMemObjectInfo : public ::testing::Test, public PlatformFixture, public ClDeviceFixture { using ClDeviceFixture::SetUp; using PlatformFixture::SetUp; public: void SetUp() override { PlatformFixture::SetUp(); ClDeviceFixture::SetUp(); BufferDefaults::context = new MockContext; } void TearDown() override { delete BufferDefaults::context; ClDeviceFixture::TearDown(); 
PlatformFixture::TearDown(); } }; TEST_F(GetMemObjectInfo, GivenInvalidParamsWhenGettingMemObjectInfoThenInvalidValueErrorIsReturned) { auto buffer = std::unique_ptr(BufferHelper<>::create()); size_t sizeReturned = 0; auto retVal = buffer->getMemObjectInfo( 0, 0, nullptr, &sizeReturned); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(GetMemObjectInfo, GivenInvalidParametersWhenGettingMemObjectInfoThenValueSizeRetIsNotUpdated) { auto buffer = std::unique_ptr(BufferHelper<>::create()); size_t sizeReturned = 0x1234; auto retVal = buffer->getMemObjectInfo( 0, 0, nullptr, &sizeReturned); EXPECT_EQ(CL_INVALID_VALUE, retVal); EXPECT_EQ(0x1234u, sizeReturned); } TEST_F(GetMemObjectInfo, GivenMemTypeWhenGettingMemObjectInfoThenCorrectValueIsReturned) { auto buffer = std::unique_ptr(BufferHelper<>::create()); size_t sizeReturned = 0; auto retVal = buffer->getMemObjectInfo( CL_MEM_TYPE, 0, nullptr, &sizeReturned); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(cl_mem_object_type), sizeReturned); cl_mem_object_type object_type = 0; retVal = buffer->getMemObjectInfo( CL_MEM_TYPE, sizeof(cl_mem_object_type), &object_type, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(static_cast(CL_MEM_OBJECT_BUFFER), object_type); } TEST_F(GetMemObjectInfo, GivenMemFlagsWhenGettingMemObjectInfoThenCorrectValueIsReturned) { auto buffer = std::unique_ptr(BufferHelper<>::create()); size_t sizeReturned = 0; cl_mem_flags mem_flags = 0; auto retVal = buffer->getMemObjectInfo( CL_MEM_FLAGS, 0, nullptr, &sizeReturned); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(mem_flags), sizeReturned); retVal = buffer->getMemObjectInfo( CL_MEM_FLAGS, sizeof(mem_flags), &mem_flags, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(static_cast(CL_MEM_READ_WRITE), mem_flags); } TEST_F(GetMemObjectInfo, GivenMemSizeWhenGettingMemObjectInfoThenCorrectValueIsReturned) { auto buffer = std::unique_ptr(BufferHelper<>::create()); size_t sizeReturned = 0; size_t mem_size = 0; auto retVal = buffer->getMemObjectInfo( 
CL_MEM_SIZE, 0, nullptr, &sizeReturned); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(mem_size), sizeReturned); retVal = buffer->getMemObjectInfo( CL_MEM_SIZE, sizeof(mem_size), &mem_size, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(buffer->getSize(), mem_size); } TEST_F(GetMemObjectInfo, GivenMemHostPtrWhenGettingMemObjectInfoThenCorrectValueIsReturned) { auto buffer = std::unique_ptr(BufferHelper<>::create()); size_t sizeReturned = 0; void *host_ptr = nullptr; auto retVal = buffer->getMemObjectInfo( CL_MEM_HOST_PTR, 0, nullptr, &sizeReturned); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(host_ptr), sizeReturned); retVal = buffer->getMemObjectInfo( CL_MEM_HOST_PTR, sizeof(host_ptr), &host_ptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(buffer->getHostPtr(), host_ptr); } TEST_F(GetMemObjectInfo, GivenMemContextWhenGettingMemObjectInfoThenCorrectValueIsReturned) { MockContext context; auto buffer = std::unique_ptr(BufferHelper<>::create(&context)); size_t sizeReturned = 0; cl_context contextReturned = nullptr; auto retVal = buffer->getMemObjectInfo( CL_MEM_CONTEXT, 0, nullptr, &sizeReturned); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(contextReturned), sizeReturned); retVal = buffer->getMemObjectInfo( CL_MEM_CONTEXT, sizeof(contextReturned), &contextReturned, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(&context, contextReturned); } TEST_F(GetMemObjectInfo, GivenMemUsesSvmPointerWhenGettingMemObjectInfoThenCorrectValueIsReturned) { auto buffer = std::unique_ptr(BufferHelper>::create()); size_t sizeReturned = 0; cl_bool usesSVMPointer = false; auto retVal = buffer->getMemObjectInfo( CL_MEM_USES_SVM_POINTER, 0, nullptr, &sizeReturned); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(usesSVMPointer), sizeReturned); retVal = buffer->getMemObjectInfo( CL_MEM_USES_SVM_POINTER, sizeof(usesSVMPointer), &usesSVMPointer, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(static_cast(CL_FALSE), usesSVMPointer); } TEST_F(GetMemObjectInfo, 
GivenBufferWithMemUseHostPtrAndMemTypeWhenGettingMemObjectInfoThenCorrectValueIsReturned) { const ClDeviceInfo &devInfo = pClDevice->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { auto hostPtr = clSVMAlloc(BufferDefaults::context, CL_MEM_READ_WRITE, BufferUseHostPtr<>::sizeInBytes, 64); ASSERT_NE(nullptr, hostPtr); cl_int retVal; auto buffer = Buffer::create( BufferDefaults::context, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, BufferUseHostPtr<>::sizeInBytes, hostPtr, retVal); size_t sizeReturned = 0; cl_bool usesSVMPointer = false; retVal = buffer->getMemObjectInfo( CL_MEM_USES_SVM_POINTER, 0, nullptr, &sizeReturned); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(usesSVMPointer), sizeReturned); retVal = buffer->getMemObjectInfo( CL_MEM_USES_SVM_POINTER, sizeof(usesSVMPointer), &usesSVMPointer, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(static_cast(CL_TRUE), usesSVMPointer); delete buffer; clSVMFree(BufferDefaults::context, hostPtr); } } TEST_F(GetMemObjectInfo, GivenMemOffsetWhenGettingMemObjectInfoThenCorrectValueIsReturned) { auto buffer = std::unique_ptr(BufferHelper<>::create()); size_t sizeReturned = 0; size_t offset = false; auto retVal = buffer->getMemObjectInfo( CL_MEM_OFFSET, 0, nullptr, &sizeReturned); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(offset), sizeReturned); retVal = buffer->getMemObjectInfo( CL_MEM_OFFSET, sizeof(offset), &offset, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, offset); } TEST_F(GetMemObjectInfo, GivenMemAssociatedMemobjectWhenGettingMemObjectInfoThenCorrectValueIsReturned) { auto buffer = std::unique_ptr(BufferHelper<>::create()); size_t sizeReturned = 0; cl_mem object = nullptr; auto retVal = buffer->getMemObjectInfo( CL_MEM_ASSOCIATED_MEMOBJECT, 0, nullptr, &sizeReturned); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(object), sizeReturned); retVal = buffer->getMemObjectInfo( CL_MEM_ASSOCIATED_MEMOBJECT, sizeof(object), &object, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(nullptr, object); 
} TEST_F(GetMemObjectInfo, GivenMemMapCountWhenGettingMemObjectInfoThenCorrectValueIsReturned) { auto buffer = std::unique_ptr(BufferHelper<>::create()); size_t sizeReturned = 0; cl_uint mapCount = static_cast(-1); auto retVal = buffer->getMemObjectInfo( CL_MEM_MAP_COUNT, 0, nullptr, &sizeReturned); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(mapCount), sizeReturned); retVal = buffer->getMemObjectInfo( CL_MEM_MAP_COUNT, sizeof(mapCount), &mapCount, &sizeReturned); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(mapCount), sizeReturned); } TEST_F(GetMemObjectInfo, GivenMemReferenceCountWhenGettingMemObjectInfoThenCorrectValueIsReturned) { auto buffer = std::unique_ptr(BufferHelper<>::create()); size_t sizeReturned = 0; cl_uint refCount = static_cast(-1); auto retVal = buffer->getMemObjectInfo( CL_MEM_REFERENCE_COUNT, 0, nullptr, &sizeReturned); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(refCount), sizeReturned); retVal = buffer->getMemObjectInfo( CL_MEM_REFERENCE_COUNT, sizeof(refCount), &refCount, &sizeReturned); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(refCount), sizeReturned); } TEST_F(GetMemObjectInfo, GivenValidBufferWhenGettingCompressionOfMemObjectThenCorrectValueIsReturned) { auto buffer = std::unique_ptr(BufferHelper<>::create()); auto graphicsAllocation = buffer->getMultiGraphicsAllocation().getDefaultGraphicsAllocation(); size_t sizeReturned = 0; cl_bool usesCompression{}; cl_int retVal{}; retVal = buffer->getMemObjectInfo( CL_MEM_USES_COMPRESSION_INTEL, 0, nullptr, &sizeReturned); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(sizeof(cl_bool), sizeReturned); MockBuffer::setAllocationType(graphicsAllocation, pDevice->getRootDeviceEnvironment().getGmmClientContext(), true); retVal = buffer->getMemObjectInfo( CL_MEM_USES_COMPRESSION_INTEL, sizeReturned, &usesCompression, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(cl_bool{CL_TRUE}, usesCompression); MockBuffer::setAllocationType(graphicsAllocation, 
pDevice->getRootDeviceEnvironment().getGmmClientContext(), false); retVal = buffer->getMemObjectInfo( CL_MEM_USES_COMPRESSION_INTEL, sizeReturned, &usesCompression, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(cl_bool{CL_FALSE}, usesCompression); } class GetMemObjectInfoLocalMemory : public GetMemObjectInfo { using GetMemObjectInfo::SetUp; public: void SetUp() override { dbgRestore = std::make_unique(); DebugManager.flags.EnableLocalMemory.set(1); GetMemObjectInfo::SetUp(); delete BufferDefaults::context; BufferDefaults::context = new MockContext(pClDevice, true); } std::unique_ptr dbgRestore; }; TEST_F(GetMemObjectInfoLocalMemory, givenLocalMemoryEnabledWhenNoZeroCopySvmAllocationUsedThenBufferAllocationInheritsZeroCopyFlag) { const ClDeviceInfo &devInfo = pClDevice->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { auto hostPtr = clSVMAlloc(BufferDefaults::context, CL_MEM_READ_WRITE, BufferUseHostPtr<>::sizeInBytes, 64); ASSERT_NE(nullptr, hostPtr); cl_int retVal; auto buffer = Buffer::create( BufferDefaults::context, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, BufferUseHostPtr<>::sizeInBytes, hostPtr, retVal); size_t sizeReturned = 0; cl_bool usesSVMPointer = false; retVal = buffer->getMemObjectInfo( CL_MEM_USES_SVM_POINTER, 0, nullptr, &sizeReturned); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(usesSVMPointer), sizeReturned); retVal = buffer->getMemObjectInfo( CL_MEM_USES_SVM_POINTER, sizeof(usesSVMPointer), &usesSVMPointer, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(static_cast(CL_TRUE), usesSVMPointer); EXPECT_TRUE(buffer->isMemObjWithHostPtrSVM()); EXPECT_FALSE(buffer->isMemObjZeroCopy()); delete buffer; clSVMFree(BufferDefaults::context, hostPtr); } } compute-runtime-22.14.22890/opencl/test/unit_test/mem_obj/image1d_tests.cpp000066400000000000000000000110631422164147700264750ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" 
#include "opencl/source/helpers/cl_memory_properties_helpers.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/mem_obj/image.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" using namespace NEO; static const unsigned int testImageDimensions = 32; class CreateImage1DTest : public ClDeviceFixture, public testing::TestWithParam { public: CreateImage1DTest() { } protected: void SetUp() override { ClDeviceFixture::SetUp(); types = GetParam(); // clang-format off imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_RGBA; imageDesc.image_type = types; imageDesc.image_width = testImageDimensions; imageDesc.image_height = 0; imageDesc.image_depth = 0; imageDesc.image_array_size = 1; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = NULL; // clang-format on if (types == CL_MEM_OBJECT_IMAGE1D_ARRAY) { imageDesc.image_array_size = 10; } context = new MockContext(pClDevice); if (types == CL_MEM_OBJECT_IMAGE1D_BUFFER) { imageDesc.mem_object = clCreateBuffer(context, CL_MEM_ALLOC_HOST_PTR, testImageDimensions, nullptr, nullptr); } } void TearDown() override { if (types == CL_MEM_OBJECT_IMAGE1D_BUFFER) { clReleaseMemObject(imageDesc.mem_object); } delete context; ClDeviceFixture::TearDown(); } cl_image_format imageFormat; cl_image_desc imageDesc; cl_int retVal = CL_SUCCESS; MockContext *context; cl_mem_object_type types = 0; }; typedef CreateImage1DTest CreateImage1DType; HWTEST_P(CreateImage1DType, GivenValidTypeWhenCreatingImageThenImageParamsAreCorrect) { cl_mem_flags flags = CL_MEM_READ_WRITE; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, pClDevice->getHardwareInfo().capabilityTable.supportsOcl21Features); auto image = Image::create( context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, pDevice), flags, 0, 
surfaceFormat, &imageDesc, nullptr, retVal); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, image); auto imgDesc = image->getImageDesc(); EXPECT_NE(0u, imgDesc.image_width); EXPECT_EQ(0u, imgDesc.image_height); EXPECT_EQ(0u, imgDesc.image_depth); EXPECT_NE(0u, imgDesc.image_row_pitch); EXPECT_GE(imgDesc.image_slice_pitch, imgDesc.image_row_pitch); size_t ImageInfoHeight = 0; retVal = clGetImageInfo(image, CL_IMAGE_HEIGHT, sizeof(size_t), &ImageInfoHeight, NULL); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(0u, ImageInfoHeight); if ((types == CL_MEM_OBJECT_IMAGE1D) || (types == CL_MEM_OBJECT_IMAGE1D_BUFFER)) { EXPECT_EQ(0u, imgDesc.image_array_size); } else if (types == CL_MEM_OBJECT_IMAGE1D_ARRAY) { EXPECT_NE(0u, imgDesc.image_array_size); } else { ASSERT_TRUE(false); } EXPECT_EQ(image->getCubeFaceIndex(), static_cast(__GMM_NO_CUBE_MAP)); ASSERT_EQ(true, image->isMemObjZeroCopy()); EXPECT_FALSE(image->isImageFromImage()); auto address = image->getCpuAddress(); EXPECT_NE(nullptr, address); if (types == CL_MEM_OBJECT_IMAGE1D_BUFFER) { Buffer *inputBuffer = castToObject(imageDesc.buffer); EXPECT_NE(nullptr, inputBuffer->getCpuAddress()); EXPECT_EQ(inputBuffer->getCpuAddress(), image->getCpuAddress()); EXPECT_FALSE(image->getIsObjectRedescribed()); EXPECT_GE(2, inputBuffer->getRefInternalCount()); EXPECT_TRUE(image->isImageFromBuffer()); } else { EXPECT_FALSE(image->isImageFromBuffer()); } typedef typename FamilyType::RENDER_SURFACE_STATE SURFACE_STATE; auto imageHw = static_cast *>(image); EXPECT_EQ(SURFACE_STATE::SURFACE_TYPE_SURFTYPE_1D, imageHw->surfaceType); delete image; } static cl_mem_object_type Image1DTypes[] = { CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE1D_BUFFER, CL_MEM_OBJECT_IMAGE1D_ARRAY}; INSTANTIATE_TEST_CASE_P( CreateImage1DTestCreate, CreateImage1DType, testing::ValuesIn(Image1DTypes)); 
compute-runtime-22.14.22890/opencl/test/unit_test/mem_obj/image2d_from_buffer_tests.cpp000066400000000000000000000571371422164147700310660ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/compiler_interface/oclc_extensions.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/hw_helper.h" #include "shared/test/common/mocks/mock_gmm.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/cl_device/cl_device_info_map.h" #include "opencl/source/helpers/cl_memory_properties_helpers.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/mem_obj/image.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/helpers/raii_hw_helper.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/test_macros/test_checks_ocl.h" using namespace NEO; namespace NEO { extern HwHelper *hwHelperFactory[IGFX_MAX_CORE]; } // Tests for cl_khr_image2d_from_buffer class Image2dFromBufferTest : public ::testing::Test { public: Image2dFromBufferTest() {} protected: void SetUp() override { imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_RGBA; imageDesc.image_array_size = 0; imageDesc.image_depth = 0; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_height = 128; imageDesc.image_width = 256; imageDesc.num_mip_levels = 0; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; imageDesc.num_samples = 0; size = 128 * 256 * 4; hostPtr = alignedMalloc(size, 16); ASSERT_NE(nullptr, hostPtr); imageDesc.mem_object = clCreateBuffer(&context, CL_MEM_USE_HOST_PTR, size, hostPtr, &retVal); ASSERT_NE(nullptr, imageDesc.mem_object); } void TearDown() override { clReleaseMemObject(imageDesc.mem_object); alignedFree(hostPtr); } Image *createImage() { cl_mem_flags flags = CL_MEM_READ_ONLY; auto surfaceFormat = 
Image::getSurfaceFormatFromTable( flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); return Image::create(&context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context.getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imageDesc, NULL, retVal); } cl_image_format imageFormat; cl_image_desc imageDesc; cl_int retVal = CL_SUCCESS; MockContext context; void *hostPtr; size_t size; }; TEST_F(Image2dFromBufferTest, WhenCreatingImage2dFromBufferThenImagePropertiesAreCorrect) { auto buffer = castToObject(imageDesc.mem_object); ASSERT_NE(nullptr, buffer); EXPECT_EQ(1, buffer->getRefInternalCount()); auto imageFromBuffer = createImage(); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(2, buffer->getRefInternalCount()); EXPECT_NE(nullptr, imageFromBuffer); EXPECT_FALSE(imageFromBuffer->isTiledAllocation()); EXPECT_EQ(imageFromBuffer->getCubeFaceIndex(), static_cast(__GMM_NO_CUBE_MAP)); delete imageFromBuffer; EXPECT_EQ(1, buffer->getRefInternalCount()); } TEST_F(Image2dFromBufferTest, givenBufferWhenCreateImage2dArrayFromBufferThenImageDescriptorIsInvalid) { imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY; cl_mem_flags flags = CL_MEM_READ_ONLY; auto surfaceFormat = Image::getSurfaceFormatFromTable( flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); retVal = Image::validate(&context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context.getDevice(0)->getDevice()), surfaceFormat, &imageDesc, NULL); EXPECT_EQ(CL_INVALID_IMAGE_DESCRIPTOR, retVal); } TEST_F(Image2dFromBufferTest, WhenCreatingImageThenRowPitchIsCorrect) { auto imageFromBuffer = createImage(); ASSERT_NE(nullptr, imageFromBuffer); EXPECT_EQ(1024u, imageFromBuffer->getImageDesc().image_row_pitch); delete imageFromBuffer; } TEST_F(Image2dFromBufferTest, givenInvalidRowPitchWhenCreateImage2dFromBufferThenReturnsError) { REQUIRE_IMAGES_OR_SKIP(&context); char ptr[10]; 
imageDesc.image_row_pitch = 255; cl_mem_flags flags = CL_MEM_READ_ONLY; auto surfaceFormat = Image::getSurfaceFormatFromTable( flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); retVal = Image::validate(&context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context.getDevice(0)->getDevice()), surfaceFormat, &imageDesc, ptr); EXPECT_EQ(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, retVal); } TEST_F(Image2dFromBufferTest, givenRowPitchThatIsGreaterThenComputedWhenImageIsCreatedThenPassedRowPitchIsUsedInsteadOfComputed) { auto computedSize = imageDesc.image_width * 4; auto passedSize = computedSize * 2; imageDesc.image_row_pitch = passedSize; auto imageFromBuffer = createImage(); EXPECT_EQ(passedSize, imageFromBuffer->getHostPtrRowPitch()); delete imageFromBuffer; } TEST_F(Image2dFromBufferTest, GivenInvalidHostPtrAlignmentWhenCreatingImageThenInvalidImageFormatDescriptorErrorIsReturned) { REQUIRE_IMAGES_OR_SKIP(&context); std::unique_ptr myHostPtr(malloc(size + 1), free); ASSERT_NE(nullptr, myHostPtr); void *nonAlignedHostPtr = myHostPtr.get(); if ((reinterpret_cast(myHostPtr.get()) % 4) == 0) { nonAlignedHostPtr = reinterpret_cast((reinterpret_cast(myHostPtr.get()) + 1)); } cl_mem origBuffer = imageDesc.mem_object; imageDesc.mem_object = clCreateBuffer(&context, CL_MEM_USE_HOST_PTR, size, nonAlignedHostPtr, &retVal); ASSERT_NE(nullptr, imageDesc.mem_object); cl_mem_flags flags = CL_MEM_READ_ONLY; auto surfaceFormat = Image::getSurfaceFormatFromTable( flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); retVal = Image::validate(&context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context.getDevice(0)->getDevice()), surfaceFormat, &imageDesc, NULL); EXPECT_EQ(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, retVal); clReleaseMemObject(imageDesc.mem_object); imageDesc.mem_object = origBuffer; } TEST_F(Image2dFromBufferTest, 
givenInvalidFlagsWhenValidateIsCalledThenReturnError) { REQUIRE_IMAGES_OR_SKIP(&context); cl_mem_flags flags[] = {CL_MEM_USE_HOST_PTR, CL_MEM_COPY_HOST_PTR}; for (auto flag : flags) { auto surfaceFormat = Image::getSurfaceFormatFromTable( flag, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); retVal = Image::validate(&context, ClMemoryPropertiesHelper::createMemoryProperties(flag, 0, 0, &context.getDevice(0)->getDevice()), surfaceFormat, &imageDesc, reinterpret_cast(0x12345)); EXPECT_EQ(CL_INVALID_VALUE, retVal); } } TEST_F(Image2dFromBufferTest, givenOneChannel8BitColorsNoRowPitchSpecifiedAndTooLargeImageWhenValidatingSurfaceFormatThenReturnError) { REQUIRE_IMAGES_OR_SKIP(&context); imageDesc.image_height = 1 + castToObject(imageDesc.mem_object)->getSize() / imageDesc.image_width; cl_mem_flags flags = CL_MEM_READ_ONLY; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_R; auto surfaceFormat = Image::getSurfaceFormatFromTable( flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); retVal = Image::validate(&context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context.getDevice(0)->getDevice()), surfaceFormat, &imageDesc, NULL); EXPECT_EQ(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, retVal); } TEST_F(Image2dFromBufferTest, givenOneChannel16BitColorsNoRowPitchSpecifiedAndTooLargeImageWhenValidatingSurfaceFormatThenReturnError) { REQUIRE_IMAGES_OR_SKIP(&context); imageDesc.image_height = 1 + castToObject(imageDesc.mem_object)->getSize() / imageDesc.image_width / 2; cl_mem_flags flags = CL_MEM_READ_ONLY; imageFormat.image_channel_data_type = CL_UNORM_INT16; imageFormat.image_channel_order = CL_R; auto surfaceFormat = Image::getSurfaceFormatFromTable( flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); retVal = Image::validate(&context, 
ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context.getDevice(0)->getDevice()), surfaceFormat, &imageDesc, NULL); EXPECT_EQ(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, retVal); } TEST_F(Image2dFromBufferTest, givenFourChannel8BitColorsNoRowPitchSpecifiedAndTooLargeImageWhenValidatingSurfaceFormatThenReturnError) { REQUIRE_IMAGES_OR_SKIP(&context); imageDesc.image_height = 1 + castToObject(imageDesc.mem_object)->getSize() / imageDesc.image_width / 4; cl_mem_flags flags = CL_MEM_READ_ONLY; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_RGBA; const auto surfaceFormat = Image::getSurfaceFormatFromTable( flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); retVal = Image::validate(&context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context.getDevice(0)->getDevice()), surfaceFormat, &imageDesc, NULL); EXPECT_EQ(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, retVal); } TEST_F(Image2dFromBufferTest, givenFourChannel16BitColorsNoRowPitchSpecifiedAndTooLargeImageWhenValidatingSurfaceFormatThenReturnError) { REQUIRE_IMAGES_OR_SKIP(&context); imageDesc.image_height = 1 + castToObject(imageDesc.mem_object)->getSize() / imageDesc.image_width / 8; cl_mem_flags flags = CL_MEM_READ_ONLY; imageFormat.image_channel_data_type = CL_UNORM_INT16; imageFormat.image_channel_order = CL_RGBA; auto surfaceFormat = Image::getSurfaceFormatFromTable( flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); retVal = Image::validate(&context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context.getDevice(0)->getDevice()), surfaceFormat, &imageDesc, NULL); EXPECT_EQ(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, retVal); } TEST_F(Image2dFromBufferTest, givenFourChannel8BitColorsAndNotTooLargeRowPitchSpecifiedWhenValidatingSurfaceFormatThenDoNotReturnError) { REQUIRE_IMAGES_OR_SKIP(&context); imageDesc.image_height = 
castToObject(imageDesc.mem_object)->getSize() / imageDesc.image_width; imageDesc.image_row_pitch = imageDesc.image_width; cl_mem_flags flags = CL_MEM_READ_ONLY; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_RGBA; auto surfaceFormat = Image::getSurfaceFormatFromTable( flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); retVal = Image::validate(&context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context.getDevice(0)->getDevice()), surfaceFormat, &imageDesc, NULL); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(Image2dFromBufferTest, givenFourChannel8BitColorsAndTooLargeRowPitchSpecifiedWhenValidatingSurfaceFormatThenReturnError) { REQUIRE_IMAGES_OR_SKIP(&context); const auto pitchAlignment = &ClDeviceInfoTable::Map::getValue(*context.getDevice(0u)); imageDesc.image_height = castToObject(imageDesc.mem_object)->getSize() / imageDesc.image_width; imageDesc.image_row_pitch = imageDesc.image_width + *pitchAlignment; cl_mem_flags flags = CL_MEM_READ_ONLY; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_RGBA; auto surfaceFormat = Image::getSurfaceFormatFromTable( flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); retVal = Image::validate(&context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context.getDevice(0)->getDevice()), surfaceFormat, &imageDesc, NULL); EXPECT_EQ(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, retVal); } TEST_F(Image2dFromBufferTest, givenUnalignedImageWidthAndNoSpaceInBufferForAlignmentWhenValidatingSurfaceFormatThenReturnError) { REQUIRE_IMAGES_OR_SKIP(&context); static_cast(context.getDevice(0))->deviceInfo.imagePitchAlignment = 128; imageDesc.image_width = 64; imageDesc.image_height = castToObject(imageDesc.mem_object)->getSize() / imageDesc.image_width; cl_mem_flags flags = CL_MEM_READ_ONLY; imageFormat.image_channel_data_type = CL_UNORM_INT8; 
imageFormat.image_channel_order = CL_R; auto surfaceFormat = Image::getSurfaceFormatFromTable( flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); retVal = Image::validate(&context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context.getDevice(0)->getDevice()), surfaceFormat, &imageDesc, NULL); EXPECT_EQ(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, retVal); } TEST_F(Image2dFromBufferTest, GivenPlatformWhenGettingExtensionStringThenImage2dFromBufferExtensionIsCorrectlyReported) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); const auto hwInfo = device->getHardwareInfo(); const auto &caps = device->getDeviceInfo(); std::string extensions = caps.deviceExtensions; size_t found = extensions.find("cl_khr_image2d_from_buffer"); if (hwInfo.capabilityTable.supportsImages) { EXPECT_NE(std::string::npos, found); } else { EXPECT_EQ(std::string::npos, found); } } TEST_F(Image2dFromBufferTest, WhenCreatingImageThenHostPtrIsCorrectlySet) { auto buffer = castToObject(imageDesc.mem_object); ASSERT_NE(nullptr, buffer); EXPECT_EQ(1, buffer->getRefInternalCount()); auto imageFromBuffer = createImage(); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(buffer->getHostPtr(), imageFromBuffer->getHostPtr()); EXPECT_EQ(true, imageFromBuffer->isMemObjZeroCopy()); delete imageFromBuffer; } TEST_F(Image2dFromBufferTest, givenImageFromBufferWhenItIsRedescribedThenItReturnsProperImageFromBufferValue) { std::unique_ptr imageFromBuffer(createImage()); EXPECT_TRUE(imageFromBuffer->isImageFromBuffer()); std::unique_ptr redescribedImage(imageFromBuffer->redescribe()); EXPECT_TRUE(redescribedImage->isImageFromBuffer()); std::unique_ptr redescribedfillImage(imageFromBuffer->redescribeFillImage()); EXPECT_TRUE(redescribedfillImage->isImageFromBuffer()); } TEST_F(Image2dFromBufferTest, givenMemoryManagerNotSupportingVirtualPaddingWhenImageIsCreatedThenPaddingIsNotApplied) { auto memoryManager = 
context.getMemoryManager(); memoryManager->setVirtualPaddingSupport(false); auto buffer = castToObject(imageDesc.mem_object); ASSERT_NE(nullptr, buffer); EXPECT_EQ(1, buffer->getRefInternalCount()); std::unique_ptr imageFromBuffer(createImage()); ASSERT_EQ(CL_SUCCESS, retVal); //graphics allocation for image and buffer is the same auto bufferGraphicsAllocation = buffer->getGraphicsAllocation(context.getDevice(0)->getRootDeviceIndex()); auto imageGraphicsAllocation = imageFromBuffer->getGraphicsAllocation(context.getDevice(0)->getRootDeviceIndex()); EXPECT_EQ(bufferGraphicsAllocation, imageGraphicsAllocation); } TEST_F(Image2dFromBufferTest, givenMemoryManagerSupportingVirtualPaddingWhenImageIsCreatedThatFitsInTheBufferThenPaddingIsNotApplied) { auto memoryManager = context.getMemoryManager(); memoryManager->setVirtualPaddingSupport(true); auto buffer = castToObject(imageDesc.mem_object); ASSERT_NE(nullptr, buffer); EXPECT_EQ(1, buffer->getRefInternalCount()); std::unique_ptr imageFromBuffer(createImage()); ASSERT_EQ(CL_SUCCESS, retVal); //graphics allocation for image and buffer is the same auto bufferGraphicsAllocation = buffer->getGraphicsAllocation(context.getDevice(0)->getRootDeviceIndex()); auto imageGraphicsAllocation = imageFromBuffer->getGraphicsAllocation(context.getDevice(0)->getRootDeviceIndex()); EXPECT_EQ(this->size, bufferGraphicsAllocation->getUnderlyingBufferSize()); auto imageDescriptor = Image::convertDescriptor(imageDesc); auto imgInfo = MockGmm::initImgInfo(imageDescriptor, 0, &imageFromBuffer->getSurfaceFormatInfo().surfaceFormat); auto queryGmm = MockGmm::queryImgParams(context.getDevice(0)->getGmmClientContext(), imgInfo, false); EXPECT_TRUE(queryGmm->gmmResourceInfo->getSizeAllocation() >= this->size); EXPECT_EQ(bufferGraphicsAllocation, imageGraphicsAllocation); } TEST_F(Image2dFromBufferTest, givenMemoryManagerSupportingVirtualPaddingWhenImageIsCreatedFromLocalMemoryBufferThenPaddingIsNotApplied) { auto memoryManager = 
context.getMemoryManager(); memoryManager->setVirtualPaddingSupport(true); auto buffer = castToObject(imageDesc.mem_object); uint64_t gpuAddress = 0x1234; auto cpuAddress = buffer->getGraphicsAllocation(context.getDevice(0)->getRootDeviceIndex())->getUnderlyingBuffer(); buffer->getGraphicsAllocation(context.getDevice(0)->getRootDeviceIndex())->setCpuPtrAndGpuAddress(0, gpuAddress); ASSERT_NE(nullptr, buffer); EXPECT_EQ(1, buffer->getRefInternalCount()); std::unique_ptr imageFromBuffer(createImage()); ASSERT_EQ(CL_SUCCESS, retVal); //graphics allocation for image and buffer is the same auto bufferGraphicsAllocation = buffer->getGraphicsAllocation(context.getDevice(0)->getRootDeviceIndex()); auto imageGraphicsAllocation = imageFromBuffer->getGraphicsAllocation(context.getDevice(0)->getRootDeviceIndex()); EXPECT_EQ(this->size, bufferGraphicsAllocation->getUnderlyingBufferSize()); auto imageDescriptor = Image::convertDescriptor(imageDesc); auto imgInfo = MockGmm::initImgInfo(imageDescriptor, 0, &imageFromBuffer->getSurfaceFormatInfo().surfaceFormat); auto queryGmm = MockGmm::queryImgParams(context.getDevice(0)->getGmmClientContext(), imgInfo, false); EXPECT_TRUE(queryGmm->gmmResourceInfo->getSizeAllocation() >= this->size); EXPECT_EQ(bufferGraphicsAllocation, imageGraphicsAllocation); buffer->getGraphicsAllocation(context.getDevice(0)->getRootDeviceIndex())->setCpuPtrAndGpuAddress(cpuAddress, gpuAddress); } TEST_F(Image2dFromBufferTest, givenMemoryManagerSupportingVirtualPaddingWhenImageIsCreatedThatDoesntFitInTheBufferThenPaddingIsApplied) { imageFormat.image_channel_data_type = CL_FLOAT; imageFormat.image_channel_order = CL_RGBA; imageDesc.image_width = 29; imageDesc.image_height = 29; imageDesc.image_row_pitch = 512; //application calcualted buffer size auto bufferSize = imageDesc.image_row_pitch * imageDesc.image_height; auto buffer2 = clCreateBuffer(&context, CL_MEM_READ_WRITE, bufferSize, nullptr, nullptr); auto storeMem = imageDesc.mem_object; 
imageDesc.mem_object = buffer2; auto memoryManager = context.getMemoryManager(); memoryManager->setVirtualPaddingSupport(true); auto buffer = castToObject(imageDesc.mem_object); std::unique_ptr imageFromBuffer(createImage()); ASSERT_EQ(CL_SUCCESS, retVal); //graphics allocation for image and buffer is the same auto bufferGraphicsAllocation = buffer->getGraphicsAllocation(context.getDevice(0)->getRootDeviceIndex()); auto imageGraphicsAllocation = imageFromBuffer->getGraphicsAllocation(context.getDevice(0)->getRootDeviceIndex()); EXPECT_EQ(bufferSize, bufferGraphicsAllocation->getUnderlyingBufferSize()); auto imageDescriptor = Image::convertDescriptor(imageDesc); auto imgInfo = MockGmm::initImgInfo(imageDescriptor, 0, &imageFromBuffer->getSurfaceFormatInfo().surfaceFormat); auto queryGmm = MockGmm::queryImgParams(context.getDevice(0)->getGmmClientContext(), imgInfo, false); EXPECT_GT(queryGmm->gmmResourceInfo->getSizeAllocation(), bufferSize); EXPECT_NE(bufferGraphicsAllocation, imageGraphicsAllocation); EXPECT_EQ(queryGmm->gmmResourceInfo->getSizeAllocation(), imageFromBuffer->getGraphicsAllocation(context.getDevice(0)->getRootDeviceIndex())->getUnderlyingBufferSize()); EXPECT_EQ(bufferSize, imageFromBuffer->getSize()); imageDesc.mem_object = storeMem; clReleaseMemObject(buffer2); } TEST_F(Image2dFromBufferTest, givenMemoryManagerSupportingVirtualPaddingWhen1DImageFromBufferImageIsCreatedThenVirtualPaddingIsNotApplied) { imageFormat.image_channel_data_type = CL_FLOAT; imageFormat.image_channel_order = CL_RGBA; imageDesc.image_width = 1024; imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D_BUFFER; //application calcualted buffer size auto bufferSize = imageDesc.image_width * 16; auto buffer2 = clCreateBuffer(&context, CL_MEM_READ_WRITE, bufferSize, nullptr, nullptr); auto storeMem = imageDesc.mem_object; imageDesc.mem_object = buffer2; auto memoryManager = context.getMemoryManager(); memoryManager->setVirtualPaddingSupport(true); auto buffer = 
castToObject(imageDesc.mem_object); std::unique_ptr imageFromBuffer(createImage()); ASSERT_EQ(CL_SUCCESS, retVal); //graphics allocation match auto bufferGraphicsAllocation = buffer->getGraphicsAllocation(context.getDevice(0)->getRootDeviceIndex()); auto imageGraphicsAllocation = imageFromBuffer->getGraphicsAllocation(context.getDevice(0)->getRootDeviceIndex()); EXPECT_EQ(bufferGraphicsAllocation, imageGraphicsAllocation); imageDesc.mem_object = storeMem; clReleaseMemObject(buffer2); } TEST_F(Image2dFromBufferTest, givenMemoryManagerSupporting1DImageFromBufferWhenNoBufferThenCreatesImage) { imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D_BUFFER; auto storeMem = imageDesc.mem_object; imageDesc.mem_object = nullptr; std::unique_ptr imageFromBuffer(createImage()); EXPECT_EQ(CL_SUCCESS, retVal); imageDesc.mem_object = storeMem; } TEST_F(Image2dFromBufferTest, givenBufferWhenImageFromBufferThenIsImageFromBufferSetAndAllocationTypeIsBuffer) { cl_int errCode = 0; auto buffer = Buffer::create(&context, 0, 1, nullptr, errCode); imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; auto memObj = imageDesc.mem_object; imageDesc.mem_object = buffer; std::unique_ptr imageFromBuffer(createImage()); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(imageFromBuffer->isImageFromBuffer()); auto graphicsAllocation = imageFromBuffer->getGraphicsAllocation(context.getDevice(0)->getRootDeviceIndex()); EXPECT_TRUE(AllocationType::BUFFER_HOST_MEMORY == graphicsAllocation->getAllocationType()); buffer->release(); imageDesc.mem_object = memObj; } HWTEST_F(Image2dFromBufferTest, givenBufferWhenImageFromBufferThenIsImageFromBufferSetAndAllocationTypeIsBufferNullptr) { class MockHwHelperHw : public HwHelperHw { public: bool checkResourceCompatibility(GraphicsAllocation &graphicsAllocation) override { return false; } }; auto raiiFactory = RAIIHwHelperFactory(context.getDevice(0)->getHardwareInfo().platform.eRenderCoreFamily); cl_int errCode = CL_SUCCESS; auto buffer = Buffer::create(&context, 0, 1, nullptr, 
errCode); imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; auto memObj = imageDesc.mem_object; imageDesc.mem_object = buffer; Image *imageFromBuffer = createImage(); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); EXPECT_EQ(imageFromBuffer, nullptr); buffer->release(); imageDesc.mem_object = memObj; } compute-runtime-22.14.22890/opencl/test/unit_test/mem_obj/image2d_tests.cpp000066400000000000000000000075171422164147700265070ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/helpers/cl_memory_properties_helpers.h" #include "opencl/source/mem_obj/image.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" using namespace NEO; static const unsigned int testImageDimensions = 32; class CreateImage2DTest : public ClDeviceFixture, public testing::TestWithParam { public: CreateImage2DTest() { } protected: void SetUp() override { ClDeviceFixture::SetUp(); types = GetParam(); // clang-format off imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_RGBA; imageDesc.image_type = types; imageDesc.image_width = testImageDimensions; imageDesc.image_height = testImageDimensions; imageDesc.image_depth = 0; imageDesc.image_array_size = 1; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = NULL; // clang-format on if (types == CL_MEM_OBJECT_IMAGE2D_ARRAY) { imageDesc.image_array_size = 10; } context = new MockContext(pClDevice); } void TearDown() override { delete context; ClDeviceFixture::TearDown(); } Image *createImageWithFlags(cl_mem_flags flags) { auto surfaceFormat = Image::getSurfaceFormatFromTable( flags, &imageFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); 
return Image::create(context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context->getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal); } cl_image_format imageFormat; cl_image_desc imageDesc; cl_int retVal = CL_SUCCESS; MockContext *context; cl_mem_object_type types = 0; }; typedef CreateImage2DTest CreateImage2DType; HWTEST_P(CreateImage2DType, GivenValidTypeWhenCreatingImageThenImageCreatedWithCorrectParams) { auto image = createImageWithFlags(CL_MEM_READ_WRITE); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, image); auto imgDesc = image->getImageDesc(); EXPECT_NE(0u, imgDesc.image_width); EXPECT_NE(0u, imgDesc.image_height); EXPECT_EQ(0u, imgDesc.image_depth); EXPECT_NE(0u, imgDesc.image_row_pitch); EXPECT_GE(imgDesc.image_slice_pitch, imgDesc.image_row_pitch); if (types == CL_MEM_OBJECT_IMAGE2D) { EXPECT_EQ(0u, imgDesc.image_array_size); } else if (types == CL_MEM_OBJECT_IMAGE2D_ARRAY) { EXPECT_NE(0u, imgDesc.image_array_size); } else { ASSERT_TRUE(false); } EXPECT_EQ(image->getCubeFaceIndex(), static_cast(__GMM_NO_CUBE_MAP)); EXPECT_EQ(!UnitTestHelper::tiledImagesSupported, image->isMemObjZeroCopy()); typedef typename FamilyType::RENDER_SURFACE_STATE SURFACE_STATE; auto imageHw = static_cast *>(image); EXPECT_EQ(SURFACE_STATE::SURFACE_TYPE_SURFTYPE_2D, imageHw->surfaceType); SurfaceOffsets surfaceOffsets; image->getSurfaceOffsets(surfaceOffsets); EXPECT_EQ(0u, surfaceOffsets.offset); EXPECT_EQ(0u, surfaceOffsets.xOffset); EXPECT_EQ(0u, surfaceOffsets.yOffset); EXPECT_EQ(0u, surfaceOffsets.yOffsetForUVplane); delete image; } static cl_mem_object_type Image2DTypes[] = { CL_MEM_OBJECT_IMAGE2D, CL_MEM_OBJECT_IMAGE2D_ARRAY}; INSTANTIATE_TEST_CASE_P( CreateImage2DTestCreate, CreateImage2DType, testing::ValuesIn(Image2DTypes)); compute-runtime-22.14.22890/opencl/test/unit_test/mem_obj/image3d_tests.cpp000066400000000000000000000122751422164147700265050ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel 
Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/aligned_memory.h" #include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/mocks/mock_gmm.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/helpers/cl_memory_properties_helpers.h" #include "opencl/source/mem_obj/image.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" using namespace NEO; static const unsigned int testImageDimensions = 31; class CreateImage3DTest : public ClDeviceFixture, public testing::TestWithParam { public: CreateImage3DTest() {} protected: void SetUp() override { ClDeviceFixture::SetUp(); context = new MockContext(pClDevice); // clang-format off imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_RGBA; imageDesc.image_type = CL_MEM_OBJECT_IMAGE3D; imageDesc.image_width = testImageDimensions; imageDesc.image_height = testImageDimensions; imageDesc.image_depth = testImageDimensions; imageDesc.image_array_size = 1; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = NULL; // clang-format on } void TearDown() override { delete context; ClDeviceFixture::TearDown(); } cl_image_format imageFormat; cl_image_desc imageDesc; cl_int retVal = CL_SUCCESS; MockContext *context; cl_mem_object_type types = 0; }; HWTEST_F(CreateImage3DTest, WhenCreatingImageThenPropertiesAreSetCorrectly) { cl_mem_flags flags = CL_MEM_READ_WRITE; auto surfaceFormat = Image::getSurfaceFormatFromTable( flags, &imageFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); auto image = Image::create(context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context->getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, image); auto 
imgDesc = image->getImageDesc(); EXPECT_NE(0u, imgDesc.image_width); EXPECT_NE(0u, imgDesc.image_height); EXPECT_NE(0u, imgDesc.image_depth); EXPECT_NE(0u, imgDesc.image_slice_pitch); EXPECT_EQ(0u, imgDesc.image_array_size); EXPECT_NE(0u, imgDesc.image_row_pitch); EXPECT_EQ(image->getCubeFaceIndex(), static_cast(__GMM_NO_CUBE_MAP)); EXPECT_EQ(!UnitTestHelper::tiledImagesSupported, image->isMemObjZeroCopy()); typedef typename FamilyType::RENDER_SURFACE_STATE SURFACE_STATE; auto imageHw = static_cast *>(image); EXPECT_EQ(SURFACE_STATE::SURFACE_TYPE_SURFTYPE_3D, imageHw->surfaceType); delete image; } HWTEST_F(CreateImage3DTest, GivenTiledOrForcedLinearWhenCreatingImageThenPropertiesAreSetCorrectly) { bool defaultTiling = DebugManager.flags.ForceLinearImages.get(); imageDesc.image_height = 1; auto surfaceFormat = Image::getSurfaceFormatFromTable(0, &imageFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); auto imageDescriptor = Image::convertDescriptor(imageDesc); auto imgInfo = MockGmm::initImgInfo(imageDescriptor, 0, &surfaceFormat->surfaceFormat); MockGmm::queryImgParams(context->getDevice(0)->getGmmClientContext(), imgInfo, false); auto memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(0, 0, 0, &context->getDevice(0)->getDevice()); auto image = Image::create( context, memoryProperties, 0, 0, surfaceFormat, &imageDesc, nullptr, retVal); ASSERT_NE(nullptr, image); EXPECT_EQ(image->getSize(), imgInfo.size); EXPECT_EQ(image->getImageDesc().image_slice_pitch, imgInfo.slicePitch); EXPECT_GE(image->getImageDesc().image_slice_pitch, image->getImageDesc().image_row_pitch); EXPECT_EQ(image->getImageDesc().image_row_pitch, imgInfo.rowPitch); EXPECT_EQ(image->getQPitch(), imgInfo.qPitch); delete image; DebugManager.flags.ForceLinearImages.set(!defaultTiling); // query again surfaceFormat = Image::getSurfaceFormatFromTable(0, &imageFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); 
MockGmm::queryImgParams(context->getDevice(0)->getGmmClientContext(), imgInfo, false); image = Image::create( context, memoryProperties, 0, 0, surfaceFormat, &imageDesc, nullptr, retVal); ASSERT_NE(nullptr, image); EXPECT_EQ(image->getSize(), imgInfo.size); EXPECT_EQ(image->getImageDesc().image_slice_pitch, imgInfo.slicePitch); EXPECT_EQ(image->getImageDesc().image_row_pitch, imgInfo.rowPitch); EXPECT_GE(image->getImageDesc().image_slice_pitch, image->getImageDesc().image_row_pitch); EXPECT_EQ(image->getQPitch(), imgInfo.qPitch); delete image; DebugManager.flags.ForceLinearImages.set(defaultTiling); } compute-runtime-22.14.22890/opencl/test/unit_test/mem_obj/image_array_size_tests.cpp000066400000000000000000000214351422164147700305040ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/aligned_memory.h" #include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/helpers/cl_memory_properties_helpers.h" #include "opencl/source/mem_obj/image.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" using namespace NEO; static const unsigned int testImageDimensions = 17; class ImageArraySizeTest : public ClDeviceFixture, public testing::TestWithParam { public: ImageArraySizeTest() { } protected: void SetUp() override { ClDeviceFixture::SetUp(); types = GetParam(); // clang-format off imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_RGBA; imageDesc.image_type = types; imageDesc.image_width = testImageDimensions; imageDesc.image_height = testImageDimensions; imageDesc.image_depth = 0; imageDesc.image_array_size = 10; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = NULL; // clang-format on context = new 
MockContext(pClDevice); if (types == CL_MEM_OBJECT_IMAGE1D_BUFFER) { imageDesc.mem_object = clCreateBuffer(context, CL_MEM_ALLOC_HOST_PTR, testImageDimensions, nullptr, nullptr); } } void TearDown() override { if (types == CL_MEM_OBJECT_IMAGE1D_BUFFER) { clReleaseMemObject(imageDesc.mem_object); } delete context; ClDeviceFixture::TearDown(); } cl_image_format imageFormat; cl_image_desc imageDesc; cl_int retVal = CL_SUCCESS; MockContext *context; cl_mem_object_type types = 0; }; typedef ImageArraySizeTest CreateImageArraySize; HWTEST_P(CreateImageArraySize, GivenArrayTypeWhenCreatingImageThenImageCreatedWithCorrectParams) { cl_mem_flags flags = CL_MEM_READ_WRITE; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); auto image = Image::create( context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context->getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, image); if (types == CL_MEM_OBJECT_IMAGE1D_ARRAY) { EXPECT_TRUE(image->isMemObjZeroCopy()); auto address = image->getCpuAddress(); EXPECT_NE(nullptr, address); } else if (types == CL_MEM_OBJECT_IMAGE2D_ARRAY) { EXPECT_EQ(!UnitTestHelper::tiledImagesSupported, image->isMemObjZeroCopy()); } ASSERT_EQ(10u, image->getImageDesc().image_array_size); delete image; } static cl_mem_object_type ArrayImageTypes[] = { CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE2D_ARRAY}; INSTANTIATE_TEST_CASE_P( ImageArraySizeTestCreate, CreateImageArraySize, testing::ValuesIn(ArrayImageTypes)); typedef ImageArraySizeTest CreateImageNonArraySize; HWTEST_P(CreateImageNonArraySize, GivenNonArrayTypeWhenCreatingImageThenImageCreatedWithCorrectParams) { cl_mem_flags flags = CL_MEM_READ_WRITE; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, 
context->getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); auto image = Image::create( context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context->getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, image); if (types == CL_MEM_OBJECT_IMAGE2D || types == CL_MEM_OBJECT_IMAGE3D) { EXPECT_EQ(!UnitTestHelper::tiledImagesSupported, image->isMemObjZeroCopy()); } else { EXPECT_TRUE(image->isMemObjZeroCopy()); auto address = image->getCpuAddress(); EXPECT_NE(nullptr, address); } ASSERT_EQ(0u, image->getImageDesc().image_array_size); delete image; } static cl_mem_object_type NonArrayImageTypes[] = { CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE1D_BUFFER, CL_MEM_OBJECT_IMAGE2D, CL_MEM_OBJECT_IMAGE3D}; INSTANTIATE_TEST_CASE_P( ImageArraySizeTest_Create, CreateImageNonArraySize, testing::ValuesIn(NonArrayImageTypes)); typedef ImageArraySizeTest CreateImageSize; HWTEST_P(CreateImageSize, GivenImageTypeAndRegionWhenAskedForHostPtrSizeThenProperSizeIsBeingReturned) { size_t region[3] = {100, 200, 300}; auto rowPitch = 1000; auto slicePitch = 4000; auto pixelSize = 4; auto imageType = GetParam(); auto size = Image::calculateHostPtrSize(region, rowPitch, slicePitch, pixelSize, imageType); if ((imageType == CL_MEM_OBJECT_IMAGE1D) || (imageType == CL_MEM_OBJECT_IMAGE1D_BUFFER)) { EXPECT_EQ(region[0] * pixelSize, size); } else if (imageType == CL_MEM_OBJECT_IMAGE2D) { EXPECT_EQ((region[1] - 1) * rowPitch + region[0] * pixelSize, size); } else if (imageType == CL_MEM_OBJECT_IMAGE1D_ARRAY) { EXPECT_EQ((region[1] - 1) * slicePitch + region[0] * pixelSize, size); } else if ((imageType == CL_MEM_OBJECT_IMAGE3D) || (imageType == CL_MEM_OBJECT_IMAGE2D_ARRAY)) { EXPECT_EQ((region[2] - 1) * slicePitch + (region[1] - 1) * rowPitch + region[0] * pixelSize, size); } else { EXPECT_EQ(0u, size); } } typedef ImageArraySizeTest CreateImageOffset; HWTEST_P(CreateImageOffset, 
GivenImageTypeAndRegionWhenAskedForHostPtrOffsetThenProperOffsetIsBeingReturned) { size_t region[3] = {100, 1, 1}; size_t origin[3] = {0, 0, 0}; auto rowPitch = 1000; auto slicePitch = 0; auto pixelSize = 4; size_t imageOffset; auto imageType = GetParam(); switch (imageType) { case CL_MEM_OBJECT_IMAGE1D: case CL_MEM_OBJECT_IMAGE1D_BUFFER: Image::calculateHostPtrOffset(&imageOffset, origin, region, rowPitch, slicePitch, imageType, pixelSize); EXPECT_EQ(origin[0] * pixelSize, imageOffset); break; case CL_MEM_OBJECT_IMAGE2D: region[1] = 200; Image::calculateHostPtrOffset(&imageOffset, origin, region, rowPitch, slicePitch, imageType, pixelSize); EXPECT_EQ(origin[1] * rowPitch + origin[0] * pixelSize, imageOffset); break; case CL_MEM_OBJECT_IMAGE1D_ARRAY: slicePitch = 4000; Image::calculateHostPtrOffset(&imageOffset, origin, region, rowPitch, slicePitch, imageType, pixelSize); EXPECT_EQ(origin[1] * slicePitch + origin[0] * pixelSize, imageOffset); break; case CL_MEM_OBJECT_IMAGE3D: case CL_MEM_OBJECT_IMAGE2D_ARRAY: region[2] = 300; Image::calculateHostPtrOffset(&imageOffset, origin, region, rowPitch, slicePitch, imageType, pixelSize); EXPECT_EQ(origin[2] * slicePitch + origin[1] * rowPitch + origin[0] * pixelSize, imageOffset); break; case CL_MEM_OBJECT_BUFFER: Image::calculateHostPtrOffset(&imageOffset, origin, region, rowPitch, slicePitch, imageType, pixelSize); EXPECT_EQ(0u, imageOffset); break; } } typedef ImageArraySizeTest CheckImageType; TEST_P(CheckImageType, GivenImageTypeWhenImageTypeIsCheckedThenProperValueIsReturned) { auto imageType = GetParam(); switch (imageType) { case CL_MEM_OBJECT_IMAGE2D: EXPECT_TRUE(Image::isImage2d(imageType)); EXPECT_TRUE(Image::isImage2dOr2dArray(imageType)); break; case CL_MEM_OBJECT_IMAGE2D_ARRAY: EXPECT_FALSE(Image::isImage2d(imageType)); EXPECT_TRUE(Image::isImage2dOr2dArray(imageType)); break; default: EXPECT_FALSE(Image::isImage2d(imageType)); EXPECT_FALSE(Image::isImage2dOr2dArray(imageType)); break; } } static 
cl_mem_object_type AllImageTypes[] = { 0, //negative scenario CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE1D_BUFFER, CL_MEM_OBJECT_IMAGE2D, CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE3D, CL_MEM_OBJECT_IMAGE2D_ARRAY}; INSTANTIATE_TEST_CASE_P( ImageArraySizeTest_Create, CreateImageSize, testing::ValuesIn(AllImageTypes)); static cl_mem_object_type AllImageTypesWithBadOne[] = { 0, //negative scenario CL_MEM_OBJECT_BUFFER, CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE1D_BUFFER, CL_MEM_OBJECT_IMAGE2D, CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE3D, CL_MEM_OBJECT_IMAGE2D_ARRAY}; INSTANTIATE_TEST_CASE_P( ImageArraySizeTest_Create, CreateImageOffset, testing::ValuesIn(AllImageTypesWithBadOne)); compute-runtime-22.14.22890/opencl/test/unit_test/mem_obj/image_compression_fixture.h000066400000000000000000000036251422164147700306670ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/source/helpers/surface_formats.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_context.h" using namespace NEO; class ImageCompressionTests : public ::testing::Test { public: class MyMemoryManager : public MockMemoryManager { public: using MockMemoryManager::MockMemoryManager; GraphicsAllocation *allocateGraphicsMemoryForImage(const AllocationData &allocationData) override { mockMethodCalled = true; capturedPreferCompressed = allocationData.flags.preferCompressed; return OsAgnosticMemoryManager::allocateGraphicsMemoryForImage(allocationData); } bool mockMethodCalled = false; bool capturedPreferCompressed = false; }; void SetUp() override { mockExecutionEnvironment = new MockExecutionEnvironment(); myMemoryManager = new 
MyMemoryManager(*mockExecutionEnvironment); mockExecutionEnvironment->memoryManager.reset(myMemoryManager); mockDevice = std::make_unique(MockDevice::createWithExecutionEnvironment(nullptr, mockExecutionEnvironment, 0u)); mockContext = make_releaseable(mockDevice.get()); } MockExecutionEnvironment *mockExecutionEnvironment; std::unique_ptr mockDevice; ReleaseableObjectPtr mockContext; MyMemoryManager *myMemoryManager = nullptr; cl_image_desc imageDesc = {}; cl_image_format imageFormat{CL_R, CL_UNSIGNED_INT8}; cl_mem_flags flags = CL_MEM_READ_WRITE; cl_int retVal = CL_SUCCESS; }; compute-runtime-22.14.22890/opencl/test/unit_test/mem_obj/image_format_tests.cpp000066400000000000000000000027241422164147700276240ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/mem_obj/image.h" #include "gtest/gtest.h" using namespace NEO; struct MockImage : public Image { using Image::hasAlphaChannel; }; typedef ::testing::TestWithParam> HasAlphaChannelTest; TEST_P(HasAlphaChannelTest, GivenImageFormatWhenCheckingForAlphaChannelThenReturnCorrectValue) { cl_image_format imageFormat; bool expectedValue; std::tie(imageFormat.image_channel_order, expectedValue) = GetParam(); EXPECT_EQ(expectedValue, MockImage::hasAlphaChannel(&imageFormat)); } std::tuple paramsForAlphaChannelTests[] = { {CL_R, false}, {CL_A, true}, {CL_RG, false}, {CL_RA, true}, {CL_RGB, false}, {CL_RGBA, true}, {CL_BGRA, true}, {CL_ARGB, true}, {CL_INTENSITY, true}, {CL_LUMINANCE, false}, {CL_Rx, true}, {CL_RGx, true}, {CL_RGBx, true}, {CL_DEPTH, false}, {CL_DEPTH_STENCIL, false}, {CL_sRGB, false}, {CL_sRGBx, true}, {CL_sRGBA, true}, {CL_sBGRA, true}, {CL_ABGR, true}, {CL_NV12_INTEL, false}, {CL_YUYV_INTEL, false}, {CL_UYVY_INTEL, false}, {CL_YVYU_INTEL, false}, {CL_VYUY_INTEL, false}}; INSTANTIATE_TEST_CASE_P( ImageFormatTests, HasAlphaChannelTest, ::testing::ValuesIn(paramsForAlphaChannelTests)); 
compute-runtime-22.14.22890/opencl/test/unit_test/mem_obj/image_from_subbuffer_tests.cpp000066400000000000000000000101651422164147700313400ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/aligned_memory.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/helpers/cl_memory_properties_helpers.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/mem_obj/image.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include using namespace NEO; // Tests for cl_khr_image2d_from_buffer class ImageFromSubBufferTest : public ClDeviceFixture, public ::testing::Test { public: ImageFromSubBufferTest() {} protected: void SetUp() override { imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_RGBA; imageDesc.image_array_size = 0; imageDesc.image_depth = 0; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_height = 128 / 2; imageDesc.image_width = 256 / 2; imageDesc.num_mip_levels = 0; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; imageDesc.num_samples = 0; size = 128 * 256 * 4; hostPtr = alignedMalloc(size, 16); ASSERT_NE(nullptr, hostPtr); parentBuffer = clCreateBuffer(&context, CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE, size, hostPtr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); const cl_buffer_region region = {size / 2, size / 2}; subBuffer = clCreateSubBuffer(parentBuffer, CL_MEM_READ_WRITE, CL_BUFFER_CREATE_TYPE_REGION, reinterpret_cast(®ion), &retVal); ASSERT_EQ(CL_SUCCESS, retVal); imageDesc.mem_object = subBuffer; ASSERT_NE(nullptr, imageDesc.mem_object); } void TearDown() override { clReleaseMemObject(subBuffer); clReleaseMemObject(parentBuffer); alignedFree(hostPtr); } Image *createImage() { cl_mem_flags flags = CL_MEM_READ_ONLY; auto surfaceFormat = Image::getSurfaceFormatFromTable( flags, &imageFormat, 
context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); return Image::create(&context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context.getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imageDesc, NULL, retVal); } cl_image_format imageFormat; cl_image_desc imageDesc; cl_int retVal = CL_SUCCESS; MockContext context; void *hostPtr; size_t size; cl_mem parentBuffer; cl_mem subBuffer; }; TEST_F(ImageFromSubBufferTest, GivenSubBufferWithOffsetWhenCreatingImageThenOffsetsAreCorrect) { std::unique_ptr imageFromSubBuffer(createImage()); EXPECT_NE(nullptr, imageFromSubBuffer); SurfaceOffsets surfaceOffsets = {0}; imageFromSubBuffer->getSurfaceOffsets(surfaceOffsets); uint32_t offsetExpected = static_cast(size) / 2; EXPECT_EQ(offsetExpected, surfaceOffsets.offset); EXPECT_EQ(0u, surfaceOffsets.xOffset); EXPECT_EQ(0u, surfaceOffsets.yOffset); EXPECT_EQ(0u, surfaceOffsets.yOffsetForUVplane); } TEST_F(ImageFromSubBufferTest, GivenSubBufferWithOffsetGreaterThan4gbWhenCreatingImageThenSurfaceOffsetsAreCorrect) { Buffer *buffer = castToObject(parentBuffer); uint64_t offsetExpected = 0; cl_buffer_region region = {0, size / 2}; if constexpr (is64bit) { offsetExpected = 8 * GB; region = {static_cast(offsetExpected), size / 2}; } Buffer *subBufferWithBigOffset = buffer->createSubBuffer(CL_MEM_READ_WRITE, 0, ®ion, retVal); imageDesc.mem_object = subBufferWithBigOffset; std::unique_ptr imageFromSubBuffer(createImage()); EXPECT_NE(nullptr, imageFromSubBuffer); SurfaceOffsets surfaceOffsets = {0}; imageFromSubBuffer->getSurfaceOffsets(surfaceOffsets); EXPECT_EQ(offsetExpected, surfaceOffsets.offset); EXPECT_EQ(0u, surfaceOffsets.xOffset); EXPECT_EQ(0u, surfaceOffsets.yOffset); EXPECT_EQ(0u, surfaceOffsets.yOffsetForUVplane); subBufferWithBigOffset->release(); } compute-runtime-22.14.22890/opencl/test/unit_test/mem_obj/image_redescribe_tests.cpp000066400000000000000000000225231422164147700304420ustar00rootroot00000000000000/* * Copyright 
(C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/aligned_memory.h" #include "shared/source/memory_manager/os_agnostic_memory_manager.h" #include "opencl/source/helpers/cl_memory_properties_helpers.h" #include "opencl/source/helpers/surface_formats.h" #include "opencl/source/mem_obj/image.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "gtest/gtest.h" #include "igfxfmid.h" extern GFXCORE_FAMILY renderCoreFamily; using namespace NEO; class ImageRedescribeTest : public testing::TestWithParam> { protected: void SetUp() override { cl_image_format imageFormat; cl_image_desc imageDesc; std::tie(indexImageFormat, ImageType) = this->GetParam(); ArrayRef readWriteSurfaceFormats = SurfaceFormats::readWrite(); auto &surfaceFormatInfo = readWriteSurfaceFormats[indexImageFormat]; imageFormat = surfaceFormatInfo.OCLImageFormat; auto imageHeight = ImageType == CL_MEM_OBJECT_IMAGE1D_ARRAY ? 0 : 32; auto imageArrays = ImageType == CL_MEM_OBJECT_IMAGE1D_ARRAY || ImageType == CL_MEM_OBJECT_IMAGE2D_ARRAY ? 
7 : 1; imageDesc.image_type = ImageType; imageDesc.image_width = 32; imageDesc.image_height = imageHeight; imageDesc.image_depth = 1; imageDesc.image_array_size = imageArrays; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = NULL; retVal = CL_INVALID_VALUE; cl_mem_flags flags = CL_MEM_READ_WRITE; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); image.reset(Image::create( &context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context.getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal)); ASSERT_NE(nullptr, image); } cl_int retVal = CL_SUCCESS; MockContext context; std::unique_ptr image; size_t indexImageFormat = 0; uint32_t ImageType; }; TEST_P(ImageRedescribeTest, givenImageWhenItIsRedescribedThenItContainsProperFormatFlagsAddressAndSameElementSizeInBytes) { std::unique_ptr imageNew(image->redescribe()); ASSERT_NE(nullptr, imageNew); ASSERT_NE(image, imageNew); EXPECT_EQ(static_cast(CL_MEM_USE_HOST_PTR), imageNew->getFlags() & CL_MEM_USE_HOST_PTR); EXPECT_EQ(image->getCpuAddress(), imageNew->getCpuAddress()); EXPECT_NE(static_cast(CL_FLOAT), imageNew->getSurfaceFormatInfo().OCLImageFormat.image_channel_data_type); EXPECT_NE(static_cast(CL_HALF_FLOAT), imageNew->getSurfaceFormatInfo().OCLImageFormat.image_channel_data_type); EXPECT_EQ(imageNew->getSurfaceFormatInfo().surfaceFormat.NumChannels * imageNew->getSurfaceFormatInfo().surfaceFormat.PerChannelSizeInBytes, imageNew->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes); EXPECT_EQ(image->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes, imageNew->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes); } TEST_P(ImageRedescribeTest, givenImageWhenItIsRedescribedThenNewImageFormatHasNumberOfChannelsDependingOnBytesPerPixel) { std::unique_ptr 
imageNew(image->redescribe()); ASSERT_NE(nullptr, imageNew); size_t bytesPerPixel = image->getSurfaceFormatInfo().surfaceFormat.NumChannels * image->getSurfaceFormatInfo().surfaceFormat.PerChannelSizeInBytes; size_t channelsExpected = 0; switch (bytesPerPixel) { case 1: case 2: case 4: channelsExpected = 1; break; case 8: channelsExpected = 2; break; case 16: channelsExpected = 4; break; } EXPECT_EQ(channelsExpected, imageNew->getSurfaceFormatInfo().surfaceFormat.NumChannels); } TEST_P(ImageRedescribeTest, givenImageWhenItIsRedescribedThenNewImageDimensionsAreMatchingTheRedescribedImage) { std::unique_ptr imageNew(image->redescribe()); ASSERT_NE(nullptr, imageNew); auto bytesWide = image->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes * image->getImageDesc().image_width; auto bytesWideNew = imageNew->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes * imageNew->getImageDesc().image_width; EXPECT_EQ(bytesWide, bytesWideNew); EXPECT_EQ(imageNew->getImageDesc().image_height, image->getImageDesc().image_height); EXPECT_EQ(imageNew->getImageDesc().image_array_size, image->getImageDesc().image_array_size); EXPECT_EQ(imageNew->getImageDesc().image_depth, image->getImageDesc().image_depth); EXPECT_EQ(imageNew->getImageDesc().image_type, image->getImageDesc().image_type); EXPECT_EQ(imageNew->getQPitch(), image->getQPitch()); EXPECT_EQ(imageNew->getImageDesc().image_width, image->getImageDesc().image_width); } TEST_P(ImageRedescribeTest, givenImageWhenItIsRedescribedThenCubeFaceIndexIsProperlySet) { std::unique_ptr imageNew(image->redescribe()); ASSERT_NE(nullptr, imageNew); ASSERT_EQ(imageNew->getCubeFaceIndex(), __GMM_NO_CUBE_MAP); for (uint32_t n = __GMM_CUBE_FACE_POS_X; n < __GMM_MAX_CUBE_FACE; n++) { image->setCubeFaceIndex(n); imageNew.reset(image->redescribe()); ASSERT_NE(nullptr, imageNew); ASSERT_EQ(imageNew->getCubeFaceIndex(), n); imageNew.reset(image->redescribeFillImage()); ASSERT_NE(nullptr, imageNew); 
ASSERT_EQ(imageNew->getCubeFaceIndex(), n); } } TEST_P(ImageRedescribeTest, givenImageWithMaxSizesWhenItIsRedescribedThenNewImageDoesNotExceedMaxSizes) { cl_image_format imageFormat; cl_image_desc imageDesc; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); const auto &caps = device->getDeviceInfo(); const auto &sharedCaps = device->getSharedDeviceInfo(); auto memoryManager = (OsAgnosticMemoryManager *)context.getMemoryManager(); memoryManager->turnOnFakingBigAllocations(); ArrayRef readWriteSurfaceFormats = SurfaceFormats::readWrite(); auto &surfaceFormatInfo = readWriteSurfaceFormats[indexImageFormat]; imageFormat = surfaceFormatInfo.OCLImageFormat; auto imageWidth = 1; auto imageHeight = 1; auto imageArrays = ImageType == CL_MEM_OBJECT_IMAGE1D_ARRAY || ImageType == CL_MEM_OBJECT_IMAGE2D_ARRAY ? 7 : 1; size_t maxImageWidth = 0; size_t maxImageHeight = 0; switch (ImageType) { case CL_MEM_OBJECT_IMAGE1D: case CL_MEM_OBJECT_IMAGE1D_ARRAY: imageWidth = 16384; maxImageWidth = static_cast(sharedCaps.maxMemAllocSize); maxImageHeight = 1; break; case CL_MEM_OBJECT_IMAGE2D: case CL_MEM_OBJECT_IMAGE2D_ARRAY: imageHeight = 16384; maxImageWidth = sharedCaps.image2DMaxWidth; maxImageHeight = sharedCaps.image2DMaxHeight; break; case CL_MEM_OBJECT_IMAGE3D: imageHeight = 16384; maxImageWidth = caps.image3DMaxWidth; maxImageHeight = caps.image3DMaxHeight; break; } imageDesc.image_type = ImageType; imageDesc.image_width = imageWidth; imageDesc.image_height = imageHeight; imageDesc.image_depth = 1; imageDesc.image_array_size = imageArrays; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = NULL; cl_mem_flags flags = CL_MEM_READ_WRITE; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); auto bigImage = std::unique_ptr(Image::create( 
&context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context.getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal)); std::unique_ptr imageNew(bigImage->redescribe()); ASSERT_NE(nullptr, imageNew); EXPECT_GE(maxImageWidth, imageNew->getImageDesc().image_width); EXPECT_GE(maxImageHeight, imageNew->getImageDesc().image_height); } static uint32_t ImageTypes[] = { CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE2D, CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE2D_ARRAY}; decltype(SurfaceFormats::readWrite().size()) readWriteSurfaceFormatsStart = 0u; INSTANTIATE_TEST_CASE_P( Redescribe, ImageRedescribeTest, testing::Combine( ::testing::Range(readWriteSurfaceFormatsStart, SurfaceFormats::readWrite().size()), ::testing::ValuesIn(ImageTypes))); TEST(ImageRedescribeTestSimple, givenImageWhenItIsRedescribedThenCreateFunctionIsSameAsInOriginalImage) { MockContext context; std::unique_ptr image(ImageHelper::create(&context)); std::unique_ptr imageNew(image->redescribe()); ASSERT_NE(nullptr, imageNew); EXPECT_EQ(image->createFunction, imageNew->createFunction); } compute-runtime-22.14.22890/opencl/test/unit_test/mem_obj/image_release_mapped_ptr_tests.cpp000066400000000000000000000104341422164147700321640ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/event/user_event.h" #include "opencl/source/mem_obj/image.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_event.h" using namespace NEO; template class MyMockCommandQueue : public CommandQueueHw { public: MyMockCommandQueue(Context *context, ClDevice *device) : 
CommandQueueHw(context, device, nullptr, false){}; cl_int enqueueWriteImage(Image *dstImage, cl_bool blockingWrite, const size_t *origin, const size_t *region, size_t inputRowPitch, size_t inputSlicePitch, const void *ptr, GraphicsAllocation *mapAllocation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { passedBlockingWrite = blockingWrite; passedPtr = (void *)ptr; enqueueWriteImageCalled++; return CL_SUCCESS; } cl_int finish() override { finishCalled++; return CL_SUCCESS; } void *passedPtr = nullptr; cl_bool passedBlockingWrite = CL_INVALID_VALUE; unsigned int enqueueWriteImageCalled = 0; unsigned int finishCalled = 0; }; class ImageUnmapTest : public ::testing::Test { public: void SetUp() override { device = std::make_unique(MockClDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); context = std::make_unique(device.get()); image.reset(ImageHelper>::create(context.get())); } std::unique_ptr device; std::unique_ptr context; std::unique_ptr image; }; HWTEST_F(ImageUnmapTest, givenImageWhenUnmapMemObjIsCalledThenEnqueueNonBlockingMapImage) { std::unique_ptr> commandQueue(new MyMockCommandQueue(context.get(), device.get())); void *ptr = alignedMalloc(MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize); MemObjOffsetArray origin = {{0, 0, 0}}; MemObjSizeArray region = {{1, 1, 1}}; image->setAllocatedMapPtr(ptr); cl_map_flags mapFlags = CL_MAP_WRITE; image->addMappedPtr(ptr, 1, mapFlags, region, origin, 0, nullptr); AllocationProperties properties{0, false, MemoryConstants::cacheLineSize, AllocationType::MAP_ALLOCATION, false, device->getDeviceBitfield()}; auto allocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties(properties, ptr); image->setMapAllocation(allocation); commandQueue->enqueueUnmapMemObject(image.get(), ptr, 0, nullptr, nullptr); if (UnitTestHelper::tiledImagesSupported) { EXPECT_EQ(ptr, commandQueue->passedPtr); EXPECT_EQ((cl_bool)CL_FALSE, 
commandQueue->passedBlockingWrite); EXPECT_EQ(1u, commandQueue->enqueueWriteImageCalled); } else { EXPECT_EQ(0u, commandQueue->enqueueWriteImageCalled); } } HWTEST_F(ImageUnmapTest, givenImageWhenEnqueueMapImageIsCalledTwiceThenAllocatedMemoryPtrIsNotOverridden) { if (!UnitTestHelper::tiledImagesSupported) { GTEST_SKIP(); } cl_int retVal; size_t origin[] = {0, 0, 0}; size_t region[] = {1, 1, 1}; std::unique_ptr commandQueue(CommandQueue::create(context.get(), device.get(), nullptr, false, retVal)); commandQueue->enqueueMapImage(image.get(), CL_FALSE, 0, origin, region, nullptr, nullptr, 0, nullptr, nullptr, retVal); EXPECT_NE(nullptr, image->getAllocatedMapPtr()); void *ptr = image->getAllocatedMapPtr(); EXPECT_EQ(alignUp(ptr, MemoryConstants::pageSize), ptr); commandQueue->enqueueMapImage(image.get(), CL_FALSE, 0, origin, region, nullptr, nullptr, 0, nullptr, nullptr, retVal); EXPECT_EQ(ptr, image->getAllocatedMapPtr()); commandQueue->enqueueUnmapMemObject(image.get(), ptr, 0, nullptr, nullptr); } compute-runtime-22.14.22890/opencl/test/unit_test/mem_obj/image_set_arg_tests.cpp000066400000000000000000001530721422164147700277630ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gmm_helper/gmm.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/basic_math.h" #include "shared/source/helpers/ptr_math.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "shared/source/memory_manager/surface.h" #include "shared/test/common/mocks/mock_allocation_properties.h" #include "shared/test/common/mocks/mock_gmm.h" #include "shared/test/common/mocks/mock_gmm_resource_info.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/helpers/surface_formats.h" #include "opencl/source/kernel/kernel.h" #include 
"opencl/source/mem_obj/image.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "gtest/gtest.h" using namespace NEO; using namespace ::testing; class ImageSetArgTest : public ClDeviceFixture, public testing::Test { public: ImageSetArgTest() = default; protected: template void SetupChannels(int imgChannelOrder) { typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; expectedChannelRed = RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_RED; expectedChannelGreen = RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_GREEN; expectedChannelBlue = RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_BLUE; if (imgChannelOrder == CL_A) { expectedChannelRed = RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_ZERO; expectedChannelGreen = RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_ZERO; expectedChannelBlue = RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_ZERO; } else if (imgChannelOrder == CL_RA || imgChannelOrder == CL_R || imgChannelOrder == CL_Rx) { expectedChannelGreen = RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_ZERO; expectedChannelBlue = RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_ZERO; } else if (imgChannelOrder == CL_RG || imgChannelOrder == CL_RGx) { expectedChannelBlue = RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_ZERO; } } void SetUp() override { ClDeviceFixture::SetUp(); pKernelInfo = std::make_unique(); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1; // define kernel info pKernelInfo->heapInfo.pSsh = surfaceStateHeap; pKernelInfo->heapInfo.SurfaceStateHeapSize = sizeof(surfaceStateHeap); // setup kernel arg offsets pKernelInfo->addArgImage(0, 0x00); pKernelInfo->addArgImage(1, 0x40); program = std::make_unique(toClDeviceVector(*pClDevice)); retVal = CL_INVALID_VALUE; pMultiDeviceKernel = MultiDeviceKernel::create(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, 
rootDeviceIndex), &retVal); pKernel = static_cast(pMultiDeviceKernel->getKernel(rootDeviceIndex)); ASSERT_NE(nullptr, pKernel); ASSERT_EQ(CL_SUCCESS, retVal); pKernel->setKernelArgHandler(0, &Kernel::setArgImage); pKernel->setKernelArgHandler(1, &Kernel::setArgImage); context = new MockContext(pClDevice); srcImage = Image3dHelper<>::create(context); srcAllocation = srcImage->getGraphicsAllocation(pClDevice->getRootDeviceIndex()); ASSERT_NE(nullptr, srcImage); expectedChannelRed = 0; expectedChannelGreen = 0; expectedChannelBlue = 0; } void TearDown() override { delete srcImage; delete pMultiDeviceKernel; delete context; ClDeviceFixture::TearDown(); } cl_int retVal = CL_SUCCESS; MockContext *context; std::unique_ptr program; MockKernel *pKernel = nullptr; MultiDeviceKernel *pMultiDeviceKernel = nullptr; std::unique_ptr pKernelInfo; char surfaceStateHeap[0x80] = {}; Image *srcImage = nullptr; GraphicsAllocation *srcAllocation = nullptr; int expectedChannelRed; int expectedChannelGreen; int expectedChannelBlue; }; HWTEST_F(ImageSetArgTest, WhenSettingKernelArgImageThenSurfaceBaseAddressIsSetCorrectly) { typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->argAsImg(0).bindful)); srcImage->setImageArg(const_cast(surfaceState), false, 0, pClDevice->getRootDeviceIndex(), false); auto surfaceAddress = surfaceState->getSurfaceBaseAddress(); EXPECT_EQ(srcAllocation->getGpuAddress(), surfaceAddress); std::vector surfaces; pKernel->getResidency(surfaces); EXPECT_EQ(0u, surfaces.size()); } HWTEST_F(ImageSetArgTest, GivenMediaBlockImageWhenSettingImageArgThenCorrectValueIsSet) { typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; RENDER_SURFACE_STATE surfaceState; srcImage->setImageArg(&surfaceState, true, 0, pClDevice->getRootDeviceIndex(), false); auto computedWidth = surfaceState.getWidth(); auto expectedWidth = 
(srcImage->getImageDesc().image_width * srcImage->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes) / sizeof(uint32_t);

    EXPECT_EQ(expectedWidth, computedWidth);
}

// Checks that the programmed width equals the image descriptor's width and that
// the mip count LOD is zero.
HWTEST_F(ImageSetArgTest, GivenNormalImageWhenSettingImageArgThenCorrectValueIsSet) {
    typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE;
    RENDER_SURFACE_STATE surfaceState;

    srcImage->setImageArg(&surfaceState, true, 0, pClDevice->getRootDeviceIndex(), false);

    auto computedWidth = surfaceState.getWidth();

    EXPECT_EQ(srcImage->getImageDesc().image_width, computedWidth);
    EXPECT_EQ(0u, surfaceState.getMipCountLod());
}

// The mip tail start LOD is taken from the allocation's GMM resource info; once the
// default GMM is removed, the programmed value is expected to fall back to 0.
// Skipped on IGFX_GEN8_CORE.
HWTEST_F(ImageSetArgTest, givenImageWhenSettingMipTailStartLodThenProgramValueFromGmmResourceinfo) {
    typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE;

    if (pDevice->getHardwareInfo().platform.eRenderCoreFamily == IGFX_GEN8_CORE) {
        GTEST_SKIP();
    }
    RENDER_SURFACE_STATE surfaceState = {};
    const uint32_t mipTailStartLod = 4;

    auto gmm = srcAllocation->getDefaultGmm();
    // Fatal assertion: gmm is dereferenced on the next line, so the test must stop
    // here instead of crashing when it is null.
    ASSERT_NE(nullptr, gmm);
    auto mockGmmResourceInfo = static_cast(gmm->gmmResourceInfo.get());
    mockGmmResourceInfo->setMipTailStartLod(mipTailStartLod);

    srcImage->setImageArg(&surfaceState, false, 0, pClDevice->getRootDeviceIndex(), false);

    EXPECT_EQ(mipTailStartLod, surfaceState.getMipTailStartLod());

    // default value
    delete gmm;
    srcAllocation->setDefaultGmm(nullptr);

    srcImage->setImageArg(&surfaceState, false, 0, pClDevice->getRootDeviceIndex(), false);

    EXPECT_EQ(0u, surfaceState.getMipTailStartLod());
}

// Sets a cube face index on a 2D image and verifies the array-related surface state fields.
HWTEST_F(ImageSetArgTest, givenCubeMapIndexWhenSetKernelArgImageIsCalledThenModifySurfaceState) {
    typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE;

    uint32_t cubeFaceIndex = 2;

    Image *src2dImage = Image2dHelper<>::create(context);

    src2dImage->setCubeFaceIndex(cubeFaceIndex);

    auto surfaceState = reinterpret_cast(
        ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->argAsImg(0).bindful));

    src2dImage->setImageArg(const_cast(surfaceState), false, 0,
pClDevice->getRootDeviceIndex(), false); auto renderTargetViewExtent = surfaceState->getRenderTargetViewExtent(); auto minimumArrayElement = surfaceState->getMinimumArrayElement(); auto isImageArray = surfaceState->getSurfaceArray(); auto depth = surfaceState->getDepth(); EXPECT_EQ(renderTargetViewExtent, 1u); EXPECT_EQ(minimumArrayElement, cubeFaceIndex); EXPECT_EQ(depth, (__GMM_MAX_CUBE_FACE - cubeFaceIndex)); EXPECT_TRUE(isImageArray); delete src2dImage; } struct ImageSetArgSurfaceArrayTest : ImageSetArgTest { template void testSurfaceArrayProgramming(cl_mem_object_type imageType, size_t imageArraySize, bool expectedSurfaceArray) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; RENDER_SURFACE_STATE surfaceState; cl_image_desc imageDesc = Image2dDefaults::imageDesc; imageDesc.image_array_size = imageArraySize; imageDesc.image_type = imageType; std::unique_ptr image{Image2dHelper<>::create(context, &imageDesc)}; image->setCubeFaceIndex(__GMM_NO_CUBE_MAP); image->setImageArg(&surfaceState, false, 0, pClDevice->getRootDeviceIndex(), false); EXPECT_EQ(expectedSurfaceArray, surfaceState.getSurfaceArray()); } }; HWTEST_F(ImageSetArgSurfaceArrayTest, givenImage1DArrayAndImageArraySizeIsZeroWhenCallingSetImageArgThenDoNotProgramSurfaceArray) { testSurfaceArrayProgramming(CL_MEM_OBJECT_IMAGE1D_ARRAY, 0u, false); } HWTEST_F(ImageSetArgSurfaceArrayTest, givenImage2DArrayAndImageArraySizeIsZeroWhenCallingSetImageArgThenDoNotProgramSurfaceArray) { testSurfaceArrayProgramming(CL_MEM_OBJECT_IMAGE2D_ARRAY, 0u, false); } HWTEST_F(ImageSetArgSurfaceArrayTest, givenImage1DArrayAndImageArraySizeIsOneWhenCallingSetImageArgThenDoNotProgramSurfaceArray) { testSurfaceArrayProgramming(CL_MEM_OBJECT_IMAGE1D_ARRAY, 1u, false); } HWTEST_F(ImageSetArgSurfaceArrayTest, givenImage2DArrayAndImageArraySizeIsOneWhenCallingSetImageArgThenDoNotProgramSurfaceArray) { testSurfaceArrayProgramming(CL_MEM_OBJECT_IMAGE2D_ARRAY, 1u, false); } HWTEST_F(ImageSetArgSurfaceArrayTest, 
givenImage1DArrayAndImageArraySizeIsGreaterThanOneWhenCallingSetImageArgThenProgramSurfaceArray) { testSurfaceArrayProgramming(CL_MEM_OBJECT_IMAGE1D_ARRAY, 2u, true); } HWTEST_F(ImageSetArgSurfaceArrayTest, givenImage2DArrayAndImageArraySizeIsGreaterThanOneWhenCallingSetImageArgThenProgramSurfaceArray) { testSurfaceArrayProgramming(CL_MEM_OBJECT_IMAGE2D_ARRAY, 2u, true); } HWTEST_F(ImageSetArgSurfaceArrayTest, givenNonArrayImageWhenCallingSetImageArgThenDoNotProgramSurfaceArray) { testSurfaceArrayProgramming(CL_MEM_OBJECT_IMAGE1D_BUFFER, 2u, false); } HWTEST_F(ImageSetArgTest, givenImageArraySizeGreaterThanOneButTypeIsNotImageArrayWhenCallingSetImageArgThenDoNotProgramSurfaceArray) { MockContext context; McsSurfaceInfo mcsSurfaceInfo = {}; MockGraphicsAllocation *allocation = new MockGraphicsAllocation(0, 0x1000); ImageInfo imageInfo = {}; ClSurfaceFormatInfo surfaceFormatInfo{}; surfaceFormatInfo.surfaceFormat.GMMSurfaceFormat = GMM_FORMAT_B8G8R8A8_UNORM; surfaceFormatInfo.surfaceFormat.ImageElementSizeInBytes = 4u; imageInfo.surfaceFormat = &surfaceFormatInfo.surfaceFormat; cl_image_desc imageDesc = Image2dDefaults::imageDesc; imageDesc.image_array_size = 3u; imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D_BUFFER; imageInfo.imgDesc = Image::convertDescriptor(imageDesc); imageInfo.plane = GMM_NO_PLANE; auto gmm = MockGmm::queryImgParams(context.getDevice(0)->getGmmClientContext(), imageInfo, false); allocation->setDefaultGmm(gmm.release()); auto image = std::unique_ptr{Image::createSharedImage( &context, nullptr, mcsSurfaceInfo, GraphicsAllocationHelper::toMultiGraphicsAllocation(allocation), nullptr, CL_MEM_READ_WRITE, 0, &surfaceFormatInfo, imageInfo, 0, 0, 0)}; image->setCubeFaceIndex(__GMM_NO_CUBE_MAP); typename FamilyType::RENDER_SURFACE_STATE surfaceState{}; image->setImageArg(&surfaceState, false, 0, pClDevice->getRootDeviceIndex(), false); EXPECT_FALSE(surfaceState.getSurfaceArray()); } HWTEST_F(ImageSetArgTest, 
givenNonCubeMapIndexWhenSetKernelArgImageIsCalledThenDontModifySurfaceState) { typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->argAsImg(0).bindful)); EXPECT_EQ(srcImage->getCubeFaceIndex(), __GMM_NO_CUBE_MAP); srcImage->setImageArg(const_cast(surfaceState), false, 0, pClDevice->getRootDeviceIndex(), false); auto renderTargetViewExtent = surfaceState->getRenderTargetViewExtent(); auto minimumArrayElement = surfaceState->getMinimumArrayElement(); auto isImageArray = surfaceState->getSurfaceArray(); auto depth = surfaceState->getDepth(); auto hAlign = static_cast(surfaceState->getSurfaceHorizontalAlignment()); auto vAlign = static_cast(surfaceState->getSurfaceVerticalAlignment()); auto expectedHAlign = static_cast(MockGmmResourceInfo::getHAlignSurfaceStateResult); auto expectedVAlign = static_cast(RENDER_SURFACE_STATE::SURFACE_VERTICAL_ALIGNMENT_VALIGN_4); // 3D image EXPECT_EQ(renderTargetViewExtent, srcImage->getImageDesc().image_depth); EXPECT_EQ(minimumArrayElement, 0u); EXPECT_EQ(depth, srcImage->getImageDesc().image_depth); EXPECT_EQ(expectedHAlign, hAlign); EXPECT_EQ(expectedVAlign, vAlign); EXPECT_FALSE(isImageArray); } HWTEST_F(ImageSetArgTest, givenOffsetedBufferWhenSetKernelArgImageIscalledThenFullGPuPointerIsPatched) { typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->argAsImg(0).bindful)); auto graphicsAllocation = srcAllocation; graphicsAllocation->setGpuBaseAddress(12345u); srcImage->setImageArg(const_cast(surfaceState), false, 0, pClDevice->getRootDeviceIndex(), false); auto surfaceAddress = surfaceState->getSurfaceBaseAddress(); EXPECT_EQ(srcAllocation->getGpuAddress(), surfaceAddress); std::vector surfaces; pKernel->getResidency(surfaces); EXPECT_EQ(0u, surfaces.size()); } HWTEST_F(ImageSetArgTest, 
WhenSettingKernelArgThenPropertiesAreSetCorrectly) { auto gmmHelper = pDevice->getGmmHelper(); auto imageMocs = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_IMAGE); typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; cl_mem memObj = srcImage; retVal = clSetKernelArg( pMultiDeviceKernel, 0, sizeof(memObj), &memObj); ASSERT_EQ(CL_SUCCESS, retVal); auto surfaceState = reinterpret_cast( ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->argAsImg(0).bindful)); size_t rPitch = srcImage->getImageDesc().image_row_pitch; SetupChannels(srcImage->getImageFormat().image_channel_order); auto surfaceAddress = surfaceState->getSurfaceBaseAddress(); EXPECT_EQ(srcAllocation->getGpuAddress(), surfaceAddress); EXPECT_EQ(srcImage->getImageDesc().image_width, surfaceState->getWidth()); EXPECT_EQ(srcImage->getImageDesc().image_height, surfaceState->getHeight()); EXPECT_EQ(srcImage->getImageDesc().image_depth, surfaceState->getDepth()); EXPECT_EQ(srcImage->getImageDesc().image_depth, surfaceState->getRenderTargetViewExtent()); EXPECT_EQ(rPitch, surfaceState->getSurfacePitch()); EXPECT_EQ(0u, surfaceState->getSurfaceQpitch() % 4); EXPECT_EQ(srcImage->getSurfaceFormatInfo().surfaceFormat.GenxSurfaceFormat, (GFX3DSTATE_SURFACEFORMAT)surfaceState->getSurfaceFormat()); EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_3D, surfaceState->getSurfaceType()); EXPECT_EQ(expectedChannelRed, surfaceState->getShaderChannelSelectRed()); EXPECT_EQ(expectedChannelGreen, surfaceState->getShaderChannelSelectGreen()); EXPECT_EQ(expectedChannelBlue, surfaceState->getShaderChannelSelectBlue()); EXPECT_EQ(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_ALPHA, surfaceState->getShaderChannelSelectAlpha()); EXPECT_EQ(imageMocs, surfaceState->getMemoryObjectControlState()); EXPECT_EQ(0u, surfaceState->getCoherencyType()); std::vector surfaces; pKernel->getResidency(surfaces); EXPECT_EQ(1u, surfaces.size()); for (auto &surface : surfaces) { delete surface; } } HWTEST_F(ImageSetArgTest, 
givenImage2DWithMipMapsWhenSetKernelArgIsCalledThenMipLevelAndMipCountIsSet) {
    // Sets a base mip level and mip count on srcImage and checks that clSetKernelArg
    // programs SurfaceMinLod = mipLevel and MipCountLod = mipCount - 1.
    typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE;
    cl_mem memObj = srcImage;
    int mipLevel = 2;
    uint32_t mipCount = 3;
    srcImage->setBaseMipLevel(mipLevel);
    srcImage->setMipCount(mipCount);
    EXPECT_EQ(mipLevel, srcImage->peekBaseMipLevel());
    EXPECT_EQ(3u, srcImage->peekMipCount());

    retVal = clSetKernelArg(
        pMultiDeviceKernel,
        0,
        sizeof(memObj),
        &memObj);
    ASSERT_EQ(CL_SUCCESS, retVal);

    auto surfaceState = reinterpret_cast(
        ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->argAsImg(0).bindful));
    EXPECT_EQ((uint32_t)mipLevel, surfaceState->getSurfaceMinLod());
    EXPECT_EQ((uint32_t)mipCount, surfaceState->getMipCountLod() + 1);
}

// Binds a 2D array image as kernel arg 0 and verifies the programmed surface state
// (address, dimensions, pitch, format, type, array flag, channel selects) plus that
// exactly one residency surface is reported.
HWTEST_F(ImageSetArgTest, Given2dArrayWhenSettingKernelArgThenPropertiesAreSetCorrectly) {
    typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE;
    Image *image2Darray = Image2dArrayHelper<>::create(context);
    auto graphicsAllocation = image2Darray->getGraphicsAllocation(pClDevice->getRootDeviceIndex());
    cl_mem memObj = image2Darray;

    retVal = clSetKernelArg(
        pMultiDeviceKernel,
        0,
        sizeof(memObj),
        &memObj);
    ASSERT_EQ(CL_SUCCESS, retVal);

    auto surfaceState = reinterpret_cast(
        ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->argAsImg(0).bindful));
    auto surfaceAddress = surfaceState->getSurfaceBaseAddress();

    // The expected pitch must come from the image that was actually bound
    // (image2Darray), not from the fixture's unrelated 3D srcImage.
    size_t rPitch = image2Darray->getImageDesc().image_row_pitch;

    SetupChannels(image2Darray->getImageFormat().image_channel_order);

    EXPECT_EQ(graphicsAllocation->getGpuAddress(), surfaceAddress);
    EXPECT_EQ(image2Darray->getImageDesc().image_width, surfaceState->getWidth());
    EXPECT_EQ(image2Darray->getImageDesc().image_height, surfaceState->getHeight());
    EXPECT_EQ(image2Darray->getImageDesc().image_array_size, surfaceState->getDepth());
    EXPECT_EQ(image2Darray->getImageDesc().image_array_size, surfaceState->getRenderTargetViewExtent());
    EXPECT_EQ(rPitch, surfaceState->getSurfacePitch());
    EXPECT_EQ(0u, surfaceState->getSurfaceQpitch() % 4);
    EXPECT_EQ(image2Darray->getSurfaceFormatInfo().surfaceFormat.GenxSurfaceFormat, (GFX3DSTATE_SURFACEFORMAT)surfaceState->getSurfaceFormat());
    EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_2D, surfaceState->getSurfaceType());
    EXPECT_TRUE((GFX3DSTATE_SURFACEFORMAT)surfaceState->getSurfaceArray());

    EXPECT_EQ(expectedChannelRed, surfaceState->getShaderChannelSelectRed());
    EXPECT_EQ(expectedChannelGreen, surfaceState->getShaderChannelSelectGreen());
    EXPECT_EQ(expectedChannelBlue, surfaceState->getShaderChannelSelectBlue());
    EXPECT_EQ(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_ALPHA, surfaceState->getShaderChannelSelectAlpha());

    std::vector surfaces;
    pKernel->getResidency(surfaces);
    EXPECT_EQ(1u, surfaces.size());

    // The test deletes the surfaces returned by getResidency().
    for (auto &surface : surfaces) {
        delete surface;
    }
    delete image2Darray;
}

// Same verification as the 2D array case, for a 1D array image (height is expected to be 1).
HWTEST_F(ImageSetArgTest, Given1dArrayWhenSettingKernelArgThenPropertiesAreSetCorrectly) {
    typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE;
    Image *image1Darray = Image1dArrayHelper<>::create(context);
    auto graphicsAllocation = image1Darray->getGraphicsAllocation(pClDevice->getRootDeviceIndex());
    cl_mem memObj = image1Darray;

    retVal = clSetKernelArg(
        pMultiDeviceKernel,
        0,
        sizeof(memObj),
        &memObj);
    ASSERT_EQ(CL_SUCCESS, retVal);

    auto surfaceState = reinterpret_cast(
        ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->argAsImg(0).bindful));
    auto surfaceAddress = surfaceState->getSurfaceBaseAddress();

    SetupChannels(image1Darray->getImageFormat().image_channel_order);

    EXPECT_EQ(graphicsAllocation->getGpuAddress(), surfaceAddress);
    EXPECT_EQ(image1Darray->getImageDesc().image_width, surfaceState->getWidth());
    EXPECT_EQ(1u, surfaceState->getHeight());
    EXPECT_EQ(image1Darray->getImageDesc().image_array_size, surfaceState->getDepth());
    EXPECT_EQ(image1Darray->getImageDesc().image_array_size, surfaceState->getRenderTargetViewExtent());
    EXPECT_EQ(image1Darray->getImageDesc().image_row_pitch, surfaceState->getSurfacePitch());
    EXPECT_EQ(0u,
surfaceState->getSurfaceQpitch() % 4); EXPECT_EQ(graphicsAllocation->getDefaultGmm()->queryQPitch(GMM_RESOURCE_TYPE::RESOURCE_1D), surfaceState->getSurfaceQpitch()); EXPECT_EQ(image1Darray->getSurfaceFormatInfo().surfaceFormat.GenxSurfaceFormat, (GFX3DSTATE_SURFACEFORMAT)surfaceState->getSurfaceFormat()); EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_1D, surfaceState->getSurfaceType()); EXPECT_TRUE((GFX3DSTATE_SURFACEFORMAT)surfaceState->getSurfaceArray()); EXPECT_EQ(expectedChannelRed, surfaceState->getShaderChannelSelectRed()); EXPECT_EQ(expectedChannelGreen, surfaceState->getShaderChannelSelectGreen()); EXPECT_EQ(expectedChannelBlue, surfaceState->getShaderChannelSelectBlue()); EXPECT_EQ(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_ALPHA, surfaceState->getShaderChannelSelectAlpha()); std::vector surfaces; pKernel->getResidency(surfaces); EXPECT_EQ(1u, surfaces.size()); for (auto &surface : surfaces) { delete surface; } delete image1Darray; } HWTEST_F(ImageSetArgTest, givenMcsAllocationWhenSetArgIsCalledWithoutUnifiedAuxCapabilityThenProgramAuxFieldsForMultisamples) { typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; McsSurfaceInfo msi = {10, 20, 3}; auto mcsAlloc = context->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); mcsAlloc->setDefaultGmm(new Gmm(pDevice->getGmmClientContext(), nullptr, 1, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true)); cl_image_desc imgDesc = Image2dDefaults::imageDesc; imgDesc.num_samples = 8; auto image = Image2dHelper<>::create(context, &imgDesc); image->setMcsSurfaceInfo(msi); image->setMcsAllocation(mcsAlloc); cl_mem memObj = image; EXPECT_FALSE(mcsAlloc->getDefaultGmm()->unifiedAuxTranslationCapable()); retVal = clSetKernelArg( pMultiDeviceKernel, 0, sizeof(memObj), &memObj); ASSERT_EQ(CL_SUCCESS, retVal); auto surfaceState = reinterpret_cast( ptrOffset(pKernel->getSurfaceStateHeap(), 
pKernelInfo->argAsImg(0).bindful)); EXPECT_FALSE(Image::isDepthFormat(image->getImageFormat())); EXPECT_TRUE(surfaceState->getMultisampledSurfaceStorageFormat() == RENDER_SURFACE_STATE::MULTISAMPLED_SURFACE_STORAGE_FORMAT::MULTISAMPLED_SURFACE_STORAGE_FORMAT_MSS); EXPECT_TRUE(surfaceState->getAuxiliarySurfaceMode() == (typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE)1); EXPECT_EQ(msi.pitch, surfaceState->getAuxiliarySurfacePitch()); EXPECT_EQ(msi.qPitch, surfaceState->getAuxiliarySurfaceQpitch()); EXPECT_EQ(msi.multisampleCount, static_cast(surfaceState->getNumberOfMultisamples())); EXPECT_EQ(mcsAlloc->getGpuAddress(), surfaceState->getAuxiliarySurfaceBaseAddress()); delete image; } HWTEST_F(ImageSetArgTest, givenDepthFormatWhenSetArgIsCalledThenProgramAuxFields) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using AUXILIARY_SURFACE_MODE = typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE; McsSurfaceInfo msi = {0, 0, 3}; cl_image_desc imgDesc = Image2dDefaults::imageDesc; imgDesc.num_samples = 8; cl_image_format imgFormat = {CL_DEPTH, CL_FLOAT}; auto image = Image2dHelper<>::create(context, &imgDesc, &imgFormat); image->setMcsSurfaceInfo(msi); cl_mem memObj = image; retVal = clSetKernelArg( pMultiDeviceKernel, 0, sizeof(memObj), &memObj); ASSERT_EQ(CL_SUCCESS, retVal); auto surfaceState = reinterpret_cast( ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->argAsImg(0).bindful)); EXPECT_TRUE(Image::isDepthFormat(image->getImageFormat())); EXPECT_TRUE(surfaceState->getMultisampledSurfaceStorageFormat() == RENDER_SURFACE_STATE::MULTISAMPLED_SURFACE_STORAGE_FORMAT::MULTISAMPLED_SURFACE_STORAGE_FORMAT_DEPTH_STENCIL); EXPECT_TRUE(surfaceState->getAuxiliarySurfaceMode() == AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE); EXPECT_EQ(1u, surfaceState->getAuxiliarySurfacePitch()); EXPECT_EQ(0u, surfaceState->getAuxiliarySurfaceQpitch()); EXPECT_EQ(msi.multisampleCount, static_cast(surfaceState->getNumberOfMultisamples())); 
EXPECT_EQ(0u, surfaceState->getAuxiliarySurfaceBaseAddress()); delete image; } HWTEST_F(ImageSetArgTest, givenMultisampledR32Floatx8x24DepthStencilFormatWhenSetArgIsCalledThenSetMssSurfaceStateStorageParam) { typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; using SURFACE_FORMAT = typename RENDER_SURFACE_STATE::SURFACE_FORMAT; McsSurfaceInfo msi = {0, 0, 3}; cl_image_desc imgDesc = Image2dDefaults::imageDesc; imgDesc.num_samples = 8; cl_image_format imgFormat = {CL_DEPTH_STENCIL, CL_FLOAT}; std::unique_ptr image(ImageHelper>::create(context, &imgDesc, &imgFormat)); image->setMcsSurfaceInfo(msi); cl_mem memObj = image.get(); retVal = clSetKernelArg(pMultiDeviceKernel, 0, sizeof(memObj), &memObj); ASSERT_EQ(CL_SUCCESS, retVal); auto surfaceState = reinterpret_cast(ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->argAsImg(0).bindful)); EXPECT_TRUE(Image::isDepthFormat(image->getImageFormat())); EXPECT_TRUE(surfaceState->getSurfaceFormat() == SURFACE_FORMAT::SURFACE_FORMAT_R32_FLOAT_X8X24_TYPELESS); EXPECT_TRUE(surfaceState->getMultisampledSurfaceStorageFormat() == RENDER_SURFACE_STATE::MULTISAMPLED_SURFACE_STORAGE_FORMAT::MULTISAMPLED_SURFACE_STORAGE_FORMAT_MSS); } HWTEST_F(ImageSetArgTest, givenMcsAllocationAndCompressionWhenSetArgOnMultisampledImgIsCalledThenProgramAuxFieldsWithMcsParams) { typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; McsSurfaceInfo msi = {10, 20, 3}; auto mcsAlloc = context->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); mcsAlloc->setDefaultGmm(new Gmm(pDevice->getGmmClientContext(), nullptr, 1, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true)); cl_image_desc imgDesc = Image2dDefaults::imageDesc; imgDesc.num_samples = 8; auto image = std::unique_ptr(Image2dHelper<>::create(context, &imgDesc)); auto graphicsAllocation = image->getGraphicsAllocation(pClDevice->getRootDeviceIndex()); 
graphicsAllocation->getDefaultGmm()->isCompressionEnabled = true; image->setMcsSurfaceInfo(msi); image->setMcsAllocation(mcsAlloc); cl_mem memObj = image.get(); retVal = clSetKernelArg(pMultiDeviceKernel, 0, sizeof(memObj), &memObj); ASSERT_EQ(CL_SUCCESS, retVal); auto surfaceState = reinterpret_cast(ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->argAsImg(0).bindful)); EXPECT_TRUE(surfaceState->getMultisampledSurfaceStorageFormat() == RENDER_SURFACE_STATE::MULTISAMPLED_SURFACE_STORAGE_FORMAT::MULTISAMPLED_SURFACE_STORAGE_FORMAT_MSS); EXPECT_TRUE(surfaceState->getAuxiliarySurfaceMode() == (typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE)1); EXPECT_EQ(msi.pitch, surfaceState->getAuxiliarySurfacePitch()); EXPECT_EQ(msi.qPitch, surfaceState->getAuxiliarySurfaceQpitch()); EXPECT_EQ(msi.multisampleCount, static_cast(surfaceState->getNumberOfMultisamples())); EXPECT_EQ(mcsAlloc->getGpuAddress(), surfaceState->getAuxiliarySurfaceBaseAddress()); } HWTEST_F(ImageSetArgTest, givenDepthFormatAndCompressionWhenSetArgOnMultisampledImgIsCalledThenDontProgramAuxFields) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using AUXILIARY_SURFACE_MODE = typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE; McsSurfaceInfo msi = {0, 0, 3}; cl_image_desc imgDesc = Image2dDefaults::imageDesc; cl_image_format imgFormat = {CL_DEPTH, CL_FLOAT}; imgDesc.num_samples = 8; auto image = std::unique_ptr(Image2dHelper<>::create(context, &imgDesc, &imgFormat)); auto graphicsAllocation = image->getGraphicsAllocation(pClDevice->getRootDeviceIndex()); graphicsAllocation->getDefaultGmm()->isCompressionEnabled = true; image->setMcsSurfaceInfo(msi); cl_mem memObj = image.get(); retVal = clSetKernelArg(pMultiDeviceKernel, 0, sizeof(memObj), &memObj); ASSERT_EQ(CL_SUCCESS, retVal); auto surfaceState = reinterpret_cast(ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->argAsImg(0).bindful)); EXPECT_TRUE(Image::isDepthFormat(image->getImageFormat())); 
EXPECT_TRUE(surfaceState->getMultisampledSurfaceStorageFormat() == RENDER_SURFACE_STATE::MULTISAMPLED_SURFACE_STORAGE_FORMAT::MULTISAMPLED_SURFACE_STORAGE_FORMAT_DEPTH_STENCIL); EXPECT_TRUE(surfaceState->getAuxiliarySurfaceMode() == AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE); EXPECT_EQ(1u, surfaceState->getAuxiliarySurfacePitch()); EXPECT_EQ(0u, surfaceState->getAuxiliarySurfaceQpitch()); EXPECT_EQ(msi.multisampleCount, static_cast(surfaceState->getNumberOfMultisamples())); EXPECT_EQ(0u, surfaceState->getAuxiliarySurfaceBaseAddress()); } HWTEST_F(ImageSetArgTest, givenMcsAllocationWhenSetArgIsCalledWithUnifiedAuxCapabilityThenProgramAuxFieldsForCcs) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using AUXILIARY_SURFACE_MODE = typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE; McsSurfaceInfo msi = {10, 20, 3}; auto mcsAlloc = context->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); mcsAlloc->setDefaultGmm(new Gmm(pDevice->getGmmClientContext(), nullptr, 1, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true)); cl_image_desc imgDesc = Image2dDefaults::imageDesc; imgDesc.num_samples = 8; auto image = std::unique_ptr(Image2dHelper<>::create(context, &imgDesc)); image->setMcsSurfaceInfo(msi); image->setMcsAllocation(mcsAlloc); cl_mem memObj = image.get(); auto mockMcsGmmResInfo = static_cast(mcsAlloc->getDefaultGmm()->gmmResourceInfo.get()); mockMcsGmmResInfo->setUnifiedAuxTranslationCapable(); EXPECT_TRUE(mcsAlloc->getDefaultGmm()->unifiedAuxTranslationCapable()); mcsAlloc->getDefaultGmm()->isCompressionEnabled = true; retVal = clSetKernelArg(pMultiDeviceKernel, 0, sizeof(memObj), &memObj); ASSERT_EQ(CL_SUCCESS, retVal); auto surfaceState = reinterpret_cast(ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->argAsImg(0).bindful)); EXPECT_TRUE(EncodeSurfaceState::isAuxModeEnabled(surfaceState, mcsAlloc->getDefaultGmm())); 
EXPECT_EQ(1u, surfaceState->getAuxiliarySurfacePitch()); EXPECT_EQ(0u, surfaceState->getAuxiliarySurfaceQpitch()); } HWTEST_F(ImageSetArgTest, givenMcsAllocationWhenSetArgIsCalledWithUnifiedAuxCapabilityAndMcsThenAuxBaseAddressIsSet) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using AUXILIARY_SURFACE_MODE = typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE; McsSurfaceInfo msi = {10, 20, 3}; auto mcsAlloc = context->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); mcsAlloc->setDefaultGmm(new Gmm(pDevice->getGmmClientContext(), nullptr, 1, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true)); cl_image_desc imgDesc = Image2dDefaults::imageDesc; imgDesc.num_samples = 8; auto image = std::unique_ptr(Image2dHelper<>::create(context, &imgDesc)); image->setMcsSurfaceInfo(msi); image->setMcsAllocation(mcsAlloc); cl_mem memObj = image.get(); auto mockMcsGmmResInfo = static_cast(mcsAlloc->getDefaultGmm()->gmmResourceInfo.get()); mockMcsGmmResInfo->setUnifiedAuxTranslationCapable(); mockMcsGmmResInfo->setMultisampleControlSurface(); EXPECT_TRUE(mcsAlloc->getDefaultGmm()->unifiedAuxTranslationCapable()); retVal = clSetKernelArg(pMultiDeviceKernel, 0, sizeof(memObj), &memObj); ASSERT_EQ(CL_SUCCESS, retVal); auto surfaceState = reinterpret_cast(ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->argAsImg(0).bindful)); EXPECT_NE(0u, surfaceState->getAuxiliarySurfaceBaseAddress()); } HWTEST_F(ImageSetArgTest, givenMcsAllocationWhenSetArgIsCalledWithUnifiedAuxCapabilityAndMcsThenAuxSurfPitchAndQPitchIsSet) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using AUXILIARY_SURFACE_MODE = typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE; McsSurfaceInfo msi = {10, 20, 3}; auto mcsAlloc = context->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), 
MemoryConstants::pageSize}); mcsAlloc->setDefaultGmm(new Gmm(pDevice->getGmmClientContext(), nullptr, 1, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true)); cl_image_desc imgDesc = Image2dDefaults::imageDesc; imgDesc.num_samples = 8; auto image = std::unique_ptr(Image2dHelper<>::create(context, &imgDesc)); image->setMcsSurfaceInfo(msi); image->setMcsAllocation(mcsAlloc); cl_mem memObj = image.get(); auto mockMcsGmmResInfo = static_cast(mcsAlloc->getDefaultGmm()->gmmResourceInfo.get()); mockMcsGmmResInfo->setUnifiedAuxTranslationCapable(); mockMcsGmmResInfo->setMultisampleControlSurface(); uint32_t pitchValue = 4u; uint32_t qPitchValue = 12u; mockMcsGmmResInfo->setUnifiedAuxPitchTiles(pitchValue); mockMcsGmmResInfo->setAuxQPitch(qPitchValue); EXPECT_TRUE(mcsAlloc->getDefaultGmm()->unifiedAuxTranslationCapable()); retVal = clSetKernelArg(pMultiDeviceKernel, 0, sizeof(memObj), &memObj); ASSERT_EQ(CL_SUCCESS, retVal); auto surfaceState = reinterpret_cast(ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->argAsImg(0).bindful)); EXPECT_EQ(pitchValue, surfaceState->getAuxiliarySurfacePitch()); EXPECT_EQ(qPitchValue, surfaceState->getAuxiliarySurfaceQpitch()); } HWTEST_F(ImageSetArgTest, GivenImageFrom1dBufferWhenSettingKernelArgThenPropertiesAreSetCorrectly) { typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto buffer = clCreateBuffer(context, 0, 4096 * 10, nullptr, nullptr); ASSERT_NE(nullptr, buffer); cl_image_desc imageDesc = {0}; imageDesc.buffer = buffer; imageDesc.image_width = 6400; // 2 * (1 << 21) + 5 * (1 << 7) + 0; imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D_BUFFER; cl_image_format imageFormat = {0}; imageFormat.image_channel_data_type = CL_FLOAT; imageFormat.image_channel_order = CL_RGBA; cl_int retVal; auto imageFromBuffer = Image::validateAndCreateImage(context, nullptr, 0, 0, &imageFormat, &imageDesc, nullptr, retVal); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, imageFromBuffer); retVal = clSetKernelArg( 
pMultiDeviceKernel, 0, sizeof(imageFromBuffer), &imageFromBuffer); ASSERT_EQ(CL_SUCCESS, retVal); auto surfaceState = reinterpret_cast( ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->argAsImg(0).bindful)); auto surfaceAddress = surfaceState->getSurfaceBaseAddress(); auto image = castToObject(imageFromBuffer); EXPECT_EQ(image->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddress(), surfaceAddress); // Width is 7 bits EXPECT_EQ(128u, surfaceState->getWidth()); // Height is 14 bits EXPECT_EQ(50u, surfaceState->getHeight()); // Depth is 11 bits EXPECT_EQ(1u, surfaceState->getDepth()); EXPECT_EQ(1u, surfaceState->getRenderTargetViewExtent()); EXPECT_EQ(0u, surfaceState->getSurfaceQpitch() % 4); EXPECT_EQ(0u, surfaceState->getSurfaceQpitch()); EXPECT_EQ(image->getSurfaceFormatInfo().surfaceFormat.GenxSurfaceFormat, (GFX3DSTATE_SURFACEFORMAT)surfaceState->getSurfaceFormat()); EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_BUFFER, surfaceState->getSurfaceType()); EXPECT_FALSE((GFX3DSTATE_SURFACEFORMAT)surfaceState->getSurfaceArray()); EXPECT_EQ(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_RED, surfaceState->getShaderChannelSelectRed()); EXPECT_EQ(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_GREEN, surfaceState->getShaderChannelSelectGreen()); EXPECT_EQ(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_BLUE, surfaceState->getShaderChannelSelectBlue()); EXPECT_EQ(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_ALPHA, surfaceState->getShaderChannelSelectAlpha()); clReleaseMemObject(imageFromBuffer); clReleaseMemObject(buffer); } HWTEST_F(ImageSetArgTest, GivenImageWithClLuminanceFormatWhenSettingKernelArgThenPropertiesAreSetCorrectly) { typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; Image *luminanceImage = Image3dHelper::create(context); cl_mem memObj = luminanceImage; retVal = clSetKernelArg( pMultiDeviceKernel, 0, sizeof(memObj), &memObj); ASSERT_EQ(CL_SUCCESS, retVal); auto surfaceState = reinterpret_cast( 
ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->argAsImg(0).bindful)); //for CL_LUMINANCE format we override channels to RED to be spec complaint. EXPECT_EQ(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_RED, surfaceState->getShaderChannelSelectRed()); EXPECT_EQ(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_RED, surfaceState->getShaderChannelSelectGreen()); EXPECT_EQ(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_RED, surfaceState->getShaderChannelSelectBlue()); EXPECT_EQ(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_ALPHA, surfaceState->getShaderChannelSelectAlpha()); std::vector surfaces; pKernel->getResidency(surfaces); EXPECT_EQ(1u, surfaces.size()); for (auto &surface : surfaces) { delete surface; } delete luminanceImage; } HWTEST_F(ImageSetArgTest, WhenSettingArgThenImageIsReturned) { cl_mem memObj = srcImage; retVal = pKernel->setArg( 0, sizeof(memObj), &memObj); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(memObj, pKernel->getKernelArg(0)); std::vector surfaces; pKernel->getResidency(surfaces); EXPECT_EQ(1u, surfaces.size()); for (auto &surface : surfaces) { delete surface; } } HWTEST_F(ImageSetArgTest, givenCompressedResourceWhenSettingImgArgThenSetCorrectAuxParams) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using AUXILIARY_SURFACE_MODE = typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE; auto surfaceState = FamilyType::cmdInitRenderSurfaceState; srcAllocation->getDefaultGmm()->isCompressionEnabled = true; srcImage->setImageArg(&surfaceState, false, 0, pClDevice->getRootDeviceIndex(), false); EXPECT_TRUE(EncodeSurfaceState::isAuxModeEnabled(&surfaceState, srcAllocation->getDefaultGmm())); EXPECT_EQ(1u, surfaceState.getAuxiliarySurfacePitch()); EXPECT_EQ(0u, surfaceState.getAuxiliarySurfaceQpitch()); } HWTEST_F(ImageSetArgTest, givenNonCompressedResourceWhenSettingImgArgThenDontSetAuxParams) { typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; typedef typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE 
AUXILIARY_SURFACE_MODE; auto surfaceState = FamilyType::cmdInitRenderSurfaceState; auto gmm = srcAllocation->getDefaultGmm(); auto mockGmmResInfo = static_cast(gmm->gmmResourceInfo.get()); gmm->isCompressionEnabled = false; mockGmmResInfo->getUnifiedAuxSurfaceOffsetCalled = 0u; EXPECT_EQ(0u, surfaceState.getAuxiliarySurfaceQpitch()); EXPECT_EQ(1u, surfaceState.getAuxiliarySurfacePitch()); srcImage->setImageArg(&surfaceState, false, 0, pClDevice->getRootDeviceIndex(), false); EXPECT_TRUE(surfaceState.getAuxiliarySurfaceMode() == AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE); EXPECT_EQ(1u, surfaceState.getAuxiliarySurfacePitch()); EXPECT_EQ(0u, surfaceState.getAuxiliarySurfaceQpitch()); EXPECT_EQ(0u, surfaceState.getAuxiliarySurfaceBaseAddress()); EXPECT_EQ(0u, mockGmmResInfo->getUnifiedAuxSurfaceOffsetCalled); } /* cl_intel_media_block_io */ class ImageMediaBlockSetArgTest : public ImageSetArgTest { protected: void SetUp() override { ClDeviceFixture::SetUp(); pKernelInfo = std::make_unique(); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1; // define kernel info pKernelInfo->heapInfo.pSsh = surfaceStateHeap; pKernelInfo->heapInfo.SurfaceStateHeapSize = sizeof(surfaceStateHeap); // setup kernel arg offsets pKernelInfo->addArgImage(0, 0x00, iOpenCL::IMAGE_MEMORY_OBJECT_2D_MEDIA_BLOCK); pKernelInfo->addArgImage(0, 0x40, iOpenCL::IMAGE_MEMORY_OBJECT_2D_MEDIA_BLOCK); program = std::make_unique(toClDeviceVector(*pClDevice)); retVal = CL_INVALID_VALUE; pMultiDeviceKernel = MultiDeviceKernel::create(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), &retVal); pKernel = static_cast(pMultiDeviceKernel->getKernel(rootDeviceIndex)); ASSERT_NE(nullptr, pKernel); ASSERT_EQ(CL_SUCCESS, retVal); pKernel->setKernelArgHandler(0, &Kernel::setArgImage); pKernel->setKernelArgHandler(1, &Kernel::setArgImage); context = new MockContext(pClDevice); srcImage = Image3dHelper<>::create(context); srcAllocation = 
srcImage->getGraphicsAllocation(pClDevice->getRootDeviceIndex()); ASSERT_NE(nullptr, srcImage); } }; HWTEST_F(ImageMediaBlockSetArgTest, WhenSettingKernelArgImageThenPropertiesAreCorrect) { auto gmmHelper = pDevice->getGmmHelper(); auto imageMocs = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_IMAGE); typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; cl_mem memObj = srcImage; retVal = clSetKernelArg( pMultiDeviceKernel, 0, sizeof(memObj), &memObj); ASSERT_EQ(CL_SUCCESS, retVal); auto surfaceState = reinterpret_cast( ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->argAsImg(0).bindful)); size_t rPitch = srcImage->getImageDesc().image_row_pitch; auto surfaceAddress = surfaceState->getSurfaceBaseAddress(); EXPECT_EQ(srcAllocation->getGpuAddress(), surfaceAddress); uint32_t element_size = static_cast(srcImage->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes); SetupChannels(srcImage->getImageFormat().image_channel_order); EXPECT_EQ(srcImage->getImageDesc().image_width * element_size / sizeof(uint32_t), surfaceState->getWidth()); EXPECT_EQ(srcImage->getImageDesc().image_height, surfaceState->getHeight()); EXPECT_EQ(srcImage->getImageDesc().image_depth, surfaceState->getDepth()); EXPECT_EQ(srcImage->getImageDesc().image_depth, surfaceState->getRenderTargetViewExtent()); EXPECT_EQ(rPitch, surfaceState->getSurfacePitch()); EXPECT_EQ(0u, surfaceState->getSurfaceQpitch() % 4); EXPECT_EQ(srcImage->getSurfaceFormatInfo().surfaceFormat.GenxSurfaceFormat, (GFX3DSTATE_SURFACEFORMAT)surfaceState->getSurfaceFormat()); EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_3D, surfaceState->getSurfaceType()); EXPECT_EQ(expectedChannelRed, surfaceState->getShaderChannelSelectRed()); EXPECT_EQ(expectedChannelGreen, surfaceState->getShaderChannelSelectGreen()); EXPECT_EQ(expectedChannelBlue, surfaceState->getShaderChannelSelectBlue()); EXPECT_EQ(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_ALPHA, surfaceState->getShaderChannelSelectAlpha()); 
EXPECT_EQ(imageMocs, surfaceState->getMemoryObjectControlState()); std::vector surfaces; pKernel->getResidency(surfaces); EXPECT_EQ(1u, surfaces.size()); for (auto &surface : surfaces) { delete surface; } } typedef ImageSetArgTest ImageShaderChannelValueTest; HWTEST_F(ImageShaderChannelValueTest, GivenChannelAWhenGettingShaderChannelValueThenOutputChannelIsCorrect) { typedef typename FamilyType::RENDER_SURFACE_STATE SURFACE_STATE; int outputChannel = 0; int inputChannel = 0; inputChannel = SURFACE_STATE::SHADER_CHANNEL_SELECT_RED; outputChannel = ImageHw::getShaderChannelValue(inputChannel, CL_A); EXPECT_EQ(SURFACE_STATE::SHADER_CHANNEL_SELECT_ZERO, outputChannel); inputChannel = SURFACE_STATE::SHADER_CHANNEL_SELECT_GREEN; outputChannel = ImageHw::getShaderChannelValue(inputChannel, CL_A); EXPECT_EQ(SURFACE_STATE::SHADER_CHANNEL_SELECT_ZERO, outputChannel); inputChannel = SURFACE_STATE::SHADER_CHANNEL_SELECT_BLUE; outputChannel = ImageHw::getShaderChannelValue(inputChannel, CL_A); EXPECT_EQ(SURFACE_STATE::SHADER_CHANNEL_SELECT_ZERO, outputChannel); inputChannel = SURFACE_STATE::SHADER_CHANNEL_SELECT_ALPHA; outputChannel = ImageHw::getShaderChannelValue(inputChannel, CL_A); EXPECT_EQ(SURFACE_STATE::SHADER_CHANNEL_SELECT_ALPHA, outputChannel); } HWTEST_F(ImageShaderChannelValueTest, GivenChannelRaWhenGettingShaderChannelValueThenOutputChannelIsCorrect) { typedef typename FamilyType::RENDER_SURFACE_STATE SURFACE_STATE; int outputChannel = 0; int inputChannel = 0; inputChannel = SURFACE_STATE::SHADER_CHANNEL_SELECT_GREEN; outputChannel = ImageHw::getShaderChannelValue(inputChannel, CL_R); EXPECT_EQ(SURFACE_STATE::SHADER_CHANNEL_SELECT_ZERO, outputChannel); inputChannel = SURFACE_STATE::SHADER_CHANNEL_SELECT_BLUE; outputChannel = ImageHw::getShaderChannelValue(inputChannel, CL_R); EXPECT_EQ(SURFACE_STATE::SHADER_CHANNEL_SELECT_ZERO, outputChannel); inputChannel = SURFACE_STATE::SHADER_CHANNEL_SELECT_RED; outputChannel = ImageHw::getShaderChannelValue(inputChannel, 
CL_R); EXPECT_EQ(SURFACE_STATE::SHADER_CHANNEL_SELECT_RED, outputChannel); inputChannel = SURFACE_STATE::SHADER_CHANNEL_SELECT_ALPHA; outputChannel = ImageHw::getShaderChannelValue(inputChannel, CL_R); EXPECT_EQ(SURFACE_STATE::SHADER_CHANNEL_SELECT_ALPHA, outputChannel); inputChannel = SURFACE_STATE::SHADER_CHANNEL_SELECT_GREEN; outputChannel = ImageHw::getShaderChannelValue(inputChannel, CL_RA); EXPECT_EQ(SURFACE_STATE::SHADER_CHANNEL_SELECT_ZERO, outputChannel); inputChannel = SURFACE_STATE::SHADER_CHANNEL_SELECT_BLUE; outputChannel = ImageHw::getShaderChannelValue(inputChannel, CL_RA); EXPECT_EQ(SURFACE_STATE::SHADER_CHANNEL_SELECT_ZERO, outputChannel); inputChannel = SURFACE_STATE::SHADER_CHANNEL_SELECT_RED; outputChannel = ImageHw::getShaderChannelValue(inputChannel, CL_RA); EXPECT_EQ(SURFACE_STATE::SHADER_CHANNEL_SELECT_RED, outputChannel); inputChannel = SURFACE_STATE::SHADER_CHANNEL_SELECT_ALPHA; outputChannel = ImageHw::getShaderChannelValue(inputChannel, CL_RA); EXPECT_EQ(SURFACE_STATE::SHADER_CHANNEL_SELECT_ALPHA, outputChannel); inputChannel = SURFACE_STATE::SHADER_CHANNEL_SELECT_GREEN; outputChannel = ImageHw::getShaderChannelValue(inputChannel, CL_Rx); EXPECT_EQ(SURFACE_STATE::SHADER_CHANNEL_SELECT_ZERO, outputChannel); inputChannel = SURFACE_STATE::SHADER_CHANNEL_SELECT_BLUE; outputChannel = ImageHw::getShaderChannelValue(inputChannel, CL_Rx); EXPECT_EQ(SURFACE_STATE::SHADER_CHANNEL_SELECT_ZERO, outputChannel); inputChannel = SURFACE_STATE::SHADER_CHANNEL_SELECT_RED; outputChannel = ImageHw::getShaderChannelValue(inputChannel, CL_Rx); EXPECT_EQ(SURFACE_STATE::SHADER_CHANNEL_SELECT_RED, outputChannel); inputChannel = SURFACE_STATE::SHADER_CHANNEL_SELECT_ALPHA; outputChannel = ImageHw::getShaderChannelValue(inputChannel, CL_Rx); EXPECT_EQ(SURFACE_STATE::SHADER_CHANNEL_SELECT_ALPHA, outputChannel); } HWTEST_F(ImageShaderChannelValueTest, GivenChannelRgaWhenGettingShaderChannelValueThenOutputChannelIsCorrect) { typedef typename 
FamilyType::RENDER_SURFACE_STATE SURFACE_STATE; int outputChannel = 0; int inputChannel = 0; inputChannel = SURFACE_STATE::SHADER_CHANNEL_SELECT_BLUE; outputChannel = ImageHw::getShaderChannelValue(inputChannel, CL_RG); EXPECT_EQ(SURFACE_STATE::SHADER_CHANNEL_SELECT_ZERO, outputChannel); inputChannel = SURFACE_STATE::SHADER_CHANNEL_SELECT_ALPHA; outputChannel = ImageHw::getShaderChannelValue(inputChannel, CL_RG); EXPECT_EQ(SURFACE_STATE::SHADER_CHANNEL_SELECT_ALPHA, outputChannel); inputChannel = SURFACE_STATE::SHADER_CHANNEL_SELECT_RED; outputChannel = ImageHw::getShaderChannelValue(inputChannel, CL_RG); EXPECT_EQ(SURFACE_STATE::SHADER_CHANNEL_SELECT_RED, outputChannel); inputChannel = SURFACE_STATE::SHADER_CHANNEL_SELECT_GREEN; outputChannel = ImageHw::getShaderChannelValue(inputChannel, CL_RG); EXPECT_EQ(SURFACE_STATE::SHADER_CHANNEL_SELECT_GREEN, outputChannel); inputChannel = SURFACE_STATE::SHADER_CHANNEL_SELECT_BLUE; outputChannel = ImageHw::getShaderChannelValue(inputChannel, CL_RGx); EXPECT_EQ(SURFACE_STATE::SHADER_CHANNEL_SELECT_ZERO, outputChannel); inputChannel = SURFACE_STATE::SHADER_CHANNEL_SELECT_ALPHA; outputChannel = ImageHw::getShaderChannelValue(inputChannel, CL_RGx); EXPECT_EQ(SURFACE_STATE::SHADER_CHANNEL_SELECT_ALPHA, outputChannel); inputChannel = SURFACE_STATE::SHADER_CHANNEL_SELECT_RED; outputChannel = ImageHw::getShaderChannelValue(inputChannel, CL_RGx); EXPECT_EQ(SURFACE_STATE::SHADER_CHANNEL_SELECT_RED, outputChannel); inputChannel = SURFACE_STATE::SHADER_CHANNEL_SELECT_GREEN; outputChannel = ImageHw::getShaderChannelValue(inputChannel, CL_RGx); EXPECT_EQ(SURFACE_STATE::SHADER_CHANNEL_SELECT_GREEN, outputChannel); } HWTEST_F(ImageShaderChannelValueTest, GivenChannelRgbaWhenGettingShaderChannelValueThenOutputChannelIsCorrect) { typedef typename FamilyType::RENDER_SURFACE_STATE SURFACE_STATE; int outputChannel = 0; int inputChannel = 0; inputChannel = SURFACE_STATE::SHADER_CHANNEL_SELECT_ALPHA; outputChannel = 
ImageHw::getShaderChannelValue(inputChannel, CL_RGBA); EXPECT_EQ(SURFACE_STATE::SHADER_CHANNEL_SELECT_ALPHA, outputChannel); inputChannel = SURFACE_STATE::SHADER_CHANNEL_SELECT_RED; outputChannel = ImageHw::getShaderChannelValue(inputChannel, CL_RGBA); EXPECT_EQ(SURFACE_STATE::SHADER_CHANNEL_SELECT_RED, outputChannel); inputChannel = SURFACE_STATE::SHADER_CHANNEL_SELECT_GREEN; outputChannel = ImageHw::getShaderChannelValue(inputChannel, CL_RGBA); EXPECT_EQ(SURFACE_STATE::SHADER_CHANNEL_SELECT_GREEN, outputChannel); inputChannel = SURFACE_STATE::SHADER_CHANNEL_SELECT_BLUE; outputChannel = ImageHw::getShaderChannelValue(inputChannel, CL_RGBA); EXPECT_EQ(SURFACE_STATE::SHADER_CHANNEL_SELECT_BLUE, outputChannel); } HWTEST_F(ImageSetArgTest, givenImageWithOffsetGreaterThan4GBWhenSurfaceStateIsProgrammedThenCorrectStataBaseAddressIsSet) { typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; RENDER_SURFACE_STATE surfaceState; uint64_t surfaceOffset = 8 * GB; srcImage->setSurfaceOffsets(surfaceOffset, 0, 0, 0); srcImage->setImageArg(&surfaceState, false, 0, pClDevice->getRootDeviceIndex(), false); auto expectedAddress = srcAllocation->getGpuAddress() + surfaceOffset; auto surfaceAddress = surfaceState.getSurfaceBaseAddress(); EXPECT_EQ(expectedAddress, surfaceAddress); } HWTEST_F(ImageSetArgTest, givenMediaCompressedResourceThenSurfaceModeIsNone) { typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; using AUXILIARY_SURFACE_MODE = typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE; RENDER_SURFACE_STATE surfaceState; auto gmm = srcAllocation->getDefaultGmm(); gmm->gmmResourceInfo->getResourceFlags()->Info.MediaCompressed = true; gmm->isCompressionEnabled = true; srcImage->setImageArg(&surfaceState, false, 0, pClDevice->getRootDeviceIndex(), false); EXPECT_EQ(surfaceState.getAuxiliarySurfaceMode(), AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE); } 
compute-runtime-22.14.22890/opencl/test/unit_test/mem_obj/image_snorm_tests.cpp000066400000000000000000000065721422164147700274770ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/helpers/surface_formats.h" #include "opencl/source/mem_obj/image.h" #include "gtest/gtest.h" #include using namespace NEO; const cl_mem_flags flagsForTests[] = {CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY, CL_MEM_READ_WRITE}; const ArrayRef paramsForSnormTests[] = { SurfaceFormats::readOnly12(), SurfaceFormats::readOnly20(), SurfaceFormats::writeOnly(), SurfaceFormats::readWrite()}; const std::array referenceSnormSurfaceFormats = {{ // clang-format off {{CL_R, CL_SNORM_INT8}, {GMM_FORMAT_R8_SNORM_TYPE, GFX3DSTATE_SURFACEFORMAT_R8_SNORM, 0, 1, 1, 1}}, {{CL_R, CL_SNORM_INT16}, {GMM_FORMAT_R16_SNORM_TYPE, GFX3DSTATE_SURFACEFORMAT_R16_SNORM, 0, 1, 2, 2}}, {{CL_RG, CL_SNORM_INT8}, {GMM_FORMAT_R8G8_SNORM_TYPE, GFX3DSTATE_SURFACEFORMAT_R8G8_SNORM, 0, 2, 1, 2}}, {{CL_RG, CL_SNORM_INT16}, {GMM_FORMAT_R16G16_SNORM_TYPE, GFX3DSTATE_SURFACEFORMAT_R16G16_SNORM, 0, 2, 2, 4}}, {{CL_RGBA, CL_SNORM_INT8}, {GMM_FORMAT_R8G8B8A8_SNORM_TYPE, GFX3DSTATE_SURFACEFORMAT_R8G8B8A8_SNORM, 0, 4, 1, 4}}, {{CL_RGBA, CL_SNORM_INT16}, {GMM_FORMAT_R16G16B16A16_SNORM_TYPE, GFX3DSTATE_SURFACEFORMAT_R16G16B16A16_SNORM, 0, 4, 2, 8}}, // clang-format on }}; using SnormSurfaceFormatAccessFlagsTests = ::testing::TestWithParam; TEST_P(SnormSurfaceFormatAccessFlagsTests, givenSnormFormatWhenGetSurfaceFormatFromTableIsCalledThenReturnsCorrectFormat) { EXPECT_EQ(6u, referenceSnormSurfaceFormats.size()); cl_mem_flags flags = GetParam(); for (const auto &snormSurfaceFormat : referenceSnormSurfaceFormats) { auto format = Image::getSurfaceFormatFromTable(flags, &snormSurfaceFormat.OCLImageFormat, false /* supportsOcl20Features */); EXPECT_NE(nullptr, format); EXPECT_TRUE(memcmp(&snormSurfaceFormat, format, sizeof(ClSurfaceFormatInfo)) == 0); } for (const 
auto &snormSurfaceFormat : referenceSnormSurfaceFormats) { auto format = Image::getSurfaceFormatFromTable(flags, &snormSurfaceFormat.OCLImageFormat, true /* supportsOcl20Features */); EXPECT_NE(nullptr, format); EXPECT_TRUE(memcmp(&snormSurfaceFormat, format, sizeof(ClSurfaceFormatInfo)) == 0); } } using SnormSurfaceFormatTests = ::testing::TestWithParam>; TEST_P(SnormSurfaceFormatTests, givenSnormOclFormatWhenCheckingrReadOnlySurfaceFormatsThenFindExactCount) { ArrayRef formatsTable = GetParam(); size_t snormFormatsFound = 0; for (const auto &format : formatsTable) { auto oclFormat = format.OCLImageFormat; if (CL_SNORM_INT8 == oclFormat.image_channel_data_type || CL_SNORM_INT16 == oclFormat.image_channel_data_type) { EXPECT_TRUE(oclFormat.image_channel_order == CL_R || oclFormat.image_channel_order == CL_RG || oclFormat.image_channel_order == CL_RGBA); snormFormatsFound++; } } EXPECT_EQ(6u, snormFormatsFound); } INSTANTIATE_TEST_CASE_P( ImageSnormTests, SnormSurfaceFormatAccessFlagsTests, ::testing::ValuesIn(flagsForTests)); INSTANTIATE_TEST_CASE_P( ImageSnormTests, SnormSurfaceFormatTests, ::testing::ValuesIn(paramsForSnormTests)); compute-runtime-22.14.22890/opencl/test/unit_test/mem_obj/image_tests.cpp000066400000000000000000002156721422164147700262640ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/built_ins/built_ins.h" #include "shared/source/compiler_interface/compiler_interface.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/image/image_surface_state.h" #include "shared/source/os_interface/os_context.h" #include "shared/test/common/fixtures/memory_management_fixture.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/kernel_binary_helper.h" #include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/mocks/mock_allocation_properties.h" #include 
"shared/test/common/mocks/mock_gmm.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/common/test_macros/test_checks_shared.h" #include "opencl/source/helpers/mipmap.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/mem_obj/mem_obj_helper.h" #include "opencl/test/unit_test/command_queue/command_queue_fixture.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/fixtures/multi_root_device_fixture.h" #include "opencl/test/unit_test/mem_obj/image_compression_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_platform.h" using namespace NEO; static const unsigned int testImageDimensions = 45; auto channelType = CL_UNORM_INT8; auto channelOrder = CL_RGBA; auto const elementSize = 4; //sizeof CL_RGBA * CL_UNORM_INT8 class CreateImageTest : public ClDeviceFixture, public testing::TestWithParam, public CommandQueueHwFixture { typedef CommandQueueHwFixture CommandQueueFixture; public: CreateImageTest() { } Image *createImageWithFlags(cl_mem_flags flags) { return createImageWithFlags(flags, context); } Image *createImageWithFlags(cl_mem_flags flags, Context *context) { auto surfaceFormat = Image::getSurfaceFormatFromTable( flags, &imageFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); return Image::create(context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context->getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal); } protected: void SetUp() override { ClDeviceFixture::SetUp(); CommandQueueFixture::SetUp(pClDevice, 0); flags = GetParam(); // clang-format off imageFormat.image_channel_data_type = channelType; imageFormat.image_channel_order = channelOrder; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_width = 
testImageDimensions; imageDesc.image_height = testImageDimensions; imageDesc.image_depth = 0; imageDesc.image_array_size = 1; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = NULL; // clang-format on } void TearDown() override { CommandQueueFixture::TearDown(); ClDeviceFixture::TearDown(); } cl_image_format imageFormat; cl_image_desc imageDesc; cl_int retVal = CL_SUCCESS; cl_mem_flags flags = 0; unsigned char pHostPtr[testImageDimensions * testImageDimensions * elementSize * 4]; }; typedef CreateImageTest CreateImageNoHostPtr; TEST(TestSliceAndRowPitch, Given1dImageWithZeroRowPitchAndZeroSlicePitchWhenGettingHostPtrSlicePitchAndRowPitchThenCorrectValuesAreReturned) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.ForceLinearImages.set(true); cl_image_format imageFormat; cl_image_desc imageDesc; cl_int retVal; MockContext context; const size_t width = 5; const size_t height = 3; const size_t depth = 2; char *hostPtr = (char *)alignedMalloc(width * height * depth * elementSize * 2, 64); imageFormat.image_channel_data_type = channelType; imageFormat.image_channel_order = channelOrder; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = NULL; // 1D image with 0 row_pitch and 0 slice_pitch imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D; imageDesc.image_width = width; imageDesc.image_height = 0; imageDesc.image_depth = 0; imageDesc.image_array_size = 0; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR; auto surfaceFormat = Image::getSurfaceFormatFromTable( flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); auto image = Image::create( &context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context.getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imageDesc, hostPtr, retVal); ASSERT_NE(nullptr, 
image); EXPECT_EQ(width * elementSize, image->getHostPtrRowPitch()); EXPECT_EQ(0u, image->getHostPtrSlicePitch()); delete image; alignedFree(hostPtr); } TEST(TestSliceAndRowPitch, Given1dImageWithNonZeroRowPitchAndZeroSlicePitchWhenGettingHostPtrSlicePitchAndRowPitchThenCorrectValuesAreReturned) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.ForceLinearImages.set(true); cl_image_format imageFormat; cl_image_desc imageDesc; cl_int retVal; MockContext context; const size_t width = 5; const size_t height = 3; const size_t depth = 2; char *hostPtr = (char *)alignedMalloc(width * height * depth * elementSize * 2, 64); imageFormat.image_channel_data_type = channelType; imageFormat.image_channel_order = channelOrder; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = NULL; // 1D image with non-zero row_pitch and 0 slice_pitch imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D; imageDesc.image_width = width; imageDesc.image_height = 0; imageDesc.image_depth = 0; imageDesc.image_array_size = 0; imageDesc.image_row_pitch = (width + 1) * elementSize; imageDesc.image_slice_pitch = 0; cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR; auto surfaceFormat = Image::getSurfaceFormatFromTable( flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); auto image = Image::create( &context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context.getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imageDesc, hostPtr, retVal); ASSERT_NE(nullptr, image); EXPECT_EQ((width + 1) * elementSize, image->getHostPtrRowPitch()); EXPECT_EQ(0u, image->getHostPtrSlicePitch()); delete image; alignedFree(hostPtr); } TEST(TestSliceAndRowPitch, Given2dImageWithNonZeroRowPitchAndZeroSlicePitchWhenGettingHostPtrSlicePitchAndRowPitchThenCorrectValuesAreReturned) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.ForceLinearImages.set(true); cl_image_format imageFormat; cl_image_desc imageDesc; 
cl_int retVal; MockContext context; const size_t width = 5; const size_t height = 3; const size_t depth = 2; char *hostPtr = (char *)alignedMalloc(width * height * depth * elementSize * 2, 64); imageFormat.image_channel_data_type = channelType; imageFormat.image_channel_order = channelOrder; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = NULL; // 2D image with non-zero row_pitch and 0 slice_pitch imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_width = width; imageDesc.image_height = height; imageDesc.image_depth = 0; imageDesc.image_array_size = 0; imageDesc.image_row_pitch = (width + 1) * elementSize; imageDesc.image_slice_pitch = 0; cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR; auto surfaceFormat = Image::getSurfaceFormatFromTable( flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); auto image = Image::create( &context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context.getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imageDesc, hostPtr, retVal); ASSERT_NE(nullptr, image); EXPECT_EQ((width + 1) * elementSize, image->getHostPtrRowPitch()); EXPECT_EQ(0u, image->getHostPtrSlicePitch()); delete image; alignedFree(hostPtr); } TEST(TestSliceAndRowPitch, Given1dArrayWithNonZeroRowPitchAndZeroSlicePitchWhenGettingHostPtrSlicePitchAndRowPitchThenCorrectValuesAreReturned) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.ForceLinearImages.set(true); cl_image_format imageFormat; cl_image_desc imageDesc; cl_int retVal; MockContext context; const size_t width = 5; const size_t height = 3; const size_t depth = 2; char *hostPtr = (char *)alignedMalloc(width * height * depth * elementSize * 2, 64); imageFormat.image_channel_data_type = channelType; imageFormat.image_channel_order = channelOrder; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = NULL; // 1D ARRAY image with non-zero row_pitch and 0 
slice_pitch imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D_ARRAY; imageDesc.image_width = width; imageDesc.image_height = 0; imageDesc.image_depth = 0; imageDesc.image_array_size = 2; imageDesc.image_row_pitch = (width + 1) * elementSize; imageDesc.image_slice_pitch = 0; cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR; auto surfaceFormat = Image::getSurfaceFormatFromTable( flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); auto image = Image::create( &context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context.getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imageDesc, hostPtr, retVal); ASSERT_NE(nullptr, image); EXPECT_EQ((width + 1) * elementSize, image->getHostPtrRowPitch()); EXPECT_EQ((width + 1) * elementSize, image->getHostPtrSlicePitch()); delete image; alignedFree(hostPtr); } TEST(TestSliceAndRowPitch, Given2dArrayWithNonZeroRowPitchAndZeroSlicePitchWhenGettingHostPtrSlicePitchAndRowPitchThenCorrectValuesAreReturned) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.ForceLinearImages.set(true); cl_image_format imageFormat; cl_image_desc imageDesc; cl_int retVal; MockContext context; const size_t width = 5; const size_t height = 3; const size_t depth = 2; char *hostPtr = (char *)alignedMalloc(width * height * depth * elementSize * 2, 64); imageFormat.image_channel_data_type = channelType; imageFormat.image_channel_order = channelOrder; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = NULL; // 2D ARRAY image with non-zero row_pitch and 0 slice_pitch imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY; imageDesc.image_width = width; imageDesc.image_height = height; imageDesc.image_depth = 0; imageDesc.image_array_size = 2; imageDesc.image_row_pitch = (width + 1) * elementSize; imageDesc.image_slice_pitch = 0; cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR; auto surfaceFormat = Image::getSurfaceFormatFromTable( flags, 
&imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); auto image = Image::create( &context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context.getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imageDesc, hostPtr, retVal); ASSERT_NE(nullptr, image); EXPECT_EQ((width + 1) * elementSize, image->getHostPtrRowPitch()); EXPECT_EQ((width + 1) * elementSize * height, image->getHostPtrSlicePitch()); delete image; alignedFree(hostPtr); } TEST(TestSliceAndRowPitch, Given2dArrayWithZeroRowPitchAndNonZeroSlicePitchWhenGettingHostPtrSlicePitchAndRowPitchThenCorrectValuesAreReturned) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.ForceLinearImages.set(true); cl_image_format imageFormat; cl_image_desc imageDesc; cl_int retVal; MockContext context; const size_t width = 5; const size_t height = 3; const size_t depth = 2; char *hostPtr = (char *)alignedMalloc(width * height * depth * elementSize * 2, 64); imageFormat.image_channel_data_type = channelType; imageFormat.image_channel_order = channelOrder; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = NULL; // 2D ARRAY image with zero row_pitch and non-zero slice_pitch imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY; imageDesc.image_width = width; imageDesc.image_height = height; imageDesc.image_depth = 0; imageDesc.image_array_size = 2; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = (width + 1) * elementSize * height; cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR; auto surfaceFormat = Image::getSurfaceFormatFromTable( flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); auto image = Image::create( &context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context.getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imageDesc, hostPtr, retVal); ASSERT_NE(nullptr, image); EXPECT_EQ(width * elementSize, image->getHostPtrRowPitch()); 
EXPECT_EQ((width + 1) * elementSize * height, image->getHostPtrSlicePitch()); delete image; alignedFree(hostPtr); } TEST(TestSliceAndRowPitch, Given2dArrayWithNonZeroRowPitchAndNonZeroSlicePitchWhenGettingHostPtrSlicePitchAndRowPitchThenCorrectValuesAreReturned) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.ForceLinearImages.set(true); cl_image_format imageFormat; cl_image_desc imageDesc; cl_int retVal; MockContext context; const size_t width = 5; const size_t height = 3; const size_t depth = 2; char *hostPtr = (char *)alignedMalloc(width * height * depth * elementSize * 2, 64); imageFormat.image_channel_data_type = channelType; imageFormat.image_channel_order = channelOrder; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = NULL; // 2D ARRAY image with non-zero row_pitch and non-zero slice_pitch imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY; imageDesc.image_width = width; imageDesc.image_height = height; imageDesc.image_depth = 0; imageDesc.image_array_size = 2; imageDesc.image_row_pitch = (width + 1) * elementSize; imageDesc.image_slice_pitch = (width + 1) * elementSize * height; cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR; auto surfaceFormat = Image::getSurfaceFormatFromTable( flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); auto image = Image::create( &context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context.getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imageDesc, hostPtr, retVal); ASSERT_NE(nullptr, image); EXPECT_EQ((width + 1) * elementSize, image->getHostPtrRowPitch()); EXPECT_EQ((width + 1) * elementSize * height, image->getHostPtrSlicePitch()); delete image; alignedFree(hostPtr); } TEST(TestSliceAndRowPitch, Given2dArrayWithNonZeroRowPitchAndNonZeroSlicePitchGreaterThanRowPitchTimesHeightWhenGettingHostPtrSlicePitchAndRowPitchThenCorrectValuesAreReturned) { DebugManagerStateRestore dbgRestorer; 
DebugManager.flags.ForceLinearImages.set(true); cl_image_format imageFormat; cl_image_desc imageDesc; cl_int retVal; MockContext context; const size_t width = 5; const size_t height = 3; const size_t depth = 2; char *hostPtr = (char *)alignedMalloc(width * height * depth * elementSize * 2, 64); imageFormat.image_channel_data_type = channelType; imageFormat.image_channel_order = channelOrder; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = NULL; // 2D ARRAY image with non-zero row_pitch and non-zero slice_pitch > row_pitch * height imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY; imageDesc.image_width = width; imageDesc.image_height = height; imageDesc.image_depth = 0; imageDesc.image_array_size = 2; imageDesc.image_row_pitch = (width + 1) * elementSize; imageDesc.image_slice_pitch = (width + 1) * elementSize * (height + 1); cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR; auto surfaceFormat = Image::getSurfaceFormatFromTable( flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); auto image = Image::create( &context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context.getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imageDesc, hostPtr, retVal); ASSERT_NE(nullptr, image); EXPECT_EQ((width + 1) * elementSize, image->getHostPtrRowPitch()); EXPECT_EQ((width + 1) * elementSize * (height + 1), image->getHostPtrSlicePitch()); delete image; alignedFree(hostPtr); } TEST(TestCreateImage, GivenSharedContextWhenImageIsCreatedThenRowAndSliceAreCorrect) { cl_image_format imageFormat; cl_image_desc imageDesc; cl_int retVal; MockContext context; context.isSharedContext = true; const size_t width = 5; const size_t height = 3; const size_t depth = 2; char *hostPtr = (char *)alignedMalloc(width * height * depth * elementSize * 2, 64); imageFormat.image_channel_data_type = channelType; imageFormat.image_channel_order = channelOrder; imageDesc.num_mip_levels = 0; 
imageDesc.num_samples = 0; imageDesc.mem_object = NULL; // 2D image with non-zero row_pitch and 0 slice_pitch imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_width = width; imageDesc.image_height = height; imageDesc.image_depth = 0; imageDesc.image_array_size = 0; imageDesc.image_row_pitch = (width + 1) * elementSize; imageDesc.image_slice_pitch = 0; cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR; auto surfaceFormat = Image::getSurfaceFormatFromTable( flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); auto image = Image::create( &context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context.getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imageDesc, hostPtr, retVal); ASSERT_NE(nullptr, image); EXPECT_EQ((width + 1) * elementSize, image->getHostPtrRowPitch()); EXPECT_EQ(0u, image->getHostPtrSlicePitch()); EXPECT_TRUE(image->isMemObjZeroCopy()); delete image; alignedFree(hostPtr); } TEST(TestCreateImageUseHostPtr, GivenDifferenHostPtrAlignmentsWhenCheckingMemoryALignmentThenCorrectValueIsReturned) { KernelBinaryHelper kbHelper(KernelBinaryHelper::BUILT_INS_WITH_IMAGES); cl_image_format imageFormat; cl_image_desc imageDesc; cl_int retVal; MockContext context; const size_t width = 4; const size_t height = 32; imageFormat.image_channel_data_type = channelType; imageFormat.image_channel_order = channelOrder; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = NULL; // 2D image with 0 row_pitch and 0 slice_pitch imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_width = width; imageDesc.image_height = height; imageDesc.image_depth = 0; imageDesc.image_array_size = 0; imageDesc.image_row_pitch = alignUp(alignUp(width, 4) * 4, 0x80); //row pitch for tiled img imageDesc.image_slice_pitch = 0; void *pageAlignedPointer = alignedMalloc(imageDesc.image_row_pitch * height * 1 * 4 + 256, 4096); void *hostPtr[] = {ptrOffset(pageAlignedPointer, 
16), // 16 - byte alignment ptrOffset(pageAlignedPointer, 32), // 32 - byte alignment ptrOffset(pageAlignedPointer, 64), // 64 - byte alignment ptrOffset(pageAlignedPointer, 128)}; // 128 - byte alignment bool result[] = {false, false, true, true}; cl_mem_flags flags = CL_MEM_HOST_NO_ACCESS | CL_MEM_USE_HOST_PTR; auto surfaceFormat = Image::getSurfaceFormatFromTable( flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); for (int i = 0; i < 4; i++) { auto image = Image::create( &context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context.getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imageDesc, hostPtr[i], retVal); ASSERT_NE(nullptr, image); auto address = image->getCpuAddress(); if (result[i] && image->isMemObjZeroCopy()) { EXPECT_EQ(hostPtr[i], address); } else { EXPECT_NE(hostPtr[i], address); } delete image; } alignedFree(pageAlignedPointer); } TEST(TestCreateImageUseHostPtr, givenZeroCopyImageValuesWhenUsingHostPtrThenZeroCopyImageIsCreated) { cl_int retVal = CL_SUCCESS; MockContext context; cl_image_desc imageDesc = {}; imageDesc.image_width = 4096; imageDesc.image_height = 1; imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D; cl_image_format imageFormat = {}; imageFormat.image_channel_data_type = CL_UNSIGNED_INT8; imageFormat.image_channel_order = CL_R; cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR; auto surfaceFormat = Image::getSurfaceFormatFromTable( flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); auto hostPtr = alignedMalloc(imageDesc.image_width * surfaceFormat->surfaceFormat.ImageElementSizeInBytes, MemoryConstants::cacheLineSize); auto image = std::unique_ptr(Image::create( &context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context.getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imageDesc, hostPtr, retVal)); auto allocation = 
image->getGraphicsAllocation(context.getDevice(0)->getRootDeviceIndex()); EXPECT_NE(nullptr, image); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(image->isMemObjZeroCopy()); EXPECT_EQ(hostPtr, allocation->getUnderlyingBuffer()); EXPECT_EQ(nullptr, image->getMapAllocation(context.getDevice(0)->getRootDeviceIndex())); alignedFree(hostPtr); } TEST_P(CreateImageNoHostPtr, GivenMissingPitchWhenImageIsCreatedThenConstructorFillsMissingData) { auto image = createImageWithFlags(flags); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, image); const auto &imageDesc = image->getImageDesc(); // Sometimes the user doesn't pass image_row/slice_pitch during a create. // Ensure the driver fills in the missing data. EXPECT_NE(0u, imageDesc.image_row_pitch); EXPECT_GE(imageDesc.image_slice_pitch, imageDesc.image_row_pitch); delete image; } TEST_P(CreateImageNoHostPtr, whenImageIsCreatedThenItHasProperAccessAndCacheProperties) { DebugManagerStateRestore restorer; DebugManager.flags.CreateMultipleSubDevices.set(2); auto context = std::make_unique(); auto image = createImageWithFlags(flags, context.get()); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, image); auto allocation = image->getGraphicsAllocation(context->getDevice(0)->getRootDeviceIndex()); EXPECT_TRUE(allocation->getAllocationType() == AllocationType::IMAGE); auto isImageWritable = !(flags & (CL_MEM_READ_ONLY | CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS)); EXPECT_EQ(isImageWritable, allocation->isMemObjectsAllocationWithWritableFlags()); auto isReadOnly = isValueSet(flags, CL_MEM_READ_ONLY); EXPECT_NE(isReadOnly, allocation->isFlushL3Required()); delete image; } // Parameterized test that tests image creation with all flags that should be // valid with a nullptr host ptr static cl_mem_flags NoHostPtrFlags[] = { CL_MEM_READ_WRITE, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY, CL_MEM_HOST_READ_ONLY, CL_MEM_HOST_WRITE_ONLY, CL_MEM_HOST_NO_ACCESS}; INSTANTIATE_TEST_CASE_P( CreateImageTest_Create, CreateImageNoHostPtr, 
testing::ValuesIn(NoHostPtrFlags)); struct CreateImageHostPtr : public CreateImageTest, public MemoryManagementFixture { typedef CreateImageTest BaseClass; CreateImageHostPtr() { } void SetUp() override { MemoryManagementFixture::SetUp(); BaseClass::SetUp(); } void TearDown() override { delete image; BaseClass::TearDown(); platformsImpl->clear(); MemoryManagementFixture::TearDown(); } Image *createImage(cl_int &retVal) { auto surfaceFormat = Image::getSurfaceFormatFromTable( flags, &imageFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); return Image::create( context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context->getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imageDesc, pHostPtr, retVal); } cl_int retVal = CL_INVALID_VALUE; Image *image = nullptr; }; TEST_P(CreateImageHostPtr, WhenImageIsCreatedThenResidencyIsFalse) { image = createImage(retVal); ASSERT_NE(nullptr, image); auto allocation = image->getGraphicsAllocation(context->getDevice(0)->getRootDeviceIndex()); EXPECT_FALSE(allocation->isResident(pDevice->getDefaultEngine().osContext->getContextId())); } TEST_P(CreateImageHostPtr, WhenCheckingAddressThenAlllocationDependsOnSizeRelativeToPage) { image = createImage(retVal); auto allocation = image->getGraphicsAllocation(context->getDevice(0)->getRootDeviceIndex()); ASSERT_NE(nullptr, image); auto address = image->getBasePtrForMap(0); EXPECT_NE(nullptr, address); if (!(flags & CL_MEM_USE_HOST_PTR)) { EXPECT_EQ(nullptr, image->getHostPtr()); } if (flags & CL_MEM_USE_HOST_PTR) { //if size fits within a page then zero copy can be applied, if not RT needs to do a copy of image auto computedSize = imageDesc.image_width * elementSize * alignUp(imageDesc.image_height, 4) * imageDesc.image_array_size; auto ptrSize = imageDesc.image_width * elementSize * imageDesc.image_height * imageDesc.image_array_size; auto alignedRequiredSize = alignSizeWholePage(static_cast(pHostPtr), computedSize); auto 
alignedPtrSize = alignSizeWholePage(static_cast(pHostPtr), ptrSize); size_t HalignReq = imageDesc.image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY ? 64 : 1; auto rowPitch = imageDesc.image_width * elementSize; auto slicePitch = rowPitch * imageDesc.image_height; auto requiredRowPitch = alignUp(imageDesc.image_width, HalignReq) * elementSize; auto requiredSlicePitch = requiredRowPitch * alignUp(imageDesc.image_height, 4); bool copyRequired = (alignedRequiredSize > alignedPtrSize) | (requiredRowPitch != rowPitch) | (slicePitch != requiredSlicePitch); EXPECT_EQ(pHostPtr, address); EXPECT_EQ(pHostPtr, image->getHostPtr()); if (copyRequired) { EXPECT_FALSE(image->isMemObjZeroCopy()); } } else { EXPECT_NE(pHostPtr, address); } if (flags & CL_MEM_COPY_HOST_PTR && image->isMemObjZeroCopy()) { // Buffer should contain a copy of host memory EXPECT_EQ(0, memcmp(pHostPtr, allocation->getUnderlyingBuffer(), sizeof(testImageDimensions))); } } TEST_P(CreateImageHostPtr, WhenGettingImageDescThenCorrectValuesAreReturned) { image = createImage(retVal); ASSERT_NE(nullptr, image); auto allocation = image->getGraphicsAllocation(context->getDevice(0)->getRootDeviceIndex()); const auto &imageDesc = image->getImageDesc(); // clang-format off EXPECT_EQ(this->imageDesc.image_type, imageDesc.image_type); EXPECT_EQ(this->imageDesc.image_width, imageDesc.image_width); EXPECT_EQ(this->imageDesc.image_height, imageDesc.image_height); EXPECT_EQ(this->imageDesc.image_depth, imageDesc.image_depth); EXPECT_EQ(0u, imageDesc.image_array_size); EXPECT_NE(0u, imageDesc.image_row_pitch); EXPECT_GE(imageDesc.image_slice_pitch, imageDesc.image_row_pitch); EXPECT_EQ(this->imageDesc.num_mip_levels, imageDesc.num_mip_levels); EXPECT_EQ(this->imageDesc.num_samples, imageDesc.num_samples); EXPECT_EQ(this->imageDesc.buffer, imageDesc.buffer); EXPECT_EQ(this->imageDesc.mem_object, imageDesc.mem_object); // clang-format on EXPECT_EQ(image->getHostPtrRowPitch(), static_cast(imageDesc.image_width * elementSize)); // Only 
3D, and array images can have slice pitch int isArrayOr3DType = 0; if (this->imageDesc.image_type == CL_MEM_OBJECT_IMAGE3D || this->imageDesc.image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY || this->imageDesc.image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) { isArrayOr3DType = 1; } EXPECT_EQ(image->getHostPtrSlicePitch(), static_cast(imageDesc.image_width * elementSize * imageDesc.image_height) * isArrayOr3DType); EXPECT_EQ(image->getImageCount(), 1u); EXPECT_NE(0u, image->getSize()); EXPECT_NE(nullptr, allocation); } TEST_P(CreateImageHostPtr, GivenFailedAllocationInjectionWhenCheckingAllocationThenOnlyFailedAllocationReturnsNull) { InjectedFunction method = [this](size_t failureIndex) { // System under test image = createImage(retVal); if (MemoryManagement::nonfailingAllocation == failureIndex) { EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, image); } else { EXPECT_EQ(CL_OUT_OF_HOST_MEMORY, retVal) << "for allocation " << failureIndex; EXPECT_EQ(nullptr, image); } delete image; image = nullptr; }; injectFailures(method, 4); // check only first 5 allocations - avoid checks on writeImg call allocations for tiled imgs } TEST_P(CreateImageHostPtr, givenLinearImageWhenFailedAtCreationThenReturnError) { DebugManagerStateRestore restore; DebugManager.flags.ForceLinearImages.set(true); InjectedFunction method = [this](size_t failureIndex) { // System under test image = createImage(retVal); if (MemoryManagement::nonfailingAllocation == failureIndex) { EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, image); } else { EXPECT_EQ(CL_OUT_OF_HOST_MEMORY, retVal) << "for allocation " << failureIndex; EXPECT_EQ(nullptr, image); } delete image; image = nullptr; }; injectFailures(method, 4); // check only first 5 allocations - avoid checks on writeImg call allocations for tiled imgs } TEST_P(CreateImageHostPtr, WhenWritingOutsideAllocatedMemoryWhileCreatingImageThenWriteIsHandledCorrectly) { auto mockMemoryManager = new MockMemoryManager(*pDevice->executionEnvironment); 
pDevice->injectMemoryManager(mockMemoryManager); context->memoryManager = mockMemoryManager; mockMemoryManager->redundancyRatio = 2; memset(pHostPtr, 1, testImageDimensions * testImageDimensions * elementSize * 4); imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D_ARRAY; imageDesc.image_height = 1; imageDesc.image_row_pitch = elementSize * imageDesc.image_width + 1; image = createImage(retVal); auto allocation = image->getGraphicsAllocation(pDevice->getRootDeviceIndex()); char *memory = reinterpret_cast(allocation->getUnderlyingBuffer()); auto memorySize = allocation->getUnderlyingBufferSize() / 2; for (size_t i = 0; i < image->getHostPtrSlicePitch(); ++i) { if (i < imageDesc.image_width * elementSize) { EXPECT_EQ(1, memory[i]); } else { EXPECT_EQ(0, memory[i]); } } for (size_t i = 0; i < memorySize; ++i) { EXPECT_EQ(0, memory[memorySize + i]); } mockMemoryManager->redundancyRatio = 1; } struct ModifyableImage { enum { flags = 0 }; static cl_image_format imageFormat; static cl_image_desc imageDesc; static void *hostPtr; static NEO::Context *context; }; void *ModifyableImage::hostPtr = nullptr; NEO::Context *ModifyableImage::context = nullptr; cl_image_format ModifyableImage::imageFormat; cl_image_desc ModifyableImage::imageDesc; class ImageTransfer : public ::testing::Test { public: void SetUp() override { context = new MockContext(); ASSERT_NE(context, nullptr); ModifyableImage::context = context; ModifyableImage::hostPtr = nullptr; ModifyableImage::imageFormat = {CL_R, CL_FLOAT}; ModifyableImage::imageDesc = {CL_MEM_OBJECT_IMAGE1D, 512, 0, 0, 0, 0, 0, 0, 0, {nullptr}}; hostPtr = nullptr; unalignedHostPtr = nullptr; } void TearDown() override { if (context) delete context; if (hostPtr) alignedFree(hostPtr); } void createHostPtrs(size_t imageSize) { hostPtr = alignedMalloc(imageSize + 100, 4096); unalignedHostPtr = (char *)hostPtr + 4; memset(hostPtr, 0, imageSize + 100); memset(unalignedHostPtr, 1, imageSize); } MockContext *context; void *hostPtr; void 
*unalignedHostPtr; }; TEST_F(ImageTransfer, GivenNonZeroCopyImageWhenDataTransferedFromHostPtrToMemStorageThenNoOverflowOfHostPtr) { size_t imageSize = 512 * 4; createHostPtrs(imageSize); ModifyableImage::imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D; ModifyableImage::imageDesc.image_width = 512; ModifyableImage::imageDesc.image_height = 0; ModifyableImage::imageDesc.image_row_pitch = 0; ModifyableImage::imageDesc.image_array_size = 0; ModifyableImage::imageFormat.image_channel_order = CL_R; ModifyableImage::imageFormat.image_channel_data_type = CL_FLOAT; ModifyableImage::hostPtr = unalignedHostPtr; Image *imageNonZeroCopy = ImageHelper>::create(); ASSERT_NE(nullptr, imageNonZeroCopy); void *memoryStorage = imageNonZeroCopy->getCpuAddress(); size_t memoryStorageSize = imageNonZeroCopy->getSize(); ASSERT_NE(memoryStorage, unalignedHostPtr); int result = memcmp(memoryStorage, unalignedHostPtr, imageSize); EXPECT_EQ(0, result); memset(memoryStorage, 0, memoryStorageSize); memset((char *)unalignedHostPtr + imageSize, 2, 100 - 4); auto &imgDesc = imageNonZeroCopy->getImageDesc(); MemObjOffsetArray copyOffset = {{0, 0, 0}}; MemObjSizeArray copySize = {{imgDesc.image_width, imgDesc.image_height, imgDesc.image_depth}}; imageNonZeroCopy->transferDataFromHostPtr(copySize, copyOffset); void *foundData = memchr(memoryStorage, 2, memoryStorageSize); EXPECT_EQ(0, foundData); delete imageNonZeroCopy; } TEST_F(ImageTransfer, GivenNonZeroCopyNonZeroRowPitchImageWhenDataIsTransferedFromHostPtrToMemStorageThenDestinationIsNotOverflowed) { ModifyableImage::imageDesc.image_width = 16; ModifyableImage::imageDesc.image_row_pitch = 65; ModifyableImage::imageFormat.image_channel_data_type = CL_UNORM_INT8; size_t imageSize = ModifyableImage::imageDesc.image_row_pitch; size_t imageWidth = ModifyableImage::imageDesc.image_width; createHostPtrs(imageSize); ModifyableImage::hostPtr = unalignedHostPtr; Image *imageNonZeroCopy = ImageHelper>::create(); ASSERT_NE(nullptr, imageNonZeroCopy); void 
*memoryStorage = imageNonZeroCopy->getCpuAddress(); size_t memoryStorageSize = imageNonZeroCopy->getSize(); ASSERT_NE(memoryStorage, unalignedHostPtr); int result = memcmp(memoryStorage, unalignedHostPtr, imageWidth); EXPECT_EQ(0, result); memset(memoryStorage, 0, memoryStorageSize); memset((char *)unalignedHostPtr + imageSize, 2, 100 - 4); auto &imgDesc = imageNonZeroCopy->getImageDesc(); MemObjOffsetArray copyOffset = {{0, 0, 0}}; MemObjSizeArray copySize = {{imgDesc.image_width, imgDesc.image_height, imgDesc.image_depth}}; imageNonZeroCopy->transferDataFromHostPtr(copySize, copyOffset); void *foundData = memchr(memoryStorage, 2, memoryStorageSize); EXPECT_EQ(0, foundData); delete imageNonZeroCopy; } TEST_F(ImageTransfer, GivenNonZeroCopyNonZeroRowPitchWithExtraBytes1DArrayImageWhenDataIsTransferedForthAndBackThenDataValidates) { ModifyableImage::imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D_ARRAY; ModifyableImage::imageDesc.image_width = 5; ModifyableImage::imageDesc.image_row_pitch = 28; // == (4 * 5) row bytes + (4 * 2) extra bytes ModifyableImage::imageDesc.image_array_size = 3; ModifyableImage::imageFormat.image_channel_order = CL_RGBA; ModifyableImage::imageFormat.image_channel_data_type = CL_UNORM_INT8; const size_t imageWidth = ModifyableImage::imageDesc.image_width; const size_t imageRowPitchInPixels = ModifyableImage::imageDesc.image_row_pitch / 4; const size_t imageHeight = 1; const size_t imageCount = ModifyableImage::imageDesc.image_array_size; size_t imageSize = ModifyableImage::imageDesc.image_row_pitch * imageHeight * imageCount; createHostPtrs(imageSize); uint32_t *row = static_cast(unalignedHostPtr); for (uint32_t arrayIndex = 0; arrayIndex < imageCount; ++arrayIndex) { for (uint32_t pixelInRow = 0; pixelInRow < imageRowPitchInPixels; ++pixelInRow) { if (pixelInRow < imageWidth) { row[pixelInRow] = pixelInRow; } else { row[pixelInRow] = 66; } } row = row + imageRowPitchInPixels; } ModifyableImage::hostPtr = unalignedHostPtr; Image 
*imageNonZeroCopy = ImageHelper>::create(); ASSERT_NE(nullptr, imageNonZeroCopy); void *memoryStorage = imageNonZeroCopy->getCpuAddress(); ASSERT_NE(memoryStorage, unalignedHostPtr); size_t internalSlicePitch = imageNonZeroCopy->getImageDesc().image_slice_pitch; // Check twice, once after image create, and second time after transfer from HostPtrToMemoryStorage // when these paths are unified, only one check will be enough for (size_t run = 0; run < 2; ++run) { row = static_cast(unalignedHostPtr); unsigned char *internalRow = static_cast(memoryStorage); if (run == 1) { auto &imgDesc = imageNonZeroCopy->getImageDesc(); MemObjOffsetArray copyOffset = {{0, 0, 0}}; MemObjSizeArray copySize = {{imgDesc.image_width, imgDesc.image_height, imgDesc.image_depth}}; imageNonZeroCopy->transferDataFromHostPtr(copySize, copyOffset); } for (size_t arrayIndex = 0; arrayIndex < imageCount; ++arrayIndex) { for (size_t pixelInRow = 0; pixelInRow < imageRowPitchInPixels; ++pixelInRow) { if (pixelInRow < imageWidth) { if (memcmp(&row[pixelInRow], &internalRow[pixelInRow * 4], 4)) { EXPECT_FALSE(1) << "Data in memory storage did not validate, row: " << pixelInRow << " array: " << arrayIndex << "\n"; } } else { // Change extra bytes pattern row[pixelInRow] = 55; } } row = row + imageRowPitchInPixels; internalRow = internalRow + internalSlicePitch; } } auto &imgDesc = imageNonZeroCopy->getImageDesc(); MemObjOffsetArray copyOffset = {{0, 0, 0}}; MemObjSizeArray copySize = {{imgDesc.image_width, imgDesc.image_height, imgDesc.image_depth}}; imageNonZeroCopy->transferDataToHostPtr(copySize, copyOffset); row = static_cast(unalignedHostPtr); for (size_t arrayIndex = 0; arrayIndex < imageCount; ++arrayIndex) { for (size_t pixelInRow = 0; pixelInRow < imageRowPitchInPixels; ++pixelInRow) { if (pixelInRow < imageWidth) { if (row[pixelInRow] != pixelInRow) { EXPECT_FALSE(1) << "Data under host_ptr did not validate, row: " << pixelInRow << " array: " << arrayIndex << "\n"; } } else { if 
(row[pixelInRow] != 55) { EXPECT_FALSE(1) << "Data under host_ptr corrupted in extra bytes, row: " << pixelInRow << " array: " << arrayIndex << "\n"; } } } row = row + imageRowPitchInPixels; } delete imageNonZeroCopy; } // Parameterized test that tests image creation with all flags that should be // valid with a valid host ptr static cl_mem_flags ValidHostPtrFlags[] = { 0 | CL_MEM_USE_HOST_PTR, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, CL_MEM_HOST_READ_ONLY | CL_MEM_USE_HOST_PTR, CL_MEM_HOST_WRITE_ONLY | CL_MEM_USE_HOST_PTR, CL_MEM_HOST_NO_ACCESS | CL_MEM_USE_HOST_PTR, 0 | CL_MEM_COPY_HOST_PTR, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, CL_MEM_HOST_READ_ONLY | CL_MEM_COPY_HOST_PTR, CL_MEM_HOST_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, CL_MEM_HOST_NO_ACCESS | CL_MEM_COPY_HOST_PTR}; INSTANTIATE_TEST_CASE_P( CreateImageTest_Create, CreateImageHostPtr, testing::ValuesIn(ValidHostPtrFlags)); TEST(ImageGetSurfaceFormatInfoTest, givenNullptrFormatWhenGetSurfaceFormatInfoIsCalledThenReturnsNullptr) { MockContext context; auto surfaceFormat = Image::getSurfaceFormatFromTable(0, nullptr, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); EXPECT_EQ(nullptr, surfaceFormat); } HWTEST_F(ImageCompressionTests, givenTiledImageWhenCreatingAllocationThenPreferCompression) { imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_width = 5; imageDesc.image_height = 5; MockContext context; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); auto image = std::unique_ptr(Image::create( mockContext.get(), ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context.getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal)); ASSERT_NE(nullptr, image); 
EXPECT_EQ(UnitTestHelper::tiledImagesSupported, image->isTiledAllocation()); EXPECT_TRUE(myMemoryManager->mockMethodCalled); EXPECT_EQ(UnitTestHelper::tiledImagesSupported, myMemoryManager->capturedPreferCompressed); } TEST_F(ImageCompressionTests, givenNonTiledImageWhenCreatingAllocationThenDontPreferCompression) { imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D; imageDesc.image_width = 5; MockContext context; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); auto image = std::unique_ptr(Image::create( mockContext.get(), ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context.getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal)); ASSERT_NE(nullptr, image); EXPECT_FALSE(image->isTiledAllocation()); EXPECT_TRUE(myMemoryManager->mockMethodCalled); EXPECT_FALSE(myMemoryManager->capturedPreferCompressed); } HWTEST_F(ImageCompressionTests, givenTiledImageAndVariousFlagsWhenCreatingAllocationThenCorrectlySetPreferCompression) { imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_width = 5; imageDesc.image_height = 5; auto newFlags = flags | CL_MEM_COMPRESSED_HINT_INTEL; MockContext context; auto surfaceFormat = Image::getSurfaceFormatFromTable( newFlags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); auto image = std::unique_ptr(Image::create( mockContext.get(), ClMemoryPropertiesHelper::createMemoryProperties(newFlags, 0, 0, &context.getDevice(0)->getDevice()), newFlags, 0, surfaceFormat, &imageDesc, nullptr, retVal)); ASSERT_NE(nullptr, image); EXPECT_EQ(UnitTestHelper::tiledImagesSupported, image->isTiledAllocation()); EXPECT_TRUE(myMemoryManager->mockMethodCalled); EXPECT_EQ(UnitTestHelper::tiledImagesSupported, myMemoryManager->capturedPreferCompressed); newFlags = flags | CL_MEM_UNCOMPRESSED_HINT_INTEL; surfaceFormat = Image::getSurfaceFormatFromTable( newFlags, 
&imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); image = std::unique_ptr(Image::create( mockContext.get(), ClMemoryPropertiesHelper::createMemoryProperties(newFlags, 0, 0, &context.getDevice(0)->getDevice()), newFlags, 0, surfaceFormat, &imageDesc, nullptr, retVal)); ASSERT_NE(nullptr, image); EXPECT_EQ(UnitTestHelper::tiledImagesSupported, image->isTiledAllocation()); EXPECT_TRUE(myMemoryManager->mockMethodCalled); EXPECT_FALSE(myMemoryManager->capturedPreferCompressed); } TEST_F(ImageCompressionTests, givenNonTiledImageAndVariousFlagsWhenCreatingAllocationThenDontPreferCompression) { imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D; imageDesc.image_width = 5; auto newFlags = flags | CL_MEM_COMPRESSED_HINT_INTEL; MockContext context; auto surfaceFormat = Image::getSurfaceFormatFromTable( newFlags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); auto image = std::unique_ptr(Image::create( mockContext.get(), ClMemoryPropertiesHelper::createMemoryProperties(newFlags, 0, 0, &context.getDevice(0)->getDevice()), newFlags, 0, surfaceFormat, &imageDesc, nullptr, retVal)); ASSERT_NE(nullptr, image); EXPECT_FALSE(image->isTiledAllocation()); EXPECT_TRUE(myMemoryManager->mockMethodCalled); EXPECT_FALSE(myMemoryManager->capturedPreferCompressed); newFlags = flags | CL_MEM_UNCOMPRESSED_HINT_INTEL; surfaceFormat = Image::getSurfaceFormatFromTable( newFlags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); image = std::unique_ptr(Image::create( mockContext.get(), ClMemoryPropertiesHelper::createMemoryProperties(newFlags, 0, 0, &context.getDevice(0)->getDevice()), newFlags, 0, surfaceFormat, &imageDesc, nullptr, retVal)); ASSERT_NE(nullptr, image); EXPECT_FALSE(image->isTiledAllocation()); EXPECT_TRUE(myMemoryManager->mockMethodCalled); EXPECT_FALSE(myMemoryManager->capturedPreferCompressed); } TEST(ImageTest, 
givenImageWhenGettingCompressionOfImageThenCorrectValueIsReturned) { MockContext context; std::unique_ptr image(ImageHelper::create(&context)); EXPECT_NE(nullptr, image); auto allocation = image->getGraphicsAllocation(context.getDevice(0)->getRootDeviceIndex()); allocation->getDefaultGmm()->isCompressionEnabled = true; size_t sizeReturned = 0; cl_bool usesCompression; cl_int retVal = CL_SUCCESS; retVal = image->getMemObjectInfo( CL_MEM_USES_COMPRESSION_INTEL, sizeof(cl_bool), &usesCompression, &sizeReturned); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(sizeof(cl_bool), sizeReturned); EXPECT_TRUE(usesCompression); allocation->getDefaultGmm()->isCompressionEnabled = false; sizeReturned = 0; usesCompression = cl_bool{CL_FALSE}; retVal = image->getMemObjectInfo( CL_MEM_USES_COMPRESSION_INTEL, sizeof(cl_bool), &usesCompression, &sizeReturned); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(sizeof(cl_bool), sizeReturned); EXPECT_FALSE(usesCompression); } using ImageTests = ::testing::Test; HWTEST_F(ImageTests, givenImageWhenAskedForPtrOffsetForGpuMappingThenReturnCorrectValue) { if (!UnitTestHelper::tiledImagesSupported) { GTEST_SKIP(); } MockContext ctx; std::unique_ptr image(ImageHelper::create(&ctx)); EXPECT_FALSE(image->mappingOnCpuAllowed()); MemObjOffsetArray origin = {{4, 5, 6}}; auto retOffset = image->calculateOffsetForMapping(origin); size_t expectedOffset = image->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes * origin[0] + image->getHostPtrRowPitch() * origin[1] + image->getHostPtrSlicePitch() * origin[2]; EXPECT_EQ(expectedOffset, retOffset); } TEST(ImageTest, givenImageWhenAskedForMcsInfoThenDefaultValuesAreReturned) { MockContext ctx; std::unique_ptr image(ImageHelper::create(&ctx)); auto mcsInfo = image->getMcsSurfaceInfo(); EXPECT_EQ(0u, mcsInfo.multisampleCount); EXPECT_EQ(0u, mcsInfo.qPitch); EXPECT_EQ(0u, mcsInfo.pitch); } TEST(ImageTest, givenImageWhenAskedForPtrOffsetForCpuMappingThenReturnCorrectValue) { DebugManagerStateRestore restore; 
DebugManager.flags.ForceLinearImages.set(true); MockContext ctx; std::unique_ptr image(ImageHelper::create(&ctx)); EXPECT_TRUE(image->mappingOnCpuAllowed()); MemObjOffsetArray origin = {{4, 5, 6}}; auto retOffset = image->calculateOffsetForMapping(origin); size_t expectedOffset = image->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes * origin[0] + image->getImageDesc().image_row_pitch * origin[1] + image->getImageDesc().image_slice_pitch * origin[2]; EXPECT_EQ(expectedOffset, retOffset); } TEST(ImageTest, given1DArrayImageWhenAskedForPtrOffsetForMappingThenReturnCorrectValue) { MockContext ctx; std::unique_ptr image(ImageHelper::create(&ctx)); MemObjOffsetArray origin = {{4, 5, 0}}; auto retOffset = image->calculateOffsetForMapping(origin); size_t expectedOffset = image->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes * origin[0] + image->getImageDesc().image_slice_pitch * origin[1]; EXPECT_EQ(expectedOffset, retOffset); } HWTEST_F(ImageTests, givenImageWhenAskedForPtrLengthForGpuMappingThenReturnCorrectValue) { if (!UnitTestHelper::tiledImagesSupported) { GTEST_SKIP(); } MockContext ctx; std::unique_ptr image(ImageHelper::create(&ctx)); EXPECT_FALSE(image->mappingOnCpuAllowed()); MemObjSizeArray region = {{4, 5, 6}}; auto retLength = image->calculateMappedPtrLength(region); size_t expectedLength = image->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes * region[0] + image->getHostPtrRowPitch() * region[1] + image->getHostPtrSlicePitch() * region[2]; EXPECT_EQ(expectedLength, retLength); } TEST(ImageTest, givenImageWhenAskedForPtrLengthForCpuMappingThenReturnCorrectValue) { DebugManagerStateRestore restore; DebugManager.flags.ForceLinearImages.set(true); MockContext ctx; std::unique_ptr image(ImageHelper::create(&ctx)); EXPECT_TRUE(image->mappingOnCpuAllowed()); MemObjSizeArray region = {{4, 5, 6}}; auto retLength = image->calculateMappedPtrLength(region); size_t expectedLength = 
image->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes * region[0] + image->getImageDesc().image_row_pitch * region[1] + image->getImageDesc().image_slice_pitch * region[2]; EXPECT_EQ(expectedLength, retLength); } TEST(ImageTest, givenMipMapImage3DWhenAskedForPtrOffsetForGpuMappingThenReturnOffsetWithSlicePitch) { MockContext ctx; cl_image_desc imageDesc{}; imageDesc.image_type = CL_MEM_OBJECT_IMAGE3D; imageDesc.image_width = 5; imageDesc.image_height = 5; imageDesc.image_depth = 5; imageDesc.num_mip_levels = 2; std::unique_ptr image(ImageHelper::create(&ctx, &imageDesc)); EXPECT_FALSE(image->mappingOnCpuAllowed()); MemObjOffsetArray origin{{1, 1, 1}}; auto retOffset = image->calculateOffsetForMapping(origin); size_t expectedOffset = image->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes * origin[0] + image->getHostPtrRowPitch() * origin[1] + image->getHostPtrSlicePitch() * origin[2]; EXPECT_EQ(expectedOffset, retOffset); } TEST(ImageTest, givenMipMapImage2DArrayWhenAskedForPtrOffsetForGpuMappingThenReturnOffsetWithSlicePitch) { MockContext ctx; cl_image_desc imageDesc{}; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY; imageDesc.image_width = 5; imageDesc.image_height = 5; imageDesc.image_array_size = 5; imageDesc.num_mip_levels = 2; std::unique_ptr image(ImageHelper::create(&ctx, &imageDesc)); EXPECT_FALSE(image->mappingOnCpuAllowed()); MemObjOffsetArray origin{{1, 1, 1}}; auto retOffset = image->calculateOffsetForMapping(origin); size_t expectedOffset = image->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes * origin[0] + image->getHostPtrRowPitch() * origin[1] + image->getHostPtrSlicePitch() * origin[2]; EXPECT_EQ(expectedOffset, retOffset); } TEST(ImageTest, givenNonMipMapImage2DArrayWhenAskedForPtrOffsetForGpuMappingThenReturnOffsetWithSlicePitch) { MockContext ctx; cl_image_desc imageDesc{}; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY; imageDesc.image_width = 5; imageDesc.image_height = 5; 
imageDesc.image_array_size = 5; imageDesc.num_mip_levels = 1; std::unique_ptr image(ImageHelper::create(&ctx, &imageDesc)); EXPECT_FALSE(image->mappingOnCpuAllowed()); MemObjOffsetArray origin{{1, 1, 1}}; auto retOffset = image->calculateOffsetForMapping(origin); size_t expectedOffset = image->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes * origin[0] + image->getHostPtrRowPitch() * origin[1] + image->getHostPtrSlicePitch() * origin[2]; EXPECT_EQ(expectedOffset, retOffset); } TEST(ImageTest, givenMipMapImage1DArrayWhenAskedForPtrOffsetForGpuMappingThenReturnOffsetWithSlicePitch) { MockContext ctx; cl_image_desc imageDesc{}; imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D_ARRAY; imageDesc.image_width = 5; imageDesc.image_array_size = 5; imageDesc.num_mip_levels = 2; std::unique_ptr image(ImageHelper::create(&ctx, &imageDesc)); EXPECT_FALSE(image->mappingOnCpuAllowed()); MemObjOffsetArray origin{{1, 1, 0}}; auto retOffset = image->calculateOffsetForMapping(origin); size_t expectedOffset = image->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes * origin[0] + image->getHostPtrSlicePitch() * origin[1]; EXPECT_EQ(expectedOffset, retOffset); } TEST(ImageTest, givenClMemForceLinearStorageSetWhenCreateImageThenDisallowTiling) { cl_int retVal = CL_SUCCESS; MockContext context; cl_image_desc imageDesc = {}; imageDesc.image_width = 4096; imageDesc.image_height = 1; imageDesc.image_depth = 1; imageDesc.image_type = CL_MEM_OBJECT_IMAGE3D; cl_image_format imageFormat = {}; imageFormat.image_channel_data_type = CL_UNSIGNED_INT8; imageFormat.image_channel_order = CL_R; cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_FORCE_LINEAR_STORAGE_INTEL; auto surfaceFormat = Image::getSurfaceFormatFromTable( flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); auto image = std::unique_ptr(Image::create( &context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context.getDevice(0)->getDevice()), flags, 0, 
surfaceFormat, &imageDesc, nullptr, retVal)); EXPECT_FALSE(image->isTiledAllocation()); EXPECT_NE(nullptr, image); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(ImageTest, givenClMemCopyHostPointerPassedToImageCreateWhenAllocationIsNotInSystemMemoryPoolThenAllocationIsWrittenByEnqueueWriteImage) { REQUIRE_IMAGES_OR_SKIP(defaultHwInfo); ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); auto *memoryManager = new MockMemoryManagerFailFirstAllocation(*executionEnvironment); executionEnvironment->memoryManager.reset(memoryManager); memoryManager->returnBaseAllocateGraphicsMemoryInDevicePool = true; auto device = std::make_unique(MockDevice::create(executionEnvironment, 0)); MockContext ctx(device.get()); char memory[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; auto taskCount = device->getGpgpuCommandStreamReceiver().peekLatestFlushedTaskCount(); cl_int retVal = 0; cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR; cl_image_desc imageDesc{}; imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D; imageDesc.image_width = 1; imageDesc.image_height = 1; imageDesc.image_row_pitch = sizeof(memory); cl_image_format imageFormat = {}; imageFormat.image_channel_data_type = CL_UNSIGNED_INT8; imageFormat.image_channel_order = CL_R; MockContext context; auto surfaceFormat = Image::getSurfaceFormatFromTable( flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); memoryManager->returnAllocateNonSystemGraphicsMemoryInDevicePool = true; std::unique_ptr image( Image::create(&ctx, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context.getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imageDesc, memory, retVal)); EXPECT_NE(nullptr, image); auto taskCountSent = device->getGpgpuCommandStreamReceiver().peekLatestFlushedTaskCount(); EXPECT_LT(taskCount, taskCountSent); } struct ImageConvertTypeTest : public ::testing::Test { void SetUp() override { } void TearDown() override { } std::array, 7> types = 
{{std::make_pair<>(CL_MEM_OBJECT_IMAGE1D, ImageType::Image1D), std::make_pair<>(CL_MEM_OBJECT_IMAGE2D, ImageType::Image2D), std::make_pair<>(CL_MEM_OBJECT_IMAGE3D, ImageType::Image3D), std::make_pair<>(CL_MEM_OBJECT_IMAGE1D_ARRAY, ImageType::Image1DArray), std::make_pair<>(CL_MEM_OBJECT_IMAGE2D_ARRAY, ImageType::Image2DArray), std::make_pair<>(CL_MEM_OBJECT_IMAGE1D_BUFFER, ImageType::Image1DBuffer), std::make_pair<>(0, ImageType::Invalid)}}; }; TEST_F(ImageConvertTypeTest, givenClMemObjectTypeWhenConvertedThenCorrectImageTypeIsReturned) { for (size_t i = 0; i < types.size(); i++) { EXPECT_EQ(types[i].second, Image::convertType(static_cast(types[i].first))); } } TEST_F(ImageConvertTypeTest, givenImageTypeWhenConvertedThenCorrectClMemObjectTypeIsReturned) { for (size_t i = 0; i < types.size(); i++) { EXPECT_EQ(static_cast(types[i].first), Image::convertType(types[i].second)); } } TEST(ImageConvertDescriptorTest, givenClImageDescWhenConvertedThenCorrectImageDescriptorIsReturned) { cl_image_desc clDesc = {CL_MEM_OBJECT_IMAGE1D, 16, 24, 1, 1, 1024, 2048, 1, 3, {nullptr}}; auto desc = Image::convertDescriptor(clDesc); EXPECT_EQ(ImageType::Image1D, desc.imageType); EXPECT_EQ(clDesc.image_array_size, desc.imageArraySize); EXPECT_EQ(clDesc.image_depth, desc.imageDepth); EXPECT_EQ(clDesc.image_height, desc.imageHeight); EXPECT_EQ(clDesc.image_row_pitch, desc.imageRowPitch); EXPECT_EQ(clDesc.image_slice_pitch, desc.imageSlicePitch); EXPECT_EQ(clDesc.image_width, desc.imageWidth); EXPECT_EQ(clDesc.num_mip_levels, desc.numMipLevels); EXPECT_EQ(clDesc.num_samples, desc.numSamples); EXPECT_FALSE(desc.fromParent); cl_mem temporary = reinterpret_cast(0x1234); clDesc.mem_object = temporary; desc = Image::convertDescriptor(clDesc); EXPECT_TRUE(desc.fromParent); } TEST(ImageConvertDescriptorTest, givenImageDescriptorWhenConvertedThenCorrectClImageDescIsReturned) { ImageDescriptor desc = {ImageType::Image2D, 16, 24, 1, 1, 1024, 2048, 1, 3, false}; auto clDesc = 
Image::convertDescriptor(desc); EXPECT_EQ(clDesc.image_type, static_cast(CL_MEM_OBJECT_IMAGE2D)); EXPECT_EQ(clDesc.image_array_size, desc.imageArraySize); EXPECT_EQ(clDesc.image_depth, desc.imageDepth); EXPECT_EQ(clDesc.image_height, desc.imageHeight); EXPECT_EQ(clDesc.image_row_pitch, desc.imageRowPitch); EXPECT_EQ(clDesc.image_slice_pitch, desc.imageSlicePitch); EXPECT_EQ(clDesc.image_width, desc.imageWidth); EXPECT_EQ(clDesc.num_mip_levels, desc.numMipLevels); EXPECT_EQ(clDesc.num_samples, desc.numSamples); EXPECT_EQ(nullptr, clDesc.mem_object); } TEST(ImageTest, givenImageWhenValidateRegionAndOriginIsCalledThenAdditionalOriginAndRegionCoordinatesAreAnalyzed) { size_t origin[3]{}; size_t region[3]{1, 1, 1}; for (uint32_t imageType : {CL_MEM_OBJECT_IMAGE2D, CL_MEM_OBJECT_IMAGE2D_ARRAY, CL_MEM_OBJECT_IMAGE3D}) { cl_image_desc desc = {}; desc.image_type = imageType; EXPECT_EQ(CL_INVALID_VALUE, Image::validateRegionAndOrigin(origin, region, desc)); desc.image_width = 1; EXPECT_EQ(CL_INVALID_VALUE, Image::validateRegionAndOrigin(origin, region, desc)); desc.image_height = 1; desc.image_depth = 1; desc.image_array_size = 1; EXPECT_EQ(CL_SUCCESS, Image::validateRegionAndOrigin(origin, region, desc)); if (imageType == CL_MEM_OBJECT_IMAGE3D) { desc.image_depth = 0; EXPECT_EQ(CL_INVALID_VALUE, Image::validateRegionAndOrigin(origin, region, desc)); } } } TEST(ImageTest, givenImageArrayWhenValidateRegionAndOriginIsCalledThenAdditionalOriginAndRegionCoordinatesAreAnalyzed) { size_t region[3]{1, 1, 1}; size_t origin[3]{}; cl_image_desc desc = {}; desc.image_type = CL_MEM_OBJECT_IMAGE1D_ARRAY; desc.image_width = 1; EXPECT_EQ(CL_INVALID_VALUE, Image::validateRegionAndOrigin(origin, region, desc)); desc.image_array_size = 1; EXPECT_EQ(CL_SUCCESS, Image::validateRegionAndOrigin(origin, region, desc)); desc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY; desc.image_array_size = 0; desc.image_width = 1; desc.image_height = 1; EXPECT_EQ(CL_INVALID_VALUE, 
Image::validateRegionAndOrigin(origin, region, desc)); desc.image_array_size = 1; EXPECT_EQ(CL_SUCCESS, Image::validateRegionAndOrigin(origin, region, desc)); } typedef ::testing::TestWithParam MipLevelCoordinateTest; TEST_P(MipLevelCoordinateTest, givenMipmappedImageWhenValidateRegionAndOriginIsCalledThenAdditionalOriginCoordinateIsAnalyzed) { size_t origin[4]{}; size_t region[3]{1, 1, 1}; cl_image_desc desc = {}; desc.image_type = GetParam(); desc.num_mip_levels = 2; desc.image_width = 1; desc.image_height = 1; desc.image_depth = 1; desc.image_array_size = 1; origin[getMipLevelOriginIdx(desc.image_type)] = 1; EXPECT_EQ(CL_SUCCESS, Image::validateRegionAndOrigin(origin, region, desc)); origin[getMipLevelOriginIdx(desc.image_type)] = 2; EXPECT_EQ(CL_INVALID_MIP_LEVEL, Image::validateRegionAndOrigin(origin, region, desc)); } INSTANTIATE_TEST_CASE_P(MipLevelCoordinate, MipLevelCoordinateTest, ::testing::Values(CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE2D, CL_MEM_OBJECT_IMAGE2D_ARRAY, CL_MEM_OBJECT_IMAGE3D)); typedef ::testing::TestWithParam> HasSlicesTest; TEST_P(HasSlicesTest, givenMemObjectTypeWhenHasSlicesIsCalledThenReturnsTrueIfTypeDefinesObjectWithSlicePitch) { auto pair = GetParam(); EXPECT_EQ(pair.second, Image::hasSlices(pair.first)); } INSTANTIATE_TEST_CASE_P(HasSlices, HasSlicesTest, ::testing::Values(std::make_pair(CL_MEM_OBJECT_IMAGE1D, false), std::make_pair(CL_MEM_OBJECT_IMAGE1D_ARRAY, true), std::make_pair(CL_MEM_OBJECT_IMAGE2D, false), std::make_pair(CL_MEM_OBJECT_IMAGE2D_ARRAY, true), std::make_pair(CL_MEM_OBJECT_IMAGE3D, true), std::make_pair(CL_MEM_OBJECT_BUFFER, false), std::make_pair(CL_MEM_OBJECT_PIPE, false))); typedef ::testing::Test ImageTransformTest; HWTEST_F(ImageTransformTest, givenSurfaceStateWhenTransformImage3dTo2dArrayIsCalledThenSurface2dArrayIsSet) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE; MockContext 
context; auto image = std::unique_ptr(ImageHelper::create(&context)); auto surfaceState = FamilyType::cmdInitRenderSurfaceState; auto imageHw = static_cast *>(image.get()); surfaceState.setSurfaceType(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_3D); surfaceState.setSurfaceArray(false); imageHw->transformImage3dTo2dArray(reinterpret_cast(&surfaceState)); EXPECT_EQ(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_2D, surfaceState.getSurfaceType()); EXPECT_TRUE(surfaceState.getSurfaceArray()); } HWTEST_F(ImageTransformTest, givenSurfaceStateWhenTransformImage2dArrayTo3dIsCalledThenSurface3dIsSet) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE; MockContext context; auto image = std::unique_ptr(ImageHelper::create(&context)); auto surfaceState = FamilyType::cmdInitRenderSurfaceState; auto imageHw = static_cast *>(image.get()); surfaceState.setSurfaceType(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_2D); surfaceState.setSurfaceArray(true); imageHw->transformImage2dArrayTo3d(reinterpret_cast(&surfaceState)); EXPECT_EQ(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_3D, surfaceState.getSurfaceType()); EXPECT_FALSE(surfaceState.getSurfaceArray()); } HWTEST_F(ImageTransformTest, givenSurfaceBaseAddressAndUnifiedSurfaceWhenSetUnifiedAuxAddressCalledThenAddressIsSet) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE; MockContext context; auto image = std::unique_ptr(ImageHelper::create(&context)); auto surfaceState = FamilyType::cmdInitRenderSurfaceState; auto gmm = std::unique_ptr(new Gmm(context.getDevice(0)->getGmmClientContext(), nullptr, 1, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true)); uint64_t surfBsaseAddress = 0xABCDEF1000; surfaceState.setSurfaceBaseAddress(surfBsaseAddress); auto mockResource = reinterpret_cast(gmm->gmmResourceInfo.get()); mockResource->setUnifiedAuxTranslationCapable(); EXPECT_EQ(0u, 
surfaceState.getAuxiliarySurfaceBaseAddress()); setUnifiedAuxBaseAddress(&surfaceState, gmm.get()); uint64_t offset = gmm->gmmResourceInfo->getUnifiedAuxSurfaceOffset(GMM_UNIFIED_AUX_TYPE::GMM_AUX_SURF); EXPECT_EQ(surfBsaseAddress + offset, surfaceState.getAuxiliarySurfaceBaseAddress()); } TEST(ImageTest, givenImageWhenFillRegionIsCalledThenProperRegionIsSet) { MockContext context; { size_t region[3] = {}; std::unique_ptr image(Image1dHelper<>::create(&context)); image->fillImageRegion(region); EXPECT_EQ(Image1dDefaults::imageDesc.image_width, region[0]); EXPECT_EQ(1u, region[1]); EXPECT_EQ(1u, region[2]); } { size_t region[3] = {}; std::unique_ptr image(Image1dArrayHelper<>::create(&context)); image->fillImageRegion(region); EXPECT_EQ(Image1dArrayDefaults::imageDesc.image_width, region[0]); EXPECT_EQ(Image1dArrayDefaults::imageDesc.image_array_size, region[1]); EXPECT_EQ(1u, region[2]); } { size_t region[3] = {}; std::unique_ptr image(Image1dBufferHelper<>::create(&context)); image->fillImageRegion(region); EXPECT_EQ(Image1dBufferDefaults::imageDesc.image_width, region[0]); EXPECT_EQ(1u, region[1]); EXPECT_EQ(1u, region[2]); } { size_t region[3] = {}; std::unique_ptr image(Image2dHelper<>::create(&context)); image->fillImageRegion(region); EXPECT_EQ(Image2dDefaults::imageDesc.image_width, region[0]); EXPECT_EQ(Image2dDefaults::imageDesc.image_height, region[1]); EXPECT_EQ(1u, region[2]); } { size_t region[3] = {}; std::unique_ptr image(Image2dArrayHelper<>::create(&context)); image->fillImageRegion(region); EXPECT_EQ(Image2dArrayDefaults::imageDesc.image_width, region[0]); EXPECT_EQ(Image2dArrayDefaults::imageDesc.image_height, region[1]); EXPECT_EQ(Image2dArrayDefaults::imageDesc.image_array_size, region[2]); } { size_t region[3] = {}; std::unique_ptr image(Image3dHelper<>::create(&context)); image->fillImageRegion(region); EXPECT_EQ(Image3dDefaults::imageDesc.image_width, region[0]); EXPECT_EQ(Image3dDefaults::imageDesc.image_height, region[1]); 
EXPECT_EQ(Image3dDefaults::imageDesc.image_depth, region[2]); } } TEST(ImageTest, givenMultiDeviceEnvironmentWhenReleaseImageFromBufferThenMainBufferProperlyDereferenced) { MockDefaultContext context; int32_t retVal; auto *buffer = Buffer::create(&context, CL_MEM_READ_WRITE, MemoryConstants::pageSize, nullptr, retVal); auto imageDesc = Image2dDefaults::imageDesc; cl_mem clBuffer = buffer; imageDesc.mem_object = clBuffer; auto image = Image2dHelper<>::create(&context, &imageDesc); EXPECT_EQ(3u, buffer->getMultiGraphicsAllocation().getGraphicsAllocations().size()); EXPECT_EQ(3u, image->getMultiGraphicsAllocation().getGraphicsAllocations().size()); EXPECT_EQ(2, buffer->getRefInternalCount()); image->release(); EXPECT_EQ(1, buffer->getRefInternalCount()); buffer->release(); } compute-runtime-22.14.22890/opencl/test/unit_test/mem_obj/image_tests_pvc_and_later.cpp000066400000000000000000000114031422164147700311270ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/common/test_macros/test_checks_shared.h" #include "opencl/source/api/api.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_image.h" namespace NEO { extern ImageFactoryFuncs imageFactory[IGFX_MAX_CORE]; } using namespace NEO; using PvcAndLaterImageTests = ::testing::Test; template struct MockImage : public ImageHw { using ImageHw::transferData; using ImageHw::ImageHw; static Image *createMockImage(Context *context, const MemoryProperties &memoryProperties, uint64_t flags, uint64_t flagsIntel, size_t size, void *hostPtr, const cl_image_format &imageFormat, const cl_image_desc &imageDesc, bool zeroCopy, MultiGraphicsAllocation multiGraphicsAllocation, bool 
isObjectRedescribed, uint32_t baseMipLevel, uint32_t mipCount, const ClSurfaceFormatInfo *surfaceFormatInfo, const SurfaceOffsets *surfaceOffsets) { auto memoryStorage = multiGraphicsAllocation.getDefaultGraphicsAllocation()->getUnderlyingBuffer(); return new MockImage(context, memoryProperties, flags, flagsIntel, size, memoryStorage, hostPtr, imageFormat, imageDesc, zeroCopy, std::move(multiGraphicsAllocation), isObjectRedescribed, baseMipLevel, mipCount, *surfaceFormatInfo, surfaceOffsets); } void transferData(void *dst, size_t dstRowPitch, size_t dstSlicePitch, void *src, size_t srcRowPitch, size_t srcSlicePitch, std::array copyRegion, std::array copyOrigin) override { transferDataDestinationPointers.push_back(dst); ImageHw::transferData(dst, dstRowPitch, dstSlicePitch, src, srcRowPitch, srcSlicePitch, copyRegion, copyOrigin); } std::vector transferDataDestinationPointers; }; HWTEST2_F(PvcAndLaterImageTests, givenNoImagesSupportLocalMemoryEnabledAndCopyHostPtrWhenCreatingLinearImageThenMemoryIsTransferredOverCpu, IsAtLeastXeHpcCore) { DebugManagerStateRestore restore; DebugManager.flags.EnableLocalMemory.set(true); auto eRenderCoreFamily = defaultHwInfo->platform.eRenderCoreFamily; VariableBackup supportsImagesBackup{&defaultHwInfo->capabilityTable.supportsImages, false}; VariableBackup createImageFunctionBackup{&imageFactory[eRenderCoreFamily].createImageFunction}; createImageFunctionBackup = MockImage::createMockImage; uint32_t devicesCount = 1; UltClDeviceFactory clDeviceFactory{devicesCount, 0}; cl_device_id devices[] = {clDeviceFactory.rootDevices[0]}; MockContext context{ClDeviceVector{devices, 1}}; uint8_t imageMemory[5] = {1, 2, 3, 4, 5}; cl_int retVal = CL_INVALID_VALUE; cl_image_format format = {0}; format.image_channel_data_type = CL_UNSIGNED_INT8; format.image_channel_order = CL_R; cl_image_desc desc{0}; desc.image_type = CL_MEM_OBJECT_IMAGE2D; desc.image_width = 5; desc.image_height = 1; desc.image_depth = 1; cl_mem image = 
clCreateImageWithPropertiesINTEL(&context, nullptr, CL_MEM_COPY_HOST_PTR | CL_MEM_FORCE_LINEAR_STORAGE_INTEL, &format, &desc, imageMemory, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, image); auto &mockImage = *static_cast *>(image); auto &graphicsAllocations = mockImage.getMultiGraphicsAllocation().getGraphicsAllocations(); EXPECT_FALSE(graphicsAllocations[0]->isLocked()); auto &mockMemoryManager = static_cast(*clDeviceFactory.rootDevices[0]->getMemoryManager()); EXPECT_EQ(graphicsAllocations.size(), mockMemoryManager.lockResourceCalled); EXPECT_EQ(graphicsAllocations.size(), mockMemoryManager.unlockResourceCalled); EXPECT_EQ(devicesCount, mockMemoryManager.lockResourcePointers.size()); EXPECT_EQ(devicesCount, mockImage.transferDataDestinationPointers.size()); EXPECT_EQ(mockMemoryManager.lockResourcePointers[0], mockImage.transferDataDestinationPointers[0]); clReleaseMemObject(image); } compute-runtime-22.14.22890/opencl/test/unit_test/mem_obj/image_tests_tgllp_and_later.cpp000066400000000000000000000032161422164147700314640ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/source/mem_obj/image.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" using namespace NEO; struct ImageTestsTgllAndLater : ClDeviceFixture, testing::Test { void SetUp() override { ClDeviceFixture::SetUp(); context = std::make_unique(pClDevice); srcImage = std::unique_ptr(Image3dHelper<>::create(context.get())); } void TearDown() override { srcImage.reset(); context.reset(); ClDeviceFixture::TearDown(); } std::unique_ptr context{}; std::unique_ptr srcImage{}; }; using TgllpAndLaterMatcher = IsAtLeastProduct; HWTEST2_F(ImageTestsTgllAndLater, givenDepthResourceWhenSettingImageArgThenSetDepthStencilResourceField, 
TgllpAndLaterMatcher) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; RENDER_SURFACE_STATE surfaceState{}; auto &gpuFlags = srcImage->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getDefaultGmm()->gmmResourceInfo->getResourceFlags()->Gpu; gpuFlags.Depth = 0; srcImage->setImageArg(&surfaceState, false, 0, pClDevice->getRootDeviceIndex(), false); EXPECT_FALSE(surfaceState.getDepthStencilResource()); gpuFlags.Depth = 1; srcImage->setImageArg(&surfaceState, false, 0, pClDevice->getRootDeviceIndex(), false); EXPECT_TRUE(surfaceState.getDepthStencilResource()); } compute-runtime-22.14.22890/opencl/test/unit_test/mem_obj/image_tests_xehp_and_later.cpp000066400000000000000000000564651422164147700313240ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gmm_helper/gmm.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/os_interface/os_interface.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/helpers/variable_backup.h" #include "shared/test/common/mocks/mock_allocation_properties.h" #include "shared/test/common/mocks/mock_gmm.h" #include "shared/test/common/mocks/mock_gmm_client_context.h" #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/mem_obj/image_compression_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "test_traits_common.h" #include using namespace NEO; using XeHPAndLaterImageTests = ::testing::Test; HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterImageTests, WhenAppendingSurfaceStateParamsThenDoNothing) { typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; MockContext context; 
auto image = std::unique_ptr(ImageHelper::create(&context)); auto surfaceStateBefore = FamilyType::cmdInitRenderSurfaceState; auto surfaceStateAfter = FamilyType::cmdInitRenderSurfaceState; auto imageHw = static_cast *>(image.get()); EXPECT_EQ(0, memcmp(&surfaceStateBefore, &surfaceStateAfter, sizeof(RENDER_SURFACE_STATE))); imageHw->appendSurfaceStateParams(&surfaceStateAfter, context.getDevice(0)->getRootDeviceIndex(), true); EXPECT_EQ(0, memcmp(&surfaceStateBefore, &surfaceStateAfter, sizeof(RENDER_SURFACE_STATE))); } HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterImageTests, givenCompressionEnabledWhenAppendingSurfaceStateParamsThenProgramCompressionFormat) { MockContext context; auto mockGmmClient = static_cast(context.getDevice(0)->getRootDeviceEnvironment().getGmmClientContext()); typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto image = std::unique_ptr(ImageHelper::create(&context)); auto imageHw = static_cast *>(image.get()); mockGmmClient->capturedFormat = GMM_FORMAT_INVALID; auto surfaceState = FamilyType::cmdInitRenderSurfaceState; surfaceState.setAuxiliarySurfaceMode(RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE); imageHw->setImageArg(&surfaceState, false, 0, context.getDevice(0)->getRootDeviceIndex(), false); EXPECT_EQ(0u, surfaceState.getCompressionFormat()); EXPECT_EQ(GMM_FORMAT_INVALID, mockGmmClient->capturedFormat); auto gmm = image->getMultiGraphicsAllocation().getDefaultGraphicsAllocation()->getDefaultGmm(); gmm->isCompressionEnabled = true; surfaceState = FamilyType::cmdInitRenderSurfaceState; imageHw->setImageArg(&surfaceState, false, 0, context.getDevice(0)->getRootDeviceIndex(), false); EXPECT_TRUE(EncodeSurfaceState::isAuxModeEnabled(&surfaceState, gmm)); EXPECT_NE(0u, surfaceState.getCompressionFormat()); EXPECT_EQ(image->getGraphicsAllocation(context.getDevice(0)->getRootDeviceIndex())->getDefaultGmm()->gmmResourceInfo->getResourceFormat(), mockGmmClient->capturedFormat); } 
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterImageTests, givenCompressionWhenAppendingImageFromBufferThenTwoIsSetAsCompressionFormat) { typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; MockContext context; uint32_t compressionFormat = context.getDevice(0)->getGmmHelper()->getClientContext()->getSurfaceStateCompressionFormat(GMM_RESOURCE_FORMAT::GMM_FORMAT_GENERIC_8BIT); cl_int retVal = CL_SUCCESS; cl_image_format imageFormat = {}; cl_image_desc imageDesc = {}; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_RGBA; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_height = 128; imageDesc.image_width = 256; imageDesc.mem_object = clCreateBuffer(&context, CL_MEM_READ_WRITE, 128 * 256 * 4, nullptr, &retVal); auto gmm = new Gmm(context.getDevice(0)->getGmmHelper()->getClientContext(), nullptr, 1, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true); gmm->isCompressionEnabled = true; auto buffer = castToObject(imageDesc.mem_object); buffer->getGraphicsAllocation(0)->setGmm(gmm, 0); auto surfaceFormat = Image::getSurfaceFormatFromTable( CL_MEM_READ_WRITE, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); auto image = std::unique_ptr(Image::create( &context, ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_READ_WRITE, 0, 0, &context.getDevice(0)->getDevice()), CL_MEM_READ_WRITE, 0, surfaceFormat, &imageDesc, NULL, retVal)); auto imageHw = static_cast *>(image.get()); auto surfaceState = FamilyType::cmdInitRenderSurfaceState; imageHw->setImageArg(&surfaceState, false, 0, context.getDevice(0)->getRootDeviceIndex(), false); EXPECT_TRUE(EncodeSurfaceState::isAuxModeEnabled(&surfaceState, gmm)); EXPECT_EQ(compressionFormat, surfaceState.getCompressionFormat()); clReleaseMemObject(imageDesc.mem_object); } HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterImageTests, givenImageFromBufferWhenSettingSurfaceStateThenPickCompressionFormatFromDebugVariable) { typedef 
typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; DebugManagerStateRestore restorer; uint32_t bufferCompressionFormat = 3; DebugManager.flags.ForceBufferCompressionFormat.set(bufferCompressionFormat); MockContext context; cl_int retVal = CL_SUCCESS; cl_image_format imageFormat = {}; cl_image_desc imageDesc = {}; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_RGBA; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_height = 128; imageDesc.image_width = 256; imageDesc.mem_object = clCreateBuffer(&context, CL_MEM_READ_WRITE, 128 * 256 * 4, nullptr, &retVal); auto gmm = new Gmm(context.getDevice(0)->getGmmHelper()->getClientContext(), nullptr, 1, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true); gmm->isCompressionEnabled = true; auto buffer = castToObject(imageDesc.mem_object); buffer->getGraphicsAllocation(0)->setGmm(gmm, 0); auto surfaceFormat = Image::getSurfaceFormatFromTable(CL_MEM_READ_WRITE, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); auto image = std::unique_ptr(Image::create(&context, ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_READ_WRITE, 0, 0, &context.getDevice(0)->getDevice()), CL_MEM_READ_WRITE, 0, surfaceFormat, &imageDesc, NULL, retVal)); auto imageHw = static_cast *>(image.get()); auto surfaceState = FamilyType::cmdInitRenderSurfaceState; imageHw->setImageArg(&surfaceState, false, 0, context.getDevice(0)->getRootDeviceIndex(), false); EXPECT_TRUE(EncodeSurfaceState::isAuxModeEnabled(&surfaceState, gmm)); EXPECT_EQ(bufferCompressionFormat, surfaceState.getCompressionFormat()); clReleaseMemObject(imageDesc.mem_object); } struct CompressionParamsSupportedMatcher { template static constexpr bool isMatched() { if constexpr (HwMapper::GfxProduct::supportsCmdSet(IGFX_XE_HP_CORE)) { return TestTraits::get()>::surfaceStateCompressionParamsSupported; } return false; } }; HWTEST2_F(XeHPAndLaterImageTests, 
givenMcsAllocationWhenSetArgIsCalledWithUnifiedAuxCapabilityAndMCSThenProgramAuxFieldsForCcs, CompressionParamsSupportedMatcher) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using AUXILIARY_SURFACE_MODE = typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE; using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE; MockContext context; McsSurfaceInfo msi = {10, 20, 3}; auto mcsAlloc = context.getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{context.getDevice(0)->getRootDeviceIndex(), MemoryConstants::pageSize}); cl_image_desc imgDesc = Image2dDefaults::imageDesc; imgDesc.num_samples = 8; std::unique_ptr image(Image2dHelper<>::create(&context, &imgDesc)); auto surfaceState = FamilyType::cmdInitRenderSurfaceState; auto imageHw = static_cast *>(image.get()); mcsAlloc->setDefaultGmm(new Gmm(context.getDevice(0)->getRootDeviceEnvironment().getGmmClientContext(), nullptr, 1, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true)); surfaceState.setSurfaceBaseAddress(0xABCDEF1000); imageHw->setMcsSurfaceInfo(msi); imageHw->setMcsAllocation(mcsAlloc); auto mockResource = static_cast(mcsAlloc->getDefaultGmm()->gmmResourceInfo.get()); mockResource->setUnifiedAuxTranslationCapable(); mockResource->setMultisampleControlSurface(); EXPECT_EQ(0u, surfaceState.getAuxiliarySurfaceBaseAddress()); imageHw->setAuxParamsForMultisamples(&surfaceState); EXPECT_NE(0u, surfaceState.getAuxiliarySurfaceBaseAddress()); EXPECT_EQ(surfaceState.getAuxiliarySurfaceMode(), AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_MCS_LCE); } HWTEST2_F(ImageClearColorFixture, givenImageForXeHPAndLaterWhenClearColorParametersAreSetThenClearColorSurfaceInSurfaceStateIsSet, CompressionParamsSupportedMatcher) { this->setUpImpl(); auto surfaceState = this->getSurfaceState(); surfaceState.setSurfaceBaseAddress(0xABCDEF1000); EXPECT_EQ(false, surfaceState.getClearValueAddressEnable()); EXPECT_EQ(0u, surfaceState.getClearColorAddress()); 
EXPECT_EQ(0u, surfaceState.getClearColorAddressHigh()); std::unique_ptr> imageHw(static_cast *>(ImageHelper::create(&context))); auto gmm = imageHw->getGraphicsAllocation(context.getDevice(0)->getRootDeviceIndex())->getDefaultGmm(); gmm->gmmResourceInfo->getResourceFlags()->Gpu.IndirectClearColor = 1; EncodeSurfaceState::setClearColorParams(&surfaceState, gmm); EXPECT_EQ(true, surfaceState.getClearValueAddressEnable()); EXPECT_NE(0u, surfaceState.getClearColorAddress()); EXPECT_NE(0u, surfaceState.getClearColorAddressHigh()); } struct CompressionClearColorAddressMatcher { template static constexpr bool isMatched() { if constexpr (HwMapper::GfxProduct::supportsCmdSet(IGFX_XE_HP_CORE)) { return TestTraits::get()>::clearColorAddressMatcher; } return false; } }; HWTEST2_F(ImageClearColorFixture, givenImageForXeHPAndLaterWhenCanonicalAddresForClearColorIsUsedThenItsConvertedToNonCanonicalForm, CompressionClearColorAddressMatcher) { this->setUpImpl(); auto surfaceState = this->getSurfaceState(); uint64_t canonicalAddress = 0xffffABCDABCDE000; EXPECT_THROW(surfaceState.setClearColorAddressHigh(static_cast(canonicalAddress >> 32)), std::exception); surfaceState.setSurfaceBaseAddress(canonicalAddress); std::unique_ptr> imageHw(static_cast *>(ImageHelper::create(&context))); auto gmm = imageHw->getGraphicsAllocation(context.getDevice(0)->getRootDeviceIndex())->getDefaultGmm(); gmm->gmmResourceInfo->getResourceFlags()->Gpu.IndirectClearColor = 1; EXPECT_NO_THROW(EncodeSurfaceState::setClearColorParams(&surfaceState, gmm)); uint64_t nonCanonicalAddress = ((static_cast(surfaceState.getClearColorAddressHigh()) << 32) | surfaceState.getClearColorAddress()); EXPECT_EQ(GmmHelper::decanonize(canonicalAddress), nonCanonicalAddress); } HWTEST2_F(XeHPAndLaterImageTests, givenMediaCompressionWhenAppendingNewAllocationThenNotZeroIsSetAsCompressionType, CompressionParamsSupportedMatcher) { MockContext context; using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; auto 
hwInfo = defaultHwInfo.get(); cl_image_desc imgDesc = Image2dDefaults::imageDesc; imgDesc.num_samples = 8; std::unique_ptr image(Image2dHelper<>::create(&context, &imgDesc)); auto surfaceState = FamilyType::cmdInitRenderSurfaceState; auto imageHw = static_cast *>(image.get()); imageHw->getGraphicsAllocation(context.getDevice(0)->getRootDeviceIndex())->getDefaultGmm()->gmmResourceInfo->getResourceFlags()->Info.MediaCompressed = true; surfaceState.setAuxiliarySurfaceMode(RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_CCS_E); EncodeSurfaceState::setImageAuxParamsForCCS(&surfaceState, imageHw->getGraphicsAllocation(context.getDevice(0)->getRootDeviceIndex())->getDefaultGmm()); imageHw->setImageArg(&surfaceState, false, 0, context.getDevice(0)->getRootDeviceIndex(), false); if (hwInfo->featureTable.flags.ftrFlatPhysCCS) { EXPECT_NE(surfaceState.getCompressionFormat(), GMM_FLATCCS_FORMAT::GMM_FLATCCS_FORMAT_INVALID); } else { EXPECT_NE(surfaceState.getCompressionFormat(), GMM_E2ECOMP_FORMAT::GMM_E2ECOMP_FORMAT_INVALID); } EXPECT_TRUE(surfaceState.getMemoryCompressionEnable()); EXPECT_EQ(surfaceState.getAuxiliarySurfaceMode(), RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE); } HWTEST2_F(XeHPAndLaterImageTests, givenCompressionWhenAppendingNewAllocationThenNotZeroIsSetAsCompressionType, CompressionParamsSupportedMatcher) { MockContext context; using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; auto hwInfo = defaultHwInfo.get(); cl_image_desc imgDesc = Image2dDefaults::imageDesc; imgDesc.num_samples = 8; std::unique_ptr image(Image2dHelper<>::create(&context, &imgDesc)); auto surfaceState = FamilyType::cmdInitRenderSurfaceState; auto imageHw = static_cast *>(image.get()); surfaceState.setAuxiliarySurfaceMode(RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_CCS_E); auto rootDeviceIndex = context.getDevice(0)->getRootDeviceIndex(); auto gmm = 
imageHw->getGraphicsAllocation(rootDeviceIndex)->getDefaultGmm(); gmm->gmmResourceInfo->getResourceFlags()->Info.RenderCompressed = true; gmm->isCompressionEnabled = true; auto mcsGmm = new MockGmm(context.getDevice(0)->getGmmClientContext()); mcsGmm->isCompressionEnabled = true; mcsGmm->gmmResourceInfo->getResourceFlags()->Info.RenderCompressed = true; mcsGmm->gmmResourceInfo->getResourceFlags()->Gpu.UnifiedAuxSurface = true; mcsGmm->gmmResourceInfo->getResourceFlags()->Gpu.CCS = true; auto mcsAlloc = context.getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootDeviceIndex, 1}); mcsAlloc->setDefaultGmm(mcsGmm); imageHw->setMcsAllocation(mcsAlloc); imageHw->setImageArg(&surfaceState, false, 0, rootDeviceIndex, false); if (hwInfo->featureTable.flags.ftrFlatPhysCCS) { EXPECT_NE(surfaceState.getCompressionFormat(), GMM_FLATCCS_FORMAT::GMM_FLATCCS_FORMAT_INVALID); } else { EXPECT_NE(surfaceState.getCompressionFormat(), GMM_E2ECOMP_FORMAT::GMM_E2ECOMP_FORMAT_INVALID); } EXPECT_FALSE(surfaceState.getMemoryCompressionEnable()); EXPECT_EQ(surfaceState.getAuxiliarySurfaceMode(), RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_CCS_E); } HWTEST2_F(XeHPAndLaterImageTests, givenNoCompressionWhenProgramingImageSurfaceStateThenCompressionIsDisabled, CompressionParamsSupportedMatcher) { MockContext context; using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; cl_image_desc imgDesc = Image2dDefaults::imageDesc; std::unique_ptr image(Image2dHelper<>::create(&context, &imgDesc)); auto surfaceState = FamilyType::cmdInitRenderSurfaceState; surfaceState.setMemoryCompressionEnable(true); surfaceState.setAuxiliarySurfaceMode(RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_CCS_E); auto imageHw = static_cast *>(image.get()); imageHw->getGraphicsAllocation(context.getDevice(0)->getRootDeviceIndex())->getDefaultGmm()->isCompressionEnabled = false; imageHw->setImageArg(&surfaceState, false, 0, 
0, false); EXPECT_FALSE(surfaceState.getMemoryCompressionEnable()); EXPECT_EQ(surfaceState.getAuxiliarySurfaceMode(), RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE); } struct XeHPAndLaterImageHelperTests : ::testing::Test { void SetUp() override { context = std::make_unique(); image.reset(ImageHelper::create(context.get())); mockGmmResourceInfo = static_cast(image->getGraphicsAllocation(context->getDevice(0)->getRootDeviceIndex())->getDefaultGmm()->gmmResourceInfo.get()); gmmClientContext = static_cast(context->getDevice(0)->getGmmHelper()->getClientContext()); } std::unique_ptr context; std::unique_ptr image; MockGmmResourceInfo *mockGmmResourceInfo; MockGmmClientContext *gmmClientContext; uint8_t mockCompressionFormat = 3u; }; HWTEST2_F(XeHPAndLaterImageHelperTests, givenMediaCompressedImageWhenAppendingSurfaceStateParamsForCompressionThenCallAppriopriateFunction, CompressionParamsSupportedMatcher) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; RENDER_SURFACE_STATE rss{}; platformsImpl->clear(); rss.setMemoryCompressionEnable(true); mockGmmResourceInfo->getResourceFlags()->Info.MediaCompressed = true; gmmClientContext->compressionFormatToReturn = mockCompressionFormat; const auto expectedGetSurfaceStateCompressionFormatCalled = gmmClientContext->getSurfaceStateCompressionFormatCalled; const auto expectedGetMediaSurfaceStateCompressionFormatCalled = gmmClientContext->getMediaSurfaceStateCompressionFormatCalled + 1; EncodeSurfaceState::appendImageCompressionParams(&rss, image->getMultiGraphicsAllocation().getDefaultGraphicsAllocation(), context->getDevice(0)->getGmmHelper(), false, GMM_NO_PLANE); EXPECT_EQ(platform(), nullptr); EXPECT_EQ(mockCompressionFormat, rss.getCompressionFormat()); EXPECT_EQ(expectedGetSurfaceStateCompressionFormatCalled, gmmClientContext->getSurfaceStateCompressionFormatCalled); EXPECT_EQ(expectedGetMediaSurfaceStateCompressionFormatCalled, 
gmmClientContext->getMediaSurfaceStateCompressionFormatCalled); } HWTEST2_F(XeHPAndLaterImageHelperTests, givenMediaCompressedPlanarImageWhenAppendingSurfaceStateParamsForCompressionThenCorrectCompressionFormatIsSet, CompressionParamsSupportedMatcher) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; RENDER_SURFACE_STATE rss{}; platformsImpl->clear(); rss.setMemoryCompressionEnable(true); mockGmmResourceInfo->getResourceFlags()->Info.MediaCompressed = true; struct { uint8_t returnedCompressionFormat; uint8_t expectedCompressionFormat; GMM_YUV_PLANE_ENUM plane; } testInputs[] = { // regular image {0x0, 0x0, GMM_NO_PLANE}, {0xF, 0xF, GMM_NO_PLANE}, {0x10, 0x10, GMM_NO_PLANE}, {0x1F, 0x1F, GMM_NO_PLANE}, // luma plane {0x0, 0x0, GMM_PLANE_Y}, {0xF, 0xF, GMM_PLANE_Y}, {0x10, 0x0, GMM_PLANE_Y}, {0x1F, 0xF, GMM_PLANE_Y}, // chroma plane {0x0, 0x10, GMM_PLANE_U}, {0x0, 0x10, GMM_PLANE_V}, {0xF, 0x1F, GMM_PLANE_U}, {0xF, 0x1F, GMM_PLANE_V}, {0x10, 0x10, GMM_PLANE_U}, {0x10, 0x10, GMM_PLANE_V}, {0x1F, 0x1F, GMM_PLANE_U}, {0x1F, 0x1F, GMM_PLANE_V}, }; for (auto &testInput : testInputs) { gmmClientContext->compressionFormatToReturn = testInput.returnedCompressionFormat; EncodeSurfaceState::appendImageCompressionParams(&rss, image->getMultiGraphicsAllocation().getDefaultGraphicsAllocation(), context->getDevice(0)->getGmmHelper(), false, testInput.plane); EXPECT_EQ(testInput.expectedCompressionFormat, rss.getCompressionFormat()); } } HWTEST2_F(XeHPAndLaterImageHelperTests, givenNotMediaCompressedImageWhenAppendingSurfaceStateParamsForCompressionThenCallAppriopriateFunction, CompressionParamsSupportedMatcher) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; RENDER_SURFACE_STATE rss{}; platformsImpl->clear(); rss.setMemoryCompressionEnable(true); mockGmmResourceInfo->getResourceFlags()->Info.MediaCompressed = false; gmmClientContext->compressionFormatToReturn = mockCompressionFormat; const auto 
expectedGetSurfaceStateCompressionFormatCalled = gmmClientContext->getSurfaceStateCompressionFormatCalled + 1; const auto expectedGetMediaSurfaceStateCompressionFormatCalled = gmmClientContext->getMediaSurfaceStateCompressionFormatCalled; EncodeSurfaceState::appendImageCompressionParams(&rss, image->getMultiGraphicsAllocation().getDefaultGraphicsAllocation(), context->getDevice(0)->getGmmHelper(), false, GMM_NO_PLANE); EXPECT_EQ(platform(), nullptr); EXPECT_EQ(mockCompressionFormat, rss.getCompressionFormat()); EXPECT_EQ(expectedGetSurfaceStateCompressionFormatCalled, gmmClientContext->getSurfaceStateCompressionFormatCalled); EXPECT_EQ(expectedGetMediaSurfaceStateCompressionFormatCalled, gmmClientContext->getMediaSurfaceStateCompressionFormatCalled); } /* Memory compression disabled but auxiliary surface mode set to AUX_MCS_LCE with MediaCompressed set: expects the media compression-format getter count to grow by one and the non-media count to stay unchanged. */ HWTEST2_F(XeHPAndLaterImageHelperTests, givenAuxModeMcsLceWhenAppendingSurfaceStateParamsForCompressionThenCallAppriopriateFunction, CompressionParamsSupportedMatcher) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using AUXILIARY_SURFACE_MODE = typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE; RENDER_SURFACE_STATE rss{}; platformsImpl->clear(); rss.setMemoryCompressionEnable(false); rss.setAuxiliarySurfaceMode(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_MCS_LCE); mockGmmResourceInfo->getResourceFlags()->Info.MediaCompressed = true; gmmClientContext->compressionFormatToReturn = mockCompressionFormat; const auto expectedGetSurfaceStateCompressionFormatCalled = gmmClientContext->getSurfaceStateCompressionFormatCalled; const auto expectedGetMediaSurfaceStateCompressionFormatCalled = gmmClientContext->getMediaSurfaceStateCompressionFormatCalled + 1; EncodeSurfaceState::appendImageCompressionParams(&rss, image->getMultiGraphicsAllocation().getDefaultGraphicsAllocation(), context->getDevice(0)->getGmmHelper(), false, GMM_NO_PLANE); EXPECT_EQ(platform(), nullptr); EXPECT_EQ(mockCompressionFormat, rss.getCompressionFormat()); EXPECT_EQ(expectedGetSurfaceStateCompressionFormatCalled,
gmmClientContext->getSurfaceStateCompressionFormatCalled); EXPECT_EQ(expectedGetMediaSurfaceStateCompressionFormatCalled, gmmClientContext->getMediaSurfaceStateCompressionFormatCalled); } compute-runtime-22.14.22890/opencl/test/unit_test/mem_obj/image_tiled_tests.cpp000066400000000000000000000123411422164147700274310ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/hw_helper.h" #include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/mocks/mock_gmm.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "opencl/source/mem_obj/image.h" #include "opencl/test/unit_test/command_queue/command_queue_fixture.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "gtest/gtest.h" using namespace NEO; /* Image width/height and channel format that the fixture below copies into imageDesc and imageFormat. */ static const auto dimension = 16; static auto channelType = CL_UNORM_INT8; static auto channelOrder = CL_RGBA; /* Parameterized fixture: SetUp initializes the device and command-queue fixtures and fills imageFormat/imageDesc for the image type returned by GetParam(); TearDown releases the fixtures in reverse order. */ class CreateTiledImageTest : public ClDeviceFixture, public testing::TestWithParam, public CommandQueueHwFixture { typedef CommandQueueHwFixture CommandQueueFixture; public: CreateTiledImageTest() { } protected: void SetUp() override { ClDeviceFixture::SetUp(); CommandQueueFixture::SetUp(pClDevice, 0); type = GetParam(); // clang-format off imageFormat.image_channel_data_type = channelType; imageFormat.image_channel_order = channelOrder; imageDesc.image_type = type; imageDesc.image_width = dimension; imageDesc.image_height = dimension; imageDesc.image_depth = 1; imageDesc.image_array_size = 1; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = NULL; // clang-format on } void TearDown() override { CommandQueueFixture::TearDown(); ClDeviceFixture::TearDown(); } cl_image_format imageFormat; cl_image_desc
imageDesc; cl_int retVal = CL_SUCCESS; cl_mem_object_type type = 0; }; /* Creates an image of the parameterized type and expects isTiledAllocation() to equal UnitTestHelper::tiledImagesSupported. */ HWTEST_P(CreateTiledImageTest, GivenImageTypeWhenCheckingIsTiledThenTrueReturnedForTiledImage) { MockContext context; cl_mem_flags flags = CL_MEM_READ_WRITE; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, pClDevice->getHardwareInfo().capabilityTable.supportsOcl21Features); auto image = Image::create( &context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, pDevice), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal); ASSERT_NE(nullptr, image); EXPECT_EQ(UnitTestHelper::tiledImagesSupported, image->isTiledAllocation()); delete image; } /* Builds a shared image around a mock allocation whose GMM is queried from the fixture's image descriptor and expects it to report a tiled allocation. */ TEST_P(CreateTiledImageTest, GivenSharedTiledImageWhenCheckingIsTiledThenTrueReturned) { MockContext context; MockGraphicsAllocation *alloc = new MockGraphicsAllocation(0, 0x1000); ImageInfo info = {}; McsSurfaceInfo msi = {}; ClSurfaceFormatInfo surfaceFormat; surfaceFormat.surfaceFormat.GMMSurfaceFormat = GMM_FORMAT_B8G8R8A8_UNORM; info.surfaceFormat = &surfaceFormat.surfaceFormat; info.imgDesc = Image::convertDescriptor(imageDesc); info.plane = GMM_NO_PLANE; auto gmm = MockGmm::queryImgParams(context.getDevice(0)->getGmmClientContext(), info, false); alloc->setDefaultGmm(gmm.release()); auto image = Image::createSharedImage( &context, nullptr, msi, GraphicsAllocationHelper::toMultiGraphicsAllocation(alloc), nullptr, CL_MEM_READ_WRITE, 0, &surfaceFormat, info, 0, 0, 0); ASSERT_NE(nullptr, image); EXPECT_TRUE(image->isTiledAllocation()); delete image; } /* Same shared-image setup with image_height forced to 1; expects a non-tiled allocation. */ typedef CreateTiledImageTest CreateNonTiledImageTest; TEST_P(CreateNonTiledImageTest, GivenSharedNonTiledImageWhenCheckingIsTiledThenFalseReturned) { MockContext context; MockGraphicsAllocation *alloc = new MockGraphicsAllocation(0, 0x1000); ImageInfo info = {}; McsSurfaceInfo msi = {}; ClSurfaceFormatInfo surfaceFormat; imageDesc.image_height = 1; surfaceFormat.surfaceFormat.GMMSurfaceFormat = GMM_FORMAT_B8G8R8A8_UNORM; info.surfaceFormat =
&surfaceFormat.surfaceFormat; info.imgDesc = Image::convertDescriptor(imageDesc); info.plane = GMM_NO_PLANE; auto gmm = MockGmm::queryImgParams(context.getDevice(0)->getGmmClientContext(), info, false); alloc->setDefaultGmm(gmm.release()); auto image = Image::createSharedImage( &context, nullptr, msi, GraphicsAllocationHelper::toMultiGraphicsAllocation(alloc), nullptr, CL_MEM_READ_WRITE, 0, &surfaceFormat, info, 0, 0, 0); ASSERT_NE(nullptr, image); EXPECT_FALSE(image->isTiledAllocation()); delete image; } // Parameterized test that tests image creation with tiled types static uint32_t TiledImageTypes[] = { CL_MEM_OBJECT_IMAGE2D, CL_MEM_OBJECT_IMAGE2D_ARRAY, CL_MEM_OBJECT_IMAGE3D}; static uint32_t NonTiledImageTypes[] = { CL_MEM_OBJECT_IMAGE1D}; INSTANTIATE_TEST_CASE_P(CreateTiledImageTest, CreateTiledImageTest, testing::ValuesIn(TiledImageTypes)); INSTANTIATE_TEST_CASE_P(CreateNonTiledImageTest, CreateNonTiledImageTest, testing::ValuesIn(NonTiledImageTypes)); compute-runtime-22.14.22890/opencl/test/unit_test/mem_obj/image_transfer_tests.cpp000066400000000000000000000223571422164147700301640ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/mocks/mock_device.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "gtest/gtest.h" using namespace NEO; /* Fixture: SetUp creates a mock device and a MockContext on it; createImageAndSetTestParams() creates an image and caches its descriptor, host-pointer and image pitches, and pixel size for the tests. */ class ImageHostPtrTransferTests : public testing::Test { public: void SetUp() override { device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); context.reset(new MockContext(device.get())); } template void createImageAndSetTestParams() { image.reset(ImageHelper>::create(context.get())); imgDesc = &image->getImageDesc(); hostPtrSlicePitch = image->getHostPtrSlicePitch(); hostPtrRowPitch = image->getHostPtrRowPitch(); imageSlicePitch =
image->getImageDesc().image_slice_pitch; imageRowPitch = image->getImageDesc().image_row_pitch; pixelSize = image->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes; } /* Writes the byte value 123 into every pixel of the copyOrigin/copyRegion box inside dstPtr using the given slice and row pitches; for 1D-array images coordinates 1 and 2 of origin and region are swapped first. */ void setExpectedData(uint8_t *dstPtr, size_t slicePitch, size_t rowPitch, std::array copyOrigin, std::array copyRegion) { if (image->getImageDesc().image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) { // For 1DArray type, array region and origin are stored on 2nd position. For 2Darray its on 3rd position. std::swap(copyOrigin[1], copyOrigin[2]); std::swap(copyRegion[1], copyRegion[2]); } for (size_t slice = copyOrigin[2]; slice < (copyOrigin[2] + copyRegion[2]); slice++) { auto sliceOffset = ptrOffset(dstPtr, slicePitch * slice); for (size_t height = copyOrigin[1]; height < (copyOrigin[1] + copyRegion[1]); height++) { auto rowOffset = ptrOffset(sliceOffset, rowPitch * height); memset(ptrOffset(rowOffset, copyOrigin[0] * pixelSize), 123, copyRegion[0] * pixelSize); } } } std::unique_ptr device; std::unique_ptr context; std::unique_ptr image; const cl_image_desc *imgDesc = nullptr; size_t hostPtrSlicePitch, hostPtrRowPitch, imageSlicePitch, imageRowPitch, pixelSize; }; /* Forces linear images, fills the image memory with 123 and the host pointer with 0, then expects the host pointer after transferDataToHostPtr() to match a buffer in which only the requested sub-region was set. */ TEST_F(ImageHostPtrTransferTests, given3dImageWithoutTilingWhenTransferToHostPtrCalledThenCopyRequestedRegionAndOriginOnly) { DebugManagerStateRestore restorer; DebugManager.flags.ForceLinearImages.set(true); createImageAndSetTestParams(); EXPECT_NE(hostPtrSlicePitch, imageSlicePitch); EXPECT_NE(hostPtrRowPitch, imageRowPitch); EXPECT_NE(image->getCpuAddress(), image->getHostPtr()); std::array copyOrigin = {{imgDesc->image_width / 2, imgDesc->image_height / 2, imgDesc->image_depth / 2}}; std::array copyRegion = copyOrigin; std::unique_ptr expectedHostPtr(new uint8_t[hostPtrSlicePitch * imgDesc->image_depth]); memset(image->getHostPtr(), 0, hostPtrSlicePitch * imgDesc->image_depth); memset(expectedHostPtr.get(), 0, hostPtrSlicePitch * imgDesc->image_depth); memset(image->getCpuAddress(), 123, imageSlicePitch * imgDesc->image_depth);
setExpectedData(expectedHostPtr.get(), hostPtrSlicePitch, hostPtrRowPitch, copyOrigin, copyRegion); image->transferDataToHostPtr(copyRegion, copyOrigin); EXPECT_TRUE(memcmp(image->getHostPtr(), expectedHostPtr.get(), hostPtrSlicePitch * imgDesc->image_depth) == 0); } /* Mirror case: the host pointer holds 123 and the image memory is zeroed; after transferDataFromHostPtr() the image memory must match a buffer in which only the requested sub-region was set. */ TEST_F(ImageHostPtrTransferTests, given3dImageWithoutTilingWhenTransferFromHostPtrCalledThenCopyRequestedRegionAndOriginOnly) { DebugManagerStateRestore restorer; DebugManager.flags.ForceLinearImages.set(true); createImageAndSetTestParams(); EXPECT_NE(hostPtrSlicePitch, imageSlicePitch); EXPECT_NE(hostPtrRowPitch, imageRowPitch); EXPECT_NE(image->getCpuAddress(), image->getHostPtr()); std::array copyOrigin = {{imgDesc->image_width / 2, imgDesc->image_height / 2, imgDesc->image_depth / 2}}; std::array copyRegion = copyOrigin; std::unique_ptr expectedImageData(new uint8_t[imageSlicePitch * imgDesc->image_depth]); memset(image->getHostPtr(), 123, hostPtrSlicePitch * imgDesc->image_depth); memset(expectedImageData.get(), 0, imageSlicePitch * imgDesc->image_depth); memset(image->getCpuAddress(), 0, imageSlicePitch * imgDesc->image_depth); setExpectedData(expectedImageData.get(), imageSlicePitch, imageRowPitch, copyOrigin, copyRegion); image->transferDataFromHostPtr(copyRegion, copyOrigin); EXPECT_TRUE(memcmp(image->getCpuAddress(), expectedImageData.get(), imageSlicePitch * imgDesc->image_depth) == 0); } /* Transfer-to-host check that sizes buffers and the third coordinate by image_array_size instead of image_depth. */ TEST_F(ImageHostPtrTransferTests, given2dArrayImageWithoutTilingWhenTransferToHostPtrCalledThenCopyRequestedRegionAndOriginOnly) { DebugManagerStateRestore restorer; DebugManager.flags.ForceLinearImages.set(true); createImageAndSetTestParams(); EXPECT_NE(hostPtrSlicePitch, imageSlicePitch); EXPECT_NE(hostPtrRowPitch, imageRowPitch); EXPECT_NE(image->getCpuAddress(), image->getHostPtr()); std::array copyOrigin = {{imgDesc->image_width / 2, imgDesc->image_height / 2, imgDesc->image_array_size / 2}}; std::array copyRegion = copyOrigin; std::unique_ptr expectedHostPtr(new uint8_t[hostPtrSlicePitch *
imgDesc->image_array_size]); memset(image->getHostPtr(), 0, hostPtrSlicePitch * imgDesc->image_array_size); memset(expectedHostPtr.get(), 0, hostPtrSlicePitch * imgDesc->image_array_size); memset(image->getCpuAddress(), 123, imageSlicePitch * imgDesc->image_array_size); setExpectedData(expectedHostPtr.get(), hostPtrSlicePitch, hostPtrRowPitch, copyOrigin, copyRegion); image->transferDataToHostPtr(copyRegion, copyOrigin); EXPECT_TRUE(memcmp(image->getHostPtr(), expectedHostPtr.get(), hostPtrSlicePitch * imgDesc->image_array_size) == 0); } /* Transfer-from-host check sized by image_array_size: host pointer holds 123, image memory starts zeroed. */ TEST_F(ImageHostPtrTransferTests, given2dArrayImageWithoutTilingWhenTransferFromHostPtrCalledThenCopyRequestedRegionAndOriginOnly) { DebugManagerStateRestore restorer; DebugManager.flags.ForceLinearImages.set(true); createImageAndSetTestParams(); EXPECT_NE(hostPtrSlicePitch, imageSlicePitch); EXPECT_NE(hostPtrRowPitch, imageRowPitch); EXPECT_NE(image->getCpuAddress(), image->getHostPtr()); std::array copyOrigin = {{imgDesc->image_width / 2, imgDesc->image_height / 2, imgDesc->image_array_size / 2}}; std::array copyRegion = copyOrigin; std::unique_ptr expectedImageData(new uint8_t[imageSlicePitch * imgDesc->image_array_size]); memset(image->getHostPtr(), 123, hostPtrSlicePitch * imgDesc->image_array_size); memset(expectedImageData.get(), 0, imageSlicePitch * imgDesc->image_array_size); memset(image->getCpuAddress(), 0, imageSlicePitch * imgDesc->image_array_size); setExpectedData(expectedImageData.get(), imageSlicePitch, imageRowPitch, copyOrigin, copyRegion); image->transferDataFromHostPtr(copyRegion, copyOrigin); EXPECT_TRUE(memcmp(image->getCpuAddress(), expectedImageData.get(), imageSlicePitch * imgDesc->image_array_size) == 0); } /* Passes image_array_size / 2 as the second coordinate of origin and region (third coordinate 0 and 1) and does not force linear images. */ TEST_F(ImageHostPtrTransferTests, given1dArrayImageWhenTransferToHostPtrCalledThenUseSecondCoordinateAsSlice) { createImageAndSetTestParams(); std::array copyOrigin = {{imgDesc->image_width / 2, imgDesc->image_array_size / 2, 0}}; std::array copyRegion = {{imgDesc->image_width / 2,
imgDesc->image_array_size / 2, 1}}; std::unique_ptr expectedHostPtr(new uint8_t[hostPtrSlicePitch * imgDesc->image_array_size]); memset(image->getHostPtr(), 0, hostPtrSlicePitch * imgDesc->image_array_size); memset(expectedHostPtr.get(), 0, hostPtrSlicePitch * imgDesc->image_array_size); memset(image->getCpuAddress(), 123, imageSlicePitch * imgDesc->image_array_size); setExpectedData(expectedHostPtr.get(), hostPtrSlicePitch, hostPtrRowPitch, copyOrigin, copyRegion); image->transferDataToHostPtr(copyRegion, copyOrigin); EXPECT_TRUE(memcmp(image->getHostPtr(), expectedHostPtr.get(), hostPtrSlicePitch * imgDesc->image_array_size) == 0); } /* transferDataFromHostPtr() variant with image_array_size / 2 as the second coordinate of origin and region: host pointer holds 123, image memory starts zeroed. */ TEST_F(ImageHostPtrTransferTests, given1dArrayImageWhenTransferFromHostPtrCalledThenUseSecondCoordinateAsSlice) { createImageAndSetTestParams(); std::array copyOrigin = {{imgDesc->image_width / 2, imgDesc->image_array_size / 2, 0}}; std::array copyRegion = {{imgDesc->image_width / 2, imgDesc->image_array_size / 2, 1}}; std::unique_ptr expectedImageData(new uint8_t[imageSlicePitch * imgDesc->image_array_size]); memset(image->getHostPtr(), 123, hostPtrSlicePitch * imgDesc->image_array_size); memset(expectedImageData.get(), 0, imageSlicePitch * imgDesc->image_array_size); memset(image->getCpuAddress(), 0, imageSlicePitch * imgDesc->image_array_size); setExpectedData(expectedImageData.get(), imageSlicePitch, imageRowPitch, copyOrigin, copyRegion); image->transferDataFromHostPtr(copyRegion, copyOrigin); EXPECT_TRUE(memcmp(image->getCpuAddress(), expectedImageData.get(), imageSlicePitch * imgDesc->image_array_size) == 0); } compute-runtime-22.14.22890/opencl/test/unit_test/mem_obj/image_validate_tests.cpp000066400000000000000000001167321422164147700301320ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/aligned_memory.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "opencl/source/cl_device/cl_device.h" #include
"opencl/source/helpers/convert_color.h" #include "opencl/source/helpers/surface_formats.h" #include "opencl/source/mem_obj/image.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/test_macros/test_checks_ocl.h" #include "gtest/gtest.h" using namespace NEO; typedef decltype(&Image::redescribe) RedescribeMethod; class ImageValidateTest : public testing::TestWithParam { public: ImageValidateTest() { imageFormat = &surfaceFormat.OCLImageFormat; imageFormat->image_channel_data_type = CL_UNSIGNED_INT8; imageFormat->image_channel_order = CL_RGBA; } protected: void SetUp() override { } void TearDown() override { } cl_int retVal = CL_SUCCESS; MockContext context; ClSurfaceFormatInfo surfaceFormat; cl_image_format *imageFormat; cl_image_desc imageDesc; }; typedef ImageValidateTest ValidDescriptor; typedef ImageValidateTest InvalidDescriptor; typedef ImageValidateTest InvalidSize; TEST_P(ValidDescriptor, GivenValidSizeWhenValidatingThenSuccessIsReturned) { imageDesc = GetParam(); retVal = Image::validate(&context, {}, &surfaceFormat, &imageDesc, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_P(InvalidDescriptor, GivenZeroSizeWhenValidatingThenInvalidImageDescriptorErrorIsReturned) { imageDesc = GetParam(); retVal = Image::validate(&context, {}, &surfaceFormat, &imageDesc, nullptr); EXPECT_EQ(CL_INVALID_IMAGE_DESCRIPTOR, retVal); } TEST_P(InvalidSize, GivenInvalidSizeWhenValidatingThenInvalidImageSizeErrorIsReturned) { imageDesc = GetParam(); retVal = Image::validate(&context, {}, &surfaceFormat, &imageDesc, nullptr); EXPECT_EQ(CL_INVALID_IMAGE_SIZE, retVal); } TEST_P(ValidDescriptor, given3dImageFormatWhenGetSupportedFormatIsCalledThenDontReturnDepthFormats) { imageDesc = GetParam(); uint32_t readOnlyformatCount; uint32_t writeOnlyformatCount; uint32_t readWriteOnlyformatCount; context.getSupportedImageFormats(&context.getDevice(0)->getDevice(), CL_MEM_READ_ONLY, 
imageDesc.image_type, 0, nullptr, &readOnlyformatCount); context.getSupportedImageFormats(&context.getDevice(0)->getDevice(), CL_MEM_WRITE_ONLY, imageDesc.image_type, 0, nullptr, &writeOnlyformatCount); context.getSupportedImageFormats(&context.getDevice(0)->getDevice(), CL_MEM_READ_WRITE, imageDesc.image_type, 0, nullptr, &readWriteOnlyformatCount); auto readOnlyImgFormats = new cl_image_format[readOnlyformatCount]; auto writeOnlyImgFormats = new cl_image_format[writeOnlyformatCount]; auto readWriteOnlyImgFormats = new cl_image_format[readWriteOnlyformatCount]; context.getSupportedImageFormats(&context.getDevice(0)->getDevice(), CL_MEM_READ_ONLY, imageDesc.image_type, readOnlyformatCount, readOnlyImgFormats, 0); context.getSupportedImageFormats(&context.getDevice(0)->getDevice(), CL_MEM_WRITE_ONLY, imageDesc.image_type, writeOnlyformatCount, writeOnlyImgFormats, 0); context.getSupportedImageFormats(&context.getDevice(0)->getDevice(), CL_MEM_READ_WRITE, imageDesc.image_type, readWriteOnlyformatCount, readWriteOnlyImgFormats, 0); bool depthFound = false; for (uint32_t i = 0; i < readOnlyformatCount; i++) { if (readOnlyImgFormats[i].image_channel_order == CL_DEPTH || readOnlyImgFormats[i].image_channel_order == CL_DEPTH_STENCIL) depthFound = true; } for (uint32_t i = 0; i < readOnlyformatCount; i++) { if (readOnlyImgFormats[i].image_channel_order == CL_DEPTH || readOnlyImgFormats[i].image_channel_order == CL_DEPTH_STENCIL) depthFound = true; } for (uint32_t i = 0; i < readOnlyformatCount; i++) { if (readOnlyImgFormats[i].image_channel_order == CL_DEPTH || readOnlyImgFormats[i].image_channel_order == CL_DEPTH_STENCIL) depthFound = true; } if (!Image::isImage2dOr2dArray(imageDesc.image_type)) { EXPECT_FALSE(depthFound); } else { EXPECT_TRUE(depthFound); } delete[] readOnlyImgFormats; delete[] writeOnlyImgFormats; delete[] readWriteOnlyImgFormats; } TEST(ImageDepthFormatTest, GivenDepthFormatsWhenGettingSurfaceFormatThenCorrectSurfaceFormatIsReturned) { cl_image_format 
imgFormat = {}; imgFormat.image_channel_order = CL_DEPTH; imgFormat.image_channel_data_type = CL_FLOAT; auto surfaceFormatInfo = Image::getSurfaceFormatFromTable(CL_MEM_READ_WRITE, &imgFormat, defaultHwInfo->capabilityTable.supportsOcl21Features); ASSERT_NE(surfaceFormatInfo, nullptr); EXPECT_TRUE(surfaceFormatInfo->surfaceFormat.GMMSurfaceFormat == GMM_FORMAT_R32_FLOAT_TYPE); imgFormat.image_channel_data_type = CL_UNORM_INT16; surfaceFormatInfo = Image::getSurfaceFormatFromTable(CL_MEM_READ_WRITE, &imgFormat, defaultHwInfo->capabilityTable.supportsOcl21Features); ASSERT_NE(surfaceFormatInfo, nullptr); EXPECT_TRUE(surfaceFormatInfo->surfaceFormat.GMMSurfaceFormat == GMM_FORMAT_R16_UNORM_TYPE); } /* CL_DEPTH with CL_FLOAT / CL_UNORM_INT16 looked up for a write-only image is expected to map to GMM_FORMAT_R32_FLOAT_TYPE / GMM_FORMAT_R16_UNORM_TYPE. */ TEST(ImageDepthFormatTest, GivenWriteOnlyDepthFormatsWhenGettingSurfaceFormatThenCorrectSurfaceFormatIsReturned) { cl_image_format imgFormat = {}; imgFormat.image_channel_order = CL_DEPTH; imgFormat.image_channel_data_type = CL_FLOAT; auto surfaceFormatInfo = Image::getSurfaceFormatFromTable(CL_MEM_WRITE_ONLY, &imgFormat, defaultHwInfo->capabilityTable.supportsOcl21Features); ASSERT_NE(surfaceFormatInfo, nullptr); EXPECT_TRUE(surfaceFormatInfo->surfaceFormat.GMMSurfaceFormat == GMM_FORMAT_R32_FLOAT_TYPE); imgFormat.image_channel_data_type = CL_UNORM_INT16; surfaceFormatInfo = Image::getSurfaceFormatFromTable(CL_MEM_WRITE_ONLY, &imgFormat, defaultHwInfo->capabilityTable.supportsOcl21Features); ASSERT_NE(surfaceFormatInfo, nullptr); EXPECT_TRUE(surfaceFormatInfo->surfaceFormat.GMMSurfaceFormat == GMM_FORMAT_R16_UNORM_TYPE); } /* CL_DEPTH_STENCIL with CL_UNORM_INT24 / CL_FLOAT looked up for a read-only image is expected to map to GMM_FORMAT_GENERIC_32BIT / GMM_FORMAT_R32G32_FLOAT_TYPE. */ TEST(ImageDepthFormatTest, GivenDepthStencilFormatsWhenGettingSurfaceFormatThenCorrectSurfaceFormatIsReturned) { cl_image_format imgFormat = {}; imgFormat.image_channel_order = CL_DEPTH_STENCIL; imgFormat.image_channel_data_type = CL_UNORM_INT24; auto surfaceFormatInfo = Image::getSurfaceFormatFromTable(CL_MEM_READ_ONLY, &imgFormat, defaultHwInfo->capabilityTable.supportsOcl21Features); ASSERT_NE(surfaceFormatInfo, nullptr);
EXPECT_TRUE(surfaceFormatInfo->surfaceFormat.GMMSurfaceFormat == GMM_FORMAT_GENERIC_32BIT); imgFormat.image_channel_order = CL_DEPTH_STENCIL; imgFormat.image_channel_data_type = CL_FLOAT; surfaceFormatInfo = Image::getSurfaceFormatFromTable(CL_MEM_READ_ONLY, &imgFormat, defaultHwInfo->capabilityTable.supportsOcl21Features); ASSERT_NE(surfaceFormatInfo, nullptr); EXPECT_TRUE(surfaceFormatInfo->surfaceFormat.GMMSurfaceFormat == GMM_FORMAT_R32G32_FLOAT_TYPE); } /* Descriptors used to instantiate the ValidDescriptor suite, whose tests expect Image::validate() to return CL_SUCCESS. In this table each field-name comment sits after the value it labels. */ static cl_image_desc validImageDesc[] = { {CL_MEM_OBJECT_IMAGE1D, /*image_type*/ 16384, /*image_width*/ 1, /*image_height*/ 1, /*image_depth*/ 0, /*image_array_size*/ 0, /*image_row_pitch*/ 0, /*image_slice_pitch*/ 0, /*num_mip_levels*/ 0, /*num_samples*/ {0}}, /*mem_object */ {CL_MEM_OBJECT_IMAGE1D_ARRAY, /*image_type*/ 16384, /*image_width*/ 1, /*image_height*/ 1, /*image_depth*/ 2, /*image_array_size*/ 0, /*image_row_pitch*/ 0, /*image_slice_pitch*/ 0, /*num_mip_levels*/ 0, /*num_samples*/ {0}}, /*mem_object */ {CL_MEM_OBJECT_IMAGE2D, /*image_type*/ 512, /*image_width*/ 512, /*image_height*/ 1, /*image_depth*/ 0, /*image_array_size*/ 0, /*image_row_pitch*/ 0, /*image_slice_pitch*/ 0, /*num_mip_levels*/ 0, /*num_samples*/ {0}}, /*mem_object */ {CL_MEM_OBJECT_IMAGE2D, /*image_type*/ 16384, /*image_width*/ 16384, /*image_height*/ 1, /*image_depth*/ 0, /*image_array_size*/ 0, /*image_row_pitch*/ 0, /*image_slice_pitch*/ 0, /*num_mip_levels*/ 0, /*num_samples*/ {0}}, /*mem_object */ {CL_MEM_OBJECT_IMAGE2D_ARRAY, /*image_type*/ 16384, /*image_width*/ 16384, /*image_height*/ 1, /*image_depth*/ 1, /*image_array_size*/ 0, /*image_row_pitch*/ 0, /*image_slice_pitch*/ 0, /*num_mip_levels*/ 0, /*num_samples*/ {0}}, /*mem_object */ {CL_MEM_OBJECT_IMAGE2D_ARRAY, /*image_type*/ 16384, /*image_width*/ 16384, /*image_height*/ 1, /*image_depth*/ 2, /*image_array_size*/ 0, /*image_row_pitch*/ 0, /*image_slice_pitch*/ 0, /*num_mip_levels*/ 0, /*num_samples*/ {0}}, /*mem_object */ {CL_MEM_OBJECT_IMAGE3D, /*image_type*/ 16384,
/*image_width*/ 16384, /*image_height*/ 3, /*image_depth*/ 0, /*image_array_size*/ 0, /*image_row_pitch*/ 0, /*image_slice_pitch*/ 0, /*num_mip_levels*/ 0, /*num_samples*/ {0}}, /*mem_object */ {CL_MEM_OBJECT_IMAGE3D, /*image_type*/ 2, /*image_width*/ 2, /*image_height*/ 2, /*image_depth*/ 0, /*image_array_size*/ 0, /*image_row_pitch*/ 0, /*image_slice_pitch*/ 0, /*num_mip_levels*/ 0, /*num_samples*/ {0}}, /*mem_object */ }; /* 2D descriptors with a zero width or a zero height; the InvalidDescriptor suite expects CL_INVALID_IMAGE_DESCRIPTOR for them. */ static cl_image_desc invalidImageDesc[] = { {CL_MEM_OBJECT_IMAGE2D, /*image_type*/ 0, /*image_width*/ 512, /*image_height*/ 1, /*image_depth*/ 0, /*image_array_size*/ 0, /*image_row_pitch*/ 0, /*image_slice_pitch*/ 0, /*num_mip_levels*/ 0, /*num_samples*/ {0}}, /*mem_object */ {CL_MEM_OBJECT_IMAGE2D, /*image_type*/ 512, /*image_width*/ 0, /*image_height*/ 1, /*image_depth*/ 0, /*image_array_size*/ 0, /*image_row_pitch*/ 0, /*image_slice_pitch*/ 0, /*num_mip_levels*/ 0, /*num_samples*/ {0}}, /*mem_object */ }; /* 2D descriptors whose width or height is 16384 + 10; the InvalidSize suite expects CL_INVALID_IMAGE_SIZE for them. */ static cl_image_desc invalidImageSize[] = { {CL_MEM_OBJECT_IMAGE2D, /*image_type*/ 16384 + 10, /*image_width*/ 512, /*image_height*/ 1, /*image_depth*/ 0, /*image_array_size*/ 0, /*image_row_pitch*/ 0, /*image_slice_pitch*/ 0, /*num_mip_levels*/ 0, /*num_samples*/ {0}}, /*mem_object */ {CL_MEM_OBJECT_IMAGE2D, /*image_type*/ 1, /*image_width*/ 16384 + 10, /*image_height*/ 1, /*image_depth*/ 0, /*image_array_size*/ 0, /*image_row_pitch*/ 0, /*image_slice_pitch*/ 0, /*num_mip_levels*/ 0, /*num_samples*/ {0}}, /*mem_object */ }; INSTANTIATE_TEST_CASE_P( ImageValidate, ValidDescriptor, ::testing::ValuesIn(validImageDesc)); INSTANTIATE_TEST_CASE_P( ImageValidate, InvalidDescriptor, ::testing::ValuesIn(invalidImageDesc)); INSTANTIATE_TEST_CASE_P( ImageValidate, InvalidSize, ::testing::ValuesIn(invalidImageSize)); /* Fixture whose parameter is unpacked into (channel order, channel data type); validateFormat() expects Image::validateImageFormat() to return CL_SUCCESS for that pair. */ class ValidImageFormatTest : public ::testing::TestWithParam> { public: void validateFormat() { cl_image_format imageFormat; cl_int retVal; std::tie(imageFormat.image_channel_order, imageFormat.image_channel_data_type) = GetParam();
retVal = Image::validateImageFormat(&imageFormat); EXPECT_EQ(CL_SUCCESS, retVal); } }; /* Counterpart fixture: validateFormat() expects Image::validateImageFormat() to return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR. */ class InvalidImageFormatTest : public ::testing::TestWithParam> { public: void validateFormat() { cl_image_format imageFormat; cl_int retVal; std::tie(imageFormat.image_channel_order, imageFormat.image_channel_data_type) = GetParam(); retVal = Image::validateImageFormat(&imageFormat); EXPECT_EQ(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, retVal); } }; /* Single-channel orders (CL_R, CL_A, CL_Rx) combined with the accepted and the rejected data types. */ typedef ValidImageFormatTest ValidSingleChannelFormat; typedef InvalidImageFormatTest InvalidSingleChannelFormat; cl_channel_order validSingleChannelOrder[] = {CL_R, CL_A, CL_Rx}; cl_channel_type validSingleChannelDataTypes[] = {CL_SNORM_INT8, CL_SNORM_INT16, CL_UNORM_INT8, CL_UNORM_INT16, CL_SIGNED_INT8, CL_SIGNED_INT16, CL_SIGNED_INT32, CL_UNSIGNED_INT8, CL_UNSIGNED_INT16, CL_UNSIGNED_INT32, CL_HALF_FLOAT, CL_FLOAT}; cl_channel_type invalidSingleChannelDataTypes[] = {CL_UNORM_SHORT_565, CL_UNORM_SHORT_555, CL_UNORM_INT_101010, CL_UNORM_INT24, CL_UNORM_INT_101010_2}; TEST_P(ValidSingleChannelFormat, givenValidSingleChannelImageFormatWhenValidateImageFormatIsCalledThenReturnsSuccess) { validateFormat(); }; TEST_P(InvalidSingleChannelFormat, givenInvalidSingleChannelChannelDataTypeWhenValidateImageFormatIsCalledThenReturnsError) { validateFormat(); }; INSTANTIATE_TEST_CASE_P( ImageValidate, ValidSingleChannelFormat, ::testing::Combine( ::testing::ValuesIn(validSingleChannelOrder), ::testing::ValuesIn(validSingleChannelDataTypes))); INSTANTIATE_TEST_CASE_P( ImageValidate, InvalidSingleChannelFormat, ::testing::Combine( ::testing::ValuesIn(validSingleChannelOrder), ::testing::ValuesIn(invalidSingleChannelDataTypes))); /* CL_INTENSITY combined with the accepted and the rejected data types. */ typedef ValidImageFormatTest ValidIntensityFormat; typedef InvalidImageFormatTest InvalidIntensityFormat; cl_channel_order validIntensityChannelOrders[] = {CL_INTENSITY}; cl_channel_type validIntensityChannelDataTypes[] = {CL_UNORM_INT8, CL_UNORM_INT16, CL_SNORM_INT8, CL_SNORM_INT16, CL_HALF_FLOAT, CL_FLOAT}; cl_channel_type
invalidIntensityChannelDataTypes[] = {CL_UNORM_SHORT_565, CL_UNORM_SHORT_555, CL_UNORM_INT_101010, CL_SIGNED_INT8, CL_SIGNED_INT16, CL_SIGNED_INT32, CL_UNSIGNED_INT8, CL_UNSIGNED_INT16, CL_UNSIGNED_INT32, CL_UNORM_INT24, CL_UNORM_INT_101010_2}; TEST_P(ValidIntensityFormat, givenValidIntensityImageFormatWhenValidateImageFormatIsCalledThenReturnsSuccess) { validateFormat(); }; TEST_P(InvalidIntensityFormat, givenInvalidIntensityChannelDataTypeWhenValidateImageFormatIsCalledThenReturnsError) { validateFormat(); }; INSTANTIATE_TEST_CASE_P( ImageValidate, ValidIntensityFormat, ::testing::Combine( ::testing::ValuesIn(validIntensityChannelOrders), ::testing::ValuesIn(validIntensityChannelDataTypes))); INSTANTIATE_TEST_CASE_P( ImageValidate, InvalidIntensityFormat, ::testing::Combine( ::testing::ValuesIn(validIntensityChannelOrders), ::testing::ValuesIn(invalidIntensityChannelDataTypes))); /* CL_LUMINANCE combined with the accepted and the rejected data types. */ typedef ValidImageFormatTest ValidLuminanceFormat; typedef InvalidImageFormatTest InvalidLuminanceFormat; cl_channel_order validLuminanceChannelOrders[] = {CL_LUMINANCE}; cl_channel_type validLuminanceChannelDataTypes[] = {CL_UNORM_INT8, CL_UNORM_INT16, CL_SNORM_INT8, CL_SNORM_INT16, CL_HALF_FLOAT, CL_FLOAT}; cl_channel_type invalidLuminanceChannelDataTypes[] = {CL_UNORM_SHORT_565, CL_UNORM_SHORT_555, CL_UNORM_INT_101010, CL_SIGNED_INT8, CL_SIGNED_INT16, CL_SIGNED_INT32, CL_UNSIGNED_INT8, CL_UNSIGNED_INT16, CL_UNSIGNED_INT32, CL_UNORM_INT24, CL_UNORM_INT_101010_2}; TEST_P(ValidLuminanceFormat, givenValidLuminanceImageFormatWhenValidateImageFormatIsCalledThenReturnsSuccess) { validateFormat(); }; TEST_P(InvalidLuminanceFormat, givenInvalidLuminanceChannelDataTypeWhenValidateImageFormatIsCalledThenReturnsError) { validateFormat(); }; INSTANTIATE_TEST_CASE_P( ImageValidate, ValidLuminanceFormat, ::testing::Combine( ::testing::ValuesIn(validLuminanceChannelOrders), ::testing::ValuesIn(validLuminanceChannelDataTypes))); INSTANTIATE_TEST_CASE_P( ImageValidate, InvalidLuminanceFormat,
::testing::Combine( ::testing::ValuesIn(validLuminanceChannelOrders), ::testing::ValuesIn(invalidLuminanceChannelDataTypes))); /* CL_DEPTH: only CL_UNORM_INT16 and CL_FLOAT are listed as accepted. */ typedef ValidImageFormatTest ValidDepthFormat; typedef InvalidImageFormatTest InvalidDepthFormat; cl_channel_order validDepthChannelOrders[] = {CL_DEPTH}; cl_channel_type validDepthChannelDataTypes[] = {CL_UNORM_INT16, CL_FLOAT}; cl_channel_type invalidDepthChannelDataTypes[] = {CL_SNORM_INT8, CL_SNORM_INT16, CL_UNORM_INT8, CL_UNORM_SHORT_565, CL_UNORM_SHORT_555, CL_UNORM_INT_101010, CL_SIGNED_INT8, CL_SIGNED_INT16, CL_SIGNED_INT32, CL_UNSIGNED_INT8, CL_UNSIGNED_INT16, CL_UNSIGNED_INT32, CL_HALF_FLOAT, CL_UNORM_INT24, CL_UNORM_INT_101010_2}; TEST_P(ValidDepthFormat, givenValidDepthImageFormatWhenValidateImageFormatIsCalledThenReturnsSuccess) { validateFormat(); }; TEST_P(InvalidDepthFormat, givenInvalidDepthChannelDataTypeWhenValidateImageFormatIsCalledThenReturnsError) { validateFormat(); }; INSTANTIATE_TEST_CASE_P( ImageValidate, ValidDepthFormat, ::testing::Combine( ::testing::ValuesIn(validDepthChannelOrders), ::testing::ValuesIn(validDepthChannelDataTypes))); INSTANTIATE_TEST_CASE_P( ImageValidate, InvalidDepthFormat, ::testing::Combine( ::testing::ValuesIn(validDepthChannelOrders), ::testing::ValuesIn(invalidDepthChannelDataTypes))); /* Two-channel orders (CL_RG, CL_RGx, CL_RA) combined with the accepted and the rejected data types. */ typedef ValidImageFormatTest ValidDoubleChannelFormat; typedef InvalidImageFormatTest InvalidDoubleChannelFormat; cl_channel_order validDoubleChannelOrders[] = {CL_RG, CL_RGx, CL_RA}; cl_channel_type validDoubleChannelDataTypes[] = {CL_SNORM_INT8, CL_SNORM_INT16, CL_UNORM_INT8, CL_UNORM_INT16, CL_SIGNED_INT8, CL_SIGNED_INT16, CL_SIGNED_INT32, CL_UNSIGNED_INT8, CL_UNSIGNED_INT16, CL_UNSIGNED_INT32, CL_HALF_FLOAT, CL_FLOAT}; cl_channel_type invalidDoubleChannelDataTypes[] = {CL_UNORM_SHORT_565, CL_UNORM_SHORT_555, CL_UNORM_INT_101010, CL_UNORM_INT24, CL_UNORM_INT_101010_2}; TEST_P(ValidDoubleChannelFormat, givenValidDoubleChannelImageFormatWhenValidateImageFormatIsCalledThenReturnsSuccess) {
validateFormat(); }; TEST_P(InvalidDoubleChannelFormat, givenInvalidDoubleChannelDataTypeWhenValidateImageFormatIsCalledThenReturnsError) { validateFormat(); }; INSTANTIATE_TEST_CASE_P( ImageValidate, ValidDoubleChannelFormat, ::testing::Combine( ::testing::ValuesIn(validDoubleChannelOrders), ::testing::ValuesIn(validDoubleChannelDataTypes))); INSTANTIATE_TEST_CASE_P( ImageValidate, InvalidDoubleChannelFormat, ::testing::Combine( ::testing::ValuesIn(validDoubleChannelOrders), ::testing::ValuesIn(invalidDoubleChannelDataTypes))); /* Three-channel orders (CL_RGB, CL_RGBx): only CL_UNORM_SHORT_565, CL_UNORM_SHORT_555 and CL_UNORM_INT_101010 are listed as accepted. */ typedef ValidImageFormatTest ValidTripleChannelFormat; typedef InvalidImageFormatTest InvalidTripleChannelFormat; cl_channel_order validTripleChannelOrders[] = {CL_RGB, CL_RGBx}; cl_channel_type validTripleChannelDataTypes[] = {CL_UNORM_SHORT_565, CL_UNORM_SHORT_555, CL_UNORM_INT_101010}; cl_channel_type invalidTripleChannelDataTypes[] = {CL_SNORM_INT8, CL_SNORM_INT16, CL_UNORM_INT8, CL_UNORM_INT16, CL_SIGNED_INT8, CL_SIGNED_INT16, CL_SIGNED_INT32, CL_UNSIGNED_INT8, CL_UNSIGNED_INT16, CL_UNSIGNED_INT32, CL_HALF_FLOAT, CL_FLOAT, CL_UNORM_INT24, CL_UNORM_INT_101010_2}; TEST_P(ValidTripleChannelFormat, givenValidTripleChannelImageFormatWhenValidateImageFormatIsCalledThenReturnsSuccess) { validateFormat(); }; TEST_P(InvalidTripleChannelFormat, givenInvalidTripleChannelDataTypeWhenValidateImageFormatIsCalledThenReturnsError) { validateFormat(); }; INSTANTIATE_TEST_CASE_P( ImageValidate, ValidTripleChannelFormat, ::testing::Combine( ::testing::ValuesIn(validTripleChannelOrders), ::testing::ValuesIn(validTripleChannelDataTypes))); INSTANTIATE_TEST_CASE_P( ImageValidate, InvalidTripleChannelFormat, ::testing::Combine( ::testing::ValuesIn(validTripleChannelOrders), ::testing::ValuesIn(invalidTripleChannelDataTypes))); /* CL_RGBA combined with the accepted and the rejected data types. */ typedef ValidImageFormatTest ValidRGBAChannelFormat; typedef InvalidImageFormatTest InvalidRGBAChannelFormat; cl_channel_order validRGBAChannelOrders[] = {CL_RGBA}; cl_channel_type validRGBAChannelDataTypes[] = {CL_SNORM_INT8,
CL_SNORM_INT16, CL_UNORM_INT8, CL_UNORM_INT16, CL_SIGNED_INT8, CL_SIGNED_INT16, CL_SIGNED_INT32, CL_UNSIGNED_INT8, CL_UNSIGNED_INT16, CL_UNSIGNED_INT32, CL_HALF_FLOAT, CL_FLOAT}; cl_channel_type invalidRGBAChannelDataTypes[] = {CL_UNORM_SHORT_565, CL_UNORM_SHORT_555, CL_UNORM_INT_101010, CL_UNORM_INT24, CL_UNORM_INT_101010_2}; TEST_P(ValidRGBAChannelFormat, givenValidRGBAChannelImageFormatWhenValidateImageFormatIsCalledThenReturnsSuccess) { validateFormat(); }; TEST_P(InvalidRGBAChannelFormat, givenInvalidRGBAChannelDataTypeWhenValidateImageFormatIsCalledThenReturnsError) { validateFormat(); }; INSTANTIATE_TEST_CASE_P( ImageValidate, ValidRGBAChannelFormat, ::testing::Combine( ::testing::ValuesIn(validRGBAChannelOrders), ::testing::ValuesIn(validRGBAChannelDataTypes))); INSTANTIATE_TEST_CASE_P( ImageValidate, InvalidRGBAChannelFormat, ::testing::Combine( ::testing::ValuesIn(validRGBAChannelOrders), ::testing::ValuesIn(invalidRGBAChannelDataTypes))); typedef ValidImageFormatTest ValidSRGBChannelFormat; typedef InvalidImageFormatTest InvalidSRGBChannelFormat; cl_channel_order validSRGBChannelOrders[] = {CL_sRGB, CL_sRGBx, CL_sRGBA, CL_sBGRA}; cl_channel_type validSRGBChannelDataTypes[] = {CL_UNORM_INT8}; cl_channel_type invalidSRGBChannelDataTypes[] = {CL_SNORM_INT8, CL_SNORM_INT16, CL_UNORM_INT16, CL_UNORM_SHORT_565, CL_UNORM_SHORT_555, CL_UNORM_INT_101010, CL_SIGNED_INT8, CL_SIGNED_INT16, CL_SIGNED_INT32, CL_UNSIGNED_INT8, CL_UNSIGNED_INT16, CL_UNSIGNED_INT32, CL_HALF_FLOAT, CL_FLOAT, CL_UNORM_INT24, CL_UNORM_INT_101010_2}; TEST_P(ValidSRGBChannelFormat, givenValidSRGBChannelImageFormatWhenValidateImageFormatIsCalledThenReturnsSuccess) { validateFormat(); }; TEST_P(InvalidSRGBChannelFormat, givenInvalidSRGBChannelDataTypeWhenValidateImageFormatIsCalledThenReturnsError) { validateFormat(); }; INSTANTIATE_TEST_CASE_P( ImageValidate, ValidSRGBChannelFormat, ::testing::Combine( ::testing::ValuesIn(validSRGBChannelOrders), 
::testing::ValuesIn(validSRGBChannelDataTypes))); INSTANTIATE_TEST_CASE_P( ImageValidate, InvalidSRGBChannelFormat, ::testing::Combine( ::testing::ValuesIn(validSRGBChannelOrders), ::testing::ValuesIn(invalidSRGBChannelDataTypes))); typedef ValidImageFormatTest ValidARGBChannelFormat; typedef InvalidImageFormatTest InvalidARGBChannelFormat; cl_channel_order validARGBChannelOrders[] = {CL_ARGB, CL_BGRA, CL_ABGR}; cl_channel_type validARGBChannelDataTypes[] = {CL_UNORM_INT8, CL_SNORM_INT8, CL_SIGNED_INT8, CL_UNSIGNED_INT8}; cl_channel_type invalidARGBChannelDataTypes[] = {CL_SNORM_INT16, CL_UNORM_INT16, CL_UNORM_SHORT_565, CL_UNORM_SHORT_555, CL_UNORM_INT_101010, CL_SIGNED_INT16, CL_SIGNED_INT32, CL_UNSIGNED_INT16, CL_UNSIGNED_INT32, CL_HALF_FLOAT, CL_FLOAT, CL_UNORM_INT24, CL_UNORM_INT_101010_2}; TEST_P(ValidARGBChannelFormat, givenValidARGBChannelImageFormatWhenValidateImageFormatIsCalledThenReturnsSuccess) { validateFormat(); }; TEST_P(InvalidARGBChannelFormat, givenInvalidARGBChannelDataTypeWhenValidateImageFormatIsCalledThenReturnsError) { validateFormat(); }; INSTANTIATE_TEST_CASE_P( ImageValidate, ValidARGBChannelFormat, ::testing::Combine( ::testing::ValuesIn(validARGBChannelOrders), ::testing::ValuesIn(validARGBChannelDataTypes))); INSTANTIATE_TEST_CASE_P( ImageValidate, InvalidARGBChannelFormat, ::testing::Combine( ::testing::ValuesIn(validARGBChannelOrders), ::testing::ValuesIn(invalidARGBChannelDataTypes))); typedef ValidImageFormatTest ValidDepthStencilChannelFormat; typedef InvalidImageFormatTest InvalidDepthStencilChannelFormat; cl_channel_order validDepthStencilChannelOrders[] = {CL_DEPTH_STENCIL}; cl_channel_type validDepthStencilChannelDataTypes[] = {CL_UNORM_INT24, CL_FLOAT}; cl_channel_type invalidDepthStencilChannelDataTypes[] = {CL_SNORM_INT8, CL_SNORM_INT16, CL_UNORM_INT8, CL_UNORM_INT16, CL_UNORM_SHORT_565, CL_UNORM_SHORT_555, CL_UNORM_INT_101010, CL_SIGNED_INT8, CL_SIGNED_INT16, CL_SIGNED_INT32, CL_UNSIGNED_INT8, CL_UNSIGNED_INT16, 
CL_UNSIGNED_INT32, CL_HALF_FLOAT, CL_UNORM_INT_101010_2}; TEST_P(ValidDepthStencilChannelFormat, givenValidDepthStencilChannelImageFormatWhenValidateImageFormatIsCalledThenReturnsSuccess) { validateFormat(); }; TEST_P(InvalidDepthStencilChannelFormat, givenInvalidDepthStencilChannelDataTypeWhenValidateImageFormatIsCalledThenReturnsError) { validateFormat(); }; INSTANTIATE_TEST_CASE_P( ImageValidate, ValidDepthStencilChannelFormat, ::testing::Combine( ::testing::ValuesIn(validDepthStencilChannelOrders), ::testing::ValuesIn(validDepthStencilChannelDataTypes))); INSTANTIATE_TEST_CASE_P( ImageValidate, InvalidDepthStencilChannelFormat, ::testing::Combine( ::testing::ValuesIn(validDepthStencilChannelOrders), ::testing::ValuesIn(invalidDepthStencilChannelDataTypes))); typedef ValidImageFormatTest ValidYUVImageFormat; typedef InvalidImageFormatTest InvalidYUVImageFormat; cl_channel_order validYUVChannelOrders[] = {CL_NV12_INTEL, CL_YUYV_INTEL, CL_UYVY_INTEL, CL_YVYU_INTEL, CL_VYUY_INTEL}; cl_channel_type validYUVChannelDataTypes[] = {CL_UNORM_INT8}; cl_channel_type invalidYUVChannelDataTypes[] = {CL_SNORM_INT8, CL_SNORM_INT16, CL_UNORM_INT16, CL_UNORM_SHORT_565, CL_UNORM_SHORT_555, CL_UNORM_INT_101010, CL_SIGNED_INT8, CL_SIGNED_INT16, CL_SIGNED_INT32, CL_UNSIGNED_INT8, CL_UNSIGNED_INT16, CL_UNSIGNED_INT32, CL_HALF_FLOAT, CL_FLOAT, CL_UNORM_INT24, CL_UNORM_INT_101010_2}; TEST_P(ValidYUVImageFormat, givenValidYUVImageFormatWhenValidateImageFormatIsCalledThenReturnsSuccess) { validateFormat(); }; TEST_P(InvalidYUVImageFormat, givenInvalidYUVChannelDataTypeWhenValidateImageFormatIsCalledThenReturnsError) { validateFormat(); }; INSTANTIATE_TEST_CASE_P( ImageValidate, ValidYUVImageFormat, ::testing::Combine( ::testing::ValuesIn(validYUVChannelOrders), ::testing::ValuesIn(validYUVChannelDataTypes))); INSTANTIATE_TEST_CASE_P( ImageValidate, InvalidYUVImageFormat, ::testing::Combine( ::testing::ValuesIn(validYUVChannelOrders), ::testing::ValuesIn(invalidYUVChannelDataTypes))); 
// A null image-format pointer is reported as an invalid format descriptor.
TEST(ImageFormat, givenNullptrImageFormatWhenValidateImageFormatIsCalledThenReturnsError) {
    const auto retVal = Image::validateImageFormat(nullptr);
    EXPECT_EQ(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, retVal);
}

// Channel order 0 / data type 0 is not a valid format: no image is created and
// the error code is CL_INVALID_IMAGE_FORMAT_DESCRIPTOR.
TEST(validateAndCreateImage, givenInvalidImageFormatWhenValidateAndCreateImageIsCalledThenReturnsInvalidDescriptorError) {
    MockContext context;
    cl_image_format imageFormat;
    cl_int retVal = CL_SUCCESS;

    imageFormat.image_channel_order = 0;
    imageFormat.image_channel_data_type = 0;
    cl_mem image = Image::validateAndCreateImage(&context, nullptr, 0, 0, &imageFormat, &Image1dDefaults::imageDesc, nullptr, retVal);

    EXPECT_EQ(nullptr, image);
    EXPECT_EQ(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, retVal);
}

// A well-formed format that is expected to be unsupported for CL_MEM_READ_WRITE.
TEST(validateAndCreateImage, givenNotSupportedImageFormatWhenValidateAndCreateImageIsCalledThenReturnsNotSupportedFormatError) {
    MockContext context;
    cl_image_format imageFormat = {CL_INTENSITY, CL_UNORM_INT8};
    cl_int retVal = CL_SUCCESS;
    cl_mem_flags flags = CL_MEM_READ_WRITE;

    cl_mem image = Image::validateAndCreateImage(&context, nullptr, flags, 0, &imageFormat, &Image1dDefaults::imageDesc, nullptr, retVal);

    EXPECT_EQ(nullptr, image);
    EXPECT_EQ(CL_IMAGE_FORMAT_NOT_SUPPORTED, retVal);
}

// Same format as above but read-only, on a fully specified 1D descriptor.
TEST(validateAndCreateImage, givenValidImageParamsWhenValidateAndCreateImageIsCalledThenReturnsSuccess) {
    MockContext context;
    cl_image_desc imageDesc;
    cl_mem_flags flags = CL_MEM_READ_ONLY;

    imageDesc.num_mip_levels = 0;
    imageDesc.num_samples = 0;
    imageDesc.mem_object = nullptr;
    // 1D image with 0 row_pitch and 0 slice_pitch
    imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D;
    imageDesc.image_width = 10;
    imageDesc.image_height = 0;
    imageDesc.image_depth = 0;
    imageDesc.image_array_size = 0;
    imageDesc.image_row_pitch = 0;
    imageDesc.image_slice_pitch = 0;

    cl_image_format imageFormat = {CL_INTENSITY, CL_UNORM_INT8};
    cl_int retVal = CL_SUCCESS;

    // Owned through unique_ptr so the created image is released at scope exit.
    std::unique_ptr<Image> image = nullptr;
    image.reset(static_cast<Image *>(Image::validateAndCreateImage(
        &context, nullptr, flags, 0, &imageFormat, &imageDesc, nullptr,
retVal))); EXPECT_NE(nullptr, image); EXPECT_EQ(CL_SUCCESS, retVal); } std::tuple normalizingFactorValues[] = { std::make_tuple(CL_SNORM_INT8, 0x7F), std::make_tuple(CL_SNORM_INT16, 0x7fFF), std::make_tuple(CL_UNORM_INT8, 0xFF), std::make_tuple(CL_UNORM_INT16, 0xFFFF), std::make_tuple(CL_UNORM_SHORT_565, 0), std::make_tuple(CL_UNORM_SHORT_555, 0), std::make_tuple(CL_UNORM_INT_101010, 0), std::make_tuple(CL_SIGNED_INT8, 0), std::make_tuple(CL_SIGNED_INT16, 0), std::make_tuple(CL_SIGNED_INT32, 0), std::make_tuple(CL_UNSIGNED_INT8, 0), std::make_tuple(CL_UNSIGNED_INT16, 0), std::make_tuple(CL_UNSIGNED_INT32, 0), std::make_tuple(CL_HALF_FLOAT, 0), std::make_tuple(CL_FLOAT, 0), std::make_tuple(CL_UNORM_INT24, 0), std::make_tuple(CL_UNORM_INT_101010_2, 0), }; using NormalizingFactorTests = ::testing::TestWithParam>; TEST_P(NormalizingFactorTests, givenChannelTypeWhenAskingForFactorThenReturnValidValue) { auto factor = selectNormalizingFactor(std::get<0>(GetParam())); EXPECT_EQ(std::get<1>(GetParam()), factor); }; INSTANTIATE_TEST_CASE_P( NormalizingFactorTests, NormalizingFactorTests, ::testing::ValuesIn(normalizingFactorValues)); using ValidParentImageFormatTest = ::testing::TestWithParam>; cl_channel_order allChannelOrders[] = {CL_R, CL_A, CL_RG, CL_RA, CL_RGB, CL_RGBA, CL_BGRA, CL_ARGB, CL_INTENSITY, CL_LUMINANCE, CL_Rx, CL_RGx, CL_RGBx, CL_DEPTH, CL_DEPTH_STENCIL, CL_sRGB, CL_sRGBx, CL_sRGBA, CL_sBGRA, CL_ABGR, CL_NV12_INTEL}; struct NullImage : public Image { using Image::imageDesc; using Image::imageFormat; NullImage() : Image(nullptr, MemoryProperties(), cl_mem_flags{}, 0, 0, nullptr, nullptr, cl_image_format{}, cl_image_desc{}, false, GraphicsAllocationHelper::toMultiGraphicsAllocation(new MockGraphicsAllocation(nullptr, 0)), false, 0, 0, ClSurfaceFormatInfo{}, nullptr) { } ~NullImage() override { delete this->multiGraphicsAllocation.getGraphicsAllocation(0); } void setImageArg(void *memory, bool isMediaBlockImage, uint32_t mipLevel, uint32_t rootDeviceIndex, 
bool useGlobalAtomics) override {} void setMediaImageArg(void *memory, uint32_t rootDeviceIndex) override {} void setMediaSurfaceRotation(void *memory) override {} void setSurfaceMemoryObjectControlState(void *memory, uint32_t value) override {} void transformImage2dArrayTo3d(void *memory) override {} void transformImage3dTo2dArray(void *memory) override {} }; TEST_P(ValidParentImageFormatTest, givenParentChannelOrderWhenTestWithAllChannelOrdersThenReturnTrueForValidChannelOrder) { cl_image_format parentImageFormat; cl_image_format imageFormat; cl_channel_order validChannelOrder; NullImage image; std::tie(parentImageFormat.image_channel_order, validChannelOrder) = GetParam(); parentImageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_data_type = CL_UNORM_INT8; image.imageFormat = parentImageFormat; for (auto channelOrder : allChannelOrders) { imageFormat.image_channel_order = channelOrder; bool retVal = image.hasValidParentImageFormat(imageFormat); EXPECT_EQ(imageFormat.image_channel_order == validChannelOrder, retVal); } }; std::tuple imageFromImageValidChannelOrderPairs[] = { std::make_tuple(CL_BGRA, CL_sBGRA), std::make_tuple(CL_sBGRA, CL_BGRA), std::make_tuple(CL_RGBA, CL_sRGBA), std::make_tuple(CL_sRGBA, CL_RGBA), std::make_tuple(CL_RGB, CL_sRGB), std::make_tuple(CL_sRGB, CL_RGB), std::make_tuple(CL_RGBx, CL_sRGBx), std::make_tuple(CL_sRGBx, CL_RGBx), std::make_tuple(CL_R, CL_DEPTH), std::make_tuple(CL_A, 0), std::make_tuple(CL_RG, 0), std::make_tuple(CL_RA, 0), std::make_tuple(CL_ARGB, 0), std::make_tuple(CL_INTENSITY, 0), std::make_tuple(CL_LUMINANCE, 0), std::make_tuple(CL_Rx, 0), std::make_tuple(CL_RGx, 0), std::make_tuple(CL_DEPTH, 0), std::make_tuple(CL_DEPTH_STENCIL, 0), std::make_tuple(CL_ABGR, 0), std::make_tuple(CL_NV12_INTEL, 0)}; INSTANTIATE_TEST_CASE_P( ValidParentImageFormatTests, ValidParentImageFormatTest, ::testing::ValuesIn(imageFromImageValidChannelOrderPairs)); TEST(ImageDescriptorComparatorTest, 
givenImageWhenCallHasSameDescriptorWithSameDescriptorThenReturnTrueOtherwiseFalse) { NullImage image; cl_image_desc descriptor = image.imageDesc; image.imageDesc.image_row_pitch = image.getHostPtrRowPitch() + 10; // to make sure we compare host ptr row/slice pitches image.imageDesc.image_slice_pitch = image.getHostPtrSlicePitch() + 10; EXPECT_TRUE(image.hasSameDescriptor(descriptor)); descriptor.image_type++; EXPECT_FALSE(image.hasSameDescriptor(descriptor)); descriptor.image_type--; EXPECT_TRUE(image.hasSameDescriptor(descriptor)); descriptor.image_width++; EXPECT_FALSE(image.hasSameDescriptor(descriptor)); descriptor.image_width--; EXPECT_TRUE(image.hasSameDescriptor(descriptor)); descriptor.image_height++; EXPECT_FALSE(image.hasSameDescriptor(descriptor)); descriptor.image_height--; EXPECT_TRUE(image.hasSameDescriptor(descriptor)); descriptor.image_depth++; EXPECT_FALSE(image.hasSameDescriptor(descriptor)); descriptor.image_depth--; EXPECT_TRUE(image.hasSameDescriptor(descriptor)); descriptor.image_array_size++; EXPECT_FALSE(image.hasSameDescriptor(descriptor)); descriptor.image_array_size--; EXPECT_TRUE(image.hasSameDescriptor(descriptor)); descriptor.image_row_pitch++; EXPECT_FALSE(image.hasSameDescriptor(descriptor)); descriptor.image_row_pitch--; EXPECT_TRUE(image.hasSameDescriptor(descriptor)); descriptor.image_slice_pitch++; EXPECT_FALSE(image.hasSameDescriptor(descriptor)); descriptor.image_slice_pitch--; EXPECT_TRUE(image.hasSameDescriptor(descriptor)); descriptor.num_mip_levels++; EXPECT_FALSE(image.hasSameDescriptor(descriptor)); descriptor.num_mip_levels--; EXPECT_TRUE(image.hasSameDescriptor(descriptor)); descriptor.num_samples++; EXPECT_FALSE(image.hasSameDescriptor(descriptor)); }; TEST(ImageFormatValidatorTest, givenValidParentChannelOrderAndChannelOrderWhenFormatsHaveDifferentDataTypeThenHasValidParentImageFormatReturnsFalse) { cl_image_format imageFormat; NullImage image; image.imageFormat.image_channel_data_type = CL_UNORM_INT8; 
image.imageFormat.image_channel_order = CL_BGRA; imageFormat.image_channel_data_type = CL_UNORM_INT16; imageFormat.image_channel_order = CL_sBGRA; EXPECT_FALSE(image.hasValidParentImageFormat(imageFormat)); }; TEST(ImageValidatorTest, givenInvalidImage2dSizesWithoutParentObjectWhenValidateImageThenReturnsError) { MockContext context; cl_image_desc descriptor; void *dummyPtr = reinterpret_cast(0x17); ClSurfaceFormatInfo surfaceFormat; descriptor.image_type = CL_MEM_OBJECT_IMAGE2D; descriptor.image_row_pitch = 0; descriptor.image_height = 1; descriptor.image_width = 0; descriptor.mem_object = nullptr; EXPECT_EQ(CL_INVALID_IMAGE_DESCRIPTOR, Image::validate(&context, {}, &surfaceFormat, &descriptor, dummyPtr)); descriptor.image_height = 0; descriptor.image_width = 1; EXPECT_EQ(CL_INVALID_IMAGE_DESCRIPTOR, Image::validate(&context, {}, &surfaceFormat, &descriptor, dummyPtr)); }; TEST(ImageValidatorTest, givenNV12Image2dAsParentImageWhenValidateImageZeroSizedThenReturnsSuccess) { NullImage image; cl_image_desc descriptor; MockContext context; REQUIRE_IMAGES_OR_SKIP(&context); void *dummyPtr = reinterpret_cast(0x17); ClSurfaceFormatInfo surfaceFormat = {}; image.imageFormat.image_channel_order = CL_NV12_INTEL; descriptor.image_type = CL_MEM_OBJECT_IMAGE2D; descriptor.image_height = 0; descriptor.image_width = 0; descriptor.image_row_pitch = 0; descriptor.mem_object = ℑ EXPECT_EQ(CL_SUCCESS, Image::validate(&context, {}, &surfaceFormat, &descriptor, dummyPtr)); }; TEST(ImageValidatorTest, givenNonNV12Image2dAsParentImageWhenValidateImageZeroSizedThenReturnsError) { NullImage image; cl_image_desc descriptor; MockContext context; REQUIRE_IMAGES_OR_SKIP(&context); void *dummyPtr = reinterpret_cast(0x17); ClSurfaceFormatInfo surfaceFormat; image.imageFormat.image_channel_order = CL_BGRA; image.imageFormat.image_channel_data_type = CL_UNORM_INT8; surfaceFormat.OCLImageFormat.image_channel_order = CL_sBGRA; surfaceFormat.OCLImageFormat.image_channel_data_type = CL_UNORM_INT8; 
descriptor.image_type = CL_MEM_OBJECT_IMAGE2D; descriptor.image_height = 0; descriptor.image_width = 0; descriptor.image_row_pitch = image.getHostPtrRowPitch(); descriptor.image_slice_pitch = image.getHostPtrSlicePitch(); image.imageDesc = descriptor; descriptor.mem_object = ℑ EXPECT_EQ(CL_INVALID_IMAGE_DESCRIPTOR, Image::validate(&context, {}, &surfaceFormat, &descriptor, dummyPtr)); }; compute-runtime-22.14.22890/opencl/test/unit_test/mem_obj/map_operations_handler_tests.cpp000066400000000000000000000235201422164147700317040ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/ptr_math.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/mem_obj/map_operations_handler.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include using namespace NEO; struct MockMapOperationsHandler : public MapOperationsHandler { using MapOperationsHandler::isOverlapping; using MapOperationsHandler::mappedPointers; }; struct MapOperationsHandlerTests : public ::testing::Test { MockMapOperationsHandler mockHandler; std::unique_ptr allocations[3] = { std::make_unique(), std::make_unique(), std::make_unique(), }; MapInfo mappedPtrs[3] = { {(void *)0x1000, 1, {{1, 2, 3}}, {{4, 5, 6}}, 0}, {(void *)0x2000, 1, {{7, 8, 9}}, {{10, 11, 12}}, 0}, {(void *)0x3000, 1, {{13, 14, 15}}, {{16, 17, 18}}, 0}, }; cl_map_flags mapFlags = CL_MAP_READ; }; TEST_F(MapOperationsHandlerTests, givenMapInfoWhenFindingThenReturnCorrectvalues) { for (size_t i = 0; i < 3; i++) { EXPECT_TRUE(mockHandler.add(mappedPtrs[i].ptr, mappedPtrs[i].ptrLength, mapFlags, mappedPtrs[i].size, mappedPtrs[i].offset, 0, allocations[i].get())); } EXPECT_EQ(3u, mockHandler.size()); for (int i = 2; i >= 0; i--) { MapInfo receivedMapInfo; EXPECT_TRUE(mockHandler.find(mappedPtrs[i].ptr, receivedMapInfo)); EXPECT_EQ(receivedMapInfo.ptr, mappedPtrs[i].ptr); EXPECT_EQ(receivedMapInfo.size, mappedPtrs[i].size); 
EXPECT_EQ(receivedMapInfo.offset, mappedPtrs[i].offset);
        EXPECT_EQ(allocations[i].get(), receivedMapInfo.graphicsAllocation);
    }
}

// After removing every mapping, none can be found and the handler is empty.
TEST_F(MapOperationsHandlerTests, givenMapInfoWhenRemovingThenRemoveCorrectPointers) {
    for (size_t i = 0; i < 3; i++) {
        // Report a failed setup here instead of letting it surface as a confusing later failure.
        EXPECT_TRUE(mockHandler.add(mappedPtrs[i].ptr, mappedPtrs[i].ptrLength, mapFlags, mappedPtrs[i].size, mappedPtrs[i].offset, 0, allocations[i].get()));
    }

    for (int i = 2; i >= 0; i--) {
        mockHandler.remove(mappedPtrs[i].ptr);
        MapInfo receivedMapInfo;
        EXPECT_FALSE(mockHandler.find(mappedPtrs[i].ptr, receivedMapInfo));
    }
    EXPECT_EQ(0u, mockHandler.size());
}

// Removing the same pointer twice removes it once and leaves other mappings alone.
TEST_F(MapOperationsHandlerTests, givenMappedPtrsWhenDoubleRemovedThenDoNothing) {
    EXPECT_TRUE(mockHandler.add(mappedPtrs[0].ptr, mappedPtrs[0].ptrLength, mapFlags, mappedPtrs[0].size, mappedPtrs[0].offset, 0, allocations[0].get()));
    // Second mapping uses its own size/offset (previously mixed with entry 0's).
    EXPECT_TRUE(mockHandler.add(mappedPtrs[1].ptr, mappedPtrs[1].ptrLength, mapFlags, mappedPtrs[1].size, mappedPtrs[1].offset, 0, allocations[1].get()));
    EXPECT_EQ(2u, mockHandler.size());

    mockHandler.remove(mappedPtrs[1].ptr);
    mockHandler.remove(mappedPtrs[1].ptr);
    EXPECT_EQ(1u, mockHandler.size());

    MapInfo receivedMapInfo;
    EXPECT_FALSE(mockHandler.find(mappedPtrs[1].ptr, receivedMapInfo));
    EXPECT_TRUE(mockHandler.find(mappedPtrs[0].ptr, receivedMapInfo));
}

// readOnly is expected to be set only for a pure CL_MAP_READ mapping.
TEST_F(MapOperationsHandlerTests, givenMapInfoWhenAddedThenSetReadOnlyFlag) {
    mapFlags = CL_MAP_READ;
    mockHandler.add(mappedPtrs[0].ptr, mappedPtrs[0].ptrLength, mapFlags, mappedPtrs[0].size, mappedPtrs[0].offset, 0, allocations[0].get());
    EXPECT_TRUE(mockHandler.mappedPointers.back().readOnly);
    mockHandler.remove(mappedPtrs[0].ptr);

    mapFlags = CL_MAP_WRITE;
    mockHandler.add(mappedPtrs[0].ptr, mappedPtrs[0].ptrLength, mapFlags, mappedPtrs[0].size, mappedPtrs[0].offset, 0, allocations[0].get());
    EXPECT_FALSE(mockHandler.mappedPointers.back().readOnly);
    mockHandler.remove(mappedPtrs[0].ptr);

    mapFlags = CL_MAP_WRITE_INVALIDATE_REGION;
    mockHandler.add(mappedPtrs[0].ptr, mappedPtrs[0].ptrLength, mapFlags, mappedPtrs[0].size,
mappedPtrs[0].offset, 0, allocations[0].get()); EXPECT_FALSE(mockHandler.mappedPointers.back().readOnly); mockHandler.remove(mappedPtrs[0].ptr); mapFlags = CL_MAP_READ | CL_MAP_WRITE; mockHandler.add(mappedPtrs[0].ptr, mappedPtrs[0].ptrLength, mapFlags, mappedPtrs[0].size, mappedPtrs[0].offset, 0, allocations[0].get()); EXPECT_FALSE(mockHandler.mappedPointers.back().readOnly); mockHandler.remove(mappedPtrs[0].ptr); mapFlags = CL_MAP_READ | CL_MAP_WRITE_INVALIDATE_REGION; mockHandler.add(mappedPtrs[0].ptr, mappedPtrs[0].ptrLength, mapFlags, mappedPtrs[0].size, mappedPtrs[0].offset, 0, allocations[0].get()); EXPECT_FALSE(mockHandler.mappedPointers.back().readOnly); mockHandler.remove(mappedPtrs[0].ptr); } TEST_F(MapOperationsHandlerTests, givenNonReadOnlyOverlappingPtrWhenAddingThenReturnFalseAndDontAdd) { mapFlags = CL_MAP_WRITE; mappedPtrs->readOnly = false; mockHandler.add(mappedPtrs[0].ptr, mappedPtrs[0].ptrLength, mapFlags, mappedPtrs[0].size, mappedPtrs[0].offset, 0, allocations[0].get()); EXPECT_EQ(1u, mockHandler.size()); EXPECT_FALSE(mockHandler.mappedPointers.back().readOnly); EXPECT_TRUE(mockHandler.isOverlapping(mappedPtrs[0])); EXPECT_FALSE(mockHandler.add(mappedPtrs[0].ptr, mappedPtrs[0].ptrLength, mapFlags, mappedPtrs[0].size, mappedPtrs[0].offset, 0, allocations[0].get())); EXPECT_EQ(1u, mockHandler.size()); } TEST_F(MapOperationsHandlerTests, givenReadOnlyOverlappingPtrWhenAddingThenReturnTrueAndAdd) { mapFlags = CL_MAP_READ; mappedPtrs->readOnly = true; mockHandler.add(mappedPtrs[0].ptr, mappedPtrs[0].ptrLength, mapFlags, mappedPtrs[0].size, mappedPtrs[0].offset, 0, allocations[0].get()); EXPECT_EQ(1u, mockHandler.size()); EXPECT_TRUE(mockHandler.mappedPointers.back().readOnly); EXPECT_FALSE(mockHandler.isOverlapping(mappedPtrs[0])); EXPECT_TRUE(mockHandler.add(mappedPtrs[0].ptr, mappedPtrs[0].ptrLength, mapFlags, mappedPtrs[0].size, mappedPtrs[0].offset, 0, allocations[0].get())); EXPECT_EQ(2u, mockHandler.size()); 
EXPECT_TRUE(mockHandler.mappedPointers.back().readOnly); } const std::tuple overlappingCombinations[] = { // mappedPtrStart, mappedPtrLength, requestPtrStart, requestPtrLength, expectOverlap std::make_tuple((void *)5000, 50, (void *)4000, 1, false), //requested before, non-overlapping std::make_tuple((void *)5000, 50, (void *)4999, 10, true), //requested before, overlapping inside std::make_tuple((void *)5000, 50, (void *)4999, 100, true), //requested before, overlapping outside std::make_tuple((void *)5000, 50, (void *)5001, 1, true), //requested inside, overlapping inside std::make_tuple((void *)5000, 50, (void *)5001, 100, true), //requested inside, overlapping outside std::make_tuple((void *)5000, 50, (void *)6000, 1, false), //requested after, non-overlapping std::make_tuple((void *)5000, 50, (void *)5000, 1, true), //requested on start, overlapping inside std::make_tuple((void *)5000, 50, (void *)5000, 100, true), //requested on start, overlapping outside }; struct MapOperationsHandlerOverlapTests : public ::testing::WithParamInterface>, public ::testing::Test {}; TEST_P(MapOperationsHandlerOverlapTests, givenAlreadyMappedPtrWhenAskingForOverlapThenReturnCorrectValue) { cl_map_flags mapFlags = CL_MAP_WRITE; void *mappedPtr = std::get<0>(GetParam()); size_t mappedPtrLength = std::get<1>(GetParam()); void *requestedPtr = std::get<2>(GetParam()); size_t requestedPtrLength = std::get<3>(GetParam()); bool expectOverlap = std::get<4>(GetParam()); // size and offset arrays are ignored MapInfo mappedInfo(mappedPtr, mappedPtrLength, {{0, 0, 0}}, {{0, 0, 0}}, 0); MapInfo requestedInfo(requestedPtr, requestedPtrLength, {{0, 0, 0}}, {{0, 0, 0}}, 0); requestedInfo.readOnly = false; MockMapOperationsHandler mockHandler; mockHandler.add(mappedInfo.ptr, mappedInfo.ptrLength, mapFlags, mappedInfo.size, mappedInfo.offset, 0, mappedInfo.graphicsAllocation); EXPECT_EQ(expectOverlap, mockHandler.isOverlapping(requestedInfo)); } 
INSTANTIATE_TEST_CASE_P(MapOperationsHandlerOverlapTests, MapOperationsHandlerOverlapTests, ::testing::ValuesIn(overlappingCombinations)); struct MapOperationsStorageWhitebox : MapOperationsStorage { using MapOperationsStorage::handlers; }; TEST(MapOperationsStorageTest, givenMapOperationsStorageWhenGetHandlerIsUsedThenCreateHandler) { MockBuffer buffer1{}; MockBuffer buffer2{}; MapOperationsStorageWhitebox storage{}; EXPECT_EQ(0u, storage.handlers.size()); storage.getHandler(&buffer1); EXPECT_EQ(1u, storage.handlers.size()); storage.getHandler(&buffer2); EXPECT_EQ(2u, storage.handlers.size()); storage.getHandler(&buffer1); EXPECT_EQ(2u, storage.handlers.size()); } TEST(MapOperationsStorageTest, givenMapOperationsStorageWhenGetHandlerIfExistsIsUsedThenDoNotCreateHandler) { MockBuffer buffer1{}; MockBuffer buffer2{}; MapOperationsStorageWhitebox storage{}; EXPECT_EQ(0u, storage.handlers.size()); EXPECT_EQ(nullptr, storage.getHandlerIfExists(&buffer1)); EXPECT_EQ(nullptr, storage.getHandlerIfExists(&buffer2)); storage.getHandler(&buffer1); EXPECT_EQ(1u, storage.handlers.size()); EXPECT_NE(nullptr, storage.getHandlerIfExists(&buffer1)); EXPECT_EQ(nullptr, storage.getHandlerIfExists(&buffer2)); storage.getHandler(&buffer2); EXPECT_EQ(2u, storage.handlers.size()); EXPECT_NE(nullptr, storage.getHandlerIfExists(&buffer1)); EXPECT_NE(nullptr, storage.getHandlerIfExists(&buffer2)); EXPECT_NE(storage.getHandlerIfExists(&buffer1), storage.getHandlerIfExists(&buffer2)); } TEST(MapOperationsStorageTest, givenMapOperationsStorageWhenRemoveHandlerIsUsedThenRemoveHandler) { MockBuffer buffer{}; MapOperationsStorageWhitebox storage{}; storage.getHandler(&buffer); ASSERT_EQ(1u, storage.handlers.size()); storage.removeHandler(&buffer); EXPECT_EQ(0u, storage.handlers.size()); } compute-runtime-22.14.22890/opencl/test/unit_test/mem_obj/mem_obj_destruction_tests.cpp000066400000000000000000000546041422164147700312310ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel 
Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/wait_status.h" #include "shared/source/memory_manager/allocations_list.h" #include "shared/source/memory_manager/unified_memory_manager.h" #include "shared/source/os_interface/os_context.h" #include "shared/test/common/libult/ult_command_stream_receiver.h" #include "shared/test/common/mocks/mock_allocation_properties.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/api/api.h" #include "opencl/source/helpers/cl_memory_properties_helpers.h" #include "opencl/source/mem_obj/mem_obj.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_platform.h" using namespace NEO; template class MyCsr : public UltCommandStreamReceiver { public: MyCsr(const ExecutionEnvironment &executionEnvironment, const DeviceBitfield deviceBitfield) : UltCommandStreamReceiver(const_cast(executionEnvironment), 0, deviceBitfield) {} WaitStatus waitForCompletionWithTimeout(const WaitParams ¶ms, uint32_t taskCountToWait) override { waitForCompletionWithTimeoutCalled++; waitForCompletionWithTimeoutParamsPassed.push_back({params.enableTimeout, params.waitTimeout, taskCountToWait}); *this->getTagAddress() = getTagAddressValue; return waitForCompletionWithTimeoutResult; } struct WaitForCompletionWithTimeoutParams { bool enableTimeout; int64_t timeoutMs; uint32_t taskCountToWait; }; uint32_t waitForCompletionWithTimeoutCalled = 0u; WaitStatus waitForCompletionWithTimeoutResult = NEO::WaitStatus::Ready; StackVec waitForCompletionWithTimeoutParamsPassed{}; uint32_t getTagAddressValue{}; }; void CL_CALLBACK emptyDestructorCallback(cl_mem memObj, void *userData) { } class MemObjDestructionTest : public ::testing::TestWithParam { public: void 
SetUp() override { executionEnvironment = platform()->peekExecutionEnvironment(); memoryManager = new MockMemoryManager(*executionEnvironment); executionEnvironment->memoryManager.reset(memoryManager); device = std::make_unique(MockDevice::create(executionEnvironment, 0)); context.reset(new MockContext(device.get())); allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), size}); memObj = new MemObj(context.get(), CL_MEM_OBJECT_BUFFER, ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_READ_WRITE, 0, 0, &device->getDevice()), CL_MEM_READ_WRITE, 0, size, nullptr, nullptr, GraphicsAllocationHelper::toMultiGraphicsAllocation(allocation), true, false, false); csr = device->getDefaultEngine().commandStreamReceiver; *csr->getTagAddress() = 0; contextId = device->getDefaultEngine().osContext->getContextId(); } void TearDown() override { context.reset(); } void makeMemObjUsed() { memObj->getGraphicsAllocation(device->getRootDeviceIndex())->updateTaskCount(taskCountReady, contextId); } void makeMemObjNotReady() { makeMemObjUsed(); *device->getDefaultEngine().commandStreamReceiver->getTagAddress() = taskCountReady - 1; } void makeMemObjReady() { makeMemObjUsed(); *device->getDefaultEngine().commandStreamReceiver->getTagAddress() = taskCountReady; } constexpr static uint32_t taskCountReady = 3u; ExecutionEnvironment *executionEnvironment = nullptr; std::unique_ptr device; uint32_t contextId = 0; MockMemoryManager *memoryManager = nullptr; std::unique_ptr context; GraphicsAllocation *allocation = nullptr; MemObj *memObj = nullptr; CommandStreamReceiver *csr = nullptr; size_t size = MemoryConstants::pageSize; }; class MemObjAsyncDestructionTest : public MemObjDestructionTest { public: void SetUp() override { DebugManager.flags.EnableAsyncDestroyAllocations.set(true); MemObjDestructionTest::SetUp(); } void TearDown() override { MemObjDestructionTest::TearDown(); 
DebugManager.flags.EnableAsyncDestroyAllocations.set(defaultFlag); } bool defaultFlag = DebugManager.flags.EnableAsyncDestroyAllocations.get(); }; class MemObjSyncDestructionTest : public MemObjDestructionTest { public: void SetUp() override { DebugManager.flags.EnableAsyncDestroyAllocations.set(false); MemObjDestructionTest::SetUp(); } void TearDown() override { MemObjDestructionTest::TearDown(); DebugManager.flags.EnableAsyncDestroyAllocations.set(defaultFlag); } bool defaultFlag = DebugManager.flags.EnableAsyncDestroyAllocations.get(); }; TEST_P(MemObjAsyncDestructionTest, givenMemObjWithDestructableAllocationWhenAsyncDestructionsAreEnabledAndAllocationIsNotReadyAndMemObjectIsDestructedThenAllocationIsDeferred) { bool isMemObjReady; bool expectedDeferration; isMemObjReady = GetParam(); expectedDeferration = !isMemObjReady; if (isMemObjReady) { makeMemObjReady(); } else { makeMemObjNotReady(); } auto &allocationList = csr->getTemporaryAllocations(); EXPECT_TRUE(allocationList.peekIsEmpty()); delete memObj; EXPECT_EQ(!expectedDeferration, allocationList.peekIsEmpty()); if (expectedDeferration) { EXPECT_EQ(allocation, allocationList.peekHead()); } } HWTEST_P(MemObjAsyncDestructionTest, givenUsedMemObjWithAsyncDestructionsEnabledThatHasDestructorCallbacksWhenItIsDestroyedThenDestructorWaitsOnTaskCount) { bool hasCallbacks = GetParam(); if (hasCallbacks) { memObj->setDestructorCallback(emptyDestructorCallback, nullptr); } auto rootDeviceIndex = device->getRootDeviceIndex(); auto mockCsr0 = new MyCsr(*device->executionEnvironment, device->getDeviceBitfield()); auto mockCsr1 = new MyCsr(*device->executionEnvironment, device->getDeviceBitfield()); device->resetCommandStreamReceiver(mockCsr0, 0); device->resetCommandStreamReceiver(mockCsr1, 1); *mockCsr0->getTagAddress() = 0; *mockCsr1->getTagAddress() = 0; mockCsr0->getTagAddressValue = taskCountReady; mockCsr1->getTagAddressValue = taskCountReady; auto osContextId0 = mockCsr0->getOsContext().getContextId(); auto 
osContextId1 = mockCsr1->getOsContext().getContextId(); memObj->getGraphicsAllocation(rootDeviceIndex)->updateTaskCount(taskCountReady, osContextId0); memObj->getGraphicsAllocation(rootDeviceIndex)->updateTaskCount(taskCountReady, osContextId1); uint32_t expectedTaskCount0{}; uint32_t expectedTaskCount1{}; if (hasCallbacks) { expectedTaskCount0 = allocation->getTaskCount(osContextId0); expectedTaskCount1 = allocation->getTaskCount(osContextId1); } else { *mockCsr0->getTagAddress() = taskCountReady; *mockCsr1->getTagAddress() = taskCountReady; } delete memObj; if (hasCallbacks) { EXPECT_EQ(1u, mockCsr0->waitForCompletionWithTimeoutCalled); EXPECT_EQ(TimeoutControls::maxTimeout, mockCsr0->waitForCompletionWithTimeoutParamsPassed[0].timeoutMs); EXPECT_EQ(expectedTaskCount0, mockCsr0->waitForCompletionWithTimeoutParamsPassed[0].taskCountToWait); EXPECT_EQ(1u, mockCsr1->waitForCompletionWithTimeoutCalled); EXPECT_EQ(TimeoutControls::maxTimeout, mockCsr1->waitForCompletionWithTimeoutParamsPassed[0].timeoutMs); EXPECT_EQ(expectedTaskCount1, mockCsr1->waitForCompletionWithTimeoutParamsPassed[0].taskCountToWait); } else { EXPECT_EQ(0u, mockCsr0->waitForCompletionWithTimeoutCalled); EXPECT_EQ(0u, mockCsr1->waitForCompletionWithTimeoutCalled); } } HWTEST_P(MemObjAsyncDestructionTest, givenUsedMemObjWithAsyncDestructionsEnabledThatHasAllocatedMappedPtrWhenItIsDestroyedThenDestructorWaitsOnTaskCount) { makeMemObjUsed(); bool hasAllocatedMappedPtr = GetParam(); if (hasAllocatedMappedPtr) { auto allocatedPtr = alignedMalloc(size, MemoryConstants::pageSize); memObj->setAllocatedMapPtr(allocatedPtr); } auto mockCsr = new MyCsr(*device->executionEnvironment, device->getDeviceBitfield()); device->resetCommandStreamReceiver(mockCsr); *mockCsr->getTagAddress() = 0; auto osContextId = mockCsr->getOsContext().getContextId(); uint32_t expectedTaskCount{}; if (hasAllocatedMappedPtr) { expectedTaskCount = allocation->getTaskCount(osContextId); } delete memObj; if (hasAllocatedMappedPtr) { 
// Continuation of the allocated-map-pointer test that opens on the previous line.
        EXPECT_EQ(1u, mockCsr->waitForCompletionWithTimeoutCalled);
        EXPECT_EQ(TimeoutControls::maxTimeout, mockCsr->waitForCompletionWithTimeoutParamsPassed[0].timeoutMs);
        EXPECT_EQ(expectedTaskCount, mockCsr->waitForCompletionWithTimeoutParamsPassed[0].taskCountToWait);
    } else {
        EXPECT_EQ(0u, mockCsr->waitForCompletionWithTimeoutCalled);
    }
}

// Parameter = "storage is set as the allocated map pointer". When false, the fixture's mem
// object is replaced by one built on top of `storage` as host pointer, with `storage`
// registered as a mapped pointer; the test then frees `storage` itself after the delete.
// The closing assertions continue on the next line.
HWTEST_P(MemObjAsyncDestructionTest, givenUsedMemObjWithAsyncDestructionsEnabledThatHasDestructableMappedPtrWhenItIsDestroyedThenDestructorWaitsOnTaskCount) {
    auto storage = alignedMalloc(size, MemoryConstants::pageSize);

    bool hasAllocatedMappedPtr = GetParam();

    if (!hasAllocatedMappedPtr) {
        // Rebuild memObj and allocation from scratch for the host-pointer variant.
        delete memObj;
        allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{context->getDevice(0)->getRootDeviceIndex(), size});
        MemObjOffsetArray origin = {{0, 0, 0}};
        MemObjSizeArray region = {{1, 1, 1}};
        cl_map_flags mapFlags = CL_MAP_READ;
        memObj = new MemObj(context.get(), CL_MEM_OBJECT_BUFFER,
                            ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_READ_WRITE, 0, 0, &context->getDevice(0)->getDevice()),
                            CL_MEM_READ_WRITE, 0, size, storage, nullptr, GraphicsAllocationHelper::toMultiGraphicsAllocation(allocation), true, false, false);
        memObj->addMappedPtr(storage, 1, mapFlags, region, origin, 0, nullptr);
    } else {
        memObj->setAllocatedMapPtr(storage);
    }

    makeMemObjUsed();
    auto mockCsr = new MyCsr(*device->executionEnvironment, device->getDeviceBitfield());
    device->resetCommandStreamReceiver(mockCsr);
    *mockCsr->getTagAddress() = 0;

    auto osContextId = mockCsr->getOsContext().getContextId();

    uint32_t expectedTaskCount{};

    if (hasAllocatedMappedPtr) {
        expectedTaskCount = allocation->getTaskCount(osContextId);
    }

    delete memObj;

    if (!hasAllocatedMappedPtr) {
        // In this variant the storage was only passed as host/mapped pointer, so the test releases it.
        alignedFree(storage);
    }

    if (hasAllocatedMappedPtr) {
        EXPECT_EQ(1u, mockCsr->waitForCompletionWithTimeoutCalled);
        EXPECT_EQ(TimeoutControls::maxTimeout, mockCsr->waitForCompletionWithTimeoutParamsPassed[0].timeoutMs);
        EXPECT_EQ(expectedTaskCount,
mockCsr->waitForCompletionWithTimeoutParamsPassed[0].taskCountToWait); } else { EXPECT_EQ(0u, mockCsr->waitForCompletionWithTimeoutCalled); } } HWTEST_P(MemObjSyncDestructionTest, givenMemObjWithDestructableAllocationWhenAsyncDestructionsAreDisabledThenDestructorWaitsOnTaskCount) { bool isMemObjReady; isMemObjReady = GetParam(); if (isMemObjReady) { makeMemObjReady(); } else { makeMemObjNotReady(); } auto mockCsr = new MyCsr(*device->executionEnvironment, device->getDeviceBitfield()); device->resetCommandStreamReceiver(mockCsr); *mockCsr->getTagAddress() = 0; auto osContextId = mockCsr->getOsContext().getContextId(); uint32_t expectedTaskCount = allocation->getTaskCount(osContextId); delete memObj; EXPECT_EQ(1u, mockCsr->waitForCompletionWithTimeoutCalled); EXPECT_EQ(TimeoutControls::maxTimeout, mockCsr->waitForCompletionWithTimeoutParamsPassed[0].timeoutMs); EXPECT_EQ(expectedTaskCount, mockCsr->waitForCompletionWithTimeoutParamsPassed[0].taskCountToWait); } HWTEST_P(MemObjSyncDestructionTest, givenMemObjWithDestructableAllocationWhenAsyncDestructionsAreDisabledThenAllocationIsNotDeferred) { bool isMemObjReady; isMemObjReady = GetParam(); if (isMemObjReady) { makeMemObjReady(); } else { makeMemObjNotReady(); } auto mockCsr = new MyCsr(*device->executionEnvironment, device->getDeviceBitfield()); device->resetCommandStreamReceiver(mockCsr); *mockCsr->getTagAddress() = 0; delete memObj; auto &allocationList = mockCsr->getTemporaryAllocations(); EXPECT_TRUE(allocationList.peekIsEmpty()); } HWTEST_P(MemObjSyncDestructionTest, givenMemObjWithMapAllocationWhenAsyncDestructionsAreDisabledThenWaitForCompletionWithTimeoutOnMapAllocation) { auto isMapAllocationUsed = GetParam(); auto mockCsr = new MyCsr(*device->executionEnvironment, device->getDeviceBitfield()); device->resetCommandStreamReceiver(mockCsr); *mockCsr->getTagAddress() = 0; GraphicsAllocation *mapAllocation = nullptr; AllocationProperties properties{device->getRootDeviceIndex(), true, MemoryConstants::pageSize, 
AllocationType::MAP_ALLOCATION, false, context->getDeviceBitfieldForAllocation(device->getRootDeviceIndex())}; mapAllocation = memoryManager->allocateGraphicsMemoryWithProperties(properties, nullptr); memObj->setMapAllocation(mapAllocation); if (isMapAllocationUsed) { memObj->getMapAllocation(device->getRootDeviceIndex())->updateTaskCount(taskCountReady, contextId); } auto osContextId = mockCsr->getOsContext().getContextId(); uint32_t expectedTaskCount{}; if (isMapAllocationUsed) { expectedTaskCount = mapAllocation->getTaskCount(osContextId); } delete memObj; if (isMapAllocationUsed) { EXPECT_EQ(1u, mockCsr->waitForCompletionWithTimeoutCalled); EXPECT_EQ(TimeoutControls::maxTimeout, mockCsr->waitForCompletionWithTimeoutParamsPassed[0].timeoutMs); EXPECT_EQ(expectedTaskCount, mockCsr->waitForCompletionWithTimeoutParamsPassed[0].taskCountToWait); } else { EXPECT_EQ(0u, mockCsr->waitForCompletionWithTimeoutCalled); } } HWTEST_P(MemObjSyncDestructionTest, givenMemObjWithMapAllocationWhenAsyncDestructionsAreDisabledThenMapAllocationIsNotDeferred) { auto hasMapAllocation = GetParam(); auto mockCsr = new MyCsr(*device->executionEnvironment, device->getDeviceBitfield()); device->resetCommandStreamReceiver(mockCsr); *mockCsr->getTagAddress() = 0; GraphicsAllocation *mapAllocation = nullptr; if (hasMapAllocation) { AllocationProperties properties{device->getRootDeviceIndex(), true, MemoryConstants::pageSize, AllocationType::MAP_ALLOCATION, false, context->getDeviceBitfieldForAllocation(device->getRootDeviceIndex())}; mapAllocation = memoryManager->allocateGraphicsMemoryWithProperties(properties, nullptr); memObj->setMapAllocation(mapAllocation); memObj->getMapAllocation(device->getRootDeviceIndex())->updateTaskCount(taskCountReady, contextId); } makeMemObjUsed(); auto &allocationList = mockCsr->getTemporaryAllocations(); EXPECT_TRUE(allocationList.peekIsEmpty()); delete memObj; EXPECT_TRUE(allocationList.peekIsEmpty()); } HWTEST_P(MemObjAsyncDestructionTest, 
// Test name and body for the HWTEST_P( opener on the previous line. Parameter = "a map
// allocation is attached". After the delete the temporary-allocation list is expected to be
// non-empty: head is `allocation`; tail is the map allocation when one was attached,
// otherwise `allocation` again.
         givenMemObjWithMapAllocationWithoutMemUseHostPtrFlagWhenAsyncDestructionsAreEnabledThenMapAllocationIsDeferred) {
    auto hasMapAllocation = GetParam();

    auto mockCsr = new MyCsr(*device->executionEnvironment, device->getDeviceBitfield());
    device->resetCommandStreamReceiver(mockCsr);
    *mockCsr->getTagAddress() = 0;

    GraphicsAllocation *mapAllocation = nullptr;
    if (hasMapAllocation) {
        AllocationProperties properties{device->getRootDeviceIndex(), true, MemoryConstants::pageSize, AllocationType::MAP_ALLOCATION, false, context->getDeviceBitfieldForAllocation(device->getRootDeviceIndex())};
        mapAllocation = memoryManager->allocateGraphicsMemoryWithProperties(properties, nullptr);
        memObj->setMapAllocation(mapAllocation);

        memObj->getMapAllocation(device->getRootDeviceIndex())->updateTaskCount(taskCountReady, contextId);
    }
    makeMemObjUsed();

    auto &allocationList = mockCsr->getTemporaryAllocations();
    EXPECT_TRUE(allocationList.peekIsEmpty());

    delete memObj;

    EXPECT_FALSE(allocationList.peekIsEmpty());

    if (hasMapAllocation) {
        EXPECT_EQ(allocation, allocationList.peekHead());
        EXPECT_EQ(mapAllocation, allocationList.peekTail());
    } else {
        EXPECT_EQ(allocation, allocationList.peekHead());
        EXPECT_EQ(allocation, allocationList.peekTail());
    }
}

// Same scenario, but the map allocation is created on top of a caller-owned host pointer.
// The body continues on the next line.
HWTEST_P(MemObjAsyncDestructionTest, givenMemObjWithMapAllocationWithMemUseHostPtrFlagWhenAsyncDestructionsAreEnabledThenMapAllocationIsNotDeferred) {
    auto hasMapAllocation = GetParam();

    auto mockCsr = new MyCsr(*device->executionEnvironment, device->getDeviceBitfield());
    device->resetCommandStreamReceiver(mockCsr);
    *mockCsr->getTagAddress() = 0;

    GraphicsAllocation *mapAllocation = nullptr;
    // Host memory owned by the test: page-sized, aligned to pageSize64k.
    char *hostPtr = (char *)alignedMalloc(MemoryConstants::pageSize, MemoryConstants::pageSize64k);
    if (hasMapAllocation) {
        AllocationProperties properties{device->getRootDeviceIndex(), false, MemoryConstants::pageSize, AllocationType::MAP_ALLOCATION, false, context->getDeviceBitfieldForAllocation(device->getRootDeviceIndex())};
        mapAllocation =
// Continuation of the host-pointer map-allocation test that opens on the previous line.
            memoryManager->allocateGraphicsMemoryWithProperties(properties, hostPtr);
        memObj->setMapAllocation(mapAllocation);

        memObj->getMapAllocation(device->getRootDeviceIndex())->updateTaskCount(taskCountReady, contextId);
    }
    makeMemObjUsed();

    auto &allocationList = mockCsr->getTemporaryAllocations();
    EXPECT_TRUE(allocationList.peekIsEmpty());

    delete memObj;

    EXPECT_FALSE(allocationList.peekIsEmpty());

    if (hasMapAllocation) {
        // NOTE(review): both assertions check peekHead(); the else-branch checks head and tail.
        // The second one may have been meant to be peekTail() — confirm against MemObj's
        // map-allocation release path before changing it.
        EXPECT_EQ(allocation, allocationList.peekHead());
        EXPECT_EQ(allocation, allocationList.peekHead());
    } else {
        EXPECT_EQ(allocation, allocationList.peekHead());
        EXPECT_EQ(allocation, allocationList.peekTail());
    }

    // hostPtr was allocated by this test, so it is released here.
    alignedFree(hostPtr);
}

// Run both parameterized fixtures with the parameter set to false and true.
// NOTE(review): newer GoogleTest releases spell this macro INSTANTIATE_TEST_SUITE_P — check
// the bundled gtest version before migrating.
INSTANTIATE_TEST_CASE_P(
    MemObjTests,
    MemObjAsyncDestructionTest,
    testing::Bool());

INSTANTIATE_TEST_CASE_P(
    MemObjTests,
    MemObjSyncDestructionTest,
    testing::Bool());

// The USM tests below need no fixture state of their own.
using UsmDestructionTests = ::testing::Test;

// Shared USM: both the GPU-side and the CPU-side allocation are marked used (task count 6 on
// context 0) while the CSR tag is 5. The blocking free and its assertions are on the next line.
HWTEST_F(UsmDestructionTests, givenSharedUsmAllocationWhenBlockingFreeIsCalledThenWaitForCompletionIsCalled) {
    MockDevice mockDevice;
    mockDevice.incRefInternal();
    MockClDevice mockClDevice(&mockDevice);
    MockContext mockContext(&mockClDevice, false);

    if (mockContext.getDevice(0u)->getHardwareInfo().capabilityTable.supportsOcl21Features == false) {
        GTEST_SKIP();
    }

    auto mockCsr = new MyCsr(*mockDevice.executionEnvironment, 1);
    mockDevice.resetCommandStreamReceiver(mockCsr);
    *mockCsr->getTagAddress() = 5u;

    SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::SHARED_UNIFIED_MEMORY, mockContext.getRootDeviceIndices(), mockContext.getDeviceBitfields());

    auto svmAllocationsManager = mockContext.getSVMAllocsManager();
    auto sharedMemory = svmAllocationsManager->createUnifiedAllocationWithDeviceStorage(4096u, {}, unifiedMemoryProperties);
    ASSERT_NE(nullptr, sharedMemory);

    auto svmEntry = svmAllocationsManager->getSVMAlloc(sharedMemory);

    svmEntry->gpuAllocations.getGraphicsAllocation(mockDevice.getRootDeviceIndex())->updateTaskCount(6u, 0u);
    svmEntry->cpuAllocation->updateTaskCount(6u, 0u);
clMemBlockingFreeINTEL(&mockContext, sharedMemory); EXPECT_EQ(2u, mockCsr->waitForCompletionWithTimeoutCalled); EXPECT_EQ(TimeoutControls::maxTimeout, mockCsr->waitForCompletionWithTimeoutParamsPassed[0].timeoutMs); EXPECT_EQ(TimeoutControls::maxTimeout, mockCsr->waitForCompletionWithTimeoutParamsPassed[1].timeoutMs); EXPECT_EQ(6u, mockCsr->waitForCompletionWithTimeoutParamsPassed[0].taskCountToWait); EXPECT_EQ(6u, mockCsr->waitForCompletionWithTimeoutParamsPassed[1].taskCountToWait); } HWTEST_F(UsmDestructionTests, givenUsmAllocationWhenBlockingFreeIsCalledThenWaitForCompletionIsCalled) { MockDevice mockDevice; mockDevice.incRefInternal(); MockClDevice mockClDevice(&mockDevice); MockContext mockContext(&mockClDevice, false); if (mockContext.getDevice(0u)->getHardwareInfo().capabilityTable.supportsOcl21Features == false) { GTEST_SKIP(); } auto mockCsr = new MyCsr(*mockDevice.executionEnvironment, 1); mockDevice.resetCommandStreamReceiver(mockCsr); *mockCsr->getTagAddress() = 5u; SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::HOST_UNIFIED_MEMORY, mockContext.getRootDeviceIndices(), mockContext.getDeviceBitfields()); auto svmAllocationsManager = mockContext.getSVMAllocsManager(); auto hostMemory = svmAllocationsManager->createUnifiedMemoryAllocation(4096u, unifiedMemoryProperties); ASSERT_NE(nullptr, hostMemory); auto svmEntry = svmAllocationsManager->getSVMAlloc(hostMemory); svmEntry->gpuAllocations.getGraphicsAllocation(mockDevice.getRootDeviceIndex())->updateTaskCount(6u, 0u); clMemBlockingFreeINTEL(&mockContext, hostMemory); EXPECT_EQ(1u, mockCsr->waitForCompletionWithTimeoutCalled); EXPECT_EQ(TimeoutControls::maxTimeout, mockCsr->waitForCompletionWithTimeoutParamsPassed[0].timeoutMs); EXPECT_EQ(6u, mockCsr->waitForCompletionWithTimeoutParamsPassed[0].taskCountToWait); } 
compute-runtime-22.14.22890/opencl/test/unit_test/mem_obj/mem_obj_helper_tests.cpp000066400000000000000000000565541422164147700301530ustar00rootroot00000000000000/*
 * Copyright (C) 2018-2021 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "shared/source/os_interface/hw_info_config.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/mocks/ult_device_factory.h"
#include "shared/test/unit_test/utilities/base_object_utils.h"

#include "opencl/source/helpers/cl_hw_helper.h"
#include "opencl/source/helpers/cl_memory_properties_helpers.h"
#include "opencl/source/mem_obj/mem_obj_helper.h"
#include "opencl/test/unit_test/fixtures/image_fixture.h"
#include "opencl/test/unit_test/mocks/mock_buffer.h"
#include "opencl/test/unit_test/mocks/mock_context.h"

#include "gtest/gtest.h"

using namespace NEO;

// All device-access and host-access flags OR-ed together are expected to pass the sub-buffer flag check.
TEST(MemObjHelper, givenValidMemFlagsForSubBufferWhenFlagsAreCheckedThenTrueIsReturned) {
    cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY |
                         CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS;

    EXPECT_TRUE(MemObjHelper::checkMemFlagsForSubBuffer(flags));
}

// The host-pointer flags are expected to fail the sub-buffer flag check.
// NOTE(review): the test name ends in "ThenTrueIsReturned" while the body expects false; the
// name looks like a copy-paste slip — confirm before renaming.
TEST(MemObjHelper, givenInvalidMemFlagsForSubBufferWhenFlagsAreCheckedThenTrueIsReturned) {
    cl_mem_flags flags = CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR | CL_MEM_USE_HOST_PTR;

    EXPECT_FALSE(MemObjHelper::checkMemFlagsForSubBuffer(flags));
}

// CL_MEM_FORCE_LINEAR_STORAGE_INTEL is tried in `flags`, then in `flagsIntel`; the remaining
// combinations continue on the next line.
TEST(MemObjHelper, givenClMemForceLinearStorageFlagWhenCheckForLinearStorageForceThenReturnProperValue) {
    UltDeviceFactory deviceFactory{1, 0};
    auto pDevice = deviceFactory.rootDevices[0];
    MemoryProperties memoryProperties;
    cl_mem_flags flags = 0;
    cl_mem_flags_intel flagsIntel = 0;

    // Bit set in `flags` only.
    flags |= CL_MEM_FORCE_LINEAR_STORAGE_INTEL;
    flagsIntel = 0;
    memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(flags, flagsIntel, 0, pDevice);
    EXPECT_TRUE(memoryProperties.flags.forceLinearStorage);

    // Bit set in `flagsIntel` only.
    flags = 0;
    flagsIntel |= CL_MEM_FORCE_LINEAR_STORAGE_INTEL;
    memoryProperties =
// Continuation of the force-linear-storage test that opens on the previous line.
        ClMemoryPropertiesHelper::createMemoryProperties(flags, flagsIntel, 0, pDevice);
    EXPECT_TRUE(memoryProperties.flags.forceLinearStorage);

    // Bit set in both fields.
    flags |= CL_MEM_FORCE_LINEAR_STORAGE_INTEL;
    flagsIntel |= CL_MEM_FORCE_LINEAR_STORAGE_INTEL;
    memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(flags, flagsIntel, 0, pDevice);
    EXPECT_TRUE(memoryProperties.flags.forceLinearStorage);

    // Bit set in neither field.
    flags = 0;
    flagsIntel = 0;
    memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(flags, flagsIntel, 0, pDevice);
    EXPECT_FALSE(memoryProperties.flags.forceLinearStorage);
}

// Walks a series of flag combinations that the buffer and/or image validators are expected to
// accept. Each step rebuilds memoryProperties and then re-assigns flags/flagsIntel to the
// values passed to the validators. The series continues over the following lines.
TEST(MemObjHelper, givenValidPropertiesWhenValidatingMemoryPropertiesThenTrueIsReturned) {
    cl_mem_flags flags = 0;
    cl_mem_flags_intel flagsIntel = 0;
    UltClDeviceFactory deviceFactory{1, 0};
    auto pDevice = &deviceFactory.rootDevices[0]->getDevice();
    MemoryProperties memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(flags, flagsIntel, 0, pDevice);

    MockContext context{deviceFactory.rootDevices[0]};

    EXPECT_TRUE(MemObjHelper::validateMemoryPropertiesForBuffer(memoryProperties, flags, flagsIntel, context));
    EXPECT_TRUE(MemObjHelper::validateMemoryPropertiesForImage(memoryProperties, flags, flagsIntel, nullptr, context));

    // The next two combinations are checked against the image validator only.
    flags = CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL | CL_MEM_NO_ACCESS_INTEL;
    memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(flags, flagsIntel, 0, pDevice);
    flags = CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL | CL_MEM_NO_ACCESS_INTEL;
    flagsIntel = 0;
    EXPECT_TRUE(MemObjHelper::validateMemoryPropertiesForImage(memoryProperties, flags, flagsIntel, nullptr, context));

    flags = CL_MEM_NO_ACCESS_INTEL;
    memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(flags, flagsIntel, 0, pDevice);
    flags = CL_MEM_NO_ACCESS_INTEL;
    flagsIntel = 0;
    EXPECT_TRUE(MemObjHelper::validateMemoryPropertiesForImage(memoryProperties, flags, flagsIntel, nullptr, context));

    flags = CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR | CL_MEM_HOST_NO_ACCESS;
    memoryProperties =
// Interior of the valid-properties test; it opens before this line and closes after it.
        ClMemoryPropertiesHelper::createMemoryProperties(flags, flagsIntel, 0, pDevice);
    flags = CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR | CL_MEM_HOST_NO_ACCESS;
    flagsIntel = 0;
    EXPECT_TRUE(MemObjHelper::validateMemoryPropertiesForBuffer(memoryProperties, flags, flagsIntel, context));
    EXPECT_TRUE(MemObjHelper::validateMemoryPropertiesForImage(memoryProperties, flags, flagsIntel, nullptr, context));

    flags = CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR | CL_MEM_HOST_WRITE_ONLY;
    memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(flags, flagsIntel, 0, pDevice);
    flags = CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR | CL_MEM_HOST_WRITE_ONLY;
    flagsIntel = 0;
    EXPECT_TRUE(MemObjHelper::validateMemoryPropertiesForBuffer(memoryProperties, flags, flagsIntel, context));
    EXPECT_TRUE(MemObjHelper::validateMemoryPropertiesForImage(memoryProperties, flags, flagsIntel, nullptr, context));

    flags = CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR | CL_MEM_HOST_NO_ACCESS;
    memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(flags, flagsIntel, 0, pDevice);
    flags = CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR | CL_MEM_HOST_NO_ACCESS;
    flagsIntel = 0;
    EXPECT_TRUE(MemObjHelper::validateMemoryPropertiesForBuffer(memoryProperties, flags, flagsIntel, context));
    EXPECT_TRUE(MemObjHelper::validateMemoryPropertiesForImage(memoryProperties, flags, flagsIntel, nullptr, context));

    // Here `flags` still holds the previous step's value when memoryProperties is built;
    // it is zeroed only afterwards, before the validators run.
    flagsIntel = CL_MEM_LOCALLY_UNCACHED_RESOURCE;
    memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(flags, flagsIntel, 0, pDevice);
    flags = 0;
    flagsIntel = CL_MEM_LOCALLY_UNCACHED_RESOURCE;
    EXPECT_TRUE(MemObjHelper::validateMemoryPropertiesForBuffer(memoryProperties, flags, flagsIntel, context));
    EXPECT_TRUE(MemObjHelper::validateMemoryPropertiesForImage(memoryProperties, flags, flagsIntel, nullptr, context));

    flagsIntel = CL_MEM_LOCALLY_UNCACHED_SURFACE_STATE_RESOURCE;
    memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(flags, flagsIntel, 0, pDevice);
    flags = 0;
    flagsIntel =
// Closing part of the valid-properties test that opens before this line.
        CL_MEM_LOCALLY_UNCACHED_SURFACE_STATE_RESOURCE;
    EXPECT_TRUE(MemObjHelper::validateMemoryPropertiesForBuffer(memoryProperties, flags, flagsIntel, context));
    EXPECT_TRUE(MemObjHelper::validateMemoryPropertiesForImage(memoryProperties, flags, flagsIntel, nullptr, context));

    // `flagsIntel` still holds the previous step's value when memoryProperties is built here;
    // it is zeroed afterwards, before the validators run.
    flags = 0;
    memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(flags, flagsIntel, 0, pDevice);
    flags = 0;
    flagsIntel = 0;
    EXPECT_TRUE(MemObjHelper::validateMemoryPropertiesForBuffer(memoryProperties, flags, flagsIntel, context));
    EXPECT_TRUE(MemObjHelper::validateMemoryPropertiesForImage(memoryProperties, flags, flagsIntel, nullptr, context));
}

// Image1dDefaults variant whose `flags` enumerator is CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL.
struct Image1dWithAccessFlagsUnrestricted : public Image1dDefaults {
    enum { flags = CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL };
};

// For each host-pointer flag, image validation against a parent image is expected to fail,
// with and without CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL added. The loop body continues on
// the next line.
TEST(MemObjHelper, givenParentMemObjAndHostPtrFlagsWhenValidatingMemoryPropertiesForImageThenFalseIsReturned) {
    cl_mem_flags flags = 0;
    cl_mem_flags_intel flagsIntel = 0;
    UltClDeviceFactory deviceFactory{1, 0};
    auto pDevice = &deviceFactory.rootDevices[0]->getDevice();
    MemoryProperties memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(flags, flagsIntel, 0, pDevice);

    MockContext context{deviceFactory.rootDevices[0]};
    auto image = clUniquePtr(Image1dHelper<>::create(&context));
    // NOTE(review): ImageHelper is used without a template argument list and the struct above
    // is not referenced anywhere in this span — presumably an argument list naming that struct
    // was lost here; verify against the upstream file.
    auto imageWithAccessFlagsUnrestricted = clUniquePtr(ImageHelper::create(&context));

    cl_mem_flags hostPtrFlags[] = {CL_MEM_USE_HOST_PTR, CL_MEM_ALLOC_HOST_PTR, CL_MEM_COPY_HOST_PTR};

    for (auto hostPtrFlag : hostPtrFlags) {
        flags = hostPtrFlag;
        memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(flags, flagsIntel, 0, pDevice);
        flags = hostPtrFlag;
        EXPECT_FALSE(MemObjHelper::validateMemoryPropertiesForImage(memoryProperties, flags, 0, image.get(), context));
        EXPECT_FALSE(MemObjHelper::validateMemoryPropertiesForImage(memoryProperties, flags, 0, imageWithAccessFlagsUnrestricted.get(), context));

        flags |= CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL;
        memoryProperties =
ClMemoryPropertiesHelper::createMemoryProperties(flags, flagsIntel, 0, pDevice); flags |= CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL; EXPECT_FALSE(MemObjHelper::validateMemoryPropertiesForImage(memoryProperties, flags, 0, image.get(), context)); EXPECT_FALSE(MemObjHelper::validateMemoryPropertiesForImage(memoryProperties, flags, 0, imageWithAccessFlagsUnrestricted.get(), context)); } } TEST(MemObjHelper, givenContextWithMultipleRootDevicesWhenIsSuitableForCompressionIsCalledThenFalseIsReturned) { MockDefaultContext context; MemoryProperties memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(0, 0, 0, &context.pRootDevice0->getDevice()); EXPECT_FALSE(MemObjHelper::isSuitableForCompression(true, memoryProperties, context, true)); } TEST(MemObjHelper, givenCompressionEnabledButNotPreferredWhenCompressionHintIsPassedThenCompressionIsUsed) { cl_mem_flags_intel flagsIntel = CL_MEM_COMPRESSED_HINT_INTEL; cl_mem_flags flags = CL_MEM_COMPRESSED_HINT_INTEL; MockContext context; MemoryProperties memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(flags, flagsIntel, 0, &context.getDevice(0)->getDevice()); context.contextType = ContextType::CONTEXT_TYPE_DEFAULT; EXPECT_TRUE(MemObjHelper::isSuitableForCompression(true, memoryProperties, context, false)); flags = CL_MEM_COMPRESSED_HINT_INTEL; flagsIntel = 0; memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(flags, flagsIntel, 0, &context.getDevice(0)->getDevice()); EXPECT_TRUE(MemObjHelper::isSuitableForCompression(true, memoryProperties, context, false)); flagsIntel = CL_MEM_COMPRESSED_HINT_INTEL; flags = 0; memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(flags, flagsIntel, 0, &context.getDevice(0)->getDevice()); EXPECT_TRUE(MemObjHelper::isSuitableForCompression(true, memoryProperties, context, false)); } TEST(MemObjHelper, givenCompressionEnabledAndPreferredWhenCompressionHintIsPassedThenCompressionIsUsed) { cl_mem_flags_intel flagsIntel = 
CL_MEM_COMPRESSED_HINT_INTEL; cl_mem_flags flags = CL_MEM_COMPRESSED_HINT_INTEL; MockContext context; MemoryProperties memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(flags, flagsIntel, 0, &context.getDevice(0)->getDevice()); context.contextType = ContextType::CONTEXT_TYPE_DEFAULT; EXPECT_TRUE(MemObjHelper::isSuitableForCompression(true, memoryProperties, context, true)); flags = CL_MEM_COMPRESSED_HINT_INTEL; flagsIntel = 0; memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(flags, flagsIntel, 0, &context.getDevice(0)->getDevice()); EXPECT_TRUE(MemObjHelper::isSuitableForCompression(true, memoryProperties, context, true)); flagsIntel = CL_MEM_COMPRESSED_HINT_INTEL; flags = 0; memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(flags, flagsIntel, 0, &context.getDevice(0)->getDevice()); EXPECT_TRUE(MemObjHelper::isSuitableForCompression(true, memoryProperties, context, true)); } TEST(MemObjHelper, givenCompressionWhenCL_MEM_COMPRESSEDIsNotSetThenFalseReturned) { cl_mem_flags_intel flagsIntel = 0; cl_mem_flags flags = 0; MockContext context; MemoryProperties memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(flags, flagsIntel, 0, &context.getDevice(0)->getDevice()); context.contextType = ContextType::CONTEXT_TYPE_DEFAULT; EXPECT_FALSE(MemObjHelper::isSuitableForCompression(true, memoryProperties, context, false)); } TEST(MemObjHelper, givenCompressionWhenCL_MEM_COMPRESSEDThenTrueIsReturned) { cl_mem_flags_intel flagsIntel = CL_MEM_COMPRESSED_HINT_INTEL; cl_mem_flags flags = CL_MEM_COMPRESSED_HINT_INTEL; MockContext context; MemoryProperties memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(flags, flagsIntel, 0, &context.getDevice(0)->getDevice()); context.contextType = ContextType::CONTEXT_TYPE_DEFAULT; EXPECT_TRUE(MemObjHelper::isSuitableForCompression(true, memoryProperties, context, true)); } TEST(MemObjHelperMultiTile, 
// Test name and body for the TEST( opener on the previous line. A context built from one root
// device plus its four sub-devices is used; a single compressed or uncompressed hint, placed
// in either flag field, is expected to pass buffer validation.
     givenValidExtraPropertiesWhenValidatingExtraPropertiesThenTrueIsReturned) {
    UltClDeviceFactory deviceFactory{1, 4};

    cl_device_id devices[] = {deviceFactory.rootDevices[0], deviceFactory.subDevices[0], deviceFactory.subDevices[1],
                              deviceFactory.subDevices[2], deviceFactory.subDevices[3]};
    MockContext context(ClDeviceVector{devices, 5});
    auto pDevice = &deviceFactory.rootDevices[0]->getDevice();

    // Compressed hint in `flags`.
    cl_mem_flags flags = CL_MEM_COMPRESSED_HINT_INTEL;
    cl_mem_flags_intel flagsIntel = 0;
    auto memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(flags, flagsIntel, 0, pDevice);
    EXPECT_TRUE(MemObjHelper::validateMemoryPropertiesForBuffer(memoryProperties, flags, flagsIntel, context));

    // Uncompressed hint in `flags`.
    flags = CL_MEM_UNCOMPRESSED_HINT_INTEL;
    flagsIntel = 0;
    memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(flags, flagsIntel, 0, pDevice);
    EXPECT_TRUE(MemObjHelper::validateMemoryPropertiesForBuffer(memoryProperties, flags, flagsIntel, context));

    // Compressed hint in `flagsIntel`.
    flags = 0;
    flagsIntel = CL_MEM_COMPRESSED_HINT_INTEL;
    memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(flags, flagsIntel, 0, pDevice);
    EXPECT_TRUE(MemObjHelper::validateMemoryPropertiesForBuffer(memoryProperties, flags, flagsIntel, context));

    // Uncompressed hint in `flagsIntel`.
    flags = 0;
    flagsIntel = CL_MEM_UNCOMPRESSED_HINT_INTEL;
    memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(flags, flagsIntel, 0, pDevice);
    EXPECT_TRUE(MemObjHelper::validateMemoryPropertiesForBuffer(memoryProperties, flags, flagsIntel, context));
}

// Compressed and uncompressed hints supplied together are expected to fail buffer validation.
// Further combinations continue on the next line.
TEST(MemObjHelper, givenInvalidFlagsWhenValidatingExtraPropertiesThenFalseIsReturned) {
    MemoryProperties memoryProperties;
    // Both hints in `flags`.
    cl_mem_flags flags = CL_MEM_COMPRESSED_HINT_INTEL | CL_MEM_UNCOMPRESSED_HINT_INTEL;
    cl_mem_flags_intel flagsIntel = 0;
    MockContext context;
    memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(flags, flagsIntel, 0, &context.getDevice(0)->getDevice());
    EXPECT_FALSE(MemObjHelper::validateMemoryPropertiesForBuffer(memoryProperties, flags, flagsIntel, context));

    flags = 0;
flagsIntel = CL_MEM_COMPRESSED_HINT_INTEL | CL_MEM_UNCOMPRESSED_HINT_INTEL; memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(flags, flagsIntel, 0, &context.getDevice(0)->getDevice()); EXPECT_FALSE(MemObjHelper::validateMemoryPropertiesForBuffer(memoryProperties, flags, flagsIntel, context)); flags = CL_MEM_COMPRESSED_HINT_INTEL; flagsIntel = CL_MEM_UNCOMPRESSED_HINT_INTEL; memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(flags, flagsIntel, 0, &context.getDevice(0)->getDevice()); EXPECT_FALSE(MemObjHelper::validateMemoryPropertiesForBuffer(memoryProperties, flags, flagsIntel, context)); flags = CL_MEM_UNCOMPRESSED_HINT_INTEL; flagsIntel = CL_MEM_COMPRESSED_HINT_INTEL; memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(flags, flagsIntel, 0, &context.getDevice(0)->getDevice()); EXPECT_FALSE(MemObjHelper::validateMemoryPropertiesForBuffer(memoryProperties, flags, flagsIntel, context)); } TEST(MemObjHelper, givenMultipleSubDevicesWhenDefaultContextIsUsedThenResourcesAreNotSuitableForCompression) { DebugManagerStateRestore debugRestore; DebugManager.flags.CreateMultipleSubDevices.set(4u); initPlatform(); MockContext context(platform()->getClDevice(0)); MemoryProperties memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_READ_ONLY, 0u, 0, &context.getDevice(0)->getDevice()); EXPECT_FALSE(MemObjHelper::isSuitableForCompression(true, memoryProperties, context, true)); memoryProperties.flags.hostNoAccess = true; EXPECT_FALSE(MemObjHelper::isSuitableForCompression(true, memoryProperties, context, true)); } TEST(MemObjHelper, givenCompressionEnabledAndPreferredWhenContextRequiresResolveThenResourceNotSuitableForCompression) { MemoryProperties memoryProperties; MockContext context; context.contextType = ContextType::CONTEXT_TYPE_SPECIALIZED; context.resolvesRequiredInKernels = true; EXPECT_FALSE(MemObjHelper::isSuitableForCompression(true, memoryProperties, context, true)); } TEST(MemObjHelper, 
// Test name and body for the TEST( opener on the previous line. Specialized context that
// does not require resolves in kernels: compression is expected to be accepted.
     givenCompressionEnabledAndPreferredWhenContextNotRequiresResolveThenResourceSuitableForCompression) {
    MemoryProperties defaultProperties;
    MockContext context;

    context.contextType = ContextType::CONTEXT_TYPE_SPECIALIZED;
    context.resolvesRequiredInKernels = false;

    EXPECT_TRUE(MemObjHelper::isSuitableForCompression(true, defaultProperties, context, true));
}

// Same context setup, but ToggleHintKernelDisableCompression is set to 0: compression is
// expected to be rejected.
TEST(MemObjHelper, givenCompressionEnabledAndPreferredWhenContextNotRequiresResolveAndForceHintDisableCompressionThenResourceNotSuitableForCompression) {
    DebugManagerStateRestore flagsGuard;
    DebugManager.flags.ToggleHintKernelDisableCompression.set(0);

    MemoryProperties defaultProperties;
    MockContext context;

    context.contextType = ContextType::CONTEXT_TYPE_SPECIALIZED;
    context.resolvesRequiredInKernels = false;

    EXPECT_FALSE(MemObjHelper::isSuitableForCompression(true, defaultProperties, context, true));
}

// Context requires resolves, but ToggleHintKernelDisableCompression is set to 1: compression
// is expected to be accepted.
TEST(MemObjHelper, givenCompressionEnabledAndPreferredWhenContextRequiresResolveAndForceHintEnableCompressionThenResourceSuitableForCompression) {
    DebugManagerStateRestore flagsGuard;
    DebugManager.flags.ToggleHintKernelDisableCompression.set(1);

    MemoryProperties defaultProperties;
    MockContext context;

    context.contextType = ContextType::CONTEXT_TYPE_SPECIALIZED;
    context.resolvesRequiredInKernels = true;

    EXPECT_TRUE(MemObjHelper::isSuitableForCompression(true, defaultProperties, context, true));
}

// Opening part of a test whose body continues past the end of this span; reproduced unchanged.
TEST(MemObjHelper, givenDifferentCapabilityAndDebugFlagValuesWhenCheckingBufferCompressionSupportThenCorrectValueIsReturned) {
    DebugManagerStateRestore debugRestore;
    VariableBackup renderCompressedBuffersCapability{&defaultHwInfo->capabilityTable.ftrRenderCompressedBuffers};
    int32_t enableMultiTileCompressionValues[] = {-1, 0, 1};
    auto &clHwHelper = ClHwHelper::get(defaultHwInfo->platform.eRenderCoreFamily);

    for (auto ftrRenderCompressedBuffers : ::testing::Bool()) {
        renderCompressedBuffersCapability = ftrRenderCompressedBuffers;
        for (auto enableMultiTileCompressionValue : enableMultiTileCompressionValues) {
// Innermost loop body of the capability/debug-flag test that opens on the previous line.
            DebugManager.flags.EnableMultiTileCompression.set(enableMultiTileCompressionValue);

            MockSpecializedContext context;
            auto &device = context.getDevice(0)->getDevice();
            MemoryProperties memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(0, 0, 0, &device);

            bool compressionEnabled = MemObjHelper::isSuitableForCompression(HwHelper::compressedBuffersSupported(*defaultHwInfo), memoryProperties, context, true);

            // compressionEnabled is passed on to this helper; its result is not captured here.
            MockPublicAccessBuffer::getGraphicsAllocationTypeAndCompressionPreference(
                memoryProperties, context, compressionEnabled, false);

            // Compression is expected only with the capability on and the debug flag equal to 1,
            // and then only if the HW helper allows it for this context.
            bool expectBufferCompressed = ftrRenderCompressedBuffers && (enableMultiTileCompressionValue == 1);
            if (expectBufferCompressed && clHwHelper.allowCompressionForContext(*context.getDevice(0), context)) {
                EXPECT_TRUE(compressionEnabled);
            } else {
                EXPECT_FALSE(compressionEnabled);
            }
        }
    }
}

// Sweeps steppings, sub-device counts, context types and flag values. The capability is
// forced to true and EnableMultiTileCompression to 1 for the whole test. The loops continue
// on the next line.
TEST(MemObjHelper, givenDifferentValuesWhenCheckingBufferCompressionSupportThenCorrectValueIsReturned) {
    DebugManagerStateRestore debugRestore;
    VariableBackup renderCompressedBuffersCapability{&defaultHwInfo->capabilityTable.ftrRenderCompressedBuffers, true};
    VariableBackup hardwareStepping{&defaultHwInfo->platform.usRevId};
    DebugManager.flags.EnableMultiTileCompression.set(1);

    // Value sets that the nested loops iterate over.
    uint32_t numsSubDevices[] = {0, 2};
    cl_mem_flags flagsValues[] = {0, CL_MEM_READ_ONLY | CL_MEM_HOST_NO_ACCESS, CL_MEM_COMPRESSED_HINT_INTEL,
                                  CL_MEM_UNCOMPRESSED_HINT_INTEL};
    cl_mem_flags_intel flagsIntelValues[] = {0, CL_MEM_COMPRESSED_HINT_INTEL, CL_MEM_UNCOMPRESSED_HINT_INTEL};
    uint32_t contextTypes[] = {ContextType::CONTEXT_TYPE_DEFAULT, ContextType::CONTEXT_TYPE_SPECIALIZED,
                               ContextType::CONTEXT_TYPE_UNRESTRICTIVE};
    __REVID steppingValues[] = {REVISION_A0, REVISION_B};
    const auto &hwInfoConfig = *HwInfoConfig::get(defaultHwInfo->platform.eProductFamily);
    auto &clHwHelper = ClHwHelper::get(defaultHwInfo->platform.eRenderCoreFamily);

    for (auto stepping : steppingValues) {
        hardwareStepping = hwInfoConfig.getHwRevIdFromStepping(stepping, *defaultHwInfo);
        if
// Interior of the sweep test that opens on the previous line; its final closing brace is on
// the next line.
           (hardwareStepping == CommonConstants::invalidStepping) {
            // Skip steppings that map to the invalid-stepping value.
            continue;
        }

        for (auto numSubDevices : numsSubDevices) {
            UltClDeviceFactory clDeviceFactory{1, numSubDevices};

            for (auto contextType : contextTypes) {
                // With no sub-devices only the default context type is exercised.
                if ((numSubDevices == 0) && (contextType != ContextType::CONTEXT_TYPE_DEFAULT)) {
                    continue;
                }

                // Device list per context type: root device unless specialized, first two
                // sub-devices unless default.
                ClDeviceVector contextDevices;
                if (contextType != ContextType::CONTEXT_TYPE_SPECIALIZED) {
                    contextDevices.push_back(clDeviceFactory.rootDevices[0]);
                }
                if (contextType != ContextType::CONTEXT_TYPE_DEFAULT) {
                    contextDevices.push_back(clDeviceFactory.subDevices[0]);
                    contextDevices.push_back(clDeviceFactory.subDevices[1]);
                }
                MockContext context{contextDevices};

                for (auto flags : flagsValues) {
                    for (auto flagsIntel : flagsIntelValues) {
                        auto &device = context.getDevice(0)->getDevice();
                        MemoryProperties memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(flags, flagsIntel, 0, &device);
                        bool compressionEnabled = MemObjHelper::isSuitableForCompression(HwHelper::compressedBuffersSupported(*defaultHwInfo), memoryProperties, context, true);
                        MockPublicAccessBuffer::getGraphicsAllocationTypeAndCompressionPreference(
                            memoryProperties, context, compressionEnabled, false);

                        // An uncompressed hint in either field disables the expectation outright.
                        bool isCompressionDisabled = isValueSet(flags, CL_MEM_UNCOMPRESSED_HINT_INTEL) ||
                                                     isValueSet(flagsIntel, CL_MEM_UNCOMPRESSED_HINT_INTEL);
                        bool expectBufferCompressed = !isCompressionDisabled;

                        // With more than one sub-device the expectation additionally needs the
                        // HW helper's approval and either a specialized context or a buffer
                        // flagged read-only with no host access.
                        bool isMultiTile = (numSubDevices > 1);
                        if (expectBufferCompressed && isMultiTile) {
                            bool isBufferReadOnly = isValueSet(flags, CL_MEM_READ_ONLY | CL_MEM_HOST_NO_ACCESS);
                            expectBufferCompressed = clHwHelper.allowCompressionForContext(*context.getDevice(0), context) &&
                                                     ((contextType == ContextType::CONTEXT_TYPE_SPECIALIZED) || isBufferReadOnly);
                        }

                        if (expectBufferCompressed) {
                            EXPECT_TRUE(compressionEnabled);
                        } else {
                            EXPECT_FALSE(compressionEnabled);
                        }
                    }
                }
            }
        }
}compute-runtime-22.14.22890/opencl/test/unit_test/mem_obj/mem_obj_tests.cpp000066400000000000000000000747741422164147700266200ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/device/device.h" #include "shared/source/gmm_helper/gmm.h" #include "shared/source/memory_manager/allocations_list.h" #include "shared/source/os_interface/os_context.h" #include "shared/test/common/mocks/mock_allocation_properties.h" #include "shared/test/common/mocks/mock_deferred_deleter.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "opencl/source/helpers/cl_memory_properties_helpers.h" #include "opencl/source/helpers/properties_helper.h" #include "opencl/source/mem_obj/mem_obj.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/command_queue/command_queue_fixture.h" #include "opencl/test/unit_test/fixtures/multi_root_device_fixture.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "gtest/gtest.h" using namespace NEO; struct MySharingHandler : public SharingHandler { MySharingHandler(MemObj *memObj) : memObj(memObj) { auto alloc = getAllocation(); if (alloc) { alloc->incReuseCount(); } } MySharingHandler(GraphicsAllocation *allocation) : allocation(allocation) { auto alloc = getAllocation(); if (alloc) { alloc->incReuseCount(); } } void releaseReusedGraphicsAllocation() override { auto alloc = getAllocation(); if (alloc) { alloc->decReuseCount(); } } GraphicsAllocation *getAllocation() { if (memObj) { return memObj->getMultiGraphicsAllocation().getDefaultGraphicsAllocation(); } return allocation; } MemObj *memObj = nullptr; GraphicsAllocation *allocation 
= nullptr;
};

// Builds a MemObj over a caller-provided stack buffer with CL_MEM_USE_HOST_PTR and checks that the
// CPU address, host pointer, size and flags it reports are the ones passed to the constructor.
TEST(MemObj, GivenMemObjWhenInititalizedFromHostPtrThenInitializeFields) {
    const size_t size = 64;
    char buffer[size];
    MockContext context;
    MockGraphicsAllocation *mockAllocation = new MockGraphicsAllocation(buffer, sizeof(buffer));
    auto memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_USE_HOST_PTR, 0, 0, &context.getDevice(0)->getDevice());
    MemObj memObj(&context, CL_MEM_OBJECT_BUFFER, memoryProperties, CL_MEM_USE_HOST_PTR, 0,
                  sizeof(buffer), buffer, buffer, GraphicsAllocationHelper::toMultiGraphicsAllocation(mockAllocation), true, false, false);

    EXPECT_EQ(&buffer, memObj.getCpuAddress());
    EXPECT_EQ(&buffer, memObj.getHostPtr());
    EXPECT_EQ(size, memObj.getSize());
    // The cast gives the literal the same type as getFlags() so EXPECT_EQ compares like with like.
    EXPECT_EQ(static_cast<cl_mem_flags>(CL_MEM_USE_HOST_PTR), memObj.getFlags());
}

// Calls transferDataToHostPtr() on a plain MemObj, expects it to throw, and then checks that the
// host buffer still holds the zeros written just before the call.
TEST(MemObj, givenMemObjectWhenAskedForTransferToHostPtrThenDoNothing) {
    const size_t size = 64;
    uint8_t hostPtr[size] = {};
    uint8_t expectedHostPtr[size] = {};
    MockContext context;
    MockGraphicsAllocation *mockAllocation = new MockGraphicsAllocation(hostPtr, sizeof(hostPtr));
    auto memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_USE_HOST_PTR, 0, 0, &context.getDevice(0)->getDevice());
    MemObj memObj(&context, CL_MEM_OBJECT_BUFFER, memoryProperties, CL_MEM_USE_HOST_PTR, 0,
                  size, hostPtr, hostPtr, GraphicsAllocationHelper::toMultiGraphicsAllocation(mockAllocation), true, false, false);

    memset(memObj.getCpuAddress(), 123, size);
    memset(hostPtr, 0, size);

    MemObjOffsetArray copyOffset = {{0, 0, 0}};
    MemObjSizeArray copySize = {{size, 0, 0}};
    EXPECT_THROW(memObj.transferDataToHostPtr(copySize, copyOffset), std::exception);

    EXPECT_TRUE(memcmp(hostPtr, expectedHostPtr, size) == 0);
}

// Same scenario for transferDataFromHostPtr() on a CL_MEM_OBJECT_PIPE object (continues on the next line).
TEST(MemObj, givenMemObjectWhenAskedForTransferFromHostPtrThenDoNothing) {
    const size_t size = 64;
    uint8_t hostPtr[size] = {};
    uint8_t expectedBufferPtr[size] = {};
    MockContext context;
    MockGraphicsAllocation *mockAllocation = new MockGraphicsAllocation(hostPtr, sizeof(hostPtr));
    auto memoryProperties =
ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_USE_HOST_PTR, 0, 0, &context.getDevice(0)->getDevice()); MemObj memObj(&context, CL_MEM_OBJECT_PIPE, memoryProperties, CL_MEM_USE_HOST_PTR, 0, size, hostPtr, hostPtr, GraphicsAllocationHelper::toMultiGraphicsAllocation(mockAllocation), true, false, false); memset(memObj.getCpuAddress(), 123, size); memset(expectedBufferPtr, 123, size); MemObjOffsetArray copyOffset = {{0, 0, 0}}; MemObjSizeArray copySize = {{size, 0, 0}}; EXPECT_THROW(memObj.transferDataFromHostPtr(copySize, copyOffset), std::exception); EXPECT_TRUE(memcmp(memObj.getCpuAddress(), expectedBufferPtr, size) == 0); } TEST(MemObj, givenHostPtrAndUseHostPtrFlagWhenAskingForBaseMapPtrThenReturnHostPtr) { uint8_t hostPtr = 0; MockContext context; auto memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_USE_HOST_PTR, 0, 0, &context.getDevice(0)->getDevice()); MemObj memObj(&context, CL_MEM_OBJECT_BUFFER, memoryProperties, CL_MEM_USE_HOST_PTR, 0, 1, nullptr, &hostPtr, 0, true, false, false); EXPECT_EQ(&hostPtr, memObj.getBasePtrForMap(context.getDevice(0)->getRootDeviceIndex())); } TEST(MemObj, givenHostPtrWithoutUseHostPtrFlagWhenAskingForBaseMapPtrThenReturnAllocatedPtr) { uint8_t hostPtr = 0; MockContext context; auto memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_COPY_HOST_PTR, 0, 0, &context.getDevice(0)->getDevice()); MemObj memObj(&context, CL_MEM_OBJECT_BUFFER, memoryProperties, CL_MEM_COPY_HOST_PTR, 0, 1, nullptr, &hostPtr, 0, true, false, false); EXPECT_NE(&hostPtr, memObj.getBasePtrForMap(context.getDevice(0)->getRootDeviceIndex())); EXPECT_EQ(memObj.getAllocatedMapPtr(), memObj.getBasePtrForMap(context.getDevice(0)->getRootDeviceIndex())); } TEST(MemObj, givenMemObjWhenReleaseAllocatedPtrIsCalledTwiceThenItDoesntCrash) { void *allocatedPtr = alignedMalloc(MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize); MockContext context; auto memoryProperties = 
ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_USE_HOST_PTR, 0, 0, &context.getDevice(0)->getDevice()); MemObj memObj(&context, CL_MEM_OBJECT_BUFFER, memoryProperties, CL_MEM_USE_HOST_PTR, 0, 1, nullptr, nullptr, 0, true, false, false); memObj.setAllocatedMapPtr(allocatedPtr); memObj.releaseAllocatedMapPtr(); EXPECT_EQ(nullptr, memObj.getAllocatedMapPtr()); memObj.releaseAllocatedMapPtr(); EXPECT_EQ(nullptr, memObj.getAllocatedMapPtr()); } TEST(MemObj, givenNotReadyGraphicsAllocationWhenMemObjDestroysAllocationAsyncThenAllocationIsAddedToMemoryManagerAllocationList) { MockContext context; auto memoryManager = context.getDevice(0)->getExecutionEnvironment()->memoryManager.get(); auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{context.getDevice(0)->getRootDeviceIndex(), MemoryConstants::pageSize}); auto defaultEngine = context.getDevice(0)->getDefaultEngine(); allocation->updateTaskCount(2, defaultEngine.osContext->getContextId()); *(defaultEngine.commandStreamReceiver->getTagAddress()) = 1; auto memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_COPY_HOST_PTR, 0, 0, &context.getDevice(0)->getDevice()); MemObj memObj(&context, CL_MEM_OBJECT_BUFFER, memoryProperties, CL_MEM_COPY_HOST_PTR, 0, MemoryConstants::pageSize, nullptr, nullptr, 0, true, false, false); auto &allocationList = defaultEngine.commandStreamReceiver->getTemporaryAllocations(); EXPECT_TRUE(allocationList.peekIsEmpty()); memObj.destroyGraphicsAllocation(allocation, true); EXPECT_FALSE(allocationList.peekIsEmpty()); } TEST(MemObj, givenReadyGraphicsAllocationWhenMemObjDestroysAllocationAsyncThenAllocationIsNotAddedToMemoryManagerAllocationList) { ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); auto device = std::make_unique(MockDevice::create(executionEnvironment, 0)); MockContext context(device.get()); auto memoryManager = executionEnvironment->memoryManager.get(); auto allocation = 
memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), MemoryConstants::pageSize}); allocation->updateTaskCount(1, device->getDefaultEngine().osContext->getContextId()); *device->getDefaultEngine().commandStreamReceiver->getTagAddress() = 1; auto memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_USE_HOST_PTR, 0, 0, &context.getDevice(0)->getDevice()); MemObj memObj(&context, CL_MEM_OBJECT_BUFFER, memoryProperties, CL_MEM_COPY_HOST_PTR, 0, MemoryConstants::pageSize, nullptr, nullptr, 0, true, false, false); auto &allocationList = device->getDefaultEngine().commandStreamReceiver->getTemporaryAllocations(); EXPECT_TRUE(allocationList.peekIsEmpty()); memObj.destroyGraphicsAllocation(allocation, true); EXPECT_TRUE(allocationList.peekIsEmpty()); } TEST(MemObj, givenNotUsedGraphicsAllocationWhenMemObjDestroysAllocationAsyncThenAllocationIsNotAddedToMemoryManagerAllocationList) { MockContext context; MockMemoryManager memoryManager(*context.getDevice(0)->getExecutionEnvironment()); context.memoryManager = &memoryManager; auto allocation = memoryManager.allocateGraphicsMemoryWithProperties(MockAllocationProperties{context.getDevice(0)->getRootDeviceIndex(), MemoryConstants::pageSize}); auto memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_COPY_HOST_PTR, 0, 0, &context.getDevice(0)->getDevice()); MemObj memObj(&context, CL_MEM_OBJECT_BUFFER, memoryProperties, CL_MEM_COPY_HOST_PTR, 0, MemoryConstants::pageSize, nullptr, nullptr, 0, true, false, false); auto &allocationList = context.getDevice(0)->getDefaultEngine().commandStreamReceiver->getTemporaryAllocations(); EXPECT_TRUE(allocationList.peekIsEmpty()); memObj.destroyGraphicsAllocation(allocation, true); EXPECT_TRUE(allocationList.peekIsEmpty()); } TEST(MemObj, givenMemoryManagerWithoutDeviceWhenMemObjDestroysAllocationAsyncThenAllocationIsNotAddedToMemoryManagerAllocationList) { MockContext context; MockMemoryManager 
memoryManager(*context.getDevice(0)->getExecutionEnvironment()); context.memoryManager = &memoryManager; auto allocation = memoryManager.allocateGraphicsMemoryWithProperties(MockAllocationProperties{context.getDevice(0)->getRootDeviceIndex(), MemoryConstants::pageSize}); auto memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_COPY_HOST_PTR, 0, 0, &context.getDevice(0)->getDevice()); MemObj memObj(&context, CL_MEM_OBJECT_BUFFER, memoryProperties, CL_MEM_COPY_HOST_PTR, 0, MemoryConstants::pageSize, nullptr, nullptr, 0, true, false, false); auto &allocationList = context.getDevice(0)->getDefaultEngine().commandStreamReceiver->getTemporaryAllocations(); EXPECT_TRUE(allocationList.peekIsEmpty()); memObj.destroyGraphicsAllocation(allocation, true); EXPECT_TRUE(allocationList.peekIsEmpty()); } TEST(MemObj, givenMemObjAndPointerToObjStorageWithProperCommandWhenCheckIfMemTransferRequiredThenReturnFalse) { MockContext context; MockMemoryManager memoryManager(*context.getDevice(0)->getExecutionEnvironment()); context.memoryManager = &memoryManager; auto memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_COPY_HOST_PTR, 0, 0, &context.getDevice(0)->getDevice()); MemObj memObj(&context, CL_MEM_OBJECT_BUFFER, memoryProperties, CL_MEM_COPY_HOST_PTR, 0, MemoryConstants::pageSize, nullptr, nullptr, 0, true, false, false); void *ptr = memObj.getCpuAddressForMemoryTransfer(); bool isMemTransferNeeded = memObj.checkIfMemoryTransferIsRequired(0, 0, ptr, CL_COMMAND_WRITE_BUFFER); EXPECT_FALSE(isMemTransferNeeded); isMemTransferNeeded = memObj.checkIfMemoryTransferIsRequired(0, 0, ptr, CL_COMMAND_READ_BUFFER); EXPECT_FALSE(isMemTransferNeeded); isMemTransferNeeded = memObj.checkIfMemoryTransferIsRequired(0, 0, ptr, CL_COMMAND_WRITE_BUFFER_RECT); EXPECT_FALSE(isMemTransferNeeded); isMemTransferNeeded = memObj.checkIfMemoryTransferIsRequired(0, 0, ptr, CL_COMMAND_READ_BUFFER_RECT); EXPECT_FALSE(isMemTransferNeeded); isMemTransferNeeded = 
memObj.checkIfMemoryTransferIsRequired(0, 0, ptr, CL_COMMAND_WRITE_IMAGE); EXPECT_FALSE(isMemTransferNeeded); isMemTransferNeeded = memObj.checkIfMemoryTransferIsRequired(0, 0, ptr, CL_COMMAND_READ_IMAGE); EXPECT_FALSE(isMemTransferNeeded); } TEST(MemObj, givenMemObjAndPointerToObjStorageBadCommandWhenCheckIfMemTransferRequiredThenReturnTrue) { MockContext context; MockMemoryManager memoryManager(*context.getDevice(0)->getExecutionEnvironment()); context.memoryManager = &memoryManager; auto memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_COPY_HOST_PTR, 0, 0, &context.getDevice(0)->getDevice()); MemObj memObj(&context, CL_MEM_OBJECT_BUFFER, memoryProperties, CL_MEM_COPY_HOST_PTR, 0, MemoryConstants::pageSize, nullptr, nullptr, 0, true, false, false); void *ptr = memObj.getCpuAddressForMemoryTransfer(); bool isMemTransferNeeded = memObj.checkIfMemoryTransferIsRequired(0, 0, ptr, CL_COMMAND_FILL_BUFFER); EXPECT_TRUE(isMemTransferNeeded); } TEST(MemObj, givenMemObjAndPointerToDiffrentStorageAndProperCommandWhenCheckIfMemTransferRequiredThenReturnTrue) { MockContext context; MockMemoryManager memoryManager(*context.getDevice(0)->getExecutionEnvironment()); context.memoryManager = &memoryManager; auto memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_COPY_HOST_PTR, 0, 0, &context.getDevice(0)->getDevice()); MemObj memObj(&context, CL_MEM_OBJECT_BUFFER, memoryProperties, CL_MEM_COPY_HOST_PTR, 0, MemoryConstants::pageSize, nullptr, nullptr, 0, true, false, false); void *ptr = (void *)0x1234; bool isMemTransferNeeded = memObj.checkIfMemoryTransferIsRequired(0, 0, ptr, CL_COMMAND_WRITE_BUFFER); EXPECT_TRUE(isMemTransferNeeded); } TEST(MemObj, givenSharingHandlerWhenAskedForCpuMappingThenReturnFalse) { MockContext context; MockMemoryManager memoryManager(*context.getDevice(0)->getExecutionEnvironment()); auto allocation = 
memoryManager.allocateGraphicsMemoryWithProperties(MockAllocationProperties{context.getDevice(0)->getRootDeviceIndex(), MemoryConstants::pageSize}); auto memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_COPY_HOST_PTR, 0, 0, &context.getDevice(0)->getDevice()); MemObj memObj(&context, CL_MEM_OBJECT_BUFFER, memoryProperties, CL_MEM_COPY_HOST_PTR, 0, MemoryConstants::pageSize, nullptr, nullptr, GraphicsAllocationHelper::toMultiGraphicsAllocation(allocation), true, false, false); memObj.setSharingHandler(new SharingHandler()); EXPECT_FALSE(memObj.mappingOnCpuAllowed()); } TEST(MemObj, givenTiledObjectWhenAskedForCpuMappingThenReturnFalse) { struct MyMemObj : public MemObj { using MemObj::MemObj; bool isTiledAllocation() const override { return true; } }; MockContext context; auto memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_COPY_HOST_PTR, 0, 0, &context.getDevice(0)->getDevice()); MyMemObj memObj(nullptr, CL_MEM_OBJECT_BUFFER, memoryProperties, CL_MEM_COPY_HOST_PTR, 0, MemoryConstants::pageSize, nullptr, nullptr, 0, true, false, false); EXPECT_FALSE(memObj.mappingOnCpuAllowed()); } TEST(MemObj, givenCompressedGmmWhenAskingForMappingOnCpuThenDisallow) { MockContext context; MockMemoryManager memoryManager(*context.getDevice(0)->getExecutionEnvironment()); context.memoryManager = &memoryManager; auto allocation = memoryManager.allocateGraphicsMemoryWithProperties(MockAllocationProperties{context.getDevice(0)->getRootDeviceIndex(), MemoryConstants::pageSize}); allocation->setDefaultGmm(new Gmm(context.getDevice(0)->getGmmClientContext(), nullptr, 1, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true)); auto memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_READ_WRITE, 0, 0, &context.getDevice(0)->getDevice()); MemObj memObj(&context, CL_MEM_OBJECT_BUFFER, memoryProperties, CL_MEM_READ_WRITE, 0, 1, allocation->getUnderlyingBuffer(), nullptr, 
GraphicsAllocationHelper::toMultiGraphicsAllocation(allocation), false, false, false); allocation->getDefaultGmm()->isCompressionEnabled = false; EXPECT_TRUE(memObj.mappingOnCpuAllowed()); allocation->getDefaultGmm()->isCompressionEnabled = true; EXPECT_FALSE(memObj.mappingOnCpuAllowed()); } TEST(MemObj, givenDefaultWhenAskedForCpuMappingThenReturnTrue) { MockContext context; MockMemoryManager memoryManager(*context.getDevice(0)->getExecutionEnvironment()); context.memoryManager = &memoryManager; auto allocation = memoryManager.allocateGraphicsMemoryWithProperties(MockAllocationProperties{context.getDevice(0)->getRootDeviceIndex(), MemoryConstants::pageSize}); auto memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_COPY_HOST_PTR, 0, 0, &context.getDevice(0)->getDevice()); MemObj memObj(&context, CL_MEM_OBJECT_BUFFER, memoryProperties, CL_MEM_COPY_HOST_PTR, 0, 64, allocation->getUnderlyingBuffer(), nullptr, GraphicsAllocationHelper::toMultiGraphicsAllocation(allocation), true, false, false); EXPECT_FALSE(memObj.isTiledAllocation()); EXPECT_FALSE(memObj.peekSharingHandler()); EXPECT_TRUE(memObj.mappingOnCpuAllowed()); } TEST(MemObj, givenNonCpuAccessibleMemoryWhenAskingForMappingOnCpuThenDisallow) { MockContext context; MockMemoryManager memoryManager(*context.getDevice(0)->getExecutionEnvironment()); context.memoryManager = &memoryManager; auto allocation = memoryManager.allocateGraphicsMemoryWithProperties(MockAllocationProperties{context.getDevice(0)->getRootDeviceIndex(), MemoryConstants::pageSize}); allocation->setDefaultGmm(new Gmm(context.getDevice(0)->getGmmClientContext(), nullptr, 1, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true)); auto memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_READ_WRITE, 0, 0, &context.getDevice(0)->getDevice()); MemObj memObj(&context, CL_MEM_OBJECT_BUFFER, memoryProperties, CL_MEM_READ_WRITE, 0, 1, allocation->getUnderlyingBuffer(), nullptr, 
GraphicsAllocationHelper::toMultiGraphicsAllocation(allocation), false, false, false); EXPECT_TRUE(memObj.mappingOnCpuAllowed()); reinterpret_cast(allocation)->overrideMemoryPool(MemoryPool::SystemCpuInaccessible); EXPECT_FALSE(memObj.mappingOnCpuAllowed()); } TEST(MemObj, givenMultipleMemObjectsWithReusedGraphicsAllocationWhenDestroyedThenFreeAllocationOnce) { MockContext context; MockMemoryManager memoryManager(*context.getDevice(0)->getExecutionEnvironment()); context.memoryManager = &memoryManager; auto allocation = memoryManager.allocateGraphicsMemoryWithProperties(MockAllocationProperties{context.getDevice(0)->getRootDeviceIndex(), MemoryConstants::pageSize}); std::unique_ptr memObj1(new MemObj(&context, CL_MEM_OBJECT_BUFFER, {}, 0, 0, 1, nullptr, nullptr, GraphicsAllocationHelper::toMultiGraphicsAllocation(allocation), true, false, false)); memObj1->setSharingHandler(new MySharingHandler(allocation)); std::unique_ptr memObj2(new MemObj(&context, CL_MEM_OBJECT_BUFFER, {}, 0, 0, 1, nullptr, nullptr, GraphicsAllocationHelper::toMultiGraphicsAllocation(allocation), true, false, false)); memObj2->setSharingHandler(new MySharingHandler(allocation)); std::unique_ptr memObj3(new MemObj(&context, CL_MEM_OBJECT_BUFFER, {}, 0, 0, 1, nullptr, nullptr, GraphicsAllocationHelper::toMultiGraphicsAllocation(allocation), true, false, false)); memObj3->setSharingHandler(new MySharingHandler(allocation)); EXPECT_EQ(3u, allocation->peekReuseCount()); memObj3.reset(nullptr); EXPECT_EQ(2u, allocation->peekReuseCount()); memObj1.reset(nullptr); EXPECT_EQ(1u, allocation->peekReuseCount()); memObj2.reset(nullptr); } TEST(MemObj, givenMemObjectWhenContextIsNotNullThenContextOutlivesMemobjects) { MockContext context; EXPECT_EQ(1, context.getRefInternalCount()); { MemObj memObj(&context, 0, {}, 0, 0, 0, nullptr, nullptr, 0, false, false, false); EXPECT_EQ(2, context.getRefInternalCount()); } EXPECT_EQ(1, context.getRefInternalCount()); } TEST(MemObj, 
givenSharedMemObjectWithNullGfxAllocationWhenSettingGfxAllocationThenSucceed) { MockContext context; MockMemoryManager memoryManager(*context.getDevice(0)->getExecutionEnvironment()); context.memoryManager = &memoryManager; MockGraphicsAllocation *gfxAllocation = new MockGraphicsAllocation(nullptr, 0); auto memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_USE_HOST_PTR, 0, 0, &context.getDevice(0)->getDevice()); MemObj memObj(&context, CL_MEM_OBJECT_BUFFER, memoryProperties, CL_MEM_USE_HOST_PTR, 0, 1, nullptr, nullptr, 0, true, false, false); memObj.setSharingHandler(new MySharingHandler(&memObj)); memObj.resetGraphicsAllocation(gfxAllocation); gfxAllocation->incReuseCount(); ASSERT_EQ(1u, gfxAllocation->peekReuseCount()); EXPECT_EQ(gfxAllocation, memObj.getGraphicsAllocation(context.getDevice(0)->getRootDeviceIndex())); } TEST(MemObj, givenSharedMemObjectAndGfxAllocationWhenGraphicsAllocationIsRemovedThenTheAllocationIsNotAvailable) { MockContext context; MockMemoryManager memoryManager(*context.getDevice(0)->getExecutionEnvironment()); context.memoryManager = &memoryManager; MockGraphicsAllocation *graphicsAllocation = new MockGraphicsAllocation(nullptr, 0); auto memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_USE_HOST_PTR, 0, 0, &context.getDevice(0)->getDevice()); MemObj memObj(&context, CL_MEM_OBJECT_BUFFER, memoryProperties, CL_MEM_USE_HOST_PTR, 0, 1, nullptr, nullptr, GraphicsAllocationHelper::toMultiGraphicsAllocation(graphicsAllocation), true, false, false); memObj.setSharingHandler(new MySharingHandler(&memObj)); graphicsAllocation->decReuseCount(); memObj.removeGraphicsAllocation(context.getDevice(0)->getRootDeviceIndex()); EXPECT_EQ(nullptr, memObj.getGraphicsAllocation(context.getDevice(0)->getRootDeviceIndex())); } TEST(MemObj, givenSharedMemObjectAndZeroReuseCountWhenChangingGfxAllocationThenOldAllocationIsDestroyed) { MockContext context; MockMemoryManager 
memoryManager(*context.getDevice(0)->getExecutionEnvironment()); context.memoryManager = &memoryManager; MockGraphicsAllocation *oldGfxAllocation = new MockGraphicsAllocation(nullptr, 0); MockGraphicsAllocation *newGfxAllocation = new MockGraphicsAllocation(nullptr, 0); auto memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_USE_HOST_PTR, 0, 0, &context.getDevice(0)->getDevice()); MemObj memObj(&context, CL_MEM_OBJECT_BUFFER, memoryProperties, CL_MEM_USE_HOST_PTR, 0, 1, nullptr, nullptr, GraphicsAllocationHelper::toMultiGraphicsAllocation(oldGfxAllocation), true, false, false); memObj.setSharingHandler(new MySharingHandler(&memObj)); oldGfxAllocation->decReuseCount(); memObj.resetGraphicsAllocation(newGfxAllocation); newGfxAllocation->incReuseCount(); ASSERT_EQ(1u, newGfxAllocation->peekReuseCount()); EXPECT_EQ(newGfxAllocation, memObj.getGraphicsAllocation(context.getDevice(0)->getRootDeviceIndex())); } TEST(MemObj, givenSharedMemObjectAndNonZeroReuseCountWhenChangingGfxAllocationThenOldAllocationIsNotDestroyed) { MockContext context; MockMemoryManager memoryManager(*context.getDevice(0)->getExecutionEnvironment()); context.memoryManager = &memoryManager; MockGraphicsAllocation *oldGfxAllocation = new MockGraphicsAllocation(nullptr, 0); MockGraphicsAllocation *newGfxAllocation = new MockGraphicsAllocation(nullptr, 0); auto memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_USE_HOST_PTR, 0, 0, &context.getDevice(0)->getDevice()); MemObj memObj(&context, CL_MEM_OBJECT_BUFFER, memoryProperties, CL_MEM_USE_HOST_PTR, 0, 1, nullptr, nullptr, GraphicsAllocationHelper::toMultiGraphicsAllocation(oldGfxAllocation), true, false, false); memObj.setSharingHandler(new MySharingHandler(&memObj)); memObj.resetGraphicsAllocation(newGfxAllocation); newGfxAllocation->incReuseCount(); ASSERT_EQ(1u, newGfxAllocation->peekReuseCount()); EXPECT_EQ(newGfxAllocation, memObj.getGraphicsAllocation(context.getDevice(0)->getRootDeviceIndex())); 
memoryManager.checkGpuUsageAndDestroyGraphicsAllocations(oldGfxAllocation); } TEST(MemObj, givenNotSharedMemObjectWhenChangingGfxAllocationThenOldAllocationIsDestroyed) { MockContext context; MockMemoryManager memoryManager(*context.getDevice(0)->getExecutionEnvironment()); context.memoryManager = &memoryManager; MockGraphicsAllocation *oldGfxAllocation = new MockGraphicsAllocation(nullptr, 0); MockGraphicsAllocation *newGfxAllocation = new MockGraphicsAllocation(nullptr, 0); auto memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_USE_HOST_PTR, 0, 0, &context.getDevice(0)->getDevice()); MemObj memObj(&context, CL_MEM_OBJECT_BUFFER, memoryProperties, CL_MEM_USE_HOST_PTR, 0, 1, nullptr, nullptr, GraphicsAllocationHelper::toMultiGraphicsAllocation(oldGfxAllocation), true, false, false); memObj.resetGraphicsAllocation(newGfxAllocation); EXPECT_EQ(newGfxAllocation, memObj.getGraphicsAllocation(context.getDevice(0)->getRootDeviceIndex())); } TEST(MemObj, givenGraphicsAllocationWhenCallingIsAllocDumpableThenItReturnsTheCorrectValue) { MockGraphicsAllocation gfxAllocation(nullptr, 0); EXPECT_FALSE(gfxAllocation.isAllocDumpable()); gfxAllocation.setAllocDumpable(true, false); EXPECT_TRUE(gfxAllocation.isAllocDumpable()); } TEST(MemObj, givenMemObjNotUsingHostPtrWhenGettingBasePtrTwiceThenReturnSameMapPtr) { MockContext context; auto memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_READ_WRITE, 0, 0, &context.getDevice(0)->getDevice()); MemObj memObj(&context, CL_MEM_OBJECT_BUFFER, memoryProperties, CL_MEM_READ_WRITE, 0, 1, nullptr, nullptr, 0, true, false, false); void *mapPtr = memObj.getBasePtrForMap(context.getDevice(0)->getRootDeviceIndex()); EXPECT_NE(nullptr, mapPtr); auto mapAllocation = memObj.getMapAllocation(context.getDevice(0)->getRootDeviceIndex()); ASSERT_NE(nullptr, mapAllocation); EXPECT_EQ(mapPtr, mapAllocation->getUnderlyingBuffer()); EXPECT_EQ(mapPtr, memObj.getAllocatedMapPtr()); } using 
MemObjMultiRootDeviceTests = MultiRootDeviceFixture; TEST_F(MemObjMultiRootDeviceTests, WhenMemObjMapIsCreatedThenAllocationHasCorrectRootDeviceIndex) { auto allocation = mockMemoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{device1->getRootDeviceIndex(), MemoryConstants::pageSize}); auto memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_READ_WRITE, 0, 0, &context->getDevice(0)->getDevice()); std::unique_ptr memObj( new MemObj(context.get(), CL_MEM_OBJECT_BUFFER, memoryProperties, CL_MEM_READ_WRITE, 0, 1, nullptr, nullptr, GraphicsAllocationHelper::toMultiGraphicsAllocation(allocation), true, false, false)); void *mapPtr = memObj->getBasePtrForMap(device1->getRootDeviceIndex()); EXPECT_NE(nullptr, mapPtr); auto mapAllocation = memObj->getMapAllocation(device1->getRootDeviceIndex()); ASSERT_NE(nullptr, mapAllocation); EXPECT_EQ(expectedRootDeviceIndex, mapAllocation->getRootDeviceIndex()); memObj.reset(nullptr); } TEST_F(MemObjMultiRootDeviceTests, WhenMemObjIsCreatedWithMultiGraphicsAllocationThenAllAllocationAreDestroyedProperly) { auto allocation0 = mockMemoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{0, MemoryConstants::pageSize}); auto allocation1 = mockMemoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{1, MemoryConstants::pageSize}); auto multiGraphicsAllocation = MultiGraphicsAllocation(1); multiGraphicsAllocation.addAllocation(allocation0); multiGraphicsAllocation.addAllocation(allocation1); auto memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_READ_WRITE, 0, 0, &context->getDevice(0)->getDevice()); std::unique_ptr memObj( new MemObj(context.get(), CL_MEM_OBJECT_BUFFER, memoryProperties, CL_MEM_READ_WRITE, 0, 1, nullptr, nullptr, multiGraphicsAllocation, true, false, false)); EXPECT_NE(nullptr, memObj->getMultiGraphicsAllocation().getGraphicsAllocation(0)); EXPECT_NE(nullptr, 
memObj->getMultiGraphicsAllocation().getGraphicsAllocation(1)); EXPECT_NE(memObj->getMultiGraphicsAllocation().getGraphicsAllocation(0), memObj->getMultiGraphicsAllocation().getGraphicsAllocation(1)); memObj.reset(nullptr); } TEST_F(MemObjMultiRootDeviceTests, WhenMemObjMapAreCreatedThenAllAllocationAreDestroyedProperly) { auto allocation0 = mockMemoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{2, MemoryConstants::pageSize}); auto allocation1 = mockMemoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{1, MemoryConstants::pageSize}); auto multiGraphicsAllocation = MultiGraphicsAllocation(2); multiGraphicsAllocation.addAllocation(allocation0); multiGraphicsAllocation.addAllocation(allocation1); auto memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_READ_WRITE, 0, 0, &context->getDevice(1)->getDevice()); std::unique_ptr memObj( new MemObj(context.get(), CL_MEM_OBJECT_BUFFER, memoryProperties, CL_MEM_READ_WRITE, 0, 1, nullptr, nullptr, multiGraphicsAllocation, true, false, false)); auto mapAllocation0 = memObj->getMapAllocation(2); auto mapAllocation1 = memObj->getMapAllocation(1); EXPECT_EQ(nullptr, mapAllocation0); EXPECT_EQ(nullptr, mapAllocation1); EXPECT_NE(nullptr, memObj->getBasePtrForMap(2)); EXPECT_EQ(memObj->getBasePtrForMap(2), memObj->getBasePtrForMap(1)); mapAllocation0 = memObj->getMapAllocation(2); mapAllocation1 = memObj->getMapAllocation(1); ASSERT_NE(nullptr, mapAllocation0); ASSERT_NE(nullptr, mapAllocation1); ASSERT_NE(mapAllocation0, mapAllocation1); memObj.reset(nullptr); } compute-runtime-22.14.22890/opencl/test/unit_test/mem_obj/nv12_image_tests.cpp000066400000000000000000000566061422164147700271320ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gmm_helper/gmm.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" 
#include "shared/test/common/helpers/kernel_binary_helper.h"
#include "shared/test/common/mocks/mock_gmm_resource_info.h"
#include "shared/test/common/test_macros/test.h"

#include "opencl/source/cl_device/cl_device_get_cap.inl"
#include "opencl/source/helpers/cl_memory_properties_helpers.h"
#include "opencl/source/helpers/surface_formats.h"
#include "opencl/source/mem_obj/image.h"
#include "opencl/test/unit_test/fixtures/cl_device_fixture.h"
#include "opencl/test/unit_test/fixtures/image_fixture.h"
#include "opencl/test/unit_test/mocks/mock_buffer.h"
#include "opencl/test/unit_test/mocks/mock_command_queue.h"
#include "opencl/test/unit_test/mocks/mock_context.h"
#include "opencl/test/unit_test/test_macros/test_checks_ocl.h"

#include "gtest/gtest.h"

using namespace NEO;

// Fixture for NV12 (CL_NV12_INTEL) image tests. SetUp() prepares a 16x16 2D
// image descriptor with CL_UNORM_INT8 channel data and CL_MEM_HOST_NO_ACCESS flags.
class Nv12ImageTest : public testing::Test {
  public:
    // Recomputes the surface offsets expected for `image` from its GMM resource
    // info and compares them with what Image::getSurfaceOffsets() reports.
    void computeExpectedOffsets(Image *image) {
        SurfaceOffsets expectedSurfaceOffsets = {0};
        GMM_REQ_OFFSET_INFO reqOffsetInfo = {};
        SurfaceOffsets requestedOffsets = {0};

        // NOTE(review): target type reconstructed from the variable name and the
        // mock_gmm_resource_info.h include - confirm.
        auto mockResInfo = static_cast<MockGmmResourceInfo *>(image->getGraphicsAllocation(context.getDevice(0)->getRootDeviceIndex())->getDefaultGmm()->gmmResourceInfo.get());
        mockResInfo->getOffset(reqOffsetInfo);

        // Render offsets are only expected when the image was created from a parent mem object.
        if (image->getImageDesc().mem_object) {
            expectedSurfaceOffsets.offset = reqOffsetInfo.Render.Offset;
            expectedSurfaceOffsets.xOffset = reqOffsetInfo.Render.XOffset / (mockResInfo->getBitsPerPixel() / 8);
            expectedSurfaceOffsets.yOffset = reqOffsetInfo.Render.YOffset;
        }
        expectedSurfaceOffsets.yOffsetForUVplane = reqOffsetInfo.Lock.Offset / reqOffsetInfo.Lock.Pitch;

        image->getSurfaceOffsets(requestedOffsets);

        EXPECT_EQ(expectedSurfaceOffsets.offset, requestedOffsets.offset);
        EXPECT_EQ(expectedSurfaceOffsets.xOffset, requestedOffsets.xOffset);
        EXPECT_EQ(expectedSurfaceOffsets.yOffset, requestedOffsets.yOffset);
        EXPECT_EQ(expectedSurfaceOffsets.yOffsetForUVplane, requestedOffsets.yOffsetForUVplane);
    }

  protected:
    void SetUp() override {
        imageFormat.image_channel_data_type = CL_UNORM_INT8;
        imageFormat.image_channel_order = CL_NV12_INTEL;

        imageDesc.mem_object = nullptr;
        imageDesc.image_array_size = 0;
        imageDesc.image_depth = 1;
        imageDesc.image_height = 4 * 4; // Valid values multiple of 4
        imageDesc.image_width = 4 * 4;  // Valid values multiple of 4

        imageDesc.image_row_pitch = 0;
        imageDesc.image_slice_pitch = 0;
        imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D;
        imageDesc.num_mip_levels = 0;
        imageDesc.num_samples = 0;

        flags = CL_MEM_HOST_NO_ACCESS;
    }

    // Runs Image::validate() for the fixture's format/descriptor with the given
    // flags and stores the result in retVal.
    void validateImageWithFlags(cl_mem_flags flags) {
        auto surfaceFormat = Image::getSurfaceFormatFromTable(
            flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features);
        retVal = Image::validate(&context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context.getDevice(0)->getDevice()),
                                 surfaceFormat, &imageDesc, nullptr);
    }

    // Creates an image from the fixture's format/descriptor with the given flags.
    // The caller owns the returned pointer; the creation status is stored in retVal.
    Image *createImageWithFlags(cl_mem_flags flags) {
        auto surfaceFormat = Image::getSurfaceFormatFromTable(
            flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features);
        return Image::create(&context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context.getDevice(0)->getDevice()),
                             flags, 0, surfaceFormat, &imageDesc, nullptr, retVal);
    }

    cl_int retVal = CL_SUCCESS;
    MockContext context;
    cl_image_format imageFormat;
    cl_image_desc imageDesc;
    cl_mem_flags flags;
};

// An image created with the NV12 channel order is recognised by isNV12Image().
TEST_F(Nv12ImageTest, WhenImageIsCreatedThenIsNv12ImageIsTrue) {
    std::unique_ptr<Image> image{createImageWithFlags(CL_MEM_READ_ONLY | CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL)};
    ASSERT_NE(nullptr, image);
    EXPECT_TRUE(isNV12Image(&image->getImageFormat()));
}

// The default fixture descriptor validates successfully.
TEST_F(Nv12ImageTest, GivenValidImageWhenValidatingThenSuccessIsReturned) {
    validateImageWithFlags(flags);
    EXPECT_EQ(CL_SUCCESS, retVal);
}

// NV12 combined with CL_SNORM_INT16 channel data is rejected as an unsupported format.
TEST_F(Nv12ImageTest, GivenSnormInt16ImageTypeWhenValidatingThenImageFormatNotSupportedErrorIsReturned) {
    imageFormat.image_channel_data_type = CL_SNORM_INT16;
    validateImageWithFlags(flags);
    EXPECT_EQ(CL_IMAGE_FORMAT_NOT_SUPPORTED, retVal);
}
// NOTE(review): the test name says "ImageFormatNotSupported" but the assertion
// expects CL_INVALID_IMAGE_DESCRIPTOR; the name is left unchanged so existing
// test filters keep working.
TEST_F(Nv12ImageTest, Given1dImageTypeWhenValidatingThenImageFormatNotSupportedErrorIsReturned) {
    imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D;
    validateImageWithFlags(flags);
    EXPECT_EQ(CL_INVALID_IMAGE_DESCRIPTOR, retVal);
}

// A height that is not a multiple of 4 is rejected.
TEST_F(Nv12ImageTest, GivenInvalidImageHeightWhenValidatingThenInvalidImageDescriptorErrorIsReturned) {
    imageDesc.image_height = 17;
    validateImageWithFlags(flags);
    EXPECT_EQ(CL_INVALID_IMAGE_DESCRIPTOR, retVal);
}

// A width that is not a multiple of 4 is rejected.
TEST_F(Nv12ImageTest, GivenInvalidImageWidthWhenValidatingThenInvalidImageDescriptorErrorIsReturned) {
    imageDesc.image_width = 17;
    validateImageWithFlags(flags);
    EXPECT_EQ(CL_INVALID_IMAGE_DESCRIPTOR, retVal);
}

// Clearing CL_MEM_HOST_NO_ACCESS from the flags makes validation fail with CL_INVALID_VALUE.
TEST_F(Nv12ImageTest, GivenInvalidImageFlagWhenValidatingThenInvalidValueErrorIsReturned) {
    flags &= ~(CL_MEM_HOST_NO_ACCESS);
    validateImageWithFlags(flags);
    EXPECT_EQ(CL_INVALID_VALUE, retVal);
}

TEST_F(Nv12ImageTest, GivenYPlaneWhenValidatingThenSuccessIsReturned) {
    REQUIRE_IMAGES_OR_SKIP(&context);

    std::unique_ptr<Image> image{createImageWithFlags(CL_MEM_READ_ONLY | CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL)};
    ASSERT_NE(nullptr, image);

    imageDesc.mem_object = image.get();
    imageDesc.image_depth = 0; // Plane Y of NV12 image

    validateImageWithFlags(CL_MEM_READ_WRITE);
    EXPECT_EQ(CL_SUCCESS, retVal);
}

TEST_F(Nv12ImageTest, GivenUVPlaneWhenValidatingThenSuccessIsReturned) {
    REQUIRE_IMAGES_OR_SKIP(&context);

    std::unique_ptr<Image> image{createImageWithFlags(CL_MEM_READ_ONLY | CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL)};
    ASSERT_NE(nullptr, image);

    imageDesc.mem_object = image.get();
    imageDesc.image_depth = 1; // Plane UV of NV12 image

    validateImageWithFlags(CL_MEM_READ_WRITE);
    EXPECT_EQ(CL_SUCCESS, retVal);
}

TEST_F(Nv12ImageTest, GivenInvalidImageDepthWhenValidatingThenInvalidImageDescriptorErrorIsReturned) {
    REQUIRE_IMAGES_OR_SKIP(&context);

    std::unique_ptr<Image> image{createImageWithFlags(CL_MEM_READ_ONLY | CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL)};
    ASSERT_NE(nullptr, image);

    imageDesc.mem_object = image.get();
    imageDesc.image_depth = 3; // Invalid Plane of NV12 image

    validateImageWithFlags(CL_MEM_READ_WRITE);
    EXPECT_EQ(CL_INVALID_IMAGE_DESCRIPTOR, retVal);
}

// A 2D NV12 image used as the parent mem_object passes validateImageTraits().
TEST_F(Nv12ImageTest, given2DImageWhenPassedToValidateImageTraitsThenValidateReturnsSuccess) {
    std::unique_ptr<Image> image{createImageWithFlags(CL_MEM_READ_ONLY | CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL)};
    ASSERT_NE(nullptr, image);

    imageDesc.mem_object = image.get();
    imageDesc.image_depth = 0;

    retVal = Image::validateImageTraits(
        &context,
        ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_READ_WRITE, 0, 0, &context.getDevice(0)->getDevice()),
        &imageFormat, &imageDesc, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
}

// Same as above, but the parent image is created with the 1D image type.
TEST_F(Nv12ImageTest, given1DImageWhenPassedAsParentImageThenValidateImageTraitsReturnsSuccess) {
    imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D;
    std::unique_ptr<Image> image{createImageWithFlags(CL_MEM_READ_ONLY | CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL)};
    ASSERT_NE(nullptr, image);

    imageDesc.mem_object = image.get();
    imageDesc.image_depth = 0;

    retVal = Image::validateImageTraits(
        &context,
        ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_READ_WRITE, 0, 0, &context.getDevice(0)->getDevice()),
        &imageFormat, &imageDesc, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
}

// A buffer is not a valid parent for an NV12 plane image.
TEST_F(Nv12ImageTest, givenBufferWhenPassedAsNV12ParentImageThenValidateImageTraitsReturnsInvalidDesriptor) {
    MockBuffer Buffer;

    imageDesc.mem_object = &Buffer;
    imageDesc.image_depth = 0; // Plane of NV12 image

    retVal = Image::validateImageTraits(
        &context,
        ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_READ_WRITE, 0, 0, &context.getDevice(0)->getDevice()),
        &imageFormat, &imageDesc, nullptr);
    EXPECT_EQ(CL_INVALID_IMAGE_DESCRIPTOR, retVal);
}

// A freshly created parent NV12 image has zero offset/xOffset/yOffset and a
// non-zero Y offset for the UV plane.
TEST_F(Nv12ImageTest, WhenImageIsCreatedThenOffsetsAreZero) {
    std::unique_ptr<Image> image{createImageWithFlags(CL_MEM_READ_ONLY | CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL)};
    ASSERT_NE(nullptr, image);

    auto rowPitch = image->getHostPtrRowPitch();
    EXPECT_NE(0u, rowPitch);

    SurfaceOffsets surfaceOffsets;
    image->getSurfaceOffsets(surfaceOffsets);

    EXPECT_EQ(0u, surfaceOffsets.offset);
    EXPECT_EQ(0u, surfaceOffsets.xOffset);
    EXPECT_EQ(0u, surfaceOffsets.yOffset);
    EXPECT_NE(0u, surfaceOffsets.yOffsetForUVplane);
}

TEST_F(Nv12ImageTest, WhenCreatingYPlaneImageThenDimensionsAreSetCorrectly) {
    // Create Parent NV12 image
    std::unique_ptr<Image> imageNV12{createImageWithFlags(CL_MEM_READ_ONLY | CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL)};
    ASSERT_NE(nullptr, imageNV12);

    imageDesc.mem_object = imageNV12.get();

    imageFormat.image_channel_data_type = CL_UNORM_INT8;
    imageFormat.image_channel_order = CL_R;

    imageDesc.image_width = 0;
    imageDesc.image_height = 0;
    imageDesc.image_depth = 0;

    // Create NV12 Y Plane image
    std::unique_ptr<Image> imageYPlane{createImageWithFlags(CL_MEM_READ_WRITE)};
    ASSERT_NE(nullptr, imageYPlane);

    EXPECT_EQ(true, imageYPlane->isImageFromImage());
    // The plane image shares the parent's graphics allocation.
    EXPECT_EQ(imageNV12->getGraphicsAllocation(context.getDevice(0)->getRootDeviceIndex()), imageYPlane->getGraphicsAllocation(context.getDevice(0)->getRootDeviceIndex()));
    EXPECT_EQ(GMM_PLANE_Y, imageYPlane->getPlane());

    cl_image_desc parentDimensions, planeDimensions;

    parentDimensions = imageNV12->getImageDesc();
    planeDimensions = imageYPlane->getImageDesc();

    EXPECT_EQ(parentDimensions.image_height, planeDimensions.image_height);
    EXPECT_EQ(parentDimensions.image_width, planeDimensions.image_width);
    EXPECT_EQ(0u, planeDimensions.image_depth);
    EXPECT_NE(0u, planeDimensions.image_row_pitch);
    EXPECT_EQ(parentDimensions.image_slice_pitch, planeDimensions.image_slice_pitch);
    EXPECT_EQ(parentDimensions.image_type, planeDimensions.image_type);
    EXPECT_EQ(parentDimensions.image_array_size, planeDimensions.image_array_size);

    computeExpectedOffsets(imageYPlane.get());
    computeExpectedOffsets(imageNV12.get());
}

TEST_F(Nv12ImageTest, WhenCreatingUVPlaneImageThenDimensionsAreSetCorrectly) {
    // Create Parent NV12 image
    std::unique_ptr<Image> imageNV12{createImageWithFlags(CL_MEM_READ_ONLY | CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL)};
    ASSERT_NE(nullptr, imageNV12);

    imageDesc.mem_object = imageNV12.get();

    imageFormat.image_channel_data_type = CL_UNORM_INT8;
    imageFormat.image_channel_order = CL_R;

    imageDesc.image_width = 0;
    imageDesc.image_height = 0;
    imageDesc.image_depth = 1; // UV plane

    // Create NV12 UV Plane image
    std::unique_ptr<Image> imageUVPlane{createImageWithFlags(CL_MEM_READ_WRITE)};
    ASSERT_NE(nullptr, imageUVPlane);

    EXPECT_EQ(true, imageUVPlane->isImageFromImage());
    EXPECT_EQ(imageNV12->getGraphicsAllocation(context.getDevice(0)->getRootDeviceIndex()), imageUVPlane->getGraphicsAllocation(context.getDevice(0)->getRootDeviceIndex()));
    EXPECT_EQ(GMM_PLANE_U, imageUVPlane->getPlane());

    cl_image_desc parentDimensions, planeDimensions;

    parentDimensions = imageNV12->getImageDesc();
    planeDimensions = imageUVPlane->getImageDesc();

    // The UV plane is half the parent's size in both dimensions.
    EXPECT_EQ(parentDimensions.image_height / 2, planeDimensions.image_height);
    EXPECT_EQ(parentDimensions.image_width / 2, planeDimensions.image_width);
    EXPECT_EQ(0u, planeDimensions.image_depth);
    EXPECT_EQ(parentDimensions.image_row_pitch, planeDimensions.image_row_pitch);
    EXPECT_NE(0u, planeDimensions.image_row_pitch);
    EXPECT_EQ(parentDimensions.image_slice_pitch, planeDimensions.image_slice_pitch);
    EXPECT_EQ(parentDimensions.image_type, planeDimensions.image_type);
    EXPECT_EQ(parentDimensions.image_array_size, planeDimensions.image_array_size);

    computeExpectedOffsets(imageUVPlane.get());
    computeExpectedOffsets(imageNV12.get());
}

TEST_F(Nv12ImageTest, GivenOffsetOfUVPlaneWhenCreatingUVPlaneImageThenDimensionsAreSetCorrectly) {
    // This size returns offset of UV plane, and 0 yOffset
    imageDesc.image_height = 64; // Valid values multiple of 4
    imageDesc.image_width = 64;  // Valid values multiple of 4

    // Create Parent NV12 image
    std::unique_ptr<Image> imageNV12{createImageWithFlags(CL_MEM_READ_ONLY | CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL)};
    ASSERT_NE(nullptr, imageNV12);

    imageDesc.mem_object = imageNV12.get();

    imageFormat.image_channel_data_type = CL_UNORM_INT8;
    imageFormat.image_channel_order = CL_R;

    imageDesc.image_width = 0;
    imageDesc.image_height = 0;
    imageDesc.image_depth = 1; // UV plane

    // Create NV12 UV Plane image
    std::unique_ptr<Image> imageUVPlane{createImageWithFlags(CL_MEM_READ_WRITE)};
    ASSERT_NE(nullptr, imageUVPlane);

    EXPECT_EQ(true, imageUVPlane->isImageFromImage());
    EXPECT_EQ(imageNV12->getGraphicsAllocation(context.getDevice(0)->getRootDeviceIndex()), imageUVPlane->getGraphicsAllocation(context.getDevice(0)->getRootDeviceIndex()));

    cl_image_desc parentDimensions, planeDimensions;

    parentDimensions = imageNV12->getImageDesc();
    planeDimensions = imageUVPlane->getImageDesc();

    EXPECT_EQ(parentDimensions.image_height / 2, planeDimensions.image_height);
    EXPECT_EQ(parentDimensions.image_width / 2, planeDimensions.image_width);
    EXPECT_EQ(0u, planeDimensions.image_depth);
    EXPECT_EQ(parentDimensions.image_row_pitch, planeDimensions.image_row_pitch);
    EXPECT_NE(0u, planeDimensions.image_row_pitch);
    EXPECT_EQ(parentDimensions.image_slice_pitch, planeDimensions.image_slice_pitch);
    EXPECT_EQ(parentDimensions.image_type, planeDimensions.image_type);
    EXPECT_EQ(parentDimensions.image_array_size, planeDimensions.image_array_size);

    computeExpectedOffsets(imageUVPlane.get());
    computeExpectedOffsets(imageNV12.get());
}

// Creates a parent NV12 image with CL_MEM_USE_HOST_PTR through a context whose
// special queue is a mock, and checks how many image writes were enqueued:
// two for a tiled allocation, none otherwise.
HWTEST_F(Nv12ImageTest, WhenCreatingParentImageThenPlanesAreWritten) {
    KernelBinaryHelper kbHelper(KernelBinaryHelper::BUILT_INS_WITH_IMAGES);

    // NOTE(review): template arguments on the next line and on MockCommandQueueHw
    // are reconstructed - confirm against the mock headers.
    auto device = std::make_unique<MockClDevice>(MockClDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));

    char hostPtr[16 * 16 * 16];

    auto contextWithMockCmdQ = new MockContext(device.get(), true);
    auto cmdQ = new MockCommandQueueHw<FamilyType>(contextWithMockCmdQ, device.get(), 0);

    contextWithMockCmdQ->overrideSpecialQueueAndDecrementRefCount(cmdQ, device->getRootDeviceIndex());

    // Create Parent NV12 image
    cl_mem_flags flags = CL_MEM_READ_ONLY | CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL | CL_MEM_USE_HOST_PTR;
    auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features);

    std::unique_ptr<Image> imageNV12{Image::create(contextWithMockCmdQ,
                                                   ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context.getDevice(0)->getDevice()),
                                                   flags, 0, surfaceFormat, &imageDesc, hostPtr, retVal)};

    // Check for null before dereferencing. A non-fatal expectation is used so
    // the context is released below even when image creation failed.
    EXPECT_NE(nullptr, imageNV12);
    if (imageNV12) {
        EXPECT_EQ(imageNV12->isTiledAllocation() ? 2u : 0u, cmdQ->EnqueueWriteImageCounter);
    }

    contextWithMockCmdQ->release();
}

// setImageArg() programs the offsets reported by getSurfaceOffsets() into the
// render surface state and selects ONE for the alpha channel.
HWTEST_F(Nv12ImageTest, WhenSettingImageArgThenSurfaceStateIsCorrect) {
    typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE;
    RENDER_SURFACE_STATE surfaceState;

    std::unique_ptr<Image> image{createImageWithFlags(CL_MEM_READ_ONLY | CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL)};
    ASSERT_NE(nullptr, image);

    SurfaceOffsets surfaceOffsets;
    image->getSurfaceOffsets(surfaceOffsets);
    image->setImageArg(&surfaceState, false, 0, context.getDevice(0)->getRootDeviceIndex(), false);

    EXPECT_EQ(surfaceOffsets.xOffset, surfaceState.getXOffset());
    EXPECT_EQ(surfaceOffsets.yOffset, surfaceState.getYOffset());
    EXPECT_EQ(surfaceOffsets.yOffsetForUVplane, surfaceState.getYOffsetForUOrUvPlane());

    // NV 12 image has correct alpha channel == one
    EXPECT_EQ(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_ONE, surfaceState.getShaderChannelSelectAlpha());
}

// For a 2D-array NV12 image with image_array_size == 1, setImageArg() leaves
// the surface-array bit unset.
HWTEST_F(Nv12ImageTest, givenNv12ImageArrayAndImageArraySizeIsZeroWhenCallingSetImageArgThenDoNotProgramSurfaceArray) {
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
    RENDER_SURFACE_STATE surfaceState;

    cl_image_desc imageDesc = Image2dDefaults::imageDesc;
    imageDesc.image_array_size = 1;
    imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY;
    cl_image_format imageFormat = Image2dDefaults::imageFormat;
    imageFormat.image_channel_order = CL_NV12_INTEL;
    imageFormat.image_channel_data_type = CL_UNORM_INT8;
    std::unique_ptr<Image> image{Image2dHelper<>::create(&context, &imageDesc, &imageFormat)};
    image->setCubeFaceIndex(__GMM_NO_CUBE_MAP);

    image->setImageArg(&surfaceState, false, 0, context.getDevice(0)->getRootDeviceIndex(), false);
    EXPECT_FALSE(surfaceState.getSurfaceArray());
}
HWTEST_F(Nv12ImageTest, WhenSettingImageArgUvPlaneImageThenOffsetSurfaceBaseAddressAndCorrectTileModeAreSet) { typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; RENDER_SURFACE_STATE surfaceState; std::unique_ptr imageNV12{createImageWithFlags(CL_MEM_READ_ONLY | CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL)}; ASSERT_NE(nullptr, imageNV12); imageDesc.mem_object = imageNV12.get(); imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_R; imageDesc.image_width = 0; imageDesc.image_height = 0; imageDesc.image_depth = 1; // UV plane // Create NV12 UV Plane image std::unique_ptr imageUVPlane{createImageWithFlags(CL_MEM_READ_WRITE)}; ASSERT_NE(nullptr, imageUVPlane); EXPECT_EQ(imageNV12->getGraphicsAllocation(context.getDevice(0)->getRootDeviceIndex()), imageUVPlane->getGraphicsAllocation(context.getDevice(0)->getRootDeviceIndex())); SurfaceOffsets surfaceOffsets; imageUVPlane->getSurfaceOffsets(surfaceOffsets); imageUVPlane->setImageArg(&surfaceState, false, 0, context.getDevice(0)->getRootDeviceIndex(), false); EXPECT_EQ(imageUVPlane->getGraphicsAllocation(context.getDevice(0)->getRootDeviceIndex())->getGpuAddress() + surfaceOffsets.offset, surfaceState.getSurfaceBaseAddress()); auto tileMode = RENDER_SURFACE_STATE::TILE_MODE_LINEAR; if (imageNV12->isTiledAllocation()) { tileMode = static_cast(MockGmmResourceInfo::yMajorTileModeValue); } EXPECT_EQ(tileMode, surfaceState.getTileMode()); } HWTEST_F(Nv12ImageTest, WhenSettingMediaImageArgThenSurfaceStateIsCorrect) { using MEDIA_SURFACE_STATE = typename FamilyType::MEDIA_SURFACE_STATE; MEDIA_SURFACE_STATE surfaceState; std::unique_ptr image{createImageWithFlags(CL_MEM_READ_ONLY | CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL)}; ASSERT_NE(nullptr, image); SurfaceOffsets surfaceOffsets; image->getSurfaceOffsets(surfaceOffsets); image->setMediaImageArg(&surfaceState, context.getDevice(0)->getRootDeviceIndex()); EXPECT_EQ(surfaceOffsets.xOffset, surfaceState.getXOffsetForUCb()); 
EXPECT_EQ(surfaceOffsets.yOffset, surfaceState.getXOffsetForUCb()); EXPECT_EQ(surfaceOffsets.yOffsetForUVplane, surfaceState.getYOffsetForUCb()); EXPECT_EQ(image->getGraphicsAllocation(context.getDevice(0)->getRootDeviceIndex())->getGpuAddress() + surfaceOffsets.offset, surfaceState.getSurfaceBaseAddress()); } TEST_F(Nv12ImageTest, WhenRedescribingThenNV12ImageAndUVPlaneImageHaveCorrectOffsets) { std::unique_ptr image{createImageWithFlags(CL_MEM_READ_ONLY | CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL)}; ASSERT_NE(nullptr, image); std::unique_ptr imageRedescribed{image->redescribe()}; ASSERT_NE(nullptr, imageRedescribed); SurfaceOffsets imageOffsets, redescribedOffsets; image->getSurfaceOffsets(imageOffsets); imageRedescribed->getSurfaceOffsets(redescribedOffsets); EXPECT_EQ(imageOffsets.xOffset, redescribedOffsets.xOffset); EXPECT_EQ(imageOffsets.yOffset, redescribedOffsets.yOffset); EXPECT_EQ(imageOffsets.yOffsetForUVplane, redescribedOffsets.yOffsetForUVplane); imageDesc.mem_object = image.get(); imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_R; imageDesc.image_width = 0; imageDesc.image_height = 0; imageDesc.image_depth = 1; // UV plane // Create NV12 UV Plane image std::unique_ptr imageUVPlane{createImageWithFlags(CL_MEM_READ_WRITE)}; ASSERT_NE(nullptr, imageUVPlane); imageRedescribed.reset(imageUVPlane->redescribe()); ASSERT_NE(nullptr, imageRedescribed); imageUVPlane->getSurfaceOffsets(imageOffsets); imageRedescribed->getSurfaceOffsets(redescribedOffsets); EXPECT_EQ(imageOffsets.xOffset, redescribedOffsets.xOffset); EXPECT_EQ(imageOffsets.yOffset, redescribedOffsets.yOffset); EXPECT_EQ(imageOffsets.yOffsetForUVplane, redescribedOffsets.yOffsetForUVplane); } TEST_F(Nv12ImageTest, GivenInvalidImageHeightWhenValidatingPlanarYuvThenInvalidImageSizeErrorIsReturned) { auto pClDevice = context.getDevice(0); const size_t *maxHeight = nullptr; size_t srcSize = 0; size_t retSize = 0; ASSERT_NE(nullptr, pClDevice); 
pClDevice->getCap(reinterpret_cast(maxHeight), srcSize, retSize); imageDesc.image_height = *maxHeight + 12; retVal = Image::validatePlanarYUV(&context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &pClDevice->getDevice()), &imageDesc, nullptr); EXPECT_EQ(CL_INVALID_IMAGE_SIZE, retVal); } TEST_F(Nv12ImageTest, GivenInvalidImageWidthWhenValidatingPlanarYuvThenInvalidImageSizeErrorIsReturned) { auto pClDevice = context.getDevice(0); const size_t *maxWidth = nullptr; size_t srcSize = 0; size_t retSize = 0; ASSERT_NE(nullptr, pClDevice); pClDevice->getCap(reinterpret_cast(maxWidth), srcSize, retSize); imageDesc.image_width = *maxWidth + 12; retVal = Image::validatePlanarYUV(&context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &pClDevice->getDevice()), &imageDesc, nullptr); EXPECT_EQ(CL_INVALID_IMAGE_SIZE, retVal); } TEST_F(Nv12ImageTest, GivenValidImageHeightWhenValidatingPlanarYuvThenSuccessIsReturned) { retVal = Image::validatePlanarYUV(&context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context.getDevice(0)->getDevice()), &imageDesc, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(Nv12ImageTest, GivenValidImageWidthWhenValidatingPlanarYuvThenSuccessIsReturned) { retVal = Image::validatePlanarYUV(&context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context.getDevice(0)->getDevice()), &imageDesc, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } compute-runtime-22.14.22890/opencl/test/unit_test/mem_obj/packed_yuv_image_tests.cpp000066400000000000000000000074351422164147700304720ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/aligned_memory.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/helpers/cl_memory_properties_helpers.h" #include "opencl/source/helpers/cl_validators.h" #include "opencl/source/helpers/surface_formats.h" #include "opencl/source/mem_obj/image.h" #include 
"opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "gtest/gtest.h" using namespace NEO; typedef decltype(&Image::redescribe) RedescribeMethod; class PackedYuvImageTest : public testing::Test, public testing::WithParamInterface { public: PackedYuvImageTest() { } protected: void SetUp() override { imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = GetParam(); imageDesc.mem_object = nullptr; imageDesc.image_array_size = 0; imageDesc.image_depth = 1; imageDesc.image_height = 13; imageDesc.image_width = 16; // Valid values multiple of 2 imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; } void TearDown() override { } void validateFormat() { retVal = Image::validateImageFormat(&imageFormat); if (retVal != CL_SUCCESS) return; auto surfaceFormat = Image::getSurfaceFormatFromTable( flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); retVal = Image::validate( &context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context.getDevice(0)->getDevice()), surfaceFormat, &imageDesc, nullptr); } cl_int retVal = CL_SUCCESS; MockContext context; cl_image_format imageFormat; cl_image_desc imageDesc; cl_mem_flags flags; }; cl_channel_order packedYuvChannels[] = {CL_YUYV_INTEL, CL_UYVY_INTEL, CL_YVYU_INTEL, CL_VYUY_INTEL}; TEST_P(PackedYuvImageTest, GivenValidPackedYuvImageFormatAndDescriptorWhenCreatingImageThenIsPackYuvImageReturnsTrue) { flags = CL_MEM_READ_ONLY; auto surfaceFormat = Image::getSurfaceFormatFromTable( flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); auto image = Image::create( &context, 
ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context.getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal); ASSERT_NE(nullptr, image); EXPECT_TRUE(isPackedYuvImage(&image->getImageFormat())); delete image; } TEST_P(PackedYuvImageTest, GivenValidPackedYuvImageFormatAndDescriptorWhenValidatingImageFormatThenValidImageIsReturned) { flags = CL_MEM_READ_ONLY; validateFormat(); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_P(PackedYuvImageTest, GivenInvalidFormatWhenValidatingImageFormatThenInvalidFormatDescriptorErrorIsReturned) { imageFormat.image_channel_data_type = CL_SNORM_INT16; flags = CL_MEM_READ_ONLY; validateFormat(); EXPECT_EQ(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, retVal); } TEST_P(PackedYuvImageTest, GivenInvalidWidthWhenValidatingImageFormatThenInvalidImageDescriptorErrorIsReturned) { imageDesc.image_width = 17; flags = CL_MEM_READ_ONLY; validateFormat(); EXPECT_EQ(CL_INVALID_IMAGE_DESCRIPTOR, retVal); } INSTANTIATE_TEST_CASE_P( PackedYuvImageTests, PackedYuvImageTest, testing::ValuesIn(packedYuvChannels)); compute-runtime-22.14.22890/opencl/test/unit_test/mem_obj/pipe_tests.cpp000066400000000000000000000104401422164147700261210ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/fixtures/memory_management_fixture.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/mem_obj/pipe.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/fixtures/multi_root_device_fixture.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" using namespace NEO; //Tests for pipes class PipeTest : public ::testing::Test, public MemoryManagementFixture { public: protected: void SetUp() override { } void TearDown() override { } cl_int retVal = CL_INVALID_PIPE_SIZE; MockContext 
context; size_t size = 0u; }; TEST_F(PipeTest, WhenCreatingPipeThenSuccessIsReturned) { int errCode = CL_SUCCESS; auto pipe = Pipe::create(&context, CL_MEM_READ_ONLY, 1, 20, nullptr, errCode); EXPECT_NE(nullptr, pipe); EXPECT_EQ(CL_SUCCESS, errCode); delete pipe; } TEST_F(PipeTest, WhenCreatingPipeThenHeaderSizeAdditionIsReserved) { int errCode = CL_SUCCESS; auto pipe = Pipe::create(&context, CL_MEM_READ_ONLY, 1, 20, nullptr, errCode); ASSERT_NE(nullptr, pipe); EXPECT_EQ(CL_SUCCESS, errCode); EXPECT_EQ((1 * (20 + 1)) + Pipe::intelPipeHeaderReservedSpace, pipe->getSize()); delete pipe; } TEST_F(PipeTest, WhenCreatingPipeThenHeaderIsInitialized) { int errCode = CL_SUCCESS; auto pipe = Pipe::create(&context, CL_MEM_READ_ONLY, 1, 20, nullptr, errCode); ASSERT_NE(nullptr, pipe); EXPECT_EQ(CL_SUCCESS, errCode); EXPECT_EQ(21u, *reinterpret_cast(pipe->getCpuAddress())); delete pipe; } TEST_F(PipeTest, GivenFailedAllocationInjectionWhenCreatingPipeThenOnlyFailingAllocationsAreNull) { InjectedFunction method = [this](size_t failureIndex) { auto retVal = CL_INVALID_VALUE; auto pipe = Pipe::create(&context, CL_MEM_READ_ONLY, 1, 20, nullptr, retVal); if (MemoryManagement::nonfailingAllocation == failureIndex) { EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, pipe); delete pipe; } else { EXPECT_EQ(CL_OUT_OF_HOST_MEMORY, retVal) << "for allocation " << failureIndex; EXPECT_EQ(nullptr, pipe); } }; injectFailures(method); } TEST_F(PipeTest, givenPipeWhenEnqueueWriteForUnmapIsCalledThenReturnError) { int errCode = CL_SUCCESS; std::unique_ptr pipe(Pipe::create(&context, CL_MEM_READ_ONLY, 1, 20, nullptr, errCode)); ASSERT_NE(nullptr, pipe); EXPECT_EQ(CL_SUCCESS, errCode); MockCommandQueue cmdQ; errCode = clEnqueueUnmapMemObject(&cmdQ, pipe.get(), nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_MEM_OBJECT, errCode); } TEST_F(PipeTest, givenPipeWithDifferentCpuAndGpuAddressesWhenSetArgPipeThenUseGpuAddress) { int errCode = CL_SUCCESS; auto pipe = Pipe::create(&context, 
CL_MEM_READ_ONLY, 1, 20, nullptr, errCode); ASSERT_NE(nullptr, pipe); EXPECT_EQ(CL_SUCCESS, errCode); EXPECT_EQ(21u, *reinterpret_cast(pipe->getCpuAddress())); uint64_t gpuAddress = 0x12345; auto pipeAllocation = pipe->getGraphicsAllocation(context.getDevice(0)->getRootDeviceIndex()); pipeAllocation->setCpuPtrAndGpuAddress(pipeAllocation->getUnderlyingBuffer(), gpuAddress); EXPECT_NE(reinterpret_cast(pipeAllocation->getUnderlyingBuffer()), pipeAllocation->getGpuAddress()); uint64_t valueToPatch; pipe->setPipeArg(&valueToPatch, sizeof(valueToPatch), context.getDevice(0)->getRootDeviceIndex()); EXPECT_EQ(valueToPatch, pipeAllocation->getGpuAddressToPatch()); delete pipe; } using MultiRootDeviceTests = MultiRootDeviceFixture; TEST_F(MultiRootDeviceTests, GivenPipeGraphicsAllocationThenItHasCorrectRootDeviceIndex) { int errCode = CL_SUCCESS; std::unique_ptr pipe(Pipe::create(context.get(), CL_MEM_READ_ONLY, 1, 20, nullptr, errCode)); EXPECT_EQ(CL_SUCCESS, errCode); ASSERT_NE(nullptr, pipe.get()); auto graphicsAllocation = pipe->getGraphicsAllocation(expectedRootDeviceIndex); ASSERT_NE(nullptr, graphicsAllocation); EXPECT_EQ(expectedRootDeviceIndex, graphicsAllocation->getRootDeviceIndex()); } compute-runtime-22.14.22890/opencl/test/unit_test/mem_obj/sub_buffer_tests.cpp000066400000000000000000000212361422164147700273130ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/constants.h" #include "shared/source/helpers/ptr_math.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "gtest/gtest.h" using namespace NEO; namespace ULT { static const unsigned int sizeTestBufferInBytes = 32; class SubBufferTest : public ClDeviceFixture, public ::testing::Test { public: SubBufferTest() { } protected: void SetUp() override 
{ buffer = Buffer::create(&context, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, sizeTestBufferInBytes, pHostPtr, retVal); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, buffer); } void TearDown() override { delete buffer; ClDeviceFixture::TearDown(); } cl_int retVal = CL_SUCCESS; MockContext context; unsigned char pHostPtr[sizeTestBufferInBytes]; Buffer *buffer = nullptr; }; TEST_F(SubBufferTest, WhenCreatingSubBufferThenRefInternalCountIsIncremented) { cl_buffer_region region = {2, 12}; EXPECT_EQ(1, buffer->getRefInternalCount()); auto subBuffer = buffer->createSubBuffer(CL_MEM_READ_ONLY, 0, ®ion, retVal); EXPECT_EQ(2, buffer->getRefInternalCount()); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, subBuffer); delete subBuffer; EXPECT_EQ(1, buffer->getRefInternalCount()); } TEST_F(SubBufferTest, givenSubBufferWhenGetHighestRootMemObjIsCalledThenProperMemObjIsReturned) { cl_buffer_region region0 = {2, 12}; auto subBuffer = buffer->createSubBuffer(CL_MEM_READ_ONLY, 0, ®ion0, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(static_cast(buffer), buffer->getHighestRootMemObj()); EXPECT_EQ(static_cast(buffer), subBuffer->getHighestRootMemObj()); subBuffer->release(); } TEST_F(SubBufferTest, GivenUnalignedHostPtrBufferWhenSubBufferIsCreatedThenItIsNonZeroCopy) { cl_buffer_region region = {2, 2}; cl_int retVal = 0; void *pUnalignedHostPtr = alignUp(&pHostPtr, 4); Buffer *buffer = Buffer::create(&context, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, sizeTestBufferInBytes, pUnalignedHostPtr, retVal); ASSERT_NE(nullptr, buffer); ASSERT_EQ(CL_SUCCESS, retVal); auto subBuffer = buffer->createSubBuffer(CL_MEM_READ_ONLY, 0, ®ion, retVal); EXPECT_NE(nullptr, subBuffer); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_FALSE(subBuffer->isMemObjZeroCopy()); subBuffer->release(); buffer->release(); } TEST_F(SubBufferTest, GivenAlignmentThatIsHigherThen4BytesWhenCheckedForValidityThenTrueIsReturned) { cl_buffer_region region = {2, 2}; EXPECT_FALSE(buffer->isValidSubBufferOffset(region.origin)); 
cl_buffer_region region2 = {4, 4}; EXPECT_TRUE(buffer->isValidSubBufferOffset(region2.origin)); cl_buffer_region region3 = {8, 4}; EXPECT_TRUE(buffer->isValidSubBufferOffset(region3.origin)); MockBuffer::setAllocationType(buffer->getGraphicsAllocation(context.getDevice(0)->getRootDeviceIndex()), context.getDevice(0)->getRootDeviceEnvironment().getGmmClientContext(), true); EXPECT_FALSE(buffer->isValidSubBufferOffset(region.origin)); EXPECT_FALSE(buffer->isValidSubBufferOffset(region2.origin)); cl_buffer_region region4 = {1025, 4}; EXPECT_FALSE(buffer->isValidSubBufferOffset(region4.origin)); cl_buffer_region region5 = {1024, 4}; EXPECT_TRUE(buffer->isValidSubBufferOffset(region5.origin)); cl_buffer_region region6 = {127, 4}; EXPECT_FALSE(buffer->isValidSubBufferOffset(region6.origin)); cl_buffer_region region7 = {128, 4}; EXPECT_TRUE(buffer->isValidSubBufferOffset(region7.origin)); cl_buffer_region region8 = {129, 4}; EXPECT_FALSE(buffer->isValidSubBufferOffset(region8.origin)); } TEST_F(SubBufferTest, givenSharingHandlerFromParentBufferWhenCreateThenShareHandler) { cl_buffer_region region = {2, 12}; auto handler = new SharingHandler(); buffer->setSharingHandler(handler); auto subBuffer = buffer->createSubBuffer(CL_MEM_READ_ONLY, 0, ®ion, retVal); ASSERT_NE(nullptr, subBuffer); EXPECT_EQ(subBuffer->getSharingHandler().get(), handler); delete subBuffer; EXPECT_EQ(1, buffer->getRefInternalCount()); } TEST_F(SubBufferTest, GivenBufferWithAlignedHostPtrAndSameMemoryStorageWhenSubBufferIsCreatedThenHostPtrAndMemoryStorageAreOffseted) { cl_buffer_region region = {2, 2}; cl_int retVal = 0; void *alignedPointer = alignedMalloc(MemoryConstants::pageSize, MemoryConstants::preferredAlignment); Buffer *buffer = Buffer::create(&context, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR | CL_MEM_FORCE_HOST_MEMORY_INTEL, MemoryConstants::pageSize, alignedPointer, retVal); ASSERT_NE(nullptr, buffer); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(alignedPointer, buffer->getHostPtr()); 
EXPECT_EQ(alignedPointer, buffer->getCpuAddress()); auto subBuffer = buffer->createSubBuffer(CL_MEM_READ_ONLY, 0, ®ion, retVal); EXPECT_NE(nullptr, subBuffer); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(ptrOffset(alignedPointer, 2), subBuffer->getHostPtr()); EXPECT_EQ(ptrOffset(alignedPointer, 2), subBuffer->getCpuAddress()); subBuffer->release(); buffer->release(); alignedFree(alignedPointer); } TEST_F(SubBufferTest, GivenBufferWithMemoryStorageAndNullHostPtrWhenSubBufferIsCreatedThenMemoryStorageIsOffsetedAndHostPtrIsNull) { cl_buffer_region region = {2, 2}; cl_int retVal = 0; Buffer *buffer = Buffer::create(&context, CL_MEM_READ_WRITE, MemoryConstants::pageSize, nullptr, retVal); ASSERT_NE(nullptr, buffer); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(nullptr, buffer->getHostPtr()); EXPECT_NE(nullptr, buffer->getCpuAddress()); auto subBuffer = buffer->createSubBuffer(CL_MEM_READ_ONLY, 0, ®ion, retVal); EXPECT_NE(nullptr, subBuffer); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(nullptr, subBuffer->getHostPtr()); EXPECT_EQ(ptrOffset(buffer->getCpuAddress(), 2), subBuffer->getCpuAddress()); subBuffer->release(); buffer->release(); } TEST_F(SubBufferTest, givenBufferWithHostPtrWhenSubbufferGetsMapPtrThenExpectBufferHostPtr) { cl_buffer_region region = {0, 16}; auto subBuffer = buffer->createSubBuffer(CL_MEM_READ_WRITE, 0, ®ion, retVal); ASSERT_NE(nullptr, subBuffer); ASSERT_EQ(CL_SUCCESS, retVal); void *mapPtr = subBuffer->getBasePtrForMap(0); EXPECT_EQ(pHostPtr, mapPtr); mapPtr = subBuffer->getBasePtrForMap(0); EXPECT_EQ(pHostPtr, mapPtr); subBuffer->release(); } TEST_F(SubBufferTest, givenBufferWithNoHostPtrWhenSubbufferGetsMapPtrThenExpectBufferMap) { cl_buffer_region region = {0, 16}; Buffer *buffer = Buffer::create(&context, CL_MEM_READ_WRITE, MemoryConstants::pageSize, nullptr, retVal); ASSERT_NE(nullptr, buffer); ASSERT_EQ(CL_SUCCESS, retVal); auto subBuffer = buffer->createSubBuffer(CL_MEM_READ_WRITE, 0, ®ion, retVal); ASSERT_NE(nullptr, subBuffer); 
ASSERT_EQ(CL_SUCCESS, retVal); void *mapPtr = subBuffer->getBasePtrForMap(0); void *bufferMapPtr = buffer->getBasePtrForMap(0); EXPECT_EQ(bufferMapPtr, mapPtr); auto mapAllocation = subBuffer->getMapAllocation(0); auto bufferMapAllocation = buffer->getMapAllocation(0); ASSERT_NE(nullptr, bufferMapAllocation); EXPECT_EQ(bufferMapAllocation, mapAllocation); EXPECT_EQ(bufferMapPtr, mapAllocation->getUnderlyingBuffer()); mapPtr = subBuffer->getBasePtrForMap(0); EXPECT_EQ(bufferMapPtr, mapPtr); subBuffer->release(); buffer->release(); } TEST_F(SubBufferTest, givenSubBuffersWithMultipleDevicesWhenReleaseAllSubBuffersThenMainBufferProperlyDereferenced) { MockDefaultContext ctx; Buffer *buffer = Buffer::create(&ctx, CL_MEM_READ_WRITE, MemoryConstants::pageSize, nullptr, retVal); ASSERT_NE(nullptr, buffer); ASSERT_EQ(CL_SUCCESS, retVal); Buffer *subBuffers[8]; for (int i = 0; i < 8; i++) { cl_buffer_region region = {static_cast(i * 4), 4}; subBuffers[i] = buffer->createSubBuffer(CL_MEM_READ_WRITE, 0, ®ion, retVal); EXPECT_EQ(3u, subBuffers[i]->getMultiGraphicsAllocation().getGraphicsAllocations().size()); } EXPECT_EQ(9, buffer->getRefInternalCount()); for (int i = 0; i < 8; i++) { subBuffers[i]->release(); } EXPECT_EQ(1, buffer->getRefInternalCount()); buffer->release(); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/mem_obj/zero_copy_tests.cpp000066400000000000000000000221221422164147700271750ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/aligned_memory.h" #include "shared/test/common/fixtures/memory_management_fixture.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "gtest/gtest.h" using namespace NEO; class ZeroCopyBufferTest : public ClDeviceFixture, public 
testing::TestWithParam> { public: ZeroCopyBufferTest() { } protected: void SetUp() override { size_t sizeToAlloc; size_t alignment; host_ptr = nullptr; std::tie(flags, sizeToAlloc, alignment, size, ShouldBeZeroCopy, MisalignPointer) = GetParam(); if (sizeToAlloc > 0) { host_ptr = (void *)alignedMalloc(sizeToAlloc, alignment); } ClDeviceFixture::SetUp(); } void TearDown() override { ClDeviceFixture::TearDown(); alignedFree(host_ptr); } cl_int retVal = CL_SUCCESS; MockContext context; cl_mem_flags flags = 0; void *host_ptr; bool ShouldBeZeroCopy; cl_int size; bool MisalignPointer; }; static const int Multiplier = 1000; static const int CacheLinedAlignedSize = MemoryConstants::cacheLineSize * Multiplier; static const int CacheLinedMisAlignedSize = CacheLinedAlignedSize - 1; static const int PageAlignSize = MemoryConstants::preferredAlignment * Multiplier; // clang-format off //flags, size to alloc, alignment, size, ZeroCopy, MisalignPointer std::tuple Inputs[] = {std::make_tuple((cl_mem_flags)CL_MEM_USE_HOST_PTR, CacheLinedMisAlignedSize, MemoryConstants::preferredAlignment, CacheLinedMisAlignedSize, false, true), std::make_tuple((cl_mem_flags)CL_MEM_USE_HOST_PTR, CacheLinedAlignedSize, MemoryConstants::preferredAlignment, CacheLinedAlignedSize, false, true), std::make_tuple((cl_mem_flags)CL_MEM_USE_HOST_PTR, CacheLinedAlignedSize, MemoryConstants::preferredAlignment, CacheLinedAlignedSize, true, false), std::make_tuple((cl_mem_flags)CL_MEM_USE_HOST_PTR, CacheLinedMisAlignedSize, MemoryConstants::preferredAlignment, CacheLinedMisAlignedSize, false, false), std::make_tuple((cl_mem_flags)CL_MEM_USE_HOST_PTR, PageAlignSize, MemoryConstants::preferredAlignment, PageAlignSize, true, false), std::make_tuple((cl_mem_flags)CL_MEM_USE_HOST_PTR, CacheLinedMisAlignedSize, MemoryConstants::cacheLineSize, CacheLinedAlignedSize, true, false), std::make_tuple((cl_mem_flags)CL_MEM_COPY_HOST_PTR, CacheLinedMisAlignedSize, MemoryConstants::preferredAlignment, CacheLinedMisAlignedSize, 
true, true), std::make_tuple((cl_mem_flags)CL_MEM_COPY_HOST_PTR, CacheLinedMisAlignedSize, MemoryConstants::preferredAlignment, CacheLinedMisAlignedSize, true, false), std::make_tuple((cl_mem_flags)NULL, 0, 0, CacheLinedMisAlignedSize, true, false), std::make_tuple((cl_mem_flags)NULL, 0, 0, CacheLinedAlignedSize, true, true)}; // clang-format on TEST_P(ZeroCopyBufferTest, GivenCacheAlignedPointerWhenCreatingBufferThenZeroCopy) { char *PassedPtr = (char *)host_ptr; //misalign the pointer if (MisalignPointer && PassedPtr) { PassedPtr += 1; } auto buffer = Buffer::create( &context, flags, size, PassedPtr, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(ShouldBeZeroCopy, buffer->isMemObjZeroCopy()) << "Zero Copy not handled properly"; if (!ShouldBeZeroCopy && flags & CL_MEM_USE_HOST_PTR) { EXPECT_NE(buffer->getCpuAddress(), host_ptr); } EXPECT_NE(nullptr, buffer->getCpuAddress()); //check if buffer always have properly aligned storage ( PAGE ) EXPECT_EQ(alignUp(buffer->getCpuAddress(), MemoryConstants::cacheLineSize), buffer->getCpuAddress()); delete buffer; } INSTANTIATE_TEST_CASE_P( ZeroCopyBufferTests, ZeroCopyBufferTest, testing::ValuesIn(Inputs)); TEST(ZeroCopyBufferTestWithSharedContext, GivenContextThatIsSharedWhenAskedForBufferCreationThenAlwaysResultsInZeroCopy) { MockContext context; auto host_ptr = reinterpret_cast(0x1001); auto size = 64; auto retVal = CL_SUCCESS; context.isSharedContext = true; std::unique_ptr buffer(Buffer::create(&context, CL_MEM_USE_HOST_PTR, size, host_ptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(buffer->isMemObjZeroCopy()) << "Zero Copy not handled properly"; if (buffer->getGraphicsAllocation(context.getDevice(0)->getRootDeviceIndex())->is32BitAllocation() == false) { EXPECT_EQ(host_ptr, buffer->getGraphicsAllocation(context.getDevice(0)->getRootDeviceIndex())->getUnderlyingBuffer()); } } TEST(ZeroCopyBufferTestWithSharedContext, 
GivenContextThatIsSharedAndDisableZeroCopyFlagWhenAskedForBufferCreationThenAlwaysResultsInZeroCopy) { DebugManagerStateRestore stateRestore; DebugManager.flags.DisableZeroCopyForUseHostPtr.set(true); MockContext context; auto host_ptr = reinterpret_cast(0x1001); auto size = 64; auto retVal = CL_SUCCESS; context.isSharedContext = true; std::unique_ptr buffer(Buffer::create(&context, CL_MEM_USE_HOST_PTR, size, host_ptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(buffer->isMemObjZeroCopy()); } TEST(ZeroCopyWithDebugFlag, GivenInputsThatWouldResultInZeroCopyAndUseHostptrDisableZeroCopyFlagWhenBufferIsCreatedThenNonZeroCopyBufferIsReturned) { DebugManagerStateRestore stateRestore; DebugManager.flags.DisableZeroCopyForUseHostPtr.set(true); MockContext context; auto host_ptr = alignedMalloc(MemoryConstants::pageSize, MemoryConstants::pageSize); auto size = MemoryConstants::pageSize; auto retVal = CL_SUCCESS; std::unique_ptr buffer(Buffer::create(&context, CL_MEM_USE_HOST_PTR, size, host_ptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_FALSE(buffer->isMemObjZeroCopy()); alignedFree(host_ptr); } TEST(ZeroCopyWithDebugFlag, GivenInputsThatWouldResultInZeroCopyAndDisableZeroCopyFlagWhenBufferIsCreatedThenNonZeroCopyBufferIsReturned) { DebugManagerStateRestore stateRestore; DebugManager.flags.DisableZeroCopyForBuffers.set(true); MockContext context; auto host_ptr = alignedMalloc(MemoryConstants::pageSize, MemoryConstants::pageSize); auto size = MemoryConstants::pageSize; auto retVal = CL_SUCCESS; std::unique_ptr buffer(Buffer::create(&context, CL_MEM_USE_HOST_PTR, size, host_ptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_FALSE(buffer->isMemObjZeroCopy()); EXPECT_FALSE(buffer->mappingOnCpuAllowed()); alignedFree(host_ptr); } TEST(ZeroCopyWithDebugFlag, GivenBufferInputsThatWouldResultInZeroCopyAndDisableZeroCopyFlagWhenBufferIsCreatedThenNonZeroCopyBufferIsReturned) { DebugManagerStateRestore stateRestore; 
DebugManager.flags.DisableZeroCopyForBuffers.set(true); MockContext context; auto retVal = CL_SUCCESS; std::unique_ptr buffer(Buffer::create(&context, CL_MEM_ALLOC_HOST_PTR, MemoryConstants::pageSize, nullptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_FALSE(buffer->isMemObjZeroCopy()); EXPECT_FALSE(buffer->mappingOnCpuAllowed()); EXPECT_EQ(nullptr, buffer->getHostPtr()); EXPECT_EQ(nullptr, buffer->getAllocatedMapPtr()); auto bufferAllocation = buffer->getGraphicsAllocation(context.getDevice(0)->getRootDeviceIndex())->getUnderlyingBuffer(); auto mapAllocation = buffer->getBasePtrForMap(0); EXPECT_EQ(mapAllocation, buffer->getAllocatedMapPtr()); EXPECT_NE(mapAllocation, bufferAllocation); } TEST(ZeroCopyBufferWith32BitAddressing, GivenDeviceSupporting32BitAddressingWhenAskedForBufferCreationFromHostPtrThenNonZeroCopyBufferIsReturned) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.Force32bitAddressing.set(true); MockContext context; auto host_ptr = (void *)alignedMalloc(MemoryConstants::pageSize, MemoryConstants::pageSize); auto size = MemoryConstants::pageSize; auto retVal = CL_SUCCESS; std::unique_ptr buffer(Buffer::create(&context, CL_MEM_USE_HOST_PTR, size, host_ptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(buffer->isMemObjZeroCopy()); if constexpr (is64bit) { EXPECT_TRUE(buffer->getGraphicsAllocation(context.getDevice(0)->getRootDeviceIndex())->is32BitAllocation()); } alignedFree(host_ptr); } compute-runtime-22.14.22890/opencl/test/unit_test/memory_manager/000077500000000000000000000000001422164147700246315ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/memory_manager/CMakeLists.txt000066400000000000000000000030331422164147700273700ustar00rootroot00000000000000# # Copyright (C) 2018-2022 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_memory_manager ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/cpu_page_fault_manager_memory_sync_tests.cpp 
${CMAKE_CURRENT_SOURCE_DIR}/host_ptr_manager_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/internal_allocation_storage_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/memory_manager_multi_device_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/memory_manager_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/memory_manager_allocate_in_preferred_pool_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/migraton_controller_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/surface_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/unified_memory_manager_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/unified_memory_token_tests.cpp ) get_property(NEO_CORE_CPU_PAGE_FAULT_MANAGER_TESTS GLOBAL PROPERTY NEO_CORE_CPU_PAGE_FAULT_MANAGER_TESTS) list(APPEND IGDRCL_SRCS_tests_memory_manager ${NEO_CORE_CPU_PAGE_FAULT_MANAGER_TESTS}) if(WIN32) get_property(NEO_CORE_PAGE_FAULT_MANAGER_WINDOWS_TESTS GLOBAL PROPERTY NEO_CORE_PAGE_FAULT_MANAGER_WINDOWS_TESTS) list(APPEND IGDRCL_SRCS_tests_memory_manager ${NEO_CORE_PAGE_FAULT_MANAGER_WINDOWS_TESTS}) endif() if(UNIX) get_property(NEO_CORE_PAGE_FAULT_MANAGER_LINUX_TESTS GLOBAL PROPERTY NEO_CORE_PAGE_FAULT_MANAGER_LINUX_TESTS) list(APPEND IGDRCL_SRCS_tests_memory_manager ${NEO_CORE_PAGE_FAULT_MANAGER_LINUX_TESTS}) endif() target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_memory_manager}) add_subdirectories() cpu_page_fault_manager_memory_sync_tests.cpp000066400000000000000000000113631422164147700356000ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/memory_manager/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/memory_manager/unified_memory_manager.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "shared/test/common/test_macros/test_checks_shared.h" #include "shared/test/unit_test/page_fault_manager/cpu_page_fault_manager_tests_fixture.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" 
#include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "gtest/gtest.h" using namespace NEO; struct CommandQueueMock : public MockCommandQueue { cl_int enqueueSVMUnmap(void *svmPtr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool externalAppCall) override { transferToGpuCalled++; return CL_SUCCESS; } cl_int enqueueSVMMap(cl_bool blockingMap, cl_map_flags mapFlags, void *svmPtr, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool externalAppCall) override { transferToCpuCalled++; passedMapFlags = mapFlags; return CL_SUCCESS; } cl_int finish() override { finishCalled++; return CL_SUCCESS; } int transferToCpuCalled = 0; int transferToGpuCalled = 0; int finishCalled = 0; uint64_t passedMapFlags = 0; }; TEST_F(PageFaultManagerTest, givenUnifiedMemoryAllocWhenSynchronizeMemoryThenEnqueueProperCalls) { MockExecutionEnvironment executionEnvironment; REQUIRE_SVM_OR_SKIP(executionEnvironment.rootDeviceEnvironments[0]->getHardwareInfo()); auto memoryManager = std::make_unique(executionEnvironment); auto svmAllocsManager = std::make_unique(memoryManager.get(), false); auto device = std::unique_ptr(new MockClDevice{MockDevice::createWithNewExecutionEnvironment(nullptr)}); auto rootDeviceIndex = device->getRootDeviceIndex(); std::set rootDeviceIndices{rootDeviceIndex}; std::map deviceBitfields{{rootDeviceIndex, device->getDeviceBitfield()}}; void *alloc = svmAllocsManager->createSVMAlloc(256, {}, rootDeviceIndices, deviceBitfields); auto cmdQ = std::make_unique(); cmdQ->device = device.get(); pageFaultManager->insertAllocation(alloc, 256, svmAllocsManager.get(), cmdQ.get(), {}); pageFaultManager->baseCpuTransfer(alloc, 10, cmdQ.get()); EXPECT_EQ(cmdQ->transferToCpuCalled, 1); EXPECT_EQ(cmdQ->transferToGpuCalled, 0); EXPECT_EQ(cmdQ->finishCalled, 0); pageFaultManager->baseGpuTransfer(alloc, cmdQ.get()); EXPECT_EQ(cmdQ->transferToCpuCalled, 1); EXPECT_EQ(cmdQ->transferToGpuCalled, 1); 
EXPECT_EQ(cmdQ->finishCalled, 1); svmAllocsManager->freeSVMAlloc(alloc); cmdQ->device = nullptr; } TEST_F(PageFaultManagerTest, givenUnifiedMemoryAllocWhenGpuTransferIsInvokedThenInsertMapOperation) { MockExecutionEnvironment executionEnvironment; REQUIRE_SVM_OR_SKIP(executionEnvironment.rootDeviceEnvironments[0]->getHardwareInfo()); struct MockSVMAllocsManager : SVMAllocsManager { using SVMAllocsManager::SVMAllocsManager; void insertSvmMapOperation(void *regionSvmPtr, size_t regionSize, void *baseSvmPtr, size_t offset, bool readOnlyMap) override { SVMAllocsManager::insertSvmMapOperation(regionSvmPtr, regionSize, baseSvmPtr, offset, readOnlyMap); insertSvmMapOperationCalled++; } int insertSvmMapOperationCalled = 0; }; auto memoryManager = std::make_unique(executionEnvironment); auto svmAllocsManager = std::make_unique(memoryManager.get(), false); auto device = std::unique_ptr(new MockClDevice{MockDevice::createWithNewExecutionEnvironment(nullptr)}); auto rootDeviceIndex = device->getRootDeviceIndex(); std::set rootDeviceIndices{rootDeviceIndex}; std::map deviceBitfields{{rootDeviceIndex, device->getDeviceBitfield()}}; void *alloc = svmAllocsManager->createSVMAlloc(256, {}, rootDeviceIndices, deviceBitfields); auto cmdQ = std::make_unique(); cmdQ->device = device.get(); pageFaultManager->insertAllocation(alloc, 256, svmAllocsManager.get(), cmdQ.get(), {}); EXPECT_EQ(svmAllocsManager->insertSvmMapOperationCalled, 0); pageFaultManager->baseGpuTransfer(alloc, cmdQ.get()); EXPECT_EQ(svmAllocsManager->insertSvmMapOperationCalled, 1); svmAllocsManager->freeSVMAlloc(alloc); cmdQ->device = nullptr; } compute-runtime-22.14.22890/opencl/test/unit_test/memory_manager/host_ptr_manager_tests.cpp000066400000000000000000001466511422164147700321300ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/helpers/aligned_memory.h" 
#include "shared/source/helpers/constants.h" #include "shared/source/helpers/ptr_math.h" #include "shared/source/os_interface/os_context.h" #include "shared/test/common/fixtures/memory_manager_fixture.h" #include "shared/test/common/helpers/engine_descriptor_helper.h" #include "shared/test/common/mocks/mock_allocation_properties.h" #include "shared/test/common/mocks/mock_csr.h" #include "shared/test/common/mocks/mock_host_ptr_manager.h" #include "shared/test/common/mocks/mock_internal_allocation_storage.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "shared/test/common/test_macros/test.h" using namespace NEO; struct HostPtrManagerTest : ::testing::Test { const uint32_t rootDeviceIndex = 1u; }; TEST_F(HostPtrManagerTest, GivenAlignedPointerAndAlignedSizeWhenGettingAllocationRequirementsThenOneFragmentIsReturned) { auto size = MemoryConstants::pageSize * 10; void *ptr = (void *)0x1000; AllocationRequirements reqs = MockHostPtrManager::getAllocationRequirements(rootDeviceIndex, ptr, size); EXPECT_EQ(1u, reqs.requiredFragmentsCount); EXPECT_EQ(reqs.allocationFragments[0].fragmentPosition, FragmentPosition::MIDDLE); EXPECT_EQ(reqs.allocationFragments[1].fragmentPosition, FragmentPosition::NONE); EXPECT_EQ(reqs.allocationFragments[2].fragmentPosition, FragmentPosition::NONE); EXPECT_EQ(reqs.totalRequiredSize, size); EXPECT_EQ(ptr, reqs.allocationFragments[0].allocationPtr); EXPECT_EQ(size, reqs.allocationFragments[0].allocationSize); EXPECT_EQ(nullptr, reqs.allocationFragments[1].allocationPtr); EXPECT_EQ(nullptr, reqs.allocationFragments[2].allocationPtr); EXPECT_EQ(rootDeviceIndex, reqs.rootDeviceIndex); } TEST_F(HostPtrManagerTest, GivenAlignedPointerAndNotAlignedSizeWhenGettingAllocationRequirementsThenTwoFragmentsAreReturned) { auto size = MemoryConstants::pageSize * 10 - 1; void *ptr = (void *)0x1000; AllocationRequirements reqs = MockHostPtrManager::getAllocationRequirements(rootDeviceIndex, ptr, size); EXPECT_EQ(2u, 
reqs.requiredFragmentsCount); EXPECT_EQ(reqs.allocationFragments[0].fragmentPosition, FragmentPosition::MIDDLE); EXPECT_EQ(reqs.allocationFragments[1].fragmentPosition, FragmentPosition::TRAILING); EXPECT_EQ(reqs.allocationFragments[2].fragmentPosition, FragmentPosition::NONE); EXPECT_EQ(reqs.totalRequiredSize, alignUp(size, MemoryConstants::pageSize)); EXPECT_EQ(ptr, reqs.allocationFragments[0].allocationPtr); EXPECT_EQ(9 * MemoryConstants::pageSize, reqs.allocationFragments[0].allocationSize); auto trailingPtr = alignDown(ptrOffset(ptr, size), MemoryConstants::pageSize); EXPECT_EQ(trailingPtr, reqs.allocationFragments[1].allocationPtr); EXPECT_EQ(MemoryConstants::pageSize, reqs.allocationFragments[1].allocationSize); EXPECT_EQ(nullptr, reqs.allocationFragments[2].allocationPtr); EXPECT_EQ(0u, reqs.allocationFragments[2].allocationSize); } TEST_F(HostPtrManagerTest, GivenNotAlignedPointerAndNotAlignedSizeWhenGettingAllocationRequirementsThenThreeFragmentsAreReturned) { auto size = MemoryConstants::pageSize * 10 - 1; void *ptr = (void *)0x1045; AllocationRequirements reqs = MockHostPtrManager::getAllocationRequirements(rootDeviceIndex, ptr, size); EXPECT_EQ(3u, reqs.requiredFragmentsCount); EXPECT_EQ(reqs.allocationFragments[0].fragmentPosition, FragmentPosition::LEADING); EXPECT_EQ(reqs.allocationFragments[1].fragmentPosition, FragmentPosition::MIDDLE); EXPECT_EQ(reqs.allocationFragments[2].fragmentPosition, FragmentPosition::TRAILING); auto leadingPtr = (void *)0x1000; auto middlePtr = (void *)0x2000; auto trailingPtr = (void *)0xb000; EXPECT_EQ(reqs.totalRequiredSize, 11 * MemoryConstants::pageSize); EXPECT_EQ(leadingPtr, reqs.allocationFragments[0].allocationPtr); EXPECT_EQ(MemoryConstants::pageSize, reqs.allocationFragments[0].allocationSize); EXPECT_EQ(middlePtr, reqs.allocationFragments[1].allocationPtr); EXPECT_EQ(9 * MemoryConstants::pageSize, reqs.allocationFragments[1].allocationSize); EXPECT_EQ(trailingPtr, reqs.allocationFragments[2].allocationPtr); 
EXPECT_EQ(MemoryConstants::pageSize, reqs.allocationFragments[2].allocationSize); } TEST_F(HostPtrManagerTest, GivenNotAlignedPointerAndNotAlignedSizeWithinOnePageWhenGettingAllocationRequirementsThenOneFragmentIsReturned) { auto size = 200; void *ptr = (void *)0x1045; AllocationRequirements reqs = MockHostPtrManager::getAllocationRequirements(rootDeviceIndex, ptr, size); EXPECT_EQ(1u, reqs.requiredFragmentsCount); EXPECT_EQ(reqs.allocationFragments[0].fragmentPosition, FragmentPosition::LEADING); EXPECT_EQ(reqs.allocationFragments[1].fragmentPosition, FragmentPosition::NONE); EXPECT_EQ(reqs.allocationFragments[2].fragmentPosition, FragmentPosition::NONE); auto leadingPtr = (void *)0x1000; EXPECT_EQ(reqs.totalRequiredSize, MemoryConstants::pageSize); EXPECT_EQ(leadingPtr, reqs.allocationFragments[0].allocationPtr); EXPECT_EQ(MemoryConstants::pageSize, reqs.allocationFragments[0].allocationSize); EXPECT_EQ(nullptr, reqs.allocationFragments[1].allocationPtr); EXPECT_EQ(0u, reqs.allocationFragments[1].allocationSize); EXPECT_EQ(nullptr, reqs.allocationFragments[2].allocationPtr); EXPECT_EQ(0u, reqs.allocationFragments[2].allocationSize); } TEST_F(HostPtrManagerTest, GivenNotAlignedPointerAndNotAlignedSizeWithinTwoPagesWhenGettingAllocationRequirementsThenTwoFragmentsAreReturned) { auto size = MemoryConstants::pageSize; void *ptr = (void *)0x1045; AllocationRequirements reqs = MockHostPtrManager::getAllocationRequirements(rootDeviceIndex, ptr, size); EXPECT_EQ(2u, reqs.requiredFragmentsCount); EXPECT_EQ(reqs.allocationFragments[0].fragmentPosition, FragmentPosition::LEADING); EXPECT_EQ(reqs.allocationFragments[1].fragmentPosition, FragmentPosition::TRAILING); EXPECT_EQ(reqs.allocationFragments[2].fragmentPosition, FragmentPosition::NONE); auto leadingPtr = (void *)0x1000; auto trailingPtr = (void *)0x2000; EXPECT_EQ(reqs.totalRequiredSize, 2 * MemoryConstants::pageSize); EXPECT_EQ(leadingPtr, reqs.allocationFragments[0].allocationPtr); 
EXPECT_EQ(MemoryConstants::pageSize, reqs.allocationFragments[0].allocationSize); EXPECT_EQ(trailingPtr, reqs.allocationFragments[1].allocationPtr); EXPECT_EQ(MemoryConstants::pageSize, reqs.allocationFragments[1].allocationSize); EXPECT_EQ(nullptr, reqs.allocationFragments[2].allocationPtr); EXPECT_EQ(0u, reqs.allocationFragments[2].allocationSize); } TEST_F(HostPtrManagerTest, GivenAlignedPointerAndAlignedSizeOfOnePageWhenGettingAllocationRequirementsThenOnlyMiddleFragmentIsReturned) { auto size = MemoryConstants::pageSize * 10; void *ptr = (void *)0x1000; AllocationRequirements reqs = MockHostPtrManager::getAllocationRequirements(rootDeviceIndex, ptr, size); EXPECT_EQ(1u, reqs.requiredFragmentsCount); EXPECT_EQ(reqs.allocationFragments[0].fragmentPosition, FragmentPosition::MIDDLE); EXPECT_EQ(reqs.allocationFragments[1].fragmentPosition, FragmentPosition::NONE); EXPECT_EQ(reqs.allocationFragments[2].fragmentPosition, FragmentPosition::NONE); auto middlePtr = (void *)0x1000; EXPECT_EQ(reqs.totalRequiredSize, 10 * MemoryConstants::pageSize); EXPECT_EQ(middlePtr, reqs.allocationFragments[0].allocationPtr); EXPECT_EQ(10 * MemoryConstants::pageSize, reqs.allocationFragments[0].allocationSize); EXPECT_EQ(nullptr, reqs.allocationFragments[1].allocationPtr); EXPECT_EQ(0u, reqs.allocationFragments[1].allocationSize); EXPECT_EQ(nullptr, reqs.allocationFragments[2].allocationPtr); EXPECT_EQ(0u, reqs.allocationFragments[2].allocationSize); } TEST_F(HostPtrManagerTest, GivenNotAlignedPointerAndSizeThatFitsToPageWhenGettingAllocationRequirementsThenLeadingAndMiddleFragmentsAreReturned) { auto size = MemoryConstants::pageSize * 10 - 1; void *ptr = (void *)0x1001; AllocationRequirements reqs = MockHostPtrManager::getAllocationRequirements(rootDeviceIndex, ptr, size); EXPECT_EQ(2u, reqs.requiredFragmentsCount); EXPECT_EQ(reqs.allocationFragments[0].fragmentPosition, FragmentPosition::LEADING); EXPECT_EQ(reqs.allocationFragments[1].fragmentPosition, FragmentPosition::MIDDLE); 
EXPECT_EQ(reqs.allocationFragments[2].fragmentPosition, FragmentPosition::NONE); auto leadingPtr = (void *)0x1000; auto middlePtr = (void *)0x2000; EXPECT_EQ(reqs.totalRequiredSize, 10 * MemoryConstants::pageSize); EXPECT_EQ(leadingPtr, reqs.allocationFragments[0].allocationPtr); EXPECT_EQ(MemoryConstants::pageSize, reqs.allocationFragments[0].allocationSize); EXPECT_EQ(middlePtr, reqs.allocationFragments[1].allocationPtr); EXPECT_EQ(9 * MemoryConstants::pageSize, reqs.allocationFragments[1].allocationSize); EXPECT_EQ(nullptr, reqs.allocationFragments[2].allocationPtr); EXPECT_EQ(0u, reqs.allocationFragments[2].allocationSize); } TEST_F(HostPtrManagerTest, GivenAlignedPointerAndPageSizeWhenGettingAllocationRequirementsThenOnlyMiddleFragmentIsReturned) { auto size = MemoryConstants::pageSize; void *ptr = (void *)0x1000; AllocationRequirements reqs = MockHostPtrManager::getAllocationRequirements(rootDeviceIndex, ptr, size); EXPECT_EQ(1u, reqs.requiredFragmentsCount); EXPECT_EQ(reqs.allocationFragments[0].fragmentPosition, FragmentPosition::MIDDLE); EXPECT_EQ(reqs.allocationFragments[1].fragmentPosition, FragmentPosition::NONE); EXPECT_EQ(reqs.allocationFragments[2].fragmentPosition, FragmentPosition::NONE); auto middlePtr = (void *)0x1000; EXPECT_EQ(reqs.totalRequiredSize, MemoryConstants::pageSize); EXPECT_EQ(middlePtr, reqs.allocationFragments[0].allocationPtr); EXPECT_EQ(MemoryConstants::pageSize, reqs.allocationFragments[0].allocationSize); EXPECT_EQ(nullptr, reqs.allocationFragments[1].allocationPtr); EXPECT_EQ(0u, reqs.allocationFragments[1].allocationSize); EXPECT_EQ(nullptr, reqs.allocationFragments[2].allocationPtr); EXPECT_EQ(0u, reqs.allocationFragments[2].allocationSize); } TEST_F(HostPtrManagerTest, GivenAllocationRequirementsForMiddleAllocationThatIsNotStoredInManagerWhenGettingAllocationRequirementsThenNullptrIsReturned) { auto size = MemoryConstants::pageSize; void *ptr = (void *)0x1000; auto reqs = 
MockHostPtrManager::getAllocationRequirements(rootDeviceIndex, ptr, size); MockHostPtrManager hostPtrManager; auto gpuAllocationFragments = hostPtrManager.populateAlreadyAllocatedFragments(reqs); EXPECT_EQ(nullptr, gpuAllocationFragments.fragmentStorageData[0].osHandleStorage); EXPECT_EQ(ptr, gpuAllocationFragments.fragmentStorageData[0].cpuPtr); EXPECT_EQ(nullptr, gpuAllocationFragments.fragmentStorageData[1].osHandleStorage); EXPECT_EQ(nullptr, gpuAllocationFragments.fragmentStorageData[1].cpuPtr); EXPECT_EQ(nullptr, gpuAllocationFragments.fragmentStorageData[2].osHandleStorage); EXPECT_EQ(nullptr, gpuAllocationFragments.fragmentStorageData[2].cpuPtr); } TEST_F(HostPtrManagerTest, GivenAllocationRequirementsForMiddleAllocationThatIsStoredInManagerWhenGettingAllocationRequirementsThenProperAllocationIsReturnedAndRefCountIncreased) { MockHostPtrManager hostPtrManager; FragmentStorage allocationFragment; auto cpuPtr = (void *)0x1000; auto ptrSize = MemoryConstants::pageSize; auto osInternalStorage = (OsHandle *)0x12312; allocationFragment.fragmentCpuPointer = cpuPtr; allocationFragment.fragmentSize = ptrSize; allocationFragment.osInternalStorage = osInternalStorage; hostPtrManager.storeFragment(rootDeviceIndex, allocationFragment); auto reqs = MockHostPtrManager::getAllocationRequirements(rootDeviceIndex, cpuPtr, ptrSize); auto gpuAllocationFragments = hostPtrManager.populateAlreadyAllocatedFragments(reqs); EXPECT_EQ(osInternalStorage, gpuAllocationFragments.fragmentStorageData[0].osHandleStorage); EXPECT_EQ(cpuPtr, gpuAllocationFragments.fragmentStorageData[0].cpuPtr); EXPECT_EQ(nullptr, gpuAllocationFragments.fragmentStorageData[1].osHandleStorage); EXPECT_EQ(nullptr, gpuAllocationFragments.fragmentStorageData[1].cpuPtr); EXPECT_EQ(nullptr, gpuAllocationFragments.fragmentStorageData[2].osHandleStorage); EXPECT_EQ(nullptr, gpuAllocationFragments.fragmentStorageData[2].cpuPtr); auto fragment = hostPtrManager.getFragment({cpuPtr, rootDeviceIndex}); EXPECT_EQ(2, 
fragment->refCount); } TEST_F(HostPtrManagerTest, GivenAllocationRequirementsForAllocationWithinSizeOfStoredAllocationInManagerWhenGettingAllocationRequirementsThenProperAllocationIsReturned) { MockHostPtrManager hostPtrManager; FragmentStorage allocationFragment; auto cpuPtr = (void *)0x1000; auto ptrSize = MemoryConstants::pageSize * 10; auto osInternalStorage = (OsHandle *)0x12312; allocationFragment.fragmentCpuPointer = cpuPtr; allocationFragment.fragmentSize = ptrSize; allocationFragment.osInternalStorage = osInternalStorage; hostPtrManager.storeFragment(rootDeviceIndex, allocationFragment); auto reqs = MockHostPtrManager::getAllocationRequirements(rootDeviceIndex, cpuPtr, MemoryConstants::pageSize); auto gpuAllocationFragments = hostPtrManager.populateAlreadyAllocatedFragments(reqs); EXPECT_EQ(osInternalStorage, gpuAllocationFragments.fragmentStorageData[0].osHandleStorage); EXPECT_EQ(cpuPtr, gpuAllocationFragments.fragmentStorageData[0].cpuPtr); EXPECT_EQ(nullptr, gpuAllocationFragments.fragmentStorageData[1].osHandleStorage); EXPECT_EQ(nullptr, gpuAllocationFragments.fragmentStorageData[1].cpuPtr); EXPECT_EQ(nullptr, gpuAllocationFragments.fragmentStorageData[2].osHandleStorage); EXPECT_EQ(nullptr, gpuAllocationFragments.fragmentStorageData[2].cpuPtr); auto fragment = hostPtrManager.getFragment({cpuPtr, rootDeviceIndex}); EXPECT_EQ(2, fragment->refCount); } TEST_F(HostPtrManagerTest, WhenStoringFragmentThenContainerCountIsIncremented) { MockHostPtrManager hostPtrManager; FragmentStorage allocationFragment; EXPECT_EQ(allocationFragment.fragmentCpuPointer, nullptr); EXPECT_EQ(allocationFragment.fragmentSize, 0u); EXPECT_EQ(allocationFragment.refCount, 0); hostPtrManager.storeFragment(rootDeviceIndex, allocationFragment); EXPECT_EQ(1u, hostPtrManager.getFragmentCount()); } TEST_F(HostPtrManagerTest, WhenStoringFragmentTwiceThenContainerCountIsIncrementedOnce) { MockHostPtrManager hostPtrManager; FragmentStorage allocationFragment; 
hostPtrManager.storeFragment(rootDeviceIndex, allocationFragment); hostPtrManager.storeFragment(rootDeviceIndex, allocationFragment); EXPECT_EQ(1u, hostPtrManager.getFragmentCount()); } TEST_F(HostPtrManagerTest, GivenEmptyHostPtrManagerWhenAskingForFragmentThenNullptrIsReturned) { MockHostPtrManager hostPtrManager; auto fragment = hostPtrManager.getFragment({(void *)0x10121, rootDeviceIndex}); EXPECT_EQ(nullptr, fragment); EXPECT_EQ(0u, hostPtrManager.getFragmentCount()); } TEST_F(HostPtrManagerTest, GivenNonEmptyHostPtrManagerWhenAskingForFragmentThenProperFragmentIsReturnedWithRefCountOne) { MockHostPtrManager hostPtrManager; FragmentStorage fragment; void *cpuPtr = (void *)0x10121; auto fragmentSize = 101u; fragment.fragmentCpuPointer = cpuPtr; fragment.fragmentSize = fragmentSize; fragment.refCount = 0; hostPtrManager.storeFragment(rootDeviceIndex, fragment); auto retFragment = hostPtrManager.getFragment({cpuPtr, rootDeviceIndex}); EXPECT_NE(retFragment, &fragment); EXPECT_EQ(1, retFragment->refCount); EXPECT_EQ(cpuPtr, retFragment->fragmentCpuPointer); EXPECT_EQ(fragmentSize, retFragment->fragmentSize); EXPECT_EQ(1u, hostPtrManager.getFragmentCount()); } TEST_F(HostPtrManagerTest, GivenHostPtrManagerFilledTwiceWithTheSamePointerWhenAskingForFragmentThenProperFragmentIsReturnedWithRefCountTwo) { MockHostPtrManager hostPtrManager; FragmentStorage fragment; void *cpuPtr = (void *)0x10121; auto fragmentSize = 101u; fragment.fragmentCpuPointer = cpuPtr; fragment.fragmentSize = fragmentSize; fragment.refCount = 0; hostPtrManager.storeFragment(rootDeviceIndex, fragment); hostPtrManager.storeFragment(rootDeviceIndex, fragment); auto retFragment = hostPtrManager.getFragment({cpuPtr, rootDeviceIndex}); EXPECT_NE(retFragment, &fragment); EXPECT_EQ(2, retFragment->refCount); EXPECT_EQ(cpuPtr, retFragment->fragmentCpuPointer); EXPECT_EQ(fragmentSize, retFragment->fragmentSize); EXPECT_EQ(1u, hostPtrManager.getFragmentCount()); } TEST_F(HostPtrManagerTest, 
GivenHostPtrManagerFilledWithFragmentsWhenFragmentIsBeingReleasedThenManagerMaintainsProperRefferenceCount) {
    // The same fragment is stored twice, so the test expects the first release to only drop
    // the reference count and the second release to remove the entry.
    MockHostPtrManager hostPtrManager;

    void *hostPtr = reinterpret_cast<void *>(0x1000);
    FragmentStorage storedFragment;
    storedFragment.fragmentCpuPointer = hostPtr;
    storedFragment.fragmentSize = MemoryConstants::pageSize;

    hostPtrManager.storeFragment(rootDeviceIndex, storedFragment);
    hostPtrManager.storeFragment(rootDeviceIndex, storedFragment);
    ASSERT_EQ(1u, hostPtrManager.getFragmentCount());

    auto readyForRelease = hostPtrManager.releaseHostPtr(rootDeviceIndex, hostPtr);
    EXPECT_FALSE(readyForRelease);
    auto remaining = hostPtrManager.getFragment({hostPtr, rootDeviceIndex});
    EXPECT_EQ(1, remaining->refCount);

    readyForRelease = hostPtrManager.releaseHostPtr(rootDeviceIndex, hostPtr);
    EXPECT_TRUE(readyForRelease);
    remaining = hostPtrManager.getFragment({hostPtr, rootDeviceIndex});
    EXPECT_EQ(nullptr, remaining);
    EXPECT_EQ(0u, hostPtrManager.getFragmentCount());
}

// Two fragments stored from an OsHandleStorage are both dropped by releaseHandleStorage().
TEST_F(HostPtrManagerTest, GivenOsHandleStorageWhenAskedToStoreTheFragmentThenFragmentIsStoredProperly) {
    void *firstPtr = reinterpret_cast<void *>(0x1000);
    void *secondPtr = reinterpret_cast<void *>(0x2000);
    auto firstSize = MemoryConstants::pageSize;
    auto secondSize = MemoryConstants::pageSize * 2;

    OsHandleStorage storage;
    storage.fragmentStorageData[0].cpuPtr = firstPtr;
    storage.fragmentStorageData[0].fragmentSize = firstSize;
    storage.fragmentStorageData[1].cpuPtr = secondPtr;
    storage.fragmentStorageData[1].fragmentSize = secondSize;

    MockHostPtrManager hostPtrManager;
    EXPECT_EQ(0u, hostPtrManager.getFragmentCount());

    hostPtrManager.storeFragment(rootDeviceIndex, storage.fragmentStorageData[0]);
    hostPtrManager.storeFragment(rootDeviceIndex, storage.fragmentStorageData[1]);
    EXPECT_EQ(2u, hostPtrManager.getFragmentCount());

    hostPtrManager.releaseHandleStorage(rootDeviceIndex, storage);
    EXPECT_EQ(0u, hostPtrManager.getFragmentCount());
}

// Stores one fragment per requirement of an unaligned ten-page host pointer, then checks that
// populating the same requirements reuses all three stored fragments (reference count 2 each)
// and that every fragment needs two releases before the manager becomes empty again.
TEST_F(HostPtrManagerTest, GivenHostPtrFilledWith3TripleFragmentsWhenAskedForPopulationThenAllFragmentsAreResued) {
    void *cpuPtr = reinterpret_cast<void *>(0x1001);
    auto fragmentSize = MemoryConstants::pageSize * 10;

    MockHostPtrManager hostPtrManager;
    auto reqs = hostPtrManager.getAllocationRequirements(rootDeviceIndex, cpuPtr, fragmentSize);
    ASSERT_EQ(3u, reqs.requiredFragmentsCount);

    FragmentStorage fragments[maxFragmentsCount];
    // Store a fragment for every requirement.
    for (int i = 0; i < maxFragmentsCount; i++) {
        // The cast needs an explicit target type; fragmentCpuPointer is assigned from void *
        // everywhere else in this file, so the qualifier is dropped to void *.
        fragments[i].fragmentCpuPointer = const_cast<void *>(reqs.allocationFragments[i].allocationPtr);
        fragments[i].fragmentSize = reqs.allocationFragments[i].allocationSize;
        hostPtrManager.storeFragment(rootDeviceIndex, fragments[i]);
    }
    EXPECT_EQ(3u, hostPtrManager.getFragmentCount());

    // Populating must not add entries, only take one more reference on each stored fragment.
    auto osHandles = hostPtrManager.populateAlreadyAllocatedFragments(reqs);
    EXPECT_EQ(3u, hostPtrManager.getFragmentCount());

    for (int i = 0; i < maxFragmentsCount; i++) {
        EXPECT_EQ(osHandles.fragmentStorageData[i].cpuPtr, reqs.allocationFragments[i].allocationPtr);
        EXPECT_EQ(osHandles.fragmentStorageData[i].fragmentSize, reqs.allocationFragments[i].allocationSize);

        auto fragment = hostPtrManager.getFragment({const_cast<void *>(reqs.allocationFragments[i].allocationPtr), rootDeviceIndex});
        ASSERT_NE(nullptr, fragment);
        EXPECT_EQ(2, fragment->refCount);
        EXPECT_EQ(osHandles.fragmentStorageData[i].cpuPtr, fragment->fragmentCpuPointer);
    }

    // First round of releases: every fragment stays, with one reference left.
    for (int i = 0; i < maxFragmentsCount; i++) {
        hostPtrManager.releaseHostPtr(rootDeviceIndex, fragments[i].fragmentCpuPointer);
    }
    EXPECT_EQ(3u, hostPtrManager.getFragmentCount());

    for (int i = 0; i < maxFragmentsCount; i++) {
        auto fragment = hostPtrManager.getFragment({const_cast<void *>(reqs.allocationFragments[i].allocationPtr), rootDeviceIndex});
        ASSERT_NE(nullptr, fragment);
        EXPECT_EQ(1, fragment->refCount);
    }

    // Second round of releases empties the manager.
    for (int i = 0; i < maxFragmentsCount; i++) {
        hostPtrManager.releaseHostPtr(rootDeviceIndex, fragments[i].fragmentCpuPointer);
    }
    EXPECT_EQ(0u, hostPtrManager.getFragmentCount());
}

TEST_F(HostPtrManagerTest, GivenFragmentSizeZeroWhenGettingFragmentThenNullptrIsReturned) {
    HostPtrManager hostPtrManager;
    auto ptr1 =
(void *)0x010000; FragmentStorage fragment1; fragment1.fragmentCpuPointer = ptr1; fragment1.fragmentSize = 0; hostPtrManager.storeFragment(rootDeviceIndex, fragment1); auto ptr2 = (void *)0x040000; FragmentStorage fragment2; fragment2.fragmentCpuPointer = ptr2; fragment2.fragmentSize = 0; hostPtrManager.storeFragment(rootDeviceIndex, fragment2); auto cptr1 = (void *)0x00F000; auto frag1 = hostPtrManager.getFragment({cptr1, rootDeviceIndex}); EXPECT_EQ(frag1, nullptr); auto cptr2 = (void *)0x010000; auto frag2 = hostPtrManager.getFragment({cptr2, rootDeviceIndex}); EXPECT_NE(frag2, nullptr); auto cptr3 = (void *)0x010001; auto frag3 = hostPtrManager.getFragment({cptr3, rootDeviceIndex}); EXPECT_EQ(frag3, nullptr); auto cptr4 = (void *)0x020000; auto frag4 = hostPtrManager.getFragment({cptr4, rootDeviceIndex}); EXPECT_EQ(frag4, nullptr); auto cptr5 = (void *)0x040000; auto frag5 = hostPtrManager.getFragment({cptr5, rootDeviceIndex}); EXPECT_NE(frag5, nullptr); auto cptr6 = (void *)0x040001; auto frag6 = hostPtrManager.getFragment({cptr6, rootDeviceIndex}); EXPECT_EQ(frag6, nullptr); auto cptr7 = (void *)0x060000; auto frag7 = hostPtrManager.getFragment({cptr7, rootDeviceIndex}); EXPECT_EQ(frag7, nullptr); } TEST_F(HostPtrManagerTest, GivenFragmentSizeNonZeroWhenGettingFragmentThenCorrectAllocationIsReturned) { MockHostPtrManager hostPtrManager; uint32_t rootDeviceIndex2 = 2u; auto size1 = MemoryConstants::pageSize; auto ptr11 = (void *)0x010000; FragmentStorage fragment11; fragment11.fragmentCpuPointer = ptr11; fragment11.fragmentSize = size1; hostPtrManager.storeFragment(rootDeviceIndex, fragment11); auto ptr12 = (void *)0x020000; FragmentStorage fragment12; fragment12.fragmentCpuPointer = ptr12; fragment12.fragmentSize = size1; hostPtrManager.storeFragment(rootDeviceIndex2, fragment12); auto ptr21 = (void *)0x040000; FragmentStorage fragment21; fragment21.fragmentCpuPointer = ptr21; fragment21.fragmentSize = size1; hostPtrManager.storeFragment(rootDeviceIndex, 
fragment21); auto ptr22 = (void *)0x060000; FragmentStorage fragment22; fragment22.fragmentCpuPointer = ptr22; fragment22.fragmentSize = size1; hostPtrManager.storeFragment(rootDeviceIndex2, fragment22); auto cptr1 = (void *)0x010060; auto frag11 = hostPtrManager.getFragment({cptr1, rootDeviceIndex}); EXPECT_NE(frag11, nullptr); auto frag12 = hostPtrManager.getFragment({cptr1, rootDeviceIndex2}); EXPECT_EQ(frag12, nullptr); auto cptr2 = (void *)0x020000; auto frag21 = hostPtrManager.getFragment({cptr2, rootDeviceIndex}); EXPECT_EQ(frag21, nullptr); auto frag22 = hostPtrManager.getFragment({cptr2, rootDeviceIndex2}); EXPECT_NE(frag22, nullptr); auto cptr3 = (void *)0x040060; auto frag31 = hostPtrManager.getFragment({cptr3, rootDeviceIndex}); EXPECT_NE(frag31, nullptr); auto frag32 = hostPtrManager.getFragment({cptr3, rootDeviceIndex2}); EXPECT_EQ(frag32, nullptr); auto cptr4 = (void *)0x060000; auto frag41 = hostPtrManager.getFragment({cptr4, rootDeviceIndex}); EXPECT_EQ(frag41, nullptr); auto frag42 = hostPtrManager.getFragment({cptr4, rootDeviceIndex2}); EXPECT_NE(frag42, nullptr); AllocationRequirements requiredAllocations; requiredAllocations.rootDeviceIndex = rootDeviceIndex; auto ptr3 = (void *)0x040000; auto size3 = MemoryConstants::pageSize * 2; requiredAllocations = hostPtrManager.getAllocationRequirements(rootDeviceIndex, ptr3, size3); auto catchme = false; try { OsHandleStorage st = hostPtrManager.populateAlreadyAllocatedFragments(requiredAllocations); EXPECT_EQ(st.fragmentCount, 0u); } catch (...) 
{
        catchme = true;
    }
    EXPECT_TRUE(catchme);
}

// One-page fragments are stored alternately for two root devices; every query below checks
// both the returned fragment and the reported overlap status.
TEST_F(HostPtrManagerTest, WhenCheckingForOverlapsThenCorrectStatusIsReturned) {
    MockHostPtrManager hostPtrManager;
    uint32_t secondRootDeviceIndex = 2u;
    auto onePage = MemoryConstants::pageSize;

    // Stores a one-page fragment starting at ptr for the given root device.
    auto storePage = [&](auto deviceIndex, void *ptr) {
        FragmentStorage pageFragment;
        pageFragment.fragmentCpuPointer = ptr;
        pageFragment.fragmentSize = onePage;
        hostPtrManager.storeFragment(deviceIndex, pageFragment);
    };

    auto firstPage = reinterpret_cast<void *>(0x010000);
    storePage(rootDeviceIndex, firstPage);
    storePage(secondRootDeviceIndex, reinterpret_cast<void *>(0x020000));
    storePage(rootDeviceIndex, reinterpret_cast<void *>(0x040000));
    storePage(secondRootDeviceIndex, reinterpret_cast<void *>(0x060000));

    OverlapStatus status;

    // Queries against the first fragment of rootDeviceIndex.
    auto found = hostPtrManager.getFragmentAndCheckForOverlaps(rootDeviceIndex, reinterpret_cast<void *>(0x010060), 1u, status);
    EXPECT_NE(found, nullptr);
    EXPECT_EQ(status, OverlapStatus::FRAGMENT_WITHIN_STORED_FRAGMENT);

    found = hostPtrManager.getFragmentAndCheckForOverlaps(rootDeviceIndex, firstPage, onePage, status);
    EXPECT_NE(found, nullptr);
    EXPECT_EQ(status, OverlapStatus::FRAGMENT_WITH_EXACT_SIZE_AS_STORED_FRAGMENT);

    found = hostPtrManager.getFragmentAndCheckForOverlaps(rootDeviceIndex, firstPage, onePage - 1, status);
    EXPECT_NE(found, nullptr);
    EXPECT_EQ(status, OverlapStatus::FRAGMENT_WITHIN_STORED_FRAGMENT);

    // Same range, other root device.
    found = hostPtrManager.getFragmentAndCheckForOverlaps(secondRootDeviceIndex, firstPage, onePage, status);
    EXPECT_EQ(found, nullptr);
    EXPECT_EQ(status, OverlapStatus::FRAGMENT_NOT_OVERLAPING_WITH_ANY_OTHER);

    // Address stored only for the second root device.
    auto missOnFirstDevice = hostPtrManager.getFragmentAndCheckForOverlaps(rootDeviceIndex, reinterpret_cast<void *>(0x020000), 1u, status);
    EXPECT_EQ(missOnFirstDevice, nullptr);
    EXPECT_EQ(status, OverlapStatus::FRAGMENT_NOT_OVERLAPING_WITH_ANY_OTHER);

    auto insideSecond = hostPtrManager.getFragmentAndCheckForOverlaps(rootDeviceIndex, reinterpret_cast<void *>(0x040060), 1u, status);
    EXPECT_NE(insideSecond, nullptr);
    EXPECT_EQ(status, OverlapStatus::FRAGMENT_WITHIN_STORED_FRAGMENT);

    auto otherDeviceOnly = hostPtrManager.getFragmentAndCheckForOverlaps(rootDeviceIndex, reinterpret_cast<void *>(0x060000), 1u, status);
    EXPECT_EQ(otherDeviceOnly, nullptr);
    EXPECT_EQ(status, OverlapStatus::FRAGMENT_NOT_OVERLAPING_WITH_ANY_OTHER);

    auto slightlySmaller = hostPtrManager.getFragmentAndCheckForOverlaps(rootDeviceIndex, reinterpret_cast<void *>(0x040000), onePage - 1, status);
    EXPECT_NE(slightlySmaller, nullptr);
    EXPECT_EQ(status, OverlapStatus::FRAGMENT_WITHIN_STORED_FRAGMENT);

    auto slightlyBigger = hostPtrManager.getFragmentAndCheckForOverlaps(rootDeviceIndex, reinterpret_cast<void *>(0x040000), onePage + 1, status);
    EXPECT_EQ(slightlyBigger, nullptr);
    EXPECT_EQ(status, OverlapStatus::FRAGMENT_OVERLAPING_AND_BIGGER_THEN_STORED_FRAGMENT);

    auto startsBefore = hostPtrManager.getFragmentAndCheckForOverlaps(rootDeviceIndex, reinterpret_cast<void *>(0x03FFF0), 2 * onePage, status);
    EXPECT_EQ(startsBefore, nullptr);
    EXPECT_EQ(status, OverlapStatus::FRAGMENT_OVERLAPING_AND_BIGGER_THEN_STORED_FRAGMENT);

    auto exactMatch = hostPtrManager.getFragmentAndCheckForOverlaps(rootDeviceIndex, reinterpret_cast<void *>(0x040000), onePage, status);
    EXPECT_NE(exactMatch, nullptr);
    EXPECT_EQ(status, OverlapStatus::FRAGMENT_WITH_EXACT_SIZE_AS_STORED_FRAGMENT);

    auto runsPastEnd = hostPtrManager.getFragmentAndCheckForOverlaps(rootDeviceIndex, reinterpret_cast<void *>(0x010060), 2 * onePage, status);
    EXPECT_EQ(runsPastEnd, nullptr);
    EXPECT_EQ(status, OverlapStatus::FRAGMENT_OVERLAPING_AND_BIGGER_THEN_STORED_FRAGMENT);
}

// A single ten-page fragment is stored; lookups inside it must return that fragment, a
// disjoint range must report no overlap, and a range crossing its end must report an overlap.
TEST_F(HostPtrManagerTest, GivenHostPtrManagerFilledWithBigFragmentWhenAskedForFragmnetInTheMiddleOfBigFragmentThenBigFragmentIsReturned) {
    auto bigFragmentSize = 10 * MemoryConstants::pageSize;
    auto bigFragmentPtr = reinterpret_cast<void *>(0x01000);

    FragmentStorage bigFragment;
    bigFragment.fragmentCpuPointer = bigFragmentPtr;
    bigFragment.fragmentSize = bigFragmentSize;

    MockHostPtrManager hostPtrManager;
    hostPtrManager.storeFragment(rootDeviceIndex, bigFragment);
    EXPECT_EQ(1u, hostPtrManager.getFragmentCount());

    auto middlePtr = reinterpret_cast<void *>(0x2000);
    auto onePage = MemoryConstants::pageSize;

    auto storedBig = hostPtrManager.getFragment({bigFragmentPtr, rootDeviceIndex});
    auto fromMiddle = hostPtrManager.getFragment({middlePtr, rootDeviceIndex});
    EXPECT_EQ(storedBig, fromMiddle);

    OverlapStatus overlapStatus;
    auto withinBig = hostPtrManager.getFragmentAndCheckForOverlaps(rootDeviceIndex, middlePtr, onePage, overlapStatus);
    EXPECT_EQ(OverlapStatus::FRAGMENT_WITHIN_STORED_FRAGMENT, overlapStatus);
    EXPECT_EQ(withinBig, storedBig);

    auto outsidePtr = reinterpret_cast<void *>(0x1000000);
    auto outsideSize = 1;

    auto exactMatch = hostPtrManager.getFragmentAndCheckForOverlaps(rootDeviceIndex, bigFragmentPtr, bigFragmentSize, overlapStatus);
    EXPECT_EQ(OverlapStatus::FRAGMENT_WITH_EXACT_SIZE_AS_STORED_FRAGMENT, overlapStatus);
    EXPECT_EQ(exactMatch, storedBig);

    auto outside = hostPtrManager.getFragmentAndCheckForOverlaps(rootDeviceIndex, outsidePtr, outsideSize, overlapStatus);
    EXPECT_EQ(OverlapStatus::FRAGMENT_NOT_OVERLAPING_WITH_ANY_OTHER, overlapStatus);
    EXPECT_EQ(nullptr, outside);

    // Partial overlap: starts 100 bytes before the end of the big fragment and runs far past it.
    auto partialPtr = reinterpret_cast<void *>((reinterpret_cast<uintptr_t>(bigFragmentPtr) + bigFragmentSize) - 100);
    auto partialSize = MemoryConstants::pageSize * 100;
    auto partial = hostPtrManager.getFragmentAndCheckForOverlaps(rootDeviceIndex, partialPtr, partialSize, overlapStatus);
    EXPECT_EQ(nullptr, partial);
EXPECT_EQ(OverlapStatus::FRAGMENT_OVERLAPING_AND_BIGGER_THEN_STORED_FRAGMENT, overlapStatus);
}

// Ranges that end before or exactly at the start of the stored fragment do not overlap; a
// range that reaches one byte into it does.
TEST_F(HostPtrManagerTest, GivenHostPtrManagerFilledWithFragmentsWhenCheckedForOverlappingThenProperOverlappingStatusIsReturned) {
    auto storedPtr = reinterpret_cast<void *>(0x04000);
    auto storedSize = 10 * MemoryConstants::pageSize;

    FragmentStorage storedFragment;
    storedFragment.fragmentCpuPointer = storedPtr;
    storedFragment.fragmentSize = storedSize;

    MockHostPtrManager hostPtrManager;
    hostPtrManager.storeFragment(rootDeviceIndex, storedFragment);
    EXPECT_EQ(1u, hostPtrManager.getFragmentCount());

    auto wellBeforePtr = reinterpret_cast<void *>(0x2000);
    auto onePage = MemoryConstants::pageSize;
    OverlapStatus overlapStatus;

    auto wellBefore = hostPtrManager.getFragmentAndCheckForOverlaps(rootDeviceIndex, wellBeforePtr, onePage, overlapStatus);
    EXPECT_EQ(OverlapStatus::FRAGMENT_NOT_OVERLAPING_WITH_ANY_OTHER, overlapStatus);
    EXPECT_EQ(nullptr, wellBefore);

    // The page directly in front of the stored fragment ends exactly where the fragment begins.
    auto adjacentPtr = reinterpret_cast<void *>(0x3000);
    auto adjacentEnd = reinterpret_cast<uintptr_t>(adjacentPtr) + onePage;
    EXPECT_EQ(adjacentEnd, reinterpret_cast<uintptr_t>(storedPtr));

    auto adjacent = hostPtrManager.getFragmentAndCheckForOverlaps(rootDeviceIndex, adjacentPtr, onePage, overlapStatus);
    EXPECT_EQ(OverlapStatus::FRAGMENT_NOT_OVERLAPING_WITH_ANY_OTHER, overlapStatus);
    EXPECT_EQ(nullptr, adjacent);

    auto oneByteInto = hostPtrManager.getFragmentAndCheckForOverlaps(rootDeviceIndex, adjacentPtr, onePage + 1, overlapStatus);
    EXPECT_EQ(OverlapStatus::FRAGMENT_OVERLAPING_AND_BIGGER_THEN_STORED_FRAGMENT, overlapStatus);
    EXPECT_EQ(nullptr, oneByteInto);
}

// With nothing stored, any range is reported as not overlapping.
TEST_F(HostPtrManagerTest, GivenEmptyHostPtrManagerWhenAskedForOverlapingThenNoOverlappingIsReturned) {
    MockHostPtrManager hostPtrManager;

    auto queriedPtr = reinterpret_cast<void *>(0x04000);
    auto queriedSize = 10 * MemoryConstants::pageSize;
    OverlapStatus overlapStatus;

    auto found = hostPtrManager.getFragmentAndCheckForOverlaps(rootDeviceIndex, queriedPtr, queriedSize, overlapStatus);
    EXPECT_EQ(OverlapStatus::FRAGMENT_NOT_OVERLAPING_WITH_ANY_OTHER, overlapStatus);
    EXPECT_EQ(nullptr, found);
}

// Two one-page fragments are stored with a one-page gap between them; ranges that fit into
// the gap, lie before the first or after the second fragment do not overlap, while a range
// starting in the gap and running over the second fragment does.
TEST_F(HostPtrManagerTest, GivenHostPtrManagerFilledWithFragmentsWhenAskedForOverlpaingThenProperStatusIsReturned) {
    auto lowerPtr = reinterpret_cast<void *>(0x01000);
    auto upperPtr = reinterpret_cast<void *>(0x03000);
    auto onePage = MemoryConstants::pageSize;

    FragmentStorage pageFragment;
    pageFragment.fragmentCpuPointer = lowerPtr;
    pageFragment.fragmentSize = onePage;

    MockHostPtrManager hostPtrManager;
    hostPtrManager.storeFragment(rootDeviceIndex, pageFragment);
    pageFragment.fragmentCpuPointer = upperPtr;
    hostPtrManager.storeFragment(rootDeviceIndex, pageFragment);
    EXPECT_EQ(2u, hostPtrManager.getFragmentCount());

    auto gapPtr = reinterpret_cast<void *>(0x2000);
    auto afterUpperPtr = reinterpret_cast<void *>(0x4000);
    void *beforeLowerPtr = nullptr;
    OverlapStatus overlapStatus;

    auto inGap = hostPtrManager.getFragmentAndCheckForOverlaps(rootDeviceIndex, gapPtr, onePage, overlapStatus);
    EXPECT_EQ(OverlapStatus::FRAGMENT_NOT_OVERLAPING_WITH_ANY_OTHER, overlapStatus);
    EXPECT_EQ(nullptr, inGap);

    auto gapAndBeyond = hostPtrManager.getFragmentAndCheckForOverlaps(rootDeviceIndex, gapPtr, onePage * 5, overlapStatus);
    EXPECT_EQ(OverlapStatus::FRAGMENT_OVERLAPING_AND_BIGGER_THEN_STORED_FRAGMENT, overlapStatus);
    EXPECT_EQ(nullptr, gapAndBeyond);

    auto afterUpper = hostPtrManager.getFragmentAndCheckForOverlaps(rootDeviceIndex, afterUpperPtr, onePage * 5, overlapStatus);
    EXPECT_EQ(OverlapStatus::FRAGMENT_NOT_OVERLAPING_WITH_ANY_OTHER, overlapStatus);
    EXPECT_EQ(nullptr, afterUpper);

    auto beforeLower = hostPtrManager.getFragmentAndCheckForOverlaps(rootDeviceIndex, beforeLowerPtr, onePage, overlapStatus);
    EXPECT_EQ(OverlapStatus::FRAGMENT_NOT_OVERLAPING_WITH_ANY_OTHER, overlapStatus);
    EXPECT_EQ(nullptr, beforeLower);
}

TEST_F(HostPtrManagerTest,
GivenHostPtrManagerFilledWithFragmentsWhenAskedForOverlapingThenProperOverlapingStatusIsReturned) { auto bigPtr1 = (void *)0x10000; auto bigPtr2 = (void *)0x03000; auto bigPtr3 = (void *)0x11000; auto bigSize1 = MemoryConstants::pageSize; auto bigSize2 = MemoryConstants::pageSize * 4; auto bigSize3 = MemoryConstants::pageSize * 10; FragmentStorage fragment; fragment.fragmentCpuPointer = bigPtr1; fragment.fragmentSize = bigSize1; MockHostPtrManager hostPtrManager; hostPtrManager.storeFragment(rootDeviceIndex, fragment); fragment.fragmentCpuPointer = bigPtr2; fragment.fragmentSize = bigSize2; hostPtrManager.storeFragment(rootDeviceIndex, fragment); fragment.fragmentCpuPointer = bigPtr3; fragment.fragmentSize = bigSize3; hostPtrManager.storeFragment(rootDeviceIndex, fragment); EXPECT_EQ(3u, hostPtrManager.getFragmentCount()); OverlapStatus overlapStatus; auto fragment1 = hostPtrManager.getFragmentAndCheckForOverlaps(rootDeviceIndex, bigPtr1, bigSize1 + 1, overlapStatus); EXPECT_EQ(OverlapStatus::FRAGMENT_OVERLAPING_AND_BIGGER_THEN_STORED_FRAGMENT, overlapStatus); EXPECT_EQ(nullptr, fragment1); auto priorToBig1 = (void *)0x9999; auto fragment2 = hostPtrManager.getFragmentAndCheckForOverlaps(rootDeviceIndex, priorToBig1, 1, overlapStatus); EXPECT_EQ(OverlapStatus::FRAGMENT_NOT_OVERLAPING_WITH_ANY_OTHER, overlapStatus); EXPECT_EQ(nullptr, fragment2); auto middleOfBig3 = (void *)0x11111; auto fragment3 = hostPtrManager.getFragmentAndCheckForOverlaps(rootDeviceIndex, middleOfBig3, 1, overlapStatus); EXPECT_EQ(OverlapStatus::FRAGMENT_WITHIN_STORED_FRAGMENT, overlapStatus); EXPECT_NE(nullptr, fragment3); } using HostPtrAllocationTest = Test; TEST_F(HostPtrAllocationTest, givenTwoAllocationsThatSharesOneFragmentWhenOneIsDestroyedThenFragmentRemains) { void *cpuPtr1 = reinterpret_cast(0x100001); void *cpuPtr2 = ptrOffset(cpuPtr1, MemoryConstants::pageSize); auto hostPtrManager = static_cast(memoryManager->getHostPtrManager()); auto graphicsAllocation1 = 
memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, 2 * MemoryConstants::pageSize - 1, csr->getOsContext().getDeviceBitfield()}, cpuPtr1); EXPECT_EQ(2u, hostPtrManager->getFragmentCount()); auto graphicsAllocation2 = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, MemoryConstants::pageSize, csr->getOsContext().getDeviceBitfield()}, cpuPtr2); EXPECT_EQ(3u, hostPtrManager->getFragmentCount()); memoryManager->freeGraphicsMemory(graphicsAllocation1); EXPECT_EQ(2u, hostPtrManager->getFragmentCount()); memoryManager->freeGraphicsMemory(graphicsAllocation2); EXPECT_EQ(0u, hostPtrManager->getFragmentCount()); } TEST_F(HostPtrAllocationTest, whenPrepareOsHandlesForAllocationThenPopulateAsManyFragmentsAsRequired) { auto hostPtrManager = static_cast(memoryManager->getHostPtrManager()); void *cpuPtr = reinterpret_cast(0x100001); size_t allocationSize = MemoryConstants::pageSize / 2; for (uint32_t expectedFragmentCount = 1; expectedFragmentCount <= 3; expectedFragmentCount++, allocationSize += MemoryConstants::pageSize) { auto requirements = hostPtrManager->getAllocationRequirements(csr->getRootDeviceIndex(), cpuPtr, allocationSize); EXPECT_EQ(expectedFragmentCount, requirements.requiredFragmentsCount); auto osStorage = hostPtrManager->prepareOsStorageForAllocation(*memoryManager, allocationSize, cpuPtr, 0); EXPECT_EQ(expectedFragmentCount, osStorage.fragmentCount); EXPECT_EQ(expectedFragmentCount, hostPtrManager->getFragmentCount()); hostPtrManager->releaseHandleStorage(csr->getRootDeviceIndex(), osStorage); memoryManager->cleanOsHandles(osStorage, 0); EXPECT_EQ(0u, hostPtrManager->getFragmentCount()); } } TEST_F(HostPtrAllocationTest, whenOverlappedFragmentIsBiggerThenStoredAndStoredFragmentIsDestroyedDuringSecondCleaningThenCheckForOverlappingReturnsSuccess) { void *cpuPtr1 = (void *)0x100004; auto hostPtrManager = 
static_cast(memoryManager->getHostPtrManager()); auto graphicsAllocation1 = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, MemoryConstants::pageSize, csr->getOsContext().getDeviceBitfield()}, cpuPtr1); EXPECT_EQ(2u, hostPtrManager->getFragmentCount()); EXPECT_NE(nullptr, graphicsAllocation1); auto fragment1 = hostPtrManager->getFragment({alignDown(cpuPtr1, MemoryConstants::pageSize), csr->getRootDeviceIndex()}); EXPECT_NE(nullptr, fragment1); auto fragment2 = hostPtrManager->getFragment({alignUp(cpuPtr1, MemoryConstants::pageSize), csr->getRootDeviceIndex()}); EXPECT_NE(nullptr, fragment2); uint32_t taskCountReady = 2; auto storage = new MockInternalAllocationStorage(*csr); csr->internalAllocationStorage.reset(storage); storage->storeAllocationWithTaskCount(std::unique_ptr(graphicsAllocation1), TEMPORARY_ALLOCATION, taskCountReady); storage->updateCompletionAfterCleaningList(taskCountReady); // All fragments ready for release currentGpuTag = 1; csr->latestSentTaskCount = taskCountReady - 1; AllocationRequirements requirements; requirements.requiredFragmentsCount = 1; requirements.totalRequiredSize = MemoryConstants::pageSize * 10; requirements.allocationFragments[0].allocationPtr = alignDown(cpuPtr1, MemoryConstants::pageSize); requirements.allocationFragments[0].allocationSize = MemoryConstants::pageSize * 10; requirements.allocationFragments[0].fragmentPosition = FragmentPosition::NONE; RequirementsStatus status = hostPtrManager->checkAllocationsForOverlapping(*memoryManager, &requirements); EXPECT_EQ(RequirementsStatus::SUCCESS, status); } HWTEST_F(HostPtrAllocationTest, givenOverlappingFragmentsWhenCheckIsCalledThenWaitAndCleanOnAllEngines) { uint32_t taskCountReady = 2; uint32_t taskCountNotReady = 1; auto &engines = memoryManager->getRegisteredEngines(); EXPECT_EQ(1u, engines.size()); auto csr0 = static_cast(engines[0].commandStreamReceiver); auto csr1 = std::make_unique(executionEnvironment, 0, 
1); uint32_t csr0GpuTag = taskCountNotReady; uint32_t csr1GpuTag = taskCountNotReady; csr0->tagAddress = &csr0GpuTag; csr1->tagAddress = &csr1GpuTag; auto osContext = memoryManager->createAndRegisterOsContext(csr1.get(), EngineDescriptorHelper::getDefaultDescriptor({aub_stream::EngineType::ENGINE_RCS, EngineUsage::LowPriority})); csr1->setupContext(*osContext); void *cpuPtr = reinterpret_cast(0x100004); auto hostPtrManager = static_cast(memoryManager->getHostPtrManager()); auto graphicsAllocation0 = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, MemoryConstants::pageSize, csr->getOsContext().getDeviceBitfield()}, cpuPtr); auto graphicsAllocation1 = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, MemoryConstants::pageSize, csr->getOsContext().getDeviceBitfield()}, cpuPtr); auto storage0 = new MockInternalAllocationStorage(*csr0); auto storage1 = new MockInternalAllocationStorage(*csr1); csr0->internalAllocationStorage.reset(storage0); storage0->storeAllocationWithTaskCount(std::unique_ptr(graphicsAllocation0), TEMPORARY_ALLOCATION, taskCountReady); storage0->updateCompletionAfterCleaningList(taskCountReady); csr1->internalAllocationStorage.reset(storage1); storage1->storeAllocationWithTaskCount(std::unique_ptr(graphicsAllocation1), TEMPORARY_ALLOCATION, taskCountReady); storage1->updateCompletionAfterCleaningList(taskCountReady); csr0->setLatestSentTaskCount(taskCountNotReady); csr1->setLatestSentTaskCount(taskCountNotReady); AllocationRequirements requirements; requirements.requiredFragmentsCount = 1; requirements.totalRequiredSize = MemoryConstants::pageSize * 10; requirements.allocationFragments[0].allocationPtr = alignDown(cpuPtr, MemoryConstants::pageSize); requirements.allocationFragments[0].allocationSize = MemoryConstants::pageSize * 10; requirements.allocationFragments[0].fragmentPosition = FragmentPosition::NONE; 
requirements.rootDeviceIndex = csr0->getRootDeviceIndex(); hostPtrManager->checkAllocationsForOverlapping(*memoryManager, &requirements); EXPECT_EQ(1u, csr0->waitForCompletionWithTimeoutCalled); EXPECT_EQ(1u, csr1->waitForCompletionWithTimeoutCalled); EXPECT_EQ(2u, storage0->cleanAllocationsCalled); EXPECT_EQ(2u, storage0->lastCleanAllocationsTaskCount); EXPECT_EQ(2u, storage1->cleanAllocationsCalled); EXPECT_EQ(2u, storage1->lastCleanAllocationsTaskCount); } TEST_F(HostPtrAllocationTest, whenOverlappedFragmentIsBiggerThenStoredAndStoredFragmentCannotBeDestroyedThenCheckForOverlappingReturnsError) { void *cpuPtr1 = (void *)0x100004; auto hostPtrManager = static_cast(memoryManager->getHostPtrManager()); auto graphicsAllocation1 = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, MemoryConstants::pageSize, csr->getOsContext().getDeviceBitfield()}, cpuPtr1); EXPECT_EQ(2u, hostPtrManager->getFragmentCount()); EXPECT_NE(nullptr, graphicsAllocation1); auto fragment1 = hostPtrManager->getFragment({alignDown(cpuPtr1, MemoryConstants::pageSize), csr->getRootDeviceIndex()}); EXPECT_NE(nullptr, fragment1); auto fragment2 = hostPtrManager->getFragment({alignUp(cpuPtr1, MemoryConstants::pageSize), csr->getRootDeviceIndex()}); EXPECT_NE(nullptr, fragment2); uint32_t taskCountReady = 2; auto storage = csr->getInternalAllocationStorage(); storage->storeAllocationWithTaskCount(std::unique_ptr(graphicsAllocation1), TEMPORARY_ALLOCATION, taskCountReady); // All fragments ready for release currentGpuTag = taskCountReady - 1; csr->latestSentTaskCount = taskCountReady - 1; AllocationRequirements requirements; requirements.requiredFragmentsCount = 1; requirements.totalRequiredSize = MemoryConstants::pageSize * 10; requirements.allocationFragments[0].allocationPtr = alignDown(cpuPtr1, MemoryConstants::pageSize); requirements.allocationFragments[0].allocationSize = MemoryConstants::pageSize * 10; 
requirements.allocationFragments[0].fragmentPosition = FragmentPosition::NONE; requirements.rootDeviceIndex = csr->getRootDeviceIndex(); RequirementsStatus status = hostPtrManager->checkAllocationsForOverlapping(*memoryManager, &requirements); EXPECT_EQ(RequirementsStatus::FATAL, status); } TEST_F(HostPtrAllocationTest, GivenAllocationsWithoutBiggerOverlapWhenChckingForOverlappingThenSuccessIsReturned) { void *cpuPtr1 = (void *)0x100004; void *cpuPtr2 = (void *)0x101008; auto hostPtrManager = static_cast(memoryManager->getHostPtrManager()); auto graphicsAllocation1 = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, MemoryConstants::pageSize, csr->getOsContext().getDeviceBitfield()}, cpuPtr1); EXPECT_EQ(2u, hostPtrManager->getFragmentCount()); auto graphicsAllocation2 = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, MemoryConstants::pageSize * 3, csr->getOsContext().getDeviceBitfield()}, cpuPtr2); EXPECT_EQ(4u, hostPtrManager->getFragmentCount()); EXPECT_NE(nullptr, graphicsAllocation1); EXPECT_NE(nullptr, graphicsAllocation2); auto fragment1 = hostPtrManager->getFragment({alignDown(cpuPtr1, MemoryConstants::pageSize), csr->getRootDeviceIndex()}); EXPECT_NE(nullptr, fragment1); auto fragment2 = hostPtrManager->getFragment({alignUp(cpuPtr1, MemoryConstants::pageSize), csr->getRootDeviceIndex()}); EXPECT_NE(nullptr, fragment2); auto fragment3 = hostPtrManager->getFragment({alignDown(cpuPtr2, MemoryConstants::pageSize), csr->getRootDeviceIndex()}); EXPECT_NE(nullptr, fragment3); auto fragment4 = hostPtrManager->getFragment({alignUp(cpuPtr2, MemoryConstants::pageSize), csr->getRootDeviceIndex()}); EXPECT_NE(nullptr, fragment4); AllocationRequirements requirements; requirements.requiredFragmentsCount = 2; requirements.totalRequiredSize = MemoryConstants::pageSize * 2; requirements.allocationFragments[0].allocationPtr = alignDown(cpuPtr1, 
MemoryConstants::pageSize); requirements.allocationFragments[0].allocationSize = MemoryConstants::pageSize; requirements.allocationFragments[0].fragmentPosition = FragmentPosition::LEADING; requirements.allocationFragments[1].allocationPtr = alignUp(cpuPtr1, MemoryConstants::pageSize); requirements.allocationFragments[1].allocationSize = MemoryConstants::pageSize; requirements.allocationFragments[1].fragmentPosition = FragmentPosition::TRAILING; RequirementsStatus status = hostPtrManager->checkAllocationsForOverlapping(*memoryManager, &requirements); EXPECT_EQ(RequirementsStatus::SUCCESS, status); memoryManager->freeGraphicsMemory(graphicsAllocation1); memoryManager->freeGraphicsMemory(graphicsAllocation2); } TEST_F(HostPtrAllocationTest, GivenAllocationsWithBiggerOverlapWhenChckingForOverlappingThenSuccessIsReturned) { void *cpuPtr1 = (void *)0x100004; auto graphicsAllocation1 = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, MemoryConstants::pageSize, csr->getOsContext().getDeviceBitfield()}, cpuPtr1); auto hostPtrManager = static_cast(memoryManager->getHostPtrManager()); EXPECT_EQ(2u, hostPtrManager->getFragmentCount()); EXPECT_NE(nullptr, graphicsAllocation1); auto fragment1 = hostPtrManager->getFragment({alignDown(cpuPtr1, MemoryConstants::pageSize), csr->getRootDeviceIndex()}); EXPECT_NE(nullptr, fragment1); auto fragment2 = hostPtrManager->getFragment({alignUp(cpuPtr1, MemoryConstants::pageSize), csr->getRootDeviceIndex()}); EXPECT_NE(nullptr, fragment2); uint32_t taskCountReady = 1; auto storage = csr->getInternalAllocationStorage(); storage->storeAllocationWithTaskCount(std::unique_ptr(graphicsAllocation1), TEMPORARY_ALLOCATION, taskCountReady); // All fragments ready for release taskCount = taskCountReady; csr->latestSentTaskCount = taskCountReady; AllocationRequirements requirements; requirements.requiredFragmentsCount = 1; requirements.totalRequiredSize = MemoryConstants::pageSize * 10; 
requirements.allocationFragments[0].allocationPtr = alignDown(cpuPtr1, MemoryConstants::pageSize); requirements.allocationFragments[0].allocationSize = MemoryConstants::pageSize * 10; requirements.allocationFragments[0].fragmentPosition = FragmentPosition::NONE; RequirementsStatus status = hostPtrManager->checkAllocationsForOverlapping(*memoryManager, &requirements); EXPECT_EQ(RequirementsStatus::SUCCESS, status); } TEST(HostPtrEntryKeyTest, givenTwoHostPtrEntryKeysWhenComparingThemThenKeyWithLowerRootDeviceIndexIsLower) { auto hostPtr0 = reinterpret_cast(0x100); auto hostPtr1 = reinterpret_cast(0x200); auto hostPtr2 = reinterpret_cast(0x300); HostPtrEntryKey key0{hostPtr1, 0u}; HostPtrEntryKey key1{hostPtr1, 1u}; EXPECT_TRUE(key0 < key1); EXPECT_FALSE(key1 < key0); key0.ptr = hostPtr0; EXPECT_TRUE(key0 < key1); EXPECT_FALSE(key1 < key0); key0.ptr = hostPtr2; EXPECT_TRUE(key0 < key1); EXPECT_FALSE(key1 < key0); } TEST(HostPtrEntryKeyTest, givenTwoHostPtrEntryKeysWithSameRootDeviceIndexWhenComparingThemThenKeyWithLowerPtrIsLower) { auto hostPtr0 = reinterpret_cast(0x100); auto hostPtr1 = reinterpret_cast(0x200); HostPtrEntryKey key0{hostPtr0, 1u}; HostPtrEntryKey key1{hostPtr1, 1u}; EXPECT_TRUE(key0 < key1); EXPECT_FALSE(key1 < key0); } TEST(HostPtrEntryKeyTest, givenTwoSameHostPtrEntryKeysWithSameRootDeviceIndexWhenComparingThemThenTheyAreEqual) { auto hostPtr = reinterpret_cast(0x100); HostPtrEntryKey key0{hostPtr, 1u}; HostPtrEntryKey key1{hostPtr, 1u}; EXPECT_FALSE(key0 < key1); EXPECT_FALSE(key1 < key0); } internal_allocation_storage_tests.cpp000066400000000000000000000351271422164147700342550ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/memory_manager/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/source/os_interface/os_context.h" #include "shared/test/common/fixtures/memory_allocator_fixture.h" #include 
"shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/libult/ult_command_stream_receiver.h" #include "shared/test/common/mocks/mock_allocation_properties.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/utilities/containers_tests_helpers.h" struct InternalAllocationStorageTest : public MemoryAllocatorFixture, public ::testing::Test { using MemoryAllocatorFixture::TearDown; void SetUp() override { MemoryAllocatorFixture::SetUp(); storage = csr->getInternalAllocationStorage(); } InternalAllocationStorage *storage; }; TEST_F(InternalAllocationStorageTest, givenDebugFlagThatDisablesAllocationReuseWhenStoreReusableAllocationIsCalledThenAllocationIsReleased) { DebugManagerStateRestore stateRestorer; DebugManager.flags.DisableResourceRecycling.set(true); auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); storage->storeAllocation(std::unique_ptr(allocation), REUSABLE_ALLOCATION); EXPECT_NE(allocation, csr->getAllocationsForReuse().peekHead()); EXPECT_TRUE(csr->getAllocationsForReuse().peekIsEmpty()); } TEST_F(InternalAllocationStorageTest, whenCleanAllocationListThenRemoveOnlyCompletedAllocations) { auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); auto allocation2 = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); auto allocation3 = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); allocation->updateTaskCount(10, csr->getOsContext().getContextId()); allocation2->updateTaskCount(5, csr->getOsContext().getContextId()); allocation3->updateTaskCount(15, 
csr->getOsContext().getContextId()); storage->storeAllocation(std::unique_ptr(allocation), TEMPORARY_ALLOCATION); storage->storeAllocation(std::unique_ptr(allocation2), TEMPORARY_ALLOCATION); storage->storeAllocation(std::unique_ptr(allocation3), TEMPORARY_ALLOCATION); //head point to alloc 2, tail points to alloc3 EXPECT_TRUE(csr->getTemporaryAllocations().peekContains(*allocation)); EXPECT_TRUE(csr->getTemporaryAllocations().peekContains(*allocation2)); EXPECT_TRUE(csr->getTemporaryAllocations().peekContains(*allocation3)); EXPECT_EQ(-1, verifyDListOrder(csr->getTemporaryAllocations().peekHead(), allocation, allocation2, allocation3)); //now remove element form the middle storage->cleanAllocationList(6, TEMPORARY_ALLOCATION); EXPECT_TRUE(csr->getTemporaryAllocations().peekContains(*allocation)); EXPECT_FALSE(csr->getTemporaryAllocations().peekContains(*allocation2)); EXPECT_TRUE(csr->getTemporaryAllocations().peekContains(*allocation3)); EXPECT_EQ(-1, verifyDListOrder(csr->getTemporaryAllocations().peekHead(), allocation, allocation3)); //now remove head storage->cleanAllocationList(11, TEMPORARY_ALLOCATION); EXPECT_FALSE(csr->getTemporaryAllocations().peekContains(*allocation)); EXPECT_FALSE(csr->getTemporaryAllocations().peekContains(*allocation2)); EXPECT_TRUE(csr->getTemporaryAllocations().peekContains(*allocation3)); //now remove tail storage->cleanAllocationList(16, TEMPORARY_ALLOCATION); EXPECT_TRUE(csr->getTemporaryAllocations().peekIsEmpty()); } TEST_F(InternalAllocationStorageTest, whenAllocationIsStoredAsReusableButIsStillUsedThenCannotBeObtained) { auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(AllocationProperties{0, MemoryConstants::pageSize, AllocationType::BUFFER, mockDeviceBitfield}); storage->storeAllocationWithTaskCount(std::unique_ptr(allocation), REUSABLE_ALLOCATION, 2u); auto *hwTag = csr->getTagAddress(); *hwTag = 1u; auto newAllocation = storage->obtainReusableAllocation(1, AllocationType::BUFFER); EXPECT_EQ(nullptr, 
newAllocation); storage->cleanAllocationList(2u, REUSABLE_ALLOCATION); } TEST_F(InternalAllocationStorageTest, whenAllocationIsStoredAsTemporaryAndIsStillUsedThenCanBeObtained) { auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(AllocationProperties{0, MemoryConstants::pageSize, AllocationType::BUFFER, mockDeviceBitfield}); storage->storeAllocationWithTaskCount(std::unique_ptr(allocation), TEMPORARY_ALLOCATION, 2u); auto *hwTag = csr->getTagAddress(); *hwTag = 1u; auto newAllocation = storage->obtainTemporaryAllocationWithPtr(1, allocation->getUnderlyingBuffer(), AllocationType::BUFFER); EXPECT_EQ(allocation, newAllocation.get()); EXPECT_TRUE(csr->getTemporaryAllocations().peekIsEmpty()); memoryManager->freeGraphicsMemory(newAllocation.release()); } TEST_F(InternalAllocationStorageTest, givenTemporaryAllocationWhenAllocationIsObtainedThenItsTaskCountIsSetToNotReady) { const uint32_t initialTaskCount = 37u; const uint32_t contextId = csr->getOsContext().getContextId(); auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(AllocationProperties{0, MemoryConstants::pageSize, AllocationType::BUFFER, mockDeviceBitfield}); storage->storeAllocationWithTaskCount(std::unique_ptr(allocation), TEMPORARY_ALLOCATION, initialTaskCount); ASSERT_EQ(initialTaskCount, allocation->getTaskCount(contextId)); auto newAllocation = storage->obtainTemporaryAllocationWithPtr(1, allocation->getUnderlyingBuffer(), AllocationType::BUFFER); EXPECT_EQ(allocation, newAllocation.get()); EXPECT_EQ(CompletionStamp::notReady, allocation->getTaskCount(contextId)); memoryManager->freeGraphicsMemory(newAllocation.release()); } TEST_F(InternalAllocationStorageTest, whenObtainAllocationFromEmptyReuseListThenReturnNullptr) { auto allocation2 = storage->obtainReusableAllocation(1, AllocationType::BUFFER); EXPECT_EQ(nullptr, allocation2); } TEST_F(InternalAllocationStorageTest, whenCompletedAllocationIsStoredAsReusableAndThenCanBeObtained) { auto allocation = 
memoryManager->allocateGraphicsMemoryWithProperties(AllocationProperties{0, MemoryConstants::pageSize, AllocationType::BUFFER, mockDeviceBitfield}); EXPECT_NE(nullptr, allocation); storage->storeAllocationWithTaskCount(std::unique_ptr(allocation), REUSABLE_ALLOCATION, 2u); EXPECT_FALSE(csr->getAllocationsForReuse().peekIsEmpty()); auto *hwTag = csr->getTagAddress(); *hwTag = 2u; auto reusedAllocation = storage->obtainReusableAllocation(1, AllocationType::BUFFER).release(); EXPECT_EQ(allocation, reusedAllocation); EXPECT_TRUE(csr->getAllocationsForReuse().peekIsEmpty()); memoryManager->freeGraphicsMemory(allocation); } TEST_F(InternalAllocationStorageTest, whenNotUsedAllocationIsStoredAsReusableAndThenCanBeObtained) { auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(AllocationProperties{0, MemoryConstants::pageSize, AllocationType::BUFFER, mockDeviceBitfield}); EXPECT_NE(nullptr, allocation); EXPECT_FALSE(allocation->isUsed()); EXPECT_EQ(0u, csr->peekTaskCount()); *csr->getTagAddress() = 0; // initial hw tag for dll storage->storeAllocation(std::unique_ptr(allocation), REUSABLE_ALLOCATION); EXPECT_EQ(0u, allocation->getTaskCount(csr->getOsContext().getContextId())); EXPECT_FALSE(csr->getAllocationsForReuse().peekIsEmpty()); auto reusedAllocation = storage->obtainReusableAllocation(1, AllocationType::BUFFER).release(); EXPECT_EQ(allocation, reusedAllocation); EXPECT_TRUE(csr->getAllocationsForReuse().peekIsEmpty()); memoryManager->freeGraphicsMemory(allocation); } TEST_F(InternalAllocationStorageTest, whenObtainAllocationFromMidlleOfReusableListThenItIsDetachedFromLinkedList) { auto &reusableAllocations = csr->getAllocationsForReuse(); EXPECT_TRUE(reusableAllocations.peekIsEmpty()); auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(AllocationProperties{0, 1, AllocationType::BUFFER, mockDeviceBitfield}); auto allocation2 = memoryManager->allocateGraphicsMemoryWithProperties(AllocationProperties{0, 10000, AllocationType::BUFFER, 
mockDeviceBitfield}); auto allocation3 = memoryManager->allocateGraphicsMemoryWithProperties(AllocationProperties{0, 1, AllocationType::BUFFER, mockDeviceBitfield}); EXPECT_TRUE(reusableAllocations.peekIsEmpty()); EXPECT_EQ(nullptr, allocation2->next); EXPECT_EQ(nullptr, allocation2->prev); storage->storeAllocation(std::unique_ptr(allocation), REUSABLE_ALLOCATION); EXPECT_TRUE(reusableAllocations.peekContains(*allocation)); EXPECT_FALSE(reusableAllocations.peekContains(*allocation2)); EXPECT_FALSE(reusableAllocations.peekContains(*allocation3)); EXPECT_EQ(nullptr, allocation2->next); EXPECT_EQ(nullptr, allocation2->prev); storage->storeAllocation(std::unique_ptr(allocation2), REUSABLE_ALLOCATION); EXPECT_TRUE(reusableAllocations.peekContains(*allocation)); EXPECT_TRUE(reusableAllocations.peekContains(*allocation2)); EXPECT_FALSE(reusableAllocations.peekContains(*allocation3)); EXPECT_EQ(nullptr, allocation2->next); EXPECT_EQ(allocation, allocation2->prev); storage->storeAllocation(std::unique_ptr(allocation3), REUSABLE_ALLOCATION); EXPECT_TRUE(reusableAllocations.peekContains(*allocation)); EXPECT_TRUE(reusableAllocations.peekContains(*allocation2)); EXPECT_TRUE(reusableAllocations.peekContains(*allocation3)); EXPECT_EQ(allocation3, allocation2->next); EXPECT_EQ(allocation, allocation2->prev); auto reusableAllocation = storage->obtainReusableAllocation(10000, AllocationType::BUFFER).release(); EXPECT_EQ(reusableAllocation, allocation2); EXPECT_EQ(nullptr, allocation2->next); EXPECT_EQ(nullptr, allocation2->prev); EXPECT_EQ(nullptr, reusableAllocation->next); EXPECT_EQ(nullptr, reusableAllocation->prev); EXPECT_FALSE(reusableAllocations.peekContains(*reusableAllocation)); EXPECT_TRUE(reusableAllocations.peekContains(*allocation)); EXPECT_FALSE(reusableAllocations.peekContains(*allocation2)); EXPECT_TRUE(reusableAllocations.peekContains(*allocation3)); memoryManager->freeGraphicsMemory(allocation2); allocation->updateTaskCount(0u, csr->getOsContext().getContextId()); 
allocation3->updateTaskCount(0u, csr->getOsContext().getContextId()); } TEST_F(InternalAllocationStorageTest, givenAllocationWhenItIsPutOnReusableListWhenOtherAllocationTypeIsRequestedThenNullIsReturned) { EXPECT_TRUE(csr->getAllocationsForReuse().peekIsEmpty()); auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(AllocationProperties{0, MemoryConstants::pageSize, AllocationType::BUFFER, mockDeviceBitfield}); storage->storeAllocation(std::unique_ptr(allocation), REUSABLE_ALLOCATION); EXPECT_FALSE(csr->getAllocationsForReuse().peekIsEmpty()); auto internalAllocation = storage->obtainReusableAllocation(1, AllocationType::INTERNAL_HEAP); EXPECT_EQ(nullptr, internalAllocation); } class WaitAtDeletionAllocation : public MockGraphicsAllocation { public: WaitAtDeletionAllocation(void *buffer, size_t sizeIn) : MockGraphicsAllocation(buffer, sizeIn) { inDestructor = false; } std::mutex mutex; std::atomic inDestructor; ~WaitAtDeletionAllocation() override { inDestructor = true; std::lock_guard lock(mutex); } }; TEST_F(InternalAllocationStorageTest, givenAllocationListWhenTwoThreadsCleanConcurrentlyThenBothThreadsCanAccessTheList) { auto allocation1 = new WaitAtDeletionAllocation(nullptr, 0); allocation1->updateTaskCount(1, csr->getOsContext().getContextId()); storage->storeAllocation(std::unique_ptr(allocation1), TEMPORARY_ALLOCATION); std::unique_lock allocationDeletionLock(allocation1->mutex); auto allocation2 = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); allocation2->updateTaskCount(2, csr->getOsContext().getContextId()); storage->storeAllocation(std::unique_ptr(allocation2), TEMPORARY_ALLOCATION); std::mutex mutex; std::unique_lock lock(mutex); std::thread thread1([&] { storage->cleanAllocationList(1, TEMPORARY_ALLOCATION); }); std::thread thread2([&] { std::lock_guard lock(mutex); storage->cleanAllocationList(2, TEMPORARY_ALLOCATION); }); while 
(!allocation1->inDestructor) ; lock.unlock(); allocationDeletionLock.unlock(); thread1.join(); thread2.join(); EXPECT_TRUE(csr->getTemporaryAllocations().peekIsEmpty()); } HWTEST_F(InternalAllocationStorageTest, givenMultipleActivePartitionsWhenDetachingReusableAllocationThenCheckTaskCountFinishedOnAllTiles) { auto ultCsr = reinterpret_cast *>(csr); csr->setActivePartitions(2u); ultCsr->postSyncWriteOffset = 32; auto tagAddress = csr->getTagAddress(); *tagAddress = 0xFF; tagAddress = ptrOffset(tagAddress, 32); *tagAddress = 0x0; auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); storage->storeAllocation(std::unique_ptr(allocation), REUSABLE_ALLOCATION); EXPECT_EQ(allocation, csr->getAllocationsForReuse().peekHead()); EXPECT_FALSE(csr->getAllocationsForReuse().peekIsEmpty()); allocation->updateTaskCount(1u, csr->getOsContext().getContextId()); std::unique_ptr allocationReusable = csr->getAllocationsForReuse().detachAllocation(0, nullptr, csr, AllocationType::INTERNAL_HOST_MEMORY); EXPECT_EQ(nullptr, allocationReusable.get()); *tagAddress = 0x1; allocationReusable = csr->getAllocationsForReuse().detachAllocation(0, nullptr, csr, AllocationType::INTERNAL_HOST_MEMORY); EXPECT_EQ(allocation, allocationReusable.get()); memoryManager->freeGraphicsMemory(allocationReusable.release()); } memory_manager_allocate_in_preferred_pool_tests.cpp000066400000000000000000001711251422164147700371320ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/memory_manager/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/memory_manager/os_agnostic_memory_manager.h" #include 
"shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/engine_descriptor_helper.h" #include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/mocks/mock_allocation_properties.h" #include "shared/test/common/mocks/mock_execution_environment.h" #include "shared/test/common/mocks/mock_gmm.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "shared/test/common/mocks/mock_os_context.h" #include "shared/test/common/mocks/ult_device_factory.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/helpers/cl_memory_properties_helpers.h" #include "opencl/source/mem_obj/mem_obj_helper.h" using namespace NEO; using MemoryManagerGetAlloctionData32BitAnd64kbPagesAllowedTest = testing::TestWithParam; using MemoryManagerGetAlloctionDataTests = ::testing::Test; TEST_F(MemoryManagerGetAlloctionDataTests, givenHostMemoryAllocationTypeAndAllocateMemoryFlagAndNullptrWhenAllocationDataIsQueriedThenCorrectFlagsAndSizeAreSet) { AllocationData allocData; AllocationProperties properties(mockRootDeviceIndex, true, 10, AllocationType::BUFFER_HOST_MEMORY, false, mockDeviceBitfield); MockMemoryManager mockMemoryManager; mockMemoryManager.getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_TRUE(allocData.flags.useSystemMemory); EXPECT_EQ(10u, allocData.size); EXPECT_EQ(nullptr, allocData.hostPtr); } TEST_F(MemoryManagerGetAlloctionDataTests, givenNonHostMemoryAllocatoinTypeWhenAllocationDataIsQueriedThenUseSystemMemoryFlagsIsNotSet) { AllocationData allocData; AllocationProperties properties(mockRootDeviceIndex, true, 10, AllocationType::BUFFER, false, mockDeviceBitfield); MockMemoryManager mockMemoryManager; mockMemoryManager.getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); 
EXPECT_FALSE(allocData.flags.useSystemMemory); EXPECT_EQ(10u, allocData.size); EXPECT_EQ(nullptr, allocData.hostPtr); } TEST_F(MemoryManagerGetAlloctionDataTests, givenForceSystemMemoryFlagWhenAllocationDataIsQueriedThenUseSystemMemoryFlagsIsSet) { auto firstAllocationIdx = static_cast(AllocationType::UNKNOWN); auto lastAllocationIdx = static_cast(AllocationType::COUNT); for (int allocationIdx = firstAllocationIdx + 1; allocationIdx != lastAllocationIdx; allocationIdx++) { AllocationData allocData; AllocationProperties properties(mockRootDeviceIndex, true, 10, static_cast(allocationIdx), false, mockDeviceBitfield); properties.flags.forceSystemMemory = true; MockMemoryManager mockMemoryManager; mockMemoryManager.getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_TRUE(allocData.flags.useSystemMemory); } } TEST_F(MemoryManagerGetAlloctionDataTests, givenMultiRootDeviceIndexAllocationPropertiesWhenAllocationDataIsQueriedThenUseSystemMemoryFlagsIsSet) { { AllocationData allocData; AllocationProperties properties(mockRootDeviceIndex, true, 10, AllocationType::BUFFER, false, mockDeviceBitfield); properties.flags.crossRootDeviceAccess = true; MockMemoryManager mockMemoryManager; mockMemoryManager.getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_TRUE(allocData.flags.useSystemMemory); EXPECT_TRUE(allocData.flags.crossRootDeviceAccess); } { AllocationData allocData; AllocationProperties properties(mockRootDeviceIndex, true, 10, AllocationType::IMAGE, false, mockDeviceBitfield); properties.flags.crossRootDeviceAccess = true; MockMemoryManager mockMemoryManager; mockMemoryManager.getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_TRUE(allocData.flags.useSystemMemory); EXPECT_TRUE(allocData.flags.crossRootDeviceAccess); } } TEST_F(MemoryManagerGetAlloctionDataTests, 
givenDisabledCrossRootDeviceAccsessFlagInAllocationPropertiesWhenAllocationDataIsQueriedThenUseSystemMemoryFlagsIsNotSet) { { AllocationData allocData; AllocationProperties properties(mockRootDeviceIndex, true, 10, AllocationType::BUFFER, false, mockDeviceBitfield); properties.flags.crossRootDeviceAccess = false; MockMemoryManager mockMemoryManager; mockMemoryManager.getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_FALSE(allocData.flags.useSystemMemory); EXPECT_FALSE(allocData.flags.crossRootDeviceAccess); } { AllocationData allocData; AllocationProperties properties(mockRootDeviceIndex, true, 10, AllocationType::IMAGE, false, mockDeviceBitfield); properties.flags.crossRootDeviceAccess = false; MockMemoryManager mockMemoryManager; mockMemoryManager.getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_FALSE(allocData.flags.useSystemMemory); EXPECT_FALSE(allocData.flags.crossRootDeviceAccess); } } HWTEST_F(MemoryManagerGetAlloctionDataTests, givenCommandBufferAllocationTypeWhenGetAllocationDataIsCalledThenSystemMemoryIsRequested) { AllocationData allocData; AllocationProperties properties(mockRootDeviceIndex, true, 10, AllocationType::COMMAND_BUFFER, false, mockDeviceBitfield); MockMemoryManager mockMemoryManager; mockMemoryManager.getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_TRUE(allocData.flags.useSystemMemory); } TEST_F(MemoryManagerGetAlloctionDataTests, givenAllocateMemoryFlagTrueWhenHostPtrIsNotNullThenAllocationDataHasHostPtrNulled) { AllocationData allocData; char memory = 0; AllocationProperties properties(mockRootDeviceIndex, true, sizeof(memory), AllocationType::BUFFER, false, mockDeviceBitfield); MockMemoryManager mockMemoryManager; mockMemoryManager.getAllocationData(allocData, properties, &memory, 
mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_EQ(sizeof(memory), allocData.size); EXPECT_EQ(nullptr, allocData.hostPtr); } TEST_F(MemoryManagerGetAlloctionDataTests, givenBufferTypeWhenAllocationDataIsQueriedThenForcePinFlagIsSet) { AllocationData allocData; AllocationProperties properties(mockRootDeviceIndex, true, 10, AllocationType::BUFFER, false, mockDeviceBitfield); MockMemoryManager mockMemoryManager; mockMemoryManager.getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_TRUE(allocData.flags.forcePin); } TEST_F(MemoryManagerGetAlloctionDataTests, givenBufferHostMemoryTypeWhenAllocationDataIsQueriedThenForcePinFlagIsSet) { AllocationData allocData; AllocationProperties properties(mockRootDeviceIndex, true, 10, AllocationType::BUFFER_HOST_MEMORY, false, mockDeviceBitfield); MockMemoryManager mockMemoryManager; mockMemoryManager.getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_TRUE(allocData.flags.forcePin); } TEST_F(MemoryManagerGetAlloctionDataTests, givenBufferCompressedTypeWhenAllocationDataIsQueriedThenForcePinFlagIsSet) { AllocationData allocData; AllocationProperties properties(mockRootDeviceIndex, true, 10, AllocationType::BUFFER, false, mockDeviceBitfield); properties.flags.preferCompressed = true; MockMemoryManager mockMemoryManager; mockMemoryManager.getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_TRUE(allocData.flags.forcePin); } TEST_F(MemoryManagerGetAlloctionDataTests, givenWriteCombinedTypeWhenAllocationDataIsQueriedThenForcePinFlagIsSet) { AllocationData allocData; AllocationProperties properties(mockRootDeviceIndex, true, 10, AllocationType::WRITE_COMBINED, false, mockDeviceBitfield); MockMemoryManager mockMemoryManager; mockMemoryManager.getAllocationData(allocData, properties, nullptr, 
mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_TRUE(allocData.flags.forcePin); } TEST_F(MemoryManagerGetAlloctionDataTests, givenDefaultAllocationFlagsWhenAllocationDataIsQueriedThenAllocateMemoryIsFalse) { AllocationData allocData; AllocationProperties properties(mockRootDeviceIndex, false, 0, AllocationType::BUFFER, false, mockDeviceBitfield); properties.flags.preferCompressed = true; char memory; MockMemoryManager mockMemoryManager; mockMemoryManager.getAllocationData(allocData, properties, &memory, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_FALSE(allocData.flags.allocateMemory); } TEST_F(MemoryManagerGetAlloctionDataTests, givenDebugModeWhenCertainAllocationTypesAreSelectedThenSystemPlacementIsChoosen) { DebugManagerStateRestore restorer; auto allocationType = AllocationType::BUFFER; auto mask = 1llu << (static_cast(allocationType) - 1); DebugManager.flags.ForceSystemMemoryPlacement.set(mask); AllocationData allocData; AllocationProperties properties(mockRootDeviceIndex, 0, allocationType, mockDeviceBitfield); allocData.flags.useSystemMemory = false; MockMemoryManager::overrideAllocationData(allocData, properties); EXPECT_TRUE(allocData.flags.useSystemMemory); allocData.flags.useSystemMemory = false; allocationType = AllocationType::WRITE_COMBINED; mask |= 1llu << (static_cast(allocationType) - 1); DebugManager.flags.ForceSystemMemoryPlacement.set(mask); AllocationProperties properties2(mockRootDeviceIndex, 0, allocationType, mockDeviceBitfield); MockMemoryManager::overrideAllocationData(allocData, properties2); EXPECT_TRUE(allocData.flags.useSystemMemory); allocData.flags.useSystemMemory = false; MockMemoryManager::overrideAllocationData(allocData, properties); EXPECT_TRUE(allocData.flags.useSystemMemory); allocData.flags.useSystemMemory = false; allocationType = AllocationType::IMAGE; mask = 1llu << (static_cast(allocationType) - 1); DebugManager.flags.ForceSystemMemoryPlacement.set(mask); 
MockMemoryManager::overrideAllocationData(allocData, properties); EXPECT_FALSE(allocData.flags.useSystemMemory); } TEST_P(MemoryManagerGetAlloctionData32BitAnd64kbPagesAllowedTest, givenAllocationTypesWith32BitAnd64kbPagesAllowedWhenAllocationDataIsQueriedThenProperFlagsAreSet) { AllocationData allocData; auto allocType = GetParam(); AllocationProperties properties(mockRootDeviceIndex, 0, allocType, mockDeviceBitfield); MockMemoryManager mockMemoryManager; mockMemoryManager.mockExecutionEnvironment->initGmm(); mockMemoryManager.getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_TRUE(allocData.flags.allow32Bit); EXPECT_TRUE(allocData.flags.allow64kbPages); EXPECT_EQ(allocType, allocData.type); } TEST_P(MemoryManagerGetAlloctionData32BitAnd64kbPagesAllowedTest, given64kbAllowedAllocationTypeWhenAllocatingThenPreferCompressionOnlyForSpecificTypes) { auto allocType = GetParam(); AllocationData allocData; AllocationProperties properties(mockRootDeviceIndex, 10, allocType, mockDeviceBitfield); bool bufferCompressedType = (allocType == AllocationType::BUFFER); properties.flags.preferCompressed = bufferCompressedType; MockMemoryManager mockMemoryManager(true, false); mockMemoryManager.mockExecutionEnvironment->initGmm(); mockMemoryManager.getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_TRUE(allocData.flags.allow64kbPages); auto allocation = mockMemoryManager.allocateGraphicsMemory(allocData); EXPECT_TRUE(mockMemoryManager.allocation64kbPageCreated); EXPECT_EQ(mockMemoryManager.preferCompressedFlagPassed, bufferCompressedType); mockMemoryManager.freeGraphicsMemory(allocation); } using MemoryManagerGetAlloctionData32BitAnd64kbPagesNotAllowedTest = MemoryManagerGetAlloctionData32BitAnd64kbPagesAllowedTest; TEST_P(MemoryManagerGetAlloctionData32BitAnd64kbPagesNotAllowedTest, 
givenAllocationTypesWith32BitAnd64kbPagesDisallowedWhenAllocationDataIsQueriedThenFlagsAreNotSet) { AllocationData allocData; auto allocType = GetParam(); AllocationProperties properties(mockRootDeviceIndex, 0, allocType, mockDeviceBitfield); MockMemoryManager mockMemoryManager; mockMemoryManager.getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_FALSE(allocData.flags.allow32Bit); EXPECT_FALSE(allocData.flags.allow64kbPages); EXPECT_EQ(allocType, allocData.type); } static const AllocationType allocationTypesWith32BitAnd64KbPagesAllowed[] = {AllocationType::BUFFER, AllocationType::BUFFER_HOST_MEMORY, AllocationType::PIPE, AllocationType::SCRATCH_SURFACE, AllocationType::WORK_PARTITION_SURFACE, AllocationType::PRIVATE_SURFACE, AllocationType::PRINTF_SURFACE, AllocationType::CONSTANT_SURFACE, AllocationType::GLOBAL_SURFACE, AllocationType::WRITE_COMBINED}; INSTANTIATE_TEST_CASE_P(Allow32BitAnd64kbPagesTypes, MemoryManagerGetAlloctionData32BitAnd64kbPagesAllowedTest, ::testing::ValuesIn(allocationTypesWith32BitAnd64KbPagesAllowed)); static const AllocationType allocationTypesWith32BitAnd64KbPagesNotAllowed[] = {AllocationType::COMMAND_BUFFER, AllocationType::TIMESTAMP_PACKET_TAG_BUFFER, AllocationType::PROFILING_TAG_BUFFER, AllocationType::IMAGE, AllocationType::INSTRUCTION_HEAP, AllocationType::SHARED_RESOURCE_COPY}; INSTANTIATE_TEST_CASE_P(Disallow32BitAnd64kbPagesTypes, MemoryManagerGetAlloctionData32BitAnd64kbPagesNotAllowedTest, ::testing::ValuesIn(allocationTypesWith32BitAnd64KbPagesNotAllowed)); TEST(MemoryManagerTest, givenForced32BitSetWhenGraphicsMemoryFor32BitAllowedTypeIsAllocatedThen32BitAllocationIsReturned) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MockMemoryManager memoryManager(false, false, executionEnvironment); memoryManager.setForce32BitAllocations(true); AllocationData allocData; AllocationProperties properties(mockRootDeviceIndex, 10, 
AllocationType::BUFFER, mockDeviceBitfield); memoryManager.getAllocationData(allocData, properties, nullptr, memoryManager.createStorageInfoFromProperties(properties)); auto allocation = memoryManager.allocateGraphicsMemory(allocData); ASSERT_NE(nullptr, allocation); if constexpr (is64bit) { EXPECT_TRUE(allocation->is32BitAllocation()); EXPECT_EQ(MemoryPool::System4KBPagesWith32BitGpuAddressing, allocation->getMemoryPool()); } else { EXPECT_FALSE(allocation->is32BitAllocation()); EXPECT_EQ(MemoryPool::System4KBPages, allocation->getMemoryPool()); } memoryManager.freeGraphicsMemory(allocation); } TEST(MemoryManagerTest, givenEnabledShareableWhenGraphicsAllocationIsAllocatedThenAllocationIsReturned) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); executionEnvironment.initGmm(); MockMemoryManager memoryManager(false, false, executionEnvironment); AllocationData allocData; AllocationProperties properties(mockRootDeviceIndex, 10, AllocationType::BUFFER, mockDeviceBitfield); properties.flags.shareable = true; memoryManager.getAllocationData(allocData, properties, nullptr, memoryManager.createStorageInfoFromProperties(properties)); EXPECT_EQ(allocData.flags.shareable, 1u); auto allocation = memoryManager.allocateGraphicsMemory(allocData); ASSERT_NE(nullptr, allocation); memoryManager.freeGraphicsMemory(allocation); } TEST(MemoryManagerTest, givenEnabledShareableWhenGraphicsAllocationIsCalledAndSystemMemoryFailsThenNullAllocationIsReturned) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); executionEnvironment.initGmm(); MockMemoryManager memoryManager(false, false, executionEnvironment); AllocationData allocData; AllocationProperties properties(mockRootDeviceIndex, 10, AllocationType::BUFFER, mockDeviceBitfield); properties.flags.shareable = true; memoryManager.getAllocationData(allocData, properties, nullptr, memoryManager.createStorageInfoFromProperties(properties)); EXPECT_EQ(allocData.flags.shareable, 1u); 
memoryManager.failAllocateSystemMemory = true; auto allocation = memoryManager.allocateGraphicsMemory(allocData); ASSERT_EQ(nullptr, allocation); memoryManager.freeGraphicsMemory(allocation); } TEST(MemoryManagerTest, givenForced32BitEnabledWhenGraphicsMemoryWihtoutAllow32BitFlagIsAllocatedThenNon32BitAllocationIsReturned) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MockMemoryManager memoryManager(executionEnvironment); memoryManager.setForce32BitAllocations(true); AllocationData allocData; AllocationProperties properties(mockRootDeviceIndex, 10, AllocationType::BUFFER, mockDeviceBitfield); memoryManager.getAllocationData(allocData, properties, nullptr, memoryManager.createStorageInfoFromProperties(properties)); allocData.flags.allow32Bit = false; auto allocation = memoryManager.allocateGraphicsMemory(allocData); ASSERT_NE(nullptr, allocation); EXPECT_FALSE(allocation->is32BitAllocation()); memoryManager.freeGraphicsMemory(allocation); } TEST(MemoryManagerTest, givenForced32BitDisabledWhenGraphicsMemoryWith32BitFlagFor32BitAllowedTypeIsAllocatedThenNon32BitAllocationIsReturned) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MockMemoryManager memoryManager(executionEnvironment); memoryManager.setForce32BitAllocations(false); AllocationData allocData; AllocationProperties properties(mockRootDeviceIndex, 10, AllocationType::BUFFER, mockDeviceBitfield); memoryManager.getAllocationData(allocData, properties, nullptr, memoryManager.createStorageInfoFromProperties(properties)); auto allocation = memoryManager.allocateGraphicsMemory(allocData); ASSERT_NE(nullptr, allocation); EXPECT_FALSE(allocation->is32BitAllocation()); memoryManager.freeGraphicsMemory(allocation); } TEST(MemoryManagerTest, givenEnabled64kbPagesWhenGraphicsMemoryMustBeHostMemoryAndIsAllocatedWithNullptrForBufferThen64kbAllocationIsReturned) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); executionEnvironment.initGmm(); 
MockMemoryManager memoryManager(true, false, executionEnvironment); AllocationData allocData; AllocationProperties properties(mockRootDeviceIndex, 10, AllocationType::BUFFER_HOST_MEMORY, mockDeviceBitfield); memoryManager.getAllocationData(allocData, properties, nullptr, memoryManager.createStorageInfoFromProperties(properties)); auto allocation = memoryManager.allocateGraphicsMemory(allocData); ASSERT_NE(nullptr, allocation); EXPECT_EQ(0u, reinterpret_cast(allocation->getUnderlyingBuffer()) & MemoryConstants::page64kMask); EXPECT_EQ(0u, allocation->getGpuAddress() & MemoryConstants::page64kMask); EXPECT_EQ(0u, allocation->getUnderlyingBufferSize() & MemoryConstants::page64kMask); EXPECT_EQ(MemoryPool::System64KBPages, allocation->getMemoryPool()); memoryManager.freeGraphicsMemory(allocation); } TEST(MemoryManagerTest, givenEnabled64kbPagesWhenGraphicsMemoryWithoutAllow64kbPagesFlagsIsAllocatedThenNon64kbAllocationIsReturned) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MockMemoryManager memoryManager(true, false, executionEnvironment); AllocationData allocData; AllocationProperties properties(mockRootDeviceIndex, 10, AllocationType::BUFFER, mockDeviceBitfield); memoryManager.getAllocationData(allocData, properties, nullptr, memoryManager.createStorageInfoFromProperties(properties)); allocData.flags.allow64kbPages = false; auto allocation = memoryManager.allocateGraphicsMemory(allocData); ASSERT_NE(nullptr, allocation); EXPECT_FALSE(memoryManager.allocation64kbPageCreated); EXPECT_TRUE(memoryManager.allocationCreated); memoryManager.freeGraphicsMemory(allocation); } TEST(MemoryManagerTest, givenDisabled64kbPagesWhenGraphicsMemoryMustBeHostMemoryAndIsAllocatedWithNullptrForBufferThenNon64kbAllocationIsReturned) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MockMemoryManager memoryManager(false, false, executionEnvironment); AllocationData allocData; AllocationProperties properties(mockRootDeviceIndex, 10, 
AllocationType::BUFFER_HOST_MEMORY, mockDeviceBitfield); memoryManager.getAllocationData(allocData, properties, nullptr, memoryManager.createStorageInfoFromProperties(properties)); auto allocation = memoryManager.allocateGraphicsMemory(allocData); ASSERT_NE(nullptr, allocation); EXPECT_FALSE(memoryManager.allocation64kbPageCreated); EXPECT_TRUE(memoryManager.allocationCreated); EXPECT_EQ(MemoryPool::System4KBPages, allocation->getMemoryPool()); memoryManager.freeGraphicsMemory(allocation); } TEST(MemoryManagerTest, givenForced32BitAndEnabled64kbPagesWhenGraphicsMemoryMustBeHostMemoryAndIsAllocatedWithNullptrForBufferThen32BitAllocationOver64kbIsChosen) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MockMemoryManager memoryManager(false, false, executionEnvironment); memoryManager.setForce32BitAllocations(true); AllocationData allocData; AllocationProperties properties(mockRootDeviceIndex, 10, AllocationType::BUFFER_HOST_MEMORY, mockDeviceBitfield); memoryManager.getAllocationData(allocData, properties, nullptr, memoryManager.createStorageInfoFromProperties(properties)); auto allocation = memoryManager.allocateGraphicsMemory(allocData); ASSERT_NE(nullptr, allocation); if constexpr (is64bit) { EXPECT_TRUE(allocation->is32BitAllocation()); } else { EXPECT_FALSE(allocation->is32BitAllocation()); } memoryManager.freeGraphicsMemory(allocation); } TEST(MemoryManagerTest, givenEnabled64kbPagesWhenGraphicsMemoryIsAllocatedWithHostPtrForBufferThenExistingMemoryIsUsedForAllocation) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MockMemoryManager memoryManager(true, false, executionEnvironment); AllocationData allocData; AllocationProperties properties(mockRootDeviceIndex, false, 1, AllocationType::BUFFER_HOST_MEMORY, false, mockDeviceBitfield); char memory[1]; memoryManager.getAllocationData(allocData, properties, &memory, memoryManager.createStorageInfoFromProperties(properties)); auto allocation = 
memoryManager.allocateGraphicsMemory(allocData); ASSERT_NE(nullptr, allocation); EXPECT_EQ((executionEnvironment.rootDeviceEnvironments[0u]->getHardwareInfo()->capabilityTable.hostPtrTrackingEnabled || is32bit), allocation->fragmentsStorage.fragmentCount); EXPECT_EQ(MemoryPool::System4KBPages, allocation->getMemoryPool()); memoryManager.freeGraphicsMemory(allocation); } TEST(MemoryManagerTest, givenMemoryManagerWhenGraphicsMemoryAllocationInDevicePoolFailsThenFallbackAllocationIsReturned) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MockMemoryManager memoryManager(false, true, executionEnvironment); memoryManager.failInDevicePool = true; auto allocation = memoryManager.allocateGraphicsMemoryWithProperties({mockRootDeviceIndex, MemoryConstants::pageSize, AllocationType::BUFFER, mockDeviceBitfield}); ASSERT_NE(nullptr, allocation); EXPECT_TRUE(memoryManager.allocationCreated); EXPECT_EQ(MemoryPool::System4KBPages, allocation->getMemoryPool()); memoryManager.freeGraphicsMemory(allocation); } TEST(MemoryManagerTest, givenMemoryManagerWhenBufferTypeIsPassedThenAllocateGraphicsMemoryInPreferredPoolCanAllocateInDevicePool) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MockMemoryManager memoryManager(false, true, executionEnvironment); auto allocation = memoryManager.allocateGraphicsMemoryWithProperties({mockRootDeviceIndex, MemoryConstants::pageSize, AllocationType::BUFFER, mockDeviceBitfield}); EXPECT_NE(nullptr, allocation); memoryManager.freeGraphicsMemory(allocation); } TEST(MemoryManagerTest, givenMemoryManagerWhenBufferTypeIsPassedAndAllocateInDevicePoolFailsWithErrorThenAllocateGraphicsMemoryInPreferredPoolReturnsNullptr) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MockMemoryManager memoryManager(false, true, executionEnvironment); memoryManager.failInDevicePoolWithError = true; auto allocation = memoryManager.allocateGraphicsMemoryWithProperties({mockRootDeviceIndex, 
MemoryConstants::pageSize, AllocationType::BUFFER, mockDeviceBitfield}); ASSERT_EQ(nullptr, allocation); EXPECT_FALSE(memoryManager.allocationInDevicePoolCreated); memoryManager.freeGraphicsMemory(allocation); } TEST(MemoryManagerTest, givenSvmAllocationTypeWhenGetAllocationDataIsCalledThenAllocatingMemoryIsRequested) { AllocationData allocData; MockMemoryManager mockMemoryManager; AllocationProperties properties{mockRootDeviceIndex, 1, AllocationType::SVM_ZERO_COPY, mockDeviceBitfield}; mockMemoryManager.getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_TRUE(allocData.flags.allocateMemory); } TEST(MemoryManagerTest, givenSvmAllocationTypeWhenGetAllocationDataIsCalledThen64kbPagesAreAllowedAnd32BitAllocationIsDisallowed) { AllocationData allocData; MockMemoryManager mockMemoryManager; AllocationProperties properties{mockRootDeviceIndex, 1, AllocationType::SVM_ZERO_COPY, mockDeviceBitfield}; mockMemoryManager.getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_TRUE(allocData.flags.allow64kbPages); EXPECT_FALSE(allocData.flags.allow32Bit); } TEST(MemoryManagerTest, givenTagBufferTypeWhenGetAllocationDataIsCalledThenSystemMemoryIsRequested) { AllocationData allocData; MockMemoryManager mockMemoryManager; AllocationProperties properties{mockRootDeviceIndex, 1, AllocationType::TAG_BUFFER, mockDeviceBitfield}; mockMemoryManager.getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_TRUE(allocData.flags.useSystemMemory); } TEST(MemoryManagerTest, givenGlobalFenceTypeWhenGetAllocationDataIsCalledThenSystemMemoryIsRequested) { AllocationData allocData; MockMemoryManager mockMemoryManager; AllocationProperties properties{mockRootDeviceIndex, 1, AllocationType::GLOBAL_FENCE, mockDeviceBitfield}; mockMemoryManager.getAllocationData(allocData, properties, nullptr, 
mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_TRUE(allocData.flags.useSystemMemory); } TEST(MemoryManagerTest, givenPreemptionTypeWhenGetAllocationDataIsCalledThenSystemMemoryIsNotRequested) { AllocationData allocData; MockMemoryManager mockMemoryManager; AllocationProperties properties{mockRootDeviceIndex, 1, AllocationType::PREEMPTION, mockDeviceBitfield}; mockMemoryManager.getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_FALSE(allocData.flags.useSystemMemory); } TEST(MemoryManagerTest, givenPreemptionTypeWhenGetAllocationDataIsCalledThen64kbPagesAllowed) { AllocationData allocData; MockMemoryManager mockMemoryManager; AllocationProperties properties{mockRootDeviceIndex, 1, AllocationType::PREEMPTION, mockDeviceBitfield}; mockMemoryManager.getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_TRUE(allocData.flags.allow64kbPages); } TEST(MemoryManagerTest, givenPreemptionTypeWhenGetAllocationDataIsCalledThen48BitResourceIsTrue) { AllocationData allocData; MockMemoryManager mockMemoryManager; AllocationProperties properties{mockRootDeviceIndex, 1, AllocationType::PREEMPTION, mockDeviceBitfield}; mockMemoryManager.getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_TRUE(allocData.flags.resource48Bit); } TEST(MemoryManagerTest, givenSharedContextImageTypeWhenGetAllocationDataIsCalledThenSystemMemoryIsRequested) { AllocationData allocData; MockMemoryManager mockMemoryManager; AllocationProperties properties{mockRootDeviceIndex, 1, AllocationType::SHARED_CONTEXT_IMAGE, mockDeviceBitfield}; mockMemoryManager.getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_TRUE(allocData.flags.useSystemMemory); } TEST(MemoryManagerTest, 
givenMCSTypeWhenGetAllocationDataIsCalledThenSystemMemoryIsRequested) { AllocationData allocData; MockMemoryManager mockMemoryManager; AllocationProperties properties{mockRootDeviceIndex, 1, AllocationType::MCS, mockDeviceBitfield}; mockMemoryManager.getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_TRUE(allocData.flags.useSystemMemory); } TEST(MemoryManagerTest, givenPipeTypeWhenGetAllocationDataIsCalledThenSystemMemoryIsNotRequested) { AllocationData allocData; MockMemoryManager mockMemoryManager; AllocationProperties properties{mockRootDeviceIndex, 1, AllocationType::PIPE, mockDeviceBitfield}; mockMemoryManager.getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_FALSE(allocData.flags.useSystemMemory); } TEST(MemoryManagerTest, givenGlobalSurfaceTypeWhenGetAllocationDataIsCalledThenSystemMemoryIsNotRequested) { AllocationData allocData; MockMemoryManager mockMemoryManager; AllocationProperties properties{mockRootDeviceIndex, 1, AllocationType::GLOBAL_SURFACE, mockDeviceBitfield}; mockMemoryManager.getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_FALSE(allocData.flags.useSystemMemory); } TEST(MemoryManagerTest, givenWriteCombinedTypeWhenGetAllocationDataIsCalledThenSystemMemoryIsNotRequested) { AllocationData allocData; MockMemoryManager mockMemoryManager; AllocationProperties properties{mockRootDeviceIndex, 1, AllocationType::WRITE_COMBINED, mockDeviceBitfield}; mockMemoryManager.getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_FALSE(allocData.flags.useSystemMemory); } TEST(MemoryManagerTest, givenInternalHostMemoryTypeWhenGetAllocationDataIsCalledThenSystemMemoryIsRequested) { AllocationData allocData; MockMemoryManager mockMemoryManager; AllocationProperties 
properties{mockRootDeviceIndex, 1, AllocationType::INTERNAL_HOST_MEMORY, mockDeviceBitfield}; mockMemoryManager.getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_TRUE(allocData.flags.useSystemMemory); } TEST(MemoryManagerTest, givenFillPatternTypeWhenGetAllocationDataIsCalledThenSystemMemoryIsRequested) { AllocationData allocData; MockMemoryManager mockMemoryManager; AllocationProperties properties{mockRootDeviceIndex, 1, AllocationType::FILL_PATTERN, mockDeviceBitfield}; mockMemoryManager.getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_TRUE(allocData.flags.useSystemMemory); } using GetAllocationDataTestHw = ::testing::Test; HWTEST_F(GetAllocationDataTestHw, givenLinearStreamTypeWhenGetAllocationDataIsCalledThenSystemMemoryIsNotRequested) { AllocationData allocData; MockMemoryManager mockMemoryManager; AllocationProperties properties{mockRootDeviceIndex, 1, AllocationType::LINEAR_STREAM, mockDeviceBitfield}; mockMemoryManager.getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_FALSE(allocData.flags.useSystemMemory); EXPECT_TRUE(allocData.flags.requiresCpuAccess); } HWTEST_F(GetAllocationDataTestHw, givenTimestampPacketTagBufferTypeWhenGetAllocationDataIsCalledThenSystemMemoryIsRequestedAndRequireCpuAccess) { AllocationData allocData; MockMemoryManager mockMemoryManager; AllocationProperties properties{mockRootDeviceIndex, 1, AllocationType::TIMESTAMP_PACKET_TAG_BUFFER, mockDeviceBitfield}; mockMemoryManager.getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_EQ(UnitTestHelper::requiresTimestampPacketsInSystemMemory(*defaultHwInfo), allocData.flags.useSystemMemory); EXPECT_TRUE(allocData.flags.requiresCpuAccess); } TEST(MemoryManagerTest, 
givenProfilingTagBufferTypeWhenGetAllocationDataIsCalledThenSystemMemoryIsRequested) { AllocationData allocData; MockMemoryManager mockMemoryManager; AllocationProperties properties{mockRootDeviceIndex, 1, AllocationType::PROFILING_TAG_BUFFER, mockDeviceBitfield}; mockMemoryManager.getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_TRUE(allocData.flags.useSystemMemory); EXPECT_FALSE(allocData.flags.requiresCpuAccess); } TEST(MemoryManagerTest, givenAllocationPropertiesWithMultiOsContextCapableFlagEnabledWhenAllocateMemoryThenAllocationDataIsMultiOsContextCapable) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MockMemoryManager memoryManager(false, false, executionEnvironment); AllocationProperties properties{mockRootDeviceIndex, MemoryConstants::pageSize, AllocationType::BUFFER, mockDeviceBitfield}; properties.flags.multiOsContextCapable = true; AllocationData allocData; memoryManager.getAllocationData(allocData, properties, nullptr, memoryManager.createStorageInfoFromProperties(properties)); EXPECT_TRUE(allocData.flags.multiOsContextCapable); } TEST(MemoryManagerTest, givenAllocationPropertiesWithMultiOsContextCapableFlagDisabledWhenAllocateMemoryThenAllocationDataIsNotMultiOsContextCapable) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MockMemoryManager memoryManager(false, false, executionEnvironment); AllocationProperties properties{mockRootDeviceIndex, MemoryConstants::pageSize, AllocationType::BUFFER, mockDeviceBitfield}; properties.flags.multiOsContextCapable = false; AllocationData allocData; memoryManager.getAllocationData(allocData, properties, nullptr, memoryManager.createStorageInfoFromProperties(properties)); EXPECT_FALSE(allocData.flags.multiOsContextCapable); } TEST(MemoryManagerTest, givenConstantSurfaceTypeWhenGetAllocationDataIsCalledThenSystemMemoryIsNotRequested) { AllocationData allocData; MockMemoryManager mockMemoryManager; 
AllocationProperties properties{mockRootDeviceIndex, 1, AllocationType::CONSTANT_SURFACE, mockDeviceBitfield};
    mockMemoryManager.getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties));
    EXPECT_FALSE(allocData.flags.useSystemMemory);
}

// INTERNAL_HEAP: system memory is not forced, CPU access is required.
HWTEST_F(GetAllocationDataTestHw, givenInternalHeapTypeWhenGetAllocationDataIsCalledThenSystemMemoryIsNotRequested) {
    AllocationData allocData;
    MockMemoryManager mockMemoryManager;
    AllocationProperties properties{mockRootDeviceIndex, 1, AllocationType::INTERNAL_HEAP, mockDeviceBitfield};
    mockMemoryManager.getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties));
    EXPECT_FALSE(allocData.flags.useSystemMemory);
    EXPECT_TRUE(allocData.flags.requiresCpuAccess);
}

// GPU_TIMESTAMP_DEVICE_BUFFER: system memory is not forced, CPU access is required.
HWTEST_F(GetAllocationDataTestHw, givenGpuTimestampDeviceBufferTypeWhenGetAllocationDataIsCalledThenSystemMemoryIsNotRequested) {
    AllocationData allocData;
    MockMemoryManager mockMemoryManager;
    AllocationProperties properties{mockRootDeviceIndex, 1, AllocationType::GPU_TIMESTAMP_DEVICE_BUFFER, mockDeviceBitfield};
    mockMemoryManager.getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties));
    EXPECT_FALSE(allocData.flags.useSystemMemory);
    EXPECT_TRUE(allocData.flags.requiresCpuAccess);
}

// PRINTF_SURFACE: system memory is not forced, CPU access is required.
// The allocation type matches the test name so this case exercises the printf
// surface instead of duplicating the GPU timestamp buffer case above.
HWTEST_F(GetAllocationDataTestHw, givenPrintfAllocationWhenGetAllocationDataIsCalledThenDontForceSystemMemoryAndRequireCpuAccess) {
    AllocationData allocData;
    MockMemoryManager mockMemoryManager;
    AllocationProperties properties{mockRootDeviceIndex, 1, AllocationType::PRINTF_SURFACE, mockDeviceBitfield};
    mockMemoryManager.getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties));
    EXPECT_FALSE(allocData.flags.useSystemMemory);
    EXPECT_TRUE(allocData.flags.requiresCpuAccess);
}

// LINEAR_STREAM: system memory is not forced, CPU access is required.
HWTEST_F(GetAllocationDataTestHw, givenLinearStreamAllocationWhenGetAllocationDataIsCalledThenDontForceSystemMemoryAndRequireCpuAccess) {
    AllocationData allocData;
    MockMemoryManager mockMemoryManager;
    AllocationProperties properties{mockRootDeviceIndex, 1, AllocationType::LINEAR_STREAM, mockDeviceBitfield};
    mockMemoryManager.getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties));
    EXPECT_FALSE(allocData.flags.useSystemMemory);
    EXPECT_TRUE(allocData.flags.requiresCpuAccess);
}

// CONSTANT_SURFACE: system memory is not forced, CPU access is required.
HWTEST_F(GetAllocationDataTestHw, givenConstantSurfaceAllocationWhenGetAllocationDataIsCalledThenDontForceSystemMemoryAndRequireCpuAccess) {
    AllocationData allocData;
    MockMemoryManager mockMemoryManager;
    AllocationProperties properties{mockRootDeviceIndex, 1, AllocationType::CONSTANT_SURFACE, mockDeviceBitfield};
    mockMemoryManager.getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties));
    EXPECT_FALSE(allocData.flags.useSystemMemory);
    EXPECT_TRUE(allocData.flags.requiresCpuAccess);
}

// KERNEL_ISA and KERNEL_ISA_INTERNAL: useSystemMemory must be the opposite of
// the ftrLocalMemory feature flag of the default hardware info.
HWTEST_F(GetAllocationDataTestHw, givenKernelIsaTypeWhenGetAllocationDataIsCalledThenSystemMemoryIsNotRequested) {
    AllocationData allocData;
    MockMemoryManager mockMemoryManager;

    AllocationProperties properties{mockRootDeviceIndex, 1, AllocationType::KERNEL_ISA, mockDeviceBitfield};
    mockMemoryManager.getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties));
    EXPECT_NE(defaultHwInfo->featureTable.flags.ftrLocalMemory, allocData.flags.useSystemMemory);

    // Storage info is derived from the same properties object that is being
    // queried, so the internal ISA type is used consistently in both arguments.
    AllocationProperties properties2{mockRootDeviceIndex, 1, AllocationType::KERNEL_ISA_INTERNAL, mockDeviceBitfield};
    mockMemoryManager.getAllocationData(allocData, properties2, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties2));
    EXPECT_NE(defaultHwInfo->featureTable.flags.ftrLocalMemory, allocData.flags.useSystemMemory);
}

HWTEST_F(GetAllocationDataTestHw,
givenLinearStreamWhenGetAllocationDataIsCalledThenSystemMemoryIsNotRequested) {
    // LINEAR_STREAM: system memory is not forced, CPU access is required.
    AllocationData allocData;
    MockMemoryManager mockMemoryManager;
    AllocationProperties properties{mockRootDeviceIndex, 1, AllocationType::LINEAR_STREAM, mockDeviceBitfield};
    mockMemoryManager.getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties));
    EXPECT_FALSE(allocData.flags.useSystemMemory);
    EXPECT_TRUE(allocData.flags.requiresCpuAccess);
}

// PRINTF_SURFACE: system memory is not used, CPU access is required.
HWTEST_F(GetAllocationDataTestHw, givenPrintfAllocationWhenGetAllocationDataIsCalledThenDontUseSystemMemoryAndRequireCpuAccess) {
    AllocationData allocData;
    MockMemoryManager mockMemoryManager;
    AllocationProperties properties{mockRootDeviceIndex, 1, AllocationType::PRINTF_SURFACE, mockDeviceBitfield};
    mockMemoryManager.getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties));
    EXPECT_FALSE(allocData.flags.useSystemMemory);
    EXPECT_TRUE(allocData.flags.requiresCpuAccess);
}

// EXTERNAL_HOST_PTR with a caller-provided pointer: system memory is used,
// nothing is allocated, 32-bit and 64KB pages are disallowed, and the host
// pointer is carried over to the allocation data.
TEST(MemoryManagerTest, givenExternalHostMemoryWhenGetAllocationDataIsCalledThenItHasProperFieldsSet) {
    AllocationData allocData;
    // Arbitrary non-null address; it is only compared, never dereferenced here.
    auto hostPtr = reinterpret_cast<void *>(0x1234);
    MockMemoryManager mockMemoryManager;
    AllocationProperties properties{mockRootDeviceIndex, false, 1, AllocationType::EXTERNAL_HOST_PTR, false, mockDeviceBitfield};

    mockMemoryManager.getAllocationData(allocData, properties, hostPtr, mockMemoryManager.createStorageInfoFromProperties(properties));

    EXPECT_TRUE(allocData.flags.useSystemMemory);
    EXPECT_FALSE(allocData.flags.allocateMemory);
    EXPECT_FALSE(allocData.flags.allow32Bit);
    EXPECT_FALSE(allocData.flags.allow64kbPages);
    EXPECT_EQ(allocData.hostPtr, hostPtr);
}

// The root device index in the allocation data must equal the one given in the
// properties, checked for mockRootDeviceIndex and for the last of 100 devices.
TEST(MemoryManagerTest, GivenAllocationPropertiesWhenGettingAllocationDataThenSameRootDeviceIndexIsUsed) {
    const uint32_t rootDevicesCount = 100u;

    AllocationData allocData;
    MockExecutionEnvironment executionEnvironment{defaultHwInfo.get(), true, rootDevicesCount};
    MockMemoryManager mockMemoryManager{executionEnvironment};

    AllocationProperties properties{mockRootDeviceIndex, 1, AllocationType::BUFFER, mockDeviceBitfield};
    mockMemoryManager.getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties));
    EXPECT_EQ(allocData.rootDeviceIndex, properties.rootDeviceIndex);

    // Storage info is built from properties2 so that both arguments describe
    // the same root device.
    AllocationProperties properties2{rootDevicesCount - 1, 1, AllocationType::BUFFER, mockDeviceBitfield};
    mockMemoryManager.getAllocationData(allocData, properties2, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties2));
    EXPECT_EQ(allocData.rootDeviceIndex, properties2.rootDeviceIndex);
}

// MAP_ALLOCATION with a caller-provided pointer: same expectations as for
// EXTERNAL_HOST_PTR above.
TEST(MemoryManagerTest, givenMapAllocationWhenGetAllocationDataIsCalledThenItHasProperFieldsSet) {
    AllocationData allocData;
    // Arbitrary non-null address; it is only compared, never dereferenced here.
    auto hostPtr = reinterpret_cast<void *>(0x1234);
    MockMemoryManager mockMemoryManager;
    AllocationProperties properties{mockRootDeviceIndex, false, 1, AllocationType::MAP_ALLOCATION, false, mockDeviceBitfield};

    mockMemoryManager.getAllocationData(allocData, properties, hostPtr, mockMemoryManager.createStorageInfoFromProperties(properties));

    EXPECT_TRUE(allocData.flags.useSystemMemory);
    EXPECT_FALSE(allocData.flags.allocateMemory);
    EXPECT_FALSE(allocData.flags.allow32Bit);
    EXPECT_FALSE(allocData.flags.allow64kbPages);
    EXPECT_EQ(allocData.hostPtr, hostPtr);
}

// RING_BUFFER of 0x10000 bytes without a host pointer: system memory, memory is
// allocated, no 32-bit / 64KB pages, size and null host pointer are preserved.
HWTEST_F(GetAllocationDataTestHw, givenRingBufferAllocationWhenGetAllocationDataIsCalledThenItHasProperFieldsSet) {
    AllocationData allocData;
    MockMemoryManager mockMemoryManager;
    AllocationProperties properties{mockRootDeviceIndex, 0x10000u, AllocationType::RING_BUFFER, mockDeviceBitfield};

    mockMemoryManager.getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties));

    EXPECT_TRUE(allocData.flags.useSystemMemory);
    EXPECT_TRUE(allocData.flags.allocateMemory);
    EXPECT_FALSE(allocData.flags.allow32Bit);
    EXPECT_FALSE(allocData.flags.allow64kbPages);
    EXPECT_EQ(0x10000u, allocData.size);
    EXPECT_EQ(nullptr, allocData.hostPtr);
EXPECT_TRUE(allocData.flags.requiresCpuAccess); } HWTEST_F(GetAllocationDataTestHw, givenSemaphoreBufferAllocationWhenGetAllocationDataIsCalledThenItHasProperFieldsSet) { AllocationData allocData; MockMemoryManager mockMemoryManager; AllocationProperties properties{mockRootDeviceIndex, 0x1000u, AllocationType::SEMAPHORE_BUFFER, mockDeviceBitfield}; mockMemoryManager.getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_TRUE(allocData.flags.useSystemMemory); EXPECT_TRUE(allocData.flags.allocateMemory); EXPECT_FALSE(allocData.flags.allow32Bit); EXPECT_FALSE(allocData.flags.allow64kbPages); EXPECT_EQ(0x1000u, allocData.size); EXPECT_EQ(nullptr, allocData.hostPtr); EXPECT_TRUE(allocData.flags.requiresCpuAccess); } TEST(MemoryManagerTest, givenDirectBufferPlacementSetWhenDefaultIsUsedThenExpectNoFlagsChanged) { AllocationData allocationData; AllocationProperties properties(mockRootDeviceIndex, 0x1000, AllocationType::RING_BUFFER, mockDeviceBitfield); MockMemoryManager::overrideAllocationData(allocationData, properties); EXPECT_EQ(0u, allocationData.flags.requiresCpuAccess); EXPECT_EQ(0u, allocationData.flags.useSystemMemory); } TEST(MemoryManagerTest, givenDirectBufferPlacementSetWhenOverrideToNonSystemThenExpectNonSystemFlags) { DebugManagerStateRestore restorer; DebugManager.flags.DirectSubmissionBufferPlacement.set(0); AllocationData allocationData; AllocationProperties properties(mockRootDeviceIndex, 0x1000, AllocationType::RING_BUFFER, mockDeviceBitfield); MockMemoryManager::overrideAllocationData(allocationData, properties); EXPECT_EQ(1u, allocationData.flags.requiresCpuAccess); EXPECT_EQ(0u, allocationData.flags.useSystemMemory); } TEST(MemoryManagerTest, givenDirectBufferPlacementSetWhenOverrideToSystemThenExpectNonFlags) { DebugManagerStateRestore restorer; DebugManager.flags.DirectSubmissionBufferPlacement.set(1); AllocationData allocationData; AllocationProperties 
properties(mockRootDeviceIndex, 0x1000, AllocationType::RING_BUFFER, mockDeviceBitfield); MockMemoryManager::overrideAllocationData(allocationData, properties); EXPECT_EQ(0u, allocationData.flags.requiresCpuAccess); EXPECT_EQ(1u, allocationData.flags.useSystemMemory); } TEST(MemoryManagerTest, givenDirectSemaphorePlacementSetWhenDefaultIsUsedThenExpectNoFlagsChanged) { AllocationData allocationData; AllocationProperties properties(mockRootDeviceIndex, 0x1000, AllocationType::SEMAPHORE_BUFFER, mockDeviceBitfield); MockMemoryManager::overrideAllocationData(allocationData, properties); EXPECT_EQ(0u, allocationData.flags.requiresCpuAccess); EXPECT_EQ(0u, allocationData.flags.useSystemMemory); } TEST(MemoryManagerTest, givenDirectSemaphorePlacementSetWhenOverrideToNonSystemThenExpectNonSystemFlags) { DebugManagerStateRestore restorer; DebugManager.flags.DirectSubmissionSemaphorePlacement.set(0); AllocationData allocationData; AllocationProperties properties(mockRootDeviceIndex, 0x1000, AllocationType::SEMAPHORE_BUFFER, mockDeviceBitfield); MockMemoryManager::overrideAllocationData(allocationData, properties); EXPECT_EQ(1u, allocationData.flags.requiresCpuAccess); EXPECT_EQ(0u, allocationData.flags.useSystemMemory); } TEST(MemoryManagerTest, givenDirectSemaphorePlacementSetWhenOverrideToSystemThenExpectNonFlags) { DebugManagerStateRestore restorer; DebugManager.flags.DirectSubmissionSemaphorePlacement.set(1); AllocationData allocationData; AllocationProperties properties(mockRootDeviceIndex, 0x1000, AllocationType::SEMAPHORE_BUFFER, mockDeviceBitfield); MockMemoryManager::overrideAllocationData(allocationData, properties); EXPECT_EQ(0u, allocationData.flags.requiresCpuAccess); EXPECT_EQ(1u, allocationData.flags.useSystemMemory); } TEST(MemoryManagerTest, givenDirectBufferAddressingWhenOverrideToNo48BitThenExpect48BitFlagFalse) { DebugManagerStateRestore restorer; DebugManager.flags.DirectSubmissionBufferAddressing.set(0); AllocationData allocationData; 
AllocationProperties properties(mockRootDeviceIndex, 0x1000, AllocationType::RING_BUFFER, mockDeviceBitfield); allocationData.flags.resource48Bit = 1; MockMemoryManager::overrideAllocationData(allocationData, properties); EXPECT_EQ(0u, allocationData.flags.resource48Bit); } TEST(MemoryManagerTest, givenDirectBufferAddressingWhenOverrideTo48BitThenExpect48BitFlagTrue) { DebugManagerStateRestore restorer; DebugManager.flags.DirectSubmissionBufferAddressing.set(1); AllocationData allocationData; AllocationProperties properties(mockRootDeviceIndex, 0x1000, AllocationType::RING_BUFFER, mockDeviceBitfield); allocationData.flags.resource48Bit = 0; MockMemoryManager::overrideAllocationData(allocationData, properties); EXPECT_EQ(1u, allocationData.flags.resource48Bit); } TEST(MemoryManagerTest, givenDirectBufferAddressingDefaultWhenNoOverrideThenExpect48BitFlagSame) { AllocationData allocationData; AllocationProperties properties(mockRootDeviceIndex, 0x1000, AllocationType::RING_BUFFER, mockDeviceBitfield); allocationData.flags.resource48Bit = 0; MockMemoryManager::overrideAllocationData(allocationData, properties); EXPECT_EQ(0u, allocationData.flags.resource48Bit); allocationData.flags.resource48Bit = 1; MockMemoryManager::overrideAllocationData(allocationData, properties); EXPECT_EQ(1u, allocationData.flags.resource48Bit); } TEST(MemoryManagerTest, givenDirectSemaphoreAddressingWhenOverrideToNo48BitThenExpect48BitFlagFalse) { DebugManagerStateRestore restorer; DebugManager.flags.DirectSubmissionSemaphoreAddressing.set(0); AllocationData allocationData; AllocationProperties properties(mockRootDeviceIndex, 0x1000, AllocationType::SEMAPHORE_BUFFER, mockDeviceBitfield); allocationData.flags.resource48Bit = 1; MockMemoryManager::overrideAllocationData(allocationData, properties); EXPECT_EQ(0u, allocationData.flags.resource48Bit); } TEST(MemoryManagerTest, givenDirectSemaphoreAddressingWhenOverrideTo48BitThenExpect48BitFlagTrue) { DebugManagerStateRestore restorer; 
DebugManager.flags.DirectSubmissionSemaphoreAddressing.set(1); AllocationData allocationData; AllocationProperties properties(mockRootDeviceIndex, 0x1000, AllocationType::SEMAPHORE_BUFFER, mockDeviceBitfield); allocationData.flags.resource48Bit = 0; MockMemoryManager::overrideAllocationData(allocationData, properties); EXPECT_EQ(1u, allocationData.flags.resource48Bit); } TEST(MemoryManagerTest, givenDirectSemaphoreAddressingDefaultWhenNoOverrideThenExpect48BitFlagSame) { AllocationData allocationData; AllocationProperties properties(mockRootDeviceIndex, 0x1000, AllocationType::SEMAPHORE_BUFFER, mockDeviceBitfield); allocationData.flags.resource48Bit = 0; MockMemoryManager::overrideAllocationData(allocationData, properties); EXPECT_EQ(0u, allocationData.flags.resource48Bit); allocationData.flags.resource48Bit = 1; MockMemoryManager::overrideAllocationData(allocationData, properties); EXPECT_EQ(1u, allocationData.flags.resource48Bit); } TEST(MemoryManagerTest, givenForceNonSystemMaskWhenAllocationTypeMatchesMaskThenExpectSystemFlagFalse) { DebugManagerStateRestore restorer; auto allocationType = AllocationType::BUFFER; auto mask = 1llu << (static_cast(allocationType) - 1); DebugManager.flags.ForceNonSystemMemoryPlacement.set(mask); AllocationData allocationData; AllocationProperties properties(mockRootDeviceIndex, 0x1000, AllocationType::BUFFER, mockDeviceBitfield); allocationData.flags.useSystemMemory = 1; MockMemoryManager::overrideAllocationData(allocationData, properties); EXPECT_EQ(0u, allocationData.flags.useSystemMemory); } TEST(MemoryManagerTest, givenForceNonSystemMaskWhenAllocationTypeNotMatchesMaskThenExpectSystemFlagTrue) { DebugManagerStateRestore restorer; auto allocationType = AllocationType::BUFFER; auto mask = 1llu << (static_cast(allocationType) - 1); DebugManager.flags.ForceNonSystemMemoryPlacement.set(mask); AllocationData allocationData; AllocationProperties properties(mockRootDeviceIndex, 0x1000, AllocationType::COMMAND_BUFFER, 
mockDeviceBitfield); allocationData.flags.useSystemMemory = 1; MockMemoryManager::overrideAllocationData(allocationData, properties); EXPECT_EQ(1u, allocationData.flags.useSystemMemory); } TEST(MemoryManagerTest, givenDebugContextSaveAreaTypeWhenGetAllocationDataIsCalledThenSystemMemoryIsRequested) { AllocationData allocData; MockMemoryManager mockMemoryManager; AllocationProperties properties{mockRootDeviceIndex, 1, AllocationType::DEBUG_CONTEXT_SAVE_AREA, mockDeviceBitfield}; mockMemoryManager.getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_TRUE(allocData.flags.useSystemMemory); EXPECT_TRUE(allocData.flags.zeroMemory); } TEST(MemoryManagerTest, givenPropertiesWithOsContextWhenGetAllocationDataIsCalledThenOsContextIsSet) { AllocationData allocData; MockMemoryManager mockMemoryManager; AllocationProperties properties{0, 1, AllocationType::DEBUG_CONTEXT_SAVE_AREA, mockDeviceBitfield}; MockOsContext osContext(0u, EngineDescriptorHelper::getDefaultDescriptor(HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*defaultHwInfo)[0])); properties.osContext = &osContext; mockMemoryManager.getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_EQ(&osContext, allocData.osContext); } TEST(MemoryManagerTest, givenPropertiesWithGpuAddressWhenGetAllocationDataIsCalledThenGpuAddressIsSet) { AllocationData allocData; MockMemoryManager mockMemoryManager; AllocationProperties properties{mockRootDeviceIndex, 1, AllocationType::DEBUG_CONTEXT_SAVE_AREA, mockDeviceBitfield}; properties.gpuAddress = 0x4000; mockMemoryManager.getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_EQ(properties.gpuAddress, allocData.gpuAddress); } TEST(MemoryManagerTest, givenEnableLocalMemoryAndMemoryManagerWhenBufferTypeIsPassedThenAllocateGraphicsMemoryInPreferredPool) { 
MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MockMemoryManager memoryManager(false, true, executionEnvironment); auto allocation = memoryManager.allocateGraphicsMemoryInPreferredPool({mockRootDeviceIndex, MemoryConstants::pageSize, AllocationType::BUFFER, mockDeviceBitfield}, nullptr); EXPECT_NE(nullptr, allocation); memoryManager.freeGraphicsMemory(allocation); } TEST(MemoryManagerTest, givenEnabledLocalMemoryWhenAllocatingSharedResourceCopyThenLocalMemoryAllocationIsReturnedAndGpuAddresIsInStandard64kHeap) { UltDeviceFactory deviceFactory{1, 0}; HardwareInfo localPlatformDevice = {}; localPlatformDevice = *defaultHwInfo; localPlatformDevice.featureTable.flags.ftrLocalMemory = true; auto executionEnvironment = std::unique_ptr(MockDevice::prepareExecutionEnvironment(&localPlatformDevice, 0u)); executionEnvironment->rootDeviceEnvironments[0]->initGmm(); MockMemoryManager memoryManager(false, true, *executionEnvironment); ImageDescriptor imgDesc = {}; imgDesc.imageWidth = 512; imgDesc.imageHeight = 1; imgDesc.imageType = ImageType::Image2D; auto imgInfo = MockGmm::initImgInfo(imgDesc, 0, nullptr); auto memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(0, 0, 0, deviceFactory.rootDevices[0]); AllocationProperties allocProperties = MemObjHelper::getAllocationPropertiesWithImageInfo(mockRootDeviceIndex, imgInfo, true, memoryProperties, localPlatformDevice, mockDeviceBitfield, true); allocProperties.allocationType = AllocationType::SHARED_RESOURCE_COPY; auto allocation = memoryManager.allocateGraphicsMemoryInPreferredPool(allocProperties, nullptr); ASSERT_NE(nullptr, allocation); EXPECT_EQ(MemoryPool::LocalMemory, allocation->getMemoryPool()); EXPECT_EQ(0u, allocation->getDefaultGmm()->resourceParams.Flags.Info.NonLocalOnly); EXPECT_LT(GmmHelper::canonize(memoryManager.getGfxPartition(allocation->getRootDeviceIndex())->getHeapBase(HeapIndex::HEAP_STANDARD64KB)), allocation->getGpuAddress()); 
EXPECT_GT(GmmHelper::canonize(memoryManager.getGfxPartition(allocation->getRootDeviceIndex())->getHeapLimit(HeapIndex::HEAP_STANDARD64KB)), allocation->getGpuAddress()); EXPECT_EQ(0llu, allocation->getGpuBaseAddress()); memoryManager.freeGraphicsMemory(allocation); } using MemoryManagerGetAlloctionDataHaveToBeForcedTo48BitTest = testing::TestWithParam>; TEST_P(MemoryManagerGetAlloctionDataHaveToBeForcedTo48BitTest, givenAllocationTypesHaveToBeForcedTo48BitThenAllocationDataResource48BitIsSet) { AllocationType allocationType; bool propertiesFlag48Bit; std::tie(allocationType, propertiesFlag48Bit) = GetParam(); AllocationProperties properties(mockRootDeviceIndex, 0, allocationType, mockDeviceBitfield); properties.flags.resource48Bit = propertiesFlag48Bit; AllocationData allocationData; MockMemoryManager mockMemoryManager; mockMemoryManager.getAllocationData(allocationData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_TRUE(allocationData.flags.resource48Bit); } using MemoryManagerGetAlloctionDataHaveNotToBeForcedTo48BitTest = testing::TestWithParam>; TEST_P(MemoryManagerGetAlloctionDataHaveNotToBeForcedTo48BitTest, givenAllocationTypesHaveNotToBeForcedTo48BitThenAllocationDataResource48BitIsSetProperly) { AllocationType allocationType; bool propertiesFlag48Bit; std::tie(allocationType, propertiesFlag48Bit) = GetParam(); AllocationProperties properties(mockRootDeviceIndex, 0, allocationType, mockDeviceBitfield); properties.flags.resource48Bit = propertiesFlag48Bit; AllocationData allocationData; MockMemoryManager mockMemoryManager; mockMemoryManager.getAllocationData(allocationData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_EQ(allocationData.flags.resource48Bit, propertiesFlag48Bit); } static const AllocationType allocationHaveToBeForcedTo48Bit[] = { AllocationType::COMMAND_BUFFER, AllocationType::IMAGE, AllocationType::INDIRECT_OBJECT_HEAP, 
AllocationType::INSTRUCTION_HEAP, AllocationType::INTERNAL_HEAP, AllocationType::KERNEL_ISA, AllocationType::LINEAR_STREAM, AllocationType::MCS, AllocationType::SCRATCH_SURFACE, AllocationType::WORK_PARTITION_SURFACE, AllocationType::SHARED_CONTEXT_IMAGE, AllocationType::SHARED_IMAGE, AllocationType::SHARED_RESOURCE_COPY, AllocationType::SURFACE_STATE_HEAP, AllocationType::TIMESTAMP_PACKET_TAG_BUFFER, AllocationType::RING_BUFFER, AllocationType::SEMAPHORE_BUFFER, }; static const AllocationType allocationHaveNotToBeForcedTo48Bit[] = { AllocationType::BUFFER, AllocationType::BUFFER_HOST_MEMORY, AllocationType::CONSTANT_SURFACE, AllocationType::EXTERNAL_HOST_PTR, AllocationType::FILL_PATTERN, AllocationType::GLOBAL_SURFACE, AllocationType::INTERNAL_HOST_MEMORY, AllocationType::MAP_ALLOCATION, AllocationType::PIPE, AllocationType::PRINTF_SURFACE, AllocationType::PRIVATE_SURFACE, AllocationType::PROFILING_TAG_BUFFER, AllocationType::SHARED_BUFFER, AllocationType::SVM_CPU, AllocationType::SVM_GPU, AllocationType::SVM_ZERO_COPY, AllocationType::TAG_BUFFER, AllocationType::GLOBAL_FENCE, AllocationType::WRITE_COMBINED, AllocationType::DEBUG_CONTEXT_SAVE_AREA, }; INSTANTIATE_TEST_CASE_P(ForceTo48Bit, MemoryManagerGetAlloctionDataHaveToBeForcedTo48BitTest, ::testing::Combine( ::testing::ValuesIn(allocationHaveToBeForcedTo48Bit), ::testing::Bool())); INSTANTIATE_TEST_CASE_P(NotForceTo48Bit, MemoryManagerGetAlloctionDataHaveNotToBeForcedTo48BitTest, ::testing::Combine( ::testing::ValuesIn(allocationHaveNotToBeForcedTo48Bit), ::testing::Bool())); memory_manager_multi_device_tests.cpp000066400000000000000000000133231422164147700342350ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/memory_manager/* * Copyright (C) 2019-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/constants.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "shared/source/memory_manager/memory_manager.h" #include 
"shared/test/common/fixtures/memory_allocator_multi_device_fixture.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "shared/test/common/test_macros/test.h" using namespace NEO; using MemoryManagerMultiDeviceTest = MemoryAllocatorMultiDeviceFixture<10>; TEST_P(MemoryManagerMultiDeviceTest, givenRootDeviceIndexSpecifiedWhenAllocateGraphicsMemoryIsCalledThenGraphicsAllocationHasTheSameRootDeviceIndex) { std::vector allocationTypes{AllocationType::BUFFER, AllocationType::KERNEL_ISA}; for (auto allocationType : allocationTypes) { for (uint32_t rootDeviceIndex = 0; rootDeviceIndex < getNumRootDevices(); ++rootDeviceIndex) { AllocationProperties properties{rootDeviceIndex, true, MemoryConstants::pageSize, allocationType, false, false, mockDeviceBitfield}; auto gfxAllocation = memoryManager->allocateGraphicsMemoryWithProperties(properties); ASSERT_NE(gfxAllocation, nullptr); EXPECT_EQ(rootDeviceIndex, gfxAllocation->getRootDeviceIndex()); memoryManager->freeGraphicsMemory(gfxAllocation); gfxAllocation = memoryManager->allocateGraphicsMemoryWithProperties(properties, (void *)0x1234); ASSERT_NE(gfxAllocation, nullptr); EXPECT_EQ(rootDeviceIndex, gfxAllocation->getRootDeviceIndex()); memoryManager->freeGraphicsMemory(gfxAllocation); gfxAllocation = memoryManager->allocateGraphicsMemoryInPreferredPool(properties, nullptr); ASSERT_NE(gfxAllocation, nullptr); EXPECT_EQ(rootDeviceIndex, gfxAllocation->getRootDeviceIndex()); memoryManager->freeGraphicsMemory(gfxAllocation); gfxAllocation = memoryManager->allocateGraphicsMemoryInPreferredPool(properties, (void *)0x1234); ASSERT_NE(gfxAllocation, nullptr); EXPECT_EQ(rootDeviceIndex, gfxAllocation->getRootDeviceIndex()); memoryManager->freeGraphicsMemory(gfxAllocation); gfxAllocation = memoryManager->createGraphicsAllocationFromSharedHandle(static_cast(0u), properties, false, false); ASSERT_NE(gfxAllocation, nullptr); EXPECT_EQ(rootDeviceIndex, gfxAllocation->getRootDeviceIndex()); 
memoryManager->freeGraphicsMemory(gfxAllocation); gfxAllocation = memoryManager->createGraphicsAllocationFromSharedHandle(static_cast(0u), properties, true, false); ASSERT_NE(gfxAllocation, nullptr); EXPECT_EQ(rootDeviceIndex, gfxAllocation->getRootDeviceIndex()); memoryManager->freeGraphicsMemory(gfxAllocation); } } } INSTANTIATE_TEST_CASE_P(MemoryManagerType, MemoryManagerMultiDeviceTest, ::testing::Bool()); TEST_P(MemoryManagerMultiDeviceTest, givenRootDeviceIndexSpecifiedWhenAllocateGraphicsMemoryIsCalledThenGraphicsAllocationHasProperGpuAddress) { std::vector rootDeviceIndices; for (uint32_t rootDeviceIndex = 0; rootDeviceIndex < getNumRootDevices(); ++rootDeviceIndex) { rootDeviceIndices.push_back(rootDeviceIndex); } auto maxRootDeviceIndex = *std::max_element(rootDeviceIndices.begin(), rootDeviceIndices.end(), std::less()); auto tagsMultiAllocation = new MultiGraphicsAllocation(maxRootDeviceIndex); AllocationProperties unifiedMemoryProperties{rootDeviceIndices.at(0), MemoryConstants::pageSize, AllocationType::TAG_BUFFER, systemMemoryBitfield}; memoryManager->createMultiGraphicsAllocationInSystemMemoryPool(rootDeviceIndices, unifiedMemoryProperties, *tagsMultiAllocation); EXPECT_NE(nullptr, tagsMultiAllocation); auto graphicsAllocation0 = tagsMultiAllocation->getGraphicsAllocation(0); for (auto graphicsAllocation : tagsMultiAllocation->getGraphicsAllocations()) { EXPECT_EQ(graphicsAllocation->getUnderlyingBuffer(), graphicsAllocation0->getUnderlyingBuffer()); } for (auto graphicsAllocation : tagsMultiAllocation->getGraphicsAllocations()) { memoryManager->freeGraphicsMemory(graphicsAllocation); } delete tagsMultiAllocation; } TEST_P(MemoryManagerMultiDeviceTest, givenRootDeviceIndexSpecifiedWhenAllocateGraphicsMemoryIsCalledThenAllocationPropertiesUsmFlagIsSetAccordingToAddressRange) { std::vector rootDeviceIndices; for (uint32_t rootDeviceIndex = 0; rootDeviceIndex < getNumRootDevices(); ++rootDeviceIndex) { rootDeviceIndices.push_back(rootDeviceIndex); } 
auto maxRootDeviceIndex = *std::max_element(rootDeviceIndices.begin(), rootDeviceIndices.end(), std::less()); auto tagsMultiAllocation = new MultiGraphicsAllocation(maxRootDeviceIndex); AllocationProperties unifiedMemoryProperties{rootDeviceIndices.at(0), MemoryConstants::pageSize, AllocationType::TAG_BUFFER, systemMemoryBitfield}; memoryManager->createMultiGraphicsAllocationInSystemMemoryPool(rootDeviceIndices, unifiedMemoryProperties, *tagsMultiAllocation); EXPECT_NE(nullptr, tagsMultiAllocation); for (auto rootDeviceIndex : rootDeviceIndices) { if (memoryManager->isLimitedRange(rootDeviceIndex)) { EXPECT_EQ(unifiedMemoryProperties.flags.isUSMHostAllocation, false); } else { EXPECT_EQ(unifiedMemoryProperties.flags.isUSMHostAllocation, true); } } for (auto graphicsAllocation : tagsMultiAllocation->getGraphicsAllocations()) { memoryManager->freeGraphicsMemory(graphicsAllocation); } delete tagsMultiAllocation; } compute-runtime-22.14.22890/opencl/test/unit_test/memory_manager/memory_manager_tests.cpp000066400000000000000000005026371422164147700315760ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/preemption.h" #include "shared/source/gmm_helper/page_table_mngr.h" #include "shared/source/helpers/cache_policy.h" #include "shared/source/helpers/constants.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/source/memory_manager/residency.h" #include "shared/source/os_interface/hw_info_config.h" #include "shared/source/os_interface/os_context.h" #include "shared/source/os_interface/os_interface.h" #include "shared/source/program/program_initialization.h" #include "shared/test/common/fixtures/memory_allocator_fixture.h" #include "shared/test/common/fixtures/memory_manager_fixture.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include 
"shared/test/common/helpers/engine_descriptor_helper.h" #include "shared/test/common/helpers/execution_environment_helper.h" #include "shared/test/common/helpers/variable_backup.h" #include "shared/test/common/mocks/mock_allocation_properties.h" #include "shared/test/common/mocks/mock_csr.h" #include "shared/test/common/mocks/mock_deferrable_deletion.h" #include "shared/test/common/mocks/mock_deferred_deleter.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/mocks/mock_execution_environment.h" #include "shared/test/common/mocks/mock_gfx_partition.h" #include "shared/test/common/mocks/mock_gmm.h" #include "shared/test/common/mocks/mock_gmm_page_table_mngr.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "shared/test/common/mocks/mock_os_context.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/compiler_interface/linker_mock.h" #include "opencl/source/event/event.h" #include "opencl/source/helpers/dispatch_info.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/mem_obj/mem_obj_helper.h" #include "opencl/source/platform/platform.h" #include "opencl/source/program/printf_handler.h" #include "opencl/source/program/program.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/fixtures/multi_root_device_fixture.h" #include "opencl/test/unit_test/helpers/raii_hw_helper.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_mdi.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include #include using namespace NEO; typedef Test MemoryAllocatorTest; TEST(MemoryManagerTest, givenDebugVariableWhenCreatingMemoryManagerThenSetSupportForMultiStorageResources) { DebugManagerStateRestore restore; { 
MockMemoryManager memoryManager; EXPECT_TRUE(memoryManager.supportsMultiStorageResources); } { DebugManager.flags.EnableMultiStorageResources.set(0); MockMemoryManager memoryManager; EXPECT_FALSE(memoryManager.supportsMultiStorageResources); } { DebugManager.flags.EnableMultiStorageResources.set(1); MockMemoryManager memoryManager; EXPECT_TRUE(memoryManager.supportsMultiStorageResources); } } TEST(MemoryManagerTest, givenLocalMemoryRequiredWhenSelectingHeapThenPickDeviceHeapIndex) { EXPECT_EQ(HeapIndex::HEAP_INTERNAL_DEVICE_MEMORY, MemoryManager::selectInternalHeap(true)); EXPECT_EQ(HeapIndex::HEAP_INTERNAL, MemoryManager::selectInternalHeap(false)); EXPECT_EQ(HeapIndex::HEAP_EXTERNAL_DEVICE_MEMORY, MemoryManager::selectExternalHeap(true)); EXPECT_EQ(HeapIndex::HEAP_EXTERNAL, MemoryManager::selectExternalHeap(false)); } TEST(MemoryManagerTest, whenCreatingAllocPropertiesForMultiStorageResourceThenMultiStorageResourcesFlagIsSetToTrue) { AllocationProperties properties{0, false, 0u, AllocationType::SCRATCH_SURFACE, false, true, 0}; EXPECT_TRUE(properties.multiStorageResource); } TEST(MemoryBank, givenDifferentDeviceOrdinalsWhenGettingBankThenCorrectBanksAreReturned) { auto bank = MemoryBanks::getBank(0); EXPECT_EQ(MemoryBanks::MainBank, bank); bank = MemoryBanks::getBank(1); EXPECT_EQ(MemoryBanks::MainBank, bank); bank = MemoryBanks::getBank(100); EXPECT_EQ(MemoryBanks::MainBank, bank); } TEST(GraphicsAllocationTest, WhenGraphicsAllocationIsCreatedThenItIsNotCopyable) { EXPECT_FALSE(std::is_copy_constructible::value); EXPECT_FALSE(std::is_copy_assignable::value); } TEST(GraphicsAllocationTest, WhenAllocationIsCreatedThenItsAddressIsCorrect) { void *cpuPtr = (void *)0x30000; size_t size = 0x1000; MockGraphicsAllocation gfxAllocation(cpuPtr, size); uint64_t expectedGpuAddr = static_cast(reinterpret_cast(gfxAllocation.getUnderlyingBuffer())); EXPECT_EQ(expectedGpuAddr, gfxAllocation.getGpuAddress()); EXPECT_EQ(0u, gfxAllocation.getGpuBaseAddress()); } 
TEST(GraphicsAllocationTest, GivenNonSharedResourceHandleWhenAllocationIsCreatedThenItsAddressIsCorrect) { void *cpuPtr = (void *)0x30000; size_t size = 0x1000; osHandle sharedHandle = Sharing::nonSharedResource; GraphicsAllocation gfxAllocation(0, AllocationType::UNKNOWN, cpuPtr, size, sharedHandle, MemoryPool::MemoryNull, 0u); uint64_t expectedGpuAddr = static_cast(reinterpret_cast(gfxAllocation.getUnderlyingBuffer())); EXPECT_EQ(expectedGpuAddr, gfxAllocation.getGpuAddress()); EXPECT_EQ(0u, gfxAllocation.getGpuBaseAddress()); EXPECT_EQ(sharedHandle, gfxAllocation.peekSharedHandle()); } TEST(GraphicsAllocationTest, WhenGettingAddressesThenAddressesAreCorrect) { void *cpuPtr = (void *)0x30000; uint64_t gpuAddr = 0x30000; uint64_t gpuBaseAddr = 0x10000; size_t size = 0x1000; GraphicsAllocation gfxAllocation(0, AllocationType::UNKNOWN, cpuPtr, gpuAddr, gpuBaseAddr, size, MemoryPool::MemoryNull, 0u); EXPECT_EQ(gpuAddr, gfxAllocation.getGpuAddress()); cpuPtr = (void *)65535; gpuAddr = 1ULL; gfxAllocation.setCpuPtrAndGpuAddress(cpuPtr, gpuAddr); EXPECT_EQ(gpuAddr, gfxAllocation.getGpuAddress()); EXPECT_EQ(cpuPtr, gfxAllocation.getUnderlyingBuffer()); } TEST(GraphicsAllocationTest, WhenGettingGpuAddressToPatchThenOffsetIsCorrect) { void *cpuPtr = (void *)0x30000; uint64_t gpuAddr = 0x30000; uint64_t gpuBaseAddr = 0x10000; size_t size = 0x1000; GraphicsAllocation gfxAllocation(0, AllocationType::UNKNOWN, cpuPtr, gpuAddr, gpuBaseAddr, size, MemoryPool::MemoryNull, 0u); EXPECT_EQ(gpuAddr - gpuBaseAddr, gfxAllocation.getGpuAddressToPatch()); } TEST(GraphicsAllocationTest, WhenSetSizeThenUnderlyingBufferSizeIsSet) { void *cpuPtr = (void *)0x30000; uint64_t gpuAddr = 0x30000; uint64_t gpuBaseAddr = 0x10000; size_t size = 0x2000; GraphicsAllocation gfxAllocation(0, AllocationType::UNKNOWN, cpuPtr, gpuAddr, gpuBaseAddr, size, MemoryPool::MemoryNull, 0u); EXPECT_EQ(size, gfxAllocation.getUnderlyingBufferSize()); size = 0x3000; gfxAllocation.setSize(size); EXPECT_EQ(size, 
gfxAllocation.getUnderlyingBufferSize()); } TEST_F(MemoryAllocatorTest, WhenAllocatingSystemMemoryThenNonNullPointerIsReturned) { auto ptr = memoryManager->allocateSystemMemory(sizeof(char), 0); EXPECT_NE(nullptr, ptr); memoryManager->freeSystemMemory(ptr); } TEST_F(MemoryAllocatorTest, GivenGraphicsAllocationWhenAddAndRemoveAllocationToHostPtrManagerThenfragmentHasCorrectValues) { void *cpuPtr = (void *)0x30000; size_t size = 0x1000; MockGraphicsAllocation gfxAllocation(cpuPtr, size); memoryManager->addAllocationToHostPtrManager(&gfxAllocation); auto fragment = memoryManager->getHostPtrManager()->getFragment({gfxAllocation.getUnderlyingBuffer(), csr->getRootDeviceIndex()}); EXPECT_NE(fragment, nullptr); EXPECT_TRUE(fragment->driverAllocation); EXPECT_EQ(fragment->refCount, 1); EXPECT_EQ(fragment->fragmentCpuPointer, cpuPtr); EXPECT_EQ(fragment->fragmentSize, size); EXPECT_NE(fragment->osInternalStorage, nullptr); EXPECT_NE(fragment->residency, nullptr); FragmentStorage fragmentStorage = {}; fragmentStorage.fragmentCpuPointer = cpuPtr; memoryManager->getHostPtrManager()->storeFragment(csr->getRootDeviceIndex(), fragmentStorage); fragment = memoryManager->getHostPtrManager()->getFragment({gfxAllocation.getUnderlyingBuffer(), csr->getRootDeviceIndex()}); EXPECT_EQ(fragment->refCount, 2); fragment->driverAllocation = false; memoryManager->removeAllocationFromHostPtrManager(&gfxAllocation); fragment = memoryManager->getHostPtrManager()->getFragment({gfxAllocation.getUnderlyingBuffer(), csr->getRootDeviceIndex()}); EXPECT_EQ(fragment->refCount, 2); fragment->driverAllocation = true; memoryManager->removeAllocationFromHostPtrManager(&gfxAllocation); fragment = memoryManager->getHostPtrManager()->getFragment({gfxAllocation.getUnderlyingBuffer(), csr->getRootDeviceIndex()}); EXPECT_EQ(fragment->refCount, 1); memoryManager->removeAllocationFromHostPtrManager(&gfxAllocation); fragment = memoryManager->getHostPtrManager()->getFragment({gfxAllocation.getUnderlyingBuffer(), 
csr->getRootDeviceIndex()}); EXPECT_EQ(fragment, nullptr); } TEST_F(MemoryAllocatorTest, GivenAlignmentWhenAllocatingSystemMemoryThenAllocatedMemoryIsAligned) { unsigned int alignment = 0x100; auto ptr = memoryManager->allocateSystemMemory(sizeof(char), alignment); EXPECT_NE(nullptr, ptr); EXPECT_EQ(0u, reinterpret_cast(ptr) & (alignment - 1)); memoryManager->freeSystemMemory(ptr); } TEST_F(MemoryAllocatorTest, WhenAllocatingGraphicsMemoryThenAllocationHasCorrectProperties) { unsigned int alignment = 4096; memoryManager->createAndRegisterOsContext(csr, EngineDescriptorHelper::getDefaultDescriptor(HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*defaultHwInfo)[0], PreemptionHelper::getDefaultPreemptionMode(*defaultHwInfo))); auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, allocation); // initial taskCount must be -1. if not, we may kill allocation before it will be used EXPECT_EQ((uint32_t)-1, allocation->getTaskCount(csr->getOsContext().getContextId())); // We know we want graphics memory to be page aligned EXPECT_EQ(0u, reinterpret_cast(allocation->getUnderlyingBuffer()) & (alignment - 1)); EXPECT_EQ(Sharing::nonSharedResource, allocation->peekSharedHandle()); // Gpu address equal to cpu address if (defaultHwInfo->capabilityTable.gpuAddressSpace == MemoryConstants::max48BitAddress) { EXPECT_EQ(reinterpret_cast(allocation->getUnderlyingBuffer()), allocation->getGpuAddress()); } memoryManager->freeGraphicsMemory(allocation); } TEST_F(MemoryAllocatorTest, WhenAllocatingGraphicsMemoryThenAllocationIsPageAligned) { unsigned int alignment = 4096; auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), MemoryConstants::pageSize}); EXPECT_NE(nullptr, allocation); EXPECT_EQ(0u, reinterpret_cast(allocation->getUnderlyingBuffer()) & (alignment - 1)); 
memoryManager->freeGraphicsMemory(allocation); } TEST_F(MemoryAllocatorTest, GivenAlignedHostPtrWithAlignedSizeWhenAllocatingGraphicsThenOneFragmentIsAllocated) { auto ptr = (void *)0x1000; MockMemoryManager mockMemoryManager(*executionEnvironment); auto hostPtrManager = static_cast(mockMemoryManager.getHostPtrManager()); auto graphicsAllocation = mockMemoryManager.allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), false, 4096, device->getDeviceBitfield()}, ptr); EXPECT_NE(nullptr, graphicsAllocation); EXPECT_EQ(1u, hostPtrManager->getFragmentCount()); auto fragmentData = hostPtrManager->getFragment({ptr, device->getRootDeviceIndex()}); ASSERT_NE(nullptr, fragmentData); EXPECT_NE(nullptr, fragmentData->osInternalStorage); mockMemoryManager.freeGraphicsMemory(graphicsAllocation); } TEST_F(MemoryAllocatorTest, GivenAlignedHostPtrAndCacheAlignedSizeWhenAskedForL3AllowanceThenTrueIsReturned) { auto ptr = (void *)0x1000; auto alignedSize = MemoryConstants::cacheLineSize; auto graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), false, alignedSize, device->getDeviceBitfield()}, ptr); EXPECT_TRUE(isL3Capable(*graphicsAllocation)); memoryManager->freeGraphicsMemory(graphicsAllocation); } TEST_F(MemoryAllocatorTest, GivenAlignedHostPtrAndNotCacheAlignedSizeWhenAskedForL3AllowanceThenFalseIsReturned) { auto ptr = (void *)0x1000; auto alignedSize = MemoryConstants::cacheLineSize - 1; auto graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), false, alignedSize, device->getDeviceBitfield()}, ptr); EXPECT_FALSE(isL3Capable(*graphicsAllocation)); memoryManager->freeGraphicsMemory(graphicsAllocation); } TEST_F(MemoryAllocatorTest, GivenMisAlignedHostPtrAndNotCacheAlignedSizeWhenAskedForL3AllowanceThenFalseIsReturned) { auto ptr = (void *)0x1001; auto alignedSize = 
MemoryConstants::cacheLineSize - 1; auto graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), false, alignedSize, device->getDeviceBitfield()}, ptr); EXPECT_FALSE(isL3Capable(*graphicsAllocation)); memoryManager->freeGraphicsMemory(graphicsAllocation); } TEST_F(MemoryAllocatorTest, GivenHostPtrAlignedToCacheLineWhenAskedForL3AllowanceThenTrueIsReturned) { auto ptr = (void *)0x1040; auto alignedSize = MemoryConstants::cacheLineSize; auto graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), false, alignedSize, device->getDeviceBitfield()}, ptr); EXPECT_TRUE(isL3Capable(*graphicsAllocation)); memoryManager->freeGraphicsMemory(graphicsAllocation); } TEST_F(MemoryAllocatorTest, WhenPopulatingOsHandleThenOneFragmentIsReturned) { OsHandleStorage storage; storage.fragmentStorageData[0].cpuPtr = (void *)0x1000; memoryManager->populateOsHandles(storage, 0); EXPECT_NE(nullptr, storage.fragmentStorageData[0].osHandleStorage); EXPECT_EQ(nullptr, storage.fragmentStorageData[1].osHandleStorage); EXPECT_EQ(nullptr, storage.fragmentStorageData[2].osHandleStorage); memoryManager->getHostPtrManager()->releaseHandleStorage(csr->getRootDeviceIndex(), storage); memoryManager->cleanOsHandles(storage, 0); } TEST_F(MemoryAllocatorTest, givenOsHandleStorageWhenOsHandlesAreCleanedAndAubManagerIsNotAvailableThenFreeMemoryIsNotCalledOnAubManager) { MockExecutionEnvironment mockExecutionEnvironment(defaultHwInfo.get()); MockMemoryManager mockMemoryManager(mockExecutionEnvironment); GmmHelper gmmHelper(nullptr, defaultHwInfo.get()); auto mockAubCenter = new MockAubCenter(defaultHwInfo.get(), gmmHelper, false, "aubfile", CommandStreamReceiverType::CSR_AUB); mockAubCenter->aubManager.reset(nullptr); mockExecutionEnvironment.rootDeviceEnvironments[0]->aubCenter.reset(mockAubCenter); OsHandleStorage storage; storage.fragmentStorageData[0].cpuPtr = 
(void *)0x1000; mockMemoryManager.populateOsHandles(storage, 0); mockMemoryManager.getHostPtrManager()->releaseHandleStorage(csr->getRootDeviceIndex(), storage); mockMemoryManager.cleanOsHandles(storage, 0); EXPECT_EQ(nullptr, mockAubCenter->aubManager); } TEST_F(MemoryAllocatorTest, givenOsHandleStorageAndFreeMemoryEnabledWhenOsHandlesAreCleanedAndAubManagerIsAvailableThenFreeMemoryIsCalledOnAubManager) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableFreeMemory.set(true); const uint32_t rootDeviceIndex = 1u; MockExecutionEnvironment mockExecutionEnvironment(defaultHwInfo.get(), true, 3); MockMemoryManager mockMemoryManager(mockExecutionEnvironment); GmmHelper gmmHelper(nullptr, defaultHwInfo.get()); auto mockManager0 = new MockAubManager(); auto mockAubCenter0 = new MockAubCenter(defaultHwInfo.get(), gmmHelper, false, "aubfile", CommandStreamReceiverType::CSR_AUB); mockAubCenter0->aubManager.reset(mockManager0); mockExecutionEnvironment.rootDeviceEnvironments[0]->aubCenter.reset(mockAubCenter0); auto mockManager1 = new MockAubManager(); auto mockAubCenter1 = new MockAubCenter(defaultHwInfo.get(), gmmHelper, false, "aubfile", CommandStreamReceiverType::CSR_AUB); mockAubCenter1->aubManager.reset(mockManager1); mockExecutionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->aubCenter.reset(mockAubCenter1); OsHandleStorage storage; storage.fragmentStorageData[0].cpuPtr = reinterpret_cast(0x1000); mockMemoryManager.populateOsHandles(storage, rootDeviceIndex); mockMemoryManager.getHostPtrManager()->releaseHandleStorage(rootDeviceIndex, storage); mockMemoryManager.cleanOsHandles(storage, rootDeviceIndex); EXPECT_FALSE(mockManager0->freeMemoryCalled); EXPECT_TRUE(mockManager1->freeMemoryCalled); } TEST_F(MemoryAllocatorTest, GivenEmptyMemoryManagerAndMisalingedHostPtrWithHugeSizeWhenAskedForHostPtrAllocationThenGraphicsAllocationIsBeignCreatedWithAllFragmentsPresent) { void *cpuPtr = (void *)0x1005; auto size = MemoryConstants::pageSize * 10 - 1; 
MockMemoryManager mockMemoryManager(*executionEnvironment); auto hostPtrManager = static_cast(mockMemoryManager.getHostPtrManager()); auto reqs = MockHostPtrManager::getAllocationRequirements(device->getRootDeviceIndex(), cpuPtr, size); ASSERT_EQ(3u, reqs.requiredFragmentsCount); auto graphicsAllocation = mockMemoryManager.allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), false, size, device->getDeviceBitfield()}, cpuPtr); for (int i = 0; i < maxFragmentsCount; i++) { EXPECT_NE(nullptr, graphicsAllocation->fragmentsStorage.fragmentStorageData[i].osHandleStorage); EXPECT_EQ(reqs.allocationFragments[i].allocationPtr, graphicsAllocation->fragmentsStorage.fragmentStorageData[i].cpuPtr); EXPECT_EQ(reqs.allocationFragments[i].allocationSize, graphicsAllocation->fragmentsStorage.fragmentStorageData[i].fragmentSize); } EXPECT_EQ(3u, hostPtrManager->getFragmentCount()); EXPECT_EQ(Sharing::nonSharedResource, graphicsAllocation->peekSharedHandle()); mockMemoryManager.freeGraphicsMemory(graphicsAllocation); } TEST_F(MemoryAllocatorTest, GivenPointerAndSizeWhenAskedToCreateGrahicsAllocationThenGraphicsAllocationIsCreated) { OsHandleStorage handleStorage; auto ptr = (void *)0x1000; auto ptr2 = (void *)0x1001; auto size = MemoryConstants::pageSize; handleStorage.fragmentStorageData[0].cpuPtr = ptr; handleStorage.fragmentStorageData[1].cpuPtr = ptr2; handleStorage.fragmentStorageData[2].cpuPtr = nullptr; handleStorage.fragmentStorageData[0].fragmentSize = size; handleStorage.fragmentStorageData[1].fragmentSize = size * 2; handleStorage.fragmentStorageData[2].fragmentSize = size * 3; AllocationData allocationData; allocationData.size = size; allocationData.hostPtr = ptr; auto allocation = std::unique_ptr(memoryManager->createGraphicsAllocation(handleStorage, allocationData)); EXPECT_EQ(MemoryPool::System4KBPages, allocation->getMemoryPool()); EXPECT_EQ(ptr, allocation->getUnderlyingBuffer()); EXPECT_EQ(size, 
allocation->getUnderlyingBufferSize()); EXPECT_EQ(ptr, allocation->fragmentsStorage.fragmentStorageData[0].cpuPtr); EXPECT_EQ(ptr2, allocation->fragmentsStorage.fragmentStorageData[1].cpuPtr); EXPECT_EQ(nullptr, allocation->fragmentsStorage.fragmentStorageData[2].cpuPtr); EXPECT_EQ(size, allocation->fragmentsStorage.fragmentStorageData[0].fragmentSize); EXPECT_EQ(size * 2, allocation->fragmentsStorage.fragmentStorageData[1].fragmentSize); EXPECT_EQ(size * 3, allocation->fragmentsStorage.fragmentStorageData[2].fragmentSize); EXPECT_NE(&allocation->fragmentsStorage, &handleStorage); } TEST_F(MemoryAllocatorTest, givenMemoryManagerWhenAskedFor32bitAllocationThen32bitGraphicsAllocationIsReturned) { size_t size = 10; auto allocation = memoryManager->allocate32BitGraphicsMemory(device->getRootDeviceIndex(), size, nullptr, AllocationType::BUFFER); EXPECT_NE(nullptr, allocation); EXPECT_NE(nullptr, allocation->getUnderlyingBuffer()); EXPECT_EQ(size, allocation->getUnderlyingBufferSize()); EXPECT_TRUE(allocation->is32BitAllocation()); memoryManager->freeGraphicsMemory(allocation); } TEST_F(MemoryAllocatorTest, givenNotEnoughSpaceInAllocatorWhenAskedFor32bitAllocationThenNullptrIsReturned) { size_t size = 0xfffff000; auto allocationFirst = memoryManager->allocate32BitGraphicsMemory(device->getRootDeviceIndex(), 0x5000, nullptr, AllocationType::BUFFER); auto allocation = memoryManager->allocate32BitGraphicsMemory(device->getRootDeviceIndex(), size, nullptr, AllocationType::BUFFER); EXPECT_EQ(nullptr, allocation); memoryManager->freeGraphicsMemory(allocationFirst); } TEST_F(MemoryAllocatorTest, givenNotEnoughSpaceInAllocatorWhenAskedFor32bitAllocationWithHostPtrThenNullptrIsReturned) { size_t size = 0xfffff000; void *ptr = (void *)0x10000; auto allocationFirst = memoryManager->allocate32BitGraphicsMemory(device->getRootDeviceIndex(), 0x5000, nullptr, AllocationType::BUFFER); auto allocation = memoryManager->allocate32BitGraphicsMemory(device->getRootDeviceIndex(), size, ptr, 
AllocationType::BUFFER); EXPECT_EQ(nullptr, allocation); memoryManager->freeGraphicsMemory(allocationFirst); } TEST_F(MemoryAllocatorTest, givenMemoryManagerWhenAskedFor32bitAllocationWithPtrThen32bitGraphicsAllocationWithGpuAddressIsReturned) { size_t size = 10; void *ptr = (void *)0x1000; auto allocation = memoryManager->allocate32BitGraphicsMemory(device->getRootDeviceIndex(), size, ptr, AllocationType::BUFFER); EXPECT_NE(nullptr, allocation); EXPECT_NE(nullptr, allocation->getUnderlyingBuffer()); EXPECT_EQ(size, allocation->getUnderlyingBufferSize()); EXPECT_TRUE(allocation->is32BitAllocation()); EXPECT_EQ(ptr, allocation->getUnderlyingBuffer()); EXPECT_NE(0u, allocation->getGpuAddress()); memoryManager->freeGraphicsMemory(allocation); } TEST_F(MemoryAllocatorTest, givenAllocationWithFragmentsWhenCallingFreeGraphicsMemoryThenDoNotCallHandleFenceCompletion) { auto size = 3u * MemoryConstants::pageSize; auto *ptr = reinterpret_cast(0xbeef1); AllocationProperties properties{device->getRootDeviceIndex(), false, size, AllocationType::BUFFER, false, device->getDeviceBitfield()}; auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(properties, ptr); EXPECT_EQ(3u, allocation->fragmentsStorage.fragmentCount); EXPECT_EQ(0u, memoryManager->handleFenceCompletionCalled); memoryManager->freeGraphicsMemory(allocation); EXPECT_EQ(0u, memoryManager->handleFenceCompletionCalled); } TEST_F(MemoryAllocatorTest, GivenShareableEnabledAndDisabledWhenAskedToCreateGrahicsAllocationThenValidAllocationIsReturned) { AllocationData allocationData; allocationData.type = AllocationType::BUFFER; allocationData.flags.shareable = 1u; auto shareableAllocation = memoryManager->allocateGraphicsMemory(allocationData); EXPECT_NE(nullptr, shareableAllocation); allocationData.flags.shareable = 0u; auto nonShareableAllocation = memoryManager->allocateGraphicsMemory(allocationData); EXPECT_NE(nullptr, nonShareableAllocation); memoryManager->freeGraphicsMemory(shareableAllocation); 
memoryManager->freeGraphicsMemory(nonShareableAllocation); } TEST_F(MemoryAllocatorTest, givenAllocationWithoutFragmentsWhenCallingFreeGraphicsMemoryThenCallHandleFenceCompletion) { auto allocation = memoryManager->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), MemoryConstants::pageSize, AllocationType::BUFFER, device->getDeviceBitfield()}); EXPECT_EQ(0u, allocation->fragmentsStorage.fragmentCount); EXPECT_EQ(0u, memoryManager->handleFenceCompletionCalled); memoryManager->freeGraphicsMemory(allocation); EXPECT_EQ(1u, memoryManager->handleFenceCompletionCalled); } class MockPrintfHandler : public PrintfHandler { public: static MockPrintfHandler *create(const MultiDispatchInfo &multiDispatchInfo, ClDevice &deviceArg) { return (MockPrintfHandler *)PrintfHandler::create(multiDispatchInfo, deviceArg); } }; TEST_F(MemoryAllocatorTest, givenStatelessKernelWithPrintfWhenPrintfSurfaceIsCreatedThenPrintfSurfaceIsPatchedWithBaseAddressOffset) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); MockKernelWithInternals kernel(*device); MockMultiDispatchInfo multiDispatchInfo(device.get(), kernel.mockKernel); kernel.kernelInfo.setBufferAddressingMode(KernelDescriptor::Stateless); kernel.kernelInfo.setPrintfSurface(sizeof(uintptr_t), 8); auto printfHandler = MockPrintfHandler::create(multiDispatchInfo, *device.get()); printfHandler->prepareDispatch(multiDispatchInfo); auto printfAllocation = printfHandler->getSurface(); auto allocationAddress = printfAllocation->getGpuAddressToPatch(); auto printfPatchAddress = ptrOffset(reinterpret_cast(kernel.mockKernel->getCrossThreadData()), kernel.mockKernel->getKernelInfo().kernelDescriptor.payloadMappings.implicitArgs.printfSurfaceAddress.stateless); EXPECT_EQ(allocationAddress, *(uintptr_t *)printfPatchAddress); delete printfHandler; } HWTEST_F(MemoryAllocatorTest, givenStatefulKernelWithPrintfWhenPrintfSurfaceIsCreatedThenPrintfSurfaceIsPatchedWithCpuAddress) { auto 
rootDeviceIndex = 1u; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get(), rootDeviceIndex)); MockKernelWithInternals kernel(*device); MockMultiDispatchInfo multiDispatchInfo(device.get(), kernel.mockKernel); kernel.kernelInfo.setPrintfSurface(sizeof(uintptr_t), 8, 16); auto printfHandler = MockPrintfHandler::create(multiDispatchInfo, *device.get()); printfHandler->prepareDispatch(multiDispatchInfo); auto printfAllocation = printfHandler->getSurface(); auto allocationAddress = printfAllocation->getGpuAddress(); EXPECT_NE(0u, kernel.mockKernel->getSurfaceStateHeapSize()); typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( ptrOffset(kernel.mockKernel->getSurfaceStateHeap(), kernel.mockKernel->getKernelInfo().kernelDescriptor.payloadMappings.implicitArgs.printfSurfaceAddress.bindful)); auto surfaceAddress = surfaceState->getSurfaceBaseAddress(); EXPECT_EQ(allocationAddress, surfaceAddress); delete printfHandler; } TEST_F(MemoryAllocatorTest, given32BitDeviceWhenPrintfSurfaceIsCreatedThen32BitAllocationsIsMade) { DebugManagerStateRestore dbgRestorer; if constexpr (is64bit) { DebugManager.flags.Force32bitAddressing.set(true); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); MockKernelWithInternals kernel(*device); kernel.kernelInfo.setPrintfSurface(4, 0); for (int i = 0; i < 8; i++) { kernel.mockKernel->mockCrossThreadData[i] = 50; } MockMultiDispatchInfo multiDispatchInfo(device.get(), kernel.mockKernel); auto printfHandler = MockPrintfHandler::create(multiDispatchInfo, *device.get()); printfHandler->prepareDispatch(multiDispatchInfo); uint32_t *ptr32Bit = (uint32_t *)kernel.mockKernel->mockCrossThreadData.data(); auto printfAllocation = printfHandler->getSurface(); auto allocationAddress = printfAllocation->getGpuAddressToPatch(); uint32_t allocationAddress32bit = (uint32_t)(uintptr_t)allocationAddress; 
EXPECT_TRUE(printfAllocation->is32BitAllocation()); EXPECT_EQ(allocationAddress32bit, *ptr32Bit); for (int i = 4; i < 8; i++) { EXPECT_EQ(50, kernel.mockKernel->mockCrossThreadData[i]); } delete printfHandler; DebugManager.flags.Force32bitAddressing.set(false); } } HWTEST_F(MemoryAllocatorTest, givenSupportFor1MbAlignmentWhenAllocateGraphicsMemoryThenAlignmentIsSetCorrect) { class MockHwHelperHw : public HwHelperHw { public: using HwHelperHw::HwHelperHw; bool is1MbAlignmentSupported(const HardwareInfo &hwInfo, bool isCompressionEnabled) const override { return isEnable; } bool isEnable = false; }; auto raiiFactory = RAIIHwHelperFactory(defaultHwInfo->platform.eRenderCoreFamily); void *ptr = reinterpret_cast(0x1001); auto size = MemoryConstants::pageSize; raiiFactory.mockHwHelper.isEnable = true; auto osAgnosticMemoryManager = std::make_unique>(true, false, *executionEnvironment); osAgnosticMemoryManager->failInDevicePool = true; MockAllocationProperties properties(mockRootDeviceIndex, true, size, AllocationType::BUFFER, mockDeviceBitfield); properties.flags.preferCompressed = true; auto allocationWithEnabled1MbAlignment = osAgnosticMemoryManager->allocateGraphicsMemoryWithProperties(properties, ptr); ASSERT_NE(nullptr, allocationWithEnabled1MbAlignment); EXPECT_EQ(MemoryConstants::megaByte, osAgnosticMemoryManager->alignAllocationData.alignment); osAgnosticMemoryManager->freeGraphicsMemory(allocationWithEnabled1MbAlignment); raiiFactory.mockHwHelper.isEnable = false; auto allocationWithoutEnabled1MbAlignment = osAgnosticMemoryManager->allocateGraphicsMemoryWithProperties(properties, ptr); ASSERT_NE(nullptr, allocationWithoutEnabled1MbAlignment); EXPECT_NE(MemoryConstants::megaByte, osAgnosticMemoryManager->alignAllocationData.alignment); osAgnosticMemoryManager->freeGraphicsMemory(allocationWithoutEnabled1MbAlignment); } TEST(OsAgnosticMemoryManager, givenDefaultMemoryManagerWhenItIsCreatedThenItIsInitialized) { MockExecutionEnvironment 
executionEnvironment(defaultHwInfo.get()); OsAgnosticMemoryManager memoryManager(executionEnvironment); EXPECT_TRUE(memoryManager.isInitialized()); }
// Builds the manager through the (bool, ExecutionEnvironment &) constructor with `true`
// and expects isInitialized() to report true.
TEST(OsAgnosticMemoryManager, givenDefaultAubUsageMemoryManagerWhenItIsCreatedThenItIsInitialized) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); OsAgnosticMemoryManager memoryManager(true, executionEnvironment); EXPECT_TRUE(memoryManager.isInitialized()); }
// A freshly built manager reports initialized; after gfxPartitions[0] is replaced and
// initialize(false) is called again, isInitialized() is expected to be false.
// The local subclass only re-exports gfxPartitions and the base constructors.
// NOTE(review): std::make_unique appears without a type argument in this view; the
// variable name suggests a partition whose init fails - confirm against upstream.
TEST(OsAgnosticMemoryManager, givenDefaultMemoryManagerWhenItIsCreatedAndGfxPartitionInitIsFailedThenItIsNotInitialized) { class TestedOsAgnosticMemoryManager : public OsAgnosticMemoryManager { public: using OsAgnosticMemoryManager::gfxPartitions; using OsAgnosticMemoryManager::OsAgnosticMemoryManager; }; MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); TestedOsAgnosticMemoryManager memoryManager(executionEnvironment); EXPECT_TRUE(memoryManager.isInitialized()); auto failedInitGfxPartition = std::make_unique(); memoryManager.gfxPartitions[0].reset(failedInitGfxPartition.release()); memoryManager.initialize(false /*aubUsage*/); EXPECT_FALSE(memoryManager.isInitialized()); }
// A manager built with the single-argument constructor must not force 32-bit allocations.
TEST(OsAgnosticMemoryManager, givenDefaultMemoryManagerWhenItIsCreatedThenForce32BitAllocationsIsFalse) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); OsAgnosticMemoryManager memoryManager(executionEnvironment); EXPECT_FALSE(memoryManager.peekForce32BitAllocations()); }
// setForce32BitAllocations(true) must be observable through peekForce32BitAllocations().
TEST(OsAgnosticMemoryManager, givenDefaultMemoryManagerWhenForce32bitallocationIsCalledWithTrueThenMemoryManagerForces32BitAlloactions) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); OsAgnosticMemoryManager memoryManager(executionEnvironment); memoryManager.setForce32BitAllocations(true); EXPECT_TRUE(memoryManager.peekForce32BitAllocations()); }
TEST(OsAgnosticMemoryManager, givenMultipleRootDevicesWhenUpdateLatestContextIdForRootDeviceThenRootDeviceIndexToContextIdUpdateProperly) { class TestedOsAgnosticMemoryManager : public
OsAgnosticMemoryManager { public: using OsAgnosticMemoryManager::latestContextId; using OsAgnosticMemoryManager::OsAgnosticMemoryManager; using OsAgnosticMemoryManager::rootDeviceIndexToContextId; using OsAgnosticMemoryManager::updateLatestContextIdForRootDevice; }; MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); TestedOsAgnosticMemoryManager memoryManager(executionEnvironment); uint32_t rootDeviceArray[] = {0, 1, 2, 3}; for (auto &rootDeviceIndex : rootDeviceArray) { for (int count = 0; count < 10; count++) { memoryManager.updateLatestContextIdForRootDevice(rootDeviceIndex); ++memoryManager.latestContextId; } } EXPECT_EQ(memoryManager.rootDeviceIndexToContextId[0], std::numeric_limits::max()); EXPECT_EQ(memoryManager.rootDeviceIndexToContextId[1], 9u); EXPECT_EQ(memoryManager.rootDeviceIndexToContextId[2], 19u); EXPECT_EQ(memoryManager.rootDeviceIndexToContextId[3], 29u); memoryManager.reInitLatestContextId(); for (int count = 0; count < 10; count++) { memoryManager.updateLatestContextIdForRootDevice(2); ++memoryManager.latestContextId; } EXPECT_EQ(memoryManager.rootDeviceIndexToContextId[2], 19u); } TEST(OsAgnosticMemoryManager, givenCreateOrReleaseDeviceSpecificMemResourcesWhenCreatingMemoryManagerObjectThenTheseMethodsAreEmpty) { class TestedOsAgnosticMemoryManager : public OsAgnosticMemoryManager { public: using OsAgnosticMemoryManager::OsAgnosticMemoryManager; }; MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); TestedOsAgnosticMemoryManager memoryManager(executionEnvironment); memoryManager.releaseDeviceSpecificMemResources(1); memoryManager.createDeviceSpecificMemResources(1); } class MyOsAgnosticMemoryManager : public OsAgnosticMemoryManager { public: bool peek32bit() override { return is32bit; } MyOsAgnosticMemoryManager(bool, ExecutionEnvironment &executionEnvironment) : OsAgnosticMemoryManager(false, executionEnvironment) {} MyOsAgnosticMemoryManager(ExecutionEnvironment &executionEnvironment) : 
MyOsAgnosticMemoryManager(false, executionEnvironment) {} bool is32bit = false; };

// isLimitedGPU() must be false while the manager does not report 32-bit mode.
// Once it does, the result is expected to be the negation of isFullRangeSvm()
// for the queried root device.
TEST(OsAgnosticMemoryManager, givenMemoryManagerWhenIsLimitedGPUIsCalledThenCorrectValueIsReturned) {
    MockExecutionEnvironment execEnv(defaultHwInfo.get());
    MyOsAgnosticMemoryManager manager(execEnv);

    manager.is32bit = false;
    EXPECT_FALSE(manager.isLimitedGPU(mockRootDeviceIndex));

    manager.is32bit = true;
    const bool fullRangeSvm = execEnv.rootDeviceEnvironments[mockRootDeviceIndex]->isFullRangeSvm();
    EXPECT_EQ(!fullRangeSvm, manager.isLimitedGPU(mockRootDeviceIndex));
}

// Per-type variant: with 32-bit mode off no type is limited. With it on, only
// BUFFER may be limited (when the device is not full-range SVM); IMAGE and
// MAP_ALLOCATION are expected to stay unlimited either way.
TEST(OsAgnosticMemoryManager, givenMemoryManagerWhenIsLimitedGPUOnTypeIsCalledThenCorrectValueIsReturned) {
    MockExecutionEnvironment execEnv(defaultHwInfo.get());
    MyOsAgnosticMemoryManager manager(execEnv);

    const AllocationType checkedTypes[] = {AllocationType::BUFFER, AllocationType::IMAGE, AllocationType::MAP_ALLOCATION};

    manager.is32bit = false;
    for (const auto type : checkedTypes) {
        EXPECT_FALSE(manager.isLimitedGPUOnType(mockRootDeviceIndex, type));
    }

    manager.is32bit = true;
    const bool fullRangeSvm = execEnv.rootDeviceEnvironments[mockRootDeviceIndex]->isFullRangeSvm();
    EXPECT_EQ(!fullRangeSvm, manager.isLimitedGPUOnType(mockRootDeviceIndex, AllocationType::BUFFER));
    EXPECT_FALSE(manager.isLimitedGPUOnType(mockRootDeviceIndex, AllocationType::IMAGE));
    EXPECT_FALSE(manager.isLimitedGPUOnType(mockRootDeviceIndex, AllocationType::MAP_ALLOCATION));
}

TEST(OsAgnosticMemoryManager,
givenMemoryManagerWhenAskedFor32BitAllocationWhenLimitedAllocationIsEnabledThenGpuRangeFromExternalHeapIsAllocatiedAndBaseAddressIsSet) { if (is32bit) { GTEST_SKIP(); } ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); MockMemoryManager memoryManager(*executionEnvironment); memoryManager.setForce32BitAllocations(true); memoryManager.forceLimitedRangeAllocator(0, 0xFFFFFFFFF); AllocationData allocationData; memoryManager.getAllocationData(allocationData, {mockRootDeviceIndex, MemoryConstants::pageSize, AllocationType::BUFFER, mockDeviceBitfield}, nullptr, StorageInfo{}); auto gfxAllocation = memoryManager.allocateGraphicsMemoryWithAlignment(allocationData); ASSERT_NE(gfxAllocation, nullptr); EXPECT_NE(gfxAllocation->getGpuBaseAddress(), 0ull); EXPECT_EQ(gfxAllocation->getGpuBaseAddress(), memoryManager.getExternalHeapBaseAddress(gfxAllocation->getRootDeviceIndex(), gfxAllocation->isAllocatedInLocalMemoryPool())); memoryManager.freeGraphicsMemory(gfxAllocation); } TEST(OsAgnosticMemoryManager, givenMemoryManagerWhenAskedForNon32BitAllocationWhenLimitedAllocationIsEnabledThenGpuRangeFromiStandardHeapIsAllocatiedAndBaseAddressIsNotSet) { if (is32bit) { GTEST_SKIP(); } ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); MockMemoryManager memoryManager(*executionEnvironment); memoryManager.forceLimitedRangeAllocator(0, 0xFFFFFFFFF); AllocationData allocationData; memoryManager.getAllocationData(allocationData, {mockRootDeviceIndex, MemoryConstants::pageSize, AllocationType::BUFFER, mockDeviceBitfield}, nullptr, StorageInfo{}); auto gfxAllocation = memoryManager.allocateGraphicsMemoryWithAlignment(allocationData); ASSERT_NE(gfxAllocation, nullptr); EXPECT_EQ(gfxAllocation->getGpuBaseAddress(), 0ull); EXPECT_EQ(gfxAllocation->getGpuAddress(), memoryManager.getGfxPartition(allocationData.rootDeviceIndex)->getHeapLimit(HeapIndex::HEAP_STANDARD) + 1 - GfxPartition::heapGranularity - 
MemoryConstants::pageSize); memoryManager.freeGraphicsMemory(gfxAllocation); } TEST(OsAgnosticMemoryManager, givenDefaultMemoryManagerWhenAllocateGraphicsMemoryForImageIsCalledThenGraphicsAllocationIsReturned) { ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); MockMemoryManager memoryManager(*executionEnvironment); ImageDescriptor imgDesc = {}; imgDesc.imageWidth = 512; imgDesc.imageHeight = 1; imgDesc.imageType = ImageType::Image2D; auto imgInfo = MockGmm::initImgInfo(imgDesc, 0, nullptr); AllocationData allocationData; allocationData.imgInfo = &imgInfo; auto imageAllocation = memoryManager.allocateGraphicsMemoryForImage(allocationData); ASSERT_NE(nullptr, imageAllocation); EXPECT_TRUE(imageAllocation->getDefaultGmm()->resourceParams.Usage == GMM_RESOURCE_USAGE_TYPE::GMM_RESOURCE_USAGE_OCL_IMAGE); EXPECT_EQ(executionEnvironment->rootDeviceEnvironments[0]->getHardwareInfo()->featureTable.flags.ftrLocalMemory, imageAllocation->getDefaultGmm()->resourceParams.Flags.Info.NonLocalOnly); memoryManager.freeGraphicsMemory(imageAllocation); } TEST(OsAgnosticMemoryManager, givenDestroyedTagAllocationWhenWaitForCompletiionThenWaitForTaskCountIsNotCalled) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); auto memoryManager = new OsAgnosticMemoryManager(executionEnvironment); DeviceBitfield deviceBitfield(1); std::unique_ptr csr(createCommandStream(executionEnvironment, 0u, deviceBitfield)); executionEnvironment.memoryManager.reset(memoryManager); auto osContext = memoryManager->createAndRegisterOsContext(csr.get(), EngineDescriptorHelper::getDefaultDescriptor(HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*defaultHwInfo)[0], PreemptionHelper::getDefaultPreemptionMode(*defaultHwInfo))); auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{0, MemoryConstants::pageSize}); allocation->updateTaskCount(10, osContext->getContextId()); 
EXPECT_GT(allocation->getTaskCount(osContext->getContextId()), csr->peekTaskCount()); memoryManager->waitForEnginesCompletion(*allocation); memoryManager->freeGraphicsMemory(allocation); } TEST(OsAgnosticMemoryManager, givenEnabledLocalMemoryWhenAllocateGraphicsMemoryForImageIsCalledThenUseLocalMemoryIsNotSet) { ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); MockMemoryManager memoryManager(false, true, *executionEnvironment); ImageDescriptor imgDesc = {}; imgDesc.imageWidth = 1; imgDesc.imageHeight = 1; imgDesc.imageType = ImageType::Image2D; auto imgInfo = MockGmm::initImgInfo(imgDesc, 0, nullptr); AllocationData allocationData; allocationData.imgInfo = &imgInfo; auto imageAllocation = memoryManager.allocateGraphicsMemoryForImage(allocationData); ASSERT_NE(nullptr, imageAllocation); EXPECT_FALSE(imgInfo.useLocalMemory); memoryManager.freeGraphicsMemory(imageAllocation); } TEST(OsAgnosticMemoryManager, givenHostPointerNotRequiringCopyWhenAllocateGraphicsMemoryForImageFromHostPtrIsCalledThenGraphicsAllocationIsReturned) { ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); MockMemoryManager memoryManager(false, false, *executionEnvironment); ImageDescriptor imgDesc = {}; imgDesc.imageWidth = 4; imgDesc.imageHeight = 1; imgDesc.imageType = ImageType::Image1D; cl_image_format imageFormat = {}; imageFormat.image_channel_data_type = CL_UNSIGNED_INT8; imageFormat.image_channel_order = CL_RGBA; cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, defaultHwInfo->capabilityTable.supportsOcl21Features)->surfaceFormat; auto imgInfo = MockGmm::initImgInfo(imgDesc, 0, &surfaceFormat); imgInfo.rowPitch = imgDesc.imageWidth * 4; imgInfo.slicePitch = imgInfo.rowPitch * imgDesc.imageHeight; imgInfo.size = imgInfo.slicePitch; imgInfo.linearStorage = true; auto hostPtr = alignedMalloc(imgDesc.imageWidth * imgDesc.imageHeight * 4, 
MemoryConstants::pageSize); bool copyRequired = MockMemoryManager::isCopyRequired(imgInfo, hostPtr); EXPECT_FALSE(copyRequired); AllocationData allocationData; allocationData.imgInfo = &imgInfo; allocationData.hostPtr = hostPtr; allocationData.size = imgInfo.size; auto imageAllocation = memoryManager.allocateGraphicsMemoryForImageFromHostPtr(allocationData); ASSERT_NE(nullptr, imageAllocation); EXPECT_EQ(hostPtr, imageAllocation->getUnderlyingBuffer()); memoryManager.freeGraphicsMemory(imageAllocation); alignedFree(hostPtr); } TEST(OsAgnosticMemoryManager, givenHostPointerRequiringCopyWhenAllocateGraphicsMemoryForImageFromHostPtrIsCalledThenNullptrIsReturned) { ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); MockMemoryManager memoryManager(false, false, *executionEnvironment); ImageDescriptor imgDesc = {}; imgDesc.imageHeight = 4; imgDesc.imageWidth = 4; imgDesc.imageType = ImageType::Image2D; cl_image_format imageFormat = {}; imageFormat.image_channel_data_type = CL_UNSIGNED_INT8; imageFormat.image_channel_order = CL_RGBA; cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, defaultHwInfo->capabilityTable.supportsOcl21Features); auto imgInfo = MockGmm::initImgInfo(imgDesc, 0, &surfaceFormat->surfaceFormat); imgInfo.rowPitch = imgDesc.imageWidth * 4; imgInfo.slicePitch = imgInfo.rowPitch * imgDesc.imageHeight; imgInfo.size = imgInfo.slicePitch; auto hostPtr = alignedMalloc(imgDesc.imageWidth * imgDesc.imageHeight * 4, MemoryConstants::pageSize); bool copyRequired = MockMemoryManager::isCopyRequired(imgInfo, hostPtr); EXPECT_TRUE(copyRequired); AllocationData allocationData; allocationData.imgInfo = &imgInfo; allocationData.hostPtr = hostPtr; auto imageAllocation = memoryManager.allocateGraphicsMemoryForImageFromHostPtr(allocationData); EXPECT_EQ(nullptr, imageAllocation); alignedFree(hostPtr); } TEST(OsAgnosticMemoryManager, 
givenEnabledCrossRootDeviceAccessFlagWhenAllocateGraphicsMemoryForImageFromHostPtrIsCalledThenGraphicsAllocationIsReturned) { ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); MockMemoryManager memoryManager(false, false, *executionEnvironment); ImageDescriptor imgDesc = {}; imgDesc.imageWidth = 4; imgDesc.imageHeight = 1; imgDesc.imageType = ImageType::Image1D; cl_image_format imageFormat = {}; imageFormat.image_channel_data_type = CL_UNSIGNED_INT8; imageFormat.image_channel_order = CL_RGBA; cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, defaultHwInfo->capabilityTable.supportsOcl21Features); auto imgInfo = MockGmm::initImgInfo(imgDesc, 0, &surfaceFormat->surfaceFormat); imgInfo.rowPitch = imgDesc.imageWidth * 4; imgInfo.slicePitch = imgInfo.rowPitch * imgDesc.imageHeight; imgInfo.size = imgInfo.slicePitch; imgInfo.linearStorage = true; auto hostPtr = alignedMalloc(imgDesc.imageWidth * imgDesc.imageHeight * 4, MemoryConstants::pageSize); AllocationData allocationData; allocationData.imgInfo = &imgInfo; allocationData.hostPtr = hostPtr; allocationData.size = imgInfo.size; allocationData.flags.crossRootDeviceAccess = true; auto imageAllocation = memoryManager.allocateGraphicsMemoryForImageFromHostPtr(allocationData); ASSERT_NE(nullptr, imageAllocation); EXPECT_EQ(hostPtr, imageAllocation->getUnderlyingBuffer()); memoryManager.freeGraphicsMemory(imageAllocation); alignedFree(hostPtr); } TEST(OsAgnosticMemoryManager, givenDefaultMemoryManagerAndUnifiedAuxCapableAllocationWhenMappingThenReturnFalse) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); executionEnvironment.initGmm(); OsAgnosticMemoryManager memoryManager(executionEnvironment); auto gmm = new Gmm(executionEnvironment.rootDeviceEnvironments[0]->getGmmClientContext(), nullptr, 123, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true); auto allocation = 
memoryManager.allocateGraphicsMemoryWithProperties(MockAllocationProperties{0, MemoryConstants::pageSize}); allocation->setDefaultGmm(gmm); auto mockGmmRes = reinterpret_cast(gmm->gmmResourceInfo.get()); mockGmmRes->setUnifiedAuxTranslationCapable(); EXPECT_FALSE(memoryManager.mapAuxGpuVA(allocation)); memoryManager.freeGraphicsMemory(allocation); } TEST(OsAgnosticMemoryManager, givenMemoryManagerWhenAllocateGraphicsMemoryIsCalledThenMemoryPoolIsSystem4KBPages) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MemoryManagerCreate memoryManager(false, false, executionEnvironment); if (memoryManager.isLimitedGPU(0)) { GTEST_SKIP(); } auto size = 4096u; auto allocation = memoryManager.allocateGraphicsMemoryWithProperties(MockAllocationProperties{0, size}); EXPECT_NE(nullptr, allocation); EXPECT_EQ(MemoryPool::System4KBPages, allocation->getMemoryPool()); memoryManager.freeGraphicsMemory(allocation); } TEST(OsAgnosticMemoryManager, givenCompressionEnabledWhenAllocateGraphicsMemoryWithAlignmentIsCalledThenGmmIsAllocated) { DebugManagerStateRestore dbgRestore; DebugManager.flags.RenderCompressedBuffersEnabled.set(true); MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); executionEnvironment.initGmm(); MockMemoryManager memoryManager(true, false, executionEnvironment); AllocationData allocationData; allocationData.size = 4096u; allocationData.alignment = MemoryConstants::pageSize; allocationData.flags.preferCompressed = true; auto allocation = memoryManager.allocateGraphicsMemoryWithAlignment(allocationData); EXPECT_NE(nullptr, allocation); EXPECT_EQ(MemoryPool::System4KBPages, allocation->getMemoryPool()); EXPECT_NE(nullptr, allocation->getDefaultGmm()); EXPECT_EQ(true, allocation->getDefaultGmm()->isCompressionEnabled); EXPECT_EQ(MemoryConstants::pageSize, allocation->getDefaultGmm()->resourceParams.BaseAlignment); memoryManager.freeGraphicsMemory(allocation); } TEST(OsAgnosticMemoryManager, 
givenMemoryManagerWith64KBPagesEnabledWhenAllocateGraphicsMemory64kbIsCalledThenMemoryPoolIsSystem64KBPages) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); executionEnvironment.initGmm(); MockMemoryManager memoryManager(true, false, executionEnvironment); AllocationData allocationData; allocationData.size = 4096u; allocationData.alignment = MemoryConstants::pageSize; auto allocation = memoryManager.allocateGraphicsMemory64kb(allocationData); EXPECT_NE(nullptr, allocation); EXPECT_EQ(MemoryPool::System64KBPages, allocation->getMemoryPool()); EXPECT_EQ(MemoryConstants::pageSize64k, allocation->getDefaultGmm()->resourceParams.BaseAlignment); memoryManager.freeGraphicsMemory(allocation); } TEST(OsAgnosticMemoryManager, givenMemoryManagerWith64KBPagesEnabledAndCompressionEnabledWhenAllocateGraphicsMemory64kbIsCalledThenMemoryPoolIsSystem64KBPages) { DebugManagerStateRestore dbgRestore; DebugManager.flags.RenderCompressedBuffersEnabled.set(true); MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); executionEnvironment.initGmm(); MockMemoryManager memoryManager(true, false, executionEnvironment); AllocationData allocationData; allocationData.size = 4096u; allocationData.alignment = MemoryConstants::pageSize; allocationData.flags.preferCompressed = true; auto allocation = memoryManager.allocateGraphicsMemory64kb(allocationData); EXPECT_NE(nullptr, allocation); EXPECT_EQ(MemoryPool::System64KBPages, allocation->getMemoryPool()); EXPECT_GT(allocation->getDefaultGmm()->resourceParams.BaseAlignment, MemoryConstants::pageSize); memoryManager.freeGraphicsMemory(allocation); } TEST(OsAgnosticMemoryManager, givenMemoryManagerWith64KBPagesEnabledWhenAllocateGraphicsMemoryFailsThenNullptrIsReturned) { class MockOsAgnosticManagerWithFailingAllocate : public MemoryManagerCreate { public: using OsAgnosticMemoryManager::allocateGraphicsMemory64kb; MockOsAgnosticManagerWithFailingAllocate(bool enable64kbPages, ExecutionEnvironment 
&executionEnvironment) : MemoryManagerCreate(enable64kbPages, false, executionEnvironment) {} GraphicsAllocation *allocateGraphicsMemoryWithAlignment(const AllocationData &allocationData) override { return nullptr; } }; MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MockOsAgnosticManagerWithFailingAllocate memoryManager(true, executionEnvironment); AllocationData allocationData; allocationData.size = 4096u; auto allocation = memoryManager.allocateGraphicsMemory64kb(allocationData); EXPECT_EQ(nullptr, allocation); memoryManager.freeGraphicsMemory(allocation); } TEST(OsAgnosticMemoryManager, givenMemoryManagerWhenAllocateGraphicsMemoryWithPtrIsCalledThenMemoryPoolIsSystem4KBPages) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MemoryManagerCreate memoryManager(false, false, executionEnvironment); if (memoryManager.isLimitedGPU(0)) { GTEST_SKIP(); } void *ptr = reinterpret_cast(0x1001); auto size = MemoryConstants::pageSize; auto allocation = memoryManager.allocateGraphicsMemoryWithProperties(MockAllocationProperties{mockRootDeviceIndex, false, size, mockDeviceBitfield}, ptr); ASSERT_NE(nullptr, allocation); EXPECT_EQ(MemoryPool::System4KBPages, allocation->getMemoryPool()); memoryManager.freeGraphicsMemory(allocation); } TEST(OsAgnosticMemoryManager, givenMemoryManagerWhenAllocate32BitGraphicsMemoryWithPtrIsCalledThenMemoryPoolIsSystem4KBPagesWith32BitGpuAddressing) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); executionEnvironment.initGmm(); MockMemoryManager memoryManager(false, false, executionEnvironment); void *ptr = reinterpret_cast(0x1001); auto size = MemoryConstants::pageSize; auto allocation = memoryManager.allocate32BitGraphicsMemory(mockRootDeviceIndex, size, ptr, AllocationType::BUFFER); ASSERT_NE(nullptr, allocation); EXPECT_EQ(MemoryPool::System4KBPagesWith32BitGpuAddressing, allocation->getMemoryPool()); memoryManager.freeGraphicsMemory(allocation); } TEST(OsAgnosticMemoryManager, 
givenMemoryManagerWhenAllocate32BitGraphicsMemoryWithoutPtrIsCalledThenMemoryPoolIsSystem4KBPagesWith32BitGpuAddressing) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MockMemoryManager memoryManager(false, false, executionEnvironment); void *ptr = nullptr; auto size = MemoryConstants::pageSize; auto allocation = memoryManager.allocate32BitGraphicsMemory(mockRootDeviceIndex, size, ptr, AllocationType::BUFFER); ASSERT_NE(nullptr, allocation); EXPECT_EQ(MemoryPool::System4KBPagesWith32BitGpuAddressing, allocation->getMemoryPool()); memoryManager.freeGraphicsMemory(allocation); }

// With 64KB pages enabled, a page-sized SVM_ZERO_COPY allocation is expected to
// be placed in MemoryPool::System64KBPages. Skipped when the manager reports a
// limited GPU for root device 0.
// NOTE(review): MemoryManagerCreate appears without a template argument list
// in this view of the file - confirm against upstream.
TEST(OsAgnosticMemoryManager, givenMemoryManagerWith64KBPagesEnabledWhenAllocateGraphicsMemoryThenMemoryPoolIsSystem64KBPages) {
    MockExecutionEnvironment executionEnvironment(defaultHwInfo.get());
    executionEnvironment.initGmm();
    MemoryManagerCreate memoryManager(true, false, executionEnvironment);
    if (memoryManager.isLimitedGPU(0)) {
        GTEST_SKIP();
    }
    auto svmAllocation = memoryManager.allocateGraphicsMemoryWithProperties({mockRootDeviceIndex, MemoryConstants::pageSize, AllocationType::SVM_ZERO_COPY, mockDeviceBitfield});

    // Fatal check: the pool query below dereferences the allocation.
    ASSERT_NE(nullptr, svmAllocation);
    EXPECT_EQ(MemoryPool::System64KBPages, svmAllocation->getMemoryPool());

    memoryManager.freeGraphicsMemory(svmAllocation);
}

// Same request with 64KB pages disabled: the allocation is expected in
// MemoryPool::System4KBPages.
TEST(OsAgnosticMemoryManager, givenMemoryManagerWith64KBPagesDisabledWhenAllocateGraphicsMemoryThen4KBGraphicsAllocationIsReturned) {
    MockExecutionEnvironment executionEnvironment(defaultHwInfo.get());
    executionEnvironment.initGmm();
    MemoryManagerCreate memoryManager(false, false, executionEnvironment);
    if (memoryManager.isLimitedGPU(0)) {
        GTEST_SKIP();
    }
    auto svmAllocation = memoryManager.allocateGraphicsMemoryWithProperties({mockRootDeviceIndex, MemoryConstants::pageSize, AllocationType::SVM_ZERO_COPY, mockDeviceBitfield});

    // Previously unchecked: a failed allocation would crash the test binary
    // instead of producing a test failure.
    ASSERT_NE(nullptr, svmAllocation);
    EXPECT_EQ(MemoryPool::System4KBPages, svmAllocation->getMemoryPool());

    memoryManager.freeGraphicsMemory(svmAllocation);
}

TEST(OsAgnosticMemoryManager,
givenDefaultMemoryManagerWhenCreateGraphicsAllocationFromSharedObjectIsCalledThenGraphicsAllocationIsReturned) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MemoryManagerCreate memoryManager(false, false, executionEnvironment); osHandle handle = 1; auto size = 4096u; AllocationProperties properties(mockRootDeviceIndex, false, size, AllocationType::SHARED_BUFFER, false, mockDeviceBitfield); auto sharedAllocation = memoryManager.createGraphicsAllocationFromSharedHandle(handle, properties, false, false); EXPECT_NE(nullptr, sharedAllocation); EXPECT_FALSE(sharedAllocation->isCoherent()); EXPECT_NE(nullptr, sharedAllocation->getUnderlyingBuffer()); EXPECT_EQ(size, sharedAllocation->getUnderlyingBufferSize()); EXPECT_EQ(MemoryPool::SystemCpuInaccessible, sharedAllocation->getMemoryPool()); memoryManager.freeGraphicsMemory(sharedAllocation); } TEST(OsAgnosticMemoryManager, givenDefaultMemoryManagerWhenCreateGraphicsAllocationFromSharedObjectIsCalledAndRootDeviceIndexIsSpecifiedThenGraphicsAllocationIsReturnedWithCorrectRootDeviceIndex) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MemoryManagerCreate memoryManager(false, false, executionEnvironment); osHandle handle = 1; auto size = 4096u; AllocationProperties properties(mockRootDeviceIndex, false, size, AllocationType::SHARED_BUFFER, false, false, mockDeviceBitfield); EXPECT_EQ(properties.subDevicesBitfield, mockDeviceBitfield); EXPECT_EQ(properties.rootDeviceIndex, 0u); auto sharedAllocation = memoryManager.createGraphicsAllocationFromSharedHandle(handle, properties, false, false); EXPECT_NE(nullptr, sharedAllocation); EXPECT_EQ(0u, sharedAllocation->getRootDeviceIndex()); EXPECT_FALSE(sharedAllocation->isCoherent()); EXPECT_NE(nullptr, sharedAllocation->getUnderlyingBuffer()); EXPECT_EQ(size, sharedAllocation->getUnderlyingBufferSize()); EXPECT_EQ(MemoryPool::SystemCpuInaccessible, sharedAllocation->getMemoryPool()); memoryManager.freeGraphicsMemory(sharedAllocation); } 
// Imports a shared handle with the third argument of
// createGraphicsAllocationFromSharedHandle() set to true and expects a
// non-coherent 32-bit allocation of the requested size in
// MemoryPool::SystemCpuInaccessible.
// NOTE(review): MemoryManagerCreate appears without a template argument list
// in this view of the file - confirm against upstream.
TEST(OsAgnosticMemoryManager, givenDefaultMemoryManagerWhenCreateGraphicsAllocationFromSharedObjectIsCalledWithSpecificBitnessThen32BitGraphicsAllocationIsReturned) {
    MockExecutionEnvironment executionEnvironment(defaultHwInfo.get());
    MemoryManagerCreate memoryManager(false, false, executionEnvironment);
    osHandle handle = 1;
    auto size = 4096u;
    AllocationProperties properties(mockRootDeviceIndex, false, size, AllocationType::SHARED_BUFFER, false, mockDeviceBitfield);

    auto sharedAllocation = memoryManager.createGraphicsAllocationFromSharedHandle(handle, properties, true, false);
    // Fatal check: every assertion below dereferences the allocation.
    ASSERT_NE(nullptr, sharedAllocation);
    EXPECT_TRUE(sharedAllocation->is32BitAllocation());
    EXPECT_FALSE(sharedAllocation->isCoherent());
    EXPECT_NE(nullptr, sharedAllocation->getUnderlyingBuffer());
    EXPECT_EQ(size, sharedAllocation->getUnderlyingBufferSize());
    EXPECT_EQ(MemoryPool::SystemCpuInaccessible, sharedAllocation->getMemoryPool());

    memoryManager.freeGraphicsMemory(sharedAllocation);
}

// createGraphicsAllocationFromNTHandle() on the OS-agnostic manager is expected
// to return nullptr.
TEST(OsAgnosticMemoryManager, givenMemoryManagerWhenCreateAllocationFromNtHandleIsCalledThenReturnNullptr) {
    MockExecutionEnvironment executionEnvironment(defaultHwInfo.get());
    OsAgnosticMemoryManager memoryManager(executionEnvironment);

    auto graphicsAllocation = memoryManager.createGraphicsAllocationFromNTHandle(reinterpret_cast<void *>(1), 0, AllocationType::SHARED_IMAGE);
    EXPECT_EQ(nullptr, graphicsAllocation);
}

TEST(OsAgnosticMemoryManager, givenMemoryManagerWhenLockUnlockCalledThenReturnCpuPtr) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); OsAgnosticMemoryManager memoryManager(executionEnvironment); auto allocation = memoryManager.allocateGraphicsMemoryWithProperties(MockAllocationProperties{0, MemoryConstants::pageSize}); ASSERT_NE(nullptr, allocation); EXPECT_FALSE(allocation->isLocked()); auto ptr = memoryManager.lockResource(allocation); EXPECT_EQ(ptrOffset(allocation->getUnderlyingBuffer(), static_cast(allocation->getAllocationOffset())), ptr); EXPECT_TRUE(allocation->isLocked());
memoryManager.unlockResource(allocation); EXPECT_FALSE(allocation->isLocked()); memoryManager.freeGraphicsMemory(allocation); } TEST(OsAgnosticMemoryManager, givenDefaultMemoryManagerWhenGraphicsAllocationContainsOffsetWhenAddressIsObtainedThenOffsetIsAdded) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MemoryManagerCreate memoryManager(false, false, executionEnvironment); auto graphicsAllocation = memoryManager.allocateGraphicsMemoryWithProperties(MockAllocationProperties{0, MemoryConstants::pageSize}); auto graphicsAddress = graphicsAllocation->getGpuAddress(); auto graphicsAddressToPatch = graphicsAllocation->getGpuAddressToPatch(); graphicsAllocation->setAllocationOffset(4); auto offsetedGraphicsAddress = graphicsAllocation->getGpuAddress(); auto offsetedGraphicsAddressToPatch = graphicsAllocation->getGpuAddressToPatch(); EXPECT_EQ(offsetedGraphicsAddress, graphicsAddress + graphicsAllocation->getAllocationOffset()); EXPECT_EQ(offsetedGraphicsAddressToPatch, graphicsAddressToPatch + graphicsAllocation->getAllocationOffset()); memoryManager.freeGraphicsMemory(graphicsAllocation); } TEST(OsAgnosticMemoryManager, givenDefaultMemoryManagerWhenGraphicsAllocationIsPaddedThenNewGraphicsAllocationIsCreated) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MemoryManagerCreate memoryManager(false, false, executionEnvironment); auto graphicsAllocation = memoryManager.allocateGraphicsMemoryWithProperties(MockAllocationProperties{0, MemoryConstants::pageSize}); auto sizeWithPadding = 8192; auto paddedGraphicsAllocation = memoryManager.createGraphicsAllocationWithPadding(graphicsAllocation, sizeWithPadding); ASSERT_NE(nullptr, paddedGraphicsAllocation); EXPECT_NE(paddedGraphicsAllocation, graphicsAllocation); memoryManager.freeGraphicsMemory(paddedGraphicsAllocation); memoryManager.freeGraphicsMemory(graphicsAllocation); } TEST(OsAgnosticMemoryManager, WhenPointerIsCreatedThenLeakIsDetected) { void *ptr = new int[10]; 
EXPECT_NE(nullptr, ptr); MemoryManagement::fastLeaksDetectionMode = MemoryManagement::LeakDetectionMode::EXPECT_TO_LEAK; } TEST(OsAgnosticMemoryManager, givenMemoryManagerWhenAllocateMemoryWithNoAlignmentProvidedThenAllocationIsAlignedToPageSize) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MemoryManagerCreate memoryManager(false, false, executionEnvironment); MockAllocationProperties properties(0, MemoryConstants::pageSize >> 1); properties.alignment = 0; auto ga = memoryManager.allocateGraphicsMemoryWithProperties(properties); uintptr_t ptr = reinterpret_cast(ga->getUnderlyingBuffer()); ptr &= (MemoryConstants::allocationAlignment - 1); EXPECT_EQ(ptr, 0u); memoryManager.freeGraphicsMemory(ga); } TEST(OsAgnosticMemoryManager, givenMemoryManagerWhenAllocateMemoryWithAlignmentNotAlignedToPageSizeThenAlignmentIsAlignedUp) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MemoryManagerCreate memoryManager(false, false, executionEnvironment); MockAllocationProperties properties(0, MemoryConstants::pageSize >> 1); properties.alignment = MemoryConstants::pageSize - 1; auto ga = memoryManager.allocateGraphicsMemoryWithProperties(properties); uintptr_t ptr = reinterpret_cast(ga->getUnderlyingBuffer()); ptr &= (MemoryConstants::allocationAlignment - 1); EXPECT_EQ(ptr, 0u); memoryManager.freeGraphicsMemory(ga); } TEST(OsAgnosticMemoryManager, givenCommonMemoryManagerWhenIsAskedIfApplicationMemoryBudgetIsExhaustedThenFalseIsReturned) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); OsAgnosticMemoryManager memoryManager(executionEnvironment); EXPECT_FALSE(memoryManager.isMemoryBudgetExhausted()); } TEST(OsAgnosticMemoryManager, givenDebugModuleAreaTypeWhenCreatingAllocationThen32BitAllocationWithFrontWindowGpuVaIsReturned) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MemoryManagerCreate memoryManager(false, false, executionEnvironment); const auto size = MemoryConstants::pageSize64k; 
NEO::AllocationProperties properties{0, true, size, NEO::AllocationType::DEBUG_MODULE_AREA, false, mockDeviceBitfield}; auto moduleDebugArea = memoryManager.allocateGraphicsMemoryWithProperties(properties); EXPECT_NE(nullptr, moduleDebugArea); EXPECT_NE(nullptr, moduleDebugArea->getUnderlyingBuffer()); EXPECT_GE(moduleDebugArea->getUnderlyingBufferSize(), size); auto address64bit = moduleDebugArea->getGpuAddressToPatch(); EXPECT_LT(address64bit, MemoryConstants::max32BitAddress); EXPECT_TRUE(moduleDebugArea->is32BitAllocation()); auto frontWindowBase = GmmHelper::canonize(memoryManager.getGfxPartition(moduleDebugArea->getRootDeviceIndex())->getHeapBase(memoryManager.selectInternalHeap(moduleDebugArea->isAllocatedInLocalMemoryPool()))); EXPECT_EQ(frontWindowBase, moduleDebugArea->getGpuBaseAddress()); EXPECT_EQ(frontWindowBase, moduleDebugArea->getGpuAddress()); memoryManager.freeGraphicsMemory(moduleDebugArea); } TEST(OsAgnosticMemoryManager, givenLocalMemoryAndDebugModuleAreaTypeWhenCreatingAllocationThen32BitAllocationWithFrontWindowGpuVaIsReturned) { auto hwInfo = *defaultHwInfo; hwInfo.featureTable.flags.ftrLocalMemory = true; DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableLocalMemory.set(true); // Ensure family supports local memory if (!HwHelper::get(hwInfo.platform.eRenderCoreFamily).isLocalMemoryEnabled(hwInfo)) { GTEST_SKIP(); } MockExecutionEnvironment executionEnvironment(&hwInfo); MemoryManagerCreate memoryManager(false, true, executionEnvironment); const auto size = MemoryConstants::pageSize64k; NEO::AllocationProperties properties{0, true, size, NEO::AllocationType::DEBUG_MODULE_AREA, false, mockDeviceBitfield}; auto moduleDebugArea = memoryManager.allocateGraphicsMemoryWithProperties(properties); EXPECT_NE(nullptr, moduleDebugArea); EXPECT_NE(nullptr, moduleDebugArea->getUnderlyingBuffer()); EXPECT_GE(moduleDebugArea->getUnderlyingBufferSize(), size); auto address64bit = moduleDebugArea->getGpuAddressToPatch(); EXPECT_LT(address64bit, 
MemoryConstants::max32BitAddress); EXPECT_TRUE(moduleDebugArea->is32BitAllocation()); auto frontWindowBase = GmmHelper::canonize(memoryManager.getGfxPartition(moduleDebugArea->getRootDeviceIndex())->getHeapBase(memoryManager.selectInternalHeap(moduleDebugArea->isAllocatedInLocalMemoryPool()))); EXPECT_EQ(frontWindowBase, moduleDebugArea->getGpuBaseAddress()); EXPECT_EQ(frontWindowBase, moduleDebugArea->getGpuAddress()); memoryManager.freeGraphicsMemory(moduleDebugArea); } TEST(OsAgnosticMemoryManager, givenEnabledLocalMemoryWhenAllocatingGraphicsMemoryForIsaInSystemMemoryThenBaseAddressIsEqualToInternalHeapBaseAddress) { ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); auto hwInfo = executionEnvironment->rootDeviceEnvironments[0]->getMutableHardwareInfo(); hwInfo->featureTable.flags.ftrLocalMemory = true; MockMemoryManager memoryManager(false, true, *executionEnvironment); AllocationData allocationData; allocationData.type = AllocationType::KERNEL_ISA; allocationData.flags.useSystemMemory = 1; allocationData.size = 4096; auto allocation = memoryManager.allocateGraphicsMemory(allocationData); ASSERT_NE(nullptr, allocation); auto instructionHeapBaseAddress = memoryManager.getInternalHeapBaseAddress(0, false); EXPECT_EQ(instructionHeapBaseAddress, GmmHelper::decanonize(allocation->getGpuBaseAddress())); memoryManager.freeGraphicsMemory(allocation); } TEST(OsAgnosticMemoryManager, givenForcedSystemMemoryForIsaAndEnabledLocalMemoryWhenAllocatingGraphicsMemoryThenBaseAddressIsEqualToInternalHeapBaseAddress) { DebugManagerStateRestore dbgRestore; DebugManager.flags.ForceSystemMemoryPlacement.set(1 << (static_cast(AllocationType::KERNEL_ISA) - 1)); ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); auto hwInfo = executionEnvironment->rootDeviceEnvironments[0]->getMutableHardwareInfo(); hwInfo->featureTable.flags.ftrLocalMemory = true; MockMemoryManager memoryManager(false, true, *executionEnvironment); 
size_t kernelIsaSize = 4096; auto allocation = memoryManager.allocateGraphicsMemoryWithProperties({0, kernelIsaSize, AllocationType::KERNEL_ISA, 1}); ASSERT_NE(nullptr, allocation); auto instructionHeapBaseAddress = memoryManager.getInternalHeapBaseAddress(0, false); EXPECT_EQ(instructionHeapBaseAddress, GmmHelper::decanonize(allocation->getGpuBaseAddress())); memoryManager.freeGraphicsMemory(allocation); } class MemoryManagerWithAsyncDeleterTest : public ::testing::Test { public: MemoryManagerWithAsyncDeleterTest() : memoryManager(false, false){}; void SetUp() override { memoryManager.overrideAsyncDeleterFlag(true); } const uint32_t rootDeviceIndex = 0u; MockMemoryManager memoryManager; }; TEST_F(MemoryManagerWithAsyncDeleterTest, givenMemoryManagerWhenWaitForDeletionsIsCalledThenDeferredDeleterIsNullptr) { auto deleter = new MockDeferredDeleter(); memoryManager.setDeferredDeleter(deleter); deleter->expectDrainBlockingValue(false); EXPECT_EQ(deleter, memoryManager.getDeferredDeleter()); EXPECT_EQ(0, deleter->drainCalled); memoryManager.waitForDeletions(); EXPECT_EQ(nullptr, memoryManager.getDeferredDeleter()); } TEST_F(MemoryManagerWithAsyncDeleterTest, givenMemoryManagerWhenWaitForDeletionsIsCalledTwiceThenItDoesntCrash) { EXPECT_NE(nullptr, memoryManager.getDeferredDeleter()); memoryManager.waitForDeletions(); EXPECT_EQ(nullptr, memoryManager.getDeferredDeleter()); memoryManager.waitForDeletions(); EXPECT_EQ(nullptr, memoryManager.getDeferredDeleter()); } TEST_F(MemoryManagerWithAsyncDeleterTest, givenMemoryManagerWhenAllocateGraphicsMemoryIsCalledWithPtrAndDeleterIsNotNullptrThenDeletersQueueIsReleased) { MockDeferredDeleter *deleter = new MockDeferredDeleter(); memoryManager.setDeferredDeleter(deleter); EXPECT_NE(nullptr, memoryManager.getDeferredDeleter()); auto deletion = new MockDeferrableDeletion(); deleter->DeferredDeleter::deferDeletion(deletion); EXPECT_FALSE(deleter->isQueueEmpty()); char ptr[128]; EXPECT_EQ(0, deleter->drainCalled); 
deleter->expectDrainBlockingValue(true); auto allocation = memoryManager.MemoryManager::allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootDeviceIndex, false, sizeof(char)}, ptr); EXPECT_TRUE(deleter->isQueueEmpty()); memoryManager.freeGraphicsMemoryImpl(allocation); } TEST_F(MemoryManagerWithAsyncDeleterTest, givenMemoryManagerWhenAllocateGraphicsMemoryIsCalledWithPtrAndDeleterIsNullptrThenItDoesntCrash) { memoryManager.setDeferredDeleter(nullptr); EXPECT_EQ(nullptr, memoryManager.getDeferredDeleter()); char ptr[128]; auto allocation = memoryManager.MemoryManager::allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootDeviceIndex, false, sizeof(char)}, ptr); memoryManager.freeGraphicsMemoryImpl(allocation); } TEST(OsAgnosticMemoryManager, givenMemoryManagerWhenIsAsyncDeleterEnabledCalledThenReturnsValueOfFlag) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MockMemoryManager memoryManager(executionEnvironment); memoryManager.overrideAsyncDeleterFlag(false); EXPECT_FALSE(memoryManager.isAsyncDeleterEnabled()); memoryManager.overrideAsyncDeleterFlag(true); EXPECT_TRUE(memoryManager.isAsyncDeleterEnabled()); } TEST(OsAgnosticMemoryManager, givenDefaultMemoryManagerWhenItIsCreatedThenAsyncDeleterEnabledIsFalse) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); OsAgnosticMemoryManager memoryManager(executionEnvironment); EXPECT_FALSE(memoryManager.isAsyncDeleterEnabled()); EXPECT_EQ(nullptr, memoryManager.getDeferredDeleter()); } TEST(OsAgnosticMemoryManager, givenEnabledAsyncDeleterFlagWhenMemoryManagerIsCreatedThenAsyncDeleterEnabledIsFalseAndDeleterIsNullptr) { bool defaultEnableDeferredDeleterFlag = DebugManager.flags.EnableDeferredDeleter.get(); DebugManager.flags.EnableDeferredDeleter.set(true); MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); OsAgnosticMemoryManager memoryManager(executionEnvironment); EXPECT_FALSE(memoryManager.isAsyncDeleterEnabled()); EXPECT_EQ(nullptr, 
memoryManager.getDeferredDeleter()); DebugManager.flags.EnableDeferredDeleter.set(defaultEnableDeferredDeleterFlag); } TEST(OsAgnosticMemoryManager, givenDisabledAsyncDeleterFlagWhenMemoryManagerIsCreatedThenAsyncDeleterEnabledIsFalseAndDeleterIsNullptr) { bool defaultEnableDeferredDeleterFlag = DebugManager.flags.EnableDeferredDeleter.get(); DebugManager.flags.EnableDeferredDeleter.set(false); MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); OsAgnosticMemoryManager memoryManager(executionEnvironment); EXPECT_FALSE(memoryManager.isAsyncDeleterEnabled()); EXPECT_EQ(nullptr, memoryManager.getDeferredDeleter()); DebugManager.flags.EnableDeferredDeleter.set(defaultEnableDeferredDeleterFlag); } TEST(OsAgnosticMemoryManager, GivenEnabled64kbPagesWhenHostMemoryAllocationIsCreatedThenAlignedto64KbAllocationIsReturned) { DebugManagerStateRestore dbgRestore; DebugManager.flags.Enable64kbpages.set(true); MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); executionEnvironment.initGmm(); MemoryManagerCreate memoryManager(true, false, executionEnvironment); if (memoryManager.isLimitedGPU(0)) { GTEST_SKIP(); } GraphicsAllocation *galloc = memoryManager.allocateGraphicsMemoryWithProperties({mockRootDeviceIndex, MemoryConstants::pageSize64k, AllocationType::BUFFER_HOST_MEMORY, mockDeviceBitfield}); EXPECT_NE(nullptr, galloc); memoryManager.freeGraphicsMemory(galloc); galloc = memoryManager.allocateGraphicsMemoryWithProperties({mockRootDeviceIndex, MemoryConstants::pageSize64k, AllocationType::BUFFER_HOST_MEMORY, mockDeviceBitfield}); EXPECT_NE(nullptr, galloc); EXPECT_NE(nullptr, galloc->getUnderlyingBuffer()); size_t size = (executionEnvironment.rootDeviceEnvironments[0u]->getHardwareInfo()->capabilityTable.hostPtrTrackingEnabled) ? 
MemoryConstants::pageSize64k : MemoryConstants::pageSize; EXPECT_EQ(0u, (uintptr_t)galloc->getUnderlyingBuffer() % size); EXPECT_NE(0u, galloc->getGpuAddress()); EXPECT_EQ(0u, (uintptr_t)galloc->getGpuAddress() % size); memoryManager.freeGraphicsMemory(galloc); } TEST(OsAgnosticMemoryManager, givenPointerAndSizeWhenCreateInternalAllocationIsCalledThenGraphicsAllocationIsReturned) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MockMemoryManager memoryManager(false, false, executionEnvironment); auto ptr = (void *)0x100000; size_t allocationSize = 4096; auto graphicsAllocation = memoryManager.allocate32BitGraphicsMemory(mockRootDeviceIndex, allocationSize, ptr, AllocationType::INTERNAL_HEAP); EXPECT_EQ(ptr, graphicsAllocation->getUnderlyingBuffer()); EXPECT_EQ(allocationSize, graphicsAllocation->getUnderlyingBufferSize()); memoryManager.freeGraphicsMemory(graphicsAllocation); } TEST(OsAgnosticMemoryManager, givenOsAgnosticMemoryManagerWhenAllocateGraphicsMemoryForNonSvmHostPtrIsCalledThenAllocationIsCreated) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MockMemoryManager memoryManager(executionEnvironment); AllocationData allocationData; allocationData.size = 13; allocationData.hostPtr = reinterpret_cast(0x5001); auto allocation = memoryManager.allocateGraphicsMemoryForNonSvmHostPtr(allocationData); EXPECT_NE(nullptr, allocation); EXPECT_EQ(13u, allocation->getUnderlyingBufferSize()); EXPECT_EQ(1u, allocation->getAllocationOffset()); memoryManager.freeGraphicsMemory(allocation); } using OsAgnosticMemoryManagerWithParams = ::testing::TestWithParam; TEST_P(OsAgnosticMemoryManagerWithParams, givenReducedGpuAddressSpaceWhenAllocateGraphicsMemoryForHostPtrIsCalledThenAllocationWithoutFragmentsIsCreated) { bool requiresL3Flush = GetParam(); MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); executionEnvironment.rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); if 
(executionEnvironment.rootDeviceEnvironments[0]->isFullRangeSvm() || is32bit) { return; } OsAgnosticMemoryManager memoryManager(executionEnvironment); auto hostPtr = reinterpret_cast(0x5001); AllocationProperties properties{0, false, 13, AllocationType::EXTERNAL_HOST_PTR, false, mockDeviceBitfield}; properties.flags.flushL3RequiredForRead = properties.flags.flushL3RequiredForWrite = requiresL3Flush; auto allocation = memoryManager.allocateGraphicsMemoryWithProperties(properties, hostPtr); EXPECT_NE(nullptr, allocation); EXPECT_EQ(0u, allocation->fragmentsStorage.fragmentCount); EXPECT_EQ(requiresL3Flush, allocation->isFlushL3Required()); memoryManager.freeGraphicsMemory(allocation); } TEST_P(OsAgnosticMemoryManagerWithParams, givenFullGpuAddressSpaceWhenAllocateGraphicsMemoryForHostPtrIsCalledThenAllocationWithFragmentsIsCreated) { bool requiresL3Flush = GetParam(); MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); executionEnvironment.rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); if ((!executionEnvironment.rootDeviceEnvironments[0]->isFullRangeSvm()) || !defaultHwInfo->capabilityTable.hostPtrTrackingEnabled) { GTEST_SKIP(); } OsAgnosticMemoryManager memoryManager(executionEnvironment); auto hostPtr = reinterpret_cast(0x5001); AllocationProperties properties{0, false, 13, AllocationType::EXTERNAL_HOST_PTR, false, mockDeviceBitfield}; properties.flags.flushL3RequiredForRead = properties.flags.flushL3RequiredForWrite = requiresL3Flush; auto allocation = memoryManager.allocateGraphicsMemoryWithProperties(properties, hostPtr); EXPECT_NE(nullptr, allocation); EXPECT_EQ(1u, allocation->fragmentsStorage.fragmentCount); EXPECT_EQ(requiresL3Flush, allocation->isFlushL3Required()); EXPECT_EQ(AllocationType::EXTERNAL_HOST_PTR, allocation->getAllocationType()); memoryManager.freeGraphicsMemory(allocation); } TEST_P(OsAgnosticMemoryManagerWithParams, 
givenDisabledHostPtrTrackingWhenAllocateGraphicsMemoryForHostPtrIsCalledThenAllocationWithoutFragmentsIsCreated) { if (is32bit) { GTEST_SKIP(); } DebugManagerStateRestore restore; DebugManager.flags.EnableHostPtrTracking.set(0); bool requiresL3Flush = GetParam(); MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); executionEnvironment.rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); if (!executionEnvironment.rootDeviceEnvironments[0]->isFullRangeSvm()) { return; } OsAgnosticMemoryManager memoryManager(executionEnvironment); auto hostPtr = reinterpret_cast(0x5001); AllocationProperties properties{0, false, 13, AllocationType::EXTERNAL_HOST_PTR, false, mockDeviceBitfield}; properties.flags.flushL3RequiredForRead = properties.flags.flushL3RequiredForWrite = requiresL3Flush; auto allocation = memoryManager.allocateGraphicsMemoryWithProperties(properties, hostPtr); EXPECT_NE(nullptr, allocation); EXPECT_EQ(0u, allocation->fragmentsStorage.fragmentCount); EXPECT_EQ(requiresL3Flush, allocation->isFlushL3Required()); memoryManager.freeGraphicsMemory(allocation); } INSTANTIATE_TEST_CASE_P(OsAgnosticMemoryManagerWithParams, OsAgnosticMemoryManagerWithParams, ::testing::Values(false, true)); TEST(OsAgnosticMemoryManager, givenOsAgnosticMemoryManagerAndFreeMemoryEnabledWhenGraphicsAllocationIsDestroyedThenFreeMemoryOnAubManagerShouldBeCalled) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableFreeMemory.set(true); MockExecutionEnvironment executionEnvironment; OsAgnosticMemoryManager memoryManager(executionEnvironment); GmmHelper gmmHelper(nullptr, defaultHwInfo.get()); MockAubManager *mockManager = new MockAubManager(); MockAubCenter *mockAubCenter = new MockAubCenter(defaultHwInfo.get(), gmmHelper, false, "file_name.aub", CommandStreamReceiverType::CSR_AUB); mockAubCenter->aubManager = std::unique_ptr(mockManager); executionEnvironment.rootDeviceEnvironments[0]->aubCenter.reset(mockAubCenter); auto gfxAllocation = 
memoryManager.allocateGraphicsMemoryWithProperties(MockAllocationProperties{0, MemoryConstants::pageSize}); EXPECT_FALSE(mockManager->freeMemoryCalled); memoryManager.freeGraphicsMemory(gfxAllocation); EXPECT_TRUE(mockManager->freeMemoryCalled); } TEST(OsAgnosticMemoryManager, givenOsAgnosticMemoryManagerAndFreeMemoryDisabledWhenGraphicsAllocationIsDestroyedThenFreeMemoryOnAubManagerShouldBeCalled) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableFreeMemory.set(false); MockExecutionEnvironment executionEnvironment; OsAgnosticMemoryManager memoryManager(executionEnvironment); GmmHelper gmmHelper(nullptr, defaultHwInfo.get()); MockAubManager *mockManager = new MockAubManager(); MockAubCenter *mockAubCenter = new MockAubCenter(defaultHwInfo.get(), gmmHelper, false, "file_name.aub", CommandStreamReceiverType::CSR_AUB); mockAubCenter->aubManager = std::unique_ptr(mockManager); executionEnvironment.rootDeviceEnvironments[0]->aubCenter.reset(mockAubCenter); auto gfxAllocation = memoryManager.allocateGraphicsMemoryWithProperties(MockAllocationProperties{0, MemoryConstants::pageSize}); EXPECT_FALSE(mockManager->freeMemoryCalled); memoryManager.freeGraphicsMemory(gfxAllocation); EXPECT_FALSE(mockManager->freeMemoryCalled); } TEST(OsAgnosticMemoryManager, givenOsAgnosticMemoryManagerWhenGpuAddressIsReservedAndFreedThenAddressFromGfxPartitionIsUsed) { MockExecutionEnvironment executionEnvironment; OsAgnosticMemoryManager memoryManager(executionEnvironment); auto addressRange = memoryManager.reserveGpuAddress(MemoryConstants::pageSize, 0); EXPECT_LE(memoryManager.getGfxPartition(0)->getHeapBase(HeapIndex::HEAP_STANDARD), GmmHelper::decanonize(addressRange.address)); EXPECT_GT(memoryManager.getGfxPartition(0)->getHeapLimit(HeapIndex::HEAP_STANDARD), GmmHelper::decanonize(addressRange.address)); memoryManager.freeGpuAddress(addressRange, 0); } TEST(OsAgnosticMemoryManager, 
givenOsAgnosticMemoryManagerWhenCheckedForIndirectAllocationsAsPackSupportThenFalseIsReturned) { MockExecutionEnvironment executionEnvironment; OsAgnosticMemoryManager memoryManager(executionEnvironment); EXPECT_FALSE(memoryManager.allowIndirectAllocationsAsPack(0u)); } TEST(OsAgnosticMemoryManager, givenOsAgnosticMemoryManagerWhenVerifyHandleThenReturnTrue) { MockExecutionEnvironment executionEnvironment; OsAgnosticMemoryManager memoryManager(executionEnvironment); osHandle testOSHandle = 1; EXPECT_TRUE(memoryManager.verifyHandle(testOSHandle, 0, 0)); } TEST(OsAgnosticMemoryManager, givenOsAgnosticMemoryManagerWhenisNTHandleThenReturnFalse) { MockExecutionEnvironment executionEnvironment; OsAgnosticMemoryManager memoryManager(executionEnvironment); osHandle testOSHandle = 1; EXPECT_FALSE(memoryManager.isNTHandle(testOSHandle, 0)); } TEST(OsAgnosticMemoryManager, givenMemoryManagerWhenGpuAddressIsSetThenAllocationWithSpecifiedGpuAddressInSystemMemoryIsCreated) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); auto memoryManager = new OsAgnosticMemoryManager(executionEnvironment); if (memoryManager->isLimitedGPU(0)) { GTEST_SKIP(); } DeviceBitfield deviceBitfield(1); std::unique_ptr csr(createCommandStream(executionEnvironment, 0u, deviceBitfield)); executionEnvironment.memoryManager.reset(memoryManager); auto osContext = memoryManager->createAndRegisterOsContext(csr.get(), EngineDescriptorHelper::getDefaultDescriptor(HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*defaultHwInfo)[0], PreemptionHelper::getDefaultPreemptionMode(*defaultHwInfo))); MockAllocationProperties properties = {0, MemoryConstants::pageSize}; properties.osContext = osContext; properties.gpuAddress = 0x2000; auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(properties); EXPECT_EQ(MemoryPool::System4KBPages, allocation->getMemoryPool()); EXPECT_EQ(0x2000u, allocation->getGpuAddress()); 
memoryManager->freeGraphicsMemory(allocation); } TEST(OsAgnosticMemoryManager, givenOsAgnosticMemoryManagerWhenOsEnabled64kbPagesFalseThenIs64kbPagesEnabledReturnTrue) { MockExecutionEnvironment executionEnvironment; VariableBackup OsEnabled64kbPagesBackup(&OSInterface::osEnabled64kbPages); OsAgnosticMemoryManager memoryManager(executionEnvironment); auto hwInfo = *defaultHwInfo; OSInterface::osEnabled64kbPages = false; hwInfo.capabilityTable.ftr64KBpages = true; EXPECT_TRUE(memoryManager.is64kbPagesEnabled(&hwInfo)); OSInterface::osEnabled64kbPages = true; EXPECT_TRUE(memoryManager.is64kbPagesEnabled(&hwInfo)); } TEST(OsAgnosticMemoryManager, givenOsAgnosticMemoryManagerWhenCheckIs64kbPagesEnabledThenOsEnabled64PkbPagesIsNotAffectedReturnedValue) { MockExecutionEnvironment executionEnvironment; VariableBackup OsEnabled64kbPagesBackup(&OSInterface::osEnabled64kbPages); OsAgnosticMemoryManager memoryManager(executionEnvironment); auto hwInfo = *defaultHwInfo; OSInterface::osEnabled64kbPages = true; hwInfo.capabilityTable.ftr64KBpages = true; EXPECT_TRUE(memoryManager.is64kbPagesEnabled(&hwInfo)); hwInfo.capabilityTable.ftr64KBpages = false; EXPECT_FALSE(memoryManager.is64kbPagesEnabled(&hwInfo)); } TEST(OsAgnosticMemoryManager, givenOsAgnosticMemoryManagerWithFlagEnable64kbpagesWhenCheckIs64kbPagesEnabledThenProperValueIsReturned) { DebugManagerStateRestore dbgRestore; MockExecutionEnvironment executionEnvironment; OsAgnosticMemoryManager memoryManager(executionEnvironment); auto hwInfo = *defaultHwInfo; DebugManager.flags.Enable64kbpages.set(true); hwInfo.capabilityTable.ftr64KBpages = true; EXPECT_TRUE(memoryManager.is64kbPagesEnabled(&hwInfo)); DebugManager.flags.Enable64kbpages.set(true); hwInfo.capabilityTable.ftr64KBpages = false; EXPECT_FALSE(memoryManager.is64kbPagesEnabled(&hwInfo)); DebugManager.flags.Enable64kbpages.set(false); hwInfo.capabilityTable.ftr64KBpages = false; EXPECT_FALSE(memoryManager.is64kbPagesEnabled(&hwInfo)); 
DebugManager.flags.Enable64kbpages.set(false); hwInfo.capabilityTable.ftr64KBpages = true; EXPECT_FALSE(memoryManager.is64kbPagesEnabled(&hwInfo)); } TEST(MemoryManager, givenSharedResourceCopyWhenAllocatingGraphicsMemoryThenAllocateGraphicsMemoryForImageIsCalled) { ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); MockMemoryManager memoryManager(false, true, *executionEnvironment); ImageDescriptor imgDesc = {}; imgDesc.imageHeight = 1; imgDesc.imageWidth = 1; imgDesc.imageType = ImageType::Image2D; auto imgInfo = MockGmm::initImgInfo(imgDesc, 0, nullptr); AllocationData allocationData; allocationData.imgInfo = &imgInfo; allocationData.type = AllocationType::SHARED_RESOURCE_COPY; auto imageAllocation = memoryManager.allocateGraphicsMemory(allocationData); EXPECT_NE(nullptr, imageAllocation); EXPECT_TRUE(memoryManager.allocateForImageCalled); memoryManager.freeGraphicsMemory(imageAllocation); } TEST(MemoryManager, givenImageAndLimitedGPUWhenAllocatingGraphicsMemoryThenAllocate32BitGraphicsMemoryImplIsNotCalled) { ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); MockMemoryManager memoryManager(false, true, *executionEnvironment); memoryManager.limitedGPU = true; ImageDescriptor imgDesc = {}; imgDesc.imageHeight = 1; imgDesc.imageWidth = 1; imgDesc.imageType = ImageType::Image2D; auto imgInfo = MockGmm::initImgInfo(imgDesc, 0, nullptr); AllocationData allocationData; allocationData.imgInfo = &imgInfo; allocationData.type = AllocationType::IMAGE; auto imageAllocation = memoryManager.allocateGraphicsMemory(allocationData); EXPECT_NE(nullptr, imageAllocation); EXPECT_TRUE(memoryManager.allocateForImageCalled); EXPECT_FALSE(memoryManager.allocate32BitGraphicsMemoryImplCalled); memoryManager.freeGraphicsMemory(imageAllocation); } TEST(MemoryManager, givenBufferAndLimitedGPUWhenAllocatingGraphicsMemoryThenAllocate32BitGraphicsMemoryImplIsCalled) { ExecutionEnvironment *executionEnvironment = 
platform()->peekExecutionEnvironment(); MockMemoryManager memoryManager(false, true, *executionEnvironment); memoryManager.limitedGPU = true; AllocationData allocationData; allocationData.size = 4096u; allocationData.type = AllocationType::BUFFER; auto bufferAllocation = memoryManager.allocateGraphicsMemory(allocationData); EXPECT_NE(nullptr, bufferAllocation); EXPECT_FALSE(memoryManager.allocateForImageCalled); EXPECT_TRUE(memoryManager.allocate32BitGraphicsMemoryImplCalled); memoryManager.freeGraphicsMemory(bufferAllocation); } TEST(MemoryManager, givenBufferHostMemoryAndHostPtrTrackingDisabledWhenAllocatingGraphicsMemoryThenAllocateGraphicsMemoryForNonSvmHostPtrIsCalled) { ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); HardwareInfo hwInfoLocal = *defaultHwInfo; hwInfoLocal.capabilityTable.hostPtrTrackingEnabled = false; executionEnvironment->rootDeviceEnvironments[0u]->setHwInfo(&hwInfoLocal); MockMemoryManager memoryManager(false, true, *executionEnvironment); char bufferData[4096]{}; AllocationData allocationData{}; allocationData.hostPtr = bufferData; allocationData.size = 4096u; allocationData.type = AllocationType::BUFFER_HOST_MEMORY; auto bufferAllocation = memoryManager.allocateGraphicsMemory(allocationData); EXPECT_NE(nullptr, bufferAllocation); EXPECT_FALSE(memoryManager.allocateForImageCalled); if (!is32bit) { EXPECT_TRUE(memoryManager.allocateGraphicsMemoryForNonSvmHostPtrCalled); } memoryManager.freeGraphicsMemory(bufferAllocation); } TEST(MemoryManager, givenBufferHostMemoryAndHostPtrTrackingDisabledAndForce32bitAllocationsWhenAllocatingGraphicsMemoryThenAllocateGraphicsMemoryForNonSvmHostPtrIsNotCalled) { ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); HardwareInfo hwInfoLocal = *defaultHwInfo; hwInfoLocal.capabilityTable.hostPtrTrackingEnabled = false; executionEnvironment->rootDeviceEnvironments[0u]->setHwInfo(&hwInfoLocal); MockMemoryManager memoryManager(false, true, 
*executionEnvironment); memoryManager.setForce32BitAllocations(true); char bufferData[4096]{}; AllocationData allocationData{}; allocationData.hostPtr = bufferData; allocationData.size = 4096u; allocationData.type = AllocationType::BUFFER_HOST_MEMORY; auto bufferAllocation = memoryManager.allocateGraphicsMemory(allocationData); EXPECT_NE(nullptr, bufferAllocation); EXPECT_FALSE(memoryManager.allocateGraphicsMemoryForNonSvmHostPtrCalled); memoryManager.freeGraphicsMemory(bufferAllocation); } TEST(MemoryManager, givenShareableWhenAllocatingGraphicsMemoryThenAllocateShareableIsCalled) { ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); MockMemoryManager memoryManager(false, true, *executionEnvironment); AllocationData allocationData; allocationData.size = 4096u; allocationData.type = AllocationType::BUFFER; allocationData.flags.shareable = true; auto allocation = memoryManager.allocateGraphicsMemory(allocationData); EXPECT_NE(nullptr, allocation); EXPECT_TRUE(memoryManager.allocateForShareableCalled); memoryManager.freeGraphicsMemory(allocation); } TEST_F(MemoryAllocatorTest, GivenSizeWhenGmmIsCreatedThenNonNullPointerIsReturned) { Gmm *gmm = new Gmm(device->getGmmClientContext(), nullptr, 65536, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true); EXPECT_NE(nullptr, gmm); delete gmm; } typedef Test MemoryManagerWithCsrTest; TEST_F(MemoryManagerWithCsrTest, GivenAllocationsInHostPtrManagerWhenBiggerOverllapingAllocationIsCreatedAndNothingToCleanThenAbortExecution) { void *cpuPtr1 = (void *)0x100004; void *cpuPtr2 = (void *)0x101008; void *cpuPtr3 = (void *)0x100000; auto hostPtrManager = static_cast(memoryManager->getHostPtrManager()); auto graphicsAllocation1 = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, MemoryConstants::pageSize}, cpuPtr1); EXPECT_EQ(2u, hostPtrManager->getFragmentCount()); auto graphicsAllocation2 = 
memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, MemoryConstants::pageSize * 3}, cpuPtr2); EXPECT_EQ(4u, hostPtrManager->getFragmentCount()); GraphicsAllocation *graphicsAllocation3 = nullptr; bool catchMe = false; try { graphicsAllocation3 = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, MemoryConstants::pageSize * 10}, cpuPtr3); } catch (...) { catchMe = true; } EXPECT_NE(nullptr, graphicsAllocation1); EXPECT_NE(nullptr, graphicsAllocation2); EXPECT_EQ(nullptr, graphicsAllocation3); EXPECT_TRUE(catchMe); EXPECT_EQ((uintptr_t)cpuPtr1 & ~MemoryConstants::pageMask, (uintptr_t)graphicsAllocation1->fragmentsStorage.fragmentStorageData[0].cpuPtr); EXPECT_EQ((uintptr_t)cpuPtr2 & ~MemoryConstants::pageMask, (uintptr_t)graphicsAllocation2->fragmentsStorage.fragmentStorageData[0].cpuPtr); EXPECT_EQ(((uintptr_t)cpuPtr2 + MemoryConstants::pageSize) & ~MemoryConstants::pageMask, (uintptr_t)graphicsAllocation2->fragmentsStorage.fragmentStorageData[1].cpuPtr); memoryManager->freeGraphicsMemory(graphicsAllocation1); memoryManager->freeGraphicsMemory(graphicsAllocation2); memoryManager->freeGraphicsMemory(graphicsAllocation3); } TEST_F(MemoryManagerWithCsrTest, GivenAllocationsInHostPtrManagerReadyForCleaningWhenBiggerOverllapingAllocationIsCreatedThenTemporaryAllocationsAreCleaned) { void *cpuPtr1 = (void *)0x100004; void *cpuPtr2 = (void *)0x101008; void *cpuPtr3 = (void *)0x100000; auto hostPtrManager = static_cast(memoryManager->getHostPtrManager()); auto graphicsAllocation1 = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, MemoryConstants::pageSize}, cpuPtr1); EXPECT_EQ(2u, hostPtrManager->getFragmentCount()); auto graphicsAllocation2 = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, MemoryConstants::pageSize * 3}, cpuPtr2); 
EXPECT_EQ(4u, hostPtrManager->getFragmentCount()); EXPECT_NE(nullptr, graphicsAllocation1); EXPECT_NE(nullptr, graphicsAllocation2); auto fragment1 = hostPtrManager->getFragment({alignDown(cpuPtr1, MemoryConstants::pageSize), csr->getRootDeviceIndex()}); EXPECT_NE(nullptr, fragment1); auto fragment2 = hostPtrManager->getFragment({alignUp(cpuPtr1, MemoryConstants::pageSize), csr->getRootDeviceIndex()}); EXPECT_NE(nullptr, fragment2); auto fragment3 = hostPtrManager->getFragment({alignDown(cpuPtr2, MemoryConstants::pageSize), csr->getRootDeviceIndex()}); EXPECT_NE(nullptr, fragment3); auto fragment4 = hostPtrManager->getFragment({alignUp(cpuPtr2, MemoryConstants::pageSize), csr->getRootDeviceIndex()}); EXPECT_NE(nullptr, fragment4); uint32_t taskCountReady = 1; auto storage = csr->getInternalAllocationStorage(); storage->storeAllocationWithTaskCount(std::unique_ptr(graphicsAllocation1), TEMPORARY_ALLOCATION, taskCountReady); storage->storeAllocationWithTaskCount(std::unique_ptr(graphicsAllocation2), TEMPORARY_ALLOCATION, taskCountReady); EXPECT_EQ(4u, hostPtrManager->getFragmentCount()); // All fragments ready for release taskCount = taskCountReady; csr->latestSentTaskCount = taskCountReady; auto graphicsAllocation3 = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, MemoryConstants::pageSize * 10}, cpuPtr3); EXPECT_NE(nullptr, graphicsAllocation3); // no more overlapping allocation, previous allocations cleaned EXPECT_EQ(1u, graphicsAllocation3->fragmentsStorage.fragmentCount); EXPECT_EQ(cpuPtr3, graphicsAllocation3->fragmentsStorage.fragmentStorageData[0].cpuPtr); memoryManager->freeGraphicsMemory(graphicsAllocation3); } TEST_F(MemoryManagerWithCsrTest, givenAllocationThatWasNotUsedWhencheckGpuUsageAndDestroyGraphicsAllocationsIsCalledThenItIsDestroyedInPlace) { auto notUsedAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), 
MemoryConstants::pageSize}); memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(notUsedAllocation); EXPECT_TRUE(csr->getTemporaryAllocations().peekIsEmpty()); } TEST_F(MemoryManagerWithCsrTest, givenAllocationThatWasUsedAndIsCompletedWhencheckGpuUsageAndDestroyGraphicsAllocationsIsCalledThenItIsDestroyedInPlace) { auto usedAllocationButGpuCompleted = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); auto tagAddress = csr->getTagAddress(); ASSERT_NE(0u, *tagAddress); usedAllocationButGpuCompleted->updateTaskCount(*tagAddress - 1, csr->getOsContext().getContextId()); memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(usedAllocationButGpuCompleted); EXPECT_TRUE(csr->getTemporaryAllocations().peekIsEmpty()); } TEST_F(MemoryManagerWithCsrTest, givenAllocationThatWasUsedAndIsNotCompletedWhencheckGpuUsageAndDestroyGraphicsAllocationsIsCalledThenItIsAddedToTemporaryAllocationList) { memoryManager->createAndRegisterOsContext(csr.get(), EngineDescriptorHelper::getDefaultDescriptor(HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*defaultHwInfo)[0], PreemptionHelper::getDefaultPreemptionMode(*defaultHwInfo))); auto usedAllocationAndNotGpuCompleted = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); auto tagAddress = csr->getTagAddress(); usedAllocationAndNotGpuCompleted->updateTaskCount(*tagAddress + 1, csr->getOsContext().getContextId()); memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(usedAllocationAndNotGpuCompleted); EXPECT_FALSE(csr->getTemporaryAllocations().peekIsEmpty()); EXPECT_EQ(csr->getTemporaryAllocations().peekHead(), usedAllocationAndNotGpuCompleted); // change task count so cleanup will not clear alloc in use usedAllocationAndNotGpuCompleted->updateTaskCount(csr->peekLatestFlushedTaskCount(), csr->getOsContext().getContextId()); } class 
MockAlignMallocMemoryManager : public MockMemoryManager { public: MockAlignMallocMemoryManager(ExecutionEnvironment &executionEnvironment) : MockMemoryManager(executionEnvironment) { testMallocRestrictions.minAddress = 0; alignMallocRestrictions = nullptr; alignMallocCount = 0; alignMallocMaxIter = 3; returnNullBad = false; returnNullGood = false; } AlignedMallocRestrictions testMallocRestrictions; AlignedMallocRestrictions *alignMallocRestrictions; static const uintptr_t alignMallocMinAddress = 0x100000; static const uintptr_t alignMallocStep = 10; int alignMallocMaxIter; int alignMallocCount; bool returnNullBad; bool returnNullGood; void *alignedMallocWrapper(size_t size, size_t align) override { if (alignMallocCount < alignMallocMaxIter) { alignMallocCount++; if (!returnNullBad) { return reinterpret_cast(alignMallocMinAddress - alignMallocStep); } else { return nullptr; } } alignMallocCount = 0; if (!returnNullGood) { return reinterpret_cast(alignMallocMinAddress + alignMallocStep); } else { return nullptr; } }; void alignedFreeWrapper(void *) override { alignMallocCount = 0; } AlignedMallocRestrictions *getAlignedMallocRestrictions() override { return alignMallocRestrictions; } }; class MockAlignMallocMemoryManagerTest : public MemoryAllocatorTest { public: MockAlignMallocMemoryManager *alignedMemoryManager = nullptr; void SetUp() override { MemoryAllocatorTest::SetUp(); MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); alignedMemoryManager = new (std::nothrow) MockAlignMallocMemoryManager(executionEnvironment); // assert we have memory manager ASSERT_NE(nullptr, memoryManager); } void TearDown() override { alignedMemoryManager->alignedFreeWrapper(nullptr); delete alignedMemoryManager; MemoryAllocatorTest::TearDown(); } }; TEST_F(MockAlignMallocMemoryManagerTest, givenMemoryManagerWhenNullAlignRestrictionsThenNotUseRestrictions) { EXPECT_EQ(nullptr, memoryManager->getAlignedMallocRestrictions()); EXPECT_EQ(nullptr, 
alignedMemoryManager->getAlignedMallocRestrictions()); uintptr_t expectedVal = MockAlignMallocMemoryManager::alignMallocMinAddress - MockAlignMallocMemoryManager::alignMallocStep; uintptr_t memVal = reinterpret_cast(alignedMemoryManager->allocateSystemMemory(0x1000, 0x1000)); EXPECT_EQ(expectedVal, memVal); } TEST_F(MockAlignMallocMemoryManagerTest, givenMemoryManagerWhenZeroAlignRestrictionsThenNotUseRestrictions) { alignedMemoryManager->alignMallocRestrictions = &alignedMemoryManager->testMallocRestrictions; EXPECT_NE(nullptr, alignedMemoryManager->getAlignedMallocRestrictions()); alignedMemoryManager->alignMallocCount = 0; uintptr_t expectedVal = MockAlignMallocMemoryManager::alignMallocMinAddress - MockAlignMallocMemoryManager::alignMallocStep; uintptr_t memVal = reinterpret_cast(alignedMemoryManager->allocateSystemMemory(0x1000, 0x1000)); EXPECT_EQ(expectedVal, memVal); alignedMemoryManager->alignMallocCount = alignedMemoryManager->alignMallocMaxIter + 1; expectedVal = MockAlignMallocMemoryManager::alignMallocMinAddress + MockAlignMallocMemoryManager::alignMallocStep; memVal = reinterpret_cast(alignedMemoryManager->allocateSystemMemory(0x1000, 0x1000)); EXPECT_EQ(expectedVal, memVal); } TEST_F(MockAlignMallocMemoryManagerTest, givenMemoryManagerWitNonZeroAlignRestrictionsWhenFirstGoodAddressThenUseRestrictionsAndReturnFirst) { alignedMemoryManager->alignMallocRestrictions = &alignedMemoryManager->testMallocRestrictions; alignedMemoryManager->testMallocRestrictions.minAddress = MockAlignMallocMemoryManager::alignMallocMinAddress; EXPECT_NE(nullptr, alignedMemoryManager->getAlignedMallocRestrictions()); alignedMemoryManager->alignMallocCount = alignedMemoryManager->alignMallocMaxIter + 1; uintptr_t expectedVal = MockAlignMallocMemoryManager::alignMallocMinAddress + MockAlignMallocMemoryManager::alignMallocStep; uintptr_t memVal = reinterpret_cast(alignedMemoryManager->allocateSystemMemory(0x1000, 0x1000)); EXPECT_EQ(expectedVal, memVal); } 
TEST_F(MockAlignMallocMemoryManagerTest, givenMemoryManagerWitNonZeroAlignRestrictionsWhenFirstNullAddressThenUseRestrictionsAndReturnFirstNull) { alignedMemoryManager->alignMallocRestrictions = &alignedMemoryManager->testMallocRestrictions; alignedMemoryManager->testMallocRestrictions.minAddress = MockAlignMallocMemoryManager::alignMallocMinAddress; EXPECT_NE(nullptr, alignedMemoryManager->getAlignedMallocRestrictions()); alignedMemoryManager->alignMallocCount = alignedMemoryManager->alignMallocMaxIter + 1; alignedMemoryManager->returnNullGood = true; uintptr_t expectedVal = 0; uintptr_t memVal = reinterpret_cast(alignedMemoryManager->allocateSystemMemory(0x1000, 0x1000)); EXPECT_EQ(expectedVal, memVal); } TEST_F(MockAlignMallocMemoryManagerTest, givenMemoryManagerWitNonZeroAlignRestrictionsWhenFirstBadAnotherGoodAddressThenUseRestrictionsAndReturnAnother) { alignedMemoryManager->alignMallocRestrictions = &alignedMemoryManager->testMallocRestrictions; alignedMemoryManager->testMallocRestrictions.minAddress = MockAlignMallocMemoryManager::alignMallocMinAddress; EXPECT_NE(nullptr, alignedMemoryManager->getAlignedMallocRestrictions()); alignedMemoryManager->alignMallocCount = 0; uintptr_t expectedVal = MockAlignMallocMemoryManager::alignMallocMinAddress + MockAlignMallocMemoryManager::alignMallocStep; uintptr_t memVal = reinterpret_cast(alignedMemoryManager->allocateSystemMemory(0x1000, 0x1000)); EXPECT_EQ(expectedVal, memVal); } TEST_F(MockAlignMallocMemoryManagerTest, givenMemoryManagerWitNonZeroAlignRestrictionsWhenFirstBadAnotherNullAddressThenUseRestrictionsAndReturnNull) { alignedMemoryManager->alignMallocRestrictions = &alignedMemoryManager->testMallocRestrictions; alignedMemoryManager->testMallocRestrictions.minAddress = MockAlignMallocMemoryManager::alignMallocMinAddress; EXPECT_NE(nullptr, alignedMemoryManager->getAlignedMallocRestrictions()); alignedMemoryManager->alignMallocCount = 0; alignedMemoryManager->returnNullGood = true; uintptr_t expectedVal = 0; 
uintptr_t memVal = reinterpret_cast(alignedMemoryManager->allocateSystemMemory(0x1000, 0x1000)); EXPECT_EQ(expectedVal, memVal); } TEST(GraphicsAllocation, givenCpuPointerBasedConstructorWhenGraphicsAllocationIsCreatedThenGpuAddressHasCorrectValue) { uintptr_t address = 0xf0000000; void *addressWithTrailingBitSet = reinterpret_cast(address); uint64_t expectedGpuAddress = 0xf0000000; MockGraphicsAllocation graphicsAllocation(addressWithTrailingBitSet, 1u); EXPECT_EQ(expectedGpuAddress, graphicsAllocation.getGpuAddress()); } using GraphicsAllocationTests = ::testing::Test; HWTEST_F(GraphicsAllocationTests, givenAllocationUsedOnlyByNonDefaultCsrWhenCheckingUsageBeforeDestroyThenStoreItAsTemporaryAllocation) { auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); auto &lowPriorityEngine = device->getEngine(device->getHardwareInfo().capabilityTable.defaultEngineType, EngineUsage::LowPriority); auto nonDefaultOsContext = lowPriorityEngine.osContext; auto nonDefaultCsr = static_cast *>(lowPriorityEngine.commandStreamReceiver); auto memoryManager = device->getExecutionEnvironment()->memoryManager.get(); auto graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), MemoryConstants::pageSize}); nonDefaultCsr->taskCount = *nonDefaultCsr->getTagAddress() + 1; nonDefaultCsr->latestFlushedTaskCount = *nonDefaultCsr->getTagAddress() + 1; graphicsAllocation->updateTaskCount(*nonDefaultCsr->getTagAddress() + 1, nonDefaultOsContext->getContextId()); memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(graphicsAllocation); EXPECT_NE(nullptr, nonDefaultCsr->getInternalAllocationStorage()->getTemporaryAllocations().peekHead()); (*nonDefaultCsr->getTagAddress())++; // no need to call freeGraphicsAllocation } HWTEST_F(GraphicsAllocationTests, givenAllocationUsedOnlyByNonDefaultDeviceWhenCheckingUsageBeforeDestroyThenStoreItAsTemporaryAllocation) { ExecutionEnvironment 
*executionEnvironment = platform()->peekExecutionEnvironment(); auto device = std::unique_ptr(Device::create(executionEnvironment, 0u)); auto &defaultCommandStreamReceiver = device->getGpgpuCommandStreamReceiver(); auto &nonDefaultCommandStreamReceiver = static_cast &>(*device->commandStreamReceivers[1]); auto memoryManager = executionEnvironment->memoryManager.get(); auto graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), MemoryConstants::pageSize}); auto notReadyTaskCount = *nonDefaultCommandStreamReceiver.getTagAddress() + 1; EXPECT_NE(defaultCommandStreamReceiver.getOsContext().getContextId(), nonDefaultCommandStreamReceiver.getOsContext().getContextId()); nonDefaultCommandStreamReceiver.taskCount = notReadyTaskCount; nonDefaultCommandStreamReceiver.latestFlushedTaskCount = notReadyTaskCount; graphicsAllocation->updateTaskCount(notReadyTaskCount, nonDefaultCommandStreamReceiver.getOsContext().getContextId()); EXPECT_TRUE(nonDefaultCommandStreamReceiver.getInternalAllocationStorage()->getTemporaryAllocations().peekIsEmpty()); memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(graphicsAllocation); EXPECT_FALSE(nonDefaultCommandStreamReceiver.getInternalAllocationStorage()->getTemporaryAllocations().peekIsEmpty()); (*nonDefaultCommandStreamReceiver.getTagAddress())++; // no need to call freeGraphicsAllocation } HWTEST_F(GraphicsAllocationTests, givenAllocationUsedByManyOsContextsWhenCheckingUsageBeforeDestroyThenMultiContextDestructorIsUsedForWaitingForAllOsContexts) { ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); auto memoryManager = new MockMemoryManager(false, false, *executionEnvironment); executionEnvironment->memoryManager.reset(memoryManager); auto multiContextDestructor = new MockDeferredDeleter(); multiContextDestructor->expectDrainBlockingValue(false); memoryManager->multiContextResourceDestructor.reset(multiContextDestructor); auto 
device = std::unique_ptr(MockDevice::create(executionEnvironment, 0u)); auto &lowPriorityEngine = device->getEngine(device->getHardwareInfo().capabilityTable.defaultEngineType, EngineUsage::LowPriority); auto nonDefaultOsContext = lowPriorityEngine.osContext; auto nonDefaultCsr = static_cast *>(lowPriorityEngine.commandStreamReceiver); auto defaultCsr = static_cast *>(device->getDefaultEngine().commandStreamReceiver); auto defaultOsContext = device->getDefaultEngine().osContext; EXPECT_FALSE(defaultOsContext->isLowPriority()); EXPECT_TRUE(nonDefaultOsContext->isLowPriority()); auto graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), MemoryConstants::pageSize}); nonDefaultCsr->taskCount = *nonDefaultCsr->getTagAddress(); nonDefaultCsr->latestFlushedTaskCount = *nonDefaultCsr->getTagAddress(); graphicsAllocation->updateTaskCount(*nonDefaultCsr->getTagAddress(), nonDefaultOsContext->getContextId()); graphicsAllocation->updateTaskCount(0, defaultOsContext->getContextId()); // used and ready EXPECT_TRUE(graphicsAllocation->isUsedByManyOsContexts()); memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(graphicsAllocation); EXPECT_EQ(1, multiContextDestructor->deferDeletionCalled); EXPECT_TRUE(nonDefaultCsr->getInternalAllocationStorage()->getTemporaryAllocations().peekIsEmpty()); EXPECT_TRUE(defaultCsr->getInternalAllocationStorage()->getTemporaryAllocations().peekIsEmpty()); } TEST(GraphicsAllocation, givenSharedHandleBasedConstructorWhenGraphicsAllocationIsCreatedThenGpuAddressHasCorrectValue) { uintptr_t address = 0xf0000000; void *addressWithTrailingBitSet = reinterpret_cast(address); uint64_t expectedGpuAddress = 0xf0000000; osHandle sharedHandle{}; GraphicsAllocation graphicsAllocation(0, AllocationType::UNKNOWN, addressWithTrailingBitSet, 1u, sharedHandle, MemoryPool::MemoryNull, 0u); EXPECT_EQ(expectedGpuAddress, graphicsAllocation.getGpuAddress()); } TEST(ResidencyDataTest, 
givenOsContextWhenItIsRegisteredToMemoryManagerThenRefCountIncreases) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); auto memoryManager = new MockMemoryManager(false, false, executionEnvironment); executionEnvironment.memoryManager.reset(memoryManager); DeviceBitfield deviceBitfield(1); std::unique_ptr csr(createCommandStream(executionEnvironment, 0u, deviceBitfield)); memoryManager->createAndRegisterOsContext(csr.get(), EngineDescriptorHelper::getDefaultDescriptor(HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*defaultHwInfo)[0], PreemptionHelper::getDefaultPreemptionMode(*defaultHwInfo))); EXPECT_EQ(1u, memoryManager->getRegisteredEnginesCount()); EXPECT_EQ(1, memoryManager->registeredEngines[0].osContext->getRefInternalCount()); } TEST(MemoryManagerRegisteredEnginesTest, givenOsContextWhenItIsUnregisteredFromMemoryManagerThenRefCountDecreases) { auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); auto memoryManager = device->getMemoryManager(); auto &engine = device->getDefaultEngine(); engine.osContext->incRefInternal(); EXPECT_EQ(2, engine.osContext->getRefInternalCount()); memoryManager->unregisterEngineForCsr(engine.commandStreamReceiver); EXPECT_EQ(1, engine.osContext->getRefInternalCount()); engine.osContext->decRefInternal(); } TEST(ResidencyDataTest, givenDeviceBitfieldWhenCreatingOsContextThenSetValidValue) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); auto memoryManager = new MockMemoryManager(false, false, executionEnvironment); executionEnvironment.memoryManager.reset(memoryManager); DeviceBitfield deviceBitfield(0b11); std::unique_ptr csr(createCommandStream(executionEnvironment, 0u, deviceBitfield)); PreemptionMode preemptionMode = PreemptionMode::MidThread; memoryManager->createAndRegisterOsContext(csr.get(), 
EngineDescriptorHelper::getDefaultDescriptor(HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*defaultHwInfo)[0], preemptionMode, deviceBitfield)); EXPECT_EQ(2u, memoryManager->registeredEngines[0].osContext->getNumSupportedDevices()); EXPECT_EQ(deviceBitfield, memoryManager->registeredEngines[0].osContext->getDeviceBitfield()); EXPECT_EQ(preemptionMode, memoryManager->registeredEngines[0].osContext->getPreemptionMode()); } TEST(ResidencyDataTest, givenTwoOsContextsWhenTheyAreRegisteredFromHigherToLowerThenProperSizeIsReturned) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get(), true, 2u); auto memoryManager = new MockMemoryManager(false, false, executionEnvironment); executionEnvironment.memoryManager.reset(memoryManager); DeviceBitfield deviceBitfield(1); std::unique_ptr csr(createCommandStream(executionEnvironment, 0u, deviceBitfield)); std::unique_ptr csr1(createCommandStream(executionEnvironment, 1u, deviceBitfield)); memoryManager->createAndRegisterOsContext(csr.get(), EngineDescriptorHelper::getDefaultDescriptor(HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*defaultHwInfo)[0], PreemptionHelper::getDefaultPreemptionMode(*defaultHwInfo))); memoryManager->createAndRegisterOsContext(csr1.get(), EngineDescriptorHelper::getDefaultDescriptor(HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*defaultHwInfo)[0], PreemptionHelper::getDefaultPreemptionMode(*defaultHwInfo))); EXPECT_EQ(2u, memoryManager->getRegisteredEnginesCount()); EXPECT_EQ(1, memoryManager->registeredEngines[0].osContext->getRefInternalCount()); EXPECT_EQ(1, memoryManager->registeredEngines[1].osContext->getRefInternalCount()); } TEST(ResidencyDataTest, givenGpgpuEnginesWhenAskedForMaxOsContextCountThenValueIsGreaterOrEqual) { auto &engines = HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*defaultHwInfo); EXPECT_TRUE(MemoryManager::maxOsContextCount >= 
engines.size()); } TEST(ResidencyDataTest, givenResidencyDataWhenUpdateCompletionDataIsCalledThenItIsProperlyUpdated) { struct MockResidencyData : public ResidencyData { using ResidencyData::lastFenceValues; using ResidencyData::ResidencyData; }; MockResidencyData residency(MemoryManager::maxOsContextCount); MockOsContext osContext(0u, EngineDescriptorHelper::getDefaultDescriptor(HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*defaultHwInfo)[0], PreemptionHelper::getDefaultPreemptionMode(*defaultHwInfo))); MockOsContext osContext2(1u, EngineDescriptorHelper::getDefaultDescriptor(HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*defaultHwInfo)[0], PreemptionHelper::getDefaultPreemptionMode(*defaultHwInfo))); auto lastFenceValue = 45llu; auto lastFenceValue2 = 23llu; auto lastFenceValue3 = 373llu; EXPECT_EQ(MemoryManager::maxOsContextCount, residency.lastFenceValues.size()); residency.updateCompletionData(lastFenceValue, osContext.getContextId()); EXPECT_EQ(MemoryManager::maxOsContextCount, residency.lastFenceValues.size()); EXPECT_EQ(lastFenceValue, residency.lastFenceValues[0]); EXPECT_EQ(lastFenceValue, residency.getFenceValueForContextId(osContext.getContextId())); residency.updateCompletionData(lastFenceValue2, osContext2.getContextId()); EXPECT_EQ(MemoryManager::maxOsContextCount, residency.lastFenceValues.size()); EXPECT_EQ(lastFenceValue2, residency.lastFenceValues[1]); EXPECT_EQ(lastFenceValue2, residency.getFenceValueForContextId(osContext2.getContextId())); residency.updateCompletionData(lastFenceValue3, osContext2.getContextId()); EXPECT_EQ(lastFenceValue3, residency.lastFenceValues[1]); EXPECT_EQ(lastFenceValue3, residency.getFenceValueForContextId(osContext2.getContextId())); } TEST(MemoryManagerTest, givenMemoryManagerWhenLockIsCalledOnLockedResourceThenDoesNothing) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MockMemoryManager memoryManager(false, false, 
executionEnvironment); auto allocation = memoryManager.allocateGraphicsMemoryWithProperties(MockAllocationProperties{0, MemoryConstants::pageSize}); EXPECT_FALSE(allocation->isLocked()); auto ptr = memoryManager.MemoryManager::lockResource(allocation); EXPECT_TRUE(allocation->isLocked()); EXPECT_EQ(1u, memoryManager.lockResourceCalled); EXPECT_EQ(0u, memoryManager.unlockResourceCalled); auto ptr2 = memoryManager.MemoryManager::lockResource(allocation); EXPECT_TRUE(allocation->isLocked()); EXPECT_EQ(1u, memoryManager.lockResourceCalled); EXPECT_EQ(0u, memoryManager.unlockResourceCalled); EXPECT_EQ(ptr, ptr2); memoryManager.freeGraphicsMemory(allocation); } TEST(MemoryManagerTest, givenMemoryManagerWhenAllocationWasNotUnlockedThenItIsUnlockedDuringDestruction) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MockMemoryManager memoryManager(false, false, executionEnvironment); auto allocation = memoryManager.allocateGraphicsMemoryWithProperties(MockAllocationProperties{0, MemoryConstants::pageSize}); EXPECT_FALSE(allocation->isLocked()); memoryManager.MemoryManager::lockResource(allocation); EXPECT_TRUE(allocation->isLocked()); EXPECT_EQ(1u, memoryManager.lockResourceCalled); EXPECT_EQ(0u, memoryManager.unlockResourceCalled); memoryManager.freeGraphicsMemory(allocation); EXPECT_EQ(1u, memoryManager.unlockResourceCalled); } TEST(MemoryManagerTest, givenExecutionEnvrionmentWithCleanedRootDeviceExecutionsWhenFreeGraphicsMemoryIsCalledThenMemoryManagerDoesntCrash) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MockMemoryManager memoryManager(false, false, executionEnvironment); auto allocation = memoryManager.allocateGraphicsMemoryWithProperties(MockAllocationProperties{0, MemoryConstants::pageSize}); EXPECT_NE(nullptr, allocation); executionEnvironment.rootDeviceEnvironments.clear(); EXPECT_NO_THROW(memoryManager.freeGraphicsMemory(allocation)); } TEST(MemoryManagerTest, 
givenAllocationTypesThatMayNeedL3FlushWhenCallingGetAllocationDataThenFlushL3FlagIsCorrectlySet) { AllocationData allocData; AllocationProperties properties(mockRootDeviceIndex, 1, AllocationType::UNKNOWN, mockDeviceBitfield); properties.flags.flushL3RequiredForRead = 1; properties.flags.flushL3RequiredForWrite = 1; AllocationType allocationTypesThatMayNeedL3Flush[] = { AllocationType::BUFFER, AllocationType::BUFFER_HOST_MEMORY, AllocationType::EXTERNAL_HOST_PTR, AllocationType::GLOBAL_SURFACE, AllocationType::IMAGE, AllocationType::PIPE, AllocationType::SHARED_IMAGE, AllocationType::SHARED_BUFFER, AllocationType::SHARED_RESOURCE_COPY, AllocationType::SVM_ZERO_COPY, AllocationType::SVM_GPU, AllocationType::SVM_CPU, AllocationType::WRITE_COMBINED, AllocationType::MAP_ALLOCATION}; MockMemoryManager mockMemoryManager; for (auto allocationType : allocationTypesThatMayNeedL3Flush) { properties.allocationType = allocationType; mockMemoryManager.getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_TRUE(allocData.flags.flushL3); } properties.flags.flushL3RequiredForRead = 0; properties.flags.flushL3RequiredForWrite = 0; for (auto allocationType : allocationTypesThatMayNeedL3Flush) { properties.allocationType = allocationType; mockMemoryManager.getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_FALSE(allocData.flags.flushL3); } } TEST(MemoryManagerTest, givenNullHostPtrWhenIsCopyRequiredIsCalledThenFalseIsReturned) { ImageInfo imgInfo{}; EXPECT_FALSE(MockMemoryManager::isCopyRequired(imgInfo, nullptr)); } TEST(MemoryManagerTest, givenAllowedTilingWhenIsCopyRequiredIsCalledThenTrueIsReturned) { ImageInfo imgInfo{}; cl_image_desc imageDesc{}; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_width = 1; imageDesc.image_height = 1; cl_image_format imageFormat = {}; imageFormat.image_channel_data_type = CL_UNSIGNED_INT8; 
imageFormat.image_channel_order = CL_R; cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, defaultHwInfo->capabilityTable.supportsOcl21Features); imgInfo.imgDesc = Image::convertDescriptor(imageDesc); imgInfo.surfaceFormat = &surfaceFormat->surfaceFormat; imgInfo.rowPitch = imageDesc.image_width * surfaceFormat->surfaceFormat.ImageElementSizeInBytes; imgInfo.slicePitch = imgInfo.rowPitch * imageDesc.image_height; imgInfo.size = imgInfo.slicePitch; char memory; EXPECT_TRUE(MockMemoryManager::isCopyRequired(imgInfo, &memory)); } TEST(MemoryManagerTest, givenDifferentRowPitchWhenIsCopyRequiredIsCalledThenTrueIsReturned) { ImageInfo imgInfo{}; cl_image_desc imageDesc{}; imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D; imageDesc.image_width = 1; imageDesc.image_height = 1; imageDesc.image_row_pitch = 10; cl_image_format imageFormat = {}; imageFormat.image_channel_data_type = CL_UNSIGNED_INT8; imageFormat.image_channel_order = CL_R; cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, defaultHwInfo->capabilityTable.supportsOcl21Features); imgInfo.imgDesc = Image::convertDescriptor(imageDesc); imgInfo.surfaceFormat = &surfaceFormat->surfaceFormat; imgInfo.rowPitch = imageDesc.image_width * surfaceFormat->surfaceFormat.ImageElementSizeInBytes; imgInfo.slicePitch = imgInfo.rowPitch * imageDesc.image_height; imgInfo.size = imgInfo.slicePitch; char memory[10]; EXPECT_TRUE(MockMemoryManager::isCopyRequired(imgInfo, memory)); } TEST(MemoryManagerTest, givenDifferentSlicePitchAndTilingNotAllowedWhenIsCopyRequiredIsCalledThenTrueIsReturned) { ImageInfo imgInfo{}; cl_image_format imageFormat = {}; imageFormat.image_channel_data_type = CL_UNSIGNED_INT8; imageFormat.image_channel_order = CL_R; cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, 
&imageFormat, defaultHwInfo->capabilityTable.supportsOcl21Features); cl_image_desc imageDesc{}; imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D; imageDesc.image_width = 4; imageDesc.image_height = 2; imageDesc.image_slice_pitch = imageDesc.image_width * (imageDesc.image_height + 3) * surfaceFormat->surfaceFormat.ImageElementSizeInBytes; imgInfo.imgDesc = Image::convertDescriptor(imageDesc); imgInfo.surfaceFormat = &surfaceFormat->surfaceFormat; imgInfo.rowPitch = imageDesc.image_width * surfaceFormat->surfaceFormat.ImageElementSizeInBytes; imgInfo.slicePitch = imgInfo.rowPitch * imageDesc.image_height; imgInfo.size = imgInfo.slicePitch; char memory[8]; EXPECT_TRUE(MockMemoryManager::isCopyRequired(imgInfo, memory)); } TEST(MemoryManagerTest, givenNotCachelinAlignedPointerWhenIsCopyRequiredIsCalledThenTrueIsReturned) { ImageInfo imgInfo{}; cl_image_format imageFormat = {}; imageFormat.image_channel_data_type = CL_UNSIGNED_INT8; imageFormat.image_channel_order = CL_R; cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, defaultHwInfo->capabilityTable.supportsOcl21Features); cl_image_desc imageDesc{}; imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D; imageDesc.image_width = 4096; imageDesc.image_height = 1; imgInfo.imgDesc = Image::convertDescriptor(imageDesc); imgInfo.surfaceFormat = &surfaceFormat->surfaceFormat; imgInfo.rowPitch = imageDesc.image_width * surfaceFormat->surfaceFormat.ImageElementSizeInBytes; imgInfo.slicePitch = imgInfo.rowPitch * imageDesc.image_height; imgInfo.size = imgInfo.slicePitch; char memory[8]; EXPECT_TRUE(MockMemoryManager::isCopyRequired(imgInfo, &memory[1])); } TEST(MemoryManagerTest, givenCachelineAlignedPointerAndProperDescriptorValuesWhenIsCopyRequiredIsCalledThenFalseIsReturned) { ImageInfo imgInfo{}; cl_image_format imageFormat = {}; imageFormat.image_channel_data_type = CL_UNSIGNED_INT8; imageFormat.image_channel_order = CL_R; cl_mem_flags flags = 
CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, defaultHwInfo->capabilityTable.supportsOcl21Features); cl_image_desc imageDesc{}; imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D; imageDesc.image_width = 2; imageDesc.image_height = 1; imgInfo.imgDesc = Image::convertDescriptor(imageDesc); imgInfo.surfaceFormat = &surfaceFormat->surfaceFormat; imgInfo.rowPitch = imageDesc.image_width * surfaceFormat->surfaceFormat.ImageElementSizeInBytes; imgInfo.slicePitch = imgInfo.rowPitch * imageDesc.image_height; imgInfo.size = imgInfo.slicePitch; imgInfo.linearStorage = true; auto hostPtr = alignedMalloc(imgInfo.size, MemoryConstants::cacheLineSize); EXPECT_FALSE(MockMemoryManager::isCopyRequired(imgInfo, hostPtr)); alignedFree(hostPtr); } TEST(MemoryManagerTest, givenForcedLinearImages3DImageAndProperDescriptorValuesWhenIsCopyRequiredIsCalledThenFalseIsReturned) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.ForceLinearImages.set(true); auto &hwHelper = HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily); ImageInfo imgInfo{}; cl_image_format imageFormat = {}; imageFormat.image_channel_data_type = CL_UNSIGNED_INT8; imageFormat.image_channel_order = CL_R; cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, defaultHwInfo->capabilityTable.supportsOcl21Features); cl_image_desc imageDesc{}; imageDesc.image_type = CL_MEM_OBJECT_IMAGE3D; imageDesc.image_width = 2; imageDesc.image_height = 2; imageDesc.image_depth = 2; imgInfo.imgDesc = Image::convertDescriptor(imageDesc); imgInfo.surfaceFormat = &surfaceFormat->surfaceFormat; imgInfo.rowPitch = imageDesc.image_width * surfaceFormat->surfaceFormat.ImageElementSizeInBytes; imgInfo.slicePitch = imgInfo.rowPitch * imageDesc.image_height; imgInfo.size = imgInfo.slicePitch; imgInfo.linearStorage = !hwHelper.tilingAllowed(false, 
Image::isImage1d(Image::convertDescriptor(imgInfo.imgDesc)), false); auto hostPtr = alignedMalloc(imgInfo.size, MemoryConstants::cacheLineSize); EXPECT_FALSE(MockMemoryManager::isCopyRequired(imgInfo, hostPtr)); alignedFree(hostPtr); } class HeapSelectorTest : public Test { public: using BaseClass = ClDeviceFixture; void SetUp() override { executionEnvironment = std::unique_ptr(new MockExecutionEnvironment(defaultHwInfo.get())); memoryManager = std::unique_ptr(new MockMemoryManager(false, false, *executionEnvironment.get())); BaseClass::SetUp(); } void TearDown() override { BaseClass::TearDown(); } std::unique_ptr executionEnvironment; std::unique_ptr memoryManager; }; TEST_F(HeapSelectorTest, given32bitInternalAllocationWhenSelectingHeapThenInternalHeapIsUsed) { GraphicsAllocation allocation{0, AllocationType::KERNEL_ISA, nullptr, 0, 0, 0, MemoryPool::MemoryNull}; allocation.set32BitAllocation(true); EXPECT_EQ(MemoryManager::selectInternalHeap(allocation.isAllocatedInLocalMemoryPool()), memoryManager->selectHeap(&allocation, false, false, false)); GraphicsAllocation allocation2{0, AllocationType::KERNEL_ISA_INTERNAL, nullptr, 0, 0, 0, MemoryPool::MemoryNull}; allocation2.set32BitAllocation(true); EXPECT_EQ(MemoryManager::selectInternalHeap(allocation2.isAllocatedInLocalMemoryPool()), memoryManager->selectHeap(&allocation2, false, false, false)); } TEST_F(HeapSelectorTest, givenNon32bitInternalAllocationWhenSelectingHeapThenInternalHeapIsUsed) { GraphicsAllocation allocation{0, AllocationType::KERNEL_ISA, nullptr, 0, 0, 0, MemoryPool::MemoryNull}; allocation.set32BitAllocation(false); EXPECT_EQ(MemoryManager::selectInternalHeap(allocation.isAllocatedInLocalMemoryPool()), memoryManager->selectHeap(&allocation, false, false, false)); GraphicsAllocation allocation2{0, AllocationType::KERNEL_ISA_INTERNAL, nullptr, 0, 0, 0, MemoryPool::MemoryNull}; allocation2.set32BitAllocation(false); 
EXPECT_EQ(MemoryManager::selectInternalHeap(allocation2.isAllocatedInLocalMemoryPool()), memoryManager->selectHeap(&allocation2, false, false, false)); } TEST_F(HeapSelectorTest, given32bitExternalAllocationWhenSelectingHeapThenExternalHeapIsUsed) { GraphicsAllocation allocation{0, AllocationType::UNKNOWN, nullptr, 0, 0, 0, MemoryPool::MemoryNull}; allocation.set32BitAllocation(true); EXPECT_EQ(MemoryManager::selectExternalHeap(allocation.isAllocatedInLocalMemoryPool()), memoryManager->selectHeap(&allocation, false, false, false)); } TEST_F(HeapSelectorTest, givenLimitedAddressSpaceWhenSelectingHeapForExternalAllocationThenStandardHeapIsUsed) { GraphicsAllocation allocation{0, AllocationType::UNKNOWN, nullptr, 0, 0, 0, MemoryPool::MemoryNull}; EXPECT_EQ(HeapIndex::HEAP_STANDARD, memoryManager->selectHeap(&allocation, true, false, false)); } TEST_F(HeapSelectorTest, givenFullAddressSpaceWhenSelectingHeapForExternalAllocationWithPtrThenSvmHeapIsUsed) { GraphicsAllocation allocation{0, AllocationType::UNKNOWN, nullptr, 0, 0, 0, MemoryPool::MemoryNull}; EXPECT_EQ(HeapIndex::HEAP_SVM, memoryManager->selectHeap(&allocation, true, true, false)); } TEST_F(HeapSelectorTest, givenFullAddressSpaceWhenSelectingHeapForExternalAllocationWithoutPtrAndResourceIs64KSuitableThenStandard64kHeapIsUsed) { GraphicsAllocation allocation{0, AllocationType::UNKNOWN, nullptr, 0, 0, 0, MemoryPool::MemoryNull}; auto rootDeviceEnvironment = platform()->peekExecutionEnvironment()->rootDeviceEnvironments[0].get(); auto gmm = std::make_unique(rootDeviceEnvironment->getGmmClientContext(), nullptr, 0, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, StorageInfo{}, true); auto resourceInfo = static_cast(gmm->gmmResourceInfo.get()); resourceInfo->is64KBPageSuitableValue = true; allocation.setDefaultGmm(gmm.get()); EXPECT_EQ(HeapIndex::HEAP_STANDARD64KB, memoryManager->selectHeap(&allocation, false, true, false)); } TEST_F(HeapSelectorTest, 
givenFullAddressSpaceWhenSelectingHeapForExternalAllocationWithoutPtrAndResourceIsNot64KSuitableThenStandardHeapIsUsed) { GraphicsAllocation allocation{0, AllocationType::UNKNOWN, nullptr, 0, 0, 0, MemoryPool::MemoryNull}; auto rootDeviceEnvironment = platform()->peekExecutionEnvironment()->rootDeviceEnvironments[0].get(); auto gmm = std::make_unique(rootDeviceEnvironment->getGmmClientContext(), nullptr, 0, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, StorageInfo{}, true); auto resourceInfo = static_cast(gmm->gmmResourceInfo.get()); resourceInfo->is64KBPageSuitableValue = false; allocation.setDefaultGmm(gmm.get()); EXPECT_EQ(HeapIndex::HEAP_STANDARD, memoryManager->selectHeap(&allocation, false, true, false)); } TEST_F(HeapSelectorTest, givenFullAddressSpaceWhenSelectingHeapForNullAllocationWithoutPtrThenStandardHeapIsUsed) { EXPECT_EQ(HeapIndex::HEAP_STANDARD, memoryManager->selectHeap(nullptr, false, true, false)); } TEST_F(HeapSelectorTest, givenLimitedAddressSpaceWhenSelectingHeapForNullAllocationWithoutPtrThenStandardHeapIsUsed) { EXPECT_EQ(HeapIndex::HEAP_STANDARD, memoryManager->selectHeap(nullptr, false, false, false)); } TEST_F(HeapSelectorTest, givenDebugModuleAreaAllocationAndUseFrontWindowWhenSelectingHeapThenInternalFrontWindowHeapIsReturned) { GraphicsAllocation allocation{0, AllocationType::DEBUG_MODULE_AREA, nullptr, 0, 0, 0, MemoryPool::MemoryNull, 1}; allocation.set32BitAllocation(true); EXPECT_EQ(HeapIndex::HEAP_INTERNAL_FRONT_WINDOW, memoryManager->selectHeap(&allocation, false, false, true)); } TEST_F(HeapSelectorTest, givenDebugModuleAreaAllocationInLocalMemoryAndUseFrontWindowWhenSelectingHeapThenInternalDeviceFrontWindowHeapIsReturned) { GraphicsAllocation allocation{0, AllocationType::DEBUG_MODULE_AREA, nullptr, 0, 0, 0, MemoryPool::LocalMemory, 1}; allocation.set32BitAllocation(true); EXPECT_TRUE(allocation.isAllocatedInLocalMemoryPool()); EXPECT_EQ(HeapIndex::HEAP_INTERNAL_DEVICE_FRONT_WINDOW, memoryManager->selectHeap(&allocation, false, 
false, true));
}

// The allocation type handed to the MemoryAllocation constructor is readable back.
TEST(MemoryAllocationTest, givenAllocationTypeWhenPassedToMemoryAllocationConstructorThenAllocationTypeIsStored) {
    MemoryAllocation allocation{0, AllocationType::COMMAND_BUFFER, nullptr, nullptr, 0, 0, 0,
                                MemoryPool::MemoryNull, false, false, MemoryManager::maxOsContextCount};
    EXPECT_EQ(AllocationType::COMMAND_BUFFER, allocation.getAllocationType());
}

// The memory pool handed to the MemoryAllocation constructor is readable back.
TEST(MemoryAllocationTest, givenMemoryPoolWhenPassedToMemoryAllocationConstructorThenMemoryPoolIsStored) {
    MemoryAllocation allocation{0, AllocationType::COMMAND_BUFFER, nullptr, nullptr, 0, 0, 0,
                                MemoryPool::System64KBPages, false, false, MemoryManager::maxOsContextCount};
    EXPECT_EQ(MemoryPool::System64KBPages, allocation.getMemoryPool());
}

// Looking up the fixture's CSR yields an engine control that points back at it.
TEST_F(MemoryAllocatorTest, whenCommandStreamerIsRegisteredThenReturnAssociatedEngineControl) {
    auto engineControl = memoryManager->getRegisteredEngineForCsr(csr);
    ASSERT_NE(nullptr, engineControl);
    EXPECT_EQ(csr, engineControl->commandStreamReceiver);
}

// Looking up a CSR pointer that was never registered yields nullptr.
TEST_F(MemoryAllocatorTest, whenCommandStreamerIsNotRegisteredThenReturnNullEngineControl) {
    // Sentinel address used only as a lookup key; it is never dereferenced in this test.
    CommandStreamReceiver *dummyCsr = reinterpret_cast<CommandStreamReceiver *>(0x1);
    auto engineControl = memoryManager->getRegisteredEngineForCsr(dummyCsr);
    EXPECT_EQ(nullptr, engineControl);
}

// An allocation constructed with a null buffer and zero size cannot be a copy destination.
TEST(MemoryManagerCopyMemoryTest, givenAllocationWithNoStorageWhenCopyMemoryToAllocationThenReturnFalse) {
    MockExecutionEnvironment executionEnvironment(defaultHwInfo.get());
    MockMemoryManager memoryManager(false, false, executionEnvironment);
    uint8_t memory = 1;
    MockGraphicsAllocation invalidAllocation{nullptr, 0u};
    EXPECT_FALSE(memoryManager.copyMemoryToAllocation(&invalidAllocation, 0, &memory, sizeof(memory)));
}

// A single byte copied at an offset must land at that offset in the backing storage.
TEST(MemoryManagerCopyMemoryTest, givenValidAllocationAndMemoryWhenCopyMemoryToAllocationThenDataIsCopied) {
    MockExecutionEnvironment executionEnvironment(defaultHwInfo.get());
    MockMemoryManager memoryManager(false, false, executionEnvironment);
    constexpr uint8_t allocationSize = 10;
    uint8_t allocationStorage[allocationSize] = {0};
MockGraphicsAllocation allocation{allocationStorage, allocationSize}; uint8_t memory = 1u; EXPECT_EQ(0u, allocationStorage[0]); size_t offset = 2; EXPECT_TRUE(memoryManager.copyMemoryToAllocation(&allocation, offset, &memory, sizeof(memory))); EXPECT_EQ(memory, allocationStorage[offset]); } TEST_F(MemoryAllocatorTest, whenReservingAddressRangeThenExpectProperAddressAndReleaseWhenFreeing) { size_t size = 0x1000; auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), size}); ASSERT_NE(nullptr, allocation); void *reserve = memoryManager->reserveCpuAddressRange(size, 0); EXPECT_NE(nullptr, reserve); allocation->setReservedAddressRange(reserve, size); EXPECT_EQ(reserve, allocation->getReservedAddressPtr()); EXPECT_EQ(size, allocation->getReservedAddressSize()); memoryManager->freeGraphicsMemory(allocation); } TEST(MemoryManagerTest, givenMemoryManagerWhenGettingReservedMemoryThenAllocateIt) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MockMemoryManager memoryManager(false, false, executionEnvironment); EXPECT_EQ(nullptr, memoryManager.reservedMemory); memoryManager.getReservedMemory(MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize); EXPECT_NE(nullptr, memoryManager.reservedMemory); } TEST(MemoryManagerTest, givenMemoryManagerWhenGetReservedMemoryIsCalledManyTimesThenReuseSameMemory) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MockMemoryManager memoryManager(false, false, executionEnvironment); auto reservedMemory = memoryManager.getReservedMemory(MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize); memoryManager.getReservedMemory(MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize); memoryManager.getReservedMemory(MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize); EXPECT_EQ(reservedMemory, memoryManager.reservedMemory); } class MemoryManagerWithFailure : public MockMemoryManager { public: using 
MockMemoryManager::MockMemoryManager; GraphicsAllocation *allocateGraphicsMemoryWithProperties(const AllocationProperties &properties) override { recentlyPassedDeviceBitfield = properties.subDevicesBitfield; return nullptr; } }; TEST(MemoryManagerTest, whenMemoryManagerReturnsNullptrThenAllocateGlobalsSurfaceAlsoReturnsNullptr) { MockClDevice device{new MockDevice}; auto deviceBitfield = device.getDeviceBitfield(); auto memoryManager = new MemoryManagerWithFailure{*device.getExecutionEnvironment()}; device.injectMemoryManager(memoryManager); WhiteBox linkerInput; linkerInput.traits.exportsGlobalConstants = true; linkerInput.traits.exportsGlobalVariables = true; memoryManager->recentlyPassedDeviceBitfield = {}; GraphicsAllocation *allocation = allocateGlobalsSurface(nullptr, device.getDevice(), 1024, false, &linkerInput, nullptr); EXPECT_EQ(nullptr, allocation); EXPECT_EQ(deviceBitfield, memoryManager->recentlyPassedDeviceBitfield); auto svmAllocsManager = std::make_unique(device.getMemoryManager(), false); memoryManager->recentlyPassedDeviceBitfield = {}; allocation = allocateGlobalsSurface(svmAllocsManager.get(), device.getDevice(), 1024, false, &linkerInput, nullptr); EXPECT_EQ(nullptr, allocation); EXPECT_EQ(deviceBitfield, memoryManager->recentlyPassedDeviceBitfield); } HWTEST_F(MemoryAllocatorTest, givenMemoryManagerWhenEnableHostPtrTrackingFlagIsSetTo0ThenHostPointerTrackingIsDisabled) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableHostPtrTracking.set(0); EXPECT_FALSE(memoryManager->isHostPointerTrackingEnabled(0u)); } HWTEST_F(MemoryAllocatorTest, givenMemoryManagerWhenEnableHostPtrTrackingFlagIsNotSetTo1ThenHostPointerTrackingIsEnabled) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableHostPtrTracking.set(1); EXPECT_TRUE(memoryManager->isHostPointerTrackingEnabled(0u)); } HWTEST_F(MemoryAllocatorTest, givenMemoryManagerWhenEnableHostPtrTrackingFlagIsSetNotSetThenHostPointerTrackingDependsOnCapabilityTable) { if (is32bit) { 
EXPECT_TRUE(memoryManager->isHostPointerTrackingEnabled(0u)); } else { EXPECT_EQ(device->getHardwareInfo().capabilityTable.hostPtrTrackingEnabled, memoryManager->isHostPointerTrackingEnabled(0u)); } } using MemoryManagerMultiRootDeviceTests = MultiRootDeviceFixture; TEST_F(MemoryManagerMultiRootDeviceTests, WhenAllocatingGlobalSurfaceThenItHasCorrectRootDeviceIndex) { if (device1->getMemoryManager()->isLimitedRange(expectedRootDeviceIndex)) { delete context->svmAllocsManager; context->svmAllocsManager = nullptr; } std::vector initData(1024, 0x5B); WhiteBox linkerInput; linkerInput.traits.exportsGlobalConstants = true; linkerInput.traits.exportsGlobalVariables = true; GraphicsAllocation *allocation = allocateGlobalsSurface(context->svmAllocsManager, device1->getDevice(), initData.size(), false, &linkerInput, initData.data()); ASSERT_NE(nullptr, allocation); EXPECT_EQ(expectedRootDeviceIndex, allocation->getRootDeviceIndex()); if (device1->getMemoryManager()->isLimitedRange(expectedRootDeviceIndex)) { device1->getMemoryManager()->freeGraphicsMemory(allocation); } else { context->getSVMAllocsManager()->freeSVMAlloc(allocation->getUnderlyingBuffer()); } } TEST_F(MemoryManagerMultiRootDeviceTests, GivenMultipleRootDevicesWhenMemoryManagerInitializedThenIsaLocalMemoryPlacementIsCheckedForEveryRootDevice) { EXPECT_EQ(3u, mockMemoryManager->isaInLocalMemory.size()); } HWTEST_F(MemoryAllocatorTest, givenMemoryManagerWhen64BitAndHostPtrTrackingDisabledThenNonSvmHostPtrUsageIsSet) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableHostPtrTracking.set(0); bool expectedValue = !is32bit; auto result = memoryManager->useNonSvmHostPtrAlloc(AllocationType::EXTERNAL_HOST_PTR, 0u); EXPECT_EQ(expectedValue, result); result = memoryManager->useNonSvmHostPtrAlloc(AllocationType::MAP_ALLOCATION, 0u); EXPECT_EQ(expectedValue, result); } HWTEST_F(MemoryAllocatorTest, givenMemoryManagerWhenHostPtrTrackingModeThenNonSvmHostPtrUsageIsSet) { 
memoryManager->setForceNonSvmForExternalHostPtr(true); auto result = memoryManager->useNonSvmHostPtrAlloc(AllocationType::EXTERNAL_HOST_PTR, 0u); EXPECT_EQ(true, result); result = memoryManager->useNonSvmHostPtrAlloc(AllocationType::BUFFER_HOST_MEMORY, 0u); EXPECT_EQ(false, result); } HWTEST_F(MemoryAllocatorTest, givenMemoryManagerWhenHostPtrTrackingModeThenNonSvmBufferIsNotSet) { HardwareInfo hwInfoLocal = *defaultHwInfo; hwInfoLocal.capabilityTable.hostPtrTrackingEnabled = true; memoryManager->peekExecutionEnvironment().rootDeviceEnvironments[0u]->setHwInfo(&hwInfoLocal); int buffer = 0; EXPECT_FALSE(memoryManager->isNonSvmBuffer(&buffer, AllocationType::EXTERNAL_HOST_PTR, 0u)); EXPECT_FALSE(memoryManager->isNonSvmBuffer(&buffer, AllocationType::BUFFER_HOST_MEMORY, 0u)); } HWTEST_F(MemoryAllocatorTest, givenMemoryManagerWhenHostPtrTrackingDisabledAnd64bitsThenNonSvmBufferIsSetForBufferHostMemory) { HardwareInfo hwInfoLocal = *defaultHwInfo; hwInfoLocal.capabilityTable.hostPtrTrackingEnabled = false; memoryManager->peekExecutionEnvironment().rootDeviceEnvironments[0u]->setHwInfo(&hwInfoLocal); int buffer = 0; EXPECT_FALSE(memoryManager->isNonSvmBuffer(&buffer, AllocationType::EXTERNAL_HOST_PTR, 0u)); EXPECT_EQ(!is32bit, memoryManager->isNonSvmBuffer(&buffer, AllocationType::BUFFER_HOST_MEMORY, 0u)); } HWTEST_F(MemoryAllocatorTest, givenMemoryManagerWhenHostPtrTrackingEnabledThenNonSvmHostPtrUsageDependsOnFullRangeSvm) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableHostPtrTracking.set(1); auto result = memoryManager->useNonSvmHostPtrAlloc(AllocationType::EXTERNAL_HOST_PTR, 0u); EXPECT_EQ(!executionEnvironment->rootDeviceEnvironments[0]->isFullRangeSvm() && !is32bit, result); result = memoryManager->useNonSvmHostPtrAlloc(AllocationType::MAP_ALLOCATION, 0u); EXPECT_EQ(!executionEnvironment->rootDeviceEnvironments[0]->isFullRangeSvm() && !is32bit, result); } using PageTableManagerTest = ::testing::Test; using namespace ::testing; 
HWTEST_F(PageTableManagerTest, givenPageTableManagerWhenMapAuxGpuVaThenForAllEnginesWithPageTableUpdateAuxTableAreCalled) { ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); executionEnvironment->prepareRootDeviceEnvironments(2); for (auto i = 0u; i < executionEnvironment->rootDeviceEnvironments.size(); i++) { executionEnvironment->rootDeviceEnvironments[i]->setHwInfo(defaultHwInfo.get()); executionEnvironment->rootDeviceEnvironments[i]->initGmm(); } auto memoryManager = new MockMemoryManager(false, false, *executionEnvironment); executionEnvironment->memoryManager.reset(memoryManager); auto csr = std::unique_ptr(createCommandStream(*executionEnvironment, 1u, 1)); auto csr2 = std::unique_ptr(createCommandStream(*executionEnvironment, 1u, 1)); auto hwInfo = *defaultHwInfo; EngineInstancesContainer regularEngines = { {aub_stream::ENGINE_CCS, EngineUsage::Regular}, {aub_stream::ENGINE_BCS, EngineUsage::Regular}, }; memoryManager->createAndRegisterOsContext(csr.get(), EngineDescriptorHelper::getDefaultDescriptor(regularEngines[0], PreemptionHelper::getDefaultPreemptionMode(hwInfo))); memoryManager->createAndRegisterOsContext(csr2.get(), EngineDescriptorHelper::getDefaultDescriptor(regularEngines[1], PreemptionHelper::getDefaultPreemptionMode(hwInfo))); auto mockMngr = new MockGmmPageTableMngr(); auto mockMngr2 = new MockGmmPageTableMngr(); memoryManager->getRegisteredEngines()[0].commandStreamReceiver->pageTableManager.reset(mockMngr); memoryManager->getRegisteredEngines()[1].commandStreamReceiver->pageTableManager.reset(mockMngr2); MockGraphicsAllocation allocation(1u, AllocationType::UNKNOWN, nullptr, 0, 0, 0, MemoryPool::MemoryNull); MockGmm gmm(executionEnvironment->rootDeviceEnvironments[allocation.getRootDeviceIndex()]->getGmmClientContext()); gmm.isCompressionEnabled = true; allocation.setDefaultGmm(&gmm); GMM_DDI_UPDATEAUXTABLE expectedDdiUpdateAuxTable = {}; expectedDdiUpdateAuxTable.BaseGpuVA = allocation.getGpuAddress(); 
expectedDdiUpdateAuxTable.BaseResInfo = allocation.getDefaultGmm()->gmmResourceInfo->peekGmmResourceInfo(); expectedDdiUpdateAuxTable.DoNotWait = true; expectedDdiUpdateAuxTable.Map = true; bool result = memoryManager->mapAuxGpuVA(&allocation); EXPECT_TRUE(result); EXPECT_EQ(1u, mockMngr->updateAuxTableCalled); EXPECT_TRUE(memcmp(&expectedDdiUpdateAuxTable, &mockMngr->updateAuxTableParamsPassed[0].ddiUpdateAuxTable, sizeof(GMM_DDI_UPDATEAUXTABLE)) == 0); EXPECT_EQ(1u, mockMngr2->updateAuxTableCalled); EXPECT_TRUE(memcmp(&expectedDdiUpdateAuxTable, &mockMngr2->updateAuxTableParamsPassed[0].ddiUpdateAuxTable, sizeof(GMM_DDI_UPDATEAUXTABLE)) == 0); } HWTEST_F(PageTableManagerTest, givenPageTableManagerWhenUpdateAuxTableGmmErrorThenMapAuxGpuVaReturnFalse) { ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); executionEnvironment->prepareRootDeviceEnvironments(2); for (auto i = 0u; i < executionEnvironment->rootDeviceEnvironments.size(); i++) { executionEnvironment->rootDeviceEnvironments[i]->setHwInfo(defaultHwInfo.get()); executionEnvironment->rootDeviceEnvironments[i]->initGmm(); } auto memoryManager = new MockMemoryManager(false, false, *executionEnvironment); executionEnvironment->memoryManager.reset(memoryManager); auto csr = std::unique_ptr(createCommandStream(*executionEnvironment, 1u, 1)); auto hwInfo = *defaultHwInfo; EngineInstancesContainer regularEngines = { {aub_stream::ENGINE_CCS, EngineUsage::Regular}}; memoryManager->createAndRegisterOsContext(csr.get(), EngineDescriptorHelper::getDefaultDescriptor(regularEngines[0], PreemptionHelper::getDefaultPreemptionMode(hwInfo))); auto mockMngr = new MockGmmPageTableMngr(); mockMngr->updateAuxTableResult = GMM_ERROR; memoryManager->getRegisteredEngines()[0].commandStreamReceiver->pageTableManager.reset(mockMngr); MockGraphicsAllocation allocation(1u, AllocationType::UNKNOWN, nullptr, 0, 0, 0, MemoryPool::MemoryNull); MockGmm 
gmm(executionEnvironment->rootDeviceEnvironments[allocation.getRootDeviceIndex()]->getGmmClientContext()); gmm.isCompressionEnabled = true; allocation.setDefaultGmm(&gmm); bool result = memoryManager->mapAuxGpuVA(&allocation); EXPECT_FALSE(result); EXPECT_EQ(1u, mockMngr->updateAuxTableCalled); } HWTEST_F(PageTableManagerTest, givenNullPageTableManagerWhenMapAuxGpuVaThenNoThrow) { ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); executionEnvironment->prepareRootDeviceEnvironments(2); for (auto i = 0u; i < executionEnvironment->rootDeviceEnvironments.size(); i++) { executionEnvironment->rootDeviceEnvironments[i]->setHwInfo(defaultHwInfo.get()); executionEnvironment->rootDeviceEnvironments[i]->initGmm(); } auto memoryManager = new MockMemoryManager(false, false, *executionEnvironment); executionEnvironment->memoryManager.reset(memoryManager); auto csr = std::unique_ptr(createCommandStream(*executionEnvironment, 1u, 1)); auto hwInfo = *defaultHwInfo; EngineInstancesContainer regularEngines = { {aub_stream::ENGINE_CCS, EngineUsage::Regular}, }; memoryManager->createAndRegisterOsContext(csr.get(), EngineDescriptorHelper::getDefaultDescriptor(regularEngines[0], PreemptionHelper::getDefaultPreemptionMode(hwInfo))); memoryManager->getRegisteredEngines()[0].commandStreamReceiver->pageTableManager.reset(nullptr); MockGraphicsAllocation allocation(1u, AllocationType::UNKNOWN, nullptr, 0, 0, 0, MemoryPool::MemoryNull); MockGmm gmm(executionEnvironment->rootDeviceEnvironments[allocation.getRootDeviceIndex()]->getGmmClientContext()); gmm.isCompressionEnabled = true; allocation.setDefaultGmm(&gmm); EXPECT_NO_THROW(memoryManager->mapAuxGpuVA(&allocation)); } HWTEST_F(PageTableManagerTest, givenNullPageTableManagerWhenMapAuxGpuVaThenReturnFalse) { ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); executionEnvironment->prepareRootDeviceEnvironments(2); for (auto i = 0u; i < 
executionEnvironment->rootDeviceEnvironments.size(); i++) { executionEnvironment->rootDeviceEnvironments[i]->setHwInfo(defaultHwInfo.get()); executionEnvironment->rootDeviceEnvironments[i]->initGmm(); } auto memoryManager = new MockMemoryManager(false, false, *executionEnvironment); executionEnvironment->memoryManager.reset(memoryManager); auto csr = std::unique_ptr(createCommandStream(*executionEnvironment, 1u, 1)); auto hwInfo = *defaultHwInfo; memoryManager->createAndRegisterOsContext(csr.get(), EngineDescriptorHelper::getDefaultDescriptor(HwHelper::get(hwInfo.platform.eRenderCoreFamily).getGpgpuEngineInstances(hwInfo)[0], PreemptionHelper::getDefaultPreemptionMode(hwInfo))); for (auto engine : memoryManager->getRegisteredEngines()) { engine.commandStreamReceiver->pageTableManager.reset(nullptr); } MockGraphicsAllocation allocation(1u, AllocationType::UNKNOWN, nullptr, 0, 0, 0, MemoryPool::MemoryNull); bool result = memoryManager->mapAuxGpuVA(&allocation); EXPECT_FALSE(result); } HWTEST_F(PageTableManagerTest, givenMemoryManagerThatSupportsPageTableManagerWhenMapAuxGpuVAIsCalledThenItReturnsTrue) { ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); executionEnvironment->prepareRootDeviceEnvironments(2); for (auto i = 0u; i < executionEnvironment->rootDeviceEnvironments.size(); i++) { executionEnvironment->rootDeviceEnvironments[i]->setHwInfo(defaultHwInfo.get()); executionEnvironment->rootDeviceEnvironments[i]->initGmm(); } auto memoryManager = new MockMemoryManager(false, false, *executionEnvironment); executionEnvironment->memoryManager.reset(memoryManager); MockGraphicsAllocation allocation(1u, AllocationType::UNKNOWN, nullptr, 0, 0, 0, MemoryPool::MemoryNull); MockGmm gmm(executionEnvironment->rootDeviceEnvironments[allocation.getRootDeviceIndex()]->getGmmClientContext()); allocation.setDefaultGmm(&gmm); bool mapped = memoryManager->mapAuxGpuVA(&allocation); auto hwInfo = 
executionEnvironment->rootDeviceEnvironments[allocation.getRootDeviceIndex()]->getHardwareInfo(); EXPECT_EQ(HwInfoConfig::get(hwInfo->platform.eProductFamily)->isPageTableManagerSupported(*hwInfo), mapped); } TEST(MemoryManagerTest, givenDebugModuleAreaAllocationTypeWhenCallingGetAllocationDataThenUse32BitFrontWindowsIsSet) { AllocationData allocData; AllocationProperties properties(mockRootDeviceIndex, 1, AllocationType::DEBUG_MODULE_AREA, mockDeviceBitfield); MockMemoryManager mockMemoryManager; mockMemoryManager.getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_EQ(1u, allocData.flags.use32BitFrontWindow); } TEST(MemoryManagerTest, givenStorageInfoWithParamsWhenGettingAllocDataForSystemMemoryThenSetSystemMemoryFlag) { AllocationData allocData; AllocationProperties properties(mockRootDeviceIndex, 1, AllocationType::BUFFER_HOST_MEMORY, mockDeviceBitfield); EXPECT_NE(0lu, mockDeviceBitfield.to_ulong()); MockMemoryManager mockMemoryManager; auto storageInfo = mockMemoryManager.createStorageInfoFromProperties(properties); EXPECT_NE(0lu, storageInfo.memoryBanks.to_ulong()); mockMemoryManager.getAllocationData(allocData, properties, nullptr, storageInfo); EXPECT_EQ(1u, allocData.flags.useSystemMemory); EXPECT_TRUE(allocData.storageInfo.systemMemoryPlacement); } TEST(MemoryManagerTest, givenStorageInfoWithParamsWhenGettingAllocDataForLocalMemoryThenClearSystemMemoryFlag) { AllocationData allocData; AllocationProperties properties(mockRootDeviceIndex, 1, AllocationType::BUFFER, mockDeviceBitfield); EXPECT_NE(0lu, mockDeviceBitfield.to_ulong()); MockMemoryManager mockMemoryManager; auto storageInfo = mockMemoryManager.createStorageInfoFromProperties(properties); EXPECT_NE(0lu, storageInfo.memoryBanks.to_ulong()); mockMemoryManager.getAllocationData(allocData, properties, nullptr, storageInfo); EXPECT_EQ(0u, allocData.flags.useSystemMemory); EXPECT_FALSE(allocData.storageInfo.systemMemoryPlacement); } 
// Scratch and private surfaces are both reported as allocation types to capture.
TEST(MemoryManagerTest, WhenCallingIsAllocationTypeToCaptureThenScratchAndPrivateTypesReturnTrue) {
    MockMemoryManager mockMemoryManager;

    EXPECT_TRUE(mockMemoryManager.isAllocationTypeToCapture(AllocationType::SCRATCH_SURFACE));
    EXPECT_TRUE(mockMemoryManager.isAllocationTypeToCapture(AllocationType::PRIVATE_SURFACE));
}

// With blitter operations disabled in the capability table, the direct blit call
// reports Unsupported, while transferMemoryToAllocation (blitter requested) still
// succeeds and the destination ends up equal to the source.
TEST(MemoryTransferHelperTest, WhenBlitterIsSelectedButBlitCopyFailsThenFallbackToCopyOnCPU) {
    constexpr uint32_t dataSize = 16;
    uint8_t destData[dataSize] = {};
    uint8_t srcData[dataSize] = {};
    // Fill the source with a recognisable 0..15 ramp so the final memcmp is meaningful.
    for (uint8_t i = 0u; i < dataSize; i++) {
        srcData[i] = i;
    }
    MockGraphicsAllocation graphicsAllocation{destData, sizeof(destData)};
    graphicsAllocation.setAllocationType(AllocationType::BUFFER_HOST_MEMORY);

    auto hwInfo = *defaultHwInfo;
    hwInfo.capabilityTable.blitterOperationsSupported = false;

    // std::unique_ptr has no deduction guide from a raw pointer, so the element type is spelled out.
    auto device = std::unique_ptr<MockDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo));

    EXPECT_EQ(BlitOperationResult::Unsupported, BlitHelperFunctions::blitMemoryToAllocation(*device, &graphicsAllocation, 0, srcData, {dataSize, 1, 1}));

    auto result = MemoryTransferHelper::transferMemoryToAllocation(true, *device, &graphicsAllocation, 0u, srcData, dataSize);
    EXPECT_TRUE(result);
    EXPECT_EQ(0, memcmp(destData, srcData, dataSize));
}

// Blitter operations are flagged as supported but ftrBcsInfo is zero, so no
// regular BCS engine can be obtained from the device.
TEST(MemoryTransferHelperTest, givenBlitOperationSupportedWhenBcsEngineNotAvailableThenReturnUnsupported) {
    constexpr uint32_t dataSize = 16;
    uint8_t destData[dataSize] = {};
    uint8_t srcData[dataSize] = {};

    MockGraphicsAllocation graphicsAllocation{destData, sizeof(destData)};
    graphicsAllocation.storageInfo.memoryBanks = 1;
    graphicsAllocation.setAllocationType(AllocationType::BUFFER);

    auto hwInfo = *defaultHwInfo;
    hwInfo.capabilityTable.blitterOperationsSupported = true;
    hwInfo.featureTable.ftrBcsInfo = 0;

    auto device = std::unique_ptr<MockDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo));

    auto bcsEngine = device->tryGetEngine(aub_stream::EngineType::ENGINE_BCS, EngineUsage::Regular);
    EXPECT_EQ(nullptr, bcsEngine);
EXPECT_EQ(BlitOperationResult::Unsupported, BlitHelperFunctions::blitMemoryToAllocation(*device, &graphicsAllocation, 0, srcData, {dataSize, 1, 1})); } TEST(MemoryManagerTest, givenMemoryManagerWithLocalMemoryWhenCreatingMultiGraphicsAllocationInSystemMemoryThenForceSystemMemoryPlacement) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); executionEnvironment.initGmm(); MockMemoryManager memoryManager(true, true, executionEnvironment); AllocationProperties allocationProperties{mockRootDeviceIndex, MemoryConstants::pageSize, AllocationType::SVM_GPU, systemMemoryBitfield}; auto localMemoryAllocation = memoryManager.allocateGraphicsMemoryWithProperties(allocationProperties); EXPECT_NE(nullptr, localMemoryAllocation); EXPECT_TRUE(localMemoryAllocation->isAllocatedInLocalMemoryPool()); memoryManager.freeGraphicsMemory(localMemoryAllocation); std::vector rootDeviceIndices{}; rootDeviceIndices.push_back(mockRootDeviceIndex); MultiGraphicsAllocation multiGraphicsAllocation(mockRootDeviceIndex); auto ptr = memoryManager.createMultiGraphicsAllocationInSystemMemoryPool(rootDeviceIndices, allocationProperties, multiGraphicsAllocation); EXPECT_NE(nullptr, ptr); auto systemMemoryAllocation = multiGraphicsAllocation.getGraphicsAllocation(mockRootDeviceIndex); EXPECT_NE(nullptr, systemMemoryAllocation); EXPECT_FALSE(systemMemoryAllocation->isAllocatedInLocalMemoryPool()); memoryManager.freeGraphicsMemory(systemMemoryAllocation); } TEST(MemoryManagerTest, givenDuplicateRootDeviceIndicesWhenCreatingMultiGraphicsAllocationInSystemMemoryThenDontLeakMemory) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); executionEnvironment.initGmm(); MockMemoryManager memoryManager(true, true, executionEnvironment); AllocationProperties allocationProperties{mockRootDeviceIndex, MemoryConstants::pageSize, AllocationType::SVM_GPU, systemMemoryBitfield}; std::vector rootDeviceIndices{}; rootDeviceIndices.push_back(mockRootDeviceIndex); 
rootDeviceIndices.push_back(mockRootDeviceIndex); rootDeviceIndices.push_back(mockRootDeviceIndex); EXPECT_EQ(3u, rootDeviceIndices.size()); MultiGraphicsAllocation multiGraphicsAllocation(mockRootDeviceIndex); auto ptr = memoryManager.createMultiGraphicsAllocationInSystemMemoryPool(rootDeviceIndices, allocationProperties, multiGraphicsAllocation); EXPECT_NE(nullptr, ptr); auto allocation = multiGraphicsAllocation.getGraphicsAllocation(mockRootDeviceIndex); EXPECT_NE(nullptr, allocation); EXPECT_EQ(mockRootDeviceIndex, allocation->getRootDeviceIndex()); memoryManager.freeGraphicsMemory(allocation); } compute-runtime-22.14.22890/opencl/test/unit_test/memory_manager/migraton_controller_tests.cpp000066400000000000000000000257741422164147700326610ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/memory_manager/migration_sync_data.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "shared/test/common/mocks/mock_migration_sync_data.h" #include "shared/test/common/mocks/mock_multi_graphics_allocation.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/memory_manager/migration_controller.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/test_macros/test_checks_ocl.h" using namespace NEO; struct MigrationControllerTests : public ::testing::Test { void SetUp() override { pCsr0 = context.getDevice(0)->getDefaultEngine().commandStreamReceiver; pCsr1 = context.getDevice(1)->getDefaultEngine().commandStreamReceiver; memoryManager = static_cast(context.getMemoryManager()); } void TearDown() override { } MockDefaultContext context{true}; CommandStreamReceiver *pCsr0 = nullptr; CommandStreamReceiver *pCsr1 = nullptr; 
MockMemoryManager *memoryManager = nullptr; }; TEST_F(MigrationControllerTests, givenAllocationWithUndefinedLocationWhenHandleMigrationThenNoMigrationIsPerformedAndProperLocationIsSet) { std::unique_ptr pImage(Image1dHelper<>::create(&context)); EXPECT_TRUE(pImage->getMultiGraphicsAllocation().requiresMigrations()); EXPECT_EQ(MigrationSyncData::locationUndefined, pImage->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); MigrationController::handleMigration(context, *pCsr0, pImage.get()); EXPECT_EQ(0u, pImage->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); EXPECT_EQ(0u, memoryManager->lockResourceCalled); EXPECT_EQ(0u, memoryManager->unlockResourceCalled); EXPECT_EQ(0u, pCsr0->peekLatestFlushedTaskCount()); } TEST_F(MigrationControllerTests, givenAllocationWithDefinedLocationWhenHandleMigrationToTheSameLocationThenDontMigrateMemory) { std::unique_ptr pImage(Image1dHelper<>::create(&context)); EXPECT_TRUE(pImage->getMultiGraphicsAllocation().requiresMigrations()); pImage->getMultiGraphicsAllocation().getMigrationSyncData()->setCurrentLocation(1); EXPECT_EQ(1u, pImage->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); MigrationController::handleMigration(context, *pCsr1, pImage.get()); EXPECT_EQ(1u, pImage->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); EXPECT_EQ(0u, memoryManager->lockResourceCalled); EXPECT_EQ(0u, memoryManager->unlockResourceCalled); EXPECT_EQ(0u, pCsr1->peekLatestFlushedTaskCount()); } TEST_F(MigrationControllerTests, givenNotLockableImageAllocationWithDefinedLocationWhenHandleMigrationToDifferentLocationThenMigrateMemoryViaReadWriteImage) { REQUIRE_IMAGE_SUPPORT_OR_SKIP(&context); std::unique_ptr pImage(Image1dHelper<>::create(&context)); EXPECT_TRUE(pImage->getMultiGraphicsAllocation().requiresMigrations()); auto srcAllocation = pImage->getMultiGraphicsAllocation().getGraphicsAllocation(0); auto dstAllocation = 
pImage->getMultiGraphicsAllocation().getGraphicsAllocation(1); srcAllocation->getDefaultGmm()->resourceParams.Flags.Info.NotLockable = 1; dstAllocation->getDefaultGmm()->resourceParams.Flags.Info.NotLockable = 1; EXPECT_FALSE(srcAllocation->isAllocationLockable()); EXPECT_FALSE(dstAllocation->isAllocationLockable()); pImage->getMultiGraphicsAllocation().getMigrationSyncData()->setCurrentLocation(0); EXPECT_EQ(0u, pImage->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); MigrationController::handleMigration(context, *pCsr1, pImage.get()); EXPECT_EQ(1u, pImage->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); EXPECT_EQ(0u, memoryManager->lockResourceCalled); EXPECT_EQ(0u, memoryManager->unlockResourceCalled); EXPECT_EQ(1u, pCsr1->peekLatestFlushedTaskCount()); EXPECT_EQ(1u, pCsr0->peekLatestFlushedTaskCount()); } TEST_F(MigrationControllerTests, givenNotLockableBufferAllocationWithDefinedLocationWhenHandleMigrationToDifferentLocationThenMigrateMemoryViaReadWriteBuffer) { DebugManagerStateRestore restorer; DebugManager.flags.DoCpuCopyOnReadBuffer.set(0); DebugManager.flags.DoCpuCopyOnWriteBuffer.set(0); DebugManager.flags.EnableBlitterForEnqueueOperations.set(0); std::unique_ptr pBuffer(BufferHelper<>::create(&context)); const_cast(pBuffer->getMultiGraphicsAllocation()).setMultiStorage(true); EXPECT_TRUE(pBuffer->getMultiGraphicsAllocation().requiresMigrations()); auto srcAllocation = pBuffer->getMultiGraphicsAllocation().getGraphicsAllocation(0); auto dstAllocation = pBuffer->getMultiGraphicsAllocation().getGraphicsAllocation(1); auto gmm0 = new Gmm(context.getDevice(0)->getGmmClientContext(), nullptr, 1, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true); auto gmm1 = new Gmm(context.getDevice(1)->getGmmClientContext(), nullptr, 1, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true); srcAllocation->setDefaultGmm(gmm0); dstAllocation->setDefaultGmm(gmm1); 
srcAllocation->getDefaultGmm()->resourceParams.Flags.Info.NotLockable = 1; dstAllocation->getDefaultGmm()->resourceParams.Flags.Info.NotLockable = 1; EXPECT_FALSE(srcAllocation->isAllocationLockable()); EXPECT_FALSE(dstAllocation->isAllocationLockable()); pBuffer->getMultiGraphicsAllocation().getMigrationSyncData()->setCurrentLocation(0); EXPECT_EQ(0u, pBuffer->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); MigrationController::handleMigration(context, *pCsr1, pBuffer.get()); EXPECT_EQ(1u, pBuffer->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); EXPECT_EQ(0u, memoryManager->lockResourceCalled); EXPECT_EQ(0u, memoryManager->unlockResourceCalled); EXPECT_EQ(1u, pCsr1->peekLatestFlushedTaskCount()); EXPECT_EQ(1u, pCsr0->peekLatestFlushedTaskCount()); } TEST_F(MigrationControllerTests, givenLockableBufferAllocationWithDefinedLocationWhenHandleMigrationToDifferentLocationThenMigrateMemoryViaLockMemory) { std::unique_ptr pBuffer(BufferHelper<>::create(&context)); const_cast(pBuffer->getMultiGraphicsAllocation()).setMultiStorage(true); EXPECT_TRUE(pBuffer->getMultiGraphicsAllocation().requiresMigrations()); auto srcAllocation = pBuffer->getMultiGraphicsAllocation().getGraphicsAllocation(0); auto dstAllocation = pBuffer->getMultiGraphicsAllocation().getGraphicsAllocation(1); EXPECT_TRUE(srcAllocation->isAllocationLockable()); EXPECT_TRUE(dstAllocation->isAllocationLockable()); pBuffer->getMultiGraphicsAllocation().getMigrationSyncData()->setCurrentLocation(0); EXPECT_EQ(0u, pBuffer->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); MigrationController::handleMigration(context, *pCsr1, pBuffer.get()); EXPECT_EQ(1u, pBuffer->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); EXPECT_EQ(2u, memoryManager->lockResourceCalled); EXPECT_EQ(2u, memoryManager->unlockResourceCalled); EXPECT_EQ(0u, pCsr1->peekLatestFlushedTaskCount()); EXPECT_EQ(0u, 
pCsr0->peekLatestFlushedTaskCount()); } TEST_F(MigrationControllerTests, givenMultiGraphicsAllocationUsedInOneCsrWhenHandlingMigrationToOtherCsrOnTheSameRootDeviceThenWaitOnCpuForTheFirstCsrCompletion) { VariableBackup createFuncBackup{&MultiGraphicsAllocation::createMigrationSyncDataFunc}; MultiGraphicsAllocation::createMigrationSyncDataFunc = [](size_t size) -> MigrationSyncData * { return new MockMigrationSyncData(size); }; std::unique_ptr pImage(Image1dHelper<>::create(&context)); ASSERT_TRUE(pImage->getMultiGraphicsAllocation().requiresMigrations()); auto migrationSyncData = static_cast(pImage->getMultiGraphicsAllocation().getMigrationSyncData()); EXPECT_EQ(0u, migrationSyncData->waitOnCpuCalled); migrationSyncData->setCurrentLocation(0); EXPECT_EQ(0u, migrationSyncData->getCurrentLocation()); MigrationController::handleMigration(context, *pCsr0, pImage.get()); EXPECT_EQ(0u, migrationSyncData->getCurrentLocation()); EXPECT_EQ(0u, memoryManager->lockResourceCalled); EXPECT_EQ(0u, memoryManager->unlockResourceCalled); EXPECT_EQ(0u, pCsr1->peekLatestFlushedTaskCount()); EXPECT_EQ(0u, pCsr0->peekLatestFlushedTaskCount()); EXPECT_EQ(1u, migrationSyncData->waitOnCpuCalled); } TEST_F(MigrationControllerTests, givenMultiGraphicsAllocationUsedInOneCsrWhenHandlingMigrationToTheSameCsrThenDontWaitOnCpu) { VariableBackup createFuncBackup{&MultiGraphicsAllocation::createMigrationSyncDataFunc}; MultiGraphicsAllocation::createMigrationSyncDataFunc = [](size_t size) -> MigrationSyncData * { return new MockMigrationSyncData(size); }; std::unique_ptr pImage(Image1dHelper<>::create(&context)); ASSERT_TRUE(pImage->getMultiGraphicsAllocation().requiresMigrations()); auto migrationSyncData = static_cast(pImage->getMultiGraphicsAllocation().getMigrationSyncData()); EXPECT_EQ(0u, migrationSyncData->waitOnCpuCalled); migrationSyncData->signalUsage(pCsr0->getTagAddress(), 0u); migrationSyncData->setCurrentLocation(0); EXPECT_EQ(0u, migrationSyncData->getCurrentLocation()); 
MigrationController::handleMigration(context, *pCsr0, pImage.get()); EXPECT_EQ(0u, migrationSyncData->getCurrentLocation()); EXPECT_EQ(0u, memoryManager->lockResourceCalled); EXPECT_EQ(0u, memoryManager->unlockResourceCalled); EXPECT_EQ(0u, pCsr1->peekLatestFlushedTaskCount()); EXPECT_EQ(0u, pCsr0->peekLatestFlushedTaskCount()); EXPECT_EQ(0u, migrationSyncData->waitOnCpuCalled); } TEST_F(MigrationControllerTests, whenHandleMigrationThenProperTagAddressAndTaskCountIsSet) { VariableBackup createFuncBackup{&MultiGraphicsAllocation::createMigrationSyncDataFunc}; MultiGraphicsAllocation::createMigrationSyncDataFunc = [](size_t size) -> MigrationSyncData * { return new MockMigrationSyncData(size); }; std::unique_ptr pImage(Image1dHelper<>::create(&context)); ASSERT_TRUE(pImage->getMultiGraphicsAllocation().requiresMigrations()); auto migrationSyncData = static_cast(pImage->getMultiGraphicsAllocation().getMigrationSyncData()); migrationSyncData->setCurrentLocation(0); MigrationController::handleMigration(context, *pCsr0, pImage.get()); EXPECT_EQ(pCsr0->getTagAddress(), migrationSyncData->tagAddress); EXPECT_EQ(pCsr0->peekTaskCount() + 1, migrationSyncData->latestTaskCountUsed); }compute-runtime-22.14.22890/opencl/test/unit_test/memory_manager/surface_tests.cpp000066400000000000000000000107561422164147700302200ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/preemption.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "shared/test/common/helpers/engine_descriptor_helper.h" #include "shared/test/common/mocks/mock_csr.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/memory_manager/mem_obj_surface.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "gtest/gtest.h" 
#include using namespace NEO; typedef ::testing::Types SurfaceTypes; namespace createSurface { template Surface *Create(char *data, MockBuffer *buffer, GraphicsAllocation *gfxAllocation); template <> Surface *Create(char *data, MockBuffer *buffer, GraphicsAllocation *gfxAllocation) { return new NullSurface; } template <> Surface *Create(char *data, MockBuffer *buffer, GraphicsAllocation *gfxAllocation) { return new HostPtrSurface(data, 10, gfxAllocation); } template <> Surface *Create(char *data, MockBuffer *buffer, GraphicsAllocation *gfxAllocation) { return new MemObjSurface(buffer); } template <> Surface *Create(char *data, MockBuffer *buffer, GraphicsAllocation *gfxAllocation) { return new GeneralSurface(gfxAllocation); } } // namespace createSurface template class SurfaceTest : public ::testing::Test { public: char data[10]; MockBuffer buffer; MockGraphicsAllocation gfxAllocation{nullptr, 0}; }; TYPED_TEST_CASE(SurfaceTest, SurfaceTypes); HWTEST_TYPED_TEST(SurfaceTest, GivenSurfaceWhenInterfaceIsUsedThenSurfaceBehavesCorrectly) { int32_t execStamp; ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); executionEnvironment->initializeMemoryManager(); DeviceBitfield deviceBitfield(1); auto csr = std::make_unique>(execStamp, *executionEnvironment, 0, deviceBitfield); auto hwInfo = *defaultHwInfo; auto engine = HwHelper::get(hwInfo.platform.eRenderCoreFamily).getGpgpuEngineInstances(hwInfo)[0]; auto osContext = executionEnvironment->memoryManager->createAndRegisterOsContext(csr.get(), EngineDescriptorHelper::getDefaultDescriptor(engine, PreemptionHelper::getDefaultPreemptionMode(hwInfo))); csr->setupContext(*osContext); Surface *surface = createSurface::Create(this->data, &this->buffer, &this->gfxAllocation); ASSERT_NE(nullptr, surface); Surface *duplicatedSurface = surface->duplicate(); ASSERT_NE(nullptr, duplicatedSurface); surface->makeResident(*csr); if (std::is_same::value || std::is_same::value || std::is_same::value) { 
EXPECT_EQ(1u, csr->madeResidentGfxAllocations.size()); } delete duplicatedSurface; delete surface; } class CoherentMemObjSurface : public SurfaceTest { public: CoherentMemObjSurface() { this->buffer.getGraphicsAllocation(mockRootDeviceIndex)->setCoherent(true); } }; TEST_F(CoherentMemObjSurface, GivenCoherentMemObjWhenCreatingSurfaceFromMemObjThenSurfaceIsCoherent) { Surface *surface = createSurface::Create(this->data, &this->buffer, &this->gfxAllocation); EXPECT_TRUE(surface->IsCoherent); delete surface; } TEST(HostPtrSurfaceTest, givenHostPtrSurfaceWhenCreatedWithoutSpecifyingPtrCopyAllowanceThenPtrCopyIsNotAllowed) { char memory[2] = {}; HostPtrSurface surface(memory, sizeof(memory)); EXPECT_FALSE(surface.peekIsPtrCopyAllowed()); } TEST(HostPtrSurfaceTest, givenHostPtrSurfaceWhenCreatedWithPtrCopyAllowedThenQueryReturnsTrue) { char memory[2] = {}; HostPtrSurface surface(memory, sizeof(memory), true); EXPECT_TRUE(surface.peekIsPtrCopyAllowed()); } TEST(HostPtrSurfaceTest, givenHostPtrSurfaceWhenCreatedWithPtrCopyNotAllowedThenQueryReturnsFalse) { char memory[2] = {}; HostPtrSurface surface(memory, sizeof(memory), false); EXPECT_FALSE(surface.peekIsPtrCopyAllowed()); } compute-runtime-22.14.22890/opencl/test/unit_test/memory_manager/unified_memory_manager_tests.cpp000066400000000000000000002304771422164147700333010ustar00rootroot00000000000000/* * Copyright (C) 2019-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/helpers/local_memory_access_modes.h" #include "shared/source/memory_manager/allocations_list.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/mocks/mock_execution_environment.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "shared/test/common/mocks/mock_svm_manager.h" 
// Remaining project headers used by the SVM / unified-memory allocator tests below.
#include "shared/test/common/mocks/ult_device_factory.h"
#include "shared/test/common/test_macros/test.h"
#include "shared/test/unit_test/page_fault_manager/cpu_page_fault_manager_tests_fixture.h"

#include "opencl/source/api/api.h"
#include "opencl/source/mem_obj/mem_obj_helper.h"
#include "opencl/test/unit_test/fixtures/cl_device_fixture.h"
#include "opencl/test/unit_test/fixtures/multi_root_device_fixture.h"
#include "opencl/test/unit_test/mocks/mock_buffer.h"
#include "opencl/test/unit_test/mocks/mock_command_queue.h"
#include "opencl/test/unit_test/mocks/mock_context.h"
#include "opencl/test/unit_test/mocks/mock_platform.h"
#include "opencl/test/unit_test/test_macros/test_checks_ocl.h"

#include "gtest/gtest.h"

using namespace NEO;

// Shared fixture for the SVM allocator tests: SetUp() creates a memory manager and an SVM
// manager on top of a mock execution environment. `enableLocalMemory` is forwarded to the
// memory manager and also decides whether a MockPageFaultManager is installed.
//
// NOTE(review): in this copy of the text the angle-bracket lists are absent (the parameter
// list after `template`, and the arguments of std::make_unique, std::unique_ptr, std::set,
// std::map and Test) — confirm against the upstream file before building.
template struct SVMMemoryAllocatorFixture {
    // The mock execution environment is built from defaultHwInfo.
    SVMMemoryAllocatorFixture() : executionEnvironment(defaultHwInfo.get()) {}

    virtual void SetUp() {
        // Skip the test when root device environment 0 reports no SVM support (ftrSvm).
        bool svmSupported = executionEnvironment.rootDeviceEnvironments[0]->getHardwareInfo()->capabilityTable.ftrSvm;
        if (!svmSupported) {
            GTEST_SKIP();
        }

        executionEnvironment.initGmm();
        memoryManager = std::make_unique(false, enableLocalMemory, executionEnvironment);
        // The SVM manager only borrows the memory manager through a raw pointer.
        svmManager = std::make_unique(memoryManager.get(), false);
        if (enableLocalMemory) {
            memoryManager->pageFaultManager.reset(new MockPageFaultManager);
        }
    }

    // No explicit teardown work is performed.
    virtual void TearDown() {
    }

    MockExecutionEnvironment executionEnvironment;
    std::unique_ptr memoryManager;
    std::unique_ptr svmManager;
    // Root device indices and per-root-device bitfields that the tests hand to the allocation calls.
    std::set rootDeviceIndices{mockRootDeviceIndex};
    std::map deviceBitfields{{mockRootDeviceIndex, mockDeviceBitfield}};
};

// NOTE(review): both aliases read `Test>` here; presumably they instantiate the fixture with
// local memory disabled and enabled respectively — confirm against upstream.
using SVMMemoryAllocatorTest = Test>;
using SVMLocalMemoryAllocatorTest = Test>;

// A zero-byte request must return nullptr and leave the SVM allocation map empty.
TEST_F(SVMMemoryAllocatorTest, whenCreateZeroSizedSVMAllocationThenReturnNullptr) {
    auto ptr = svmManager->createSVMAlloc(0, {}, rootDeviceIndices, deviceBitfields);

    EXPECT_EQ(0u, svmManager->SVMAllocs.getNumAllocs());
    EXPECT_EQ(ptr, nullptr);
}

// getSVMAllocs() must hand back a non-null result.
TEST_F(SVMMemoryAllocatorTest, whenRequestSVMAllocsThenReturnNonNullptr) {
    auto svmAllocs = svmManager->getSVMAllocs();
    EXPECT_NE(svmAllocs, nullptr);
}

using MultiDeviceSVMMemoryAllocatorTest = MultiRootDeviceWithSubDevicesFixture;

// One SVM allocation made for all of the context's root devices must expose a graphics
// allocation for every root device index while counting as a single entry in the map.
TEST_F(MultiDeviceSVMMemoryAllocatorTest, givenMultipleDevicesWhenCreatingSVMAllocThenCreateOneGraphicsAllocationPerRootDeviceIndex) {
    // presumably skips the test when device1 lacks SVM support — verify against the macro definition
    REQUIRE_SVM_OR_SKIP(device1);

    // Local SVM manager built on device1's memory manager (this fixture provides none of its own here).
    auto svmManager = std::make_unique(device1->getMemoryManager(), false);

    auto ptr = svmManager->createSVMAlloc(MemoryConstants::pageSize, {}, context->getRootDeviceIndices(), context->getDeviceBitfields());
    EXPECT_NE(nullptr, ptr);
    auto svmData = svmManager->getSVMAlloc(ptr);
    EXPECT_EQ(1u, svmManager->SVMAllocs.getNumAllocs());
    ASSERT_NE(nullptr, svmData);

    // Every root device gets a non-coherent allocation of type SVM_ZERO_COPY.
    for (auto &rootDeviceIndex : context->getRootDeviceIndices()) {
        auto svmAllocation = svmManager->getSVMAlloc(ptr)->gpuAllocations.getGraphicsAllocation(rootDeviceIndex);
        EXPECT_NE(nullptr, svmAllocation);
        EXPECT_EQ(AllocationType::SVM_ZERO_COPY, svmAllocation->getAllocationType());
        EXPECT_FALSE(svmAllocation->isCoherent());
    }

    // After freeing, the pointer no longer resolves and the map is empty again.
    svmManager->freeSVMAlloc(ptr);
    EXPECT_EQ(nullptr, svmManager->getSVMAlloc(ptr));
    EXPECT_EQ(0u, svmManager->SVMAllocs.getNumAllocs());
}

// A freed SVM pointer must stop resolving through getSVMAlloc().
TEST_F(SVMMemoryAllocatorTest, whenSVMAllocationIsFreedThenCannotBeGotAgain) {
    auto ptr = svmManager->createSVMAlloc(MemoryConstants::pageSize, {}, rootDeviceIndices, deviceBitfields);
    EXPECT_NE(nullptr, ptr);

    // Two consecutive lookups are both expected to resolve to data holding a GPU allocation.
    auto svmData = svmManager->getSVMAlloc(ptr);
    ASSERT_NE(nullptr, svmData);
    EXPECT_NE(nullptr, svmData->gpuAllocations.getGraphicsAllocation(mockRootDeviceIndex));
    svmData = svmManager->getSVMAlloc(ptr);
    ASSERT_NE(nullptr, svmData);
    EXPECT_NE(nullptr, svmData->gpuAllocations.getGraphicsAllocation(mockRootDeviceIndex));
    EXPECT_EQ(1u, svmManager->SVMAllocs.getNumAllocs());

    // Default-constructed properties ({}) are expected to yield a non-coherent allocation.
    auto svmAllocation = svmManager->getSVMAlloc(ptr)->gpuAllocations.getGraphicsAllocation(mockRootDeviceIndex);
    EXPECT_FALSE(svmAllocation->isCoherent());

    svmManager->freeSVMAlloc(ptr);
    EXPECT_EQ(nullptr, svmManager->getSVMAlloc(ptr));
    EXPECT_EQ(0u, svmManager->SVMAllocs.getNumAllocs());
}
// Builds an SvmAllocationData by hand around a raw graphics allocation and checks that
// insertSVMAlloc() makes it resolvable by GPU address and removeSVMAlloc() takes it out again.
TEST_F(SVMMemoryAllocatorTest, givenSvmManagerWhenOperatedOnThenCorrectAllocationIsInsertedReturnedAndRemoved) {
    int data;
    size_t size = sizeof(data);
    auto allocation = memoryManager->allocateGraphicsMemoryWithProperties({mockRootDeviceIndex, size, AllocationType::BUFFER_HOST_MEMORY, mockDeviceBitfield});
    // Stop here instead of dereferencing a null allocation below.
    ASSERT_NE(nullptr, allocation);

    NEO::SvmAllocationData svmData(mockRootDeviceIndex);
    svmData.gpuAllocations.addAllocation(allocation);
    svmData.cpuAllocation = nullptr;
    svmData.size = size;
    svmData.memoryType = InternalMemoryType::SHARED_UNIFIED_MEMORY;
    svmData.device = nullptr;

    // The lookup key is the GPU address viewed as a pointer; reinterpret_cast needs an explicit
    // target type, and void * is what getSVMAlloc() receives in the neighbouring tests.
    auto ptr = reinterpret_cast<void *>(allocation->getGpuAddress());

    svmManager->insertSVMAlloc(svmData);
    auto svmDataTemp = svmManager->getSVMAlloc(ptr);
    ASSERT_NE(nullptr, svmDataTemp);
    EXPECT_NE(nullptr, svmDataTemp->gpuAllocations.getGraphicsAllocation(mockRootDeviceIndex));
    EXPECT_EQ(1u, svmManager->SVMAllocs.getNumAllocs());
    auto svmAllocation = svmManager->getSVMAlloc(ptr)->gpuAllocations.getGraphicsAllocation(mockRootDeviceIndex);
    EXPECT_FALSE(svmAllocation->isCoherent());

    svmManager->removeSVMAlloc(svmData);
    EXPECT_EQ(nullptr, svmManager->getSVMAlloc(ptr));
    EXPECT_EQ(0u, svmManager->SVMAllocs.getNumAllocs());

    // The allocation was created directly through the memory manager, so the test releases it
    // the same way after removing the map entry.
    svmManager->memoryManager->freeGraphicsMemory(allocation);
}

// A pointer that lies inside the returned page-sized area must resolve to the same graphics
// allocation as the base pointer.
TEST_F(SVMMemoryAllocatorTest, whenGetSVMAllocationFromReturnedPointerAreaThenReturnSameAllocation) {
    auto ptr = svmManager->createSVMAlloc(MemoryConstants::pageSize, {}, rootDeviceIndices, deviceBitfields);
    EXPECT_NE(ptr, nullptr);

    auto svmData = svmManager->getSVMAlloc(ptr);
    ASSERT_NE(nullptr, svmData);
    GraphicsAllocation *graphicsAllocation = svmData->gpuAllocations.getGraphicsAllocation(mockRootDeviceIndex);
    EXPECT_NE(nullptr, graphicsAllocation);

    // Four bytes before the end of the allocation: still inside the area.
    auto ptrInRange = ptrOffset(ptr, MemoryConstants::pageSize - 4);
    svmData = svmManager->getSVMAlloc(ptrInRange);
    ASSERT_NE(nullptr, svmData);
    GraphicsAllocation *graphicsAllocationInRange = svmData->gpuAllocations.getGraphicsAllocation(mockRootDeviceIndex);
    EXPECT_NE(nullptr, graphicsAllocationInRange);

    EXPECT_EQ(graphicsAllocation, graphicsAllocationInRange);

    svmManager->freeSVMAlloc(ptr);
}

// Pointers just before the base and exactly one page past it must not resolve to the allocation.
TEST_F(SVMMemoryAllocatorTest, whenGetSVMAllocationFromOutsideOfReturnedPointerAreaThenDontReturnThisAllocation) {
    auto ptr = svmManager->createSVMAlloc(MemoryConstants::pageSize, {}, rootDeviceIndices, deviceBitfields);
    EXPECT_NE(ptr, nullptr);

    auto svmData = svmManager->getSVMAlloc(ptr);
    ASSERT_NE(nullptr, svmData);
    GraphicsAllocation *graphicsAllocation = svmData->gpuAllocations.getGraphicsAllocation(mockRootDeviceIndex);
    EXPECT_NE(nullptr, graphicsAllocation);

    auto ptrBefore = ptrOffset(ptr, -4);
    svmData = svmManager->getSVMAlloc(ptrBefore);
    EXPECT_EQ(nullptr, svmData);

    // First byte past the allocation.
    auto ptrAfter = ptrOffset(ptr, MemoryConstants::pageSize);
    svmData = svmManager->getSVMAlloc(ptrAfter);
    EXPECT_EQ(nullptr, svmData);

    svmManager->freeSVMAlloc(ptr);
}

// With the memory manager swapped for a FailMemoryManager, createSVMAlloc() is expected to
// return nullptr and leave the allocation map untouched.
TEST_F(SVMMemoryAllocatorTest, whenCouldNotAllocateInMemoryManagerThenReturnsNullAndDoesNotChangeAllocsMap) {
    FailMemoryManager failMemoryManager(executionEnvironment);
    // NOTE(review): svmManager keeps pointing at this local after the test body returns —
    // confirm nothing dereferences svmManager->memoryManager during fixture teardown.
    svmManager->memoryManager = &failMemoryManager;

    auto ptr = svmManager->createSVMAlloc(MemoryConstants::pageSize, {}, rootDeviceIndices, deviceBitfields);
    EXPECT_EQ(nullptr, ptr);
    EXPECT_EQ(0u, svmManager->SVMAllocs.getNumAllocs());

    // ptr is expected to be null here; the call is kept from the original test.
    svmManager->freeSVMAlloc(ptr);
}

// Same failure scenario for createUnifiedMemoryAllocation() with DEVICE_UNIFIED_MEMORY properties.
TEST_F(SVMMemoryAllocatorTest, whenCouldNotAllocateInMemoryManagerThenCreateUnifiedMemoryAllocationReturnsNullAndDoesNotChangeAllocsMap) {
    FailMemoryManager failMemoryManager(executionEnvironment);
    // NOTE(review): svmManager keeps pointing at this local after the test body returns —
    // confirm nothing dereferences svmManager->memoryManager during fixture teardown.
    svmManager->memoryManager = &failMemoryManager;

    MockContext mockContext;
    auto device = mockContext.getDevice(0u);
    SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::DEVICE_UNIFIED_MEMORY, rootDeviceIndices, deviceBitfields);
    unifiedMemoryProperties.device = &device->getDevice();

    auto ptr = svmManager->createUnifiedMemoryAllocation(4096u, unifiedMemoryProperties);
    EXPECT_EQ(nullptr, ptr);
    EXPECT_EQ(0u, svmManager->SVMAllocs.getNumAllocs());

    // ptr is expected to be null here; the call is kept from the original test.
    svmManager->freeSVMAlloc(ptr);
}
TEST_F(SVMMemoryAllocatorTest, given64kbAllowedWhenAllocatingSvmMemoryThenDontPreferCompression) { MockMemoryManager memoryManager64Kb(true, false, executionEnvironment); svmManager->memoryManager = &memoryManager64Kb; auto ptr = svmManager->createSVMAlloc(MemoryConstants::pageSize, {}, rootDeviceIndices, deviceBitfields); EXPECT_FALSE(memoryManager64Kb.preferCompressedFlagPassed); svmManager->freeSVMAlloc(ptr); } TEST_F(SVMMemoryAllocatorTest, given64kbAllowedwhenAllocatingSvmMemoryThenAllocationIsIn64kbPagePool) { MockMemoryManager memoryManager64Kb(true, false, executionEnvironment); svmManager->memoryManager = &memoryManager64Kb; auto ptr = svmManager->createSVMAlloc(MemoryConstants::pageSize, {}, rootDeviceIndices, deviceBitfields); EXPECT_EQ(MemoryPool::System64KBPages, svmManager->getSVMAlloc(ptr)->gpuAllocations.getGraphicsAllocation(mockRootDeviceIndex)->getMemoryPool()); svmManager->freeSVMAlloc(ptr); } TEST_F(SVMMemoryAllocatorTest, given64kbDisallowedWhenAllocatingSvmMemoryThenAllocationIsIn4kbPagePool) { auto ptr = svmManager->createSVMAlloc(MemoryConstants::pageSize, {}, rootDeviceIndices, deviceBitfields); EXPECT_EQ(MemoryPool::System4KBPages, svmManager->getSVMAlloc(ptr)->gpuAllocations.getGraphicsAllocation(mockRootDeviceIndex)->getMemoryPool()); svmManager->freeSVMAlloc(ptr); } TEST_F(SVMMemoryAllocatorTest, whenCoherentFlagIsPassedThenAllocationIsCoherent) { SVMAllocsManager::SvmAllocationProperties svmProperties; svmProperties.coherent = true; auto ptr = svmManager->createSVMAlloc(MemoryConstants::pageSize, svmProperties, rootDeviceIndices, deviceBitfields); EXPECT_TRUE(svmManager->getSVMAlloc(ptr)->gpuAllocations.getGraphicsAllocation(mockRootDeviceIndex)->isCoherent()); svmManager->freeSVMAlloc(ptr); } TEST_F(SVMLocalMemoryAllocatorTest, whenDeviceAllocationIsCreatedThenItIsStoredWithWriteCombinedTypeInAllocationMap) { SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::DEVICE_UNIFIED_MEMORY, 
rootDeviceIndices, deviceBitfields); unifiedMemoryProperties.allocationFlags.allocFlags.allocWriteCombined = true; auto allocationSize = 4000u; auto ptr = svmManager->createUnifiedMemoryAllocation(4000u, unifiedMemoryProperties); EXPECT_NE(nullptr, ptr); auto allocation = svmManager->getSVMAlloc(ptr); EXPECT_EQ(nullptr, allocation->cpuAllocation); auto gpuAllocation = allocation->gpuAllocations.getGraphicsAllocation(mockRootDeviceIndex); EXPECT_NE(nullptr, gpuAllocation); EXPECT_EQ(InternalMemoryType::DEVICE_UNIFIED_MEMORY, allocation->memoryType); EXPECT_EQ(allocationSize, allocation->size); EXPECT_EQ(gpuAllocation->getMemoryPool(), MemoryPool::LocalMemory); EXPECT_EQ(alignUp(allocationSize, MemoryConstants::pageSize64k), gpuAllocation->getUnderlyingBufferSize()); EXPECT_EQ(AllocationType::WRITE_COMBINED, gpuAllocation->getAllocationType()); svmManager->freeSVMAlloc(ptr); } TEST_F(SVMMemoryAllocatorTest, givenNoWriteCombinedFlagwhenDeviceAllocationIsCreatedThenItIsStoredWithProperTypeInAllocationMap) { if (is32bit) { GTEST_SKIP(); } MockContext mockContext; auto device = mockContext.getDevice(0u); SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::DEVICE_UNIFIED_MEMORY, rootDeviceIndices, deviceBitfields); unifiedMemoryProperties.device = &device->getDevice(); unifiedMemoryProperties.allocationFlags.allocFlags.allocWriteCombined = false; auto allocationSize = 4096u; auto ptr = svmManager->createUnifiedMemoryAllocation(4096u, unifiedMemoryProperties); EXPECT_NE(nullptr, ptr); auto allocation = svmManager->getSVMAlloc(ptr); EXPECT_EQ(nullptr, allocation->cpuAllocation); auto gpuAllocation = allocation->gpuAllocations.getGraphicsAllocation(mockRootDeviceIndex); EXPECT_NE(nullptr, gpuAllocation); EXPECT_EQ(InternalMemoryType::DEVICE_UNIFIED_MEMORY, allocation->memoryType); EXPECT_EQ(allocationSize, allocation->size); EXPECT_EQ(alignUp(allocationSize, MemoryConstants::pageSize64k), gpuAllocation->getUnderlyingBufferSize()); 
EXPECT_EQ(AllocationType::BUFFER, gpuAllocation->getAllocationType()); svmManager->freeSVMAlloc(ptr); } TEST_F(SVMMemoryAllocatorTest, whenHostAllocationIsCreatedThenItIsStoredWithProperTypeInAllocationMap) { SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::HOST_UNIFIED_MEMORY, rootDeviceIndices, deviceBitfields); auto allocationSize = 4096u; auto ptr = svmManager->createUnifiedMemoryAllocation(4096u, unifiedMemoryProperties); EXPECT_NE(nullptr, ptr); auto allocation = svmManager->getSVMAlloc(ptr); EXPECT_EQ(nullptr, allocation->cpuAllocation); auto gpuAllocation = allocation->gpuAllocations.getGraphicsAllocation(mockRootDeviceIndex); EXPECT_NE(nullptr, gpuAllocation); EXPECT_EQ(InternalMemoryType::HOST_UNIFIED_MEMORY, allocation->memoryType); EXPECT_EQ(allocationSize, allocation->size); EXPECT_EQ(alignUp(allocationSize, MemoryConstants::pageSize64k), gpuAllocation->getUnderlyingBufferSize()); EXPECT_EQ(AllocationType::BUFFER_HOST_MEMORY, gpuAllocation->getAllocationType()); EXPECT_NE(gpuAllocation->getMemoryPool(), MemoryPool::LocalMemory); EXPECT_NE(nullptr, gpuAllocation->getUnderlyingBuffer()); svmManager->freeSVMAlloc(ptr); } TEST_F(SVMMemoryAllocatorTest, whenCouldNotAllocateInMemoryManagerThenCreateSharedUnifiedMemoryAllocationReturnsNullAndDoesNotChangeAllocsMap) { MockCommandQueue cmdQ; DebugManagerStateRestore restore; DebugManager.flags.AllocateSharedAllocationsWithCpuAndGpuStorage.set(true); FailMemoryManager failMemoryManager(executionEnvironment); svmManager->memoryManager = &failMemoryManager; SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::SHARED_UNIFIED_MEMORY, rootDeviceIndices, deviceBitfields); auto ptr = svmManager->createSharedUnifiedMemoryAllocation(4096u, unifiedMemoryProperties, &cmdQ); EXPECT_EQ(nullptr, ptr); EXPECT_EQ(0u, svmManager->SVMAllocs.getNumAllocs()); svmManager->freeSVMAlloc(ptr); } TEST_F(SVMMemoryAllocatorTest, 
whenSharedAllocationIsCreatedThenItIsStoredWithProperTypeInAllocationMap) { MockCommandQueue cmdQ; SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::SHARED_UNIFIED_MEMORY, rootDeviceIndices, deviceBitfields); auto allocationSize = 4096u; auto ptr = svmManager->createSharedUnifiedMemoryAllocation(4096u, unifiedMemoryProperties, &cmdQ); EXPECT_NE(nullptr, ptr); auto allocation = svmManager->getSVMAlloc(ptr); EXPECT_EQ(nullptr, allocation->cpuAllocation); auto gpuAllocation = allocation->gpuAllocations.getGraphicsAllocation(mockRootDeviceIndex); EXPECT_NE(nullptr, gpuAllocation); EXPECT_EQ(InternalMemoryType::SHARED_UNIFIED_MEMORY, allocation->memoryType); EXPECT_EQ(allocationSize, allocation->size); EXPECT_EQ(alignUp(allocationSize, MemoryConstants::pageSize64k), gpuAllocation->getUnderlyingBufferSize()); EXPECT_EQ(AllocationType::BUFFER_HOST_MEMORY, gpuAllocation->getAllocationType()); EXPECT_NE(gpuAllocation->getMemoryPool(), MemoryPool::LocalMemory); EXPECT_NE(nullptr, gpuAllocation->getUnderlyingBuffer()); svmManager->freeSVMAlloc(ptr); } TEST_F(SVMLocalMemoryAllocatorTest, whenSharedAllocationIsCreatedWithDebugFlagSetThenItIsStoredWithProperTypeInAllocationMapAndHasCpuAndGpuStorage) { MockCommandQueue cmdQ; MockContext mockContext; DebugManagerStateRestore restore; DebugManager.flags.AllocateSharedAllocationsWithCpuAndGpuStorage.set(true); auto device = mockContext.getDevice(0u); SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::SHARED_UNIFIED_MEMORY, rootDeviceIndices, deviceBitfields); unifiedMemoryProperties.device = &device->getDevice(); auto allocationSize = 4096u; auto ptr = svmManager->createSharedUnifiedMemoryAllocation(4096u, unifiedMemoryProperties, &cmdQ); EXPECT_NE(nullptr, ptr); auto allocation = svmManager->getSVMAlloc(ptr); auto gpuAllocation = allocation->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex()); EXPECT_NE(nullptr, allocation->cpuAllocation); 
EXPECT_NE(nullptr, gpuAllocation); EXPECT_EQ(InternalMemoryType::SHARED_UNIFIED_MEMORY, allocation->memoryType); EXPECT_EQ(allocationSize, allocation->size); EXPECT_EQ(mockContext.getDevice(0u), allocation->device->getSpecializedDevice()); EXPECT_EQ(alignUp(allocationSize, 64 * KB), gpuAllocation->getUnderlyingBufferSize()); EXPECT_EQ(alignUp(allocationSize, MemoryConstants::pageSize2Mb), allocation->cpuAllocation->getUnderlyingBufferSize()); EXPECT_EQ(AllocationType::SVM_GPU, gpuAllocation->getAllocationType()); EXPECT_EQ(AllocationType::SVM_CPU, allocation->cpuAllocation->getAllocationType()); EXPECT_EQ(gpuAllocation->getMemoryPool(), MemoryPool::LocalMemory); EXPECT_NE(allocation->cpuAllocation->getMemoryPool(), MemoryPool::LocalMemory); EXPECT_NE(nullptr, gpuAllocation->getUnderlyingBuffer()); svmManager->freeSVMAlloc(ptr); } TEST_F(SVMLocalMemoryAllocatorTest, whenSharedAllocationIsCreatedWithLocalMemoryAndRegisteredPageFaultHandlerThenItIsStoredWithProperTypeInAllocationMapAndHasCpuAndGpuStorage) { MockCommandQueue cmdQ; DebugManagerStateRestore restore; DebugManager.flags.EnableLocalMemory.set(1); SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::SHARED_UNIFIED_MEMORY, rootDeviceIndices, deviceBitfields); auto allocationSize = 4096u; auto ptr = svmManager->createSharedUnifiedMemoryAllocation(4096u, unifiedMemoryProperties, &cmdQ); EXPECT_NE(nullptr, ptr); auto allocation = svmManager->getSVMAlloc(ptr); auto gpuAllocation = allocation->gpuAllocations.getGraphicsAllocation(mockRootDeviceIndex); EXPECT_NE(nullptr, allocation->cpuAllocation); EXPECT_NE(nullptr, gpuAllocation); EXPECT_EQ(InternalMemoryType::SHARED_UNIFIED_MEMORY, allocation->memoryType); EXPECT_EQ(allocationSize, allocation->size); EXPECT_EQ(alignUp(allocationSize, 64 * KB), gpuAllocation->getUnderlyingBufferSize()); EXPECT_EQ(alignUp(allocationSize, MemoryConstants::pageSize2Mb), allocation->cpuAllocation->getUnderlyingBufferSize()); 
EXPECT_EQ(AllocationType::SVM_GPU, gpuAllocation->getAllocationType()); EXPECT_EQ(AllocationType::SVM_CPU, allocation->cpuAllocation->getAllocationType()); EXPECT_EQ(gpuAllocation->getMemoryPool(), MemoryPool::LocalMemory); EXPECT_NE(allocation->cpuAllocation->getMemoryPool(), MemoryPool::LocalMemory); EXPECT_NE(nullptr, gpuAllocation->getUnderlyingBuffer()); svmManager->freeSVMAlloc(ptr); } TEST_F(SVMMemoryAllocatorTest, givenSharedAllocationsDebugFlagWhenDeviceMemoryIsAllocatedThenOneStorageIsProduced) { DebugManagerStateRestore restore; DebugManager.flags.AllocateSharedAllocationsWithCpuAndGpuStorage.set(true); MockContext mockContext; auto device = mockContext.getDevice(0u); SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::DEVICE_UNIFIED_MEMORY, rootDeviceIndices, deviceBitfields); unifiedMemoryProperties.device = &device->getDevice(); auto allocationSize = 4096u; auto ptr = svmManager->createUnifiedMemoryAllocation(4096u, unifiedMemoryProperties); EXPECT_NE(nullptr, ptr); auto allocation = svmManager->getSVMAlloc(ptr); EXPECT_EQ(nullptr, allocation->cpuAllocation); auto gpuAllocation = allocation->gpuAllocations.getGraphicsAllocation(mockRootDeviceIndex); EXPECT_NE(nullptr, gpuAllocation); EXPECT_EQ(InternalMemoryType::DEVICE_UNIFIED_MEMORY, allocation->memoryType); EXPECT_EQ(allocationSize, allocation->size); EXPECT_EQ(alignUp(allocationSize, MemoryConstants::pageSize64k), gpuAllocation->getUnderlyingBufferSize()); EXPECT_EQ(AllocationType::BUFFER, gpuAllocation->getAllocationType()); svmManager->freeSVMAlloc(ptr); } TEST(SvmAllocationPropertiesTests, givenDifferentMemFlagsWhenGettingSvmAllocationPropertiesThenPropertiesAreCorrectlySet) { SVMAllocsManager::SvmAllocationProperties allocationProperties = MemObjHelper::getSvmAllocationProperties(0); EXPECT_FALSE(allocationProperties.coherent); EXPECT_FALSE(allocationProperties.hostPtrReadOnly); EXPECT_FALSE(allocationProperties.readOnly); allocationProperties = 
MemObjHelper::getSvmAllocationProperties(CL_MEM_SVM_FINE_GRAIN_BUFFER); EXPECT_TRUE(allocationProperties.coherent); EXPECT_FALSE(allocationProperties.hostPtrReadOnly); EXPECT_FALSE(allocationProperties.readOnly); allocationProperties = MemObjHelper::getSvmAllocationProperties(CL_MEM_HOST_READ_ONLY); EXPECT_FALSE(allocationProperties.coherent); EXPECT_TRUE(allocationProperties.hostPtrReadOnly); EXPECT_FALSE(allocationProperties.readOnly); allocationProperties = MemObjHelper::getSvmAllocationProperties(CL_MEM_HOST_NO_ACCESS); EXPECT_FALSE(allocationProperties.coherent); EXPECT_TRUE(allocationProperties.hostPtrReadOnly); EXPECT_FALSE(allocationProperties.readOnly); allocationProperties = MemObjHelper::getSvmAllocationProperties(CL_MEM_READ_ONLY); EXPECT_FALSE(allocationProperties.coherent); EXPECT_FALSE(allocationProperties.hostPtrReadOnly); EXPECT_TRUE(allocationProperties.readOnly); allocationProperties = MemObjHelper::getSvmAllocationProperties(CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_HOST_READ_ONLY); EXPECT_TRUE(allocationProperties.coherent); EXPECT_TRUE(allocationProperties.hostPtrReadOnly); EXPECT_FALSE(allocationProperties.readOnly); allocationProperties = MemObjHelper::getSvmAllocationProperties(CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_READ_ONLY); EXPECT_TRUE(allocationProperties.coherent); EXPECT_FALSE(allocationProperties.hostPtrReadOnly); EXPECT_TRUE(allocationProperties.readOnly); } TEST_F(SVMMemoryAllocatorTest, whenReadOnlySvmAllocationCreatedThenGraphicsAllocationHasWriteableFlagFalse) { SVMAllocsManager::SvmAllocationProperties svmProperties; svmProperties.readOnly = true; void *svm = svmManager->createSVMAlloc(4096, svmProperties, rootDeviceIndices, deviceBitfields); EXPECT_NE(nullptr, svm); auto svmData = svmManager->getSVMAlloc(svm); ASSERT_NE(nullptr, svmData); GraphicsAllocation *svmAllocation = svmData->gpuAllocations.getGraphicsAllocation(mockRootDeviceIndex); EXPECT_NE(nullptr, svmAllocation); 
EXPECT_FALSE(svmAllocation->isMemObjectsAllocationWithWritableFlags()); svmManager->freeSVMAlloc(svm); } TEST_F(SVMLocalMemoryAllocatorTest, whenAllocatingSvmThenExpectCpuAllocationWithPointerAndGpuAllocationWithSameGpuAddress) { auto ptr = svmManager->createSVMAlloc(MemoryConstants::pageSize, {}, rootDeviceIndices, deviceBitfields); EXPECT_NE(ptr, nullptr); auto svmData = svmManager->getSVMAlloc(ptr); ASSERT_NE(nullptr, svmData); GraphicsAllocation *cpuAllocation = svmData->cpuAllocation; EXPECT_NE(nullptr, cpuAllocation); EXPECT_EQ(ptr, cpuAllocation->getUnderlyingBuffer()); GraphicsAllocation *gpuAllocation = svmData->gpuAllocations.getGraphicsAllocation(mockRootDeviceIndex); EXPECT_NE(nullptr, gpuAllocation); EXPECT_EQ(reinterpret_cast(ptr), gpuAllocation->getGpuAddress()); svmManager->freeSVMAlloc(ptr); } TEST_F(SVMLocalMemoryAllocatorTest, whenGetSVMAllocationFromOutsideOfReturnedPointerAreaThenDontReturnThisAllocation) { auto ptr = svmManager->createSVMAlloc(MemoryConstants::pageSize, {}, rootDeviceIndices, deviceBitfields); EXPECT_NE(ptr, nullptr); auto svmData = svmManager->getSVMAlloc(ptr); ASSERT_NE(nullptr, svmData); GraphicsAllocation *graphicsAllocation = svmData->gpuAllocations.getGraphicsAllocation(mockRootDeviceIndex); EXPECT_NE(nullptr, graphicsAllocation); auto ptrBefore = ptrOffset(ptr, -4); svmData = svmManager->getSVMAlloc(ptrBefore); EXPECT_EQ(nullptr, svmData); auto ptrAfter = ptrOffset(ptr, MemoryConstants::pageSize); svmData = svmManager->getSVMAlloc(ptrAfter); EXPECT_EQ(nullptr, svmData); svmManager->freeSVMAlloc(ptr); } TEST_F(SVMLocalMemoryAllocatorTest, whenCouldNotAllocateCpuAllocationInMemoryManagerThenReturnsNullAndDoesNotChangeAllocsMap) { FailMemoryManager failMemoryManager(false, true, executionEnvironment); svmManager->memoryManager = &failMemoryManager; auto ptr = svmManager->createSVMAlloc(MemoryConstants::pageSize, {}, rootDeviceIndices, deviceBitfields); EXPECT_EQ(nullptr, ptr); EXPECT_EQ(0u, 
svmManager->SVMAllocs.getNumAllocs()); svmManager->freeSVMAlloc(ptr); } TEST_F(SVMLocalMemoryAllocatorTest, whenCouldNotAllocateGpuAllocationInMemoryManagerThenReturnsNullAndDoesNotChangeAllocsMap) { FailMemoryManager failMemoryManager(1, executionEnvironment, true); svmManager->memoryManager = &failMemoryManager; auto ptr = svmManager->createSVMAlloc(MemoryConstants::pageSize, {}, rootDeviceIndices, deviceBitfields); EXPECT_EQ(nullptr, ptr); EXPECT_EQ(0u, svmManager->SVMAllocs.getNumAllocs()); svmManager->freeSVMAlloc(ptr); } TEST_F(SVMLocalMemoryAllocatorTest, whenCouldNotReserveCpuAddressRangeInMemoryManagerThenReturnsNullAndDoesNotChangeAllocsMap) { memoryManager->failReserveAddress = true; auto ptr = svmManager->createSVMAlloc(MemoryConstants::pageSize, {}, rootDeviceIndices, deviceBitfields); EXPECT_EQ(nullptr, ptr); EXPECT_EQ(0u, svmManager->SVMAllocs.getNumAllocs()); } struct MemoryManagerPropertiesCheck : public MockMemoryManager { using MockMemoryManager::MockMemoryManager; GraphicsAllocation *allocateGraphicsMemoryWithProperties(const AllocationProperties &properties) override { return this->allocateGraphicsMemoryWithProperties(properties, nullptr); } GraphicsAllocation *allocateGraphicsMemoryWithProperties(const AllocationProperties &properties, const void *ptr) override { this->multiOsContextCapablePassed = properties.flags.multiOsContextCapable; this->multiStorageResourcePassed = properties.multiStorageResource; this->subDevicesBitfieldPassed = properties.subDevicesBitfield; this->shareablePassed = properties.flags.shareable; return MockMemoryManager::allocateGraphicsMemoryWithProperties(properties, ptr); } bool multiOsContextCapablePassed; bool multiStorageResourcePassed; bool shareablePassed; DeviceBitfield subDevicesBitfieldPassed; }; struct UnifiedMemoryManagerPropertiesTest : public ::testing::Test { void SetUp() override { bool svmSupported = executionEnvironment.rootDeviceEnvironments[0]->getHardwareInfo()->capabilityTable.ftrSvm; if 
(!svmSupported) { GTEST_SKIP(); } memoryManager = std::make_unique(false, true, executionEnvironment); svmManager = std::make_unique(memoryManager.get(), false); memoryManager->pageFaultManager.reset(new MockPageFaultManager); } MockExecutionEnvironment executionEnvironment; std::unique_ptr memoryManager; std::unique_ptr svmManager; }; TEST(UnifiedMemoryTest, givenDeviceBitfieldWithMultipleBitsSetWhenSharedUnifiedMemoryAllocationIsCreatedThenProperPropertiesArePassedToMemoryManager) { MockCommandQueue cmdQ; DebugManagerStateRestore restorer; DebugManager.flags.CreateMultipleSubDevices.set(4); MockExecutionEnvironment executionEnvironment; auto memoryManager = std::make_unique(false, true, executionEnvironment); auto svmManager = std::make_unique(memoryManager.get(), false); memoryManager->pageFaultManager.reset(new MockPageFaultManager); std::set rootDeviceIndices{mockRootDeviceIndex}; std::map deviceBitfields{{mockRootDeviceIndex, DeviceBitfield(0xf)}}; SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::SHARED_UNIFIED_MEMORY, rootDeviceIndices, deviceBitfields); svmManager->multiOsContextSupport = true; auto ptr = svmManager->createSharedUnifiedMemoryAllocation(4096u, unifiedMemoryProperties, &cmdQ); EXPECT_FALSE(memoryManager->multiOsContextCapablePassed); EXPECT_TRUE(memoryManager->multiStorageResourcePassed); EXPECT_EQ(unifiedMemoryProperties.subdeviceBitfields.at(mockRootDeviceIndex), memoryManager->subDevicesBitfieldPassed); svmManager->freeSVMAlloc(ptr); } TEST_F(UnifiedMemoryManagerPropertiesTest, givenDeviceBitfieldWithSingleBitSetWhenSharedUnifiedMemoryAllocationIsCreatedThenProperPropertiesArePassedToMemoryManager) { MockCommandQueue cmdQ; std::set rootDeviceIndices{mockRootDeviceIndex}; std::map deviceBitfields{{mockRootDeviceIndex, DeviceBitfield(0x8)}}; SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::SHARED_UNIFIED_MEMORY, rootDeviceIndices, deviceBitfields); auto ptr = 
svmManager->createSharedUnifiedMemoryAllocation(4096u, unifiedMemoryProperties, &cmdQ); EXPECT_FALSE(memoryManager->multiOsContextCapablePassed); EXPECT_FALSE(memoryManager->multiStorageResourcePassed); EXPECT_EQ(unifiedMemoryProperties.subdeviceBitfields.at(mockRootDeviceIndex), memoryManager->subDevicesBitfieldPassed); svmManager->freeSVMAlloc(ptr); } TEST(UnifiedMemoryTest, givenDeviceBitfieldWithMultipleBitsSetWhenMultiOsContextFlagTrueThenProperPropertiesArePassedToMemoryManager) { MockCommandQueue cmdQ; DebugManagerStateRestore restorer; DebugManager.flags.CreateMultipleSubDevices.set(4); MockExecutionEnvironment executionEnvironment; auto memoryManager = std::make_unique(false, true, executionEnvironment); auto svmManager = std::make_unique(memoryManager.get(), false); memoryManager->pageFaultManager = std::make_unique(); std::set rootDeviceIndices{mockRootDeviceIndex}; std::map deviceBitfields{{mockRootDeviceIndex, DeviceBitfield(0xf)}}; SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::SHARED_UNIFIED_MEMORY, rootDeviceIndices, deviceBitfields); svmManager->multiOsContextSupport = true; auto ptr = svmManager->createSharedUnifiedMemoryAllocation(4096u, unifiedMemoryProperties, &cmdQ); EXPECT_FALSE(memoryManager->multiOsContextCapablePassed); EXPECT_TRUE(memoryManager->multiStorageResourcePassed); EXPECT_EQ(unifiedMemoryProperties.subdeviceBitfields.at(mockRootDeviceIndex), memoryManager->subDevicesBitfieldPassed); svmManager->freeSVMAlloc(ptr); } TEST(UnifiedMemoryTest, givenDeviceBitfieldWithMultipleBitsSetWhenMultiOsContextFlagFalseThenLowestSubDevicePassedToMemoryManager) { MockCommandQueue cmdQ; DebugManagerStateRestore restorer; DebugManager.flags.CreateMultipleSubDevices.set(4); DebugManager.flags.OverrideLeastOccupiedBank.set(1); MockExecutionEnvironment executionEnvironment; auto memoryManager = std::make_unique(false, true, executionEnvironment); auto svmManager = std::make_unique(memoryManager.get(), false); 
memoryManager->pageFaultManager = std::make_unique();
    std::set rootDeviceIndices{mockRootDeviceIndex};
    std::map deviceBitfields{{mockRootDeviceIndex, DeviceBitfield(0xE)}};
    SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::SHARED_UNIFIED_MEMORY, rootDeviceIndices, deviceBitfields);
    svmManager->multiOsContextSupport = false;
    auto ptr = svmManager->createSharedUnifiedMemoryAllocation(4096u, unifiedMemoryProperties, &cmdQ);
    auto expectedSubDevices = unifiedMemoryProperties.subdeviceBitfields.at(mockRootDeviceIndex);
    expectedSubDevices.reset();
    expectedSubDevices.set(1);
    EXPECT_FALSE(memoryManager->multiOsContextCapablePassed);
    EXPECT_FALSE(memoryManager->multiStorageResourcePassed);
    EXPECT_EQ(expectedSubDevices, memoryManager->subDevicesBitfieldPassed);
    svmManager->freeSVMAlloc(ptr);
}

// Device USM, bitfield 0xf, multiOsContextSupport enabled: the request is passed on as a
// multi-storage resource with the unmodified sub-device bitfield.
TEST(UnifiedMemoryTest, givenDeviceBitfieldWithMultipleBitsSetWhenMultiOsContextFlagTrueAndDeviceMemoryThenProperPropertiesArePassedToMemoryManager) {
    MockContext context;
    auto clDevice = context.getDevice(0u);

    MockExecutionEnvironment execEnv;
    auto propertiesSpy = std::make_unique(false, true, execEnv);
    auto usmManager = std::make_unique(propertiesSpy.get(), false);

    std::set rootIndices{mockRootDeviceIndex};
    std::map subDeviceMasks{{mockRootDeviceIndex, DeviceBitfield(0xf)}};
    SVMAllocsManager::UnifiedMemoryProperties usmProps(InternalMemoryType::DEVICE_UNIFIED_MEMORY, rootIndices, subDeviceMasks);
    usmProps.device = &clDevice->getDevice();
    usmManager->multiOsContextSupport = true;

    auto usmPtr = usmManager->createUnifiedMemoryAllocation(4096u, usmProps);

    EXPECT_FALSE(propertiesSpy->multiOsContextCapablePassed);
    EXPECT_TRUE(propertiesSpy->multiStorageResourcePassed);
    EXPECT_EQ(usmProps.subdeviceBitfields.at(mockRootDeviceIndex), propertiesSpy->subDevicesBitfieldPassed);

    usmManager->freeSVMAlloc(usmPtr);
}

TEST(UnifiedMemoryTest,
givenDeviceBitfieldWithTwoBitsSetWhenMultiOsContextFlagTrueAndDeviceMemoryThenProperPropertiesArePassedToMemoryManager) { MockContext mockContext; std::set rootDeviceIndices{mockRootDeviceIndex}; MockExecutionEnvironment executionEnvironment; auto memoryManager = std::make_unique(false, true, executionEnvironment); auto svmManager = std::make_unique(memoryManager.get(), false); std::map deviceBitfields{{mockRootDeviceIndex, DeviceBitfield(0x6)}}; SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::DEVICE_UNIFIED_MEMORY, rootDeviceIndices, deviceBitfields); auto device = mockContext.getDevice(0u); unifiedMemoryProperties.device = &device->getDevice(); auto ptr = svmManager->createUnifiedMemoryAllocation(4096u, unifiedMemoryProperties); EXPECT_FALSE(memoryManager->multiOsContextCapablePassed); EXPECT_FALSE(memoryManager->multiStorageResourcePassed); auto expectedSubDevices = unifiedMemoryProperties.subdeviceBitfields.at(mockRootDeviceIndex); expectedSubDevices.reset(); expectedSubDevices.set(1); EXPECT_EQ(expectedSubDevices, memoryManager->subDevicesBitfieldPassed); svmManager->freeSVMAlloc(ptr); } TEST(UnifiedMemoryTest, givenDeviceBitfieldWithSingleBitsSetWhenMultiOsContextFlagTrueThenProperPropertiesArePassedToMemoryManager) { MockCommandQueue cmdQ; DebugManagerStateRestore restorer; DebugManager.flags.CreateMultipleSubDevices.set(1); MockExecutionEnvironment executionEnvironment; auto memoryManager = std::make_unique(false, true, executionEnvironment); auto svmManager = std::make_unique(memoryManager.get(), false); memoryManager->pageFaultManager = std::make_unique(); std::set rootDeviceIndices{mockRootDeviceIndex}; std::map deviceBitfields{{mockRootDeviceIndex, DeviceBitfield(0x1)}}; SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::SHARED_UNIFIED_MEMORY, rootDeviceIndices, deviceBitfields); svmManager->multiOsContextSupport = true; auto ptr = 
svmManager->createSharedUnifiedMemoryAllocation(4096u, unifiedMemoryProperties, &cmdQ); EXPECT_FALSE(memoryManager->multiOsContextCapablePassed); EXPECT_FALSE(memoryManager->multiStorageResourcePassed); EXPECT_EQ(unifiedMemoryProperties.subdeviceBitfields.at(mockRootDeviceIndex), memoryManager->subDevicesBitfieldPassed); svmManager->freeSVMAlloc(ptr); } TEST(UnifiedMemoryTest, givenInternalAllocationWhenItIsMadeResidentThenNewTrackingEntryIsCreated) { MockCommandQueue cmdQ; MockDevice device; MockExecutionEnvironment executionEnvironment; auto memoryManager = std::make_unique(false, true, executionEnvironment); auto unifiedMemoryManager = std::make_unique(memoryManager.get(), false); memoryManager->pageFaultManager = std::make_unique(); std::set rootDeviceIndices{mockRootDeviceIndex}; std::map deviceBitfields{{mockRootDeviceIndex, DeviceBitfield(0x1)}}; SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::SHARED_UNIFIED_MEMORY, rootDeviceIndices, deviceBitfields); auto ptr = unifiedMemoryManager->createSharedUnifiedMemoryAllocation(4096u, unifiedMemoryProperties, &cmdQ); ASSERT_NE(nullptr, ptr); auto graphicsAllocation = unifiedMemoryManager->getSVMAlloc(ptr); auto &commandStreamReceiver = device.getGpgpuCommandStreamReceiver(); EXPECT_FALSE(graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->isResident(commandStreamReceiver.getOsContext().getContextId())); EXPECT_EQ(0u, unifiedMemoryManager->indirectAllocationsResidency.size()); unifiedMemoryManager->makeIndirectAllocationsResident(commandStreamReceiver, 1u); EXPECT_TRUE(graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->isResident(commandStreamReceiver.getOsContext().getContextId())); EXPECT_EQ(GraphicsAllocation::objectAlwaysResident, graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->getResidencyTaskCount(commandStreamReceiver.getOsContext().getContextId())); 
EXPECT_FALSE(graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->peekEvictable()); EXPECT_EQ(1u, unifiedMemoryManager->indirectAllocationsResidency.size()); auto internalEntry = unifiedMemoryManager->indirectAllocationsResidency.find(&commandStreamReceiver)->second; EXPECT_EQ(1u, internalEntry.latestSentTaskCount); EXPECT_EQ(1u, internalEntry.latestResidentObjectId); unifiedMemoryManager->freeSVMAlloc(ptr); } TEST(UnifiedMemoryTest, givenInternalAllocationWhenItIsMadeResidentThenSubsequentCallsDoNotCallResidency) { MockCommandQueue cmdQ; MockDevice device; MockExecutionEnvironment executionEnvironment; auto memoryManager = std::make_unique(false, true, executionEnvironment); auto unifiedMemoryManager = std::make_unique(memoryManager.get(), false); memoryManager->pageFaultManager = std::make_unique(); std::set rootDeviceIndices{mockRootDeviceIndex}; std::map deviceBitfields{{mockRootDeviceIndex, DeviceBitfield(0x1)}}; SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::SHARED_UNIFIED_MEMORY, rootDeviceIndices, deviceBitfields); auto ptr = unifiedMemoryManager->createSharedUnifiedMemoryAllocation(4096u, unifiedMemoryProperties, &cmdQ); ASSERT_NE(nullptr, ptr); auto graphicsAllocation = unifiedMemoryManager->getSVMAlloc(ptr); auto &commandStreamReceiver = device.getGpgpuCommandStreamReceiver(); unifiedMemoryManager->makeIndirectAllocationsResident(commandStreamReceiver, 1u); EXPECT_TRUE(graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->isResident(commandStreamReceiver.getOsContext().getContextId())); //now call with task count 2 , allocations shouldn't change unifiedMemoryManager->makeIndirectAllocationsResident(commandStreamReceiver, 2u); auto internalEntry = unifiedMemoryManager->indirectAllocationsResidency.find(&commandStreamReceiver)->second; EXPECT_EQ(2u, internalEntry.latestSentTaskCount); 
EXPECT_TRUE(graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->isResident(commandStreamReceiver.getOsContext().getContextId())); //force Graphics Allocation to be non resident graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->updateResidencyTaskCount(GraphicsAllocation::objectNotResident, commandStreamReceiver.getOsContext().getContextId()); EXPECT_FALSE(graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->isResident(commandStreamReceiver.getOsContext().getContextId())); //now call with task count 3 , allocations shouldn't change unifiedMemoryManager->makeIndirectAllocationsResident(commandStreamReceiver, 2u); EXPECT_FALSE(graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->isResident(commandStreamReceiver.getOsContext().getContextId())); unifiedMemoryManager->freeSVMAlloc(ptr); } TEST(UnifiedMemoryTest, givenInternalAllocationWhenNewAllocationIsCreatedThenItIsMadeResident) { MockCommandQueue cmdQ; MockDevice device; MockExecutionEnvironment executionEnvironment; auto memoryManager = std::make_unique(false, true, executionEnvironment); auto unifiedMemoryManager = std::make_unique(memoryManager.get(), false); memoryManager->pageFaultManager = std::make_unique(); std::set rootDeviceIndices{mockRootDeviceIndex}; std::map deviceBitfields{{mockRootDeviceIndex, DeviceBitfield(0x1)}}; SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::SHARED_UNIFIED_MEMORY, rootDeviceIndices, deviceBitfields); auto ptr = unifiedMemoryManager->createSharedUnifiedMemoryAllocation(4096u, unifiedMemoryProperties, &cmdQ); ASSERT_NE(nullptr, ptr); auto graphicsAllocation = unifiedMemoryManager->getSVMAlloc(ptr); auto &commandStreamReceiver = device.getGpgpuCommandStreamReceiver(); unifiedMemoryManager->makeIndirectAllocationsResident(commandStreamReceiver, 1u); 
EXPECT_TRUE(graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->isResident(commandStreamReceiver.getOsContext().getContextId())); //force to non resident graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->updateResidencyTaskCount(GraphicsAllocation::objectNotResident, commandStreamReceiver.getOsContext().getContextId()); auto ptr2 = unifiedMemoryManager->createSharedUnifiedMemoryAllocation(4096u, unifiedMemoryProperties, &cmdQ); auto graphicsAllocation2 = unifiedMemoryManager->getSVMAlloc(ptr); EXPECT_FALSE(graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->isResident(commandStreamReceiver.getOsContext().getContextId())); EXPECT_FALSE(graphicsAllocation2->gpuAllocations.getDefaultGraphicsAllocation()->isResident(commandStreamReceiver.getOsContext().getContextId())); //now call with task count 2, both allocations needs to be made resident unifiedMemoryManager->makeIndirectAllocationsResident(commandStreamReceiver, 2u); EXPECT_TRUE(graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->isResident(commandStreamReceiver.getOsContext().getContextId())); EXPECT_TRUE(graphicsAllocation2->gpuAllocations.getDefaultGraphicsAllocation()->isResident(commandStreamReceiver.getOsContext().getContextId())); unifiedMemoryManager->freeSVMAlloc(ptr); unifiedMemoryManager->freeSVMAlloc(ptr2); } TEST(UnifiedMemoryTest, givenInternalAllocationsWhenTheyArePreparedForFreeingThenProperTaskCountIsAssigned) { MockCommandQueue cmdQ; MockDevice device; MockExecutionEnvironment executionEnvironment; auto memoryManager = std::make_unique(false, true, executionEnvironment); auto unifiedMemoryManager = std::make_unique(memoryManager.get(), false); memoryManager->pageFaultManager = std::make_unique(); std::set rootDeviceIndices{mockRootDeviceIndex}; std::map deviceBitfields{{mockRootDeviceIndex, DeviceBitfield(0x1)}}; SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::SHARED_UNIFIED_MEMORY, 
rootDeviceIndices, deviceBitfields); auto ptr = unifiedMemoryManager->createSharedUnifiedMemoryAllocation(4096u, unifiedMemoryProperties, &cmdQ); ASSERT_NE(nullptr, ptr); auto graphicsAllocation = unifiedMemoryManager->getSVMAlloc(ptr); auto &commandStreamReceiver = device.getGpgpuCommandStreamReceiver(); unifiedMemoryManager->makeIndirectAllocationsResident(commandStreamReceiver, 1u); unifiedMemoryManager->makeIndirectAllocationsResident(commandStreamReceiver, 124u); EXPECT_EQ(1u, graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->getTaskCount(commandStreamReceiver.getOsContext().getContextId())); EXPECT_EQ(GraphicsAllocation::objectAlwaysResident, graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->getResidencyTaskCount(commandStreamReceiver.getOsContext().getContextId())); auto allocationData = unifiedMemoryManager->getSVMAlloc(ptr); unifiedMemoryManager->prepareIndirectAllocationForDestruction(allocationData); EXPECT_EQ(124u, graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->getTaskCount(commandStreamReceiver.getOsContext().getContextId())); EXPECT_EQ(124u, graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->getResidencyTaskCount(commandStreamReceiver.getOsContext().getContextId())); unifiedMemoryManager->freeSVMAlloc(ptr); } TEST_F(UnifiedMemoryManagerPropertiesTest, givenDeviceBitfieldWithSingleBitSetWhenDeviceUnifiedMemoryAllocationIsCreatedThenProperPropertiesArePassedToMemoryManager) { MockCommandQueue cmdQ; std::set rootDeviceIndices{mockRootDeviceIndex}; std::map deviceBitfields{{mockRootDeviceIndex, DeviceBitfield(0x8)}}; SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::SHARED_UNIFIED_MEMORY, rootDeviceIndices, deviceBitfields); auto ptr = svmManager->createSharedUnifiedMemoryAllocation(4096u, unifiedMemoryProperties, &cmdQ); EXPECT_FALSE(memoryManager->multiOsContextCapablePassed); EXPECT_FALSE(memoryManager->multiStorageResourcePassed); 
EXPECT_EQ(unifiedMemoryProperties.subdeviceBitfields.at(mockRootDeviceIndex), memoryManager->subDevicesBitfieldPassed);
    svmManager->freeSVMAlloc(ptr);
}

// Bitfield 0xF with multiOsContextSupport enabled: an allocation with device storage is requested
// as a multi-storage resource with the unmodified sub-device bitfield.
TEST_F(UnifiedMemoryManagerPropertiesTest, givenDeviceBitfieldWithMultiDeviceBitSetWhenMultiOsContextFlagTrueThenProperPropertiesArePassedToMemoryManager) {
    std::set rootIndices{mockRootDeviceIndex};
    std::map subDeviceMasks{{mockRootDeviceIndex, DeviceBitfield(0xF)}};
    SVMAllocsManager::UnifiedMemoryProperties usmProps(InternalMemoryType::SHARED_UNIFIED_MEMORY, rootIndices, subDeviceMasks);
    svmManager->multiOsContextSupport = true;

    auto usmPtr = svmManager->createUnifiedAllocationWithDeviceStorage(10 * MemoryConstants::pageSize64k, {}, usmProps);

    EXPECT_FALSE(memoryManager->multiOsContextCapablePassed);
    EXPECT_TRUE(memoryManager->multiStorageResourcePassed);
    EXPECT_EQ(usmProps.subdeviceBitfields.at(mockRootDeviceIndex), memoryManager->subDevicesBitfieldPassed);

    svmManager->freeSVMAlloc(usmPtr);
}

TEST_F(UnifiedMemoryManagerPropertiesTest, givenDeviceBitfieldWithMultiDeviceBitSetWhenMultiOsContextFlagFalseThenLowestSubdeviceIsPassedToMemoryManager) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.OverrideLeastOccupiedBank.set(1);
    std::set rootDeviceIndices{mockRootDeviceIndex};
    std::map deviceBitfields{{mockRootDeviceIndex, DeviceBitfield(0xE)}};
    SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::SHARED_UNIFIED_MEMORY, rootDeviceIndices, deviceBitfields);
    svmManager->multiOsContextSupport = false;
    auto ptr = svmManager->createUnifiedAllocationWithDeviceStorage(10 * MemoryConstants::pageSize64k, {}, unifiedMemoryProperties);
    auto expectedSubDevices = unifiedMemoryProperties.subdeviceBitfields.at(mockRootDeviceIndex);
    expectedSubDevices.reset();
    expectedSubDevices.set(1);
    EXPECT_FALSE(memoryManager->multiOsContextCapablePassed);
    EXPECT_FALSE(memoryManager->multiStorageResourcePassed);
    EXPECT_EQ(expectedSubDevices,
memoryManager->subDevicesBitfieldPassed);
    svmManager->freeSVMAlloc(ptr);
}

// Single-bit bitfield (0x1) with multiOsContextSupport enabled: the device-storage allocation is
// not requested as a multi-storage resource and the bitfield is passed through unchanged.
TEST_F(UnifiedMemoryManagerPropertiesTest, givenDeviceBitfieldWithSingleDeviceBitSetWhenMultiOsContextFlagTrueThenProperPropertiesArePassedToMemoryManager) {
    std::set rootIndices{mockRootDeviceIndex};
    std::map subDeviceMasks{{mockRootDeviceIndex, DeviceBitfield(0x1)}};
    SVMAllocsManager::UnifiedMemoryProperties usmProps(InternalMemoryType::SHARED_UNIFIED_MEMORY, rootIndices, subDeviceMasks);
    svmManager->multiOsContextSupport = true;

    auto usmPtr = svmManager->createUnifiedAllocationWithDeviceStorage(10 * MemoryConstants::pageSize64k, {}, usmProps);

    EXPECT_FALSE(memoryManager->multiOsContextCapablePassed);
    EXPECT_FALSE(memoryManager->multiStorageResourcePassed);
    EXPECT_EQ(usmProps.subdeviceBitfields.at(mockRootDeviceIndex), memoryManager->subDevicesBitfieldPassed);

    svmManager->freeSVMAlloc(usmPtr);
}

// Host USM with a single-bit bitfield (0x1) and multiOsContextSupport enabled: the multi-storage
// flag stays false and the bitfield is passed through unchanged.
TEST_F(UnifiedMemoryManagerPropertiesTest, givenSvmManagerMultiOsContextSupportFlagTrueWhenRootDeviceIsSingleThenMultiStorageFlagFalse) {
    std::set rootIndices{mockRootDeviceIndex};
    std::map subDeviceMasks{{mockRootDeviceIndex, DeviceBitfield(0x1)}};
    SVMAllocsManager::UnifiedMemoryProperties usmProps(InternalMemoryType::HOST_UNIFIED_MEMORY, rootIndices, subDeviceMasks);
    svmManager->multiOsContextSupport = true;

    auto hostPtr = svmManager->createHostUnifiedMemoryAllocation(MemoryConstants::pageSize64k, usmProps);

    EXPECT_FALSE(memoryManager->multiOsContextCapablePassed);
    EXPECT_FALSE(memoryManager->multiStorageResourcePassed);
    EXPECT_EQ(usmProps.subdeviceBitfields.at(mockRootDeviceIndex), memoryManager->subDevicesBitfieldPassed);

    svmManager->freeSVMAlloc(hostPtr);
}

TEST_F(UnifiedMemoryManagerPropertiesTest, given1ByteAsAllocationSizeWhenHostMemAllocIsCreatedItIsAlignedTo4k) {
    std::set rootDeviceIndices{mockRootDeviceIndex};
    std::map deviceBitfields{{mockRootDeviceIndex, DeviceBitfield(0x1)}};
SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::HOST_UNIFIED_MEMORY, rootDeviceIndices, deviceBitfields); svmManager->multiOsContextSupport = true; auto ptr = svmManager->createHostUnifiedMemoryAllocation(1u, unifiedMemoryProperties); auto allocation = svmManager->getSVMAlloc(ptr); EXPECT_EQ(MemoryConstants::pageSize, allocation->gpuAllocations.getDefaultGraphicsAllocation()->getUnderlyingBufferSize()); svmManager->freeSVMAlloc(ptr); } TEST_F(UnifiedMemoryManagerPropertiesTest, givenSvmManagerMultiOsContextSupportFlagFalseWhenRootDeviceIsMultiThenMultiStorageFlagFalse) { std::set rootDeviceIndices{mockRootDeviceIndex}; std::map deviceBitfields{{mockRootDeviceIndex, DeviceBitfield(0xF)}}; SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::HOST_UNIFIED_MEMORY, rootDeviceIndices, deviceBitfields); svmManager->multiOsContextSupport = false; auto ptr = svmManager->createHostUnifiedMemoryAllocation(MemoryConstants::pageSize64k, unifiedMemoryProperties); EXPECT_FALSE(memoryManager->multiOsContextCapablePassed); EXPECT_FALSE(memoryManager->multiStorageResourcePassed); EXPECT_EQ(unifiedMemoryProperties.subdeviceBitfields.at(mockRootDeviceIndex), memoryManager->subDevicesBitfieldPassed); svmManager->freeSVMAlloc(ptr); } TEST_F(UnifiedMemoryManagerPropertiesTest, givenSvmManagerMultiOsContextSupportFlagTrueWhenRootDeviceIsMultiThenMultiStorageFlagTrue) { std::set rootDeviceIndices{mockRootDeviceIndex}; std::map deviceBitfields{{mockRootDeviceIndex, DeviceBitfield(0xF)}}; SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::HOST_UNIFIED_MEMORY, rootDeviceIndices, deviceBitfields); svmManager->multiOsContextSupport = true; auto ptr = svmManager->createHostUnifiedMemoryAllocation(MemoryConstants::pageSize64k, unifiedMemoryProperties); EXPECT_FALSE(memoryManager->multiOsContextCapablePassed); EXPECT_TRUE(memoryManager->multiStorageResourcePassed); 
EXPECT_EQ(unifiedMemoryProperties.subdeviceBitfields.at(mockRootDeviceIndex), memoryManager->subDevicesBitfieldPassed); svmManager->freeSVMAlloc(ptr); } struct ShareableUnifiedMemoryManagerPropertiesTest : public ::testing::Test { void SetUp() override { executionEnvironment = platform()->peekExecutionEnvironment(); bool svmSupported = executionEnvironment->rootDeviceEnvironments[0]->getHardwareInfo()->capabilityTable.ftrSvm; if (!svmSupported) { GTEST_SKIP(); } memoryManager = std::make_unique(false, true, *executionEnvironment); svmManager = std::make_unique(memoryManager.get(), false); memoryManager->pageFaultManager.reset(new MockPageFaultManager); } ExecutionEnvironment *executionEnvironment; std::unique_ptr memoryManager; std::unique_ptr svmManager; std::set rootDeviceIndices{mockRootDeviceIndex}; std::map deviceBitfields{{mockRootDeviceIndex, mockDeviceBitfield}}; }; TEST_F(ShareableUnifiedMemoryManagerPropertiesTest, givenShareableUnifiedPropertyFlagThenShareableAllocationPropertyFlagIsSet) { MockContext mockContext; auto device = mockContext.getDevice(0u); SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::DEVICE_UNIFIED_MEMORY, rootDeviceIndices, deviceBitfields); unifiedMemoryProperties.device = &device->getDevice(); unifiedMemoryProperties.allocationFlags.flags.shareable = 1; auto ptr = svmManager->createUnifiedMemoryAllocation(4096u, unifiedMemoryProperties); EXPECT_TRUE(memoryManager->shareablePassed); svmManager->freeSVMAlloc(ptr); } TEST(UnifiedSharedMemoryTransferCalls, givenHostUsmAllocationWhenPointerIsUsedForTransferCallsThenUSMAllocationIsReused) { MockContext mockContext; auto device = mockContext.getDevice(0u); REQUIRE_SVM_OR_SKIP(device); cl_context clContext = &mockContext; auto status = CL_INVALID_PLATFORM; auto hostMemory = clHostMemAllocINTEL(clContext, nullptr, 4096u, 0u, &status); ASSERT_EQ(CL_SUCCESS, status); auto svmAllocation = mockContext.getSVMAllocsManager()->getSVMAlloc(hostMemory); auto 
gpuAllocation = svmAllocation->gpuAllocations.getGraphicsAllocation(mockContext.getDevice(0)->getRootDeviceIndex()); auto buffer = clCreateBuffer(clContext, CL_MEM_READ_WRITE, 4096u, nullptr, &status); ASSERT_EQ(CL_SUCCESS, status); cl_device_id clDevice = mockContext.getDevice(0u); auto commandQueue = clCreateCommandQueue(clContext, clDevice, 0u, &status); ASSERT_EQ(CL_SUCCESS, status); status = clEnqueueWriteBuffer(commandQueue, buffer, false, 0u, 4096u, hostMemory, 0u, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, status); auto neoQueue = castToObject(commandQueue); auto &temporaryAllocations = neoQueue->getGpgpuCommandStreamReceiver().getTemporaryAllocations(); EXPECT_TRUE(temporaryAllocations.peekIsEmpty()); auto osContextId = neoQueue->getGpgpuCommandStreamReceiver().getOsContext().getContextId(); EXPECT_EQ(1u, gpuAllocation->getTaskCount(osContextId)); status = clEnqueueReadBuffer(commandQueue, buffer, false, 0u, 4096u, hostMemory, 0u, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, status); EXPECT_TRUE(temporaryAllocations.peekIsEmpty()); EXPECT_EQ(2u, gpuAllocation->getTaskCount(osContextId)); status = clReleaseMemObject(buffer); ASSERT_EQ(CL_SUCCESS, status); status = clMemFreeINTEL(clContext, hostMemory); ASSERT_EQ(CL_SUCCESS, status); clReleaseCommandQueue(commandQueue); } TEST(UnifiedSharedMemoryTransferCalls, givenDeviceUsmAllocationWhenPtrIsUsedForTransferCallsThenUsmAllocationIsReused) { MockContext mockContext; cl_context clContext = &mockContext; auto status = CL_INVALID_PLATFORM; cl_device_id clDevice = mockContext.getDevice(0u); auto deviceMemory = clDeviceMemAllocINTEL(clContext, clDevice, nullptr, 4096u, 0u, &status); ASSERT_EQ(CL_SUCCESS, status); auto svmAllocation = mockContext.getSVMAllocsManager()->getSVMAlloc(deviceMemory); auto gpuAllocation = svmAllocation->gpuAllocations.getGraphicsAllocation(mockContext.getDevice(0)->getRootDeviceIndex()); auto buffer = clCreateBuffer(clContext, CL_MEM_READ_WRITE, 4096u, nullptr, &status); ASSERT_EQ(CL_SUCCESS, 
status); auto commandQueue = clCreateCommandQueue(clContext, clDevice, 0u, &status); ASSERT_EQ(CL_SUCCESS, status); status = clEnqueueWriteBuffer(commandQueue, buffer, false, 0u, 4096u, deviceMemory, 0u, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, status); auto neoQueue = castToObject(commandQueue); auto &temporaryAllocations = neoQueue->getGpgpuCommandStreamReceiver().getTemporaryAllocations(); EXPECT_TRUE(temporaryAllocations.peekIsEmpty()); auto osContextId = neoQueue->getGpgpuCommandStreamReceiver().getOsContext().getContextId(); EXPECT_EQ(1u, gpuAllocation->getTaskCount(osContextId)); status = clEnqueueReadBuffer(commandQueue, buffer, false, 0u, 4096u, deviceMemory, 0u, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, status); EXPECT_EQ(2u, gpuAllocation->getTaskCount(osContextId)); status = clReleaseMemObject(buffer); ASSERT_EQ(CL_SUCCESS, status); status = clMemFreeINTEL(clContext, deviceMemory); ASSERT_EQ(CL_SUCCESS, status); clReleaseCommandQueue(commandQueue); } TEST(UnifiedSharedMemoryTransferCalls, givenDeviceUsmAllocationWhenPtrIsUsedForTransferCallsThenCPUPathIsNotChoosen) { MockContext mockContext; cl_context clContext = &mockContext; auto status = CL_INVALID_PLATFORM; cl_device_id clDevice = mockContext.getDevice(0u); auto deviceMemory = clDeviceMemAllocINTEL(clContext, clDevice, nullptr, 4096u, 0u, &status); ASSERT_EQ(CL_SUCCESS, status); auto svmAllocation = mockContext.getSVMAllocsManager()->getSVMAlloc(deviceMemory); auto gpuAllocation = svmAllocation->gpuAllocations.getGraphicsAllocation(mockContext.getDevice(0)->getRootDeviceIndex()); auto buffer = clCreateBuffer(clContext, CL_MEM_READ_WRITE, 4096u, nullptr, &status); ASSERT_EQ(CL_SUCCESS, status); auto commandQueue = clCreateCommandQueue(clContext, clDevice, 0u, &status); ASSERT_EQ(CL_SUCCESS, status); status = clEnqueueWriteBuffer(commandQueue, buffer, true, 0u, 4096u, deviceMemory, 0u, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, status); auto neoQueue = castToObject(commandQueue); auto 
&temporaryAllocations = neoQueue->getGpgpuCommandStreamReceiver().getTemporaryAllocations(); EXPECT_TRUE(temporaryAllocations.peekIsEmpty()); auto osContextId = neoQueue->getGpgpuCommandStreamReceiver().getOsContext().getContextId(); EXPECT_EQ(1u, gpuAllocation->getTaskCount(osContextId)); status = clEnqueueReadBuffer(commandQueue, buffer, true, 0u, 4096u, deviceMemory, 0u, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, status); EXPECT_EQ(2u, gpuAllocation->getTaskCount(osContextId)); status = clReleaseMemObject(buffer); ASSERT_EQ(CL_SUCCESS, status); status = clMemFreeINTEL(clContext, deviceMemory); ASSERT_EQ(CL_SUCCESS, status); clReleaseCommandQueue(commandQueue); } TEST(UnifiedSharedMemoryTransferCalls, givenHostUsmAllocationWhenPtrIsUsedForTransferCallsThenCPUPathIsChoosen) { DebugManagerStateRestore restorer; DebugManager.flags.EnableLocalMemory.set(false); DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast(LocalMemoryAccessMode::Default)); MockContext mockContext; cl_context clContext = &mockContext; if (mockContext.getDevice(0u)->getHardwareInfo().capabilityTable.supportsOcl21Features == false) { GTEST_SKIP(); } auto status = CL_INVALID_PLATFORM; cl_device_id clDevice = mockContext.getDevice(0u); auto sharedMemory = clSharedMemAllocINTEL(clContext, clDevice, nullptr, 4096u, 0u, &status); ASSERT_EQ(CL_SUCCESS, status); auto svmAllocation = mockContext.getSVMAllocsManager()->getSVMAlloc(sharedMemory); auto gpuAllocation = svmAllocation->gpuAllocations.getGraphicsAllocation(mockContext.getDevice(0)->getRootDeviceIndex()); auto buffer = clCreateBuffer(clContext, CL_MEM_READ_WRITE, 4096u, nullptr, &status); ASSERT_EQ(CL_SUCCESS, status); auto commandQueue = clCreateCommandQueue(clContext, clDevice, 0u, &status); ASSERT_EQ(CL_SUCCESS, status); status = clEnqueueWriteBuffer(commandQueue, buffer, true, 0u, 4096u, sharedMemory, 0u, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, status); auto neoQueue = castToObject(commandQueue); auto &temporaryAllocations = 
neoQueue->getGpgpuCommandStreamReceiver().getTemporaryAllocations(); EXPECT_TRUE(temporaryAllocations.peekIsEmpty()); auto osContextId = neoQueue->getGpgpuCommandStreamReceiver().getOsContext().getContextId(); EXPECT_EQ(GraphicsAllocation::objectNotUsed, gpuAllocation->getTaskCount(osContextId)); status = clEnqueueReadBuffer(commandQueue, buffer, true, 0u, 4096u, sharedMemory, 0u, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, status); EXPECT_EQ(GraphicsAllocation::objectNotUsed, gpuAllocation->getTaskCount(osContextId)); status = clReleaseMemObject(buffer); ASSERT_EQ(CL_SUCCESS, status); status = clMemFreeINTEL(clContext, sharedMemory); ASSERT_EQ(CL_SUCCESS, status); clReleaseCommandQueue(commandQueue); } TEST(UnifiedSharedMemoryTransferCalls, givenHostAllocationThatIsSmallerThenTransferRequirementsThenErrorIsReturned) { MockContext mockContext; auto device = mockContext.getDevice(0u); REQUIRE_SVM_OR_SKIP(device); cl_context clContext = &mockContext; auto status = CL_SUCCESS; auto hostMemory = clHostMemAllocINTEL(clContext, nullptr, 4u, 0u, &status); ASSERT_EQ(CL_SUCCESS, status); auto buffer = clCreateBuffer(clContext, CL_MEM_READ_WRITE, 4096u, nullptr, &status); ASSERT_EQ(CL_SUCCESS, status); cl_device_id clDevice = mockContext.getDevice(0u); auto commandQueue = clCreateCommandQueue(clContext, clDevice, 0u, &status); ASSERT_EQ(CL_SUCCESS, status); status = clEnqueueWriteBuffer(commandQueue, buffer, false, 0u, 4096u, hostMemory, 0u, nullptr, nullptr); EXPECT_EQ(CL_INVALID_OPERATION, status); status = clEnqueueReadBuffer(commandQueue, buffer, false, 0u, 4096u, hostMemory, 0u, nullptr, nullptr); ASSERT_EQ(CL_INVALID_OPERATION, status); status = clReleaseMemObject(buffer); ASSERT_EQ(CL_SUCCESS, status); status = clMemFreeINTEL(clContext, hostMemory); ASSERT_EQ(CL_SUCCESS, status); clReleaseCommandQueue(commandQueue); } TEST(UnifiedSharedMemoryTransferCalls, givenSharedUsmAllocationWithoutLocalMemoryWhenPointerIsUsedAsTranfserParameterThenUSMAllocationIsReused) { 
DebugManagerStateRestore restore; DebugManager.flags.EnableLocalMemory.set(0); MockContext mockContext; cl_context clContext = &mockContext; cl_device_id clDevice = mockContext.getDevice(0u); auto status = CL_INVALID_PLATFORM; auto sharedMemory = clSharedMemAllocINTEL(clContext, clDevice, nullptr, 4096u, 0u, &status); ASSERT_EQ(CL_SUCCESS, status); auto svmAllocation = mockContext.getSVMAllocsManager()->getSVMAlloc(sharedMemory); auto gpuAllocation = svmAllocation->gpuAllocations.getGraphicsAllocation(mockContext.getDevice(0)->getRootDeviceIndex()); auto buffer = clCreateBuffer(clContext, CL_MEM_READ_WRITE, 4096u, nullptr, &status); ASSERT_EQ(CL_SUCCESS, status); auto commandQueue = clCreateCommandQueue(clContext, clDevice, 0u, &status); ASSERT_EQ(CL_SUCCESS, status); status = clEnqueueWriteBuffer(commandQueue, buffer, false, 0u, 4096u, sharedMemory, 0u, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, status); auto neoQueue = castToObject(commandQueue); auto &temporaryAllocations = neoQueue->getGpgpuCommandStreamReceiver().getTemporaryAllocations(); EXPECT_TRUE(temporaryAllocations.peekIsEmpty()); auto osContextId = neoQueue->getGpgpuCommandStreamReceiver().getOsContext().getContextId(); EXPECT_EQ(1u, gpuAllocation->getTaskCount(osContextId)); status = clEnqueueReadBuffer(commandQueue, buffer, false, 0u, 4096u, sharedMemory, 0u, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, status); EXPECT_TRUE(temporaryAllocations.peekIsEmpty()); EXPECT_EQ(2u, gpuAllocation->getTaskCount(osContextId)); status = clReleaseMemObject(buffer); ASSERT_EQ(CL_SUCCESS, status); status = clMemFreeINTEL(clContext, sharedMemory); ASSERT_EQ(CL_SUCCESS, status); clReleaseCommandQueue(commandQueue); } TEST(UnifiedSharedMemoryTransferCalls, givenSharedUsmAllocationWithLocalMemoryWhenPointerIsUsedAsTransferParameterThenUSMAllocationIsReused) { DebugManagerStateRestore restore; DebugManager.flags.EnableLocalMemory.set(1); MockContext mockContext; cl_context clContext = &mockContext; cl_device_id clDevice = 
mockContext.getDevice(0u); auto status = CL_SUCCESS; auto sharedMemory = clSharedMemAllocINTEL(clContext, clDevice, nullptr, 4096u, 0u, &status); auto svmAllocation = mockContext.getSVMAllocsManager()->getSVMAlloc(sharedMemory); ASSERT_EQ(CL_SUCCESS, status); auto buffer = clCreateBuffer(clContext, CL_MEM_READ_WRITE, 4096u, nullptr, &status); ASSERT_EQ(CL_SUCCESS, status); auto commandQueue = clCreateCommandQueue(clContext, clDevice, 0u, &status); ASSERT_EQ(CL_SUCCESS, status); auto neoQueue = castToObject(commandQueue); auto osContextId = neoQueue->getGpgpuCommandStreamReceiver().getOsContext().getContextId(); EXPECT_EQ(GraphicsAllocation::objectNotUsed, svmAllocation->cpuAllocation->getTaskCount(osContextId)); status = clEnqueueWriteBuffer(commandQueue, buffer, false, 0u, 4096u, sharedMemory, 0u, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, status); auto &temporaryAllocations = neoQueue->getGpgpuCommandStreamReceiver().getTemporaryAllocations(); EXPECT_TRUE(temporaryAllocations.peekIsEmpty()); EXPECT_EQ(1u, svmAllocation->cpuAllocation->getTaskCount(osContextId)); status = clEnqueueReadBuffer(commandQueue, buffer, false, 0u, 4096u, sharedMemory, 0u, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, status); EXPECT_TRUE(temporaryAllocations.peekIsEmpty()); EXPECT_EQ(2u, svmAllocation->cpuAllocation->getTaskCount(osContextId)); status = clReleaseMemObject(buffer); ASSERT_EQ(CL_SUCCESS, status); status = clMemFreeINTEL(clContext, sharedMemory); ASSERT_EQ(CL_SUCCESS, status); clReleaseCommandQueue(commandQueue); } class UnifiedSharedMemoryHWTest : public testing::Test { public: MockContext mockContext; }; template class TestCommandQueueHw : public CommandQueueHw { typedef CommandQueueHw BaseClass; public: TestCommandQueueHw(Context *context, ClDevice *device, cl_queue_properties *properties) : BaseClass(context, device, properties, false){}; void *srcPtr = nullptr; void *dstPtr = nullptr; void enqueueHandlerHook(const unsigned int commandType, const MultiDispatchInfo 
&multiDispatchInfo) override { auto svmEntrySrc = this->getContext().getSVMAllocsManager()->getSVMAlloc(multiDispatchInfo.peekBuiltinOpParams().srcPtr); if (svmEntrySrc) { srcPtr = multiDispatchInfo.peekBuiltinOpParams().srcPtr; } else { srcPtr = multiDispatchInfo.peekBuiltinOpParams().transferAllocation->getUnderlyingBuffer(); } auto svmEntryDst = this->getContext().getSVMAllocsManager()->getSVMAlloc(multiDispatchInfo.peekBuiltinOpParams().dstPtr); if (svmEntryDst) { dstPtr = multiDispatchInfo.peekBuiltinOpParams().dstPtr; } else { dstPtr = multiDispatchInfo.peekBuiltinOpParams().transferAllocation->getUnderlyingBuffer(); } } }; HWTEST_F(UnifiedSharedMemoryHWTest, givenDeviceUsmAllocationWhenWriteBufferThenCpuPtrIsNotUsed) { SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::DEVICE_UNIFIED_MEMORY, mockContext.getRootDeviceIndices(), mockContext.getDeviceBitfields()); unifiedMemoryProperties.device = &mockContext.getDevice(0)->getDevice(); auto deviceMemory = mockContext.getSVMAllocsManager()->createUnifiedMemoryAllocation(4096u, unifiedMemoryProperties); auto svmAllocation = mockContext.getSVMAllocsManager()->getSVMAlloc(deviceMemory); GraphicsAllocation *gpuAllocation = svmAllocation->gpuAllocations.getGraphicsAllocation(mockContext.getDevice(0)->getRootDeviceIndex()); char *cpuPtr = static_cast(gpuAllocation->getUnderlyingBuffer()); auto gpuAddress = gpuAllocation->getGpuAddress(); void *gpuPtr = reinterpret_cast(gpuAddress); char *shiftedPtr = cpuPtr + 0x10; gpuAllocation->setCpuPtrAndGpuAddress(shiftedPtr, reinterpret_cast(shiftedPtr)); cl_mem_flags flags = 0; auto status = CL_INVALID_PLATFORM; auto buffer = Buffer::create(&mockContext, flags, 4096u, nullptr, status); ASSERT_EQ(CL_SUCCESS, status); TestCommandQueueHw myCmdQ(&mockContext, mockContext.getDevice(0u), 0); myCmdQ.enqueueWriteBuffer(buffer, false, 0u, 4096u, deviceMemory, nullptr, 0u, nullptr, nullptr); EXPECT_EQ(gpuPtr, myCmdQ.srcPtr); 
gpuAllocation->setCpuPtrAndGpuAddress(cpuPtr, gpuAddress); delete buffer; clMemFreeINTEL(&mockContext, deviceMemory); } HWTEST_F(UnifiedSharedMemoryHWTest, givenDeviceUsmAllocationWhenReadBufferThenCpuPtrIsNotUsed) { SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::DEVICE_UNIFIED_MEMORY, mockContext.getRootDeviceIndices(), mockContext.getDeviceBitfields()); unifiedMemoryProperties.device = &mockContext.getDevice(0)->getDevice(); auto deviceMemory = mockContext.getSVMAllocsManager()->createUnifiedMemoryAllocation(4096u, unifiedMemoryProperties); auto svmAllocation = mockContext.getSVMAllocsManager()->getSVMAlloc(deviceMemory); GraphicsAllocation *gpuAllocation = svmAllocation->gpuAllocations.getGraphicsAllocation(mockContext.getDevice(0)->getRootDeviceIndex()); char *cpuPtr = static_cast(gpuAllocation->getUnderlyingBuffer()); auto gpuAddress = gpuAllocation->getGpuAddress(); void *gpuPtr = reinterpret_cast(gpuAddress); char *shiftedPtr = cpuPtr + 0x10; gpuAllocation->setCpuPtrAndGpuAddress(shiftedPtr, reinterpret_cast(shiftedPtr)); cl_mem_flags flags = 0; auto status = CL_INVALID_PLATFORM; auto buffer = Buffer::create(&mockContext, flags, 4096u, nullptr, status); ASSERT_EQ(CL_SUCCESS, status); TestCommandQueueHw myCmdQ(&mockContext, mockContext.getDevice(0u), 0); myCmdQ.enqueueReadBuffer(buffer, false, 0u, 4096u, deviceMemory, nullptr, 0u, nullptr, nullptr); EXPECT_EQ(gpuPtr, myCmdQ.dstPtr); gpuAllocation->setCpuPtrAndGpuAddress(cpuPtr, gpuAddress); delete buffer; clMemFreeINTEL(&mockContext, deviceMemory); } HWTEST_F(UnifiedSharedMemoryHWTest, givenSharedUsmAllocationWhenWriteBufferThenCpuPtrIsNotUsed) { SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::SHARED_UNIFIED_MEMORY, mockContext.getRootDeviceIndices(), mockContext.getDeviceBitfields()); auto sharedMemory = mockContext.getSVMAllocsManager()->createUnifiedMemoryAllocation(4096u, unifiedMemoryProperties); auto svmAllocation = 
mockContext.getSVMAllocsManager()->getSVMAlloc(sharedMemory); GraphicsAllocation *gpuAllocation = svmAllocation->gpuAllocations.getGraphicsAllocation(mockContext.getDevice(0)->getRootDeviceIndex()); char *cpuPtr = static_cast(gpuAllocation->getUnderlyingBuffer()); auto gpuAddress = gpuAllocation->getGpuAddress(); void *gpuPtr = reinterpret_cast(gpuAddress); char *shiftedPtr = cpuPtr + 0x10; gpuAllocation->setCpuPtrAndGpuAddress(shiftedPtr, reinterpret_cast(shiftedPtr)); cl_mem_flags flags = 0; auto status = CL_INVALID_PLATFORM; auto buffer = Buffer::create(&mockContext, flags, 4096u, nullptr, status); ASSERT_EQ(CL_SUCCESS, status); TestCommandQueueHw myCmdQ(&mockContext, mockContext.getDevice(0u), 0); myCmdQ.enqueueWriteBuffer(buffer, false, 0u, 4096u, sharedMemory, nullptr, 0u, nullptr, nullptr); EXPECT_EQ(gpuPtr, myCmdQ.srcPtr); gpuAllocation->setCpuPtrAndGpuAddress(cpuPtr, gpuAddress); delete buffer; clMemFreeINTEL(&mockContext, sharedMemory); } HWTEST_F(UnifiedSharedMemoryHWTest, givenSharedUsmAllocationWhenReadBufferThenCpuPtrIsNotUsed) { SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::SHARED_UNIFIED_MEMORY, mockContext.getRootDeviceIndices(), mockContext.getDeviceBitfields()); auto sharedMemory = mockContext.getSVMAllocsManager()->createUnifiedMemoryAllocation(4096u, unifiedMemoryProperties); auto svmAllocation = mockContext.getSVMAllocsManager()->getSVMAlloc(sharedMemory); GraphicsAllocation *gpuAllocation = svmAllocation->gpuAllocations.getGraphicsAllocation(mockContext.getDevice(0)->getRootDeviceIndex()); char *cpuPtr = static_cast(gpuAllocation->getUnderlyingBuffer()); auto gpuAddress = gpuAllocation->getGpuAddress(); void *gpuPtr = reinterpret_cast(gpuAddress); char *shiftedPtr = cpuPtr + 0x10; gpuAllocation->setCpuPtrAndGpuAddress(shiftedPtr, reinterpret_cast(shiftedPtr)); cl_mem_flags flags = 0; auto status = CL_INVALID_PLATFORM; auto buffer = Buffer::create(&mockContext, flags, 4096u, nullptr, status); 
ASSERT_EQ(CL_SUCCESS, status); TestCommandQueueHw myCmdQ(&mockContext, mockContext.getDevice(0u), 0); myCmdQ.enqueueReadBuffer(buffer, false, 0u, 4096u, sharedMemory, nullptr, 0u, nullptr, nullptr); EXPECT_EQ(gpuPtr, myCmdQ.dstPtr); gpuAllocation->setCpuPtrAndGpuAddress(cpuPtr, gpuAddress); delete buffer; clMemFreeINTEL(&mockContext, sharedMemory); } TEST(UnifiedMemoryManagerTest, givenEnableStatelessCompressionWhenDeviceAllocationIsCreatedThenAllocationTypeIsBufferCompressed) { DebugManagerStateRestore restore; DebugManager.flags.RenderCompressedBuffersEnabled.set(1); cl_int retVal = CL_SUCCESS; MockContext mockContext; auto device = mockContext.getDevice(0u); auto allocationsManager = mockContext.getSVMAllocsManager(); for (auto enable : {-1, 0, 1}) { DebugManager.flags.EnableStatelessCompression.set(enable); auto deviceMemAllocPtr = clDeviceMemAllocINTEL(&mockContext, device, nullptr, 2048, 0, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, deviceMemAllocPtr); auto deviceMemAlloc = allocationsManager->getSVMAllocs()->get(deviceMemAllocPtr)->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex()); EXPECT_NE(nullptr, deviceMemAlloc); EXPECT_EQ((enable > 0), deviceMemAlloc->isCompressionEnabled()); retVal = clMemFreeINTEL(&mockContext, deviceMemAllocPtr); EXPECT_EQ(CL_SUCCESS, retVal); } } compute-runtime-22.14.22890/opencl/test/unit_test/memory_manager/unified_memory_token_tests.cpp000066400000000000000000000016451422164147700330000ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/unified_memory/unified_memory.h" #include "shared/source/unified_memory/usm_memory_support.h" #include "opencl/extensions/public/cl_ext_private.h" #include "gtest/gtest.h" TEST(UnifiedMemoryTests, givenCLUSMMemorySupportFlagsWhenUsingUnifiedMemorySupportFlagsThenEverythingMatch) { static_assert(CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL == UNIFIED_SHARED_MEMORY_ACCESS, "Flags value 
difference"); static_assert(CL_UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL == UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS, "Flags value difference"); static_assert(CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS_INTEL == UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS, "Flags value difference"); static_assert(CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS_INTEL == UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS, "Flags value difference"); } compute-runtime-22.14.22890/opencl/test/unit_test/mock_gl/000077500000000000000000000000001422164147700232425ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/mock_gl/CMakeLists.txt000066400000000000000000000001421422164147700257770ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # add_subdirectories() compute-runtime-22.14.22890/opencl/test/unit_test/mock_gl/windows/000077500000000000000000000000001422164147700247345ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/mock_gl/windows/CMakeLists.txt000066400000000000000000000014411422164147700274740ustar00rootroot00000000000000# # Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT # if(WIN32) set(IGDRCL_SRCS_mock_opengl32 ${CMAKE_CURRENT_SOURCE_DIR}/mock_opengl32.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_opengl32.def ${NEO_SOURCE_DIR}/opencl/test/unit_test/helpers/windows/mock_function.cpp ) add_library(mock_opengl32 SHARED ${IGDRCL_SRCS_mock_opengl32}) add_dependencies(unit_tests mock_opengl32) add_dependencies(igdrcl_tests mock_opengl32) set_target_properties(mock_opengl32 PROPERTIES FOLDER "test mocks") target_include_directories(mock_opengl32 PRIVATE $) target_compile_definitions(mock_opengl32 PRIVATE $) endif() compute-runtime-22.14.22890/opencl/test/unit_test/mock_gl/windows/mock_opengl32.cpp000066400000000000000000000352741422164147700301150ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #define 
_GDI32_ //It causes that definitions of functions are not loaded from dll in file wingdi.h because they are in this file. #include "opencl/test/unit_test/helpers/windows/mock_function.h" #include "opencl/test/unit_test/mocks/gl/windows/mock_gl_sharing_windows.h" #include "GL/gl.h" #include extern "C" { const char *glString = "Intel"; const char *glVersion = "4.0"; const char *arrayStringi[2]{"GL_OES_framebuffer_object", "GL_EXT_framebuffer_object"}; int GLAcquireSharedBufferCalled = 0; int GLAcquireSharedRenderBufferCalled = 0; int GLAcquireSharedTextureCalled = 0; int GLDeleteContextCalled = 0; int GLGetCurrentContextCalled = 0; int GLGetCurrentDisplayCalled = 0; int GLGetSyncivCalled = 0; int GLMakeCurrentCalled = 0; int GLReleaseSharedBufferCalled = 0; int GLReleaseSharedRenderBufferCalled = 0; int GLReleaseSharedTextureCalled = 0; int GLReleaseSyncCalled = 0; int GLRetainSyncCalled = 0; int WGLCreateContextCalled = 0; int WGLDeleteContextCalled = 0; int WGLShareListsCalled = 0; CL_GL_BUFFER_INFO bufferInfoInput = {0}; CL_GL_BUFFER_INFO bufferInfoOutput = {0}; CL_GL_RESOURCE_INFO textureInfoInput = {0}; CL_GL_RESOURCE_INFO textureInfoOutput = {0}; NEO::GLMockReturnedValues glMockReturnedValues = {0}; GLboolean GLSetSharedOCLContextStateReturnedValue = 1u; bool glGetLuidFuncAvailable = true; int glGetLuidCalled = 0; const unsigned char *WINAPI glGetString(unsigned int name) { if (name == GL_VENDOR) return reinterpret_cast(glString); if (name == GL_VERSION) return reinterpret_cast(glVersion); return reinterpret_cast(""); }; GLboolean WINAPI wglSetSharedOCLContextStateINTELMock(HDC HDCHandle, HGLRC ContextHandle, unsigned char State, void *pContextInfo) { ((NEO::ContextInfo *)pContextInfo)->ContextHandle = 1; ((NEO::ContextInfo *)pContextInfo)->DeviceHandle = 2; return GLSetSharedOCLContextStateReturnedValue; }; GLboolean WINAPI mockGLAcquireSharedBuffer(GLDisplay, GLContext, GLContext, GLvoid *pResourceInfo) { auto pBufferInfo = (CL_GL_BUFFER_INFO *)pResourceInfo; 
bufferInfoInput = *pBufferInfo; pBufferInfo->bufferSize = bufferInfoOutput.bufferSize; pBufferInfo->globalShareHandle = bufferInfoOutput.globalShareHandle; pBufferInfo->pGmmResInfo = bufferInfoOutput.pGmmResInfo; pBufferInfo->bufferOffset = bufferInfoOutput.bufferOffset; GLAcquireSharedBufferCalled++; return (GLboolean)1; }; GLboolean WINAPI mockGLReleaseSharedBuffer(GLDisplay, GLContext, GLContext, GLvoid *pResourceInfo) { bufferInfoInput = *static_cast(pResourceInfo); GLReleaseSharedBufferCalled++; return (GLboolean)1; }; GLboolean WINAPI mockGLAcquireSharedRenderBuffer(GLDisplay, GLContext, GLContext, GLvoid *pResourceInfo) { auto pTextureInfo = (CL_GL_RESOURCE_INFO *)pResourceInfo; textureInfoInput = *pTextureInfo; pTextureInfo->globalShareHandle = textureInfoOutput.globalShareHandle; pTextureInfo->pGmmResInfo = textureInfoOutput.pGmmResInfo; pTextureInfo->glInternalFormat = GL_RGBA8; GLAcquireSharedRenderBufferCalled++; return (GLboolean)1; }; GLboolean WINAPI mockGLReleaseSharedRenderBuffer(GLDisplay, GLContext, GLContext, GLvoid *pResourceInfo) { textureInfoInput = *static_cast(pResourceInfo); GLReleaseSharedRenderBufferCalled++; return (GLboolean)1; }; GLboolean WINAPI mockGLAcquireSharedTexture(GLDisplay, GLContext, GLContext, GLvoid *pResourceInfo) { auto pTextureInfo = (CL_GL_RESOURCE_INFO *)pResourceInfo; textureInfoInput = *pTextureInfo; pTextureInfo->globalShareHandle = textureInfoOutput.globalShareHandle; pTextureInfo->globalShareHandleMCS = textureInfoOutput.globalShareHandleMCS; if (pTextureInfo->target == GL_TEXTURE_BUFFER) { // size and width for texture buffer are queried from textureInfo - not from gmm pTextureInfo->textureBufferSize = textureInfoOutput.textureBufferSize; pTextureInfo->textureBufferWidth = textureInfoOutput.textureBufferWidth; } pTextureInfo->pGmmResInfo = textureInfoOutput.pGmmResInfo; pTextureInfo->glInternalFormat = textureInfoOutput.glInternalFormat ? 
textureInfoOutput.glInternalFormat : GL_RGBA8; pTextureInfo->glHWFormat = textureInfoOutput.glHWFormat; pTextureInfo->textureBufferOffset = textureInfoOutput.textureBufferOffset; pTextureInfo->numberOfSamples = textureInfoOutput.numberOfSamples; pTextureInfo->isAuxEnabled = textureInfoOutput.isAuxEnabled; GLAcquireSharedTextureCalled++; return (GLboolean)1; }; GLboolean WINAPI mockGLReleaseSharedTexture(GLDisplay, GLContext, GLContext, GLvoid *pResourceInfo) { textureInfoInput = *static_cast(pResourceInfo); GLReleaseSharedTextureCalled++; return (GLboolean)1; }; GLboolean WINAPI mockGlRetainSync(GLDisplay HDCHandle, GLContext ContextHandle, GLContext BackupContextHandle, GLvoid *pSyncInfo) { GLRetainSyncCalled++; GL_CL_SYNC_INFO *syncInfo = (GL_CL_SYNC_INFO *)(pSyncInfo); syncInfo->pSync = (void *)0x123; return GL_TRUE; }; GLboolean WINAPI mockGlReleaseSync(GLDisplay HDCHandle, GLContext ContextHandle, GLContext BackupContextHandle, GLvoid *pSync) { GLReleaseSyncCalled++; return GL_TRUE; }; void WINAPI mockGlGetSynciv(GLvoid *pSync, GLenum pname, GLint *value) { GLGetSyncivCalled++; *value = glMockReturnedValues.syncivRetVal; }; const unsigned char *_stdcall glGetStringiMock(unsigned int name, unsigned int index) { return reinterpret_cast(arrayStringi[index]); }; GLDisplay WINAPI mockGLGetCurrentDisplay() { GLGetCurrentDisplayCalled++; return glMockReturnedValues.currentDisplay; }; LUID WINAPI wglGetLuidMock(GLContext glContext) { glGetLuidCalled++; LUID luid{}; if (reinterpret_cast(1) == glContext) { luid.HighPart = 0x1d2e; luid.LowPart = 0x3f4a; } else if (reinterpret_cast(2) == glContext) { luid.HighPart = 0x5d2e; luid.LowPart = 0x3f4a; } return luid; }; PROC WINAPI wglGetProcAddress(LPCSTR name) { if (strcmp(name, "wglSetSharedOCLContextStateINTEL") == 0) { return reinterpret_cast(*wglSetSharedOCLContextStateINTELMock); } if (strcmp(name, "wglAcquireSharedBufferINTEL") == 0) { return reinterpret_cast(*mockGLAcquireSharedBuffer); } if (strcmp(name, 
"wglReleaseSharedBufferINTEL") == 0) { return reinterpret_cast(*mockGLReleaseSharedBuffer); } if (strcmp(name, "wglAcquireSharedRenderBufferINTEL") == 0) { return reinterpret_cast(*mockGLAcquireSharedRenderBuffer); } if (strcmp(name, "wglReleaseSharedRenderBufferINTEL") == 0) { return reinterpret_cast(*mockGLReleaseSharedRenderBuffer); } if (strcmp(name, "wglAcquireSharedTextureINTEL") == 0) { return reinterpret_cast(*mockGLAcquireSharedTexture); } if (strcmp(name, "wglReleaseSharedTextureINTEL") == 0) { return reinterpret_cast(*mockGLReleaseSharedTexture); } if (strcmp(name, "wglRetainSyncINTEL") == 0) { return reinterpret_cast(*mockGlRetainSync); } if (strcmp(name, "wglReleaseSyncINTEL") == 0) { return reinterpret_cast(*mockGlReleaseSync); } if (strcmp(name, "wglGetSyncivINTEL") == 0) { return reinterpret_cast(*mockGlGetSynciv); } if (strcmp(name, "glGetStringi") == 0) { return reinterpret_cast(*glGetStringiMock); } if (strcmp(name, "wglGetLuidINTEL") == 0 && glGetLuidFuncAvailable) { return reinterpret_cast(wglGetLuidMock); } return nullptr; } HGLRC WINAPI wglGetCurrentContext() { GLGetCurrentContextCalled++; return glMockReturnedValues.currentContext; }; HDC WINAPI wglGetCurrentDC() { return mockGLGetCurrentDisplay(); }; HGLRC WINAPI wglCreateContext(HDC Arg1) { WGLCreateContextCalled++; return (GLContext)0x101; }; BOOL WINAPI wglDeleteContext(HGLRC Arg1) { WGLDeleteContextCalled++; GLDeleteContextCalled++; return (GLboolean)1; }; void WINAPI glGetIntegerv(GLenum pname, GLint *params) { return NEO::MockGLSharingFunctions::glGetIntegervTest(pname, params); }; BOOL WINAPI wglShareLists(HGLRC arg1, HGLRC arg2) { WGLShareListsCalled++; return 1; }; BOOL WINAPI wglMakeCurrent(HDC arg1, HGLRC arg2) { GLMakeCurrentCalled++; glMockReturnedValues.madeCurrentContext = arg2; if (glMockReturnedValues.forceMakeCurrentCallFail) { if (glMockReturnedValues.failsCounter < glMockReturnedValues.numberOfCallFails) { glMockReturnedValues.failsCounter++; return GL_FALSE; } } return 
(GLboolean)1; }; void *WINAPI mockLoader(const char *name) { if (strcmp(name, "realFunction") == 0) { return *realFunction; } return nullptr; }; void resetParam(const char *name) { if (strcmp(name, "GLAcquireSharedBufferCalled") == 0) { GLAcquireSharedBufferCalled = 0; } if (strcmp(name, "GLAcquireSharedRenderBufferCalled") == 0) { GLAcquireSharedRenderBufferCalled = 0; } if (strcmp(name, "GLAcquireSharedTextureCalled") == 0) { GLAcquireSharedTextureCalled = 0; } if (strcmp(name, "GLDeleteContextCalled") == 0) { GLDeleteContextCalled = 0; } if (strcmp(name, "GLGetCurrentContextCalled") == 0) { GLGetCurrentContextCalled = 0; } if (strcmp(name, "GLGetCurrentDisplayCalled") == 0) { GLGetCurrentDisplayCalled = 0; } if (strcmp(name, "GLGetSyncivCalled") == 0) { GLGetSyncivCalled = 0; } if (strcmp(name, "GLMakeCurrentCalled") == 0) { GLMakeCurrentCalled = 0; } if (strcmp(name, "GLReleaseSharedBufferCalled") == 0) { GLReleaseSharedBufferCalled = 0; } if (strcmp(name, "GLReleaseSharedRenderBufferCalled") == 0) { GLReleaseSharedRenderBufferCalled = 0; } if (strcmp(name, "GLReleaseSharedTextureCalled") == 0) { GLReleaseSharedTextureCalled = 0; } if (strcmp(name, "GLReleaseSyncCalled") == 0) { GLReleaseSyncCalled = 0; } if (strcmp(name, "GLRetainSyncCalled") == 0) { GLRetainSyncCalled = 0; } if (strcmp(name, "WGLCreateContextCalled") == 0) { WGLCreateContextCalled = 0; } if (strcmp(name, "WGLDeleteContextCalled") == 0) { WGLDeleteContextCalled = 0; } if (strcmp(name, "WGLShareListsCalled") == 0) { WGLShareListsCalled = 0; } if (strcmp(name, "") == 0) { GLAcquireSharedBufferCalled = 0; GLAcquireSharedRenderBufferCalled = 0; GLAcquireSharedTextureCalled = 0; GLDeleteContextCalled = 0; GLGetCurrentContextCalled = 0; GLGetCurrentDisplayCalled = 0; GLGetSyncivCalled = 0; GLMakeCurrentCalled = 0; GLReleaseSharedBufferCalled = 0; GLReleaseSharedRenderBufferCalled = 0; GLReleaseSharedTextureCalled = 0; GLReleaseSyncCalled = 0; GLRetainSyncCalled = 0; WGLCreateContextCalled = 0; 
WGLDeleteContextCalled = 0; WGLShareListsCalled = 0; glGetLuidCalled = 0; glGetLuidFuncAvailable = true; } if (strcmp(name, "glGetLuidCalled") == 0) { glGetLuidCalled = 0; } if (strcmp(name, "glGetLuidFuncAvailable") == 0) { glGetLuidFuncAvailable = true; } if (strcmp(name, "glGetLuidFuncNotAvailable") == 0) { glGetLuidFuncAvailable = false; } }; int getParam(const char *name) { if (strcmp(name, "GLAcquireSharedBufferCalled") == 0) { return GLAcquireSharedBufferCalled; } if (strcmp(name, "GLAcquireSharedRenderBufferCalled") == 0) { return GLAcquireSharedRenderBufferCalled; } if (strcmp(name, "GLAcquireSharedTextureCalled") == 0) { return GLAcquireSharedTextureCalled; } if (strcmp(name, "GLDeleteContextCalled") == 0) { return GLDeleteContextCalled; } if (strcmp(name, "GLGetCurrentContextCalled") == 0) { return GLGetCurrentContextCalled; } if (strcmp(name, "GLGetCurrentDisplayCalled") == 0) { return GLGetCurrentDisplayCalled; } if (strcmp(name, "GLGetSyncivCalled") == 0) { return GLGetSyncivCalled; } if (strcmp(name, "GLMakeCurrentCalled") == 0) { return GLMakeCurrentCalled; } if (strcmp(name, "GLReleaseSharedBufferCalled") == 0) { return GLReleaseSharedBufferCalled; } if (strcmp(name, "GLReleaseSharedRenderBufferCalled") == 0) { return GLReleaseSharedRenderBufferCalled; } if (strcmp(name, "GLReleaseSharedTextureCalled") == 0) { return GLReleaseSharedTextureCalled; } if (strcmp(name, "GLReleaseSyncCalled") == 0) { return GLReleaseSyncCalled; } if (strcmp(name, "GLRetainSyncCalled") == 0) { return GLRetainSyncCalled; } if (strcmp(name, "WGLCreateContextCalled") == 0) { return WGLCreateContextCalled; } if (strcmp(name, "WGLDeleteContextCalled") == 0) { return WGLDeleteContextCalled; } if (strcmp(name, "WGLShareListsCalled") == 0) { return WGLShareListsCalled; } if (strcmp(name, "glGetLuidCalled") == 0) { return glGetLuidCalled; } return 0; }; CL_GL_BUFFER_INFO getBufferInfo() { return bufferInfoInput; }; CL_GL_RESOURCE_INFO getTextureInfo() { return textureInfoInput; 
}; void memParam() { memset(&bufferInfoInput, 0, sizeof(CL_GL_BUFFER_INFO)); memset(&bufferInfoOutput, 0, sizeof(CL_GL_BUFFER_INFO)); memset(&textureInfoInput, 0, sizeof(CL_GL_RESOURCE_INFO)); memset(&textureInfoOutput, 0, sizeof(CL_GL_RESOURCE_INFO)); memset(&glMockReturnedValues, 0, sizeof(NEO::GLMockReturnedValues)); }; void loadBuffer(CL_GL_BUFFER_INFO buff) { bufferInfoOutput = buff; }; void loadTexture(CL_GL_RESOURCE_INFO texture) { textureInfoOutput = texture; }; NEO::GLMockReturnedValues getGlMockReturnedValues() { return glMockReturnedValues; }; void setGlMockReturnedValues(NEO::GLMockReturnedValues value) { glMockReturnedValues = value; }; void setGetSyncivReturnValue(int val) { glMockReturnedValues.syncivRetVal = val; } void glSetString(const char *name, unsigned int var) { if (var == GL_VENDOR) { glString = name; } else if (var == GL_VERSION) { glVersion = name; } }; void glSetStringi(const char *name, unsigned int index) { arrayStringi[index] = name; }; void setGLSetSharedOCLContextStateReturnedValue(GLboolean value) { GLSetSharedOCLContextStateReturnedValue = static_cast(value); }; GLboolean getGLSetSharedOCLContextStateReturnedValue() { return GLSetSharedOCLContextStateReturnedValue; }; } compute-runtime-22.14.22890/opencl/test/unit_test/mock_gl/windows/mock_opengl32.def000066400000000000000000000014661422164147700300650ustar00rootroot00000000000000; ; Copyright (C) 2017-2021 Intel Corporation ; ; SPDX-License-Identifier: MIT ; LIBRARY "mock_opengl32" EXPORTS glGetString wglGetProcAddress glSetString mockLoader glGetStringiMock wglSetSharedOCLContextStateINTELMock wglGetCurrentContext wglGetCurrentDC glGetIntegerv wglCreateContext wglDeleteContext wglShareLists wglMakeCurrent glSetString glSetStringi mockGLAcquireSharedBuffer resetParam getParam loadBuffer getBufferInfo memParam setGLSetSharedOCLContextStateReturnedValue getGLSetSharedOCLContextStateReturnedValue mockGLAcquireSharedRenderBuffer mockGLReleaseSharedBuffer 
mockGLReleaseSharedRenderBuffer mockGLReleaseSharedTexture mockGLAcquireSharedTexture loadTexture getTextureInfo mockGLGetCurrentDisplay setGlMockReturnedValues getGlMockReturnedValues mockGlRetainSync mockGlReleaseSync mockGlGetSyncivcompute-runtime-22.14.22890/opencl/test/unit_test/mocks/000077500000000000000000000000001422164147700227435ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/mocks/CMakeLists.txt000066400000000000000000000051501422164147700255040ustar00rootroot00000000000000# # Copyright (C) 2018-2022 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_mocks ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/mock_async_event_handler.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_async_event_handler.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_buffer.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_cl_device.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_cl_device.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_cl_execution_environment.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_command_queue.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_context.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_context.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_event.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_gmm_resource_info_ocl.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_image.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_kernel.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_kernel.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_platform.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_platform.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_program.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_program.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_sampler.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_sharing_factory.h ${CMAKE_CURRENT_SOURCE_DIR}/ult_cl_device_factory.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ult_cl_device_factory.h ${NEO_SHARED_DIRECTORY}/gmm_helper/page_table_mngr_impl.cpp ${NEO_SHARED_TEST_DIRECTORY}/common/mocks/mock_gmm_client_context.cpp ${NEO_SHARED_TEST_DIRECTORY}/common/mocks/mock_gmm_client_context.h ${NEO_SHARED_TEST_DIRECTORY}/common/mocks/mock_experimental_command_buffer.h 
${NEO_CORE_tests_compiler_mocks} ) if(WIN32) file(GLOB IGDRCL_SRC_tests_mock_wddm "${CMAKE_CURRENT_SOURCE_DIR}/mock_wddm2[0-9]\.*") list(APPEND IGDRCL_SRCS_tests_mocks ${CMAKE_CURRENT_SOURCE_DIR}/mock_d3d_objects.h ${IGDRCL_SRC_tests_mock_wddm} ) else() list(APPEND IGDRCL_SRCS_tests_mocks ${NEO_SHARED_DIRECTORY}/os_interface/linux/page_table_manager_functions.cpp ) endif() add_library(igdrcl_mocks STATIC EXCLUDE_FROM_ALL ${IGDRCL_SRCS_tests_mocks}) add_subdirectories() target_include_directories(igdrcl_mocks PRIVATE $ $ ) target_compile_definitions(igdrcl_mocks PRIVATE MOCKABLE_VIRTUAL=virtual $) set_target_properties(igdrcl_mocks PROPERTIES POSITION_INDEPENDENT_CODE ON) set_target_properties(igdrcl_mocks PROPERTIES FOLDER "test mocks") create_project_source_tree(igdrcl_mocks) compute-runtime-22.14.22890/opencl/test/unit_test/mocks/gl/000077500000000000000000000000001422164147700233455ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/mocks/gl/CMakeLists.txt000066400000000000000000000001471422164147700261070ustar00rootroot00000000000000# # Copyright (C) 2018-2021 Intel Corporation # # SPDX-License-Identifier: MIT # add_subdirectories() compute-runtime-22.14.22890/opencl/test/unit_test/mocks/gl/windows/000077500000000000000000000000001422164147700250375ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/mocks/gl/windows/CMakeLists.txt000066400000000000000000000007231422164147700276010ustar00rootroot00000000000000# # Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT # if(WIN32) list(APPEND IGDRCL_SRCS_tests_mocks_windows ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/mock_gl_arb_sync_event_windows.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_gl_sharing_windows.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_gl_sharing_windows.h ) target_sources(igdrcl_mocks PRIVATE ${IGDRCL_SRCS_tests_mocks_windows}) endif() 
compute-runtime-22.14.22890/opencl/test/unit_test/mocks/gl/windows/mock_gl_arb_sync_event_windows.h000066400000000000000000000024421422164147700334600ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/sharings/gl/gl_arb_sync_event.h" template struct DummyArbEvent : NEO::GlArbSyncEvent { DummyArbEvent(NEO::Context &ctx) : GlArbSyncEvent(ctx) { } using GlArbSyncEvent::baseEvent; using GlArbSyncEvent::glSyncInfo; using GlArbSyncEvent::osInterface; bool useBaseSetEvent = false; bool setBaseEvent(Event &ev) override { return GlArbSyncEvent::setBaseEvent(ev); } ~DummyArbEvent() override { GlArbSyncEvent::glSyncInfo.reset(); } static GlArbSyncEvent *create(Event &baseEv) { if (FailCreation) { return nullptr; } auto syncEv = new DummyArbEvent(*baseEv.getContext()); syncEv->baseEvent = &baseEv; return syncEv; } }; inline void glArbSyncObjectCleanupMockDoNothing(NEO::OSInterface &osInterface, CL_GL_SYNC_INFO *glSyncInfo) { } inline void glArbSyncObjectSignalMockDoNothing(NEO::OsContext &osContext, CL_GL_SYNC_INFO &glSyncInfo) { } template inline bool mockGlArbSyncObjectSetup(NEO::GLSharingFunctions &sharing, NEO::OSInterface &osInterface, CL_GL_SYNC_INFO &glSyncInfo) { return (Fail == false); } compute-runtime-22.14.22890/opencl/test/unit_test/mocks/gl/windows/mock_gl_sharing_windows.cpp000066400000000000000000000027711422164147700324520ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/mocks/gl/windows/mock_gl_sharing_windows.h" #include "config.h" namespace NEO { int EGLCreateContextCalled = 0; int EGLChooseConfigCalled = 0; int EGLDeleteContextCalled = 0; int GlxChooseFBConfigCalled = 0; int GlxQueryContextCalled = 0; int GlxCreateNewContextCalled = 0; int GlxDeleteContextCalled = 0; int GlxIsDirectCalled = 0; EGLBkpContextParams eglBkpContextParams = {0}; GLXBkpContextParams 
glxBkpContextParams = {0}; void GlSharingFunctionsMock::initMembers() { GLSharingFunctionsWindows::initGLFunctions(); GlDllHelper dllParam; dllParam.setGLSetSharedOCLContextStateReturnedValue(1u); dllParam.resetParam(""); dllParam.loadTexture({0}); dllParam.loadBuffer({0}); EGLChooseConfigCalled = 0; EGLCreateContextCalled = 0; EGLDeleteContextCalled = 0; GlxChooseFBConfigCalled = 0; GlxQueryContextCalled = 0; GlxCreateNewContextCalled = 0; GlxDeleteContextCalled = 0; GlxIsDirectCalled = 0; memset(&eglBkpContextParams, 0, sizeof(EGLBkpContextParams)); memset(&glxBkpContextParams, 0, sizeof(GLXBkpContextParams)); } GlSharingFunctionsMock::GlSharingFunctionsMock() { initMembers(); } MockGlSharing::MockGlSharing(GLType glhdcType, GLContext glhglrcHandle, GLContext glhglrcHandleBkpCtx, GLDisplay glhdcHandle) { sharingFunctions->setHandles(glhdcType, glhglrcHandle, glhglrcHandleBkpCtx, glhdcHandle); } } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/mocks/gl/windows/mock_gl_sharing_windows.h000066400000000000000000000166121422164147700321160ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/extensions/public/cl_gl_private_intel.h" #include "opencl/source/sharings/gl/windows/gl_sharing_windows.h" #include "opencl/test/unit_test/sharings/gl/gl_dll_helper.h" #include "config.h" #include namespace NEO { struct EGLBkpContextParams { int32_t configAttrs; int32_t contextAttrs[3]; int32_t numConfigs; }; struct GLXBkpContextParams { int FBConfigAttrs; int queryAttribute; int renderType; }; struct GLMockReturnedValues { GLContext currentContext; GLDisplay currentDisplay; GLContext madeCurrentContext; bool forceMakeCurrentCallFail; int numberOfCallFails; int failsCounter; int syncivRetVal; }; extern int GLSetSharedOCLContextStateCalled; extern int EGLCreateContextCalled; extern int EGLDeleteContextCalled; extern int EGLChooseConfigCalled; extern int 
GlxChooseFBConfigCalled; extern int GlxQueryContextCalled; extern int GlxCreateNewContextCalled; extern int GlxDeleteContextCalled; extern int GlxIsDirectCalled; extern EGLBkpContextParams eglBkpContextParams; extern GLXBkpContextParams glxBkpContextParams; namespace glTextureTargets { static const unsigned int supportedTargets[] = { GL_TEXTURE_1D, GL_TEXTURE_1D_ARRAY, GL_TEXTURE_BUFFER, GL_TEXTURE_2D, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_RECTANGLE, GL_TEXTURE_CUBE_MAP_NEGATIVE_X, GL_TEXTURE_CUBE_MAP_POSITIVE_X, GL_TEXTURE_CUBE_MAP_NEGATIVE_Y, GL_TEXTURE_CUBE_MAP_POSITIVE_Y, GL_TEXTURE_CUBE_MAP_NEGATIVE_Z, GL_TEXTURE_CUBE_MAP_POSITIVE_Z, GL_TEXTURE_3D, GL_RENDERBUFFER_EXT, GL_TEXTURE_2D_MULTISAMPLE, GL_TEXTURE_2D_MULTISAMPLE_ARRAY, }; } class GlSharingFunctionsMock : public GLSharingFunctionsWindows { void initMembers(); public: static GLboolean OSAPI mockGLSetSharedOCLContextState(GLDisplay, GLContext, GLboolean, GLvoid *pBufferInfo) { GLSetSharedOCLContextStateCalled++; return (GLboolean)1; }; ~GlSharingFunctionsMock() override = default; using GLSharingFunctionsWindows::GLAcquireSharedBuffer; using GLSharingFunctionsWindows::GLAcquireSharedRenderBuffer; using GLSharingFunctionsWindows::GLAcquireSharedTexture; using GLSharingFunctionsWindows::GLGetCurrentContext; using GLSharingFunctionsWindows::GLGetCurrentDisplay; using GLSharingFunctionsWindows::glGetIntegerv; using GLSharingFunctionsWindows::glGetString; using GLSharingFunctionsWindows::glGetStringi; using GLSharingFunctionsWindows::GLGetSynciv; using GLSharingFunctionsWindows::GLReleaseSharedBuffer; using GLSharingFunctionsWindows::GLReleaseSharedRenderBuffer; using GLSharingFunctionsWindows::GLReleaseSharedTexture; using GLSharingFunctionsWindows::GLReleaseSync; using GLSharingFunctionsWindows::GLRetainSync; using GLSharingFunctionsWindows::GLSetSharedOCLContextState; using GLSharingFunctionsWindows::isOpenGlExtensionSupported; using GLSharingFunctionsWindows::pfnWglCreateContext; using 
GLSharingFunctionsWindows::pfnWglDeleteContext; using GLSharingFunctionsWindows::pfnWglShareLists; using GLSharingFunctionsWindows::setSharedOCLContextState; using GLSharingFunctionsWindows::wglMakeCurrent; using GLSharingFunctionsWindows::glArbEventMapping; using GLSharingFunctionsWindows::GLContextHandle; using GLSharingFunctionsWindows::GLDeviceHandle; using GLSharingFunctionsWindows::getSupportedFormats; using GLSharingFunctionsWindows::pfnGlArbSyncObjectCleanup; using GLSharingFunctionsWindows::pfnGlArbSyncObjectSetup; using GLSharingFunctionsWindows::pfnGlArbSyncObjectSignal; using GLSharingFunctionsWindows::pfnGlArbSyncObjectWaitServer; GlSharingFunctionsMock(GLType GLHDCType, GLContext GLHGLRCHandle, GLContext GLHGLRCHandleBkpCtx, GLDisplay GLHDCHandle) : GLSharingFunctionsWindows(GLHDCType, GLHGLRCHandle, GLHGLRCHandleBkpCtx, GLHDCHandle) { initMembers(); updateOpenGLContext(); createBackupContext(); } GlSharingFunctionsMock(); void setHandles(GLType GLHDCType, GLContext GLHGLRCHandle, GLContext GLHGLRCHandleBkpCtx, GLDisplay GLHDCHandle) { this->GLHDCType = GLHDCType; this->GLHGLRCHandle = GLHGLRCHandle; this->GLHGLRCHandleBkpCtx = GLHGLRCHandleBkpCtx; this->GLHDCHandle = GLHDCHandle; } void setGLAcquireSharedBufferMock(PFNOGLAcquireSharedBufferINTEL mock) { GLAcquireSharedBuffer = mock; } void setGLAcquireSharedTextureMock(PFNOGLAcquireSharedTextureINTEL mock) { GLAcquireSharedTexture = mock; } }; class MockGlSharing { public: MockGlSharing() {} MockGlSharing(GLType GLHDCType, GLContext GLHGLRCHandle, GLContext GLHGLRCHandleBkpCtx, GLDisplay GLHDCHandle); void uploadDataToBufferInfo() { dllParam->loadBuffer(m_bufferInfoOutput); } void uploadDataToBufferInfo(unsigned int sharedHandle, int bufferOffset, GMM_RESOURCE_INFO *gmmResInfo) { m_bufferInfoOutput.globalShareHandle = sharedHandle; m_bufferInfoOutput.bufferOffset = bufferOffset; m_bufferInfoOutput.pGmmResInfo = gmmResInfo; dllParam->loadBuffer(m_bufferInfoOutput); } void uploadDataToTextureInfo() { 
dllParam->loadTexture(m_textureInfoOutput); } void uploadDataToTextureInfo(unsigned int sharedHandle) { m_textureInfoOutput.globalShareHandle = sharedHandle; dllParam->loadTexture(m_textureInfoOutput); } void uploadTextureBufferOffsetToTextureInfo(int texBufOffset) { m_textureInfoOutput.textureBufferOffset = texBufOffset; dllParam->loadTexture(m_textureInfoOutput); } void overrideGetCurrentValues(GLContext ctx, GLDisplay display, bool forceMakeCurrentFail = false, int numberOfFails = 0) { glMockReturnedValues.currentContext = ctx; glMockReturnedValues.currentDisplay = display; glMockReturnedValues.forceMakeCurrentCallFail = forceMakeCurrentFail; glMockReturnedValues.numberOfCallFails = numberOfFails; glMockReturnedValues.failsCounter = 0; dllParam->setGlMockReturnedValues(glMockReturnedValues); } void setGetSyncivReturnValue(int val) { glMockReturnedValues.syncivRetVal = val; dllParam->setGlMockReturnedValues(glMockReturnedValues); } std::unique_ptr sharingFunctions = std::make_unique(); std::unique_ptr dllParam = std::make_unique(); CL_GL_RESOURCE_INFO m_clGlResourceInfo = {0}; GL_CL_RESOURCE_INFO m_glClResourceInfo = {0}; CL_GL_BUFFER_INFO m_bufferInfoOutput = {0}; CL_GL_RESOURCE_INFO m_textureInfoOutput = {0}; GLMockReturnedValues glMockReturnedValues = {0}; }; class MockGLSharingFunctions : public GLSharingFunctionsWindows { public: using GLSharingFunctionsWindows::isOpenGlExtensionSupported; using GLSharingFunctionsWindows::setSharedOCLContextState; static bool SharingEnabled; static void OSAPI glGetIntegervTest(GLenum pname, GLint *data) { if (pname == GL_NUM_EXTENSIONS) *data = 2; }; using GLSharingFunctionsWindows::glGetIntegerv; using GLSharingFunctionsWindows::glGetString; std::unique_ptr dllParam = std::make_unique(); MockGLSharingFunctions() { GLSharingFunctionsWindows::initGLFunctions(); MockGLSharingFunctions::SharingEnabled = 1; } }; } // namespace NEO 
compute-runtime-22.14.22890/opencl/test/unit_test/mocks/mock_async_event_handler.cpp000066400000000000000000000003461422164147700304760ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/mocks/mock_async_event_handler.h" namespace MockAsyncEventHandlerGlobals { bool destructorCalled = false; }compute-runtime-22.14.22890/opencl/test/unit_test/mocks/mock_async_event_handler.h000066400000000000000000000033261422164147700301440ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/event/async_events_handler.h" #include #include #include #include #include using namespace NEO; namespace MockAsyncEventHandlerGlobals { extern bool destructorCalled; } class MockHandler : public AsyncEventsHandler { public: using AsyncEventsHandler::allowAsyncProcess; using AsyncEventsHandler::asyncMtx; using AsyncEventsHandler::asyncProcess; using AsyncEventsHandler::openThread; using AsyncEventsHandler::thread; ~MockHandler() override { if (!allowThreadCreating) { asyncProcess(this); // process once for cleanup } MockAsyncEventHandlerGlobals::destructorCalled = true; } MockHandler(bool allowAsync = false) : AsyncEventsHandler() { allowThreadCreating = allowAsync; transferCounter.store(0); MockAsyncEventHandlerGlobals::destructorCalled = false; } Event *process() { std::move(registerList.begin(), registerList.end(), std::back_inserter(list)); registerList.clear(); return processList(); } void transferRegisterList() override { transferCounter++; AsyncEventsHandler::transferRegisterList(); } void openThread() override { if (allowThreadCreating) { AsyncEventsHandler::openThread(); } openThreadCalled = true; } bool peekIsListEmpty() { return list.size() == 0; } bool peekIsRegisterListEmpty() { return registerList.size() == 0; } std::atomic transferCounter; bool openThreadCalled = false; bool 
allowThreadCreating = false; }; compute-runtime-22.14.22890/opencl/test/unit_test/mocks/mock_buffer.h000066400000000000000000000213641422164147700254040ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/aligned_memory.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "opencl/source/helpers/cl_memory_properties_helpers.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/mocks/mock_context.h" using namespace NEO; class MockBufferStorage { public: MockBufferStorage() : mockGfxAllocation(data, sizeof(data) / 2), multiGfxAllocation(GraphicsAllocationHelper::toMultiGraphicsAllocation(&mockGfxAllocation)) { } MockBufferStorage(bool unaligned) : mockGfxAllocation(unaligned ? alignUp(&data, 4) : alignUp(&data, 64), sizeof(data) / 2), multiGfxAllocation(GraphicsAllocationHelper::toMultiGraphicsAllocation(&mockGfxAllocation)) { } ~MockBufferStorage() { if (mockGfxAllocation.getDefaultGmm()) { delete mockGfxAllocation.getDefaultGmm(); } } char data[128]; MockGraphicsAllocation mockGfxAllocation; std::unique_ptr device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr)); MultiGraphicsAllocation multiGfxAllocation; }; class MockBuffer : public MockBufferStorage, public Buffer { public: using Buffer::magic; using Buffer::offset; using Buffer::size; using MemObj::context; using MemObj::isZeroCopy; using MemObj::memObjectType; using MockBufferStorage::device; void setAllocationType(uint32_t rootDeviceIndex, bool compressed) { setAllocationType(multiGraphicsAllocation.getGraphicsAllocation(rootDeviceIndex), device->getRootDeviceEnvironment().getGmmClientContext(), compressed); } static void setAllocationType(GraphicsAllocation *graphicsAllocation, GmmClientContext *gmmClientContext, bool compressed) { if (compressed && !graphicsAllocation->getDefaultGmm()) 
{ graphicsAllocation->setDefaultGmm(new Gmm(gmmClientContext, nullptr, 0, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, compressed, {}, true)); } if (graphicsAllocation->getDefaultGmm()) { graphicsAllocation->getDefaultGmm()->isCompressionEnabled = compressed; } } MockBuffer(GraphicsAllocation &alloc) : MockBuffer(nullptr, alloc) {} MockBuffer(Context *context, GraphicsAllocation &alloc) : MockBufferStorage(), Buffer( context, ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_USE_HOST_PTR, 0, 0, MockBufferStorage::device.get()), CL_MEM_USE_HOST_PTR, 0, alloc.getUnderlyingBufferSize(), alloc.getUnderlyingBuffer(), alloc.getUnderlyingBuffer(), GraphicsAllocationHelper::toMultiGraphicsAllocation(&alloc), true, false, false), externalAlloc(&alloc) { } MockBuffer() : MockBufferStorage(), Buffer( nullptr, ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_USE_HOST_PTR, 0, 0, MockBufferStorage::device.get()), CL_MEM_USE_HOST_PTR, 0, sizeof(data), &data, &data, GraphicsAllocationHelper::toMultiGraphicsAllocation(&mockGfxAllocation), true, false, false) { } ~MockBuffer() override { if (externalAlloc != nullptr) { // no ownership over graphics allocation, do not release it this->multiGraphicsAllocation.removeAllocation(0u); } } void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly, const Device &device, bool useGlobalAtomics, bool areMultipleSubDevicesInContext) override { Buffer::setSurfaceState(this->device.get(), memory, forceNonAuxMode, disableL3, getSize(), getCpuAddress(), 0, (externalAlloc != nullptr) ? 
externalAlloc : &mockGfxAllocation, 0, 0, false, false); } void transferDataToHostPtr(MemObjSizeArray ©Size, MemObjOffsetArray ©Offset) override { ++transferDataToHostPtrCalledCount; if (callBaseTransferDataToHostPtr) { Buffer::transferDataToHostPtr(copySize, copyOffset); } } void transferDataFromHostPtr(MemObjSizeArray ©Size, MemObjOffsetArray ©Offset) override { ++transferDataFromHostPtrCalledCount; if (callBaseTransferDataFromHostPtr) { Buffer::transferDataFromHostPtr(copySize, copyOffset); } } GraphicsAllocation *externalAlloc = nullptr; bool callBaseTransferDataToHostPtr{true}; bool callBaseTransferDataFromHostPtr{true}; int transferDataToHostPtrCalledCount{0}; int transferDataFromHostPtrCalledCount{0}; }; class AlignedBuffer : public MockBufferStorage, public Buffer { public: using MockBufferStorage::device; AlignedBuffer() : MockBufferStorage(false), Buffer( nullptr, ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_USE_HOST_PTR, 0, 0, MockBufferStorage::device.get()), CL_MEM_USE_HOST_PTR, 0, sizeof(data) / 2, alignUp(&data, 64), alignUp(&data, 64), GraphicsAllocationHelper::toMultiGraphicsAllocation(&mockGfxAllocation), true, false, false) { } AlignedBuffer(GraphicsAllocation *gfxAllocation) : AlignedBuffer(nullptr, gfxAllocation) {} AlignedBuffer(Context *context, GraphicsAllocation *gfxAllocation) : MockBufferStorage(), Buffer( context, ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_USE_HOST_PTR, 0, 0, MockBufferStorage::device.get()), CL_MEM_USE_HOST_PTR, 0, sizeof(data) / 2, alignUp(&data, 64), alignUp(&data, 64), GraphicsAllocationHelper::toMultiGraphicsAllocation(gfxAllocation), true, false, false), externalAlloc(gfxAllocation) { } ~AlignedBuffer() override { if (externalAlloc != nullptr) { // no ownership over graphics allocation, do not release it this->multiGraphicsAllocation.removeAllocation(0u); } } void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly, const Device 
&device, bool useGlobalAtomics, bool areMultipleSubDevicesInContext) override { Buffer::setSurfaceState(this->device.get(), memory, forceNonAuxMode, disableL3, getSize(), getCpuAddress(), 0, &mockGfxAllocation, 0, 0, false, false); } GraphicsAllocation *externalAlloc = nullptr; }; class UnalignedBuffer : public MockBufferStorage, public Buffer { public: using MockBufferStorage::device; UnalignedBuffer() : MockBufferStorage(true), Buffer( nullptr, ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_USE_HOST_PTR, 0, 0, MockBufferStorage::device.get()), CL_MEM_USE_HOST_PTR, 0, sizeof(data) / 2, alignUp(&data, 4), alignUp(&data, 4), GraphicsAllocationHelper::toMultiGraphicsAllocation(&mockGfxAllocation), false, false, false) { } UnalignedBuffer(GraphicsAllocation *gfxAllocation) : UnalignedBuffer(nullptr, gfxAllocation) {} UnalignedBuffer(Context *context, GraphicsAllocation *gfxAllocation) : MockBufferStorage(true), Buffer( context, ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_USE_HOST_PTR, 0, 0, MockBufferStorage::device.get()), CL_MEM_USE_HOST_PTR, 0, sizeof(data) / 2, alignUp(&data, 4), alignUp(&data, 4), GraphicsAllocationHelper::toMultiGraphicsAllocation(gfxAllocation), false, false, false), externalAlloc(gfxAllocation) { } ~UnalignedBuffer() override { if (externalAlloc != nullptr) { // no ownership over graphics allocation, do not release it this->multiGraphicsAllocation.removeAllocation(0u); } } void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly, const Device &device, bool useGlobalAtomics, bool areMultipleSubDevicesInContext) override { Buffer::setSurfaceState(this->device.get(), memory, forceNonAuxMode, disableL3, getSize(), getCpuAddress(), 0, &mockGfxAllocation, 0, 0, false, false); } GraphicsAllocation *externalAlloc = nullptr; }; class MockPublicAccessBuffer : public Buffer { public: using Buffer::getGraphicsAllocationTypeAndCompressionPreference; }; 
compute-runtime-22.14.22890/opencl/test/unit_test/mocks/mock_builtin_dispatch_info_builder.h000066400000000000000000000032721422164147700321770ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/built_ins/built_ins.h" #include "opencl/source/helpers/dispatch_info.h" using namespace NEO; class MockBuiltinDispatchInfoBuilder : public BuiltinDispatchInfoBuilder { public: MockBuiltinDispatchInfoBuilder(BuiltIns &kernelLib, ClDevice &clDevice, BuiltinDispatchInfoBuilder *origBuilder) : BuiltinDispatchInfoBuilder(kernelLib, clDevice), originalBuilder(origBuilder) { } virtual void validateInput(const BuiltinOpParams &conf) const {}; bool buildDispatchInfos(MultiDispatchInfo &mdi) const override { validateInput(mdi.peekBuiltinOpParams()); originalBuilder->buildDispatchInfos(mdi); for (auto &di : mdi) { multiDispatchInfo.push(di); } multiDispatchInfo.setBuiltinOpParams(mdi.peekBuiltinOpParams()); return true; } const BuiltinOpParams *getBuiltinOpParams() const { return &multiDispatchInfo.peekBuiltinOpParams(); }; const MultiDispatchInfo *getMultiDispatchInfo() const { return &multiDispatchInfo; }; void setFailingArgIndex(uint32_t index) { withFailureInjection = true; failingArgIndex = index; } bool setExplicitArg(uint32_t argIndex, size_t argSize, const void *argVal, cl_int &err) const override { err = (withFailureInjection && argIndex == failingArgIndex) ? 
CL_INVALID_ARG_VALUE : CL_SUCCESS; return false; } protected: mutable MultiDispatchInfo multiDispatchInfo; BuiltinDispatchInfoBuilder *originalBuilder; bool withFailureInjection = false; uint32_t failingArgIndex = 0; }; compute-runtime-22.14.22890/opencl/test/unit_test/mocks/mock_cl_device.cpp000066400000000000000000000015421422164147700263770ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "shared/test/common/mocks/mock_device.h" #include "opencl/test/unit_test/mocks/mock_platform.h" using namespace NEO; bool &MockClDevice::createSingleDevice = MockDevice::createSingleDevice; decltype(&createCommandStream) &MockClDevice::createCommandStreamReceiverFunc = MockDevice::createCommandStreamReceiverFunc; MockClDevice::MockClDevice(MockDevice *pMockDevice) : ClDevice(*pMockDevice, platform()), device(*pMockDevice), sharedDeviceInfo(device.deviceInfo), executionEnvironment(pMockDevice->executionEnvironment), allEngines(pMockDevice->allEngines) { } bool MockClDevice::areOcl21FeaturesSupported() const { return device.getHardwareInfo().capabilityTable.supportsOcl21Features; } compute-runtime-22.14.22890/opencl/test/unit_test/mocks/mock_cl_device.h000066400000000000000000000121331422164147700260420ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/test/common/mocks/mock_device.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/test/unit_test/mocks/mock_cl_execution_environment.h" namespace NEO { class FailMemoryManager; class OSTime; class SubDevice; template class UltCommandStreamReceiver; struct HardwareInfo; extern CommandStreamReceiver *createCommandStream(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield); class MockClDevice : public ClDevice { public: using ClDevice::ClDevice; 
using ClDevice::compilerExtensions; using ClDevice::compilerExtensionsWithFeatures; using ClDevice::deviceExtensions; using ClDevice::deviceInfo; using ClDevice::driverInfo; using ClDevice::enabledClVersion; using ClDevice::getClDeviceName; using ClDevice::getQueueFamilyCapabilities; using ClDevice::getQueueFamilyCapabilitiesAll; using ClDevice::initializeCaps; using ClDevice::name; using ClDevice::ocl21FeaturesEnabled; using ClDevice::simultaneousInterops; using ClDevice::subDevices; explicit MockClDevice(MockDevice *pMockDevice); bool createEngines() { return device.createEngines(); } void setOSTime(OSTime *osTime) { device.setOSTime(osTime); } bool getCpuTime(uint64_t *timeStamp) { return device.getCpuTime(timeStamp); } void setPreemptionMode(PreemptionMode mode) { device.setPreemptionMode(mode); } void injectMemoryManager(MemoryManager *pMemoryManager) { device.injectMemoryManager(pMemoryManager); } void setPerfCounters(PerformanceCounters *perfCounters) { device.setPerfCounters(perfCounters); } const char *getProductAbbrev() const { return device.getProductAbbrev(); } template UltCommandStreamReceiver &getUltCommandStreamReceiver() { return device.getUltCommandStreamReceiver(); } template UltCommandStreamReceiver &getUltCommandStreamReceiverFromIndex(uint32_t index) { return device.getUltCommandStreamReceiverFromIndex(index); } CommandStreamReceiver &getGpgpuCommandStreamReceiver() const { return device.getGpgpuCommandStreamReceiver(); } void resetCommandStreamReceiver(CommandStreamReceiver *newCsr) { device.resetCommandStreamReceiver(newCsr); } void resetCommandStreamReceiver(CommandStreamReceiver *newCsr, uint32_t engineIndex) { device.resetCommandStreamReceiver(newCsr, engineIndex); } void setSourceLevelDebuggerActive(bool active) { device.setDebuggerActive(active); } template static T *createWithExecutionEnvironment(const HardwareInfo *pHwInfo, ExecutionEnvironment *executionEnvironment, uint32_t rootDeviceIndex) { return 
MockDevice::createWithExecutionEnvironment(pHwInfo, executionEnvironment, rootDeviceIndex); } template static T *createWithNewExecutionEnvironment(const HardwareInfo *pHwInfo, uint32_t rootDeviceIndex = 0) { auto executionEnvironment = prepareExecutionEnvironment(pHwInfo, rootDeviceIndex); return MockDevice::createWithExecutionEnvironment(pHwInfo, executionEnvironment, rootDeviceIndex); } static ExecutionEnvironment *prepareExecutionEnvironment(const HardwareInfo *pHwInfo, uint32_t rootDeviceIndex) { auto executionEnvironment = new MockClExecutionEnvironment(); auto numRootDevices = DebugManager.flags.CreateMultipleRootDevices.get() ? DebugManager.flags.CreateMultipleRootDevices.get() : rootDeviceIndex + 1; executionEnvironment->prepareRootDeviceEnvironments(numRootDevices); pHwInfo = pHwInfo ? pHwInfo : defaultHwInfo.get(); for (auto i = 0u; i < executionEnvironment->rootDeviceEnvironments.size(); i++) { executionEnvironment->rootDeviceEnvironments[i]->setHwInfo(pHwInfo); } executionEnvironment->calculateMaxOsContextCount(); return executionEnvironment; } SubDevice *createSubDevice(uint32_t subDeviceIndex) { return device.createSubDevice(subDeviceIndex); } std::unique_ptr createCommandStreamReceiver() const { return device.createCommandStreamReceiver(); } BuiltIns *getBuiltIns() const { return getDevice().getBuiltIns(); } bool areOcl21FeaturesSupported() const; void setDebuggerActive(bool active) { sharedDeviceInfo.debuggerActive = active; } MockDevice &device; DeviceInfo &sharedDeviceInfo; ExecutionEnvironment *&executionEnvironment; static bool &createSingleDevice; static decltype(&createCommandStream) &createCommandStreamReceiverFunc; std::vector &allEngines; }; class MockDeviceWithDebuggerActive : public MockDevice { public: MockDeviceWithDebuggerActive(ExecutionEnvironment *executionEnvironment, uint32_t deviceIndex) : MockDevice(executionEnvironment, deviceIndex) {} void initializeCaps() override { MockDevice::initializeCaps(); this->setDebuggerActive(true); 
} }; } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/mocks/mock_cl_execution_environment.h000066400000000000000000000017311422164147700312340ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/built_ins/builtins_dispatch_builder.h" #include "opencl/source/execution_environment/cl_execution_environment.h" namespace NEO { class MockClExecutionEnvironment : public ClExecutionEnvironment { public: using ClExecutionEnvironment::asyncEventsHandler; using ClExecutionEnvironment::builtinOpsBuilders; using ClExecutionEnvironment::ClExecutionEnvironment; std::unique_ptr setBuiltinDispatchInfoBuilder(uint32_t rootDeviceIndex, EBuiltInOps::Type operation, std::unique_ptr builder) { uint32_t operationId = static_cast(operation); auto &operationBuilder = peekBuilders(rootDeviceIndex)[operationId]; std::call_once(operationBuilder.second, [] {}); operationBuilder.first.swap(builder); return builder; } }; } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/mocks/mock_cl_hw_helper.h000066400000000000000000000004751422164147700265660ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/helpers/cl_hw_helper.h" namespace NEO { struct ClHwHelperMock : public ClHwHelper { using ClHwHelper::makeDeviceIpVersion; using ClHwHelper::makeDeviceRevision; }; } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/mocks/mock_command_queue.h000066400000000000000000000472251422164147700267610ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/wait_status.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "shared/test/common/libult/ult_command_stream_receiver.h" #include "opencl/source/command_queue/command_queue_hw.h" #include 
//////////////////////////////////////////////////////////////////////////////// // MockCommandQueue - Core implementation //////////////////////////////////////////////////////////////////////////////// namespace NEO { class MockCommandQueue : public CommandQueue { public: using CommandQueue::bcsEngines; using CommandQueue::bcsEngineTypes; using CommandQueue::bcsTimestampPacketContainers; using CommandQueue::blitEnqueueAllowed; using CommandQueue::blitEnqueueImageAllowed; using CommandQueue::bufferCpuCopyAllowed; using CommandQueue::device; using CommandQueue::gpgpuEngine; using CommandQueue::isCopyOnly; using CommandQueue::isTextureCacheFlushNeeded; using CommandQueue::obtainNewTimestampPacketNodes; using CommandQueue::overrideEngine; using CommandQueue::queueCapabilities; using CommandQueue::queueFamilyIndex; using CommandQueue::queueFamilySelected; using CommandQueue::queueIndexWithinFamily; using CommandQueue::requiresCacheFlushAfterWalker; using CommandQueue::throttle; using CommandQueue::timestampPacketContainer; void clearBcsEngines() { std::fill(bcsEngines.begin(), bcsEngines.end(), nullptr); bcsEngineTypes.clear(); } void insertBcsEngine(aub_stream::EngineType bcsEngineType) { const auto index = NEO::EngineHelpers::getBcsIndex(bcsEngineType); const auto engine = &getDevice().getEngine(bcsEngineType, EngineUsage::Regular); bcsEngines[index] = engine; bcsEngineTypes.push_back(bcsEngineType); } size_t countBcsEngines() const { return std::count_if(bcsEngines.begin(), bcsEngines.end(), [](const EngineControl *engine) { return engine != nullptr; }); } void setProfilingEnabled() { commandQueueProperties |= CL_QUEUE_PROFILING_ENABLE; } void setOoqEnabled() { commandQueueProperties |= CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE; } MockCommandQueue() : CommandQueue(nullptr, nullptr, 0, false) {} MockCommandQueue(Context &context) : MockCommandQueue(&context, context.getDevice(0), nullptr, false) {} MockCommandQueue(Context *context, ClDevice *device, const 
cl_queue_properties *props, bool internalUsage) : CommandQueue(context, device, props, internalUsage) { } LinearStream &getCS(size_t minRequiredSize) override { requestedCmdStreamSize = minRequiredSize; return CommandQueue::getCS(minRequiredSize); } void releaseIndirectHeap(IndirectHeap::Type heap) override { releaseIndirectHeapCalled = true; CommandQueue::releaseIndirectHeap(heap); } cl_int enqueueWriteBuffer(Buffer *buffer, cl_bool blockingWrite, size_t offset, size_t size, const void *ptr, GraphicsAllocation *mapAllocation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { writeBufferCounter++; writeBufferBlocking = (CL_TRUE == blockingWrite); writeBufferOffset = offset; writeBufferSize = size; writeBufferPtr = const_cast(ptr); writeMapAllocation = mapAllocation; return writeBufferRetValue; } WaitStatus waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList, bool skipWait) override { latestTaskCountWaited = gpgpuTaskCountToWait; waitUntilCompleteCalledCount++; if (waitUntilCompleteReturnValue.has_value()) { return *waitUntilCompleteReturnValue; } return CommandQueue::waitUntilComplete(gpgpuTaskCountToWait, copyEnginesToWait, flushStampToWait, useQuickKmdSleep, cleanTemporaryAllocationList, skipWait); } WaitStatus waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) override { latestTaskCountWaited = gpgpuTaskCountToWait; return CommandQueue::waitUntilComplete(gpgpuTaskCountToWait, copyEnginesToWait, flushStampToWait, useQuickKmdSleep); } cl_int enqueueCopyImage(Image *srcImage, Image *dstImage, const size_t *srcOrigin, const size_t *dstOrigin, const size_t *region, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { return CL_SUCCESS; } cl_int enqueueFillImage(Image *image, const void *fillColor, const size_t 
*origin, const size_t *region, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { return CL_SUCCESS; } cl_int enqueueFillBuffer(Buffer *buffer, const void *pattern, size_t patternSize, size_t offset, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { return CL_SUCCESS; } cl_int enqueueKernel(Kernel *kernel, cl_uint workDim, const size_t *globalWorkOffset, const size_t *globalWorkSize, const size_t *localWorkSize, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { return CL_SUCCESS; } cl_int enqueueBarrierWithWaitList(cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { return CL_SUCCESS; } cl_int enqueueSVMMap(cl_bool blockingMap, cl_map_flags mapFlags, void *svmPtr, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool externalAppCall) override { return CL_SUCCESS; } cl_int enqueueSVMUnmap(void *svmPtr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool externalAppCall) override { return CL_SUCCESS; } cl_int enqueueSVMFree(cl_uint numSvmPointers, void *svmPointers[], void(CL_CALLBACK *pfnFreeFunc)(cl_command_queue queue, cl_uint numSvmPointers, void *svmPointers[], void *userData), void *userData, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { return CL_SUCCESS; } cl_int enqueueSVMMemcpy(cl_bool blockingCopy, void *dstPtr, const void *srcPtr, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { return CL_SUCCESS; } cl_int enqueueSVMMemFill(void *svmPtr, const void *pattern, size_t patternSize, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { return CL_SUCCESS; } cl_int enqueueMarkerWithWaitList(cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { return CL_SUCCESS; 
} cl_int enqueueMigrateMemObjects(cl_uint numMemObjects, const cl_mem *memObjects, cl_mem_migration_flags flags, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { return CL_SUCCESS; } cl_int enqueueSVMMigrateMem(cl_uint numSvmPointers, const void **svmPointers, const size_t *sizes, const cl_mem_migration_flags flags, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { return CL_SUCCESS; } cl_int enqueueCopyBuffer(Buffer *srcBuffer, Buffer *dstBuffer, size_t srcOffset, size_t dstOffset, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { return CL_SUCCESS; } cl_int enqueueReadBuffer(Buffer *buffer, cl_bool blockingRead, size_t offset, size_t size, void *ptr, GraphicsAllocation *mapAllocation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { return CL_SUCCESS; } cl_int enqueueReadImage(Image *srcImage, cl_bool blockingRead, const size_t *origin, const size_t *region, size_t rowPitch, size_t slicePitch, void *ptr, GraphicsAllocation *mapAllocation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { return CL_SUCCESS; } cl_int enqueueWriteImage(Image *dstImage, cl_bool blockingWrite, const size_t *origin, const size_t *region, size_t inputRowPitch, size_t inputSlicePitch, const void *ptr, GraphicsAllocation *mapAllocation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { return CL_SUCCESS; } cl_int enqueueCopyBufferRect(Buffer *srcBuffer, Buffer *dstBuffer, const size_t *srcOrigin, const size_t *dstOrigin, const size_t *region, size_t srcRowPitch, size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { return CL_SUCCESS; } cl_int enqueueWriteBufferRect(Buffer *buffer, cl_bool blockingWrite, const size_t *bufferOrigin, const size_t 
*hostOrigin, const size_t *region, size_t bufferRowPitch, size_t bufferSlicePitch, size_t hostRowPitch, size_t hostSlicePitch, const void *ptr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { return CL_SUCCESS; } cl_int enqueueReadBufferRect(Buffer *buffer, cl_bool blockingRead, const size_t *bufferOrigin, const size_t *hostOrigin, const size_t *region, size_t bufferRowPitch, size_t bufferSlicePitch, size_t hostRowPitch, size_t hostSlicePitch, void *ptr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { return CL_SUCCESS; } cl_int enqueueCopyBufferToImage(Buffer *srcBuffer, Image *dstImage, size_t srcOffset, const size_t *dstOrigin, const size_t *region, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { return CL_SUCCESS; } cl_int enqueueCopyImageToBuffer(Image *srcImage, Buffer *dstBuffer, const size_t *srcOrigin, const size_t *region, size_t dstOffset, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { return CL_SUCCESS; } cl_int enqueueResourceBarrier(BarrierCommand *resourceBarrier, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { return CL_SUCCESS; } cl_int finish() override { return CL_SUCCESS; } cl_int flush() override { return CL_SUCCESS; } bool obtainTimestampPacketForCacheFlush(bool isCacheFlushRequired) const override { return isCacheFlushRequired; } bool waitForTimestamps(uint32_t taskCount) override { return false; }; bool releaseIndirectHeapCalled = false; cl_int writeBufferRetValue = CL_SUCCESS; uint32_t writeBufferCounter = 0; bool writeBufferBlocking = false; size_t writeBufferOffset = 0; size_t writeBufferSize = 0; void *writeBufferPtr = nullptr; size_t requestedCmdStreamSize = 0; GraphicsAllocation *writeMapAllocation = nullptr; std::atomic latestTaskCountWaited{std::numeric_limits::max()}; std::optional waitUntilCompleteReturnValue{}; int 
waitUntilCompleteCalledCount{0}; }; template class MockCommandQueueHw : public CommandQueueHw { using BaseClass = CommandQueueHw; public: using BaseClass::bcsEngines; using BaseClass::bcsStates; using BaseClass::blitEnqueueAllowed; using BaseClass::commandQueueProperties; using BaseClass::commandStream; using BaseClass::deferredTimestampPackets; using BaseClass::gpgpuEngine; using BaseClass::isBlitAuxTranslationRequired; using BaseClass::latestSentEnqueueType; using BaseClass::obtainCommandStream; using BaseClass::obtainNewTimestampPacketNodes; using BaseClass::requiresCacheFlushAfterWalker; using BaseClass::throttle; using BaseClass::timestampPacketContainer; void clearBcsStates() { CopyEngineState unusedState{}; std::fill(bcsStates.begin(), bcsStates.end(), unusedState); } MockCommandQueueHw(Context *context, ClDevice *device, cl_queue_properties *properties) : BaseClass(context, device, properties, false) { } void clearBcsEngines() { std::fill(bcsEngines.begin(), bcsEngines.end(), nullptr); } cl_int flush() override { flushCalled = true; return BaseClass::flush(); } void setOoqEnabled() { commandQueueProperties |= CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE; } void setProfilingEnabled() { commandQueueProperties |= CL_QUEUE_PROFILING_ENABLE; } LinearStream &getCS(size_t minRequiredSize) override { requestedCmdStreamSize = minRequiredSize; return CommandQueue::getCS(minRequiredSize); } UltCommandStreamReceiver &getUltCommandStreamReceiver() { return reinterpret_cast &>(*BaseClass::gpgpuEngine->commandStreamReceiver); } cl_int enqueueWriteImage(Image *dstImage, cl_bool blockingWrite, const size_t *origin, const size_t *region, size_t inputRowPitch, size_t inputSlicePitch, const void *ptr, GraphicsAllocation *mapAllocation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { EnqueueWriteImageCounter++; return BaseClass::enqueueWriteImage(dstImage, blockingWrite, origin, region, inputRowPitch, inputSlicePitch, ptr, mapAllocation, 
numEventsInWaitList, eventWaitList, event); } void *cpuDataTransferHandler(TransferProperties &transferProperties, EventsRequest &eventsRequest, cl_int &retVal) override { cpuDataTransferHandlerCalled = true; return BaseClass::cpuDataTransferHandler(transferProperties, eventsRequest, retVal); } cl_int enqueueWriteBuffer(Buffer *buffer, cl_bool blockingWrite, size_t offset, size_t size, const void *ptr, GraphicsAllocation *mapAllocation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { EnqueueWriteBufferCounter++; blockingWriteBuffer = blockingWrite == CL_TRUE; return BaseClass::enqueueWriteBuffer(buffer, blockingWrite, offset, size, ptr, mapAllocation, numEventsInWaitList, eventWaitList, event); } void enqueueHandlerHook(const unsigned int commandType, const MultiDispatchInfo &dispatchInfo) override { kernelParams = dispatchInfo.peekBuiltinOpParams(); lastCommandType = commandType; for (auto &di : dispatchInfo) { lastEnqueuedKernels.push_back(di.getKernel()); if (storeMultiDispatchInfo) { storedMultiDispatchInfo.push(di); } } } void notifyEnqueueReadBuffer(Buffer *buffer, bool blockingRead, bool notifyBcsCsr) override { notifyEnqueueReadBufferCalled = true; useBcsCsrOnNotifyEnabled = notifyBcsCsr; } void notifyEnqueueReadImage(Image *image, bool blockingRead, bool notifyBcsCsr) override { notifyEnqueueReadImageCalled = true; useBcsCsrOnNotifyEnabled = notifyBcsCsr; } void notifyEnqueueSVMMemcpy(GraphicsAllocation *gfxAllocation, bool blockingCopy, bool notifyBcsCsr) override { notifyEnqueueSVMMemcpyCalled = true; useBcsCsrOnNotifyEnabled = notifyBcsCsr; } WaitStatus waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList, bool skipWait) override { latestTaskCountWaited = gpgpuTaskCountToWait; return BaseClass::waitUntilComplete(gpgpuTaskCountToWait, copyEnginesToWait, flushStampToWait, useQuickKmdSleep, 
cleanTemporaryAllocationList, skipWait); } WaitStatus waitForAllEngines(bool blockedQueue, PrintfHandler *printfHandler, bool cleanTemporaryAllocationsList) override { waitForAllEnginesCalledCount++; if (waitForAllEnginesReturnValue.has_value()) { return *waitForAllEnginesReturnValue; } return BaseClass::waitForAllEngines(blockedQueue, printfHandler, cleanTemporaryAllocationsList); } bool isCacheFlushForBcsRequired() const override { if (overrideIsCacheFlushForBcsRequired.enabled) { return overrideIsCacheFlushForBcsRequired.returnValue; } return BaseClass::isCacheFlushForBcsRequired(); } bool blitEnqueueImageAllowed(const size_t *origin, const size_t *region, const Image &image) const override { isBlitEnqueueImageAllowed = BaseClass::blitEnqueueImageAllowed(origin, region, image); return isBlitEnqueueImageAllowed; } unsigned int lastCommandType; std::vector lastEnqueuedKernels; MultiDispatchInfo storedMultiDispatchInfo; size_t EnqueueWriteImageCounter = 0; size_t EnqueueWriteBufferCounter = 0; size_t requestedCmdStreamSize = 0; bool blockingWriteBuffer = false; bool storeMultiDispatchInfo = false; bool notifyEnqueueReadBufferCalled = false; bool notifyEnqueueReadImageCalled = false; bool notifyEnqueueSVMMemcpyCalled = false; bool cpuDataTransferHandlerCalled = false; bool useBcsCsrOnNotifyEnabled = false; mutable bool isBlitEnqueueImageAllowed = false; struct OverrideReturnValue { bool enabled = false; bool returnValue = false; } overrideIsCacheFlushForBcsRequired; BuiltinOpParams kernelParams; std::atomic latestTaskCountWaited{std::numeric_limits::max()}; bool flushCalled = false; std::optional waitForAllEnginesReturnValue{}; int waitForAllEnginesCalledCount{0}; LinearStream *peekCommandStream() { return this->commandStream; } }; } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/mocks/mock_context.cpp000066400000000000000000000177321422164147700261560ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * 
SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/mocks/mock_context.h" #include "shared/source/built_ins/built_ins.h" #include "shared/source/compiler_interface/compiler_interface.h" #include "shared/source/memory_manager/deferred_deleter.h" #include "shared/source/memory_manager/os_agnostic_memory_manager.h" #include "shared/source/memory_manager/unified_memory_manager.h" #include "shared/test/common/helpers/engine_descriptor_helper.h" #include "shared/test/common/mocks/mock_svm_manager.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/sharings/sharing.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "d3d_sharing_functions.h" namespace NEO { MockContext::MockContext(ClDevice *pDevice, bool noSpecialQueue) { cl_device_id deviceId = pDevice; initializeWithDevices(ClDeviceVector{&deviceId, 1}, noSpecialQueue); } MockContext::MockContext(const ClDeviceVector &clDeviceVector, bool noSpecialQueue) { initializeWithDevices(clDeviceVector, noSpecialQueue); } MockContext::MockContext( void(CL_CALLBACK *funcNotify)(const char *, const void *, size_t, void *), void *data) { pDevice = nullptr; properties = nullptr; numProperties = 0; contextCallback = funcNotify; userData = data; memoryManager = nullptr; driverDiagnostics = nullptr; rootDeviceIndices = {}; maxRootDeviceIndex = std::numeric_limits::max(); deviceBitfields = {}; } MockContext::~MockContext() { for (auto &rootDeviceIndex : rootDeviceIndices) { if (specialQueues[rootDeviceIndex]) { specialQueues[rootDeviceIndex]->release(); specialQueues[rootDeviceIndex] = nullptr; } } if (memoryManager && memoryManager->isAsyncDeleterEnabled()) { memoryManager->getDeferredDeleter()->removeClient(); } memoryManager = nullptr; } MockContext::MockContext() { pDevice = new MockClDevice{MockClDevice::createWithNewExecutionEnvironment(nullptr)}; 
cl_device_id deviceId = pDevice; initializeWithDevices(ClDeviceVector{&deviceId, 1}, false); pDevice->decRefInternal(); } void MockContext::setSharingFunctions(SharingFunctions *sharingFunctions) { this->sharingFunctions[sharingFunctions->getId()].reset(sharingFunctions); } void MockContext::releaseSharingFunctions(SharingType sharing) { this->sharingFunctions[sharing].release(); } void MockContext::resetSharingFunctions(SharingType sharing) { this->sharingFunctions[sharing].reset(); } void MockContext::registerSharingWithId(SharingFunctions *sharing, SharingType sharingId) { this->sharingFunctions[sharingId].reset(sharing); } void MockContext::clearSharingFunctions() { std::vectorsharingFunctions)::value_type> v; this->sharingFunctions.swap(v); } std::unique_ptr &MockContext::getAsyncEventsHandlerUniquePtr() { return static_cast(devices[0]->getExecutionEnvironment())->asyncEventsHandler; } void MockContext::initializeWithDevices(const ClDeviceVector &devices, bool noSpecialQueue) { for (auto &pClDevice : devices) { pClDevice->incRefInternal(); rootDeviceIndices.insert(pClDevice->getRootDeviceIndex()); } maxRootDeviceIndex = *std::max_element(rootDeviceIndices.begin(), rootDeviceIndices.end(), std::less()); specialQueues.resize(maxRootDeviceIndex + 1u); this->devices = devices; memoryManager = devices[0]->getMemoryManager(); svmAllocsManager = new MockSVMAllocsManager(memoryManager, true); for (auto &rootDeviceIndex : rootDeviceIndices) { DeviceBitfield deviceBitfield{}; for (const auto &pDevice : devices) { if (pDevice->getRootDeviceIndex() == rootDeviceIndex) { deviceBitfield |= pDevice->getDeviceBitfield(); } } deviceBitfields.insert({rootDeviceIndex, deviceBitfield}); } cl_int retVal; if (!noSpecialQueue) { for (auto &device : devices) { if (!specialQueues[device->getRootDeviceIndex()]) { auto commandQueue = CommandQueue::create(this, device, nullptr, false, retVal); assert(retVal == CL_SUCCESS); overrideSpecialQueueAndDecrementRefCount(commandQueue, 
device->getRootDeviceIndex()); } } } setupContextType(); } MockDefaultContext::MockDefaultContext() : MockDefaultContext(false) {} MockDefaultContext::MockDefaultContext(bool initSpecialQueues) : MockContext(nullptr, nullptr) { pRootDevice0 = ultClDeviceFactory.rootDevices[0]; pRootDevice1 = ultClDeviceFactory.rootDevices[1]; pRootDevice2 = ultClDeviceFactory.rootDevices[2]; cl_device_id deviceIds[] = {pRootDevice0, pRootDevice1, pRootDevice2}; initializeWithDevices(ClDeviceVector{deviceIds, 3}, !initSpecialQueues); } MockSpecializedContext::MockSpecializedContext() : MockContext(nullptr, nullptr) { pRootDevice = ultClDeviceFactory.rootDevices[0]; pSubDevice0 = ultClDeviceFactory.subDevices[0]; pSubDevice1 = ultClDeviceFactory.subDevices[1]; cl_device_id deviceIds[] = {pSubDevice0, pSubDevice1}; initializeWithDevices(ClDeviceVector{deviceIds, 2}, true); } MockUnrestrictiveContext::MockUnrestrictiveContext() : MockContext(nullptr, nullptr) { pRootDevice = ultClDeviceFactory.rootDevices[0]; pSubDevice0 = ultClDeviceFactory.subDevices[0]; pSubDevice1 = ultClDeviceFactory.subDevices[1]; cl_device_id deviceIds[] = {pRootDevice, pSubDevice0, pSubDevice1}; initializeWithDevices(ClDeviceVector{deviceIds, 3}, true); } MockUnrestrictiveContextMultiGPU::MockUnrestrictiveContextMultiGPU() : MockContext(nullptr, nullptr) { pRootDevice0 = ultClDeviceFactory.rootDevices[0]; pSubDevice00 = ultClDeviceFactory.subDevices[0]; pSubDevice01 = ultClDeviceFactory.subDevices[1]; pRootDevice1 = ultClDeviceFactory.rootDevices[1]; pSubDevice10 = ultClDeviceFactory.subDevices[2]; pSubDevice11 = ultClDeviceFactory.subDevices[3]; cl_device_id deviceIds[] = {pRootDevice0, pSubDevice00, pSubDevice01, pRootDevice1, pSubDevice10, pSubDevice11}; initializeWithDevices(ClDeviceVector{deviceIds, 6}, true); } BcsMockContext::BcsMockContext(ClDevice *device) : MockContext(device) { bcsOsContext.reset(OsContext::create(nullptr, 0, EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS, 
EngineUsage::Regular}, device->getDeviceBitfield()))); bcsCsr.reset(createCommandStream(*device->getExecutionEnvironment(), device->getRootDeviceIndex(), device->getDeviceBitfield())); bcsCsr->setupContext(*bcsOsContext); bcsCsr->initializeTagAllocation(); bcsCsr->createGlobalFenceAllocation(); auto mockBlitMemoryToAllocation = [this](const Device &device, GraphicsAllocation *memory, size_t offset, const void *hostPtr, Vec3 size) -> BlitOperationResult { auto blitProperties = BlitProperties::constructPropertiesForReadWrite(BlitterConstants::BlitDirection::HostPtrToBuffer, *bcsCsr, memory, nullptr, hostPtr, memory->getGpuAddress(), 0, 0, 0, size, 0, 0, 0, 0); BlitPropertiesContainer container; container.push_back(blitProperties); bcsCsr->flushBcsTask(container, true, false, const_cast(device)); return BlitOperationResult::Success; }; blitMemoryToAllocationFuncBackup = mockBlitMemoryToAllocation; } BcsMockContext::~BcsMockContext() = default; } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/mocks/mock_context.h000066400000000000000000000063531422164147700256200ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/blit_commands_helper.h" #include "shared/test/common/helpers/variable_backup.h" #include "opencl/source/context/context.h" #include "opencl/source/sharings/sharing_factory.h" #include "opencl/test/unit_test/mocks/ult_cl_device_factory.h" #include namespace NEO { class AsyncEventsHandler; class OsContext; class MockContext : public Context { public: using Context::contextType; using Context::deviceBitfields; using Context::devices; using Context::driverDiagnostics; using Context::maxRootDeviceIndex; using Context::memoryManager; using Context::preferD3dSharedResources; using Context::resolvesRequiredInKernels; using Context::rootDeviceIndices; using Context::setupContextType; using Context::sharingFunctions; using 
Context::specialQueues; using Context::svmAllocsManager; MockContext(ClDevice *pDevice, bool noSpecialQueue = false); MockContext(const ClDeviceVector &clDeviceVector, bool noSpecialQueue = true); MockContext( void(CL_CALLBACK *funcNotify)(const char *, const void *, size_t, void *), void *data); MockContext(); ~MockContext() override; void clearSharingFunctions(); void setSharingFunctions(SharingFunctions *sharingFunctions); void releaseSharingFunctions(SharingType sharing); void resetSharingFunctions(SharingType sharing); void registerSharingWithId(SharingFunctions *sharing, SharingType sharingId); std::unique_ptr &getAsyncEventsHandlerUniquePtr(); void initializeWithDevices(const ClDeviceVector &devices, bool noSpecialQueue); private: ClDevice *pDevice = nullptr; }; struct MockDefaultContext : MockContext { MockDefaultContext(); MockDefaultContext(bool initSpecialQueues); UltClDeviceFactory ultClDeviceFactory{3, 0}; MockClDevice *pRootDevice0; MockClDevice *pRootDevice1; MockClDevice *pRootDevice2; }; struct MockSpecializedContext : MockContext { MockSpecializedContext(); UltClDeviceFactory ultClDeviceFactory{1, 2}; MockClDevice *pRootDevice; ClDevice *pSubDevice0 = nullptr; ClDevice *pSubDevice1 = nullptr; }; struct MockUnrestrictiveContext : MockContext { MockUnrestrictiveContext(); UltClDeviceFactory ultClDeviceFactory{1, 2}; MockClDevice *pRootDevice; ClDevice *pSubDevice0 = nullptr; ClDevice *pSubDevice1 = nullptr; }; struct MockUnrestrictiveContextMultiGPU : MockContext { MockUnrestrictiveContextMultiGPU(); UltClDeviceFactory ultClDeviceFactory{2, 2}; MockClDevice *pRootDevice0; ClDevice *pSubDevice00 = nullptr; ClDevice *pSubDevice01 = nullptr; MockClDevice *pRootDevice1; ClDevice *pSubDevice10 = nullptr; ClDevice *pSubDevice11 = nullptr; }; class BcsMockContext : public MockContext { public: BcsMockContext(ClDevice *device); ~BcsMockContext() override; std::unique_ptr bcsOsContext; std::unique_ptr bcsCsr; VariableBackup blitMemoryToAllocationFuncBackup{ 
&BlitHelperFunctions::blitMemoryToAllocation}; }; } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/mocks/mock_d3d_objects.h000066400000000000000000000255561422164147700263250ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/test/common/test_macros/mock_method_macros.h" #include "opencl/source/sharings/d3d/d3d_sharing.h" #include "gtest/gtest.h" namespace NEO { template class MockD3DSharingFunctions : public D3DSharingFunctions { typedef typename D3D::D3DDevice D3DDevice; typedef typename D3D::D3DQuery D3DQuery; typedef typename D3D::D3DQueryDesc D3DQueryDesc; typedef typename D3D::D3DResource D3DResource; typedef typename D3D::D3DBufferDesc D3DBufferDesc; typedef typename D3D::D3DBufferObj D3DBufferObj; typedef typename D3D::D3DTexture2dDesc D3DTexture2dDesc; typedef typename D3D::D3DTexture3dDesc D3DTexture3dDesc; typedef typename D3D::D3DTexture2d D3DTexture2d; typedef typename D3D::D3DTexture3d D3DTexture3d; public: MockD3DSharingFunctions() : D3DSharingFunctions((D3DDevice *)1) { memset(&mockDxgiDesc, 0, sizeof(DXGI_ADAPTER_DESC)); mockDxgiDesc.VendorId = INTEL_VENDOR_ID; getDxgiDescFcn = &this->mockGetDxgiDesc; getDxgiDescCalled = 0; getDxgiDescAdapterRequested = nullptr; } void createQuery(D3DQuery **query) override { createQueryCalled++; if (createQuerySetParams) { *query = createQueryParamsSet.query; } } struct CreateQueryParams { D3DQuery *query{}; }; uint32_t createQueryCalled = 0u; CreateQueryParams createQueryParamsSet{}; bool createQuerySetParams = false; void createBuffer(D3DBufferObj **buffer, unsigned int width) override { createBufferCalled++; if (createBufferSetParams) { *buffer = createBufferParamsSet.buffer; } } struct CreateBufferParams { D3DBufferObj *buffer{}; unsigned int width{}; }; uint32_t createBufferCalled = 0u; CreateBufferParams createBufferParamsSet{}; bool createBufferSetParams = false; void createTexture2d(D3DTexture2d 
**texture, D3DTexture2dDesc *desc, cl_uint subresource) override { createTexture2dCalled++; if (createTexture2dSetParams) { *texture = createTexture2dParamsSet.texture; } } struct CreateTexture2dParams { D3DTexture2d *texture{}; D3DTexture2dDesc *desc{}; cl_uint subresource{}; }; uint32_t createTexture2dCalled = 0u; CreateTexture2dParams createTexture2dParamsSet{}; bool createTexture2dSetParams = false; void createTexture3d(D3DTexture3d **texture, D3DTexture3dDesc *desc, cl_uint subresource) override { createTexture3dCalled++; if (createTexture3dSetParams) { *texture = createTexture3dParamsSet.texture; } } struct CreateTexture3dParams { D3DTexture3d *texture{}; D3DTexture3dDesc *desc{}; cl_uint subresource{}; }; uint32_t createTexture3dCalled = 0u; CreateTexture3dParams createTexture3dParamsSet{}; bool createTexture3dSetParams = false; void getBufferDesc(D3DBufferDesc *bufferDesc, D3DBufferObj *buffer) override { getBufferDescCalled++; if (getBufferDescSetParams) { *bufferDesc = getBufferDescParamsSet.bufferDesc; } } struct GetBufferDescParams { D3DBufferDesc bufferDesc{}; D3DBufferObj *buffer{}; }; uint32_t getBufferDescCalled = 0u; GetBufferDescParams getBufferDescParamsSet{}; bool getBufferDescSetParams = false; void getTexture2dDesc(D3DTexture2dDesc *textureDesc, D3DTexture2d *texture) override { getTexture2dDescCalled++; if (getTexture2dDescSetParams) { *textureDesc = getTexture2dDescParamsSet.textureDesc; } } struct GetTexture2dDescParams { D3DTexture2dDesc textureDesc{}; D3DTexture2d *texture{}; }; uint32_t getTexture2dDescCalled = 0u; GetTexture2dDescParams getTexture2dDescParamsSet{}; bool getTexture2dDescSetParams = false; void getTexture3dDesc(D3DTexture3dDesc *textureDesc, D3DTexture3d *texture) override { getTexture3dDescCalled++; if (getTexture3dDescSetParams) { *textureDesc = getTexture3dDescParamsSet.textureDesc; } } struct GetTexture3dDescParams { D3DTexture3dDesc textureDesc{}; D3DTexture3d *texture{}; }; uint32_t getTexture3dDescCalled = 0u; 
GetTexture3dDescParams getTexture3dDescParamsSet{}; bool getTexture3dDescSetParams = false; void getSharedHandle(D3DResource *resource, void **handle) override { getSharedHandleCalled++; getSharedHandleParamsPassed.push_back({resource, handle}); } struct GetSharedHandleParams { D3DResource *resource{}; void **handle{}; }; uint32_t getSharedHandleCalled = 0u; StackVec getSharedHandleParamsPassed{}; void addRef(D3DResource *resource) override { addRefCalled++; addRefParamsPassed.push_back({resource}); } struct AddRefParams { D3DResource *resource{}; }; uint32_t addRefCalled = 0u; StackVec addRefParamsPassed{}; void release(IUnknown *resource) override { releaseCalled++; releaseParamsPassed.push_back({resource}); } struct ReleaseParams { IUnknown *resource{}; }; uint32_t releaseCalled = 0u; StackVec releaseParamsPassed{}; void copySubresourceRegion(D3DResource *dst, cl_uint dstSubresource, D3DResource *src, cl_uint srcSubresource) override { copySubresourceRegionCalled++; copySubresourceRegionParamsPassed.push_back({dst, dstSubresource, src, srcSubresource}); } struct CopySubresourceRegionParams { D3DResource *dst{}; cl_uint dstSubresource{}; D3DResource *src{}; cl_uint srcSubresource{}; }; uint32_t copySubresourceRegionCalled = 0u; StackVec copySubresourceRegionParamsPassed{}; void lockRect(D3DTexture2d *d3dResource, D3DLOCKED_RECT *lockedRect, uint32_t flags) override { lockRectCalled++; if (lockRectSetParams) { *lockedRect = lockRectParamsSet.lockedRect; } lockRectParamsPassed.push_back({d3dResource, *lockedRect, flags}); } struct LockRectParams { D3DTexture2d *d3dResource{}; D3DLOCKED_RECT lockedRect{}; uint32_t flags{}; }; uint32_t lockRectCalled = 0u; LockRectParams lockRectParamsSet{}; bool lockRectSetParams = false; StackVec lockRectParamsPassed{}; void unlockRect(D3DTexture2d *d3dResource) override { unlockRectCalled++; unlockRectParamsPassed.push_back({d3dResource}); } struct UnlockRectParams { D3DTexture2d *d3dResource{}; }; uint32_t unlockRectCalled = 0u; 
StackVec unlockRectParamsPassed{}; void getRenderTargetData(D3DTexture2d *renderTarget, D3DTexture2d *dstSurface) override { getRenderTargetDataCalled++; getRenderTargetDataParamsPassed.push_back({renderTarget, dstSurface}); } struct GetRenderTargetDataParams { D3DTexture2d *renderTarget{}; D3DTexture2d *dstSurface{}; }; uint32_t getRenderTargetDataCalled = 0u; StackVec getRenderTargetDataParamsPassed{}; void updateSurface(D3DTexture2d *renderTarget, D3DTexture2d *dstSurface) override { updateSurfaceCalled++; updateSurfaceParamsPassed.push_back({renderTarget, dstSurface}); } struct UpdateSurfaceParams { D3DTexture2d *renderTarget{}; D3DTexture2d *dstSurface{}; }; uint32_t updateSurfaceCalled = 0u; StackVec updateSurfaceParamsPassed{}; void updateDevice(D3DResource *resource) override { updateDeviceCalled++; updateDeviceParamsPassed.push_back({resource}); } struct UpdateDeviceParams { D3DResource *resource{}; }; uint32_t updateDeviceCalled = 0u; StackVec updateDeviceParamsPassed{}; bool checkFormatSupport(DXGI_FORMAT format, UINT *pFormat) override { checkFormatSupportCalled++; if (nullptr == pFormat) { return false; } if (checkFormatSupportSetParam0) { format = checkFormatSupportParamsSet.format; } if (checkFormatSupportSetParam1) { *pFormat = checkFormatSupportParamsSet.pFormat; } if (checkUnsupportedDXGIformats) { auto iter = std::find(unsupportedDXGIformats.begin(), unsupportedDXGIformats.end(), format); if (iter != unsupportedDXGIformats.end()) { *pFormat = {}; return false; } return true; } return checkFormatSupportResult; } struct CheckFormatSupportParams { DXGI_FORMAT format{}; UINT pFormat{}; }; uint32_t checkFormatSupportCalled = 0u; CheckFormatSupportParams checkFormatSupportParamsSet{}; bool checkFormatSupportResult = true; bool checkFormatSupportSetParam0 = false; bool checkFormatSupportSetParam1 = false; bool checkUnsupportedDXGIformats = false; std::vector unsupportedDXGIformats{}; cl_int validateFormatSupport(DXGI_FORMAT format, cl_mem_object_type 
type) override { validateFormatSupportCalled++; if (callBaseValidateFormatSupport) { validateFormatSupportResult = validateFormatSupportBase(format, type); } return validateFormatSupportResult; } uint32_t validateFormatSupportCalled = 0u; cl_int validateFormatSupportResult = CL_SUCCESS; bool callBaseValidateFormatSupport = false; cl_int validateFormatSupportBase(DXGI_FORMAT format, cl_mem_object_type type) { return D3DSharingFunctions::validateFormatSupport(format, type); } std::vector> *getTrackedResourcesVector() { return &this->trackedResources; } D3DBufferDesc mockBufferDesc = {}; D3DTexture2dDesc mockTexture2dDesc = {}; D3DTexture3dDesc mockTexture3dDesc = {}; static DXGI_ADAPTER_DESC mockDxgiDesc; static IDXGIAdapter *getDxgiDescAdapterRequested; static uint32_t getDxgiDescCalled; static void mockGetDxgiDesc(DXGI_ADAPTER_DESC *dxgiDesc, IDXGIAdapter *adapter, D3DDevice *device) { getDxgiDescCalled++; getDxgiDescAdapterRequested = adapter; *dxgiDesc = mockDxgiDesc; } ADDMETHOD_NOBASE(memObjectFormatSupport, bool, true, (cl_mem_object_type object, UINT format)); ADDMETHOD_NOBASE_VOIDRETURN(getSharedNTHandle, (D3DResource * resource, void **handle)); ADDMETHOD_NOBASE_VOIDRETURN(getDeviceContext, (D3DQuery * query)); ADDMETHOD_NOBASE_VOIDRETURN(releaseDeviceContext, (D3DQuery * query)); ADDMETHOD_NOBASE_VOIDRETURN(flushAndWait, (D3DQuery * query)); }; } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/mocks/mock_event.h000066400000000000000000000040651422164147700252530ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/wait_status.h" #include "opencl/source/event/event_builder.h" #include "opencl/source/event/user_event.h" #include namespace NEO { #define FORWARD_CONSTRUCTOR(THIS_CLASS, BASE_CLASS) \ template \ THIS_CLASS(ArgsT &&...args) : BASE_CLASS(std::forward(args)...) 
{ \ } #define FORWARD_FUNC(FUNC_NAME, BASE_CLASS) \ template \ void FUNC_NAME(ArgsT &&...args) { \ BASE_CLASS::FUNC_NAME(std::forward(args)...); \ } template struct MockEvent : public BaseEventType { FORWARD_CONSTRUCTOR(MockEvent, BaseEventType); // make some protected members public : FORWARD_FUNC(submitCommand, BaseEventType); using BaseEventType::timeStampNode; using Event::calcProfilingData; using Event::calculateSubmitTimestampData; using Event::magic; using Event::queueTimeStamp; using Event::submitTimeStamp; using Event::timestampPacketContainer; WaitStatus wait(bool blocking, bool useQuickKmdSleep) override { if (waitReturnValue.has_value()) { return *waitReturnValue; } return BaseEventType::wait(blocking, useQuickKmdSleep); } std::optional waitReturnValue{}; }; #undef FORWARD_CONSTRUCTOR #undef FORWARD_FUNC struct MockEventBuilder : EventBuilder { MockEventBuilder() = default; MockEventBuilder(Event *ev) { setEvent(ev); } void setEvent(Event *ev) { this->event = ev; } template static EventType *createAndFinalize(ArgsT &&...args) { MockEventBuilder mb; mb.create(std::forward(args)...); return static_cast(mb.finalizeAndRelease()); } }; } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/mocks/mock_gmm_resource_info_ocl.cpp000066400000000000000000000035411422164147700310220ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/mocks/mock_gmm_resource_info.h" #include "opencl/source/helpers/surface_formats.h" #include "gtest/gtest.h" namespace NEO { void MockGmmResourceInfo::setSurfaceFormat() { auto iterate = [&](ArrayRef formats) { if (!surfaceFormatInfo) { for (auto &format : formats) { if (mockResourceCreateParams.Format == format.surfaceFormat.GMMSurfaceFormat) { surfaceFormatInfo = &format.surfaceFormat; break; } } } }; if (mockResourceCreateParams.Format == GMM_RESOURCE_FORMAT::GMM_FORMAT_P010 || mockResourceCreateParams.Format == 
GMM_RESOURCE_FORMAT::GMM_FORMAT_P016) { tempSurface.GMMSurfaceFormat = mockResourceCreateParams.Format; tempSurface.NumChannels = 1; tempSurface.ImageElementSizeInBytes = 16; tempSurface.PerChannelSizeInBytes = 16; surfaceFormatInfo = &tempSurface; } if (mockResourceCreateParams.Format == GMM_RESOURCE_FORMAT::GMM_FORMAT_RGBP) { tempSurface.GMMSurfaceFormat = GMM_RESOURCE_FORMAT::GMM_FORMAT_RGBP; tempSurface.NumChannels = 1; tempSurface.ImageElementSizeInBytes = 8; tempSurface.PerChannelSizeInBytes = 8; surfaceFormatInfo = &tempSurface; } iterate(SurfaceFormats::readOnly12()); iterate(SurfaceFormats::readOnly20()); iterate(SurfaceFormats::writeOnly()); iterate(SurfaceFormats::readWrite()); iterate(SurfaceFormats::packedYuv()); iterate(SurfaceFormats::planarYuv()); iterate(SurfaceFormats::packed()); iterate(SurfaceFormats::readOnlyDepth()); iterate(SurfaceFormats::readWriteDepth()); ASSERT_NE(nullptr, surfaceFormatInfo); } } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/mocks/mock_image.h000066400000000000000000000031511422164147700252070ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "opencl/source/mem_obj/image.h" #include "memory_properties_flags.h" namespace NEO { struct MockImageBase : public Image { using Image::imageDesc; using Image::imageFormat; MockGraphicsAllocation *graphicsAllocation = nullptr; MockImageBase(uint32_t rootDeviceIndex) : Image(nullptr, MemoryProperties(), cl_mem_flags{}, 0, 0, nullptr, nullptr, cl_image_format{}, cl_image_desc{}, false, GraphicsAllocationHelper::toMultiGraphicsAllocation(new MockGraphicsAllocation(rootDeviceIndex, nullptr, 0)), false, 0, 0, ClSurfaceFormatInfo{}, nullptr), graphicsAllocation(static_cast(multiGraphicsAllocation.getGraphicsAllocation(rootDeviceIndex))) { } MockImageBase() : MockImageBase(0u) {} ~MockImageBase() override { delete 
this->graphicsAllocation; } MockGraphicsAllocation *getAllocation() { return graphicsAllocation; } void setImageArg(void *memory, bool isMediaBlockImage, uint32_t mipLevel, uint32_t rootDeviceIndex, bool useGlobalAtomics) override {} void setMediaImageArg(void *memory, uint32_t rootDeviceIndex) override {} void setMediaSurfaceRotation(void *memory) override {} void setSurfaceMemoryObjectControlState(void *memory, uint32_t value) override {} void transformImage2dArrayTo3d(void *memory) override {} void transformImage3dTo2dArray(void *memory) override {} }; } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/mocks/mock_kernel.cpp000066400000000000000000000027141422164147700257440ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/source/program/printf_handler.h" namespace NEO { const uint32_t MockDebugKernel::perThreadSystemThreadSurfaceSize = 0x100; const KernelInfoContainer MockKernel::toKernelInfoContainer(const KernelInfo &kernelInfo, uint32_t rootDeviceIndex) { KernelInfoContainer kernelInfos; kernelInfos.resize(rootDeviceIndex + 1); kernelInfos[rootDeviceIndex] = &kernelInfo; return kernelInfos; } bool MockKernel::isPatched() const { return isPatchedOverride; } bool MockKernel::canTransformImages() const { return canKernelTransformImages; } void MockKernel::makeResident(CommandStreamReceiver &commandStreamReceiver) { makeResidentCalls++; Kernel::makeResident(commandStreamReceiver); } void MockKernel::getResidency(std::vector &dst) { getResidencyCalls++; Kernel::getResidency(dst); } bool MockKernel::requiresCacheFlushCommand(const CommandQueue &commandQueue) const { if (DebugManager.flags.EnableCacheFlushAfterWalker.get() != -1) { return !!DebugManager.flags.EnableCacheFlushAfterWalker.get(); } return false; } cl_int MockKernel::setArgSvmAlloc(uint32_t argIndex, void *svmPtr, GraphicsAllocation *svmAlloc, uint32_t 
allocId) { ++setArgSvmAllocCalls; return Kernel::setArgSvmAlloc(argIndex, svmPtr, svmAlloc, allocId); } } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/mocks/mock_kernel.h000066400000000000000000000351651422164147700254170ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/device/device.h" #include "shared/source/helpers/string.h" #include "shared/source/kernel/grf_config.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "shared/test/common/mocks/mock_kernel_info.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/kernel/kernel.h" #include "opencl/source/kernel/kernel_objects_for_aux_translation.h" #include "opencl/source/kernel/multi_device_kernel.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include namespace NEO { using namespace iOpenCL; void populateKernelDescriptor(KernelDescriptor &dst, const SPatchExecutionEnvironment &execEnv); struct MockKernelObjForAuxTranslation : public KernelObjForAuxTranslation { MockKernelObjForAuxTranslation(Type type) : KernelObjForAuxTranslation(type, nullptr) { if (type == KernelObjForAuxTranslation::Type::MEM_OBJ) { mockBuffer.reset(new MockBuffer); this->object = mockBuffer.get(); } else { DEBUG_BREAK_IF(type != KernelObjForAuxTranslation::Type::GFX_ALLOC); mockGraphicsAllocation.reset(new MockGraphicsAllocation(nullptr, 0x100)); this->object = mockGraphicsAllocation.get(); } }; MockKernelObjForAuxTranslation(Type type, size_t size) : MockKernelObjForAuxTranslation(type) { if (type == KernelObjForAuxTranslation::Type::MEM_OBJ) { mockBuffer->getGraphicsAllocation(0)->setSize(size); } else { DEBUG_BREAK_IF(type != KernelObjForAuxTranslation::Type::GFX_ALLOC); mockGraphicsAllocation->setSize(size); } } 
std::unique_ptr mockBuffer = nullptr; std::unique_ptr mockGraphicsAllocation = nullptr; }; class MockMultiDeviceKernel : public MultiDeviceKernel { public: static KernelVectorType toKernelVector(Kernel *pKernel) { KernelVectorType kernelVector; kernelVector.resize(pKernel->getProgram()->getMaxRootDeviceIndex() + 1); kernelVector[pKernel->getProgram()->getDevices()[0]->getRootDeviceIndex()] = pKernel; return kernelVector; } using MultiDeviceKernel::MultiDeviceKernel; template static MockMultiDeviceKernel *create(Program *programArg, const KernelInfoContainer &kernelInfoArg) { KernelVectorType kernelVector; kernelVector.resize(programArg->getMaxRootDeviceIndex() + 1); for (auto &pDevice : programArg->getDevices()) { auto rootDeviceIndex = pDevice->getRootDeviceIndex(); if (kernelVector[rootDeviceIndex]) { continue; } kernelVector[rootDeviceIndex] = new kernel_t(programArg, *kernelInfoArg[rootDeviceIndex], *pDevice); } return new MockMultiDeviceKernel(std::move(kernelVector), kernelInfoArg); } void takeOwnership() const override { MultiDeviceKernel::takeOwnership(); takeOwnershipCalls++; } void releaseOwnership() const override { releaseOwnershipCalls++; MultiDeviceKernel::releaseOwnership(); } mutable uint32_t takeOwnershipCalls = 0; mutable uint32_t releaseOwnershipCalls = 0; }; //////////////////////////////////////////////////////////////////////////////// // Kernel - Core implementation //////////////////////////////////////////////////////////////////////////////// class MockKernel : public Kernel { public: using Kernel::addAllocationToCacheFlushVector; using Kernel::allBufferArgsStateful; using Kernel::auxTranslationRequired; using Kernel::containsStatelessWrites; using Kernel::dataParameterSimdSize; using Kernel::executionType; using Kernel::getDevice; using Kernel::getHardwareInfo; using Kernel::hasDirectStatelessAccessToHostMemory; using Kernel::hasDirectStatelessAccessToSharedBuffer; using Kernel::hasIndirectStatelessAccessToHostMemory; using 
Kernel::kernelArgHandlers; using Kernel::kernelArgRequiresCacheFlush; using Kernel::kernelArguments; using Kernel::KernelConfig; using Kernel::kernelHasIndirectAccess; using Kernel::kernelSubmissionMap; using Kernel::kernelSvmGfxAllocations; using Kernel::kernelUnifiedMemoryGfxAllocations; using Kernel::maxKernelWorkGroupSize; using Kernel::maxWorkGroupSizeForCrossThreadData; using Kernel::numberOfBindingTableStates; using Kernel::parentEventOffset; using Kernel::patchBufferOffset; using Kernel::patchWithImplicitSurface; using Kernel::pImplicitArgs; using Kernel::preferredWkgMultipleOffset; using Kernel::privateSurface; using Kernel::singleSubdevicePreferredInCurrentEnqueue; using Kernel::svmAllocationsRequireCacheFlush; using Kernel::threadArbitrationPolicy; using Kernel::unifiedMemoryControls; using Kernel::slmSizes; using Kernel::slmTotalSize; MockKernel(Program *programArg, const KernelInfo &kernelInfoArg, ClDevice &clDeviceArg) : Kernel(programArg, kernelInfoArg, clDeviceArg) { } ~MockKernel() override { // prevent double deletion if (crossThreadData == mockCrossThreadData.data()) { crossThreadData = nullptr; } if (kernelInfoAllocated) { delete kernelInfoAllocated; } } template static KernelType *create(Device &device, Program *program) { return create(device, program, GrfConfig::DefaultGrfNumber); } template static KernelType *create(Device &device, Program *program, uint32_t grfNumber) { auto info = new MockKernelInfo(); const size_t crossThreadSize = 160; info->setLocalIds({0, 0, 0}); info->kernelDescriptor.kernelAttributes.numGrfRequired = grfNumber; info->kernelDescriptor.kernelAttributes.simdSize = 32; info->crossThreadData = new char[crossThreadSize]; auto kernel = new KernelType(program, *info, *device.getSpecializedDevice()); kernel->crossThreadData = new char[crossThreadSize]; memset(kernel->crossThreadData, 0, crossThreadSize); kernel->crossThreadDataSize = crossThreadSize; kernel->kernelInfoAllocated = info; return kernel; } static const 
KernelInfoContainer toKernelInfoContainer(const KernelInfo &kernelInfo, uint32_t rootDeviceIndex); uint32_t getPatchedArgumentsNum() const { return patchedArgumentsNum; } bool isPatched() const override; bool canTransformImages() const override; //////////////////////////////////////////////////////////////////////////////// void setCrossThreadData(const void *crossThreadDataPattern, uint32_t newCrossThreadDataSize) { if ((crossThreadData != nullptr) && (crossThreadData != mockCrossThreadData.data())) { delete[] crossThreadData; crossThreadData = nullptr; crossThreadDataSize = 0; } if (crossThreadDataPattern && (newCrossThreadDataSize > 0)) { mockCrossThreadData.clear(); mockCrossThreadData.insert(mockCrossThreadData.begin(), (char *)crossThreadDataPattern, ((char *)crossThreadDataPattern) + newCrossThreadDataSize); } else { mockCrossThreadData.resize(newCrossThreadDataSize, 0); } if (newCrossThreadDataSize == 0) { crossThreadData = nullptr; crossThreadDataSize = 0; return; } crossThreadData = mockCrossThreadData.data(); crossThreadDataSize = static_cast(mockCrossThreadData.size()); } void setSshLocal(const void *sshPattern, uint32_t newSshSize) { sshLocalSize = newSshSize; if (newSshSize == 0) { pSshLocal.reset(nullptr); } else { pSshLocal = std::make_unique(newSshSize); if (sshPattern) { memcpy_s(pSshLocal.get(), newSshSize, sshPattern, newSshSize); } } } void setPrivateSurface(GraphicsAllocation *gfxAllocation, uint32_t size) { privateSurface = gfxAllocation; privateSurfaceSize = size; } void setTotalSLMSize(uint32_t size) { slmTotalSize = size; } void setKernelArguments(std::vector kernelArguments) { this->kernelArguments = kernelArguments; } KernelInfo *getAllocatedKernelInfo() { return kernelInfoAllocated; } std::vector mockCrossThreadData; std::vector mockSshLocal; void setUsingSharedArgs(bool usingSharedArgValue) { this->usingSharedObjArgs = usingSharedArgValue; } void makeResident(CommandStreamReceiver &commandStreamReceiver) override; void 
getResidency(std::vector &dst) override; void setSpecialPipelineSelectMode(bool value) { specialPipelineSelectMode = value; } bool requiresCacheFlushCommand(const CommandQueue &commandQueue) const override; cl_int setArgSvmAlloc(uint32_t argIndex, void *svmPtr, GraphicsAllocation *svmAlloc, uint32_t allocId) override; uint32_t makeResidentCalls = 0; uint32_t getResidencyCalls = 0; uint32_t setArgSvmAllocCalls = 0; bool canKernelTransformImages = true; bool isPatchedOverride = true; protected: KernelInfo *kernelInfoAllocated = nullptr; }; //class below have enough internals to service Enqueue operation. class MockKernelWithInternals { public: MockKernelWithInternals(const ClDeviceVector &deviceVector, Context *context = nullptr, bool addDefaultArg = false, SPatchExecutionEnvironment execEnv = {}) { memset(&kernelHeader, 0, sizeof(SKernelBinaryHeaderCommon)); kernelInfo.heapInfo.pKernelHeap = kernelIsa; kernelInfo.heapInfo.KernelHeapSize = sizeof(kernelIsa); kernelInfo.heapInfo.pSsh = sshLocal; kernelInfo.heapInfo.SurfaceStateHeapSize = sizeof(sshLocal); kernelInfo.heapInfo.pDsh = dshLocal; kernelInfo.heapInfo.DynamicStateHeapSize = sizeof(dshLocal); populateKernelDescriptor(kernelInfo.kernelDescriptor, execEnv); kernelInfo.kernelDescriptor.kernelAttributes.numGrfRequired = GrfConfig::DefaultGrfNumber; kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 32; kernelInfo.setCrossThreadDataSize(sizeof(crossThreadData)); kernelInfo.setLocalIds({1, 1, 1}); if (context == nullptr) { mockContext = new MockContext(deviceVector); context = mockContext; } else { context->incRefInternal(); mockContext = context; } auto maxRootDeviceIndex = 0u; for (const auto &pClDevice : deviceVector) { if (pClDevice->getRootDeviceIndex() > maxRootDeviceIndex) { maxRootDeviceIndex = pClDevice->getRootDeviceIndex(); } } kernelInfos.resize(maxRootDeviceIndex + 1); for (const auto &pClDevice : deviceVector) { kernelInfos[pClDevice->getRootDeviceIndex()] = &kernelInfo; } mockProgram = new 
MockProgram(context, false, deviceVector); mockKernel = new MockKernel(mockProgram, kernelInfo, *deviceVector[0]); mockKernel->setCrossThreadData(&crossThreadData, sizeof(crossThreadData)); KernelVectorType mockKernels; mockKernels.resize(mockProgram->getMaxRootDeviceIndex() + 1); for (const auto &pClDevice : deviceVector) { auto rootDeviceIndex = pClDevice->getRootDeviceIndex(); if (mockKernels[rootDeviceIndex] == nullptr) { mockKernels[rootDeviceIndex] = mockKernel; } } mockMultiDeviceKernel = new MockMultiDeviceKernel(std::move(mockKernels), kernelInfos); mockKernel->setSshLocal(&sshLocal, sizeof(sshLocal)); if (addDefaultArg) { defaultKernelArguments.resize(2); defaultKernelArguments[0] = {}; defaultKernelArguments[1] = {}; kernelInfo.addArgBuffer(0, 0, sizeof(uintptr_t), 64); kernelInfo.setAddressQualifier(0, KernelArgMetadata::AddrGlobal); kernelInfo.setAccessQualifier(0, KernelArgMetadata::AccessReadWrite); kernelInfo.addArgBuffer(1, 8, sizeof(uintptr_t), 72); kernelInfo.setAddressQualifier(1, KernelArgMetadata::AddrGlobal); kernelInfo.setAccessQualifier(1, KernelArgMetadata::AccessReadWrite); mockKernel->setKernelArguments(defaultKernelArguments); mockKernel->kernelArgRequiresCacheFlush.resize(2); mockKernel->kernelArgHandlers.resize(2); mockKernel->kernelArgHandlers[0] = &Kernel::setArgBuffer; mockKernel->kernelArgHandlers[1] = &Kernel::setArgBuffer; } } MockKernelWithInternals(ClDevice &deviceArg, Context *context = nullptr, bool addDefaultArg = false, SPatchExecutionEnvironment execEnv = {}) : MockKernelWithInternals(toClDeviceVector(deviceArg), context, addDefaultArg, execEnv) { } MockKernelWithInternals(ClDevice &deviceArg, SPatchExecutionEnvironment execEnv) : MockKernelWithInternals(deviceArg, nullptr, false, execEnv) { mockKernel->initialize(); } ~MockKernelWithInternals() { mockMultiDeviceKernel->decRefInternal(); mockProgram->decRefInternal(); mockContext->decRefInternal(); } operator MockKernel *() { return mockKernel; } MockMultiDeviceKernel 
*mockMultiDeviceKernel = nullptr; MockKernel *mockKernel; MockProgram *mockProgram; Context *mockContext; KernelInfoContainer kernelInfos; MockKernelInfo kernelInfo; SKernelBinaryHeaderCommon kernelHeader = {}; uint32_t kernelIsa[32]; char crossThreadData[256]; char sshLocal[128]; char dshLocal[128]; std::vector defaultKernelArguments; }; class MockDebugKernel : public MockKernel { public: MockDebugKernel(Program *program, const KernelInfo &kernelInfo, ClDevice &clDeviceArg) : MockKernel(program, kernelInfo, clDeviceArg) { if (!isValidOffset(kernelInfo.kernelDescriptor.payloadMappings.implicitArgs.systemThreadSurfaceAddress.bindful)) { auto &kd = const_cast(kernelInfo.kernelDescriptor); kd.payloadMappings.implicitArgs.systemThreadSurfaceAddress.bindful = 0; kd.kernelAttributes.perThreadSystemThreadSurfaceSize = MockDebugKernel::perThreadSystemThreadSurfaceSize; } } ~MockDebugKernel() override {} static const uint32_t perThreadSystemThreadSurfaceSize; }; } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/mocks/mock_mdi.h000066400000000000000000000021401422164147700246730ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/helpers/dispatch_info.h" using namespace NEO; class MockMultiDispatchInfo : public MultiDispatchInfo { public: using MultiDispatchInfo::dispatchInfos; MockMultiDispatchInfo(ClDevice *clDevice, Kernel *kernel) : MultiDispatchInfo(kernel) { DispatchInfo di(clDevice, kernel, 1, {100, 1, 1}, {10, 1, 1}, {0, 0, 0}); di.setNumberOfWorkgroups({10, 1, 1}); di.setTotalNumberOfWorkgroups({10, 1, 1}); dispatchInfos.push_back(di); } MockMultiDispatchInfo(ClDevice *clDevice, std::vector kernels) { for (auto kernel : kernels) { DispatchInfo di(clDevice, kernel, 1, {100, 1, 1}, {10, 1, 1}, {0, 0, 0}); di.setNumberOfWorkgroups({10, 1, 1}); di.setTotalNumberOfWorkgroups({10, 1, 1}); dispatchInfos.push_back(di); } } MockMultiDispatchInfo(std::vector 
dis) { for (auto di : dis) { dispatchInfos.push_back(*di); } } }; compute-runtime-22.14.22890/opencl/test/unit_test/mocks/mock_pipe.h000066400000000000000000000017441422164147700250700ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/aligned_memory.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "opencl/source/mem_obj/pipe.h" using namespace NEO; class MockPipeStorage { public: MockPipeStorage() { mockGfxAllocation = new MockGraphicsAllocation(data, sizeof(data) / 2); } MockPipeStorage(bool unaligned) { mockGfxAllocation = new MockGraphicsAllocation(alignUp(&data, 4), sizeof(data) / 2); } char data[256]{}; MockGraphicsAllocation *mockGfxAllocation = nullptr; }; class MockPipe : public MockPipeStorage, public Pipe { public: MockPipe(Context *context) : MockPipeStorage(), Pipe(context, 0, 1, 128, nullptr, &data, GraphicsAllocationHelper::toMultiGraphicsAllocation(mockGfxAllocation)) { } ~MockPipe() override { if (!getContext()) { delete mockGfxAllocation; } } }; compute-runtime-22.14.22890/opencl/test/unit_test/mocks/mock_platform.cpp000066400000000000000000000021361422164147700263060ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/mocks/mock_platform.h" #include "shared/source/device/device.h" #include "shared/source/os_interface/device_factory.h" #include "opencl/test/unit_test/mocks/mock_cl_execution_environment.h" namespace NEO { bool initPlatform() { auto pPlatform = platform(); return pPlatform->initialize(DeviceFactory::createDevices(*pPlatform->peekExecutionEnvironment())); } bool MockPlatform::initializeWithNewDevices() { executionEnvironment.prepareRootDeviceEnvironments(1u); return Platform::initialize(DeviceFactory::createDevices(executionEnvironment)); } Platform *platform() { if (platformsImpl->empty()) { return nullptr; } return 
(*platformsImpl)[0].get(); } Platform *constructPlatform() { static std::mutex mutex; std::unique_lock lock(mutex); if (platformsImpl->empty()) { platformsImpl->push_back(std::make_unique(*(new MockClExecutionEnvironment()))); } return (*platformsImpl)[0].get(); } } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/mocks/mock_platform.h000066400000000000000000000011651422164147700257540ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/execution_environment/execution_environment.h" #include "opencl/source/platform/platform.h" namespace NEO { class MockPlatform : public Platform { public: using Platform::fillGlobalDispatchTable; MockPlatform() : MockPlatform(*(new ExecutionEnvironment())) {} MockPlatform(ExecutionEnvironment &executionEnvironment) : Platform(executionEnvironment) {} bool initializeWithNewDevices(); }; Platform *platform(); Platform *constructPlatform(); bool initPlatform(); } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/mocks/mock_program.cpp000066400000000000000000000031101422164147700261220ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/mocks/mock_program.h" #include "shared/source/compiler_interface/compiler_cache.h" #include "shared/source/device_binary_format/patchtokens_decoder.h" #include "shared/source/helpers/hash.h" #include "shared/source/program/program_info_from_patchtokens.h" #include "shared/test/common/helpers/default_hw_info.h" #include "shared/test/common/mocks/mock_compiler_interface.h" #include "shared/test/common/mocks/mock_compilers.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "opencl/source/context/context.h" #include "opencl/source/program/create.inl" #include "opencl/test/unit_test/helpers/ult_limits.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" namespace 
NEO { ClDeviceVector toClDeviceVector(ClDevice &clDevice) { ClDeviceVector deviceVector; deviceVector.push_back(&clDevice); return deviceVector; } int MockProgram::getInternalOptionsCalled = 0; std::string MockProgram::getCachedFileName() const { CompilerCache cache(CompilerCacheConfig{}); auto hwInfo = this->context->getDevice(0)->getHardwareInfo(); auto input = ArrayRef(this->sourceCode.c_str(), this->sourceCode.size()); auto opts = ArrayRef(this->options.c_str(), this->options.size()); auto internalOptions = getInternalOptions(); auto internalOpts = ArrayRef(internalOptions.c_str(), internalOptions.size()); return cache.getCachedFileName(hwInfo, input, opts, internalOpts); } } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/mocks/mock_program.h000066400000000000000000000167121422164147700256030ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/device/device.h" #include "shared/source/helpers/hash.h" #include "shared/source/helpers/string.h" #include "shared/source/program/kernel_info.h" #include "shared/test/common/test_macros/mock_method_macros.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/kernel/multi_device_kernel.h" #include "opencl/source/program/program.h" #include namespace NEO { class GraphicsAllocation; ClDeviceVector toClDeviceVector(ClDevice &clDevice); //////////////////////////////////////////////////////////////////////////////// // Program - Core implementation //////////////////////////////////////////////////////////////////////////////// class MockProgram : public Program { public: using Program::allowNonUniform; using Program::applyAdditionalOptions; using Program::areSpecializationConstantsInitialized; using Program::buildInfos; using Program::context; using Program::createdFrom; using Program::createProgramFromBinary; using Program::deviceBuildInfos; using Program::extractInternalOptions; using 
Program::getKernelInfo; using Program::internalOptionsToExtract; using Program::irBinary; using Program::irBinarySize; using Program::isBuiltIn; using Program::isCreatedFromBinary; using Program::isSpirV; using Program::kernelDebugEnabled; using Program::linkBinary; using Program::options; using Program::packDeviceBinary; using Program::Program; using Program::setBuildStatus; using Program::shouldWarnAboutRebuild; using Program::sourceCode; using Program::specConstantsIds; using Program::specConstantsSizes; using Program::specConstantsValues; using Program::updateNonUniformFlag; MockProgram(const ClDeviceVector &deviceVector) : Program(nullptr, false, deviceVector) { } ~MockProgram() override { if (contextSet) context = nullptr; } KernelInfo mockKernelInfo; void setBuildOptions(const char *buildOptions) { options = buildOptions != nullptr ? buildOptions : ""; } void setConstantSurface(GraphicsAllocation *gfxAllocation) { if (gfxAllocation) { buildInfos[gfxAllocation->getRootDeviceIndex()].constantSurface = gfxAllocation; } else { for (auto &buildInfo : buildInfos) { buildInfo.constantSurface = nullptr; } } } void setGlobalSurface(GraphicsAllocation *gfxAllocation) { if (gfxAllocation) { buildInfos[gfxAllocation->getRootDeviceIndex()].globalSurface = gfxAllocation; } else { for (auto &buildInfo : buildInfos) { buildInfo.globalSurface = nullptr; } } } std::vector &getKernelInfoArray(uint32_t rootDeviceIndex) { return buildInfos[rootDeviceIndex].kernelInfoArray; } void addKernelInfo(KernelInfo *inInfo, uint32_t rootDeviceIndex) { buildInfos[rootDeviceIndex].kernelInfoArray.push_back(inInfo); } void setContext(Context *context) { this->context = context; contextSet = true; } void setSourceCode(const char *ptr) { sourceCode = ptr; } void clearOptions() { options = ""; } void setCreatedFromBinary(bool createdFromBin) { isCreatedFromBinary = createdFromBin; } void clearLog(uint32_t rootDeviceIndex) { buildInfos[rootDeviceIndex].buildLog.clear(); } void setIrBinary(char 
*ptr, bool isSpirv) { irBinary.reset(ptr); this->isSpirV = isSpirV; } void setIrBinarySize(size_t bsz, bool isSpirv) { irBinarySize = bsz; this->isSpirV = isSpirV; } std::string getCachedFileName() const; void setAllowNonUniform(bool allow) { allowNonUniform = allow; } bool isFlagOption(ConstStringRef option) override { if (isFlagOptionOverride != -1) { return (isFlagOptionOverride > 0); } return Program::isFlagOption(option); } bool isOptionValueValid(ConstStringRef option, ConstStringRef value) override { if (isOptionValueValidOverride != -1) { return (isOptionValueValidOverride > 0); } return Program::isOptionValueValid(option, value); } cl_int rebuildProgramFromIr() { this->isCreatedFromBinary = false; this->shouldWarnAboutRebuild = true; setBuildStatus(CL_BUILD_NONE); std::unordered_map builtins; return this->build(getDevices(), this->options.c_str(), false, builtins); } cl_int recompile() { this->isCreatedFromBinary = false; this->shouldWarnAboutRebuild = true; setBuildStatus(CL_BUILD_NONE); return this->compile(getDevices(), this->options.c_str(), 0, nullptr, nullptr); } void replaceDeviceBinary(std::unique_ptr &&newBinary, size_t newBinarySize, uint32_t rootDeviceIndex) override { if (replaceDeviceBinaryCalledPerRootDevice.find(rootDeviceIndex) == replaceDeviceBinaryCalledPerRootDevice.end()) { replaceDeviceBinaryCalledPerRootDevice.insert({rootDeviceIndex, 1}); } else { replaceDeviceBinaryCalledPerRootDevice[rootDeviceIndex]++; } Program::replaceDeviceBinary(std::move(newBinary), newBinarySize, rootDeviceIndex); } cl_int processGenBinary(const ClDevice &clDevice) override { auto rootDeviceIndex = clDevice.getRootDeviceIndex(); if (processGenBinaryCalledPerRootDevice.find(rootDeviceIndex) == processGenBinaryCalledPerRootDevice.end()) { processGenBinaryCalledPerRootDevice.insert({rootDeviceIndex, 1}); } else { processGenBinaryCalledPerRootDevice[rootDeviceIndex]++; } return Program::processGenBinary(clDevice); } std::string getInternalOptions() const 
override { getInternalOptionsCalled++; return Program::getInternalOptions(); }; const KernelInfo &getKernelInfoForKernel(const char *kernelName) const { return *getKernelInfo(kernelName, getDevices()[0]->getRootDeviceIndex()); } const KernelInfoContainer getKernelInfosForKernel(const char *kernelName) const { KernelInfoContainer kernelInfos; kernelInfos.resize(getMaxRootDeviceIndex() + 1); for (auto i = 0u; i < kernelInfos.size(); i++) { kernelInfos[i] = getKernelInfo(kernelName, i); } return kernelInfos; } void processDebugData(uint32_t rootDeviceIndex) override { Program::processDebugData(rootDeviceIndex); wasProcessDebugDataCalled = true; } void createDebugZebin(uint32_t rootDeviceIndex) override { Program::createDebugZebin(rootDeviceIndex); wasCreateDebugZebinCalled = true; } std::vector externalFunctions; std::map processGenBinaryCalledPerRootDevice; std::map replaceDeviceBinaryCalledPerRootDevice; static int getInternalOptionsCalled; bool contextSet = false; int isFlagOptionOverride = -1; int isOptionValueValidOverride = -1; bool wasProcessDebugDataCalled = false; bool wasCreateDebugZebinCalled = false; }; class MockProgramAppendKernelDebugOptions : public Program { public: using Program::Program; ADDMETHOD_NOBASE(appendKernelDebugOptions, bool, true, (ClDevice & clDevice, std::string &internalOptions)); }; } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/mocks/mock_sampler.h000066400000000000000000000021231422164147700255660ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/sampler/sampler.h" namespace NEO { struct MockSampler : public Sampler { public: MockSampler(Context *context, cl_bool normalizedCoordinates, cl_addressing_mode addressingMode, cl_filter_mode filterMode, cl_filter_mode mipFilterMode = CL_FILTER_NEAREST, float lodMin = 0.0f, float lodMax = 0.0f) : Sampler(context, normalizedCoordinates, addressingMode, filterMode, 
mipFilterMode, lodMin, lodMax) { } cl_context getContext() const { return context; } cl_bool getNormalizedCoordinates() const { return normalizedCoordinates; } cl_addressing_mode getAddressingMode() const { return addressingMode; } cl_filter_mode getFilterMode() const { return filterMode; } void setArg(void *memory, const HardwareInfo &hwInfo) override { } }; } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/mocks/mock_sharing_factory.h000066400000000000000000000005101422164147700273030ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/sharings/sharing_factory.h" class SharingFactoryMock : public NEO::SharingFactory { public: using NEO::SharingFactory::sharings; SharingFactoryMock() = default; ~SharingFactoryMock() = default; }; compute-runtime-22.14.22890/opencl/test/unit_test/mocks/ult_cl_device_factory.cpp000066400000000000000000000022411422164147700277760ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/mocks/ult_cl_device_factory.h" #include "shared/source/command_stream/create_command_stream_impl.h" #include "shared/source/os_interface/device_factory.h" #include "shared/test/common/helpers/ult_hw_config.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "shared/test/common/mocks/ult_device_factory.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" using namespace NEO; UltClDeviceFactory::UltClDeviceFactory(uint32_t rootDevicesCount, uint32_t subDevicesCount) { pUltDeviceFactory = std::make_unique(rootDevicesCount, subDevicesCount, *(new ClExecutionEnvironment)); for (auto &pRootDevice : pUltDeviceFactory->rootDevices) { auto pRootClDevice = new MockClDevice{pRootDevice}; for (auto &pClSubDevice : pRootClDevice->subDevices) { subDevices.push_back(pClSubDevice.get()); } rootDevices.push_back(pRootClDevice); } } 
UltClDeviceFactory::~UltClDeviceFactory() { for (auto &pClDevice : rootDevices) { pClDevice->decRefInternal(); } } compute-runtime-22.14.22890/opencl/test/unit_test/mocks/ult_cl_device_factory.h000066400000000000000000000011351422164147700274440ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include #include #include namespace NEO { class ExecutionEnvironment; class ClDevice; class MemoryManager; class MockMemoryManager; class MockClDevice; struct UltDeviceFactory; struct UltClDeviceFactory { UltClDeviceFactory(uint32_t rootDevicesCount, uint32_t subDevicesCount); ~UltClDeviceFactory(); std::unique_ptr pUltDeviceFactory; std::vector rootDevices; std::vector subDevices; }; } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/mt_tests/000077500000000000000000000000001422164147700234715ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/mt_tests/CMakeLists.txt000066400000000000000000000044701422164147700262360ustar00rootroot00000000000000# # Copyright (C) 2018-2021 Intel Corporation # # SPDX-License-Identifier: MIT # project(igdrcl_mt_tests) set(OPENCL_MT_TEST_DIR ${CMAKE_CURRENT_SOURCE_DIR}) add_custom_target(run_mt_unit_tests) add_executable(igdrcl_mt_tests EXCLUDE_FROM_ALL ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${NEO_SOURCE_DIR}/opencl/test/unit_test/test_macros/test_checks_ocl.cpp ${NEO_SOURCE_DIR}/shared/test/common/test_macros/test_checks_shared.cpp $ $ $ $ $ $ $ $ $ ) target_include_directories(igdrcl_mt_tests PRIVATE ${NEO_SHARED_TEST_DIRECTORY}/common/test_configuration/unit_tests ${NEO_SHARED_TEST_DIRECTORY}/common/test_macros/header${BRANCH_DIR_SUFFIX} ${NEO_SHARED_TEST_DIRECTORY}/common/helpers/includes${BRANCH_DIR_SUFFIX} ${NEO_SOURCE_DIR}/opencl/source/gen_common ) add_subdirectories() target_link_libraries(igdrcl_mt_tests ${TSAN_LIB}) target_link_libraries(igdrcl_mt_tests ${NEO_MOCKABLE_LIB_NAME} ${NEO_SHARED_MOCKABLE_LIB_NAME}) 
target_link_libraries(igdrcl_mt_tests gmock-gtest) target_link_libraries(igdrcl_mt_tests igdrcl_mocks ${NEO_EXTRA_LIBS}) if(WIN32) add_dependencies(igdrcl_mt_tests mock_gdi igdrcl_tests) endif() add_dependencies(igdrcl_mt_tests test_dynamic_lib) add_dependencies(igdrcl_mt_tests prepare_test_kernels_for_shared prepare_test_kernels_for_ocl copy_test_files_per_product ) create_project_source_tree(igdrcl_mt_tests) set_target_properties(igdrcl_mt_tests PROPERTIES FOLDER ${OPENCL_TEST_PROJECTS_FOLDER}) set_property(TARGET igdrcl_mt_tests APPEND_STRING PROPERTY COMPILE_FLAGS ${TSAN_FLAGS}) if(NOT WIN32) set_property(TARGET igdrcl_mt_tests APPEND_STRING PROPERTY COMPILE_FLAGS " -g") endif() set_target_properties(run_mt_unit_tests PROPERTIES FOLDER ${OPENCL_TEST_PROJECTS_FOLDER}) compute-runtime-22.14.22890/opencl/test/unit_test/mt_tests/api/000077500000000000000000000000001422164147700242425ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/mt_tests/api/CMakeLists.txt000066400000000000000000000014611422164147700270040ustar00rootroot00000000000000# # Copyright (C) 2018-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_mt_tests_api # local files ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/cl_api_tests_mt.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cl_create_user_event_tests_mt.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_device_info_tests_mt.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_platform_ids_tests_mt.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_platform_info_tests_mt.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_intel_tracing_tests_mt.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_set_mem_object_destructor_callback_tests_mt.inl # necessary dependencies from igdrcl_tests ${NEO_SOURCE_DIR}/opencl/test/unit_test/api/cl_api_tests.cpp ) target_sources(igdrcl_mt_tests PRIVATE ${IGDRCL_SRCS_mt_tests_api}) 
compute-runtime-22.14.22890/opencl/test/unit_test/mt_tests/api/cl_api_tests_mt.cpp000066400000000000000000000011001422164147700301070ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/mt_tests/api/cl_create_user_event_tests_mt.inl" #include "opencl/test/unit_test/mt_tests/api/cl_get_device_info_tests_mt.inl" #include "opencl/test/unit_test/mt_tests/api/cl_get_platform_ids_tests_mt.inl" #include "opencl/test/unit_test/mt_tests/api/cl_get_platform_info_tests_mt.inl" #include "opencl/test/unit_test/mt_tests/api/cl_intel_tracing_tests_mt.inl" #include "opencl/test/unit_test/mt_tests/api/cl_set_mem_object_destructor_callback_tests_mt.inl" compute-runtime-22.14.22890/opencl/test/unit_test/mt_tests/api/cl_create_user_event_tests_mt.inl000066400000000000000000000023561422164147700330560ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/context/context.h" #include "opencl/test/unit_test/api/cl_api_tests.h" using namespace NEO; typedef api_tests clCreateUserEventMtTests; namespace ULT { TEST_F(clCreateUserEventMtTests, GivenClCompleteEventWhenWaitingForEventThenWaitForEventsIsCompleted) { auto userEvent = clCreateUserEvent( pContext, &retVal); std::atomic ThreadStarted(false); std::atomic WaitForEventsCompleted(false); int counter = 0; int Deadline = 2000; std::thread t([&]() { ThreadStarted = true; clWaitForEvents(1, &userEvent); WaitForEventsCompleted = true; }); //wait for the thread to start while (!ThreadStarted) ; //now wait a while. 
while (!WaitForEventsCompleted && counter++ < Deadline) ; ASSERT_EQ(WaitForEventsCompleted, false) << "WaitForEvents returned while user event is not signaled!"; //set event to CL_COMPLETE retVal = clSetUserEventStatus(userEvent, CL_COMPLETE); t.join(); ASSERT_EQ(WaitForEventsCompleted, true); retVal = clReleaseEvent(userEvent); EXPECT_EQ(CL_SUCCESS, retVal); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/mt_tests/api/cl_get_device_info_tests_mt.inl000066400000000000000000000033421422164147700324610ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/platform_fixture.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" using namespace NEO; namespace ULT { using clGetDeviceInfoMtTests = Test; TEST_F(clGetDeviceInfoMtTests, GivenMultipleThreadsQueryingDeviceExtensionsWithVersionThenReturnedValuesAreValid) { UltClDeviceFactory deviceFactory{1, 0}; deviceFactory.rootDevices[0]->getDeviceInfo(CL_DEVICE_EXTENSIONS_WITH_VERSION, 0, nullptr, nullptr); auto extensionsCount = deviceFactory.rootDevices[0]->deviceInfo.extensionsWithVersion.size(); std::vector extensionsWithVersionArray[4]; for (auto &extensionsWithVersion : extensionsWithVersionArray) { extensionsWithVersion.resize(extensionsCount); } std::vector threads; for (auto &extensionsWithVersion : extensionsWithVersionArray) { threads.push_back(std::thread{[&] { clGetDeviceInfo(devices[0], CL_DEVICE_EXTENSIONS_WITH_VERSION, sizeof(cl_name_version) * extensionsCount, extensionsWithVersion.data(), nullptr); }}); } for (auto &thread : threads) { thread.join(); } auto &deviceInfo = deviceFactory.rootDevices[0]->deviceInfo; for (auto &extensionsWithVersion : extensionsWithVersionArray) { for (size_t i = 0; i < extensionsCount; i++) { EXPECT_STREQ(deviceInfo.extensionsWithVersion[i].name, extensionsWithVersion[i].name); 
EXPECT_EQ(deviceInfo.extensionsWithVersion[i].version, extensionsWithVersion[i].version); } } } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/mt_tests/api/cl_get_platform_ids_tests_mt.inl000066400000000000000000000013671422164147700326770ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/platform_fixture.h" using namespace NEO; using clGetPlatformIDsMtTests = Test; namespace ULT { TEST_F(clGetPlatformIDsMtTests, GivenSeparateThreadWhenGettingPlatformIdThenPlatformIdIsCorrect) { cl_int retVal = CL_SUCCESS; cl_platform_id platform = nullptr; cl_platform_id threadPlatform = nullptr; std::thread t1([&] { clGetPlatformIDs(1, &threadPlatform, nullptr); }); retVal = clGetPlatformIDs(1, &platform, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); t1.join(); EXPECT_EQ(threadPlatform, platform); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/mt_tests/api/cl_get_platform_info_tests_mt.inl000066400000000000000000000033551422164147700330520ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/platform_fixture.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" using namespace NEO; namespace ULT { using clGetPlatformInfoMtTests = Test; TEST_F(clGetPlatformInfoMtTests, GivenMultipleThreadsQueryingPlatformExtensionsWithVersionThenReturnedValuesAreValid) { UltClDeviceFactory deviceFactory{1, 0}; deviceFactory.rootDevices[0]->getDeviceInfo(CL_DEVICE_EXTENSIONS_WITH_VERSION, 0, nullptr, nullptr); auto extensionsCount = deviceFactory.rootDevices[0]->deviceInfo.extensionsWithVersion.size(); std::vector extensionsWithVersionArray[4]; for (auto &extensionsWithVersion : extensionsWithVersionArray) { extensionsWithVersion.resize(extensionsCount); } 
std::vector threads; for (auto &extensionsWithVersion : extensionsWithVersionArray) { threads.push_back(std::thread{[&] { clGetPlatformInfo(pPlatform, CL_PLATFORM_EXTENSIONS_WITH_VERSION, sizeof(cl_name_version) * extensionsCount, extensionsWithVersion.data(), nullptr); }}); } for (auto &thread : threads) { thread.join(); } auto &deviceInfo = deviceFactory.rootDevices[0]->deviceInfo; for (auto &extensionsWithVersion : extensionsWithVersionArray) { for (size_t i = 0; i < extensionsCount; i++) { EXPECT_STREQ(deviceInfo.extensionsWithVersion[i].name, extensionsWithVersion[i].name); EXPECT_EQ(deviceInfo.extensionsWithVersion[i].version, extensionsWithVersion[i].version); } } } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/mt_tests/api/cl_intel_tracing_tests_mt.inl000066400000000000000000000075471422164147700322050ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/tracing/tracing_api.h" #include "opencl/source/tracing/tracing_notify.h" #include "opencl/test/unit_test/fixtures/platform_fixture.h" #include "opencl/test/unit_test/helpers/ult_limits.h" using namespace NEO; namespace ULT { struct IntelTracingMtTest : public Test { void SetUp() override { DebugManager.flags.CreateMultipleRootDevices.set(maxRootDeviceCount); Test::SetUp(); testedClDevice = pPlatform->getClDevice(rootDeviceIndex); } DebugManagerStateRestore restorer; const uint32_t rootDeviceIndex = 1u; cl_device_id testedClDevice = nullptr; protected: static void threadBody(int iterationCount, IntelTracingMtTest *test) { test->vthreadBody(iterationCount); } virtual void vthreadBody(int iterationCount) { cl_int status = CL_SUCCESS; cl_platform_id platform = nullptr; const uint32_t maxStrSize = 1024; char buffer[maxStrSize] = {0}; while 
(!started) { } for (int i = 0; i < iterationCount; ++i) { HostSideTracing::AtomicBackoff backoff; status = clGetDeviceInfo(testedClDevice, CL_DEVICE_NAME, maxStrSize, buffer, nullptr); EXPECT_EQ(CL_SUCCESS, status); backoff.pause(); status = clGetDeviceInfo(testedClDevice, CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &platform, nullptr); EXPECT_EQ(CL_SUCCESS, status); backoff.pause(); status = clGetPlatformInfo(platform, CL_PLATFORM_VERSION, maxStrSize, buffer, nullptr); EXPECT_EQ(CL_SUCCESS, status); backoff.pause(); status = clGetPlatformInfo(platform, CL_PLATFORM_NAME, maxStrSize, buffer, nullptr); EXPECT_EQ(CL_SUCCESS, status); backoff.pause(); } } static void callback(cl_function_id fid, cl_callback_data *callbackData, void *userData) { ASSERT_NE(nullptr, userData); IntelTracingMtTest *base = (IntelTracingMtTest *)userData; base->vcallback(fid, callbackData, nullptr); } virtual void vcallback(cl_function_id fid, cl_callback_data *callbackData, void *userData) { if (fid == CL_FUNCTION_clGetDeviceInfo || fid == CL_FUNCTION_clGetPlatformInfo) { ++count; } } protected: cl_tracing_handle handle = nullptr; cl_int status = CL_SUCCESS; std::atomic started{false}; std::atomic count{0}; }; TEST_F(IntelTracingMtTest, WhenTracingFromMultipleThreadsThenAllThreadsAreCreated) { status = clCreateTracingHandleINTEL(testedClDevice, callback, this, &handle); EXPECT_EQ(CL_SUCCESS, status); status = clSetTracingPointINTEL(handle, CL_FUNCTION_clGetDeviceInfo, CL_TRUE); EXPECT_EQ(CL_SUCCESS, status); status = clSetTracingPointINTEL(handle, CL_FUNCTION_clGetPlatformInfo, CL_TRUE); EXPECT_EQ(CL_SUCCESS, status); status = clEnableTracingINTEL(handle); EXPECT_EQ(CL_SUCCESS, status); int numThreads = 4; int iterationCount = 1024; std::vector threads; for (int i = 0; i < numThreads; ++i) { threads.push_back(std::thread(threadBody, iterationCount, this)); } started = true; for (auto &thread : threads) { thread.join(); } status = clDisableTracingINTEL(handle); EXPECT_EQ(CL_SUCCESS, status); 
status = clDestroyTracingHandleINTEL(handle); EXPECT_EQ(CL_SUCCESS, status); int callsPerIteration = 4; int callbacksPerCall = 2; EXPECT_EQ(numThreads * iterationCount * callsPerIteration * callbacksPerCall, count); } } // namespace ULTcl_set_mem_object_destructor_callback_tests_mt.inl000066400000000000000000000031401422164147700363360ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/mt_tests/api/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/context/context.h" #include "opencl/test/unit_test/api/cl_api_tests.h" using namespace NEO; typedef api_tests clCreateBufferTests; namespace ULT { static int cbInvoked = 0; void CL_CALLBACK destructorCallBackMt(cl_mem memObj, void *userData) { cbInvoked++; } struct clSetMemObjectDestructorCallbackMtTests : public ApiFixture<>, public ::testing::Test { void SetUp() override { ApiFixture::SetUp(); cbInvoked = 0; } void TearDown() override { ApiFixture::TearDown(); } static void setMemCallbackThreadFunc(cl_mem buf) { auto ret = clSetMemObjectDestructorCallback(buf, destructorCallBackMt, nullptr); EXPECT_EQ(CL_SUCCESS, ret); } }; TEST_F(clSetMemObjectDestructorCallbackMtTests, GivenMultipleThreadsWhenSettingDestructorCallbackThenCallbackWasInvokedForEachThread) { auto buffer = clCreateBuffer(pContext, CL_MEM_READ_WRITE, 42, nullptr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); std::thread t1(clSetMemObjectDestructorCallbackMtTests::setMemCallbackThreadFunc, buffer); std::thread t2(clSetMemObjectDestructorCallbackMtTests::setMemCallbackThreadFunc, buffer); retVal = clSetMemObjectDestructorCallback(buffer, destructorCallBackMt, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); t1.join(); t2.join(); retVal = clReleaseMemObject(buffer); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(3, cbInvoked); } } // namespace ULT 
compute-runtime-22.14.22890/opencl/test/unit_test/mt_tests/cmake/000077500000000000000000000000001422164147700245515ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/mt_tests/cmake/run_mt_test_target.cmake000066400000000000000000000025271422164147700314720ustar00rootroot00000000000000# # Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT # string(REPLACE "/" ";" mt_test_config ${mt_test_config}) list(GET mt_test_config 0 product) list(GET mt_test_config 1 slices) list(GET mt_test_config 2 subslices) list(GET mt_test_config 3 eu_per_ss) list(GET mt_test_config 4 revision_id) add_custom_target(run_${product}_${revision_id}_mt_unit_tests DEPENDS igdrcl_mt_tests) if(NOT WIN32) add_dependencies(run_${product}_${revision_id}_mt_unit_tests copy_test_files_${product}_${revision_id}) endif() add_dependencies(run_mt_unit_tests run_${product}_${revision_id}_mt_unit_tests) set_target_properties(run_${product}_${revision_id}_mt_unit_tests PROPERTIES FOLDER "${PLATFORM_SPECIFIC_TEST_TARGETS_FOLDER}/${product}/${revision_id}") add_custom_command( TARGET run_${product}_${revision_id}_mt_unit_tests POST_BUILD COMMAND WORKING_DIRECTORY ${TargetDir} COMMAND echo "Running igdrcl_mt_tests ${product} ${slices}x${subslices}x${eu_per_ss}" COMMAND igdrcl_mt_tests --product ${product} --slices ${slices} --subslices ${subslices} --eu_per_ss ${eu_per_ss} --gtest_repeat=${GTEST_REPEAT} ${NEO_TESTS_LISTENER_OPTION} --rev_id ${revision_id} ) add_dependencies(run_${product}_${revision_id}_mt_unit_tests prepare_test_kernels_for_ocl) compute-runtime-22.14.22890/opencl/test/unit_test/mt_tests/command_queue/000077500000000000000000000000001422164147700263135ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/mt_tests/command_queue/CMakeLists.txt000066400000000000000000000011121422164147700310460ustar00rootroot00000000000000# # Copyright (C) 2018-2021 Intel Corporation # # SPDX-License-Identifier: MIT # 
set(IGDRCL_SRCS_mt_tests_command_queue # local files ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/ioq_task_tests_mt.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ooq_task_tests_mt.cpp # necessary dependencies from igdrcl_tests ${NEO_SOURCE_DIR}/opencl/test/unit_test/command_queue/enqueue_kernel_mt_tests.cpp ${NEO_SOURCE_DIR}/opencl/test/unit_test/command_queue/enqueue_fixture.cpp ) target_sources(igdrcl_mt_tests PRIVATE ${IGDRCL_SRCS_mt_tests_command_queue}) compute-runtime-22.14.22890/opencl/test/unit_test/mt_tests/command_queue/ioq_task_tests_mt.cpp000066400000000000000000000251451422164147700325620ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/command_queue/enqueue_fixture.h" #include "opencl/test/unit_test/fixtures/hello_world_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include using namespace NEO; typedef HelloWorldTest IOQTaskTestsMt; TEST_F(IOQTaskTestsMt, GivenBlockingAndBlockedOnUserEventWhenReadingBufferThenTaskCountAndTaskLevelAreIncremented) { auto buffer = std::unique_ptr(BufferHelper<>::create()); auto alignedReadPtr = alignedMalloc(BufferDefaults::sizeInBytes, MemoryConstants::cacheLineSize); ASSERT_NE(nullptr, alignedReadPtr); auto userEvent = clCreateUserEvent(pContext, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); auto previousTaskLevel = pCmdQ->taskLevel; auto previousTaskCount = pCmdQ->taskCount; std::thread t([=]() { Event *ev = castToObject(userEvent); while (ev->peekHasChildEvents() == false) { // active wait for VirtualEvent (which is added after queue is blocked) } auto ret = clSetUserEventStatus(userEvent, CL_COMPLETE); ASSERT_EQ(CL_SUCCESS, ret); }); buffer->forceDisallowCPUCopy = true; // no task level incrasing when cpu copy retVal = EnqueueReadBufferHelper<>::enqueueReadBuffer(pCmdQ, buffer.get(), CL_TRUE, 0, BufferDefaults::sizeInBytes, alignedReadPtr, 
nullptr, 1, &userEvent, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_LT(previousTaskCount, pCmdQ->taskCount); EXPECT_LT(previousTaskLevel, pCmdQ->taskLevel); t.join(); retVal = clReleaseEvent(userEvent); EXPECT_EQ(CL_SUCCESS, retVal); alignedFree(alignedReadPtr); } TEST_F(IOQTaskTestsMt, GivenBlockedOnUserEventWhenEnqueingMarkerThenSuccessIsReturned) { auto userEvent = clCreateUserEvent(pContext, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); std::thread t([=]() { Event *ev = castToObject(userEvent); while (ev->peekHasChildEvents() == false) { // active wait for VirtualEvent (which is added after queue is blocked) } auto ret = clSetUserEventStatus(userEvent, CL_COMPLETE); ASSERT_EQ(CL_SUCCESS, ret); }); retVal = pCmdQ->enqueueMarkerWithWaitList( 1, &userEvent, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); t.join(); retVal = clReleaseEvent(userEvent); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(IOQTaskTestsMt, GivenMultipleThreadsWhenMappingBufferThenEventsAreCompleted) { MockGraphicsAllocation alignedBufferAlloc{nullptr, MemoryConstants::pageSize}; AlignedBuffer alignedBuffer{pContext, &alignedBufferAlloc}; auto userEvent = clCreateUserEvent(pContext, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); cl_event outputEvent = nullptr; void *mappedPtr = pCmdQ->enqueueMapBuffer(&alignedBuffer, false, CL_MAP_READ, 0, alignedBuffer.getSize(), 1, &userEvent, &outputEvent, retVal); EXPECT_EQ(CL_SUCCESS, retVal); const int32_t numThreads = 20; std::thread threads[numThreads]; std::thread threadUnblocking; cl_event ouputEventsFromThreads[numThreads]; void *mappedPtrs[numThreads]; for (int32_t i = 0; i < numThreads; i++) { threads[i] = std::thread([&](int32_t index) { cl_int errCode = CL_SUCCESS; cl_int success = CL_SUCCESS; mappedPtrs[index] = pCmdQ->enqueueMapBuffer(&alignedBuffer, false, CL_MAP_READ, 0, alignedBuffer.getSize(), 0, nullptr, &ouputEventsFromThreads[index], errCode); EXPECT_EQ(success, errCode); }, i); if (i == numThreads / 2) { threadUnblocking = std::thread([=]() { auto ret = 
clSetUserEventStatus(userEvent, CL_COMPLETE); EXPECT_EQ(CL_SUCCESS, ret); }); } } cl_int errCode = clWaitForEvents(1, &outputEvent); EXPECT_EQ(CL_SUCCESS, errCode); cl_int eventStatus = 0; errCode = clGetEventInfo(outputEvent, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &eventStatus, nullptr); EXPECT_EQ(CL_SUCCESS, errCode); EXPECT_EQ(CL_COMPLETE, eventStatus); for (int32_t i = 0; i < numThreads; i++) { threads[i].join(); cl_int errCode = clWaitForEvents(1, &ouputEventsFromThreads[i]); EXPECT_EQ(CL_SUCCESS, errCode); errCode = clGetEventInfo(outputEvent, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &eventStatus, nullptr); EXPECT_EQ(CL_SUCCESS, errCode); EXPECT_EQ(CL_COMPLETE, eventStatus); } threadUnblocking.join(); retVal = clReleaseEvent(userEvent); for (int32_t i = 0; i < numThreads; i++) { pCmdQ->enqueueUnmapMemObject(&alignedBuffer, mappedPtrs[i], 0, nullptr, nullptr); retVal = clReleaseEvent(ouputEventsFromThreads[i]); EXPECT_EQ(CL_SUCCESS, retVal); } pCmdQ->enqueueUnmapMemObject(&alignedBuffer, mappedPtr, 0, nullptr, nullptr); retVal = clReleaseEvent(outputEvent); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(IOQTaskTestsMt, GivenMultipleThreadsWhenMappingImageThenEventsAreCompleted) { auto image = std::unique_ptr(ImageHelper::create(context)); auto userEvent = clCreateUserEvent(pContext, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); cl_event outputEvent = nullptr; const size_t origin[] = {0, 0, 0}; const size_t region[] = {1, 1, 1}; void *mappedPtr = pCmdQ->enqueueMapImage(image.get(), false, CL_MAP_READ, origin, region, nullptr, nullptr, 1, &userEvent, &outputEvent, retVal); EXPECT_EQ(CL_SUCCESS, retVal); const int32_t numThreads = 20; std::thread threads[numThreads]; std::thread threadUnblocking; cl_event ouputEventsFromThreads[numThreads]; void *mappedPtrs[numThreads]; for (int32_t i = 0; i < numThreads; i++) { threads[i] = std::thread([&](int32_t index) { cl_int errCode = CL_SUCCESS; cl_int success = CL_SUCCESS; mappedPtrs[index] = 
pCmdQ->enqueueMapImage(image.get(), false, CL_MAP_READ, origin, region, nullptr, nullptr, 0, nullptr, &ouputEventsFromThreads[index], errCode); EXPECT_EQ(success, errCode); }, i); if (i == numThreads / 2) { threadUnblocking = std::thread([=]() { auto ret = clSetUserEventStatus(userEvent, CL_COMPLETE); EXPECT_EQ(CL_SUCCESS, ret); }); } } cl_int errCode = clWaitForEvents(1, &outputEvent); EXPECT_EQ(CL_SUCCESS, errCode); cl_int eventStatus = 0; errCode = clGetEventInfo(outputEvent, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &eventStatus, nullptr); EXPECT_EQ(CL_SUCCESS, errCode); EXPECT_EQ(CL_COMPLETE, eventStatus); for (int32_t i = 0; i < numThreads; i++) { threads[i].join(); cl_int errCode = clWaitForEvents(1, &ouputEventsFromThreads[i]); EXPECT_EQ(CL_SUCCESS, errCode); errCode = clGetEventInfo(outputEvent, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &eventStatus, nullptr); EXPECT_EQ(CL_SUCCESS, errCode); EXPECT_EQ(CL_COMPLETE, eventStatus); } threadUnblocking.join(); retVal = clReleaseEvent(userEvent); for (int32_t i = 0; i < numThreads; i++) { pCmdQ->enqueueUnmapMemObject(image.get(), mappedPtrs[i], 0, nullptr, nullptr); retVal = clReleaseEvent(ouputEventsFromThreads[i]); EXPECT_EQ(CL_SUCCESS, retVal); } pCmdQ->enqueueUnmapMemObject(image.get(), mappedPtr, 0, nullptr, nullptr); retVal = clReleaseEvent(outputEvent); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(IOQTaskTestsMt, givenBlitterWhenCopyUsingMultipleThreadsThenSuccessReturned) { auto hwInfo = *defaultHwInfo; hwInfo.capabilityTable.blitterOperationsSupported = true; REQUIRE_FULL_BLITTER_OR_SKIP(&hwInfo); DebugManagerStateRestore restorer; DebugManager.flags.EnableBlitterForEnqueueOperations.set(1); DebugManager.flags.DoCpuCopyOnReadBuffer.set(0); constexpr uint32_t numThreads = 32; std::atomic_uint32_t barrier = numThreads; std::array, numThreads> threads; auto device = MockClDevice::createWithNewExecutionEnvironment(&hwInfo, rootDeviceIndex); MockClDevice clDevice(device); auto cmdQ = 
createCommandQueue(&clDevice); EXPECT_EQ(cmdQ->taskCount, 0u); EXPECT_EQ(cmdQ->getGpgpuCommandStreamReceiver().peekTaskCount(), 0u); EXPECT_EQ(cmdQ->peekBcsTaskCount(aub_stream::EngineType::ENGINE_BCS), 0u); auto buffer = std::unique_ptr(BufferHelper<>::create()); for (auto &thread : threads) { thread = std::async(std::launch::async, [&]() { auto alignedReadPtr = alignedMalloc(BufferDefaults::sizeInBytes, MemoryConstants::cacheLineSize); barrier.fetch_sub(1u); while (barrier.load() != 0u) { std::this_thread::yield(); } auto retVal = EnqueueReadBufferHelper<>::enqueueReadBuffer(cmdQ, buffer.get(), CL_TRUE, 0, BufferDefaults::sizeInBytes, alignedReadPtr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); alignedFree(alignedReadPtr); }); } for (auto &thread : threads) { thread.get(); } EXPECT_EQ(cmdQ->taskCount, 0u); EXPECT_EQ(cmdQ->getGpgpuCommandStreamReceiver().peekTaskCount(), 0u); EXPECT_EQ(cmdQ->peekBcsTaskCount(aub_stream::EngineType::ENGINE_BCS), numThreads); clReleaseCommandQueue(cmdQ); }compute-runtime-22.14.22890/opencl/test/unit_test/mt_tests/command_queue/ooq_task_tests_mt.cpp000066400000000000000000000153601422164147700325660ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/command_queue/enqueue_fixture.h" #include "opencl/test/unit_test/fixtures/hello_world_fixture.h" #include using namespace NEO; struct OOQFixtureFactory : public HelloWorldFixtureFactory { typedef OOQueueFixture CommandQueueFixture; }; template struct OOQTaskTypedTestsMt : public HelloWorldTest { }; typedef OOQTaskTypedTestsMt> OOQTaskTestsMt; TEST_F(OOQTaskTestsMt, GivenBlockingAndBlockedOnUserEventWhenReadingBufferThenTaskCountIsIncrementedAndTaskLevelIsUnchanged) { auto buffer = std::unique_ptr(BufferHelper<>::create()); auto alignedReadPtr = alignedMalloc(BufferDefaults::sizeInBytes, MemoryConstants::cacheLineSize); ASSERT_NE(nullptr, alignedReadPtr); auto userEvent = 
clCreateUserEvent(pContext, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); auto previousTaskCount = pCmdQ->taskCount; auto previousTaskLevel = pCmdQ->taskLevel; std::thread t([=]() { Event *ev = castToObject(userEvent); while (ev->peekHasChildEvents() == false) { // active wait for VirtualEvent (which is added after queue is blocked) } auto ret = clSetUserEventStatus(userEvent, CL_COMPLETE); ASSERT_EQ(CL_SUCCESS, ret); }); buffer->forceDisallowCPUCopy = true; // no task level incrasing when cpu copy retVal = EnqueueReadBufferHelper<>::enqueueReadBuffer(pCmdQ, buffer.get(), CL_TRUE, 0, BufferDefaults::sizeInBytes, alignedReadPtr, nullptr, 1, &userEvent, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_LT(previousTaskCount, pCmdQ->taskCount); EXPECT_EQ(previousTaskLevel, pCmdQ->taskLevel); t.join(); retVal = clReleaseEvent(userEvent); EXPECT_EQ(CL_SUCCESS, retVal); alignedFree(alignedReadPtr); } TEST_F(OOQTaskTestsMt, GivenBlockedOnUserEventWhenEnqueingMarkerThenSuccessIsReturned) { auto userEvent = clCreateUserEvent(pContext, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); std::thread t([=]() { Event *ev = castToObject(userEvent); while (ev->peekHasChildEvents() == false) { // active wait for VirtualEvent (which is added after queue is blocked) } auto ret = clSetUserEventStatus(userEvent, CL_COMPLETE); ASSERT_EQ(CL_SUCCESS, ret); }); retVal = pCmdQ->enqueueMarkerWithWaitList( 1, &userEvent, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); t.join(); retVal = clReleaseEvent(userEvent); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(OOQTaskTestsMt, givenBlitterWhenEnqueueCopyAndKernelUsingMultipleThreadsThenSuccessReturned) { auto hwInfo = *defaultHwInfo; hwInfo.capabilityTable.blitterOperationsSupported = true; REQUIRE_FULL_BLITTER_OR_SKIP(&hwInfo); DebugManagerStateRestore restorer; DebugManager.flags.EnableBlitterForEnqueueOperations.set(1); DebugManager.flags.DoCpuCopyOnReadBuffer.set(0); DebugManager.flags.DoCpuCopyOnWriteBuffer.set(0); constexpr uint32_t numThreads = 32; std::atomic_uint32_t 
barrier = numThreads; std::array, numThreads> threads; auto device = MockClDevice::createWithNewExecutionEnvironment(&hwInfo, rootDeviceIndex); MockClDevice clDevice(device); auto cmdQ = createCommandQueue(&clDevice, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE); EXPECT_EQ(cmdQ->taskCount, 0u); EXPECT_EQ(cmdQ->getGpgpuCommandStreamReceiver().peekTaskCount(), 0u); EXPECT_EQ(cmdQ->peekBcsTaskCount(aub_stream::EngineType::ENGINE_BCS), 0u); auto buffer = std::unique_ptr(BufferHelper<>::create()); for (auto &thread : threads) { thread = std::async(std::launch::async, [&]() { auto alignedReadPtr = alignedMalloc(BufferDefaults::sizeInBytes, MemoryConstants::cacheLineSize); barrier.fetch_sub(1u); while (barrier.load() != 0u) { std::this_thread::yield(); } auto retVal = EnqueueWriteBufferHelper<>::enqueueWriteBuffer(cmdQ, buffer.get(), CL_TRUE, 0, BufferDefaults::sizeInBytes, alignedReadPtr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); size_t workSize[] = {64}; retVal = EnqueueKernelHelper<>::enqueueKernel(cmdQ, KernelFixture::pKernel, 1, nullptr, workSize, workSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = EnqueueReadBufferHelper<>::enqueueReadBuffer(cmdQ, buffer.get(), CL_TRUE, 0, BufferDefaults::sizeInBytes, alignedReadPtr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); alignedFree(alignedReadPtr); }); } for (auto &thread : threads) { thread.get(); } EXPECT_NE(cmdQ->taskCount, 0u); EXPECT_NE(cmdQ->getGpgpuCommandStreamReceiver().peekTaskCount(), 0u); EXPECT_EQ(cmdQ->peekBcsTaskCount(aub_stream::EngineType::ENGINE_BCS), 2 * numThreads); clReleaseCommandQueue(cmdQ); }compute-runtime-22.14.22890/opencl/test/unit_test/mt_tests/command_stream/000077500000000000000000000000001422164147700264625ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/mt_tests/command_stream/CMakeLists.txt000066400000000000000000000006271422164147700312270ustar00rootroot00000000000000# # Copyright (C) 2021 Intel Corporation # # 
SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_mt_tests_command_stream # local files ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt # necessary dependencies from igdrcl_tests ${NEO_SOURCE_DIR}/opencl/test/unit_test/command_stream/command_stream_receiver_mt_tests.cpp ) target_sources(igdrcl_mt_tests PRIVATE ${IGDRCL_SRCS_mt_tests_command_stream}) compute-runtime-22.14.22890/opencl/test/unit_test/mt_tests/event/000077500000000000000000000000001422164147700246125ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/mt_tests/event/CMakeLists.txt000066400000000000000000000005461422164147700273570ustar00rootroot00000000000000# # Copyright (C) 2018-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_mt_tests_event # local files ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/event_tests_mt.cpp ${CMAKE_CURRENT_SOURCE_DIR}/user_events_tests_mt.cpp ) target_sources(igdrcl_mt_tests PRIVATE ${IGDRCL_SRCS_mt_tests_event}) compute-runtime-22.14.22890/opencl/test/unit_test/mt_tests/event/event_tests_mt.cpp000066400000000000000000000020071422164147700303600ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "opencl/test/unit_test/event/event_fixture.h" #include "opencl/test/unit_test/mocks/mock_event.h" #include class SmallMockEvent : public Event { public: SmallMockEvent(Context *ctx) : Event(ctx, nullptr, CL_COMMAND_NDRANGE_KERNEL, 0, 0) {} void switchToSubmitted() { transitionExecutionStatus(CL_SUBMITTED); } void switchToComplete() { transitionExecutionStatus(CL_COMPLETE); } }; TEST(EventTestMt, WhenWaitingForEventsThenDoNotReturnUntilAllStatusesSetToComplete) { for (uint32_t i = 0; i < 100; i++) { std::unique_ptr userEvent = std::unique_ptr(new UserEvent(nullptr)); std::thread t([&]() { userEvent->setStatus(CL_COMPLETE); }); t.join(); cl_event clEvent = userEvent.get(); 
Event::waitForEvents(1, &clEvent); } } compute-runtime-22.14.22890/opencl/test/unit_test/mt_tests/event/user_events_tests_mt.cpp000066400000000000000000000114741422164147700316110ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/memory_manager/memory_manager.h" #include "opencl/test/unit_test/event/event_fixture.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include typedef HelloWorldTest EventTests; TEST_F(MockEventTests, GivenEventCreatedFromUserEventsThatIsNotSignaledThenDoNotFlushToCsr) { uEvent = make_releaseable(); cl_event retEvent = nullptr; cl_event eventWaitList[] = {uEvent.get()}; int sizeOfWaitList = sizeof(eventWaitList) / sizeof(cl_event); //call NDR auto retVal = callOneWorkItemNDRKernel(eventWaitList, sizeOfWaitList, &retEvent); auto &csr = pCmdQ->getGpgpuCommandStreamReceiver(); *csr.getTagAddress() = (unsigned int)-1; auto taskLevelBeforeWaitForEvents = csr.peekTaskLevel(); int counter = 0; int Deadline = 20000; std::atomic ThreadStarted(false); std::atomic WaitForEventsCompleted(false); std::thread t([&]() { ThreadStarted = true; //call WaitForEvents clWaitForEvents(1, &retEvent); WaitForEventsCompleted = true; }); //wait for the thread to start while (!ThreadStarted) ; //now wait a while. 
while (!WaitForEventsCompleted && counter++ < Deadline) ; ASSERT_EQ(WaitForEventsCompleted, false) << "WaitForEvents returned while user event is not signaled!"; EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(taskLevelBeforeWaitForEvents, csr.peekTaskLevel()); //set event to CL_COMPLETE uEvent->setStatus(CL_COMPLETE); t.join(); retVal = clReleaseEvent(retEvent); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(EventTests, givenUserEventBlockingEnqueueWithBlockingFlagWhenUserEventIsCompletedAfterBlockedPathIsChosenThenBlockingFlagDoesNotCauseStall) { std::unique_ptr srcBuffer(BufferHelper<>::create()); std::unique_ptr dst(new char[srcBuffer->getSize()]); for (int32_t i = 0; i < 20; i++) { UserEvent uEvent; cl_event eventWaitList[] = {&uEvent}; int sizeOfWaitList = sizeof(eventWaitList) / sizeof(cl_event); std::thread t([&]() { uEvent.setStatus(CL_COMPLETE); }); auto retVal = pCmdQ->enqueueReadBuffer(srcBuffer.get(), CL_TRUE, 0, srcBuffer->getSize(), dst.get(), nullptr, sizeOfWaitList, eventWaitList, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); t.join(); } } TEST_F(EventTests, givenUserEventBlockingEnqueueWithBlockingFlagWhenUserEventIsCompletedAfterUpdateFromCompletionStampThenBlockingFlagDoesNotCauseStall) { std::unique_ptr srcBuffer(BufferHelper<>::create()); std::unique_ptr dst(new char[srcBuffer->getSize()]); UserEvent uEvent; cl_event eventWaitList[] = {&uEvent}; int sizeOfWaitList = sizeof(eventWaitList) / sizeof(cl_event); std::thread t([&]() { while (true) { pCmdQ->takeOwnership(); if (pCmdQ->taskLevel == CompletionStamp::notReady) { pCmdQ->releaseOwnership(); break; } pCmdQ->releaseOwnership(); } uEvent.setStatus(CL_COMPLETE); }); auto retVal = pCmdQ->enqueueReadBuffer(srcBuffer.get(), CL_TRUE, 0, srcBuffer->getSize(), dst.get(), nullptr, sizeOfWaitList, eventWaitList, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); t.join(); } HWTEST_F(EventTests, givenOneThreadUpdatingUserEventAnotherWaitingOnFinishWhenFinishIsCalledThenItWaitsForCorrectTaskCount) { MockCommandQueueHw 
mockCmdQueue(context, pClDevice, nullptr); std::unique_ptr srcBuffer(BufferHelper<>::create()); std::unique_ptr dst(new char[srcBuffer->getSize()]); for (uint32_t i = 0; i < 100; i++) { UserEvent uEvent; cl_event eventWaitList[] = {&uEvent}; int sizeOfWaitList = sizeof(eventWaitList) / sizeof(cl_event); cl_event returnedEvent = nullptr; std::atomic_bool go{false}; std::atomic_bool updateEvent{true}; std::thread t([&]() { while (!go) ; uEvent.setStatus(CL_COMPLETE); }); auto retVal = mockCmdQueue.enqueueReadBuffer(srcBuffer.get(), CL_FALSE, 0, srcBuffer->getSize(), dst.get(), nullptr, sizeOfWaitList, eventWaitList, &returnedEvent); EXPECT_EQ(CL_SUCCESS, retVal); std::thread t2([&]() { while (updateEvent) { castToObject(returnedEvent)->updateExecutionStatus(); } }); go = true; clFinish(&mockCmdQueue); EXPECT_EQ(mockCmdQueue.latestTaskCountWaited, i + 1); t.join(); updateEvent = false; t2.join(); clReleaseEvent(returnedEvent); } } compute-runtime-22.14.22890/opencl/test/unit_test/mt_tests/fixtures/000077500000000000000000000000001422164147700253425ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/mt_tests/fixtures/CMakeLists.txt000066400000000000000000000007011422164147700301000ustar00rootroot00000000000000# # Copyright (C) 2018-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_mt_tests_fixtures # local files ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt # necessary dependencies from igdrcl_tests ${NEO_SOURCE_DIR}/opencl/test/unit_test/fixtures/image_fixture.cpp ${NEO_SOURCE_DIR}/opencl/test/unit_test/fixtures/platform_fixture.cpp ) target_sources(igdrcl_mt_tests PRIVATE ${IGDRCL_SRCS_mt_tests_fixtures}) compute-runtime-22.14.22890/opencl/test/unit_test/mt_tests/gen11/000077500000000000000000000000001422164147700244045ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/mt_tests/gen11/CMakeLists.txt000066400000000000000000000001741422164147700271460ustar00rootroot00000000000000# # Copyright 
(C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_GEN11) add_subdirectories() endif() compute-runtime-22.14.22890/opencl/test/unit_test/mt_tests/gen11/icllp/000077500000000000000000000000001422164147700255075ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/mt_tests/gen11/icllp/CMakeLists.txt000066400000000000000000000003131422164147700302440ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_ICLLP) set(mt_test_config "icllp/1/8/8/0") include(${OPENCL_MT_TEST_DIR}/cmake/run_mt_test_target.cmake) endif() compute-runtime-22.14.22890/opencl/test/unit_test/mt_tests/gen12lp/000077500000000000000000000000001422164147700247415ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/mt_tests/gen12lp/CMakeLists.txt000066400000000000000000000001761422164147700275050ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_GEN12LP) add_subdirectories() endif() compute-runtime-22.14.22890/opencl/test/unit_test/mt_tests/gen12lp/tgllp/000077500000000000000000000000001422164147700260635ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/mt_tests/gen12lp/tgllp/CMakeLists.txt000066400000000000000000000003141422164147700306210ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_TGLLP) set(mt_test_config "tgllp/1/6/16/0") include(${OPENCL_MT_TEST_DIR}/cmake/run_mt_test_target.cmake) endif() compute-runtime-22.14.22890/opencl/test/unit_test/mt_tests/gen8/000077500000000000000000000000001422164147700243325ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/mt_tests/gen8/CMakeLists.txt000066400000000000000000000001731422164147700270730ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_GEN8) add_subdirectories() endif() 
compute-runtime-22.14.22890/opencl/test/unit_test/mt_tests/gen8/bdw/000077500000000000000000000000001422164147700251065ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/mt_tests/gen8/bdw/CMakeLists.txt000066400000000000000000000003071422164147700276460ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_BDW) set(mt_test_config "bdw/1/3/8/0") include(${OPENCL_MT_TEST_DIR}/cmake/run_mt_test_target.cmake) endif() compute-runtime-22.14.22890/opencl/test/unit_test/mt_tests/gen9/000077500000000000000000000000001422164147700243335ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/mt_tests/gen9/CMakeLists.txt000066400000000000000000000001731422164147700270740ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_GEN9) add_subdirectories() endif() compute-runtime-22.14.22890/opencl/test/unit_test/mt_tests/gen9/skl/000077500000000000000000000000001422164147700251245ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/mt_tests/gen9/skl/CMakeLists.txt000066400000000000000000000003071422164147700276640ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_SKL) set(mt_test_config "skl/1/3/8/9") include(${OPENCL_MT_TEST_DIR}/cmake/run_mt_test_target.cmake) endif() compute-runtime-22.14.22890/opencl/test/unit_test/mt_tests/helpers/000077500000000000000000000000001422164147700251335ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/mt_tests/helpers/CMakeLists.txt000066400000000000000000000006701422164147700276760ustar00rootroot00000000000000# # Copyright (C) 2018-2022 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_mt_tests_helpers # local files ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/base_object_tests_mt.cpp 
${CMAKE_CURRENT_SOURCE_DIR}/interlocked_max_mt_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/map_operations_handler_mt_tests.cpp ) target_sources(igdrcl_mt_tests PRIVATE ${IGDRCL_SRCS_mt_tests_helpers}) compute-runtime-22.14.22890/opencl/test/unit_test/mt_tests/helpers/base_object_tests_mt.cpp000066400000000000000000000025611422164147700320250ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/accelerators/intel_accelerator.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "gtest/gtest.h" namespace NEO { TEST(BaseObjectTestsMt, givenObjectOwnershipForEachThreadWhenIncrementingNonAtomicValueThenNoDataRacesAreExpected) { MockCommandQueue *object = new MockCommandQueue; object->takeOwnership(); uint32_t counter = 0; const uint32_t loopCount = 50; const uint32_t numThreads = 3; auto incrementNonAtomicValue = [&](CommandQueue *obj) { for (uint32_t i = 0; i < loopCount; i++) { obj->takeOwnership(); counter++; obj->releaseOwnership(); } }; EXPECT_EQ(0U, object->getCond().peekNumWaiters()); std::thread t1(incrementNonAtomicValue, object); std::thread t2(incrementNonAtomicValue, object); std::thread t3(incrementNonAtomicValue, object); while (object->getCond().peekNumWaiters() != numThreads) { std::this_thread::yield(); } EXPECT_EQ(0u, counter); object->releaseOwnership(); t1.join(); t2.join(); t3.join(); EXPECT_EQ(loopCount * numThreads, counter); object->release(); } } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/mt_tests/helpers/interlocked_max_mt_tests.cpp000066400000000000000000000017141422164147700327340ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/interlocked_max.h" #include "gtest/gtest.h" #include TEST(MtTestInterlockedMaxFixture, 
givenCurrentPagingFenceValueWhenValueChangedThenValueIsSet) { std::atomic currentPagingFenceValue; std::atomic testCount; std::atomic maxValue; currentPagingFenceValue.store(0); testCount.store(100); maxValue.store(0); int threadsCount = 8; std::thread threads[8]; for (int i = 0; i < threadsCount; i++) { threads[i] = std::thread([&]() { while (testCount-- > 0) { uint64_t newVal = ++maxValue; interlockedMax(currentPagingFenceValue, newVal); } }); } for (int i = 0; i < threadsCount; i++) { threads[i].join(); } uint64_t endValue = currentPagingFenceValue.load(); EXPECT_EQ(endValue, 100u); }map_operations_handler_mt_tests.cpp000066400000000000000000000041621422164147700342220ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/mt_tests/helpers/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "opencl/source/mem_obj/map_operations_handler.h" #include "gtest/gtest.h" #include namespace NEO { struct MockMapOperationsHandler : public MapOperationsHandler { using MapOperationsHandler::isOverlapping; using MapOperationsHandler::mappedPointers; }; struct MapOperationsHandlerMtTests : public ::testing::Test { MockMapOperationsHandler mockHandler; MockGraphicsAllocation mockAllocation; std::vector mappedPtrs; void SetUp() override { MemObjSizeArray size = {{10, 10, 10}}; MemObjOffsetArray offset = {{0, 0, 0}}; cl_map_flags mapFlags = CL_MAP_READ; for (size_t i = 1; i <= 10; i++) { auto ptr = reinterpret_cast(0x1000lu * i); mappedPtrs.push_back(ptr); EXPECT_TRUE(mockHandler.add(ptr, 10, mapFlags, size, offset, 0, &mockAllocation)); } } }; TEST_F(MapOperationsHandlerMtTests, giveMapOperationsHandlerWhenAddingFindingAndRemovingThenExecuteSafely) { std::atomic removed = false; std::atomic t1Started = false; std::atomic t2Started = false; std::atomic t3Started = false; auto find = [&](std::atomic *threadStarted) { while (!removed.load()) { for (auto 
&ptr : mappedPtrs) { MapInfo out; mockHandler.findInfoForHostPtr(ptr, 1, out); } threadStarted->store(true); std::this_thread::yield(); } }; auto remove = [&]() { while (!t1Started.load() || !t2Started.load() || !t3Started.load()) { std::this_thread::yield(); } for (auto &ptr : mappedPtrs) { mockHandler.remove(ptr); } removed.store(true); }; std::thread t1(find, &t1Started); std::thread t2(find, &t2Started); std::thread t3(find, &t3Started); std::thread t4(remove); t1.join(); t2.join(); t3.join(); t4.join(); } } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/mt_tests/memory_manager/000077500000000000000000000000001422164147700264735ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/mt_tests/memory_manager/CMakeLists.txt000066400000000000000000000007421422164147700312360ustar00rootroot00000000000000# # Copyright (C) 2018-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_mt_tests_memory_manager # local files ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/deferred_deleter_clear_queue_mt_tests.cpp # necessary dependencies from neo_shared_tests ${NEO_SHARED_TEST_DIRECTORY}/unit_test/memory_manager/deferred_deleter_mt_tests.cpp ) target_sources(igdrcl_mt_tests PRIVATE ${IGDRCL_SRCS_mt_tests_memory_manager}) deferred_deleter_clear_queue_mt_tests.cpp000066400000000000000000000072201422164147700367010ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/mt_tests/memory_manager/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/mocks/mock_deferrable_deletion.h" #include "shared/test/common/mocks/mock_deferred_deleter.h" #include "gtest/gtest.h" using namespace NEO; const int threadCount = 4; struct ClearQueueTest : public ::testing::Test, public ::testing::WithParamInterface { void SetUp() override { threadStopped = 0; startClear = false; deleter.reset(new MockDeferredDeleter()); } void 
TearDown() override { EXPECT_TRUE(deleter->isQueueEmpty()); EXPECT_EQ(0, deleter->getElementsToRelease()); } static void threadMethod(MockDeferredDeleter *deleter) { while (!startClear) ; deleter->clearQueue(); threadStopped++; } MockDeferrableDeletion *createDeletion() { return new MockDeferrableDeletion(); } std::unique_ptr deleter; static std::atomic startClear; static std::atomic threadStopped; }; std::atomic ClearQueueTest::startClear; std::atomic ClearQueueTest::threadStopped; TEST_P(ClearQueueTest, WhenQueueIsClearedThenAllElementsAreReleased) { auto elementsInQueue = GetParam(); EXPECT_EQ(0, deleter->clearCalled); for (int i = 0; i < elementsInQueue; i++) { deleter->DeferredDeleter::deferDeletion(createDeletion()); } std::thread threads[threadCount]; for (int i = 0; i < threadCount; i++) { threads[i] = std::thread(threadMethod, deleter.get()); } EXPECT_EQ(0, deleter->clearCalled); EXPECT_EQ(elementsInQueue, deleter->getElementsToRelease()); startClear = true; for (int i = 0; i < threadCount; i++) { threads[i].join(); } EXPECT_EQ(threadCount, deleter->clearCalled); EXPECT_EQ(0, deleter->getElementsToRelease()); } int paramsForClearQueueTest[] = {1, 10, 20, 50, 100}; INSTANTIATE_TEST_CASE_P(DeferredDeleterMtTests, ClearQueueTest, ::testing::ValuesIn(paramsForClearQueueTest)); class MyDeferredDeleter : public DeferredDeleter { public: bool isQueueEmpty() { std::lock_guard lock(queueMutex); return queue.peekIsEmpty(); } int getElementsToRelease() { return elementsToRelease; } bool isWorking() { return doWorkInBackground; } bool isThreadRunning() { return worker != nullptr; } int getClientsNum() { return numClients; } void forceSafeStop() { safeStop(); } }; struct DeferredDeleterMtTest : public ::testing::Test { void SetUp() override { deleter.reset(new MyDeferredDeleter()); } void TearDown() override { EXPECT_TRUE(deleter->isQueueEmpty()); EXPECT_EQ(0, deleter->getElementsToRelease()); } void waitForAsyncThread() { while (!deleter->isWorking()) { 
std::this_thread::yield(); } } std::unique_ptr deleter; }; TEST_F(DeferredDeleterMtTest, WhenForcingSafeStopThenDeferredDeleterIsStopped) { deleter->addClient(); waitForAsyncThread(); EXPECT_TRUE(deleter->isThreadRunning()); EXPECT_TRUE(deleter->isWorking()); // Start worker thread std::thread t([&]() { deleter->forceSafeStop(); EXPECT_FALSE(deleter->isThreadRunning()); EXPECT_FALSE(deleter->isWorking()); }); deleter->forceSafeStop(); EXPECT_FALSE(deleter->isThreadRunning()); EXPECT_FALSE(deleter->isWorking()); t.join(); deleter->removeClient(); EXPECT_EQ(0, deleter->getClientsNum()); } compute-runtime-22.14.22890/opencl/test/unit_test/mt_tests/os_interface/000077500000000000000000000000001422164147700261325ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/mt_tests/os_interface/CMakeLists.txt000066400000000000000000000003431422164147700306720ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(UNIX) target_sources(igdrcl_mt_tests PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/linux/drm_memory_manager_mt_tests.cpp ) endif() compute-runtime-22.14.22890/opencl/test/unit_test/mt_tests/os_interface/linux/000077500000000000000000000000001422164147700272715ustar00rootroot00000000000000drm_memory_manager_mt_tests.cpp000066400000000000000000000132401422164147700355040ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/mt_tests/os_interface/linux/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/os_interface/linux/drm_memory_manager.h" #include "shared/source/os_interface/linux/drm_memory_operations_handler.h" #include "shared/source/os_interface/os_interface.h" #include "shared/test/common/mocks/linux/mock_drm_memory_manager.h" #include "shared/test/common/mocks/mock_execution_environment.h" #include 
"shared/test/common/os_interface/linux/device_command_stream_fixture.h" #include "gtest/gtest.h" #include #include #include using namespace NEO; TEST(DrmMemoryManagerTest, givenDrmMemoryManagerWhenSharedAllocationIsCreatedFromMultipleThreadsThenSingleBoIsReused) { class MockDrm : public Drm { public: using Drm::setupIoctlHelper; MockDrm(int fd, RootDeviceEnvironment &rootDeviceEnvironment) : Drm(std::make_unique(fd, ""), rootDeviceEnvironment) {} int ioctl(unsigned long request, void *arg) override { if (request == DRM_IOCTL_PRIME_FD_TO_HANDLE) { auto *primeToHandleParams = (drm_prime_handle *)arg; primeToHandleParams->handle = 10; } return 0; } }; MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); executionEnvironment.rootDeviceEnvironments[0]->osInterface = std::make_unique(); auto mock = new MockDrm(0, *executionEnvironment.rootDeviceEnvironments[0]); mock->setupIoctlHelper(defaultHwInfo->platform.eProductFamily); executionEnvironment.rootDeviceEnvironments[0]->osInterface->setDriverModel(std::unique_ptr(mock)); executionEnvironment.rootDeviceEnvironments[0]->memoryOperationsInterface = DrmMemoryOperationsHandler::create(*mock, 0u); auto memoryManager = std::make_unique(executionEnvironment); osHandle handle = 3; constexpr size_t maxThreads = 10; GraphicsAllocation *createdAllocations[maxThreads]; std::thread threads[maxThreads]; std::atomic index(0); std::atomic allocateCount(0); auto createFunction = [&]() { size_t indexFree = index++; AllocationProperties properties(0, false, MemoryConstants::pageSize, AllocationType::SHARED_BUFFER, false, {}); createdAllocations[indexFree] = memoryManager->createGraphicsAllocationFromSharedHandle(handle, properties, false, false); EXPECT_NE(nullptr, createdAllocations[indexFree]); EXPECT_GE(1u, memoryManager->peekSharedBosSize()); allocateCount++; }; for (size_t i = 0; i < maxThreads; i++) { threads[i] = std::thread(createFunction); } while (allocateCount < maxThreads) { EXPECT_GE(1u, 
memoryManager->peekSharedBosSize()); } for (size_t i = 0; i < maxThreads; i++) { threads[i].join(); memoryManager->freeGraphicsMemory(createdAllocations[i]); } } TEST(DrmMemoryManagerTest, givenMultipleThreadsWhenSharedAllocationIsCreatedThenPrimeFdToHandleDoesNotRaceWithClose) { class MockDrm : public Drm { public: using Drm::setupIoctlHelper; MockDrm(int fd, RootDeviceEnvironment &rootDeviceEnvironment) : Drm(std::make_unique(fd, ""), rootDeviceEnvironment) { primeFdHandle = 1; closeHandle = 1; } std::atomic primeFdHandle; std::atomic closeHandle; int ioctl(unsigned long request, void *arg) override { if (request == DRM_IOCTL_PRIME_FD_TO_HANDLE) { auto *primeToHandleParams = (drm_prime_handle *)arg; primeToHandleParams->handle = primeFdHandle; // PrimeFdHandle should not be lower than closeHandle // GemClose shouldn't be executed concurrently with primtFdToHandle EXPECT_EQ(closeHandle.load(), primeFdHandle.load()); } else if (request == DRM_IOCTL_GEM_CLOSE) { closeHandle++; std::this_thread::yield(); primeFdHandle.store(closeHandle.load()); } return 0; } }; MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); executionEnvironment.rootDeviceEnvironments[0]->osInterface = std::make_unique(); auto mock = new MockDrm(0, *executionEnvironment.rootDeviceEnvironments[0]); mock->setupIoctlHelper(defaultHwInfo->platform.eProductFamily); executionEnvironment.rootDeviceEnvironments[0]->osInterface->setDriverModel(std::unique_ptr(mock)); executionEnvironment.rootDeviceEnvironments[0]->memoryOperationsInterface = DrmMemoryOperationsHandler::create(*mock, 0u); auto memoryManager = std::make_unique(executionEnvironment); osHandle handle = 3; constexpr size_t maxThreads = 10; GraphicsAllocation *createdAllocations[maxThreads]; std::thread threads[maxThreads]; std::atomic index(0); auto createFunction = [&]() { size_t indexFree = index++; AllocationProperties properties(0, false, MemoryConstants::pageSize, AllocationType::SHARED_BUFFER, false, {}); 
createdAllocations[indexFree] = memoryManager->createGraphicsAllocationFromSharedHandle(handle, properties, false, false); EXPECT_NE(nullptr, createdAllocations[indexFree]); std::this_thread::yield(); memoryManager->freeGraphicsMemory(createdAllocations[indexFree]); }; for (size_t i = 0; i < maxThreads; i++) { threads[i] = std::thread(createFunction); } for (size_t i = 0; i < maxThreads; i++) { threads[i].join(); } } compute-runtime-22.14.22890/opencl/test/unit_test/mt_tests/sharing/000077500000000000000000000000001422164147700251245ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/mt_tests/sharing/CMakeLists.txt000066400000000000000000000005071422164147700276660ustar00rootroot00000000000000# # Copyright (C) 2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_mt_tests_sharing # local files ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/d3d_sharing_mt_tests.cpp ) if(WIN32) target_sources(igdrcl_mt_tests PRIVATE ${IGDRCL_SRCS_mt_tests_sharing}) endif() compute-runtime-22.14.22890/opencl/test/unit_test/mt_tests/sharing/d3d_sharing_mt_tests.cpp000066400000000000000000000070321422164147700317410ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/source/os_interface/windows/d3d_sharing_functions.h" #include "opencl/source/sharings/d3d/d3d_sharing.h" #include "opencl/test/unit_test/mocks/mock_context.h" using namespace NEO; template class MockD3DSharingFunctions : public D3DSharingFunctions { public: typedef typename D3D::D3DDevice D3DDevice; typedef typename D3D::D3DQuery D3DQuery; typedef typename D3D::D3DResource D3DResource; MockD3DSharingFunctions() : D3DSharingFunctions((D3DDevice *)1) { } void getDeviceContext(D3DQuery *query) override { signalDeviceContextCalled = true; while (!signalLockChecked) ; } void copySubresourceRegion(D3DResource *dst, cl_uint dstSubresource, 
D3DResource *src, cl_uint srcSubresource) override { } void flushAndWait(D3DQuery *query) override { } void releaseDeviceContext(D3DQuery *query) override { } void addRef(D3DResource *resource) override { } void createQuery(D3DQuery **query) override { } void release(IUnknown *resource) override { } std::atomic_bool signalDeviceContextCalled = false; std::atomic_bool signalLockChecked = false; }; template class MockD3DSharingBase : public D3DSharing { public: using D3DSharing::sharingFunctions; MockD3DSharingBase(Context *ctx) : D3DSharing(ctx, nullptr, nullptr, 0, false) { } void checkIfMutexWasLocked() { isLocked = !this->mtx.try_lock(); reinterpret_cast *>(this->sharingFunctions)->signalLockChecked = true; } bool isLocked = false; }; TEST(SharingD3DMT, givenD3DSharingWhenSynchroniceObjectIsCalledThenMtxIsLockedBeforeAccessingDevice) { auto mockCtx = std::make_unique(); mockCtx->sharingFunctions[MockD3DSharingFunctions::sharingId] = std::make_unique>(); auto mockD3DSharing = std::make_unique>(mockCtx.get()); UpdateData updateData(0); std::thread t1(&MockD3DSharingBase::synchronizeObject, mockD3DSharing.get(), updateData); while (!reinterpret_cast *>(mockD3DSharing->sharingFunctions)->signalDeviceContextCalled) ; std::thread t2(&MockD3DSharingBase::checkIfMutexWasLocked, mockD3DSharing.get()); t1.join(); t2.join(); EXPECT_TRUE(mockD3DSharing->isLocked); } TEST(SharingD3DMT, givenD3DSharingWhenReleaseResourceIsCalledThenMtxIsLockedBeforeAccessingDevice) { auto mockCtx = std::make_unique(); mockCtx->sharingFunctions[MockD3DSharingFunctions::sharingId] = std::make_unique>(); auto mockD3DSharing = std::make_unique>(mockCtx.get()); UpdateData updateData(0); std::thread t1(&MockD3DSharingBase::releaseResource, mockD3DSharing.get(), nullptr, 0); while (!reinterpret_cast *>(mockD3DSharing->sharingFunctions)->signalDeviceContextCalled) ; std::thread t2(&MockD3DSharingBase::checkIfMutexWasLocked, mockD3DSharing.get()); t1.join(); t2.join(); 
EXPECT_TRUE(mockD3DSharing->isLocked); } compute-runtime-22.14.22890/opencl/test/unit_test/mt_tests/utilities/000077500000000000000000000000001422164147700255045ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/mt_tests/utilities/CMakeLists.txt000066400000000000000000000005701422164147700302460ustar00rootroot00000000000000# # Copyright (C) 2018-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_mt_tests_utilities # local files ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt # necessary dependencies from igdrcl_tests ${CMAKE_CURRENT_SOURCE_DIR}/reference_tracked_object_tests_mt.cpp ) target_sources(igdrcl_mt_tests PRIVATE ${IGDRCL_SRCS_mt_tests_utilities}) reference_tracked_object_tests_mt.cpp000066400000000000000000000106151422164147700350370ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/mt_tests/utilities/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/utilities/reference_tracked_object.h" #include "gtest/gtest.h" #include #include namespace NEO { struct MockReferenceTrackedObject : ReferenceTrackedObject { MockReferenceTrackedObject(std::atomic &marker, std::atomic &flagInsideCustomDeleter, std::atomic &flagUseCustomDeleter, std::atomic &flagAfterBgDecRefCount) : marker(marker), flagInsideCustomDeleter(flagInsideCustomDeleter), flagUseCustomDeleter(flagUseCustomDeleter), flagAfterBgDecRefCount(flagAfterBgDecRefCount) { } using DeleterFuncType = void (*)(MockReferenceTrackedObject *); DeleterFuncType getCustomDeleter() const { if (flagUseCustomDeleter == false) { return nullptr; } flagInsideCustomDeleter = true; while (flagAfterBgDecRefCount == false) { } const_cast(this)->SetMarker(marker); return nullptr; } virtual void SetMarker(std::atomic &marker) { marker = GetMarker(); } static int GetMarker() { return 1; } std::atomic ▮ std::atomic &flagInsideCustomDeleter; std::atomic &flagUseCustomDeleter; std::atomic 
&flagAfterBgDecRefCount; }; struct MockReferenceTrackedObjectDerivative : MockReferenceTrackedObject { using MockReferenceTrackedObject::MockReferenceTrackedObject; void SetMarker(std::atomic &marker) override { marker = GetMarker(); } static int GetMarker() { return 2; } }; void DecRefCount(MockReferenceTrackedObject *obj, bool useInternalRefCount, std::atomic *flagInsideCustomDeleter, std::atomic *flagUseCustomDeleter, std::atomic *flagAfterBgDecRefCount) { while (*flagInsideCustomDeleter == false) { } *flagUseCustomDeleter = false; if (useInternalRefCount) { obj->decRefInternal(); } else { obj->decRefApi(); } *flagAfterBgDecRefCount = true; } TEST(ReferenceTrackedObject, whenDecreasingApiRefcountSimultaneouslyThenRetrieveProperCustomDeleterWhileObjectIsStillAlive) { ASSERT_NE(MockReferenceTrackedObjectDerivative::GetMarker(), MockReferenceTrackedObject::GetMarker()); std::atomic marker; std::atomic flagInsideCustomDeleter; std::atomic flagUseCustomDeleter; std::atomic flagAfterBgDecRefCount; marker = 0; flagInsideCustomDeleter = false; flagUseCustomDeleter = true; flagAfterBgDecRefCount = false; MockReferenceTrackedObjectDerivative *obj = new MockReferenceTrackedObjectDerivative(marker, flagInsideCustomDeleter, flagUseCustomDeleter, flagAfterBgDecRefCount); obj->incRefApi(); obj->incRefApi(); ASSERT_EQ(2, obj->getRefApiCount()); ASSERT_EQ(2, obj->getRefInternalCount()); ASSERT_EQ(0, marker); std::thread bgThread(DecRefCount, obj, false, &flagInsideCustomDeleter, &flagUseCustomDeleter, &flagAfterBgDecRefCount); obj->decRefApi(); bgThread.join(); EXPECT_EQ(MockReferenceTrackedObjectDerivative::GetMarker(), marker); } TEST(ReferenceTrackedObject, whenDecreasingInternalRefcountSimultaneouslyThenRetrieveProperCustomDeleterWhileObjectIsStillAlive) { ASSERT_NE(MockReferenceTrackedObjectDerivative::GetMarker(), MockReferenceTrackedObject::GetMarker()); std::atomic marker; std::atomic flagInsideCustomDeleter; std::atomic flagUseCustomDeleter; std::atomic 
flagAfterBgDecRefCount; marker = 0; flagInsideCustomDeleter = false; flagUseCustomDeleter = true; flagAfterBgDecRefCount = false; MockReferenceTrackedObjectDerivative *obj = new MockReferenceTrackedObjectDerivative(marker, flagInsideCustomDeleter, flagUseCustomDeleter, flagAfterBgDecRefCount); obj->incRefInternal(); obj->incRefInternal(); ASSERT_EQ(2, obj->getRefInternalCount()); ASSERT_EQ(0, obj->getRefApiCount()); ASSERT_EQ(0, marker); std::thread bgThread(DecRefCount, obj, true, &flagInsideCustomDeleter, &flagUseCustomDeleter, &flagAfterBgDecRefCount); obj->decRefInternal(); bgThread.join(); EXPECT_EQ(MockReferenceTrackedObjectDerivative::GetMarker(), marker); } } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/offline_compiler/000077500000000000000000000000001422164147700251435ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/offline_compiler/CMakeLists.txt000066400000000000000000000204721422164147700277100ustar00rootroot00000000000000# # Copyright (C) 2018-2022 Intel Corporation # # SPDX-License-Identifier: MIT # project(ocloc_tests) set(IGDRCL_SRCS_cloc ${OCLOC_DIRECTORY}/source/decoder/binary_decoder.cpp ${OCLOC_DIRECTORY}/source/decoder/binary_encoder.cpp ${OCLOC_DIRECTORY}/source/offline_compiler.cpp ${OCLOC_DIRECTORY}/source/offline_linker.cpp ${OCLOC_DIRECTORY}/source/ocloc_fatbinary.cpp ) set(IGDRCL_SRCS_offline_compiler_mock ${CMAKE_CURRENT_SOURCE_DIR}/decoder/mock/mock_decoder.h ${CMAKE_CURRENT_SOURCE_DIR}/decoder/mock/mock_encoder.h ${CMAKE_CURRENT_SOURCE_DIR}/decoder/mock/mock_iga_wrapper.h ${CMAKE_CURRENT_SOURCE_DIR}/mock/mock_argument_helper.h ${CMAKE_CURRENT_SOURCE_DIR}/mock/mock_multi_command.h ${CMAKE_CURRENT_SOURCE_DIR}/mock/mock_offline_compiler.h ${CMAKE_CURRENT_SOURCE_DIR}/mock/mock_offline_linker.h ${CMAKE_CURRENT_SOURCE_DIR}/mock/mock_sip_ocloc_tests.cpp ) set(CLOC_LIB_SRCS_UTILITIES ${OCLOC_DIRECTORY}/source/utilities/safety_caller.h ${OCLOC_DIRECTORY}/source/utilities/get_current_dir.h ) 
if(WIN32) list(APPEND CLOC_LIB_SRCS_UTILITIES ${OCLOC_DIRECTORY}/source/utilities/windows/safety_caller_windows.cpp ${OCLOC_DIRECTORY}/source/utilities/windows/safety_guard_windows.h ${OCLOC_DIRECTORY}/source/utilities/windows/seh_exception.cpp ${OCLOC_DIRECTORY}/source/utilities/windows/seh_exception.h ${OCLOC_DIRECTORY}/source/utilities/windows/get_current_dir_windows.cpp ) else() list(APPEND CLOC_LIB_SRCS_UTILITIES ${OCLOC_DIRECTORY}/source/utilities/linux/safety_caller_linux.cpp ${OCLOC_DIRECTORY}/source/utilities/linux/safety_guard_linux.h ${OCLOC_DIRECTORY}/source/utilities/linux/get_current_dir_linux.cpp ) endif() get_property(NEO_CORE_tests_compiler_mocks GLOBAL PROPERTY NEO_CORE_tests_compiler_mocks) set(IGDRCL_SRCS_offline_compiler_tests ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/decoder/decoder_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/decoder/encoder_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/environment.h ${CMAKE_CURRENT_SOURCE_DIR}/main.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ocloc_api_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ocloc_fatbinary_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ocloc_fatbinary_tests.h ${CMAKE_CURRENT_SOURCE_DIR}/ocloc_product_config_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ocloc_product_config_tests.h ${CMAKE_CURRENT_SOURCE_DIR}/ocloc_tests_configuration.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ocloc_validator_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/offline_compiler_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/offline_compiler_tests.h ${CMAKE_CURRENT_SOURCE_DIR}/offline_linker_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/offline_linker_tests.h ${NEO_SHARED_DIRECTORY}/helpers/abort.cpp ${NEO_SHARED_DIRECTORY}/helpers/file_io.cpp ${NEO_SHARED_DIRECTORY}/memory_manager/deferred_deleter.cpp ${NEO_SHARED_DIRECTORY}/memory_manager/deferred_deleter.h ${NEO_SHARED_TEST_DIRECTORY}/common/mocks/mock_compilers.cpp ${NEO_SHARED_TEST_DIRECTORY}/common/mocks/mock_compilers.h ${NEO_SHARED_TEST_DIRECTORY}/unit_test/device_binary_format/zebin_tests.h 
${NEO_SHARED_TEST_DIRECTORY}/common/helpers/test_files.cpp ${NEO_SHARED_TEST_DIRECTORY}/common/libult/signal_utils.h ${NEO_SHARED_TEST_DIRECTORY}/common/test_macros/test_excludes.cpp ${IGDRCL_SRCS_cloc} ${IGDRCL_SRCS_offline_compiler_mock} ${NEO_CORE_tests_compiler_mocks} ${CLOC_LIB_SRCS_LIB} ${CLOC_LIB_SRCS_UTILITIES} ) append_sources_from_properties(IGDRCL_SRCS_offline_compiler_tests NEO_SRCS_ENABLE_CORE) if(WIN32) list(APPEND IGDRCL_SRCS_offline_compiler_tests ${NEO_SHARED_DIRECTORY}/os_interface/windows/os_thread_win.cpp ${NEO_SHARED_TEST_DIRECTORY}/common/os_interface/windows/signal_utils.cpp ) else() list(APPEND IGDRCL_SRCS_offline_compiler_tests ${NEO_SHARED_DIRECTORY}/os_interface/linux/os_thread_linux.cpp ${NEO_SHARED_DIRECTORY}/os_interface/linux/sys_calls_linux.cpp ${NEO_SHARED_TEST_DIRECTORY}/common/os_interface/linux/signal_utils.cpp ${OCLOC_DIRECTORY}/source/linux/os_library_ocloc_helper.cpp ) endif() link_directories(${CMAKE_RUNTIME_OUTPUT_DIRECTORY}) add_executable(ocloc_tests ${IGDRCL_SRCS_offline_compiler_tests}) target_include_directories(ocloc_tests PRIVATE ${NEO_SHARED_TEST_DIRECTORY}/common/test_macros/header${BRANCH_DIR_SUFFIX} ${NEO_SHARED_TEST_DIRECTORY}/common/test_configuration/unit_tests $ ) target_compile_definitions(ocloc_tests PUBLIC MOCKABLE_VIRTUAL=virtual $ ${TESTED_CORE_FLAGS_DEFINITONS}) target_link_libraries(ocloc_tests gmock-gtest) if(WIN32) target_link_libraries(ocloc_tests dbghelp) endif() if(UNIX) target_link_libraries(ocloc_tests dl pthread) endif() get_property(CLOC_LIB_FOLDER TARGET ocloc_lib PROPERTY FOLDER) get_property(CLOC_LIB_COMPILE_FLAGS TARGET ocloc_lib PROPERTY COMPILE_FLAGS) set_property(TARGET ocloc_tests PROPERTY FOLDER ${CLOC_LIB_FOLDER}) set_property(TARGET ocloc_tests APPEND_STRING PROPERTY COMPILE_FLAGS ${CLOC_LIB_COMPILE_FLAGS}) string(TOLOWER ${DEFAULT_TESTED_PLATFORM} CLOC_LIB_DEFAULT_DEVICE) add_custom_target(run_ocloc_tests ALL DEPENDS ocloc_tests ) if(NOT NEO_DISABLE_BUILTINS_COMPILATION) 
add_dependencies(run_unit_tests run_ocloc_tests) add_dependencies(unit_tests ocloc_tests) endif() macro(macro_for_each_platform) if("${PLATFORM_IT_LOWER}" STREQUAL "${CLOC_LIB_DEFAULT_DEVICE}") foreach(PLATFORM_TYPE ${PLATFORM_TYPES}) if(${PLATFORM_IT}_IS_${PLATFORM_TYPE}) list(GET ${PLATFORM_TYPE}_${CORE_TYPE}_REVISIONS 0 REVISION_ID) get_family_name_with_type(${CORE_TYPE} ${PLATFORM_TYPE}) add_dependencies(run_ocloc_tests prepare_test_kernels_${family_name_with_type}_${REVISION_ID}) add_dependencies(unit_tests prepare_test_kernels_${family_name_with_type}_${REVISION_ID}) neo_copy_test_files(copy_test_files_${family_name_with_type}_${REVISION_ID} ${family_name_with_type}) add_dependencies(run_ocloc_tests copy_test_files_${family_name_with_type}_${REVISION_ID}) add_dependencies(unit_tests copy_test_files_${family_name_with_type}_${REVISION_ID}) set(run_tests_cmd ocloc_tests --device ${CLOC_LIB_DEFAULT_DEVICE} --family_type ${family_name_with_type} --rev_id ${REVISION_ID}) endif() endforeach() endif() endmacro() macro(macro_for_each_core_type) apply_macro_for_each_platform() endmacro() apply_macro_for_each_core_type("TESTED") set_property(TARGET run_ocloc_tests PROPERTY FOLDER ${CLOC_LIB_FOLDER}) if(WIN32) add_custom_command( TARGET run_ocloc_tests POST_BUILD COMMAND echo deleting offline compiler files and directories... COMMAND ${CMAKE_COMMAND} -E remove ${TargetDir}/${CLOC_LIB_DEFAULT_DEVICE}/copybuffer_${CLOC_LIB_DEFAULT_DEVICE}.bc COMMAND ${CMAKE_COMMAND} -E remove ${TargetDir}/${CLOC_LIB_DEFAULT_DEVICE}/copybuffer_${CLOC_LIB_DEFAULT_DEVICE}.gen COMMAND ${CMAKE_COMMAND} -E remove ${TargetDir}/${CLOC_LIB_DEFAULT_DEVICE}/copybuffer_${CLOC_LIB_DEFAULT_DEVICE}.bin COMMAND ${CMAKE_COMMAND} -E remove_directory ${TargetDir}/${CLOC_LIB_DEFAULT_DEVICE}/offline_compiler_test COMMAND ${run_tests_cmd} WORKING_DIRECTORY ${TargetDir} ) else() add_custom_command( TARGET run_ocloc_tests POST_BUILD COMMAND echo deleting offline compiler files and directories... 
COMMAND ${CMAKE_COMMAND} -E remove ${TargetDir}/${CLOC_LIB_DEFAULT_DEVICE}/*.bc COMMAND ${CMAKE_COMMAND} -E remove ${TargetDir}/${CLOC_LIB_DEFAULT_DEVICE}/*.gen COMMAND ${CMAKE_COMMAND} -E remove ${TargetDir}/${CLOC_LIB_DEFAULT_DEVICE}/*.ll COMMAND ${CMAKE_COMMAND} -E remove ${TargetDir}/${CLOC_LIB_DEFAULT_DEVICE}/*.bin COMMAND ${CMAKE_COMMAND} -E remove_directory "${TargetDir}/offline_compiler_test" COMMAND ${run_tests_cmd} WORKING_DIRECTORY ${TargetDir} ) endif() add_subdirectories() create_project_source_tree(ocloc_tests) compute-runtime-22.14.22890/opencl/test/unit_test/offline_compiler/decoder/000077500000000000000000000000001422164147700265505ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/offline_compiler/decoder/decoder_tests.cpp000066400000000000000000000354201422164147700321070ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/array_count.h" #include "shared/test/common/helpers/test_files.h" #include "opencl/test/unit_test/offline_compiler/mock/mock_argument_helper.h" #include "opencl/test/unit_test/test_files/patch_list.h" #include "gtest/gtest.h" #include "mock/mock_decoder.h" #include SProgramBinaryHeader createProgramBinaryHeader(const uint32_t numberOfKernels, const uint32_t patchListSize) { return SProgramBinaryHeader{MAGIC_CL, 0, 0, 0, numberOfKernels, 0, patchListSize}; } SKernelBinaryHeaderCommon createKernelBinaryHeaderCommon(const uint32_t kernelNameSize, const uint32_t patchListSize) { SKernelBinaryHeaderCommon kernelHeader = {}; kernelHeader.CheckSum = 0xFFFFFFFF; kernelHeader.ShaderHashCode = 0xFFFFFFFFFFFFFFFF; kernelHeader.KernelNameSize = kernelNameSize; kernelHeader.PatchListSize = patchListSize; return kernelHeader; } namespace NEO { TEST(DecoderTests, WhenParsingValidListOfParametersThenReturnValueIsZero) { std::vector args = { "ocloc", "decoder", "-file", "test_files/binary.bin", "-patch", "test_files/patch", 
"-dump", "test_files/created"}; MockDecoder decoder; EXPECT_EQ(0, decoder.validateInput(args)); } TEST(DecoderTests, GivenValidSizeStringWhenGettingSizeThenProperOutcomeIsExpectedAndExceptionIsNotThrown) { MockDecoder decoder; EXPECT_EQ(static_cast(1), decoder.getSize("uint8_t")); EXPECT_EQ(static_cast(2), decoder.getSize("uint16_t")); EXPECT_EQ(static_cast(4), decoder.getSize("uint32_t")); EXPECT_EQ(static_cast(8), decoder.getSize("uint64_t")); } TEST(DecoderTests, GivenProperStructWhenReadingStructFieldsThenFieldsVectorGetsPopulatedCorrectly) { std::vector lines; lines.push_back("/* */"); lines.push_back("struct SPatchSamplerStateArray :"); lines.push_back(" SPatchItemHeader"); lines.push_back("{"); lines.push_back(" uint64_t SomeField;"); lines.push_back(" uint32_t Offset;"); lines.push_back(""); lines.push_back(" uint16_t Count;"); lines.push_back(" uint8_t BorderColorOffset;"); lines.push_back("};"); std::vector fields; MockDecoder decoder; size_t pos = 4; uint32_t full_size = decoder.readStructFields(lines, pos, fields); EXPECT_EQ(static_cast(15), full_size); EXPECT_EQ(static_cast(8), fields[0].size); EXPECT_EQ("SomeField", fields[0].name); EXPECT_EQ(static_cast(4), fields[1].size); EXPECT_EQ("Offset", fields[1].name); EXPECT_EQ(static_cast(2), fields[2].size); EXPECT_EQ("Count", fields[2].name); EXPECT_EQ(static_cast(1), fields[3].size); EXPECT_EQ("BorderColorOffset", fields[3].name); } TEST(DecoderTests, GivenProperPatchListFileWhenParsingTokensThenFileIsParsedCorrectly) { MockDecoder decoder; decoder.pathToPatch = "test_files/"; decoder.parseTokens(); EXPECT_EQ(static_cast(28), (decoder.programHeader.size)); EXPECT_EQ(static_cast(4), (decoder.programHeader.fields[0].size)); EXPECT_EQ("Magic", (decoder.programHeader.fields[0].name)); EXPECT_EQ(static_cast(4), (decoder.programHeader.fields[1].size)); EXPECT_EQ("Version", (decoder.programHeader.fields[1].name)); EXPECT_EQ(static_cast(4), (decoder.programHeader.fields[2].size)); EXPECT_EQ("Device", 
(decoder.programHeader.fields[2].name)); EXPECT_EQ(static_cast(4), (decoder.programHeader.fields[3].size)); EXPECT_EQ("GPUPointerSizeInBytes", (decoder.programHeader.fields[3].name)); EXPECT_EQ(static_cast(4), (decoder.programHeader.fields[4].size)); EXPECT_EQ("NumberOfKernels", (decoder.programHeader.fields[4].name)); EXPECT_EQ(static_cast(4), (decoder.programHeader.fields[5].size)); EXPECT_EQ("SteppingId", (decoder.programHeader.fields[5].name)); EXPECT_EQ(static_cast(4), (decoder.programHeader.fields[6].size)); EXPECT_EQ("PatchListSize", (decoder.programHeader.fields[6].name)); EXPECT_EQ(static_cast(40), (decoder.kernelHeader.size)); EXPECT_EQ(static_cast(4), (decoder.kernelHeader.fields[0].size)); EXPECT_EQ("CheckSum", (decoder.kernelHeader.fields[0].name)); EXPECT_EQ(static_cast(8), (decoder.kernelHeader.fields[1].size)); EXPECT_EQ("ShaderHashCode", (decoder.kernelHeader.fields[1].name)); EXPECT_EQ(static_cast(4), (decoder.kernelHeader.fields[2].size)); EXPECT_EQ("KernelNameSize", (decoder.kernelHeader.fields[2].name)); EXPECT_EQ(static_cast(4), (decoder.kernelHeader.fields[3].size)); EXPECT_EQ("PatchListSize", (decoder.kernelHeader.fields[3].name)); EXPECT_EQ(static_cast(4), (decoder.kernelHeader.fields[4].size)); EXPECT_EQ("KernelHeapSize", (decoder.kernelHeader.fields[4].name)); EXPECT_EQ(static_cast(4), (decoder.kernelHeader.fields[5].size)); EXPECT_EQ("GeneralStateHeapSize", (decoder.kernelHeader.fields[5].name)); EXPECT_EQ(static_cast(4), (decoder.kernelHeader.fields[6].size)); EXPECT_EQ("DynamicStateHeapSize", (decoder.kernelHeader.fields[6].name)); EXPECT_EQ(static_cast(4), (decoder.kernelHeader.fields[7].size)); EXPECT_EQ("SurfaceStateHeapSize", (decoder.kernelHeader.fields[7].name)); EXPECT_EQ(static_cast(4), (decoder.kernelHeader.fields[8].size)); EXPECT_EQ("KernelUnpaddedSize", (decoder.kernelHeader.fields[8].name)); EXPECT_EQ(static_cast(4), (decoder.patchTokens[2]->size)); EXPECT_EQ("PATCH_TOKEN_STATE_SIP", (decoder.patchTokens[2]->name)); 
EXPECT_EQ(static_cast(4), (decoder.patchTokens[2]->fields[0].size)); EXPECT_EQ("SystemKernelOffset", (decoder.patchTokens[2]->fields[0].name)); EXPECT_EQ(static_cast(12), decoder.patchTokens[5]->size); EXPECT_EQ("PATCH_TOKEN_SAMPLER_STATE_ARRAY", decoder.patchTokens[5]->name); EXPECT_EQ(static_cast(4), (decoder.patchTokens[5]->fields[0].size)); EXPECT_EQ("Offset", (decoder.patchTokens[5]->fields[0].name)); EXPECT_EQ(static_cast(4), (decoder.patchTokens[5]->fields[1].size)); EXPECT_EQ("Count", (decoder.patchTokens[5]->fields[1].name)); EXPECT_EQ(static_cast(4), (decoder.patchTokens[5]->fields[2].size)); EXPECT_EQ("BorderColorOffset", (decoder.patchTokens[5]->fields[2].name)); EXPECT_EQ(static_cast(8), decoder.patchTokens[42]->size); EXPECT_EQ("PATCH_TOKEN_ALLOCATE_CONSTANT_MEMORY_SURFACE_PROGRAM_BINARY_INFO", decoder.patchTokens[42]->name); EXPECT_EQ(static_cast(4), (decoder.patchTokens[42]->fields[0].size)); EXPECT_EQ("ConstantBufferIndex", (decoder.patchTokens[42]->fields[0].name)); EXPECT_EQ(static_cast(4), (decoder.patchTokens[42]->fields[1].size)); EXPECT_EQ("InlineDataSize", (decoder.patchTokens[42]->fields[1].name)); EXPECT_EQ(static_cast(4), decoder.patchTokens[19]->size); EXPECT_EQ("PATCH_TOKEN_MEDIA_INTERFACE_DESCRIPTOR_LOAD", decoder.patchTokens[19]->name); EXPECT_EQ(static_cast(4), (decoder.patchTokens[19]->fields[0].size)); EXPECT_EQ("InterfaceDescriptorDataOffset", (decoder.patchTokens[19]->fields[0].name)); } TEST(DecoderTests, WhenPathToPatchTokensNotProvidedThenUseDefaults) { MockDecoder decoder; decoder.pathToPatch = ""; decoder.parseTokens(); EXPECT_NE(0U, decoder.programHeader.size); EXPECT_NE(0U, decoder.kernelHeader.size); } TEST(DecoderTests, GivenValidBinaryWhenReadingPatchTokensFromBinaryThenBinaryIsReadCorrectly) { std::string binaryString; std::stringstream binarySS; uint8_t byte; uint32_t byte4; byte4 = 4; binarySS.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 16; binarySS.write(reinterpret_cast(&byte4), sizeof(uint32_t)); 
byte4 = 1234; binarySS.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 5678; binarySS.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 2; binarySS.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 12; binarySS.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte = 255; for (auto i = 0; i < 4; ++i) { binarySS.write(reinterpret_cast(&byte), sizeof(uint8_t)); } binaryString = binarySS.str(); std::vector binary(binaryString.begin(), binaryString.end()); MockDecoder decoder; std::stringstream out; auto PTptr = std::make_unique(); PTptr->size = 20; PTptr->name = "Example patchtoken"; PTptr->fields.push_back(PTField{4, "First"}); PTptr->fields.push_back(PTField{4, "Second"}); decoder.patchTokens.insert(std::pair>(4, std::move(PTptr))); const void *ptr = reinterpret_cast(binary.data()); decoder.readPatchTokens(ptr, 28, out); std::string s = "Example patchtoken:\n\t4 Token 4\n\t4 Size 16\n\t4 First 1234\n\t4 Second 5678\nUnidentified PatchToken:\n\t4 Token 2\n\t4 Size 12\n\tHex ff ff ff ff\n"; EXPECT_EQ(s, out.str()); } TEST(DecoderTests, GivenValidBinaryWithoutPatchTokensWhenProcessingBinaryThenBinaryIsReadCorrectly) { auto programHeader = createProgramBinaryHeader(1, 0); std::string kernelName("ExampleKernel"); auto kernelHeader = createKernelBinaryHeaderCommon(static_cast(kernelName.size() + 1), 0); std::stringstream binarySS; binarySS.write(reinterpret_cast(&programHeader), sizeof(SProgramBinaryHeader)); binarySS.write(reinterpret_cast(&kernelHeader), sizeof(SKernelBinaryHeaderCommon)); binarySS.write(kernelName.c_str(), kernelHeader.KernelNameSize); std::stringstream ptmFile; MockDecoder decoder; decoder.pathToPatch = "test_files/"; decoder.pathToDump = "non_existing_folder/"; decoder.parseTokens(); std::string binaryString = binarySS.str(); std::vector binary(binaryString.begin(), binaryString.end()); const void *ptr = reinterpret_cast(binary.data()); int retVal = decoder.processBinary(ptr, ptmFile); EXPECT_EQ(0, retVal); std::string 
expectedOutput = "ProgramBinaryHeader:\n\t4 Magic 1229870147\n\t4 Version 0\n\t4 Device 0\n\t4 GPUPointerSizeInBytes 0\n\t4 NumberOfKernels 1\n\t4 SteppingId 0\n\t4 PatchListSize 0\nKernel #0\nKernelBinaryHeader:\n\t4 CheckSum 4294967295\n\t8 ShaderHashCode 18446744073709551615\n\t4 KernelNameSize 14\n\t4 PatchListSize 0\n\t4 KernelHeapSize 0\n\t4 GeneralStateHeapSize 0\n\t4 DynamicStateHeapSize 0\n\t4 SurfaceStateHeapSize 0\n\t4 KernelUnpaddedSize 0\n\tKernelName ExampleKernel\n"; EXPECT_EQ(expectedOutput, ptmFile.str()); } TEST(DecoderTests, GivenValidBinaryWhenProcessingBinaryThenProgramAndKernelAndPatchTokensAreReadCorrectly) { std::stringstream binarySS; //ProgramBinaryHeader auto programHeader = createProgramBinaryHeader(1, 30); binarySS.write(reinterpret_cast(&programHeader), sizeof(SProgramBinaryHeader)); //PATCH_TOKEN_ALLOCATE_CONSTANT_MEMORY_SURFACE_PROGRAM_BINARY_INFO SPatchAllocateConstantMemorySurfaceProgramBinaryInfo patchAllocateConstantMemory; patchAllocateConstantMemory.Token = 42; patchAllocateConstantMemory.Size = 16; patchAllocateConstantMemory.ConstantBufferIndex = 0; patchAllocateConstantMemory.InlineDataSize = 14; binarySS.write(reinterpret_cast(&patchAllocateConstantMemory), sizeof(patchAllocateConstantMemory)); //InlineData for (uint8_t i = 0; i < 14; ++i) { binarySS.write(reinterpret_cast(&i), sizeof(uint8_t)); } //KernelBinaryHeader std::string kernelName("ExampleKernel"); auto kernelHeader = createKernelBinaryHeaderCommon(static_cast(kernelName.size() + 1), 12); binarySS.write(reinterpret_cast(&kernelHeader), sizeof(SKernelBinaryHeaderCommon)); binarySS.write(kernelName.c_str(), kernelHeader.KernelNameSize); //PATCH_TOKEN_MEDIA_INTERFACE_DESCRIPTOR_LOAD SPatchMediaInterfaceDescriptorLoad patchMediaInterfaceDescriptorLoad; patchMediaInterfaceDescriptorLoad.Token = 19; patchMediaInterfaceDescriptorLoad.Size = 12; patchMediaInterfaceDescriptorLoad.InterfaceDescriptorDataOffset = 0; 
binarySS.write(reinterpret_cast(&patchMediaInterfaceDescriptorLoad), sizeof(SPatchMediaInterfaceDescriptorLoad)); std::string binaryString = binarySS.str(); std::vector binary(binaryString.begin(), binaryString.end()); std::stringstream ptmFile; MockDecoder decoder; decoder.pathToPatch = "test_files/"; decoder.pathToDump = "non_existing_folder/"; decoder.parseTokens(); const void *ptr = reinterpret_cast(binary.data()); int retVal = decoder.processBinary(ptr, ptmFile); EXPECT_EQ(0, retVal); std::string expectedOutput = "ProgramBinaryHeader:\n\t4 Magic 1229870147\n\t4 Version 0\n\t4 Device 0\n\t4 GPUPointerSizeInBytes 0\n\t4 NumberOfKernels 1\n\t4 SteppingId 0\n\t4 PatchListSize 30\nPATCH_TOKEN_ALLOCATE_CONSTANT_MEMORY_SURFACE_PROGRAM_BINARY_INFO:\n\t4 Token 42\n\t4 Size 16\n\t4 ConstantBufferIndex 0\n\t4 InlineDataSize 14\n\tHex 0 1 2 3 4 5 6 7 8 9 a b c d\nKernel #0\nKernelBinaryHeader:\n\t4 CheckSum 4294967295\n\t8 ShaderHashCode 18446744073709551615\n\t4 KernelNameSize 14\n\t4 PatchListSize 12\n\t4 KernelHeapSize 0\n\t4 GeneralStateHeapSize 0\n\t4 DynamicStateHeapSize 0\n\t4 SurfaceStateHeapSize 0\n\t4 KernelUnpaddedSize 0\n\tKernelName ExampleKernel\nPATCH_TOKEN_MEDIA_INTERFACE_DESCRIPTOR_LOAD:\n\t4 Token 19\n\t4 Size 12\n\t4 InterfaceDescriptorDataOffset 0\n"; EXPECT_EQ(expectedOutput, ptmFile.str()); EXPECT_TRUE(decoder.getMockIga()->disasmWasCalled); EXPECT_FALSE(decoder.getMockIga()->asmWasCalled); } TEST(DecoderTests, givenNonPatchtokensBinaryFormatWhenTryingToGetDevBinaryFormatThenDoNotReturnRawData) { MockDecoder decoder; std::map files; auto mockArgHelper = std::make_unique(files); decoder.argHelper = mockArgHelper.get(); files["mockgen.gen"] = "NOTMAGIC\n\n\n\n\n\n\n"; decoder.binaryFile = "mockgen.gen"; auto data = decoder.getDevBinary(); EXPECT_EQ(nullptr, data); } TEST(DecoderTests, givenPatchtokensBinaryFormatWhenTryingToGetDevBinaryThenRawDataIsReturned) { MockDecoder decoder; std::map files; auto mockArgHelper = std::make_unique(files); 
decoder.argHelper = mockArgHelper.get(); size_t dataSize = 11u; files["mockgen.gen"] = "CTNI\n\n\n\n\n\n\n"; decoder.binaryFile = "mockgen.gen"; auto data = decoder.getDevBinary(); std::string dataString(static_cast(data), dataSize); EXPECT_STREQ("CTNI\n\n\n\n\n\n\n", dataString.c_str()); } } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/offline_compiler/decoder/encoder_tests.cpp000066400000000000000000000461211422164147700321210ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/offline_compiler/source/decoder/binary_decoder.h" #include "shared/source/helpers/array_count.h" #include "shared/test/common/helpers/test_files.h" #include "gtest/gtest.h" #include "mock/mock_encoder.h" #include namespace NEO { TEST(EncoderTests, WhenParsingValidListOfParametersThenReturnValueIsZero) { std::vector args = { "ocloc", "asm", "-dump", "test_files/dump", "-out", "test_files/binary_gen.bin"}; MockEncoder encoder; EXPECT_EQ(0, encoder.validateInput(args)); } TEST(EncoderTests, WhenMissingParametersThenErrorCodeIsReturned) { std::vector args = { "ocloc", "asm", "-dump", "test_files/dump", "-out"}; MockEncoder encoder; EXPECT_NE(0, encoder.validateInput(args)); } TEST(EncoderTests, GivenWrongParametersWhenParsingParametersThenErrorCodeIsReturned) { std::vector args = { "ocloc", "asm", "-dump", "", "-out", "rasputin"}; MockEncoder encoder; EXPECT_NE(0, encoder.validateInput(args)); } TEST(EncoderTests, WhenTryingToCopyNonExistingFileThenErrorCodeIsReturned) { MockEncoder encoder; std::stringstream ss; auto retVal = encoder.copyBinaryToBinary("test_files/non_existing.bin", ss); EXPECT_FALSE(retVal); } TEST(EncoderTests, WhenWritingValuesToBinaryThenValuesAreWrittenCorrectly) { MockEncoder encoder; std::stringstream in; std::stringstream out; in.str("255 255 255 255"); std::string s = in.str(); encoder.write(in, out); uint8_t val1; out.read(reinterpret_cast(&val1), sizeof(uint8_t)); 
ASSERT_EQ(static_cast(255), val1); encoder.write(in, out); uint16_t val2; out.read(reinterpret_cast(&val2), sizeof(uint16_t)); ASSERT_EQ(static_cast(255), val2); encoder.write(in, out); uint32_t val3; out.read(reinterpret_cast(&val3), sizeof(uint32_t)); ASSERT_EQ(static_cast(255), val3); encoder.write(in, out); uint64_t val4; out.read(reinterpret_cast(&val4), sizeof(uint64_t)); ASSERT_EQ(static_cast(255), val4); } TEST(EncoderTests, GivenProperPTMFileFormatWhenWritingToBinaryThenValuesAreWrittenCorrectly) { MockEncoder encoder; std::stringstream out; out.str(""); std::string s = "ProgramBinaryHeader:"; int retVal = encoder.writeDeviceBinary(s, out); ASSERT_EQ(0, retVal); ASSERT_EQ("", out.str()); s = "Hex 48 65 6c 6c 6f 20 77 6f 72 6c 64"; retVal = encoder.writeDeviceBinary(s, out); ASSERT_EQ(0, retVal); ASSERT_EQ("Hello world", out.str()); s = "1 CheckOne 220"; out.str(""); retVal = encoder.writeDeviceBinary(s, out); ASSERT_EQ(0, retVal); uint8_t val1; out.read(reinterpret_cast(&val1), sizeof(uint8_t)); ASSERT_EQ(static_cast(220), val1); s = "2 CheckTwo 2428"; out.str(""); retVal = encoder.writeDeviceBinary(s, out); ASSERT_EQ(0, retVal); uint16_t val2; out.read(reinterpret_cast(&val2), sizeof(uint16_t)); ASSERT_EQ(static_cast(2428), val2); s = "4 CheckThree 242806820"; out.str(""); retVal = encoder.writeDeviceBinary(s, out); ASSERT_EQ(retVal, 0); uint32_t val3; out.read(reinterpret_cast(&val3), sizeof(uint32_t)); ASSERT_EQ(static_cast(242806820), val3); s = "8 CheckFour 242806820"; out.str(""); retVal = encoder.writeDeviceBinary(s, out); ASSERT_EQ(retVal, 0); uint64_t val4; out.read(reinterpret_cast(&val4), sizeof(uint64_t)); ASSERT_EQ(static_cast(242806820), val4); } TEST(EncoderTests, GivenImproperPTMFIleFormatWhenWritingToBinaryThenErrorCodeIsReturned) { std::string s = "3 UnknownSize 41243"; std::stringstream out(""); MockEncoder encoder; int retVal = encoder.writeDeviceBinary(s, out); ASSERT_EQ(-1, retVal); } TEST(EncoderTests, 
GivenIncorrectPatchListSizeWhileCalculatingPatchListSizeThenPatchListSizeIsSetToCorrectValue) { std::vector ptmFile; ptmFile.push_back("ProgramBinaryHeader:"); ptmFile.push_back("\t4 Magic 1229870147"); ptmFile.push_back("\t4 PatchListSize 14"); ptmFile.push_back("PATCH_TOKEN_ALLOCATE_CONSTANT_MEMORY_SURFACE_PROGRAM_BINARY_INFO:"); ptmFile.push_back("\t8 Token 42"); ptmFile.push_back("\t4 Size 16"); ptmFile.push_back("\t1 ConstantBufferIndex 0"); ptmFile.push_back("\t2 InlineDataSize 14"); ptmFile.push_back("\tHex 48 65 6c 6c 6f 20 77 6f 72 6c 64 21 a 0"); ptmFile.push_back("Kernel #0"); MockEncoder encoder; encoder.calculatePatchListSizes(ptmFile); EXPECT_EQ("\t4 PatchListSize 29", ptmFile[2]); } TEST(EncoderTests, GivenCorrectPTMFileWhileProcessingThenCorrectProgramHeaderExpected) { std::stringstream expectedBinary; uint8_t byte; uint32_t byte4; byte4 = 1229870147; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 1042; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 12; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 4; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 1; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 2; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 18; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 42; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 16; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 0; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 2; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte = 0x48; expectedBinary.write(reinterpret_cast(&byte), sizeof(uint8_t)); byte = 0x65; expectedBinary.write(reinterpret_cast(&byte), sizeof(uint8_t)); std::vector ptmFile; ptmFile.push_back("ProgramBinaryHeader:"); ptmFile.push_back("\t4 Magic 1229870147"); 
ptmFile.push_back("\t4 Version 1042"); ptmFile.push_back("\t4 Device 12"); ptmFile.push_back("\t4 GPUPointerSizeInBytes 4"); ptmFile.push_back("\t4 NumberOfKernels 1"); ptmFile.push_back("\t4 SteppingId 2"); ptmFile.push_back("\t4 PatchListSize 18"); ptmFile.push_back("PATCH_TOKEN_ALLOCATE_CONSTANT_MEMORY_SURFACE_PROGRAM_BINARY_INFO:"); ptmFile.push_back("\t4 Token 42"); ptmFile.push_back("\t4 Size 16"); ptmFile.push_back("\t4 ConstantBufferIndex 0"); ptmFile.push_back("\t4 InlineDataSize 2"); ptmFile.push_back("\tHex 48 65"); ptmFile.push_back("Kernel #0"); ptmFile.push_back("KernelBinaryHeader:"); ptmFile.push_back("\t4 CheckSum 2316678223"); ptmFile.push_back("\t8 ShaderHashCode 4988534869940066475"); ptmFile.push_back("\t4 KernelNameSize 12"); ptmFile.push_back("\t4 PatchListSize 0"); ptmFile.push_back("\t4 KernelHeapSize 0"); ptmFile.push_back("\t4 GeneralStateHeapSize 0"); ptmFile.push_back("\t4 DynamicStateHeapSize 0"); ptmFile.push_back("\t4 KernelUnpaddedSize 520"); ptmFile.push_back("\tKernelName kernel"); std::stringstream binary; MockEncoder().processBinary(ptmFile, binary); EXPECT_EQ(expectedBinary.str(), binary.str()); } TEST(EncoderTests, WhenAddPaddingIsCalledThenProperNumberOfZerosIsAdded) { std::stringstream stream; stream << "aa"; MockEncoder().addPadding(stream, 8); std::string asString = stream.str(); ASSERT_EQ(10U, asString.size()); char expected[] = {'a', 'a', 0, 0, 0, 0, 0, 0, 0, 0}; EXPECT_EQ(0, memcmp(asString.c_str(), expected, 10U)); } TEST(EncoderTests, WhenProcessingDeviceBinaryThenProperChecksumIsCalculated) { std::stringstream expectedBinary; uint8_t byte; uint32_t byte4; uint64_t byte8; MockEncoder encoder; std::string kernelName = "kernel"; encoder.filesMap["kernel_DynamicStateHeap.bin"] = std::string(16, 2); encoder.filesMap["kernel_KernelHeap.dat"] = std::string(16, 4); encoder.filesMap["kernel_SurfaceStateHeap.bin"] = std::string(16, 8); std::stringstream kernelBlob; kernelBlob << kernelName; 
kernelBlob.write(encoder.filesMap["kernel_KernelHeap.dat"].data(), encoder.filesMap["kernel_KernelHeap.dat"].size()); encoder.addPadding(kernelBlob, 128); // isa prefetch padding encoder.addPadding(kernelBlob, 64 - (encoder.filesMap["kernel_KernelHeap.dat"].size() + 128) % 64); // isa alignment size_t kernelHeapSize = encoder.filesMap["kernel_KernelHeap.dat"].size(); kernelHeapSize = alignUp(kernelHeapSize + 128, 64); kernelBlob.write(encoder.filesMap["kernel_DynamicStateHeap.bin"].data(), encoder.filesMap["kernel_DynamicStateHeap.bin"].size()); kernelBlob.write(encoder.filesMap["kernel_SurfaceStateHeap.bin"].data(), encoder.filesMap["kernel_SurfaceStateHeap.bin"].size()); auto kernelBlobData = kernelBlob.str(); uint64_t hashValue = NEO::Hash::hash(reinterpret_cast(kernelBlobData.data()), kernelBlobData.size()); uint32_t checksum = hashValue & 0xFFFFFFFF; byte4 = 1229870147; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 1042; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 12; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 4; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 1; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 2; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 18; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 42; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 16; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 0; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 2; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte = 0x48; expectedBinary.write(reinterpret_cast(&byte), sizeof(uint8_t)); byte = 0x65; expectedBinary.write(reinterpret_cast(&byte), sizeof(uint8_t)); byte4 = checksum; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte8 = 4988534869940066475; 
expectedBinary.write(reinterpret_cast(&byte8), sizeof(uint64_t)); byte4 = static_cast(kernelName.size()); expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 0; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = static_cast(kernelHeapSize); expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 0; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 16; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = static_cast(encoder.filesMap["kernel_KernelHeap.dat"].size()); expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); expectedBinary.write(kernelName.c_str(), kernelName.length()); expectedBinary.write(encoder.filesMap["kernel_KernelHeap.dat"].data(), encoder.filesMap["kernel_KernelHeap.dat"].size()); std::vector ptmFile; ptmFile.push_back("ProgramBinaryHeader:"); ptmFile.push_back("\t4 Magic 1229870147"); ptmFile.push_back("\t4 Version 1042"); ptmFile.push_back("\t4 Device 12"); ptmFile.push_back("\t4 GPUPointerSizeInBytes 4"); ptmFile.push_back("\t4 NumberOfKernels 1"); ptmFile.push_back("\t4 SteppingId 2"); ptmFile.push_back("\t4 PatchListSize 18"); ptmFile.push_back("PATCH_TOKEN_ALLOCATE_CONSTANT_MEMORY_SURFACE_PROGRAM_BINARY_INFO:"); ptmFile.push_back("\t4 Token 42"); ptmFile.push_back("\t4 Size 16"); ptmFile.push_back("\t4 ConstantBufferIndex 0"); ptmFile.push_back("\t4 InlineDataSize 2"); ptmFile.push_back("\tHex 48 65"); ptmFile.push_back("Kernel #0"); ptmFile.push_back("KernelBinaryHeader:"); ptmFile.push_back("\t4 CheckSum 0"); ptmFile.push_back("\t8 ShaderHashCode 4988534869940066475"); ptmFile.push_back("\t4 KernelNameSize " + std::to_string(kernelName.size())); ptmFile.push_back("\t4 PatchListSize 0"); ptmFile.push_back("\t4 KernelHeapSize 16"); ptmFile.push_back("\t4 GeneralStateHeapSize 0"); ptmFile.push_back("\t4 DynamicStateHeapSize 16"); ptmFile.push_back("\t4 KernelUnpaddedSize 16"); ptmFile.push_back("\tKernelName " + 
kernelName); std::stringstream result; auto ret = encoder.processBinary(ptmFile, result); auto resultAsString = result.str(); EXPECT_EQ(0, ret); auto expectedBinaryAsString = expectedBinary.str(); resultAsString.resize(expectedBinaryAsString.size()); // don't test beyond kernel header EXPECT_EQ(expectedBinaryAsString, resultAsString); EXPECT_FALSE(encoder.getMockIga()->disasmWasCalled); EXPECT_FALSE(encoder.getMockIga()->asmWasCalled); } TEST(EncoderTests, WhenProcessingDeviceBinaryAndAsmIsAvailableThenAseembleItWithIga) { std::stringstream expectedBinary; uint8_t byte; uint32_t byte4; uint64_t byte8; MockEncoder encoder; encoder.getMockIga()->binaryToReturn = std::string(32, 13); std::string kernelName = "kernel"; encoder.filesMap["kernel_DynamicStateHeap.bin"] = std::string(16, 2); encoder.filesMap["kernel_KernelHeap.dat"] = std::string(16, 4); encoder.filesMap["kernel_KernelHeap.asm"] = std::string(16, 7); encoder.filesMap["kernel_SurfaceStateHeap.bin"] = std::string(16, 8); std::stringstream kernelBlob; kernelBlob << kernelName; kernelBlob.write(encoder.getMockIga()->binaryToReturn.c_str(), encoder.getMockIga()->binaryToReturn.size()); encoder.addPadding(kernelBlob, 128); // isa prefetch padding encoder.addPadding(kernelBlob, 64 - (encoder.getMockIga()->binaryToReturn.size() + 128) % 64); // isa alignment size_t kernelHeapSize = encoder.getMockIga()->binaryToReturn.size(); kernelHeapSize = alignUp(kernelHeapSize + 128, 64); kernelBlob.write(encoder.filesMap["kernel_DynamicStateHeap.bin"].data(), encoder.filesMap["kernel_DynamicStateHeap.bin"].size()); kernelBlob.write(encoder.filesMap["kernel_SurfaceStateHeap.bin"].data(), encoder.filesMap["kernel_SurfaceStateHeap.bin"].size()); auto kernelBlobData = kernelBlob.str(); uint64_t hashValue = NEO::Hash::hash(reinterpret_cast(kernelBlobData.data()), kernelBlobData.size()); uint32_t checksum = hashValue & 0xFFFFFFFF; byte4 = 1229870147; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 1042; 
expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 12; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 4; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 1; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 2; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 18; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 42; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 16; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 0; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 2; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte = 0x48; expectedBinary.write(reinterpret_cast(&byte), sizeof(uint8_t)); byte = 0x65; expectedBinary.write(reinterpret_cast(&byte), sizeof(uint8_t)); byte4 = checksum; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte8 = 4988534869940066475; expectedBinary.write(reinterpret_cast(&byte8), sizeof(uint64_t)); byte4 = static_cast(kernelName.size()); expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 0; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = static_cast(kernelHeapSize); expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 0; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = static_cast(16); expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = static_cast(encoder.getMockIga()->binaryToReturn.size()); expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); expectedBinary.write(kernelName.c_str(), kernelName.length()); expectedBinary.write(encoder.getMockIga()->binaryToReturn.data(), encoder.getMockIga()->binaryToReturn.size()); std::vector ptmFile; ptmFile.push_back("ProgramBinaryHeader:"); ptmFile.push_back("\t4 Magic 1229870147"); ptmFile.push_back("\t4 Version 
1042"); ptmFile.push_back("\t4 Device 12"); ptmFile.push_back("\t4 GPUPointerSizeInBytes 4"); ptmFile.push_back("\t4 NumberOfKernels 1"); ptmFile.push_back("\t4 SteppingId 2"); ptmFile.push_back("\t4 PatchListSize 18"); ptmFile.push_back("PATCH_TOKEN_ALLOCATE_CONSTANT_MEMORY_SURFACE_PROGRAM_BINARY_INFO:"); ptmFile.push_back("\t4 Token 42"); ptmFile.push_back("\t4 Size 16"); ptmFile.push_back("\t4 ConstantBufferIndex 0"); ptmFile.push_back("\t4 InlineDataSize 2"); ptmFile.push_back("\tHex 48 65"); ptmFile.push_back("Kernel #0"); ptmFile.push_back("KernelBinaryHeader:"); ptmFile.push_back("\t4 CheckSum 0"); ptmFile.push_back("\t8 ShaderHashCode 4988534869940066475"); ptmFile.push_back("\t4 KernelNameSize " + std::to_string(kernelName.size())); ptmFile.push_back("\t4 PatchListSize 0"); ptmFile.push_back("\t4 KernelHeapSize 16"); ptmFile.push_back("\t4 GeneralStateHeapSize 0"); ptmFile.push_back("\t4 DynamicStateHeapSize 16"); ptmFile.push_back("\t4 KernelUnpaddedSize 16"); ptmFile.push_back("\tKernelName " + kernelName); std::stringstream result; auto ret = encoder.processBinary(ptmFile, result); auto resultAsString = result.str(); EXPECT_EQ(0, ret); auto expectedBinaryAsString = expectedBinary.str(); resultAsString.resize(expectedBinaryAsString.size()); // don't test beyond kernel header EXPECT_EQ(expectedBinaryAsString, resultAsString); EXPECT_FALSE(encoder.getMockIga()->disasmWasCalled); EXPECT_TRUE(encoder.getMockIga()->asmWasCalled); EXPECT_EQ(encoder.filesMap["kernel_KernelHeap.asm"], encoder.getMockIga()->receivedAsm); } } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/offline_compiler/decoder/mock/000077500000000000000000000000001422164147700275015ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/offline_compiler/decoder/mock/mock_decoder.h000066400000000000000000000026531422164147700322760ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma 
once #include "shared/offline_compiler/source/decoder/binary_decoder.h" #include "mock_iga_wrapper.h" struct MockDecoder : public BinaryDecoder { MockDecoder() : MockDecoder("", "", "") { } MockDecoder(const std::string &file, const std::string &patch, const std::string &dump) : BinaryDecoder(file, patch, dump) { this->iga.reset(new MockIgaWrapper); oclocArgHelperWithoutInput = std::make_unique(); argHelper = oclocArgHelperWithoutInput.get(); argHelper->getPrinterRef() = MessagePrinter(true); }; using BinaryDecoder::argHelper; using BinaryDecoder::binaryFile; using BinaryDecoder::decode; using BinaryDecoder::getDevBinary; using BinaryDecoder::getSize; using BinaryDecoder::iga; using BinaryDecoder::kernelHeader; using BinaryDecoder::parseTokens; using BinaryDecoder::patchTokens; using BinaryDecoder::pathToDump; using BinaryDecoder::pathToPatch; using BinaryDecoder::processBinary; using BinaryDecoder::processKernel; using BinaryDecoder::programHeader; using BinaryDecoder::readPatchTokens; using BinaryDecoder::readStructFields; std::unique_ptr oclocArgHelperWithoutInput; MockIgaWrapper *getMockIga() const { return static_cast(iga.get()); } }; compute-runtime-22.14.22890/opencl/test/unit_test/offline_compiler/decoder/mock/mock_encoder.h000066400000000000000000000036051422164147700323060ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/offline_compiler/source/decoder/binary_encoder.h" #include "shared/source/helpers/hash.h" #include "opencl/test/unit_test/offline_compiler/mock/mock_argument_helper.h" #include "mock_iga_wrapper.h" #include #include struct MockEncoder : public BinaryEncoder { MockEncoder() : MockEncoder("", ""){}; MockEncoder(const std::string &dump, const std::string &elf) : BinaryEncoder(dump, elf) { this->iga.reset(new MockIgaWrapper); oclocArgHelperWithoutInput = std::make_unique(filesMap); argHelper = oclocArgHelperWithoutInput.get(); 
argHelper->getPrinterRef() = MessagePrinter(true); }; std::map filesMap; bool copyBinaryToBinary(const std::string &srcFileName, std::ostream &outBinary, uint32_t *binaryLength) override { auto it = filesMap.find(srcFileName); if (it == filesMap.end()) { return false; } outBinary.write(it->second.c_str(), it->second.size()); if (binaryLength != nullptr) { *binaryLength = static_cast(it->second.size()); } return true; } using BinaryEncoder::addPadding; using BinaryEncoder::calculatePatchListSizes; using BinaryEncoder::copyBinaryToBinary; using BinaryEncoder::createElf; using BinaryEncoder::elfName; using BinaryEncoder::encode; using BinaryEncoder::iga; using BinaryEncoder::pathToDump; using BinaryEncoder::processBinary; using BinaryEncoder::processKernel; using BinaryEncoder::write; using BinaryEncoder::writeDeviceBinary; std::unique_ptr oclocArgHelperWithoutInput; MockIgaWrapper *getMockIga() const { return static_cast(iga.get()); } }; compute-runtime-22.14.22890/opencl/test/unit_test/offline_compiler/decoder/mock/mock_iga_wrapper.h000066400000000000000000000023051422164147700331630ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/offline_compiler/source/decoder/iga_wrapper.h" #include #include struct MockIgaWrapper : public IgaWrapper { bool tryDisassembleGenISA(const void *kernelPtr, uint32_t kernelSize, std::string &out) override { out = asmToReturn; disasmWasCalled = true; receivedBinary.assign(reinterpret_cast(kernelPtr), kernelSize); return asmToReturn.size() != 0; } bool tryAssembleGenISA(const std::string &inAsm, std::string &outBinary) override { outBinary = binaryToReturn; asmWasCalled = true; receivedAsm = inAsm; return outBinary.size() != 0; } void setGfxCore(GFXCORE_FAMILY core) override { } void setProductFamily(PRODUCT_FAMILY product) override { } bool isKnownPlatform() const override { return false; } bool tryLoadIga() override { return true; } std::string 
asmToReturn; std::string binaryToReturn; std::string receivedAsm; std::string receivedBinary; bool disasmWasCalled = false; bool asmWasCalled = false; }; compute-runtime-22.14.22890/opencl/test/unit_test/offline_compiler/environment.h000066400000000000000000000031601422164147700276600ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/offline_compiler/source/offline_compiler.h" #include "shared/source/os_interface/os_inc_base.h" #include "shared/test/common/helpers/test_files.h" #include "shared/test/common/mocks/mock_compilers.h" #include "gtest/gtest.h" class Environment : public ::testing::Environment { public: Environment(const std::string &devicePrefix, const std::string &familyNameWithType) : libraryFrontEnd(nullptr), libraryIGC(nullptr), devicePrefix(devicePrefix), familyNameWithType(familyNameWithType) { } void SetInputFileName( const std::string filename) { retrieveBinaryKernelFilename(igcDebugVars.fileName, filename + "_", ".gen"); retrieveBinaryKernelFilename(fclDebugVars.fileName, filename + "_", ".bc"); NEO::setIgcDebugVars(igcDebugVars); NEO::setFclDebugVars(fclDebugVars); } void SetUp() override { mockCompilerGuard.Enable(); SetInputFileName("copybuffer"); } void TearDown() override { delete libraryFrontEnd; delete libraryIGC; mockCompilerGuard.Disable(); } NEO::OsLibrary *libraryFrontEnd; NEO::OsLibrary *libraryIGC; NEO::MockCompilerDebugVars igcDebugVars; NEO::MockCompilerDebugVars fclDebugVars; void (*igcSetDebugVarsFPtr)(NEO::MockCompilerDebugVars &debugVars); void (*fclSetDebugVarsFPtr)(NEO::MockCompilerDebugVars &debugVars); NEO::MockCompilerEnableGuard mockCompilerGuard; const std::string devicePrefix; const std::string familyNameWithType; }; 
compute-runtime-22.14.22890/opencl/test/unit_test/offline_compiler/gen12lp/000077500000000000000000000000001422164147700264135ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/offline_compiler/gen12lp/CMakeLists.txt000066400000000000000000000004701422164147700311540ustar00rootroot00000000000000# # Copyright (C) 2021 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_GEN12LP) set(IGDRCL_SRCS_offline_compiler_tests_gen12lp ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) target_sources(ocloc_tests PRIVATE ${IGDRCL_SRCS_offline_compiler_tests_gen12lp}) add_subdirectories() endif() compute-runtime-22.14.22890/opencl/test/unit_test/offline_compiler/gen12lp/rkl/000077500000000000000000000000001422164147700272035ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/offline_compiler/gen12lp/rkl/CMakeLists.txt000066400000000000000000000005261422164147700317460ustar00rootroot00000000000000# # Copyright (C) 2021 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_RKL) set(IGDRCL_SRCS_offline_compiler_tests_rkl ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/offline_compiler_tests_rkl.cpp ) target_sources(ocloc_tests PRIVATE ${IGDRCL_SRCS_offline_compiler_tests_rkl}) endif() offline_compiler_tests_rkl.cpp000066400000000000000000000017561422164147700352470ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/offline_compiler/gen12lp/rkl/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/compiler_interface/compiler_options/compiler_options.h" #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/offline_compiler/mock/mock_offline_compiler.h" #include "opencl/test/unit_test/offline_compiler/offline_compiler_tests.h" using namespace NEO; using MockOfflineCompilerRklTests = ::testing::Test; RKLTEST_F(MockOfflineCompilerRklTests, 
givenRklWhenAppendExtraInternalOptionsThenForceEmuInt32DivRemSPIsApplied) { MockOfflineCompiler mockOfflineCompiler; mockOfflineCompiler.deviceName = "rkl"; mockOfflineCompiler.initHardwareInfo(mockOfflineCompiler.deviceName); std::string internalOptions = mockOfflineCompiler.internalOptions; mockOfflineCompiler.appendExtraInternalOptions(internalOptions); size_t found = internalOptions.find(NEO::CompilerOptions::forceEmuInt32DivRemSP.data()); EXPECT_NE(std::string::npos, found); } compute-runtime-22.14.22890/opencl/test/unit_test/offline_compiler/gen8/000077500000000000000000000000001422164147700260045ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/offline_compiler/gen8/CMakeLists.txt000066400000000000000000000004611422164147700305450ustar00rootroot00000000000000# # Copyright (C) 2021 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_GEN8) set(IGDRCL_SRCS_offline_compiler_tests_gen8 ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) target_sources(ocloc_tests PRIVATE ${IGDRCL_SRCS_offline_compiler_tests_gen8}) add_subdirectories() endif() compute-runtime-22.14.22890/opencl/test/unit_test/offline_compiler/gen8/bdw/000077500000000000000000000000001422164147700265605ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/offline_compiler/gen8/bdw/CMakeLists.txt000066400000000000000000000005271422164147700313240ustar00rootroot00000000000000# # Copyright (C) 2021 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_BDW) set(IGDRCL_SRCS_offline_compiler_tests_bdw ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/offline_compiler_tests_bdw.cpp ) target_sources(ocloc_tests PRIVATE ${IGDRCL_SRCS_offline_compiler_tests_bdw}) endif() offline_compiler_tests_bdw.cpp000066400000000000000000000032541422164147700346030ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/offline_compiler/gen8/bdw/* * Copyright (C) 2021-2022 Intel Corporation * * 
SPDX-License-Identifier: MIT * */ #include "shared/source/compiler_interface/compiler_options/compiler_options.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/helpers/gtest_helpers.h" #include "opencl/test/unit_test/offline_compiler/mock/mock_offline_compiler.h" #include "opencl/test/unit_test/offline_compiler/offline_compiler_tests.h" #include "gtest/gtest.h" using namespace NEO; using MockOfflineCompilerBdwTests = ::testing::Test; BDWTEST_F(MockOfflineCompilerBdwTests, givenDebugOptionAndBdwThenInternalOptionShouldNotContainKernelDebugEnable) { std::vector argv = { "ocloc", "-q", "-options", "-g", "-file", "test_files/copybuffer.cl", "-device", "bdw"}; auto mockOfflineCompiler = std::unique_ptr(new MockOfflineCompiler()); ASSERT_NE(nullptr, mockOfflineCompiler); mockOfflineCompiler->initialize(argv.size(), argv); std::string internalOptions = mockOfflineCompiler->internalOptions; EXPECT_FALSE(hasSubstr(internalOptions, "-cl-kernel-debug-enable")); } BDWTEST_F(MockOfflineCompilerBdwTests, GivenBdwWhenParseDebugSettingsThenContainsHasBufferOffsetArg) { MockOfflineCompiler mockOfflineCompiler; mockOfflineCompiler.deviceName = "bdw"; mockOfflineCompiler.initHardwareInfo(mockOfflineCompiler.deviceName); mockOfflineCompiler.parseDebugSettings(); std::string internalOptions = mockOfflineCompiler.internalOptions; size_t found = internalOptions.find(NEO::CompilerOptions::hasBufferOffsetArg.data()); EXPECT_EQ(std::string::npos, found); } compute-runtime-22.14.22890/opencl/test/unit_test/offline_compiler/gen9/000077500000000000000000000000001422164147700260055ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/offline_compiler/gen9/CMakeLists.txt000066400000000000000000000004601422164147700305450ustar00rootroot00000000000000# # Copyright (C) 2021 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_GEN9) set(IGDRCL_SRCS_offline_compiler_tests_gen9 ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) 
target_sources(ocloc_tests PRIVATE ${IGDRCL_SRCS_offline_compiler_tests_gen9}) add_subdirectories() endif() compute-runtime-22.14.22890/opencl/test/unit_test/offline_compiler/gen9/skl/000077500000000000000000000000001422164147700265765ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/offline_compiler/gen9/skl/CMakeLists.txt000066400000000000000000000005271422164147700313420ustar00rootroot00000000000000# # Copyright (C) 2021 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_SKL) set(IGDRCL_SRCS_offline_compiler_tests_skl ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/offline_compiler_tests_skl.cpp ) target_sources(ocloc_tests PRIVATE ${IGDRCL_SRCS_offline_compiler_tests_skl}) endif() offline_compiler_tests_skl.cpp000066400000000000000000000056601422164147700346410ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/offline_compiler/gen9/skl/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/compiler_interface/compiler_options/compiler_options.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/offline_compiler/mock/mock_offline_compiler.h" #include "opencl/test/unit_test/offline_compiler/offline_compiler_tests.h" namespace NEO { using MockOfflineCompilerSklTests = ::testing::Test; SKLTEST_F(MockOfflineCompilerSklTests, GivenSklWhenParseDebugSettingsThenStatelessToStatefullOptimizationIsEnabled) { MockOfflineCompiler mockOfflineCompiler; mockOfflineCompiler.deviceName = "skl"; mockOfflineCompiler.initHardwareInfo(mockOfflineCompiler.deviceName); mockOfflineCompiler.parseDebugSettings(); std::string internalOptions = mockOfflineCompiler.internalOptions; size_t found = internalOptions.find(NEO::CompilerOptions::hasBufferOffsetArg.data()); 
EXPECT_NE(std::string::npos, found); } SKLTEST_F(MockOfflineCompilerSklTests, GivenSklAndDisabledViaDebugThenStatelessToStatefullOptimizationDisabled) { DebugManagerStateRestore stateRestore; MockOfflineCompiler mockOfflineCompiler; mockOfflineCompiler.deviceName = "skl"; DebugManager.flags.EnableStatelessToStatefulBufferOffsetOpt.set(0); mockOfflineCompiler.initHardwareInfo(mockOfflineCompiler.deviceName); mockOfflineCompiler.setStatelessToStatefullBufferOffsetFlag(); std::string internalOptions = mockOfflineCompiler.internalOptions; size_t found = internalOptions.find(NEO::CompilerOptions::hasBufferOffsetArg.data()); EXPECT_EQ(std::string::npos, found); } SKLTEST_F(MockOfflineCompilerSklTests, givenSklWhenAppendExtraInternalOptionsThenForceEmuInt32DivRemSPIsNotApplied) { MockOfflineCompiler mockOfflineCompiler; mockOfflineCompiler.deviceName = "skl"; mockOfflineCompiler.initHardwareInfo(mockOfflineCompiler.deviceName); std::string internalOptions = mockOfflineCompiler.internalOptions; mockOfflineCompiler.appendExtraInternalOptions(internalOptions); size_t found = internalOptions.find(NEO::CompilerOptions::forceEmuInt32DivRemSP.data()); EXPECT_EQ(std::string::npos, found); } SKLTEST_F(MockOfflineCompilerSklTests, givenSklWhenAppendExtraInternalOptionsThenGreaterThan4gbBuffersRequiredIsNotSet) { MockOfflineCompiler mockOfflineCompiler; mockOfflineCompiler.deviceName = "skl"; mockOfflineCompiler.initHardwareInfo(mockOfflineCompiler.deviceName); std::string internalOptions = mockOfflineCompiler.internalOptions; mockOfflineCompiler.forceStatelessToStatefulOptimization = false; mockOfflineCompiler.appendExtraInternalOptions(internalOptions); size_t found = internalOptions.find(NEO::CompilerOptions::greaterThan4gbBuffersRequired.data()); EXPECT_EQ(std::string::npos, found); } } // namespace NEOcompute-runtime-22.14.22890/opencl/test/unit_test/offline_compiler/main.cpp000066400000000000000000000113001422164147700265660ustar00rootroot00000000000000/* * Copyright (C) 
2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/os_library.h" #include "shared/test/common/helpers/custom_event_listener.h" #include "shared/test/common/helpers/test_files.h" #include "shared/test/common/libult/signal_utils.h" #include "shared/test/unit_test/test_stats.h" #include "environment.h" #include "limits.h" #ifdef WIN32 const char *fSeparator = "\\"; #elif defined(__linux__) const char *fSeparator = "/"; #endif Environment *gEnvironment; extern PRODUCT_FAMILY productFamily; extern GFXCORE_FAMILY renderCoreFamily; std::string getRunPath() { char *cwd; #if defined(__linux__) cwd = getcwd(nullptr, 0); #else cwd = _getcwd(nullptr, 0); #endif std::string res{cwd}; free(cwd); return res; } int main(int argc, char **argv) { int retVal = 0; bool useDefaultListener = false; bool enableAlarm = true; bool showTestStats = false; std::string devicePrefix("skl"); std::string familyNameWithType("Gen9core"); std::string revId("0"); #if defined(__linux__) if (getenv("CLOC_SELFTEST") == nullptr) { setenv("CLOC_SELFTEST", "YES", 1); char *ldLibraryPath = getenv("LD_LIBRARY_PATH"); if (ldLibraryPath == nullptr) { setenv("LD_LIBRARY_PATH", getRunPath().c_str(), 1); } else { std::string ldLibraryPathConcat = getRunPath() + ":" + std::string(ldLibraryPath); setenv("LD_LIBRARY_PATH", ldLibraryPathConcat.c_str(), 1); } execv(argv[0], argv); //execv failed, we return with error printf("FATAL ERROR: cannot self-exec test!\n"); return -1; } #endif ::testing::InitGoogleTest(&argc, argv); if (argc > 0) { // parse remaining args assuming they're mine for (int i = 0; i < argc; i++) { if (strcmp("--use_default_listener", argv[i]) == 0) { useDefaultListener = true; } else if (!strcmp("--disable_alarm", argv[i])) { enableAlarm = false; } else if (strcmp("--device", argv[i]) == 0) { ++i; devicePrefix = argv[i]; } else if (strcmp("--family_type", argv[i]) == 0) { ++i; familyNameWithType = argv[i]; } else if (strcmp("--rev_id", argv[i]) == 
0) { ++i; revId = argv[i]; } else if (!strcmp("--show_test_stats", argv[i])) { showTestStats = true; } } } if (showTestStats) { std::cout << getTestStats() << std::endl; return 0; } for (unsigned int productId = 0; productId < IGFX_MAX_PRODUCT; ++productId) { if (NEO::hardwarePrefix[productId] && (0 == strcmp(devicePrefix.c_str(), NEO::hardwarePrefix[productId]))) { if (NEO::hardwareInfoTable[productId]) { renderCoreFamily = NEO::hardwareInfoTable[productId]->platform.eRenderCoreFamily; productFamily = NEO::hardwareInfoTable[productId]->platform.eProductFamily; break; } } } // we look for test files always relative to binary location // this simplifies multi-process execution and using different // working directories std::string nTestFiles = getRunPath(); nTestFiles.append("/"); nTestFiles.append(familyNameWithType); nTestFiles.append("/"); nTestFiles.append(revId); nTestFiles.append("/"); nTestFiles.append(testFiles); testFiles = nTestFiles; binaryNameSuffix.append(familyNameWithType); #ifdef WIN32 #include if (_chdir(familyNameWithType.c_str())) { std::cout << "chdir into " << familyNameWithType << " directory failed.\nThis might cause test failures." << std::endl; } #elif defined(__linux__) #include if (chdir(familyNameWithType.c_str()) != 0) { std::cout << "chdir into " << familyNameWithType << " directory failed.\nThis might cause test failures." 
<< std::endl; } #endif if (useDefaultListener == false) { ::testing::TestEventListeners &listeners = ::testing::UnitTest::GetInstance()->listeners(); ::testing::TestEventListener *defaultListener = listeners.default_result_printer(); auto customEventListener = new CCustomEventListener(defaultListener); listeners.Release(listeners.default_result_printer()); listeners.Append(customEventListener); } gEnvironment = reinterpret_cast(::testing::AddGlobalTestEnvironment(new Environment(devicePrefix, familyNameWithType))); int sigOut = setAlarm(enableAlarm); if (sigOut != 0) return sigOut; retVal = RUN_ALL_TESTS(); return retVal; } compute-runtime-22.14.22890/opencl/test/unit_test/offline_compiler/mock/000077500000000000000000000000001422164147700260745ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/offline_compiler/mock/mock_argument_helper.h000066400000000000000000000076111422164147700324440ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/offline_compiler/source/ocloc_arg_helper.h" #include "shared/source/helpers/string.h" #include "gtest/gtest.h" #include #include #include class MockOclocArgHelper : public OclocArgHelper { public: using FileName = std::string; using FileData = std::string; using FilesMap = std::map; using OclocArgHelper::deviceProductTable; FilesMap &filesMap; bool interceptOutput{false}; bool shouldLoadDataFromFileReturnZeroSize{false}; FilesMap interceptedFiles; std::vector createdFiles{}; bool callBaseFileExists = false; bool callBaseReadBinaryFile = false; bool callBaseLoadDataFromFile = false; bool callBaseSaveOutput = false; bool callBaseReadFileToVectorOfStrings = false; bool shouldReturnEmptyVectorOfStrings = false; MockOclocArgHelper(FilesMap &filesMap) : OclocArgHelper(0, nullptr, nullptr, nullptr, 0, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr), filesMap(filesMap){}; ~MockOclocArgHelper() { 
cleanUpOutput(); } void setAllCallBase(bool value) { callBaseFileExists = value; callBaseReadBinaryFile = value; callBaseLoadDataFromFile = value; callBaseSaveOutput = value; callBaseReadFileToVectorOfStrings = value; } protected: bool fileExists(const std::string &filename) const override { if (callBaseFileExists) { return OclocArgHelper::fileExists(filename); } return filesMap.find(filename) != filesMap.end(); } void readFileToVectorOfStrings(const std::string &filename, std::vector &lines) override { if (callBaseReadFileToVectorOfStrings) { return OclocArgHelper::readFileToVectorOfStrings(filename, lines); } if (shouldReturnEmptyVectorOfStrings) { lines.clear(); } } std::vector readBinaryFile(const std::string &filename) override { if (callBaseReadBinaryFile) { return OclocArgHelper::readBinaryFile(filename); } auto file = filesMap[filename]; return std::vector(file.begin(), file.end()); } std::unique_ptr loadDataFromFile(const std::string &filename, size_t &retSize) override { if (callBaseLoadDataFromFile) { return OclocArgHelper::loadDataFromFile(filename, retSize); } if (shouldLoadDataFromFileReturnZeroSize) { retSize = 0; return {}; } if (!fileExists(filename)) { return OclocArgHelper::loadDataFromFile(filename, retSize); } const auto &file = filesMap[filename]; std::unique_ptr result{new char[file.size() + 1]}; std::copy(file.begin(), file.end(), result.get()); result[file.size()] = '\0'; retSize = file.size() + 1; return result; } void saveOutput(const std::string &filename, const void *pData, const size_t &dataSize) override { if (interceptOutput) { auto &fileContent = interceptedFiles[filename]; fileContent.resize(dataSize, '\0'); memcpy_s(fileContent.data(), fileContent.size(), pData, dataSize); } else { if (callBaseSaveOutput) { createdFiles.push_back(filename.c_str()); } OclocArgHelper::saveOutput(filename, pData, dataSize); } } void cleanUpOutput() { for (const auto &fileName : createdFiles) { int retVal = remove(fileName.c_str()); EXPECT_EQ(0, 
retVal); if (retVal != 0) { auto errMsg = "Error deleting file: " + fileName; perror(errMsg.c_str()); } } } }; compute-runtime-22.14.22890/opencl/test/unit_test/offline_compiler/mock/mock_multi_command.h000066400000000000000000000021471422164147700321120ustar00rootroot00000000000000/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/offline_compiler/source/multi_command.h" #include "opencl/test/unit_test/offline_compiler/mock/mock_argument_helper.h" #include #include namespace NEO { class MockMultiCommand : public MultiCommand { public: using MultiCommand::argHelper; using MultiCommand::quiet; using MultiCommand::retValues; using MultiCommand::addAdditionalOptionsToSingleCommandLine; using MultiCommand::initialize; using MultiCommand::printHelp; using MultiCommand::runBuilds; using MultiCommand::showResults; using MultiCommand::singleBuild; using MultiCommand::splitLineInSeparateArgs; MockMultiCommand() : MultiCommand{} { uniqueHelper = std::make_unique(filesMap); uniqueHelper->setAllCallBase(true); argHelper = uniqueHelper.get(); } ~MockMultiCommand() override = default; std::map filesMap{}; std::unique_ptr uniqueHelper{}; }; } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/offline_compiler/mock/mock_offline_compiler.h000066400000000000000000000070131422164147700325730ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/offline_compiler/source/offline_compiler.h" #include "opencl/test/unit_test/offline_compiler/mock/mock_argument_helper.h" #include #include namespace NEO { class MockOfflineCompiler : public OfflineCompiler { public: using OfflineCompiler::appendExtraInternalOptions; using OfflineCompiler::argHelper; using OfflineCompiler::deviceName; using OfflineCompiler::elfBinary; using OfflineCompiler::excludeIr; using OfflineCompiler::fclDeviceCtx; using 
OfflineCompiler::forceStatelessToStatefulOptimization; using OfflineCompiler::genBinary; using OfflineCompiler::genBinarySize; using OfflineCompiler::generateFilePathForIr; using OfflineCompiler::generateOptsSuffix; using OfflineCompiler::getStringWithinDelimiters; using OfflineCompiler::hwInfo; using OfflineCompiler::igcDeviceCtx; using OfflineCompiler::initHardwareInfo; using OfflineCompiler::inputFileLlvm; using OfflineCompiler::inputFileSpirV; using OfflineCompiler::internalOptions; using OfflineCompiler::irBinary; using OfflineCompiler::irBinarySize; using OfflineCompiler::isSpirV; using OfflineCompiler::options; using OfflineCompiler::outputDirectory; using OfflineCompiler::outputFile; using OfflineCompiler::parseCommandLine; using OfflineCompiler::parseDebugSettings; using OfflineCompiler::setStatelessToStatefullBufferOffsetFlag; using OfflineCompiler::sourceCode; using OfflineCompiler::storeBinary; using OfflineCompiler::updateBuildLog; using OfflineCompiler::useGenFile; using OfflineCompiler::useLlvmBc; using OfflineCompiler::useLlvmText; using OfflineCompiler::useOptionsSuffix; MockOfflineCompiler() : OfflineCompiler() { uniqueHelper = std::make_unique(filesMap); uniqueHelper->setAllCallBase(true); argHelper = uniqueHelper.get(); } ~MockOfflineCompiler() override = default; int initialize(size_t numArgs, const std::vector &argv) { return OfflineCompiler::initialize(numArgs, argv, true); } void storeGenBinary(const void *pSrc, const size_t srcSize) { OfflineCompiler::storeBinary(genBinary, genBinarySize, pSrc, srcSize); } int build() override { ++buildCalledCount; if (buildReturnValue.has_value()) { return *buildReturnValue; } return OfflineCompiler::build(); } int buildSourceCode() override { if (overrideBuildSourceCodeStatus) { return buildSourceCodeStatus; } return OfflineCompiler::buildSourceCode(); } bool generateElfBinary() override { generateElfBinaryCalled++; return OfflineCompiler::generateElfBinary(); } void writeOutAllFiles() override { 
writeOutAllFilesCalled++; OfflineCompiler::writeOutAllFiles(); } void clearLog() { uniqueHelper = std::make_unique(filesMap); uniqueHelper->setAllCallBase(true); argHelper = uniqueHelper.get(); } std::map filesMap{}; int buildSourceCodeStatus = 0; bool overrideBuildSourceCodeStatus = false; uint32_t generateElfBinaryCalled = 0u; uint32_t writeOutAllFilesCalled = 0u; std::unique_ptr uniqueHelper; int buildCalledCount{0}; std::optional buildReturnValue{}; }; } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/offline_compiler/mock/mock_offline_linker.h000066400000000000000000000061521422164147700322500ustar00rootroot00000000000000/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/offline_compiler/source/offline_linker.h" namespace NEO { class MockOfflineLinker : public OfflineLinker { public: using OfflineLinker::InputFileContent; using OfflineLinker::OperationMode; using OfflineLinker::initHardwareInfo; using OfflineLinker::initialize; using OfflineLinker::loadInputFilesContent; using OfflineLinker::parseCommand; using OfflineLinker::prepareIgc; using OfflineLinker::tryToStoreBuildLog; using OfflineLinker::verifyLinkerCommand; using OfflineLinker::hwInfo; using OfflineLinker::inputFilenames; using OfflineLinker::inputFilesContent; using OfflineLinker::internalOptions; using OfflineLinker::operationMode; using OfflineLinker::options; using OfflineLinker::outputFilename; using OfflineLinker::outputFormat; bool shouldReturnEmptyHardwareInfoTable{false}; bool shouldFailLoadingOfIgcLib{false}; bool shouldFailLoadingOfIgcCreateMainFunction{false}; bool shouldFailCreationOfIgcMain{false}; bool shouldFailCreationOfIgcDeviceContext{false}; bool shouldReturnInvalidIgcPlatformHandle{false}; bool shouldReturnInvalidGTSystemInfoHandle{false}; MockOfflineLinker(OclocArgHelper *argHelper) : OfflineLinker{argHelper} {} ArrayRef getHardwareInfoTable() const override { if (shouldReturnEmptyHardwareInfoTable) 
{ return {}; } else { return OfflineLinker::getHardwareInfoTable(); } } std::unique_ptr loadIgcLibrary() const override { if (shouldFailLoadingOfIgcLib) { return nullptr; } else { return OfflineLinker::loadIgcLibrary(); } } CIF::CreateCIFMainFunc_t loadCreateIgcMainFunction() const override { if (shouldFailLoadingOfIgcCreateMainFunction) { return nullptr; } else { return OfflineLinker::loadCreateIgcMainFunction(); } } CIF::RAII::UPtr_t createIgcMain(CIF::CreateCIFMainFunc_t createMainFunction) const override { if (shouldFailCreationOfIgcMain) { return nullptr; } else { return OfflineLinker::createIgcMain(createMainFunction); } } CIF::RAII::UPtr_t createIgcDeviceContext() const override { if (shouldFailCreationOfIgcDeviceContext) { return nullptr; } else { return OfflineLinker::createIgcDeviceContext(); } } CIF::RAII::UPtr_t getIgcPlatformHandle() const override { if (shouldReturnInvalidIgcPlatformHandle) { return nullptr; } else { return OfflineLinker::getIgcPlatformHandle(); } } CIF::RAII::UPtr_t getGTSystemInfoHandle() const override { if (shouldReturnInvalidGTSystemInfoHandle) { return nullptr; } else { return OfflineLinker::getGTSystemInfoHandle(); } } }; } // namespace NEOcompute-runtime-22.14.22890/opencl/test/unit_test/offline_compiler/mock/mock_sip_ocloc_tests.cpp000066400000000000000000000007051422164147700330070ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/mocks/mock_sip.h" #include static std::vector dummyBinaryForSip; using namespace NEO; const char *MockSipKernel::dummyBinaryForSip = "12345678"; std::vector MockSipKernel::getDummyGenBinary() { return std::vector(dummyBinaryForSip, dummyBinaryForSip + sizeof(MockSipKernel::dummyBinaryForSip)); } compute-runtime-22.14.22890/opencl/test/unit_test/offline_compiler/ocloc_api_tests.cpp000066400000000000000000000506771422164147700310400ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * 
SPDX-License-Identifier: MIT * */ #include "shared/offline_compiler/source/ocloc_api.h" #include "shared/offline_compiler/source/ocloc_error_code.h" #include "shared/offline_compiler/source/queries.h" #include "shared/offline_compiler/source/utilities/get_git_version_info.h" #include "shared/source/device_binary_format/elf/elf_decoder.h" #include "shared/source/device_binary_format/elf/ocl_elf.h" #include "shared/source/helpers/file_io.h" #include "environment.h" #include "gtest/gtest.h" #include "hw_cmds.h" #include #include extern Environment *gEnvironment; using namespace std::string_literals; TEST(OclocApiTests, WhenOclocVersionIsCalledThenCurrentOclocVersionIsReturned) { EXPECT_EQ(ocloc_version_t::OCLOC_VERSION_CURRENT, oclocVersion()); } TEST(OclocApiTests, WhenGoodArgsAreGivenThenSuccessIsReturned) { const char *argv[] = { "ocloc", "-file", "test_files/copybuffer.cl", "-device", gEnvironment->devicePrefix.c_str()}; unsigned int argc = sizeof(argv) / sizeof(const char *); testing::internal::CaptureStdout(); int retVal = oclocInvoke(argc, argv, 0, nullptr, nullptr, nullptr, 0, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr); std::string output = testing::internal::GetCapturedStdout(); EXPECT_EQ(retVal, NEO::OclocErrorCode::SUCCESS); EXPECT_EQ(std::string::npos, output.find("Command was: ocloc -file test_files/copybuffer.cl -device "s + argv[4])); } TEST(OclocApiTests, GivenNeoRevisionQueryWhenQueryingThenNeoRevisionIsReturned) { uint32_t numOutputs; uint64_t *lenOutputs; uint8_t **dataOutputs; char **nameOutputs; const char *argv[] = { "ocloc", "query", NEO::Queries::queryNeoRevision.data()}; unsigned int argc = sizeof(argv) / sizeof(const char *); int retVal = oclocInvoke(argc, argv, 0, nullptr, nullptr, nullptr, 0, nullptr, nullptr, nullptr, &numOutputs, &dataOutputs, &lenOutputs, &nameOutputs); EXPECT_EQ(retVal, NEO::OclocErrorCode::SUCCESS); EXPECT_EQ(numOutputs, 2u); int queryOutputIndex = -1; for (uint32_t i = 0; i < numOutputs; ++i) { if 
(strcmp(NEO::Queries::queryNeoRevision.data(), nameOutputs[i]) == 0) { queryOutputIndex = i; } } ASSERT_NE(-1, queryOutputIndex); NEO::ConstStringRef queryOutput(reinterpret_cast(dataOutputs[queryOutputIndex]), static_cast(lenOutputs[queryOutputIndex])); EXPECT_STREQ(NEO::getRevision().c_str(), queryOutput.data()); oclocFreeOutput(&numOutputs, &dataOutputs, &lenOutputs, &nameOutputs); } TEST(OclocApiTests, GivenOclDriverVersionQueryWhenQueryingThenNeoRevisionIsReturned) { uint32_t numOutputs; uint64_t *lenOutputs; uint8_t **dataOutputs; char **nameOutputs; const char *argv[] = { "ocloc", "query", NEO::Queries::queryOCLDriverVersion.data()}; unsigned int argc = sizeof(argv) / sizeof(const char *); int retVal = oclocInvoke(argc, argv, 0, nullptr, nullptr, nullptr, 0, nullptr, nullptr, nullptr, &numOutputs, &dataOutputs, &lenOutputs, &nameOutputs); EXPECT_EQ(retVal, NEO::OclocErrorCode::SUCCESS); EXPECT_EQ(numOutputs, 2u); int queryOutputIndex = -1; for (uint32_t i = 0; i < numOutputs; ++i) { if (strcmp(NEO::Queries::queryOCLDriverVersion.data(), nameOutputs[i]) == 0) { queryOutputIndex = i; } } ASSERT_NE(-1, queryOutputIndex); NEO::ConstStringRef queryOutput(reinterpret_cast(dataOutputs[queryOutputIndex]), static_cast(lenOutputs[queryOutputIndex])); EXPECT_STREQ(NEO::getOclDriverVersion().c_str(), queryOutput.data()); oclocFreeOutput(&numOutputs, &dataOutputs, &lenOutputs, &nameOutputs); } TEST(OclocApiTests, GivenNoQueryWhenQueryingThenErrorIsReturned) { const char *argv[] = { "ocloc", "query"}; unsigned int argc = sizeof(argv) / sizeof(const char *); testing::internal::CaptureStdout(); int retVal = oclocInvoke(argc, argv, 0, nullptr, nullptr, nullptr, 0, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr); std::string output = testing::internal::GetCapturedStdout(); EXPECT_EQ(retVal, NEO::OclocErrorCode::INVALID_COMMAND_LINE); EXPECT_STREQ("Error: Invalid command line. 
Expected ocloc query ", output.c_str()); } TEST(OclocApiTests, GivenInvalidQueryWhenQueryingThenErrorIsReturned) { const char *argv[] = { "ocloc", "query", "unknown_query"}; unsigned int argc = sizeof(argv) / sizeof(const char *); testing::internal::CaptureStdout(); int retVal = oclocInvoke(argc, argv, 0, nullptr, nullptr, nullptr, 0, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr); std::string output = testing::internal::GetCapturedStdout(); EXPECT_EQ(retVal, NEO::OclocErrorCode::INVALID_COMMAND_LINE); EXPECT_STREQ("Error: Invalid command line. Uknown argument unknown_query.", output.c_str()); } TEST(OclocApiTests, WhenGoodFamilyNameIsProvidedThenSuccessIsReturned) { const char *argv[] = { "ocloc", "-file", "test_files/copybuffer.cl", "-device", NEO::familyName[NEO::DEFAULT_PLATFORM::hwInfo.platform.eRenderCoreFamily]}; unsigned int argc = sizeof(argv) / sizeof(const char *); testing::internal::CaptureStdout(); int retVal = oclocInvoke(argc, argv, 0, nullptr, nullptr, nullptr, 0, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr); std::string output = testing::internal::GetCapturedStdout(); EXPECT_EQ(retVal, NEO::OclocErrorCode::SUCCESS); EXPECT_EQ(std::string::npos, output.find("Command was: ocloc -file test_files/copybuffer.cl -device "s + argv[4])); } TEST(OclocApiTests, WhenArgsWithMissingFileAreGivenThenErrorMessageIsProduced) { const char *argv[] = { "ocloc", "-q", "-file", "test_files/IDoNotExist.cl", "-device", gEnvironment->devicePrefix.c_str()}; unsigned int argc = sizeof(argv) / sizeof(const char *); testing::internal::CaptureStdout(); int retVal = oclocInvoke(argc, argv, 0, nullptr, nullptr, nullptr, 0, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr); std::string output = testing::internal::GetCapturedStdout(); EXPECT_EQ(retVal, NEO::OclocErrorCode::INVALID_FILE); EXPECT_NE(std::string::npos, output.find("Command was: ocloc -q -file test_files/IDoNotExist.cl -device "s + argv[5])); } TEST(OclocApiTests, 
givenInputOptionsAndInternalOptionsWhenCmdlineIsPrintedThenBothAreInQuotes) { const char *argv[] = { "ocloc", "-q", "-file", "test_files/IDoNotExist.cl", "-device", gEnvironment->devicePrefix.c_str(), "-options", "-D DEBUG -cl-kernel-arg-info", "-internal_options", "-internalOption1 -internal-option-2"}; unsigned int argc = sizeof(argv) / sizeof(const char *); testing::internal::CaptureStdout(); int retVal = oclocInvoke(argc, argv, 0, nullptr, nullptr, nullptr, 0, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr); std::string output = testing::internal::GetCapturedStdout(); EXPECT_NE(retVal, NEO::OclocErrorCode::SUCCESS); EXPECT_TRUE(output.find("Command was: ocloc -q -file test_files/IDoNotExist.cl -device "s + gEnvironment->devicePrefix.c_str() + " -options \"-D DEBUG -cl-kernel-arg-info\" -internal_options \"-internalOption1 -internal-option-2\"") != std::string::npos); size_t quotesCount = std::count(output.begin(), output.end(), '\"'); EXPECT_EQ(quotesCount, 4u); } TEST(OclocApiTests, givenInputOptionsCalledOptionsWhenCmdlineIsPrintedThenQuotesAreCorrect) { const char *argv[] = { "ocloc", "-q", "-file", "test_files/IDoNotExist.cl", "-device", gEnvironment->devicePrefix.c_str(), "-options", "-options", "-internal_options", "-internalOption"}; unsigned int argc = sizeof(argv) / sizeof(const char *); testing::internal::CaptureStdout(); int retVal = oclocInvoke(argc, argv, 0, nullptr, nullptr, nullptr, 0, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr); std::string output = testing::internal::GetCapturedStdout(); EXPECT_NE(retVal, NEO::OclocErrorCode::SUCCESS); EXPECT_TRUE(output.find("Command was: ocloc -q -file test_files/IDoNotExist.cl -device "s + gEnvironment->devicePrefix.c_str() + " -options \"-options\" -internal_options \"-internalOption\"") != std::string::npos); size_t quotesCount = std::count(output.begin(), output.end(), '\"'); EXPECT_EQ(quotesCount, 4u); } TEST(OclocApiTests, 
givenInvalidInputOptionsAndInternalOptionsFilesWhenCmdlineIsPrintedThenTheyArePrinted) { ASSERT_TRUE(fileExists("test_files/shouldfail.cl")); ASSERT_TRUE(fileExists("test_files/shouldfail_options.txt")); ASSERT_TRUE(fileExists("test_files/shouldfail_internal_options.txt")); const char *argv[] = { "ocloc", "-q", "-file", "test_files/shouldfail.cl", "-device", gEnvironment->devicePrefix.c_str()}; unsigned int argc = sizeof(argv) / sizeof(const char *); testing::internal::CaptureStdout(); int retVal = oclocInvoke(argc, argv, 0, nullptr, nullptr, nullptr, 0, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr); std::string output = testing::internal::GetCapturedStdout(); EXPECT_NE(retVal, NEO::OclocErrorCode::SUCCESS); EXPECT_TRUE(output.find("Compiling options read from file were:\n" "-shouldfailOptions") != std::string::npos); EXPECT_TRUE(output.find("Internal options read from file were:\n" "-shouldfailInternalOptions") != std::string::npos); } TEST(OclocApiTests, givenInvalidOclocOptionsFileWhenCmdlineIsPrintedThenTheyArePrinted) { ASSERT_TRUE(fileExists("test_files/valid_kernel.cl")); ASSERT_TRUE(fileExists("test_files/valid_kernel_ocloc_options.txt")); const char *argv[] = { "ocloc", "-q", "-file", "test_files/valid_kernel.cl", "-device", gEnvironment->devicePrefix.c_str()}; unsigned int argc = sizeof(argv) / sizeof(const char *); testing::internal::CaptureStdout(); int retVal = oclocInvoke(argc, argv, 0, nullptr, nullptr, nullptr, 0, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr); std::string output = testing::internal::GetCapturedStdout(); EXPECT_NE(retVal, NEO::OclocErrorCode::SUCCESS); EXPECT_TRUE(output.find("Failed with ocloc options from file:\n" "-invalid_ocloc_option") != std::string::npos); EXPECT_FALSE(output.find("Building with ocloc options:") != std::string::npos); } TEST(OclocApiTests, GivenIncludeHeadersWhenCompilingThenPassesToFclHeadersPackedAsElf) { auto prevFclDebugVars = NEO::getFclDebugVars(); auto debugVars = 
prevFclDebugVars; std::string receivedInput; debugVars.receivedInput = &receivedInput; setFclDebugVars(debugVars); const char *argv[] = { "ocloc", "-file", "main.cl", "-device", gEnvironment->devicePrefix.c_str()}; unsigned int argc = sizeof(argv) / sizeof(const char *); const char *headerA = R"===( void foo() {} )==="; const char *headerB = R"===( void bar() {} )==="; const char *main = R"===( #include "includeA.h" #include "includeB.h" __kernel void k(){ foo(); bar(); } )==="; const char *sourcesNames[] = {"main.cl"}; const uint8_t *sources[] = {reinterpret_cast(main)}; const uint64_t sourcesLen[] = {strlen(main) + 1}; const char *headersNames[] = {"includeA.h", "includeB.h"}; const uint8_t *headers[] = {reinterpret_cast(headerA), reinterpret_cast(headerB)}; const uint64_t headersLen[] = {strlen(headerA) + 1, strlen(headerB) + 1}; uint32_t numOutputs = 0U; uint8_t **outputs = nullptr; uint64_t *outputsLen = nullptr; char **ouputsNames = nullptr; oclocInvoke(argc, argv, 1, sources, sourcesLen, sourcesNames, 2, headers, headersLen, headersNames, &numOutputs, &outputs, &outputsLen, &ouputsNames); NEO::setFclDebugVars(prevFclDebugVars); std::string decodeErr, decodeWarn; ArrayRef rawElf(reinterpret_cast(receivedInput.data()), receivedInput.size()); auto elf = NEO::Elf::decodeElf(rawElf, decodeErr, decodeWarn); ASSERT_NE(nullptr, elf.elfFileHeader) << decodeWarn << " " << decodeErr; EXPECT_EQ(NEO::Elf::ET_OPENCL_SOURCE, elf.elfFileHeader->type); using SectionT = std::remove_reference_t; const SectionT *sourceSection, *headerASection, *headerBSection; ASSERT_NE(NEO::Elf::SHN_UNDEF, elf.elfFileHeader->shStrNdx); auto sectionNamesSection = elf.sectionHeaders.begin() + elf.elfFileHeader->shStrNdx; auto elfStrings = sectionNamesSection->data.toArrayRef(); for (const auto §ion : elf.sectionHeaders) { if (NEO::Elf::SHT_OPENCL_SOURCE == section.header->type) { sourceSection = §ion; } else if (NEO::Elf::SHT_OPENCL_HEADER == section.header->type) { auto sectionName = 
elfStrings.begin() + section.header->name; if (0 == strcmp("includeA.h", sectionName)) { headerASection = §ion; } else if (0 == strcmp("includeB.h", sectionName)) { headerBSection = §ion; } else { EXPECT_FALSE(true) << sectionName; } } } ASSERT_NE(nullptr, sourceSection); EXPECT_EQ(sourcesLen[0], sourceSection->data.size()); EXPECT_STREQ(main, reinterpret_cast(sourceSection->data.begin())); ASSERT_NE(nullptr, headerASection); EXPECT_EQ(sourcesLen[0], sourceSection->data.size()); EXPECT_STREQ(headerA, reinterpret_cast(headerASection->data.begin())); ASSERT_NE(nullptr, headerBSection); EXPECT_EQ(sourcesLen[0], sourceSection->data.size()); EXPECT_STREQ(headerB, reinterpret_cast(headerBSection->data.begin())); } TEST(OclocApiTests, GivenHelpParameterWhenDecodingThenHelpMsgIsPrintedAndSuccessIsReturned) { const char *argv[] = { "ocloc", "disasm", "--help"}; unsigned int argc = sizeof(argv) / sizeof(const char *); testing::internal::CaptureStdout(); int retVal = oclocInvoke(argc, argv, 0, nullptr, nullptr, nullptr, 0, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr); std::string output = testing::internal::GetCapturedStdout(); EXPECT_FALSE(output.empty()); EXPECT_EQ(retVal, NEO::OclocErrorCode::SUCCESS); } TEST(OclocApiTests, GivenHelpParameterWhenEncodingThenHelpMsgIsPrintedAndSuccessIsReturned) { const char *argv[] = { "ocloc", "asm", "--help"}; unsigned int argc = sizeof(argv) / sizeof(const char *); testing::internal::CaptureStdout(); int retVal = oclocInvoke(argc, argv, 0, nullptr, nullptr, nullptr, 0, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr); std::string output = testing::internal::GetCapturedStdout(); EXPECT_FALSE(output.empty()); EXPECT_EQ(retVal, NEO::OclocErrorCode::SUCCESS); } TEST(OclocApiTests, GivenNonexistentFileWhenValidateIsInvokedThenErrorIsPrinted) { const char *argv[] = { "ocloc", "validate", "-file", "some_special_nonexistent_file.gen"}; unsigned int argc = sizeof(argv) / sizeof(argv[0]); 
testing::internal::CaptureStdout(); int retVal = oclocInvoke(argc, argv, 0, nullptr, nullptr, nullptr, 0, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr); const auto output = testing::internal::GetCapturedStdout(); EXPECT_EQ(-1, retVal); const std::string expectedErrorMessage{"Error : Input file missing : some_special_nonexistent_file.gen\n"}; EXPECT_EQ(expectedErrorMessage, output); } TEST(OclocApiTests, GivenZeroArgumentsWhenOclocIsInvokedThenHelpIsPrinted) { testing::internal::CaptureStdout(); int retVal = oclocInvoke(0, nullptr, 0, nullptr, nullptr, nullptr, 0, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr); const auto output = testing::internal::GetCapturedStdout(); EXPECT_EQ(NEO::OclocErrorCode::INVALID_COMMAND_LINE, retVal); EXPECT_FALSE(output.empty()); } TEST(OclocApiTests, GivenCommandWithoutArgsWhenOclocIsInvokedThenHelpIsPrinted) { const char *argv[] = { "ocloc"}; unsigned int argc = sizeof(argv) / sizeof(argv[0]); testing::internal::CaptureStdout(); int retVal = oclocInvoke(argc, argv, 0, nullptr, nullptr, nullptr, 0, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr); const auto output = testing::internal::GetCapturedStdout(); EXPECT_EQ(NEO::OclocErrorCode::SUCCESS, retVal); EXPECT_FALSE(output.empty()); } TEST(OclocApiTests, GivenLongHelpArgumentWhenOclocIsInvokedThenHelpIsPrinted) { const char *argv[] = { "ocloc", "--help"}; unsigned int argc = sizeof(argv) / sizeof(argv[0]); testing::internal::CaptureStdout(); int retVal = oclocInvoke(argc, argv, 0, nullptr, nullptr, nullptr, 0, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr); const auto output = testing::internal::GetCapturedStdout(); EXPECT_EQ(NEO::OclocErrorCode::SUCCESS, retVal); EXPECT_FALSE(output.empty()); } TEST(OclocApiTests, GivenHelpParameterWhenLinkingThenHelpMsgIsPrintedAndSuccessIsReturned) { const char *argv[] = { "ocloc", "link", "--help"}; unsigned int argc = sizeof(argv) / sizeof(argv[0]); 
testing::internal::CaptureStdout(); int retVal = oclocInvoke(argc, argv, 0, nullptr, nullptr, nullptr, 0, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr); std::string output = testing::internal::GetCapturedStdout(); EXPECT_FALSE(output.empty()); EXPECT_EQ(NEO::OclocErrorCode::SUCCESS, retVal); } TEST(OclocApiTests, GivenInvalidParameterWhenLinkingThenErrorIsReturned) { const char *argv[] = { "ocloc", "link", "--dummy_param"}; unsigned int argc = sizeof(argv) / sizeof(argv[0]); testing::internal::CaptureStdout(); int retVal = oclocInvoke(argc, argv, 0, nullptr, nullptr, nullptr, 0, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr); std::string output = testing::internal::GetCapturedStdout(); EXPECT_EQ(NEO::OclocErrorCode::INVALID_COMMAND_LINE, retVal); const std::string expectedInitError{"Invalid option (arg 2): --dummy_param\n"}; const std::string expectedExecuteError{"Error: Linker cannot be executed due to unsuccessful initialization!\n"}; const std::string expectedErrorMessage = expectedInitError + expectedExecuteError; EXPECT_EQ(expectedErrorMessage, output); }compute-runtime-22.14.22890/opencl/test/unit_test/offline_compiler/ocloc_fatbinary_tests.cpp000066400000000000000000001640531422164147700322400ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/offline_compiler/ocloc_fatbinary_tests.h" #include "shared/offline_compiler/source/ocloc_arg_helper.h" #include "shared/offline_compiler/source/ocloc_error_code.h" #include "shared/source/device_binary_format/ar/ar.h" #include "shared/source/device_binary_format/ar/ar_decoder.h" #include "shared/source/device_binary_format/elf/elf_decoder.h" #include "shared/source/device_binary_format/elf/ocl_elf.h" #include "shared/source/helpers/hw_helper.h" #include "environment.h" #include "mock/mock_argument_helper.h" #include "mock/mock_offline_compiler.h" #include #include extern Environment 
*gEnvironment; namespace NEO { auto searchInArchiveByFilename(const Ar::Ar &archive, const ConstStringRef &name) { const auto isSearchedFile = [&name](const auto &file) { return file.fileName == name; }; const auto &arFiles = archive.files; return std::find_if(arFiles.begin(), arFiles.end(), isSearchedFile); } std::string prepareTwoDevices(MockOclocArgHelper *argHelper) { auto allEnabledDeviceConfigs = argHelper->getAllSupportedDeviceConfigs(); if (allEnabledDeviceConfigs.size() < 2) { return {}; } const auto cfg1 = argHelper->parseProductConfigFromValue(allEnabledDeviceConfigs[0].config); const auto cfg2 = argHelper->parseProductConfigFromValue(allEnabledDeviceConfigs[1].config); return cfg1 + "," + cfg2; } std::string getDeviceConfig(const OfflineCompiler &offlineCompiler) { const auto &hwInfo = offlineCompiler.getHardwareInfo(); const std::string product = hardwarePrefix[hwInfo.platform.eProductFamily]; const auto stepping = hwInfo.platform.usRevId; return product + "." + std::to_string(stepping); } TEST(OclocFatBinaryRequestedFatBinary, WhenDeviceArgMissingThenReturnsFalse) { const char *args[] = {"ocloc", "-aaa", "*", "-device", "*"}; std::unique_ptr argHelper = std::make_unique(); EXPECT_FALSE(NEO::requestedFatBinary(0, nullptr, argHelper.get())); EXPECT_FALSE(NEO::requestedFatBinary(1, args, argHelper.get())); EXPECT_FALSE(NEO::requestedFatBinary(2, args, argHelper.get())); EXPECT_FALSE(NEO::requestedFatBinary(3, args, argHelper.get())); EXPECT_FALSE(NEO::requestedFatBinary(4, args, argHelper.get())); } TEST(OclocFatBinaryRequestedFatBinary, GivenDeviceArgProvidedWhenFatBinaryFormatWithRangeIsPassedThenTrueIsReturned) { std::unique_ptr argHelper = std::make_unique(); const char *allPlatforms[] = {"ocloc", "-device", "*"}; const char *manyPlatforms[] = {"ocloc", "-device", "a,b"}; const char *manyGens[] = {"ocloc", "-device", "gen0,gen1"}; const char *rangePlatformFrom[] = {"ocloc", "-device", "skl-"}; const char *rangePlatformTo[] = {"ocloc", "-device", 
"-skl"}; const char *rangePlatformBounds[] = {"ocloc", "-device", "skl-icllp"}; const char *rangeGenFrom[] = {"ocloc", "-device", "gen0-"}; const char *rangeGenTo[] = {"ocloc", "-device", "-gen5"}; const char *rangeGenBounds[] = {"ocloc", "-device", "gen0-gen5"}; const char *rangeConfigBounds[] = {"ocloc", "-device", "9-11"}; const char *manyConfigs[] = {"ocloc", "-device", "9.0,11"}; const char *rangeConfigFrom[] = {"ocloc", "-device", "10.1-"}; const char *rangeConfigTo[] = {"ocloc", "-device", "-11.2"}; const char *rangeConfigsBoundsSecond[] = {"ocloc", "-device", "11.2-12.2"}; EXPECT_TRUE(NEO::requestedFatBinary(3, allPlatforms, argHelper.get())); EXPECT_TRUE(NEO::requestedFatBinary(3, manyPlatforms, argHelper.get())); EXPECT_TRUE(NEO::requestedFatBinary(3, manyGens, argHelper.get())); EXPECT_TRUE(NEO::requestedFatBinary(3, rangePlatformFrom, argHelper.get())); EXPECT_TRUE(NEO::requestedFatBinary(3, rangePlatformTo, argHelper.get())); EXPECT_TRUE(NEO::requestedFatBinary(3, rangePlatformBounds, argHelper.get())); EXPECT_TRUE(NEO::requestedFatBinary(3, rangeGenFrom, argHelper.get())); EXPECT_TRUE(NEO::requestedFatBinary(3, rangeGenTo, argHelper.get())); EXPECT_TRUE(NEO::requestedFatBinary(3, rangeGenBounds, argHelper.get())); EXPECT_TRUE(NEO::requestedFatBinary(3, rangeConfigBounds, argHelper.get())); EXPECT_TRUE(NEO::requestedFatBinary(3, manyConfigs, argHelper.get())); EXPECT_TRUE(NEO::requestedFatBinary(3, rangeConfigFrom, argHelper.get())); EXPECT_TRUE(NEO::requestedFatBinary(3, rangeConfigTo, argHelper.get())); EXPECT_TRUE(NEO::requestedFatBinary(3, rangeConfigsBoundsSecond, argHelper.get())); } TEST(OclocFatBinaryRequestedFatBinary, GivenDeviceArgToFatBinaryWhenConfigMatchesMoreThanOneProductThenTrueIsReturned) { std::unique_ptr argHelper = std::make_unique(); auto allEnabledDeviceConfigs = argHelper->getAllSupportedDeviceConfigs(); std::string configNum0 = argHelper->parseProductConfigFromValue(allEnabledDeviceConfigs[allEnabledDeviceConfigs.size() / 
2].config); auto major_pos = configNum0.find("."); auto cutMinorAndRevision = configNum0.substr(0, major_pos); auto matchedConfigs = getAllMatchedConfigs(cutMinorAndRevision, argHelper.get()); if (matchedConfigs.size() < 2) { GTEST_SKIP(); } const char *fewConfigs[] = {"ocloc", "-device", cutMinorAndRevision.c_str()}; EXPECT_TRUE(NEO::requestedFatBinary(3, fewConfigs, argHelper.get())); } TEST(OclocFatBinaryRequestedFatBinary, GivenDeviceArgAsSingleProductConfigThenFatBinaryIsNotRequested) { std::unique_ptr argHelper = std::make_unique(); auto allEnabledDeviceConfigs = argHelper->getAllSupportedDeviceConfigs(); for (auto &deviceConfig : allEnabledDeviceConfigs) { std::string configStr = argHelper->parseProductConfigFromValue(deviceConfig.config); const char *singleConfig[] = {"ocloc", "-device", configStr.c_str()}; EXPECT_FALSE(NEO::requestedFatBinary(3, singleConfig, argHelper.get())); } } TEST(OclocFatBinaryRequestedFatBinary, WhenPlatformIsProvidedButDoesNotContainMoreThanOneProductThenReturnFalse) { std::unique_ptr argHelper = std::make_unique(); const char *skl[] = {"ocloc", "-device", "skl"}; EXPECT_FALSE(NEO::requestedFatBinary(3, skl, argHelper.get())); } TEST(OclocFatBinaryToProductConfigStrings, GivenListOfProductIdsThenReturnsListOfStrings) { auto platforms = NEO::getAllSupportedTargetPlatforms(); auto names = NEO::toProductNames(platforms); EXPECT_EQ(names.size(), platforms.size()); } TEST_F(OclocFatBinaryGetTargetConfigsForFatbinary, GivenDifferentDeviceArgWhenCheckIfPlatformsAbbreviationIsPassedThenReturnCorrectValue) { auto allEnabledPlatforms = NEO::getAllSupportedTargetPlatforms(); auto allEnabledDeviceConfigs = oclocArgHelperWithoutInput->getAllSupportedDeviceConfigs(); if (allEnabledPlatforms.size() < 3 || allEnabledDeviceConfigs.size() < 3) { GTEST_SKIP(); } auto platform0 = allEnabledPlatforms[0]; ConstStringRef platformName0(hardwarePrefix[platform0], strlen(hardwarePrefix[platform0])); auto platform1 = allEnabledPlatforms[1]; ConstStringRef 
platformName1(hardwarePrefix[platform1], strlen(hardwarePrefix[platform1])); auto deviceMapConfig0 = allEnabledDeviceConfigs[0]; auto configNumConvention0 = oclocArgHelperWithoutInput->parseProductConfigFromValue(deviceMapConfig0.config); auto deviceMapConfig1 = allEnabledDeviceConfigs[1]; auto configNumConvention1 = oclocArgHelperWithoutInput->parseProductConfigFromValue(deviceMapConfig1.config); auto twoPlatforms = platformName0.str() + "," + platformName1.str(); auto configsRange = configNumConvention0 + "-" + configNumConvention1; auto gen = std::to_string(deviceMapConfig0.hwInfo->platform.eRenderCoreFamily); EXPECT_TRUE(isDeviceWithPlatformAbbreviation(platformName0, oclocArgHelperWithoutInput.get())); EXPECT_TRUE(isDeviceWithPlatformAbbreviation(ConstStringRef(twoPlatforms), oclocArgHelperWithoutInput.get())); EXPECT_FALSE(isDeviceWithPlatformAbbreviation(ConstStringRef(configsRange), oclocArgHelperWithoutInput.get())); EXPECT_FALSE(isDeviceWithPlatformAbbreviation(ConstStringRef(gen), oclocArgHelperWithoutInput.get())); } TEST_F(OclocFatBinaryGetTargetConfigsForFatbinary, GivenAsterixThenReturnAllEnabledConfigs) { auto expected = oclocArgHelperWithoutInput->getAllSupportedDeviceConfigs(); auto got = NEO::getTargetConfigsForFatbinary("*", oclocArgHelperWithoutInput.get()); EXPECT_EQ(got.size(), expected.size()); } TEST_F(OclocFatBinaryGetTargetConfigsForFatbinary, GivenProductConfigWhenConfigIsUndefinedThenReturnEmptyList) { auto got = NEO::getTargetConfigsForFatbinary("0.0.0", oclocArgHelperWithoutInput.get()); EXPECT_TRUE(got.empty()); got = NEO::getTargetConfigsForFatbinary("0.0", oclocArgHelperWithoutInput.get()); EXPECT_TRUE(got.empty()); got = NEO::getTargetConfigsForFatbinary("0", oclocArgHelperWithoutInput.get()); EXPECT_TRUE(got.empty()); } TEST_F(OclocFatBinaryGetTargetConfigsForFatbinary, GivenProductConfigOpenRangeToWhenConfigIsUndefinedThenReturnEmptyList) { auto got = NEO::getTargetConfigsForFatbinary("-0.0.0", oclocArgHelperWithoutInput.get()); 
EXPECT_TRUE(got.empty()); got = NEO::getTargetConfigsForFatbinary("-0.0", oclocArgHelperWithoutInput.get()); EXPECT_TRUE(got.empty()); got = NEO::getTargetConfigsForFatbinary("-0", oclocArgHelperWithoutInput.get()); EXPECT_TRUE(got.empty()); } TEST_F(OclocFatBinaryGetTargetConfigsForFatbinary, GivenProductConfigOpenRangeFromWhenConfigIsUndefinedThenReturnEmptyList) { auto got = NEO::getTargetConfigsForFatbinary("0.0.0-", oclocArgHelperWithoutInput.get()); EXPECT_TRUE(got.empty()); got = NEO::getTargetConfigsForFatbinary("0.0-", oclocArgHelperWithoutInput.get()); EXPECT_TRUE(got.empty()); got = NEO::getTargetConfigsForFatbinary("0-", oclocArgHelperWithoutInput.get()); EXPECT_TRUE(got.empty()); } TEST_F(OclocFatBinaryGetTargetConfigsForFatbinary, GivenProductConfigClosedRangeWhenAnyOfConfigIsUndefinedOrIncorrectThenReturnEmptyList) { auto allEnabledDeviceConfigs = oclocArgHelperWithoutInput->getAllSupportedDeviceConfigs(); if (allEnabledDeviceConfigs.size() < 2) { GTEST_SKIP(); } auto deviceMapConfig0 = allEnabledDeviceConfigs[0]; auto config0Str = oclocArgHelperWithoutInput->parseProductConfigFromValue(deviceMapConfig0.config); auto got = NEO::getTargetConfigsForFatbinary("1.2-" + config0Str, oclocArgHelperWithoutInput.get()); EXPECT_TRUE(got.empty()); got = NEO::getTargetConfigsForFatbinary(config0Str + "-1.2", oclocArgHelperWithoutInput.get()); EXPECT_TRUE(got.empty()); got = NEO::getTargetConfigsForFatbinary("1.a.c-" + config0Str, oclocArgHelperWithoutInput.get()); EXPECT_TRUE(got.empty()); got = NEO::getTargetConfigsForFatbinary(config0Str + "-1.a.c", oclocArgHelperWithoutInput.get()); EXPECT_TRUE(got.empty()); } TEST(OclocFatBinaryRequestedFatBinary, GivenDeviceArgProvidedWhenUnknownGenNameIsPassedThenRequestedFatBinaryReturnsFalse) { std::unique_ptr argHelper = std::make_unique(); const char *unknownGen[] = {"ocloc", "-device", "gen0"}; const char *unknownGenCaseInsensitive[] = {"ocloc", "-device", "Gen0"}; EXPECT_FALSE(NEO::requestedFatBinary(3, unknownGen, 
argHelper.get())); EXPECT_FALSE(NEO::requestedFatBinary(3, unknownGenCaseInsensitive, argHelper.get())); } TEST(OclocFatBinaryRequestedFatBinary, GivenDeviceArgProvidedWhenKnownGenNameIsPassedThenRequestedFatBinaryReturnsTrue) { std::unique_ptr argHelper = std::make_unique(); unsigned int i = 0; for (; i < IGFX_MAX_CORE; ++i) { if (NEO::familyName[i] != nullptr) break; } const char *genFromFamilyName[] = {"ocloc", "-device", NEO::familyName[i]}; EXPECT_TRUE(NEO::requestedFatBinary(3, genFromFamilyName, argHelper.get())); } TEST(OclocFatBinaryGetAllSupportedTargetPlatforms, WhenRequestedThenReturnsAllPlatformsWithNonNullHardwarePrefixes) { auto platforms = NEO::getAllSupportedTargetPlatforms(); std::unordered_set platformsSet(platforms.begin(), platforms.end()); for (unsigned int productId = 0; productId < IGFX_MAX_PRODUCT; ++productId) { if (nullptr != NEO::hardwarePrefix[productId]) { EXPECT_EQ(1U, platformsSet.count(static_cast(productId))) << productId; } else { EXPECT_EQ(0U, platformsSet.count(static_cast(productId))) << productId; } } } TEST(OclocFatBinaryAsProductId, GivenEnabledPlatformNameThenReturnsProperPlatformId) { auto platforms = NEO::getAllSupportedTargetPlatforms(); auto names = NEO::toProductNames(platforms); for (size_t i = 0; i < platforms.size(); ++i) { auto idByName = NEO::asProductId(names[i], platforms); EXPECT_EQ(idByName, platforms[i]) << names[i].data() << " : " << platforms[i] << " != " << idByName; } } TEST(OclocFatBinaryAsProductId, GivenDisabledPlatformNameThenReturnsUnknownPlatformId) { auto platforms = NEO::getAllSupportedTargetPlatforms(); auto names = NEO::toProductNames(platforms); platforms.clear(); for (size_t i = 0; i < platforms.size(); ++i) { auto idByName = NEO::asProductId(names[i], platforms); EXPECT_EQ(IGFX_UNKNOWN, platforms[i]) << names[i].data() << " : IGFX_UNKNOWN != " << idByName; } } TEST(OclocFatBinaryAsGfxCoreIdList, GivenEnabledGfxCoreNameThenReturnsNonEmptyList) { std::unique_ptr argHelper = std::make_unique(); 
for (unsigned int coreId = 0; coreId < IGFX_MAX_CORE; ++coreId) { if (nullptr != NEO::familyName[coreId]) { EXPECT_TRUE(argHelper->isGen(ConstStringRef(NEO::familyName[coreId]).str())); std::string caseInsensitive = NEO::familyName[coreId]; std::transform(caseInsensitive.begin(), caseInsensitive.end(), caseInsensitive.begin(), ::tolower); EXPECT_TRUE(argHelper->isGen(caseInsensitive)); auto findCore = caseInsensitive.find("_core"); if (findCore != std::string::npos) { caseInsensitive = caseInsensitive.substr(0, findCore); EXPECT_TRUE(argHelper->isGen(caseInsensitive)); } auto findUnderline = caseInsensitive.find("_"); if (findUnderline != std::string::npos) { caseInsensitive.erase(std::remove(caseInsensitive.begin(), caseInsensitive.end(), '_'), caseInsensitive.end()); EXPECT_TRUE(argHelper->isGen(caseInsensitive)); } } } } TEST(OclocFatBinaryAsGfxCoreIdList, GivenDisabledGfxCoreNameThenReturnsEmptyList) { std::unique_ptr argHelper = std::make_unique(); EXPECT_FALSE(argHelper->isGen(ConstStringRef("genA").str())); EXPECT_FALSE(argHelper->isGen(ConstStringRef("gen0").str())); EXPECT_FALSE(argHelper->isGen(ConstStringRef("gen1").str())); EXPECT_FALSE(argHelper->isGen(ConstStringRef("gen2").str())); } TEST(OclocFatBinaryAsGfxCoreIdList, GivenEnabledGfxCoreNameThenReturnsNonNullIGFX) { std::unique_ptr argHelper = std::make_unique(); for (unsigned int coreId = 0; coreId < IGFX_MAX_CORE; ++coreId) { if (nullptr != NEO::familyName[coreId]) { EXPECT_EQ(argHelper->returnIGFXforGen(ConstStringRef(NEO::familyName[coreId]).str()), coreId); std::string caseInsensitive = NEO::familyName[coreId]; std::transform(caseInsensitive.begin(), caseInsensitive.end(), caseInsensitive.begin(), ::tolower); EXPECT_EQ(argHelper->returnIGFXforGen(caseInsensitive), coreId); auto findCore = caseInsensitive.find("_core"); if (findCore != std::string::npos) { caseInsensitive = caseInsensitive.substr(0, findCore); EXPECT_EQ(argHelper->returnIGFXforGen(caseInsensitive), coreId); } auto findUnderline 
= caseInsensitive.find("_"); if (findUnderline != std::string::npos) { caseInsensitive.erase(std::remove(caseInsensitive.begin(), caseInsensitive.end(), '_'), caseInsensitive.end()); EXPECT_EQ(argHelper->returnIGFXforGen(caseInsensitive), coreId); } } } } TEST(OclocFatBinaryAsGfxCoreIdList, GivenDisabledGfxCoreNameThenReturnsNullIGFX) { std::unique_ptr argHelper = std::make_unique(); EXPECT_EQ(argHelper->returnIGFXforGen(ConstStringRef("genA").str()), 0u); EXPECT_EQ(argHelper->returnIGFXforGen(ConstStringRef("gen0").str()), 0u); EXPECT_EQ(argHelper->returnIGFXforGen(ConstStringRef("gen1").str()), 0u); EXPECT_EQ(argHelper->returnIGFXforGen(ConstStringRef("gen2").str()), 0u); } TEST_F(OclocFatBinaryGetTargetConfigsForFatbinary, GivenMutiplePlatformThenReturnThosePlatforms) { auto allEnabledPlatforms = NEO::getAllSupportedTargetPlatforms(); if (allEnabledPlatforms.size() < 2) { GTEST_SKIP(); } auto platform0 = allEnabledPlatforms[0]; std::string platform0Name = NEO::hardwarePrefix[platform0]; auto platform1 = allEnabledPlatforms[1]; std::string platform1Name = NEO::hardwarePrefix[platform1]; std::vector expected{platform0Name, platform1Name}; auto got = NEO::getTargetPlatformsForFatbinary(platform0Name + "," + platform1Name, oclocArgHelperWithoutInput.get()); EXPECT_EQ(expected, got); } TEST_F(OclocFatBinaryGetTargetConfigsForFatbinary, GivenPlatformOpenRangeFromThenReturnAllEnabledPlatformsThatMatch) { auto allEnabledPlatforms = NEO::getAllSupportedTargetPlatforms(); if (allEnabledPlatforms.size() < 3) { GTEST_SKIP(); } auto platform0 = allEnabledPlatforms[allEnabledPlatforms.size() / 2]; std::string platformName = NEO::hardwarePrefix[platform0]; std::vector expectedPlatforms; auto platformFrom = std::find(allEnabledPlatforms.begin(), allEnabledPlatforms.end(), platform0); expectedPlatforms.insert(expectedPlatforms.end(), platformFrom, allEnabledPlatforms.end()); auto expected = NEO::toProductNames(expectedPlatforms); auto got = 
NEO::getTargetPlatformsForFatbinary(platformName + "-", oclocArgHelperWithoutInput.get()); EXPECT_EQ(expected, got); } TEST_F(OclocFatBinaryGetTargetConfigsForFatbinary, GivenPlatformOpenRangeToThenReturnAllEnabledPlatformsThatMatch) { auto allEnabledPlatforms = NEO::getAllSupportedTargetPlatforms(); if (allEnabledPlatforms.size() < 3) { GTEST_SKIP(); } auto platform0 = allEnabledPlatforms[allEnabledPlatforms.size() / 2]; std::string platformName = NEO::hardwarePrefix[platform0]; std::vector expectedPlatforms; auto platformTo = std::find(allEnabledPlatforms.begin(), allEnabledPlatforms.end(), platform0); expectedPlatforms.insert(expectedPlatforms.end(), allEnabledPlatforms.begin(), platformTo + 1); auto expected = NEO::toProductNames(expectedPlatforms); auto got = NEO::getTargetPlatformsForFatbinary("-" + platformName, oclocArgHelperWithoutInput.get()); EXPECT_EQ(expected, got); } TEST_F(OclocFatBinaryGetTargetConfigsForFatbinary, GivenPlatformClosedRangeThenReturnAllEnabledPlatformsThatMatch) { auto allEnabledPlatforms = NEO::getAllSupportedTargetPlatforms(); if (allEnabledPlatforms.size() < 4) { GTEST_SKIP(); } auto platformFrom = allEnabledPlatforms[1]; auto platformTo = allEnabledPlatforms[allEnabledPlatforms.size() - 2]; std::string platformNameFrom = NEO::hardwarePrefix[platformFrom]; std::string platformNameTo = NEO::hardwarePrefix[platformTo]; std::vector expectedPlatforms; expectedPlatforms.insert(expectedPlatforms.end(), allEnabledPlatforms.begin() + 1, allEnabledPlatforms.begin() + allEnabledPlatforms.size() - 1); auto expected = NEO::toProductNames(expectedPlatforms); auto got = NEO::getTargetPlatformsForFatbinary(platformNameFrom + "-" + platformNameTo, oclocArgHelperWithoutInput.get()); EXPECT_EQ(expected, got); got = NEO::getTargetPlatformsForFatbinary(platformNameTo + "-" + platformNameFrom, oclocArgHelperWithoutInput.get()); // swap min with max implicitly EXPECT_EQ(expected, got); } std::vector getEnabledCores() { std::vector ret; for (unsigned 
int coreId = 0; coreId < IGFX_MAX_CORE; ++coreId) { if (nullptr != NEO::familyName[coreId]) { ret.push_back(static_cast(coreId)); } } return ret; } TEST_F(OclocFatBinaryGetTargetConfigsForFatbinary, GivenArchitectureThenReturnAllEnabledConfigsThatMatch) { auto allEnabledCores = getEnabledCores(); if (allEnabledCores.size() < 3) { GTEST_SKIP(); } auto core = allEnabledCores[allEnabledCores.size() / 2]; std::string coreName = NEO::familyName[core]; if (coreName[0] == 'G') { coreName[0] = 'g'; } std::vector expected; oclocArgHelperWithoutInput->getProductConfigsForGfxCoreFamily(core, expected); auto got = NEO::getTargetConfigsForFatbinary(coreName, oclocArgHelperWithoutInput.get()); EXPECT_EQ(expected.size(), got.size()); for (unsigned int i = 0; i < got.size(); i++) { EXPECT_TRUE(expected[i] == got[i]); } } TEST_F(OclocFatBinaryGetTargetConfigsForFatbinary, GivenArchitectureOpenRangeFromThenReturnAllEnabledConfigsThatMatch) { auto allEnabledCores = getEnabledCores(); if (allEnabledCores.size() < 3) { GTEST_SKIP(); } auto core0 = allEnabledCores[allEnabledCores.size() / 2]; std::string coreName = NEO::familyName[core0]; if (coreName[0] == 'G') { coreName[0] = 'g'; } std::vector expected; unsigned int coreIt = core0; while (coreIt < static_cast(IGFX_MAX_CORE)) { oclocArgHelperWithoutInput->getProductConfigsForGfxCoreFamily(static_cast(coreIt), expected); ++coreIt; } auto got = NEO::getTargetConfigsForFatbinary(coreName + "-", oclocArgHelperWithoutInput.get()); EXPECT_EQ(expected.size(), got.size()); for (unsigned int i = 0; i < got.size(); i++) { EXPECT_TRUE(expected[i] == got[i]); } } TEST_F(OclocFatBinaryGetTargetConfigsForFatbinary, GivenArchitectureOpenRangeToThenReturnAllEnabledConfigsThatMatch) { auto allEnabledCores = getEnabledCores(); if (allEnabledCores.size() < 3) { GTEST_SKIP(); } auto core0 = allEnabledCores[allEnabledCores.size() / 2]; std::string coreName = NEO::familyName[core0]; if (coreName[0] == 'G') { coreName[0] = 'g'; } std::vector expected; 
unsigned int coreIt = IGFX_UNKNOWN_CORE; ++coreIt; while (coreIt <= static_cast(core0)) { oclocArgHelperWithoutInput->getProductConfigsForGfxCoreFamily(static_cast(coreIt), expected); ++coreIt; } auto got = NEO::getTargetConfigsForFatbinary("-" + coreName, oclocArgHelperWithoutInput.get()); EXPECT_EQ(expected.size(), got.size()); for (unsigned int i = 0; i < got.size(); i++) { EXPECT_TRUE(expected[i] == got[i]); } } TEST_F(OclocFatBinaryGetTargetConfigsForFatbinary, GivenArchitectureClosedRangeThenReturnAllEnabledConfigsThatMatch) { auto allEnabledCores = getEnabledCores(); if (allEnabledCores.size() < 4) { GTEST_SKIP(); } auto coreFrom = allEnabledCores[1]; auto coreTo = allEnabledCores[allEnabledCores.size() - 2]; std::string coreNameFrom = NEO::familyName[coreFrom]; if (coreNameFrom[0] == 'G') { coreNameFrom[0] = 'g'; } std::string coreNameTo = NEO::familyName[coreTo]; if (coreNameTo[0] == 'G') { coreNameTo[0] = 'g'; } std::vector expected; auto coreIt = coreFrom; while (coreIt <= coreTo) { oclocArgHelperWithoutInput->getProductConfigsForGfxCoreFamily(static_cast(coreIt), expected); coreIt = static_cast(static_cast(coreIt) + 1); } auto got = NEO::getTargetConfigsForFatbinary(coreNameFrom + "-" + coreNameTo, oclocArgHelperWithoutInput.get()); EXPECT_EQ(expected.size(), got.size()); for (unsigned int i = 0; i < got.size(); i++) { EXPECT_TRUE(expected[i] == got[i]); } got = NEO::getTargetConfigsForFatbinary(coreNameTo + "-" + coreNameFrom, oclocArgHelperWithoutInput.get()); // swap min with max implicitly EXPECT_EQ(expected.size(), got.size()); for (unsigned int i = 0; i < got.size(); i++) { EXPECT_TRUE(expected[i] == got[i]); } } TEST_F(OclocFatBinaryGetTargetConfigsForFatbinary, GivenUnkownArchitectureThenReturnEmptyList) { auto got = NEO::getTargetConfigsForFatbinary("gen0", oclocArgHelperWithoutInput.get()); EXPECT_TRUE(got.empty()); } TEST_F(OclocFatBinaryGetTargetConfigsForFatbinary, GivenMutiplePlatformWhenSecondPlatformsIsUnknownThenReturnErrorMessage) { 
std::unique_ptr argHelper = std::make_unique(); auto allEnabledPlatforms = NEO::getAllSupportedTargetPlatforms(); auto platform0 = allEnabledPlatforms[0]; std::string platform0Name = NEO::hardwarePrefix[platform0]; auto platformTarget = platform0Name + ",unk"; std::stringstream resString; std::vector argv = { "ocloc", "-file", "test_files/copybuffer.cl", "-device", platformTarget}; testing::internal::CaptureStdout(); int retVal = buildFatBinary(argv, argHelper.get()); auto output = testing::internal::GetCapturedStdout(); EXPECT_NE(retVal, NEO::OclocErrorCode::SUCCESS); resString << "Unknown device : unk\n"; resString << "Failed to parse target devices from : " << platformTarget << "\n"; EXPECT_STREQ(output.c_str(), resString.str().c_str()); } TEST_F(OclocFatBinaryGetTargetConfigsForFatbinary, GivenClosedRangeTooExtensiveWhenConfigIsValidThenErrorMessageAndFailIsReturned) { std::unique_ptr argHelper = std::make_unique(); auto allEnabledDeviceConfigs = argHelper->getAllSupportedDeviceConfigs(); if (allEnabledDeviceConfigs.size() < 4) { GTEST_SKIP(); } std::string configNum0 = argHelper->parseProductConfigFromValue(allEnabledDeviceConfigs[0].config); std::string configNum1 = argHelper->parseProductConfigFromValue(allEnabledDeviceConfigs[1].config); std::string configNum2 = argHelper->parseProductConfigFromValue(allEnabledDeviceConfigs[2].config); std::stringstream configString; configString << configNum0 << "-" << configNum1 << "-" << configNum2; std::stringstream resString; std::vector argv = { "ocloc", "-file", "test_files/copybuffer.cl", "-device", configString.str()}; testing::internal::CaptureStdout(); int retVal = buildFatBinary(argv, argHelper.get()); auto output = testing::internal::GetCapturedStdout(); EXPECT_NE(retVal, NEO::OclocErrorCode::SUCCESS); resString << "Invalid range : " << configString.str() << " - should be from-to or -to or from-" << "\n"; resString << "Failed to parse target devices from : " << configString.str() << "\n"; 
EXPECT_STREQ(output.c_str(), resString.str().c_str()); } TEST_F(OclocFatBinaryGetTargetConfigsForFatbinary, GivenClosedRangeTooExtensiveWhenPlatformIsValidThenErrorMessageAndReturnEmptyList) { std::unique_ptr argHelper = std::make_unique(); auto allEnabledPlatforms = NEO::getAllSupportedTargetPlatforms(); if (allEnabledPlatforms.size() < 4) { GTEST_SKIP(); } auto platform0 = allEnabledPlatforms[0]; std::string platform0Name = NEO::hardwarePrefix[platform0]; auto platform1 = allEnabledPlatforms[1]; std::string platform1Name = NEO::hardwarePrefix[platform1]; auto platform2 = allEnabledPlatforms[2]; std::string platform2Name = NEO::hardwarePrefix[platform2]; std::string platformsTarget = platform0Name + "-" + platform1Name + "-" + platform2Name; std::string resString = "Invalid range : " + platformsTarget + " - should be from-to or -to or from-\n"; testing::internal::CaptureStdout(); auto got = NEO::getTargetPlatformsForFatbinary(platformsTarget, argHelper.get()); auto output = testing::internal::GetCapturedStdout(); EXPECT_STREQ(output.c_str(), resString.c_str()); EXPECT_TRUE(got.empty()); } TEST_F(OclocFatBinaryGetTargetConfigsForFatbinary, GivenPlatformClosedRangeWhenSecondPlatformIsUnkownThenReturnEmptyList) { auto allEnabledPlatforms = NEO::getAllSupportedTargetPlatforms(); auto platform0 = allEnabledPlatforms[0]; std::string platform0Name = NEO::hardwarePrefix[platform0]; auto got = NEO::getTargetPlatformsForFatbinary(platform0Name + "-unk", oclocArgHelperWithoutInput.get()); EXPECT_TRUE(got.empty()); } TEST_F(OclocFatBinaryGetTargetConfigsForFatbinary, GivenGenOpenRangeFromWhenGenIsUnknownThenReturnEmptyList) { auto got = NEO::getTargetConfigsForFatbinary("gen2-", oclocArgHelperWithoutInput.get()); EXPECT_TRUE(got.empty()); } TEST_F(OclocFatBinaryGetTargetConfigsForFatbinary, GivenGenOpenRangeToWhenGenIsUnknownThenReturnEmptyList) { auto got = NEO::getTargetConfigsForFatbinary("-gen2", oclocArgHelperWithoutInput.get()); EXPECT_TRUE(got.empty()); } 
TEST_F(OclocFatBinaryGetTargetConfigsForFatbinary, GivenGenClosedRangeWhenAnyOfGensIsUnknownThenReturnEmptyList) { auto allEnabledPlatforms = NEO::getAllSupportedTargetPlatforms(); auto platform0 = allEnabledPlatforms[0]; auto gfxCore0 = NEO::hardwareInfoTable[platform0]->platform.eRenderCoreFamily; std::string genName = NEO::familyName[gfxCore0]; if (genName[0] == 'G') { genName[0] = 'g'; } auto got = NEO::getTargetConfigsForFatbinary("gen2-" + genName, oclocArgHelperWithoutInput.get()); EXPECT_TRUE(got.empty()); got = NEO::getTargetConfigsForFatbinary(genName + "-gen2", oclocArgHelperWithoutInput.get()); EXPECT_TRUE(got.empty()); got = NEO::getTargetConfigsForFatbinary(genName + ",gen2", oclocArgHelperWithoutInput.get()); EXPECT_TRUE(got.empty()); got = NEO::getTargetConfigsForFatbinary("gen2," + genName, oclocArgHelperWithoutInput.get()); EXPECT_TRUE(got.empty()); } TEST_F(OclocFatBinaryGetTargetConfigsForFatbinary, GivenTwoPlatformsWhenFatBinaryBuildIsInvokedThenSuccessIsReturned) { std::unique_ptr argHelper = std::make_unique(); auto allEnabledPlatforms = NEO::getAllSupportedTargetPlatforms(); if (allEnabledPlatforms.size() < 3) { GTEST_SKIP(); } auto platform0 = allEnabledPlatforms[0]; ConstStringRef platformName0(hardwarePrefix[platform0], strlen(hardwarePrefix[platform0])); auto platform1 = allEnabledPlatforms[1]; ConstStringRef platformName1(hardwarePrefix[platform1], strlen(hardwarePrefix[platform1])); std::vector expected{platformName0, platformName1}; std::string platformsTarget = platformName0.str() + "," + platformName1.str(); auto got = NEO::getTargetPlatformsForFatbinary(platformsTarget, argHelper.get()); EXPECT_EQ(expected, got); auto platformRev0 = std::to_string(hardwareInfoTable[platform0]->platform.usRevId); auto platformRev1 = std::to_string(hardwareInfoTable[platform1]->platform.usRevId); std::vector platformsRevision{platformRev0, platformRev1}; std::stringstream resString; std::vector argv = { "ocloc", "-file", "test_files/copybuffer.cl", 
"-device", platformsTarget}; testing::internal::CaptureStdout(); int retVal = buildFatBinary(argv, argHelper.get()); auto output = testing::internal::GetCapturedStdout(); EXPECT_EQ(retVal, NEO::OclocErrorCode::SUCCESS); for (uint32_t i = 0; i < got.size(); i++) { resString << "Build succeeded for : " << expected[i].str() + "." + platformsRevision[i] + ".\n"; } EXPECT_STREQ(output.c_str(), resString.str().c_str()); } TEST_F(OclocFatBinaryGetTargetConfigsForFatbinary, GivenPlatformsClosedRangeWhenFatBinaryBuildIsInvokedThenSuccessIsReturned) { std::unique_ptr argHelper = std::make_unique(); auto allEnabledPlatforms = NEO::getAllSupportedTargetPlatforms(); if (allEnabledPlatforms.size() < 4) { GTEST_SKIP(); } auto platformFrom = allEnabledPlatforms[0]; ConstStringRef platformNameFrom(hardwarePrefix[platformFrom], strlen(hardwarePrefix[platformFrom])); auto platformTo = allEnabledPlatforms[allEnabledPlatforms.size() / 2]; ConstStringRef platformNameTo(hardwarePrefix[platformTo], strlen(hardwarePrefix[platformTo])); if (platformFrom > platformTo) { std::swap(platformFrom, platformTo); } std::vector requestedPlatforms; auto from = std::find(allEnabledPlatforms.begin(), allEnabledPlatforms.end(), platformFrom); auto to = std::find(allEnabledPlatforms.begin(), allEnabledPlatforms.end(), platformTo) + 1; requestedPlatforms.insert(requestedPlatforms.end(), from, to); auto expected = toProductNames(requestedPlatforms); std::string platformsTarget = platformNameFrom.str() + "-" + platformNameTo.str(); auto got = NEO::getTargetPlatformsForFatbinary(platformsTarget, argHelper.get()); EXPECT_EQ(expected, got); std::vector platformsRevisions; for (auto platform : requestedPlatforms) { platformsRevisions.push_back(std::to_string(hardwareInfoTable[platform]->platform.usRevId)); } std::stringstream resString; std::vector argv = { "ocloc", "-file", "test_files/copybuffer.cl", "-device", platformsTarget}; testing::internal::CaptureStdout(); int retVal = buildFatBinary(argv, 
argHelper.get()); auto output = testing::internal::GetCapturedStdout(); EXPECT_EQ(retVal, NEO::OclocErrorCode::SUCCESS); for (uint32_t i = 0; i < got.size(); i++) { resString << "Build succeeded for : " << expected[i].str() + "." + platformsRevisions[i] + ".\n"; } EXPECT_STREQ(output.c_str(), resString.str().c_str()); } TEST_F(OclocFatBinaryGetTargetConfigsForFatbinary, GivenPlatformsOpenRangeToWhenFatBinaryBuildIsInvokedThenSuccessIsReturned) { std::unique_ptr argHelper = std::make_unique(); auto allEnabledPlatforms = NEO::getAllSupportedTargetPlatforms(); if (allEnabledPlatforms.size() < 4) { GTEST_SKIP(); } auto platformTo = allEnabledPlatforms[0]; ConstStringRef platformNameTo(hardwarePrefix[platformTo], strlen(hardwarePrefix[platformTo])); std::vector requestedPlatforms; auto platformToId = std::find(allEnabledPlatforms.begin(), allEnabledPlatforms.end(), platformTo); assert(platformToId != allEnabledPlatforms.end()); requestedPlatforms.insert(requestedPlatforms.end(), allEnabledPlatforms.begin(), platformToId + 1); auto expected = toProductNames(requestedPlatforms); std::string platformsTarget = "-" + platformNameTo.str(); auto got = NEO::getTargetPlatformsForFatbinary(platformsTarget, argHelper.get()); EXPECT_EQ(expected, got); std::vector platformsRevisions; for (auto platform : requestedPlatforms) { platformsRevisions.push_back(std::to_string(hardwareInfoTable[platform]->platform.usRevId)); } std::stringstream resString; std::vector argv = { "ocloc", "-file", "test_files/copybuffer.cl", "-device", platformsTarget}; testing::internal::CaptureStdout(); int retVal = buildFatBinary(argv, argHelper.get()); auto output = testing::internal::GetCapturedStdout(); EXPECT_EQ(retVal, NEO::OclocErrorCode::SUCCESS); for (uint32_t i = 0; i < got.size(); i++) { resString << "Build succeeded for : " << expected[i].str() + "." 
+ platformsRevisions[i] + ".\n"; } EXPECT_STREQ(output.c_str(), resString.str().c_str()); } TEST_F(OclocFatBinaryGetTargetConfigsForFatbinary, GivenPlatformsOpenRangeFromWhenFatBinaryBuildIsInvokedThenSuccessIsReturned) { std::unique_ptr argHelper = std::make_unique(); auto allEnabledPlatforms = NEO::getAllSupportedTargetPlatforms(); if (allEnabledPlatforms.size() < 4) { GTEST_SKIP(); } auto platformFrom = allEnabledPlatforms[0]; ConstStringRef platformNameFrom(hardwarePrefix[platformFrom], strlen(hardwarePrefix[platformFrom])); std::vector requestedPlatforms; auto platformToId = std::find(allEnabledPlatforms.begin(), allEnabledPlatforms.end(), platformFrom); assert(platformToId != allEnabledPlatforms.end()); requestedPlatforms.insert(requestedPlatforms.end(), platformToId, allEnabledPlatforms.end()); auto expected = toProductNames(requestedPlatforms); std::string platformsTarget = platformNameFrom.str() + "-"; auto got = NEO::getTargetPlatformsForFatbinary(platformsTarget, argHelper.get()); EXPECT_EQ(expected, got); std::vector platformsRevisions; for (auto platform : requestedPlatforms) { platformsRevisions.push_back(std::to_string(hardwareInfoTable[platform]->platform.usRevId)); } std::stringstream resString; std::vector argv = { "ocloc", "-file", "test_files/copybuffer.cl", "-device", platformsTarget}; testing::internal::CaptureStdout(); int retVal = buildFatBinary(argv, argHelper.get()); auto output = testing::internal::GetCapturedStdout(); EXPECT_EQ(retVal, NEO::OclocErrorCode::SUCCESS); for (uint32_t i = 0; i < got.size(); i++) { resString << "Build succeeded for : " << expected[i].str() + "." 
+ platformsRevisions[i] + ".\n"; } EXPECT_STREQ(output.c_str(), resString.str().c_str()); } TEST_F(OclocFatBinaryGetTargetConfigsForFatbinary, GivenTwoConfigsWhenFatBinaryBuildIsInvokedThenSuccessIsReturned) { std::unique_ptr argHelper = std::make_unique(); auto allEnabledDeviceConfigs = argHelper->getAllSupportedDeviceConfigs(); if (allEnabledDeviceConfigs.size() < 2) { GTEST_SKIP(); } auto config0 = allEnabledDeviceConfigs[0]; auto config1 = allEnabledDeviceConfigs[1]; auto configStr0 = argHelper->parseProductConfigFromValue(config0.config); auto configStr1 = argHelper->parseProductConfigFromValue(config1.config); std::vector targets{configStr0, configStr1}; std::vector expected; for (auto &target : targets) { auto configFirstEl = argHelper->findConfigMatch(target, true); auto configLastEl = argHelper->findConfigMatch(target, false); for (auto &deviceConfig : allEnabledDeviceConfigs) { if (deviceConfig.config >= configFirstEl && deviceConfig.config <= configLastEl) { expected.push_back(deviceConfig); } } } auto configsTarget = configStr0 + "," + configStr1; auto got = NEO::getTargetConfigsForFatbinary(configsTarget, argHelper.get()); EXPECT_EQ(expected.size(), got.size()); for (unsigned int i = 0; i < got.size(); i++) { EXPECT_TRUE(expected[i] == got[i]); } std::stringstream resString; std::vector argv = { "ocloc", "-file", "test_files/copybuffer.cl", "-device", configsTarget}; testing::internal::CaptureStdout(); int retVal = buildFatBinary(argv, argHelper.get()); auto output = testing::internal::GetCapturedStdout(); EXPECT_EQ(retVal, NEO::OclocErrorCode::SUCCESS); for (auto deviceConfig : expected) { auto targetConfig = argHelper->parseProductConfigFromValue(deviceConfig.config); resString << "Build succeeded for : " << targetConfig + ".\n"; } EXPECT_STREQ(output.c_str(), resString.str().c_str()); } TEST_F(OclocFatBinaryGetTargetConfigsForFatbinary, GivenProductConfigOpenRangeFromWhenFatBinaryBuildIsInvokedThenSuccessIsReturned) { std::unique_ptr argHelper = 
std::make_unique(); auto allEnabledDeviceConfigs = argHelper->getAllSupportedDeviceConfigs(); if (allEnabledDeviceConfigs.size() < 2) { GTEST_SKIP(); } auto deviceMapConfig = allEnabledDeviceConfigs[allEnabledDeviceConfigs.size() / 2]; auto configNumConvention = argHelper->parseProductConfigFromValue(deviceMapConfig.config); std::vector expected; auto configFrom = std::find_if(allEnabledDeviceConfigs.begin(), allEnabledDeviceConfigs.end(), [&cf = deviceMapConfig](const DeviceMapping &c) -> bool { return cf.config == c.config; }); expected.insert(expected.end(), configFrom, allEnabledDeviceConfigs.end()); auto configsTarget = configNumConvention + "-"; auto got = NEO::getTargetConfigsForFatbinary(configsTarget, argHelper.get()); EXPECT_EQ(expected.size(), got.size()); for (unsigned int i = 0; i < got.size(); i++) { EXPECT_TRUE(expected[i] == got[i]); } std::stringstream resString; std::vector argv = { "ocloc", "-file", "test_files/copybuffer.cl", "-device", configsTarget}; testing::internal::CaptureStdout(); int retVal = buildFatBinary(argv, argHelper.get()); auto output = testing::internal::GetCapturedStdout(); EXPECT_EQ(retVal, NEO::OclocErrorCode::SUCCESS); for (auto deviceConfig : expected) { auto targetConfig = argHelper->parseProductConfigFromValue(deviceConfig.config); resString << "Build succeeded for : " << targetConfig + ".\n"; } EXPECT_STREQ(output.c_str(), resString.str().c_str()); } TEST_F(OclocFatBinaryGetTargetConfigsForFatbinary, GivenProductConfigOpenRangeToWhenFatBinaryBuildIsInvokedThenSuccessIsReturned) { std::unique_ptr argHelper = std::make_unique(); auto allEnabledDeviceConfigs = argHelper->getAllSupportedDeviceConfigs(); if (allEnabledDeviceConfigs.size() < 2) { GTEST_SKIP(); } auto deviceMapConfig = allEnabledDeviceConfigs[allEnabledDeviceConfigs.size() / 2]; auto configNumConvention = argHelper->parseProductConfigFromValue(deviceMapConfig.config); std::vector expected; for (auto &deviceConfig : allEnabledDeviceConfigs) { if 
(deviceConfig.config <= deviceMapConfig.config) { expected.push_back(deviceConfig); } } auto configsTarget = "-" + configNumConvention; auto got = NEO::getTargetConfigsForFatbinary(configsTarget, argHelper.get()); EXPECT_EQ(expected.size(), got.size()); for (unsigned int i = 0; i < got.size(); i++) { EXPECT_TRUE(expected[i] == got[i]); } std::stringstream resString; std::vector argv = { "ocloc", "-file", "test_files/copybuffer.cl", "-device", configsTarget}; testing::internal::CaptureStdout(); int retVal = buildFatBinary(argv, argHelper.get()); auto output = testing::internal::GetCapturedStdout(); EXPECT_EQ(retVal, NEO::OclocErrorCode::SUCCESS); for (auto deviceConfig : expected) { auto targetConfig = argHelper->parseProductConfigFromValue(deviceConfig.config); resString << "Build succeeded for : " << targetConfig + ".\n"; } EXPECT_STREQ(output.c_str(), resString.str().c_str()); } TEST_F(OclocFatBinaryGetTargetConfigsForFatbinary, GivenProductConfigClosedRangeWhenFatBinaryBuildIsInvokedThenSuccessIsReturned) { std::unique_ptr argHelper = std::make_unique(); auto allEnabledDeviceConfigs = argHelper->getAllSupportedDeviceConfigs(); if (allEnabledDeviceConfigs.size() < 4) { GTEST_SKIP(); } auto deviceMapConfigFrom = allEnabledDeviceConfigs[1]; auto deviceMapConfigTo = allEnabledDeviceConfigs[allEnabledDeviceConfigs.size() - 2]; auto configFromNumConvention = argHelper->parseProductConfigFromValue(deviceMapConfigFrom.config); auto configToNumConvention = argHelper->parseProductConfigFromValue(deviceMapConfigTo.config); std::vector expected; for (auto &deviceConfig : allEnabledDeviceConfigs) { if (deviceConfig.config >= deviceMapConfigFrom.config && deviceConfig.config <= deviceMapConfigTo.config) { expected.push_back(deviceConfig); } } auto configsTarget = configFromNumConvention + "-" + configToNumConvention; auto got = NEO::getTargetConfigsForFatbinary(configsTarget, argHelper.get()); // swap min with max implicitly EXPECT_EQ(expected.size(), got.size()); for 
(unsigned int i = 0; i < got.size(); i++) { EXPECT_TRUE(expected[i] == got[i]); } got = NEO::getTargetConfigsForFatbinary(configToNumConvention + "-" + configFromNumConvention, argHelper.get()); // swap min with max implicitly EXPECT_EQ(expected.size(), got.size()); for (unsigned int i = 0; i < got.size(); i++) { EXPECT_TRUE(expected[i] == got[i]); } std::stringstream resString; std::vector argv = { "ocloc", "-file", "test_files/copybuffer.cl", "-device", configsTarget}; testing::internal::CaptureStdout(); int retVal = buildFatBinary(argv, argHelper.get()); auto output = testing::internal::GetCapturedStdout(); EXPECT_EQ(retVal, NEO::OclocErrorCode::SUCCESS); for (auto deviceConfig : expected) { auto targetConfig = argHelper->parseProductConfigFromValue(deviceConfig.config); resString << "Build succeeded for : " << targetConfig + ".\n"; } EXPECT_STREQ(output.c_str(), resString.str().c_str()); } TEST_F(OclocFatBinaryGetTargetConfigsForFatbinary, GivenArgsWhenCorrectDeviceNumerationIsProvidedWithoutRevisionThenTargetsAreFound) { auto allEnabledDeviceConfigs = oclocArgHelperWithoutInput->getAllSupportedDeviceConfigs(); if (allEnabledDeviceConfigs.size() < 2) { GTEST_SKIP(); } std::string configNum0 = oclocArgHelperWithoutInput->parseProductConfigFromValue(allEnabledDeviceConfigs[0].config); auto major_pos = configNum0.find("."); auto minor_pos = configNum0.find(".", ++major_pos); auto cutRevision = configNum0.substr(0, minor_pos); auto got = NEO::getTargetConfigsForFatbinary(ConstStringRef(cutRevision), oclocArgHelperWithoutInput.get()); EXPECT_FALSE(got.empty()); } TEST_F(OclocFatBinaryGetTargetConfigsForFatbinary, GivenArgsWhenCorrectDeviceNumerationIsProvidedWithoutMinorAndRevisionThenTargetsAreFound) { auto allEnabledDeviceConfigs = oclocArgHelperWithoutInput->getAllSupportedDeviceConfigs(); if (allEnabledDeviceConfigs.size() < 2) { GTEST_SKIP(); } std::string configNum0 = oclocArgHelperWithoutInput->parseProductConfigFromValue(allEnabledDeviceConfigs[0].config); 
auto major_pos = configNum0.find("."); auto cutMinorAndRevision = configNum0.substr(0, major_pos); auto got = NEO::getTargetConfigsForFatbinary(ConstStringRef(cutMinorAndRevision), oclocArgHelperWithoutInput.get()); EXPECT_FALSE(got.empty()); } TEST_F(OclocFatBinaryTest, GivenSpirvInputWhenFatBinaryIsRequestedThenArchiveContainsGenericIrFileWithSpirvContent) { const auto devices = prepareTwoDevices(&mockArgHelper); if (devices.empty()) { GTEST_SKIP(); } const std::vector args = { "ocloc", "-output", outputArchiveName, "-file", spirvFilename, "-output_no_suffix", "-spirv_input", "-device", devices}; const auto buildResult = buildFatBinary(args, &mockArgHelper); ASSERT_EQ(OclocErrorCode::SUCCESS, buildResult); ASSERT_EQ(1u, mockArgHelper.interceptedFiles.count(outputArchiveName)); const auto &rawArchive = mockArgHelper.interceptedFiles[outputArchiveName]; const auto archiveBytes = ArrayRef::fromAny(rawArchive.data(), rawArchive.size()); std::string outErrReason{}; std::string outWarning{}; const auto decodedArchive = NEO::Ar::decodeAr(archiveBytes, outErrReason, outWarning); ASSERT_NE(nullptr, decodedArchive.magic); ASSERT_TRUE(outErrReason.empty()); ASSERT_TRUE(outWarning.empty()); const auto spirvFileIt = searchInArchiveByFilename(decodedArchive, archiveGenericIrName); ASSERT_NE(decodedArchive.files.end(), spirvFileIt); const auto elf = Elf::decodeElf(spirvFileIt->fileData, outErrReason, outWarning); ASSERT_NE(nullptr, elf.elfFileHeader); ASSERT_TRUE(outErrReason.empty()); ASSERT_TRUE(outWarning.empty()); const auto isSpirvSection = [](const auto §ion) { return section.header && section.header->type == Elf::SHT_OPENCL_SPIRV; }; const auto spirvSectionIt = std::find_if(elf.sectionHeaders.begin(), elf.sectionHeaders.end(), isSpirvSection); ASSERT_NE(elf.sectionHeaders.end(), spirvSectionIt); ASSERT_EQ(spirvFileContent.size() + 1, spirvSectionIt->header->size); const auto isSpirvDataEqualsInputFileData = std::memcmp(spirvFileContent.data(), 
spirvSectionIt->data.begin(), spirvFileContent.size()) == 0; EXPECT_TRUE(isSpirvDataEqualsInputFileData); } TEST_F(OclocFatBinaryTest, GivenSpirvInputAndExcludeIrFlagWhenFatBinaryIsRequestedThenArchiveDoesNotContainGenericIrFile) { const auto devices = prepareTwoDevices(&mockArgHelper); if (devices.empty()) { GTEST_SKIP(); } const std::vector args = { "ocloc", "-output", outputArchiveName, "-file", spirvFilename, "-output_no_suffix", "-spirv_input", "-exclude_ir", "-device", devices}; const auto buildResult = buildFatBinary(args, &mockArgHelper); ASSERT_EQ(OclocErrorCode::SUCCESS, buildResult); ASSERT_EQ(1u, mockArgHelper.interceptedFiles.count(outputArchiveName)); const auto &rawArchive = mockArgHelper.interceptedFiles[outputArchiveName]; const auto archiveBytes = ArrayRef::fromAny(rawArchive.data(), rawArchive.size()); std::string outErrReason{}; std::string outWarning{}; const auto decodedArchive = NEO::Ar::decodeAr(archiveBytes, outErrReason, outWarning); ASSERT_NE(nullptr, decodedArchive.magic); ASSERT_TRUE(outErrReason.empty()); ASSERT_TRUE(outWarning.empty()); const auto spirvFileIt = searchInArchiveByFilename(decodedArchive, archiveGenericIrName); EXPECT_EQ(decodedArchive.files.end(), spirvFileIt); } TEST_F(OclocFatBinaryTest, GivenClInputFileWhenFatBinaryIsRequestedThenArchiveDoesNotContainGenericIrFile) { const auto devices = prepareTwoDevices(&mockArgHelper); if (devices.empty()) { GTEST_SKIP(); } const std::string clFilename = "some_kernel.cl"; mockArgHelperFilesMap[clFilename] = "__kernel void some_kernel(){}"; const std::vector args = { "ocloc", "-output", outputArchiveName, "-file", clFilename, "-output_no_suffix", "-device", devices}; const auto buildResult = buildFatBinary(args, &mockArgHelper); ASSERT_EQ(OclocErrorCode::SUCCESS, buildResult); ASSERT_EQ(1u, mockArgHelper.interceptedFiles.count(outputArchiveName)); const auto &rawArchive = mockArgHelper.interceptedFiles[outputArchiveName]; const auto archiveBytes = 
ArrayRef::fromAny(rawArchive.data(), rawArchive.size()); std::string outErrReason{}; std::string outWarning{}; const auto decodedArchive = NEO::Ar::decodeAr(archiveBytes, outErrReason, outWarning); ASSERT_NE(nullptr, decodedArchive.magic); ASSERT_TRUE(outErrReason.empty()); ASSERT_TRUE(outWarning.empty()); const auto spirvFileIt = searchInArchiveByFilename(decodedArchive, archiveGenericIrName); EXPECT_EQ(decodedArchive.files.end(), spirvFileIt); } TEST_F(OclocFatBinaryTest, GivenEmptyFileWhenAppendingGenericIrThenInvalidFileIsReturned) { Ar::ArEncoder ar; std::string emptyFile{"empty_file.spv"}; mockArgHelperFilesMap[emptyFile] = ""; mockArgHelper.shouldLoadDataFromFileReturnZeroSize = true; ::testing::internal::CaptureStdout(); const auto errorCode{appendGenericIr(ar, emptyFile, &mockArgHelper)}; const auto output{::testing::internal::GetCapturedStdout()}; EXPECT_EQ(OclocErrorCode::INVALID_FILE, errorCode); EXPECT_EQ("Error! Couldn't read input file!\n", output); } TEST_F(OclocFatBinaryTest, GivenInvalidIrFileWhenAppendingGenericIrThenInvalidFileIsReturned) { Ar::ArEncoder ar; std::string dummyFile{"dummy_file.spv"}; mockArgHelperFilesMap[dummyFile] = "This is not IR!"; ::testing::internal::CaptureStdout(); const auto errorCode{appendGenericIr(ar, dummyFile, &mockArgHelper)}; const auto output{::testing::internal::GetCapturedStdout()}; EXPECT_EQ(OclocErrorCode::INVALID_FILE, errorCode); const auto expectedErrorMessage{"Error! Input file is not in supported generic IR format! " "Currently supported format is SPIR-V.\n"}; EXPECT_EQ(expectedErrorMessage, output); } TEST(OclocFatBinaryHelpersTest, GivenPreviousCompilationErrorWhenBuildingFatbinaryForTargetThenNothingIsDoneAndErrorIsReturned) { const std::vector argv = { "ocloc", "-file", "test_files/copybuffer.cl", "-device", gEnvironment->devicePrefix.c_str()}; MockOfflineCompiler mockOfflineCompiler{}; mockOfflineCompiler.initialize(argv.size(), argv); // We expect that nothing is done and error is returned. 
// Therefore, if offline compiler is used, ensure that it just returns error code, // which is different than expected one. mockOfflineCompiler.buildReturnValue = OclocErrorCode::SUCCESS; Ar::ArEncoder ar; const std::string pointerSize{"32"}; const auto mockArgHelper = mockOfflineCompiler.uniqueHelper.get(); const auto deviceConfig = getDeviceConfig(mockOfflineCompiler); const int previousReturnValue{OclocErrorCode::INVALID_FILE}; const auto buildResult = buildFatBinaryForTarget(previousReturnValue, argv, pointerSize, ar, &mockOfflineCompiler, mockArgHelper, deviceConfig); EXPECT_EQ(OclocErrorCode::INVALID_FILE, buildResult); EXPECT_EQ(0, mockOfflineCompiler.buildCalledCount); } TEST(OclocFatBinaryHelpersTest, GivenPreviousCompilationSuccessAndFailingBuildWhenBuildingFatbinaryForTargetThenCompilationIsInvokedAndErrorLogIsPrinted) { const std::vector argv = { "ocloc", "-file", "test_files/copybuffer.cl", "-device", gEnvironment->devicePrefix.c_str()}; MockOfflineCompiler mockOfflineCompiler{}; mockOfflineCompiler.initialize(argv.size(), argv); mockOfflineCompiler.buildReturnValue = OclocErrorCode::INVALID_FILE; Ar::ArEncoder ar; const std::string pointerSize{"32"}; const auto mockArgHelper = mockOfflineCompiler.uniqueHelper.get(); const auto deviceConfig = getDeviceConfig(mockOfflineCompiler); ::testing::internal::CaptureStdout(); const int previousReturnValue{OclocErrorCode::SUCCESS}; const auto buildResult = buildFatBinaryForTarget(previousReturnValue, argv, pointerSize, ar, &mockOfflineCompiler, mockArgHelper, deviceConfig); const auto output{::testing::internal::GetCapturedStdout()}; EXPECT_EQ(OclocErrorCode::INVALID_FILE, buildResult); EXPECT_EQ(1, mockOfflineCompiler.buildCalledCount); std::string commandString{}; for (const auto &arg : argv) { commandString += " "; commandString += arg; } const std::string expectedOutput{ "Build failed for : " + deviceConfig + " with error code: -5151\n" "Command was:" + commandString + "\n"}; EXPECT_EQ(expectedOutput, 
output); } TEST(OclocFatBinaryHelpersTest, GivenNonEmptyBuildLogWhenBuildingFatbinaryForTargetThenBuildLogIsPrinted) { using namespace std::string_literals; const std::vector argv = { "ocloc", "-file", "test_files/copybuffer.cl", "-device", gEnvironment->devicePrefix.c_str()}; MockOfflineCompiler mockOfflineCompiler{}; mockOfflineCompiler.initialize(argv.size(), argv); const char buildWarning[] = "Warning: This is a build log!"; mockOfflineCompiler.updateBuildLog(buildWarning, sizeof(buildWarning)); mockOfflineCompiler.buildReturnValue = OclocErrorCode::SUCCESS; // Dummy value mockOfflineCompiler.elfBinary = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; Ar::ArEncoder ar; const std::string pointerSize{"32"}; const auto mockArgHelper = mockOfflineCompiler.uniqueHelper.get(); const auto deviceConfig = getDeviceConfig(mockOfflineCompiler); ::testing::internal::CaptureStdout(); const int previousReturnValue{OclocErrorCode::SUCCESS}; const auto buildResult = buildFatBinaryForTarget(previousReturnValue, argv, pointerSize, ar, &mockOfflineCompiler, mockArgHelper, deviceConfig); const auto output{::testing::internal::GetCapturedStdout()}; EXPECT_EQ(OclocErrorCode::SUCCESS, buildResult); EXPECT_EQ(1, mockOfflineCompiler.buildCalledCount); const std::string expectedOutput{buildWarning + "\nBuild succeeded for : "s + deviceConfig + ".\n"s}; EXPECT_EQ(expectedOutput, output); } TEST(OclocFatBinaryHelpersTest, GivenQuietModeWhenBuildingFatbinaryForTargetThenNothingIsPrinted) { using namespace std::string_literals; const std::vector argv = { "ocloc", "-file", "test_files/copybuffer.cl", "-q", "-device", gEnvironment->devicePrefix.c_str()}; MockOfflineCompiler mockOfflineCompiler{}; mockOfflineCompiler.initialize(argv.size(), argv); // Dummy value mockOfflineCompiler.elfBinary = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; mockOfflineCompiler.buildReturnValue = OclocErrorCode::SUCCESS; Ar::ArEncoder ar; const std::string pointerSize{"32"}; const auto mockArgHelper = mockOfflineCompiler.uniqueHelper.get(); 
const auto deviceConfig = getDeviceConfig(mockOfflineCompiler); ::testing::internal::CaptureStdout(); const int previousReturnValue{OclocErrorCode::SUCCESS}; const auto buildResult = buildFatBinaryForTarget(previousReturnValue, argv, pointerSize, ar, &mockOfflineCompiler, mockArgHelper, deviceConfig); const auto output{::testing::internal::GetCapturedStdout()}; EXPECT_EQ(OclocErrorCode::SUCCESS, buildResult); EXPECT_EQ(1, mockOfflineCompiler.buildCalledCount); EXPECT_TRUE(output.empty()) << output; } } // namespace NEOcompute-runtime-22.14.22890/opencl/test/unit_test/offline_compiler/ocloc_fatbinary_tests.h000066400000000000000000000024001422164147700316700ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/offline_compiler/source/ocloc_arg_helper.h" #include "shared/offline_compiler/source/ocloc_fatbinary.h" #include "gtest/gtest.h" #include "mock/mock_argument_helper.h" #include namespace NEO { class OclocFatBinaryGetTargetConfigsForFatbinary : public ::testing::Test { public: OclocFatBinaryGetTargetConfigsForFatbinary() { oclocArgHelperWithoutInput = std::make_unique(); oclocArgHelperWithoutInput->getPrinterRef() = MessagePrinter{true}; } std::unique_ptr oclocArgHelperWithoutInput; }; class OclocFatBinaryTest : public ::testing::Test { public: OclocFatBinaryTest() { mockArgHelperFilesMap[spirvFilename] = spirvFileContent; mockArgHelper.interceptOutput = true; } protected: constexpr static ConstStringRef archiveGenericIrName{"generic_ir"}; MockOclocArgHelper::FilesMap mockArgHelperFilesMap{}; MockOclocArgHelper mockArgHelper{mockArgHelperFilesMap}; std::string outputArchiveName{"output_archive"}; std::string spirvFilename{"input_file.spv"}; std::string spirvFileContent{"\x07\x23\x02\x03"}; }; } // namespace NEO 
compute-runtime-22.14.22890/opencl/test/unit_test/offline_compiler/ocloc_product_config_tests.cpp000066400000000000000000000022321422164147700332540ustar00rootroot00000000000000/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/offline_compiler/ocloc_product_config_tests.h" namespace NEO { TEST_P(OclocProductConfigTests, GivenProductConfigValuesWhenInitHardwareInfoThenCorrectValuesAreSet) { auto deviceId = 0u; auto revId = 0u; auto allSupportedDeviceConfigs = mockOfflineCompiler->argHelper->getAllSupportedDeviceConfigs(); for (const auto &deviceConfig : allSupportedDeviceConfigs) { if (productConfig == deviceConfig.config) { if (deviceConfig.deviceIds) { deviceId = deviceConfig.deviceIds->front(); } revId = deviceConfig.revId; break; } } mockOfflineCompiler->deviceName = mockOfflineCompiler->argHelper->parseProductConfigFromValue(productConfig); mockOfflineCompiler->initHardwareInfo(mockOfflineCompiler->deviceName); EXPECT_EQ(mockOfflineCompiler->hwInfo.platform.eProductFamily, productFamily); EXPECT_EQ(mockOfflineCompiler->hwInfo.platform.usRevId, revId); EXPECT_EQ(mockOfflineCompiler->hwInfo.platform.usDeviceID, deviceId); } } // namespace NEOcompute-runtime-22.14.22890/opencl/test/unit_test/offline_compiler/ocloc_product_config_tests.h000066400000000000000000000012251422164147700327220ustar00rootroot00000000000000/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/offline_compiler/mock/mock_offline_compiler.h" namespace NEO { struct OclocProductConfigTests : public ::testing::TestWithParam> { void SetUp() override { std::tie(productConfig, productFamily) = GetParam(); mockOfflineCompiler = std::make_unique(); } PRODUCT_CONFIG productConfig; PRODUCT_FAMILY productFamily; std::unique_ptr mockOfflineCompiler; }; } // namespace 
NEOcompute-runtime-22.14.22890/opencl/test/unit_test/offline_compiler/ocloc_tests_configuration.cpp000066400000000000000000000003121422164147700331130ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ namespace NEO { unsigned int ultIterationMaxTime = 45; const char *executionName = "OCLOC"; } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/offline_compiler/ocloc_validator_tests.cpp000066400000000000000000000143351422164147700322430ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/offline_compiler/source/ocloc_validator.h" #include "shared/source/device_binary_format/device_binary_formats.h" #include "shared/test/unit_test/device_binary_format/zebin_tests.h" #include "opencl/test/unit_test/offline_compiler/mock/mock_argument_helper.h" #include "gtest/gtest.h" TEST(OclocValidate, WhenFileArgIsMissingThenFail) { std::map files; MockOclocArgHelper argHelper{files}; argHelper.getPrinterRef() = MessagePrinter(true); int res = NEO::Ocloc::validate({}, &argHelper); EXPECT_EQ(-1, res); std::string oclocStdout = argHelper.getPrinterRef().getLog().str(); EXPECT_STREQ("Error : Mandatory argument -file is missing.\n", oclocStdout.c_str()); } TEST(OclocValidate, WhenInputFileIsMissingThenFail) { MockOclocArgHelper::FilesMap files; MockOclocArgHelper argHelper{files}; argHelper.getPrinterRef() = MessagePrinter(true); int res = NEO::Ocloc::validate({"-file", "src.gen"}, &argHelper); EXPECT_EQ(-1, res); std::string oclocStdout = argHelper.getPrinterRef().getLog().str(); EXPECT_STREQ("Error : Input file missing : src.gen\n", oclocStdout.c_str()); } TEST(OclocValidate, WhenInputFileIsAvailableThenLogItsSize) { MockOclocArgHelper::FilesMap files{{"src.gen", "01234567"}}; MockOclocArgHelper argHelper{files}; argHelper.getPrinterRef() = MessagePrinter(true); int res = NEO::Ocloc::validate({"-file", "src.gen"}, &argHelper); 
EXPECT_NE(0, res); std::string oclocStdout = argHelper.getPrinterRef().getLog().str(); EXPECT_NE(nullptr, strstr(oclocStdout.c_str(), "Validating : src.gen (8 bytes).\n")) << oclocStdout; } TEST(OclocValidate, WhenInputFileIsNotZebinThenFail) { MockOclocArgHelper::FilesMap files{{"src.gen", "01234567"}}; MockOclocArgHelper argHelper{files}; argHelper.getPrinterRef() = MessagePrinter(true); int res = NEO::Ocloc::validate({"-file", "src.gen"}, &argHelper); EXPECT_EQ(-2, res); std::string oclocStdout = argHelper.getPrinterRef().getLog().str(); EXPECT_NE(nullptr, strstr(oclocStdout.c_str(), "Input is not a Zebin file (not elf or wrong elf object file type)")) << oclocStdout; } TEST(OclocValidate, WhenInputIsValidZebinThenReturnSucceed) { ZebinTestData::ValidEmptyProgram zebin; MockOclocArgHelper::FilesMap files{{"src.gen", MockOclocArgHelper::FileData(reinterpret_cast(zebin.storage.data()), reinterpret_cast(zebin.storage.data()) + zebin.storage.size())}}; MockOclocArgHelper argHelper{files}; argHelper.getPrinterRef() = MessagePrinter(true); int res = NEO::Ocloc::validate({"-file", "src.gen"}, &argHelper); std::string oclocStdout = argHelper.getPrinterRef().getLog().str(); EXPECT_EQ(0, res) << oclocStdout; } TEST(OclocValidate, WhenWarningsEmitedThenRedirectsThemToStdout) { ZebinTestData::ValidEmptyProgram zebin; zebin.removeSection(NEO::Elf::SHT_ZEBIN_ZEINFO, NEO::Elf::SectionsNamesZebin::zeInfo); MockOclocArgHelper::FilesMap files{{"src.gen", MockOclocArgHelper::FileData(reinterpret_cast(zebin.storage.data()), reinterpret_cast(zebin.storage.data()) + zebin.storage.size())}}; MockOclocArgHelper argHelper{files}; argHelper.getPrinterRef() = MessagePrinter(true); int res = NEO::Ocloc::validate({"-file", "src.gen"}, &argHelper); std::string oclocStdout = argHelper.getPrinterRef().getLog().str(); EXPECT_EQ(0, res) << oclocStdout; EXPECT_NE(nullptr, strstr(oclocStdout.c_str(), "Validator detected potential problems :\nDeviceBinaryFormat::Zebin : Expected at least one 
.ze_info section, got 0")) << oclocStdout; } TEST(OclocValidate, WhenErrorsEmitedThenRedirectsThemToStdout) { ZebinTestData::ValidEmptyProgram zebin; zebin.removeSection(NEO::Elf::SHT_ZEBIN_ZEINFO, NEO::Elf::SectionsNamesZebin::zeInfo); std::string zeInfo = "version:" + toString(NEO::zeInfoDecoderVersion) + "\nkernels : \nkernels :\n"; zebin.appendSection(NEO::Elf::SHT_ZEBIN_ZEINFO, NEO::Elf::SectionsNamesZebin::zeInfo, ArrayRef(zeInfo).toArrayRef()); MockOclocArgHelper::FilesMap files{{"src.gen", MockOclocArgHelper::FileData(reinterpret_cast(zebin.storage.data()), reinterpret_cast(zebin.storage.data()) + zebin.storage.size())}}; MockOclocArgHelper argHelper{files}; argHelper.getPrinterRef() = MessagePrinter(true); int res = NEO::Ocloc::validate({"-file", "src.gen"}, &argHelper); std::string oclocStdout = argHelper.getPrinterRef().getLog().str(); EXPECT_EQ(static_cast(NEO::DecodeError::InvalidBinary), res) << oclocStdout; EXPECT_NE(nullptr, strstr(oclocStdout.c_str(), "Validator detected errors :\nDeviceBinaryFormat::Zebin::.ze_info : Expected at most one kernels entry in global scope of .ze_info, got : 2")) << oclocStdout; } TEST(OclocValidate, givenDeviceProductTableEveryProductMatchesProperPattern) { MockOclocArgHelper::FilesMap files{{"src.gen", "01234567"}}; MockOclocArgHelper argHelper{files}; ASSERT_GE(argHelper.deviceProductTable.size(), 1u); std::vector genPatterns; for (int j = 0; j < IGFX_MAX_PRODUCT; j++) { if (NEO::hardwarePrefix[j] == nullptr) continue; genPatterns.push_back(NEO::hardwarePrefix[j]); } ASSERT_GE(genPatterns.size(), 1u); if (argHelper.deviceProductTable.size() == 1 && argHelper.deviceProductTable[0].deviceId == 0) { auto &deviceProductTable = const_cast &>(argHelper.deviceProductTable); deviceProductTable[0].product = genPatterns[0]; deviceProductTable[0].deviceId = 0x123; deviceProductTable.push_back(DeviceProduct{0, ""}); } for (int i = 0; argHelper.deviceProductTable[i].deviceId != 0; i++) { auto res = std::find(genPatterns.begin(), 
genPatterns.end(), argHelper.returnProductNameForDevice(argHelper.deviceProductTable[i].deviceId)); EXPECT_NE(res, genPatterns.end()); } } compute-runtime-22.14.22890/opencl/test/unit_test/offline_compiler/offline_compiler_tests.cpp000066400000000000000000003001741422164147700324120ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "offline_compiler_tests.h" #include "shared/source/compiler_interface/intermediate_representations.h" #include "shared/source/compiler_interface/oclc_extensions.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/device_binary_format/elf/elf_decoder.h" #include "shared/source/device_binary_format/elf/ocl_elf.h" #include "shared/source/helpers/compiler_hw_info_config.h" #include "shared/source/helpers/file_io.h" #include "shared/source/helpers/hw_info.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/variable_backup.h" #include "shared/test/common/mocks/mock_compilers.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/device_binary_format/zebin_tests.h" #include "shared/test/unit_test/helpers/gtest_helpers.h" #include "compiler_options.h" #include "environment.h" #include "gtest/gtest.h" #include "hw_cmds.h" #include "mock/mock_argument_helper.h" #include "mock/mock_multi_command.h" #include "mock/mock_offline_compiler.h" #include #include #include #include extern Environment *gEnvironment; namespace NEO { void MultiCommandTests::createFileWithArgs(const std::vector &singleArgs, int numOfBuild) { std::ofstream myfile(nameOfFileWithArgs); if (myfile.is_open()) { for (int i = 0; i < numOfBuild; i++) { for (auto singleArg : singleArgs) myfile << singleArg + " "; myfile << std::endl; } myfile.close(); } else printf("Unable to open file\n"); } void MultiCommandTests::deleteFileWithArgs() { if (remove(nameOfFileWithArgs.c_str()) != 0) 
perror("Error deleting file"); } void MultiCommandTests::deleteOutFileList() { if (remove(outFileList.c_str()) != 0) perror("Error deleting file"); } std::string getCompilerOutputFileName(const std::string &fileName, const std::string &type) { std::string fName(fileName); fName.append("_"); fName.append(gEnvironment->familyNameWithType); fName.append("."); fName.append(type); return fName; } bool compilerOutputExists(const std::string &fileName, const std::string &type) { return fileExists(getCompilerOutputFileName(fileName, type)); } void compilerOutputRemove(const std::string &fileName, const std::string &type) { std::remove(getCompilerOutputFileName(fileName, type).c_str()); } template bool isAnyIrSectionDefined(const SectionHeaders §ionHeaders) { const auto isIrSection = [](const auto §ion) { return section.header && (section.header->type == Elf::SHT_OPENCL_SPIRV || section.header->type == Elf::SHT_OPENCL_LLVM_BINARY); }; return std::any_of(sectionHeaders.begin(), sectionHeaders.end(), isIrSection); } TEST_F(MultiCommandTests, WhenBuildingMultiCommandThenSuccessIsReturned) { nameOfFileWithArgs = "test_files/ImAMulitiComandMinimalGoodFile.txt"; std::vector argv = { "ocloc", "multi", nameOfFileWithArgs.c_str(), "-q", }; std::vector singleArgs = { "-file", "test_files/copybuffer.cl", "-device", gEnvironment->devicePrefix.c_str()}; int numOfBuild = 4; createFileWithArgs(singleArgs, numOfBuild); auto pMultiCommand = std::unique_ptr(MultiCommand::create(argv, retVal, oclocArgHelperWithoutInput.get())); EXPECT_NE(nullptr, pMultiCommand); EXPECT_EQ(CL_SUCCESS, retVal); deleteFileWithArgs(); } TEST_F(MultiCommandTests, GivenOutputFileWhenBuildingMultiCommandThenSuccessIsReturned) { nameOfFileWithArgs = "test_files/ImAMulitiComandMinimalGoodFile.txt"; std::vector argv = { "ocloc", "multi", nameOfFileWithArgs.c_str(), "-q", }; std::vector singleArgs = { "-file", "test_files/copybuffer.cl", "-device", gEnvironment->devicePrefix.c_str()}; int numOfBuild = 4; 
createFileWithArgs(singleArgs, numOfBuild); auto pMultiCommand = std::unique_ptr(MultiCommand::create(argv, retVal, oclocArgHelperWithoutInput.get())); EXPECT_NE(nullptr, pMultiCommand); EXPECT_EQ(CL_SUCCESS, retVal); for (int i = 0; i < numOfBuild; i++) { std::string outFileName = pMultiCommand->outDirForBuilds + "/build_no_" + std::to_string(i + 1); EXPECT_TRUE(compilerOutputExists(outFileName, "bc") || compilerOutputExists(outFileName, "spv")); EXPECT_TRUE(compilerOutputExists(outFileName, "gen")); EXPECT_TRUE(compilerOutputExists(outFileName, "bin")); } deleteFileWithArgs(); } TEST_F(MultiCommandTests, GivenSpecifiedOutputDirWhenBuildingMultiCommandThenSuccessIsReturned) { nameOfFileWithArgs = "test_files/ImAMulitiComandMinimalGoodFile.txt"; std::vector argv = { "ocloc", "multi", nameOfFileWithArgs.c_str(), "-q", }; std::vector singleArgs = { "-file", "test_files/copybuffer.cl", "-device", gEnvironment->devicePrefix.c_str(), "-out_dir", "offline_compiler_test"}; int numOfBuild = 4; createFileWithArgs(singleArgs, numOfBuild); pMultiCommand = MultiCommand::create(argv, retVal, oclocArgHelperWithoutInput.get()); EXPECT_NE(nullptr, pMultiCommand); EXPECT_EQ(CL_SUCCESS, retVal); for (int i = 0; i < numOfBuild; i++) { std::string outFileName = "offline_compiler_test/build_no_" + std::to_string(i + 1); EXPECT_TRUE(compilerOutputExists(outFileName, "bc") || compilerOutputExists(outFileName, "spv")); EXPECT_TRUE(compilerOutputExists(outFileName, "gen")); EXPECT_TRUE(compilerOutputExists(outFileName, "bin")); } deleteFileWithArgs(); delete pMultiCommand; } TEST_F(MultiCommandTests, GivenSpecifiedOutputDirWithProductConfigValueWhenBuildingMultiCommandThenSuccessIsReturned) { auto allEnabledDeviceConfigs = oclocArgHelperWithoutInput->getAllSupportedDeviceConfigs(); if (allEnabledDeviceConfigs.empty()) { GTEST_SKIP(); } std::string configStr; for (auto &deviceMapConfig : allEnabledDeviceConfigs) { if (productFamily == deviceMapConfig.hwInfo->platform.eProductFamily) { 
configStr = oclocArgHelperWithoutInput->parseProductConfigFromValue(deviceMapConfig.config); break; } } nameOfFileWithArgs = "test_files/ImAMulitiComandMinimalGoodFile.txt"; std::vector argv = { "ocloc", "multi", nameOfFileWithArgs.c_str(), "-q", }; std::vector singleArgs = { "-file", "test_files/copybuffer.cl", "-device", configStr, "-out_dir", "offline_compiler_test"}; int numOfBuild = 4; createFileWithArgs(singleArgs, numOfBuild); pMultiCommand = MultiCommand::create(argv, retVal, oclocArgHelperWithoutInput.get()); EXPECT_NE(nullptr, pMultiCommand); EXPECT_EQ(CL_SUCCESS, retVal); for (int i = 0; i < numOfBuild; i++) { std::string outFileName = "offline_compiler_test/build_no_" + std::to_string(i + 1); EXPECT_TRUE(compilerOutputExists(outFileName, "bc") || compilerOutputExists(outFileName, "spv")); EXPECT_TRUE(compilerOutputExists(outFileName, "gen")); EXPECT_TRUE(compilerOutputExists(outFileName, "bin")); } deleteFileWithArgs(); delete pMultiCommand; } TEST_F(MultiCommandTests, GivenMissingTextFileWithArgsWhenBuildingMultiCommandThenInvalidFileErrorIsReturned) { nameOfFileWithArgs = "test_files/ImANotExistedComandFile.txt"; std::vector argv = { "ocloc", "multi", "test_files/ImANaughtyFile.txt", "-q", }; testing::internal::CaptureStdout(); auto pMultiCommand = std::unique_ptr(MultiCommand::create(argv, retVal, oclocArgHelperWithoutInput.get())); std::string output = testing::internal::GetCapturedStdout(); EXPECT_STRNE(output.c_str(), ""); EXPECT_EQ(nullptr, pMultiCommand); EXPECT_EQ(OclocErrorCode::INVALID_FILE, retVal); DebugManager.flags.PrintDebugMessages.set(false); } TEST_F(MultiCommandTests, GivenLackOfClFileWhenBuildingMultiCommandThenInvalidFileErrorIsReturned) { nameOfFileWithArgs = "test_files/ImAMulitiComandMinimalGoodFile.txt"; std::vector argv = { "ocloc", "multi", nameOfFileWithArgs.c_str(), "-q", }; std::vector singleArgs = { "-file", "test_files/ImANaughtyFile.cl", "-device", gEnvironment->devicePrefix.c_str()}; int numOfBuild = 4; 
createFileWithArgs(singleArgs, numOfBuild); testing::internal::CaptureStdout(); auto pMultiCommand = std::unique_ptr(MultiCommand::create(argv, retVal, oclocArgHelperWithoutInput.get())); std::string output = testing::internal::GetCapturedStdout(); EXPECT_EQ(nullptr, pMultiCommand); EXPECT_EQ(OclocErrorCode::INVALID_FILE, retVal); DebugManager.flags.PrintDebugMessages.set(false); deleteFileWithArgs(); } TEST_F(MultiCommandTests, GivenOutputFileListFlagWhenBuildingMultiCommandThenSuccessIsReturned) { nameOfFileWithArgs = "test_files/ImAMulitiComandMinimalGoodFile.txt"; std::vector argv = { "ocloc", "multi", nameOfFileWithArgs.c_str(), "-q", "-output_file_list", "outFileList.txt", }; std::vector singleArgs = { "-file", "test_files/copybuffer.cl", "-device", gEnvironment->devicePrefix.c_str()}; int numOfBuild = 4; createFileWithArgs(singleArgs, numOfBuild); pMultiCommand = MultiCommand::create(argv, retVal, oclocArgHelperWithoutInput.get()); EXPECT_NE(nullptr, pMultiCommand); EXPECT_EQ(CL_SUCCESS, retVal); outFileList = pMultiCommand->outputFileList; EXPECT_TRUE(fileExists(outFileList)); for (int i = 0; i < numOfBuild; i++) { std::string outFileName = pMultiCommand->outDirForBuilds + "/build_no_" + std::to_string(i + 1); EXPECT_TRUE(compilerOutputExists(outFileName, "bc") || compilerOutputExists(outFileName, "spv")); EXPECT_TRUE(compilerOutputExists(outFileName, "gen")); EXPECT_TRUE(compilerOutputExists(outFileName, "bin")); } deleteFileWithArgs(); deleteOutFileList(); delete pMultiCommand; } TEST(MultiCommandWhiteboxTest, GivenVerboseModeWhenShowingResultsThenLogsArePrintedForEachBuild) { MockMultiCommand mockMultiCommand{}; mockMultiCommand.retValues = {OclocErrorCode::SUCCESS, OclocErrorCode::INVALID_FILE}; mockMultiCommand.quiet = false; ::testing::internal::CaptureStdout(); const auto result = mockMultiCommand.showResults(); const auto output = testing::internal::GetCapturedStdout(); const auto maskedResult = result | OclocErrorCode::INVALID_FILE; 
EXPECT_NE(OclocErrorCode::SUCCESS, result); EXPECT_EQ(OclocErrorCode::INVALID_FILE, maskedResult); const auto expectedOutput{"Build command 0: successful\n" "Build command 1: failed. Error code: -5151\n"}; EXPECT_EQ(expectedOutput, output); } TEST(MultiCommandWhiteboxTest, GivenVerboseModeAndDefinedOutputFilenameAndDirectoryWhenAddingAdditionalOptionsToSingleCommandLineThenNothingIsDone) { MockMultiCommand mockMultiCommand{}; mockMultiCommand.quiet = false; std::vector singleArgs = { "-file", "test_files/copybuffer.cl", "-output", "SpecialOutputFilename", "-out_dir", "SomeOutputDirectory", "-device", gEnvironment->devicePrefix.c_str()}; const auto singleArgsCopy{singleArgs}; const size_t buildId{0}; ::testing::internal::CaptureStdout(); mockMultiCommand.addAdditionalOptionsToSingleCommandLine(singleArgs, buildId); const auto output = testing::internal::GetCapturedStdout(); EXPECT_EQ(singleArgsCopy, singleArgs); } TEST(MultiCommandWhiteboxTest, GivenHelpArgumentsWhenInitializingThenHelpIsPrinted) { MockMultiCommand mockMultiCommand{}; mockMultiCommand.quiet = false; std::vector singleArgs = { "--help"}; const auto args{singleArgs}; ::testing::internal::CaptureStdout(); const auto result = mockMultiCommand.initialize(args); const auto output = testing::internal::GetCapturedStdout(); const auto expectedOutput = R"===(Compiles multiple files using a config file. Usage: ocloc multi Input file containing a list of arguments for subsequent ocloc invocations. Expected format of each line inside such file is: '-file -device [compile_options]'. See 'ocloc compile --help' for available compile_options. Results of subsequent compilations will be dumped into a directory with name indentical file_name's base name. 
-output_file_list Name of optional file containing paths to outputs .bin files )==="; EXPECT_EQ(expectedOutput, output); EXPECT_EQ(-1, result); } TEST(MultiCommandWhiteboxTest, GivenCommandLineWithApostrophesWhenSplittingLineInSeparateArgsThenTextBetweenApostrophesIsReadAsSingleArg) { MockMultiCommand mockMultiCommand{}; mockMultiCommand.quiet = false; const std::string commandLine{" -out_dir \"Some Directory\" -output \'Some Filename\'"}; std::vector outputArgs{}; const std::size_t numberOfBuild{0}; ::testing::internal::CaptureStdout(); const auto result = mockMultiCommand.splitLineInSeparateArgs(outputArgs, commandLine, numberOfBuild); const auto output = testing::internal::GetCapturedStdout(); EXPECT_EQ(OclocErrorCode::SUCCESS, result); EXPECT_TRUE(output.empty()) << output; ASSERT_EQ(4u, outputArgs.size()); EXPECT_EQ("-out_dir", outputArgs[0]); EXPECT_EQ("Some Directory", outputArgs[1]); EXPECT_EQ("-output", outputArgs[2]); EXPECT_EQ("Some Filename", outputArgs[3]); } TEST(MultiCommandWhiteboxTest, GivenCommandLineWithMissingApostropheWhenSplittingLineInSeparateArgsThenErrorIsReturned) { MockMultiCommand mockMultiCommand{}; mockMultiCommand.quiet = false; const std::string commandLine{"-out_dir \"Some Directory"}; std::vector outputArgs{}; const std::size_t numberOfBuild{0}; ::testing::internal::CaptureStdout(); const auto result = mockMultiCommand.splitLineInSeparateArgs(outputArgs, commandLine, numberOfBuild); const auto output = testing::internal::GetCapturedStdout(); EXPECT_EQ(OclocErrorCode::INVALID_FILE, result); const auto expectedOutput = "One of the quotes is open in build number 1\n"; EXPECT_EQ(expectedOutput, output); } TEST(MultiCommandWhiteboxTest, GivenArgsWithQuietModeAndEmptyMulticomandFileWhenInitializingThenQuietFlagIsSetAndErrorIsReturned) { MockMultiCommand mockMultiCommand{}; mockMultiCommand.quiet = false; mockMultiCommand.uniqueHelper->callBaseFileExists = false; mockMultiCommand.uniqueHelper->callBaseReadFileToVectorOfStrings = false; 
mockMultiCommand.uniqueHelper->shouldReturnEmptyVectorOfStrings = true; mockMultiCommand.filesMap["commands.txt"] = ""; const std::vector args = { "ocloc", "multi", "commands.txt", "-q"}; ::testing::internal::CaptureStdout(); const auto result = mockMultiCommand.initialize(args); const auto output = testing::internal::GetCapturedStdout(); EXPECT_EQ(OclocErrorCode::INVALID_FILE, result); const auto expectedOutput = "Command file was empty.\n"; EXPECT_EQ(expectedOutput, output); } TEST(MockOfflineCompilerTests, givenProductConfigValueWhenInitHwInfoThenResetGtSystemInfo) { MockOfflineCompiler mockOfflineCompiler; auto allEnabledDeviceConfigs = mockOfflineCompiler.argHelper->getAllSupportedDeviceConfigs(); if (allEnabledDeviceConfigs.empty()) { GTEST_SKIP(); } auto expectedRevId = 0u; for (auto &deviceMapConfig : allEnabledDeviceConfigs) { if (productFamily == deviceMapConfig.hwInfo->platform.eProductFamily) { mockOfflineCompiler.deviceName = mockOfflineCompiler.argHelper->parseProductConfigFromValue(deviceMapConfig.config); expectedRevId = deviceMapConfig.revId; } } EXPECT_FALSE(mockOfflineCompiler.deviceName.empty()); mockOfflineCompiler.initHardwareInfo(mockOfflineCompiler.deviceName); GT_SYSTEM_INFO expectedGtSystemInfo = {0}; EXPECT_EQ(mockOfflineCompiler.hwInfo.platform.usRevId, expectedRevId); EXPECT_EQ(mockOfflineCompiler.hwInfo.platform.eProductFamily, productFamily); EXPECT_EQ(memcmp(&mockOfflineCompiler.hwInfo.gtSystemInfo, &expectedGtSystemInfo, sizeof(GT_SYSTEM_INFO)), 0); } TEST_F(OfflineCompilerTests, GivenHelpOptionOnQueryThenSuccessIsReturned) { std::vector argv = { "ocloc", "query", "--help"}; testing::internal::CaptureStdout(); int retVal = OfflineCompiler::query(argv.size(), argv, oclocArgHelperWithoutInput.get()); std::string output = testing::internal::GetCapturedStdout(); EXPECT_STREQ(OfflineCompiler::queryHelp.data(), output.c_str()); EXPECT_EQ(OclocErrorCode::SUCCESS, retVal); } TEST_F(OfflineCompilerTests, 
GivenFlagsWhichRequireMoreArgsWithoutThemWhenParsingThenErrorIsReported) { const std::array flagsToTest = { "-file", "-output", "-device", "-options", "-internal_options", "-out_dir", "-revision_id"}; for (const auto &flag : flagsToTest) { const std::vector argv = { "ocloc", "compile", flag}; MockOfflineCompiler mockOfflineCompiler{}; ::testing::internal::CaptureStdout(); const auto result = mockOfflineCompiler.parseCommandLine(argv.size(), argv); const auto output{::testing::internal::GetCapturedStdout()}; EXPECT_EQ(OclocErrorCode::INVALID_COMMAND_LINE, result); const std::string expectedErrorMessage{"Invalid option (arg 2): " + flag + "\n"}; EXPECT_EQ(expectedErrorMessage, output); } } TEST_F(OfflineCompilerTests, Given32BitModeFlagWhenParsingThenInternalOptionsContain32BitModeFlag) { const std::array flagsToTest = { "-32", CompilerOptions::arch32bit.str()}; for (const auto &flag : flagsToTest) { const std::vector argv = { "ocloc", "compile", "-file", "test_files/copybuffer.cl", flag, "-device", gEnvironment->devicePrefix.c_str()}; MockOfflineCompiler mockOfflineCompiler{}; const auto result = mockOfflineCompiler.parseCommandLine(argv.size(), argv); EXPECT_EQ(OclocErrorCode::SUCCESS, result); const auto is32BitModeSet{mockOfflineCompiler.internalOptions.find(CompilerOptions::arch32bit.data()) != std::string::npos}; EXPECT_TRUE(is32BitModeSet); } } TEST_F(OfflineCompilerTests, Given64BitModeFlagWhenParsingThenInternalOptionsContain64BitModeFlag) { const std::array flagsToTest = { "-64", CompilerOptions::arch64bit.str()}; for (const auto &flag : flagsToTest) { const std::vector argv = { "ocloc", "compile", "-file", "test_files/copybuffer.cl", flag, "-device", gEnvironment->devicePrefix.c_str()}; MockOfflineCompiler mockOfflineCompiler{}; const auto result = mockOfflineCompiler.parseCommandLine(argv.size(), argv); EXPECT_EQ(OclocErrorCode::SUCCESS, result); const auto is64BitModeSet{mockOfflineCompiler.internalOptions.find(CompilerOptions::arch64bit.data()) != 
std::string::npos}; EXPECT_TRUE(is64BitModeSet); } } TEST_F(OfflineCompilerTests, Given32BitModeFlagAnd64BitModeFlagWhenParsingThenErrorLogIsPrintedAndFailureIsReturned) { const std::vector argv = { "ocloc", "compile", "-file", "test_files/copybuffer.cl", "-32", "-64", "-device", gEnvironment->devicePrefix.c_str()}; MockOfflineCompiler mockOfflineCompiler{}; ::testing::internal::CaptureStdout(); const auto result = mockOfflineCompiler.parseCommandLine(argv.size(), argv); const auto output{::testing::internal::GetCapturedStdout()}; EXPECT_NE(OclocErrorCode::SUCCESS, result); const auto maskedResult = result | OclocErrorCode::INVALID_COMMAND_LINE; EXPECT_EQ(OclocErrorCode::INVALID_COMMAND_LINE, maskedResult); const std::string expectedErrorMessage{"Error: Cannot compile for 32-bit and 64-bit, please choose one.\n"}; EXPECT_EQ(expectedErrorMessage, output); } TEST_F(OfflineCompilerTests, GivenFlagStringWhenParsingThenInternalBooleanIsSetAndSuccessIsReturned) { using namespace std::string_literals; const std::array flagsToTest = { std::pair{"-options_name"s, &MockOfflineCompiler::useOptionsSuffix}, std::pair{"-gen_file"s, &MockOfflineCompiler::useGenFile}, std::pair{"-llvm_bc"s, &MockOfflineCompiler::useLlvmBc}}; for (const auto &[flagString, memberBoolean] : flagsToTest) { const std::vector argv = { "ocloc", "compile", "-file", "test_files/copybuffer.cl", flagString, "-device", gEnvironment->devicePrefix.c_str()}; MockOfflineCompiler mockOfflineCompiler{}; const auto result = mockOfflineCompiler.parseCommandLine(argv.size(), argv); EXPECT_EQ(OclocErrorCode::SUCCESS, result); EXPECT_TRUE(mockOfflineCompiler.*memberBoolean); } } TEST_F(OfflineCompilerTests, GivenArgsWhenQueryIsCalledThenSuccessIsReturned) { std::vector argv = { "ocloc", "query", "NEO_REVISION"}; int retVal = OfflineCompiler::query(argv.size(), argv, oclocArgHelperWithoutInput.get()); EXPECT_EQ(OclocErrorCode::SUCCESS, retVal); } TEST_F(OfflineCompilerTests, 
GivenArgsWhenOfflineCompilerIsCreatedThenSuccessIsReturned) {
    std::vector<std::string> argv = {
        "ocloc",
        "-file",
        "test_files/copybuffer.cl",
        "-device",
        gEnvironment->devicePrefix.c_str()};

    pOfflineCompiler = OfflineCompiler::create(argv.size(), argv, true, retVal, oclocArgHelperWithoutInput.get());

    EXPECT_NE(nullptr, pOfflineCompiler);
    EXPECT_EQ(CL_SUCCESS, retVal);

    delete pOfflineCompiler;
}

// initHardwareInfo fed with a "0x<hex>" device id must store that id in hwInfo.platform.usDeviceID.
TEST_F(OfflineCompilerTests, givenDeviceIdHexValueWhenInitHwInfoThenItHasCorrectlySetValues) {
    auto deviceId = oclocArgHelperWithoutInput->deviceProductTable[0].deviceId;
    // Skipped when the table holds a single entry whose device id is 0.
    if (oclocArgHelperWithoutInput->deviceProductTable.size() == 1 && deviceId == 0) {
        GTEST_SKIP();
    }

    MockOfflineCompiler mockOfflineCompiler;
    std::stringstream deviceString;
    deviceString << "0x" << std::hex << deviceId;

    mockOfflineCompiler.initHardwareInfo(deviceString.str());
    EXPECT_EQ(mockOfflineCompiler.hwInfo.platform.usDeviceID, deviceId);
}

// Creating a compiler with "-device 0x<hex>" must succeed and print the auto-detected product.
TEST_F(OfflineCompilerTests, givenProperDeviceIdHexAsDeviceArgumentThenSuccessIsReturned) {
    auto deviceId = oclocArgHelperWithoutInput->deviceProductTable[0].deviceId;
    if (oclocArgHelperWithoutInput->deviceProductTable.size() == 1 && deviceId == 0) {
        GTEST_SKIP();
    }

    std::stringstream deviceString, productString;
    deviceString << "0x" << std::hex << deviceId;
    productString << oclocArgHelperWithoutInput->deviceProductTable[0].product;

    std::vector<std::string> argv = {
        "ocloc",
        "-file",
        "test_files/copybuffer.cl",
        "-device",
        deviceString.str()};

    testing::internal::CaptureStdout();
    pOfflineCompiler = OfflineCompiler::create(argv.size(), argv, true, retVal, oclocArgHelperWithoutInput.get());
    // Release the capture first so that the fatal assertion below cannot leave it active.
    auto output = testing::internal::GetCapturedStdout();

    // The compiler is dereferenced right below, so a failed creation must end the test here.
    ASSERT_NE(nullptr, pOfflineCompiler);
    EXPECT_EQ(pOfflineCompiler->getHardwareInfo().platform.usDeviceID, deviceId);

    std::stringstream resString;
    resString << "Auto-detected target based on " << deviceString.str() << " device id: " << productString.str() << "\n";

    EXPECT_STREQ(output.c_str(), resString.str().c_str());
    EXPECT_EQ(CL_SUCCESS,
retVal);

    delete pOfflineCompiler;
}

// "-device 0x0" is not a known device id: creation fails with CL_INVALID_DEVICE and both messages are printed.
TEST_F(OfflineCompilerTests, givenIncorrectDeviceIdHexThenInvalidDeviceIsReturned) {
    std::vector<std::string> argv = {
        "ocloc",
        "-file",
        "test_files/copybuffer.cl",
        "-device",
        "0x0"};

    testing::internal::CaptureStdout();
    pOfflineCompiler = OfflineCompiler::create(argv.size(), argv, true, retVal, oclocArgHelperWithoutInput.get());
    auto output = testing::internal::GetCapturedStdout();

    EXPECT_EQ(nullptr, pOfflineCompiler);
    EXPECT_STREQ(output.c_str(), "Could not determine target based on device id: 0x0\nError: Cannot get HW Info for device 0x0.\n");
    EXPECT_EQ(CL_INVALID_DEVICE, retVal);
}

// A product config with a trailing dot and no revision ("9.1.") is rejected.
TEST_F(OfflineCompilerTests, givenDeviceNumerationWithMissingRevisionValueWhenInvalidPatternIsPassedThenInvalidDeviceIsReturned) {
    std::vector<std::string> argv = {
        "ocloc",
        "-file",
        "test_files/copybuffer.cl",
        "-device",
        "9.1."};

    testing::internal::CaptureStdout();
    pOfflineCompiler = OfflineCompiler::create(argv.size(), argv, true, retVal, oclocArgHelperWithoutInput.get());
    auto output = testing::internal::GetCapturedStdout();

    EXPECT_EQ(nullptr, pOfflineCompiler);
    EXPECT_STREQ(output.c_str(), "Could not determine target based on product config: 9.1.\nError: Cannot get HW Info for device 9.1..\n");
    EXPECT_EQ(CL_INVALID_DEVICE, retVal);
}

// A product config with two trailing dots ("9.1..") is rejected.
TEST_F(OfflineCompilerTests, givenDeviceNumerationWithInvalidPatternThenInvalidDeviceIsReturned) {
    std::vector<std::string> argv = {
        "ocloc",
        "-file",
        "test_files/copybuffer.cl",
        "-device",
        "9.1.."};

    testing::internal::CaptureStdout();
    pOfflineCompiler = OfflineCompiler::create(argv.size(), argv, true, retVal, oclocArgHelperWithoutInput.get());
    auto output = testing::internal::GetCapturedStdout();

    EXPECT_EQ(nullptr, pOfflineCompiler);
    EXPECT_STREQ(output.c_str(), "Could not determine target based on product config: 9.1..\nError: Cannot get HW Info for device 9.1...\n");
    EXPECT_EQ(CL_INVALID_DEVICE, retVal);
}

// A product config without the major value (".1.2") is rejected.
TEST_F(OfflineCompilerTests, givenDeviceNumerationWithMissingMajorValueWhenInvalidPatternIsPassedThenInvalidDeviceIsReturned) {
    std::vector<std::string> argv =
{ "ocloc", "-file", "test_files/copybuffer.cl", "-device", ".1.2"}; testing::internal::CaptureStdout(); pOfflineCompiler = OfflineCompiler::create(argv.size(), argv, true, retVal, oclocArgHelperWithoutInput.get()); auto output = testing::internal::GetCapturedStdout(); EXPECT_EQ(nullptr, pOfflineCompiler); EXPECT_STREQ(output.c_str(), "Could not determine target based on product config: .1.2\nError: Cannot get HW Info for device .1.2.\n"); EXPECT_EQ(CL_INVALID_DEVICE, retVal); } TEST_F(OfflineCompilerTests, givenDeviceNumerationWhenInvalidRevisionValueIsPassedThenInvalidDeviceIsReturned) { std::vector argv = { "ocloc", "-file", "test_files/copybuffer.cl", "-device", "9.0.a"}; testing::internal::CaptureStdout(); pOfflineCompiler = OfflineCompiler::create(argv.size(), argv, true, retVal, oclocArgHelperWithoutInput.get()); auto output = testing::internal::GetCapturedStdout(); EXPECT_EQ(nullptr, pOfflineCompiler); EXPECT_STREQ(output.c_str(), "Could not determine target based on product config: 9.0.a\nError: Cannot get HW Info for device 9.0.a.\n"); EXPECT_EQ(CL_INVALID_DEVICE, retVal); } TEST_F(OfflineCompilerTests, givenDeviceNumerationWhenInvalidMinorValueIsPassedThenInvalidDeviceIsReturned) { std::vector argv = { "ocloc", "-file", "test_files/copybuffer.cl", "-device", "9.a"}; testing::internal::CaptureStdout(); pOfflineCompiler = OfflineCompiler::create(argv.size(), argv, true, retVal, oclocArgHelperWithoutInput.get()); auto output = testing::internal::GetCapturedStdout(); EXPECT_EQ(nullptr, pOfflineCompiler); EXPECT_STREQ(output.c_str(), "Could not determine target based on product config: 9.a\nError: Cannot get HW Info for device 9.a.\n"); EXPECT_EQ(CL_INVALID_DEVICE, retVal); } TEST_F(OfflineCompilerTests, givenDeviceNumerationWhenPassedValuesAreOutOfRangeThenInvalidDeviceIsReturned) { std::vector argv = { "ocloc", "-file", "test_files/copybuffer.cl", "-device", "256.350"}; testing::internal::CaptureStdout(); pOfflineCompiler = 
OfflineCompiler::create(argv.size(), argv, true, retVal, oclocArgHelperWithoutInput.get()); auto output = testing::internal::GetCapturedStdout(); EXPECT_EQ(nullptr, pOfflineCompiler); EXPECT_STREQ(output.c_str(), "Could not determine target based on product config: 256.350\nError: Cannot get HW Info for device 256.350.\n"); EXPECT_EQ(CL_INVALID_DEVICE, retVal); } TEST_F(OfflineCompilerTests, givenInitHardwareInfowhenDeviceConfigContainsDeviceIdsThenSetFirstDeviceId) { MockOfflineCompiler mockOfflineCompiler; auto &allEnabledDeviceConfigs = mockOfflineCompiler.argHelper->getAllSupportedDeviceConfigs(); if (allEnabledDeviceConfigs.empty()) { GTEST_SKIP(); } std::vector deviceIdsForTests = {0xfffd, 0xfffe, 0xffff}; for (auto &deviceMapConfig : allEnabledDeviceConfigs) { if (productFamily == deviceMapConfig.hwInfo->platform.eProductFamily) { mockOfflineCompiler.deviceName = mockOfflineCompiler.argHelper->parseProductConfigFromValue(deviceMapConfig.config); deviceMapConfig.deviceIds = &deviceIdsForTests; break; } } mockOfflineCompiler.initHardwareInfo(mockOfflineCompiler.deviceName); EXPECT_EQ(mockOfflineCompiler.hwInfo.platform.eProductFamily, productFamily); EXPECT_EQ(mockOfflineCompiler.hwInfo.platform.usDeviceID, deviceIdsForTests.front()); } TEST_F(OfflineCompilerTests, givenIncorrectDeviceIdWithIncorrectHexPatternThenInvalidDeviceIsReturned) { std::vector argv = { "ocloc", "-file", "test_files/copybuffer.cl", "-device", "0xnonexist"}; testing::internal::CaptureStdout(); pOfflineCompiler = OfflineCompiler::create(argv.size(), argv, true, retVal, oclocArgHelperWithoutInput.get()); auto output = testing::internal::GetCapturedStdout(); EXPECT_EQ(nullptr, pOfflineCompiler); EXPECT_STREQ(output.c_str(), "Error: Cannot get HW Info for device 0xnonexist.\n"); EXPECT_EQ(CL_INVALID_DEVICE, retVal); } TEST_F(OfflineCompilerTests, givenDebugOptionThenInternalOptionShouldContainKernelDebugEnable) { if (gEnvironment->devicePrefix == "bdw") { GTEST_SKIP(); } std::vector argv = { 
"ocloc", "-options", "-g", "-file", "test_files/copybuffer.cl", "-device", gEnvironment->devicePrefix.c_str()}; auto mockOfflineCompiler = std::unique_ptr(new MockOfflineCompiler()); ASSERT_NE(nullptr, mockOfflineCompiler); mockOfflineCompiler->initialize(argv.size(), argv); std::string internalOptions = mockOfflineCompiler->internalOptions; EXPECT_TRUE(hasSubstr(internalOptions, "-cl-kernel-debug-enable")); } TEST_F(OfflineCompilerTests, givenDashGInBiggerOptionStringWhenInitializingThenInternalOptionsShouldNotContainKernelDebugEnable) { std::vector argv = { "ocloc", "-options", "-gNotRealDashGOption", "-file", "test_files/copybuffer.cl", "-device", gEnvironment->devicePrefix.c_str()}; auto mockOfflineCompiler = std::unique_ptr(new MockOfflineCompiler()); ASSERT_NE(nullptr, mockOfflineCompiler); mockOfflineCompiler->initialize(argv.size(), argv); std::string internalOptions = mockOfflineCompiler->internalOptions; EXPECT_FALSE(hasSubstr(internalOptions, "-cl-kernel-debug-enable")); } TEST_F(OfflineCompilerTests, givenExcludeIrFromZebinInternalOptionWhenInitIsPerformedThenIrExcludeFlagsShouldBeUnified) { std::vector argv = { "ocloc", "-file", "test_files/copybuffer.cl", "-internal_options", "-ze-allow-zebin -ze-exclude-ir-from-zebin", "-device", gEnvironment->devicePrefix.c_str()}; MockOfflineCompiler mockOfflineCompiler{}; mockOfflineCompiler.initialize(argv.size(), argv); EXPECT_TRUE(mockOfflineCompiler.excludeIr); } TEST_F(OfflineCompilerTests, givenExcludeIrArgumentWhenInitIsPerformedThenIrExcludeFlagsShouldBeUnified) { std::vector argv = { "ocloc", "-file", "test_files/copybuffer.cl", "-exclude_ir", "-internal_options", "-ze-allow-zebin", "-device", gEnvironment->devicePrefix.c_str()}; MockOfflineCompiler mockOfflineCompiler{}; mockOfflineCompiler.initialize(argv.size(), argv); const auto expectedInternalOption{"-ze-exclude-ir-from-zebin"}; const auto excludeIrFromZebinEnabled{mockOfflineCompiler.internalOptions.find(expectedInternalOption) != 
std::string::npos};
    EXPECT_TRUE(excludeIrFromZebinEnabled);
}

// Building with "-exclude_ir" must yield an ELF in which no IR section is defined.
TEST_F(OfflineCompilerTests, givenExcludeIrArgumentWhenCompilingKernelThenIrShouldBeExcluded) {
    std::vector<std::string> argv = {
        "ocloc",
        "-file",
        "test_files/copybuffer.cl",
        "-exclude_ir",
        "-device",
        gEnvironment->devicePrefix.c_str()};

    MockOfflineCompiler mockOfflineCompiler{};
    mockOfflineCompiler.initialize(argv.size(), argv);

    const auto buildResult{mockOfflineCompiler.build()};
    ASSERT_EQ(OclocErrorCode::SUCCESS, buildResult);

    std::string errorReason{};
    std::string warning{};

    const auto elf{Elf::decodeElf(mockOfflineCompiler.elfBinary, errorReason, warning)};
    ASSERT_TRUE(errorReason.empty());
    ASSERT_TRUE(warning.empty());

    EXPECT_FALSE(isAnyIrSectionDefined(elf.sectionHeaders));
}

// Building without "-exclude_ir" must yield an ELF that defines an IR section.
TEST_F(OfflineCompilerTests, givenLackOfExcludeIrArgumentWhenCompilingKernelThenIrShouldBeIncluded) {
    std::vector<std::string> argv = {
        "ocloc",
        "-file",
        "test_files/copybuffer.cl",
        "-device",
        gEnvironment->devicePrefix.c_str()};

    MockOfflineCompiler mockOfflineCompiler{};
    mockOfflineCompiler.initialize(argv.size(), argv);

    const auto buildResult{mockOfflineCompiler.build()};
    ASSERT_EQ(OclocErrorCode::SUCCESS, buildResult);

    std::string errorReason{};
    std::string warning{};

    const auto elf{Elf::decodeElf(mockOfflineCompiler.elfBinary, errorReason, warning)};
    ASSERT_TRUE(errorReason.empty());
    ASSERT_TRUE(warning.empty());

    EXPECT_TRUE(isAnyIrSectionDefined(elf.sectionHeaders));
}

// Checks the internal options produced for no -cl-std option and for CL1.2, CL2.0 and CL3.0.
TEST_F(OfflineCompilerTests, givenVariousClStdValuesWhenCompilingSourceThenCorrectExtensionsArePassed) {
    std::string clStdOptionValues[] = {"", "-cl-std=CL1.2", "-cl-std=CL2.0", "-cl-std=CL3.0"};

    for (auto &clStdOptionValue : clStdOptionValues) {
        std::vector<std::string> argv = {
            "ocloc",
            "-file",
            "test_files/copybuffer.cl",
            "-device",
            gEnvironment->devicePrefix.c_str()};

        // The empty entry stands for "no -options argument at all".
        if (!clStdOptionValue.empty()) {
            argv.push_back("-options");
            argv.push_back(clStdOptionValue);
        }

        auto mockOfflineCompiler = std::make_unique<MockOfflineCompiler>();
mockOfflineCompiler->initialize(argv.size(), argv); std::string internalOptions = mockOfflineCompiler->internalOptions; std::string oclVersionOption = getOclVersionCompilerInternalOption(mockOfflineCompiler->hwInfo.capabilityTable.clVersionSupport); EXPECT_TRUE(hasSubstr(internalOptions, oclVersionOption)); if (clStdOptionValue == "-cl-std=CL2.0") { auto expectedRegex = std::string{"cl_khr_3d_image_writes"}; if (mockOfflineCompiler->hwInfo.capabilityTable.supportsImages) { expectedRegex += ".+" + std::string{"cl_khr_3d_image_writes"}; } EXPECT_TRUE(containsRegex(internalOptions, expectedRegex)); } OpenClCFeaturesContainer openclCFeatures; getOpenclCFeaturesList(mockOfflineCompiler->hwInfo, openclCFeatures); for (auto &feature : openclCFeatures) { if (clStdOptionValue == "-cl-std=CL3.0") { EXPECT_TRUE(hasSubstr(internalOptions, std::string{feature.name})); } else { EXPECT_FALSE(hasSubstr(internalOptions, std::string{feature.name})); } } if (mockOfflineCompiler->hwInfo.capabilityTable.supportsImages) { EXPECT_TRUE(hasSubstr(internalOptions, CompilerOptions::enableImageSupport.data())); } else { EXPECT_FALSE(hasSubstr(internalOptions, CompilerOptions::enableImageSupport.data())); } } } TEST_F(OfflineCompilerTests, GivenArgsWhenBuildingThenBuildSucceeds) { std::vector argv = { "ocloc", "-file", "test_files/copybuffer.cl", "-device", gEnvironment->devicePrefix.c_str()}; pOfflineCompiler = OfflineCompiler::create(argv.size(), argv, true, retVal, oclocArgHelperWithoutInput.get()); EXPECT_NE(nullptr, pOfflineCompiler); EXPECT_EQ(CL_SUCCESS, retVal); testing::internal::CaptureStdout(); retVal = pOfflineCompiler->build(); std::string output = testing::internal::GetCapturedStdout(); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(compilerOutputExists("copybuffer", "bc") || compilerOutputExists("copybuffer", "spv")); EXPECT_TRUE(compilerOutputExists("copybuffer", "gen")); EXPECT_TRUE(compilerOutputExists("copybuffer", "bin")); std::string buildLog = pOfflineCompiler->getBuildLog(); 
EXPECT_STREQ(buildLog.c_str(), "");

    delete pOfflineCompiler;
}

// Same as the plain build, but the device is given as a product config string taken from the arg helper.
TEST_F(OfflineCompilerTests, GivenArgsWhenBuildingWithDeviceConfigValueThenBuildSucceeds) {
    const auto &allEnabledDeviceConfigs = oclocArgHelperWithoutInput->getAllSupportedDeviceConfigs();
    if (allEnabledDeviceConfigs.empty()) {
        // Report the test as skipped instead of letting it silently pass.
        GTEST_SKIP();
    }

    // Use the first config whose product family matches the tested one.
    std::string configStr;
    for (const auto &deviceMapConfig : allEnabledDeviceConfigs) {
        if (productFamily == deviceMapConfig.hwInfo->platform.eProductFamily) {
            configStr = oclocArgHelperWithoutInput->parseProductConfigFromValue(deviceMapConfig.config);
            break;
        }
    }

    std::vector<std::string> argv = {
        "ocloc",
        "-file",
        "test_files/copybuffer.cl",
        "-device",
        configStr};

    pOfflineCompiler = OfflineCompiler::create(argv.size(), argv, true, retVal, oclocArgHelperWithoutInput.get());

    EXPECT_EQ(CL_SUCCESS, retVal);
    // build() is called through this pointer, so a failed creation has to end the test here.
    ASSERT_NE(nullptr, pOfflineCompiler);

    testing::internal::CaptureStdout();
    retVal = pOfflineCompiler->build();
    std::string output = testing::internal::GetCapturedStdout();
    EXPECT_EQ(CL_SUCCESS, retVal);

    EXPECT_TRUE(compilerOutputExists("copybuffer", "bc") || compilerOutputExists("copybuffer", "spv"));
    EXPECT_TRUE(compilerOutputExists("copybuffer", "gen"));
    EXPECT_TRUE(compilerOutputExists("copybuffer", "bin"));

    std::string buildLog = pOfflineCompiler->getBuildLog();
    EXPECT_STREQ(buildLog.c_str(), "");

    delete pOfflineCompiler;
}

// With "-llvm_text" the intermediate output is expected as a ".ll" file.
TEST_F(OfflineCompilerTests, GivenLlvmTextWhenBuildingThenBuildSucceeds) {
    std::vector<std::string> argv = {
        "ocloc",
        "-file",
        "test_files/copybuffer.cl",
        "-device",
        gEnvironment->devicePrefix.c_str(),
        "-llvm_text"};

    pOfflineCompiler = OfflineCompiler::create(argv.size(), argv, true, retVal, oclocArgHelperWithoutInput.get());

    EXPECT_EQ(CL_SUCCESS, retVal);
    ASSERT_NE(nullptr, pOfflineCompiler);

    retVal = pOfflineCompiler->build();
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_TRUE(compilerOutputExists("copybuffer", "ll"));
    EXPECT_TRUE(compilerOutputExists("copybuffer", "gen"));
    EXPECT_TRUE(compilerOutputExists("copybuffer", "bin"));

    delete pOfflineCompiler;
}

TEST_F(OfflineCompilerTests,
WhenFclNotNeededThenDontLoadIt) {
    // With "-spirv_input" only the IGC device context is expected to be created.
    std::vector<std::string> argv = {
        "ocloc",
        "-file",
        "test_files/copybuffer.cl",
        "-device",
        gEnvironment->devicePrefix.c_str(),
        "-spirv_input"};

    MockOfflineCompiler offlineCompiler;
    auto ret = offlineCompiler.initialize(argv.size(), argv);
    EXPECT_EQ(0, ret);
    EXPECT_EQ(nullptr, offlineCompiler.fclDeviceCtx);
    EXPECT_NE(nullptr, offlineCompiler.igcDeviceCtx);
}

// parseBinAsCharArray must render a 37-byte binary as a uint32_t array definition plus registration code.
TEST_F(OfflineCompilerTests, WhenParsingBinToCharArrayThenCorrectResult) {
    std::vector<std::string> argv = {
        "ocloc",
        "-file",
        "test_files/copybuffer.cl",
        "-device",
        gEnvironment->devicePrefix.c_str()};

    pOfflineCompiler = OfflineCompiler::create(argv.size(), argv, true, retVal, oclocArgHelperWithoutInput.get());
    // The compiler is dereferenced below, so a failed creation has to end the test here.
    ASSERT_NE(nullptr, pOfflineCompiler);

    // clang-format off
    uint8_t binary[] = {
        0x02, 0x23, 0x3, 0x40, 0x56, 0x7, 0x80, 0x90, 0x1, 0x03, 0x34, 0x5, 0x60, 0x78, 0x9, 0x66, 0xff,
        0x10, 0x10, 0x10,
        0x02, 0x23, 0x3, 0x40, 0x56, 0x7, 0x80, 0x90, 0x1, 0x03, 0x34, 0x5, 0x60, 0x78, 0x9, 0x66, 0xff,
    };
    // clang-format on

    std::string familyNameWithType = gEnvironment->familyNameWithType;
    std::string fileName = "scheduler";
    std::string retArray = pOfflineCompiler->parseBinAsCharArray(binary, sizeof(binary), fileName);

    // NOTE(review): the two "#include" entries carry no header name and the indentation inside the
    // literals is a single space - both look like damage to this copy of the file; confirm the
    // expected text against the actual parseBinAsCharArray output.
    std::string target = "#include \n"
                         "#include \n\n"
                         "size_t SchedulerBinarySize_" +
                         familyNameWithType + " = 37;\n"
                                              "uint32_t SchedulerBinary_" +
                         familyNameWithType + "[10] = {\n"
                                              " 0x40032302, 0x90800756, 0x05340301, 0x66097860, 0x101010ff, 0x40032302, 0x90800756, 0x05340301, \n"
                                              " 0x66097860, 0xff000000};\n\n"
                                              "#include \"shared/source/built_ins/registry/built_ins_registry.h\"\n\n"
                                              "namespace NEO {\n"
                                              "static RegisterEmbeddedResource registerSchedulerBin(\n"
                                              " \"" +
                         familyNameWithType + "_0_scheduler.builtin_kernel.bin\",\n"
                                              " (const char *)SchedulerBinary_" +
                         familyNameWithType + ",\n"
                                              " SchedulerBinarySize_" +
                         familyNameWithType + ");\n"
                                              "}\n";
    EXPECT_EQ(retArray, target);

    delete pOfflineCompiler;
}

// With "-cpp_file" a ".cpp" output is expected next to the regular outputs.
TEST_F(OfflineCompilerTests, GivenCppFileWhenBuildingThenBuildSucceeds) {
    std::vector<std::string> argv = {
        "ocloc",
        "-file",
"test_files/copybuffer.cl", "-device", gEnvironment->devicePrefix.c_str(), "-cpp_file"}; pOfflineCompiler = OfflineCompiler::create(argv.size(), argv, true, retVal, oclocArgHelperWithoutInput.get()); EXPECT_NE(nullptr, pOfflineCompiler); EXPECT_EQ(CL_SUCCESS, retVal); retVal = pOfflineCompiler->build(); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(compilerOutputExists("copybuffer", "cpp")); EXPECT_TRUE(compilerOutputExists("copybuffer", "bc") || compilerOutputExists("copybuffer", "spv")); EXPECT_TRUE(compilerOutputExists("copybuffer", "gen")); EXPECT_TRUE(compilerOutputExists("copybuffer", "bin")); delete pOfflineCompiler; } TEST_F(OfflineCompilerTests, GivenOutputDirWhenBuildingThenBuildSucceeds) { std::vector argv = { "ocloc", "-file", "test_files/copybuffer.cl", "-device", gEnvironment->devicePrefix.c_str(), "-out_dir", "offline_compiler_test"}; pOfflineCompiler = OfflineCompiler::create(argv.size(), argv, true, retVal, oclocArgHelperWithoutInput.get()); EXPECT_NE(nullptr, pOfflineCompiler); EXPECT_EQ(CL_SUCCESS, retVal); retVal = pOfflineCompiler->build(); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(compilerOutputExists("offline_compiler_test/copybuffer", "bc") || compilerOutputExists("offline_compiler_test/copybuffer", "spv")); EXPECT_TRUE(compilerOutputExists("offline_compiler_test/copybuffer", "gen")); EXPECT_TRUE(compilerOutputExists("offline_compiler_test/copybuffer", "bin")); delete pOfflineCompiler; } TEST_F(OfflineCompilerTests, GivenHelpOptionThenBuildDoesNotOccur) { std::vector argv = { "ocloc", "--help"}; testing::internal::CaptureStdout(); pOfflineCompiler = OfflineCompiler::create(argv.size(), argv, true, retVal, oclocArgHelperWithoutInput.get()); std::string output = testing::internal::GetCapturedStdout(); EXPECT_STRNE("", output.c_str()); EXPECT_EQ(OclocErrorCode::SUCCESS, retVal); delete pOfflineCompiler; } TEST_F(OfflineCompilerTests, GivenInvalidFileWhenBuildingThenInvalidFileErrorIsReturned) { DebugManager.flags.PrintDebugMessages.set(true); 
std::vector argv = { "ocloc", "-file", "test_files/ImANaughtyFile.cl", "-device", gEnvironment->devicePrefix.c_str()}; testing::internal::CaptureStdout(); pOfflineCompiler = OfflineCompiler::create(argv.size(), argv, true, retVal, oclocArgHelperWithoutInput.get()); std::string output = testing::internal::GetCapturedStdout(); EXPECT_STRNE(output.c_str(), ""); EXPECT_EQ(nullptr, pOfflineCompiler); EXPECT_EQ(OclocErrorCode::INVALID_FILE, retVal); DebugManager.flags.PrintDebugMessages.set(false); delete pOfflineCompiler; } TEST_F(OfflineCompilerTests, GivenInvalidFlagWhenBuildingThenInvalidCommandLineErrorIsReturned) { std::vector argv = { "ocloc", "-n", "test_files/ImANaughtyFile.cl", "-device", gEnvironment->devicePrefix.c_str()}; testing::internal::CaptureStdout(); pOfflineCompiler = OfflineCompiler::create(argv.size(), argv, true, retVal, oclocArgHelperWithoutInput.get()); std::string output = testing::internal::GetCapturedStdout(); EXPECT_STRNE(output.c_str(), ""); EXPECT_EQ(nullptr, pOfflineCompiler); EXPECT_EQ(OclocErrorCode::INVALID_COMMAND_LINE, retVal); delete pOfflineCompiler; } TEST_F(OfflineCompilerTests, GivenInvalidOptionsWhenBuildingThenInvalidCommandLineErrorIsReturned) { std::vector argvA = { "ocloc", "-file", }; testing::internal::CaptureStdout(); pOfflineCompiler = OfflineCompiler::create(argvA.size(), argvA, true, retVal, oclocArgHelperWithoutInput.get()); std::string output = testing::internal::GetCapturedStdout(); EXPECT_STRNE(output.c_str(), ""); EXPECT_EQ(nullptr, pOfflineCompiler); EXPECT_EQ(OclocErrorCode::INVALID_COMMAND_LINE, retVal); delete pOfflineCompiler; std::vector argvB = { "ocloc", "-file", "test_files/ImANaughtyFile.cl", "-device"}; testing::internal::CaptureStdout(); pOfflineCompiler = OfflineCompiler::create(argvB.size(), argvB, true, retVal, oclocArgHelperWithoutInput.get()); output = testing::internal::GetCapturedStdout(); EXPECT_STRNE(output.c_str(), ""); EXPECT_EQ(nullptr, pOfflineCompiler); 
EXPECT_EQ(OclocErrorCode::INVALID_COMMAND_LINE, retVal);

    delete pOfflineCompiler;
}

// An unknown device name must yield CL_INVALID_DEVICE and the HW info error message.
TEST_F(OfflineCompilerTests, GivenNonexistantDeviceWhenCompilingThenInvalidDeviceErrorAndErrorMessageAreReturned) {
    std::vector<std::string> argv = {
        "ocloc",
        "-file",
        "test_files/copybuffer.cl",
        "-device",
        "foobar"};

    testing::internal::CaptureStdout();
    pOfflineCompiler = OfflineCompiler::create(argv.size(), argv, true, retVal, oclocArgHelperWithoutInput.get());
    std::string output = testing::internal::GetCapturedStdout();
    EXPECT_STREQ(output.c_str(), "Error: Cannot get HW Info for device foobar.\n");
    EXPECT_EQ(nullptr, pOfflineCompiler);
    EXPECT_EQ(CL_INVALID_DEVICE, retVal);
}

// Building shouldfail.cl must end with CL_BUILD_PROGRAM_FAILURE, empty stdout and a non-empty build log.
TEST_F(OfflineCompilerTests, GivenInvalidKernelWhenBuildingThenBuildProgramFailureErrorIsReturned) {
    std::vector<std::string> argv = {
        "ocloc",
        "-file",
        "test_files/shouldfail.cl",
        "-device",
        gEnvironment->devicePrefix.c_str()};

    pOfflineCompiler = OfflineCompiler::create(argv.size(), argv, true, retVal, oclocArgHelperWithoutInput.get());

    EXPECT_EQ(CL_SUCCESS, retVal);
    // build() is called through this pointer, so a failed creation has to end the test here.
    ASSERT_NE(nullptr, pOfflineCompiler);

    gEnvironment->SetInputFileName("invalid_file_name");
    testing::internal::CaptureStdout();
    retVal = pOfflineCompiler->build();
    EXPECT_EQ(CL_BUILD_PROGRAM_FAILURE, retVal);

    std::string output = testing::internal::GetCapturedStdout();
    EXPECT_STREQ(output.c_str(), "");

    std::string buildLog = pOfflineCompiler->getBuildLog();
    EXPECT_STRNE(buildLog.c_str(), "");

    // Put back the input file name that was replaced above.
    gEnvironment->SetInputFileName("copybuffer");

    delete pOfflineCompiler;
}

// The option passed as argv[1] must be found in the internal options after parsing.
TEST(OfflineCompilerTest, WhenParsingCmdLineThenOptionsAreReadCorrectly) {
    std::vector<std::string> argv = {
        "ocloc",
        NEO::CompilerOptions::greaterThan4gbBuffersRequired.data()};

    MockOfflineCompiler *mockOfflineCompiler = new MockOfflineCompiler();
    ASSERT_NE(nullptr, mockOfflineCompiler);

    testing::internal::CaptureStdout();
    mockOfflineCompiler->parseCommandLine(argv.size(), argv);
    std::string output = testing::internal::GetCapturedStdout();

    std::string internalOptions = mockOfflineCompiler->internalOptions;
    size_t found =
internalOptions.find(argv.begin()[1]);
    EXPECT_NE(std::string::npos, found);

    delete mockOfflineCompiler;
}

// With EnableStatelessToStatefulBufferOffsetOpt set to 1 the hasBufferOffsetArg option must be present.
TEST(OfflineCompilerTest, givenStatelessToStatefullOptimizationEnabledWhenDebugSettingsAreParsedThenOptimizationStringIsPresent) {
    DebugManagerStateRestore stateRestore;
    MockOfflineCompiler mockOfflineCompiler;
    DebugManager.flags.EnableStatelessToStatefulBufferOffsetOpt.set(1);

    mockOfflineCompiler.parseDebugSettings();

    std::string internalOptions = mockOfflineCompiler.internalOptions;
    size_t found = internalOptions.find(NEO::CompilerOptions::hasBufferOffsetArg.data());
    EXPECT_NE(std::string::npos, found);
}

// With the flag set to -1 the hasBufferOffsetArg option must be present as well.
TEST(OfflineCompilerTest, givenStatelessToStatefullOptimizationEnabledWhenDebugSettingsAreParsedThenOptimizationStringIsSetToDefault) {
    DebugManagerStateRestore stateRestore;
    MockOfflineCompiler mockOfflineCompiler;
    DebugManager.flags.EnableStatelessToStatefulBufferOffsetOpt.set(-1);

    mockOfflineCompiler.parseDebugSettings();

    std::string internalOptions = mockOfflineCompiler.internalOptions;
    size_t found = internalOptions.find(NEO::CompilerOptions::hasBufferOffsetArg.data());
    EXPECT_NE(std::string::npos, found);
}

// getStringWithinDelimiters must drop the raw-string delimiters and return a NUL-terminated result.
TEST(OfflineCompilerTest, GivenDelimitersWhenGettingStringThenParseIsCorrect) {
    auto mockOfflineCompiler = std::make_unique<MockOfflineCompiler>();

    size_t srcSize = 0;
    auto ptrSrc = loadDataFromFile("test_files/copy_buffer_to_buffer.builtin_kernel", srcSize);
    // Constructing a std::string from a null pointer is undefined, so stop if the file was not loaded.
    ASSERT_NE(nullptr, ptrSrc);

    const std::string src = ptrSrc.get();
    ASSERT_EQ(srcSize, src.size());

    // assert that pattern was found
    ASSERT_NE(std::string::npos, src.find("R\"===("));
    ASSERT_NE(std::string::npos, src.find(")===\""));

    auto dst = mockOfflineCompiler->getStringWithinDelimiters(src);

    size_t size = dst.size();
    // dst[size - 1] below would index out of range for an empty result.
    ASSERT_NE(0u, size);
    char nullChar = '\0';
    EXPECT_EQ(nullChar, dst[size - 1]);

    // expect that pattern was not found
    EXPECT_EQ(std::string::npos, dst.find("R\"===("));
    EXPECT_EQ(std::string::npos, dst.find(")===\""));
}

TEST(OfflineCompilerTest,
WhenConvertingToPascalCaseThenResultIsCorrect) {
    // EXPECT_STREQ prints both strings on a mismatch, unlike comparing a strcmp() result with 0.
    EXPECT_STREQ("AuxTranslation", convertToPascalCase("aux_translation").c_str());
    EXPECT_STREQ("CopyBufferToBuffer", convertToPascalCase("copy_buffer_to_buffer").c_str());
    EXPECT_STREQ("CopyBufferRect", convertToPascalCase("copy_buffer_rect").c_str());
    EXPECT_STREQ("FillBuffer", convertToPascalCase("fill_buffer").c_str());
    EXPECT_STREQ("CopyBufferToImage3d", convertToPascalCase("copy_buffer_to_image3d").c_str());
    EXPECT_STREQ("CopyImage3dToBuffer", convertToPascalCase("copy_image3d_to_buffer").c_str());
    EXPECT_STREQ("CopyImageToImage1d", convertToPascalCase("copy_image_to_image1d").c_str());
    EXPECT_STREQ("CopyImageToImage2d", convertToPascalCase("copy_image_to_image2d").c_str());
    EXPECT_STREQ("CopyImageToImage3d", convertToPascalCase("copy_image_to_image3d").c_str());
    EXPECT_STREQ("FillImage1d", convertToPascalCase("fill_image1d").c_str());
    EXPECT_STREQ("FillImage2d", convertToPascalCase("fill_image2d").c_str());
    EXPECT_STREQ("FillImage3d", convertToPascalCase("fill_image3d").c_str());
    EXPECT_STREQ("VmeBlockMotionEstimateIntel", convertToPascalCase("vme_block_motion_estimate_intel").c_str());
    EXPECT_STREQ("VmeBlockAdvancedMotionEstimateCheckIntel", convertToPascalCase("vme_block_advanced_motion_estimate_check_intel").c_str());
    EXPECT_STREQ("VmeBlockAdvancedMotionEstimateBidirectionalCheckIntel", convertToPascalCase("vme_block_advanced_motion_estimate_bidirectional_check_intel").c_str());
    EXPECT_STREQ("Scheduler", convertToPascalCase("scheduler").c_str());
    EXPECT_STREQ("", convertToPascalCase("").c_str());
}

// initHardwareInfo must reject an unknown device name and accept the tested device prefix.
TEST(OfflineCompilerTest, GivenValidParamWhenGettingHardwareInfoThenSuccessIsReturned) {
    auto mockOfflineCompiler = std::make_unique<MockOfflineCompiler>();

    EXPECT_EQ(CL_INVALID_DEVICE, mockOfflineCompiler->initHardwareInfo("invalid"));
EXPECT_EQ(PRODUCT_FAMILY::IGFX_UNKNOWN, mockOfflineCompiler->getHardwareInfo().platform.eProductFamily);
    EXPECT_EQ(CL_SUCCESS, mockOfflineCompiler->initHardwareInfo(gEnvironment->devicePrefix.c_str()));
    EXPECT_NE(PRODUCT_FAMILY::IGFX_UNKNOWN, mockOfflineCompiler->getHardwareInfo().platform.eProductFamily);
}

// The decimal value 0xffffff + 1 passed as the device must be rejected with the product config message.
TEST(OfflineCompilerTest, GivenConfigValueWhichIsOutOfRangeWhenGettingHardwareInfoThenInvalidDeviceIsReturned) {
    auto mockOfflineCompiler = std::make_unique<MockOfflineCompiler>();

    uint32_t value = 0xffffff + 1;
    std::stringstream inproperValue, resString;
    inproperValue << value;

    testing::internal::CaptureStdout();
    EXPECT_EQ(CL_INVALID_DEVICE, mockOfflineCompiler->initHardwareInfo(inproperValue.str()));

    resString << "Could not determine target based on product config: " << inproperValue.str() << "\n";
    auto output = testing::internal::GetCapturedStdout();
    EXPECT_STREQ(output.c_str(), resString.str().c_str());
}

// After storeBinary the destination buffer must hold the same bytes as the source.
TEST(OfflineCompilerTest, WhenStoringBinaryThenStoredCorrectly) {
    auto mockOfflineCompiler = std::make_unique<MockOfflineCompiler>();

    const char pSrcBinary[] = {0x01, 0x02, 0x03, 0x04, 0x05};
    const size_t srcBinarySize = sizeof(pSrcBinary);
    char *pDstBinary = new char[srcBinarySize];
    size_t dstBinarySize = srcBinarySize;

    mockOfflineCompiler->storeBinary(pDstBinary, dstBinarySize, pSrcBinary, srcBinarySize);
    EXPECT_EQ(0, memcmp(pDstBinary, pSrcBinary, srcBinarySize));

    delete[] pDstBinary;
}

// Two updateBuildLog calls with plain strings must produce both messages joined by a newline.
TEST(OfflineCompilerTest, givenErrorStringsWithoutExtraNullCharactersWhenUpdatingBuildLogThenMessageIsCorrect) {
    auto mockOfflineCompiler = std::make_unique<MockOfflineCompiler>();

    std::string ErrorString = "Error: undefined variable";
    mockOfflineCompiler->updateBuildLog(ErrorString.c_str(), ErrorString.length());
    EXPECT_EQ(0, ErrorString.compare(mockOfflineCompiler->getBuildLog()));

    std::string FinalString = "Build failure";
mockOfflineCompiler->updateBuildLog(FinalString.c_str(), FinalString.length());
    EXPECT_EQ(0, (ErrorString + "\n" + FinalString).compare(mockOfflineCompiler->getBuildLog().c_str()));
}

// Messages passed together with their trailing NUL characters must be logged without them.
TEST(OfflineCompilerTest, givenErrorStringsWithExtraNullCharactersWhenUpdatingBuildLogThenMessageIsCorrect) {
    auto mockOfflineCompiler = std::make_unique<MockOfflineCompiler>();

    // sizeof() of the literal counts the explicit "\0" and the implicit terminator, i.e. length + 2.
    std::array<char, sizeof("Error: undefined variable\0")> errorMessageArray = {"Error: undefined variable\0"};
    std::string expectedBuildLogString = "Error: undefined variable";
    EXPECT_EQ(errorMessageArray.size(), std::string("Error: undefined variable").length() + 2);

    mockOfflineCompiler->updateBuildLog(errorMessageArray.data(), errorMessageArray.size());
    EXPECT_EQ(mockOfflineCompiler->getBuildLog(), expectedBuildLogString);

    std::array<char, sizeof("Build failure\0")> additionalErrorMessageArray = {"Build failure\0"};
    expectedBuildLogString = "Error: undefined variable\n"
                             "Build failure";
    EXPECT_EQ(additionalErrorMessageArray.size(), std::string("Build failure").length() + 2);

    mockOfflineCompiler->updateBuildLog(additionalErrorMessageArray.data(), additionalErrorMessageArray.size());
    EXPECT_EQ(mockOfflineCompiler->getBuildLog(), expectedBuildLogString);
}

// buildSourceCode must fail before initialization and produce a gen binary after it.
TEST(OfflineCompilerTest, GivenSourceCodeWhenBuildingThenSuccessIsReturned) {
    auto mockOfflineCompiler = std::make_unique<MockOfflineCompiler>();

    auto retVal = mockOfflineCompiler->buildSourceCode();
    EXPECT_EQ(CL_INVALID_PROGRAM, retVal);

    std::vector<std::string> argv = {
        "ocloc",
        "-file",
        "test_files/copybuffer.cl",
        "-device",
        gEnvironment->devicePrefix.c_str()};

    retVal = mockOfflineCompiler->initialize(argv.size(), argv);
    EXPECT_EQ(CL_SUCCESS, retVal);

    EXPECT_EQ(nullptr, mockOfflineCompiler->genBinary);
    EXPECT_EQ(0u, mockOfflineCompiler->genBinarySize);

    retVal = mockOfflineCompiler->buildSourceCode();
    EXPECT_EQ(CL_SUCCESS, retVal);

    EXPECT_NE(nullptr, mockOfflineCompiler->genBinary);
    EXPECT_NE(0u, mockOfflineCompiler->genBinarySize);
}

TEST(OfflineCompilerTest,
givenSpvOnlyOptionPassedWhenCmdLineParsedThenGenerateOnlySpvFile) {
    std::vector<std::string> argv = {
        "ocloc",
        "-file",
        "test_files/copybuffer.cl",
        "-output",
        "myOutputFileName",
        "-spv_only",
        "-device",
        gEnvironment->devicePrefix.c_str()};

    auto mockOfflineCompiler = std::make_unique<MockOfflineCompiler>();

    auto retVal = mockOfflineCompiler->initialize(argv.size(), argv);
    EXPECT_EQ(CL_SUCCESS, retVal);

    // No output file is expected to exist before the build.
    EXPECT_FALSE(compilerOutputExists("myOutputFileName", "bc") || compilerOutputExists("myOutputFileName", "spv"));
    EXPECT_FALSE(compilerOutputExists("myOutputFileName", "bin"));
    EXPECT_FALSE(compilerOutputExists("myOutputFileName", "gen"));

    retVal = mockOfflineCompiler->build();
    EXPECT_EQ(CL_SUCCESS, retVal);

    // After the build only the intermediate (bc/spv) output is expected.
    EXPECT_TRUE(compilerOutputExists("myOutputFileName", "bc") || compilerOutputExists("myOutputFileName", "spv"));
    EXPECT_FALSE(compilerOutputExists("myOutputFileName", "bin"));
    EXPECT_FALSE(compilerOutputExists("myOutputFileName", "gen"));
}

// emptykernel.cl must build successfully once the compiler is initialized.
TEST(OfflineCompilerTest, GivenKernelWhenNoCharAfterKernelSourceThenBuildWithSuccess) {
    auto mockOfflineCompiler = std::make_unique<MockOfflineCompiler>();

    auto retVal = mockOfflineCompiler->buildSourceCode();
    EXPECT_EQ(CL_INVALID_PROGRAM, retVal);

    std::vector<std::string> argv = {
        "ocloc",
        "-file",
        "test_files/emptykernel.cl",
        "-device",
        gEnvironment->devicePrefix.c_str()};

    retVal = mockOfflineCompiler->initialize(argv.size(), argv);
    EXPECT_EQ(CL_SUCCESS, retVal);

    retVal = mockOfflineCompiler->buildSourceCode();
    EXPECT_EQ(CL_SUCCESS, retVal);
}

// generateElfBinary must fail without a stored gen binary and succeed once a program header is stored.
TEST(OfflineCompilerTest, WhenGeneratingElfBinaryThenBinaryIsCreated) {
    auto mockOfflineCompiler = std::make_unique<MockOfflineCompiler>();

    auto retVal = mockOfflineCompiler->generateElfBinary();
    EXPECT_FALSE(retVal);

    iOpenCL::SProgramBinaryHeader binHeader;
    memset(&binHeader, 0, sizeof(binHeader));
    binHeader.Magic = iOpenCL::MAGIC_CL;
    binHeader.Version = iOpenCL::CURRENT_ICBE_VERSION
- 3; binHeader.Device = mockOfflineCompiler->hwInfo.platform.eRenderCoreFamily; binHeader.GPUPointerSizeInBytes = 8; binHeader.NumberOfKernels = 0; binHeader.SteppingId = 0; binHeader.PatchListSize = 0; size_t binSize = sizeof(iOpenCL::SProgramBinaryHeader); mockOfflineCompiler->storeGenBinary(&binHeader, binSize); EXPECT_TRUE(mockOfflineCompiler->elfBinary.empty()); retVal = mockOfflineCompiler->generateElfBinary(); EXPECT_TRUE(retVal); EXPECT_FALSE(mockOfflineCompiler->elfBinary.empty()); } TEST(OfflineCompilerTest, givenLlvmInputOptionPassedWhenCmdLineParsedThenInputFileLlvmIsSetTrue) { std::vector argv = { "ocloc", "-llvm_input"}; auto mockOfflineCompiler = std::unique_ptr(new MockOfflineCompiler()); ASSERT_NE(nullptr, mockOfflineCompiler); testing::internal::CaptureStdout(); mockOfflineCompiler->parseCommandLine(argv.size(), argv); std::string output = testing::internal::GetCapturedStdout(); EXPECT_NE(0u, output.size()); bool llvmFileOption = mockOfflineCompiler->inputFileLlvm; EXPECT_TRUE(llvmFileOption); } TEST(OfflineCompilerTest, givenDefaultOfflineCompilerObjectWhenNoOptionsAreChangedThenLlvmInputFileIsFalse) { auto mockOfflineCompiler = std::unique_ptr(new MockOfflineCompiler()); ASSERT_NE(nullptr, mockOfflineCompiler); bool llvmFileOption = mockOfflineCompiler->inputFileLlvm; EXPECT_FALSE(llvmFileOption); } TEST(OfflineCompilerTest, givenSpirvInputOptionPassedWhenCmdLineParsedThenInputFileSpirvIsSetTrue) { std::vector argv = {"ocloc", "-spirv_input"}; auto mockOfflineCompiler = std::unique_ptr(new MockOfflineCompiler()); testing::internal::CaptureStdout(); mockOfflineCompiler->parseCommandLine(argv.size(), argv); std::string output = testing::internal::GetCapturedStdout(); EXPECT_NE(0u, output.size()); EXPECT_TRUE(mockOfflineCompiler->inputFileSpirV); } TEST(OfflineCompilerTest, givenDefaultOfflineCompilerObjectWhenNoOptionsAreChangedThenSpirvInputFileIsFalse) { auto mockOfflineCompiler = std::unique_ptr(new MockOfflineCompiler()); 
EXPECT_FALSE(mockOfflineCompiler->inputFileSpirV); } TEST(OfflineCompilerTest, givenIntermediateRepresentationInputWhenBuildSourceCodeIsCalledThenProperTranslationContextIsUsed) { MockOfflineCompiler mockOfflineCompiler; std::vector argv = { "ocloc", "-file", "test_files/emptykernel.cl", "-device", gEnvironment->devicePrefix.c_str()}; testing::internal::CaptureStdout(); auto retVal = mockOfflineCompiler.initialize(argv.size(), argv); auto mockIgcOclDeviceCtx = new NEO::MockIgcOclDeviceCtx(); mockOfflineCompiler.igcDeviceCtx = CIF::RAII::Pack(mockIgcOclDeviceCtx); ASSERT_EQ(CL_SUCCESS, retVal); mockOfflineCompiler.inputFileSpirV = true; retVal = mockOfflineCompiler.buildSourceCode(); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(1U, mockIgcOclDeviceCtx->requestedTranslationCtxs.size()); NEO::MockIgcOclDeviceCtx::TranslationOpT expectedTranslation = {IGC::CodeType::spirV, IGC::CodeType::oclGenBin}; ASSERT_EQ(expectedTranslation, mockIgcOclDeviceCtx->requestedTranslationCtxs[0]); mockOfflineCompiler.inputFileSpirV = false; mockOfflineCompiler.inputFileLlvm = true; mockIgcOclDeviceCtx->requestedTranslationCtxs.clear(); retVal = mockOfflineCompiler.buildSourceCode(); ASSERT_EQ(mockOfflineCompiler.irBinarySize, mockOfflineCompiler.sourceCode.size()); EXPECT_EQ(0, memcmp(mockOfflineCompiler.irBinary, mockOfflineCompiler.sourceCode.data(), mockOfflineCompiler.sourceCode.size())); EXPECT_FALSE(mockOfflineCompiler.isSpirV); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(1U, mockIgcOclDeviceCtx->requestedTranslationCtxs.size()); expectedTranslation = {IGC::CodeType::llvmBc, IGC::CodeType::oclGenBin}; ASSERT_EQ(expectedTranslation, mockIgcOclDeviceCtx->requestedTranslationCtxs[0]); testing::internal::GetCapturedStdout(); } TEST(OfflineCompilerTest, givenBinaryInputThenDontTruncateSourceAtFirstZero) { std::vector argvLlvm = {"ocloc", "-llvm_input", "-file", "test_files/binary_with_zeroes", "-device", gEnvironment->devicePrefix.c_str()}; auto mockOfflineCompiler = std::make_unique(); 
mockOfflineCompiler->initialize(argvLlvm.size(), argvLlvm); EXPECT_LT(0U, mockOfflineCompiler->sourceCode.size()); std::vector argvSpirV = {"ocloc", "-spirv_input", "-file", "test_files/binary_with_zeroes", "-device", gEnvironment->devicePrefix.c_str()}; mockOfflineCompiler = std::make_unique(); mockOfflineCompiler->initialize(argvSpirV.size(), argvSpirV); EXPECT_LT(0U, mockOfflineCompiler->sourceCode.size()); } TEST(OfflineCompilerTest, givenSpirvInputFileWhenCmdLineHasOptionsThenCorrectOptionsArePassedToCompiler) { char data[] = {1, 2, 3, 4, 5, 6, 7, 8}; MockCompilerDebugVars igcDebugVars(gEnvironment->igcDebugVars); igcDebugVars.binaryToReturn = data; igcDebugVars.binaryToReturnSize = sizeof(data); NEO::setIgcDebugVars(igcDebugVars); MockOfflineCompiler mockOfflineCompiler; std::vector argv = { "ocloc", "-file", "test_files/emptykernel.cl", "-spirv_input", "-device", gEnvironment->devicePrefix.c_str(), "-options", "test_options_passed"}; auto retVal = mockOfflineCompiler.initialize(argv.size(), argv); auto mockIgcOclDeviceCtx = new NEO::MockIgcOclDeviceCtx(); mockOfflineCompiler.igcDeviceCtx = CIF::RAII::Pack(mockIgcOclDeviceCtx); ASSERT_EQ(CL_SUCCESS, retVal); mockOfflineCompiler.inputFileSpirV = true; retVal = mockOfflineCompiler.buildSourceCode(); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_STREQ("test_options_passed", mockOfflineCompiler.options.c_str()); NEO::setIgcDebugVars(gEnvironment->igcDebugVars); } TEST(OfflineCompilerTest, givenOutputFileOptionWhenSourceIsCompiledThenOutputFileHasCorrectName) { std::vector argv = { "ocloc", "-file", "test_files/copybuffer.cl", "-output", "myOutputFileName", "-device", gEnvironment->devicePrefix.c_str()}; auto mockOfflineCompiler = std::unique_ptr(new MockOfflineCompiler()); ASSERT_NE(nullptr, mockOfflineCompiler); int retVal = mockOfflineCompiler->initialize(argv.size(), argv); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_FALSE(compilerOutputExists("myOutputFileName", "bc") || compilerOutputExists("myOutputFileName", "spv")); 
EXPECT_FALSE(compilerOutputExists("myOutputFileName", "bin")); EXPECT_FALSE(compilerOutputExists("myOutputFileName", "gen")); retVal = mockOfflineCompiler->build(); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(compilerOutputExists("myOutputFileName", "bc") || compilerOutputExists("myOutputFileName", "spv")); EXPECT_TRUE(compilerOutputExists("myOutputFileName", "bin")); EXPECT_TRUE(compilerOutputExists("myOutputFileName", "gen")); } TEST(OfflineCompilerTest, givenDebugDataAvailableWhenSourceIsBuiltThenDebugDataFileIsCreated) { std::vector argv = { "ocloc", "-file", "test_files/copybuffer.cl", "-output", "myOutputFileName", "-device", gEnvironment->devicePrefix.c_str()}; char debugData[10]; MockCompilerDebugVars igcDebugVars(gEnvironment->igcDebugVars); igcDebugVars.debugDataToReturn = debugData; igcDebugVars.debugDataToReturnSize = sizeof(debugData); NEO::setIgcDebugVars(igcDebugVars); auto mockOfflineCompiler = std::unique_ptr(new MockOfflineCompiler()); ASSERT_NE(nullptr, mockOfflineCompiler); int retVal = mockOfflineCompiler->initialize(argv.size(), argv); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_FALSE(compilerOutputExists("myOutputFileName", "bc") || compilerOutputExists("myOutputFileName", "spv")); EXPECT_FALSE(compilerOutputExists("myOutputFileName", "bin")); EXPECT_FALSE(compilerOutputExists("myOutputFileName", "gen")); EXPECT_FALSE(compilerOutputExists("myOutputFileName", "dbg")); retVal = mockOfflineCompiler->build(); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(compilerOutputExists("myOutputFileName", "bc") || compilerOutputExists("myOutputFileName", "spv")); EXPECT_TRUE(compilerOutputExists("myOutputFileName", "bin")); EXPECT_TRUE(compilerOutputExists("myOutputFileName", "gen")); EXPECT_TRUE(compilerOutputExists("myOutputFileName", "dbg")); NEO::setIgcDebugVars(gEnvironment->igcDebugVars); } TEST(OfflineCompilerTest, givenInternalOptionsWhenCmdLineParsedThenOptionsAreAppendedToInternalOptionsString) { std::vector argv = { "ocloc", "-internal_options", 
"myInternalOptions"}; auto mockOfflineCompiler = std::unique_ptr(new MockOfflineCompiler()); ASSERT_NE(nullptr, mockOfflineCompiler); testing::internal::CaptureStdout(); mockOfflineCompiler->parseCommandLine(argv.size(), argv); std::string output = testing::internal::GetCapturedStdout(); EXPECT_NE(0u, output.size()); std::string internalOptions = mockOfflineCompiler->internalOptions; EXPECT_TRUE(hasSubstr(internalOptions, std::string("myInternalOptions"))); } TEST(OfflineCompilerTest, givenInputOptionsAndInternalOptionsFilesWhenOfflineCompilerIsInitializedThenCorrectOptionsAreSetAndRemainAfterBuild) { auto mockOfflineCompiler = std::unique_ptr(new MockOfflineCompiler()); ASSERT_NE(nullptr, mockOfflineCompiler); ASSERT_TRUE(fileExists("test_files/shouldfail_options.txt")); ASSERT_TRUE(fileExists("test_files/shouldfail_internal_options.txt")); std::vector argv = { "ocloc", "-q", "-file", "test_files/shouldfail.cl", "-device", gEnvironment->devicePrefix.c_str()}; int retVal = mockOfflineCompiler->initialize(argv.size(), argv); EXPECT_EQ(CL_SUCCESS, retVal); auto &options = mockOfflineCompiler->options; auto &internalOptions = mockOfflineCompiler->internalOptions; EXPECT_STREQ(options.c_str(), "-shouldfailOptions"); EXPECT_TRUE(internalOptions.find("-shouldfailInternalOptions") != std::string::npos); EXPECT_TRUE(mockOfflineCompiler->getOptionsReadFromFile().find("-shouldfailOptions") != std::string::npos); EXPECT_TRUE(mockOfflineCompiler->getInternalOptionsReadFromFile().find("-shouldfailInternalOptions") != std::string::npos); mockOfflineCompiler->build(); EXPECT_STREQ(options.c_str(), "-shouldfailOptions"); EXPECT_TRUE(internalOptions.find("-shouldfailInternalOptions") != std::string::npos); EXPECT_TRUE(mockOfflineCompiler->getOptionsReadFromFile().find("-shouldfailOptions") != std::string::npos); EXPECT_TRUE(mockOfflineCompiler->getInternalOptionsReadFromFile().find("-shouldfailInternalOptions") != std::string::npos); } TEST(OfflineCompilerTest, 
givenInputOptionsFileWithSpecialCharsWhenOfflineCompilerIsInitializedThenCorrectOptionsAreSet) { auto mockOfflineCompiler = std::unique_ptr(new MockOfflineCompiler()); ASSERT_NE(nullptr, mockOfflineCompiler); ASSERT_TRUE(fileExists("test_files/simple_kernels_opts_options.txt")); std::vector argv = { "ocloc", "-q", "-file", "test_files/simple_kernels_opts.cl", "-device", gEnvironment->devicePrefix.c_str()}; int retVal = mockOfflineCompiler->initialize(argv.size(), argv); EXPECT_EQ(CL_SUCCESS, retVal); auto &options = mockOfflineCompiler->options; EXPECT_STREQ(options.c_str(), "-cl-opt-disable -DDEF_WAS_SPECIFIED=1 -DARGS=\", const __global int *arg1, float arg2, const __global int *arg3, float arg4\""); } TEST(OfflineCompilerTest, givenInputOptionsAndOclockOptionsFileWithForceStosOptWhenOfflineCompilerIsInitializedThenCompilerOptionGreaterThan4gbBuffersRequiredIsNotApplied) { auto mockOfflineCompiler = std::unique_ptr(new MockOfflineCompiler()); ASSERT_NE(nullptr, mockOfflineCompiler); ASSERT_TRUE(fileExists("test_files/stateful_copy_buffer_ocloc_options.txt")); std::vector argv = { "ocloc", "-q", "-file", "test_files/stateful_copy_buffer.cl", "-device", gEnvironment->devicePrefix.c_str()}; int retVal = mockOfflineCompiler->initialize(argv.size(), argv); EXPECT_EQ(CL_SUCCESS, retVal); mockOfflineCompiler->build(); auto &internalOptions = mockOfflineCompiler->internalOptions; size_t found = internalOptions.find(NEO::CompilerOptions::greaterThan4gbBuffersRequired.data()); EXPECT_EQ(std::string::npos, found); } struct OfflineCompilerStatelessToStatefulTests : public ::testing::Test { void SetUp() override { mockOfflineCompiler = std::make_unique(); mockOfflineCompiler->deviceName = gEnvironment->devicePrefix; mockOfflineCompiler->initHardwareInfo(mockOfflineCompiler->deviceName); } void runTest() const { std::pair testParams[] = {{true, false}, {false, true}}; for (const auto &[forceStatelessToStatefulOptimization, containsGreaterThan4gbBuffersRequired] : testParams) { 
auto internalOptions = mockOfflineCompiler->internalOptions; mockOfflineCompiler->forceStatelessToStatefulOptimization = forceStatelessToStatefulOptimization; mockOfflineCompiler->appendExtraInternalOptions(internalOptions); auto found = internalOptions.find(NEO::CompilerOptions::greaterThan4gbBuffersRequired.data()); if (containsGreaterThan4gbBuffersRequired) { EXPECT_NE(std::string::npos, found); } else { EXPECT_EQ(std::string::npos, found); } } } std::unique_ptr mockOfflineCompiler; }; TEST_F(OfflineCompilerStatelessToStatefulTests, whenAppendExtraInternalOptionsThenInternalOptionsAreCorrect) { const auto &compilerHwInfoConfig = *CompilerHwInfoConfig::get(mockOfflineCompiler->hwInfo.platform.eProductFamily); if (!compilerHwInfoConfig.isForceToStatelessRequired()) { GTEST_SKIP(); } runTest(); } template class MockCompilerHwInfoConfigHw : public CompilerHwInfoConfigHw { public: bool isForceToStatelessRequired() const override { return true; } }; HWTEST2_F(OfflineCompilerStatelessToStatefulTests, givenMockWhenAppendExtraInternalOptionsThenInternalOptionsAreCorrect, MatchAny) { MockCompilerHwInfoConfigHw mockCompilerHwInfoConfig; VariableBackup backupMockHwInfoConfig(&CompilerHwInfoConfigFactory[productFamily], &mockCompilerHwInfoConfig); runTest(); } TEST(OfflineCompilerTest, givenNonExistingFilenameWhenUsedToReadOptionsThenReadOptionsFromFileReturnsFalse) { std::string options; std::string file("non_existing_file"); ASSERT_FALSE(fileExists(file.c_str())); auto helper = std::make_unique(); bool result = OfflineCompiler::readOptionsFromFile(options, file, helper.get()); EXPECT_FALSE(result); } class MyOclocArgHelper : public OclocArgHelper { public: std::unique_ptr loadDataFromFile(const std::string &filename, size_t &retSize) override { auto file = std::make_unique(fileContent.size() + 1); strcpy_s(file.get(), fileContent.size() + 1, fileContent.c_str()); retSize = fileContent.size(); return file; } bool fileExists(const std::string &filename) const override { 
return true; } std::string fileContent; }; TEST(OfflineCompilerTest, givenEmptyFileWhenReadOptionsFromFileThenSuccessIsReturned) { std::string options; std::string filename("non_existing_file"); auto helper = std::make_unique(); helper->fileContent = ""; EXPECT_TRUE(OfflineCompiler::readOptionsFromFile(options, filename, helper.get())); EXPECT_TRUE(options.empty()); } TEST(OfflineCompilerTest, givenNoCopyrightsWhenReadOptionsFromFileThenSuccessIsReturned) { std::string options; std::string filename("non_existing_file"); auto helper = std::make_unique(); helper->fileContent = "-dummy_option"; EXPECT_TRUE(OfflineCompiler::readOptionsFromFile(options, filename, helper.get())); EXPECT_STREQ(helper->fileContent.c_str(), options.c_str()); } TEST(OfflineCompilerTest, givenEmptyDirectoryWhenGenerateFilePathIsCalledThenTrailingSlashIsNotAppended) { std::string path = generateFilePath("", "a", "b"); EXPECT_STREQ("ab", path.c_str()); } TEST(OfflineCompilerTest, givenNonEmptyDirectoryWithTrailingSlashWhenGenerateFilePathIsCalledThenAdditionalTrailingSlashIsNotAppended) { std::string path = generateFilePath("d/", "a", "b"); EXPECT_STREQ("d/ab", path.c_str()); } TEST(OfflineCompilerTest, givenNonEmptyDirectoryWithoutTrailingSlashWhenGenerateFilePathIsCalledThenTrailingSlashIsAppended) { std::string path = generateFilePath("d", "a", "b"); EXPECT_STREQ("d/ab", path.c_str()); } TEST(OfflineCompilerTest, givenSpirvPathWhenGenerateFilePathForIrIsCalledThenProperExtensionIsReturned) { MockOfflineCompiler compiler; compiler.isSpirV = true; compiler.outputDirectory = "d"; std::string path = compiler.generateFilePathForIr("a"); EXPECT_STREQ("d/a.spv", path.c_str()); } TEST(OfflineCompilerTest, givenLlvmBcPathWhenGenerateFilePathForIrIsCalledThenProperExtensionIsReturned) { MockOfflineCompiler compiler; compiler.isSpirV = false; compiler.outputDirectory = "d"; std::string path = compiler.generateFilePathForIr("a"); EXPECT_STREQ("d/a.bc", path.c_str()); } TEST(OfflineCompilerTest, 
givenLlvmTextPathWhenGenerateFilePathForIrIsCalledThenProperExtensionIsReturned) { MockOfflineCompiler compiler; compiler.isSpirV = false; compiler.useLlvmText = true; compiler.outputDirectory = "d"; std::string path = compiler.generateFilePathForIr("a"); EXPECT_STREQ("d/a.ll", path.c_str()); compiler.isSpirV = true; path = compiler.generateFilePathForIr("a"); EXPECT_STREQ("d/a.ll", path.c_str()); } TEST(OfflineCompilerTest, givenDisabledOptsSuffixWhenGenerateOptsSuffixIsCalledThenEmptyStringIsReturned) { MockOfflineCompiler compiler; compiler.options = "A B C"; compiler.useOptionsSuffix = false; std::string suffix = compiler.generateOptsSuffix(); EXPECT_STREQ("", suffix.c_str()); } TEST(OfflineCompilerTest, givenEnabledOptsSuffixWhenGenerateOptsSuffixIsCalledThenEscapedStringIsReturned) { MockOfflineCompiler compiler; compiler.options = "A B C"; compiler.useOptionsSuffix = true; std::string suffix = compiler.generateOptsSuffix(); EXPECT_STREQ("A_B_C", suffix.c_str()); } TEST(OfflineCompilerTest, givenCompilerWhenBuildSourceCodeFailsThenGenerateElfBinaryAndWriteOutAllFilesAreCalled) { MockOfflineCompiler compiler; compiler.overrideBuildSourceCodeStatus = true; auto expectedError = OclocErrorCode::BUILD_PROGRAM_FAILURE; compiler.buildSourceCodeStatus = expectedError; EXPECT_EQ(0u, compiler.generateElfBinaryCalled); EXPECT_EQ(0u, compiler.writeOutAllFilesCalled); auto status = compiler.build(); EXPECT_EQ(expectedError, status); EXPECT_EQ(1u, compiler.generateElfBinaryCalled); EXPECT_EQ(1u, compiler.writeOutAllFilesCalled); } TEST(OfflineCompilerTest, givenDeviceSpecificKernelFileWhenCompilerIsInitializedThenOptionsAreReadFromFile) { auto mockOfflineCompiler = std::unique_ptr(new MockOfflineCompiler()); ASSERT_NE(nullptr, mockOfflineCompiler); const char *kernelFileName = "test_files/kernel_for_specific_device.skl"; const char *optionsFileName = "test_files/kernel_for_specific_device_options.txt"; ASSERT_TRUE(fileExists(kernelFileName)); 
ASSERT_TRUE(fileExists(optionsFileName)); std::vector argv = { "ocloc", "-q", "-file", kernelFileName, "-device", gEnvironment->devicePrefix.c_str()}; int retVal = mockOfflineCompiler->initialize(argv.size(), argv); EXPECT_EQ(OclocErrorCode::SUCCESS, retVal); EXPECT_STREQ("-cl-opt-disable", mockOfflineCompiler->options.c_str()); } TEST(OfflineCompilerTest, givenHexadecimalRevisionIdWhenCompilerIsInitializedThenPassItToHwInfo) { auto mockOfflineCompiler = std::unique_ptr(new MockOfflineCompiler()); ASSERT_NE(nullptr, mockOfflineCompiler); std::vector argv = { "ocloc", "-q", "-file", "test_files/copybuffer.cl", "-device", gEnvironment->devicePrefix.c_str(), "-revision_id", "0x11"}; int retVal = mockOfflineCompiler->initialize(argv.size(), argv); EXPECT_EQ(OclocErrorCode::SUCCESS, retVal); EXPECT_EQ(mockOfflineCompiler->hwInfo.platform.usRevId, 17); } TEST(OfflineCompilerTest, givenDebugVariableSetWhenInitializingThenOverrideRevision) { DebugManagerStateRestore stateRestore; DebugManager.flags.OverrideRevision.set(123); auto mockOfflineCompiler = std::unique_ptr(new MockOfflineCompiler()); ASSERT_NE(nullptr, mockOfflineCompiler); std::vector argv = { "ocloc", "-q", "-file", "test_files/copybuffer.cl", "-device", gEnvironment->devicePrefix.c_str(), "-revision_id", "0x11"}; int retVal = mockOfflineCompiler->initialize(argv.size(), argv); EXPECT_EQ(OclocErrorCode::SUCCESS, retVal); EXPECT_EQ(mockOfflineCompiler->hwInfo.platform.usRevId, 123); } TEST(OfflineCompilerTest, givenDecimalRevisionIdWhenCompilerIsInitializedThenPassItToHwInfo) { auto mockOfflineCompiler = std::unique_ptr(new MockOfflineCompiler()); ASSERT_NE(nullptr, mockOfflineCompiler); std::vector argv = { "ocloc", "-q", "-file", "test_files/copybuffer.cl", "-device", gEnvironment->devicePrefix.c_str(), "-revision_id", "17"}; int retVal = mockOfflineCompiler->initialize(argv.size(), argv); EXPECT_EQ(OclocErrorCode::SUCCESS, retVal); EXPECT_EQ(mockOfflineCompiler->hwInfo.platform.usRevId, 17); } 
TEST(OfflineCompilerTest, givenNoRevisionIdWhenCompilerIsInitializedThenHwInfoHasDefaultRevId) { auto mockOfflineCompiler = std::unique_ptr(new MockOfflineCompiler()); ASSERT_NE(nullptr, mockOfflineCompiler); std::vector argv = { "ocloc", "-q", "-file", "test_files/copybuffer.cl", "-device", gEnvironment->devicePrefix.c_str()}; mockOfflineCompiler->initHardwareInfo(gEnvironment->devicePrefix.c_str()); auto revId = mockOfflineCompiler->hwInfo.platform.usRevId; int retVal = mockOfflineCompiler->initialize(argv.size(), argv); EXPECT_EQ(OclocErrorCode::SUCCESS, retVal); EXPECT_EQ(mockOfflineCompiler->hwInfo.platform.usRevId, revId); } TEST(OfflineCompilerTest, whenDeviceIsSpecifiedThenDefaultConfigFromTheDeviceIsUsed) { auto mockOfflineCompiler = std::unique_ptr(new MockOfflineCompiler()); ASSERT_NE(nullptr, mockOfflineCompiler); std::vector argv = { "ocloc", "-q", "-file", "test_files/copybuffer.cl", "-device", gEnvironment->devicePrefix.c_str()}; int retVal = mockOfflineCompiler->initialize(argv.size(), argv); EXPECT_EQ(OclocErrorCode::SUCCESS, retVal); HardwareInfo hwInfo = mockOfflineCompiler->hwInfo; uint32_t sliceCount = 2; uint32_t subSlicePerSliceCount = 4; uint32_t euPerSubSliceCount = 5; uint64_t hwInfoConfig = euPerSubSliceCount; hwInfoConfig |= (static_cast(subSlicePerSliceCount) << 16); hwInfoConfig |= (static_cast(sliceCount) << 32); setHwInfoValuesFromConfig(hwInfoConfig, hwInfo); EXPECT_EQ(sliceCount, hwInfo.gtSystemInfo.SliceCount); EXPECT_EQ(subSlicePerSliceCount * sliceCount, hwInfo.gtSystemInfo.SubSliceCount); EXPECT_EQ(subSlicePerSliceCount * sliceCount, hwInfo.gtSystemInfo.SubSliceCount); EXPECT_EQ(euPerSubSliceCount * subSlicePerSliceCount * sliceCount, hwInfo.gtSystemInfo.EUCount); } TEST(OclocCompile, whenDetectedPotentialInputTypeMismatchThenEmitsWarning) { std::string sourceOclC = "__kernel void k() { }"; std::string sourceLlvmBc = NEO::llvmBcMagic.str(); std::string sourceSpirv = NEO::spirvMagic.str(); std::string sourceSpirvInv = 
NEO::spirvMagicInv.str(); std::string notSpirvWarning = "Warning : file does not look like spirv bitcode (wrong magic numbers)"; std::string notLlvmBcWarning = "Warning : file does not look like llvm bitcode (wrong magic numbers)"; std::string isSpirvWarning = "Warning : file looks like spirv bitcode (based on magic numbers) - please make sure proper CLI flags are present"; std::string isLlvmBcWarning = "Warning : file looks like llvm bitcode (based on magic numbers) - please make sure proper CLI flags are present"; std::string allWarnings[] = {notSpirvWarning, notLlvmBcWarning, isLlvmBcWarning, isSpirvWarning}; struct Case { std::string input; bool isSpirv; bool isLlvm; std::string expectedWarning; }; Case cases[] = { {sourceOclC, false, false, ""}, {sourceOclC, true, false, notSpirvWarning}, {sourceOclC, false, true, notLlvmBcWarning}, {sourceLlvmBc, false, false, isLlvmBcWarning}, {sourceLlvmBc, true, false, notSpirvWarning}, {sourceLlvmBc, false, true, ""}, {sourceSpirv, false, false, isSpirvWarning}, {sourceSpirv, true, false, ""}, {sourceSpirv, false, true, notLlvmBcWarning}, {sourceSpirvInv, false, false, isSpirvWarning}, {sourceSpirvInv, true, false, ""}, {sourceSpirvInv, false, true, notLlvmBcWarning}, }; { MockOfflineCompiler ocloc; std::vector argv = { "ocloc", "-q", "-file", "test_files/copybuffer.cl", "-device", gEnvironment->devicePrefix.c_str()}; ocloc.initHardwareInfo(gEnvironment->devicePrefix.c_str()); int retVal = ocloc.initialize(argv.size(), argv); ASSERT_EQ(0, retVal); int caseNum = 0; for (auto &c : cases) { testing::internal::CaptureStdout(); ocloc.sourceCode = c.input; ocloc.inputFileLlvm = c.isLlvm; ocloc.inputFileSpirV = c.isSpirv; ocloc.build(); auto log = ocloc.argHelper->getPrinterRef().getLog().str(); ocloc.clearLog(); std::string output = testing::internal::GetCapturedStdout(); if (c.expectedWarning.empty()) { for (auto &w : allWarnings) { EXPECT_FALSE(hasSubstr(log, w)) << " Case : " << caseNum; } } else { EXPECT_TRUE(hasSubstr(log, 
c.expectedWarning)) << " Case : " << caseNum; EXPECT_STREQ(log.c_str(), output.c_str()); } caseNum++; } } } TEST(OclocCompile, givenCommandLineWithoutDeviceWhenCompilingToSpirvThenSucceedsButUsesEmptyExtensionString) { MockOfflineCompiler ocloc; std::vector argv = { "ocloc", "-q", "-file", "test_files/copybuffer.cl", "-spv_only"}; int retVal = ocloc.initialize(argv.size(), argv); ASSERT_EQ(0, retVal); retVal = ocloc.build(); EXPECT_EQ(0, retVal); EXPECT_TRUE(hasSubstr(ocloc.internalOptions, "-ocl-version=300 -cl-ext=-all,+cl_khr_3d_image_writes -D__IMAGE_SUPPORT__=1")); } TEST(OclocCompile, givenDeviceAndInternalOptionsOptionWhenCompilingToSpirvThenInternalOptionsAreSetCorrectly) { MockOfflineCompiler ocloc; std::vector argv = { "ocloc", "-q", "-file", "test_files/copybuffer.cl", "-internal_options", "-cl-ext=+custom_param", "-device", gEnvironment->devicePrefix.c_str(), "-spv_only"}; int retVal = ocloc.initialize(argv.size(), argv); ASSERT_EQ(0, retVal); retVal = ocloc.build(); EXPECT_EQ(0, retVal); std::string regexToMatch = "\\-ocl\\-version=" + std::to_string(ocloc.hwInfo.capabilityTable.clVersionSupport) + "0 \\-cl\\-ext=\\-all.* \\-cl\\-ext=\\+custom_param"; EXPECT_TRUE(containsRegex(ocloc.internalOptions, regexToMatch)); } TEST(OclocCompile, givenNoDeviceAndInternalOptionsOptionWhenCompilingToSpirvThenInternalOptionsAreSetCorrectly) { MockOfflineCompiler ocloc; std::vector argv = { "ocloc", "-q", "-file", "test_files/copybuffer.cl", "-internal_options", "-cl-ext=+custom_param", "-spv_only"}; int retVal = ocloc.initialize(argv.size(), argv); ASSERT_EQ(0, retVal); retVal = ocloc.build(); EXPECT_EQ(0, retVal); EXPECT_TRUE(hasSubstr(ocloc.internalOptions, "-ocl-version=300 -cl-ext=-all,+cl_khr_3d_image_writes -cl-ext=+custom_param")); } TEST(OclocCompile, givenPackedDeviceBinaryFormatWhenGeneratingElfBinaryThenItIsReturnedAsItIs) { MockOfflineCompiler ocloc; ZebinTestData::ValidEmptyProgram zebin; // genBinary is deleted in ocloc's destructor ocloc.genBinary = 
new char[zebin.storage.size()]; ocloc.genBinarySize = zebin.storage.size(); memcpy_s(ocloc.genBinary, ocloc.genBinarySize, zebin.storage.data(), zebin.storage.size()); ASSERT_EQ(true, ocloc.generateElfBinary()); EXPECT_EQ(0, memcmp(zebin.storage.data(), ocloc.elfBinary.data(), zebin.storage.size())); } TEST(OclocCompile, givenSpirvInputThenDontGenerateSpirvFile) { MockOfflineCompiler ocloc; std::vector argv = { "ocloc", "-q", "-file", "test_files/binary_with_zeroes", "-out_dir", "offline_compiler_test", "-device", gEnvironment->devicePrefix.c_str(), "-spirv_input"}; int retVal = ocloc.initialize(argv.size(), argv); ASSERT_EQ(0, retVal); retVal = ocloc.build(); EXPECT_EQ(0, retVal); EXPECT_TRUE(compilerOutputExists("offline_compiler_test/binary_with_zeroes", "gen")); EXPECT_TRUE(compilerOutputExists("offline_compiler_test/binary_with_zeroes", "bin")); EXPECT_FALSE(compilerOutputExists("offline_compiler_test/binary_with_zeroes", "spv")); } TEST(OfflineCompilerTest, GivenDebugFlagWhenSetStatelessToStatefullBufferOffsetFlagThenStatelessToStatefullOptimizationIsSetCorrectly) { DebugManagerStateRestore stateRestore; MockOfflineCompiler mockOfflineCompiler; { DebugManager.flags.EnableStatelessToStatefulBufferOffsetOpt.set(0); mockOfflineCompiler.initHardwareInfo(gEnvironment->devicePrefix.c_str()); mockOfflineCompiler.setStatelessToStatefullBufferOffsetFlag(); std::string internalOptions = mockOfflineCompiler.internalOptions; size_t found = internalOptions.find(NEO::CompilerOptions::hasBufferOffsetArg.data()); EXPECT_EQ(std::string::npos, found); } { DebugManager.flags.EnableStatelessToStatefulBufferOffsetOpt.set(1); mockOfflineCompiler.initHardwareInfo(gEnvironment->devicePrefix.c_str()); mockOfflineCompiler.setStatelessToStatefullBufferOffsetFlag(); std::string internalOptions = mockOfflineCompiler.internalOptions; size_t found = internalOptions.find(NEO::CompilerOptions::hasBufferOffsetArg.data()); EXPECT_NE(std::string::npos, found); } } struct WhiteBoxOclocArgHelper 
: public OclocArgHelper { using OclocArgHelper::messagePrinter; using OclocArgHelper::OclocArgHelper; }; TEST(OclocArgHelperTest, GivenOutputSuppressMessagesAndSaveItToFile) { uint32_t numOutputs = 0U; uint64_t *lenOutputs = nullptr; uint8_t **outputs = nullptr; char **nameOutputs = nullptr; auto helper = std::unique_ptr(new WhiteBoxOclocArgHelper(0, nullptr, nullptr, nullptr, 0, nullptr, nullptr, nullptr, &numOutputs, &outputs, &lenOutputs, &nameOutputs)); EXPECT_TRUE(helper->messagePrinter.isSuppressed()); ConstStringRef printMsg = "Hello world!"; testing::internal::CaptureStdout(); helper->printf(printMsg.data()); std::string capturedStdout = testing::internal::GetCapturedStdout(); EXPECT_TRUE(capturedStdout.empty()); helper.reset(); // Delete helper. Destructor saves data to output EXPECT_EQ(1U, numOutputs); EXPECT_EQ(printMsg.length(), lenOutputs[0]); EXPECT_STREQ("stdout.log", nameOutputs[0]); std::string stdoutStr = std::string(reinterpret_cast(outputs[0]), static_cast(lenOutputs[0])); EXPECT_STREQ(printMsg.data(), stdoutStr.c_str()); delete[] nameOutputs[0]; delete[] outputs[0]; delete[] nameOutputs; delete[] outputs; delete[] lenOutputs; } TEST(OclocArgHelperTest, GivenValidSourceFileWhenRequestingVectorOfStringsThenLinesAreStored) { const char input[] = "First\nSecond\nThird"; const auto inputLength{sizeof(input)}; const auto filename{"some_file.txt"}; Source source{reinterpret_cast(input), inputLength, filename}; std::vector lines{}; source.toVectorOfStrings(lines); ASSERT_EQ(3u, lines.size()); EXPECT_EQ("First", lines[0]); EXPECT_EQ("Second", lines[1]); EXPECT_EQ("Third", lines[2]); } TEST(OclocArgHelperTest, GivenSourceFileWithTabsWhenRequestingVectorOfStringsWithTabsReplacementThenLinesWithSpacesAreStored) { const char input[] = "First\tWord\nSecond\tWord\nThird\tWord"; const auto inputLength{sizeof(input)}; const auto filename{"some_file.txt"}; Source source{reinterpret_cast(input), inputLength, filename}; constexpr bool replaceTabs{true}; 
std::vector lines{}; source.toVectorOfStrings(lines, replaceTabs); ASSERT_EQ(3u, lines.size()); EXPECT_EQ("First Word", lines[0]); EXPECT_EQ("Second Word", lines[1]); EXPECT_EQ("Third Word", lines[2]); } TEST(OclocArgHelperTest, GivenSourceFileWithEmptyLinesWhenRequestingVectorOfStringsThenOnlyNonEmptyLinesAreStored) { const char input[] = "First\n\n\nSecond\n"; const auto inputLength{sizeof(input)}; const auto filename{"some_file.txt"}; Source source{reinterpret_cast(input), inputLength, filename}; std::vector lines{}; source.toVectorOfStrings(lines); ASSERT_EQ(2u, lines.size()); EXPECT_EQ("First", lines[0]); EXPECT_EQ("Second", lines[1]); } TEST(OclocArgHelperTest, GivenSourceFileWhenRequestingBinaryVectorThenBinaryIsReturned) { const char input[] = "A file content"; const auto inputLength{sizeof(input)}; const auto filename{"some_file.txt"}; Source source{reinterpret_cast(input), inputLength, filename}; const auto binaryContent = source.toBinaryVector(); ASSERT_EQ(inputLength, binaryContent.size()); ASSERT_TRUE(std::equal(binaryContent.begin(), binaryContent.end(), input)); } TEST(OclocArgHelperTest, GivenNoOutputPrintMessages) { auto helper = WhiteBoxOclocArgHelper(0, nullptr, nullptr, nullptr, 0, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr); EXPECT_FALSE(helper.messagePrinter.isSuppressed()); ConstStringRef printMsg = "Hello world!"; testing::internal::CaptureStdout(); helper.printf(printMsg.data()); std::string capturedStdout = testing::internal::GetCapturedStdout(); EXPECT_STREQ(printMsg.data(), capturedStdout.c_str()); } } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/offline_compiler/offline_compiler_tests.h000066400000000000000000000027621422164147700320610ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/offline_compiler/source/multi_command.h" #include "shared/offline_compiler/source/ocloc_error_code.h" #include 
"shared/offline_compiler/source/offline_compiler.h" #include "opencl/test/unit_test/offline_compiler/mock/mock_argument_helper.h" #include "gtest/gtest.h" #include #include namespace NEO { class OfflineCompilerTests : public ::testing::Test { public: OfflineCompiler *pOfflineCompiler = nullptr; int retVal = OclocErrorCode::SUCCESS; std::map filesMap; std::unique_ptr oclocArgHelperWithoutInput = std::make_unique(filesMap); protected: void SetUp() override { oclocArgHelperWithoutInput->setAllCallBase(true); } }; class MultiCommandTests : public ::testing::Test { public: void createFileWithArgs(const std::vector &, int numOfBuild); void deleteFileWithArgs(); void deleteOutFileList(); MultiCommand *pMultiCommand = nullptr; std::string nameOfFileWithArgs; std::string outFileList; int retVal = OclocErrorCode::SUCCESS; std::map filesMap; std::unique_ptr oclocArgHelperWithoutInput = std::make_unique(filesMap); protected: void SetUp() override { oclocArgHelperWithoutInput->setAllCallBase(true); } }; } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/offline_compiler/offline_linker_tests.cpp000066400000000000000000000742551422164147700320740ustar00rootroot00000000000000/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "offline_linker_tests.h" #include "shared/offline_compiler/source/ocloc_error_code.h" #include "shared/source/device_binary_format/elf/elf_decoder.h" #include "shared/source/device_binary_format/elf/ocl_elf.h" #include "shared/source/helpers/string.h" #include "shared/source/os_interface/os_inc_base.h" #include "shared/test/common/mocks/mock_compilers.h" #include "environment.h" #include #include #include #include #include #include #include #include extern Environment *gEnvironment; namespace NEO { using OperationMode = MockOfflineLinker::OperationMode; void OfflineLinkerTest::SetUp() { MockCompilerDebugVars igcDebugVars{gEnvironment->igcDebugVars}; igcDebugVars.binaryToReturn = binaryToReturn; 
igcDebugVars.binaryToReturnSize = sizeof(binaryToReturn); setIgcDebugVars(igcDebugVars); } void OfflineLinkerTest::TearDown() { setIgcDebugVars(gEnvironment->igcDebugVars); } std::string OfflineLinkerTest::getEmptySpirvFile() const { std::string spirv{"\x07\x23\x02\x03"}; spirv.resize(64, '\0'); return spirv; } std::string OfflineLinkerTest::getEmptyLlvmBcFile() const { std::string llvmbc{"BC\xc0\xde"}; llvmbc.resize(64, '\0'); return llvmbc; } MockOfflineLinker::InputFileContent OfflineLinkerTest::createFileContent(const std::string &content, IGC::CodeType::CodeType_t codeType) const { std::unique_ptr bytes{new char[content.size()]}; std::copy(content.begin(), content.end(), bytes.get()); return {std::move(bytes), content.size(), codeType}; } TEST_F(OfflineLinkerTest, GivenDefaultConstructedLinkerThenRequiredFieldsHaveDefaultValues) { MockOfflineLinker mockOfflineLinker{&mockArgHelper}; EXPECT_EQ(OperationMode::SKIP_EXECUTION, mockOfflineLinker.operationMode); EXPECT_EQ("linker_output", mockOfflineLinker.outputFilename); EXPECT_EQ(IGC::CodeType::llvmBc, mockOfflineLinker.outputFormat); } TEST_F(OfflineLinkerTest, GivenLessThanTwoArgumentsWhenParsingThenInvalidCommandIsReturned) { const std::vector argv = { "ocloc.exe"}; MockOfflineLinker mockOfflineLinker{&mockArgHelper}; const auto result = mockOfflineLinker.parseCommand(argv.size(), argv); EXPECT_EQ(OclocErrorCode::INVALID_COMMAND_LINE, result); } TEST_F(OfflineLinkerTest, GivenInputFilesArgumentsWhenParsingThenListOfFilenamesIsPopulated) { const std::string firstFile{"sample_input_1.spv"}; const std::string secondFile{"sample_input_2.spv"}; const std::vector argv = { "ocloc.exe", "link", "-file", firstFile, "-file", secondFile}; MockOfflineLinker mockOfflineLinker{&mockArgHelper}; const auto result = mockOfflineLinker.parseCommand(argv.size(), argv); ASSERT_EQ(OclocErrorCode::SUCCESS, result); ASSERT_EQ(2u, mockOfflineLinker.inputFilenames.size()); EXPECT_EQ(firstFile, mockOfflineLinker.inputFilenames[0]); 
EXPECT_EQ(secondFile, mockOfflineLinker.inputFilenames[1]); } TEST_F(OfflineLinkerTest, GivenOutputFilenameArgumentWhenParsingThenOutputFilenameIsSetAccordingly) { const std::string outputFilename{"my_custom_output_filename"}; const std::vector argv = { "ocloc.exe", "link", "-out", outputFilename}; MockOfflineLinker mockOfflineLinker{&mockArgHelper}; const auto result = mockOfflineLinker.parseCommand(argv.size(), argv); ASSERT_EQ(OclocErrorCode::SUCCESS, result); EXPECT_EQ(outputFilename, mockOfflineLinker.outputFilename); } TEST_F(OfflineLinkerTest, GivenValidOutputFileFormatWhenParsingThenOutputFormatIsSetAccordingly) { const std::vector argv = { "ocloc.exe", "link", "-out_format", "LLVM_BC"}; MockOfflineLinker mockOfflineLinker{&mockArgHelper}; const auto result = mockOfflineLinker.parseCommand(argv.size(), argv); ASSERT_EQ(OclocErrorCode::SUCCESS, result); EXPECT_EQ(IGC::CodeType::llvmBc, mockOfflineLinker.outputFormat); } TEST_F(OfflineLinkerTest, GivenUnknownOutputFileFormatWhenParsingThenInvalidFormatIsSet) { const std::vector argv = { "ocloc.exe", "link", "-out_format", "StrangeFormat"}; MockOfflineLinker mockOfflineLinker{&mockArgHelper}; const auto result = mockOfflineLinker.parseCommand(argv.size(), argv); ASSERT_EQ(OclocErrorCode::SUCCESS, result); EXPECT_EQ(IGC::CodeType::invalid, mockOfflineLinker.outputFormat); } TEST_F(OfflineLinkerTest, GivenOptionsArgumentWhenParsingThenOptionsAreSet) { const std::string options{"-g"}; const std::vector argv = { "ocloc.exe", "link", "-options", options}; MockOfflineLinker mockOfflineLinker{&mockArgHelper}; const auto result = mockOfflineLinker.parseCommand(argv.size(), argv); ASSERT_EQ(OclocErrorCode::SUCCESS, result); EXPECT_EQ(options, mockOfflineLinker.options); } TEST_F(OfflineLinkerTest, GivenInternalOptionsArgumentWhenParsingThenInternalOptionsAreSet) { const std::string internalOptions{"-ze-allow-zebin"}; const std::vector argv = { "ocloc.exe", "link", "-internal_options", internalOptions}; 
MockOfflineLinker mockOfflineLinker{&mockArgHelper}; const auto result = mockOfflineLinker.parseCommand(argv.size(), argv); ASSERT_EQ(OclocErrorCode::SUCCESS, result); EXPECT_EQ(internalOptions, mockOfflineLinker.internalOptions); } TEST_F(OfflineLinkerTest, GivenHelpArgumentWhenParsingThenShowHelpOperationIsSet) { const std::vector argv = { "ocloc.exe", "link", "--help"}; MockOfflineLinker mockOfflineLinker{&mockArgHelper}; const auto result = mockOfflineLinker.parseCommand(argv.size(), argv); ASSERT_EQ(OclocErrorCode::SUCCESS, result); EXPECT_EQ(OperationMode::SHOW_HELP, mockOfflineLinker.operationMode); } TEST_F(OfflineLinkerTest, GivenUnknownArgumentWhenParsingThenErrorIsReported) { const std::vector argv = { "ocloc.exe", "link", "-some_new_unknown_command"}; ::testing::internal::CaptureStdout(); MockOfflineLinker mockOfflineLinker{&mockArgHelper}; const auto result = mockOfflineLinker.initialize(argv.size(), argv); const auto output{::testing::internal::GetCapturedStdout()}; EXPECT_EQ(OclocErrorCode::INVALID_COMMAND_LINE, result); const std::string expectedErrorMessage{"Invalid option (arg 2): -some_new_unknown_command\n"}; EXPECT_EQ(expectedErrorMessage, output); } TEST_F(OfflineLinkerTest, GivenFlagsWhichRequireMoreArgsWithoutThemWhenParsingThenErrorIsReported) { const std::array flagsToTest = { "-file", "-out", "-out_format", "-options", "-internal_options"}; for (const auto &flag : flagsToTest) { const std::vector argv = { "ocloc.exe", "link", flag}; MockOfflineLinker mockOfflineLinker{&mockArgHelper}; ::testing::internal::CaptureStdout(); const auto result = mockOfflineLinker.parseCommand(argv.size(), argv); const auto output{::testing::internal::GetCapturedStdout()}; EXPECT_EQ(OclocErrorCode::INVALID_COMMAND_LINE, result); const std::string expectedErrorMessage{"Invalid option (arg 2): " + flag + "\n"}; EXPECT_EQ(expectedErrorMessage, output); } } TEST_F(OfflineLinkerTest, GivenCommandWithoutInputFilesWhenVerificationIsPerformedThenErrorIsReturned) { 
MockOfflineLinker mockOfflineLinker{&mockArgHelper}; mockOfflineLinker.inputFilenames = {}; ::testing::internal::CaptureStdout(); const auto verificationResult = mockOfflineLinker.verifyLinkerCommand(); const auto output{::testing::internal::GetCapturedStdout()}; EXPECT_EQ(OclocErrorCode::INVALID_COMMAND_LINE, verificationResult); const std::string expectedErrorMessage{"Error: Input name is missing! At least one input file is required!\n"}; EXPECT_EQ(expectedErrorMessage, output); } TEST_F(OfflineLinkerTest, GivenCommandWithEmptyFilenameWhenVerificationIsPerformedThenErrorIsReturned) { const std::vector argv = { "ocloc.exe", "link", "-file", ""}; MockOfflineLinker mockOfflineLinker{&mockArgHelper}; ::testing::internal::CaptureStdout(); const auto verificationResult = mockOfflineLinker.initialize(argv.size(), argv); const auto output{::testing::internal::GetCapturedStdout()}; EXPECT_EQ(OclocErrorCode::INVALID_COMMAND_LINE, verificationResult); const std::string expectedErrorMessage{"Error: Empty filename cannot be used!\n"}; EXPECT_EQ(expectedErrorMessage, output); } TEST_F(OfflineLinkerTest, GivenCommandWithNonexistentInputFileWhenVerificationIsPerformedThenErrorIsReturned) { const std::vector argv = { "ocloc.exe", "link", "-file", "some_file1.spv", "-file", "some_file2.spv"}; MockOfflineLinker mockOfflineLinker{&mockArgHelper}; const auto parsingResult = mockOfflineLinker.parseCommand(argv.size(), argv); ASSERT_EQ(OclocErrorCode::SUCCESS, parsingResult); ::testing::internal::CaptureStdout(); const auto verificationResult = mockOfflineLinker.verifyLinkerCommand(); const auto output{::testing::internal::GetCapturedStdout()}; EXPECT_EQ(OclocErrorCode::INVALID_FILE, verificationResult); const std::string expectedErrorMessage{"Error: Input file some_file1.spv missing.\n"}; EXPECT_EQ(expectedErrorMessage, output); } TEST_F(OfflineLinkerTest, GivenCommandWithInvalidOutputFormatWhenVerificationIsPerformedThenErrorIsReturned) { mockArgHelperFilesMap["some_file.spv"] = 
getEmptySpirvFile(); const std::vector argv = { "ocloc.exe", "link", "-file", "some_file.spv", "-out_format", "SomeDummyUnknownFormat"}; MockOfflineLinker mockOfflineLinker{&mockArgHelper}; const auto parsingResult = mockOfflineLinker.parseCommand(argv.size(), argv); ASSERT_EQ(OclocErrorCode::SUCCESS, parsingResult); ::testing::internal::CaptureStdout(); const auto verificationResult = mockOfflineLinker.verifyLinkerCommand(); const auto output{::testing::internal::GetCapturedStdout()}; EXPECT_EQ(OclocErrorCode::INVALID_COMMAND_LINE, verificationResult); const std::string expectedErrorMessage{"Error: Invalid output type!\n"}; EXPECT_EQ(expectedErrorMessage, output); } TEST_F(OfflineLinkerTest, GivenValidCommandWhenVerificationIsPerformedThenSuccessIsReturned) { mockArgHelperFilesMap["some_file1.spv"] = getEmptySpirvFile(); mockArgHelperFilesMap["some_file2.spv"] = getEmptySpirvFile(); const std::vector argv = { "ocloc.exe", "link", "-file", "some_file1.spv", "-file", "some_file2.spv"}; MockOfflineLinker mockOfflineLinker{&mockArgHelper}; const auto parsingResult = mockOfflineLinker.parseCommand(argv.size(), argv); ASSERT_EQ(OclocErrorCode::SUCCESS, parsingResult); ::testing::internal::CaptureStdout(); const auto verificationResult = mockOfflineLinker.verifyLinkerCommand(); const auto output{::testing::internal::GetCapturedStdout()}; EXPECT_EQ(OclocErrorCode::SUCCESS, verificationResult); EXPECT_TRUE(output.empty()); } TEST_F(OfflineLinkerTest, GivenEmptyFileWhenLoadingInputFilesThenErrorIsReturned) { const std::string filename{"some_file.spv"}; mockArgHelperFilesMap[filename] = ""; mockArgHelper.shouldLoadDataFromFileReturnZeroSize = true; const std::vector argv = { "ocloc.exe", "link", "-file", filename}; MockOfflineLinker mockOfflineLinker{&mockArgHelper}; ::testing::internal::CaptureStdout(); const auto readingResult = mockOfflineLinker.initialize(argv.size(), argv); const auto output{::testing::internal::GetCapturedStdout()}; 
ASSERT_EQ(OclocErrorCode::INVALID_FILE, readingResult); const std::string expectedErrorMessage{"Error: Cannot read input file: some_file.spv\n"}; EXPECT_EQ(expectedErrorMessage, output); } TEST_F(OfflineLinkerTest, GivenValidFileWithUnknownFormatWhenLoadingInputFilesThenErrorIsReturned) { const std::string filename{"some_file.unknown"}; // Spir-V or LLVM-BC magic constants are required. This should be treated as error. mockArgHelperFilesMap[filename] = "Some unknown format!"; MockOfflineLinker mockOfflineLinker{&mockArgHelper}; mockOfflineLinker.inputFilenames.push_back(filename); ::testing::internal::CaptureStdout(); const auto readingResult = mockOfflineLinker.loadInputFilesContent(); const auto output{::testing::internal::GetCapturedStdout()}; ASSERT_EQ(OclocErrorCode::INVALID_PROGRAM, readingResult); const std::string expectedErrorMessage{"Error: Unsupported format of input file: some_file.unknown\n"}; EXPECT_EQ(expectedErrorMessage, output); } TEST_F(OfflineLinkerTest, GivenValidFilesWithValidFormatsWhenLoadingInputFilesThenFilesAreLoadedAndSuccessIsReturned) { const std::string firstFilename{"some_file1.spv"}; const std::string secondFilename{"some_file2.llvmbc"}; mockArgHelperFilesMap[firstFilename] = getEmptySpirvFile(); mockArgHelperFilesMap[secondFilename] = getEmptyLlvmBcFile(); MockOfflineLinker mockOfflineLinker{&mockArgHelper}; mockOfflineLinker.inputFilenames.push_back(firstFilename); mockOfflineLinker.inputFilenames.push_back(secondFilename); ::testing::internal::CaptureStdout(); const auto readingResult = mockOfflineLinker.loadInputFilesContent(); const auto output{::testing::internal::GetCapturedStdout()}; ASSERT_EQ(OclocErrorCode::SUCCESS, readingResult); EXPECT_TRUE(output.empty()); const auto &firstExpectedContent = mockArgHelperFilesMap[firstFilename]; const auto &firstActualContent = mockOfflineLinker.inputFilesContent[0]; ASSERT_EQ(firstExpectedContent.size() + 1, firstActualContent.size); const auto isFirstPairEqual = 
std::equal(firstExpectedContent.begin(), firstExpectedContent.end(), firstActualContent.bytes.get()); EXPECT_TRUE(isFirstPairEqual); const auto &secondExpectedContent = mockArgHelperFilesMap[secondFilename]; const auto &secondActualContent = mockOfflineLinker.inputFilesContent[1]; ASSERT_EQ(secondExpectedContent.size() + 1, secondActualContent.size); const auto isSecondPairEqual = std::equal(secondExpectedContent.begin(), secondExpectedContent.end(), secondActualContent.bytes.get()); EXPECT_TRUE(isSecondPairEqual); } TEST_F(OfflineLinkerTest, GivenValidFilesWhenInitializationIsSuccessfulThenLinkModeOfOperationIsSet) { const std::string firstFilename{"some_file1.spv"}; const std::string secondFilename{"some_file2.llvmbc"}; mockArgHelperFilesMap[firstFilename] = getEmptySpirvFile(); mockArgHelperFilesMap[secondFilename] = getEmptyLlvmBcFile(); const std::vector argv = { "ocloc.exe", "link", "-file", firstFilename, "-file", secondFilename}; MockOfflineLinker mockOfflineLinker{&mockArgHelper}; ::testing::internal::CaptureStdout(); const auto readingResult = mockOfflineLinker.initialize(argv.size(), argv); const auto output{::testing::internal::GetCapturedStdout()}; EXPECT_EQ(OclocErrorCode::SUCCESS, readingResult); EXPECT_TRUE(output.empty()); EXPECT_EQ(OperationMode::LINK_FILES, mockOfflineLinker.operationMode); } TEST_F(OfflineLinkerTest, GivenSPIRVandLLVMBCFilesWhenElfOutputIsRequestedThenElfWithSPIRVAndLLVMSectionsIsCreated) { auto spirvFileContent = createFileContent(getEmptySpirvFile(), IGC::CodeType::spirV); auto llvmbcFileContent = createFileContent(getEmptyLlvmBcFile(), IGC::CodeType::llvmBc); mockArgHelper.interceptOutput = true; MockOfflineLinker mockOfflineLinker{&mockArgHelper}; mockOfflineLinker.inputFilesContent.emplace_back(std::move(spirvFileContent.bytes), spirvFileContent.size, spirvFileContent.codeType); mockOfflineLinker.inputFilesContent.emplace_back(std::move(llvmbcFileContent.bytes), llvmbcFileContent.size, llvmbcFileContent.codeType); 
mockOfflineLinker.outputFormat = IGC::CodeType::elf; mockOfflineLinker.operationMode = OperationMode::LINK_FILES; const auto linkingResult{mockOfflineLinker.execute()}; ASSERT_EQ(OclocErrorCode::SUCCESS, linkingResult); ASSERT_EQ(1u, mockArgHelper.interceptedFiles.count("linker_output")); const auto &rawOutput{mockArgHelper.interceptedFiles.at("linker_output")}; const auto encodedElf{ArrayRef::fromAny(rawOutput.data(), rawOutput.size())}; std::string errorReason{}; std::string warning{}; const auto elf{Elf::decodeElf(encodedElf, errorReason, warning)}; ASSERT_TRUE(errorReason.empty()); EXPECT_TRUE(warning.empty()); // SPIR-V bitcode section. EXPECT_EQ(Elf::SHT_OPENCL_SPIRV, elf.sectionHeaders[1].header->type); const auto &expectedFirstSection = mockOfflineLinker.inputFilesContent[0]; const auto &actualFirstSection = elf.sectionHeaders[1]; ASSERT_EQ(expectedFirstSection.size, actualFirstSection.header->size); const auto isFirstSectionContentEqual = std::memcmp(actualFirstSection.data.begin(), expectedFirstSection.bytes.get(), expectedFirstSection.size) == 0; EXPECT_TRUE(isFirstSectionContentEqual); // LLVM bitcode section. 
EXPECT_EQ(Elf::SHT_OPENCL_LLVM_BINARY, elf.sectionHeaders[2].header->type); const auto &expectedSecondSection = mockOfflineLinker.inputFilesContent[1]; const auto &actualSecondSection = elf.sectionHeaders[2]; ASSERT_EQ(expectedSecondSection.size, actualSecondSection.header->size); const auto isSecondSectionContentEqual = std::memcmp(actualSecondSection.data.begin(), expectedSecondSection.bytes.get(), expectedSecondSection.size) == 0; EXPECT_TRUE(isSecondSectionContentEqual); } TEST_F(OfflineLinkerTest, GivenValidInputFileContentsWhenLlvmBcOutputIsRequestedThenSuccessIsReturnedAndFileIsWritten) { auto spirvFileContent = createFileContent(getEmptySpirvFile(), IGC::CodeType::spirV); auto llvmbcFileContent = createFileContent(getEmptyLlvmBcFile(), IGC::CodeType::llvmBc); mockArgHelper.interceptOutput = true; MockOfflineLinker mockOfflineLinker{&mockArgHelper}; mockOfflineLinker.inputFilesContent.emplace_back(std::move(spirvFileContent.bytes), spirvFileContent.size, spirvFileContent.codeType); mockOfflineLinker.inputFilesContent.emplace_back(std::move(llvmbcFileContent.bytes), llvmbcFileContent.size, llvmbcFileContent.codeType); mockOfflineLinker.outputFormat = IGC::CodeType::llvmBc; mockOfflineLinker.operationMode = OperationMode::LINK_FILES; const auto igcInitializationResult{mockOfflineLinker.prepareIgc()}; ASSERT_EQ(OclocErrorCode::SUCCESS, igcInitializationResult); const auto linkingResult{mockOfflineLinker.execute()}; ASSERT_EQ(OclocErrorCode::SUCCESS, linkingResult); ASSERT_EQ(1u, mockArgHelper.interceptedFiles.count("linker_output")); const auto &actualOutput{mockArgHelper.interceptedFiles.at("linker_output")}; const auto &expectedOutput{binaryToReturn}; ASSERT_EQ(sizeof(expectedOutput), actualOutput.size()); const auto isActualOutputSameAsExpected{std::equal(std::begin(expectedOutput), std::end(expectedOutput), std::begin(actualOutput))}; EXPECT_TRUE(isActualOutputSameAsExpected); } TEST_F(OfflineLinkerTest, 
GivenValidInputFileContentsAndFailingIGCWhenLlvmBcOutputIsRequestedThenErrorIsReturned) { MockCompilerDebugVars igcDebugVars{gEnvironment->igcDebugVars}; igcDebugVars.forceBuildFailure = true; setIgcDebugVars(igcDebugVars); auto spirvFileContent = createFileContent(getEmptySpirvFile(), IGC::CodeType::spirV); auto llvmbcFileContent = createFileContent(getEmptyLlvmBcFile(), IGC::CodeType::llvmBc); mockArgHelper.interceptOutput = true; MockOfflineLinker mockOfflineLinker{&mockArgHelper}; mockOfflineLinker.inputFilesContent.emplace_back(std::move(spirvFileContent.bytes), spirvFileContent.size, spirvFileContent.codeType); mockOfflineLinker.inputFilesContent.emplace_back(std::move(llvmbcFileContent.bytes), llvmbcFileContent.size, llvmbcFileContent.codeType); mockOfflineLinker.outputFormat = IGC::CodeType::llvmBc; mockOfflineLinker.operationMode = OperationMode::LINK_FILES; const auto igcInitializationResult{mockOfflineLinker.prepareIgc()}; ASSERT_EQ(OclocErrorCode::SUCCESS, igcInitializationResult); ::testing::internal::CaptureStdout(); const auto linkingResult{mockOfflineLinker.execute()}; const auto output{::testing::internal::GetCapturedStdout()}; ASSERT_EQ(OclocErrorCode::BUILD_PROGRAM_FAILURE, linkingResult); EXPECT_EQ(0u, mockArgHelper.interceptedFiles.count("linker_output")); const std::string expectedErrorMessage{"Error: Translation has failed! 
IGC returned empty output.\n"}; EXPECT_EQ(expectedErrorMessage, output); } TEST_F(OfflineLinkerTest, GivenValidInputFileContentsAndInvalidTranslationOutputWhenLlvmBcOutputIsRequestedThenErrorIsReturned) { MockCompilerDebugVars igcDebugVars{gEnvironment->igcDebugVars}; igcDebugVars.shouldReturnInvalidTranslationOutput = true; setIgcDebugVars(igcDebugVars); auto spirvFileContent = createFileContent(getEmptySpirvFile(), IGC::CodeType::spirV); auto llvmbcFileContent = createFileContent(getEmptyLlvmBcFile(), IGC::CodeType::llvmBc); mockArgHelper.interceptOutput = true; MockOfflineLinker mockOfflineLinker{&mockArgHelper}; mockOfflineLinker.inputFilesContent.emplace_back(std::move(spirvFileContent.bytes), spirvFileContent.size, spirvFileContent.codeType); mockOfflineLinker.inputFilesContent.emplace_back(std::move(llvmbcFileContent.bytes), llvmbcFileContent.size, llvmbcFileContent.codeType); mockOfflineLinker.outputFormat = IGC::CodeType::llvmBc; mockOfflineLinker.operationMode = OperationMode::LINK_FILES; const auto igcInitializationResult{mockOfflineLinker.prepareIgc()}; ASSERT_EQ(OclocErrorCode::SUCCESS, igcInitializationResult); ::testing::internal::CaptureStdout(); const auto linkingResult{mockOfflineLinker.execute()}; const auto output{::testing::internal::GetCapturedStdout()}; ASSERT_EQ(OclocErrorCode::OUT_OF_HOST_MEMORY, linkingResult); EXPECT_EQ(0u, mockArgHelper.interceptedFiles.count("linker_output")); const std::string expectedErrorMessage{"Error: Translation has failed! 
IGC output is nullptr!\n"}; EXPECT_EQ(expectedErrorMessage, output); } TEST_F(OfflineLinkerTest, GivenUninitializedLinkerWhenExecuteIsInvokedThenErrorIsIssued) { MockOfflineLinker mockOfflineLinker{&mockArgHelper}; ::testing::internal::CaptureStdout(); const auto executionResult{mockOfflineLinker.execute()}; const auto output{::testing::internal::GetCapturedStdout()}; ASSERT_EQ(OclocErrorCode::INVALID_COMMAND_LINE, executionResult); ASSERT_FALSE(output.empty()); } TEST_F(OfflineLinkerTest, GivenHelpRequestWhenExecuteIsInvokedThenHelpIsPrinted) { MockOfflineLinker mockOfflineLinker{&mockArgHelper}; mockOfflineLinker.operationMode = OperationMode::SHOW_HELP; ::testing::internal::CaptureStdout(); const auto executionResult{mockOfflineLinker.execute()}; const auto output{::testing::internal::GetCapturedStdout()}; ASSERT_EQ(OclocErrorCode::SUCCESS, executionResult); ASSERT_FALSE(output.empty()); } TEST_F(OfflineLinkerTest, GivenInvalidOperationModeWhenExecuteIsInvokedThenErrorIsIssued) { MockOfflineLinker mockOfflineLinker{&mockArgHelper}; mockOfflineLinker.operationMode = static_cast(7); ::testing::internal::CaptureStdout(); const auto executionResult{mockOfflineLinker.execute()}; const auto output{::testing::internal::GetCapturedStdout()}; ASSERT_EQ(OclocErrorCode::INVALID_COMMAND_LINE, executionResult); ASSERT_FALSE(output.empty()); } TEST_F(OfflineLinkerTest, GivenUninitializedHwInfoWhenInitIsCalledThenHwInfoIsInitialized) { MockOfflineLinker mockOfflineLinker{&mockArgHelper}; ASSERT_EQ(IGFX_UNKNOWN, mockOfflineLinker.hwInfo.platform.eProductFamily); const auto hwInfoInitializationResult{mockOfflineLinker.initHardwareInfo()}; ASSERT_EQ(OclocErrorCode::SUCCESS, hwInfoInitializationResult); EXPECT_NE(IGFX_UNKNOWN, mockOfflineLinker.hwInfo.platform.eProductFamily); } TEST_F(OfflineLinkerTest, GivenEmptyHwInfoTableWhenInitializationIsPerformedThenItFailsOnHwInit) { const std::string firstFilename{"some_file1.spv"}; const std::string secondFilename{"some_file2.llvmbc"}; 
mockArgHelperFilesMap[firstFilename] = getEmptySpirvFile(); mockArgHelperFilesMap[secondFilename] = getEmptyLlvmBcFile(); const std::vector argv = { "ocloc.exe", "link", "-file", firstFilename, "-file", secondFilename}; MockOfflineLinker mockOfflineLinker{&mockArgHelper}; mockOfflineLinker.shouldReturnEmptyHardwareInfoTable = true; ::testing::internal::CaptureStdout(); const auto readingResult = mockOfflineLinker.initialize(argv.size(), argv); const auto output{::testing::internal::GetCapturedStdout()}; EXPECT_EQ(OclocErrorCode::INVALID_DEVICE, readingResult); const std::string expectedErrorMessage{"Error! Cannot retrieve any valid hardware information!\n"}; EXPECT_EQ(expectedErrorMessage, output); } TEST_F(OfflineLinkerTest, GivenMissingIgcLibraryWhenInitializationIsPerformedThenItFailsOnIgcPreparation) { const std::string firstFilename{"some_file1.spv"}; const std::string secondFilename{"some_file2.llvmbc"}; mockArgHelperFilesMap[firstFilename] = getEmptySpirvFile(); mockArgHelperFilesMap[secondFilename] = getEmptyLlvmBcFile(); const std::vector argv = { "ocloc.exe", "link", "-file", firstFilename, "-file", secondFilename}; MockOfflineLinker mockOfflineLinker{&mockArgHelper}; mockOfflineLinker.shouldFailLoadingOfIgcLib = true; ::testing::internal::CaptureStdout(); const auto readingResult = mockOfflineLinker.initialize(argv.size(), argv); const auto output{::testing::internal::GetCapturedStdout()}; EXPECT_EQ(OclocErrorCode::OUT_OF_HOST_MEMORY, readingResult); std::stringstream expectedErrorMessage; expectedErrorMessage << "Error! Loading of IGC library has failed! 
Filename: " << Os::igcDllName << "\n"; EXPECT_EQ(expectedErrorMessage.str(), output); } TEST_F(OfflineLinkerTest, GivenOfflineLinkerWhenStoringValidBuildLogThenItIsSaved) { const std::string someValidLog{"Warning: This is a build log!"}; MockOfflineLinker mockOfflineLinker{&mockArgHelper}; mockOfflineLinker.tryToStoreBuildLog(someValidLog.data(), someValidLog.size()); EXPECT_EQ(someValidLog, mockOfflineLinker.getBuildLog()); } TEST_F(OfflineLinkerTest, GivenOfflineLinkerWhenStoringInvalidBuildLogThenItIsIgnored) { MockOfflineLinker mockOfflineLinker{&mockArgHelper}; mockOfflineLinker.tryToStoreBuildLog(nullptr, 0); const auto buildLog{mockOfflineLinker.getBuildLog()}; EXPECT_TRUE(buildLog.empty()); // Invalid size has been passed. const char *log{"Info: This is a log!"}; mockOfflineLinker.tryToStoreBuildLog(log, 0); const auto buildLog2{mockOfflineLinker.getBuildLog()}; EXPECT_TRUE(buildLog2.empty()); } TEST_F(OfflineLinkerTest, GivenFailingLoadingOfIgcSymbolsWhenPreparingIgcThenFailureIsReported) { MockOfflineLinker mockOfflineLinker{&mockArgHelper}; mockOfflineLinker.shouldFailLoadingOfIgcCreateMainFunction = true; ::testing::internal::CaptureStdout(); const auto igcPreparationResult{mockOfflineLinker.prepareIgc()}; const auto output{::testing::internal::GetCapturedStdout()}; EXPECT_EQ(OclocErrorCode::OUT_OF_HOST_MEMORY, igcPreparationResult); const std::string expectedErrorMessage{"Error! Cannot load required functions from IGC library.\n"}; EXPECT_EQ(expectedErrorMessage, output); } TEST_F(OfflineLinkerTest, GivenFailingCreationOfIgcMainWhenPreparingIgcThenFailureIsReported) { MockOfflineLinker mockOfflineLinker{&mockArgHelper}; mockOfflineLinker.shouldFailCreationOfIgcMain = true; ::testing::internal::CaptureStdout(); const auto igcPreparationResult{mockOfflineLinker.prepareIgc()}; const auto output{::testing::internal::GetCapturedStdout()}; EXPECT_EQ(OclocErrorCode::OUT_OF_HOST_MEMORY, igcPreparationResult); const std::string expectedErrorMessage{"Error! 
Cannot create IGC main component!\n"}; EXPECT_EQ(expectedErrorMessage, output); } TEST_F(OfflineLinkerTest, GivenFailingCreationOfIgcDeviceContextWhenPreparingIgcThenFailureIsReported) { MockOfflineLinker mockOfflineLinker{&mockArgHelper}; mockOfflineLinker.shouldFailCreationOfIgcDeviceContext = true; ::testing::internal::CaptureStdout(); const auto igcPreparationResult{mockOfflineLinker.prepareIgc()}; const auto output{::testing::internal::GetCapturedStdout()}; EXPECT_EQ(OclocErrorCode::OUT_OF_HOST_MEMORY, igcPreparationResult); const std::string expectedErrorMessage{"Error! Cannot create IGC device context!\n"}; EXPECT_EQ(expectedErrorMessage, output); } TEST_F(OfflineLinkerTest, GivenInvalidIgcPlatformHandleWhenPreparingIgcThenFailureIsReported) { MockOfflineLinker mockOfflineLinker{&mockArgHelper}; mockOfflineLinker.shouldReturnInvalidIgcPlatformHandle = true; ::testing::internal::CaptureStdout(); const auto igcPreparationResult{mockOfflineLinker.prepareIgc()}; const auto output{::testing::internal::GetCapturedStdout()}; EXPECT_EQ(OclocErrorCode::OUT_OF_HOST_MEMORY, igcPreparationResult); const std::string expectedErrorMessage{"Error! IGC device context has not been properly created!\n"}; EXPECT_EQ(expectedErrorMessage, output); } TEST_F(OfflineLinkerTest, GivenInvalidIgcGTSystemInfoHandleWhenPreparingIgcThenFailureIsReported) { MockOfflineLinker mockOfflineLinker{&mockArgHelper}; mockOfflineLinker.shouldReturnInvalidGTSystemInfoHandle = true; ::testing::internal::CaptureStdout(); const auto igcPreparationResult{mockOfflineLinker.prepareIgc()}; const auto output{::testing::internal::GetCapturedStdout()}; EXPECT_EQ(OclocErrorCode::OUT_OF_HOST_MEMORY, igcPreparationResult); const std::string expectedErrorMessage{"Error! 
IGC device context has not been properly created!\n"}; EXPECT_EQ(expectedErrorMessage, output); } } // namespace NEOcompute-runtime-22.14.22890/opencl/test/unit_test/offline_compiler/offline_linker_tests.h000066400000000000000000000013721422164147700315270ustar00rootroot00000000000000/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "gtest/gtest.h" #include "mock/mock_argument_helper.h" #include "mock/mock_offline_linker.h" namespace NEO { class OfflineLinkerTest : public ::testing::Test { public: void SetUp() override; void TearDown() override; std::string getEmptySpirvFile() const; std::string getEmptyLlvmBcFile() const; MockOfflineLinker::InputFileContent createFileContent(const std::string &content, IGC::CodeType::CodeType_t codeType) const; protected: MockOclocArgHelper::FilesMap mockArgHelperFilesMap{}; MockOclocArgHelper mockArgHelper{mockArgHelperFilesMap}; char binaryToReturn[8]{7, 7, 7, 7, 0, 1, 2, 3}; }; } // namespace NEOcompute-runtime-22.14.22890/opencl/test/unit_test/offline_compiler/segfault_test/000077500000000000000000000000001422164147700300145ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/offline_compiler/segfault_test/CMakeLists.txt000066400000000000000000000056221422164147700325610ustar00rootroot00000000000000# # Copyright (C) 2018-2022 Intel Corporation # # SPDX-License-Identifier: MIT # set(CLOC_SEGFAULT_TEST_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/segfault_helper.h ${CMAKE_CURRENT_SOURCE_DIR}/main.cpp ${NEO_SHARED_DIRECTORY}/helpers/abort.cpp ${NEO_SHARED_DIRECTORY}/os_interface/os_library.h ${NEO_SHARED_TEST_DIRECTORY}/common/libult/signal_utils.h ${NEO_SHARED_TEST_DIRECTORY}/unit_test/helpers/debug_helpers.cpp ${NEO_SOURCE_DIR}/opencl/test/unit_test/offline_compiler/ocloc_tests_configuration.cpp ) if(WIN32) list(APPEND CLOC_SEGFAULT_TEST_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/windows/safety_guard_caller_windows.cpp 
${NEO_SHARED_DIRECTORY}/os_interface/windows/os_library_win.cpp ${NEO_SHARED_DIRECTORY}/os_interface/windows/os_library_win.h ${NEO_SHARED_TEST_DIRECTORY}/common/os_interface/windows/signal_utils.cpp ${OCLOC_DIRECTORY}/source/utilities/windows/safety_guard_windows.h ${OCLOC_DIRECTORY}/source/utilities/windows/seh_exception.cpp ${OCLOC_DIRECTORY}/source/utilities/windows/seh_exception.h ) else() list(APPEND CLOC_SEGFAULT_TEST_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/linux/safety_guard_caller_linux.cpp ${NEO_SHARED_DIRECTORY}/os_interface/linux/os_library_linux.cpp ${NEO_SHARED_DIRECTORY}/os_interface/linux/os_library_linux.h ${NEO_SHARED_DIRECTORY}/os_interface/linux/sys_calls_linux.cpp ${NEO_SHARED_TEST_DIRECTORY}/common/os_interface/linux/signal_utils.cpp ${OCLOC_DIRECTORY}/source/linux/os_library_ocloc_helper.cpp ) endif() add_executable(ocloc_segfault_test ${CLOC_SEGFAULT_TEST_SOURCES}) target_link_libraries(ocloc_segfault_test gmock-gtest) if(MSVC) target_compile_options(ocloc_segfault_test PRIVATE /Zi) set_property(TARGET ocloc_segfault_test APPEND PROPERTY LINK_FLAGS /DEBUG) target_link_libraries(ocloc_segfault_test dbghelp) endif() if(UNIX) target_link_libraries(ocloc_segfault_test dl pthread) endif() set(CLOC_SEGFAULT_TEST_INCLUDES ${NEO_SOURCE_DIR} ${THIRD_PARTY_DIR} ) if(CMAKE_COMPILER_IS_GNUCC) target_compile_definitions(ocloc_segfault_test PRIVATE SKIP_SEGFAULT_TEST=1) endif() get_property(CLOC_FOLDER TARGET ocloc PROPERTY FOLDER) set_property(TARGET ocloc_segfault_test PROPERTY FOLDER ${CLOC_FOLDER}) target_include_directories(ocloc_segfault_test BEFORE PRIVATE ${CLOC_SEGFAULT_TEST_INCLUDES}) create_project_source_tree(ocloc_segfault_test) add_custom_target(run_ocloc_segfault_test ALL DEPENDS ocloc_segfault_test ) if(NOT USE_TSAN) add_custom_command( TARGET run_ocloc_segfault_test POST_BUILD COMMAND echo Running ocloc_segfault_test COMMAND ocloc_segfault_test WORKING_DIRECTORY ${TargetDir} ) endif() set_property(TARGET run_ocloc_segfault_test PROPERTY 
FOLDER ${CLOC_FOLDER}) compute-runtime-22.14.22890/opencl/test/unit_test/offline_compiler/segfault_test/linux/000077500000000000000000000000001422164147700311535ustar00rootroot00000000000000safety_guard_caller_linux.cpp000066400000000000000000000010541422164147700370160ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/offline_compiler/segfault_test/linux/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/offline_compiler/source/utilities/linux/safety_guard_linux.h" #include "../segfault_helper.h" int generateSegfaultWithSafetyGuard(SegfaultHelper *segfaultHelper) { SafetyGuardLinux safetyGuard; safetyGuard.onSigSegv = segfaultHelper->segfaultHandlerCallback; int retVal = -60; return safetyGuard.call(segfaultHelper, &SegfaultHelper::generateSegfault, retVal); } compute-runtime-22.14.22890/opencl/test/unit_test/offline_compiler/segfault_test/main.cpp000066400000000000000000000026451422164147700314530ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/libult/signal_utils.h" #include "shared/test/unit_test/helpers/gtest_helpers.h" #include "gtest/gtest.h" #include "segfault_helper.h" #include extern int generateSegfaultWithSafetyGuard(SegfaultHelper *segfaultHelper); int main(int argc, char **argv) { int retVal = 0; bool enableAlarm = true; ::testing::InitGoogleTest(&argc, argv); for (int i = 1; i < argc; ++i) { if (!strcmp("--disable_alarm", argv[i])) { enableAlarm = false; } } int sigOut = setAlarm(enableAlarm); if (sigOut != 0) return sigOut; retVal = RUN_ALL_TESTS(); return retVal; } void captureAndCheckStdOut() { std::string callstack = ::testing::internal::GetCapturedStdout(); EXPECT_TRUE(hasSubstr(callstack, std::string("Callstack"))); EXPECT_TRUE(hasSubstr(callstack, std::string("cloc_segfault_test"))); EXPECT_TRUE(hasSubstr(callstack, std::string("generateSegfaultWithSafetyGuard"))); } 
TEST(SegFault, givenCallWithSafetyGuardWhenSegfaultHappensThenCallstackIsPrintedToStdOut) { #if !defined(SKIP_SEGFAULT_TEST) ::testing::internal::CaptureStdout(); SegfaultHelper segfault; segfault.segfaultHandlerCallback = captureAndCheckStdOut; auto retVal = generateSegfaultWithSafetyGuard(&segfault); EXPECT_EQ(-60, retVal); #endif } compute-runtime-22.14.22890/opencl/test/unit_test/offline_compiler/segfault_test/segfault_helper.h000066400000000000000000000011131422164147700333320ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #if defined(__clang__) #define NO_SANITIZE __attribute__((no_sanitize("address", "undefined"))) #elif defined(__GNUC__) #define NO_SANITIZE __attribute__((no_sanitize_address)) #else #define NO_SANITIZE #endif class SegfaultHelper { public: int NO_SANITIZE generateSegfault() { int *pointer = reinterpret_cast(0); *pointer = 0; return 0; } typedef void (*callbackFunction)(); callbackFunction segfaultHandlerCallback = nullptr; }; compute-runtime-22.14.22890/opencl/test/unit_test/offline_compiler/segfault_test/windows/000077500000000000000000000000001422164147700315065ustar00rootroot00000000000000safety_guard_caller_windows.cpp000066400000000000000000000011431422164147700377030ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/offline_compiler/segfault_test/windows/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/offline_compiler/source/utilities/windows/safety_guard_windows.h" #include "opencl/test/unit_test/offline_compiler/segfault_test/segfault_helper.h" int generateSegfaultWithSafetyGuard(SegfaultHelper *segfaultHelper) { SafetyGuardWindows safetyGuard; safetyGuard.onExcept = segfaultHelper->segfaultHandlerCallback; int retVal = -60; return safetyGuard.call(segfaultHelper, &SegfaultHelper::generateSegfault, retVal); } 
compute-runtime-22.14.22890/opencl/test/unit_test/offline_compiler/xe_hpc_core/000077500000000000000000000000001422164147700274215ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/offline_compiler/xe_hpc_core/CMakeLists.txt000066400000000000000000000005041422164147700321600ustar00rootroot00000000000000# # Copyright (C) 2022 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_XE_HPC_CORE) set(IGDRCL_SRCS_offline_compiler_tests_xe_hpc_core ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) target_sources(ocloc_tests PRIVATE ${IGDRCL_SRCS_offline_compiler_tests_xe_hpc_core}) add_subdirectories() endif() compute-runtime-22.14.22890/opencl/test/unit_test/offline_compiler/xe_hpc_core/pvc/000077500000000000000000000000001422164147700302115ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/offline_compiler/xe_hpc_core/pvc/CMakeLists.txt000066400000000000000000000005271422164147700327550ustar00rootroot00000000000000# # Copyright (C) 2022 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_PVC) set(IGDRCL_SRCS_offline_compiler_tests_pvc ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/offline_compiler_tests_pvc.cpp ) target_sources(ocloc_tests PRIVATE ${IGDRCL_SRCS_offline_compiler_tests_pvc}) endif() offline_compiler_tests_pvc.cpp000066400000000000000000000007761422164147700362560ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/offline_compiler/xe_hpc_core/pvc/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/offline_compiler/ocloc_product_config_tests.h" namespace NEO { static PRODUCT_CONFIG pvcProductConfig[] = { PVC_XL_A0, PVC_XL_B0, PVC_XT_A0, PVC_XT_B0}; INSTANTIATE_TEST_CASE_P( OclocProductConfigPvcTestsValues, OclocProductConfigTests, ::testing::Combine( ::testing::ValuesIn(pvcProductConfig), ::testing::Values(IGFX_PVC))); } // namespace 
NEOcompute-runtime-22.14.22890/opencl/test/unit_test/offline_compiler/xe_hpg_core/000077500000000000000000000000001422164147700274255ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/offline_compiler/xe_hpg_core/CMakeLists.txt000066400000000000000000000005041422164147700321640ustar00rootroot00000000000000# # Copyright (C) 2022 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_XE_HPG_CORE) set(IGDRCL_SRCS_offline_compiler_tests_xe_hpg_core ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) target_sources(ocloc_tests PRIVATE ${IGDRCL_SRCS_offline_compiler_tests_xe_hpg_core}) add_subdirectories() endif() compute-runtime-22.14.22890/opencl/test/unit_test/offline_compiler/xe_hpg_core/dg2/000077500000000000000000000000001422164147700301015ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/offline_compiler/xe_hpg_core/dg2/CMakeLists.txt000066400000000000000000000005551422164147700326460ustar00rootroot00000000000000# # Copyright (C) 2022 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_DG2) set(IGDRCL_SRCS_offline_compiler_tests_dg2 ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/offline_compiler_tests_dg2.cpp ) target_sources(ocloc_tests PRIVATE ${IGDRCL_SRCS_offline_compiler_tests_dg2}) endif() add_subdirectories() offline_compiler_tests_dg2.cpp000066400000000000000000000007571422164147700360310ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/offline_compiler/xe_hpg_core/dg2/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/offline_compiler/ocloc_product_config_tests.h" namespace NEO { static PRODUCT_CONFIG dg2ProductConfig[] = { DG2_G10_A0, DG2_G11, DG2_G10_B0}; INSTANTIATE_TEST_CASE_P( OclocProductConfigDg2TestsValues, OclocProductConfigTests, ::testing::Combine( ::testing::ValuesIn(dg2ProductConfig), ::testing::Values(IGFX_DG2))); } // namespace 
NEOcompute-runtime-22.14.22890/opencl/test/unit_test/os_interface/000077500000000000000000000000001422164147700242705ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/CMakeLists.txt000066400000000000000000000017601422164147700270340ustar00rootroot00000000000000# # Copyright (C) 2018-2022 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_os_interface_base ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/device_factory_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/hw_info_config_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/hw_info_config_tests.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_performance_counters.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_performance_counters.h ${CMAKE_CURRENT_SOURCE_DIR}/os_context_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/os_interface_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/os_library_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/os_memory_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/performance_counters_gen_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/performance_counters_tests.cpp ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_os_interface_base}) set_property(GLOBAL PROPERTY IGDRCL_SRCS_tests_os_interface_base ${IGDRCL_SRCS_tests_os_interface_base}) add_subdirectories() compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/device_factory_tests.cpp000066400000000000000000000504461422164147700312150ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/helpers/constants.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/os_interface/device_factory.h" #include "shared/source/os_interface/os_interface.h" #include "shared/source/os_interface/os_library.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/ult_hw_config.h" #include 
"shared/test/common/helpers/variable_backup.h" #include "shared/test/common/mocks/mock_execution_environment.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "shared/test/common/mocks/ult_device_factory.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "hw_device_id.h" #include using namespace NEO; OsLibrary *setAdapterInfo(const PLATFORM *platform, const GT_SYSTEM_INFO *gtSystemInfo, uint64_t gpuAddressSpace); struct DeviceFactoryTest : public ::testing::Test { public: void SetUp() override { const HardwareInfo *hwInfo = defaultHwInfo.get(); executionEnvironment = platform()->peekExecutionEnvironment(); mockGdiDll = setAdapterInfo(&hwInfo->platform, &hwInfo->gtSystemInfo, hwInfo->capabilityTable.gpuAddressSpace); } void TearDown() override { delete mockGdiDll; } protected: OsLibrary *mockGdiDll; ExecutionEnvironment *executionEnvironment; }; TEST_F(DeviceFactoryTest, WhenDeviceEnvironemntIsPreparedThenItIsInitializedCorrectly) { const HardwareInfo *refHwinfo = defaultHwInfo.get(); bool success = DeviceFactory::prepareDeviceEnvironments(*executionEnvironment); EXPECT_TRUE(success); const HardwareInfo *hwInfo = executionEnvironment->rootDeviceEnvironments[0]->getHardwareInfo(); EXPECT_EQ(refHwinfo->platform.eDisplayCoreFamily, hwInfo->platform.eDisplayCoreFamily); } TEST_F(DeviceFactoryTest, WhenOverridingUsingDebugManagerThenOverridesAreAppliedCorrectly) { DebugManagerStateRestore stateRestore; bool success = DeviceFactory::prepareDeviceEnvironments(*executionEnvironment); auto hwInfo = executionEnvironment->rootDeviceEnvironments[0]->getHardwareInfo(); ASSERT_TRUE(success); auto refEnableKmdNotify = hwInfo->capabilityTable.kmdNotifyProperties.enableKmdNotify; auto refDelayKmdNotifyMicroseconds = hwInfo->capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds; auto refEnableQuickKmdSleep = 
hwInfo->capabilityTable.kmdNotifyProperties.enableQuickKmdSleep; auto refDelayQuickKmdSleepMicroseconds = hwInfo->capabilityTable.kmdNotifyProperties.delayQuickKmdSleepMicroseconds; auto refEnableQuickKmdSleepForSporadicWaits = hwInfo->capabilityTable.kmdNotifyProperties.enableQuickKmdSleepForSporadicWaits; auto refDelayQuickKmdSleepForSporadicWaitsMicroseconds = hwInfo->capabilityTable.kmdNotifyProperties.delayQuickKmdSleepForSporadicWaitsMicroseconds; auto refEnableQuickKmdSleepForDirectSubmission = hwInfo->capabilityTable.kmdNotifyProperties.enableQuickKmdSleepForDirectSubmission; auto refDelayQuickKmdSleepForDirectSubmissionMicroseconds = hwInfo->capabilityTable.kmdNotifyProperties.delayQuickKmdSleepForDirectSubmissionMicroseconds; DebugManager.flags.OverrideEnableKmdNotify.set(!refEnableKmdNotify); DebugManager.flags.OverrideKmdNotifyDelayMicroseconds.set(static_cast(refDelayKmdNotifyMicroseconds) + 10); DebugManager.flags.OverrideEnableQuickKmdSleep.set(!refEnableQuickKmdSleep); DebugManager.flags.OverrideQuickKmdSleepDelayMicroseconds.set(static_cast(refDelayQuickKmdSleepMicroseconds) + 11); DebugManager.flags.OverrideEnableQuickKmdSleepForSporadicWaits.set(!refEnableQuickKmdSleepForSporadicWaits); DebugManager.flags.OverrideDelayQuickKmdSleepForSporadicWaitsMicroseconds.set(static_cast(refDelayQuickKmdSleepForSporadicWaitsMicroseconds) + 12); DebugManager.flags.OverrideEnableQuickKmdSleepForDirectSubmission.set(!refEnableQuickKmdSleepForDirectSubmission); DebugManager.flags.OverrideDelayQuickKmdSleepForDirectSubmissionMicroseconds.set(static_cast(refDelayQuickKmdSleepForDirectSubmissionMicroseconds) + 15); platformsImpl->clear(); executionEnvironment = constructPlatform()->peekExecutionEnvironment(); success = DeviceFactory::prepareDeviceEnvironments(*executionEnvironment); ASSERT_TRUE(success); hwInfo = executionEnvironment->rootDeviceEnvironments[0]->getHardwareInfo(); EXPECT_EQ(!refEnableKmdNotify, 
hwInfo->capabilityTable.kmdNotifyProperties.enableKmdNotify); EXPECT_EQ(refDelayKmdNotifyMicroseconds + 10, hwInfo->capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds); EXPECT_EQ(!refEnableQuickKmdSleep, hwInfo->capabilityTable.kmdNotifyProperties.enableQuickKmdSleep); EXPECT_EQ(refDelayQuickKmdSleepMicroseconds + 11, hwInfo->capabilityTable.kmdNotifyProperties.delayQuickKmdSleepMicroseconds); EXPECT_EQ(!refEnableQuickKmdSleepForSporadicWaits, hwInfo->capabilityTable.kmdNotifyProperties.enableQuickKmdSleepForSporadicWaits); EXPECT_EQ(refDelayQuickKmdSleepForSporadicWaitsMicroseconds + 12, hwInfo->capabilityTable.kmdNotifyProperties.delayQuickKmdSleepForSporadicWaitsMicroseconds); EXPECT_EQ(!refEnableQuickKmdSleepForDirectSubmission, hwInfo->capabilityTable.kmdNotifyProperties.enableQuickKmdSleepForDirectSubmission); EXPECT_EQ(refDelayQuickKmdSleepForDirectSubmissionMicroseconds + 15, hwInfo->capabilityTable.kmdNotifyProperties.delayQuickKmdSleepForDirectSubmissionMicroseconds); } TEST_F(DeviceFactoryTest, givenZeAffinityMaskSetWhenCreateDevicesThenProperNumberOfDevicesIsReturned) { DebugManagerStateRestore restorer; DebugManager.flags.CreateMultipleRootDevices.set(5); DebugManager.flags.CreateMultipleSubDevices.set(4); DebugManager.flags.ZE_AFFINITY_MASK.set("1.0,2.3,2.1,1.3,0,2.0,4.0,4.2,4.3,4.1"); VariableBackup backup(&ultHwConfig); ultHwConfig.useMockedPrepareDeviceEnvironmentsFunc = false; auto devices = DeviceFactory::createDevices(*executionEnvironment); EXPECT_EQ(devices.size(), 4u); EXPECT_EQ(devices[0]->getNumSubDevices(), 4u); EXPECT_EQ(devices[1]->getNumSubDevices(), 2u); EXPECT_EQ(devices[2]->getNumSubDevices(), 3u); EXPECT_EQ(devices[3]->getNumSubDevices(), 4u); } TEST_F(DeviceFactoryTest, givenZeAffinityMaskSetToGreaterRootDeviceThanAvailableWhenCreateDevicesThenProperNumberOfDevicesIsReturned) { DebugManagerStateRestore restorer; DebugManager.flags.CreateMultipleRootDevices.set(2); DebugManager.flags.CreateMultipleSubDevices.set(4); 
DebugManager.flags.ZE_AFFINITY_MASK.set("0,92,1.1"); VariableBackup backup(&ultHwConfig); ultHwConfig.useMockedPrepareDeviceEnvironmentsFunc = false; auto devices = DeviceFactory::createDevices(*executionEnvironment); EXPECT_EQ(devices.size(), 2u); EXPECT_EQ(devices[0]->getNumSubDevices(), 4u); EXPECT_EQ(devices[0]->getNumGenericSubDevices(), 4u); EXPECT_EQ(devices[1]->getNumGenericSubDevices(), 0u); } TEST_F(DeviceFactoryTest, givenZeAffinityMaskSetToGreaterSubDeviceThanAvailableWhenCreateDevicesThenProperNumberOfDevicesIsReturned) { DebugManagerStateRestore restorer; DebugManager.flags.CreateMultipleRootDevices.set(2); DebugManager.flags.CreateMultipleSubDevices.set(4); DebugManager.flags.ZE_AFFINITY_MASK.set("0,1.54"); VariableBackup backup(&ultHwConfig); ultHwConfig.useMockedPrepareDeviceEnvironmentsFunc = false; auto devices = DeviceFactory::createDevices(*executionEnvironment); EXPECT_EQ(devices.size(), 1u); EXPECT_EQ(devices[0]->getNumSubDevices(), 4u); } TEST_F(DeviceFactoryTest, givenZeAffinityMaskSetToRootDevicesOnlyWhenCreateDevicesThenProperNumberOfDevicesIsReturned) { DebugManagerStateRestore restorer; DebugManager.flags.CreateMultipleRootDevices.set(2); DebugManager.flags.CreateMultipleSubDevices.set(4); DebugManager.flags.ZE_AFFINITY_MASK.set("0,1"); VariableBackup backup(&ultHwConfig); ultHwConfig.useMockedPrepareDeviceEnvironmentsFunc = false; auto devices = DeviceFactory::createDevices(*executionEnvironment); EXPECT_EQ(devices.size(), 2u); EXPECT_EQ(devices[0]->getNumSubDevices(), 4u); EXPECT_EQ(devices[1]->getNumSubDevices(), 4u); } TEST_F(DeviceFactoryTest, WhenOverridingEngineTypeThenDebugEngineIsReported) { DebugManagerStateRestore dbgRestorer; int32_t debugEngineType = 2; DebugManager.flags.NodeOrdinal.set(debugEngineType); bool success = DeviceFactory::prepareDeviceEnvironments(*executionEnvironment); ASSERT_TRUE(success); auto hwInfo = executionEnvironment->rootDeviceEnvironments[0]->getHardwareInfo(); int32_t actualEngineType = 
static_cast(hwInfo->capabilityTable.defaultEngineType); EXPECT_EQ(debugEngineType, actualEngineType); } TEST_F(DeviceFactoryTest, givenPointerToHwInfoWhenGetDevicedCalledThenRequiedSurfaceSizeIsSettedProperly) { bool success = DeviceFactory::prepareDeviceEnvironments(*executionEnvironment); ASSERT_TRUE(success); auto hwInfo = executionEnvironment->rootDeviceEnvironments[0]->getHardwareInfo(); const auto &hwHelper = HwHelper::get(hwInfo->platform.eRenderCoreFamily); auto expextedSize = static_cast(hwInfo->gtSystemInfo.CsrSizeInMb * MemoryConstants::megaByte); hwHelper.adjustPreemptionSurfaceSize(expextedSize); EXPECT_EQ(expextedSize, hwInfo->capabilityTable.requiredPreemptionSurfaceSize); } TEST_F(DeviceFactoryTest, givenCreateMultipleRootDevicesDebugFlagWhenPrepareDeviceEnvironmentsIsCalledThenNumberOfReturnedDevicesIsEqualToDebugVariable) { DebugManagerStateRestore stateRestore; auto requiredDeviceCount = 2u; DebugManager.flags.CreateMultipleRootDevices.set(requiredDeviceCount); bool success = DeviceFactory::prepareDeviceEnvironments(*executionEnvironment); ASSERT_TRUE(success); EXPECT_EQ(requiredDeviceCount, executionEnvironment->rootDeviceEnvironments.size()); } TEST_F(DeviceFactoryTest, givenDebugFlagSetWhenPrepareDeviceEnvironmentsIsCalledThenOverrideGpuAddressSpace) { DebugManagerStateRestore restore; DebugManager.flags.OverrideGpuAddressSpace.set(12); bool success = DeviceFactory::prepareDeviceEnvironments(*executionEnvironment); EXPECT_TRUE(success); EXPECT_EQ(maxNBitValue(12), executionEnvironment->rootDeviceEnvironments[0]->getHardwareInfo()->capabilityTable.gpuAddressSpace); } TEST_F(DeviceFactoryTest, givenDebugFlagSetWhenPrepareDeviceEnvironmentsForProductFamilyOverrideIsCalledThenOverrideGpuAddressSpace) { DebugManagerStateRestore restore; DebugManager.flags.OverrideGpuAddressSpace.set(12); bool success = DeviceFactory::prepareDeviceEnvironmentsForProductFamilyOverride(*executionEnvironment); EXPECT_TRUE(success); EXPECT_EQ(maxNBitValue(12), 
executionEnvironment->rootDeviceEnvironments[0]->getHardwareInfo()->capabilityTable.gpuAddressSpace); } TEST_F(DeviceFactoryTest, givenDebugFlagSetWhenPrepareDeviceEnvironmentsIsCalledThenOverrideRevision) { DebugManagerStateRestore restore; DebugManager.flags.OverrideRevision.set(3); bool success = DeviceFactory::prepareDeviceEnvironments(*executionEnvironment); EXPECT_TRUE(success); EXPECT_EQ(3u, executionEnvironment->rootDeviceEnvironments[0]->getHardwareInfo()->platform.usRevId); } TEST_F(DeviceFactoryTest, givenDebugFlagSetWhenPrepareDeviceEnvironmentsForProductFamilyOverrideIsCalledThenOverrideRevision) { DebugManagerStateRestore restore; DebugManager.flags.OverrideRevision.set(3); bool success = DeviceFactory::prepareDeviceEnvironmentsForProductFamilyOverride(*executionEnvironment); EXPECT_TRUE(success); EXPECT_EQ(3u, executionEnvironment->rootDeviceEnvironments[0]->getHardwareInfo()->platform.usRevId); } TEST_F(DeviceFactoryTest, givenDebugFlagWithoutZeroXWhenPrepareDeviceEnvironmentsForProductFamilyOverrideIsCalledThenOverrideDeviceIdToHexValue) { DebugManagerStateRestore restore; DebugManager.flags.ForceDeviceId.set("1234"); bool success = DeviceFactory::prepareDeviceEnvironmentsForProductFamilyOverride(*executionEnvironment); EXPECT_TRUE(success); EXPECT_EQ(0x1234u, executionEnvironment->rootDeviceEnvironments[0]->getHardwareInfo()->platform.usDeviceID); } TEST_F(DeviceFactoryTest, givenDebugFlagWithZeroXWhenPrepareDeviceEnvironmentsForProductFamilyOverrideIsCalledThenOverrideDeviceIdToHexValue) { DebugManagerStateRestore restore; DebugManager.flags.ForceDeviceId.set("0x1234"); bool success = DeviceFactory::prepareDeviceEnvironmentsForProductFamilyOverride(*executionEnvironment); EXPECT_TRUE(success); EXPECT_EQ(0x1234u, executionEnvironment->rootDeviceEnvironments[0]->getHardwareInfo()->platform.usDeviceID); } TEST_F(DeviceFactoryTest, whenPrepareDeviceEnvironmentsIsCalledThenAllRootDeviceEnvironmentMembersAreInitialized) { DebugManagerStateRestore 
stateRestore; auto requiredDeviceCount = 2u; DebugManager.flags.CreateMultipleRootDevices.set(requiredDeviceCount); MockExecutionEnvironment executionEnvironment(defaultHwInfo.get(), true, requiredDeviceCount); bool success = DeviceFactory::prepareDeviceEnvironments(executionEnvironment); ASSERT_TRUE(success); std::set memoryOperationHandlers; std::set osInterfaces; for (auto rootDeviceIndex = 0u; rootDeviceIndex < requiredDeviceCount; rootDeviceIndex++) { auto rootDeviceEnvironment = static_cast(executionEnvironment.rootDeviceEnvironments[rootDeviceIndex].get()); auto memoryOperationInterface = rootDeviceEnvironment->memoryOperationsInterface.get(); EXPECT_NE(nullptr, memoryOperationInterface); EXPECT_EQ(memoryOperationHandlers.end(), memoryOperationHandlers.find(memoryOperationInterface)); memoryOperationHandlers.insert(memoryOperationInterface); auto osInterface = rootDeviceEnvironment->osInterface.get(); EXPECT_NE(nullptr, osInterface); EXPECT_EQ(osInterfaces.end(), osInterfaces.find(osInterface)); osInterfaces.insert(osInterface); } } TEST_F(DeviceFactoryTest, givenInvalidHwConfigStringWhenPreparingDeviceEnvironmentsForProductFamilyOverrideThenFalseIsReturned) { DebugManagerStateRestore stateRestore; DebugManager.flags.HardwareInfoOverride.set("1x3"); MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); bool success = DeviceFactory::prepareDeviceEnvironmentsForProductFamilyOverride(executionEnvironment); EXPECT_FALSE(success); } HWTEST_F(DeviceFactoryTest, givenInvalidHwConfigStringWhenPrepareDeviceEnvironmentsForProductFamilyOverrideThenThrowsException) { DebugManagerStateRestore stateRestore; DebugManager.flags.HardwareInfoOverride.set("1x1x1"); MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); EXPECT_ANY_THROW(DeviceFactory::prepareDeviceEnvironmentsForProductFamilyOverride(executionEnvironment)); } TEST_F(DeviceFactoryTest, givenPrepareDeviceEnvironmentsCallWhenItIsDoneThenOsInterfaceIsAllocated) { bool success = 
DeviceFactory::prepareDeviceEnvironments(*executionEnvironment); EXPECT_TRUE(success); EXPECT_NE(nullptr, executionEnvironment->rootDeviceEnvironments[0]->osInterface); } TEST(DeviceFactory, givenCreateMultipleRootDevicesWhenCreateDevicesIsCalledThenVectorReturnedWouldContainFirstDiscreteDevicesThenIntegratedDevices) { uint32_t numRootDevices = 8u; NEO::HardwareInfo hwInfo[8]; auto executionEnvironment = new NEO::ExecutionEnvironment(); executionEnvironment->prepareRootDeviceEnvironments(numRootDevices); for (auto i = 0u; i < executionEnvironment->rootDeviceEnvironments.size(); i++) { hwInfo[i] = *NEO::defaultHwInfo.get(); executionEnvironment->rootDeviceEnvironments[i]->setHwInfo(&hwInfo[i]); } executionEnvironment->rootDeviceEnvironments[0].get()->getMutableHardwareInfo()->capabilityTable.isIntegratedDevice = true; executionEnvironment->rootDeviceEnvironments[1].get()->getMutableHardwareInfo()->capabilityTable.isIntegratedDevice = true; executionEnvironment->rootDeviceEnvironments[2].get()->getMutableHardwareInfo()->capabilityTable.isIntegratedDevice = true; executionEnvironment->rootDeviceEnvironments[3].get()->getMutableHardwareInfo()->capabilityTable.isIntegratedDevice = false; executionEnvironment->rootDeviceEnvironments[4].get()->getMutableHardwareInfo()->capabilityTable.isIntegratedDevice = false; executionEnvironment->rootDeviceEnvironments[5].get()->getMutableHardwareInfo()->capabilityTable.isIntegratedDevice = true; executionEnvironment->rootDeviceEnvironments[6].get()->getMutableHardwareInfo()->capabilityTable.isIntegratedDevice = true; executionEnvironment->rootDeviceEnvironments[7].get()->getMutableHardwareInfo()->capabilityTable.isIntegratedDevice = false; auto devices = DeviceFactory::createDevices(*executionEnvironment); for (auto iterator = 0u; iterator < 3; iterator++) { EXPECT_FALSE(devices[iterator]->getHardwareInfo().capabilityTable.isIntegratedDevice); // Initial entries would be for discrete devices } for (auto iterator = 3u; iterator < 8u; 
iterator++) { EXPECT_TRUE(devices[iterator]->getHardwareInfo().capabilityTable.isIntegratedDevice); // Later entries would be for integrated } } TEST(DeviceFactory, givenHwModeSelectedWhenIsHwModeSelectedIsCalledThenTrueIsReturned) { DebugManagerStateRestore stateRestore; constexpr int32_t hwModes[] = {-1, CommandStreamReceiverType::CSR_HW, CommandStreamReceiverType::CSR_HW_WITH_AUB}; for (const auto &hwMode : hwModes) { DebugManager.flags.SetCommandStreamReceiver.set(hwMode); EXPECT_TRUE(DeviceFactory::isHwModeSelected()); } } TEST(DeviceFactory, givenNonHwModeSelectedWhenIsHwModeSelectedIsCalledThenFalseIsReturned) { DebugManagerStateRestore stateRestore; constexpr int32_t nonHwModes[] = {CommandStreamReceiverType::CSR_AUB, CommandStreamReceiverType::CSR_TBX, CommandStreamReceiverType::CSR_TBX_WITH_AUB}; for (const auto &nonHwMode : nonHwModes) { DebugManager.flags.SetCommandStreamReceiver.set(nonHwMode); EXPECT_FALSE(DeviceFactory::isHwModeSelected()); } } TEST(DiscoverDevices, whenDiscoverDevicesAndForceDeviceIdIsDifferentFromTheExistingDeviceThenReturnNullptr) { DebugManagerStateRestore stateRestore; DebugManager.flags.ForceDeviceId.set("invalid"); ExecutionEnvironment executionEnviornment; auto hwDeviceIds = OSInterface::discoverDevices(executionEnviornment); EXPECT_TRUE(hwDeviceIds.empty()); } TEST(DiscoverDevices, whenDiscoverDevicesAndForceDeviceIdIsDifferentFromTheExistingDeviceThenPrepareDeviceEnvironmentsReturnsFalse) { DebugManagerStateRestore stateRestore; DebugManager.flags.ForceDeviceId.set("invalid"); ExecutionEnvironment executionEnviornment; auto result = DeviceFactory::prepareDeviceEnvironments(executionEnviornment); EXPECT_FALSE(result); } TEST(DiscoverDevices, whenDiscoverDevicesAndFilterDifferentFromTheExistingDeviceThenReturnNullptr) { DebugManagerStateRestore stateRestore; DebugManager.flags.FilterDeviceId.set("invalid"); DebugManager.flags.FilterBdfPath.set("invalid"); ExecutionEnvironment executionEnviornment; auto hwDeviceIds = 
OSInterface::discoverDevices(executionEnviornment); EXPECT_TRUE(hwDeviceIds.empty()); } TEST(DiscoverDevices, whenDiscoverDevicesAndFilterDifferentFromTheExistingDeviceThenPrepareDeviceEnvironmentsReturnsFalse) { DebugManagerStateRestore stateRestore; DebugManager.flags.FilterDeviceId.set("invalid"); DebugManager.flags.FilterBdfPath.set("invalid"); ExecutionEnvironment executionEnviornment; auto result = DeviceFactory::prepareDeviceEnvironments(executionEnviornment); EXPECT_FALSE(result); } using UltDeviceFactoryTest = DeviceFactoryTest; TEST_F(UltDeviceFactoryTest, givenExecutionEnvironmentWhenCreatingUltDeviceFactoryThenMockMemoryManagerIsAllocated) { executionEnvironment->rootDeviceEnvironments.clear(); executionEnvironment->memoryManager.reset(); UltDeviceFactory ultDeviceFactory{2, 0, *executionEnvironment}; EXPECT_EQ(2u, executionEnvironment->rootDeviceEnvironments.size()); EXPECT_NE(nullptr, executionEnvironment->memoryManager.get()); EXPECT_EQ(true, executionEnvironment->memoryManager.get()->isInitialized()); } compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/hw_info_config_tests.cpp000066400000000000000000000436631422164147700312100ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/os_interface/hw_info_config_tests.h" #include "shared/source/helpers/driver_model_type.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/os_interface/hw_info_config.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/unit_test/helpers/gtest_helpers.h" #include "shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/sampler/sampler.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "gtest/gtest.h" using namespace NEO; void HwInfoConfigTest::SetUp() { 
PlatformFixture::SetUp(); pInHwInfo = pPlatform->getClDevice(0)->getHardwareInfo(); testPlatform = &pInHwInfo.platform; testSkuTable = &pInHwInfo.featureTable; testWaTable = &pInHwInfo.workaroundTable; testSysInfo = &pInHwInfo.gtSystemInfo; outHwInfo = {}; } void HwInfoConfigTest::TearDown() { PlatformFixture::TearDown(); } HWTEST_F(HwInfoConfigTest, givenDebugFlagSetWhenAskingForHostMemCapabilitesThenReturnCorrectValue) { DebugManagerStateRestore restore; auto hwInfoConfig = HwInfoConfig::get(pInHwInfo.platform.eProductFamily); DebugManager.flags.EnableHostUsmSupport.set(0); EXPECT_EQ(0u, hwInfoConfig->getHostMemCapabilities(&pInHwInfo)); DebugManager.flags.EnableHostUsmSupport.set(1); EXPECT_NE(0u, hwInfoConfig->getHostMemCapabilities(&pInHwInfo)); } HWTEST_F(HwInfoConfigTest, givenHwInfoConfigWhenGettingSharedSystemMemCapabilitiesThenCorrectValueIsReturned) { DebugManagerStateRestore restore; auto hwInfoConfig = HwInfoConfig::get(pInHwInfo.platform.eProductFamily); EXPECT_EQ(0u, hwInfoConfig->getSharedSystemMemCapabilities(&pInHwInfo)); for (auto enable : {-1, 0, 1}) { DebugManager.flags.EnableSharedSystemUsmSupport.set(enable); if (enable > 0) { auto caps = UNIFIED_SHARED_MEMORY_ACCESS | UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS | UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS | UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS; EXPECT_EQ(caps, hwInfoConfig->getSharedSystemMemCapabilities(&pInHwInfo)); } else { EXPECT_EQ(0u, hwInfoConfig->getSharedSystemMemCapabilities(&pInHwInfo)); } } } TEST_F(HwInfoConfigTest, WhenParsingHwInfoConfigThenCorrectValuesAreReturned) { uint64_t hwInfoConfig = 0x0; bool success = parseHwInfoConfigString("1x1x1", hwInfoConfig); EXPECT_TRUE(success); EXPECT_EQ(hwInfoConfig, 0x100010001u); setHwInfoValuesFromConfig(hwInfoConfig, outHwInfo); EXPECT_EQ(outHwInfo.gtSystemInfo.SliceCount, 1u); EXPECT_EQ(outHwInfo.gtSystemInfo.SubSliceCount, 1u); EXPECT_EQ(outHwInfo.gtSystemInfo.DualSubSliceCount, 1u); EXPECT_EQ(outHwInfo.gtSystemInfo.EUCount, 1u); for 
(uint32_t slice = 0; slice < outHwInfo.gtSystemInfo.SliceCount; slice++) { EXPECT_TRUE(outHwInfo.gtSystemInfo.SliceInfo[slice].Enabled); } success = parseHwInfoConfigString("3x1x1", hwInfoConfig); EXPECT_TRUE(success); EXPECT_EQ(hwInfoConfig, 0x300010001u); setHwInfoValuesFromConfig(hwInfoConfig, outHwInfo); EXPECT_EQ(outHwInfo.gtSystemInfo.SliceCount, 3u); EXPECT_EQ(outHwInfo.gtSystemInfo.SubSliceCount, 3u); EXPECT_EQ(outHwInfo.gtSystemInfo.DualSubSliceCount, 3u); EXPECT_EQ(outHwInfo.gtSystemInfo.EUCount, 3u); for (uint32_t slice = 0; slice < outHwInfo.gtSystemInfo.SliceCount; slice++) { EXPECT_TRUE(outHwInfo.gtSystemInfo.SliceInfo[slice].Enabled); } success = parseHwInfoConfigString("1x7x1", hwInfoConfig); EXPECT_TRUE(success); EXPECT_EQ(hwInfoConfig, 0x100070001u); setHwInfoValuesFromConfig(hwInfoConfig, outHwInfo); EXPECT_EQ(outHwInfo.gtSystemInfo.SliceCount, 1u); EXPECT_EQ(outHwInfo.gtSystemInfo.SubSliceCount, 7u); EXPECT_EQ(outHwInfo.gtSystemInfo.DualSubSliceCount, 7u); EXPECT_EQ(outHwInfo.gtSystemInfo.EUCount, 7u); for (uint32_t slice = 0; slice < outHwInfo.gtSystemInfo.SliceCount; slice++) { EXPECT_TRUE(outHwInfo.gtSystemInfo.SliceInfo[slice].Enabled); } success = parseHwInfoConfigString("1x1x7", hwInfoConfig); EXPECT_TRUE(success); EXPECT_EQ(hwInfoConfig, 0x100010007u); setHwInfoValuesFromConfig(hwInfoConfig, outHwInfo); EXPECT_EQ(outHwInfo.gtSystemInfo.SliceCount, 1u); EXPECT_EQ(outHwInfo.gtSystemInfo.SubSliceCount, 1u); EXPECT_EQ(outHwInfo.gtSystemInfo.DualSubSliceCount, 1u); EXPECT_EQ(outHwInfo.gtSystemInfo.EUCount, 7u); for (uint32_t slice = 0; slice < outHwInfo.gtSystemInfo.SliceCount; slice++) { EXPECT_TRUE(outHwInfo.gtSystemInfo.SliceInfo[slice].Enabled); } success = parseHwInfoConfigString("2x4x16", hwInfoConfig); EXPECT_TRUE(success); EXPECT_EQ(0x200040010u, hwInfoConfig); setHwInfoValuesFromConfig(hwInfoConfig, outHwInfo); EXPECT_EQ(outHwInfo.gtSystemInfo.SliceCount, 2u); EXPECT_EQ(outHwInfo.gtSystemInfo.SubSliceCount, 8u); 
EXPECT_EQ(outHwInfo.gtSystemInfo.DualSubSliceCount, 8u); EXPECT_EQ(outHwInfo.gtSystemInfo.EUCount, 128u); for (uint32_t slice = 0; slice < outHwInfo.gtSystemInfo.SliceCount; slice++) { EXPECT_TRUE(outHwInfo.gtSystemInfo.SliceInfo[slice].Enabled); } } TEST_F(HwInfoConfigTest, givenInvalidHwInfoWhenParsingHwInfoConfigThenErrorIsReturned) { uint64_t hwInfoConfig = 0x0; bool success = parseHwInfoConfigString("1", hwInfoConfig); EXPECT_FALSE(success); success = parseHwInfoConfigString("1x3", hwInfoConfig); EXPECT_FALSE(success); success = parseHwInfoConfigString("65536x3x8", hwInfoConfig); EXPECT_FALSE(success); success = parseHwInfoConfigString("1x65536x8", hwInfoConfig); EXPECT_FALSE(success); success = parseHwInfoConfigString("1x3x65536", hwInfoConfig); EXPECT_FALSE(success); success = parseHwInfoConfigString("65535x65535x8", hwInfoConfig); EXPECT_FALSE(success); success = parseHwInfoConfigString("1x65535x65535", hwInfoConfig); EXPECT_FALSE(success); } HWTEST_F(HwInfoConfigTest, whenConvertingTimestampsToCsDomainThenNothingIsChanged) { auto hwInfoConfig = HwInfoConfig::get(pInHwInfo.platform.eProductFamily); uint64_t timestampData = 0x1234; uint64_t initialData = timestampData; hwInfoConfig->convertTimestampsFromOaToCsDomain(timestampData); EXPECT_EQ(initialData, timestampData); } HWTEST_F(HwInfoConfigTest, whenOverrideGfxPartitionLayoutForWslThenReturnFalse) { auto hwInfoConfig = HwInfoConfig::get(pInHwInfo.platform.eProductFamily); EXPECT_FALSE(hwInfoConfig->overrideGfxPartitionLayoutForWsl()); } HWTEST_F(HwInfoConfigTest, givenSamplerStateWhenAdjustSamplerStateThenNothingIsChanged) { using SAMPLER_STATE = typename FamilyType::SAMPLER_STATE; auto hwInfoConfig = HwInfoConfig::get(pInHwInfo.platform.eProductFamily); auto context = clUniquePtr(new MockContext()); auto sampler = clUniquePtr(new SamplerHw(context.get(), CL_FALSE, CL_ADDRESS_NONE, CL_FILTER_NEAREST)); auto state = FamilyType::cmdInitSamplerState; auto initialState = state; 
hwInfoConfig->adjustSamplerState(&state, pInHwInfo); EXPECT_EQ(0, memcmp(&initialState, &state, sizeof(SAMPLER_STATE))); } HWTEST_F(HwInfoConfigTest, givenHardwareInfoWhenCallingIsAdditionalStateBaseAddressWARequiredThenFalseIsReturned) { auto hwInfoConfig = HwInfoConfig::get(pInHwInfo.platform.eProductFamily); bool ret = hwInfoConfig->isAdditionalStateBaseAddressWARequired(pInHwInfo); EXPECT_FALSE(ret); } HWTEST_F(HwInfoConfigTest, givenHardwareInfoWhenCallingIsMaxThreadsForWorkgroupWARequiredThenFalseIsReturned) { auto hwInfoConfig = HwInfoConfig::get(pInHwInfo.platform.eProductFamily); bool ret = hwInfoConfig->isMaxThreadsForWorkgroupWARequired(pInHwInfo); EXPECT_FALSE(ret); } HWTEST_F(HwInfoConfigTest, givenHwInfoConfigWhenAskedForPageTableManagerSupportThenReturnCorrectValue) { const auto &hwInfoConfig = *HwInfoConfig::get(pInHwInfo.platform.eProductFamily); EXPECT_EQ(hwInfoConfig.isPageTableManagerSupported(pInHwInfo), UnitTestHelper::isPageTableManagerSupported(pInHwInfo)); } HWTEST_F(HwInfoConfigTest, givenVariousValuesWhenConvertingHwRevIdAndSteppingThenConversionIsCorrect) { const auto &hwInfoConfig = *HwInfoConfig::get(pInHwInfo.platform.eProductFamily); for (uint32_t testValue = 0; testValue < 0x10; testValue++) { auto hwRevIdFromStepping = hwInfoConfig.getHwRevIdFromStepping(testValue, pInHwInfo); if (hwRevIdFromStepping != CommonConstants::invalidStepping) { pInHwInfo.platform.usRevId = hwRevIdFromStepping; EXPECT_EQ(testValue, hwInfoConfig.getSteppingFromHwRevId(pInHwInfo)); } pInHwInfo.platform.usRevId = testValue; auto steppingFromHwRevId = hwInfoConfig.getSteppingFromHwRevId(pInHwInfo); if (steppingFromHwRevId != CommonConstants::invalidStepping) { EXPECT_EQ(testValue, hwInfoConfig.getHwRevIdFromStepping(steppingFromHwRevId, pInHwInfo)); } } } HWTEST_F(HwInfoConfigTest, givenVariousValuesWhenGettingAubStreamSteppingFromHwRevIdThenReturnValuesAreCorrect) { struct MockHwInfoConfig : HwInfoConfigHw { uint32_t getSteppingFromHwRevId(const HardwareInfo 
&hwInfo) const override { return returnedStepping; } std::vector getKernelSupportedThreadArbitrationPolicies() override { return {}; } uint32_t returnedStepping = 0; }; MockHwInfoConfig mockHwInfoConfig; mockHwInfoConfig.returnedStepping = REVISION_A0; EXPECT_EQ(AubMemDump::SteppingValues::A, mockHwInfoConfig.getAubStreamSteppingFromHwRevId(pInHwInfo)); mockHwInfoConfig.returnedStepping = REVISION_A1; EXPECT_EQ(AubMemDump::SteppingValues::A, mockHwInfoConfig.getAubStreamSteppingFromHwRevId(pInHwInfo)); mockHwInfoConfig.returnedStepping = REVISION_A3; EXPECT_EQ(AubMemDump::SteppingValues::A, mockHwInfoConfig.getAubStreamSteppingFromHwRevId(pInHwInfo)); mockHwInfoConfig.returnedStepping = REVISION_B; EXPECT_EQ(AubMemDump::SteppingValues::B, mockHwInfoConfig.getAubStreamSteppingFromHwRevId(pInHwInfo)); mockHwInfoConfig.returnedStepping = REVISION_C; EXPECT_EQ(AubMemDump::SteppingValues::C, mockHwInfoConfig.getAubStreamSteppingFromHwRevId(pInHwInfo)); mockHwInfoConfig.returnedStepping = REVISION_D; EXPECT_EQ(AubMemDump::SteppingValues::D, mockHwInfoConfig.getAubStreamSteppingFromHwRevId(pInHwInfo)); mockHwInfoConfig.returnedStepping = REVISION_K; EXPECT_EQ(AubMemDump::SteppingValues::K, mockHwInfoConfig.getAubStreamSteppingFromHwRevId(pInHwInfo)); mockHwInfoConfig.returnedStepping = CommonConstants::invalidStepping; EXPECT_EQ(AubMemDump::SteppingValues::A, mockHwInfoConfig.getAubStreamSteppingFromHwRevId(pInHwInfo)); } HWTEST_F(HwInfoConfigTest, givenHwInfoConfigWhenAskedForDefaultEngineTypeAdjustmentThenFalseIsReturned) { const auto &hwInfoConfig = *HwInfoConfig::get(pInHwInfo.platform.eProductFamily); EXPECT_FALSE(hwInfoConfig.isDefaultEngineTypeAdjustmentRequired(pInHwInfo)); } HWTEST_F(HwInfoConfigTest, whenCallingGetDeviceMemoryNameThenDdrIsReturned) { const auto &hwInfoConfig = *HwInfoConfig::get(pInHwInfo.platform.eProductFamily); auto deviceMemoryName = hwInfoConfig.getDeviceMemoryName(); EXPECT_TRUE(hasSubstr(deviceMemoryName, std::string("DDR"))); } 
HWCMDTEST_F(IGFX_GEN8_CORE, HwInfoConfigTest, givenHwInfoConfigWhenAdditionalKernelExecInfoSupportCheckedThenCorrectValueIsReturned) { const auto &hwInfoConfig = *HwInfoConfig::get(pInHwInfo.platform.eProductFamily); EXPECT_FALSE(hwInfoConfig.isDisableOverdispatchAvailable(pInHwInfo)); } HWTEST_F(HwInfoConfigTest, WhenAllowRenderCompressionIsCalledThenTrueIsReturned) { const auto &hwInfoConfig = *HwInfoConfig::get(pInHwInfo.platform.eProductFamily); EXPECT_TRUE(hwInfoConfig.allowCompression(pInHwInfo)); } HWTEST_F(HwInfoConfigTest, WhenAllowStatelessCompressionIsCalledThenReturnCorrectValue) { DebugManagerStateRestore restore; const auto &hwInfoConfig = *HwInfoConfig::get(pInHwInfo.platform.eProductFamily); EXPECT_FALSE(hwInfoConfig.allowStatelessCompression(pInHwInfo)); for (auto enable : {-1, 0, 1}) { DebugManager.flags.EnableStatelessCompression.set(enable); if (enable > 0) { EXPECT_TRUE(hwInfoConfig.allowStatelessCompression(pInHwInfo)); } else { EXPECT_FALSE(hwInfoConfig.allowStatelessCompression(pInHwInfo)); } } } HWTEST_F(HwInfoConfigTest, givenVariousDebugKeyValuesWhenGettingLocalMemoryAccessModeThenCorrectValueIsReturned) { struct MockHwInfoConfig : HwInfoConfigHw { using HwInfoConfig::getDefaultLocalMemoryAccessMode; }; DebugManagerStateRestore restore{}; auto mockHwInfoConfig = static_cast(*HwInfoConfig::get(productFamily)); const auto &hwInfoConfig = *HwInfoConfig::get(productFamily); EXPECT_EQ(mockHwInfoConfig.getDefaultLocalMemoryAccessMode(pInHwInfo), mockHwInfoConfig.getLocalMemoryAccessMode(pInHwInfo)); DebugManager.flags.ForceLocalMemoryAccessMode.set(0); EXPECT_EQ(LocalMemoryAccessMode::Default, hwInfoConfig.getLocalMemoryAccessMode(pInHwInfo)); DebugManager.flags.ForceLocalMemoryAccessMode.set(1); EXPECT_EQ(LocalMemoryAccessMode::CpuAccessAllowed, hwInfoConfig.getLocalMemoryAccessMode(pInHwInfo)); DebugManager.flags.ForceLocalMemoryAccessMode.set(3); EXPECT_EQ(LocalMemoryAccessMode::CpuAccessDisallowed, 
hwInfoConfig.getLocalMemoryAccessMode(pInHwInfo)); } HWTEST_F(HwInfoConfigTest, givenHwInfoConfigWhenAskedIfAllocationSizeAdjustmentIsRequiredThenFalseIsReturned) { const auto &hwInfoConfig = *HwInfoConfig::get(pInHwInfo.platform.eProductFamily); EXPECT_FALSE(hwInfoConfig.isAllocationSizeAdjustmentRequired(pInHwInfo)); } HWTEST_F(HwInfoConfigTest, givenHwInfoConfigWhenAskedIfPrefetchDisablingIsRequiredThenFalseIsReturned) { const auto &hwInfoConfig = *HwInfoConfig::get(pInHwInfo.platform.eProductFamily); EXPECT_FALSE(hwInfoConfig.isPrefetchDisablingRequired(pInHwInfo)); } HWTEST_F(HwInfoConfigTest, givenHwInfoConfigWhenAskedIfPipeControlPriorToNonPipelinedStateCommandsWARequiredThenFalseIsReturned) { const auto &hwInfoConfig = *HwInfoConfig::get(pInHwInfo.platform.eProductFamily); auto isRcs = false; const auto &[isBasicWARequired, isExtendedWARequired] = hwInfoConfig.isPipeControlPriorToNonPipelinedStateCommandsWARequired(pInHwInfo, isRcs); EXPECT_FALSE(isExtendedWARequired); EXPECT_FALSE(isBasicWARequired); } HWTEST2_F(HwInfoConfigTest, givenHwInfoConfigWhenAskedIfHeapInLocalMemThenFalseIsReturned, IsAtMostGen12lp) { const auto &hwInfoConfig = *HwInfoConfig::get(pInHwInfo.platform.eProductFamily); EXPECT_FALSE(hwInfoConfig.heapInLocalMem(pInHwInfo)); } HWTEST2_F(HwInfoConfigTest, givenHwInfoConfigWhenSettingCapabilityCoherencyFlagThenFlagIsSet, IsAtMostGen11) { auto &hwInfoConfig = *HwInfoConfig::get(pInHwInfo.platform.eProductFamily); bool coherency = false; hwInfoConfig.setCapabilityCoherencyFlag(pInHwInfo, coherency); EXPECT_TRUE(coherency); } HWTEST_F(HwInfoConfigTest, givenHwInfoConfigWhenAskedIfAdditionalMediaSamplerProgrammingIsRequiredThenFalseIsReturned) { const auto &hwInfoConfig = *HwInfoConfig::get(pInHwInfo.platform.eProductFamily); EXPECT_FALSE(hwInfoConfig.isAdditionalMediaSamplerProgrammingRequired()); } HWTEST_F(HwInfoConfigTest, givenHwInfoConfigWhenAskedIfInitialFlagsProgrammingIsRequiredThenFalseIsReturned) { const auto &hwInfoConfig = 
*HwInfoConfig::get(pInHwInfo.platform.eProductFamily); EXPECT_FALSE(hwInfoConfig.isInitialFlagsProgrammingRequired()); } HWTEST_F(HwInfoConfigTest, givenHwInfoConfigWhenAskedIfReturnedCmdSizeForMediaSamplerAdjustmentIsRequiredThenFalseIsReturned) { const auto &hwInfoConfig = *HwInfoConfig::get(pInHwInfo.platform.eProductFamily); EXPECT_FALSE(hwInfoConfig.isReturnedCmdSizeForMediaSamplerAdjustmentRequired()); } HWTEST_F(HwInfoConfigTest, givenHwInfoConfigWhenAskedIfExtraParametersAreInvalidThenFalseIsReturned) { const auto &hwInfoConfig = *HwInfoConfig::get(pInHwInfo.platform.eProductFamily); EXPECT_FALSE(hwInfoConfig.extraParametersInvalid(pInHwInfo)); } HWTEST_F(HwInfoConfigTest, givenHwInfoConfigWhenAskedIfPipeControlWAIsRequiredThenFalseIsReturned) { const auto &hwInfoConfig = *HwInfoConfig::get(pInHwInfo.platform.eProductFamily); EXPECT_FALSE(hwInfoConfig.pipeControlWARequired(pInHwInfo)); } HWTEST_F(HwInfoConfigTest, givenHwInfoConfigWhenAskedIfImagePitchAlignmentWAIsRequiredThenFalseIsReturned) { const auto &hwInfoConfig = *HwInfoConfig::get(pInHwInfo.platform.eProductFamily); EXPECT_FALSE(hwInfoConfig.imagePitchAlignmentWARequired(pInHwInfo)); } HWTEST_F(HwInfoConfigTest, givenHwInfoConfigWhenAskedIfForceEmuInt32DivRemSPWAIsRequiredThenFalseIsReturned) { const auto &hwInfoConfig = *HwInfoConfig::get(pInHwInfo.platform.eProductFamily); EXPECT_FALSE(hwInfoConfig.isForceEmuInt32DivRemSPWARequired(pInHwInfo)); } HWTEST_F(HwInfoConfigTest, givenHwInfoConfigWhenAskedIf3DPipelineSelectWAIsRequiredThenFalseIsReturned) { const auto &hwInfoConfig = *HwInfoConfig::get(pInHwInfo.platform.eProductFamily); EXPECT_FALSE(hwInfoConfig.is3DPipelineSelectWARequired()); } HWTEST_F(HwInfoConfigTest, givenHwInfoConfigWhenAskedIfStorageInfoAdjustmentIsRequiredThenFalseIsReturned) { const auto &hwInfoConfig = *HwInfoConfig::get(pInHwInfo.platform.eProductFamily); EXPECT_FALSE(hwInfoConfig.isStorageInfoAdjustmentRequired()); } HWTEST_F(HwInfoConfigTest, 
givenHwInfoConfigWhenAskedIfBlitterForImagesIsSupportedThenFalseIsReturned) { const auto &hwInfoConfig = *HwInfoConfig::get(productFamily); EXPECT_FALSE(hwInfoConfig.isBlitterForImagesSupported()); } HWTEST_F(HwInfoConfigTest, givenHwInfoConfigWhenAskedIfTile64With3DSurfaceOnBCSIsSupportedThenTrueIsReturned) { const auto &hwInfoConfig = *HwInfoConfig::get(pInHwInfo.platform.eProductFamily); EXPECT_TRUE(hwInfoConfig.isTile64With3DSurfaceOnBCSSupported(pInHwInfo)); } compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/hw_info_config_tests.h000066400000000000000000000012641422164147700306440ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/device/device.h" #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/platform_fixture.h" #include "gtest/gtest.h" using namespace NEO; struct HwInfoConfigTest : public ::testing::Test, public PlatformFixture { void SetUp() override; void TearDown() override; HardwareInfo pInHwInfo; HardwareInfo outHwInfo; PLATFORM *testPlatform = nullptr; FeatureTable *testSkuTable = nullptr; WorkaroundTable *testWaTable = nullptr; GT_SYSTEM_INFO *testSysInfo = nullptr; }; compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/linux/000077500000000000000000000000001422164147700254275ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/linux/.clang-tidy000066400000000000000000000031131422164147700274610ustar00rootroot00000000000000--- Checks: 'clang-diagnostic-*,clang-analyzer-*,google-default-arguments,modernize-use-override,modernize-use-default-member-init,-clang-analyzer-alpha*,readability-identifier-naming,-clang-analyzer-optin.performance.Padding,-clang-analyzer-cplusplus.NewDelete,-clang-analyzer-cplusplus.NewDeleteLeaks,-clang-analyzer-optin.cplusplus.VirtualCall' # WarningsAsErrors: '.*' HeaderFilterRegex: '^((?!^third_party\/).+)\.(h|hpp|inl)$' 
AnalyzeTemporaryDtors: false CheckOptions: - key: google-readability-braces-around-statements.ShortStatementLines value: '1' - key: google-readability-function-size.StatementThreshold value: '800' - key: google-readability-namespace-comments.ShortNamespaceLines value: '10' - key: google-readability-namespace-comments.SpacesBeforeComments value: '2' - key: readability-identifier-naming.ParameterCase value: camelBack - key: modernize-loop-convert.MaxCopySize value: '16' - key: modernize-loop-convert.MinConfidence value: reasonable - key: modernize-loop-convert.NamingStyle value: CamelCase - key: modernize-pass-by-value.IncludeStyle value: llvm - key: modernize-replace-auto-ptr.IncludeStyle value: llvm - key: modernize-use-nullptr.NullMacros value: 'NULL' - key: modernize-use-default-member-init.UseAssignment value: '1' ... compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/linux/CMakeLists.txt000066400000000000000000000062421422164147700301730ustar00rootroot00000000000000# # Copyright (C) 2018-2022 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_os_interface_linux ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/allocator_helper_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/debug_env_reader.cpp ${CMAKE_CURRENT_SOURCE_DIR}/device_command_stream_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/device_os_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/driver_info_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/drm_buffer_object_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/drm_command_stream_mm_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/drm_command_stream_tests_1.cpp ${CMAKE_CURRENT_SOURCE_DIR}/drm_command_stream_tests_2.cpp ${CMAKE_CURRENT_SOURCE_DIR}/drm_gem_close_worker_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/drm_mapper_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/drm_memory_manager_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}drm_memory_manager_localmem_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/drm_os_memory_tests.cpp 
${CMAKE_CURRENT_SOURCE_DIR}/drm_residency_handler_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/drm_system_info_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/drm_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/drm_uuid_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/file_logger_linux_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/hw_info_config_linux_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/hw_info_config_linux_tests.h ${CMAKE_CURRENT_SOURCE_DIR}/linux_create_command_queue_with_properties_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_os_time_linux.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_performance_counters_linux.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_performance_counters_linux.h ${CMAKE_CURRENT_SOURCE_DIR}/os_interface_linux_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/os_library_linux_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/os_time_test.cpp ${CMAKE_CURRENT_SOURCE_DIR}/performance_counters_linux_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/self_lib_lin.cpp ) if(NEO_ENABLE_i915_PRELIM_DETECTION) list(APPEND IGDRCL_SRCS_tests_os_interface_linux ${CMAKE_CURRENT_SOURCE_DIR}/device_command_stream_fixture_context.cpp ${CMAKE_CURRENT_SOURCE_DIR}/device_command_stream_fixture_context.h ${CMAKE_CURRENT_SOURCE_DIR}/device_command_stream_fixture_prelim.h ${CMAKE_CURRENT_SOURCE_DIR}/drm_buffer_object_tests_prelim.cpp ${CMAKE_CURRENT_SOURCE_DIR}/drm_memory_manager_debug_surface_prelim_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/drm_memory_manager_prelim_fixtures.h ${CMAKE_CURRENT_SOURCE_DIR}/drm_memory_manager_localmem_prelim_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/drm_residency_handler_prelim_tests.cpp ) endif() if(NEO__LIBVA_FOUND) list(APPEND IGDRCL_SRCS_tests_os_interface_linux ${CMAKE_CURRENT_SOURCE_DIR}/drm_va_sharing_tests.cpp ) endif() if(TESTS_XEHP_AND_LATER) list(APPEND IGDRCL_SRCS_tests_os_interface_linux ${CMAKE_CURRENT_SOURCE_DIR}/drm_command_stream_xehp_and_later_tests.cpp ) endif() if(UNIX) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_os_interface_linux}) endif() set_property(GLOBAL PROPERTY IGDRCL_SRCS_tests_os_interface_linux 
${IGDRCL_SRCS_tests_os_interface_linux}) add_subdirectories() compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/linux/allocator_helper_tests.cpp000066400000000000000000000006111422164147700326720ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/constants.h" #include "shared/source/os_interface/linux/allocator_helper.h" #include "gtest/gtest.h" TEST(AllocatorHelper, givenExpectedSizeToReserveWhenGetSizeToReserveCalledThenExpectedValueReturned) { EXPECT_EQ((4 * 4 + 2 * 4) * GB, NEO::getSizeToReserve()); } compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/linux/debug_env_reader.cpp000066400000000000000000000113451422164147700314170ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/debug_env_reader.h" #include "shared/test/common/helpers/variable_backup.h" #include "shared/test/common/mocks/mock_io_functions.h" #include "shared/test/common/test_macros/test.h" #include #include namespace NEO { class DebugEnvReaderTests : public ::testing::Test { public: void SetUp() override { evr = SettingsReader::createOsReader(false, ""); EXPECT_NE(nullptr, evr); } void TearDown() override { delete evr; } SettingsReader *evr = nullptr; }; TEST_F(DebugEnvReaderTests, GivenSetVariableThenSetValueIsReturned) { int32_t ret; std::string retString; std::string defaultString = "Default Value"; std::string setString = "Expected Value"; const char *testingVariableName = "TestingVariable"; const char *testingVariableValue = "1234"; { VariableBackup mockGetenvCalledBackup(&IoFunctions::mockGetenvCalled, 0); std::unordered_map mockableEnvs = {{testingVariableName, testingVariableValue}}; VariableBackup *> mockableEnvValuesBackup(&IoFunctions::mockableEnvValues, &mockableEnvs); ret = evr->getSetting(testingVariableName, 1); EXPECT_EQ(1u, IoFunctions::mockGetenvCalled); 
EXPECT_EQ(1234, ret); } { VariableBackup mockGetenvCalledBackup(&IoFunctions::mockGetenvCalled, 0); std::unordered_map mockableEnvs = {{testingVariableName, setString.c_str()}}; VariableBackup *> mockableEnvValuesBackup(&IoFunctions::mockableEnvValues, &mockableEnvs); retString = evr->getSetting("TestingVariable", defaultString); EXPECT_EQ(1u, IoFunctions::mockGetenvCalled); EXPECT_EQ(0, retString.compare(setString)); } { VariableBackup mockGetenvCalledBackup(&IoFunctions::mockGetenvCalled, 0); std::unordered_map mockableEnvs = {}; VariableBackup *> mockableEnvValuesBackup(&IoFunctions::mockableEnvValues, &mockableEnvs); ret = evr->getSetting("TestingVariable", 1); EXPECT_EQ(1u, IoFunctions::mockGetenvCalled); EXPECT_EQ(1, ret); } } TEST_F(DebugEnvReaderTests, GivenUnsetVariableThenDefaultValueIsReturned) { int32_t ret; std::string retString; std::string defaultString = "Default Value"; VariableBackup mockGetenvCalledBackup(&IoFunctions::mockGetenvCalled, 0); std::unordered_map mockableEnvs = {}; VariableBackup *> mockableEnvValuesBackup(&IoFunctions::mockableEnvValues, &mockableEnvs); ret = evr->getSetting("TestingVariable", 1); EXPECT_EQ(1u, IoFunctions::mockGetenvCalled); EXPECT_EQ(1, ret); retString = evr->getSetting("TestingVariable", defaultString); EXPECT_EQ(2u, IoFunctions::mockGetenvCalled); EXPECT_EQ(0, retString.compare(defaultString)); } TEST_F(DebugEnvReaderTests, GivenBoolEnvVariableWhenGettingThenCorrectValueIsReturned) { bool ret; bool defaultValue = true; bool expectedValue = false; { VariableBackup mockGetenvCalledBackup(&IoFunctions::mockGetenvCalled, 0); std::unordered_map mockableEnvs = {{"TestingVariable", "0"}}; VariableBackup *> mockableEnvValuesBackup(&IoFunctions::mockableEnvValues, &mockableEnvs); ret = evr->getSetting("TestingVariable", defaultValue); EXPECT_EQ(1u, IoFunctions::mockGetenvCalled); EXPECT_EQ(expectedValue, ret); } { VariableBackup mockGetenvCalledBackup(&IoFunctions::mockGetenvCalled, 0); std::unordered_map mockableEnvs = 
{}; VariableBackup *> mockableEnvValuesBackup(&IoFunctions::mockableEnvValues, &mockableEnvs); ret = evr->getSetting("TestingVariable", defaultValue); EXPECT_EQ(1u, IoFunctions::mockGetenvCalled); EXPECT_EQ(defaultValue, ret); } } TEST_F(DebugEnvReaderTests, WhenSettingAppSpecificLocationThenLocationIsReturned) { std::string appSpecific; appSpecific = "cl_cache_dir"; EXPECT_EQ(appSpecific, evr->appSpecificLocation(appSpecific)); } TEST_F(DebugEnvReaderTests, givenEnvironmentVariableReaderWhenCreateOsReaderWithStringThenNotNullPointer) { std::unique_ptr evr(SettingsReader::createOsReader(false, "")); EXPECT_NE(nullptr, evr); } } // namespace NEO device_command_stream_fixture_context.cpp000066400000000000000000000041761422164147700357060ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/linux/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/os_interface/linux/device_command_stream_fixture_context.h" #include "third_party/uapi/prelim/drm/i915_drm.h" int DrmMockCustomPrelimContext::ioctlExtra(unsigned long request, void *arg) { switch (request) { case PRELIM_DRM_IOCTL_I915_GEM_CREATE_EXT: { auto createExtParams = reinterpret_cast(arg); createExtSize = createExtParams->size; createExtHandle = createExtParams->handle; createExtExtensions = createExtParams->extensions; } break; case PRELIM_DRM_IOCTL_I915_GEM_VM_BIND: { } break; case PRELIM_DRM_IOCTL_I915_GEM_VM_UNBIND: { } break; case PRELIM_DRM_IOCTL_I915_GEM_WAIT_USER_FENCE: { const auto wait = reinterpret_cast(arg); receivedGemWaitUserFence = WaitUserFence{ wait->extensions, wait->addr, wait->ctx_id, wait->op, wait->flags, wait->value, wait->mask, wait->timeout, }; gemWaitUserFenceCalled++; } break; default: { std::cout << std::hex << DRM_IOCTL_I915_GEM_WAIT << std::endl; std::cout << "unexpected IOCTL: " << std::hex << request << std::endl; UNRECOVERABLE_IF(true); } break; } return 0; } void 
DrmMockCustomPrelimContext::execBufferExtensions(void *arg) { const auto execbuf = reinterpret_cast(arg); if ((execbuf->flags | I915_EXEC_USE_EXTENSIONS) && (execbuf->cliprects_ptr != 0)) { i915_user_extension *base = reinterpret_cast(execbuf->cliprects_ptr); if (base->name == PRELIM_DRM_I915_GEM_EXECBUFFER_EXT_USER_FENCE) { prelim_drm_i915_gem_execbuffer_ext_user_fence *userFenceExt = reinterpret_cast(execbuf->cliprects_ptr); this->completionAddress = userFenceExt->addr; this->completionValue = userFenceExt->value; } } } device_command_stream_fixture_context.h000066400000000000000000000013101422164147700353360ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/linux/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/test/common/libult/linux/drm_mock_prelim_context.h" struct DrmMockCustomPrelimContext { //PRELIM_DRM_IOCTL_I915_GEM_CREATE_EXT uint64_t createExtSize = 0; uint32_t createExtHandle = 0; uint64_t createExtExtensions = 0; //PRELIM_DRM_IOCTL_I915_GEM_WAIT_USER_FENCE WaitUserFence receivedGemWaitUserFence{}; uint32_t gemWaitUserFenceCalled = 0; //PRELIM_DRM_I915_GEM_EXECBUFFER_EXT_USER_FENCE uint64_t completionAddress = 0; uint64_t completionValue = 0; int ioctlExtra(unsigned long request, void *arg); void execBufferExtensions(void *arg); }; device_command_stream_fixture_impl.h000066400000000000000000000035021422164147700346200ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/linux/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/test/common/os_interface/linux/device_command_stream_fixture.h" class DrmMockCustomImpl : public DrmMockCustom { public: using Drm::memoryInfo; class Ioctls { public: void reset() { gemCreateExt = 0; gemMmapOffset = 0; } std::atomic gemCreateExt; std::atomic gemMmapOffset; }; Ioctls ioctlImpl_cnt; Ioctls ioctlImpl_expected; void 
testIoctls() { #define NEO_IOCTL_EXPECT_EQ(PARAM) \ if (this->ioctlImpl_expected.PARAM >= 0) { \ EXPECT_EQ(this->ioctlImpl_expected.PARAM, this->ioctlImpl_cnt.PARAM); \ } NEO_IOCTL_EXPECT_EQ(gemMmapOffset); #undef NEO_IOCTL_EXPECT_EQ } //DRM_IOCTL_I915_GEM_CREATE_EXT __u64 createExtSize = 0; __u32 createExtHandle = 0; __u64 createExtExtensions = 0; int ioctlExtra(unsigned long request, void *arg) override { switch (request) { case DRM_IOCTL_I915_GEM_CREATE_EXT: { auto createExtParams = reinterpret_cast(arg); createExtSize = createExtParams->size; createExtHandle = createExtParams->handle; createExtExtensions = createExtParams->extensions; ioctlImpl_cnt.gemCreateExt++; } break; default: { std::cout << "unexpected IOCTL: " << std::hex << request << std::endl; UNRECOVERABLE_IF(true); } break; } return 0; } DrmMockCustomImpl(RootDeviceEnvironment &rootDeviceEnvironment) : DrmMockCustom(rootDeviceEnvironment) { ioctlImpl_cnt.reset(); ioctlImpl_expected.reset(); } }; device_command_stream_fixture_prelim.h000066400000000000000000000016711422164147700351540ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/linux/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/test/common/os_interface/linux/device_command_stream_fixture.h" #include "opencl/test/unit_test/os_interface/linux/device_command_stream_fixture_context.h" class DrmMockCustomPrelim : public DrmMockCustom { public: using Drm::cacheInfo; using Drm::ioctlHelper; using Drm::memoryInfo; DrmMockCustomPrelim(RootDeviceEnvironment &rootDeviceEnvironment) : DrmMockCustom(rootDeviceEnvironment) { setupIoctlHelper(IGFX_UNKNOWN); } void getPrelimVersion(std::string &prelimVersion) override { prelimVersion = "2.0"; } int ioctlExtra(unsigned long request, void *arg) override { return context.ioctlExtra(request, arg); } void execBufferExtensions(void *arg) override { return context.execBufferExtensions(arg); } 
DrmMockCustomPrelimContext context{}; }; compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/linux/device_command_stream_tests.cpp000066400000000000000000000130401422164147700336630ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/aub_command_stream_receiver.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/command_stream/device_command_stream.h" #include "shared/source/command_stream/linear_stream.h" #include "shared/source/os_interface/linux/device_command_stream.inl" #include "shared/source/os_interface/linux/drm_command_stream.h" #include "shared/source/os_interface/linux/drm_neo.h" #include "shared/source/os_interface/os_interface.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/execution_environment_helper.h" #include "shared/test/common/os_interface/linux/device_command_stream_fixture.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/fixtures/mock_aub_center_fixture.h" #include "gtest/gtest.h" #include using namespace NEO; struct DeviceCommandStreamLeaksTest : ::testing::Test { void SetUp() override { HardwareInfo *hwInfo = nullptr; executionEnvironment = getExecutionEnvironmentImpl(hwInfo, 1); executionEnvironment->incRefInternal(); MockAubCenterFixture::setMockAubCenter(*executionEnvironment->rootDeviceEnvironments[0]); } void TearDown() override { executionEnvironment->decRefInternal(); } ExecutionEnvironment *executionEnvironment; }; HWTEST_F(DeviceCommandStreamLeaksTest, WhenCreatingDeviceCsrThenValidPointerIsReturned) { std::unique_ptr ptr(DeviceCommandStreamReceiver::create(false, *executionEnvironment, 0, 1)); DrmMockSuccess mockDrm(mockFd, *executionEnvironment->rootDeviceEnvironments[0]); EXPECT_NE(nullptr, ptr); } HWTEST_F(DeviceCommandStreamLeaksTest, 
givenDefaultDrmCsrWhenItIsCreatedThenGemCloseWorkerInactiveModeIsSelected) { std::unique_ptr ptr(DeviceCommandStreamReceiver::create(false, *executionEnvironment, 0, 1)); auto drmCsr = (DrmCommandStreamReceiver *)ptr.get(); EXPECT_EQ(drmCsr->peekGemCloseWorkerOperationMode(), gemCloseWorkerMode::gemCloseWorkerActive); } HWTEST_F(DeviceCommandStreamLeaksTest, givenDefaultDrmCsrWithAubDumWhenItIsCreatedThenGemCloseWorkerInactiveModeIsSelected) { std::unique_ptr ptr(DeviceCommandStreamReceiver::create(true, *executionEnvironment, 0, 1)); auto drmCsrWithAubDump = (CommandStreamReceiverWithAUBDump> *)ptr.get(); EXPECT_EQ(drmCsrWithAubDump->peekGemCloseWorkerOperationMode(), gemCloseWorkerMode::gemCloseWorkerActive); auto aubCSR = static_cast> *>(ptr.get())->aubCSR.get(); EXPECT_NE(nullptr, aubCSR); } HWTEST_F(DeviceCommandStreamLeaksTest, givenDefaultDrmCsrWhenOsInterfaceIsNullptrThenValidateDrm) { std::unique_ptr ptr(DeviceCommandStreamReceiver::create(false, *executionEnvironment, 0, 1)); auto drmCsr = (DrmCommandStreamReceiver *)ptr.get(); EXPECT_NE(nullptr, executionEnvironment->rootDeviceEnvironments[0]->osInterface); auto expected = drmCsr->getOSInterface()->getDriverModel()->template as(); auto got = executionEnvironment->rootDeviceEnvironments[0]->osInterface->getDriverModel()->as(); EXPECT_EQ(expected, got); } HWTEST_F(DeviceCommandStreamLeaksTest, givenDisabledGemCloseWorkerWhenCsrIsCreatedThenGemCloseWorkerInactiveModeIsSelected) { DebugManagerStateRestore restorer; DebugManager.flags.EnableGemCloseWorker.set(0u); std::unique_ptr ptr(DeviceCommandStreamReceiver::create(false, *executionEnvironment, 0, 1)); auto drmCsr = (DrmCommandStreamReceiver *)ptr.get(); EXPECT_EQ(drmCsr->peekGemCloseWorkerOperationMode(), gemCloseWorkerMode::gemCloseWorkerInactive); } HWTEST_F(DeviceCommandStreamLeaksTest, givenEnabledGemCloseWorkerWhenCsrIsCreatedThenGemCloseWorkerActiveModeIsSelected) { DebugManagerStateRestore restorer; DebugManager.flags.EnableGemCloseWorker.set(1u); 
std::unique_ptr ptr(DeviceCommandStreamReceiver::create(false, *executionEnvironment, 0, 1)); auto drmCsr = (DrmCommandStreamReceiver *)ptr.get(); EXPECT_EQ(drmCsr->peekGemCloseWorkerOperationMode(), gemCloseWorkerMode::gemCloseWorkerActive); } HWTEST_F(DeviceCommandStreamLeaksTest, givenDefaultGemCloseWorkerWhenCsrIsCreatedThenGemCloseWorkerActiveModeIsSelected) { std::unique_ptr ptr(DeviceCommandStreamReceiver::create(false, *executionEnvironment, 0, 1)); auto drmCsr = (DrmCommandStreamReceiver *)ptr.get(); EXPECT_EQ(drmCsr->peekGemCloseWorkerOperationMode(), gemCloseWorkerMode::gemCloseWorkerActive); } using DeviceCommandStreamSetInternalUsageTests = DeviceCommandStreamLeaksTest; HWTEST_F(DeviceCommandStreamSetInternalUsageTests, givenValidDrmCsrThenGemCloseWorkerOperationModeIsSetToInactiveWhenInternalUsageIsSet) { std::unique_ptr ptr(DeviceCommandStreamReceiver::create(false, *executionEnvironment, 0, 1)); auto drmCsr = (DrmCommandStreamReceiver *)ptr.get(); EXPECT_EQ(drmCsr->peekGemCloseWorkerOperationMode(), gemCloseWorkerMode::gemCloseWorkerActive); drmCsr->initializeDefaultsForInternalEngine(); EXPECT_EQ(drmCsr->peekGemCloseWorkerOperationMode(), gemCloseWorkerMode::gemCloseWorkerInactive); } compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/linux/device_os_tests.cpp000066400000000000000000000116041422164147700313170ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/device/device.h" #include "shared/source/helpers/get_info.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/unit_test/helpers/gtest_helpers.h" #include "opencl/source/api/api.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include 
"opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "gtest/gtest.h" using namespace ::testing; namespace NEO { TEST(DeviceOsTest, GivenDefaultClDeviceWhenCheckingForOsSpecificExtensionsThenCorrectExtensionsAreSet) { auto hwInfo = defaultHwInfo.get(); auto pDevice = MockDevice::createWithNewExecutionEnvironment(hwInfo); auto pClDevice = new ClDevice{*pDevice, platform()}; std::string extensionString(pClDevice->getDeviceInfo().deviceExtensions); EXPECT_FALSE(hasSubstr(extensionString, std::string("cl_intel_dx9_media_sharing "))); EXPECT_FALSE(hasSubstr(extensionString, std::string("cl_khr_dx9_media_sharing "))); EXPECT_FALSE(hasSubstr(extensionString, std::string("cl_khr_d3d10_sharing "))); EXPECT_FALSE(hasSubstr(extensionString, std::string("cl_khr_d3d11_sharing "))); EXPECT_FALSE(hasSubstr(extensionString, std::string("cl_intel_d3d11_nv12_media_sharing "))); EXPECT_FALSE(hasSubstr(extensionString, std::string("cl_intel_simultaneous_sharing "))); delete pClDevice; } TEST(DeviceOsTest, WhenDeviceIsCreatedThenSimultaneousInteropsIsSupported) { auto pDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); std::vector expected = {0}; EXPECT_TRUE(pDevice->simultaneousInterops == expected); } TEST(DeviceOsTest, GivenFailedDeviceWhenCreatingDeviceThenNullIsReturned) { auto hwInfo = defaultHwInfo.get(); auto pDevice = MockDevice::createWithNewExecutionEnvironment(hwInfo); EXPECT_EQ(nullptr, pDevice); } TEST(ApiOsTest, GivenUnupportedApiTokensWhenGettingInfoThenInvalidValueErrorIsReturned) { MockContext context; MockBuffer buffer; cl_bool boolVal; size_t size; auto retVal = context.getInfo(CL_CONTEXT_D3D10_PREFER_SHARED_RESOURCES_KHR, sizeof(cl_bool), &boolVal, &size); EXPECT_EQ(CL_INVALID_VALUE, retVal); void *paramVal = nullptr; retVal = buffer.getMemObjectInfo(CL_MEM_D3D10_RESOURCE_KHR, sizeof(void *), paramVal, &size); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST(ApiOsTest, 
GivenUnsupportedApiWhenGettingDispatchThenNullIsReturned) { MockContext context; EXPECT_EQ(nullptr, context.dispatch.crtDispatch->clGetDeviceIDsFromDX9INTEL); EXPECT_EQ(nullptr, context.dispatch.crtDispatch->clCreateFromDX9MediaSurfaceINTEL); EXPECT_EQ(nullptr, context.dispatch.crtDispatch->clEnqueueAcquireDX9ObjectsINTEL); EXPECT_EQ(nullptr, context.dispatch.crtDispatch->clEnqueueReleaseDX9ObjectsINTEL); EXPECT_EQ(nullptr, context.dispatch.icdDispatch->clGetDeviceIDsFromDX9MediaAdapterKHR); EXPECT_EQ(nullptr, context.dispatch.icdDispatch->clCreateFromDX9MediaSurfaceKHR); EXPECT_EQ(nullptr, context.dispatch.icdDispatch->clEnqueueAcquireDX9MediaSurfacesKHR); EXPECT_EQ(nullptr, context.dispatch.icdDispatch->clEnqueueReleaseDX9MediaSurfacesKHR); EXPECT_EQ(nullptr, context.dispatch.icdDispatch->clGetDeviceIDsFromD3D10KHR); EXPECT_EQ(nullptr, context.dispatch.icdDispatch->clCreateFromD3D10BufferKHR); EXPECT_EQ(nullptr, context.dispatch.icdDispatch->clCreateFromD3D10Texture2DKHR); EXPECT_EQ(nullptr, context.dispatch.icdDispatch->clCreateFromD3D10Texture3DKHR); EXPECT_EQ(nullptr, context.dispatch.icdDispatch->clEnqueueAcquireD3D10ObjectsKHR); EXPECT_EQ(nullptr, context.dispatch.icdDispatch->clEnqueueReleaseD3D10ObjectsKHR); EXPECT_EQ(nullptr, context.dispatch.icdDispatch->clGetDeviceIDsFromD3D11KHR); EXPECT_EQ(nullptr, context.dispatch.icdDispatch->clCreateFromD3D11BufferKHR); EXPECT_EQ(nullptr, context.dispatch.icdDispatch->clCreateFromD3D11Texture2DKHR); EXPECT_EQ(nullptr, context.dispatch.icdDispatch->clCreateFromD3D11Texture3DKHR); EXPECT_EQ(nullptr, context.dispatch.icdDispatch->clEnqueueAcquireD3D11ObjectsKHR); EXPECT_EQ(nullptr, context.dispatch.icdDispatch->clEnqueueReleaseD3D11ObjectsKHR); } TEST(DeviceOsTest, GivenMidThreadPreemptionAndFailedDeviceWhenCreatingDeviceThenNullIsReturned) { DebugManagerStateRestore dbgRestore; DebugManager.flags.ForcePreemptionMode.set(static_cast(PreemptionMode::MidThread)); auto pDevice = 
MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()); EXPECT_EQ(nullptr, pDevice); } } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/linux/driver_info_tests.cpp000066400000000000000000000036721422164147700316730ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/hw_info.h" #include "shared/source/os_interface/driver_info.h" #include "shared/test/common/helpers/default_hw_info.h" #include "gtest/gtest.h" #include #include namespace NEO { TEST(DriverInfo, GivenUninitializedHardwareInfoWhenCreateDriverInfoLinuxThenReturnNull) { std::unique_ptr driverInfo(DriverInfo::create(nullptr, nullptr)); EXPECT_EQ(nullptr, driverInfo.get()); } TEST(DriverInfo, GivenCreateDriverInfoWhenLinuxThenReturnNewInstance) { auto hwInfo = *defaultHwInfo; std::unique_ptr driverInfo(DriverInfo::create(&hwInfo, nullptr)); EXPECT_NE(nullptr, driverInfo.get()); } TEST(DriverInfo, GivenDriverInfoWhenLinuxThenReturnDefault) { auto hwInfo = *defaultHwInfo; std::unique_ptr driverInfo(DriverInfo::create(&hwInfo, nullptr)); std::string defaultName = "testName"; std::string defaultVersion = "testVersion"; auto resultName = driverInfo.get()->getDeviceName(defaultName); auto resultVersion = driverInfo.get()->getVersion(defaultVersion); EXPECT_STREQ(defaultName.c_str(), resultName.c_str()); EXPECT_STREQ(defaultVersion.c_str(), resultVersion.c_str()); } TEST(DriverInfo, givenGetMediaSharingSupportWhenLinuxThenReturnTrue) { auto hwInfo = *defaultHwInfo; std::unique_ptr driverInfo(DriverInfo::create(&hwInfo, nullptr)); EXPECT_TRUE(driverInfo->getMediaSharingSupport()); } TEST(DriverInfo, givenGetImageSupportWhenHwInfoSupportsImagesThenReturnTrueOtherwiseFalse) { auto hwInfo = *defaultHwInfo; for (bool supportsImages : {false, true}) { hwInfo.capabilityTable.supportsImages = supportsImages; std::unique_ptr driverInfo(DriverInfo::create(&hwInfo, nullptr)); 
EXPECT_EQ(supportsImages, driverInfo->getImageSupport()); } } } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/linux/drm_buffer_object_tests.cpp000066400000000000000000000630041422164147700330210ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/os_interface.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/libult/linux/drm_mock.h" #include "shared/test/common/mocks/linux/mock_drm_allocation.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/mocks/mock_gmm_helper.h" #include "shared/test/common/os_interface/linux/device_command_stream_fixture.h" #include "shared/test/common/os_interface/linux/drm_buffer_object_fixture.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/helpers/gtest_helpers.h" using namespace NEO; using DrmMockBufferObjectFixture = DrmBufferObjectFixture; using DrmBufferObjectTest = Test; TEST_F(DrmBufferObjectTest, WhenCallingExecThenReturnIsCorrect) { mock->ioctl_expected.total = 1; mock->ioctl_res = 0; drm_i915_gem_exec_object2 execObjectsStorage = {}; auto ret = bo->exec(0, 0, 0, false, osContext.get(), 0, 1, nullptr, 0u, &execObjectsStorage, 0, 0); EXPECT_EQ(mock->ioctl_res, ret); EXPECT_EQ(0u, mock->execBuffer.flags); } TEST_F(DrmBufferObjectTest, GivenInvalidParamsWhenCallingExecThenEfaultIsReturned) { mock->ioctl_expected.total = 3; mock->ioctl_res = -1; mock->errnoValue = EFAULT; drm_i915_gem_exec_object2 execObjectsStorage = {}; EXPECT_EQ(EFAULT, bo->exec(0, 0, 0, false, osContext.get(), 0, 1, nullptr, 0u, &execObjectsStorage, 0, 0)); } TEST_F(DrmBufferObjectTest, WhenSettingTilingThenCallSucceeds) { mock->ioctl_expected.total = 1; //set_tiling auto ret = bo->setTiling(I915_TILING_X, 0); EXPECT_TRUE(ret); } TEST_F(DrmBufferObjectTest, WhenSettingSameTilingThenCallSucceeds) { 
mock->ioctl_expected.total = 0; //set_tiling bo->tileBy(I915_TILING_X); auto ret = bo->setTiling(I915_TILING_X, 0); EXPECT_TRUE(ret); } TEST_F(DrmBufferObjectTest, GivenInvalidTilingWhenSettingTilingThenCallFails) { mock->ioctl_expected.total = 1; //set_tiling mock->ioctl_res = -1; auto ret = bo->setTiling(I915_TILING_X, 0); EXPECT_FALSE(ret); } TEST_F(DrmBufferObjectTest, givenBindAvailableWhenCallWaitThenNoIoctlIsCalled) { mock->bindAvailable = true; mock->ioctl_expected.total = 0; auto ret = bo->wait(-1); EXPECT_FALSE(ret); } TEST_F(DrmBufferObjectTest, givenAddressThatWhenSizeIsAddedCrosses32BitBoundaryWhenExecIsCalledThen48BitFlagIsSet) { drm_i915_gem_exec_object2 execObject; memset(&execObject, 0, sizeof(execObject)); bo->setAddress(((uint64_t)1u << 32) - 0x1000u); bo->setSize(0x1000); bo->fillExecObject(execObject, osContext.get(), 0, 1); //base address + size > size of 32bit address space EXPECT_TRUE(execObject.flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS); } TEST_F(DrmBufferObjectTest, givenAddressThatWhenSizeIsAddedWithin32BitBoundaryWhenExecIsCalledThen48BitFlagSet) { drm_i915_gem_exec_object2 execObject; memset(&execObject, 0, sizeof(execObject)); bo->setAddress(((uint64_t)1u << 32) - 0x1000u); bo->setSize(0xFFF); bo->fillExecObject(execObject, osContext.get(), 0, 1); //base address + size < size of 32bit address space EXPECT_TRUE(execObject.flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS); } TEST_F(DrmBufferObjectTest, whenExecFailsThenPinFails) { std::unique_ptr buff(new uint32_t[1024]); mock->ioctl_expected.total = 3; mock->ioctl_res = -1; this->mock->errnoValue = EINVAL; std::unique_ptr boToPin(new TestedBufferObject(this->mock.get())); ASSERT_NE(nullptr, boToPin.get()); bo->setAddress(reinterpret_cast(buff.get())); BufferObject *boArray[1] = {boToPin.get()}; auto ret = bo->pin(boArray, 1, osContext.get(), 0, 1); EXPECT_EQ(EINVAL, ret); } TEST_F(DrmBufferObjectTest, whenExecFailsThenValidateHostPtrFails) { std::unique_ptr buff(new uint32_t[1024]); 
mock->ioctl_expected.total = 3; mock->ioctl_res = -1; this->mock->errnoValue = EINVAL; std::unique_ptr boToPin(new TestedBufferObject(this->mock.get())); ASSERT_NE(nullptr, boToPin.get()); bo->setAddress(reinterpret_cast(buff.get())); BufferObject *boArray[1] = {boToPin.get()}; auto ret = bo->validateHostPtr(boArray, 1, osContext.get(), 0, 1); EXPECT_EQ(EINVAL, ret); } TEST_F(DrmBufferObjectTest, givenResidentBOWhenPrintExecutionBufferIsSetToTrueThenDebugInformationAboutBOIsPrinted) { mock->ioctl_expected.total = 1; DebugManagerStateRestore restore; DebugManager.flags.PrintExecutionBuffer.set(true); std::unique_ptr buff(new uint32_t[1024]); std::unique_ptr bo(new TestedBufferObject(this->mock.get())); ASSERT_NE(nullptr, bo.get()); bo->setAddress(reinterpret_cast(buff.get())); BufferObject *boArray[1] = {bo.get()}; testing::internal::CaptureStdout(); auto ret = bo->pin(boArray, 1, osContext.get(), 0, 1); EXPECT_EQ(0, ret); std::string output = testing::internal::GetCapturedStdout(); auto idx = output.find("drm_i915_gem_execbuffer2 {"); size_t expectedValue = 29; EXPECT_EQ(expectedValue, idx); idx = output.find("Buffer Object = { handle: BO-"); EXPECT_NE(std::string::npos, idx); idx = output.find("Command Buffer Object = { handle: BO-"); EXPECT_NE(std::string::npos, idx); } TEST_F(DrmBufferObjectTest, whenPrintBOCreateDestroyResultFlagIsSetAndCloseIsCalledOnBOThenDebugInfromationIsPrinted) { mock->ioctl_expected.total = 1; DebugManagerStateRestore stateRestore; DebugManager.flags.PrintBOCreateDestroyResult.set(true); testing::internal::CaptureStdout(); bool result = bo->close(); EXPECT_EQ(true, result); std::string output = testing::internal::GetCapturedStdout(); size_t idx = output.find("Calling gem close on handle: BO-"); size_t expectedValue = 0; EXPECT_EQ(expectedValue, idx); } TEST_F(DrmBufferObjectTest, whenPrintExecutionBufferIsSetToTrueThenMessageFoundInStdStream) { mock->ioctl_expected.total = 1; DebugManagerStateRestore restore; 
DebugManager.flags.PrintExecutionBuffer.set(true); drm_i915_gem_exec_object2 execObjectsStorage = {}; testing::internal::CaptureStdout(); auto ret = bo->exec(0, 0, 0, false, osContext.get(), 0, 1, nullptr, 0u, &execObjectsStorage, 0, 0); EXPECT_EQ(0, ret); std::string output = testing::internal::GetCapturedStdout(); auto idx = output.find("drm_i915_gem_execbuffer2 {"); size_t expectedValue = 29; EXPECT_EQ(expectedValue, idx); } TEST(DrmBufferObjectSimpleTest, givenInvalidBoWhenValidateHostptrIsCalledThenErrorIsReturned) { std::unique_ptr buff(new uint32_t[256]); MockExecutionEnvironment executionEnvironment; std::unique_ptr mock(new DrmMockCustom(*executionEnvironment.rootDeviceEnvironments[0])); executionEnvironment.rootDeviceEnvironments[0]->memoryOperationsInterface = DrmMemoryOperationsHandler::create(*mock.get(), 0u); OsContextLinux osContext(*mock, 0u, EngineDescriptorHelper::getDefaultDescriptor()); ASSERT_NE(nullptr, mock.get()); std::unique_ptr bo(new TestedBufferObject(mock.get())); ASSERT_NE(nullptr, bo.get()); // fail DRM_IOCTL_I915_GEM_EXECBUFFER2 in pin mock->ioctl_res = -1; std::unique_ptr boToPin(new TestedBufferObject(mock.get())); ASSERT_NE(nullptr, boToPin.get()); bo->setAddress(reinterpret_cast(buff.get())); mock->errnoValue = EFAULT; BufferObject *boArray[1] = {boToPin.get()}; auto ret = bo->pin(boArray, 1, &osContext, 0, 1); EXPECT_EQ(EFAULT, ret); mock->ioctl_res = 0; } TEST(DrmBufferObjectSimpleTest, givenInvalidBoWhenPinIsCalledThenErrorIsReturned) { std::unique_ptr buff(new uint32_t[256]); MockExecutionEnvironment executionEnvironment; std::unique_ptr mock(new DrmMockCustom(*executionEnvironment.rootDeviceEnvironments[0])); executionEnvironment.rootDeviceEnvironments[0]->memoryOperationsInterface = DrmMemoryOperationsHandler::create(*mock.get(), 0u); OsContextLinux osContext(*mock, 0u, EngineDescriptorHelper::getDefaultDescriptor()); ASSERT_NE(nullptr, mock.get()); std::unique_ptr bo(new TestedBufferObject(mock.get())); ASSERT_NE(nullptr, 
bo.get()); // fail DRM_IOCTL_I915_GEM_EXECBUFFER2 in pin mock->ioctl_res = -1; std::unique_ptr boToPin(new TestedBufferObject(mock.get())); ASSERT_NE(nullptr, boToPin.get()); bo->setAddress(reinterpret_cast(buff.get())); mock->errnoValue = EFAULT; BufferObject *boArray[1] = {boToPin.get()}; auto ret = bo->validateHostPtr(boArray, 1, &osContext, 0, 1); EXPECT_EQ(EFAULT, ret); mock->ioctl_res = 0; } TEST(DrmBufferObjectSimpleTest, givenBufferObjectWhenConstructedWithASizeThenTheSizeIsInitialized) { MockExecutionEnvironment executionEnvironment; std::unique_ptr drmMock(new DrmMockCustom(*executionEnvironment.rootDeviceEnvironments[0])); std::unique_ptr bo(new BufferObject(drmMock.get(), 1, 0x1000, 1)); EXPECT_EQ(0x1000u, bo->peekSize()); } TEST(DrmBufferObjectSimpleTest, givenArrayOfBosWhenPinnedThenAllBosArePinned) { std::unique_ptr buff(new uint32_t[256]); MockExecutionEnvironment executionEnvironment; std::unique_ptr mock(new DrmMockCustom(*executionEnvironment.rootDeviceEnvironments[0])); ASSERT_NE(nullptr, mock.get()); OsContextLinux osContext(*mock, 0u, EngineDescriptorHelper::getDefaultDescriptor()); std::unique_ptr bo(new TestedBufferObject(mock.get())); ASSERT_NE(nullptr, bo.get()); mock->ioctl_res = 0; std::unique_ptr boToPin(new TestedBufferObject(mock.get())); std::unique_ptr boToPin2(new TestedBufferObject(mock.get())); std::unique_ptr boToPin3(new TestedBufferObject(mock.get())); ASSERT_NE(nullptr, boToPin.get()); ASSERT_NE(nullptr, boToPin2.get()); ASSERT_NE(nullptr, boToPin3.get()); BufferObject *array[3] = {boToPin.get(), boToPin2.get(), boToPin3.get()}; bo->setAddress(reinterpret_cast(buff.get())); auto ret = bo->pin(array, 3, &osContext, 0, 1); EXPECT_EQ(mock->ioctl_res, ret); EXPECT_LT(0u, mock->execBuffer.batch_len); EXPECT_EQ(4u, mock->execBuffer.buffer_count); // 3 bos to pin plus 1 exec bo EXPECT_EQ(reinterpret_cast(boToPin->execObjectPointerFilled), mock->execBuffer.buffers_ptr); EXPECT_NE(nullptr, boToPin2->execObjectPointerFilled); 
EXPECT_NE(nullptr, boToPin3->execObjectPointerFilled); bo->setAddress(0llu); } TEST(DrmBufferObjectSimpleTest, givenArrayOfBosWhenValidatedThenAllBosArePinned) { std::unique_ptr buff(new uint32_t[256]); MockExecutionEnvironment executionEnvironment; std::unique_ptr mock(new DrmMockCustom(*executionEnvironment.rootDeviceEnvironments[0])); ASSERT_NE(nullptr, mock.get()); OsContextLinux osContext(*mock, 0u, EngineDescriptorHelper::getDefaultDescriptor()); std::unique_ptr bo(new TestedBufferObject(mock.get())); ASSERT_NE(nullptr, bo.get()); mock->ioctl_res = 0; std::unique_ptr boToPin(new TestedBufferObject(mock.get())); std::unique_ptr boToPin2(new TestedBufferObject(mock.get())); std::unique_ptr boToPin3(new TestedBufferObject(mock.get())); ASSERT_NE(nullptr, boToPin.get()); ASSERT_NE(nullptr, boToPin2.get()); ASSERT_NE(nullptr, boToPin3.get()); BufferObject *array[3] = {boToPin.get(), boToPin2.get(), boToPin3.get()}; bo->setAddress(reinterpret_cast(buff.get())); auto ret = bo->validateHostPtr(array, 3, &osContext, 0, 1); EXPECT_EQ(mock->ioctl_res, ret); EXPECT_LT(0u, mock->execBuffer.batch_len); EXPECT_EQ(4u, mock->execBuffer.buffer_count); // 3 bos to pin plus 1 exec bo EXPECT_EQ(reinterpret_cast(boToPin->execObjectPointerFilled), mock->execBuffer.buffers_ptr); EXPECT_NE(nullptr, boToPin2->execObjectPointerFilled); EXPECT_NE(nullptr, boToPin3->execObjectPointerFilled); bo->setAddress(0llu); } TEST_F(DrmBufferObjectTest, givenDeleterWhenBufferObjectIsCreatedAndDeletedThenCloseIsCalled) { mock->ioctl_cnt.reset(); mock->ioctl_expected.reset(); { std::unique_ptr bo(new BufferObject(mock.get(), 1, 0x1000, 1)); } EXPECT_EQ(1, mock->ioctl_cnt.gemClose); mock->ioctl_cnt.reset(); mock->ioctl_expected.reset(); } TEST(DrmBufferObject, givenPerContextVmRequiredWhenBoCreatedThenBindInfoIsInitializedToOsContextCount) { auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr)); 
device->getRootDeviceEnvironment().executionEnvironment.setDebuggingEnabled(); device->getExecutionEnvironment()->calculateMaxOsContextCount(); DrmMock drm(*(device->getExecutionEnvironment()->rootDeviceEnvironments[0].get())); EXPECT_TRUE(drm.isPerContextVMRequired()); auto osContextCount = device->getExecutionEnvironment()->memoryManager->getRegisteredEnginesCount(); MockBufferObject bo(&drm, 0, 0, osContextCount); EXPECT_EQ(osContextCount, bo.bindInfo.size()); for (auto &iter : bo.bindInfo) { for (uint32_t i = 0; i < EngineLimits::maxHandleCount; i++) { EXPECT_FALSE(iter[i]); } } } TEST(DrmBufferObject, givenDrmIoctlReturnsErrorNotSupportedThenBufferObjectReturnsError) { auto executionEnvironment = new ExecutionEnvironment; executionEnvironment->setDebuggingEnabled(); executionEnvironment->prepareRootDeviceEnvironments(1); executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); executionEnvironment->calculateMaxOsContextCount(); executionEnvironment->rootDeviceEnvironments[0]->osInterface = std::make_unique(); DrmMockReturnErrorNotSupported *drm = new DrmMockReturnErrorNotSupported(*executionEnvironment->rootDeviceEnvironments[0]); executionEnvironment->rootDeviceEnvironments[0]->osInterface->setDriverModel(std::unique_ptr(drm)); executionEnvironment->rootDeviceEnvironments[0]->memoryOperationsInterface = DrmMemoryOperationsHandler::create(*drm, 0u); std::unique_ptr device(MockDevice::createWithExecutionEnvironment(defaultHwInfo.get(), executionEnvironment, 0)); auto osContextCount = device->getExecutionEnvironment()->memoryManager->getRegisteredEnginesCount(); MockBufferObject bo(drm, 0, 0, osContextCount); std::unique_ptr osContext; osContext.reset(new OsContextLinux(*drm, 0u, EngineDescriptorHelper::getDefaultDescriptor())); drm_i915_gem_exec_object2 execObjectsStorage = {}; auto ret = bo.exec(0, 0, 0, false, osContext.get(), 0, 1, nullptr, 0u, &execObjectsStorage, 0, 0); EXPECT_NE(0, ret); } TEST(DrmBufferObject, 
givenPerContextVmRequiredWhenBoBoundAndUnboundThenCorrectBindInfoIsUpdated) { auto executionEnvironment = new ExecutionEnvironment; executionEnvironment->setDebuggingEnabled(); executionEnvironment->prepareRootDeviceEnvironments(1); executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); executionEnvironment->calculateMaxOsContextCount(); executionEnvironment->rootDeviceEnvironments[0]->osInterface = std::make_unique(); DrmMockNonFailing *drm = new DrmMockNonFailing(*executionEnvironment->rootDeviceEnvironments[0]); EXPECT_TRUE(drm->isPerContextVMRequired()); executionEnvironment->rootDeviceEnvironments[0]->osInterface->setDriverModel(std::unique_ptr(drm)); executionEnvironment->rootDeviceEnvironments[0]->memoryOperationsInterface = DrmMemoryOperationsHandler::create(*drm, 0u); std::unique_ptr device(MockDevice::createWithExecutionEnvironment(defaultHwInfo.get(), executionEnvironment, 0)); auto osContextCount = device->getExecutionEnvironment()->memoryManager->getRegisteredEnginesCount(); MockBufferObject bo(drm, 0, 0, osContextCount); EXPECT_EQ(osContextCount, bo.bindInfo.size()); auto contextId = device->getExecutionEnvironment()->memoryManager->getRegisteredEnginesCount() / 2; auto osContext = device->getExecutionEnvironment()->memoryManager->getRegisteredEngines()[contextId].osContext; osContext->ensureContextInitialized(); bo.bind(osContext, 0); EXPECT_TRUE(bo.bindInfo[contextId][0]); bo.unbind(osContext, 0); EXPECT_FALSE(bo.bindInfo[contextId][0]); } TEST(DrmBufferObject, givenPrintBOBindingResultWhenBOBindAndUnbindSucceedsThenPrintDebugInformationAboutBOBindingResult) { struct DrmMockToSucceedBindBufferObject : public DrmMock { DrmMockToSucceedBindBufferObject(RootDeviceEnvironment &rootDeviceEnvironment) : DrmMock(rootDeviceEnvironment) {} int bindBufferObject(OsContext *osContext, uint32_t vmHandleId, BufferObject *bo) override { return 0; } int unbindBufferObject(OsContext *osContext, uint32_t vmHandleId, BufferObject *bo) 
override { return 0; } }; DebugManagerStateRestore restore; DebugManager.flags.PrintBOBindingResult.set(true); auto executionEnvironment = new ExecutionEnvironment; executionEnvironment->setDebuggingEnabled(); executionEnvironment->prepareRootDeviceEnvironments(1); executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); executionEnvironment->calculateMaxOsContextCount(); executionEnvironment->rootDeviceEnvironments[0]->osInterface = std::make_unique(); auto drm = new DrmMockToSucceedBindBufferObject(*executionEnvironment->rootDeviceEnvironments[0]); executionEnvironment->rootDeviceEnvironments[0]->osInterface->setDriverModel(std::unique_ptr(drm)); executionEnvironment->rootDeviceEnvironments[0]->memoryOperationsInterface = DrmMemoryOperationsHandler::create(*drm, 0u); std::unique_ptr device(MockDevice::createWithExecutionEnvironment(defaultHwInfo.get(), executionEnvironment, 0)); auto osContextCount = device->getExecutionEnvironment()->memoryManager->getRegisteredEnginesCount(); MockBufferObject bo(drm, 0, 0, osContextCount); EXPECT_EQ(osContextCount, bo.bindInfo.size()); auto contextId = device->getExecutionEnvironment()->memoryManager->getRegisteredEnginesCount() / 2; auto osContext = device->getExecutionEnvironment()->memoryManager->getRegisteredEngines()[contextId].osContext; osContext->ensureContextInitialized(); testing::internal::CaptureStdout(); bo.bind(osContext, 0); EXPECT_TRUE(bo.bindInfo[contextId][0]); std::string bindOutput = testing::internal::GetCapturedStdout(); EXPECT_STREQ(bindOutput.c_str(), "bind BO-0 to VM 0, drmVmId = 1, range: 0 - 0, size: 0, result: 0\n"); testing::internal::CaptureStdout(); bo.unbind(osContext, 0); EXPECT_FALSE(bo.bindInfo[contextId][0]); std::string unbindOutput = testing::internal::GetCapturedStdout(); EXPECT_STREQ(unbindOutput.c_str(), "unbind BO-0 from VM 0, drmVmId = 1, range: 0 - 0, size: 0, result: 0\n"); } TEST(DrmBufferObject, 
givenPrintBOBindingResultWhenBOBindAndUnbindFailsThenPrintDebugInformationAboutBOBindingResultWithErrno) { struct DrmMockToFailBindBufferObject : public DrmMock { DrmMockToFailBindBufferObject(RootDeviceEnvironment &rootDeviceEnvironment) : DrmMock(rootDeviceEnvironment) {} int bindBufferObject(OsContext *osContext, uint32_t vmHandleId, BufferObject *bo) override { return -1; } int unbindBufferObject(OsContext *osContext, uint32_t vmHandleId, BufferObject *bo) override { return -1; } int getErrno() override { return EINVAL; } }; DebugManagerStateRestore restore; DebugManager.flags.PrintBOBindingResult.set(true); auto executionEnvironment = new ExecutionEnvironment; executionEnvironment->setDebuggingEnabled(); executionEnvironment->prepareRootDeviceEnvironments(1); executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); executionEnvironment->calculateMaxOsContextCount(); executionEnvironment->rootDeviceEnvironments[0]->osInterface = std::make_unique(); auto drm = new DrmMockToFailBindBufferObject(*executionEnvironment->rootDeviceEnvironments[0]); executionEnvironment->rootDeviceEnvironments[0]->osInterface->setDriverModel(std::unique_ptr(drm)); executionEnvironment->rootDeviceEnvironments[0]->memoryOperationsInterface = DrmMemoryOperationsHandler::create(*drm, 0u); std::unique_ptr device(MockDevice::createWithExecutionEnvironment(defaultHwInfo.get(), executionEnvironment, 0)); auto osContextCount = device->getExecutionEnvironment()->memoryManager->getRegisteredEnginesCount(); MockBufferObject bo(drm, 0, 0, osContextCount); EXPECT_EQ(osContextCount, bo.bindInfo.size()); auto contextId = device->getExecutionEnvironment()->memoryManager->getRegisteredEnginesCount() / 2; auto osContext = device->getExecutionEnvironment()->memoryManager->getRegisteredEngines()[contextId].osContext; osContext->ensureContextInitialized(); testing::internal::CaptureStderr(); bo.bind(osContext, 0); EXPECT_FALSE(bo.bindInfo[contextId][0]); std::string bindOutput = 
testing::internal::GetCapturedStderr(); EXPECT_TRUE(hasSubstr(bindOutput, "bind BO-0 to VM 0, drmVmId = 1, range: 0 - 0, size: 0, result: -1, errno: 22")); testing::internal::CaptureStderr(); bo.bindInfo[contextId][0] = true; bo.unbind(osContext, 0); EXPECT_TRUE(bo.bindInfo[contextId][0]); std::string unbindOutput = testing::internal::GetCapturedStderr(); EXPECT_TRUE(hasSubstr(unbindOutput, "unbind BO-0 from VM 0, drmVmId = 1, range: 0 - 0, size: 0, result: -1, errno: 22")); } TEST(DrmBufferObject, whenBindExtHandleAddedThenItIsStored) { auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); DrmMockResources drm(*executionEnvironment->rootDeviceEnvironments[0]); MockBufferObject bo(&drm, 0, 0, 1); bo.addBindExtHandle(4); EXPECT_EQ(1u, bo.bindExtHandles.size()); EXPECT_EQ(4u, bo.bindExtHandles[0]); EXPECT_EQ(1u, bo.getBindExtHandles().size()); EXPECT_EQ(4u, bo.getBindExtHandles()[0]); } TEST(DrmBufferObject, whenMarkForCapturedCalledThenIsMarkedForCaptureReturnsTrue) { auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); DrmMockResources drm(*executionEnvironment->rootDeviceEnvironments[0]); MockBufferObject bo(&drm, 0, 0, 1); EXPECT_FALSE(bo.isMarkedForCapture()); bo.markForCapture(); EXPECT_TRUE(bo.isMarkedForCapture()); } TEST_F(DrmBufferObjectTest, givenBoMarkedForCaptureWhenFillingExecObjectThenCaptureFlagIsSet) { drm_i915_gem_exec_object2 execObject; memset(&execObject, 0, sizeof(execObject)); bo->markForCapture(); bo->setAddress(0x45000); bo->setSize(0x1000); bo->fillExecObject(execObject, osContext.get(), 0, 1); EXPECT_TRUE(execObject.flags & EXEC_OBJECT_CAPTURE); } TEST_F(DrmBufferObjectTest, givenAsyncDebugFlagWhenFillingExecObjectThenFlagIsSet) { drm_i915_gem_exec_object2 execObject; DebugManagerStateRestore restorer; DebugManager.flags.UseAsyncDrmExec.set(1); memset(&execObject, 0, sizeof(execObject)); bo->setAddress(0x45000); bo->setSize(0x1000); 
bo->fillExecObject(execObject, osContext.get(), 0, 1); EXPECT_TRUE(execObject.flags & EXEC_OBJECT_ASYNC); } TEST_F(DrmBufferObjectTest, given47bitAddressWhenSetThenIsAddressNotCanonized) { VariableBackup backup(&MockGmmHelper::addressWidth, 48); MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); DrmMock drm(*(executionEnvironment.rootDeviceEnvironments[0].get())); uint64_t address = maxNBitValue(47) - maxNBitValue(5); MockBufferObject bo(&drm, 0, 0, 1); bo.setAddress(address); auto boAddress = bo.peekAddress(); EXPECT_EQ(boAddress, address); } TEST_F(DrmBufferObjectTest, given48bitAddressWhenSetThenAddressIsCanonized) { VariableBackup backup(&MockGmmHelper::addressWidth, 48); MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); DrmMock drm(*(executionEnvironment.rootDeviceEnvironments[0].get())); uint64_t address = maxNBitValue(48) - maxNBitValue(5); uint64_t expectedAddress = std::numeric_limits::max() - maxNBitValue(5); MockBufferObject bo(&drm, 0, 0, 1); bo.setAddress(address); auto boAddress = bo.peekAddress(); EXPECT_EQ(boAddress, expectedAddress); } TEST_F(DrmBufferObjectTest, givenBoIsCreatedWhenPageFaultIsSupportedThenExplicitResidencyIsRequiredByDefault) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); DrmMock drm(*(executionEnvironment.rootDeviceEnvironments[0].get())); for (auto isPageFaultSupported : {false, true}) { drm.pageFaultSupported = isPageFaultSupported; MockBufferObject bo(&drm, 0, 0, 1); EXPECT_EQ(isPageFaultSupported, bo.isExplicitResidencyRequired()); } } TEST_F(DrmBufferObjectTest, whenBoRequiresExplicitResidencyThenTheCorrespondingQueryReturnsCorrectValue) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); DrmMock drm(*(executionEnvironment.rootDeviceEnvironments[0].get())); MockBufferObject bo(&drm, 0, 0, 1); for (auto required : {false, true}) { bo.requireExplicitResidency(required); EXPECT_EQ(required, bo.isExplicitResidencyRequired()); } } 
drm_buffer_object_tests_prelim.cpp000066400000000000000000000023521422164147700343110ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/linux/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/os_interface.h" #include "shared/test/common/libult/linux/drm_mock.h" #include "shared/test/common/os_interface/linux/drm_buffer_object_fixture.h" #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/os_interface/linux/device_command_stream_fixture_prelim.h" using namespace NEO; using DrmBufferObjectPrelimFixture = DrmBufferObjectFixture; using DrmBufferObjectPrelimTest = Test; TEST_F(DrmBufferObjectPrelimTest, GivenCompletionAddressWhenCallingExecThenReturnIsCorrect) { mock->ioctl_expected.total = 1; mock->ioctl_res = 0; constexpr uint64_t completionAddress = 0x1230; constexpr uint32_t completionValue = 33; constexpr uint64_t expectedCompletionValue = completionValue; drm_i915_gem_exec_object2 execObjectsStorage = {}; auto ret = bo->exec(0, 0, 0, false, osContext.get(), 0, 1, nullptr, 0u, &execObjectsStorage, completionAddress, completionValue); EXPECT_EQ(0, ret); EXPECT_EQ(completionAddress, mock->context.completionAddress); EXPECT_EQ(expectedCompletionValue, mock->context.completionValue); } compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/linux/drm_command_stream_mm_tests.cpp000066400000000000000000000227041422164147700337060ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_container/command_encoder.h" #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/os_interface/linux/drm_command_stream.h" #include "shared/source/os_interface/linux/drm_memory_manager.h" #include "shared/source/os_interface/linux/drm_memory_operations_handler.h" #include "shared/source/os_interface/os_interface.h" 
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/libult/linux/drm_mock.h"
#include "shared/test/common/mocks/linux/mock_drm_allocation.h"
#include "shared/test/common/mocks/linux/mock_drm_memory_manager.h"
#include "shared/test/common/mocks/mock_allocation_properties.h"
#include "shared/test/common/mocks/mock_execution_environment.h"
#include "shared/test/common/os_interface/linux/device_command_stream_fixture.h"
#include "shared/test/common/os_interface/linux/drm_buffer_object_fixture.h"
#include "shared/test/common/os_interface/linux/drm_command_stream_fixture.h"
#include "shared/test/common/test_macros/test.h"

using namespace NEO;

// NOTE(review): several template argument lists appear to be missing from this
// text (e.g. `std::make_unique()`, `std::unique_ptr(drm)`,
// `static_cast *>(csr)`, `DrmCommandStreamEnhancedTemplate`). They are left
// exactly as found rather than guessed; restore them from the upstream file
// before building.

// Plain GoogleTest fixture: the memory-manager tests build their own
// execution environment and DRM mock.
using DrmCommandStreamMMTest = ::testing::Test;

/**
 * Fixture for the csr->exec() completion-fence tests. It only forwards
 * SetUp()/TearDown() to DrmCommandStreamEnhancedTemplate; the tests below rely
 * on the `mock`, `csr` and `mm` members, which are not declared here and so
 * presumably come from that base.
 */
struct DrmCommandStreamMemExecTest : public DrmCommandStreamEnhancedTemplate {
    void SetUp() override {
        DrmCommandStreamEnhancedTemplate::SetUp();
    }
    void TearDown() override {
        DrmCommandStreamEnhancedTemplate::TearDown();
    }
};

/**
 * With the EnableForcePin debug flag set to true, a freshly created
 * TestedDrmMemoryManager is expected to have a non-null pinBBs[0].
 */
HWTEST_F(DrmCommandStreamMMTest, GivenForcePinThenMemoryManagerCreatesPinBb) {
    // Puts the debug flags back when the test scope ends.
    DebugManagerStateRestore dbgRestorer;
    DebugManager.flags.EnableForcePin.set(true);

    MockExecutionEnvironment executionEnvironment;
    // Raw pointer on purpose: ownership is handed to the OS interface through
    // setDriverModel() two lines below.
    auto drm = new DrmMockCustom(*executionEnvironment.rootDeviceEnvironments[0]);

    executionEnvironment.rootDeviceEnvironments[0]->osInterface = std::make_unique();
    executionEnvironment.rootDeviceEnvironments[0]->osInterface->setDriverModel(std::unique_ptr(drm));
    executionEnvironment.rootDeviceEnvironments[0]->memoryOperationsInterface = DrmMemoryOperationsHandler::create(*drm, 0u);

    DrmCommandStreamReceiver csr(executionEnvironment, 0, 1, gemCloseWorkerMode::gemCloseWorkerInactive);

    // Ownership of the memory manager moves to the execution environment; the
    // raw pointer is kept only to inspect pinBBs.
    auto memoryManager = new TestedDrmMemoryManager(false, true, false, executionEnvironment);
    executionEnvironment.memoryManager.reset(memoryManager);
    ASSERT_NE(nullptr, memoryManager);
    EXPECT_NE(nullptr, memoryManager->pinBBs[0]);
}

/**
 * Counterpart of the test above with EnableForcePin set to false: pinBBs[0] is
 * still expected to be non-null. Unlike the test above, this one calls
 * setHwInfo() on the root device environment and does not install a
 * memoryOperationsInterface.
 */
HWTEST_F(DrmCommandStreamMMTest, givenForcePinDisabledWhenMemoryManagerIsCreatedThenPinBBIsCreated) {
    DebugManagerStateRestore dbgRestorer;
    DebugManager.flags.EnableForcePin.set(false);

    MockExecutionEnvironment executionEnvironment;
    auto drm = new DrmMockCustom(*executionEnvironment.rootDeviceEnvironments[0]);
    executionEnvironment.rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get());

    executionEnvironment.rootDeviceEnvironments[0]->osInterface = std::make_unique();
    executionEnvironment.rootDeviceEnvironments[0]->osInterface->setDriverModel(std::unique_ptr(drm));

    DrmCommandStreamReceiver csr(executionEnvironment, 0, 1, gemCloseWorkerMode::gemCloseWorkerInactive);

    auto memoryManager = new TestedDrmMemoryManager(false, true, false, executionEnvironment);
    executionEnvironment.memoryManager.reset(memoryManager);
    ASSERT_NE(nullptr, memoryManager);
    EXPECT_NE(nullptr, memoryManager->pinBBs[0]);
}

/**
 * Prepares two root device environments, gives each its own DRM mock and
 * memory-operations handler, and expects the memory manager to expose a
 * non-null pinBBs entry for every root device index.
 */
HWTEST_F(DrmCommandStreamMMTest, givenExecutionEnvironmentWithMoreThanOneRootDeviceEnvWhenCreatingDrmMemoryManagerThenCreateAsManyPinBBs) {
    MockExecutionEnvironment executionEnvironment;
    executionEnvironment.prepareRootDeviceEnvironments(2);

    for (uint32_t rootDeviceIndex = 0; rootDeviceIndex < executionEnvironment.rootDeviceEnvironments.size(); rootDeviceIndex++) {
        auto &rootDeviceEnvironment = *executionEnvironment.rootDeviceEnvironments[rootDeviceIndex];

        rootDeviceEnvironment.setHwInfo(defaultHwInfo.get());
        rootDeviceEnvironment.osInterface = std::make_unique();

        // Each mock is built from, and registered under, the environment it
        // is installed into. The earlier form built every mock from
        // environment 0 and created every handler with index 0u, so the
        // second root device was wired to the first one's environment.
        auto drm = new DrmMockCustom(rootDeviceEnvironment);
        rootDeviceEnvironment.osInterface->setDriverModel(std::unique_ptr(drm));
        rootDeviceEnvironment.memoryOperationsInterface = DrmMemoryOperationsHandler::create(*drm, rootDeviceIndex);
    }

    auto memoryManager = new TestedDrmMemoryManager(false, true, false, executionEnvironment);
    executionEnvironment.memoryManager.reset(memoryManager);
    ASSERT_NE(nullptr, memoryManager);

    for (uint32_t rootDeviceIndex = 0; rootDeviceIndex < executionEnvironment.rootDeviceEnvironments.size(); rootDeviceIndex++) {
        EXPECT_NE(nullptr, memoryManager->pinBBs[rootDeviceIndex]);
    }
}

/**
 * Mock reports both completion-fence support and VM bind availability.
 * After csr->exec(), the command buffer's buffer object is expected to have
 * received:
 *  - completion GPU address == tag allocation GPU address + Drm::completionFenceOffset
 *  - completion value       == csr->latestSentTaskCount (set to 2 here)
 */
HWTEST_TEMPLATED_F(DrmCommandStreamMemExecTest, GivenDrmSupportsVmBindAndCompletionFenceWhenCallingCsrExecThenTagAllocationIsPassed) {
    mock->completionFenceSupported = true;
    // Bypass the real availability query and force the answer.
    mock->isVmBindAvailableCall.callParent = false;
    mock->isVmBindAvailableCall.returnValue = true;

    TestedBufferObject bo(mock, 128);
    MockDrmAllocation cmdBuffer(AllocationType::COMMAND_BUFFER, MemoryPool::System4KBPages);
    cmdBuffer.bufferObjects[0] = &bo;
    uint8_t buff[128];

    // Minimal batch: a batch-buffer-end command, padded to a cache line.
    LinearStream cs(&cmdBuffer, buff, 128);
    CommandStreamReceiverHw::addBatchBufferEnd(cs, nullptr);
    EncodeNoop::alignToCacheLine(cs);
    BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false};

    auto allocation = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize});
    csr->makeResident(cmdBuffer);
    csr->makeResident(*allocation);
    csr->makeResident(*csr->getTagAllocation());

    uint64_t expectedCompletionGpuAddress = csr->getTagAllocation()->getGpuAddress() + Drm::completionFenceOffset;

    auto *testCsr = static_cast *>(csr);
    testCsr->latestSentTaskCount = 2;
    int ret = testCsr->exec(batchBuffer, 1, 2, 0);
    EXPECT_EQ(0, ret);

    EXPECT_EQ(expectedCompletionGpuAddress, bo.receivedCompletionGpuAddress);
    EXPECT_EQ(testCsr->latestSentTaskCount, bo.receivedCompletionValue);

    mm->freeGraphicsMemory(allocation);
}

/**
 * VM bind is reported available but completion fence is not supported.
 * After csr->exec(), the buffer object is expected to have received a zero
 * completion address and a zero completion value.
 */
HWTEST_TEMPLATED_F(DrmCommandStreamMemExecTest, GivenDrmSupportsVmBindAndNotCompletionFenceWhenCallingCsrExecThenTagAllocationIsNotPassed) {
    mock->completionFenceSupported = false;
    mock->isVmBindAvailableCall.callParent = false;
    mock->isVmBindAvailableCall.returnValue = true;

    TestedBufferObject bo(mock, 128);
    MockDrmAllocation cmdBuffer(AllocationType::COMMAND_BUFFER, MemoryPool::System4KBPages);
    cmdBuffer.bufferObjects[0] = &bo;
    uint8_t buff[128];

    LinearStream cs(&cmdBuffer, buff, 128);
    CommandStreamReceiverHw::addBatchBufferEnd(cs, nullptr);
    EncodeNoop::alignToCacheLine(cs);
    BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false};

    auto allocation = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize});
    csr->makeResident(cmdBuffer);
    csr->makeResident(*allocation);
    csr->makeResident(*csr->getTagAllocation());

    constexpr uint64_t expectedCompletionGpuAddress = 0;
    constexpr uint32_t expectedCompletionValue = 0;

    auto *testCsr = static_cast *>(csr);
    testCsr->latestSentTaskCount = 2;
    int ret = testCsr->exec(batchBuffer, 1, 2, 0);
    EXPECT_EQ(0, ret);

    EXPECT_EQ(expectedCompletionGpuAddress, bo.receivedCompletionGpuAddress);
    EXPECT_EQ(expectedCompletionValue, bo.receivedCompletionValue);

    mm->freeGraphicsMemory(allocation);
}

/**
 * Completion fence is supported but VM bind is reported unavailable.
 * As in the previous test, the buffer object is expected to have received a
 * zero completion address and a zero completion value.
 */
HWTEST_TEMPLATED_F(DrmCommandStreamMemExecTest, GivenDrmSupportsCompletionFenceAndNotVmBindWhenCallingCsrExecThenTagAllocationIsNotPassed) {
    mock->completionFenceSupported = true;
    mock->isVmBindAvailableCall.callParent = false;
    mock->isVmBindAvailableCall.returnValue = false;

    TestedBufferObject bo(mock, 128);
    MockDrmAllocation cmdBuffer(AllocationType::COMMAND_BUFFER, MemoryPool::System4KBPages);
    cmdBuffer.bufferObjects[0] = &bo;
    uint8_t buff[128];

    LinearStream cs(&cmdBuffer, buff, 128);
    CommandStreamReceiverHw::addBatchBufferEnd(cs, nullptr);
    EncodeNoop::alignToCacheLine(cs);
    BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false};

    auto allocation = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize});
    csr->makeResident(cmdBuffer);
    csr->makeResident(*allocation);
    csr->makeResident(*csr->getTagAllocation());

    constexpr uint64_t expectedCompletionGpuAddress = 0;
    constexpr uint32_t expectedCompletionValue = 0;

    auto *testCsr = static_cast *>(csr);
    testCsr->latestSentTaskCount = 2;
    int ret = testCsr->exec(batchBuffer, 1, 2, 0);
    EXPECT_EQ(0, ret);

    EXPECT_EQ(expectedCompletionGpuAddress, bo.receivedCompletionGpuAddress);
    EXPECT_EQ(expectedCompletionValue, bo.receivedCompletionValue);

    mm->freeGraphicsMemory(allocation);
}

// NOTE(review): the next line is tar member-header residue for
// drm_command_stream_tests_1.cpp fused onto that file's licence comment; it is
// kept exactly as found. The include list after it belongs to that file, and
// the final bare `#include` is completed by the path at the start of the
// following line of this dump.
compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/linux/drm_command_stream_tests_1.cpp000066400000000000000000001321021422164147700334270ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */

#include "shared/source/command_container/command_encoder.h"
#include "shared/source/command_stream/preemption.h"
#include "shared/source/direct_submission/linux/drm_direct_submission.h"
#include "shared/source/gmm_helper/gmm_helper.h"
#include "shared/source/gmm_helper/page_table_mngr.h"
#include "shared/source/gmm_helper/resource_info.h"
#include "shared/source/helpers/flush_stamp.h"
#include "shared/source/memory_manager/graphics_allocation.h"
#include "shared/source/memory_manager/internal_allocation_storage.h"
#include "shared/source/memory_manager/residency.h"
#include "shared/source/os_interface/linux/drm_buffer_object.h"
#include "shared/source/os_interface/linux/drm_command_stream.h"
#include "shared/source/os_interface/linux/drm_memory_operations_handler_default.h"
#include "shared/source/os_interface/linux/os_context_linux.h"
#include "shared/source/os_interface/os_context.h"
#include "shared/source/os_interface/os_interface.h"
#include "shared/source/os_interface/sys_calls_common.h"
#include "shared/test/common/cmd_parse/hw_parse.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/helpers/dispatch_flags_helper.h"
#include "shared/test/common/helpers/engine_descriptor_helper.h"
#include "shared/test/common/helpers/execution_environment_helper.h"
#include
"shared/test/common/mocks/linux/mock_drm_command_stream_receiver.h" #include "shared/test/common/mocks/mock_allocation_properties.h" #include "shared/test/common/mocks/mock_gmm.h" #include "shared/test/common/mocks/mock_gmm_page_table_mngr.h" #include "shared/test/common/mocks/mock_host_ptr_manager.h" #include "shared/test/common/mocks/mock_submissions_aggregator.h" #include "shared/test/common/os_interface/linux/drm_command_stream_fixture.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/helpers/gtest_helpers.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "drm/i915_drm.h" #include "gtest/gtest.h" using namespace NEO; HWTEST_TEMPLATED_F(DrmCommandStreamTest, givenFlushStampWhenWaitCalledThenWaitForSpecifiedBoHandle) { FlushStamp handleToWait = 123; drm_i915_gem_wait expectedWait = {}; expectedWait.bo_handle = static_cast(handleToWait); expectedWait.timeout_ns = -1; csr->waitForFlushStamp(handleToWait); EXPECT_TRUE(memcmp(&expectedWait, &mock->receivedGemWait, sizeof(drm_i915_gem_wait)) == 0); EXPECT_EQ(1, mock->ioctlCount.gemWait); } HWTEST_TEMPLATED_F(DrmCommandStreamTest, WhenMakingResidentThenSucceeds) { DrmAllocation graphicsAllocation(0, AllocationType::UNKNOWN, nullptr, nullptr, 1024, static_cast(1u), MemoryPool::MemoryNull); csr->makeResident(graphicsAllocation); EXPECT_EQ(0, mock->ioctlCount.gemUserptr); EXPECT_EQ(0, mock->ioctlCount.execbuffer2); mock->ioctlTearDownExpected.gemWait = 0; mock->ioctlTearDownExpected.gemClose = 0; mock->ioctlTearDownExpects = true; } HWTEST_TEMPLATED_F(DrmCommandStreamTest, WhenMakingResidentTwiceThenSucceeds) { DrmAllocation graphicsAllocation(0, AllocationType::UNKNOWN, nullptr, nullptr, 1024, static_cast(1u), MemoryPool::MemoryNull); csr->makeResident(graphicsAllocation); csr->makeResident(graphicsAllocation); 
EXPECT_EQ(0, mock->ioctlCount.gemUserptr); EXPECT_EQ(0, mock->ioctlCount.execbuffer2); mock->ioctlTearDownExpected.gemWait = 0; mock->ioctlTearDownExpected.gemClose = 0; mock->ioctlTearDownExpects = true; } HWTEST_TEMPLATED_F(DrmCommandStreamTest, GivenSizeZeroWhenMakingResidentTwiceThenSucceeds) { DrmAllocation graphicsAllocation(0, AllocationType::UNKNOWN, nullptr, nullptr, 0, static_cast(1u), MemoryPool::MemoryNull); csr->makeResident(graphicsAllocation); EXPECT_EQ(0, mock->ioctlCount.gemUserptr); EXPECT_EQ(0, mock->ioctlCount.execbuffer2); mock->ioctlTearDownExpected.gemWait = 0; mock->ioctlTearDownExpected.gemClose = 0; mock->ioctlTearDownExpects = true; } HWTEST_TEMPLATED_F(DrmCommandStreamTest, GivenResizedWhenMakingResidentTwiceThenSucceeds) { DrmAllocation graphicsAllocation(0, AllocationType::UNKNOWN, nullptr, nullptr, 1024, static_cast(1u), MemoryPool::MemoryNull); DrmAllocation graphicsAllocation2(0, AllocationType::UNKNOWN, nullptr, nullptr, 8192, static_cast(1u), MemoryPool::MemoryNull); csr->makeResident(graphicsAllocation); csr->makeResident(graphicsAllocation2); EXPECT_EQ(0, mock->ioctlCount.gemUserptr); EXPECT_EQ(0, mock->ioctlCount.execbuffer2); mock->ioctlTearDownExpected.gemWait = 0; mock->ioctlTearDownExpected.gemClose = 0; mock->ioctlTearDownExpects = true; } HWTEST_TEMPLATED_F(DrmCommandStreamTest, WhenFlushingThenAvailableSpaceDoesNotChange) { auto expectedSize = alignUp(8u, MemoryConstants::cacheLineSize); // bbEnd int boHandle = 123; mock->returnHandle = boHandle; auto &cs = csr->getCS(); auto commandBuffer = static_cast(cs.getGraphicsAllocation()); ASSERT_NE(nullptr, commandBuffer); ASSERT_EQ(0u, reinterpret_cast(commandBuffer->getUnderlyingBuffer()) & 0xFFF); EXPECT_EQ(boHandle, commandBuffer->getBO()->peekHandle()); CommandStreamReceiverHw::addBatchBufferEnd(cs, nullptr); EncodeNoop::alignToCacheLine(cs); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, 
QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false}; auto availableSpacePriorToFlush = cs.getAvailableSpace(); csr->flush(batchBuffer, csr->getResidencyAllocations()); EXPECT_EQ(static_cast(boHandle), csr->obtainCurrentFlushStamp()); EXPECT_NE(cs.getCpuBase(), nullptr); EXPECT_EQ(availableSpacePriorToFlush, cs.getAvailableSpace()); mock->ioctlTearDownExpected.gemWait = 2; mock->ioctlTearDownExpected.gemClose = 1; mock->ioctlTearDownExpects = true; EXPECT_EQ(1, mock->ioctlCount.gemUserptr); EXPECT_EQ(1, mock->ioctlCount.execbuffer2); EXPECT_EQ(0u, mock->execBuffers.back().batch_start_offset); EXPECT_EQ(expectedSize, mock->execBuffers.back().batch_len); } HWTEST_TEMPLATED_F(DrmCommandStreamTest, givenPrintIndicesEnabledWhenFlushThenPrintIndices) { DebugManagerStateRestore restorer; DebugManager.flags.PrintDeviceAndEngineIdOnSubmission.set(true); auto &cs = csr->getCS(); CommandStreamReceiverHw::addBatchBufferEnd(cs, nullptr); EncodeNoop::alignToCacheLine(cs); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false}; ::testing::internal::CaptureStdout(); csr->flush(batchBuffer, csr->getResidencyAllocations()); const std::string engineType = EngineHelpers::engineTypeToString(csr->getOsContext().getEngineType()); const std::string engineUsage = EngineHelpers::engineUsageToString(csr->getOsContext().getEngineUsage()); std::ostringstream expectedValue; expectedValue << SysCalls::getProcessId() << ": Submission to RootDevice Index: " << csr->getRootDeviceIndex() << ", Sub-Devices Mask: " << csr->getOsContext().getDeviceBitfield().to_ulong() << ", EngineId: " << csr->getOsContext().getEngineType() << " (" << engineType << ", " << engineUsage << ")\n"; EXPECT_TRUE(hasSubstr(::testing::internal::GetCapturedStdout(), expectedValue.str())); } HWTEST_TEMPLATED_F(DrmCommandStreamTest, 
givenDrmContextIdWhenFlushingThenSetIdToAllExecBuffersAndObjects) { uint32_t expectedDrmContextId = 321; uint32_t numAllocations = 3; auto allocation1 = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); auto allocation2 = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); csr->makeResident(*allocation1); csr->makeResident(*allocation2); mock->storedDrmContextId = expectedDrmContextId; osContext = std::make_unique(*mock, 1, EngineDescriptorHelper::getDefaultDescriptor(HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*defaultHwInfo)[0], PreemptionHelper::getDefaultPreemptionMode(*defaultHwInfo))); osContext->ensureContextInitialized(); csr->setupContext(*osContext); auto &cs = csr->getCS(); CommandStreamReceiverHw::addBatchBufferEnd(cs, nullptr); EncodeNoop::alignToCacheLine(cs); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false}; csr->flush(batchBuffer, csr->getResidencyAllocations()); memoryManager->freeGraphicsMemory(allocation1); memoryManager->freeGraphicsMemory(allocation2); EXPECT_EQ(1, mock->ioctlCount.contextCreate); EXPECT_EQ(1, mock->ioctlCount.execbuffer2); EXPECT_EQ(numAllocations, mock->execBuffers.back().buffer_count); EXPECT_EQ(expectedDrmContextId, mock->execBuffers.back().rsvd1); for (uint32_t i = 0; i < mock->receivedBos.size(); i++) { EXPECT_EQ(expectedDrmContextId, mock->receivedBos[i].rsvd1); } } HWTEST_TEMPLATED_F(DrmCommandStreamTest, GivenLowPriorityContextWhenFlushingThenSucceeds) { auto expectedSize = alignUp(8u, MemoryConstants::cacheLineSize); // bbEnd auto &cs = csr->getCS(); auto commandBuffer = static_cast(cs.getGraphicsAllocation()); ASSERT_NE(nullptr, commandBuffer); ASSERT_EQ(0u, 
reinterpret_cast(commandBuffer->getUnderlyingBuffer()) & 0xFFF); CommandStreamReceiverHw::addBatchBufferEnd(cs, nullptr); EncodeNoop::alignToCacheLine(cs); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, true, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false}; csr->flush(batchBuffer, csr->getResidencyAllocations()); EXPECT_NE(cs.getCpuBase(), nullptr); EXPECT_EQ(1, mock->ioctlCount.gemUserptr); EXPECT_EQ(1, mock->ioctlCount.execbuffer2); mock->ioctlTearDownExpected.gemWait = 2; mock->ioctlTearDownExpected.gemClose = 1; mock->ioctlTearDownExpects = true; EXPECT_EQ(0u, mock->execBuffers.back().batch_start_offset); EXPECT_EQ(expectedSize, mock->execBuffers.back().batch_len); } HWTEST_TEMPLATED_F(DrmCommandStreamTest, GivenInvalidAddressWhenFlushingThenSucceeds) { //allocate command buffer manually char *commandBuffer = new (std::nothrow) char[1024]; ASSERT_NE(nullptr, commandBuffer); DrmAllocation commandBufferAllocation(0, AllocationType::COMMAND_BUFFER, nullptr, commandBuffer, 1024, static_cast(1u), MemoryPool::MemoryNull); LinearStream cs(&commandBufferAllocation); CommandStreamReceiverHw::addBatchBufferEnd(cs, nullptr); EncodeNoop::alignToCacheLine(cs); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false}; csr->flush(batchBuffer, csr->getResidencyAllocations()); delete[] commandBuffer; EXPECT_EQ(0, mock->ioctlCount.gemUserptr); EXPECT_EQ(0, mock->ioctlCount.execbuffer2); mock->ioctlTearDownExpected.gemWait = 0; mock->ioctlTearDownExpected.gemClose = 0; mock->ioctlTearDownExpects = true; } HWTEST_TEMPLATED_F(DrmCommandStreamTest, GivenNotEmptyBbWhenFlushingThenSucceeds) { uint32_t bbUsed = 16 * sizeof(uint32_t); auto expectedSize = alignUp(bbUsed + 8, MemoryConstants::cacheLineSize); // bbUsed + bbEnd auto &cs = csr->getCS(); cs.getSpace(bbUsed); 
CommandStreamReceiverHw::addBatchBufferEnd(cs, nullptr); EncodeNoop::alignToCacheLine(cs); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false}; csr->flush(batchBuffer, csr->getResidencyAllocations()); EXPECT_EQ(1, mock->ioctlCount.gemUserptr); EXPECT_EQ(1, mock->ioctlCount.execbuffer2); mock->ioctlTearDownExpected.gemWait = 2; mock->ioctlTearDownExpected.gemClose = 1; mock->ioctlTearDownExpects = true; EXPECT_EQ(0u, mock->execBuffers.back().batch_start_offset); EXPECT_EQ(expectedSize, mock->execBuffers.back().batch_len); } HWTEST_TEMPLATED_F(DrmCommandStreamTest, GivenNotEmptyNotPaddedBbWhenFlushingThenSucceeds) { uint32_t bbUsed = 15 * sizeof(uint32_t); auto &cs = csr->getCS(); cs.getSpace(bbUsed); CommandStreamReceiverHw::addBatchBufferEnd(cs, nullptr); EncodeNoop::alignToCacheLine(cs); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false}; csr->flush(batchBuffer, csr->getResidencyAllocations()); EXPECT_EQ(1, mock->ioctlCount.gemUserptr); EXPECT_EQ(1, mock->ioctlCount.execbuffer2); mock->ioctlTearDownExpected.gemWait = 2; mock->ioctlTearDownExpected.gemClose = 1; mock->ioctlTearDownExpects = true; EXPECT_EQ(0u, mock->execBuffers.back().batch_start_offset); EXPECT_EQ(bbUsed + 4, mock->execBuffers.back().batch_len); } HWTEST_TEMPLATED_F(DrmCommandStreamTest, GivenNotAlignedWhenFlushingThenSucceeds) { auto &cs = csr->getCS(); auto commandBuffer = static_cast(cs.getGraphicsAllocation()); //make sure command buffer with offset is not page aligned ASSERT_NE(0u, (reinterpret_cast(commandBuffer->getUnderlyingBuffer()) + 4) & (this->alignment - 1)); ASSERT_EQ(4u, (reinterpret_cast(commandBuffer->getUnderlyingBuffer()) + 4) & 0x7F); auto expectedBatchStartOffset = (reinterpret_cast(commandBuffer->getUnderlyingBuffer()) + 4) & 
(this->alignment - 1); auto expectedSize = alignUp(8u, MemoryConstants::cacheLineSize); // bbEnd CommandStreamReceiverHw::addBatchBufferEnd(cs, nullptr); EncodeNoop::alignToCacheLine(cs); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 4, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false}; csr->flush(batchBuffer, csr->getResidencyAllocations()); EXPECT_EQ(1, mock->ioctlCount.gemUserptr); EXPECT_EQ(1, mock->ioctlCount.execbuffer2); mock->ioctlTearDownExpected.gemWait = 2; mock->ioctlTearDownExpected.gemClose = 1; mock->ioctlTearDownExpects = true; EXPECT_EQ(expectedBatchStartOffset, mock->execBuffers.back().batch_start_offset); EXPECT_EQ(expectedSize, mock->execBuffers.back().batch_len); } HWTEST_TEMPLATED_F(DrmCommandStreamTest, GivenCheckFlagsWhenFlushingThenSucceeds) { auto &cs = csr->getCS(); DrmAllocation allocation(0, AllocationType::UNKNOWN, nullptr, (void *)0x7FFFFFFF, 1024, static_cast(0u), MemoryPool::MemoryNull); DrmAllocation allocation2(0, AllocationType::UNKNOWN, nullptr, (void *)0x307FFFFFFF, 1024, static_cast(0u), MemoryPool::MemoryNull); csr->makeResident(allocation); csr->makeResident(allocation2); CommandStreamReceiverHw::addBatchBufferEnd(cs, nullptr); EncodeNoop::alignToCacheLine(cs); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false}; csr->flush(batchBuffer, csr->getResidencyAllocations()); EXPECT_EQ(1, mock->ioctlCount.execbuffer2); } HWTEST_TEMPLATED_F(DrmCommandStreamTest, GivenCheckDrmFreeWhenFlushingThenSucceeds) { mock->returnHandle = 17; auto &cs = csr->getCS(); auto commandBuffer = static_cast(cs.getGraphicsAllocation()); //make sure command buffer with offset is not page aligned ASSERT_NE(0u, (reinterpret_cast(commandBuffer->getUnderlyingBuffer()) + 4) & (this->alignment - 1)); ASSERT_EQ(4u, 
(reinterpret_cast(commandBuffer->getUnderlyingBuffer()) + 4) & 0x7F); auto expectedBatchStartOffset = (reinterpret_cast(commandBuffer->getUnderlyingBuffer()) + 4) & (this->alignment - 1); auto expectedSize = alignUp(8u, MemoryConstants::cacheLineSize); // bbEnd DrmAllocation allocation(0, AllocationType::UNKNOWN, nullptr, nullptr, 1024, static_cast(0u), MemoryPool::MemoryNull); csr->makeResident(allocation); CommandStreamReceiverHw::addBatchBufferEnd(cs, nullptr); EncodeNoop::alignToCacheLine(cs); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 4, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false}; csr->flush(batchBuffer, csr->getResidencyAllocations()); EXPECT_EQ(1, mock->ioctlCount.gemUserptr); EXPECT_EQ(1, mock->ioctlCount.execbuffer2); mock->ioctlTearDownExpected.gemClose = 1; mock->ioctlTearDownExpected.gemWait = 2; mock->ioctlTearDownExpects = true; EXPECT_EQ(expectedBatchStartOffset, mock->execBuffers.back().batch_start_offset); EXPECT_EQ(expectedSize, mock->execBuffers.back().batch_len); } HWTEST_TEMPLATED_F(DrmCommandStreamTest, WhenGettingDrmThenNonNullPointerIsReturned) { Drm *pDrm = nullptr; if (csr->getOSInterface()) { pDrm = csr->getOSInterface()->getDriverModel()->as(); } ASSERT_NE(nullptr, pDrm); } HWTEST_TEMPLATED_F(DrmCommandStreamTest, GivenCheckDrmFreeCloseFailedWhenFlushingThenSucceeds) { mock->returnHandle = 17; auto &cs = csr->getCS(); auto commandBuffer = static_cast(cs.getGraphicsAllocation()); //make sure command buffer with offset is not page aligned ASSERT_NE(0u, (reinterpret_cast(commandBuffer->getUnderlyingBuffer()) + 4) & (this->alignment - 1)); ASSERT_EQ(4u, (reinterpret_cast(commandBuffer->getUnderlyingBuffer()) + 4) & 0x7F); auto expectedBatchStartOffset = (reinterpret_cast(commandBuffer->getUnderlyingBuffer()) + 4) & (this->alignment - 1); auto expectedSize = alignUp(8u, MemoryConstants::cacheLineSize); // bbEnd mock->storedRetValForGemClose = -1; 
DrmAllocation allocation(0, AllocationType::UNKNOWN, nullptr, nullptr, 1024, static_cast(0u), MemoryPool::MemoryNull); csr->makeResident(allocation); CommandStreamReceiverHw::addBatchBufferEnd(cs, nullptr); EncodeNoop::alignToCacheLine(cs); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 4, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false}; csr->flush(batchBuffer, csr->getResidencyAllocations()); EXPECT_EQ(1, mock->ioctlCount.gemUserptr); EXPECT_EQ(1, mock->ioctlCount.execbuffer2); mock->ioctlTearDownExpected.gemClose = 1; mock->ioctlTearDownExpected.gemWait = 2; mock->ioctlTearDownExpects = true; EXPECT_EQ(expectedBatchStartOffset, mock->execBuffers.back().batch_start_offset); EXPECT_EQ(expectedSize, mock->execBuffers.back().batch_len); } class DrmCommandStreamBatchingTests : public DrmCommandStreamEnhancedTest { public: DrmAllocation *preemptionAllocation; template void SetUpT() { DrmCommandStreamEnhancedTest::SetUpT(); preemptionAllocation = static_cast(device->getDefaultEngine().commandStreamReceiver->getPreemptionAllocation()); } template void TearDownT() { DrmCommandStreamEnhancedTest::TearDownT(); } }; HWTEST_TEMPLATED_F(DrmCommandStreamBatchingTests, givenCsrWhenFlushIsCalledThenProperFlagsArePassed) { mock->reset(); auto commandBuffer = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); auto dummyAllocation = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, commandBuffer); ASSERT_EQ(0u, reinterpret_cast(commandBuffer->getUnderlyingBuffer()) & 0xFFF); LinearStream cs(commandBuffer); csr->makeResident(*dummyAllocation); CommandStreamReceiverHw::addBatchBufferEnd(cs, nullptr); EncodeNoop::alignToCacheLine(cs); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, 
QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false}; csr->flush(batchBuffer, csr->getResidencyAllocations()); int ioctlExecCnt = 1; int ioctlUserPtrCnt = 2; auto engineFlag = static_cast(csr->getOsContext()).getEngineFlag(); EXPECT_EQ(ioctlExecCnt + ioctlUserPtrCnt, this->mock->ioctl_cnt.total); EXPECT_EQ(ioctlExecCnt, this->mock->ioctl_cnt.execbuffer2); EXPECT_EQ(ioctlUserPtrCnt, this->mock->ioctl_cnt.gemUserptr); uint64_t flags = engineFlag | I915_EXEC_NO_RELOC; EXPECT_EQ(flags, this->mock->execBuffer.flags); mm->freeGraphicsMemory(dummyAllocation); mm->freeGraphicsMemory(commandBuffer); } HWTEST_TEMPLATED_F(DrmCommandStreamBatchingTests, givenCsrWhenDispatchPolicyIsSetToBatchingThenCommandBufferIsNotSubmitted) { mock->reset(); csr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); auto testedCsr = static_cast *>(csr); testedCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); testedCsr->useNewResourceImplicitFlush = false; testedCsr->useGpuIdleImplicitFlush = false; auto commandBuffer = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); auto dummyAllocation = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, commandBuffer); ASSERT_EQ(0u, reinterpret_cast(commandBuffer->getUnderlyingBuffer()) & 0xFFF); IndirectHeap cs(commandBuffer); csr->makeResident(*dummyAllocation); auto allocations = device->getDefaultEngine().commandStreamReceiver->getTagsMultiAllocation(); csr->setTagAllocation(static_cast(allocations->getGraphicsAllocation(csr->getRootDeviceIndex()))); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(device->getHardwareInfo()); csr->flushTask(cs, 0u, &cs, &cs, &cs, 0u, dispatchFlags, 
*device); //make sure command buffer is recorded auto &cmdBuffers = mockedSubmissionsAggregator->peekCommandBuffers(); EXPECT_FALSE(cmdBuffers.peekIsEmpty()); EXPECT_NE(nullptr, cmdBuffers.peekHead()); //preemption allocation size_t csrSurfaceCount = (device->getPreemptionMode() == PreemptionMode::MidThread) ? 2 : 0; csrSurfaceCount += testedCsr->globalFenceAllocation ? 1 : 0; csrSurfaceCount += testedCsr->clearColorAllocation ? 1 : 0; auto recordedCmdBuffer = cmdBuffers.peekHead(); EXPECT_EQ(3u + csrSurfaceCount, recordedCmdBuffer->surfaces.size()); //try to find all allocations auto elementInVector = std::find(recordedCmdBuffer->surfaces.begin(), recordedCmdBuffer->surfaces.end(), dummyAllocation); EXPECT_NE(elementInVector, recordedCmdBuffer->surfaces.end()); elementInVector = std::find(recordedCmdBuffer->surfaces.begin(), recordedCmdBuffer->surfaces.end(), commandBuffer); EXPECT_NE(elementInVector, recordedCmdBuffer->surfaces.end()); elementInVector = std::find(recordedCmdBuffer->surfaces.begin(), recordedCmdBuffer->surfaces.end(), allocations->getGraphicsAllocation(0u)); EXPECT_NE(elementInVector, recordedCmdBuffer->surfaces.end()); EXPECT_EQ(testedCsr->commandStream.getGraphicsAllocation(), recordedCmdBuffer->batchBuffer.commandBufferAllocation); int ioctlUserPtrCnt = (device->getPreemptionMode() == PreemptionMode::MidThread) ? 4 : 3; ioctlUserPtrCnt += testedCsr->clearColorAllocation ? 
1 : 0; EXPECT_EQ(ioctlUserPtrCnt, this->mock->ioctl_cnt.total); EXPECT_EQ(ioctlUserPtrCnt, this->mock->ioctl_cnt.gemUserptr); EXPECT_EQ(0u, this->mock->execBuffer.flags); csr->flushBatchedSubmissions(); mm->freeGraphicsMemory(dummyAllocation); mm->freeGraphicsMemory(commandBuffer); } HWTEST_TEMPLATED_F(DrmCommandStreamBatchingTests, givenRecordedCommandBufferWhenItIsSubmittedThenFlushTaskIsProperlyCalled) { mock->reset(); csr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); auto testedCsr = static_cast *>(csr); testedCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); testedCsr->useNewResourceImplicitFlush = false; testedCsr->useGpuIdleImplicitFlush = false; auto commandBuffer = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); IndirectHeap cs(commandBuffer); auto allocations = device->getDefaultEngine().commandStreamReceiver->getTagsMultiAllocation(); csr->setTagAllocation(static_cast(allocations->getGraphicsAllocation(csr->getRootDeviceIndex()))); auto &submittedCommandBuffer = csr->getCS(1024); //use some bytes submittedCommandBuffer.getSpace(4); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(device->getHardwareInfo()); dispatchFlags.guardCommandBufferWithPipeControl = true; csr->flushTask(cs, 0u, &cs, &cs, &cs, 0u, dispatchFlags, *device); auto &cmdBuffers = mockedSubmissionsAggregator->peekCommandBuffers(); auto storedCommandBuffer = cmdBuffers.peekHead(); ResidencyContainer copyOfResidency = storedCommandBuffer->surfaces; copyOfResidency.push_back(storedCommandBuffer->batchBuffer.commandBufferAllocation); csr->flushBatchedSubmissions(); EXPECT_TRUE(cmdBuffers.peekIsEmpty()); auto commandBufferGraphicsAllocation = submittedCommandBuffer.getGraphicsAllocation(); 
EXPECT_TRUE(commandBufferGraphicsAllocation->isResident(csr->getOsContext().getContextId())); //preemption allocation size_t csrSurfaceCount = (device->getPreemptionMode() == PreemptionMode::MidThread) ? 2 : 0; csrSurfaceCount += testedCsr->globalFenceAllocation ? 1 : 0; csrSurfaceCount += testedCsr->clearColorAllocation ? 1 : 0; //validate that submited command buffer has what we want EXPECT_EQ(3u + csrSurfaceCount, this->mock->execBuffer.buffer_count); EXPECT_EQ(4u, this->mock->execBuffer.batch_start_offset); EXPECT_EQ(submittedCommandBuffer.getUsed(), this->mock->execBuffer.batch_len); drm_i915_gem_exec_object2 *exec_objects = (drm_i915_gem_exec_object2 *)this->mock->execBuffer.buffers_ptr; for (unsigned int i = 0; i < this->mock->execBuffer.buffer_count; i++) { int handle = exec_objects[i].handle; auto handleFound = false; for (auto &graphicsAllocation : copyOfResidency) { auto bo = static_cast(graphicsAllocation)->getBO(); if (bo->peekHandle() == handle) { handleFound = true; } } EXPECT_TRUE(handleFound); } int ioctlExecCnt = 1; int ioctlUserPtrCnt = (device->getPreemptionMode() == PreemptionMode::MidThread) ? 3 : 2; ioctlUserPtrCnt += testedCsr->clearColorAllocation ? 
1 : 0;
    // Exactly one execbuffer2 ioctl plus the userptr ioctls counted above are expected.
    EXPECT_EQ(ioctlExecCnt, this->mock->ioctl_cnt.execbuffer2);
    EXPECT_EQ(ioctlUserPtrCnt, this->mock->ioctl_cnt.gemUserptr);
    EXPECT_EQ(ioctlExecCnt + ioctlUserPtrCnt, this->mock->ioctl_cnt.total);
    mm->freeGraphicsMemory(commandBuffer);
}

// NOTE(review): template parameter/argument lists look stripped in this text
// (e.g. `template void SetUpT()`, `std::unique_ptr osContext`) - presumably lost
// during extraction; tokens are kept exactly as found.

// Fixture for direct-submission tests: sets the EnableDirectSubmission debug flag,
// marks the default engine as supported in the mutable hardware info and calls
// initDirectSubmission on the CSR with the default engine's OS context.
struct DrmCommandStreamDirectSubmissionTest : public DrmCommandStreamEnhancedTest {
    template void SetUpT() {
        // Flags are set before the base SetUpT runs.
        DebugManager.flags.EnableDirectSubmission.set(1u);
        DebugManager.flags.DirectSubmissionDisableMonitorFence.set(0);
        DrmCommandStreamEnhancedTest::SetUpT();
        auto hwInfo = device->getRootDeviceEnvironment().getMutableHardwareInfo();
        auto engineType = device->getDefaultEngine().osContext->getEngineType();
        hwInfo->capabilityTable.directSubmissionEngines.data[engineType].engineSupported = true;
        csr->initDirectSubmission(*device.get(), *device->getDefaultEngine().osContext);
    }

    template void TearDownT() {
        // dbgState is reset before delegating to the base tear-down.
        this->dbgState.reset();
        DrmCommandStreamEnhancedTest::TearDownT();
    }

    DebugManagerStateRestore restorer;
};

// Blitter variant of the fixture: overrides direct-submission support so that only the
// blitter is enabled, creates a BCS OS context owned by the fixture and initializes
// direct submission on the CSR with that context.
struct DrmCommandStreamBlitterDirectSubmissionTest : public DrmCommandStreamDirectSubmissionTest {
    template void SetUpT() {
        DebugManager.flags.DirectSubmissionOverrideBlitterSupport.set(1u);
        DebugManager.flags.DirectSubmissionOverrideRenderSupport.set(0u);
        DebugManager.flags.DirectSubmissionOverrideComputeSupport.set(0u);
        DrmCommandStreamDirectSubmissionTest::SetUpT();
        osContext.reset(OsContext::create(device->getExecutionEnvironment()->rootDeviceEnvironments[0]->osInterface.get(), 0,
                                          EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS, EngineUsage::Regular}, PreemptionMode::ThreadGroup, device->getDeviceBitfield())));
        osContext->ensureContextInitialized();
        csr->initDirectSubmission(*device.get(), *osContext.get());
    }

    template void TearDownT() {
        DrmCommandStreamDirectSubmissionTest::TearDownT();
    }

    std::unique_ptr osContext;
};

// Flags recording which direct-submission functions a mock has been asked to run.
struct DrmDirectSubmissionFunctionsCalled {
    bool stopRingBuffer;
    bool wait;
    bool deallocateResources;
};

template struct MockDrmDirectSubmissionToTestDtor : public
DrmDirectSubmission> {
    // NOTE(review): template argument lists look stripped in this text (e.g. `DrmDirectSubmission>`,
    // `static_cast(...)`, `std::make_unique>(...)`) - presumably lost during extraction; tokens are
    // kept exactly as found.
    MockDrmDirectSubmissionToTestDtor(Device &device, OsContext &osContext, DrmDirectSubmissionFunctionsCalled &functionsCalled)
        : DrmDirectSubmission>(device, osContext), functionsCalled(functionsCalled) {
    }

    // Stops the ring and waits for the current tag value only when ringStart is set;
    // deallocateResources() is called unconditionally.
    ~MockDrmDirectSubmissionToTestDtor() override {
        if (ringStart) {
            stopRingBuffer();
            wait(static_cast(this->currentTagData.tagValue));
        }
        deallocateResources();
    }

    using DrmDirectSubmission>::ringStart;

    // The overrides below only record that they were called.
    bool stopRingBuffer() override {
        functionsCalled.stopRingBuffer = true;
        return true;
    }
    void wait(uint32_t taskCountToWait) override {
        functionsCalled.wait = true;
    }
    void deallocateResources() override {
        functionsCalled.deallocateResources = true;
    }

    // Caller-owned record, held by reference so it can be inspected after destruction.
    DrmDirectSubmissionFunctionsCalled &functionsCalled;
};

// Destroying a direct submission whose ring was never started must skip stop/wait
// but still deallocate resources.
HWTEST_TEMPLATED_F(DrmCommandStreamDirectSubmissionTest, givenEnabledDirectSubmissionWhenDtorIsCalledButRingIsNotStartedThenDontCallStopRingBufferNorWaitForTagValue) {
    DrmDirectSubmissionFunctionsCalled functionsCalled{};
    auto directSubmission = std::make_unique>(*device.get(), *device->getDefaultEngine().osContext, functionsCalled);
    ASSERT_NE(nullptr, directSubmission);
    EXPECT_FALSE(directSubmission->ringStart);
    // reset() runs the destructor under test.
    directSubmission.reset();
    EXPECT_FALSE(functionsCalled.stopRingBuffer);
    EXPECT_FALSE(functionsCalled.wait);
    EXPECT_TRUE(functionsCalled.deallocateResources);
}

// Mock that only exposes ringStart for inspection.
template struct MockDrmDirectSubmissionToTestRingStop : public DrmDirectSubmission> {
    MockDrmDirectSubmissionToTestRingStop(Device &device, OsContext &osContext)
        : DrmDirectSubmission>(device, osContext) {
    }
    using DrmDirectSubmission>::ringStart;
};

// After stopRingBuffer() the ringStart flag must be false.
HWTEST_TEMPLATED_F(DrmCommandStreamDirectSubmissionTest, givenEnabledDirectSubmissionWhenStopRingBufferIsCalledThenClearRingStart) {
    auto directSubmission = std::make_unique>(*device.get(), *device->getDefaultEngine().osContext);
    ASSERT_NE(nullptr, directSubmission);
    directSubmission->stopRingBuffer();
    EXPECT_FALSE(directSubmission->ringStart);
}

template struct MockDrmDirectSubmissionDispatchCommandBuffer : public DrmDirectSubmission>
{
    // NOTE(review): template argument lists look stripped in this text (e.g. `DrmDirectSubmission>`,
    // `static_cast *>(csr)`) - presumably lost during extraction; tokens are kept exactly as found.
    MockDrmDirectSubmissionDispatchCommandBuffer(Device &device, OsContext &osContext)
        : DrmDirectSubmission>(device, osContext) {
    }
    // The tests below use dispatchCommandBufferResult / dispatchCommandBufferCalled on this mock -
    // presumably members generated by this macro; confirm against its definition.
    ADDMETHOD_NOBASE(dispatchCommandBuffer, bool, false, (BatchBuffer & batchBuffer, FlushStampTracker &flushStamp));
};

// Same mock shape, used by the blitter direct-submission test.
template struct MockDrmBlitterDirectSubmissionDispatchCommandBuffer : public DrmDirectSubmission> {
    MockDrmBlitterDirectSubmissionDispatchCommandBuffer(Device &device, OsContext &osContext)
        : DrmDirectSubmission>(device, osContext) {
    }
    ADDMETHOD_NOBASE(dispatchCommandBuffer, bool, false, (BatchBuffer & batchBuffer, FlushStampTracker &flushStamp));
};

// When the installed direct submission reports dispatch failure, flush() must
// return SubmissionStatus::FAILED and the dispatch must have been attempted.
HWTEST_TEMPLATED_F(DrmCommandStreamDirectSubmissionTest, givenDirectSubmissionFailsThenFlushReturnsError) {
    // Replace the CSR's direct submission with the mock and force a failing result.
    static_cast *>(csr)->directSubmission = std::make_unique>(*device.get(), *device->getDefaultEngine().osContext);
    auto directSubmission = static_cast *>(csr)->directSubmission.get();
    static_cast *>(directSubmission)->dispatchCommandBufferResult = false;

    auto &cs = csr->getCS();
    CommandStreamReceiverHw::addBatchBufferEnd(cs, nullptr);
    EncodeNoop::alignToCacheLine(cs);
    BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 4, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false};
    // Local scratch buffer used as the batch buffer's end-command pointer.
    uint8_t bbStart[64];
    batchBuffer.endCmdPtr = &bbStart[0];

    auto res = csr->flush(batchBuffer, csr->getResidencyAllocations());
    EXPECT_GT(static_cast *>(directSubmission)->dispatchCommandBufferCalled, 0u);
    EXPECT_EQ(NEO::SubmissionStatus::FAILED, res);
}

// Blitter counterpart of the failing-dispatch test above.
HWTEST_TEMPLATED_F(DrmCommandStreamBlitterDirectSubmissionTest, givenBlitterDirectSubmissionFailsThenFlushReturnsError) {
    static_cast *>(csr)->blitterDirectSubmission = std::make_unique>(*device.get(), *device->getDefaultEngine().osContext);
    auto blitterDirectSubmission = static_cast *>(csr)->blitterDirectSubmission.get();
    static_cast *>(blitterDirectSubmission)->dispatchCommandBufferResult = false;

    auto &cs = csr->getCS();
    CommandStreamReceiverHw::addBatchBufferEnd(cs, nullptr);
EncodeNoop::alignToCacheLine(cs);
    BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 4, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false};
    // Local scratch buffer used as the batch buffer's end-command pointer.
    uint8_t bbStart[64];
    batchBuffer.endCmdPtr = &bbStart[0];

    auto res = csr->flush(batchBuffer, csr->getResidencyAllocations());
    // The mocked dispatch must have been attempted and its failure propagated.
    EXPECT_GT(static_cast *>(blitterDirectSubmission)->dispatchCommandBufferCalled, 0u);
    EXPECT_EQ(NEO::SubmissionStatus::FAILED, res);
}

// NOTE(review): template argument lists look stripped in this text (e.g. `DrmDirectSubmission>`,
// `static_cast *>(csr)`) - presumably lost during extraction; tokens are kept exactly as found.

// Accessor mocks: each only re-exports currentTagData so tests can modify it.
template struct MockDrmDirectSubmission : public DrmDirectSubmission> {
    using DrmDirectSubmission>::currentTagData;
};

template struct MockDrmBlitterDirectSubmission : public DrmDirectSubmission> {
    using DrmDirectSubmission>::currentTagData;
};

// With direct submission enabled, flush() must leave the CSR's flush stamp unchanged.
HWTEST_TEMPLATED_F(DrmCommandStreamDirectSubmissionTest, givenEnabledDirectSubmissionWhenFlushThenFlushStampIsNotUpdated) {
    auto &cs = csr->getCS();
    CommandStreamReceiverHw::addBatchBufferEnd(cs, nullptr);
    EncodeNoop::alignToCacheLine(cs);
    BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 4, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false};
    uint8_t bbStart[64];
    batchBuffer.endCmdPtr = &bbStart[0];

    auto flushStamp = csr->obtainCurrentFlushStamp();
    csr->flush(batchBuffer, csr->getResidencyAllocations());
    EXPECT_EQ(csr->obtainCurrentFlushStamp(), flushStamp);

    auto directSubmission = static_cast *>(csr)->directSubmission.get();
    ASSERT_NE(nullptr, directSubmission);
    // Tag value is zeroed before the fixture tears down - presumably so teardown does not
    // wait on a tag that is never written; confirm against the direct-submission destructor.
    static_cast *>(directSubmission)->currentTagData.tagValue = 0u;
}

// With bind available, the flushed command buffer allocation must be reported resident.
HWTEST_TEMPLATED_F(DrmCommandStreamDirectSubmissionTest, givenEnabledDirectSubmissionWhenFlushThenCommandBufferAllocationIsResident) {
    mock->bindAvailable = true;
    auto &cs = csr->getCS();
    CommandStreamReceiverHw::addBatchBufferEnd(cs, nullptr);
    EncodeNoop::alignToCacheLine(cs);
    BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 4, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false};
    uint8_t bbStart[64];
batchBuffer.endCmdPtr = &bbStart[0];
    csr->flush(batchBuffer, csr->getResidencyAllocations());

    // Residency is queried through the root device environment's memory operations interface.
    auto memoryOperationsInterface = executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->memoryOperationsInterface.get();
    EXPECT_EQ(memoryOperationsInterface->isResident(device.get(), *batchBuffer.commandBufferAllocation), MemoryOperationsStatus::SUCCESS);

    auto directSubmission = static_cast *>(csr)->directSubmission.get();
    ASSERT_NE(nullptr, directSubmission);
    static_cast *>(directSubmission)->currentTagData.tagValue = 0u;
}

// NOTE(review): template argument lists look stripped in this text (e.g. `static_cast *>(csr)`) -
// presumably lost during extraction; tokens are kept exactly as found.

// Blitter fixture: flush() must leave the flush stamp unchanged, and only the blitter
// direct submission (not the regular one) must exist on the CSR.
HWTEST_TEMPLATED_F(DrmCommandStreamBlitterDirectSubmissionTest, givenEnabledDirectSubmissionOnBlitterWhenFlushThenFlushStampIsNotUpdated) {
    auto &cs = csr->getCS();
    CommandStreamReceiverHw::addBatchBufferEnd(cs, nullptr);
    EncodeNoop::alignToCacheLine(cs);
    BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 4, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false};
    // Local scratch buffer used as the batch buffer's end-command pointer.
    uint8_t bbStart[64];
    batchBuffer.endCmdPtr = &bbStart[0];

    auto flushStamp = csr->obtainCurrentFlushStamp();
    csr->flush(batchBuffer, csr->getResidencyAllocations());
    EXPECT_EQ(csr->obtainCurrentFlushStamp(), flushStamp);

    auto directSubmission = static_cast *>(csr)->blitterDirectSubmission.get();
    ASSERT_NE(nullptr, directSubmission);
    static_cast *>(directSubmission)->currentTagData.tagValue = 0u;

    EXPECT_EQ(nullptr, static_cast *>(csr)->directSubmission.get());
}

// Blitter fixture with bind available: the flushed command buffer allocation must be resident.
HWTEST_TEMPLATED_F(DrmCommandStreamBlitterDirectSubmissionTest, givenEnabledDirectSubmissionOnBlitterWhenFlushThenCommandBufferAllocationIsResident) {
    mock->bindAvailable = true;
    auto &cs = csr->getCS();
    CommandStreamReceiverHw::addBatchBufferEnd(cs, nullptr);
    EncodeNoop::alignToCacheLine(cs);
    BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 4, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false};
    uint8_t bbStart[64];
    batchBuffer.endCmdPtr = &bbStart[0];
    csr->flush(batchBuffer,
csr->getResidencyAllocations());

    // Residency is queried through the root device environment's memory operations interface.
    auto memoryOperationsInterface = executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->memoryOperationsInterface.get();
    EXPECT_EQ(memoryOperationsInterface->isResident(device.get(), *batchBuffer.commandBufferAllocation), MemoryOperationsStatus::SUCCESS);

    auto directSubmission = static_cast *>(csr)->blitterDirectSubmission.get();
    ASSERT_NE(nullptr, directSubmission);
    static_cast *>(directSubmission)->currentTagData.tagValue = 0u;

    // Only the blitter direct submission is expected to exist in this fixture.
    EXPECT_EQ(nullptr, static_cast *>(csr)->directSubmission.get());
}

// NOTE(review): template argument lists look stripped in this text (e.g. `std::make_unique()`,
// `static_cast(DispatchMode::...)`) - presumably lost during extraction; tokens are kept exactly as found.

// CSR mock that inherits the base constructors and exposes dispatchMode for inspection.
template struct MockDrmCsr : public DrmCommandStreamReceiver {
    using DrmCommandStreamReceiver::DrmCommandStreamReceiver;
    using DrmCommandStreamReceiver::dispatchMode;
};

// createPageTableManager() must return the same manager the CSR stores in pageTableManager.
HWTEST_TEMPLATED_F(DrmCommandStreamTest, givenDrmCommandStreamReceiverWhenCreatePageTableManagerIsCalledThenCreatePageTableManager) {
    // Build a second root device environment (index 1) with its own OS interface and driver model.
    executionEnvironment.prepareRootDeviceEnvironments(2);
    executionEnvironment.rootDeviceEnvironments[1]->setHwInfo(defaultHwInfo.get());
    executionEnvironment.rootDeviceEnvironments[1]->initGmm();
    executionEnvironment.rootDeviceEnvironments[1]->osInterface = std::make_unique();
    executionEnvironment.rootDeviceEnvironments[1]->osInterface->setDriverModel(std::unique_ptr(new DrmMockCustom(*executionEnvironment.rootDeviceEnvironments[0])));
    auto csr = std::make_unique>(executionEnvironment, 1, 1, gemCloseWorkerMode::gemCloseWorkerActive);
    auto pageTableManager = csr->createPageTableManager();
    EXPECT_EQ(csr->pageTableManager.get(), pageTableManager);
}

// Checks the dispatch mode chosen at CSR construction for EnableLocalMemory on/off, and that
// the CsrDispatchMode debug flag takes precedence in both cases.
HWTEST_TEMPLATED_F(DrmCommandStreamTest, givenLocalMemoryEnabledWhenCreatingDrmCsrThenEnableBatching) {
    {
        // Local memory enabled: batched dispatch unless the flag says otherwise.
        DebugManagerStateRestore restore;
        DebugManager.flags.EnableLocalMemory.set(1);

        MockDrmCsr csr1(executionEnvironment, 0, 1, gemCloseWorkerMode::gemCloseWorkerInactive);
        EXPECT_EQ(DispatchMode::BatchedDispatch, csr1.dispatchMode);

        DebugManager.flags.CsrDispatchMode.set(static_cast(DispatchMode::ImmediateDispatch));
        MockDrmCsr csr2(executionEnvironment, 0, 1,
gemCloseWorkerMode::gemCloseWorkerInactive);
        EXPECT_EQ(DispatchMode::ImmediateDispatch, csr2.dispatchMode);
    }
    {
        // Local memory disabled: immediate dispatch unless the flag says otherwise.
        DebugManagerStateRestore restore;
        DebugManager.flags.EnableLocalMemory.set(0);

        MockDrmCsr csr1(executionEnvironment, 0, 1, gemCloseWorkerMode::gemCloseWorkerInactive);
        EXPECT_EQ(DispatchMode::ImmediateDispatch, csr1.dispatchMode);

        DebugManager.flags.CsrDispatchMode.set(static_cast(DispatchMode::BatchedDispatch));
        MockDrmCsr csr2(executionEnvironment, 0, 1, gemCloseWorkerMode::gemCloseWorkerInactive);
        EXPECT_EQ(DispatchMode::BatchedDispatch, csr2.dispatchMode);
    }
}

// NOTE(review): template argument lists look stripped in this text (e.g. `std::make_unique(...)`) -
// presumably lost during extraction; tokens are kept exactly as found.

// updateAuxTable(0, gmm, true) must forward GPU VA 0, the gmm's resource handle,
// DoNotWait == true and Map == 1 to the mocked manager, exactly once, and return true.
HWTEST_TEMPLATED_F(DrmCommandStreamTest, givenPageTableManagerAndMapTrueWhenUpdateAuxTableIsCalledThenItReturnsTrue) {
    // Ownership of the raw mock pointer is handed to csr->pageTableManager via reset().
    auto mockMngr = new MockGmmPageTableMngr();
    csr->pageTableManager.reset(mockMngr);
    executionEnvironment.rootDeviceEnvironments[0]->initGmm();
    auto gmm = std::make_unique(executionEnvironment.rootDeviceEnvironments[0]->getGmmClientContext());
    auto result = csr->pageTableManager->updateAuxTable(0, gmm.get(), true);
    EXPECT_EQ(0ull, mockMngr->updateAuxTableParamsPassed[0].ddiUpdateAuxTable.BaseGpuVA);
    EXPECT_EQ(gmm->gmmResourceInfo->peekHandle(), mockMngr->updateAuxTableParamsPassed[0].ddiUpdateAuxTable.BaseResInfo);
    EXPECT_EQ(true, mockMngr->updateAuxTableParamsPassed[0].ddiUpdateAuxTable.DoNotWait);
    EXPECT_EQ(1u, mockMngr->updateAuxTableParamsPassed[0].ddiUpdateAuxTable.Map);

    EXPECT_TRUE(result);
    EXPECT_EQ(1u, mockMngr->updateAuxTableCalled);
}

// Same scenario with map == false.
HWTEST_TEMPLATED_F(DrmCommandStreamTest, givenPageTableManagerAndMapFalseWhenUpdateAuxTableIsCalledThenItReturnsTrue) {
    auto mockMngr = new MockGmmPageTableMngr();
    csr->pageTableManager.reset(mockMngr);
    executionEnvironment.rootDeviceEnvironments[0]->initGmm();
    auto gmm = std::make_unique(executionEnvironment.rootDeviceEnvironments[0]->getGmmClientContext());
    auto result = csr->pageTableManager->updateAuxTable(0, gmm.get(), false);
    EXPECT_EQ(0ull, mockMngr->updateAuxTableParamsPassed[0].ddiUpdateAuxTable.BaseGpuVA);
EXPECT_EQ(gmm->gmmResourceInfo->peekHandle(), mockMngr->updateAuxTableParamsPassed[0].ddiUpdateAuxTable.BaseResInfo); EXPECT_EQ(true, mockMngr->updateAuxTableParamsPassed[0].ddiUpdateAuxTable.DoNotWait); EXPECT_EQ(0u, mockMngr->updateAuxTableParamsPassed[0].ddiUpdateAuxTable.Map); EXPECT_TRUE(result); EXPECT_EQ(1u, mockMngr->updateAuxTableCalled); } compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/linux/drm_command_stream_tests_2.cpp000066400000000000000000002405201422164147700334340ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_container/command_encoder.h" #include "shared/source/command_stream/preemption.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/gmm_helper/page_table_mngr.h" #include "shared/source/gmm_helper/resource_info.h" #include "shared/source/helpers/flush_stamp.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/source/memory_manager/residency.h" #include "shared/source/os_interface/linux/drm_buffer_object.h" #include "shared/source/os_interface/linux/drm_command_stream.h" #include "shared/source/os_interface/linux/drm_memory_operations_handler_default.h" #include "shared/source/os_interface/linux/os_context_linux.h" #include "shared/source/os_interface/os_context.h" #include "shared/source/os_interface/os_interface.h" #include "shared/test/common/cmd_parse/hw_parse.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/dispatch_flags_helper.h" #include "shared/test/common/helpers/execution_environment_helper.h" #include "shared/test/common/mocks/linux/mock_drm_command_stream_receiver.h" #include "shared/test/common/mocks/mock_allocation_properties.h" #include "shared/test/common/mocks/mock_gmm.h" #include "shared/test/common/mocks/mock_gmm_page_table_mngr.h" 
#include "shared/test/common/mocks/mock_host_ptr_manager.h" #include "shared/test/common/mocks/mock_submissions_aggregator.h" #include "shared/test/common/os_interface/linux/drm_command_stream_fixture.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/helpers/cl_memory_properties_helpers.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "drm/i915_drm.h" using namespace NEO; HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, givenDefaultDrmCSRWhenItIsCreatedThenGemCloseWorkerModeIsInactive) { EXPECT_EQ(gemCloseWorkerMode::gemCloseWorkerInactive, static_cast *>(csr)->peekGemCloseWorkerOperationMode()); } HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, givenCommandStreamWhenItIsFlushedWithGemCloseWorkerInDefaultModeThenWorkerDecreasesTheRefCount) { auto commandBuffer = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, commandBuffer); LinearStream cs(commandBuffer); CommandStreamReceiverHw::addBatchBufferEnd(cs, nullptr); EncodeNoop::alignToCacheLine(cs); auto storedBase = cs.getCpuBase(); auto storedGraphicsAllocation = cs.getGraphicsAllocation(); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false}; csr->flush(batchBuffer, csr->getResidencyAllocations()); EXPECT_EQ(cs.getCpuBase(), storedBase); EXPECT_EQ(cs.getGraphicsAllocation(), storedGraphicsAllocation); auto drmAllocation = static_cast(storedGraphicsAllocation); auto bo = drmAllocation->getBO(); //spin until gem close worker finishes execution while (bo->getRefCount() > 1) ; mm->freeGraphicsMemory(commandBuffer); } HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, 
givenTaskThatRequiresLargeResourceCountWhenItIsFlushedThenExecStorageIsResized) {
    // NOTE(review): template argument lists look stripped in this text (e.g. `std::vector graphicsAllocations`,
    // `static_cast *>(csr)`) - presumably lost during extraction; tokens are kept exactly as found.
    std::vector graphicsAllocations;
    auto &execStorage = static_cast *>(csr)->getExecStorage();
    // Start from an empty exec storage so the flush has to grow it.
    execStorage.resize(0);

    // Ten resident allocations plus the command buffer give the 11 buffers expected below.
    for (auto id = 0; id < 10; id++) {
        auto graphicsAllocation = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize});
        csr->makeResident(*graphicsAllocation);
        graphicsAllocations.push_back(graphicsAllocation);
    }
    auto commandBuffer = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize});
    LinearStream cs(commandBuffer);

    CommandStreamReceiverHw::addBatchBufferEnd(cs, nullptr);
    EncodeNoop::alignToCacheLine(cs);
    BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false};
    csr->flush(batchBuffer, csr->getResidencyAllocations());

    EXPECT_EQ(11u, this->mock->execBuffer.buffer_count);
    mm->freeGraphicsMemory(commandBuffer);
    for (auto graphicsAllocation : graphicsAllocations) {
        mm->freeGraphicsMemory(graphicsAllocation);
    }
    // The exec storage keeps the size it was grown to by the flush.
    EXPECT_EQ(11u, execStorage.size());
}

// makeResident / processResidency / makeNonResident must leave the buffer object's
// reference count at 1 throughout.
HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, givenGemCloseWorkerInactiveModeWhenMakeResidentIsCalledThenRefCountsAreNotUpdated) {
    auto dummyAllocation = static_cast(mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}));

    auto bo = dummyAllocation->getBO();
    EXPECT_EQ(1u, bo->getRefCount());

    csr->makeResident(*dummyAllocation);
    EXPECT_EQ(1u, bo->getRefCount());

    csr->processResidency(csr->getResidencyAllocations(), 0u);

    csr->makeNonResident(*dummyAllocation);
    EXPECT_EQ(1u, bo->getRefCount());

    mm->freeGraphicsMemory(dummyAllocation);
}

// Two separately allocated buffers must yield two residency entries.
HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, GivenTwoAllocationsWhenBackingStorageIsDifferentThenMakeResidentShouldAddTwoLocations) {
    auto allocation =
static_cast(mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}));
    // NOTE(review): template argument lists look stripped in this text (e.g. `static_cast(...)`,
    // `reinterpret_cast(...)`) - presumably lost during extraction; tokens are kept exactly as found.
    auto allocation2 = static_cast(mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}));

    csr->makeResident(*allocation);
    csr->makeResident(*allocation2);

    auto osContextId = csr->getOsContext().getContextId();

    // Both allocations are flagged resident before and after processResidency().
    EXPECT_TRUE(allocation->isResident(osContextId));
    EXPECT_TRUE(allocation2->isResident(osContextId));

    csr->processResidency(csr->getResidencyAllocations(), 0u);

    EXPECT_TRUE(allocation->isResident(osContextId));
    EXPECT_TRUE(allocation2->isResident(osContextId));

    EXPECT_EQ(getResidencyVector().size(), 2u);

    csr->makeNonResident(*allocation);
    csr->makeNonResident(*allocation2);

    EXPECT_FALSE(allocation->isResident(osContextId));
    EXPECT_FALSE(allocation2->isResident(osContextId));

    // The residency vector is empty once both allocations are made non-resident.
    EXPECT_EQ(getResidencyVector().size(), 0u);
    mm->freeGraphicsMemory(allocation);
    mm->freeGraphicsMemory(allocation2);
}

// Making the same allocation resident twice and flushing must leave the stream's CPU base
// and graphics allocation unchanged.
HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, givenCommandStreamWithDuplicatesWhenItIsFlushedWithGemCloseWorkerInactiveModeThenCsIsNotNulled) {
    auto commandBuffer = static_cast(mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}));
    auto dummyAllocation = static_cast(mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}));
    ASSERT_NE(nullptr, commandBuffer);
    // The low 12 bits of the underlying buffer address must be zero (0xFFF mask).
    ASSERT_EQ(0u, reinterpret_cast(commandBuffer->getUnderlyingBuffer()) & 0xFFF);
    LinearStream cs(commandBuffer);

    // Deliberate duplicate registration.
    csr->makeResident(*dummyAllocation);
    csr->makeResident(*dummyAllocation);

    CommandStreamReceiverHw::addBatchBufferEnd(cs, nullptr);
    EncodeNoop::alignToCacheLine(cs);
    auto storedBase = cs.getCpuBase();
    auto storedGraphicsAllocation = cs.getGraphicsAllocation();
    BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM,
QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false};
    csr->flush(batchBuffer, csr->getResidencyAllocations());
    // The flush must not have replaced the stream's base pointer or allocation.
    EXPECT_EQ(cs.getCpuBase(), storedBase);
    EXPECT_EQ(cs.getGraphicsAllocation(), storedGraphicsAllocation);

    mm->freeGraphicsMemory(dummyAllocation);
    mm->freeGraphicsMemory(commandBuffer);
}

// NOTE(review): template argument lists look stripped in this text (e.g. `static_cast *>(csr)`,
// `TestedDrmCommandStreamReceiver testedCsr`) - presumably lost during extraction; tokens are kept
// exactly as found.

// ReadBackCommandBufferAllocation == 1: the read-back address is recorded only when the command
// buffer is in the local memory pool; == 2: it is recorded unconditionally.
HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, givenDebugFlagSetWhenFlushingThenReadBackCommandBufferPointerIfRequired) {
    auto commandBuffer = static_cast(mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}));
    LinearStream cs(commandBuffer);

    auto testedCsr = static_cast *>(csr);

    CommandStreamReceiverHw::addBatchBufferEnd(cs, nullptr);
    EncodeNoop::alignToCacheLine(cs);
    BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false};

    {
        DebugManager.flags.ReadBackCommandBufferAllocation.set(1);

        csr->flush(batchBuffer, csr->getResidencyAllocations());

        if (commandBuffer->isAllocatedInLocalMemoryPool()) {
            EXPECT_EQ(commandBuffer->getUnderlyingBuffer(), testedCsr->latestReadBackAddress);
        } else {
            EXPECT_EQ(nullptr, testedCsr->latestReadBackAddress);
        }

        // Cleared so the second scenario starts from a known state.
        testedCsr->latestReadBackAddress = nullptr;
    }

    {
        DebugManager.flags.ReadBackCommandBufferAllocation.set(2);

        csr->flush(batchBuffer, csr->getResidencyAllocations());

        EXPECT_EQ(commandBuffer->getUnderlyingBuffer(), testedCsr->latestReadBackAddress);
    }

    mm->freeGraphicsMemory(commandBuffer);
}

// A CSR constructed with gemCloseWorkerInactive must report that mode back.
HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, givenDrmCsrCreatedWithInactiveGemCloseWorkerPolicyThenThreadIsNotCreated) {
    TestedDrmCommandStreamReceiver testedCsr(gemCloseWorkerMode::gemCloseWorkerInactive,
                                             *this->executionEnvironment,
                                             1);
    EXPECT_EQ(gemCloseWorkerMode::gemCloseWorkerInactive, testedCsr.peekGemCloseWorkerOperationMode());
}

HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest,
givenDrmAllocationWhenGetBufferObjectToModifyIsCalledForAGivenHandleIdThenTheCorrespondingBufferObjectGetsModified) {
    // NOTE(review): template argument lists look stripped in this text (e.g. `static_cast(0u)`) -
    // presumably lost during extraction; tokens are kept exactly as found.
    auto size = 1024u;
    auto allocation = new DrmAllocation(0, AllocationType::UNKNOWN, nullptr, nullptr, size, static_cast(0u), MemoryPool::MemoryNull);

    // Every handle slot starts out empty ...
    auto &bos = allocation->getBOs();
    for (auto handleId = 0u; handleId < EngineLimits::maxHandleCount; handleId++) {
        EXPECT_EQ(nullptr, bos[handleId]);
    }

    // ... is assigned through getBufferObjectToModify() ...
    for (auto handleId = 0u; handleId < EngineLimits::maxHandleCount; handleId++) {
        allocation->getBufferObjectToModify(handleId) = this->createBO(size);
    }

    // ... and the assignment is visible through the reference obtained earlier.
    for (auto handleId = 0u; handleId < EngineLimits::maxHandleCount; handleId++) {
        EXPECT_NE(nullptr, bos[handleId]);
    }

    mm->freeGraphicsMemory(allocation);
}

// A single buffer-backed allocation becomes resident after processResidency() and
// non-resident after makeNonResident(); the reference count stays at 1.
HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, WhenMakingResidentThenSucceeds) {
    auto buffer = this->createBO(1024);
    auto allocation = new DrmAllocation(0, AllocationType::UNKNOWN, buffer, nullptr, buffer->peekSize(), static_cast(0u), MemoryPool::MemoryNull);
    EXPECT_EQ(nullptr, allocation->getUnderlyingBuffer());

    csr->makeResident(*allocation);
    csr->processResidency(csr->getResidencyAllocations(), 0u);

    EXPECT_TRUE(isResident(buffer));
    EXPECT_EQ(1u, buffer->getRefCount());

    csr->makeNonResident(*allocation);
    EXPECT_FALSE(isResident(buffer));
    EXPECT_EQ(1u, buffer->getRefCount());
    mm->freeGraphicsMemory(allocation);
}

// Two buffer-backed allocations made resident together.
HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, GivenMultipleAllocationsWhenMakingResidentThenEachSucceeds) {
    BufferObject *buffer1 = this->createBO(4096);
    BufferObject *buffer2 = this->createBO(4096);
    auto allocation1 = new DrmAllocation(0, AllocationType::UNKNOWN, buffer1, nullptr, buffer1->peekSize(), static_cast(0u), MemoryPool::MemoryNull);
    auto allocation2 = new DrmAllocation(0, AllocationType::UNKNOWN, buffer2, nullptr, buffer2->peekSize(), static_cast(0u), MemoryPool::MemoryNull);
    EXPECT_EQ(nullptr, allocation1->getUnderlyingBuffer());
    EXPECT_EQ(nullptr, allocation2->getUnderlyingBuffer());

    csr->makeResident(*allocation1);
csr->makeResident(*allocation2); csr->processResidency(csr->getResidencyAllocations(), 0u); EXPECT_TRUE(isResident(buffer1)); EXPECT_TRUE(isResident(buffer2)); EXPECT_EQ(1u, buffer1->getRefCount()); EXPECT_EQ(1u, buffer2->getRefCount()); // dont call makeNonResident on allocation2, any other makeNonResident call will clean this // we want to keep all makeResident calls before flush and makeNonResident everyting after flush csr->makeNonResident(*allocation1); mm->freeGraphicsMemory(allocation1); mm->freeGraphicsMemory(allocation2); } HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, WhenMakingResidentTwiceThenRefCountIsOne) { auto buffer = this->createBO(1024); auto allocation = new DrmAllocation(0, AllocationType::UNKNOWN, buffer, nullptr, buffer->peekSize(), static_cast(0u), MemoryPool::MemoryNull); csr->makeResident(*allocation); csr->processResidency(csr->getResidencyAllocations(), 0u); EXPECT_TRUE(isResident(buffer)); EXPECT_EQ(1u, buffer->getRefCount()); csr->getResidencyAllocations().clear(); csr->makeResident(*allocation); csr->processResidency(csr->getResidencyAllocations(), 0u); EXPECT_TRUE(isResident(buffer)); EXPECT_EQ(1u, buffer->getRefCount()); csr->makeNonResident(*allocation); EXPECT_FALSE(isResident(buffer)); EXPECT_EQ(1u, buffer->getRefCount()); mm->freeGraphicsMemory(allocation); } HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, GivenFragmentStorageWhenMakingResidentTwiceThenRefCountIsOne) { auto ptr = (void *)0x1001; auto size = MemoryConstants::pageSize * 10; auto reqs = MockHostPtrManager::getAllocationRequirements(csr->getRootDeviceIndex(), ptr, size); auto allocation = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, size}, ptr); ASSERT_EQ(3u, allocation->fragmentsStorage.fragmentCount); csr->makeResident(*allocation); csr->makeResident(*allocation); csr->processResidency(csr->getResidencyAllocations(), 0u); for (int i = 0; i < maxFragmentsCount; i++) { 
ASSERT_EQ(allocation->fragmentsStorage.fragmentStorageData[i].cpuPtr, reqs.allocationFragments[i].allocationPtr); auto bo = static_cast(allocation->fragmentsStorage.fragmentStorageData[i].osHandleStorage)->bo; EXPECT_TRUE(isResident(bo)); EXPECT_EQ(1u, bo->getRefCount()); } csr->makeNonResident(*allocation); for (int i = 0; i < maxFragmentsCount; i++) { auto bo = static_cast(allocation->fragmentsStorage.fragmentStorageData[i].osHandleStorage)->bo; EXPECT_FALSE(isResident(bo)); EXPECT_EQ(1u, bo->getRefCount()); } mm->freeGraphicsMemory(allocation); } HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, givenFragmentedAllocationsWithResuedFragmentsWhenTheyAreMadeResidentThenFragmentsDoNotDuplicate) { mock->ioctl_expected.total = 9; //3 fragments auto ptr = (void *)0x1001; auto size = MemoryConstants::pageSize * 10; auto graphicsAllocation = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, size}, ptr); auto offsetedPtr = (void *)((uintptr_t)ptr + size); auto size2 = MemoryConstants::pageSize - 1; auto graphicsAllocation2 = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, size2}, offsetedPtr); //graphicsAllocation2 reuses one fragment from graphicsAllocation EXPECT_EQ(graphicsAllocation->fragmentsStorage.fragmentStorageData[2].residency, graphicsAllocation2->fragmentsStorage.fragmentStorageData[0].residency); csr->makeResident(*graphicsAllocation); csr->makeResident(*graphicsAllocation2); csr->processResidency(csr->getResidencyAllocations(), 0u); auto &osContext = csr->getOsContext(); EXPECT_TRUE(graphicsAllocation->fragmentsStorage.fragmentStorageData[0].residency->resident[osContext.getContextId()]); EXPECT_TRUE(graphicsAllocation->fragmentsStorage.fragmentStorageData[1].residency->resident[osContext.getContextId()]); EXPECT_TRUE(graphicsAllocation->fragmentsStorage.fragmentStorageData[2].residency->resident[osContext.getContextId()]); 
EXPECT_TRUE(graphicsAllocation2->fragmentsStorage.fragmentStorageData[0].residency->resident[osContext.getContextId()]); auto &residency = getResidencyVector(); EXPECT_EQ(3u, residency.size()); csr->makeSurfacePackNonResident(csr->getResidencyAllocations()); //check that each packet is not resident EXPECT_FALSE(graphicsAllocation->fragmentsStorage.fragmentStorageData[0].residency->resident[osContext.getContextId()]); EXPECT_FALSE(graphicsAllocation->fragmentsStorage.fragmentStorageData[1].residency->resident[osContext.getContextId()]); EXPECT_FALSE(graphicsAllocation->fragmentsStorage.fragmentStorageData[2].residency->resident[osContext.getContextId()]); EXPECT_FALSE(graphicsAllocation2->fragmentsStorage.fragmentStorageData[0].residency->resident[osContext.getContextId()]); EXPECT_EQ(0u, residency.size()); csr->makeResident(*graphicsAllocation); csr->makeResident(*graphicsAllocation2); csr->processResidency(csr->getResidencyAllocations(), 0u); EXPECT_TRUE(graphicsAllocation->fragmentsStorage.fragmentStorageData[0].residency->resident[osContext.getContextId()]); EXPECT_TRUE(graphicsAllocation->fragmentsStorage.fragmentStorageData[1].residency->resident[osContext.getContextId()]); EXPECT_TRUE(graphicsAllocation->fragmentsStorage.fragmentStorageData[2].residency->resident[osContext.getContextId()]); EXPECT_TRUE(graphicsAllocation2->fragmentsStorage.fragmentStorageData[0].residency->resident[osContext.getContextId()]); EXPECT_EQ(3u, residency.size()); csr->makeSurfacePackNonResident(csr->getResidencyAllocations()); EXPECT_EQ(0u, residency.size()); EXPECT_FALSE(graphicsAllocation->fragmentsStorage.fragmentStorageData[0].residency->resident[osContext.getContextId()]); EXPECT_FALSE(graphicsAllocation->fragmentsStorage.fragmentStorageData[1].residency->resident[osContext.getContextId()]); EXPECT_FALSE(graphicsAllocation->fragmentsStorage.fragmentStorageData[2].residency->resident[osContext.getContextId()]); 
EXPECT_FALSE(graphicsAllocation2->fragmentsStorage.fragmentStorageData[0].residency->resident[osContext.getContextId()]);

    mm->freeGraphicsMemory(graphicsAllocation);
    mm->freeGraphicsMemory(graphicsAllocation2);
}

// Allocates ten pages from an unaligned host pointer, asserts the allocation is
// split into three fragments, and checks the residency state of every fragment
// BO after makeResident/processResidency and again after makeNonResident.
HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, GivenAllocationCreatedFromThreeFragmentsWhenMakeResidentIsBeingCalledThenAllFragmentsAreMadeResident) {
    auto hostPtr = (void *)0x1001;
    auto hostSize = MemoryConstants::pageSize * 10;

    auto requirements = MockHostPtrManager::getAllocationRequirements(csr->getRootDeviceIndex(), hostPtr, hostSize);
    auto allocation = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, hostSize}, hostPtr);
    ASSERT_EQ(3u, allocation->fragmentsStorage.fragmentCount);

    csr->makeResident(*allocation);
    csr->processResidency(csr->getResidencyAllocations(), 0u);

    // NOTE(review): static_cast has no template argument list in this copy of the file - confirm against upstream.
    for (int fragmentIdx = 0; fragmentIdx < maxFragmentsCount; ++fragmentIdx) {
        auto &fragment = allocation->fragmentsStorage.fragmentStorageData[fragmentIdx];
        ASSERT_EQ(fragment.cpuPtr, requirements.allocationFragments[fragmentIdx].allocationPtr);
        auto fragmentBo = static_cast(fragment.osHandleStorage)->bo;
        EXPECT_TRUE(isResident(fragmentBo));
        EXPECT_EQ(1u, fragmentBo->getRefCount());
    }

    csr->makeNonResident(*allocation);

    for (int fragmentIdx = 0; fragmentIdx < maxFragmentsCount; ++fragmentIdx) {
        auto &fragment = allocation->fragmentsStorage.fragmentStorageData[fragmentIdx];
        auto fragmentBo = static_cast(fragment.osHandleStorage)->bo;
        EXPECT_FALSE(isResident(fragmentBo));
        EXPECT_EQ(1u, fragmentBo->getRefCount());
    }

    mm->freeGraphicsMemory(allocation);
}

// Exercises the same residency checks for allocations with different fragment
// counts created from one host pointer: first a page-sized allocation that is
// asserted to need two fragments.
HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, GivenAllocationsContainingDifferentCountOfFragmentsWhenAllocationIsMadeResidentThenAllFragmentsAreMadeResident) {
    auto ptr = (void *)0x1001;
    auto size = MemoryConstants::pageSize;
    auto size2 = 100u;

    auto reqs = MockHostPtrManager::getAllocationRequirements(csr->getRootDeviceIndex(), ptr, size);
    auto allocation = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, size}, ptr);

    ASSERT_EQ(2u, allocation->fragmentsStorage.fragmentCount);
    ASSERT_EQ(2u, reqs.requiredFragmentsCount);
csr->makeResident(*allocation); csr->processResidency(csr->getResidencyAllocations(), 0u); for (unsigned int i = 0; i < reqs.requiredFragmentsCount; i++) { ASSERT_EQ(allocation->fragmentsStorage.fragmentStorageData[i].cpuPtr, reqs.allocationFragments[i].allocationPtr); auto bo = static_cast(allocation->fragmentsStorage.fragmentStorageData[i].osHandleStorage)->bo; EXPECT_TRUE(isResident(bo)); EXPECT_EQ(1u, bo->getRefCount()); } csr->makeNonResident(*allocation); for (unsigned int i = 0; i < reqs.requiredFragmentsCount; i++) { auto bo = static_cast(allocation->fragmentsStorage.fragmentStorageData[i].osHandleStorage)->bo; EXPECT_FALSE(isResident(bo)); EXPECT_EQ(1u, bo->getRefCount()); } mm->freeGraphicsMemory(allocation); csr->getResidencyAllocations().clear(); auto allocation2 = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, size2}, ptr); reqs = MockHostPtrManager::getAllocationRequirements(csr->getRootDeviceIndex(), ptr, size2); ASSERT_EQ(1u, allocation2->fragmentsStorage.fragmentCount); ASSERT_EQ(1u, reqs.requiredFragmentsCount); csr->makeResident(*allocation2); csr->processResidency(csr->getResidencyAllocations(), 0u); for (unsigned int i = 0; i < reqs.requiredFragmentsCount; i++) { ASSERT_EQ(allocation2->fragmentsStorage.fragmentStorageData[i].cpuPtr, reqs.allocationFragments[i].allocationPtr); auto bo = static_cast(allocation2->fragmentsStorage.fragmentStorageData[i].osHandleStorage)->bo; EXPECT_TRUE(isResident(bo)); EXPECT_EQ(1u, bo->getRefCount()); } csr->makeNonResident(*allocation2); for (unsigned int i = 0; i < reqs.requiredFragmentsCount; i++) { auto bo = static_cast(allocation2->fragmentsStorage.fragmentStorageData[i].osHandleStorage)->bo; EXPECT_FALSE(isResident(bo)); EXPECT_EQ(1u, bo->getRefCount()); } mm->freeGraphicsMemory(allocation2); } HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, GivenTwoAllocationsWhenBackingStorageIsTheSameThenMakeResidentShouldAddOnlyOneLocation) { auto ptr = (void 
*)0x1000; auto size = MemoryConstants::pageSize; auto ptr2 = (void *)0x1000; auto allocation = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, size}, ptr); auto allocation2 = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, size}, ptr2); csr->makeResident(*allocation); csr->makeResident(*allocation2); csr->processResidency(csr->getResidencyAllocations(), 0u); EXPECT_EQ(getResidencyVector().size(), 1u); csr->makeNonResident(*allocation); csr->makeNonResident(*allocation2); mm->freeGraphicsMemory(allocation); mm->freeGraphicsMemory(allocation2); csr->getResidencyAllocations().clear(); } HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, WhenFlushingThenSucceeds) { auto &cs = csr->getCS(); auto commandBuffer = static_cast(cs.getGraphicsAllocation()); ASSERT_EQ(0u, reinterpret_cast(commandBuffer->getUnderlyingBuffer()) & 0xFFF); CommandStreamReceiverHw::addBatchBufferEnd(cs, nullptr); EncodeNoop::alignToCacheLine(cs); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false}; csr->flush(batchBuffer, csr->getResidencyAllocations()); EXPECT_NE(cs.getCpuBase(), nullptr); EXPECT_NE(cs.getGraphicsAllocation(), nullptr); } HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, WhenFlushNotCalledThenClearResidency) { auto allocation1 = static_cast(mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize})); auto allocation2 = static_cast(mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize})); ASSERT_NE(nullptr, allocation1); ASSERT_NE(nullptr, allocation2); EXPECT_EQ(getResidencyVector().size(), 0u); csr->makeResident(*allocation1); csr->makeResident(*allocation2); csr->processResidency(csr->getResidencyAllocations(), 0u); 
EXPECT_TRUE(isResident(allocation1->getBO())); EXPECT_TRUE(isResident(allocation2->getBO())); EXPECT_EQ(getResidencyVector().size(), 2u); EXPECT_EQ(allocation1->getBO()->getRefCount(), 1u); EXPECT_EQ(allocation2->getBO()->getRefCount(), 1u); // makeNonResident without flush csr->makeNonResident(*allocation1); EXPECT_EQ(getResidencyVector().size(), 0u); // everything is nonResident after first call EXPECT_FALSE(isResident(allocation1->getBO())); EXPECT_FALSE(isResident(allocation2->getBO())); EXPECT_EQ(allocation1->getBO()->getRefCount(), 1u); EXPECT_EQ(allocation2->getBO()->getRefCount(), 1u); mm->freeGraphicsMemory(allocation1); mm->freeGraphicsMemory(allocation2); } HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, givenPrintBOsForSubmitWhenPrintThenProperValuesArePrinted) { DebugManagerStateRestore restorer; DebugManager.flags.PrintBOsForSubmit.set(true); auto allocation1 = static_cast(mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize})); auto allocation2 = static_cast(mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize})); auto buffer = static_cast(mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize})); ASSERT_NE(nullptr, allocation1); ASSERT_NE(nullptr, allocation2); ASSERT_NE(nullptr, buffer); csr->makeResident(*allocation1); csr->makeResident(*allocation2); ResidencyContainer residency; residency.push_back(allocation1); residency.push_back(allocation2); testing::internal::CaptureStdout(); static_cast *>(csr)->printBOsForSubmit(residency, *buffer); std::string output = testing::internal::GetCapturedStdout(); std::vector bos; allocation1->makeBOsResident(&csr->getOsContext(), 0, &bos, true); allocation2->makeBOsResident(&csr->getOsContext(), 0, &bos, true); buffer->makeBOsResident(&csr->getOsContext(), 0, &bos, true); std::stringstream expected; expected << 
"Buffer object for submit\n"; for (const auto &bo : bos) { expected << "BO-" << bo->peekHandle() << ", range: " << std::hex << bo->peekAddress() << " - " << ptrOffset(bo->peekAddress(), bo->peekSize()) << ", size: " << std::dec << bo->peekSize() << "\n"; } expected << "\n"; EXPECT_FALSE(output.compare(expected.str())); mm->freeGraphicsMemory(allocation1); mm->freeGraphicsMemory(allocation2); mm->freeGraphicsMemory(buffer); } HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, GivenFlushMultipleTimesThenSucceeds) { auto &cs = csr->getCS(); auto commandBuffer = static_cast(cs.getGraphicsAllocation()); CommandStreamReceiverHw::addBatchBufferEnd(cs, nullptr); EncodeNoop::alignToCacheLine(cs); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false}; csr->flush(batchBuffer, csr->getResidencyAllocations()); cs.replaceBuffer(commandBuffer->getUnderlyingBuffer(), commandBuffer->getUnderlyingBufferSize()); cs.replaceGraphicsAllocation(commandBuffer); CommandStreamReceiverHw::addBatchBufferEnd(cs, nullptr); EncodeNoop::alignToCacheLine(cs); BatchBuffer batchBuffer2{cs.getGraphicsAllocation(), 8, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false}; csr->flush(batchBuffer2, csr->getResidencyAllocations()); auto allocation = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, allocation); auto allocation2 = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, allocation2); csr->makeResident(*allocation); csr->makeResident(*allocation2); csr->getInternalAllocationStorage()->storeAllocation(std::unique_ptr(commandBuffer), REUSABLE_ALLOCATION); auto commandBuffer2 = 
mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, commandBuffer2); cs.replaceBuffer(commandBuffer2->getUnderlyingBuffer(), commandBuffer2->getUnderlyingBufferSize()); cs.replaceGraphicsAllocation(commandBuffer2); CommandStreamReceiverHw::addBatchBufferEnd(cs, nullptr); EncodeNoop::alignToCacheLine(cs); BatchBuffer batchBuffer3{cs.getGraphicsAllocation(), 16, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false}; csr->flush(batchBuffer3, csr->getResidencyAllocations()); csr->makeSurfacePackNonResident(csr->getResidencyAllocations()); mm->freeGraphicsMemory(allocation); mm->freeGraphicsMemory(allocation2); csr->getInternalAllocationStorage()->storeAllocation(std::unique_ptr(commandBuffer2), REUSABLE_ALLOCATION); commandBuffer2 = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, commandBuffer2); cs.replaceBuffer(commandBuffer2->getUnderlyingBuffer(), commandBuffer2->getUnderlyingBufferSize()); cs.replaceGraphicsAllocation(commandBuffer2); CommandStreamReceiverHw::addBatchBufferEnd(cs, nullptr); EncodeNoop::alignToCacheLine(cs); BatchBuffer batchBuffer4{cs.getGraphicsAllocation(), 24, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false}; csr->flush(batchBuffer4, csr->getResidencyAllocations()); } HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, GivenNotEmptyBbWhenFlushingThenSucceeds) { int bbUsed = 16 * sizeof(uint32_t); auto &cs = csr->getCS(); cs.getSpace(bbUsed); CommandStreamReceiverHw::addBatchBufferEnd(cs, nullptr); EncodeNoop::alignToCacheLine(cs); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false}; csr->flush(batchBuffer, 
csr->getResidencyAllocations());
}

// Same as the non-empty batch buffer case, but reserves 15 dwords so the used
// size is not a multiple of 16 dwords before padding.
HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, GivenNotEmptyNotPaddedBbWhenFlushingThenSucceeds) {
    int reservedBytes = 15 * sizeof(uint32_t);
    auto &stream = csr->getCS();
    stream.getSpace(reservedBytes);

    CommandStreamReceiverHw::addBatchBufferEnd(stream, nullptr);
    EncodeNoop::alignToCacheLine(stream);

    BatchBuffer batchBuffer{stream.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, stream.getUsed(), &stream, nullptr, false};
    csr->flush(batchBuffer, csr->getResidencyAllocations());
}

// Uses the DrmCommandStreamEnhancedWithFailingExec fixture and expects flush
// to report SubmissionStatus::FAILED.
HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedWithFailingExec, GivenFailingExecThenCSRFlushFails) {
    int reservedBytes = 15 * sizeof(uint32_t);
    auto &stream = csr->getCS();
    stream.getSpace(reservedBytes);

    CommandStreamReceiverHw::addBatchBufferEnd(stream, nullptr);
    EncodeNoop::alignToCacheLine(stream);

    BatchBuffer batchBuffer{stream.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, stream.getUsed(), &stream, nullptr, false};
    NEO::SubmissionStatus flushStatus = csr->flush(batchBuffer, csr->getResidencyAllocations());

    EXPECT_EQ(flushStatus, NEO::SubmissionStatus::FAILED);
}

// Flushes a batch buffer whose start offset is 4 bytes into the command buffer.
HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, GivenNotAlignedWhenFlushingThenSucceeds) {
    auto &stream = csr->getCS();
    // NOTE(review): the casts below have no template argument list in this copy of the file - confirm against upstream.
    auto streamAllocation = static_cast(stream.getGraphicsAllocation());

    // make sure command buffer with offset is not page aligned
    ASSERT_NE(0u, (reinterpret_cast(streamAllocation->getUnderlyingBuffer()) + 4) & 0xFFF);
    ASSERT_EQ(4u, (reinterpret_cast(streamAllocation->getUnderlyingBuffer()) + 4) & 0x7F);

    CommandStreamReceiverHw::addBatchBufferEnd(stream, nullptr);
    EncodeNoop::alignToCacheLine(stream);

    BatchBuffer batchBuffer{stream.getGraphicsAllocation(), 4, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, stream.getUsed(), &stream, nullptr, false};
    csr->flush(batchBuffer, csr->getResidencyAllocations());
}

// Like the not-aligned flush, with one extra allocation made resident before
// the flush and released afterwards.
HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, GivenCheckDrmFreeWhenFlushingThenSucceeds) {
    auto &cs = csr->getCS();
    auto commandBuffer =
static_cast(cs.getGraphicsAllocation()); //make sure command buffer with offset is not page aligned ASSERT_NE(0u, (reinterpret_cast(commandBuffer->getUnderlyingBuffer()) + 4) & 0xFFF); ASSERT_EQ(4u, (reinterpret_cast(commandBuffer->getUnderlyingBuffer()) + 4) & 0x7F); auto allocation = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); csr->makeResident(*allocation); CommandStreamReceiverHw::addBatchBufferEnd(cs, nullptr); EncodeNoop::alignToCacheLine(cs); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 4, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false}; csr->flush(batchBuffer, csr->getResidencyAllocations()); csr->makeNonResident(*allocation); mm->freeGraphicsMemory(allocation); } HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, WhenMakingResidentThenClearResidencyAllocationsInCommandStreamReceiver) { auto allocation1 = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); auto allocation2 = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, allocation1); ASSERT_NE(nullptr, allocation2); csr->makeResident(*allocation1); csr->makeResident(*allocation2); EXPECT_NE(0u, csr->getResidencyAllocations().size()); csr->processResidency(csr->getResidencyAllocations(), 0u); csr->makeSurfacePackNonResident(csr->getResidencyAllocations()); EXPECT_EQ(0u, csr->getResidencyAllocations().size()); mm->freeGraphicsMemory(allocation1); mm->freeGraphicsMemory(allocation2); } HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, givenMultipleMakeResidentWhenMakeNonResidentIsCalledOnlyOnceThenSurfaceIsMadeNonResident) { auto allocation1 = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, allocation1); 
csr->makeResident(*allocation1); csr->makeResident(*allocation1); EXPECT_NE(0u, csr->getResidencyAllocations().size()); csr->processResidency(csr->getResidencyAllocations(), 0u); csr->makeSurfacePackNonResident(csr->getResidencyAllocations()); EXPECT_EQ(0u, csr->getResidencyAllocations().size()); EXPECT_FALSE(allocation1->isResident(csr->getOsContext().getContextId())); mm->freeGraphicsMemory(allocation1); } HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, givenAllocationThatIsAlwaysResidentWhenMakeNonResidentIsCalledThenItIsNotMadeNonResident) { auto allocation1 = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, allocation1); csr->makeResident(*allocation1); allocation1->updateResidencyTaskCount(GraphicsAllocation::objectAlwaysResident, csr->getOsContext().getContextId()); EXPECT_NE(0u, csr->getResidencyAllocations().size()); csr->processResidency(csr->getResidencyAllocations(), 0u); csr->makeSurfacePackNonResident(csr->getResidencyAllocations()); EXPECT_EQ(0u, csr->getResidencyAllocations().size()); EXPECT_TRUE(allocation1->isResident(csr->getOsContext().getContextId())); mm->freeGraphicsMemory(allocation1); } HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, GivenMemObjectCallsDrmCsrWhenMakingNonResidentThenMakeNonResidentWithGraphicsAllocation) { auto allocation1 = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), 0x1000}); ASSERT_NE(nullptr, allocation1); auto &makeNonResidentResult = static_cast *>(csr)->makeNonResidentResult; csr->makeResident(*allocation1); makeNonResidentResult.called = false; makeNonResidentResult.allocation = nullptr; csr->makeNonResident(*allocation1); EXPECT_TRUE(makeNonResidentResult.called); EXPECT_EQ(allocation1, makeNonResidentResult.allocation); EXPECT_EQ(0u, csr->getEvictionAllocations().size()); mm->freeGraphicsMemory(allocation1); } class DrmMockBuffer : public MockBufferStorage, public Buffer { 
using MockBufferStorage::device; public: static DrmMockBuffer *create() { char *data = static_cast(::alignedMalloc(128, 64)); DrmAllocation *alloc = new (std::nothrow) DrmAllocation(0, AllocationType::UNKNOWN, nullptr, &data, sizeof(data), static_cast(0), MemoryPool::MemoryNull); return new DrmMockBuffer(data, 128, alloc); } ~DrmMockBuffer() override { ::alignedFree(data); delete gfxAllocation; } DrmMockBuffer(char *data, size_t size, DrmAllocation *alloc) : Buffer( nullptr, ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_USE_HOST_PTR, 0, 0, MockBufferStorage::device.get()), CL_MEM_USE_HOST_PTR, 0, size, data, data, GraphicsAllocationHelper::toMultiGraphicsAllocation(alloc), true, false, false), data(data), gfxAllocation(alloc) { } void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly, const Device &device, bool useGlobalAtomics, bool areMultipleSubDevicesInContext) override { } protected: char *data; DrmAllocation *gfxAllocation; }; HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, GivenMultipleResidencyRequestsWhenMakingNonResidentThenAllocationIsNotResident) { std::unique_ptr buffer(DrmMockBuffer::create()); auto osContextId = csr->getOsContext().getContextId(); auto graphicsAllocation = buffer->getGraphicsAllocation(rootDeviceIndex); ASSERT_FALSE(graphicsAllocation->isResident(osContextId)); ASSERT_GT(buffer->getSize(), 0u); //make it resident 8 times for (int c = 0; c < 8; c++) { csr->makeResident(*graphicsAllocation); csr->processResidency(csr->getResidencyAllocations(), 0u); EXPECT_TRUE(graphicsAllocation->isResident(osContextId)); EXPECT_EQ(graphicsAllocation->getResidencyTaskCount(osContextId), csr->peekTaskCount() + 1); } csr->makeNonResident(*graphicsAllocation); EXPECT_FALSE(graphicsAllocation->isResident(osContextId)); csr->makeNonResident(*graphicsAllocation); EXPECT_FALSE(graphicsAllocation->isResident(osContextId)); } HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, 
givenDrmCommandStreamReceiverWhenMemoryManagerIsCreatedThenItHasHostMemoryValidationEnabledByDefault) { EXPECT_TRUE(mm->isValidateHostMemoryEnabled()); } struct MockDrmAllocationBindBO : public DrmAllocation { MockDrmAllocationBindBO(uint32_t rootDeviceIndex, AllocationType allocationType, BufferObjects &bos, void *ptrIn, uint64_t gpuAddress, size_t sizeIn, MemoryPool::Type pool) : DrmAllocation(rootDeviceIndex, allocationType, bos, ptrIn, gpuAddress, sizeIn, pool) { } ADDMETHOD_NOBASE(bindBO, int, 0, (BufferObject * bo, OsContext *osContext, uint32_t vmHandleId, std::vector *bufferObjects, bool bind)); }; struct MockDrmAllocationBindBOs : public DrmAllocation { MockDrmAllocationBindBOs(uint32_t rootDeviceIndex, AllocationType allocationType, BufferObjects &bos, void *ptrIn, uint64_t gpuAddress, size_t sizeIn, MemoryPool::Type pool) : DrmAllocation(rootDeviceIndex, allocationType, bos, ptrIn, gpuAddress, sizeIn, pool) { } ADDMETHOD_NOBASE(bindBOs, int, 0, (OsContext * osContext, uint32_t vmHandleId, std::vector *bufferObjects, bool bind)); }; HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, givenBindBOsFailsThenMakeBOsResidentReturnsError) { auto size = 1024u; auto bo = this->createBO(size); BufferObjects bos{bo}; auto allocation = new MockDrmAllocationBindBOs(0, AllocationType::UNKNOWN, bos, nullptr, 0u, size, MemoryPool::LocalMemory); allocation->bindBOsResult = -1; auto res = allocation->makeBOsResident(&csr->getOsContext(), 0, nullptr, true); EXPECT_NE(res, 0); EXPECT_EQ(allocation->fragmentsStorage.fragmentCount, 0u); EXPECT_GT(allocation->bindBOsCalled, 0u); mm->freeGraphicsMemory(allocation); } HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, givenFragmentStorageAndBindBOFailsThenMakeBOsResidentReturnsError) { auto size = 1024u; auto bo = this->createBO(size); BufferObjects bos{bo}; auto allocation = new MockDrmAllocationBindBO(0, AllocationType::UNKNOWN, bos, nullptr, 0u, size, MemoryPool::LocalMemory); allocation->bindBOResult = -1; OsHandleStorage 
prevStorage; OsHandleStorage storage; OsHandleLinux osHandleStorage; ResidencyData *residency = new ResidencyData(1); storage.fragmentCount = 1; storage.fragmentStorageData[0].osHandleStorage = &osHandleStorage; storage.fragmentStorageData[0].residency = residency; storage.fragmentStorageData[0].residency->resident[csr->getOsContext().getContextId()] = false; memcpy(&prevStorage, &allocation->fragmentsStorage, sizeof(OsHandleStorage)); memcpy(&allocation->fragmentsStorage, &storage, sizeof(OsHandleStorage)); auto res = allocation->makeBOsResident(&csr->getOsContext(), 0, nullptr, true); EXPECT_NE(res, 0); EXPECT_EQ(allocation->fragmentsStorage.fragmentCount, 1u); EXPECT_GT(allocation->bindBOCalled, 0u); memcpy(&allocation->fragmentsStorage, &prevStorage, sizeof(OsHandleStorage)); mm->freeGraphicsMemory(allocation); delete residency; } HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, givenBindBOFailsThenBindBOsReturnsError) { auto size = 1024u; auto bo = this->createBO(size); BufferObjects bos{bo}; auto allocation = new MockDrmAllocationBindBO(0, AllocationType::UNKNOWN, bos, nullptr, 0u, size, MemoryPool::LocalMemory); allocation->bindBOResult = -1; auto res = allocation->bindBOs(&csr->getOsContext(), 0u, &static_cast *>(csr)->residency, false); EXPECT_NE(res, 0); EXPECT_GT(allocation->bindBOCalled, 0u); mm->freeGraphicsMemory(allocation); } HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, givenBindBOFailsWithMultipleMemoryBanksThenBindBOsReturnsError) { auto size = 1024u; auto bo = this->createBO(size); auto bo2 = this->createBO(size); BufferObjects bos{bo, bo2}; auto allocation = new MockDrmAllocationBindBO(0, AllocationType::UNKNOWN, bos, nullptr, 0u, size, MemoryPool::LocalMemory); allocation->bindBOResult = -1; allocation->storageInfo.memoryBanks = 0b11; EXPECT_EQ(allocation->storageInfo.getNumBanks(), 2u); auto res = allocation->bindBOs(&csr->getOsContext(), 0u, &static_cast *>(csr)->residency, false); EXPECT_NE(res, 0); EXPECT_GT(allocation->bindBOCalled, 
0u);

    mm->freeGraphicsMemory(allocation);
}

// Two-bank failure path again, this time with storageInfo.tileInstanced set.
HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, givenBindBOFailsWithMultipleMemoryBanksWithTileInstancedThenBindBOsReturnsError) {
    auto boSize = 1024u;
    auto firstBo = this->createBO(boSize);
    auto secondBo = this->createBO(boSize);
    BufferObjects bufferObjects{firstBo, secondBo};

    auto mockAllocation = new MockDrmAllocationBindBO(0, AllocationType::UNKNOWN, bufferObjects, nullptr, 0u, boSize, MemoryPool::LocalMemory);
    mockAllocation->bindBOResult = -1;
    mockAllocation->storageInfo.tileInstanced = true;
    mockAllocation->storageInfo.memoryBanks = 0b11;
    EXPECT_EQ(mockAllocation->storageInfo.getNumBanks(), 2u);

    // NOTE(review): static_cast has no leading template argument in this copy of the file - confirm against upstream.
    auto bindResult = mockAllocation->bindBOs(&csr->getOsContext(), 0u, &static_cast *>(csr)->residency, false);

    EXPECT_NE(bindResult, 0);
    EXPECT_GT(mockAllocation->bindBOCalled, 0u);

    mm->freeGraphicsMemory(mockAllocation);
}

// A DrmAllocation built from a single BO must expose that BO through getBO(),
// and makeResidentBufferObjects must make it resident.
HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, givenAllocationWithSingleBufferObjectWhenMakeResidentBufferObjectsIsCalledThenTheBufferObjectIsMadeResident) {
    auto boSize = 1024u;
    auto bufferObject = this->createBO(boSize);
    BufferObjects bufferObjects{bufferObject};

    auto drmAllocation = new DrmAllocation(0, AllocationType::UNKNOWN, bufferObjects, nullptr, 0u, boSize, MemoryPool::LocalMemory);
    EXPECT_EQ(bufferObject, drmAllocation->getBO());

    makeResidentBufferObjects(&csr->getOsContext(), drmAllocation);
    EXPECT_TRUE(isResident(bufferObject));

    mm->freeGraphicsMemory(drmAllocation);
}

// EnableUserFenceForCompletionWait = 1 and the mocked isVmBindAvailable returning true:
// after flush the flush stamp is expected to equal latestSentTaskCount.
HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, givenWaitUserFenceFlagAndVmBindAvailableSetWhenDrmCsrFlushedThenExpectLatestSentTaskCountStoredAsFlushStamp) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.EnableUserFenceForCompletionWait.set(1);

    mock->isVmBindAvailableCall.callParent = false;
    mock->isVmBindAvailableCall.returnValue = true;

    TestedDrmCommandStreamReceiver *testedCsr = new TestedDrmCommandStreamReceiver(gemCloseWorkerMode::gemCloseWorkerInactive, *this->executionEnvironment, 1);
    EXPECT_TRUE(testedCsr->useUserFenceWait);

    device->resetCommandStreamReceiver(testedCsr);

    auto commandBuffer =
mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{testedCsr->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, commandBuffer); LinearStream cs(commandBuffer); CommandStreamReceiverHw::addBatchBufferEnd(cs, nullptr); EncodeNoop::alignToCacheLine(cs); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false}; testedCsr->latestSentTaskCount = 160u; testedCsr->flush(batchBuffer, testedCsr->getResidencyAllocations()); EXPECT_EQ(160u, testedCsr->flushStamp->peekStamp()); mm->freeGraphicsMemory(commandBuffer); } HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, givenWaitUserFenceFlagNotSetAndVmBindAvailableSetWhenDrmCsrFlushedThenExpectCommandBufferBoHandleAsFlushStamp) { DebugManagerStateRestore restorer; DebugManager.flags.EnableUserFenceForCompletionWait.set(0); mock->isVmBindAvailableCall.callParent = false; mock->isVmBindAvailableCall.returnValue = true; TestedDrmCommandStreamReceiver *testedCsr = new TestedDrmCommandStreamReceiver(gemCloseWorkerMode::gemCloseWorkerInactive, *this->executionEnvironment, 1); EXPECT_FALSE(testedCsr->useUserFenceWait); device->resetCommandStreamReceiver(testedCsr); auto commandBuffer = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{testedCsr->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, commandBuffer); LinearStream cs(commandBuffer); CommandStreamReceiverHw::addBatchBufferEnd(cs, nullptr); EncodeNoop::alignToCacheLine(cs); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false}; DrmAllocation *alloc = static_cast(cs.getGraphicsAllocation()); auto boHandle = static_cast(alloc->getBO()->peekHandle()); testedCsr->latestSentTaskCount = 160u; testedCsr->flush(batchBuffer, testedCsr->getResidencyAllocations()); EXPECT_EQ(boHandle, 
testedCsr->flushStamp->peekStamp());

    mm->freeGraphicsMemory(commandBuffer);
}

// EnableUserFenceForCompletionWait = 1 but the mocked isVmBindAvailable returning false:
// after flush the flush stamp is expected to equal the command buffer's BO handle.
HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, givenWaitUserFenceFlagAndNoVmBindAvailableSetWhenDrmCsrFlushedThenExpectCommandBufferBoHandleAsFlushStamp) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.EnableUserFenceForCompletionWait.set(1);

    mock->isVmBindAvailableCall.callParent = false;
    mock->isVmBindAvailableCall.returnValue = false;

    TestedDrmCommandStreamReceiver *drmCsr = new TestedDrmCommandStreamReceiver(gemCloseWorkerMode::gemCloseWorkerInactive, *this->executionEnvironment, 1);
    EXPECT_TRUE(drmCsr->useUserFenceWait);

    device->resetCommandStreamReceiver(drmCsr);

    auto streamAllocation = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{drmCsr->getRootDeviceIndex(), MemoryConstants::pageSize});
    ASSERT_NE(nullptr, streamAllocation);

    LinearStream stream(streamAllocation);
    CommandStreamReceiverHw::addBatchBufferEnd(stream, nullptr);
    EncodeNoop::alignToCacheLine(stream);

    BatchBuffer batchBuffer{stream.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, stream.getUsed(), &stream, nullptr, false};

    // NOTE(review): the static_cast calls below have no template argument list in this copy of the file - confirm against upstream.
    DrmAllocation *drmAllocation = static_cast(stream.getGraphicsAllocation());
    auto expectedStamp = static_cast(drmAllocation->getBO()->peekHandle());

    drmCsr->latestSentTaskCount = 160u;
    drmCsr->flush(batchBuffer, drmCsr->getResidencyAllocations());

    EXPECT_EQ(expectedStamp, drmCsr->flushStamp->peekStamp());

    mm->freeGraphicsMemory(streamAllocation);
}

// EnableUserFenceForCompletionWait = 0: waitForFlushStamp is expected to issue
// one gemWait ioctl and no user-fence wait.
HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, givenWaitUserFenceFlagNotSetWhenDrmCsrWaitsForFlushStampThenExpectUseDrmGemWaitCall) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.EnableUserFenceForCompletionWait.set(0);

    TestedDrmCommandStreamReceiver *testedCsr = new TestedDrmCommandStreamReceiver(gemCloseWorkerMode::gemCloseWorkerInactive, *this->executionEnvironment, 1);
    EXPECT_FALSE(testedCsr->useUserFenceWait);
    EXPECT_FALSE(testedCsr->isUsedNotifyEnableForPostSync());
EXPECT_FALSE(testedCsr->useContextForUserFenceWait); device->resetCommandStreamReceiver(testedCsr); mock->ioctl_cnt.gemWait = 0; FlushStamp handleToWait = 123; testedCsr->waitForFlushStamp(handleToWait); EXPECT_EQ(1, mock->ioctl_cnt.gemWait); EXPECT_EQ(-1, mock->gemWaitTimeout); EXPECT_EQ(0u, testedCsr->waitUserFenceResult.called); } HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, givenGemWaitUsedWhenKmdTimeoutUsedWhenDrmCsrWaitsForFlushStampThenExpectUseDrmGemWaitCallAndOverrideTimeout) { DebugManagerStateRestore restorer; DebugManager.flags.SetKmdWaitTimeout.set(1000); DebugManager.flags.EnableUserFenceForCompletionWait.set(0); TestedDrmCommandStreamReceiver *testedCsr = new TestedDrmCommandStreamReceiver(gemCloseWorkerMode::gemCloseWorkerInactive, *this->executionEnvironment, 1); EXPECT_FALSE(testedCsr->useUserFenceWait); EXPECT_FALSE(testedCsr->isUsedNotifyEnableForPostSync()); EXPECT_FALSE(testedCsr->useContextForUserFenceWait); device->resetCommandStreamReceiver(testedCsr); mock->ioctl_cnt.gemWait = 0; FlushStamp handleToWait = 123; testedCsr->waitForFlushStamp(handleToWait); EXPECT_EQ(1, mock->ioctl_cnt.gemWait); EXPECT_EQ(1000, mock->gemWaitTimeout); EXPECT_EQ(0u, testedCsr->waitUserFenceResult.called); } HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, givenWaitUserFenceFlagSetAndVmBindAvailableAndUseDrmCtxWhenDrmCsrWaitsForFlushStampThenExpectUseDrmWaitUserFenceCallWithNonZeroContext) { DebugManagerStateRestore restorer; DebugManager.flags.EnableUserFenceForCompletionWait.set(1); DebugManager.flags.EnableUserFenceUseCtxId.set(1); mock->isVmBindAvailableCall.callParent = false; mock->isVmBindAvailableCall.returnValue = true; TestedDrmCommandStreamReceiver *testedCsr = new TestedDrmCommandStreamReceiver(gemCloseWorkerMode::gemCloseWorkerInactive, *this->executionEnvironment, 1); EXPECT_TRUE(testedCsr->useUserFenceWait); EXPECT_TRUE(testedCsr->isUsedNotifyEnableForPostSync()); EXPECT_TRUE(testedCsr->useContextForUserFenceWait); 
device->resetCommandStreamReceiver(testedCsr); mock->ioctl_cnt.gemWait = 0; mock->isVmBindAvailableCall.called = 0u; auto osContextLinux = static_cast(device->getDefaultEngine().osContext); std::vector &drmCtxIds = const_cast &>(osContextLinux->getDrmContextIds()); size_t drmCtxSize = drmCtxIds.size(); for (uint32_t i = 0; i < drmCtxSize; i++) { drmCtxIds[i] = 5u + i; } FlushStamp handleToWait = 123; testedCsr->waitForFlushStamp(handleToWait); EXPECT_EQ(0, mock->ioctl_cnt.gemWait); EXPECT_EQ(1u, testedCsr->waitUserFenceResult.called); EXPECT_EQ(123u, testedCsr->waitUserFenceResult.waitValue); EXPECT_EQ(1u, mock->isVmBindAvailableCall.called); EXPECT_EQ(1u, mock->waitUserFenceCall.called); EXPECT_NE(0u, mock->waitUserFenceCall.ctxId); EXPECT_EQ(-1, mock->waitUserFenceCall.timeout); EXPECT_EQ(Drm::ValueWidth::U32, mock->waitUserFenceCall.dataWidth); } HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, givenWaitUserFenceFlagSetAndVmBindNotAvailableWhenDrmCsrWaitsForFlushStampThenExpectUseDrmGemWaitCall) { DebugManagerStateRestore restorer; DebugManager.flags.EnableUserFenceForCompletionWait.set(1); mock->isVmBindAvailableCall.callParent = false; mock->isVmBindAvailableCall.returnValue = false; TestedDrmCommandStreamReceiver *testedCsr = new TestedDrmCommandStreamReceiver(gemCloseWorkerMode::gemCloseWorkerInactive, *this->executionEnvironment, 1); EXPECT_TRUE(testedCsr->useUserFenceWait); EXPECT_TRUE(testedCsr->isUsedNotifyEnableForPostSync()); EXPECT_FALSE(testedCsr->useContextForUserFenceWait); device->resetCommandStreamReceiver(testedCsr); mock->ioctl_cnt.gemWait = 0; mock->isVmBindAvailableCall.called = 0u; FlushStamp handleToWait = 123; testedCsr->waitForFlushStamp(handleToWait); EXPECT_EQ(1, mock->ioctl_cnt.gemWait); EXPECT_EQ(0u, testedCsr->waitUserFenceResult.called); EXPECT_EQ(2u, mock->isVmBindAvailableCall.called); EXPECT_EQ(0u, mock->waitUserFenceCall.called); } HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, 
givenWaitUserFenceFlagNotSetAndVmBindAvailableWhenDrmCsrWaitsForFlushStampThenExpectUseDrmGemWaitCall) { DebugManagerStateRestore restorer; DebugManager.flags.EnableUserFenceForCompletionWait.set(0); mock->isVmBindAvailableCall.callParent = false; mock->isVmBindAvailableCall.returnValue = true; TestedDrmCommandStreamReceiver *testedCsr = new TestedDrmCommandStreamReceiver(gemCloseWorkerMode::gemCloseWorkerInactive, *this->executionEnvironment, 1); EXPECT_FALSE(testedCsr->useUserFenceWait); EXPECT_FALSE(testedCsr->isUsedNotifyEnableForPostSync()); EXPECT_FALSE(testedCsr->useContextForUserFenceWait); device->resetCommandStreamReceiver(testedCsr); mock->ioctl_cnt.gemWait = 0; mock->isVmBindAvailableCall.called = 0u; FlushStamp handleToWait = 123; EXPECT_ANY_THROW(testedCsr->waitForFlushStamp(handleToWait)); EXPECT_EQ(0, mock->ioctl_cnt.gemWait); EXPECT_EQ(0u, testedCsr->waitUserFenceResult.called); EXPECT_EQ(2u, mock->isVmBindAvailableCall.called); EXPECT_EQ(0u, mock->waitUserFenceCall.called); } HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, givenWaitUserFenceSetAndUseCtxFlagsNotSetAndVmBindAvailableWhenDrmCsrWaitsForFlushStampThenExpectUseDrmWaitUserFenceCallWithZeroContext) { DebugManagerStateRestore restorer; DebugManager.flags.EnableUserFenceForCompletionWait.set(1); DebugManager.flags.EnableUserFenceUseCtxId.set(0); DebugManager.flags.SetKmdWaitTimeout.set(1000); mock->isVmBindAvailableCall.callParent = false; mock->isVmBindAvailableCall.returnValue = true; TestedDrmCommandStreamReceiver *testedCsr = new TestedDrmCommandStreamReceiver(gemCloseWorkerMode::gemCloseWorkerInactive, *this->executionEnvironment, 1); EXPECT_TRUE(testedCsr->useUserFenceWait); EXPECT_TRUE(testedCsr->isUsedNotifyEnableForPostSync()); EXPECT_FALSE(testedCsr->useContextForUserFenceWait); device->resetCommandStreamReceiver(testedCsr); mock->ioctl_cnt.gemWait = 0; mock->isVmBindAvailableCall.called = 0u; FlushStamp handleToWait = 123; testedCsr->waitForFlushStamp(handleToWait); EXPECT_EQ(0, 
mock->ioctl_cnt.gemWait); EXPECT_EQ(1u, testedCsr->waitUserFenceResult.called); EXPECT_EQ(123u, testedCsr->waitUserFenceResult.waitValue); EXPECT_EQ(1u, mock->waitUserFenceCall.called); EXPECT_EQ(1u, mock->isVmBindAvailableCall.called); EXPECT_EQ(0u, mock->waitUserFenceCall.ctxId); EXPECT_EQ(1000, mock->waitUserFenceCall.timeout); EXPECT_EQ(Drm::ValueWidth::U32, mock->waitUserFenceCall.dataWidth); } HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, givenNoDebugFlagWaitUserFenceSetWhenDrmCsrIsCreatedThenUseNotifyEnableFlagIsSet) { mock->isVmBindAvailableCall.callParent = false; mock->isVmBindAvailableCall.returnValue = true; std::unique_ptr> testedCsr = std::make_unique>(gemCloseWorkerMode::gemCloseWorkerInactive, *this->executionEnvironment, 1); EXPECT_TRUE(testedCsr->useUserFenceWait); EXPECT_TRUE(testedCsr->isUsedNotifyEnableForPostSync()); } HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, givenWaitUserFenceSetWhenDrmCsrIsCreatedThenUseNotifyEnableFlagIsSet) { DebugManagerStateRestore restorer; DebugManager.flags.EnableUserFenceForCompletionWait.set(1); mock->isVmBindAvailableCall.callParent = false; mock->isVmBindAvailableCall.returnValue = true; std::unique_ptr> testedCsr = std::make_unique>(gemCloseWorkerMode::gemCloseWorkerInactive, *this->executionEnvironment, 1); EXPECT_TRUE(testedCsr->useUserFenceWait); EXPECT_TRUE(testedCsr->isUsedNotifyEnableForPostSync()); } HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, givenWaitUserFenceNotSetWhenDrmCsrIsCreatedThenUseNotifyEnableFlagIsNotSet) { DebugManagerStateRestore restorer; DebugManager.flags.EnableUserFenceForCompletionWait.set(0); mock->isVmBindAvailableCall.callParent = false; mock->isVmBindAvailableCall.returnValue = true; std::unique_ptr> testedCsr = std::make_unique>(gemCloseWorkerMode::gemCloseWorkerInactive, *this->executionEnvironment, 1); EXPECT_FALSE(testedCsr->useUserFenceWait); EXPECT_FALSE(testedCsr->isUsedNotifyEnableForPostSync()); } HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, 
givenWaitUserFenceNotSetAndOverrideNotifyEnableSetWhenDrmCsrIsCreatedThenUseNotifyEnableFlagIsSet) { DebugManagerStateRestore restorer; DebugManager.flags.EnableUserFenceForCompletionWait.set(0); DebugManager.flags.OverrideNotifyEnableForTagUpdatePostSync.set(1); mock->isVmBindAvailableCall.callParent = false; mock->isVmBindAvailableCall.returnValue = true; std::unique_ptr> testedCsr = std::make_unique>(gemCloseWorkerMode::gemCloseWorkerInactive, *this->executionEnvironment, 1); EXPECT_FALSE(testedCsr->useUserFenceWait); EXPECT_TRUE(testedCsr->isUsedNotifyEnableForPostSync()); } HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, givenWaitUserFenceSetAndOverrideNotifyEnableNotSetWhenDrmCsrIsCreatedThenUseNotifyEnableFlagIsNotSet) { DebugManagerStateRestore restorer; DebugManager.flags.EnableUserFenceForCompletionWait.set(1); DebugManager.flags.OverrideNotifyEnableForTagUpdatePostSync.set(0); mock->isVmBindAvailableCall.callParent = false; mock->isVmBindAvailableCall.returnValue = true; std::unique_ptr> testedCsr = std::make_unique>(gemCloseWorkerMode::gemCloseWorkerInactive, *this->executionEnvironment, 1); EXPECT_TRUE(testedCsr->useUserFenceWait); EXPECT_FALSE(testedCsr->isUsedNotifyEnableForPostSync()); } HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, givenVmBindNotAvailableWhenCheckingForKmdWaitModeActiveThenReturnTrue) { auto testDrmCsr = static_cast *>(csr); mock->isVmBindAvailableCall.called = 0u; mock->isVmBindAvailableCall.callParent = false; mock->isVmBindAvailableCall.returnValue = false; EXPECT_TRUE(testDrmCsr->isKmdWaitModeActive()); EXPECT_EQ(1u, mock->isVmBindAvailableCall.called); } HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, givenVmBindAvailableUseWaitCallTrueWhenCheckingForKmdWaitModeActiveThenReturnTrue) { auto testDrmCsr = static_cast *>(csr); mock->isVmBindAvailableCall.called = 0u; mock->isVmBindAvailableCall.callParent = false; mock->isVmBindAvailableCall.returnValue = true; testDrmCsr->useUserFenceWait = true; 
EXPECT_TRUE(testDrmCsr->isKmdWaitModeActive()); EXPECT_EQ(1u, mock->isVmBindAvailableCall.called); } HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, givenVmBindAvailableUseWaitCallFalseWhenCheckingForKmdWaitModeActiveThenReturnFalse) { auto testDrmCsr = static_cast *>(csr); mock->isVmBindAvailableCall.called = 0u; mock->isVmBindAvailableCall.callParent = false; mock->isVmBindAvailableCall.returnValue = true; testDrmCsr->useUserFenceWait = false; EXPECT_FALSE(testDrmCsr->isKmdWaitModeActive()); EXPECT_EQ(1u, mock->isVmBindAvailableCall.called); } struct MockMergeResidencyContainerMemoryOperationsHandler : public DrmMemoryOperationsHandlerDefault { using DrmMemoryOperationsHandlerDefault::DrmMemoryOperationsHandlerDefault; ADDMETHOD_NOBASE(mergeWithResidencyContainer, NEO::MemoryOperationsStatus, NEO::MemoryOperationsStatus::SUCCESS, (OsContext * osContext, ResidencyContainer &residencyContainer)); ADDMETHOD_NOBASE(makeResidentWithinOsContext, NEO::MemoryOperationsStatus, NEO::MemoryOperationsStatus::SUCCESS, (OsContext * osContext, ArrayRef gfxAllocations, bool evictable)); }; HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, givenMakeResidentWithinOsContextFailsThenFlushReturnsError) { struct MockDrmCsr : public DrmCommandStreamReceiver { using DrmCommandStreamReceiver::DrmCommandStreamReceiver; int flushInternal(const BatchBuffer &batchBuffer, const ResidencyContainer &allocationsForResidency) override { return 0; } }; mock->bindAvailable = true; executionEnvironment->rootDeviceEnvironments[0]->memoryOperationsInterface.reset(new MockMergeResidencyContainerMemoryOperationsHandler()); auto operationHandler = static_cast(executionEnvironment->rootDeviceEnvironments[0]->memoryOperationsInterface.get()); operationHandler->makeResidentWithinOsContextResult = NEO::MemoryOperationsStatus::FAILED; auto osContext = std::make_unique(*mock, 0u, 
EngineDescriptorHelper::getDefaultDescriptor(HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*defaultHwInfo)[0], PreemptionHelper::getDefaultPreemptionMode(*defaultHwInfo))); auto commandBuffer = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); LinearStream cs(commandBuffer); CommandStreamReceiverHw::addBatchBufferEnd(cs, nullptr); EncodeNoop::alignToCacheLine(cs); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false}; MockDrmCsr mockCsr(*executionEnvironment, rootDeviceIndex, 1, gemCloseWorkerMode::gemCloseWorkerInactive); mockCsr.setupContext(*osContext.get()); auto res = mockCsr.flush(batchBuffer, mockCsr.getResidencyAllocations()); EXPECT_GT(operationHandler->makeResidentWithinOsContextCalled, 0u); EXPECT_EQ(NEO::SubmissionStatus::FAILED, res); mm->freeGraphicsMemory(commandBuffer); } HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, givenMakeResidentWithinOsContextOutOfMemoryThenFlushReturnsError) { struct MockDrmCsr : public DrmCommandStreamReceiver { using DrmCommandStreamReceiver::DrmCommandStreamReceiver; int flushInternal(const BatchBuffer &batchBuffer, const ResidencyContainer &allocationsForResidency) override { return 0; } }; mock->bindAvailable = true; executionEnvironment->rootDeviceEnvironments[0]->memoryOperationsInterface.reset(new MockMergeResidencyContainerMemoryOperationsHandler()); auto operationHandler = static_cast(executionEnvironment->rootDeviceEnvironments[0]->memoryOperationsInterface.get()); operationHandler->makeResidentWithinOsContextResult = NEO::MemoryOperationsStatus::OUT_OF_MEMORY; auto osContext = std::make_unique(*mock, 0u, EngineDescriptorHelper::getDefaultDescriptor(HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*defaultHwInfo)[0], 
PreemptionHelper::getDefaultPreemptionMode(*defaultHwInfo))); auto commandBuffer = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); LinearStream cs(commandBuffer); CommandStreamReceiverHw::addBatchBufferEnd(cs, nullptr); EncodeNoop::alignToCacheLine(cs); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false}; MockDrmCsr mockCsr(*executionEnvironment, rootDeviceIndex, 1, gemCloseWorkerMode::gemCloseWorkerInactive); mockCsr.setupContext(*osContext.get()); auto res = mockCsr.flush(batchBuffer, mockCsr.getResidencyAllocations()); EXPECT_GT(operationHandler->makeResidentWithinOsContextCalled, 0u); EXPECT_EQ(NEO::SubmissionStatus::OUT_OF_MEMORY, res); mm->freeGraphicsMemory(commandBuffer); } HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, givenMergeWithResidencyContainerFailsThenFlushReturnsError) { struct MockDrmCsr : public DrmCommandStreamReceiver { using DrmCommandStreamReceiver::DrmCommandStreamReceiver; int flushInternal(const BatchBuffer &batchBuffer, const ResidencyContainer &allocationsForResidency) override { return 0; } }; executionEnvironment->rootDeviceEnvironments[0]->memoryOperationsInterface.reset(new MockMergeResidencyContainerMemoryOperationsHandler()); auto operationHandler = static_cast(executionEnvironment->rootDeviceEnvironments[0]->memoryOperationsInterface.get()); operationHandler->mergeWithResidencyContainerResult = NEO::MemoryOperationsStatus::FAILED; auto osContext = std::make_unique(*mock, 0u, EngineDescriptorHelper::getDefaultDescriptor(HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*defaultHwInfo)[0], PreemptionHelper::getDefaultPreemptionMode(*defaultHwInfo))); auto commandBuffer = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); LinearStream 
cs(commandBuffer); CommandStreamReceiverHw::addBatchBufferEnd(cs, nullptr); EncodeNoop::alignToCacheLine(cs); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false}; MockDrmCsr mockCsr(*executionEnvironment, rootDeviceIndex, 1, gemCloseWorkerMode::gemCloseWorkerInactive); mockCsr.setupContext(*osContext.get()); auto res = mockCsr.flush(batchBuffer, mockCsr.getResidencyAllocations()); EXPECT_GT(operationHandler->mergeWithResidencyContainerCalled, 0u); EXPECT_EQ(NEO::SubmissionStatus::FAILED, res); mm->freeGraphicsMemory(commandBuffer); } HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, givenMergeWithResidencyContainerReturnsOutOfMemoryThenFlushReturnsError) { struct MockDrmCsr : public DrmCommandStreamReceiver { using DrmCommandStreamReceiver::DrmCommandStreamReceiver; int flushInternal(const BatchBuffer &batchBuffer, const ResidencyContainer &allocationsForResidency) override { return 0; } }; executionEnvironment->rootDeviceEnvironments[0]->memoryOperationsInterface.reset(new MockMergeResidencyContainerMemoryOperationsHandler()); auto operationHandler = static_cast(executionEnvironment->rootDeviceEnvironments[0]->memoryOperationsInterface.get()); operationHandler->mergeWithResidencyContainerResult = NEO::MemoryOperationsStatus::OUT_OF_MEMORY; auto osContext = std::make_unique(*mock, 0u, EngineDescriptorHelper::getDefaultDescriptor(HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*defaultHwInfo)[0], PreemptionHelper::getDefaultPreemptionMode(*defaultHwInfo))); auto commandBuffer = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); LinearStream cs(commandBuffer); CommandStreamReceiverHw::addBatchBufferEnd(cs, nullptr); EncodeNoop::alignToCacheLine(cs); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, 
QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false}; MockDrmCsr mockCsr(*executionEnvironment, rootDeviceIndex, 1, gemCloseWorkerMode::gemCloseWorkerInactive); mockCsr.setupContext(*osContext.get()); auto res = mockCsr.flush(batchBuffer, mockCsr.getResidencyAllocations()); EXPECT_GT(operationHandler->mergeWithResidencyContainerCalled, 0u); EXPECT_EQ(NEO::SubmissionStatus::OUT_OF_MEMORY, res); mm->freeGraphicsMemory(commandBuffer); } HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, givenNoAllocsInMemoryOperationHandlerDefaultWhenFlushThenDrmMemoryOperationHandlerIsNotLocked) { struct MockDrmMemoryOperationsHandler : public DrmMemoryOperationsHandler { using DrmMemoryOperationsHandler::mutex; }; struct MockDrmCsr : public DrmCommandStreamReceiver { using DrmCommandStreamReceiver::DrmCommandStreamReceiver; int flushInternal(const BatchBuffer &batchBuffer, const ResidencyContainer &allocationsForResidency) override { auto memoryOperationsInterface = static_cast(this->executionEnvironment.rootDeviceEnvironments[this->rootDeviceIndex]->memoryOperationsInterface.get()); EXPECT_TRUE(memoryOperationsInterface->mutex.try_lock()); memoryOperationsInterface->mutex.unlock(); return 0; } }; auto osContext = std::make_unique(*mock, 0u, EngineDescriptorHelper::getDefaultDescriptor(HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*defaultHwInfo)[0], PreemptionHelper::getDefaultPreemptionMode(*defaultHwInfo))); auto commandBuffer = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); LinearStream cs(commandBuffer); CommandStreamReceiverHw::addBatchBufferEnd(cs, nullptr); EncodeNoop::alignToCacheLine(cs); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false}; MockDrmCsr mockCsr(*executionEnvironment, rootDeviceIndex, 1, 
gemCloseWorkerMode::gemCloseWorkerInactive); mockCsr.setupContext(*osContext.get()); mockCsr.flush(batchBuffer, mockCsr.getResidencyAllocations()); mm->freeGraphicsMemory(commandBuffer); } HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, givenAllocsInMemoryOperationHandlerDefaultWhenFlushThenDrmMemoryOperationHandlerIsLocked) { struct MockDrmMemoryOperationsHandler : public DrmMemoryOperationsHandler { using DrmMemoryOperationsHandler::mutex; }; struct MockDrmCsr : public DrmCommandStreamReceiver { using DrmCommandStreamReceiver::DrmCommandStreamReceiver; int flushInternal(const BatchBuffer &batchBuffer, const ResidencyContainer &allocationsForResidency) override { auto memoryOperationsInterface = static_cast(this->executionEnvironment.rootDeviceEnvironments[this->rootDeviceIndex]->memoryOperationsInterface.get()); EXPECT_FALSE(memoryOperationsInterface->mutex.try_lock()); return 0; } }; auto osContext = std::make_unique(*mock, 0u, EngineDescriptorHelper::getDefaultDescriptor(HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*defaultHwInfo)[0], PreemptionHelper::getDefaultPreemptionMode(*defaultHwInfo))); auto allocation = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); executionEnvironment->rootDeviceEnvironments[csr->getRootDeviceIndex()]->memoryOperationsInterface->makeResident(device.get(), ArrayRef(&allocation, 1)); auto commandBuffer = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); LinearStream cs(commandBuffer); CommandStreamReceiverHw::addBatchBufferEnd(cs, nullptr); EncodeNoop::alignToCacheLine(cs); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false}; MockDrmCsr mockCsr(*executionEnvironment, rootDeviceIndex, 1, gemCloseWorkerMode::gemCloseWorkerInactive); 
mockCsr.setupContext(*osContext.get()); mockCsr.flush(batchBuffer, mockCsr.getResidencyAllocations()); mm->freeGraphicsMemory(commandBuffer); mm->freeGraphicsMemory(allocation); } HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, givenAllocInMemoryOperationsInterfaceWhenFlushThenAllocIsResident) { auto commandBuffer = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); LinearStream cs(commandBuffer); CommandStreamReceiverHw::addBatchBufferEnd(cs, nullptr); EncodeNoop::alignToCacheLine(cs); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false}; auto allocation = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); executionEnvironment->rootDeviceEnvironments[csr->getRootDeviceIndex()]->memoryOperationsInterface->makeResident(device.get(), ArrayRef(&allocation, 1)); csr->flush(batchBuffer, csr->getResidencyAllocations()); const auto boRequirments = [&allocation](const auto &bo) { return (static_cast(bo.handle) == static_cast(allocation)->getBO()->peekHandle() && bo.offset == static_cast(allocation)->getBO()->peekAddress()); }; auto &residency = static_cast *>(csr)->getExecStorage(); EXPECT_TRUE(std::find_if(residency.begin(), residency.end(), boRequirments) != residency.end()); EXPECT_EQ(residency.size(), 2u); residency.clear(); csr->flush(batchBuffer, csr->getResidencyAllocations()); EXPECT_TRUE(std::find_if(residency.begin(), residency.end(), boRequirments) != residency.end()); EXPECT_EQ(residency.size(), 2u); residency.clear(); csr->getResidencyAllocations().clear(); executionEnvironment->rootDeviceEnvironments[csr->getRootDeviceIndex()]->memoryOperationsInterface->evict(device.get(), *allocation); csr->flush(batchBuffer, csr->getResidencyAllocations()); EXPECT_FALSE(std::find_if(residency.begin(), 
residency.end(), boRequirments) != residency.end()); EXPECT_EQ(residency.size(), 1u); mm->freeGraphicsMemory(allocation); mm->freeGraphicsMemory(commandBuffer); } drm_command_stream_xehp_and_later_tests.cpp000066400000000000000000000211471422164147700361730ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/linux/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_container/command_encoder.h" #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/os_interface/linux/drm_command_stream.h" #include "shared/source/os_interface/linux/drm_memory_manager.h" #include "shared/source/os_interface/linux/drm_memory_operations_handler.h" #include "shared/source/os_interface/linux/os_context_linux.h" #include "shared/source/os_interface/os_interface.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/engine_descriptor_helper.h" #include "shared/test/common/helpers/variable_backup.h" #include "shared/test/common/libult/linux/drm_mock.h" #include "shared/test/common/mocks/linux/mock_drm_allocation.h" #include "shared/test/common/mocks/linux/mock_drm_memory_manager.h" #include "shared/test/common/mocks/mock_allocation_properties.h" #include "shared/test/common/mocks/mock_execution_environment.h" #include "shared/test/common/os_interface/linux/device_command_stream_fixture.h" #include "shared/test/common/os_interface/linux/drm_buffer_object_fixture.h" #include "shared/test/common/os_interface/linux/drm_command_stream_fixture.h" #include "shared/test/common/test_macros/test.h" using namespace NEO; struct DrmCommandStreamMultiTileMemExecFixture { void SetUp() { DebugManager.flags.CreateMultipleSubDevices.set(2u); DebugManager.flags.EnableImplicitScaling.set(1); DebugManager.flags.EnableForcePin.set(false); osLocalMemoryBackup = std::make_unique>(&OSInterface::osEnableLocalMemory, true); 
executionEnvironment = new MockExecutionEnvironment(); executionEnvironment->incRefInternal(); executionEnvironment->initGmm(); mock = new DrmMockCustom(*executionEnvironment->rootDeviceEnvironments[0]); executionEnvironment->rootDeviceEnvironments[0]->osInterface = std::make_unique(); executionEnvironment->rootDeviceEnvironments[0]->osInterface->setDriverModel(std::unique_ptr(mock)); executionEnvironment->rootDeviceEnvironments[0]->memoryOperationsInterface = DrmMemoryOperationsHandler::create(*mock, 0); memoryManager = new DrmMemoryManager(gemCloseWorkerMode::gemCloseWorkerInactive, DebugManager.flags.EnableForcePin.get(), true, *executionEnvironment); executionEnvironment->memoryManager.reset(memoryManager); executionEnvironment->prepareRootDeviceEnvironments(1u); executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(NEO::defaultHwInfo.get()); executionEnvironment->initializeMemoryManager(); VariableBackup backup(&ultHwConfig); ultHwConfig.useHwCsr = true; device.reset(MockDevice::create(executionEnvironment, 0)); } void TearDown() { executionEnvironment->decRefInternal(); } DebugManagerStateRestore dbgRestore; std::unique_ptr> osLocalMemoryBackup; std::unique_ptr device; MockExecutionEnvironment *executionEnvironment = nullptr; DrmMockCustom *mock = nullptr; DrmMemoryManager *memoryManager = nullptr; }; using DrmCommandStreamMultiTileMemExecTest = Test; HWCMDTEST_F(IGFX_XE_HP_CORE, DrmCommandStreamMultiTileMemExecTest, GivenDrmSupportsCompletionFenceAndVmBindWhenCallingCsrExecThenMultipleTagAllocationIsPassed) { auto *testCsr = new TestedDrmCommandStreamReceiver(*executionEnvironment, 0, device->getDeviceBitfield()); device->resetCommandStreamReceiver(testCsr); EXPECT_EQ(2u, testCsr->activePartitions); mock->completionFenceSupported = true; mock->isVmBindAvailableCall.callParent = false; mock->isVmBindAvailableCall.returnValue = true; TestedBufferObject bo(mock, 128); MockDrmAllocation cmdBuffer(AllocationType::COMMAND_BUFFER, MemoryPool::System4KBPages); 
cmdBuffer.bufferObjects[0] = &bo; uint8_t buff[128]; LinearStream cs(&cmdBuffer, buff, 128); CommandStreamReceiverHw::addBatchBufferEnd(cs, nullptr); EncodeNoop::alignToCacheLine(cs); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false}; auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{testCsr->getRootDeviceIndex(), MemoryConstants::pageSize}); testCsr->makeResident(cmdBuffer); testCsr->makeResident(*allocation); testCsr->makeResident(*testCsr->getTagAllocation()); testCsr->latestSentTaskCount = 2; testCsr->postSyncWriteOffset = 16; uint64_t expectedCompletionGpuAddress = testCsr->getTagAllocation()->getGpuAddress() + Drm::completionFenceOffset + testCsr->postSyncWriteOffset; int ret = testCsr->flushInternal(batchBuffer, testCsr->getResidencyAllocations()); EXPECT_EQ(0, ret); EXPECT_EQ(expectedCompletionGpuAddress, bo.receivedCompletionGpuAddress); EXPECT_EQ(testCsr->latestSentTaskCount, bo.receivedCompletionValue); EXPECT_EQ(2u, bo.execCalled); memoryManager->freeGraphicsMemory(allocation); } HWCMDTEST_F(IGFX_XE_HP_CORE, DrmCommandStreamMultiTileMemExecTest, GivenDrmSupportsCompletionFenceAndVmBindWhenHandlingCompletionThenExpectMultipleWaitCalls) { EngineControl &defaultEngine = device->getDefaultEngine(); EXPECT_EQ(2u, defaultEngine.commandStreamReceiver->getActivePartitions()); uint32_t postSyncOffset = defaultEngine.commandStreamReceiver->getPostSyncWriteOffset(); EXPECT_NE(0u, postSyncOffset); mock->completionFenceSupported = true; mock->isVmBindAvailableCall.callParent = false; mock->isVmBindAvailableCall.returnValue = true; auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{0, 1024, AllocationType::COMMAND_BUFFER}); allocation->updateTaskCount(2, defaultEngine.osContext->getContextId()); volatile uint32_t *completionAddress = 
defaultEngine.commandStreamReceiver->getTagAddress(); completionAddress += (Drm::completionFenceOffset / sizeof(uint32_t)); *completionAddress = 1; completionAddress += (postSyncOffset / sizeof(uint32_t)); *completionAddress = 1; memoryManager->handleFenceCompletion(allocation); uint64_t expectedAddress = castToUint64(const_cast(defaultEngine.commandStreamReceiver->getTagAddress())) + Drm::completionFenceOffset + postSyncOffset; constexpr uint64_t expectedValue = 2; EXPECT_EQ(2u, mock->waitUserFenceCall.called); EXPECT_EQ(expectedAddress, mock->waitUserFenceCall.address); EXPECT_EQ(expectedValue, mock->waitUserFenceCall.value); memoryManager->freeGraphicsMemory(allocation); } HWCMDTEST_F(IGFX_XE_HP_CORE, DrmCommandStreamMultiTileMemExecTest, GivenDrmSupportsCompletionFenceAndVmBindWhenHandlingCompletionAndOneContextIsReadyThenExpectOneWaitCall) { EngineControl &defaultEngine = device->getDefaultEngine(); EXPECT_EQ(2u, defaultEngine.commandStreamReceiver->getActivePartitions()); uint32_t postSyncOffset = defaultEngine.commandStreamReceiver->getPostSyncWriteOffset(); EXPECT_NE(0u, postSyncOffset); mock->completionFenceSupported = true; mock->isVmBindAvailableCall.callParent = false; mock->isVmBindAvailableCall.returnValue = true; auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{0, 1024, AllocationType::COMMAND_BUFFER}); allocation->updateTaskCount(2, defaultEngine.osContext->getContextId()); volatile uint32_t *completionAddress = defaultEngine.commandStreamReceiver->getTagAddress(); completionAddress += (Drm::completionFenceOffset / sizeof(uint32_t)); *completionAddress = 2; //1st context is ready completionAddress += (postSyncOffset / sizeof(uint32_t)); *completionAddress = 1; memoryManager->handleFenceCompletion(allocation); uint64_t expectedAddress = castToUint64(const_cast(defaultEngine.commandStreamReceiver->getTagAddress())) + Drm::completionFenceOffset + postSyncOffset; constexpr uint64_t expectedValue = 2; 
EXPECT_EQ(1u, mock->waitUserFenceCall.called); EXPECT_EQ(expectedAddress, mock->waitUserFenceCall.address); EXPECT_EQ(expectedValue, mock->waitUserFenceCall.value); memoryManager->freeGraphicsMemory(allocation); } compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/linux/drm_gem_close_worker_tests.cpp000066400000000000000000000146531422164147700335560ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/device_command_stream.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/os_interface/linux/drm_buffer_object.h" #include "shared/source/os_interface/linux/drm_command_stream.h" #include "shared/source/os_interface/linux/drm_gem_close_worker.h" #include "shared/source/os_interface/linux/drm_memory_manager.h" #include "shared/source/os_interface/linux/drm_memory_operations_handler.h" #include "shared/source/os_interface/os_interface.h" #include "shared/test/common/mocks/mock_execution_environment.h" #include "shared/test/common/os_interface/linux/device_command_stream_fixture.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/mem_obj/buffer.h" #include "drm/i915_drm.h" #include "gtest/gtest.h" #include #include #include #include #include using namespace NEO; class DrmMockForWorker : public Drm { public: using Drm::setupIoctlHelper; std::mutex mutex; std::atomic gem_close_cnt; std::atomic gem_close_expected; std::atomic ioctl_caller_thread_id; DrmMockForWorker(RootDeviceEnvironment &rootDeviceEnvironment) : Drm(std::make_unique(mockFd, mockPciPath), rootDeviceEnvironment) { } int ioctl(unsigned long request, void *arg) override { if (_IOC_TYPE(request) == DRM_IOCTL_BASE) { //when drm ioctl is called, try acquire mutex //main thread can hold mutex, to prevent ioctl handling std::lock_guard lock(mutex); } if (request == DRM_IOCTL_GEM_CLOSE) 
gem_close_cnt++; ioctl_caller_thread_id = std::this_thread::get_id(); return 0; }; }; class DrmGemCloseWorkerFixture { public: DrmGemCloseWorkerFixture() : executionEnvironment(defaultHwInfo.get()){}; //max loop count for while static const uint32_t deadCntInit = 10 * 1000 * 1000; DrmMemoryManager *mm; DrmMockForWorker *drmMock; uint32_t deadCnt = deadCntInit; void SetUp() { this->drmMock = new DrmMockForWorker(*executionEnvironment.rootDeviceEnvironments[0]); auto hwInfo = executionEnvironment.rootDeviceEnvironments[0]->getHardwareInfo(); drmMock->setupIoctlHelper(hwInfo->platform.eProductFamily); executionEnvironment.rootDeviceEnvironments[0]->osInterface = std::make_unique(); executionEnvironment.rootDeviceEnvironments[0]->osInterface->setDriverModel(std::unique_ptr(drmMock)); executionEnvironment.rootDeviceEnvironments[0]->memoryOperationsInterface = DrmMemoryOperationsHandler::create(*drmMock, 0u); this->mm = new DrmMemoryManager(gemCloseWorkerMode::gemCloseWorkerInactive, false, false, executionEnvironment); this->drmMock->gem_close_cnt = 0; this->drmMock->gem_close_expected = 0; } void TearDown() { if (this->drmMock->gem_close_expected >= 0) { EXPECT_EQ(this->drmMock->gem_close_expected, this->drmMock->gem_close_cnt); } delete this->mm; } protected: class DrmAllocationWrapper : public DrmAllocation { public: DrmAllocationWrapper(BufferObject *bo) : DrmAllocation(0, AllocationType::UNKNOWN, bo, nullptr, 0, static_cast(0u), MemoryPool::MemoryNull) { } }; MockExecutionEnvironment executionEnvironment; }; typedef Test DrmGemCloseWorkerTests; TEST_F(DrmGemCloseWorkerTests, WhenClosingGemThenSucceeds) { this->drmMock->gem_close_expected = 1; auto worker = new DrmGemCloseWorker(*mm); auto bo = new BufferObject(this->drmMock, 1, 0, 1); worker->push(bo); delete worker; } TEST_F(DrmGemCloseWorkerTests, GivenMultipleThreadsWhenClosingGemThenSucceeds) { this->drmMock->gem_close_expected = -1; auto worker = new DrmGemCloseWorker(*mm); auto bo = new 
BufferObject(this->drmMock, 1, 0, 1); worker->push(bo); //wait for worker to complete or deadCnt drops while (!worker->isEmpty() && (deadCnt-- > 0)) sched_yield(); //yield to another threads worker->close(false); //and check if GEM was closed EXPECT_EQ(1, this->drmMock->gem_close_cnt.load()); delete worker; } TEST_F(DrmGemCloseWorkerTests, GivenMultipleThreadsAndCloseFalseWhenClosingGemThenSucceeds) { this->drmMock->gem_close_expected = -1; auto worker = new DrmGemCloseWorker(*mm); auto bo = new BufferObject(this->drmMock, 1, 0, 1); worker->push(bo); worker->close(false); //wait for worker to complete or deadCnt drops while (!worker->isEmpty() && (deadCnt-- > 0)) sched_yield(); //yield to another threads //and check if GEM was closed EXPECT_EQ(1, this->drmMock->gem_close_cnt.load()); delete worker; } TEST_F(DrmGemCloseWorkerTests, givenAllocationWhenAskedForUnreferenceWithForceFlagSetThenAllocationIsReleasedFromCallingThread) { this->drmMock->gem_close_expected = 1; auto worker = new DrmGemCloseWorker(*mm); auto bo = new BufferObject(this->drmMock, 1, 0, 1); bo->reference(); worker->push(bo); auto r = mm->unreference(bo, true); EXPECT_EQ(1u, r); EXPECT_EQ(drmMock->ioctl_caller_thread_id, std::this_thread::get_id()); delete worker; } TEST_F(DrmGemCloseWorkerTests, givenDrmGemCloseWorkerWhenCloseIsCalledWithBlockingFlagThenThreadIsClosed) { struct mockDrmGemCloseWorker : DrmGemCloseWorker { using DrmGemCloseWorker::DrmGemCloseWorker; using DrmGemCloseWorker::thread; }; std::unique_ptr worker(new mockDrmGemCloseWorker(*mm)); EXPECT_NE(nullptr, worker->thread); worker->close(true); EXPECT_EQ(nullptr, worker->thread); } TEST_F(DrmGemCloseWorkerTests, givenDrmGemCloseWorkerWhenCloseIsCalledMultipleTimeWithBlockingFlagThenThreadIsClosed) { struct mockDrmGemCloseWorker : DrmGemCloseWorker { using DrmGemCloseWorker::DrmGemCloseWorker; using DrmGemCloseWorker::thread; }; std::unique_ptr worker(new mockDrmGemCloseWorker(*mm)); worker->close(true); worker->close(true); 
// Ends the repeated close(true) test. After what looks like a tar member header, drm_mapper_tests.cpp begins:
// DrmMapperTests expect DrmEngineMapper::engineNodeMap to give I915_EXEC_BLT for ENGINE_BCS and ENGINE_BCS1..BCS8,
// I915_EXEC_RENDER for ENGINE_RCS and ENGINE_CCCS, and I915_EXEC_DEFAULT for the ENGINE_CCS variants.
worker->close(true); EXPECT_EQ(nullptr, worker->thread); } compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/linux/drm_mapper_tests.cpp000066400000000000000000000043421422164147700315060ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/linux/drm_engine_mapper.h" #include "shared/test/common/test_macros/test.h" #include "drm/i915_drm.h" using namespace NEO; TEST(DrmMapperTests, GivenEngineWhenMappingNodeThenCorrectEngineReturned) { unsigned int flagBcs = DrmEngineMapper::engineNodeMap(aub_stream::ENGINE_BCS); unsigned int flagRcs = DrmEngineMapper::engineNodeMap(aub_stream::ENGINE_RCS); unsigned int flagCcs = DrmEngineMapper::engineNodeMap(aub_stream::ENGINE_CCS); unsigned int flagCccs = DrmEngineMapper::engineNodeMap(aub_stream::ENGINE_CCCS); unsigned int expectedBcs = I915_EXEC_BLT; unsigned int expectedRcs = I915_EXEC_RENDER; unsigned int expectedCcs = I915_EXEC_DEFAULT; unsigned int expectedCccs = I915_EXEC_RENDER; EXPECT_EQ(expectedBcs, flagBcs); EXPECT_EQ(expectedRcs, flagRcs); EXPECT_EQ(expectedCcs, flagCcs); EXPECT_EQ(expectedCccs, flagCccs); } TEST(DrmMapperTests, givenLinkCopyEngineWhenMapperCalledThenReturnDefaultBltEngine) { const std::array bcsLinkEngines = {{aub_stream::ENGINE_BCS1, aub_stream::ENGINE_BCS2, aub_stream::ENGINE_BCS3, aub_stream::ENGINE_BCS4, aub_stream::ENGINE_BCS5, aub_stream::ENGINE_BCS6, aub_stream::ENGINE_BCS7, aub_stream::ENGINE_BCS8}}; for (auto engine : bcsLinkEngines) { EXPECT_EQ(static_cast(I915_EXEC_BLT), DrmEngineMapper::engineNodeMap(engine)); } } TEST(DrmMapperTests, GivenCcsWhenGettingEngineNodeMapThenReturnDefault) { unsigned int expected = I915_EXEC_DEFAULT; EXPECT_EQ(DrmEngineMapper::engineNodeMap(aub_stream::ENGINE_CCS), expected); EXPECT_EQ(DrmEngineMapper::engineNodeMap(aub_stream::ENGINE_CCS1), expected); EXPECT_EQ(DrmEngineMapper::engineNodeMap(aub_stream::ENGINE_CCS2), expected); 
// Ends the CCS mapping test; ENGINE_VCS is expected to make engineNodeMap throw std::exception. After what looks
// like a tar member header, drm_memory_manager_debug_surface_prelim_tests.cpp begins: a DEBUG_CONTEXT_SAVE_AREA
// request whose size is pageSize + 101 is expected to return nullptr, as is one made while
// alignedMallocShouldFail is set (that test continues on the next line).
EXPECT_EQ(DrmEngineMapper::engineNodeMap(aub_stream::ENGINE_CCS3), expected); } TEST(DrmMapperTests, GivenVcsWhenGettingEngineNodeMapThenExceptionIsThrown) { EXPECT_THROW(DrmEngineMapper::engineNodeMap(aub_stream::ENGINE_VCS), std::exception); } drm_memory_manager_debug_surface_prelim_tests.cpp000066400000000000000000000107561422164147700374010ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/linux/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/os_interface/linux/drm_memory_operations_handler.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/libult/linux/drm_query_mock.h" #include "shared/test/common/mocks/linux/mock_drm_memory_manager.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/mocks/mock_gfx_partition.h" #include "opencl/test/unit_test/os_interface/linux/device_command_stream_fixture_prelim.h" #include "opencl/test/unit_test/os_interface/linux/drm_memory_manager_prelim_fixtures.h" #include "opencl/test/unit_test/os_interface/linux/drm_mock_memory_info.h" #include "gtest/gtest.h" TEST_F(DrmMemoryManagerLocalMemoryPrelimTest, givenCreateDebugSurfaceWithUnalignedSizeCalledThenNullptrReturned) { AllocationProperties debugSurfaceProperties{0, true, MemoryConstants::pageSize + 101, NEO::AllocationType::DEBUG_CONTEXT_SAVE_AREA, false, false, 0b1011}; auto debugSurface = memoryManager->allocateGraphicsMemoryWithProperties(debugSurfaceProperties); EXPECT_EQ(nullptr, debugSurface); } TEST_F(DrmMemoryManagerLocalMemoryPrelimTest, givenCreateDebugSurfaceAndAlignedMallocFailedThenNullptrReturned) { AllocationProperties debugSurfaceProperties{0, true, MemoryConstants::pageSize, NEO::AllocationType::DEBUG_CONTEXT_SAVE_AREA, false, false, 0b1011}; 
// Continues the aligned-malloc failure test (the flag is reset before the check). Next test: with
// mock->ioctl_res = -1 the debug surface allocation is expected to be nullptr after exactly one gemUserptr ioctl.
// The success test (banks 0b1011) expects System4KBPages, 3 GMMs, 3 gemUserptr ioctls, a pageSize underlying
// buffer, 3 * pageSize requested from alignedMalloc and a GPU address inside HEAP_STANDARD that differs from the
// CPU pointer.
memoryManager->alignedMallocShouldFail = true; auto debugSurface = memoryManager->allocateGraphicsMemoryWithProperties(debugSurfaceProperties); memoryManager->alignedMallocShouldFail = false; EXPECT_EQ(nullptr, debugSurface); } TEST_F(DrmMemoryManagerLocalMemoryWithCustomPrelimMockTest, givenCreateDebugSurfaceAndAllocUserptrFailedThenNullptrReturned) { mock->ioctl_res = -1; AllocationProperties debugSurfaceProperties{0, true, MemoryConstants::pageSize, NEO::AllocationType::DEBUG_CONTEXT_SAVE_AREA, false, false, 0b1011}; auto debugSurface = memoryManager->allocateGraphicsMemoryWithProperties(debugSurfaceProperties); mock->ioctl_res = 0; EXPECT_EQ(1, mock->ioctl_cnt.gemUserptr); EXPECT_EQ(nullptr, debugSurface); } TEST_F(DrmMemoryManagerLocalMemoryWithCustomPrelimMockTest, givenCreateDebugSurfaceSuccessThenCorrectMultiHostAllocationReturned) { AllocationProperties debugSurfaceProperties{0, true, MemoryConstants::pageSize, NEO::AllocationType::DEBUG_CONTEXT_SAVE_AREA, false, false, 0b1011}; auto debugSurface = static_cast(memoryManager->allocateGraphicsMemoryWithProperties(debugSurfaceProperties)); EXPECT_NE(nullptr, debugSurface); EXPECT_EQ(MemoryPool::System4KBPages, debugSurface->getMemoryPool()); EXPECT_EQ(3u, debugSurface->getNumGmms()); EXPECT_EQ(3, mock->ioctl_cnt.gemUserptr); EXPECT_NE(nullptr, debugSurface->getUnderlyingBuffer()); EXPECT_EQ(MemoryConstants::pageSize, debugSurface->getUnderlyingBufferSize()); EXPECT_EQ(3 * MemoryConstants::pageSize, memoryManager->alignedMallocSizeRequired); auto gpuAddress = debugSurface->getGpuAddress(); auto gfxPartition = memoryManager->getGfxPartition(0); EXPECT_NE(reinterpret_cast(debugSurface->getUnderlyingBuffer()), gpuAddress); EXPECT_GE(GmmHelper::decanonize(gpuAddress), gfxPartition->getHeapBase(HeapIndex::HEAP_STANDARD)); EXPECT_LT(GmmHelper::decanonize(gpuAddress), gfxPartition->getHeapLimit(HeapIndex::HEAP_STANDARD)); auto &storageInfo = debugSurface->storageInfo; auto &bos = debugSurface->getBOs(); 
// Continues the success test: buffer objects are expected at indices 0, 1 and 3 (all at gpuAddress) with none at
// index 2, and the storage info is expected to be tile-instanced, CPU-visible and lockable. After what looks like a
// tar member header, drm_memory_manager_localmem_prelim_tests.cpp checks that locking a BufferObject returns a
// pointer equal to peekLockedAddress() and that unlocking resets it to nullptr. The license header of
// drm_memory_manager_localmem_tests.cpp starts at the end of this line.
EXPECT_NE(nullptr, bos[0]); EXPECT_EQ(gpuAddress, bos[0]->peekAddress()); EXPECT_NE(nullptr, bos[1]); EXPECT_EQ(gpuAddress, bos[1]->peekAddress()); EXPECT_EQ(nullptr, bos[2]); EXPECT_NE(nullptr, bos[3]); EXPECT_EQ(gpuAddress, bos[3]->peekAddress()); EXPECT_TRUE(debugSurface->isFlushL3Required()); EXPECT_TRUE(debugSurface->isUncacheable()); EXPECT_EQ(debugSurface->getNumGmms(), storageInfo.getNumBanks()); EXPECT_EQ(0b1011u, storageInfo.memoryBanks.to_ulong()); EXPECT_EQ(0b1011u, storageInfo.pageTablesVisibility.to_ulong()); EXPECT_FALSE(storageInfo.cloningOfPageTables); EXPECT_FALSE(storageInfo.multiStorage); EXPECT_FALSE(storageInfo.readOnlyMultiStorage); EXPECT_TRUE(storageInfo.tileInstanced); EXPECT_TRUE(storageInfo.cpuVisibleSegment); EXPECT_TRUE(storageInfo.isLockable); memoryManager->freeGraphicsMemory(debugSurface); } drm_memory_manager_localmem_prelim_tests.cpp000066400000000000000000000014301422164147700363610ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/linux/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/os_interface/linux/drm_memory_manager_prelim_fixtures.h" #include "gtest/gtest.h" TEST_F(DrmMemoryManagerLocalMemoryWithCustomPrelimMockTest, givenDrmMemoryManagerWithLocalMemoryWhenLockResourceIsCalledOnBufferObjectThenReturnPtr) { BufferObject bo(mock, 1, 1024, 1); DrmAllocation drmAllocation(0, AllocationType::UNKNOWN, &bo, nullptr, 0u, 0u, MemoryPool::LocalMemory); EXPECT_EQ(&bo, drmAllocation.getBO()); auto ptr = memoryManager->lockResourceInLocalMemoryImpl(&bo); EXPECT_NE(nullptr, ptr); EXPECT_EQ(ptr, bo.peekLockedAddress()); memoryManager->unlockResourceInLocalMemoryImpl(&bo); EXPECT_EQ(nullptr, bo.peekLockedAddress()); } drm_memory_manager_localmem_tests.cpp000066400000000000000000001432701422164147700350220ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/linux/* * Copyright (C) 2020-2022 Intel 
// (This line begins inside the license block comment opened on the previous line.)
// Includes for drm_memory_manager_localmem_tests.cpp, a forward declaration of createBufferObjectInMemoryRegion in
// namespace NEO, and the start of DrmMemoryManagerLocalMemoryTest::SetUp(), which builds a one-root-device
// ExecutionEnvironment around a DrmTipMock carrying a MockMemoryInfo.
Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/heap_assigner.h" #include "shared/source/os_interface/linux/allocator_helper.h" #include "shared/source/os_interface/linux/drm_memory_manager.h" #include "shared/source/os_interface/linux/drm_memory_operations_handler.h" #include "shared/source/os_interface/os_interface.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/mocks/linux/mock_drm_memory_manager.h" #include "shared/test/common/mocks/mock_execution_environment.h" #include "shared/test/common/mocks/mock_gmm.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/os_interface/linux/drm_mock_impl.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "opencl/test/unit_test/os_interface/linux/drm_memory_manager_tests_impl.h" #include "opencl/test/unit_test/os_interface/linux/drm_mock_memory_info.h" #include "gtest/gtest.h" namespace NEO { BufferObject *createBufferObjectInMemoryRegion(Drm *drm, uint64_t gpuAddress, size_t size, uint32_t memoryBanks, size_t maxOsContextCount); class DrmMemoryManagerLocalMemoryTest : public ::testing::Test { public: DrmTipMock *mock; void SetUp() override { const bool localMemoryEnabled = true; executionEnvironment = new ExecutionEnvironment; executionEnvironment->prepareRootDeviceEnvironments(1); executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->setHwInfo(defaultHwInfo.get()); mock = new DrmTipMock(*executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]); mock->memoryInfo.reset(new MockMemoryInfo()); executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->osInterface = std::make_unique(); executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->osInterface->setDriverModel(std::unique_ptr(mock)); 
// Finishes DrmMemoryManagerLocalMemoryTest: SetUp() creates the memory operations handler, the mock device and the
// memory manager (local memory enabled); isAllocationWithinHeap() returns true when [gpuAddress, gpuAddress + size]
// lies between the canonized base and limit of the given heap. DrmMemoryManagerLocalMemoryWithCustomMockTest then
// repeats the setup with DrmMockCustomImpl and without assigning memoryOperationsInterface.
executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->memoryOperationsInterface = DrmMemoryOperationsHandler::create(*mock, 0u); device.reset(MockDevice::createWithExecutionEnvironment(defaultHwInfo.get(), executionEnvironment, rootDeviceIndex)); memoryManager = std::make_unique(localMemoryEnabled, false, false, *executionEnvironment); } bool isAllocationWithinHeap(const GraphicsAllocation &allocation, HeapIndex heap) { const auto allocationStart = allocation.getGpuAddress(); const auto allocationEnd = allocationStart + allocation.getUnderlyingBufferSize(); const auto heapStart = GmmHelper::canonize(memoryManager->getGfxPartition(rootDeviceIndex)->getHeapBase(heap)); const auto heapEnd = GmmHelper::canonize(memoryManager->getGfxPartition(rootDeviceIndex)->getHeapLimit(heap)); return heapStart <= allocationStart && allocationEnd <= heapEnd; } protected: DebugManagerStateRestore restorer{}; ExecutionEnvironment *executionEnvironment = nullptr; std::unique_ptr device; std::unique_ptr memoryManager; const uint32_t rootDeviceIndex = 0u; }; class DrmMemoryManagerLocalMemoryWithCustomMockTest : public ::testing::Test { public: DrmMockCustomImpl *mock; void SetUp() override { const bool localMemoryEnabled = true; executionEnvironment = new ExecutionEnvironment; executionEnvironment->prepareRootDeviceEnvironments(1); executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); mock = new DrmMockCustomImpl(*executionEnvironment->rootDeviceEnvironments[0]); executionEnvironment->rootDeviceEnvironments[0]->osInterface = std::make_unique(); executionEnvironment->rootDeviceEnvironments[0]->osInterface->setDriverModel(std::unique_ptr(mock)); device.reset(MockDevice::createWithExecutionEnvironment(defaultHwInfo.get(), executionEnvironment, 0)); memoryManager = std::make_unique(localMemoryEnabled, false, false, *executionEnvironment); } protected: ExecutionEnvironment *executionEnvironment = nullptr; std::unique_ptr device; std::unique_ptr 
// Closes the custom-mock fixture. First test: createBufferObjectInMemoryRegion with address 0x1234 and size
// pageSize64k is expected to issue one ioctl, yield handle 1, and place the object in a single
// I915_MEMORY_CLASS_DEVICE region. The second test (continued on the next line) rebuilds three root device
// environments, each with its own DrmTipMock.
memoryManager; }; HWTEST2_F(DrmMemoryManagerLocalMemoryTest, givenDrmMemoryManagerWhenCreateBufferObjectInMemoryRegionIsCalledThenBufferObjectWithAGivenGpuAddressAndSizeIsCreatedAndAllocatedInASpecifiedMemoryRegion, NonDefaultIoctlsSupported) { DebugManagerStateRestore restorer; DebugManager.flags.EnableLocalMemory.set(1); std::vector regionInfo(2); regionInfo[0].region = {I915_MEMORY_CLASS_SYSTEM, 0}; regionInfo[1].region = {I915_MEMORY_CLASS_DEVICE, 0}; mock->memoryInfo.reset(new MemoryInfo(regionInfo)); mock->ioctlCallsCount = 0; auto gpuAddress = 0x1234u; auto size = MemoryConstants::pageSize64k; auto bo = std::unique_ptr(memoryManager->createBufferObjectInMemoryRegion(&memoryManager->getDrm(0), gpuAddress, size, (1 << (MemoryBanks::getBankForLocalMemory(0) - 1)), 1)); ASSERT_NE(nullptr, bo); EXPECT_EQ(1u, mock->ioctlCallsCount); EXPECT_EQ(1u, mock->createExt.handle); EXPECT_EQ(size, mock->createExt.size); EXPECT_EQ(1u, mock->numRegions); auto memRegions = mock->memRegions; EXPECT_EQ(I915_MEMORY_CLASS_DEVICE, memRegions.memory_class); EXPECT_EQ(0u, memRegions.memory_instance); EXPECT_EQ(gpuAddress, bo->peekAddress()); EXPECT_EQ(size, bo->peekSize()); } HWTEST2_F(DrmMemoryManagerLocalMemoryTest, givenMultiRootDeviceEnvironmentAndMemoryInfoWhenCreateMultiGraphicsAllocationThenImportAndExportIoctlAreUsed, NonDefaultIoctlsSupported) { uint32_t rootDevicesNumber = 3u; MultiGraphicsAllocation multiGraphics(rootDevicesNumber); std::vector rootDeviceIndices; auto osInterface = executionEnvironment->rootDeviceEnvironments[0]->osInterface.release(); executionEnvironment->prepareRootDeviceEnvironments(rootDevicesNumber); for (uint32_t i = 0; i < rootDevicesNumber; i++) { executionEnvironment->rootDeviceEnvironments[i]->setHwInfo(defaultHwInfo.get()); auto mock = new DrmTipMock(*executionEnvironment->rootDeviceEnvironments[i]); std::vector regionInfo(2); regionInfo[0].region = {I915_MEMORY_CLASS_SYSTEM, 0}; regionInfo[1].region = {I915_MEMORY_CLASS_DEVICE, 0}; 
// Continues the import/export test: outputFd of root device 0 is set to 7 and inputFd of the other devices is
// expected to be 7; every per-device allocation must be non-null and is freed. The original osInterface of device 0
// is restored at the end. The following test starts the same setup for the import-failure case.
mock->memoryInfo.reset(new MemoryInfo(regionInfo)); mock->ioctlCallsCount = 0; executionEnvironment->rootDeviceEnvironments[i]->osInterface = std::make_unique(); executionEnvironment->rootDeviceEnvironments[i]->osInterface->setDriverModel(std::unique_ptr(mock)); executionEnvironment->rootDeviceEnvironments[i]->memoryOperationsInterface = DrmMemoryOperationsHandler::create(*mock, 0u); rootDeviceIndices.push_back(i); } auto memoryManager = std::make_unique(true, false, false, *executionEnvironment); size_t size = 4096u; AllocationProperties properties(rootDeviceIndex, true, size, AllocationType::BUFFER_HOST_MEMORY, false, {}); static_cast(executionEnvironment->rootDeviceEnvironments[0]->osInterface->getDriverModel()->as())->outputFd = 7; auto ptr = memoryManager->createMultiGraphicsAllocationInSystemMemoryPool(rootDeviceIndices, properties, multiGraphics); EXPECT_NE(ptr, nullptr); EXPECT_NE(static_cast(multiGraphics.getDefaultGraphicsAllocation())->getMmapPtr(), nullptr); for (uint32_t i = 0; i < rootDevicesNumber; i++) { if (i != 0) { EXPECT_EQ(static_cast(executionEnvironment->rootDeviceEnvironments[i]->osInterface->getDriverModel()->as())->inputFd, 7); } EXPECT_NE(multiGraphics.getGraphicsAllocation(i), nullptr); memoryManager->freeGraphicsMemory(multiGraphics.getGraphicsAllocation(i)); } executionEnvironment->rootDeviceEnvironments[0]->osInterface.reset(osInterface); } TEST_F(DrmMemoryManagerLocalMemoryTest, givenMultiRootDeviceEnvironmentAndMemoryInfoWhenCreateMultiGraphicsAllocationAndImportFailsThenNullptrIsReturned) { uint32_t rootDevicesNumber = 3u; MultiGraphicsAllocation multiGraphics(rootDevicesNumber); std::vector rootDeviceIndices; auto osInterface = executionEnvironment->rootDeviceEnvironments[0]->osInterface.release(); executionEnvironment->prepareRootDeviceEnvironments(rootDevicesNumber); for (uint32_t i = 0; i < rootDevicesNumber; i++) { executionEnvironment->rootDeviceEnvironments[i]->setHwInfo(defaultHwInfo.get()); auto mock = new 
// Continues the import-failure case: fdToHandleRetVal = -1 on every mock, so
// createMultiGraphicsAllocationInSystemMemoryPool is expected to return nullptr. DrmMemoryManagerUsmSharedHandleTest
// aliases the local-memory fixture; its first test imports handle 1 as BUFFER_HOST_MEMORY and checks inputFd,
// setTilingHandle == 0, the BO handle, a ref count of 1 and the BO size.
DrmTipMock(*executionEnvironment->rootDeviceEnvironments[i]); std::vector regionInfo(2); regionInfo[0].region = {I915_MEMORY_CLASS_SYSTEM, 0}; regionInfo[1].region = {I915_MEMORY_CLASS_DEVICE, 0}; mock->memoryInfo.reset(new MemoryInfo(regionInfo)); mock->ioctlCallsCount = 0; mock->fdToHandleRetVal = -1; executionEnvironment->rootDeviceEnvironments[i]->osInterface = std::make_unique(); executionEnvironment->rootDeviceEnvironments[i]->osInterface->setDriverModel(std::unique_ptr(mock)); executionEnvironment->rootDeviceEnvironments[i]->memoryOperationsInterface = DrmMemoryOperationsHandler::create(*mock, 0u); rootDeviceIndices.push_back(i); } auto memoryManager = std::make_unique(true, false, false, *executionEnvironment); size_t size = 4096u; AllocationProperties properties(rootDeviceIndex, true, size, AllocationType::BUFFER_HOST_MEMORY, false, {}); auto ptr = memoryManager->createMultiGraphicsAllocationInSystemMemoryPool(rootDeviceIndices, properties, multiGraphics); EXPECT_EQ(ptr, nullptr); executionEnvironment->rootDeviceEnvironments[0]->osInterface.reset(osInterface); } using DrmMemoryManagerUsmSharedHandleTest = DrmMemoryManagerLocalMemoryTest; TEST_F(DrmMemoryManagerUsmSharedHandleTest, givenDrmMemoryManagerAndOsHandleWhenCreateIsCalledWithBufferHostMemoryAllocationTypeThenGraphicsAllocationIsReturned) { osHandle handle = 1u; this->mock->outputHandle = 2u; size_t size = 4096u; AllocationProperties properties(rootDeviceIndex, false, size, AllocationType::BUFFER_HOST_MEMORY, false, {}); auto graphicsAllocation = memoryManager->createGraphicsAllocationFromSharedHandle(handle, properties, false, true); ASSERT_NE(nullptr, graphicsAllocation); EXPECT_EQ(this->mock->inputFd, (int)handle); EXPECT_EQ(this->mock->setTilingHandle, 0u); DrmAllocation *drmAllocation = static_cast(graphicsAllocation); auto bo = drmAllocation->getBO(); EXPECT_EQ(bo->peekHandle(), (int)this->mock->outputHandle); EXPECT_EQ(1u, bo->getRefCount()); EXPECT_EQ(size, bo->peekSize()); 
// Frees the allocation of the previous test. Next: with fdToHandleRetVal = -1 on a single root device,
// createUSMHostAllocationFromSharedHandle is expected to return nullptr. The last test on this line starts the
// three-device setup for the case where memoryInfo is absent.
memoryManager->freeGraphicsMemory(graphicsAllocation); } TEST_F(DrmMemoryManagerUsmSharedHandleTest, givenMultiRootDeviceEnvironmentAndMemoryInfoWhenCreateMultiGraphicsAllocationAndImportFailsThenNullptrIsReturned) { uint32_t rootDevicesNumber = 1u; uint32_t rootDeviceIndex = 0u; MultiGraphicsAllocation multiGraphics(rootDevicesNumber); std::vector rootDeviceIndices; auto osInterface = executionEnvironment->rootDeviceEnvironments[0]->osInterface.release(); executionEnvironment->prepareRootDeviceEnvironments(rootDevicesNumber); executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->setHwInfo(defaultHwInfo.get()); auto mock = new DrmTipMock(*executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]); std::vector regionInfo(2); regionInfo[0].region = {I915_MEMORY_CLASS_SYSTEM, 0}; regionInfo[1].region = {I915_MEMORY_CLASS_DEVICE, 0}; mock->memoryInfo.reset(new MemoryInfo(regionInfo)); mock->ioctlCallsCount = 0; mock->fdToHandleRetVal = -1; executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->osInterface = std::make_unique(); executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->osInterface->setDriverModel(std::unique_ptr(mock)); executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->memoryOperationsInterface = DrmMemoryOperationsHandler::create(*mock, 0u); rootDeviceIndices.push_back(rootDeviceIndex); auto memoryManager = std::make_unique(true, false, false, *executionEnvironment); size_t size = 4096u; AllocationProperties properties(rootDeviceIndex, true, size, AllocationType::BUFFER_HOST_MEMORY, false, {}); auto ptr = memoryManager->createUSMHostAllocationFromSharedHandle(1, properties, false); EXPECT_EQ(ptr, nullptr); executionEnvironment->rootDeviceEnvironments[0]->osInterface.reset(osInterface); } TEST_F(DrmMemoryManagerLocalMemoryTest, givenMultiRootDeviceEnvironmentAndNoMemoryInfoWhenCreateMultiGraphicsAllocationThenOldPathIsUsed) { uint32_t rootDevicesNumber = 3u; MultiGraphicsAllocation multiGraphics(rootDevicesNumber); 
// Continues the no-memoryInfo case: the multi allocation must succeed, getMmapPtr() of the default allocation is
// expected to be nullptr, and each per-device allocation is freed. The next test sets EnableBOMmapCreate to -1 and
// supplies system + device memory regions before allocating with alignment.
std::vector rootDeviceIndices; auto osInterface = executionEnvironment->rootDeviceEnvironments[0]->osInterface.release(); executionEnvironment->prepareRootDeviceEnvironments(rootDevicesNumber); for (uint32_t i = 0; i < rootDevicesNumber; i++) { executionEnvironment->rootDeviceEnvironments[i]->setHwInfo(defaultHwInfo.get()); auto mock = new DrmTipMock(*executionEnvironment->rootDeviceEnvironments[i]); mock->memoryInfo.reset(nullptr); mock->ioctlCallsCount = 0; mock->fdToHandleRetVal = -1; executionEnvironment->rootDeviceEnvironments[i]->osInterface = std::make_unique(); executionEnvironment->rootDeviceEnvironments[i]->osInterface->setDriverModel(std::unique_ptr(mock)); executionEnvironment->rootDeviceEnvironments[i]->memoryOperationsInterface = DrmMemoryOperationsHandler::create(*mock, 0u); rootDeviceIndices.push_back(i); } auto memoryManager = std::make_unique(true, false, false, *executionEnvironment); size_t size = 4096u; AllocationProperties properties(rootDeviceIndex, true, size, AllocationType::BUFFER_HOST_MEMORY, false, {}); auto ptr = memoryManager->createMultiGraphicsAllocationInSystemMemoryPool(rootDeviceIndices, properties, multiGraphics); EXPECT_NE(ptr, nullptr); EXPECT_EQ(static_cast(multiGraphics.getDefaultGraphicsAllocation())->getMmapPtr(), nullptr); for (uint32_t i = 0; i < rootDevicesNumber; i++) { EXPECT_NE(multiGraphics.getGraphicsAllocation(i), nullptr); memoryManager->freeGraphicsMemory(multiGraphics.getGraphicsAllocation(i)); } executionEnvironment->rootDeviceEnvironments[0]->osInterface.reset(osInterface); } HWTEST2_F(DrmMemoryManagerLocalMemoryTest, givenMemoryInfoWhenAllocateWithAlignmentThenGemCreateExtIsUsed, NonDefaultIoctlsSupported) { DebugManagerStateRestore restorer; DebugManager.flags.EnableBOMmapCreate.set(-1); std::vector regionInfo(2); regionInfo[0].region = {I915_MEMORY_CLASS_SYSTEM, 0}; regionInfo[1].region = {I915_MEMORY_CLASS_DEVICE, 0}; mock->memoryInfo.reset(new MemoryInfo(regionInfo)); mock->ioctlCallsCount = 0; 
// Continues the gem-create-ext test: the allocation is expected to have a non-null mmap pointer, non-zero mmap
// size, zero allocation offset, and createExt.handle == 1. Next: with EnableBOMmapCreate = 0 and
// useMmapObject = false the BO handle is expected to be returnHandle - 1; with EnableBOMmapCreate = -1 and
// mmapOffsetRetVal = -1 the allocation is expected to be nullptr.
AllocationData allocationData; allocationData.size = MemoryConstants::pageSize64k; auto allocation = memoryManager->allocateGraphicsMemoryWithAlignment(allocationData); EXPECT_NE(allocation, nullptr); EXPECT_NE(allocation->getMmapPtr(), nullptr); EXPECT_NE(allocation->getMmapSize(), 0u); EXPECT_EQ(allocation->getAllocationOffset(), 0u); EXPECT_EQ(1u, mock->createExt.handle); memoryManager->freeGraphicsMemory(allocation); } TEST_F(DrmMemoryManagerLocalMemoryTest, givenMemoryInfoAndNotUseObjectMmapPropertyWhenAllocateWithAlignmentThenUserptrIsUsed) { DebugManagerStateRestore restorer; DebugManager.flags.EnableBOMmapCreate.set(0); std::vector regionInfo(2); regionInfo[0].region = {I915_MEMORY_CLASS_SYSTEM, 0}; regionInfo[1].region = {I915_MEMORY_CLASS_DEVICE, 0}; mock->memoryInfo.reset(new MemoryInfo(regionInfo)); mock->mmapOffsetRetVal = -1; AllocationData allocationData; allocationData.size = MemoryConstants::pageSize64k; allocationData.useMmapObject = false; auto allocation = memoryManager->allocateGraphicsMemoryWithAlignment(allocationData); EXPECT_NE(allocation, nullptr); EXPECT_EQ(static_cast(mock->returnHandle), allocation->getBO()->peekHandle() + 1); memoryManager->freeGraphicsMemory(allocation); } TEST_F(DrmMemoryManagerLocalMemoryTest, givenMemoryInfoAndFailedMmapOffsetWhenAllocateWithAlignmentThenNullptr) { DebugManagerStateRestore restorer; DebugManager.flags.EnableBOMmapCreate.set(-1); std::vector regionInfo(2); regionInfo[0].region = {I915_MEMORY_CLASS_SYSTEM, 0}; regionInfo[1].region = {I915_MEMORY_CLASS_DEVICE, 0}; mock->memoryInfo.reset(new MemoryInfo(regionInfo)); mock->mmapOffsetRetVal = -1; AllocationData allocationData; allocationData.size = MemoryConstants::pageSize64k; auto allocation = memoryManager->allocateGraphicsMemoryWithAlignment(allocationData); EXPECT_EQ(allocation, nullptr); mock->mmapOffsetRetVal = 0; } TEST_F(DrmMemoryManagerLocalMemoryTest, givenMemoryInfoAndDisabledMmapBOCreationtWhenAllocateWithAlignmentThenUserptrIsUsed) { 
// Continues the disabled-mmap test (EnableBOMmapCreate = 0): the allocation must succeed with BO handle equal to
// returnHandle - 1. Next: gemCreateExtRetVal = -1 is expected to make allocateGraphicsMemoryWithAlignment return
// nullptr. DrmMemoryManagerLocalMemoryMemoryBankMock then starts; its createBufferObjectInMemoryRegion override
// records whether memoryBanks == 1.
DebugManagerStateRestore restorer; DebugManager.flags.EnableBOMmapCreate.set(0); std::vector regionInfo(2); regionInfo[0].region = {I915_MEMORY_CLASS_SYSTEM, 0}; regionInfo[1].region = {I915_MEMORY_CLASS_DEVICE, 0}; mock->memoryInfo.reset(new MemoryInfo(regionInfo)); mock->mmapOffsetRetVal = -1; AllocationData allocationData; allocationData.size = MemoryConstants::pageSize64k; auto allocation = memoryManager->allocateGraphicsMemoryWithAlignment(allocationData); EXPECT_NE(allocation, nullptr); EXPECT_EQ(static_cast(mock->returnHandle), allocation->getBO()->peekHandle() + 1); memoryManager->freeGraphicsMemory(allocation); } TEST_F(DrmMemoryManagerLocalMemoryTest, givenMemoryInfoAndFailedGemCreateExtWhenAllocateWithAlignmentThenNullptr) { DebugManagerStateRestore restorer; DebugManager.flags.EnableBOMmapCreate.set(-1); std::vector regionInfo(2); regionInfo[0].region = {I915_MEMORY_CLASS_SYSTEM, 0}; regionInfo[1].region = {I915_MEMORY_CLASS_DEVICE, 0}; mock->memoryInfo.reset(new MemoryInfo(regionInfo)); mock->gemCreateExtRetVal = -1; AllocationData allocationData; allocationData.size = MemoryConstants::pageSize64k; auto allocation = memoryManager->allocateGraphicsMemoryWithAlignment(allocationData); EXPECT_EQ(allocation, nullptr); mock->gemCreateExtRetVal = 0; } class DrmMemoryManagerLocalMemoryMemoryBankMock : public TestedDrmMemoryManager { public: DrmMemoryManagerLocalMemoryMemoryBankMock(bool enableLocalMemory, bool allowForcePin, bool validateHostPtrMemory, ExecutionEnvironment &executionEnvironment) : TestedDrmMemoryManager(enableLocalMemory, allowForcePin, validateHostPtrMemory, executionEnvironment) { } BufferObject *createBufferObjectInMemoryRegion(Drm *drm, uint64_t gpuAddress, size_t size, uint32_t memoryBanks, size_t maxOsContextCount) override { memoryBankIsOne = (memoryBanks == 1) ? 
// Finishes the bank mock (the override always returns nullptr) and defines
// DrmMemoryManagerLocalMemoryMemoryBankTest, whose SetUp mirrors the local-memory fixture. Its test allocates in
// the device pool with storageInfo.memoryBanks = 1 and expects memoryBankIsOne to be true. A HWTEST2_F header
// follows and is completed on the next line.
true : false; return nullptr; } bool memoryBankIsOne = false; }; class DrmMemoryManagerLocalMemoryMemoryBankTest : public ::testing::Test { public: DrmTipMock *mock; void SetUp() override { const bool localMemoryEnabled = true; executionEnvironment = new ExecutionEnvironment; executionEnvironment->prepareRootDeviceEnvironments(1); executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->setHwInfo(defaultHwInfo.get()); mock = new DrmTipMock(*executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]); mock->memoryInfo.reset(new MockMemoryInfo()); executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->osInterface = std::make_unique(); executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->osInterface->setDriverModel(std::unique_ptr(mock)); executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->memoryOperationsInterface = DrmMemoryOperationsHandler::create(*mock, 0u); device.reset(MockDevice::createWithExecutionEnvironment(defaultHwInfo.get(), executionEnvironment, rootDeviceIndex)); memoryManager = std::make_unique(localMemoryEnabled, false, false, *executionEnvironment); } protected: ExecutionEnvironment *executionEnvironment = nullptr; std::unique_ptr device; std::unique_ptr memoryManager; const uint32_t rootDeviceIndex = 0u; }; TEST_F(DrmMemoryManagerLocalMemoryMemoryBankTest, givenDeviceMemoryWhenGraphicsAllocationInDevicePoolIsAllocatedThenMemoryBankIsSetToOne) { MemoryManager::AllocationStatus status = MemoryManager::AllocationStatus::Success; AllocationData allocData; allocData.allFlags = 0; allocData.size = MemoryConstants::pageSize; allocData.flags.useSystemMemory = false; allocData.type = AllocationType::BUFFER; allocData.rootDeviceIndex = rootDeviceIndex; allocData.storageInfo.memoryBanks = 1u; memoryManager->allocateGraphicsMemoryInDevicePool(allocData, status); EXPECT_TRUE(memoryManager->memoryBankIsOne); } HWTEST2_F(DrmMemoryManagerLocalMemoryTest, givenCpuAccessRequiredWhenAllocatingInDevicePoolThenAllocationIsLocked, 
// Body of the requiresCpuAccess test: the device-pool allocation is expected to succeed in LocalMemory, be locked,
// and have non-null locked pointer, underlying buffer and GPU address. The WRITE_COMBINED test then expects the
// locked pointer to double as underlying buffer and GPU address, with the buffer size equal to sizeAligned.
NonDefaultIoctlsSupported) { MemoryManager::AllocationStatus status = MemoryManager::AllocationStatus::Success; AllocationData allocData; allocData.allFlags = 0; allocData.size = MemoryConstants::pageSize; allocData.flags.requiresCpuAccess = true; allocData.flags.allocateMemory = true; allocData.type = AllocationType::BUFFER; allocData.rootDeviceIndex = rootDeviceIndex; auto allocation = memoryManager->allocateGraphicsMemoryInDevicePool(allocData, status); ASSERT_NE(nullptr, allocation); EXPECT_EQ(MemoryManager::AllocationStatus::Success, status); EXPECT_EQ(MemoryPool::LocalMemory, allocation->getMemoryPool()); EXPECT_TRUE(allocation->isLocked()); EXPECT_NE(nullptr, allocation->getLockedPtr()); EXPECT_NE(nullptr, allocation->getUnderlyingBuffer()); EXPECT_NE(0u, allocation->getGpuAddress()); memoryManager->freeGraphicsMemory(allocation); } HWTEST2_F(DrmMemoryManagerLocalMemoryTest, givenWriteCombinedAllocationWhenAllocatingInDevicePoolThenAllocationIsLockedAndLockedPtrIsUsedAsGpuAddress, NonDefaultIoctlsSupported) { MemoryManager::AllocationStatus status = MemoryManager::AllocationStatus::Success; AllocationData allocData{}; allocData.size = MemoryConstants::pageSize; allocData.type = AllocationType::WRITE_COMBINED; allocData.rootDeviceIndex = rootDeviceIndex; auto sizeAligned = alignUp(allocData.size + MemoryConstants::pageSize64k, 2 * MemoryConstants::megaByte) + 2 * MemoryConstants::megaByte; auto allocation = memoryManager->allocateGraphicsMemoryInDevicePool(allocData, status); EXPECT_NE(nullptr, allocation); EXPECT_EQ(MemoryManager::AllocationStatus::Success, status); EXPECT_EQ(MemoryPool::LocalMemory, allocation->getMemoryPool()); EXPECT_TRUE(allocation->isLocked()); EXPECT_NE(nullptr, allocation->getLockedPtr()); EXPECT_EQ(allocation->getLockedPtr(), allocation->getUnderlyingBuffer()); EXPECT_EQ(allocation->getLockedPtr(), reinterpret_cast(allocation->getGpuAddress())); EXPECT_EQ(sizeAligned, allocation->getUnderlyingBufferSize()); EXPECT_EQ(0u, 
// Continues the WRITE_COMBINED test: reserved address size 0, allocation offset equal to the distance of the
// locked pointer from its 2 * megaByte aligned-down address, and a BO whose address and size match. The next test
// iterates the listed allocation types for both resource48Bit settings.
allocation->getReservedAddressSize()); auto cpuAddress = allocation->getLockedPtr(); auto alignedCpuAddress = alignDown(cpuAddress, 2 * MemoryConstants::megaByte); auto offset = ptrDiff(cpuAddress, alignedCpuAddress); EXPECT_EQ(offset, allocation->getAllocationOffset()); auto drmAllocation = static_cast(allocation); auto bo = drmAllocation->getBO(); EXPECT_NE(nullptr, bo); EXPECT_EQ(reinterpret_cast(cpuAddress), bo->peekAddress()); EXPECT_EQ(sizeAligned, bo->peekSize()); memoryManager->freeGraphicsMemory(allocation); } HWTEST2_F(DrmMemoryManagerLocalMemoryTest, givenSupportedTypeWhenAllocatingInDevicePoolThenSuccessStatusAndNonNullPtrIsReturned, NonDefaultIoctlsSupported) { MemoryManager::AllocationStatus status = MemoryManager::AllocationStatus::Success; AllocationData allocData; allocData.allFlags = 0; allocData.size = MemoryConstants::pageSize; allocData.flags.allocateMemory = true; allocData.rootDeviceIndex = rootDeviceIndex; ImageDescriptor imgDesc = {}; imgDesc.imageType = ImageType::Image2D; imgDesc.imageWidth = MemoryConstants::pageSize; imgDesc.imageHeight = MemoryConstants::pageSize; auto imgInfo = MockGmm::initImgInfo(imgDesc, 0, nullptr); bool resource48Bit[] = {true, false}; AllocationType supportedTypes[] = {AllocationType::BUFFER, AllocationType::IMAGE, AllocationType::COMMAND_BUFFER, AllocationType::LINEAR_STREAM, AllocationType::INDIRECT_OBJECT_HEAP, AllocationType::TIMESTAMP_PACKET_TAG_BUFFER, AllocationType::INTERNAL_HEAP, AllocationType::KERNEL_ISA, AllocationType::SVM_GPU}; for (auto res48bit : resource48Bit) { for (auto supportedType : supportedTypes) { allocData.type = supportedType; allocData.imgInfo = (AllocationType::IMAGE == supportedType) ? &imgInfo : nullptr; allocData.hostPtr = (AllocationType::SVM_GPU == supportedType) ? 
// Loop body of the supported-types test: SVM_GPU gets an aligned host pointer; IMAGE, INDIRECT_OBJECT_HEAP,
// INTERNAL_HEAP and KERNEL_ISA force resource48Bit. The GPU address is then expected to lie strictly inside
// HEAP_SVM (unless limited range), HEAP_INTERNAL_DEVICE_MEMORY, or one of HEAP_STANDARD2MB / HEAP_EXTENDED /
// HEAP_STANDARD64KB, chosen by buffer size and the resource48Bit flag.
::alignedMalloc(allocData.size, 4096) : nullptr; switch (supportedType) { case AllocationType::IMAGE: case AllocationType::INDIRECT_OBJECT_HEAP: case AllocationType::INTERNAL_HEAP: case AllocationType::KERNEL_ISA: allocData.flags.resource48Bit = true; break; default: allocData.flags.resource48Bit = res48bit; } auto allocation = memoryManager->allocateGraphicsMemoryInDevicePool(allocData, status); ASSERT_NE(nullptr, allocation); EXPECT_EQ(MemoryManager::AllocationStatus::Success, status); auto gpuAddress = allocation->getGpuAddress(); if (allocation->getAllocationType() == AllocationType::SVM_GPU) { if (!memoryManager->isLimitedRange(0)) { EXPECT_LT(GmmHelper::canonize(memoryManager->getGfxPartition(0)->getHeapBase(HeapIndex::HEAP_SVM)), gpuAddress); EXPECT_GT(GmmHelper::canonize(memoryManager->getGfxPartition(0)->getHeapLimit(HeapIndex::HEAP_SVM)), gpuAddress); } } else if (memoryManager->heapAssigner.useInternal32BitHeap(allocation->getAllocationType())) { EXPECT_LT(GmmHelper::canonize(memoryManager->getGfxPartition(0)->getHeapBase(HeapIndex::HEAP_INTERNAL_DEVICE_MEMORY)), gpuAddress); EXPECT_GT(GmmHelper::canonize(memoryManager->getGfxPartition(0)->getHeapLimit(HeapIndex::HEAP_INTERNAL_DEVICE_MEMORY)), gpuAddress); } else { const bool prefer2MBAlignment = allocation->getUnderlyingBufferSize() >= 2 * MemoryConstants::megaByte; auto heap = HeapIndex::HEAP_STANDARD64KB; if (prefer2MBAlignment) { heap = HeapIndex::HEAP_STANDARD2MB; } else if (memoryManager->getGfxPartition(0)->getHeapLimit(HeapIndex::HEAP_EXTENDED) > 0 && !allocData.flags.resource48Bit) { heap = HeapIndex::HEAP_EXTENDED; } EXPECT_LT(GmmHelper::canonize(memoryManager->getGfxPartition(0)->getHeapBase(heap)), gpuAddress); EXPECT_GT(GmmHelper::canonize(memoryManager->getGfxPartition(0)->getHeapLimit(heap)), gpuAddress); } memoryManager->freeGraphicsMemory(allocation); if (AllocationType::SVM_GPU == supportedType) { ::alignedFree(const_cast(allocData.hostPtr)); } } } } 
TEST_F(DrmMemoryManagerLocalMemoryTest, givenDrmMemoryManagerWithLocalMemoryWhenLockResourceIsCalledOnNullBufferObjectThenReturnNullPtr) { auto ptr = memoryManager->lockResourceInLocalMemoryImpl(nullptr); EXPECT_EQ(nullptr, ptr); memoryManager->unlockResourceInLocalMemoryImpl(nullptr); } TEST_F(DrmMemoryManagerLocalMemoryWithCustomMockTest, givenDrmMemoryManagerWithLocalMemoryWhenLockResourceIsCalledOnBufferObjectThenReturnPtr) { BufferObject bo(mock, 1, 1024, 0); DrmAllocation drmAllocation(0, AllocationType::UNKNOWN, &bo, nullptr, 0u, 0u, MemoryPool::LocalMemory); EXPECT_EQ(&bo, drmAllocation.getBO()); auto ptr = memoryManager->lockResourceInLocalMemoryImpl(&bo); EXPECT_NE(nullptr, ptr); EXPECT_EQ(ptr, bo.peekLockedAddress()); memoryManager->unlockResourceInLocalMemoryImpl(&bo); EXPECT_EQ(nullptr, bo.peekLockedAddress()); } using DrmMemoryManagerFailInjectionTest = Test; HWTEST2_F(DrmMemoryManagerFailInjectionTest, givenEnabledLocalMemoryWhenNewFailsThenAllocateInDevicePoolReturnsStatusErrorAndNullallocation, NonDefaultIoctlsSupported) { mock->ioctl_expected.total = -1; //don't care class MockGfxPartition : public GfxPartition { public: MockGfxPartition() : GfxPartition(reservedCpuAddressRange) { init(defaultHwInfo->capabilityTable.gpuAddressSpace, getSizeToReserve(), 0, 1); } ~MockGfxPartition() override { for (const auto &heap : heaps) { auto mockHeap = static_cast(&heap); if (defaultHwInfo->capabilityTable.gpuAddressSpace != MemoryConstants::max36BitAddress && mockHeap->getSize() > 0) { EXPECT_EQ(0u, mockHeap->alloc->getUsedSize()); } } } struct MockHeap : Heap { using Heap::alloc; }; OSMemory::ReservedCpuAddressRange reservedCpuAddressRange; }; TestedDrmMemoryManager testedMemoryManager(true, false, true, *executionEnvironment); testedMemoryManager.overrideGfxPartition(new MockGfxPartition); InjectedFunction method = [&](size_t failureIndex) { MemoryManager::AllocationStatus status = MemoryManager::AllocationStatus::Success; AllocationData allocData; 
allocData.allFlags = 0; allocData.size = MemoryConstants::pageSize; allocData.flags.allocateMemory = true; allocData.type = AllocationType::BUFFER; allocData.rootDeviceIndex = rootDeviceIndex; auto allocation = testedMemoryManager.allocateGraphicsMemoryInDevicePool(allocData, status); if (MemoryManagement::nonfailingAllocation != failureIndex) { EXPECT_EQ(nullptr, allocation); EXPECT_EQ(MemoryManager::AllocationStatus::Error, status); } else { EXPECT_NE(nullptr, allocation); EXPECT_EQ(MemoryManager::AllocationStatus::Success, status); testedMemoryManager.freeGraphicsMemory(allocation); } }; mock->memoryInfo.reset(new MockMemoryInfo()); injectFailures(method); } using DrmMemoryManagerCopyMemoryToAllocationTest = DrmMemoryManagerLocalMemoryTest; struct DrmMemoryManagerToTestCopyMemoryToAllocation : public DrmMemoryManager { using DrmMemoryManager::allocateGraphicsMemoryInDevicePool; DrmMemoryManagerToTestCopyMemoryToAllocation(ExecutionEnvironment &executionEnvironment, bool localMemoryEnabled, size_t lockableLocalMemorySize) : DrmMemoryManager(gemCloseWorkerMode::gemCloseWorkerInactive, false, false, executionEnvironment) { std::fill(this->localMemorySupported.begin(), this->localMemorySupported.end(), localMemoryEnabled); lockedLocalMemorySize = lockableLocalMemorySize; } void *lockResourceImpl(GraphicsAllocation &graphicsAllocation) override { if (lockedLocalMemorySize > 0) { lockedLocalMemory.reset(new uint8_t[lockedLocalMemorySize]); return lockedLocalMemory.get(); } return nullptr; } void *lockResourceInLocalMemoryImpl(BufferObject *bo) override { if (lockedLocalMemorySize > 0) { lockedLocalMemory.reset(new uint8_t[lockedLocalMemorySize]); return lockedLocalMemory.get(); } return nullptr; } void unlockResourceInLocalMemoryImpl(BufferObject *bo) override { } void unlockResourceImpl(GraphicsAllocation &graphicsAllocation) override { } std::unique_ptr lockedLocalMemory; size_t lockedLocalMemorySize = 0; }; HWTEST2_F(DrmMemoryManagerCopyMemoryToAllocationTest, 
givenDrmMemoryManagerWhenCopyMemoryToAllocationReturnsSuccessThenAllocationIsFilledWithCorrectData, NonDefaultIoctlsSupported) { size_t offset = 3; size_t sourceAllocationSize = MemoryConstants::pageSize; size_t destinationAllocationSize = sourceAllocationSize + offset; DrmMemoryManagerToTestCopyMemoryToAllocation drmMemoryManger(*executionEnvironment, true, destinationAllocationSize); std::vector dataToCopy(sourceAllocationSize, 1u); AllocationData allocData; allocData.allFlags = 0; allocData.size = destinationAllocationSize; allocData.flags.allocateMemory = true; allocData.type = AllocationType::KERNEL_ISA; allocData.rootDeviceIndex = rootDeviceIndex; allocData.storageInfo.memoryBanks.set(0, true); MemoryManager::AllocationStatus status = MemoryManager::AllocationStatus::Success; auto allocation = drmMemoryManger.allocateGraphicsMemoryInDevicePool(allocData, status); ASSERT_NE(nullptr, allocation); auto ret = drmMemoryManger.copyMemoryToAllocation(allocation, offset, dataToCopy.data(), dataToCopy.size()); EXPECT_TRUE(ret); EXPECT_EQ(0, memcmp(ptrOffset(drmMemoryManger.lockedLocalMemory.get(), offset), dataToCopy.data(), dataToCopy.size())); drmMemoryManger.freeGraphicsMemory(allocation); } HWTEST2_F(DrmMemoryManagerCopyMemoryToAllocationTest, givenDrmMemoryManagerWhenCopyMemoryToAllocationFailsToLockResourceThenItReturnsFalse, NonDefaultIoctlsSupported) { DrmMemoryManagerToTestCopyMemoryToAllocation drmMemoryManger(*executionEnvironment, true, 0); std::vector dataToCopy(MemoryConstants::pageSize, 1u); AllocationData allocData; allocData.allFlags = 0; allocData.size = dataToCopy.size(); allocData.flags.allocateMemory = true; allocData.type = AllocationType::KERNEL_ISA; allocData.rootDeviceIndex = rootDeviceIndex; allocData.storageInfo.memoryBanks.set(0, true); MemoryManager::AllocationStatus status = MemoryManager::AllocationStatus::Success; auto allocation = drmMemoryManger.allocateGraphicsMemoryInDevicePool(allocData, status); ASSERT_NE(nullptr, allocation); 
auto ret = drmMemoryManger.copyMemoryToAllocation(allocation, 0, dataToCopy.data(), dataToCopy.size()); EXPECT_FALSE(ret); drmMemoryManger.freeGraphicsMemory(allocation); } TEST_F(DrmMemoryManagerCopyMemoryToAllocationTest, givenDrmMemoryManagerWhenCopyMemoryToAllocationWithCpuPtrThenAllocationIsFilledWithCorrectData) { size_t offset = 3; size_t sourceAllocationSize = MemoryConstants::pageSize; size_t destinationAllocationSize = sourceAllocationSize + offset; DrmMemoryManagerToTestCopyMemoryToAllocation drmMemoryManger(*executionEnvironment, false, 0); std::vector dataToCopy(sourceAllocationSize, 1u); auto allocation = drmMemoryManger.allocateGraphicsMemoryWithProperties({mockRootDeviceIndex, destinationAllocationSize, AllocationType::KERNEL_ISA, mockDeviceBitfield}); ASSERT_NE(nullptr, allocation); auto ret = drmMemoryManger.copyMemoryToAllocation(allocation, offset, dataToCopy.data(), dataToCopy.size()); EXPECT_TRUE(ret); EXPECT_EQ(0, memcmp(ptrOffset(allocation->getUnderlyingBuffer(), offset), dataToCopy.data(), dataToCopy.size())); drmMemoryManger.freeGraphicsMemory(allocation); } using DrmMemoryManagerTestImpl = Test; HWTEST2_F(DrmMemoryManagerTestImpl, givenDrmMemoryManagerWhenLockUnlockIsCalledOnAllocationInLocalMemoryThenCallIoctlGemMapOffsetAndReturnLockedPtr, NonDefaultIoctlsSupported) { mockExp->ioctlImpl_expected.gemCreateExt = 1; mockExp->ioctl_expected.gemWait = 1; mockExp->ioctl_expected.gemClose = 1; mockExp->ioctl_expected.gemMmapOffset = 1; mockExp->memoryInfo.reset(new MockMemoryInfo()); AllocationData allocData; allocData.allFlags = 0; allocData.size = MemoryConstants::pageSize; allocData.flags.allocateMemory = true; allocData.type = AllocationType::INTERNAL_HEAP; allocData.rootDeviceIndex = rootDeviceIndex; MemoryManager::AllocationStatus status = MemoryManager::AllocationStatus::Success; auto allocation = memoryManager->allocateGraphicsMemoryInDevicePool(allocData, status); ASSERT_NE(nullptr, allocation); EXPECT_EQ(nullptr, 
allocation->getUnderlyingBuffer()); EXPECT_EQ(MemoryPool::LocalMemory, allocation->getMemoryPool()); auto ptr = memoryManager->lockResource(allocation); EXPECT_NE(nullptr, ptr); auto drmAllocation = static_cast(allocation); EXPECT_NE(nullptr, drmAllocation->getBO()->peekLockedAddress()); EXPECT_EQ(static_cast(drmAllocation->getBO()->peekHandle()), mockExp->mmapOffsetHandle); EXPECT_EQ(0u, mockExp->mmapOffsetPad); EXPECT_EQ(0u, mockExp->mmapOffsetExpected); EXPECT_EQ(4u, mockExp->mmapOffsetFlags); memoryManager->unlockResource(allocation); EXPECT_EQ(nullptr, drmAllocation->getBO()->peekLockedAddress()); memoryManager->freeGraphicsMemory(allocation); } TEST_F(DrmMemoryManagerTestImpl, givenDrmMemoryManagerWhenLockUnlockIsCalledOnAllocationInLocalMemoryButFailsOnMmapThenReturnNullPtr) { mockExp->ioctl_expected.gemMmapOffset = 2; this->ioctlResExt = {mockExp->ioctl_cnt.total, -1}; mockExp->ioctl_res_ext = &ioctlResExt; BufferObject bo(mockExp, 1, 0, 0); DrmAllocation drmAllocation(0, AllocationType::UNKNOWN, &bo, nullptr, 0u, 0u, MemoryPool::LocalMemory); EXPECT_NE(nullptr, drmAllocation.getBO()); auto ptr = memoryManager->lockResource(&drmAllocation); EXPECT_EQ(nullptr, ptr); memoryManager->unlockResource(&drmAllocation); mockExp->ioctl_res_ext = &mockExp->NONE; } TEST_F(DrmMemoryManagerTestImpl, givenDrmMemoryManagerWhenLockUnlockIsCalledOnAllocationInLocalMemoryButFailsOnIoctlMmapFunctionOffsetThenReturnNullPtr) { mockExp->ioctl_expected.gemMmapOffset = 2; mockExp->returnIoctlExtraErrorValue = true; mockExp->failOnMmapOffset = true; BufferObject bo(mockExp, 1, 0, 0); DrmAllocation drmAllocation(0, AllocationType::UNKNOWN, &bo, nullptr, 0u, 0u, MemoryPool::LocalMemory); EXPECT_NE(nullptr, drmAllocation.getBO()); auto ptr = memoryManager->lockResource(&drmAllocation); EXPECT_EQ(nullptr, ptr); memoryManager->unlockResource(&drmAllocation); mockExp->ioctl_res_ext = &mockExp->NONE; } TEST_F(DrmMemoryManagerTestImpl, 
givenDrmMemoryManagerWhenLockUnlockIsCalledOnAllocationInLocalMemoryButBufferObjectIsNullThenReturnNullPtr) { DrmAllocation drmAllocation(0, AllocationType::UNKNOWN, nullptr, nullptr, 0u, 0u, MemoryPool::LocalMemory); auto ptr = memoryManager->lockResource(&drmAllocation); EXPECT_EQ(nullptr, ptr); memoryManager->unlockResource(&drmAllocation); } TEST_F(DrmMemoryManagerTestImpl, givenDrmMemoryManagerWhenGetLocalMemorySizeIsCalledForMemoryInfoThenReturnMemoryRegionSize) { MockExecutionEnvironment executionEnvironment; executionEnvironment.rootDeviceEnvironments[0]->osInterface = std::make_unique(); auto drm = new DrmMock(*executionEnvironment.rootDeviceEnvironments[0]); drm->memoryInfo.reset(new MockMemoryInfo()); executionEnvironment.rootDeviceEnvironments[0]->osInterface->setDriverModel(std::unique_ptr(drm)); TestedDrmMemoryManager memoryManager(executionEnvironment); auto memoryInfo = drm->getMemoryInfo(); ASSERT_NE(nullptr, memoryInfo); EXPECT_EQ(memoryInfo->getMemoryRegionSize(MemoryBanks::getBankForLocalMemory(0)), memoryManager.getLocalMemorySize(0u, 0xF)); } TEST_F(DrmMemoryManagerTestImpl, givenDrmMemoryManagerWhenGetLocalMemorySizeIsCalledForMemoryInfoAndInvalidDeviceBitfieldThenReturnZero) { MockExecutionEnvironment executionEnvironment; executionEnvironment.rootDeviceEnvironments[0]->osInterface = std::make_unique(); auto drm = new DrmMock(*executionEnvironment.rootDeviceEnvironments[0]); drm->memoryInfo.reset(new MockMemoryInfo()); executionEnvironment.rootDeviceEnvironments[0]->osInterface->setDriverModel(std::unique_ptr(drm)); TestedDrmMemoryManager memoryManager(executionEnvironment); auto memoryInfo = drm->getMemoryInfo(); ASSERT_NE(nullptr, memoryInfo); EXPECT_EQ(0u, memoryManager.getLocalMemorySize(0u, 0u)); } TEST_F(DrmMemoryManagerTestImpl, givenDrmMemoryManagerWhenGetLocalMemorySizeIsCalledButMemoryInfoIsNotAvailableThenSizeZeroIsReturned) { MockExecutionEnvironment executionEnvironment; 
executionEnvironment.rootDeviceEnvironments[0]->osInterface = std::make_unique(); auto drm = new DrmMock(*executionEnvironment.rootDeviceEnvironments[0]); executionEnvironment.rootDeviceEnvironments[0]->osInterface->setDriverModel(std::unique_ptr(drm)); TestedDrmMemoryManager memoryManager(executionEnvironment); EXPECT_EQ(0u, memoryManager.getLocalMemorySize(0u, 0xF)); } HWTEST2_F(DrmMemoryManagerLocalMemoryTest, givenGraphicsAllocationInDevicePoolIsAllocatedForImage1DWhenTheSizeReturnedFromGmmIsUnalignedThenCreateBufferObjectWithSizeAlignedTo64KB, NonDefaultIoctlsSupported) { ImageDescriptor imgDesc = {}; imgDesc.imageType = ImageType::Image1D; imgDesc.imageWidth = 100; auto imgInfo = MockGmm::initImgInfo(imgDesc, 0, nullptr); MemoryManager::AllocationStatus status = MemoryManager::AllocationStatus::Success; AllocationData allocData; allocData.allFlags = 0; allocData.size = MemoryConstants::pageSize; allocData.type = AllocationType::IMAGE; allocData.flags.resource48Bit = true; allocData.imgInfo = &imgInfo; allocData.rootDeviceIndex = rootDeviceIndex; auto allocation = memoryManager->allocateGraphicsMemoryInDevicePool(allocData, status); EXPECT_NE(nullptr, allocation); EXPECT_EQ(MemoryManager::AllocationStatus::Success, status); EXPECT_TRUE(allocData.imgInfo->useLocalMemory); EXPECT_EQ(MemoryPool::LocalMemory, allocation->getMemoryPool()); auto gmm = allocation->getDefaultGmm(); EXPECT_NE(nullptr, gmm); EXPECT_EQ(0u, gmm->resourceParams.Flags.Info.NonLocalOnly); auto gpuAddress = allocation->getGpuAddress(); auto sizeAlignedTo64KB = alignUp(allocData.imgInfo->size, MemoryConstants::pageSize64k); EXPECT_NE(0u, gpuAddress); EXPECT_EQ(sizeAlignedTo64KB, allocation->getUnderlyingBufferSize()); EXPECT_EQ(gpuAddress, reinterpret_cast(allocation->getReservedAddressPtr())); EXPECT_EQ(sizeAlignedTo64KB, allocation->getReservedAddressSize()); auto drmAllocation = static_cast(allocation); auto bo = drmAllocation->getBO(); EXPECT_NE(nullptr, bo); EXPECT_EQ(gpuAddress, 
bo->peekAddress()); EXPECT_EQ(sizeAlignedTo64KB, bo->peekSize()); memoryManager->freeGraphicsMemory(allocation); } static uint32_t munmapCalledCount = 0u; HWTEST2_F(DrmMemoryManagerLocalMemoryTest, givenAlignmentAndSizeWhenMmapReturnsUnalignedPointerThenCreateAllocWithAlignmentUnmapTwoUnalignedPart, NonDefaultIoctlsSupported) { DebugManagerStateRestore restorer; DebugManager.flags.EnableBOMmapCreate.set(-1); std::vector regionInfo(2); regionInfo[0].region = {I915_MEMORY_CLASS_SYSTEM, 0}; regionInfo[1].region = {I915_MEMORY_CLASS_DEVICE, 0}; mock->memoryInfo.reset(new MemoryInfo(regionInfo)); mock->ioctlCallsCount = 0; AllocationData allocationData; allocationData.size = MemoryConstants::pageSize64k; memoryManager->mmapFunction = [](void *addr, size_t len, int prot, int flags, int fd, off_t offset) throw() { if (addr == 0) { return reinterpret_cast(0x12345678); } else { return addr; } }; memoryManager->munmapFunction = [](void *addr, size_t len) throw() { munmapCalledCount++; return 0; }; munmapCalledCount = 0; auto allocation = memoryManager->createAllocWithAlignment(allocationData, MemoryConstants::pageSize, MemoryConstants::pageSize64k, MemoryConstants::pageSize64k, 0u); EXPECT_EQ(alignUp(reinterpret_cast(0x12345678), MemoryConstants::pageSize64k), allocation->getMmapPtr()); EXPECT_EQ(1u, munmapCalledCount); memoryManager->freeGraphicsMemory(allocation); EXPECT_EQ(3u, munmapCalledCount); munmapCalledCount = 0u; } HWTEST2_F(DrmMemoryManagerLocalMemoryTest, givenAlignmentAndSizeWhenMmapReturnsAlignedThenCreateAllocWithAlignmentUnmapOneUnalignedPart, NonDefaultIoctlsSupported) { DebugManagerStateRestore restorer; DebugManager.flags.EnableBOMmapCreate.set(-1); std::vector regionInfo(2); regionInfo[0].region = {I915_MEMORY_CLASS_SYSTEM, 0}; regionInfo[1].region = {I915_MEMORY_CLASS_DEVICE, 0}; mock->memoryInfo.reset(new MemoryInfo(regionInfo)); mock->ioctlCallsCount = 0; AllocationData allocationData; allocationData.size = MemoryConstants::pageSize64k; 
memoryManager->mmapFunction = [](void *addr, size_t len, int prot, int flags, int fd, off_t offset) throw() { if (addr == 0) { return reinterpret_cast(0x12345678); } else { return addr; } }; memoryManager->munmapFunction = [](void *addr, size_t len) throw() { munmapCalledCount++; return 0; }; munmapCalledCount = 0u; auto allocation = memoryManager->createAllocWithAlignment(allocationData, MemoryConstants::pageSize, 4u, MemoryConstants::pageSize64k, 0u); EXPECT_EQ(reinterpret_cast(0x12345678), allocation->getMmapPtr()); EXPECT_EQ(1u, munmapCalledCount); memoryManager->freeGraphicsMemory(allocation); EXPECT_EQ(2u, munmapCalledCount); munmapCalledCount = 0u; } TEST_F(DrmMemoryManagerLocalMemoryTest, givenAllocationWithInvalidCacheRegionWhenAllocatingInDevicePoolThenReturnNullptr) { MemoryManager::AllocationStatus status = MemoryManager::AllocationStatus::Success; AllocationData allocData; allocData.allFlags = 0; allocData.size = 18 * MemoryConstants::pageSize64k; allocData.flags.allocateMemory = true; allocData.type = AllocationType::BUFFER; allocData.storageInfo.memoryBanks = maxNBitValue(MemoryBanks::getBankForLocalMemory(3)); allocData.storageInfo.multiStorage = true; allocData.rootDeviceIndex = rootDeviceIndex; allocData.cacheRegion = 0xFFFF; auto allocation = memoryManager->allocateGraphicsMemoryInDevicePool(allocData, status); ASSERT_EQ(nullptr, allocation); EXPECT_EQ(MemoryManager::AllocationStatus::Error, status); memoryManager->freeGraphicsMemory(allocation); } TEST_F(DrmMemoryManagerLocalMemoryTest, givenAllocationWithUnifiedMemoryAllocationThenReturnNullptr) { MemoryManager::AllocationStatus status = MemoryManager::AllocationStatus::Success; AllocationData allocData; allocData.allFlags = 0; allocData.size = 18 * MemoryConstants::pageSize64k; allocData.flags.allocateMemory = true; allocData.type = AllocationType::UNIFIED_SHARED_MEMORY; allocData.rootDeviceIndex = rootDeviceIndex; auto allocation = memoryManager->allocateGraphicsMemoryInDevicePool(allocData, 
status); ASSERT_EQ(nullptr, allocation); EXPECT_EQ(MemoryManager::AllocationStatus::Error, status); memoryManager->freeGraphicsMemory(allocation); } TEST(ResidencyTests, whenBuffersIsCreatedWithMakeResidentFlagThenItSuccessfulyCreates) { VariableBackup backup(&ultHwConfig); ultHwConfig.useMockedPrepareDeviceEnvironmentsFunc = false; ultHwConfig.forceOsAgnosticMemoryManager = false; DebugManagerStateRestore restorer; DebugManager.flags.MakeAllBuffersResident.set(true); initPlatform(); auto device = platform()->getClDevice(0u); MockContext context(device, false); auto retValue = CL_SUCCESS; auto clBuffer = clCreateBuffer(&context, 0u, 4096u, nullptr, &retValue); ASSERT_EQ(retValue, CL_SUCCESS); clReleaseMemObject(clBuffer); } } // namespace NEO drm_memory_manager_prelim_fixtures.h000066400000000000000000000063631422164147700346760ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/linux/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/os_interface/linux/drm_memory_operations_handler.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/libult/linux/drm_query_mock.h" #include "shared/test/common/mocks/linux/mock_drm_memory_manager.h" #include "shared/test/common/mocks/mock_device.h" #include "opencl/test/unit_test/os_interface/linux/device_command_stream_fixture_prelim.h" #include "opencl/test/unit_test/os_interface/linux/drm_mock_memory_info.h" #include "gtest/gtest.h" class DrmMemoryManagerLocalMemoryPrelimTest : public ::testing::Test { public: DrmQueryMock *mock; void SetUp() override { executionEnvironment = new ExecutionEnvironment; executionEnvironment->prepareRootDeviceEnvironments(1); executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->setHwInfo(defaultHwInfo.get()); mock = new 
DrmQueryMock(*executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]); mock->memoryInfo.reset(new MockExtendedMemoryInfo()); executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->osInterface = std::make_unique(); executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->osInterface->setDriverModel(std::unique_ptr(mock)); executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->memoryOperationsInterface = DrmMemoryOperationsHandler::create(*mock, 0u); device.reset(MockDevice::createWithExecutionEnvironment(defaultHwInfo.get(), executionEnvironment, rootDeviceIndex)); constexpr bool localMemoryEnabled = true; memoryManager = std::make_unique(localMemoryEnabled, false, false, *executionEnvironment); } protected: DebugManagerStateRestore restorer{}; ExecutionEnvironment *executionEnvironment; std::unique_ptr device; std::unique_ptr memoryManager; const uint32_t rootDeviceIndex = 0u; }; class DrmMemoryManagerLocalMemoryWithCustomPrelimMockTest : public ::testing::Test { public: void SetUp() override { const bool localMemoryEnabled = true; executionEnvironment = new ExecutionEnvironment; executionEnvironment->prepareRootDeviceEnvironments(1); executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); mock = new DrmMockCustomPrelim(*executionEnvironment->rootDeviceEnvironments[0]); executionEnvironment->rootDeviceEnvironments[0]->osInterface = std::make_unique(); executionEnvironment->rootDeviceEnvironments[0]->osInterface->setDriverModel(std::unique_ptr(mock)); device.reset(MockDevice::createWithExecutionEnvironment(defaultHwInfo.get(), executionEnvironment, 0)); memoryManager = std::make_unique(localMemoryEnabled, false, false, *executionEnvironment); } protected: std::unique_ptr device; std::unique_ptr memoryManager; DrmMockCustomPrelim *mock; ExecutionEnvironment *executionEnvironment; }; 
compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp000066400000000000000000010422301422164147700332230ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/os_interface/linux/drm_memory_manager_tests.h" #include "shared/source/built_ins/sip.h" #include "shared/source/command_stream/device_command_stream.h" #include "shared/source/command_stream/linear_stream.h" #include "shared/source/command_stream/preemption.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/constants.h" #include "shared/source/helpers/ptr_math.h" #include "shared/source/helpers/timestamp_packet.h" #include "shared/source/memory_manager/host_ptr_manager.h" #include "shared/source/memory_manager/residency.h" #include "shared/source/os_interface/linux/allocator_helper.h" #include "shared/source/os_interface/linux/drm_allocation.h" #include "shared/source/os_interface/linux/drm_buffer_object.h" #include "shared/source/os_interface/linux/drm_command_stream.h" #include "shared/source/os_interface/linux/drm_memory_manager.h" #include "shared/source/os_interface/linux/os_context_linux.h" #include "shared/source/os_interface/os_context.h" #include "shared/source/utilities/tag_allocator.h" #include "shared/test/common/fixtures/memory_allocator_multi_device_fixture.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/engine_descriptor_helper.h" #include "shared/test/common/helpers/ult_hw_config.h" #include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/libult/linux/drm_mock.h" #include "shared/test/common/mocks/linux/mock_drm_allocation.h" #include "shared/test/common/mocks/mock_allocation_properties.h" #include "shared/test/common/mocks/mock_gfx_partition.h" #include "shared/test/common/mocks/mock_gmm.h" 
#include "shared/test/common/os_interface/linux/drm_mock_cache_info.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/helpers/gtest_helpers.h" #include "opencl/source/event/event.h" #include "opencl/source/helpers/cl_memory_properties_helpers.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/mem_obj/image.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "drm/i915_drm.h" #include "gtest/gtest.h" #include #include namespace NEO { using MemoryManagerMultiDeviceSharedHandleTest = MemoryAllocatorMultiDeviceFixture<2>; TEST_P(MemoryManagerMultiDeviceSharedHandleTest, whenCreatingAllocationFromSharedHandleWithSameHandleAndSameRootDeviceThenSameBOIsUsed) { uint32_t handle0 = 0; uint32_t rootDeviceIndex0 = 0; AllocationProperties properties0{rootDeviceIndex0, true, MemoryConstants::pageSize, AllocationType::BUFFER, false, false, mockDeviceBitfield}; auto gfxAllocation0 = memoryManager->createGraphicsAllocationFromSharedHandle(handle0, properties0, false, false); ASSERT_NE(gfxAllocation0, nullptr); EXPECT_EQ(rootDeviceIndex0, gfxAllocation0->getRootDeviceIndex()); uint32_t handle1 = 0; uint32_t rootDeviceIndex1 = 0; AllocationProperties properties1{rootDeviceIndex1, true, MemoryConstants::pageSize, AllocationType::BUFFER, false, false, mockDeviceBitfield}; auto gfxAllocation1 = memoryManager->createGraphicsAllocationFromSharedHandle(handle1, properties1, false, false); ASSERT_NE(gfxAllocation1, nullptr); EXPECT_EQ(rootDeviceIndex1, gfxAllocation1->getRootDeviceIndex()); DrmAllocation *drmAllocation0 = static_cast(gfxAllocation0); DrmAllocation *drmAllocation1 = static_cast(gfxAllocation1); EXPECT_EQ(drmAllocation0->getBO(), drmAllocation1->getBO()); memoryManager->freeGraphicsMemory(gfxAllocation0); memoryManager->freeGraphicsMemory(gfxAllocation1); } 
TEST_P(MemoryManagerMultiDeviceSharedHandleTest, whenCreatingAllocationFromSharedHandleWithSameHandleAndDifferentRootDeviceThenDifferentBOIsUsed) { uint32_t handle0 = 0; uint32_t rootDeviceIndex0 = 0; AllocationProperties properties0{rootDeviceIndex0, true, MemoryConstants::pageSize, AllocationType::BUFFER, false, false, mockDeviceBitfield}; auto gfxAllocation0 = memoryManager->createGraphicsAllocationFromSharedHandle(handle0, properties0, false, false); ASSERT_NE(gfxAllocation0, nullptr); EXPECT_EQ(rootDeviceIndex0, gfxAllocation0->getRootDeviceIndex()); uint32_t handle1 = 0; uint32_t rootDeviceIndex1 = 1; AllocationProperties properties1{rootDeviceIndex1, true, MemoryConstants::pageSize, AllocationType::BUFFER, false, false, mockDeviceBitfield}; auto gfxAllocation1 = memoryManager->createGraphicsAllocationFromSharedHandle(handle1, properties1, false, false); ASSERT_NE(gfxAllocation1, nullptr); EXPECT_EQ(rootDeviceIndex1, gfxAllocation1->getRootDeviceIndex()); DrmAllocation *drmAllocation0 = static_cast(gfxAllocation0); DrmAllocation *drmAllocation1 = static_cast(gfxAllocation1); EXPECT_NE(drmAllocation0->getBO(), drmAllocation1->getBO()); memoryManager->freeGraphicsMemory(gfxAllocation0); memoryManager->freeGraphicsMemory(gfxAllocation1); } TEST_P(MemoryManagerMultiDeviceSharedHandleTest, whenCreatingAllocationFromSharedHandleWithDifferentHandleAndSameRootDeviceThenDifferentBOIsUsed) { uint32_t handle0 = 0; uint32_t rootDeviceIndex0 = 0; AllocationProperties properties0{rootDeviceIndex0, true, MemoryConstants::pageSize, AllocationType::BUFFER, false, false, mockDeviceBitfield}; auto gfxAllocation0 = memoryManager->createGraphicsAllocationFromSharedHandle(handle0, properties0, false, false); ASSERT_NE(gfxAllocation0, nullptr); EXPECT_EQ(rootDeviceIndex0, gfxAllocation0->getRootDeviceIndex()); uint32_t handle1 = 1; uint32_t rootDeviceIndex1 = 0; AllocationProperties properties1{rootDeviceIndex1, true, MemoryConstants::pageSize, AllocationType::BUFFER, false, false, 
mockDeviceBitfield}; auto gfxAllocation1 = memoryManager->createGraphicsAllocationFromSharedHandle(handle1, properties1, false, false); ASSERT_NE(gfxAllocation1, nullptr); EXPECT_EQ(rootDeviceIndex1, gfxAllocation1->getRootDeviceIndex()); DrmAllocation *drmAllocation0 = static_cast(gfxAllocation0); DrmAllocation *drmAllocation1 = static_cast(gfxAllocation1); EXPECT_NE(drmAllocation0->getBO(), drmAllocation1->getBO()); memoryManager->freeGraphicsMemory(gfxAllocation0); memoryManager->freeGraphicsMemory(gfxAllocation1); } TEST_P(MemoryManagerMultiDeviceSharedHandleTest, whenCreatingAllocationFromSharedHandleWithDifferentHandleAndDifferentRootDeviceThenDifferentBOIsUsed) { uint32_t handle0 = 0; uint32_t rootDeviceIndex0 = 0; AllocationProperties properties0{rootDeviceIndex0, true, MemoryConstants::pageSize, AllocationType::BUFFER, false, false, mockDeviceBitfield}; auto gfxAllocation0 = memoryManager->createGraphicsAllocationFromSharedHandle(handle0, properties0, false, false); ASSERT_NE(gfxAllocation0, nullptr); EXPECT_EQ(rootDeviceIndex0, gfxAllocation0->getRootDeviceIndex()); uint32_t handle1 = 1; uint32_t rootDeviceIndex1 = 1; AllocationProperties properties1{rootDeviceIndex1, true, MemoryConstants::pageSize, AllocationType::BUFFER, false, false, mockDeviceBitfield}; auto gfxAllocation1 = memoryManager->createGraphicsAllocationFromSharedHandle(handle1, properties1, false, false); ASSERT_NE(gfxAllocation1, nullptr); EXPECT_EQ(rootDeviceIndex1, gfxAllocation1->getRootDeviceIndex()); DrmAllocation *drmAllocation0 = static_cast(gfxAllocation0); DrmAllocation *drmAllocation1 = static_cast(gfxAllocation1); EXPECT_NE(drmAllocation0->getBO(), drmAllocation1->getBO()); memoryManager->freeGraphicsMemory(gfxAllocation0); memoryManager->freeGraphicsMemory(gfxAllocation1); } AllocationProperties createAllocationProperties(uint32_t rootDeviceIndex, size_t size, bool forcePin) { MockAllocationProperties properties(rootDeviceIndex, size); properties.alignment = 
MemoryConstants::preferredAlignment; properties.flags.forcePin = forcePin; return properties; } /* Fixture aliases used by the TEST_F cases that follow. NOTE(review): template argument lists (on Test, static_cast, std::make_unique, ...) appear to be missing throughout this copy of the file - confirm against the upstream source before compiling. */ typedef Test DrmMemoryManagerTest; typedef Test DrmMemoryManagerWithLocalMemoryTest; typedef Test DrmMemoryManagerWithExplicitExpectationsTest; typedef Test DrmMemoryManagerWithLocalMemoryAndExplicitExpectationsTest; /* With the EnableDirectSubmission debug flag set to 1, a newly constructed TestedDrmMemoryManager must report a null gem close worker. */ TEST_F(DrmMemoryManagerTest, givenEnableDirectSubmissionWhenCreateDrmMemoryManagerThenGemCloseWorkerInactive) { DebugManagerStateRestore dbgState; DebugManager.flags.EnableDirectSubmission.set(1); TestedDrmMemoryManager memoryManager(false, false, false, *executionEnvironment); EXPECT_EQ(memoryManager.peekGemCloseWorker(), nullptr); } /* Checks supportsMultiStorageResources: true on the fixture's manager, false on a manager built with EnableMultiStorageResources set to 0, and true again on one built with the flag set to 1. */ TEST_F(DrmMemoryManagerTest, givenDebugVariableWhenCreatingDrmMemoryManagerThenSetSupportForMultiStorageResources) { DebugManagerStateRestore dbgState; EXPECT_TRUE(memoryManager->supportsMultiStorageResources); { DebugManager.flags.EnableMultiStorageResources.set(0); TestedDrmMemoryManager memoryManager(false, false, false, *executionEnvironment); EXPECT_FALSE(memoryManager.supportsMultiStorageResources); } { DebugManager.flags.EnableMultiStorageResources.set(1); TestedDrmMemoryManager memoryManager(false, false, false, *executionEnvironment); EXPECT_TRUE(memoryManager.supportsMultiStorageResources); } } /* isKmdMigrationAvailable(rootDeviceIndex) is expected to be true with UseKmdMigration set to 1 and false with it set to 0. Sets dontTestIoctlInTearDown to true before returning. */ TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerWhenCheckForKmdMigrationThenCorrectValueIsReturned) { DebugManagerStateRestore restorer; { DebugManager.flags.UseKmdMigration.set(1); auto retVal = memoryManager->isKmdMigrationAvailable(rootDeviceIndex); EXPECT_TRUE(retVal); } { DebugManager.flags.UseKmdMigration.set(0); auto retVal = memoryManager->isKmdMigrationAvailable(rootDeviceIndex); EXPECT_FALSE(retVal); } this->dontTestIoctlInTearDown = true; } /* Walks a DrmAllocation through the host-pointer manager. After addAllocationToHostPtrManager the fragment exists with refCount 1 and matching cpu pointer, size and BO. storeFragment with the same cpu pointer raises refCount to 2. removeAllocationFromHostPtrManager leaves refCount at 2 while driverAllocation is false, lowers it to 1 once driverAllocation is true, and one more removal makes getFragment return nullptr. NOTE(review): EXPECT_EQ(fragment->refCount, 1) appears twice in a row. */ TEST_F(DrmMemoryManagerTest, GivenGraphicsAllocationWhenAddAndRemoveAllocationToHostPtrManagerThenfragmentHasCorrectValues) { void *cpuPtr = (void *)0x30000; size_t size = 0x1000; const uint32_t rootDeviceIndex = 0u; DrmAllocation gfxAllocation(rootDeviceIndex, 
AllocationType::UNKNOWN, nullptr, cpuPtr, size, static_cast(1u), MemoryPool::MemoryNull); memoryManager->addAllocationToHostPtrManager(&gfxAllocation); auto fragment = memoryManager->getHostPtrManager()->getFragment({gfxAllocation.getUnderlyingBuffer(), rootDeviceIndex}); EXPECT_NE(fragment, nullptr); EXPECT_TRUE(fragment->driverAllocation); EXPECT_EQ(fragment->refCount, 1); EXPECT_EQ(fragment->refCount, 1); EXPECT_EQ(fragment->fragmentCpuPointer, cpuPtr); EXPECT_EQ(fragment->fragmentSize, size); EXPECT_NE(fragment->osInternalStorage, nullptr); EXPECT_EQ(static_cast(fragment->osInternalStorage)->bo, gfxAllocation.getBO()); EXPECT_NE(fragment->residency, nullptr); FragmentStorage fragmentStorage = {}; fragmentStorage.fragmentCpuPointer = cpuPtr; memoryManager->getHostPtrManager()->storeFragment(rootDeviceIndex, fragmentStorage); fragment = memoryManager->getHostPtrManager()->getFragment({gfxAllocation.getUnderlyingBuffer(), rootDeviceIndex}); EXPECT_EQ(fragment->refCount, 2); fragment->driverAllocation = false; memoryManager->removeAllocationFromHostPtrManager(&gfxAllocation); fragment = memoryManager->getHostPtrManager()->getFragment({gfxAllocation.getUnderlyingBuffer(), rootDeviceIndex}); EXPECT_EQ(fragment->refCount, 2); fragment->driverAllocation = true; memoryManager->removeAllocationFromHostPtrManager(&gfxAllocation); fragment = memoryManager->getHostPtrManager()->getFragment({gfxAllocation.getUnderlyingBuffer(), rootDeviceIndex}); EXPECT_EQ(fragment->refCount, 1); memoryManager->removeAllocationFromHostPtrManager(&gfxAllocation); fragment = memoryManager->getHostPtrManager()->getFragment({gfxAllocation.getUnderlyingBuffer(), rootDeviceIndex}); EXPECT_EQ(fragment, nullptr); } /* reserveGpuAddress(pageSize, 0) must return an address whose decanonized value is at or above the HEAP_STANDARD base and below the HEAP_STANDARD limit of gfx partition 0; the range is then released with freeGpuAddress. */ TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, givenDrmMemoryManagerWhenGpuAddressIsReservedAndFreedThenAddressFromGfxPartitionIsUsed) { auto memoryManager = std::make_unique(false, true, false, *executionEnvironment); auto addressRange = 
memoryManager->reserveGpuAddress(MemoryConstants::pageSize, 0); EXPECT_LE(memoryManager->getGfxPartition(0)->getHeapBase(HeapIndex::HEAP_STANDARD), GmmHelper::decanonize(addressRange.address)); EXPECT_GT(memoryManager->getGfxPartition(0)->getHeapLimit(HeapIndex::HEAP_STANDARD), GmmHelper::decanonize(addressRange.address)); memoryManager->freeGpuAddress(addressRange, 0); } /* Allocating one page with properties.gpuAddress = 0x2000 expects one gemUserptr ioctl and zero execbuffer2 ioctls (checked through mock->testIoctls()), and must yield a System4KBPages allocation at GPU address 0x2000. */ TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, givenSmallSizeAndGpuAddressSetWhenGraphicsMemoryIsAllocatedThenAllocationWithSpecifiedGpuAddressInSystemMemoryIsCreated) { auto memoryManager = std::make_unique(false, true, false, *executionEnvironment); auto osContext = device->getDefaultEngine().osContext; MockAllocationProperties properties = {rootDeviceIndex, MemoryConstants::pageSize}; properties.gpuAddress = 0x2000; properties.osContext = osContext; mock->reset(); mock->ioctl_expected.gemUserptr = 1; mock->ioctl_expected.execbuffer2 = 0; // pinBB not called auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(properties); EXPECT_EQ(MemoryPool::System4KBPages, allocation->getMemoryPool()); EXPECT_EQ(0x2000u, allocation->getGpuAddress()); mock->testIoctls(); memoryManager->freeGraphicsMemory(allocation); } /* Runs the same gpuAddress allocation under injectFailures: for every failureIndex other than MemoryManagement::nonfailingAllocation the result must be nullptr; otherwise it must be non-null and is freed. ioctl_expected.total is set to -1 so the total ioctl count is not checked. */ TEST_F(DrmMemoryManagerTest, givenInjectedFailuresWhenGraphicsMemoryWithGpuVaIsAllocatedThenNullptrIsReturned) { mock->ioctl_expected.total = -1; // don't care auto memoryManager = std::make_unique(false, true, false, *executionEnvironment); auto osContext = device->getDefaultEngine().osContext; MockAllocationProperties properties = {rootDeviceIndex, MemoryConstants::pageSize}; properties.gpuAddress = 0x2000; properties.osContext = osContext; InjectedFunction method = [&](size_t failureIndex) { auto ptr = memoryManager->allocateGraphicsMemoryWithProperties(properties); if (MemoryManagement::nonfailingAllocation != failureIndex) { EXPECT_EQ(nullptr, ptr); } else { EXPECT_NE(nullptr, ptr); memoryManager->freeGraphicsMemory(ptr); } }; injectFailures(method); } 
/* NOTE(review): template argument lists (on static_cast, std::make_unique, std::unique_ptr) appear to be missing in this copy of the file - confirm against the upstream source. This test requests pinThreshold + one page with gpuAddress set. First pass: expects one gemUserptr and one execbuffer2 ioctl and an allocation at 0x2000. Second pass, after injectPinBB(nullptr, rootDeviceIndex): expects one gemUserptr and zero execbuffer2 ioctls and an allocation at 0x5000. Both allocations must be in MemoryPool::System4KBPages. */ TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, givenSizeExceedingThresholdAndGpuAddressSetWhenGraphicsMemoryIsAllocatedThenAllocationWithSpecifiedGpuAddressInSystemMemoryIsCreated) { auto memoryManager = std::make_unique(false, true, false, *executionEnvironment); auto osContext = device->getDefaultEngine().osContext; MockAllocationProperties properties = {rootDeviceIndex, memoryManager->pinThreshold + MemoryConstants::pageSize}; properties.gpuAddress = 0x2000; properties.osContext = osContext; mock->reset(); mock->ioctl_expected.gemUserptr = 1; mock->ioctl_expected.execbuffer2 = 1; // pinBB called auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(properties); EXPECT_EQ(MemoryPool::System4KBPages, allocation->getMemoryPool()); EXPECT_EQ(0x2000u, allocation->getGpuAddress()); mock->testIoctls(); memoryManager->freeGraphicsMemory(allocation); memoryManager->injectPinBB(nullptr, rootDeviceIndex); // pinBB not available mock->reset(); mock->ioctl_expected.gemUserptr = 1; mock->ioctl_expected.execbuffer2 = 0; // pinBB not called properties.gpuAddress = 0x5000; allocation = memoryManager->allocateGraphicsMemoryWithProperties(properties); EXPECT_EQ(MemoryPool::System4KBPages, allocation->getMemoryPool()); EXPECT_EQ(0x5000u, allocation->getGpuAddress()); mock->testIoctls(); memoryManager->freeGraphicsMemory(allocation); } /* Same oversized request on a manager constructed with its second argument false (presumably the force-pin switch, going by the test name - confirm): expects one gemUserptr ioctl, zero execbuffer2 ioctls, and a System4KBPages allocation at 0x2000. */ TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, givenDisabledForcePinAndSizeExceedingThresholdAndGpuAddressSetWhenGraphicsMemoryIsAllocatedThenBufferIsNotPinned) { auto memoryManager = std::make_unique(false, false, false, *executionEnvironment); auto osContext = device->getDefaultEngine().osContext; MockAllocationProperties properties = {rootDeviceIndex, memoryManager->pinThreshold + MemoryConstants::pageSize}; properties.gpuAddress = 0x2000; properties.osContext = osContext; mock->reset(); mock->ioctl_expected.gemUserptr = 1; mock->ioctl_expected.execbuffer2 = 0; // pinBB not called auto allocation = 
memoryManager->allocateGraphicsMemoryWithProperties(properties); EXPECT_EQ(MemoryPool::System4KBPages, allocation->getMemoryPool()); EXPECT_EQ(0x2000u, allocation->getGpuAddress()); mock->testIoctls(); memoryManager->freeGraphicsMemory(allocation); } /* A manager constructed with (false, true, false, ...) must have a non-null pinBBs entry at the device's root device index. */ TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, givenforcePinAllowedWhenMemoryManagerIsCreatedThenPinBbIsCreated) { auto memoryManager = std::make_unique(false, true, false, *executionEnvironment); EXPECT_NE(nullptr, memoryManager->pinBBs[device->getRootDeviceIndex()]); } /* The fixture's memoryManager must report isInitialized(). */ TEST_F(DrmMemoryManagerTest, givenDefaultDrmMemoryManagerWhenItIsCreatedThenItIsInitialized) { EXPECT_TRUE(memoryManager->isInitialized()); } /* Replaces gfxPartitions[0] with a newly made partition object (its type is not visible in this copy; the local is named failedInitGfxPartition) and re-runs initialize(gemCloseWorkerInactive); isInitialized() must then be false. Another newly made partition object is installed through overrideGfxPartition before the test ends. */ TEST_F(DrmMemoryManagerTest, givenDefaultDrmMemoryManagerWhenItIsCreatedAndGfxPartitionInitIsFailedThenItIsNotInitialized) { EXPECT_TRUE(memoryManager->isInitialized()); auto failedInitGfxPartition = std::make_unique(); memoryManager->gfxPartitions[0].reset(failedInitGfxPartition.release()); memoryManager->initialize(gemCloseWorkerMode::gemCloseWorkerInactive); EXPECT_FALSE(memoryManager->isInitialized()); auto mockGfxPartitionBasic = std::make_unique(); memoryManager->overrideGfxPartition(mockGfxPartitionBasic.release()); } /* Sets the expected ioctl counts to one gemUserptr and one gemClose, constructs a manager with the second argument true, and checks that pinBBs[rootDeviceIndex] is non-null. */ TEST_F(DrmMemoryManagerTest, WhenMemoryManagerIsCreatedThenPinBbIsCreated) { mock->ioctl_expected.gemUserptr = 1; mock->ioctl_expected.gemClose = 1; auto memoryManager = std::make_unique(false, true, false, *executionEnvironment); EXPECT_NE(nullptr, memoryManager->pinBBs[rootDeviceIndex]); } /* releaseDeviceSpecificMemResources(rootDeviceIndex) must set pinBBs[rootDeviceIndex] to nullptr while leaving pinBBs.size() unchanged. */ TEST_F(DrmMemoryManagerTest, GivenMemoryManagerIsCreatedWhenInvokingReleaseMemResourcesBasedOnGpuDeviceThenPinBbIsRemoved) { mock->ioctl_expected.gemUserptr = 1; mock->ioctl_expected.gemClose = 1; auto drmMemoryManager = std::make_unique(false, true, false, *executionEnvironment); EXPECT_NE(nullptr, drmMemoryManager->pinBBs[rootDeviceIndex]); auto length = drmMemoryManager->pinBBs.size(); drmMemoryManager->releaseDeviceSpecificMemResources(rootDeviceIndex); EXPECT_EQ(length, 
drmMemoryManager->pinBBs.size()); EXPECT_EQ(nullptr, drmMemoryManager->pinBBs[rootDeviceIndex]); } /* After releaseDeviceSpecificMemResources clears pinBBs[rootDeviceIndex], createDeviceSpecificMemResources must make it non-null again; pinBBs.size() must stay the same at each step. Expected ioctl counts are two gemUserptr and two gemClose. */ TEST_F(DrmMemoryManagerTest, GivenMemoryManagerIsCreatedWhenInvokingCreatMemResourcesBasedOnGpuDeviceThenPinBbIsCreated) { mock->ioctl_expected.gemUserptr = 2; mock->ioctl_expected.gemClose = 2; auto drmMemoryManager = std::make_unique(false, true, false, *executionEnvironment); auto rootDeviceBufferObjectOld = drmMemoryManager->pinBBs[rootDeviceIndex]; EXPECT_NE(nullptr, rootDeviceBufferObjectOld); auto length = drmMemoryManager->pinBBs.size(); auto memoryManagerTest = static_cast(drmMemoryManager.get()); drmMemoryManager->releaseDeviceSpecificMemResources(rootDeviceIndex); EXPECT_EQ(length, drmMemoryManager->pinBBs.size()); EXPECT_EQ(nullptr, drmMemoryManager->pinBBs[rootDeviceIndex]); memoryManagerTest->createDeviceSpecificMemResources(rootDeviceIndex); EXPECT_EQ(length, drmMemoryManager->pinBBs.size()); EXPECT_NE(nullptr, drmMemoryManager->pinBBs[rootDeviceIndex]); } /* A manager constructed with the second argument false must have a null pinBBs[rootDeviceIndex]. */ TEST_F(DrmMemoryManagerTest, givenNotAllowedForcePinWhenMemoryManagerIsCreatedThenPinBBIsNotCreated) { std::unique_ptr memoryManager(new (std::nothrow) TestedDrmMemoryManager(false, false, false, *executionEnvironment)); EXPECT_EQ(nullptr, memoryManager->pinBBs[rootDeviceIndex]); } /* With mock->ioctl_res forced to -1 while the manager is constructed, pinBBs[rootDeviceIndex] must be null. ioctl_res is reset to 0 before the manager is deleted. NOTE(review): the result of the nothrow new is dereferenced without a null check. */ TEST_F(DrmMemoryManagerTest, WhenIoctlFailsThenPinBbIsNotCreated) { mock->ioctl_expected.gemUserptr = 1; mock->ioctl_res = -1; auto memoryManager = new (std::nothrow) TestedDrmMemoryManager(false, true, false, *executionEnvironment); EXPECT_EQ(nullptr, memoryManager->pinBBs[rootDeviceIndex]); mock->ioctl_res = 0; delete memoryManager; } /* Copies the device's engines into registeredEngines and calls incRefInternal on each osContext, then allocates 10 megabytes with the last createAllocationProperties argument true (presumably forcePin - confirm). The allocation and its BO must be non-null. Expected ioctl counts: gemUserptr 2, execbuffer2 1, gemWait 1, gemClose 2. */ TEST_F(DrmMemoryManagerTest, WhenAskedAndAllowedAndBigAllocationThenPinAfterAllocate) { mock->ioctl_expected.gemUserptr = 2; mock->ioctl_expected.execbuffer2 = 1; mock->ioctl_expected.gemWait = 1; mock->ioctl_expected.gemClose = 2; auto memoryManager = std::make_unique(false, true, false, *executionEnvironment); memoryManager->registeredEngines = 
EngineControlContainer{this->device->allEngines}; for (auto engine : memoryManager->registeredEngines) { engine.osContext->incRefInternal(); } ASSERT_NE(nullptr, memoryManager->pinBBs[rootDeviceIndex]); auto alloc = static_cast(memoryManager->allocateGraphicsMemoryWithProperties(createAllocationProperties(rootDeviceIndex, 10 * MemoryConstants::megaByte, true))); ASSERT_NE(nullptr, alloc); EXPECT_NE(nullptr, alloc->getBO()); memoryManager->freeGraphicsMemory(alloc); } /* With mock->outputFd set to 1337, peekInternalHandle on a 10-page allocation must return 1337. One handleToPrimeFd ioctl is expected in addition to one each of gemUserptr, gemWait and gemClose. */ TEST_F(DrmMemoryManagerTest, whenPeekInternalHandleIsCalledThenBoIsReturend) { mock->ioctl_expected.gemUserptr = 1; mock->ioctl_expected.gemWait = 1; mock->ioctl_expected.gemClose = 1; mock->ioctl_expected.handleToPrimeFd = 1; mock->outputFd = 1337; auto allocation = static_cast(this->memoryManager->allocateGraphicsMemoryWithProperties(createAllocationProperties(rootDeviceIndex, 10 * MemoryConstants::pageSize, true))); ASSERT_NE(allocation->getBO(), nullptr); ASSERT_EQ(allocation->peekInternalHandle(this->memoryManager), static_cast(1337)); memoryManager->freeGraphicsMemory(allocation); } /* Allocates pinThreshold bytes with the last createAllocationProperties argument true and checks that mock->execBuffer.rsvd1 equals getDefaultDrmContextId(rootDeviceIndex), which must itself be non-zero. */ TEST_F(DrmMemoryManagerTest, givenDrmContextIdWhenAllocationIsCreatedThenPinWithPassedDrmContextId) { mock->ioctl_expected.gemUserptr = 2; mock->ioctl_expected.execbuffer2 = 1; mock->ioctl_expected.gemWait = 1; mock->ioctl_expected.gemClose = 2; auto memoryManager = std::make_unique(false, true, false, *executionEnvironment); memoryManager->registeredEngines = EngineControlContainer{this->device->allEngines}; for (auto engine : memoryManager->registeredEngines) { engine.osContext->incRefInternal(); } auto drmContextId = memoryManager->getDefaultDrmContextId(rootDeviceIndex); ASSERT_NE(nullptr, memoryManager->pinBBs[rootDeviceIndex]); EXPECT_NE(0u, drmContextId); auto alloc = memoryManager->allocateGraphicsMemoryWithProperties(createAllocationProperties(rootDeviceIndex, memoryManager->pinThreshold, true)); EXPECT_EQ(drmContextId, mock->execBuffer.rsvd1); memoryManager->freeGraphicsMemory(alloc); } 
/* NOTE(review): template argument lists (on static_cast, std::make_unique, std::unique_ptr, reinterpret_cast, const_cast) appear to be missing in this copy of the file - confirm against the upstream source. This test makes a one-page allocation with the last createAllocationProperties argument true on a manager that has a pin BB; no execbuffer2 expectation is set, and the allocation and its BO must be non-null. */ TEST_F(DrmMemoryManagerTest, WhenAskedAndAllowedButSmallAllocationThenDoNotPinAfterAllocate) { mock->ioctl_expected.gemUserptr = 2; mock->ioctl_expected.gemWait = 1; mock->ioctl_expected.gemClose = 2; auto memoryManager = std::make_unique(false, true, false, *executionEnvironment); ASSERT_NE(nullptr, memoryManager->pinBBs[rootDeviceIndex]); // one page is too small for early pinning auto alloc = static_cast(memoryManager->allocateGraphicsMemoryWithProperties(createAllocationProperties(rootDeviceIndex, MemoryConstants::pageSize, true))); ASSERT_NE(nullptr, alloc); EXPECT_NE(nullptr, alloc->getBO()); memoryManager->freeGraphicsMemory(alloc); } /* One-page allocation with the last createAllocationProperties argument false on a manager built with the second argument true; no execbuffer2 expectation is set, and the allocation and its BO must be non-null. */ TEST_F(DrmMemoryManagerTest, WhenNotAskedButAllowedThenDoNotPinAfterAllocate) { mock->ioctl_expected.gemUserptr = 2; mock->ioctl_expected.gemClose = 2; mock->ioctl_expected.gemWait = 1; auto memoryManager = std::make_unique(false, true, false, *executionEnvironment); memoryManager->registeredEngines = EngineControlContainer{this->device->allEngines}; for (auto engine : memoryManager->registeredEngines) { engine.osContext->incRefInternal(); } ASSERT_NE(nullptr, memoryManager->pinBBs[rootDeviceIndex]); auto alloc = static_cast(memoryManager->allocateGraphicsMemoryWithProperties(createAllocationProperties(rootDeviceIndex, MemoryConstants::pageSize, false))); ASSERT_NE(nullptr, alloc); EXPECT_NE(nullptr, alloc->getBO()); memoryManager->freeGraphicsMemory(alloc); } /* Manager built with the second argument false; one-page allocation with the last createAllocationProperties argument true. Expected ioctl counts: gemUserptr 1, gemWait 1, gemClose 1. The allocation and its BO must be non-null. */ TEST_F(DrmMemoryManagerTest, WhenAskedButNotAllowedThenDoNotPinAfterAllocate) { mock->ioctl_expected.gemUserptr = 1; mock->ioctl_expected.gemWait = 1; mock->ioctl_expected.gemClose = 1; auto memoryManager = std::make_unique(false, false, false, *executionEnvironment); memoryManager->registeredEngines = EngineControlContainer{this->device->allEngines}; for (auto engine : memoryManager->registeredEngines) { engine.osContext->incRefInternal(); } auto alloc = static_cast(memoryManager->allocateGraphicsMemoryWithProperties(createAllocationProperties(rootDeviceIndex, 
MemoryConstants::pageSize, true))); ASSERT_NE(nullptr, alloc); EXPECT_NE(nullptr, alloc->getBO()); memoryManager->freeGraphicsMemory(alloc); } // ---- HostPtr /* Host-pointer variant: a 10 * MB buffer from alignedMalloc with allocationData.flags.forcePin = true is passed to allocateGraphicsMemoryWithHostPtr. One execbuffer2 ioctl is expected and the allocation and its BO must be non-null. The host buffer is released with alignedFree at the end. */ TEST_F(DrmMemoryManagerTest, WhenAskedAndAllowedAndBigAllocationHostPtrThenPinAfterAllocate) { mock->ioctl_expected.gemUserptr = 2; mock->ioctl_expected.gemClose = 2; mock->ioctl_expected.execbuffer2 = 1; mock->ioctl_expected.gemWait = 1; auto memoryManager = std::make_unique(false, true, false, *executionEnvironment); memoryManager->registeredEngines = EngineControlContainer{this->device->allEngines}; for (auto engine : memoryManager->registeredEngines) { engine.osContext->incRefInternal(); } ASSERT_NE(nullptr, memoryManager->pinBBs[rootDeviceIndex]); allocationData.size = 10 * MB; allocationData.hostPtr = ::alignedMalloc(allocationData.size, 4096); allocationData.flags.forcePin = true; auto alloc = memoryManager->allocateGraphicsMemoryWithHostPtr(allocationData); ASSERT_NE(nullptr, alloc); EXPECT_NE(nullptr, alloc->getBO()); memoryManager->freeGraphicsMemory(alloc); ::alignedFree(const_cast(allocationData.hostPtr)); } /* Same host-pointer path with a 4 * 1024 byte buffer and forcePin = true; no execbuffer2 expectation is set, and the allocation and its BO must be non-null. */ TEST_F(DrmMemoryManagerTest, givenSmallAllocationHostPtrAllocationWhenForcePinIsTrueThenBufferObjectIsNotPinned) { mock->ioctl_expected.gemUserptr = 2; mock->ioctl_expected.gemWait = 1; mock->ioctl_expected.gemClose = 2; auto memoryManager = std::make_unique(false, true, false, *executionEnvironment); memoryManager->registeredEngines = EngineControlContainer{this->device->allEngines}; for (auto engine : memoryManager->registeredEngines) { engine.osContext->incRefInternal(); } ASSERT_NE(nullptr, memoryManager->pinBBs[rootDeviceIndex]); // one page is too small for early pinning allocationData.size = 4 * 1024; allocationData.hostPtr = ::alignedMalloc(allocationData.size, 4096); allocationData.flags.forcePin = true; auto alloc = memoryManager->allocateGraphicsMemoryWithHostPtr(allocationData); ASSERT_NE(nullptr, alloc); EXPECT_NE(nullptr, alloc->getBO()); memoryManager->freeGraphicsMemory(alloc); 
::alignedFree(const_cast(allocationData.hostPtr)); } /* 4 * 1024 byte host pointer on a manager that has a pin BB; this test does not set allocationData.flags.forcePin. The allocation and its BO must be non-null. */ TEST_F(DrmMemoryManagerTest, WhenNotAskedButAllowedHostPtrThendoNotPinAfterAllocate) { mock->ioctl_expected.gemUserptr = 2; mock->ioctl_expected.gemWait = 1; mock->ioctl_expected.gemClose = 2; auto memoryManager = std::make_unique(false, true, false, *executionEnvironment); ASSERT_NE(nullptr, memoryManager->pinBBs[rootDeviceIndex]); allocationData.size = 4 * 1024; allocationData.hostPtr = ::alignedMalloc(allocationData.size, 4096); auto alloc = memoryManager->allocateGraphicsMemoryWithHostPtr(allocationData); ASSERT_NE(nullptr, alloc); EXPECT_NE(nullptr, alloc->getBO()); memoryManager->freeGraphicsMemory(alloc); ::alignedFree(const_cast(allocationData.hostPtr)); } /* 4 * 1024 byte host pointer with forcePin = true on a manager built with the second argument false. Expected ioctl counts: gemUserptr 1, gemWait 1, gemClose 1. */ TEST_F(DrmMemoryManagerTest, WhenAskedButNotAllowedHostPtrThenDoNotPinAfterAllocate) { mock->ioctl_expected.gemUserptr = 1; mock->ioctl_expected.gemWait = 1; mock->ioctl_expected.gemClose = 1; auto memoryManager = std::make_unique(false, false, false, *executionEnvironment); allocationData.size = 4 * 1024; allocationData.hostPtr = ::alignedMalloc(allocationData.size, 4096); allocationData.flags.forcePin = true; auto alloc = memoryManager->allocateGraphicsMemoryWithHostPtr(allocationData); ASSERT_NE(nullptr, alloc); EXPECT_NE(nullptr, alloc->getBO()); memoryManager->freeGraphicsMemory(alloc); ::alignedFree(const_cast(allocationData.hostPtr)); } /* allocUserptr must return a non-null BO, which is then passed to unreference(bo, false). One gemUserptr and one gemClose ioctl are expected. */ TEST_F(DrmMemoryManagerTest, WhenUnreferenceIsCalledThenCallSucceeds) { mock->ioctl_expected.gemUserptr = 1; mock->ioctl_expected.gemClose = 1; BufferObject *bo = memoryManager->allocUserptr(0, (size_t)1024, 0ul, rootDeviceIndex); ASSERT_NE(nullptr, bo); memoryManager->unreference(bo, false); } /* With PrintBOCreateDestroyResult enabled, the stdout captured around allocUserptr must begin with "Created new BO with GEM_USERPTR, handle: BO-" (find() is expected to return index 0). The flag is switched off again before unreference is called. */ TEST_F(DrmMemoryManagerTest, whenPrintBOCreateDestroyResultIsSetAndAllocUserptrIsCalledThenBufferObjectIsCreatedAndDebugInformationIsPrinted) { DebugManagerStateRestore stateRestore; DebugManager.flags.PrintBOCreateDestroyResult.set(true); mock->ioctl_expected.gemUserptr = 1; mock->ioctl_expected.gemClose = 1; 
testing::internal::CaptureStdout(); BufferObject *bo = memoryManager->allocUserptr(0, (size_t)1024, 0ul, rootDeviceIndex); ASSERT_NE(nullptr, bo); DebugManager.flags.PrintBOCreateDestroyResult.set(false); std::string output = testing::internal::GetCapturedStdout(); size_t idx = output.find("Created new BO with GEM_USERPTR, handle: BO-"); size_t expectedValue = 0; EXPECT_EQ(expectedValue, idx); memoryManager->unreference(bo, false); } /* Calls unreference(nullptr, false); the test body contains no assertions. */ TEST_F(DrmMemoryManagerTest, GivenNullptrWhenUnreferenceIsCalledThenCallSucceeds) { memoryManager->unreference(nullptr, false); } /* A DrmMemoryManager constructed with gemCloseWorkerInactive must return nullptr from peekGemCloseWorker(). */ TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, givenDrmMemoryManagerCreatedWithGemCloseWorkerModeInactiveThenGemCloseWorkerIsNotCreated) { DrmMemoryManager drmMemoryManger(gemCloseWorkerMode::gemCloseWorkerInactive, false, false, *executionEnvironment); EXPECT_EQ(nullptr, drmMemoryManger.peekGemCloseWorker()); } /* A DrmMemoryManager constructed with gemCloseWorkerActive must return a non-null pointer from peekGemCloseWorker(). */ TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, givenDrmMemoryManagerCreatedWithGemCloseWorkerActiveThenGemCloseWorkerIsCreated) { DrmMemoryManager drmMemoryManger(gemCloseWorkerMode::gemCloseWorkerActive, false, false, *executionEnvironment); EXPECT_NE(nullptr, drmMemoryManger.peekGemCloseWorker()); } /* createGraphicsAllocationFromSharedHandle with handle 1 must yield an allocation whose peekSharedHandle() equals that handle; after closeSharedHandle, peekSharedHandle() must equal Sharing::nonSharedResource. Expected ioctl counts: primeFdToHandle 1, gemWait 1, gemClose 1. */ TEST_F(DrmMemoryManagerTest, GivenAllocationWhenClosingSharedHandleThenSucceeds) { mock->ioctl_expected.primeFdToHandle = 1; mock->ioctl_expected.gemWait = 1; mock->ioctl_expected.gemClose = 1; osHandle handle = 1u; this->mock->outputHandle = 2u; size_t size = 4096u; AllocationProperties properties(rootDeviceIndex, false, size, AllocationType::SHARED_BUFFER, false, {}); auto graphicsAllocation = memoryManager->createGraphicsAllocationFromSharedHandle(handle, properties, false, false); EXPECT_EQ(handle, graphicsAllocation->peekSharedHandle()); memoryManager->closeSharedHandle(graphicsAllocation); EXPECT_EQ(Sharing::nonSharedResource, graphicsAllocation->peekSharedHandle()); memoryManager->freeGraphicsMemory(graphicsAllocation); } /* A one-page allocation must have a non-null BO and peekSharedHandle() equal to Sharing::nonSharedResource; it is then freed. */ TEST_F(DrmMemoryManagerTest, 
GivenAllocationWhenFreeingThenSucceeds) { mock->ioctl_expected.gemUserptr = 1; mock->ioctl_expected.gemWait = 1; mock->ioctl_expected.gemClose = 1; auto alloc = static_cast(memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootDeviceIndex, MemoryConstants::pageSize})); ASSERT_NE(nullptr, alloc); EXPECT_NE(nullptr, alloc->getBO()); EXPECT_EQ(Sharing::nonSharedResource, alloc->peekSharedHandle()); memoryManager->freeGraphicsMemory(alloc); } /* Under injectFailures, a one-page allocation must be nullptr for every failureIndex other than MemoryManagement::nonfailingAllocation; for that index it must be non-null and is freed. */ TEST_F(DrmMemoryManagerTest, GivenInjectedFailureWhenAllocatingThenAllocationFails) { mock->ioctl_expected.total = -1; // don't care InjectedFunction method = [this](size_t failureIndex) { auto ptr = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootDeviceIndex, MemoryConstants::pageSize}); if (MemoryManagement::nonfailingAllocation != failureIndex) { EXPECT_EQ(nullptr, ptr); } else { EXPECT_NE(nullptr, ptr); memoryManager->freeGraphicsMemory(ptr); } }; injectFailures(method); } /* A request of size 0u must still return a non-null allocation with a non-null underlying buffer. */ TEST_F(DrmMemoryManagerTest, GivenZeroBytesWhenAllocatingThenAllocationIsCreated) { mock->ioctl_expected.gemUserptr = 1; mock->ioctl_expected.gemWait = 1; mock->ioctl_expected.gemClose = 1; auto ptr = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootDeviceIndex, 0u}); ASSERT_NE(nullptr, ptr); EXPECT_NE(nullptr, ptr->getUnderlyingBuffer()); memoryManager->freeGraphicsMemory(ptr); } /* Checks that the allocation and its underlying buffer are non-null. NOTE(review): despite the test name, the size requested here is MemoryConstants::pageSize rather than three bytes - confirm which is intended. */ TEST_F(DrmMemoryManagerTest, GivenThreeBytesWhenAllocatingThenAllocationIsCreated) { mock->ioctl_expected.gemUserptr = 1; mock->ioctl_expected.gemWait = 1; mock->ioctl_expected.gemClose = 1; auto ptr = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootDeviceIndex, MemoryConstants::pageSize}); ASSERT_NE(nullptr, ptr); EXPECT_NE(nullptr, ptr->getUnderlyingBuffer()); memoryManager->freeGraphicsMemory(ptr); } /* With mock->ioctl_res set to -1, the one-page allocation must come back as nullptr; ioctl_res is reset to 0 afterwards. */ TEST_F(DrmMemoryManagerTest, GivenUserptrWhenCreatingAllocationThenFail) { mock->ioctl_expected.gemUserptr = 1; mock->ioctl_res = -1; auto ptr = 
memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootDeviceIndex, MemoryConstants::pageSize}); EXPECT_EQ(nullptr, ptr); mock->ioctl_res = 0; } /* Calls freeGraphicsMemory(nullptr); the test body contains no assertions. */ TEST_F(DrmMemoryManagerTest, GivenNullPtrWhenFreeingThenSucceeds) { memoryManager->freeGraphicsMemory(nullptr); } /* Allocating over a host pointer obtained from alignedMalloc(1024, 4096): getUnderlyingBuffer() and the BO's peekAddress() must both equal the host pointer, and peekSharedHandle() must be Sharing::nonSharedResource. */ TEST_F(DrmMemoryManagerTest, GivenHostPtrWhenCreatingAllocationThenSucceeds) { mock->ioctl_expected.gemUserptr = 1; mock->ioctl_expected.gemWait = 1; mock->ioctl_expected.gemClose = 1; void *ptr = ::alignedMalloc(1024, 4096); ASSERT_NE(nullptr, ptr); auto alloc = static_cast(memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootDeviceIndex, false, 1024}, ptr)); ASSERT_NE(nullptr, alloc); EXPECT_NE(nullptr, alloc->getUnderlyingBuffer()); EXPECT_EQ(ptr, alloc->getUnderlyingBuffer()); auto bo = alloc->getBO(); ASSERT_NE(nullptr, bo); EXPECT_EQ(ptr, reinterpret_cast(bo->peekAddress())); EXPECT_EQ(Sharing::nonSharedResource, alloc->peekSharedHandle()); memoryManager->freeGraphicsMemory(alloc); ::alignedFree(ptr); } /* allocateGraphicsMemoryWithHostPtr with hostPtr = nullptr and size pageSize: the allocation must be non-null, and both its underlying buffer and the BO's peekAddress() are expected to equal the null ptr. NOTE(review): alignedFree is called on that null ptr at the end. */ TEST_F(DrmMemoryManagerTest, GivenNullHostPtrWhenCreatingAllocationThenSucceeds) { mock->ioctl_expected.gemUserptr = 1; mock->ioctl_expected.gemWait = 1; mock->ioctl_expected.gemClose = 1; void *ptr = nullptr; allocationData.hostPtr = nullptr; allocationData.size = MemoryConstants::pageSize; auto alloc = static_cast(memoryManager->allocateGraphicsMemoryWithHostPtr(allocationData)); ASSERT_NE(nullptr, alloc); EXPECT_EQ(ptr, alloc->getUnderlyingBuffer()); auto bo = alloc->getBO(); ASSERT_NE(nullptr, bo); EXPECT_EQ(ptr, reinterpret_cast(bo->peekAddress())); memoryManager->freeGraphicsMemory(alloc); ::alignedFree(ptr); } /* Host pointer offset by 128 from a block obtained with alignedMalloc(1024, 4096): getUnderlyingBuffer() must return the offset pointer, while the BO's peekAddress() must equal the start of the aligned block. */ TEST_F(DrmMemoryManagerTest, GivenMisalignedHostPtrWhenCreatingAllocationThenSucceeds) { mock->ioctl_expected.gemUserptr = 1; mock->ioctl_expected.gemWait = 1; mock->ioctl_expected.gemClose = 1; void *ptrT = ::alignedMalloc(1024, 4096); ASSERT_NE(nullptr, ptrT); void *ptr = ptrOffset(ptrT, 128); auto alloc = 
static_cast(memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootDeviceIndex, false, 1024}, ptr)); ASSERT_NE(nullptr, alloc); EXPECT_NE(nullptr, alloc->getUnderlyingBuffer()); EXPECT_EQ(ptr, alloc->getUnderlyingBuffer()); auto bo = alloc->getBO(); ASSERT_NE(nullptr, bo); EXPECT_EQ(ptrT, reinterpret_cast(bo->peekAddress())); memoryManager->freeGraphicsMemory(alloc); ::alignedFree(ptrT); } /* With mock->ioctl_res set to -1, allocating over a host pointer must return nullptr; the host block is freed and ioctl_res reset to 0. */ TEST_F(DrmMemoryManagerTest, GivenHostPtrUserptrWhenCreatingAllocationThenFails) { mock->ioctl_expected.gemUserptr = 1; mock->ioctl_res = -1; void *ptrT = ::alignedMalloc(1024, 4096); ASSERT_NE(nullptr, ptrT); auto alloc = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootDeviceIndex, false, 1024}, ptrT); EXPECT_EQ(nullptr, alloc); ::alignedFree(ptrT); mock->ioctl_res = 0; } /* After allocating, handleFenceCompletion(allocation) is followed by testIoctls() with expectations of gemUserptr 1 and gemWait 1. The expectations are then raised to gemWait 2 and gemClose 1 before the allocation is freed. */ TEST_F(DrmMemoryManagerTest, givenDrmAllocationWhenHandleFenceCompletionThenCallBufferObjectWait) { mock->ioctl_expected.gemUserptr = 1; mock->ioctl_expected.gemWait = 1; mock->ioctl_expected.contextDestroy = 0; auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootDeviceIndex, 1024}); memoryManager->handleFenceCompletion(allocation); mock->testIoctls(); mock->ioctl_expected.gemClose = 1; mock->ioctl_expected.gemWait = 2; memoryManager->freeGraphicsMemory(allocation); } /* Standalone test over four root device environments, each given a DrmMockCustom as its driver model. For every index, getSystemSharedMemory(i) must be greater than 0 and the mock must have recorded I915_CONTEXT_PARAM_GTT_SIZE as the queried context param; the osInterface for that index is reset afterwards. NOTE(review): every mock is constructed from rootDeviceEnvironments[0] rather than [i] - confirm this is intended. */ TEST(DrmMemoryManagerTest2, givenDrmMemoryManagerWhengetSystemSharedMemoryIsCalledThenContextGetParamIsCalled) { auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(4u); for (auto i = 0u; i < 4u; i++) { auto mock = new DrmMockCustom(*executionEnvironment->rootDeviceEnvironments[0]); executionEnvironment->rootDeviceEnvironments[i]->osInterface = std::make_unique(); executionEnvironment->rootDeviceEnvironments[i]->osInterface->setDriverModel(std::unique_ptr(mock)); executionEnvironment->rootDeviceEnvironments[i]->setHwInfo(defaultHwInfo.get()); } auto memoryManager = std::make_unique(false, false, 
false, *executionEnvironment); for (auto i = 0u; i < 4u; i++) { auto mock = executionEnvironment->rootDeviceEnvironments[i]->osInterface->getDriverModel()->as(); mock->getContextParamRetValue = 16 * MemoryConstants::gigaByte; uint64_t mem = memoryManager->getSystemSharedMemory(i); mock->ioctl_expected.contextGetParam = 1; EXPECT_EQ(mock->recordedGetContextParam.param, static_cast<__u64>(I915_CONTEXT_PARAM_GTT_SIZE)); EXPECT_GT(mem, 0u); executionEnvironment->rootDeviceEnvironments[i]->osInterface.reset(); } } /* getMaxApplicationAddress() must equal MemoryConstants::max64BitAppAddress when is64bit is true, and MemoryConstants::max32BitAppAddress otherwise. */ TEST_F(DrmMemoryManagerTest, GivenBitnessWhenGettingMaxApplicationAddressThenCorrectValueIsReturned) { uint64_t maxAddr = memoryManager->getMaxApplicationAddress(); if constexpr (is64bit) { EXPECT_EQ(maxAddr, MemoryConstants::max64BitAppAddress); } else { EXPECT_EQ(maxAddr, MemoryConstants::max32BitAppAddress); } } /* For each of four root devices, with hostMemorySize computed as pageSize * sysconf(_SC_PHYS_PAGES): when the mocked context param value is hostMemorySize - 1, getSystemSharedMemory(i) must return that value; when it is hostMemorySize + 1, hostMemorySize must be returned instead. testIoctls() is called after each case. */ TEST(DrmMemoryManagerTest2, WhenGetMinimumSystemSharedMemoryThenCorrectValueIsReturned) { auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(4u); for (auto i = 0u; i < 4u; i++) { executionEnvironment->rootDeviceEnvironments[i]->setHwInfo(defaultHwInfo.get()); auto mock = new DrmMockCustom(*executionEnvironment->rootDeviceEnvironments[0]); executionEnvironment->rootDeviceEnvironments[i]->osInterface = std::make_unique(); executionEnvironment->rootDeviceEnvironments[i]->osInterface->setDriverModel(std::unique_ptr(mock)); } auto memoryManager = std::make_unique(false, false, false, *executionEnvironment); for (auto i = 0u; i < 4u; i++) { auto mock = executionEnvironment->rootDeviceEnvironments[i]->osInterface->getDriverModel()->as(); auto hostMemorySize = MemoryConstants::pageSize * (uint64_t)(sysconf(_SC_PHYS_PAGES)); // gpuMemSize < hostMemSize auto gpuMemorySize = hostMemorySize - 1u; mock->ioctl_expected.contextGetParam = 1; mock->getContextParamRetValue = gpuMemorySize; uint64_t systemSharedMemorySize = memoryManager->getSystemSharedMemory(i); EXPECT_EQ(gpuMemorySize, systemSharedMemorySize); 
mock->ioctl_expected.contextDestroy = 0; mock->ioctl_expected.contextCreate = 0; mock->testIoctls(); // gpuMemSize > hostMemSize gpuMemorySize = hostMemorySize + 1u; mock->getContextParamRetValue = gpuMemorySize; systemSharedMemorySize = memoryManager->getSystemSharedMemory(i); mock->ioctl_expected.contextGetParam = 2; EXPECT_EQ(hostMemorySize, systemSharedMemorySize); mock->testIoctls(); executionEnvironment->rootDeviceEnvironments[i]->osInterface.reset(); } } /* With mock->ioctl_res set to -EIO, bo->wait(-1) is expected to throw std::exception. ioctl_res is set to 1 before unreference(bo, false) and reset to 0 at the end. */ TEST_F(DrmMemoryManagerTest, GivenBoWaitFailureThenExpectThrow) { mock->ioctl_expected.gemUserptr = 1; mock->ioctl_expected.gemWait = 1; mock->ioctl_expected.gemClose = 1; BufferObject *bo = memoryManager->allocUserptr(0, (size_t)1024, 0ul, rootDeviceIndex); ASSERT_NE(nullptr, bo); mock->ioctl_res = -EIO; EXPECT_THROW(bo->wait(-1), std::exception); mock->ioctl_res = 1; memoryManager->unreference(bo, false); mock->ioctl_res = 0; } /* populateOsHandles on a storage where only fragment 0 is filled in (cpuPtr 0x1000, fragmentSize 1) must set osHandleStorage for fragment 0 and leave it null for fragments 1 and 2. Fragment 0 is then flagged freeTheFragment and the storage is passed to cleanOsHandles. */ TEST_F(DrmMemoryManagerTest, WhenNullOsHandleStorageAskedForPopulationThenFilledPointerIsReturned) { mock->ioctl_expected.gemUserptr = 1; mock->ioctl_expected.gemWait = 1; mock->ioctl_expected.gemClose = 1; OsHandleStorage storage; storage.fragmentStorageData[0].cpuPtr = reinterpret_cast(0x1000); storage.fragmentStorageData[0].fragmentSize = 1; memoryManager->populateOsHandles(storage, rootDeviceIndex); EXPECT_NE(nullptr, storage.fragmentStorageData[0].osHandleStorage); EXPECT_EQ(nullptr, storage.fragmentStorageData[1].osHandleStorage); EXPECT_EQ(nullptr, storage.fragmentStorageData[2].osHandleStorage); storage.fragmentStorageData[0].freeTheFragment = true; memoryManager->cleanOsHandles(storage, rootDeviceIndex); } /* Manager built with the third constructor argument true (presumably host-pointer validation, going by the test name - confirm). With ioctl_res_ext initialised as {1, -1} plus entries 2 and 3 and errnoValue = EFAULT, populateOsHandles must return AllocationStatus::InvalidHostPointer. Expected ioctl counts: gemUserptr 1, execbuffer2 3. ioctl_res_ext is pointed back at mock->NONE at the end. */ TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, givenEnabledHostMemoryValidationWhenReadOnlyPointerCausesPinningFailWithEfaultThenPopulateOsHandlesReturnsInvalidHostPointerError) { std::unique_ptr memoryManager(new (std::nothrow) TestedDrmMemoryManager(false, false, true, *executionEnvironment)); memoryManager->registeredEngines = 
EngineControlContainer{this->device->allEngines}; for (auto engine : memoryManager->registeredEngines) { engine.osContext->incRefInternal(); } OsHandleStorage storage; storage.fragmentStorageData[0].cpuPtr = reinterpret_cast(0x1000); storage.fragmentStorageData[0].fragmentSize = 1; mock->reset(); DrmMockCustom::IoctlResExt ioctlResExt = {1, -1}; ioctlResExt.no.push_back(2); ioctlResExt.no.push_back(3); mock->ioctl_res_ext = &ioctlResExt; mock->errnoValue = EFAULT; mock->ioctl_expected.gemUserptr = 1; mock->ioctl_expected.execbuffer2 = 3; MemoryManager::AllocationStatus result = memoryManager->populateOsHandles(storage, rootDeviceIndex); EXPECT_EQ(MemoryManager::AllocationStatus::InvalidHostPointer, result); mock->testIoctls(); EXPECT_NE(nullptr, storage.fragmentStorageData[0].osHandleStorage); EXPECT_EQ(nullptr, storage.fragmentStorageData[1].osHandleStorage); EXPECT_EQ(nullptr, storage.fragmentStorageData[2].osHandleStorage); storage.fragmentStorageData[0].freeTheFragment = true; memoryManager->cleanOsHandles(storage, rootDeviceIndex); mock->ioctl_res_ext = &mock->NONE; } /* Same ioctl_res_ext and EFAULT setup: allocateGraphicsMemoryForNonSvmHostPtr must return nullptr. Before the call, a heapAllocate immediately followed by freeGpuAddressRange records a HEAP_STANDARD address; after the failed allocation a second heapAllocate of the same size must return that same address, which the inline comment describes as making sure the partition is free. */ TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, givenEnabledHostMemoryValidationWhenReadOnlyPointerCausesPinningFailWithEfaultThenAlocateMemoryForNonSvmHostPtrReturnsNullptr) { std::unique_ptr memoryManager(new (std::nothrow) TestedDrmMemoryManager(false, false, true, *executionEnvironment)); memoryManager->registeredEngines = EngineControlContainer{this->device->allEngines}; for (auto engine : memoryManager->registeredEngines) { engine.osContext->incRefInternal(); } mock->reset(); size_t dummySize = 13u; DrmMockCustom::IoctlResExt ioctlResExt = {1, -1}; ioctlResExt.no.push_back(2); ioctlResExt.no.push_back(3); mock->ioctl_res_ext = &ioctlResExt; mock->errnoValue = EFAULT; mock->ioctl_expected.gemUserptr = 1; mock->ioctl_expected.execbuffer2 = 3; mock->ioctl_expected.gemClose = 1; AllocationData allocationData; allocationData.size = dummySize; allocationData.hostPtr = 
reinterpret_cast(0x5001); allocationData.rootDeviceIndex = device->getRootDeviceIndex(); auto gfxPartition = memoryManager->getGfxPartition(device->getRootDeviceIndex()); auto allocatedPointer = gfxPartition->heapAllocate(HeapIndex::HEAP_STANDARD, dummySize); gfxPartition->freeGpuAddressRange(allocatedPointer, dummySize); auto allocation = memoryManager->allocateGraphicsMemoryForNonSvmHostPtr(allocationData); EXPECT_EQ(nullptr, allocation); mock->testIoctls(); mock->ioctl_res_ext = &mock->NONE; // make sure that partition is free size_t dummySize2 = 13u; auto allocatedPointer2 = gfxPartition->heapAllocate(HeapIndex::HEAP_STANDARD, dummySize2); EXPECT_EQ(allocatedPointer2, allocatedPointer); gfxPartition->freeGpuAddressRange(allocatedPointer, dummySize2); } /* Same ioctl_res_ext setup but with errnoValue = 0: allocateGraphicsMemoryForNonSvmHostPtr for a 13-byte request at host pointer 0x5001 must return a non-null allocation, which is freed at the end. */ TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, givenEnabledHostMemoryValidationWhenHostPtrDoesntCausePinningFailThenAlocateMemoryForNonSvmHostPtrReturnsAllocation) { std::unique_ptr memoryManager(new (std::nothrow) TestedDrmMemoryManager(false, false, true, *executionEnvironment)); memoryManager->registeredEngines = EngineControlContainer{this->device->allEngines}; for (auto engine : memoryManager->registeredEngines) { engine.osContext->incRefInternal(); } mock->reset(); DrmMockCustom::IoctlResExt ioctlResExt = {1, -1}; ioctlResExt.no.push_back(2); ioctlResExt.no.push_back(3); mock->ioctl_res_ext = &ioctlResExt; mock->errnoValue = 0; mock->ioctl_expected.gemUserptr = 1; mock->ioctl_expected.execbuffer2 = 3; AllocationData allocationData; allocationData.size = 13u; allocationData.hostPtr = reinterpret_cast(0x5001); allocationData.rootDeviceIndex = device->getRootDeviceIndex(); auto allocation = memoryManager->allocateGraphicsMemoryForNonSvmHostPtr(allocationData); EXPECT_NE(nullptr, allocation); mock->testIoctls(); mock->ioctl_res_ext = &mock->NONE; memoryManager->freeGraphicsMemory(allocation); } /* With errnoValue = 0 and the same ioctl_res_ext setup, the allocation from allocateGraphicsMemoryForNonSvmHostPtr must be non-null and getGpuAddress() - getAllocationOffset() must equal mock->execBufferBufferObjects.offset. */ TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, 
givenEnabledHostMemoryValidationWhenAllocatingMemoryForNonSvmHostPtrThenAllocatedCorrectly) { std::unique_ptr memoryManager(new (std::nothrow) TestedDrmMemoryManager(false, false, true, *executionEnvironment)); memoryManager->registeredEngines = EngineControlContainer{this->device->allEngines}; for (auto engine : memoryManager->registeredEngines) { engine.osContext->incRefInternal(); } mock->reset(); DrmMockCustom::IoctlResExt ioctlResExt = {1, -1}; ioctlResExt.no.push_back(2); ioctlResExt.no.push_back(3); mock->ioctl_res_ext = &ioctlResExt; mock->errnoValue = 0; mock->ioctl_expected.gemUserptr = 1; mock->ioctl_expected.execbuffer2 = 3; AllocationData allocationData; allocationData.size = 13u; allocationData.hostPtr = reinterpret_cast(0x5001); allocationData.rootDeviceIndex = device->getRootDeviceIndex(); auto allocation = memoryManager->allocateGraphicsMemoryForNonSvmHostPtr(allocationData); EXPECT_NE(nullptr, allocation); EXPECT_EQ(allocation->getGpuAddress() - allocation->getAllocationOffset(), mock->execBufferBufferObjects.offset); mock->testIoctls(); mock->ioctl_res_ext = &mock->NONE; memoryManager->freeGraphicsMemory(allocation); } /* With errnoValue = ENOMEM and the {1, -1} plus 2, 3 ioctl_res_ext setup, populateOsHandles must return AllocationStatus::Error; fragment 0 still gets an osHandleStorage while fragments 1 and 2 stay null. */ TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, givenEnabledHostMemoryValidationWhenPinningFailWithErrorDifferentThanEfaultThenPopulateOsHandlesReturnsError) { std::unique_ptr memoryManager(new (std::nothrow) TestedDrmMemoryManager(false, false, true, *executionEnvironment)); memoryManager->registeredEngines = EngineControlContainer{this->device->allEngines}; for (auto engine : memoryManager->registeredEngines) { engine.osContext->incRefInternal(); } OsHandleStorage storage; storage.fragmentStorageData[0].cpuPtr = reinterpret_cast(0x1000); storage.fragmentStorageData[0].fragmentSize = 1; mock->reset(); DrmMockCustom::IoctlResExt ioctlResExt = {1, -1}; ioctlResExt.no.push_back(2); ioctlResExt.no.push_back(3); mock->ioctl_res_ext = &ioctlResExt; mock->errnoValue = ENOMEM; mock->ioctl_expected.gemUserptr = 1; 
mock->ioctl_expected.execbuffer2 = 3; MemoryManager::AllocationStatus result = memoryManager->populateOsHandles(storage, rootDeviceIndex); EXPECT_EQ(MemoryManager::AllocationStatus::Error, result); mock->testIoctls(); EXPECT_NE(nullptr, storage.fragmentStorageData[0].osHandleStorage); EXPECT_EQ(nullptr, storage.fragmentStorageData[1].osHandleStorage); EXPECT_EQ(nullptr, storage.fragmentStorageData[2].osHandleStorage); storage.fragmentStorageData[0].freeTheFragment = true; memoryManager->cleanOsHandles(storage, rootDeviceIndex); mock->ioctl_res_ext = &mock->NONE; } /* An OsHandleLinux must have bo == nullptr both when declared as a local and when created with new. */ TEST_F(DrmMemoryManagerTest, GivenNoInputsWhenOsHandleIsCreatedThenAllBoHandlesAreInitializedAsNullPtrs) { OsHandleLinux boHandle; EXPECT_EQ(nullptr, boHandle.bo); std::unique_ptr boHandle2(new OsHandleLinux); EXPECT_EQ(nullptr, boHandle2->bo); } /* createGraphicsAllocation(handleStorage, allocationData) must report ptr as both GPU address and underlying buffer and size as the buffer size, and must carry the three fragments' cpu pointers and sizes in allocation->fragmentsStorage, which must be a different object from handleStorage. */ TEST_F(DrmMemoryManagerTest, GivenPointerAndSizeWhenAskedToCreateGrahicsAllocationThenGraphicsAllocationIsCreated) { OsHandleStorage handleStorage; auto ptr = reinterpret_cast(0x1000); auto ptr2 = reinterpret_cast(0x1001); auto size = MemoryConstants::pageSize; handleStorage.fragmentStorageData[0].cpuPtr = ptr; handleStorage.fragmentStorageData[1].cpuPtr = ptr2; handleStorage.fragmentStorageData[2].cpuPtr = nullptr; handleStorage.fragmentStorageData[0].fragmentSize = size; handleStorage.fragmentStorageData[1].fragmentSize = size * 2; handleStorage.fragmentStorageData[2].fragmentSize = size * 3; allocationData.size = size; allocationData.hostPtr = ptr; auto allocation = std::unique_ptr(memoryManager->createGraphicsAllocation(handleStorage, allocationData)); EXPECT_EQ(reinterpret_cast(allocation->getGpuAddress()), ptr); EXPECT_EQ(ptr, allocation->getUnderlyingBuffer()); EXPECT_EQ(size, allocation->getUnderlyingBufferSize()); EXPECT_EQ(ptr, allocation->fragmentsStorage.fragmentStorageData[0].cpuPtr); EXPECT_EQ(ptr2, allocation->fragmentsStorage.fragmentStorageData[1].cpuPtr); EXPECT_EQ(nullptr, allocation->fragmentsStorage.fragmentStorageData[2].cpuPtr); 
EXPECT_EQ(size, allocation->fragmentsStorage.fragmentStorageData[0].fragmentSize); EXPECT_EQ(size * 2, allocation->fragmentsStorage.fragmentStorageData[1].fragmentSize); EXPECT_EQ(size * 3, allocation->fragmentsStorage.fragmentStorageData[2].fragmentSize); EXPECT_NE(&allocation->fragmentsStorage, &handleStorage); } /* allocateGraphicsMemory64kb is expected to return nullptr for a request of size MemoryConstants::pageSize64k. */ TEST_F(DrmMemoryManagerTest, GivenMemoryManagerWhenCreatingGraphicsAllocation64kbThenNullPtrIsReturned) { allocationData.size = MemoryConstants::pageSize64k; allocationData.rootDeviceIndex = rootDeviceIndex; auto allocation = memoryManager->allocateGraphicsMemory64kb(allocationData); EXPECT_EQ(nullptr, allocation); } /* acquireGpuRange(4096, 0, HEAP_STANDARD) must return a value strictly greater than the canonized HEAP_STANDARD base and strictly less than the canonized HEAP_STANDARD limit. */ TEST_F(DrmMemoryManagerTest, givenRequiresStandardHeapThenStandardHeapIsAcquired) { const uint32_t rootDeviceIndex = 0; size_t bufferSize = 4096u; uint64_t range = memoryManager->acquireGpuRange(bufferSize, rootDeviceIndex, HeapIndex::HEAP_STANDARD); EXPECT_LT(GmmHelper::canonize(memoryManager->getGfxPartition(rootDeviceIndex)->getHeapBase(HeapIndex::HEAP_STANDARD)), range); EXPECT_GT(GmmHelper::canonize(memoryManager->getGfxPartition(rootDeviceIndex)->getHeapLimit(HeapIndex::HEAP_STANDARD)), range); } /* acquireGpuRange(4096, 0, HEAP_STANDARD2MB) must return a value strictly greater than the canonized HEAP_STANDARD2MB base and strictly less than the canonized HEAP_STANDARD2MB limit. */ TEST_F(DrmMemoryManagerTest, givenRequiresStandard2MBHeapThenStandard2MBHeapIsAcquired) { const uint32_t rootDeviceIndex = 0; size_t bufferSize = 4096u; uint64_t range = memoryManager->acquireGpuRange(bufferSize, rootDeviceIndex, HeapIndex::HEAP_STANDARD2MB); EXPECT_LT(GmmHelper::canonize(memoryManager->getGfxPartition(rootDeviceIndex)->getHeapBase(HeapIndex::HEAP_STANDARD2MB)), range); EXPECT_GT(GmmHelper::canonize(memoryManager->getGfxPartition(rootDeviceIndex)->getHeapLimit(HeapIndex::HEAP_STANDARD2MB)), range); } TEST_F(DrmMemoryManagerTest, GivenShareableEnabledWhenAskedToCreateGraphicsAllocationThenValidAllocationIsReturnedAndStandard64KBHeapIsUsed) { mock->ioctl_expected.gemWait = 1; mock->ioctl_expected.gemCreate = 1; mock->ioctl_expected.gemClose = 1; allocationData.size = MemoryConstants::pageSize; allocationData.flags.shareable = 
true; auto allocation = memoryManager->allocateMemoryByKMD(allocationData); EXPECT_NE(nullptr, allocation); EXPECT_NE(0u, allocation->getGpuAddress()); EXPECT_LT(GmmHelper::canonize(memoryManager->getGfxPartition(allocation->getRootDeviceIndex())->getHeapBase(HeapIndex::HEAP_STANDARD64KB)), allocation->getGpuAddress()); EXPECT_GT(GmmHelper::canonize(memoryManager->getGfxPartition(allocation->getRootDeviceIndex())->getHeapLimit(HeapIndex::HEAP_STANDARD64KB)), allocation->getGpuAddress()); memoryManager->freeGraphicsMemory(allocation); } TEST_F(DrmMemoryManagerTest, GivenMisalignedHostPtrAndMultiplePagesSizeWhenAskedForGraphicsAllocationThenItContainsAllFragmentsWithProperGpuAdrresses) { mock->ioctl_expected.gemUserptr = 3; mock->ioctl_expected.gemWait = 3; mock->ioctl_expected.gemClose = 3; auto ptr = reinterpret_cast(0x1001); auto size = MemoryConstants::pageSize * 10; auto graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootDeviceIndex, false, size}, ptr); auto hostPtrManager = static_cast(memoryManager->getHostPtrManager()); if (memoryManager->isLimitedRange(rootDeviceIndex)) { ASSERT_EQ(6u, hostPtrManager->getFragmentCount()); } else { ASSERT_EQ(3u, hostPtrManager->getFragmentCount()); } auto reqs = MockHostPtrManager::getAllocationRequirements(rootDeviceIndex, ptr, size); for (int i = 0; i < maxFragmentsCount; i++) { auto osHandle = static_cast(graphicsAllocation->fragmentsStorage.fragmentStorageData[i].osHandleStorage); ASSERT_NE(nullptr, osHandle->bo); EXPECT_EQ(reqs.allocationFragments[i].allocationSize, osHandle->bo->peekSize()); EXPECT_EQ(reqs.allocationFragments[i].allocationPtr, reinterpret_cast(osHandle->bo->peekAddress())); } memoryManager->freeGraphicsMemory(graphicsAllocation); if (memoryManager->isLimitedRange(rootDeviceIndex)) { EXPECT_EQ(3u, hostPtrManager->getFragmentCount()); } else { EXPECT_EQ(0u, hostPtrManager->getFragmentCount()); } } TEST_F(DrmMemoryManagerTest, 
givenMemoryManagerWhenAskedFor32BitAllocationThen32BitDrmAllocationIsBeingReturned) { mock->ioctl_expected.gemUserptr = 1; mock->ioctl_expected.gemWait = 1; mock->ioctl_expected.gemClose = 1; auto size = 10u; memoryManager->setForce32BitAllocations(true); auto allocation = memoryManager->allocate32BitGraphicsMemory(rootDeviceIndex, size, nullptr, AllocationType::BUFFER); EXPECT_NE(nullptr, allocation); EXPECT_NE(nullptr, allocation->getUnderlyingBuffer()); EXPECT_GE(allocation->getUnderlyingBufferSize(), size); auto address64bit = allocation->getGpuAddressToPatch(); EXPECT_LT(address64bit, MemoryConstants::max32BitAddress); EXPECT_TRUE(allocation->is32BitAllocation()); EXPECT_EQ(GmmHelper::canonize(memoryManager->getExternalHeapBaseAddress(allocation->getRootDeviceIndex(), allocation->isAllocatedInLocalMemoryPool())), allocation->getGpuBaseAddress()); memoryManager->freeGraphicsMemory(allocation); } TEST_F(DrmMemoryManagerTest, givenMemoryManagerWhenAskedFor32BitAllocationWhenLimitedAllocationEnabledThen32BitDrmAllocationWithGpuAddrDifferentFromCpuAddrIsBeingReturned) { mock->ioctl_expected.gemUserptr = 1; mock->ioctl_expected.gemWait = 1; mock->ioctl_expected.gemClose = 1; memoryManager->forceLimitedRangeAllocator(0xFFFFFFFFF); auto size = 10u; memoryManager->setForce32BitAllocations(true); auto allocation = memoryManager->allocate32BitGraphicsMemory(rootDeviceIndex, size, nullptr, AllocationType::BUFFER); EXPECT_NE(nullptr, allocation); EXPECT_NE(nullptr, allocation->getUnderlyingBuffer()); EXPECT_GE(allocation->getUnderlyingBufferSize(), size); EXPECT_NE((uint64_t)allocation->getGpuAddress(), (uint64_t)allocation->getUnderlyingBuffer()); memoryManager->freeGraphicsMemory(allocation); } struct ClDrmMemoryManagerTest : public DrmMemoryManagerTest { void SetUp() override { MemoryManagementFixture::SetUp(); executionEnvironment = MockClDevice::prepareExecutionEnvironment(defaultHwInfo.get(), numRootDevices - 1); DrmMemoryManagerFixture::SetUp(new 
DrmMockCustom(*executionEnvironment->rootDeviceEnvironments[0]), false); pClDevice = new MockClDevice{device}; device->incRefInternal(); } void TearDown() override { delete pClDevice; DrmMemoryManagerTest::TearDown(); } MockClDevice *pClDevice = nullptr; }; TEST_F(ClDrmMemoryManagerTest, Given32bitAllocatorWhenAskedForBufferAllocationThen32BitBufferIsReturned) { DebugManagerStateRestore dbgRestorer; mock->ioctl_expected.gemUserptr = 1; mock->ioctl_expected.gemWait = 1; mock->ioctl_expected.gemClose = 1; DebugManager.flags.Force32bitAddressing.set(true); MockContext context(pClDevice); memoryManager->setForce32BitAllocations(true); auto size = MemoryConstants::pageSize; auto retVal = CL_SUCCESS; auto buffer = Buffer::create( &context, CL_MEM_ALLOC_HOST_PTR, size, nullptr, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(buffer->isMemObjZeroCopy()); auto bufferAddress = buffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress(); auto baseAddress = buffer->getGraphicsAllocation(rootDeviceIndex)->getGpuBaseAddress(); EXPECT_LT(ptrDiff(bufferAddress, baseAddress), MemoryConstants::max32BitAddress); delete buffer; } TEST_F(ClDrmMemoryManagerTest, Given32bitAllocatorWhenAskedForBufferCreatedFromHostPtrThen32BitBufferIsReturned) { DebugManagerStateRestore dbgRestorer; mock->ioctl_expected.gemUserptr = 1; mock->ioctl_expected.gemWait = 1; mock->ioctl_expected.gemClose = 1; DebugManager.flags.Force32bitAddressing.set(true); MockContext context(pClDevice); memoryManager->setForce32BitAllocations(true); auto size = MemoryConstants::pageSize; void *ptr = reinterpret_cast(0x1000); auto ptrOffset = MemoryConstants::cacheLineSize; uintptr_t offsetedPtr = (uintptr_t)ptr + ptrOffset; auto retVal = CL_SUCCESS; auto buffer = Buffer::create( &context, CL_MEM_USE_HOST_PTR, size, reinterpret_cast(offsetedPtr), retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(buffer->isMemObjZeroCopy()); auto bufferAddress = buffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress(); auto 
drmAllocation = static_cast(buffer->getGraphicsAllocation(rootDeviceIndex)); auto baseAddress = buffer->getGraphicsAllocation(rootDeviceIndex)->getGpuBaseAddress(); EXPECT_LT(ptrDiff(bufferAddress, baseAddress), MemoryConstants::max32BitAddress); EXPECT_TRUE(drmAllocation->is32BitAllocation()); auto allocationCpuPtr = drmAllocation->getUnderlyingBuffer(); auto allocationPageOffset = ptrDiff(allocationCpuPtr, alignDown(allocationCpuPtr, MemoryConstants::pageSize)); auto allocationGpuPtr = drmAllocation->getGpuAddress(); auto allocationGpuOffset = ptrDiff(allocationGpuPtr, alignDown(allocationGpuPtr, MemoryConstants::pageSize)); auto bufferObject = drmAllocation->getBO(); EXPECT_EQ(drmAllocation->getUnderlyingBuffer(), reinterpret_cast(offsetedPtr)); // Gpu address should be different EXPECT_NE(offsetedPtr, drmAllocation->getGpuAddress()); // Gpu address offset iqual to cpu offset EXPECT_EQ(allocationGpuOffset, ptrOffset); EXPECT_EQ(allocationPageOffset, ptrOffset); auto boAddress = bufferObject->peekAddress(); EXPECT_EQ(alignDown(boAddress, MemoryConstants::pageSize), boAddress); delete buffer; } TEST_F(ClDrmMemoryManagerTest, Given32bitAllocatorWhenAskedForBufferCreatedFrom64BitHostPtrThen32BitBufferIsReturned) { DebugManagerStateRestore dbgRestorer; { if (is32bit) { mock->ioctl_expected.total = -1; } else { mock->ioctl_expected.gemUserptr = 1; mock->ioctl_expected.gemWait = 1; mock->ioctl_expected.gemClose = 1; DebugManager.flags.Force32bitAddressing.set(true); MockContext context(pClDevice); memoryManager->setForce32BitAllocations(true); auto size = MemoryConstants::pageSize; void *ptr = reinterpret_cast(0x100000000000); auto ptrOffset = MemoryConstants::cacheLineSize; uintptr_t offsetedPtr = (uintptr_t)ptr + ptrOffset; auto retVal = CL_SUCCESS; auto buffer = Buffer::create( &context, CL_MEM_USE_HOST_PTR, size, reinterpret_cast(offsetedPtr), retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(buffer->isMemObjZeroCopy()); auto bufferAddress = 
buffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress(); auto baseAddress = buffer->getGraphicsAllocation(rootDeviceIndex)->getGpuBaseAddress(); EXPECT_LT(ptrDiff(bufferAddress, baseAddress), MemoryConstants::max32BitAddress); auto drmAllocation = static_cast(buffer->getGraphicsAllocation(rootDeviceIndex)); EXPECT_TRUE(drmAllocation->is32BitAllocation()); auto allocationCpuPtr = drmAllocation->getUnderlyingBuffer(); auto allocationPageOffset = ptrDiff(allocationCpuPtr, alignDown(allocationCpuPtr, MemoryConstants::pageSize)); auto bufferObject = drmAllocation->getBO(); EXPECT_EQ(allocationPageOffset, ptrOffset); auto boAddress = bufferObject->peekAddress(); EXPECT_EQ(alignDown(boAddress, MemoryConstants::pageSize), boAddress); delete buffer; DebugManager.flags.Force32bitAddressing.set(false); } } } TEST_F(DrmMemoryManagerTest, givenMemoryManagerWhenLimitedRangeAllocatorSetThenHeapSizeAndEndAddrCorrectlySetForGivenGpuRange) { memoryManager->forceLimitedRangeAllocator(0xFFFFFFFFF); uint64_t sizeBig = 4 * MemoryConstants::megaByte + MemoryConstants::pageSize; auto gpuAddressLimitedRange = memoryManager->getGfxPartition(rootDeviceIndex)->heapAllocate(HeapIndex::HEAP_STANDARD, sizeBig); EXPECT_LT(memoryManager->getGfxPartition(rootDeviceIndex)->getHeapBase(HeapIndex::HEAP_STANDARD), gpuAddressLimitedRange); EXPECT_GT(memoryManager->getGfxPartition(rootDeviceIndex)->getHeapLimit(HeapIndex::HEAP_STANDARD), gpuAddressLimitedRange + sizeBig); EXPECT_EQ(memoryManager->getGfxPartition(rootDeviceIndex)->getHeapMinimalAddress(HeapIndex::HEAP_STANDARD), gpuAddressLimitedRange); auto gpuInternal32BitAlloc = memoryManager->getGfxPartition(rootDeviceIndex)->heapAllocate(HeapIndex::HEAP_INTERNAL_DEVICE_MEMORY, sizeBig); EXPECT_LT(memoryManager->getGfxPartition(rootDeviceIndex)->getHeapBase(HeapIndex::HEAP_INTERNAL_DEVICE_MEMORY), gpuInternal32BitAlloc); EXPECT_GT(memoryManager->getGfxPartition(rootDeviceIndex)->getHeapLimit(HeapIndex::HEAP_INTERNAL_DEVICE_MEMORY), 
gpuInternal32BitAlloc + sizeBig); EXPECT_EQ(memoryManager->getGfxPartition(rootDeviceIndex)->getHeapMinimalAddress(HeapIndex::HEAP_INTERNAL_DEVICE_MEMORY), gpuInternal32BitAlloc); } TEST_F(DrmMemoryManagerTest, givenMemoryManagerWhenAskedForAllocationWithAlignmentAndLimitedRangeAllocatorSetAndAcquireGpuRangeFailsThenNullIsReturned) { mock->ioctl_expected.gemUserptr = 0; mock->ioctl_expected.gemClose = 0; AllocationData allocationData; // emulate GPU address space exhaust memoryManager->forceLimitedRangeAllocator(0xFFFFFFFFF); memoryManager->getGfxPartition(rootDeviceIndex)->heapInit(HeapIndex::HEAP_STANDARD, 0x0, 0x10000); // set size to something bigger than allowed space allocationData.size = 0x20000; allocationData.rootDeviceIndex = rootDeviceIndex; EXPECT_EQ(nullptr, memoryManager->allocateGraphicsMemoryWithAlignment(allocationData)); } TEST_F(DrmMemoryManagerTest, givenMemoryManagerWhenAskedFor32BitAllocationWithHostPtrAndAllocUserptrFailsThenFails) { mock->ioctl_expected.gemUserptr = 1; this->ioctlResExt = {mock->ioctl_cnt.total, -1}; mock->ioctl_res_ext = &ioctlResExt; auto size = 10u; void *host_ptr = reinterpret_cast(0x1000); memoryManager->setForce32BitAllocations(true); auto allocation = memoryManager->allocate32BitGraphicsMemory(rootDeviceIndex, size, host_ptr, AllocationType::BUFFER); EXPECT_EQ(nullptr, allocation); mock->ioctl_res_ext = &mock->NONE; } TEST_F(DrmMemoryManagerTest, givenMemoryManagerWhenAskedFor32BitAllocationAndAllocUserptrFailsThenFails) { mock->ioctl_expected.gemUserptr = 1; this->ioctlResExt = {mock->ioctl_cnt.total, -1}; mock->ioctl_res_ext = &ioctlResExt; auto size = 10u; memoryManager->setForce32BitAllocations(true); auto allocation = memoryManager->allocate32BitGraphicsMemory(rootDeviceIndex, size, nullptr, AllocationType::BUFFER); EXPECT_EQ(nullptr, allocation); mock->ioctl_res_ext = &mock->NONE; } TEST_F(DrmMemoryManagerTest, givenLimitedRangeAllocatorWhenAskedForInternal32BitAllocationAndAllocUserptrFailsThenFails) { 
mock->ioctl_expected.gemUserptr = 1; this->ioctlResExt = {mock->ioctl_cnt.total, -1}; mock->ioctl_res_ext = &ioctlResExt; auto size = 10u; memoryManager->forceLimitedRangeAllocator(0xFFFFFFFFF); auto allocation = memoryManager->allocate32BitGraphicsMemory(rootDeviceIndex, size, nullptr, AllocationType::INTERNAL_HEAP); EXPECT_EQ(nullptr, allocation); mock->ioctl_res_ext = &mock->NONE; } TEST_F(ClDrmMemoryManagerTest, GivenSizeAbove2GBWhenUseHostPtrAndAllocHostPtrAreCreatedThenFirstSucceedsAndSecondFails) { DebugManagerStateRestore dbgRestorer; mock->ioctl_expected.total = -1; DebugManager.flags.Force32bitAddressing.set(true); MockContext context(pClDevice); memoryManager->setForce32BitAllocations(true); size_t size = 2 * GB; void *ptr = reinterpret_cast(0x100000000000); auto retVal = CL_SUCCESS; auto buffer = Buffer::create( &context, CL_MEM_USE_HOST_PTR, size, ptr, retVal); size_t size2 = 4 * GB - MemoryConstants::pageSize; // Keep size aligned auto buffer2 = Buffer::create( &context, CL_MEM_ALLOC_HOST_PTR, size2, nullptr, retVal); EXPECT_NE(retVal, CL_SUCCESS); EXPECT_EQ(nullptr, buffer2); if (buffer) { auto bufferPtr = buffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress(); EXPECT_TRUE(buffer->getGraphicsAllocation(rootDeviceIndex)->is32BitAllocation()); auto baseAddress = buffer->getGraphicsAllocation(rootDeviceIndex)->getGpuBaseAddress(); EXPECT_LT(ptrDiff(bufferPtr, baseAddress), MemoryConstants::max32BitAddress); } delete buffer; } TEST_F(ClDrmMemoryManagerTest, GivenSizeAbove2GBWhenAllocHostPtrAndUseHostPtrAreCreatedThenFirstSucceedsAndSecondFails) { DebugManagerStateRestore dbgRestorer; mock->ioctl_expected.total = -1; DebugManager.flags.Force32bitAddressing.set(true); MockContext context(pClDevice); memoryManager->setForce32BitAllocations(true); size_t size = 2 * GB; void *ptr = reinterpret_cast(0x100000000000); auto retVal = CL_SUCCESS; auto buffer = Buffer::create( &context, CL_MEM_ALLOC_HOST_PTR, size, nullptr, retVal); size_t size2 = 4 * GB - 
MemoryConstants::pageSize; // Keep size aligned auto buffer2 = Buffer::create( &context, CL_MEM_USE_HOST_PTR, size2, ptr, retVal); EXPECT_NE(retVal, CL_SUCCESS); EXPECT_EQ(nullptr, buffer2); if (buffer) { auto bufferPtr = buffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress(); EXPECT_TRUE(buffer->getGraphicsAllocation(rootDeviceIndex)->is32BitAllocation()); auto baseAddress = buffer->getGraphicsAllocation(rootDeviceIndex)->getGpuBaseAddress(); EXPECT_LT(ptrDiff(bufferPtr, baseAddress), MemoryConstants::max32BitAddress); } delete buffer; } TEST_F(ClDrmMemoryManagerTest, givenDrmBufferWhenItIsQueriedForInternalAllocationThenBoIsReturned) { mock->ioctl_expected.total = -1; mock->outputFd = 1337; MockContext context(pClDevice); size_t size = 1u; auto retVal = CL_SUCCESS; auto buffer = Buffer::create( &context, CL_MEM_ALLOC_HOST_PTR, size, nullptr, retVal); uint64_t handle = 0llu; retVal = clGetMemObjectInfo(buffer, CL_MEM_ALLOCATION_HANDLE_INTEL, sizeof(handle), &handle, nullptr); EXPECT_EQ(retVal, CL_SUCCESS); EXPECT_EQ(static_cast(1337), handle); clReleaseMemObject(buffer); } TEST_F(DrmMemoryManagerTest, GivenExhaustedInternalHeapWhenAllocate32BitIsCalledThenNullIsReturned) { DebugManagerStateRestore dbgStateRestore; DebugManager.flags.Force32bitAddressing.set(true); memoryManager->setForce32BitAllocations(true); size_t size = MemoryConstants::pageSize64k; auto alloc = memoryManager->getGfxPartition(rootDeviceIndex)->heapAllocate(HeapIndex::HEAP_INTERNAL_DEVICE_MEMORY, size); EXPECT_NE(0llu, alloc); size_t allocationSize = 4 * GB; auto graphicsAllocation = memoryManager->allocate32BitGraphicsMemory(rootDeviceIndex, allocationSize, nullptr, AllocationType::INTERNAL_HEAP); EXPECT_EQ(nullptr, graphicsAllocation); } TEST_F(DrmMemoryManagerTest, givenSetForceUserptrAlignmentWhenGetUserptrAlignmentThenForcedValueIsReturned) { DebugManagerStateRestore dbgStateRestore; DebugManager.flags.ForceUserptrAlignment.set(123456); EXPECT_EQ(123456 * 
MemoryConstants::kiloByte, memoryManager->getUserptrAlignment()); } TEST_F(DrmMemoryManagerTest, whenGetUserptrAlignmentThenDefaultValueIsReturned) { EXPECT_EQ(MemoryConstants::allocationAlignment, memoryManager->getUserptrAlignment()); } TEST_F(DrmMemoryManagerTest, GivenMemoryManagerWhenAllocateGraphicsMemoryForImageIsCalledThenProperIoctlsAreCalledAndUnmapSizeIsNonZero) { mock->ioctl_expected.gemCreate = 1; mock->ioctl_expected.gemSetTiling = 1; mock->ioctl_expected.gemWait = 1; mock->ioctl_expected.gemClose = 1; ImageDescriptor imgDesc = {}; imgDesc.imageType = ImageType::Image2D; // tiled imgDesc.imageWidth = 512; imgDesc.imageHeight = 512; auto imgInfo = MockGmm::initImgInfo(imgDesc, 0, nullptr); imgInfo.size = 4096u; imgInfo.rowPitch = 512u; AllocationData allocationData; allocationData.imgInfo = &imgInfo; allocationData.rootDeviceIndex = rootDeviceIndex; auto imageGraphicsAllocation = memoryManager->allocateGraphicsMemoryForImage(allocationData); ASSERT_NE(nullptr, imageGraphicsAllocation); EXPECT_NE(0u, imageGraphicsAllocation->getGpuAddress()); EXPECT_EQ(nullptr, imageGraphicsAllocation->getUnderlyingBuffer()); EXPECT_TRUE(imageGraphicsAllocation->getDefaultGmm()->resourceParams.Usage == GMM_RESOURCE_USAGE_TYPE::GMM_RESOURCE_USAGE_OCL_IMAGE); EXPECT_EQ(1u, this->mock->createParamsHandle); EXPECT_EQ(imgInfo.size, this->mock->createParamsSize); __u32 tilingMode = I915_TILING_Y; EXPECT_EQ(tilingMode, this->mock->setTilingMode); EXPECT_EQ(imgInfo.rowPitch, this->mock->setTilingStride); EXPECT_EQ(1u, this->mock->setTilingHandle); memoryManager->freeGraphicsMemory(imageGraphicsAllocation); } HWTEST_F(ClDrmMemoryManagerTest, givenDrmMemoryManagerWhenTiledImageWithMipCountZeroIsBeingCreatedThenallocateGraphicsMemoryForImageIsUsed) { if (!UnitTestHelper::tiledImagesSupported) { GTEST_SKIP(); } mock->ioctl_expected.gemCreate = 1; mock->ioctl_expected.gemSetTiling = 1; mock->ioctl_expected.gemWait = 1; mock->ioctl_expected.gemClose = 1; MockContext 
context(pClDevice); cl_image_format imageFormat; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_R; cl_image_desc imageDesc = {}; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_width = 64u; imageDesc.image_height = 64u; auto retVal = CL_SUCCESS; cl_mem_flags flags = CL_MEM_WRITE_ONLY; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); std::unique_ptr dstImage(Image::create( &context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context.getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, dstImage); auto imageGraphicsAllocation = dstImage->getGraphicsAllocation(rootDeviceIndex); ASSERT_NE(nullptr, imageGraphicsAllocation); EXPECT_TRUE(imageGraphicsAllocation->getDefaultGmm()->resourceParams.Usage == GMM_RESOURCE_USAGE_TYPE::GMM_RESOURCE_USAGE_OCL_IMAGE); DrmAllocation *drmAllocation = static_cast(imageGraphicsAllocation); auto imageSize = drmAllocation->getUnderlyingBufferSize(); auto rowPitch = dstImage->getImageDesc().image_row_pitch; EXPECT_EQ(1u, this->mock->createParamsHandle); EXPECT_EQ(imageSize, this->mock->createParamsSize); __u32 tilingMode = I915_TILING_Y; EXPECT_EQ(tilingMode, this->mock->setTilingMode); EXPECT_EQ(rowPitch, this->mock->setTilingStride); EXPECT_EQ(1u, this->mock->setTilingHandle); } HWTEST_F(ClDrmMemoryManagerTest, givenDrmMemoryManagerWhenTiledImageWithMipCountNonZeroIsBeingCreatedThenallocateGraphicsMemoryForImageIsUsed) { if (!UnitTestHelper::tiledImagesSupported) { GTEST_SKIP(); } mock->ioctl_expected.gemCreate = 1; mock->ioctl_expected.gemSetTiling = 1; mock->ioctl_expected.gemWait = 1; mock->ioctl_expected.gemClose = 1; MockContext context(pClDevice); cl_image_format imageFormat; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_R; 
cl_image_desc imageDesc = {}; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_width = 64u; imageDesc.image_height = 64u; imageDesc.num_mip_levels = 1u; auto retVal = CL_SUCCESS; cl_mem_flags flags = CL_MEM_WRITE_ONLY; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); std::unique_ptr dstImage(Image::create( &context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context.getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, dstImage); EXPECT_EQ(static_cast(imageDesc.num_mip_levels), dstImage->peekMipCount()); auto imageGraphicsAllocation = dstImage->getGraphicsAllocation(rootDeviceIndex); ASSERT_NE(nullptr, imageGraphicsAllocation); EXPECT_TRUE(imageGraphicsAllocation->getDefaultGmm()->resourceParams.Usage == GMM_RESOURCE_USAGE_TYPE::GMM_RESOURCE_USAGE_OCL_IMAGE); DrmAllocation *drmAllocation = static_cast(imageGraphicsAllocation); auto imageSize = drmAllocation->getUnderlyingBufferSize(); auto rowPitch = dstImage->getImageDesc().image_row_pitch; EXPECT_EQ(1u, this->mock->createParamsHandle); EXPECT_EQ(imageSize, this->mock->createParamsSize); __u32 tilingMode = I915_TILING_Y; EXPECT_EQ(tilingMode, this->mock->setTilingMode); EXPECT_EQ(rowPitch, this->mock->setTilingStride); EXPECT_EQ(1u, this->mock->setTilingHandle); } TEST_F(ClDrmMemoryManagerTest, givenDrmMemoryManagerWhenTiledImageIsBeingCreatedAndAllocationFailsThenReturnNullptr) { MockContext context(pClDevice); cl_image_format imageFormat; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_R; cl_image_desc imageDesc = {}; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_width = 64u; imageDesc.image_height = 64u; auto retVal = CL_SUCCESS; InjectedFunction method = [&](size_t failureIndex) { cl_mem_flags flags = CL_MEM_WRITE_ONLY; auto surfaceFormat 
= Image::getSurfaceFormatFromTable(flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); std::unique_ptr dstImage(Image::create( &context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context.getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal)); if (MemoryManagement::nonfailingAllocation == failureIndex) { EXPECT_NE(nullptr, dstImage.get()); } else { EXPECT_EQ(nullptr, dstImage.get()); } }; injectFailures(method); mock->reset(); } HWTEST_F(ClDrmMemoryManagerTest, givenDrmMemoryManagerWhenTiledImageIsBeingCreatedFromHostPtrThenAllocateGraphicsMemoryForImageIsUsed) { if (!UnitTestHelper::tiledImagesSupported) { GTEST_SKIP(); } device->setPreemptionMode(PreemptionMode::Disabled); auto csr = static_cast *>(device->getDefaultEngine().commandStreamReceiver); csr->callHwFlush = false; mock->ioctl_expected.gemCreate = 1; mock->ioctl_expected.gemSetTiling = 1; mock->ioctl_expected.gemWait = 2; mock->ioctl_expected.gemClose = 2; mock->ioctl_expected.gemUserptr = 1; mock->ioctl_expected.execbuffer2 = 0; // builtins kernels mock->ioctl_expected.gemUserptr += 5; // command buffers mock->ioctl_expected.gemUserptr += 2; additionalDestroyDeviceIoctls.gemClose += 2; additionalDestroyDeviceIoctls.gemWait += 2; // indirect heaps mock->ioctl_expected.gemUserptr += 3; additionalDestroyDeviceIoctls.gemClose += 3; additionalDestroyDeviceIoctls.gemWait += 3; if (device->getDefaultEngine().commandStreamReceiver->peekTimestampPacketWriteEnabled()) { mock->ioctl_expected.gemUserptr++; additionalDestroyDeviceIoctls.gemClose++; additionalDestroyDeviceIoctls.gemWait++; } if (device->getDefaultEngine().commandStreamReceiver->getClearColorAllocation() != nullptr) { mock->ioctl_expected.gemUserptr++; additionalDestroyDeviceIoctls.gemClose++; additionalDestroyDeviceIoctls.gemWait++; } MockContext context(pClDevice); cl_image_format imageFormat; imageFormat.image_channel_data_type = CL_UNORM_INT8; 
imageFormat.image_channel_order = CL_R; cl_image_desc imageDesc = {}; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_width = 64u; imageDesc.image_height = 64u; auto data = alignedMalloc(64u * 64u * 4 * 8, MemoryConstants::pageSize); auto retVal = CL_SUCCESS; cl_mem_flags flags = CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); std::unique_ptr dstImage(Image::create( &context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context.getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imageDesc, data, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, dstImage); auto imageGraphicsAllocation = dstImage->getGraphicsAllocation(rootDeviceIndex); ASSERT_NE(nullptr, imageGraphicsAllocation); EXPECT_TRUE(imageGraphicsAllocation->getDefaultGmm()->resourceParams.Usage == GMM_RESOURCE_USAGE_TYPE::GMM_RESOURCE_USAGE_OCL_IMAGE); DrmAllocation *drmAllocation = static_cast(imageGraphicsAllocation); auto imageSize = drmAllocation->getUnderlyingBufferSize(); auto rowPitch = dstImage->getImageDesc().image_row_pitch; EXPECT_EQ(1u, this->mock->createParamsHandle); EXPECT_EQ(imageSize, this->mock->createParamsSize); __u32 tilingMode = I915_TILING_Y; EXPECT_EQ(tilingMode, this->mock->setTilingMode); EXPECT_EQ(rowPitch, this->mock->setTilingStride); EXPECT_EQ(1u, this->mock->setTilingHandle); alignedFree(data); } TEST_F(ClDrmMemoryManagerTest, givenDrmMemoryManagerWhenMemoryAllocatedForImageThenUnmapSizeCorrectlySetWhenLimitedRangeAllocationUsedOrNotUsed) { mock->ioctl_expected.gemUserptr = 2; mock->ioctl_expected.gemWait = 2; mock->ioctl_expected.gemClose = 2; MockContext context(pClDevice); cl_image_format imageFormat; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_R; cl_image_desc imageDesc = {}; imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D; 
imageDesc.image_width = 64u; auto data = alignedMalloc(64u * 4 * 8, MemoryConstants::pageSize); auto retVal = CL_SUCCESS; cl_mem_flags flags = CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); std::unique_ptr dstImage(Image::create( &context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context.getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imageDesc, data, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, dstImage); auto imageGraphicsAllocation = dstImage->getGraphicsAllocation(rootDeviceIndex); ASSERT_NE(nullptr, imageGraphicsAllocation); alignedFree(data); } TEST_F(ClDrmMemoryManagerTest, givenDrmMemoryManagerWhenNonTiledImgWithMipCountZeroisBeingCreatedThenAllocateGraphicsMemoryIsUsed) { mock->ioctl_expected.gemUserptr = 2; mock->ioctl_expected.gemWait = 2; mock->ioctl_expected.gemClose = 2; MockContext context(pClDevice); cl_image_format imageFormat; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_R; cl_image_desc imageDesc = {}; imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D; imageDesc.image_width = 64u; auto data = alignedMalloc(64u * 4 * 8, MemoryConstants::pageSize); auto retVal = CL_SUCCESS; this->mock->createParamsHandle = 0; this->mock->createParamsSize = 0; cl_mem_flags flags = CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); std::unique_ptr dstImage(Image::create( &context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context.getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imageDesc, data, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, dstImage); auto imageGraphicsAllocation = dstImage->getGraphicsAllocation(rootDeviceIndex); ASSERT_NE(nullptr, 
imageGraphicsAllocation); EXPECT_TRUE(imageGraphicsAllocation->getDefaultGmm()->resourceParams.Usage == GMM_RESOURCE_USAGE_TYPE::GMM_RESOURCE_USAGE_OCL_IMAGE); EXPECT_EQ(0u, this->mock->createParamsHandle); EXPECT_EQ(0u, this->mock->createParamsSize); __u32 tilingMode = I915_TILING_NONE; EXPECT_EQ(tilingMode, this->mock->setTilingMode); EXPECT_EQ(0u, this->mock->setTilingStride); EXPECT_EQ(0u, this->mock->setTilingHandle); EXPECT_EQ(Sharing::nonSharedResource, imageGraphicsAllocation->peekSharedHandle()); alignedFree(data); } TEST_F(ClDrmMemoryManagerTest, givenDrmMemoryManagerWhenNonTiledImgWithMipCountNonZeroisBeingCreatedThenAllocateGraphicsMemoryIsUsed) { mock->ioctl_expected.gemUserptr = 1; mock->ioctl_expected.gemWait = 1; mock->ioctl_expected.gemClose = 1; MockContext context(pClDevice); cl_image_format imageFormat; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_R; cl_image_desc imageDesc = {}; imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D; imageDesc.image_width = 64u; imageDesc.num_mip_levels = 1u; auto data = alignedMalloc(64u * 4 * 8, MemoryConstants::pageSize); auto retVal = CL_SUCCESS; this->mock->createParamsHandle = 0; this->mock->createParamsSize = 0; cl_mem_flags flags = CL_MEM_WRITE_ONLY; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); std::unique_ptr dstImage(Image::create( &context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context.getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imageDesc, data, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, dstImage); EXPECT_EQ(static_cast(imageDesc.num_mip_levels), dstImage->peekMipCount()); auto imageGraphicsAllocation = dstImage->getGraphicsAllocation(rootDeviceIndex); ASSERT_NE(nullptr, imageGraphicsAllocation); EXPECT_TRUE(imageGraphicsAllocation->getDefaultGmm()->resourceParams.Usage == 
GMM_RESOURCE_USAGE_TYPE::GMM_RESOURCE_USAGE_OCL_IMAGE); EXPECT_EQ(0u, this->mock->createParamsHandle); EXPECT_EQ(0u, this->mock->createParamsSize); __u32 tilingMode = I915_TILING_NONE; EXPECT_EQ(tilingMode, this->mock->setTilingMode); EXPECT_EQ(0u, this->mock->setTilingStride); EXPECT_EQ(0u, this->mock->setTilingHandle); EXPECT_EQ(Sharing::nonSharedResource, imageGraphicsAllocation->peekSharedHandle()); alignedFree(data); } TEST_F(ClDrmMemoryManagerTest, givenDrmMemoryManagerWhen1DarrayImageIsBeingCreatedFromHostPtrThenTilingIsNotCalled) { mock->ioctl_expected.gemUserptr = 2; mock->ioctl_expected.gemWait = 2; mock->ioctl_expected.gemClose = 2; MockContext context(pClDevice); cl_image_format imageFormat; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_R; cl_image_desc imageDesc = {}; imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D; imageDesc.image_width = 64u; auto data = alignedMalloc(64u * 4 * 8, MemoryConstants::pageSize); auto retVal = CL_SUCCESS; this->mock->createParamsHandle = 0; this->mock->createParamsSize = 0; cl_mem_flags flags = CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); std::unique_ptr dstImage(Image::create( &context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context.getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imageDesc, data, retVal)); auto imageGraphicsAllocation = dstImage->getGraphicsAllocation(rootDeviceIndex); ASSERT_NE(nullptr, imageGraphicsAllocation); EXPECT_EQ(0u, this->mock->createParamsHandle); EXPECT_EQ(0u, this->mock->createParamsSize); __u32 tilingMode = I915_TILING_NONE; EXPECT_EQ(tilingMode, this->mock->setTilingMode); EXPECT_EQ(0u, this->mock->setTilingStride); EXPECT_EQ(0u, this->mock->setTilingHandle); EXPECT_EQ(Sharing::nonSharedResource, imageGraphicsAllocation->peekSharedHandle()); alignedFree(data); } 
// Builds a linear, one-row image description whose host pointer is page aligned,
// confirms that MockMemoryManager::isCopyRequired() reports no copy, and checks that
// allocateGraphicsMemoryForImage() returns an allocation backed by that very pointer.
TEST_F(ClDrmMemoryManagerTest, givenHostPointerNotRequiringCopyWhenAllocateGraphicsMemoryForImageIsCalledThenGraphicsAllocationIsReturned) {
    // Expected ioctl counts configured on the mock for this scenario.
    mock->ioctl_expected.gemUserptr = 1;
    mock->ioctl_expected.gemWait = 1;
    mock->ioctl_expected.gemClose = 1;

    ImageDescriptor imgDesc = {};
    imgDesc.imageType = ImageType::Image1D;
    imgDesc.imageWidth = MemoryConstants::pageSize;
    imgDesc.imageHeight = 1;

    cl_image_format imageFormat = {};
    imageFormat.image_channel_data_type = CL_UNSIGNED_INT8;
    imageFormat.image_channel_order = CL_R;

    cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR;
    MockContext context(pClDevice);
    auto surfaceFormat = &Image::getSurfaceFormatFromTable(flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features)->surfaceFormat;

    // Pitches and size are derived directly from the descriptor (tightly packed rows).
    auto imgInfo = MockGmm::initImgInfo(imgDesc, 0, surfaceFormat);
    imgInfo.rowPitch = imgDesc.imageWidth * surfaceFormat->ImageElementSizeInBytes;
    imgInfo.slicePitch = imgInfo.rowPitch * imgDesc.imageHeight;
    imgInfo.size = imgInfo.slicePitch;
    imgInfo.linearStorage = true;

    auto hostPtr = alignedMalloc(imgDesc.imageWidth * imgDesc.imageHeight * 4, MemoryConstants::pageSize);
    bool copyRequired = MockMemoryManager::isCopyRequired(imgInfo, hostPtr);
    EXPECT_FALSE(copyRequired);

    AllocationData allocationData;
    allocationData.imgInfo = &imgInfo;
    allocationData.hostPtr = hostPtr;
    allocationData.rootDeviceIndex = rootDeviceIndex;

    auto imageAllocation = memoryManager->allocateGraphicsMemoryForImage(allocationData);
    ASSERT_NE(nullptr, imageAllocation);
    EXPECT_EQ(hostPtr, imageAllocation->getUnderlyingBuffer());

    memoryManager->freeGraphicsMemory(imageAllocation);
    alignedFree(hostPtr);
}

// Imports a shared-buffer allocation from an OS handle and checks the resulting
// allocation and its buffer object (handle, size, ref count, shared handle).
TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerAndOsHandleWhenCreateIsCalledThenGraphicsAllocationIsReturned) {
    // Expected ioctl counts configured on the mock for this scenario.
    mock->ioctl_expected.primeFdToHandle = 1;
    mock->ioctl_expected.gemWait = 1;
    mock->ioctl_expected.gemClose = 1;

    osHandle handle = 1u;
    this->mock->outputHandle = 2u;
    size_t size = 4096u;
    AllocationProperties
properties(rootDeviceIndex, false, size, AllocationType::SHARED_BUFFER, false, {});

    auto graphicsAllocation = memoryManager->createGraphicsAllocationFromSharedHandle(handle, properties, false, false);
    ASSERT_NE(nullptr, graphicsAllocation);

    EXPECT_NE(nullptr, graphicsAllocation->getUnderlyingBuffer());
    EXPECT_EQ(size, graphicsAllocation->getUnderlyingBufferSize());
    // The mock must have received the OS handle as its fd and seen no set-tiling call.
    EXPECT_EQ(this->mock->inputFd, (int)handle);
    EXPECT_EQ(this->mock->setTilingHandle, 0u);

    DrmAllocation *drmAllocation = static_cast(graphicsAllocation);
    auto bo = drmAllocation->getBO();
    EXPECT_EQ(bo->peekHandle(), (int)this->mock->outputHandle);
    EXPECT_NE(0llu, bo->peekAddress());
    EXPECT_EQ(1u, bo->getRefCount());
    EXPECT_EQ(size, bo->peekSize());

    EXPECT_EQ(handle, graphicsAllocation->peekSharedHandle());

    memoryManager->freeGraphicsMemory(graphicsAllocation);
}

// Imports a shared buffer with the local-memory fixture and checks that the GPU
// address lies strictly between the base and the limit of HeapIndex::HEAP_STANDARD2MB.
// NOTE(review): the test name says "StandardHeap64KB" while the assertions use
// HEAP_STANDARD2MB — confirm which is intended.
TEST_F(DrmMemoryManagerWithLocalMemoryTest, givenDrmMemoryManagerWithLocalMemoryWhenCreateGraphicsAllocationFromSharedHandleIsCalledThenAcquireGpuAddressFromStandardHeap64KB) {
    // Expected ioctl counts configured on the mock for this scenario.
    mock->ioctl_expected.primeFdToHandle = 1;
    mock->ioctl_expected.gemWait = 1;
    mock->ioctl_expected.gemClose = 1;

    osHandle handle = 1u;
    this->mock->outputHandle = 2u;
    size_t size = 4096u;
    AllocationProperties properties(rootDeviceIndex, false, size, AllocationType::SHARED_BUFFER, false, {});

    auto graphicsAllocation = memoryManager->createGraphicsAllocationFromSharedHandle(handle, properties, false, false);
    ASSERT_NE(nullptr, graphicsAllocation);

    EXPECT_NE(nullptr, graphicsAllocation->getUnderlyingBuffer());
    EXPECT_EQ(size, graphicsAllocation->getUnderlyingBufferSize());
    EXPECT_EQ(MemoryPool::SystemCpuInaccessible, graphicsAllocation->getMemoryPool());
    EXPECT_EQ(this->mock->inputFd, static_cast(handle));

    auto gpuAddress = graphicsAllocation->getGpuAddress();
    EXPECT_LT(GmmHelper::canonize(memoryManager->getGfxPartition(rootDeviceIndex)->getHeapBase(HeapIndex::HEAP_STANDARD2MB)), gpuAddress);
// Remainder of the test body begun on the previous line.
    EXPECT_GT(GmmHelper::canonize(memoryManager->getGfxPartition(rootDeviceIndex)->getHeapLimit(HeapIndex::HEAP_STANDARD2MB)), gpuAddress);

    DrmAllocation *drmAllocation = static_cast(graphicsAllocation);
    auto bo = drmAllocation->getBO();
    EXPECT_EQ(this->mock->outputHandle, static_cast(bo->peekHandle()));
    EXPECT_EQ(gpuAddress, bo->peekAddress());
    EXPECT_EQ(size, bo->peekSize());
    // The unmap size is the requested size rounded up to a 2 MB multiple.
    EXPECT_EQ(alignUp(size, 2 * MemoryConstants::megaByte), bo->peekUnmapSize());

    EXPECT_EQ(handle, graphicsAllocation->peekSharedHandle());

    memoryManager->freeGraphicsMemory(graphicsAllocation);
}

// Imports a shared buffer using AllocationProperties built with an explicit (empty)
// sub-device bitfield and checks that the allocation carries the fixture's root
// device index, in addition to the usual handle/size/ref-count checks.
TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerAndOsHandleWhenCreateIsCalledAndRootDeviceIndexIsSpecifiedThenGraphicsAllocationIsReturnedWithCorrectRootDeviceIndex) {
    // Expected ioctl counts configured on the mock for this scenario.
    mock->ioctl_expected.primeFdToHandle = 1;
    mock->ioctl_expected.gemWait = 1;
    mock->ioctl_expected.gemClose = 1;

    osHandle handle = 1u;
    this->mock->outputHandle = 2u;
    size_t size = 4096u;
    AllocationProperties properties(rootDeviceIndex, false, size, AllocationType::SHARED_BUFFER, false, false, 0u);
    ASSERT_TRUE(properties.subDevicesBitfield.none());

    auto graphicsAllocation = memoryManager->createGraphicsAllocationFromSharedHandle(handle, properties, false, false);
    ASSERT_NE(nullptr, graphicsAllocation);

    EXPECT_EQ(rootDeviceIndex, graphicsAllocation->getRootDeviceIndex());
    EXPECT_NE(nullptr, graphicsAllocation->getUnderlyingBuffer());
    EXPECT_EQ(size, graphicsAllocation->getUnderlyingBufferSize());
    EXPECT_EQ(this->mock->inputFd, (int)handle);
    EXPECT_EQ(this->mock->setTilingHandle, 0u);

    DrmAllocation *drmAllocation = static_cast(graphicsAllocation);
    auto bo = drmAllocation->getBO();
    EXPECT_EQ(bo->peekHandle(), (int)this->mock->outputHandle);
    EXPECT_NE(0llu, bo->peekAddress());
    EXPECT_EQ(1u, bo->getRefCount());
    EXPECT_EQ(size, bo->peekSize());

    EXPECT_EQ(handle, graphicsAllocation->peekSharedHandle());

    memoryManager->freeGraphicsMemory(graphicsAllocation);
}

TEST_F(ClDrmMemoryManagerTest,
givenOsHandleWithNonTiledObjectWhenCreateFromSharedHandleIsCalledThenNonTiledGmmIsCreatedAndSetInAllocation) {
    // The mock reports I915_TILING_NONE from get-tiling; the imported shared image is
    // then expected to carry a Gmm with Linear set and TiledY clear.
    mock->ioctl_expected.primeFdToHandle = 1;
    mock->ioctl_expected.gemWait = 1;
    mock->ioctl_expected.gemClose = 1;
    mock->ioctl_expected.gemGetTiling = 1;
    mock->getTilingModeOut = I915_TILING_NONE;

    osHandle handle = 1u;
    uint32_t boHandle = 2u;
    mock->outputHandle = boHandle;

    cl_mem_flags flags = CL_MEM_READ_ONLY;
    cl_image_desc imgDesc = {};
    cl_image_format gmmImgFormat = {CL_NV12_INTEL, CL_UNORM_INT8};
    const ClSurfaceFormatInfo *gmmSurfaceFormat = nullptr;
    ImageInfo imgInfo = {};

    imgDesc.image_width = 4;
    imgDesc.image_height = 4;
    imgDesc.image_type = CL_MEM_OBJECT_IMAGE2D;

    imgInfo.imgDesc = Image::convertDescriptor(imgDesc);
    MockContext context(pClDevice);
    gmmSurfaceFormat = Image::getSurfaceFormatFromTable(flags, &gmmImgFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features);
    imgInfo.surfaceFormat = &gmmSurfaceFormat->surfaceFormat;
    imgInfo.plane = GMM_PLANE_Y;

    AllocationProperties properties(rootDeviceIndex, false, imgInfo, AllocationType::SHARED_IMAGE, context.getDevice(0)->getDeviceBitfield());

    auto graphicsAllocation = memoryManager->createGraphicsAllocationFromSharedHandle(handle, properties, false, false);
    ASSERT_NE(nullptr, graphicsAllocation);
    // get-tiling must have been queried for the buffer object the mock handed out.
    EXPECT_EQ(boHandle, mock->getTilingHandleIn);
    EXPECT_EQ(AllocationType::SHARED_IMAGE, graphicsAllocation->getAllocationType());

    auto gmm = graphicsAllocation->getDefaultGmm();
    ASSERT_NE(nullptr, gmm);
    EXPECT_EQ(1u, gmm->resourceParams.Flags.Info.Linear);
    EXPECT_EQ(0u, gmm->resourceParams.Flags.Info.TiledY);

    memoryManager->freeGraphicsMemory(graphicsAllocation);
}

// Same import scenario, but the mock reports I915_TILING_Y from get-tiling; the
// resulting Gmm is expected not to be linear.
TEST_F(ClDrmMemoryManagerTest, givenOsHandleWithTileYObjectWhenCreateFromSharedHandleIsCalledThenTileYGmmIsCreatedAndSetInAllocation) {
    mock->ioctl_expected.primeFdToHandle = 1;
    mock->ioctl_expected.gemWait = 1;
    mock->ioctl_expected.gemClose = 1;
    mock->ioctl_expected.gemGetTiling = 1;
    mock->getTilingModeOut = I915_TILING_Y;
// Remainder of the test body begun on the previous line.
    osHandle handle = 1u;
    uint32_t boHandle = 2u;
    mock->outputHandle = boHandle;

    cl_mem_flags flags = CL_MEM_READ_ONLY;
    cl_image_desc imgDesc = {};
    cl_image_format gmmImgFormat = {CL_NV12_INTEL, CL_UNORM_INT8};
    const ClSurfaceFormatInfo *gmmSurfaceFormat = nullptr;
    ImageInfo imgInfo = {};

    imgDesc.image_width = 4;
    imgDesc.image_height = 4;
    imgDesc.image_type = CL_MEM_OBJECT_IMAGE2D;

    imgInfo.imgDesc = Image::convertDescriptor(imgDesc);
    MockContext context(pClDevice);
    gmmSurfaceFormat = Image::getSurfaceFormatFromTable(flags, &gmmImgFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features);
    imgInfo.surfaceFormat = &gmmSurfaceFormat->surfaceFormat;
    imgInfo.plane = GMM_PLANE_Y;

    AllocationProperties properties(rootDeviceIndex, false, imgInfo, AllocationType::SHARED_IMAGE, context.getDevice(0)->getDeviceBitfield());

    auto graphicsAllocation = memoryManager->createGraphicsAllocationFromSharedHandle(handle, properties, false, false);
    ASSERT_NE(nullptr, graphicsAllocation);
    // get-tiling must have been queried for the buffer object the mock handed out.
    EXPECT_EQ(boHandle, mock->getTilingHandleIn);
    EXPECT_EQ(AllocationType::SHARED_IMAGE, graphicsAllocation->getAllocationType());

    auto gmm = graphicsAllocation->getDefaultGmm();
    ASSERT_NE(nullptr, gmm);
    EXPECT_EQ(0u, gmm->resourceParams.Flags.Info.Linear);

    memoryManager->freeGraphicsMemory(graphicsAllocation);
}

// Imports a shared image while one ioctl is forced to fail and expects the resulting
// Gmm not to be linear.
TEST_F(ClDrmMemoryManagerTest, givenDrmMemoryManagerWhenCreateFromSharedHandleFailsToCallGetTilingThenNonLinearStorageIsAssumed) {
    mock->ioctl_expected.primeFdToHandle = 1;
    mock->ioctl_expected.gemGetTiling = 1;
    mock->ioctl_expected.gemWait = 1;
    mock->ioctl_expected.gemClose = 1;

    // NOTE(review): presumably makes the ioctl at index "total + 1" (the get-tiling
    // call that follows prime-fd-to-handle) return -1 — confirm against the mock.
    this->ioctlResExt = {mock->ioctl_cnt.total + 1, -1};
    mock->ioctl_res_ext = &ioctlResExt;

    osHandle handle = 1u;
    uint32_t boHandle = 2u;
    mock->outputHandle = boHandle;

    cl_mem_flags flags = CL_MEM_READ_ONLY;
    cl_image_desc imgDesc = {};
    cl_image_format gmmImgFormat = {CL_NV12_INTEL, CL_UNORM_INT8};
    const ClSurfaceFormatInfo *gmmSurfaceFormat = nullptr;
    ImageInfo imgInfo = {};

    imgDesc.image_width = 4;
// Remainder of the test body begun on the previous line.
    imgDesc.image_height = 4;
    imgDesc.image_type = CL_MEM_OBJECT_IMAGE2D;

    imgInfo.imgDesc = Image::convertDescriptor(imgDesc);
    MockContext context(pClDevice);
    gmmSurfaceFormat = Image::getSurfaceFormatFromTable(flags, &gmmImgFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features);
    imgInfo.surfaceFormat = &gmmSurfaceFormat->surfaceFormat;
    imgInfo.plane = GMM_PLANE_Y;

    AllocationProperties properties(rootDeviceIndex, false, imgInfo, AllocationType::SHARED_IMAGE, context.getDevice(0)->getDeviceBitfield());

    auto graphicsAllocation = memoryManager->createGraphicsAllocationFromSharedHandle(handle, properties, false, false);
    ASSERT_NE(nullptr, graphicsAllocation);
    EXPECT_EQ(boHandle, mock->getTilingHandleIn);
    EXPECT_EQ(AllocationType::SHARED_IMAGE, graphicsAllocation->getAllocationType());

    auto gmm = graphicsAllocation->getDefaultGmm();
    ASSERT_NE(nullptr, gmm);
    EXPECT_EQ(0u, gmm->resourceParams.Flags.Info.Linear);

    memoryManager->freeGraphicsMemory(graphicsAllocation);
}

// Runs the shared-handle import under injectFailures(): the lambda expects a valid
// allocation only for the non-failing run and nullptr for every injected failure.
TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerAndOsHandleWhenAllocationFailsThenReturnNullPtr) {
    osHandle handle = 1u;

    InjectedFunction method = [this, &handle](size_t failureIndex) {
        AllocationProperties properties(rootDeviceIndex, false, MemoryConstants::pageSize, AllocationType::SHARED_BUFFER, false, mockDeviceBitfield);

        auto graphicsAllocation = memoryManager->createGraphicsAllocationFromSharedHandle(handle, properties, false, false);
        if (MemoryManagement::nonfailingAllocation == failureIndex) {
            EXPECT_NE(nullptr, graphicsAllocation);
            memoryManager->freeGraphicsMemory(graphicsAllocation);
        } else {
            EXPECT_EQ(nullptr, graphicsAllocation);
        }
    };

    injectFailures(method);
    // No ioctl expectations were set for this test, so the mock is reset afterwards.
    mock->reset();
}

// Imports three OS handles; the first two resolve to the same buffer-object handle
// (so the ref count grows), the third to a different one.
TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerAndThreeOsHandlesWhenReuseCreatesAreCalledThenGraphicsAllocationsAreReturned) {
    mock->ioctl_expected.primeFdToHandle = 3;
    mock->ioctl_expected.gemWait = 3;
    mock->ioctl_expected.gemClose = 2;

    osHandle handles[] = {1u, 2u, 3u};
    size_t size = 4096u;
// Remainder of the test body begun on the previous line.
    GraphicsAllocation *graphicsAllocations[3];
    DrmAllocation *drmAllocation;
    BufferObject *bo;
    unsigned int expectedRefCount;

    this->mock->outputHandle = 2u;

    for (unsigned int i = 0; i < 3; ++i) {
        // Iterations 0 and 1 share one buffer object (ref counts 1, then 2); iteration 2
        // switches the mock to a new handle, so its ref count starts again at 1.
        expectedRefCount = i < 2 ? i + 1 : 1;
        if (i == 2)
            this->mock->outputHandle = 3u;

        AllocationProperties properties(rootDeviceIndex, false, MemoryConstants::pageSize, AllocationType::SHARED_BUFFER, false, mockDeviceBitfield);

        graphicsAllocations[i] = memoryManager->createGraphicsAllocationFromSharedHandle(handles[i], properties, false, false);
        // Clang-tidy false positive WA
        if (graphicsAllocations[i] == nullptr) {
            ASSERT_FALSE(true);
            continue;
        }
        ASSERT_NE(nullptr, graphicsAllocations[i]);

        EXPECT_NE(nullptr, graphicsAllocations[i]->getUnderlyingBuffer());
        EXPECT_EQ(size, graphicsAllocations[i]->getUnderlyingBufferSize());
        EXPECT_EQ(this->mock->inputFd, (int)handles[i]);
        EXPECT_EQ(this->mock->setTilingHandle, 0u);

        drmAllocation = static_cast(graphicsAllocations[i]);
        bo = drmAllocation->getBO();
        EXPECT_EQ(bo->peekHandle(), (int)this->mock->outputHandle);
        EXPECT_NE(0llu, bo->peekAddress());
        EXPECT_EQ(expectedRefCount, bo->getRefCount());
        EXPECT_EQ(size, bo->peekSize());

        EXPECT_EQ(handles[i], graphicsAllocations[i]->peekSharedHandle());
    }

    for (const auto &it : graphicsAllocations) {
        // Clang-tidy false positive WA
        if (it != nullptr)
            memoryManager->freeGraphicsMemory(it);
    }
}

// With forced 32-bit allocations and the "requires bitness" argument set to true, the
// imported shared buffer is expected to be a 32-bit allocation.
TEST_F(DrmMemoryManagerTest, given32BitAddressingWhenBufferFromSharedHandleAndBitnessRequiredIsCreatedThenItis32BitAllocation) {
    mock->ioctl_expected.primeFdToHandle = 1;
    mock->ioctl_expected.gemWait = 1;
    mock->ioctl_expected.gemClose = 1;

    memoryManager->setForce32BitAllocations(true);
    osHandle handle = 1u;
    this->mock->outputHandle = 2u;
    AllocationProperties properties(rootDeviceIndex, false, MemoryConstants::pageSize, AllocationType::SHARED_BUFFER, false, mockDeviceBitfield);

    auto graphicsAllocation = memoryManager->createGraphicsAllocationFromSharedHandle(handle, properties, true, false);
    auto drmAllocation =
static_cast(graphicsAllocation); EXPECT_TRUE(graphicsAllocation->is32BitAllocation()); EXPECT_EQ(1, lseekCalledCount); EXPECT_EQ(GmmHelper::canonize(memoryManager->getExternalHeapBaseAddress(graphicsAllocation->getRootDeviceIndex(), drmAllocation->isAllocatedInLocalMemoryPool())), drmAllocation->getGpuBaseAddress()); memoryManager->freeGraphicsMemory(graphicsAllocation); } TEST_F(DrmMemoryManagerTest, given32BitAddressingWhenBufferFromSharedHandleIsCreatedAndDoesntRequireBitnessThenItIsNot32BitAllocation) { mock->ioctl_expected.primeFdToHandle = 1; mock->ioctl_expected.gemWait = 1; mock->ioctl_expected.gemClose = 1; memoryManager->setForce32BitAllocations(true); osHandle handle = 1u; this->mock->outputHandle = 2u; AllocationProperties properties(rootDeviceIndex, false, MemoryConstants::pageSize, AllocationType::SHARED_BUFFER, false, mockDeviceBitfield); auto graphicsAllocation = memoryManager->createGraphicsAllocationFromSharedHandle(handle, properties, false, false); auto drmAllocation = static_cast(graphicsAllocation); EXPECT_FALSE(graphicsAllocation->is32BitAllocation()); EXPECT_EQ(1, lseekCalledCount); EXPECT_EQ(0llu, drmAllocation->getGpuBaseAddress()); memoryManager->freeGraphicsMemory(graphicsAllocation); } TEST_F(DrmMemoryManagerTest, givenLimitedRangeAllocatorWhenBufferFromSharedHandleIsCreatedThenItIsLimitedRangeAllocation) { mock->ioctl_expected.primeFdToHandle = 1; mock->ioctl_expected.gemWait = 1; mock->ioctl_expected.gemClose = 1; memoryManager->forceLimitedRangeAllocator(0xFFFFFFFFF); osHandle handle = 1u; this->mock->outputHandle = 2u; AllocationProperties properties(rootDeviceIndex, false, MemoryConstants::pageSize, AllocationType::SHARED_BUFFER, false, mockDeviceBitfield); auto graphicsAllocation = memoryManager->createGraphicsAllocationFromSharedHandle(handle, properties, false, false); EXPECT_FALSE(graphicsAllocation->is32BitAllocation()); auto drmAllocation = static_cast(graphicsAllocation); EXPECT_EQ(0llu, drmAllocation->getGpuBaseAddress()); 
// Remainder of the test body begun on the previous line.
    EXPECT_EQ(1, lseekCalledCount);
    memoryManager->freeGraphicsMemory(graphicsAllocation);
}

// Without forced 32-bit allocations, even a "requires bitness" import must not produce
// a 32-bit allocation; the GPU base address stays zero.
TEST_F(DrmMemoryManagerTest, givenNon32BitAddressingWhenBufferFromSharedHandleIsCreatedAndDRequireBitnessThenItIsNot32BitAllocation) {
    // Expected ioctl counts configured on the mock for this scenario.
    mock->ioctl_expected.primeFdToHandle = 1;
    mock->ioctl_expected.gemWait = 1;
    mock->ioctl_expected.gemClose = 1;

    memoryManager->setForce32BitAllocations(false);
    osHandle handle = 1u;
    this->mock->outputHandle = 2u;
    AllocationProperties properties(rootDeviceIndex, false, MemoryConstants::pageSize, AllocationType::SHARED_BUFFER, false, mockDeviceBitfield);

    auto graphicsAllocation = memoryManager->createGraphicsAllocationFromSharedHandle(handle, properties, true, false);
    // Stop here with a clean failure rather than dereferencing a null allocation.
    ASSERT_NE(nullptr, graphicsAllocation);
    auto drmAllocation = static_cast(graphicsAllocation);
    EXPECT_FALSE(graphicsAllocation->is32BitAllocation());
    EXPECT_EQ(1, lseekCalledCount);
    EXPECT_EQ(0llu, drmAllocation->getGpuBaseAddress());
    memoryManager->freeGraphicsMemory(graphicsAllocation);
}

// Forces an ioctl failure and expects the shared-handle import to return nullptr.
TEST_F(DrmMemoryManagerTest, givenSharedHandleWhenAllocationIsCreatedAndIoctlPrimeFdToHandleFailsThenNullPtrIsReturned) {
    mock->ioctl_expected.primeFdToHandle = 1;
    // NOTE(review): presumably makes the very next ioctl (index == current total)
    // return -1 — confirm against the mock.
    this->ioctlResExt = {mock->ioctl_cnt.total, -1};
    mock->ioctl_res_ext = &this->ioctlResExt;

    osHandle handle = 1u;
    this->mock->outputHandle = 2u;
    AllocationProperties properties(rootDeviceIndex, false, MemoryConstants::pageSize, AllocationType::SHARED_BUFFER, false, mockDeviceBitfield);

    auto graphicsAllocation = memoryManager->createGraphicsAllocationFromSharedHandle(handle, properties, false, false);
    EXPECT_EQ(nullptr, graphicsAllocation);
    // Called with the (expected null) result, exactly as in the original test.
    memoryManager->freeGraphicsMemory(graphicsAllocation);
}

// Imports the same shared handle twice (same buffer object), makes both allocations
// resident and expects processResidency() to leave a single entry in the CSR's
// residency list.
TEST_F(DrmMemoryManagerTest, givenTwoGraphicsAllocationsThatShareTheSameBufferObjectWhenTheyAreMadeResidentThenOnlyOneBoIsPassedToExec) {
    auto testedCsr = static_cast *>(device->getDefaultEngine().commandStreamReceiver);
    mock->ioctl_expected.primeFdToHandle = 2;
    mock->ioctl_expected.gemClose = 1;
    mock->ioctl_expected.gemWait = 2;

    osHandle sharedHandle = 1u;
    AllocationProperties
properties(rootDeviceIndex, false, MemoryConstants::pageSize, AllocationType::SHARED_BUFFER, false, mockDeviceBitfield); auto graphicsAllocation = memoryManager->createGraphicsAllocationFromSharedHandle(sharedHandle, properties, false, false); auto graphicsAllocation2 = memoryManager->createGraphicsAllocationFromSharedHandle(sharedHandle, properties, false, false); testedCsr->makeResident(*graphicsAllocation); testedCsr->makeResident(*graphicsAllocation2); EXPECT_EQ(2u, testedCsr->getResidencyAllocations().size()); testedCsr->processResidency(testedCsr->getResidencyAllocations(), 0u); EXPECT_EQ(1u, testedCsr->residency.size()); memoryManager->freeGraphicsMemory(graphicsAllocation); memoryManager->freeGraphicsMemory(graphicsAllocation2); } TEST_F(DrmMemoryManagerTest, givenTwoGraphicsAllocationsThatDoesnShareTheSameBufferObjectWhenTheyAreMadeResidentThenTwoBoIsPassedToExec) { auto testedCsr = static_cast *>(device->getDefaultEngine().commandStreamReceiver); mock->ioctl_expected.primeFdToHandle = 2; mock->ioctl_expected.gemClose = 2; mock->ioctl_expected.gemWait = 2; osHandle sharedHandle = 1u; AllocationProperties properties(rootDeviceIndex, false, MemoryConstants::pageSize, AllocationType::SHARED_BUFFER, false, {}); auto graphicsAllocation = memoryManager->createGraphicsAllocationFromSharedHandle(sharedHandle, properties, false, false); mock->outputHandle++; auto graphicsAllocation2 = memoryManager->createGraphicsAllocationFromSharedHandle(sharedHandle, properties, false, false); testedCsr->makeResident(*graphicsAllocation); testedCsr->makeResident(*graphicsAllocation2); EXPECT_EQ(2u, testedCsr->getResidencyAllocations().size()); testedCsr->processResidency(testedCsr->getResidencyAllocations(), 0u); EXPECT_EQ(2u, testedCsr->residency.size()); memoryManager->freeGraphicsMemory(graphicsAllocation); memoryManager->freeGraphicsMemory(graphicsAllocation2); } TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, 
givenDrmMemoryManagerWhenCreateAllocationFromNtHandleIsCalledThenReturnNullptr) {
    // createGraphicsAllocationFromNTHandle() is expected to return nullptr here.
    auto graphicsAllocation = memoryManager->createGraphicsAllocationFromNTHandle(reinterpret_cast(1), 0, AllocationType::SHARED_IMAGE);
    EXPECT_EQ(nullptr, graphicsAllocation);
}

// Locks and unlocks a regular one-page allocation and expects a non-null pointer
// from lockResource().
TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerWhenLockUnlockIsCalledThenReturnPtr) {
    // Expected ioctl counts configured on the mock for this scenario.
    mock->ioctl_expected.gemUserptr = 1;
    mock->ioctl_expected.gemSetDomain = 1;
    mock->ioctl_expected.gemWait = 1;
    mock->ioctl_expected.gemClose = 1;

    auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootDeviceIndex, MemoryConstants::pageSize});
    ASSERT_NE(nullptr, allocation);

    auto ptr = memoryManager->lockResource(allocation);
    EXPECT_NE(nullptr, ptr);

    memoryManager->unlockResource(allocation);

    memoryManager->freeGraphicsMemory(allocation);
}

// For an allocation that already has a CPU pointer, lockResource() must return that
// pointer, and the set-domain parameters recorded by the mock must target the
// allocation's buffer object with the CPU read domain and no write domain.
TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerWhenLockUnlockIsCalledOnAllocationWithCpuPtrThenReturnCpuPtrAndSetCpuDomain) {
    // Expected ioctl counts configured on the mock for this scenario.
    mock->ioctl_expected.gemUserptr = 1;
    mock->ioctl_expected.gemSetDomain = 1;
    mock->ioctl_expected.gemWait = 1;
    mock->ioctl_expected.gemClose = 1;

    auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootDeviceIndex, MemoryConstants::pageSize});
    ASSERT_NE(nullptr, allocation);
    EXPECT_NE(nullptr, allocation->getUnderlyingBuffer());

    auto ptr = memoryManager->lockResource(allocation);
    EXPECT_EQ(allocation->getUnderlyingBuffer(), ptr);

    // check DRM_IOCTL_I915_GEM_SET_DOMAIN input params
    auto drmAllocation = static_cast(allocation);
    EXPECT_EQ((uint32_t)drmAllocation->getBO()->peekHandle(), mock->setDomainHandle);
    EXPECT_EQ((uint32_t)I915_GEM_DOMAIN_CPU, mock->setDomainReadDomains);
    EXPECT_EQ(0u, mock->setDomainWriteDomain);

    memoryManager->unlockResource(allocation);

    memoryManager->freeGraphicsMemory(allocation);
}

// For an image allocation without a CPU pointer, lockResource() is expected to map the
// buffer object; the mmap and set-domain parameters recorded by the mock are checked.
TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerWhenLockUnlockIsCalledOnAllocationWithoutCpuPtrThenReturnLockedPtrAndSetCpuDomain) {
    mock->ioctl_expected.gemCreate = 1;
// Remainder of the test body begun on the previous line.
    mock->ioctl_expected.gemMmap = 1;
    mock->ioctl_expected.gemSetDomain = 1;
    mock->ioctl_expected.gemSetTiling = 1;
    mock->ioctl_expected.gemWait = 1;
    mock->ioctl_expected.gemClose = 1;

    ImageDescriptor imgDesc = {};
    imgDesc.imageType = ImageType::Image2D;
    imgDesc.imageWidth = 512;
    imgDesc.imageHeight = 512;
    auto imgInfo = MockGmm::initImgInfo(imgDesc, 0, nullptr);
    imgInfo.size = 4096u;
    imgInfo.rowPitch = 512u;

    AllocationData allocationData;
    allocationData.imgInfo = &imgInfo;
    allocationData.rootDeviceIndex = rootDeviceIndex;

    auto allocation = memoryManager->allocateGraphicsMemoryForImage(allocationData);
    ASSERT_NE(nullptr, allocation);
    // This allocation has no CPU pointer before it is locked.
    EXPECT_EQ(nullptr, allocation->getUnderlyingBuffer());

    auto ptr = memoryManager->lockResource(allocation);
    EXPECT_NE(nullptr, ptr);

    auto drmAllocation = static_cast(allocation);
    EXPECT_NE(nullptr, drmAllocation->getBO()->peekLockedAddress());

    // check DRM_IOCTL_I915_GEM_MMAP input params
    EXPECT_EQ((uint32_t)drmAllocation->getBO()->peekHandle(), mock->mmapHandle);
    EXPECT_EQ(0u, mock->mmapPad);
    EXPECT_EQ(0u, mock->mmapOffset);
    EXPECT_EQ(drmAllocation->getBO()->peekSize(), mock->mmapSize);
    EXPECT_EQ(0u, mock->mmapFlags);

    // check DRM_IOCTL_I915_GEM_SET_DOMAIN input params
    EXPECT_EQ((uint32_t)drmAllocation->getBO()->peekHandle(), mock->setDomainHandle);
    EXPECT_EQ((uint32_t)I915_GEM_DOMAIN_CPU, mock->setDomainReadDomains);
    EXPECT_EQ(0u, mock->setDomainWriteDomain);

    memoryManager->unlockResource(allocation);
    // Unlocking must clear the buffer object's locked address.
    EXPECT_EQ(nullptr, drmAllocation->getBO()->peekLockedAddress());

    memoryManager->freeGraphicsMemory(allocation);
}

// lockResource() on a null allocation must return nullptr; unlockResource() is then
// called with the same null pointer.
TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerWhenLockUnlockIsCalledOnNullAllocationThenReturnNullPtr) {
    GraphicsAllocation *allocation = nullptr;

    auto ptr = memoryManager->lockResource(allocation);
    EXPECT_EQ(nullptr, ptr);

    memoryManager->unlockResource(allocation);
}

// lockResource() on an allocation that has no buffer object must return nullptr.
TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerWhenLockUnlockIsCalledOnAllocationWithoutBufferObjectThenReturnNullPtr) {
    DrmAllocation
drmAllocation(rootDeviceIndex, AllocationType::UNKNOWN, nullptr, nullptr, 0, static_cast(0u), MemoryPool::MemoryNull);
    EXPECT_EQ(nullptr, drmAllocation.getBO());

    auto ptr = memoryManager->lockResource(&drmAllocation);
    EXPECT_EQ(nullptr, ptr);

    memoryManager->unlockResource(&drmAllocation);
}

// Forces an ioctl failure and expects lockResource() on a stack-built allocation with
// a mock buffer object to return nullptr.
TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerWhenLockUnlockIsCalledButFailsOnIoctlMmapThenReturnNullPtr) {
    mock->ioctl_expected.gemMmap = 1;
    // NOTE(review): presumably makes the very next ioctl (index == current total)
    // return -1 — confirm against the mock.
    this->ioctlResExt = {mock->ioctl_cnt.total, -1};
    mock->ioctl_res_ext = &ioctlResExt;

    DrmMockCustom drmMock(*executionEnvironment->rootDeviceEnvironments[0]);
    // Minimal BufferObject subclass so a buffer object can be constructed directly.
    struct BufferObjectMock : public BufferObject {
        BufferObjectMock(Drm *drm) : BufferObject(drm, 1, 0, 1) {}
    };
    BufferObjectMock bo(&drmMock);
    DrmAllocation drmAllocation(rootDeviceIndex, AllocationType::UNKNOWN, &bo, nullptr, 0u, static_cast(0u), MemoryPool::MemoryNull);
    EXPECT_NE(nullptr, drmAllocation.getBO());

    auto ptr = memoryManager->lockResource(&drmAllocation);
    EXPECT_EQ(nullptr, ptr);

    memoryManager->unlockResource(&drmAllocation);
    // Restore the mock's default ioctl result override.
    mock->ioctl_res_ext = &mock->NONE;
}

// Uses a DrmMemoryManager subclass that records calls to
// unlockResourceInLocalMemoryImpl() and checks that unlockResourceImpl() on a
// MemoryPool::LocalMemory allocation is forwarded there with the allocation's BO.
TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerWhenUnlockResourceIsCalledOnAllocationInLocalMemoryThenRedirectToUnlockResourceInLocalMemory) {
    struct DrmMemoryManagerToTestUnlockResource : public DrmMemoryManager {
        using DrmMemoryManager::unlockResourceImpl;
        // localMemoryEnabled and lockableLocalMemorySize are accepted but not used here.
        DrmMemoryManagerToTestUnlockResource(ExecutionEnvironment &executionEnvironment, bool localMemoryEnabled, size_t lockableLocalMemorySize)
            : DrmMemoryManager(gemCloseWorkerMode::gemCloseWorkerInactive, false, false, executionEnvironment) {
        }
        // Records the call instead of performing a real unlock.
        void unlockResourceInLocalMemoryImpl(BufferObject *bo) override {
            unlockResourceInLocalMemoryImplParam.bo = bo;
            unlockResourceInLocalMemoryImplParam.called = true;
        }
        struct unlockResourceInLocalMemoryImplParamType {
            BufferObject *bo = nullptr;
            bool called = false;
        } unlockResourceInLocalMemoryImplParam;
    };

    DrmMemoryManagerToTestUnlockResource drmMemoryManager(*executionEnvironment, true, MemoryConstants::pageSize);
// Remainder of the test body begun on the previous line.
    DrmMockCustom drmMock(*executionEnvironment->rootDeviceEnvironments[0]);
    // Minimal BufferObject subclass so a buffer object can be constructed directly.
    struct BufferObjectMock : public BufferObject {
        BufferObjectMock(Drm *drm) : BufferObject(drm, 1, 0, 1) {}
    };
    auto bo = new BufferObjectMock(&drmMock);
    auto drmAllocation = new DrmAllocation(rootDeviceIndex, AllocationType::UNKNOWN, bo, nullptr, 0u, static_cast(0u), MemoryPool::LocalMemory);

    drmMemoryManager.unlockResourceImpl(*drmAllocation);
    EXPECT_TRUE(drmMemoryManager.unlockResourceInLocalMemoryImplParam.called);
    EXPECT_EQ(bo, drmMemoryManager.unlockResourceInLocalMemoryImplParam.bo);

    // NOTE(review): the heap-allocated allocation and BO are handed to
    // freeGraphicsMemory(); presumably it releases both — confirm.
    drmMemoryManager.freeGraphicsMemory(drmAllocation);
}

// setDomainCpu() on an allocation that has no buffer object must return false.
TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerWhenSetDomainCpuIsCalledOnAllocationWithoutBufferObjectThenReturnFalse) {
    DrmAllocation drmAllocation(rootDeviceIndex, AllocationType::UNKNOWN, nullptr, nullptr, 0, static_cast(0u), MemoryPool::MemoryNull);
    EXPECT_EQ(nullptr, drmAllocation.getBO());

    auto success = memoryManager->setDomainCpu(drmAllocation, false);
    EXPECT_FALSE(success);
}

// Forces an ioctl failure and expects setDomainCpu() to return false.
TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerWhenSetDomainCpuIsCalledButFailsOnIoctlSetDomainThenReturnFalse) {
    mock->ioctl_expected.gemSetDomain = 1;
    // NOTE(review): presumably makes the very next ioctl (index == current total)
    // return -1 — confirm against the mock.
    this->ioctlResExt = {mock->ioctl_cnt.total, -1};
    mock->ioctl_res_ext = &ioctlResExt;

    DrmMockCustom drmMock(*executionEnvironment->rootDeviceEnvironments[0]);
    // Minimal BufferObject subclass so a buffer object can be constructed directly.
    struct BufferObjectMock : public BufferObject {
        BufferObjectMock(Drm *drm) : BufferObject(drm, 1, 0, 1) {}
    };
    BufferObjectMock bo(&drmMock);
    DrmAllocation drmAllocation(rootDeviceIndex, AllocationType::UNKNOWN, &bo, nullptr, 0u, static_cast(0u), MemoryPool::MemoryNull);
    EXPECT_NE(nullptr, drmAllocation.getBO());

    auto success = memoryManager->setDomainCpu(drmAllocation, false);
    EXPECT_FALSE(success);
    // Restore the mock's default ioctl result override.
    mock->ioctl_res_ext = &mock->NONE;
}

// setDomainCpu(allocation, true) must succeed and record the CPU domain as both the
// read and the write domain.
TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerWhenSetDomainCpuIsCalledOnAllocationThenReturnSetWriteDomain) {
    mock->ioctl_expected.gemSetDomain = 1;

    DrmMockCustom drmMock(*executionEnvironment->rootDeviceEnvironments[0]);
    struct
BufferObjectMock : public BufferObject {
        BufferObjectMock(Drm *drm) : BufferObject(drm, 1, 0, 1) {}
    };
    BufferObjectMock bo(&drmMock);
    DrmAllocation drmAllocation(rootDeviceIndex, AllocationType::UNKNOWN, &bo, nullptr, 0u, static_cast(0u), MemoryPool::MemoryNull);
    EXPECT_NE(nullptr, drmAllocation.getBO());

    auto success = memoryManager->setDomainCpu(drmAllocation, true);
    EXPECT_TRUE(success);

    // check DRM_IOCTL_I915_GEM_SET_DOMAIN input params
    EXPECT_EQ((uint32_t)drmAllocation.getBO()->peekHandle(), mock->setDomainHandle);
    EXPECT_EQ((uint32_t)I915_GEM_DOMAIN_CPU, mock->setDomainReadDomains);
    EXPECT_EQ((uint32_t)I915_GEM_DOMAIN_CPU, mock->setDomainWriteDomain);
}

// Attaches a Gmm marked as unified-aux-translation capable to an allocation and
// expects mapAuxGpuVA() to return false.
TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerAndUnifiedAuxCapableAllocationWhenMappingThenReturnFalse) {
    // Expected ioctl counts configured on the mock for this scenario.
    mock->ioctl_expected.gemUserptr = 1;
    mock->ioctl_expected.gemWait = 1;
    mock->ioctl_expected.gemClose = 1;

    auto gmm = new Gmm(rootDeviceEnvironment->getGmmClientContext(), nullptr, 123, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true);
    auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootDeviceIndex, MemoryConstants::pageSize});
    // NOTE(review): the raw Gmm is handed to the allocation; presumably it is released
    // together with the allocation by freeGraphicsMemory() — confirm.
    allocation->setDefaultGmm(gmm);

    auto mockGmmRes = static_cast(gmm->gmmResourceInfo.get());
    mockGmmRes->setUnifiedAuxTranslationCapable();

    EXPECT_FALSE(memoryManager->mapAuxGpuVA(allocation));

    memoryManager->freeGraphicsMemory(allocation);
}

// Carves a 4-page and a 1-page range out of the external heap, frees the 4-page range
// and then requests a 3-page 32-bit allocation: it must reuse the freed range's
// address while the buffer object still reports the 3-page size.
TEST_F(DrmMemoryManagerTest, given32BitAllocatorWithHeapAllocatorWhenLargerFragmentIsReusedThenOnlyUnmapSizeIsLargerWhileSizeStaysTheSame) {
    mock->ioctl_expected.gemUserptr = 1;
    mock->ioctl_expected.gemWait = 1;
    mock->ioctl_expected.gemClose = 1;

    DebugManagerStateRestore dbgFlagsKeeper;
    memoryManager->setForce32BitAllocations(true);

    size_t allocationSize = 4 * MemoryConstants::pageSize;
    auto ptr = memoryManager->getGfxPartition(rootDeviceIndex)->heapAllocate(HeapIndex::HEAP_EXTERNAL, allocationSize);
    size_t smallAllocationSize = MemoryConstants::pageSize;
memoryManager->getGfxPartition(rootDeviceIndex)->heapAllocate(HeapIndex::HEAP_EXTERNAL, smallAllocationSize); // now free first allocation , this will move it to chunks memoryManager->getGfxPartition(rootDeviceIndex)->heapFree(HeapIndex::HEAP_EXTERNAL, ptr, allocationSize); // now ask for 3 pages, this will give ptr from chunks size_t pages3size = 3 * MemoryConstants::pageSize; void *host_ptr = reinterpret_cast(0x1000); DrmAllocation *graphicsAlloaction = memoryManager->allocate32BitGraphicsMemory(rootDeviceIndex, pages3size, host_ptr, AllocationType::BUFFER); auto bo = graphicsAlloaction->getBO(); EXPECT_EQ(pages3size, bo->peekSize()); EXPECT_EQ(GmmHelper::canonize(ptr), graphicsAlloaction->getGpuAddress()); memoryManager->freeGraphicsMemory(graphicsAlloaction); } TEST_F(DrmMemoryManagerTest, givenSharedAllocationWithSmallerThenRealSizeWhenCreateIsCalledThenRealSizeIsUsed) { unsigned int realSize = 64 * 1024; lseekReturn = realSize; mock->ioctl_expected.primeFdToHandle = 1; mock->ioctl_expected.gemWait = 1; mock->ioctl_expected.gemClose = 1; osHandle sharedHandle = 1u; AllocationProperties properties(rootDeviceIndex, false, MemoryConstants::pageSize, AllocationType::SHARED_BUFFER, false, {}); auto graphicsAllocation = memoryManager->createGraphicsAllocationFromSharedHandle(sharedHandle, properties, false, false); EXPECT_NE(nullptr, graphicsAllocation->getUnderlyingBuffer()); EXPECT_EQ(realSize, graphicsAllocation->getUnderlyingBufferSize()); EXPECT_EQ(this->mock->inputFd, (int)sharedHandle); DrmAllocation *drmAllocation = static_cast(graphicsAllocation); auto bo = drmAllocation->getBO(); EXPECT_EQ(bo->peekHandle(), (int)this->mock->outputHandle); EXPECT_NE(0llu, bo->peekAddress()); EXPECT_EQ(1u, bo->getRefCount()); EXPECT_EQ(realSize, bo->peekSize()); EXPECT_EQ(1, lseekCalledCount); memoryManager->freeGraphicsMemory(graphicsAllocation); } TEST_F(DrmMemoryManagerTest, givenMemoryManagerSupportingVirutalPaddingWhenItIsRequiredThenNewGraphicsAllocationIsCreated) { 
mock->ioctl_expected.gemUserptr = 2; mock->ioctl_expected.gemWait = 2; mock->ioctl_expected.gemClose = 2; // first let's create normal buffer auto bufferSize = MemoryConstants::pageSize; auto buffer = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootDeviceIndex, bufferSize}); // buffer should have size 16 EXPECT_EQ(bufferSize, buffer->getUnderlyingBufferSize()); auto bufferWithPaddingSize = 8192u; auto paddedAllocation = memoryManager->createGraphicsAllocationWithPadding(buffer, 8192u); EXPECT_NE(nullptr, paddedAllocation); EXPECT_NE(0u, paddedAllocation->getGpuAddress()); EXPECT_NE(0u, paddedAllocation->getGpuAddressToPatch()); EXPECT_NE(buffer->getGpuAddress(), paddedAllocation->getGpuAddress()); EXPECT_NE(buffer->getGpuAddressToPatch(), paddedAllocation->getGpuAddressToPatch()); EXPECT_EQ(buffer->getUnderlyingBuffer(), paddedAllocation->getUnderlyingBuffer()); EXPECT_EQ(bufferWithPaddingSize, paddedAllocation->getUnderlyingBufferSize()); EXPECT_FALSE(paddedAllocation->isCoherent()); EXPECT_EQ(0u, paddedAllocation->fragmentsStorage.fragmentCount); auto bufferbo = static_cast(buffer)->getBO(); auto bo = static_cast(paddedAllocation)->getBO(); EXPECT_NE(nullptr, bo); EXPECT_NE(bufferbo->peekHandle(), bo->peekHandle()); memoryManager->freeGraphicsMemory(paddedAllocation); memoryManager->freeGraphicsMemory(buffer); } TEST_F(DrmMemoryManagerTest, givenMemoryManagerWhenAskedForInternalAllocationWithNoPointerThenAllocationFromInternalHeapIsReturned) { mock->ioctl_expected.gemUserptr = 1; mock->ioctl_expected.gemWait = 1; mock->ioctl_expected.gemClose = 1; auto bufferSize = MemoryConstants::pageSize; void *ptr = nullptr; auto drmAllocation = static_cast(memoryManager->allocate32BitGraphicsMemory(rootDeviceIndex, bufferSize, ptr, AllocationType::INTERNAL_HEAP)); ASSERT_NE(nullptr, drmAllocation); EXPECT_NE(nullptr, drmAllocation->getUnderlyingBuffer()); EXPECT_EQ(bufferSize, drmAllocation->getUnderlyingBufferSize()); 
EXPECT_TRUE(drmAllocation->is32BitAllocation()); auto gpuPtr = drmAllocation->getGpuAddress(); auto heapBase = GmmHelper::canonize(memoryManager->getInternalHeapBaseAddress(drmAllocation->getRootDeviceIndex(), drmAllocation->isAllocatedInLocalMemoryPool())); auto heapSize = 4 * GB; EXPECT_GE(gpuPtr, heapBase); EXPECT_LE(gpuPtr, heapBase + heapSize); EXPECT_EQ(drmAllocation->getGpuBaseAddress(), heapBase); memoryManager->freeGraphicsMemory(drmAllocation); } TEST_F(DrmMemoryManagerTest, givenLimitedRangeAllocatorWhenAskedForInternalAllocationWithNoPointerThenAllocationFromInternalHeapIsReturned) { mock->ioctl_expected.gemUserptr = 1; mock->ioctl_expected.gemWait = 1; mock->ioctl_expected.gemClose = 1; memoryManager->forceLimitedRangeAllocator(0xFFFFFFFFF); auto bufferSize = MemoryConstants::pageSize; void *ptr = nullptr; auto drmAllocation = static_cast(memoryManager->allocate32BitGraphicsMemory(rootDeviceIndex, bufferSize, ptr, AllocationType::INTERNAL_HEAP)); ASSERT_NE(nullptr, drmAllocation); EXPECT_NE(nullptr, drmAllocation->getUnderlyingBuffer()); EXPECT_EQ(bufferSize, drmAllocation->getUnderlyingBufferSize()); ASSERT_NE(nullptr, drmAllocation->getDriverAllocatedCpuPtr()); EXPECT_EQ(drmAllocation->getDriverAllocatedCpuPtr(), drmAllocation->getUnderlyingBuffer()); EXPECT_TRUE(drmAllocation->is32BitAllocation()); auto gpuPtr = drmAllocation->getGpuAddress(); auto heapBase = GmmHelper::canonize(memoryManager->getInternalHeapBaseAddress(drmAllocation->getRootDeviceIndex(), drmAllocation->isAllocatedInLocalMemoryPool())); auto heapSize = 4 * GB; EXPECT_GE(gpuPtr, heapBase); EXPECT_LE(gpuPtr, heapBase + heapSize); EXPECT_EQ(drmAllocation->getGpuBaseAddress(), heapBase); memoryManager->freeGraphicsMemory(drmAllocation); } TEST_F(DrmMemoryManagerTest, givenLimitedRangeAllocatorWhenAskedForExternalAllocationWithNoPointerThenAllocationFromInternalHeapIsReturned) { mock->ioctl_expected.gemUserptr = 1; mock->ioctl_expected.gemWait = 1; mock->ioctl_expected.gemClose = 1; 
memoryManager->setForce32BitAllocations(true); memoryManager->forceLimitedRangeAllocator(0xFFFFFFFFF); auto bufferSize = MemoryConstants::pageSize; void *ptr = nullptr; auto drmAllocation = static_cast(memoryManager->allocate32BitGraphicsMemory(rootDeviceIndex, bufferSize, ptr, AllocationType::BUFFER)); ASSERT_NE(nullptr, drmAllocation); EXPECT_NE(nullptr, drmAllocation->getUnderlyingBuffer()); EXPECT_TRUE(drmAllocation->is32BitAllocation()); memoryManager->freeGraphicsMemory(drmAllocation); } TEST_F(DrmMemoryManagerTest, givenLimitedRangeAllocatorWhenAskedForInternalAllocationWithNoPointerAndHugeBufferSizeThenAllocationFromInternalHeapFailed) { memoryManager->forceLimitedRangeAllocator(0xFFFFFFFFF); auto bufferSize = 128 * MemoryConstants::megaByte + 4 * MemoryConstants::pageSize; void *ptr = nullptr; auto drmAllocation = static_cast(memoryManager->allocate32BitGraphicsMemory(rootDeviceIndex, bufferSize, ptr, AllocationType::INTERNAL_HEAP)); ASSERT_EQ(nullptr, drmAllocation); } TEST_F(DrmMemoryManagerTest, givenMemoryManagerWhenAskedForInternalAllocationWithPointerThenAllocationFromInternalHeapIsReturned) { mock->ioctl_expected.gemUserptr = 1; mock->ioctl_expected.gemWait = 1; mock->ioctl_expected.gemClose = 1; auto bufferSize = MemoryConstants::pageSize; void *ptr = reinterpret_cast(0x100000); auto drmAllocation = static_cast(memoryManager->allocate32BitGraphicsMemory(rootDeviceIndex, bufferSize, ptr, AllocationType::INTERNAL_HEAP)); ASSERT_NE(nullptr, drmAllocation); EXPECT_NE(nullptr, drmAllocation->getUnderlyingBuffer()); EXPECT_EQ(ptr, drmAllocation->getUnderlyingBuffer()); EXPECT_EQ(bufferSize, drmAllocation->getUnderlyingBufferSize()); EXPECT_TRUE(drmAllocation->is32BitAllocation()); auto gpuPtr = drmAllocation->getGpuAddress(); auto heapBase = GmmHelper::canonize(memoryManager->getInternalHeapBaseAddress(drmAllocation->getRootDeviceIndex(), drmAllocation->isAllocatedInLocalMemoryPool())); auto heapSize = 4 * GB; EXPECT_GE(gpuPtr, heapBase); 
EXPECT_LE(gpuPtr, heapBase + heapSize); EXPECT_EQ(drmAllocation->getGpuBaseAddress(), heapBase); memoryManager->freeGraphicsMemory(drmAllocation); } TEST_F(DrmMemoryManagerTest, givenMemoryManagerSupportingVirutalPaddingWhenAllocUserptrFailsThenReturnsNullptr) { mock->ioctl_expected.gemUserptr = 2; mock->ioctl_expected.gemWait = 1; mock->ioctl_expected.gemClose = 1; this->ioctlResExt = {mock->ioctl_cnt.total + 1, -1}; mock->ioctl_res_ext = &ioctlResExt; // first let's create normal buffer auto bufferSize = MemoryConstants::pageSize; auto buffer = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootDeviceIndex, bufferSize}); // buffer should have size 16 EXPECT_EQ(bufferSize, buffer->getUnderlyingBufferSize()); auto bufferWithPaddingSize = 8192u; auto paddedAllocation = memoryManager->createGraphicsAllocationWithPadding(buffer, bufferWithPaddingSize); EXPECT_EQ(nullptr, paddedAllocation); memoryManager->freeGraphicsMemory(buffer); mock->ioctl_res_ext = &mock->NONE; } using DrmMemoryManagerUSMHostAllocationTests = Test; TEST_F(DrmMemoryManagerUSMHostAllocationTests, givenCallToAllocateGraphicsMemoryWithAlignmentWithIsHostUsmAllocationSetToFalseThenNewHostPointerIsUsedAndAllocationIsCreatedSuccesfully) { mock->ioctl_expected.gemUserptr = 1; mock->ioctl_expected.gemClose = 1; AllocationData allocationData; allocationData.size = 16384; allocationData.rootDeviceIndex = rootDeviceIndex; NEO::DrmAllocation *alloc = memoryManager->allocateGraphicsMemoryWithAlignment(allocationData); EXPECT_NE(nullptr, alloc); memoryManager->freeGraphicsMemoryImpl(alloc); } TEST_F(DrmMemoryManagerUSMHostAllocationTests, givenCallToAllocateGraphicsMemoryWithAlignmentWithIsHostUsmAllocationSetToTrueThenGpuAddressIsNotFromGfxPartition) { mock->ioctl_expected.gemUserptr = 1; mock->ioctl_expected.gemClose = 1; AllocationData allocationData; allocationData.size = 16384; allocationData.rootDeviceIndex = rootDeviceIndex; allocationData.flags.isUSMHostAllocation = true; 
allocationData.type = AllocationType::SVM_CPU; auto alloc = memoryManager->allocateGraphicsMemoryWithAlignment(allocationData); EXPECT_NE(nullptr, alloc); EXPECT_EQ(reinterpret_cast(alloc->getUnderlyingBuffer()), alloc->getGpuAddress()); memoryManager->freeGraphicsMemoryImpl(alloc); } TEST_F(DrmMemoryManagerUSMHostAllocationTests, givenMmapPtrWhenFreeGraphicsMemoryImplThenPtrIsDeallocated) { mock->ioctl_expected.gemUserptr = 1; mock->ioctl_expected.gemClose = 1; const size_t size = 16384; AllocationData allocationData; allocationData.size = size; allocationData.rootDeviceIndex = rootDeviceIndex; auto alloc = memoryManager->allocateGraphicsMemoryWithAlignment(allocationData); EXPECT_NE(nullptr, alloc); auto ptr = memoryManager->mmapFunction(0, size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS, -1, 0); static_cast(alloc)->setMmapPtr(ptr); static_cast(alloc)->setMmapSize(size); memoryManager->freeGraphicsMemoryImpl(alloc); } TEST_F(DrmMemoryManagerUSMHostAllocationTests, givenCallToallocateGraphicsMemoryWithAlignmentWithisHostUSMAllocationSetToTrueThenTheExistingHostPointerIsUsedAndAllocationIsCreatedSuccesfully) { mock->ioctl_expected.gemUserptr = 1; mock->ioctl_expected.gemClose = 1; AllocationData allocationData; size_t allocSize = 16384; void *hostPtr = alignedMalloc(allocSize, 0); allocationData.size = allocSize; allocationData.rootDeviceIndex = rootDeviceIndex; allocationData.flags.isUSMHostAllocation = true; allocationData.hostPtr = hostPtr; NEO::GraphicsAllocation *alloc = memoryManager->allocateGraphicsMemory(allocationData); EXPECT_NE(nullptr, alloc); EXPECT_EQ(hostPtr, alloc->getUnderlyingBuffer()); memoryManager->freeGraphicsMemoryImpl(alloc); alignedFree(hostPtr); } TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, givenDefaultDrmMemoryManagerWhenAskedForVirtualPaddingSupportThenTrueIsReturned) { EXPECT_TRUE(memoryManager->peekVirtualPaddingSupport()); } TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, 
givenDefaultDrmMemoryManagerWhenAskedForAlignedMallocRestrictionsThenNullPtrIsReturned) { EXPECT_EQ(nullptr, memoryManager->getAlignedMallocRestrictions()); } #include #include TEST(MmapFlags, givenVariousMmapParametersWhenGettingTimeDeltaThenTimeIsPrinted) { // disabling this test in CI. return; typedef std::chrono::high_resolution_clock Time; typedef std::chrono::nanoseconds ns; typedef std::chrono::duration fsec; std::vector pointersForFree; // allocate 4GB. auto size = 4 * GB; unsigned int maxTime = 0; unsigned int minTime = -1; unsigned int totalTime = 0; auto iterCount = 10; for (int i = 0; i < iterCount; i++) { auto t0 = Time::now(); auto gpuRange = mmap(nullptr, size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); auto t1 = Time::now(); pointersForFree.push_back(gpuRange); fsec fs = t1 - t0; ns d = std::chrono::duration_cast(fs); unsigned int duration = (unsigned int)d.count(); totalTime += duration; minTime = std::min(duration, minTime); maxTime = std::max(duration, maxTime); } std::cout << "\n" << "min = " << minTime << "\nmax = " << maxTime << "\naverage = " << totalTime / iterCount << std::endl; for (auto &ptr : pointersForFree) { auto t0 = Time::now(); munmap(ptr, size); auto t1 = Time::now(); fsec fs = t1 - t0; ns d = std::chrono::duration_cast(fs); unsigned int duration = (unsigned int)d.count(); std::cout << "\nfreeing ptr " << ptr << " of size " << size << "time " << duration; } } TEST_F(DrmMemoryManagerBasic, givenDefaultMemoryManagerWhenItIsCreatedThenAsyncDeleterEnabledIsTrue) { DrmMemoryManager memoryManager(gemCloseWorkerMode::gemCloseWorkerInactive, false, true, executionEnvironment); EXPECT_FALSE(memoryManager.isAsyncDeleterEnabled()); EXPECT_EQ(nullptr, memoryManager.getDeferredDeleter()); memoryManager.commonCleanup(); } TEST_F(DrmMemoryManagerBasic, givenDisabledGemCloseWorkerWhenMemoryManagerIsCreatedThenNoGemCloseWorker) { DebugManagerStateRestore dbgStateRestore; DebugManager.flags.EnableGemCloseWorker.set(0u); TestedDrmMemoryManager 
memoryManager(true, true, true, executionEnvironment); EXPECT_EQ(memoryManager.peekGemCloseWorker(), nullptr); } TEST_F(DrmMemoryManagerBasic, givenEnabledGemCloseWorkerWhenMemoryManagerIsCreatedThenGemCloseWorker) { DebugManagerStateRestore dbgStateRestore; DebugManager.flags.EnableGemCloseWorker.set(1u); TestedDrmMemoryManager memoryManager(true, true, true, executionEnvironment); EXPECT_NE(memoryManager.peekGemCloseWorker(), nullptr); } TEST_F(DrmMemoryManagerBasic, givenDefaultGemCloseWorkerWhenMemoryManagerIsCreatedThenGemCloseWorker) { MemoryManagerCreate memoryManager(false, false, gemCloseWorkerMode::gemCloseWorkerActive, false, false, executionEnvironment); EXPECT_NE(memoryManager.peekGemCloseWorker(), nullptr); } TEST_F(DrmMemoryManagerBasic, givenEnabledAsyncDeleterFlagWhenMemoryManagerIsCreatedThenAsyncDeleterEnabledIsFalseAndDeleterIsNullptr) { DebugManagerStateRestore dbgStateRestore; DebugManager.flags.EnableDeferredDeleter.set(true); DrmMemoryManager memoryManager(gemCloseWorkerMode::gemCloseWorkerInactive, false, true, executionEnvironment); EXPECT_FALSE(memoryManager.isAsyncDeleterEnabled()); EXPECT_EQ(nullptr, memoryManager.getDeferredDeleter()); memoryManager.commonCleanup(); } TEST_F(DrmMemoryManagerBasic, givenDisabledAsyncDeleterFlagWhenMemoryManagerIsCreatedThenAsyncDeleterEnabledIsFalseAndDeleterIsNullptr) { DebugManagerStateRestore dbgStateRestore; DebugManager.flags.EnableDeferredDeleter.set(false); DrmMemoryManager memoryManager(gemCloseWorkerMode::gemCloseWorkerInactive, false, true, executionEnvironment); EXPECT_FALSE(memoryManager.isAsyncDeleterEnabled()); EXPECT_EQ(nullptr, memoryManager.getDeferredDeleter()); memoryManager.commonCleanup(); } TEST_F(DrmMemoryManagerBasic, givenWorkerToCloseWhenCommonCleanupIsCalledThenClosingIsBlocking) { MockDrmMemoryManager memoryManager(gemCloseWorkerMode::gemCloseWorkerInactive, false, true, executionEnvironment); memoryManager.gemCloseWorker.reset(new MockDrmGemCloseWorker(memoryManager)); auto 
pWorker = static_cast(memoryManager.gemCloseWorker.get()); memoryManager.commonCleanup(); EXPECT_TRUE(pWorker->wasBlocking); } TEST_F(DrmMemoryManagerBasic, givenDefaultDrmMemoryManagerWhenItIsQueriedForInternalHeapBaseThenInternalHeapBaseIsReturned) { std::unique_ptr memoryManager(new (std::nothrow) TestedDrmMemoryManager(false, true, true, executionEnvironment)); auto heapBase = memoryManager->getGfxPartition(rootDeviceIndex)->getHeapBase(HeapIndex::HEAP_INTERNAL_DEVICE_MEMORY); EXPECT_EQ(heapBase, memoryManager->getInternalHeapBaseAddress(rootDeviceIndex, true)); } TEST_F(DrmMemoryManagerBasic, givenMemoryManagerWithEnabledHostMemoryValidationWhenFeatureIsQueriedThenTrueIsReturned) { std::unique_ptr memoryManager(new (std::nothrow) TestedDrmMemoryManager(false, false, true, executionEnvironment)); ASSERT_NE(nullptr, memoryManager.get()); EXPECT_TRUE(memoryManager->isValidateHostMemoryEnabled()); } TEST_F(DrmMemoryManagerBasic, givenMemoryManagerWithDisabledHostMemoryValidationWhenFeatureIsQueriedThenFalseIsReturned) { std::unique_ptr memoryManager(new (std::nothrow) TestedDrmMemoryManager(false, false, false, executionEnvironment)); ASSERT_NE(nullptr, memoryManager.get()); EXPECT_FALSE(memoryManager->isValidateHostMemoryEnabled()); } TEST_F(DrmMemoryManagerBasic, givenEnabledHostMemoryValidationWhenMemoryManagerIsCreatedThenPinBBIsCreated) { std::unique_ptr memoryManager(new (std::nothrow) TestedDrmMemoryManager(false, false, true, executionEnvironment)); ASSERT_NE(nullptr, memoryManager.get()); ASSERT_NE(nullptr, memoryManager->pinBBs[rootDeviceIndex]); } TEST_F(DrmMemoryManagerBasic, givenEnabledHostMemoryValidationAndForcePinWhenMemoryManagerIsCreatedThenPinBBIsCreated) { std::unique_ptr memoryManager(new (std::nothrow) TestedDrmMemoryManager(false, true, true, executionEnvironment)); ASSERT_NE(nullptr, memoryManager.get()); ASSERT_NE(nullptr, memoryManager->pinBBs[rootDeviceIndex]); } TEST_F(DrmMemoryManagerBasic, 
givenMemoryManagerWhenAllocateGraphicsMemoryIsCalledThenMemoryPoolIsSystem4KBPages) { std::unique_ptr memoryManager(new (std::nothrow) TestedDrmMemoryManager( false, false, true, executionEnvironment)); auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(createAllocationProperties(rootDeviceIndex, MemoryConstants::pageSize, false)); EXPECT_NE(nullptr, allocation); EXPECT_EQ(MemoryPool::System4KBPages, allocation->getMemoryPool()); memoryManager->freeGraphicsMemory(allocation); } TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, givenMemoryManagerWhenAllocateGraphicsMemoryWithPtrIsCalledThenMemoryPoolIsSystem4KBPages) { std::unique_ptr memoryManager(new (std::nothrow) TestedDrmMemoryManager(false, false, true, *executionEnvironment)); memoryManager->registeredEngines = EngineControlContainer{this->device->allEngines}; for (auto engine : memoryManager->registeredEngines) { engine.osContext->incRefInternal(); } void *ptr = reinterpret_cast(0x1001); auto size = 4096u; auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), false, size}, ptr); ASSERT_NE(nullptr, allocation); EXPECT_EQ(MemoryPool::System4KBPages, allocation->getMemoryPool()); memoryManager->freeGraphicsMemory(allocation); } TEST_F(DrmMemoryManagerBasic, givenMemoryManagerWhenAllocate32BitGraphicsMemoryWithPtrIsCalledThenMemoryPoolIsSystem4KBPagesWith32BitGpuAddressing) { std::unique_ptr memoryManager(new (std::nothrow) TestedDrmMemoryManager(false, false, true, executionEnvironment)); memoryManager->setForce32BitAllocations(true); void *ptr = reinterpret_cast(0x1001); auto size = MemoryConstants::pageSize; auto allocation = memoryManager->allocate32BitGraphicsMemory(rootDeviceIndex, size, ptr, AllocationType::BUFFER); ASSERT_NE(nullptr, allocation); EXPECT_EQ(MemoryPool::System4KBPagesWith32BitGpuAddressing, allocation->getMemoryPool()); memoryManager->freeGraphicsMemory(allocation); } TEST_F(DrmMemoryManagerBasic, 
givenMemoryManagerWhenCreateAllocationFromHandleIsCalledThenMemoryPoolIsSystemCpuInaccessible) { std::unique_ptr memoryManager(new (std::nothrow) TestedDrmMemoryManager(false, false, true, executionEnvironment)); auto osHandle = 1u; AllocationProperties properties(rootDeviceIndex, false, MemoryConstants::pageSize, AllocationType::SHARED_BUFFER, false, {}); auto allocation = memoryManager->createGraphicsAllocationFromSharedHandle(osHandle, properties, false, false); EXPECT_NE(nullptr, allocation); EXPECT_EQ(MemoryPool::SystemCpuInaccessible, allocation->getMemoryPool()); memoryManager->freeGraphicsMemory(allocation); } TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, givenDisabledForcePinAndEnabledValidateHostMemoryWhenPinBBAllocationFailsThenUnrecoverableIsCalled) { this->mock = static_cast(executionEnvironment->rootDeviceEnvironments[0]->osInterface->getDriverModel()->as()); this->mock->reset(); this->mock->ioctl_res = -1; this->mock->ioctl_expected.gemUserptr = 1; EXPECT_THROW( { std::unique_ptr memoryManager(new TestedDrmMemoryManager(false, false, true, *executionEnvironment)); EXPECT_NE(nullptr, memoryManager.get()); }, std::exception); this->mock->ioctl_res = 0; this->mock->ioctl_expected.contextDestroy = 0; this->mock->testIoctls(); } TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, givenDisabledForcePinAndEnabledValidateHostMemoryWhenPopulateOsHandlesIsCalledThenHostMemoryIsValidated) { std::unique_ptr memoryManager(new TestedDrmMemoryManager(false, false, true, *executionEnvironment)); memoryManager->registeredEngines = EngineControlContainer{this->device->allEngines}; for (auto engine : memoryManager->registeredEngines) { engine.osContext->incRefInternal(); } ASSERT_NE(nullptr, memoryManager.get()); ASSERT_NE(nullptr, memoryManager->pinBBs[device->getRootDeviceIndex()]); mock->reset(); mock->ioctl_expected.gemUserptr = 1; mock->ioctl_expected.execbuffer2 = 1; // for pinning - host memory validation OsHandleStorage handleStorage; 
handleStorage.fragmentStorageData[0].cpuPtr = reinterpret_cast(0x1000); handleStorage.fragmentStorageData[0].fragmentSize = 4096; auto result = memoryManager->populateOsHandles(handleStorage, rootDeviceIndex); EXPECT_EQ(MemoryManager::AllocationStatus::Success, result); mock->testIoctls(); EXPECT_NE(nullptr, handleStorage.fragmentStorageData[0].osHandleStorage); handleStorage.fragmentStorageData[0].freeTheFragment = true; memoryManager->cleanOsHandles(handleStorage, rootDeviceIndex); } TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, givenDisabledForcePinAndEnabledValidateHostMemoryWhenPopulateOsHandlesIsCalledWithFirstFragmentAlreadyAllocatedThenNewBosAreValidated) { class PinBufferObject : public BufferObject { public: PinBufferObject(Drm *drm) : BufferObject(drm, 1, 0, 1) { } int validateHostPtr(BufferObject *const boToPin[], size_t numberOfBos, OsContext *osContext, uint32_t vmHandleId, uint32_t drmContextId) override { for (size_t i = 0; i < numberOfBos; i++) { pinnedBoArray[i] = boToPin[i]; } numberOfBosPinned = numberOfBos; return 0; } BufferObject *pinnedBoArray[5]; size_t numberOfBosPinned; }; std::unique_ptr memoryManager(new TestedDrmMemoryManager(false, false, true, *executionEnvironment)); memoryManager->registeredEngines = EngineControlContainer{this->device->allEngines}; for (auto engine : memoryManager->registeredEngines) { engine.osContext->incRefInternal(); } ASSERT_NE(nullptr, memoryManager.get()); ASSERT_NE(nullptr, memoryManager->pinBBs[device->getRootDeviceIndex()]); PinBufferObject *pinBB = new PinBufferObject(this->mock); memoryManager->injectPinBB(pinBB, rootDeviceIndex); mock->reset(); mock->ioctl_expected.gemUserptr = 2; mock->ioctl_expected.execbuffer2 = 0; // pinning for host memory validation is mocked OsHandleStorage handleStorage; OsHandleLinux handle1; handleStorage.fragmentStorageData[0].osHandleStorage = &handle1; handleStorage.fragmentStorageData[0].cpuPtr = reinterpret_cast(0x1000); 
handleStorage.fragmentStorageData[0].fragmentSize = 4096; handleStorage.fragmentStorageData[1].osHandleStorage = nullptr; handleStorage.fragmentStorageData[1].cpuPtr = reinterpret_cast(0x2000); handleStorage.fragmentStorageData[1].fragmentSize = 8192; handleStorage.fragmentStorageData[2].osHandleStorage = nullptr; handleStorage.fragmentStorageData[2].cpuPtr = reinterpret_cast(0x4000); handleStorage.fragmentStorageData[2].fragmentSize = 4096; auto result = memoryManager->populateOsHandles(handleStorage, rootDeviceIndex); EXPECT_EQ(MemoryManager::AllocationStatus::Success, result); mock->testIoctls(); EXPECT_NE(nullptr, handleStorage.fragmentStorageData[0].osHandleStorage); EXPECT_NE(nullptr, handleStorage.fragmentStorageData[1].osHandleStorage); EXPECT_NE(nullptr, handleStorage.fragmentStorageData[2].osHandleStorage); EXPECT_EQ(static_cast(handleStorage.fragmentStorageData[1].osHandleStorage)->bo, pinBB->pinnedBoArray[0]); EXPECT_EQ(static_cast(handleStorage.fragmentStorageData[2].osHandleStorage)->bo, pinBB->pinnedBoArray[1]); handleStorage.fragmentStorageData[0].freeTheFragment = false; handleStorage.fragmentStorageData[1].freeTheFragment = true; handleStorage.fragmentStorageData[2].freeTheFragment = true; memoryManager->cleanOsHandles(handleStorage, rootDeviceIndex); } TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, givenValidateHostPtrMemoryEnabledWhenHostPtrAllocationIsCreatedWithoutForcingPinThenBufferObjectIsPinned) { mock->ioctl_expected.gemUserptr = 2; mock->ioctl_expected.gemWait = 1; mock->ioctl_expected.gemClose = 2; std::unique_ptr memoryManager(new (std::nothrow) TestedDrmMemoryManager(false, true, true, *executionEnvironment)); memoryManager->registeredEngines = EngineControlContainer{this->device->allEngines}; for (auto engine : memoryManager->registeredEngines) { engine.osContext->incRefInternal(); } ASSERT_NE(nullptr, memoryManager->pinBBs[device->getRootDeviceIndex()]); size_t size = 10 * MB; void *ptr = ::alignedMalloc(size, 4096); auto 
alloc = static_cast(memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), false, size}, ptr)); ASSERT_NE(nullptr, alloc); EXPECT_NE(nullptr, alloc->getBO()); memoryManager->freeGraphicsMemory(alloc); ::alignedFree(ptr); } TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, givenEnabledHostMemoryValidationWhenValidHostPointerIsPassedToPopulateThenSuccessIsReturned) { std::unique_ptr memoryManager(new (std::nothrow) TestedDrmMemoryManager(false, false, true, *executionEnvironment)); memoryManager->registeredEngines = EngineControlContainer{this->device->allEngines}; for (auto engine : memoryManager->registeredEngines) { engine.osContext->incRefInternal(); } OsHandleStorage storage; storage.fragmentStorageData[0].cpuPtr = reinterpret_cast(0x1000); storage.fragmentStorageData[0].fragmentSize = 1; auto result = memoryManager->populateOsHandles(storage, rootDeviceIndex); EXPECT_EQ(MemoryManager::AllocationStatus::Success, result); EXPECT_NE(nullptr, storage.fragmentStorageData[0].osHandleStorage); storage.fragmentStorageData[0].freeTheFragment = true; memoryManager->cleanOsHandles(storage, rootDeviceIndex); } TEST_F(DrmMemoryManagerTest, givenForcePinAndHostMemoryValidationEnabledWhenSmallAllocationIsCreatedThenBufferObjectIsPinned) { mock->ioctl_expected.gemUserptr = 2; // 1 pinBB, 1 small allocation mock->ioctl_expected.execbuffer2 = 1; // pinning mock->ioctl_expected.gemWait = 1; // in freeGraphicsAllocation mock->ioctl_expected.gemClose = 2; // 1 pinBB, 1 small allocation std::unique_ptr memoryManager(new (std::nothrow) TestedDrmMemoryManager(false, true, true, *executionEnvironment)); memoryManager->registeredEngines = EngineControlContainer{this->device->allEngines}; for (auto engine : memoryManager->registeredEngines) { engine.osContext->incRefInternal(); } ASSERT_NE(nullptr, memoryManager->pinBBs[rootDeviceIndex]); // one page is too small for early pinning but pinning is used for host memory validation 
allocationData.size = 4 * 1024; allocationData.hostPtr = ::alignedMalloc(allocationData.size, 4096); auto alloc = memoryManager->allocateGraphicsMemoryWithHostPtr(allocationData); ASSERT_NE(nullptr, alloc); EXPECT_NE(nullptr, alloc->getBO()); memoryManager->freeGraphicsMemory(alloc); ::alignedFree(const_cast(allocationData.hostPtr)); } TEST_F(DrmMemoryManagerTest, givenForcePinAndHostMemoryValidationEnabledThenPinnedBufferObjectGpuAddressWithinDeviceGpuAddressSpace) { mock->ioctl_expected.gemUserptr = 1; mock->ioctl_expected.gemClose = 1; std::unique_ptr memoryManager(new (std::nothrow) TestedDrmMemoryManager(false, true, true, *executionEnvironment)); auto bo = memoryManager->pinBBs[rootDeviceIndex]; ASSERT_NE(nullptr, bo); EXPECT_LT(bo->peekAddress(), defaultHwInfo->capabilityTable.gpuAddressSpace); } TEST_F(DrmMemoryManagerTest, givenForcePinAndHostMemoryValidationEnabledThenPinnedBufferObjectWrittenWithMIBBENDAndNOOP) { mock->ioctl_expected.gemUserptr = 1; mock->ioctl_expected.gemClose = 1; std::unique_ptr memoryManager(new (std::nothrow) TestedDrmMemoryManager(false, true, true, *executionEnvironment)); EXPECT_NE(0ul, memoryManager->memoryForPinBBs.size()); ASSERT_NE(nullptr, memoryManager->memoryForPinBBs[rootDeviceIndex]); uint32_t *buffer = reinterpret_cast(memoryManager->memoryForPinBBs[rootDeviceIndex]); uint32_t bb_end = 0x05000000; EXPECT_EQ(bb_end, buffer[0]); EXPECT_EQ(0ul, buffer[1]); } TEST_F(DrmMemoryManagerTest, givenForcePinAllowedAndNoPinBBInMemoryManagerWhenAllocationWithForcePinFlagTrueIsCreatedThenAllocationIsNotPinned) { mock->ioctl_expected.gemUserptr = 2; mock->ioctl_expected.gemWait = 1; mock->ioctl_expected.gemClose = 1; mock->ioctl_res = -1; std::unique_ptr memoryManager(new (std::nothrow) TestedDrmMemoryManager(false, true, false, *executionEnvironment)); memoryManager->registeredEngines = EngineControlContainer{this->device->allEngines}; for (auto engine : memoryManager->registeredEngines) { engine.osContext->incRefInternal(); } 
EXPECT_EQ(nullptr, memoryManager->pinBBs[rootDeviceIndex]); mock->ioctl_res = 0; auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(createAllocationProperties(rootDeviceIndex, MemoryConstants::pageSize, true)); EXPECT_NE(nullptr, allocation); memoryManager->freeGraphicsMemory(allocation); } TEST_F(DrmMemoryManagerTest, givenNullptrOrZeroSizeWhenAllocateGraphicsMemoryForNonSvmHostPtrIsCalledThenAllocationIsNotCreated) { allocationData.size = 0; allocationData.hostPtr = nullptr; EXPECT_FALSE(memoryManager->allocateGraphicsMemoryForNonSvmHostPtr(allocationData)); allocationData.size = 100; allocationData.hostPtr = nullptr; EXPECT_FALSE(memoryManager->allocateGraphicsMemoryForNonSvmHostPtr(allocationData)); allocationData.size = 0; allocationData.hostPtr = reinterpret_cast(0x12345); EXPECT_FALSE(memoryManager->allocateGraphicsMemoryForNonSvmHostPtr(allocationData)); } TEST_F(DrmMemoryManagerBasic, givenDrmMemoryManagerWhenAllocateGraphicsMemoryForNonSvmHostPtrIsCalledWithNotAlignedPtrIsPassedThenAllocationIsCreated) { AllocationData allocationData; std::unique_ptr memoryManager(new (std::nothrow) TestedDrmMemoryManager(false, false, false, executionEnvironment)); memoryManager->forceLimitedRangeAllocator(0xFFFFFFFFF); allocationData.size = 13; allocationData.hostPtr = reinterpret_cast(0x5001); allocationData.rootDeviceIndex = rootDeviceIndex; auto allocation = memoryManager->allocateGraphicsMemoryForNonSvmHostPtr(allocationData); EXPECT_NE(nullptr, allocation); EXPECT_EQ(0x5001u, reinterpret_cast(allocation->getUnderlyingBuffer())); EXPECT_EQ(13u, allocation->getUnderlyingBufferSize()); EXPECT_EQ(1u, allocation->getAllocationOffset()); memoryManager->freeGraphicsMemory(allocation); } TEST_F(DrmMemoryManagerBasic, givenDrmMemoryManagerWhenAllocateGraphicsMemoryForNonSvmHostPtrThenObjectAlignedSizeIsUsedByAllocUserPtrWhenBiggerSizeAllocatedInHeap) { AllocationData allocationData; allocationData.rootDeviceIndex = rootDeviceIndex; std::unique_ptr 
memoryManager(new (std::nothrow) TestedDrmMemoryManager(false, false, false, executionEnvironment));
    memoryManager->forceLimitedRangeAllocator(0xFFFFFFFFF);

    allocationData.size = 4 * MB + 16 * 1024;
    allocationData.hostPtr = reinterpret_cast(0x10000000);
    auto allocation0 = memoryManager->allocateGraphicsMemoryForNonSvmHostPtr(allocationData);

    allocationData.hostPtr = reinterpret_cast(0x20000000);
    auto allocation1 = memoryManager->allocateGraphicsMemoryForNonSvmHostPtr(allocationData);

    memoryManager->freeGraphicsMemory(allocation0);

    allocationData.size = 4 * MB + 12 * 1024;
    allocationData.hostPtr = reinterpret_cast(0x30000000);
    allocation0 = memoryManager->allocateGraphicsMemoryForNonSvmHostPtr(allocationData);

    EXPECT_EQ((uint64_t)(allocation0->getBO()->peekSize()), 4 * MB + 12 * 1024);

    memoryManager->freeGraphicsMemory(allocation0);
    memoryManager->freeGraphicsMemory(allocation1);
}

// NOTE(review): throughout this text the casts, smart-pointer declarations and
// make_unique/as() calls carry no template argument list (e.g. `reinterpret_cast(0x1000)`,
// `std::unique_ptr memoryManager(...)`). This looks like an extraction artifact - confirm
// against the upstream file before building.

// Forces the limited-range allocator to 0xFFFFFFFFF, asks for a 64 * GB non-SVM host-ptr
// allocation and expects the call to yield a null (false) result.
TEST_F(DrmMemoryManagerBasic, givenDrmMemoryManagerWhenAllocateGraphicsMemoryForNonSvmHostPtrIsCalledButAllocationFailedThenNullPtrReturned) {
    AllocationData allocationData;
    allocationData.rootDeviceIndex = rootDeviceIndex;
    std::unique_ptr memoryManager(new (std::nothrow) TestedDrmMemoryManager(false, false, false, executionEnvironment));
    memoryManager->forceLimitedRangeAllocator(0xFFFFFFFFF);

    allocationData.size = 64 * GB;
    allocationData.hostPtr = reinterpret_cast(0x100000000000);
    EXPECT_FALSE(memoryManager->allocateGraphicsMemoryForNonSvmHostPtr(allocationData));
}

// Same failing 64 * GB request, issued through allocateGraphicsMemoryInPreferredPool;
// additionally checks that getSysMemAllocs() stays empty afterwards.
TEST_F(DrmMemoryManagerBasic, givenDrmMemoryManagerWhenAllocateGraphicsMemoryForNonSvmHostPtrFailsThenNullPtrReturnedAndAllocationIsNotRegistered) {
    std::unique_ptr memoryManager(new (std::nothrow) TestedDrmMemoryManager(false, false, false, executionEnvironment));
    memoryManager->forceLimitedRangeAllocator(0xFFFFFFFFF);

    MockAllocationProperties properties(0u, 64 * GB);

    auto ptr = reinterpret_cast(0x100000000000);
    EXPECT_FALSE(memoryManager->allocateGraphicsMemoryInPreferredPool(properties, ptr));
    EXPECT_EQ(memoryManager->getSysMemAllocs().size(), 0u);
}

// Installs an IoctlResExt of {current total ioctl count, -1} on the mock, then expects the
// non-SVM host-ptr allocation to return nullptr. The override is reset to mock->NONE at the end.
// NOTE(review): presumably the {count, -1} pair makes the next ioctl (the userptr one) fail -
// confirm against DrmMockCustom.
TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerWhenAllocateGraphicsMemoryForNonSvmHostPtrIsCalledWithHostPtrIsPassedAndWhenAllocUserptrFailsThenFails) {
    memoryManager->forceLimitedRangeAllocator(0xFFFFFFFFF);

    mock->ioctl_expected.gemUserptr = 1;
    this->ioctlResExt = {mock->ioctl_cnt.total, -1};
    mock->ioctl_res_ext = &ioctlResExt;

    allocationData.size = 10;
    allocationData.hostPtr = reinterpret_cast(0x1000);
    auto allocation = memoryManager->allocateGraphicsMemoryForNonSvmHostPtr(allocationData);

    EXPECT_EQ(nullptr, allocation);
    mock->ioctl_res_ext = &mock->NONE;
}

// Builds a manager with the third constructor flag set to true, allocates a 4 KiB host-ptr
// buffer with forcePin requested, and expects one userptr, one execbuffer2, one wait and one
// close ioctl (verified through mock->testIoctls()).
TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, givenForcePinNotAllowedAndHostMemoryValidationEnabledWhenAllocationIsCreatedThenBufferObjectIsPinnedOnlyOnce) {
    std::unique_ptr memoryManager(new TestedDrmMemoryManager(false, false, true, *executionEnvironment));
    memoryManager->registeredEngines = EngineControlContainer{this->device->allEngines};
    for (auto engine : memoryManager->registeredEngines) {
        engine.osContext->incRefInternal();
    }
    // Clear ioctl counters accumulated while the manager was constructed.
    mock->reset();
    mock->ioctl_expected.gemUserptr = 1;
    mock->ioctl_expected.execbuffer2 = 1;
    mock->ioctl_expected.gemClose = 1;
    mock->ioctl_expected.gemWait = 1;

    AllocationData allocationData;
    allocationData.size = 4 * 1024;
    allocationData.hostPtr = ::alignedMalloc(allocationData.size, 4096);
    allocationData.flags.forcePin = true;
    allocationData.rootDeviceIndex = device->getRootDeviceIndex();
    auto alloc = memoryManager->allocateGraphicsMemoryWithHostPtr(allocationData);
    ASSERT_NE(nullptr, alloc);
    EXPECT_NE(nullptr, alloc->getBO());

    memoryManager->freeGraphicsMemory(alloc);
    mock->testIoctls();

    ::alignedFree(const_cast(allocationData.hostPtr));
}

// Counterpart of the test above with the third constructor flag false: no execbuffer2 ioctl
// is listed in the expectations, only userptr, wait and close.
TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, givenForcePinNotAllowedAndHostMemoryValidationDisabledWhenAllocationIsCreatedThenBufferObjectIsNotPinned) {
    std::unique_ptr memoryManager(new TestedDrmMemoryManager(false, false, false, *executionEnvironment));
    memoryManager->registeredEngines = EngineControlContainer{this->device->allEngines};
    for (auto engine : memoryManager->registeredEngines) {
        engine.osContext->incRefInternal();
    }
    mock->reset();
    mock->ioctl_expected.gemUserptr = 1;
    mock->ioctl_expected.gemClose = 1;
    mock->ioctl_expected.gemWait = 1;

    AllocationData allocationData;
    allocationData.size = 10 * MB; // bigger than threshold
    allocationData.hostPtr = ::alignedMalloc(allocationData.size, 4096);
    allocationData.flags.forcePin = true;
    allocationData.rootDeviceIndex = device->getRootDeviceIndex();
    auto alloc = memoryManager->allocateGraphicsMemoryWithHostPtr(allocationData);
    ASSERT_NE(nullptr, alloc);
    EXPECT_NE(nullptr, alloc->getBO());

    memoryManager->freeGraphicsMemory(alloc);
    mock->testIoctls();

    ::alignedFree(const_cast(allocationData.hostPtr));
}

// populateOsHandles() over three fragments (the first with a pre-existing handle) while the
// mock reports EFAULT: expects InvalidHostPointer, handles present on all three fragments and
// freeTheFragment set on the two fragments that had no handle on entry.
TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, givenEnabledValidateHostMemoryWhenReadOnlyPointerCausesPinningFailWithEfaultThenPopulateOsHandlesMarksFragmentsToFree) {
    std::unique_ptr memoryManager(new TestedDrmMemoryManager(false, false, true, *executionEnvironment));
    memoryManager->registeredEngines = EngineControlContainer{this->device->allEngines};
    for (auto engine : memoryManager->registeredEngines) {
        engine.osContext->incRefInternal();
    }
    ASSERT_NE(nullptr, memoryManager.get());
    ASSERT_NE(nullptr, memoryManager->pinBBs[device->getRootDeviceIndex()]);

    mock->reset();

    // Result override {2, -1} with extra entries 3 and 4 pushed into `no`.
    // NOTE(review): presumably these are the ioctl sequence numbers that return -1 - confirm.
    DrmMockCustom::IoctlResExt ioctlResExt = {2, -1};
    ioctlResExt.no.push_back(3);
    ioctlResExt.no.push_back(4);
    mock->ioctl_res_ext = &ioctlResExt;
    mock->errnoValue = EFAULT;
    mock->ioctl_expected.gemUserptr = 2;
    mock->ioctl_expected.execbuffer2 = 3;

    OsHandleStorage handleStorage;
    OsHandleLinux handle1;
    handleStorage.fragmentStorageData[0].osHandleStorage = &handle1;
    handleStorage.fragmentStorageData[0].cpuPtr = reinterpret_cast(0x1000);
    handleStorage.fragmentStorageData[0].fragmentSize = 4096;

    handleStorage.fragmentStorageData[1].osHandleStorage = nullptr;
    handleStorage.fragmentStorageData[1].cpuPtr = reinterpret_cast(0x2000);
    handleStorage.fragmentStorageData[1].fragmentSize = 8192;

    handleStorage.fragmentStorageData[2].osHandleStorage = nullptr;
    handleStorage.fragmentStorageData[2].cpuPtr = reinterpret_cast(0x4000);
    handleStorage.fragmentStorageData[2].fragmentSize = 4096;

    auto result = memoryManager->populateOsHandles(handleStorage, rootDeviceIndex);

    EXPECT_EQ(MemoryManager::AllocationStatus::InvalidHostPointer, result);
    mock->testIoctls();

    EXPECT_NE(nullptr, handleStorage.fragmentStorageData[0].osHandleStorage);
    EXPECT_NE(nullptr, handleStorage.fragmentStorageData[1].osHandleStorage);
    EXPECT_NE(nullptr, handleStorage.fragmentStorageData[2].osHandleStorage);

    EXPECT_TRUE(handleStorage.fragmentStorageData[1].freeTheFragment);
    EXPECT_TRUE(handleStorage.fragmentStorageData[2].freeTheFragment);

    // Fragment 0 points at the stack object handle1, so it is excluded from cleanup.
    handleStorage.fragmentStorageData[0].freeTheFragment = false;
    handleStorage.fragmentStorageData[1].freeTheFragment = true;
    handleStorage.fragmentStorageData[2].freeTheFragment = true;

    memoryManager->cleanOsHandles(handleStorage, rootDeviceIndex);
    mock->ioctl_res_ext = &mock->NONE;
}

// Same EFAULT scenario; here the assertions target the host-ptr manager: fragment count must
// be 0 and lookups for fragments 1 and 2 must return nullptr.
TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, givenEnabledValidateHostMemoryWhenReadOnlyPointerCausesPinningFailWithEfaultThenPopulateOsHandlesDoesNotStoreTheFragments) {
    std::unique_ptr memoryManager(new TestedDrmMemoryManager(false, false, true, *executionEnvironment));
    memoryManager->registeredEngines = EngineControlContainer{this->device->allEngines};
    for (auto engine : memoryManager->registeredEngines) {
        engine.osContext->incRefInternal();
    }
    ASSERT_NE(nullptr, memoryManager->pinBBs[device->getRootDeviceIndex()]);

    mock->reset();

    DrmMockCustom::IoctlResExt ioctlResExt = {2, -1};
    ioctlResExt.no.push_back(3);
    ioctlResExt.no.push_back(4);
    mock->ioctl_res_ext = &ioctlResExt;
    mock->errnoValue = EFAULT;
    mock->ioctl_expected.gemUserptr = 2;
    mock->ioctl_expected.execbuffer2 = 3;

    OsHandleStorage handleStorage;
    OsHandleLinux handle1;
    handleStorage.fragmentStorageData[0].osHandleStorage = &handle1;
    handleStorage.fragmentStorageData[0].cpuPtr = reinterpret_cast(0x1000);
    handleStorage.fragmentStorageData[0].fragmentSize = 4096;

    handleStorage.fragmentStorageData[1].osHandleStorage = nullptr;
    handleStorage.fragmentStorageData[1].cpuPtr = reinterpret_cast(0x2000);
    handleStorage.fragmentStorageData[1].fragmentSize = 8192;

    handleStorage.fragmentStorageData[2].osHandleStorage = nullptr;
    handleStorage.fragmentStorageData[2].cpuPtr = reinterpret_cast(0x4000);
    handleStorage.fragmentStorageData[2].fragmentSize = 4096;

    auto result = memoryManager->populateOsHandles(handleStorage, rootDeviceIndex);

    EXPECT_EQ(MemoryManager::AllocationStatus::InvalidHostPointer, result);
    mock->testIoctls();

    auto hostPtrManager = static_cast(memoryManager->getHostPtrManager());

    EXPECT_EQ(0u, hostPtrManager->getFragmentCount());
    EXPECT_EQ(nullptr, hostPtrManager->getFragment({handleStorage.fragmentStorageData[1].cpuPtr, rootDeviceIndex}));
    EXPECT_EQ(nullptr, hostPtrManager->getFragment({handleStorage.fragmentStorageData[2].cpuPtr, rootDeviceIndex}));

    handleStorage.fragmentStorageData[0].freeTheFragment = false;
    handleStorage.fragmentStorageData[1].freeTheFragment = true;
    handleStorage.fragmentStorageData[2].freeTheFragment = true;

    memoryManager->cleanOsHandles(handleStorage, rootDeviceIndex);
    mock->ioctl_res_ext = &mock->NONE;
}

// Success path: a single fragment is populated (one userptr and one execbuffer2 ioctl
// expected) and must then be retrievable from the host-ptr manager.
TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, givenEnabledValidateHostMemoryWhenPopulateOsHandlesSucceedsThenFragmentIsStoredInHostPtrManager) {
    std::unique_ptr memoryManager(new TestedDrmMemoryManager(false, false, true, *executionEnvironment));
    memoryManager->registeredEngines = EngineControlContainer{this->device->allEngines};
    for (auto engine : memoryManager->registeredEngines) {
        engine.osContext->incRefInternal();
    }
    ASSERT_NE(nullptr, memoryManager->pinBBs[device->getRootDeviceIndex()]);

    mock->reset();
    mock->ioctl_expected.gemUserptr = 1;
    mock->ioctl_expected.execbuffer2 = 1;

    OsHandleStorage handleStorage;
    handleStorage.fragmentStorageData[0].osHandleStorage = nullptr;
    handleStorage.fragmentStorageData[0].cpuPtr = reinterpret_cast(0x1000);
    handleStorage.fragmentStorageData[0].fragmentSize = 4096;

    auto result = memoryManager->populateOsHandles(handleStorage, rootDeviceIndex);

    EXPECT_EQ(MemoryManager::AllocationStatus::Success, result);
    mock->testIoctls();

    auto hostPtrManager = static_cast(memoryManager->getHostPtrManager());

    EXPECT_EQ(1u, hostPtrManager->getFragmentCount());
    EXPECT_NE(nullptr, hostPtrManager->getFragment({handleStorage.fragmentStorageData[0].cpuPtr, device->getRootDeviceIndex()}));

    handleStorage.fragmentStorageData[0].freeTheFragment = true;
    memoryManager->cleanOsHandles(handleStorage, rootDeviceIndex);
}

// cleanOsHandles() on two heap-allocated fragments flagged freeTheFragment must leave both
// osHandleStorage and residency pointers null.
TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, givenDrmMemoryManagerWhenCleanOsHandlesDeletesHandleDataThenOsHandleStorageAndResidencyIsSetToNullptr) {
    std::unique_ptr memoryManager(new TestedDrmMemoryManager(false, false, true, *executionEnvironment));
    ASSERT_NE(nullptr, memoryManager->pinBBs[device->getRootDeviceIndex()]);
    auto maxOsContextCount = 1u;

    OsHandleStorage handleStorage;
    handleStorage.fragmentStorageData[0].osHandleStorage = new OsHandleLinux();
    handleStorage.fragmentStorageData[0].residency = new ResidencyData(maxOsContextCount);
    handleStorage.fragmentStorageData[0].cpuPtr = reinterpret_cast(0x1000);
    handleStorage.fragmentStorageData[0].fragmentSize = 4096;

    // Both fragments use the same cpuPtr value (0x1000).
    handleStorage.fragmentStorageData[1].osHandleStorage = new OsHandleLinux();
    handleStorage.fragmentStorageData[1].residency = new ResidencyData(maxOsContextCount);
    handleStorage.fragmentStorageData[1].cpuPtr = reinterpret_cast(0x1000);
    handleStorage.fragmentStorageData[1].fragmentSize = 4096;

    handleStorage.fragmentStorageData[0].freeTheFragment = true;
    handleStorage.fragmentStorageData[1].freeTheFragment = true;

    memoryManager->cleanOsHandles(handleStorage, rootDeviceIndex);

    for (uint32_t i = 0; i < 2; i++) {
        EXPECT_EQ(nullptr, handleStorage.fragmentStorageData[i].osHandleStorage);
        EXPECT_EQ(nullptr, handleStorage.fragmentStorageData[i].residency);
    }
}

// Allocates 100 bytes from HEAP_STANDARD after forcing the limited-range allocator and checks
// that the offset from the heap base is non-zero and below the default gpuAddressSpace.
TEST_F(DrmMemoryManagerBasic, ifLimitedRangeAllocatorAvailableWhenAskedForAllocationThenLimitedRangePointerIsReturned) {
    std::unique_ptr memoryManager(new (std::nothrow) TestedDrmMemoryManager(false, false, false, executionEnvironment));
    memoryManager->forceLimitedRangeAllocator(0xFFFFFFFFF);

    size_t size = 100u;
    auto ptr = memoryManager->getGfxPartition(rootDeviceIndex)->heapAllocate(HeapIndex::HEAP_STANDARD, size);
    auto address64bit = ptrDiff(ptr, memoryManager->getGfxPartition(rootDeviceIndex)->getHeapBase(HeapIndex::HEAP_STANDARD));

    EXPECT_LT(address64bit, defaultHwInfo->capabilityTable.gpuAddressSpace);

    EXPECT_LT(0u, address64bit);

    memoryManager->getGfxPartition(rootDeviceIndex)->heapFree(HeapIndex::HEAP_STANDARD, ptr, size);
}

// With gpuAddressSpace overridden to maxNBitValue(48), the HEAP_SVM limit of a freshly built
// manager must equal maxNBitValue(47).
TEST_F(DrmMemoryManagerBasic, givenSpecificAddressSpaceWhenInitializingMemoryManagerThenSetCorrectHeaps) {
    executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getMutableHardwareInfo()->capabilityTable.gpuAddressSpace = maxNBitValue(48);
    TestedDrmMemoryManager memoryManager(false, false, false, executionEnvironment);

    auto gfxPartition = memoryManager.getGfxPartition(rootDeviceIndex);
    auto limit = gfxPartition->getHeapLimit(HeapIndex::HEAP_SVM);

    EXPECT_EQ(maxNBitValue(48 - 1), limit);
}

// EnableHostPtrTracking is switched off (restored by DebugManagerStateRestore); a 13-byte
// allocation at the unaligned pointer 0x5001 must keep pointer and size and report offset 1.
TEST_F(DrmMemoryManagerBasic, givenDisabledHostPtrTrackingWhenAllocateGraphicsMemoryForNonSvmHostPtrIsCalledWithNotAlignedPtrIsPassedThenAllocationIsCreated) {
    DebugManagerStateRestore restore;
    DebugManager.flags.EnableHostPtrTracking.set(false);

    AllocationData allocationData;
    std::unique_ptr memoryManager(new (std::nothrow) TestedDrmMemoryManager(false, false, false, executionEnvironment));

    memoryManager->forceLimitedRangeAllocator(MemoryConstants::max48BitAddress);

    allocationData.size = 13;
    allocationData.hostPtr = reinterpret_cast(0x5001);
    allocationData.rootDeviceIndex = rootDeviceIndex;
    auto allocation = memoryManager->allocateGraphicsMemoryForNonSvmHostPtr(allocationData);

    EXPECT_NE(nullptr, allocation);
    EXPECT_EQ(0x5001u, reinterpret_cast(allocation->getUnderlyingBuffer()));
    EXPECT_EQ(13u, allocation->getUnderlyingBufferSize());
    EXPECT_EQ(1u, allocation->getAllocationOffset());

    memoryManager->freeGraphicsMemory(allocation);
}

// For IMAGE and SHARED_RESOURCE_COPY, allocateGraphicsMemoryInDevicePool() must return nullptr
// and overwrite the initial Error status with RetryInNonDevicePool.
TEST_F(DrmMemoryManagerBasic, givenImageOrSharedResourceCopyWhenGraphicsAllocationInDevicePoolIsAllocatedThenNullptrIsReturned) {
    std::unique_ptr memoryManager(new (std::nothrow) TestedDrmMemoryManager(false, false, false, executionEnvironment));

    MemoryManager::AllocationStatus status = MemoryManager::AllocationStatus::Error;
    AllocationData allocData;
    allocData.size = MemoryConstants::pageSize;
    allocData.flags.allocateMemory = true;
    allocData.rootDeviceIndex = rootDeviceIndex;

    AllocationType types[] = {AllocationType::IMAGE,
                              AllocationType::SHARED_RESOURCE_COPY};

    for (auto type : types) {
        allocData.type = type;
        auto allocation = memoryManager->allocateGraphicsMemoryInDevicePool(allocData, status);
        EXPECT_EQ(nullptr, allocation);
        EXPECT_EQ(MemoryManager::AllocationStatus::RetryInNonDevicePool, status);
    }
}

// A manager built with localMemoryEnabled == false must answer a device-pool request with
// nullptr and RetryInNonDevicePool.
TEST_F(DrmMemoryManagerBasic, givenLocalMemoryDisabledWhenAllocateInDevicePoolIsCalledThenNullptrAndStatusRetryIsReturned) {
    const bool localMemoryEnabled = false;
    TestedDrmMemoryManager memoryManager(localMemoryEnabled, false, false, executionEnvironment);
    MemoryManager::AllocationStatus status = MemoryManager::AllocationStatus::Success;
    AllocationData allocData;
    allocData.size = MemoryConstants::pageSize;
    allocData.flags.useSystemMemory = false;
    allocData.flags.allocateMemory = true;
    allocData.rootDeviceIndex = rootDeviceIndex;

    auto allocation = memoryManager.allocateGraphicsMemoryInDevicePool(allocData, status);
    EXPECT_EQ(nullptr, allocation);
    EXPECT_EQ(MemoryManager::AllocationStatus::RetryInNonDevicePool, status);
}

// A DEBUG_MODULE_AREA allocation must be a 32-bit allocation whose GPU address and GPU base
// address both equal the canonized base of the (device) internal front-window heap, which in
// turn must equal the canonized base of the selected internal heap.
TEST_F(DrmMemoryManagerTest, givenDebugModuleAreaTypeWhenCreatingAllocationThen32BitDrmAllocationWithFrontWindowGpuVaIsReturned) {
    mock->ioctl_expected.gemUserptr = 1;
    mock->ioctl_expected.gemWait = 1;
    mock->ioctl_expected.gemClose = 1;

    const auto size = MemoryConstants::pageSize64k;

    NEO::AllocationProperties properties{device->getRootDeviceIndex(), true,
                                         size,
                                         NEO::AllocationType::DEBUG_MODULE_AREA,
                                         false,
                                         device->getDeviceBitfield()};

    auto moduleDebugArea = memoryManager->allocateGraphicsMemoryWithProperties(properties);

    EXPECT_NE(nullptr, moduleDebugArea);
    EXPECT_NE(nullptr, moduleDebugArea->getUnderlyingBuffer());
    EXPECT_GE(moduleDebugArea->getUnderlyingBufferSize(), size);

    auto address64bit = moduleDebugArea->getGpuAddressToPatch();
    EXPECT_LT(address64bit, MemoryConstants::max32BitAddress);
    EXPECT_TRUE(moduleDebugArea->is32BitAllocation());

    HeapIndex heap = HeapAssigner::mapInternalWindowIndex(memoryManager->selectInternalHeap(moduleDebugArea->isAllocatedInLocalMemoryPool()));
    EXPECT_TRUE(heap == HeapIndex::HEAP_INTERNAL_DEVICE_FRONT_WINDOW || heap == HeapIndex::HEAP_INTERNAL_FRONT_WINDOW);

    auto frontWindowBase = GmmHelper::canonize(memoryManager->getGfxPartition(moduleDebugArea->getRootDeviceIndex())->getHeapBase(heap));
    EXPECT_EQ(frontWindowBase, moduleDebugArea->getGpuBaseAddress());
    EXPECT_EQ(frontWindowBase, moduleDebugArea->getGpuAddress());

    auto internalHeapBase = GmmHelper::canonize(memoryManager->getGfxPartition(moduleDebugArea->getRootDeviceIndex())->getHeapBase(memoryManager->selectInternalHeap(moduleDebugArea->isAllocatedInLocalMemoryPool())));
    EXPECT_EQ(internalHeapBase, moduleDebugArea->getGpuBaseAddress());

    memoryManager->freeGraphicsMemory(moduleDebugArea);
}

// The allocation type given to the DrmAllocation constructor is returned by getAllocationType().
TEST(DrmAllocationTest, givenAllocationTypeWhenPassedToDrmAllocationConstructorThenAllocationTypeIsStored) {
    DrmAllocation allocation{0, AllocationType::COMMAND_BUFFER, nullptr, nullptr, static_cast(0), 0u, MemoryPool::MemoryNull};
    EXPECT_EQ(AllocationType::COMMAND_BUFFER, allocation.getAllocationType());

    DrmAllocation allocation2{0, AllocationType::UNKNOWN, nullptr, nullptr, 0ULL, static_cast(0), MemoryPool::MemoryNull};
    EXPECT_EQ(AllocationType::UNKNOWN, allocation2.getAllocationType());
}

// The memory pool given to the DrmAllocation constructor is returned by getMemoryPool().
TEST(DrmAllocationTest, givenMemoryPoolWhenPassedToDrmAllocationConstructorThenMemoryPoolIsStored) {
    DrmAllocation allocation{0, AllocationType::COMMAND_BUFFER, nullptr, nullptr, static_cast(0), 0u, MemoryPool::System64KBPages};
    EXPECT_EQ(MemoryPool::System64KBPages, allocation.getMemoryPool());

    DrmAllocation allocation2{0, AllocationType::UNKNOWN, nullptr, nullptr, 0ULL, static_cast(0), MemoryPool::SystemCpuInaccessible};
    EXPECT_EQ(MemoryPool::SystemCpuInaccessible, allocation2.getMemoryPool());
}

// reserveCpuAddressRange() is expected to return nullptr here; the value is stored on the
// allocation via setReservedAddressRange() and read back through the getters.
// NOTE(review): the test name says "ExpectProperAddress" while the assertion expects nullptr -
// confirm this is the intended contract for the DRM manager.
TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, whenReservingAddressRangeThenExpectProperAddressAndReleaseWhenFreeing) {
    constexpr size_t size = 0x1000;
    auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), size});
    ASSERT_NE(nullptr, allocation);
    void *reserve = memoryManager->reserveCpuAddressRange(size, 0u);
    EXPECT_EQ(nullptr, reserve);
    allocation->setReservedAddressRange(reserve, size);
    EXPECT_EQ(reserve, allocation->getReservedAddressPtr());
    EXPECT_EQ(size, allocation->getReservedAddressSize());
    memoryManager->freeGraphicsMemory(allocation);
}

// For four root devices, obtainFdFromHandle(boHandle, i) must forward the handle with
// DRM_CLOEXEC | DRM_RDWR to the mock of device i and return the mock's outputFd (1337).
TEST(DrmMemoryManagerWithExplicitExpectationsTest2, whenObtainFdFromHandleIsCalledThenProperFdHandleIsReturned) {
    auto executionEnvironment = std::make_unique();
    executionEnvironment->prepareRootDeviceEnvironments(4u);
    for (auto i = 0u; i < 4u; i++) {
        executionEnvironment->rootDeviceEnvironments[i]->setHwInfo(defaultHwInfo.get());
        // NOTE(review): each mock is constructed from rootDeviceEnvironments[0] rather than [i] -
        // confirm this is intentional.
        auto mock = new DrmMockCustom(*executionEnvironment->rootDeviceEnvironments[0]);
        executionEnvironment->rootDeviceEnvironments[i]->osInterface = std::make_unique();
        executionEnvironment->rootDeviceEnvironments[i]->osInterface->setDriverModel(std::unique_ptr(mock));
    }
    auto memoryManager = std::make_unique(false, false, false, *executionEnvironment);
    for (auto i = 0u; i < 4u; i++) {
        auto mock = executionEnvironment->rootDeviceEnvironments[i]->osInterface->getDriverModel()->as();

        int boHandle = 3;
        mock->outputFd = 1337;
        mock->ioctl_expected.handleToPrimeFd = 1;
        auto fdHandle = memoryManager->obtainFdFromHandle(boHandle, i);
        EXPECT_EQ(mock->inputHandle, static_cast(boHandle));
        EXPECT_EQ(mock->inputFlags, DRM_CLOEXEC | DRM_RDWR);
        EXPECT_EQ(1337, fdHandle);
    }
}

// A 2 MB, 2 MB-aligned SVM_CPU allocation: the CPU buffer is driver-allocated, the GPU address
// differs from the CPU pointer and lies strictly inside HEAP_STANDARD, and the reserved range
// is alignUp(size, alignment) + alignment bytes (larger than the BO).
TEST_F(DrmMemoryManagerTest, givenSvmCpuAllocationWhenSizeAndAlignmentProvidedThenAllocateMemoryAndReserveGpuVa) {
    mock->ioctl_expected.gemUserptr = 1;
    mock->ioctl_expected.gemWait = 1;
    mock->ioctl_expected.gemClose = 1;

    AllocationData allocationData;
    allocationData.size = 2 * MemoryConstants::megaByte;
    allocationData.alignment = 2 * MemoryConstants::megaByte;
    allocationData.type = AllocationType::SVM_CPU;
    allocationData.rootDeviceIndex = rootDeviceIndex;

    DrmAllocation *allocation = memoryManager->allocateGraphicsMemoryWithAlignment(allocationData);
    ASSERT_NE(nullptr, allocation);

    EXPECT_EQ(AllocationType::SVM_CPU, allocation->getAllocationType());

    EXPECT_EQ(allocationData.size, allocation->getUnderlyingBufferSize());
    EXPECT_NE(nullptr, allocation->getUnderlyingBuffer());
    EXPECT_EQ(allocation->getUnderlyingBuffer(), allocation->getDriverAllocatedCpuPtr());

    EXPECT_NE(0llu, allocation->getGpuAddress());
    EXPECT_NE(reinterpret_cast(allocation->getUnderlyingBuffer()), allocation->getGpuAddress());

    auto bo = allocation->getBO();
    ASSERT_NE(nullptr, bo);

    EXPECT_NE(0llu, bo->peekAddress());

    EXPECT_LT(GmmHelper::canonize(memoryManager->getGfxPartition(rootDeviceIndex)->getHeapBase(HeapIndex::HEAP_STANDARD)), bo->peekAddress());
    EXPECT_GT(GmmHelper::canonize(memoryManager->getGfxPartition(rootDeviceIndex)->getHeapLimit(HeapIndex::HEAP_STANDARD)), bo->peekAddress());

    EXPECT_EQ(reinterpret_cast(allocation->getGpuAddress()), alignUp(allocation->getReservedAddressPtr(), allocationData.alignment));
    EXPECT_EQ(alignUp(allocationData.size, allocationData.alignment) + allocationData.alignment, allocation->getReservedAddressSize());

    EXPECT_GT(allocation->getReservedAddressSize(), bo->peekSize());

    memoryManager->freeGraphicsMemory(allocation);
}

// HEAP_STANDARD is re-initialised with heapInit(..., 0, 0) before the request; the SVM_CPU
// allocation must then return nullptr and no userptr/wait/close ioctls are expected.
TEST_F(DrmMemoryManagerTest, givenSvmCpuAllocationWhenSizeAndAlignmentProvidedButFailsToReserveGpuVaThenNullAllocationIsReturned) {
    mock->ioctl_expected.gemUserptr = 0;
    mock->ioctl_expected.gemWait = 0;
    mock->ioctl_expected.gemClose = 0;

    memoryManager->getGfxPartition(rootDeviceIndex)->heapInit(HeapIndex::HEAP_STANDARD, 0, 0);

    AllocationData allocationData;
    allocationData.size = 2 * MemoryConstants::megaByte;
    allocationData.alignment = 2 * MemoryConstants::megaByte;
    allocationData.type = AllocationType::SVM_CPU;
    allocationData.rootDeviceIndex = rootDeviceIndex;

    DrmAllocation *allocation = memoryManager->allocateGraphicsMemoryWithAlignment(allocationData);
    EXPECT_EQ(nullptr, allocation);
}

// Passes a canonized HEAP_STANDARD address to releaseGpuRange() on a manager whose gfx
// partition was replaced by a mock. There is no assertion after the release call.
// NOTE(review): presumably the mock partition validates the address it receives - confirm.
TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerAndReleaseGpuRangeIsCalledThenGpuAddressIsDecanonized) {
    constexpr size_t reservedCpuAddressRangeSize = is64bit ? (6 * 4 * GB) : 0;
    auto hwInfo = defaultHwInfo.get();
    auto mockGfxPartition = std::make_unique();
    mockGfxPartition->init(hwInfo->capabilityTable.gpuAddressSpace, reservedCpuAddressRangeSize, 0, 1);
    auto size = 2 * MemoryConstants::megaByte;
    auto gpuAddress = mockGfxPartition->heapAllocate(HeapIndex::HEAP_STANDARD, size);
    auto gpuAddressCanonized = GmmHelper::canonize(gpuAddress);
    EXPECT_LE(gpuAddress, gpuAddressCanonized);

    // release() transfers the raw pointer to overrideGfxPartition().
    memoryManager->overrideGfxPartition(mockGfxPartition.release());
    memoryManager->releaseGpuRange(reinterpret_cast(gpuAddressCanonized), size, 0);
    // Swap in a second, freshly constructed partition before the fixture tears down.
    auto mockGfxPartitionBasic = std::make_unique();
    memoryManager->overrideGfxPartition(mockGfxPartitionBasic.release());
}

// Freeing a BUFFER allocation must call unreference once per handle slot
// (EngineLimits::maxHandleCount), each with synchronousDestroy == true, and call
// releaseGpuRange and alignedFreeWrapper exactly once each.
TEST(DrmMemoryManagerFreeGraphicsMemoryCallSequenceTest, givenDrmMemoryManagerAndFreeGraphicsMemoryIsCalledThenUnreferenceBufferObjectIsCalledFirstWithSynchronousDestroySetToTrue) {
    MockExecutionEnvironment executionEnvironment(defaultHwInfo.get());
    executionEnvironment.rootDeviceEnvironments[0]->osInterface = std::make_unique();
    auto drm = Drm::create(nullptr, *executionEnvironment.rootDeviceEnvironments[0]);
    executionEnvironment.rootDeviceEnvironments[0]->osInterface->setDriverModel(std::unique_ptr(drm));
    executionEnvironment.rootDeviceEnvironments[0]->memoryOperationsInterface = DrmMemoryOperationsHandler::create(*drm, 0u);
    TestedDrmMemoryManager memoryManger(executionEnvironment);

    AllocationProperties properties{mockRootDeviceIndex, MemoryConstants::pageSize, AllocationType::BUFFER, mockDeviceBitfield};
    auto allocation = memoryManger.allocateGraphicsMemoryWithProperties(properties);
    ASSERT_NE(allocation, nullptr);

    memoryManger.freeGraphicsMemory(allocation);

    EXPECT_EQ(EngineLimits::maxHandleCount, memoryManger.unreferenceCalled);
    for (size_t i = 0; i < EngineLimits::maxHandleCount; ++i) {
        EXPECT_TRUE(memoryManger.unreferenceParamsPassed[i].synchronousDestroy);
    }
    EXPECT_EQ(1u, memoryManger.releaseGpuRangeCalled);
    EXPECT_EQ(1u, memoryManger.alignedFreeWrapperCalled);
}
// NOTE(review): throughout this text std::make_unique/static_cast/as() calls carry no template
// argument list. This looks like an extraction artifact - confirm against the upstream file.

// Freeing an allocation created from a shared handle: the first recorded unreference call must
// have synchronousDestroy == false, later recorded calls must have it set to true.
// NOTE(review): unreferenceCalled is expected to be maxHandleCount, but the loop stops at
// index maxHandleCount - 2, so the last recorded call is never checked - confirm whether the
// bound should be maxHandleCount.
TEST(DrmMemoryManagerFreeGraphicsMemoryUnreferenceTest, givenDrmMemoryManagerAndFreeGraphicsMemoryIsCalledForSharedAllocationThenUnreferenceBufferObjectIsCalledWithSynchronousDestroySetToFalse) {
    MockExecutionEnvironment executionEnvironment(defaultHwInfo.get());
    const uint32_t rootDeviceIndex = 0u;
    executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->osInterface = std::make_unique();
    auto drm = Drm::create(nullptr, *executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]);
    executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->osInterface->setDriverModel(std::unique_ptr(drm));
    executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->memoryOperationsInterface = DrmMemoryOperationsHandler::create(*drm, 0u);
    TestedDrmMemoryManager memoryManger(executionEnvironment);

    osHandle handle = 1u;
    AllocationProperties properties(rootDeviceIndex, false, MemoryConstants::pageSize, AllocationType::SHARED_BUFFER, false, {});
    auto allocation = memoryManger.createGraphicsAllocationFromSharedHandle(handle, properties, false, false);
    ASSERT_NE(nullptr, allocation);

    memoryManger.freeGraphicsMemory(allocation);

    EXPECT_EQ(1 + EngineLimits::maxHandleCount - 1, memoryManger.unreferenceCalled);
    EXPECT_FALSE(memoryManger.unreferenceParamsPassed[0].synchronousDestroy);
    for (size_t i = 1; i < EngineLimits::maxHandleCount - 1; ++i) {
        EXPECT_TRUE(memoryManger.unreferenceParamsPassed[i].synchronousDestroy);
    }
}

// With CreateMultipleRootDevices set to 4, getDrm(i) must return the driver model of root
// device i and getRootDeviceIndex() must map it back to i; a null Drm maps to
// CommonConstants::unspecifiedDeviceIndex.
TEST(DrmMemoryMangerTest, givenMultipleRootDeviceWhenMemoryManagerGetsDrmThenDrmIsFromCorrectRootDevice) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.CreateMultipleRootDevices.set(4);
    VariableBackup backup{&ultHwConfig};
    ultHwConfig.useMockedPrepareDeviceEnvironmentsFunc = false;

    initPlatform();

    TestedDrmMemoryManager drmMemoryManager(*platform()->peekExecutionEnvironment());
    for (auto i = 0u; i < platform()->peekExecutionEnvironment()->rootDeviceEnvironments.size(); i++) {
        auto drmFromRootDevice = platform()->peekExecutionEnvironment()->rootDeviceEnvironments[i]->osInterface->getDriverModel()->as();
        EXPECT_EQ(drmFromRootDevice, &drmMemoryManager.getDrm(i));
        EXPECT_EQ(i, drmMemoryManager.getRootDeviceIndex(drmFromRootDevice));
    }
    EXPECT_EQ(CommonConstants::unspecifiedDeviceIndex, drmMemoryManager.getRootDeviceIndex(nullptr));
}

// Walks through several allocation types and checks which Drm::ResourceClass gets registered
// by registerBOBindExtHandle(); BUFFER_HOST_MEMORY must register nothing.
TEST(DrmAllocationTest, givenResourceRegistrationEnabledWhenAllocationTypeShouldBeRegisteredThenBoHasBindExtHandleAdded) {
    auto executionEnvironment = std::make_unique();
    executionEnvironment->prepareRootDeviceEnvironments(1);

    DrmMockResources drm(*executionEnvironment->rootDeviceEnvironments[0]);

    // One class handle per resource class, numbered from 3.
    for (uint32_t i = 3; i < 3 + static_cast(Drm::ResourceClass::MaxSize); i++) {
        drm.classHandles.push_back(i);
    }

    {
        MockBufferObject bo(&drm, 0, 0, 1);
        MockDrmAllocation allocation(AllocationType::DEBUG_CONTEXT_SAVE_AREA, MemoryPool::System4KBPages);
        allocation.bufferObjects[0] = &bo;
        allocation.registerBOBindExtHandle(&drm);
        EXPECT_EQ(DrmMockResources::registerResourceReturnHandle, bo.bindExtHandles[0]);
        EXPECT_EQ(Drm::ResourceClass::ContextSaveArea, drm.registeredClass);
    }
    // Reset the recorded class to the sentinel between cases.
    drm.registeredClass = Drm::ResourceClass::MaxSize;

    {
        MockBufferObject bo(&drm, 0, 0, 1);
        MockDrmAllocation allocation(AllocationType::DEBUG_SBA_TRACKING_BUFFER, MemoryPool::System4KBPages);
        allocation.bufferObjects[0] = &bo;
        allocation.registerBOBindExtHandle(&drm);
        EXPECT_EQ(DrmMockResources::registerResourceReturnHandle, bo.bindExtHandles[0]);
        EXPECT_EQ(Drm::ResourceClass::SbaTrackingBuffer, drm.registeredClass);
    }
    drm.registeredClass = Drm::ResourceClass::MaxSize;

    {
        MockBufferObject bo(&drm, 0, 0, 1);
        MockDrmAllocation allocation(AllocationType::KERNEL_ISA, MemoryPool::System4KBPages);
        allocation.bufferObjects[0] = &bo;
        allocation.registerBOBindExtHandle(&drm);
        EXPECT_EQ(DrmMockResources::registerResourceReturnHandle, bo.bindExtHandles[0]);
        EXPECT_EQ(Drm::ResourceClass::Isa, drm.registeredClass);
    }
    drm.registeredClass = Drm::ResourceClass::MaxSize;

    {
        MockBufferObject bo(&drm, 0, 0, 1);
        MockDrmAllocation allocation(AllocationType::DEBUG_MODULE_AREA, MemoryPool::System4KBPages);
        allocation.bufferObjects[0] = &bo;
        allocation.registerBOBindExtHandle(&drm);
        EXPECT_EQ(DrmMockResources::registerResourceReturnHandle, bo.bindExtHandles[0]);
        EXPECT_EQ(Drm::ResourceClass::ModuleHeapDebugArea, drm.registeredClass);
    }

    drm.registeredClass = Drm::ResourceClass::MaxSize;

    {
        MockBufferObject bo(&drm, 0, 0, 1);
        MockDrmAllocation allocation(AllocationType::BUFFER_HOST_MEMORY, MemoryPool::System4KBPages);
        allocation.bufferObjects[0] = &bo;
        allocation.registerBOBindExtHandle(&drm);
        EXPECT_EQ(0u, bo.bindExtHandles.size());
        EXPECT_EQ(Drm::ResourceClass::MaxSize, drm.registeredClass);
    }
}

// KERNEL_ISA_INTERNAL must not add a bind-ext handle nor change the registered class.
TEST(DrmAllocationTest, givenResourceRegistrationEnabledWhenAllocationTypeShouldNotBeRegisteredThenNoBindHandleCreated) {
    auto executionEnvironment = std::make_unique();
    executionEnvironment->prepareRootDeviceEnvironments(1);

    DrmMockResources drm(*executionEnvironment->rootDeviceEnvironments[0]);
    drm.registeredClass = Drm::ResourceClass::MaxSize;

    for (uint32_t i = 3; i < 3 + static_cast(Drm::ResourceClass::MaxSize); i++) {
        drm.classHandles.push_back(i);
    }

    {
        MockBufferObject bo(&drm, 0, 0, 1);
        MockDrmAllocation allocation(AllocationType::KERNEL_ISA_INTERNAL, MemoryPool::System4KBPages);
        allocation.bufferObjects[0] = &bo;
        allocation.registerBOBindExtHandle(&drm);
        EXPECT_EQ(0u, bo.bindExtHandles.size());
    }
    EXPECT_EQ(Drm::ResourceClass::MaxSize, drm.registeredClass);
}

// With no class handles stored on the mock, registerBOBindExtHandle() must leave the BO
// without bind-ext handles and the registered class at MaxSize.
TEST(DrmAllocationTest, givenResourceRegistrationNotEnabledWhenRegisteringBindExtHandleThenHandleIsNotAddedToBo) {
    auto executionEnvironment = std::make_unique();
    executionEnvironment->prepareRootDeviceEnvironments(1);

    DrmMockResources drm(*executionEnvironment->rootDeviceEnvironments[0]);
    EXPECT_EQ(0u, drm.classHandles.size());

    MockBufferObject bo(&drm, 0, 0, 1);
    MockDrmAllocation allocation(AllocationType::DEBUG_CONTEXT_SAVE_AREA, MemoryPool::System4KBPages);
    allocation.bufferObjects[0] = &bo;
    allocation.registerBOBindExtHandle(&drm);
    EXPECT_EQ(0u, bo.bindExtHandles.size());
    EXPECT_EQ(Drm::ResourceClass::MaxSize, drm.registeredClass);
}

// When the mock reports resource registration as disabled, registerAllocationInOs() must not
// call registerBOBindExtHandle() on the allocation.
TEST(DrmMemoryManager, givenTrackedAllocationTypeAndDisabledRegistrationInDrmWhenAllocatingThenRegisterBoBindExtHandleIsNotCalled) {
    auto executionEnvironment = std::make_unique();
    executionEnvironment->prepareRootDeviceEnvironments(1u);
    executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get());

    auto mockDrm = new DrmMockResources(*executionEnvironment->rootDeviceEnvironments[0]);
    executionEnvironment->rootDeviceEnvironments[0]->osInterface = std::make_unique();
    executionEnvironment->rootDeviceEnvironments[0]->osInterface->setDriverModel(std::unique_ptr(mockDrm));
    auto memoryManager = std::make_unique(false, false, false, *executionEnvironment);

    EXPECT_FALSE(mockDrm->resourceRegistrationEnabled());

    mockDrm->registeredDataSize = 0;

    MockDrmAllocation allocation(AllocationType::DEBUG_CONTEXT_SAVE_AREA, MemoryPool::System4KBPages);

    memoryManager->registerAllocationInOs(&allocation);

    EXPECT_FALSE(allocation.registerBOBindExtHandleCalled);
    EXPECT_EQ(Drm::ResourceClass::MaxSize, mockDrm->registeredClass);
}

// With registration enabled, a SCRATCH_SURFACE allocation must be marked for capture by
// registerAllocationInOs() while a BUFFER allocation must not.
TEST(DrmMemoryManager, givenResourceRegistrationEnabledAndAllocTypeToCaptureWhenRegisteringAllocationInOsThenItIsMarkedForCapture) {
    auto executionEnvironment = std::make_unique();
    executionEnvironment->prepareRootDeviceEnvironments(1u);
    executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get());

    auto mockDrm = new DrmMockResources(*executionEnvironment->rootDeviceEnvironments[0]);
    executionEnvironment->rootDeviceEnvironments[0]->osInterface = std::make_unique();
    executionEnvironment->rootDeviceEnvironments[0]->osInterface->setDriverModel(std::unique_ptr(mockDrm));
    auto memoryManager = std::make_unique(false, false, false, *executionEnvironment);

    // mock resource registration enabling by storing class handles
    mockDrm->classHandles.push_back(1);

    MockBufferObject bo(mockDrm, 0, 0, 1);
    MockDrmAllocation allocation(AllocationType::SCRATCH_SURFACE, MemoryPool::System4KBPages);
    allocation.bufferObjects[0] = &bo;
    memoryManager->registerAllocationInOs(&allocation);

    EXPECT_TRUE(allocation.markedForCapture);

    MockDrmAllocation allocation2(AllocationType::BUFFER, MemoryPool::System4KBPages);
    allocation2.bufferObjects[0] = &bo;
    memoryManager->registerAllocationInOs(&allocation2);

    EXPECT_FALSE(allocation2.markedForCapture);
}

// Allocating a DEBUG_SBA_TRACKING_BUFFER must register the SbaTrackingBuffer class with
// sizeof(uint64_t) bytes of data equal to properties.gpuAddress.
TEST(DrmMemoryManager, givenTrackedAllocationTypeWhenAllocatingThenAllocationIsRegistered) {
    auto executionEnvironment = std::make_unique();
    executionEnvironment->prepareRootDeviceEnvironments(1u);
    executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get());

    auto mockDrm = new DrmMockResources(*executionEnvironment->rootDeviceEnvironments[0]);
    executionEnvironment->rootDeviceEnvironments[0]->osInterface = std::make_unique();
    executionEnvironment->rootDeviceEnvironments[0]->osInterface->setDriverModel(std::unique_ptr(mockDrm));
    auto memoryManager = std::make_unique(false, false, false, *executionEnvironment);

    for (uint32_t i = 3; i < 3 + static_cast(Drm::ResourceClass::MaxSize); i++) {
        mockDrm->classHandles.push_back(i);
    }

    EXPECT_TRUE(mockDrm->resourceRegistrationEnabled());

    NEO::AllocationProperties properties{0, true, MemoryConstants::pageSize,
                                         NEO::AllocationType::DEBUG_SBA_TRACKING_BUFFER,
                                         false, 1};

    properties.gpuAddress = 0x20000;
    auto sbaAllocation = memoryManager->allocateGraphicsMemoryWithProperties(properties);

    EXPECT_EQ(Drm::ResourceClass::SbaTrackingBuffer, mockDrm->registeredClass);

    EXPECT_EQ(sizeof(uint64_t), mockDrm->registeredDataSize);
    uint64_t *data = reinterpret_cast(mockDrm->registeredData);
    EXPECT_EQ(properties.gpuAddress, *data);

    memoryManager->freeGraphicsMemory(sbaAllocation);
}

// Freeing the tracked allocation must unregister exactly one handle, equal to
// DrmMockResources::registerResourceReturnHandle.
TEST(DrmMemoryManager, givenTrackedAllocationTypeWhenFreeingThenRegisteredHandlesAreUnregistered) {
    auto executionEnvironment = std::make_unique();
    executionEnvironment->prepareRootDeviceEnvironments(1u);
    executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get());

    auto mockDrm = new DrmMockResources(*executionEnvironment->rootDeviceEnvironments[0]);
    executionEnvironment->rootDeviceEnvironments[0]->osInterface = std::make_unique();
    executionEnvironment->rootDeviceEnvironments[0]->osInterface->setDriverModel(std::unique_ptr(mockDrm));
    auto memoryManager = std::make_unique(false, false, false, *executionEnvironment);

    for (uint32_t i = 3; i < 3 + static_cast(Drm::ResourceClass::MaxSize); i++) {
        mockDrm->classHandles.push_back(i);
    }

    EXPECT_TRUE(mockDrm->resourceRegistrationEnabled());

    NEO::AllocationProperties properties{0, true, MemoryConstants::pageSize,
                                         NEO::AllocationType::DEBUG_SBA_TRACKING_BUFFER,
                                         false, 1};

    properties.gpuAddress = 0x20000;
    auto sbaAllocation = memoryManager->allocateGraphicsMemoryWithProperties(properties);
    EXPECT_EQ(0u, mockDrm->unregisterCalledCount);

    memoryManager->freeGraphicsMemory(sbaAllocation);

    EXPECT_EQ(DrmMockResources::registerResourceReturnHandle, mockDrm->unregisteredHandle);
    EXPECT_EQ(1u, mockDrm->unregisterCalledCount);
}

// registerBOBindExtHandle() on an allocation with no buffer object assigned: exactly one
// registered bind handle is still recorded on the allocation, then released.
TEST(DrmMemoryManager, givenNullBoWhenRegisteringBindExtHandleThenEarlyReturn) {
    auto executionEnvironment = std::make_unique();
    executionEnvironment->prepareRootDeviceEnvironments(1u);
    executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get());

    auto mockDrm = std::make_unique(*executionEnvironment->rootDeviceEnvironments[0]);

    for (uint32_t i = 3; i < 3 + static_cast(Drm::ResourceClass::MaxSize); i++) {
        mockDrm->classHandles.push_back(i);
    }

    EXPECT_TRUE(mockDrm->resourceRegistrationEnabled());

    MockDrmAllocation gfxAllocation(AllocationType::DEBUG_SBA_TRACKING_BUFFER, MemoryPool::MemoryNull);

    gfxAllocation.registerBOBindExtHandle(mockDrm.get());
    EXPECT_EQ(1u, gfxAllocation.registeredBoBindHandles.size());
    gfxAllocation.freeRegisteredBOBindExtHandles(mockDrm.get());
}

// After registration the BO must report both isMarkedForCapture() and
// isImmediateBindingRequired().
TEST(DrmAllocationTest, givenResourceRegistrationEnabledWhenAllocationIsRegisteredThenBosAreMarkedForCaptureAndRequireImmediateBinding) {
    auto executionEnvironment = std::make_unique();
    executionEnvironment->prepareRootDeviceEnvironments(1);

    DrmMockResources drm(*executionEnvironment->rootDeviceEnvironments[0]);
    // mock resource registration enabling by storing class handles
    drm.classHandles.push_back(1);

    MockBufferObject bo(&drm, 0, 0, 1);
    MockDrmAllocation allocation(AllocationType::DEBUG_CONTEXT_SAVE_AREA, MemoryPool::System4KBPages);
    allocation.bufferObjects[0] = &bo;
    allocation.registerBOBindExtHandle(&drm);

    EXPECT_TRUE(bo.isMarkedForCapture());
    EXPECT_TRUE(bo.isImmediateBindingRequired());
}

// KERNEL_ISA registration must add two bind-ext handles: the resource handle followed by
// drm.currentCookie - 1; releasing them must trigger two unregister calls.
TEST(DrmAllocationTest, givenResourceRegistrationEnabledWhenIsaIsRegisteredThenCookieIsAddedToBoHandle) {
    auto executionEnvironment = std::make_unique();
    executionEnvironment->prepareRootDeviceEnvironments(1);

    DrmMockResources drm(*executionEnvironment->rootDeviceEnvironments[0]);

    for (uint32_t i = 3; i < 3 + static_cast(Drm::ResourceClass::MaxSize); i++) {
        drm.classHandles.push_back(i);
    }

    drm.registeredClass = Drm::ResourceClass::MaxSize;

    MockBufferObject bo(&drm, 0, 0, 1);
    MockDrmAllocation allocation(AllocationType::KERNEL_ISA, MemoryPool::System4KBPages);
    allocation.bufferObjects[0] = &bo;
    allocation.registerBOBindExtHandle(&drm);
    EXPECT_EQ(2u, bo.bindExtHandles.size());
    EXPECT_EQ(DrmMockResources::registerResourceReturnHandle, bo.bindExtHandles[0]);
    EXPECT_EQ(drm.currentCookie - 1, bo.bindExtHandles[1]);

    allocation.freeRegisteredBOBindExtHandles(&drm);
    EXPECT_EQ(2u, drm.unregisterCalledCount);
}

// setCacheRegion() with CacheRegion::Default must return true on a plain DrmMock.
TEST(DrmAllocationTest, givenDrmAllocationWhenSetCacheRegionIsCalledForDefaultRegionThenReturnTrue) {
    auto executionEnvironment = std::make_unique();
    executionEnvironment->prepareRootDeviceEnvironments(1);

    DrmMock drm(*executionEnvironment->rootDeviceEnvironments[0]);

    MockDrmAllocation allocation(AllocationType::BUFFER, MemoryPool::LocalMemory);

    EXPECT_TRUE(allocation.setCacheRegion(&drm, CacheRegion::Default));
}

// Without any cache info set up on the mock, requesting Region1 must return false.
TEST(DrmAllocationTest, givenDrmAllocationWhenCacheInfoIsNotAvailableThenCacheRegionIsNotSet) {
    auto executionEnvironment = std::make_unique();
    executionEnvironment->prepareRootDeviceEnvironments(1);

    DrmMock drm(*executionEnvironment->rootDeviceEnvironments[0]);

    MockDrmAllocation allocation(AllocationType::BUFFER, MemoryPool::LocalMemory);

    EXPECT_FALSE(allocation.setCacheRegion(&drm, CacheRegion::Region1));
}

// With cache info created by setupCacheInfo(*defaultHwInfo), requesting Region1 must still
// return false.
TEST(DrmAllocationTest, givenDrmAllocationWhenDefaultCacheInfoIsAvailableThenCacheRegionIsNotSet) {
    auto executionEnvironment = std::make_unique();
    executionEnvironment->prepareRootDeviceEnvironments(1);

    DrmMock drm(*executionEnvironment->rootDeviceEnvironments[0]);
    drm.setupCacheInfo(*defaultHwInfo.get());

    MockDrmAllocation allocation(AllocationType::BUFFER, MemoryPool::LocalMemory);

    EXPECT_FALSE(allocation.setCacheRegion(&drm, CacheRegion::Region1));
}

// With a MockCacheInfoImpl installed, setCacheAdvice() for CacheRegion::None must return false.
TEST(DrmAllocationTest, givenDrmAllocationWhenCacheRegionIsNotSetThenReturnFalse) {
    auto executionEnvironment = std::make_unique();
    executionEnvironment->prepareRootDeviceEnvironments(1);

    DrmMock drm(*executionEnvironment->rootDeviceEnvironments[0]);
    drm.cacheInfo.reset(new MockCacheInfoImpl(drm, 32 * MemoryConstants::kiloByte, 2, 32));

    MockDrmAllocation allocation(AllocationType::BUFFER, MemoryPool::LocalMemory);

    EXPECT_FALSE(allocation.setCacheAdvice(&drm, 1024, CacheRegion::None));
}

// With the same mock cache info, setCacheAdvice() for Region1 with size 1024 must return true.
TEST(DrmAllocationTest, givenDrmAllocationWhenCacheRegionIsSetSuccessfullyThenReturnTrue) {
    auto executionEnvironment = std::make_unique();
    executionEnvironment->prepareRootDeviceEnvironments(1);

    DrmMock drm(*executionEnvironment->rootDeviceEnvironments[0]);
    drm.cacheInfo.reset(new MockCacheInfoImpl(drm, 32 * MemoryConstants::kiloByte, 2, 32));

    MockDrmAllocation allocation(AllocationType::BUFFER, MemoryPool::LocalMemory);

    EXPECT_TRUE(allocation.setCacheAdvice(&drm, 1024, CacheRegion::Region1));
}

TEST(DrmAllocationTest, givenDrmAllocationWhenCacheRegionIsSetSuccessfullyThenSetRegionInBufferObject) {
auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); DrmMock drm(*executionEnvironment->rootDeviceEnvironments[0]); drm.cacheInfo.reset(new MockCacheInfoImpl(drm, 32 * MemoryConstants::kiloByte, 2, 32)); MockBufferObject bo(&drm, 0, 0, 1); MockDrmAllocation allocation(AllocationType::BUFFER, MemoryPool::LocalMemory); allocation.bufferObjects[0] = &bo; EXPECT_TRUE(allocation.setCacheAdvice(&drm, 1024, CacheRegion::Region1)); for (auto bo : allocation.bufferObjects) { if (bo != nullptr) { EXPECT_EQ(CacheRegion::Region1, bo->peekCacheRegion()); } } } TEST(DrmAllocationTest, givenDrmAllocationWhenBufferObjectIsCreatedThenApplyDefaultCachePolicy) { auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); DrmMock drm(*executionEnvironment->rootDeviceEnvironments[0]); MockBufferObject bo(&drm, 0, 0, 1); MockDrmAllocation allocation(AllocationType::BUFFER, MemoryPool::LocalMemory); allocation.bufferObjects[0] = &bo; for (auto bo : allocation.bufferObjects) { if (bo != nullptr) { EXPECT_EQ(CachePolicy::WriteBack, bo->peekCachePolicy()); } } } TEST(DrmAllocationTest, givenDrmAllocationWhenSetCachePolicyIsCalledThenUpdatePolicyInBufferObject) { auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); DrmMock drm(*executionEnvironment->rootDeviceEnvironments[0]); MockBufferObject bo(&drm, 0, 0, 1); MockDrmAllocation allocation(AllocationType::BUFFER, MemoryPool::LocalMemory); allocation.bufferObjects[0] = &bo; allocation.setCachePolicy(CachePolicy::Uncached); for (auto bo : allocation.bufferObjects) { if (bo != nullptr) { EXPECT_EQ(CachePolicy::Uncached, bo->peekCachePolicy()); } } } TEST(DrmAllocationTest, givenDrmAllocationWhenSetMemAdviseWithCachePolicyIsCalledThenUpdatePolicyInBufferObject) { auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); DrmMock 
drm(*executionEnvironment->rootDeviceEnvironments[0]); MockBufferObject bo(&drm, 0, 0, 1); MockDrmAllocation allocation(AllocationType::BUFFER, MemoryPool::LocalMemory); allocation.bufferObjects[0] = &bo; EXPECT_EQ(CachePolicy::WriteBack, bo.peekCachePolicy()); MemAdviseFlags memAdviseFlags{}; EXPECT_TRUE(memAdviseFlags.cached_memory); for (auto cached : {true, false, true}) { memAdviseFlags.cached_memory = cached; EXPECT_TRUE(allocation.setMemAdvise(&drm, memAdviseFlags)); EXPECT_EQ(cached ? CachePolicy::WriteBack : CachePolicy::Uncached, bo.peekCachePolicy()); EXPECT_EQ(memAdviseFlags.memadvise_flags, allocation.enabledMemAdviseFlags.memadvise_flags); } } TEST(DrmAllocationTest, givenBoWhenMarkingForCaptureThenBosAreMarked) { auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); DrmMock drm(*executionEnvironment->rootDeviceEnvironments[0]); MockBufferObject bo(&drm, 0, 0, 1); MockDrmAllocation allocation(AllocationType::SCRATCH_SURFACE, MemoryPool::System4KBPages); allocation.markForCapture(); allocation.bufferObjects[0] = &bo; allocation.markForCapture(); EXPECT_TRUE(bo.isMarkedForCapture()); } TEST_F(DrmMemoryManagerTest, givenDrmAllocationWithHostPtrWhenItIsCreatedWithCacheRegionThenSetRegionInBufferObject) { mock->ioctl_expected.total = -1; auto drm = static_cast(executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->osInterface->getDriverModel()->as()); drm->cacheInfo.reset(new MockCacheInfoImpl(*drm, 32 * MemoryConstants::kiloByte, 2, 32)); auto ptr = reinterpret_cast(0x1000); auto size = MemoryConstants::pageSize; OsHandleStorage storage; storage.fragmentStorageData[0].cpuPtr = ptr; storage.fragmentStorageData[0].fragmentSize = 1; storage.fragmentCount = 1; memoryManager->populateOsHandles(storage, rootDeviceIndex); auto allocation = std::make_unique(rootDeviceIndex, AllocationType::BUFFER_HOST_MEMORY, nullptr, ptr, castToUint64(ptr), size, MemoryPool::System4KBPages); allocation->fragmentsStorage = 
storage; allocation->setCacheAdvice(drm, 1024, CacheRegion::Region1); for (uint32_t i = 0; i < storage.fragmentCount; i++) { auto bo = static_cast(allocation->fragmentsStorage.fragmentStorageData[i].osHandleStorage)->bo; EXPECT_EQ(CacheRegion::Region1, bo->peekCacheRegion()); } storage.fragmentStorageData[0].freeTheFragment = true; memoryManager->cleanOsHandles(storage, rootDeviceIndex); } HWTEST_F(DrmMemoryManagerTest, givenDrmAllocationWithHostPtrWhenItIsCreatedWithIncorrectCacheRegionThenReturnNull) { mock->ioctl_expected.total = -1; auto drm = static_cast(executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->osInterface->getDriverModel()->as()); drm->setupCacheInfo(*defaultHwInfo.get()); auto ptr = reinterpret_cast(0x1000); auto size = MemoryConstants::pageSize; allocationData.size = size; allocationData.hostPtr = ptr; allocationData.cacheRegion = 0xFFFF; auto allocation = std::unique_ptr(memoryManager->allocateGraphicsMemoryWithHostPtr(allocationData)); EXPECT_EQ(allocation, nullptr); } HWTEST_F(DrmMemoryManagerTest, givenDrmAllocationWithWithAlignmentFromUserptrWhenItIsCreatedWithIncorrectCacheRegionThenReturnNull) { mock->ioctl_expected.total = -1; auto drm = static_cast(executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->osInterface->getDriverModel()->as()); drm->setupCacheInfo(*defaultHwInfo.get()); auto size = MemoryConstants::pageSize; allocationData.size = size; allocationData.cacheRegion = 0xFFFF; auto allocation = static_cast(memoryManager->createAllocWithAlignmentFromUserptr(allocationData, size, 0, 0, 0x1000)); EXPECT_EQ(allocation, nullptr); } TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, givenAllocateGraphicsMemoryWithPropertiesCalledWithDebugSurfaceTypeThenDebugSurfaceIsCreated) { AllocationProperties debugSurfaceProperties{0, true, MemoryConstants::pageSize, NEO::AllocationType::DEBUG_CONTEXT_SAVE_AREA, false, false, 0b1011}; auto debugSurface = 
static_cast(memoryManager->allocateGraphicsMemoryWithProperties(debugSurfaceProperties)); EXPECT_NE(nullptr, debugSurface); auto mem = debugSurface->getUnderlyingBuffer(); ASSERT_NE(nullptr, mem); auto sipType = SipKernel::getSipKernelType(*device); SipKernel::initSipKernel(sipType, *device); auto &stateSaveAreaHeader = NEO::SipKernel::getSipKernel(*device).getStateSaveAreaHeader(); mem = ptrOffset(mem, stateSaveAreaHeader.size()); auto size = debugSurface->getUnderlyingBufferSize() - stateSaveAreaHeader.size(); EXPECT_TRUE(memoryZeroed(mem, size)); memoryManager->freeGraphicsMemory(debugSurface); } TEST_F(DrmMemoryManagerTest, whenWddmMemoryManagerIsCreatedThenAlignmentSelectorHasExpectedAlignments) { std::vector expectedAlignments = { {MemoryConstants::pageSize2Mb, false, AlignmentSelector::anyWastage, HeapIndex::HEAP_STANDARD2MB}, {MemoryConstants::pageSize64k, true, AlignmentSelector::anyWastage, HeapIndex::HEAP_STANDARD64KB}, }; TestedDrmMemoryManager memoryManager(false, false, false, *executionEnvironment); EXPECT_EQ(expectedAlignments, memoryManager.alignmentSelector.peekCandidateAlignments()); } TEST_F(DrmMemoryManagerTest, whenDebugFlagToNotFreeResourcesIsSpecifiedThenFreeIsNotDoingAnything) { DebugManagerStateRestore restorer; DebugManager.flags.DoNotFreeResources.set(true); TestedDrmMemoryManager memoryManager(false, false, false, *executionEnvironment); size_t sizeIn = 1024llu; uint64_t gpuAddress = 0x1337llu; DrmAllocation stackDrmAllocation(0u, AllocationType::BUFFER, nullptr, nullptr, gpuAddress, sizeIn, MemoryPool::System64KBPages); memoryManager.freeGraphicsMemoryImpl(&stackDrmAllocation); } TEST_F(DrmMemoryManagerTest, given2MbPagesDisabledWhenWddmMemoryManagerIsCreatedThenAlignmentSelectorHasExpectedAlignments) { DebugManagerStateRestore restore{}; DebugManager.flags.AlignLocalMemoryVaTo2MB.set(0); std::vector expectedAlignments = { {MemoryConstants::pageSize64k, true, AlignmentSelector::anyWastage, HeapIndex::HEAP_STANDARD64KB}, }; 
TestedDrmMemoryManager memoryManager(false, false, false, *executionEnvironment); EXPECT_EQ(expectedAlignments, memoryManager.alignmentSelector.peekCandidateAlignments()); } TEST_F(DrmMemoryManagerTest, givenCustomAlignmentWhenWddmMemoryManagerIsCreatedThenAlignmentSelectorHasExpectedAlignments) { DebugManagerStateRestore restore{}; { DebugManager.flags.ExperimentalEnableCustomLocalMemoryAlignment.set(MemoryConstants::megaByte); std::vector expectedAlignments = { {MemoryConstants::pageSize2Mb, false, AlignmentSelector::anyWastage, HeapIndex::HEAP_STANDARD2MB}, {MemoryConstants::megaByte, true, AlignmentSelector::anyWastage, HeapIndex::HEAP_STANDARD64KB}, {MemoryConstants::pageSize64k, true, AlignmentSelector::anyWastage, HeapIndex::HEAP_STANDARD64KB}, }; TestedDrmMemoryManager memoryManager(false, false, false, *executionEnvironment); EXPECT_EQ(expectedAlignments, memoryManager.alignmentSelector.peekCandidateAlignments()); } { DebugManager.flags.ExperimentalEnableCustomLocalMemoryAlignment.set(2 * MemoryConstants::pageSize2Mb); std::vector expectedAlignments = { {2 * MemoryConstants::pageSize2Mb, true, AlignmentSelector::anyWastage, HeapIndex::HEAP_STANDARD2MB}, {MemoryConstants::pageSize2Mb, false, AlignmentSelector::anyWastage, HeapIndex::HEAP_STANDARD2MB}, {MemoryConstants::pageSize64k, true, AlignmentSelector::anyWastage, HeapIndex::HEAP_STANDARD64KB}, }; TestedDrmMemoryManager memoryManager(false, false, false, *executionEnvironment); EXPECT_EQ(expectedAlignments, memoryManager.alignmentSelector.peekCandidateAlignments()); } } TEST_F(DrmMemoryManagerTest, givenDrmManagerWithLocalMemoryWhenGettingGlobalMemoryPercentThenCorrectValueIsReturned) { TestedDrmMemoryManager memoryManager(true, false, false, *executionEnvironment); uint32_t rootDeviceIndex = 0u; EXPECT_EQ(memoryManager.getPercentOfGlobalMemoryAvailable(rootDeviceIndex), 0.95); } TEST_F(DrmMemoryManagerTest, givenDrmManagerWithoutLocalMemoryWhenGettingGlobalMemoryPercentThenCorrectValueIsReturned) { 
TestedDrmMemoryManager memoryManager(false, false, false, *executionEnvironment); uint32_t rootDeviceIndex = 0u; EXPECT_EQ(memoryManager.getPercentOfGlobalMemoryAvailable(rootDeviceIndex), 0.8); } struct DrmMemoryManagerToTestLockInLocalMemory : public TestedDrmMemoryManager { using TestedDrmMemoryManager::lockResourceInLocalMemoryImpl; DrmMemoryManagerToTestLockInLocalMemory(ExecutionEnvironment &executionEnvironment) : TestedDrmMemoryManager(true, false, false, executionEnvironment) {} void *lockResourceInLocalMemoryImpl(BufferObject *bo) override { lockedLocalMemory.reset(new uint8_t[bo->peekSize()]); return lockedLocalMemory.get(); } std::unique_ptr lockedLocalMemory; }; TEST_F(DrmMemoryManagerTest, givenDrmManagerWithLocalMemoryWhenLockResourceIsCalledOnWriteCombinedAllocationThenReturnPtrAlignedTo64Kb) { DrmMemoryManagerToTestLockInLocalMemory memoryManager(*executionEnvironment); BufferObject bo(mock, 1, 1024, 0); DrmAllocation drmAllocation(0, AllocationType::WRITE_COMBINED, &bo, nullptr, 0u, 0u, MemoryPool::LocalMemory); EXPECT_EQ(&bo, drmAllocation.getBO()); auto ptr = memoryManager.lockResourceInLocalMemoryImpl(drmAllocation); EXPECT_NE(nullptr, ptr); EXPECT_EQ(ptr, bo.peekLockedAddress()); EXPECT_TRUE(isAligned(ptr)); memoryManager.unlockResourceInLocalMemoryImpl(&bo); EXPECT_EQ(nullptr, bo.peekLockedAddress()); } TEST_F(DrmMemoryManagerTest, givenDrmManagerWithoutLocalMemoryWhenLockResourceIsCalledOnWriteCombinedAllocationThenReturnNullptr) { TestedDrmMemoryManager memoryManager(false, false, false, *executionEnvironment); BufferObject bo(mock, 1, 1024, 0); DrmAllocation drmAllocation(0, AllocationType::WRITE_COMBINED, &bo, nullptr, 0u, 0u, MemoryPool::LocalMemory); EXPECT_EQ(&bo, drmAllocation.getBO()); auto ptr = memoryManager.lockResourceInLocalMemoryImpl(drmAllocation); EXPECT_EQ(nullptr, ptr); memoryManager.unlockResourceInLocalMemoryImpl(&bo); EXPECT_EQ(nullptr, bo.peekLockedAddress()); } TEST_F(DrmMemoryManagerTest, 
givenDrmMemoryManagerWithoutLocalMemoryWhenCopyMemoryToAllocationThenAllocationIsFilledWithCorrectData) { TestedDrmMemoryManager memoryManager(false, false, false, *executionEnvironment); mock->ioctl_expected.gemUserptr = 1; mock->ioctl_expected.gemWait = 1; mock->ioctl_expected.gemClose = 1; std::vector dataToCopy(MemoryConstants::pageSize, 1u); auto allocation = memoryManager.allocateGraphicsMemoryWithProperties({rootDeviceIndex, dataToCopy.size(), AllocationType::BUFFER, device->getDeviceBitfield()}); ASSERT_NE(nullptr, allocation); auto ret = memoryManager.copyMemoryToAllocation(allocation, 0, dataToCopy.data(), dataToCopy.size()); EXPECT_TRUE(ret); EXPECT_EQ(0, memcmp(allocation->getUnderlyingBuffer(), dataToCopy.data(), dataToCopy.size())); memoryManager.freeGraphicsMemory(allocation); } TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerWithoutLocalMemoryAndCpuPtrWhenCopyMemoryToAllocationThenReturnFalse) { TestedDrmMemoryManager memoryManager(false, false, false, *executionEnvironment); mock->ioctl_expected.gemUserptr = 1; mock->ioctl_expected.gemWait = 1; mock->ioctl_expected.gemClose = 1; std::vector dataToCopy(MemoryConstants::pageSize, 1u); auto allocation = memoryManager.allocateGraphicsMemoryWithProperties({rootDeviceIndex, dataToCopy.size(), AllocationType::BUFFER, device->getDeviceBitfield()}); ASSERT_NE(nullptr, allocation); allocation->setCpuPtrAndGpuAddress(nullptr, 0u); auto ret = memoryManager.copyMemoryToAllocation(allocation, 0, dataToCopy.data(), dataToCopy.size()); EXPECT_FALSE(ret); memoryManager.freeGraphicsMemory(allocation); } TEST_F(DrmMemoryManagerTest, givenNullDefaultAllocWhenCreateGraphicsAllocationFromExistingStorageThenDoNotImportHandle) { TestedDrmMemoryManager memoryManager(false, false, false, *executionEnvironment); mock->ioctl_expected.primeFdToHandle = 0; MockAllocationProperties properties(0u, 1u); MultiGraphicsAllocation allocation(0u); auto alloc = memoryManager.createGraphicsAllocationFromExistingStorage(properties, 
nullptr, allocation); EXPECT_NE(alloc, nullptr); memoryManager.freeGraphicsMemory(alloc); } TEST(DrmMemoryManagerSimpleTest, givenDrmMemoryManagerWhenAllocateInDevicePoolIsCalledThenNullptrAndStatusRetryIsReturned) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); executionEnvironment.rootDeviceEnvironments[0]->osInterface = std::make_unique(); auto drm = Drm::create(nullptr, *executionEnvironment.rootDeviceEnvironments[0]); executionEnvironment.rootDeviceEnvironments[0]->osInterface->setDriverModel(std::unique_ptr(drm)); executionEnvironment.rootDeviceEnvironments[0]->memoryOperationsInterface = DrmMemoryOperationsHandler::create(*drm, 0u); TestedDrmMemoryManager memoryManager(executionEnvironment); MemoryManager::AllocationStatus status = MemoryManager::AllocationStatus::Success; AllocationData allocData; allocData.size = MemoryConstants::pageSize; allocData.flags.useSystemMemory = true; allocData.flags.allocateMemory = true; auto allocation = memoryManager.allocateGraphicsMemoryInDevicePool(allocData, status); EXPECT_EQ(nullptr, allocation); EXPECT_EQ(MemoryManager::AllocationStatus::RetryInNonDevicePool, status); } TEST(DrmMemoryManagerSimpleTest, givenDrmMemoryManagerWhenLockResourceIsCalledOnNullBufferObjectThenReturnNullPtr) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); executionEnvironment.rootDeviceEnvironments[0]->osInterface = std::make_unique(); auto drm = Drm::create(nullptr, *executionEnvironment.rootDeviceEnvironments[0]); executionEnvironment.rootDeviceEnvironments[0]->osInterface->setDriverModel(std::unique_ptr(drm)); executionEnvironment.rootDeviceEnvironments[0]->memoryOperationsInterface = DrmMemoryOperationsHandler::create(*drm, 0u); TestedDrmMemoryManager memoryManager(executionEnvironment); DrmAllocation drmAllocation(0, AllocationType::UNKNOWN, nullptr, nullptr, 0u, 0u, MemoryPool::LocalMemory); auto ptr = memoryManager.lockResourceInLocalMemoryImpl(drmAllocation.getBO()); EXPECT_EQ(nullptr, ptr); 
memoryManager.unlockResourceInLocalMemoryImpl(drmAllocation.getBO()); } TEST(DrmMemoryManagerSimpleTest, givenDrmMemoryManagerWhenFreeGraphicsMemoryIsCalledOnAllocationWithNullBufferObjectThenEarlyReturn) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); executionEnvironment.rootDeviceEnvironments[0]->osInterface = std::make_unique(); auto drm = Drm::create(nullptr, *executionEnvironment.rootDeviceEnvironments[0]); executionEnvironment.rootDeviceEnvironments[0]->osInterface->setDriverModel(std::unique_ptr(drm)); executionEnvironment.rootDeviceEnvironments[0]->memoryOperationsInterface = DrmMemoryOperationsHandler::create(*drm, 0u); TestedDrmMemoryManager memoryManager(executionEnvironment); auto drmAllocation = new DrmAllocation(0, AllocationType::UNKNOWN, nullptr, nullptr, 0u, 0u, MemoryPool::LocalMemory); EXPECT_NE(nullptr, drmAllocation); memoryManager.freeGraphicsMemoryImpl(drmAllocation); } TEST(DrmMemoryManagerSimpleTest, WhenDrmIsCreatedThenQueryPageFaultSupportIsCalled) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); executionEnvironment.rootDeviceEnvironments[0]->osInterface = std::make_unique(); auto drm = std::unique_ptr(Drm::create(nullptr, *executionEnvironment.rootDeviceEnvironments[0])); EXPECT_TRUE(static_cast(drm.get())->queryPageFaultSupportCalled); } using DrmMemoryManagerWithLocalMemoryTest = Test; TEST_F(DrmMemoryManagerWithLocalMemoryTest, givenDrmMemoryManagerWithLocalMemoryWhenLockResourceIsCalledOnAllocationInLocalMemoryThenReturnNullPtr) { DrmAllocation drmAllocation(rootDeviceIndex, AllocationType::UNKNOWN, nullptr, nullptr, 0u, 0u, MemoryPool::LocalMemory); auto ptr = memoryManager->lockResource(&drmAllocation); EXPECT_EQ(nullptr, ptr); memoryManager->unlockResource(&drmAllocation); } using DrmMemoryManagerTest = Test; TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerWhenCopyMemoryToAllocationThenAllocationIsFilledWithCorrectData) { mock->ioctl_expected.gemUserptr = 1; 
mock->ioctl_expected.gemWait = 1; mock->ioctl_expected.gemClose = 1; std::vector dataToCopy(MemoryConstants::pageSize, 1u); auto allocation = memoryManager->allocateGraphicsMemoryWithProperties({rootDeviceIndex, dataToCopy.size(), AllocationType::BUFFER, device->getDeviceBitfield()}); ASSERT_NE(nullptr, allocation); auto ret = memoryManager->copyMemoryToAllocation(allocation, 0, dataToCopy.data(), dataToCopy.size()); EXPECT_TRUE(ret); EXPECT_EQ(0, memcmp(allocation->getUnderlyingBuffer(), dataToCopy.data(), dataToCopy.size())); memoryManager->freeGraphicsMemory(allocation); } TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerWhenFreeingImportedMemoryThenCloseSharedHandleIsNotCalled) { mock->ioctl_expected.gemUserptr = 1; mock->ioctl_expected.gemWait = 1; mock->ioctl_expected.gemClose = 1; std::vector dataToCopy(MemoryConstants::pageSize, 1u); auto allocation = memoryManager->allocateGraphicsMemoryWithProperties({rootDeviceIndex, dataToCopy.size(), AllocationType::BUFFER, device->getDeviceBitfield()}); ASSERT_NE(nullptr, allocation); memoryManager->freeGraphicsMemory(allocation, true); EXPECT_EQ(memoryManager->callsToCloseSharedHandle, 0u); } TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerWhenFreeingNonImportedMemoryThenCloseSharedHandleIsCalled) { mock->ioctl_expected.gemUserptr = 1; mock->ioctl_expected.gemWait = 1; mock->ioctl_expected.gemClose = 1; std::vector dataToCopy(MemoryConstants::pageSize, 1u); auto allocation = memoryManager->allocateGraphicsMemoryWithProperties({rootDeviceIndex, dataToCopy.size(), AllocationType::BUFFER, device->getDeviceBitfield()}); ASSERT_NE(nullptr, allocation); memoryManager->freeGraphicsMemory(allocation); EXPECT_EQ(memoryManager->callsToCloseSharedHandle, 1u); } TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerWhenGetLocalMemoryIsCalledThenSizeOfLocalMemoryIsReturned) { EXPECT_EQ(0 * GB, memoryManager->getLocalMemorySize(rootDeviceIndex, 0xF)); } TEST_F(DrmMemoryManagerTest, 
givenDrmMemoryManagerWhenSetMemAdviseIsCalledThenUpdateCachePolicyInBufferObject) { TestedDrmMemoryManager memoryManager(false, false, false, *executionEnvironment); BufferObject bo(mock, 1, 1024, 0); DrmAllocation drmAllocation(0, AllocationType::UNIFIED_SHARED_MEMORY, &bo, nullptr, 0u, 0u, MemoryPool::LocalMemory); EXPECT_EQ(&bo, drmAllocation.getBO()); for (auto isCached : {false, true}) { MemAdviseFlags flags{}; flags.cached_memory = isCached; EXPECT_TRUE(memoryManager.setMemAdvise(&drmAllocation, flags, rootDeviceIndex)); EXPECT_EQ(isCached ? CachePolicy::WriteBack : CachePolicy::Uncached, bo.peekCachePolicy()); } } TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerWhenSetMemPrefetchIsCalledThenReturnTrue) { TestedDrmMemoryManager memoryManager(false, false, false, *executionEnvironment); BufferObject bo(mock, 1, 1024, 0); DrmAllocation drmAllocation(0, AllocationType::UNIFIED_SHARED_MEMORY, &bo, nullptr, 0u, 0u, MemoryPool::LocalMemory); EXPECT_EQ(&bo, drmAllocation.getBO()); EXPECT_TRUE(memoryManager.setMemPrefetch(&drmAllocation, 0, rootDeviceIndex)); } TEST_F(DrmMemoryManagerTest, givenPageFaultIsUnSupportedWhenCallingBindBoOnBufferAllocationThenAllocationShouldNotPageFaultAndExplicitResidencyIsNotRequired) { auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(NEO::defaultHwInfo.get()); executionEnvironment->initializeMemoryManager(); DrmMock drm(*executionEnvironment->rootDeviceEnvironments[0]); EXPECT_FALSE(drm.pageFaultSupported); OsContextLinux osContext(drm, 0u, EngineDescriptorHelper::getDefaultDescriptor()); osContext.ensureContextInitialized(); uint32_t vmHandleId = 0; MockBufferObject bo(&drm, 0, 0, 1); MockDrmAllocation allocation(AllocationType::BUFFER, MemoryPool::LocalMemory); allocation.bufferObjects[0] = &bo; std::vector bufferObjects; allocation.bindBO(&bo, &osContext, vmHandleId, &bufferObjects, true); 
EXPECT_FALSE(allocation.shouldAllocationPageFault(&drm)); EXPECT_FALSE(bo.isExplicitResidencyRequired()); } TEST_F(DrmMemoryManagerTest, givenPageFaultIsSupportedWhenCallingBindBoOnAllocationThatShouldPageFaultThenExplicitResidencyIsNotRequired) { auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(NEO::defaultHwInfo.get()); executionEnvironment->initializeMemoryManager(); DrmMock drm(*executionEnvironment->rootDeviceEnvironments[0]); drm.pageFaultSupported = true; OsContextLinux osContext(drm, 0u, EngineDescriptorHelper::getDefaultDescriptor()); osContext.ensureContextInitialized(); uint32_t vmHandleId = 0; struct MockDrmAllocationToTestPageFault : MockDrmAllocation { MockDrmAllocationToTestPageFault() : MockDrmAllocation(AllocationType::BUFFER, MemoryPool::LocalMemory){}; bool shouldAllocationPageFault(const Drm *drm) override { return shouldPageFault; } bool shouldPageFault = false; }; for (auto shouldAllocationPageFault : {false, true}) { MockBufferObject bo(&drm, 0, 0, 1); MockDrmAllocationToTestPageFault allocation; allocation.bufferObjects[0] = &bo; allocation.shouldPageFault = shouldAllocationPageFault; std::vector bufferObjects; allocation.bindBO(&bo, &osContext, vmHandleId, &bufferObjects, true); EXPECT_EQ(shouldAllocationPageFault, allocation.shouldAllocationPageFault(&drm)); EXPECT_EQ(!shouldAllocationPageFault, bo.isExplicitResidencyRequired()); } } TEST_F(DrmMemoryManagerWithLocalMemoryAndExplicitExpectationsTest, givenDrmMemoryManagerWhenCreateBufferObjectInMemoryRegionIsCalledWithoutMemoryInfoThenNullBufferObjectIsReturned) { mock->memoryInfo.reset(nullptr); auto gpuAddress = 0x1234u; auto size = MemoryConstants::pageSize; auto bo = std::unique_ptr(memoryManager->createBufferObjectInMemoryRegion(&memoryManager->getDrm(0), gpuAddress, size, MemoryBanks::MainBank, 1)); EXPECT_EQ(nullptr, bo); } 
TEST_F(DrmMemoryManagerWithLocalMemoryAndExplicitExpectationsTest, givenDrmMemoryManagerWhenCreateBufferObjectInMemoryRegionIsCalledWithZeroSizeThenNullBufferObjectIsReturned) { auto gpuAddress = 0x1234u; auto size = 0u; auto bo = std::unique_ptr(memoryManager->createBufferObjectInMemoryRegion(&memoryManager->getDrm(0), gpuAddress, size, MemoryBanks::MainBank, 1)); EXPECT_EQ(nullptr, bo); } TEST_F(DrmMemoryManagerWithLocalMemoryAndExplicitExpectationsTest, givenUseSystemMemoryFlagWhenGraphicsAllocationInDevicePoolIsAllocatedThenNullptrIsReturned) { MemoryManager::AllocationStatus status = MemoryManager::AllocationStatus::Success; AllocationData allocData; allocData.allFlags = 0; allocData.size = MemoryConstants::pageSize; allocData.flags.useSystemMemory = true; allocData.type = AllocationType::BUFFER; allocData.rootDeviceIndex = rootDeviceIndex; auto allocation = memoryManager->allocateGraphicsMemoryInDevicePool(allocData, status); EXPECT_EQ(nullptr, allocation); EXPECT_EQ(MemoryManager::AllocationStatus::RetryInNonDevicePool, status); } TEST_F(DrmMemoryManagerWithLocalMemoryAndExplicitExpectationsTest, givenSvmGpuAllocationWhenHostPtrProvidedThenUseHostPtrAsGpuVa) { size_t size = 2 * MemoryConstants::megaByte; AllocationProperties properties{rootDeviceIndex, false, size, AllocationType::SVM_GPU, false, mockDeviceBitfield}; properties.alignment = size; void *svmPtr = reinterpret_cast(2 * size); auto allocation = static_cast(memoryManager->allocateGraphicsMemoryWithProperties(properties, svmPtr)); ASSERT_NE(nullptr, allocation); EXPECT_EQ(size, allocation->getUnderlyingBufferSize()); EXPECT_EQ(nullptr, allocation->getUnderlyingBuffer()); EXPECT_EQ(nullptr, allocation->getDriverAllocatedCpuPtr()); EXPECT_EQ(svmPtr, reinterpret_cast(allocation->getGpuAddress())); EXPECT_EQ(0u, allocation->getReservedAddressSize()); memoryManager->freeGraphicsMemory(allocation); } TEST_F(DrmMemoryManagerWithLocalMemoryAndExplicitExpectationsTest, 
givenAllowed32BitAndForce32BitWhenGraphicsAllocationInDevicePoolIsAllocatedThenNullptrIsReturned) { memoryManager->setForce32BitAllocations(true); MemoryManager::AllocationStatus status = MemoryManager::AllocationStatus::Success; AllocationData allocData; allocData.allFlags = 0; allocData.size = MemoryConstants::pageSize; allocData.flags.allow32Bit = true; allocData.flags.allocateMemory = true; allocData.type = AllocationType::BUFFER; allocData.rootDeviceIndex = rootDeviceIndex; auto allocation = memoryManager->allocateGraphicsMemoryInDevicePool(allocData, status); EXPECT_EQ(nullptr, allocation); EXPECT_EQ(MemoryManager::AllocationStatus::RetryInNonDevicePool, status); } TEST_F(DrmMemoryManagerWithLocalMemoryAndExplicitExpectationsTest, givenAllowed32BitWhen32BitIsNotForcedThenGraphicsAllocationInDevicePoolReturnsLocalMemoryAllocation) { memoryManager->setForce32BitAllocations(false); MemoryManager::AllocationStatus status = MemoryManager::AllocationStatus::Success; AllocationData allocData; allocData.allFlags = 0; allocData.size = MemoryConstants::pageSize; allocData.flags.allow32Bit = true; allocData.flags.allocateMemory = true; allocData.type = AllocationType::BUFFER; allocData.rootDeviceIndex = rootDeviceIndex; auto allocation = memoryManager->allocateGraphicsMemoryInDevicePool(allocData, status); ASSERT_NE(nullptr, allocation); EXPECT_EQ(MemoryManager::AllocationStatus::Success, status); EXPECT_EQ(MemoryPool::LocalMemory, allocation->getMemoryPool()); memoryManager->freeGraphicsMemory(allocation); } TEST_F(DrmMemoryManagerWithLocalMemoryAndExplicitExpectationsTest, givenAllocationWithKernelIsaWhenAllocatingInDevicePoolOnAllMemoryBanksThenCreateFourBufferObjectsWithSameGpuVirtualAddressAndSize) { MemoryManager::AllocationStatus status = MemoryManager::AllocationStatus::Success; AllocationData allocData; allocData.allFlags = 0; allocData.size = 3 * MemoryConstants::pageSize64k; allocData.flags.allocateMemory = true; allocData.storageInfo.memoryBanks = 
maxNBitValue(MemoryBanks::getBankForLocalMemory(3)); allocData.storageInfo.multiStorage = false; allocData.rootDeviceIndex = rootDeviceIndex; AllocationType isaTypes[] = {AllocationType::KERNEL_ISA, AllocationType::KERNEL_ISA_INTERNAL}; for (uint32_t i = 0; i < 2; i++) { allocData.type = isaTypes[i]; auto allocation = memoryManager->allocateGraphicsMemoryInDevicePool(allocData, status); ASSERT_NE(nullptr, allocation); EXPECT_EQ(MemoryManager::AllocationStatus::Success, status); EXPECT_EQ(MemoryPool::LocalMemory, allocation->getMemoryPool()); EXPECT_NE(0u, allocation->getGpuAddress()); EXPECT_EQ(EngineLimits::maxHandleCount, allocation->getNumGmms()); auto drmAllocation = static_cast(allocation); auto &bos = drmAllocation->getBOs(); auto boAddress = drmAllocation->getGpuAddress(); for (auto handleId = 0u; handleId < EngineLimits::maxHandleCount; handleId++) { auto bo = bos[handleId]; ASSERT_NE(nullptr, bo); auto boSize = allocation->getGmm(handleId)->gmmResourceInfo->getSizeAllocation(); EXPECT_EQ(boAddress, bo->peekAddress()); EXPECT_EQ(boSize, bo->peekSize()); EXPECT_EQ(boSize, 3 * MemoryConstants::pageSize64k); } memoryManager->freeGraphicsMemory(allocation); } } TEST_F(DrmMemoryManagerWithLocalMemoryAndExplicitExpectationsTest, givenAllocationWithLargeBufferWhenAllocatingInDevicePoolOnAllMemoryBanksThenCreateFourBufferObjectsWithDifferentGpuVirtualAddressesAndPartialSizes) { MemoryManager::AllocationStatus status = MemoryManager::AllocationStatus::Success; AllocationData allocData; allocData.allFlags = 0; allocData.size = 18 * MemoryConstants::pageSize64k; allocData.flags.allocateMemory = true; allocData.type = AllocationType::BUFFER; allocData.storageInfo.memoryBanks = maxNBitValue(MemoryBanks::getBankForLocalMemory(3)); allocData.storageInfo.multiStorage = true; allocData.rootDeviceIndex = rootDeviceIndex; auto allocation = memoryManager->allocateGraphicsMemoryInDevicePool(allocData, status); ASSERT_NE(nullptr, allocation); 
EXPECT_EQ(MemoryManager::AllocationStatus::Success, status); EXPECT_EQ(MemoryPool::LocalMemory, allocation->getMemoryPool()); EXPECT_NE(0u, allocation->getGpuAddress()); EXPECT_EQ(EngineLimits::maxHandleCount, allocation->getNumGmms()); auto drmAllocation = static_cast(allocation); auto &bos = drmAllocation->getBOs(); auto boAddress = drmAllocation->getGpuAddress(); for (auto handleId = 0u; handleId < EngineLimits::maxHandleCount; handleId++) { auto bo = bos[handleId]; ASSERT_NE(nullptr, bo); auto boSize = allocation->getGmm(handleId)->gmmResourceInfo->getSizeAllocation(); EXPECT_EQ(boAddress, bo->peekAddress()); EXPECT_EQ(boSize, bo->peekSize()); EXPECT_EQ(boSize, handleId == 0 || handleId == 1 ? 5 * MemoryConstants::pageSize64k : 4 * MemoryConstants::pageSize64k); boAddress += boSize; } memoryManager->freeGraphicsMemory(allocation); } TEST_F(DrmMemoryManagerWithLocalMemoryAndExplicitExpectationsTest, givenAllocationWithKernelIsaWhenAllocationInDevicePoolAndDeviceBitfieldWithHolesThenCorrectAllocationCreated) { MemoryManager::AllocationStatus status = MemoryManager::AllocationStatus::Success; AllocationData allocData; allocData.allFlags = 0; allocData.size = MemoryConstants::pageSize; allocData.flags.allocateMemory = true; allocData.type = AllocationType::KERNEL_ISA; allocData.storageInfo.memoryBanks = 0b1011; allocData.storageInfo.multiStorage = false; allocData.rootDeviceIndex = rootDeviceIndex; auto kernelIsaAllocation = static_cast(memoryManager->allocateGraphicsMemoryInDevicePool(allocData, status)); EXPECT_NE(nullptr, kernelIsaAllocation); auto gpuAddress = kernelIsaAllocation->getGpuAddress(); auto &bos = kernelIsaAllocation->getBOs(); EXPECT_NE(nullptr, bos[0]); EXPECT_EQ(gpuAddress, bos[0]->peekAddress()); EXPECT_NE(nullptr, bos[1]); EXPECT_EQ(gpuAddress, bos[1]->peekAddress()); EXPECT_EQ(nullptr, bos[2]); EXPECT_NE(nullptr, bos[3]); EXPECT_EQ(gpuAddress, bos[3]->peekAddress()); auto &storageInfo = kernelIsaAllocation->storageInfo; EXPECT_EQ(0b1011u, 
storageInfo.memoryBanks.to_ulong()); memoryManager->freeGraphicsMemory(kernelIsaAllocation); } struct DrmMemoryManagerLocalMemoryAlignmentTest : DrmMemoryManagerWithLocalMemoryAndExplicitExpectationsTest { std::unique_ptr createMemoryManager() { return std::make_unique(true, false, false, *executionEnvironment); } bool isAllocationWithinHeap(MemoryManager &memoryManager, const GraphicsAllocation &allocation, HeapIndex heap) { const auto allocationStart = allocation.getGpuAddress(); const auto allocationEnd = allocationStart + allocation.getUnderlyingBufferSize(); const auto heapStart = GmmHelper::canonize(memoryManager.getGfxPartition(rootDeviceIndex)->getHeapBase(heap)); const auto heapEnd = GmmHelper::canonize(memoryManager.getGfxPartition(rootDeviceIndex)->getHeapLimit(heap)); return heapStart <= allocationStart && allocationEnd <= heapEnd; } const uint32_t rootDeviceIndex = 1u; DebugManagerStateRestore restore{}; }; TEST_F(DrmMemoryManagerLocalMemoryAlignmentTest, given2MbAlignmentAllowedWhenAllocatingAllocationLessThen2MbThenUse64kbHeap) { AllocationData allocationData; allocationData.allFlags = 0; allocationData.size = MemoryConstants::pageSize; allocationData.flags.allocateMemory = true; allocationData.rootDeviceIndex = rootDeviceIndex; allocationData.type = AllocationType::BUFFER; allocationData.flags.resource48Bit = true; MemoryManager::AllocationStatus allocationStatus; { DebugManager.flags.AlignLocalMemoryVaTo2MB.set(-1); auto memoryManager = createMemoryManager(); auto allocation = memoryManager->allocateGraphicsMemoryInDevicePool(allocationData, allocationStatus); ASSERT_NE(nullptr, allocation); EXPECT_EQ(MemoryManager::AllocationStatus::Success, allocationStatus); EXPECT_TRUE(isAllocationWithinHeap(*memoryManager, *allocation, HeapIndex::HEAP_STANDARD64KB)); memoryManager->freeGraphicsMemory(allocation); } { DebugManager.flags.AlignLocalMemoryVaTo2MB.set(0); auto memoryManager = createMemoryManager(); auto allocation = 
memoryManager->allocateGraphicsMemoryInDevicePool(allocationData, allocationStatus); ASSERT_NE(nullptr, allocation); EXPECT_EQ(MemoryManager::AllocationStatus::Success, allocationStatus); EXPECT_TRUE(isAllocationWithinHeap(*memoryManager, *allocation, HeapIndex::HEAP_STANDARD64KB)); memoryManager->freeGraphicsMemory(allocation); } { DebugManager.flags.AlignLocalMemoryVaTo2MB.set(1); auto memoryManager = createMemoryManager(); auto allocation = memoryManager->allocateGraphicsMemoryInDevicePool(allocationData, allocationStatus); ASSERT_NE(nullptr, allocation); EXPECT_EQ(MemoryManager::AllocationStatus::Success, allocationStatus); EXPECT_TRUE(isAllocationWithinHeap(*memoryManager, *allocation, HeapIndex::HEAP_STANDARD64KB)); memoryManager->freeGraphicsMemory(allocation); } } TEST_F(DrmMemoryManagerLocalMemoryAlignmentTest, given2MbAlignmentAllowedWhenAllocatingAllocationBiggerThan2MbThenUse2MbHeap) { AllocationData allocationData; allocationData.allFlags = 0; allocationData.size = 2 * MemoryConstants::megaByte; allocationData.flags.allocateMemory = true; allocationData.rootDeviceIndex = rootDeviceIndex; allocationData.type = AllocationType::BUFFER; allocationData.flags.resource48Bit = true; MemoryManager::AllocationStatus allocationStatus; { DebugManager.flags.AlignLocalMemoryVaTo2MB.set(-1); auto memoryManager = createMemoryManager(); auto allocation = memoryManager->allocateGraphicsMemoryInDevicePool(allocationData, allocationStatus); ASSERT_NE(nullptr, allocation); EXPECT_EQ(MemoryManager::AllocationStatus::Success, allocationStatus); EXPECT_TRUE(isAllocationWithinHeap(*memoryManager, *allocation, HeapIndex::HEAP_STANDARD2MB)); memoryManager->freeGraphicsMemory(allocation); } { DebugManager.flags.AlignLocalMemoryVaTo2MB.set(0); auto memoryManager = createMemoryManager(); auto allocation = memoryManager->allocateGraphicsMemoryInDevicePool(allocationData, allocationStatus); ASSERT_NE(nullptr, allocation); EXPECT_EQ(MemoryManager::AllocationStatus::Success, 
allocationStatus); EXPECT_TRUE(isAllocationWithinHeap(*memoryManager, *allocation, HeapIndex::HEAP_STANDARD64KB)); memoryManager->freeGraphicsMemory(allocation); } { DebugManager.flags.AlignLocalMemoryVaTo2MB.set(1); auto memoryManager = createMemoryManager(); auto allocation = memoryManager->allocateGraphicsMemoryInDevicePool(allocationData, allocationStatus); ASSERT_NE(nullptr, allocation); EXPECT_EQ(MemoryManager::AllocationStatus::Success, allocationStatus); EXPECT_TRUE(isAllocationWithinHeap(*memoryManager, *allocation, HeapIndex::HEAP_STANDARD2MB)); EXPECT_TRUE(isAligned(allocation->getGpuAddress(), MemoryConstants::pageSize2Mb)); memoryManager->freeGraphicsMemory(allocation); } } TEST_F(DrmMemoryManagerLocalMemoryAlignmentTest, givenExtendedHeapPreferredAnd2MbAlignmentAllowedWhenAllocatingAllocationBiggerThenUseExtendedHeap) { if (memoryManager->getGfxPartition(0)->getHeapLimit(HeapIndex::HEAP_EXTENDED) == 0) { GTEST_SKIP(); } AllocationData allocationData; allocationData.allFlags = 0; allocationData.size = 2 * MemoryConstants::megaByte; allocationData.flags.allocateMemory = true; allocationData.rootDeviceIndex = rootDeviceIndex; allocationData.type = AllocationType::BUFFER; allocationData.flags.resource48Bit = false; MemoryManager::AllocationStatus allocationStatus; { DebugManager.flags.AlignLocalMemoryVaTo2MB.set(-1); auto memoryManager = createMemoryManager(); auto allocation = memoryManager->allocateGraphicsMemoryInDevicePool(allocationData, allocationStatus); ASSERT_NE(nullptr, allocation); EXPECT_EQ(MemoryManager::AllocationStatus::Success, allocationStatus); EXPECT_TRUE(isAligned(allocation->getGpuAddress(), MemoryConstants::pageSize2Mb)); EXPECT_TRUE(isAllocationWithinHeap(*memoryManager, *allocation, HeapIndex::HEAP_EXTENDED)); memoryManager->freeGraphicsMemory(allocation); } { DebugManager.flags.AlignLocalMemoryVaTo2MB.set(0); auto memoryManager = createMemoryManager(); auto allocation = 
memoryManager->allocateGraphicsMemoryInDevicePool(allocationData, allocationStatus); ASSERT_NE(nullptr, allocation); EXPECT_EQ(MemoryManager::AllocationStatus::Success, allocationStatus); EXPECT_TRUE(isAllocationWithinHeap(*memoryManager, *allocation, HeapIndex::HEAP_EXTENDED)); memoryManager->freeGraphicsMemory(allocation); } { DebugManager.flags.AlignLocalMemoryVaTo2MB.set(1); auto memoryManager = createMemoryManager(); auto allocation = memoryManager->allocateGraphicsMemoryInDevicePool(allocationData, allocationStatus); ASSERT_NE(nullptr, allocation); EXPECT_EQ(MemoryManager::AllocationStatus::Success, allocationStatus); EXPECT_TRUE(isAllocationWithinHeap(*memoryManager, *allocation, HeapIndex::HEAP_EXTENDED)); EXPECT_TRUE(isAligned(allocation->getGpuAddress(), MemoryConstants::pageSize2Mb)); memoryManager->freeGraphicsMemory(allocation); } } TEST_F(DrmMemoryManagerLocalMemoryAlignmentTest, givenCustomAlignmentWhenAllocatingAllocationBiggerThanTheAlignmentThenAlignProperly) { AllocationData allocationData; allocationData.allFlags = 0; allocationData.flags.allocateMemory = true; allocationData.rootDeviceIndex = rootDeviceIndex; allocationData.type = AllocationType::BUFFER; allocationData.flags.resource48Bit = true; MemoryManager::AllocationStatus allocationStatus; { // size==2MB, use 2MB heap DebugManager.flags.ExperimentalEnableCustomLocalMemoryAlignment.set(2 * MemoryConstants::megaByte); allocationData.size = 2 * MemoryConstants::megaByte; auto memoryManager = createMemoryManager(); auto allocation = memoryManager->allocateGraphicsMemoryInDevicePool(allocationData, allocationStatus); ASSERT_NE(nullptr, allocation); EXPECT_EQ(MemoryManager::AllocationStatus::Success, allocationStatus); EXPECT_TRUE(isAllocationWithinHeap(*memoryManager, *allocation, HeapIndex::HEAP_STANDARD2MB)); EXPECT_TRUE(isAligned(allocation->getGpuAddress(), 2 * MemoryConstants::megaByte)); memoryManager->freeGraphicsMemory(allocation); } { // size > 2MB, use 2MB heap 
DebugManager.flags.ExperimentalEnableCustomLocalMemoryAlignment.set(16 * MemoryConstants::megaByte); allocationData.size = 16 * MemoryConstants::megaByte; auto memoryManager = createMemoryManager(); auto allocation = memoryManager->allocateGraphicsMemoryInDevicePool(allocationData, allocationStatus); ASSERT_NE(nullptr, allocation); EXPECT_EQ(MemoryManager::AllocationStatus::Success, allocationStatus); EXPECT_TRUE(isAllocationWithinHeap(*memoryManager, *allocation, HeapIndex::HEAP_STANDARD2MB)); EXPECT_TRUE(isAligned(allocation->getGpuAddress(), 16 * MemoryConstants::megaByte)); memoryManager->freeGraphicsMemory(allocation); } { // size < 2MB, use 64KB heap DebugManager.flags.ExperimentalEnableCustomLocalMemoryAlignment.set(8 * MemoryConstants::pageSize64k); allocationData.size = 8 * MemoryConstants::pageSize64k; auto memoryManager = createMemoryManager(); auto allocation = memoryManager->allocateGraphicsMemoryInDevicePool(allocationData, allocationStatus); ASSERT_NE(nullptr, allocation); EXPECT_EQ(MemoryManager::AllocationStatus::Success, allocationStatus); EXPECT_TRUE(isAllocationWithinHeap(*memoryManager, *allocation, HeapIndex::HEAP_STANDARD64KB)); EXPECT_TRUE(isAligned(allocation->getGpuAddress(), 8 * MemoryConstants::pageSize64k)); memoryManager->freeGraphicsMemory(allocation); } } TEST_F(DrmMemoryManagerLocalMemoryAlignmentTest, givenCustomAlignmentWhenAllocatingAllocationLessThanTheAlignmentThenIgnoreCustomAlignment) { AllocationData allocationData; allocationData.allFlags = 0; allocationData.size = 3 * MemoryConstants::megaByte; allocationData.flags.allocateMemory = true; allocationData.rootDeviceIndex = rootDeviceIndex; allocationData.type = AllocationType::BUFFER; allocationData.flags.resource48Bit = true; MemoryManager::AllocationStatus allocationStatus; { // Too small allocation, fallback to 2MB heap DebugManager.flags.AlignLocalMemoryVaTo2MB.set(0); DebugManager.flags.ExperimentalEnableCustomLocalMemoryAlignment.set(32 * MemoryConstants::megaByte); 
auto memoryManager = createMemoryManager(); auto allocation = memoryManager->allocateGraphicsMemoryInDevicePool(allocationData, allocationStatus); ASSERT_NE(nullptr, allocation); EXPECT_EQ(MemoryManager::AllocationStatus::Success, allocationStatus); EXPECT_TRUE(isAllocationWithinHeap(*memoryManager, *allocation, HeapIndex::HEAP_STANDARD2MB)); memoryManager->freeGraphicsMemory(allocation); } { // Too small allocation, fallback to 2MB heap DebugManager.flags.AlignLocalMemoryVaTo2MB.set(1); DebugManager.flags.ExperimentalEnableCustomLocalMemoryAlignment.set(32 * MemoryConstants::megaByte); auto memoryManager = createMemoryManager(); auto allocation = memoryManager->allocateGraphicsMemoryInDevicePool(allocationData, allocationStatus); ASSERT_NE(nullptr, allocation); EXPECT_EQ(MemoryManager::AllocationStatus::Success, allocationStatus); EXPECT_TRUE(isAllocationWithinHeap(*memoryManager, *allocation, HeapIndex::HEAP_STANDARD2MB)); memoryManager->freeGraphicsMemory(allocation); } } TEST_F(DrmMemoryManagerWithLocalMemoryAndExplicitExpectationsTest, givenNotSetUseSystemMemoryWhenGraphicsAllocationInDevicePoolIsAllocatedForBufferThenLocalMemoryAllocationIsReturnedFromStandard64KbHeap) { MemoryManager::AllocationStatus status = MemoryManager::AllocationStatus::Success; AllocationData allocData; allocData.allFlags = 0; allocData.size = MemoryConstants::pageSize; allocData.flags.allocateMemory = true; allocData.type = AllocationType::BUFFER; allocData.rootDeviceIndex = rootDeviceIndex; auto sizeAligned = alignUp(allocData.size, MemoryConstants::pageSize64k); auto allocation = memoryManager->allocateGraphicsMemoryInDevicePool(allocData, status); EXPECT_NE(nullptr, allocation); EXPECT_EQ(MemoryManager::AllocationStatus::Success, status); EXPECT_EQ(MemoryPool::LocalMemory, allocation->getMemoryPool()); auto gmm = allocation->getDefaultGmm(); EXPECT_NE(nullptr, gmm); EXPECT_EQ(0u, gmm->resourceParams.Flags.Info.NonLocalOnly); EXPECT_EQ(RESOURCE_BUFFER, gmm->resourceParams.Type); 
EXPECT_EQ(sizeAligned, gmm->resourceParams.BaseWidth64); EXPECT_NE(nullptr, gmm->gmmResourceInfo->peekHandle()); EXPECT_NE(0u, gmm->gmmResourceInfo->getHAlign()); auto gpuAddress = allocation->getGpuAddress(); EXPECT_NE(0u, gpuAddress); auto heap = HeapIndex::HEAP_STANDARD64KB; if (memoryManager->getGfxPartition(rootDeviceIndex)->getHeapLimit(HeapIndex::HEAP_EXTENDED)) { heap = HeapIndex::HEAP_EXTENDED; } EXPECT_LT(GmmHelper::canonize(memoryManager->getGfxPartition(rootDeviceIndex)->getHeapBase(heap)), gpuAddress); EXPECT_GT(GmmHelper::canonize(memoryManager->getGfxPartition(rootDeviceIndex)->getHeapLimit(heap)), gpuAddress); EXPECT_EQ(0u, allocation->getGpuBaseAddress()); EXPECT_EQ(sizeAligned, allocation->getUnderlyingBufferSize()); EXPECT_EQ(gpuAddress, reinterpret_cast(allocation->getReservedAddressPtr())); EXPECT_EQ(sizeAligned, allocation->getReservedAddressSize()); EXPECT_EQ(allocData.storageInfo.memoryBanks, allocation->storageInfo.memoryBanks); EXPECT_EQ(allocData.storageInfo.pageTablesVisibility, allocation->storageInfo.pageTablesVisibility); EXPECT_EQ(allocData.storageInfo.cloningOfPageTables, allocation->storageInfo.cloningOfPageTables); EXPECT_EQ(allocData.storageInfo.tileInstanced, allocation->storageInfo.tileInstanced); EXPECT_EQ(allocData.storageInfo.multiStorage, allocation->storageInfo.multiStorage); EXPECT_EQ(allocData.flags.flushL3, allocation->isFlushL3Required()); auto drmAllocation = static_cast(allocation); auto bo = drmAllocation->getBO(); EXPECT_NE(nullptr, bo); EXPECT_EQ(gpuAddress, bo->peekAddress()); EXPECT_EQ(sizeAligned, bo->peekSize()); memoryManager->freeGraphicsMemory(allocation); } TEST_F(DrmMemoryManagerWithLocalMemoryAndExplicitExpectationsTest, givenNotSetUseSystemMemoryWhenGraphicsAllocationInDevicePoolIsAllocatedForImageThenLocalMemoryAllocationIsReturnedFromStandard64KbHeap) { ImageDescriptor imgDesc = {}; imgDesc.imageType = ImageType::Image2D; imgDesc.imageWidth = 512; imgDesc.imageHeight = 512; auto imgInfo = 
MockGmm::initImgInfo(imgDesc, 0, nullptr); MemoryManager::AllocationStatus status = MemoryManager::AllocationStatus::Success; AllocationData allocData; allocData.allFlags = 0; allocData.size = MemoryConstants::pageSize; allocData.type = AllocationType::IMAGE; allocData.flags.resource48Bit = true; allocData.imgInfo = &imgInfo; allocData.rootDeviceIndex = rootDeviceIndex; auto allocation = memoryManager->allocateGraphicsMemoryInDevicePool(allocData, status); EXPECT_NE(nullptr, allocation); EXPECT_EQ(MemoryManager::AllocationStatus::Success, status); EXPECT_TRUE(allocData.imgInfo->useLocalMemory); EXPECT_EQ(MemoryPool::LocalMemory, allocation->getMemoryPool()); auto gmm = allocation->getDefaultGmm(); EXPECT_NE(nullptr, gmm); EXPECT_EQ(0u, gmm->resourceParams.Flags.Info.NonLocalOnly); auto gpuAddress = allocation->getGpuAddress(); auto sizeAligned = alignUp(allocData.imgInfo->size, MemoryConstants::pageSize64k); EXPECT_NE(0u, gpuAddress); EXPECT_LT(GmmHelper::canonize(memoryManager->getGfxPartition(rootDeviceIndex)->getHeapBase(HeapIndex::HEAP_STANDARD64KB)), gpuAddress); EXPECT_GT(GmmHelper::canonize(memoryManager->getGfxPartition(rootDeviceIndex)->getHeapLimit(HeapIndex::HEAP_STANDARD64KB)), gpuAddress); EXPECT_EQ(0u, allocation->getGpuBaseAddress()); EXPECT_EQ(sizeAligned, allocation->getUnderlyingBufferSize()); EXPECT_EQ(gpuAddress, reinterpret_cast(allocation->getReservedAddressPtr())); EXPECT_EQ(sizeAligned, allocation->getReservedAddressSize()); EXPECT_EQ(allocData.storageInfo.memoryBanks, allocation->storageInfo.memoryBanks); EXPECT_EQ(allocData.storageInfo.pageTablesVisibility, allocation->storageInfo.pageTablesVisibility); EXPECT_EQ(allocData.storageInfo.cloningOfPageTables, allocation->storageInfo.cloningOfPageTables); EXPECT_EQ(allocData.storageInfo.tileInstanced, allocation->storageInfo.tileInstanced); EXPECT_EQ(allocData.storageInfo.multiStorage, allocation->storageInfo.multiStorage); EXPECT_EQ(allocData.flags.flushL3, allocation->isFlushL3Required()); 
auto drmAllocation = static_cast(allocation); auto bo = drmAllocation->getBO(); EXPECT_NE(nullptr, bo); EXPECT_EQ(gpuAddress, bo->peekAddress()); EXPECT_EQ(sizeAligned, bo->peekSize()); memoryManager->freeGraphicsMemory(allocation); } TEST_F(DrmMemoryManagerWithLocalMemoryAndExplicitExpectationsTest, givenNotSetUseSystemMemoryWhenGraphicsAllocatioInDevicePoolIsAllocatednForKernelIsaThenLocalMemoryAllocationIsReturnedFromInternalHeap) { MemoryManager::AllocationStatus status = MemoryManager::AllocationStatus::Success; AllocationData allocData; allocData.allFlags = 0; allocData.size = MemoryConstants::pageSize; allocData.flags.allocateMemory = true; allocData.rootDeviceIndex = rootDeviceIndex; auto sizeAligned = alignUp(allocData.size, MemoryConstants::pageSize64k); AllocationType isaTypes[] = {AllocationType::KERNEL_ISA, AllocationType::KERNEL_ISA_INTERNAL}; for (uint32_t i = 0; i < 2; i++) { allocData.type = isaTypes[i]; auto allocation = memoryManager->allocateGraphicsMemoryInDevicePool(allocData, status); EXPECT_NE(nullptr, allocation); EXPECT_EQ(MemoryManager::AllocationStatus::Success, status); EXPECT_EQ(MemoryPool::LocalMemory, allocation->getMemoryPool()); auto gpuAddress = allocation->getGpuAddress(); EXPECT_LT(GmmHelper::canonize(memoryManager->getGfxPartition(rootDeviceIndex)->getHeapBase(HeapIndex::HEAP_INTERNAL_DEVICE_MEMORY)), gpuAddress); EXPECT_GT(GmmHelper::canonize(memoryManager->getGfxPartition(rootDeviceIndex)->getHeapLimit(HeapIndex::HEAP_INTERNAL_DEVICE_MEMORY)), gpuAddress); EXPECT_EQ(GmmHelper::canonize(memoryManager->getGfxPartition(rootDeviceIndex)->getHeapBase(HeapIndex::HEAP_INTERNAL_DEVICE_MEMORY)), allocation->getGpuBaseAddress()); EXPECT_EQ(sizeAligned, allocation->getUnderlyingBufferSize()); EXPECT_EQ(gpuAddress, reinterpret_cast(allocation->getReservedAddressPtr())); EXPECT_EQ(sizeAligned, allocation->getReservedAddressSize()); auto drmAllocation = static_cast(allocation); auto bo = drmAllocation->getBO(); EXPECT_NE(nullptr, bo); 
EXPECT_EQ(gpuAddress, bo->peekAddress()); EXPECT_EQ(sizeAligned, bo->peekSize()); memoryManager->freeGraphicsMemory(allocation); } } TEST_F(DrmMemoryManagerWithLocalMemoryAndExplicitExpectationsTest, givenUnsupportedTypeWhenAllocatingInDevicePoolThenRetryInNonDevicePoolStatusAndNullptrIsReturned) { MemoryManager::AllocationStatus status = MemoryManager::AllocationStatus::Success; AllocationData allocData; allocData.allFlags = 0; allocData.size = MemoryConstants::pageSize; allocData.flags.allocateMemory = true; allocData.rootDeviceIndex = rootDeviceIndex; AllocationType unsupportedTypes[] = {AllocationType::SHARED_RESOURCE_COPY}; for (auto unsupportedType : unsupportedTypes) { allocData.type = unsupportedType; auto allocation = memoryManager->allocateGraphicsMemoryInDevicePool(allocData, status); ASSERT_EQ(nullptr, allocation); EXPECT_EQ(MemoryManager::AllocationStatus::RetryInNonDevicePool, status); memoryManager->freeGraphicsMemory(allocation); } } TEST_F(DrmMemoryManagerWithLocalMemoryAndExplicitExpectationsTest, givenOversizedAllocationWhenGraphicsAllocationInDevicePoolIsAllocatedThenAllocationAndBufferObjectHaveRequestedSize) { auto heap = HeapIndex::HEAP_STANDARD64KB; if (memoryManager->getGfxPartition(rootDeviceIndex)->getHeapLimit(HeapIndex::HEAP_EXTENDED)) { heap = HeapIndex::HEAP_EXTENDED; } auto largerSize = 6 * MemoryConstants::megaByte; auto gpuAddress0 = memoryManager->getGfxPartition(rootDeviceIndex)->heapAllocateWithCustomAlignment(heap, largerSize, MemoryConstants::pageSize2Mb); EXPECT_NE(0u, gpuAddress0); EXPECT_EQ(6 * MemoryConstants::megaByte, largerSize); auto gpuAddress1 = memoryManager->getGfxPartition(rootDeviceIndex)->heapAllocate(heap, largerSize); EXPECT_NE(0u, gpuAddress1); EXPECT_EQ(6 * MemoryConstants::megaByte, largerSize); auto gpuAddress2 = memoryManager->getGfxPartition(rootDeviceIndex)->heapAllocate(heap, largerSize); EXPECT_NE(0u, gpuAddress2); EXPECT_EQ(6 * MemoryConstants::megaByte, largerSize); 
memoryManager->getGfxPartition(rootDeviceIndex)->heapFree(heap, gpuAddress1, largerSize); auto status = MemoryManager::AllocationStatus::Error; AllocationData allocData; allocData.size = 5 * MemoryConstants::megaByte; allocData.type = AllocationType::BUFFER; allocData.rootDeviceIndex = rootDeviceIndex; auto allocation = memoryManager->allocateGraphicsMemoryInDevicePool(allocData, status); memoryManager->getGfxPartition(rootDeviceIndex)->heapFree(heap, gpuAddress2, largerSize); EXPECT_EQ(MemoryManager::AllocationStatus::Success, status); ASSERT_NE(nullptr, allocation); EXPECT_EQ(largerSize, allocation->getReservedAddressSize()); EXPECT_EQ(allocData.size, allocation->getUnderlyingBufferSize()); EXPECT_EQ(allocData.size, static_cast(allocation)->getBO()->peekSize()); memoryManager->freeGraphicsMemory(allocation); } struct DrmMemoryManagerToTestCopyMemoryToAllocationBanks : public DrmMemoryManager { DrmMemoryManagerToTestCopyMemoryToAllocationBanks(ExecutionEnvironment &executionEnvironment, size_t lockableLocalMemorySize) : DrmMemoryManager(gemCloseWorkerMode::gemCloseWorkerInactive, false, false, executionEnvironment) { lockedLocalMemorySize = lockableLocalMemorySize; } void *lockResourceInLocalMemoryImpl(BufferObject *bo) override { if (lockedLocalMemorySize > 0) { if (static_cast(bo->peekHandle()) < lockedLocalMemory.size()) { lockedLocalMemory[bo->peekHandle()].reset(new uint8_t[lockedLocalMemorySize]); return lockedLocalMemory[bo->peekHandle()].get(); } } return nullptr; } void unlockResourceInLocalMemoryImpl(BufferObject *bo) override { } std::array, 4> lockedLocalMemory; size_t lockedLocalMemorySize = 0; }; TEST(DrmMemoryManagerCopyMemoryToAllocationBanksTest, givenDrmMemoryManagerWhenCopyMemoryToAllocationOnSpecificMemoryBanksThenAllocationIsFilledWithCorrectDataOnSpecificBanks) { uint8_t sourceData[64]{}; size_t offset = 3; size_t sourceAllocationSize = sizeof(sourceData); size_t destinationAllocationSize = sourceAllocationSize + offset; 
MockExecutionEnvironment executionEnvironment; auto drm = new DrmMock(mockFd, *executionEnvironment.rootDeviceEnvironments[0]); executionEnvironment.rootDeviceEnvironments[0]->osInterface.reset(new OSInterface()); executionEnvironment.rootDeviceEnvironments[0]->osInterface->setDriverModel(std::unique_ptr(drm)); DrmMemoryManagerToTestCopyMemoryToAllocationBanks drmMemoryManger(executionEnvironment, destinationAllocationSize); std::vector dataToCopy(sourceAllocationSize, 1u); MockDrmAllocation mockAllocation(AllocationType::WORK_PARTITION_SURFACE, MemoryPool::LocalMemory); mockAllocation.storageInfo.memoryBanks = 0b1111; DeviceBitfield memoryBanksToCopy = 0b1010; mockAllocation.bufferObjects.clear(); for (auto index = 0u; index < 4; index++) { drmMemoryManger.lockedLocalMemory[index].reset(); mockAllocation.bufferObjects.push_back(new BufferObject(drm, index, sourceAllocationSize, 3)); } auto ret = drmMemoryManger.copyMemoryToAllocationBanks(&mockAllocation, offset, dataToCopy.data(), dataToCopy.size(), memoryBanksToCopy); EXPECT_TRUE(ret); EXPECT_EQ(nullptr, drmMemoryManger.lockedLocalMemory[0].get()); ASSERT_NE(nullptr, drmMemoryManger.lockedLocalMemory[1].get()); EXPECT_EQ(nullptr, drmMemoryManger.lockedLocalMemory[2].get()); ASSERT_NE(nullptr, drmMemoryManger.lockedLocalMemory[3].get()); EXPECT_EQ(0, memcmp(ptrOffset(drmMemoryManger.lockedLocalMemory[1].get(), offset), dataToCopy.data(), dataToCopy.size())); EXPECT_EQ(0, memcmp(ptrOffset(drmMemoryManger.lockedLocalMemory[3].get(), offset), dataToCopy.data(), dataToCopy.size())); for (auto index = 0u; index < 4; index++) { delete mockAllocation.bufferObjects[index]; } } TEST_F(DrmMemoryManagerWithLocalMemoryTest, givenDrmWhenRetrieveMmapOffsetForBufferObjectSucceedsThenReturnTrueAndCorrectOffset) { mock->ioctl_expected.gemMmapOffset = 1; BufferObject bo(mock, 1, 1024, 0); mock->mmapOffsetExpected = 21; uint64_t offset = 0; auto ret = memoryManager->retrieveMmapOffsetForBufferObject(rootDeviceIndex, bo, 0, offset); 
EXPECT_TRUE(ret); EXPECT_EQ(21u, offset); } TEST_F(DrmMemoryManagerWithLocalMemoryTest, givenDrmWhenRetrieveMmapOffsetForBufferObjectFailsThenReturnFalse) { mock->ioctl_expected.gemMmapOffset = 2; BufferObject bo(mock, 1, 1024, 0); mock->failOnMmapOffset = true; uint64_t offset = 0; auto ret = memoryManager->retrieveMmapOffsetForBufferObject(rootDeviceIndex, bo, 0, offset); EXPECT_FALSE(ret); } TEST_F(DrmMemoryManagerWithLocalMemoryTest, givenDrmWhenRetrieveMmapOffsetForBufferObjectIsCalledForLocalMemoryThenApplyCorrectFlags) { mock->ioctl_expected.gemMmapOffset = 5; BufferObject bo(mock, 1, 1024, 0); uint64_t offset = 0; auto ret = memoryManager->retrieveMmapOffsetForBufferObject(rootDeviceIndex, bo, 0, offset); EXPECT_TRUE(ret); EXPECT_EQ(4u, mock->mmapOffsetFlags); mock->failOnMmapOffset = true; for (uint64_t flags : {I915_MMAP_OFFSET_WC, I915_MMAP_OFFSET_WB}) { ret = memoryManager->retrieveMmapOffsetForBufferObject(rootDeviceIndex, bo, flags, offset); EXPECT_FALSE(ret); EXPECT_EQ(flags, mock->mmapOffsetFlags); } } TEST_F(DrmMemoryManagerTest, givenDrmWhenRetrieveMmapOffsetForBufferObjectIsCalledForSystemMemoryThenApplyCorrectFlags) { mock->ioctl_expected.gemMmapOffset = 4; BufferObject bo(mock, 1, 1024, 0); uint64_t offset = 0; bool ret = false; for (uint64_t flags : {I915_MMAP_OFFSET_WC, I915_MMAP_OFFSET_WB}) { ret = memoryManager->retrieveMmapOffsetForBufferObject(rootDeviceIndex, bo, flags, offset); EXPECT_TRUE(ret); EXPECT_EQ(flags, mock->mmapOffsetFlags); } mock->failOnMmapOffset = true; for (uint64_t flags : {I915_MMAP_OFFSET_WC, I915_MMAP_OFFSET_WB}) { ret = memoryManager->retrieveMmapOffsetForBufferObject(rootDeviceIndex, bo, flags, offset); EXPECT_FALSE(ret); EXPECT_EQ(flags, mock->mmapOffsetFlags); } } TEST_F(DrmMemoryManagerTest, whenCallPaddedAllocationWithoutMmapPtrThenOnlyUserptrCalled) { mock->ioctl_expected.gemUserptr = 1; mock->ioctl_expected.gemClose = 1; void *cpuPtr = (void *)0x30000; size_t size = 0x1000; DrmAllocation 
gfxAllocation(rootDeviceIndex, AllocationType::UNKNOWN, nullptr, cpuPtr, size, (osHandle)1u, MemoryPool::MemoryNull); auto gfxPaddedAllocation = memoryManager->createPaddedAllocation(&gfxAllocation, size); ASSERT_NE(nullptr, gfxPaddedAllocation); memoryManager->freeGraphicsMemoryImpl(gfxPaddedAllocation); } TEST_F(DrmMemoryManagerTest, whenCallPaddedAllocationWithMmapPtrThenMmapCalled) { mock->ioctl_expected.gemMmap = 1; mock->ioctl_expected.gemUserptr = 1; mock->ioctl_expected.gemClose = 1; BufferObject bo(mock, 1, 1024, 0); void *cpuPtr = (void *)0x30000; size_t size = 0x1000; DrmAllocation gfxAllocation(rootDeviceIndex, AllocationType::UNKNOWN, &bo, cpuPtr, size, (osHandle)1u, MemoryPool::MemoryNull); gfxAllocation.setMmapPtr(cpuPtr); gfxAllocation.setMmapSize(size); auto gfxPaddedAllocation = memoryManager->createPaddedAllocation(&gfxAllocation, size); ASSERT_NE(nullptr, gfxPaddedAllocation); EXPECT_TRUE(gfxAllocation.isLocked()); memoryManager->freeGraphicsMemoryImpl(gfxPaddedAllocation); } TEST_F(DrmMemoryManagerTest, whenCallPaddedAllocationWithMmapPtrAndFailedMmapCalledThenReturnNullptr) { mock->ioctl_expected.gemMmap = 1; mock->ioctl_res = -1; BufferObject bo(mock, 1, 1024, 0); void *cpuPtr = (void *)0x30000; size_t size = 0x1000; DrmAllocation gfxAllocation(rootDeviceIndex, AllocationType::UNKNOWN, &bo, cpuPtr, size, (osHandle)1u, MemoryPool::MemoryNull); gfxAllocation.setMmapPtr(cpuPtr); gfxAllocation.setMmapSize(size); auto gfxPaddedAllocation = memoryManager->createPaddedAllocation(&gfxAllocation, size); ASSERT_EQ(nullptr, gfxPaddedAllocation); mock->ioctl_res = 0; } TEST(DistanceInfoTest, givenDistanceInfosWhenAssignRegionsFromDistancesThenCorrectRegionsSet) { std::vector memRegions(4); memRegions[0] = {{I915_MEMORY_CLASS_SYSTEM, 0}, 1024, 0}; memRegions[1] = {{I915_MEMORY_CLASS_DEVICE, 0}, 1024, 0}; memRegions[2] = {{I915_MEMORY_CLASS_DEVICE, 1}, 1024, 0}; memRegions[3] = {{I915_MEMORY_CLASS_DEVICE, 2}, 1024, 0}; auto memoryInfo = 
std::make_unique(memRegions); std::vector engines(3); engines[0] = {I915_ENGINE_CLASS_RENDER, 0}; engines[1] = {I915_ENGINE_CLASS_COPY, 0}; engines[2] = {I915_ENGINE_CLASS_COPY, 2}; auto distances = std::vector(); for (const auto ®ion : memRegions) { if (region.region.memoryClass == I915_MEMORY_CLASS_SYSTEM) { continue; } for (const auto &engine : engines) { DistanceInfo dist{}; dist.engine = engine; dist.region = {region.region.memoryClass, region.region.memoryInstance}; dist.distance = (region.region.memoryInstance == engine.engineInstance) ? 0 : 100; distances.push_back(dist); } } memoryInfo->assignRegionsFromDistances(distances); EXPECT_EQ(1024u, memoryInfo->getMemoryRegionSize(1)); EXPECT_EQ(1024u, memoryInfo->getMemoryRegionSize(2)); EXPECT_EQ(0u, memoryInfo->getMemoryRegionSize(4)); } TEST_F(DrmMemoryManagerTest, GivenEligbleAllocationTypeWhenCheckingAllocationEligbleForCompletionFenceThenReturnTrue) { AllocationType validAllocations[] = { AllocationType::COMMAND_BUFFER, AllocationType::RING_BUFFER, AllocationType::SEMAPHORE_BUFFER, AllocationType::TAG_BUFFER}; for (size_t i = 0; i < 4; i++) { EXPECT_TRUE(memoryManager->allocationTypeForCompletionFence(validAllocations[i])); } } TEST_F(DrmMemoryManagerTest, GivenNotEligbleAllocationTypeWhenCheckingAllocationEligbleForCompletionFenceThenReturnFalse) { AllocationType invalidAllocations[] = { AllocationType::BUFFER_HOST_MEMORY, AllocationType::CONSTANT_SURFACE, AllocationType::FILL_PATTERN, AllocationType::GLOBAL_SURFACE}; for (size_t i = 0; i < 4; i++) { EXPECT_FALSE(memoryManager->allocationTypeForCompletionFence(invalidAllocations[i])); } } TEST_F(DrmMemoryManagerTest, GivenNotEligbleAllocationTypeAndDebugFlagOverridingWhenCheckingAllocationEligbleForCompletionFenceThenReturnTrue) { DebugManagerStateRestore dbgState; DebugManager.flags.UseDrmCompletionFenceForAllAllocations.set(1); AllocationType invalidAllocations[] = { AllocationType::BUFFER_HOST_MEMORY, AllocationType::CONSTANT_SURFACE, 
AllocationType::FILL_PATTERN, AllocationType::GLOBAL_SURFACE}; for (size_t i = 0; i < 4; i++) { EXPECT_TRUE(memoryManager->allocationTypeForCompletionFence(invalidAllocations[i])); } } TEST_F(DrmMemoryManagerTest, givenCompletionFenceEnabledWhenHandlingCompletionOfUsedAndEligbleAllocationThenCallWaitUserFence) { mock->ioctl_expected.total = -1; VariableBackup backupFenceSupported{&mock->completionFenceSupported, true}; VariableBackup backupVmBindCallParent{&mock->isVmBindAvailableCall.callParent, false}; VariableBackup backupVmBindReturnValue{&mock->isVmBindAvailableCall.returnValue, true}; auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootDeviceIndex, 1024, AllocationType::COMMAND_BUFFER}); auto engine = memoryManager->getRegisteredEngines()[0]; allocation->updateTaskCount(2, engine.osContext->getContextId()); uint64_t expectedFenceAddress = castToUint64(const_cast(engine.commandStreamReceiver->getTagAddress())) + Drm::completionFenceOffset; constexpr uint64_t expectedValue = 2; memoryManager->handleFenceCompletion(allocation); EXPECT_EQ(1u, mock->waitUserFenceCall.called); EXPECT_EQ(expectedFenceAddress, mock->waitUserFenceCall.address); EXPECT_EQ(expectedValue, mock->waitUserFenceCall.value); memoryManager->freeGraphicsMemory(allocation); } TEST_F(DrmMemoryManagerTest, givenCompletionFenceEnabledWhenHandlingCompletionOfNotUsedAndEligbleAllocationThenDoNotCallWaitUserFence) { mock->ioctl_expected.total = -1; VariableBackup backupFenceSupported{&mock->completionFenceSupported, true}; VariableBackup backupVmBindCallParent{&mock->isVmBindAvailableCall.callParent, false}; VariableBackup backupVmBindReturnValue{&mock->isVmBindAvailableCall.returnValue, true}; auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootDeviceIndex, 1024, AllocationType::COMMAND_BUFFER}); memoryManager->handleFenceCompletion(allocation); EXPECT_EQ(0u, mock->waitUserFenceCall.called); 
memoryManager->freeGraphicsMemory(allocation); } TEST_F(DrmMemoryManagerTest, givenCompletionFenceEnabledWhenHandlingCompletionOfUsedAndNotEligbleAllocationThenDoNotCallWaitUserFence) { mock->ioctl_expected.total = -1; VariableBackup backupFenceSupported{&mock->completionFenceSupported, true}; VariableBackup backupVmBindCallParent{&mock->isVmBindAvailableCall.callParent, false}; VariableBackup backupVmBindReturnValue{&mock->isVmBindAvailableCall.returnValue, true}; auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootDeviceIndex, 1024, AllocationType::GLOBAL_SURFACE}); auto engine = memoryManager->getRegisteredEngines()[0]; allocation->updateTaskCount(2, engine.osContext->getContextId()); memoryManager->handleFenceCompletion(allocation); EXPECT_EQ(0u, mock->waitUserFenceCall.called); memoryManager->freeGraphicsMemory(allocation); } HWTEST_F(DrmMemoryManagerTest, givenCompletionFenceEnabledWhenHandlingCompletionAndTagAddressIsNullThenDoNotCallWaitUserFence) { mock->ioctl_expected.total = -1; VariableBackup backupFenceSupported{&mock->completionFenceSupported, true}; VariableBackup backupVmBindCallParent{&mock->isVmBindAvailableCall.callParent, false}; VariableBackup backupVmBindReturnValue{&mock->isVmBindAvailableCall.returnValue, true}; auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootDeviceIndex, 1024, AllocationType::COMMAND_BUFFER}); auto engine = memoryManager->getRegisteredEngines()[0]; allocation->updateTaskCount(2, engine.osContext->getContextId()); auto testCsr = static_cast *>(engine.commandStreamReceiver); auto backupTagAddress = testCsr->tagAddress; testCsr->tagAddress = nullptr; memoryManager->handleFenceCompletion(allocation); EXPECT_EQ(0u, mock->waitUserFenceCall.called); testCsr->tagAddress = backupTagAddress; memoryManager->freeGraphicsMemory(allocation); } } // namespace NEO 
compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/linux/drm_memory_manager_tests_impl.h000066400000000000000000000022261422164147700337110ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/test/common/helpers/ult_hw_config.h" #include "shared/test/common/helpers/variable_backup.h" #include "shared/test/common/os_interface/linux/drm_memory_manager_tests.h" #include "opencl/test/unit_test/os_interface/linux/device_command_stream_fixture_impl.h" namespace NEO { class DrmMemoryManagerFixtureImpl : public DrmMemoryManagerFixture { public: DrmMockCustomImpl *mockExp; void SetUp() override { backup = std::make_unique>(&ultHwConfig); ultHwConfig.csrBaseCallCreatePreemption = false; MemoryManagementFixture::SetUp(); executionEnvironment = MockDevice::prepareExecutionEnvironment(defaultHwInfo.get(), numRootDevices - 1); mockExp = new DrmMockCustomImpl(*executionEnvironment->rootDeviceEnvironments[0]); DrmMemoryManagerFixture::SetUp(mockExp, true); } void TearDown() override { mockExp->testIoctls(); DrmMemoryManagerFixture::TearDown(); } std::unique_ptr> backup; }; } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/linux/drm_mock_memory_info.h000066400000000000000000000016611422164147700320020ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/os_interface/linux/memory_info.h" const std::vector memoryRegions = { {{I915_MEMORY_CLASS_SYSTEM, 0}, 64 * GB, 0}, {{I915_MEMORY_CLASS_DEVICE, 0}, 8 * GB, 0}}; struct MockMemoryInfo : public MemoryInfo { MockMemoryInfo() : MemoryInfo(memoryRegions) {} ~MockMemoryInfo() override = default; }; const std::vector extendedMemoryRegions = { {{I915_MEMORY_CLASS_SYSTEM, 1}, 64 * GB, 0}, {{I915_MEMORY_CLASS_DEVICE, 0x100}, 8 * GB, 0}, {{I915_MEMORY_CLASS_DEVICE, 0x200}, 8 * GB, 0}, {{I915_MEMORY_CLASS_DEVICE, 
0x400}, 8 * GB, 0}, {{I915_MEMORY_CLASS_DEVICE, 0x800}, 8 * GB, 0}}; struct MockExtendedMemoryInfo : public MemoryInfo { MockExtendedMemoryInfo() : MemoryInfo(extendedMemoryRegions) {} ~MockExtendedMemoryInfo() override = default; }; compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/linux/drm_os_memory_tests.cpp000066400000000000000000000123631422164147700322350ustar00rootroot00000000000000/* * Copyright (C) 2019-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/constants.h" #include "shared/source/helpers/file_io.h" #include "shared/source/os_interface/linux/os_memory_linux.h" #include "shared/source/utilities/stackvec.h" #include "gtest/gtest.h" namespace NEO { class MockOSMemoryLinux : public OSMemoryLinux { public: static std::unique_ptr create() { return std::make_unique(); } MockOSMemoryLinux() = default; void *mmapWrapper(void *addr, size_t size, int prot, int flags, int fd, off_t off) override { mmapWrapperCalled++; mmapWrapperParamsPassed.push_back({addr, size, prot, flags, fd, off}); return this->baseMmapWrapper(addr, size, prot, flags, fd, off); } struct MmapWrapperParams { void *addr; size_t size; int prot; int flags; int fd; off_t off; }; uint32_t mmapWrapperCalled = 0u; StackVec mmapWrapperParamsPassed{}; int munmapWrapper(void *addr, size_t size) override { munmapWrapperCalled++; munmapWrapperParamsPassed.push_back({addr, size}); return this->baseMunmapWrapper(addr, size); } struct MunmapWrapperParams { void *addr; size_t size; }; uint32_t munmapWrapperCalled = 0u; StackVec munmapWrapperParamsPassed{}; void *baseMmapWrapper(void *addr, size_t size, int prot, int flags, int fd, off_t off) { return OSMemoryLinux::mmapWrapper(addr, size, prot, flags, fd, off); } int baseMunmapWrapper(void *addr, size_t size) { return OSMemoryLinux::munmapWrapper(addr, size); } }; TEST(OSMemoryLinux, givenOSMemoryLinuxWhenReserveCpuAddressRangeIsCalledThenMinusOneIsPassedToMmapAsFdParam) { auto mockOSMemoryLinux = 
MockOSMemoryLinux::create(); auto reservedCpuRange = mockOSMemoryLinux->reserveCpuAddressRange(MemoryConstants::pageSize, MemoryConstants::pageSize64k); mockOSMemoryLinux->releaseCpuAddressRange(reservedCpuRange); EXPECT_EQ(-1, mockOSMemoryLinux->mmapWrapperParamsPassed[0].fd); EXPECT_EQ(reservedCpuRange.originalPtr, mockOSMemoryLinux->munmapWrapperParamsPassed[0].addr); EXPECT_EQ(reservedCpuRange.actualReservedSize, mockOSMemoryLinux->munmapWrapperParamsPassed[0].size); } TEST(OSMemoryLinux, givenOSMemoryLinuxWhenReserveCpuAddressRangeIsCalledAndBaseAddressIsSpecifiedThenCorrectValueIsPassedToMmapAsAddrParam) { auto mockOSMemoryLinux = MockOSMemoryLinux::create(); auto reservedCpuRange = mockOSMemoryLinux->reserveCpuAddressRange(reinterpret_cast(0x10000000), MemoryConstants::pageSize, MemoryConstants::pageSize64k); mockOSMemoryLinux->releaseCpuAddressRange(reservedCpuRange); EXPECT_EQ(reinterpret_cast(0x10000000), mockOSMemoryLinux->mmapWrapperParamsPassed[0].addr); EXPECT_EQ(-1, mockOSMemoryLinux->mmapWrapperParamsPassed[0].fd); EXPECT_EQ(reservedCpuRange.originalPtr, mockOSMemoryLinux->munmapWrapperParamsPassed[0].addr); EXPECT_EQ(reservedCpuRange.actualReservedSize, mockOSMemoryLinux->munmapWrapperParamsPassed[0].size); } TEST(OSMemoryLinux, givenOSMemoryLinuxWhenReserveCpuAddressRangeIsCalledAndBaseAddressIsNotSpecifiedThenoZeroIsPassedToMmapAsAddrParam) { auto mockOSMemoryLinux = MockOSMemoryLinux::create(); auto reservedCpuRange = mockOSMemoryLinux->reserveCpuAddressRange(MemoryConstants::pageSize, MemoryConstants::pageSize64k); mockOSMemoryLinux->releaseCpuAddressRange(reservedCpuRange); EXPECT_EQ(nullptr, mockOSMemoryLinux->mmapWrapperParamsPassed[0].addr); EXPECT_EQ(-1, mockOSMemoryLinux->mmapWrapperParamsPassed[0].fd); EXPECT_EQ(reservedCpuRange.originalPtr, mockOSMemoryLinux->munmapWrapperParamsPassed[0].addr); EXPECT_EQ(reservedCpuRange.actualReservedSize, mockOSMemoryLinux->munmapWrapperParamsPassed[0].size); } TEST(OSMemoryLinux, 
GivenProcSelfMapsFileExistsWhenGetMemoryMapsIsQueriedThenValidValueIsReturned) { auto mockOSMemoryLinux = MockOSMemoryLinux::create(); std::string mapsFile = "test_files/linux/proc/self/maps"; EXPECT_TRUE(fileExists(mapsFile)); OSMemory::MemoryMaps memoryMaps; mockOSMemoryLinux->getMemoryMaps(memoryMaps); static const OSMemory::MappedRegion referenceMaps[] = { {0x564fcd1fa000, 0x564fcd202000}, {0x564fcd401000, 0x564fcd402000}, {0x564fcd402000, 0x564fcd403000}, {0x564fcdf40000, 0x564fcdf61000}, {0x7fded3d79000, 0x7fded4879000}, {0x7fded4879000, 0x7fded4a60000}, {0x7fded4a60000, 0x7fded4c60000}, {0x7fded4c60000, 0x7fded4c64000}, {0x7fded4c64000, 0x7fded4c66000}, {0x7fded4c66000, 0x7fded4c6a000}, {0x7fded4c6a000, 0x7fded4c91000}, {0x7fded4e54000, 0x7fded4e78000}, {0x7fded4e91000, 0x7fded4e92000}, {0x7fded4e92000, 0x7fded4e93000}, {0x7fded4e93000, 0x7fded4e94000}, {0x7ffd6dfa2000, 0x7ffd6dfc3000}, {0x7ffd6dfe8000, 0x7ffd6dfeb000}, {0x7ffd6dfeb000, 0x7ffd6dfec000}, {0xffffffffff600000, 0xffffffffff601000}}; EXPECT_FALSE(memoryMaps.empty()); EXPECT_EQ(memoryMaps.size(), GTEST_ARRAY_SIZE_(referenceMaps)); for (size_t i = 0; i < memoryMaps.size(); ++i) { EXPECT_EQ(memoryMaps[i].start, referenceMaps[i].start); EXPECT_EQ(memoryMaps[i].end, referenceMaps[i].end); } } }; // namespace NEO drm_residency_handler_prelim_tests.cpp000066400000000000000000001321331422164147700351750ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/linux/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/direct_submission/dispatchers/render_dispatcher.h" #include "shared/source/direct_submission/linux/drm_direct_submission.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/helpers/timestamp_packet.h" #include "shared/source/os_interface/hw_info_config.h" #include 
"shared/source/os_interface/linux/cache_info_impl.h" #include "shared/source/os_interface/linux/clos_helper.h" #include "shared/source/os_interface/linux/drm_memory_operations_handler_bind.h" #include "shared/source/os_interface/linux/os_context_linux.h" #include "shared/source/os_interface/os_interface.h" #include "shared/source/utilities/tag_allocator.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/engine_descriptor_helper.h" #include "shared/test/common/libult/linux/drm_query_mock.h" #include "shared/test/common/libult/ult_command_stream_receiver.h" #include "shared/test/common/mocks/linux/mock_drm_memory_manager.h" #include "shared/test/common/mocks/mock_allocation_properties.h" #include "shared/test/common/mocks/mock_command_stream_receiver.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "shared/test/common/test_macros/test.h" #include using namespace NEO; struct MockDrmMemoryOperationsHandlerBind : public DrmMemoryOperationsHandlerBind { using DrmMemoryOperationsHandlerBind::DrmMemoryOperationsHandlerBind; using DrmMemoryOperationsHandlerBind::evictImpl; bool useBaseEvictUnused = true; uint32_t evictUnusedCalled = 0; void evictUnusedAllocations(bool waitForCompletion, bool isLockNeeded) override { evictUnusedCalled++; if (useBaseEvictUnused) { DrmMemoryOperationsHandlerBind::evictUnusedAllocations(waitForCompletion, isLockNeeded); } } }; template struct DrmMemoryOperationsHandlerBindFixture : public ::testing::Test { public: void SetUp() override { DebugManager.flags.DeferOsContextInitialization.set(0); DebugManager.flags.CreateMultipleSubDevices.set(2u); VariableBackup mockDeviceFlagBackup(&MockDevice::createSingleDevice, false); executionEnvironment = new ExecutionEnvironment; executionEnvironment->prepareRootDeviceEnvironments(numRootDevices); for (uint32_t i = 0u; i < numRootDevices; i++) { 
executionEnvironment->rootDeviceEnvironments[i]->setHwInfo(defaultHwInfo.get()); } executionEnvironment->calculateMaxOsContextCount(); for (uint32_t i = 0u; i < numRootDevices; i++) { auto mock = new DrmQueryMock(*executionEnvironment->rootDeviceEnvironments[i]); mock->setBindAvailable(); executionEnvironment->rootDeviceEnvironments[i]->osInterface = std::make_unique(); executionEnvironment->rootDeviceEnvironments[i]->osInterface->setDriverModel(std::unique_ptr(mock)); executionEnvironment->rootDeviceEnvironments[i]->memoryOperationsInterface.reset(new MockDrmMemoryOperationsHandlerBind(*executionEnvironment->rootDeviceEnvironments[i].get(), i)); devices.emplace_back(MockDevice::createWithExecutionEnvironment(defaultHwInfo.get(), executionEnvironment, i)); } memoryManager = std::make_unique(*executionEnvironment); device = devices[0].get(); mock = executionEnvironment->rootDeviceEnvironments[0]->osInterface->getDriverModel()->as(); operationHandler = static_cast(executionEnvironment->rootDeviceEnvironments[0]->memoryOperationsInterface.get()); memoryManagerBackup = executionEnvironment->memoryManager.release(); executionEnvironment->memoryManager.reset(memoryManager.get()); memoryManager->registeredEngines = memoryManagerBackup->getRegisteredEngines(); } void TearDown() override { executionEnvironment->memoryManager.release(); executionEnvironment->memoryManager.reset(memoryManagerBackup); memoryManager->getRegisteredEngines().clear(); } protected: ExecutionEnvironment *executionEnvironment = nullptr; MockDevice *device; std::vector> devices; std::unique_ptr memoryManager; MockDrmMemoryOperationsHandlerBind *operationHandler = nullptr; DebugManagerStateRestore restorer; DrmQueryMock *mock; MemoryManager *memoryManagerBackup; }; using DrmMemoryOperationsHandlerBindMultiRootDeviceTest = DrmMemoryOperationsHandlerBindFixture<2u>; TEST_F(DrmMemoryOperationsHandlerBindMultiRootDeviceTest, whenSetNewResourceBoundToVMThenAllContextsUsingThatVMHasSetNewResourceBound) { 
mock->setNewResourceBoundToVM(1u); for (const auto &engine : device->getAllEngines()) { auto osContexLinux = static_cast(engine.osContext); if (osContexLinux->getDeviceBitfield().test(1u)) { EXPECT_TRUE(osContexLinux->getNewResourceBound()); } else { EXPECT_FALSE(osContexLinux->getNewResourceBound()); } osContexLinux->setNewResourceBound(false); } for (const auto &engine : devices[1]->getAllEngines()) { auto osContexLinux = static_cast(engine.osContext); EXPECT_FALSE(osContexLinux->getNewResourceBound()); } auto mock2 = executionEnvironment->rootDeviceEnvironments[1u]->osInterface->getDriverModel()->as(); mock2->setNewResourceBoundToVM(0u); for (const auto &engine : devices[1]->getAllEngines()) { auto osContexLinux = static_cast(engine.osContext); if (osContexLinux->getDeviceBitfield().test(0u)) { EXPECT_TRUE(osContexLinux->getNewResourceBound()); } else { EXPECT_FALSE(osContexLinux->getNewResourceBound()); } } for (const auto &engine : device->getAllEngines()) { auto osContexLinux = static_cast(engine.osContext); EXPECT_FALSE(osContexLinux->getNewResourceBound()); } } using DrmMemoryOperationsHandlerBindTest = DrmMemoryOperationsHandlerBindFixture<1u>; TEST_F(DrmMemoryOperationsHandlerBindTest, whenNoSpaceLeftOnDeviceThenEvictUnusedAllocations) { auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), MemoryConstants::pageSize}); mock->context.vmBindReturn = -1; mock->baseErrno = false; mock->errnoRetVal = ENOSPC; operationHandler->useBaseEvictUnused = false; EXPECT_EQ(operationHandler->evictUnusedCalled, 0u); operationHandler->makeResident(device, ArrayRef(&allocation, 1)); EXPECT_EQ(operationHandler->evictUnusedCalled, 1u); memoryManager->freeGraphicsMemory(allocation); } TEST_F(DrmMemoryOperationsHandlerBindTest, givenObjectAlwaysResidentAndNotUsedWhenRunningOutOfMemoryThenUnusedAllocationIsNotUnbound) { auto allocation = 
memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), MemoryConstants::pageSize}); for (auto &engine : device->getAllEngines()) { *engine.commandStreamReceiver->getTagAddress() = 10; allocation->updateTaskCount(GraphicsAllocation::objectNotUsed, engine.osContext->getContextId()); EXPECT_EQ(operationHandler->makeResidentWithinOsContext(engine.osContext, ArrayRef(&allocation, 1), true), MemoryOperationsStatus::SUCCESS); } for (auto &engine : device->getSubDevice(0u)->getAllEngines()) { *engine.commandStreamReceiver->getTagAddress() = 10; allocation->updateResidencyTaskCount(GraphicsAllocation::objectAlwaysResident, engine.osContext->getContextId()); EXPECT_EQ(operationHandler->makeResidentWithinOsContext(engine.osContext, ArrayRef(&allocation, 1), true), MemoryOperationsStatus::SUCCESS); } for (auto &engine : device->getSubDevice(1u)->getAllEngines()) { *engine.commandStreamReceiver->getTagAddress() = 10; allocation->updateTaskCount(GraphicsAllocation::objectNotUsed, engine.osContext->getContextId()); EXPECT_EQ(operationHandler->makeResidentWithinOsContext(engine.osContext, ArrayRef(&allocation, 1), true), MemoryOperationsStatus::SUCCESS); } EXPECT_EQ(mock->context.vmBindCalled, 2u); operationHandler->evictUnusedAllocations(false, true); EXPECT_EQ(mock->context.vmBindCalled, 2u); EXPECT_EQ(mock->context.vmUnbindCalled, 1u); memoryManager->freeGraphicsMemory(allocation); } HWTEST_F(DrmMemoryOperationsHandlerBindTest, givenMakeEachAllocationResidentWhenCreateAllocationThenVmBindIsCalled) { DebugManagerStateRestore restorer; DebugManager.flags.MakeEachAllocationResident.set(1); EXPECT_EQ(mock->context.vmBindCalled, 0u); auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, allocation); EXPECT_EQ(mock->context.vmBindCalled, 2u); auto &csr = device->getUltCommandStreamReceiver(); 
csr.makeResident(*allocation); EXPECT_EQ(csr.getResidencyAllocations().size(), 0u); memoryManager->freeGraphicsMemory(allocation); } HWTEST_F(DrmMemoryOperationsHandlerBindTest, givenMakeEachAllocationResidentWhenMergeWithResidencyContainerThenVmBindIsCalled) { DebugManagerStateRestore restorer; DebugManager.flags.MakeEachAllocationResident.set(2); EXPECT_EQ(mock->context.vmBindCalled, 0u); auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), MemoryConstants::pageSize}); EXPECT_EQ(mock->context.vmBindCalled, 0u); auto &csr = device->getUltCommandStreamReceiver(); ResidencyContainer residency; operationHandler->mergeWithResidencyContainer(&csr.getOsContext(), residency); EXPECT_EQ(mock->context.vmBindCalled, 2u); csr.makeResident(*allocation); EXPECT_EQ(csr.getResidencyAllocations().size(), 0u); memoryManager->freeGraphicsMemory(allocation); } HWTEST_F(DrmMemoryOperationsHandlerBindTest, whenEvictUnusedResourcesWithWaitForCompletionThenWaitCsrMethodIsCalled) { auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), MemoryConstants::pageSize}); for (auto &engine : device->getAllEngines()) { *engine.commandStreamReceiver->getTagAddress() = 10; allocation->updateTaskCount(8u, engine.osContext->getContextId()); EXPECT_EQ(operationHandler->makeResidentWithinOsContext(engine.osContext, ArrayRef(&allocation, 1), true), MemoryOperationsStatus::SUCCESS); } for (auto &engine : device->getSubDevice(0u)->getAllEngines()) { *engine.commandStreamReceiver->getTagAddress() = 10; allocation->updateTaskCount(8u, engine.osContext->getContextId()); EXPECT_EQ(operationHandler->makeResidentWithinOsContext(engine.osContext, ArrayRef(&allocation, 1), true), MemoryOperationsStatus::SUCCESS); } for (auto &engine : device->getSubDevice(1u)->getAllEngines()) { *engine.commandStreamReceiver->getTagAddress() = 10; allocation->updateTaskCount(8u, 
engine.osContext->getContextId()); EXPECT_EQ(operationHandler->makeResidentWithinOsContext(engine.osContext, ArrayRef(&allocation, 1), true), MemoryOperationsStatus::SUCCESS); } *device->getSubDevice(1u)->getDefaultEngine().commandStreamReceiver->getTagAddress() = 5; auto &csr = device->getUltCommandStreamReceiver(); csr.latestWaitForCompletionWithTimeoutTaskCount.store(123u); operationHandler->evictUnusedAllocations(true, true); auto latestWaitTaskCount = csr.latestWaitForCompletionWithTimeoutTaskCount.load(); EXPECT_NE(latestWaitTaskCount, 123u); memoryManager->freeGraphicsMemory(allocation); } TEST_F(DrmMemoryOperationsHandlerBindTest, whenRunningOutOfMemoryThenUnusedAllocationsAreUnbound) { auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), MemoryConstants::pageSize}); for (auto &engine : device->getAllEngines()) { *engine.commandStreamReceiver->getTagAddress() = 10; allocation->updateTaskCount(8u, engine.osContext->getContextId()); EXPECT_EQ(operationHandler->makeResidentWithinOsContext(engine.osContext, ArrayRef(&allocation, 1), true), MemoryOperationsStatus::SUCCESS); } for (auto &engine : device->getSubDevice(0u)->getAllEngines()) { *engine.commandStreamReceiver->getTagAddress() = 10; allocation->updateTaskCount(8u, engine.osContext->getContextId()); EXPECT_EQ(operationHandler->makeResidentWithinOsContext(engine.osContext, ArrayRef(&allocation, 1), true), MemoryOperationsStatus::SUCCESS); } for (auto &engine : device->getSubDevice(1u)->getAllEngines()) { *engine.commandStreamReceiver->getTagAddress() = 10; allocation->updateTaskCount(8u, engine.osContext->getContextId()); EXPECT_EQ(operationHandler->makeResidentWithinOsContext(engine.osContext, ArrayRef(&allocation, 1), true), MemoryOperationsStatus::SUCCESS); } *device->getSubDevice(1u)->getDefaultEngine().commandStreamReceiver->getTagAddress() = 5; EXPECT_EQ(mock->context.vmBindCalled, 2u); 
operationHandler->evictUnusedAllocations(false, true); EXPECT_EQ(mock->context.vmBindCalled, 2u); EXPECT_EQ(mock->context.vmUnbindCalled, 1u); memoryManager->freeGraphicsMemory(allocation); } TEST_F(DrmMemoryOperationsHandlerBindTest, givenUsedAllocationInBothSubdevicesWhenEvictUnusedThenNothingIsUnbound) { auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), MemoryConstants::pageSize}); for (auto &engine : device->getAllEngines()) { *engine.commandStreamReceiver->getTagAddress() = 5; allocation->updateTaskCount(8u, engine.osContext->getContextId()); EXPECT_EQ(operationHandler->makeResidentWithinOsContext(engine.osContext, ArrayRef(&allocation, 1), true), MemoryOperationsStatus::SUCCESS); } for (auto &engine : device->getSubDevice(0u)->getAllEngines()) { *engine.commandStreamReceiver->getTagAddress() = 5; allocation->updateTaskCount(8u, engine.osContext->getContextId()); EXPECT_EQ(operationHandler->makeResidentWithinOsContext(engine.osContext, ArrayRef(&allocation, 1), true), MemoryOperationsStatus::SUCCESS); } for (auto &engine : device->getSubDevice(1u)->getAllEngines()) { *engine.commandStreamReceiver->getTagAddress() = 5; allocation->updateTaskCount(8u, engine.osContext->getContextId()); EXPECT_EQ(operationHandler->makeResidentWithinOsContext(engine.osContext, ArrayRef(&allocation, 1), true), MemoryOperationsStatus::SUCCESS); } EXPECT_EQ(mock->context.vmBindCalled, 2u); operationHandler->evictUnusedAllocations(false, true); EXPECT_EQ(mock->context.vmBindCalled, 2u); EXPECT_EQ(mock->context.vmUnbindCalled, 0u); memoryManager->freeGraphicsMemory(allocation); } TEST_F(DrmMemoryOperationsHandlerBindTest, givenResidencyWithinOsContextFailsThenThenMergeWithResidencyContainertReturnsError) { struct MockDrmMemoryOperationsHandlerBindResidencyFail : public DrmMemoryOperationsHandlerBind { MockDrmMemoryOperationsHandlerBindResidencyFail(RootDeviceEnvironment &rootDeviceEnvironment, uint32_t 
rootDeviceIndex) : DrmMemoryOperationsHandlerBind(rootDeviceEnvironment, rootDeviceIndex) {} MemoryOperationsStatus makeResidentWithinOsContext(OsContext *osContext, ArrayRef gfxAllocations, bool evictable) override { return NEO::MemoryOperationsStatus::FAILED; } }; ResidencyContainer residencyContainer; executionEnvironment->rootDeviceEnvironments[0]->memoryOperationsInterface.reset(new MockDrmMemoryOperationsHandlerBindResidencyFail(*executionEnvironment->rootDeviceEnvironments[0], 0u)); MockDrmMemoryOperationsHandlerBindResidencyFail *operationsHandlerResidency = static_cast(executionEnvironment->rootDeviceEnvironments[0]->memoryOperationsInterface.get()); auto &engines = device->getAllEngines(); for (const auto &engine : engines) { EXPECT_NE(operationsHandlerResidency->mergeWithResidencyContainer(engine.osContext, residencyContainer), MemoryOperationsStatus::SUCCESS); } } TEST_F(DrmMemoryOperationsHandlerBindTest, givenEvictWithinOsContextFailsThenEvictReturnsError) { struct MockDrmMemoryOperationsHandlerBindEvictFail : public DrmMemoryOperationsHandlerBind { MockDrmMemoryOperationsHandlerBindEvictFail(RootDeviceEnvironment &rootDeviceEnvironment, uint32_t rootDeviceIndex) : DrmMemoryOperationsHandlerBind(rootDeviceEnvironment, rootDeviceIndex) {} MemoryOperationsStatus evictWithinOsContext(OsContext *osContext, GraphicsAllocation &gfxAllocation) override { return NEO::MemoryOperationsStatus::FAILED; } }; auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), MemoryConstants::pageSize}); executionEnvironment->rootDeviceEnvironments[0]->memoryOperationsInterface.reset(new MockDrmMemoryOperationsHandlerBindEvictFail(*executionEnvironment->rootDeviceEnvironments[0], 0u)); MockDrmMemoryOperationsHandlerBindEvictFail *operationsHandlerEvict = static_cast(executionEnvironment->rootDeviceEnvironments[0]->memoryOperationsInterface.get()); EXPECT_NE(operationsHandlerEvict->evict(device, *allocation), 
MemoryOperationsStatus::SUCCESS); memoryManager->freeGraphicsMemory(allocation); } TEST_F(DrmMemoryOperationsHandlerBindTest, givenEvictImplFailsThenEvictWithinOsContextReturnsError) { struct MockDrmMemoryOperationsHandlerBindEvictImplFail : public DrmMemoryOperationsHandlerBind { MockDrmMemoryOperationsHandlerBindEvictImplFail(RootDeviceEnvironment &rootDeviceEnvironment, uint32_t rootDeviceIndex) : DrmMemoryOperationsHandlerBind(rootDeviceEnvironment, rootDeviceIndex) {} int evictImpl(OsContext *osContext, GraphicsAllocation &gfxAllocation, DeviceBitfield deviceBitfield) override { return -1; } }; auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), MemoryConstants::pageSize}); executionEnvironment->rootDeviceEnvironments[0]->memoryOperationsInterface.reset(new MockDrmMemoryOperationsHandlerBindEvictImplFail(*executionEnvironment->rootDeviceEnvironments[0], 0u)); MockDrmMemoryOperationsHandlerBindEvictImplFail *operationsHandlerEvict = static_cast(executionEnvironment->rootDeviceEnvironments[0]->memoryOperationsInterface.get()); auto &engines = device->getAllEngines(); for (const auto &engine : engines) { EXPECT_NE(operationsHandlerEvict->evictWithinOsContext(engine.osContext, *allocation), MemoryOperationsStatus::SUCCESS); } memoryManager->freeGraphicsMemory(allocation); } TEST_F(DrmMemoryOperationsHandlerBindTest, givenMakeBOsResidentFailsThenMakeResidentWithinOsContextReturnsError) { struct MockDrmAllocationBOsResident : public DrmAllocation { MockDrmAllocationBOsResident(uint32_t rootDeviceIndex, AllocationType allocationType, BufferObjects &bos, void *ptrIn, uint64_t gpuAddress, size_t sizeIn, MemoryPool::Type pool) : DrmAllocation(rootDeviceIndex, allocationType, bos, ptrIn, gpuAddress, sizeIn, pool) { } int makeBOsResident(OsContext *osContext, uint32_t vmHandleId, std::vector *bufferObjects, bool bind) override { return -1; } }; auto size = 1024u; BufferObjects bos; auto allocation = 
new MockDrmAllocationBOsResident(0, AllocationType::UNKNOWN, bos, nullptr, 0u, size, MemoryPool::LocalMemory); auto graphicsAllocation = static_cast(allocation); EXPECT_EQ(operationHandler->makeResidentWithinOsContext(device->getDefaultEngine().osContext, ArrayRef(&graphicsAllocation, 1), false), MemoryOperationsStatus::OUT_OF_MEMORY); delete allocation; } TEST_F(DrmMemoryOperationsHandlerBindTest, givenDrmMemoryOperationBindWhenMakeResidentWithinOsContextEvictableAllocationThenAllocationIsNotMarkedAsAlwaysResident) { auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), MemoryConstants::pageSize}); EXPECT_EQ(operationHandler->makeResidentWithinOsContext(device->getDefaultEngine().osContext, ArrayRef(&allocation, 1), false), MemoryOperationsStatus::SUCCESS); EXPECT_TRUE(allocation->isAlwaysResident(device->getDefaultEngine().osContext->getContextId())); EXPECT_EQ(operationHandler->evict(device, *allocation), MemoryOperationsStatus::SUCCESS); EXPECT_EQ(operationHandler->makeResidentWithinOsContext(device->getDefaultEngine().osContext, ArrayRef(&allocation, 1), true), MemoryOperationsStatus::SUCCESS); EXPECT_FALSE(allocation->isAlwaysResident(device->getDefaultEngine().osContext->getContextId())); memoryManager->freeGraphicsMemory(allocation); } TEST_F(DrmMemoryOperationsHandlerBindTest, givenDrmMemoryOperationBindWhenChangingResidencyThenOperationIsHandledProperly) { auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), MemoryConstants::pageSize}); EXPECT_EQ(operationHandler->isResident(device, *allocation), MemoryOperationsStatus::MEMORY_NOT_FOUND); EXPECT_EQ(operationHandler->makeResident(device, ArrayRef(&allocation, 1)), MemoryOperationsStatus::SUCCESS); EXPECT_EQ(operationHandler->isResident(device, *allocation), MemoryOperationsStatus::SUCCESS); EXPECT_EQ(operationHandler->evict(device, *allocation), 
MemoryOperationsStatus::SUCCESS); EXPECT_EQ(operationHandler->isResident(device, *allocation), MemoryOperationsStatus::MEMORY_NOT_FOUND); memoryManager->freeGraphicsMemory(allocation); } TEST_F(DrmMemoryOperationsHandlerBindTest, givenDeviceWithMultipleSubdevicesWhenMakeResidentWithSubdeviceThenAllocationIsBindedOnlyInItsOsContexts) { auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), MemoryConstants::pageSize}); EXPECT_EQ(operationHandler->isResident(device, *allocation), MemoryOperationsStatus::MEMORY_NOT_FOUND); EXPECT_EQ(operationHandler->isResident(device->getSubDevice(0u), *allocation), MemoryOperationsStatus::MEMORY_NOT_FOUND); EXPECT_EQ(operationHandler->isResident(device->getSubDevice(1u), *allocation), MemoryOperationsStatus::MEMORY_NOT_FOUND); auto retVal = operationHandler->makeResident(device->getSubDevice(1u), ArrayRef(&allocation, 1)); EXPECT_EQ(retVal, MemoryOperationsStatus::SUCCESS); EXPECT_EQ(operationHandler->isResident(device, *allocation), MemoryOperationsStatus::MEMORY_NOT_FOUND); EXPECT_EQ(operationHandler->isResident(device->getSubDevice(0u), *allocation), MemoryOperationsStatus::MEMORY_NOT_FOUND); EXPECT_EQ(operationHandler->isResident(device->getSubDevice(1u), *allocation), MemoryOperationsStatus::SUCCESS); retVal = operationHandler->evict(device->getSubDevice(0u), *allocation); EXPECT_EQ(retVal, MemoryOperationsStatus::SUCCESS); EXPECT_EQ(operationHandler->isResident(device, *allocation), MemoryOperationsStatus::MEMORY_NOT_FOUND); EXPECT_EQ(operationHandler->isResident(device->getSubDevice(0u), *allocation), MemoryOperationsStatus::MEMORY_NOT_FOUND); EXPECT_EQ(operationHandler->isResident(device->getSubDevice(1u), *allocation), MemoryOperationsStatus::SUCCESS); retVal = operationHandler->evict(device->getSubDevice(1u), *allocation); EXPECT_EQ(retVal, MemoryOperationsStatus::SUCCESS); EXPECT_EQ(operationHandler->isResident(device, *allocation), 
MemoryOperationsStatus::MEMORY_NOT_FOUND); EXPECT_EQ(operationHandler->isResident(device->getSubDevice(0u), *allocation), MemoryOperationsStatus::MEMORY_NOT_FOUND); EXPECT_EQ(operationHandler->isResident(device->getSubDevice(1u), *allocation), MemoryOperationsStatus::MEMORY_NOT_FOUND); memoryManager->freeGraphicsMemory(allocation); } TEST_F(DrmMemoryOperationsHandlerBindTest, whenIoctlFailDuringEvictingThenUnrecoverableIsThrown) { auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), MemoryConstants::pageSize}); EXPECT_EQ(operationHandler->isResident(device, *allocation), MemoryOperationsStatus::MEMORY_NOT_FOUND); EXPECT_EQ(operationHandler->makeResident(device, ArrayRef(&allocation, 1)), MemoryOperationsStatus::SUCCESS); EXPECT_EQ(operationHandler->isResident(device, *allocation), MemoryOperationsStatus::SUCCESS); mock->context.vmUnbindReturn = -1; EXPECT_NE(operationHandler->evict(device, *allocation), MemoryOperationsStatus::SUCCESS); mock->context.vmUnbindReturn = 0; memoryManager->freeGraphicsMemory(allocation); } TEST_F(DrmMemoryOperationsHandlerBindTest, whenMakeResidentTwiceThenAllocIsBoundOnlyOnce) { auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), MemoryConstants::pageSize}); EXPECT_EQ(operationHandler->makeResident(device, ArrayRef(&allocation, 1)), MemoryOperationsStatus::SUCCESS); EXPECT_EQ(operationHandler->makeResident(device, ArrayRef(&allocation, 1)), MemoryOperationsStatus::SUCCESS); EXPECT_EQ(operationHandler->isResident(device, *allocation), MemoryOperationsStatus::SUCCESS); EXPECT_EQ(mock->context.vmBindCalled, 2u); memoryManager->freeGraphicsMemory(allocation); } TEST_F(DrmMemoryOperationsHandlerBindTest, WhenVmBindAvaialableThenMemoryManagerReturnsSupportForIndirectAllocationsAsPack) { mock->bindAvailable = true; EXPECT_TRUE(memoryManager->allowIndirectAllocationsAsPack(0u)); } 
// NOTE(review): this span reads like the result of a lossy text extraction. Several template
// argument lists look stripped (e.g. `std::make_unique>(`, `std::make_unique()`, `static_cast(allocation)`,
// `VariableBackup disableBindBackup`), and `×tampStorageAlloc` looks like a mis-decoded
// `&timestampStorageAlloc` (the variable declared just above each use). The code is left untouched
// here; confirm against the pristine sources before building.

// bindAvailable is cleared before DrmMemoryOperationsHandler::create; making an allocation resident
// through the returned handler must leave vmBindCalled at zero.
TEST_F(DrmMemoryOperationsHandlerBindTest, givenNoVmBindSupportInDrmWhenCheckForSupportThenDefaultResidencyHandlerIsReturned) {
    mock->bindAvailable = false;
    auto handler = DrmMemoryOperationsHandler::create(*mock, 0u);
    mock->context.vmBindCalled = 0u;
    auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), MemoryConstants::pageSize});
    handler->makeResident(device, ArrayRef(&allocation, 1));
    EXPECT_FALSE(mock->context.vmBindCalled);
    memoryManager->freeGraphicsMemory(allocation);
}

// Same expectation with the CreateMultipleSubDevices debug flag set to 1.
// NOTE(review): the test name says "VmBindSupport" but the body sets bindAvailable = false — confirm intent.
TEST_F(DrmMemoryOperationsHandlerBindTest, givenVmBindSupportAndNoMultiTileWhenCheckForSupportThenDefaultResidencyHandlerIsReturned) {
    DebugManager.flags.CreateMultipleSubDevices.set(1u);
    mock->bindAvailable = false;
    auto handler = DrmMemoryOperationsHandler::create(*mock, 0u);
    mock->context.vmBindCalled = 0u;
    auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), MemoryConstants::pageSize});
    handler->makeResident(device, ArrayRef(&allocation, 1));
    EXPECT_FALSE(mock->context.vmBindCalled);
    memoryManager->freeGraphicsMemory(allocation);
}

// Same expectation with vmBindReturn preset to 0 (a bind, if issued, would report success).
// NOTE(review): vmBindCalled is reset with `false` here but with `0u` in the sibling tests, and it is
// compared against integer counts elsewhere in this span.
TEST_F(DrmMemoryOperationsHandlerBindTest, givenDisabledVmBindWhenCreateDrmHandlerThenVmBindIsNotUsed) {
    mock->context.vmBindReturn = 0;
    mock->bindAvailable = false;
    auto handler = DrmMemoryOperationsHandler::create(*mock, 0u);
    mock->context.vmBindCalled = false;
    auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), MemoryConstants::pageSize});
    handler->makeResident(device, ArrayRef(&allocation, 1));
    EXPECT_FALSE(mock->context.vmBindCalled);
    memoryManager->freeGraphicsMemory(allocation);
}

// With UseVmBind=1 and bindAvailable set, BufferObject::pin is expected to record two vm-bind calls
// and no execbuffer2 ioctl.
HWTEST_F(DrmMemoryOperationsHandlerBindTest, givenVmBindSupportAndMultiSubdeviceWhenPinBOThenVmBindToAllVMsIsCalledInsteadOfExec) {
    DebugManager.flags.UseVmBind.set(1);
    mock->bindAvailable = true;
    BufferObject pinBB(mock, 1, 0, 1);
    BufferObject boToPin(mock, 2, 0,
                         1);
    BufferObject *boToPinPtr = &boToPin;
    pinBB.pin(&boToPinPtr, 1u, device->getDefaultEngine().osContext, 0u, 0u);
    EXPECT_EQ(mock->context.vmBindCalled, 2u);
    EXPECT_EQ(0, mock->ioctlCount.execbuffer2);
}

// Unlike pin above, validateHostPtr is expected to record exactly one vm-bind and no execbuffer2 ioctl.
HWTEST_F(DrmMemoryOperationsHandlerBindTest, givenVmBindSupportAndMultiSubdeviceWhenValidateHostptrThenOnlyBindToSingleVMIsCalled) {
    DebugManager.flags.UseVmBind.set(1);
    mock->bindAvailable = true;
    BufferObject pinBB(mock, 1, 0, 1);
    BufferObject boToPin(mock, 2, 0, 1);
    BufferObject *boToPinPtr = &boToPin;
    pinBB.validateHostPtr(&boToPinPtr, 1u, device->getDefaultEngine().osContext, 0u, 0u);
    EXPECT_EQ(mock->context.vmBindCalled, 1u);
    EXPECT_EQ(0, mock->ioctlCount.execbuffer2);
}

// validateHostPtr is called with vmHandleId = 1; the recorded bind's vmId must equal
// mock->getVirtualMemoryAddressSpace(vmHandleId).
HWTEST_F(DrmMemoryOperationsHandlerBindTest, givenVmBindSupportAndMultiSubdeviceWhenValidateHostptrThenBindToGivenVm) {
    DebugManager.flags.UseVmBind.set(1);
    mock->bindAvailable = true;
    BufferObject pinBB(mock, 1, 0, 1);
    BufferObject boToPin(mock, 2, 0, 1);
    BufferObject *boToPinPtr = &boToPin;
    uint32_t vmHandleId = 1u;
    pinBB.validateHostPtr(&boToPinPtr, 1u, device->getDefaultEngine().osContext, vmHandleId, 0u);
    EXPECT_EQ(mock->context.vmBindCalled, 1u);
    EXPECT_EQ(0, mock->ioctlCount.execbuffer2);
    EXPECT_EQ(mock->context.receivedVmBind->vmId, mock->getVirtualMemoryAddressSpace(vmHandleId));
}

// vmBindReturn is forced to -1 and two buffer objects are passed; validateHostPtr must return -1 and
// the recorded bind must carry handle 2u (the value boToPin, the first entry, was constructed with).
HWTEST_F(DrmMemoryOperationsHandlerBindTest, givenVmBindSupportAndMultiSubdeviceWhenValidateMultipleBOsAndFirstBindFailsThenOnlyOneBindCalledAndErrorReturned) {
    DebugManager.flags.UseVmBind.set(1);
    mock->bindAvailable = true;
    mock->context.vmBindReturn = -1;
    BufferObject pinBB(mock, 1, 0, 1);
    BufferObject boToPin(mock, 2, 0, 1);
    BufferObject boToPin2(mock, 3, 0, 1);
    BufferObject *boToPinPtr[] = {&boToPin, &boToPin2};
    auto ret = pinBB.validateHostPtr(boToPinPtr, 2u, device->getDefaultEngine().osContext, 0u, 0u);
    EXPECT_EQ(ret, -1);
    EXPECT_EQ(mock->context.receivedVmBind->handle, 2u);
    EXPECT_EQ(0, mock->ioctlCount.execbuffer2);
}

// Direct submission is marked active on the default engine's OS context before pin; a vm-bind must
// have been recorded and no execbuffer2 ioctl issued.
HWTEST_F(DrmMemoryOperationsHandlerBindTest,
         givenDirectSubmissionWhenPinBOThenVmBindIsCalledInsteadOfExec) {
    DebugManager.flags.UseVmBind.set(1);
    mock->bindAvailable = true;
    device->getDefaultEngine().osContext->setDirectSubmissionActive();
    BufferObject pinBB(mock, 1, 0, 1);
    BufferObject boToPin(mock, 2, 0, 1);
    BufferObject *boToPinPtr = &boToPin;
    pinBB.pin(&boToPinPtr, 1u, device->getDefaultEngine().osContext, 0u, 0u);
    EXPECT_TRUE(mock->context.vmBindCalled);
    EXPECT_EQ(0, mock->ioctlCount.execbuffer2);
}

// Same as the previous test, but exercising validateHostPtr instead of pin.
HWTEST_F(DrmMemoryOperationsHandlerBindTest, givenDirectSubmissionAndValidateHostptrWhenPinBOThenVmBindIsCalledInsteadOfExec) {
    DebugManager.flags.UseVmBind.set(1);
    mock->bindAvailable = true;
    device->getDefaultEngine().osContext->setDirectSubmissionActive();
    BufferObject pinBB(mock, 1, 0, 1);
    BufferObject boToPin(mock, 2, 0, 1);
    BufferObject *boToPinPtr = &boToPin;
    pinBB.validateHostPtr(&boToPinPtr, 1u, device->getDefaultEngine().osContext, 0u, 0u);
    EXPECT_TRUE(mock->context.vmBindCalled);
    EXPECT_EQ(0, mock->ioctlCount.execbuffer2);
}

// After a successful pin, bindInfo[0][0] of the pinned object must be set and pin must return a
// value that converts to false.
HWTEST_F(DrmMemoryOperationsHandlerBindTest, givenVmBindSupportWhenPinBOThenAllocIsBound) {
    // Local subclass that re-exports bindInfo (and inherits the constructors) so the test can read it.
    struct MockBO : public BufferObject {
        using BufferObject::bindInfo;
        using BufferObject::BufferObject;
    };
    DebugManager.flags.UseVmBind.set(1);
    mock->bindAvailable = true;
    BufferObject pinBB(mock, 1, 0, 1);
    MockBO boToPin(mock, 2, 0, 1);
    BufferObject *boToPinPtr = &boToPin;
    auto ret = pinBB.pin(&boToPinPtr, 1u, device->getDefaultEngine().osContext, 0u, 0u);
    EXPECT_TRUE(boToPin.bindInfo[0u][0u]);
    EXPECT_FALSE(ret);
}

// Counterpart of the previous test with vmBindReturn forced to -1: bindInfo[0][0] must stay unset
// and pin must return a value that converts to true.
HWTEST_F(DrmMemoryOperationsHandlerBindTest, givenVmBindSupportWhenPinBOAndVmBindFailedThenAllocIsNotBound) {
    // Local subclass that re-exports bindInfo (and inherits the constructors) so the test can read it.
    struct MockBO : public BufferObject {
        using BufferObject::bindInfo;
        using BufferObject::BufferObject;
    };
    DebugManager.flags.UseVmBind.set(1);
    mock->bindAvailable = true;
    mock->context.vmBindReturn = -1;
    BufferObject pinBB(mock, 1, 0, 1);
    MockBO boToPin(mock, 2, 0, 1);
    BufferObject *boToPinPtr = &boToPin;
    auto ret = pinBB.pin(&boToPinPtr, 1u,
                         device->getDefaultEngine().osContext, 0u, 0u);
    EXPECT_FALSE(boToPin.bindInfo[0u][0u]);
    EXPECT_TRUE(ret);
}

// Makes three tag allocations owned by a command stream receiver resident, destroys the receiver,
// and expects the number of recorded vm-unbind calls to match the number of vm-bind calls.
HWTEST_F(DrmMemoryOperationsHandlerBindTest, givenCsrTagAllocatorsWhenDestructingCsrThenAllInternalAllocationsAreUnbound) {
    DebugManager.flags.UseVmBind.set(1);
    mock->bindAvailable = true;
    auto csr = std::make_unique>(*executionEnvironment, 0, DeviceBitfield(1));
    auto osContext = memoryManager->createAndRegisterOsContext(csr.get(), EngineDescriptorHelper::getDefaultDescriptor());
    csr->setupContext(*osContext);
    auto timestampStorageAlloc = csr->getTimestampPacketAllocator()->getTag()->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation();
    auto hwTimeStampsAlloc = csr->getEventTsAllocator()->getTag()->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation();
    auto hwPerfCounterAlloc = csr->getEventPerfCountAllocator(4)->getTag()->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation();
    operationHandler->makeResident(device, ArrayRef(×tampStorageAlloc, 1));
    operationHandler->makeResident(device, ArrayRef(&hwTimeStampsAlloc, 1));
    operationHandler->makeResident(device, ArrayRef(&hwPerfCounterAlloc, 1));
    csr.reset();
    EXPECT_EQ(mock->context.vmBindCalled, mock->context.vmUnbindCalled);
}

// Runs bind/evict once for each ClosEnabled value in {-1, 0, 1}. When the flag is 0, or -1 on hardware
// reporting no cache regions, no PAT index may be recorded on bind or unbind; otherwise both must be 3u.
HWTEST_F(DrmMemoryOperationsHandlerBindTest, givenClosEnabledWhenVmBindCalledThenSetPatIndexExtension) {
    DebugManager.flags.UseVmBind.set(1);
    mock->bindAvailable = true;
    auto csr = std::make_unique>(*executionEnvironment, 0, DeviceBitfield(1));
    auto osContext = memoryManager->createAndRegisterOsContext(csr.get(), EngineDescriptorHelper::getDefaultDescriptor());
    csr->setupContext(*osContext);
    auto timestampStorageAlloc = csr->getTimestampPacketAllocator()->getTag()->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation();
    auto &hwHelper = HwHelper::get(executionEnvironment->rootDeviceEnvironments[0]->getHardwareInfo()->platform.eRenderCoreFamily);
    bool supported = (hwHelper.getNumCacheRegions() > 0);
    for (int32_t debugFlag : {-1, 0, 1}) {
        DebugManager.flags.ClosEnabled.set(debugFlag);
        mock->context.receivedVmBindPatIndex.reset();
        operationHandler->makeResident(device, ArrayRef(×tampStorageAlloc, 1));
        // Disabled explicitly, or left at default on hardware without cache regions.
        if (debugFlag == 0 || (debugFlag == -1 && !supported)) {
            EXPECT_FALSE(mock->context.receivedVmBindPatIndex);
            operationHandler->evict(device, *timestampStorageAlloc);
            EXPECT_FALSE(mock->context.receivedVmUnbindPatIndex);
            continue;
        }
        EXPECT_EQ(3u, mock->context.receivedVmBindPatIndex.value());
        mock->context.receivedVmUnbindPatIndex.reset();
        operationHandler->evict(device, *timestampStorageAlloc);
        EXPECT_EQ(3u, mock->context.receivedVmUnbindPatIndex.value());
        // Clear both recorded values so the next iteration starts clean.
        mock->context.receivedVmBindPatIndex.reset();
        mock->context.receivedVmUnbindPatIndex.reset();
    }
}

// With ClosEnabled=1 and OverridePatIndex=1 the PAT index recorded on bind and on unbind must be 1u.
// Skipped when the hardware helper reports zero cache regions.
HWTEST_F(DrmMemoryOperationsHandlerBindTest, givenDebugFlagSetWhenVmBindCalledThenOverridePatIndex) {
    DebugManager.flags.UseVmBind.set(1);
    DebugManager.flags.ClosEnabled.set(1);
    DebugManager.flags.OverridePatIndex.set(1);
    mock->bindAvailable = true;
    auto csr = std::make_unique>(*executionEnvironment, 0, DeviceBitfield(1));
    auto osContext = memoryManager->createAndRegisterOsContext(csr.get(), EngineDescriptorHelper::getDefaultDescriptor());
    csr->setupContext(*osContext);
    auto timestampStorageAlloc = csr->getTimestampPacketAllocator()->getTag()->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation();
    auto &hwHelper = HwHelper::get(executionEnvironment->rootDeviceEnvironments[0]->getHardwareInfo()->platform.eRenderCoreFamily);
    if (hwHelper.getNumCacheRegions() == 0) {
        GTEST_SKIP();
    }
    operationHandler->makeResident(device, ArrayRef(×tampStorageAlloc, 1));
    EXPECT_EQ(1u, mock->context.receivedVmBindPatIndex.value());
    operationHandler->evict(device, *timestampStorageAlloc);
    EXPECT_EQ(1u, mock->context.receivedVmUnbindPatIndex.value());
}

// Installs a CacheInfoImpl on the mock, then for each cache region sets the allocation's cache advice
// and expects bind and unbind to record ClosHelper::getPatIndex(region, CachePolicy::WriteBack).
TEST_F(DrmMemoryOperationsHandlerBindTest, givenClosEnabledAndAllocationToBeCachedInCacheRegionWhenVmBindIsCalledThenSetPatIndexCorrespondingToRequestedRegion) {
    DebugManager.flags.UseVmBind.set(1);
    DebugManager.flags.ClosEnabled.set(1);
    mock->bindAvailable = true;
    auto csr = std::make_unique(*executionEnvironment, 0, 1);
    auto osContext = memoryManager->createAndRegisterOsContext(csr.get(), EngineDescriptorHelper::getDefaultDescriptor());
    csr->setupContext(*osContext);
    mock->cacheInfo.reset(new CacheInfoImpl(*mock, 64 * MemoryConstants::kiloByte, 2, 32));
    auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), MemoryConstants::pageSize});
    for (auto cacheRegion : {CacheRegion::Default, CacheRegion::Region1, CacheRegion::Region2}) {
        EXPECT_TRUE(static_cast(allocation)->setCacheAdvice(mock, 32 * MemoryConstants::kiloByte, cacheRegion));
        mock->context.receivedVmBindPatIndex.reset();
        operationHandler->makeResident(device, ArrayRef(&allocation, 1));
        EXPECT_EQ(ClosHelper::getPatIndex(cacheRegion, CachePolicy::WriteBack), mock->context.receivedVmBindPatIndex.value());
        mock->context.receivedVmUnbindPatIndex.reset();
        operationHandler->evict(device, *allocation);
        EXPECT_EQ(ClosHelper::getPatIndex(cacheRegion, CachePolicy::WriteBack), mock->context.receivedVmUnbindPatIndex.value());
    }
    memoryManager->freeGraphicsMemory(allocation);
}

// Pins the ClosHelper::getPatIndex mapping: Default region yields 0..3 for the four cache policies;
// Region1/Region2 throw for Uncached and WriteCombined and yield 4/5 and 6/7 for WriteThrough/WriteBack.
TEST(DrmResidencyHandlerTests, givenClosIndexAndMemoryTypeWhenAskingForPatIndexThenReturnCorrectValue) {
    EXPECT_EQ(0u, ClosHelper::getPatIndex(CacheRegion::Default, CachePolicy::Uncached));
    EXPECT_EQ(1u, ClosHelper::getPatIndex(CacheRegion::Default, CachePolicy::WriteCombined));
    EXPECT_EQ(2u, ClosHelper::getPatIndex(CacheRegion::Default, CachePolicy::WriteThrough));
    EXPECT_EQ(3u, ClosHelper::getPatIndex(CacheRegion::Default, CachePolicy::WriteBack));
    EXPECT_ANY_THROW(ClosHelper::getPatIndex(CacheRegion::Region1, CachePolicy::Uncached));
    EXPECT_ANY_THROW(ClosHelper::getPatIndex(CacheRegion::Region1, CachePolicy::WriteCombined));
    EXPECT_EQ(4u, ClosHelper::getPatIndex(CacheRegion::Region1, CachePolicy::WriteThrough));
    EXPECT_EQ(5u, ClosHelper::getPatIndex(CacheRegion::Region1,
                                          CachePolicy::WriteBack));
    EXPECT_ANY_THROW(ClosHelper::getPatIndex(CacheRegion::Region2, CachePolicy::Uncached));
    EXPECT_ANY_THROW(ClosHelper::getPatIndex(CacheRegion::Region2, CachePolicy::WriteCombined));
    EXPECT_EQ(6u, ClosHelper::getPatIndex(CacheRegion::Region2, CachePolicy::WriteThrough));
    EXPECT_EQ(7u, ClosHelper::getPatIndex(CacheRegion::Region2, CachePolicy::WriteBack));
}

// With ForceAllResourcesUncached=1 every region/policy combination must map to PAT index 0,
// including the combinations that throw in the test above.
TEST(DrmResidencyHandlerTests, givenForceAllResourcesUnchashedSetAskingForPatIndexThenReturnCorrectValue) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.ForceAllResourcesUncached.set(1);
    EXPECT_EQ(0u, ClosHelper::getPatIndex(CacheRegion::Default, CachePolicy::Uncached));
    EXPECT_EQ(0u, ClosHelper::getPatIndex(CacheRegion::Default, CachePolicy::WriteCombined));
    EXPECT_EQ(0u, ClosHelper::getPatIndex(CacheRegion::Default, CachePolicy::WriteThrough));
    EXPECT_EQ(0u, ClosHelper::getPatIndex(CacheRegion::Default, CachePolicy::WriteBack));
    EXPECT_EQ(0u, ClosHelper::getPatIndex(CacheRegion::Region1, CachePolicy::Uncached));
    EXPECT_EQ(0u, ClosHelper::getPatIndex(CacheRegion::Region1, CachePolicy::WriteCombined));
    EXPECT_EQ(0u, ClosHelper::getPatIndex(CacheRegion::Region1, CachePolicy::WriteThrough));
    EXPECT_EQ(0u, ClosHelper::getPatIndex(CacheRegion::Region1, CachePolicy::WriteBack));
    EXPECT_EQ(0u, ClosHelper::getPatIndex(CacheRegion::Region2, CachePolicy::Uncached));
    EXPECT_EQ(0u, ClosHelper::getPatIndex(CacheRegion::Region2, CachePolicy::WriteCombined));
    EXPECT_EQ(0u, ClosHelper::getPatIndex(CacheRegion::Region2, CachePolicy::WriteThrough));
    EXPECT_EQ(0u, ClosHelper::getPatIndex(CacheRegion::Region2, CachePolicy::WriteBack));
}

// UseVmBind=1 with the query reporting 1: the first isVmBindAvailable() call performs one query and
// sets bindAvailable; the second call must not query again (the counter stays at 1).
TEST(DrmResidencyHandlerTests, givenSupportedVmBindAndDebugFlagUseVmBindWhenQueryingIsVmBindAvailableThenBindAvailableIsInitializedOnce) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.UseVmBind.set(1);
    auto executionEnvironment = std::make_unique();
    executionEnvironment->prepareRootDeviceEnvironments(1);
    DrmQueryMock
        drm{*executionEnvironment->rootDeviceEnvironments[0]};
    drm.context.vmBindQueryValue = 1;
    EXPECT_FALSE(drm.bindAvailable);
    EXPECT_EQ(0u, drm.context.vmBindQueryCalled);
    EXPECT_TRUE(drm.isVmBindAvailable());
    EXPECT_TRUE(drm.bindAvailable);
    EXPECT_EQ(1u, drm.context.vmBindQueryCalled);
    EXPECT_TRUE(drm.isVmBindAvailable());
    EXPECT_EQ(1u, drm.context.vmBindQueryCalled);
}

// UseVmBind=1 with the query itself failing (vmBindQueryReturn = -1): isVmBindAvailable() must still
// report true, and the query must still be issued only once.
TEST(DrmResidencyHandlerTests, givenDebugFlagUseVmBindWhenQueryingIsVmBindAvailableThenSupportIsOverriden) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.UseVmBind.set(1);
    auto executionEnvironment = std::make_unique();
    executionEnvironment->prepareRootDeviceEnvironments(1);
    DrmQueryMock drm{*executionEnvironment->rootDeviceEnvironments[0]};
    EXPECT_FALSE(drm.bindAvailable);
    drm.context.vmBindQueryReturn = -1;
    EXPECT_EQ(0u, drm.context.vmBindQueryCalled);
    EXPECT_TRUE(drm.isVmBindAvailable());
    EXPECT_TRUE(drm.bindAvailable);
    EXPECT_EQ(1u, drm.context.vmBindQueryCalled);
    EXPECT_TRUE(drm.isVmBindAvailable());
    EXPECT_EQ(1u, drm.context.vmBindQueryCalled);
}

// Declared here, defined elsewhere. The tests below hand its address and `false` to a VariableBackup
// (presumably overriding it for the test's duration — confirm against VariableBackup).
namespace NEO {
extern bool disableBindDefaultInTests;
}

// UseVmBind left at -1 with the query succeeding and reporting 1: isVmBindAvailable() and
// bindAvailable must both equal hwInfoConfig->isNewResidencyModelSupported(), after exactly one query.
TEST(DrmResidencyHandlerTests, givenDebugFlagUseVmBindSetDefaultAndBindAvailableInDrmWhenQueryingIsVmBindAvailableThenBindIsAvailableWhenSupported) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.UseVmBind.set(-1);
    VariableBackup disableBindBackup(&disableBindDefaultInTests, false);
    auto executionEnvironment = std::make_unique();
    executionEnvironment->prepareRootDeviceEnvironments(1);
    DrmQueryMock drm{*executionEnvironment->rootDeviceEnvironments[0]};
    drm.context.vmBindQueryValue = 1;
    drm.context.vmBindQueryReturn = 0;
    EXPECT_FALSE(drm.bindAvailable);
    auto hwInfo = drm.getRootDeviceEnvironment().getHardwareInfo();
    auto hwInfoConfig = HwInfoConfig::get(hwInfo->platform.eProductFamily);
    EXPECT_EQ(0u, drm.context.vmBindQueryCalled);
    EXPECT_EQ(drm.isVmBindAvailable(), hwInfoConfig->isNewResidencyModelSupported());
    EXPECT_EQ(drm.bindAvailable,
              hwInfoConfig->isNewResidencyModelSupported());
    EXPECT_EQ(1u, drm.context.vmBindQueryCalled);
}

// UseVmBind left at -1 with the query failing (vmBindQueryReturn = -1): bind must be reported
// unavailable after exactly one query.
TEST(DrmResidencyHandlerTests, givenDebugFlagUseVmBindSetDefaultWhenQueryingIsVmBindAvailableFailedThenBindIsNot) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.UseVmBind.set(-1);
    VariableBackup disableBindBackup(&disableBindDefaultInTests, false);
    auto executionEnvironment = std::make_unique();
    executionEnvironment->prepareRootDeviceEnvironments(1);
    DrmQueryMock drm{*executionEnvironment->rootDeviceEnvironments[0]};
    drm.context.vmBindQueryValue = 1;
    drm.context.vmBindQueryReturn = -1;
    EXPECT_FALSE(drm.bindAvailable);
    EXPECT_EQ(0u, drm.context.vmBindQueryCalled);
    EXPECT_FALSE(drm.isVmBindAvailable());
    EXPECT_FALSE(drm.bindAvailable);
    EXPECT_EQ(1u, drm.context.vmBindQueryCalled);
}

// UseVmBind left at -1 with the query succeeding but reporting 0: bind must be reported unavailable
// after exactly one query.
TEST(DrmResidencyHandlerTests, givenDebugFlagUseVmBindSetDefaultWhenQueryingIsVmBindAvailableSuccedAndReportNoBindAvailableInDrmThenBindIsNotAvailable) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.UseVmBind.set(-1);
    VariableBackup disableBindBackup(&disableBindDefaultInTests, false);
    auto executionEnvironment = std::make_unique();
    executionEnvironment->prepareRootDeviceEnvironments(1);
    DrmQueryMock drm{*executionEnvironment->rootDeviceEnvironments[0]};
    drm.context.vmBindQueryValue = 0;
    drm.context.vmBindQueryReturn = 0;
    EXPECT_FALSE(drm.bindAvailable);
    EXPECT_EQ(0u, drm.context.vmBindQueryCalled);
    EXPECT_FALSE(drm.isVmBindAvailable());
    EXPECT_FALSE(drm.bindAvailable);
    EXPECT_EQ(1u, drm.context.vmBindQueryCalled);
}

// NOTE(review): the next line appears to be an archive (ustar) member header for
// drm_residency_handler_tests.cpp fused into the text, followed by that file's licence banner and
// includes. The header name of the final #include seems to have been stripped — confirm.
compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/linux/drm_residency_handler_tests.cpp000066400000000000000000000045241422164147700337060ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */
#include "shared/source/os_interface/linux/drm_memory_operations_handler_default.h"
#include "shared/test/common/mocks/mock_graphics_allocation.h"
#include "shared/test/common/test_macros/test.h"
#include 
using namespace NEO; struct MockDrmMemoryOperationsHandlerDefault : public DrmMemoryOperationsHandlerDefault { using DrmMemoryOperationsHandlerDefault::residency; }; struct DrmMemoryOperationsHandlerBaseTest : public ::testing::Test { void SetUp() override { drmMemoryOperationsHandler = std::make_unique(); allocationPtr = &graphicsAllocation; } MockGraphicsAllocation graphicsAllocation; GraphicsAllocation *allocationPtr; std::unique_ptr drmMemoryOperationsHandler; }; TEST_F(DrmMemoryOperationsHandlerBaseTest, whenMakingAllocationResidentThenAllocationIsResident) { EXPECT_EQ(drmMemoryOperationsHandler->makeResident(nullptr, ArrayRef(&allocationPtr, 1)), MemoryOperationsStatus::SUCCESS); EXPECT_EQ(drmMemoryOperationsHandler->residency.size(), 1u); EXPECT_TRUE(drmMemoryOperationsHandler->residency.find(allocationPtr) != drmMemoryOperationsHandler->residency.end()); EXPECT_EQ(drmMemoryOperationsHandler->isResident(nullptr, graphicsAllocation), MemoryOperationsStatus::SUCCESS); } TEST_F(DrmMemoryOperationsHandlerBaseTest, whenEvictingResidentAllocationThenAllocationIsNotResident) { EXPECT_EQ(drmMemoryOperationsHandler->residency.size(), 0u); EXPECT_EQ(drmMemoryOperationsHandler->makeResident(nullptr, ArrayRef(&allocationPtr, 1)), MemoryOperationsStatus::SUCCESS); EXPECT_EQ(drmMemoryOperationsHandler->isResident(nullptr, graphicsAllocation), MemoryOperationsStatus::SUCCESS); EXPECT_EQ(drmMemoryOperationsHandler->residency.size(), 1u); EXPECT_TRUE(drmMemoryOperationsHandler->residency.find(allocationPtr) != drmMemoryOperationsHandler->residency.end()); EXPECT_EQ(drmMemoryOperationsHandler->evict(nullptr, graphicsAllocation), MemoryOperationsStatus::SUCCESS); EXPECT_EQ(drmMemoryOperationsHandler->isResident(nullptr, graphicsAllocation), MemoryOperationsStatus::MEMORY_NOT_FOUND); EXPECT_EQ(drmMemoryOperationsHandler->residency.size(), 0u); } 
compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/linux/drm_system_info_tests.cpp000066400000000000000000000164331422164147700325650ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/linux/system_info.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/default_hw_info.h" #include "shared/test/common/libult/linux/drm_mock.h" #include "shared/test/common/os_interface/linux/drm_mock_device_blob.h" #include "shared/test/unit_test/helpers/gtest_helpers.h" #include "gtest/gtest.h" using namespace NEO; TEST(DrmSystemInfoTest, whenQueryingSystemInfoThenSystemInfoIsNotCreatedAndIoctlsAreCalledOnce) { auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); DrmMock drm(*executionEnvironment->rootDeviceEnvironments[0]); EXPECT_FALSE(drm.querySystemInfo()); EXPECT_EQ(nullptr, drm.getSystemInfo()); EXPECT_EQ(1u, drm.ioctlCallsCount); } TEST(DrmSystemInfoTest, givenSystemInfoCreatedWhenQueryingSpecificAtrributesThenReturnZero) { std::vector inputData{}; SystemInfo systemInfo(inputData); EXPECT_EQ(0u, systemInfo.getL3CacheSizeInKb()); EXPECT_EQ(0u, systemInfo.getL3BankCount()); EXPECT_EQ(0u, systemInfo.getMemoryType()); EXPECT_EQ(0u, systemInfo.getMaxMemoryChannels()); EXPECT_EQ(0u, systemInfo.getNumThreadsPerEu()); EXPECT_EQ(0u, systemInfo.getMaxFillRate()); EXPECT_EQ(0u, systemInfo.getTotalVsThreads()); EXPECT_EQ(0u, systemInfo.getTotalHsThreads()); EXPECT_EQ(0u, systemInfo.getTotalDsThreads()); EXPECT_EQ(0u, systemInfo.getTotalGsThreads()); EXPECT_EQ(0u, systemInfo.getTotalPsThreads()); EXPECT_EQ(0u, systemInfo.getMaxEuPerDualSubSlice()); EXPECT_EQ(0u, systemInfo.getMaxSlicesSupported()); EXPECT_EQ(0u, systemInfo.getMaxDualSubSlicesSupported()); EXPECT_EQ(0u, systemInfo.getMaxRCS()); EXPECT_EQ(0u, systemInfo.getMaxCCS()); } TEST(DrmSystemInfoTest, 
givenSetupHardwareInfoWhenQuerySystemInfoFalseThenSystemInfoIsNotCreatedAndDebugMessageIsNotPrinted) { struct DrmMockToQuerySystemInfo : public DrmMock { DrmMockToQuerySystemInfo(RootDeviceEnvironment &rootDeviceEnvironment) : DrmMock(rootDeviceEnvironment) {} bool querySystemInfo() override { return false; } }; DebugManagerStateRestore restorer; DebugManager.flags.PrintDebugMessages.set(true); auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); DrmMockToQuerySystemInfo drm(*executionEnvironment->rootDeviceEnvironments[0]); HardwareInfo hwInfo = *defaultHwInfo; auto setupHardwareInfo = [](HardwareInfo *, bool) {}; DeviceDescriptor device = {0, &hwInfo, setupHardwareInfo}; ::testing::internal::CaptureStdout(); int ret = drm.setupHardwareInfo(&device, false); EXPECT_EQ(ret, 0); EXPECT_EQ(nullptr, drm.getSystemInfo()); EXPECT_TRUE(isEmpty(::testing::internal::GetCapturedStdout())); } TEST(DrmSystemInfoTest, whenQueryingSystemInfoThenSystemInfoIsCreatedAndReturnsNonZeros) { auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); DrmMockEngine drm(*executionEnvironment->rootDeviceEnvironments[0]); EXPECT_TRUE(drm.querySystemInfo()); auto systemInfo = drm.getSystemInfo(); EXPECT_NE(nullptr, systemInfo); EXPECT_NE(0u, systemInfo->getMaxMemoryChannels()); EXPECT_NE(0u, systemInfo->getMemoryType()); EXPECT_NE(0u, systemInfo->getTotalVsThreads()); EXPECT_NE(0u, systemInfo->getTotalHsThreads()); EXPECT_NE(0u, systemInfo->getTotalDsThreads()); EXPECT_NE(0u, systemInfo->getTotalGsThreads()); EXPECT_NE(0u, systemInfo->getTotalPsThreads()); EXPECT_NE(0u, systemInfo->getMaxEuPerDualSubSlice()); EXPECT_NE(0u, systemInfo->getMaxSlicesSupported()); EXPECT_NE(0u, systemInfo->getMaxDualSubSlicesSupported()); EXPECT_NE(0u, systemInfo->getMaxDualSubSlicesSupported()); EXPECT_NE(0u, systemInfo->getMaxRCS()); 
EXPECT_NE(0u, systemInfo->getMaxCCS()); EXPECT_EQ(2u, drm.ioctlCallsCount); } TEST(DrmSystemInfoTest, givenSystemInfoCreatedFromDeviceBlobWhenQueryingSpecificAtrributesThenReturnCorrectValues) { SystemInfo systemInfo(inputBlobData); EXPECT_EQ(0x0Au, systemInfo.getMaxMemoryChannels()); EXPECT_EQ(0x0Bu, systemInfo.getMemoryType()); EXPECT_EQ(0x10u, systemInfo.getTotalVsThreads()); EXPECT_EQ(0x12u, systemInfo.getTotalHsThreads()); EXPECT_EQ(0x13u, systemInfo.getTotalDsThreads()); EXPECT_EQ(0x11u, systemInfo.getTotalGsThreads()); EXPECT_EQ(0x15u, systemInfo.getTotalPsThreads()); EXPECT_EQ(0x03u, systemInfo.getMaxEuPerDualSubSlice()); EXPECT_EQ(0x01u, systemInfo.getMaxSlicesSupported()); EXPECT_EQ(0x02u, systemInfo.getMaxDualSubSlicesSupported()); EXPECT_EQ(0x02u, systemInfo.getMaxDualSubSlicesSupported()); EXPECT_EQ(0x17u, systemInfo.getMaxRCS()); EXPECT_EQ(0x18u, systemInfo.getMaxCCS()); } TEST(DrmSystemInfoTest, givenSetupHardwareInfoWhenQuerySystemInfoFailsThenSystemInfoIsNotCreatedAndDebugMessageIsPrinted) { DebugManagerStateRestore restorer; DebugManager.flags.PrintDebugMessages.set(true); auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); DrmMockEngine drm(*executionEnvironment->rootDeviceEnvironments[0]); HardwareInfo hwInfo = *defaultHwInfo; auto setupHardwareInfo = [](HardwareInfo *, bool) {}; DeviceDescriptor device = {0, &hwInfo, setupHardwareInfo}; ::testing::internal::CaptureStdout(); drm.failQueryDeviceBlob = true; int ret = drm.setupHardwareInfo(&device, false); EXPECT_EQ(ret, 0); EXPECT_EQ(nullptr, drm.getSystemInfo()); EXPECT_TRUE(hasSubstr(::testing::internal::GetCapturedStdout(), "INFO: System Info query failed!\n")); } TEST(DrmSystemInfoTest, givenSetupHardwareInfoWhenQuerySystemInfoSucceedsThenSystemInfoIsCreatedAndUsedToSetHardwareInfoAttributes) { auto executionEnvironment = std::make_unique(); 
executionEnvironment->prepareRootDeviceEnvironments(1); executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); DrmMockEngine drm(*executionEnvironment->rootDeviceEnvironments[0]); HardwareInfo hwInfo = *defaultHwInfo; auto setupHardwareInfo = [](HardwareInfo *, bool) {}; GT_SYSTEM_INFO >SystemInfo = hwInfo.gtSystemInfo; DeviceDescriptor device = {0, &hwInfo, setupHardwareInfo}; int ret = drm.setupHardwareInfo(&device, false); EXPECT_EQ(ret, 0); EXPECT_NE(nullptr, drm.getSystemInfo()); EXPECT_GT(gtSystemInfo.TotalVsThreads, 0u); EXPECT_GT(gtSystemInfo.TotalHsThreads, 0u); EXPECT_GT(gtSystemInfo.TotalDsThreads, 0u); EXPECT_GT(gtSystemInfo.TotalGsThreads, 0u); EXPECT_GT(gtSystemInfo.TotalPsThreadsWindowerRange, 0u); EXPECT_GT(gtSystemInfo.TotalDsThreads, 0u); EXPECT_GT(gtSystemInfo.MaxEuPerSubSlice, 0u); EXPECT_GT(gtSystemInfo.MaxSlicesSupported, 0u); EXPECT_GT(gtSystemInfo.MaxSubSlicesSupported, 0u); EXPECT_GT(gtSystemInfo.MaxDualSubSlicesSupported, 0u); EXPECT_GT(gtSystemInfo.MemoryType, 0u); } compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/linux/drm_tests.cpp000066400000000000000000001332641422164147700301500ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/file_io.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/os_interface/device_factory.h" #include "shared/source/os_interface/linux/os_context_linux.h" #include "shared/source/os_interface/os_interface.h" #include "shared/test/common/fixtures/memory_management_fixture.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/default_hw_info.h" #include "shared/test/common/helpers/engine_descriptor_helper.h" #include "shared/test/common/libult/linux/drm_mock.h" #include "shared/test/common/mocks/linux/mock_os_context_linux.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include 
"opencl/test/unit_test/mocks/mock_platform.h" #include "gtest/gtest.h" #include #include using namespace NEO; TEST(DrmTest, WhenGettingDeviceIdThenCorrectIdReturned) { auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); DrmMock *pDrm = new DrmMock(*executionEnvironment->rootDeviceEnvironments[0]); EXPECT_NE(nullptr, pDrm); pDrm->storedDeviceID = 0x1234; int deviceID = 0; int ret = pDrm->getDeviceID(deviceID); EXPECT_EQ(0, ret); EXPECT_EQ(pDrm->storedDeviceID, deviceID); delete pDrm; } TEST(DrmTest, GivenValidPciPathWhenGettingAdapterBdfThenCorrectValuesAreReturned) { auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); DrmMock drm{*executionEnvironment->rootDeviceEnvironments[0]}; { drm.setPciPath("0000:ab:cd.e"); EXPECT_EQ(0, drm.queryAdapterBDF()); auto adapterBdf = drm.getAdapterBDF(); EXPECT_EQ(0xabu, adapterBdf.Bus); EXPECT_EQ(0xcdu, adapterBdf.Device); EXPECT_EQ(0xeu, adapterBdf.Function); auto pciInfo = drm.getPciBusInfo(); EXPECT_EQ(0x0u, pciInfo.pciDomain); EXPECT_EQ(0xabu, pciInfo.pciBus); EXPECT_EQ(0xcdu, pciInfo.pciDevice); EXPECT_EQ(0xeu, pciInfo.pciFunction); } { drm.setPciPath("0000:01:23.4"); EXPECT_EQ(0, drm.queryAdapterBDF()); auto adapterBdf = drm.getAdapterBDF(); EXPECT_EQ(0x1u, adapterBdf.Bus); EXPECT_EQ(0x23u, adapterBdf.Device); EXPECT_EQ(0x4u, adapterBdf.Function); auto pciInfo = drm.getPciBusInfo(); EXPECT_EQ(0x0u, pciInfo.pciDomain); EXPECT_EQ(0x1u, pciInfo.pciBus); EXPECT_EQ(0x23u, pciInfo.pciDevice); EXPECT_EQ(0x4u, pciInfo.pciFunction); } } TEST(DrmTest, GivenInvalidPciPathWhenGettingAdapterBdfThenInvalidPciInfoIsReturned) { auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); DrmMock drm{*executionEnvironment->rootDeviceEnvironments[0]}; drm.setPciPath("invalidPci"); EXPECT_EQ(1, drm.queryAdapterBDF()); auto adapterBdf = drm.getAdapterBDF(); EXPECT_EQ(std::numeric_limits::max(), 
adapterBdf.Data); auto pciInfo = drm.getPciBusInfo(); EXPECT_EQ(PhysicalDevicePciBusInfo::InvalidValue, pciInfo.pciDomain); EXPECT_EQ(PhysicalDevicePciBusInfo::InvalidValue, pciInfo.pciBus); EXPECT_EQ(PhysicalDevicePciBusInfo::InvalidValue, pciInfo.pciDevice); EXPECT_EQ(PhysicalDevicePciBusInfo::InvalidValue, pciInfo.pciFunction); } TEST(DrmTest, GivenInvalidPciPathWhenFrequencyIsQueriedThenReturnError) { auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); DrmMock drm{*executionEnvironment->rootDeviceEnvironments[0]}; auto hwInfo = *defaultHwInfo; int maxFrequency = 0; drm.setPciPath("invalidPci"); int ret = drm.getMaxGpuFrequency(hwInfo, maxFrequency); EXPECT_NE(0, ret); EXPECT_EQ(0, maxFrequency); } TEST(DrmTest, WhenGettingRevisionIdThenCorrectIdIsReturned) { auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); DrmMock *pDrm = new DrmMock(*executionEnvironment->rootDeviceEnvironments[0]); EXPECT_NE(nullptr, pDrm); pDrm->storedDeviceID = 0x1234; pDrm->storedDeviceRevID = 0xB; int deviceID = 0; int ret = pDrm->getDeviceID(deviceID); EXPECT_EQ(0, ret); int revID = 0; ret = pDrm->getDeviceRevID(revID); EXPECT_EQ(0, ret); EXPECT_EQ(pDrm->storedDeviceID, deviceID); EXPECT_EQ(pDrm->storedDeviceRevID, revID); delete pDrm; } TEST(DrmTest, GivenDrmWhenAskedForGttSizeThenReturnCorrectValue) { auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); auto drm = std::make_unique(*executionEnvironment->rootDeviceEnvironments[0]); uint64_t queryGttSize = 0; drm->storedRetValForGetGttSize = 0; drm->storedGTTSize = 1ull << 31; EXPECT_EQ(0, drm->queryGttSize(queryGttSize)); EXPECT_EQ(drm->storedGTTSize, queryGttSize); queryGttSize = 0; drm->storedRetValForGetGttSize = -1; EXPECT_NE(0, drm->queryGttSize(queryGttSize)); EXPECT_EQ(0u, queryGttSize); } TEST(DrmTest, GivenDrmWhenAskedForPreemptionThenCorrectValueReturned) { auto 
executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); DrmMock *pDrm = new DrmMock(*executionEnvironment->rootDeviceEnvironments[0]); pDrm->storedRetVal = 0; pDrm->storedPreemptionSupport = I915_SCHEDULER_CAP_ENABLED | I915_SCHEDULER_CAP_PRIORITY | I915_SCHEDULER_CAP_PREEMPTION; pDrm->checkPreemptionSupport(); EXPECT_TRUE(pDrm->isPreemptionSupported()); pDrm->storedPreemptionSupport = 0; pDrm->checkPreemptionSupport(); EXPECT_FALSE(pDrm->isPreemptionSupported()); pDrm->storedRetVal = -1; pDrm->storedPreemptionSupport = I915_SCHEDULER_CAP_ENABLED | I915_SCHEDULER_CAP_PRIORITY | I915_SCHEDULER_CAP_PREEMPTION; pDrm->checkPreemptionSupport(); EXPECT_FALSE(pDrm->isPreemptionSupported()); pDrm->storedPreemptionSupport = 0; pDrm->checkPreemptionSupport(); EXPECT_FALSE(pDrm->isPreemptionSupported()); delete pDrm; } TEST(DrmTest, GivenDrmWhenAskedForContextThatFailsThenFalseIsReturned) { auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); DrmMock *pDrm = new DrmMock(*executionEnvironment->rootDeviceEnvironments[0]); pDrm->storedRetVal = -1; EXPECT_THROW(pDrm->createDrmContext(1, false, false), std::exception); pDrm->storedRetVal = 0; delete pDrm; } TEST(DrmTest, givenDrmWhenOsContextIsCreatedThenCreateAndDestroyNewDrmOsContext) { auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); DrmMock drmMock(*executionEnvironment->rootDeviceEnvironments[0]); executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); { OsContextLinux osContext1(drmMock, 0u, EngineDescriptorHelper::getDefaultDescriptor()); osContext1.ensureContextInitialized(); EXPECT_EQ(1u, osContext1.getDrmContextIds().size()); EXPECT_EQ(drmMock.storedDrmContextId, osContext1.getDrmContextIds()[0]); EXPECT_EQ(0u, drmMock.receivedDestroyContextId); { OsContextLinux osContext2(drmMock, 0u, EngineDescriptorHelper::getDefaultDescriptor()); 
osContext2.ensureContextInitialized(); EXPECT_EQ(1u, osContext2.getDrmContextIds().size()); EXPECT_EQ(drmMock.storedDrmContextId, osContext2.getDrmContextIds()[0]); EXPECT_EQ(0u, drmMock.receivedDestroyContextId); } } EXPECT_EQ(2u, drmMock.receivedContextParamRequestCount); } TEST(DrmTest, whenCreatingDrmContextWithVirtualMemoryAddressSpaceThenProperVmIdIsSet) { auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); DrmMock drmMock(*executionEnvironment->rootDeviceEnvironments[0]); ASSERT_EQ(1u, drmMock.virtualMemoryIds.size()); OsContextLinux osContext(drmMock, 0u, EngineDescriptorHelper::getDefaultDescriptor()); osContext.ensureContextInitialized(); EXPECT_EQ(drmMock.receivedContextParamRequest.value, drmMock.getVirtualMemoryAddressSpace(0u)); } TEST(DrmTest, whenCreatingDrmContextWithNoVirtualMemoryAddressSpaceThenProperContextIdIsSet) { auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); DrmMock drmMock(*executionEnvironment->rootDeviceEnvironments[0]); drmMock.destroyVirtualMemoryAddressSpace(); ASSERT_EQ(0u, drmMock.virtualMemoryIds.size()); OsContextLinux osContext(drmMock, 0u, EngineDescriptorHelper::getDefaultDescriptor()); osContext.ensureContextInitialized(); EXPECT_EQ(0u, drmMock.receivedContextParamRequestCount); } TEST(DrmTest, givenDrmAndNegativeCheckNonPersistentContextsSupportWhenOsContextIsCreatedThenReceivedContextParamRequestCountReturnsCorrectValue) { auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); DrmMock drmMock(*executionEnvironment->rootDeviceEnvironments[0]); auto expectedCount = 0u; { drmMock.storedRetValForPersistant = -1; 
drmMock.checkNonPersistentContextsSupport(); expectedCount += 2; OsContextLinux osContext(drmMock, 0u, EngineDescriptorHelper::getDefaultDescriptor()); osContext.ensureContextInitialized(); EXPECT_EQ(expectedCount, drmMock.receivedContextParamRequestCount); } { drmMock.storedRetValForPersistant = 0; drmMock.checkNonPersistentContextsSupport(); ++expectedCount; OsContextLinux osContext(drmMock, 0u, EngineDescriptorHelper::getDefaultDescriptor()); osContext.ensureContextInitialized(); expectedCount += 2; EXPECT_EQ(expectedCount, drmMock.receivedContextParamRequestCount); } } TEST(DrmTest, givenDrmPreemptionEnabledAndLowPriorityEngineWhenCreatingOsContextThenCallSetContextPriorityIoctl) { auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); DrmMock drmMock(*executionEnvironment->rootDeviceEnvironments[0]); drmMock.preemptionSupported = false; OsContextLinux osContext1(drmMock, 0u, EngineDescriptorHelper::getDefaultDescriptor()); osContext1.ensureContextInitialized(); OsContextLinux osContext2(drmMock, 0u, EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_RCS, EngineUsage::LowPriority})); osContext2.ensureContextInitialized(); EXPECT_EQ(2u, drmMock.receivedContextParamRequestCount); drmMock.preemptionSupported = true; OsContextLinux osContext3(drmMock, 0u, EngineDescriptorHelper::getDefaultDescriptor()); osContext3.ensureContextInitialized(); EXPECT_EQ(3u, drmMock.receivedContextParamRequestCount); OsContextLinux osContext4(drmMock, 0u, EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_RCS, EngineUsage::LowPriority})); osContext4.ensureContextInitialized(); EXPECT_EQ(5u, drmMock.receivedContextParamRequestCount); EXPECT_EQ(drmMock.storedDrmContextId, drmMock.receivedContextParamRequest.ctx_id); EXPECT_EQ(static_cast(I915_CONTEXT_PARAM_PRIORITY), drmMock.receivedContextParamRequest.param); 
EXPECT_EQ(static_cast(-1023), drmMock.receivedContextParamRequest.value); EXPECT_EQ(0u, drmMock.receivedContextParamRequest.size); } TEST(DrmTest, WhenGettingExecSoftPinThenCorrectValueIsReturned) { auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); DrmMock *pDrm = new DrmMock(*executionEnvironment->rootDeviceEnvironments[0]); int execSoftPin = 0; int ret = pDrm->getExecSoftPin(execSoftPin); EXPECT_EQ(0, ret); EXPECT_EQ(0, execSoftPin); pDrm->storedExecSoftPin = 1; ret = pDrm->getExecSoftPin(execSoftPin); EXPECT_EQ(0, ret); EXPECT_EQ(1, execSoftPin); delete pDrm; } TEST(DrmTest, WhenEnablingTurboBoostThenSucceeds) { auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); DrmMock *pDrm = new DrmMock(*executionEnvironment->rootDeviceEnvironments[0]); int ret = pDrm->enableTurboBoost(); EXPECT_EQ(0, ret); delete pDrm; } TEST(DrmTest, WhenGettingEnabledPooledEuThenCorrectValueIsReturned) { auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); DrmMock *pDrm = new DrmMock(*executionEnvironment->rootDeviceEnvironments[0]); int enabled = 0; int ret = 0; pDrm->storedHasPooledEU = -1; #if defined(I915_PARAM_HAS_POOLED_EU) ret = pDrm->getEnabledPooledEu(enabled); EXPECT_EQ(0, ret); EXPECT_EQ(-1, enabled); pDrm->storedHasPooledEU = 0; ret = pDrm->getEnabledPooledEu(enabled); EXPECT_EQ(0, ret); EXPECT_EQ(0, enabled); pDrm->storedHasPooledEU = 1; ret = pDrm->getEnabledPooledEu(enabled); EXPECT_EQ(0, ret); EXPECT_EQ(1, enabled); pDrm->storedRetValForPooledEU = -1; ret = pDrm->getEnabledPooledEu(enabled); EXPECT_EQ(-1, ret); EXPECT_EQ(1, enabled); #else ret = pDrm->getEnabledPooledEu(enabled); EXPECT_EQ(0, ret); EXPECT_EQ(0, enabled); #endif delete pDrm; } TEST(DrmTest, WhenGettingMinEuInPoolThenCorrectValueIsReturned) { auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); DrmMock 
*pDrm = new DrmMock(*executionEnvironment->rootDeviceEnvironments[0]); pDrm->storedMinEUinPool = -1; int minEUinPool = 0; int ret = 0; #if defined(I915_PARAM_MIN_EU_IN_POOL) ret = pDrm->getMinEuInPool(minEUinPool); EXPECT_EQ(0, ret); EXPECT_EQ(-1, minEUinPool); pDrm->storedMinEUinPool = 0; ret = pDrm->getMinEuInPool(minEUinPool); EXPECT_EQ(0, ret); EXPECT_EQ(0, minEUinPool); pDrm->storedMinEUinPool = 1; ret = pDrm->getMinEuInPool(minEUinPool); EXPECT_EQ(0, ret); EXPECT_EQ(1, minEUinPool); pDrm->storedRetValForMinEUinPool = -1; ret = pDrm->getMinEuInPool(minEUinPool); EXPECT_EQ(-1, ret); EXPECT_EQ(1, minEUinPool); #else ret = pDrm->getMinEuInPool(minEUinPool); EXPECT_EQ(0, ret); EXPECT_EQ(0, minEUinPool); #endif delete pDrm; } TEST(DrmTest, givenDrmWhenGetErrnoIsCalledThenErrnoValueIsReturned) { auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); DrmMock *pDrm = new DrmMock(*executionEnvironment->rootDeviceEnvironments[0]); EXPECT_NE(nullptr, pDrm); auto errnoFromDrm = pDrm->getErrno(); EXPECT_EQ(errno, errnoFromDrm); delete pDrm; } TEST(DrmTest, givenPlatformWhereGetSseuRetFailureWhenCallSetQueueSliceCountThenSliceCountIsNotSet) { uint64_t newSliceCount = 1; auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); auto drm = std::make_unique(*executionEnvironment->rootDeviceEnvironments[0]); drm->storedRetValForGetSSEU = -1; drm->checkQueueSliceSupport(); EXPECT_FALSE(drm->sliceCountChangeSupported); EXPECT_FALSE(drm->setQueueSliceCount(newSliceCount)); EXPECT_NE(drm->getSliceMask(newSliceCount), drm->storedParamSseu); } TEST(DrmTest, whenCheckNonPeristentSupportIsCalledThenAreNonPersistentContextsSupportedReturnsCorrectValues) { auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); auto drm = std::make_unique(*executionEnvironment->rootDeviceEnvironments[0]); drm->storedRetValForPersistant = -1; 
drm->checkNonPersistentContextsSupport(); EXPECT_FALSE(drm->areNonPersistentContextsSupported()); drm->storedRetValForPersistant = 0; drm->checkNonPersistentContextsSupport(); EXPECT_TRUE(drm->areNonPersistentContextsSupported()); } TEST(DrmTest, givenPlatformWhereSetSseuRetFailureWhenCallSetQueueSliceCountThenReturnFalse) { uint64_t newSliceCount = 1; auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); auto drm = std::make_unique(*executionEnvironment->rootDeviceEnvironments[0]); drm->storedRetValForSetSSEU = -1; drm->storedRetValForGetSSEU = 0; drm->checkQueueSliceSupport(); EXPECT_TRUE(drm->sliceCountChangeSupported); EXPECT_FALSE(drm->setQueueSliceCount(newSliceCount)); } TEST(DrmTest, givenPlatformWithSupportToChangeSliceCountWhenCallSetQueueSliceCountThenReturnTrue) { uint64_t newSliceCount = 1; auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); auto drm = std::make_unique(*executionEnvironment->rootDeviceEnvironments[0]); drm->storedRetValForSetSSEU = 0; drm->storedRetValForSetSSEU = 0; drm->checkQueueSliceSupport(); EXPECT_TRUE(drm->sliceCountChangeSupported); EXPECT_TRUE(drm->setQueueSliceCount(newSliceCount)); drm_i915_gem_context_param_sseu sseu = {}; EXPECT_EQ(0, drm->getQueueSliceCount(&sseu)); EXPECT_EQ(drm->getSliceMask(newSliceCount), sseu.slice_mask); } namespace NEO { namespace SysCalls { extern uint32_t closeFuncCalled; extern int closeFuncArgPassed; extern uint32_t vmId; } // namespace SysCalls } // namespace NEO TEST(HwDeviceId, whenHwDeviceIdIsDestroyedThenFileDescriptorIsClosed) { SysCalls::closeFuncCalled = 0; int fileDescriptor = 0x1234; { HwDeviceIdDrm hwDeviceId(fileDescriptor, ""); } EXPECT_EQ(1u, SysCalls::closeFuncCalled); EXPECT_EQ(fileDescriptor, SysCalls::closeFuncArgPassed); } TEST(DrmTest, givenDrmWhenCreatingOsContextThenCreateDrmContextWithVmId) { auto executionEnvironment = std::make_unique(); 
executionEnvironment->prepareRootDeviceEnvironments(1); executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); DrmMock drmMock(*executionEnvironment->rootDeviceEnvironments[0]); OsContextLinux osContext(drmMock, 0u, EngineDescriptorHelper::getDefaultDescriptor()); osContext.ensureContextInitialized(); EXPECT_EQ(SysCalls::vmId, drmMock.getVirtualMemoryAddressSpace(0)); auto &contextIds = osContext.getDrmContextIds(); EXPECT_EQ(1u, contextIds.size()); } TEST(DrmTest, givenDrmWithPerContextVMRequiredWhenCreatingOsContextsThenImplicitVmIdPerContextIsUsed) { auto &rootEnv = *platform()->peekExecutionEnvironment()->rootDeviceEnvironments[0]; rootEnv.executionEnvironment.setDebuggingEnabled(); DrmMock drmMock(rootEnv); EXPECT_TRUE(drmMock.requirePerContextVM); OsContextLinux osContext1(drmMock, 0u, EngineDescriptorHelper::getDefaultDescriptor()); osContext1.ensureContextInitialized(); OsContextLinux osContext2(drmMock, 5u, EngineDescriptorHelper::getDefaultDescriptor()); osContext2.ensureContextInitialized(); } TEST(DrmTest, givenPerContextVMRequiredWhenCreatingOsContextsThenImplicitVmIdPerContextIsQueriedAndStored) { auto &rootEnv = *platform()->peekExecutionEnvironment()->rootDeviceEnvironments[0]; rootEnv.executionEnvironment.setDebuggingEnabled(); DrmMock drmMock(rootEnv); EXPECT_TRUE(drmMock.requirePerContextVM); drmMock.storedRetValForVmId = 20; OsContextLinux osContext(drmMock, 0u, EngineDescriptorHelper::getDefaultDescriptor()); osContext.ensureContextInitialized(); EXPECT_EQ(2u, drmMock.receivedContextParamRequestCount); auto &drmVmIds = osContext.getDrmVmIds(); EXPECT_EQ(32u, drmVmIds.size()); EXPECT_EQ(20u, drmVmIds[0]); } TEST(DrmTest, givenPerContextVMRequiredWhenCreatingOsContextForSubDeviceThenImplicitVmIdPerContextIsQueriedAndStoredAtSubDeviceIndex) { auto &rootEnv = *platform()->peekExecutionEnvironment()->rootDeviceEnvironments[0]; rootEnv.executionEnvironment.setDebuggingEnabled(); DrmMock drmMock(rootEnv); 
EXPECT_TRUE(drmMock.requirePerContextVM); drmMock.storedRetValForVmId = 20; DeviceBitfield deviceBitfield(1 << 3); OsContextLinux osContext(drmMock, 0u, EngineDescriptorHelper::getDefaultDescriptor(deviceBitfield)); osContext.ensureContextInitialized(); EXPECT_EQ(2u, drmMock.receivedContextParamRequestCount); auto &drmVmIds = osContext.getDrmVmIds(); EXPECT_EQ(32u, drmVmIds.size()); EXPECT_EQ(20u, drmVmIds[3]); EXPECT_EQ(0u, drmVmIds[0]); EXPECT_EQ(0u, drmVmIds[2]); } TEST(DrmTest, givenPerContextVMRequiredWhenCreatingOsContextsForRootDeviceThenImplicitVmIdsPerContextAreQueriedAndStoredAtSubDeviceIndices) { auto &rootEnv = *platform()->peekExecutionEnvironment()->rootDeviceEnvironments[0]; rootEnv.executionEnvironment.setDebuggingEnabled(); DrmMock drmMock(rootEnv); EXPECT_TRUE(drmMock.requirePerContextVM); drmMock.storedRetValForVmId = 20; DeviceBitfield deviceBitfield(1 | 1 << 1); OsContextLinux osContext(drmMock, 0u, EngineDescriptorHelper::getDefaultDescriptor(deviceBitfield)); osContext.ensureContextInitialized(); EXPECT_EQ(2 * 2u, drmMock.receivedContextParamRequestCount); auto &drmVmIds = osContext.getDrmVmIds(); EXPECT_EQ(32u, drmVmIds.size()); EXPECT_EQ(20u, drmVmIds[0]); EXPECT_EQ(20u, drmVmIds[1]); EXPECT_EQ(0u, drmVmIds[2]); EXPECT_EQ(0u, drmVmIds[31]); } TEST(DrmTest, givenNoPerContextVmsDrmWhenCreatingOsContextsThenVmIdIsNotQueriedAndStored) { auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); DrmMock drmMock(*executionEnvironment->rootDeviceEnvironments[0]); EXPECT_FALSE(drmMock.requirePerContextVM); drmMock.storedRetValForVmId = 1; OsContextLinux osContext(drmMock, 0u, EngineDescriptorHelper::getDefaultDescriptor()); osContext.ensureContextInitialized(); EXPECT_EQ(1u, drmMock.receivedContextParamRequestCount); auto &drmVmIds = osContext.getDrmVmIds(); EXPECT_EQ(0u, drmVmIds.size()); } TEST(DrmTest, 
givenProgramDebuggingAndContextDebugAvailableWhenCreatingContextThenSetContextDebugFlagIsCalled) { auto executionEnvironment = std::make_unique(); executionEnvironment->setDebuggingEnabled(); executionEnvironment->prepareRootDeviceEnvironments(1); executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); executionEnvironment->calculateMaxOsContextCount(); executionEnvironment->rootDeviceEnvironments[0]->osInterface = std::make_unique(); DrmMockNonFailing drmMock(*executionEnvironment->rootDeviceEnvironments[0]); drmMock.contextDebugSupported = true; drmMock.callBaseCreateDrmContext = false; OsContextLinux osContext(drmMock, 5u, EngineDescriptorHelper::getDefaultDescriptor()); osContext.ensureContextInitialized(); // drmMock returns ctxId == 0 EXPECT_EQ(0u, drmMock.passedContextDebugId); } TEST(DrmTest, givenProgramDebuggingAndContextDebugAvailableWhenCreatingContextForInternalEngineThenSetContextDebugFlagIsNotCalled) { auto executionEnvironment = std::make_unique(); executionEnvironment->setDebuggingEnabled(); executionEnvironment->prepareRootDeviceEnvironments(1); executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); executionEnvironment->calculateMaxOsContextCount(); executionEnvironment->rootDeviceEnvironments[0]->osInterface = std::make_unique(); DrmMockNonFailing drmMock(*executionEnvironment->rootDeviceEnvironments[0]); drmMock.contextDebugSupported = true; OsContextLinux osContext(drmMock, 5u, EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_RCS, EngineUsage::Internal})); osContext.ensureContextInitialized(); EXPECT_EQ(static_cast(-1), drmMock.passedContextDebugId); } TEST(DrmTest, givenNotEnabledDebuggingOrContextDebugUnsupportedWhenCreatingContextThenCooperativeFlagIsNotPassedToCreateDrmContext) { auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); 
executionEnvironment->calculateMaxOsContextCount(); executionEnvironment->rootDeviceEnvironments[0]->osInterface = std::make_unique(); DrmMockNonFailing drmMock(*executionEnvironment->rootDeviceEnvironments[0]); drmMock.contextDebugSupported = true; drmMock.callBaseCreateDrmContext = false; drmMock.capturedCooperativeContextRequest = true; EXPECT_FALSE(executionEnvironment->isDebuggingEnabled()); OsContextLinux osContext(drmMock, 5u, EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_RCS, EngineUsage::Regular})); osContext.ensureContextInitialized(); EXPECT_FALSE(drmMock.capturedCooperativeContextRequest); executionEnvironment->setDebuggingEnabled(); drmMock.contextDebugSupported = false; drmMock.callBaseCreateDrmContext = false; drmMock.capturedCooperativeContextRequest = true; OsContextLinux osContext2(drmMock, 5u, EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_RCS, EngineUsage::Regular})); osContext2.ensureContextInitialized(); EXPECT_FALSE(drmMock.capturedCooperativeContextRequest); } TEST(DrmTest, givenPrintIoctlDebugFlagSetWhenGettingTimestampFrequencyThenCaptureExpectedOutput) { DebugManagerStateRestore restore; DebugManager.flags.PrintIoctlEntries.set(true); auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); DrmMock drm{*executionEnvironment->rootDeviceEnvironments[0]}; int frequency = 0; testing::internal::CaptureStdout(); // start capturing int ret = drm.getTimestampFrequency(frequency); std::string outputString = testing::internal::GetCapturedStdout(); // stop capturing EXPECT_EQ(0, ret); EXPECT_EQ(1000, frequency); std::string expectedString = "DRM_IOCTL_I915_GETPARAM: param: I915_PARAM_CS_TIMESTAMP_FREQUENCY, output value: 1000, retCode: 0"; EXPECT_NE(std::string::npos, outputString.find(expectedString)); } TEST(DrmTest, givenPrintIoctlDebugFlagNotSetWhenGettingTimestampFrequencyThenCaptureExpectedOutput) { DebugManagerStateRestore restore; 
DebugManager.flags.PrintIoctlEntries.set(false); auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); DrmMock drm{*executionEnvironment->rootDeviceEnvironments[0]}; int frequency = 0; testing::internal::CaptureStdout(); // start capturing int ret = drm.getTimestampFrequency(frequency); std::string outputString = testing::internal::GetCapturedStdout(); // stop capturing EXPECT_EQ(0, ret); EXPECT_EQ(1000, frequency); std::string expectedString = "DRM_IOCTL_I915_GETPARAM: param: I915_PARAM_CS_TIMESTAMP_FREQUENCY, output value: 1000, retCode: 0"; EXPECT_EQ(std::string::npos, outputString.find(expectedString)); } TEST(DrmTest, givenProgramDebuggingWhenCreatingContextThenUnrecoverableContextIsSet) { auto executionEnvironment = std::make_unique(); executionEnvironment->setDebuggingEnabled(); executionEnvironment->prepareRootDeviceEnvironments(1); executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); executionEnvironment->calculateMaxOsContextCount(); executionEnvironment->rootDeviceEnvironments[0]->osInterface = std::make_unique(); DrmMock drm(*executionEnvironment->rootDeviceEnvironments[0]); OsContextLinux osContext(drm, 0u, EngineDescriptorHelper::getDefaultDescriptor()); osContext.ensureContextInitialized(); EXPECT_EQ(0u, drm.receivedRecoverableContextValue); EXPECT_EQ(2u, drm.receivedContextParamRequestCount); } TEST(DrmTest, whenPageFaultIsSupportedThenUseVmBindImmediate) { auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); DrmMock drm(*executionEnvironment->rootDeviceEnvironments[0]); for (auto hasPageFaultSupport : {false, true}) { drm.pageFaultSupported = hasPageFaultSupport; EXPECT_EQ(hasPageFaultSupport, drm.useVMBindImmediate()); } } TEST(DrmTest, whenImmediateVmBindExtIsEnabledThenUseVmBindImmediate) { DebugManagerStateRestore restorer; auto executionEnvironment = std::make_unique(); 
executionEnvironment->prepareRootDeviceEnvironments(1); DrmMock drm(*executionEnvironment->rootDeviceEnvironments[0]); for (auto enableImmediateBind : {false, true}) { DebugManager.flags.EnableImmediateVmBindExt.set(enableImmediateBind); EXPECT_EQ(enableImmediateBind, drm.useVMBindImmediate()); } } TEST(DrmQueryTest, GivenDrmWhenSetupHardwareInfoCalledThenCorrectMaxValuesInGtSystemInfoArePreservedAndIoctlHelperSet) { auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); *executionEnvironment->rootDeviceEnvironments[0]->getMutableHardwareInfo() = *NEO::defaultHwInfo.get(); auto hwInfo = executionEnvironment->rootDeviceEnvironments[0]->getMutableHardwareInfo(); DrmMock drm{*executionEnvironment->rootDeviceEnvironments[0]}; drm.failRetTopology = true; drm.storedEUVal = 48; drm.storedSSVal = 6; hwInfo->gtSystemInfo.SliceCount = 2; auto setupHardwareInfo = [](HardwareInfo *, bool) {}; DeviceDescriptor device = {0, hwInfo, setupHardwareInfo}; drm.ioctlHelper.reset(); drm.setupHardwareInfo(&device, false); EXPECT_NE(nullptr, drm.getIoctlHelper()); EXPECT_EQ(NEO::defaultHwInfo->gtSystemInfo.MaxSlicesSupported, hwInfo->gtSystemInfo.MaxSlicesSupported); EXPECT_EQ(NEO::defaultHwInfo->gtSystemInfo.MaxSubSlicesSupported, hwInfo->gtSystemInfo.MaxSubSlicesSupported); EXPECT_EQ(NEO::defaultHwInfo->gtSystemInfo.MaxEuPerSubSlice, hwInfo->gtSystemInfo.MaxEuPerSubSlice); } TEST(DrmQueryTest, GivenLessAvailableSubSlicesThanMaxSubSlicesWhenQueryingTopologyInfoThenCorrectMaxSubSliceCountIsSet) { auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); *executionEnvironment->rootDeviceEnvironments[0]->getMutableHardwareInfo() = *NEO::defaultHwInfo.get(); DrmMock drm{*executionEnvironment->rootDeviceEnvironments[0]}; drm.disableSomeTopology = true; Drm::QueryTopologyData topologyData = {}; drm.storedSVal = 4; drm.storedSSVal = drm.storedSVal * 7; drm.storedEUVal = drm.storedSSVal * 4; 
EXPECT_TRUE(drm.queryTopology(*executionEnvironment->rootDeviceEnvironments[0]->getHardwareInfo(), topologyData)); EXPECT_EQ(2, topologyData.sliceCount); EXPECT_EQ(6, topologyData.subSliceCount); EXPECT_EQ(12, topologyData.euCount); EXPECT_EQ(drm.storedSVal, topologyData.maxSliceCount); EXPECT_EQ(7, topologyData.maxSubSliceCount); } TEST(DrmQueryTest, givenDrmWhenGettingTopologyMapThenCorrectMapIsReturned) { auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); *executionEnvironment->rootDeviceEnvironments[0]->getMutableHardwareInfo() = *NEO::defaultHwInfo.get(); DrmMock drmMock{*executionEnvironment->rootDeviceEnvironments[0]}; Drm::QueryTopologyData topologyData = {}; EXPECT_TRUE(drmMock.queryTopology(*executionEnvironment->rootDeviceEnvironments[0]->getHardwareInfo(), topologyData)); auto topologyMap = drmMock.getTopologyMap(); EXPECT_LE(1u, topologyMap.size()); for (uint32_t i = 0; i < topologyMap.size(); i++) { EXPECT_EQ(drmMock.storedSVal, static_cast(topologyMap.at(i).sliceIndices.size())); } } TEST(DrmQueryTest, GivenSingleSliceConfigWhenQueryingTopologyInfoThenSubsliceIndicesAreStored) { auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); *executionEnvironment->rootDeviceEnvironments[0]->getMutableHardwareInfo() = *NEO::defaultHwInfo.get(); DrmMock drm{*executionEnvironment->rootDeviceEnvironments[0]}; Drm::QueryTopologyData topologyData = {}; drm.storedSVal = 1; drm.storedSSVal = drm.storedSVal * 7; drm.storedEUVal = drm.storedSSVal * 4; EXPECT_TRUE(drm.queryTopology(*executionEnvironment->rootDeviceEnvironments[0]->getHardwareInfo(), topologyData)); EXPECT_EQ(1, topologyData.sliceCount); EXPECT_EQ(7, topologyData.subSliceCount); EXPECT_EQ(28, topologyData.euCount); EXPECT_EQ(drm.storedSVal, topologyData.maxSliceCount); EXPECT_EQ(7, topologyData.maxSubSliceCount); auto topologyMap = drm.getTopologyMap(); for (uint32_t i = 0; i < topologyMap.size(); i++) { 
EXPECT_EQ(drm.storedSVal, static_cast(topologyMap.at(i).sliceIndices.size())); EXPECT_EQ(7u, topologyMap.at(i).subsliceIndices.size()); for (int subsliceId = 0; subsliceId < static_cast(topologyMap.at(i).subsliceIndices.size()); subsliceId++) { EXPECT_EQ(subsliceId, topologyMap.at(i).subsliceIndices[subsliceId]); } } } TEST(DrmQueryTest, GivenMultiSliceConfigWhenQueryingTopologyInfoThenSubsliceIndicesAreNotStored) { auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); *executionEnvironment->rootDeviceEnvironments[0]->getMutableHardwareInfo() = *NEO::defaultHwInfo.get(); DrmMock drm{*executionEnvironment->rootDeviceEnvironments[0]}; Drm::QueryTopologyData topologyData = {}; drm.storedSVal = 2; drm.storedSSVal = drm.storedSVal * 7; drm.storedEUVal = drm.storedSSVal * 4; EXPECT_TRUE(drm.queryTopology(*executionEnvironment->rootDeviceEnvironments[0]->getHardwareInfo(), topologyData)); EXPECT_EQ(2, topologyData.sliceCount); EXPECT_EQ(14, topologyData.subSliceCount); EXPECT_EQ(56, topologyData.euCount); EXPECT_EQ(drm.storedSVal, topologyData.maxSliceCount); EXPECT_EQ(7, topologyData.maxSubSliceCount); auto topologyMap = drm.getTopologyMap(); for (uint32_t i = 0; i < topologyMap.size(); i++) { EXPECT_EQ(drm.storedSVal, static_cast(topologyMap.at(i).sliceIndices.size())); EXPECT_EQ(0u, topologyMap.at(i).subsliceIndices.size()); } } TEST(DrmQueryTest, GivenNonTileArchitectureWhenFrequencyIsQueriedThenFallbackToLegacyInterface) { int expectedMaxFrequency = 2000; auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); DrmMock drm{*executionEnvironment->rootDeviceEnvironments[0]}; auto hwInfo = *defaultHwInfo; hwInfo.gtSystemInfo.MultiTileArchInfo.TileCount = 0; hwInfo.gtSystemInfo.MultiTileArchInfo.IsValid = true; std::string gtMaxFreqFile = "test_files/linux/devices/device/drm/card1/gt_max_freq_mhz"; EXPECT_TRUE(fileExists(gtMaxFreqFile)); drm.setPciPath("device"); int 
maxFrequency = 0; int ret = drm.getMaxGpuFrequency(hwInfo, maxFrequency); EXPECT_EQ(0, ret); EXPECT_EQ(expectedMaxFrequency, maxFrequency); } TEST(DrmQueryTest, GivenTileArchitectureIsInvalidWhenFrequencyIsQueriedThenFallbackToLegacyInterface) { int expectedMaxFrequency = 2000; auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); DrmMock drm{*executionEnvironment->rootDeviceEnvironments[0]}; auto hwInfo = *defaultHwInfo; hwInfo.gtSystemInfo.MultiTileArchInfo.TileCount = 2; hwInfo.gtSystemInfo.MultiTileArchInfo.IsValid = false; std::string gtMaxFreqFile = "test_files/linux/devices/device/drm/card1/gt_max_freq_mhz"; EXPECT_TRUE(fileExists(gtMaxFreqFile)); drm.setPciPath("device"); int maxFrequency = 0; int ret = drm.getMaxGpuFrequency(hwInfo, maxFrequency); EXPECT_EQ(0, ret); EXPECT_EQ(expectedMaxFrequency, maxFrequency); } TEST(DrmQueryTest, GivenRpsMaxFreqFileExistsWhenFrequencyIsQueriedThenValidValueIsReturned) { int expectedMaxFrequency = 3000; auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); DrmMock drm{*executionEnvironment->rootDeviceEnvironments[0]}; auto hwInfo = *defaultHwInfo; hwInfo.gtSystemInfo.MultiTileArchInfo.TileCount = 1; hwInfo.gtSystemInfo.MultiTileArchInfo.IsValid = true; std::string rpsMaxFreqFile = "test_files/linux/devices/device/drm/card1/gt/gt0/rps_max_freq_mhz"; EXPECT_TRUE(fileExists(rpsMaxFreqFile)); drm.setPciPath("device"); int maxFrequency = 0; int ret = drm.getMaxGpuFrequency(hwInfo, maxFrequency); EXPECT_EQ(0, ret); EXPECT_EQ(expectedMaxFrequency, maxFrequency); } TEST(DrmQueryTest, GivenRpsMaxFreqFilesExistWhenFrequenciesAreQueriedThenValidValueIsReturned) { int expectedMaxFrequency = 4000; auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); DrmMock drm{*executionEnvironment->rootDeviceEnvironments[0]}; auto hwInfo = *defaultHwInfo; 
hwInfo.gtSystemInfo.MultiTileArchInfo.TileCount = 2; hwInfo.gtSystemInfo.MultiTileArchInfo.IsValid = true; std::string rpsMaxFreqFile = "test_files/linux/devices/device/drm/card1/gt/gt1/rps_max_freq_mhz"; EXPECT_TRUE(fileExists(rpsMaxFreqFile)); drm.setPciPath("device"); int maxFrequency = 0; int ret = drm.getMaxGpuFrequency(hwInfo, maxFrequency); EXPECT_EQ(0, ret); EXPECT_EQ(expectedMaxFrequency, maxFrequency); } TEST(DrmQueryTest, GivenRpsMaxFreqFileDoesntExistWhenFrequencyIsQueriedThenFallbackToLegacyInterface) { int expectedMaxFrequency = 2000; auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); DrmMock drm{*executionEnvironment->rootDeviceEnvironments[0]}; auto hwInfo = *defaultHwInfo; hwInfo.gtSystemInfo.MultiTileArchInfo.TileCount = 3; hwInfo.gtSystemInfo.MultiTileArchInfo.IsValid = true; std::string rpsMaxFreqFile = "test_files/linux/devices/device/drm/card1/gt/gt2/rps_max_freq_mhz"; EXPECT_FALSE(fileExists(rpsMaxFreqFile)); std::string gtMaxFreqFile = "test_files/linux/devices/device/drm/card1/gt_max_freq_mhz"; EXPECT_TRUE(fileExists(gtMaxFreqFile)); drm.setPciPath("device"); int maxFrequency = 0; int ret = drm.getMaxGpuFrequency(hwInfo, maxFrequency); EXPECT_EQ(0, ret); EXPECT_EQ(expectedMaxFrequency, maxFrequency); } TEST(DrmTest, whenCheckedIfResourcesCleanupCanBeSkippedThenReturnsFalse) { auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); DrmMock *pDrm = new DrmMock(*executionEnvironment->rootDeviceEnvironments[0]); EXPECT_FALSE(pDrm->skipResourceCleanup()); delete pDrm; } TEST(DrmQueryTest, givenUapiPrelimVersionThenReturnCorrectString) { auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); DrmMock drm{*executionEnvironment->rootDeviceEnvironments[0]}; std::string prelimVersionFile = "test_files/linux/devices/device/drm/card1/prelim_uapi_version"; EXPECT_TRUE(fileExists(prelimVersionFile)); 
drm.setPciPath("device"); std::string prelimVersion = ""; drm.getPrelimVersion(prelimVersion); EXPECT_EQ("2.0", prelimVersion); } TEST(DrmQueryTest, givenUapiPrelimVersionWithInvalidPathThenReturnEmptyString) { auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); DrmMock drm{*executionEnvironment->rootDeviceEnvironments[0]}; drm.setPciPath("invalidPath"); std::string prelimVersion = "2.0"; drm.getPrelimVersion(prelimVersion); EXPECT_TRUE(prelimVersion.empty()); } TEST(DrmTest, givenInvalidUapiPrelimVersionThenFallbackToBasePrelim) { const auto productFamily = defaultHwInfo.get()->platform.eProductFamily; std::unique_ptr ioctlHelper(IoctlHelper::get(productFamily, "-1")); EXPECT_NE(nullptr, ioctlHelper.get()); } TEST(DrmTest, GivenCompletionFenceDebugFlagWhenCreatingDrmObjectThenExpectCorrectSetting) { DebugManagerStateRestore restore; auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); HardwareInfo *hwInfo = defaultHwInfo.get(); executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(hwInfo); auto &hwHelper = HwHelper::get(hwInfo->platform.eRenderCoreFamily); DrmMock drmDefault{*executionEnvironment->rootDeviceEnvironments[0]}; if (hwHelper.isLinuxCompletionFenceSupported() && drmDefault.isVmBindAvailable()) { EXPECT_TRUE(drmDefault.completionFenceSupport()); } else { EXPECT_FALSE(drmDefault.completionFenceSupport()); } DebugManager.flags.UseVmBind.set(1); DebugManager.flags.EnableDrmCompletionFence.set(1); DrmMock drmEnabled{*executionEnvironment->rootDeviceEnvironments[0]}; EXPECT_TRUE(drmEnabled.completionFenceSupport()); DebugManager.flags.EnableDrmCompletionFence.set(0); DrmMock drmDisabled{*executionEnvironment->rootDeviceEnvironments[0]}; EXPECT_FALSE(drmDisabled.completionFenceSupport()); } TEST(DrmTest, GivenIoctlErrorWhenIsGpuHangIsCalledThenErrorIsThrown) { ExecutionEnvironment executionEnvironment{}; 
executionEnvironment.prepareRootDeviceEnvironments(1); DrmMock drm{*executionEnvironment.rootDeviceEnvironments[0]}; uint32_t contextId{0}; EngineDescriptor engineDescriptor{EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS, EngineUsage::Regular})}; MockOsContextLinux mockOsContextLinux{drm, contextId, engineDescriptor}; mockOsContextLinux.drmContextIds.push_back(0); mockOsContextLinux.drmContextIds.push_back(3); EXPECT_THROW(drm.isGpuHangDetected(mockOsContextLinux), std::runtime_error); } TEST(DrmTest, GivenZeroBatchActiveAndZeroBatchPendingResetStatsWhenIsGpuHangIsCalledThenNoHangIsReported) { ExecutionEnvironment executionEnvironment{}; executionEnvironment.prepareRootDeviceEnvironments(1); DrmMock drm{*executionEnvironment.rootDeviceEnvironments[0]}; uint32_t contextId{0}; EngineDescriptor engineDescriptor{EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS, EngineUsage::Regular})}; MockOsContextLinux mockOsContextLinux{drm, contextId, engineDescriptor}; mockOsContextLinux.drmContextIds.push_back(0); mockOsContextLinux.drmContextIds.push_back(3); drm_i915_reset_stats resetStats{}; resetStats.ctx_id = 0; drm.resetStatsToReturn.push_back(resetStats); resetStats.ctx_id = 3; drm.resetStatsToReturn.push_back(resetStats); bool isGpuHangDetected{}; EXPECT_NO_THROW(isGpuHangDetected = drm.isGpuHangDetected(mockOsContextLinux)); EXPECT_FALSE(isGpuHangDetected); } TEST(DrmTest, GivenBatchActiveGreaterThanZeroResetStatsWhenIsGpuHangIsCalledThenHangIsReported) { ExecutionEnvironment executionEnvironment{}; executionEnvironment.prepareRootDeviceEnvironments(1); DrmMock drm{*executionEnvironment.rootDeviceEnvironments[0]}; uint32_t contextId{0}; EngineDescriptor engineDescriptor{EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS, EngineUsage::Regular})}; MockOsContextLinux mockOsContextLinux{drm, contextId, engineDescriptor}; mockOsContextLinux.drmContextIds.push_back(0); mockOsContextLinux.drmContextIds.push_back(3); 
drm_i915_reset_stats resetStats{}; resetStats.ctx_id = 0; drm.resetStatsToReturn.push_back(resetStats); resetStats.ctx_id = 3; resetStats.batch_active = 2; drm.resetStatsToReturn.push_back(resetStats); bool isGpuHangDetected{}; EXPECT_NO_THROW(isGpuHangDetected = drm.isGpuHangDetected(mockOsContextLinux)); EXPECT_TRUE(isGpuHangDetected); } TEST(DrmTest, GivenBatchPendingGreaterThanZeroResetStatsWhenIsGpuHangIsCalledThenHangIsReported) { ExecutionEnvironment executionEnvironment{}; executionEnvironment.prepareRootDeviceEnvironments(1); DrmMock drm{*executionEnvironment.rootDeviceEnvironments[0]}; uint32_t contextId{0}; EngineDescriptor engineDescriptor{EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS, EngineUsage::Regular})}; MockOsContextLinux mockOsContextLinux{drm, contextId, engineDescriptor}; mockOsContextLinux.drmContextIds.push_back(8); drm_i915_reset_stats resetStats{}; resetStats.ctx_id = 8; resetStats.batch_pending = 7; drm.resetStatsToReturn.push_back(resetStats); bool isGpuHangDetected{}; EXPECT_NO_THROW(isGpuHangDetected = drm.isGpuHangDetected(mockOsContextLinux)); EXPECT_TRUE(isGpuHangDetected); } TEST(DrmTest, givenSetupIoctlHelperThenIoctlHelperNotNull) { auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); DrmMock drm{*executionEnvironment->rootDeviceEnvironments[0]}; drm.ioctlHelper.reset(nullptr); const auto productFamily = executionEnvironment->rootDeviceEnvironments[0]->getHardwareInfo()->platform.eProductFamily; drm.setupIoctlHelper(productFamily); EXPECT_NE(nullptr, drm.ioctlHelper.get()); } compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/linux/drm_uuid_tests.cpp000066400000000000000000000070771422164147700312000ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/linux/drm_debug.h" #include "shared/test/common/libult/linux/drm_mock.h" #include "gtest/gtest.h" 
using namespace NEO; /* NOTE(review): calls such as std::make_unique() and static_cast(...) appear in this copy without template arguments; they look lost in extraction - confirm against the upstream file. This test calls generateUUID() 0xff times in total and checks the first, second and last results against the zero-padded strings ending in 1, 2 and ff. */ TEST(DrmUuidTest, GivenDrmWhenGeneratingUUIDThenCorrectStringsAreReturned) { auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); DrmMock drm{*executionEnvironment->rootDeviceEnvironments[0]}; auto uuid1 = drm.generateUUID(); auto uuid2 = drm.generateUUID(); std::string uuidff; for (int i = 0; i < 0xff - 2; i++) { uuidff = drm.generateUUID(); } EXPECT_STREQ("00000000-0000-0000-0000-000000000001", uuid1.c_str()); EXPECT_STREQ("00000000-0000-0000-0000-000000000002", uuid2.c_str()); EXPECT_STREQ("00000000-0000-0000-0000-0000000000ff", uuidff.c_str()); } /* Checks that generateElfUUID() results share their first 18 characters with the Elf class UUID taken from classNamesToUuid, and that an all-ones pointer value yields that prefix followed by -ffff-ffffffffffff. */ TEST(DrmUuidTest, GivenDrmWhenGeneratingElfUUIDThenCorrectStringsAreReturned) { auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); DrmMock drm{*executionEnvironment->rootDeviceEnvironments[0]}; std::string elfClassUuid = classNamesToUuid[static_cast(Drm::ResourceClass::Elf)].second; std::string Uuid1stElfClass = elfClassUuid.substr(0, 18); char data[] = "abc"; auto uuid1 = drm.generateElfUUID(static_cast(data)); std::string Uuid1stElfBin1 = uuid1.substr(0, 18); EXPECT_STREQ(Uuid1stElfClass.c_str(), Uuid1stElfBin1.c_str()); char data2[] = "123"; auto uuid2 = drm.generateElfUUID(static_cast(data2)); std::string Uuid1stElfBin2 = uuid2.substr(0, 18); EXPECT_STREQ(Uuid1stElfClass.c_str(), Uuid1stElfBin2.c_str()); auto uuid3 = drm.generateElfUUID(reinterpret_cast(0xFFFFFFFFFFFFFFFF)); std::string UuidElf = Uuid1stElfClass + "-ffff-ffffffffffff"; EXPECT_STREQ(UuidElf.c_str(), uuid3.c_str()); } /* Checks the name stored in the .first member of classNamesToUuid for each Drm::ResourceClass value used as an index below. */ TEST(DrmUuidTest, whenResourceClassIsUsedToIndexClassNamesThenCorrectNamesAreReturned) { EXPECT_STREQ(classNamesToUuid[static_cast(Drm::ResourceClass::Elf)].first, "I915_UUID_CLASS_ELF_BINARY"); EXPECT_STREQ(classNamesToUuid[static_cast(Drm::ResourceClass::Isa)].first, "I915_UUID_CLASS_ISA_BYTECODE"); EXPECT_STREQ(classNamesToUuid[static_cast(Drm::ResourceClass::ContextSaveArea)].first, "I915_UUID_L0_SIP_AREA"); 
EXPECT_STREQ(classNamesToUuid[static_cast(Drm::ResourceClass::ModuleHeapDebugArea)].first, "I915_UUID_L0_MODULE_AREA"); EXPECT_STREQ(classNamesToUuid[static_cast(Drm::ResourceClass::SbaTrackingBuffer)].first, "I915_UUID_L0_SBA_AREA"); EXPECT_STREQ(classNamesToUuid[static_cast(Drm::ResourceClass::L0ZebinModule)].first, "L0_ZEBIN_MODULE"); } /* getClassUuidIndex() is expected to return true and write the matching class index for the two known UUID strings, and to return false while leaving index at its preset value of 100 for an unknown string. */ TEST(DrmUuidTest, givenUuidStringWhenGettingClassIndexThenCorrectIndexForValidStringsIsReturned) { uint32_t index = 100; auto validUuid = DrmUuid::getClassUuidIndex(classNamesToUuid[static_cast(Drm::ResourceClass::ContextSaveArea)].second, index); EXPECT_TRUE(validUuid); EXPECT_EQ(static_cast(Drm::ResourceClass::ContextSaveArea), index); validUuid = DrmUuid::getClassUuidIndex(classNamesToUuid[static_cast(Drm::ResourceClass::ModuleHeapDebugArea)].second, index); EXPECT_TRUE(validUuid); EXPECT_EQ(static_cast(Drm::ResourceClass::ModuleHeapDebugArea), index); index = 100; validUuid = DrmUuid::getClassUuidIndex("invalid", index); EXPECT_FALSE(validUuid); EXPECT_EQ(100u, index); } compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/linux/drm_va_sharing_tests.cpp000066400000000000000000000030661422164147700323450ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/os_interface/linux/drm_memory_manager_tests.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/sharings/va/va_surface.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/sharings/va/mock_va_sharing.h" namespace NEO { using DrmVaSharingTest = Test; /* Creates two shared VA surfaces, after updateAcquiredHandle(1) and updateAcquiredHandle(2), and expects closeCalledCount and closeInputFd to read 1 and 1 after the first and 2 and 2 after the second. */ TEST_F(DrmVaSharingTest, givenDrmMemoryManagerWhenSharedVaSurfaceIsImportedWithDrmPrimeFdToHandleThenDrmPrimeFdCanBeClosed) { mock->ioctl_expected.total = -1; device->incRefInternal(); MockClDevice clDevice{device}; MockContext context(&clDevice); MockVaSharing vaSharing; 
VASurfaceID vaSurfaceId = 0u; vaSharing.updateAcquiredHandle(1); std::unique_ptr sharedImage1(VASurface::createSharedVaSurface(&context, &vaSharing.sharingFunctions, CL_MEM_READ_WRITE, 0, &vaSurfaceId, 0, nullptr)); EXPECT_EQ(1, closeCalledCount); EXPECT_EQ(1, closeInputFd); vaSharing.updateAcquiredHandle(2); std::unique_ptr sharedImage2(VASurface::createSharedVaSurface(&context, &vaSharing.sharingFunctions, CL_MEM_READ_WRITE, 0, &vaSurfaceId, 0, nullptr)); EXPECT_EQ(2, closeCalledCount); EXPECT_EQ(2, closeInputFd); } } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/linux/file_logger_linux_tests.cpp000066400000000000000000000153341422164147700330600ustar00rootroot00000000000000/* * Copyright (C) 2019-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/test/common/libult/linux/drm_mock.h" #include "shared/test/common/mocks/linux/mock_drm_allocation.h" #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/utilities/file_logger_tests.h" using namespace NEO; /* Logs a MockDrmAllocation carrying one buffer object with handle 4 while LogAllocationMemoryPool is set; the content checks on the log run only if wasFileCreated() reports a log file, so the test passes without checking anything otherwise. */ TEST(FileLogger, GivenLogAllocationMemoryPoolFlagThenLogsCorrectInfo) { std::string testFile = "testfile"; DebugVariables flags; flags.LogAllocationMemoryPool.set(true); FullyEnabledFileLogger fileLogger(testFile, flags); // Log file not created bool logFileCreated = fileExists(fileLogger.getLogFileName()); EXPECT_FALSE(logFileCreated); auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); DrmMock drm(*executionEnvironment->rootDeviceEnvironments[0]); MockDrmAllocation allocation(AllocationType::BUFFER, MemoryPool::System64KBPages); allocation.setCpuPtrAndGpuAddress(&allocation, 0x12345); MockBufferObject bo(&drm); bo.handle = 4; allocation.bufferObjects[0] = &bo; fileLogger.logAllocation(&allocation); std::thread::id thisThread = std::this_thread::get_id(); std::stringstream threadIDCheck; threadIDCheck << " 
ThreadID: " << thisThread; std::stringstream memoryPoolCheck; memoryPoolCheck << " MemoryPool: " << allocation.getMemoryPool(); std::stringstream gpuAddressCheck; gpuAddressCheck << " GPU address: 0x" << std::hex << allocation.getGpuAddress(); std::stringstream rootDeviceIndexCheck; rootDeviceIndexCheck << " Root device index: " << allocation.getRootDeviceIndex(); if (fileLogger.wasFileCreated(fileLogger.getLogFileName())) { auto str = fileLogger.getFileString(fileLogger.getLogFileName()); EXPECT_TRUE(str.find(threadIDCheck.str()) != std::string::npos); EXPECT_TRUE(str.find(memoryPoolCheck.str()) != std::string::npos); EXPECT_TRUE(str.find(gpuAddressCheck.str()) != std::string::npos); EXPECT_TRUE(str.find(rootDeviceIndexCheck.str()) != std::string::npos); EXPECT_TRUE(str.find("AllocationType: BUFFER") != std::string::npos); EXPECT_TRUE(str.find("Handle: 4") != std::string::npos); } } TEST(FileLogger, givenLogAllocationStdoutWhenLogAllocationThenLogToStdoutInsteadOfFileAndDoNotCreateFile) { std::string testFile = "testfile"; DebugVariables flags; flags.LogAllocationMemoryPool.set(true); flags.LogAllocationStdout.set(true); FullyEnabledFileLogger fileLogger(testFile, flags); // Log file not created bool logFileCreated = fileExists(fileLogger.getLogFileName()); EXPECT_FALSE(logFileCreated); auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); DrmMock drm(*executionEnvironment->rootDeviceEnvironments[0]); MockDrmAllocation allocation(AllocationType::BUFFER, MemoryPool::System64KBPages); allocation.setCpuPtrAndGpuAddress(&allocation, 0x12345); MockBufferObject bo(&drm); bo.handle = 4; allocation.bufferObjects[0] = &bo; testing::internal::CaptureStdout(); fileLogger.logAllocation(&allocation); std::string output = testing::internal::GetCapturedStdout(); std::thread::id thisThread = std::this_thread::get_id(); std::stringstream threadIDCheck; threadIDCheck << " ThreadID: " << thisThread; std::stringstream memoryPoolCheck; 
memoryPoolCheck << " MemoryPool: " << allocation.getMemoryPool(); std::stringstream gpuAddressCheck; gpuAddressCheck << " GPU address: 0x" << std::hex << allocation.getGpuAddress(); std::stringstream rootDeviceIndexCheck; rootDeviceIndexCheck << " Root device index: " << allocation.getRootDeviceIndex(); EXPECT_TRUE(output.find(threadIDCheck.str()) != std::string::npos); EXPECT_TRUE(output.find(memoryPoolCheck.str()) != std::string::npos); EXPECT_TRUE(output.find(gpuAddressCheck.str()) != std::string::npos); EXPECT_TRUE(output.find(rootDeviceIndexCheck.str()) != std::string::npos); EXPECT_TRUE(output.find("AllocationType: BUFFER") != std::string::npos); EXPECT_TRUE(output.find("Handle: 4") != std::string::npos); EXPECT_TRUE(output.find("\n") != std::string::npos); logFileCreated = fileExists(fileLogger.getLogFileName()); EXPECT_FALSE(logFileCreated); } /* No buffer object is attached to the allocation in this test, so the handle text must be absent from the log; as before, the content checks run only if a log file was created. */ TEST(FileLogger, GivenDrmAllocationWithoutBOThenNoHandleLogged) { std::string testFile = "testfile"; DebugVariables flags; flags.LogAllocationMemoryPool.set(true); FullyEnabledFileLogger fileLogger(testFile, flags); // Log file not created bool logFileCreated = fileExists(fileLogger.getLogFileName()); EXPECT_FALSE(logFileCreated); MockDrmAllocation allocation(AllocationType::BUFFER, MemoryPool::System64KBPages); fileLogger.logAllocation(&allocation); std::thread::id thisThread = std::this_thread::get_id(); std::stringstream threadIDCheck; threadIDCheck << " ThreadID: " << thisThread; std::stringstream memoryPoolCheck; memoryPoolCheck << " MemoryPool: " << allocation.getMemoryPool(); if (fileLogger.wasFileCreated(fileLogger.getLogFileName())) { auto str = fileLogger.getFileString(fileLogger.getLogFileName()); EXPECT_TRUE(str.find(threadIDCheck.str()) != std::string::npos); EXPECT_TRUE(str.find(memoryPoolCheck.str()) != std::string::npos); EXPECT_TRUE(str.find("AllocationType: BUFFER") != std::string::npos); EXPECT_FALSE(str.find("Handle: 4") != std::string::npos); } } /* With LogAllocationMemoryPool set to false, none of the thread id, memory pool or allocation type strings may appear in the log file; checked only if a log file was created. */ TEST(FileLogger, 
GivenLogAllocationMemoryPoolFlagSetFalseThenAllocationIsNotLogged) { std::string testFile = "testfile"; DebugVariables flags; flags.LogAllocationMemoryPool.set(false); FullyEnabledFileLogger fileLogger(testFile, flags); // Log file not created bool logFileCreated = fileExists(fileLogger.getLogFileName()); EXPECT_FALSE(logFileCreated); MockDrmAllocation allocation(AllocationType::BUFFER, MemoryPool::System64KBPages); fileLogger.logAllocation(&allocation); std::thread::id thisThread = std::this_thread::get_id(); std::stringstream threadIDCheck; threadIDCheck << " ThreadID: " << thisThread; std::stringstream memoryPoolCheck; memoryPoolCheck << " MemoryPool: " << allocation.getMemoryPool(); if (fileLogger.wasFileCreated(fileLogger.getLogFileName())) { auto str = fileLogger.getFileString(fileLogger.getLogFileName()); EXPECT_FALSE(str.find(threadIDCheck.str()) != std::string::npos); EXPECT_FALSE(str.find(memoryPoolCheck.str()) != std::string::npos); EXPECT_FALSE(str.find("AllocationType: BUFFER") != std::string::npos); } } compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/linux/hw_info_config_linux_tests.cpp000066400000000000000000000530311422164147700335540ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/os_interface/linux/hw_info_config_linux_tests.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/os_interface/os_interface.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/default_hw_info.h" #include "shared/test/common/helpers/unit_test_helper.h" #include "opencl/extensions/public/cl_ext_private.h" #include namespace NEO { /* Device-id bits that the configureHardwareCustom() test specialization maps to the mid-thread, thread-group and mid-batch preemption feature flags. */ constexpr uint32_t hwConfigTestMidThreadBit = 1 << 8; constexpr uint32_t hwConfigTestThreadGroupBit = 1 << 9; constexpr uint32_t hwConfigTestMidBatchBit = 1 << 10; /* Test specialization: clears the three preemption feature flags, sets EdramSizeInKb when the device id is 30, then sets each preemption flag whose test bit is present in usDeviceID. Returns -1 for device id 10 and 0 otherwise; osIface is not read. */ template <> int HwInfoConfigHw::configureHardwareCustom(HardwareInfo *hwInfo, OSInterface *osIface) 
{ FeatureTable *featureTable = &hwInfo->featureTable; featureTable->flags.ftrGpGpuMidThreadLevelPreempt = 0; featureTable->flags.ftrGpGpuThreadGroupLevelPreempt = 0; featureTable->flags.ftrGpGpuMidBatchPreempt = 0; if (hwInfo->platform.usDeviceID == 30) { GT_SYSTEM_INFO *gtSystemInfo = &hwInfo->gtSystemInfo; gtSystemInfo->EdramSizeInKb = 128 * 1000; } if (hwInfo->platform.usDeviceID & hwConfigTestMidThreadBit) { featureTable->flags.ftrGpGpuMidThreadLevelPreempt = 1; } if (hwInfo->platform.usDeviceID & hwConfigTestThreadGroupBit) { featureTable->flags.ftrGpGpuThreadGroupLevelPreempt = 1; } if (hwInfo->platform.usDeviceID & hwConfigTestMidBatchBit) { featureTable->flags.ftrGpGpuMidBatchPreempt = 1; } /* device id 10 is the only id reported as a failure */ return (hwInfo->platform.usDeviceID == 10) ? -1 : 0; } } // namespace NEO struct DummyHwConfig : HwInfoConfigHw { }; using namespace NEO; void mockCpuidex(int *cpuInfo, int functionId, int subfunctionId); /* Builds an execution environment with one root device using defaultHwInfo, hands a new DrmMock to a new OSInterface through setDriverModel(), seeds the stored device id, revision id, EU and subslice values of the mock from pInHwInfo, and swaps CpuInfo::cpuidexFunc for mockCpuidex after saving the previous pointer in rt_cpuidex_func. */ void HwInfoConfigTestLinux::SetUp() { HwInfoConfigTest::SetUp(); executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); osInterface = new OSInterface(); drm = new DrmMock(*executionEnvironment->rootDeviceEnvironments[0]); osInterface->setDriverModel(std::unique_ptr(drm)); drm->storedDeviceID = pInHwInfo.platform.usDeviceID; drm->storedDeviceRevID = 0; drm->storedEUVal = pInHwInfo.gtSystemInfo.EUCount; drm->storedSSVal = pInHwInfo.gtSystemInfo.SubSliceCount; rt_cpuidex_func = CpuInfo::cpuidexFunc; CpuInfo::cpuidexFunc = mockCpuidex; } /* Restores the saved CpuInfo::cpuidexFunc and deletes osInterface. NOTE(review): drm was passed to setDriverModel() wrapped in a std::unique_ptr, so it is presumably released together with osInterface - confirm. */ void HwInfoConfigTestLinux::TearDown() { CpuInfo::cpuidexFunc = rt_cpuidex_func; delete osInterface; HwInfoConfigTest::TearDown(); } /* Stub cpuid: writes 0x7F, 0x1F or 0 into cpuInfo[0] for subfunctionId 0, 1 or 2. functionId is not read, and any other subfunctionId leaves cpuInfo untouched. */ void mockCpuidex(int *cpuInfo, int functionId, int subfunctionId) { if (subfunctionId == 0) { cpuInfo[0] = 0x7F; } if (subfunctionId == 1) { cpuInfo[0] = 0x1F; } if (subfunctionId == 2) { cpuInfo[0] = 0; } } /* Fixture that sets the stored device id of the mock to 1, copies eRenderCoreFamily from defaultHwInfo into testPlatform, and holds the DummyHwConfig under test. */ struct HwInfoConfigTestLinuxDummy : HwInfoConfigTestLinux { void SetUp() 
override { HwInfoConfigTestLinux::SetUp(); drm->storedDeviceID = 1; testPlatform->eRenderCoreFamily = defaultHwInfo->platform.eRenderCoreFamily; } void TearDown() override { HwInfoConfigTestLinux::TearDown(); } DummyHwConfig hwConfig; }; /* With the fixture defaults (stored device id 1) configureHwInfoDrm() is expected to return 0. */ TEST_F(HwInfoConfigTestLinuxDummy, GivenDummyConfigWhenConfiguringHwInfoThenSucceeds) { int ret = hwConfig.configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); } using HwInfoConfigCommonLinuxTest = ::testing::Test; /* Instantiated with the IsAtMostGen11 matcher: even with EnableBlitterOperationsSupport set to 1, blitterOperationsSupported is expected to be false after configureHardwareCustom(). */ HWTEST2_F(HwInfoConfigCommonLinuxTest, givenDebugFlagSetWhenEnablingBlitterOperationsSupportThenIgnore, IsAtMostGen11) { DebugManagerStateRestore restore{}; HardwareInfo hardwareInfo = *defaultHwInfo; auto hwInfoConfig = HwInfoConfig::get(hardwareInfo.platform.eProductFamily); DebugManager.flags.EnableBlitterOperationsSupport.set(1); hwInfoConfig->configureHardwareCustom(&hardwareInfo, nullptr); EXPECT_FALSE(hardwareInfo.capabilityTable.blitterOperationsSupported); } /* Stored device id 30 is the id for which the test configureHardwareCustom() sets EdramSizeInKb; ftrEDram is then expected to be 1 in the output. */ TEST_F(HwInfoConfigTestLinuxDummy, GivenDummyConfigThenEdramIsDetected) { drm->storedDeviceID = 30; int ret = hwConfig.configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ(1u, outHwInfo.featureTable.flags.ftrEDram); } /* This test (device id 21) and the next one (device id 20) both expect ftrSupportsCoherency to be false in the output. */ TEST_F(HwInfoConfigTestLinuxDummy, givenEnabledPlatformCoherencyWhenConfiguringHwInfoThenIgnoreAndSetAsDisabled) { drm->storedDeviceID = 21; int ret = hwConfig.configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_FALSE(outHwInfo.capabilityTable.ftrSupportsCoherency); } TEST_F(HwInfoConfigTestLinuxDummy, givenDisabledPlatformCoherencyWhenConfiguringHwInfoThenSetValidCapability) { drm->storedDeviceID = 20; int ret = hwConfig.configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_FALSE(outHwInfo.capabilityTable.ftrSupportsCoherency); } /* Stored device id 0 is expected to make configureHwInfoDrm() return -1. */ TEST_F(HwInfoConfigTestLinuxDummy, GivenUnknownDevIdWhenConfiguringHwInfoThenFails) { drm->storedDeviceID = 0; int ret = hwConfig.configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-1, 
ret); } /* The next four tests each inject a failing return code into the mock (device id, revision id, EU count, subslice count) and expect configureHwInfoDrm() to return that same code; the EU and subslice cases also set failRetTopology. */ TEST_F(HwInfoConfigTestLinuxDummy, GivenFailGetDevIdWhenConfiguringHwInfoThenFails) { drm->storedRetValForDeviceID = -2; int ret = hwConfig.configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-2, ret); } TEST_F(HwInfoConfigTestLinuxDummy, GivenFailGetDevRevIdWhenConfiguringHwInfoThenFails) { drm->storedRetValForDeviceRevID = -3; int ret = hwConfig.configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-3, ret); } TEST_F(HwInfoConfigTestLinuxDummy, GivenFailGetEuCountWhenConfiguringHwInfoThenFails) { drm->storedRetValForEUVal = -4; drm->failRetTopology = true; int ret = hwConfig.configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-4, ret); } TEST_F(HwInfoConfigTestLinuxDummy, GivenFailGetSsCountWhenConfiguringHwInfoThenFails) { drm->storedRetValForSSVal = -5; drm->failRetTopology = true; int ret = hwConfig.configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-5, ret); } /* With only failRetTopology set, the return value is required to differ from -1. */ TEST_F(HwInfoConfigTestLinuxDummy, whenFailGettingTopologyThenFallbackToEuCountIoctl) { drm->failRetTopology = true; int ret = hwConfig.configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_NE(-1, ret); } /* Zeroes one stored topology value at a time (EU, subslice, then slice count) while restoring the other two, and expects queryTopology() to return false in each case. */ TEST_F(HwInfoConfigTestLinuxDummy, givenInvalidTopologyDataWhenConfiguringThenReturnError) { auto storedSVal = drm->storedSVal; auto storedSSVal = drm->storedSSVal; auto storedEUVal = drm->storedEUVal; { // 0 euCount drm->storedSVal = storedSVal; drm->storedSSVal = storedSSVal; drm->storedEUVal = 0; Drm::QueryTopologyData topologyData = {}; EXPECT_FALSE(drm->queryTopology(outHwInfo, topologyData)); } { // 0 subSliceCount drm->storedSVal = storedSVal; drm->storedSSVal = 0; drm->storedEUVal = storedEUVal; Drm::QueryTopologyData topologyData = {}; EXPECT_FALSE(drm->queryTopology(outHwInfo, topologyData)); } { // 0 sliceCount drm->storedSVal = 0; drm->storedSSVal = storedSSVal; drm->storedEUVal = storedEUVal; Drm::QueryTopologyData topologyData = {}; EXPECT_FALSE(drm->queryTopology(outHwInfo, topologyData)); } } 
TEST_F(HwInfoConfigTestLinuxDummy, GivenFailingCustomConfigWhenConfiguringHwInfoThenFails) { drm->storedDeviceID = 10; int ret = hwConfig.configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-1, ret); } TEST_F(HwInfoConfigTestLinuxDummy, GivenUnknownDeviceIdWhenConfiguringHwInfoThenFails) { drm->storedDeviceID = 0; auto hwConfig = DummyHwConfig{}; int ret = hwConfig.configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-1, ret); } TEST_F(HwInfoConfigTestLinuxDummy, whenConfigureHwInfoIsCalledThenAreNonPersistentContextsSupportedReturnsTrue) { int ret = hwConfig.configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_TRUE(drm->areNonPersistentContextsSupported()); } TEST_F(HwInfoConfigTestLinuxDummy, whenConfigureHwInfoIsCalledAndPersitentContextIsUnsupportedThenAreNonPersistentContextsSupportedReturnsFalse) { drm->storedPersistentContextsSupport = 0; int ret = hwConfig.configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_FALSE(drm->areNonPersistentContextsSupported()); } HWTEST_F(HwInfoConfigTestLinuxDummy, GivenPreemptionDrmEnabledMidThreadOnWhenConfiguringHwInfoThenPreemptionIsSupported) { pInHwInfo.capabilityTable.defaultPreemptionMode = PreemptionMode::MidThread; drm->storedPreemptionSupport = I915_SCHEDULER_CAP_ENABLED | I915_SCHEDULER_CAP_PRIORITY | I915_SCHEDULER_CAP_PREEMPTION; drm->storedDeviceID = hwConfigTestMidThreadBit; UnitTestHelper::setExtraMidThreadPreemptionFlag(pInHwInfo, true); int ret = hwConfig.configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ(PreemptionMode::MidThread, outHwInfo.capabilityTable.defaultPreemptionMode); EXPECT_TRUE(drm->isPreemptionSupported()); } TEST_F(HwInfoConfigTestLinuxDummy, GivenPreemptionDrmEnabledThreadGroupOnWhenConfiguringHwInfoThenPreemptionIsSupported) { pInHwInfo.capabilityTable.defaultPreemptionMode = PreemptionMode::MidThread; drm->storedPreemptionSupport = I915_SCHEDULER_CAP_ENABLED | 
I915_SCHEDULER_CAP_PRIORITY | I915_SCHEDULER_CAP_PREEMPTION; drm->storedDeviceID = hwConfigTestThreadGroupBit; int ret = hwConfig.configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ(PreemptionMode::ThreadGroup, outHwInfo.capabilityTable.defaultPreemptionMode); EXPECT_TRUE(drm->isPreemptionSupported()); } TEST_F(HwInfoConfigTestLinuxDummy, givenDebugFlagSetWhenConfiguringHwInfoThenPrintGetParamIoctlsOutput) { DebugManagerStateRestore restore; DebugManager.flags.PrintIoctlEntries.set(true); testing::internal::CaptureStdout(); // start capturing int ret = hwConfig.configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); std::array expectedStrings = {{"DRM_IOCTL_I915_GETPARAM: param: I915_PARAM_CHIPSET_ID, output value: 1, retCode: 0", "DRM_IOCTL_I915_GETPARAM: param: I915_PARAM_REVISION, output value: 0, retCode: 0", "DRM_IOCTL_I915_GETPARAM: param: I915_PARAM_CHIPSET_ID, output value: 1, retCode: 0", "DRM_IOCTL_I915_GETPARAM: param: I915_PARAM_HAS_SCHEDULER, output value: 7, retCode: 0" }}; DebugManager.flags.PrintIoctlEntries.set(false); std::string output = testing::internal::GetCapturedStdout(); // stop capturing for (const auto &expectedString : expectedStrings) { EXPECT_NE(std::string::npos, output.find(expectedString)); } EXPECT_EQ(std::string::npos, output.find("UNKNOWN")); } TEST_F(HwInfoConfigTestLinuxDummy, GivenPreemptionDrmEnabledMidBatchOnWhenConfiguringHwInfoThenPreemptionIsSupported) { pInHwInfo.capabilityTable.defaultPreemptionMode = PreemptionMode::MidThread; drm->storedPreemptionSupport = I915_SCHEDULER_CAP_ENABLED | I915_SCHEDULER_CAP_PRIORITY | I915_SCHEDULER_CAP_PREEMPTION; drm->storedDeviceID = hwConfigTestMidBatchBit; int ret = hwConfig.configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ(PreemptionMode::MidBatch, outHwInfo.capabilityTable.defaultPreemptionMode); EXPECT_TRUE(drm->isPreemptionSupported()); } TEST_F(HwInfoConfigTestLinuxDummy, 
WhenConfiguringHwInfoThenPreemptionIsSupportedPreemptionDrmEnabledNoPreemptionWhenConfiguringHwInfoThenPreemptionIsNotSupported) { pInHwInfo.capabilityTable.defaultPreemptionMode = PreemptionMode::MidThread; drm->storedPreemptionSupport = I915_SCHEDULER_CAP_ENABLED | I915_SCHEDULER_CAP_PRIORITY | I915_SCHEDULER_CAP_PREEMPTION; drm->storedDeviceID = 1; int ret = hwConfig.configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ(PreemptionMode::Disabled, outHwInfo.capabilityTable.defaultPreemptionMode); EXPECT_TRUE(drm->isPreemptionSupported()); } TEST_F(HwInfoConfigTestLinuxDummy, GivenPreemptionDrmDisabledAllPreemptionWhenConfiguringHwInfoThenPreemptionIsNotSupported) { pInHwInfo.capabilityTable.defaultPreemptionMode = PreemptionMode::MidThread; drm->storedPreemptionSupport = 0; drm->storedDeviceID = hwConfigTestMidThreadBit | hwConfigTestThreadGroupBit | hwConfigTestMidBatchBit; int ret = hwConfig.configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ(PreemptionMode::Disabled, outHwInfo.capabilityTable.defaultPreemptionMode); EXPECT_FALSE(drm->isPreemptionSupported()); } TEST_F(HwInfoConfigTestLinuxDummy, GivenPreemptionDrmEnabledAllPreemptionDriverThreadGroupWhenConfiguringHwInfoThenPreemptionIsSupported) { pInHwInfo.capabilityTable.defaultPreemptionMode = PreemptionMode::ThreadGroup; drm->storedPreemptionSupport = I915_SCHEDULER_CAP_ENABLED | I915_SCHEDULER_CAP_PRIORITY | I915_SCHEDULER_CAP_PREEMPTION; drm->storedDeviceID = hwConfigTestMidThreadBit | hwConfigTestThreadGroupBit | hwConfigTestMidBatchBit; int ret = hwConfig.configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ(PreemptionMode::ThreadGroup, outHwInfo.capabilityTable.defaultPreemptionMode); EXPECT_TRUE(drm->isPreemptionSupported()); } TEST_F(HwInfoConfigTestLinuxDummy, GivenPreemptionDrmEnabledAllPreemptionDriverMidBatchWhenConfiguringHwInfoThenPreemptionIsSupported) { 
pInHwInfo.capabilityTable.defaultPreemptionMode = PreemptionMode::MidBatch; drm->storedPreemptionSupport = I915_SCHEDULER_CAP_ENABLED | I915_SCHEDULER_CAP_PRIORITY | I915_SCHEDULER_CAP_PREEMPTION; drm->storedDeviceID = hwConfigTestMidThreadBit | hwConfigTestThreadGroupBit | hwConfigTestMidBatchBit; int ret = hwConfig.configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ(PreemptionMode::MidBatch, outHwInfo.capabilityTable.defaultPreemptionMode); EXPECT_TRUE(drm->isPreemptionSupported()); } TEST_F(HwInfoConfigTestLinuxDummy, GivenConfigPreemptionDrmEnabledAllPreemptionDriverDisabledWhenConfiguringHwInfoThenPreemptionIsSupported) { pInHwInfo.capabilityTable.defaultPreemptionMode = PreemptionMode::Disabled; drm->storedPreemptionSupport = I915_SCHEDULER_CAP_ENABLED | I915_SCHEDULER_CAP_PRIORITY | I915_SCHEDULER_CAP_PREEMPTION; drm->storedDeviceID = hwConfigTestMidThreadBit | hwConfigTestThreadGroupBit | hwConfigTestMidBatchBit; int ret = hwConfig.configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ(PreemptionMode::Disabled, outHwInfo.capabilityTable.defaultPreemptionMode); EXPECT_TRUE(drm->isPreemptionSupported()); } TEST_F(HwInfoConfigTestLinuxDummy, givenPlatformEnabledFtrCompressionWhenInitializingThenFlagsAreSet) { pInHwInfo.capabilityTable.ftrRenderCompressedImages = true; pInHwInfo.capabilityTable.ftrRenderCompressedBuffers = true; int ret = hwConfig.configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_TRUE(outHwInfo.capabilityTable.ftrRenderCompressedImages); EXPECT_TRUE(outHwInfo.capabilityTable.ftrRenderCompressedBuffers); } TEST_F(HwInfoConfigTestLinuxDummy, givenPointerToHwInfoWhenConfigureHwInfoCalledThenRequiedSurfaceSizeIsSettedProperly) { EXPECT_EQ(MemoryConstants::pageSize, pInHwInfo.capabilityTable.requiredPreemptionSurfaceSize); int ret = hwConfig.configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); auto expectedSize = 
static_cast(outHwInfo.gtSystemInfo.CsrSizeInMb * MemoryConstants::megaByte); HwHelper::get(outHwInfo.platform.eRenderCoreFamily).adjustPreemptionSurfaceSize(expectedSize); EXPECT_EQ(expectedSize, outHwInfo.capabilityTable.requiredPreemptionSurfaceSize); } TEST_F(HwInfoConfigTestLinuxDummy, givenInstrumentationForHardwareIsEnabledOrDisabledWhenConfiguringHwInfoThenOverrideItUsingHaveInstrumentation) { int ret; pInHwInfo.capabilityTable.instrumentationEnabled = false; ret = hwConfig.configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); ASSERT_EQ(0, ret); EXPECT_FALSE(outHwInfo.capabilityTable.instrumentationEnabled); pInHwInfo.capabilityTable.instrumentationEnabled = true; ret = hwConfig.configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); ASSERT_EQ(0, ret); EXPECT_TRUE(outHwInfo.capabilityTable.instrumentationEnabled); } TEST_F(HwInfoConfigTestLinuxDummy, givenGttSizeReturnedWhenInitializingHwInfoThenSetSvmFtr) { drm->storedGTTSize = MemoryConstants::max64BitAppAddress; int ret = hwConfig.configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_FALSE(outHwInfo.capabilityTable.ftrSvm); drm->storedGTTSize = MemoryConstants::max64BitAppAddress + 1; ret = hwConfig.configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_TRUE(outHwInfo.capabilityTable.ftrSvm); } TEST_F(HwInfoConfigTestLinuxDummy, givenGttSizeReturnedWhenInitializingHwInfoThenSetGpuAddressSpace) { drm->storedGTTSize = maxNBitValue(40) + 1; int ret = hwConfig.configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ(drm->storedGTTSize - 1, outHwInfo.capabilityTable.gpuAddressSpace); } TEST_F(HwInfoConfigTestLinuxDummy, givenFailingGttSizeIoctlWhenInitializingHwInfoThenSetDefaultValues) { drm->storedRetValForGetGttSize = -1; int ret = hwConfig.configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_TRUE(outHwInfo.capabilityTable.ftrSvm); EXPECT_NE(0u, outHwInfo.capabilityTable.gpuAddressSpace); 
EXPECT_EQ(pInHwInfo.capabilityTable.gpuAddressSpace, outHwInfo.capabilityTable.gpuAddressSpace); } using HwConfigLinux = ::testing::Test; HWTEST2_F(HwConfigLinux, GivenDifferentValuesFromTopologyQueryWhenConfiguringHwInfoThenMaxSlicesSupportedSetToAvailableCountInGtSystemInfo, MatchAny) { auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); *executionEnvironment->rootDeviceEnvironments[0]->getMutableHardwareInfo() = *NEO::defaultHwInfo.get(); auto drm = new DrmMock(*executionEnvironment->rootDeviceEnvironments[0]); auto osInterface = std::make_unique(); osInterface->setDriverModel(std::unique_ptr(drm)); auto hwInfo = *executionEnvironment->rootDeviceEnvironments[0]->getHardwareInfo(); HardwareInfo outHwInfo; auto hwConfig = HwInfoConfigHw::get(); hwInfo.gtSystemInfo.MaxSubSlicesSupported = drm->storedSSVal * 2; hwInfo.gtSystemInfo.MaxDualSubSlicesSupported = drm->storedSSVal * 2; hwInfo.gtSystemInfo.MaxEuPerSubSlice = 16; hwInfo.gtSystemInfo.MaxSlicesSupported = drm->storedSVal * 4; int ret = hwConfig->configureHwInfoDrm(&hwInfo, &outHwInfo, osInterface.get()); EXPECT_EQ(0, ret); EXPECT_EQ(static_cast(drm->storedSSVal * 2), outHwInfo.gtSystemInfo.MaxSubSlicesSupported); EXPECT_EQ(static_cast(drm->storedSSVal * 2), outHwInfo.gtSystemInfo.MaxDualSubSlicesSupported); EXPECT_EQ(16u, outHwInfo.gtSystemInfo.MaxEuPerSubSlice); EXPECT_EQ(static_cast(drm->storedSVal), outHwInfo.gtSystemInfo.MaxSlicesSupported); drm->storedSVal = 3; drm->storedSSVal = 12; drm->storedEUVal = 12 * 8; hwInfo.gtSystemInfo.MaxSubSlicesSupported = drm->storedSSVal / 2; hwInfo.gtSystemInfo.MaxDualSubSlicesSupported = drm->storedSSVal / 2; hwInfo.gtSystemInfo.MaxEuPerSubSlice = 6; hwInfo.gtSystemInfo.MaxSlicesSupported = drm->storedSVal / 2; ret = hwConfig->configureHwInfoDrm(&hwInfo, &outHwInfo, osInterface.get()); EXPECT_EQ(0, ret); EXPECT_EQ(12u, outHwInfo.gtSystemInfo.MaxSubSlicesSupported); EXPECT_EQ(6u, 
outHwInfo.gtSystemInfo.MaxEuPerSubSlice); // MaxEuPerSubslice is preserved EXPECT_EQ(static_cast(drm->storedSVal), outHwInfo.gtSystemInfo.MaxSlicesSupported); EXPECT_EQ(hwInfo.gtSystemInfo.MaxDualSubSlicesSupported, outHwInfo.gtSystemInfo.MaxDualSubSlicesSupported); hwInfo.gtSystemInfo.MaxEuPerSubSlice = 0; ret = hwConfig->configureHwInfoDrm(&hwInfo, &outHwInfo, osInterface.get()); EXPECT_EQ(0, ret); EXPECT_EQ(8u, outHwInfo.gtSystemInfo.MaxEuPerSubSlice); } HWTEST2_F(HwConfigLinux, givenSliceCountWhenConfigureHwInfoDrmThenProperInitializationInSliceInfoEnabled, MatchAny) { auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); *executionEnvironment->rootDeviceEnvironments[0]->getMutableHardwareInfo() = *NEO::defaultHwInfo.get(); auto drm = new DrmMock(*executionEnvironment->rootDeviceEnvironments[0]); auto osInterface = std::make_unique(); osInterface->setDriverModel(std::unique_ptr(drm)); auto hwInfo = *executionEnvironment->rootDeviceEnvironments[0]->getHardwareInfo(); HardwareInfo outHwInfo; auto hwConfig = HwInfoConfigHw::get(); uint32_t sliceCount = 4; drm->storedSVal = sliceCount; hwInfo.gtSystemInfo.SliceCount = sliceCount; int ret = hwConfig->configureHwInfoDrm(&hwInfo, &outHwInfo, osInterface.get()); EXPECT_EQ(0, ret); for (uint32_t i = 0; i < sliceCount; i++) { EXPECT_TRUE(outHwInfo.gtSystemInfo.SliceInfo[i].Enabled); } }compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/linux/hw_info_config_linux_tests.h000066400000000000000000000014521422164147700332210ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/os_interface/hw_info_config.h" #include "shared/source/os_interface/os_interface.h" #include "shared/source/utilities/cpu_info.h" #include "shared/test/common/helpers/default_hw_info.h" #include 
"shared/test/common/libult/linux/drm_mock.h" #include "opencl/test/unit_test/os_interface/hw_info_config_tests.h" using namespace NEO; struct HwInfoConfigTestLinux : public HwInfoConfigTest { void SetUp() override; void TearDown() override; OSInterface *osInterface; std::unique_ptr executionEnvironment; DrmMock *drm; void (*rt_cpuidex_func)(int *, int, int); }; linux_create_command_queue_with_properties_tests.cpp000066400000000000000000000230421422164147700401720ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/linux/* * Copyright (C) 2019-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/linux/drm_memory_operations_handler.h" #include "shared/source/os_interface/os_interface.h" #include "shared/test/common/libult/linux/drm_mock.h" #include "shared/test/common/mocks/linux/mock_drm_command_stream_receiver.h" #include "shared/test/common/mocks/linux/mock_drm_memory_manager.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" using namespace NEO; struct clCreateCommandQueueWithPropertiesLinux : public UltCommandStreamReceiverTest { void SetUp() override { UltCommandStreamReceiverTest::SetUp(); ExecutionEnvironment *executionEnvironment = new MockExecutionEnvironment(); executionEnvironment->prepareRootDeviceEnvironments(1); drm = new DrmMock(*executionEnvironment->rootDeviceEnvironments[0]); auto osInterface = new OSInterface(); osInterface->setDriverModel(std::unique_ptr(drm)); executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->osInterface.reset(osInterface); executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->memoryOperationsInterface = DrmMemoryOperationsHandler::create(*drm, rootDeviceIndex); executionEnvironment->memoryManager.reset(new 
TestedDrmMemoryManager(*executionEnvironment)); mdevice = std::make_unique(MockDevice::create(executionEnvironment, rootDeviceIndex)); clDevice = mdevice.get(); retVal = CL_SUCCESS; context = std::unique_ptr(Context::create(nullptr, ClDeviceVector(&clDevice, 1), nullptr, nullptr, retVal)); } void TearDown() override { UltCommandStreamReceiverTest::TearDown(); } DrmMock *drm = nullptr; std::unique_ptr mdevice = nullptr; std::unique_ptr context; cl_device_id clDevice = nullptr; cl_int retVal = 0; const uint32_t rootDeviceIndex = 0u; }; namespace ULT { TEST_F(clCreateCommandQueueWithPropertiesLinux, givenUnPossiblePropertiesWithClQueueSliceCountWhenCreateCommandQueueThenQueueNotCreated) { uint64_t newSliceCount = 1; size_t maxSliceCount; clGetDeviceInfo(clDevice, CL_DEVICE_SLICE_COUNT_INTEL, sizeof(size_t), &maxSliceCount, nullptr); newSliceCount = maxSliceCount + 1; cl_queue_properties properties[] = {CL_QUEUE_SLICE_COUNT_INTEL, newSliceCount, 0}; cl_command_queue cmdQ = clCreateCommandQueueWithProperties(context.get(), clDevice, properties, &retVal); EXPECT_EQ(nullptr, cmdQ); EXPECT_EQ(CL_INVALID_QUEUE_PROPERTIES, retVal); } TEST_F(clCreateCommandQueueWithPropertiesLinux, givenZeroWithClQueueSliceCountWhenCreateCommandQueueThenSliceCountEqualDefaultSliceCount) { uint64_t newSliceCount = 0; cl_queue_properties properties[] = {CL_QUEUE_SLICE_COUNT_INTEL, newSliceCount, 0}; cl_command_queue cmdQ = clCreateCommandQueueWithProperties(context.get(), clDevice, properties, &retVal); ASSERT_NE(nullptr, cmdQ); ASSERT_EQ(CL_SUCCESS, retVal); auto commandQueue = castToObject(cmdQ); EXPECT_EQ(commandQueue->getSliceCount(), QueueSliceCount::defaultSliceCount); retVal = clReleaseCommandQueue(cmdQ); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clCreateCommandQueueWithPropertiesLinux, givenPossiblePropertiesWithClQueueSliceCountWhenCreateCommandQueueThenSliceCountIsSet) { uint64_t newSliceCount = 1; size_t maxSliceCount; clGetDeviceInfo(clDevice, CL_DEVICE_SLICE_COUNT_INTEL, 
sizeof(size_t), &maxSliceCount, nullptr); if (maxSliceCount > 1) { newSliceCount = maxSliceCount - 1; } cl_queue_properties properties[] = {CL_QUEUE_SLICE_COUNT_INTEL, newSliceCount, 0}; cl_command_queue cmdQ = clCreateCommandQueueWithProperties(context.get(), clDevice, properties, &retVal); ASSERT_NE(nullptr, cmdQ); ASSERT_EQ(CL_SUCCESS, retVal); auto commandQueue = castToObject(cmdQ); EXPECT_EQ(commandQueue->getSliceCount(), newSliceCount); retVal = clReleaseCommandQueue(cmdQ); EXPECT_EQ(CL_SUCCESS, retVal); } HWTEST_F(clCreateCommandQueueWithPropertiesLinux, givenPropertiesWithClQueueSliceCountWhenCreateCommandQueueThenCallFlushTaskAndSliceCountIsSet) { uint64_t newSliceCount = 1; size_t maxSliceCount; clGetDeviceInfo(clDevice, CL_DEVICE_SLICE_COUNT_INTEL, sizeof(size_t), &maxSliceCount, nullptr); if (maxSliceCount > 1) { newSliceCount = maxSliceCount - 1; } cl_queue_properties properties[] = {CL_QUEUE_SLICE_COUNT_INTEL, newSliceCount, 0}; auto mockCsr = new TestedDrmCommandStreamReceiver(*mdevice->executionEnvironment, rootDeviceIndex, 1); mockCsr->callHwFlush = false; mdevice->resetCommandStreamReceiver(mockCsr); cl_command_queue cmdQ = clCreateCommandQueueWithProperties(context.get(), clDevice, properties, &retVal); ASSERT_NE(nullptr, cmdQ); ASSERT_EQ(CL_SUCCESS, retVal); auto commandQueue = castToObject>(cmdQ); auto &commandStream = commandQueue->getCS(1024u); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.sliceCount = commandQueue->getSliceCount(); dispatchFlags.implicitFlush = true; mockCsr->flushTask(commandStream, 0u, &dsh, &ioh, &ssh, taskLevel, dispatchFlags, mdevice->getDevice()); auto expectedSliceMask = drm->getSliceMask(newSliceCount); EXPECT_EQ(expectedSliceMask, drm->storedParamSseu); drm_i915_gem_context_param_sseu sseu = {}; EXPECT_EQ(0, drm->getQueueSliceCount(&sseu)); EXPECT_EQ(expectedSliceMask, sseu.slice_mask); EXPECT_EQ(newSliceCount, mockCsr->lastSentSliceCount); retVal = 
clReleaseCommandQueue(cmdQ); EXPECT_EQ(CL_SUCCESS, retVal); } HWTEST_F(clCreateCommandQueueWithPropertiesLinux, givenSameSliceCountAsRecentlySetWhenCreateCommandQueueThenSetQueueSliceCountNotCalled) { uint64_t newSliceCount = 1; size_t maxSliceCount; clGetDeviceInfo(clDevice, CL_DEVICE_SLICE_COUNT_INTEL, sizeof(size_t), &maxSliceCount, nullptr); if (maxSliceCount > 1) { newSliceCount = maxSliceCount - 1; } cl_queue_properties properties[] = {CL_QUEUE_SLICE_COUNT_INTEL, newSliceCount, 0}; auto mockCsr = new TestedDrmCommandStreamReceiver(*mdevice->executionEnvironment, rootDeviceIndex, 1); mockCsr->callHwFlush = false; mdevice->resetCommandStreamReceiver(mockCsr); cl_command_queue cmdQ = clCreateCommandQueueWithProperties(context.get(), clDevice, properties, &retVal); ASSERT_NE(nullptr, cmdQ); ASSERT_EQ(CL_SUCCESS, retVal); auto commandQueue = castToObject>(cmdQ); auto &commandStream = commandQueue->getCS(1024u); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.sliceCount = commandQueue->getSliceCount(); dispatchFlags.implicitFlush = true; mockCsr->lastSentSliceCount = newSliceCount; mockCsr->flushTask(commandStream, 0u, &dsh, &ioh, &ssh, taskLevel, dispatchFlags, mdevice->getDevice()); auto expectedSliceMask = drm->getSliceMask(newSliceCount); EXPECT_NE(expectedSliceMask, drm->storedParamSseu); drm_i915_gem_context_param_sseu sseu = {}; EXPECT_EQ(0, drm->getQueueSliceCount(&sseu)); EXPECT_NE(expectedSliceMask, sseu.slice_mask); retVal = clReleaseCommandQueue(cmdQ); EXPECT_EQ(CL_SUCCESS, retVal); } HWTEST_F(clCreateCommandQueueWithPropertiesLinux, givenPropertiesWithClQueueSliceCountWhenCreateCommandQueueThenSetReturnFalseAndLastSliceCountNotModify) { uint64_t newSliceCount = 1; size_t maxSliceCount; clGetDeviceInfo(clDevice, CL_DEVICE_SLICE_COUNT_INTEL, sizeof(size_t), &maxSliceCount, nullptr); if (maxSliceCount > 1) { newSliceCount = maxSliceCount - 1; } cl_queue_properties properties[] = {CL_QUEUE_SLICE_COUNT_INTEL, 
newSliceCount, 0}; auto mockCsr = new TestedDrmCommandStreamReceiver(*mdevice->executionEnvironment, rootDeviceIndex, 1); mockCsr->callHwFlush = false; mdevice->resetCommandStreamReceiver(mockCsr); cl_command_queue cmdQ = clCreateCommandQueueWithProperties(context.get(), clDevice, properties, &retVal); ASSERT_NE(nullptr, cmdQ); ASSERT_EQ(CL_SUCCESS, retVal); auto commandQueue = castToObject>(cmdQ); auto &commandStream = commandQueue->getCS(1024u); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.sliceCount = commandQueue->getSliceCount(); drm->storedRetValForSetSSEU = -1; auto lastSliceCountBeforeFlushTask = mockCsr->lastSentSliceCount; mockCsr->flushTask(commandStream, 0u, &dsh, &ioh, &ssh, taskLevel, dispatchFlags, mdevice->getDevice()); EXPECT_NE(newSliceCount, mockCsr->lastSentSliceCount); EXPECT_EQ(lastSliceCountBeforeFlushTask, mockCsr->lastSentSliceCount); retVal = clReleaseCommandQueue(cmdQ); EXPECT_EQ(CL_SUCCESS, retVal); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/linux/mock_os_time_linux.h000066400000000000000000000025661422164147700315000ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/os_interface/linux/device_time_drm.h" #include "shared/source/os_interface/linux/os_time_linux.h" #include "shared/source/os_interface/os_interface.h" namespace NEO { class MockDeviceTimeDrm : public DeviceTimeDrm { public: using DeviceTimeDrm::pDrm; MockDeviceTimeDrm() : DeviceTimeDrm(nullptr) { } }; class MockOSTimeLinux : public OSTimeLinux { public: MockOSTimeLinux(OSInterface *osInterface) : OSTimeLinux(osInterface, std::make_unique()) { } void setResolutionFunc(resolutionFunc_t func) { this->resolutionFunc = func; } void setGetTimeFunc(getTimeFunc_t func) { this->getTimeFunc = func; } void updateDrm(Drm *drm) { osInterface->setDriverModel(std::unique_ptr(drm)); 
static_cast(this->deviceTime.get())->pDrm = drm; static_cast(this->deviceTime.get())->timestampTypeDetect(); } static std::unique_ptr create(OSInterface *osInterface) { return std::unique_ptr(new MockOSTimeLinux(osInterface)); } MockDeviceTimeDrm *getDeviceTime() { return static_cast(this->deviceTime.get()); } }; } // namespace NEO mock_performance_counters_linux.cpp000066400000000000000000000050341422164147700345310ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/linux/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "mock_performance_counters_linux.h" #include "shared/test/common/libult/linux/drm_mock.h" #include "opencl/test/unit_test/os_interface/linux/mock_os_time_linux.h" #include "opencl/test/unit_test/os_interface/mock_performance_counters.h" namespace NEO { ////////////////////////////////////////////////////// // MockPerformanceCountersLinux::MockPerformanceCountersLinux ////////////////////////////////////////////////////// MockPerformanceCountersLinux::MockPerformanceCountersLinux(Device *device) : PerformanceCountersLinux() { } ////////////////////////////////////////////////////// // MockPerformanceCounters::create ////////////////////////////////////////////////////// std::unique_ptr MockPerformanceCounters::create(Device *device) { auto performanceCounters = std::unique_ptr(new MockPerformanceCountersLinux(device)); auto metricsLibrary = std::make_unique(); auto metricsLibraryDll = std::make_unique(); metricsLibrary->api = std::make_unique(); metricsLibrary->osLibrary = std::move(metricsLibraryDll); performanceCounters->setMetricsLibraryInterface(std::move(metricsLibrary)); return performanceCounters; } ////////////////////////////////////////////////////// // PerformanceCountersFixture::createPerfCounters ////////////////////////////////////////////////////// void PerformanceCountersFixture::createPerfCounters() { performanceCountersBase = 
MockPerformanceCounters::create(&device->getDevice()); } ////////////////////////////////////////////////////// // PerformanceCountersFixture::SetUp ////////////////////////////////////////////////////// void PerformanceCountersFixture::SetUp() { device = std::make_unique(new MockDevice()); context = std::make_unique(device.get()); queue = std::make_unique(context.get(), device.get(), &queueProperties, false); osInterface = std::unique_ptr(new OSInterface()); osInterface->setDriverModel(std::unique_ptr(new DrmMock(*device->getExecutionEnvironment()->rootDeviceEnvironments[0]))); device->setOSTime(new MockOSTimeLinux(osInterface.get())); } ////////////////////////////////////////////////////// // PerformanceCountersFixture::TearDown ////////////////////////////////////////////////////// void PerformanceCountersFixture::TearDown() { } } // namespace NEO mock_performance_counters_linux.h000066400000000000000000000007231422164147700341760ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/linux/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/os_interface/linux/performance_counters_linux.h" #include "opencl/test/unit_test/os_interface/mock_performance_counters.h" namespace NEO { using MetricsLibraryApi::LinuxAdapterType; class MockPerformanceCountersLinux : public PerformanceCountersLinux { public: MockPerformanceCountersLinux(Device *device); }; } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/linux/os_interface_linux_tests.cpp000066400000000000000000000035631422164147700332440ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/preemption.h" #include "shared/source/gmm_helper/gmm_lib.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/os_interface/linux/os_context_linux.h" #include 
"shared/source/os_interface/os_interface.h" #include "shared/test/common/libult/linux/drm_mock.h" #include "shared/test/common/mocks/mock_execution_environment.h" #include "gtest/gtest.h" namespace NEO { TEST(OsInterfaceTest, GivenLinuxWhenCallingAre64kbPagesEnabledThenReturnFalse) { EXPECT_FALSE(OSInterface::are64kbPagesEnabled()); } TEST(OsInterfaceTest, GivenLinuxOsInterfaceWhenDeviceHandleQueriedThenZeroIsReturned) { auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); auto drm = std::make_unique(*executionEnvironment->rootDeviceEnvironments[0]); OSInterface osInterface; osInterface.setDriverModel(std::move(drm)); EXPECT_EQ(0u, osInterface.getDriverModel()->getDeviceHandle()); } TEST(OsInterfaceTest, GivenLinuxOsWhenCheckForNewResourceImplicitFlushSupportThenReturnTrue) { EXPECT_TRUE(OSInterface::newResourceImplicitFlush); } TEST(OsInterfaceTest, GivenLinuxOsWhenCheckForGpuIdleImplicitFlushSupportThenReturnFalse) { EXPECT_TRUE(OSInterface::gpuIdleImplicitFlush); } TEST(OsInterfaceTest, GivenLinuxOsInterfaceWhenCallingIsDebugAttachAvailableThenFalseIsReturned) { OSInterface osInterface; auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); DrmMock *drm = new DrmMock(*executionEnvironment->rootDeviceEnvironments[0]); osInterface.setDriverModel(std::unique_ptr(drm)); EXPECT_FALSE(osInterface.isDebugAttachAvailable()); } } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/linux/os_library_linux_tests.cpp000066400000000000000000000030561422164147700327450ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/linux/os_library_linux.h" #include "shared/source/os_interface/linux/sys_calls.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/variable_backup.h" #include "gtest/gtest.h" #include 
namespace NEO { namespace SysCalls { extern int dlOpenFlags; extern bool dlOpenCalled; } // namespace SysCalls TEST(OsLibraryTest, WhenCreatingFullSystemPathThenProperPathIsConstructed) { auto fullPath = OsLibrary::createFullSystemPath("test"); EXPECT_STREQ("test", fullPath.c_str()); } TEST(OsLibraryTest, GivenDisableDeepBindFlagWhenOpeningLibraryThenRtldDeepBindFlagIsNotPassed) { DebugManagerStateRestore restorer; VariableBackup dlOpenFlagsBackup{&NEO::SysCalls::dlOpenFlags, 0}; VariableBackup dlOpenCalledBackup{&NEO::SysCalls::dlOpenCalled, false}; DebugManager.flags.DisableDeepBind.set(1); auto lib = std::make_unique("_abc.so", nullptr); EXPECT_TRUE(NEO::SysCalls::dlOpenCalled); EXPECT_EQ(0, NEO::SysCalls::dlOpenFlags & RTLD_DEEPBIND); } TEST(OsLibraryTest, GivenInvalidLibraryWhenOpeningLibraryThenDlopenErrorIsReturned) { VariableBackup dlOpenCalledBackup{&NEO::SysCalls::dlOpenCalled, false}; std::string errorValue; auto lib = std::make_unique("_abc.so", &errorValue); EXPECT_FALSE(errorValue.empty()); EXPECT_TRUE(NEO::SysCalls::dlOpenCalled); } } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/linux/os_time_test.cpp000066400000000000000000000227001422164147700306320ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/linux/drm_neo.h" #include "shared/source/os_interface/linux/os_time_linux.h" #include "shared/source/os_interface/os_interface.h" #include "shared/test/common/mocks/mock_execution_environment.h" #include "shared/test/common/os_interface/linux/device_command_stream_fixture.h" #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/os_interface/linux/mock_os_time_linux.h" #include "gtest/gtest.h" #include static int actualTime = 0; int getTimeFuncFalse(clockid_t clkId, struct timespec *tp) throw() { return -1; } int getTimeFuncTrue(clockid_t clkId, struct timespec *tp) throw() { tp->tv_sec = 0; 
tp->tv_nsec = ++actualTime; return 0; } int resolutionFuncFalse(clockid_t clkId, struct timespec *res) throw() { return -1; } int resolutionFuncTrue(clockid_t clkId, struct timespec *res) throw() { res->tv_sec = 0; res->tv_nsec = 5; return 0; } using namespace NEO; struct DrmTimeTest : public ::testing::Test { public: void SetUp() override { osInterface = std::unique_ptr(new OSInterface()); osTime = MockOSTimeLinux::create(osInterface.get()); osTime->setResolutionFunc(resolutionFuncTrue); osTime->setGetTimeFunc(getTimeFuncTrue); } void TearDown() override { } std::unique_ptr osTime; std::unique_ptr osInterface; MockExecutionEnvironment executionEnvironment; }; TEST_F(DrmTimeTest, GivenMockOsTimeThenInitializes) { } TEST_F(DrmTimeTest, WhenGettingCpuTimeThenSucceeds) { uint64_t time = 0; auto error = osTime->getCpuTime(&time); EXPECT_TRUE(error); EXPECT_NE(0ULL, time); } TEST_F(DrmTimeTest, GivenFalseTimeFuncWhenGettingCpuTimeThenFails) { uint64_t time = 0; osTime->setGetTimeFunc(getTimeFuncFalse); auto error = osTime->getCpuTime(&time); EXPECT_FALSE(error); } TEST_F(DrmTimeTest, WhenGettingGpuTimeThenSuceeds) { uint64_t time = 0; auto pDrm = new DrmMockTime(mockFd, *executionEnvironment.rootDeviceEnvironments[0]); osTime->updateDrm(pDrm); auto error = osTime->getDeviceTime()->getGpuTime32(&time); EXPECT_TRUE(error); EXPECT_NE(0ULL, time); error = osTime->getDeviceTime()->getGpuTime36(&time); EXPECT_TRUE(error); EXPECT_NE(0ULL, time); error = osTime->getDeviceTime()->getGpuTimeSplitted(&time); EXPECT_TRUE(error); EXPECT_NE(0ULL, time); } TEST_F(DrmTimeTest, GivenInvalidDrmWhenGettingGpuTimeThenFails) { uint64_t time = 0; auto pDrm = new DrmMockFail(*executionEnvironment.rootDeviceEnvironments[0]); osTime->updateDrm(pDrm); auto error = osTime->getDeviceTime()->getGpuTime32(&time); EXPECT_FALSE(error); error = osTime->getDeviceTime()->getGpuTime36(&time); EXPECT_FALSE(error); error = osTime->getDeviceTime()->getGpuTimeSplitted(&time); EXPECT_FALSE(error); } 
TEST_F(DrmTimeTest, WhenGettingCpuGpuTimeThenSucceeds) { TimeStampData CPUGPUTime01 = {0, 0}; TimeStampData CPUGPUTime02 = {0, 0}; auto pDrm = new DrmMockTime(mockFd, *executionEnvironment.rootDeviceEnvironments[0]); osTime->updateDrm(pDrm); auto error = osTime->getCpuGpuTime(&CPUGPUTime01); EXPECT_TRUE(error); EXPECT_NE(0ULL, CPUGPUTime01.CPUTimeinNS); EXPECT_NE(0ULL, CPUGPUTime01.GPUTimeStamp); error = osTime->getCpuGpuTime(&CPUGPUTime02); EXPECT_TRUE(error); EXPECT_NE(0ULL, CPUGPUTime02.CPUTimeinNS); EXPECT_NE(0ULL, CPUGPUTime02.GPUTimeStamp); EXPECT_GT(CPUGPUTime02.GPUTimeStamp, CPUGPUTime01.GPUTimeStamp); EXPECT_GT(CPUGPUTime02.CPUTimeinNS, CPUGPUTime01.CPUTimeinNS); } TEST_F(DrmTimeTest, GivenDrmWhenGettingCpuGpuTimeThenSucceeds) { TimeStampData CPUGPUTime01 = {0, 0}; TimeStampData CPUGPUTime02 = {0, 0}; auto pDrm = new DrmMockTime(mockFd, *executionEnvironment.rootDeviceEnvironments[0]); osTime->updateDrm(pDrm); auto error = osTime->getCpuGpuTime(&CPUGPUTime01); EXPECT_TRUE(error); EXPECT_NE(0ULL, CPUGPUTime01.CPUTimeinNS); EXPECT_NE(0ULL, CPUGPUTime01.GPUTimeStamp); error = osTime->getCpuGpuTime(&CPUGPUTime02); EXPECT_TRUE(error); EXPECT_NE(0ULL, CPUGPUTime02.CPUTimeinNS); EXPECT_NE(0ULL, CPUGPUTime02.GPUTimeStamp); EXPECT_GT(CPUGPUTime02.GPUTimeStamp, CPUGPUTime01.GPUTimeStamp); EXPECT_GT(CPUGPUTime02.CPUTimeinNS, CPUGPUTime01.CPUTimeinNS); } TEST_F(DrmTimeTest, givenGetCpuGpuTimeWhenItIsUnavailableThenReturnFalse) { TimeStampData CPUGPUTime = {0, 0}; auto error = osTime->getCpuGpuTime(&CPUGPUTime); EXPECT_FALSE(error); } TEST_F(DrmTimeTest, GivenInvalidDrmWhenGettingCpuGpuTimeThenFails) { TimeStampData CPUGPUTime01 = {0, 0}; auto pDrm = new DrmMockFail(*executionEnvironment.rootDeviceEnvironments[0]); osTime->updateDrm(pDrm); auto error = osTime->getCpuGpuTime(&CPUGPUTime01); EXPECT_FALSE(error); } TEST_F(DrmTimeTest, GivenInvalidFuncTimeWhenGettingCpuGpuTimeCpuThenFails) { TimeStampData CPUGPUTime01 = {0, 0}; auto pDrm = new DrmMockTime(mockFd, 
*executionEnvironment.rootDeviceEnvironments[0]); osTime->setGetTimeFunc(getTimeFuncFalse); osTime->updateDrm(pDrm); auto error = osTime->getCpuGpuTime(&CPUGPUTime01); EXPECT_FALSE(error); } TEST_F(DrmTimeTest, WhenGettingTimeThenTimeIsCorrect) { auto drm = new DrmMockCustom(*executionEnvironment.rootDeviceEnvironments[0]); osTime->updateDrm(drm); { auto p = osTime->getDeviceTime()->getGpuTime; EXPECT_EQ(p, &DeviceTimeDrm::getGpuTime36); } { drm->ioctl_res = -1; osTime->getDeviceTime()->timestampTypeDetect(); auto p = osTime->getDeviceTime()->getGpuTime; EXPECT_EQ(p, &DeviceTimeDrm::getGpuTime32); } DrmMockCustom::IoctlResExt ioctlToPass = {1, 0}; { drm->reset(); drm->ioctl_res = -1; drm->ioctl_res_ext = &ioctlToPass; // 2nd ioctl is successful osTime->getDeviceTime()->timestampTypeDetect(); auto p = osTime->getDeviceTime()->getGpuTime; EXPECT_EQ(p, &DeviceTimeDrm::getGpuTimeSplitted); drm->ioctl_res_ext = &drm->NONE; } } TEST_F(DrmTimeTest, givenGpuTimestampResolutionQueryWhenIoctlFailsThenDefaultResolutionIsReturned) { auto defaultResolution = defaultHwInfo->capabilityTable.defaultProfilingTimerResolution; auto drm = new DrmMockCustom(*executionEnvironment.rootDeviceEnvironments[0]); osTime->updateDrm(drm); drm->getParamRetValue = 0; drm->ioctl_res = -1; auto result = osTime->getDynamicDeviceTimerResolution(*defaultHwInfo); EXPECT_DOUBLE_EQ(result, defaultResolution); } TEST_F(DrmTimeTest, givenGetDynamicDeviceTimerClockWhenIoctlFailsThenDefaultClockIsReturned) { auto defaultResolution = defaultHwInfo->capabilityTable.defaultProfilingTimerResolution; auto drm = new DrmMockCustom(*executionEnvironment.rootDeviceEnvironments[0]); osTime->updateDrm(drm); drm->getParamRetValue = 0; drm->ioctl_res = -1; auto result = osTime->getDynamicDeviceTimerClock(*defaultHwInfo); auto expectedResult = static_cast(1000000000.0 / defaultResolution); EXPECT_EQ(result, expectedResult); } TEST_F(DrmTimeTest, 
givenGetDynamicDeviceTimerClockWhenIoctlSucceedsThenNonDefaultClockIsReturned) { auto drm = new DrmMockCustom(*executionEnvironment.rootDeviceEnvironments[0]); osTime->updateDrm(drm); uint64_t frequency = 1500; drm->getParamRetValue = static_cast(frequency); auto result = osTime->getDynamicDeviceTimerClock(*defaultHwInfo); EXPECT_EQ(result, frequency); } TEST_F(DrmTimeTest, givenGpuTimestampResolutionQueryWhenNoDrmThenDefaultResolutionIsReturned) { osTime->updateDrm(nullptr); auto defaultResolution = defaultHwInfo->capabilityTable.defaultProfilingTimerResolution; auto result = osTime->getDynamicDeviceTimerResolution(*defaultHwInfo); EXPECT_DOUBLE_EQ(result, defaultResolution); } TEST_F(DrmTimeTest, givenGpuTimestampResolutionQueryWhenIoctlSuccedsThenCorrectResolutionIsReturned) { auto drm = new DrmMockCustom(*executionEnvironment.rootDeviceEnvironments[0]); osTime->updateDrm(drm); // 19200000 is frequency yelding 52.083ns resolution drm->getParamRetValue = 19200000; drm->ioctl_res = 0; auto result = osTime->getDynamicDeviceTimerResolution(*defaultHwInfo); EXPECT_DOUBLE_EQ(result, 52.08333333333333); } TEST_F(DrmTimeTest, givenAlwaysFailingResolutionFuncWhenGetHostTimerResolutionIsCalledThenReturnsZero) { osTime->setResolutionFunc(resolutionFuncFalse); auto retVal = osTime->getHostTimerResolution(); EXPECT_EQ(0, retVal); } TEST_F(DrmTimeTest, givenAlwaysPassingResolutionFuncWhenGetHostTimerResolutionIsCalledThenReturnsNonzero) { osTime->setResolutionFunc(resolutionFuncTrue); auto retVal = osTime->getHostTimerResolution(); EXPECT_EQ(5, retVal); } TEST_F(DrmTimeTest, givenAlwaysFailingResolutionFuncWhenGetCpuRawTimestampIsCalledThenReturnsZero) { osTime->setResolutionFunc(resolutionFuncFalse); auto retVal = osTime->getCpuRawTimestamp(); EXPECT_EQ(0ull, retVal); } TEST_F(DrmTimeTest, givenAlwaysFailingGetTimeFuncWhenGetCpuRawTimestampIsCalledThenReturnsZero) { osTime->setGetTimeFunc(getTimeFuncFalse); auto retVal = osTime->getCpuRawTimestamp(); EXPECT_EQ(0ull, retVal); 
} TEST_F(DrmTimeTest, givenAlwaysPassingResolutionFuncWhenGetCpuRawTimestampIsCalledThenReturnsNonzero) { actualTime = 4; auto retVal = osTime->getCpuRawTimestamp(); EXPECT_EQ(1ull, retVal); } performance_counters_linux_tests.cpp000066400000000000000000000024561422164147700347470ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/linux/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/linux/drm_neo.h" #include "shared/source/os_interface/linux/performance_counters_linux.h" #include "shared/source/os_interface/os_interface.h" #include "opencl/test/unit_test/os_interface/mock_performance_counters.h" #include "gtest/gtest.h" using namespace NEO; struct PerformanceCountersLinuxTest : public PerformanceCountersFixture, public ::testing::Test { public: void SetUp() override { PerformanceCountersFixture::SetUp(); } void TearDown() override { PerformanceCountersFixture::TearDown(); } }; TEST_F(PerformanceCountersLinuxTest, WhenCreatingPerformanceCountersThenDrmFileDescriptorIsPassed) { auto performanceCounters = PerformanceCounters::create(&device->getDevice()); EXPECT_NE(nullptr, performanceCounters); EXPECT_NE(nullptr, performanceCounters.get()); auto performanceCountersLinux = static_cast(performanceCounters.get()); EXPECT_EQ(LinuxAdapterType::DrmFileDescriptor, performanceCountersLinux->adapter.Type); EXPECT_EQ(osInterface->getDriverModel()->as()->getFileDescriptor(), performanceCountersLinux->adapter.DrmFileDescriptor); } compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/linux/self_lib_lin.cpp000066400000000000000000000002571422164147700305600ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ extern "C" __attribute__((visibility("default"))) void selfDynamicLibraryFunc() { } 
compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/mock_performance_counters.cpp000066400000000000000000000364631422164147700322440ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "mock_performance_counters.h" #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/os_interface/os_interface.h" #include "shared/test/common/mocks/mock_execution_environment.h" using namespace MetricsLibraryApi; namespace NEO { ////////////////////////////////////////////////////// // MockMetricsLibrary::open ////////////////////////////////////////////////////// bool MockMetricsLibrary::open() { if (validOpen) { ++openCount; return true; } else { return false; } } ////////////////////////////////////////////////////// // MockMetricsLibrary::contextCreate ////////////////////////////////////////////////////// bool MockMetricsLibrary::contextCreate(const ClientType_1_0 &client, ClientOptionsSubDeviceData_1_0 &subDevice, ClientOptionsSubDeviceIndexData_1_0 &subDeviceIndex, ClientOptionsSubDeviceCountData_1_0 &subDeviceCount, ClientData_1_0 &clientData, ContextCreateData_1_0 &createData, ContextHandle_1_0 &handle) { if (client.Api != MetricsLibraryApi::ClientApi::OpenCL) { return false; } handle.data = reinterpret_cast(this); ++contextCount; return true; } ////////////////////////////////////////////////////// // MockMetricsLibrary::contextDelete ////////////////////////////////////////////////////// bool MockMetricsLibrary::contextDelete(const ContextHandle_1_0 &handle) { if (!handle.IsValid()) { return false; } --contextCount; return true; } ////////////////////////////////////////////////////// // MockMetricsLibrary::hwCountersCreate ////////////////////////////////////////////////////// bool MockMetricsLibrary::hwCountersCreate(const ContextHandle_1_0 &context, const uint32_t slots, const ConfigurationHandle_1_0 mmio, QueryHandle_1_0 &handle) { ++queryCount; 
return true; }; ////////////////////////////////////////////////////// // MockMetricsLibrary::hwCountersDelete ////////////////////////////////////////////////////// bool MockMetricsLibrary::hwCountersDelete(const QueryHandle_1_0 &handle) { --queryCount; return true; } ////////////////////////////////////////////////////// // MockMetricsLibrary::hwCountersGetReport ////////////////////////////////////////////////////// bool MockMetricsLibrary::hwCountersGetReport(const QueryHandle_1_0 &handle, const uint32_t slot, const uint32_t slotsCount, const uint32_t dataSize, void *data) { return validGetData; } ////////////////////////////////////////////////////// // MockMetricsLibrary::hwCountersGetApiReportSize ////////////////////////////////////////////////////// uint32_t MockMetricsLibrary::hwCountersGetApiReportSize() { return 1; } ////////////////////////////////////////////////////// // MockMetricsLibrary::hwCountersGetGpuReportSize ////////////////////////////////////////////////////// uint32_t MockMetricsLibrary::hwCountersGetGpuReportSize() { return sizeof(HwPerfCounter); } ////////////////////////////////////////////////////// // MockMetricsLibrary::commandBufferGet ////////////////////////////////////////////////////// bool MockMetricsLibrary::commandBufferGet(CommandBufferData_1_0 &data) { MI_REPORT_PERF_COUNT mirpc = {}; mirpc.init(); DEBUG_BREAK_IF(data.Data == nullptr); memcpy(data.Data, &mirpc, sizeof(mirpc)); return true; } ////////////////////////////////////////////////////// // MockMetricsLibrary::commandBufferGetSize ////////////////////////////////////////////////////// bool MockMetricsLibrary::commandBufferGetSize(const CommandBufferData_1_0 &commandBufferData, CommandBufferSize_1_0 &commandBufferSize) { commandBufferSize.GpuMemorySize = sizeof(MI_REPORT_PERF_COUNT); return true; } ////////////////////////////////////////////////////// // MockMetricsLibrary::getProcAddress ////////////////////////////////////////////////////// void 
*MockMetricsLibraryDll::getProcAddress(const std::string &procName) { if (procName == METRICS_LIBRARY_CONTEXT_CREATE_1_0) { return validContextCreate ? reinterpret_cast(&MockMetricsLibraryValidInterface::ContextCreate) : nullptr; } else if (procName == METRICS_LIBRARY_CONTEXT_DELETE_1_0) { return validContextDelete ? reinterpret_cast(&MockMetricsLibraryValidInterface::ContextDelete) : nullptr; } else { return nullptr; } } ////////////////////////////////////////////////////// // MockMetricsLibrary::isLoaded ////////////////////////////////////////////////////// bool MockMetricsLibraryDll::isLoaded() { return validIsLoaded; } ////////////////////////////////////////////////////// // MockMetricsLibraryValidInterface::ContextCreate ////////////////////////////////////////////////////// StatusCode MockMetricsLibraryValidInterface::ContextCreate(ClientType_1_0 clientType, ContextCreateData_1_0 *createData, ContextHandle_1_0 *handle) { // Validate input. EXPECT_EQ(clientType.Api, ClientApi::OpenCL); // Library handle. auto library = new MockMetricsLibraryValidInterface(); handle->data = library; EXPECT_TRUE(handle->IsValid()); // Context count. library->contextCount++; EXPECT_EQ(library->contextCount, 1u); return handle->IsValid() ? StatusCode::Success : StatusCode::Failed; } ////////////////////////////////////////////////////// // MockMetricsLibraryValidInterface::ContextDelete ////////////////////////////////////////////////////// StatusCode MockMetricsLibraryValidInterface::ContextDelete(const ContextHandle_1_0 handle) { auto validHandle = handle.IsValid(); auto library = static_cast(handle.data); // Validate input. EXPECT_TRUE(validHandle); EXPECT_TRUE(validHandle); EXPECT_EQ(--library->contextCount, 0u); // Delete handle. delete library; return validHandle ? 
StatusCode::Success : StatusCode::IncorrectObject; } ////////////////////////////////////////////////////// // MockMetricsLibraryInterface::QueryCreate ////////////////////////////////////////////////////// StatusCode MockMetricsLibraryValidInterface::QueryCreate(const QueryCreateData_1_0 *createData, QueryHandle_1_0 *handle) { EXPECT_NE(handle, nullptr); EXPECT_NE(createData, nullptr); EXPECT_GE(createData->Slots, 1u); EXPECT_TRUE(createData->HandleContext.IsValid()); EXPECT_EQ(createData->Type, ObjectType::QueryHwCounters); handle->data = new uint32_t(0); return StatusCode::Success; } ////////////////////////////////////////////////////// // MockMetricsLibraryValidInterface::QueryDelete ////////////////////////////////////////////////////// StatusCode MockMetricsLibraryValidInterface::QueryDelete(const QueryHandle_1_0 handle) { if (handle.IsValid()) { delete (uint32_t *)handle.data; } return StatusCode::Success; } ////////////////////////////////////////////////////// // MockMetricsLibraryValidInterface::CommandBufferGetSize ////////////////////////////////////////////////////// StatusCode MockMetricsLibraryValidInterface::CommandBufferGetSize(const CommandBufferData_1_0 *data, CommandBufferSize_1_0 *size) { auto library = static_cast(data->HandleContext.data); EXPECT_NE(data, nullptr); EXPECT_TRUE(data->HandleContext.IsValid()); EXPECT_TRUE(data->QueryHwCounters.Handle.IsValid()); EXPECT_EQ(data->Type, GpuCommandBufferType::Render); EXPECT_EQ(data->CommandsType, ObjectType::QueryHwCounters); EXPECT_NE(size, nullptr); size->GpuMemorySize = library->validGpuReportSize ? 123 : 0; return library->validGpuReportSize ? 
StatusCode::Success : StatusCode::Failed; } ////////////////////////////////////////////////////// // MockMetricsLibraryValidInterface::CommandBufferGet ////////////////////////////////////////////////////// StatusCode MockMetricsLibraryValidInterface::CommandBufferGet(const CommandBufferData_1_0 *data) { EXPECT_NE(data, nullptr); EXPECT_TRUE(data->HandleContext.IsValid()); EXPECT_TRUE(data->QueryHwCounters.Handle.IsValid()); EXPECT_EQ(data->Type, GpuCommandBufferType::Render); EXPECT_EQ(data->CommandsType, ObjectType::QueryHwCounters); EXPECT_NE(data->Data, nullptr); EXPECT_GT(data->Size, 0u); return StatusCode::Success; } ////////////////////////////////////////////////////// // MockMetricsLibraryValidInterface::CommandBufferGet ////////////////////////////////////////////////////// StatusCode MockMetricsLibraryValidInterface::GetParameter(const ParameterType parameter, ValueType *type, TypedValue_1_0 *value) { EXPECT_NE(type, nullptr); EXPECT_NE(value, nullptr); switch (parameter) { case ParameterType::QueryHwCountersReportApiSize: *type = ValueType::Uint32; value->ValueUInt32 = 123; break; case ParameterType::QueryHwCountersReportGpuSize: *type = ValueType::Uint32; value->ValueUInt32 = 123; break; default: EXPECT_TRUE(false); break; } return StatusCode::Success; } ////////////////////////////////////////////////////// // MockMetricsLibraryValidInterface::ConfigurationCreate ////////////////////////////////////////////////////// StatusCode ML_STDCALL MockMetricsLibraryValidInterface::ConfigurationCreate(const ConfigurationCreateData_1_0 *createData, ConfigurationHandle_1_0 *handle) { EXPECT_NE(createData, nullptr); EXPECT_NE(handle, nullptr); EXPECT_TRUE(createData->HandleContext.IsValid()); const bool validType = (createData->Type == ObjectType::ConfigurationHwCountersOa) || (createData->Type == ObjectType::ConfigurationHwCountersUser); // Mock overrides auto api = static_cast(createData->HandleContext.data); if (!api->validCreateConfigurationOa && 
(createData->Type == ObjectType::ConfigurationHwCountersOa)) { return StatusCode::Failed; } if (!api->validCreateConfigurationUser && (createData->Type == ObjectType::ConfigurationHwCountersUser)) { return StatusCode::Failed; } EXPECT_TRUE(validType); handle->data = api; return StatusCode::Success; } ////////////////////////////////////////////////////// // MockMetricsLibraryValidInterface::ConfigurationActivate ////////////////////////////////////////////////////// StatusCode ML_STDCALL MockMetricsLibraryValidInterface::ConfigurationActivate(const ConfigurationHandle_1_0 handle, const ConfigurationActivateData_1_0 *activateData) { auto api = static_cast(handle.data); return api->validActivateConfigurationOa ? StatusCode::Success : StatusCode::Failed; } ////////////////////////////////////////////////////// // MockMetricsLibraryValidInterface::ConfigurationDelete ////////////////////////////////////////////////////// StatusCode ML_STDCALL MockMetricsLibraryValidInterface::ConfigurationDelete(const ConfigurationHandle_1_0 handle) { EXPECT_TRUE(handle.IsValid()); return StatusCode::Success; } ////////////////////////////////////////////////////// // MockMetricsLibraryValidInterface::GetData ////////////////////////////////////////////////////// StatusCode MockMetricsLibraryValidInterface::GetData(GetReportData_1_0 *data) { EXPECT_NE(data, nullptr); EXPECT_EQ(data->Type, ObjectType::QueryHwCounters); EXPECT_TRUE(data->Query.Handle.IsValid()); EXPECT_GE(data->Query.Slot, 0u); EXPECT_GT(data->Query.SlotsCount, 0u); EXPECT_NE(data->Query.Data, nullptr); EXPECT_GT(data->Query.DataSize, 0u); return StatusCode::Success; } ////////////////////////////////////////////////////// // PerformanceCountersDeviceFixture::SetUp ////////////////////////////////////////////////////// void PerformanceCountersDeviceFixture::SetUp() { createFunc = Device::createPerformanceCountersFunc; Device::createPerformanceCountersFunc = MockPerformanceCounters::create; } 
////////////////////////////////////////////////////// // PerformanceCountersDeviceFixture::TearDown ////////////////////////////////////////////////////// void PerformanceCountersDeviceFixture::TearDown() { Device::createPerformanceCountersFunc = createFunc; } ////////////////////////////////////////////////////// // PerformanceCountersMetricsLibraryFixture::SetUp ////////////////////////////////////////////////////// void PerformanceCountersMetricsLibraryFixture::SetUp() { PerformanceCountersFixture::SetUp(); } ////////////////////////////////////////////////////// // PerformanceCountersMetricsLibraryFixture::TearDown ////////////////////////////////////////////////////// void PerformanceCountersMetricsLibraryFixture::TearDown() { device->setPerfCounters(nullptr); PerformanceCountersFixture::TearDown(); } ////////////////////////////////////////////////////// // PerformanceCountersFixture::PerformanceCountersFixture ////////////////////////////////////////////////////// PerformanceCountersFixture::PerformanceCountersFixture() { executionEnvironment = std::make_unique(); rootDeviceEnvironment = std::make_unique(*executionEnvironment); rootDeviceEnvironment->setHwInfo(defaultHwInfo.get()); } ////////////////////////////////////////////////////// // PerformanceCountersFixture::~PerformanceCountersFixture ////////////////////////////////////////////////////// PerformanceCountersFixture::~PerformanceCountersFixture() { } ////////////////////////////////////////////////////// // PerformanceCountersMetricsLibraryFixture::createPerformanceCounters ////////////////////////////////////////////////////// void PerformanceCountersMetricsLibraryFixture::createPerformanceCounters(const bool validMetricsLibraryApi, const bool mockMetricsLibrary) { performanceCountersBase = MockPerformanceCounters::create(&device->getDevice()); auto metricsLibraryInterface = performanceCountersBase->getMetricsLibraryInterface(); auto metricsLibraryDll = std::make_unique(); 
EXPECT_NE(performanceCountersBase, nullptr); EXPECT_NE(metricsLibraryInterface, nullptr); device->setPerfCounters(performanceCountersBase.get()); // Attached mock version of metrics library interface. if (mockMetricsLibrary) { performanceCountersBase->setMetricsLibraryInterface(std::make_unique()); metricsLibraryInterface = performanceCountersBase->getMetricsLibraryInterface(); } else { performanceCountersBase->setMetricsLibraryInterface(std::make_unique()); metricsLibraryInterface = performanceCountersBase->getMetricsLibraryInterface(); } if (validMetricsLibraryApi) { metricsLibraryInterface->api = std::make_unique(); metricsLibraryInterface->osLibrary = std::move(metricsLibraryDll); } else { metricsLibraryDll->validContextCreate = false; metricsLibraryDll->validContextDelete = false; metricsLibraryDll->validIsLoaded = false; metricsLibraryInterface->api = std::make_unique(); metricsLibraryInterface->osLibrary = std::move(metricsLibraryDll); } EXPECT_NE(metricsLibraryInterface->api, nullptr); } } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/mock_performance_counters.h000066400000000000000000000302761422164147700317050ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/device/device.h" #include "shared/source/memory_manager/os_agnostic_memory_manager.h" #include "shared/test/common/mocks/mock_device.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" namespace NEO { ////////////////////////////////////////////////////// // Metrics Library types ////////////////////////////////////////////////////// using MetricsLibraryApi::ClientApi; using MetricsLibraryApi::ClientData_1_0; using MetricsLibraryApi::ClientGen; using MetricsLibraryApi::ClientType_1_0; using MetricsLibraryApi::CommandBufferData_1_0; using 
MetricsLibraryApi::CommandBufferSize_1_0; using MetricsLibraryApi::ConfigurationHandle_1_0; using MetricsLibraryApi::ContextCreateData_1_0; using MetricsLibraryApi::ContextHandle_1_0; using MetricsLibraryApi::GpuMemory_1_0; using MetricsLibraryApi::QueryHandle_1_0; ////////////////////////////////////////////////////// // MI_REPORT_PERF_COUNT definition for all GENs ////////////////////////////////////////////////////// struct MI_REPORT_PERF_COUNT { uint32_t DwordLength : BITFIELD_RANGE(0, 5); uint32_t Reserved_6 : BITFIELD_RANGE(6, 22); uint32_t MiCommandOpcode : BITFIELD_RANGE(23, 28); uint32_t CommandType : BITFIELD_RANGE(29, 31); uint64_t UseGlobalGtt : BITFIELD_RANGE(0, 0); uint64_t Reserved_33 : BITFIELD_RANGE(1, 3); uint64_t CoreModeEnable : BITFIELD_RANGE(4, 4); uint64_t Reserved_37 : BITFIELD_RANGE(5, 5); uint64_t MemoryAddress : BITFIELD_RANGE(6, 63); uint32_t ReportId; typedef enum tagDWORD_LENGTH { DWORD_LENGTH_EXCLUDES_DWORD_0_1 = 0x2, } DWORD_LENGTH; typedef enum tagMI_COMMAND_OPCODE { MI_COMMAND_OPCODE_MI_REPORT_PERF_COUNT = 0x28, } MI_COMMAND_OPCODE; typedef enum tagCOMMAND_TYPE { COMMAND_TYPE_MI_COMMAND = 0x0, } COMMAND_TYPE; inline void init(void) { memset(this, 0, sizeof(MI_REPORT_PERF_COUNT)); DwordLength = DWORD_LENGTH_EXCLUDES_DWORD_0_1; MiCommandOpcode = MI_COMMAND_OPCODE_MI_REPORT_PERF_COUNT; CommandType = COMMAND_TYPE_MI_COMMAND; } }; // clang-format off ////////////////////////////////////////////////////// // MockMetricsLibrary ////////////////////////////////////////////////////// class MockMetricsLibrary : public MetricsLibrary { public: uint32_t openCount = 0; uint32_t contextCount = 0; uint32_t queryCount = 0; bool validOpen = true; bool validGetData = true; // Library open / close functions. bool open() override; // Context create / destroy functions. 
bool contextCreate (const ClientType_1_0 &client, ClientOptionsSubDeviceData_1_0 &subDevice, ClientOptionsSubDeviceIndexData_1_0 &subDeviceIndex, ClientOptionsSubDeviceCountData_1_0 &subDeviceCount, ClientData_1_0& clientData, ContextCreateData_1_0 &createData, ContextHandle_1_0 &handle) override; bool contextDelete (const ContextHandle_1_0 &handle) override; // HwCounters functions. bool hwCountersCreate (const ContextHandle_1_0 &context, const uint32_t slots, const ConfigurationHandle_1_0 mmio, QueryHandle_1_0 &handle) override; bool hwCountersDelete (const QueryHandle_1_0 &handle) override; bool hwCountersGetReport (const QueryHandle_1_0 &handle, const uint32_t slot, const uint32_t slotsCount, const uint32_t dataSize, void *data) override; uint32_t hwCountersGetApiReportSize() override; uint32_t hwCountersGetGpuReportSize() override; // Command buffer functions. bool commandBufferGet (CommandBufferData_1_0 &data) override; bool commandBufferGetSize (const CommandBufferData_1_0 &commandBufferData, CommandBufferSize_1_0 &commandBufferSize) override; // Oa configuration functions. bool oaConfigurationCreate (const ContextHandle_1_0 &context, ConfigurationHandle_1_0 &handle) override { return true; } bool oaConfigurationDelete (const ConfigurationHandle_1_0 &handle) override { return true; } bool oaConfigurationActivate (const ConfigurationHandle_1_0 &handle) override { return true; } bool oaConfigurationDeactivate (const ConfigurationHandle_1_0 &handle) override { return true; } // User mmio configuration functions. 
bool userConfigurationCreate (const ContextHandle_1_0 &context, ConfigurationHandle_1_0 &handle) override { return true; } bool userConfigurationDelete (const ConfigurationHandle_1_0 &handle) override { return true; } }; ////////////////////////////////////////////////////// // MockMetricsLibraryValidInterface ////////////////////////////////////////////////////// class MockMetricsLibraryValidInterface: public MetricsLibraryInterface { public: uint32_t contextCount = 0; bool validCreateConfigurationOa = true; bool validCreateConfigurationUser = true; bool validActivateConfigurationOa = true; bool validGpuReportSize = true; static StatusCode ML_STDCALL ContextCreate ( ClientType_1_0 clientType, ContextCreateData_1_0* createData, ContextHandle_1_0* handle ); static StatusCode ML_STDCALL ContextDelete (const ContextHandle_1_0 handle); static StatusCode ML_STDCALL GetParameter (const ParameterType parameter, ValueType *type, TypedValue_1_0 *value); static StatusCode ML_STDCALL CommandBufferGet (const CommandBufferData_1_0 *data); static StatusCode ML_STDCALL CommandBufferGetSize (const CommandBufferData_1_0 *data, CommandBufferSize_1_0 *size); static StatusCode ML_STDCALL QueryCreate (const QueryCreateData_1_0 *createData, QueryHandle_1_0 *handle); static StatusCode ML_STDCALL QueryDelete (const QueryHandle_1_0 handle); static StatusCode ML_STDCALL ConfigurationCreate (const ConfigurationCreateData_1_0 *createData, ConfigurationHandle_1_0 *handle); static StatusCode ML_STDCALL ConfigurationActivate (const ConfigurationHandle_1_0 handle, const ConfigurationActivateData_1_0 *activateData); static StatusCode ML_STDCALL ConfigurationDeactivate (const ConfigurationHandle_1_0 handle) { return StatusCode::Success; } static StatusCode ML_STDCALL ConfigurationDelete (const ConfigurationHandle_1_0 handle); static StatusCode ML_STDCALL GetData (GetReportData_1_0 *data); MockMetricsLibraryValidInterface() { contextCreate = &ContextCreate; contextDelete = &ContextDelete; 
functions.GetParameter = &GetParameter; functions.CommandBufferGet = &CommandBufferGet; functions.CommandBufferGetSize = &CommandBufferGetSize; functions.QueryCreate = &QueryCreate; functions.QueryDelete = &QueryDelete; functions.ConfigurationCreate = &ConfigurationCreate; functions.ConfigurationActivate = &ConfigurationActivate; functions.ConfigurationDeactivate = &ConfigurationDeactivate; functions.ConfigurationDelete = &ConfigurationDelete; functions.GetData = &GetData; } }; ////////////////////////////////////////////////////// // MockMetricsLibraryInvalidInterface ////////////////////////////////////////////////////// class MockMetricsLibraryInvalidInterface: public MetricsLibraryInterface { public: static StatusCode ML_STDCALL ContextCreate ( ClientType_1_0 clientType, ContextCreateData_1_0* createData, ContextHandle_1_0* handle ){ return StatusCode::Failed;} static StatusCode ML_STDCALL ContextDelete (const ContextHandle_1_0 handle){ return StatusCode::Failed;} static StatusCode ML_STDCALL GetParameter (const ParameterType parameter, ValueType *type, TypedValue_1_0 *value){ return StatusCode::Failed;} static StatusCode ML_STDCALL CommandBufferGet (const CommandBufferData_1_0 *data){ return StatusCode::Failed;} static StatusCode ML_STDCALL CommandBufferGetSize (const CommandBufferData_1_0 *data, CommandBufferSize_1_0 *size){ return StatusCode::Failed;} static StatusCode ML_STDCALL QueryCreate (const QueryCreateData_1_0 *createData, QueryHandle_1_0 *handle){ return StatusCode::Failed;} static StatusCode ML_STDCALL QueryDelete (const QueryHandle_1_0 handle){ return StatusCode::Failed;} static StatusCode ML_STDCALL ConfigurationCreate (const ConfigurationCreateData_1_0 *createData, ConfigurationHandle_1_0 *handle){ return StatusCode::Failed;} static StatusCode ML_STDCALL ConfigurationActivate (const ConfigurationHandle_1_0 handle, const ConfigurationActivateData_1_0 *activateData){ return StatusCode::Failed;} static StatusCode ML_STDCALL ConfigurationDeactivate 
(const ConfigurationHandle_1_0 handle){ return StatusCode::Failed;} static StatusCode ML_STDCALL ConfigurationDelete (const ConfigurationHandle_1_0 handle){ return StatusCode::Failed;} static StatusCode ML_STDCALL GetData (GetReportData_1_0 *data){ return StatusCode::Failed;} MockMetricsLibraryInvalidInterface() { contextCreate = &ContextCreate; contextDelete = &ContextDelete; functions.GetParameter = &GetParameter; functions.CommandBufferGet = &CommandBufferGet; functions.CommandBufferGetSize = &CommandBufferGetSize; functions.QueryCreate = &QueryCreate; functions.QueryDelete = &QueryDelete; functions.ConfigurationCreate = &ConfigurationCreate; functions.ConfigurationActivate = &ConfigurationActivate; functions.ConfigurationDeactivate = &ConfigurationDeactivate; functions.ConfigurationDelete = &ConfigurationDelete; functions.GetData = &GetData; } }; // clang-format on ////////////////////////////////////////////////////// // MockMetricsLibraryDll ////////////////////////////////////////////////////// class MockMetricsLibraryDll : public OsLibrary { public: bool validContextCreate = true; bool validContextDelete = true; bool validIsLoaded = true; void *getProcAddress(const std::string &procName) override; bool isLoaded() override; }; ////////////////////////////////////////////////////// // MockPerformanceCounters ////////////////////////////////////////////////////// class MockPerformanceCounters { public: static std::unique_ptr create(Device *device); }; ////////////////////////////////////////////////////// // PerformanceCountersDeviceFixture ////////////////////////////////////////////////////// struct PerformanceCountersDeviceFixture { virtual void SetUp(); virtual void TearDown(); decltype(&PerformanceCounters::create) createFunc; }; struct MockExecutionEnvironment; struct RootDeviceEnvironment; ///////////////////////////////////////////////////// // PerformanceCountersFixture ////////////////////////////////////////////////////// struct 
PerformanceCountersFixture { PerformanceCountersFixture(); ~PerformanceCountersFixture(); virtual void SetUp(); virtual void TearDown(); virtual void createPerfCounters(); cl_queue_properties queueProperties = {}; std::unique_ptr device; std::unique_ptr context; std::unique_ptr queue; std::unique_ptr performanceCountersBase; std::unique_ptr executionEnvironment; std::unique_ptr rootDeviceEnvironment; std::unique_ptr osInterface; }; ////////////////////////////////////////////////////// // PerformanceCountersMetricsLibraryFixture ////////////////////////////////////////////////////// struct PerformanceCountersMetricsLibraryFixture : PerformanceCountersFixture { void SetUp() override; void TearDown() override; void createPerformanceCounters(const bool validMetricsLibraryApi, const bool mockMatricsLibrary); std::unique_ptr performanceCountersBase; }; } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/os_context_tests.cpp000066400000000000000000000213321422164147700304040ustar00rootroot00000000000000/* * Copyright (C) 2019-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/device_factory.h" #include "shared/source/os_interface/os_context.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/default_hw_info.h" #include "shared/test/common/helpers/engine_descriptor_helper.h" #include "shared/test/common/mocks/mock_device.h" #include "gtest/gtest.h" using namespace NEO; TEST(OSContext, whenCreatingDefaultOsContextThenExpectInitializedAlways) { OsContext *osContext = OsContext::create(nullptr, 0, EngineDescriptorHelper::getDefaultDescriptor()); EXPECT_FALSE(osContext->isLowPriority()); EXPECT_FALSE(osContext->isInternalEngine()); EXPECT_FALSE(osContext->isRootDevice()); delete osContext; } TEST(OSContext, givenInternalAndRootDeviceAreTrueWhenCreatingDefaultOsContextThenExpectGettersTrue) { auto descriptor = 
EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_RCS, EngineUsage::Internal}); descriptor.isRootDevice = true; OsContext *osContext = OsContext::create(nullptr, 0, descriptor); EXPECT_FALSE(osContext->isLowPriority()); EXPECT_TRUE(osContext->isInternalEngine()); EXPECT_TRUE(osContext->isRootDevice()); delete osContext; } TEST(OSContext, givenLowPriorityAndRootDeviceAreTrueWhenCreatingDefaultOsContextThenExpectGettersTrue) { auto descriptor = EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_RCS, EngineUsage::LowPriority}); descriptor.isRootDevice = true; OsContext *osContext = OsContext::create(nullptr, 0, descriptor); EXPECT_TRUE(osContext->isLowPriority()); EXPECT_FALSE(osContext->isInternalEngine()); EXPECT_TRUE(osContext->isRootDevice()); delete osContext; } TEST(OSContext, givenOsContextCreatedDefaultIsFalseWhenSettingTrueThenFlagTrueReturned) { OsContext *osContext = OsContext::create(nullptr, 0, EngineDescriptorHelper::getDefaultDescriptor()); EXPECT_FALSE(osContext->isDefaultContext()); osContext->setDefaultContext(true); EXPECT_TRUE(osContext->isDefaultContext()); delete osContext; } TEST(OSContext, givenCooperativeEngineWhenIsCooperativeEngineIsCalledThenReturnTrue) { auto engineDescriptor = EngineDescriptorHelper::getDefaultDescriptor(); engineDescriptor.engineTypeUsage.second = EngineUsage::Cooperative; auto pOsContext = OsContext::create(nullptr, 0, engineDescriptor); EXPECT_FALSE(pOsContext->isRegular()); EXPECT_FALSE(pOsContext->isLowPriority()); EXPECT_FALSE(pOsContext->isInternalEngine()); EXPECT_TRUE(pOsContext->isCooperativeEngine()); delete pOsContext; } TEST(OSContext, givenReinitializeContextWhenContextIsInitThenContextIsStillIinitializedAfter) { auto engineDescriptor = EngineDescriptorHelper::getDefaultDescriptor(); auto pOsContext = OsContext::create(nullptr, 0, engineDescriptor); EXPECT_NO_THROW(pOsContext->reInitializeContext()); EXPECT_NO_THROW(pOsContext->ensureContextInitialized()); delete pOsContext; } 
TEST(OSContext, givenSetPowerHintThenGetPowerHintShowsTheSameValue) { auto engineDescriptor = EngineDescriptorHelper::getDefaultDescriptor(); auto pOsContext = OsContext::create(nullptr, 0, engineDescriptor); pOsContext->setUmdPowerHintValue(1); EXPECT_EQ(1, pOsContext->getUmdPowerHintValue()); delete pOsContext; } struct DeferredOsContextCreationTests : ::testing::Test { void SetUp() override { device = std::unique_ptr{MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())}; DeviceFactory::prepareDeviceEnvironments(*device->getExecutionEnvironment()); } std::unique_ptr createOsContext(EngineTypeUsage engineTypeUsage, bool defaultEngine) { OSInterface *osInterface = device->getRootDeviceEnvironment().osInterface.get(); std::unique_ptr osContext{OsContext::create(osInterface, 0, EngineDescriptorHelper::getDefaultDescriptor(engineTypeUsage))}; EXPECT_FALSE(osContext->isInitialized()); return osContext; } void expectContextCreation(EngineTypeUsage engineTypeUsage, bool defaultEngine, bool expectedImmediate) { auto osContext = createOsContext(engineTypeUsage, defaultEngine); const bool immediate = osContext->isImmediateContextInitializationEnabled(defaultEngine); EXPECT_EQ(expectedImmediate, immediate); if (immediate) { osContext->ensureContextInitialized(); EXPECT_TRUE(osContext->isInitialized()); } } void expectDeferredContextCreation(EngineTypeUsage engineTypeUsage, bool defaultEngine) { expectContextCreation(engineTypeUsage, defaultEngine, false); } void expectImmediateContextCreation(EngineTypeUsage engineTypeUsage, bool defaultEngine) { expectContextCreation(engineTypeUsage, defaultEngine, true); } std::unique_ptr device; static inline const EngineTypeUsage engineTypeUsageRegular{aub_stream::ENGINE_RCS, EngineUsage::Regular}; static inline const EngineTypeUsage engineTypeUsageInternal{aub_stream::ENGINE_RCS, EngineUsage::Internal}; static inline const EngineTypeUsage engineTypeUsageBlitter{aub_stream::ENGINE_BCS, EngineUsage::Regular}; }; 
TEST_F(DeferredOsContextCreationTests, givenRegularEngineWhenCreatingOsContextThenOsContextIsInitializedDeferred) { DebugManagerStateRestore restore{}; expectDeferredContextCreation(engineTypeUsageRegular, false); DebugManager.flags.DeferOsContextInitialization.set(1); expectDeferredContextCreation(engineTypeUsageRegular, false); DebugManager.flags.DeferOsContextInitialization.set(0); expectImmediateContextCreation(engineTypeUsageRegular, false); } TEST_F(DeferredOsContextCreationTests, givenDefaultEngineWhenCreatingOsContextThenOsContextIsInitializedImmediately) { DebugManagerStateRestore restore{}; expectImmediateContextCreation(engineTypeUsageRegular, true); DebugManager.flags.DeferOsContextInitialization.set(1); expectImmediateContextCreation(engineTypeUsageRegular, true); DebugManager.flags.DeferOsContextInitialization.set(0); expectImmediateContextCreation(engineTypeUsageRegular, true); } TEST_F(DeferredOsContextCreationTests, givenInternalEngineWhenCreatingOsContextThenOsContextIsInitializedImmediately) { DebugManagerStateRestore restore{}; expectImmediateContextCreation(engineTypeUsageInternal, false); DebugManager.flags.DeferOsContextInitialization.set(1); expectImmediateContextCreation(engineTypeUsageInternal, false); DebugManager.flags.DeferOsContextInitialization.set(0); expectImmediateContextCreation(engineTypeUsageInternal, false); } TEST_F(DeferredOsContextCreationTests, givenBlitterEngineWhenCreatingOsContextThenOsContextIsInitializedImmediately) { DebugManagerStateRestore restore{}; expectImmediateContextCreation(engineTypeUsageBlitter, false); DebugManager.flags.DeferOsContextInitialization.set(1); expectImmediateContextCreation(engineTypeUsageBlitter, false); DebugManager.flags.DeferOsContextInitialization.set(0); expectImmediateContextCreation(engineTypeUsageBlitter, false); } TEST_F(DeferredOsContextCreationTests, givenEnsureContextInitializeCalledMultipleTimesWhenOsContextIsCreatedThenInitializeOnlyOnce) { struct MyOsContext : OsContext { 
MyOsContext(uint32_t contextId, const EngineDescriptor &engineDescriptor) : OsContext(contextId, engineDescriptor) {} void initializeContext() override { initializeContextCalled++; } size_t initializeContextCalled = 0u; }; MyOsContext osContext{0, EngineDescriptorHelper::getDefaultDescriptor(engineTypeUsageRegular)}; EXPECT_FALSE(osContext.isInitialized()); osContext.ensureContextInitialized(); EXPECT_TRUE(osContext.isInitialized()); EXPECT_EQ(1u, osContext.initializeContextCalled); osContext.ensureContextInitialized(); EXPECT_TRUE(osContext.isInitialized()); EXPECT_EQ(1u, osContext.initializeContextCalled); } TEST_F(DeferredOsContextCreationTests, givenPrintOsContextInitializationsIsSetWhenOsContextItIsInitializedThenInfoIsLoggedToStdout) { DebugManagerStateRestore restore{}; DebugManager.flags.DeferOsContextInitialization.set(1); DebugManager.flags.PrintOsContextInitializations.set(1); testing::internal::CaptureStdout(); auto osContext = createOsContext(engineTypeUsageRegular, false); EXPECT_EQ(std::string{}, testing::internal::GetCapturedStdout()); testing::internal::CaptureStdout(); osContext->ensureContextInitialized(); std::string expectedMessage = "OsContext initialization: contextId=0 usage=Regular type=RCS isRootDevice=0\n"; EXPECT_EQ(expectedMessage, testing::internal::GetCapturedStdout()); } compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/os_interface_tests.cpp000066400000000000000000000011361422164147700306600ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/os_interface.h" #include "gtest/gtest.h" #include TEST(OSInterface, WhenInterfaceIsCreatedThenItIsNonCopyable) { EXPECT_FALSE(std::is_move_constructible::value); EXPECT_FALSE(std::is_copy_constructible::value); } TEST(OSInterface, WhenInterfaceIsCreatedThenItIsNonAssignable) { EXPECT_FALSE(std::is_move_assignable::value); EXPECT_FALSE(std::is_copy_assignable::value); } 
compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/os_library_tests.cpp000066400000000000000000000102001422164147700303540ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #if defined(_WIN32) #include "shared/source/os_interface/windows/os_library_win.h" #elif defined(__linux__) #include "shared/source/os_interface/linux/os_library_linux.h" #endif #include "shared/source/os_interface/os_library.h" #include "shared/test/common/fixtures/memory_management_fixture.h" #include "shared/test/common/test_macros/test.h" #include "gtest/gtest.h" #include namespace Os { extern const char *testDllName; } // namespace Os const std::string fakeLibName = "_fake_library_name_"; const std::string fnName = "testDynamicLibraryFunc"; using namespace NEO; TEST(OSLibraryTest, whenLibraryNameIsEmptyThenCurrentProcesIsUsedAsLibrary) { std::unique_ptr library{OsLibrary::load("")}; EXPECT_NE(nullptr, library); void *ptr = library->getProcAddress("selfDynamicLibraryFunc"); EXPECT_NE(nullptr, ptr); } TEST(OSLibraryTest, GivenFakeLibNameWhenLoadingLibraryThenNullIsReturned) { OsLibrary *library = OsLibrary::load(fakeLibName); EXPECT_EQ(nullptr, library); } TEST(OSLibraryTest, GivenFakeLibNameWhenLoadingLibraryThenNullIsReturnedAndErrorString) { std::string errorValue; OsLibrary *library = OsLibrary::load(fakeLibName, &errorValue); EXPECT_FALSE(errorValue.empty()); EXPECT_EQ(nullptr, library); } TEST(OSLibraryTest, GivenValidLibNameWhenLoadingLibraryThenLibraryIsLoaded) { std::unique_ptr library(OsLibrary::load(Os::testDllName)); EXPECT_NE(nullptr, library); } TEST(OSLibraryTest, GivenValidLibNameWhenLoadingLibraryThenLibraryIsLoadedWithNoErrorString) { std::string errorValue; std::unique_ptr library(OsLibrary::load(Os::testDllName, &errorValue)); EXPECT_TRUE(errorValue.empty()); EXPECT_NE(nullptr, library); } TEST(OSLibraryTest, whenSymbolNameIsValidThenGetProcAddressReturnsNonNullPointer) { std::unique_ptr 
library(OsLibrary::load(Os::testDllName)); EXPECT_NE(nullptr, library); void *ptr = library->getProcAddress(fnName); EXPECT_NE(nullptr, ptr); } TEST(OSLibraryTest, whenSymbolNameIsInvalidThenGetProcAddressReturnsNullPointer) { std::unique_ptr library(OsLibrary::load(Os::testDllName)); EXPECT_NE(nullptr, library); void *ptr = library->getProcAddress(fnName + "invalid"); EXPECT_EQ(nullptr, ptr); } using OsLibraryTestWithFailureInjection = Test; TEST_F(OsLibraryTestWithFailureInjection, GivenFailureInjectionWhenLibraryIsLoadedThenOnlyFailedAllocationIsNull) { InjectedFunction method = [](size_t failureIndex) { std::string libName(Os::testDllName); // System under test OsLibrary *library = OsLibrary::load(libName); if (MemoryManagement::nonfailingAllocation == failureIndex) { EXPECT_NE(nullptr, library); } else { EXPECT_EQ(nullptr, library); } // Make sure that we only have 1 buffer allocated at a time delete library; }; injectFailures(method); } TEST(OsLibrary, whenCallingIndexOperatorThenObjectConvertibleToFunctionOrVoidPointerIsReturned) { struct MockOsLibrary : OsLibrary { void *getProcAddress(const std::string &procName) override { lastRequestedProcName = procName; return ptrToReturn; } bool isLoaded() override { return true; } void *ptrToReturn = nullptr; std::string lastRequestedProcName; }; MockOsLibrary lib; int varA; int varB; int varC; using FunctionTypeA = void (*)(int *, float); using FunctionTypeB = int (*)(); lib.ptrToReturn = &varA; FunctionTypeA functionA = lib["funcA"]; EXPECT_STREQ("funcA", lib.lastRequestedProcName.c_str()); EXPECT_EQ(&varA, reinterpret_cast(functionA)); lib.ptrToReturn = &varB; FunctionTypeB functionB = lib["funcB"]; EXPECT_STREQ("funcB", lib.lastRequestedProcName.c_str()); EXPECT_EQ(&varB, reinterpret_cast(functionB)); lib.ptrToReturn = &varC; void *rawPtr = lib["funcC"]; EXPECT_STREQ("funcC", lib.lastRequestedProcName.c_str()); EXPECT_EQ(&varC, rawPtr); } 
compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/os_memory_tests.cpp000066400000000000000000000014221422164147700302260ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/constants.h" #include "shared/source/os_interface/os_memory.h" #include "gtest/gtest.h" using namespace NEO; TEST(OSMemory, WhenReservingCpuAddressRangeThenMemoryIsAligned) { auto osMemory = OSMemory::create(); size_t reservedCpuAddressRangeSize = 1024; auto reservedCpuAddressRange = osMemory->reserveCpuAddressRange(reservedCpuAddressRangeSize, MemoryConstants::pageSize64k); EXPECT_NE(reservedCpuAddressRange.originalPtr, nullptr); EXPECT_TRUE(isAligned(reservedCpuAddressRange.alignedPtr)); osMemory->releaseCpuAddressRange(reservedCpuAddressRange); } compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/performance_counters_gen_tests.cpp000066400000000000000000000006561422164147700333010ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/performance_counters.h" #include "shared/test/common/test_macros/test.h" using namespace NEO; struct PerformanceCountersGenTest : public ::testing::Test { }; class MockPerformanceCountersGen : public PerformanceCounters { public: MockPerformanceCountersGen() : PerformanceCounters() { } };compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/performance_counters_tests.cpp000066400000000000000000000701131422164147700324430ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/os_interface/os_interface.h" #include "shared/source/os_interface/os_time.h" #include "shared/source/utilities/tag_allocator.h" #include 
"shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/engine_descriptor_helper.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/mocks/mock_execution_environment.h" #include "shared/test/common/mocks/mock_os_context.h" #include "opencl/test/unit_test/fixtures/device_instrumentation_fixture.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/os_interface/mock_performance_counters.h" #include "gtest/gtest.h" using namespace NEO; struct PerformanceCountersDeviceTest : public PerformanceCountersDeviceFixture, public DeviceInstrumentationFixture, public ::testing::Test { void SetUp() override { PerformanceCountersDeviceFixture::SetUp(); } void TearDown() override { PerformanceCountersDeviceFixture::TearDown(); } }; TEST_F(PerformanceCountersDeviceTest, GivenEnabledInstrumentationWhenGettingPerformanceCountersThenNonNullPtrIsReturned) { DeviceInstrumentationFixture::SetUp(true); EXPECT_NE(nullptr, device->getPerformanceCounters()); } TEST_F(PerformanceCountersDeviceTest, GivenDisabledInstrumentationWhenGettingPerformanceCountersThenNullPtrIsReturned) { DeviceInstrumentationFixture::SetUp(false); EXPECT_EQ(nullptr, device->getPerformanceCounters()); } struct PerformanceCountersTest : public PerformanceCountersFixture, public ::testing::Test { public: void SetUp() override { PerformanceCountersFixture::SetUp(); } void TearDown() override { PerformanceCountersFixture::TearDown(); } }; TEST_F(PerformanceCountersTest, WhenCreatingPerformanceCountersThenObjectIsNotNull) { auto performanceCounters = PerformanceCounters::create(&device->getDevice()); EXPECT_NE(nullptr, performanceCounters); EXPECT_NE(nullptr, performanceCounters.get()); } TEST_F(PerformanceCountersTest, givenPerformanceCountersWhenCreatedThenAllValuesProperlyInitialized) { createPerfCounters(); EXPECT_NE(nullptr, performanceCountersBase->getMetricsLibraryInterface()); EXPECT_EQ(0u, 
performanceCountersBase->getReferenceNumber()); } struct PerformanceCountersProcessEventTest : public PerformanceCountersTest, public ::testing::WithParamInterface { void SetUp() override { PerformanceCountersTest::SetUp(); createPerfCounters(); eventComplete = true; outputParamSize = 0; inputParamSize = performanceCountersBase->getApiReportSize(); inputParam.reset(new uint8_t); } void TearDown() override { performanceCountersBase->shutdown(); PerformanceCountersTest::TearDown(); } std::unique_ptr inputParam; size_t inputParamSize; size_t outputParamSize; bool eventComplete; }; TEST_P(PerformanceCountersProcessEventTest, givenNullptrInputParamWhenProcessEventPerfCountersIsCalledThenReturnsFalse) { eventComplete = GetParam(); HwPerfCounter counters = {}; TagNode query = {}; query.tagForCpuAccess = &counters; performanceCountersBase->getQueryHandleRef(counters.query.handle); auto retVal = performanceCountersBase->getApiReport(&query, inputParamSize, nullptr, &outputParamSize, eventComplete); performanceCountersBase->deleteQuery(counters.query.handle); EXPECT_FALSE(retVal); } TEST_P(PerformanceCountersProcessEventTest, givenCorrectInputParamWhenProcessEventPerfCountersIsCalledAndEventIsCompletedThenReturnsTrue) { eventComplete = GetParam(); EXPECT_EQ(0ull, outputParamSize); HwPerfCounter counters = {}; TagNode query = {}; query.tagForCpuAccess = &counters; performanceCountersBase->getQueryHandleRef(counters.query.handle); auto retVal = performanceCountersBase->getApiReport(&query, inputParamSize, inputParam.get(), &outputParamSize, eventComplete); performanceCountersBase->deleteQuery(counters.query.handle); if (eventComplete) { EXPECT_TRUE(retVal); EXPECT_EQ(outputParamSize, inputParamSize); } else { EXPECT_FALSE(retVal); EXPECT_EQ(inputParamSize, outputParamSize); } } TEST_P(PerformanceCountersProcessEventTest, givenCorrectInputParamWhenProcessEventPerfCountersIsNotCalledThenReturnsFalse) { eventComplete = GetParam(); EXPECT_EQ(0ull, outputParamSize); HwPerfCounter 
tag = {}; TagNode query = {}; query.tagForCpuAccess = &tag; auto retVal = performanceCountersBase->getApiReport(&query, inputParamSize, inputParam.get(), &outputParamSize, eventComplete); EXPECT_EQ(eventComplete, retVal); } TEST_F(PerformanceCountersProcessEventTest, givenInvalidInputParamSizeWhenProcessEventPerfCountersIsCalledThenReturnsFalse) { EXPECT_EQ(0ull, outputParamSize); HwPerfCounter counters = {}; TagNode query = {}; query.tagForCpuAccess = &counters; performanceCountersBase->getQueryHandleRef(counters.query.handle); auto retVal = performanceCountersBase->getApiReport(&query, inputParamSize - 1, inputParam.get(), &outputParamSize, eventComplete); performanceCountersBase->deleteQuery(counters.query.handle); EXPECT_FALSE(retVal); EXPECT_EQ(outputParamSize, inputParamSize); } TEST_F(PerformanceCountersProcessEventTest, givenNullptrOutputParamSizeWhenProcessEventPerfCountersIsCalledThenDoesNotReturnsOutputSize) { EXPECT_EQ(0ull, outputParamSize); HwPerfCounter counters = {}; TagNode query = {}; query.tagForCpuAccess = &counters; performanceCountersBase->getQueryHandleRef(counters.query.handle); auto retVal = performanceCountersBase->getApiReport(&query, inputParamSize, inputParam.get(), nullptr, eventComplete); performanceCountersBase->deleteQuery(counters.query.handle); EXPECT_TRUE(retVal); EXPECT_EQ(0ull, outputParamSize); } TEST_F(PerformanceCountersProcessEventTest, givenNullptrInputZeroSizeWhenProcessEventPerfCountersIsCalledThenQueryProperSize) { EXPECT_EQ(0ull, outputParamSize); HwPerfCounter counters = {}; TagNode query = {}; query.tagForCpuAccess = &counters; performanceCountersBase->getQueryHandleRef(counters.query.handle); auto retVal = performanceCountersBase->getApiReport(&query, 0, nullptr, &outputParamSize, eventComplete); performanceCountersBase->deleteQuery(counters.query.handle); EXPECT_TRUE(retVal); EXPECT_EQ(inputParamSize, outputParamSize); } TEST_F(PerformanceCountersProcessEventTest, 
givenNullptrInputZeroSizeAndNullptrOutputSizeWhenProcessEventPerfCountersIsCalledThenReturnFalse) { EXPECT_EQ(0ull, outputParamSize); HwPerfCounter counters = {}; TagNode query = {}; query.tagForCpuAccess = &counters; performanceCountersBase->getQueryHandleRef(counters.query.handle); auto retVal = performanceCountersBase->getApiReport(&query, 0, nullptr, nullptr, eventComplete); performanceCountersBase->deleteQuery(counters.query.handle); EXPECT_FALSE(retVal); EXPECT_EQ(0ull, outputParamSize); } TEST_F(PerformanceCountersProcessEventTest, givenNullptrQueryWhenProcessEventPerfCountersIsCalledThenReturnFalse) { EXPECT_EQ(0ull, outputParamSize); auto retVal = performanceCountersBase->getApiReport(nullptr, 0, nullptr, nullptr, eventComplete); EXPECT_FALSE(retVal); } INSTANTIATE_TEST_CASE_P( PerfCountersTests, PerformanceCountersProcessEventTest, testing::Bool()); struct PerformanceCountersMetricsLibraryTest : public PerformanceCountersMetricsLibraryFixture, public ::testing::Test { public: void SetUp() override { PerformanceCountersMetricsLibraryFixture::SetUp(); auto hwInfo = rootDeviceEnvironment->getHardwareInfo(); osContext = std::make_unique(0, EngineDescriptorHelper::getDefaultDescriptor(HwHelper::get(hwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*hwInfo)[0], PreemptionHelper::getDefaultPreemptionMode(*hwInfo))); queue->getGpgpuCommandStreamReceiver().setupContext(*osContext); } void TearDown() override { PerformanceCountersMetricsLibraryFixture::TearDown(); } std::unique_ptr osContext; }; TEST_F(PerformanceCountersMetricsLibraryTest, givenPerformanceCountersWhenMetricLibraryIsValidThenQueryIsCreated) { // Create performance counters. createPerformanceCounters(true, true); EXPECT_NE(nullptr, performanceCountersBase); EXPECT_TRUE(performanceCountersBase->enable(false)); EXPECT_EQ(1u, performanceCountersBase->getReferenceNumber()); // Check metric library context. 
auto context = static_cast(performanceCountersBase->getMetricsLibraryContext().data); EXPECT_NE(nullptr, context); EXPECT_EQ(1u, context->contextCount); // Close library. performanceCountersBase->shutdown(); EXPECT_EQ(0u, performanceCountersBase->getReferenceNumber()); } TEST_F(PerformanceCountersMetricsLibraryTest, givenPerformanceCountersWhenMetricLibraryIsValidThenQueryReturnsValidGpuCommands) { // Create performance counters. createPerformanceCounters(true, false); EXPECT_NE(nullptr, performanceCountersBase); EXPECT_TRUE(performanceCountersBase->enable(false)); EXPECT_EQ(1u, performanceCountersBase->getReferenceNumber()); // Obtain required command buffer size. uint32_t commandsSize = performanceCountersBase->getGpuCommandsSize(MetricsLibraryApi::GpuCommandBufferType::Render, true); EXPECT_NE(0u, commandsSize); // Fill command buffer. uint8_t buffer[1000] = {}; HwPerfCounter perfCounter = {}; TagNode query = {}; query.tagForCpuAccess = &perfCounter; EXPECT_TRUE(performanceCountersBase->getGpuCommands(MetricsLibraryApi::GpuCommandBufferType::Render, query, true, sizeof(buffer), buffer)); // Close library. 
performanceCountersBase->deleteQuery(perfCounter.query.handle); performanceCountersBase->shutdown(); EXPECT_EQ(0u, performanceCountersBase->getReferenceNumber()); } TEST_F(PerformanceCountersMetricsLibraryTest, givenInitialNonCcsEngineWhenEnablingThenDontAllowCcsOnNextCalls) { createPerformanceCounters(true, false); EXPECT_NE(nullptr, performanceCountersBase); EXPECT_TRUE(performanceCountersBase->enable(false)); EXPECT_TRUE(performanceCountersBase->enable(false)); EXPECT_FALSE(performanceCountersBase->enable(true)); performanceCountersBase->shutdown(); performanceCountersBase->shutdown(); performanceCountersBase->shutdown(); EXPECT_EQ(0u, performanceCountersBase->getReferenceNumber()); EXPECT_TRUE(performanceCountersBase->enable(true)); performanceCountersBase->shutdown(); } TEST_F(PerformanceCountersMetricsLibraryTest, givenInitialCcsEngineWhenEnablingThenDontAllowNonCcsOnNextCalls) { createPerformanceCounters(true, false); EXPECT_NE(nullptr, performanceCountersBase); EXPECT_TRUE(performanceCountersBase->enable(true)); EXPECT_TRUE(performanceCountersBase->enable(true)); EXPECT_FALSE(performanceCountersBase->enable(false)); performanceCountersBase->shutdown(); performanceCountersBase->shutdown(); performanceCountersBase->shutdown(); EXPECT_EQ(0u, performanceCountersBase->getReferenceNumber()); EXPECT_TRUE(performanceCountersBase->enable(false)); performanceCountersBase->shutdown(); } TEST_F(PerformanceCountersMetricsLibraryTest, givenPerformanceCountersWhenMetricLibraryIsInvalidThenQueryReturnsInvalidGpuCommands) { // Create performance counters. createPerformanceCounters(false, false); EXPECT_NE(nullptr, performanceCountersBase); EXPECT_FALSE(performanceCountersBase->enable(true)); // Obtain required command buffer size. uint32_t commandsSize = performanceCountersBase->getGpuCommandsSize(MetricsLibraryApi::GpuCommandBufferType::Render, true); EXPECT_EQ(0u, commandsSize); // Close library. 
performanceCountersBase->shutdown(); EXPECT_EQ(0u, performanceCountersBase->getReferenceNumber()); } TEST_F(PerformanceCountersMetricsLibraryTest, givenPerformanceCountersWhenMetricLibraryIsValidThenApiReportSizeIsValid) { // Create performance counters. createPerformanceCounters(true, true); EXPECT_NE(nullptr, performanceCountersBase); EXPECT_TRUE(performanceCountersBase->enable(false)); EXPECT_EQ(1u, performanceCountersBase->getReferenceNumber()); // Obtain api report size. uint32_t apiReportSize = performanceCountersBase->getApiReportSize(); EXPECT_GT(apiReportSize, 0u); // Close library. performanceCountersBase->shutdown(); EXPECT_EQ(0u, performanceCountersBase->getReferenceNumber()); } TEST_F(PerformanceCountersMetricsLibraryTest, givenPerformanceCountersWhenMetricLibraryIsInvalidThenApiReportSizeIsInvalid) { // Create performance counters. createPerformanceCounters(false, false); EXPECT_NE(nullptr, performanceCountersBase); EXPECT_FALSE(performanceCountersBase->enable(false)); EXPECT_EQ(1u, performanceCountersBase->getReferenceNumber()); // Obtain api report size. uint32_t apiReportSize = performanceCountersBase->getApiReportSize(); EXPECT_EQ(0u, apiReportSize); // Close library. performanceCountersBase->shutdown(); EXPECT_EQ(0u, performanceCountersBase->getReferenceNumber()); } TEST_F(PerformanceCountersMetricsLibraryTest, givenPerformanceCountersWhenMetricLibraryIsInvalidThenGpuReportSizeIsInvalid) { // Create performance counters. createPerformanceCounters(false, false); EXPECT_NE(nullptr, performanceCountersBase); EXPECT_FALSE(performanceCountersBase->enable(false)); EXPECT_EQ(1u, performanceCountersBase->getReferenceNumber()); // Obtain gpu report size. uint32_t gpuReportSize = performanceCountersBase->getGpuReportSize(); EXPECT_EQ(0u, gpuReportSize); // Close library. 
performanceCountersBase->shutdown(); EXPECT_EQ(0u, performanceCountersBase->getReferenceNumber()); } TEST_F(PerformanceCountersMetricsLibraryTest, givenPerformanceCountersWhenMetricLibraryIsValidThenQueryIsAvailable) { // Create performance counters. createPerformanceCounters(true, true); EXPECT_NE(nullptr, performanceCountersBase); EXPECT_TRUE(performanceCountersBase->enable(false)); EXPECT_EQ(1u, performanceCountersBase->getReferenceNumber()); // Close library. performanceCountersBase->shutdown(); EXPECT_EQ(0u, performanceCountersBase->getReferenceNumber()); } TEST_F(PerformanceCountersMetricsLibraryTest, givenPerformanceCountersWhenMetricLibraryIsInvalidThenQueryIsNotAvailable) { // Create performance counters. createPerformanceCounters(false, false); EXPECT_NE(nullptr, performanceCountersBase); EXPECT_FALSE(performanceCountersBase->enable(false)); EXPECT_EQ(1u, performanceCountersBase->getReferenceNumber()); // Close library. performanceCountersBase->shutdown(); EXPECT_EQ(0u, performanceCountersBase->getReferenceNumber()); } TEST_F(PerformanceCountersMetricsLibraryTest, givenPerformanceCountersWhenMetricLibraryHasInvalidExportFunctionsDestroyThenQueryIsNotAvailable) { createPerformanceCounters(true, false); auto metricsLibraryInterface = performanceCountersBase->getMetricsLibraryInterface(); auto metricsLibraryDll = reinterpret_cast(metricsLibraryInterface->osLibrary.get()); metricsLibraryDll->validContextCreate = true; metricsLibraryDll->validContextDelete = false; EXPECT_NE(nullptr, performanceCountersBase); EXPECT_EQ(0u, performanceCountersBase->getReferenceNumber()); EXPECT_FALSE(performanceCountersBase->enable(false)); EXPECT_EQ(1u, performanceCountersBase->getReferenceNumber()); // Close library. 
performanceCountersBase->shutdown(); EXPECT_EQ(0u, performanceCountersBase->getReferenceNumber()); } TEST_F(PerformanceCountersMetricsLibraryTest, givenPerformanceCountersWhenMetricLibraryHasInvalidExportFunctionsCreateAndDestroyThenQueryIsNotAvailable) { createPerformanceCounters(true, false); auto metricsLibraryInterface = performanceCountersBase->getMetricsLibraryInterface(); auto metricsLibraryDll = reinterpret_cast(metricsLibraryInterface->osLibrary.get()); metricsLibraryDll->validContextCreate = false; metricsLibraryDll->validContextDelete = false; EXPECT_NE(nullptr, performanceCountersBase); EXPECT_EQ(0u, performanceCountersBase->getReferenceNumber()); EXPECT_FALSE(performanceCountersBase->enable(false)); EXPECT_EQ(1u, performanceCountersBase->getReferenceNumber()); // Close library. performanceCountersBase->shutdown(); EXPECT_EQ(0u, performanceCountersBase->getReferenceNumber()); } TEST_F(PerformanceCountersMetricsLibraryTest, givenPerformanceCountersWhenMetricLibraryIsValidThenQueryReturnsCorrectApiReport) { // Create performance counters. createPerformanceCounters(true, false); EXPECT_NE(nullptr, performanceCountersBase); EXPECT_TRUE(performanceCountersBase->enable(false)); EXPECT_EQ(1u, performanceCountersBase->getReferenceNumber()); // Obtain required command buffer size. uint32_t commandsSize = performanceCountersBase->getGpuCommandsSize(MetricsLibraryApi::GpuCommandBufferType::Render, true); EXPECT_NE(0u, commandsSize); // Fill command buffer. uint8_t buffer[1000] = {}; TagNode query = {}; HwPerfCounter perfCounter = {}; query.tagForCpuAccess = &perfCounter; EXPECT_TRUE(performanceCountersBase->getGpuCommands(MetricsLibraryApi::GpuCommandBufferType::Render, query, true, sizeof(buffer), buffer)); // Obtain api report size. uint32_t apiReportSize = performanceCountersBase->getApiReportSize(); EXPECT_GT(apiReportSize, 0u); // Obtain gpu report size. 
uint32_t gpuReportSize = performanceCountersBase->getGpuReportSize(); EXPECT_GT(gpuReportSize, 0u); // Allocate memory for api report. uint8_t *apiReport = new uint8_t[apiReportSize]; EXPECT_NE(apiReport, nullptr); // Obtain api report. EXPECT_TRUE(performanceCountersBase->getApiReport(&query, apiReportSize, apiReport, nullptr, true)); delete[] apiReport; apiReport = nullptr; // Close library. performanceCountersBase->deleteQuery(perfCounter.query.handle); performanceCountersBase->shutdown(); EXPECT_EQ(0u, performanceCountersBase->getReferenceNumber()); } TEST_F(PerformanceCountersMetricsLibraryTest, givenPerformanceCountersWhenMetricLibraryIsValidThenReferenceCounterIsValid) { createPerformanceCounters(true, true); EXPECT_NE(nullptr, performanceCountersBase); EXPECT_EQ(0u, performanceCountersBase->getReferenceNumber()); EXPECT_TRUE(performanceCountersBase->enable(false)); EXPECT_EQ(1u, performanceCountersBase->getReferenceNumber()); EXPECT_TRUE(performanceCountersBase->enable(false)); EXPECT_EQ(2u, performanceCountersBase->getReferenceNumber()); performanceCountersBase->shutdown(); EXPECT_EQ(1u, performanceCountersBase->getReferenceNumber()); performanceCountersBase->shutdown(); EXPECT_EQ(0u, performanceCountersBase->getReferenceNumber()); performanceCountersBase->shutdown(); EXPECT_EQ(0u, performanceCountersBase->getReferenceNumber()); } TEST_F(PerformanceCountersMetricsLibraryTest, givenPerformanceCountersWhenMetricLibraryIsValidThenQueryHandleIsValid) { createPerformanceCounters(true, false); EXPECT_NE(nullptr, performanceCountersBase); MetricsLibraryApi::QueryHandle_1_0 query = {}; EXPECT_EQ(0u, performanceCountersBase->getReferenceNumber()); EXPECT_TRUE(performanceCountersBase->enable(false)); performanceCountersBase->getQueryHandleRef(query); EXPECT_TRUE(query.IsValid()); performanceCountersBase->deleteQuery(query); performanceCountersBase->shutdown(); EXPECT_EQ(0u, performanceCountersBase->getReferenceNumber()); } 
TEST_F(PerformanceCountersMetricsLibraryTest, givenPerformanceCountersWhenOaConfigurationIsInvalidThenGpuReportSizeIsInvalid) { createPerformanceCounters(true, false); EXPECT_NE(nullptr, performanceCountersBase); EXPECT_TRUE(performanceCountersBase->enable(false)); EXPECT_EQ(1u, performanceCountersBase->getReferenceNumber()); auto metricLibraryApi = static_cast(performanceCountersBase->getMetricsLibraryContext().data); metricLibraryApi->validCreateConfigurationOa = false; EXPECT_EQ(0u, performanceCountersBase->getGpuCommandsSize(MetricsLibraryApi::GpuCommandBufferType::Render, true)); EXPECT_GT(performanceCountersBase->getGpuCommandsSize(MetricsLibraryApi::GpuCommandBufferType::Render, false), 0u); performanceCountersBase->shutdown(); EXPECT_EQ(0u, performanceCountersBase->getReferenceNumber()); } TEST_F(PerformanceCountersMetricsLibraryTest, GivenInvalidMetricsLibraryWhenGettingGpuCommandSizeThenZeroIsReported) { createPerformanceCounters(true, false); EXPECT_NE(nullptr, performanceCountersBase); EXPECT_TRUE(performanceCountersBase->enable(false)); EXPECT_EQ(1u, performanceCountersBase->getReferenceNumber()); auto metricLibraryApi = static_cast(performanceCountersBase->getMetricsLibraryContext().data); metricLibraryApi->validGpuReportSize = false; EXPECT_EQ(0u, performanceCountersBase->getGpuCommandsSize(MetricsLibraryApi::GpuCommandBufferType::Render, true)); EXPECT_EQ(0u, performanceCountersBase->getGpuCommandsSize(MetricsLibraryApi::GpuCommandBufferType::Render, false)); performanceCountersBase->shutdown(); EXPECT_EQ(0u, performanceCountersBase->getReferenceNumber()); } TEST_F(PerformanceCountersMetricsLibraryTest, givenPerformanceCountersWhenAllConfigurationsAreValidThenGpuReportSizeIsValid) { createPerformanceCounters(true, false); EXPECT_NE(nullptr, performanceCountersBase); EXPECT_TRUE(performanceCountersBase->enable(false)); EXPECT_EQ(1u, performanceCountersBase->getReferenceNumber()); auto metricLibraryApi = 
static_cast(performanceCountersBase->getMetricsLibraryContext().data); metricLibraryApi->validCreateConfigurationOa = true; metricLibraryApi->validCreateConfigurationUser = true; EXPECT_GT(performanceCountersBase->getGpuCommandsSize(MetricsLibraryApi::GpuCommandBufferType::Render, true), 0u); EXPECT_GT(performanceCountersBase->getGpuCommandsSize(MetricsLibraryApi::GpuCommandBufferType::Render, false), 0u); performanceCountersBase->shutdown(); EXPECT_EQ(0u, performanceCountersBase->getReferenceNumber()); } TEST_F(PerformanceCountersMetricsLibraryTest, givenPerformanceCountersWhenOaConfigurationsActivationIsInvalidThenGpuReportSizeIsInvalid) { createPerformanceCounters(true, false); EXPECT_NE(nullptr, performanceCountersBase); EXPECT_TRUE(performanceCountersBase->enable(false)); EXPECT_EQ(1u, performanceCountersBase->getReferenceNumber()); auto metricLibraryApi = static_cast(performanceCountersBase->getMetricsLibraryContext().data); metricLibraryApi->validActivateConfigurationOa = false; EXPECT_EQ(0u, performanceCountersBase->getGpuCommandsSize(MetricsLibraryApi::GpuCommandBufferType::Render, true)); EXPECT_GT(performanceCountersBase->getGpuCommandsSize(MetricsLibraryApi::GpuCommandBufferType::Render, false), 0u); performanceCountersBase->shutdown(); EXPECT_EQ(0u, performanceCountersBase->getReferenceNumber()); } TEST_F(PerformanceCountersMetricsLibraryTest, givenPerformanceCountersWhenCreatingUserConfigurationThenReturnSuccess) { createPerformanceCounters(true, false); EXPECT_NE(nullptr, performanceCountersBase); EXPECT_TRUE(performanceCountersBase->enable(false)); EXPECT_EQ(1u, performanceCountersBase->getReferenceNumber()); ConfigurationHandle_1_0 configurationHandle = {}; auto metricsLibrary = performanceCountersBase->getMetricsLibraryInterface(); auto contextHandle = performanceCountersBase->getMetricsLibraryContext(); EXPECT_TRUE(metricsLibrary->userConfigurationCreate(contextHandle, configurationHandle)); 
EXPECT_TRUE(metricsLibrary->userConfigurationDelete(configurationHandle)); performanceCountersBase->shutdown(); EXPECT_EQ(0u, performanceCountersBase->getReferenceNumber()); } TEST_F(PerformanceCountersMetricsLibraryTest, WhenGettingHwPerfCounterThenValidPointerIsReturned) { createPerformanceCounters(true, false); EXPECT_NE(nullptr, performanceCountersBase); EXPECT_TRUE(performanceCountersBase->enable(false)); EXPECT_EQ(1u, performanceCountersBase->getReferenceNumber()); ASSERT_NE(nullptr, queue->getPerfCounters()); std::unique_ptr event(new Event(queue.get(), CL_COMMAND_COPY_BUFFER, 0, 0)); ASSERT_NE(nullptr, event); auto perfCounter = static_cast *>(event->getHwPerfCounterNode()); ASSERT_NE(nullptr, perfCounter); ASSERT_EQ(0ULL, perfCounter->tagForCpuAccess->report[0]); auto perfCounter2 = event->getHwPerfCounterNode(); ASSERT_EQ(perfCounter, perfCounter2); performanceCountersBase->shutdown(); EXPECT_EQ(0u, performanceCountersBase->getReferenceNumber()); } TEST_F(PerformanceCountersMetricsLibraryTest, WhenGettingHwPerfCounterAllocationThenValidPointerIsReturned) { createPerformanceCounters(true, false); EXPECT_NE(nullptr, performanceCountersBase); EXPECT_TRUE(performanceCountersBase->enable(false)); EXPECT_EQ(1u, performanceCountersBase->getReferenceNumber()); ASSERT_NE(nullptr, queue->getPerfCounters()); std::unique_ptr event(new Event(queue.get(), CL_COMMAND_COPY_BUFFER, 0, 0)); ASSERT_NE(nullptr, event); GraphicsAllocation *allocation = event->getHwPerfCounterNode()->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation(); ASSERT_NE(nullptr, allocation); void *memoryStorage = allocation->getUnderlyingBuffer(); size_t memoryStorageSize = allocation->getUnderlyingBufferSize(); EXPECT_NE(nullptr, memoryStorage); EXPECT_GT(memoryStorageSize, 0u); performanceCountersBase->shutdown(); EXPECT_EQ(0u, performanceCountersBase->getReferenceNumber()); } TEST_F(PerformanceCountersMetricsLibraryTest, WhenCreatingEventThenHwPerfCounterMemoryIsPlacedInGraphicsAllocation) 
{ createPerformanceCounters(true, false); EXPECT_NE(nullptr, performanceCountersBase); EXPECT_TRUE(performanceCountersBase->enable(false)); EXPECT_EQ(1u, performanceCountersBase->getReferenceNumber()); ASSERT_NE(nullptr, queue->getPerfCounters()); std::unique_ptr event(new Event(queue.get(), CL_COMMAND_COPY_BUFFER, 0, 0)); ASSERT_NE(nullptr, event); HwPerfCounter *perfCounter = static_cast *>(event->getHwPerfCounterNode())->tagForCpuAccess; ASSERT_NE(nullptr, perfCounter); GraphicsAllocation *allocation = event->getHwPerfCounterNode()->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation(); ASSERT_NE(nullptr, allocation); void *memoryStorage = allocation->getUnderlyingBuffer(); size_t graphicsAllocationSize = allocation->getUnderlyingBufferSize(); EXPECT_GE(perfCounter, memoryStorage); EXPECT_LE(perfCounter + 1, ptrOffset(memoryStorage, graphicsAllocationSize)); performanceCountersBase->shutdown(); EXPECT_EQ(0u, performanceCountersBase->getReferenceNumber()); } TEST_F(PerformanceCountersMetricsLibraryTest, GivenPerformanceCountersObjectIsNotPresentWhenCreatingEventThenNodeisNull) { std::unique_ptr event(new Event(queue.get(), CL_COMMAND_COPY_BUFFER, 0, 0)); ASSERT_NE(nullptr, event); auto node = event->getHwPerfCounterNode(); ASSERT_EQ(nullptr, node); } TEST_F(PerformanceCountersTest, givenRenderCoreFamilyWhenGettingGenIdThenMetricsLibraryGenIdentifierAreValid) { const auto &hwInfo = device->getHardwareInfo(); const auto gen = hwInfo.platform.eRenderCoreFamily; EXPECT_NE(ClientGen::Unknown, static_cast(HwHelper::get(gen).getMetricsLibraryGenId())); } compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/raii_hw_info_config.h000066400000000000000000000014301422164147700304210ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/os_interface/hw_info_config.h" namespace NEO { extern HwInfoConfig *hwInfoConfigFactory[IGFX_MAX_PRODUCT]; template class 
RAIIHwInfoConfigFactory { public: PRODUCT_FAMILY productFamily; HwInfoConfig *hwInfoConfig; MockHwInfoConfig mockHwInfoConfig{}; RAIIHwInfoConfigFactory(PRODUCT_FAMILY productFamily) { this->productFamily = productFamily; hwInfoConfig = hwInfoConfigFactory[this->productFamily]; hwInfoConfigFactory[this->productFamily] = &mockHwInfoConfig; } ~RAIIHwInfoConfigFactory() { hwInfoConfigFactory[this->productFamily] = hwInfoConfig; } }; } // namespace NEOcompute-runtime-22.14.22890/opencl/test/unit_test/os_interface/windows/000077500000000000000000000000001422164147700257625ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/windows/CMakeLists.txt000066400000000000000000000041421422164147700305230ustar00rootroot00000000000000# # Copyright (C) 2018-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_os_interface_windows ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/deferrable_deletion_win_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/device_command_stream_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/device_os_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/driver_info_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/file_logger_win_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/hw_info_config_win_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/hw_info_config_win_tests.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_kmdaf_listener.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_os_time_win.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_performance_counters_win.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_performance_counters_win.h ${CMAKE_CURRENT_SOURCE_DIR}/os_context_win_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/os_interface_win_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/os_interface_win_tests.h ${CMAKE_CURRENT_SOURCE_DIR}/os_library_win_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/os_time_win_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/performance_counters_win_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/registry_reader_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/registry_reader_tests.h 
${CMAKE_CURRENT_SOURCE_DIR}/self_lib_win.cpp ${CMAKE_CURRENT_SOURCE_DIR}/wddm_address_space_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/wddm_kmdaf_listener_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/wddm_memory_manager_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/wddm_memory_manager_tests.h ${CMAKE_CURRENT_SOURCE_DIR}/wddm_mapper_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/wddm_memory_manager_allocate_in_device_pool_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/wddm_residency_controller_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/wddm_residency_handler_tests.cpp ) if(WIN32) file(GLOB IGDRCL_SRC_tests_wddm_interface "${CMAKE_CURRENT_SOURCE_DIR}/wddm2[0-9]_tests\.cpp") target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_os_interface_windows} ${IGDRCL_SRC_tests_wddm_interface} ) endif() add_subdirectories() deferrable_deletion_win_tests.cpp000066400000000000000000000051011422164147700344610ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/windows/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/os_interface/windows/deferrable_deletion_win.h" #include "shared/test/common/mocks/mock_execution_environment.h" #include "shared/test/common/mocks/mock_wddm.h" #include "gtest/gtest.h" #include using namespace NEO; TEST(DeferrableDeletionImpl, WhenCreatedThenItIsNonCopyable) { EXPECT_FALSE(std::is_move_constructible::value); EXPECT_FALSE(std::is_copy_constructible::value); } TEST(DeferrableDeletionImpl, WhenCreatedThenItIsNonAssignable) { EXPECT_FALSE(std::is_move_assignable::value); EXPECT_FALSE(std::is_copy_assignable::value); } class MockDeferrableDeletion : public DeferrableDeletionImpl { public: using DeferrableDeletionImpl::allocationCount; using DeferrableDeletionImpl::DeferrableDeletionImpl; using DeferrableDeletionImpl::handles; using DeferrableDeletionImpl::resourceHandle; using DeferrableDeletionImpl::wddm; }; class 
DeferrableDeletionTest : public ::testing::Test { public: std::unique_ptr executionEnvironment; std::unique_ptr wddm; const D3DKMT_HANDLE handle = 0; uint32_t allocationCount = 1; D3DKMT_HANDLE resourceHandle = 0; void SetUp() override { executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); wddm = std::make_unique(*executionEnvironment->rootDeviceEnvironments[0]); } }; TEST_F(DeferrableDeletionTest, givenDeferrableDeletionWhenIsCreatedThenObjectMembersAreSetProperly) { MockDeferrableDeletion deletion(wddm.get(), &handle, allocationCount, resourceHandle); EXPECT_EQ(wddm.get(), deletion.wddm); EXPECT_NE(nullptr, deletion.handles); EXPECT_EQ(handle, *deletion.handles); EXPECT_NE(&handle, deletion.handles); EXPECT_EQ(allocationCount, deletion.allocationCount); EXPECT_EQ(resourceHandle, deletion.resourceHandle); } TEST_F(DeferrableDeletionTest, givenDeferrableDeletionWhenApplyIsCalledThenDeletionIsApplied) { wddm->callBaseDestroyAllocations = false; std::unique_ptr deletion(DeferrableDeletion::create((Wddm *)wddm.get(), &handle, allocationCount, resourceHandle)); EXPECT_EQ(0, wddm->destroyAllocationResult.called); deletion->apply(); EXPECT_EQ(1, wddm->destroyAllocationResult.called); } device_command_stream_tests.cpp000066400000000000000000001766351422164147700341630ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/windows/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/aub_command_stream_receiver.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/command_stream/command_stream_receiver_with_aub_dump.h" #include "shared/source/command_stream/device_command_stream.h" #include "shared/source/command_stream/linear_stream.h" #include "shared/source/command_stream/preemption.h" #include "shared/source/direct_submission/dispatchers/render_dispatcher.h" #include 
"shared/source/direct_submission/windows/wddm_direct_submission.h" #include "shared/source/helpers/flush_stamp.h" #include "shared/source/helpers/windows/gmm_callbacks.h" #include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/os_interface/os_interface.h" #include "shared/source/os_interface/sys_calls_common.h" #include "shared/source/os_interface/windows/os_context_win.h" #include "shared/source/os_interface/windows/wddm_device_command_stream.h" #include "shared/source/os_interface/windows/wddm_memory_manager.h" #include "shared/source/os_interface/windows/wddm_memory_operations_handler.h" #include "shared/source/os_interface/windows/wddm_residency_controller.h" #include "shared/test/common/fixtures/device_fixture.h" #include "shared/test/common/fixtures/memory_management_fixture.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/dispatch_flags_helper.h" #include "shared/test/common/helpers/engine_descriptor_helper.h" #include "shared/test/common/helpers/execution_environment_helper.h" #include "shared/test/common/mocks/mock_builtins.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/mocks/mock_gmm_page_table_mngr.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "shared/test/common/mocks/mock_io_functions.h" #include "shared/test/common/mocks/mock_submissions_aggregator.h" #include "shared/test/common/mocks/mock_wddm_interface23.h" #include "shared/test/common/mocks/windows/mock_gdi_interface.h" #include "shared/test/common/mocks/windows/mock_wddm_direct_submission.h" #include "shared/test/common/os_interface/windows/mock_wddm_memory_manager.h" #include "shared/test/common/os_interface/windows/wddm_fixture.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/fixtures/mock_aub_center_fixture.h" #include 
"opencl/test/unit_test/mocks/mock_buffer.h" #include "hw_cmds.h" using namespace NEO; using namespace ::testing; class WddmCommandStreamFixture { public: std::unique_ptr device; DeviceCommandStreamReceiver *csr; MockWddmMemoryManager *memoryManager = nullptr; WddmMock *wddm = nullptr; DebugManagerStateRestore stateRestore; void SetUp() { HardwareInfo *hwInfo = nullptr; DebugManager.flags.CsrDispatchMode.set(static_cast(DispatchMode::ImmediateDispatch)); auto executionEnvironment = getExecutionEnvironmentImpl(hwInfo, 1); memoryManager = new MockWddmMemoryManager(*executionEnvironment); executionEnvironment->memoryManager.reset(memoryManager); wddm = static_cast(executionEnvironment->rootDeviceEnvironments[0]->osInterface->getDriverModel()->as()); device.reset(MockDevice::create(executionEnvironment, 0u)); ASSERT_NE(nullptr, device); csr = new WddmCommandStreamReceiver(*executionEnvironment, 0, device->getDeviceBitfield()); device->resetCommandStreamReceiver(csr); csr->getOsContext().ensureContextInitialized(); } void TearDown() { } }; template struct MockWddmCsr : public WddmCommandStreamReceiver { using CommandStreamReceiver::clearColorAllocation; using CommandStreamReceiver::commandStream; using CommandStreamReceiver::dispatchMode; using CommandStreamReceiver::getCS; using CommandStreamReceiver::globalFenceAllocation; using CommandStreamReceiver::useGpuIdleImplicitFlush; using CommandStreamReceiver::useNewResourceImplicitFlush; using CommandStreamReceiverHw::blitterDirectSubmission; using CommandStreamReceiverHw::directSubmission; using WddmCommandStreamReceiver::commandBufferHeader; using WddmCommandStreamReceiver::initDirectSubmission; using WddmCommandStreamReceiver::WddmCommandStreamReceiver; void overrideDispatchPolicy(DispatchMode overrideValue) { this->dispatchMode = overrideValue; } SubmissionAggregator *peekSubmissionAggregator() { return this->submissionAggregator.get(); } void overrideSubmissionAggregator(SubmissionAggregator *newSubmissionsAggregator) 
{ this->submissionAggregator.reset(newSubmissionsAggregator); } void overrideRecorededCommandBuffer(Device &device) { recordedCommandBuffer = std::unique_ptr(new CommandBuffer(device)); } bool initDirectSubmission(Device &device, OsContext &osContext) override { if (callParentInitDirectSubmission) { return WddmCommandStreamReceiver::initDirectSubmission(device, osContext); } bool ret = true; if (DebugManager.flags.EnableDirectSubmission.get() == 1) { if (!initBlitterDirectSubmission) { directSubmission = std::make_unique< MockWddmDirectSubmission>>(device, osContext); ret = directSubmission->initialize(true, false); this->dispatchMode = DispatchMode::ImmediateDispatch; } else { blitterDirectSubmission = std::make_unique< MockWddmDirectSubmission>>(device, osContext); blitterDirectSubmission->initialize(true, false); } } return ret; } int flushCalledCount = 0; std::unique_ptr recordedCommandBuffer = nullptr; bool callParentInitDirectSubmission = true; bool initBlitterDirectSubmission = false; }; class WddmCommandStreamWithMockGdiFixture { public: MockWddmCsr *csr = nullptr; MemoryManager *memoryManager = nullptr; std::unique_ptr device = nullptr; WddmMock *wddm = nullptr; MockGdi *gdi = nullptr; DebugManagerStateRestore stateRestore; GraphicsAllocation *preemptionAllocation = nullptr; void SetUp() { HardwareInfo *hwInfo = nullptr; ExecutionEnvironment *executionEnvironment = getExecutionEnvironmentImpl(hwInfo, 1); wddm = static_cast(executionEnvironment->rootDeviceEnvironments[0]->osInterface->getDriverModel()->as()); gdi = new MockGdi(); wddm->resetGdi(gdi); ASSERT_NE(wddm, nullptr); DebugManager.flags.CsrDispatchMode.set(static_cast(DispatchMode::ImmediateDispatch)); this->csr = new MockWddmCsr(*executionEnvironment, 0, 1); memoryManager = new WddmMemoryManager(*executionEnvironment); ASSERT_NE(nullptr, memoryManager); executionEnvironment->memoryManager.reset(memoryManager); device = std::unique_ptr(Device::create(executionEnvironment, 0u)); 
device->resetCommandStreamReceiver(this->csr); ASSERT_NE(nullptr, device); this->csr->overrideRecorededCommandBuffer(*device); } void TearDown() { wddm = nullptr; } }; using WddmCommandStreamTest = ::Test; using WddmCommandStreamMockGdiTest = ::Test; using WddmDefaultTest = ::Test; struct DeviceCommandStreamTest : ::Test, DeviceFixture { void SetUp() override { DeviceFixture::SetUp(); MockAubCenterFixture::SetUp(); setMockAubCenter(pDevice->getRootDeviceEnvironmentRef()); } void TearDown() override { MockAubCenterFixture::TearDown(); DeviceFixture::TearDown(); } }; TEST_F(DeviceCommandStreamTest, WhenCreatingWddmCsrThenWddmPointerIsSetCorrectly) { ExecutionEnvironment *executionEnvironment = pDevice->getExecutionEnvironment(); auto wddm = Wddm::createWddm(nullptr, *executionEnvironment->rootDeviceEnvironments[0].get()); executionEnvironment->rootDeviceEnvironments[0]->osInterface = std::make_unique(); executionEnvironment->rootDeviceEnvironments[0]->osInterface->setDriverModel(std::unique_ptr(wddm)); executionEnvironment->initializeMemoryManager(); std::unique_ptr> csr(static_cast *>(WddmCommandStreamReceiver::create(false, *executionEnvironment, 0, 1))); EXPECT_NE(nullptr, csr); auto wddmFromCsr = csr->peekWddm(); EXPECT_NE(nullptr, wddmFromCsr); } TEST_F(DeviceCommandStreamTest, WhenCreatingWddmCsrWithAubDumpThenAubCsrIsCreated) { ExecutionEnvironment *executionEnvironment = pDevice->getExecutionEnvironment(); auto wddm = Wddm::createWddm(nullptr, *executionEnvironment->rootDeviceEnvironments[0].get()); executionEnvironment->rootDeviceEnvironments[0]->osInterface = std::make_unique(); executionEnvironment->rootDeviceEnvironments[0]->osInterface->setDriverModel(std::unique_ptr(wddm)); executionEnvironment->initializeMemoryManager(); std::unique_ptr> csr(static_cast *>(WddmCommandStreamReceiver::create(true, *executionEnvironment, 0, 1))); EXPECT_NE(nullptr, csr); auto wddmFromCsr = csr->peekWddm(); EXPECT_NE(nullptr, wddmFromCsr); auto aubCSR = static_cast> 
*>(csr.get())->aubCSR.get(); EXPECT_NE(nullptr, aubCSR); } TEST_F(WddmCommandStreamTest, givenFlushStampWhenWaitCalledThenWaitForSpecifiedMonitoredFence) { uint64_t stampToWait = 123; wddm->waitFromCpuResult.called = 0u; csr->waitForFlushStamp(stampToWait); EXPECT_EQ(1u, wddm->waitFromCpuResult.called); EXPECT_TRUE(wddm->waitFromCpuResult.success); EXPECT_EQ(stampToWait, wddm->waitFromCpuResult.uint64ParamPassed); } TEST_F(WddmCommandStreamTest, WhenFlushingThenFlushIsSubmitted) { GraphicsAllocation *commandBuffer = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, commandBuffer); LinearStream cs(commandBuffer); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false}; csr->flush(batchBuffer, csr->getResidencyAllocations()); EXPECT_EQ(1u, wddm->submitResult.called); EXPECT_TRUE(wddm->submitResult.success); EXPECT_EQ(csr->obtainCurrentFlushStamp(), static_cast(csr->getOsContext()).getResidencyController().getMonitoredFence().lastSubmittedFence); memoryManager->freeGraphicsMemory(commandBuffer); } TEST_F(WddmCommandStreamTest, givenPrintIndicesEnabledWhenFlushThenPrintIndices) { DebugManagerStateRestore restorer; DebugManager.flags.PrintDeviceAndEngineIdOnSubmission.set(true); GraphicsAllocation *commandBuffer = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, commandBuffer); LinearStream cs(commandBuffer); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false}; ::testing::internal::CaptureStdout(); csr->flush(batchBuffer, csr->getResidencyAllocations()); const std::string engineType = 
EngineHelpers::engineTypeToString(csr->getOsContext().getEngineType()); const std::string engineUsage = EngineHelpers::engineUsageToString(csr->getOsContext().getEngineUsage()); std::ostringstream expectedValue; expectedValue << SysCalls::getProcessId() << ": Submission to RootDevice Index: " << csr->getRootDeviceIndex() << ", Sub-Devices Mask: " << csr->getOsContext().getDeviceBitfield().to_ulong() << ", EngineId: " << csr->getOsContext().getEngineType() << " (" << engineType << ", " << engineUsage << ")\n"; auto osContextWin = static_cast(&csr->getOsContext()); expectedValue << SysCalls::getProcessId() << ": Wddm Submission with context handle " << osContextWin->getWddmContextHandle() << " and HwQueue handle " << osContextWin->getHwQueue().handle << "\n"; EXPECT_STREQ(::testing::internal::GetCapturedStdout().c_str(), expectedValue.str().c_str()); memoryManager->freeGraphicsMemory(commandBuffer); } TEST_F(WddmCommandStreamTest, givenGraphicsAllocationWithDifferentGpuAddressThenCpuAddressWhenSubmitIsCalledThenGpuAddressIsUsed) { GraphicsAllocation *commandBuffer = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); auto cpuAddress = commandBuffer->getUnderlyingBuffer(); uint64_t mockGpuAddres = 1337; commandBuffer->setCpuPtrAndGpuAddress(cpuAddress, mockGpuAddres); LinearStream cs(commandBuffer); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false}; csr->flush(batchBuffer, csr->getResidencyAllocations()); EXPECT_EQ(mockGpuAddres, wddm->submitResult.commandBufferSubmitted); memoryManager->freeGraphicsMemory(commandBuffer); } TEST_F(WddmCommandStreamTest, GivenOffsetWhenFlushingThenFlushIsSubmittedCorrectly) { auto offset = 128u; GraphicsAllocation *commandBuffer = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), 
MemoryConstants::pageSize, mockDeviceBitfield}); ASSERT_NE(nullptr, commandBuffer); LinearStream cs(commandBuffer); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), offset, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false}; csr->flush(batchBuffer, csr->getResidencyAllocations()); EXPECT_EQ(1u, wddm->submitResult.called); EXPECT_TRUE(wddm->submitResult.success); EXPECT_EQ(wddm->submitResult.commandBufferSubmitted, commandBuffer->getGpuAddress() + offset); memoryManager->freeGraphicsMemory(commandBuffer); } TEST_F(WddmCommandStreamTest, givenWdmmWhenSubmitIsCalledThenCoherencyRequiredFlagIsSetToFalse) { GraphicsAllocation *commandBuffer = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, commandBuffer); LinearStream cs(commandBuffer); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false}; csr->flush(batchBuffer, csr->getResidencyAllocations()); auto commandHeader = wddm->submitResult.commandHeaderSubmitted; COMMAND_BUFFER_HEADER *pHeader = reinterpret_cast(commandHeader); EXPECT_FALSE(pHeader->RequiresCoherency); memoryManager->freeGraphicsMemory(commandBuffer); } struct WddmPreemptionHeaderFixture { void SetUp() { executionEnvironment = getExecutionEnvironmentImpl(hwInfo, 1); executionEnvironment->incRefInternal(); wddm = static_cast(executionEnvironment->rootDeviceEnvironments[0]->osInterface->getDriverModel()->as()); } void TearDown() { executionEnvironment->decRefInternal(); } ExecutionEnvironment *executionEnvironment = nullptr; HardwareInfo *hwInfo = nullptr; WddmMock *wddm = nullptr; }; using WddmPreemptionHeaderTests = ::Test; TEST_F(WddmPreemptionHeaderTests, 
givenWddmCommandStreamReceiverWhenPreemptionIsOffWhenWorkloadIsSubmittedThenHeaderDoesntHavePreemptionFieldSet) { hwInfo->capabilityTable.defaultPreemptionMode = PreemptionMode::Disabled; executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(hwInfo); auto csr = std::make_unique>(*executionEnvironment, 0, 1); executionEnvironment->memoryManager.reset(new MemoryManagerCreate(false, false, *executionEnvironment)); csr->overrideDispatchPolicy(DispatchMode::ImmediateDispatch); OsContextWin osContext(*wddm, 0u, EngineDescriptorHelper::getDefaultDescriptor(HwHelper::get(hwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*hwInfo)[0], PreemptionHelper::getDefaultPreemptionMode(*hwInfo))); csr->setupContext(osContext); auto commandBuffer = executionEnvironment->memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); LinearStream cs(commandBuffer); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false}; csr->flush(batchBuffer, csr->getResidencyAllocations()); auto commandHeader = wddm->submitResult.commandHeaderSubmitted; COMMAND_BUFFER_HEADER *pHeader = reinterpret_cast(commandHeader); EXPECT_FALSE(pHeader->NeedsMidBatchPreEmptionSupport); executionEnvironment->memoryManager->freeGraphicsMemory(commandBuffer); } TEST_F(WddmPreemptionHeaderTests, givenWddmCommandStreamReceiverWhenPreemptionIsOnWhenWorkloadIsSubmittedThenHeaderDoesHavePreemptionFieldSet) { hwInfo->capabilityTable.defaultPreemptionMode = PreemptionMode::MidThread; executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(hwInfo); auto csr = std::make_unique>(*executionEnvironment, 0, 1); executionEnvironment->memoryManager.reset(new MemoryManagerCreate(false, false, *executionEnvironment)); csr->overrideDispatchPolicy(DispatchMode::ImmediateDispatch); OsContextWin osContext(*wddm, 0u, 
EngineDescriptorHelper::getDefaultDescriptor(HwHelper::get(hwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*hwInfo)[0], PreemptionHelper::getDefaultPreemptionMode(*hwInfo))); csr->setupContext(osContext); auto commandBuffer = executionEnvironment->memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); LinearStream cs(commandBuffer); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false}; csr->flush(batchBuffer, csr->getResidencyAllocations()); auto commandHeader = wddm->submitResult.commandHeaderSubmitted; COMMAND_BUFFER_HEADER *pHeader = reinterpret_cast(commandHeader); EXPECT_TRUE(pHeader->NeedsMidBatchPreEmptionSupport); executionEnvironment->memoryManager->freeGraphicsMemory(commandBuffer); } TEST_F(WddmPreemptionHeaderTests, givenDeviceSupportingPreemptionWhenCommandStreamReceiverIsCreatedThenHeaderContainsPreemptionFieldSet) { hwInfo->capabilityTable.defaultPreemptionMode = PreemptionMode::MidThread; executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(hwInfo); auto commandStreamReceiver = std::make_unique>(*executionEnvironment, 0, 1); auto commandHeader = commandStreamReceiver->commandBufferHeader; auto header = reinterpret_cast(commandHeader); EXPECT_TRUE(header->NeedsMidBatchPreEmptionSupport); } TEST_F(WddmPreemptionHeaderTests, givenDevicenotSupportingPreemptionWhenCommandStreamReceiverIsCreatedThenHeaderPreemptionFieldIsNotSet) { hwInfo->capabilityTable.defaultPreemptionMode = PreemptionMode::Disabled; executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(hwInfo); auto commandStreamReceiver = std::make_unique>(*executionEnvironment, 0, 1); auto commandHeader = commandStreamReceiver->commandBufferHeader; auto header = reinterpret_cast(commandHeader); EXPECT_FALSE(header->NeedsMidBatchPreEmptionSupport); } TEST_F(WddmCommandStreamTest, 
givenWdmmWhenSubmitIsCalledWhenEUCountWouldBeOddThenRequestEvenEuCount) { GraphicsAllocation *commandBuffer = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, commandBuffer); LinearStream cs(commandBuffer); wddm->getGtSysInfo()->EUCount = 9; wddm->getGtSysInfo()->SubSliceCount = 3; BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::LOW, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false}; csr->flush(batchBuffer, csr->getResidencyAllocations()); auto commandHeader = wddm->submitResult.commandHeaderSubmitted; COMMAND_BUFFER_HEADER *pHeader = reinterpret_cast(commandHeader); EXPECT_EQ(0, pHeader->UmdRequestedSliceState); EXPECT_EQ(0, pHeader->UmdRequestedSubsliceCount); EXPECT_EQ((wddm->getGtSysInfo()->EUCount / wddm->getGtSysInfo()->SubSliceCount) & (~1u), pHeader->UmdRequestedEUCount); memoryManager->freeGraphicsMemory(commandBuffer); } TEST_F(WddmCommandStreamTest, givenWdmmWhenSubmitIsCalledAndThrottleIsToLowThenSetHeaderFieldsProperly) { GraphicsAllocation *commandBuffer = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, commandBuffer); LinearStream cs(commandBuffer); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::LOW, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false}; csr->flush(batchBuffer, csr->getResidencyAllocations()); auto commandHeader = wddm->submitResult.commandHeaderSubmitted; COMMAND_BUFFER_HEADER *pHeader = reinterpret_cast(commandHeader); EXPECT_EQ(0, pHeader->UmdRequestedSliceState); EXPECT_EQ(0, pHeader->UmdRequestedSubsliceCount); EXPECT_EQ((wddm->getGtSysInfo()->EUCount / wddm->getGtSysInfo()->SubSliceCount) & (~1u), pHeader->UmdRequestedEUCount); memoryManager->freeGraphicsMemory(commandBuffer); } 
TEST_F(WddmCommandStreamTest, givenWdmmWhenSubmitIsCalledAndThrottleIsToMediumThenSetHeaderFieldsProperly) { GraphicsAllocation *commandBuffer = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, commandBuffer); LinearStream cs(commandBuffer); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false}; csr->flush(batchBuffer, csr->getResidencyAllocations()); auto commandHeader = wddm->submitResult.commandHeaderSubmitted; COMMAND_BUFFER_HEADER *pHeader = reinterpret_cast(commandHeader); EXPECT_EQ(0, pHeader->UmdRequestedSliceState); EXPECT_EQ(0, pHeader->UmdRequestedSubsliceCount); EXPECT_EQ((wddm->getGtSysInfo()->EUCount / wddm->getGtSysInfo()->SubSliceCount) & (~1u), pHeader->UmdRequestedEUCount); memoryManager->freeGraphicsMemory(commandBuffer); } TEST_F(WddmCommandStreamTest, givenWdmmWhenSubmitIsCalledAndThrottleIsToHighThenSetHeaderFieldsProperly) { GraphicsAllocation *commandBuffer = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, commandBuffer); LinearStream cs(commandBuffer); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::HIGH, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false}; csr->flush(batchBuffer, csr->getResidencyAllocations()); auto commandHeader = wddm->submitResult.commandHeaderSubmitted; COMMAND_BUFFER_HEADER *pHeader = reinterpret_cast(commandHeader); const uint32_t maxRequestedSubsliceCount = 7; EXPECT_EQ(0, pHeader->UmdRequestedSliceState); EXPECT_EQ((wddm->getGtSysInfo()->SubSliceCount <= maxRequestedSubsliceCount) ? 
wddm->getGtSysInfo()->SubSliceCount : 0, pHeader->UmdRequestedSubsliceCount); EXPECT_EQ((wddm->getGtSysInfo()->EUCount / wddm->getGtSysInfo()->SubSliceCount) & (~1u), pHeader->UmdRequestedEUCount); memoryManager->freeGraphicsMemory(commandBuffer); } TEST_F(WddmCommandStreamTest, givenWddmWithKmDafDisabledWhenFlushIsCalledWithAllocationsForResidencyThenNoneAllocationShouldBeKmDafLocked) { GraphicsAllocation *commandBuffer = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, commandBuffer); LinearStream cs(commandBuffer); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false}; auto linearStreamAllocation = memoryManager->allocateGraphicsMemoryWithProperties({csr->getRootDeviceIndex(), MemoryConstants::pageSize, AllocationType::LINEAR_STREAM, device->getDeviceBitfield()}); ASSERT_NE(nullptr, linearStreamAllocation); ResidencyContainer allocationsForResidency = {linearStreamAllocation}; EXPECT_FALSE(wddm->isKmDafEnabled()); csr->flush(batchBuffer, allocationsForResidency); EXPECT_EQ(0u, wddm->kmDafLockResult.called); EXPECT_EQ(0u, wddm->kmDafLockResult.lockedAllocations.size()); memoryManager->freeGraphicsMemory(commandBuffer); memoryManager->freeGraphicsMemory(linearStreamAllocation); } TEST_F(WddmCommandStreamTest, givenWddmWithKmDafEnabledWhenFlushIsCalledWithoutAllocationsForResidencyThenNoneAllocationShouldBeKmDafLocked) { GraphicsAllocation *commandBuffer = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, commandBuffer); LinearStream cs(commandBuffer); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false}; 
wddm->setKmDafEnabled(true); csr->flush(batchBuffer, csr->getResidencyAllocations()); EXPECT_EQ(0u, wddm->kmDafLockResult.called); EXPECT_EQ(0u, wddm->kmDafLockResult.lockedAllocations.size()); memoryManager->freeGraphicsMemory(commandBuffer); } TEST_F(WddmCommandStreamTest, givenWddmWithKmDafEnabledWhenFlushIsCalledWithResidencyAllocationsInMemoryManagerThenLinearStreamAllocationsShouldBeKmDafLocked) { GraphicsAllocation *commandBuffer = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, commandBuffer); LinearStream cs(commandBuffer); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false}; auto linearStreamAllocation = static_cast(memoryManager->allocateGraphicsMemoryWithProperties({csr->getRootDeviceIndex(), MemoryConstants::pageSize, AllocationType::LINEAR_STREAM, device->getDeviceBitfield()})); ASSERT_NE(nullptr, linearStreamAllocation); csr->makeResident(*linearStreamAllocation); EXPECT_EQ(1u, csr->getResidencyAllocations().size()); EXPECT_EQ(linearStreamAllocation, csr->getResidencyAllocations()[0]); wddm->setKmDafEnabled(true); csr->flush(batchBuffer, csr->getResidencyAllocations()); EXPECT_EQ(1u, wddm->kmDafLockResult.called); EXPECT_EQ(1u, wddm->kmDafLockResult.lockedAllocations.size()); EXPECT_EQ(linearStreamAllocation->getDefaultHandle(), wddm->kmDafLockResult.lockedAllocations[0]); memoryManager->freeGraphicsMemory(commandBuffer); memoryManager->freeGraphicsMemory(linearStreamAllocation); } TEST_F(WddmCommandStreamTest, givenWddmWithKmDafEnabledWhenFlushIsCalledWithAllocationsForResidencyThenLinearStreamAllocationsShouldBeKmDafLocked) { GraphicsAllocation *commandBuffer = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, commandBuffer); 
LinearStream cs(commandBuffer); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false}; auto linearStreamAllocation = static_cast(memoryManager->allocateGraphicsMemoryWithProperties({csr->getRootDeviceIndex(), MemoryConstants::pageSize, AllocationType::LINEAR_STREAM, device->getDeviceBitfield()})); ASSERT_NE(nullptr, linearStreamAllocation); ResidencyContainer allocationsForResidency = {linearStreamAllocation}; wddm->setKmDafEnabled(true); csr->flush(batchBuffer, allocationsForResidency); EXPECT_EQ(1u, wddm->kmDafLockResult.called); EXPECT_EQ(1u, wddm->kmDafLockResult.lockedAllocations.size()); EXPECT_EQ(linearStreamAllocation->getDefaultHandle(), wddm->kmDafLockResult.lockedAllocations[0]); memoryManager->freeGraphicsMemory(commandBuffer); memoryManager->freeGraphicsMemory(linearStreamAllocation); } TEST_F(WddmCommandStreamTest, givenWddmWithKmDafEnabledWhenFlushIsCalledWithAllocationsForResidencyThenFillPatternAllocationsShouldBeKmDafLocked) { GraphicsAllocation *commandBuffer = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, commandBuffer); LinearStream cs(commandBuffer); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false}; auto fillPatternAllocation = static_cast(memoryManager->allocateGraphicsMemoryWithProperties({csr->getRootDeviceIndex(), MemoryConstants::pageSize, AllocationType::FILL_PATTERN, device->getDeviceBitfield()})); ASSERT_NE(nullptr, fillPatternAllocation); ResidencyContainer allocationsForResidency = {fillPatternAllocation}; wddm->setKmDafEnabled(true); csr->flush(batchBuffer, allocationsForResidency); EXPECT_EQ(1u, wddm->kmDafLockResult.called); EXPECT_EQ(1u, 
wddm->kmDafLockResult.lockedAllocations.size()); EXPECT_EQ(fillPatternAllocation->getDefaultHandle(), wddm->kmDafLockResult.lockedAllocations[0]); memoryManager->freeGraphicsMemory(commandBuffer); memoryManager->freeGraphicsMemory(fillPatternAllocation); } TEST_F(WddmCommandStreamTest, givenWddmWithKmDafEnabledWhenFlushIsCalledWithAllocationsForResidencyThenCommandBufferAllocationsShouldBeKmDafLocked) { GraphicsAllocation *commandBuffer = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, commandBuffer); LinearStream cs(commandBuffer); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false}; auto commandBufferAllocation = static_cast(memoryManager->allocateGraphicsMemoryWithProperties({csr->getRootDeviceIndex(), MemoryConstants::pageSize, AllocationType::COMMAND_BUFFER, device->getDeviceBitfield()})); ASSERT_NE(nullptr, commandBufferAllocation); ResidencyContainer allocationsForResidency = {commandBufferAllocation}; wddm->setKmDafEnabled(true); csr->flush(batchBuffer, allocationsForResidency); EXPECT_EQ(1u, wddm->kmDafLockResult.called); EXPECT_EQ(1u, wddm->kmDafLockResult.lockedAllocations.size()); EXPECT_EQ(commandBufferAllocation->getDefaultHandle(), wddm->kmDafLockResult.lockedAllocations[0]); memoryManager->freeGraphicsMemory(commandBuffer); memoryManager->freeGraphicsMemory(commandBufferAllocation); } TEST_F(WddmCommandStreamTest, givenWddmWithKmDafEnabledWhenFlushIsCalledWithAllocationsForResidencyThenNonLinearStreamAllocationShouldNotBeKmDafLocked) { GraphicsAllocation *commandBuffer = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, commandBuffer); LinearStream cs(commandBuffer); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, 
nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false}; auto nonLinearStreamAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, nonLinearStreamAllocation); ResidencyContainer allocationsForResidency = {nonLinearStreamAllocation}; wddm->setKmDafEnabled(true); csr->flush(batchBuffer, allocationsForResidency); EXPECT_EQ(0u, wddm->kmDafLockResult.called); EXPECT_EQ(0u, wddm->kmDafLockResult.lockedAllocations.size()); memoryManager->freeGraphicsMemory(commandBuffer); memoryManager->freeGraphicsMemory(nonLinearStreamAllocation); } TEST_F(WddmCommandStreamTest, WhenMakingResidentThenAllocationIsCorrectlySet) { GraphicsAllocation *commandBuffer = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, commandBuffer); LinearStream cs(commandBuffer); csr->makeResident(*commandBuffer); EXPECT_EQ(0u, wddm->makeResidentResult.called); EXPECT_EQ(1u, csr->getResidencyAllocations().size()); EXPECT_EQ(commandBuffer, csr->getResidencyAllocations()[0]); memoryManager->freeGraphicsMemory(commandBuffer); } TEST_F(WddmCommandStreamTest, WhenMakingNonResidentThenAllocationIsPlacedInEvictionAllocations) { GraphicsAllocation *commandBuffer = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, commandBuffer); LinearStream cs(commandBuffer); csr->makeResident(*cs.getGraphicsAllocation()); csr->makeNonResident(*commandBuffer); EXPECT_EQ(1u, csr->getEvictionAllocations().size()); memoryManager->freeGraphicsMemory(commandBuffer); } TEST_F(WddmCommandStreamTest, WhenProcessingEvictionThenAllAllocationsArePlacedOnTrimCandidateList) { GraphicsAllocation *allocation = 
memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); GraphicsAllocation *allocation2 = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, allocation); ASSERT_NE(nullptr, allocation2); csr->getEvictionAllocations().push_back(allocation); csr->getEvictionAllocations().push_back(allocation2); EXPECT_EQ(2u, csr->getEvictionAllocations().size()); csr->processEviction(); EXPECT_EQ(2u, static_cast(csr->getOsContext()).getResidencyController().peekTrimCandidateList().size()); memoryManager->freeGraphicsMemory(allocation); memoryManager->freeGraphicsMemory(allocation2); } TEST_F(WddmCommandStreamTest, WhenProcesssingEvictionThenEvictionAllocationsListIsCleared) { GraphicsAllocation *allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, allocation); csr->getEvictionAllocations().push_back(allocation); EXPECT_EQ(1u, csr->getEvictionAllocations().size()); csr->processEviction(); EXPECT_EQ(0u, csr->getEvictionAllocations().size()); memoryManager->freeGraphicsMemory(allocation); } TEST_F(WddmCommandStreamTest, WhenMakingResidentAndNonResidentThenAllocationIsMovedCorrectly) { GraphicsAllocation *gfxAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); Buffer *buffer = new AlignedBuffer(gfxAllocation); csr->makeResident(*buffer->getGraphicsAllocation(csr->getRootDeviceIndex())); EXPECT_EQ(0u, wddm->makeResidentResult.called); EXPECT_EQ(1u, csr->getResidencyAllocations().size()); EXPECT_EQ(gfxAllocation, csr->getResidencyAllocations()[0]); csr->makeNonResident(*buffer->getGraphicsAllocation(csr->getRootDeviceIndex())); EXPECT_EQ(gfxAllocation, csr->getEvictionAllocations()[0]); delete buffer; 
memoryManager->freeGraphicsMemory(gfxAllocation); } TEST_F(WddmCommandStreamTest, givenGraphicsAllocationWhenMakeResidentThenAllocationIsInResidencyContainer) { if (memoryManager->isLimitedGPU(0)) { GTEST_SKIP(); } void *hostPtr = reinterpret_cast(wddm->virtualAllocAddress + 0x1234); auto size = 1234u; auto gfxAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, size}, hostPtr); ASSERT_NE(nullptr, gfxAllocation); csr->makeResidentHostPtrAllocation(gfxAllocation); EXPECT_EQ(1u, csr->getResidencyAllocations().size()); EXPECT_EQ(hostPtr, gfxAllocation->getUnderlyingBuffer()); memoryManager->freeGraphicsMemory(gfxAllocation); } TEST_F(WddmCommandStreamTest, givenHostPtrAllocationWhenMapFailsThenFragmentsAreClearedAndNullptrIsReturned) { if (memoryManager->isLimitedGPU(0)) { GTEST_SKIP(); } this->wddm->callBaseMapGpuVa = false; this->wddm->mapGpuVaStatus = false; void *hostPtr = reinterpret_cast(wddm->virtualAllocAddress + 0x1234); auto size = 1234u; wddm->mapGpuVirtualAddressResult.called = 0u; wddm->destroyAllocationResult.called = 0u; auto gfxAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, size}, hostPtr); EXPECT_EQ(1u, wddm->mapGpuVirtualAddressResult.called); EXPECT_EQ(1u, wddm->destroyAllocationResult.called); EXPECT_EQ(nullptr, gfxAllocation); } TEST_F(WddmCommandStreamTest, givenAddressWithHighestBitSetWhenItIsMappedThenProperAddressIsPassed) { if (memoryManager->isLimitedGPU(0)) { GTEST_SKIP(); } uintptr_t address = 0xffff0000; void *faultyAddress = reinterpret_cast(address); wddm->mapGpuVirtualAddressResult.called = 0u; auto gfxAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, MemoryConstants::pageSize}, faultyAddress); EXPECT_EQ(1u, wddm->mapGpuVirtualAddressResult.called); ASSERT_NE(nullptr, gfxAllocation); auto expectedAddress = 
castToUint64(faultyAddress); EXPECT_EQ(gfxAllocation->getGpuAddress(), expectedAddress); ASSERT_EQ(gfxAllocation->fragmentsStorage.fragmentCount, 1u); EXPECT_EQ(expectedAddress, static_cast(gfxAllocation->fragmentsStorage.fragmentStorageData[0].osHandleStorage)->gpuPtr); memoryManager->freeGraphicsMemory(gfxAllocation); } TEST_F(WddmCommandStreamTest, givenHostPtrWhenPtrBelowRestrictionThenCreateAllocationAndMakeResident) { if (memoryManager->isLimitedGPU(0)) { GTEST_SKIP(); } void *hostPtr = reinterpret_cast(memoryManager->getAlignedMallocRestrictions()->minAddress - 0x1000); auto size = 0x2000u; auto gfxAllocation = static_cast(memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, size}, hostPtr)); void *expectedReserve = reinterpret_cast(wddm->virtualAllocAddress); ASSERT_NE(nullptr, gfxAllocation); csr->makeResidentHostPtrAllocation(gfxAllocation); EXPECT_EQ(1u, csr->getResidencyAllocations().size()); EXPECT_EQ(hostPtr, gfxAllocation->getUnderlyingBuffer()); EXPECT_EQ(expectedReserve, gfxAllocation->getReservedAddressPtr()); memoryManager->freeGraphicsMemory(gfxAllocation); } TEST_F(WddmCommandStreamTest, givenTwoTemporaryAllocationsWhenCleanTemporaryAllocationListThenDestoryOnlyCompletedAllocations) { if (memoryManager->isLimitedGPU(0)) { GTEST_SKIP(); } void *host_ptr = (void *)0x1212341; void *host_ptr2 = (void *)0x2212341; auto size = 17262u; GraphicsAllocation *graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, size}, host_ptr); GraphicsAllocation *graphicsAllocation2 = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, size}, host_ptr2); csr->getInternalAllocationStorage()->storeAllocation(std::unique_ptr(graphicsAllocation), TEMPORARY_ALLOCATION); csr->getInternalAllocationStorage()->storeAllocation(std::unique_ptr(graphicsAllocation2), TEMPORARY_ALLOCATION); 
graphicsAllocation->updateTaskCount(1, csr->getOsContext().getContextId()); graphicsAllocation2->updateTaskCount(100, csr->getOsContext().getContextId()); const auto firstWaitResult = csr->waitForTaskCountAndCleanAllocationList(1, TEMPORARY_ALLOCATION); EXPECT_EQ(WaitStatus::Ready, firstWaitResult); // graphicsAllocation2 still lives EXPECT_EQ(host_ptr2, graphicsAllocation2->getUnderlyingBuffer()); auto hostPtrManager = memoryManager->getHostPtrManager(); auto alignedPtr = alignDown(host_ptr, MemoryConstants::pageSize); auto alignedPtr2 = alignDown(host_ptr2, MemoryConstants::pageSize); auto fragment = hostPtrManager->getFragment({alignedPtr2, csr->getRootDeviceIndex()}); ASSERT_NE(nullptr, fragment); EXPECT_EQ(alignedPtr2, fragment->fragmentCpuPointer); auto fragment2 = hostPtrManager->getFragment({alignedPtr, csr->getRootDeviceIndex()}); EXPECT_EQ(nullptr, fragment2); // destroy remaining allocation const auto secondWaitResult = csr->waitForTaskCountAndCleanAllocationList(100, TEMPORARY_ALLOCATION); EXPECT_EQ(WaitStatus::Ready, secondWaitResult); } TEST_F(WddmCommandStreamMockGdiTest, WhenFlushingThenWddmMakeResidentIsCalledForResidencyAllocations) { GraphicsAllocation *commandBuffer = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, commandBuffer); LinearStream cs(commandBuffer); wddm->callBaseMakeResident = true; csr->makeResident(*commandBuffer); EXPECT_EQ(1u, csr->getResidencyAllocations().size()); gdi->getMakeResidentArg().NumAllocations = 0; BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false}; csr->flush(batchBuffer, csr->getResidencyAllocations()); EXPECT_NE(0u, gdi->getMakeResidentArg().NumAllocations); memoryManager->freeGraphicsMemory(commandBuffer); } TEST_F(WddmCommandStreamMockGdiTest, 
WhenMakingResidentThenResidencyAllocationsListIsCleared) { GraphicsAllocation *commandBuffer = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, commandBuffer); LinearStream cs(commandBuffer); csr->makeResident(*commandBuffer); EXPECT_EQ(1u, csr->getResidencyAllocations().size()); EXPECT_EQ(0u, csr->getEvictionAllocations().size()); EXPECT_EQ(trimListUnusedPosition, static_cast(commandBuffer)->getTrimCandidateListPosition(csr->getOsContext().getContextId())); csr->processResidency(csr->getResidencyAllocations(), 0u); csr->makeSurfacePackNonResident(csr->getResidencyAllocations()); EXPECT_EQ(0u, csr->getResidencyAllocations().size()); EXPECT_EQ(0u, csr->getEvictionAllocations().size()); EXPECT_EQ(0u, static_cast(commandBuffer)->getTrimCandidateListPosition(csr->getOsContext().getContextId())); memoryManager->freeGraphicsMemory(commandBuffer); } HWTEST_F(WddmCommandStreamMockGdiTest, givenRecordedCommandBufferWhenItIsSubmittedThenFlushTaskIsProperlyCalled) { //preemption allocation + sip allocation size_t csrSurfaceCount = 0; if (device->getPreemptionMode() == PreemptionMode::MidThread) { csrSurfaceCount = 2; } csrSurfaceCount += csr->globalFenceAllocation ? 
1 : 0; csr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); csr->useNewResourceImplicitFlush = false; csr->useGpuIdleImplicitFlush = false; auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); csr->overrideSubmissionAggregator(mockedSubmissionsAggregator); auto commandBuffer = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); auto dshAlloc = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); auto iohAlloc = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); auto sshAlloc = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); auto tagAllocation = csr->getTagAllocation(); LinearStream cs(commandBuffer); IndirectHeap dsh(dshAlloc); IndirectHeap ioh(iohAlloc); IndirectHeap ssh(sshAlloc); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(device->getHardwareInfo()); dispatchFlags.guardCommandBufferWithPipeControl = true; dispatchFlags.requiresCoherency = true; csr->flushTask(cs, 0u, &dsh, &ioh, &ssh, 0u, dispatchFlags, *device); auto &cmdBuffers = mockedSubmissionsAggregator->peekCommandBuffers(); auto storedCommandBuffer = cmdBuffers.peekHead(); ResidencyContainer copyOfResidency = storedCommandBuffer->surfaces; copyOfResidency.push_back(storedCommandBuffer->batchBuffer.commandBufferAllocation); csr->flushBatchedSubmissions(); csrSurfaceCount += csr->clearColorAllocation ? 1 : 0; csrSurfaceCount -= device->getHardwareInfo().capabilityTable.supportsImages ? 
0 : 1; EXPECT_TRUE(cmdBuffers.peekIsEmpty()); EXPECT_EQ(1u, wddm->submitResult.called); auto csrCommandStream = csr->commandStream.getGraphicsAllocation(); EXPECT_EQ(csrCommandStream->getGpuAddress(), wddm->submitResult.commandBufferSubmitted); EXPECT_TRUE(((COMMAND_BUFFER_HEADER *)wddm->submitResult.commandHeaderSubmitted)->RequiresCoherency); EXPECT_EQ(6u + csrSurfaceCount, wddm->makeResidentResult.handleCount); std::vector expectedHandles; expectedHandles.push_back(static_cast(tagAllocation)->getDefaultHandle()); expectedHandles.push_back(static_cast(commandBuffer)->getDefaultHandle()); expectedHandles.push_back(static_cast(dshAlloc)->getDefaultHandle()); expectedHandles.push_back(static_cast(iohAlloc)->getDefaultHandle()); expectedHandles.push_back(static_cast(sshAlloc)->getDefaultHandle()); expectedHandles.push_back(static_cast(csrCommandStream)->getDefaultHandle()); for (auto i = 0u; i < wddm->makeResidentResult.handleCount; i++) { auto handle = wddm->makeResidentResult.handlePack[i]; auto found = false; for (auto &expectedHandle : expectedHandles) { if (expectedHandle == handle) { found = true; } } EXPECT_TRUE(found); } EXPECT_NE(trimListUnusedPosition, static_cast(tagAllocation)->getTrimCandidateListPosition(csr->getOsContext().getContextId())); EXPECT_NE(trimListUnusedPosition, static_cast(commandBuffer)->getTrimCandidateListPosition(csr->getOsContext().getContextId())); EXPECT_EQ(trimListUnusedPosition, static_cast(dshAlloc)->getTrimCandidateListPosition(csr->getOsContext().getContextId())); EXPECT_EQ(trimListUnusedPosition, static_cast(iohAlloc)->getTrimCandidateListPosition(csr->getOsContext().getContextId())); EXPECT_NE(trimListUnusedPosition, static_cast(sshAlloc)->getTrimCandidateListPosition(csr->getOsContext().getContextId())); EXPECT_NE(trimListUnusedPosition, static_cast(csrCommandStream)->getTrimCandidateListPosition(csr->getOsContext().getContextId())); memoryManager->freeGraphicsMemory(dshAlloc); memoryManager->freeGraphicsMemory(iohAlloc); 
memoryManager->freeGraphicsMemory(sshAlloc); memoryManager->freeGraphicsMemory(commandBuffer); } using WddmSimpleTest = ::testing::Test; HWTEST_F(WddmSimpleTest, givenDefaultWddmCsrWhenItIsCreatedThenBatchingIsTurnedOn) { DebugManager.flags.CsrDispatchMode.set(0); HardwareInfo *hwInfo = nullptr; ExecutionEnvironment *executionEnvironment = getExecutionEnvironmentImpl(hwInfo, 1); std::unique_ptr device(Device::create(executionEnvironment, 0u)); { std::unique_ptr> mockCsr(new MockWddmCsr(*executionEnvironment, 0, 1)); EXPECT_EQ(DispatchMode::BatchedDispatch, mockCsr->dispatchMode); } } HWTEST_F(WddmDefaultTest, givenFtrWddmHwQueuesFlagWhenCreatingCsrThenPickWddmVersionBasingOnFtrFlag) { auto wddm = Wddm::createWddm(nullptr, *pDevice->executionEnvironment->rootDeviceEnvironments[0].get()); pDevice->executionEnvironment->rootDeviceEnvironments[0]->osInterface = std::make_unique(); pDevice->executionEnvironment->rootDeviceEnvironments[0]->osInterface->setDriverModel(std::unique_ptr(wddm)); pDevice->executionEnvironment->rootDeviceEnvironments[0]->memoryOperationsInterface = std::make_unique(wddm); WddmCommandStreamReceiver wddmCsr(*pDevice->executionEnvironment, 0, 1); auto wddmFromCsr = wddmCsr.peekWddm(); EXPECT_NE(nullptr, wddmFromCsr); } struct WddmCsrCompressionTests : ::testing::Test { void setCompressionEnabled(bool enableForBuffer, bool enableForImages) { RuntimeCapabilityTable capabilityTable = defaultHwInfo->capabilityTable; capabilityTable.ftrRenderCompressedBuffers = enableForBuffer; capabilityTable.ftrRenderCompressedImages = enableForImages; hwInfo->capabilityTable = capabilityTable; } HardwareInfo *hwInfo = nullptr; WddmMock *myMockWddm; }; struct WddmCsrCompressionParameterizedTest : WddmCsrCompressionTests, ::testing::WithParamInterface { void SetUp() override { compressionEnabled = GetParam(); } bool compressionEnabled; }; HWTEST_P(WddmCsrCompressionParameterizedTest, givenEnabledCompressionWhenInitializedThenCreatePagetableMngr) { uint32_t index = 1u; 
ExecutionEnvironment *executionEnvironment = getExecutionEnvironmentImpl(hwInfo, 2); std::unique_ptr device(Device::create(executionEnvironment, 1u)); setCompressionEnabled(compressionEnabled, !compressionEnabled); myMockWddm = static_cast(executionEnvironment->rootDeviceEnvironments[0]->osInterface->getDriverModel()->as()); MockWddmCsr mockWddmCsr(*executionEnvironment, index, 1); mockWddmCsr.createPageTableManager(); ASSERT_NE(nullptr, mockWddmCsr.pageTableManager.get()); auto mockMngr = reinterpret_cast(mockWddmCsr.pageTableManager.get()); EXPECT_EQ(1u, mockMngr->setCsrHanleCalled); EXPECT_EQ(&mockWddmCsr, mockMngr->passedCsrHandle); GMM_TRANSLATIONTABLE_CALLBACKS expectedTTCallbacks = {}; unsigned int expectedFlags = TT_TYPE::AUXTT; expectedTTCallbacks.pfWriteL3Adr = TTCallbacks::writeL3Address; EXPECT_TRUE(memcmp(&expectedTTCallbacks, &mockMngr->translationTableCb, sizeof(GMM_TRANSLATIONTABLE_CALLBACKS)) == 0); EXPECT_TRUE(memcmp(&expectedFlags, &mockMngr->translationTableFlags, sizeof(unsigned int)) == 0); } HWTEST_F(WddmCsrCompressionTests, givenDisabledCompressionWhenInitializedThenDontCreatePagetableMngr) { ExecutionEnvironment *executionEnvironment = getExecutionEnvironmentImpl(hwInfo, 2); std::unique_ptr device(Device::create(executionEnvironment, 1u)); setCompressionEnabled(false, false); myMockWddm = static_cast(executionEnvironment->rootDeviceEnvironments[0]->osInterface->getDriverModel()->as()); MockWddmCsr mockWddmCsr(*executionEnvironment, 1, device->getDeviceBitfield()); for (auto engine : executionEnvironment->memoryManager.get()->getRegisteredEngines()) { EXPECT_EQ(nullptr, engine.commandStreamReceiver->pageTableManager.get()); } } INSTANTIATE_TEST_CASE_P( WddmCsrCompressionParameterizedTestCreate, WddmCsrCompressionParameterizedTest, ::testing::Bool()); HWTEST_F(WddmCsrCompressionTests, givenDisabledCompressionWhenFlushingThenDontInitTranslationTable) { ExecutionEnvironment *executionEnvironment = getExecutionEnvironmentImpl(hwInfo, 2); 
setCompressionEnabled(false, false); myMockWddm = static_cast(executionEnvironment->rootDeviceEnvironments[0]->osInterface->getDriverModel()->as()); executionEnvironment->memoryManager.reset(new WddmMemoryManager(*executionEnvironment)); std::unique_ptr device(Device::create(executionEnvironment, 1u)); auto mockWddmCsr = new MockWddmCsr(*executionEnvironment, 1, device->getDeviceBitfield()); mockWddmCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); device->resetCommandStreamReceiver(mockWddmCsr); auto memoryManager = executionEnvironment->memoryManager.get(); for (auto engine : memoryManager->getRegisteredEngines()) { EXPECT_EQ(nullptr, engine.commandStreamReceiver->pageTableManager.get()); } auto graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{mockWddmCsr->getRootDeviceIndex(), MemoryConstants::pageSize}); IndirectHeap cs(graphicsAllocation); for (auto engine : memoryManager->getRegisteredEngines()) { EXPECT_EQ(nullptr, engine.commandStreamReceiver->pageTableManager.get()); } DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); mockWddmCsr->flushTask(cs, 0u, &cs, &cs, &cs, 0u, dispatchFlags, *device); for (auto engine : memoryManager->getRegisteredEngines()) { EXPECT_EQ(nullptr, engine.commandStreamReceiver->pageTableManager.get()); } mockWddmCsr->flushBatchedSubmissions(); memoryManager->freeGraphicsMemory(graphicsAllocation); } template struct MockWddmDrmDirectSubmissionDispatchCommandBuffer : public MockWddmDirectSubmission> { MockWddmDrmDirectSubmissionDispatchCommandBuffer(Device &device, OsContext &osContext) : MockWddmDirectSubmission>(device, osContext) { } bool dispatchCommandBuffer(BatchBuffer &batchBuffer, FlushStampTracker &flushStamp) override { dispatchCommandBufferCalled++; return false; } uint32_t dispatchCommandBufferCalled = 0; }; TEST_F(WddmCommandStreamMockGdiTest, givenDirectSubmissionFailsThenFlushReturnsError) { using MockSubmission = 
MockWddmDrmDirectSubmissionDispatchCommandBuffer; DebugManager.flags.EnableDirectSubmission.set(1); auto hwInfo = device->getRootDeviceEnvironment().getMutableHardwareInfo(); hwInfo->capabilityTable.directSubmissionEngines.data[aub_stream::ENGINE_RCS].engineSupported = true; std::unique_ptr osContext; osContext.reset(OsContext::create(device->getExecutionEnvironment()->rootDeviceEnvironments[0]->osInterface.get(), 0, EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_RCS, EngineUsage::Regular}, PreemptionMode::ThreadGroup, device->getDeviceBitfield()))); osContext->setDefaultContext(true); csr->callParentInitDirectSubmission = false; bool ret = csr->initDirectSubmission(*device.get(), *osContext.get()); EXPECT_TRUE(ret); EXPECT_TRUE(csr->isDirectSubmissionEnabled()); EXPECT_FALSE(csr->isBlitterDirectSubmissionEnabled()); GraphicsAllocation *commandBuffer = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, commandBuffer); LinearStream cs(commandBuffer); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, commandBuffer->getUnderlyingBuffer(), false}; csr->directSubmission = std::make_unique(*device.get(), *osContext.get()); auto res = csr->flush(batchBuffer, csr->getResidencyAllocations()); EXPECT_EQ(NEO::SubmissionStatus::FAILED, res); auto directSubmission = reinterpret_cast(csr->directSubmission.get()); EXPECT_GT(directSubmission->dispatchCommandBufferCalled, 0u); memoryManager->freeGraphicsMemory(commandBuffer); } TEST_F(WddmCommandStreamMockGdiTest, givenDirectSubmissionEnabledOnRcsWhenFlushingCommandBufferThenExpectDirectSubmissionUsed) { using Dispatcher = RenderDispatcher; using MockSubmission = MockWddmDirectSubmission; DebugManager.flags.EnableDirectSubmission.set(1); auto hwInfo = 
device->getRootDeviceEnvironment().getMutableHardwareInfo(); hwInfo->capabilityTable.directSubmissionEngines.data[aub_stream::ENGINE_RCS].engineSupported = true; std::unique_ptr osContext; osContext.reset(OsContext::create(device->getExecutionEnvironment()->rootDeviceEnvironments[0]->osInterface.get(), 0, EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_RCS, EngineUsage::Regular}, PreemptionMode::ThreadGroup, device->getDeviceBitfield()))); osContext->setDefaultContext(true); csr->callParentInitDirectSubmission = false; bool ret = csr->initDirectSubmission(*device.get(), *osContext.get()); EXPECT_TRUE(ret); EXPECT_TRUE(csr->isDirectSubmissionEnabled()); EXPECT_FALSE(csr->isBlitterDirectSubmissionEnabled()); GraphicsAllocation *commandBuffer = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, commandBuffer); LinearStream cs(commandBuffer); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, commandBuffer->getUnderlyingBuffer(), false}; csr->flush(batchBuffer, csr->getResidencyAllocations()); auto directSubmission = reinterpret_cast(csr->directSubmission.get()); EXPECT_TRUE(directSubmission->ringStart); size_t actualDispatchSize = directSubmission->ringCommandStream.getUsed(); size_t expectedSize = directSubmission->getSizeSemaphoreSection() + Dispatcher::getSizePreemption() + directSubmission->getSizeDispatch(); EXPECT_EQ(expectedSize, actualDispatchSize); memoryManager->freeGraphicsMemory(commandBuffer); } TEST_F(WddmCommandStreamMockGdiTest, givenDirectSubmissionEnabledOnBcsWhenFlushingCommandBufferThenExpectDirectSubmissionUsed) { using Dispatcher = BlitterDispatcher; using MockSubmission = MockWddmDirectSubmission; DebugManager.flags.EnableDirectSubmission.set(1); auto hwInfo = device->getRootDeviceEnvironment().getMutableHardwareInfo(); 
hwInfo->capabilityTable.directSubmissionEngines.data[aub_stream::ENGINE_BCS].engineSupported = true; std::unique_ptr osContext; osContext.reset(OsContext::create(device->getExecutionEnvironment()->rootDeviceEnvironments[0]->osInterface.get(), 0, EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_RCS, EngineUsage::Regular}, PreemptionMode::ThreadGroup, device->getDeviceBitfield()))); csr->callParentInitDirectSubmission = false; csr->initBlitterDirectSubmission = true; bool ret = csr->initDirectSubmission(*device.get(), *osContext.get()); EXPECT_TRUE(ret); EXPECT_FALSE(csr->isDirectSubmissionEnabled()); EXPECT_TRUE(csr->isBlitterDirectSubmissionEnabled()); GraphicsAllocation *commandBuffer = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, commandBuffer); LinearStream cs(commandBuffer); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, commandBuffer->getUnderlyingBuffer(), false}; csr->flush(batchBuffer, csr->getResidencyAllocations()); auto directSubmission = reinterpret_cast(csr->blitterDirectSubmission.get()); EXPECT_TRUE(directSubmission->ringStart); size_t actualDispatchSize = directSubmission->ringCommandStream.getUsed(); size_t expectedSize = directSubmission->getSizeSemaphoreSection() + Dispatcher::getSizePreemption() + directSubmission->getSizeDispatch(); EXPECT_EQ(expectedSize, actualDispatchSize); memoryManager->freeGraphicsMemory(commandBuffer); } TEST_F(WddmCommandStreamTest, givenResidencyLoggingAvailableWhenFlushingCommandBufferThenNotifiesResidencyLogger) { if (!NEO::wddmResidencyLoggingAvailable) { GTEST_SKIP(); } GraphicsAllocation *commandBuffer = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, commandBuffer); LinearStream 
cs(commandBuffer); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false}; DebugManagerStateRestore restorer; DebugManager.flags.WddmResidencyLogger.set(1); NEO::IoFunctions::mockFopenCalled = 0u; NEO::IoFunctions::mockVfptrinfCalled = 0u; NEO::IoFunctions::mockFcloseCalled = 0u; wddm->createPagingFenceLogger(); wddm->callBaseMakeResident = true; EXPECT_EQ(1u, NEO::IoFunctions::mockFopenCalled); EXPECT_EQ(1u, NEO::IoFunctions::mockVfptrinfCalled); EXPECT_EQ(0u, NEO::IoFunctions::mockFcloseCalled); csr->flush(batchBuffer, csr->getResidencyAllocations()); EXPECT_EQ(1u, NEO::IoFunctions::mockFopenCalled); EXPECT_EQ(3u, NEO::IoFunctions::mockVfptrinfCalled); EXPECT_EQ(0u, NEO::IoFunctions::mockFcloseCalled); memoryManager->freeGraphicsMemory(commandBuffer); } compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/windows/device_os_tests.cpp000066400000000000000000000062201422164147700316500ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/device/device.h" #include "shared/source/helpers/get_info.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/unit_test/helpers/gtest_helpers.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "gtest/gtest.h" using namespace ::testing; namespace NEO { TEST(DeviceOsTest, GivenDefaultClDeviceWhenCheckingForOsSpecificExtensionsThenCorrectExtensionsAreSet) { auto hwInfo = defaultHwInfo.get(); auto pDevice = MockDevice::createWithNewExecutionEnvironment(hwInfo); auto pClDevice = new ClDevice{*pDevice, platform()}; std::string extensionString(pClDevice->getDeviceInfo().deviceExtensions); EXPECT_FALSE(hasSubstr(extensionString, std::string("cl_intel_va_api_media_sharing 
"))); EXPECT_TRUE(hasSubstr(extensionString, std::string("cl_intel_dx9_media_sharing "))); EXPECT_TRUE(hasSubstr(extensionString, std::string("cl_khr_dx9_media_sharing "))); EXPECT_TRUE(hasSubstr(extensionString, std::string("cl_khr_d3d10_sharing "))); EXPECT_TRUE(hasSubstr(extensionString, std::string("cl_khr_d3d11_sharing "))); EXPECT_TRUE(hasSubstr(extensionString, std::string("cl_intel_d3d11_nv12_media_sharing "))); EXPECT_TRUE(hasSubstr(extensionString, std::string("cl_intel_simultaneous_sharing "))); delete pClDevice; } TEST(DeviceOsTest, WhenCreatingDeviceThenSimultaneousInteropsIsSupported) { auto pDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); std::vector expected = {CL_GL_CONTEXT_KHR, CL_WGL_HDC_KHR, CL_CONTEXT_ADAPTER_D3D9_KHR, CL_CONTEXT_D3D9_DEVICE_INTEL, CL_CONTEXT_ADAPTER_D3D9EX_KHR, CL_CONTEXT_D3D9EX_DEVICE_INTEL, CL_CONTEXT_ADAPTER_DXVA_KHR, CL_CONTEXT_DXVA_DEVICE_INTEL, CL_CONTEXT_D3D10_DEVICE_KHR, CL_CONTEXT_D3D11_DEVICE_KHR, 0}; EXPECT_TRUE(pDevice->simultaneousInterops == expected); } TEST(DeviceOsTest, GivenFailedDeviceWhenCreatingWithNewExecutionEnvironmentThenNullIsReturned) { auto hwInfo = defaultHwInfo.get(); auto pDevice = MockDevice::createWithNewExecutionEnvironment(hwInfo); EXPECT_EQ(nullptr, pDevice); } TEST(DeviceOsTest, GivenMidThreadPreemptionAndFailedDeviceWhenCreatingDeviceThenNullIsReturned) { DebugManagerStateRestore dbgRestore; DebugManager.flags.ForcePreemptionMode.set(static_cast(PreemptionMode::MidThread)); auto pDevice = MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()); EXPECT_EQ(nullptr, pDevice); } } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/windows/driver_info_tests.cpp000066400000000000000000000301051422164147700322150ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/preemption.h" #include 
"shared/source/execution_environment/execution_environment.h" #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/memory_manager/os_agnostic_memory_manager.h" #include "shared/source/os_interface/os_interface.h" #include "shared/source/os_interface/windows/debug_registry_reader.h" #include "shared/source/os_interface/windows/driver_info_windows.h" #include "shared/test/common/helpers/ult_hw_config.h" #include "shared/test/common/helpers/variable_backup.h" #include "shared/test/common/mocks/mock_csr.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/mocks/mock_execution_environment.h" #include "shared/test/common/mocks/mock_wddm.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/os_interface/windows/registry_reader_tests.h" #include "gtest/gtest.h" #include namespace NEO { namespace SysCalls { extern const wchar_t *currentLibraryPath; } extern CommandStreamReceiverCreateFunc commandStreamReceiverFactory[2 * IGFX_MAX_CORE]; CommandStreamReceiver *createMockCommandStreamReceiver(bool withAubDump, ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield); class DriverInfoDeviceTest : public ::testing::Test { public: void SetUp() { hwInfo = defaultHwInfo.get(); commandStreamReceiverCreateFunc = commandStreamReceiverFactory[hwInfo->platform.eRenderCoreFamily]; commandStreamReceiverFactory[hwInfo->platform.eRenderCoreFamily] = createMockCommandStreamReceiver; } void TearDown() { commandStreamReceiverFactory[hwInfo->platform.eRenderCoreFamily] = commandStreamReceiverCreateFunc; } CommandStreamReceiverCreateFunc commandStreamReceiverCreateFunc; const HardwareInfo *hwInfo; }; CommandStreamReceiver *createMockCommandStreamReceiver(bool withAubDump, ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield) { auto csr = new 
MockCommandStreamReceiver(executionEnvironment, rootDeviceIndex, deviceBitfield); if (!executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->osInterface) { auto wddm = new WddmMock(*executionEnvironment.rootDeviceEnvironments[0]); wddm->init(); } EXPECT_NE(nullptr, executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->osInterface.get()); return csr; } class MockDriverInfoWindows : public DriverInfoWindows { public: using DriverInfoWindows::DriverInfoWindows; using DriverInfoWindows::path; using DriverInfoWindows::registryReader; const char *getRegistryReaderRegKey() { return reader->getRegKey(); } TestedRegistryReader *reader = nullptr; static MockDriverInfoWindows *create(std::string path) { auto result = new MockDriverInfoWindows("", PhysicalDevicePciBusInfo(PhysicalDevicePciBusInfo::InvalidValue, PhysicalDevicePciBusInfo::InvalidValue, PhysicalDevicePciBusInfo::InvalidValue, PhysicalDevicePciBusInfo::InvalidValue)); result->reader = new TestedRegistryReader(path); result->registryReader.reset(result->reader); return result; }; }; TEST_F(DriverInfoDeviceTest, GivenDeviceCreatedWhenCorrectOSInterfaceThenCreateDriverInfo) { VariableBackup backup(&ultHwConfig); ultHwConfig.useHwCsr = true; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(hwInfo)); EXPECT_NE(nullptr, device->driverInfo.get()); } TEST_F(DriverInfoDeviceTest, GivenDeviceCreatedWithoutCorrectOSInterfaceThenDontCreateDriverInfo) { VariableBackup backup(&ultHwConfig); ultHwConfig.useHwCsr = false; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(hwInfo)); EXPECT_EQ(nullptr, device->driverInfo.get()); } class MockRegistryReader : public SettingsReader { public: std::string nameString; std::string versionString; std::string getSetting(const char *settingName, const std::string &value) override { std::string key(settingName); if (key == "HardwareInformation.AdapterString") { properNameKey = true; } else if (key == "DriverVersion") { 
properVersionKey = true; } else if (key == "UserModeDriverName") { properMediaSharingExtensions = true; using64bit = true; return returnString; } else if (key == "UserModeDriverNameWOW") { properMediaSharingExtensions = true; return returnString; } else if (key == "DriverStorePathForComputeRuntime") { return driverStorePath; } else if (key == "OpenCLDriverName") { return openCLDriverName; } return value; } bool getSetting(const char *settingName, bool defaultValue) override { return defaultValue; }; int64_t getSetting(const char *settingName, int64_t defaultValue) override { return defaultValue; }; int32_t getSetting(const char *settingName, int32_t defaultValue) override { return defaultValue; }; const char *appSpecificLocation(const std::string &name) override { return name.c_str(); }; bool properNameKey = false; bool properVersionKey = false; std::string driverStorePath = "driverStore\\0x8086"; std::string openCLDriverName = "igdrcl.dll"; bool properMediaSharingExtensions = false; bool using64bit = false; std::string returnString = ""; }; struct DriverInfoWindowsTest : public ::testing::Test { void SetUp() override { DriverInfoWindows::createRegistryReaderFunc = [](const std::string &) -> std::unique_ptr { return std::make_unique(); }; driverInfo = std::make_unique("", PhysicalDevicePciBusInfo(PhysicalDevicePciBusInfo::InvalidValue, PhysicalDevicePciBusInfo::InvalidValue, PhysicalDevicePciBusInfo::InvalidValue, PhysicalDevicePciBusInfo::InvalidValue)); } VariableBackup createFuncBackup{&DriverInfoWindows::createRegistryReaderFunc}; std::unique_ptr driverInfo; }; TEST_F(DriverInfoWindowsTest, GivenDriverInfoWhenThenReturnNonNullptr) { auto registryReaderMock = static_cast(driverInfo->registryReader.get()); std::string defaultName = "defaultName"; auto name = driverInfo->getDeviceName(defaultName); EXPECT_STREQ(defaultName.c_str(), name.c_str()); EXPECT_TRUE(registryReaderMock->properNameKey); std::string defaultVersion = "defaultVersion"; auto driverVersion = 
driverInfo->getVersion(defaultVersion); EXPECT_STREQ(defaultVersion.c_str(), driverVersion.c_str()); EXPECT_TRUE(registryReaderMock->properVersionKey); }; TEST(DriverInfo, givenDriverInfoWhenGetStringReturnNotMeaningEmptyStringThenEnableSharingSupport) { MockDriverInfoWindows driverInfo("", PhysicalDevicePciBusInfo(PhysicalDevicePciBusInfo::InvalidValue, PhysicalDevicePciBusInfo::InvalidValue, PhysicalDevicePciBusInfo::InvalidValue, PhysicalDevicePciBusInfo::InvalidValue)); MockRegistryReader *registryReaderMock = new MockRegistryReader(); driverInfo.registryReader.reset(registryReaderMock); auto enable = driverInfo.getMediaSharingSupport(); EXPECT_TRUE(enable); EXPECT_EQ(is64bit, registryReaderMock->using64bit); EXPECT_TRUE(registryReaderMock->properMediaSharingExtensions); }; TEST(DriverInfo, givenDriverInfoWhenGetStringReturnMeaningEmptyStringThenDisableSharingSupport) { MockDriverInfoWindows driverInfo("", PhysicalDevicePciBusInfo(PhysicalDevicePciBusInfo::InvalidValue, PhysicalDevicePciBusInfo::InvalidValue, PhysicalDevicePciBusInfo::InvalidValue, PhysicalDevicePciBusInfo::InvalidValue)); MockRegistryReader *registryReaderMock = new MockRegistryReader(); registryReaderMock->returnString = "<>"; driverInfo.registryReader.reset(registryReaderMock); auto enable = driverInfo.getMediaSharingSupport(); EXPECT_FALSE(enable); EXPECT_EQ(is64bit, registryReaderMock->using64bit); EXPECT_TRUE(registryReaderMock->properMediaSharingExtensions); }; TEST(DriverInfo, givenFullPathToRegistryWhenCreatingDriverInfoWindowsThenTheRegistryPathIsTrimmed) { std::string registryPath = "Path\\In\\Registry"; std::string fullRegistryPath = "\\REGISTRY\\MACHINE\\" + registryPath; std::string expectedTrimmedRegistryPath = registryPath; MockDriverInfoWindows driverInfo(std::move(fullRegistryPath), PhysicalDevicePciBusInfo(PhysicalDevicePciBusInfo::InvalidValue, PhysicalDevicePciBusInfo::InvalidValue, PhysicalDevicePciBusInfo::InvalidValue, PhysicalDevicePciBusInfo::InvalidValue)); 
EXPECT_STREQ(expectedTrimmedRegistryPath.c_str(), driverInfo.path.c_str()); }; TEST(DriverInfo, givenInitializedOsInterfaceWhenCreateDriverInfoThenReturnDriverInfoWindowsNotNullptr) { MockExecutionEnvironment executionEnvironment; RootDeviceEnvironment rootDeviceEnvironment(executionEnvironment); std::unique_ptr osInterface(new OSInterface()); osInterface->setDriverModel(std::unique_ptr(Wddm::createWddm(nullptr, rootDeviceEnvironment))); EXPECT_NE(nullptr, osInterface->getDriverModel()->as()); std::unique_ptr driverInfo(DriverInfo::create(nullptr, osInterface.get())); EXPECT_NE(nullptr, driverInfo); }; TEST(DriverInfo, givenNotInitializedOsInterfaceWhenCreateDriverInfoThenReturnDriverInfoWindowsNullptr) { std::unique_ptr osInterface; std::unique_ptr driverInfo(DriverInfo::create(nullptr, osInterface.get())); EXPECT_EQ(nullptr, driverInfo); }; TEST(DriverInfo, givenInitializedOsInterfaceWhenCreateDriverInfoWindowsThenSetRegistryReaderWithExpectRegKey) { std::string path = ""; std::unique_ptr driverInfo(MockDriverInfoWindows::create(path)); EXPECT_STREQ(driverInfo->getRegistryReaderRegKey(), driverInfo->reader->getRegKey()); }; TEST_F(DriverInfoWindowsTest, whenThereIsNoOpenCLDriverNamePointedByDriverInfoThenItIsNotCompatible) { VariableBackup currentLibraryPathBackup(&SysCalls::currentLibraryPath); currentLibraryPathBackup = L"driverStore\\0x8086\\myLib.dll"; static_cast(driverInfo->registryReader.get())->openCLDriverName = ""; EXPECT_FALSE(driverInfo->isCompatibleDriverStore()); } TEST_F(DriverInfoWindowsTest, whenCurrentLibraryIsLoadedFromDriverStorePointedByDriverInfoThenItIsCompatible) { VariableBackup currentLibraryPathBackup(&SysCalls::currentLibraryPath); currentLibraryPathBackup = L"driverStore\\0x8086\\myLib.dll"; EXPECT_TRUE(driverInfo->isCompatibleDriverStore()); } TEST_F(DriverInfoWindowsTest, whenCurrentLibraryIsLoadedFromDifferentDriverStoreThanPointedByDriverInfoThenItIsNotCompatible) { VariableBackup 
currentLibraryPathBackup(&SysCalls::currentLibraryPath); currentLibraryPathBackup = L"driverStore\\different_driverStore\\myLib.dll"; EXPECT_FALSE(driverInfo->isCompatibleDriverStore()); } TEST_F(DriverInfoWindowsTest, givenDriverInfoWindowsWhenGetImageSupportIsCalledThenReturnTrue) { MockExecutionEnvironment executionEnvironment; RootDeviceEnvironment rootDeviceEnvironment(executionEnvironment); std::unique_ptr osInterface(new OSInterface()); osInterface->setDriverModel(std::unique_ptr(Wddm::createWddm(nullptr, rootDeviceEnvironment))); EXPECT_NE(nullptr, osInterface->getDriverModel()->as()); std::unique_ptr driverInfo(DriverInfo::create(nullptr, osInterface.get())); EXPECT_TRUE(driverInfo->getImageSupport()); } } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/windows/file_logger_win_tests.cpp000066400000000000000000000101601422164147700330410ustar00rootroot00000000000000/* * Copyright (C) 2019-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/gmm_helper/gmm.h" #include "shared/test/common/fixtures/mock_execution_environment_gmm_fixture.h" #include "shared/test/common/mocks/mock_execution_environment.h" #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/os_interface/windows/mock_wddm_allocation.h" #include "opencl/test/unit_test/utilities/file_logger_tests.h" using namespace NEO; using FileLoggerTests = Test; TEST_F(FileLoggerTests, GivenLogAllocationMemoryPoolFlagThenLogsCorrectInfo) { std::string testFile = "testfile"; DebugVariables flags; flags.LogAllocationMemoryPool.set(true); FullyEnabledFileLogger fileLogger(testFile, flags); // Log file not created bool logFileCreated = fileExists(fileLogger.getLogFileName()); EXPECT_FALSE(logFileCreated); MockWddmAllocation allocation(getGmmClientContext()); allocation.handle = 4; allocation.setAllocationType(AllocationType::BUFFER); allocation.memoryPool = 
MemoryPool::System64KBPages; allocation.getDefaultGmm()->resourceParams.Flags.Info.NonLocalOnly = 0; allocation.setGpuAddress(0x12345); fileLogger.logAllocation(&allocation); std::thread::id thisThread = std::this_thread::get_id(); std::stringstream threadIDCheck; threadIDCheck << " ThreadID: " << thisThread; std::stringstream memoryPoolCheck; memoryPoolCheck << " MemoryPool: " << allocation.getMemoryPool(); std::stringstream gpuAddressCheck; gpuAddressCheck << " GPU address: 0x" << std::hex << allocation.getGpuAddress(); std::stringstream rootDeviceIndexCheck; rootDeviceIndexCheck << " Root device index: " << allocation.getRootDeviceIndex(); if (fileLogger.wasFileCreated(fileLogger.getLogFileName())) { auto str = fileLogger.getFileString(fileLogger.getLogFileName()); EXPECT_TRUE(str.find(threadIDCheck.str()) != std::string::npos); EXPECT_TRUE(str.find("Handle: 4") != std::string::npos); EXPECT_TRUE(str.find(memoryPoolCheck.str()) != std::string::npos); EXPECT_TRUE(str.find(gpuAddressCheck.str()) != std::string::npos); EXPECT_TRUE(str.find(rootDeviceIndexCheck.str()) != std::string::npos); EXPECT_TRUE(str.find("AllocationType: BUFFER") != std::string::npos); } } TEST_F(FileLoggerTests, GivenLogAllocationMemoryPoolFlagSetFalseThenAllocationIsNotLogged) { std::string testFile = "testfile"; DebugVariables flags; flags.LogAllocationMemoryPool.set(false); FullyEnabledFileLogger fileLogger(testFile, flags); // Log file not created bool logFileCreated = fileExists(fileLogger.getLogFileName()); EXPECT_FALSE(logFileCreated); auto executionEnvironment = std::unique_ptr(MockDevice::prepareExecutionEnvironment(defaultHwInfo.get(), 0u)); executionEnvironment->rootDeviceEnvironments[0]->initGmm(); MockWddmAllocation allocation(executionEnvironment->rootDeviceEnvironments[0]->getGmmClientContext()); allocation.handle = 4; allocation.setAllocationType(AllocationType::BUFFER); allocation.memoryPool = MemoryPool::System64KBPages; 
allocation.getDefaultGmm()->resourceParams.Flags.Info.NonLocalOnly = 0; fileLogger.logAllocation(&allocation); std::thread::id thisThread = std::this_thread::get_id(); std::stringstream threadIDCheck; threadIDCheck << " ThreadID: " << thisThread; std::stringstream memoryPoolCheck; memoryPoolCheck << " MemoryPool: " << allocation.getMemoryPool(); if (fileLogger.wasFileCreated(fileLogger.getLogFileName())) { auto str = fileLogger.getFileString(fileLogger.getLogFileName()); EXPECT_FALSE(str.find(threadIDCheck.str()) != std::string::npos); EXPECT_FALSE(str.find("Handle: 4") != std::string::npos); EXPECT_FALSE(str.find(memoryPoolCheck.str()) != std::string::npos); EXPECT_FALSE(str.find("AllocationType: BUFFER") != std::string::npos); EXPECT_FALSE(str.find("NonLocalOnly: 0") != std::string::npos); } } compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/windows/gl/000077500000000000000000000000001422164147700263645ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/windows/gl/CMakeLists.txt000066400000000000000000000005341422164147700311260ustar00rootroot00000000000000# # Copyright (C) 2018-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_os_interface_windows_gl ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_gl_context_info_khr_tests.cpp ) if(WIN32) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_os_interface_windows_gl}) endif() cl_get_gl_context_info_khr_tests.cpp000066400000000000000000000200421422164147700355730ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/windows/gl/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/hw_info.h" #include "shared/source/os_interface/device_factory.h" #include "shared/source/os_interface/os_interface.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/mocks/mock_wddm.h" #include 
"opencl/source/cl_device/cl_device.h" #include "opencl/test/unit_test/api/cl_api_tests.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "opencl/test/unit_test/sharings/gl/gl_dll_helper.h" using namespace NEO; using clGetGLContextInfoKhrTest = api_tests; namespace ULT { TEST_F(clGetGLContextInfoKhrTest, GivenDefaultPlatformWhenGettingGlContextThenSuccessIsReturned) { VariableBackup backup(&ultHwConfig); ultHwConfig.useMockedPrepareDeviceEnvironmentsFunc = false; auto defaultPlatform = std::make_unique(); defaultPlatform->initializeWithNewDevices(); (*platformsImpl)[0] = std::move(defaultPlatform); auto expectedDevice = ::platform()->getClDevice(0); cl_device_id retDevice = 0; size_t retSize = 0; const cl_context_properties properties[] = {CL_GL_CONTEXT_KHR, 1, CL_WGL_HDC_KHR, 2, 0}; retVal = clGetGLContextInfoKHR(properties, CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR, sizeof(cl_device_id), &retDevice, &retSize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(expectedDevice, retDevice); EXPECT_EQ(sizeof(cl_device_id), retSize); retVal = clGetGLContextInfoKHR(properties, CL_DEVICES_FOR_GL_CONTEXT_KHR, sizeof(cl_device_id), &retDevice, &retSize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(expectedDevice, retDevice); EXPECT_EQ(sizeof(cl_device_id), retSize); } using clGetGLContextInfoKHRNonDefaultPlatform = ::testing::Test; TEST_F(clGetGLContextInfoKHRNonDefaultPlatform, GivenNonDefaultPlatformWhenGettingGlContextThenSuccessIsReturned) { platformsImpl->clear(); VariableBackup backup(&ultHwConfig); ultHwConfig.useMockedPrepareDeviceEnvironmentsFunc = false; cl_int retVal = CL_SUCCESS; auto nonDefaultPlatform = std::make_unique(); nonDefaultPlatform->initializeWithNewDevices(); cl_platform_id nonDefaultPlatformCl = nonDefaultPlatform.get(); auto expectedDevice = nonDefaultPlatform->getClDevice(0); size_t retSize = 0; cl_device_id retDevice = 0; const cl_context_properties properties[] = {CL_GL_CONTEXT_KHR, 1, CL_WGL_HDC_KHR, 2, CL_CONTEXT_PLATFORM, 
reinterpret_cast(nonDefaultPlatformCl), 0}; retVal = clGetGLContextInfoKHR(properties, CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR, sizeof(cl_device_id), &retDevice, &retSize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(expectedDevice, retDevice); EXPECT_EQ(sizeof(cl_device_id), retSize); retVal = clGetGLContextInfoKHR(properties, CL_DEVICES_FOR_GL_CONTEXT_KHR, sizeof(cl_device_id), &retDevice, &retSize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(expectedDevice, retDevice); EXPECT_EQ(sizeof(cl_device_id), retSize); } TEST_F(clGetGLContextInfoKhrTest, GivenInvalidParamWhenGettingGlContextThenInvalidValueErrorIsReturned) { cl_device_id retDevice = 0; size_t retSize = 0; const cl_context_properties properties[] = {CL_GL_CONTEXT_KHR, 1, CL_WGL_HDC_KHR, 2, 0}; retVal = clGetGLContextInfoKHR(properties, 0, sizeof(cl_device_id), &retDevice, &retSize); EXPECT_EQ(CL_INVALID_VALUE, retVal); EXPECT_EQ(nullptr, retDevice); EXPECT_EQ(0u, retSize); } TEST_F(clGetGLContextInfoKhrTest, givenContextFromNoIntelOpenGlDriverWhenCallClGetGLContextInfoKHRThenReturnClInvalidContext) { cl_device_id retDevice = 0; size_t retSize = 0; const cl_context_properties properties[] = {CL_GL_CONTEXT_KHR, 1, CL_WGL_HDC_KHR, 2, 0}; GlDllHelper setDllParam; setDllParam.glSetString("NoIntel", GL_VENDOR); retVal = clGetGLContextInfoKHR(properties, 0, sizeof(cl_device_id), &retDevice, &retSize); EXPECT_EQ(CL_INVALID_CONTEXT, retVal); EXPECT_EQ(nullptr, retDevice); EXPECT_EQ(0u, retSize); } TEST_F(clGetGLContextInfoKhrTest, givenNullVersionFromIntelOpenGlDriverWhenCallClGetGLContextInfoKHRThenReturnClInvalidContext) { cl_device_id retDevice = 0; size_t retSize = 0; const cl_context_properties properties[] = {CL_GL_CONTEXT_KHR, 1, CL_WGL_HDC_KHR, 2, 0}; GlDllHelper setDllParam; setDllParam.glSetString("", GL_VERSION); retVal = clGetGLContextInfoKHR(properties, 0, sizeof(cl_device_id), &retDevice, &retSize); EXPECT_EQ(CL_INVALID_CONTEXT, retVal); EXPECT_EQ(nullptr, retDevice); EXPECT_EQ(0u, retSize); } 
TEST_F(clGetGLContextInfoKhrTest, GivenIncorrectPropertiesWhenCallclGetGLContextInfoKHRThenReturnClInvalidGlShareGroupRererencKhr) { cl_device_id retDevice = 0; size_t retSize = 0; retVal = clGetGLContextInfoKHR(nullptr, 0, sizeof(cl_device_id), &retDevice, &retSize); EXPECT_EQ(CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR, retVal); const cl_context_properties propertiesLackOfWglHdcKhr[] = {CL_GL_CONTEXT_KHR, 1, 0}; retVal = clGetGLContextInfoKHR(propertiesLackOfWglHdcKhr, 0, sizeof(cl_device_id), &retDevice, &retSize); EXPECT_EQ(CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR, retVal); const cl_context_properties propertiesLackOfCLGlContextKhr[] = {CL_WGL_HDC_KHR, 2, 0}; retVal = clGetGLContextInfoKHR(propertiesLackOfCLGlContextKhr, 0, sizeof(cl_device_id), &retDevice, &retSize); EXPECT_EQ(CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR, retVal); } TEST_F(clGetGLContextInfoKHRNonDefaultPlatform, whenVerificationOfAdapterLuidFailsThenInvalidGlReferenceErrorIsReturned) { platformsImpl->clear(); VariableBackup backup(&ultHwConfig); ultHwConfig.useMockedPrepareDeviceEnvironmentsFunc = false; cl_int retVal = CL_SUCCESS; auto nonDefaultPlatform = std::make_unique(); nonDefaultPlatform->initializeWithNewDevices(); cl_platform_id nonDefaultPlatformCl = nonDefaultPlatform.get(); auto device = nonDefaultPlatform->getClDevice(0); static_cast(device->getRootDeviceEnvironment().osInterface->getDriverModel()->as())->verifyAdapterLuidReturnValue = false; size_t retSize = 0; cl_device_id retDevice = 0; const cl_context_properties properties[] = {CL_GL_CONTEXT_KHR, 1, CL_WGL_HDC_KHR, 2, CL_CONTEXT_PLATFORM, reinterpret_cast(nonDefaultPlatformCl), 0}; retVal = clGetGLContextInfoKHR(properties, CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR, sizeof(cl_device_id), &retDevice, &retSize); EXPECT_EQ(CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR, retVal); } TEST_F(clGetGLContextInfoKHRNonDefaultPlatform, whenVerificationOfAdapterLuidFailsForFirstDeviceButSucceedsForSecondOneThenReturnTheSecondDevice) { platformsImpl->clear(); 
DebugManagerStateRestore restorer; DebugManager.flags.CreateMultipleRootDevices.set(2); VariableBackup backup(&ultHwConfig); ultHwConfig.useMockedPrepareDeviceEnvironmentsFunc = false; cl_int retVal = CL_SUCCESS; auto nonDefaultPlatform = std::make_unique(); nonDefaultPlatform->initializeWithNewDevices(); cl_platform_id nonDefaultPlatformCl = nonDefaultPlatform.get(); auto device0 = nonDefaultPlatform->getClDevice(0); auto device1 = nonDefaultPlatform->getClDevice(0); cl_device_id expectedDevice = device1; static_cast(device0->getRootDeviceEnvironment().osInterface->getDriverModel()->as())->verifyAdapterLuidReturnValue = false; static_cast(device1->getRootDeviceEnvironment().osInterface->getDriverModel()->as())->verifyAdapterLuidReturnValue = true; size_t retSize = 0; cl_device_id retDevice = 0; const cl_context_properties properties[] = {CL_GL_CONTEXT_KHR, 1, CL_WGL_HDC_KHR, 2, CL_CONTEXT_PLATFORM, reinterpret_cast(nonDefaultPlatformCl), 0}; retVal = clGetGLContextInfoKHR(properties, CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR, sizeof(cl_device_id), &retDevice, &retSize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(expectedDevice, retDevice); EXPECT_EQ(sizeof(cl_device_id), retSize); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/windows/hw_info_config_win_tests.cpp000066400000000000000000000065521422164147700335530ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/os_interface/windows/hw_info_config_win_tests.h" #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/os_interface/hw_info_config.h" #include "shared/source/os_interface/os_interface.h" #include "shared/source/os_interface/windows/wddm/wddm.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/mocks/mock_execution_environment.h" #include 
"shared/test/common/test_macros/test.h" namespace NEO { template <> int HwInfoConfigHw::configureHardwareCustom(HardwareInfo *hwInfo, OSInterface *osIface) { return 0; } HwInfoConfigTestWindows::HwInfoConfigTestWindows() { this->executionEnvironment = std::make_unique(); this->rootDeviceEnvironment = std::make_unique(*executionEnvironment); } HwInfoConfigTestWindows::~HwInfoConfigTestWindows() { } void HwInfoConfigTestWindows::SetUp() { HwInfoConfigTest::SetUp(); osInterface.reset(new OSInterface()); auto wddm = Wddm::createWddm(nullptr, *rootDeviceEnvironment); wddm->init(); outHwInfo = *rootDeviceEnvironment->getHardwareInfo(); } void HwInfoConfigTestWindows::TearDown() { HwInfoConfigTest::TearDown(); } TEST_F(HwInfoConfigTestWindows, givenCorrectParametersWhenConfiguringHwInfoThenReturnSuccess) { int ret = hwConfig.configureHwInfoWddm(&pInHwInfo, &outHwInfo, osInterface.get()); EXPECT_EQ(0, ret); } TEST_F(HwInfoConfigTestWindows, givenCorrectParametersWhenConfiguringHwInfoThenSetFtrSvmCorrectly) { auto ftrSvm = outHwInfo.featureTable.flags.ftrSVM; int ret = hwConfig.configureHwInfoWddm(&pInHwInfo, &outHwInfo, osInterface.get()); ASSERT_EQ(0, ret); EXPECT_EQ(outHwInfo.capabilityTable.ftrSvm, ftrSvm); } TEST_F(HwInfoConfigTestWindows, givenInstrumentationForHardwareIsEnabledOrDisabledWhenConfiguringHwInfoThenOverrideItUsingHaveInstrumentation) { int ret; outHwInfo.capabilityTable.instrumentationEnabled = false; ret = hwConfig.configureHwInfoWddm(&pInHwInfo, &outHwInfo, osInterface.get()); ASSERT_EQ(0, ret); EXPECT_FALSE(outHwInfo.capabilityTable.instrumentationEnabled); outHwInfo.capabilityTable.instrumentationEnabled = true; ret = hwConfig.configureHwInfoWddm(&pInHwInfo, &outHwInfo, osInterface.get()); ASSERT_EQ(0, ret); EXPECT_TRUE(outHwInfo.capabilityTable.instrumentationEnabled); } HWTEST_F(HwInfoConfigTestWindows, givenFtrIaCoherencyFlagWhenConfiguringHwInfoThenSetCoherencySupportCorrectly) { HardwareInfo initialHwInfo = *defaultHwInfo; auto hwInfoConfig = 
HwInfoConfig::get(initialHwInfo.platform.eProductFamily); bool initialCoherencyStatus = false; hwInfoConfig->setCapabilityCoherencyFlag(outHwInfo, initialCoherencyStatus); initialHwInfo.featureTable.flags.ftrL3IACoherency = false; hwInfoConfig->configureHwInfoWddm(&initialHwInfo, &outHwInfo, osInterface.get()); EXPECT_FALSE(outHwInfo.capabilityTable.ftrSupportsCoherency); initialHwInfo.featureTable.flags.ftrL3IACoherency = true; hwInfoConfig->configureHwInfoWddm(&initialHwInfo, &outHwInfo, osInterface.get()); EXPECT_EQ(initialCoherencyStatus, outHwInfo.capabilityTable.ftrSupportsCoherency); } } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/windows/hw_info_config_win_tests.h000066400000000000000000000014411422164147700332100ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/os_interface/hw_info_config.h" #include "opencl/test/unit_test/os_interface/hw_info_config_tests.h" #include namespace NEO { struct MockExecutionEnvironment; struct RootDeviceEnvironment; struct DummyHwConfig : HwInfoConfigHw { }; struct HwInfoConfigTestWindows : public HwInfoConfigTest { HwInfoConfigTestWindows(); ~HwInfoConfigTestWindows(); void SetUp() override; void TearDown() override; std::unique_ptr osInterface; DummyHwConfig hwConfig; std::unique_ptr executionEnvironment; std::unique_ptr rootDeviceEnvironment; }; } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/windows/mock_kmdaf_listener.h000066400000000000000000000140741422164147700321410ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/os_interface/windows/kmdaf_listener.h" namespace NEO { struct KmDafListenerMock : public KmDafListener { inline void notifyLock(bool ftrKmdDaf, D3DKMT_HANDLE hAdapter, D3DKMT_HANDLE hDevice, const D3DKMT_HANDLE hAllocation, 
D3DDDICB_LOCKFLAGS *pLockFlags, PFND3DKMT_ESCAPE pfnEscape) override { notifyLockParametrization.ftrKmdDaf = ftrKmdDaf; notifyLockParametrization.hAdapter = hAdapter; notifyLockParametrization.hDevice = hDevice; notifyLockParametrization.hAllocation = hAllocation; notifyLockParametrization.pLockFlags = pLockFlags; notifyLockParametrization.pfnEscape = pfnEscape; } inline void notifyUnlock(bool ftrKmdDaf, D3DKMT_HANDLE hAdapter, D3DKMT_HANDLE hDevice, const D3DKMT_HANDLE *phAllocation, ULONG allocations, PFND3DKMT_ESCAPE pfnEscape) override { notifyUnlockParametrization.ftrKmdDaf = ftrKmdDaf; notifyUnlockParametrization.hAdapter = hAdapter; notifyUnlockParametrization.hDevice = hDevice; notifyUnlockParametrization.phAllocation = phAllocation; notifyUnlockParametrization.allocations = allocations; notifyUnlockParametrization.pfnEscape = pfnEscape; } inline void notifyMapGpuVA(bool ftrKmdDaf, D3DKMT_HANDLE hAdapter, D3DKMT_HANDLE hDevice, const D3DKMT_HANDLE hAllocation, D3DGPU_VIRTUAL_ADDRESS gpuVirtualAddress, PFND3DKMT_ESCAPE pfnEscape) override { notifyMapGpuVAParametrization.ftrKmdDaf = ftrKmdDaf; notifyMapGpuVAParametrization.hAdapter = hAdapter; notifyMapGpuVAParametrization.hDevice = hDevice; notifyMapGpuVAParametrization.hAllocation = hAllocation; notifyMapGpuVAParametrization.gpuVirtualAddress = gpuVirtualAddress; notifyMapGpuVAParametrization.pfnEscape = pfnEscape; } inline void notifyUnmapGpuVA(bool ftrKmdDaf, D3DKMT_HANDLE hAdapter, D3DKMT_HANDLE hDevice, D3DGPU_VIRTUAL_ADDRESS gpuVirtualAddress, PFND3DKMT_ESCAPE pfnEscape) override { notifyUnmapGpuVAParametrization.ftrKmdDaf = ftrKmdDaf; notifyUnmapGpuVAParametrization.hAdapter = hAdapter; notifyUnmapGpuVAParametrization.hDevice = hDevice; notifyUnmapGpuVAParametrization.gpuVirtualAddress = gpuVirtualAddress; notifyUnmapGpuVAParametrization.pfnEscape = pfnEscape; } inline void notifyMakeResident(bool ftrKmdDaf, D3DKMT_HANDLE hAdapter, D3DKMT_HANDLE hDevice, const D3DKMT_HANDLE *phAllocation, ULONG 
allocations, PFND3DKMT_ESCAPE pfnEscape) override { notifyMakeResidentParametrization.ftrKmdDaf = ftrKmdDaf; notifyMakeResidentParametrization.hAdapter = hAdapter; notifyMakeResidentParametrization.hDevice = hDevice; notifyMakeResidentParametrization.phAllocation = phAllocation; notifyMakeResidentParametrization.allocations = allocations; notifyMakeResidentParametrization.pfnEscape = pfnEscape; } inline void notifyEvict(bool ftrKmdDaf, D3DKMT_HANDLE hAdapter, D3DKMT_HANDLE hDevice, const D3DKMT_HANDLE *phAllocation, ULONG allocations, PFND3DKMT_ESCAPE pfnEscape) override { notifyEvictParametrization.ftrKmdDaf = ftrKmdDaf; notifyEvictParametrization.hAdapter = hAdapter; notifyEvictParametrization.hDevice = hDevice; notifyEvictParametrization.phAllocation = phAllocation; notifyEvictParametrization.allocations = allocations; notifyEvictParametrization.pfnEscape = pfnEscape; } inline void notifyWriteTarget(bool ftrKmdDaf, D3DKMT_HANDLE hAdapter, D3DKMT_HANDLE hDevice, const D3DKMT_HANDLE hAllocation, PFND3DKMT_ESCAPE pfnEscape) override { notifyWriteTargetParametrization.ftrKmdDaf = ftrKmdDaf; notifyWriteTargetParametrization.hAdapter = hAdapter; notifyWriteTargetParametrization.hDevice = hDevice; notifyWriteTargetParametrization.hAllocation = hAllocation; notifyWriteTargetParametrization.pfnEscape = pfnEscape; } struct NotifyLockParametrization { bool ftrKmdDaf = false; D3DKMT_HANDLE hAdapter = 0; D3DKMT_HANDLE hDevice = 0; D3DKMT_HANDLE hAllocation = 0; D3DDDICB_LOCKFLAGS *pLockFlags = nullptr; PFND3DKMT_ESCAPE pfnEscape = nullptr; } notifyLockParametrization; struct NotifyUnlockParametrization { bool ftrKmdDaf = 0; D3DKMT_HANDLE hAdapter = 0; D3DKMT_HANDLE hDevice = 0; const D3DKMT_HANDLE *phAllocation = nullptr; ULONG allocations = 0; PFND3DKMT_ESCAPE pfnEscape = nullptr; } notifyUnlockParametrization; struct NotifyMapGpuVAParametrization { bool ftrKmdDaf = false; D3DKMT_HANDLE hAdapter = 0; D3DKMT_HANDLE hDevice = 0; D3DKMT_HANDLE hAllocation = 0; 
D3DGPU_VIRTUAL_ADDRESS gpuVirtualAddress = 0; PFND3DKMT_ESCAPE pfnEscape = nullptr; } notifyMapGpuVAParametrization; struct NotifyUnmapGpuVAParametrization { bool ftrKmdDaf = false; D3DKMT_HANDLE hAdapter = 0; D3DKMT_HANDLE hDevice = 0; D3DGPU_VIRTUAL_ADDRESS gpuVirtualAddress = 0; PFND3DKMT_ESCAPE pfnEscape = nullptr; } notifyUnmapGpuVAParametrization; struct NotifyMakeResidentParametrization { bool ftrKmdDaf = 0; D3DKMT_HANDLE hAdapter = 0; D3DKMT_HANDLE hDevice = 0; const D3DKMT_HANDLE *phAllocation = nullptr; ULONG allocations = 0; PFND3DKMT_ESCAPE pfnEscape = nullptr; } notifyMakeResidentParametrization; struct NotifyEvictParametrization { bool ftrKmdDaf = 0; D3DKMT_HANDLE hAdapter = 0; D3DKMT_HANDLE hDevice = 0; const D3DKMT_HANDLE *phAllocation = nullptr; ULONG allocations = 0; PFND3DKMT_ESCAPE pfnEscape = nullptr; } notifyEvictParametrization; struct NotifyWriteTargetParametrization { bool ftrKmdDaf = 0; D3DKMT_HANDLE hAdapter = 0; D3DKMT_HANDLE hDevice = 0; D3DKMT_HANDLE hAllocation = 0; PFND3DKMT_ESCAPE pfnEscape = nullptr; } notifyWriteTargetParametrization; }; } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/windows/mock_os_time_win.h000066400000000000000000000010751422164147700314630ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/os_interface/windows/os_time_win.h" namespace NEO { class MockOSTimeWin : public OSTimeWin { public: MockOSTimeWin(OSInterface *osInterface) : OSTimeWin(osInterface){}; void overrideQueryPerformanceCounterFunction(decltype(&QueryPerformanceCounter) function) { this->QueryPerfomanceCounterFnc = function; } void setFrequency(LARGE_INTEGER frequency) { this->frequency = frequency; } }; } // namespace NEOmock_performance_counters_win.cpp000066400000000000000000000050201422164147700345150ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/windows/* * 
Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "mock_performance_counters_win.h" #include "shared/source/os_interface/os_interface.h" #include "shared/source/os_interface/windows/windows_wrapper.h" #include "shared/test/common/mocks/mock_wddm.h" #include "opencl/test/unit_test/os_interface/windows/mock_os_time_win.h" namespace NEO { /////////////////////////////////////////////////////// // MockPerformanceCountersWin::MockPerformanceCountersWin /////////////////////////////////////////////////////// MockPerformanceCountersWin::MockPerformanceCountersWin(Device *device) : PerformanceCountersWin() { } /////////////////////////////////////////////////////// // MockPerformanceCounters::create /////////////////////////////////////////////////////// std::unique_ptr MockPerformanceCounters::create(Device *device) { auto performanceCounters = std::unique_ptr(new MockPerformanceCountersWin(device)); auto metricsLibrary = std::make_unique(); auto metricsLibraryDll = std::make_unique(); metricsLibrary->api = std::make_unique(); metricsLibrary->osLibrary = std::move(metricsLibraryDll); performanceCounters->setMetricsLibraryInterface(std::move(metricsLibrary)); return performanceCounters; } ////////////////////////////////////////////////////// // PerformanceCountersFixture::createPerfCounters ////////////////////////////////////////////////////// void PerformanceCountersFixture::createPerfCounters() { performanceCountersBase = MockPerformanceCounters::create(&device->getDevice()); } ////////////////////////////////////////////////////// // PerformanceCountersFixture::SetUp ////////////////////////////////////////////////////// void PerformanceCountersFixture::SetUp() { device = std::make_unique(new MockDevice()); context = std::make_unique(device.get()); queue = std::make_unique(context.get(), device.get(), &queueProperties, false); osInterface = std::unique_ptr(new OSInterface()); osInterface->setDriverModel(std::unique_ptr(new 
WddmMock(*rootDeviceEnvironment))); device->setOSTime(new MockOSTimeWin(osInterface.get())); } ////////////////////////////////////////////////////// // PerformanceCountersFixture::TearDown ////////////////////////////////////////////////////// void PerformanceCountersFixture::TearDown() { } } // namespace NEO mock_performance_counters_win.h000066400000000000000000000006411422164147700341660ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/windows/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/os_interface/windows/performance_counters_win.h" #include "opencl/test/unit_test/os_interface/mock_performance_counters.h" namespace NEO { class MockPerformanceCountersWin : public PerformanceCountersWin { public: MockPerformanceCountersWin(Device *device); }; } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/windows/mock_wddm_allocation.h000066400000000000000000000030721422164147700323060ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/os_interface/windows/wddm_allocation.h" #include "shared/test/common/mock_gdi/mock_gdi.h" #include "shared/test/common/mocks/mock_gmm.h" namespace NEO { class MockWddmAllocation : public WddmAllocation { public: MockWddmAllocation(GmmClientContext *gmmClientContext) : MockWddmAllocation(gmmClientContext, EngineLimits::maxHandleCount) {} MockWddmAllocation(GmmClientContext *gmmClientContext, uint32_t numGmms) : WddmAllocation(0, numGmms, AllocationType::UNKNOWN, nullptr, 0, nullptr, MemoryPool::MemoryNull, 0u, 3u), gpuPtr(gpuAddress), handle(handles[0]) { for (uint32_t i = 0; i < numGmms; i++) { setGmm(new MockGmm(gmmClientContext), i); setHandle(ALLOCATION_HANDLE, i); } } void clearGmms() { for (uint32_t i = 0; i < getNumGmms(); i++) { delete getGmm(i); } gmms.resize(0); } ~MockWddmAllocation() { 
clearGmms(); } void resizeGmms(size_t newSize) { clearGmms(); gmms.resize(newSize); handles.resize(newSize); } using WddmAllocation::cpuPtr; using WddmAllocation::handles; using WddmAllocation::memoryPool; using WddmAllocation::size; D3DGPU_VIRTUAL_ADDRESS &gpuPtr; D3DKMT_HANDLE &handle; }; } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/windows/os_context_win_tests.cpp000066400000000000000000000103041422164147700327500ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/engine_descriptor_helper.h" #include "shared/test/common/mocks/mock_wddm.h" #include "shared/test/common/os_interface/windows/wddm_fixture.h" using namespace NEO; struct OsContextWinTest : public WddmTestWithMockGdiDll { void SetUp() override { WddmTestWithMockGdiDll::SetUp(); preemptionMode = PreemptionHelper::getDefaultPreemptionMode(*defaultHwInfo); engineTypeUsage = HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*defaultHwInfo)[0]; init(); } PreemptionMode preemptionMode; EngineTypeUsage engineTypeUsage; }; TEST_F(OsContextWinTest, givenWddm20WhenCreatingOsContextThenOsContextIsInitialized) { osContext = std::make_unique(*osInterface->getDriverModel()->as(), 0u, EngineDescriptorHelper::getDefaultDescriptor(engineTypeUsage, preemptionMode)); EXPECT_NO_THROW(osContext->ensureContextInitialized()); } TEST_F(OsContextWinTest, givenWddm20WhenCreatingWddmContextFailThenOsContextCreationFails) { wddm->device = INVALID_HANDLE; osContext = std::make_unique(*osInterface->getDriverModel()->as(), 0u, EngineDescriptorHelper::getDefaultDescriptor(engineTypeUsage, preemptionMode)); EXPECT_ANY_THROW(osContext->ensureContextInitialized()); } TEST_F(OsContextWinTest, givenWddm20WhenCreatingWddmMonitorFenceFailThenOsContextCreationFails) { 
*getCreateSynchronizationObject2FailCallFcn() = true; osContext = std::make_unique(*osInterface->getDriverModel()->as(), 0u, EngineDescriptorHelper::getDefaultDescriptor(engineTypeUsage, preemptionMode)); EXPECT_ANY_THROW(osContext->ensureContextInitialized()); } TEST_F(OsContextWinTest, givenWddm20WhenRegisterTrimCallbackFailThenOsContextCreationFails) { *getRegisterTrimNotificationFailCallFcn() = true; osContext = std::make_unique(*osInterface->getDriverModel()->as(), 0u, EngineDescriptorHelper::getDefaultDescriptor(engineTypeUsage, preemptionMode)); EXPECT_ANY_THROW(osContext->ensureContextInitialized()); } TEST_F(OsContextWinTest, givenWddm20WhenRegisterTrimCallbackIsDisabledThenOsContextIsInitialized) { DebugManagerStateRestore stateRestore; DebugManager.flags.DoNotRegisterTrimCallback.set(true); *getRegisterTrimNotificationFailCallFcn() = true; osContext = std::make_unique(*osInterface->getDriverModel()->as(), 0u, EngineDescriptorHelper::getDefaultDescriptor(engineTypeUsage, preemptionMode)); EXPECT_NO_THROW(osContext->ensureContextInitialized()); } TEST_F(OsContextWinTest, givenReinitializeContextWhenContextIsInitThenContextIsDestroyedAndRecreated) { osContext = std::make_unique(*osInterface->getDriverModel()->as(), 0u, EngineDescriptorHelper::getDefaultDescriptor(engineTypeUsage, preemptionMode)); EXPECT_NO_THROW(osContext->reInitializeContext()); EXPECT_NO_THROW(osContext->ensureContextInitialized()); } TEST_F(OsContextWinTest, givenReinitializeContextWhenContextIsNotInitThenContextIsCreated) { EXPECT_NO_THROW(osContext->reInitializeContext()); EXPECT_NO_THROW(osContext->ensureContextInitialized()); } struct OsContextWinTestNoCleanup : public WddmTestWithMockGdiDllNoCleanup { void SetUp() override { WddmTestWithMockGdiDllNoCleanup::SetUp(); preemptionMode = PreemptionHelper::getDefaultPreemptionMode(*defaultHwInfo); engineTypeUsage = HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*defaultHwInfo)[0]; init(); } 
PreemptionMode preemptionMode; EngineTypeUsage engineTypeUsage; }; TEST_F(OsContextWinTestNoCleanup, givenReinitializeContextWhenContextIsInitThenContextIsNotDestroyed) { osContext = std::make_unique(*osInterface->getDriverModel()->as(), 0u, EngineDescriptorHelper::getDefaultDescriptor(engineTypeUsage, preemptionMode)); EXPECT_TRUE(this->wddm->skipResourceCleanup()); EXPECT_NO_THROW(osContext->reInitializeContext()); EXPECT_NO_THROW(osContext->ensureContextInitialized()); } compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/windows/os_interface_win_tests.cpp000066400000000000000000000042471422164147700332350ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/os_interface/windows/os_interface_win_tests.h" #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/os_interface/windows/os_context_win.h" #include "shared/source/os_interface/windows/sys_calls.h" #include "shared/test/common/helpers/engine_descriptor_helper.h" #include "shared/test/common/mocks/mock_execution_environment.h" #include "shared/test/common/os_interface/windows/wddm_fixture.h" TEST_F(OsInterfaceTest, GivenWindowsWhenOsSupportFor64KBpagesIsBeingQueriedThenTrueIsReturned) { EXPECT_TRUE(OSInterface::are64kbPagesEnabled()); } TEST_F(OsInterfaceTest, GivenWindowsWhenCreateEentIsCalledThenValidEventHandleIsReturned) { auto ev = NEO::SysCalls::createEvent(NULL, TRUE, FALSE, "DUMMY_EVENT_NAME"); EXPECT_NE(nullptr, ev); auto ret = NEO::SysCalls::closeHandle(ev); EXPECT_EQ(TRUE, ret); } TEST(OsContextTest, givenWddmWhenCreateOsContextAfterInitWddmThenOsContextIsInitializedTrimCallbackIsRegisteredMemoryOperationsHandlerCreated) { MockExecutionEnvironment executionEnvironment; RootDeviceEnvironment rootDeviceEnvironment(executionEnvironment); auto wddm = new WddmMock(rootDeviceEnvironment); auto preemptionMode = 
PreemptionHelper::getDefaultPreemptionMode(*defaultHwInfo); wddm->init(); EXPECT_EQ(0u, wddm->registerTrimCallbackResult.called); auto osContext = std::make_unique(*wddm, 0u, EngineDescriptorHelper::getDefaultDescriptor(HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*defaultHwInfo)[0], preemptionMode)); osContext->ensureContextInitialized(); EXPECT_EQ(osContext->getWddm(), wddm); EXPECT_EQ(1u, wddm->registerTrimCallbackResult.called); } TEST_F(OsInterfaceTest, GivenWindowsOsWhenCheckForNewResourceImplicitFlushSupportThenReturnFalse) { EXPECT_FALSE(OSInterface::newResourceImplicitFlush); } TEST_F(OsInterfaceTest, GivenWindowsOsWhenCheckForGpuIdleImplicitFlushSupportThenReturnFalse) { EXPECT_FALSE(OSInterface::gpuIdleImplicitFlush); } compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/windows/os_interface_win_tests.h000066400000000000000000000011471422164147700326760ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/hw_info.h" #include "shared/source/os_interface/device_factory.h" #include "shared/source/os_interface/os_interface.h" #include "shared/source/os_interface/windows/wddm/wddm.h" #include "gtest/gtest.h" using namespace NEO; class OsInterfaceTest : public ::testing::Test { public: void SetUp() override { osInterface = std::unique_ptr(new OSInterface()); } void TearDown() override { } std::unique_ptr osInterface; }; compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/windows/os_library_win_tests.cpp000066400000000000000000000100441422164147700327310ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/windows/os_library_win.h" #include "shared/test/common/helpers/variable_backup.h" #include "shared/test/common/test_macros/test.h" #include "gtest/gtest.h" #include namespace Os { extern const char 
*testDllName; } using namespace NEO; class OsLibraryBackup : public Windows::OsLibrary { using Type = decltype(Windows::OsLibrary::loadLibraryExA); using BackupType = typename VariableBackup; using ModuleNameType = decltype(Windows::OsLibrary::getModuleFileNameA); using ModuleNameBackupType = typename VariableBackup; using SystemDirectoryType = decltype(Windows::OsLibrary::getSystemDirectoryA); using SystemDirectoryBackupType = typename VariableBackup; struct Backup { std::unique_ptr bkp1 = nullptr; std::unique_ptr bkp2 = nullptr; std::unique_ptr bkp3 = nullptr; }; public: static std::unique_ptr backup(Type newValue, ModuleNameType newModuleName, SystemDirectoryType newSystemDirectoryName) { std::unique_ptr bkp(new Backup()); bkp->bkp1.reset(new BackupType(&OsLibrary::loadLibraryExA, newValue)); bkp->bkp2.reset(new ModuleNameBackupType(&OsLibrary::getModuleFileNameA, newModuleName)); bkp->bkp3.reset(new SystemDirectoryBackupType(&OsLibrary::getSystemDirectoryA, newSystemDirectoryName)); return bkp; }; }; bool mockWillFail = true; void trimFileName(char *buff, size_t length) { for (size_t l = length; l > 0; l--) { if (buff[l - 1] == '\\') { buff[l] = '\0'; break; } } } DWORD WINAPI GetModuleFileNameAMock(HMODULE hModule, LPSTR lpFilename, DWORD nSize) { return snprintf(lpFilename, nSize, "z:\\SomeFakeName.dll"); } HMODULE WINAPI LoadLibraryExAMock(LPCSTR lpFileName, HANDLE hFile, DWORD dwFlags) { if (mockWillFail) return NULL; char fName[MAX_PATH]; auto lenFn = strlen(lpFileName); strcpy_s(fName, sizeof(fName), lpFileName); trimFileName(fName, lenFn); EXPECT_STREQ("z:\\", fName); return (HMODULE)1; } UINT WINAPI GetSystemDirectoryAMock(LPSTR lpBuffer, UINT uSize) { const char path[] = "C:\\System"; strcpy_s(lpBuffer, sizeof(path), path); return sizeof(path) - 1; // do not include terminating null } TEST(OSLibraryWinTest, WhenLoadDependencyFailsThenFallbackToNonDriverStore) { auto bkp = OsLibraryBackup::backup(LoadLibraryExAMock, GetModuleFileNameAMock, 
GetSystemDirectoryAMock); std::unique_ptr library(OsLibrary::load(Os::testDllName)); EXPECT_NE(nullptr, library); } TEST(OSLibraryWinTest, WhenDependencyLoadsThenProperPathIsConstructed) { auto bkp = OsLibraryBackup::backup(LoadLibraryExAMock, GetModuleFileNameAMock, GetSystemDirectoryAMock); VariableBackup bkpM(&mockWillFail, false); std::unique_ptr library(OsLibrary::load(Os::testDllName)); EXPECT_NE(nullptr, library); } TEST(OSLibraryWinTest, WhenCreatingFullSystemPathThenProperPathIsConstructed) { auto bkp = OsLibraryBackup::backup(LoadLibraryExAMock, GetModuleFileNameAMock, GetSystemDirectoryAMock); VariableBackup bkpM(&mockWillFail, false); auto fullPath = OsLibrary::createFullSystemPath("test"); EXPECT_STREQ("C:\\System\\test", fullPath.c_str()); } TEST(OSLibraryWinTest, GivenInvalidLibraryWhenOpeningLibraryThenLoadLibraryErrorIsReturned) { std::string errorValue; auto lib = std::make_unique("abc", &errorValue); EXPECT_FALSE(errorValue.empty()); } TEST(OSLibraryWinTest, GivenNoLastErrorOnWindowsThenErrorStringisEmpty) { std::string errorValue; auto lib = std::make_unique(Os::testDllName, &errorValue); EXPECT_NE(nullptr, lib); EXPECT_TRUE(errorValue.empty()); lib.get()->getLastErrorString(&errorValue); EXPECT_TRUE(errorValue.empty()); lib.get()->getLastErrorString(nullptr); }compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/windows/os_time_win_tests.cpp000066400000000000000000000073361422164147700322350ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/os_interface/os_interface.h" #include "shared/test/common/mocks/mock_execution_environment.h" #include "shared/test/common/os_interface/windows/wddm_fixture.h" #include "gtest/gtest.h" #include "mock_os_time_win.h" #include using namespace NEO; LARGE_INTEGER valueToSet = {0}; BOOL WINAPI QueryPerformanceCounterMock( _Out_ LARGE_INTEGER 
*lpPerformanceCount) { *lpPerformanceCount = valueToSet; return true; }; struct OSTimeWinTest : public ::testing::Test { public: void SetUp() override { osTime = std::unique_ptr(new MockOSTimeWin(nullptr)); } void TearDown() override { } std::unique_ptr osTime; }; TEST_F(OSTimeWinTest, givenZeroFrequencyWhenGetHostTimerFuncIsCalledThenReturnsZero) { LARGE_INTEGER frequency; frequency.QuadPart = 0; osTime->setFrequency(frequency); auto retVal = osTime->getHostTimerResolution(); EXPECT_EQ(0, retVal); } TEST_F(OSTimeWinTest, givenNonZeroFrequencyWhenGetHostTimerFuncIsCalledThenReturnsNonZero) { LARGE_INTEGER frequency; frequency.QuadPart = NSEC_PER_SEC; osTime->setFrequency(frequency); auto retVal = osTime->getHostTimerResolution(); EXPECT_EQ(1.0, retVal); } TEST_F(OSTimeWinTest, givenOsTimeWinWhenGetCpuRawTimestampIsCalledThenReturnsNonZero) { auto retVal = osTime->getCpuRawTimestamp(); EXPECT_NE(0ull, retVal); } TEST_F(OSTimeWinTest, givenHighValueOfCpuTimestampWhenItIsObtainedThenItHasProperValue) { osTime->overrideQueryPerformanceCounterFunction(QueryPerformanceCounterMock); LARGE_INTEGER frequency = {0}; frequency.QuadPart = 190457; osTime->setFrequency(frequency); valueToSet.QuadPart = 700894514854; uint64_t timeStamp = 0; uint64_t expectedTimestamp = static_cast((static_cast(valueToSet.QuadPart) * static_cast(NSEC_PER_SEC) / static_cast(frequency.QuadPart))); osTime->getCpuTime(&timeStamp); EXPECT_EQ(expectedTimestamp, timeStamp); } TEST(OSTimeWinTests, givenNoOSInterfaceWhenGetCpuTimeThenReturnsSuccess) { uint64_t time = 0; auto osTime(OSTime::create(nullptr)); auto error = osTime->getCpuTime(&time); EXPECT_TRUE(error); EXPECT_NE(0, time); } TEST(OSTimeWinTests, givenNoOSInterfaceWhenGetCpuGpuTimeThenReturnsError) { TimeStampData CPUGPUTime = {0}; auto osTime(OSTime::create(nullptr)); auto success = osTime->getCpuGpuTime(&CPUGPUTime); EXPECT_FALSE(success); EXPECT_EQ(0, CPUGPUTime.CPUTimeinNS); EXPECT_EQ(0, CPUGPUTime.GPUTimeStamp); } TEST(OSTimeWinTests, 
givenOSInterfaceWhenGetCpuGpuTimeThenReturnsSuccess) { MockExecutionEnvironment executionEnvironment; RootDeviceEnvironment rootDeviceEnvironment(executionEnvironment); rootDeviceEnvironment.setHwInfo(defaultHwInfo.get()); auto wddm = new WddmMock(rootDeviceEnvironment); TimeStampData CPUGPUTime01 = {0}; TimeStampData CPUGPUTime02 = {0}; std::unique_ptr osInterface(new OSInterface()); osInterface->setDriverModel(std::unique_ptr(wddm)); auto osTime = OSTime::create(osInterface.get()); auto success = osTime->getCpuGpuTime(&CPUGPUTime01); EXPECT_TRUE(success); EXPECT_NE(0, CPUGPUTime01.CPUTimeinNS); EXPECT_NE(0, CPUGPUTime01.GPUTimeStamp); success = osTime->getCpuGpuTime(&CPUGPUTime02); EXPECT_TRUE(success); EXPECT_NE(0, CPUGPUTime02.CPUTimeinNS); EXPECT_NE(0, CPUGPUTime02.GPUTimeStamp); EXPECT_GT(CPUGPUTime02.GPUTimeStamp, CPUGPUTime01.GPUTimeStamp); EXPECT_GT(CPUGPUTime02.CPUTimeinNS, CPUGPUTime01.CPUTimeinNS); } performance_counters_win_tests.cpp000066400000000000000000000011261422164147700347310ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/windows/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/windows/performance_counters_win.h" #include "opencl/test/unit_test/os_interface/mock_performance_counters.h" #include "gtest/gtest.h" using namespace NEO; struct PerformanceCountersWinTest : public PerformanceCountersFixture, public ::testing::Test { public: void SetUp() override { PerformanceCountersFixture::SetUp(); } void TearDown() override { PerformanceCountersFixture::TearDown(); } }; compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/windows/registry_reader_tests.cpp000066400000000000000000000226701422164147700331110ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/os_interface/windows/registry_reader_tests.h" #include 
"shared/source/os_interface/windows/sys_calls.h" #include "shared/test/common/helpers/variable_backup.h" #include "shared/test/common/test_macros/test.h" namespace NEO { using RegistryReaderTest = ::testing::Test; namespace SysCalls { extern uint32_t regOpenKeySuccessCount; extern uint32_t regQueryValueSuccessCount; extern uint64_t regQueryValueExpectedData; } // namespace SysCalls TEST_F(RegistryReaderTest, givenRegistryReaderWhenItIsCreatedWithUserScopeSetToFalseThenItsHkeyTypeIsInitializedToHkeyLocalMachine) { bool userScope = false; TestedRegistryReader registryReader(userScope); EXPECT_EQ(HKEY_LOCAL_MACHINE, registryReader.getHkeyType()); } TEST_F(RegistryReaderTest, givenRegistryReaderWhenItIsCreatedWithUserScopeSetToTrueThenItsHkeyTypeIsInitializedHkeyCurrentUser) { bool userScope = true; TestedRegistryReader registryReader(userScope); EXPECT_EQ(HKEY_CURRENT_USER, registryReader.getHkeyType()); } TEST_F(RegistryReaderTest, givenRegistryReaderWhenCallAppSpecificLocationThenReturnCurrentProcessName) { char buff[MAX_PATH]; GetModuleFileNameA(nullptr, buff, MAX_PATH); TestedRegistryReader registryReader(false); const char *ret = registryReader.appSpecificLocation("cl_cache_dir"); EXPECT_STREQ(buff, ret); } TEST_F(RegistryReaderTest, givenRegistryReaderWhenRegKeyNotExistThenReturnDefaultValue) { std::string regKey = "notExistPath"; std::string value = "defaultValue"; TestedRegistryReader registryReader(regKey); EXPECT_EQ(value, registryReader.getSetting("", value)); } TEST_F(RegistryReaderTest, givenRegistryReaderWhenItIsCreatedWithRegKeySpecifiedThenRegKeyIsInitializedAccordingly) { std::string regKey = "Software\\Intel\\IGFX\\OCL\\regKey"; TestedRegistryReader registryReader(regKey); EXPECT_STREQ(regKey.c_str(), registryReader.getRegKey()); } TEST_F(RegistryReaderTest, givenRegistryReaderWhenEnvironmentVariableExistsThenReturnCorrectValue) { char *envVar = "TestedEnvironmentVariable"; std::string value = "defaultValue"; TestedRegistryReader registryReader(""); 
EXPECT_EQ("TestedEnvironmentVariableValue", registryReader.getSetting(envVar, value)); } TEST_F(RegistryReaderTest, givenRegistryReaderWhenEnvironmentIntVariableExistsThenReturnCorrectValue) { char *envVar = "TestedEnvironmentIntVariable"; int32_t value = -1; TestedRegistryReader registryReader(""); EXPECT_EQ(1234, registryReader.getSetting(envVar, value)); } struct DebugReaderWithRegistryAndEnvTest : ::testing::Test { VariableBackup openRegCountBackup{&SysCalls::regOpenKeySuccessCount}; VariableBackup queryRegCountBackup{&SysCalls::regQueryValueSuccessCount}; TestedRegistryReader registryReader{""}; }; TEST_F(DebugReaderWithRegistryAndEnvTest, givenIntDebugKeyWhenReadFromRegistrySucceedsThenReturnObtainedValue) { SysCalls::regOpenKeySuccessCount = 1u; SysCalls::regQueryValueSuccessCount = 1u; EXPECT_EQ(1, registryReader.getSetting("settingSourceInt", 0)); } TEST_F(DebugReaderWithRegistryAndEnvTest, givenInt64DebugKeyWhenReadFromRegistrySucceedsThenReturnObtainedValue) { SysCalls::regOpenKeySuccessCount = 1u; SysCalls::regQueryValueSuccessCount = 1u; EXPECT_EQ(0xffffffffeeeeeeee, registryReader.getSetting("settingSourceInt64", 0)); } TEST_F(DebugReaderWithRegistryAndEnvTest, givenIntDebugKeyWhenQueryValueFailsThenObtainValueFromEnv) { SysCalls::regOpenKeySuccessCount = 1u; SysCalls::regQueryValueSuccessCount = 0u; EXPECT_EQ(2, registryReader.getSetting("settingSourceInt", 0)); } TEST_F(DebugReaderWithRegistryAndEnvTest, givenIntDebugKeyWhenOpenKeyFailsThenObtainValueFromEnv) { SysCalls::regOpenKeySuccessCount = 0u; SysCalls::regQueryValueSuccessCount = 0u; EXPECT_EQ(2, registryReader.getSetting("settingSourceInt", 0)); } TEST_F(DebugReaderWithRegistryAndEnvTest, givenStringDebugKeyWhenReadFromRegistrySucceedsThenReturnObtainedValue) { std::string defaultValue("default"); SysCalls::regOpenKeySuccessCount = 1u; SysCalls::regQueryValueSuccessCount = 2u; EXPECT_STREQ("registry", registryReader.getSetting("settingSourceString", defaultValue).c_str()); } 
TEST_F(DebugReaderWithRegistryAndEnvTest, givenStringDebugKeyWhenQueryValueFailsThenObtainValueFromEnv) { std::string defaultValue("default"); SysCalls::regOpenKeySuccessCount = 1u; SysCalls::regQueryValueSuccessCount = 0u; EXPECT_STREQ("environment", registryReader.getSetting("settingSourceString", defaultValue).c_str()); SysCalls::regOpenKeySuccessCount = 1u; SysCalls::regQueryValueSuccessCount = 1u; EXPECT_STREQ("environment", registryReader.getSetting("settingSourceString", defaultValue).c_str()); } TEST_F(DebugReaderWithRegistryAndEnvTest, givenStringDebugKeyWhenOpenKeyFailsThenObtainValueFromEnv) { std::string defaultValue("default"); SysCalls::regOpenKeySuccessCount = 0u; SysCalls::regQueryValueSuccessCount = 0u; EXPECT_STREQ("environment", registryReader.getSetting("settingSourceString", defaultValue).c_str()); } TEST_F(DebugReaderWithRegistryAndEnvTest, givenBinaryDebugKeyWhenReadFromRegistrySucceedsThenReturnObtainedValue) { std::string defaultValue("default"); SysCalls::regOpenKeySuccessCount = 1u; SysCalls::regQueryValueSuccessCount = 2u; EXPECT_STREQ("registry", registryReader.getSetting("settingSourceBinary", defaultValue).c_str()); } TEST_F(DebugReaderWithRegistryAndEnvTest, givenBinaryDebugKeyOnlyInRegistryWhenReadFromRegistryFailsThenReturnDefaultValue) { std::string defaultValue("default"); SysCalls::regOpenKeySuccessCount = 1u; SysCalls::regQueryValueSuccessCount = 1u; EXPECT_STREQ("default", registryReader.getSetting("settingSourceBinary", defaultValue).c_str()); SysCalls::regOpenKeySuccessCount = 1u; SysCalls::regQueryValueSuccessCount = 0u; EXPECT_STREQ("default", registryReader.getSetting("settingSourceBinary", defaultValue).c_str()); SysCalls::regOpenKeySuccessCount = 0u; SysCalls::regQueryValueSuccessCount = 0u; EXPECT_STREQ("default", registryReader.getSetting("settingSourceBinary", defaultValue).c_str()); } TEST_F(RegistryReaderTest, givenRegistryKeyPresentWhenValueIsZeroThenExpectBooleanFalse) { std::string regKey = "notExistPath"; 
std::string keyName = "boolRegistryKey"; bool defaultValue = false; SysCalls::regOpenKeySuccessCount = 1; SysCalls::regQueryValueSuccessCount = 1; SysCalls::regQueryValueExpectedData = 0ull; TestedRegistryReader registryReader(regKey); bool value = registryReader.getSetting(keyName.c_str(), defaultValue); EXPECT_FALSE(value); } TEST_F(RegistryReaderTest, givenRegistryKeyNotPresentWhenDefaulValueIsFalseOrTrueThenExpectReturnIsMatchingFalseOrTrue) { std::string regKey = "notExistPath"; std::string keyName = "boolRegistryKey"; bool defaultValue = false; SysCalls::regOpenKeySuccessCount = 1; SysCalls::regQueryValueSuccessCount = 0; SysCalls::regQueryValueExpectedData = 1ull; TestedRegistryReader registryReader(regKey); bool value = registryReader.getSetting(keyName.c_str(), defaultValue); EXPECT_FALSE(value); defaultValue = true; SysCalls::regOpenKeySuccessCount = 1; SysCalls::regQueryValueSuccessCount = 0; SysCalls::regQueryValueExpectedData = 0ull; value = registryReader.getSetting(keyName.c_str(), defaultValue); EXPECT_TRUE(value); } TEST_F(RegistryReaderTest, givenRegistryKeyPresentWhenValueIsNonZeroInHigherDwordThenExpectBooleanFalse) { std::string regKey = "notExistPath"; std::string keyName = "boolRegistryKey"; bool defaultValue = true; SysCalls::regOpenKeySuccessCount = 1; SysCalls::regQueryValueSuccessCount = 1; SysCalls::regQueryValueExpectedData = 1ull << 32; TestedRegistryReader registryReader(regKey); bool value = registryReader.getSetting(keyName.c_str(), defaultValue); EXPECT_FALSE(value); } TEST_F(RegistryReaderTest, givenRegistryKeyPresentWhenValueIsNonZeroInLowerDwordThenExpectBooleanTrue) { std::string regKey = "notExistPath"; std::string keyName = "boolRegistryKey"; bool defaultValue = false; SysCalls::regOpenKeySuccessCount = 1; SysCalls::regQueryValueSuccessCount = 1; SysCalls::regQueryValueExpectedData = 1ull; TestedRegistryReader registryReader(regKey); bool value = registryReader.getSetting(keyName.c_str(), defaultValue); EXPECT_TRUE(value); } 
TEST_F(RegistryReaderTest, givenRegistryKeyPresentWhenValueIsNonZeroInBothDwordsThenExpectBooleanTrue) { std::string regKey = "notExistPath"; std::string keyName = "boolRegistryKey"; bool defaultValue = false; SysCalls::regOpenKeySuccessCount = 1; SysCalls::regQueryValueSuccessCount = 1; SysCalls::regQueryValueExpectedData = 1ull | (1ull << 32); TestedRegistryReader registryReader(regKey); bool value = registryReader.getSetting(keyName.c_str(), defaultValue); EXPECT_TRUE(value); } TEST_F(DebugReaderWithRegistryAndEnvTest, givenSetProcessNameWhenReadFromEnvironmentVariableThenReturnClCacheDir) { SysCalls::regOpenKeySuccessCount = 0u; SysCalls::regQueryValueSuccessCount = 0u; registryReader.processName = "processName"; std::string defaultCacheDir = ""; std::string cacheDir = registryReader.getSetting("processName", defaultCacheDir); EXPECT_STREQ("./tested_cl_cache_dir", cacheDir.c_str()); } } // namespace NEOcompute-runtime-22.14.22890/opencl/test/unit_test/os_interface/windows/registry_reader_tests.h000066400000000000000000000026041422164147700325510ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/windows/debug_registry_reader.h" #include "opencl/source/os_interface/ocl_reg_path.h" namespace NEO { class TestedRegistryReader : public RegistryReader { public: TestedRegistryReader(bool userScope) : RegistryReader(userScope, oclRegPath){}; TestedRegistryReader(std::string regKey) : RegistryReader(false, regKey){}; HKEY getHkeyType() const { return hkeyType; } using RegistryReader::getSetting; using RegistryReader::processName; char *getenv(const char *envVar) override { if (strcmp(envVar, "TestedEnvironmentVariable") == 0) { return "TestedEnvironmentVariableValue"; } else if (strcmp(envVar, "TestedEnvironmentIntVariable") == 0) { return "1234"; } else if (strcmp(envVar, "settingSourceString") == 0) { return "environment"; } else if (strcmp(envVar, "settingSourceInt") == 
0) { return "2"; } else if (strcmp(envVar, "processName") == 0) { return "processName"; } else if (strcmp(envVar, "cl_cache_dir") == 0) { return "./tested_cl_cache_dir"; } else { return nullptr; } } const char *getRegKey() const { return registryReadRootKey.c_str(); } }; } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/windows/self_lib_win.cpp000066400000000000000000000002361422164147700311230ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ extern "C" __declspec(dllexport) void selfDynamicLibraryFunc() { } compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/windows/wddm20_tests.cpp000066400000000000000000002363621422164147700310210ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/gmm_helper/gmm.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/memory_manager/os_agnostic_memory_manager.h" #include "shared/source/os_interface/os_interface.h" #include "shared/source/os_interface/os_library.h" #include "shared/source/os_interface/os_time.h" #include "shared/source/os_interface/windows/driver_info_windows.h" #include "shared/source/os_interface/windows/os_context_win.h" #include "shared/source/os_interface/windows/os_environment_win.h" #include "shared/source/os_interface/windows/sys_calls.h" #include "shared/source/os_interface/windows/wddm/wddm_interface.h" #include "shared/source/os_interface/windows/wddm_allocation.h" #include "shared/source/os_interface/windows/wddm_engine_mapper.h" #include "shared/source/os_interface/windows/wddm_memory_manager.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include 
"shared/test/common/helpers/variable_backup.h" #include "shared/test/common/mocks/mock_execution_environment.h" #include "shared/test/common/mocks/mock_gfx_partition.h" #include "shared/test/common/mocks/mock_gmm_resource_info.h" #include "shared/test/common/mocks/mock_io_functions.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "shared/test/common/mocks/mock_wddm_residency_logger.h" #include "shared/test/common/os_interface/windows/ult_dxcore_factory.h" #include "shared/test/common/os_interface/windows/wddm_fixture.h" #include "opencl/test/unit_test/os_interface/windows/mock_wddm_allocation.h" #include "gtest/gtest.h" #include "mock_gmm_memory.h" #include #include #include #include namespace NEO { namespace SysCalls { extern const wchar_t *currentLibraryPath; } extern uint32_t numRootDevicesToEnum; std::unique_ptr createHwDeviceIdFromAdapterLuid(OsEnvironmentWin &osEnvironment, LUID adapterLuid); } // namespace NEO using namespace NEO; namespace GmmHelperFunctions { Gmm *getGmm(void *ptr, size_t size, GmmClientContext *clientContext) { size_t alignedSize = alignSizeWholePage(ptr, size); void *alignedPtr = alignUp(ptr, 4096); Gmm *gmm = new Gmm(clientContext, alignedPtr, alignedSize, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true); EXPECT_NE(gmm->gmmResourceInfo.get(), nullptr); return gmm; } } // namespace GmmHelperFunctions using Wddm20Tests = WddmTest; using Wddm20WithMockGdiDllTestsWithoutWddmInit = WddmTestWithMockGdiDll; using Wddm20InstrumentationTest = WddmInstrumentationTest; struct Wddm20WithMockGdiDllTests : public Wddm20WithMockGdiDllTestsWithoutWddmInit { using Wddm20WithMockGdiDllTestsWithoutWddmInit::TearDown; void SetUp() override { Wddm20WithMockGdiDllTestsWithoutWddmInit::SetUp(); init(); } }; TEST_F(Wddm20Tests, givenMinWindowsAddressWhenWddmIsInitializedThenWddmUseThisAddress) { uintptr_t expectedAddress = 0x200000; EXPECT_EQ(expectedAddress, NEO::windowsMinAddress); EXPECT_EQ(expectedAddress, wddm->getWddmMinAddress()); 
} TEST_F(Wddm20Tests, GivenExisitingContextWhenInitializingWddmThenCreateContextResultCalledIsStillOne) { EXPECT_EQ(1u, wddm->createContextResult.called); wddm->init(); EXPECT_EQ(1u, wddm->createContextResult.called); } TEST_F(Wddm20Tests, givenNullPageTableManagerAndCompressedResourceWhenMappingGpuVaThenDontUpdateAuxTable) { auto gmm = std::unique_ptr(new Gmm(getGmmClientContext(), nullptr, 1, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true)); auto mockGmmRes = reinterpret_cast(gmm->gmmResourceInfo.get()); mockGmmRes->setUnifiedAuxTranslationCapable(); void *fakePtr = reinterpret_cast(0x100); WddmAllocation allocation(0, AllocationType::UNKNOWN, fakePtr, 0x2100, nullptr, MemoryPool::MemoryNull, 0u, 1u); allocation.setDefaultGmm(gmm.get()); allocation.getHandleToModify(0u) = ALLOCATION_HANDLE; EXPECT_TRUE(wddm->mapGpuVirtualAddress(&allocation)); } TEST(WddmDiscoverDevices, WhenNoHwDeviceIdIsProvidedToWddmThenWddmIsNotCreated) { struct MockWddm : public Wddm { MockWddm(std::unique_ptr &&hwDeviceIdIn, RootDeviceEnvironment &rootDeviceEnvironment) : Wddm(std::move(hwDeviceIdIn), rootDeviceEnvironment) {} }; MockExecutionEnvironment executionEnvironment; RootDeviceEnvironment rootDeviceEnvironment(executionEnvironment); EXPECT_THROW(auto wddm = std::make_unique(nullptr, rootDeviceEnvironment), std::exception); } TEST(WddmDiscoverDevices, WhenMultipleRootDevicesAreAvailableThenAllAreDiscovered) { VariableBackup backup{&numRootDevicesToEnum}; numRootDevicesToEnum = 3u; ExecutionEnvironment executionEnvironment; auto hwDeviceIds = OSInterface::discoverDevices(executionEnvironment); EXPECT_EQ(numRootDevicesToEnum, hwDeviceIds.size()); } TEST(WddmDiscoverDevices, givenMultipleRootDevicesExposedWhenCreateMultipleRootDevicesFlagIsSetToLowerValueThenDiscoverOnlySpecifiedNumberOfDevices) { DebugManagerStateRestore restorer{}; VariableBackup backup{&numRootDevicesToEnum}; numRootDevicesToEnum = 3u; uint32_t requestedNumRootDevices = 2u; 
DebugManager.flags.CreateMultipleRootDevices.set(requestedNumRootDevices); ExecutionEnvironment executionEnvironment; auto hwDeviceIds = OSInterface::discoverDevices(executionEnvironment); EXPECT_EQ(requestedNumRootDevices, hwDeviceIds.size()); } TEST(WddmDiscoverDevices, givenInvalidFirstAdapterWhenDiscoveringAdaptersThenReturnAllValidAdapters) { VariableBackup backup{&numRootDevicesToEnum, 2u}; VariableBackup backup2{&UltDXCoreAdapterList::firstInvalid, true}; ExecutionEnvironment executionEnvironment; auto hwDeviceIds = OSInterface::discoverDevices(executionEnvironment); EXPECT_EQ(1u, hwDeviceIds.size()); } TEST(WddmDiscoverDevices, givenMultipleRootDevicesExposedWhenCreateMultipleRootDevicesFlagIsSetToGreaterValueThenDiscoverSpecifiedNumberOfDevices) { DebugManagerStateRestore restorer{}; VariableBackup backup{&numRootDevicesToEnum}; numRootDevicesToEnum = 3u; uint32_t requestedNumRootDevices = 4u; DebugManager.flags.CreateMultipleRootDevices.set(requestedNumRootDevices); ExecutionEnvironment executionEnvironment; auto hwDeviceIds = OSInterface::discoverDevices(executionEnvironment); EXPECT_EQ(requestedNumRootDevices, hwDeviceIds.size()); } TEST(WddmDiscoverDevices, WhenAdapterDescriptionContainsVirtualRenderThenAdapterIsDiscovered) { VariableBackup descriptionBackup(&UltDxCoreAdapter::description); descriptionBackup = "Virtual Render"; ExecutionEnvironment executionEnvironment; auto hwDeviceIds = OSInterface::discoverDevices(executionEnvironment); EXPECT_EQ(1u, hwDeviceIds.size()); EXPECT_NE(nullptr, hwDeviceIds[0].get()); } TEST(Wddm20EnumAdaptersTest, WhenInitializingWddmThenHardwareInfoIsCorrectlyPopulated) { const HardwareInfo *hwInfo = defaultHwInfo.get(); std::unique_ptr mockGdiDll(setAdapterInfo(&hwInfo->platform, &hwInfo->gtSystemInfo, hwInfo->capabilityTable.gpuAddressSpace)); MockExecutionEnvironment executionEnvironment; RootDeviceEnvironment rootDeviceEnvironment(executionEnvironment); auto wddm = Wddm::createWddm(nullptr, rootDeviceEnvironment); 
// Remainder of the Wddm20EnumAdaptersTest test opened on the previous line: init() must succeed and
// the root device environment must report the same eDisplayCoreFamily as the input hwInfo.
    bool success = wddm->init();
    EXPECT_TRUE(success);
    EXPECT_EQ(rootDeviceEnvironment.getHardwareInfo()->platform.eDisplayCoreFamily, hwInfo->platform.eDisplayCoreFamily);
}

// After init(), compares the environment's hardware info field by field with the default hardware info.
TEST(Wddm20EnumAdaptersTest, givenEmptyHardwareInfoWhenEnumAdapterIsCalledThenCapabilityTableIsSet) {
    const HardwareInfo *hwInfo = defaultHwInfo.get();
    std::unique_ptr mockGdiDll(setAdapterInfo(&hwInfo->platform, &hwInfo->gtSystemInfo, hwInfo->capabilityTable.gpuAddressSpace));
    ExecutionEnvironment executionEnvironment;
    executionEnvironment.prepareRootDeviceEnvironments(1);
    auto rootDeviceEnvironment = executionEnvironment.rootDeviceEnvironments[0].get();
    auto wddm = Wddm::createWddm(nullptr, *rootDeviceEnvironment);
    bool success = wddm->init();
    // Snapshot taken after init() so the comparisons below see what init() stored.
    HardwareInfo outHwInfo = *rootDeviceEnvironment->getHardwareInfo();
    EXPECT_TRUE(success);
    EXPECT_EQ(outHwInfo.platform.eDisplayCoreFamily, hwInfo->platform.eDisplayCoreFamily);
    EXPECT_EQ(outHwInfo.capabilityTable.defaultProfilingTimerResolution, hwInfo->capabilityTable.defaultProfilingTimerResolution);
    EXPECT_EQ(outHwInfo.capabilityTable.clVersionSupport, hwInfo->capabilityTable.clVersionSupport);
    EXPECT_EQ(outHwInfo.capabilityTable.kmdNotifyProperties.enableKmdNotify, hwInfo->capabilityTable.kmdNotifyProperties.enableKmdNotify);
    EXPECT_EQ(outHwInfo.capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds, hwInfo->capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds);
    EXPECT_EQ(outHwInfo.capabilityTable.kmdNotifyProperties.enableQuickKmdSleep, hwInfo->capabilityTable.kmdNotifyProperties.enableQuickKmdSleep);
    EXPECT_EQ(outHwInfo.capabilityTable.kmdNotifyProperties.delayQuickKmdSleepMicroseconds, hwInfo->capabilityTable.kmdNotifyProperties.delayQuickKmdSleepMicroseconds);
}

// Sets the product family to IGFX_UNKNOWN before handing the info to setAdapterInfo
// (the call's remaining arguments and the rest of the test are on the next line).
TEST(Wddm20EnumAdaptersTest, givenUnknownPlatformWhenEnumAdapterIsCalledThenFalseIsReturnedAndOutputIsEmpty) {
    HardwareInfo hwInfo = *defaultHwInfo;
    hwInfo.platform.eProductFamily = IGFX_UNKNOWN;
    std::unique_ptr mockGdiDll(setAdapterInfo(&hwInfo.platform,
&hwInfo.gtSystemInfo, hwInfo.capabilityTable.gpuAddressSpace)); MockExecutionEnvironment executionEnvironment; RootDeviceEnvironment rootDeviceEnvironment(executionEnvironment); auto wddm = Wddm::createWddm(nullptr, rootDeviceEnvironment); auto ret = wddm->init(); EXPECT_FALSE(ret); // reset mock gdi hwInfo = *defaultHwInfo; mockGdiDll.reset(setAdapterInfo(&hwInfo.platform, &hwInfo.gtSystemInfo, hwInfo.capabilityTable.gpuAddressSpace)); } TEST_F(Wddm20Tests, whenInitializeWddmThenContextIsCreated) { auto context = osContext->getWddmContextHandle(); EXPECT_TRUE(context != static_cast(0)); } TEST_F(Wddm20Tests, whenCreatingContextWithPowerHintSuccessIsReturned) { auto newContext = osContext.get(); newContext->setUmdPowerHintValue(1); EXPECT_EQ(1, newContext->getUmdPowerHintValue()); wddm->createContext(*newContext); EXPECT_TRUE(wddm->createContext(*newContext)); } TEST_F(Wddm20Tests, whenInitPrivateDataThenDefaultValuesAreSet) { auto newContext = osContext.get(); CREATECONTEXT_PVTDATA PrivateData = initPrivateData(*newContext); EXPECT_FALSE(PrivateData.IsProtectedProcess); EXPECT_FALSE(PrivateData.IsDwm); EXPECT_TRUE(PrivateData.GpuVAContext); EXPECT_FALSE(PrivateData.IsMediaUsage); } TEST_F(Wddm20Tests, WhenCreatingAllocationAndDestroyingAllocationThenCorrectResultReturned) { OsAgnosticMemoryManager mm(*executionEnvironment); WddmAllocation allocation(0, AllocationType::UNKNOWN, mm.allocateSystemMemory(100, 0), 100, nullptr, MemoryPool::MemoryNull, 0u, 1u); Gmm *gmm = GmmHelperFunctions::getGmm(allocation.getUnderlyingBuffer(), allocation.getUnderlyingBufferSize(), getGmmClientContext()); allocation.setDefaultGmm(gmm); auto status = wddm->createAllocation(&allocation); EXPECT_EQ(STATUS_SUCCESS, status); EXPECT_TRUE(allocation.getDefaultHandle() != 0); auto error = wddm->destroyAllocation(&allocation, osContext.get()); EXPECT_TRUE(error); delete gmm; mm.freeSystemMemory(allocation.getUnderlyingBuffer()); } TEST_F(Wddm20WithMockGdiDllTests, 
// Test name and body for the TEST_F(Wddm20WithMockGdiDllTests, ...) opened at the end of the previous
// line. An allocation of 0x2100 bytes is created and mapped; the SizeInPages seen by the mock must
// equal 0x3000 / pageSize and differ from 0x2100 / pageSize.
        givenAllocationSmallerUnderlyingThanAlignedSizeWhenCreatedThenWddmUseAligned) {
    void *ptr = reinterpret_cast(wddm->virtualAllocAddress + 0x1000);
    size_t underlyingSize = 0x2100;
    size_t alignedSize = 0x3000;

    size_t underlyingPages = underlyingSize / MemoryConstants::pageSize;
    size_t alignedPages = alignedSize / MemoryConstants::pageSize;

    WddmAllocation allocation(0, AllocationType::UNKNOWN, ptr, 0x2100, nullptr, MemoryPool::MemoryNull, 0u, 1u);
    Gmm *gmm = GmmHelperFunctions::getGmm(allocation.getAlignedCpuPtr(), allocation.getAlignedSize(), getGmmClientContext());

    allocation.setDefaultGmm(gmm);
    auto status = wddm->createAllocation(&allocation);

    EXPECT_EQ(STATUS_SUCCESS, status);
    EXPECT_NE(0, allocation.getDefaultHandle());

    bool ret = wddm->mapGpuVirtualAddress(&allocation);
    EXPECT_TRUE(ret);

    EXPECT_EQ(alignedPages, getLastCallMapGpuVaArgFcn()->SizeInPages);
    EXPECT_NE(underlyingPages, getLastCallMapGpuVaArgFcn()->SizeInPages);

    ret = wddm->destroyAllocation(&allocation, osContext.get());
    EXPECT_TRUE(ret);

    // The Gmm is owned by a raw pointer in this test and released here.
    delete gmm;
}

// Reserves 64 KB inside Heap32[0] and checks the arguments recorded by the mock:
// zero base address, heap base/limit as min/max, the requested size and the paging queue.
TEST_F(Wddm20WithMockGdiDllTests, givenReserveCallWhenItIsCalledWithProperParamtersThenAddressInRangeIsReturend) {
    auto sizeAlignedTo64Kb = 64 * KB;

    auto reservationAddress = wddm->reserveGpuVirtualAddress(wddm->getGfxPartition().Heap32[0].Base,
                                                             wddm->getGfxPartition().Heap32[0].Limit,
                                                             sizeAlignedTo64Kb);

    EXPECT_GE(reservationAddress, wddm->getGfxPartition().Heap32[0].Base);
    auto programmedReserved = getLastCallReserveGpuVaArgFcn();
    EXPECT_EQ(0llu, programmedReserved->BaseAddress);
    EXPECT_EQ(wddm->getGfxPartition().Heap32[0].Base, programmedReserved->MinimumAddress);
    EXPECT_EQ(wddm->getGfxPartition().Heap32[0].Limit, programmedReserved->MaximumAddress);
    EXPECT_EQ(sizeAlignedTo64Kb, programmedReserved->Size);

    auto pagingQueue = wddm->getPagingQueue();
    EXPECT_NE(0llu, pagingQueue);
    EXPECT_EQ(pagingQueue, programmedReserved->hPagingQueue);
}

// Start of the next test; the initializer of fakePtr continues on the following line.
TEST_F(Wddm20WithMockGdiDllTests, givenWddmAllocationWhenMappingGpuVaThenUseGmmSize) {
    void *fakePtr =
// Initializer of fakePtr and the rest of the test opened on the previous line. The mock resource
// info is told to report the aligned size plus two pages, and the SizeInPages recorded by the
// map-VA mock must match getSizeAllocation() / pageSize.
// NOTE(review): template argument lists on the casts / std::unique_ptr look stripped in this copy - confirm upstream.
        reinterpret_cast(0x123);
    WddmAllocation allocation(0, AllocationType::UNKNOWN, fakePtr, 100, nullptr, MemoryPool::MemoryNull, 0u, 1u);
    std::unique_ptr gmm(GmmHelperFunctions::getGmm(allocation.getAlignedCpuPtr(), allocation.getAlignedSize(), getGmmClientContext()));

    allocation.setDefaultGmm(gmm.get());
    auto status = wddm->createAllocation(&allocation);
    EXPECT_EQ(STATUS_SUCCESS, status);

    auto mockResourceInfo = static_cast(gmm->gmmResourceInfo.get());
    mockResourceInfo->overrideReturnedSize(allocation.getAlignedSize() + (2 * MemoryConstants::pageSize));

    // Return value is not checked here; only the recorded argument is inspected below.
    wddm->mapGpuVirtualAddress(&allocation);

    uint64_t expectedSizeInPages = static_cast(mockResourceInfo->getSizeAllocation() / MemoryConstants::pageSize);
    EXPECT_EQ(expectedSizeInPages, getLastCallMapGpuVaArgFcn()->SizeInPages);
}

// Maps a Gmm between the base and limit of the HEAP_INTERNAL_DEVICE_MEMORY heap and expects the
// resulting GPU address to lie within the canonized base and limit.
TEST_F(Wddm20Tests, givenGraphicsAllocationWhenItIsMappedInHeap0ThenItHasGpuAddressWithinHeapInternalLimits) {
    void *alignedPtr = (void *)0x12000;
    size_t alignedSize = 0x2000;
    std::unique_ptr gmm(GmmHelperFunctions::getGmm(alignedPtr, alignedSize, getGmmClientContext()));
    uint64_t gpuAddress = 0u;
    auto heapBase = wddm->getGfxPartition().Heap32[static_cast(HeapIndex::HEAP_INTERNAL_DEVICE_MEMORY)].Base;
    auto heapLimit = wddm->getGfxPartition().Heap32[static_cast(HeapIndex::HEAP_INTERNAL_DEVICE_MEMORY)].Limit;

    bool ret = wddm->mapGpuVirtualAddress(gmm.get(), ALLOCATION_HANDLE, heapBase, heapLimit, 0u, gpuAddress);
    EXPECT_TRUE(ret);

    auto cannonizedHeapBase = GmmHelper::canonize(heapBase);
    auto cannonizedHeapEnd = GmmHelper::canonize(heapLimit);

    EXPECT_GE(gpuAddress, cannonizedHeapBase);
    EXPECT_LE(gpuAddress, cannonizedHeapEnd);
}

// With the monitored fence's cpuAddress set to nullptr, isGpuHangDetected must return false.
TEST_F(Wddm20WithMockGdiDllTests, GivenInvalidCpuAddressWhenCheckingForGpuHangThenFalseIsReturned) {
    osContext->getResidencyController().getMonitoredFence().cpuAddress = nullptr;
    EXPECT_FALSE(wddm->isGpuHangDetected(*osContext));
}

// Start of the next test; the declaration of cpuValue continues on the following line.
TEST_F(Wddm20WithMockGdiDllTests, GivenCpuValueDifferentThanGpuHangIndicationWhenCheckingForGpuHangThenFalseIsReturned) {
    constexpr auto
cpuValue{777u}; ASSERT_NE(NEO::Wddm::gpuHangIndication, cpuValue); *osContext->getResidencyController().getMonitoredFence().cpuAddress = cpuValue; EXPECT_FALSE(wddm->isGpuHangDetected(*osContext)); } TEST_F(Wddm20WithMockGdiDllTests, GivenGpuHangIndicationWhenCheckingForGpuHangThenTrueIsReturned) { *osContext->getResidencyController().getMonitoredFence().cpuAddress = NEO::Wddm::gpuHangIndication; EXPECT_TRUE(wddm->isGpuHangDetected(*osContext)); } TEST_F(Wddm20WithMockGdiDllTests, GivenThreeOsHandlesWhenAskedForDestroyAllocationsThenAllMarkedAllocationsAreDestroyed) { OsHandleStorage storage; OsHandleWin osHandle1; OsHandleWin osHandle2; OsHandleWin osHandle3; osHandle1.handle = ALLOCATION_HANDLE; osHandle2.handle = ALLOCATION_HANDLE; osHandle3.handle = ALLOCATION_HANDLE; storage.fragmentStorageData[0].osHandleStorage = &osHandle1; storage.fragmentStorageData[0].freeTheFragment = true; storage.fragmentStorageData[1].osHandleStorage = &osHandle2; storage.fragmentStorageData[1].freeTheFragment = false; storage.fragmentStorageData[2].osHandleStorage = &osHandle3; storage.fragmentStorageData[2].freeTheFragment = true; D3DKMT_HANDLE handles[3] = {ALLOCATION_HANDLE, ALLOCATION_HANDLE, ALLOCATION_HANDLE}; bool retVal = wddm->destroyAllocations(handles, 3, 0); EXPECT_TRUE(retVal); auto destroyWithResourceHandleCalled = 0u; D3DKMT_DESTROYALLOCATION2 *ptrToDestroyAlloc2 = nullptr; getSizesFcn(destroyWithResourceHandleCalled, ptrToDestroyAlloc2); EXPECT_EQ(0u, ptrToDestroyAlloc2->Flags.SynchronousDestroy); EXPECT_EQ(1u, ptrToDestroyAlloc2->Flags.AssumeNotInUse); } TEST_F(Wddm20Tests, WhenMappingAndFreeingGpuVaThenReturnIsCorrect) { OsAgnosticMemoryManager mm(*executionEnvironment); WddmAllocation allocation(0, AllocationType::UNKNOWN, mm.allocateSystemMemory(100, 0), 100, nullptr, MemoryPool::MemoryNull, 0u, 1u); Gmm *gmm = GmmHelperFunctions::getGmm(allocation.getUnderlyingBuffer(), allocation.getUnderlyingBufferSize(), getGmmClientContext()); allocation.setDefaultGmm(gmm); 
// Remainder of the map/free GPU VA test opened on the previous line: create, map (non-zero GPU
// address expected), free the VA (address expected to read back as zero), then destroy.
    auto status = wddm->createAllocation(&allocation);

    EXPECT_EQ(STATUS_SUCCESS, status);
    EXPECT_TRUE(allocation.getDefaultHandle() != 0);

    auto error = wddm->mapGpuVirtualAddress(&allocation);
    EXPECT_TRUE(error);
    EXPECT_TRUE(allocation.getGpuAddress() != 0);

    error = wddm->freeGpuVirtualAddress(allocation.getGpuAddressToModify(), allocation.getUnderlyingBufferSize());
    EXPECT_TRUE(error);
    EXPECT_TRUE(allocation.getGpuAddress() == 0);

    error = wddm->destroyAllocation(&allocation, osContext.get());
    EXPECT_TRUE(error);
    delete gmm;
    mm.freeSystemMemory(allocation.getUnderlyingBuffer());
}

// Allocation constructed with a null CPU pointer: creation and mapping must succeed and the GPU
// address must be non-zero and equal to its own canonized form.
// NOTE(review): std::unique_ptr below has no template argument in this copy - confirm upstream.
TEST_F(Wddm20Tests, givenNullAllocationWhenCreateThenAllocateAndMap) {
    OsAgnosticMemoryManager mm(*executionEnvironment);

    WddmAllocation allocation(0, AllocationType::UNKNOWN, nullptr, 100, nullptr, MemoryPool::MemoryNull, 0u, 1u);
    auto gmm = std::unique_ptr(GmmHelperFunctions::getGmm(allocation.getUnderlyingBuffer(), allocation.getUnderlyingBufferSize(), getGmmClientContext()));

    allocation.setDefaultGmm(gmm.get());
    auto status = wddm->createAllocation(&allocation);
    EXPECT_EQ(STATUS_SUCCESS, status);

    bool ret = wddm->mapGpuVirtualAddress(&allocation);
    EXPECT_TRUE(ret);

    EXPECT_NE(0u, allocation.getGpuAddress());
    EXPECT_EQ(allocation.getGpuAddress(), GmmHelper::canonize(allocation.getGpuAddress()));

    mm.freeSystemMemory(allocation.getUnderlyingBuffer());
}

// Allocation constructed with the shareable argument set to true: the create-allocation request
// captured by the mock must have CreateShared set (the CreateResource check continues on the next line).
TEST_F(WddmTestWithMockGdiDll, givenShareableAllocationWhenCreateThenCreateResourceFlagIsEnabled) {
    init();
    WddmAllocation allocation(0, AllocationType::UNKNOWN, nullptr, MemoryConstants::pageSize, nullptr, MemoryPool::MemoryNull, true, 1u);
    auto gmm = std::unique_ptr(GmmHelperFunctions::getGmm(nullptr, MemoryConstants::pageSize, getGmmClientContext()));
    allocation.setDefaultGmm(gmm.get());
    auto status = wddm->createAllocation(&allocation);
    EXPECT_EQ(STATUS_SUCCESS, status);
    auto passedCreateAllocation = getMockAllocationFcn();
    EXPECT_EQ(TRUE, passedCreateAllocation->Flags.CreateShared);
    EXPECT_EQ(TRUE,
// Second argument of the EXPECT_EQ(TRUE, ...) opened on the previous line; closes that test.
              passedCreateAllocation->Flags.CreateResource);
}

// Creates a shareable allocation through createGpuAllocationsWithRetry and expects a non-zero
// internal handle afterwards.
// NOTE(review): MemoryManagerCreate / std::unique_ptr have no template arguments in this copy - confirm upstream.
TEST_F(WddmTestWithMockGdiDll, givenShareableAllocationWhenCreateThenSharedHandleAndResourceHandleAreSet) {
    init();
    // Re-exports createGpuAllocationsWithRetry and the base constructors so the test can use them.
    struct MockWddmMemoryManager : public WddmMemoryManager {
        using WddmMemoryManager::createGpuAllocationsWithRetry;
        using WddmMemoryManager::WddmMemoryManager;
    };
    MemoryManagerCreate memoryManager(false, false, *executionEnvironment);
    WddmAllocation allocation(0, AllocationType::UNKNOWN, nullptr, MemoryConstants::pageSize, nullptr, MemoryPool::MemoryNull, true, 1u);
    auto gmm = std::unique_ptr(GmmHelperFunctions::getGmm(nullptr, MemoryConstants::pageSize, getGmmClientContext()));
    allocation.setDefaultGmm(gmm.get());
    auto status = memoryManager.createGpuAllocationsWithRetry(&allocation);
    EXPECT_TRUE(status);
    EXPECT_NE(0u, allocation.peekInternalHandle(&memoryManager));
}

// For a shareable allocation, the value written through getSharedHandleToModify() must be what
// peekInternalHandle returns.
TEST(WddmAllocationTest, whenAllocationIsShareableThenSharedHandleToModifyIsSharedHandleOfAllocation) {
    WddmAllocation allocation(0, AllocationType::UNKNOWN, nullptr, MemoryConstants::pageSize, nullptr, MemoryPool::MemoryNull, true, 1u);
    auto sharedHandleToModify = allocation.getSharedHandleToModify();
    EXPECT_NE(nullptr, sharedHandleToModify);
    *sharedHandleToModify = 1234u;
    EXPECT_EQ(*sharedHandleToModify, allocation.peekInternalHandle(nullptr));
}

// For a non-shareable allocation, getSharedHandleToModify() must return nullptr.
TEST(WddmAllocationTest, whenAllocationIsNotShareableThenItDoesntReturnSharedHandleToModify) {
    WddmAllocation allocation(0, AllocationType::UNKNOWN, nullptr, MemoryConstants::pageSize, nullptr, MemoryPool::MemoryNull, false, 1u);
    auto sharedHandleToModify = allocation.getSharedHandleToModify();
    EXPECT_EQ(nullptr, sharedHandleToModify);
}

// Start of the make-resident/evict test; the getGmm(...) argument list continues on the next line.
TEST_F(Wddm20Tests, WhenMakingResidentAndEvictingThenReturnIsCorrect) {
    OsAgnosticMemoryManager mm(*executionEnvironment);
    WddmAllocation allocation(0, AllocationType::UNKNOWN, mm.allocateSystemMemory(100, 0), 100, nullptr, MemoryPool::MemoryNull, 0u, 1u);
    Gmm *gmm = GmmHelperFunctions::getGmm(allocation.getUnderlyingBuffer(),
allocation.getUnderlyingBufferSize(), getGmmClientContext()); allocation.setDefaultGmm(gmm); auto status = wddm->createAllocation(&allocation); EXPECT_EQ(STATUS_SUCCESS, status); EXPECT_TRUE(allocation.getDefaultHandle() != 0); auto error = wddm->mapGpuVirtualAddress(&allocation); EXPECT_TRUE(error); EXPECT_TRUE(allocation.getGpuAddress() != 0); error = wddm->makeResident(&allocation.getHandles()[0], allocation.getNumGmms(), false, nullptr, allocation.getAlignedSize()); EXPECT_TRUE(error); uint64_t sizeToTrim; error = wddm->evict(&allocation.getHandles()[0], allocation.getNumGmms(), sizeToTrim); EXPECT_TRUE(error); auto monitoredFence = osContext->getResidencyController().getMonitoredFence(); UINT64 fenceValue = 100; monitoredFence.cpuAddress = &fenceValue; monitoredFence.currentFenceValue = 101; error = wddm->destroyAllocation(&allocation, osContext.get()); EXPECT_TRUE(error); delete gmm; mm.freeSystemMemory(allocation.getUnderlyingBuffer()); } TEST_F(Wddm20WithMockGdiDllTests, givenSharedHandleWhenCreateGraphicsAllocationFromSharedHandleIsCalledThenGraphicsAllocationWithSharedPropertiesIsCreated) { void *pSysMem = (void *)0x1000; std::unique_ptr gmm(new Gmm(getGmmClientContext(), pSysMem, 4096u, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true)); auto status = setSizesFcn(gmm->gmmResourceInfo.get(), 1u, 1024u, 1u); EXPECT_EQ(0u, status); MemoryManagerCreate mm(false, false, *executionEnvironment); AllocationProperties properties(0, false, 4096u, AllocationType::SHARED_BUFFER, false, {}); auto graphicsAllocation = mm.createGraphicsAllocationFromSharedHandle(ALLOCATION_HANDLE, properties, false, false); auto wddmAllocation = (WddmAllocation *)graphicsAllocation; ASSERT_NE(nullptr, wddmAllocation); EXPECT_EQ(ALLOCATION_HANDLE, wddmAllocation->peekSharedHandle()); EXPECT_EQ(RESOURCE_HANDLE, wddmAllocation->resourceHandle); EXPECT_NE(0u, wddmAllocation->getDefaultHandle()); EXPECT_EQ(ALLOCATION_HANDLE, wddmAllocation->getDefaultHandle()); EXPECT_NE(0u, 
wddmAllocation->getGpuAddress()); EXPECT_EQ(4096u, wddmAllocation->getUnderlyingBufferSize()); EXPECT_EQ(nullptr, wddmAllocation->getAlignedCpuPtr()); EXPECT_NE(nullptr, wddmAllocation->getDefaultGmm()); EXPECT_EQ(4096u, wddmAllocation->getDefaultGmm()->gmmResourceInfo->getSizeAllocation()); mm.freeGraphicsMemory(graphicsAllocation); auto destroyWithResourceHandleCalled = 0u; D3DKMT_DESTROYALLOCATION2 *ptrToDestroyAlloc2 = nullptr; status = getSizesFcn(destroyWithResourceHandleCalled, ptrToDestroyAlloc2); EXPECT_EQ(0u, ptrToDestroyAlloc2->Flags.SynchronousDestroy); EXPECT_EQ(1u, ptrToDestroyAlloc2->Flags.AssumeNotInUse); EXPECT_EQ(0u, status); EXPECT_EQ(1u, destroyWithResourceHandleCalled); } TEST_F(Wddm20WithMockGdiDllTests, givenSharedHandleWhenCreateGraphicsAllocationFromSharedHandleIsCalledThenMapGpuVaWithCpuPtrDepensOnBitness) { void *pSysMem = (void *)0x1000; std::unique_ptr gmm(new Gmm(getGmmClientContext(), pSysMem, 4096u, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true)); auto status = setSizesFcn(gmm->gmmResourceInfo.get(), 1u, 1024u, 1u); EXPECT_EQ(0u, status); MemoryManagerCreate mm(false, false, *executionEnvironment); AllocationProperties properties(0, false, 4096, AllocationType::SHARED_BUFFER, false, {}); auto graphicsAllocation = mm.createGraphicsAllocationFromSharedHandle(ALLOCATION_HANDLE, properties, false, false); auto wddmAllocation = (WddmAllocation *)graphicsAllocation; ASSERT_NE(nullptr, wddmAllocation); if (is32bit) { EXPECT_NE(wddm->mapGpuVirtualAddressResult.cpuPtrPassed, nullptr); } else { EXPECT_EQ(wddm->mapGpuVirtualAddressResult.cpuPtrPassed, nullptr); } mm.freeGraphicsMemory(graphicsAllocation); } TEST_F(Wddm20Tests, givenWddmCreatedWhenInitedThenMinAddressValid) { uintptr_t expected = windowsMinAddress; uintptr_t actual = wddm->getWddmMinAddress(); EXPECT_EQ(expected, actual); } HWTEST_F(Wddm20InstrumentationTest, WhenConfiguringDeviceAddressSpaceThenTrueIsReturned) { SYSTEM_INFO sysInfo = {}; WddmMock::getSystemInfo(&sysInfo); 
// Remainder of the HWTEST_F opened on the previous line: after init(), the parameters captured by
// gmmMem for configureDeviceAddressSpace must match the adapter/device handles, the escape
// function, the expected svmSize and the L3 coherency flag.
// NOTE(review): template arguments on reinterpret_cast / static_cast / std::make_unique look stripped in this copy.
    D3DKMT_HANDLE adapterHandle = ADAPTER_HANDLE;
    D3DKMT_HANDLE deviceHandle = DEVICE_HANDLE;
    const HardwareInfo hwInfo = *defaultHwInfo;
    BOOLEAN FtrL3IACoherency = hwInfo.featureTable.flags.ftrL3IACoherency ? 1 : 0;
    // Expected svmSize: max application address + 1 when the GPU address space reaches
    // max64BitAppAddress, otherwise 0.
    uintptr_t maxAddr = hwInfo.capabilityTable.gpuAddressSpace >= MemoryConstants::max64BitAppAddress
                            ? reinterpret_cast(sysInfo.lpMaximumApplicationAddress) + 1
                            : 0;

    wddm->init();
    EXPECT_EQ(1u, gmmMem->configureDeviceAddressSpaceCalled);
    EXPECT_EQ(adapterHandle, gmmMem->configureDeviceAddressSpaceParamsPassed[0].hAdapter);
    EXPECT_EQ(deviceHandle, gmmMem->configureDeviceAddressSpaceParamsPassed[0].hDevice);
    EXPECT_EQ(wddm->getGdi()->escape.mFunc, gmmMem->configureDeviceAddressSpaceParamsPassed[0].pfnEscape);
    EXPECT_EQ(maxAddr, gmmMem->configureDeviceAddressSpaceParamsPassed[0].svmSize);
    EXPECT_EQ(FtrL3IACoherency, gmmMem->configureDeviceAddressSpaceParamsPassed[0].bdwL3Coherency);
}

// After swapping in a freshly constructed Gdi object, configureDeviceAddressSpace must return
// false and gmmMem must not be called.
TEST_F(Wddm20InstrumentationTest, GivenNoAdapterWhenConfiguringDeviceAddressSpaceThenFalseIsReturned) {
    auto gdi = std::make_unique();
    wddm->resetGdi(gdi.release());

    auto ret = wddm->configureDeviceAddressSpace();

    EXPECT_FALSE(ret);
    EXPECT_EQ(0u, gmmMem->configureDeviceAddressSpaceCalled);
}

// With the device handle zeroed, configureDeviceAddressSpace must return false without calling gmmMem.
TEST_F(Wddm20InstrumentationTest, GivenNoDeviceWhenConfiguringDeviceAddressSpaceThenFalseIsReturned) {
    wddm->device = static_cast(0);

    auto ret = wddm->configureDeviceAddressSpace();

    EXPECT_FALSE(ret);
    EXPECT_EQ(0u, gmmMem->configureDeviceAddressSpaceCalled);
}

// With the escape function nulled, configureDeviceAddressSpace must return false without calling gmmMem.
TEST_F(Wddm20InstrumentationTest, GivenNoEscFuncWhenConfiguringDeviceAddressSpaceThenFalseIsReturned) {
    wddm->getGdi()->escape = static_cast(nullptr);

    auto ret = wddm->configureDeviceAddressSpace();

    EXPECT_FALSE(ret);
    EXPECT_EQ(0u, gmmMem->configureDeviceAddressSpaceCalled);
}

// getMaxApplicationAddress() must equal max32BitAppAddress when is32bit is true, otherwise
// max64BitAppAddress (the second comparison continues on the next line).
TEST_F(Wddm20Tests, WhenGettingMaxApplicationAddressThen32Or64BitIsCorrectlyReturned) {
    uint64_t maxAddr = wddm->getMaxApplicationAddress();
    if (is32bit) {
        EXPECT_EQ(maxAddr, MemoryConstants::max32BitAppAddress);
    } else {
        EXPECT_EQ(maxAddr,
// Second argument of the EXPECT_EQ(maxAddr, ...) opened on the previous line; closes the else
// branch and that test.
                  MemoryConstants::max64BitAppAddress);
    }
}

// With UseNoRingFlushesKmdMode set to false before init(), the private data passed to create
// context must have NoRingFlushes cleared.
TEST_F(Wddm20WithMockGdiDllTestsWithoutWddmInit, givenUseNoRingFlushesKmdModeDebugFlagToFalseWhenCreateContextIsCalledThenNoRingFlushesKmdModeIsSetToFalse) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.UseNoRingFlushesKmdMode.set(false);
    init();
    auto createContextParams = this->getCreateContextDataFcn();
    auto privateData = (CREATECONTEXT_PVTDATA *)createContextParams->pPrivateDriverData;
    EXPECT_FALSE(!!privateData->NoRingFlushes);
}

// The ClientHint recorded for create context must be D3DKMT_CLIENTHINT_OPENCL.
TEST_F(Wddm20WithMockGdiDllTestsWithoutWddmInit, givenCreateContextCallWhenDriverHintsThenItPointsToOpenCL) {
    init();
    auto createContextParams = this->getCreateContextDataFcn();
    EXPECT_EQ(D3DKMT_CLIENTHINT_OPENCL, createContextParams->ClientHint);
}

// With UseNoRingFlushesKmdMode set to true before init(), NoRingFlushes must be set in the private data.
TEST_F(Wddm20WithMockGdiDllTestsWithoutWddmInit, givenUseNoRingFlushesKmdModeDebugFlagToTrueWhenCreateContextIsCalledThenNoRingFlushesKmdModeIsSetToTrue) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.UseNoRingFlushesKmdMode.set(true);
    init();
    auto createContextParams = this->getCreateContextDataFcn();
    auto privateData = (CREATECONTEXT_PVTDATA *)createContextParams->pPrivateDriverData;
    EXPECT_TRUE(!!privateData->NoRingFlushes);
}

// NodeOrdinal passed to create context must equal the engine-node mapping of the first GPGPU
// engine instance reported for the default hardware info.
TEST_F(Wddm20WithMockGdiDllTestsWithoutWddmInit, givenEngineTypeWhenCreatingContextThenPassCorrectNodeOrdinal) {
    init();
    auto createContextParams = this->getCreateContextDataFcn();
    UINT expected = WddmEngineMapper::engineNodeMap(HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*defaultHwInfo)[0].first);
    EXPECT_EQ(expected, createContextParams->NodeOrdinal);
}

// The wddm interface must report no HW queue support and the create-context flags must agree.
TEST_F(Wddm20WithMockGdiDllTests, whenCreateContextIsCalledThenDisableHwQueues) {
    EXPECT_FALSE(wddm->wddmInterface->hwQueuesSupported());
    EXPECT_EQ(0u, getCreateContextDataFcn()->Flags.HwQueueSupported);
}

// Captures the monitored fence handle before the OS context is reset on the next line.
TEST_F(Wddm20WithMockGdiDllTests, givenDestructionOsContextWinWhenCallingDestroyMonitorFenceThenDoCallGdiDestroy) {
    auto fenceHandle = osContext->getResidencyController().getMonitoredFence().fenceHandle;
// Remainder of the test opened on the previous line: resetting the OS context must trigger one
// destroyMonitorFence call, and the destroyed sync object must be the fence handle captured earlier.
    osContext.reset(nullptr);
    EXPECT_EQ(1u, wddmMockInterface->destroyMonitorFenceCalled);
    EXPECT_EQ(fenceHandle, getDestroySynchronizationObjectDataFcn()->hSyncObject);
}

// createHwQueue on this fixture's wddm interface must return false.
TEST_F(Wddm20Tests, whenCreateHwQueueIsCalledThenAlwaysReturnFalse) {
    EXPECT_FALSE(wddm->wddmInterface->createHwQueue(*osContext.get()));
}

// The three HW queue entry points of the Gdi object must be null.
TEST_F(Wddm20Tests, whenWddmIsInitializedThenGdiDoesntHaveHwQueueDDIs) {
    EXPECT_EQ(nullptr, wddm->getGdi()->createHwQueue.mFunc);
    EXPECT_EQ(nullptr, wddm->getGdi()->destroyHwQueue.mFunc);
    EXPECT_EQ(nullptr, wddm->getGdi()->submitCommandToHwQueue.mFunc);
}

// The UseNoRingFlushesKmdMode debug flag must read as true.
TEST(DebugFlagTest, givenDebugManagerWhenGetForUseNoRingFlushesKmdModeIsCalledThenTrueIsReturned) {
    EXPECT_TRUE(DebugManager.flags.UseNoRingFlushesKmdMode.get());
}

// With the base makeResident enabled, two handles must reach the Gdi mock as NumAllocations == 2
// with AllocationList pointing at the caller's array.
TEST_F(Wddm20Tests, GivenMultipleHandlesWhenMakingResidentThenAllocationListIsCorrect) {
    D3DKMT_HANDLE handles[2] = {ALLOCATION_HANDLE, ALLOCATION_HANDLE};
    gdi->getMakeResidentArg().NumAllocations = 0;
    gdi->getMakeResidentArg().AllocationList = nullptr;
    wddm->callBaseMakeResident = true;

    bool error = wddm->makeResident(handles, 2, false, nullptr, 0x1000);
    EXPECT_TRUE(error);

    EXPECT_EQ(2u, gdi->getMakeResidentArg().NumAllocations);
    EXPECT_EQ(handles, gdi->getMakeResidentArg().AllocationList);
}

// NumBytesToTrim preset on the Gdi mock (30) must be reported back through the bytesToTrim out-parameter.
TEST_F(Wddm20Tests, GivenMultipleHandlesWhenMakingResidentThenBytesToTrimIsCorrect) {
    D3DKMT_HANDLE handles[2] = {ALLOCATION_HANDLE, ALLOCATION_HANDLE};

    gdi->getMakeResidentArg().NumAllocations = 0;
    gdi->getMakeResidentArg().AllocationList = nullptr;
    gdi->getMakeResidentArg().NumBytesToTrim = 30;
    wddm->callBaseMakeResident = true;

    uint64_t bytesToTrim = 0;
    bool success = wddm->makeResident(handles, 2, false, &bytesToTrim, 0x1000);
    EXPECT_TRUE(success);

    EXPECT_EQ(gdi->getMakeResidentArg().NumBytesToTrim, bytesToTrim);
}

// Start of the evict test: clears the recorded evict arguments (setup continues on the next line).
TEST_F(Wddm20Tests, WhenMakingNonResidentThenEvictIsCalled) {
    D3DKMT_HANDLE handle = (D3DKMT_HANDLE)0x1234;

    gdi->getEvictArg().AllocationList = nullptr;
    gdi->getEvictArg().Flags.Value = 0;
    gdi->getEvictArg().hDevice = 0;
// Remainder of the evict test opened on the previous line: with the base evict enabled, the Gdi
// mock must record one allocation, the caller's handle pointer, the device handle, and a
// NumBytesToTrim of 0 (it was preset to 20).
    gdi->getEvictArg().NumAllocations = 0;
    gdi->getEvictArg().NumBytesToTrim = 20;
    wddm->callBaseEvict = true;

    uint64_t sizeToTrim = 10;
    wddm->evict(&handle, 1, sizeToTrim);

    EXPECT_EQ(1u, gdi->getEvictArg().NumAllocations);
    EXPECT_EQ(&handle, gdi->getEvictArg().AllocationList);
    EXPECT_EQ(wddm->getDeviceHandle(), gdi->getEvictArg().hDevice);
    EXPECT_EQ(0u, gdi->getEvictArg().NumBytesToTrim);
}

// Clears the recorded wait/destroy arguments, destroys a mock allocation and checks that the
// destroy request carries the device handle, one allocation and a non-null allocation list.
TEST_F(Wddm20Tests, givenDestroyAllocationWhenItIsCalledThenAllocationIsPassedToDestroyAllocation) {
    MockWddmAllocation allocation(rootDeviceEnvironment->getGmmClientContext());
    allocation.getResidencyData().updateCompletionData(10, osContext->getContextId());
    allocation.handle = ALLOCATION_HANDLE;

    // Fence value equals the completion value recorded on the allocation above.
    *osContext->getResidencyController().getMonitoredFence().cpuAddress = 10;

    gdi->getWaitFromCpuArg().FenceValueArray = nullptr;
    gdi->getWaitFromCpuArg().Flags.Value = 0;
    gdi->getWaitFromCpuArg().hDevice = (D3DKMT_HANDLE)0;
    gdi->getWaitFromCpuArg().ObjectCount = 0;
    gdi->getWaitFromCpuArg().ObjectHandleArray = nullptr;

    gdi->getDestroyArg().AllocationCount = 0;
    gdi->getDestroyArg().Flags.Value = 0;
    gdi->getDestroyArg().hDevice = (D3DKMT_HANDLE)0;
    gdi->getDestroyArg().hResource = (D3DKMT_HANDLE)0;
    gdi->getDestroyArg().phAllocationList = nullptr;

    wddm->destroyAllocation(&allocation, osContext.get());

    EXPECT_EQ(wddm->getDeviceHandle(), gdi->getDestroyArg().hDevice);
    EXPECT_EQ(1u, gdi->getDestroyArg().AllocationCount);
    EXPECT_NE(nullptr, gdi->getDestroyArg().phAllocationList);
}

// Start of the waitFromCpu test where the requested fence (10) equals the monitored value (10);
// setup continues on the next line.
TEST_F(Wddm20Tests, WhenLastFenceLessEqualThanMonitoredThenWaitFromCpuIsNotCalled) {
    MockWddmAllocation allocation(rootDeviceEnvironment->getGmmClientContext());
    allocation.getResidencyData().updateCompletionData(10, osContext->getContextId());
    allocation.handle = ALLOCATION_HANDLE;

    *osContext->getResidencyController().getMonitoredFence().cpuAddress = 10;

    gdi->getWaitFromCpuArg().FenceValueArray = nullptr;
    gdi->getWaitFromCpuArg().Flags.Value = 0;
    gdi->getWaitFromCpuArg().hDevice = (D3DKMT_HANDLE)0;
// Remainder of the test opened on the previous line: waitFromCpu(10, ...) must succeed and leave
// the recorded wait arguments untouched (still null / zero).
    gdi->getWaitFromCpuArg().ObjectCount = 0;
    gdi->getWaitFromCpuArg().ObjectHandleArray = nullptr;

    auto status = wddm->waitFromCpu(10, osContext->getResidencyController().getMonitoredFence());

    EXPECT_TRUE(status);

    EXPECT_EQ(nullptr, gdi->getWaitFromCpuArg().FenceValueArray);
    EXPECT_EQ((D3DKMT_HANDLE)0, gdi->getWaitFromCpuArg().hDevice);
    EXPECT_EQ(0u, gdi->getWaitFromCpuArg().ObjectCount);
    EXPECT_EQ(nullptr, gdi->getWaitFromCpuArg().ObjectHandleArray);
}

// Monitored value is 10 and the requested fence is 20: the Gdi mock must record a wait with the
// device handle, one object and non-null fence/handle arrays.
TEST_F(Wddm20Tests, WhenLastFenceGreaterThanMonitoredThenWaitFromCpuIsCalled) {
    MockWddmAllocation allocation(rootDeviceEnvironment->getGmmClientContext());
    allocation.getResidencyData().updateCompletionData(10, osContext->getContextId());
    allocation.handle = ALLOCATION_HANDLE;

    *osContext->getResidencyController().getMonitoredFence().cpuAddress = 10;

    gdi->getWaitFromCpuArg().FenceValueArray = nullptr;
    gdi->getWaitFromCpuArg().Flags.Value = 0;
    gdi->getWaitFromCpuArg().hDevice = (D3DKMT_HANDLE)0;
    gdi->getWaitFromCpuArg().ObjectCount = 0;
    gdi->getWaitFromCpuArg().ObjectHandleArray = nullptr;

    auto status = wddm->waitFromCpu(20, osContext->getResidencyController().getMonitoredFence());

    EXPECT_TRUE(status);

    EXPECT_NE(nullptr, gdi->getWaitFromCpuArg().FenceValueArray);
    EXPECT_EQ((D3DKMT_HANDLE)wddm->getDeviceHandle(), gdi->getWaitFromCpuArg().hDevice);
    EXPECT_EQ(1u, gdi->getWaitFromCpuArg().ObjectCount);
    EXPECT_NE(nullptr, gdi->getWaitFromCpuArg().ObjectHandleArray);
}

// Presets InitialFenceValue to 300 on the mock argument, creates a monitored fence and expects
// the recorded InitialFenceValue to be 0 and the context's currentFenceValue to be 1.
TEST_F(Wddm20Tests, WhenCreatingMonitoredFenceThenItIsInitializedWithFenceValueZeroAndCurrentFenceValueIsSetToOne) {
    gdi->createSynchronizationObject2 = gdi->createSynchronizationObject2Mock;

    gdi->getCreateSynchronizationObject2Arg().Info.MonitoredFence.InitialFenceValue = 300;

    wddm->wddmInterface->createMonitoredFence(*osContext);

    EXPECT_EQ(0u, gdi->getCreateSynchronizationObject2Arg().Info.MonitoredFence.InitialFenceValue);
    EXPECT_EQ(1u, osContext->getResidencyController().getMonitoredFence().currentFenceValue);
}

// Return type and calling convention of the free function whose declarator starts the next line.
NTSTATUS APIENTRY
// Declarator and body of the function whose return type ("NTSTATUS APIENTRY") ends the previous
// line: a query-resource-info stub that reports zero allocations and returns 0.
queryResourceInfoMock(D3DKMT_QUERYRESOURCEINFO *pData) {
    pData->NumAllocations = 0;
    return 0;
}

// With queryResourceInfo replaced by the stub above, openSharedHandle must return false.
// NOTE(review): reinterpret_cast / std::make_unique below have no template argument in this copy - confirm upstream.
TEST_F(Wddm20Tests, givenOpenSharedHandleWhenZeroAllocationsThenReturnNull) {
    D3DKMT_HANDLE handle = 0;
    WddmAllocation *alloc = nullptr;

    gdi->queryResourceInfo = reinterpret_cast(queryResourceInfoMock);
    auto ret = wddm->openSharedHandle(handle, alloc);

    EXPECT_EQ(false, ret);
}

// With createAllocation2 replaced by a stub returning STATUS_GRAPHICS_NO_VIDEO_MEMORY,
// createAllocation64k must evaluate to false.
TEST_F(Wddm20Tests, whenCreateAllocation64kFailsThenReturnFalse) {
    struct FailingCreateAllocation {
        static NTSTATUS APIENTRY mockCreateAllocation2(D3DKMT_CREATEALLOCATION *param) {
            return STATUS_GRAPHICS_NO_VIDEO_MEMORY;
        };
    };
    gdi->createAllocation2 = FailingCreateAllocation::mockCreateAllocation2;

    void *fakePtr = reinterpret_cast(0x123);
    auto gmm = std::make_unique(rootDeviceEnvironment->getGmmClientContext(), fakePtr, 100, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, StorageInfo{}, true);
    WddmAllocation allocation(0, AllocationType::UNKNOWN, fakePtr, 100, nullptr, MemoryPool::MemoryNull, 0u, 1u);
    allocation.setDefaultGmm(gmm.get());

    EXPECT_FALSE(wddm->createAllocation64k(&allocation));
}

// Same failing stub, exercised through createAllocationsAndMapGpuVa with a single-fragment
// handle storage (the call and its assertion continue on the next line).
TEST_F(Wddm20Tests, givenReadOnlyMemoryWhenCreateAllocationFailsWithNoVideoMemoryThenCorrectStatusIsReturned) {
    class MockCreateAllocation {
      public:
        static NTSTATUS APIENTRY mockCreateAllocation2(D3DKMT_CREATEALLOCATION *param) {
            return STATUS_GRAPHICS_NO_VIDEO_MEMORY;
        };
    };
    gdi->createAllocation2 = MockCreateAllocation::mockCreateAllocation2;

    OsHandleStorage handleStorage;
    OsHandleWin handle;
    auto maxOsContextCount = 1u;
    ResidencyData residency(maxOsContextCount);

    handleStorage.fragmentCount = 1;
    handleStorage.fragmentStorageData[0].cpuPtr = (void *)0x1000;
    handleStorage.fragmentStorageData[0].fragmentSize = 0x1000;
    handleStorage.fragmentStorageData[0].freeTheFragment = false;
    handleStorage.fragmentStorageData[0].osHandleStorage = &handle;
    handleStorage.fragmentStorageData[0].residency = &residency;
    handle.gmm = GmmHelperFunctions::getGmm(nullptr, 0, getGmmClientContext());

    NTSTATUS result =
// Initializer of `result` and the rest of the test opened on the previous line: the call must
// return STATUS_GRAPHICS_NO_VIDEO_MEMORY; the Gmm created for the handle is deleted manually.
        wddm->createAllocationsAndMapGpuVa(handleStorage);

    EXPECT_EQ(STATUS_GRAPHICS_NO_VIDEO_MEMORY, result);

    delete handle.gmm;
}

// init() must succeed and applyAdditionalContextFlagsResult.called must be 1 afterwards.
TEST_F(Wddm20Tests, whenContextIsInitializedThenApplyAdditionalContextFlagsIsCalled) {
    auto result = wddm->init();
    EXPECT_TRUE(result);
    EXPECT_EQ(1u, wddm->applyAdditionalContextFlagsResult.called);
}

// With DoNotRegisterTrimCallback set, registerTrimCallback must return nullptr.
TEST_F(Wddm20Tests, givenTrimCallbackRegistrationIsDisabledInDebugVariableWhenRegisteringCallbackThenReturnNullptr) {
    DebugManagerStateRestore stateRestore;
    DebugManager.flags.DoNotRegisterTrimCallback.set(true);
    WddmResidencyController residencyController{*wddm, 0u};
    EXPECT_EQ(nullptr, wddm->registerTrimCallback([](D3DKMT_TRIMNOTIFICATION *) {}, residencyController));
}

// Without the debug flag, registerTrimCallback must return a non-null handle.
TEST_F(Wddm20Tests, givenSuccessWhenRegisteringTrimCallbackThenReturnTrimCallbackHandle) {
    WddmResidencyController residencyController{*wddm, 0u};
    auto trimCallbackHandle = wddm->registerTrimCallback([](D3DKMT_TRIMNOTIFICATION *) {}, residencyController);
    EXPECT_NE(nullptr, trimCallbackHandle);
}

// unregisterTrimCallback must forward both the callback and the handle to the Gdi mock.
// NOTE(review): reinterpret_cast below has no template argument in this copy - confirm upstream.
TEST_F(Wddm20Tests, givenCorrectArgumentsWhenUnregisteringTrimCallbackThenPassArgumentsToGdiCall) {
    PFND3DKMT_TRIMNOTIFICATIONCALLBACK callback = [](D3DKMT_TRIMNOTIFICATION *) {};
    auto trimCallbackHandle = reinterpret_cast(0x9876);

    wddm->unregisterTrimCallback(callback, trimCallbackHandle);
    EXPECT_EQ(callback, gdi->getUnregisterTrimNotificationArg().Callback);
    EXPECT_EQ(trimCallbackHandle, gdi->getUnregisterTrimNotificationArg().Handle);
}

// With a null handle, unregisterTrimCallback must leave the previously recorded Gdi arguments
// unchanged (the second comparison continues on the next line).
TEST_F(Wddm20Tests, givenNullTrimCallbackHandleWhenUnregisteringTrimCallbackThenDoNotDoGdiCall) {
    PFND3DKMT_TRIMNOTIFICATIONCALLBACK callbackBefore = [](D3DKMT_TRIMNOTIFICATION *) {};
    auto trimCallbackHandleBefore = reinterpret_cast(0x9876);
    gdi->getUnregisterTrimNotificationArg().Callback = callbackBefore;
    gdi->getUnregisterTrimNotificationArg().Handle = trimCallbackHandleBefore;

    wddm->unregisterTrimCallback([](D3DKMT_TRIMNOTIFICATION *) {}, nullptr);
    EXPECT_EQ(callbackBefore, gdi->getUnregisterTrimNotificationArg().Callback);
// Last assertion of the test opened on the previous line: the recorded handle must be unchanged.
    EXPECT_EQ(trimCallbackHandleBefore, gdi->getUnregisterTrimNotificationArg().Handle);
}

// The tests below reuse the Wddm20Tests fixture under a separate suite name.
using WddmLockWithMakeResidentTests = Wddm20Tests;

// lockResource with the make-resident argument set to false must neither store a temporary
// resource nor call makeResident.
TEST_F(WddmLockWithMakeResidentTests, givenAllocationThatDoesntNeedMakeResidentBeforeLockWhenLockThenDontStoreItOrCallMakeResident) {
    EXPECT_TRUE(mockTemporaryResources->resourceHandles.empty());
    EXPECT_EQ(0u, wddm->makeResidentResult.called);
    wddm->lockResource(ALLOCATION_HANDLE, false, 0x1000);
    EXPECT_TRUE(mockTemporaryResources->resourceHandles.empty());
    EXPECT_EQ(0u, wddm->makeResidentResult.called);
    wddm->unlockResource(ALLOCATION_HANDLE);
}

// lockResource with the make-resident argument set to true must call makeResident once.
TEST_F(WddmLockWithMakeResidentTests, givenAllocationThatNeedsMakeResidentBeforeLockWhenLockThenCallBlockingMakeResident) {
    wddm->lockResource(ALLOCATION_HANDLE, true, 0x1000);
    EXPECT_EQ(1u, wddm->makeResidentResult.called);
}

// makeResidentResource must acquire the lock once, and the parameter recorded by the mock must
// be the address of resourcesLock.
// NOTE(review): reinterpret_cast below has no template argument in this copy - confirm upstream.
TEST_F(WddmLockWithMakeResidentTests, givenAllocationWhenApplyBlockingMakeResidentThenAcquireUniqueLock) {
    wddm->temporaryResources->makeResidentResource(ALLOCATION_HANDLE, 0x1000);
    EXPECT_EQ(1u, mockTemporaryResources->acquireLockResult.called);
    EXPECT_EQ(reinterpret_cast(&mockTemporaryResources->resourcesLock), mockTemporaryResources->acquireLockResult.uint64ParamPassed);
}

// makeResidentResource must call makeResident once and append the handle to resourceHandles.
TEST_F(WddmLockWithMakeResidentTests, givenAllocationWhenApplyBlockingMakeResidentThenCallMakeResidentAndStoreAllocation) {
    wddm->temporaryResources->makeResidentResource(ALLOCATION_HANDLE, 0x1000);
    EXPECT_EQ(1u, wddm->makeResidentResult.called);
    EXPECT_EQ(ALLOCATION_HANDLE, mockTemporaryResources->resourceHandles.back());
}

// With the base makeResident enabled and mockPagingFence reset to 0, the paging fence must advance
// to MockGdi::pagingFenceReturnValue + 1 (the expected call count expression continues on the next line).
TEST_F(WddmLockWithMakeResidentTests, givenAllocationWhenApplyBlockingMakeResidentThenWaitForCurrentPagingFenceValue) {
    wddm->callBaseMakeResident = true;
    wddm->mockPagingFence = 0u;
    wddm->temporaryResources->makeResidentResource(ALLOCATION_HANDLE, 0x1000);
    UINT64 expectedCallNumber = NEO::wddmResidencyLoggingAvailable ?
MockGdi::pagingFenceReturnValue + 1 : 0ull; EXPECT_EQ(1u, wddm->makeResidentResult.called); EXPECT_EQ(MockGdi::pagingFenceReturnValue + 1, wddm->mockPagingFence); EXPECT_EQ(expectedCallNumber, wddm->getPagingFenceAddressResult.called); } TEST_F(WddmLockWithMakeResidentTests, givenAllocationWhenApplyBlockingMakeResidentAndMakeResidentCallFailsThenEvictTemporaryResourcesAndRetry) { MockWddmAllocation allocation(rootDeviceEnvironment->getGmmClientContext()); allocation.handle = 0x3; WddmMock mockWddm(*executionEnvironment->rootDeviceEnvironments[0]); mockWddm.makeResidentStatus = false; auto mockTemporaryResources = static_cast(mockWddm.temporaryResources.get()); mockWddm.temporaryResources->makeResidentResource(allocation.handle, 0x1000); EXPECT_EQ(1u, mockTemporaryResources->evictAllResourcesResult.called); EXPECT_EQ(allocation.handle, mockWddm.makeResidentResult.handlePack[0]); EXPECT_EQ(2u, mockWddm.makeResidentResult.called); } TEST_F(WddmLockWithMakeResidentTests, whenApplyBlockingMakeResidentAndTemporaryResourcesAreEvictedSuccessfullyThenCallMakeResidentOneMoreTime) { MockWddmAllocation allocation(rootDeviceEnvironment->getGmmClientContext()); allocation.handle = 0x3; WddmMock mockWddm(*executionEnvironment->rootDeviceEnvironments[0]); mockWddm.makeResidentStatus = false; auto mockTemporaryResources = static_cast(mockWddm.temporaryResources.get()); mockTemporaryResources->resourceHandles.push_back(allocation.handle); mockWddm.temporaryResources->makeResidentResource(allocation.handle, 0x1000); EXPECT_EQ(2u, mockTemporaryResources->evictAllResourcesResult.called); EXPECT_EQ(1u, mockWddm.evictResult.called); EXPECT_EQ(allocation.handle, mockWddm.makeResidentResult.handlePack[0]); EXPECT_EQ(3u, mockWddm.makeResidentResult.called); } TEST_F(WddmLockWithMakeResidentTests, whenApplyBlockingMakeResidentAndMakeResidentStillFailsThenDontStoreTemporaryResource) { MockWddmAllocation allocation(rootDeviceEnvironment->getGmmClientContext()); allocation.handle = 0x2; 
WddmMock mockWddm(*executionEnvironment->rootDeviceEnvironments[0]); mockWddm.makeResidentStatus = false; auto mockTemporaryResources = static_cast(mockWddm.temporaryResources.get()); mockTemporaryResources->resourceHandles.push_back(0x1); EXPECT_EQ(1u, mockTemporaryResources->resourceHandles.size()); mockWddm.temporaryResources->makeResidentResource(allocation.handle, 0x1000); EXPECT_EQ(0u, mockTemporaryResources->resourceHandles.size()); EXPECT_EQ(1u, mockWddm.evictResult.called); EXPECT_EQ(allocation.handle, mockWddm.makeResidentResult.handlePack[0]); EXPECT_EQ(3u, mockWddm.makeResidentResult.called); } TEST_F(WddmLockWithMakeResidentTests, whenApplyBlockingMakeResidentAndMakeResidentPassesAfterEvictThenStoreTemporaryResource) { MockWddmAllocation allocation(rootDeviceEnvironment->getGmmClientContext()); allocation.handle = 0x2; WddmMock mockWddm(*executionEnvironment->rootDeviceEnvironments[0]); mockWddm.makeResidentResults = {false, true}; auto mockTemporaryResources = static_cast(mockWddm.temporaryResources.get()); mockTemporaryResources->resourceHandles.push_back(0x1); EXPECT_EQ(1u, mockTemporaryResources->resourceHandles.size()); mockWddm.temporaryResources->makeResidentResource(allocation.handle, 0x1000); EXPECT_EQ(1u, mockTemporaryResources->resourceHandles.size()); EXPECT_EQ(0x2, mockTemporaryResources->resourceHandles.back()); EXPECT_EQ(1u, mockWddm.evictResult.called); EXPECT_EQ(allocation.handle, mockWddm.makeResidentResult.handlePack[0]); EXPECT_EQ(2u, mockWddm.makeResidentResult.called); } TEST_F(WddmLockWithMakeResidentTests, whenApplyBlockingMakeResidentAndMakeResidentPassesThenStoreTemporaryResource) { MockWddmAllocation allocation(rootDeviceEnvironment->getGmmClientContext()); allocation.handle = 0x2; WddmMock mockWddm(*executionEnvironment->rootDeviceEnvironments[0]); auto mockTemporaryResources = static_cast(mockWddm.temporaryResources.get()); mockTemporaryResources->resourceHandles.push_back(0x1); 
mockWddm.temporaryResources->makeResidentResource(allocation.handle, 0x1000); EXPECT_EQ(2u, mockTemporaryResources->resourceHandles.size()); EXPECT_EQ(0x2, mockTemporaryResources->resourceHandles.back()); EXPECT_EQ(allocation.handle, mockWddm.makeResidentResult.handlePack[0]); EXPECT_EQ(1u, mockWddm.makeResidentResult.called); } TEST_F(WddmLockWithMakeResidentTests, givenNoTemporaryResourcesWhenEvictingAllTemporaryResourcesThenEvictionIsNotApplied) { wddm->getTemporaryResourcesContainer()->evictAllResources(); EXPECT_EQ(MemoryOperationsStatus::MEMORY_NOT_FOUND, mockTemporaryResources->evictAllResourcesResult.operationSuccess); } TEST_F(WddmLockWithMakeResidentTests, whenEvictingAllTemporaryResourcesThenAcquireTemporaryResourcesLock) { wddm->getTemporaryResourcesContainer()->evictAllResources(); EXPECT_EQ(1u, mockTemporaryResources->acquireLockResult.called); EXPECT_EQ(reinterpret_cast(&mockTemporaryResources->resourcesLock), mockTemporaryResources->acquireLockResult.uint64ParamPassed); } TEST_F(WddmLockWithMakeResidentTests, whenEvictingAllTemporaryResourcesAndAllEvictionsSucceedThenReturnSuccess) { MockWddmAllocation allocation(rootDeviceEnvironment->getGmmClientContext()); WddmMock mockWddm(*executionEnvironment->rootDeviceEnvironments[0]); auto mockTemporaryResources = static_cast(mockWddm.temporaryResources.get()); mockTemporaryResources->resourceHandles.push_back(allocation.handle); mockWddm.getTemporaryResourcesContainer()->evictAllResources(); EXPECT_EQ(1u, mockTemporaryResources->evictAllResourcesResult.called); EXPECT_EQ(MemoryOperationsStatus::SUCCESS, mockTemporaryResources->evictAllResourcesResult.operationSuccess); EXPECT_EQ(1u, mockWddm.evictResult.called); } TEST_F(WddmLockWithMakeResidentTests, givenThreeAllocationsWhenEvictingAllTemporaryResourcesThenCallEvictForEachAllocationAndCleanList) { WddmMock mockWddm(*executionEnvironment->rootDeviceEnvironments[0]); auto mockTemporaryResources = static_cast(mockWddm.temporaryResources.get()); constexpr 
uint32_t numAllocations = 3u; for (auto i = 0u; i < numAllocations; i++) { mockTemporaryResources->resourceHandles.push_back(i); } mockWddm.getTemporaryResourcesContainer()->evictAllResources(); EXPECT_TRUE(mockTemporaryResources->resourceHandles.empty()); EXPECT_EQ(1u, mockWddm.evictResult.called); } TEST_F(WddmLockWithMakeResidentTests, givenThreeAllocationsWhenEvictingAllTemporaryResourcesAndOneOfThemFailsThenReturnFail) { WddmMock mockWddm(*executionEnvironment->rootDeviceEnvironments[0].get()); mockWddm.evictStatus = false; auto mockTemporaryResources = static_cast(mockWddm.temporaryResources.get()); constexpr uint32_t numAllocations = 3u; for (auto i = 0u; i < numAllocations; i++) { mockTemporaryResources->resourceHandles.push_back(i); } mockWddm.getTemporaryResourcesContainer()->evictAllResources(); EXPECT_EQ(MemoryOperationsStatus::FAILED, mockTemporaryResources->evictAllResourcesResult.operationSuccess); EXPECT_EQ(1u, mockWddm.evictResult.called); } TEST_F(WddmLockWithMakeResidentTests, givenNoTemporaryResourcesWhenEvictingTemporaryResourceThenEvictionIsNotApplied) { wddm->getTemporaryResourcesContainer()->evictResource(ALLOCATION_HANDLE); EXPECT_EQ(MemoryOperationsStatus::MEMORY_NOT_FOUND, mockTemporaryResources->evictResourceResult.operationSuccess); } TEST_F(WddmLockWithMakeResidentTests, whenEvictingTemporaryResourceThenAcquireTemporaryResourcesLock) { wddm->getTemporaryResourcesContainer()->evictResource(ALLOCATION_HANDLE); EXPECT_EQ(1u, mockTemporaryResources->acquireLockResult.called); EXPECT_EQ(reinterpret_cast(&mockTemporaryResources->resourcesLock), mockTemporaryResources->acquireLockResult.uint64ParamPassed); } TEST_F(WddmLockWithMakeResidentTests, whenEvictingNonExistingTemporaryResourceThenEvictIsNotAppliedAndTemporaryResourcesAreRestored) { mockTemporaryResources->resourceHandles.push_back(ALLOCATION_HANDLE); EXPECT_FALSE(mockTemporaryResources->resourceHandles.empty()); wddm->getTemporaryResourcesContainer()->evictResource(ALLOCATION_HANDLE 
+ 1); EXPECT_FALSE(mockTemporaryResources->resourceHandles.empty()); EXPECT_EQ(MemoryOperationsStatus::MEMORY_NOT_FOUND, mockTemporaryResources->evictResourceResult.operationSuccess); } TEST_F(WddmLockWithMakeResidentTests, whenEvictingTemporaryResourceAndEvictFailsThenReturnFail) { WddmMock mockWddm(*executionEnvironment->rootDeviceEnvironments[0]); mockWddm.evictStatus = false; auto mockTemporaryResources = static_cast(mockWddm.temporaryResources.get()); mockTemporaryResources->resourceHandles.push_back(ALLOCATION_HANDLE); mockWddm.getTemporaryResourcesContainer()->evictResource(ALLOCATION_HANDLE); EXPECT_TRUE(mockTemporaryResources->resourceHandles.empty()); EXPECT_EQ(MemoryOperationsStatus::FAILED, mockTemporaryResources->evictResourceResult.operationSuccess); EXPECT_EQ(1u, mockWddm.evictResult.called); } TEST_F(WddmLockWithMakeResidentTests, whenEvictingTemporaryResourceAndEvictSucceedThenReturnSuccess) { WddmMock mockWddm(*executionEnvironment->rootDeviceEnvironments[0]); auto mockTemporaryResources = static_cast(mockWddm.temporaryResources.get()); mockTemporaryResources->resourceHandles.push_back(ALLOCATION_HANDLE); mockWddm.getTemporaryResourcesContainer()->evictResource(ALLOCATION_HANDLE); EXPECT_TRUE(mockTemporaryResources->resourceHandles.empty()); EXPECT_EQ(MemoryOperationsStatus::SUCCESS, mockTemporaryResources->evictResourceResult.operationSuccess); EXPECT_EQ(1u, mockWddm.evictResult.called); } TEST_F(WddmLockWithMakeResidentTests, whenEvictingTemporaryResourceThenOtherResourcesRemainOnTheList) { mockTemporaryResources->resourceHandles.push_back(0x1); mockTemporaryResources->resourceHandles.push_back(0x2); mockTemporaryResources->resourceHandles.push_back(0x3); wddm->getTemporaryResourcesContainer()->evictResource(0x2); EXPECT_EQ(2u, mockTemporaryResources->resourceHandles.size()); EXPECT_EQ(0x1, mockTemporaryResources->resourceHandles.front()); EXPECT_EQ(0x3, mockTemporaryResources->resourceHandles.back()); } TEST_F(WddmLockWithMakeResidentTests, 
whenAlllocationNeedsBlockingMakeResidentBeforeLockThenLockWithBlockingMakeResident) { WddmMemoryManager memoryManager(*executionEnvironment); MockWddmAllocation allocation(rootDeviceEnvironment->getGmmClientContext()); allocation.needsMakeResidentBeforeLock = false; memoryManager.lockResource(&allocation); EXPECT_EQ(1u, wddm->lockResult.called); EXPECT_EQ(0u, wddm->lockResult.uint64ParamPassed); memoryManager.unlockResource(&allocation); allocation.needsMakeResidentBeforeLock = true; memoryManager.lockResource(&allocation); EXPECT_EQ(2u, wddm->lockResult.called); EXPECT_EQ(1u, wddm->lockResult.uint64ParamPassed); memoryManager.unlockResource(&allocation); } using WddmGfxPartitionTest = Wddm20Tests; TEST_F(WddmGfxPartitionTest, WhenInitializingGfxPartitionThenAllHeapsAreInitialized) { MockGfxPartition gfxPartition; for (auto heap : MockGfxPartition::allHeapNames) { ASSERT_FALSE(gfxPartition.heapInitialized(heap)); } wddm->initGfxPartition(gfxPartition, 0, 1, false); for (auto heap : MockGfxPartition::allHeapNames) { if (!gfxPartition.heapInitialized(heap)) { EXPECT_TRUE(heap == HeapIndex::HEAP_SVM || heap == HeapIndex::HEAP_STANDARD2MB || heap == HeapIndex::HEAP_EXTENDED); } else { EXPECT_TRUE(gfxPartition.heapInitialized(heap)); } } } TEST(WddmGfxPartitionTests, WhenInitializingGfxPartitionThen64KBHeapsAreUsed) { struct MockWddm : public Wddm { using Wddm::gfxPartition; MockWddm(RootDeviceEnvironment &rootDeviceEnvironment) : Wddm(std::unique_ptr(OSInterface::discoverDevices(rootDeviceEnvironment.executionEnvironment)[0].release()->as()), rootDeviceEnvironment) {} }; MockExecutionEnvironment executionEnvironment; auto wddm = new MockWddm(*executionEnvironment.rootDeviceEnvironments[0]); uint32_t rootDeviceIndex = 3; size_t numRootDevices = 5; MockGfxPartition gfxPartition; wddm->init(); wddm->initGfxPartition(gfxPartition, rootDeviceIndex, numRootDevices, false); auto heapStandard64KBSize = alignDown((wddm->gfxPartition.Standard64KB.Limit - 
wddm->gfxPartition.Standard64KB.Base + 1) / numRootDevices, GfxPartition::heapGranularity); EXPECT_EQ(heapStandard64KBSize, gfxPartition.getHeapSize(HeapIndex::HEAP_STANDARD64KB)); EXPECT_EQ(wddm->gfxPartition.Standard64KB.Base + rootDeviceIndex * heapStandard64KBSize, gfxPartition.getHeapBase(HeapIndex::HEAP_STANDARD64KB)); } TEST(WddmGfxPartitionTests, givenGfxPartitionWhenInitializedThenInternalFrontWindowHeapIsAllocatedAtInternalHeapFront) { MockExecutionEnvironment executionEnvironment; auto wddm = new WddmMock(*executionEnvironment.rootDeviceEnvironments[0]); uint32_t rootDeviceIndex = 0; size_t numRootDevices = 1; MockGfxPartition gfxPartition; wddm->init(); wddm->initGfxPartition(gfxPartition, rootDeviceIndex, numRootDevices, false); EXPECT_EQ(gfxPartition.getHeapBase(HeapIndex::HEAP_INTERNAL_FRONT_WINDOW), gfxPartition.getHeapBase(HeapIndex::HEAP_INTERNAL)); EXPECT_EQ(gfxPartition.getHeapBase(HeapIndex::HEAP_INTERNAL_DEVICE_FRONT_WINDOW), gfxPartition.getHeapBase(HeapIndex::HEAP_INTERNAL_DEVICE_MEMORY)); auto frontWindowSize = GfxPartition::internalFrontWindowPoolSize; EXPECT_EQ(gfxPartition.getHeapSize(HeapIndex::HEAP_INTERNAL_FRONT_WINDOW), frontWindowSize); EXPECT_EQ(gfxPartition.getHeapSize(HeapIndex::HEAP_INTERNAL_DEVICE_FRONT_WINDOW), frontWindowSize); EXPECT_EQ(gfxPartition.getHeapMinimalAddress(HeapIndex::HEAP_INTERNAL_FRONT_WINDOW), gfxPartition.getHeapBase(HeapIndex::HEAP_INTERNAL_FRONT_WINDOW)); EXPECT_EQ(gfxPartition.getHeapMinimalAddress(HeapIndex::HEAP_INTERNAL_DEVICE_FRONT_WINDOW), gfxPartition.getHeapBase(HeapIndex::HEAP_INTERNAL_DEVICE_FRONT_WINDOW)); EXPECT_EQ(gfxPartition.getHeapMinimalAddress(HeapIndex::HEAP_INTERNAL), gfxPartition.getHeapBase(HeapIndex::HEAP_INTERNAL) + frontWindowSize); EXPECT_EQ(gfxPartition.getHeapMinimalAddress(HeapIndex::HEAP_INTERNAL_DEVICE_MEMORY), gfxPartition.getHeapBase(HeapIndex::HEAP_INTERNAL_DEVICE_MEMORY) + frontWindowSize); EXPECT_EQ(gfxPartition.getHeapLimit(HeapIndex::HEAP_INTERNAL), 
gfxPartition.getHeapBase(HeapIndex::HEAP_INTERNAL) + gfxPartition.getHeapSize(HeapIndex::HEAP_INTERNAL) - 1); EXPECT_EQ(gfxPartition.getHeapLimit(HeapIndex::HEAP_INTERNAL_DEVICE_MEMORY), gfxPartition.getHeapBase(HeapIndex::HEAP_INTERNAL_DEVICE_MEMORY) + gfxPartition.getHeapSize(HeapIndex::HEAP_INTERNAL_DEVICE_MEMORY) - 1); } TEST(WddmGfxPartitionTests, givenInternalFrontWindowHeapWhenAllocatingSmallOrBigChunkThenAddressFromFrontIsReturned) { MockExecutionEnvironment executionEnvironment; auto wddm = new WddmMock(*executionEnvironment.rootDeviceEnvironments[0]); uint32_t rootDeviceIndex = 0; size_t numRootDevices = 1; MockGfxPartition gfxPartition; wddm->init(); wddm->initGfxPartition(gfxPartition, rootDeviceIndex, numRootDevices, false); const size_t sizeSmall = MemoryConstants::pageSize64k; const size_t sizeBig = static_cast(gfxPartition.getHeapSize(HeapIndex::HEAP_INTERNAL_FRONT_WINDOW)) - MemoryConstants::pageSize64k; HeapIndex heaps[] = {HeapIndex::HEAP_INTERNAL_FRONT_WINDOW, HeapIndex::HEAP_INTERNAL_DEVICE_FRONT_WINDOW}; for (int i = 0; i < 2; i++) { size_t sizeToAlloc = sizeSmall; auto address = gfxPartition.heapAllocate(heaps[i], sizeToAlloc); EXPECT_EQ(gfxPartition.getHeapBase(heaps[i]), address); gfxPartition.heapFree(heaps[i], address, sizeToAlloc); sizeToAlloc = sizeBig; address = gfxPartition.heapAllocate(heaps[i], sizeToAlloc); EXPECT_EQ(gfxPartition.getHeapBase(heaps[i]), address); gfxPartition.heapFree(heaps[i], address, sizeToAlloc); } } TEST_F(Wddm20Tests, givenWddmWhenDiscoverDevicesAndFilterDeviceIdIsTheSameAsTheExistingDeviceThenReturnTheAdapter) { DebugManagerStateRestore stateRestore; DebugManager.flags.FilterDeviceId.set("1234"); // Existing device Id ExecutionEnvironment executionEnvironment; auto hwDeviceIds = OSInterface::discoverDevices(executionEnvironment); EXPECT_EQ(1u, hwDeviceIds.size()); EXPECT_NE(nullptr, hwDeviceIds[0].get()); } TEST_F(Wddm20Tests, 
givenWddmWhenDiscoverDevicesAndForceDeviceIdIsTheSameAsTheExistingDeviceThenReturnTheAdapter) { DebugManagerStateRestore stateRestore; DebugManager.flags.ForceDeviceId.set("1234"); // Existing device Id ExecutionEnvironment executionEnvironment; auto hwDeviceIds = OSInterface::discoverDevices(executionEnvironment); EXPECT_EQ(1u, hwDeviceIds.size()); EXPECT_NE(nullptr, hwDeviceIds[0].get()); } TEST_F(WddmTest, WhenFeatureFlagHwQueueIsDisabledThenReturnWddm20Version) { wddm->featureTable->flags.ftrWddmHwQueues = 0; EXPECT_EQ(WddmVersion::WDDM_2_0, wddm->getWddmVersion()); } TEST_F(WddmTest, WhenFeatureFlagHwQueueIsEnabledThenReturnWddm23Version) { wddm->featureTable->flags.ftrWddmHwQueues = 1; EXPECT_EQ(WddmVersion::WDDM_2_3, wddm->getWddmVersion()); } TEST_F(Wddm20WithMockGdiDllTests, GivenCreationSucceedWhenCreatingSeparateMonitorFenceThenReturnFilledStructure) { MonitoredFence monitorFence = {0}; bool ret = wddmMockInterface->createMonitoredFence(monitorFence); EXPECT_TRUE(ret); EXPECT_EQ(4u, monitorFence.fenceHandle); EXPECT_EQ(getMonitorFenceCpuFenceAddressFcn(), monitorFence.cpuAddress); } TEST_F(Wddm20WithMockGdiDllTests, GivenCreationFailWhenCreatingSeparateMonitorFenceThenReturnNotFilledStructure) { MonitoredFence monitorFence = {0}; *getCreateSynchronizationObject2FailCallFcn() = true; bool ret = wddmMockInterface->createMonitoredFence(monitorFence); EXPECT_FALSE(ret); EXPECT_EQ(0u, monitorFence.fenceHandle); void *retAddress = reinterpret_cast(0); EXPECT_EQ(retAddress, monitorFence.cpuAddress); } TEST_F(Wddm20WithMockGdiDllTests, WhenDestroyingSeparateMonitorFenceThenExpectGdiCalled) { MonitoredFence monitorFence = {0}; monitorFence.fenceHandle = 10u; wddmMockInterface->destroyMonitorFence(monitorFence); EXPECT_EQ(monitorFence.fenceHandle, getDestroySynchronizationObjectDataFcn()->hSyncObject); } namespace NEO { long __stdcall notifyAubCapture(void *csrHandle, uint64_t gfxAddress, size_t gfxSize, bool allocate); } TEST_F(Wddm20WithMockGdiDllTests, 
whenSetDeviceInfoSucceedsThenDeviceCallbacksArePassedToGmmMemory) { GMM_DEVICE_CALLBACKS_INT expectedDeviceCb{}; wddm->init(); auto gdi = wddm->getGdi(); auto gmmMemory = static_cast(wddm->getGmmMemory()); expectedDeviceCb.Adapter.KmtHandle = wddm->getAdapter(); expectedDeviceCb.hDevice.KmtHandle = wddm->getDeviceHandle(); expectedDeviceCb.hCsr = nullptr; expectedDeviceCb.PagingQueue = wddm->getPagingQueue(); expectedDeviceCb.PagingFence = wddm->getPagingQueueSyncObject(); expectedDeviceCb.DevCbPtrs.KmtCbPtrs.pfnAllocate = gdi->createAllocation_; expectedDeviceCb.DevCbPtrs.KmtCbPtrs.pfnDeallocate = gdi->destroyAllocation; expectedDeviceCb.DevCbPtrs.KmtCbPtrs.pfnMapGPUVA = gdi->mapGpuVirtualAddress; expectedDeviceCb.DevCbPtrs.KmtCbPtrs.pfnMakeResident = gdi->makeResident; expectedDeviceCb.DevCbPtrs.KmtCbPtrs.pfnEvict = gdi->evict; expectedDeviceCb.DevCbPtrs.KmtCbPtrs.pfnReserveGPUVA = gdi->reserveGpuVirtualAddress; expectedDeviceCb.DevCbPtrs.KmtCbPtrs.pfnUpdateGPUVA = gdi->updateGpuVirtualAddress; expectedDeviceCb.DevCbPtrs.KmtCbPtrs.pfnWaitFromCpu = gdi->waitForSynchronizationObjectFromCpu; expectedDeviceCb.DevCbPtrs.KmtCbPtrs.pfnLock = gdi->lock2; expectedDeviceCb.DevCbPtrs.KmtCbPtrs.pfnUnLock = gdi->unlock2; expectedDeviceCb.DevCbPtrs.KmtCbPtrs.pfnEscape = gdi->escape; expectedDeviceCb.DevCbPtrs.KmtCbPtrs.pfnFreeGPUVA = gdi->freeGpuVirtualAddress; expectedDeviceCb.DevCbPtrs.KmtCbPtrs.pfnNotifyAubCapture = notifyAubCapture; EXPECT_EQ(expectedDeviceCb.Adapter.KmtHandle, gmmMemory->deviceCallbacks.Adapter.KmtHandle); EXPECT_EQ(expectedDeviceCb.hDevice.KmtHandle, gmmMemory->deviceCallbacks.hDevice.KmtHandle); EXPECT_EQ(expectedDeviceCb.hCsr, gmmMemory->deviceCallbacks.hCsr); EXPECT_EQ(expectedDeviceCb.PagingQueue, gmmMemory->deviceCallbacks.PagingQueue); EXPECT_EQ(expectedDeviceCb.PagingFence, gmmMemory->deviceCallbacks.PagingFence); EXPECT_EQ(expectedDeviceCb.DevCbPtrs.KmtCbPtrs.pfnAllocate, gmmMemory->deviceCallbacks.DevCbPtrs.KmtCbPtrs.pfnAllocate); 
EXPECT_EQ(expectedDeviceCb.DevCbPtrs.KmtCbPtrs.pfnDeallocate, gmmMemory->deviceCallbacks.DevCbPtrs.KmtCbPtrs.pfnDeallocate); EXPECT_EQ(expectedDeviceCb.DevCbPtrs.KmtCbPtrs.pfnMapGPUVA, gmmMemory->deviceCallbacks.DevCbPtrs.KmtCbPtrs.pfnMapGPUVA); EXPECT_EQ(expectedDeviceCb.DevCbPtrs.KmtCbPtrs.pfnMakeResident, gmmMemory->deviceCallbacks.DevCbPtrs.KmtCbPtrs.pfnMakeResident); EXPECT_EQ(expectedDeviceCb.DevCbPtrs.KmtCbPtrs.pfnEvict, gmmMemory->deviceCallbacks.DevCbPtrs.KmtCbPtrs.pfnEvict); EXPECT_EQ(expectedDeviceCb.DevCbPtrs.KmtCbPtrs.pfnReserveGPUVA, gmmMemory->deviceCallbacks.DevCbPtrs.KmtCbPtrs.pfnReserveGPUVA); EXPECT_EQ(expectedDeviceCb.DevCbPtrs.KmtCbPtrs.pfnUpdateGPUVA, gmmMemory->deviceCallbacks.DevCbPtrs.KmtCbPtrs.pfnUpdateGPUVA); EXPECT_EQ(expectedDeviceCb.DevCbPtrs.KmtCbPtrs.pfnWaitFromCpu, gmmMemory->deviceCallbacks.DevCbPtrs.KmtCbPtrs.pfnWaitFromCpu); EXPECT_EQ(expectedDeviceCb.DevCbPtrs.KmtCbPtrs.pfnLock, gmmMemory->deviceCallbacks.DevCbPtrs.KmtCbPtrs.pfnLock); EXPECT_EQ(expectedDeviceCb.DevCbPtrs.KmtCbPtrs.pfnUnLock, gmmMemory->deviceCallbacks.DevCbPtrs.KmtCbPtrs.pfnUnLock); EXPECT_EQ(expectedDeviceCb.DevCbPtrs.KmtCbPtrs.pfnEscape, gmmMemory->deviceCallbacks.DevCbPtrs.KmtCbPtrs.pfnEscape); EXPECT_EQ(expectedDeviceCb.DevCbPtrs.KmtCbPtrs.pfnFreeGPUVA, gmmMemory->deviceCallbacks.DevCbPtrs.KmtCbPtrs.pfnFreeGPUVA); EXPECT_EQ(expectedDeviceCb.DevCbPtrs.KmtCbPtrs.pfnNotifyAubCapture, gmmMemory->deviceCallbacks.DevCbPtrs.KmtCbPtrs.pfnNotifyAubCapture); } TEST_F(Wddm20WithMockGdiDllTests, whenSetDeviceInfoFailsThenDeviceIsNotConfigured) { auto mockGmmMemory = new MockGmmMemoryBase(getGmmClientContext()); mockGmmMemory->setDeviceInfoResult = false; wddm->gmmMemory.reset(mockGmmMemory); wddm->init(); EXPECT_EQ(0u, mockGmmMemory->configureDeviceAddressSpaceCalled); } HWTEST_F(Wddm20WithMockGdiDllTests, givenNonGen12LPPlatformWhenConfigureDeviceAddressSpaceThenDontObtainMinAddress) { if (defaultHwInfo->platform.eRenderCoreFamily == IGFX_GEN12LP_CORE) { GTEST_SKIP(); 
} auto gmmMemory = new MockGmmMemoryBase(getGmmClientContext()); wddm->gmmMemory.reset(gmmMemory); wddm->init(); EXPECT_EQ(NEO::windowsMinAddress, wddm->getWddmMinAddress()); EXPECT_EQ(0u, gmmMemory->getInternalGpuVaRangeLimitCalled); } struct GdiWithMockedCloseFunc : public MockGdi { GdiWithMockedCloseFunc() : MockGdi() { closeAdapter = mockCloseAdapter; GdiWithMockedCloseFunc::closeAdapterCalled = 0u; GdiWithMockedCloseFunc::closeAdapterCalledArgPassed = 0u; } static NTSTATUS __stdcall mockCloseAdapter(IN CONST D3DKMT_CLOSEADAPTER *adapter) { closeAdapterCalled++; closeAdapterCalledArgPassed = adapter->hAdapter; return STATUS_SUCCESS; } static uint32_t closeAdapterCalled; static D3DKMT_HANDLE closeAdapterCalledArgPassed; }; uint32_t GdiWithMockedCloseFunc::closeAdapterCalled; D3DKMT_HANDLE GdiWithMockedCloseFunc::closeAdapterCalledArgPassed; TEST(HwDeviceId, whenHwDeviceIdIsDestroyedThenAdapterIsClosed) { auto gdi = std::make_unique(); auto osEnv = std::make_unique(); osEnv->gdi.reset(gdi.release()); D3DKMT_HANDLE adapter = 0x1234; { HwDeviceIdWddm hwDeviceId{adapter, {}, osEnv.get(), std::make_unique()}; } EXPECT_EQ(1u, GdiWithMockedCloseFunc::closeAdapterCalled); EXPECT_EQ(adapter, GdiWithMockedCloseFunc::closeAdapterCalledArgPassed); } TEST_F(WddmTest, WhenResidencyLoggingEnabledThenExpectLoggerCreated) { NEO::IoFunctions::mockFopenCalled = 0; NEO::IoFunctions::mockVfptrinfCalled = 0; NEO::IoFunctions::mockFcloseCalled = 0; DebugManagerStateRestore dbgRestore; DebugManager.flags.WddmResidencyLogger.set(true); wddm->createPagingFenceLogger(); EXPECT_NE(nullptr, wddm->residencyLogger.get()); wddm->residencyLogger.reset(); if (NEO::wddmResidencyLoggingAvailable) { EXPECT_EQ(1u, NEO::IoFunctions::mockFopenCalled); EXPECT_EQ(1u, NEO::IoFunctions::mockVfptrinfCalled); EXPECT_EQ(1u, NEO::IoFunctions::mockFcloseCalled); } } TEST_F(WddmTest, GivenResidencyLoggingEnabledWhenMakeResidentSuccessThenExpectSizeRapport) { if (!NEO::wddmResidencyLoggingAvailable) { 
GTEST_SKIP(); } NEO::IoFunctions::mockFopenCalled = 0; NEO::IoFunctions::mockVfptrinfCalled = 0; NEO::IoFunctions::mockFcloseCalled = 0; DebugManagerStateRestore dbgRestore; DebugManager.flags.WddmResidencyLogger.set(true); wddm->callBaseCreatePagingLogger = false; wddm->callBaseMakeResident = true; wddm->createPagingFenceLogger(); EXPECT_NE(nullptr, wddm->residencyLogger.get()); auto logger = static_cast(wddm->residencyLogger.get()); D3DKMT_HANDLE handle = ALLOCATION_HANDLE; uint64_t bytesToTrim = 0; wddm->makeResident(&handle, 1, false, &bytesToTrim, 0x1000); //2 - one for open log, second for allocation size EXPECT_EQ(2u, NEO::IoFunctions::mockVfptrinfCalled); EXPECT_TRUE(logger->makeResidentCall); EXPECT_EQ(MockGdi::pagingFenceReturnValue, logger->makeResidentPagingFence); } TEST_F(WddmTest, GivenResidencyLoggingEnabledWhenMakeResidentFailThenExpectTrimReport) { if (!NEO::wddmResidencyLoggingAvailable) { GTEST_SKIP(); } NEO::IoFunctions::mockFopenCalled = 0; NEO::IoFunctions::mockVfptrinfCalled = 0; NEO::IoFunctions::mockFcloseCalled = 0; DebugManagerStateRestore dbgRestore; DebugManager.flags.WddmResidencyLogger.set(true); wddm->callBaseCreatePagingLogger = false; wddm->callBaseMakeResident = true; wddm->createPagingFenceLogger(); EXPECT_NE(nullptr, wddm->residencyLogger.get()); auto logger = static_cast(wddm->residencyLogger.get()); D3DKMT_HANDLE handle = INVALID_HANDLE; uint64_t bytesToTrim = 0; wddm->makeResident(&handle, 1, false, &bytesToTrim, 0x1000); //3 - one for open log, second for report allocations, 3rd for trim size EXPECT_EQ(3u, NEO::IoFunctions::mockVfptrinfCalled); EXPECT_FALSE(logger->makeResidentCall); } TEST_F(WddmTest, GivenResidencyLoggingEnabledWhenEnterWaitCalledThenExpectInternalFlagOn) { if (!NEO::wddmResidencyLoggingAvailable) { GTEST_SKIP(); } NEO::IoFunctions::mockFopenCalled = 0; NEO::IoFunctions::mockVfptrinfCalled = 0; NEO::IoFunctions::mockFcloseCalled = 0; DebugManagerStateRestore dbgRestore; 
DebugManager.flags.WddmResidencyLogger.set(true); wddm->callBaseCreatePagingLogger = false; wddm->createPagingFenceLogger(); EXPECT_NE(nullptr, wddm->residencyLogger.get()); auto logger = static_cast(wddm->residencyLogger.get()); logger->enteredWait(); EXPECT_TRUE(logger->enterWait); } TEST_F(WddmTest, GivenResidencyLoggingEnabledWhenMakeResidentAndWaitPagingThenExpectFlagsOff) { if (!NEO::wddmResidencyLoggingAvailable) { GTEST_SKIP(); } NEO::IoFunctions::mockFopenCalled = 0; NEO::IoFunctions::mockVfptrinfCalled = 0; NEO::IoFunctions::mockFcloseCalled = 0; DebugManagerStateRestore dbgRestore; DebugManager.flags.WddmResidencyLogger.set(true); wddm->callBaseCreatePagingLogger = false; wddm->callBaseMakeResident = true; wddm->createPagingFenceLogger(); EXPECT_NE(nullptr, wddm->residencyLogger.get()); auto logger = static_cast(wddm->residencyLogger.get()); D3DKMT_HANDLE handle = ALLOCATION_HANDLE; uint64_t bytesToTrim = 0; wddm->makeResident(&handle, 1, false, &bytesToTrim, 0x1000); //2 - one for open log, second for allocation size EXPECT_EQ(2u, NEO::IoFunctions::mockVfptrinfCalled); EXPECT_TRUE(logger->makeResidentCall); EXPECT_EQ(MockGdi::pagingFenceReturnValue, logger->makeResidentPagingFence); logger->enterWait = true; wddm->waitOnPagingFenceFromCpu(); EXPECT_EQ(5u, NEO::IoFunctions::mockVfptrinfCalled); EXPECT_FALSE(logger->makeResidentCall); EXPECT_FALSE(logger->enterWait); EXPECT_EQ(MockGdi::pagingFenceReturnValue, logger->startWaitPagingFenceSave); } class MockRegistryReaderWithDriverStorePath : public SettingsReader { public: MockRegistryReaderWithDriverStorePath(const char *driverStorePathArg) : driverStorePath(driverStorePathArg){}; std::string getSetting(const char *settingName, const std::string &value) override { std::string key(settingName); if (key == "DriverStorePathForComputeRuntime") { return driverStorePath; } else if (key == "OpenCLDriverName") { return driverStorePath; } return value; } bool getSetting(const char *settingName, bool defaultValue) 
override { return defaultValue; }; int64_t getSetting(const char *settingName, int64_t defaultValue) override { return defaultValue; }; int32_t getSetting(const char *settingName, int32_t defaultValue) override { return defaultValue; }; const char *appSpecificLocation(const std::string &name) override { return name.c_str(); }; const std::string driverStorePath; }; TEST(DiscoverDevices, whenDriverInfoHasIncompatibleDriverStoreThenHwDeviceIdIsNotCreated) { VariableBackup createFuncBackup{&DriverInfoWindows::createRegistryReaderFunc}; DriverInfoWindows::createRegistryReaderFunc = [](const std::string &) -> std::unique_ptr { return std::make_unique("driverStore\\0x8086"); }; VariableBackup currentLibraryPathBackup(&SysCalls::currentLibraryPath); currentLibraryPathBackup = L"driverStore\\different_driverStore\\myLib.dll"; ExecutionEnvironment executionEnvironment; auto hwDeviceIds = OSInterface::discoverDevices(executionEnvironment); EXPECT_TRUE(hwDeviceIds.empty()); } TEST(DiscoverDevices, givenDifferentCaseInLibPathAndInDriverStorePathWhenDiscoveringDeviceThenHwDeviceIdIsCreated) { VariableBackup createFuncBackup{&DriverInfoWindows::createRegistryReaderFunc}; DriverInfoWindows::createRegistryReaderFunc = [](const std::string &) -> std::unique_ptr { return std::make_unique("\\SystemRoot\\driverStore\\0x8086"); }; VariableBackup currentLibraryPathBackup(&SysCalls::currentLibraryPath); currentLibraryPathBackup = L"\\SyStEmrOOt\\driverstore\\0x8086\\myLib.dll"; ExecutionEnvironment executionEnvironment; auto hwDeviceIds = OSInterface::discoverDevices(executionEnvironment); EXPECT_EQ(1u, hwDeviceIds.size()); } TEST(DiscoverDevices, givenLibFromHostDriverStoreAndRegistryWithDriverStoreWhenDiscoveringDeviceThenHwDeviceIdIsCreated) { VariableBackup createFuncBackup{&DriverInfoWindows::createRegistryReaderFunc}; DriverInfoWindows::createRegistryReaderFunc = [](const std::string &) -> std::unique_ptr { return std::make_unique("\\SystemRoot\\driverStore\\0x8086"); }; 
VariableBackup currentLibraryPathBackup(&SysCalls::currentLibraryPath); currentLibraryPathBackup = L"\\SystemRoot\\hostdriverStore\\0x8086\\myLib.dll"; ExecutionEnvironment executionEnvironment; auto hwDeviceIds = OSInterface::discoverDevices(executionEnvironment); EXPECT_EQ(1u, hwDeviceIds.size()); } TEST(DiscoverDevices, givenLibFromDriverStoreAndRegistryWithHostDriverStoreWhenDiscoveringDeviceThenHwDeviceIdIsCreated) { VariableBackup createFuncBackup{&DriverInfoWindows::createRegistryReaderFunc}; DriverInfoWindows::createRegistryReaderFunc = [](const std::string &) -> std::unique_ptr { return std::make_unique("\\SystemRoot\\driverStore\\0x8086"); }; VariableBackup currentLibraryPathBackup(&SysCalls::currentLibraryPath); currentLibraryPathBackup = L"\\SystemRoot\\hostdriverStore\\0x8086\\myLib.dll"; ExecutionEnvironment executionEnvironment; auto hwDeviceIds = OSInterface::discoverDevices(executionEnvironment); EXPECT_EQ(1u, hwDeviceIds.size()); } TEST(VerifyAdapterType, whenAdapterDoesntSupportRenderThenDontCreateHwDeviceId) { auto gdi = std::make_unique(); auto osEnv = std::make_unique(); osEnv->gdi.reset(gdi.release()); LUID shadowAdapterLuid = {0xdd, 0xdd}; auto hwDeviceId = createHwDeviceIdFromAdapterLuid(*osEnv, shadowAdapterLuid); EXPECT_EQ(nullptr, hwDeviceId.get()); } TEST(VerifyAdapterType, whenAdapterSupportsRenderThenCreateHwDeviceId) { auto gdi = std::make_unique(); auto osEnv = std::make_unique(); osEnv->gdi.reset(gdi.release()); LUID adapterLuid = {0x12, 0x1234}; auto hwDeviceId = createHwDeviceIdFromAdapterLuid(*osEnv, adapterLuid); EXPECT_NE(nullptr, hwDeviceId.get()); } TEST_F(WddmTestWithMockGdiDll, givenInvalidInputwhenSettingAllocationPriorityThenFalseIsReturned) { init(); EXPECT_FALSE(wddm->setAllocationPriority(nullptr, 0, DXGI_RESOURCE_PRIORITY_MAXIMUM)); EXPECT_FALSE(wddm->setAllocationPriority(nullptr, 5, DXGI_RESOURCE_PRIORITY_MAXIMUM)); { D3DKMT_HANDLE handles[] = {ALLOCATION_HANDLE, 0}; 
EXPECT_FALSE(wddm->setAllocationPriority(handles, 2, DXGI_RESOURCE_PRIORITY_MAXIMUM)); } } TEST_F(WddmTestWithMockGdiDll, givenValidInputwhenSettingAllocationPriorityThenTrueIsReturned) { init(); D3DKMT_HANDLE handles[] = {ALLOCATION_HANDLE, ALLOCATION_HANDLE + 1}; EXPECT_TRUE(wddm->setAllocationPriority(handles, 2, DXGI_RESOURCE_PRIORITY_MAXIMUM)); EXPECT_EQ(DXGI_RESOURCE_PRIORITY_MAXIMUM, getLastPriorityFcn()); EXPECT_TRUE(wddm->setAllocationPriority(handles, 2, DXGI_RESOURCE_PRIORITY_NORMAL)); EXPECT_EQ(DXGI_RESOURCE_PRIORITY_NORMAL, getLastPriorityFcn()); } TEST_F(WddmTestWithMockGdiDll, givenQueryAdapterInfoCallReturnsSuccesThenPciBusInfoIsValid) { ADAPTER_BDF queryAdapterBDF{}; queryAdapterBDF.Bus = 1; queryAdapterBDF.Device = 2; queryAdapterBDF.Function = 3; setAdapterBDFFcn(queryAdapterBDF); EXPECT_TRUE(wddm->queryAdapterInfo()); auto pciBusInfo = wddm->getPciBusInfo(); EXPECT_EQ(pciBusInfo.pciDomain, 0u); EXPECT_EQ(pciBusInfo.pciBus, 1u); EXPECT_EQ(pciBusInfo.pciDevice, 2u); EXPECT_EQ(pciBusInfo.pciFunction, 3u); } TEST_F(WddmTestWithMockGdiDll, givenQueryAdapterInfoCallReturnsInvalidAdapterBDFThenPciBusInfoIsNotValid) { ADAPTER_BDF queryAdapterBDF{}; queryAdapterBDF.Data = std::numeric_limits::max(); setAdapterBDFFcn(queryAdapterBDF); EXPECT_TRUE(wddm->queryAdapterInfo()); auto pciBusInfo = wddm->getPciBusInfo(); EXPECT_EQ(pciBusInfo.pciDomain, PhysicalDevicePciBusInfo::InvalidValue); EXPECT_EQ(pciBusInfo.pciBus, PhysicalDevicePciBusInfo::InvalidValue); EXPECT_EQ(pciBusInfo.pciDevice, PhysicalDevicePciBusInfo::InvalidValue); EXPECT_EQ(pciBusInfo.pciFunction, PhysicalDevicePciBusInfo::InvalidValue); } compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/windows/wddm23_tests.cpp000066400000000000000000000247671422164147700310300ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/preemption.h" #include 
"shared/source/execution_environment/execution_environment.h" #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/helpers/constants.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/os_interface/os_interface.h" #include "shared/source/os_interface/windows/gdi_interface.h" #include "shared/source/os_interface/windows/os_context_win.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/engine_descriptor_helper.h" #include "shared/test/common/mocks/mock_wddm.h" #include "shared/test/common/mocks/mock_wddm_interface23.h" #include "shared/test/common/os_interface/windows/gdi_dll_fixture.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/mocks/mock_platform.h" using namespace NEO; struct Wddm23TestsWithoutWddmInit : public ::testing::Test, GdiDllFixture { void SetUp() override { GdiDllFixture::SetUp(); executionEnvironment = platform()->peekExecutionEnvironment(); wddm = static_cast(Wddm::createWddm(nullptr, *executionEnvironment->rootDeviceEnvironments[0].get())); auto &osInterface = executionEnvironment->rootDeviceEnvironments[0]->osInterface; osInterface = std::make_unique(); osInterface->setDriverModel(std::unique_ptr(wddm)); wddm->featureTable->flags.ftrWddmHwQueues = true; wddmMockInterface = new WddmMockInterface23(*wddm); wddm->wddmInterface.reset(wddmMockInterface); } void init() { auto preemptionMode = PreemptionHelper::getDefaultPreemptionMode(*defaultHwInfo); wddmMockInterface = static_cast(wddm->wddmInterface.release()); wddm->init(); wddm->wddmInterface.reset(wddmMockInterface); osContext = std::make_unique(*wddm, 0u, EngineDescriptorHelper::getDefaultDescriptor(HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*defaultHwInfo)[0], preemptionMode)); osContext->ensureContextInitialized(); } void TearDown() override { 
GdiDllFixture::TearDown(); } std::unique_ptr osContext; WddmMock *wddm = nullptr; WddmMockInterface23 *wddmMockInterface = nullptr; ExecutionEnvironment *executionEnvironment; }; struct Wddm23Tests : public Wddm23TestsWithoutWddmInit { using Wddm23TestsWithoutWddmInit::TearDown; void SetUp() override { Wddm23TestsWithoutWddmInit::SetUp(); init(); } }; TEST_F(Wddm23Tests, whenGetDedicatedVideoMemoryIsCalledThenCorrectValueIsReturned) { EXPECT_EQ(wddm->dedicatedVideoMemory, wddm->getDedicatedVideoMemory()); } TEST_F(Wddm23Tests, whenCreateContextIsCalledThenEnableHwQueues) { EXPECT_TRUE(wddm->wddmInterface->hwQueuesSupported()); EXPECT_EQ(1u, getCreateContextDataFcn()->Flags.HwQueueSupported); } TEST_F(Wddm23Tests, givenPreemptionModeWhenCreateHwQueueCalledThenSetGpuTimeoutIfEnabled) { auto defaultEngine = HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*defaultHwInfo)[0]; OsContextWin osContextWithoutPreemption(*wddm, 0u, EngineDescriptorHelper::getDefaultDescriptor(defaultEngine, PreemptionMode::Disabled)); OsContextWin osContextWithPreemption(*wddm, 0, EngineDescriptorHelper::getDefaultDescriptor(defaultEngine, PreemptionMode::MidBatch)); wddm->wddmInterface->createHwQueue(osContextWithoutPreemption); EXPECT_EQ(0u, getCreateHwQueueDataFcn()->Flags.DisableGpuTimeout); wddm->wddmInterface->createHwQueue(osContextWithPreemption); EXPECT_EQ(1u, getCreateHwQueueDataFcn()->Flags.DisableGpuTimeout); } TEST_F(Wddm23Tests, whenDestroyHwQueueCalledThenPassExistingHandle) { D3DKMT_HANDLE hwQueue = 123; osContext->setHwQueue({hwQueue, 0, nullptr, 0}); wddmMockInterface->destroyHwQueue(osContext->getHwQueue().handle); EXPECT_EQ(hwQueue, getDestroyHwQueueDataFcn()->hHwQueue); hwQueue = 0; osContext->setHwQueue({hwQueue, 0, nullptr, 0}); wddmMockInterface->destroyHwQueue(osContext->getHwQueue().handle); EXPECT_NE(hwQueue, getDestroyHwQueueDataFcn()->hHwQueue); // gdi not called when 0 } TEST_F(Wddm23Tests, 
whenObjectIsDestructedThenDestroyHwQueue) { D3DKMT_HANDLE hwQueue = 123; osContext->setHwQueue({hwQueue, 0, nullptr, 0}); osContext.reset(); EXPECT_EQ(hwQueue, getDestroyHwQueueDataFcn()->hHwQueue); } TEST_F(Wddm23Tests, givenCmdBufferWhenSubmitCalledThenSetAllRequiredFiledsAndUpdateMonitoredFence) { uint64_t cmdBufferAddress = 123; size_t cmdSize = 456; auto hwQueue = osContext->getHwQueue(); COMMAND_BUFFER_HEADER cmdBufferHeader = {}; EXPECT_EQ(1u, osContext->getResidencyController().getMonitoredFence().currentFenceValue); EXPECT_EQ(0u, osContext->getResidencyController().getMonitoredFence().lastSubmittedFence); WddmSubmitArguments submitArgs = {}; submitArgs.contextHandle = osContext->getWddmContextHandle(); submitArgs.hwQueueHandle = hwQueue.handle; submitArgs.monitorFence = &osContext->getResidencyController().getMonitoredFence(); wddm->submit(cmdBufferAddress, cmdSize, &cmdBufferHeader, submitArgs); EXPECT_EQ(cmdBufferAddress, getSubmitCommandToHwQueueDataFcn()->CommandBuffer); EXPECT_EQ(static_cast(cmdSize), getSubmitCommandToHwQueueDataFcn()->CommandLength); EXPECT_EQ(hwQueue.handle, getSubmitCommandToHwQueueDataFcn()->hHwQueue); EXPECT_EQ(osContext->getResidencyController().getMonitoredFence().lastSubmittedFence, getSubmitCommandToHwQueueDataFcn()->HwQueueProgressFenceId); EXPECT_EQ(&cmdBufferHeader, getSubmitCommandToHwQueueDataFcn()->pPrivateDriverData); EXPECT_EQ(static_cast(sizeof(COMMAND_BUFFER_HEADER)), getSubmitCommandToHwQueueDataFcn()->PrivateDriverDataSize); EXPECT_EQ(0u, cmdBufferHeader.MonitorFenceVA); EXPECT_EQ(0u, cmdBufferHeader.MonitorFenceValue); EXPECT_EQ(2u, osContext->getResidencyController().getMonitoredFence().currentFenceValue); EXPECT_EQ(1u, osContext->getResidencyController().getMonitoredFence().lastSubmittedFence); } TEST_F(Wddm23Tests, givenDebugVariableSetWhenSubmitCalledThenUseCmdBufferHeaderSizeForPrivateDriverDataSize) { DebugManagerStateRestore restore; 
DebugManager.flags.UseCommandBufferHeaderSizeForWddmQueueSubmission.set(true); COMMAND_BUFFER_HEADER cmdBufferHeader = {}; WddmSubmitArguments submitArgs = {}; submitArgs.contextHandle = osContext->getWddmContextHandle(); submitArgs.hwQueueHandle = osContext->getHwQueue().handle; submitArgs.monitorFence = &osContext->getResidencyController().getMonitoredFence(); wddm->submit(123, 456, &cmdBufferHeader, submitArgs); EXPECT_EQ(static_cast(sizeof(COMMAND_BUFFER_HEADER)), getSubmitCommandToHwQueueDataFcn()->PrivateDriverDataSize); DebugManager.flags.UseCommandBufferHeaderSizeForWddmQueueSubmission.set(false); cmdBufferHeader = {}; submitArgs = {}; submitArgs.contextHandle = osContext->getWddmContextHandle(); submitArgs.hwQueueHandle = osContext->getHwQueue().handle; submitArgs.monitorFence = &osContext->getResidencyController().getMonitoredFence(); wddm->submit(123, 456, &cmdBufferHeader, submitArgs); EXPECT_EQ(static_cast(MemoryConstants::pageSize), getSubmitCommandToHwQueueDataFcn()->PrivateDriverDataSize); } TEST_F(Wddm23Tests, whenMonitoredFenceIsCreatedThenSetupAllRequiredFields) { wddm->wddmInterface->createMonitoredFence(*osContext); auto hwQueue = osContext->getHwQueue(); EXPECT_EQ(hwQueue.progressFenceCpuVA, osContext->getResidencyController().getMonitoredFence().cpuAddress); EXPECT_EQ(1u, osContext->getResidencyController().getMonitoredFence().currentFenceValue); EXPECT_EQ(hwQueue.progressFenceHandle, osContext->getResidencyController().getMonitoredFence().fenceHandle); EXPECT_EQ(hwQueue.progressFenceGpuVA, osContext->getResidencyController().getMonitoredFence().gpuAddress); EXPECT_EQ(0u, osContext->getResidencyController().getMonitoredFence().lastSubmittedFence); } TEST_F(Wddm23Tests, givenCurrentPendingFenceValueGreaterThanPendingFenceValueWhenSubmitCalledThenCallWaitOnGpu) { uint64_t cmdBufferAddress = 123; size_t cmdSize = 456; COMMAND_BUFFER_HEADER cmdBufferHeader = {}; WddmSubmitArguments submitArgs = {}; submitArgs.contextHandle = 
osContext->getWddmContextHandle(); submitArgs.hwQueueHandle = osContext->getHwQueue().handle; submitArgs.monitorFence = &osContext->getResidencyController().getMonitoredFence(); *wddm->pagingFenceAddress = 1; wddm->currentPagingFenceValue = 1; wddm->submit(cmdBufferAddress, cmdSize, &cmdBufferHeader, submitArgs); EXPECT_EQ(0u, wddm->waitOnGPUResult.called); wddm->currentPagingFenceValue = 2; wddm->submit(cmdBufferAddress, cmdSize, &cmdBufferHeader, submitArgs); EXPECT_EQ(1u, wddm->waitOnGPUResult.called); } TEST_F(Wddm23Tests, givenDestructionOsContextWinWhenCallingDestroyMonitorFenceThenDoNotCallGdiDestroy) { osContext.reset(nullptr); EXPECT_EQ(1u, wddmMockInterface->destroyMonitorFenceCalled); EXPECT_EQ(0u, getDestroySynchronizationObjectDataFcn()->hSyncObject); } TEST_F(Wddm23TestsWithoutWddmInit, whenInitCalledThenInitializeNewGdiDDIsAndCallToCreateHwQueue) { EXPECT_EQ(nullptr, wddm->getGdi()->createHwQueue.mFunc); EXPECT_EQ(nullptr, wddm->getGdi()->destroyHwQueue.mFunc); EXPECT_EQ(nullptr, wddm->getGdi()->submitCommandToHwQueue.mFunc); init(); EXPECT_EQ(1u, wddmMockInterface->createHwQueueCalled); EXPECT_NE(nullptr, wddm->getGdi()->createHwQueue.mFunc); EXPECT_NE(nullptr, wddm->getGdi()->destroyHwQueue.mFunc); EXPECT_NE(nullptr, wddm->getGdi()->submitCommandToHwQueue.mFunc); } TEST_F(Wddm23TestsWithoutWddmInit, whenCreateHwQueueFailedThenReturnFalseFromInit) { wddmMockInterface->forceCreateHwQueueFail = true; EXPECT_ANY_THROW(init()); } TEST_F(Wddm23TestsWithoutWddmInit, givenFailureOnGdiInitializationWhenCreatingHwQueueThenReturnFailure) { struct MyMockGdi : public Gdi { bool setupHwQueueProcAddresses() override { return false; } }; auto myMockGdi = new MyMockGdi(); wddm->resetGdi(myMockGdi); EXPECT_ANY_THROW(init()); EXPECT_EQ(1u, wddmMockInterface->createHwQueueCalled); EXPECT_FALSE(wddmMockInterface->createHwQueueResult); } 
compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/windows/wddm_address_space_tests.cpp000066400000000000000000000113221422164147700335220ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/preemption.h" #include "shared/source/execution_environment/root_device_environment.h" #include "shared/test/common/mocks/mock_execution_environment.h" #include "shared/test/common/mocks/mock_wddm.h" #include "shared/test/common/test_macros/test.h" using namespace NEO; class WddmMockReserveAddress : public WddmMock { public: WddmMockReserveAddress(RootDeviceEnvironment &rootDeviceEnvironment) : WddmMock(rootDeviceEnvironment) {} void *virtualAlloc(void *inPtr, size_t size, bool topDownHint) override { if (returnGood != 0) { return WddmMock::virtualAlloc(inPtr, size, topDownHint); } if (returnInvalidCount != 0) { returnInvalidIter++; if (returnInvalidIter > returnInvalidCount) { return WddmMock::virtualAlloc(inPtr, size, topDownHint); } if (returnNullCount != 0) { returnNullIter++; if (returnNullIter > returnNullCount) { return nullptr; } return reinterpret_cast(0x1000); } return reinterpret_cast(0x1000); } return nullptr; } void virtualFree(void *ptr, size_t size) override { if ((ptr == reinterpret_cast(0x1000)) || (ptr == reinterpret_cast(0x0))) { return; } return WddmMock::virtualFree(ptr, size); } uint32_t returnGood = 0; uint32_t returnInvalidCount = 0; uint32_t returnInvalidIter = 0; uint32_t returnNullCount = 0; uint32_t returnNullIter = 0; }; TEST(WddmReserveAddressTest, givenWddmWhenFirstIsSuccessfulThenReturnReserveAddress) { MockExecutionEnvironment executionEnvironment; RootDeviceEnvironment rootDeviceEnvironment(executionEnvironment); auto wddm = new WddmMockReserveAddress(rootDeviceEnvironment); size_t size = 0x1000; void *reserve = nullptr; wddm->init(); wddm->returnGood = 1; auto ret = wddm->reserveValidAddressRange(size, reserve); uintptr_t 
expectedReserve = wddm->virtualAllocAddress; EXPECT_TRUE(ret); EXPECT_EQ(expectedReserve, reinterpret_cast(reserve)); wddm->releaseReservedAddress(reserve); } TEST(WddmReserveAddressTest, givenWddmWhenFirstIsNullThenReturnNull) { MockExecutionEnvironment executionEnvironment; RootDeviceEnvironment rootDeviceEnvironment(executionEnvironment); auto wddm = new WddmMockReserveAddress(rootDeviceEnvironment); size_t size = 0x1000; void *reserve = nullptr; wddm->init(); uintptr_t expectedReserve = 0; auto ret = wddm->reserveValidAddressRange(size, reserve); EXPECT_FALSE(ret); EXPECT_EQ(expectedReserve, reinterpret_cast(reserve)); } TEST(WddmReserveAddressTest, givenWddmWhenFirstIsInvalidSecondSuccessfulThenReturnSecond) { MockExecutionEnvironment executionEnvironment; RootDeviceEnvironment rootDeviceEnvironment(executionEnvironment); auto wddm = new WddmMockReserveAddress(rootDeviceEnvironment); size_t size = 0x1000; void *reserve = nullptr; wddm->init(); wddm->returnInvalidCount = 1; auto ret = wddm->reserveValidAddressRange(size, reserve); uintptr_t expectedReserve = wddm->virtualAllocAddress; EXPECT_TRUE(ret); EXPECT_EQ(expectedReserve, reinterpret_cast(reserve)); wddm->releaseReservedAddress(reserve); } TEST(WddmReserveAddressTest, givenWddmWhenSecondIsInvalidThirdSuccessfulThenReturnThird) { MockExecutionEnvironment executionEnvironment; RootDeviceEnvironment rootDeviceEnvironment(executionEnvironment); auto wddm = new WddmMockReserveAddress(rootDeviceEnvironment); size_t size = 0x1000; void *reserve = nullptr; wddm->init(); wddm->returnInvalidCount = 2; auto ret = wddm->reserveValidAddressRange(size, reserve); uintptr_t expectedReserve = wddm->virtualAllocAddress; EXPECT_TRUE(ret); EXPECT_EQ(expectedReserve, reinterpret_cast(reserve)); wddm->releaseReservedAddress(reserve); } TEST(WddmReserveAddressTest, givenWddmWhenFirstIsInvalidSecondNullThenReturnSecondNull) { MockExecutionEnvironment executionEnvironment; RootDeviceEnvironment 
rootDeviceEnvironment(executionEnvironment); auto wddm = new WddmMockReserveAddress(rootDeviceEnvironment); size_t size = 0x1000; void *reserve = nullptr; wddm->init(); wddm->returnInvalidCount = 2; wddm->returnNullCount = 1; uintptr_t expectedReserve = 0; auto ret = wddm->reserveValidAddressRange(size, reserve); EXPECT_FALSE(ret); EXPECT_EQ(expectedReserve, reinterpret_cast(reserve)); } compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/windows/wddm_kmdaf_listener_tests.cpp000066400000000000000000000331161422164147700337160ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/preemption.h" #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/gmm_helper/gmm.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/os_interface/windows/os_environment_win.h" #include "shared/source/os_interface/windows/wddm/wddm.h" #include "shared/source/os_interface/windows/wddm_allocation.h" #include "shared/test/common/mock_gdi/mock_gdi.h" #include "shared/test/common/mocks/windows/mock_gdi_interface.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "opencl/test/unit_test/os_interface/windows/mock_kmdaf_listener.h" #include "opencl/test/unit_test/os_interface/windows/mock_wddm_allocation.h" using namespace NEO; class WddmWithKmDafMock : public Wddm { public: using Wddm::featureTable; using Wddm::mapGpuVirtualAddress; WddmWithKmDafMock(RootDeviceEnvironment &rootDeviceEnvironment) : Wddm(std::make_unique(ADAPTER_HANDLE, LUID{}, rootDeviceEnvironment.executionEnvironment.osEnvironment.get(), std::make_unique()), rootDeviceEnvironment) { kmDafListener.reset(new KmDafListenerMock); } KmDafListenerMock &getKmDafListenerMock() { return 
static_cast(*this->kmDafListener); } }; class WddmKmDafListenerTest : public ::testing::Test { public: void SetUp() { executionEnvironment = platform()->peekExecutionEnvironment(); rootDeviceEnvironment = executionEnvironment->rootDeviceEnvironments[0].get(); auto osEnvironment = new OsEnvironmentWin(); osEnvironment->gdi.reset(new MockGdi()); executionEnvironment->osEnvironment.reset(osEnvironment); wddmWithKmDafMock = new WddmWithKmDafMock(*rootDeviceEnvironment); wddmWithKmDafMock->init(); wddmWithKmDafMock->featureTable->flags.ftrKmdDaf = true; } void TearDown() { } WddmWithKmDafMock *wddmWithKmDafMock = nullptr; ExecutionEnvironment *executionEnvironment = nullptr; RootDeviceEnvironment *rootDeviceEnvironment = nullptr; }; TEST_F(WddmKmDafListenerTest, givenWddmWhenLockResourceIsCalledThenKmDafListenerNotifyLockIsFedWithCorrectParams) { wddmWithKmDafMock->lockResource(ALLOCATION_HANDLE, false, 0x1000); EXPECT_EQ(wddmWithKmDafMock->featureTable->flags.ftrKmdDaf, wddmWithKmDafMock->getKmDafListenerMock().notifyLockParametrization.ftrKmdDaf); EXPECT_EQ(wddmWithKmDafMock->getAdapter(), wddmWithKmDafMock->getKmDafListenerMock().notifyLockParametrization.hAdapter); EXPECT_EQ(wddmWithKmDafMock->getDeviceHandle(), wddmWithKmDafMock->getKmDafListenerMock().notifyLockParametrization.hDevice); EXPECT_EQ(ALLOCATION_HANDLE, wddmWithKmDafMock->getKmDafListenerMock().notifyLockParametrization.hAllocation); EXPECT_EQ(0, wddmWithKmDafMock->getKmDafListenerMock().notifyLockParametrization.pLockFlags); EXPECT_EQ(wddmWithKmDafMock->getGdi()->escape, wddmWithKmDafMock->getKmDafListenerMock().notifyLockParametrization.pfnEscape); } TEST_F(WddmKmDafListenerTest, givenWddmWhenUnlockResourceIsCalledThenKmDafListenerNotifyUnlockIsFedWithCorrectParams) { wddmWithKmDafMock->unlockResource(ALLOCATION_HANDLE); EXPECT_EQ(wddmWithKmDafMock->featureTable->flags.ftrKmdDaf, wddmWithKmDafMock->getKmDafListenerMock().notifyUnlockParametrization.ftrKmdDaf); 
EXPECT_EQ(wddmWithKmDafMock->getAdapter(), wddmWithKmDafMock->getKmDafListenerMock().notifyUnlockParametrization.hAdapter); EXPECT_EQ(wddmWithKmDafMock->getDeviceHandle(), wddmWithKmDafMock->getKmDafListenerMock().notifyUnlockParametrization.hDevice); EXPECT_EQ(ALLOCATION_HANDLE, *wddmWithKmDafMock->getKmDafListenerMock().notifyUnlockParametrization.phAllocation); EXPECT_EQ(1u, wddmWithKmDafMock->getKmDafListenerMock().notifyUnlockParametrization.allocations); EXPECT_EQ(wddmWithKmDafMock->getGdi()->escape, wddmWithKmDafMock->getKmDafListenerMock().notifyUnlockParametrization.pfnEscape); } TEST_F(WddmKmDafListenerTest, givenWddmWhenMapGpuVirtualAddressIsCalledThenKmDafListenerNotifyMapGpuVAIsFedWithCorrectParams) { uint64_t gpuPtr = 0u; auto gmm = std::make_unique(rootDeviceEnvironment->getGmmClientContext(), nullptr, 1, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, StorageInfo{}, true); wddmWithKmDafMock->mapGpuVirtualAddress(gmm.get(), ALLOCATION_HANDLE, wddmWithKmDafMock->getGfxPartition().Standard.Base, wddmWithKmDafMock->getGfxPartition().Standard.Limit, 0u, gpuPtr); EXPECT_EQ(wddmWithKmDafMock->featureTable->flags.ftrKmdDaf, wddmWithKmDafMock->getKmDafListenerMock().notifyMapGpuVAParametrization.ftrKmdDaf); EXPECT_EQ(wddmWithKmDafMock->getAdapter(), wddmWithKmDafMock->getKmDafListenerMock().notifyMapGpuVAParametrization.hAdapter); EXPECT_EQ(wddmWithKmDafMock->getDeviceHandle(), wddmWithKmDafMock->getKmDafListenerMock().notifyMapGpuVAParametrization.hDevice); EXPECT_EQ(ALLOCATION_HANDLE, wddmWithKmDafMock->getKmDafListenerMock().notifyMapGpuVAParametrization.hAllocation); EXPECT_EQ(GmmHelper::decanonize(gpuPtr), wddmWithKmDafMock->getKmDafListenerMock().notifyMapGpuVAParametrization.gpuVirtualAddress); EXPECT_EQ(wddmWithKmDafMock->getGdi()->escape, wddmWithKmDafMock->getKmDafListenerMock().notifyMapGpuVAParametrization.pfnEscape); } TEST_F(WddmKmDafListenerTest, givenWddmWhenFreeGpuVirtualAddressIsCalledThenKmDafListenerNotifyUnmapGpuVAIsFedWithCorrectParams) { 
uint64_t gpuPtr = GPUVA; wddmWithKmDafMock->freeGpuVirtualAddress(gpuPtr, MemoryConstants::pageSize); EXPECT_EQ(wddmWithKmDafMock->featureTable->flags.ftrKmdDaf, wddmWithKmDafMock->getKmDafListenerMock().notifyUnmapGpuVAParametrization.ftrKmdDaf); EXPECT_EQ(wddmWithKmDafMock->getAdapter(), wddmWithKmDafMock->getKmDafListenerMock().notifyUnmapGpuVAParametrization.hAdapter); EXPECT_EQ(wddmWithKmDafMock->getDeviceHandle(), wddmWithKmDafMock->getKmDafListenerMock().notifyUnmapGpuVAParametrization.hDevice); EXPECT_EQ(GPUVA, wddmWithKmDafMock->getKmDafListenerMock().notifyUnmapGpuVAParametrization.gpuVirtualAddress); EXPECT_EQ(wddmWithKmDafMock->getGdi()->escape, wddmWithKmDafMock->getKmDafListenerMock().notifyUnmapGpuVAParametrization.pfnEscape); } TEST_F(WddmKmDafListenerTest, givenWddmWhenMakeResidentIsCalledThenKmDafListenerNotifyMakeResidentIsFedWithCorrectParams) { MockWddmAllocation allocation(rootDeviceEnvironment->getGmmClientContext()); wddmWithKmDafMock->makeResident(&allocation.handle, 1, false, nullptr, 0x1000); EXPECT_EQ(wddmWithKmDafMock->featureTable->flags.ftrKmdDaf, wddmWithKmDafMock->getKmDafListenerMock().notifyMakeResidentParametrization.ftrKmdDaf); EXPECT_EQ(wddmWithKmDafMock->getAdapter(), wddmWithKmDafMock->getKmDafListenerMock().notifyMakeResidentParametrization.hAdapter); EXPECT_EQ(wddmWithKmDafMock->getDeviceHandle(), wddmWithKmDafMock->getKmDafListenerMock().notifyMakeResidentParametrization.hDevice); EXPECT_EQ(allocation.handle, *wddmWithKmDafMock->getKmDafListenerMock().notifyMakeResidentParametrization.phAllocation); EXPECT_EQ(1u, wddmWithKmDafMock->getKmDafListenerMock().notifyMakeResidentParametrization.allocations); EXPECT_EQ(wddmWithKmDafMock->getGdi()->escape, wddmWithKmDafMock->getKmDafListenerMock().notifyMakeResidentParametrization.pfnEscape); } TEST_F(WddmKmDafListenerTest, givenWddmWhenEvictIsCalledThenKmDafListenerNotifyEvictIsFedWithCorrectParams) { MockWddmAllocation allocation(rootDeviceEnvironment->getGmmClientContext()); 
uint64_t sizeToTrim; wddmWithKmDafMock->evict(&allocation.handle, 1, sizeToTrim); EXPECT_EQ(wddmWithKmDafMock->featureTable->flags.ftrKmdDaf, wddmWithKmDafMock->getKmDafListenerMock().notifyEvictParametrization.ftrKmdDaf); EXPECT_EQ(wddmWithKmDafMock->getAdapter(), wddmWithKmDafMock->getKmDafListenerMock().notifyEvictParametrization.hAdapter); EXPECT_EQ(wddmWithKmDafMock->getDeviceHandle(), wddmWithKmDafMock->getKmDafListenerMock().notifyEvictParametrization.hDevice); EXPECT_EQ(allocation.handle, *wddmWithKmDafMock->getKmDafListenerMock().notifyEvictParametrization.phAllocation); EXPECT_EQ(1u, wddmWithKmDafMock->getKmDafListenerMock().notifyEvictParametrization.allocations); EXPECT_EQ(wddmWithKmDafMock->getGdi()->escape, wddmWithKmDafMock->getKmDafListenerMock().notifyEvictParametrization.pfnEscape); } TEST_F(WddmKmDafListenerTest, givenWddmWhenCreateAllocationIsCalledThenKmDafListenerNotifyWriteTargetIsFedWithCorrectParams) { auto gmm = std::make_unique(rootDeviceEnvironment->getGmmClientContext(), nullptr, 1, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, StorageInfo{}, true); auto handle = 0u; auto resourceHandle = 0u; auto ptr = reinterpret_cast(0x10000); wddmWithKmDafMock->createAllocation(ptr, gmm.get(), handle, resourceHandle, nullptr); EXPECT_EQ(wddmWithKmDafMock->featureTable->flags.ftrKmdDaf, wddmWithKmDafMock->getKmDafListenerMock().notifyWriteTargetParametrization.ftrKmdDaf); EXPECT_EQ(wddmWithKmDafMock->getAdapter(), wddmWithKmDafMock->getKmDafListenerMock().notifyWriteTargetParametrization.hAdapter); EXPECT_EQ(wddmWithKmDafMock->getDeviceHandle(), wddmWithKmDafMock->getKmDafListenerMock().notifyWriteTargetParametrization.hDevice); EXPECT_EQ(handle, wddmWithKmDafMock->getKmDafListenerMock().notifyWriteTargetParametrization.hAllocation); EXPECT_EQ(wddmWithKmDafMock->getGdi()->escape, wddmWithKmDafMock->getKmDafListenerMock().notifyWriteTargetParametrization.pfnEscape); } TEST_F(WddmKmDafListenerTest, 
givenWddmWhenCreateAllocation64IsCalledThenKmDafListenerNotifyWriteTargetIsFedWithCorrectParams) { auto gmm = std::make_unique(rootDeviceEnvironment->getGmmClientContext(), nullptr, 1, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, StorageInfo{}, true); auto handle = 0u; wddmWithKmDafMock->createAllocation(gmm.get(), handle); EXPECT_EQ(wddmWithKmDafMock->featureTable->flags.ftrKmdDaf, wddmWithKmDafMock->getKmDafListenerMock().notifyWriteTargetParametrization.ftrKmdDaf); EXPECT_EQ(wddmWithKmDafMock->getAdapter(), wddmWithKmDafMock->getKmDafListenerMock().notifyWriteTargetParametrization.hAdapter); EXPECT_EQ(wddmWithKmDafMock->getDeviceHandle(), wddmWithKmDafMock->getKmDafListenerMock().notifyWriteTargetParametrization.hDevice); EXPECT_EQ(handle, wddmWithKmDafMock->getKmDafListenerMock().notifyWriteTargetParametrization.hAllocation); EXPECT_EQ(wddmWithKmDafMock->getGdi()->escape, wddmWithKmDafMock->getKmDafListenerMock().notifyWriteTargetParametrization.pfnEscape); } TEST_F(WddmKmDafListenerTest, givenWddmWhenCreateAllocationsAndMapGpuVaIsCalledThenKmDafListenerNotifyWriteTargetAndMapGpuVAIsFedWithCorrectParams) { OsHandleStorage storage; OsHandleWin osHandle; auto gmm = std::unique_ptr(new Gmm(rootDeviceEnvironment->getGmmClientContext(), nullptr, 1, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true)); storage.fragmentStorageData[0].osHandleStorage = &osHandle; storage.fragmentStorageData[0].fragmentSize = 100; static_cast(storage.fragmentStorageData[0].osHandleStorage)->gmm = gmm.get(); wddmWithKmDafMock->createAllocationsAndMapGpuVa(storage); EXPECT_EQ(wddmWithKmDafMock->featureTable->flags.ftrKmdDaf, wddmWithKmDafMock->getKmDafListenerMock().notifyWriteTargetParametrization.ftrKmdDaf); EXPECT_EQ(wddmWithKmDafMock->getAdapter(), wddmWithKmDafMock->getKmDafListenerMock().notifyWriteTargetParametrization.hAdapter); EXPECT_EQ(wddmWithKmDafMock->getDeviceHandle(), wddmWithKmDafMock->getKmDafListenerMock().notifyWriteTargetParametrization.hDevice); EXPECT_EQ(osHandle.handle, 
wddmWithKmDafMock->getKmDafListenerMock().notifyWriteTargetParametrization.hAllocation); EXPECT_EQ(wddmWithKmDafMock->getGdi()->escape, wddmWithKmDafMock->getKmDafListenerMock().notifyWriteTargetParametrization.pfnEscape); EXPECT_EQ(wddmWithKmDafMock->featureTable->flags.ftrKmdDaf, wddmWithKmDafMock->getKmDafListenerMock().notifyMapGpuVAParametrization.ftrKmdDaf); EXPECT_EQ(wddmWithKmDafMock->getAdapter(), wddmWithKmDafMock->getKmDafListenerMock().notifyMapGpuVAParametrization.hAdapter); EXPECT_EQ(wddmWithKmDafMock->getDeviceHandle(), wddmWithKmDafMock->getKmDafListenerMock().notifyMapGpuVAParametrization.hDevice); EXPECT_EQ(osHandle.handle, wddmWithKmDafMock->getKmDafListenerMock().notifyMapGpuVAParametrization.hAllocation); EXPECT_EQ(GmmHelper::decanonize(osHandle.gpuPtr), wddmWithKmDafMock->getKmDafListenerMock().notifyMapGpuVAParametrization.gpuVirtualAddress); EXPECT_EQ(wddmWithKmDafMock->getGdi()->escape, wddmWithKmDafMock->getKmDafListenerMock().notifyMapGpuVAParametrization.pfnEscape); } TEST_F(WddmKmDafListenerTest, givenWddmWhenKmDafLockIsCalledThenKmDafListenerNotifyLockIsFedWithCorrectParams) { wddmWithKmDafMock->kmDafLock(ALLOCATION_HANDLE); EXPECT_EQ(wddmWithKmDafMock->featureTable->flags.ftrKmdDaf, wddmWithKmDafMock->getKmDafListenerMock().notifyLockParametrization.ftrKmdDaf); EXPECT_EQ(wddmWithKmDafMock->getAdapter(), wddmWithKmDafMock->getKmDafListenerMock().notifyLockParametrization.hAdapter); EXPECT_EQ(wddmWithKmDafMock->getDeviceHandle(), wddmWithKmDafMock->getKmDafListenerMock().notifyLockParametrization.hDevice); EXPECT_EQ(ALLOCATION_HANDLE, wddmWithKmDafMock->getKmDafListenerMock().notifyLockParametrization.hAllocation); EXPECT_EQ(0, wddmWithKmDafMock->getKmDafListenerMock().notifyLockParametrization.pLockFlags); EXPECT_EQ(wddmWithKmDafMock->getGdi()->escape, wddmWithKmDafMock->getKmDafListenerMock().notifyLockParametrization.pfnEscape); } 
compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/windows/wddm_mapper_tests.cpp000066400000000000000000000042351422164147700322130ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/windows/wddm_engine_mapper.h" #include "shared/test/common/test_macros/test.h" using namespace NEO; TEST(WddmMapperTests, givenRcsEngineTypeWhenAskedForNodeOrdinalThenReturn3d) { GPUNODE_ORDINAL gpuNodeBcs = WddmEngineMapper::engineNodeMap(aub_stream::ENGINE_BCS); GPUNODE_ORDINAL gpuNodeRcs = WddmEngineMapper::engineNodeMap(aub_stream::ENGINE_RCS); GPUNODE_ORDINAL gpuNodeCcs = WddmEngineMapper::engineNodeMap(aub_stream::ENGINE_CCS); GPUNODE_ORDINAL gpuNodeCcs1 = WddmEngineMapper::engineNodeMap(aub_stream::ENGINE_CCS1); GPUNODE_ORDINAL gpuNodeCcs2 = WddmEngineMapper::engineNodeMap(aub_stream::ENGINE_CCS2); GPUNODE_ORDINAL gpuNodeCcs3 = WddmEngineMapper::engineNodeMap(aub_stream::ENGINE_CCS3); GPUNODE_ORDINAL gpuNodeCccs = WddmEngineMapper::engineNodeMap(aub_stream::ENGINE_CCCS); GPUNODE_ORDINAL expectedBcs = GPUNODE_BLT; GPUNODE_ORDINAL expectedRcs = GPUNODE_3D; GPUNODE_ORDINAL expectedCcs = GPUNODE_CCS0; GPUNODE_ORDINAL expectedCccs = GPUNODE_3D; EXPECT_EQ(expectedBcs, gpuNodeBcs); EXPECT_EQ(expectedRcs, gpuNodeRcs); EXPECT_EQ(expectedCcs, gpuNodeCcs); EXPECT_EQ(expectedCcs, gpuNodeCcs1); EXPECT_EQ(expectedCcs, gpuNodeCcs2); EXPECT_EQ(expectedCcs, gpuNodeCcs3); EXPECT_EQ(expectedCccs, gpuNodeCccs); } TEST(WddmMapperTests, givenLinkCopyEngineWhenMapperCalledThenReturnDefaultBltEngine) { const std::array bcsLinkEngines = {{aub_stream::ENGINE_BCS1, aub_stream::ENGINE_BCS2, aub_stream::ENGINE_BCS3, aub_stream::ENGINE_BCS4, aub_stream::ENGINE_BCS5, aub_stream::ENGINE_BCS6, aub_stream::ENGINE_BCS7, aub_stream::ENGINE_BCS8}}; for (auto engine : bcsLinkEngines) { EXPECT_EQ(GPUNODE_BLT, WddmEngineMapper::engineNodeMap(engine)); } } TEST(WddmMapperTests, 
givenNotSupportedEngineWhenAskedForNodeThenAbort) { EXPECT_THROW(WddmEngineMapper::engineNodeMap(aub_stream::ENGINE_VCS), std::exception); } wddm_memory_manager_allocate_in_device_pool_tests.cpp000066400000000000000000001021171422164147700405520ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/windows/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/array_count.h" #include "shared/source/os_interface/windows/wddm_memory_manager.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "opencl/test/unit_test/os_interface/windows/wddm_memory_manager_tests.h" #include "opencl/test/unit_test/test_macros/test_checks_ocl.h" #include "gtest/gtest.h" using namespace NEO; using namespace ::testing; TEST_F(WddmMemoryManagerSimpleTest, givenUseSystemMemorySetToTrueWhenAllocateInDevicePoolIsCalledThenNullptrIsReturned) { memoryManager.reset(new MockWddmMemoryManager(false, false, *executionEnvironment)); MemoryManager::AllocationStatus status = MemoryManager::AllocationStatus::Success; AllocationData allocData; allocData.size = MemoryConstants::pageSize; allocData.flags.useSystemMemory = true; allocData.flags.allocateMemory = true; auto allocation = memoryManager->allocateGraphicsMemoryInDevicePool(allocData, status); EXPECT_EQ(nullptr, allocation); EXPECT_EQ(MemoryManager::AllocationStatus::RetryInNonDevicePool, status); } TEST_F(WddmMemoryManagerSimpleTest, givenNotSetUseSystemMemoryWhenGraphicsAllocationInDevicePoolIsAllocatedThenLocalMemoryAllocationIsReturned) { const bool localMemoryEnabled = true; memoryManager = std::make_unique(false, localMemoryEnabled, *executionEnvironment); MemoryManager::AllocationStatus status = MemoryManager::AllocationStatus::Error; AllocationData allocData; allocData.allFlags = 0; allocData.size = MemoryConstants::pageSize; allocData.flags.allocateMemory = 
true; auto allocation = memoryManager->allocateGraphicsMemoryInDevicePool(allocData, status); EXPECT_NE(nullptr, allocation); EXPECT_EQ(MemoryManager::AllocationStatus::Success, status); EXPECT_EQ(MemoryPool::LocalMemory, allocation->getMemoryPool()); EXPECT_EQ(0u, allocation->getDefaultGmm()->resourceParams.Flags.Info.NonLocalOnly); memoryManager->freeGraphicsMemory(allocation); } TEST_F(WddmMemoryManagerSimpleTest, givenShareableAllocationWhenAllocateInDevicePoolThenMemoryIsNotLocableAndLocalOnlyIsSet) { const bool localMemoryEnabled = true; NEO::HardwareInfo hwInfo = *NEO::defaultHwInfo.get(); hwInfo.featureTable.flags.ftrLocalMemory = true; executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(&hwInfo); memoryManager = std::make_unique(false, localMemoryEnabled, *executionEnvironment); MemoryManager::AllocationStatus status = MemoryManager::AllocationStatus::Error; AllocationData allocData; allocData.allFlags = 0; allocData.type = AllocationType::SVM_GPU; allocData.storageInfo.localOnlyRequired = true; allocData.size = MemoryConstants::pageSize; allocData.flags.allocateMemory = true; allocData.flags.shareable = true; allocData.storageInfo.memoryBanks = 2; allocData.storageInfo.systemMemoryPlacement = false; auto allocation = memoryManager->allocateGraphicsMemoryInDevicePool(allocData, status); EXPECT_NE(nullptr, allocation); EXPECT_EQ(MemoryManager::AllocationStatus::Success, status); EXPECT_EQ(MemoryPool::LocalMemory, allocation->getMemoryPool()); EXPECT_EQ(0u, allocation->getDefaultGmm()->resourceParams.Flags.Info.NonLocalOnly); EXPECT_NE(allocation->peekInternalHandle(memoryManager.get()), 0u); EXPECT_EQ(1u, allocation->getDefaultGmm()->resourceParams.Flags.Info.LocalOnly); EXPECT_EQ(1u, allocation->getDefaultGmm()->resourceParams.Flags.Info.NotLockable); memoryManager->freeGraphicsMemory(allocation); } TEST_F(WddmMemoryManagerSimpleTest, givenShareableAllocationWhenAllocateGraphicsMemoryInPreferredPoolThenMemoryIsNotLocableAndLocalOnlyIsSet) { const 
bool localMemoryEnabled = true; NEO::HardwareInfo hwInfo = *NEO::defaultHwInfo.get(); hwInfo.featureTable.flags.ftrLocalMemory = true; executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(&hwInfo); memoryManager = std::make_unique(false, localMemoryEnabled, *executionEnvironment); AllocationProperties properties{mockRootDeviceIndex, MemoryConstants::pageSize, AllocationType::SVM_GPU, mockDeviceBitfield}; properties.allFlags = 0; properties.size = MemoryConstants::pageSize; properties.flags.allocateMemory = true; properties.flags.shareable = true; auto allocation = memoryManager->allocateGraphicsMemoryInPreferredPool(properties, nullptr); EXPECT_NE(nullptr, allocation); EXPECT_EQ(MemoryPool::LocalMemory, allocation->getMemoryPool()); EXPECT_EQ(0u, allocation->getDefaultGmm()->resourceParams.Flags.Info.NonLocalOnly); EXPECT_NE(allocation->peekInternalHandle(memoryManager.get()), 0u); EXPECT_EQ(1u, allocation->getDefaultGmm()->resourceParams.Flags.Info.LocalOnly); EXPECT_EQ(1u, allocation->getDefaultGmm()->resourceParams.Flags.Info.NotLockable); memoryManager->freeGraphicsMemory(allocation); } struct WddmMemoryManagerDevicePoolAlignmentTests : WddmMemoryManagerSimpleTest { void testAlignment(uint32_t allocationSize, uint32_t expectedAlignment) { const bool enable64kbPages = false; const bool localMemoryEnabled = true; memoryManager = std::make_unique(enable64kbPages, localMemoryEnabled, *executionEnvironment); MemoryManager::AllocationStatus status = MemoryManager::AllocationStatus::Error; AllocationData allocData; allocData.allFlags = 0; allocData.size = allocationSize; allocData.flags.allocateMemory = true; auto allocation = memoryManager->allocateGraphicsMemoryInDevicePool(allocData, status); EXPECT_NE(nullptr, allocation); EXPECT_EQ(MemoryManager::AllocationStatus::Success, status); EXPECT_EQ(MemoryPool::LocalMemory, allocation->getMemoryPool()); EXPECT_EQ(0u, allocation->getDefaultGmm()->resourceParams.Flags.Info.NonLocalOnly); 
EXPECT_EQ(alignUp(allocationSize, expectedAlignment), allocation->getUnderlyingBufferSize()); EXPECT_EQ(expectedAlignment, allocation->getDefaultGmm()->resourceParams.BaseAlignment); memoryManager->freeGraphicsMemory(allocation); } DebugManagerStateRestore restore{}; }; TEST_F(WddmMemoryManagerDevicePoolAlignmentTests, givenCustomAlignmentAndAllocationAsBigAsTheAlignmentWhenAllocationInDevicePoolIsCreatedThenUseCustomAlignment) { const uint32_t customAlignment = 4 * MemoryConstants::pageSize64k; const uint32_t expectedAlignment = customAlignment; const uint32_t size = 4 * MemoryConstants::pageSize64k; DebugManager.flags.ExperimentalEnableCustomLocalMemoryAlignment.set(customAlignment); testAlignment(size, expectedAlignment); testAlignment(size + 1, expectedAlignment); } TEST_F(WddmMemoryManagerDevicePoolAlignmentTests, givenCustomAlignmentAndAllocationNotAsBigAsTheAlignmentWhenAllocationInDevicePoolIsCreatedThenDoNotUseCustomAlignment) { const uint32_t customAlignment = 4 * MemoryConstants::pageSize64k; const uint32_t expectedAlignment = MemoryConstants::pageSize64k; const uint32_t size = 3 * MemoryConstants::pageSize64k; DebugManager.flags.ExperimentalEnableCustomLocalMemoryAlignment.set(customAlignment); testAlignment(size, expectedAlignment); } TEST_F(WddmMemoryManagerDevicePoolAlignmentTests, givenCustomAlignmentBiggerThan2MbAndAllocationBiggerThanCustomAlignmentWhenAllocationInDevicePoolIsCreatedThenUseCustomAlignment) { const uint32_t customAlignment = 4 * MemoryConstants::megaByte; const uint32_t expectedAlignment = customAlignment; const uint32_t size = 4 * MemoryConstants::megaByte; DebugManager.flags.ExperimentalEnableCustomLocalMemoryAlignment.set(customAlignment); testAlignment(size, expectedAlignment); testAlignment(size + 1, expectedAlignment); } TEST_F(WddmMemoryManagerDevicePoolAlignmentTests, givenCustomAlignmentBiggerThan2MbAndAllocationLessThanCustomAlignmentWhenAllocationInDevicePoolIsCreatedThenDoNotUseCustomAlignment) { const uint32_t 
customAlignment = 4 * MemoryConstants::megaByte; const uint32_t expectedAlignment = 2 * MemoryConstants::megaByte; const uint32_t size = 4 * MemoryConstants::megaByte - 1; DebugManager.flags.ExperimentalEnableCustomLocalMemoryAlignment.set(customAlignment); testAlignment(size, expectedAlignment); } TEST_F(WddmMemoryManagerDevicePoolAlignmentTests, givenAllocationLessThen2MbWhenAllocationInDevicePoolIsCreatedThenUse64KbAlignment) { const uint32_t expectedAlignment = MemoryConstants::pageSize64k; const uint32_t size = 2 * MemoryConstants::megaByte - 1; testAlignment(size, expectedAlignment); } TEST_F(WddmMemoryManagerDevicePoolAlignmentTests, givenTooMuchMemoryWastedOn2MbAlignmentWhenAllocationInDevicePoolIsCreatedThenUse64kbAlignment) { const float threshold = 0.1f; { const uint32_t alignedSize = 4 * MemoryConstants::megaByte; const uint32_t maxAmountOfWastedMemory = static_cast(alignedSize * threshold); testAlignment(alignedSize, MemoryConstants::pageSize2Mb); testAlignment(alignedSize - maxAmountOfWastedMemory + 1, MemoryConstants::pageSize2Mb); testAlignment(alignedSize - maxAmountOfWastedMemory - 1, MemoryConstants::pageSize64k); } { const uint32_t alignedSize = 8 * MemoryConstants::megaByte; const uint32_t maxAmountOfWastedMemory = static_cast(alignedSize * threshold); testAlignment(alignedSize, MemoryConstants::pageSize2Mb); testAlignment(alignedSize - maxAmountOfWastedMemory + 1, MemoryConstants::pageSize2Mb); testAlignment(alignedSize - maxAmountOfWastedMemory - 1, MemoryConstants::pageSize64k); } } TEST_F(WddmMemoryManagerDevicePoolAlignmentTests, givenBigAllocationWastingMaximumPossibleAmountOfMemorytWhenAllocationInDevicePoolIsCreatedThenStillUse2MbAlignment) { const uint32_t size = 200 * MemoryConstants::megaByte + 1; // almost entire 2MB page will be wasted testAlignment(size, MemoryConstants::pageSize2Mb); } TEST_F(WddmMemoryManagerDevicePoolAlignmentTests, givenAtLeast2MbAllocationWhenAllocationInDevicePoolIsCreatedThenUse2MbAlignment) { const 
uint32_t size = 2 * MemoryConstants::megaByte; { DebugManager.flags.AlignLocalMemoryVaTo2MB.set(-1); const uint32_t expectedAlignment = 2 * MemoryConstants::megaByte; testAlignment(size, expectedAlignment); testAlignment(2 * size, expectedAlignment); } { DebugManager.flags.AlignLocalMemoryVaTo2MB.set(0); const uint32_t expectedAlignment = MemoryConstants::pageSize64k; testAlignment(size, expectedAlignment); testAlignment(2 * size, expectedAlignment); } { DebugManager.flags.AlignLocalMemoryVaTo2MB.set(1); const uint32_t expectedAlignment = 2 * MemoryConstants::megaByte; testAlignment(size, expectedAlignment); testAlignment(2 * size, expectedAlignment); } } HWTEST_F(WddmMemoryManagerSimpleTest, givenLinearStreamWhenItIsAllocatedThenItIsInLocalMemoryHasCpuPointerAndHasStandardHeap64kbAsGpuAddress) { memoryManager = std::make_unique(false, true, *executionEnvironment); auto graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties({mockRootDeviceIndex, 4096u, AllocationType::LINEAR_STREAM, mockDeviceBitfield}); ASSERT_NE(nullptr, graphicsAllocation); EXPECT_EQ(MemoryPool::LocalMemory, graphicsAllocation->getMemoryPool()); EXPECT_NE(nullptr, graphicsAllocation->getUnderlyingBuffer()); EXPECT_TRUE(graphicsAllocation->isLocked()); auto gpuAddress = graphicsAllocation->getGpuAddress(); auto gpuAddressEnd = gpuAddress + 4096u; auto &partition = wddm->getGfxPartition(); if (is64bit) { if (executionEnvironment->rootDeviceEnvironments[graphicsAllocation->getRootDeviceIndex()]->isFullRangeSvm()) { EXPECT_GE(gpuAddress, GmmHelper::canonize(partition.Standard64KB.Base)); EXPECT_LE(gpuAddressEnd, GmmHelper::canonize(partition.Standard64KB.Limit)); } else { EXPECT_GE(gpuAddress, GmmHelper::canonize(partition.Standard.Base)); EXPECT_LE(gpuAddressEnd, GmmHelper::canonize(partition.Standard.Limit)); } } else { if (executionEnvironment->rootDeviceEnvironments[graphicsAllocation->getRootDeviceIndex()]->isFullRangeSvm()) { EXPECT_GE(gpuAddress, 0ull); EXPECT_LE(gpuAddress, 
UINT32_MAX); EXPECT_GE(gpuAddressEnd, 0ull); EXPECT_LE(gpuAddressEnd, UINT32_MAX); } } EXPECT_EQ(graphicsAllocation->getAllocationType(), AllocationType::LINEAR_STREAM); memoryManager->freeGraphicsMemory(graphicsAllocation); } TEST_F(WddmMemoryManagerSimpleTest, givenNotSetUseSystemMemoryWhenGraphicsAllocationInDevicePoolIsAllocatedThenLocalMemoryAllocationHasCorrectStorageInfoAndFlushL3IsSet) { auto executionEnvironment = platform()->peekExecutionEnvironment(); memoryManager = std::make_unique(false, true, *executionEnvironment); MemoryManager::AllocationStatus status = MemoryManager::AllocationStatus::Error; AllocationData allocData; allocData.allFlags = 0; allocData.size = MemoryConstants::pageSize; allocData.flags.allocateMemory = true; allocData.storageInfo.memoryBanks = 0x1; allocData.storageInfo.pageTablesVisibility = 0x2; allocData.storageInfo.cloningOfPageTables = false; allocData.flags.flushL3 = true; auto allocation = memoryManager->allocateGraphicsMemoryInDevicePool(allocData, status); EXPECT_NE(nullptr, allocation); EXPECT_EQ(allocData.storageInfo.memoryBanks, allocation->storageInfo.memoryBanks); EXPECT_EQ(allocData.storageInfo.pageTablesVisibility, allocation->storageInfo.pageTablesVisibility); EXPECT_FALSE(allocation->storageInfo.cloningOfPageTables); EXPECT_TRUE(allocation->isFlushL3Required()); memoryManager->freeGraphicsMemory(allocation); } TEST_F(WddmMemoryManagerSimpleTest, givenEnabledLocalMemoryAndUseSytemMemoryWhenGraphicsAllocationInDevicePoolIsAllocatedThenNullptrIsReturned) { auto executionEnvironment = platform()->peekExecutionEnvironment(); memoryManager = std::make_unique(false, true, *executionEnvironment); MemoryManager::AllocationStatus status = MemoryManager::AllocationStatus::Success; AllocationData allocData; allocData.allFlags = 0; allocData.size = MemoryConstants::pageSize; allocData.flags.useSystemMemory = true; allocData.flags.allocateMemory = true; auto allocation = 
// With local memory enabled, a request that allows 32-bit placement while 32-bit allocations are
// forced must not be served from the device pool: the call returns nullptr and reports
// RetryInNonDevicePool.
TEST_F(WddmMemoryManagerSimpleTest, givenEnabledLocalMemoryAndAllowed32BitAndForce32BitWhenGraphicsAllocationInDevicePoolIsAllocatedThenNullptrIsReturned) {
    // Local variable; the memory manager is built on the platform's execution environment.
    auto executionEnvironment = platform()->peekExecutionEnvironment();
    memoryManager = std::make_unique<MockWddmMemoryManager>(false, true, *executionEnvironment);
    memoryManager->setForce32BitAllocations(true);

    // Seeded with Success so the status expectation proves the call changed it.
    MemoryManager::AllocationStatus status = MemoryManager::AllocationStatus::Success;
    AllocationData allocData;
    allocData.allFlags = 0;
    allocData.size = MemoryConstants::pageSize;
    allocData.flags.allow32Bit = true;
    allocData.flags.allocateMemory = true;

    auto allocation = memoryManager->allocateGraphicsMemoryInDevicePool(allocData, status);

    EXPECT_EQ(nullptr, allocation);
    EXPECT_EQ(MemoryManager::AllocationStatus::RetryInNonDevicePool, status);
}
// Makes the mocked WDDM report STATUS_NO_MEMORY on allocation creation and checks that the
// device-pool path returns nullptr with AllocationStatus::Error.
TEST_F(WddmMemoryManagerSimpleTest, givenEnabledLocalMemoryWhenAllocateFailsThenGraphicsAllocationInDevicePoolReturnsError) {
    const bool localMemoryEnabled = true;
    auto executionEnvironment = platform()->peekExecutionEnvironment();
    memoryManager = std::make_unique<MockWddmMemoryManager>(false, localMemoryEnabled, *executionEnvironment);

    // Seeded with Success so the status expectation proves the call changed it.
    MemoryManager::AllocationStatus status = MemoryManager::AllocationStatus::Success;
    AllocationData allocData;
    allocData.allFlags = 0;
    allocData.size = MemoryConstants::pageSize;
    allocData.flags.allocateMemory = true;

    // Failure injection on the WDDM mock.
    wddm->callBaseDestroyAllocations = false;
    wddm->createAllocationStatus = STATUS_NO_MEMORY;

    auto allocation = memoryManager->allocateGraphicsMemoryInDevicePool(allocData, status);

    EXPECT_EQ(nullptr, allocation);
    EXPECT_EQ(MemoryManager::AllocationStatus::Error, status);

    // NOTE(review): `allocation` is expected to be nullptr here, so this call presumably only
    // matters when the allocation unexpectedly succeeded — confirm freeGraphicsMemory accepts nullptr.
    memoryManager->freeGraphicsMemory(allocation);
}
// Allocates one page from the device pool with local memory enabled and checks that the
// resulting allocation is in local memory and has needsMakeResidentBeforeLock set.
// NOTE(review): the template arguments of std::make_unique and static_cast appear to be missing
// in this copy of the file — restore them from the upstream source before building.
TEST_F(WddmMemoryManagerSimpleTest, whenMemoryIsAllocatedInLocalMemoryThenTheAllocationNeedsMakeResidentBeforeLock) {
    const bool localMemoryEnabled = true;
    auto executionEnvironment = platform()->peekExecutionEnvironment();
    memoryManager = std::make_unique(false, localMemoryEnabled, *executionEnvironment);

    // Seeded with Error so the Success expectation proves the call changed it.
    MemoryManager::AllocationStatus status = MemoryManager::AllocationStatus::Error;
    AllocationData allocData;
    allocData.allFlags = 0;
    allocData.size = MemoryConstants::pageSize;
    allocData.flags.allocateMemory = true;

    // The cast gives access to needsMakeResidentBeforeLock, which is read below.
    auto allocation = static_cast(memoryManager->allocateGraphicsMemoryInDevicePool(allocData, status));
    ASSERT_NE(nullptr, allocation);
    EXPECT_EQ(MemoryManager::AllocationStatus::Success, status);
    EXPECT_EQ(MemoryPool::LocalMemory, allocation->getMemoryPool());
    EXPECT_TRUE(allocation->needsMakeResidentBeforeLock);

    memoryManager->freeGraphicsMemory(allocation);
}
// Allocates a BUFFER-typed page from the device pool and checks that the WDDM mock recorded
// exactly one setAllocationPriority call, made with DXGI_RESOURCE_PRIORITY_NORMAL.
// NOTE(review): the template arguments of std::make_unique and static_cast appear to be missing
// in this copy of the file — restore them from the upstream source before building.
TEST_F(WddmMemoryManagerSimpleTest, givenAllocationWithoutHighPriorityWhenMemoryIsAllocatedInLocalMemoryThenSetAllocationPriorityIsCalledWithNormalPriority) {
    const bool localMemoryEnabled = true;
    auto executionEnvironment = platform()->peekExecutionEnvironment();
    memoryManager = std::make_unique(false, localMemoryEnabled, *executionEnvironment);

    // Seeded with Error so the Success expectation proves the call changed it.
    MemoryManager::AllocationStatus status = MemoryManager::AllocationStatus::Error;
    AllocationData allocData;
    allocData.allFlags = 0;
    allocData.size = MemoryConstants::pageSize;
    allocData.flags.allocateMemory = true;
    allocData.type = AllocationType::BUFFER;

    auto allocation = static_cast(memoryManager->allocateGraphicsMemoryInDevicePool(allocData, status));
    ASSERT_NE(nullptr, allocation);
    EXPECT_EQ(MemoryManager::AllocationStatus::Success, status);
    // Values recorded by the WDDM mock for the priority call.
    EXPECT_EQ(1u, wddm->setAllocationPriorityResult.called);
    EXPECT_EQ(DXGI_RESOURCE_PRIORITY_NORMAL, wddm->setAllocationPriorityResult.uint64ParamPassed);

    memoryManager->freeGraphicsMemory(allocation);
}
EXPECT_EQ(nullptr, allocation); EXPECT_EQ(MemoryManager::AllocationStatus::Error, status); } class WddmMemoryManagerSimpleTestWithLocalMemory : public MockWddmMemoryManagerFixture, public ::testing::Test { public: void SetUp() override { HardwareInfo localPlatformDevice = *defaultHwInfo; localPlatformDevice.featureTable.flags.ftrLocalMemory = true; platformsImpl->clear(); auto executionEnvironment = constructPlatform()->peekExecutionEnvironment(); executionEnvironment->prepareRootDeviceEnvironments(1u); executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(&localPlatformDevice); executionEnvironment->rootDeviceEnvironments[0]->initGmm(); MockWddmMemoryManagerFixture::SetUp(); wddm->init(); } void TearDown() override { MockWddmMemoryManagerFixture::TearDown(); } HardwareInfo localPlatformDevice = {}; FeatureTable ftrTable = {}; }; TEST_F(WddmMemoryManagerSimpleTestWithLocalMemory, givenLocalMemoryAndImageOrSharedResourceWhenAllocateInDevicePoolIsCalledThenLocalMemoryAllocationAndAndStatusSuccessIsReturned) { memoryManager = std::make_unique(false, true, *executionEnvironment); MemoryManager::AllocationStatus status = MemoryManager::AllocationStatus::Error; ImageDescriptor imgDesc = {}; imgDesc.imageWidth = 1; imgDesc.imageHeight = 1; imgDesc.imageType = ImageType::Image2D; auto imgInfo = MockGmm::initImgInfo(imgDesc, 0, nullptr); AllocationType types[] = {AllocationType::IMAGE, AllocationType::SHARED_RESOURCE_COPY}; AllocationData allocData; allocData.allFlags = 0; allocData.size = MemoryConstants::pageSize; allocData.flags.preferCompressed = true; allocData.imgInfo = &imgInfo; for (uint32_t i = 0; i < arrayCount(types); i++) { allocData.type = types[i]; auto allocation = memoryManager->allocateGraphicsMemoryInDevicePool(allocData, status); EXPECT_NE(nullptr, allocation); EXPECT_EQ(MemoryManager::AllocationStatus::Success, status); EXPECT_EQ(MemoryPool::LocalMemory, allocation->getMemoryPool()); EXPECT_EQ(0u, 
// Allocates a 2MB SVM_GPU allocation while passing a host pointer and checks that no CPU-side
// buffer is created, no address range is reserved, and (on full-range SVM) the GPU address
// equals the supplied pointer.
// NOTE(review): the template arguments of reinterpret_cast and static_cast appear to be missing
// in this copy of the file — restore them from the upstream source before building.
TEST_F(WddmMemoryManagerSimpleTest, givenSvmGpuAllocationWhenHostPtrProvidedThenUseHostPtrAsGpuVa) {
    size_t size = 2 * MemoryConstants::megaByte;
    AllocationProperties properties{mockRootDeviceIndex, false, size, AllocationType::SVM_GPU, false, mockDeviceBitfield};
    properties.alignment = size;
    // Arbitrary non-null address (2 * size) used as the requested SVM pointer.
    void *svmPtr = reinterpret_cast(2 * size);
    memoryManager->localMemorySupported[properties.rootDeviceIndex] = true;

    auto allocation = static_cast(memoryManager->allocateGraphicsMemoryWithProperties(properties, svmPtr));
    ASSERT_NE(nullptr, allocation);
    EXPECT_EQ(size, allocation->getUnderlyingBufferSize());
    EXPECT_EQ(nullptr, allocation->getUnderlyingBuffer());
    EXPECT_EQ(nullptr, allocation->getDriverAllocatedCpuPtr());

    // Limited platforms will not use heap HeapIndex::HEAP_SVM, so the address is only
    // compared when full-range SVM is available.
    if (executionEnvironment->rootDeviceEnvironments[0]->isFullRangeSvm()) {
        EXPECT_EQ(svmPtr, reinterpret_cast(allocation->getGpuAddress()));
    }

    EXPECT_EQ(nullptr, allocation->getReservedAddressPtr());

    memoryManager->freeGraphicsMemory(allocation);
}
executionEnvironment.rootDeviceEnvironments[0]->getHardwareInfo(); auto deviceMask = std::max(static_cast(maxNBitValue(hwInfo->gtSystemInfo.MultiTileArchInfo.TileCount)), 1u); EXPECT_EQ(wddmMock->getDedicatedVideoMemory(), memoryManager.getLocalMemorySize(i, deviceMask)); } } TEST(WddmMemoryManager, givenMultipleTilesWhenGetLocalMemorySizeIsCalledThenReturnCorrectValue) { MockExecutionEnvironment executionEnvironment; executionEnvironment.prepareRootDeviceEnvironments(1u); auto hwInfo = executionEnvironment.rootDeviceEnvironments[0]->getMutableHardwareInfo(); executionEnvironment.rootDeviceEnvironments[0]->osInterface.reset(); auto wddmMock = Wddm::createWddm(nullptr, *executionEnvironment.rootDeviceEnvironments[0]); wddmMock->init(); hwInfo->gtSystemInfo.MultiTileArchInfo.IsValid = 1; hwInfo->gtSystemInfo.MultiTileArchInfo.TileCount = 4; static_cast(wddmMock)->dedicatedVideoMemory = 32 * MemoryConstants::gigaByte; MockWddmMemoryManager memoryManager(executionEnvironment); auto singleRegionSize = wddmMock->getDedicatedVideoMemory() / hwInfo->gtSystemInfo.MultiTileArchInfo.TileCount; EXPECT_EQ(singleRegionSize, memoryManager.getLocalMemorySize(0, 0b0001)); EXPECT_EQ(singleRegionSize, memoryManager.getLocalMemorySize(0, 0b0010)); EXPECT_EQ(singleRegionSize, memoryManager.getLocalMemorySize(0, 0b0100)); EXPECT_EQ(singleRegionSize, memoryManager.getLocalMemorySize(0, 0b1000)); EXPECT_EQ(singleRegionSize * 2, memoryManager.getLocalMemorySize(0, 0b0011)); EXPECT_EQ(wddmMock->getDedicatedVideoMemory(), memoryManager.getLocalMemorySize(0, 0b1111)); } TEST_F(WddmMemoryManagerSimpleTest, given32BitAllocationOfBufferWhenItIsAllocatedThenItHas32BitGpuPointer) { if constexpr (is64bit) { GTEST_SKIP(); } REQUIRE_SVM_OR_SKIP(defaultHwInfo); AllocationType allocationTypes[] = {AllocationType::BUFFER, AllocationType::SHARED_BUFFER, AllocationType::SCRATCH_SURFACE, AllocationType::PRIVATE_SURFACE}; for (auto &allocationType : allocationTypes) { size_t size = 2 * 
MemoryConstants::kiloByte; auto alignedSize = alignUp(size, MemoryConstants::pageSize64k); AllocationProperties properties{mockRootDeviceIndex, size, allocationType, mockDeviceBitfield}; memoryManager->localMemorySupported[properties.rootDeviceIndex] = true; auto allocation = static_cast(memoryManager->allocateGraphicsMemoryWithProperties(properties, nullptr)); ASSERT_NE(nullptr, allocation); EXPECT_EQ(alignedSize, allocation->getUnderlyingBufferSize()); EXPECT_EQ(nullptr, allocation->getUnderlyingBuffer()); EXPECT_EQ(nullptr, allocation->getDriverAllocatedCpuPtr()); EXPECT_NE(nullptr, allocation->getReservedAddressPtr()); EXPECT_EQ(alignedSize + 2 * MemoryConstants::megaByte, allocation->getReservedAddressSize()); EXPECT_EQ(castToUint64(allocation->getReservedAddressPtr()), allocation->getGpuAddress()); EXPECT_EQ(0u, allocation->getGpuAddress() % 2 * MemoryConstants::megaByte); EXPECT_GE(allocation->getGpuAddress(), 0u); EXPECT_LE(allocation->getGpuAddress(), MemoryConstants::max32BitAddress); memoryManager->freeGraphicsMemory(allocation); } } struct WddmMemoryManagerSimple64BitTest : public WddmMemoryManagerSimpleTest { using WddmMemoryManagerSimpleTest::SetUp; using WddmMemoryManagerSimpleTest::TearDown; template void givenLocalMemoryAllocationAndRequestedSizeIsHugeThenResultAllocationIsSplitted() { if constexpr (using32Bit) { GTEST_SKIP(); } else { DebugManagerStateRestore dbgRestore; wddm->init(); wddm->mapGpuVaStatus = true; VariableBackup restorer{&wddm->callBaseMapGpuVa, false}; memoryManager = std::make_unique(false, true, *executionEnvironment); AllocationData allocData; allocData.allFlags = 0; allocData.size = static_cast(MemoryConstants::gigaByte * 13); allocData.flags.allocateMemory = true; MemoryManager::AllocationStatus status = MemoryManager::AllocationStatus::Error; auto allocation = memoryManager->allocateGraphicsMemoryInDevicePool(allocData, status); EXPECT_NE(nullptr, allocation); EXPECT_EQ(MemoryManager::AllocationStatus::Success, status); 
// Thin test entry point: forwards to the fixture's templated helper of the same name, which
// holds the actual test body.
// NOTE(review): the helper is declared as a template, but its template argument list appears to
// be missing from this call in this copy of the file — restore it from the upstream source.
TEST_F(WddmMemoryManagerSimple64BitTest, givenLocalMemoryAllocationAndRequestedSizeIsHugeThenResultAllocationIsSplitted) {
    givenLocalMemoryAllocationAndRequestedSizeIsHugeThenResultAllocationIsSplitted();
}
"shared/test/common/helpers/ult_hw_config.h" #include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/mocks/mock_deferred_deleter.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/mocks/mock_gmm_client_context.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "shared/test/common/mocks/mock_os_context.h" #include "shared/test/common/mocks/ult_device_factory.h" #include "shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/source/helpers/cl_memory_properties_helpers.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/mem_obj/mem_obj_helper.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "opencl/test/unit_test/os_interface/windows/mock_wddm_allocation.h" using namespace NEO; using namespace ::testing; void WddmMemoryManagerFixture::SetUp() { GdiDllFixture::SetUp(); executionEnvironment = platform()->peekExecutionEnvironment(); rootDeviceEnvironment = executionEnvironment->rootDeviceEnvironments[rootDeviceIndex].get(); wddm = static_cast(Wddm::createWddm(nullptr, *rootDeviceEnvironment)); if (defaultHwInfo->capabilityTable.ftrRenderCompressedBuffers || defaultHwInfo->capabilityTable.ftrRenderCompressedImages) { GMM_TRANSLATIONTABLE_CALLBACKS dummyTTCallbacks = {}; auto csr = std::unique_ptr(createCommandStream(*executionEnvironment, 1u, 1)); auto hwInfo = *defaultHwInfo; EngineInstancesContainer regularEngines = { {aub_stream::ENGINE_CCS, EngineUsage::Regular}}; memoryManager->createAndRegisterOsContext(csr.get(), EngineDescriptorHelper::getDefaultDescriptor(regularEngines[0], PreemptionHelper::getDefaultPreemptionMode(hwInfo))); for (auto engine : memoryManager->getRegisteredEngines()) { if (engine.getEngineUsage() == EngineUsage::Regular) { 
engine.commandStreamReceiver->pageTableManager.reset(GmmPageTableMngr::create(nullptr, 0, &dummyTTCallbacks)); } } } wddm->init(); constexpr uint64_t heap32Base = (is32bit) ? 0x1000 : 0x800000000000; wddm->setHeap32(heap32Base, 1000 * MemoryConstants::pageSize - 1); rootDeviceEnvironment->memoryOperationsInterface = std::make_unique(wddm); memoryManager = std::make_unique(*executionEnvironment); } TEST(ResidencyData, givenNewlyConstructedResidencyDataThenItIsNotResidentOnAnyOsContext) { auto maxOsContextCount = 3u; ResidencyData residencyData(maxOsContextCount); for (auto contextId = 0u; contextId < maxOsContextCount; contextId++) { EXPECT_EQ(false, residencyData.resident[contextId]); } } TEST(WddmMemoryManager, WhenWddmMemoryManagerIsCreatedThenItIsNonCopyable) { EXPECT_FALSE(std::is_move_constructible::value); EXPECT_FALSE(std::is_copy_constructible::value); } TEST(WddmMemoryManager, WhenWddmMemoryManagerIsCreatedThenItIsNonAssignable) { EXPECT_FALSE(std::is_move_assignable::value); EXPECT_FALSE(std::is_copy_assignable::value); } TEST(WddmAllocationTest, givenAllocationIsTrimCandidateInOneOsContextWhenGettingTrimCandidatePositionThenReturnItsPositionAndUnusedPositionInOtherContexts) { auto executionEnvironment = std::unique_ptr(MockDevice::prepareExecutionEnvironment(defaultHwInfo.get(), 0u)); executionEnvironment->rootDeviceEnvironments[0]->initGmm(); MockWddmAllocation allocation(executionEnvironment->rootDeviceEnvironments[0]->getGmmClientContext()); MockOsContext osContext(1u, EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_RCS, EngineUsage::Regular}, PreemptionHelper::getDefaultPreemptionMode(*defaultHwInfo))); allocation.setTrimCandidateListPosition(osContext.getContextId(), 700u); EXPECT_EQ(trimListUnusedPosition, allocation.getTrimCandidateListPosition(0u)); EXPECT_EQ(700u, allocation.getTrimCandidateListPosition(1u)); } TEST(WddmAllocationTest, givenAllocationCreatedWithOsContextCountOneWhenItIsCreatedThenMaxOsContextCountIsUsedInstead) 
// The memory pool handed to a WddmAllocation constructor must be what getMemoryPool() reports.
// Two constructions are exercised; they differ in the fifth argument (nullptr vs 0u) and in the
// pool value.
TEST(WddmAllocationTest, givenMemoryPoolWhenPassedToWddmAllocationConstructorThenMemoryPoolIsStored) {
    // Fifth argument passed as nullptr.
    WddmAllocation firstAllocation{0, AllocationType::COMMAND_BUFFER, nullptr, 0, nullptr, MemoryPool::System64KBPages, 0u, 1u};
    const auto firstStoredPool = firstAllocation.getMemoryPool();
    EXPECT_EQ(MemoryPool::System64KBPages, firstStoredPool);

    // Fifth argument passed as 0u.
    WddmAllocation secondAllocation{0, AllocationType::COMMAND_BUFFER, nullptr, 0, 0u, MemoryPool::SystemCpuInaccessible, 0u, 1u};
    const auto secondStoredPool = secondAllocation.getMemoryPool();
    EXPECT_EQ(MemoryPool::SystemCpuInaccessible, secondStoredPool);
}
1); executionEnvironment->incRefInternal(); { std::unique_ptr wddm(static_cast(Wddm::createWddm(nullptr, *executionEnvironment->rootDeviceEnvironments[0].get()))); wddm->init(); uint64_t base = 0x56000; uint64_t size = 0x9000; wddm->setHeap32(base, size); executionEnvironment->rootDeviceEnvironments[0]->osInterface->setDriverModel(std::move(wddm)); std::unique_ptr memoryManager = std::unique_ptr(new WddmMemoryManager(*executionEnvironment)); EXPECT_EQ(base, memoryManager->getExternalHeapBaseAddress(0, false)); } executionEnvironment->decRefInternal(); } TEST(WddmMemoryManagerWithDeferredDeleterTest, givenWmmWhenAsyncDeleterIsEnabledAndWaitForDeletionsIsCalledThenDeleterInWddmIsSetToNullptr) { HardwareInfo *hwInfo; auto executionEnvironment = getExecutionEnvironmentImpl(hwInfo, 1); executionEnvironment->incRefInternal(); { auto wddm = std::make_unique(*executionEnvironment->rootDeviceEnvironments[0].get()); wddm->init(); executionEnvironment->rootDeviceEnvironments[0]->osInterface->setDriverModel(std::move(wddm)); bool actualDeleterFlag = DebugManager.flags.EnableDeferredDeleter.get(); DebugManager.flags.EnableDeferredDeleter.set(true); MockWddmMemoryManager memoryManager(*executionEnvironment); EXPECT_NE(nullptr, memoryManager.getDeferredDeleter()); memoryManager.waitForDeletions(); EXPECT_EQ(nullptr, memoryManager.getDeferredDeleter()); DebugManager.flags.EnableDeferredDeleter.set(actualDeleterFlag); } executionEnvironment->decRefInternal(); } TEST_F(WddmMemoryManagerSimpleTest, givenMemoryManagerWhenAllocateGraphicsMemoryIsCalledThenMemoryPoolIsSystem4KBPages) { memoryManager.reset(new MockWddmMemoryManager(false, false, *executionEnvironment)); if (memoryManager->isLimitedGPU(0)) { GTEST_SKIP(); } auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); EXPECT_NE(nullptr, allocation); EXPECT_EQ(MemoryPool::System4KBPages, allocation->getMemoryPool()); 
EXPECT_EQ(executionEnvironment->rootDeviceEnvironments[0]->getHardwareInfo()->featureTable.flags.ftrLocalMemory, allocation->getDefaultGmm()->resourceParams.Flags.Info.NonLocalOnly); memoryManager->freeGraphicsMemory(allocation); } class MockCreateWddmAllocationMemoryManager : public MockWddmMemoryManager { public: MockCreateWddmAllocationMemoryManager(NEO::ExecutionEnvironment &execEnv) : MockWddmMemoryManager(execEnv) {} bool createWddmAllocation(WddmAllocation *allocation, void *requiredGpuPtr) override { return false; } }; TEST_F(WddmMemoryManagerSimpleTest, givenMemoryManagerWhenAllocateGraphicsMemoryFailedThenNullptrFromAllocateMemoryByKMDIsReturned) { memoryManager.reset(new MockCreateWddmAllocationMemoryManager(*executionEnvironment)); AllocationData allocationData; allocationData.size = MemoryConstants::pageSize; auto allocation = memoryManager->allocateMemoryByKMD(allocationData); EXPECT_EQ(nullptr, allocation); } TEST_F(WddmMemoryManagerSimpleTest, givenMemoryManagerWith64KBPagesEnabledWhenAllocateGraphicsMemory64kbIsCalledThenMemoryPoolIsSystem64KBPages) { memoryManager.reset(new MockWddmMemoryManager(false, false, *executionEnvironment)); AllocationData allocationData; allocationData.size = 4096u; auto allocation = memoryManager->allocateGraphicsMemory64kb(allocationData); EXPECT_NE(nullptr, allocation); EXPECT_EQ(MemoryPool::System64KBPages, allocation->getMemoryPool()); EXPECT_EQ(executionEnvironment->rootDeviceEnvironments[0]->getHardwareInfo()->featureTable.flags.ftrLocalMemory, allocation->getDefaultGmm()->resourceParams.Flags.Info.NonLocalOnly); memoryManager->freeGraphicsMemory(allocation); } TEST_F(WddmMemoryManagerSimpleTest, givenAllocationDataWithStorageInfoWhenAllocateGraphicsMemory64kbThenStorageInfoInAllocationIsSetCorrectly) { memoryManager.reset(new MockWddmMemoryManager(false, false, *executionEnvironment)); AllocationData allocationData; allocationData.storageInfo = {}; auto allocation = 
memoryManager->allocateGraphicsMemory64kb(allocationData); EXPECT_NE(nullptr, allocation); EXPECT_TRUE(memcmp(&allocationData.storageInfo, &allocation->storageInfo, sizeof(StorageInfo)) == 0); memoryManager->freeGraphicsMemory(allocation); } TEST_F(WddmMemoryManagerSimpleTest, givenAllocationDataWithFlagsWhenAllocateGraphicsMemory64kbThenAllocationFlagFlushL3RequiredIsSetCorrectly) { class MockGraphicsAllocation : public GraphicsAllocation { public: using GraphicsAllocation::allocationInfo; }; memoryManager.reset(new MockWddmMemoryManager(false, false, *executionEnvironment)); AllocationData allocationData; allocationData.flags.flushL3 = true; auto allocation = static_cast(memoryManager->allocateGraphicsMemory64kb(allocationData)); EXPECT_NE(nullptr, allocation); EXPECT_EQ(allocationData.flags.flushL3, allocation->allocationInfo.flags.flushL3Required); memoryManager->freeGraphicsMemory(allocation); } TEST_F(WddmMemoryManagerSimpleTest, givenMemoryManagerWhenAllocateGraphicsMemoryWithPtrIsCalledThenMemoryPoolIsSystem4KBPages) { memoryManager.reset(new MockWddmMemoryManager(false, false, *executionEnvironment)); if (memoryManager->isLimitedGPU(0)) { GTEST_SKIP(); } void *ptr = reinterpret_cast(0x1001); auto size = 4096u; auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, size}, ptr); ASSERT_NE(nullptr, allocation); EXPECT_EQ(MemoryPool::System4KBPages, allocation->getMemoryPool()); for (size_t i = 0; i < allocation->fragmentsStorage.fragmentCount; i++) { EXPECT_EQ(1u, static_cast(allocation->fragmentsStorage.fragmentStorageData[i].osHandleStorage)->gmm->resourceParams.Flags.Info.NonLocalOnly); } memoryManager->freeGraphicsMemory(allocation); } TEST_F(WddmMemoryManagerSimpleTest, givenMemoryManagerWhenAllocate32BitGraphicsMemoryWithPtrIsCalledThenMemoryPoolIsSystem4KBPagesWith32BitGpuAddressing) { memoryManager.reset(new MockWddmMemoryManager(false, false, *executionEnvironment)); void *ptr 
= reinterpret_cast(0x1001); auto size = MemoryConstants::pageSize; auto allocation = memoryManager->allocate32BitGraphicsMemory(csr->getRootDeviceIndex(), size, ptr, AllocationType::BUFFER); ASSERT_NE(nullptr, allocation); EXPECT_EQ(MemoryPool::System4KBPagesWith32BitGpuAddressing, allocation->getMemoryPool()); EXPECT_EQ(executionEnvironment->rootDeviceEnvironments[0]->getHardwareInfo()->featureTable.flags.ftrLocalMemory, allocation->getDefaultGmm()->resourceParams.Flags.Info.NonLocalOnly); memoryManager->freeGraphicsMemory(allocation); } TEST_F(WddmMemoryManagerSimpleTest, givenMemoryManagerWith64KBPagesDisabledWhenAllocateGraphicsMemoryForSVMThen4KBGraphicsAllocationIsReturned) { memoryManager.reset(new MockWddmMemoryManager(false, false, *executionEnvironment)); if (memoryManager->isLimitedGPU(0)) { GTEST_SKIP(); } auto size = MemoryConstants::pageSize; auto svmAllocation = memoryManager->allocateGraphicsMemoryWithProperties({csr->getRootDeviceIndex(), size, AllocationType::SVM_ZERO_COPY, mockDeviceBitfield}); EXPECT_NE(nullptr, svmAllocation); EXPECT_EQ(MemoryPool::System4KBPages, svmAllocation->getMemoryPool()); EXPECT_EQ(executionEnvironment->rootDeviceEnvironments[0]->getHardwareInfo()->featureTable.flags.ftrLocalMemory, svmAllocation->getDefaultGmm()->resourceParams.Flags.Info.NonLocalOnly); memoryManager->freeGraphicsMemory(svmAllocation); } TEST_F(WddmMemoryManagerSimpleTest, givenMemoryManagerWith64KBPagesEnabledWhenAllocateGraphicsMemoryForSVMThenMemoryPoolIsSystem64KBPages) { memoryManager.reset(new MockWddmMemoryManager(true, false, *executionEnvironment)); if (memoryManager->isLimitedGPU(0)) { GTEST_SKIP(); } auto size = MemoryConstants::pageSize; auto svmAllocation = memoryManager->allocateGraphicsMemoryWithProperties({csr->getRootDeviceIndex(), size, AllocationType::SVM_ZERO_COPY, mockDeviceBitfield}); EXPECT_NE(nullptr, svmAllocation); EXPECT_EQ(MemoryPool::System64KBPages, svmAllocation->getMemoryPool()); 
memoryManager->freeGraphicsMemory(svmAllocation);
}

// NOTE(review): template arguments in this section were missing in this copy of
// the file and were restored from context -- confirm against upstream.

// An allocation opened from a shared handle is placed in the
// SystemCpuInaccessible pool.
TEST_F(WddmMemoryManagerSimpleTest, givenMemoryManagerWhenCreateAllocationFromHandleIsCalledThenMemoryPoolIsSystemCpuInaccessible) {
    memoryManager.reset(new MockWddmMemoryManager(false, false, *executionEnvironment));
    auto osHandle = 1u;
    gdi->getQueryResourceInfoArgOut().NumAllocations = 1;
    std::unique_ptr<Gmm> gmm(new Gmm(rootDeviceEnvironment->getGmmClientContext(), nullptr, 0, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true));

    // Describe the single allocation the mocked GDI "open resource" call returns.
    D3DDDI_OPENALLOCATIONINFO allocationInfo;
    allocationInfo.pPrivateDriverData = gmm->gmmResourceInfo->peekHandle();
    allocationInfo.hAllocation = ALLOCATION_HANDLE;
    allocationInfo.PrivateDriverDataSize = sizeof(GMM_RESOURCE_INFO);
    gdi->getOpenResourceArgOut().pOpenAllocationInfo = &allocationInfo;

    AllocationProperties properties(0, false, 0, AllocationType::SHARED_BUFFER, false, false, 0);
    auto allocation = memoryManager->createGraphicsAllocationFromSharedHandle(osHandle, properties, false, false);
    // Fatal assertion: the check that follows dereferences the allocation.
    ASSERT_NE(nullptr, allocation);
    EXPECT_EQ(MemoryPool::SystemCpuInaccessible, allocation->getMemoryPool());
    memoryManager->freeGraphicsMemory(allocation);
}

// The allocation type requested in AllocationProperties (buffer or image) is
// the type reported by the allocation opened from a shared handle.
TEST_F(WddmMemoryManagerSimpleTest, givenAllocationPropertiesWhenCreateAllocationFromHandleIsCalledThenCorrectAllocationTypeIsSet) {
    memoryManager.reset(new MockWddmMemoryManager(false, false, *executionEnvironment));
    auto osHandle = 1u;
    gdi->getQueryResourceInfoArgOut().NumAllocations = 1;
    std::unique_ptr<Gmm> gmm(new Gmm(rootDeviceEnvironment->getGmmClientContext(), nullptr, 0, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true));

    D3DDDI_OPENALLOCATIONINFO allocationInfo;
    allocationInfo.pPrivateDriverData = gmm->gmmResourceInfo->peekHandle();
    allocationInfo.hAllocation = ALLOCATION_HANDLE;
    allocationInfo.PrivateDriverDataSize = sizeof(GMM_RESOURCE_INFO);
    gdi->getOpenResourceArgOut().pOpenAllocationInfo = &allocationInfo;

    AllocationProperties propertiesBuffer(0, false, 0, AllocationType::SHARED_BUFFER, false, false, 0);
    AllocationProperties
propertiesImage(0, false, 0, AllocationType::SHARED_IMAGE, false, false, 0);

    AllocationProperties *propertiesArray[2] = {&propertiesBuffer, &propertiesImage};

    for (auto properties : propertiesArray) {
        auto allocation = memoryManager->createGraphicsAllocationFromSharedHandle(osHandle, *properties, false, false);
        // Fatal assertion: the check that follows dereferences the allocation.
        ASSERT_NE(nullptr, allocation);
        EXPECT_EQ(properties->allocationType, allocation->getAllocationType());
        memoryManager->freeGraphicsMemory(allocation);
    }
}

// NOTE(review): template arguments in this section were missing in this copy of
// the file and were restored from context -- confirm against upstream.

// If mapping the GPU VA fails while opening a shared handle, no allocation is
// returned and freeGraphicsMemoryImpl has been invoked exactly once.
TEST_F(WddmMemoryManagerSimpleTest, whenCreateAllocationFromHandleAndMapCallFailsThenFreeGraphicsMemoryIsCalled) {
    memoryManager.reset(new MockWddmMemoryManager(false, false, *executionEnvironment));
    auto osHandle = 1u;
    gdi->getQueryResourceInfoArgOut().NumAllocations = 1;
    auto gmm = std::make_unique<Gmm>(rootDeviceEnvironment->getGmmClientContext(), nullptr, 0, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, StorageInfo{}, true);

    D3DDDI_OPENALLOCATIONINFO allocationInfo;
    allocationInfo.pPrivateDriverData = gmm->gmmResourceInfo->peekHandle();
    allocationInfo.hAllocation = ALLOCATION_HANDLE;
    allocationInfo.PrivateDriverDataSize = sizeof(GMM_RESOURCE_INFO);

    // Force the mocked mapGpuVa to report failure without calling the base implementation.
    wddm->mapGpuVaStatus = false;
    wddm->callBaseMapGpuVa = false;
    gdi->getOpenResourceArgOut().pOpenAllocationInfo = &allocationInfo;
    EXPECT_EQ(0u, memoryManager->freeGraphicsMemoryImplCalled);

    AllocationProperties properties(0, false, 0, AllocationType::SHARED_BUFFER, false, false, 0);

    auto allocation = memoryManager->createGraphicsAllocationFromSharedHandle(osHandle, properties, false, false);
    EXPECT_EQ(nullptr, allocation);
    EXPECT_EQ(1u, memoryManager->freeGraphicsMemoryImplCalled);
}

// For an unaligned host pointer the allocation keeps the original pointer and
// size and reports the misalignment (1 byte here) as its allocation offset.
TEST_F(WddmMemoryManagerSimpleTest, givenAllocateGraphicsMemoryForNonSvmHostPtrIsCalledWhenNotAlignedPtrIsPassedThenAlignedGraphicsAllocationIsCreated) {
    memoryManager.reset(new MockWddmMemoryManager(false, false, *executionEnvironment));
    auto size = 13u;
    auto hostPtr = reinterpret_cast<const void *>(0x10001);

    AllocationData allocationData;
    allocationData.size = size;
    allocationData.hostPtr = hostPtr;
auto allocation = memoryManager->allocateGraphicsMemoryForNonSvmHostPtr(allocationData); EXPECT_NE(nullptr, allocation); EXPECT_EQ(hostPtr, allocation->getUnderlyingBuffer()); EXPECT_EQ(size, allocation->getUnderlyingBufferSize()); EXPECT_EQ(1u, allocation->getAllocationOffset()); memoryManager->freeGraphicsMemory(allocation); } TEST_F(WddmMemoryManagerSimpleTest, givenAllocateGraphicsMemoryForNonSvmHostPtrIsCalledWhenNotAlignedPtrIsPassedAndImportedAllocationIsFalseThenAlignedGraphicsAllocationIsFreed) { memoryManager.reset(new MockWddmMemoryManager(false, false, *executionEnvironment)); auto size = 13u; auto hostPtr = reinterpret_cast(0x10001); AllocationData allocationData; allocationData.size = size; allocationData.hostPtr = hostPtr; auto allocation = memoryManager->allocateGraphicsMemoryForNonSvmHostPtr(allocationData); EXPECT_NE(nullptr, allocation); EXPECT_EQ(hostPtr, allocation->getUnderlyingBuffer()); EXPECT_EQ(size, allocation->getUnderlyingBufferSize()); EXPECT_EQ(1u, allocation->getAllocationOffset()); memoryManager->freeGraphicsMemoryImpl(allocation, false); } TEST_F(WddmMemoryManagerTest, givenAllocateGraphicsMemoryForNonSvmHostPtrIsCalledWhencreateWddmAllocationFailsThenGraphicsAllocationIsNotCreated) { char hostPtr[64]; memoryManager->setDeferredDeleter(nullptr); setMapGpuVaFailConfigFcn(0, 1); AllocationData allocationData; allocationData.size = sizeof(hostPtr); allocationData.hostPtr = hostPtr; auto allocation = memoryManager->allocateGraphicsMemoryForNonSvmHostPtr(allocationData); EXPECT_EQ(nullptr, allocation); memoryManager->freeGraphicsMemory(allocation); } TEST_F(WddmMemoryManagerSimpleTest, GivenShareableEnabledAndSmallSizeWhenAskedToCreateGrahicsAllocationThenValidAllocationIsReturned) { memoryManager.reset(new MockWddmMemoryManager(false, false, *executionEnvironment)); memoryManager->hugeGfxMemoryChunkSize = MemoryConstants::pageSize64k; AllocationData allocationData; allocationData.size = 4096u; allocationData.flags.shareable = true; 
auto allocation = memoryManager->allocateMemoryByKMD(allocationData); EXPECT_NE(nullptr, allocation); EXPECT_FALSE(memoryManager->allocateHugeGraphicsMemoryCalled); memoryManager->freeGraphicsMemory(allocation); } TEST_F(WddmMemoryManagerSimpleTest, GivenShareableEnabledAndHugeSizeWhenAskedToCreateGrahicsAllocationThenValidAllocationIsReturned) { memoryManager.reset(new MockWddmMemoryManager(false, false, *executionEnvironment)); memoryManager->hugeGfxMemoryChunkSize = MemoryConstants::pageSize64k; AllocationData allocationData; allocationData.size = 2ULL * MemoryConstants::pageSize64k; allocationData.flags.shareable = true; auto allocation = memoryManager->allocateMemoryByKMD(allocationData); EXPECT_NE(nullptr, allocation); EXPECT_TRUE(memoryManager->allocateHugeGraphicsMemoryCalled); memoryManager->freeGraphicsMemory(allocation); } TEST_F(WddmMemoryManagerSimpleTest, givenZeroFenceValueOnSingleEngineRegisteredWhenHandleFenceCompletionIsCalledThenDoNotWaitOnCpu) { ASSERT_EQ(1u, memoryManager->getRegisteredEnginesCount()); auto allocation = static_cast(memoryManager->allocateGraphicsMemoryWithProperties({0, 32, AllocationType::BUFFER, mockDeviceBitfield})); allocation->getResidencyData().updateCompletionData(0u, 0u); memoryManager->handleFenceCompletion(allocation); EXPECT_EQ(0u, wddm->waitFromCpuResult.called); memoryManager->freeGraphicsMemory(allocation); } TEST_F(WddmMemoryManagerSimpleTest, givenNonZeroFenceValueOnSingleEngineRegisteredWhenHandleFenceCompletionIsCalledThenWaitOnCpuOnce) { ASSERT_EQ(1u, memoryManager->getRegisteredEnginesCount()); auto allocation = static_cast(memoryManager->allocateGraphicsMemoryWithProperties({0, 32, AllocationType::BUFFER, mockDeviceBitfield})); auto fence = &static_cast(memoryManager->getRegisteredEngines()[0].osContext)->getResidencyController().getMonitoredFence(); allocation->getResidencyData().updateCompletionData(129u, 0u); memoryManager->handleFenceCompletion(allocation); EXPECT_EQ(1u, wddm->waitFromCpuResult.called); 
EXPECT_EQ(129u, wddm->waitFromCpuResult.uint64ParamPassed);
    EXPECT_EQ(fence, wddm->waitFromCpuResult.monitoredFence);

    memoryManager->freeGraphicsMemory(allocation);
}

// NOTE(review): template arguments in this section (CommandStreamReceiver,
// WddmMock, WddmMemoryOperationsHandler, WddmAllocation, OsContextWin) were
// missing in this copy of the file and were restored from context -- confirm
// against upstream.

// With two engines registered (the second on a separate Wddm for root device 1)
// and a non-zero fence value recorded for each, handleFenceCompletion() waits
// on the CPU once per engine with that engine's value.
TEST_F(WddmMemoryManagerSimpleTest, givenNonZeroFenceValuesOnMultipleEnginesRegisteredWhenHandleFenceCompletionIsCalledThenWaitOnCpuForEachEngine) {
    executionEnvironment->prepareRootDeviceEnvironments(2u);
    for (auto &environment : executionEnvironment->rootDeviceEnvironments) {
        environment->setHwInfo(defaultHwInfo.get());
    }

    const uint32_t rootDeviceIndex = 1;
    DeviceBitfield deviceBitfield(2);
    std::unique_ptr<CommandStreamReceiver> csr(createCommandStream(*executionEnvironment, rootDeviceIndex, deviceBitfield));

    auto wddm2 = static_cast<WddmMock *>(Wddm::createWddm(nullptr, *executionEnvironment->rootDeviceEnvironments[rootDeviceIndex].get()));
    wddm2->init();
    executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->memoryOperationsInterface = std::make_unique<WddmMemoryOperationsHandler>(wddm2);

    auto hwInfo = executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo();
    OsContext *osContext = memoryManager->createAndRegisterOsContext(csr.get(),
                                                                     EngineDescriptorHelper::getDefaultDescriptor(HwHelper::get(hwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*hwInfo)[1],
                                                                                                                  PreemptionHelper::getDefaultPreemptionMode(*hwInfo), deviceBitfield));
    osContext->ensureContextInitialized();
    ASSERT_EQ(2u, memoryManager->getRegisteredEnginesCount());

    auto allocation = static_cast<WddmAllocation *>(memoryManager->allocateGraphicsMemoryWithProperties({0u, 32, AllocationType::BUFFER, mockDeviceBitfield}));
    // Fatal assertion: the allocation is dereferenced right below.
    ASSERT_NE(nullptr, allocation);
    auto lastEngineFence = &static_cast<OsContextWin *>(osContext)->getResidencyController().getMonitoredFence();
    allocation->getResidencyData().updateCompletionData(129u, 0u);
    allocation->getResidencyData().updateCompletionData(152u, 1u);

    memoryManager->handleFenceCompletion(allocation);

    EXPECT_EQ(1u, wddm->waitFromCpuResult.called);
    EXPECT_EQ(1u, wddm2->waitFromCpuResult.called);
    EXPECT_EQ(129u, wddm->waitFromCpuResult.uint64ParamPassed);
EXPECT_EQ(152u, wddm2->waitFromCpuResult.uint64ParamPassed);
    EXPECT_EQ(lastEngineFence, wddm2->waitFromCpuResult.monitoredFence);

    memoryManager->freeGraphicsMemory(allocation);
}

// NOTE(review): template arguments in this section (CommandStreamReceiver,
// WddmMock, WddmMemoryOperationsHandler, WddmAllocation, OsContextWin) were
// missing in this copy of the file and were restored from context -- confirm
// against upstream.

// With two engines registered but a non-zero fence value recorded only for the
// first, handleFenceCompletion() waits on the CPU once, for the first engine.
TEST_F(WddmMemoryManagerSimpleTest, givenNonZeroFenceValueOnSomeOfMultipleEnginesRegisteredWhenHandleFenceCompletionIsCalledThenWaitOnCpuForTheseEngines) {
    const uint32_t rootDeviceIndex = 1;
    executionEnvironment->prepareRootDeviceEnvironments(2u);
    for (auto &environment : executionEnvironment->rootDeviceEnvironments) {
        environment->setHwInfo(defaultHwInfo.get());
    }

    DeviceBitfield deviceBitfield(2);
    std::unique_ptr<CommandStreamReceiver> csr(createCommandStream(*executionEnvironment, rootDeviceIndex, deviceBitfield));

    auto wddm2 = static_cast<WddmMock *>(Wddm::createWddm(nullptr, *executionEnvironment->rootDeviceEnvironments[rootDeviceIndex].get()));
    wddm2->init();
    executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->memoryOperationsInterface = std::make_unique<WddmMemoryOperationsHandler>(wddm2);

    auto hwInfo = executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo();
    memoryManager->createAndRegisterOsContext(csr.get(),
                                              EngineDescriptorHelper::getDefaultDescriptor(HwHelper::get(hwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*hwInfo)[1],
                                                                                           PreemptionHelper::getDefaultPreemptionMode(*hwInfo), deviceBitfield));
    ASSERT_EQ(2u, memoryManager->getRegisteredEnginesCount());

    auto allocation = static_cast<WddmAllocation *>(memoryManager->allocateGraphicsMemoryWithProperties({0u, 32, AllocationType::BUFFER, mockDeviceBitfield}));
    // Fatal assertion: the allocation is dereferenced right below.
    ASSERT_NE(nullptr, allocation);
    auto lastEngineFence = &static_cast<OsContextWin *>(memoryManager->getRegisteredEngines()[0].osContext)->getResidencyController().getMonitoredFence();
    allocation->getResidencyData().updateCompletionData(129u, 0u);
    allocation->getResidencyData().updateCompletionData(0, 1u);

    memoryManager->handleFenceCompletion(allocation);

    EXPECT_EQ(1u, wddm->waitFromCpuResult.called);
    // Unsigned literal, matching the sibling fence tests and avoiding a
    // signed/unsigned comparison.
    EXPECT_EQ(129u, wddm->waitFromCpuResult.uint64ParamPassed);
    EXPECT_EQ(lastEngineFence,
wddm->waitFromCpuResult.monitoredFence);

    memoryManager->freeGraphicsMemory(allocation);
}

// reserveGpuAddress() on this manager yields an address range whose
// (decanonized) address and size are both zero.
TEST_F(WddmMemoryManagerSimpleTest, givenWddmMemoryManagerWhenGpuAddressIsReservedAndFreedThenAddressRangeIsZero) {
    auto addressRange = memoryManager->reserveGpuAddress(MemoryConstants::pageSize, 0);
    EXPECT_EQ(0u, GmmHelper::decanonize(addressRange.address));
    EXPECT_EQ(0u, addressRange.size);

    memoryManager->freeGpuAddress(addressRange, 0);
}

// allocateGraphicsMemoryWithGpuVa() returns nullptr on this manager.
TEST_F(WddmMemoryManagerSimpleTest, givenWddmMemoryManagerWhenAllocatingWithGpuVaThenNullptrIsReturned) {
    AllocationData allocationData;

    allocationData.size = 0x1000;
    allocationData.gpuAddress = 0x2000;
    allocationData.osContext = osContext;

    auto allocation = memoryManager->allocateGraphicsMemoryWithGpuVa(allocationData);
    EXPECT_EQ(nullptr, allocation);
}

// The default manager reports no virtual padding support.
TEST_F(WddmMemoryManagerTest, givenDefaultWddmMemoryManagerWhenAskedForVirtualPaddingSupportThenFalseIsReturned) {
    EXPECT_FALSE(memoryManager->peekVirtualPaddingSupport());
}

// Adding an allocation to the host pointer manager creates a fragment that
// mirrors the allocation's cpu pointer, size, gmm, gpu pointer and handle;
// the removal calls further down then exercise the fragment's ref-counting.
TEST_F(WddmMemoryManagerTest, GivenGraphicsAllocationWhenAddAndRemoveAllocationToHostPtrManagerThenFragmentHasCorrectValues) {
    void *cpuPtr = reinterpret_cast<void *>(0x30000);
    size_t size = 0x1000;
    uint64_t gpuPtr = 0x123;

    MockWddmAllocation gfxAllocation(rootDeviceEnvironment->getGmmClientContext());
    HostPtrEntryKey key{cpuPtr, gfxAllocation.getRootDeviceIndex()};
    gfxAllocation.cpuPtr = cpuPtr;
    gfxAllocation.size = size;
    gfxAllocation.gpuPtr = gpuPtr;
    memoryManager->addAllocationToHostPtrManager(&gfxAllocation);
    auto fragment = memoryManager->getHostPtrManager()->getFragment(key);
    // Fatal assertion: every check below dereferences the fragment.
    ASSERT_NE(fragment, nullptr);
    EXPECT_TRUE(fragment->driverAllocation);
    EXPECT_EQ(fragment->refCount, 1);
    EXPECT_EQ(fragment->fragmentCpuPointer, cpuPtr);
    EXPECT_EQ(fragment->fragmentSize, size);
    ASSERT_NE(fragment->osInternalStorage, nullptr);

    // NOTE(review): the cast target type was missing in this copy of the file;
    // restored as OsHandleWin * from context -- confirm against upstream.
    auto osHandle = static_cast<OsHandleWin *>(fragment->osInternalStorage);
    EXPECT_EQ(osHandle->gmm, gfxAllocation.getDefaultGmm());
    EXPECT_EQ(osHandle->gpuPtr, gpuPtr);
    EXPECT_EQ(osHandle->handle, gfxAllocation.handle);
EXPECT_NE(fragment->residency, nullptr); FragmentStorage fragmentStorage = {}; fragmentStorage.fragmentCpuPointer = cpuPtr; memoryManager->getHostPtrManager()->storeFragment(gfxAllocation.getRootDeviceIndex(), fragmentStorage); fragment = memoryManager->getHostPtrManager()->getFragment(key); EXPECT_EQ(fragment->refCount, 2); fragment->driverAllocation = false; memoryManager->removeAllocationFromHostPtrManager(&gfxAllocation); fragment = memoryManager->getHostPtrManager()->getFragment(key); EXPECT_EQ(fragment->refCount, 2); fragment->driverAllocation = true; memoryManager->removeAllocationFromHostPtrManager(&gfxAllocation); fragment = memoryManager->getHostPtrManager()->getFragment(key); EXPECT_EQ(fragment->refCount, 1); memoryManager->removeAllocationFromHostPtrManager(&gfxAllocation); fragment = memoryManager->getHostPtrManager()->getFragment(key); EXPECT_EQ(fragment, nullptr); } TEST_F(WddmMemoryManagerTest, WhenAllocatingGpuMemHostPtrThenCpuPtrAndGpuPtrAreSame) { // three pages void *ptr = alignedMalloc(3 * 4096, 4096); ASSERT_NE(nullptr, ptr); auto *gpuAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootDeviceIndex, false, MemoryConstants::pageSize}, ptr); // Should be same cpu ptr and gpu ptr EXPECT_EQ(ptr, gpuAllocation->getUnderlyingBuffer()); memoryManager->freeGraphicsMemory(gpuAllocation); alignedFree(ptr); } TEST_F(WddmMemoryManagerTest, givenDefaultMemoryManagerWhenAllocateWithSizeIsCalledThenSharedHandleIsZero) { auto *gpuAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootDeviceIndex, MemoryConstants::pageSize}); auto wddmAllocation = static_cast(gpuAllocation); EXPECT_EQ(0u, wddmAllocation->peekSharedHandle()); memoryManager->freeGraphicsMemory(gpuAllocation); } TEST_F(WddmMemoryManagerTest, givenWddmMemoryManagerWhenCreateFromSharedHandleIsCalledThenNonNullGraphicsAllocationIsReturned) { auto osHandle = 1u; void *pSysMem = reinterpret_cast(0x1000); std::unique_ptr 
gmm(new Gmm(rootDeviceEnvironment->getGmmClientContext(), pSysMem, 4096u, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true)); setSizesFcn(gmm->gmmResourceInfo.get(), 1u, 1024u, 1u); AllocationProperties properties(0, false, 4096u, AllocationType::SHARED_BUFFER, false, false, mockDeviceBitfield); auto *gpuAllocation = memoryManager->createGraphicsAllocationFromSharedHandle(osHandle, properties, false, false); auto wddmAlloc = static_cast(gpuAllocation); ASSERT_NE(nullptr, gpuAllocation); EXPECT_EQ(RESOURCE_HANDLE, wddmAlloc->resourceHandle); EXPECT_EQ(ALLOCATION_HANDLE, wddmAlloc->getDefaultHandle()); memoryManager->freeGraphicsMemory(gpuAllocation); } TEST_F(WddmMemoryManagerSimpleTest, whenAllocationCreatedFromSharedHandleIsDestroyedThenDestroyAllocationFromGdiIsNotInvoked) { gdi->getQueryResourceInfoArgOut().NumAllocations = 1; std::unique_ptr gmm(new Gmm(rootDeviceEnvironment->getGmmClientContext(), nullptr, 0, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true)); D3DDDI_OPENALLOCATIONINFO allocationInfo; allocationInfo.pPrivateDriverData = gmm->gmmResourceInfo->peekHandle(); allocationInfo.hAllocation = ALLOCATION_HANDLE; allocationInfo.PrivateDriverDataSize = sizeof(GMM_RESOURCE_INFO); gdi->getOpenResourceArgOut().pOpenAllocationInfo = &allocationInfo; AllocationProperties properties(0, false, 0, AllocationType::SHARED_BUFFER, false, false, 0); auto allocation = memoryManager->createGraphicsAllocationFromSharedHandle(1, properties, false, false); EXPECT_NE(nullptr, allocation); memoryManager->setDeferredDeleter(nullptr); memoryManager->freeGraphicsMemory(allocation); EXPECT_EQ(1u, memoryManager->freeGraphicsMemoryImplCalled); gdi->getDestroyArg().AllocationCount = 7; auto destroyArg = gdi->getDestroyArg(); EXPECT_EQ(7, destroyArg.AllocationCount); gdi->getDestroyArg().AllocationCount = 0; } TEST_F(WddmMemoryManagerTest, givenWddmMemoryManagerWhenCreateFromNTHandleIsCalledThenNonNullGraphicsAllocationIsReturned) { void *pSysMem = reinterpret_cast(0x1000); 
std::unique_ptr gmm(new Gmm(rootDeviceEnvironment->getGmmClientContext(), pSysMem, 4096u, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true)); setSizesFcn(gmm->gmmResourceInfo.get(), 1u, 1024u, 1u); auto *gpuAllocation = memoryManager->createGraphicsAllocationFromNTHandle(reinterpret_cast(1), 0, AllocationType::SHARED_IMAGE); auto wddmAlloc = static_cast(gpuAllocation); ASSERT_NE(nullptr, gpuAllocation); EXPECT_EQ(NT_RESOURCE_HANDLE, wddmAlloc->resourceHandle); EXPECT_EQ(NT_ALLOCATION_HANDLE, wddmAlloc->getDefaultHandle()); EXPECT_EQ(AllocationType::SHARED_IMAGE, wddmAlloc->getAllocationType()); memoryManager->freeGraphicsMemory(gpuAllocation); } TEST_F(WddmMemoryManagerTest, givenWddmMemoryManagerWhenLockUnlockIsCalledThenReturnPtr) { auto alloc = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootDeviceIndex, MemoryConstants::pageSize}); auto ptr = memoryManager->lockResource(alloc); EXPECT_NE(nullptr, ptr); EXPECT_EQ(1u, wddm->lockResult.called); EXPECT_TRUE(wddm->lockResult.success); memoryManager->unlockResource(alloc); EXPECT_EQ(1u, wddm->unlockResult.called); EXPECT_TRUE(wddm->unlockResult.success); memoryManager->freeGraphicsMemory(alloc); } TEST_F(WddmMemoryManagerTest, GivenForce32bitAddressingAndRequireSpecificBitnessWhenCreatingAllocationFromSharedHandleThen32BitAllocationIsReturned) { auto osHandle = 1u; void *pSysMem = reinterpret_cast(0x1000); std::unique_ptr gmm(new Gmm(rootDeviceEnvironment->getGmmClientContext(), pSysMem, 4096u, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true)); setSizesFcn(gmm->gmmResourceInfo.get(), 1u, 1024u, 1u); memoryManager->setForce32BitAllocations(true); AllocationProperties properties(0, false, 4096u, AllocationType::SHARED_BUFFER, false, false, 0); auto *gpuAllocation = memoryManager->createGraphicsAllocationFromSharedHandle(osHandle, properties, true, false); ASSERT_NE(nullptr, gpuAllocation); if constexpr (is64bit) { EXPECT_TRUE(gpuAllocation->is32BitAllocation()); uint64_t base = 
memoryManager->getExternalHeapBaseAddress(gpuAllocation->getRootDeviceIndex(), gpuAllocation->isAllocatedInLocalMemoryPool());
        EXPECT_EQ(GmmHelper::canonize(base), gpuAllocation->getGpuBaseAddress());
    }

    memoryManager->freeGraphicsMemory(gpuAllocation);
}

// NOTE(review): template arguments in this section were missing in this copy of
// the file and were restored from context -- confirm against upstream.

// With force-32-bit enabled but specific bitness NOT required, a shared-handle
// allocation is not 32-bit and (on 64-bit builds) has a zero GPU base address.
TEST_F(WddmMemoryManagerTest, GivenForce32bitAddressingAndNotRequiredSpecificBitnessWhenCreatingAllocationFromSharedHandleThenNon32BitAllocationIsReturned) {
    auto osHandle = 1u;
    void *pSysMem = reinterpret_cast<void *>(0x1000);
    std::unique_ptr<Gmm> gmm(new Gmm(rootDeviceEnvironment->getGmmClientContext(), pSysMem, 4096u, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true));

    setSizesFcn(gmm->gmmResourceInfo.get(), 1u, 1024u, 1u);

    memoryManager->setForce32BitAllocations(true);

    AllocationProperties properties(0, false, 4096u, AllocationType::SHARED_BUFFER, false, false, 0);

    auto *gpuAllocation = memoryManager->createGraphicsAllocationFromSharedHandle(osHandle, properties, false, false);
    ASSERT_NE(nullptr, gpuAllocation);

    EXPECT_FALSE(gpuAllocation->is32BitAllocation());
    if constexpr (is64bit) {
        uint64_t base = 0;
        EXPECT_EQ(base, gpuAllocation->getGpuBaseAddress());
    }

    memoryManager->freeGraphicsMemory(gpuAllocation);
}

// Freeing an allocation opened from a shared handle destroys its resource
// handle, as reported by the mock's "last destroyed resource handle" hook.
TEST_F(WddmMemoryManagerTest, givenWddmMemoryManagerWhenFreeAllocFromSharedHandleIsCalledThenDestroyResourceHandle) {
    auto osHandle = 1u;
    void *pSysMem = reinterpret_cast<void *>(0x1000);
    std::unique_ptr<Gmm> gmm(new Gmm(rootDeviceEnvironment->getGmmClientContext(), pSysMem, 4096u, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true));

    setSizesFcn(gmm->gmmResourceInfo.get(), 1u, 1024u, 1u);

    AllocationProperties properties(0, false, 4096u, AllocationType::SHARED_BUFFER, false, false, 0);

    auto gpuAllocation = static_cast<WddmAllocation *>(memoryManager->createGraphicsAllocationFromSharedHandle(osHandle, properties, false, false));
    // Fatal assertion: the allocation is dereferenced on the next line.
    ASSERT_NE(nullptr, gpuAllocation);
    auto expectedDestroyHandle = gpuAllocation->resourceHandle;
    EXPECT_NE(0u, expectedDestroyHandle);

    auto lastDestroyed = getMockLastDestroyedResHandleFcn();
    EXPECT_EQ(0u, lastDestroyed);
memoryManager->freeGraphicsMemory(gpuAllocation);
    lastDestroyed = getMockLastDestroyedResHandleFcn();
    EXPECT_EQ(lastDestroyed, expectedDestroyHandle);
}

// NOTE(review): template arguments in this section were missing in this copy of
// the file and were restored from context -- confirm against upstream.

// The allocation opened from a shared handle reports the expected underlying
// buffer size.
TEST_F(WddmMemoryManagerTest, givenWddmMemoryManagerSizeZeroWhenCreateFromSharedHandleIsCalledThenUpdateSize) {
    auto osHandle = 1u;
    auto size = 4096u;
    void *pSysMem = reinterpret_cast<void *>(0x1000);
    std::unique_ptr<Gmm> gmm(new Gmm(rootDeviceEnvironment->getGmmClientContext(), pSysMem, size, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true));

    setSizesFcn(gmm->gmmResourceInfo.get(), 1u, 1024u, 1u);

    AllocationProperties properties(0, false, size, AllocationType::SHARED_BUFFER, false, false, 0);

    auto *gpuAllocation = memoryManager->createGraphicsAllocationFromSharedHandle(osHandle, properties, false, false);
    ASSERT_NE(nullptr, gpuAllocation);
    EXPECT_EQ(size, gpuAllocation->getUnderlyingBufferSize());
    memoryManager->freeGraphicsMemory(gpuAllocation);
}

// With compressed buffers enabled in HW info and preferCompressed requested, a
// BUFFER allocation goes through the 64KB path, has compression enabled, and
// (when applicable) its GPU address lies in the STANDARD64KB heap range.
HWTEST_F(WddmMemoryManagerTest, givenWddmMemoryManagerWhenAllocateGraphicsMemoryWithSetAllocattionPropertisWithAllocationTypeBufferCompressedIsCalledThenIsRendeCompressedTrueAndGpuMappingIsSetWithGoodAddressRange) {
    void *ptr = reinterpret_cast<void *>(0x1001);
    auto size = MemoryConstants::pageSize;
    HardwareInfo hwInfo = *defaultHwInfo;
    hwInfo.capabilityTable.ftrRenderCompressedBuffers = true;
    rootDeviceEnvironment->setHwInfo(&hwInfo);

    // Local manager intentionally shadows the fixture's memoryManager.
    auto memoryManager = std::make_unique<MockWddmMemoryManager>(true, false, *executionEnvironment);
    if (memoryManager->isLimitedGPU(0)) {
        GTEST_SKIP();
    }

    rootDeviceEnvironment->executionEnvironment.initializeMemoryManager();
    memoryManager->allocateGraphicsMemoryInNonDevicePool = true;

    MockAllocationProperties properties = {mockRootDeviceIndex, true, size, AllocationType::BUFFER, mockDeviceBitfield};
    properties.flags.preferCompressed = true;

    auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(properties, ptr);

    auto gfxPartition = memoryManager->getGfxPartition(mockRootDeviceIndex);
    D3DGPU_VIRTUAL_ADDRESS standard64kbRangeMinimumAddress =
gfxPartition->getHeapMinimalAddress(HeapIndex::HEAP_STANDARD64KB); D3DGPU_VIRTUAL_ADDRESS standard64kbRangeMaximumAddress = gfxPartition->getHeapLimit(HeapIndex::HEAP_STANDARD64KB); ASSERT_NE(nullptr, allocation); EXPECT_TRUE(memoryManager->allocationGraphicsMemory64kbCreated); EXPECT_TRUE(allocation->getDefaultGmm()->isCompressionEnabled); if ((is32bit || rootDeviceEnvironment->isFullRangeSvm()) && allocation->getDefaultGmm()->gmmResourceInfo->is64KBPageSuitable()) { EXPECT_GE(GmmHelper::decanonize(allocation->getGpuAddress()), standard64kbRangeMinimumAddress); EXPECT_LE(GmmHelper::decanonize(allocation->getGpuAddress()), standard64kbRangeMaximumAddress); } memoryManager->freeGraphicsMemory(allocation); } HWTEST_F(WddmMemoryManagerTest, givenInternalHeapOrLinearStreamTypeWhenAllocatingThenSetCorrectUsage) { auto memoryManager = std::make_unique(true, false, *executionEnvironment); rootDeviceEnvironment->executionEnvironment.initializeMemoryManager(); { MockAllocationProperties properties = {mockRootDeviceIndex, true, 1, AllocationType::INTERNAL_HEAP, mockDeviceBitfield}; auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(properties, nullptr); ASSERT_NE(nullptr, allocation); EXPECT_TRUE(allocation->getDefaultGmm()->resourceParams.Usage == GMM_RESOURCE_USAGE_TYPE::GMM_RESOURCE_USAGE_OCL_STATE_HEAP_BUFFER); memoryManager->freeGraphicsMemory(allocation); } { MockAllocationProperties properties = {mockRootDeviceIndex, true, 1, AllocationType::LINEAR_STREAM, mockDeviceBitfield}; auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(properties, nullptr); ASSERT_NE(nullptr, allocation); EXPECT_TRUE(allocation->getDefaultGmm()->resourceParams.Usage == GMM_RESOURCE_USAGE_TYPE::GMM_RESOURCE_USAGE_OCL_STATE_HEAP_BUFFER); memoryManager->freeGraphicsMemory(allocation); } } HWTEST_F(WddmMemoryManagerTest, 
givenWddmMemoryManagerWhenAllocateGraphicsMemoryWithSetAllocattionPropertisWithAllocationTypeBufferIsCalledThenIsRendeCompressedFalseAndCorrectAddressRange) { void *ptr = reinterpret_cast(0x1001); auto size = MemoryConstants::pageSize; HardwareInfo hwInfo = *defaultHwInfo; hwInfo.capabilityTable.ftrRenderCompressedBuffers = true; rootDeviceEnvironment->setHwInfo(&hwInfo); auto memoryManager = std::make_unique(false, false, *executionEnvironment); memoryManager->allocateGraphicsMemoryInNonDevicePool = true; auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{mockRootDeviceIndex, true, size, AllocationType::BUFFER, mockDeviceBitfield}, ptr); auto gfxPartition = memoryManager->getGfxPartition(mockRootDeviceIndex); D3DGPU_VIRTUAL_ADDRESS svmRangeMinimumAddress = gfxPartition->getHeapMinimalAddress(HeapIndex::HEAP_SVM); D3DGPU_VIRTUAL_ADDRESS svmRangeMaximumAddress = gfxPartition->getHeapLimit(HeapIndex::HEAP_SVM); ASSERT_NE(nullptr, allocation); EXPECT_FALSE(memoryManager->allocationGraphicsMemory64kbCreated); EXPECT_FALSE(allocation->getDefaultGmm()->isCompressionEnabled); if (is32bit || rootDeviceEnvironment->isFullRangeSvm()) { EXPECT_GE(GmmHelper::decanonize(allocation->getGpuAddress()), svmRangeMinimumAddress); EXPECT_LE(GmmHelper::decanonize(allocation->getGpuAddress()), svmRangeMaximumAddress); } memoryManager->freeGraphicsMemory(allocation); } TEST_F(WddmMemoryManagerTest, givenWddmMemoryManagerWhenCreateFromSharedHandleFailsThenReturnNull) { auto osHandle = 1u; auto size = 4096u; void *pSysMem = reinterpret_cast(0x1000); std::unique_ptr gmm(new Gmm(rootDeviceEnvironment->getGmmClientContext(), pSysMem, size, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true)); setSizesFcn(gmm->gmmResourceInfo.get(), 1u, 1024u, 1u); wddm->failOpenSharedHandle = true; AllocationProperties properties(0, false, size, AllocationType::SHARED_BUFFER, false, false, 0); auto *gpuAllocation = 
memoryManager->createGraphicsAllocationFromSharedHandle(osHandle, properties, false, false); EXPECT_EQ(nullptr, gpuAllocation); }

// 2D image, num_mip_levels left at zero: the image's graphics allocation must
// carry the OCL_IMAGE Gmm usage. Skipped when tiled images are not supported.
HWTEST_F(WddmMemoryManagerTest, givenWddmMemoryManagerWhenTiledImageWithMipCountZeroIsBeingCreatedThenallocateGraphicsMemoryForImageIsUsed) {
    if (!UnitTestHelper::tiledImagesSupported) {
        GTEST_SKIP();
    }
    MockContext context;
    context.memoryManager = memoryManager.get();

    cl_image_format imageFormat;
    imageFormat.image_channel_data_type = CL_UNORM_INT8;
    imageFormat.image_channel_order = CL_R;

    cl_image_desc imageDesc = {};
    imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D;
    imageDesc.image_width = 64u;
    imageDesc.image_height = 64u;

    auto retVal = CL_SUCCESS;

    cl_mem_flags flags = CL_MEM_WRITE_ONLY;
    auto surfaceFormat = Image::getSurfaceFormatFromTable(
        flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features);
    std::unique_ptr dstImage(
        Image::create(&context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context.getDevice(0)->getDevice()),
                      flags, 0, surfaceFormat, &imageDesc, nullptr, retVal));
    EXPECT_EQ(CL_SUCCESS, retVal);
    ASSERT_NE(nullptr, dstImage);

    auto imageGraphicsAllocation = dstImage->getGraphicsAllocation(context.getDevice(0)->getRootDeviceIndex());
    ASSERT_NE(nullptr, imageGraphicsAllocation);
    EXPECT_EQ(GMM_RESOURCE_USAGE_TYPE::GMM_RESOURCE_USAGE_OCL_IMAGE,
              imageGraphicsAllocation->getDefaultGmm()->resourceParams.Usage);
}

// Same as above with num_mip_levels = 1; additionally checks that the image
// reports the requested mip count.
TEST_F(WddmMemoryManagerTest, givenWddmMemoryManagerWhenTiledImageWithMipCountNonZeroIsBeingCreatedThenallocateGraphicsMemoryForImageIsUsed) {
    MockContext context;
    context.memoryManager = memoryManager.get();

    cl_image_format imageFormat;
    imageFormat.image_channel_data_type = CL_UNORM_INT8;
    imageFormat.image_channel_order = CL_R;

    cl_image_desc imageDesc = {};
    imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D;
    imageDesc.image_width = 64u;
    imageDesc.image_height = 64u;
    imageDesc.num_mip_levels = 1u;

    auto retVal = CL_SUCCESS;

    cl_mem_flags flags = CL_MEM_WRITE_ONLY;
    auto surfaceFormat = Image::getSurfaceFormatFromTable(
        flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features);
    std::unique_ptr dstImage(
        Image::create(&context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context.getDevice(0)->getDevice()),
                      flags, 0, surfaceFormat, &imageDesc, nullptr, retVal));
    EXPECT_EQ(CL_SUCCESS, retVal);
    ASSERT_NE(nullptr, dstImage);
    EXPECT_EQ(static_cast(imageDesc.num_mip_levels), dstImage->peekMipCount());

    auto imageGraphicsAllocation = dstImage->getGraphicsAllocation(context.getDevice(0)->getRootDeviceIndex());
    ASSERT_NE(nullptr, imageGraphicsAllocation);
    EXPECT_EQ(GMM_RESOURCE_USAGE_TYPE::GMM_RESOURCE_USAGE_OCL_IMAGE,
              imageGraphicsAllocation->getDefaultGmm()->resourceParams.Usage);
}

// 2D image created with CL_MEM_USE_HOST_PTR over a stack buffer: the image's
// graphics allocation must still carry the OCL_IMAGE Gmm usage.
HWTEST_F(WddmMemoryManagerTest, givenWddmMemoryManagerWhenTiledImageIsBeingCreatedFromHostPtrThenallocateGraphicsMemoryForImageIsUsed) {
    if (!UnitTestHelper::tiledImagesSupported) {
        GTEST_SKIP();
    }

    auto device = std::make_unique(MockDevice::createWithExecutionEnvironment(defaultHwInfo.get(), executionEnvironment, 0u));
    MockContext context(device.get());
    context.memoryManager = memoryManager.get();

    cl_image_format imageFormat;
    imageFormat.image_channel_data_type = CL_UNORM_INT8;
    imageFormat.image_channel_order = CL_R;

    cl_image_desc imageDesc = {};
    imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D;
    imageDesc.image_width = 64u;
    imageDesc.image_height = 64u;

    char data[64u * 64u * 4 * 8];

    auto retVal = CL_SUCCESS;

    cl_mem_flags flags = CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR;
    auto surfaceFormat = Image::getSurfaceFormatFromTable(
        flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features);
    std::unique_ptr dstImage(
        Image::create(&context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context.getDevice(0)->getDevice()),
                      flags, 0, surfaceFormat, &imageDesc, data, retVal));
    EXPECT_EQ(CL_SUCCESS, retVal);
    ASSERT_NE(nullptr, dstImage);

    auto imageGraphicsAllocation = dstImage->getGraphicsAllocation(context.getDevice(0)->getRootDeviceIndex());
    ASSERT_NE(nullptr, imageGraphicsAllocation);
    EXPECT_EQ(GMM_RESOURCE_USAGE_TYPE::GMM_RESOURCE_USAGE_OCL_IMAGE,
              imageGraphicsAllocation->getDefaultGmm()->resourceParams.Usage);
}

// 1D image with num_mip_levels = 1: checks the mip count and the OCL_IMAGE
// Gmm usage of the image's graphics allocation.
TEST_F(WddmMemoryManagerTest, givenWddmMemoryManagerWhenNonTiledImgWithMipCountNonZeroisBeingCreatedThenAllocateGraphicsMemoryForImageIsUsed) {
    MockContext context;
    context.memoryManager = memoryManager.get();

    cl_image_format imageFormat;
    imageFormat.image_channel_data_type = CL_UNORM_INT8;
    imageFormat.image_channel_order = CL_R;

    cl_image_desc imageDesc = {};
    imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D;
    imageDesc.image_width = 64u;
    imageDesc.num_mip_levels = 1u;

    auto retVal = CL_SUCCESS;

    cl_mem_flags flags = CL_MEM_WRITE_ONLY;
    auto surfaceFormat = Image::getSurfaceFormatFromTable(
        flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features);
    std::unique_ptr dstImage(
        Image::create(&context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context.getDevice(0)->getDevice()),
                      flags, 0, surfaceFormat, &imageDesc, nullptr, retVal));
    EXPECT_EQ(CL_SUCCESS, retVal);
    ASSERT_NE(nullptr, dstImage);
    EXPECT_EQ(static_cast(imageDesc.num_mip_levels), dstImage->peekMipCount());

    auto imageGraphicsAllocation = dstImage->getGraphicsAllocation(context.getDevice(0)->getRootDeviceIndex());
    ASSERT_NE(nullptr, imageGraphicsAllocation);
    EXPECT_EQ(GMM_RESOURCE_USAGE_TYPE::GMM_RESOURCE_USAGE_OCL_IMAGE,
              imageGraphicsAllocation->getDefaultGmm()->resourceParams.Usage);
}

// Two host-pointer allocations over the same malloc'ed region: checks the
// refCount of the host-pointer fragment looked up after each allocate/free.
TEST_F(WddmMemoryManagerTest, GivenOffsetsWhenAllocatingGpuMemHostThenAllocatedOnlyIfInBounds) {
    if (memoryManager->isLimitedGPU(0)) {
        GTEST_SKIP();
    }
    MockWddmAllocation alloc(rootDeviceEnvironment->getGmmClientContext()), allocOffseted(rootDeviceEnvironment->getGmmClientContext());
    // four pages, page aligned
    void *ptr = alignedMalloc(4 * 4096, 4096);
    ASSERT_NE(nullptr, ptr);

    size_t baseOffset = 1024;
    // misaligned buffer spanning across 3 pages; kept as void * so that a failing
    // EXPECT_EQ prints an address instead of treating it as a C string
    void *misalignedPtr = ptrOffset(ptr, baseOffset);
    auto *gpuAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootDeviceIndex, false, 2 * MemoryConstants::pageSize}, misalignedPtr);
    // the allocation is dereferenced below, so a failed allocation must stop the test
    ASSERT_NE(nullptr, gpuAllocation);
    // Should be same cpu ptr and gpu ptr
    EXPECT_EQ(misalignedPtr, gpuAllocation->getUnderlyingBuffer());

    auto hostPtrManager = memoryManager->getHostPtrManager();

    auto fragment = hostPtrManager->getFragment({ptr, rootDeviceIndex});
    ASSERT_NE(nullptr, fragment);
    EXPECT_TRUE(fragment->refCount == 1);
    EXPECT_NE(fragment->osInternalStorage, nullptr);

    // offset by 3 pages, not in boundary
    auto fragment2 = hostPtrManager->getFragment({reinterpret_cast(ptr) + 3 * 4096, rootDeviceIndex});

    EXPECT_EQ(nullptr, fragment2);

    // offset by one page, still in boundary
    void *offsetPtr = ptrOffset(ptr, 4096);
    auto *gpuAllocation2 = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootDeviceIndex, false, MemoryConstants::pageSize}, offsetPtr);
    ASSERT_NE(nullptr, gpuAllocation2);
    // Should be same cpu ptr and gpu ptr
    EXPECT_EQ(offsetPtr, gpuAllocation2->getUnderlyingBuffer());

    auto fragment3 = hostPtrManager->getFragment({offsetPtr, rootDeviceIndex});
    ASSERT_NE(nullptr, fragment3);

    EXPECT_TRUE(fragment3->refCount == 2);

    EXPECT_EQ(alloc.handle, allocOffseted.handle);
    EXPECT_EQ(alloc.getUnderlyingBufferSize(), allocOffseted.getUnderlyingBufferSize());
    EXPECT_EQ(alloc.getAlignedCpuPtr(), allocOffseted.getAlignedCpuPtr());

    memoryManager->freeGraphicsMemory(gpuAllocation2);

    auto fragment4 = hostPtrManager->getFragment({ptr, rootDeviceIndex});
    ASSERT_NE(nullptr, fragment4);

    EXPECT_TRUE(fragment4->refCount == 1);

    memoryManager->freeGraphicsMemory(gpuAllocation);

    fragment4 = hostPtrManager->getFragment({ptr, rootDeviceIndex});
    EXPECT_EQ(nullptr, fragment4);

    alignedFree(ptr);
}

// Host-pointer allocation over three pages: the stored fragment must carry a
// non-zero handle and a non-null gmm in its OS-internal storage.
TEST_F(WddmMemoryManagerTest, WhenAllocatingGpuMemThenOsInternalStorageIsPopulatedCorrectly) {
    if (memoryManager->isLimitedGPU(0)) {
        GTEST_SKIP();
    }
    MockWddmAllocation allocation(rootDeviceEnvironment->getGmmClientContext());
    // three pages
    void *ptr = alignedMalloc(3 * 4096, 4096);
    ASSERT_NE(nullptr, ptr);
    auto *gpuAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootDeviceIndex, false, 3 * MemoryConstants::pageSize}, ptr);
    // Should be same cpu ptr and gpu ptr
    ASSERT_NE(nullptr, gpuAllocation);
    EXPECT_EQ(ptr, gpuAllocation->getUnderlyingBuffer());

    auto fragment = memoryManager->getHostPtrManager()->getFragment({ptr, rootDeviceIndex});
    ASSERT_NE(nullptr, fragment);
    EXPECT_TRUE(fragment->refCount == 1);
    EXPECT_NE(static_cast(fragment->osInternalStorage)->handle, 0);
    EXPECT_NE(static_cast(fragment->osInternalStorage)->gmm, nullptr);

    memoryManager->freeGraphicsMemory(gpuAllocation);
    alignedFree(ptr);
}

// Page-aligned pointer and size: Gmm must receive exactly that pointer/size.
TEST_F(WddmMemoryManagerTest, GivenAlignedPointerWhenAllocate32BitMemoryThenGmmCalledWithCorrectPointerAndSize) {
    MockWddmAllocation allocation(rootDeviceEnvironment->getGmmClientContext());
    uint32_t size = 4096;
    void *ptr = reinterpret_cast(4096);
    auto *gpuAllocation = memoryManager->allocate32BitGraphicsMemory(rootDeviceIndex, size, ptr, AllocationType::BUFFER);
    // guard the dereferences below against a failed allocation
    ASSERT_NE(nullptr, gpuAllocation);
    EXPECT_EQ(ptr, reinterpret_cast(gpuAllocation->getDefaultGmm()->resourceParams.pExistingSysMem));
    EXPECT_EQ(size, gpuAllocation->getDefaultGmm()->resourceParams.ExistingSysMemSize);
    memoryManager->freeGraphicsMemory(gpuAllocation);
}

// Pointer 0x1001 with size 0x1001: Gmm is expected to receive pointer 0x1000
// and size 0x2000.
TEST_F(WddmMemoryManagerTest, GivenUnAlignedPointerAndSizeWhenAllocate32BitMemoryThenGmmCalledWithCorrectPointerAndSize) {
    MockWddmAllocation allocation(rootDeviceEnvironment->getGmmClientContext());
    uint32_t size = 0x1001;
    void *ptr = reinterpret_cast(0x1001);
    auto *gpuAllocation = memoryManager->allocate32BitGraphicsMemory(rootDeviceIndex, size, ptr, AllocationType::BUFFER);
    // guard the dereferences below against a failed allocation
    ASSERT_NE(nullptr, gpuAllocation);
    EXPECT_EQ(reinterpret_cast(0x1000), reinterpret_cast(gpuAllocation->getDefaultGmm()->resourceParams.pExistingSysMem));
    EXPECT_EQ(0x2000, gpuAllocation->getDefaultGmm()->resourceParams.ExistingSysMemSize);
    memoryManager->freeGraphicsMemory(gpuAllocation);
}
// Re-creates Wddm for four root devices and checks the system shared memory
// value the memory manager reports for each of them.
TEST_F(WddmMemoryManagerTest, WhenInitializingWddmThenSystemSharedMemoryIsCorrect) {
    executionEnvironment->prepareRootDeviceEnvironments(4u);
    for (auto rootIdx = 0u; rootIdx < 4u; rootIdx++) {
        auto &rootEnv = executionEnvironment->rootDeviceEnvironments[rootIdx];
        rootEnv->osInterface.reset();

        auto wddmForRoot = Wddm::createWddm(nullptr, *rootEnv.get());
        wddmForRoot->init();

        int64_t sharedMemory = memoryManager->getSystemSharedMemory(rootIdx);
        EXPECT_EQ(sharedMemory, 4249540608);
    }
}

// The maximum application address depends only on the bitness of the build.
TEST_F(WddmMemoryManagerTest, GivenBitnessWhenGettingMaxAddressThenCorrectAddressIsReturned) {
    uint64_t reportedMax = memoryManager->getMaxApplicationAddress();
    if (!is32bit) {
        EXPECT_EQ(reportedMax, MemoryConstants::max64BitAppAddress);
    } else {
        EXPECT_EQ(reportedMax, MemoryConstants::max32BitAppAddress);
    }
}

// A 32-bit allocation without a host pointer must lie strictly inside
// HEAP_EXTERNAL and must not use host-pointer fragments.
TEST_F(WddmMemoryManagerTest, GivenNullptrWhenAllocating32BitMemoryThenAddressIsCorrect) {
    auto *allocation32Bit = memoryManager->allocate32BitGraphicsMemory(rootDeviceIndex, 3 * MemoryConstants::pageSize, nullptr, AllocationType::BUFFER);
    ASSERT_NE(nullptr, allocation32Bit);

    const auto heapBase = GmmHelper::canonize(memoryManager->getGfxPartition(0)->getHeapBase(HeapIndex::HEAP_EXTERNAL));
    EXPECT_LT(heapBase, allocation32Bit->getGpuAddress());

    const auto heapLimit = GmmHelper::canonize(memoryManager->getGfxPartition(0)->getHeapLimit(HeapIndex::HEAP_EXTERNAL));
    EXPECT_GT(heapLimit, allocation32Bit->getGpuAddress() + allocation32Bit->getUnderlyingBufferSize());

    EXPECT_EQ(0u, allocation32Bit->fragmentsStorage.fragmentCount);

    memoryManager->freeGraphicsMemory(allocation32Bit);
}

// Same range check as above, plus a non-zero GPU address to patch.
TEST_F(WddmMemoryManagerTest, given32BitAllocationWhenItIsCreatedThenItHasNonZeroGpuAddressToPatch) {
    auto *allocation32Bit = memoryManager->allocate32BitGraphicsMemory(rootDeviceIndex, 3 * MemoryConstants::pageSize, nullptr, AllocationType::BUFFER);
    ASSERT_NE(nullptr, allocation32Bit);

    EXPECT_NE(0llu, allocation32Bit->getGpuAddressToPatch());

    const auto heapBase = GmmHelper::canonize(memoryManager->getGfxPartition(0)->getHeapBase(HeapIndex::HEAP_EXTERNAL));
    EXPECT_LT(heapBase, allocation32Bit->getGpuAddress());

    const auto heapLimit = GmmHelper::canonize(memoryManager->getGfxPartition(0)->getHeapLimit(HeapIndex::HEAP_EXTERNAL));
    EXPECT_GT(heapLimit, allocation32Bit->getGpuAddress() + allocation32Bit->getUnderlyingBufferSize());

    memoryManager->freeGraphicsMemory(allocation32Bit);
}

// Misaligned host pointer and size: one allocation of whole-page size inside
// HEAP_EXTERNAL, no host-pointer fragments, and an allocation offset equal to
// the pointer's distance from its aligned-down address.
TEST_F(WddmMemoryManagerTest, GivenMisalignedHostPtrWhenAllocating32BitMemoryThenTripleAllocationDoesNotOccur) {
    size_t requestedSize = 0x2500;
    void *hostPtr = reinterpret_cast(0x12500);

    auto *allocation32Bit = memoryManager->allocate32BitGraphicsMemory(rootDeviceIndex, requestedSize, hostPtr, AllocationType::BUFFER);
    ASSERT_NE(nullptr, allocation32Bit);

    EXPECT_EQ(alignSizeWholePage(hostPtr, requestedSize), allocation32Bit->getUnderlyingBufferSize());

    const auto heapBase = GmmHelper::canonize(memoryManager->getGfxPartition(rootDeviceIndex)->getHeapBase(HeapIndex::HEAP_EXTERNAL));
    EXPECT_LT(heapBase, allocation32Bit->getGpuAddress());

    const auto heapLimit = GmmHelper::canonize(memoryManager->getGfxPartition(rootDeviceIndex)->getHeapLimit(HeapIndex::HEAP_EXTERNAL));
    EXPECT_GT(heapLimit, allocation32Bit->getGpuAddress() + allocation32Bit->getUnderlyingBufferSize());

    EXPECT_EQ(0u, allocation32Bit->fragmentsStorage.fragmentCount);

    void *alignedHostPtr = alignDown(hostPtr, MemoryConstants::allocationAlignment);
    uint64_t expectedOffset = ptrDiff(hostPtr, alignedHostPtr);
    EXPECT_EQ(expectedOffset, allocation32Bit->getAllocationOffset());

    memoryManager->freeGraphicsMemory(allocation32Bit);
}

// The GPU base address of a 32-bit allocation is the canonized base of the
// external heap selected for the allocation's memory pool.
TEST_F(WddmMemoryManagerTest, WhenAllocating32BitMemoryThenGpuBaseAddressIsCannonized) {
    auto *allocation32Bit = memoryManager->allocate32BitGraphicsMemory(rootDeviceIndex, 3 * MemoryConstants::pageSize, nullptr, AllocationType::BUFFER);
    ASSERT_NE(nullptr, allocation32Bit);

    const auto externalHeap = MemoryManager::selectExternalHeap(allocation32Bit->isAllocatedInLocalMemoryPool());
    uint64_t expectedBase = GmmHelper::canonize(memoryManager->getGfxPartition(0)->getHeapBase(externalHeap));
    EXPECT_EQ(expectedBase, allocation32Bit->getGpuBaseAddress());

    memoryManager->freeGraphicsMemory(allocation32Bit);
}

TEST_F(WddmMemoryManagerTest,
GivenThreeOsHandlesWhenAskedForDestroyAllocationsThenAllMarkedAllocationsAreDestroyed) {
    // Three fragments are handed to cleanOsHandles(); fragment 1 has
    // freeTheFragment == false, so its handle and residency data are checked
    // and released manually at the end of the test.
    OsHandleStorage storage;
    void *pSysMem = reinterpret_cast(0x1000);
    uint32_t maxOsContextCount = 1u;

    auto osHandle0 = new OsHandleWin();
    auto osHandle1 = new OsHandleWin();
    auto osHandle2 = new OsHandleWin();

    storage.fragmentStorageData[0].osHandleStorage = osHandle0;
    storage.fragmentStorageData[0].residency = new ResidencyData(maxOsContextCount);
    osHandle0->handle = ALLOCATION_HANDLE;
    storage.fragmentStorageData[0].freeTheFragment = true;
    osHandle0->gmm = new Gmm(rootDeviceEnvironment->getGmmClientContext(), pSysMem, 4096u, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true);

    storage.fragmentStorageData[1].osHandleStorage = osHandle1;
    osHandle1->handle = ALLOCATION_HANDLE;
    storage.fragmentStorageData[1].residency = new ResidencyData(maxOsContextCount);
    storage.fragmentStorageData[1].freeTheFragment = false;

    storage.fragmentStorageData[2].osHandleStorage = osHandle2;
    osHandle2->handle = ALLOCATION_HANDLE;
    storage.fragmentStorageData[2].freeTheFragment = true;
    osHandle2->gmm = new Gmm(rootDeviceEnvironment->getGmmClientContext(), pSysMem, 4096u, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true);
    storage.fragmentStorageData[2].residency = new ResidencyData(maxOsContextCount);

    memoryManager->cleanOsHandles(storage, 0);

    auto destroyWithResourceHandleCalled = 0u;
    D3DKMT_DESTROYALLOCATION2 *ptrToDestroyAlloc2 = nullptr;

    getSizesFcn(destroyWithResourceHandleCalled, ptrToDestroyAlloc2);

    // The pointer starts out as nullptr; report a missing value as a test
    // failure instead of dereferencing it. A non-fatal check is used so the
    // manual cleanup below still runs.
    EXPECT_NE(nullptr, ptrToDestroyAlloc2);
    if (ptrToDestroyAlloc2 != nullptr) {
        EXPECT_EQ(0u, ptrToDestroyAlloc2->Flags.SynchronousDestroy);
        EXPECT_EQ(1u, ptrToDestroyAlloc2->Flags.AssumeNotInUse);
    }

    EXPECT_EQ(ALLOCATION_HANDLE, osHandle1->handle);

    delete storage.fragmentStorageData[1].osHandleStorage;
    delete storage.fragmentStorageData[1].residency;
}

// Freeing a null allocation must be a no-op.
TEST_F(WddmMemoryManagerTest, GivenNullptrWhenFreeingAllocationThenCrashDoesNotOccur) {
    EXPECT_NO_THROW(memoryManager->freeGraphicsMemory(nullptr));
}

// The default WDDM memory manager exposes aligned-malloc restrictions whose
// minimum address equals NEO::windowsMinAddress.
TEST_F(WddmMemoryManagerTest, givenDefaultWddmMemoryManagerWhenAskedForAlignedMallocRestrictionsThenValueIsReturned) {
    AlignedMallocRestrictions *mallocRestrictions = memoryManager->getAlignedMallocRestrictions();
    ASSERT_NE(nullptr, mallocRestrictions);
    EXPECT_EQ(NEO::windowsMinAddress, mallocRestrictions->minAddress);
}

// A CPU pointer one page below the restriction's minimum address: the
// allocation's reserved address must equal the mock's virtualAllocAddress.
TEST_F(WddmMemoryManagerTest, givenWddmMemoryManagerWhenCpuMemNotMeetRestrictionsThenReserveMemRangeForMap) {
    if (memoryManager->isLimitedGPU(0)) {
        GTEST_SKIP();
    }
    void *cpuPtr = reinterpret_cast(memoryManager->getAlignedMallocRestrictions()->minAddress - 0x1000);
    size_t size = 0x1000;

    auto allocation = static_cast(memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{0, false, size}, cpuPtr));

    void *expectReserve = reinterpret_cast(wddm->virtualAllocAddress);

    ASSERT_NE(nullptr, allocation);
    EXPECT_EQ(expectReserve, allocation->getReservedAddressPtr());

    memoryManager->freeGraphicsMemory(allocation);
}

// With the deferred deleter removed and map-GPU-VA configured to fail via
// setMapGpuVaFailConfigFcn(0, 1), createWddmAllocation() must return false.
TEST_F(WddmMemoryManagerTest, givenManagerWithDisabledDeferredDeleterWhenMapGpuVaFailThenFailToCreateAllocation) {
    void *ptr = reinterpret_cast(0x1000);
    size_t size = 0x1000;
    std::unique_ptr gmm(new Gmm(rootDeviceEnvironment->getGmmClientContext(), ptr, size, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true));

    memoryManager->setDeferredDeleter(nullptr);
    setMapGpuVaFailConfigFcn(0, 1);

    WddmAllocation allocation(0, AllocationType::BUFFER, ptr, size, nullptr, MemoryPool::MemoryNull, 0u, 1u);
    allocation.setDefaultGmm(gmm.get());
    bool ret = memoryManager->createWddmAllocation(&allocation, allocation.getAlignedCpuPtr());
    EXPECT_FALSE(ret);
}

TEST_F(WddmMemoryManagerTest, givenManagerWithEnabledDeferredDeleterWhenFirstMapGpuVaFailSecondAfterDrainSuccessThenCreateAllocation) {
    void *ptr = reinterpret_cast(0x10000);
    size_t size = 0x1000;
    std::unique_ptr gmm(new Gmm(rootDeviceEnvironment->getGmmClientContext(), ptr, size, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true));

    MockDeferredDeleter *deleter = new MockDeferredDeleter;
memoryManager->setDeferredDeleter(deleter); setMapGpuVaFailConfigFcn(0, 1); WddmAllocation allocation(0, AllocationType::BUFFER, ptr, size, nullptr, MemoryPool::MemoryNull, 0u, 1u); allocation.setDefaultGmm(gmm.get()); bool ret = memoryManager->createWddmAllocation(&allocation, allocation.getAlignedCpuPtr()); EXPECT_TRUE(ret); } TEST_F(WddmMemoryManagerTest, givenManagerWithEnabledDeferredDeleterWhenFirstAndMapGpuVaFailSecondAfterDrainFailThenFailToCreateAllocation) { void *ptr = reinterpret_cast(0x1000); size_t size = 0x1000; std::unique_ptr gmm(new Gmm(rootDeviceEnvironment->getGmmClientContext(), ptr, size, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true)); MockDeferredDeleter *deleter = new MockDeferredDeleter; memoryManager->setDeferredDeleter(deleter); setMapGpuVaFailConfigFcn(0, 2); WddmAllocation allocation(0, AllocationType::BUFFER, ptr, size, nullptr, MemoryPool::MemoryNull, 0u, 1u); allocation.setDefaultGmm(gmm.get()); bool ret = memoryManager->createWddmAllocation(&allocation, allocation.getAlignedCpuPtr()); EXPECT_FALSE(ret); } TEST_F(WddmMemoryManagerTest, givenNullPtrAndSizePassedToCreateInternalAllocationWhenCallIsMadeThenAllocationIsCreatedIn32BitHeapInternal) { auto wddmAllocation = static_cast(memoryManager->allocate32BitGraphicsMemory(rootDeviceIndex, MemoryConstants::pageSize, nullptr, AllocationType::INTERNAL_HEAP)); ASSERT_NE(nullptr, wddmAllocation); EXPECT_EQ(wddmAllocation->getGpuBaseAddress(), GmmHelper::canonize(memoryManager->getInternalHeapBaseAddress(wddmAllocation->getRootDeviceIndex(), wddmAllocation->isAllocatedInLocalMemoryPool()))); EXPECT_NE(nullptr, wddmAllocation->getUnderlyingBuffer()); EXPECT_EQ(4096u, wddmAllocation->getUnderlyingBufferSize()); EXPECT_NE((uint64_t)wddmAllocation->getUnderlyingBuffer(), wddmAllocation->getGpuAddress()); auto cannonizedHeapBase = GmmHelper::canonize(memoryManager->getInternalHeapBaseAddress(rootDeviceIndex, wddmAllocation->isAllocatedInLocalMemoryPool())); auto cannonizedHeapEnd = 
GmmHelper::canonize(memoryManager->getGfxPartition(rootDeviceIndex)->getHeapLimit(MemoryManager::selectInternalHeap(wddmAllocation->isAllocatedInLocalMemoryPool()))); EXPECT_GT(wddmAllocation->getGpuAddress(), cannonizedHeapBase); EXPECT_LT(wddmAllocation->getGpuAddress() + wddmAllocation->getUnderlyingBufferSize(), cannonizedHeapEnd); EXPECT_NE(nullptr, wddmAllocation->getDriverAllocatedCpuPtr()); EXPECT_TRUE(wddmAllocation->is32BitAllocation()); memoryManager->freeGraphicsMemory(wddmAllocation); } TEST_F(WddmMemoryManagerTest, givenPtrAndSizePassedToCreateInternalAllocationWhenCallIsMadeThenAllocationIsCreatedIn32BitHeapInternal) { auto ptr = reinterpret_cast(0x1000000); auto wddmAllocation = static_cast(memoryManager->allocate32BitGraphicsMemory(rootDeviceIndex, MemoryConstants::pageSize, ptr, AllocationType::INTERNAL_HEAP)); ASSERT_NE(nullptr, wddmAllocation); EXPECT_EQ(wddmAllocation->getGpuBaseAddress(), GmmHelper::canonize(memoryManager->getInternalHeapBaseAddress(rootDeviceIndex, wddmAllocation->isAllocatedInLocalMemoryPool()))); EXPECT_EQ(ptr, wddmAllocation->getUnderlyingBuffer()); EXPECT_EQ(4096u, wddmAllocation->getUnderlyingBufferSize()); EXPECT_NE((uint64_t)wddmAllocation->getUnderlyingBuffer(), wddmAllocation->getGpuAddress()); auto cannonizedHeapBase = GmmHelper::canonize(memoryManager->getInternalHeapBaseAddress(rootDeviceIndex, wddmAllocation->isAllocatedInLocalMemoryPool())); auto cannonizedHeapEnd = GmmHelper::canonize(memoryManager->getGfxPartition(rootDeviceIndex)->getHeapLimit(MemoryManager::selectInternalHeap(wddmAllocation->isAllocatedInLocalMemoryPool()))); EXPECT_GT(wddmAllocation->getGpuAddress(), cannonizedHeapBase); EXPECT_LT(wddmAllocation->getGpuAddress() + wddmAllocation->getUnderlyingBufferSize(), cannonizedHeapEnd); EXPECT_EQ(nullptr, wddmAllocation->getDriverAllocatedCpuPtr()); EXPECT_TRUE(wddmAllocation->is32BitAllocation()); memoryManager->freeGraphicsMemory(wddmAllocation); } TEST_F(BufferWithWddmMemory, 
WhenCreatingBufferThenBufferIsCreatedCorrectly) { flags = CL_MEM_USE_HOST_PTR | CL_MEM_FORCE_HOST_MEMORY_INTEL; auto ptr = alignedMalloc(MemoryConstants::preferredAlignment, MemoryConstants::preferredAlignment); auto buffer = Buffer::create( &context, flags, MemoryConstants::preferredAlignment, ptr, retVal); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, buffer); auto address = buffer->getCpuAddress(); if (buffer->isMemObjZeroCopy()) { EXPECT_EQ(ptr, address); } else { EXPECT_NE(address, ptr); } EXPECT_NE(nullptr, buffer->getGraphicsAllocation(context.getDevice(0)->getRootDeviceIndex())); EXPECT_NE(nullptr, buffer->getGraphicsAllocation(context.getDevice(0)->getRootDeviceIndex())->getUnderlyingBuffer()); delete buffer; alignedFree(ptr); } TEST_F(BufferWithWddmMemory, GivenNullOsHandleStorageWhenPopulatingThenFilledPointerIsReturned) { OsHandleStorage storage; storage.fragmentStorageData[0].cpuPtr = reinterpret_cast(0x1000); storage.fragmentStorageData[0].fragmentSize = MemoryConstants::pageSize; memoryManager->populateOsHandles(storage, 0); EXPECT_NE(nullptr, storage.fragmentStorageData[0].osHandleStorage); EXPECT_NE(nullptr, static_cast(storage.fragmentStorageData[0].osHandleStorage)->gmm); EXPECT_EQ(nullptr, storage.fragmentStorageData[1].osHandleStorage); EXPECT_EQ(nullptr, storage.fragmentStorageData[2].osHandleStorage); storage.fragmentStorageData[0].freeTheFragment = true; memoryManager->cleanOsHandles(storage, 0); } TEST_F(BufferWithWddmMemory, GivenMisalignedHostPtrAndMultiplePagesSizeWhenAskedForGraphicsAllocationThenItContainsAllFragmentsWithProperGpuAdrresses) { if (memoryManager->isLimitedGPU(0)) { GTEST_SKIP(); } auto ptr = reinterpret_cast(wddm->virtualAllocAddress + 0x1001); auto size = MemoryConstants::pageSize * 10; auto graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{context.getDevice(0)->getRootDeviceIndex(), false, size, context.getDevice(0)->getDeviceBitfield()}, ptr); auto hostPtrManager = 
static_cast(memoryManager->getHostPtrManager()); ASSERT_EQ(3u, hostPtrManager->getFragmentCount()); auto reqs = MockHostPtrManager::getAllocationRequirements(context.getDevice(0)->getRootDeviceIndex(), ptr, size); for (int i = 0; i < maxFragmentsCount; i++) { auto osHandle = static_cast(graphicsAllocation->fragmentsStorage.fragmentStorageData[i].osHandleStorage); EXPECT_NE((D3DKMT_HANDLE) nullptr, osHandle->handle); EXPECT_NE(nullptr, osHandle->gmm); EXPECT_EQ(reqs.allocationFragments[i].allocationPtr, reinterpret_cast(osHandle->gmm->resourceParams.pExistingSysMem)); EXPECT_EQ(reqs.allocationFragments[i].allocationSize, osHandle->gmm->resourceParams.BaseWidth); } memoryManager->freeGraphicsMemory(graphicsAllocation); EXPECT_EQ(0u, hostPtrManager->getFragmentCount()); } TEST_F(BufferWithWddmMemory, GivenPointerAndSizeWhenAskedToCreateGrahicsAllocationThenGraphicsAllocationIsCreated) { OsHandleStorage handleStorage; auto ptr = reinterpret_cast(wddm->virtualAllocAddress + 0x1000); auto ptr2 = reinterpret_cast(wddm->virtualAllocAddress + 0x1001); auto size = MemoryConstants::pageSize; handleStorage.fragmentStorageData[0].cpuPtr = ptr; handleStorage.fragmentStorageData[1].cpuPtr = ptr2; handleStorage.fragmentStorageData[2].cpuPtr = nullptr; handleStorage.fragmentStorageData[0].fragmentSize = size; handleStorage.fragmentStorageData[1].fragmentSize = size * 2; handleStorage.fragmentStorageData[2].fragmentSize = size * 3; AllocationData allocationData; allocationData.size = size; allocationData.hostPtr = ptr; auto allocation = memoryManager->createGraphicsAllocation(handleStorage, allocationData); EXPECT_EQ(ptr, allocation->getUnderlyingBuffer()); EXPECT_EQ(size, allocation->getUnderlyingBufferSize()); EXPECT_EQ(ptr, allocation->fragmentsStorage.fragmentStorageData[0].cpuPtr); EXPECT_EQ(ptr2, allocation->fragmentsStorage.fragmentStorageData[1].cpuPtr); EXPECT_EQ(nullptr, allocation->fragmentsStorage.fragmentStorageData[2].cpuPtr); EXPECT_EQ(size, 
allocation->fragmentsStorage.fragmentStorageData[0].fragmentSize); EXPECT_EQ(size * 2, allocation->fragmentsStorage.fragmentStorageData[1].fragmentSize); EXPECT_EQ(size * 3, allocation->fragmentsStorage.fragmentStorageData[2].fragmentSize); EXPECT_NE(&allocation->fragmentsStorage, &handleStorage); memoryManager->freeGraphicsMemory(allocation); } TEST_F(BufferWithWddmMemory, givenFragmentsThatAreNotInOrderWhenGraphicsAllocationIsBeingCreatedThenGraphicsAddressIsPopulatedFromProperFragment) { memoryManager->setForce32bitAllocations(true); OsHandleStorage handleStorage = {}; D3DGPU_VIRTUAL_ADDRESS gpuAdress = MemoryConstants::pageSize * 1; auto ptr = reinterpret_cast(wddm->virtualAllocAddress + MemoryConstants::pageSize); auto size = MemoryConstants::pageSize * 2; auto maxOsContextCount = 1u; auto osHandle = new OsHandleWin(); handleStorage.fragmentStorageData[0].cpuPtr = ptr; handleStorage.fragmentStorageData[0].fragmentSize = size; handleStorage.fragmentStorageData[0].osHandleStorage = osHandle; handleStorage.fragmentStorageData[0].residency = new ResidencyData(maxOsContextCount); handleStorage.fragmentStorageData[0].freeTheFragment = true; auto rootDeviceEnvironment = executionEnvironment->rootDeviceEnvironments[0].get(); osHandle->gmm = new Gmm(rootDeviceEnvironment->getGmmClientContext(), ptr, size, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true); handleStorage.fragmentCount = 1; FragmentStorage fragment = {}; fragment.driverAllocation = true; fragment.fragmentCpuPointer = ptr; fragment.fragmentSize = size; fragment.osInternalStorage = handleStorage.fragmentStorageData[0].osHandleStorage; osHandle->gpuPtr = gpuAdress; memoryManager->getHostPtrManager()->storeFragment(rootDeviceIndex, fragment); AllocationData allocationData; allocationData.size = size; allocationData.hostPtr = ptr; auto allocation = memoryManager->createGraphicsAllocation(handleStorage, allocationData); EXPECT_EQ(ptr, allocation->getUnderlyingBuffer()); EXPECT_EQ(size, 
allocation->getUnderlyingBufferSize()); EXPECT_EQ(gpuAdress, allocation->getGpuAddress()); EXPECT_EQ(0ULL, allocation->getAllocationOffset()); memoryManager->freeGraphicsMemory(allocation); } TEST_F(BufferWithWddmMemory, givenFragmentsThatAreNotInOrderWhenGraphicsAllocationIsBeingCreatedNotAllignedToPageThenGraphicsAddressIsPopulatedFromProperFragmentAndOffsetisAssigned) { memoryManager->setForce32bitAllocations(true); OsHandleStorage handleStorage = {}; D3DGPU_VIRTUAL_ADDRESS gpuAdress = MemoryConstants::pageSize * 1; auto ptr = reinterpret_cast(wddm->virtualAllocAddress + MemoryConstants::pageSize); auto size = MemoryConstants::pageSize * 2; auto maxOsContextCount = 1u; auto osHandle = new OsHandleWin(); handleStorage.fragmentStorageData[0].cpuPtr = ptr; handleStorage.fragmentStorageData[0].fragmentSize = size; handleStorage.fragmentStorageData[0].osHandleStorage = osHandle; handleStorage.fragmentStorageData[0].residency = new ResidencyData(maxOsContextCount); handleStorage.fragmentStorageData[0].freeTheFragment = true; auto rootDeviceEnvironment = executionEnvironment->rootDeviceEnvironments[0].get(); osHandle->gmm = new Gmm(rootDeviceEnvironment->getGmmClientContext(), ptr, size, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true); handleStorage.fragmentCount = 1; FragmentStorage fragment = {}; fragment.driverAllocation = true; fragment.fragmentCpuPointer = ptr; fragment.fragmentSize = size; fragment.osInternalStorage = handleStorage.fragmentStorageData[0].osHandleStorage; osHandle->gpuPtr = gpuAdress; memoryManager->getHostPtrManager()->storeFragment(rootDeviceIndex, fragment); auto offset = 80; auto allocationPtr = ptrOffset(ptr, offset); AllocationData allocationData; allocationData.size = size; allocationData.hostPtr = allocationPtr; auto allocation = memoryManager->createGraphicsAllocation(handleStorage, allocationData); EXPECT_EQ(allocationPtr, allocation->getUnderlyingBuffer()); EXPECT_EQ(size, allocation->getUnderlyingBufferSize()); EXPECT_EQ(gpuAdress + 
offset, allocation->getGpuAddress()); // getGpuAddress returns gpuAddress + allocationOffset EXPECT_EQ(offset, allocation->getAllocationOffset()); memoryManager->freeGraphicsMemory(allocation); } struct WddmMemoryManagerWithAsyncDeleterTest : public ::testing::Test { void SetUp() override { executionEnvironment = getExecutionEnvironmentImpl(hwInfo, 1); executionEnvironment->incRefInternal(); wddm = static_cast(executionEnvironment->rootDeviceEnvironments[0]->osInterface->getDriverModel()->as()); wddm->resetGdi(new MockGdi()); wddm->callBaseDestroyAllocations = false; wddm->init(); deleter = new MockDeferredDeleter; memoryManager = std::make_unique(*executionEnvironment); memoryManager->setDeferredDeleter(deleter); } void TearDown() override { executionEnvironment->decRefInternal(); } MockDeferredDeleter *deleter = nullptr; std::unique_ptr memoryManager; ExecutionEnvironment *executionEnvironment; HardwareInfo *hwInfo; WddmMock *wddm; }; TEST_F(WddmMemoryManagerWithAsyncDeleterTest, givenWddmWhenAsyncDeleterIsEnabledThenCanDeferDeletions) { EXPECT_EQ(0, deleter->deferDeletionCalled); memoryManager->tryDeferDeletions(nullptr, 0, 0, 0); EXPECT_EQ(1, deleter->deferDeletionCalled); EXPECT_EQ(1u, wddm->destroyAllocationResult.called); } TEST_F(WddmMemoryManagerWithAsyncDeleterTest, givenWddmWhenAsyncDeleterIsDisabledThenCannotDeferDeletions) { memoryManager->setDeferredDeleter(nullptr); memoryManager->tryDeferDeletions(nullptr, 0, 0, 0); EXPECT_EQ(1u, wddm->destroyAllocationResult.called); } TEST_F(WddmMemoryManagerWithAsyncDeleterTest, givenMemoryManagerWithAsyncDeleterWhenCannotAllocateMemoryForTiledImageThenDrainIsCalledAndCreateAllocationIsCalledTwice) { UltDeviceFactory deviceFactory{1, 0}; ImageDescriptor imgDesc = {}; imgDesc.imageType = ImageType::Image3D; ImageInfo imgInfo = MockGmm::initImgInfo(imgDesc, 0, nullptr); wddm->createAllocationStatus = STATUS_GRAPHICS_NO_VIDEO_MEMORY; EXPECT_EQ(0, deleter->drainCalled); EXPECT_EQ(0u, 
wddm->createAllocationResult.called); deleter->expectDrainBlockingValue(true); auto memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(0, 0, 0, deviceFactory.rootDevices[0]); AllocationProperties allocProperties = MemObjHelper::getAllocationPropertiesWithImageInfo(0, imgInfo, true, memoryProperties, *hwInfo, mockDeviceBitfield, true); memoryManager->allocateGraphicsMemoryInPreferredPool(allocProperties, nullptr); EXPECT_EQ(1, deleter->drainCalled); EXPECT_EQ(2u, wddm->createAllocationResult.called); } TEST_F(WddmMemoryManagerWithAsyncDeleterTest, givenMemoryManagerWithAsyncDeleterWhenCanAllocateMemoryForTiledImageThenDrainIsNotCalledAndCreateAllocationIsCalledOnce) { UltDeviceFactory deviceFactory{1, 0}; ImageDescriptor imgDesc; imgDesc.imageType = ImageType::Image3D; ImageInfo imgInfo = MockGmm::initImgInfo(imgDesc, 0, nullptr); wddm->createAllocationStatus = STATUS_SUCCESS; wddm->mapGpuVaStatus = true; wddm->callBaseMapGpuVa = false; EXPECT_EQ(0, deleter->drainCalled); EXPECT_EQ(0u, wddm->createAllocationResult.called); EXPECT_EQ(0u, wddm->mapGpuVirtualAddressResult.called); auto memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(0, 0, 0, deviceFactory.rootDevices[0]); AllocationProperties allocProperties = MemObjHelper::getAllocationPropertiesWithImageInfo(0, imgInfo, true, memoryProperties, *hwInfo, mockDeviceBitfield, true); auto allocation = memoryManager->allocateGraphicsMemoryInPreferredPool(allocProperties, nullptr); EXPECT_EQ(0, deleter->drainCalled); EXPECT_EQ(1u, wddm->createAllocationResult.called); EXPECT_EQ(1u, wddm->mapGpuVirtualAddressResult.called); memoryManager->freeGraphicsMemory(allocation); } TEST_F(WddmMemoryManagerWithAsyncDeleterTest, givenMemoryManagerWithoutAsyncDeleterWhenCannotAllocateMemoryForTiledImageThenCreateAllocationIsCalledOnce) { UltDeviceFactory deviceFactory{1, 0}; memoryManager->setDeferredDeleter(nullptr); ImageDescriptor imgDesc; imgDesc.imageType = ImageType::Image3D; ImageInfo imgInfo = 
MockGmm::initImgInfo(imgDesc, 0, nullptr); wddm->createAllocationStatus = STATUS_GRAPHICS_NO_VIDEO_MEMORY; EXPECT_EQ(0u, wddm->createAllocationResult.called); auto memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(0, 0, 0, deviceFactory.rootDevices[0]); AllocationProperties allocProperties = MemObjHelper::getAllocationPropertiesWithImageInfo(0, imgInfo, true, memoryProperties, *hwInfo, mockDeviceBitfield, true); memoryManager->allocateGraphicsMemoryInPreferredPool(allocProperties, nullptr); EXPECT_EQ(1u, wddm->createAllocationResult.called); } TEST(WddmMemoryManagerDefaults, givenDefaultWddmMemoryManagerWhenItIsQueriedForInternalHeapBaseThenHeapInternalBaseIsReturned) { HardwareInfo *hwInfo; auto executionEnvironment = getExecutionEnvironmentImpl(hwInfo, 1); executionEnvironment->incRefInternal(); { auto wddm = new WddmMock(*executionEnvironment->rootDeviceEnvironments[0].get()); executionEnvironment->rootDeviceEnvironments[0]->osInterface->setDriverModel(std::unique_ptr(wddm)); executionEnvironment->rootDeviceEnvironments[0]->memoryOperationsInterface = std::make_unique(wddm); wddm->init(); MockWddmMemoryManager memoryManager(*executionEnvironment); auto heapBase = wddm->getGfxPartition().Heap32[static_cast(HeapIndex::HEAP_INTERNAL_DEVICE_MEMORY)].Base; heapBase = std::max(heapBase, static_cast(wddm->getWddmMinAddress())); EXPECT_EQ(heapBase, memoryManager.getInternalHeapBaseAddress(0, true)); } executionEnvironment->decRefInternal(); } TEST_F(MockWddmMemoryManagerTest, givenValidateAllocationFunctionWhenItIsCalledWithTripleAllocationThenSuccessIsReturned) { wddm->init(); MockWddmMemoryManager memoryManager(*executionEnvironment); auto wddmAlloc = static_cast(memoryManager.allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootDeviceIndex, false, MemoryConstants::pageSize}, reinterpret_cast(0x1000))); EXPECT_TRUE(memoryManager.validateAllocationMock(wddmAlloc)); memoryManager.freeGraphicsMemory(wddmAlloc); } 
// NOTE(review): several casts/templates in this text appear without their
// angle-bracket arguments (e.g. static_cast(...), std::unique_ptr csr,
// MemoryManagerCreate memoryManager64k). They are kept exactly as found -
// confirm against the upstream file before building.

// Smoke test: both device-specific resource hooks can be called on a fresh manager.
TEST_F(MockWddmMemoryManagerTest, givenCreateOrReleaseDeviceSpecificMemResourcesWhenCreatingMemoryManagerObjectThenTheseMethodsAreEmpty) {
    wddm->init();
    MockWddmMemoryManager memoryManager(*executionEnvironment);
    memoryManager.createDeviceSpecificMemResources(1);
    memoryManager.releaseDeviceSpecificMemResources(1);
}

// verifyHandle with the last argument false bumps only the shared-handle counter.
TEST_F(MockWddmMemoryManagerTest, givenWddmMemoryManagerWhenVerifySharedHandleThenVerifySharedHandleIsCalled) {
    wddm->init();
    MockWddmMemoryManager memoryManager(*executionEnvironment);
    osHandle handle = 1;
    memoryManager.verifyHandle(handle, 0, false);
    EXPECT_EQ(0, wddm->counterVerifyNTHandle);
    EXPECT_EQ(1, wddm->counterVerifySharedHandle);
}

// verifyHandle with the last argument true bumps only the NT-handle counter.
TEST_F(MockWddmMemoryManagerTest, givenWddmMemoryManagerWhenVerifyNTHandleThenVerifyNTHandleIsCalled) {
    wddm->init();
    MockWddmMemoryManager memoryManager(*executionEnvironment);
    osHandle handle = 1;
    memoryManager.verifyHandle(handle, 0, true);
    EXPECT_EQ(1, wddm->counterVerifyNTHandle);
    EXPECT_EQ(0, wddm->counterVerifySharedHandle);
}

// isNTHandle bumps only the NT-handle counter.
TEST_F(MockWddmMemoryManagerTest, givenWddmMemoryManagerWhenIsNTHandleisCalledThenVerifyNTHandleisCalled) {
    wddm->init();
    MockWddmMemoryManager memoryManager(*executionEnvironment);
    osHandle handle = 1;
    memoryManager.isNTHandle(handle, 0);
    EXPECT_EQ(1, wddm->counterVerifyNTHandle);
    EXPECT_EQ(0, wddm->counterVerifySharedHandle);
}

// With Enable64kbpages set, a BUFFER_HOST_MEMORY allocation is locked and both its CPU pointer and
// GPU address are 64KB aligned. Skipped when the manager reports a limited GPU.
TEST_F(MockWddmMemoryManagerTest, givenEnabled64kbpagesWhenCreatingGraphicsMemoryForBufferWithoutHostPtrThen64kbAdressIsAllocated) {
    DebugManagerStateRestore dbgRestore;
    wddm->init();
    DebugManager.flags.Enable64kbpages.set(true);
    MemoryManagerCreate memoryManager64k(true, false, *executionEnvironment);
    if (memoryManager64k.isLimitedGPU(0)) {
        GTEST_SKIP();
    }
    EXPECT_EQ(0U, wddm->createAllocationResult.called);

    GraphicsAllocation *galloc = memoryManager64k.allocateGraphicsMemoryWithProperties({rootDeviceIndex, MemoryConstants::pageSize64k, AllocationType::BUFFER_HOST_MEMORY, mockDeviceBitfield});
    EXPECT_NE(0U, wddm->createAllocationResult.called);
    // Fatal check: every statement below dereferences galloc.
    ASSERT_NE(nullptr, galloc);
    EXPECT_TRUE(galloc->isLocked());
    EXPECT_NE(nullptr, galloc->getUnderlyingBuffer());
    EXPECT_EQ(0u, reinterpret_cast<uintptr_t>(galloc->getUnderlyingBuffer()) % MemoryConstants::pageSize64k);
    EXPECT_EQ(0u, static_cast<uintptr_t>(galloc->getGpuAddress()) % MemoryConstants::pageSize64k);
    memoryManager64k.freeGraphicsMemory(galloc);
}

// A 1-byte request through allocateGraphicsMemory64kb yields a 64KB underlying size and a GMM
// whose Cacheable flag is 1.
TEST_F(OsAgnosticMemoryManagerUsingWddmTest, givenEnabled64kbPagesWhenAllocationIsCreatedWithSizeSmallerThan64kbThenGraphicsAllocationsHas64kbAlignedUnderlyingSize) {
    DebugManagerStateRestore dbgRestore;
    wddm->init();
    DebugManager.flags.Enable64kbpages.set(true);
    MockWddmMemoryManager memoryManager(true, false, *executionEnvironment);
    AllocationData allocationData;
    allocationData.size = 1u;
    auto graphicsAllocation = memoryManager.allocateGraphicsMemory64kb(allocationData);

    // Fatal check: every statement below dereferences graphicsAllocation.
    ASSERT_NE(nullptr, graphicsAllocation);
    EXPECT_EQ(MemoryConstants::pageSize64k, graphicsAllocation->getUnderlyingBufferSize());
    EXPECT_NE(0llu, graphicsAllocation->getGpuAddress());
    EXPECT_NE(nullptr, graphicsAllocation->getUnderlyingBuffer());
    EXPECT_EQ(1u, graphicsAllocation->getDefaultGmm()->resourceParams.Flags.Info.Cacheable);

    memoryManager.freeGraphicsMemory(graphicsAllocation);
}

// allocateGraphicsMemory64kb performs exactly one lock and one map-GPU-VA call; a CPU pointer is
// passed to the mapping only on 32-bit builds or when full-range SVM is reported.
TEST_F(MockWddmMemoryManagerTest, givenWddmWhenallocateGraphicsMemory64kbThenLockResultAndmapGpuVirtualAddressIsCalled) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.Enable64kbpages.set(true);
    wddm->init();
    MockWddmMemoryManager memoryManager64k(*executionEnvironment);
    uint32_t lockCount = wddm->lockResult.called;
    uint32_t mapGpuVirtualAddressResult = wddm->mapGpuVirtualAddressResult.called;
    AllocationData allocationData;
    allocationData.size = MemoryConstants::pageSize64k;
    GraphicsAllocation *galloc = memoryManager64k.allocateGraphicsMemory64kb(allocationData);
    EXPECT_EQ(lockCount + 1, wddm->lockResult.called);
    EXPECT_EQ(mapGpuVirtualAddressResult + 1, wddm->mapGpuVirtualAddressResult.called);

    if (is32bit || executionEnvironment->rootDeviceEnvironments[0]->isFullRangeSvm()) {
        EXPECT_NE(nullptr, wddm->mapGpuVirtualAddressResult.cpuPtrPassed);
    } else {
        EXPECT_EQ(nullptr, wddm->mapGpuVirtualAddressResult.cpuPtrPassed);
    }
    memoryManager64k.freeGraphicsMemory(galloc);
}

// With the chunk size set to 64KB minus one page, a 3 * 64KB BUFFER allocation is expected to be
// backed by 4 GMMs / 4 create-allocation calls, with 64KB pages both enabled and disabled.
TEST_F(MockWddmMemoryManagerTest, givenAllocateGraphicsMemoryForBufferAndRequestedSizeIsHugeThenResultAllocationIsSplitted) {
    DebugManagerStateRestore dbgRestore;
    wddm->init();
    wddm->mapGpuVaStatus = true;
    VariableBackup restorer{&wddm->callBaseMapGpuVa, false};

    for (bool enable64KBpages : {true, false}) {
        wddm->createAllocationResult.called = 0U;
        DebugManager.flags.Enable64kbpages.set(enable64KBpages);
        MemoryManagerCreate memoryManager(true, false, *executionEnvironment);
        if (memoryManager.isLimitedGPU(0)) {
            GTEST_SKIP();
        }
        EXPECT_EQ(0, wddm->createAllocationResult.called);

        memoryManager.hugeGfxMemoryChunkSize = MemoryConstants::pageSize64k - MemoryConstants::pageSize;

        WddmAllocation *wddmAlloc = static_cast(memoryManager.allocateGraphicsMemoryWithProperties({rootDeviceIndex, MemoryConstants::pageSize64k * 3, AllocationType::BUFFER, mockDeviceBitfield}));
        // Fatal check: every statement below dereferences wddmAlloc.
        ASSERT_NE(nullptr, wddmAlloc);
        EXPECT_EQ(4, wddmAlloc->getNumGmms());
        EXPECT_EQ(4, wddm->createAllocationResult.called);
        EXPECT_EQ(wddmAlloc->getGpuAddressToModify(), GmmHelper::canonize(wddmAlloc->reservedGpuVirtualAddress));

        memoryManager.freeGraphicsMemory(wddmAlloc);
    }
}

// Both allocation methods report a chunk size of 4GB minus 64KB.
TEST_F(MockWddmMemoryManagerTest, givenDefaultMemoryManagerWhenItIsCreatedThenCorrectHugeGfxMemoryChunkIsSet) {
    MockWddmMemoryManager memoryManager(*executionEnvironment);
    EXPECT_EQ(memoryManager.getHugeGfxMemoryChunkSize(GfxMemoryAllocationMethod::AllocateByKmd), 4 * MemoryConstants::gigaByte - MemoryConstants::pageSize64k);
    EXPECT_EQ(memoryManager.getHugeGfxMemoryChunkSize(GfxMemoryAllocationMethod::UseUmdSystemPtr), 4 * MemoryConstants::gigaByte - MemoryConstants::pageSize64k);
}

// Same split expectation as the BUFFER case above, for a BUFFER_HOST_MEMORY allocation that wraps
// a caller-provided host buffer (allocateMemory = false).
TEST_F(MockWddmMemoryManagerTest, givenAllocateGraphicsMemoryForHostBufferAndRequestedSizeIsHugeThenResultAllocationIsSplitted) {
    DebugManagerStateRestore dbgRestore;
    wddm->init();
    wddm->mapGpuVaStatus = true;
    VariableBackup restorer{&wddm->callBaseMapGpuVa, false};

    DebugManager.flags.Enable64kbpages.set(true);
    MemoryManagerCreate memoryManager(true, false, *executionEnvironment);
    if (memoryManager.isLimitedGPU(0)) {
        GTEST_SKIP();
    }
    EXPECT_EQ(0, wddm->createAllocationResult.called);

    memoryManager.hugeGfxMemoryChunkSize = MemoryConstants::pageSize64k - MemoryConstants::pageSize;

    std::vector hostPtr(MemoryConstants::pageSize64k * 3);
    AllocationProperties allocProps{rootDeviceIndex, MemoryConstants::pageSize64k * 3, AllocationType::BUFFER_HOST_MEMORY, mockDeviceBitfield};
    allocProps.flags.allocateMemory = false;
    WddmAllocation *wddmAlloc = static_cast(memoryManager.allocateGraphicsMemoryWithProperties(allocProps, hostPtr.data()));

    // Fatal check: every statement below dereferences wddmAlloc.
    ASSERT_NE(nullptr, wddmAlloc);
    EXPECT_EQ(4, wddmAlloc->getNumGmms());
    EXPECT_EQ(4, wddm->createAllocationResult.called);
    EXPECT_EQ(wddmAlloc->getGpuAddressToModify(), GmmHelper::canonize(wddmAlloc->reservedGpuVirtualAddress));

    memoryManager.freeGraphicsMemory(wddmAlloc);
}

// A manager created without touching any flag reports the async deleter as enabled.
TEST_F(MockWddmMemoryManagerTest, givenDefaultMemoryManagerWhenItIsCreatedThenAsyncDeleterEnabledIsTrue) {
    wddm->init();
    WddmMemoryManager memoryManager(*executionEnvironment);
    EXPECT_TRUE(memoryManager.isAsyncDeleterEnabled());
    EXPECT_NE(nullptr, memoryManager.getDeferredDeleter());
}

// Nothing extra is registered by this test body; the budget is reported as not exhausted.
TEST_F(WddmMemoryManagerTest, givenWddmMemoryManagerWithNoRegisteredOsContextsWhenCallingIsMemoryBudgetExhaustedThenReturnFalse) {
    EXPECT_FALSE(memoryManager->isMemoryBudgetExhausted());
}

// Only asserts on 32-bit builds: the SVM heap limit equals max32BitAddress.
TEST_F(WddmMemoryManagerTest, givenWddmMemoryManagerAnd32bitBuildThenSvmPartitionIsAlwaysInitialized) {
    if (is32bit) {
        EXPECT_EQ(memoryManager->getGfxPartition(0)->getHeapLimit(HeapIndex::HEAP_SVM), MemoryConstants::max32BitAddress);
    }
}

// Prepares three root device environments, registers one RCS OS context per device, and expects
// the budget to be reported as not exhausted.
TEST_F(WddmMemoryManagerTest, givenWddmMemoryManagerWithRegisteredOsContextWhenCallingIsMemoryBudgetExhaustedThenReturnFalse) {
    executionEnvironment->prepareRootDeviceEnvironments(3u);
    for (auto i = 0u; i < executionEnvironment->rootDeviceEnvironments.size(); i++) {
        executionEnvironment->rootDeviceEnvironments[i]->setHwInfo(defaultHwInfo.get());
    }
    executionEnvironment->initializeMemoryManager();
    for (auto i = 0u; i < executionEnvironment->rootDeviceEnvironments.size(); i++) {
        executionEnvironment->rootDeviceEnvironments[i]->osInterface.reset();
        auto wddm = static_cast(Wddm::createWddm(nullptr, *executionEnvironment->rootDeviceEnvironments[i].get()));
        wddm->init();
        executionEnvironment->rootDeviceEnvironments[i]->memoryOperationsInterface = std::make_unique(wddm);
    }
    std::unique_ptr csr(createCommandStream(*executionEnvironment, 0u, 1));
    std::unique_ptr csr1(createCommandStream(*executionEnvironment, 1u, 2));
    std::unique_ptr csr2(createCommandStream(*executionEnvironment, 2u, 3));
    memoryManager->createAndRegisterOsContext(csr.get(), EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_RCS, EngineUsage::Regular}, PreemptionHelper::getDefaultPreemptionMode(*defaultHwInfo), 1));
    memoryManager->createAndRegisterOsContext(csr1.get(), EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_RCS, EngineUsage::Regular}, PreemptionHelper::getDefaultPreemptionMode(*defaultHwInfo), 2));
    memoryManager->createAndRegisterOsContext(csr2.get(), EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_RCS, EngineUsage::Regular}, PreemptionHelper::getDefaultPreemptionMode(*defaultHwInfo), 3));
    EXPECT_FALSE(memoryManager->isMemoryBudgetExhausted());
}

// Same setup as above; marking the residency controller of registered engine [1] as exhausted
// makes the manager report an exhausted budget.
TEST_F(WddmMemoryManagerTest, givenWddmMemoryManagerWithRegisteredOsContextWithExhaustedMemoryBudgetWhenCallingIsMemoryBudgetExhaustedThenReturnTrue) {
    executionEnvironment->prepareRootDeviceEnvironments(3u);
    for (auto i = 0u; i < executionEnvironment->rootDeviceEnvironments.size(); i++) {
        executionEnvironment->rootDeviceEnvironments[i]->setHwInfo(defaultHwInfo.get());
    }
    executionEnvironment->initializeMemoryManager();
    for (auto i = 0u; i < executionEnvironment->rootDeviceEnvironments.size(); i++) {
        executionEnvironment->rootDeviceEnvironments[i]->osInterface.reset();
        auto wddm = static_cast(Wddm::createWddm(nullptr, *executionEnvironment->rootDeviceEnvironments[i].get()));
        wddm->init();
        executionEnvironment->rootDeviceEnvironments[i]->memoryOperationsInterface = std::make_unique(wddm);
    }
    std::unique_ptr csr(createCommandStream(*executionEnvironment, 0u, 1));
    std::unique_ptr csr1(createCommandStream(*executionEnvironment, 1u, 2));
    std::unique_ptr csr2(createCommandStream(*executionEnvironment, 2u, 3));
    memoryManager->createAndRegisterOsContext(csr.get(), EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_RCS, EngineUsage::Regular}, PreemptionHelper::getDefaultPreemptionMode(*defaultHwInfo), 1));
    memoryManager->createAndRegisterOsContext(csr1.get(), EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_RCS, EngineUsage::Regular}, PreemptionHelper::getDefaultPreemptionMode(*defaultHwInfo), 2));
    memoryManager->createAndRegisterOsContext(csr2.get(), EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_RCS, EngineUsage::Regular}, PreemptionHelper::getDefaultPreemptionMode(*defaultHwInfo), 3));
    auto osContext = static_cast(memoryManager->getRegisteredEngines()[1].osContext);
    osContext->getResidencyController().setMemoryBudgetExhausted();
    EXPECT_TRUE(memoryManager->isMemoryBudgetExhausted());
}

// EnableDeferredDeleter = true: the manager reports the deleter as enabled and exposes it.
// The flag is restored by DebugManagerStateRestore, as in the other tests of this file.
TEST_F(MockWddmMemoryManagerTest, givenEnabledAsyncDeleterFlagWhenMemoryManagerIsCreatedThenAsyncDeleterEnabledIsTrueAndDeleterIsNotNullptr) {
    DebugManagerStateRestore dbgRestore;
    wddm->init();
    DebugManager.flags.EnableDeferredDeleter.set(true);
    WddmMemoryManager memoryManager(*executionEnvironment);
    EXPECT_TRUE(memoryManager.isAsyncDeleterEnabled());
    EXPECT_NE(nullptr, memoryManager.getDeferredDeleter());
}

// EnableDeferredDeleter = false: the manager reports the deleter as disabled and exposes none.
// The flag is restored by DebugManagerStateRestore, as in the other tests of this file.
TEST_F(MockWddmMemoryManagerTest, givenDisabledAsyncDeleterFlagWhenMemoryManagerIsCreatedThenAsyncDeleterEnabledIsFalseAndDeleterIsNullptr) {
    DebugManagerStateRestore dbgRestore;
    wddm->init();
    DebugManager.flags.EnableDeferredDeleter.set(false);
    WddmMemoryManager memoryManager(*executionEnvironment);
    EXPECT_FALSE(memoryManager.isAsyncDeleterEnabled());
    EXPECT_EQ(nullptr, memoryManager.getDeferredDeleter());
}

// mapAuxGpuVA forwards a "map" aux-table update to the mock page-table manager. Skipped on
// platforms that report no page-table-manager support.
TEST_F(MockWddmMemoryManagerTest, givenPageTableManagerWhenMapAuxGpuVaCalledThenUseWddmToMap) {
    if (!HwInfoConfig::get(defaultHwInfo->platform.eProductFamily)->isPageTableManagerSupported(*defaultHwInfo)) {
        GTEST_SKIP();
    }
    wddm->init();
    WddmMemoryManager memoryManager(*executionEnvironment);
    auto csr = std::unique_ptr(createCommandStream(*executionEnvironment, 1u, 1));
    auto hwInfo = *defaultHwInfo;
    EngineInstancesContainer regularEngines = {
        {aub_stream::ENGINE_CCS, EngineUsage::Regular}};

    memoryManager.createAndRegisterOsContext(csr.get(), EngineDescriptorHelper::getDefaultDescriptor(regularEngines[0],
                                                                                                     PreemptionHelper::getDefaultPreemptionMode(hwInfo)));

    auto mockMngr = new MockGmmPageTableMngr();
    for (auto engine : memoryManager.getRegisteredEngines()) {
        engine.commandStreamReceiver->pageTableManager.reset(mockMngr);
    }

    auto allocation = memoryManager.allocateGraphicsMemoryWithProperties(AllocationProperties(1, MemoryConstants::pageSize, AllocationType::INTERNAL_HOST_MEMORY, mockDeviceBitfield));

    GMM_DDI_UPDATEAUXTABLE expectedDdiUpdateAuxTable = {};
    expectedDdiUpdateAuxTable.BaseGpuVA = allocation->getGpuAddress();
    expectedDdiUpdateAuxTable.BaseResInfo = allocation->getDefaultGmm()->gmmResourceInfo->peekGmmResourceInfo();
    expectedDdiUpdateAuxTable.DoNotWait = true;
    expectedDdiUpdateAuxTable.Map = true;

    auto expectedCallCount = static_cast(regularEngines.size());

    auto result = memoryManager.mapAuxGpuVA(allocation);
    EXPECT_TRUE(result);
    EXPECT_TRUE(memcmp(&expectedDdiUpdateAuxTable, &mockMngr->updateAuxTableParamsPassed[0].ddiUpdateAuxTable, sizeof(GMM_DDI_UPDATEAUXTABLE)) == 0);
    memoryManager.freeGraphicsMemory(allocation);
    EXPECT_EQ(expectedCallCount, mockMngr->updateAuxTableCalled);
}

// On platforms without page-table-manager support, mapping a compression-enabled GMM must not
// touch the aux table.
TEST_F(MockWddmMemoryManagerTest, givenCompressedAllocationWhenMappedGpuVaAndPageTableNotSupportedThenMapAuxVa) {
    if (HwInfoConfig::get(defaultHwInfo->platform.eProductFamily)->isPageTableManagerSupported(*defaultHwInfo)) {
        GTEST_SKIP();
    }
    auto rootDeviceEnvironment = executionEnvironment->rootDeviceEnvironments[1].get();
    std::unique_ptr gmm(new Gmm(rootDeviceEnvironment->getGmmClientContext(), reinterpret_cast(123), 4096u, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true));
    gmm->isCompressionEnabled = true;
    D3DGPU_VIRTUAL_ADDRESS gpuVa = 0;
    WddmMock wddm(*executionEnvironment->rootDeviceEnvironments[1].get());
    wddm.init();
    auto csr = std::unique_ptr(createCommandStream(*executionEnvironment, 1u, 1));
    auto hwInfo = *defaultHwInfo;
    EngineInstancesContainer regularEngines = {
        {aub_stream::ENGINE_CCS, EngineUsage::Regular}};
    executionEnvironment->memoryManager->createAndRegisterOsContext(csr.get(), EngineDescriptorHelper::getDefaultDescriptor(regularEngines[0],
                                                                                                                            PreemptionHelper::getDefaultPreemptionMode(hwInfo)));

    auto mockMngr = new MockGmmPageTableMngr();
    for (auto engine : executionEnvironment->memoryManager.get()->getRegisteredEngines()) {
        engine.commandStreamReceiver->pageTableManager.reset(mockMngr);
    }

    auto hwInfoMock = hardwareInfoTable[wddm.getGfxPlatform()->eProductFamily];
    ASSERT_NE(nullptr, hwInfoMock);
    auto result = wddm.mapGpuVirtualAddress(gmm.get(), ALLOCATION_HANDLE, wddm.getGfxPartition().Standard.Base, wddm.getGfxPartition().Standard.Limit, 0u, gpuVa);
    ASSERT_TRUE(result);
    EXPECT_EQ(GmmHelper::canonize(wddm.getGfxPartition().Standard.Base), gpuVa);
    EXPECT_EQ(0u, mockMngr->updateAuxTableCalled);
}

// On platforms with page-table-manager support, mapping a compression-enabled GMM issues a "map"
// aux-table update once per registered engine.
TEST_F(MockWddmMemoryManagerTest, givenCompressedAllocationWhenMappedGpuVaAndPageTableSupportedThenMapAuxVa) {
    if (!HwInfoConfig::get(defaultHwInfo->platform.eProductFamily)->isPageTableManagerSupported(*defaultHwInfo)) {
        GTEST_SKIP();
    }
    auto rootDeviceEnvironment = executionEnvironment->rootDeviceEnvironments[1].get();
    std::unique_ptr gmm(new Gmm(rootDeviceEnvironment->getGmmClientContext(), reinterpret_cast(123), 4096u, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true));
    gmm->isCompressionEnabled = true;
    D3DGPU_VIRTUAL_ADDRESS gpuVa = 0;
    WddmMock wddm(*executionEnvironment->rootDeviceEnvironments[1].get());
    wddm.init();
    auto csr = std::unique_ptr(createCommandStream(*executionEnvironment, 1u, 1));
    auto hwInfo = *defaultHwInfo;
    EngineInstancesContainer regularEngines = {
        {aub_stream::ENGINE_CCS, EngineUsage::Regular}};
    executionEnvironment->memoryManager->createAndRegisterOsContext(csr.get(), EngineDescriptorHelper::getDefaultDescriptor(regularEngines[0],
                                                                                                                            PreemptionHelper::getDefaultPreemptionMode(hwInfo)));

    auto mockMngr = new MockGmmPageTableMngr();
    for (auto engine : executionEnvironment->memoryManager.get()->getRegisteredEngines()) {
        engine.commandStreamReceiver->pageTableManager.reset(mockMngr);
    }

    GMM_DDI_UPDATEAUXTABLE expectedDdiUpdateAuxTable = {};
    expectedDdiUpdateAuxTable.BaseGpuVA = GmmHelper::canonize(wddm.getGfxPartition().Standard.Base);
    expectedDdiUpdateAuxTable.BaseResInfo = gmm->gmmResourceInfo->peekGmmResourceInfo();
    expectedDdiUpdateAuxTable.DoNotWait = true;
    expectedDdiUpdateAuxTable.Map = true;

    auto expectedCallCount = executionEnvironment->memoryManager.get()->getRegisteredEnginesCount();

    auto hwInfoMock = hardwareInfoTable[wddm.getGfxPlatform()->eProductFamily];
    ASSERT_NE(nullptr, hwInfoMock);
    auto result = wddm.mapGpuVirtualAddress(gmm.get(), ALLOCATION_HANDLE, wddm.getGfxPartition().Standard.Base, wddm.getGfxPartition().Standard.Limit, 0u, gpuVa);
    ASSERT_TRUE(result);
    EXPECT_EQ(GmmHelper::canonize(wddm.getGfxPartition().Standard.Base), gpuVa);
    EXPECT_TRUE(memcmp(&expectedDdiUpdateAuxTable, &mockMngr->updateAuxTableParamsPassed[0].ddiUpdateAuxTable, sizeof(GMM_DDI_UPDATEAUXTABLE)) == 0);
    EXPECT_EQ(expectedCallCount, mockMngr->updateAuxTableCalled);
}

// Freeing a compression-enabled allocation issues an "unmap" (Map = false) aux-table update once
// per registered engine. Skipped on platforms without page-table-manager support.
TEST_F(MockWddmMemoryManagerTest, givenCompressedAllocationAndPageTableSupportedWhenReleaseingThenUnmapAuxVa) {
    if (!HwInfoConfig::get(defaultHwInfo->platform.eProductFamily)->isPageTableManagerSupported(*defaultHwInfo)) {
        GTEST_SKIP();
    }
    wddm->init();
    WddmMemoryManager memoryManager(*executionEnvironment);
    D3DGPU_VIRTUAL_ADDRESS gpuVa = 123;

    auto csr = std::unique_ptr(createCommandStream(*executionEnvironment, 1u, 1));
    auto hwInfo = *defaultHwInfo;
    EngineInstancesContainer regularEngines = {
        {aub_stream::ENGINE_CCS, EngineUsage::Regular}};
    memoryManager.createAndRegisterOsContext(csr.get(), EngineDescriptorHelper::getDefaultDescriptor(regularEngines[0],
                                                                                                     PreemptionHelper::getDefaultPreemptionMode(hwInfo)));

    auto mockMngr = new MockGmmPageTableMngr();
    for (auto engine : memoryManager.getRegisteredEngines()) {
        engine.commandStreamReceiver->pageTableManager.reset(mockMngr);
    }

    auto wddmAlloc = static_cast(memoryManager.allocateGraphicsMemoryWithProperties(AllocationProperties(1, MemoryConstants::pageSize, AllocationType::INTERNAL_HOST_MEMORY, mockDeviceBitfield)));
    wddmAlloc->setGpuAddress(gpuVa);
    wddmAlloc->getDefaultGmm()->isCompressionEnabled = true;

    GMM_DDI_UPDATEAUXTABLE expectedDdiUpdateAuxTable = {};
    expectedDdiUpdateAuxTable.BaseGpuVA = gpuVa;
    expectedDdiUpdateAuxTable.BaseResInfo = wddmAlloc->getDefaultGmm()->gmmResourceInfo->peekGmmResourceInfo();
    expectedDdiUpdateAuxTable.DoNotWait = true;
    expectedDdiUpdateAuxTable.Map = false;

    // Captured before the free, which is the call expected to trigger the updates.
    auto expectedCallCount = memoryManager.getRegisteredEnginesCount();

    memoryManager.freeGraphicsMemory(wddmAlloc);

    EXPECT_TRUE(memcmp(&expectedDdiUpdateAuxTable, &mockMngr->updateAuxTableParamsPassed[0].ddiUpdateAuxTable, sizeof(GMM_DDI_UPDATEAUXTABLE)) == 0);
    EXPECT_EQ(expectedCallCount, mockMngr->updateAuxTableCalled);
}
// NOTE(review): several casts/templates in this text appear without their
// angle-bracket arguments (e.g. static_cast(...), std::unique_ptr gmm).
// The code below is left exactly as found; only comments were added.

// Freeing an allocation whose GMM has compression disabled must not touch the aux table.
TEST_F(MockWddmMemoryManagerTest, givenNonCompressedAllocationWhenReleaseingThenDontUnmapAuxVa) {
    wddm->init();
    WddmMemoryManager memoryManager(*executionEnvironment);

    auto csr = std::unique_ptr(createCommandStream(*executionEnvironment, 1u, 1));
    auto hwInfo = *defaultHwInfo;
    EngineInstancesContainer regularEngines = {
        {aub_stream::ENGINE_CCS, EngineUsage::Regular}};
    memoryManager.createAndRegisterOsContext(csr.get(), EngineDescriptorHelper::getDefaultDescriptor(regularEngines[0],
                                                                                                     PreemptionHelper::getDefaultPreemptionMode(hwInfo)));

    auto mockMngr = new MockGmmPageTableMngr();
    for (auto engine : memoryManager.getRegisteredEngines()) {
        engine.commandStreamReceiver->pageTableManager.reset(mockMngr);
    }

    auto wddmAlloc = static_cast(memoryManager.allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootDeviceIndex, MemoryConstants::pageSize}));
    wddmAlloc->getDefaultGmm()->isCompressionEnabled = false;

    memoryManager.freeGraphicsMemory(wddmAlloc);
    EXPECT_EQ(0u, mockMngr->updateAuxTableCalled);
}

// Mapping a GMM with compression disabled succeeds and must not touch the aux table.
TEST_F(MockWddmMemoryManagerTest, givenNonCompressedAllocationWhenMappedGpuVaThenDontMapAuxVa) {
    auto rootDeviceEnvironment = executionEnvironment->rootDeviceEnvironments[1].get();
    std::unique_ptr gmm(new Gmm(rootDeviceEnvironment->getGmmClientContext(), reinterpret_cast(123), 4096u, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true));
    gmm->isCompressionEnabled = false;
    D3DGPU_VIRTUAL_ADDRESS gpuVa = 0;
    WddmMock wddm(*rootDeviceEnvironment);
    wddm.init();

    auto csr = std::unique_ptr(createCommandStream(*executionEnvironment, 1u, 1));
    auto hwInfo = *defaultHwInfo;
    EngineInstancesContainer regularEngines = {
        {aub_stream::ENGINE_CCS, EngineUsage::Regular}};
    executionEnvironment->memoryManager->createAndRegisterOsContext(csr.get(), EngineDescriptorHelper::getDefaultDescriptor(regularEngines[0],
                                                                                                                            PreemptionHelper::getDefaultPreemptionMode(hwInfo)));

    auto mockMngr = new MockGmmPageTableMngr();
    for (auto engine : executionEnvironment->memoryManager.get()->getRegisteredEngines()) {
        engine.commandStreamReceiver->pageTableManager.reset(mockMngr);
    }

    auto result = wddm.mapGpuVirtualAddress(gmm.get(), ALLOCATION_HANDLE, wddm.getGfxPartition().Standard.Base, wddm.getGfxPartition().Standard.Limit, 0u, gpuVa);
    ASSERT_TRUE(result);
    EXPECT_EQ(0u, mockMngr->updateAuxTableCalled);
}

// mapGpuVirtualAddress called with a zero handle and zero range is expected to return false.
TEST_F(MockWddmMemoryManagerTest, givenFailingAllocationWhenMappedGpuVaThenReturnFalse) {
    auto rootDeviceEnvironment = executionEnvironment->rootDeviceEnvironments[1].get();
    std::unique_ptr gmm(new Gmm(rootDeviceEnvironment->getGmmClientContext(), reinterpret_cast(123), 4096u, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true));
    gmm->isCompressionEnabled = false;
    D3DGPU_VIRTUAL_ADDRESS gpuVa = 0;
    WddmMock wddm(*rootDeviceEnvironment);
    wddm.init();

    auto result = wddm.mapGpuVirtualAddress(gmm.get(), 0, 0, 0, 0, gpuVa);
    ASSERT_FALSE(result);
}

// RenderCompressed resource flag is 1 but isCompressionEnabled is false: neither the mapping nor
// the free may update the aux table.
TEST_F(MockWddmMemoryManagerTest, givenCompressedFlagSetWhenInternalIsUnsetThenDontUpdateAuxTable) {
    D3DGPU_VIRTUAL_ADDRESS gpuVa = 0;
    wddm->init();
    WddmMemoryManager memoryManager(*executionEnvironment);

    auto csr = std::unique_ptr(createCommandStream(*executionEnvironment, 1u, 1));
    auto hwInfo = *defaultHwInfo;
    EngineInstancesContainer regularEngines = {
        {aub_stream::ENGINE_CCS, EngineUsage::Regular}};
    memoryManager.createAndRegisterOsContext(csr.get(), EngineDescriptorHelper::getDefaultDescriptor(regularEngines[0],
                                                                                                     PreemptionHelper::getDefaultPreemptionMode(hwInfo)));

    auto mockMngr = new MockGmmPageTableMngr();
    auto rootDeviceEnvironment = executionEnvironment->rootDeviceEnvironments[1].get();
    for (auto engine : memoryManager.getRegisteredEngines()) {
        engine.commandStreamReceiver->pageTableManager.reset(mockMngr);
    }

    auto myGmm = new Gmm(rootDeviceEnvironment->getGmmClientContext(), reinterpret_cast(123), 4096u, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true);
    myGmm->isCompressionEnabled = false;
    myGmm->gmmResourceInfo->getResourceFlags()->Info.RenderCompressed = 1;

    auto wddmAlloc = static_cast(memoryManager.allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootDeviceIndex, MemoryConstants::pageSize}));
    // Swap the allocation's own GMM for the hand-built one; the old GMM is deleted here.
    delete wddmAlloc->getDefaultGmm();
    wddmAlloc->setDefaultGmm(myGmm);

    auto result = wddm->mapGpuVirtualAddress(myGmm, ALLOCATION_HANDLE, wddm->getGfxPartition().Standard.Base, wddm->getGfxPartition().Standard.Limit, 0u, gpuVa);
    EXPECT_TRUE(result);
    memoryManager.freeGraphicsMemory(wddmAlloc);
    EXPECT_EQ(0u, mockMngr->updateAuxTableCalled);
}

// RenderCompressed resource flag is 1 and isCompressionEnabled is true: the aux table update count
// after map + free equals the registered engine count. Skipped without page-table-manager support.
TEST_F(MockWddmMemoryManagerTest, givenCompressedFlagSetWhenInternalIsSetThenUpdateAuxTable) {
    if (!HwInfoConfig::get(defaultHwInfo->platform.eProductFamily)->isPageTableManagerSupported(*defaultHwInfo)) {
        GTEST_SKIP();
    }
    D3DGPU_VIRTUAL_ADDRESS gpuVa = 0;
    wddm->init();
    WddmMemoryManager memoryManager(*executionEnvironment);

    auto csr = std::unique_ptr(createCommandStream(*executionEnvironment, 1u, 1));
    auto hwInfo = *defaultHwInfo;
    EngineInstancesContainer regularEngines = {
        {aub_stream::ENGINE_CCS, EngineUsage::Regular}};
    memoryManager.createAndRegisterOsContext(csr.get(), EngineDescriptorHelper::getDefaultDescriptor(regularEngines[0],
                                                                                                     PreemptionHelper::getDefaultPreemptionMode(hwInfo)));

    auto mockMngr = new MockGmmPageTableMngr();
    auto rootDeviceEnvironment = executionEnvironment->rootDeviceEnvironments[1].get();
    rootDeviceEnvironment->executionEnvironment.initializeMemoryManager();
    for (auto engine : memoryManager.getRegisteredEngines()) {
        engine.commandStreamReceiver->pageTableManager.reset(mockMngr);
    }

    auto myGmm = new Gmm(rootDeviceEnvironment->getGmmClientContext(), reinterpret_cast(123), 4096u, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true);
    myGmm->isCompressionEnabled = true;
    myGmm->gmmResourceInfo->getResourceFlags()->Info.RenderCompressed = 1;

    auto wddmAlloc = static_cast(memoryManager.allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootDeviceIndex, MemoryConstants::pageSize}));
    // Swap the allocation's own GMM for the hand-built one; the old GMM is deleted here.
    delete wddmAlloc->getDefaultGmm();
    wddmAlloc->setDefaultGmm(myGmm);

    auto expectedCallCount = memoryManager.getRegisteredEnginesCount();

    auto result = wddm->mapGpuVirtualAddress(myGmm, ALLOCATION_HANDLE, wddm->getGfxPartition().Standard.Base, wddm->getGfxPartition().Standard.Limit, 0u, gpuVa);
    EXPECT_TRUE(result);
    memoryManager.freeGraphicsMemory(wddmAlloc);
    EXPECT_EQ(expectedCallCount, mockMngr->updateAuxTableCalled);
}

// With the mock forced to return STATUS_GRAPHICS_NO_VIDEO_MEMORY, populateOsHandles reports
// InvalidHostPointer for a single fragment.
TEST_F(WddmMemoryManagerTest2, givenReadOnlyMemoryWhenCreateAllocationFailsThenPopulateOsHandlesReturnsInvalidPointer) {
    OsHandleStorage handleStorage;

    handleStorage.fragmentCount = 1;
    handleStorage.fragmentStorageData[0].cpuPtr = reinterpret_cast(0x1000);
    handleStorage.fragmentStorageData[0].fragmentSize = 0x1000;
    handleStorage.fragmentStorageData[0].freeTheFragment = false;

    wddm->callBaseCreateAllocationsAndMapGpuVa = false;
    wddm->createAllocationsAndMapGpuVaStatus = STATUS_GRAPHICS_NO_VIDEO_MEMORY;

    auto result = memoryManager->populateOsHandles(handleStorage, 0);

    EXPECT_EQ(MemoryManager::AllocationStatus::InvalidHostPointer, result);

    // Flag flipped only for cleanup, after the result has been checked.
    handleStorage.fragmentStorageData[0].freeTheFragment = true;
    memoryManager->cleanOsHandles(handleStorage, 0);
}

// Two fragments, the first with a pre-set OS handle; after the forced failure no fragment may be
// left stored in the host pointer manager.
TEST_F(WddmMemoryManagerTest2, givenReadOnlyMemoryPassedToPopulateOsHandlesWhenCreateAllocationFailsThenAllocatedFragmentsAreNotStored) {
    OsHandleStorage handleStorage;
    OsHandleWin handle;
    handleStorage.fragmentCount = 2;

    handleStorage.fragmentStorageData[0].osHandleStorage = &handle;
    handleStorage.fragmentStorageData[0].cpuPtr = reinterpret_cast(0x1000);
    handleStorage.fragmentStorageData[0].fragmentSize = 0x1000;

    handleStorage.fragmentStorageData[1].cpuPtr = reinterpret_cast(0x2000);
    handleStorage.fragmentStorageData[1].fragmentSize = 0x6000;

    wddm->callBaseCreateAllocationsAndMapGpuVa = false;
    wddm->createAllocationsAndMapGpuVaStatus = STATUS_GRAPHICS_NO_VIDEO_MEMORY;

    auto result = memoryManager->populateOsHandles(handleStorage, mockRootDeviceIndex);
    auto hostPtrManager = static_cast(memoryManager->getHostPtrManager());

    EXPECT_EQ(MemoryManager::AllocationStatus::InvalidHostPointer, result);

    auto numberOfStoredFragments = hostPtrManager->getFragmentCount();
    EXPECT_EQ(0u, numberOfStoredFragments);
    EXPECT_EQ(nullptr, hostPtrManager->getFragment({handleStorage.fragmentStorageData[1].cpuPtr, mockRootDeviceIndex}));

    // Only fragment [1] is marked for freeing; fragment [0] points at the stack-allocated handle.
    handleStorage.fragmentStorageData[1].freeTheFragment = true;
    memoryManager->cleanOsHandles(handleStorage, mockRootDeviceIndex);
}

// The CSR is destroyed while a tag allocation still carries a task count for its context id;
// resetting the memory manager afterwards must not throw.
TEST(WddmMemoryManagerCleanupTest, givenUsedTagAllocationInWddmMemoryManagerWhenCleanupMemoryManagerThenDontAccessCsr) {
    ExecutionEnvironment &executionEnvironment = *platform()->peekExecutionEnvironment();
    auto csr = std::unique_ptr(createCommandStream(executionEnvironment, 0, 1));
    auto wddm = new WddmMock(*executionEnvironment.rootDeviceEnvironments[0].get());
    auto preemptionMode = PreemptionHelper::getDefaultPreemptionMode(*defaultHwInfo);
    wddm->init();

    executionEnvironment.rootDeviceEnvironments[0]->memoryOperationsInterface = std::make_unique(wddm);
    executionEnvironment.memoryManager = std::make_unique(executionEnvironment);
    auto osContext = executionEnvironment.memoryManager->createAndRegisterOsContext(csr.get(), EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_RCS, EngineUsage::Regular}, preemptionMode));
    csr->setupContext(*osContext);

    auto tagAllocator = csr->getEventPerfCountAllocator(100);
    auto allocation = tagAllocator->getTag()->getBaseGraphicsAllocation();
    allocation->getDefaultGraphicsAllocation()->updateTaskCount(1, csr->getOsContext().getContextId());
    csr.reset();
    EXPECT_NO_THROW(executionEnvironment.memoryManager.reset());
}

// Locked allocation, needsMakeResidentBeforeLock false: freeing performs no evictResource call.
TEST_F(WddmMemoryManagerSimpleTest, whenDestroyingLockedAllocationThatDoesntNeedMakeResidentBeforeLockThenDontEvictAllocationFromWddmTemporaryResources) {
    auto allocation = static_cast(memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}));
    memoryManager->lockResource(allocation);
    EXPECT_FALSE(allocation->needsMakeResidentBeforeLock);
    memoryManager->freeGraphicsMemory(allocation);
    EXPECT_EQ(0u, mockTemporaryResources->evictResourceResult.called);
}

// Unlocked allocation, needsMakeResidentBeforeLock false: freeing performs no evictResource call.
TEST_F(WddmMemoryManagerSimpleTest, whenDestroyingNotLockedAllocationThatDoesntNeedMakeResidentBeforeLockThenDontEvictAllocationFromWddmTemporaryResources) {
    auto allocation = static_cast(memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}));
    EXPECT_FALSE(allocation->isLocked());
    EXPECT_FALSE(allocation->needsMakeResidentBeforeLock);
    memoryManager->freeGraphicsMemory(allocation);
    EXPECT_EQ(0u, mockTemporaryResources->evictResourceResult.called);
}

// Locked allocation, needsMakeResidentBeforeLock true: freeing performs one removeResource call.
TEST_F(WddmMemoryManagerSimpleTest, whenDestroyingLockedAllocationThatNeedsMakeResidentBeforeLockThenRemoveTemporaryResource) {
    auto allocation = static_cast(memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}));
    allocation->needsMakeResidentBeforeLock = true;
    memoryManager->lockResource(allocation);
    memoryManager->freeGraphicsMemory(allocation);
    EXPECT_EQ(1u, mockTemporaryResources->removeResourceResult.called);
}

// Unlocked allocation, needsMakeResidentBeforeLock true: freeing performs no evictResource call.
TEST_F(WddmMemoryManagerSimpleTest, whenDestroyingNotLockedAllocationThatNeedsMakeResidentBeforeLockThenDontEvictAllocationFromWddmTemporaryResources) {
    auto allocation = static_cast(memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}));
    allocation->needsMakeResidentBeforeLock = true;
    EXPECT_FALSE(allocation->isLocked());
    memoryManager->freeGraphicsMemory(allocation);
    EXPECT_EQ(0u, mockTemporaryResources->evictResourceResult.called);
}

// Freeing an allocation that carries a reserved GPU VA releases exactly that address and size.
TEST_F(WddmMemoryManagerSimpleTest, whenDestroyingAllocationWithReservedGpuVirtualAddressThenReleaseTheAddress) {
    auto allocation = static_cast(memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}));
    uint64_t gpuAddress = 0x123;
    uint64_t sizeForFree = 0x1234;
    allocation->reservedGpuVirtualAddress = gpuAddress;
    allocation->reservedSizeForGpuVirtualAddress = sizeForFree;
    memoryManager->freeGraphicsMemory(allocation);
    EXPECT_EQ(1u, wddm->freeGpuVirtualAddressResult.called);
    EXPECT_EQ(gpuAddress, wddm->freeGpuVirtualAddressResult.uint64ParamPassed);
    EXPECT_EQ(sizeForFree, wddm->freeGpuVirtualAddressResult.sizePassed);
}

// When the mocked GPU VA mapping fails inside createWddmAllocation, the reserved address and size
// are released.
TEST_F(WddmMemoryManagerSimpleTest, givenAllocationWithReservedGpuVirtualAddressWhenMapCallFailsDuringCreateWddmAllocationThenReleasePreferredAddress) {
    MockWddmAllocation allocation(rootDeviceEnvironment->getGmmClientContext(), 1);
    allocation.setAllocationType(AllocationType::KERNEL_ISA);
    uint64_t gpuAddress = 0x123;
    uint64_t sizeForFree = 0x1234;
    allocation.reservedGpuVirtualAddress = gpuAddress;
    allocation.reservedSizeForGpuVirtualAddress = sizeForFree;

    wddm->callBaseMapGpuVa = false;
    wddm->mapGpuVaStatus = false;

    memoryManager->createWddmAllocation(&allocation, nullptr);
    EXPECT_EQ(1u, wddm->freeGpuVirtualAddressResult.called);
    EXPECT_EQ(gpuAddress, wddm->freeGpuVirtualAddressResult.uint64ParamPassed);
    EXPECT_EQ(sizeForFree, wddm->freeGpuVirtualAddressResult.sizePassed);
}

// A 10-GMM multi-storage BUFFER allocation created at a preferred VA: expects one mapping per GMM.
// NOTE(review): the remainder of this test lies beyond the visible text.
TEST_F(WddmMemoryManagerSimpleTest, givenMultiHandleAllocationAndPreferredGpuVaIsSpecifiedWhenCreateAllocationIsCalledThenAllocationHasProperGpuAddressAndHeapSvmIsUsed) {
    if (memoryManager->isLimitedRange(0)) {
        GTEST_SKIP();
    }

    uint32_t numGmms = 10;
    MockWddmAllocation allocation(rootDeviceEnvironment->getGmmClientContext(), numGmms);
    allocation.setAllocationType(AllocationType::BUFFER);
    allocation.storageInfo.multiStorage = true;

    wddm->callBaseMapGpuVa = true;

    uint64_t gpuPreferredVa = 0x20000ull;

    memoryManager->createWddmAllocation(&allocation, reinterpret_cast(gpuPreferredVa));
    EXPECT_EQ(gpuPreferredVa, allocation.getGpuAddress());
    EXPECT_EQ(numGmms, wddm->mapGpuVirtualAddressResult.called);

    auto gmmSize = allocation.getDefaultGmm()->gmmResourceInfo->getSizeAllocation();
    auto lastRequiredAddress = (numGmms - 1) * gmmSize + gpuPreferredVa;
    EXPECT_EQ(lastRequiredAddress,
wddm->mapGpuVirtualAddressResult.uint64ParamPassed); EXPECT_GT(lastRequiredAddress, memoryManager->getGfxPartition(0)->getHeapMinimalAddress(HeapIndex::HEAP_SVM)); EXPECT_LT(lastRequiredAddress, memoryManager->getGfxPartition(0)->getHeapLimit(HeapIndex::HEAP_SVM)); } TEST_F(WddmMemoryManagerSimpleTest, givenSvmCpuAllocationWhenSizeAndAlignmentProvidedThenAllocateMemoryReserveGpuVa) { if (memoryManager->isLimitedGPU(0)) { GTEST_SKIP(); } size_t size = 2 * MemoryConstants::megaByte; MockAllocationProperties properties{csr->getRootDeviceIndex(), true, size, AllocationType::SVM_CPU, mockDeviceBitfield}; properties.alignment = size; auto allocation = static_cast(memoryManager->allocateGraphicsMemoryWithProperties(properties)); ASSERT_NE(nullptr, allocation); EXPECT_EQ(size, allocation->getUnderlyingBufferSize()); EXPECT_NE(nullptr, allocation->getUnderlyingBuffer()); EXPECT_EQ(allocation->getUnderlyingBuffer(), allocation->getDriverAllocatedCpuPtr()); // limited platforms will not use heap HeapIndex::HEAP_SVM if (executionEnvironment->rootDeviceEnvironments[allocation->getRootDeviceIndex()]->isFullRangeSvm()) { EXPECT_EQ(alignUp(allocation->getReservedAddressPtr(), size), reinterpret_cast(allocation->getGpuAddress())); } EXPECT_EQ((2 * size), allocation->getReservedAddressSize()); memoryManager->freeGraphicsMemory(allocation); } TEST_F(WddmMemoryManagerSimpleTest, givenWriteCombinedAllocationThenCpuAddressIsEqualToGpuAddress) { if (is32bit) { GTEST_SKIP(); } memoryManager.reset(new MockWddmMemoryManager(true, true, *executionEnvironment)); size_t size = 2 * MemoryConstants::megaByte; auto allocation = static_cast(memoryManager->allocateGraphicsMemoryWithProperties({0, size, AllocationType::WRITE_COMBINED, mockDeviceBitfield})); ASSERT_NE(nullptr, allocation); EXPECT_EQ(size, allocation->getUnderlyingBufferSize()); EXPECT_NE(nullptr, allocation->getUnderlyingBuffer()); EXPECT_NE(nullptr, reinterpret_cast(allocation->getGpuAddress())); if 
(executionEnvironment->rootDeviceEnvironments[allocation->getRootDeviceIndex()]->isFullRangeSvm()) { EXPECT_EQ(allocation->getUnderlyingBuffer(), reinterpret_cast(allocation->getGpuAddress())); } memoryManager->freeGraphicsMemory(allocation); } TEST_F(WddmMemoryManagerSimpleTest, givenDebugVariableWhenCreatingWddmMemoryManagerThenSetSupportForMultiStorageResources) { DebugManagerStateRestore restore; EXPECT_TRUE(memoryManager->supportsMultiStorageResources); { DebugManager.flags.EnableMultiStorageResources.set(0); MockWddmMemoryManager memoryManager(true, true, *executionEnvironment); EXPECT_FALSE(memoryManager.supportsMultiStorageResources); } { DebugManager.flags.EnableMultiStorageResources.set(1); MockWddmMemoryManager memoryManager(true, true, *executionEnvironment); EXPECT_TRUE(memoryManager.supportsMultiStorageResources); } } TEST_F(WddmMemoryManagerSimpleTest, givenBufferHostMemoryAllocationAndLimitedRangeAnd32BitThenAllocationGoesToExternalHeap) { if (executionEnvironment->rootDeviceEnvironments[0]->isFullRangeSvm() || !is32bit) { GTEST_SKIP(); } memoryManager.reset(new MockWddmMemoryManager(true, true, *executionEnvironment)); size_t size = 2 * MemoryConstants::megaByte; auto allocation = static_cast(memoryManager->allocateGraphicsMemoryWithProperties({0, size, AllocationType::BUFFER_HOST_MEMORY, mockDeviceBitfield})); ASSERT_NE(nullptr, allocation); EXPECT_EQ(size, allocation->getUnderlyingBufferSize()); EXPECT_NE(nullptr, allocation->getUnderlyingBuffer()); uint64_t gpuAddress = allocation->getGpuAddress(); EXPECT_NE(0ULL, gpuAddress); EXPECT_EQ(0ULL, gpuAddress & 0xffFFffF000000000); EXPECT_LT(GmmHelper::canonize(memoryManager->getGfxPartition(0)->getHeapBase(HeapIndex::HEAP_EXTERNAL)), gpuAddress); EXPECT_GT(GmmHelper::canonize(memoryManager->getGfxPartition(0)->getHeapLimit(HeapIndex::HEAP_EXTERNAL)), gpuAddress); memoryManager->freeGraphicsMemory(allocation); } TEST_F(WddmMemoryManagerSimpleTest, 
givenDebugModuleAreaTypeWhenCreatingAllocationThen32BitAllocationWithFrontWindowGpuVaIsReturned) { const auto size = MemoryConstants::pageSize64k; NEO::AllocationProperties properties{0, true, size, NEO::AllocationType::DEBUG_MODULE_AREA, false, mockDeviceBitfield}; auto moduleDebugArea = memoryManager->allocateGraphicsMemoryWithProperties(properties); EXPECT_NE(nullptr, moduleDebugArea); EXPECT_NE(nullptr, moduleDebugArea->getUnderlyingBuffer()); EXPECT_GE(moduleDebugArea->getUnderlyingBufferSize(), size); auto address64bit = moduleDebugArea->getGpuAddressToPatch(); EXPECT_LT(address64bit, MemoryConstants::max32BitAddress); EXPECT_TRUE(moduleDebugArea->is32BitAllocation()); auto frontWindowBase = GmmHelper::canonize(memoryManager->getGfxPartition(moduleDebugArea->getRootDeviceIndex())->getHeapBase(memoryManager->selectInternalHeap(moduleDebugArea->isAllocatedInLocalMemoryPool()))); EXPECT_EQ(frontWindowBase, moduleDebugArea->getGpuBaseAddress()); EXPECT_EQ(frontWindowBase, moduleDebugArea->getGpuAddress()); memoryManager->freeGraphicsMemory(moduleDebugArea); } TEST(WddmMemoryManager, givenMultipleRootDeviceWhenMemoryManagerGetsWddmThenWddmIsFromCorrectRootDevice) { DebugManagerStateRestore restorer; DebugManager.flags.CreateMultipleRootDevices.set(4); VariableBackup backup{&ultHwConfig}; ultHwConfig.useMockedPrepareDeviceEnvironmentsFunc = false; auto executionEnvironment = platform()->peekExecutionEnvironment(); prepareDeviceEnvironments(*executionEnvironment); MockWddmMemoryManager wddmMemoryManager(*executionEnvironment); for (auto i = 0u; i < executionEnvironment->rootDeviceEnvironments.size(); i++) { auto wddmFromRootDevice = executionEnvironment->rootDeviceEnvironments[i]->osInterface->getDriverModel()->as(); EXPECT_EQ(wddmFromRootDevice, &wddmMemoryManager.getWddm(i)); } } TEST(WddmMemoryManager, givenMultipleRootDeviceWhenCreateMemoryManagerThenTakeMaxMallocRestrictionAvailable) { uint32_t numRootDevices = 4u; DebugManagerStateRestore restorer; 
DebugManager.flags.CreateMultipleRootDevices.set(numRootDevices); VariableBackup backup{&ultHwConfig}; ultHwConfig.useMockedPrepareDeviceEnvironmentsFunc = false; auto executionEnvironment = platform()->peekExecutionEnvironment(); prepareDeviceEnvironments(*executionEnvironment); for (auto i = 0u; i < numRootDevices; i++) { auto wddm = static_cast(executionEnvironment->rootDeviceEnvironments[i]->osInterface->getDriverModel()->as()); wddm->minAddress = i * (numRootDevices - i); } MockWddmMemoryManager wddmMemoryManager(*executionEnvironment); EXPECT_EQ(4u, wddmMemoryManager.getAlignedMallocRestrictions()->minAddress); } TEST(WddmMemoryManager, givenNoLocalMemoryOnAnyDeviceWhenIsCpuCopyRequiredIsCalledThenFalseIsReturned) { DebugManagerStateRestore restorer; DebugManager.flags.EnableLocalMemory.set(false); VariableBackup backup{&ultHwConfig}; ultHwConfig.useMockedPrepareDeviceEnvironmentsFunc = false; auto executionEnvironment = platform()->peekExecutionEnvironment(); prepareDeviceEnvironments(*executionEnvironment); MockWddmMemoryManager wddmMemoryManager(*executionEnvironment); EXPECT_FALSE(wddmMemoryManager.isCpuCopyRequired(&restorer)); } TEST(WddmMemoryManager, givenLocalPointerPassedToIsCpuCopyRequiredThenFalseIsReturned) { auto executionEnvironment = platform()->peekExecutionEnvironment(); VariableBackup backup{&ultHwConfig}; ultHwConfig.useMockedPrepareDeviceEnvironmentsFunc = false; prepareDeviceEnvironments(*executionEnvironment); MockWddmMemoryManager wddmMemoryManager(*executionEnvironment); EXPECT_FALSE(wddmMemoryManager.isCpuCopyRequired(&backup)); // call multiple times to make sure that result is constant EXPECT_FALSE(wddmMemoryManager.isCpuCopyRequired(&backup)); EXPECT_FALSE(wddmMemoryManager.isCpuCopyRequired(&backup)); EXPECT_FALSE(wddmMemoryManager.isCpuCopyRequired(&backup)); EXPECT_FALSE(wddmMemoryManager.isCpuCopyRequired(&backup)); } TEST_F(WddmMemoryManagerSimpleTest, whenWddmMemoryManagerIsCreatedThenAlignmentSelectorHasExpectedAlignments) 
{ std::vector expectedAlignments = { {MemoryConstants::pageSize2Mb, false, 0.1f, HeapIndex::TOTAL_HEAPS}, {MemoryConstants::pageSize64k, true, AlignmentSelector::anyWastage, HeapIndex::TOTAL_HEAPS}, }; MockWddmMemoryManager memoryManager(true, true, *executionEnvironment); EXPECT_EQ(expectedAlignments, memoryManager.alignmentSelector.peekCandidateAlignments()); } TEST_F(WddmMemoryManagerSimpleTest, given2MbPagesDisabledWhenWddmMemoryManagerIsCreatedThenAlignmentSelectorHasExpectedAlignments) { DebugManagerStateRestore restore{}; DebugManager.flags.AlignLocalMemoryVaTo2MB.set(0); std::vector expectedAlignments = { {MemoryConstants::pageSize64k, true, AlignmentSelector::anyWastage, HeapIndex::TOTAL_HEAPS}, }; MockWddmMemoryManager memoryManager(true, true, *executionEnvironment); EXPECT_EQ(expectedAlignments, memoryManager.alignmentSelector.peekCandidateAlignments()); } TEST_F(WddmMemoryManagerSimpleTest, givenCustomAlignmentWhenWddmMemoryManagerIsCreatedThenAlignmentSelectorHasExpectedAlignments) { DebugManagerStateRestore restore{}; { DebugManager.flags.ExperimentalEnableCustomLocalMemoryAlignment.set(MemoryConstants::megaByte); std::vector expectedAlignments = { {MemoryConstants::pageSize2Mb, false, 0.1f, HeapIndex::TOTAL_HEAPS}, {MemoryConstants::megaByte, false, AlignmentSelector::anyWastage, HeapIndex::TOTAL_HEAPS}, {MemoryConstants::pageSize64k, true, AlignmentSelector::anyWastage, HeapIndex::TOTAL_HEAPS}, }; MockWddmMemoryManager memoryManager(true, true, *executionEnvironment); EXPECT_EQ(expectedAlignments, memoryManager.alignmentSelector.peekCandidateAlignments()); } { DebugManager.flags.ExperimentalEnableCustomLocalMemoryAlignment.set(2 * MemoryConstants::pageSize2Mb); std::vector expectedAlignments = { {2 * MemoryConstants::pageSize2Mb, false, AlignmentSelector::anyWastage, HeapIndex::TOTAL_HEAPS}, {MemoryConstants::pageSize2Mb, false, 0.1f, HeapIndex::TOTAL_HEAPS}, {MemoryConstants::pageSize64k, true, AlignmentSelector::anyWastage, 
HeapIndex::TOTAL_HEAPS}, }; MockWddmMemoryManager memoryManager(true, true, *executionEnvironment); EXPECT_EQ(expectedAlignments, memoryManager.alignmentSelector.peekCandidateAlignments()); } } TEST_F(WddmMemoryManagerSimpleTest, givenWddmMemoryManagerWhenGettingGlobalMemoryPercentThenCorrectValueIsReturned) { MockWddmMemoryManager memoryManager(true, true, *executionEnvironment); uint32_t rootDeviceIndex = 0u; EXPECT_EQ(memoryManager.getPercentOfGlobalMemoryAvailable(rootDeviceIndex), 0.8); } TEST_F(WddmMemoryManagerSimpleTest, whenAlignmentRequirementExceedsPageSizeThenAllocateGraphicsMemoryFromSystemPtr) { struct MockWddmMemoryManagerAllocateWithAlignment : MockWddmMemoryManager { using MockWddmMemoryManager::MockWddmMemoryManager; GraphicsAllocation *allocateSystemMemoryAndCreateGraphicsAllocationFromIt(const AllocationData &allocationData) override { ++callCount.allocateSystemMemoryAndCreateGraphicsAllocationFromIt; return nullptr; } GraphicsAllocation *allocateGraphicsMemoryUsingKmdAndMapItToCpuVA(const AllocationData &allocationData, bool allowLargePages) override { ++callCount.allocateGraphicsMemoryUsingKmdAndMapItToCpuVA; return nullptr; } struct { int allocateSystemMemoryAndCreateGraphicsAllocationFromIt = 0; int allocateGraphicsMemoryUsingKmdAndMapItToCpuVA = 0; } callCount; }; MockWddmMemoryManagerAllocateWithAlignment memoryManager(true, true, *executionEnvironment); AllocationData allocData = {}; allocData.size = 1024; allocData.alignment = MemoryConstants::pageSize64k * 4; memoryManager.allocateGraphicsMemoryWithAlignment(allocData); EXPECT_EQ(1U, memoryManager.callCount.allocateSystemMemoryAndCreateGraphicsAllocationFromIt); EXPECT_EQ(0U, memoryManager.callCount.allocateGraphicsMemoryUsingKmdAndMapItToCpuVA); memoryManager.callCount.allocateSystemMemoryAndCreateGraphicsAllocationFromIt = 0; memoryManager.callCount.allocateGraphicsMemoryUsingKmdAndMapItToCpuVA = 0; allocData.size = 1024; allocData.alignment = MemoryConstants::pageSize; 
memoryManager.allocateGraphicsMemoryWithAlignment(allocData); if (preferredAllocationMethod == GfxMemoryAllocationMethod::AllocateByKmd) { EXPECT_EQ(0U, memoryManager.callCount.allocateSystemMemoryAndCreateGraphicsAllocationFromIt); EXPECT_EQ(1U, memoryManager.callCount.allocateGraphicsMemoryUsingKmdAndMapItToCpuVA); } else { EXPECT_EQ(1U, memoryManager.callCount.allocateSystemMemoryAndCreateGraphicsAllocationFromIt); EXPECT_EQ(0U, memoryManager.callCount.allocateGraphicsMemoryUsingKmdAndMapItToCpuVA); } } struct WddmWithMockedLock : public WddmMock { using WddmMock::WddmMock; void *lockResource(const D3DKMT_HANDLE &handle, bool applyMakeResidentPriorToLock, size_t size) override { if (handle < storageLocked.size()) { storageLocked.set(handle); } return storages[handle]; } std::bitset<4> storageLocked{}; uint8_t storages[EngineLimits::maxHandleCount][MemoryConstants::pageSize64k] = {0u}; }; TEST(WddmMemoryManagerCopyMemoryToAllocationBanksTest, givenAllocationWithMultiTilePlacementWhenCopyDataSpecificMemoryBanksThenLockOnlySpecificStorages) { uint8_t sourceData[32]{}; size_t offset = 3; size_t sourceAllocationSize = sizeof(sourceData); auto hwInfo = *defaultHwInfo; hwInfo.featureTable.flags.ftrLocalMemory = true; MockExecutionEnvironment executionEnvironment(&hwInfo); executionEnvironment.initGmm(); auto wddm = new WddmWithMockedLock(*executionEnvironment.rootDeviceEnvironments[0]); wddm->init(); MemoryManagerCreate memoryManager(true, true, executionEnvironment); MockWddmAllocation mockAllocation(executionEnvironment.rootDeviceEnvironments[0]->getGmmClientContext()); mockAllocation.storageInfo.memoryBanks = 0b1111; DeviceBitfield memoryBanksToCopy = 0b1010; mockAllocation.handles.resize(4); for (auto index = 0u; index < 4; index++) { wddm->storageLocked.set(index, false); if (mockAllocation.storageInfo.memoryBanks.test(index)) { mockAllocation.handles[index] = index; } } std::vector dataToCopy(sourceAllocationSize, 1u); auto ret = 
memoryManager.copyMemoryToAllocationBanks(&mockAllocation, offset, dataToCopy.data(), dataToCopy.size(), memoryBanksToCopy); EXPECT_TRUE(ret); EXPECT_FALSE(wddm->storageLocked.test(0)); ASSERT_TRUE(wddm->storageLocked.test(1)); EXPECT_FALSE(wddm->storageLocked.test(2)); ASSERT_TRUE(wddm->storageLocked.test(3)); EXPECT_EQ(0, memcmp(ptrOffset(wddm->storages[1], offset), dataToCopy.data(), dataToCopy.size())); EXPECT_EQ(0, memcmp(ptrOffset(wddm->storages[3], offset), dataToCopy.data(), dataToCopy.size())); } class WddmMemoryManagerMock : public MockWddmMemoryManagerFixture, public ::testing::Test { public: void SetUp() override { MockWddmMemoryManagerFixture::SetUp(); } void TearDown() override { MockWddmMemoryManagerFixture::TearDown(); } }; TEST_F(WddmMemoryManagerMock, givenAllocationWithReservedGpuVirtualAddressWhenMapCallFailsDuringCreateWddmAllocationThenReleasePreferredAddress) { MockWddmAllocation allocation(rootDeviceEnvironment->getGmmClientContext(), 4); allocation.setAllocationType(AllocationType::KERNEL_ISA); uint64_t gpuAddress = 0x123; uint64_t sizeForFree = 0x1234; allocation.reservedGpuVirtualAddress = gpuAddress; allocation.reservedSizeForGpuVirtualAddress = sizeForFree; wddm->callBaseMapGpuVa = false; wddm->mapGpuVaStatus = false; memoryManager->createWddmAllocation(&allocation, nullptr); EXPECT_EQ(1u, wddm->freeGpuVirtualAddressResult.called); EXPECT_EQ(gpuAddress, wddm->freeGpuVirtualAddressResult.uint64ParamPassed); EXPECT_EQ(sizeForFree, wddm->freeGpuVirtualAddressResult.sizePassed); } struct PlatformWithFourDevicesTest : public ::testing::Test { PlatformWithFourDevicesTest() { ultHwConfig.useMockedPrepareDeviceEnvironmentsFunc = false; } void SetUp() override { DebugManager.flags.CreateMultipleSubDevices.set(4); initPlatform(); } DebugManagerStateRestore restorer; VariableBackup backup{&ultHwConfig}; }; TEST_F(PlatformWithFourDevicesTest, 
whenCreateColoredAllocationAndWddmReturnsCanonizedAddressDuringMapingVAThenAddressIsBeingDecanonizedAndAbortIsNotThrownFromUnrecoverableIfStatement) { struct CanonizeAddressMockWddm : public WddmMock { using WddmMock::WddmMock; bool mapGpuVirtualAddress(Gmm *gmm, D3DKMT_HANDLE handle, D3DGPU_VIRTUAL_ADDRESS minimumAddress, D3DGPU_VIRTUAL_ADDRESS maximumAddress, D3DGPU_VIRTUAL_ADDRESS preferredAddress, D3DGPU_VIRTUAL_ADDRESS &gpuPtr) override { gpuPtr = GmmHelper::canonize(preferredAddress); return mapGpuVaStatus; } }; auto wddm = new CanonizeAddressMockWddm(*platform()->peekExecutionEnvironment()->rootDeviceEnvironments[0]); wddm->init(); auto osInterfaceMock = new OSInterface(); auto callBaseDestroyBackup = wddm->callBaseDestroyAllocations; wddm->callBaseDestroyAllocations = false; wddm->mapGpuVaStatus = true; osInterfaceMock->setDriverModel(std::unique_ptr(wddm)); auto osInterfaceBackUp = platform()->peekExecutionEnvironment()->rootDeviceEnvironments[0]->osInterface.release(); platform()->peekExecutionEnvironment()->rootDeviceEnvironments[0]->osInterface.reset(osInterfaceMock); MockWddmMemoryManager memoryManager(true, true, *platform()->peekExecutionEnvironment()); memoryManager.supportsMultiStorageResources = true; platform()->peekExecutionEnvironment()->rootDeviceEnvironments[0]->getMutableHardwareInfo()->featureTable.flags.ftrLocalMemory = true; platform()->peekExecutionEnvironment()->rootDeviceEnvironments[0]->getMutableHardwareInfo()->featureTable.flags.ftrMultiTileArch = true; GraphicsAllocation *allocation = nullptr; EXPECT_NO_THROW(allocation = memoryManager.allocateGraphicsMemoryWithProperties({mockRootDeviceIndex, true, 4 * MemoryConstants::pageSize64k, AllocationType::BUFFER, true, mockDeviceBitfield})); EXPECT_NE(nullptr, allocation); memoryManager.freeGraphicsMemory(allocation); wddm->callBaseDestroyAllocations = callBaseDestroyBackup; platform()->peekExecutionEnvironment()->rootDeviceEnvironments[0]->osInterface.reset(osInterfaceBackUp); } 
// NOTE(review): several template argument lists in the two tests below (the casts, as(), VariableBackup,
// MemoryManagerCreate) carry no <...> in this copy of the file. They are kept exactly as found here;
// confirm them against the upstream sources before building.

// Allocates BUFFER allocations of 4..12 64KB pages with device bitfield 0b1111 and checks how the pages are
// split between the four GMM handles: every handle gets size / handles pages and the first size % handles
// handles get one page more.
TEST_F(PlatformWithFourDevicesTest, givenDifferentAllocationSizesWhenColourAllocationThenResourceIsSpreadProperly) {
    auto wddm = reinterpret_cast(platform()->peekExecutionEnvironment()->rootDeviceEnvironments[0]->osInterface->getDriverModel()->as());
    wddm->mapGpuVaStatus = true;
    // presumably forces callBaseMapGpuVa to false for the duration of the test and restores it on scope exit - verify against VariableBackup
    VariableBackup restorer{&wddm->callBaseMapGpuVa, false};

    MockWddmMemoryManager memoryManager(true, true, *platform()->peekExecutionEnvironment());
    memoryManager.supportsMultiStorageResources = true;

    platform()->peekExecutionEnvironment()->rootDeviceEnvironments[0]->getMutableHardwareInfo()->featureTable.flags.ftrLocalMemory = true;
    platform()->peekExecutionEnvironment()->rootDeviceEnvironments[0]->getMutableHardwareInfo()->featureTable.flags.ftrMultiTileArch = true;

    // We are allocating memory from 4 to 12 pages and want to check if remainders (1, 2 or 3 pages in case of 4 devices) are spread equally.
    for (int additionalSize = 0; additionalSize <= 8; additionalSize++) {
        auto allocation = static_cast(memoryManager.allocateGraphicsMemoryWithProperties({mockRootDeviceIndex, true, (4 + additionalSize) * MemoryConstants::pageSize64k, AllocationType::BUFFER, true, 0b1111}));
        // Fail the test instead of crashing on a null dereference when the allocation could not be created.
        ASSERT_NE(nullptr, allocation);

        auto handles = allocation->getNumGmms();
        EXPECT_EQ(4u, handles);

        // handles is used below as a divisor and as the number of GMMs to inspect,
        // so the per-handle checks only run when the expected count was reported.
        if (handles == 4u) {
            auto size = allocation->getAlignedSize() / MemoryConstants::pageSize64k;
            auto pagesPerHandle = size / handles;
            auto remainingPages = size % handles;

            for (auto handleId = 0u; handleId < handles; handleId++) {
                // Leftover pages are handed out one each, starting from handle 0.
                auto expectedPages = pagesPerHandle + (handleId < remainingPages ? 1u : 0u);
                EXPECT_EQ(expectedPages * MemoryConstants::pageSize64k, allocation->getGmm(handleId)->gmmResourceInfo->getSizeAllocation());
            }
        }

        memoryManager.freeGraphicsMemory(allocation);
    }
}

// A SCRATCH_SURFACE allocation created with mockDeviceBitfield is expected to be backed by exactly one GMM handle.
TEST_F(PlatformWithFourDevicesTest, whenCreateScratchSpaceInSingleTileQueueThenTheAllocationHasOneHandle) {
    MemoryManagerCreate memoryManager(true, true, *platform()->peekExecutionEnvironment());

    AllocationProperties properties{mockRootDeviceIndex, true, 1u, AllocationType::SCRATCH_SURFACE, false, false, mockDeviceBitfield};
    auto allocation = static_cast(memoryManager.allocateGraphicsMemoryWithProperties(properties));
    // Fail the test instead of crashing on a null dereference when the allocation could not be created.
    ASSERT_NE(nullptr, allocation);

    EXPECT_EQ(1u, allocation->getNumGmms());

    memoryManager.freeGraphicsMemory(allocation);
}
compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/windows/wddm_memory_manager_tests.h000066400000000000000000000164651422164147700334060ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/os_interface/os_interface.h" #include "shared/source/os_interface/windows/os_environment_win.h" #include "shared/source/os_interface/windows/wddm_memory_operations_handler.h" #include "shared/test/common/helpers/engine_descriptor_helper.h" #include "shared/test/common/helpers/execution_environment_helper.h" #include "shared/test/common/mocks/mock_gmm.h" #include "shared/test/common/mocks/mock_gmm_page_table_mngr.h" #include "shared/test/common/mocks/mock_wddm_residency_allocations_container.h" #include "shared/test/common/mocks/windows/mock_gdi_interface.h" #include "shared/test/common/os_interface/windows/mock_wddm_memory_manager.h" #include "shared/test/common/os_interface/windows/wddm_fixture.h" #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "gtest/gtest.h" #include using namespace NEO; using namespace ::testing; class WddmMemoryManagerFixture : public GdiDllFixture { public: void SetUp() override; void TearDown() override { GdiDllFixture::TearDown(); } ExecutionEnvironment *executionEnvironment; RootDeviceEnvironment *rootDeviceEnvironment = nullptr; std::unique_ptr memoryManager; WddmMock *wddm = nullptr; const uint32_t rootDeviceIndex = 0u; }; typedef ::Test WddmMemoryManagerTest; class MockWddmMemoryManagerFixture { public: void SetUp() { executionEnvironment = platform()->peekExecutionEnvironment(); rootDeviceEnvironment = executionEnvironment->rootDeviceEnvironments[0].get(); auto osEnvironment = new OsEnvironmentWin(); gdi = new MockGdi(); osEnvironment->gdi.reset(gdi); executionEnvironment->osEnvironment.reset(osEnvironment); wddm = 
static_cast(Wddm::createWddm(nullptr, *rootDeviceEnvironment)); constexpr uint64_t heap32Base = (is32bit) ? 0x1000 : 0x800000000000; wddm->setHeap32(heap32Base, 1000 * MemoryConstants::pageSize - 1); wddm->init(); rootDeviceEnvironment->memoryOperationsInterface = std::make_unique(wddm); executionEnvironment->initializeMemoryManager(); memoryManager = std::make_unique(*executionEnvironment); csr.reset(createCommandStream(*executionEnvironment, 0u, 1)); auto hwInfo = rootDeviceEnvironment->getHardwareInfo(); osContext = memoryManager->createAndRegisterOsContext(csr.get(), EngineDescriptorHelper::getDefaultDescriptor(HwHelper::get(hwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*hwInfo)[0], PreemptionHelper::getDefaultPreemptionMode(*hwInfo))); osContext->ensureContextInitialized(); osContext->incRefInternal(); mockTemporaryResources = reinterpret_cast(wddm->getTemporaryResourcesContainer()); } void TearDown() { osContext->decRefInternal(); } RootDeviceEnvironment *rootDeviceEnvironment = nullptr; ExecutionEnvironment *executionEnvironment; std::unique_ptr memoryManager; std::unique_ptr csr; WddmMock *wddm = nullptr; MockWddmResidentAllocationsContainer *mockTemporaryResources; OsContext *osContext = nullptr; MockGdi *gdi = nullptr; }; typedef ::Test WddmMemoryManagerResidencyTest; class ExecutionEnvironmentFixture : public ::testing::Test { public: ExecutionEnvironmentFixture() { executionEnvironment = platform()->peekExecutionEnvironment(); } ExecutionEnvironment *executionEnvironment; }; class WddmMemoryManagerFixtureWithGmockWddm : public ExecutionEnvironmentFixture { public: MockWddmMemoryManager *memoryManager = nullptr; void SetUp() override { // wddm is deleted by memory manager wddm = new WddmMock(*executionEnvironment->rootDeviceEnvironments[0].get()); ASSERT_NE(nullptr, wddm); auto preemptionMode = PreemptionHelper::getDefaultPreemptionMode(*defaultHwInfo); wddm->init(); executionEnvironment->rootDeviceEnvironments[0]->memoryOperationsInterface 
= std::make_unique(wddm); osInterface = executionEnvironment->rootDeviceEnvironments[0]->osInterface.get(); memoryManager = new (std::nothrow) MockWddmMemoryManager(*executionEnvironment); executionEnvironment->memoryManager.reset(memoryManager); //assert we have memory manager ASSERT_NE(nullptr, memoryManager); csr.reset(createCommandStream(*executionEnvironment, 0u, 1)); auto hwInfo = executionEnvironment->rootDeviceEnvironments[0]->getHardwareInfo(); osContext = memoryManager->createAndRegisterOsContext(csr.get(), EngineDescriptorHelper::getDefaultDescriptor(HwHelper::get(hwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*hwInfo)[0], preemptionMode)); osContext->incRefInternal(); } void TearDown() override { osContext->decRefInternal(); } WddmMock *wddm = nullptr; std::unique_ptr csr; OSInterface *osInterface; OsContext *osContext; }; using WddmMemoryManagerTest2 = WddmMemoryManagerFixtureWithGmockWddm; class BufferWithWddmMemory : public ::testing::Test, public WddmMemoryManagerFixture { public: protected: void SetUp() { WddmMemoryManagerFixture::SetUp(); tmp = context.getMemoryManager(); context.memoryManager = memoryManager.get(); flags = 0; } void TearDown() { context.memoryManager = tmp; WddmMemoryManagerFixture::TearDown(); } MemoryManager *tmp; MockContext context; cl_mem_flags flags; cl_int retVal; }; class WddmMemoryManagerSimpleTest : public MockWddmMemoryManagerFixture, public ::testing::Test { public: void SetUp() override { MockWddmMemoryManagerFixture::SetUp(); } void TearDown() override { MockWddmMemoryManagerFixture::TearDown(); } }; class MockWddmMemoryManagerTest : public ::testing::Test { public: void SetUp() override { executionEnvironment = getExecutionEnvironmentImpl(hwInfo, 2); executionEnvironment->incRefInternal(); wddm = new WddmMock(*executionEnvironment->rootDeviceEnvironments[1].get()); executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->osInterface->setDriverModel(std::unique_ptr(wddm)); 
executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->memoryOperationsInterface = std::make_unique(wddm); } void TearDown() override { executionEnvironment->decRefInternal(); } HardwareInfo *hwInfo = nullptr; WddmMock *wddm = nullptr; ExecutionEnvironment *executionEnvironment = nullptr; const uint32_t rootDeviceIndex = 0u; }; using OsAgnosticMemoryManagerUsingWddmTest = MockWddmMemoryManagerTest; wddm_residency_controller_tests.cpp000066400000000000000000001511051422164147700350770ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/windows/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/command_stream/preemption.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/memory_manager/memory_operations_handler.h" #include "shared/source/os_interface/os_context.h" #include "shared/source/os_interface/os_interface.h" #include "shared/source/os_interface/windows/os_context_win.h" #include "shared/source/os_interface/windows/wddm/wddm_interface.h" #include "shared/source/os_interface/windows/wddm_memory_operations_handler.h" #include "shared/source/os_interface/windows/wddm_residency_controller.h" #include "shared/test/common/helpers/engine_descriptor_helper.h" #include "shared/test/common/libult/create_command_stream.h" #include "shared/test/common/mocks/mock_allocation_properties.h" #include "shared/test/common/mocks/mock_execution_environment.h" #include "shared/test/common/mocks/mock_wddm.h" #include "shared/test/common/mocks/windows/mock_gdi_interface.h" #include "shared/test/common/os_interface/windows/mock_wddm_memory_manager.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/platform/platform.h" #include 
"opencl/test/unit_test/mocks/mock_platform.h" #include "opencl/test/unit_test/os_interface/windows/mock_wddm_allocation.h" #include "gtest/gtest.h" #include using namespace NEO; class MockWddmResidencyController : public WddmResidencyController { public: using WddmResidencyController::lastTrimFenceValue; using WddmResidencyController::trimCallbackHandle; using WddmResidencyController::trimCandidateList; using WddmResidencyController::trimCandidatesCount; using WddmResidencyController::trimResidency; using WddmResidencyController::trimResidencyToBudget; using WddmResidencyController::WddmResidencyController; uint32_t acquireLockCallCount = 0u; bool forceTrimCandidateListCompaction = false; std::unique_lock acquireLock() override { acquireLockCallCount++; return WddmResidencyController::acquireLock(); } bool checkTrimCandidateListCompaction() override { return forceTrimCandidateListCompaction || WddmResidencyController::checkTrimCandidateListCompaction(); } }; class MockOsContextWin : public OsContextWin { public: MockOsContextWin(Wddm &wddm, uint32_t contextId, const EngineDescriptor &engineDescriptor) : OsContextWin(wddm, contextId, engineDescriptor), mockResidencyController(wddm, contextId) {} WddmResidencyController &getResidencyController() override { return mockResidencyController; }; MockWddmResidencyController mockResidencyController; }; struct WddmResidencyControllerTest : ::testing::Test { const uint32_t osContextId = 0u; void SetUp() { executionEnvironment = std::make_unique(); rootDeviceEnvironment = std::make_unique(*executionEnvironment); wddm = static_cast(Wddm::createWddm(nullptr, *rootDeviceEnvironment)); wddm->init(); mockOsContextWin = std::make_unique(*wddm, osContextId, EngineDescriptorHelper::getDefaultDescriptor()); wddm->getWddmInterface()->createMonitoredFence(*mockOsContextWin); residencyController = &mockOsContextWin->mockResidencyController; } std::unique_ptr executionEnvironment; std::unique_ptr rootDeviceEnvironment; WddmMock *wddm = 
nullptr; std::unique_ptr mockOsContextWin; MockWddmResidencyController *residencyController = nullptr; }; struct WddmResidencyControllerWithGdiTest : ::testing::Test { const uint32_t osContextId = 0u; void SetUp() { executionEnvironment = std::make_unique(); rootDeviceEnvironment = std::make_unique(*executionEnvironment); wddm = static_cast(Wddm::createWddm(nullptr, *rootDeviceEnvironment)); gdi = new MockGdi(); wddm->resetGdi(gdi); wddm->init(); mockOsContextWin = std::make_unique(*wddm, osContextId, EngineDescriptorHelper::getDefaultDescriptor()); wddm->getWddmInterface()->createMonitoredFence(*mockOsContextWin); residencyController = &mockOsContextWin->mockResidencyController; residencyController->registerCallback(); } std::unique_ptr executionEnvironment; std::unique_ptr rootDeviceEnvironment; WddmMock *wddm = nullptr; std::unique_ptr mockOsContextWin; MockWddmResidencyController *residencyController = nullptr; MockGdi *gdi = nullptr; }; struct WddmResidencyControllerWithMockWddmTest : public WddmResidencyControllerTest { void SetUp() { executionEnvironment = platform()->peekExecutionEnvironment(); wddm = new WddmMock(*executionEnvironment->rootDeviceEnvironments[0].get()); wddm->resetGdi(new MockGdi()); auto preemptionMode = PreemptionHelper::getDefaultPreemptionMode(*defaultHwInfo); wddm->init(); executionEnvironment->rootDeviceEnvironments[0]->memoryOperationsInterface = std::make_unique(wddm); executionEnvironment->initializeMemoryManager(); memoryManager = std::make_unique(*executionEnvironment); csr.reset(createCommandStream(*executionEnvironment, 0u, 1)); auto hwInfo = executionEnvironment->rootDeviceEnvironments[0]->getHardwareInfo(); osContext = memoryManager->createAndRegisterOsContext(csr.get(), EngineDescriptorHelper::getDefaultDescriptor(HwHelper::get(hwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*hwInfo)[0], preemptionMode)); osContext->ensureContextInitialized(); osContext->incRefInternal(); residencyController = 
&static_cast(osContext)->getResidencyController(); gmmClientContext = executionEnvironment->rootDeviceEnvironments[0]->getGmmClientContext(); } void TearDown() { osContext->decRefInternal(); } ExecutionEnvironment *executionEnvironment; std::unique_ptr memoryManager; std::unique_ptr csr; WddmMock *wddm = nullptr; OsContext *osContext; WddmResidencyController *residencyController; GmmClientContext *gmmClientContext = nullptr; }; struct WddmResidencyControllerWithGdiAndMemoryManagerTest : ::testing::Test { const uint32_t osContextId = 0u; void SetUp() { executionEnvironment = platform()->peekExecutionEnvironment(); wddm = static_cast(Wddm::createWddm(nullptr, *executionEnvironment->rootDeviceEnvironments[0].get())); wddm->init(); gdi = new MockGdi(); wddm->resetGdi(gdi); executionEnvironment->rootDeviceEnvironments[0]->memoryOperationsInterface = std::make_unique(wddm); executionEnvironment->initializeMemoryManager(); memoryManager = std::make_unique(*executionEnvironment); csr.reset(createCommandStream(*executionEnvironment, 0u, 1)); auto hwInfo = executionEnvironment->rootDeviceEnvironments[0]->getHardwareInfo(); osContext = memoryManager->createAndRegisterOsContext(csr.get(), EngineDescriptorHelper::getDefaultDescriptor(HwHelper::get(hwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*hwInfo)[0], PreemptionHelper::getDefaultPreemptionMode(*defaultHwInfo))); osContext->ensureContextInitialized(); osContext->incRefInternal(); residencyController = &static_cast(osContext)->getResidencyController(); gmmClientContext = executionEnvironment->rootDeviceEnvironments[0]->getGmmClientContext(); } void TearDown() { osContext->decRefInternal(); } ExecutionEnvironment *executionEnvironment; std::unique_ptr memoryManager; std::unique_ptr csr; WddmMock *wddm = nullptr; OsContext *osContext = nullptr; MockGdi *gdi = nullptr; WddmResidencyController *residencyController = nullptr; GmmClientContext *gmmClientContext = nullptr; }; TEST(WddmResidencyController, 
givenWddmResidencyControllerWhenItIsConstructedThenDoNotRegisterTrimCallback) { MockExecutionEnvironment executionEnvironment; executionEnvironment.prepareRootDeviceEnvironments(1); auto gdi = new MockGdi(); auto wddm = static_cast(Wddm::createWddm(nullptr, *executionEnvironment.rootDeviceEnvironments[0].get())); wddm->resetGdi(gdi); wddm->init(); std::memset(&gdi->getRegisterTrimNotificationArg(), 0, sizeof(D3DKMT_REGISTERTRIMNOTIFICATION)); MockWddmResidencyController residencyController{*wddm, 0u}; EXPECT_EQ(0u, wddm->registerTrimCallbackResult.called); EXPECT_EQ(nullptr, residencyController.trimCallbackHandle); EXPECT_EQ(nullptr, gdi->getRegisterTrimNotificationArg().Callback); EXPECT_EQ(nullptr, gdi->getRegisterTrimNotificationArg().Context); EXPECT_EQ(0u, gdi->getRegisterTrimNotificationArg().hDevice); } TEST(WddmResidencyController, givenWddmResidencyControllerWhenRegisterCallbackThenCallbackIsSetUpProperly) { MockExecutionEnvironment executionEnvironment; executionEnvironment.prepareRootDeviceEnvironments(1); auto gdi = new MockGdi(); auto wddm = static_cast(Wddm::createWddm(nullptr, *executionEnvironment.rootDeviceEnvironments[0].get())); wddm->resetGdi(gdi); wddm->init(); std::memset(&gdi->getRegisterTrimNotificationArg(), 0, sizeof(D3DKMT_REGISTERTRIMNOTIFICATION)); WddmResidencyController residencyController{*wddm, 0u}; residencyController.registerCallback(); EXPECT_EQ(1u, wddm->registerTrimCallbackResult.called); EXPECT_EQ(reinterpret_cast(WddmResidencyController::trimCallback), gdi->getRegisterTrimNotificationArg().Callback); EXPECT_EQ(reinterpret_cast(&residencyController), gdi->getRegisterTrimNotificationArg().Context); EXPECT_EQ(wddm->getDeviceHandle(), gdi->getRegisterTrimNotificationArg().hDevice); } TEST_F(WddmResidencyControllerTest, givenWddmResidencyControllerWhenCallingWasAllocationUsedSinceLastTrimThenReturnCorrectValues) { residencyController->lastTrimFenceValue = 100; EXPECT_FALSE(residencyController->wasAllocationUsedSinceLastTrim(99)); 
EXPECT_FALSE(residencyController->wasAllocationUsedSinceLastTrim(100)); // boundary: equal to lastTrimFenceValue; assumes a strict "greater than" check - confirm against WddmResidencyController
    EXPECT_TRUE(residencyController->wasAllocationUsedSinceLastTrim(101));
}

// updateLastTrimFenceValue() must copy whatever value is currently stored at the monitored fence's CPU address.
TEST_F(WddmResidencyControllerTest, givenWddmResidencyControllerThenUpdateLastTrimFenceValueUsesMonitoredFence) {
    *residencyController->getMonitoredFence().cpuAddress = 1234;
    residencyController->updateLastTrimFenceValue();
    EXPECT_EQ(1234u, residencyController->lastTrimFenceValue);

    *residencyController->getMonitoredFence().cpuAddress = 12345;
    residencyController->updateLastTrimFenceValue();
    EXPECT_EQ(12345u, residencyController->lastTrimFenceValue);
}

// Destroying the OS context (which owns the controller) must pass the registered callback and handle
// to the GDI unregister-trim-notification call.
TEST_F(WddmResidencyControllerWithGdiTest, givenWddmResidencyControllerWhenItIsDestructedThenUnregisterTrimCallback) {
    auto trimCallbackHandle = residencyController->trimCallbackHandle;
    auto trimCallbackAddress = reinterpret_cast(WddmResidencyController::trimCallback);

    // Clear the captured argument so the expectations below can only be satisfied by the destructor path.
    std::memset(&gdi->getUnregisterTrimNotificationArg(), 0, sizeof(D3DKMT_UNREGISTERTRIMNOTIFICATION));
    mockOsContextWin.reset();

    EXPECT_EQ(trimCallbackAddress, gdi->getUnregisterTrimNotificationArg().Callback);
    EXPECT_EQ(trimCallbackHandle, gdi->getUnregisterTrimNotificationArg().Handle);
}

// With the shutdown flag set on wddm, destruction must leave the unregister argument untouched.
TEST_F(WddmResidencyControllerWithGdiTest, givenWddmResidencyControllerWhenItIsDestructedDuringProcessShutdownThenDontUnregisterTrimCallback) {
    wddm->shutdownStatus = true;

    std::memset(&gdi->getUnregisterTrimNotificationArg(), 0, sizeof(D3DKMT_UNREGISTERTRIMNOTIFICATION));
    mockOsContextWin.reset();

    EXPECT_EQ(nullptr, gdi->getUnregisterTrimNotificationArg().Callback);
    EXPECT_EQ(nullptr, gdi->getUnregisterTrimNotificationArg().Handle);
}

// An allocation that is on the trim candidate list must be taken off it by removeFromTrimCandidateListIfUsed().
TEST_F(WddmResidencyControllerTest, givenUsedAllocationWhenCallingRemoveFromTrimCandidateListIfUsedThenRemoveIt) {
    MockWddmAllocation allocation(rootDeviceEnvironment->getGmmClientContext());
    residencyController->addToTrimCandidateList(&allocation);
    residencyController->removeFromTrimCandidateListIfUsed(&allocation, false);
    EXPECT_EQ(trimListUnusedPosition,
allocation.getTrimCandidateListPosition(osContextId)); } TEST_F(WddmResidencyControllerTest, givenWddmResidencyControllerWhenIsMemoryExhaustedIsCalledThenReturnCorrectResult) { EXPECT_FALSE(residencyController->isMemoryBudgetExhausted()); residencyController->setMemoryBudgetExhausted(); EXPECT_TRUE(residencyController->isMemoryBudgetExhausted()); } TEST_F(WddmResidencyControllerTest, givenUnusedAllocationWhenCallingRemoveFromTrimCandidateListIfUsedThenIgnore) { MockWddmAllocation allocation(rootDeviceEnvironment->getGmmClientContext()); residencyController->removeFromTrimCandidateListIfUsed(&allocation, false); EXPECT_EQ(trimListUnusedPosition, allocation.getTrimCandidateListPosition(osContextId)); } TEST_F(WddmResidencyControllerTest, WhenAddingToTrimCandidateListThenAllocationIsPlacedInContainerAndAssignedPosition) { MockWddmAllocation allocation(rootDeviceEnvironment->getGmmClientContext()); residencyController->addToTrimCandidateList(&allocation); EXPECT_NE(0u, residencyController->trimCandidateList.size()); EXPECT_NE(trimListUnusedPosition, allocation.getTrimCandidateListPosition(osContextId)); size_t position = allocation.getTrimCandidateListPosition(osContextId); ASSERT_LT(position, residencyController->trimCandidateList.size()); EXPECT_EQ(&allocation, residencyController->trimCandidateList[position]); } TEST_F(WddmResidencyControllerTest, WhenAddingToTrimCandidateListThenDoNotInsertAllocationAlreadyOnTheList) { MockWddmAllocation allocation(rootDeviceEnvironment->getGmmClientContext()); residencyController->trimCandidateList.resize(0); residencyController->addToTrimCandidateList(&allocation); EXPECT_NE(trimListUnusedPosition, allocation.getTrimCandidateListPosition(osContextId)); size_t position = allocation.getTrimCandidateListPosition(osContextId); ASSERT_LT(position, residencyController->trimCandidateList.size()); EXPECT_EQ(&allocation, residencyController->trimCandidateList[position]); size_t previousSize = residencyController->trimCandidateList.size(); 
residencyController->addToTrimCandidateList(&allocation); EXPECT_EQ(previousSize, residencyController->trimCandidateList.size()); EXPECT_EQ(position, allocation.getTrimCandidateListPosition(osContextId)); } TEST_F(WddmResidencyControllerTest, WhenRemovingFromTrimCandidateListThenUnusedPositionIsAssigned) { MockWddmAllocation allocation(rootDeviceEnvironment->getGmmClientContext()); residencyController->addToTrimCandidateList(&allocation); residencyController->removeFromTrimCandidateList(&allocation, false); EXPECT_EQ(trimListUnusedPosition, allocation.getTrimCandidateListPosition(osContextId)); } TEST_F(WddmResidencyControllerTest, WhenRemovingFromTrimCandidateListThenAllocationInAssignedPositionIsRemoved) { MockWddmAllocation allocation(rootDeviceEnvironment->getGmmClientContext()); residencyController->addToTrimCandidateList(&allocation); size_t position = allocation.getTrimCandidateListPosition(osContextId); residencyController->removeFromTrimCandidateList(&allocation, false); if (residencyController->trimCandidateList.size() > position) { EXPECT_NE(&allocation, residencyController->trimCandidateList[position]); } } TEST_F(WddmResidencyControllerTest, GivenOneAllocationWhenRemovingFromTrimCandidateListThenTrimCandidateListIsEmpty) { MockWddmAllocation allocation(rootDeviceEnvironment->getGmmClientContext()); residencyController->trimCandidateList.resize(0); residencyController->addToTrimCandidateList(&allocation); residencyController->removeFromTrimCandidateList(&allocation, false); EXPECT_EQ(0u, residencyController->trimCandidateList.size()); } TEST_F(WddmResidencyControllerTest, WhenRemovingFromTrimCandidateListThenLastAllocationAndAllPreviousEmptyEntriesAreRemoved) { MockWddmAllocation allocation1(rootDeviceEnvironment->getGmmClientContext()); MockWddmAllocation allocation2(rootDeviceEnvironment->getGmmClientContext()); residencyController->trimCandidateList.resize(0); residencyController->addToTrimCandidateList(&allocation1); 
residencyController->trimCandidateList.push_back(nullptr); residencyController->trimCandidateList.push_back(nullptr); residencyController->trimCandidateList.push_back(nullptr); residencyController->addToTrimCandidateList(&allocation2); EXPECT_EQ(5u, residencyController->trimCandidateList.size()); residencyController->removeFromTrimCandidateList(&allocation2, false); EXPECT_EQ(1u, residencyController->trimCandidateList.size()); } TEST_F(WddmResidencyControllerTest, WhenAddingToTrimCandidateListThenSuccessivePositionIsAssigned) { MockWddmAllocation allocation1(rootDeviceEnvironment->getGmmClientContext()); MockWddmAllocation allocation2(rootDeviceEnvironment->getGmmClientContext()); MockWddmAllocation allocation3(rootDeviceEnvironment->getGmmClientContext()); residencyController->addToTrimCandidateList(&allocation1); residencyController->addToTrimCandidateList(&allocation2); residencyController->addToTrimCandidateList(&allocation3); EXPECT_EQ(3u, residencyController->trimCandidateList.size()); EXPECT_NE(allocation1.getTrimCandidateListPosition(osContextId), allocation2.getTrimCandidateListPosition(osContextId)); EXPECT_NE(allocation2.getTrimCandidateListPosition(osContextId), allocation3.getTrimCandidateListPosition(osContextId)); } TEST_F(WddmResidencyControllerTest, GivenAllocationThatIsNotLastWhenRemovingFromTrimCandidateListAndCompactingThenRemoveEntry) { MockWddmAllocation allocation1(rootDeviceEnvironment->getGmmClientContext()); MockWddmAllocation allocation2(rootDeviceEnvironment->getGmmClientContext()); MockWddmAllocation allocation3(rootDeviceEnvironment->getGmmClientContext()); residencyController->addToTrimCandidateList(&allocation1); residencyController->addToTrimCandidateList(&allocation2); residencyController->addToTrimCandidateList(&allocation3); residencyController->forceTrimCandidateListCompaction = true; residencyController->removeFromTrimCandidateList(&allocation2, true); EXPECT_EQ(2u, residencyController->trimCandidateList.size()); EXPECT_EQ(1u, 
allocation3.getTrimCandidateListPosition(osContextId));
    EXPECT_EQ(trimListUnusedPosition, allocation2.getTrimCandidateListPosition(osContextId));
}

// Removing a middle entry without compaction must keep the list size and leave a null slot behind.
TEST_F(WddmResidencyControllerTest, GivenAllocationThatIsNotLastWhenRemovingFromTrimCandidateListThenReplaceWithNullEntry) {
    MockWddmAllocation allocation1(rootDeviceEnvironment->getGmmClientContext());
    MockWddmAllocation allocation2(rootDeviceEnvironment->getGmmClientContext());
    MockWddmAllocation allocation3(rootDeviceEnvironment->getGmmClientContext());

    residencyController->addToTrimCandidateList(&allocation1);
    residencyController->addToTrimCandidateList(&allocation2);
    residencyController->addToTrimCandidateList(&allocation3);

    // Positions are captured before the removal because removal resets allocation2's stored position.
    size_t position2 = allocation2.getTrimCandidateListPosition(osContextId);
    size_t position3 = allocation3.getTrimCandidateListPosition(osContextId);

    residencyController->removeFromTrimCandidateList(&allocation2, false);

    EXPECT_EQ(3u, residencyController->trimCandidateList.size());
    EXPECT_EQ(2u, position3);
    EXPECT_EQ(nullptr, residencyController->trimCandidateList[position2]);
}

// After the two leading entries are removed, compaction must drop the null slots and renumber the survivors.
TEST_F(WddmResidencyControllerTest, WhenCompactingTrimCandidateListThenInitialNullEntriesAreRemovedAndPositionsAreUpdated) {
    MockWddmAllocation allocation1(rootDeviceEnvironment->getGmmClientContext());
    MockWddmAllocation allocation2(rootDeviceEnvironment->getGmmClientContext());
    MockWddmAllocation allocation3(rootDeviceEnvironment->getGmmClientContext());
    MockWddmAllocation allocation4(rootDeviceEnvironment->getGmmClientContext());

    residencyController->addToTrimCandidateList(&allocation1);
    residencyController->addToTrimCandidateList(&allocation2);
    residencyController->addToTrimCandidateList(&allocation3);
    residencyController->addToTrimCandidateList(&allocation4);

    // Pin the pre-compaction positions so the later expectations (0 and 1) demonstrate an actual update.
    // Previously these two calls discarded their result and verified nothing.
    EXPECT_EQ(2u, allocation3.getTrimCandidateListPosition(osContextId));
    EXPECT_EQ(3u, allocation4.getTrimCandidateListPosition(osContextId));

    residencyController->removeFromTrimCandidateList(&allocation2, false);
    residencyController->removeFromTrimCandidateList(&allocation1, false);
EXPECT_EQ(4u, residencyController->trimCandidateList.size()); residencyController->compactTrimCandidateList(); EXPECT_EQ(2u, residencyController->trimCandidateList.size()); EXPECT_EQ(residencyController->trimCandidateList[0], &allocation3); EXPECT_EQ(0u, allocation3.getTrimCandidateListPosition(osContextId)); EXPECT_EQ(residencyController->trimCandidateList[1], &allocation4); EXPECT_EQ(1u, allocation4.getTrimCandidateListPosition(osContextId)); } TEST_F(WddmResidencyControllerTest, WhenCompactingTrimCandidateListThenNonNullEntriesAreNotRemoved) { MockWddmAllocation allocation1(rootDeviceEnvironment->getGmmClientContext()); MockWddmAllocation allocation2(rootDeviceEnvironment->getGmmClientContext()); MockWddmAllocation allocation3(rootDeviceEnvironment->getGmmClientContext()); MockWddmAllocation allocation4(rootDeviceEnvironment->getGmmClientContext()); residencyController->addToTrimCandidateList(&allocation1); residencyController->addToTrimCandidateList(&allocation2); residencyController->addToTrimCandidateList(&allocation3); residencyController->addToTrimCandidateList(&allocation4); EXPECT_EQ(4u, residencyController->trimCandidateList.size()); residencyController->compactTrimCandidateList(); EXPECT_EQ(4u, residencyController->trimCandidateList.size()); } TEST_F(WddmResidencyControllerTest, GivenListSizeLessThenDoubleCandidateCountWhenCheckingTrimCandidateListCompactionThenCompactionIsRequired) { bool comapactionRequired; residencyController->trimCandidatesCount = 10; residencyController->trimCandidateList.resize(20); comapactionRequired = residencyController->checkTrimCandidateListCompaction(); EXPECT_TRUE(comapactionRequired); residencyController->trimCandidatesCount = 5; residencyController->trimCandidateList.resize(20); comapactionRequired = residencyController->checkTrimCandidateListCompaction(); EXPECT_TRUE(comapactionRequired); residencyController->trimCandidatesCount = 18; residencyController->trimCandidateList.resize(20); comapactionRequired = 
residencyController->checkTrimCandidateListCompaction(); EXPECT_FALSE(comapactionRequired); } TEST_F(WddmResidencyControllerWithGdiTest, givenNotUsedAllocationsFromPreviousPeriodicTrimWhenTrimResidencyPeriodicTrimIsCalledThenAllocationsAreEvictedMarkedAndRemovedFromTrimCandidateList) { D3DKMT_TRIMNOTIFICATION trimNotification = {0}; trimNotification.Flags.PeriodicTrim = 1; trimNotification.NumBytesToTrim = 0; // allocations have fence value == 0 by default MockWddmAllocation allocation1(rootDeviceEnvironment->getGmmClientContext()); MockWddmAllocation allocation2(rootDeviceEnvironment->getGmmClientContext()); allocation1.getResidencyData().updateCompletionData(0, osContextId); allocation2.getResidencyData().updateCompletionData(0, osContextId); allocation1.getResidencyData().resident[osContextId] = true; allocation2.getResidencyData().resident[osContextId] = true; // Set last periodic fence value residencyController->lastTrimFenceValue = 10; // Set current fence value to greater value residencyController->getMonitoredFence().currentFenceValue = 20; wddm->evictResult.called = 0; residencyController->addToTrimCandidateList(&allocation1); residencyController->addToTrimCandidateList(&allocation2); residencyController->trimResidency(trimNotification.Flags, trimNotification.NumBytesToTrim); // 2 allocations evicted EXPECT_EQ(2u, wddm->evictResult.called); // removed from trim candidate list EXPECT_EQ(0u, residencyController->peekTrimCandidateList().size()); // marked nonresident EXPECT_FALSE(allocation1.getResidencyData().resident[osContextId]); EXPECT_FALSE(allocation2.getResidencyData().resident[osContextId]); } TEST_F(WddmResidencyControllerWithGdiTest, givenOneUsedAllocationFromPreviousPeriodicTrimWhenTrimResidencyPeriodicTrimIsCalledThenOneAllocationIsTrimmed) { D3DKMT_TRIMNOTIFICATION trimNotification = {0}; trimNotification.Flags.PeriodicTrim = 1; trimNotification.NumBytesToTrim = 0; // allocations have fence value == 0 by default MockWddmAllocation 
allocation1(rootDeviceEnvironment->getGmmClientContext()); MockWddmAllocation allocation2(rootDeviceEnvironment->getGmmClientContext()); allocation1.getResidencyData().resident[osContextId] = true; // mark allocation used from last periodic trim allocation1.getResidencyData().updateCompletionData(0, osContextId); allocation2.getResidencyData().updateCompletionData(11, osContextId); allocation2.getResidencyData().resident[osContextId] = true; // Set last periodic fence value residencyController->lastTrimFenceValue = 10; // Set current fence value to greater value residencyController->getMonitoredFence().currentFenceValue = 20; wddm->evictResult.called = 0; residencyController->addToTrimCandidateList(&allocation1); residencyController->addToTrimCandidateList(&allocation2); residencyController->trimResidency(trimNotification.Flags, trimNotification.NumBytesToTrim); // 1 allocation evicted EXPECT_EQ(1u, wddm->evictResult.called); // removed from trim candidate list EXPECT_EQ(trimListUnusedPosition, allocation1.getTrimCandidateListPosition(osContextId)); //marked nonresident EXPECT_FALSE(allocation1.getResidencyData().resident[osContextId]); // second stays resident EXPECT_TRUE(allocation2.getResidencyData().resident[osContextId]); } TEST_F(WddmResidencyControllerWithGdiAndMemoryManagerTest, givenTripleAllocationWithUsedAndUnusedFragmentsSincePreviousTrimWhenTrimResidencyPeriodicTrimIsCalledThenProperFragmentsAreEvictedAndMarked) { if (memoryManager->isLimitedGPU(0)) { GTEST_SKIP(); } D3DKMT_TRIMNOTIFICATION trimNotification = {0}; trimNotification.Flags.PeriodicTrim = 1; trimNotification.NumBytesToTrim = 0; // 3-fragment Allocation void *ptr = reinterpret_cast(wddm->virtualAllocAddress + 0x1500); auto allocationTriple = static_cast(memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, 2 * MemoryConstants::pageSize}, ptr)); // whole allocation unused since previous trim 
allocationTriple->getResidencyData().updateCompletionData(0, osContextId); EXPECT_EQ(3u, allocationTriple->fragmentsStorage.fragmentCount); allocationTriple->fragmentsStorage.fragmentStorageData[0].residency->updateCompletionData(0, osContextId); allocationTriple->fragmentsStorage.fragmentStorageData[0].residency->resident[osContextId] = true; // this fragment was used allocationTriple->fragmentsStorage.fragmentStorageData[1].residency->updateCompletionData(11, osContextId); allocationTriple->fragmentsStorage.fragmentStorageData[0].residency->resident[osContextId] = true; allocationTriple->fragmentsStorage.fragmentStorageData[2].residency->updateCompletionData(0, osContextId); allocationTriple->fragmentsStorage.fragmentStorageData[2].residency->resident[osContextId] = true; // Set last periodic fence value *residencyController->getMonitoredFence().cpuAddress = 10; residencyController->updateLastTrimFenceValue(); // Set current fence value to greater value residencyController->getMonitoredFence().currentFenceValue = 20; wddm->evictResult.called = 0; residencyController->addToTrimCandidateList(allocationTriple); residencyController->trimResidency(trimNotification.Flags, trimNotification.NumBytesToTrim); // 2 fragments evicted with one call EXPECT_EQ(1u, wddm->evictResult.called); // marked nonresident EXPECT_FALSE(allocationTriple->fragmentsStorage.fragmentStorageData[0].residency->resident[osContextId]); EXPECT_FALSE(allocationTriple->fragmentsStorage.fragmentStorageData[2].residency->resident[osContextId]); memoryManager->freeGraphicsMemory(allocationTriple); } TEST_F(WddmResidencyControllerWithGdiTest, givenPeriodicTrimWhenTrimCallbackCalledThenLastPeriodicTrimFenceIsSetToCurrentFenceValue) { D3DKMT_TRIMNOTIFICATION trimNotification = {0}; trimNotification.Flags.PeriodicTrim = 1; trimNotification.NumBytesToTrim = 0; // Set last periodic fence value residencyController->lastTrimFenceValue = 10; // Set current fence value to greater value 
*residencyController->getMonitoredFence().cpuAddress = 20; residencyController->trimResidency(trimNotification.Flags, trimNotification.NumBytesToTrim); EXPECT_EQ(20u, residencyController->lastTrimFenceValue); } TEST_F(WddmResidencyControllerWithGdiTest, givenRestartPeriodicTrimWhenTrimCallbackCalledThenLastPeriodicTrimFenceIsSetToCurrentFenceValue) { D3DKMT_TRIMNOTIFICATION trimNotification = {0}; trimNotification.Flags.RestartPeriodicTrim = 1; trimNotification.NumBytesToTrim = 0; // Set last periodic fence value residencyController->lastTrimFenceValue = 10; // Set current fence value to greater value *residencyController->getMonitoredFence().cpuAddress = 20; residencyController->trimResidency(trimNotification.Flags, trimNotification.NumBytesToTrim); EXPECT_EQ(20u, residencyController->lastTrimFenceValue); } TEST_F(WddmResidencyControllerWithGdiTest, GivenZeroWhenTrimmingToBudgetThenTrueIsReturned) { bool status = residencyController->trimResidencyToBudget(0); EXPECT_TRUE(status); } TEST_F(WddmResidencyControllerWithGdiTest, WhenTrimmingToBudgetThenAllDoneAllocationsAreTrimmed) { gdi->setNonZeroNumBytesToTrimInEvict(); MockWddmAllocation allocation1(rootDeviceEnvironment->getGmmClientContext()); MockWddmAllocation allocation2(rootDeviceEnvironment->getGmmClientContext()); MockWddmAllocation allocation3(rootDeviceEnvironment->getGmmClientContext()); allocation1.getResidencyData().resident[osContextId] = true; allocation1.getResidencyData().updateCompletionData(0, osContextId); allocation2.getResidencyData().updateCompletionData(1, osContextId); allocation2.getResidencyData().resident[osContextId] = true; allocation3.getResidencyData().updateCompletionData(2, osContextId); allocation3.getResidencyData().resident[osContextId] = true; *residencyController->getMonitoredFence().cpuAddress = 1; residencyController->getMonitoredFence().lastSubmittedFence = 1; residencyController->getMonitoredFence().currentFenceValue = 1; wddm->evictResult.called = 0; 
residencyController->addToTrimCandidateList(&allocation1); residencyController->addToTrimCandidateList(&allocation2); residencyController->addToTrimCandidateList(&allocation3); residencyController->trimResidencyToBudget(3 * 4096); EXPECT_EQ(2u, wddm->evictResult.called); EXPECT_EQ(1u, residencyController->peekTrimCandidatesCount()); residencyController->compactTrimCandidateList(); EXPECT_EQ(1u, residencyController->peekTrimCandidateList().size()); EXPECT_EQ(trimListUnusedPosition, allocation1.getTrimCandidateListPosition(osContextId)); EXPECT_EQ(trimListUnusedPosition, allocation2.getTrimCandidateListPosition(osContextId)); EXPECT_NE(trimListUnusedPosition, allocation3.getTrimCandidateListPosition(osContextId)); } TEST_F(WddmResidencyControllerWithGdiTest, GivenNumBytesToTrimIsNotZeroWhenTrimmingToBudgetThenFalseIsReturned) { gdi->setNonZeroNumBytesToTrimInEvict(); MockWddmAllocation allocation1(rootDeviceEnvironment->getGmmClientContext()); allocation1.getResidencyData().resident[osContextId] = true; allocation1.getResidencyData().updateCompletionData(0, osContextId); *residencyController->getMonitoredFence().cpuAddress = 1; residencyController->getMonitoredFence().lastSubmittedFence = 1; wddm->evictResult.called = 0; residencyController->addToTrimCandidateList(&allocation1); bool status = residencyController->trimResidencyToBudget(3 * 4096); EXPECT_EQ(1u, wddm->evictResult.called); EXPECT_EQ(0u, residencyController->peekTrimCandidateList().size()); EXPECT_FALSE(status); } TEST_F(WddmResidencyControllerWithGdiTest, GivenNumBytesToTrimIsZeroWhenTrimmingToBudgetThenEvictingStops) { WddmAllocation allocation1(0, AllocationType::UNKNOWN, reinterpret_cast(0x1000), 0x1000, nullptr, MemoryPool::MemoryNull, 0u, 1u); WddmAllocation allocation2(0, AllocationType::UNKNOWN, reinterpret_cast(0x1000), 0x3000, nullptr, MemoryPool::MemoryNull, 0u, 1u); WddmAllocation allocation3(0, AllocationType::UNKNOWN, reinterpret_cast(0x1000), 0x1000, nullptr, MemoryPool::MemoryNull, 0u, 
1u); allocation1.getResidencyData().resident[osContextId] = true; allocation1.getResidencyData().updateCompletionData(0, osContextId); allocation2.getResidencyData().updateCompletionData(1, osContextId); allocation2.getResidencyData().resident[osContextId] = true; allocation3.getResidencyData().updateCompletionData(2, osContextId); allocation3.getResidencyData().resident[osContextId] = true; *residencyController->getMonitoredFence().cpuAddress = 1; residencyController->getMonitoredFence().lastSubmittedFence = 1; residencyController->getMonitoredFence().currentFenceValue = 1; wddm->evictResult.called = 0; residencyController->addToTrimCandidateList(&allocation1); residencyController->addToTrimCandidateList(&allocation2); residencyController->addToTrimCandidateList(&allocation3); bool status = residencyController->trimResidencyToBudget(3 * 4096); EXPECT_TRUE(status); EXPECT_EQ(2u, wddm->evictResult.called); EXPECT_EQ(1u, residencyController->peekTrimCandidateList().size()); EXPECT_EQ(trimListUnusedPosition, allocation1.getTrimCandidateListPosition(osContextId)); EXPECT_EQ(trimListUnusedPosition, allocation2.getTrimCandidateListPosition(osContextId)); EXPECT_NE(trimListUnusedPosition, allocation3.getTrimCandidateListPosition(osContextId)); } TEST_F(WddmResidencyControllerWithGdiTest, WhenTrimmingToBudgetThenEvictedAllocationIsMarkedNonResident) { gdi->setNonZeroNumBytesToTrimInEvict(); MockWddmAllocation allocation1(rootDeviceEnvironment->getGmmClientContext()); MockWddmAllocation allocation2(rootDeviceEnvironment->getGmmClientContext()); MockWddmAllocation allocation3(rootDeviceEnvironment->getGmmClientContext()); allocation1.getResidencyData().resident[osContextId] = true; allocation1.getResidencyData().updateCompletionData(0, osContextId); allocation2.getResidencyData().updateCompletionData(1, osContextId); allocation2.getResidencyData().resident[osContextId] = true; allocation3.getResidencyData().updateCompletionData(2, osContextId); 
allocation3.getResidencyData().resident[osContextId] = true; *residencyController->getMonitoredFence().cpuAddress = 1; residencyController->getMonitoredFence().lastSubmittedFence = 1; residencyController->getMonitoredFence().currentFenceValue = 1; wddm->evictResult.called = 0; residencyController->addToTrimCandidateList(&allocation1); residencyController->addToTrimCandidateList(&allocation2); residencyController->addToTrimCandidateList(&allocation3); residencyController->trimResidencyToBudget(3 * 4096); EXPECT_FALSE(allocation1.getResidencyData().resident[osContextId]); EXPECT_FALSE(allocation2.getResidencyData().resident[osContextId]); EXPECT_TRUE(allocation3.getResidencyData().resident[osContextId]); } TEST_F(WddmResidencyControllerWithGdiTest, GivenLastFenceIsGreaterThanMonitoredWhenTrimmingToBudgetThenWaitForCpu) { gdi->setNonZeroNumBytesToTrimInEvict(); MockWddmAllocation allocation1(rootDeviceEnvironment->getGmmClientContext()); allocation1.getResidencyData().resident[osContextId] = true; allocation1.getResidencyData().updateCompletionData(2, osContextId); *residencyController->getMonitoredFence().cpuAddress = 1; residencyController->getMonitoredFence().lastSubmittedFence = 2; residencyController->getMonitoredFence().currentFenceValue = 3; wddm->evictResult.called = 0; wddm->waitFromCpuResult.called = 0; residencyController->addToTrimCandidateList(&allocation1); gdi->getWaitFromCpuArg().hDevice = 0; residencyController->trimResidencyToBudget(3 * 4096); EXPECT_EQ(1u, wddm->evictResult.called); EXPECT_FALSE(allocation1.getResidencyData().resident[osContextId]); EXPECT_EQ(wddm->getDeviceHandle(), gdi->getWaitFromCpuArg().hDevice); } TEST_F(WddmResidencyControllerWithGdiAndMemoryManagerTest, WhenTrimmingToBudgetThenOnlyDoneFragmentsAreEvicted) { if (memoryManager->isLimitedGPU(0)) { GTEST_SKIP(); } gdi->setNonZeroNumBytesToTrimInEvict(); void *ptr = reinterpret_cast(wddm->virtualAllocAddress + 0x1000); WddmAllocation allocation1(0, AllocationType::UNKNOWN, ptr, 
0x1000, nullptr, MemoryPool::MemoryNull, 0u, 1u); WddmAllocation allocation2(0, AllocationType::UNKNOWN, ptr, 0x1000, nullptr, MemoryPool::MemoryNull, 0u, 1u); allocation1.getResidencyData().resident[osContextId] = true; allocation1.getResidencyData().updateCompletionData(0, osContextId); allocation2.getResidencyData().updateCompletionData(1, osContextId); allocation2.getResidencyData().resident[osContextId] = true; void *ptrTriple = reinterpret_cast(reinterpret_cast(ptr) + 0x500); WddmAllocation *allocationTriple = static_cast(memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, 2 * MemoryConstants::pageSize}, ptrTriple)); allocationTriple->getResidencyData().updateCompletionData(1, osContextId); allocationTriple->getResidencyData().resident[osContextId] = true; EXPECT_EQ(3u, allocationTriple->fragmentsStorage.fragmentCount); for (uint32_t i = 0; i < 3; i++) { allocationTriple->fragmentsStorage.fragmentStorageData[i].residency->updateCompletionData(1, osContextId); allocationTriple->fragmentsStorage.fragmentStorageData[i].residency->resident[osContextId] = true; } // This should not be evicted allocationTriple->fragmentsStorage.fragmentStorageData[1].residency->updateCompletionData(2, osContextId); residencyController->addToTrimCandidateList(&allocation1); residencyController->addToTrimCandidateList(allocationTriple); residencyController->addToTrimCandidateList(&allocation2); *residencyController->getMonitoredFence().cpuAddress = 1; residencyController->getMonitoredFence().lastSubmittedFence = 1; residencyController->getMonitoredFence().currentFenceValue = 2; wddm->evictResult.called = 0; residencyController->trimResidencyToBudget(3 * 4096); EXPECT_EQ(2u, wddm->evictResult.called); EXPECT_FALSE(allocationTriple->fragmentsStorage.fragmentStorageData[0].residency->resident[osContextId]); EXPECT_TRUE(allocationTriple->fragmentsStorage.fragmentStorageData[1].residency->resident[osContextId]); 
EXPECT_FALSE(allocationTriple->fragmentsStorage.fragmentStorageData[2].residency->resident[osContextId]); memoryManager->freeGraphicsMemory(allocationTriple); } TEST_F(WddmResidencyControllerWithGdiTest, givenThreeAllocationsAlignedSizeBiggerThanAllocSizeWhenBudgetEqualTwoAlignedAllocationThenEvictOnlyTwo) { gdi->setNonZeroNumBytesToTrimInEvict(); size_t underlyingSize = 0xF00; size_t alignedSize = 0x1000; size_t budget = 2 * alignedSize; //trim budget should consider aligned size, not underlying, so if function considers underlying, it should evict three, not two EXPECT_GT((3 * underlyingSize), budget); EXPECT_LT((2 * underlyingSize), budget); void *ptr1 = reinterpret_cast(wddm->virtualAllocAddress + 0x1000); void *ptr2 = reinterpret_cast(wddm->virtualAllocAddress + 0x3000); void *ptr3 = reinterpret_cast(wddm->virtualAllocAddress + 0x5000); WddmAllocation allocation1(0, AllocationType::UNKNOWN, ptr1, underlyingSize, nullptr, MemoryPool::MemoryNull, 0u, 1u); WddmAllocation allocation2(0, AllocationType::UNKNOWN, ptr2, underlyingSize, nullptr, MemoryPool::MemoryNull, 0u, 1u); WddmAllocation allocation3(0, AllocationType::UNKNOWN, ptr3, underlyingSize, nullptr, MemoryPool::MemoryNull, 0u, 1u); allocation1.getResidencyData().resident[osContextId] = true; allocation1.getResidencyData().updateCompletionData(0, osContextId); allocation2.getResidencyData().updateCompletionData(1, osContextId); allocation2.getResidencyData().resident[osContextId] = true; allocation3.getResidencyData().updateCompletionData(1, osContextId); allocation3.getResidencyData().resident[osContextId] = true; *residencyController->getMonitoredFence().cpuAddress = 1; residencyController->getMonitoredFence().lastSubmittedFence = 1; residencyController->getMonitoredFence().currentFenceValue = 1; wddm->evictResult.called = 0; residencyController->addToTrimCandidateList(&allocation1); residencyController->addToTrimCandidateList(&allocation2); residencyController->addToTrimCandidateList(&allocation3); bool 
status = residencyController->trimResidencyToBudget(budget); EXPECT_TRUE(status); EXPECT_FALSE(allocation1.getResidencyData().resident[osContextId]); EXPECT_FALSE(allocation2.getResidencyData().resident[osContextId]); EXPECT_TRUE(allocation3.getResidencyData().resident[osContextId]); } using WddmResidencyControllerLockTest = WddmResidencyControllerWithGdiTest; TEST_F(WddmResidencyControllerLockTest, givenPeriodicTrimWhenTrimmingResidencyThenLockOnce) { D3DKMT_TRIMNOTIFICATION trimNotification = {0}; trimNotification.Flags.PeriodicTrim = 1; trimNotification.NumBytesToTrim = 0; residencyController->trimResidency(trimNotification.Flags, trimNotification.NumBytesToTrim); EXPECT_EQ(1, residencyController->acquireLockCallCount); } TEST_F(WddmResidencyControllerLockTest, givenTrimToBudgetWhenTrimmingResidencyThenLockOnce) { D3DKMT_TRIMNOTIFICATION trimNotification = {0}; trimNotification.Flags.TrimToBudget = 1; trimNotification.NumBytesToTrim = 0; residencyController->trimResidency(trimNotification.Flags, trimNotification.NumBytesToTrim); EXPECT_EQ(1, residencyController->acquireLockCallCount); } TEST_F(WddmResidencyControllerLockTest, givenPeriodicTrimAndTrimToBudgetWhenTrimmingResidencyThenLockTwice) { D3DKMT_TRIMNOTIFICATION trimNotification = {0}; trimNotification.Flags.PeriodicTrim = 1; trimNotification.Flags.TrimToBudget = 1; trimNotification.NumBytesToTrim = 0; residencyController->trimResidency(trimNotification.Flags, trimNotification.NumBytesToTrim); EXPECT_EQ(2, residencyController->acquireLockCallCount); } TEST_F(WddmResidencyControllerWithGdiAndMemoryManagerTest, WhenMakingResidentResidencyAllocationsThenAllAllocationsAreMarked) { MockWddmAllocation allocation1(gmmClientContext); MockWddmAllocation allocation2(gmmClientContext); MockWddmAllocation allocation3(gmmClientContext); MockWddmAllocation allocation4(gmmClientContext); ResidencyContainer residencyPack{&allocation1, &allocation2, &allocation3, &allocation4}; 
residencyController->makeResidentResidencyAllocations(residencyPack); EXPECT_TRUE(allocation1.getResidencyData().resident[osContextId]); EXPECT_TRUE(allocation2.getResidencyData().resident[osContextId]); EXPECT_TRUE(allocation3.getResidencyData().resident[osContextId]); EXPECT_TRUE(allocation4.getResidencyData().resident[osContextId]); } TEST_F(WddmResidencyControllerWithGdiAndMemoryManagerTest, WhenMakingResidentResidencyAllocationsThenLastFenceIsUpdated) { MockWddmAllocation allocation1(gmmClientContext); MockWddmAllocation allocation2(gmmClientContext); MockWddmAllocation allocation3(gmmClientContext); MockWddmAllocation allocation4(gmmClientContext); ResidencyContainer residencyPack{&allocation1, &allocation2, &allocation3, &allocation4}; residencyController->getMonitoredFence().currentFenceValue = 20; residencyController->makeResidentResidencyAllocations(residencyPack); EXPECT_EQ(20u, allocation1.getResidencyData().getFenceValueForContextId(osContext->getContextId())); EXPECT_EQ(20u, allocation2.getResidencyData().getFenceValueForContextId(osContext->getContextId())); EXPECT_EQ(20u, allocation3.getResidencyData().getFenceValueForContextId(osContext->getContextId())); EXPECT_EQ(20u, allocation4.getResidencyData().getFenceValueForContextId(osContext->getContextId())); } TEST_F(WddmResidencyControllerWithGdiAndMemoryManagerTest, GivenTripleAllocationsWhenMakingResidentResidencyAllocationsThenAllAllocationsAreMarkedResident) { if (executionEnvironment->memoryManager.get()->isLimitedGPU(0)) { GTEST_SKIP(); } MockWddmAllocation allocation1(gmmClientContext); MockWddmAllocation allocation2(gmmClientContext); void *ptr = reinterpret_cast(wddm->virtualAllocAddress + 0x1500); wddm->callBaseMakeResident = true; WddmAllocation *allocationTriple = (WddmAllocation *)memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, 2 * MemoryConstants::pageSize}, ptr); ResidencyContainer residencyPack{&allocation1, 
allocationTriple, &allocation2}; residencyController->makeResidentResidencyAllocations(residencyPack); for (uint32_t i = 0; i < allocationTriple->fragmentsStorage.fragmentCount; i++) { EXPECT_TRUE(allocationTriple->fragmentsStorage.fragmentStorageData[i].residency->resident[osContextId]); } EXPECT_EQ(EngineLimits::maxHandleCount + 3 + EngineLimits::maxHandleCount, gdi->getMakeResidentArg().NumAllocations); memoryManager->freeGraphicsMemory(allocationTriple); } TEST_F(WddmResidencyControllerWithGdiAndMemoryManagerTest, GivenTripleAllocationsWhenMakingResidentResidencyAllocationsThenLastFencePlusOneIsSet) { MockWddmAllocation allocation1(gmmClientContext); MockWddmAllocation allocation2(gmmClientContext); WddmAllocation *allocationTriple = static_cast(memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, 2 * MemoryConstants::pageSize}, reinterpret_cast(0x1500))); residencyController->getMonitoredFence().currentFenceValue = 20; ResidencyContainer residencyPack{&allocation1, allocationTriple, &allocation2}; residencyController->makeResidentResidencyAllocations(residencyPack); for (uint32_t i = 0; i < allocationTriple->fragmentsStorage.fragmentCount; i++) { EXPECT_EQ(20u, allocationTriple->fragmentsStorage.fragmentStorageData[i].residency->getFenceValueForContextId(0)); } memoryManager->freeGraphicsMemory(allocationTriple); } TEST_F(WddmResidencyControllerWithMockWddmTest, givenMakeResidentFailsWhenCallingMakeResidentResidencyAllocationsThenDontMarkAllocationsAsResident) { MockWddmAllocation allocation1(gmmClientContext); MockWddmAllocation allocation2(gmmClientContext); MockWddmAllocation allocation3(gmmClientContext); MockWddmAllocation allocation4(gmmClientContext); wddm->makeResidentNumberOfBytesToTrim = 4 * 4096; wddm->makeResidentStatus = false; ResidencyContainer residencyPack{&allocation1, &allocation2, &allocation3, &allocation4}; bool result = 
residencyController->makeResidentResidencyAllocations(residencyPack); EXPECT_FALSE(result); EXPECT_FALSE(allocation1.getResidencyData().resident[osContextId]); EXPECT_FALSE(allocation2.getResidencyData().resident[osContextId]); EXPECT_FALSE(allocation3.getResidencyData().resident[osContextId]); EXPECT_FALSE(allocation4.getResidencyData().resident[osContextId]); EXPECT_EQ(2u, wddm->makeResidentResult.called); } TEST_F(WddmResidencyControllerWithMockWddmTest, givenMakeResidentFailsWhenCallingMakeResidentResidencyAllocationsThenDontMarkTripleAllocationsAsResident) { MockWddmAllocation allocation1(gmmClientContext); MockWddmAllocation allocation2(gmmClientContext); wddm->callBaseCreateAllocationsAndMapGpuVa = true; void *ptr = reinterpret_cast(wddm->getWddmMinAddress() + 0x1500); WddmAllocation *allocationTriple = static_cast(memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, 2 * MemoryConstants::pageSize}, ptr)); ASSERT_NE(nullptr, allocationTriple); wddm->makeResidentNumberOfBytesToTrim = 4 * 4096; wddm->makeResidentStatus = false; ResidencyContainer residencyPack{&allocation1, allocationTriple, &allocation2}; bool result = residencyController->makeResidentResidencyAllocations(residencyPack); EXPECT_FALSE(result); for (uint32_t i = 0; i < allocationTriple->fragmentsStorage.fragmentCount; i++) { EXPECT_FALSE(allocationTriple->fragmentsStorage.fragmentStorageData[i].residency->resident[osContextId]); } memoryManager->freeGraphicsMemory(allocationTriple); EXPECT_EQ(2u, wddm->makeResidentResult.called); } TEST_F(WddmResidencyControllerWithMockWddmTest, givenMakeResidentFailsWhenCallingMakeResidentResidencyAllocationsThenCallItAgainWithCantTrimFurtherSetToTrue) { MockWddmAllocation allocation1(gmmClientContext); wddm->makeResidentNumberOfBytesToTrim = 4 * 4096; wddm->makeResidentStatus = false; ResidencyContainer residencyPack{&allocation1}; bool result = 
residencyController->makeResidentResidencyAllocations(residencyPack); EXPECT_FALSE(result); EXPECT_NE(wddm->makeResidentParamsPassed[0].cantTrimFurther, wddm->makeResidentParamsPassed[1].cantTrimFurther); EXPECT_EQ(2u, wddm->makeResidentResult.called); } TEST_F(WddmResidencyControllerWithMockWddmTest, givenAllocationPackPassedWhenCallingMakeResidentResidencyAllocationsThenItIsUsed) { MockWddmAllocation allocation1(gmmClientContext); MockWddmAllocation allocation2(gmmClientContext); allocation1.handle = 1; allocation2.handle = 2; ResidencyContainer residencyPack{&allocation1, &allocation2}; bool result = residencyController->makeResidentResidencyAllocations(residencyPack); EXPECT_TRUE(result); EXPECT_EQ(2 * EngineLimits::maxHandleCount, wddm->makeResidentResult.handleCount); EXPECT_EQ(false, wddm->makeResidentResult.cantTrimFurther); EXPECT_EQ(1, wddm->makeResidentResult.handlePack[0 * EngineLimits::maxHandleCount]); EXPECT_EQ(2, wddm->makeResidentResult.handlePack[1 * EngineLimits::maxHandleCount]); EXPECT_EQ(1u, wddm->makeResidentResult.called); } TEST_F(WddmResidencyControllerWithMockWddmTest, givenMakeResidentFailsAndTrimToBudgetSuceedsWhenCallingMakeResidentResidencyAllocationsThenSucceed) { MockWddmAllocation allocation1(gmmClientContext); void *cpuPtr = reinterpret_cast(wddm->getWddmMinAddress() + 0x1000); size_t allocationSize = 0x1000; WddmAllocation allocationToTrim(0, AllocationType::UNKNOWN, cpuPtr, allocationSize, nullptr, MemoryPool::MemoryNull, 0u, 1u); allocationToTrim.getResidencyData().updateCompletionData(residencyController->getMonitoredFence().lastSubmittedFence, osContext->getContextId()); wddm->makeResidentNumberOfBytesToTrim = allocationSize; wddm->makeResidentResults = {false, true}; residencyController->addToTrimCandidateList(&allocationToTrim); ResidencyContainer residencyPack{&allocation1}; bool result = residencyController->makeResidentResidencyAllocations(residencyPack); EXPECT_TRUE(result); 
EXPECT_TRUE(allocation1.getResidencyData().resident[osContextId]); EXPECT_EQ(2u, wddm->makeResidentResult.called); } TEST_F(WddmResidencyControllerWithMockWddmTest, givenMakeResidentFailsWhenCallingMakeResidentResidencyAllocationsThenMemoryBudgetExhaustedIsSetToTrue) { MockWddmAllocation allocation1(gmmClientContext); ResidencyContainer residencyPack{&allocation1}; wddm->makeResidentResults = {false, true}; residencyController->makeResidentResidencyAllocations(residencyPack); EXPECT_TRUE(residencyController->isMemoryBudgetExhausted()); EXPECT_EQ(2u, wddm->makeResidentResult.called); } wddm_residency_handler_tests.cpp000066400000000000000000000154661422164147700343420ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/os_interface/windows/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/windows/wddm_memory_operations_handler.h" #include "shared/source/utilities/stackvec.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/ult_hw_config.h" #include "shared/test/common/helpers/variable_backup.h" #include "shared/test/common/mocks/mock_allocation_properties.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/mocks/mock_wddm.h" #include "shared/test/common/os_interface/windows/wddm_fixture.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/mem_obj/mem_obj.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "opencl/test/unit_test/os_interface/windows/mock_wddm_allocation.h" using namespace NEO; struct WddmMemoryOperationsHandlerTest : public WddmTest { void SetUp() override { WddmTest::SetUp(); wddmMemoryOperationsHandler = std::make_unique(wddm); wddmAllocation = std::make_unique(rootDeviceEnvironment->getGmmClientContext()); wddmFragmentedAllocation 
= std::make_unique(rootDeviceEnvironment->getGmmClientContext()); wddmAllocation->handle = 0x2u; osHandleStorageFirst = std::make_unique(); osHandleStorageSecond = std::make_unique(); wddmFragmentedAllocation->fragmentsStorage.fragmentCount = 2; wddmFragmentedAllocation->fragmentsStorage.fragmentStorageData[0].osHandleStorage = osHandleStorageFirst.get(); static_cast(wddmFragmentedAllocation->fragmentsStorage.fragmentStorageData[0].osHandleStorage)->handle = 0x3u; wddmFragmentedAllocation->fragmentsStorage.fragmentStorageData[1].osHandleStorage = osHandleStorageSecond.get(); static_cast(wddmFragmentedAllocation->fragmentsStorage.fragmentStorageData[1].osHandleStorage)->handle = 0x4u; allocationPtr = wddmAllocation.get(); allocationData.push_back(wddmAllocation.get()); allocationData.push_back(wddmFragmentedAllocation.get()); } std::unique_ptr wddmMemoryOperationsHandler; std::unique_ptr wddmAllocation; std::unique_ptr wddmFragmentedAllocation; std::unique_ptr osHandleStorageFirst; std::unique_ptr osHandleStorageSecond; GraphicsAllocation *allocationPtr; StackVec allocationData; }; TEST_F(WddmMemoryOperationsHandlerTest, givenRegularAllocationWhenMakingResidentAllocationThenMakeResidentCalled) { EXPECT_EQ(wddmMemoryOperationsHandler->makeResident(nullptr, ArrayRef(&allocationPtr, 1)), MemoryOperationsStatus::SUCCESS); EXPECT_EQ(wddmMemoryOperationsHandler->isResident(nullptr, *wddmAllocation), MemoryOperationsStatus::SUCCESS); } TEST_F(WddmMemoryOperationsHandlerTest, givenFragmentedAllocationWhenMakingResidentAllocationThenMakeResidentCalled) { allocationPtr = wddmFragmentedAllocation.get(); EXPECT_EQ(wddmMemoryOperationsHandler->makeResident(nullptr, ArrayRef(&allocationPtr, 1)), MemoryOperationsStatus::SUCCESS); EXPECT_EQ(wddmMemoryOperationsHandler->isResident(nullptr, *wddmFragmentedAllocation), MemoryOperationsStatus::SUCCESS); } TEST_F(WddmMemoryOperationsHandlerTest, givenVariousAllocationsWhenMakingResidentAllocationThenMakeResidentCalled) { 
EXPECT_EQ(wddmMemoryOperationsHandler->makeResident(nullptr, ArrayRef(allocationData)), MemoryOperationsStatus::SUCCESS); EXPECT_EQ(wddmMemoryOperationsHandler->isResident(nullptr, *wddmAllocation), MemoryOperationsStatus::SUCCESS); EXPECT_EQ(wddmMemoryOperationsHandler->isResident(nullptr, *wddmFragmentedAllocation), MemoryOperationsStatus::SUCCESS); } TEST_F(WddmMemoryOperationsHandlerTest, givenRegularAllocationWhenEvictingResidentAllocationThenEvictCalled) { EXPECT_EQ(wddmMemoryOperationsHandler->makeResident(nullptr, ArrayRef(&allocationPtr, 1)), MemoryOperationsStatus::SUCCESS); EXPECT_EQ(wddmMemoryOperationsHandler->evict(nullptr, *wddmAllocation), MemoryOperationsStatus::SUCCESS); EXPECT_EQ(wddmMemoryOperationsHandler->isResident(nullptr, *wddmAllocation), MemoryOperationsStatus::MEMORY_NOT_FOUND); } TEST_F(WddmMemoryOperationsHandlerTest, givenFragmentedAllocationWhenEvictingResidentAllocationThenEvictCalled) { allocationPtr = wddmFragmentedAllocation.get(); EXPECT_EQ(wddmMemoryOperationsHandler->makeResident(nullptr, ArrayRef(&allocationPtr, 1)), MemoryOperationsStatus::SUCCESS); EXPECT_EQ(wddmMemoryOperationsHandler->evict(nullptr, *wddmFragmentedAllocation), MemoryOperationsStatus::SUCCESS); EXPECT_EQ(wddmMemoryOperationsHandler->isResident(nullptr, *wddmFragmentedAllocation), MemoryOperationsStatus::MEMORY_NOT_FOUND); } TEST_F(WddmMemoryOperationsHandlerTest, givenVariousAllocationsWhenEvictingResidentAllocationThenEvictCalled) { EXPECT_EQ(wddmMemoryOperationsHandler->makeResident(nullptr, ArrayRef(allocationData)), MemoryOperationsStatus::SUCCESS); EXPECT_EQ(wddmMemoryOperationsHandler->evict(nullptr, *wddmAllocation), MemoryOperationsStatus::SUCCESS); EXPECT_EQ(wddmMemoryOperationsHandler->isResident(nullptr, *wddmAllocation), MemoryOperationsStatus::MEMORY_NOT_FOUND); EXPECT_EQ(wddmMemoryOperationsHandler->evict(nullptr, *wddmFragmentedAllocation), MemoryOperationsStatus::SUCCESS); EXPECT_EQ(wddmMemoryOperationsHandler->isResident(nullptr, 
*wddmFragmentedAllocation), MemoryOperationsStatus::MEMORY_NOT_FOUND); } TEST(WddmResidentBufferTests, whenBuffersIsCreatedWithMakeResidentFlagSetThenItIsMadeResidentUponCreation) { VariableBackup backup(&ultHwConfig); ultHwConfig.useMockedPrepareDeviceEnvironmentsFunc = false; ultHwConfig.forceOsAgnosticMemoryManager = false; DebugManagerStateRestore restorer; DebugManager.flags.MakeAllBuffersResident.set(true); initPlatform(); auto device = platform()->getClDevice(0u); MockContext context(device, false); auto retValue = CL_SUCCESS; auto clBuffer = clCreateBuffer(&context, 0u, 4096u, nullptr, &retValue); ASSERT_EQ(retValue, CL_SUCCESS); auto memoryOperationsHandler = device->getRootDeviceEnvironment().memoryOperationsInterface.get(); auto neoBuffer = castToObject(clBuffer); auto bufferAllocation = neoBuffer->getGraphicsAllocation(device->getRootDeviceIndex()); auto status = memoryOperationsHandler->isResident(nullptr, *bufferAllocation); EXPECT_EQ(status, MemoryOperationsStatus::SUCCESS); clReleaseMemObject(clBuffer); } compute-runtime-22.14.22890/opencl/test/unit_test/perf_tests/000077500000000000000000000000001422164147700240055ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/perf_tests/CMakeLists.txt000066400000000000000000000010031422164147700265370ustar00rootroot00000000000000# # Copyright (C) 2018-2021 Intel Corporation # # SPDX-License-Identifier: MIT # cmake_minimum_required(VERSION 3.2.0 FATAL_ERROR) add_subdirectory(api) add_subdirectory(fixtures) # Setting up our local list of test files set(IGDRCL_SRCS_performance_tests ${IGDRCL_SRCS_perf_tests_api} ${IGDRCL_SRCS_perf_tests_fixtures} "${CMAKE_CURRENT_SOURCE_DIR}/options_perf_tests.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/perf_test_utils.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/perf_test_utils.h" PARENT_SCOPE ) 
compute-runtime-22.14.22890/opencl/test/unit_test/perf_tests/api/000077500000000000000000000000001422164147700245565ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/perf_tests/api/CMakeLists.txt000066400000000000000000000005071422164147700273200ustar00rootroot00000000000000# # Copyright (C) 2018-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_perf_tests_api "${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt" "${CMAKE_CURRENT_SOURCE_DIR}/api_tests.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/api_tests.h" "${CMAKE_CURRENT_SOURCE_DIR}/context_tests.cpp" PARENT_SCOPE ) compute-runtime-22.14.22890/opencl/test/unit_test/perf_tests/api/api_tests.cpp000066400000000000000000000020731422164147700272570ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/compiler_interface/compiler_interface.h" #include "opencl/source/kernel/kernel.h" #include "opencl/source/platform/platform.h" #include "cl_api_tests.h" namespace NEO { api_fixture::api_fixture() : retVal(CL_SUCCESS), retSize(0), pContext(nullptr), pKernel(nullptr), pProgram(nullptr) { } void api_fixture::SetUp() { setReferenceTime(); PlatformFixture::SetUp(numPlatformDevices, platformDevices); DeviceFixture::SetUp(); ASSERT_NE(nullptr, pDevice); auto pDevice = pPlatform->getDevice(0); ASSERT_NE(nullptr, pDevice); cl_device_id clDevice = pDevice; pContext = Context::create(nullptr, DeviceVector(&clDevice, 1), nullptr, nullptr, retVal); CommandQueueHwFixture::SetUp(pDevice, pContext); } void api_fixture::TearDown() { delete pKernel; delete pContext; delete pProgram; CommandQueueHwFixture::TearDown(); DeviceFixture::TearDown(); PlatformFixture::TearDown(); } } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/perf_tests/api/api_tests.h000066400000000000000000000022541422164147700267250ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: 
MIT * */ #pragma once #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/context/context.h" #include "opencl/source/program/program.h" #include "opencl/test/unit_test/perf_tests/fixtures/command_queue_fixture.h" #include "opencl/test/unit_test/perf_tests/fixtures/device_fixture.h" #include "opencl/test/unit_test/perf_tests/fixtures/platform_fixture.h" #include "opencl/test/unit_test/perf_tests/perf_test_utils.h" #include "gtest/gtest.h" namespace NEO { struct api_fixture : public PlatformFixture, public CommandQueueHwFixture, public DeviceFixture { public: api_fixture(void); protected: virtual void SetUp(); virtual void TearDown(); cl_int retVal; size_t retSize; CommandQueue *pCommandQueue; Context *pContext; Kernel *pKernel; Program *pProgram; }; struct api_tests : public api_fixture, public ::testing::Test { void SetUp() override { api_fixture::SetUp(); } void TearDown() override { api_fixture::TearDown(); } }; } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/perf_tests/api/context_tests.cpp000066400000000000000000000070701422164147700301740ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/compiler_interface/compiler_interface.h" #include "opencl/source/helpers/file_io.h" #include "opencl/test/unit_test/helpers/memory_management.h" #include "opencl/test/unit_test/helpers/test_files.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests ContextTest; namespace ULT { // multiplier of reference ratio that is compared ( checked if less than ) with current result const double multiplier = 1.5000; // ratio results that are not checked be EXPECT ( very short time tests are not chceked due to high fluctuations ) const double ratioThreshold = 0.005; //------------------------------------------------------------------------------ // clCreateContext //------------------------------------------------------------------------------ 
TEST_F(ContextTest, WhenCreatingContextThenTimeIsLowerThanReference) { double previousRatio = -1.0; uint64_t hash = getHash(__FUNCTION__, strlen(__FUNCTION__)); bool success = getTestRatio(hash, previousRatio); long long times[3] = {0, 0, 0}; for (int i = 0; i < 3; i++) { Timer t; t.start(); auto context = clCreateContext(nullptr, num_devices, devices, nullptr, nullptr, &retVal); t.end(); times[i] = t.get(); ((Context *)context)->release(); } long long time = majorityVote(times[0], times[1], times[2]); double ratio = static_cast(time) / static_cast(refTime); if (success && previousRatio > ratioThreshold) { EXPECT_TRUE(isLowerThanReference(ratio, previousRatio, multiplier)) << "Current: " << ratio << " previous: " << previousRatio << "\n"; } updateTestRatio(hash, ratio); } TEST_F(ContextTest, WhenReleasingContextThenTimeIsLowerThanReference) { double previousRatio = -1.0; uint64_t hash = getHash(__FUNCTION__, strlen(__FUNCTION__)); bool success = getTestRatio(hash, previousRatio); long long times[3] = {0, 0, 0}; cl_context contexts[3]; cl_device_id clDevice = pDevice; for (int i = 0; i < 3; i++) { contexts[i] = Context::create(nullptr, DeviceVector(&clDevice, 1), nullptr, nullptr, retVal); Timer t; t.start(); auto retVal = clReleaseContext(contexts[i]); t.end(); times[i] = t.get(); } long long time = majorityVote(times[0], times[1], times[2]); double ratio = static_cast(time) / static_cast(refTime); if (success && previousRatio > ratioThreshold) { EXPECT_TRUE(isLowerThanReference(ratio, previousRatio, multiplier)) << "Current: " << ratio << " previous: " << previousRatio << "\n"; } updateTestRatio(hash, ratio); } TEST_F(ContextTest, WhenRetainingContextThenTimeIsLowerThanReference) { double previousRatio = -1.0; uint64_t hash = getHash(__FUNCTION__, strlen(__FUNCTION__)); bool success = getTestRatio(hash, previousRatio); long long times[3] = {0, 0, 0}; for (int i = 0; i < 3; i++) { Timer t; t.start(); auto retVal = clRetainContext(pContext); t.end(); times[i] = 
t.get(); pContext->release(); } long long time = majorityVote(times[0], times[1], times[2]); double ratio = static_cast(time) / static_cast(refTime); if (success && previousRatio > ratioThreshold) { EXPECT_TRUE(isLowerThanReference(ratio, previousRatio, multiplier)) << "Current: " << ratio << " previous: " << previousRatio << "\n"; } updateTestRatio(hash, ratio); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/perf_tests/fixtures/000077500000000000000000000000001422164147700256565ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/perf_tests/fixtures/CMakeLists.txt000066400000000000000000000010041422164147700304110ustar00rootroot00000000000000# # Copyright (C) 2018-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_perf_tests_fixtures "${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt" "${CMAKE_CURRENT_SOURCE_DIR}/device_fixture.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/device_fixture.h" "${CMAKE_CURRENT_SOURCE_DIR}/command_queue_fixture.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/command_queue_fixture.h" "${CMAKE_CURRENT_SOURCE_DIR}/platform_fixture.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/platform_fixture.h" PARENT_SCOPE ) compute-runtime-22.14.22890/opencl/test/unit_test/perf_tests/fixtures/command_queue_fixture.cpp000066400000000000000000000034211422164147700327520ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/perf_tests/fixtures/command_queue_fixture.h" #include "shared/source/device/device.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/context/context.h" #include "gtest/gtest.h" namespace NEO { // Global table of create functions extern CommandQueueCreateFunc commandQueueFactory[IGFX_MAX_CORE]; CommandQueueHwFixture::CommandQueueHwFixture() : pCmdQ(nullptr) { } CommandQueue *CommandQueueHwFixture::createCommandQueue( Context *context, Device *pDevice, cl_command_queue_properties 
properties) { auto funcCreate = commandQueueFactory[pDevice->getHardwareInfo().platform->eRenderCoreFamily]; assert(nullptr != funcCreate); return funcCreate(context, pDevice, properties); } void CommandQueueHwFixture::SetUp() { ASSERT_NE(nullptr, pCmdQ); } void CommandQueueHwFixture::SetUp( Device *pDevice, Context *context) { ASSERT_NE(nullptr, pDevice); pCmdQ = createCommandQueue(context, pDevice); CommandQueueHwFixture::SetUp(); } void CommandQueueHwFixture::TearDown() { delete pCmdQ; } CommandQueueFixture::CommandQueueFixture() : pCmdQ(nullptr) { } CommandQueue *CommandQueueFixture::createCommandQueue( Context *context, Device *device, cl_command_queue_properties properties) { return new CommandQueue( context, device, properties); } void CommandQueueFixture::SetUp( Context *context, Device *device, cl_command_queue_properties properties) { pCmdQ = createCommandQueue( context, device, properties); } void CommandQueueFixture::TearDown() { delete pCmdQ; pCmdQ = nullptr; } } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/perf_tests/fixtures/command_queue_fixture.h000066400000000000000000000021401422164147700324140ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/command_queue/command_queue.h" #include "CL/cl.h" #include "gtest/gtest.h" namespace NEO { class Context; class Device; struct CommandQueueHwFixture { CommandQueueHwFixture(); CommandQueue *createCommandQueue( Context *context, Device *device, cl_command_queue_properties _properties = 0); virtual void SetUp(); virtual void SetUp(Device *_pDevice, Context *context); virtual void TearDown(); CommandQueue *pCmdQ; }; struct CommandQueueFixture { CommandQueueFixture(); virtual void SetUp( Context *context, Device *device, cl_command_queue_properties properties = 0); virtual void TearDown(); CommandQueue *createCommandQueue( Context *context, Device *device, cl_command_queue_properties 
properties); CommandQueue *pCmdQ; }; static const cl_command_queue_properties DefaultCommandQueueProperties[] = { 0, CL_QUEUE_PROFILING_ENABLE, }; } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/perf_tests/fixtures/device_fixture.cpp000066400000000000000000000012021422164147700313620ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "device_fixture.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "gtest/gtest.h" using NEO::Device; using NEO::HardwareInfo; using NEO::platformDevices; void DeviceFixture::SetUp() { pDevice = ClDeviceHelper<>::create(); ASSERT_NE(nullptr, pDevice); auto &commandStreamReceiver = pDevice->getGpgpuCommandStreamReceiver(); pTagMemory = commandStreamReceiver.getTagAddress(); ASSERT_NE(nullptr, const_cast(pTagMemory)); } void DeviceFixture::TearDown() { delete pDevice; } compute-runtime-22.14.22890/opencl/test/unit_test/perf_tests/fixtures/device_fixture.h000066400000000000000000000016151422164147700310370ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/device/device.h" #include namespace NEO { struct HardwareInfo; extern const HardwareInfo **platformDevices; } // namespace NEO // Even though there aren't any defaults, this pattern is used // throughout testing. Included here for consistency. 
struct DeviceDefaults { }; template struct ClDeviceHelper { static NEO::Device *create(const NEO::HardwareInfo *hardwareInfo = nullptr) { auto device = NEO::Device::create(hardwareInfo); assert(device != nullptr); return device; } }; struct DeviceFixture { DeviceFixture() : pDevice(nullptr), pTagMemory(nullptr) { } void SetUp(); void TearDown(); NEO::Device *pDevice; volatile uint32_t *pTagMemory; }; compute-runtime-22.14.22890/opencl/test/unit_test/perf_tests/fixtures/platform_fixture.cpp000066400000000000000000000022271422164147700317570ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/platform_fixture.h" #include "shared/source/device/device.h" #include "gtest/gtest.h" namespace NEO { PlatformFixture::PlatformFixture() : pPlatform(nullptr), num_devices(0), devices(nullptr) { } void PlatformFixture::SetUp(size_t numDevices, const HardwareInfo **pDevices) { pPlatform = platform(); ASSERT_EQ(0u, pPlatform->getNumDevices()); // setup platform / context bool isInitialized = pPlatform->initialize(numDevices, pDevices); ASSERT_EQ(true, isInitialized); num_devices = static_cast(pPlatform->getNumDevices()); ASSERT_GT(num_devices, 0u); auto allDev = pPlatform->getDevices(); ASSERT_NE(nullptr, allDev); devices = new cl_device_id[num_devices]; for (cl_uint deviceOrdinal = 0; deviceOrdinal < num_devices; ++deviceOrdinal) { auto device = allDev[deviceOrdinal]; ASSERT_NE(nullptr, device); devices[deviceOrdinal] = device; } } void PlatformFixture::TearDown() { pPlatform->shutdown(); delete[] devices; } } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/perf_tests/fixtures/platform_fixture.h000066400000000000000000000013501422164147700314200ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/api/cl_types.h" #include "opencl/source/platform/platform.h" namespace 
NEO { struct HardwareInfo; //////////////////////////////////////////////////////////////////////////////// // CPlatformFixture // Used to test the Platform class (and many others) //////////////////////////////////////////////////////////////////////////////// class PlatformFixture { public: PlatformFixture(); protected: virtual void SetUp(size_t numDevices, const HardwareInfo **pDevices); virtual void TearDown(); Platform *pPlatform; cl_uint num_devices; cl_device_id *devices; }; } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/perf_tests/options_perf_tests.cpp000066400000000000000000000014541422164147700304460ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/helpers/array_count.h" #include "hw_cmds.h" namespace NEO { // IP address for TBX server const char *tbxServerIp = "127.0.0.1"; // AUB file folder location const char *folderAUB = "aub_out"; // Initial value for HW tag // Set to 0 if using HW or simulator, otherwise 0xFFFFFF00, needs to be lower then Event::EventNotReady. 
uint32_t initialHardwareTag = static_cast(0); // Number of devices in the platform static const HardwareInfo *DefaultPlatformDevices[] = { &DEFAULT_PLATFORM::hwInfo, }; size_t numPlatformDevices = ARRAY_COUNT(DefaultPlatformDevices); const HardwareInfo **platformDevices = DefaultPlatformDevices; } // namespace NEO bool printMemoryOpCallStack = true; compute-runtime-22.14.22890/opencl/test/unit_test/perf_tests/perf_test_utils.cpp000066400000000000000000000057741422164147700277410ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "perf_test_utils.h" #include "shared/source/helpers/aligned_memory.h" #include "opencl/source/helpers/hash.h" #include #include using namespace NEO; const char *perfLogPath = "perf_logs/"; // Global reference time long long refTime = 0; void setReferenceTime() { if (refTime == 0) { Timer t1, t2, t3; long long time1 = 0; long long time2 = 0; long long time3 = 0; Timer::setFreq(); void *bufferDst = alignedMalloc(128 * 4096, 4096); void *bufferSrc1 = alignedMalloc(128 * 4096, 4096); void *bufferSrc2 = alignedMalloc(128 * 4096, 4096); void *bufferSrc3 = alignedMalloc(128 * 4096, 4096); t1.start(); memset(bufferSrc1, 0, 128 * 4096); memcpy(bufferDst, bufferSrc1, 128 * 4096); t1.end(); t2.start(); memset(bufferSrc2, 1, 128 * 4096); memcpy(bufferDst, bufferSrc2, 128 * 4096); t2.end(); t3.start(); memset(bufferSrc3, 2, 128 * 4096); memcpy(bufferDst, bufferSrc3, 128 * 4096); t3.end(); time1 = t1.get(); time2 = t2.get(); time3 = t3.get(); refTime = majorityVote(time1, time2, time3); alignedFree(bufferDst); alignedFree(bufferSrc1); alignedFree(bufferSrc2); alignedFree(bufferSrc3); } } bool getTestRatio(uint64_t hash, double &ratio) { ifstream file; string filename(perfLogPath); double data = 0.0; filename.append(std::to_string(hash)); file.open(filename); if (file.is_open()) { file >> data; ratio = data; file.close(); return true; } ratio = 0.0; return false; } bool 
saveTestRatio(uint64_t hash, double ratio) { ofstream file; string filename(perfLogPath); double data = 0.0; filename.append(std::to_string(hash)); file.open(filename); if (file.is_open()) { file << ratio; file.close(); return true; } return false; } bool isInRange(double data, double reference, double multiplier) { double lower = reference / multiplier; double higher = reference * multiplier; return data >= lower && data <= higher; } bool isLowerThanReference(double data, double reference, double multiplier) { double higher = multiplier * reference; if (data <= higher) { return true; } return false; } bool updateTestRatio(uint64_t hash, double ratio) { double oldRatio = 0.0; if (getTestRatio(hash, oldRatio)) { if (oldRatio != 0.0) { if (isInRange(ratio, oldRatio, 2.000)) { double newRatio = (0.8000 * oldRatio + 0.2000 * ratio); if (newRatio < 0.8 * oldRatio) return false; saveTestRatio(hash, newRatio); return true; } } } else { saveTestRatio(hash, ratio); } return false; } compute-runtime-22.14.22890/opencl/test/unit_test/perf_tests/perf_test_utils.h000066400000000000000000000016531422164147700273760ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/utilities/timer_util.h" #include "gtest/gtest.h" #include extern const char *perfLogPath; extern long long refTime; void setReferenceTime(); bool getTestRatio(uint64_t hash, double &ratio); bool saveTestRatio(uint64_t hash, double ratio); bool isInRange(double data, double reference, double rangePercentage); bool isLowerThanReference(double data, double reference, double rangePercentage); bool updateTestRatio(uint64_t hash, double ratio); template T majorityVote(T time1, T time2, T time3) { T minTime1 = 0; T minTime2 = 0; if (time1 < time2) { minTime1 = time1; minTime2 = time2; } else { minTime1 = time2; minTime2 = time1; } if (minTime2 > time3) minTime2 = time3; return (minTime1 + minTime2) / 2; } 
compute-runtime-22.14.22890/opencl/test/unit_test/platform/000077500000000000000000000000001422164147700234535ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/platform/CMakeLists.txt000066400000000000000000000005171422164147700262160ustar00rootroot00000000000000# # Copyright (C) 2018-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_platform ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/platform_icd_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/platform_tests.cpp ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_platform}) compute-runtime-22.14.22890/opencl/test/unit_test/platform/platform_icd_tests.cpp000066400000000000000000000046041422164147700300500ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/string.h" #include "opencl/source/api/dispatch.h" #include "opencl/source/sharings/sharing_factory.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "gtest/gtest.h" using namespace NEO; class IcdRestore : public SharingFactory { public: IcdRestore() { icdSnapshot = icdGlobalDispatchTable; memcpy_s(savedState, sizeof(savedState), sharingContextBuilder, sizeof(sharingContextBuilder)); for (auto &builder : sharingContextBuilder) { builder = nullptr; } } ~IcdRestore() { memcpy_s(sharingContextBuilder, sizeof(sharingContextBuilder), savedState, sizeof(savedState)); icdGlobalDispatchTable = icdSnapshot; } template void registerSharing(SharingType type) { auto object = std::make_unique(); sharingContextBuilder[type] = object.get(); sharings.push_back(std::move(object)); } protected: decltype(icdGlobalDispatchTable) icdSnapshot; decltype(SharingFactory::sharingContextBuilder) savedState; std::vector> sharings; }; void fakeGlF() { } class PlatformTestedSharingBuilderFactory : public SharingBuilderFactory { public: std::unique_ptr createContextBuilder() override { return nullptr; } 
std::string getExtensions(DriverInfo *driverInfo) override { return "--extension--"; }; void fillGlobalDispatchTable() override { icdGlobalDispatchTable.clCreateFromGLBuffer = (decltype(icdGlobalDispatchTable.clCreateFromGLBuffer)) & fakeGlF; }; void *getExtensionFunctionAddress(const std::string &functionName) override { return nullptr; } }; TEST(PlatformIcdTest, WhenPlatformSetupThenDispatchTableInitialization) { IcdRestore icdRestore; icdGlobalDispatchTable.clCreateFromGLBuffer = nullptr; EXPECT_EQ(nullptr, icdGlobalDispatchTable.clCreateFromGLBuffer); MockPlatform myPlatform; myPlatform.fillGlobalDispatchTable(); EXPECT_EQ(nullptr, icdGlobalDispatchTable.clCreateFromGLBuffer); icdRestore.registerSharing(SharingType::CLGL_SHARING); myPlatform.fillGlobalDispatchTable(); EXPECT_NE(nullptr, icdGlobalDispatchTable.clCreateFromGLBuffer); } compute-runtime-22.14.22890/opencl/test/unit_test/platform/platform_tests.cpp000066400000000000000000000525561422164147700272420ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/compiler_interface/oclc_extensions.h" #include "shared/source/device/device.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/helpers/string.h" #include "shared/source/os_interface/device_factory.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/ult_hw_config.h" #include "shared/test/common/helpers/variable_backup.h" #include "shared/test/common/mocks/mock_builtins.h" #include "shared/test/common/mocks/mock_csr.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/mocks/mock_execution_environment.h" #include "shared/test/common/mocks/mock_sip.h" #include "shared/test/common/mocks/mock_source_level_debugger.h" #include "shared/test/unit_test/fixtures/mock_aub_center_fixture.h" #include "shared/test/unit_test/helpers/gtest_helpers.h" #include 
"opencl/source/cl_device/cl_device.h" #include "opencl/source/sharings/sharing_factory.h" #include "opencl/test/unit_test/fixtures/platform_fixture.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "opencl/test/unit_test/mocks/ult_cl_device_factory.h" #include "opencl/test/unit_test/test_macros/test_checks_ocl.h" #include "gtest/gtest.h" using namespace NEO; struct PlatformTest : public ::testing::Test { void SetUp() override { MockSipData::clearUseFlags(); backupSipInitType = std::make_unique>(&MockSipData::useMockSip, true); pPlatform.reset(new MockPlatform()); } void TearDown() override { MockSipData::clearUseFlags(); } std::unique_ptr pPlatform; std::unique_ptr> backupSipInitType; cl_int retVal = CL_SUCCESS; }; struct MockPlatformWithMockExecutionEnvironment : public MockPlatform { MockPlatformWithMockExecutionEnvironment() : MockPlatform(*(new MockExecutionEnvironment(nullptr, false, 1))) { MockAubCenterFixture::setMockAubCenter(*executionEnvironment.rootDeviceEnvironments[0]); } }; TEST_F(PlatformTest, GivenUninitializedPlatformWhenInitializeIsCalledThenPlatformIsInitialized) { EXPECT_FALSE(pPlatform->isInitialized()); pPlatform->initializeWithNewDevices(); EXPECT_TRUE(pPlatform->isInitialized()); } TEST_F(PlatformTest, WhenGetNumDevicesIsCalledThenExpectedValuesAreReturned) { EXPECT_EQ(0u, pPlatform->getNumDevices()); pPlatform->initializeWithNewDevices(); EXPECT_GT(pPlatform->getNumDevices(), 0u); } TEST_F(PlatformTest, WhenGetDeviceIsCalledThenExpectedValuesAreReturned) { EXPECT_EQ(nullptr, pPlatform->getClDevice(0)); pPlatform->initializeWithNewDevices(); EXPECT_NE(nullptr, pPlatform->getClDevice(0)); auto numDevices = pPlatform->getNumDevices(); EXPECT_EQ(nullptr, pPlatform->getClDevice(numDevices)); } TEST_F(PlatformTest, WhenGetClDevicesIsCalledThenExpectedValuesAreReturned) { EXPECT_EQ(nullptr, pPlatform->getClDevices()); pPlatform->initializeWithNewDevices(); EXPECT_NE(nullptr, 
pPlatform->getClDevices()); } TEST_F(PlatformTest, givenSupportingCl21WhenGettingExtensionsStringThenSubgroupsIsEnabled) { REQUIRE_OCL_21_OR_SKIP(defaultHwInfo); pPlatform->initializeWithNewDevices(); auto compilerExtensions = pPlatform->getClDevice(0)->peekCompilerExtensions(); auto isIndependentForwardProgressSupported = pPlatform->getClDevice(0)->getDeviceInfo().independentForwardProgress; EXPECT_TRUE(hasSubstr(compilerExtensions, std::string(" -cl-ext=-all,+cl"))); if (isIndependentForwardProgressSupported) { EXPECT_TRUE(hasSubstr(compilerExtensions, std::string("cl_khr_subgroups"))); } } TEST_F(PlatformTest, givenMidThreadPreemptionWhenInitializingPlatformThenCallGetSipKernel) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.ForcePreemptionMode.set(static_cast(PreemptionMode::MidThread)); auto builtIns = new MockBuiltins(); auto executionEnvironment = pPlatform->peekExecutionEnvironment(); executionEnvironment->prepareRootDeviceEnvironments(1); executionEnvironment->rootDeviceEnvironments[0]->builtins.reset(builtIns); EXPECT_EQ(SipKernelType::COUNT, MockSipData::calledType); EXPECT_FALSE(MockSipData::called); pPlatform->initializeWithNewDevices(); EXPECT_EQ(SipKernelType::Csr, MockSipData::calledType); EXPECT_TRUE(MockSipData::called); } TEST_F(PlatformTest, givenDisabledPreemptionAndNoSourceLevelDebuggerWhenInitializingPlatformThenDoNotCallGetSipKernel) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.ForcePreemptionMode.set(static_cast(PreemptionMode::Disabled)); auto builtIns = new MockBuiltins(); auto executionEnvironment = pPlatform->peekExecutionEnvironment(); executionEnvironment->prepareRootDeviceEnvironments(1); executionEnvironment->rootDeviceEnvironments[0]->builtins.reset(builtIns); EXPECT_EQ(SipKernelType::COUNT, MockSipData::calledType); EXPECT_FALSE(MockSipData::called); pPlatform->initializeWithNewDevices(); EXPECT_EQ(SipKernelType::COUNT, MockSipData::calledType); EXPECT_FALSE(MockSipData::called); } TEST_F(PlatformTest, 
givenDisabledPreemptionInactiveSourceLevelDebuggerWhenInitializingPlatformThenDoNotCallGetSipKernel) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.ForcePreemptionMode.set(static_cast(PreemptionMode::Disabled)); auto builtIns = new MockBuiltins(); auto executionEnvironment = pPlatform->peekExecutionEnvironment(); executionEnvironment->prepareRootDeviceEnvironments(1); executionEnvironment->rootDeviceEnvironments[0]->builtins.reset(builtIns); auto sourceLevelDebugger = new MockSourceLevelDebugger(); sourceLevelDebugger->setActive(false); executionEnvironment->rootDeviceEnvironments[0]->debugger.reset(sourceLevelDebugger); EXPECT_EQ(SipKernelType::COUNT, MockSipData::calledType); EXPECT_FALSE(MockSipData::called); pPlatform->initializeWithNewDevices(); EXPECT_EQ(SipKernelType::COUNT, MockSipData::calledType); EXPECT_FALSE(MockSipData::called); } TEST_F(PlatformTest, givenDisabledPreemptionActiveSourceLevelDebuggerWhenInitializingPlatformThenCallGetSipKernel) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.ForcePreemptionMode.set(static_cast(PreemptionMode::Disabled)); auto builtIns = new MockBuiltins(); auto executionEnvironment = pPlatform->peekExecutionEnvironment(); executionEnvironment->prepareRootDeviceEnvironments(1); executionEnvironment->rootDeviceEnvironments[0]->builtins.reset(builtIns); executionEnvironment->rootDeviceEnvironments[0]->debugger.reset(new MockActiveSourceLevelDebugger()); EXPECT_EQ(SipKernelType::COUNT, MockSipData::calledType); EXPECT_FALSE(MockSipData::called); pPlatform->initializeWithNewDevices(); EXPECT_TRUE(MockSipData::called); EXPECT_LE(SipKernelType::DbgCsr, MockSipData::calledType); EXPECT_GE(SipKernelType::DbgCsrLocal, MockSipData::calledType); } TEST(PlatformTestSimple, givenCsrHwTypeWhenPlatformIsInitializedThenInitAubCenterIsNotCalled) { DebugManagerStateRestore stateRestore; DebugManager.flags.SetCommandStreamReceiver.set(0); MockPlatformWithMockExecutionEnvironment platform; bool ret = 
platform.initializeWithNewDevices(); EXPECT_TRUE(ret); auto rootDeviceEnvironment = static_cast(platform.peekExecutionEnvironment()->rootDeviceEnvironments[0].get()); EXPECT_FALSE(rootDeviceEnvironment->initAubCenterCalled); } TEST(PlatformTestSimple, givenNotCsrHwTypeWhenPlatformIsInitializedThenInitAubCenterIsCalled) { DebugManagerStateRestore stateRestore; DebugManager.flags.SetCommandStreamReceiver.set(1); VariableBackup backup(&ultHwConfig); ultHwConfig.useHwCsr = true; MockPlatformWithMockExecutionEnvironment platform; bool ret = platform.initializeWithNewDevices(); EXPECT_TRUE(ret); auto rootDeviceEnvironment = static_cast(platform.peekExecutionEnvironment()->rootDeviceEnvironments[0].get()); EXPECT_TRUE(rootDeviceEnvironment->initAubCenterCalled); } TEST(PlatformTestSimple, WhenConvertingCustomOclCFeaturesToCompilerInternalOptionsThenResultIsCorrect) { OpenClCFeaturesContainer customOpenclCFeatures; cl_name_version feature; strcpy_s(feature.name, CL_NAME_VERSION_MAX_NAME_SIZE, "custom_feature"); customOpenclCFeatures.push_back(feature); auto compilerOption = convertEnabledExtensionsToCompilerInternalOptions("", customOpenclCFeatures); EXPECT_STREQ(" -cl-ext=-all,+custom_feature ", compilerOption.c_str()); strcpy_s(feature.name, CL_NAME_VERSION_MAX_NAME_SIZE, "other_extra_feature"); customOpenclCFeatures.push_back(feature); compilerOption = convertEnabledExtensionsToCompilerInternalOptions("", customOpenclCFeatures); EXPECT_STREQ(" -cl-ext=-all,+custom_feature,+other_extra_feature ", compilerOption.c_str()); } TEST(PlatformTestSimple, WhenConvertingOclCFeaturesToCompilerInternalOptionsThenResultIsCorrect) { UltClDeviceFactory deviceFactory{1, 0}; auto pClDevice = deviceFactory.rootDevices[0]; std::string expectedCompilerOption = " -cl-ext=-all,"; for (auto &openclCFeature : pClDevice->deviceInfo.openclCFeatures) { expectedCompilerOption += "+"; expectedCompilerOption += openclCFeature.name; expectedCompilerOption += ","; } 
expectedCompilerOption.erase(expectedCompilerOption.size() - 1, 1); expectedCompilerOption += " "; auto compilerOption = convertEnabledExtensionsToCompilerInternalOptions("", pClDevice->deviceInfo.openclCFeatures); EXPECT_STREQ(expectedCompilerOption.c_str(), compilerOption.c_str()); } namespace NEO { extern CommandStreamReceiverCreateFunc commandStreamReceiverFactory[2 * IGFX_MAX_CORE]; } CommandStreamReceiver *createMockCommandStreamReceiver(bool withAubDump, ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield) { return nullptr; }; class PlatformFailingTest : public PlatformTest { public: PlatformFailingTest() { ultHwConfig.useHwCsr = true; } void SetUp() override { PlatformTest::SetUp(); hwInfo = defaultHwInfo.get(); commandStreamReceiverCreateFunc = commandStreamReceiverFactory[hwInfo->platform.eRenderCoreFamily]; commandStreamReceiverFactory[hwInfo->platform.eRenderCoreFamily] = createMockCommandStreamReceiver; } void TearDown() override { commandStreamReceiverFactory[hwInfo->platform.eRenderCoreFamily] = commandStreamReceiverCreateFunc; PlatformTest::TearDown(); } VariableBackup backup{&ultHwConfig}; CommandStreamReceiverCreateFunc commandStreamReceiverCreateFunc; const HardwareInfo *hwInfo; }; TEST_F(PlatformFailingTest, givenPlatformInitializationWhenIncorrectHwInfoThenInitializationFails) { auto platform = new MockPlatform(); bool ret = platform->initializeWithNewDevices(); EXPECT_FALSE(ret); EXPECT_FALSE(platform->isInitialized()); delete platform; } TEST_F(PlatformTest, givenSupportingCl21WhenPlatformSupportsFp64ThenFillMatchingSubstringsAndMandatoryTrailingSpace) { const HardwareInfo *hwInfo; hwInfo = defaultHwInfo.get(); std::string extensionsList = getExtensionsList(*hwInfo); OpenClCFeaturesContainer features; getOpenclCFeaturesList(*hwInfo, features); std::string compilerExtensions = convertEnabledExtensionsToCompilerInternalOptions(extensionsList.c_str(), features); 
EXPECT_TRUE(hasSubstr(compilerExtensions, std::string(" -cl-ext=-all,+cl"))); if (hwInfo->capabilityTable.supportsOcl21Features) { EXPECT_TRUE(hasSubstr(compilerExtensions, std::string("cl_khr_subgroups"))); if (hwInfo->capabilityTable.supportsVme) { EXPECT_TRUE(hasSubstr(compilerExtensions, std::string("cl_intel_spirv_device_side_avc_motion_estimation"))); } else { EXPECT_FALSE(hasSubstr(compilerExtensions, std::string("cl_intel_spirv_device_side_avc_motion_estimation"))); } if (hwInfo->capabilityTable.supportsMediaBlock) { EXPECT_TRUE(hasSubstr(compilerExtensions, std::string("cl_intel_spirv_media_block_io"))); } else { EXPECT_FALSE(hasSubstr(compilerExtensions, std::string("cl_intel_spirv_media_block_io"))); } EXPECT_TRUE(hasSubstr(compilerExtensions, std::string("cl_intel_spirv_subgroups"))); EXPECT_TRUE(hasSubstr(compilerExtensions, std::string("cl_khr_spirv_no_integer_wrap_decoration"))); } if (hwInfo->capabilityTable.ftrSupportsFP64) { EXPECT_TRUE(hasSubstr(compilerExtensions, std::string("cl_khr_fp64"))); } if (hwInfo->capabilityTable.supportsImages) { EXPECT_TRUE(hasSubstr(extensionsList, std::string("cl_khr_3d_image_writes"))); } EXPECT_TRUE(endsWith(compilerExtensions, std::string(" "))); } TEST_F(PlatformTest, givenNotSupportingCl21WhenPlatformNotSupportFp64ThenNotFillMatchingSubstringAndFillMandatoryTrailingSpace) { HardwareInfo TesthwInfo = *defaultHwInfo; TesthwInfo.capabilityTable.ftrSupportsFP64 = false; TesthwInfo.capabilityTable.clVersionSupport = 10; TesthwInfo.capabilityTable.supportsOcl21Features = false; std::string extensionsList = getExtensionsList(TesthwInfo); OpenClCFeaturesContainer features; getOpenclCFeaturesList(*defaultHwInfo, features); if (TesthwInfo.capabilityTable.supportsImages) { EXPECT_TRUE(hasSubstr(extensionsList, std::string("cl_khr_3d_image_writes"))); } std::string compilerExtensions = convertEnabledExtensionsToCompilerInternalOptions(extensionsList.c_str(), features); EXPECT_TRUE(hasSubstr(compilerExtensions, 
std::string("-cl-ext=-all,+cl"))); EXPECT_FALSE(hasSubstr(compilerExtensions, std::string("cl_khr_fp64"))); EXPECT_FALSE(hasSubstr(compilerExtensions, std::string("cl_khr_subgroups"))); EXPECT_TRUE(endsWith(compilerExtensions, std::string(" "))); } TEST_F(PlatformTest, givenFtrSupportAtomicsWhenCreateExtentionsListThenGetMatchingSubstrings) { const HardwareInfo *hwInfo; hwInfo = defaultHwInfo.get(); std::string extensionsList = getExtensionsList(*hwInfo); OpenClCFeaturesContainer features; getOpenclCFeaturesList(*hwInfo, features); std::string compilerExtensions = convertEnabledExtensionsToCompilerInternalOptions(extensionsList.c_str(), features); if (hwInfo->capabilityTable.ftrSupportsInteger64BitAtomics) { EXPECT_TRUE(hasSubstr(compilerExtensions, std::string("cl_khr_int64_base_atomics"))); EXPECT_TRUE(hasSubstr(compilerExtensions, std::string("cl_khr_int64_extended_atomics"))); } else { EXPECT_FALSE(hasSubstr(compilerExtensions, std::string("cl_khr_int64_base_atomics"))); EXPECT_FALSE(hasSubstr(compilerExtensions, std::string("cl_khr_int64_extended_atomics"))); } } TEST_F(PlatformTest, givenSupportedMediaBlockAndClVersion21WhenCreateExtentionsListThenDeviceReportsSpritvMediaBlockIoExtension) { HardwareInfo hwInfo = *defaultHwInfo; hwInfo.capabilityTable.supportsMediaBlock = true; hwInfo.capabilityTable.clVersionSupport = 21; hwInfo.capabilityTable.supportsOcl21Features = true; std::string extensionsList = getExtensionsList(hwInfo); OpenClCFeaturesContainer features; getOpenclCFeaturesList(*defaultHwInfo, features); std::string compilerExtensions = convertEnabledExtensionsToCompilerInternalOptions(extensionsList.c_str(), features); EXPECT_TRUE(hasSubstr(compilerExtensions, std::string("cl_intel_spirv_media_block_io"))); } TEST_F(PlatformTest, givenNotSupportedMediaBlockAndClVersion21WhenCreateExtentionsListThenDeviceNotReportsSpritvMediaBlockIoExtension) { HardwareInfo hwInfo = *defaultHwInfo; hwInfo.capabilityTable.supportsMediaBlock = false; 
hwInfo.capabilityTable.clVersionSupport = 21; std::string extensionsList = getExtensionsList(hwInfo); OpenClCFeaturesContainer features; getOpenclCFeaturesList(*defaultHwInfo, features); std::string compilerExtensions = convertEnabledExtensionsToCompilerInternalOptions(extensionsList.c_str(), features); EXPECT_FALSE(hasSubstr(compilerExtensions, std::string("cl_intel_spirv_media_block_io"))); } TEST_F(PlatformTest, givenSupportedImagesWhenCreateExtentionsListThenDeviceNotReportsKhr3DImageWritesExtension) { HardwareInfo hwInfo = *defaultHwInfo; hwInfo.capabilityTable.supportsImages = true; std::string extensionsList = getExtensionsList(hwInfo); OpenClCFeaturesContainer features; getOpenclCFeaturesList(*defaultHwInfo, features); std::string compilerExtensions = convertEnabledExtensionsToCompilerInternalOptions(extensionsList.c_str(), features); EXPECT_TRUE(hasSubstr(compilerExtensions, std::string("cl_khr_3d_image_writes"))); } TEST_F(PlatformTest, givenNotSupportedImagesWhenCreateExtentionsListThenDeviceNotReportsKhr3DImageWritesExtension) { HardwareInfo hwInfo = *defaultHwInfo; hwInfo.capabilityTable.supportsImages = false; std::string extensionsList = getExtensionsList(hwInfo); OpenClCFeaturesContainer features; getOpenclCFeaturesList(*defaultHwInfo, features); std::string compilerExtensions = convertEnabledExtensionsToCompilerInternalOptions(extensionsList.c_str(), features); EXPECT_FALSE(hasSubstr(compilerExtensions, std::string("cl_khr_3d_image_writes"))); } TEST(PlatformConstructionTest, givenPlatformConstructorWhenItIsCalledTwiceThenTheSamePlatformIsReturned) { platformsImpl->clear(); auto platform1 = constructPlatform(); EXPECT_EQ(platform1, platform()); auto platform2 = constructPlatform(); EXPECT_EQ(platform2, platform1); EXPECT_NE(platform1, nullptr); } TEST(PlatformConstructionTest, givenPlatformConstructorWhenItIsCalledAfterResetThenNewPlatformIsConstructed) { platformsImpl->clear(); auto platform = constructPlatform(); std::unique_ptr 
temporaryOwnership(std::move((*platformsImpl)[0])); platformsImpl->clear(); auto platform2 = constructPlatform(); EXPECT_NE(platform2, platform); EXPECT_NE(platform, nullptr); EXPECT_NE(platform2, nullptr); platformsImpl->clear(); } TEST(PlatformInitTest, givenNullptrDeviceInPassedDeviceVectorWhenInitializePlatformThenExceptionIsThrown) { std::vector> devices; devices.push_back(nullptr); EXPECT_THROW(platform()->initialize(std::move(devices)), std::exception); } TEST(PlatformInitTest, givenInitializedPlatformWhenInitializeIsCalledOneMoreTimeWithNullptrDeviceThenSuccessIsEarlyReturned) { initPlatform(); EXPECT_TRUE(platform()->isInitialized()); std::vector> devices; devices.push_back(nullptr); EXPECT_TRUE(platform()->initialize(std::move(devices))); } TEST(PlatformInitTest, givenSingleDeviceWithNonZeroRootDeviceIndexInPassedDeviceVectorWhenInitializePlatformThenCreateOnlyOneClDevice) { std::vector> devices; auto executionEnvironment = new MockExecutionEnvironment(defaultHwInfo.get(), false, 3); devices.push_back(std::make_unique(executionEnvironment, 2)); auto status = platform()->initialize(std::move(devices)); EXPECT_TRUE(status); size_t expectedNumDevices = 1u; EXPECT_EQ(expectedNumDevices, platform()->getNumDevices()); EXPECT_EQ(2u, platform()->getClDevice(0)->getRootDeviceIndex()); } TEST(PlatformGroupDevicesTest, whenMultipleDevicesAreCreatedThenGroupDevicesCreatesVectorPerEachProductFamily) { DebugManagerStateRestore restorer; const size_t numRootDevices = 5u; DebugManager.flags.CreateMultipleRootDevices.set(numRootDevices); auto executionEnvironment = new ExecutionEnvironment(); for (auto i = 0u; i < numRootDevices; i++) { executionEnvironment->rootDeviceEnvironments.push_back(std::make_unique(*executionEnvironment)); } auto inputDevices = DeviceFactory::createDevices(*executionEnvironment); EXPECT_EQ(numRootDevices, inputDevices.size()); auto skl0Device = inputDevices[0].get(); auto kbl0Device = inputDevices[1].get(); auto skl1Device = 
inputDevices[2].get(); auto skl2Device = inputDevices[3].get(); auto cfl0Device = inputDevices[4].get(); executionEnvironment->rootDeviceEnvironments[0]->getMutableHardwareInfo()->platform.eProductFamily = IGFX_SKYLAKE; executionEnvironment->rootDeviceEnvironments[1]->getMutableHardwareInfo()->platform.eProductFamily = IGFX_KABYLAKE; executionEnvironment->rootDeviceEnvironments[2]->getMutableHardwareInfo()->platform.eProductFamily = IGFX_SKYLAKE; executionEnvironment->rootDeviceEnvironments[3]->getMutableHardwareInfo()->platform.eProductFamily = IGFX_SKYLAKE; executionEnvironment->rootDeviceEnvironments[4]->getMutableHardwareInfo()->platform.eProductFamily = IGFX_COFFEELAKE; auto groupedDevices = Platform::groupDevices(std::move(inputDevices)); EXPECT_EQ(3u, groupedDevices.size()); EXPECT_EQ(1u, groupedDevices[0].size()); EXPECT_EQ(1u, groupedDevices[1].size()); EXPECT_EQ(3u, groupedDevices[2].size()); EXPECT_EQ(skl0Device, groupedDevices[2][0].get()); EXPECT_EQ(skl1Device, groupedDevices[2][1].get()); EXPECT_EQ(skl2Device, groupedDevices[2][2].get()); EXPECT_EQ(kbl0Device, groupedDevices[1][0].get()); EXPECT_EQ(cfl0Device, groupedDevices[0][0].get()); } compute-runtime-22.14.22890/opencl/test/unit_test/preemption/000077500000000000000000000000001422164147700240115ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/preemption/CMakeLists.txt000066400000000000000000000004031422164147700265460ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # target_sources(igdrcl_tests PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/preemption_tests.cpp ) add_subdirectories() compute-runtime-22.14.22890/opencl/test/unit_test/preemption/preemption_tests.cpp000066400000000000000000000343761422164147700301360ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/preemption.h" 
#include "shared/source/kernel/kernel_descriptor_from_patchtokens.h" #include "opencl/source/helpers/cl_preemption_helper.h" #include "opencl/test/unit_test/fixtures/cl_preemption_fixture.h" #include "gtest/gtest.h" using namespace NEO; class ThreadGroupPreemptionTests : public DevicePreemptionTests { void SetUp() override { dbgRestore.reset(new DebugManagerStateRestore()); DebugManager.flags.ForcePreemptionMode.set(static_cast(PreemptionMode::ThreadGroup)); preemptionMode = PreemptionMode::ThreadGroup; DevicePreemptionTests::SetUp(); } }; class MidThreadPreemptionTests : public DevicePreemptionTests { public: void SetUp() override { dbgRestore.reset(new DebugManagerStateRestore()); DebugManager.flags.ForcePreemptionMode.set(static_cast(PreemptionMode::MidThread)); preemptionMode = PreemptionMode::MidThread; DevicePreemptionTests::SetUp(); } }; TEST_F(ThreadGroupPreemptionTests, GivenDisallowedByKmdThenThreadGroupPreemptionIsDisabled) { waTable->flags.waDisablePerCtxtPreemptionGranularityControl = 1; PreemptionFlags flags = PreemptionHelper::createPreemptionLevelFlags(device->getDevice(), &kernel->getDescriptor()); EXPECT_FALSE(PreemptionHelper::allowThreadGroupPreemption(flags)); EXPECT_EQ(PreemptionMode::MidBatch, PreemptionHelper::taskPreemptionMode(device->getPreemptionMode(), flags)); } TEST_F(ThreadGroupPreemptionTests, GivenDisallowByDeviceThenThreadGroupPreemptionIsDisabled) { device->setPreemptionMode(PreemptionMode::MidThread); PreemptionFlags flags = PreemptionHelper::createPreemptionLevelFlags(device->getDevice(), &kernel->getDescriptor()); EXPECT_TRUE(PreemptionHelper::allowThreadGroupPreemption(flags)); EXPECT_EQ(PreemptionMode::MidThread, PreemptionHelper::taskPreemptionMode(device->getPreemptionMode(), flags)); } TEST_F(ThreadGroupPreemptionTests, GivenDisallowByReadWriteFencesWaThenThreadGroupPreemptionIsDisabled) { kernelInfo->kernelDescriptor.kernelAttributes.flags.usesFencesForReadWriteImages = true; waTable->flags.waDisableLSQCROPERFforOCL = 1; 
PreemptionFlags flags = PreemptionHelper::createPreemptionLevelFlags(device->getDevice(), &kernel->getDescriptor()); EXPECT_FALSE(PreemptionHelper::allowThreadGroupPreemption(flags)); EXPECT_EQ(PreemptionMode::MidBatch, PreemptionHelper::taskPreemptionMode(device->getPreemptionMode(), flags)); } TEST_F(ThreadGroupPreemptionTests, GivenDisallowByVmeKernelThenThreadGroupPreemptionIsDisabled) { kernelInfo->kernelDescriptor.kernelAttributes.flags.usesVme = true; kernel.reset(new MockKernel(program.get(), *kernelInfo, *device)); PreemptionFlags flags = PreemptionHelper::createPreemptionLevelFlags(device->getDevice(), &kernel->getDescriptor()); EXPECT_FALSE(PreemptionHelper::allowThreadGroupPreemption(flags)); EXPECT_EQ(PreemptionMode::MidBatch, PreemptionHelper::taskPreemptionMode(device->getPreemptionMode(), flags)); } TEST_F(ThreadGroupPreemptionTests, GivenDefaultThenThreadGroupPreemptionIsEnabled) { PreemptionFlags flags = {}; EXPECT_TRUE(PreemptionHelper::allowThreadGroupPreemption(flags)); EXPECT_EQ(PreemptionMode::ThreadGroup, PreemptionHelper::taskPreemptionMode(device->getPreemptionMode(), flags)); } TEST_F(ThreadGroupPreemptionTests, GivenDefaultModeForNonKernelRequestThenThreadGroupPreemptionIsEnabled) { PreemptionFlags flags = PreemptionHelper::createPreemptionLevelFlags(device->getDevice(), nullptr); EXPECT_EQ(PreemptionMode::ThreadGroup, PreemptionHelper::taskPreemptionMode(device->getPreemptionMode(), flags)); } TEST_F(ThreadGroupPreemptionTests, givenKernelWithEnvironmentPatchSetWhenLSQCWaIsTurnedOnThenThreadGroupPreemptionIsBeingSelected) { kernelInfo->kernelDescriptor.kernelAttributes.flags.usesFencesForReadWriteImages = false; waTable->flags.waDisableLSQCROPERFforOCL = 1; PreemptionFlags flags = PreemptionHelper::createPreemptionLevelFlags(device->getDevice(), &kernel->getDescriptor()); EXPECT_TRUE(PreemptionHelper::allowThreadGroupPreemption(flags)); EXPECT_EQ(PreemptionMode::ThreadGroup, 
PreemptionHelper::taskPreemptionMode(device->getPreemptionMode(), flags)); } TEST_F(ThreadGroupPreemptionTests, givenKernelWithEnvironmentPatchSetWhenLSQCWaIsTurnedOffThenThreadGroupPreemptionIsBeingSelected) { kernelInfo->kernelDescriptor.kernelAttributes.flags.usesFencesForReadWriteImages = true; waTable->flags.waDisableLSQCROPERFforOCL = 0; PreemptionFlags flags = PreemptionHelper::createPreemptionLevelFlags(device->getDevice(), &kernel->getDescriptor()); EXPECT_TRUE(PreemptionHelper::allowThreadGroupPreemption(flags)); EXPECT_EQ(PreemptionMode::ThreadGroup, PreemptionHelper::taskPreemptionMode(device->getPreemptionMode(), flags)); } TEST_F(ThreadGroupPreemptionTests, GivenDefaultThenMidBatchPreemptionIsEnabled) { device->setPreemptionMode(PreemptionMode::MidBatch); PreemptionFlags flags = PreemptionHelper::createPreemptionLevelFlags(device->getDevice(), nullptr); EXPECT_EQ(PreemptionMode::MidBatch, PreemptionHelper::taskPreemptionMode(device->getPreemptionMode(), flags)); } TEST_F(ThreadGroupPreemptionTests, GivenDisabledThenPreemptionIsDisabled) { device->setPreemptionMode(PreemptionMode::Disabled); PreemptionFlags flags = PreemptionHelper::createPreemptionLevelFlags(device->getDevice(), nullptr); EXPECT_EQ(PreemptionMode::Disabled, PreemptionHelper::taskPreemptionMode(device->getPreemptionMode(), flags)); } TEST_F(ThreadGroupPreemptionTests, GivenZeroSizedMdiThenThreadGroupPreemptioIsEnabled) { MultiDispatchInfo multiDispatchInfo; EXPECT_EQ(PreemptionMode::ThreadGroup, ClPreemptionHelper::taskPreemptionMode(device->getDevice(), multiDispatchInfo)); } TEST_F(ThreadGroupPreemptionTests, GivenValidKernelsInMdiThenThreadGroupPreemptioIsEnabled) { MultiDispatchInfo multiDispatchInfo; multiDispatchInfo.push(*dispatchInfo); multiDispatchInfo.push(*dispatchInfo); EXPECT_EQ(PreemptionMode::ThreadGroup, ClPreemptionHelper::taskPreemptionMode(device->getDevice(), multiDispatchInfo)); } TEST_F(ThreadGroupPreemptionTests, 
GivenValidKernelsInMdiAndDisabledPremptionThenPreemptionIsDisabled) { device->setPreemptionMode(PreemptionMode::Disabled); MultiDispatchInfo multiDispatchInfo; multiDispatchInfo.push(*dispatchInfo); multiDispatchInfo.push(*dispatchInfo); EXPECT_EQ(PreemptionMode::Disabled, ClPreemptionHelper::taskPreemptionMode(device->getDevice(), multiDispatchInfo)); } TEST_F(MidThreadPreemptionTests, GivenMidThreadPreemptionThenMidThreadPreemptionIsEnabled) { device->setPreemptionMode(PreemptionMode::MidThread); kernelInfo->kernelDescriptor.kernelAttributes.flags.requiresDisabledMidThreadPreemption = false; PreemptionFlags flags = PreemptionHelper::createPreemptionLevelFlags(device->getDevice(), &kernel->getDescriptor()); EXPECT_TRUE(PreemptionHelper::allowMidThreadPreemption(flags)); } TEST_F(MidThreadPreemptionTests, GivenNullKernelThenMidThreadPreemptionIsEnabled) { device->setPreemptionMode(PreemptionMode::MidThread); PreemptionFlags flags = PreemptionHelper::createPreemptionLevelFlags(device->getDevice(), nullptr); EXPECT_TRUE(PreemptionHelper::allowMidThreadPreemption(flags)); } TEST_F(MidThreadPreemptionTests, GivenMidThreadPreemptionDeviceSupportPreemptionOnVmeKernelThenMidThreadPreemptionIsEnabled) { device->setPreemptionMode(PreemptionMode::MidThread); device->sharedDeviceInfo.vmeAvcSupportsPreemption = true; kernelInfo->kernelDescriptor.kernelAttributes.flags.usesVme = true; kernel.reset(new MockKernel(program.get(), *kernelInfo, *device)); PreemptionFlags flags = PreemptionHelper::createPreemptionLevelFlags(device->getDevice(), &kernel->getDescriptor()); EXPECT_TRUE(PreemptionHelper::allowMidThreadPreemption(flags)); } TEST_F(MidThreadPreemptionTests, GivenDisallowMidThreadPreemptionByDeviceThenMidThreadPreemptionIsEnabled) { device->setPreemptionMode(PreemptionMode::ThreadGroup); kernelInfo->kernelDescriptor.kernelAttributes.flags.requiresDisabledMidThreadPreemption = false; PreemptionFlags flags = PreemptionHelper::createPreemptionLevelFlags(device->getDevice(), 
&kernel->getDescriptor()); EXPECT_TRUE(PreemptionHelper::allowMidThreadPreemption(flags)); EXPECT_EQ(PreemptionMode::ThreadGroup, PreemptionHelper::taskPreemptionMode(device->getPreemptionMode(), flags)); } TEST_F(MidThreadPreemptionTests, GivenDisallowMidThreadPreemptionByKernelThenMidThreadPreemptionIsEnabled) { device->setPreemptionMode(PreemptionMode::MidThread); kernelInfo->kernelDescriptor.kernelAttributes.flags.requiresDisabledMidThreadPreemption = true; PreemptionFlags flags = PreemptionHelper::createPreemptionLevelFlags(device->getDevice(), &kernel->getDescriptor()); EXPECT_FALSE(PreemptionHelper::allowMidThreadPreemption(flags)); } TEST_F(MidThreadPreemptionTests, GivenDisallowMidThreadPreemptionByVmeKernelThenMidThreadPreemptionIsEnabled) { device->setPreemptionMode(PreemptionMode::MidThread); device->sharedDeviceInfo.vmeAvcSupportsPreemption = false; kernelInfo->kernelDescriptor.kernelAttributes.flags.usesVme = true; kernel.reset(new MockKernel(program.get(), *kernelInfo, *device)); PreemptionFlags flags = PreemptionHelper::createPreemptionLevelFlags(device->getDevice(), &kernel->getDescriptor()); EXPECT_FALSE(PreemptionHelper::allowMidThreadPreemption(flags)); } TEST_F(MidThreadPreemptionTests, GivenTaskPreemptionDisallowMidThreadByDeviceThenThreadGroupPreemptionIsEnabled) { kernelInfo->kernelDescriptor.kernelAttributes.flags.requiresDisabledMidThreadPreemption = false; device->setPreemptionMode(PreemptionMode::ThreadGroup); PreemptionFlags flags = PreemptionHelper::createPreemptionLevelFlags(device->getDevice(), &kernel->getDescriptor()); PreemptionMode outMode = PreemptionHelper::taskPreemptionMode(device->getPreemptionMode(), flags); EXPECT_EQ(PreemptionMode::ThreadGroup, outMode); } TEST_F(MidThreadPreemptionTests, GivenTaskPreemptionDisallowMidThreadByKernelThenThreadGroupPreemptionIsEnabled) { kernelInfo->kernelDescriptor.kernelAttributes.flags.requiresDisabledMidThreadPreemption = true; device->setPreemptionMode(PreemptionMode::MidThread); 
PreemptionFlags flags = PreemptionHelper::createPreemptionLevelFlags(device->getDevice(), &kernel->getDescriptor()); PreemptionMode outMode = PreemptionHelper::taskPreemptionMode(device->getPreemptionMode(), flags); EXPECT_EQ(PreemptionMode::ThreadGroup, outMode); } TEST_F(MidThreadPreemptionTests, GivenTaskPreemptionDisallowMidThreadByVmeKernelThenThreadGroupPreemptionIsEnabled) { kernelInfo->kernelDescriptor.kernelAttributes.flags.usesVme = true; device->sharedDeviceInfo.vmeAvcSupportsPreemption = false; kernel.reset(new MockKernel(program.get(), *kernelInfo, *device)); device->setPreemptionMode(PreemptionMode::MidThread); PreemptionFlags flags = PreemptionHelper::createPreemptionLevelFlags(device->getDevice(), &kernel->getDescriptor()); PreemptionMode outMode = PreemptionHelper::taskPreemptionMode(device->getPreemptionMode(), flags); //VME disables mid thread and thread group when device does not support it EXPECT_EQ(PreemptionMode::MidBatch, outMode); } TEST_F(MidThreadPreemptionTests, GivenDeviceSupportsMidThreadPreemptionThenMidThreadPreemptionIsEnabled) { kernelInfo->kernelDescriptor.kernelAttributes.flags.requiresDisabledMidThreadPreemption = false; device->setPreemptionMode(PreemptionMode::MidThread); PreemptionFlags flags = PreemptionHelper::createPreemptionLevelFlags(device->getDevice(), &kernel->getDescriptor()); PreemptionMode outMode = PreemptionHelper::taskPreemptionMode(device->getPreemptionMode(), flags); EXPECT_EQ(PreemptionMode::MidThread, outMode); } TEST_F(MidThreadPreemptionTests, GivenTaskPreemptionAllowDeviceSupportsPreemptionOnVmeKernelThenMidThreadPreemptionIsEnabled) { kernelInfo->kernelDescriptor.kernelAttributes.flags.requiresDisabledMidThreadPreemption = false; kernelInfo->kernelDescriptor.kernelAttributes.flags.usesVme = true; kernel.reset(new MockKernel(program.get(), *kernelInfo, *device)); device->sharedDeviceInfo.vmeAvcSupportsPreemption = true; device->setPreemptionMode(PreemptionMode::MidThread); PreemptionFlags flags = 
PreemptionHelper::createPreemptionLevelFlags(device->getDevice(), &kernel->getDescriptor()); PreemptionMode outMode = PreemptionHelper::taskPreemptionMode(device->getPreemptionMode(), flags); EXPECT_EQ(PreemptionMode::MidThread, outMode); } TEST_F(ThreadGroupPreemptionTests, GivenDebugKernelPreemptionWhenDeviceSupportsThreadGroupThenExpectDebugKeyMidThreadValue) { DebugManager.flags.ForceKernelPreemptionMode.set(static_cast(PreemptionMode::MidThread)); EXPECT_EQ(PreemptionMode::ThreadGroup, device->getPreemptionMode()); kernel.reset(new MockKernel(program.get(), *kernelInfo, *device)); PreemptionFlags flags = PreemptionHelper::createPreemptionLevelFlags(device->getDevice(), &kernel->getDescriptor()); PreemptionMode outMode = PreemptionHelper::taskPreemptionMode(device->getPreemptionMode(), flags); EXPECT_EQ(PreemptionMode::MidThread, outMode); } TEST_F(MidThreadPreemptionTests, GivenDebugKernelPreemptionWhenDeviceSupportsMidThreadThenExpectDebugKeyMidBatchValue) { DebugManager.flags.ForceKernelPreemptionMode.set(static_cast(PreemptionMode::MidBatch)); EXPECT_EQ(PreemptionMode::MidThread, device->getPreemptionMode()); kernel.reset(new MockKernel(program.get(), *kernelInfo, *device)); PreemptionFlags flags = PreemptionHelper::createPreemptionLevelFlags(device->getDevice(), &kernel->getDescriptor()); PreemptionMode outMode = PreemptionHelper::taskPreemptionMode(device->getPreemptionMode(), flags); EXPECT_EQ(PreemptionMode::MidBatch, outMode); } TEST_F(MidThreadPreemptionTests, GivenMultiDispatchWithoutKernelWhenDevicePreemptionIsMidThreadThenTaskPreemptionIsMidThread) { dispatchInfo.reset(new DispatchInfo(device.get(), nullptr, 1, Vec3(1, 1, 1), Vec3(1, 1, 1), Vec3(0, 0, 0))); MultiDispatchInfo multiDispatchInfo; multiDispatchInfo.push(*dispatchInfo); EXPECT_EQ(PreemptionMode::MidThread, ClPreemptionHelper::taskPreemptionMode(device->getDevice(), multiDispatchInfo)); } 
compute-runtime-22.14.22890/opencl/test/unit_test/profiling/000077500000000000000000000000001422164147700236205ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/profiling/CMakeLists.txt000066400000000000000000000004331422164147700263600ustar00rootroot00000000000000# # Copyright (C) 2018-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_profiling ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/profiling_tests.cpp ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_profiling}) compute-runtime-22.14.22890/opencl/test/unit_test/profiling/profiling_tests.cpp000066400000000000000000001574451422164147700275570ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/os_interface.h" #include "shared/source/utilities/tag_allocator.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/mocks/mock_timestamp_container.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/command_queue/enqueue_common.h" #include "opencl/source/command_queue/enqueue_kernel.h" #include "opencl/source/command_queue/enqueue_marker.h" #include "opencl/source/command_queue/enqueue_migrate_mem_objects.h" #include "opencl/source/helpers/dispatch_info.h" #include "opencl/test/unit_test/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/event/event_fixture.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_event.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "opencl/test/unit_test/os_interface/mock_performance_counters.h" namespace NEO { 
struct ProfilingTests : public CommandEnqueueFixture, public ::testing::Test { void SetUp() override { CommandEnqueueFixture::SetUp(CL_QUEUE_PROFILING_ENABLE); program = ReleaseableObjectPtr(new MockProgram(toClDeviceVector(*pClDevice))); program->setContext(&ctx); kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 32; kernelInfo.setCrossThreadDataSize(sizeof(crossThreadData)); kernelInfo.setLocalIds({1, 1, 1}); kernelInfo.heapInfo.pKernelHeap = kernelIsa; kernelInfo.heapInfo.KernelHeapSize = sizeof(kernelIsa); } void TearDown() override { CommandEnqueueFixture::TearDown(); } ReleaseableObjectPtr program; SKernelBinaryHeaderCommon kernelHeader = {}; MockKernelInfo kernelInfo; MockContext ctx; uint32_t kernelIsa[32]; uint32_t crossThreadData[32]; }; HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, GivenCommandQueueWithProfilingAndForWorkloadWithKernelWhenGetCSFromCmdQueueThenEnoughSpaceInCS) { typedef typename FamilyType::MI_STORE_REGISTER_MEM MI_STORE_REGISTER_MEM; typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; MockKernel kernel(program.get(), kernelInfo, *pClDevice); uint64_t requiredSize = 2 * sizeof(PIPE_CONTROL) + 2 * sizeof(MI_STORE_REGISTER_MEM) + sizeof(GPGPU_WALKER) + HardwareCommandsHelper::getSizeRequiredCS(); MultiDispatchInfo multiDispatchInfo(&kernel); auto &commandStreamNDRangeKernel = getCommandStream(*pCmdQ, CsrDependencies(), true, false, false, multiDispatchInfo, nullptr, 0, false, false); auto expectedSizeCS = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, true, false, *pCmdQ, &kernel, {}); EXPECT_GE(expectedSizeCS, requiredSize); EXPECT_GE(commandStreamNDRangeKernel.getAvailableSpace(), requiredSize); auto &commandStreamTask = getCommandStream(*pCmdQ, CsrDependencies(), true, false, false, multiDispatchInfo, nullptr, 0, false, false); expectedSizeCS = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_TASK, true, false, *pCmdQ, &kernel, {}); EXPECT_GE(expectedSizeCS, 
requiredSize); EXPECT_GE(commandStreamTask.getAvailableSpace(), requiredSize); } HWTEST_F(ProfilingTests, GivenCommandQueueWithProfilingAndForWorkloadWithNoKernelWhenGetCSFromCmdQueueThenEnoughSpaceInCS) { typedef typename FamilyType::MI_STORE_REGISTER_MEM MI_STORE_REGISTER_MEM; typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; typedef typename FamilyType::WALKER_TYPE GPGPU_WALKER; uint64_t requiredSize = 2 * sizeof(PIPE_CONTROL) + 4 * sizeof(MI_STORE_REGISTER_MEM); MultiDispatchInfo multiDispatchInfo(nullptr); auto &commandStreamMigrateMemObjects = getCommandStream(*pCmdQ, CsrDependencies(), true, false, false, multiDispatchInfo, nullptr, 0, false, false); auto expectedSizeCS = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_MIGRATE_MEM_OBJECTS, true, false, *pCmdQ, nullptr, {}); EXPECT_GE(expectedSizeCS, requiredSize); EXPECT_GE(commandStreamMigrateMemObjects.getAvailableSpace(), requiredSize); auto &commandStreamMarker = getCommandStream(*pCmdQ, CsrDependencies(), true, false, false, multiDispatchInfo, nullptr, 0, false, false); expectedSizeCS = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_MARKER, true, false, *pCmdQ, nullptr, {}); EXPECT_GE(expectedSizeCS, requiredSize); EXPECT_GE(commandStreamMarker.getAvailableSpace(), requiredSize); } HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, GivenCommandQueueWithProfilingAndForWorkloadWithTwoKernelsInMdiWhenGetCSFromCmdQueueThenEnoughSpaceInCS) { typedef typename FamilyType::MI_STORE_REGISTER_MEM MI_STORE_REGISTER_MEM; typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; MockKernel kernel(program.get(), kernelInfo, *pClDevice); uint64_t requiredSize = 2 * sizeof(PIPE_CONTROL) + 4 * sizeof(MI_STORE_REGISTER_MEM) + HardwareCommandsHelper::getSizeRequiredCS(); requiredSize += 2 * sizeof(GPGPU_WALKER); DispatchInfo dispatchInfo; dispatchInfo.setKernel(&kernel); MultiDispatchInfo multiDispatchInfo; multiDispatchInfo.push(dispatchInfo); 
multiDispatchInfo.push(dispatchInfo); auto &commandStreamTask = getCommandStream(*pCmdQ, CsrDependencies(), true, false, false, multiDispatchInfo, nullptr, 0, false, false); auto expectedSizeCS = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_TASK, CsrDependencies(), true, false, false, *pCmdQ, multiDispatchInfo, false, false); EXPECT_GE(expectedSizeCS, requiredSize); EXPECT_GE(commandStreamTask.getAvailableSpace(), requiredSize); } /* # Two additional PIPE_CONTROLs are expected before first MI_STORE_REGISTER_MEM (which is before GPGPU_WALKER) # and after second MI_STORE_REGISTER_MEM (which is after GPGPU_WALKER). */ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, GivenCommandQueueWithProfolingWhenWalkerIsDispatchedThenPipeControlWithTimeStampIsPresentInCS) { typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; MockKernel kernel(program.get(), kernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); size_t globalOffsets[3] = {0, 0, 0}; size_t workItems[3] = {1, 1, 1}; uint32_t dimensions = 1; cl_event event; static_cast *>(pCmdQ)->enqueueKernel( &kernel, dimensions, globalOffsets, workItems, nullptr, 0, nullptr, &event); parseCommands(*pCmdQ); // Find GPGPU_WALKER auto itorGPGPUWalkerCmd = find(cmdList.begin(), cmdList.end()); GenCmdList::reverse_iterator rItorGPGPUWalkerCmd(itorGPGPUWalkerCmd); ASSERT_NE(cmdList.end(), itorGPGPUWalkerCmd); // Check PIPE_CONTROLs auto itorBeforePC = reverse_find(rItorGPGPUWalkerCmd, cmdList.rbegin()); ASSERT_NE(cmdList.rbegin(), itorBeforePC); auto pBeforePC = genCmdCast(*itorBeforePC); ASSERT_NE(nullptr, pBeforePC); EXPECT_EQ(1u, pBeforePC->getCommandStreamerStallEnable()); auto itorAfterPC = find(itorGPGPUWalkerCmd, cmdList.end()); ASSERT_NE(cmdList.end(), itorAfterPC); auto pAfterPC = genCmdCast(*itorAfterPC); ASSERT_NE(nullptr, pAfterPC); EXPECT_EQ(1u, pAfterPC->getCommandStreamerStallEnable()); EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, 
pBeforePC->getPostSyncOperation()); EXPECT_TRUE(static_cast *>(event)->calcProfilingData()); clReleaseEvent(event); } HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, GivenCommandQueueWithProfilingWhenNonBlockedEnqueueIsExecutedThenSubmittedTimestampDoesntHaveGPUTime) { MockKernel kernel(program.get(), kernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); size_t globalOffsets[3] = {0, 0, 0}; size_t workItems[3] = {1, 1, 1}; uint32_t dimensions = 1; cl_event event; static_cast *>(pCmdQ)->enqueueKernel( &kernel, dimensions, globalOffsets, workItems, nullptr, 0, nullptr, &event); auto mockEvent = static_cast *>(event); EXPECT_NE(0u, mockEvent->queueTimeStamp.GPUTimeStamp); EXPECT_NE(0u, mockEvent->queueTimeStamp.CPUTimeinNS); EXPECT_LT(mockEvent->queueTimeStamp.CPUTimeinNS, mockEvent->submitTimeStamp.CPUTimeinNS); EXPECT_EQ(0u, mockEvent->submitTimeStamp.GPUTimeStamp); clReleaseEvent(event); } /* # One additional MI_STORE_REGISTER_MEM is expected before and after GPGPU_WALKER. */ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, GivenCommandQueueWithProflingWhenWalkerIsDispatchedThenMiStoreRegisterMemIsPresentInCS) { typedef typename FamilyType::MI_STORE_REGISTER_MEM MI_STORE_REGISTER_MEM; typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; MockKernel kernel(program.get(), kernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); size_t globalOffsets[3] = {0, 0, 0}; size_t workItems[3] = {1, 1, 1}; uint32_t dimensions = 1; cl_event event; static_cast *>(pCmdQ)->enqueueKernel( &kernel, dimensions, globalOffsets, workItems, nullptr, 0, nullptr, &event); parseCommands(*pCmdQ); // Find GPGPU_WALKER auto itorGPGPUWalkerCmd = find(cmdList.begin(), cmdList.end()); GenCmdList::reverse_iterator rItorGPGPUWalkerCmd(itorGPGPUWalkerCmd); ASSERT_NE(cmdList.end(), itorGPGPUWalkerCmd); // Check MI_STORE_REGISTER_MEMs auto itorBeforeMI = reverse_find(rItorGPGPUWalkerCmd, cmdList.rbegin()); ASSERT_NE(cmdList.rbegin(), itorBeforeMI); auto pBeforeMI = 
genCmdCast(*itorBeforeMI); pBeforeMI = genCmdCast(*itorBeforeMI); ASSERT_NE(nullptr, pBeforeMI); EXPECT_EQ(GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, pBeforeMI->getRegisterAddress()); auto itorAfterMI = find(itorGPGPUWalkerCmd, cmdList.end()); ASSERT_NE(cmdList.end(), itorAfterMI); auto pAfterMI = genCmdCast(*itorAfterMI); ASSERT_NE(nullptr, pAfterMI); EXPECT_EQ(GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, pAfterMI->getRegisterAddress()); ++itorAfterMI; pAfterMI = genCmdCast(*itorAfterMI); EXPECT_EQ(nullptr, pAfterMI); clReleaseEvent(event); } /* # Two additional PIPE_CONTROLs are expected before first MI_STORE_REGISTER_MEM (which is before GPGPU_WALKER) # and after second MI_STORE_REGISTER_MEM (which is after GPGPU_WALKER). # If queue is blocked commands should be added to event */ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, GivenCommandQueueBlockedWithProfilingWhenWalkerIsDispatchedThenPipeControlWithTimeStampIsPresentInCS) { typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; MockKernel kernel(program.get(), kernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); kernel.incRefInternal(); size_t globalOffsets[3] = {0, 0, 0}; size_t workItems[3] = {1, 1, 1}; uint32_t dimensions = 1; cl_event event; cl_event ue = new UserEvent(); static_cast *>(pCmdQ)->enqueueKernel( &kernel, dimensions, globalOffsets, workItems, nullptr, 1, // one user event to block queue &ue, // user event not signaled &event); //rseCommands(*pCmdQ); ASSERT_NE(nullptr, pCmdQ->virtualEvent); ASSERT_NE(nullptr, pCmdQ->virtualEvent->peekCommand()); NEO::LinearStream *eventCommandStream = pCmdQ->virtualEvent->peekCommand()->getCommandStream(); ASSERT_NE(nullptr, eventCommandStream); parseCommands(*eventCommandStream); // Find GPGPU_WALKER auto itorGPGPUWalkerCmd = find(cmdList.begin(), cmdList.end()); GenCmdList::reverse_iterator rItorGPGPUWalkerCmd(itorGPGPUWalkerCmd); ASSERT_NE(cmdList.end(), itorGPGPUWalkerCmd); // Check PIPE_CONTROLs 
auto itorBeforePC = reverse_find(rItorGPGPUWalkerCmd, cmdList.rbegin()); ASSERT_NE(cmdList.rbegin(), itorBeforePC); auto pBeforePC = genCmdCast(*itorBeforePC); ASSERT_NE(nullptr, pBeforePC); auto itorAfterPC = find(itorGPGPUWalkerCmd, cmdList.end()); ASSERT_NE(cmdList.end(), itorAfterPC); auto pAfterPC = genCmdCast(*itorAfterPC); ASSERT_NE(nullptr, pAfterPC); EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, pBeforePC->getPostSyncOperation()); clReleaseEvent(event); ((UserEvent *)ue)->release(); pCmdQ->isQueueBlocked(); } /* # One additional MI_STORE_REGISTER_MEM is expected before and after GPGPU_WALKER. # If queue is blocked commands should be added to event */ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, GivenCommandQueueBlockedWithProfilingWhenWalkerIsDispatchedThenMiStoreRegisterMemIsPresentInCS) { typedef typename FamilyType::MI_STORE_REGISTER_MEM MI_STORE_REGISTER_MEM; typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; MockKernel kernel(program.get(), kernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); kernel.incRefInternal(); size_t globalOffsets[3] = {0, 0, 0}; size_t workItems[3] = {1, 1, 1}; uint32_t dimensions = 1; cl_event event; cl_event ue = new UserEvent(); static_cast *>(pCmdQ)->enqueueKernel( &kernel, dimensions, globalOffsets, workItems, nullptr, 1, // one user event to block queue &ue, // user event not signaled &event); // parseCommands(*pCmdQ); ASSERT_NE(nullptr, pCmdQ->virtualEvent); ASSERT_NE(nullptr, pCmdQ->virtualEvent->peekCommand()); NEO::LinearStream *eventCommandStream = pCmdQ->virtualEvent->peekCommand()->getCommandStream(); ASSERT_NE(nullptr, eventCommandStream); parseCommands(*eventCommandStream); // Find GPGPU_WALKER auto itorGPGPUWalkerCmd = find(cmdList.begin(), cmdList.end()); GenCmdList::reverse_iterator rItorGPGPUWalkerCmd(itorGPGPUWalkerCmd); ASSERT_NE(cmdList.end(), itorGPGPUWalkerCmd); // Check MI_STORE_REGISTER_MEMs auto itorBeforeMI = reverse_find(rItorGPGPUWalkerCmd, cmdList.rbegin()); 
ASSERT_NE(cmdList.rbegin(), itorBeforeMI); auto pBeforeMI = genCmdCast(*itorBeforeMI); pBeforeMI = genCmdCast(*itorBeforeMI); ASSERT_NE(nullptr, pBeforeMI); EXPECT_EQ(GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, pBeforeMI->getRegisterAddress()); auto itorAfterMI = find(itorGPGPUWalkerCmd, cmdList.end()); ASSERT_NE(cmdList.end(), itorAfterMI); auto pAfterMI = genCmdCast(*itorAfterMI); ASSERT_NE(nullptr, pAfterMI); EXPECT_EQ(GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, pAfterMI->getRegisterAddress()); ++itorAfterMI; EXPECT_EQ(itorAfterMI, cmdList.end()); clReleaseEvent(event); ((UserEvent *)ue)->release(); pCmdQ->isQueueBlocked(); } HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, GivenCommandQueueWithProflingWhenMarkerIsDispatchedThenPipeControlIsPresentInCS) { typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; cl_event event; static_cast *>(pCmdQ)->enqueueMarkerWithWaitList( 0, nullptr, &event); parseCommands(*pCmdQ); // Check PIPE_CONTROLs auto itorFirstPC = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), itorFirstPC); auto pFirstPC = genCmdCast(*itorFirstPC); ASSERT_NE(nullptr, pFirstPC); auto itorSecondPC = find(itorFirstPC, cmdList.end()); ASSERT_NE(cmdList.end(), itorSecondPC); auto pSecondPC = genCmdCast(*itorSecondPC); ASSERT_NE(nullptr, pSecondPC); EXPECT_TRUE(static_cast *>(event)->calcProfilingData()); clReleaseEvent(event); } HWTEST_F(ProfilingTests, givenNonKernelEnqueueWhenNonBlockedEnqueueThenSetCpuPath) { cl_event event; pCmdQ->enqueueBarrierWithWaitList(0, nullptr, &event); auto eventObj = static_cast(event); EXPECT_TRUE(eventObj->isCPUProfilingPath() == CL_TRUE); pCmdQ->finish(); uint64_t queued, submit, start, end; cl_int retVal; retVal = eventObj->getEventProfilingInfo(CL_PROFILING_COMMAND_QUEUED, sizeof(uint64_t), &queued, 0); EXPECT_EQ(CL_SUCCESS, retVal); retVal = eventObj->getEventProfilingInfo(CL_PROFILING_COMMAND_SUBMIT, sizeof(uint64_t), &submit, 0); EXPECT_EQ(CL_SUCCESS, retVal); retVal = 
eventObj->getEventProfilingInfo(CL_PROFILING_COMMAND_START, sizeof(uint64_t), &start, 0); EXPECT_EQ(CL_SUCCESS, retVal); retVal = eventObj->getEventProfilingInfo(CL_PROFILING_COMMAND_END, sizeof(uint64_t), &end, 0); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_LT(0u, queued); EXPECT_LT(queued, submit); EXPECT_LT(submit, start); EXPECT_LT(start, end); eventObj->release(); } HWTEST_F(ProfilingTests, givenMarkerEnqueueWhenNonBlockedEnqueueThenSetGpuPath) { cl_event event; pCmdQ->enqueueMarkerWithWaitList(0, nullptr, &event); auto eventObj = static_cast(event); EXPECT_TRUE(eventObj->isCPUProfilingPath() == CL_FALSE); pCmdQ->finish(); uint64_t queued, submit; cl_int retVal; retVal = eventObj->getEventProfilingInfo(CL_PROFILING_COMMAND_QUEUED, sizeof(uint64_t), &queued, 0); EXPECT_EQ(CL_SUCCESS, retVal); retVal = eventObj->getEventProfilingInfo(CL_PROFILING_COMMAND_SUBMIT, sizeof(uint64_t), &submit, 0); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_LT(0u, queued); EXPECT_LT(queued, submit); eventObj->release(); } HWTEST_F(ProfilingTests, givenMarkerEnqueueWhenBlockedEnqueueThenSetGpuPath) { cl_event event = nullptr; cl_event userEvent = new UserEvent(); pCmdQ->enqueueMarkerWithWaitList(1, &userEvent, &event); auto eventObj = static_cast(event); EXPECT_FALSE(eventObj->isCPUProfilingPath()); auto userEventObj = static_cast(userEvent); pCmdQ->flush(); userEventObj->setStatus(CL_COMPLETE); Event::waitForEvents(1, &event); uint64_t queued = 0u, submit = 0u; cl_int retVal; retVal = eventObj->getEventProfilingInfo(CL_PROFILING_COMMAND_QUEUED, sizeof(uint64_t), &queued, 0); EXPECT_EQ(CL_SUCCESS, retVal); retVal = eventObj->getEventProfilingInfo(CL_PROFILING_COMMAND_SUBMIT, sizeof(uint64_t), &submit, 0); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_LT(0u, queued); EXPECT_LT(queued, submit); eventObj->release(); userEventObj->release(); } HWTEST_F(ProfilingTests, givenMarkerEnqueueWhenBlockedEnqueueThenPipeControlsArePresentInCS) { typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; cl_event event 
= nullptr; cl_event userEvent = new UserEvent(); static_cast *>(pCmdQ)->enqueueMarkerWithWaitList(1, &userEvent, &event); auto eventObj = static_cast(event); EXPECT_FALSE(eventObj->isCPUProfilingPath()); auto userEventObj = static_cast(userEvent); pCmdQ->flush(); userEventObj->setStatus(CL_COMPLETE); Event::waitForEvents(1, &event); parseCommands(*pCmdQ); // Check PIPE_CONTROLs auto itorFirstPC = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), itorFirstPC); auto pFirstPC = genCmdCast(*itorFirstPC); ASSERT_NE(nullptr, pFirstPC); auto itorSecondPC = find(itorFirstPC, cmdList.end()); ASSERT_NE(cmdList.end(), itorSecondPC); auto pSecondPC = genCmdCast(*itorSecondPC); ASSERT_NE(nullptr, pSecondPC); EXPECT_TRUE(static_cast *>(event)->calcProfilingData()); eventObj->release(); userEventObj->release(); pCmdQ->isQueueBlocked(); } template struct MockTagNode : public TagNode { public: using TagNode::tagForCpuAccess; using TagNode::gfxAllocation; MockTagNode() { gfxAllocation = nullptr; tagForCpuAccess = nullptr; } }; class MyOSDeviceTime : public DeviceTime { double getDynamicDeviceTimerResolution(HardwareInfo const &hwInfo) const override { EXPECT_FALSE(true); return 1.0; } uint64_t getDynamicDeviceTimerClock(HardwareInfo const &hwInfo) const override { EXPECT_FALSE(true); return 0; } bool getCpuGpuTime(TimeStampData *pGpuCpuTime, OSTime *) override { EXPECT_FALSE(true); return false; } }; class MyOSTime : public OSTime { public: static int instanceNum; MyOSTime() { instanceNum++; this->deviceTime = std::make_unique(); } bool getCpuTime(uint64_t *timeStamp) override { EXPECT_FALSE(true); return false; }; double getHostTimerResolution() const override { EXPECT_FALSE(true); return 0; } uint64_t getCpuRawTimestamp() override { EXPECT_FALSE(true); return 0; } }; int MyOSTime::instanceNum = 0; using EventProfilingTest = ProfilingTests; HWCMDTEST_F(IGFX_GEN8_CORE, EventProfilingTest, 
givenEventWhenCompleteIsZeroThenCalcProfilingDataSetsEndTimestampInCompleteTimestampAndDoesntCallOsTimeMethods) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MyOSTime::instanceNum = 0; device->setOSTime(new MyOSTime()); EXPECT_EQ(1, MyOSTime::instanceNum); MockContext context; cl_command_queue_properties props[5] = {0, 0, 0, 0, 0}; MockCommandQueue cmdQ(&context, device.get(), props, false); cmdQ.setProfilingEnabled(); cmdQ.device = device.get(); HwTimeStamps timestamp; timestamp.GlobalStartTS = 10; timestamp.ContextStartTS = 20; timestamp.GlobalEndTS = 80; timestamp.ContextEndTS = 56; timestamp.GlobalCompleteTS = 0; timestamp.ContextCompleteTS = 0; MockTagNode timestampNode; timestampNode.tagForCpuAccess = ×tamp; MockEvent event(&cmdQ, CL_COMPLETE, 0, 0); event.setCPUProfilingPath(false); event.timeStampNode = ×tampNode; event.calcProfilingData(); EXPECT_EQ(timestamp.ContextEndTS, timestamp.ContextCompleteTS); cmdQ.device = nullptr; event.timeStampNode = nullptr; } using EventProfilingTests = ProfilingTests; HWCMDTEST_F(IGFX_GEN8_CORE, EventProfilingTests, givenRawTimestampsDebugModeWhenDataIsQueriedThenRawDataIsReturned) { DebugManagerStateRestore stateRestore; DebugManager.flags.ReturnRawGpuTimestamps.set(1); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MyOSTime::instanceNum = 0; device->setOSTime(new MyOSTime()); EXPECT_EQ(1, MyOSTime::instanceNum); MockContext context; cl_command_queue_properties props[5] = {0, 0, 0, 0, 0}; MockCommandQueue cmdQ(&context, device.get(), props, false); cmdQ.setProfilingEnabled(); cmdQ.device = device.get(); HwTimeStamps timestamp; timestamp.GlobalStartTS = 10; timestamp.ContextStartTS = 20; timestamp.GlobalEndTS = 80; timestamp.ContextEndTS = 56; timestamp.GlobalCompleteTS = 0; timestamp.ContextCompleteTS = 70; MockTagNode timestampNode; timestampNode.tagForCpuAccess = ×tamp; MockEvent event(&cmdQ, CL_COMPLETE, 0, 0); cl_event clEvent = 
&event; event.queueTimeStamp.CPUTimeinNS = 1; event.queueTimeStamp.GPUTimeStamp = 2; event.submitTimeStamp.CPUTimeinNS = 3; event.submitTimeStamp.GPUTimeStamp = 4; event.setCPUProfilingPath(false); event.timeStampNode = ×tampNode; event.calcProfilingData(); cl_ulong queued, submited, start, end, complete; clGetEventProfilingInfo(clEvent, CL_PROFILING_COMMAND_QUEUED, sizeof(cl_ulong), &queued, nullptr); clGetEventProfilingInfo(clEvent, CL_PROFILING_COMMAND_SUBMIT, sizeof(cl_ulong), &submited, nullptr); clGetEventProfilingInfo(clEvent, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &start, nullptr); clGetEventProfilingInfo(clEvent, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &end, nullptr); clGetEventProfilingInfo(clEvent, CL_PROFILING_COMMAND_COMPLETE, sizeof(cl_ulong), &complete, nullptr); EXPECT_EQ(timestamp.ContextCompleteTS, complete); EXPECT_EQ(timestamp.ContextEndTS, end); EXPECT_EQ(timestamp.ContextStartTS, start); EXPECT_EQ(event.submitTimeStamp.GPUTimeStamp, submited); EXPECT_EQ(event.queueTimeStamp.GPUTimeStamp, queued); event.timeStampNode = nullptr; } HWCMDTEST_F(IGFX_GEN8_CORE, EventProfilingTest, givenRawTimestampsDebugModeWhenStartTimeStampLTQueueTimeStampThenIncreaseStartTimeStamp) { DebugManagerStateRestore stateRestore; DebugManager.flags.ReturnRawGpuTimestamps.set(1); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MyOSTime::instanceNum = 0; device->setOSTime(new MyOSTime()); EXPECT_EQ(1, MyOSTime::instanceNum); MockContext context(device.get()); MockCommandQueue cmdQ(&context, device.get(), nullptr, false); cmdQ.setProfilingEnabled(); cmdQ.device = device.get(); HwTimeStamps timestamp; timestamp.GlobalStartTS = 0; timestamp.ContextStartTS = 20; timestamp.GlobalEndTS = 80; timestamp.ContextEndTS = 56; timestamp.GlobalCompleteTS = 0; timestamp.ContextCompleteTS = 70; MockTagNode timestampNode; timestampNode.tagForCpuAccess = ×tamp; MockEvent event(&cmdQ, CL_COMPLETE, 0, 0); cl_event clEvent = &event; 
event.queueTimeStamp.CPUTimeinNS = 83; event.queueTimeStamp.GPUTimeStamp = 1; event.setCPUProfilingPath(false); event.timeStampNode = ×tampNode; event.calcProfilingData(); cl_ulong queued, start; clGetEventProfilingInfo(clEvent, CL_PROFILING_COMMAND_QUEUED, sizeof(cl_ulong), &queued, nullptr); clGetEventProfilingInfo(clEvent, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &start, nullptr); EXPECT_LT(queued, start); event.timeStampNode = nullptr; } struct ProfilingWithPerfCountersTests : public PerformanceCountersFixture, ::testing::Test { void SetUp() override { SetUp(defaultHwInfo.get()); } void SetUp(const NEO::HardwareInfo *hardwareInfo) { PerformanceCountersFixture::SetUp(); createPerfCounters(); HardwareInfo hwInfo = *hardwareInfo; if (hwInfo.capabilityTable.defaultEngineType == aub_stream::EngineType::ENGINE_CCS) { hwInfo.featureTable.flags.ftrCCSNode = true; } pDevice = MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0); pClDevice = std::make_unique(*pDevice, nullptr); pDevice->setPerfCounters(performanceCountersBase.release()); context = std::make_unique(pClDevice.get()); cl_int retVal = CL_SUCCESS; cl_queue_properties properties[] = {CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, 0}; pCmdQ.reset(CommandQueue::create(context.get(), pClDevice.get(), properties, false, retVal)); kernel = std::make_unique(*pClDevice); } void TearDown() override { PerformanceCountersFixture::TearDown(); } template GenCmdList::iterator expectStoreRegister(const GenCmdList &cmdList, GenCmdList::iterator itor, uint64_t memoryAddress, uint32_t registerAddress) { using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM; itor = find(itor, cmdList.end()); EXPECT_NE(cmdList.end(), itor); auto pStore = genCmdCast(*itor); EXPECT_EQ(memoryAddress, pStore->getMemoryAddress()); EXPECT_EQ(registerAddress, pStore->getRegisterAddress()); itor++; return itor; } MockDevice *pDevice = nullptr; std::unique_ptr pClDevice; std::unique_ptr context; std::unique_ptr pCmdQ; 
std::unique_ptr kernel; }; struct ProfilingWithPerfCountersOnCCSTests : ProfilingWithPerfCountersTests { void SetUp() override { auto hwInfo = *defaultHwInfo; hwInfo.capabilityTable.defaultEngineType = aub_stream::ENGINE_CCS; ProfilingWithPerfCountersTests::SetUp(&hwInfo); } void TearDown() override { ProfilingWithPerfCountersTests::TearDown(); } }; HWTEST_F(ProfilingWithPerfCountersTests, GivenCommandQueueWithProfilingPerfCounterAndForWorkloadWithNoKernelWhenGetCSFromCmdQueueThenEnoughSpaceInCS) { typedef typename FamilyType::MI_STORE_REGISTER_MEM MI_STORE_REGISTER_MEM; typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; typedef typename FamilyType::WALKER_TYPE GPGPU_WALKER; pCmdQ->setPerfCountersEnabled(); uint64_t requiredSize = 2 * sizeof(PIPE_CONTROL) + 4 * sizeof(MI_STORE_REGISTER_MEM); MultiDispatchInfo multiDispatchInfo(nullptr); auto &commandStreamMigrateMemObjects = getCommandStream(*pCmdQ, CsrDependencies(), true, true, false, multiDispatchInfo, nullptr, 0, false, false); auto expectedSizeCS = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_MIGRATE_MEM_OBJECTS, true, true, *pCmdQ, nullptr, {}); EXPECT_GE(expectedSizeCS, requiredSize); EXPECT_GE(commandStreamMigrateMemObjects.getAvailableSpace(), requiredSize); auto &commandStreamMarker = getCommandStream(*pCmdQ, CsrDependencies(), true, true, false, multiDispatchInfo, nullptr, 0, false, false); expectedSizeCS = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_MARKER, true, true, *pCmdQ, nullptr, {}); EXPECT_GE(expectedSizeCS, requiredSize); EXPECT_GE(commandStreamMarker.getAvailableSpace(), requiredSize); } HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingWithPerfCountersTests, GivenCommandQueueWithProfilingPerfCountersWhenWalkerIsDispatchedThenPipeControlWithTimeStampIsPresentInCS) { typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; typedef typename FamilyType::MI_REPORT_PERF_COUNT MI_REPORT_PERF_COUNT; pCmdQ->setPerfCountersEnabled(); size_t 
globalOffsets[3] = {0, 0, 0}; size_t workItems[3] = {1, 1, 1}; uint32_t dimensions = 1; cl_event event; static_cast *>(pCmdQ.get())->enqueueKernel(kernel->mockKernel, dimensions, globalOffsets, workItems, nullptr, 0, nullptr, &event); ClHardwareParse parse; auto &cmdList = parse.cmdList; parse.parseCommands(*pCmdQ); // expect MI_REPORT_PERF_COUNT before WALKER auto itorBeforeReportPerf = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), itorBeforeReportPerf); // Find GPGPU_WALKER auto itorGPGPUWalkerCmd = find(itorBeforeReportPerf, cmdList.end()); GenCmdList::reverse_iterator rItorGPGPUWalkerCmd(itorGPGPUWalkerCmd); ASSERT_NE(cmdList.end(), itorGPGPUWalkerCmd); // Check PIPE_CONTROLs auto itorBeforePC = reverse_find(rItorGPGPUWalkerCmd, cmdList.rbegin()); ASSERT_NE(cmdList.rbegin(), itorBeforePC); auto pBeforePC = genCmdCast(*itorBeforePC); ASSERT_NE(nullptr, pBeforePC); EXPECT_EQ(1u, pBeforePC->getCommandStreamerStallEnable()); auto itorAfterPC = find(itorGPGPUWalkerCmd, cmdList.end()); ASSERT_NE(cmdList.end(), itorAfterPC); auto pAfterPC = genCmdCast(*itorAfterPC); ASSERT_NE(nullptr, pAfterPC); EXPECT_EQ(1u, pAfterPC->getCommandStreamerStallEnable()); EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, pBeforePC->getPostSyncOperation()); // expect MI_REPORT_PERF_COUNT after WALKER auto itorAfterReportPerf = find(itorGPGPUWalkerCmd, cmdList.end()); ASSERT_NE(cmdList.end(), itorAfterReportPerf); EXPECT_TRUE(static_cast *>(event)->calcProfilingData()); clReleaseEvent(event); } HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingWithPerfCountersTests, GivenCommandQueueWithProfilingPerfCountersNoUserRegistersWhenWalkerIsDispatchedThenPipeControlWithTimeStampIsPresentInCS) { typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; typedef typename FamilyType::MI_REPORT_PERF_COUNT MI_REPORT_PERF_COUNT; pCmdQ->setPerfCountersEnabled(); size_t globalOffsets[3] = {0, 0, 0}; size_t workItems[3] = {1, 1, 1}; 
uint32_t dimensions = 1; cl_event event; static_cast *>(pCmdQ.get())->enqueueKernel(kernel->mockKernel, dimensions, globalOffsets, workItems, nullptr, 0, nullptr, &event); ClHardwareParse parse; auto &cmdList = parse.cmdList; parse.parseCommands(*pCmdQ); // expect MI_REPORT_PERF_COUNT before WALKER auto itorBeforeReportPerf = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), itorBeforeReportPerf); // Find GPGPU_WALKER auto itorGPGPUWalkerCmd = find(itorBeforeReportPerf, cmdList.end()); GenCmdList::reverse_iterator rItorGPGPUWalkerCmd(itorGPGPUWalkerCmd); ASSERT_NE(cmdList.end(), itorGPGPUWalkerCmd); // Check PIPE_CONTROLs auto itorBeforePC = reverse_find(rItorGPGPUWalkerCmd, cmdList.rbegin()); ASSERT_NE(cmdList.rbegin(), itorBeforePC); auto pBeforePC = genCmdCast(*itorBeforePC); ASSERT_NE(nullptr, pBeforePC); EXPECT_EQ(1u, pBeforePC->getCommandStreamerStallEnable()); auto itorAfterPC = find(itorGPGPUWalkerCmd, cmdList.end()); ASSERT_NE(cmdList.end(), itorAfterPC); auto pAfterPC = genCmdCast(*itorAfterPC); ASSERT_NE(nullptr, pAfterPC); EXPECT_EQ(1u, pAfterPC->getCommandStreamerStallEnable()); EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, pBeforePC->getPostSyncOperation()); // expect MI_REPORT_PERF_COUNT after WALKER auto itorAfterReportPerf = find(itorGPGPUWalkerCmd, cmdList.end()); ASSERT_NE(cmdList.end(), itorAfterReportPerf); EXPECT_TRUE(static_cast *>(event)->calcProfilingData()); clReleaseEvent(event); } HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingWithPerfCountersTests, GivenCommandQueueBlockedWithProflingPerfCounterWhenWalkerIsDispatchedThenPipeControlWithTimeStampIsPresentInCS) { typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; typedef typename FamilyType::MI_REPORT_PERF_COUNT MI_REPORT_PERF_COUNT; pCmdQ->setPerfCountersEnabled(); size_t globalOffsets[3] = {0, 0, 0}; size_t workItems[3] = {1, 1, 1}; uint32_t dimensions = 1; cl_event event; cl_event ue = new UserEvent(); 
static_cast *>(pCmdQ.get())->enqueueKernel(kernel->mockKernel, dimensions, globalOffsets, workItems, nullptr, 1, // one user event to block queue &ue, // user event not signaled &event); //rseCommands(*pCmdQ); ASSERT_NE(nullptr, pCmdQ->virtualEvent); ASSERT_NE(nullptr, pCmdQ->virtualEvent->peekCommand()); NEO::LinearStream *eventCommandStream = pCmdQ->virtualEvent->peekCommand()->getCommandStream(); ASSERT_NE(nullptr, eventCommandStream); HardwareParse parse; auto &cmdList = parse.cmdList; parse.parseCommands(*eventCommandStream); // expect MI_REPORT_PERF_COUNT before WALKER auto itorBeforeReportPerf = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), itorBeforeReportPerf); // Find GPGPU_WALKER auto itorGPGPUWalkerCmd = find(itorBeforeReportPerf, cmdList.end()); GenCmdList::reverse_iterator rItorGPGPUWalkerCmd(itorGPGPUWalkerCmd); ASSERT_NE(cmdList.end(), itorGPGPUWalkerCmd); // Check PIPE_CONTROLs auto itorBeforePC = reverse_find(rItorGPGPUWalkerCmd, cmdList.rbegin()); ASSERT_NE(cmdList.rbegin(), itorBeforePC); auto pBeforePC = genCmdCast(*itorBeforePC); ASSERT_NE(nullptr, pBeforePC); auto itorAfterPC = find(itorGPGPUWalkerCmd, cmdList.end()); ASSERT_NE(cmdList.end(), itorAfterPC); auto pAfterPC = genCmdCast(*itorAfterPC); ASSERT_NE(nullptr, pAfterPC); EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, pBeforePC->getPostSyncOperation()); // expect MI_REPORT_PERF_COUNT after WALKER auto itorAfterReportPerf = find(itorGPGPUWalkerCmd, cmdList.end()); ASSERT_NE(cmdList.end(), itorAfterReportPerf); clReleaseEvent(event); ((UserEvent *)ue)->release(); pCmdQ->isQueueBlocked(); } HWTEST_F(ProfilingWithPerfCountersTests, GivenCommandQueueWithProfilingPerfCountersNoEventWhenWalkerIsDispatchedThenPipeControlWithTimeStampIsNotPresentInCS) { typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; typedef typename FamilyType::WALKER_TYPE GPGPU_WALKER; typedef typename FamilyType::MI_REPORT_PERF_COUNT MI_REPORT_PERF_COUNT; pCmdQ->setPerfCountersEnabled(); 
size_t globalOffsets[3] = {0, 0, 0}; size_t workItems[3] = {1, 1, 1}; uint32_t dimensions = 1; static_cast *>(pCmdQ.get())->enqueueKernel(kernel->mockKernel, dimensions, globalOffsets, workItems, nullptr, 0, nullptr, nullptr); ClHardwareParse parse; auto &cmdList = parse.cmdList; parse.parseCommands(*pCmdQ); // expect no MI_REPORT_PERF_COUNT before WALKER auto itorBeforeReportPerf = find(cmdList.begin(), cmdList.end()); ASSERT_EQ(cmdList.end(), itorBeforeReportPerf); // Find GPGPU_WALKER auto itorGPGPUWalkerCmd = find(cmdList.begin(), cmdList.end()); GenCmdList::reverse_iterator rItorGPGPUWalkerCmd(itorGPGPUWalkerCmd); ASSERT_NE(cmdList.end(), itorGPGPUWalkerCmd); // Check PIPE_CONTROLs auto itorBeforePC = reverse_find(rItorGPGPUWalkerCmd, cmdList.rbegin()); ASSERT_NE(cmdList.rbegin(), itorBeforePC); auto pBeforePC = genCmdCast(*itorBeforePC); ASSERT_NE(nullptr, pBeforePC); EXPECT_EQ(1u, pBeforePC->getCommandStreamerStallEnable()); auto itorAfterPC = find(itorGPGPUWalkerCmd, cmdList.end()); ASSERT_NE(cmdList.end(), itorAfterPC); auto pAfterPC = genCmdCast(*itorAfterPC); ASSERT_NE(nullptr, pAfterPC); EXPECT_EQ(1u, pAfterPC->getCommandStreamerStallEnable()); EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION_NO_WRITE, pBeforePC->getPostSyncOperation()); // expect MI_REPORT_PERF_COUNT after WALKER auto itorAfterReportPerf = find(itorGPGPUWalkerCmd, cmdList.end()); ASSERT_EQ(cmdList.end(), itorAfterReportPerf); } template struct FixedGpuAddressTagAllocator : MockTagAllocator { using TagAllocator::usedTags; using TagAllocator::deferredTags; struct MockTagNode : TagNode { void setGpuAddress(uint64_t value) { this->gpuAddress = value; } }; FixedGpuAddressTagAllocator(CommandStreamReceiver &csr, uint64_t gpuAddress) : MockTagAllocator(csr.getRootDeviceIndex(), csr.getMemoryManager(), csr.getPreferredTagPoolSize(), MemoryConstants::cacheLineSize, sizeof(TagType), false, csr.getOsContext().getDeviceBitfield()) { auto tag = reinterpret_cast(this->freeTags.peekHead()); 
tag->setGpuAddress(gpuAddress); } }; HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingWithPerfCountersTests, GivenCommandQueueWithProfilingPerfCountersWhenWalkerIsDispatchedThenRegisterStoresArePresentInCS) { uint64_t timeStampGpuAddress = 0x123456000; uint64_t perfCountersGpuAddress = 0xabcdef000; auto &csr = pDevice->getUltCommandStreamReceiver(); csr.profilingTimeStampAllocator.reset(new FixedGpuAddressTagAllocator(csr, timeStampGpuAddress)); csr.perfCounterAllocator.reset(new FixedGpuAddressTagAllocator(csr, perfCountersGpuAddress)); pCmdQ->setPerfCountersEnabled(); size_t globalOffsets[3] = {0, 0, 0}; size_t workItems[3] = {1, 1, 1}; uint32_t dimensions = 1; cl_event event; static_cast *>(pCmdQ.get())->enqueueKernel(kernel->mockKernel, dimensions, globalOffsets, workItems, nullptr, 0, nullptr, &event); auto pEvent = static_cast *>(event); EXPECT_EQ(pEvent->getHwTimeStampNode()->getGpuAddress(), timeStampGpuAddress); EXPECT_EQ(pEvent->getHwPerfCounterNode()->getGpuAddress(), perfCountersGpuAddress); ClHardwareParse parse; auto &cmdList = parse.cmdList; parse.parseCommands(*pCmdQ); auto itor = expectStoreRegister(cmdList, cmdList.begin(), timeStampGpuAddress + offsetof(HwTimeStamps, ContextStartTS), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW); // after WALKER: itor = expectStoreRegister(cmdList, itor, timeStampGpuAddress + offsetof(HwTimeStamps, ContextEndTS), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW); EXPECT_TRUE(pEvent->calcProfilingData()); clReleaseEvent(event); } HWTEST_F(ProfilingWithPerfCountersTests, givenTimestampPacketsEnabledWhenEnqueueIsCalledThenDontAllocateHwTimeStamps) { auto &csr = pDevice->getUltCommandStreamReceiver(); csr.timestampPacketWriteEnabled = true; auto mockAllocator = new FixedGpuAddressTagAllocator(csr, 0x123); csr.profilingTimeStampAllocator.reset(mockAllocator); auto myCmdQ = std::make_unique>(pCmdQ->getContextPtr(), pClDevice.get(), nullptr); myCmdQ->setProfilingEnabled(); size_t globalOffsets[3] = {0, 0, 0}; size_t workItems[3] = {1, 1, 1}; 
cl_event event; myCmdQ->enqueueKernel(kernel->mockKernel, 1, globalOffsets, workItems, nullptr, 0, nullptr, &event); EXPECT_EQ(!!myCmdQ->getTimestampPacketContainer(), mockAllocator->usedTags.peekIsEmpty()); EXPECT_TRUE(mockAllocator->deferredTags.peekIsEmpty()); clReleaseEvent(event); } HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingWithPerfCountersOnCCSTests, givenCommandQueueBlockedWithProfilingPerfCountersWhenWalkerIsDispatchedThenPipeControlWithTimeStampIsPresentInCS) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER; using MI_REPORT_PERF_COUNT = typename FamilyType::MI_REPORT_PERF_COUNT; pCmdQ->setPerfCountersEnabled(); size_t globalOffsets[3] = {0, 0, 0}; size_t workItems[3] = {1, 1, 1}; uint32_t dimensions = 1; cl_event event; cl_event userEvent = clCreateUserEvent(context.get(), nullptr); CommandQueueHw *cmdQHw = static_cast *>(pCmdQ.get()); cmdQHw->enqueueKernel(kernel->mockKernel, dimensions, globalOffsets, workItems, nullptr, 1, &userEvent, &event); ASSERT_NE(nullptr, pCmdQ->virtualEvent); ASSERT_NE(nullptr, pCmdQ->virtualEvent->peekCommand()); NEO::LinearStream *eventCommandStream = pCmdQ->virtualEvent->peekCommand()->getCommandStream(); ASSERT_NE(nullptr, eventCommandStream); HardwareParse parse; auto &cmdList = parse.cmdList; parse.parseCommands(*eventCommandStream); // expect MI_REPORT_PERF_COUNT before WALKER auto itorBeforeReportPerf = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), itorBeforeReportPerf); // find GPGPU_WALKER auto itorGPGPUWalkerCmd = find(itorBeforeReportPerf, cmdList.end()); GenCmdList::reverse_iterator rItorGPGPUWalkerCmd(itorGPGPUWalkerCmd); ASSERT_NE(cmdList.end(), itorGPGPUWalkerCmd); // check PIPE_CONTROLs auto itorBeforePC = reverse_find(rItorGPGPUWalkerCmd, cmdList.rbegin()); ASSERT_NE(cmdList.rbegin(), itorBeforePC); auto pBeforePC = genCmdCast(*itorBeforePC); ASSERT_NE(nullptr, pBeforePC); auto itorAfterPC = find(itorGPGPUWalkerCmd, cmdList.end()); 
ASSERT_NE(cmdList.end(), itorAfterPC); auto pAfterPC = genCmdCast(*itorAfterPC); ASSERT_NE(nullptr, pAfterPC); EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, pBeforePC->getPostSyncOperation()); // expect MI_REPORT_PERF_COUNT after WALKER auto itorAfterReportPerf = find(itorGPGPUWalkerCmd, cmdList.end()); ASSERT_NE(cmdList.end(), itorAfterReportPerf); clReleaseEvent(event); clReleaseEvent(userEvent); } HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingWithPerfCountersOnCCSTests, givenCommandQueueWithProfilingPerfCountersWhenWalkerIsDispatchedThenPipeControlWithTimeStampIsPresentInCS) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER; using MI_REPORT_PERF_COUNT = typename FamilyType::MI_REPORT_PERF_COUNT; pCmdQ->setPerfCountersEnabled(); size_t globalOffsets[3] = {0, 0, 0}; size_t workItems[3] = {1, 1, 1}; uint32_t dimensions = 1; cl_event event; CommandQueueHw *cmdQHw = static_cast *>(pCmdQ.get()); cmdQHw->enqueueKernel(kernel->mockKernel, dimensions, globalOffsets, workItems, nullptr, 0, nullptr, &event); ClHardwareParse parse; auto &cmdList = parse.cmdList; parse.parseCommands(*pCmdQ); // expect MI_REPORT_PERF_COUNT before WALKER auto itorBeforeReportPerf = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), itorBeforeReportPerf); // find GPGPU_WALKER auto itorGPGPUWalkerCmd = find(itorBeforeReportPerf, cmdList.end()); GenCmdList::reverse_iterator rItorGPGPUWalkerCmd(itorGPGPUWalkerCmd); ASSERT_NE(cmdList.end(), itorGPGPUWalkerCmd); // check PIPE_CONTROLs auto itorBeforePC = reverse_find(rItorGPGPUWalkerCmd, cmdList.rbegin()); ASSERT_NE(cmdList.rbegin(), itorBeforePC); auto pBeforePC = genCmdCast(*itorBeforePC); ASSERT_NE(nullptr, pBeforePC); EXPECT_EQ(1u, pBeforePC->getCommandStreamerStallEnable()); auto itorAfterPC = find(itorGPGPUWalkerCmd, cmdList.end()); ASSERT_NE(cmdList.end(), itorAfterPC); auto pAfterPC = genCmdCast(*itorAfterPC); ASSERT_NE(nullptr, pAfterPC); EXPECT_EQ(1u, 
pAfterPC->getCommandStreamerStallEnable()); EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, pBeforePC->getPostSyncOperation()); // expect MI_REPORT_PERF_COUNT after WALKER auto itorAfterReportPerf = find(itorGPGPUWalkerCmd, cmdList.end()); ASSERT_NE(cmdList.end(), itorAfterReportPerf); EXPECT_TRUE(static_cast *>(event)->calcProfilingData()); clReleaseEvent(event); } struct MockTimestampContainer : public TimestampPacketContainer { ~MockTimestampContainer() override { for (const auto &node : timestampPacketNodes) { auto mockNode = static_cast> *>(node); delete mockNode->tagForCpuAccess; delete node; } timestampPacketNodes.clear(); } }; struct ProfilingTimestampPacketsTest : public ::testing::Test { void SetUp() override { DebugManager.flags.ReturnRawGpuTimestamps.set(true); cmdQ->setProfilingEnabled(); ev->timestampPacketContainer = std::make_unique(); } void addTimestampNode(uint32_t contextStart, uint32_t contextEnd, uint32_t globalStart, uint32_t globalEnd) { auto node = new MockTagNode>(); auto timestampPacketStorage = new TimestampPackets(); node->tagForCpuAccess = timestampPacketStorage; uint32_t values[4] = {contextStart, globalStart, contextEnd, globalEnd}; timestampPacketStorage->assignDataToAllTimestamps(0, values); ev->timestampPacketContainer->add(node); } void addTimestampNodeMultiOsContext(uint32_t globalStart[16], uint32_t globalEnd[16], uint32_t contextStart[16], uint32_t contextEnd[16], uint32_t size) { auto node = new MockTagNode>(); auto timestampPacketStorage = new TimestampPackets(); node->setPacketsUsed(size); for (uint32_t i = 0u; i < node->getPacketsUsed(); ++i) { uint32_t values[4] = {contextStart[i], globalStart[i], contextEnd[i], globalEnd[i]}; timestampPacketStorage->assignDataToAllTimestamps(i, values); } node->tagForCpuAccess = timestampPacketStorage; ev->timestampPacketContainer->add(node); } void initTimestampNodeMultiOsContextData(uint32_t globalStart[16], uint32_t globalEnd[16], uint32_t size) { for (uint32_t i = 0u; i 
< size; ++i) { globalStart[i] = 100; } globalStart[5] = {50}; for (uint32_t i = 0u; i < size; ++i) { globalEnd[i] = 200; } globalEnd[7] = {350}; } DebugManagerStateRestore restorer; MockContext context; cl_command_queue_properties props[5] = {0, 0, 0, 0, 0}; ReleaseableObjectPtr cmdQ = clUniquePtr(new MockCommandQueue(&context, context.getDevice(0), props, false)); ReleaseableObjectPtr> ev = clUniquePtr(new MockEvent(cmdQ.get(), CL_COMMAND_USER, CompletionStamp::notReady, CompletionStamp::notReady)); }; TEST_F(ProfilingTimestampPacketsTest, givenTimestampsPacketContainerWithOneElementAndTimestampNodeWhenCalculatingProfilingThenTimesAreTakenFromPacket) { addTimestampNode(10, 11, 12, 13); HwTimeStamps hwTimestamps; hwTimestamps.ContextStartTS = 100; hwTimestamps.ContextEndTS = 110; hwTimestamps.GlobalStartTS = 120; MockTagNode hwTimestampsNode; hwTimestampsNode.tagForCpuAccess = &hwTimestamps; ev->timeStampNode = &hwTimestampsNode; ev->calcProfilingData(); EXPECT_EQ(12u, ev->getStartTimeStamp()); EXPECT_EQ(13u, ev->getEndTimeStamp()); EXPECT_EQ(12u, ev->getGlobalStartTimestamp()); ev->timeStampNode = nullptr; } TEST_F(ProfilingTimestampPacketsTest, givenMultiOsContextCapableSetToTrueWhenCalcProfilingDataIsCalledThenCorrectedValuesAreReturned) { uint32_t globalStart[16] = {0}; uint32_t globalEnd[16] = {0}; uint32_t contextStart[16] = {0}; uint32_t contextEnd[16] = {0}; initTimestampNodeMultiOsContextData(globalStart, globalEnd, 16u); addTimestampNodeMultiOsContext(globalStart, globalEnd, contextStart, contextEnd, 16u); auto &device = reinterpret_cast(cmdQ->getDevice()); auto &csr = device.getUltCommandStreamReceiver(); csr.multiOsContextCapable = true; ev->calcProfilingData(); EXPECT_EQ(50u, ev->getStartTimeStamp()); EXPECT_EQ(350u, ev->getEndTimeStamp()); } TEST_F(ProfilingTimestampPacketsTest, givenTimestampPacketWithoutProfilingDataWhenCalculatingThenDontUseThatPacket) { uint32_t globalStart0 = 20; uint32_t globalEnd0 = 51; uint32_t contextStart0 = 21; uint32_t 
contextEnd0 = 50; uint32_t globalStart1 = globalStart0 - 1; uint32_t globalEnd1 = globalEnd0 + 1; uint32_t contextStart1 = contextStart0 - 1; uint32_t contextEnd1 = contextEnd0 + 1; addTimestampNodeMultiOsContext(&globalStart0, &globalEnd0, &contextStart0, &contextEnd0, 1); addTimestampNodeMultiOsContext(&globalStart1, &globalEnd1, &contextStart1, &contextEnd1, 1); auto &device = reinterpret_cast(cmdQ->getDevice()); auto &csr = device.getUltCommandStreamReceiver(); csr.multiOsContextCapable = true; ev->timestampPacketContainer->peekNodes()[1]->setProfilingCapable(false); ev->calcProfilingData(); EXPECT_EQ(static_cast(globalStart0), ev->getStartTimeStamp()); EXPECT_EQ(static_cast(globalEnd0), ev->getEndTimeStamp()); } TEST_F(ProfilingTimestampPacketsTest, givenPrintTimestampPacketContentsSetWhenCalcProfilingDataThenTimeStampsArePrinted) { DebugManagerStateRestore restorer; DebugManager.flags.PrintTimestampPacketContents.set(true); testing::internal::CaptureStdout(); auto &device = reinterpret_cast(cmdQ->getDevice()); auto &csr = device.getUltCommandStreamReceiver(); csr.multiOsContextCapable = true; uint32_t globalStart[16] = {0}; uint32_t globalEnd[16] = {0}; uint32_t contextStart[16] = {0}; uint32_t contextEnd[16] = {0}; for (int i = 0; i < 16; i++) { globalStart[i] = 2 * i; globalEnd[i] = 500 * i; contextStart[i] = 7 * i; contextEnd[i] = 94 * i; } addTimestampNodeMultiOsContext(globalStart, globalEnd, contextStart, contextEnd, 16u); ev->calcProfilingData(); std::string output = testing::internal::GetCapturedStdout(); std::stringstream expected; expected << "Timestamp 0, cmd type: " << ev->getCommandType() << ", "; for (int i = 0; i < 16; i++) { expected << "packet " << i << ": " << "global start: " << globalStart[i] << ", " << "global end: " << globalEnd[i] << ", " << "context start: " << contextStart[i] << ", " << "context end: " << contextEnd[i] << ", " << "global delta: " << globalEnd[i] - globalStart[i] << ", " << "context delta: " << contextEnd[i] - 
contextStart[i] << std::endl; } EXPECT_EQ(0, output.compare(expected.str().c_str())); } TEST_F(ProfilingTimestampPacketsTest, givenTimestampsPacketContainerWithThreeElementsWhenCalculatingProfilingThenTimesAreTakenFromProperPacket) { addTimestampNode(10, 11, 12, 13); addTimestampNode(1, 21, 22, 13); addTimestampNode(5, 31, 2, 13); ev->calcProfilingData(); EXPECT_EQ(2u, ev->getStartTimeStamp()); EXPECT_EQ(13u, ev->getEndTimeStamp()); EXPECT_EQ(2u, ev->getGlobalStartTimestamp()); } TEST_F(ProfilingTimestampPacketsTest, givenTimestampsPacketContainerWithZeroElementsWhenCalculatingProfilingThenDataIsNotCalculated) { EXPECT_EQ(0u, ev->timestampPacketContainer->peekNodes().size()); ev->calcProfilingData(); EXPECT_FALSE(ev->getDataCalcStatus()); } } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/program/000077500000000000000000000000001422164147700232765ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/program/CMakeLists.txt000066400000000000000000000030231422164147700260340ustar00rootroot00000000000000# # Copyright (C) 2018-2022 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_program ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/kernel_data.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_data_OCL2_0.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_info_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_info_from_patchtokens_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/printf_handler_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/printf_helper_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/process_debug_data_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/process_elf_binary_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/process_spir_binary_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/program_data_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/program_from_binary.h ${CMAKE_CURRENT_SOURCE_DIR}/program_nonuniform.cpp ${CMAKE_CURRENT_SOURCE_DIR}/program_spec_constants_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/program_tests.cpp 
${CMAKE_CURRENT_SOURCE_DIR}/program_tests.h ${CMAKE_CURRENT_SOURCE_DIR}/program_with_kernel_debug_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/program_with_source.h ${CMAKE_CURRENT_SOURCE_DIR}/program_with_zebin.h ${CMAKE_CURRENT_SOURCE_DIR}/program_with_zebin.cpp ${CMAKE_CURRENT_SOURCE_DIR}/program_with_zebin_tests.cpp ) get_property(NEO_CORE_SRCS_tests_program GLOBAL PROPERTY NEO_CORE_SRCS_tests_program) list(APPEND IGDRCL_SRCS_tests_program ${NEO_CORE_SRCS_tests_program} ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_program}) add_subdirectories() compute-runtime-22.14.22890/opencl/test/unit_test/program/evaluate_unhandled_token_tests.cpp000066400000000000000000000146701422164147700322640ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/device_binary_format/patchtokens_decoder.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/test/unit_test/device_binary_format/patchtokens_tests.h" #include "opencl/source/program/create.inl" #include "opencl/source/program/program.h" #include "gtest/gtest.h" using namespace NEO; extern GFXCORE_FAMILY renderCoreFamily; template inline void PushBackToken(ContainerT &container, const TokenT &token) { container.insert(container.end(), reinterpret_cast(&token), reinterpret_cast(&token) + sizeof(token)); } struct MockProgramRecordUnhandledTokens : public Program { bool allowUnhandledTokens; mutable int lastUnhandledTokenFound; MockProgramRecordUnhandledTokens(ExecutionEnvironment &executionEnvironment) : Program(executionEnvironment) {} MockProgramRecordUnhandledTokens(ExecutionEnvironment &executionEnvironment, Context *context, bool isBuiltinKernel) : Program(executionEnvironment, context, isBuiltinKernel) {} bool isSafeToSkipUnhandledToken(unsigned int token) const override { lastUnhandledTokenFound = static_cast(token); return allowUnhandledTokens; } bool getDefaultIsSafeToSkipUnhandledToken() const 
{ return Program::isSafeToSkipUnhandledToken(iOpenCL::NUM_PATCH_TOKENS); } }; inline cl_int GetDecodeErrorCode(const std::vector &binary, bool allowUnhandledTokens, int defaultUnhandledTokenId, int &foundUnhandledTokenId) { NEO::ExecutionEnvironment executionEnvironment; using PT = MockProgramRecordUnhandledTokens; std::unique_ptr prog; cl_int errorCode = CL_INVALID_BINARY; prog.reset(NEO::Program::createFromGenBinary(executionEnvironment, nullptr, binary.data(), binary.size(), false, &errorCode)); prog->allowUnhandledTokens = allowUnhandledTokens; prog->lastUnhandledTokenFound = defaultUnhandledTokenId; auto ret = prog->processGenBinary(); foundUnhandledTokenId = prog->lastUnhandledTokenFound; return ret; }; inline std::vector CreateBinary(bool addUnhandledProgramScopePatchToken, bool addUnhandledKernelScopePatchToken, int32_t unhandledTokenId = static_cast(iOpenCL::NUM_PATCH_TOKENS)) { std::vector ret; if (addUnhandledProgramScopePatchToken && addUnhandledKernelScopePatchToken) { return {}; } if (addUnhandledProgramScopePatchToken) { PatchTokensTestData::ValidProgramWithConstantSurface programWithUnhandledToken; iOpenCL::SPatchItemHeader &unhandledToken = *programWithUnhandledToken.constSurfMutable; unhandledToken.Size += programWithUnhandledToken.constSurfMutable->InlineDataSize; unhandledToken.Token = static_cast(unhandledTokenId); ret.assign(reinterpret_cast(programWithUnhandledToken.storage.data()), reinterpret_cast(programWithUnhandledToken.storage.data() + programWithUnhandledToken.storage.size())); } else if (addUnhandledKernelScopePatchToken) { PatchTokensTestData::ValidProgramWithKernelAndArg programWithKernelWithUnhandledToken; iOpenCL::SPatchItemHeader &unhandledToken = *programWithKernelWithUnhandledToken.arg0InfoMutable; unhandledToken.Token = static_cast(unhandledTokenId); programWithKernelWithUnhandledToken.recalcTokPtr(); ret.assign(reinterpret_cast(programWithKernelWithUnhandledToken.storage.data()), 
reinterpret_cast(programWithKernelWithUnhandledToken.storage.data() + programWithKernelWithUnhandledToken.storage.size())); } else { PatchTokensTestData::ValidProgramWithKernel regularProgramTokens; ret.assign(reinterpret_cast(regularProgramTokens.storage.data()), reinterpret_cast(regularProgramTokens.storage.data() + regularProgramTokens.storage.size())); } return ret; } constexpr int32_t unhandledTokenId = iOpenCL::NUM_PATCH_TOKENS; TEST(EvaluateUnhandledToken, GivenDefaultWhenSkippingUnhandledTokenThenUltAreNotAffected) { ExecutionEnvironment executionEnvironment; MockProgramRecordUnhandledTokens program(executionEnvironment); EXPECT_TRUE(program.getDefaultIsSafeToSkipUnhandledToken()); } TEST(EvaluateUnhandledToken, GivenAllTokensAreSupportedWhenDecodingProgramBinaryThenDecodingSucceeds) { int lastUnhandledTokenFound = -1; auto retVal = GetDecodeErrorCode(CreateBinary(false, false), false, -7, lastUnhandledTokenFound); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(-7, lastUnhandledTokenFound); } TEST(EvaluateUnhandledToken, GivenUnhandledTokenIsFoundAndIsSafeToSkipWhenDecodingProgramBinaryThenDecodingSucceeds) { int lastUnhandledTokenFound = -1; auto retVal = GetDecodeErrorCode(CreateBinary(true, false, unhandledTokenId), true, -7, lastUnhandledTokenFound); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(unhandledTokenId, lastUnhandledTokenFound); } TEST(EvaluateUnhandledToken, GivenUnhandledTokenIsFoundAndIsUnsafeToSkipWhenDecodingProgramBinaryThenDecodingFails) { int lastUnhandledTokenFound = -1; auto retVal = GetDecodeErrorCode(CreateBinary(true, false, unhandledTokenId), false, -7, lastUnhandledTokenFound); EXPECT_EQ(CL_INVALID_BINARY, retVal); EXPECT_EQ(unhandledTokenId, lastUnhandledTokenFound); } TEST(EvaluateUnhandledToken, GivenUnhandledTokenIsFoundAndIsSafeToSkipWhenDecodingKernelBinaryThenDecodingSucceeds) { int lastUnhandledTokenFound = -1; auto retVal = GetDecodeErrorCode(CreateBinary(false, true, unhandledTokenId), true, -7, lastUnhandledTokenFound); 
EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(unhandledTokenId, lastUnhandledTokenFound); } TEST(EvaluateUnhandledToken, GivenUnhandledTokenIsFoundAndIsUnsafeToSkipWhenDecodingKernelBinaryThenDecodingFails) { int lastUnhandledTokenFound = -1; auto retVal = GetDecodeErrorCode(CreateBinary(false, true, unhandledTokenId), false, -7, lastUnhandledTokenFound); EXPECT_EQ(CL_INVALID_BINARY, retVal); EXPECT_EQ(unhandledTokenId, lastUnhandledTokenFound); } compute-runtime-22.14.22890/opencl/test/unit_test/program/kernel_data.cpp000066400000000000000000001620751422164147700262660ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/device_binary_format/patchtokens_decoder.h" #include "shared/source/helpers/string.h" #include "shared/test/unit_test/helpers/gtest_helpers.h" #include "opencl/source/platform/platform.h" #include "opencl/source/program/program.h" #include "opencl/test/unit_test/fixtures/kernel_data_fixture.h" TEST_F(KernelDataTest, GivenKernelNameWhenBuildingThenProgramIsCorrect) { kernelName = "myTestKernel"; kernelNameSize = (uint32_t)alignUp(strlen(kernelName.c_str()) + 1, sizeof(uint32_t)); buildAndDecode(); } TEST_F(KernelDataTest, GivenHeapsWhenBuildingThenProgramIsCorrect) { char gshData[8] = "a"; char dshData[8] = "bb"; char sshData[8] = "ccc"; char kernelHeapData[8] = "dddd"; pGsh = gshData; pDsh = dshData; pSsh = sshData; pKernelHeap = kernelHeapData; gshSize = 4; dshSize = 4; sshSize = 4; kernelHeapSize = 4; buildAndDecode(); } TEST_F(KernelDataTest, GivenAllocateLocalSurfaceWhenBuildingThenProgramIsCorrect) { iOpenCL::SPatchAllocateLocalSurface allocateLocalSurface; allocateLocalSurface.Token = PATCH_TOKEN_ALLOCATE_LOCAL_SURFACE; allocateLocalSurface.Size = sizeof(SPatchAllocateLocalSurface); allocateLocalSurface.Offset = 0; // think this is SSH offset for local memory when we used to have surface state for local memory allocateLocalSurface.TotalInlineLocalMemorySize = 4; 
// 4 bytes of local memory just for test pPatchList = &allocateLocalSurface; patchListSize = allocateLocalSurface.Size; buildAndDecode(); EXPECT_EQ_VAL(allocateLocalSurface.TotalInlineLocalMemorySize, pKernelInfo->kernelDescriptor.kernelAttributes.slmInlineSize); } TEST_F(KernelDataTest, GivenAllocateStatelessConstantMemoryWithInitWhenBuildingThenProgramIsCorrect) { iOpenCL::SPatchAllocateStatelessConstantMemorySurfaceWithInitialization allocateStatelessConstantMemoryWithInit; allocateStatelessConstantMemoryWithInit.Token = PATCH_TOKEN_ALLOCATE_STATELESS_CONSTANT_MEMORY_SURFACE_WITH_INITIALIZATION; allocateStatelessConstantMemoryWithInit.Size = sizeof(SPatchAllocateStatelessConstantMemorySurfaceWithInitialization); allocateStatelessConstantMemoryWithInit.ConstantBufferIndex = 0; allocateStatelessConstantMemoryWithInit.SurfaceStateHeapOffset = 0xddu; pPatchList = &allocateStatelessConstantMemoryWithInit; patchListSize = allocateStatelessConstantMemoryWithInit.Size; buildAndDecode(); EXPECT_EQ_VAL(0xddu, pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.globalConstantsSurfaceAddress.bindful); } TEST_F(KernelDataTest, GivenAllocateStatelessGlobalMemoryWithInitWhenBuildingThenProgramIsCorrect) { iOpenCL::SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization allocateStatelessGlobalMemoryWithInit; allocateStatelessGlobalMemoryWithInit.Token = PATCH_TOKEN_ALLOCATE_STATELESS_GLOBAL_MEMORY_SURFACE_WITH_INITIALIZATION; allocateStatelessGlobalMemoryWithInit.Size = sizeof(SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization); allocateStatelessGlobalMemoryWithInit.GlobalBufferIndex = 0; allocateStatelessGlobalMemoryWithInit.SurfaceStateHeapOffset = 0xddu; pPatchList = &allocateStatelessGlobalMemoryWithInit; patchListSize = allocateStatelessGlobalMemoryWithInit.Size; buildAndDecode(); EXPECT_EQ_VAL(0xddu, pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.globalVariablesSurfaceAddress.bindful); } TEST_F(KernelDataTest, 
GivenPrintfStringWhenBuildingThenProgramIsCorrect) { char stringValue[] = "%d\n"; size_t strSize = strlen(stringValue) + 1; iOpenCL::SPatchString printfString; printfString.Token = PATCH_TOKEN_STRING; printfString.Size = static_cast(sizeof(SPatchString) + strSize); printfString.Index = 0; printfString.StringSize = static_cast(strSize); iOpenCL::SPatchString emptyString; emptyString.Token = PATCH_TOKEN_STRING; emptyString.Size = static_cast(sizeof(SPatchString)); emptyString.Index = 1; emptyString.StringSize = 0; cl_char *pPrintfString = new cl_char[printfString.Size + emptyString.Size]; memcpy_s(pPrintfString, sizeof(SPatchString), &printfString, sizeof(SPatchString)); memcpy_s((cl_char *)pPrintfString + sizeof(printfString), strSize, stringValue, strSize); memcpy_s((cl_char *)pPrintfString + printfString.Size, emptyString.Size, &emptyString, emptyString.Size); pPatchList = (void *)pPrintfString; patchListSize = printfString.Size + emptyString.Size; buildAndDecode(); EXPECT_EQ_VAL(0, strcmp(stringValue, pKernelInfo->kernelDescriptor.kernelMetadata.printfStringsMap.find(0)->second.c_str())); delete[] pPrintfString; } TEST_F(KernelDataTest, GivenMediaVfeStateWhenBuildingThenProgramIsCorrect) { iOpenCL::SPatchMediaVFEState MediaVFEState; MediaVFEState.Token = PATCH_TOKEN_MEDIA_VFE_STATE; MediaVFEState.Size = sizeof(SPatchMediaVFEState); MediaVFEState.PerThreadScratchSpace = 1; // lets say 1KB of perThreadScratchSpace MediaVFEState.ScratchSpaceOffset = 0; pPatchList = &MediaVFEState; patchListSize = MediaVFEState.Size; buildAndDecode(); EXPECT_EQ_VAL(MediaVFEState.PerThreadScratchSpace, pKernelInfo->kernelDescriptor.kernelAttributes.perThreadScratchSize[0]); } TEST_F(KernelDataTest, WhenMediaVfeStateSlot1TokenIsParsedThenCorrectValuesAreSet) { iOpenCL::SPatchMediaVFEState MediaVFEState; MediaVFEState.Token = PATCH_TOKEN_MEDIA_VFE_STATE_SLOT1; MediaVFEState.Size = sizeof(SPatchMediaVFEState); MediaVFEState.PerThreadScratchSpace = 1; MediaVFEState.ScratchSpaceOffset = 0; 
pPatchList = &MediaVFEState; patchListSize = MediaVFEState.Size; buildAndDecode(); EXPECT_EQ_VAL(MediaVFEState.PerThreadScratchSpace, pKernelInfo->kernelDescriptor.kernelAttributes.perThreadScratchSize[1]); } TEST_F(KernelDataTest, GivenSyncBufferTokenWhenParsingProgramThenTokenIsFound) { SPatchAllocateSyncBuffer token; token.Token = PATCH_TOKEN_ALLOCATE_SYNC_BUFFER; token.Size = static_cast(sizeof(SPatchAllocateSyncBuffer)); token.SurfaceStateHeapOffset = 32; token.DataParamOffset = 1024; token.DataParamSize = 2; pPatchList = &token; patchListSize = token.Size; buildAndDecode(); EXPECT_TRUE(pKernelInfo->kernelDescriptor.kernelAttributes.flags.usesSyncBuffer); EXPECT_EQ(token.SurfaceStateHeapOffset, pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.syncBufferAddress.bindful); EXPECT_EQ(token.DataParamOffset, pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.syncBufferAddress.stateless); EXPECT_EQ(token.DataParamSize, pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.syncBufferAddress.pointerSize); } TEST_F(KernelDataTest, GivenSamplerArgumentWhenBuildingThenProgramIsCorrect) { iOpenCL::SPatchSamplerKernelArgument samplerData; samplerData.Token = PATCH_TOKEN_SAMPLER_KERNEL_ARGUMENT; samplerData.ArgumentNumber = 3; samplerData.Offset = 0x40; samplerData.Type = iOpenCL::SAMPLER_OBJECT_TEXTURE; samplerData.Size = sizeof(samplerData); pPatchList = &samplerData; patchListSize = samplerData.Size; buildAndDecode(); EXPECT_TRUE(pKernelInfo->getArgDescriptorAt(3).is()); EXPECT_EQ_VAL(samplerData.Offset, pKernelInfo->getArgDescriptorAt(3).as().bindful); } TEST_F(KernelDataTest, GivenAcceleratorArgumentWhenBuildingThenProgramIsCorrect) { iOpenCL::SPatchSamplerKernelArgument samplerData; samplerData.Token = PATCH_TOKEN_SAMPLER_KERNEL_ARGUMENT; samplerData.ArgumentNumber = 3; samplerData.Offset = 0x40; samplerData.Type = iOpenCL::SAMPLER_OBJECT_VME; samplerData.Size = sizeof(samplerData); pPatchList = &samplerData; patchListSize = samplerData.Size; 
buildAndDecode(); EXPECT_TRUE(pKernelInfo->getArgDescriptorAt(3).is()); EXPECT_TRUE(pKernelInfo->getArgDescriptorAt(3).getExtendedTypeInfo().isAccelerator); EXPECT_EQ_VAL(samplerData.Offset, pKernelInfo->getArgDescriptorAt(3).as().bindful); } TEST_F(KernelDataTest, GivenBindingTableStateWhenBuildingThenProgramIsCorrect) { iOpenCL::SPatchBindingTableState bindingTableState; bindingTableState.Token = PATCH_TOKEN_BINDING_TABLE_STATE; bindingTableState.Size = sizeof(SPatchBindingTableState); bindingTableState.Count = 0xaa; bindingTableState.Offset = 0xbb; bindingTableState.SurfaceStateOffset = 0xcc; pPatchList = &bindingTableState; patchListSize = bindingTableState.Size; buildAndDecode(); EXPECT_EQ_CONST(bindingTableState.Count, pKernelInfo->kernelDescriptor.payloadMappings.bindingTable.numEntries); EXPECT_EQ_CONST(bindingTableState.Offset, pKernelInfo->kernelDescriptor.payloadMappings.bindingTable.tableOffset); } TEST_F(KernelDataTest, GivenDataParameterStreamWhenBuildingThenProgramIsCorrect) { iOpenCL::SPatchDataParameterStream dataParameterStream; dataParameterStream.Token = PATCH_TOKEN_DATA_PARAMETER_STREAM; dataParameterStream.Size = sizeof(SPatchDataParameterStream); dataParameterStream.DataParameterStreamSize = 64; pPatchList = &dataParameterStream; patchListSize = dataParameterStream.Size; buildAndDecode(); EXPECT_EQ_CONST(dataParameterStream.DataParameterStreamSize, pKernelInfo->kernelDescriptor.kernelAttributes.crossThreadDataSize); } TEST_F(KernelDataTest, GivenExecutionEnvironmentNoReqdWorkGroupSizeWhenBuildingThenProgramIsCorrect) { iOpenCL::SPatchExecutionEnvironment executionEnvironment = {}; executionEnvironment.Token = PATCH_TOKEN_EXECUTION_ENVIRONMENT; executionEnvironment.Size = sizeof(SPatchExecutionEnvironment); executionEnvironment.RequiredWorkGroupSizeX = 0; executionEnvironment.RequiredWorkGroupSizeY = 0; executionEnvironment.RequiredWorkGroupSizeZ = 0; executionEnvironment.LargestCompiledSIMDSize = 32; 
executionEnvironment.CompiledSubGroupsNumber = 0xaa; executionEnvironment.HasBarriers = false; executionEnvironment.DisableMidThreadPreemption = true; executionEnvironment.MayAccessUndeclaredResource = false; executionEnvironment.UsesFencesForReadWriteImages = false; executionEnvironment.UsesStatelessSpillFill = false; executionEnvironment.IsCoherent = true; executionEnvironment.IsInitializer = false; executionEnvironment.IsFinalizer = false; executionEnvironment.SubgroupIndependentForwardProgressRequired = false; executionEnvironment.CompiledForGreaterThan4GBBuffers = false; executionEnvironment.IndirectStatelessCount = 0; pPatchList = &executionEnvironment; patchListSize = executionEnvironment.Size; buildAndDecode(); EXPECT_EQ_VAL(0, pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0]); EXPECT_EQ_VAL(0, pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1]); EXPECT_EQ_VAL(0, pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2]); EXPECT_FALSE(pKernelInfo->hasIndirectStatelessAccess); } TEST_F(KernelDataTest, GivenExecutionEnvironmentWhenBuildingThenProgramIsCorrect) { iOpenCL::SPatchExecutionEnvironment executionEnvironment = {}; executionEnvironment.Token = PATCH_TOKEN_EXECUTION_ENVIRONMENT; executionEnvironment.Size = sizeof(SPatchExecutionEnvironment); executionEnvironment.RequiredWorkGroupSizeX = 32; executionEnvironment.RequiredWorkGroupSizeY = 16; executionEnvironment.RequiredWorkGroupSizeZ = 8; executionEnvironment.LargestCompiledSIMDSize = 32; executionEnvironment.CompiledSubGroupsNumber = 0xaa; executionEnvironment.HasBarriers = false; executionEnvironment.DisableMidThreadPreemption = true; executionEnvironment.MayAccessUndeclaredResource = false; executionEnvironment.UsesFencesForReadWriteImages = false; executionEnvironment.UsesStatelessSpillFill = false; executionEnvironment.IsCoherent = true; executionEnvironment.IsInitializer = false; executionEnvironment.IsFinalizer = false; 
executionEnvironment.SubgroupIndependentForwardProgressRequired = false; executionEnvironment.CompiledForGreaterThan4GBBuffers = false; executionEnvironment.IndirectStatelessCount = 1; pPatchList = &executionEnvironment; patchListSize = executionEnvironment.Size; buildAndDecode(); EXPECT_EQ(32u, pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0]); EXPECT_EQ(16u, pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1]); EXPECT_EQ(8u, pKernelInfo->kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2]); EXPECT_TRUE(pKernelInfo->hasIndirectStatelessAccess); EXPECT_EQ(KernelDescriptor::BindfulAndStateless, pKernelInfo->kernelDescriptor.kernelAttributes.bufferAddressingMode); } TEST_F(KernelDataTest, GivenExecutionEnvironmentCompiledForGreaterThan4gbBuffersWhenBuildingThenProgramIsCorrect) { iOpenCL::SPatchExecutionEnvironment executionEnvironment = {}; executionEnvironment.Token = PATCH_TOKEN_EXECUTION_ENVIRONMENT; executionEnvironment.Size = sizeof(SPatchExecutionEnvironment); executionEnvironment.RequiredWorkGroupSizeX = 32; executionEnvironment.RequiredWorkGroupSizeY = 16; executionEnvironment.RequiredWorkGroupSizeZ = 8; executionEnvironment.LargestCompiledSIMDSize = 32; executionEnvironment.CompiledSubGroupsNumber = 0xaa; executionEnvironment.HasBarriers = false; executionEnvironment.DisableMidThreadPreemption = true; executionEnvironment.MayAccessUndeclaredResource = false; executionEnvironment.UsesFencesForReadWriteImages = false; executionEnvironment.UsesStatelessSpillFill = false; executionEnvironment.IsCoherent = true; executionEnvironment.IsInitializer = false; executionEnvironment.IsFinalizer = false; executionEnvironment.SubgroupIndependentForwardProgressRequired = false; executionEnvironment.CompiledForGreaterThan4GBBuffers = true; pPatchList = &executionEnvironment; patchListSize = executionEnvironment.Size; buildAndDecode(); EXPECT_EQ(KernelDescriptor::Stateless, 
pKernelInfo->kernelDescriptor.kernelAttributes.bufferAddressingMode); } TEST_F(KernelDataTest, WhenDecodingExecutionEnvironmentTokenThenWalkOrderIsForcedToXMajor) { iOpenCL::SPatchExecutionEnvironment executionEnvironment = {}; executionEnvironment.Token = PATCH_TOKEN_EXECUTION_ENVIRONMENT; executionEnvironment.Size = sizeof(SPatchExecutionEnvironment); pPatchList = &executionEnvironment; patchListSize = executionEnvironment.Size; buildAndDecode(); const uint8_t expectedWalkOrder[3] = {0, 1, 2}; const uint8_t expectedDimsIds[3] = {0, 1, 2}; EXPECT_EQ(0, memcmp(expectedWalkOrder, pKernelInfo->kernelDescriptor.kernelAttributes.workgroupWalkOrder, sizeof(expectedWalkOrder))); EXPECT_EQ(0, memcmp(expectedDimsIds, pKernelInfo->kernelDescriptor.kernelAttributes.workgroupDimensionsOrder, sizeof(expectedDimsIds))); EXPECT_FALSE(pKernelInfo->kernelDescriptor.kernelAttributes.flags.requiresWorkgroupWalkOrder); } TEST_F(KernelDataTest, whenWorkgroupOrderIsSpecifiedViaPatchTokenThenProperWorkGroupOrderIsParsed) { iOpenCL::SPatchExecutionEnvironment executionEnvironment = {}; executionEnvironment.Token = PATCH_TOKEN_EXECUTION_ENVIRONMENT; executionEnvironment.Size = sizeof(SPatchExecutionEnvironment); //dim0 : [0 : 1]; dim1 : [2 : 3]; dim2 : [4 : 5] executionEnvironment.WorkgroupWalkOrderDims = 1 | (2 << 2); pPatchList = &executionEnvironment; patchListSize = executionEnvironment.Size; buildAndDecode(); uint8_t expectedWalkOrder[3] = {1, 2, 0}; uint8_t expectedDimsIds[3] = {2, 0, 1}; EXPECT_EQ(0, memcmp(expectedWalkOrder, pKernelInfo->kernelDescriptor.kernelAttributes.workgroupWalkOrder, sizeof(expectedWalkOrder))); EXPECT_EQ(0, memcmp(expectedDimsIds, pKernelInfo->kernelDescriptor.kernelAttributes.workgroupDimensionsOrder, sizeof(expectedDimsIds))); EXPECT_TRUE(pKernelInfo->kernelDescriptor.kernelAttributes.flags.requiresWorkgroupWalkOrder); } TEST_F(KernelDataTest, whenWorkgroupOrderIsSpecifiedViaPatchToken2ThenProperWorkGroupOrderIsParsed) { 
iOpenCL::SPatchExecutionEnvironment executionEnvironment = {}; executionEnvironment.Token = PATCH_TOKEN_EXECUTION_ENVIRONMENT; executionEnvironment.Size = sizeof(SPatchExecutionEnvironment); //dim0 : [0 : 1]; dim1 : [2 : 3]; dim2 : [4 : 5] executionEnvironment.WorkgroupWalkOrderDims = 2 | (1 << 4); pPatchList = &executionEnvironment; patchListSize = executionEnvironment.Size; buildAndDecode(); uint8_t expectedWalkOrder[3] = {2, 0, 1}; uint8_t expectedDimsIds[3] = {1, 2, 0}; EXPECT_EQ(0, memcmp(expectedWalkOrder, pKernelInfo->kernelDescriptor.kernelAttributes.workgroupWalkOrder, sizeof(expectedWalkOrder))); EXPECT_EQ(0, memcmp(expectedDimsIds, pKernelInfo->kernelDescriptor.kernelAttributes.workgroupDimensionsOrder, sizeof(expectedDimsIds))); EXPECT_TRUE(pKernelInfo->kernelDescriptor.kernelAttributes.flags.requiresWorkgroupWalkOrder); } // Test all the different data parameters with the same "made up" data class DataParameterTest : public KernelDataTest, public testing::WithParamInterface {}; TEST_P(DataParameterTest, GivenTokenTypeWhenBuildingThenProgramIsCorrect) { SPatchDataParameterBuffer dataParameterToken; dataParameterToken.Token = PATCH_TOKEN_DATA_PARAMETER_BUFFER; dataParameterToken.Size = sizeof(SPatchDataParameterBuffer); dataParameterToken.Type = GetParam(); dataParameterToken.ArgumentNumber = 1; dataParameterToken.DataSize = sizeof(uint32_t); dataParameterToken.LocationIndex = 0x0; dataParameterToken.LocationIndex2 = 0x0; dataParameterToken.Offset = 0; dataParameterToken.SourceOffset = 8; pPatchList = &dataParameterToken; patchListSize = dataParameterToken.Size; buildAndDecode(); if (DATA_PARAMETER_KERNEL_ARGUMENT == GetParam()) { ASSERT_NE(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size()); EXPECT_EQ(dataParameterToken.ArgumentNumber, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments[0].argNum); EXPECT_EQ(dataParameterToken.Offset, 
pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments[0].byValueElement.offset); EXPECT_EQ(dataParameterToken.SourceOffset, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments[0].byValueElement.sourceOffset); EXPECT_EQ(dataParameterToken.DataSize, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments[0].byValueElement.size); } else { EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size()); } if (pKernelInfo->kernelDescriptor.payloadMappings.explicitArgs.size() > 0) { EXPECT_EQ(dataParameterToken.ArgumentNumber + 1, pKernelInfo->kernelDescriptor.payloadMappings.explicitArgs.size()); const auto &arg = pKernelInfo->getArgDescriptorAt(dataParameterToken.ArgumentNumber); if (arg.is()) { const auto &argAsPtr = arg.as(); EXPECT_EQ(GetParam() == DATA_PARAMETER_BUFFER_STATEFUL, argAsPtr.isPureStateful()); } } } // note that we start at '2' because we test kernel arg tokens elsewhere INSTANTIATE_TEST_CASE_P(DataParameterTests, DataParameterTest, testing::Range(2u, static_cast(NUM_DATA_PARAMETER_TOKENS))); class KernelDataParameterTest : public KernelDataTest {}; TEST_F(KernelDataParameterTest, GivenDataParameterBufferOffsetWhenBuildingThenProgramIsCorrect) { SPatchDataParameterBuffer dataParameterToken; dataParameterToken.Token = PATCH_TOKEN_DATA_PARAMETER_BUFFER; dataParameterToken.Size = sizeof(SPatchDataParameterBuffer); dataParameterToken.Type = DATA_PARAMETER_BUFFER_OFFSET; dataParameterToken.ArgumentNumber = 1; dataParameterToken.DataSize = sizeof(uint32_t); dataParameterToken.LocationIndex = 0x0; dataParameterToken.LocationIndex2 = 0x0; dataParameterToken.Offset = 128; dataParameterToken.SourceOffset = 8; pPatchList = &dataParameterToken; patchListSize = dataParameterToken.Size; buildAndDecode(); EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size()); ASSERT_EQ(2U, pKernelInfo->kernelDescriptor.payloadMappings.explicitArgs.size()); 
EXPECT_EQ_VAL(pKernelInfo->getArgDescriptorAt(1).as().bufferOffset, dataParameterToken.Offset) } TEST_F(KernelDataParameterTest, givenDataParameterBufferStatefulWhenDecodingThenSetArgAsPureStateful) { SPatchDataParameterBuffer dataParameterToken = {}; dataParameterToken.Token = PATCH_TOKEN_DATA_PARAMETER_BUFFER; dataParameterToken.Size = sizeof(SPatchDataParameterBuffer); dataParameterToken.Type = DATA_PARAMETER_BUFFER_STATEFUL; dataParameterToken.ArgumentNumber = 1; pPatchList = &dataParameterToken; patchListSize = dataParameterToken.Size; buildAndDecode(); EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size()); ASSERT_EQ(2U, pKernelInfo->getExplicitArgs().size()); EXPECT_TRUE(pKernelInfo->getArgDescriptorAt(1).as().isPureStateful()); } TEST_F(KernelDataParameterTest, givenUnknownDataParameterWhenDecodedThenParameterIsIgnored) { SPatchDataParameterBuffer dataParameterToken; dataParameterToken.Token = PATCH_TOKEN_DATA_PARAMETER_BUFFER; dataParameterToken.Size = sizeof(SPatchDataParameterBuffer); dataParameterToken.Type = NUM_DATA_PARAMETER_TOKENS + 1; dataParameterToken.ArgumentNumber = 1; dataParameterToken.DataSize = sizeof(uint32_t); dataParameterToken.LocationIndex = 0x0; dataParameterToken.LocationIndex2 = 0x0; dataParameterToken.Offset = 0; dataParameterToken.SourceOffset = 8; pPatchList = &dataParameterToken; patchListSize = dataParameterToken.Size; buildAndDecode(); EXPECT_EQ_VAL(0u, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size()); } TEST_F(KernelDataTest, GivenDataParameterSumOfLocalMemoryObjectArgumentSizesWhenBuildingThenProgramIsCorrect) { uint32_t argumentNumber = 1; uint32_t alignment = 16; uint32_t offsetCrossThread = 4; SPatchDataParameterBuffer dataParameterToken; dataParameterToken.Token = PATCH_TOKEN_DATA_PARAMETER_BUFFER; dataParameterToken.Size = sizeof(SPatchDataParameterBuffer); dataParameterToken.Type = DATA_PARAMETER_SUM_OF_LOCAL_MEMORY_OBJECT_ARGUMENT_SIZES; 
dataParameterToken.ArgumentNumber = argumentNumber; dataParameterToken.Offset = offsetCrossThread; dataParameterToken.DataSize = sizeof(uint32_t); dataParameterToken.SourceOffset = alignment; dataParameterToken.LocationIndex = 0x0; dataParameterToken.LocationIndex2 = 0x0; pPatchList = &dataParameterToken; patchListSize = dataParameterToken.Size; buildAndDecode(); EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size()); ASSERT_EQ(2U, pKernelInfo->getExplicitArgs().size()); const auto &argAsPtr = pKernelInfo->getArgDescriptorAt(argumentNumber).as(); EXPECT_EQ(alignment, argAsPtr.requiredSlmAlignment); ASSERT_EQ(offsetCrossThread, argAsPtr.slmOffset); } TEST_F(KernelDataTest, GivenDataParameterImageWidthWhenBuildingThenProgramIsCorrect) { uint32_t argumentNumber = 1; uint32_t alignment = 16; uint32_t offsetImgWidth = 4; SPatchDataParameterBuffer dataParameterToken; dataParameterToken.Token = PATCH_TOKEN_DATA_PARAMETER_BUFFER; dataParameterToken.Size = sizeof(SPatchDataParameterBuffer); dataParameterToken.Type = DATA_PARAMETER_IMAGE_WIDTH; dataParameterToken.ArgumentNumber = argumentNumber; dataParameterToken.Offset = offsetImgWidth; dataParameterToken.DataSize = sizeof(uint32_t); dataParameterToken.SourceOffset = alignment; dataParameterToken.LocationIndex = 0x0; dataParameterToken.LocationIndex2 = 0x0; pPatchList = &dataParameterToken; patchListSize = dataParameterToken.Size; buildAndDecode(); EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size()); ASSERT_EQ(2U, pKernelInfo->getExplicitArgs().size()); EXPECT_EQ(offsetImgWidth, pKernelInfo->getArgDescriptorAt(argumentNumber).as().metadataPayload.imgWidth); } TEST_F(KernelDataTest, GivenDataParameterImageHeightWhenBuildingThenProgramIsCorrect) { uint32_t argumentNumber = 1; uint32_t alignment = 16; uint32_t offsetImgHeight = 8; SPatchDataParameterBuffer dataParameterToken; dataParameterToken.Token = PATCH_TOKEN_DATA_PARAMETER_BUFFER; 
dataParameterToken.Size = sizeof(SPatchDataParameterBuffer); dataParameterToken.Type = DATA_PARAMETER_IMAGE_HEIGHT; dataParameterToken.ArgumentNumber = argumentNumber; dataParameterToken.Offset = offsetImgHeight; dataParameterToken.DataSize = sizeof(uint32_t); dataParameterToken.SourceOffset = alignment; dataParameterToken.LocationIndex = 0x0; dataParameterToken.LocationIndex2 = 0x0; pPatchList = &dataParameterToken; patchListSize = dataParameterToken.Size; buildAndDecode(); EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size()); ASSERT_EQ(2U, pKernelInfo->getExplicitArgs().size()); EXPECT_EQ(offsetImgHeight, pKernelInfo->getArgDescriptorAt(argumentNumber).as().metadataPayload.imgHeight); } TEST_F(KernelDataTest, GivenDataParameterImageDepthWhenBuildingThenProgramIsCorrect) { uint32_t argumentNumber = 1; uint32_t alignment = 16; uint32_t offsetImgDepth = 12; SPatchDataParameterBuffer dataParameterToken; dataParameterToken.Token = PATCH_TOKEN_DATA_PARAMETER_BUFFER; dataParameterToken.Size = sizeof(SPatchDataParameterBuffer); dataParameterToken.Type = DATA_PARAMETER_IMAGE_DEPTH; dataParameterToken.ArgumentNumber = argumentNumber; dataParameterToken.Offset = offsetImgDepth; dataParameterToken.DataSize = sizeof(uint32_t); dataParameterToken.SourceOffset = alignment; dataParameterToken.LocationIndex = 0x0; dataParameterToken.LocationIndex2 = 0x0; pPatchList = &dataParameterToken; patchListSize = dataParameterToken.Size; buildAndDecode(); EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size()); ASSERT_EQ(2U, pKernelInfo->getExplicitArgs().size()); EXPECT_EQ(offsetImgDepth, pKernelInfo->getArgDescriptorAt(argumentNumber).as().metadataPayload.imgDepth); } TEST_F(KernelDataTest, GivenDataParameterImageNumSamplersWhenBuildingThenProgramIsCorrect) { uint32_t argumentNumber = 1; uint32_t alignment = 16; uint32_t offsetNumSamples = 60; SPatchDataParameterBuffer dataParameterToken; 
dataParameterToken.Token = PATCH_TOKEN_DATA_PARAMETER_BUFFER; dataParameterToken.Size = sizeof(SPatchDataParameterBuffer); dataParameterToken.Type = DATA_PARAMETER_IMAGE_NUM_SAMPLES; dataParameterToken.ArgumentNumber = argumentNumber; dataParameterToken.Offset = offsetNumSamples; dataParameterToken.DataSize = sizeof(uint32_t); dataParameterToken.SourceOffset = alignment; dataParameterToken.LocationIndex = 0x0; dataParameterToken.LocationIndex2 = 0x0; pPatchList = &dataParameterToken; patchListSize = dataParameterToken.Size; buildAndDecode(); EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size()); ASSERT_EQ(2U, pKernelInfo->getExplicitArgs().size()); EXPECT_EQ(offsetNumSamples, pKernelInfo->getArgDescriptorAt(argumentNumber).as().metadataPayload.numSamples); } TEST_F(KernelDataTest, GivenDataParameterImageNumMipLevelsWhenBuildingThenProgramIsCorrect) { uint32_t argumentNumber = 1; uint32_t alignment = 16; uint32_t offsetNumMipLevels = 60; SPatchDataParameterBuffer dataParameterToken; dataParameterToken.Token = PATCH_TOKEN_DATA_PARAMETER_BUFFER; dataParameterToken.Size = sizeof(SPatchDataParameterBuffer); dataParameterToken.Type = DATA_PARAMETER_IMAGE_NUM_MIP_LEVELS; dataParameterToken.ArgumentNumber = argumentNumber; dataParameterToken.Offset = offsetNumMipLevels; dataParameterToken.DataSize = sizeof(uint32_t); dataParameterToken.SourceOffset = alignment; dataParameterToken.LocationIndex = 0x0; dataParameterToken.LocationIndex2 = 0x0; pPatchList = &dataParameterToken; patchListSize = dataParameterToken.Size; buildAndDecode(); EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size()); ASSERT_EQ(2U, pKernelInfo->getExplicitArgs().size()); EXPECT_EQ(offsetNumMipLevels, pKernelInfo->getArgDescriptorAt(argumentNumber).as().metadataPayload.numMipLevels); } TEST_F(KernelDataTest, givenFlatImageDataParamTokenWhenDecodingThenSetAllOffsets) { uint32_t argumentNumber = 1; uint32_t alignment = 16; auto 
testToken = [&](iOpenCL::DATA_PARAMETER_TOKEN token, uint32_t offsetToken) { { // reset program if (pKernelData) { alignedFree(pKernelData); } program = std::make_unique(pContext, false, toClDeviceVector(*pContext->getDevice(0))); } SPatchDataParameterBuffer dataParameterToken; dataParameterToken.Token = PATCH_TOKEN_DATA_PARAMETER_BUFFER; dataParameterToken.Size = sizeof(SPatchDataParameterBuffer); dataParameterToken.Type = token; dataParameterToken.ArgumentNumber = argumentNumber; dataParameterToken.Offset = offsetToken; dataParameterToken.DataSize = sizeof(uint32_t); dataParameterToken.SourceOffset = alignment; dataParameterToken.LocationIndex = 0x0; dataParameterToken.LocationIndex2 = 0x0; pPatchList = &dataParameterToken; patchListSize = dataParameterToken.Size; buildAndDecode(); EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size()); ASSERT_EQ(2U, pKernelInfo->getExplicitArgs().size()); }; testToken(iOpenCL::DATA_PARAMETER_TOKEN::DATA_PARAMETER_FLAT_IMAGE_BASEOFFSET, 10u); EXPECT_EQ(10u, pKernelInfo->getArgDescriptorAt(argumentNumber).as().metadataPayload.flatBaseOffset); testToken(iOpenCL::DATA_PARAMETER_TOKEN::DATA_PARAMETER_FLAT_IMAGE_WIDTH, 14u); EXPECT_EQ(14u, pKernelInfo->getArgDescriptorAt(argumentNumber).as().metadataPayload.flatWidth); testToken(iOpenCL::DATA_PARAMETER_TOKEN::DATA_PARAMETER_FLAT_IMAGE_HEIGHT, 16u); EXPECT_EQ(16u, pKernelInfo->getArgDescriptorAt(argumentNumber).as().metadataPayload.flatHeight); testToken(iOpenCL::DATA_PARAMETER_TOKEN::DATA_PARAMETER_FLAT_IMAGE_PITCH, 18u); EXPECT_EQ(18u, pKernelInfo->getArgDescriptorAt(argumentNumber).as().metadataPayload.flatPitch); } TEST_F(KernelDataTest, GivenDataParameterImageDataTypeWhenBuildingThenProgramIsCorrect) { uint32_t argumentNumber = 1; uint32_t alignment = 16; uint32_t offsetChannelDataType = 52; SPatchDataParameterBuffer dataParameterToken; dataParameterToken.Token = PATCH_TOKEN_DATA_PARAMETER_BUFFER; dataParameterToken.Size = 
sizeof(SPatchDataParameterBuffer); dataParameterToken.Type = DATA_PARAMETER_IMAGE_CHANNEL_DATA_TYPE; dataParameterToken.ArgumentNumber = argumentNumber; dataParameterToken.Offset = offsetChannelDataType; dataParameterToken.DataSize = sizeof(uint32_t); dataParameterToken.SourceOffset = alignment; dataParameterToken.LocationIndex = 0x0; dataParameterToken.LocationIndex2 = 0x0; pPatchList = &dataParameterToken; patchListSize = dataParameterToken.Size; buildAndDecode(); EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size()); ASSERT_EQ(2U, pKernelInfo->getExplicitArgs().size()); EXPECT_EQ(offsetChannelDataType, pKernelInfo->getArgDescriptorAt(argumentNumber).as().metadataPayload.channelDataType); } TEST_F(KernelDataTest, GivenDataParameterImageChannelOrderWhenBuildingThenProgramIsCorrect) { uint32_t argumentNumber = 1; uint32_t alignment = 16; uint32_t offsetChannelOrder = 56; SPatchDataParameterBuffer dataParameterToken; dataParameterToken.Token = PATCH_TOKEN_DATA_PARAMETER_BUFFER; dataParameterToken.Size = sizeof(SPatchDataParameterBuffer); dataParameterToken.Type = DATA_PARAMETER_IMAGE_CHANNEL_ORDER; dataParameterToken.ArgumentNumber = argumentNumber; dataParameterToken.Offset = offsetChannelOrder; dataParameterToken.DataSize = sizeof(uint32_t); dataParameterToken.SourceOffset = alignment; dataParameterToken.LocationIndex = 0x0; dataParameterToken.LocationIndex2 = 0x0; pPatchList = &dataParameterToken; patchListSize = dataParameterToken.Size; buildAndDecode(); EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size()); ASSERT_EQ(2U, pKernelInfo->getExplicitArgs().size()); EXPECT_EQ(offsetChannelOrder, pKernelInfo->getArgDescriptorAt(argumentNumber).as().metadataPayload.channelOrder); } TEST_F(KernelDataTest, GivenDataParameterImageArraySizeWhenBuildingThenProgramIsCorrect) { uint32_t argumentNumber = 1; uint32_t alignment = 16; uint32_t offsetImageArraySize = 60; SPatchDataParameterBuffer 
dataParameterToken; dataParameterToken.Token = PATCH_TOKEN_DATA_PARAMETER_BUFFER; dataParameterToken.Size = sizeof(SPatchDataParameterBuffer); dataParameterToken.Type = DATA_PARAMETER_IMAGE_ARRAY_SIZE; dataParameterToken.ArgumentNumber = argumentNumber; dataParameterToken.Offset = offsetImageArraySize; dataParameterToken.DataSize = sizeof(uint32_t); dataParameterToken.SourceOffset = alignment; dataParameterToken.LocationIndex = 0x0; dataParameterToken.LocationIndex2 = 0x0; pPatchList = &dataParameterToken; patchListSize = dataParameterToken.Size; buildAndDecode(); EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size()); ASSERT_EQ(2U, pKernelInfo->getExplicitArgs().size()); EXPECT_EQ(offsetImageArraySize, pKernelInfo->getArgDescriptorAt(argumentNumber).as().metadataPayload.arraySize); } TEST_F(KernelDataTest, GivenDataParameterWorkDimensionsWhenBuildingThenProgramIsCorrect) { uint32_t argumentNumber = 1; uint32_t alignment = 16; uint32_t offsetWorkDim = 12; SPatchDataParameterBuffer dataParameterToken; dataParameterToken.Token = PATCH_TOKEN_DATA_PARAMETER_BUFFER; dataParameterToken.Size = sizeof(SPatchDataParameterBuffer); dataParameterToken.Type = DATA_PARAMETER_WORK_DIMENSIONS; dataParameterToken.ArgumentNumber = argumentNumber; dataParameterToken.Offset = offsetWorkDim; dataParameterToken.DataSize = sizeof(uint32_t); dataParameterToken.SourceOffset = alignment; dataParameterToken.LocationIndex = 0x0; dataParameterToken.LocationIndex2 = 0x0; pPatchList = &dataParameterToken; patchListSize = dataParameterToken.Size; buildAndDecode(); EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size()); EXPECT_EQ(0U, pKernelInfo->getExplicitArgs().size()); EXPECT_EQ(offsetWorkDim, pKernelInfo->kernelDescriptor.payloadMappings.dispatchTraits.workDim); } TEST_F(KernelDataTest, GivenDataParameterSimdSizeWhenBuildingThenProgramIsCorrect) { uint32_t argumentNumber = 17; uint32_t alignment = 16; uint32_t 
offsetSimdSize = 16; SPatchDataParameterBuffer dataParameterToken; dataParameterToken.Token = PATCH_TOKEN_DATA_PARAMETER_BUFFER; dataParameterToken.Size = sizeof(SPatchDataParameterBuffer); dataParameterToken.Type = DATA_PARAMETER_SIMD_SIZE; dataParameterToken.ArgumentNumber = argumentNumber; dataParameterToken.Offset = offsetSimdSize; dataParameterToken.DataSize = sizeof(uint32_t); dataParameterToken.SourceOffset = alignment; dataParameterToken.LocationIndex = 0x0; dataParameterToken.LocationIndex2 = 0x0; pPatchList = &dataParameterToken; patchListSize = dataParameterToken.Size; buildAndDecode(); EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size()); EXPECT_EQ(0u, pKernelInfo->getExplicitArgs().size()); EXPECT_EQ(offsetSimdSize, pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.simdSize); } TEST_F(KernelDataTest, GivenParameterPrivateMemoryStatelessSizeWhenBuildingThenProgramIsCorrect) { uint32_t argumentNumber = 17; uint32_t alignment = 16; uint32_t offset = 16; SPatchDataParameterBuffer dataParameterToken; dataParameterToken.Token = PATCH_TOKEN_DATA_PARAMETER_BUFFER; dataParameterToken.Size = sizeof(SPatchDataParameterBuffer); dataParameterToken.Type = DATA_PARAMETER_PRIVATE_MEMORY_STATELESS_SIZE; dataParameterToken.ArgumentNumber = argumentNumber; dataParameterToken.Offset = offset; dataParameterToken.DataSize = sizeof(uint32_t); dataParameterToken.SourceOffset = alignment; dataParameterToken.LocationIndex = 0x0; dataParameterToken.LocationIndex2 = 0x0; pPatchList = &dataParameterToken; patchListSize = dataParameterToken.Size; buildAndDecode(); EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size()); EXPECT_EQ(0u, pKernelInfo->getExplicitArgs().size()); } TEST_F(KernelDataTest, GivenDataParameterLocalMemoryStatelessWindowSizeWhenBuildingThenProgramIsCorrect) { uint32_t argumentNumber = 17; uint32_t alignment = 16; uint32_t offset = 16; SPatchDataParameterBuffer 
dataParameterToken; dataParameterToken.Token = PATCH_TOKEN_DATA_PARAMETER_BUFFER; dataParameterToken.Size = sizeof(SPatchDataParameterBuffer); dataParameterToken.Type = DATA_PARAMETER_LOCAL_MEMORY_STATELESS_WINDOW_SIZE; dataParameterToken.ArgumentNumber = argumentNumber; dataParameterToken.Offset = offset; dataParameterToken.DataSize = sizeof(uint32_t); dataParameterToken.SourceOffset = alignment; dataParameterToken.LocationIndex = 0x0; dataParameterToken.LocationIndex2 = 0x0; pPatchList = &dataParameterToken; patchListSize = dataParameterToken.Size; buildAndDecode(); EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size()); EXPECT_EQ(0u, pKernelInfo->getExplicitArgs().size()); } TEST_F(KernelDataTest, GivenDataParameterLocalMemoryStatelessWindowStartAddressWhenBuildingThenProgramIsCorrect) { uint32_t argumentNumber = 17; uint32_t alignment = 16; uint32_t offset = 16; SPatchDataParameterBuffer dataParameterToken; dataParameterToken.Token = PATCH_TOKEN_DATA_PARAMETER_BUFFER; dataParameterToken.Size = sizeof(SPatchDataParameterBuffer); dataParameterToken.Type = DATA_PARAMETER_LOCAL_MEMORY_STATELESS_WINDOW_START_ADDRESS; dataParameterToken.ArgumentNumber = argumentNumber; dataParameterToken.Offset = offset; dataParameterToken.DataSize = sizeof(uint32_t); dataParameterToken.SourceOffset = alignment; dataParameterToken.LocationIndex = 0x0; dataParameterToken.LocationIndex2 = 0x0; pPatchList = &dataParameterToken; patchListSize = dataParameterToken.Size; buildAndDecode(); EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size()); EXPECT_EQ(0u, pKernelInfo->getExplicitArgs().size()); } TEST_F(KernelDataTest, GivenDataParameterNumWorkGroupsWhenBuildingThenProgramIsCorrect) { uint32_t argumentNumber = 1; uint32_t alignment = 4; uint32_t offsetNumWorkGroups[3] = {0, 4, 8}; SPatchDataParameterBuffer dataParameterToken; dataParameterToken.Token = PATCH_TOKEN_DATA_PARAMETER_BUFFER; dataParameterToken.Size = 
sizeof(SPatchDataParameterBuffer); dataParameterToken.Type = DATA_PARAMETER_NUM_WORK_GROUPS; dataParameterToken.ArgumentNumber = argumentNumber; dataParameterToken.Offset = offsetNumWorkGroups[argumentNumber]; dataParameterToken.DataSize = sizeof(uint32_t); dataParameterToken.SourceOffset = argumentNumber * alignment; dataParameterToken.LocationIndex = 0x0; dataParameterToken.LocationIndex2 = 0x0; pPatchList = &dataParameterToken; patchListSize = dataParameterToken.Size; buildAndDecode(); EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size()); EXPECT_EQ(0U, pKernelInfo->getExplicitArgs().size()); EXPECT_EQ(offsetNumWorkGroups[argumentNumber], pKernelInfo->kernelDescriptor.payloadMappings.dispatchTraits.numWorkGroups[argumentNumber]); } TEST_F(KernelDataTest, GivenDataParameterMaxWorkgroupSizeWhenBuildingThenProgramIsCorrect) { uint32_t argumentNumber = 1; uint32_t alignment = 4; uint32_t offsetMaxWorkGroupSize = 4; SPatchDataParameterBuffer dataParameterToken; dataParameterToken.Token = PATCH_TOKEN_DATA_PARAMETER_BUFFER; dataParameterToken.Size = sizeof(SPatchDataParameterBuffer); dataParameterToken.Type = DATA_PARAMETER_MAX_WORKGROUP_SIZE; dataParameterToken.ArgumentNumber = argumentNumber; dataParameterToken.Offset = offsetMaxWorkGroupSize; dataParameterToken.DataSize = sizeof(uint32_t); dataParameterToken.SourceOffset = alignment; dataParameterToken.LocationIndex = 0x0; dataParameterToken.LocationIndex2 = 0x0; pPatchList = &dataParameterToken; patchListSize = dataParameterToken.Size; buildAndDecode(); EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size()); EXPECT_EQ(0U, pKernelInfo->getExplicitArgs().size()); EXPECT_EQ(offsetMaxWorkGroupSize, pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.maxWorkGroupSize); } TEST_F(KernelDataTest, GivenDataParameterSamplerAddressModeWhenBuildingThenProgramIsCorrect) { uint32_t argumentNumber = 0; uint32_t dataOffset = 20; uint32_t dataSize = 
sizeof(uint32_t); SPatchDataParameterBuffer dataParameterToken; dataParameterToken.Token = PATCH_TOKEN_DATA_PARAMETER_BUFFER; dataParameterToken.Size = sizeof(SPatchDataParameterBuffer); dataParameterToken.Type = DATA_PARAMETER_SAMPLER_ADDRESS_MODE; dataParameterToken.ArgumentNumber = argumentNumber; dataParameterToken.Offset = dataOffset; dataParameterToken.DataSize = dataSize; dataParameterToken.SourceOffset = 0; dataParameterToken.LocationIndex = 0x0; dataParameterToken.LocationIndex2 = 0x0; pPatchList = &dataParameterToken; patchListSize = dataParameterToken.Size; buildAndDecode(); EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size()); ASSERT_EQ(1U, pKernelInfo->getExplicitArgs().size()); EXPECT_EQ(dataOffset, pKernelInfo->getArgDescriptorAt(0).as().metadataPayload.samplerAddressingMode); } TEST_F(KernelDataTest, GivenDataParameterSamplerCoordinateSnapWaIsRequiredThenKernelInfoIsCorrect) { uint32_t argumentNumber = 1; uint32_t dataOffset = 20; uint32_t dataSize = sizeof(uint32_t); SPatchDataParameterBuffer dataParameterToken; dataParameterToken.Token = PATCH_TOKEN_DATA_PARAMETER_BUFFER; dataParameterToken.Size = sizeof(SPatchDataParameterBuffer); dataParameterToken.Type = DATA_PARAMETER_SAMPLER_COORDINATE_SNAP_WA_REQUIRED; dataParameterToken.ArgumentNumber = argumentNumber; dataParameterToken.Offset = dataOffset; dataParameterToken.DataSize = dataSize; dataParameterToken.SourceOffset = 0; dataParameterToken.LocationIndex = 0x0; dataParameterToken.LocationIndex2 = 0x0; pPatchList = &dataParameterToken; patchListSize = dataParameterToken.Size; buildAndDecode(); EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size()); ASSERT_EQ(2U, pKernelInfo->getExplicitArgs().size()); EXPECT_EQ(dataOffset, pKernelInfo->getArgDescriptorAt(argumentNumber).as().metadataPayload.samplerSnapWa); } TEST_F(KernelDataTest, GivenDataParameterSamplerNormalizedCoordsThenKernelInfoIsCorrect) { uint32_t 
argumentNumber = 1; uint32_t dataOffset = 20; uint32_t dataSize = sizeof(uint32_t); SPatchDataParameterBuffer dataParameterToken; dataParameterToken.Token = PATCH_TOKEN_DATA_PARAMETER_BUFFER; dataParameterToken.Size = sizeof(SPatchDataParameterBuffer); dataParameterToken.Type = DATA_PARAMETER_SAMPLER_NORMALIZED_COORDS; dataParameterToken.ArgumentNumber = argumentNumber; dataParameterToken.Offset = dataOffset; dataParameterToken.DataSize = dataSize; dataParameterToken.SourceOffset = 0; dataParameterToken.LocationIndex = 0x0; dataParameterToken.LocationIndex2 = 0x0; pPatchList = &dataParameterToken; patchListSize = dataParameterToken.Size; buildAndDecode(); EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size()); ASSERT_EQ(2U, pKernelInfo->getExplicitArgs().size()); EXPECT_EQ(dataOffset, pKernelInfo->getArgDescriptorAt(argumentNumber).as().metadataPayload.samplerNormalizedCoords); } TEST_F(KernelDataTest, GivenDataParameterKernelArgumentWhenBuildingThenProgramIsCorrect) { uint32_t argumentNumber = 0; uint32_t dataOffset = 20; uint32_t dataSize = sizeof(uint32_t); SPatchDataParameterBuffer dataParameterTokens[2]; dataParameterTokens[0].Token = PATCH_TOKEN_DATA_PARAMETER_BUFFER; dataParameterTokens[0].Size = sizeof(SPatchDataParameterBuffer); dataParameterTokens[0].Type = DATA_PARAMETER_KERNEL_ARGUMENT; dataParameterTokens[0].ArgumentNumber = argumentNumber; dataParameterTokens[0].Offset = dataOffset + dataSize * 0; dataParameterTokens[0].DataSize = dataSize; dataParameterTokens[0].SourceOffset = 0; dataParameterTokens[0].LocationIndex = 0x0; dataParameterTokens[0].LocationIndex2 = 0x0; dataParameterTokens[1].Token = PATCH_TOKEN_DATA_PARAMETER_BUFFER; dataParameterTokens[1].Size = sizeof(SPatchDataParameterBuffer); dataParameterTokens[1].Type = DATA_PARAMETER_KERNEL_ARGUMENT; dataParameterTokens[1].ArgumentNumber = argumentNumber; dataParameterTokens[1].Offset = dataOffset + dataSize * 1; dataParameterTokens[1].DataSize = dataSize; 
dataParameterTokens[1].SourceOffset = dataSize * 1; dataParameterTokens[1].LocationIndex = 0x0; dataParameterTokens[1].LocationIndex2 = 0x0; pPatchList = &dataParameterTokens[0]; patchListSize = dataParameterTokens[0].Size * (sizeof(dataParameterTokens) / sizeof(SPatchDataParameterBuffer)); buildAndDecode(); ASSERT_EQ(2U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size()); ASSERT_EQ(1u, pKernelInfo->getExplicitArgs().size()); auto &elements = pKernelInfo->getArgDescriptorAt(0).as().elements; ASSERT_EQ(2u, elements.size()); EXPECT_EQ(dataSize, elements[0].size); EXPECT_EQ(dataOffset + dataSize * 0, elements[0].offset); EXPECT_EQ(dataSize, elements[1].size); EXPECT_EQ(dataOffset + dataSize * 1, elements[1].offset); } TEST_F(KernelDataTest, GivenPatchTokenAllocateLocalSurfaceWhenBuildingThenProgramIsCorrect) { SPatchAllocateLocalSurface slmToken; slmToken.TotalInlineLocalMemorySize = 1024; slmToken.Size = sizeof(SPatchAllocateLocalSurface); slmToken.Token = PATCH_TOKEN_ALLOCATE_LOCAL_SURFACE; pPatchList = &slmToken; patchListSize = slmToken.Size; buildAndDecode(); EXPECT_EQ(1024u, pKernelInfo->kernelDescriptor.kernelAttributes.slmInlineSize); } TEST_F(KernelDataTest, GivenPatchTokenAllocateStatelessPrintfSurfaceWhenBuildingThenProgramIsCorrect) { SPatchAllocateStatelessPrintfSurface printfSurface; printfSurface.Token = PATCH_TOKEN_ALLOCATE_STATELESS_PRINTF_SURFACE; printfSurface.Size = static_cast(sizeof(SPatchAllocateStatelessPrintfSurface)); printfSurface.PrintfSurfaceIndex = 33; printfSurface.SurfaceStateHeapOffset = 0x1FF0; printfSurface.DataParamOffset = 0x3FF0; printfSurface.DataParamSize = 0xFF; pPatchList = &printfSurface; patchListSize = printfSurface.Size; buildAndDecode(); EXPECT_TRUE(pKernelInfo->kernelDescriptor.kernelAttributes.flags.usesPrintf); EXPECT_EQ(printfSurface.SurfaceStateHeapOffset, pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.printfSurfaceAddress.bindful); EXPECT_EQ(printfSurface.DataParamOffset, 
pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.printfSurfaceAddress.stateless); EXPECT_EQ(printfSurface.DataParamSize, pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.printfSurfaceAddress.pointerSize); } TEST_F(KernelDataTest, GivenPatchTokenSamplerStateArrayWhenBuildingThenProgramIsCorrect) { SPatchSamplerStateArray token; token.Token = PATCH_TOKEN_SAMPLER_STATE_ARRAY; token.Size = static_cast(sizeof(SPatchSamplerStateArray)); token.Offset = 33; token.Count = 0xF0; token.BorderColorOffset = 0x3FF0; pPatchList = &token; patchListSize = token.Size; buildAndDecode(); EXPECT_EQ_VAL(token.Offset, pKernelInfo->kernelDescriptor.payloadMappings.samplerTable.tableOffset); EXPECT_EQ_VAL(token.Count, pKernelInfo->kernelDescriptor.payloadMappings.samplerTable.numSamplers); EXPECT_EQ_VAL(token.BorderColorOffset, pKernelInfo->kernelDescriptor.payloadMappings.samplerTable.borderColor); } TEST_F(KernelDataTest, GivenPatchTokenAllocateStatelessPrivateMemoryWhenBuildingThenProgramIsCorrect) { SPatchAllocateStatelessPrivateSurface token; token.Token = PATCH_TOKEN_ALLOCATE_STATELESS_PRIVATE_MEMORY; token.Size = static_cast(sizeof(SPatchAllocateStatelessPrivateSurface)); token.SurfaceStateHeapOffset = 64; token.DataParamOffset = 40; token.DataParamSize = 8; token.PerThreadPrivateMemorySize = 112; pPatchList = &token; patchListSize = token.Size; buildAndDecode(); EXPECT_EQ_VAL(token.SurfaceStateHeapOffset, pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.privateMemoryAddress.bindful); EXPECT_EQ_VAL(token.DataParamOffset, pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.privateMemoryAddress.stateless); EXPECT_EQ_VAL(token.DataParamSize, pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.privateMemoryAddress.pointerSize); EXPECT_EQ_VAL(PatchTokenBinary::getPerHwThreadPrivateSurfaceSize(token, pKernelInfo->kernelDescriptor.kernelAttributes.simdSize), pKernelInfo->kernelDescriptor.kernelAttributes.perHwThreadPrivateMemorySize); } 
TEST_F(KernelDataTest, GivenDataParameterVmeMbBlockTypeWhenBuildingThenProgramIsCorrect) { uint32_t argumentNumber = 1; uint32_t alignment = 16; uint32_t offsetVmeMbBlockType = 0xaa; SPatchDataParameterBuffer dataParameterToken; dataParameterToken.Token = PATCH_TOKEN_DATA_PARAMETER_BUFFER; dataParameterToken.Size = sizeof(SPatchDataParameterBuffer); dataParameterToken.Type = DATA_PARAMETER_VME_MB_BLOCK_TYPE; dataParameterToken.ArgumentNumber = argumentNumber; dataParameterToken.Offset = offsetVmeMbBlockType; dataParameterToken.DataSize = sizeof(uint32_t); dataParameterToken.SourceOffset = alignment; dataParameterToken.LocationIndex = 0x0; dataParameterToken.LocationIndex2 = 0x0; pPatchList = &dataParameterToken; patchListSize = dataParameterToken.Size; buildAndDecode(); EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size()); ASSERT_EQ(2U, pKernelInfo->getExplicitArgs().size()); EXPECT_TRUE(pKernelInfo->getArgDescriptorAt(argumentNumber).getExtendedTypeInfo().hasVmeExtendedDescriptor); ASSERT_EQ(2U, pKernelInfo->kernelDescriptor.payloadMappings.explicitArgsExtendedDescriptors.size()); auto vmeArgDesc = reinterpret_cast(pKernelInfo->kernelDescriptor.payloadMappings.explicitArgsExtendedDescriptors[1].get()); EXPECT_EQ(offsetVmeMbBlockType, vmeArgDesc->mbBlockType); } TEST_F(KernelDataTest, GivenDataParameterDataVmeSubpixelModeWhenBuildingThenProgramIsCorrect) { uint32_t argumentNumber = 1; uint32_t alignment = 17; uint32_t offsetVmeSubpixelMode = 0xab; SPatchDataParameterBuffer dataParameterToken; dataParameterToken.Token = PATCH_TOKEN_DATA_PARAMETER_BUFFER; dataParameterToken.Size = sizeof(SPatchDataParameterBuffer); dataParameterToken.Type = DATA_PARAMETER_VME_SUBPIXEL_MODE; dataParameterToken.ArgumentNumber = argumentNumber; dataParameterToken.Offset = offsetVmeSubpixelMode; dataParameterToken.DataSize = sizeof(uint32_t); dataParameterToken.SourceOffset = alignment; dataParameterToken.LocationIndex = 0x0; 
dataParameterToken.LocationIndex2 = 0x0; pPatchList = &dataParameterToken; patchListSize = dataParameterToken.Size; buildAndDecode(); EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size()); ASSERT_EQ(2U, pKernelInfo->getExplicitArgs().size()); EXPECT_TRUE(pKernelInfo->getArgDescriptorAt(argumentNumber).getExtendedTypeInfo().hasVmeExtendedDescriptor); ASSERT_EQ(2U, pKernelInfo->kernelDescriptor.payloadMappings.explicitArgsExtendedDescriptors.size()); auto vmeArgDesc = reinterpret_cast(pKernelInfo->kernelDescriptor.payloadMappings.explicitArgsExtendedDescriptors[1].get()); EXPECT_EQ(offsetVmeSubpixelMode, vmeArgDesc->subpixelMode); } TEST_F(KernelDataTest, GivenDataParameterVmeSadAdjustModeWhenBuildingThenProgramIsCorrect) { uint32_t argumentNumber = 1; uint32_t alignment = 18; uint32_t offsetVmeSadAdjustMode = 0xac; SPatchDataParameterBuffer dataParameterToken; dataParameterToken.Token = PATCH_TOKEN_DATA_PARAMETER_BUFFER; dataParameterToken.Size = sizeof(SPatchDataParameterBuffer); dataParameterToken.Type = DATA_PARAMETER_VME_SAD_ADJUST_MODE; dataParameterToken.ArgumentNumber = argumentNumber; dataParameterToken.Offset = offsetVmeSadAdjustMode; dataParameterToken.DataSize = sizeof(uint32_t); dataParameterToken.SourceOffset = alignment; dataParameterToken.LocationIndex = 0x0; dataParameterToken.LocationIndex2 = 0x0; pPatchList = &dataParameterToken; patchListSize = dataParameterToken.Size; buildAndDecode(); EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size()); ASSERT_EQ(2U, pKernelInfo->getExplicitArgs().size()); EXPECT_TRUE(pKernelInfo->getArgDescriptorAt(argumentNumber).getExtendedTypeInfo().hasVmeExtendedDescriptor); ASSERT_EQ(2U, pKernelInfo->kernelDescriptor.payloadMappings.explicitArgsExtendedDescriptors.size()); auto vmeArgDesc = reinterpret_cast(pKernelInfo->kernelDescriptor.payloadMappings.explicitArgsExtendedDescriptors[1].get()); EXPECT_EQ(offsetVmeSadAdjustMode, 
vmeArgDesc->sadAdjustMode); } TEST_F(KernelDataTest, GivenDataParameterVmeSearchPathTypeWhenBuildingThenProgramIsCorrect) { uint32_t argumentNumber = 1; uint32_t alignment = 19; uint32_t offsetVmeSearchPathType = 0xad; SPatchDataParameterBuffer dataParameterToken; dataParameterToken.Token = PATCH_TOKEN_DATA_PARAMETER_BUFFER; dataParameterToken.Size = sizeof(SPatchDataParameterBuffer); dataParameterToken.Type = DATA_PARAMETER_VME_SEARCH_PATH_TYPE; dataParameterToken.ArgumentNumber = argumentNumber; dataParameterToken.Offset = offsetVmeSearchPathType; dataParameterToken.DataSize = sizeof(uint32_t); dataParameterToken.SourceOffset = alignment; dataParameterToken.LocationIndex = 0x0; dataParameterToken.LocationIndex2 = 0x0; pPatchList = &dataParameterToken; patchListSize = dataParameterToken.Size; buildAndDecode(); EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size()); ASSERT_EQ(2U, pKernelInfo->getExplicitArgs().size()); EXPECT_TRUE(pKernelInfo->getArgDescriptorAt(argumentNumber).getExtendedTypeInfo().hasVmeExtendedDescriptor); ASSERT_EQ(2U, pKernelInfo->kernelDescriptor.payloadMappings.explicitArgsExtendedDescriptors.size()); auto vmeArgDesc = reinterpret_cast(pKernelInfo->kernelDescriptor.payloadMappings.explicitArgsExtendedDescriptors[1].get()); EXPECT_EQ(offsetVmeSearchPathType, vmeArgDesc->searchPathType); } TEST_F(KernelDataTest, GivenPatchTokenStateSipWhenBuildingThenProgramIsCorrect) { SPatchStateSIP token; token.Token = PATCH_TOKEN_STATE_SIP; token.Size = static_cast(sizeof(SPatchStateSIP)); token.SystemKernelOffset = 33; pPatchList = &token; patchListSize = token.Size; buildAndDecode(); EXPECT_EQ(0U, pKernelInfo->kernelDescriptor.kernelMetadata.allByValueKernelArguments.size()); EXPECT_EQ(0U, pKernelInfo->getExplicitArgs().size()); EXPECT_EQ_VAL(token.SystemKernelOffset, pKernelInfo->systemKernelOffset); } TEST_F(KernelDataTest, givenSymbolTablePatchTokenThenLinkerInputIsCreated) { SPatchFunctionTableInfo token; 
token.Token = PATCH_TOKEN_PROGRAM_SYMBOL_TABLE; token.Size = static_cast(sizeof(SPatchFunctionTableInfo)); token.NumEntries = 0; pPatchList = &token; patchListSize = token.Size; buildAndDecode(); EXPECT_NE(nullptr, program->getLinkerInput(pContext->getDevice(0)->getRootDeviceIndex())); } TEST_F(KernelDataTest, givenRelocationTablePatchTokenThenLinkerInputIsCreated) { SPatchFunctionTableInfo token; token.Token = PATCH_TOKEN_PROGRAM_RELOCATION_TABLE; token.Size = static_cast(sizeof(SPatchFunctionTableInfo)); token.NumEntries = 0; pPatchList = &token; patchListSize = token.Size; buildAndDecode(); EXPECT_NE(nullptr, program->getLinkerInput(pContext->getDevice(0)->getRootDeviceIndex())); } compute-runtime-22.14.22890/opencl/test/unit_test/program/kernel_data_OCL2_0.cpp000066400000000000000000000150261422164147700272550ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/unit_test/helpers/gtest_helpers.h" #include "opencl/test/unit_test/fixtures/kernel_data_fixture.h" #include "patch_g7.h" TEST_F(KernelDataTest, GIVENpatchTokenAllocateStatelessEventPoolSurfaceWHENdecodeTokensTHENtokenLocatedInPatchInfo) { iOpenCL::SPatchAllocateStatelessEventPoolSurface allocateStatelessEventPoolSurface; allocateStatelessEventPoolSurface.Token = PATCH_TOKEN_ALLOCATE_STATELESS_EVENT_POOL_SURFACE; allocateStatelessEventPoolSurface.Size = sizeof(SPatchAllocateStatelessEventPoolSurface); allocateStatelessEventPoolSurface.DataParamSize = 7; allocateStatelessEventPoolSurface.DataParamOffset = 0xABC; allocateStatelessEventPoolSurface.SurfaceStateHeapOffset = 0xDEF; pPatchList = &allocateStatelessEventPoolSurface; patchListSize = allocateStatelessEventPoolSurface.Size; buildAndDecode(); const auto &eventPoolArg = pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueEventPoolSurfaceAddress; EXPECT_EQ_VAL(allocateStatelessEventPoolSurface.DataParamOffset, eventPoolArg.stateless); 
EXPECT_EQ_VAL(allocateStatelessEventPoolSurface.DataParamSize, eventPoolArg.pointerSize); EXPECT_EQ_VAL(allocateStatelessEventPoolSurface.SurfaceStateHeapOffset, eventPoolArg.bindful); } TEST_F(KernelDataTest, GIVENpatchTokenAllocateStatelessDefaultDeviceQueueSurfaceWHENdecodeTokensTHENtokenLocatedInPatchInfo) { iOpenCL::SPatchAllocateStatelessDefaultDeviceQueueSurface allocateStatelessDefaultDeviceQueueSurface; allocateStatelessDefaultDeviceQueueSurface.Token = PATCH_TOKEN_ALLOCATE_STATELESS_DEFAULT_DEVICE_QUEUE_SURFACE; allocateStatelessDefaultDeviceQueueSurface.Size = sizeof(SPatchAllocateStatelessDefaultDeviceQueueSurface); allocateStatelessDefaultDeviceQueueSurface.DataParamSize = 7; allocateStatelessDefaultDeviceQueueSurface.DataParamOffset = 0xABC; allocateStatelessDefaultDeviceQueueSurface.SurfaceStateHeapOffset = 0xDEF; pPatchList = &allocateStatelessDefaultDeviceQueueSurface; patchListSize = allocateStatelessDefaultDeviceQueueSurface.Size; buildAndDecode(); const auto &defaultQueueSurfaceAddress = pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueDefaultQueueSurfaceAddress; EXPECT_EQ(allocateStatelessDefaultDeviceQueueSurface.DataParamOffset, defaultQueueSurfaceAddress.stateless); EXPECT_EQ(allocateStatelessDefaultDeviceQueueSurface.DataParamSize, defaultQueueSurfaceAddress.pointerSize); EXPECT_EQ(allocateStatelessDefaultDeviceQueueSurface.SurfaceStateHeapOffset, defaultQueueSurfaceAddress.bindful); } TEST_F(KernelDataTest, GIVENpatchTokenStatelessDeviceQueueKernelArgumentWHENdecodeTokensTHENapropriateKernelArgInfoFilled) { iOpenCL::SPatchStatelessDeviceQueueKernelArgument deviceQueueKernelArgument; deviceQueueKernelArgument.Token = PATCH_TOKEN_STATELESS_DEVICE_QUEUE_KERNEL_ARGUMENT; deviceQueueKernelArgument.Size = sizeof(SPatchStatelessDeviceQueueKernelArgument); deviceQueueKernelArgument.ArgumentNumber = 3; deviceQueueKernelArgument.DataParamSize = 7; deviceQueueKernelArgument.DataParamOffset = 0xABC; 
deviceQueueKernelArgument.SurfaceStateHeapOffset = 0xDEF; pPatchList = &deviceQueueKernelArgument; patchListSize = deviceQueueKernelArgument.Size; buildAndDecode(); ASSERT_GE(pKernelInfo->getExplicitArgs().size(), size_t(4u)); EXPECT_TRUE(pKernelInfo->getArgDescriptorAt(3).getExtendedTypeInfo().isDeviceQueue); const auto &argAsPtr = pKernelInfo->getArgDescriptorAt(3).as(); EXPECT_EQ(deviceQueueKernelArgument.DataParamOffset, argAsPtr.stateless); EXPECT_EQ(deviceQueueKernelArgument.DataParamSize, argAsPtr.pointerSize); EXPECT_EQ(deviceQueueKernelArgument.SurfaceStateHeapOffset, argAsPtr.bindful); } TEST_F(KernelDataTest, GIVENdataParameterParentEventWHENdecodeTokensTHENoffsetLocatedInWorkloadInfo) { const uint32_t offsetSimdSize = 0xABC; SPatchDataParameterBuffer dataParameterToken; dataParameterToken.Token = PATCH_TOKEN_DATA_PARAMETER_BUFFER; dataParameterToken.Size = sizeof(SPatchDataParameterBuffer); dataParameterToken.Type = DATA_PARAMETER_PARENT_EVENT; dataParameterToken.ArgumentNumber = 0; dataParameterToken.Offset = offsetSimdSize; dataParameterToken.DataSize = sizeof(uint32_t); dataParameterToken.SourceOffset = 0; pPatchList = &dataParameterToken; patchListSize = dataParameterToken.Size; buildAndDecode(); EXPECT_EQ(pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueParentEvent, offsetSimdSize); } TEST_F(KernelDataTest, GIVENdataParameterPreferredWorkgroupMultipleTokenWHENbinaryIsdecodedTHENcorrectOffsetIsAssigned) { const uint32_t offset = 0x100; SPatchDataParameterBuffer dataParameterToken; dataParameterToken.Token = PATCH_TOKEN_DATA_PARAMETER_BUFFER; dataParameterToken.Size = sizeof(SPatchDataParameterBuffer); dataParameterToken.Type = DATA_PARAMETER_PREFERRED_WORKGROUP_MULTIPLE; dataParameterToken.ArgumentNumber = 0; dataParameterToken.Offset = offset; dataParameterToken.DataSize = sizeof(uint32_t); dataParameterToken.SourceOffset = 0; pPatchList = &dataParameterToken; patchListSize = dataParameterToken.Size; buildAndDecode(); 
EXPECT_EQ(pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.preferredWkgMultiple, offset); } TEST_F(KernelDataTest, GIVENdataParameterObjectIdWHENdecodeTokensTHENoffsetLocatedInKernelArgInfo) { const uint32_t offsetObjectId = 0xABC; const uint32_t argNum = 7; SPatchDataParameterBuffer dataParameterToken; dataParameterToken.Token = PATCH_TOKEN_DATA_PARAMETER_BUFFER; dataParameterToken.Size = sizeof(SPatchDataParameterBuffer); dataParameterToken.Type = DATA_PARAMETER_OBJECT_ID; dataParameterToken.ArgumentNumber = argNum; dataParameterToken.Offset = offsetObjectId; dataParameterToken.DataSize = sizeof(uint32_t); dataParameterToken.SourceOffset = 0; pPatchList = &dataParameterToken; patchListSize = dataParameterToken.Size; buildAndDecode(); ASSERT_GE(pKernelInfo->getExplicitArgs().size(), size_t(argNum + 1)); EXPECT_TRUE(pKernelInfo->getArgDescriptorAt(argNum).getExtendedTypeInfo().hasDeviceSideEnqueueExtendedDescriptor); auto deviceSideEnqueueDesc = reinterpret_cast(pKernelInfo->kernelDescriptor.payloadMappings.explicitArgsExtendedDescriptors[argNum].get()); EXPECT_EQ(offsetObjectId, deviceSideEnqueueDesc->objectId); } compute-runtime-22.14.22890/opencl/test/unit_test/program/kernel_info_from_patchtokens_tests.cpp000066400000000000000000000412201422164147700331440ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/device_binary_format/patchtokens_decoder.h" #include "shared/source/program/kernel_info.h" #include "shared/source/program/kernel_info_from_patchtokens.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/unit_test/device_binary_format/patchtokens_tests.h" #include "gtest/gtest.h" TEST(KernelInfoFromPatchTokens, GivenValidEmptyKernelFromPatchtokensThenReturnEmptyKernelInfo) { std::vector storage; auto src = PatchTokensTestData::ValidEmptyKernel::create(storage); NEO::KernelInfo dst = {}; NEO::populateKernelInfo(dst, src, 4); 
NEO::KernelInfo expectedKernelInfo = {}; expectedKernelInfo.kernelDescriptor.kernelMetadata.kernelName = std::string(src.name.begin()).c_str(); EXPECT_STREQ(expectedKernelInfo.kernelDescriptor.kernelMetadata.kernelName.c_str(), dst.kernelDescriptor.kernelMetadata.kernelName.c_str()); EXPECT_EQ(src.header->KernelHeapSize, dst.heapInfo.KernelHeapSize); EXPECT_EQ(src.header->GeneralStateHeapSize, dst.heapInfo.GeneralStateHeapSize); EXPECT_EQ(src.header->DynamicStateHeapSize, dst.heapInfo.DynamicStateHeapSize); EXPECT_EQ(src.header->SurfaceStateHeapSize, dst.heapInfo.SurfaceStateHeapSize); EXPECT_EQ(src.header->KernelUnpaddedSize, dst.heapInfo.KernelUnpaddedSize); } TEST(KernelInfoFromPatchTokens, GivenDataParameterStreamWithEmptySizeThenTokenIsIgnored) { std::vector storage; auto src = PatchTokensTestData::ValidEmptyKernel::create(storage); iOpenCL::SPatchDataParameterStream dataParameterStream = {}; src.tokens.dataParameterStream = &dataParameterStream; dataParameterStream.DataParameterStreamSize = 0U; NEO::KernelInfo dst; NEO::populateKernelInfo(dst, src, 4); EXPECT_EQ(nullptr, dst.crossThreadData); } TEST(KernelInfoFromPatchTokens, GivenDataParameterStreamWithNonEmptySizeThenCrossthreadDataIsAllocated) { std::vector storage; auto src = PatchTokensTestData::ValidEmptyKernel::create(storage); iOpenCL::SPatchDataParameterStream dataParameterStream = {}; src.tokens.dataParameterStream = &dataParameterStream; dataParameterStream.DataParameterStreamSize = 256U; NEO::KernelInfo dst; NEO::populateKernelInfo(dst, src, 4); EXPECT_NE(nullptr, dst.crossThreadData); } TEST(KernelInfoFromPatchTokens, GivenDataParameterStreamWhenTokensRequiringDeviceInfoPayloadConstantsArePresentThenCrossthreadDataIsProperlyPatched) { std::vector storage; auto src = PatchTokensTestData::ValidEmptyKernel::create(storage); iOpenCL::SPatchDataParameterStream dataParameterStream = {}; src.tokens.dataParameterStream = &dataParameterStream; dataParameterStream.DataParameterStreamSize = 256U; 
NEO::DeviceInfoKernelPayloadConstants deviceInfoConstants; deviceInfoConstants.computeUnitsUsedForScratch = 128U; deviceInfoConstants.maxWorkGroupSize = 64U; std::unique_ptr slm = std::make_unique(); deviceInfoConstants.slmWindow = slm.get(); deviceInfoConstants.slmWindowSize = 512U; iOpenCL::SPatchAllocateStatelessPrivateSurface privateSurface = {}; privateSurface.PerThreadPrivateMemorySize = 8U; privateSurface.IsSimtThread = 1; src.tokens.allocateStatelessPrivateSurface = &privateSurface; iOpenCL::SPatchDataParameterBuffer privateMemorySize = {}; privateMemorySize.Offset = 8U; src.tokens.crossThreadPayloadArgs.privateMemoryStatelessSize = &privateMemorySize; iOpenCL::SPatchDataParameterBuffer localMemoryWindowStartVA = {}; localMemoryWindowStartVA.Offset = 16U; src.tokens.crossThreadPayloadArgs.localMemoryStatelessWindowStartAddress = &localMemoryWindowStartVA; iOpenCL::SPatchDataParameterBuffer localMemoryWindowsSize = {}; localMemoryWindowsSize.Offset = 24U; src.tokens.crossThreadPayloadArgs.localMemoryStatelessWindowSize = &localMemoryWindowsSize; iOpenCL::SPatchDataParameterBuffer maxWorkgroupSize = {}; maxWorkgroupSize.Offset = 32U; src.tokens.crossThreadPayloadArgs.maxWorkGroupSize = &maxWorkgroupSize; NEO::KernelInfo dst; NEO::populateKernelInfo(dst, src, 4); ASSERT_NE(nullptr, dst.crossThreadData); dst.apply(deviceInfoConstants); uint32_t expectedPrivateMemorySize = privateSurface.PerThreadPrivateMemorySize * deviceInfoConstants.computeUnitsUsedForScratch * src.tokens.executionEnvironment->LargestCompiledSIMDSize; EXPECT_EQ(expectedPrivateMemorySize, *reinterpret_cast(dst.crossThreadData + privateMemorySize.Offset)); EXPECT_EQ(deviceInfoConstants.slmWindowSize, *reinterpret_cast(dst.crossThreadData + localMemoryWindowsSize.Offset)); EXPECT_EQ(deviceInfoConstants.maxWorkGroupSize, *reinterpret_cast(dst.crossThreadData + maxWorkgroupSize.Offset)); EXPECT_EQ(reinterpret_cast(deviceInfoConstants.slmWindow), *reinterpret_cast(dst.crossThreadData + 
localMemoryWindowStartVA.Offset)); } TEST(KernelInfoFromPatchTokens, givenIsSimtThreadNotSetWhenConfiguringThenDontUseSimdSizeForPrivateSizeCalculation) { std::vector storage; auto src = PatchTokensTestData::ValidEmptyKernel::create(storage); iOpenCL::SPatchDataParameterStream dataParameterStream = {}; src.tokens.dataParameterStream = &dataParameterStream; dataParameterStream.DataParameterStreamSize = 256U; NEO::DeviceInfoKernelPayloadConstants deviceInfoConstants; deviceInfoConstants.computeUnitsUsedForScratch = 128U; deviceInfoConstants.maxWorkGroupSize = 64U; std::unique_ptr slm = std::make_unique(); deviceInfoConstants.slmWindow = slm.get(); deviceInfoConstants.slmWindowSize = 512U; iOpenCL::SPatchAllocateStatelessPrivateSurface privateSurface = {}; privateSurface.PerThreadPrivateMemorySize = 8U; privateSurface.IsSimtThread = 0; src.tokens.allocateStatelessPrivateSurface = &privateSurface; iOpenCL::SPatchDataParameterBuffer privateMemorySize = {}; privateMemorySize.Offset = 8U; src.tokens.crossThreadPayloadArgs.privateMemoryStatelessSize = &privateMemorySize; iOpenCL::SPatchDataParameterBuffer localMemoryWindowStartVA = {}; localMemoryWindowStartVA.Offset = 16U; src.tokens.crossThreadPayloadArgs.localMemoryStatelessWindowStartAddress = &localMemoryWindowStartVA; iOpenCL::SPatchDataParameterBuffer localMemoryWindowsSize = {}; localMemoryWindowsSize.Offset = 24U; src.tokens.crossThreadPayloadArgs.localMemoryStatelessWindowSize = &localMemoryWindowsSize; iOpenCL::SPatchDataParameterBuffer maxWorkgroupSize = {}; maxWorkgroupSize.Offset = 32U; src.tokens.crossThreadPayloadArgs.maxWorkGroupSize = &maxWorkgroupSize; NEO::KernelInfo dst; NEO::populateKernelInfo(dst, src, 4); ASSERT_NE(nullptr, dst.crossThreadData); dst.apply(deviceInfoConstants); uint32_t expectedPrivateMemorySize = privateSurface.PerThreadPrivateMemorySize * deviceInfoConstants.computeUnitsUsedForScratch; EXPECT_EQ(expectedPrivateMemorySize, *reinterpret_cast(dst.crossThreadData + 
privateMemorySize.Offset)); EXPECT_EQ(deviceInfoConstants.slmWindowSize, *reinterpret_cast(dst.crossThreadData + localMemoryWindowsSize.Offset)); EXPECT_EQ(deviceInfoConstants.maxWorkGroupSize, *reinterpret_cast(dst.crossThreadData + maxWorkgroupSize.Offset)); EXPECT_EQ(reinterpret_cast(deviceInfoConstants.slmWindow), *reinterpret_cast(dst.crossThreadData + localMemoryWindowStartVA.Offset)); } TEST(KernelInfoFromPatchTokens, GivenDataParameterStreamWhenPrivateSurfaceIsNotAllocatedButPrivateSurfaceMemorySizePatchIsNeededThenPatchWithZero) { std::vector storage; auto src = PatchTokensTestData::ValidEmptyKernel::create(storage); iOpenCL::SPatchDataParameterStream dataParameterStream = {}; src.tokens.dataParameterStream = &dataParameterStream; dataParameterStream.DataParameterStreamSize = 256U; NEO::DeviceInfoKernelPayloadConstants deviceInfoConstants; deviceInfoConstants.computeUnitsUsedForScratch = 128U; deviceInfoConstants.maxWorkGroupSize = 64U; std::unique_ptr slm = std::make_unique(); deviceInfoConstants.slmWindow = slm.get(); deviceInfoConstants.slmWindowSize = 512U; iOpenCL::SPatchDataParameterBuffer privateMemorySize = {}; privateMemorySize.Offset = 8U; src.tokens.crossThreadPayloadArgs.privateMemoryStatelessSize = &privateMemorySize; NEO::KernelInfo dst; NEO::populateKernelInfo(dst, src, 4); ASSERT_NE(nullptr, dst.crossThreadData); dst.apply(deviceInfoConstants); uint32_t expectedPrivateMemorySize = 0U; EXPECT_EQ(expectedPrivateMemorySize, *reinterpret_cast(dst.crossThreadData + privateMemorySize.Offset)); } TEST(KernelInfoFromPatchTokens, GivenKernelWithGtpinInfoTokenThenKernelInfoIsProperlyPopulated) { std::vector storage; NEO::PatchTokenBinary::KernelFromPatchtokens kernelTokens = PatchTokensTestData::ValidEmptyKernel::create(storage); iOpenCL::SPatchItemHeader gtpinInfo = {}; gtpinInfo.Token = iOpenCL::PATCH_TOKEN_GTPIN_INFO; gtpinInfo.Size = sizeof(iOpenCL::SPatchItemHeader); kernelTokens.tokens.gtpinInfo = >pinInfo; NEO::KernelInfo kernelInfo = {}; 
NEO::populateKernelInfo(kernelInfo, kernelTokens, sizeof(void *)); EXPECT_NE(nullptr, kernelInfo.igcInfoForGtpin); } TEST(KernelInfoFromPatchTokens, GivenKernelWithGlobalObjectArgWhenAddressingModeIsBindlessThenBindlessOffsetIsSetProperly) { DebugManagerStateRestore restorer; DebugManager.flags.UseBindlessMode.set(1); std::vector storage; NEO::PatchTokenBinary::KernelFromPatchtokens kernelTokens = PatchTokensTestData::ValidEmptyKernel::create(storage); iOpenCL::SPatchGlobalMemoryObjectKernelArgument globalMemArg = {}; globalMemArg.Token = iOpenCL::PATCH_TOKEN_GLOBAL_MEMORY_OBJECT_KERNEL_ARGUMENT; globalMemArg.Size = sizeof(iOpenCL::SPatchGlobalMemoryObjectKernelArgument); globalMemArg.ArgumentNumber = 0; globalMemArg.Offset = 0x40; kernelTokens.tokens.kernelArgs.resize(1); kernelTokens.tokens.kernelArgs[0].objectArg = &globalMemArg; NEO::KernelInfo kernelInfo = {}; NEO::populateKernelInfo(kernelInfo, kernelTokens, sizeof(void *)); auto &argPointer = kernelInfo.kernelDescriptor.payloadMappings.explicitArgs[0].as(true); EXPECT_TRUE(NEO::isValidOffset(argPointer.bindless)); EXPECT_FALSE(NEO::isValidOffset(argPointer.bindful)); } TEST(KernelInfoFromPatchTokens, GivenKernelWithGlobalObjectArgWhenAddressingModeIsBindfulThenBindlessOffsetIsSetProperly) { DebugManagerStateRestore restorer; DebugManager.flags.UseBindlessMode.set(0); std::vector storage; NEO::PatchTokenBinary::KernelFromPatchtokens kernelTokens = PatchTokensTestData::ValidEmptyKernel::create(storage); iOpenCL::SPatchGlobalMemoryObjectKernelArgument globalMemArg = {}; globalMemArg.Token = iOpenCL::PATCH_TOKEN_GLOBAL_MEMORY_OBJECT_KERNEL_ARGUMENT; globalMemArg.Size = sizeof(iOpenCL::SPatchGlobalMemoryObjectKernelArgument); globalMemArg.ArgumentNumber = 0; globalMemArg.Offset = 0x40; kernelTokens.tokens.kernelArgs.resize(1); kernelTokens.tokens.kernelArgs[0].objectArg = &globalMemArg; NEO::KernelInfo kernelInfo = {}; kernelInfo.kernelDescriptor.kernelAttributes.bufferAddressingMode = 
NEO::KernelDescriptor::BindfulAndStateless; NEO::populateKernelInfo(kernelInfo, kernelTokens, sizeof(void *)); auto &argPointer = kernelInfo.kernelDescriptor.payloadMappings.explicitArgs[0].as(true); EXPECT_FALSE(NEO::isValidOffset(argPointer.bindless)); EXPECT_TRUE(NEO::isValidOffset(argPointer.bindful)); } TEST(KernelInfoFromPatchTokens, GivenKernelWithImageObjectArgWhenAddressingModeIsBindlessThenBindlessOffsetIsSetProperly) { DebugManagerStateRestore restorer; DebugManager.flags.UseBindlessMode.set(1); std::vector storage; NEO::PatchTokenBinary::KernelFromPatchtokens kernelTokens = PatchTokensTestData::ValidEmptyKernel::create(storage); iOpenCL::SPatchImageMemoryObjectKernelArgument globalMemArg = {}; globalMemArg.Token = iOpenCL::PATCH_TOKEN_IMAGE_MEMORY_OBJECT_KERNEL_ARGUMENT; globalMemArg.Size = sizeof(iOpenCL::SPatchImageMemoryObjectKernelArgument); globalMemArg.ArgumentNumber = 0; globalMemArg.Offset = 0x40; kernelTokens.tokens.kernelArgs.resize(1); kernelTokens.tokens.kernelArgs[0].objectArg = &globalMemArg; NEO::KernelInfo kernelInfo = {}; NEO::populateKernelInfo(kernelInfo, kernelTokens, sizeof(void *)); auto &argPointer = kernelInfo.kernelDescriptor.payloadMappings.explicitArgs[0].as(true); EXPECT_TRUE(NEO::isValidOffset(argPointer.bindless)); EXPECT_FALSE(NEO::isValidOffset(argPointer.bindful)); } TEST(KernelInfoFromPatchTokens, GivenKernelWithImageObjectArgWhenAddressingModeIsBindfulThenBindlessOffsetIsSetProperly) { DebugManagerStateRestore restorer; DebugManager.flags.UseBindlessMode.set(0); std::vector storage; NEO::PatchTokenBinary::KernelFromPatchtokens kernelTokens = PatchTokensTestData::ValidEmptyKernel::create(storage); iOpenCL::SPatchImageMemoryObjectKernelArgument globalMemArg = {}; globalMemArg.Token = iOpenCL::PATCH_TOKEN_IMAGE_MEMORY_OBJECT_KERNEL_ARGUMENT; globalMemArg.Size = sizeof(iOpenCL::SPatchImageMemoryObjectKernelArgument); globalMemArg.ArgumentNumber = 0; globalMemArg.Offset = 0x40; kernelTokens.tokens.kernelArgs.resize(1); 
kernelTokens.tokens.kernelArgs[0].objectArg = &globalMemArg; NEO::KernelInfo kernelInfo = {}; kernelInfo.kernelDescriptor.kernelAttributes.bufferAddressingMode = NEO::KernelDescriptor::BindfulAndStateless; NEO::populateKernelInfo(kernelInfo, kernelTokens, sizeof(void *)); auto &argPointer = kernelInfo.kernelDescriptor.payloadMappings.explicitArgs[0].as(true); EXPECT_FALSE(NEO::isValidOffset(argPointer.bindless)); EXPECT_TRUE(NEO::isValidOffset(argPointer.bindful)); } TEST(KernelInfoFromPatchTokens, GivenKernelWithStatelessObjectArgWhenAddressingModeIsBindlessThenBindlessOffsetIsSetProperly) { DebugManagerStateRestore restorer; DebugManager.flags.UseBindlessMode.set(1); std::vector storage; NEO::PatchTokenBinary::KernelFromPatchtokens kernelTokens = PatchTokensTestData::ValidEmptyKernel::create(storage); auto surfaceStateHeapOffset = 0x40; auto dataParamOffset = 0x32; iOpenCL::SPatchStatelessGlobalMemoryObjectKernelArgument globalMemArg = {}; globalMemArg.Token = iOpenCL::PATCH_TOKEN_STATELESS_GLOBAL_MEMORY_OBJECT_KERNEL_ARGUMENT; globalMemArg.Size = sizeof(iOpenCL::SPatchStatelessGlobalMemoryObjectKernelArgument); globalMemArg.ArgumentNumber = 0; globalMemArg.SurfaceStateHeapOffset = surfaceStateHeapOffset; globalMemArg.DataParamOffset = dataParamOffset; kernelTokens.tokens.kernelArgs.resize(1); kernelTokens.tokens.kernelArgs[0].objectArg = &globalMemArg; NEO::KernelInfo kernelInfo = {}; NEO::populateKernelInfo(kernelInfo, kernelTokens, sizeof(void *)); auto &argPointer = kernelInfo.kernelDescriptor.payloadMappings.explicitArgs[0].as(true); EXPECT_TRUE(NEO::isValidOffset(argPointer.bindless)); EXPECT_TRUE(NEO::isValidOffset(argPointer.stateless)); EXPECT_FALSE(NEO::isValidOffset(argPointer.bindful)); EXPECT_EQ(argPointer.bindless, surfaceStateHeapOffset); EXPECT_EQ(argPointer.stateless, dataParamOffset); } TEST(KernelInfoFromPatchTokens, GivenKernelWithStatelessObjectArgWhenAddressingModeIsBindfulThenBindlessOffsetIsSetProperly) { DebugManagerStateRestore restorer; 
DebugManager.flags.UseBindlessMode.set(0); std::vector storage; NEO::PatchTokenBinary::KernelFromPatchtokens kernelTokens = PatchTokensTestData::ValidEmptyKernel::create(storage); iOpenCL::SPatchStatelessGlobalMemoryObjectKernelArgument globalMemArg = {}; globalMemArg.Token = iOpenCL::PATCH_TOKEN_STATELESS_GLOBAL_MEMORY_OBJECT_KERNEL_ARGUMENT; globalMemArg.Size = sizeof(iOpenCL::SPatchStatelessGlobalMemoryObjectKernelArgument); globalMemArg.ArgumentNumber = 0; globalMemArg.SurfaceStateHeapOffset = 0x40; kernelTokens.tokens.kernelArgs.resize(1); kernelTokens.tokens.kernelArgs[0].objectArg = &globalMemArg; NEO::KernelInfo kernelInfo = {}; kernelInfo.kernelDescriptor.kernelAttributes.bufferAddressingMode = NEO::KernelDescriptor::BindfulAndStateless; NEO::populateKernelInfo(kernelInfo, kernelTokens, sizeof(void *)); auto &argPointer = kernelInfo.kernelDescriptor.payloadMappings.explicitArgs[0].as(true); EXPECT_FALSE(NEO::isValidOffset(argPointer.bindless)); EXPECT_TRUE(NEO::isValidOffset(argPointer.bindful)); } compute-runtime-22.14.22890/opencl/test/unit_test/program/kernel_info_tests.cpp000066400000000000000000000157731422164147700275340ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/memory_manager/os_agnostic_memory_manager.h" #include "shared/source/program/kernel_info.h" #include "shared/test/common/mocks/mock_execution_environment.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "shared/test/common/mocks/ult_device_factory.h" #include "opencl/test/unit_test/fixtures/multi_root_device_fixture.h" #include "gtest/gtest.h" #include #include using namespace NEO; TEST(KernelInfo, WhenKernelInfoIsCreatedThenItIsNotMoveableAndNotCopyable) { static_assert(false == std::is_move_constructible::value, ""); static_assert(false == std::is_copy_constructible::value, ""); static_assert(false == 
std::is_move_assignable::value, ""); static_assert(false == std::is_copy_assignable::value, ""); } TEST(KernelInfoTest, givenKernelInfoWhenCreateKernelAllocationThenCopyWholeKernelHeapToKernelAllocation) { KernelInfo kernelInfo; auto factory = UltDeviceFactory{1, 0}; auto device = factory.rootDevices[0]; const size_t heapSize = 0x40; char heap[heapSize]; kernelInfo.heapInfo.KernelHeapSize = heapSize; kernelInfo.heapInfo.pKernelHeap = &heap; for (size_t i = 0; i < heapSize; i++) { heap[i] = static_cast(i); } auto retVal = kernelInfo.createKernelAllocation(*device, false); EXPECT_TRUE(retVal); auto allocation = kernelInfo.kernelAllocation; EXPECT_EQ(0, memcmp(allocation->getUnderlyingBuffer(), heap, heapSize)); size_t isaPadding = HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily).getPaddingForISAAllocation(); EXPECT_EQ(allocation->getUnderlyingBufferSize(), heapSize + isaPadding); device->getMemoryManager()->checkGpuUsageAndDestroyGraphicsAllocations(allocation); } TEST(KernelInfoTest, givenKernelInfoWhenCreatingKernelAllocationWithInternalIsaFalseTypeThenCorrectAllocationTypeIsUsed) { KernelInfo kernelInfo; auto factory = UltDeviceFactory{1, 0}; auto device = factory.rootDevices[0]; const size_t heapSize = 0x40; char heap[heapSize]; kernelInfo.heapInfo.KernelHeapSize = heapSize; kernelInfo.heapInfo.pKernelHeap = &heap; auto retVal = kernelInfo.createKernelAllocation(*device, false); EXPECT_TRUE(retVal); auto allocation = kernelInfo.kernelAllocation; EXPECT_EQ(AllocationType::KERNEL_ISA, allocation->getAllocationType()); device->getMemoryManager()->checkGpuUsageAndDestroyGraphicsAllocations(allocation); } TEST(KernelInfoTest, givenKernelInfoWhenCreatingKernelAllocationWithInternalIsaTrueTypeThenCorrectAllocationTypeIsUsed) { KernelInfo kernelInfo; auto factory = UltDeviceFactory{1, 0}; auto device = factory.rootDevices[0]; const size_t heapSize = 0x40; char heap[heapSize]; kernelInfo.heapInfo.KernelHeapSize = heapSize; kernelInfo.heapInfo.pKernelHeap = &heap; 
auto retVal = kernelInfo.createKernelAllocation(*device, true); EXPECT_TRUE(retVal); auto allocation = kernelInfo.kernelAllocation; EXPECT_EQ(AllocationType::KERNEL_ISA_INTERNAL, allocation->getAllocationType()); device->getMemoryManager()->checkGpuUsageAndDestroyGraphicsAllocations(allocation); } class MyMemoryManager : public OsAgnosticMemoryManager { public: using OsAgnosticMemoryManager::OsAgnosticMemoryManager; GraphicsAllocation *allocate32BitGraphicsMemoryImpl(const AllocationData &allocationData, bool useLocalMemory) override { return nullptr; } }; TEST(KernelInfoTest, givenKernelInfoWhenCreateKernelAllocationAndCannotAllocateMemoryThenReturnsFalse) { KernelInfo kernelInfo; auto executionEnvironment = new MockExecutionEnvironment(defaultHwInfo.get()); executionEnvironment->memoryManager.reset(new MyMemoryManager(*executionEnvironment)); if (executionEnvironment->memoryManager->isLimitedGPU(0)) { GTEST_SKIP(); } auto device = std::unique_ptr(Device::create(executionEnvironment, mockRootDeviceIndex)); auto retVal = kernelInfo.createKernelAllocation(*device, false); EXPECT_FALSE(retVal); } TEST(KernelInfoTest, givenReuseKernelBinariesWhenCreateKernelAllocationThenReuseAllocationFromMap) { DebugManagerStateRestore restorer; DebugManager.flags.ReuseKernelBinaries.set(1); auto factory = UltDeviceFactory{1, 0}; auto device = factory.rootDevices[0]; const size_t heapSize = 0x40; char heap[heapSize]; KernelInfo kernelInfo; kernelInfo.heapInfo.KernelHeapSize = heapSize; kernelInfo.heapInfo.pKernelHeap = &heap; KernelInfo kernelInfo2; kernelInfo2.heapInfo.KernelHeapSize = heapSize; kernelInfo2.heapInfo.pKernelHeap = &heap; EXPECT_EQ(0u, device->getMemoryManager()->getKernelAllocationMap().size()); auto retVal = kernelInfo.createKernelAllocation(*device, true); EXPECT_EQ(1u, device->getMemoryManager()->getKernelAllocationMap().size()); EXPECT_TRUE(retVal); retVal = kernelInfo2.createKernelAllocation(*device, true); EXPECT_EQ(1u, 
device->getMemoryManager()->getKernelAllocationMap().size()); EXPECT_TRUE(retVal); device->getMemoryManager()->checkGpuUsageAndDestroyGraphicsAllocations(kernelInfo.kernelAllocation); } using KernelInfoMultiRootDeviceTests = MultiRootDeviceFixture; TEST_F(KernelInfoMultiRootDeviceTests, WhenCreatingKernelAllocationThenItHasCorrectRootDeviceIndex) { KernelInfo kernelInfo; const size_t heapSize = 0x40; char heap[heapSize]; kernelInfo.heapInfo.KernelHeapSize = heapSize; kernelInfo.heapInfo.pKernelHeap = &heap; auto retVal = kernelInfo.createKernelAllocation(device1->getDevice(), false); EXPECT_TRUE(retVal); auto allocation = kernelInfo.kernelAllocation; ASSERT_NE(nullptr, allocation); EXPECT_EQ(expectedRootDeviceIndex, allocation->getRootDeviceIndex()); mockMemoryManager->checkGpuUsageAndDestroyGraphicsAllocations(allocation); } TEST(KernelInfo, whenGetKernelNamesStringIsCalledThenNamesAreProperlyConcatenated) { ExecutionEnvironment execEnv; KernelInfo kernel1 = {}; kernel1.kernelDescriptor.kernelMetadata.kernelName = "kern1"; KernelInfo kernel2 = {}; kernel2.kernelDescriptor.kernelMetadata.kernelName = "kern2"; std::vector kernelInfoArray; kernelInfoArray.push_back(&kernel1); kernelInfoArray.push_back(&kernel2); EXPECT_STREQ("kern1;kern2", concatenateKernelNames(kernelInfoArray).c_str()); } TEST(KernelInfo, givenNumbersOfSamplerWhenCheckSamplerStateCountAndSamplerStateArraySizeThenCorrectValueAreReturned) { KernelInfo kernel = {}; uint8_t numSamplers = 5u; kernel.kernelDescriptor.payloadMappings.samplerTable.numSamplers = numSamplers; auto samplerSize = HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily).getSamplerStateSize(); EXPECT_EQ(kernel.getSamplerStateArrayCount(), numSamplers); EXPECT_EQ(kernel.getSamplerStateArraySize(*defaultHwInfo), static_cast(numSamplers * samplerSize)); } compute-runtime-22.14.22890/opencl/test/unit_test/program/printf_handler_tests.cpp000066400000000000000000000405011422164147700302230ustar00rootroot00000000000000/* * Copyright 
(C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/wait_status.h" #include "shared/source/helpers/local_memory_access_modes.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/libult/ult_command_stream_receiver.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/common/test_macros/test_checks_shared.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/event/event.h" #include "opencl/source/event/user_event.h" #include "opencl/source/program/printf_handler.h" #include "opencl/test/unit_test/fixtures/multi_root_device_fixture.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_mdi.h" #include "opencl/test/unit_test/mocks/mock_program.h" using namespace NEO; using PrintfHandlerTests = ::testing::Test; TEST_F(PrintfHandlerTests, givenPrintfHandlerWhenBeingConstructedThenStorePrintfSurfaceInitialDataSize) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); struct MockPrintfHandler : public PrintfHandler { using PrintfHandler::PrintfHandler; using PrintfHandler::printfSurfaceInitialDataSizePtr; MockPrintfHandler(ClDevice &device) : PrintfHandler(device) {} }; MockPrintfHandler printfHandler(*device); EXPECT_NE(nullptr, printfHandler.printfSurfaceInitialDataSizePtr); EXPECT_EQ(sizeof(uint32_t), *printfHandler.printfSurfaceInitialDataSizePtr); } TEST_F(PrintfHandlerTests, givenNotPreparedPrintfHandlerWhenGetSurfaceIsCalledThenResultIsNullptr) { MockClDevice *device = new MockClDevice{MockDevice::createWithNewExecutionEnvironment(nullptr)}; MockContext context; auto pKernelInfo = std::make_unique(); pKernelInfo->setPrintfSurface(sizeof(uintptr_t), 0); MockProgram *pProgram = 
new MockProgram(&context, false, toClDeviceVector(*device)); MockKernel *pKernel = new MockKernel(pProgram, *pKernelInfo, *device); MockMultiDispatchInfo multiDispatchInfo(device, pKernel); PrintfHandler *printfHandler = PrintfHandler::create(multiDispatchInfo, *device); EXPECT_EQ(nullptr, printfHandler->getSurface()); delete printfHandler; delete pKernel; delete pProgram; delete device; } TEST_F(PrintfHandlerTests, givenPreparedPrintfHandlerWithUndefinedSshOffsetWhenGetSurfaceIsCalledThenResultIsNotNullptr) { MockClDevice *device = new MockClDevice{MockDevice::createWithNewExecutionEnvironment(nullptr)}; MockContext context; auto pKernelInfo = std::make_unique(); pKernelInfo->setPrintfSurface(sizeof(uintptr_t), 0); MockProgram *pProgram = new MockProgram(&context, false, toClDeviceVector(*device)); uint64_t crossThread[10]; MockKernel *pKernel = new MockKernel(pProgram, *pKernelInfo, *device); pKernel->setCrossThreadData(&crossThread, sizeof(uint64_t) * 8); MockMultiDispatchInfo multiDispatchInfo(device, pKernel); PrintfHandler *printfHandler = PrintfHandler::create(multiDispatchInfo, *device); printfHandler->prepareDispatch(multiDispatchInfo); EXPECT_NE(nullptr, printfHandler->getSurface()); delete printfHandler; delete pKernel; delete pProgram; delete device; } TEST_F(PrintfHandlerTests, givenKernelWithImplicitArgsWhenPreparingPrintfHandlerThenProperAddressIsPatchedInImplicitArgsStruct) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockContext context(device.get()); auto pKernelInfo = std::make_unique(); pKernelInfo->setPrintfSurface(sizeof(uintptr_t), 0); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 16; pKernelInfo->kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs = true; MockProgram program{&context, false, toClDeviceVector(*device)}; uint64_t crossThread[10]; MockKernel kernel{&program, *pKernelInfo, *device}; kernel.setCrossThreadData(&crossThread, sizeof(uint64_t) * 10); 
kernel.initialize(); MockMultiDispatchInfo multiDispatchInfo(device.get(), &kernel); auto printfHandler = std::unique_ptr(PrintfHandler::create(multiDispatchInfo, *device)); printfHandler->prepareDispatch(multiDispatchInfo); auto printfSurface = printfHandler->getSurface(); ASSERT_NE(nullptr, printfSurface); auto pImplicitArgs = kernel.getImplicitArgs(); ASSERT_NE(nullptr, pImplicitArgs); EXPECT_EQ(printfSurface->getGpuAddress(), pImplicitArgs->printfBufferPtr); } HWTEST_F(PrintfHandlerTests, givenEnabledStatelessCompressionWhenPrintEnqueueOutputIsCalledThenBCSEngineIsUsedToDecompressPrintfOutput) { HardwareInfo hwInfo = *defaultHwInfo; hwInfo.capabilityTable.blitterOperationsSupported = true; REQUIRE_BLITTER_OR_SKIP(&hwInfo); DebugManagerStateRestore restore; for (auto enable : {-1, 0, 1}) { DebugManager.flags.EnableStatelessCompression.set(enable); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); MockContext context(device.get()); auto kernelInfo = std::make_unique(); kernelInfo->setPrintfSurface(sizeof(uintptr_t), 0); auto program = std::make_unique(&context, false, toClDeviceVector(*device)); uint64_t crossThread[10]; auto kernel = std::make_unique(program.get(), *kernelInfo, *device); kernel->setCrossThreadData(&crossThread, sizeof(uint64_t) * 8); MockMultiDispatchInfo multiDispatchInfo(device.get(), kernel.get()); std::unique_ptr printfHandler(PrintfHandler::create(multiDispatchInfo, *device)); printfHandler->prepareDispatch(multiDispatchInfo); EXPECT_NE(nullptr, printfHandler->getSurface()); printfHandler->printEnqueueOutput(); auto &bcsEngine = device->getEngine(EngineHelpers::getBcsEngineType(device->getHardwareInfo(), device->getDeviceBitfield(), device->getSelectorCopyEngine(), true), EngineUsage::Regular); auto bcsCsr = static_cast *>(bcsEngine.commandStreamReceiver); if (enable > 0) { EXPECT_EQ(1u, bcsCsr->blitBufferCalled); EXPECT_EQ(BlitterConstants::BlitDirection::BufferToHostPtr, 
bcsCsr->receivedBlitProperties[0].blitDirection); } else { EXPECT_EQ(0u, bcsCsr->blitBufferCalled); } } } HWTEST_F(PrintfHandlerTests, givenDisallowedLocalMemoryCpuAccessWhenPrintEnqueueOutputIsCalledThenBCSEngineIsUsedToCopyPrintfOutput) { HardwareInfo hwInfo = *defaultHwInfo; hwInfo.capabilityTable.blitterOperationsSupported = true; REQUIRE_BLITTER_OR_SKIP(&hwInfo); class MockPrintfHandler : public PrintfHandler { public: using PrintfHandler::PrintfHandler; using PrintfHandler::printfSurface; MockPrintfHandler(ClDevice &device) : PrintfHandler(device) {} }; DebugManagerStateRestore restore; DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast(LocalMemoryAccessMode::CpuAccessDisallowed)); DebugManager.flags.EnableLocalMemory.set(1); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); MockContext context(device.get()); auto kernelInfo = std::make_unique(); kernelInfo->setPrintfSurface(sizeof(uintptr_t), 0); auto program = std::make_unique(&context, false, toClDeviceVector(*device)); uint64_t crossThread[10]{}; auto kernel = std::make_unique(program.get(), *kernelInfo, *device); kernel->setCrossThreadData(&crossThread, sizeof(uint64_t) * 8); MockMultiDispatchInfo multiDispatchInfo(device.get(), kernel.get()); auto printfHandler = std::make_unique(*device); printfHandler->prepareDispatch(multiDispatchInfo); EXPECT_NE(nullptr, printfHandler->getSurface()); device->getMemoryManager()->freeGraphicsMemory(printfHandler->printfSurface); auto allocation = new MockGraphicsAllocation(reinterpret_cast(0x1000), 0x1000); allocation->memoryPool = MemoryPool::LocalMemory; printfHandler->printfSurface = allocation; printfHandler->printEnqueueOutput(); auto &bcsEngine = device->getEngine(EngineHelpers::getBcsEngineType(device->getHardwareInfo(), device->getDeviceBitfield(), device->getSelectorCopyEngine(), true), EngineUsage::Regular); auto bcsCsr = static_cast *>(bcsEngine.commandStreamReceiver); EXPECT_TRUE(bcsCsr->blitBufferCalled >= 
1); EXPECT_EQ(BlitterConstants::BlitDirection::BufferToHostPtr, bcsCsr->receivedBlitProperties[0].blitDirection); } HWTEST_F(PrintfHandlerTests, givenPrintfHandlerWhenEnqueueIsBlockedThenDontUsePrintfObjectAfterMove) { DebugManagerStateRestore restore; DebugManager.flags.MakeEachEnqueueBlocking.set(true); class MyMockCommandQueueHw : public CommandQueueHw { public: using CommandQueueHw::CommandQueueHw; using CommandQueueHw::enqueueKernel; WaitStatus waitForAllEngines(bool blockedQueue, PrintfHandler *printfHandler, bool cleanTemporaryAllocationsList) override { waitCalled = true; printfHandlerUsedForWait = printfHandler; return waitForAllEnginesReturnValue; } bool waitCalled = false; PrintfHandler *printfHandlerUsedForWait = nullptr; WaitStatus waitForAllEnginesReturnValue = WaitStatus::Ready; }; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockContext context; MyMockCommandQueueHw cmdQ(&context, device.get(), nullptr, false); auto kernelInfo = std::make_unique(); kernelInfo->setPrintfSurface(sizeof(uintptr_t), 0); uint64_t crossThread[10]; auto program = std::make_unique(&context, false, toClDeviceVector(*device)); auto kernel = std::make_unique(program.get(), *kernelInfo, *device); kernel->setCrossThreadData(&crossThread, sizeof(uint64_t) * 8); kernel->incRefInternal(); UserEvent userEvent; cl_event waitlist[] = {&userEvent}; size_t gws[] = {1, 1, 1}; cmdQ.enqueueKernel(kernel.get(), 1, nullptr, gws, nullptr, 1, waitlist, nullptr); EXPECT_TRUE(cmdQ.waitCalled); EXPECT_EQ(nullptr, cmdQ.printfHandlerUsedForWait); userEvent.setStatus(CL_COMPLETE); EXPECT_FALSE(cmdQ.isQueueBlocked()); } TEST_F(PrintfHandlerTests, givenMultiDispatchInfoWithMultipleKernelsWhenCreatingAndDispatchingPrintfHandlerThenPickMainKernel) { MockContext context; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); auto program = std::make_unique(&context, false, toClDeviceVector(*device)); auto pMainKernelInfo = 
std::make_unique(); pMainKernelInfo->setPrintfSurface(sizeof(uintptr_t), 0); auto pKernelInfo = std::make_unique(); auto mainKernel = std::make_unique(program.get(), *pMainKernelInfo, *device); auto kernel1 = std::make_unique(program.get(), *pKernelInfo, *device); auto kernel2 = std::make_unique(program.get(), *pKernelInfo, *device); uint64_t crossThread[8]; mainKernel->setCrossThreadData(&crossThread, sizeof(uint64_t) * 8); DispatchInfo mainDispatchInfo(device.get(), mainKernel.get(), 1, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}); DispatchInfo dispatchInfo1(device.get(), kernel1.get(), 1, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}); DispatchInfo dispatchInfo2(device.get(), kernel2.get(), 1, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}); MultiDispatchInfo multiDispatchInfo(mainKernel.get()); multiDispatchInfo.push(dispatchInfo1); multiDispatchInfo.push(mainDispatchInfo); multiDispatchInfo.push(dispatchInfo2); std::unique_ptr printfHandler(PrintfHandler::create(multiDispatchInfo, *device)); ASSERT_NE(nullptr, printfHandler.get()); printfHandler->prepareDispatch(multiDispatchInfo); EXPECT_NE(nullptr, printfHandler->getSurface()); } TEST_F(PrintfHandlerTests, GivenEmptyMultiDispatchInfoWhenCreatingPrintfHandlerThenPrintfHandlerIsNotCreated) { MockClDevice device{new MockDevice}; MockKernelWithInternals mockKernelWithInternals{device}; MockMultiDispatchInfo multiDispatchInfo{&device, mockKernelWithInternals.mockKernel}; multiDispatchInfo.dispatchInfos.resize(0); EXPECT_EQ(nullptr, multiDispatchInfo.peekMainKernel()); auto printfHandler = PrintfHandler::create(multiDispatchInfo, device); EXPECT_EQ(nullptr, printfHandler); } TEST_F(PrintfHandlerTests, GivenAllocationInLocalMemoryWhichRequiresBlitterWhenPreparingPrintfSurfaceDispatchThenBlitterIsUsed) { REQUIRE_BLITTER_OR_SKIP(defaultHwInfo.get()); DebugManagerStateRestore restorer; auto hwInfo = *defaultHwInfo; hwInfo.capabilityTable.blitterOperationsSupported = true; uint32_t blitsCounter = 0; uint32_t expectedBlitsCount = 0; auto 
mockBlitMemoryToAllocation = [&blitsCounter](const Device &device, GraphicsAllocation *memory, size_t offset, const void *hostPtr, Vec3 size) -> BlitOperationResult { blitsCounter++; return BlitOperationResult::Success; }; VariableBackup blitMemoryToAllocationFuncBackup{ &BlitHelperFunctions::blitMemoryToAllocation, mockBlitMemoryToAllocation}; LocalMemoryAccessMode localMemoryAccessModes[] = { LocalMemoryAccessMode::Default, LocalMemoryAccessMode::CpuAccessAllowed, LocalMemoryAccessMode::CpuAccessDisallowed}; for (auto localMemoryAccessMode : localMemoryAccessModes) { DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast(localMemoryAccessMode)); for (auto isLocalMemorySupported : ::testing::Bool()) { DebugManager.flags.EnableLocalMemory.set(isLocalMemorySupported); auto pClDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); MockContext context{pClDevice.get()}; auto pKernelInfo = std::make_unique(); pKernelInfo->setPrintfSurface(sizeof(uintptr_t), 0); auto program = std::make_unique(&context, false, toClDeviceVector(*pClDevice)); uint64_t crossThread[10]; auto kernel = std::make_unique(program.get(), *pKernelInfo, *pClDevice); kernel->setCrossThreadData(&crossThread, sizeof(uint64_t) * 8); MockMultiDispatchInfo multiDispatchInfo(pClDevice.get(), kernel.get()); std::unique_ptr printfHandler(PrintfHandler::create(multiDispatchInfo, *pClDevice)); printfHandler->prepareDispatch(multiDispatchInfo); if (printfHandler->getSurface()->isAllocatedInLocalMemoryPool() && (localMemoryAccessMode == LocalMemoryAccessMode::CpuAccessDisallowed)) { expectedBlitsCount++; } EXPECT_EQ(expectedBlitsCount, blitsCounter); } } } using PrintfHandlerMultiRootDeviceTests = MultiRootDeviceFixture; TEST_F(PrintfHandlerMultiRootDeviceTests, GivenPrintfSurfaceThenItHasCorrectRootDeviceIndex) { auto pKernelInfo = std::make_unique(); pKernelInfo->setPrintfSurface(sizeof(uintptr_t), 0); auto program = std::make_unique(context.get(), false, 
toClDeviceVector(*device1)); auto kernel = std::make_unique(program.get(), *pKernelInfo, *device1); uint64_t crossThread[10]; kernel->setCrossThreadData(&crossThread, sizeof(uint64_t) * 8); MockMultiDispatchInfo multiDispatchInfo(device1, kernel.get()); std::unique_ptr printfHandler(PrintfHandler::create(multiDispatchInfo, *device1)); printfHandler->prepareDispatch(multiDispatchInfo); auto surface = printfHandler->getSurface(); ASSERT_NE(nullptr, surface); EXPECT_EQ(expectedRootDeviceIndex, surface->getRootDeviceIndex()); } compute-runtime-22.14.22890/opencl/test/unit_test/program/printf_helper_tests.cpp000066400000000000000000000747151422164147700301030ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/string.h" #include "shared/source/program/print_formatter.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "gtest/gtest.h" #include using namespace NEO; using namespace iOpenCL; // -------------------- Base Fixture ------------------------ class PrintFormatterTest : public testing::Test { public: std::unique_ptr printFormatter; static const size_t maxPrintfOutputLength = 4096; static const size_t printfBufferSize = 1024; std::string format; uint8_t buffer; MockGraphicsAllocation *data; MockKernel *kernel; std::unique_ptr program; std::unique_ptr kernelInfo; ClDevice *device; uint8_t underlyingBuffer[maxPrintfOutputLength]; uint32_t offset; int maxStringIndex; protected: void SetUp() override { offset = 4; maxStringIndex = 0; data = new MockGraphicsAllocation(underlyingBuffer, maxPrintfOutputLength); kernelInfo = std::make_unique(); device = new 
MockClDevice{MockDevice::createWithNewExecutionEnvironment(nullptr)}; program = std::make_unique(toClDeviceVector(*device)); kernel = new MockKernel(program.get(), *kernelInfo, *device); printFormatter = std::unique_ptr(new PrintFormatter(static_cast(data->getUnderlyingBuffer()), printfBufferSize, is32bit, &kernelInfo->kernelDescriptor.kernelMetadata.printfStringsMap)); underlyingBuffer[0] = 0; underlyingBuffer[1] = 0; underlyingBuffer[2] = 0; underlyingBuffer[3] = 0; } void TearDown() override { delete data; delete kernel; delete device; } enum class PRINTF_DATA_TYPE : int { INVALID, BYTE, SHORT, INT, FLOAT, STRING, LONG, POINTER, DOUBLE, VECTOR_BYTE, VECTOR_SHORT, VECTOR_INT, VECTOR_LONG, VECTOR_FLOAT, VECTOR_DOUBLE }; PRINTF_DATA_TYPE getPrintfDataType(int8_t value) { return PRINTF_DATA_TYPE::BYTE; }; PRINTF_DATA_TYPE getPrintfDataType(uint8_t value) { return PRINTF_DATA_TYPE::BYTE; }; PRINTF_DATA_TYPE getPrintfDataType(int16_t value) { return PRINTF_DATA_TYPE::SHORT; }; PRINTF_DATA_TYPE getPrintfDataType(uint16_t value) { return PRINTF_DATA_TYPE::SHORT; }; PRINTF_DATA_TYPE getPrintfDataType(int32_t value) { return PRINTF_DATA_TYPE::INT; }; PRINTF_DATA_TYPE getPrintfDataType(uint32_t value) { return PRINTF_DATA_TYPE::INT; }; PRINTF_DATA_TYPE getPrintfDataType(int64_t value) { return PRINTF_DATA_TYPE::LONG; }; PRINTF_DATA_TYPE getPrintfDataType(uint64_t value) { return PRINTF_DATA_TYPE::LONG; }; PRINTF_DATA_TYPE getPrintfDataType(float value) { return PRINTF_DATA_TYPE::FLOAT; }; PRINTF_DATA_TYPE getPrintfDataType(double value) { return PRINTF_DATA_TYPE::DOUBLE; }; PRINTF_DATA_TYPE getPrintfDataType(char *value) { return PRINTF_DATA_TYPE::STRING; }; template void injectValue(T value) { storeData(getPrintfDataType(value)); storeData(value); } void injectStringValue(int value) { storeData(PRINTF_DATA_TYPE::STRING); storeData(value); } template void storeData(T value) { T *valuePointer = reinterpret_cast(underlyingBuffer + offset); if (isAligned(valuePointer)) 
*valuePointer = value; else { memcpy_s(valuePointer, sizeof(underlyingBuffer) - offset, &value, sizeof(T)); } offset += sizeof(T); // first four bytes always store the size uint32_t *pointer = reinterpret_cast(underlyingBuffer); *pointer = offset; } int injectFormatString(std::string str) { auto index = maxStringIndex++; kernelInfo->addToPrintfStringsMap(index, str); return index; } }; // for tests printing a single value template struct SingleValueTestParam { std::string format; T value; }; typedef SingleValueTestParam Int8Params; typedef SingleValueTestParam Uint8Params; typedef SingleValueTestParam Int16Params; typedef SingleValueTestParam Uint16Params; typedef SingleValueTestParam Int32Params; typedef SingleValueTestParam Uint32Params; typedef SingleValueTestParam Int64Params; typedef SingleValueTestParam Uint64Params; typedef SingleValueTestParam FloatParams; typedef SingleValueTestParam DoubleParams; typedef SingleValueTestParam StringParams; Int8Params byteValues[] = { {"%c", 'a'}, }; class PrintfInt8Test : public PrintFormatterTest, public ::testing::WithParamInterface {}; TEST_P(PrintfInt8Test, GivenFormatContainingIntWhenPrintingThenValueIsInserted) { auto input = GetParam(); auto stringIndex = injectFormatString(input.format); storeData(stringIndex); injectValue(input.value); char referenceOutput[maxPrintfOutputLength]; char actualOutput[maxPrintfOutputLength]; printFormatter->printKernelOutput([&actualOutput](char *str) { strncpy_s(actualOutput, maxPrintfOutputLength, str, maxPrintfOutputLength); }); snprintf(referenceOutput, sizeof(referenceOutput), input.format.c_str(), input.value); EXPECT_STREQ(referenceOutput, actualOutput); } INSTANTIATE_TEST_CASE_P(PrintfInt8Test, PrintfInt8Test, ::testing::ValuesIn(byteValues)); Int32Params intValues[] = { {"%d", 0}, {"%d", 1}, {"%d", -1}, {"%d", INT32_MAX}, {"%d", INT32_MIN}, {"%5d", 10}, {"%-5d", 10}, {"%05d", 10}, {"%+5d", 10}, {"%-+5d", 10}, {"%.5i", 100}, {"%6.5i", 100}, {"%-06i", 100}, {"%06.5i", 100}}; 
class PrintfInt32Test : public PrintFormatterTest, public ::testing::WithParamInterface {}; TEST_P(PrintfInt32Test, GivenFormatContainingIntWhenPrintingThenValueIsInserted) { auto input = GetParam(); auto stringIndex = injectFormatString(input.format); storeData(stringIndex); injectValue(input.value); char referenceOutput[maxPrintfOutputLength]; char actualOutput[maxPrintfOutputLength]; printFormatter->printKernelOutput([&actualOutput](char *str) { strncpy_s(actualOutput, maxPrintfOutputLength, str, maxPrintfOutputLength); }); snprintf(referenceOutput, sizeof(referenceOutput), input.format.c_str(), input.value); EXPECT_STREQ(referenceOutput, actualOutput); } INSTANTIATE_TEST_CASE_P(PrintfInt32Test, PrintfInt32Test, ::testing::ValuesIn(intValues)); Uint32Params uintValues[] = { {"%u", 0}, {"%u", 1}, {"%u", UINT32_MAX}, {"%.0u", 0}, // octal {"%o", 10}, {"%.5o", 10}, {"%#o", 100000000}, {"%04.5o", 10}, // hexadecimal {"%#x", 0xABCDEF}, {"%#X", 0xABCDEF}, {"%#X", 0}, {"%8x", 399}, {"%04x", 399}}; class PrintfUint32Test : public PrintFormatterTest, public ::testing::WithParamInterface {}; TEST_P(PrintfUint32Test, GivenFormatContainingUintWhenPrintingThenValueIsInserted) { auto input = GetParam(); auto stringIndex = injectFormatString(input.format); storeData(stringIndex); injectValue(input.value); char referenceOutput[maxPrintfOutputLength]; char actualOutput[maxPrintfOutputLength]; printFormatter->printKernelOutput([&actualOutput](char *str) { strncpy_s(actualOutput, maxPrintfOutputLength, str, maxPrintfOutputLength); }); snprintf(referenceOutput, sizeof(referenceOutput), input.format.c_str(), input.value); EXPECT_STREQ(referenceOutput, actualOutput); } TEST_P(PrintfUint32Test, GivenBufferSizeGreaterThanPrintBufferWhenPrintingThenBufferIsTrimmed) { auto input = GetParam(); printFormatter = std::unique_ptr(new PrintFormatter(static_cast(data->getUnderlyingBuffer()), 0, is32bit, &kernelInfo->kernelDescriptor.kernelMetadata.printfStringsMap)); auto stringIndex = 
injectFormatString(input.format); storeData(stringIndex); injectValue(input.value); char referenceOutput[maxPrintfOutputLength] = ""; char actualOutput[maxPrintfOutputLength] = ""; printFormatter->printKernelOutput([&actualOutput](char *str) { strncpy_s(actualOutput, maxPrintfOutputLength, str, maxPrintfOutputLength); }); EXPECT_STREQ(referenceOutput, actualOutput); } INSTANTIATE_TEST_CASE_P(PrintfUint32Test, PrintfUint32Test, ::testing::ValuesIn(uintValues)); FloatParams floatValues[] = { {"%f", 10.3456f}, {"%.1f", 10.3456f}, {"%.2f", 10.3456f}, {"%8.3f", 10.3456f}, {"%08.2f", 10.3456f}, {"%-8.2f", 10.3456f}, {"%+8.2f", -10.3456f}, {"%.0f", 0.1f}, {"%.0f", 0.6f}, {"%0f", 0.6f}, }; class PrintfFloatTest : public PrintFormatterTest, public ::testing::WithParamInterface {}; TEST_P(PrintfFloatTest, GivenFormatContainingFloatWhenPrintingThenValueIsInserted) { auto input = GetParam(); auto stringIndex = injectFormatString(input.format); storeData(stringIndex); injectValue(input.value); char referenceOutput[maxPrintfOutputLength]; char actualOutput[maxPrintfOutputLength]; printFormatter->printKernelOutput([&actualOutput](char *str) { strncpy_s(actualOutput, maxPrintfOutputLength, str, maxPrintfOutputLength); }); snprintf(referenceOutput, sizeof(referenceOutput), input.format.c_str(), input.value); EXPECT_STREQ(referenceOutput, actualOutput); } INSTANTIATE_TEST_CASE_P(PrintfFloatTest, PrintfFloatTest, ::testing::ValuesIn(floatValues)); class PrintfDoubleToFloatTest : public PrintFormatterTest, public ::testing::WithParamInterface {}; DoubleParams doubleToFloatValues[] = { {"%f", 10.3456}, {"%.1f", 10.3456}, {"%.2f", 10.3456}, {"%8.3f", 10.3456}, {"%08.2f", 10.3456}, {"%-8.2f", 10.3456}, {"%+8.2f", -10.3456}, {"%.0f", 0.1}, {"%0f", 0.6}, {"%4g", 12345.6789}, {"%4.2g", 12345.6789}, {"%4G", 0.0000023}, {"%4G", 0.023}, {"%-#20.15e", 19456120.0}, {"%+#21.15E", 19456120.0}, {"%.6a", 0.1}, {"%10.2a", 9990.235}}; TEST_P(PrintfDoubleToFloatTest, 
GivenFormatContainingFloatAndDoubleWhenPrintingThenValueIsInserted) { auto input = GetParam(); auto stringIndex = injectFormatString(input.format); storeData(stringIndex); injectValue(input.value); char referenceOutput[maxPrintfOutputLength]; char actualOutput[maxPrintfOutputLength]; printFormatter->printKernelOutput([&actualOutput](char *str) { strncpy_s(actualOutput, maxPrintfOutputLength, str, maxPrintfOutputLength); }); snprintf(referenceOutput, sizeof(referenceOutput), input.format.c_str(), input.value); EXPECT_STREQ(referenceOutput, actualOutput); } INSTANTIATE_TEST_CASE_P(PrintfDoubleToFloatTest, PrintfDoubleToFloatTest, ::testing::ValuesIn(doubleToFloatValues)); DoubleParams doubleValues[] = { {"%f", 302230.12156260}, {"%+f", 2937289102.1562}, {"% #F", (double)-1254}, {"%6.2f", 0.1562}, {"%06.2f", -0.1562}, {"%e", 0.1562}, {"%E", -1254.0001001}, {"%+.10e", 0.1562000102241}, {"% E", (double)1254}, {"%10.2e", 100230.1562}, {"%g", 74010.00001562}, {"%G", -1254.0001001}, {"%+g", 325001.00001562}, {"%+#G", -1254.0001001}, {"%8.2g", 19.844}, {"%1.5G", -1.1}, {"%.13a", 1890.00001562}, {"%.13A", -1254.0001001}, }; class PrintfDoubleTest : public PrintFormatterTest, public ::testing::WithParamInterface {}; TEST_P(PrintfDoubleTest, GivenFormatContainingDoubleWhenPrintingThenValueIsInserted) { auto input = GetParam(); auto stringIndex = injectFormatString(input.format); storeData(stringIndex); injectValue(input.value); char referenceOutput[maxPrintfOutputLength]; char actualOutput[maxPrintfOutputLength]; printFormatter->printKernelOutput([&actualOutput](char *str) { strncpy_s(actualOutput, maxPrintfOutputLength, str, maxPrintfOutputLength); }); if (input.format[input.format.length() - 1] == 'F') input.format[input.format.length() - 1] = 'f'; snprintf(referenceOutput, sizeof(referenceOutput), input.format.c_str(), input.value); EXPECT_STREQ(referenceOutput, actualOutput); } INSTANTIATE_TEST_CASE_P(PrintfDoubleTest, PrintfDoubleTest, ::testing::ValuesIn(doubleValues)); 
std::pair specialValues[] = { {"%%", "%"}, {"nothing%", "nothing"}, }; class PrintfSpecialTest : public PrintFormatterTest, public ::testing::WithParamInterface> {}; TEST_P(PrintfSpecialTest, GivenFormatContainingDoublePercentageWhenPrintingThenValueIsInsertedCorrectly) { auto input = GetParam(); auto stringIndex = injectFormatString(input.first); storeData(stringIndex); char actualOutput[maxPrintfOutputLength]; printFormatter->printKernelOutput([&actualOutput](char *str) { strncpy_s(actualOutput, maxPrintfOutputLength, str, maxPrintfOutputLength); }); EXPECT_STREQ(input.second.c_str(), actualOutput); } INSTANTIATE_TEST_CASE_P(PrintfSpecialTest, PrintfSpecialTest, ::testing::ValuesIn(specialValues)); // ------------------------- Testing Strings only with no Formatting ------------------------ class PrintfNoArgumentsTest : public PrintFormatterTest, public ::testing::WithParamInterface> {}; // escape/non-escaped strings are specified manually to avoid converting them in code // automatic code would have to do the same thing it is testing and therefore would be prone to mistakes // this is needed because compiler doesn't escape the format strings and provides them exactly as they were typed in kernel source std::pair stringValues[] = { {R"(test)", "test"}, {R"(test\n)", "test\n"}, }; TEST_P(PrintfNoArgumentsTest, GivenNoArgumentsWhenPrintingThenCharsAreEscaped) { auto input = GetParam(); auto stringIndex = injectFormatString(input.first); storeData(stringIndex); char actualOutput[maxPrintfOutputLength]; printFormatter->printKernelOutput([&actualOutput](char *str) { strncpy_s(actualOutput, maxPrintfOutputLength, str, maxPrintfOutputLength); }); EXPECT_STREQ(input.second.c_str(), actualOutput); } INSTANTIATE_TEST_CASE_P(PrintfNoArgumentsTest, PrintfNoArgumentsTest, ::testing::ValuesIn(stringValues)); StringParams stringValues2[] = { {"%s", "foo"}, }; class PrintfStringTest : public PrintFormatterTest, public ::testing::WithParamInterface {}; TEST_P(PrintfStringTest, 
GivenFormatContainingStringWhenPrintingThenValueIsInserted) { auto input = GetParam(); auto stringIndex = injectFormatString(input.format); storeData(stringIndex); auto inputIndex = injectFormatString(input.value); injectStringValue(inputIndex); char referenceOutput[maxPrintfOutputLength]; char actualOutput[maxPrintfOutputLength]; printFormatter->printKernelOutput([&actualOutput](char *str) { strncpy_s(actualOutput, maxPrintfOutputLength, str, maxPrintfOutputLength); }); snprintf(referenceOutput, sizeof(referenceOutput), input.format.c_str(), input.value.c_str()); EXPECT_STREQ(input.value.c_str(), actualOutput); } INSTANTIATE_TEST_CASE_P(PrintfStringTest, PrintfStringTest, ::testing::ValuesIn(stringValues2)); TEST_F(PrintFormatterTest, GivenLongStringValueWhenPrintedThenFullStringIsPrinted) { char testedLongString[maxPrintfOutputLength]; memset(testedLongString, 'a', maxPrintfOutputLength - 1); testedLongString[maxPrintfOutputLength - 1] = '\0'; auto stringIndex = injectFormatString(testedLongString); storeData(stringIndex); char actualOutput[maxPrintfOutputLength]; printFormatter->printKernelOutput([&actualOutput](char *str) { strncpy_s(actualOutput, maxPrintfOutputLength, str, maxPrintfOutputLength); }); EXPECT_STREQ(testedLongString, actualOutput); } TEST_F(PrintFormatterTest, GivenStringSpecifierWhenLongStringIsPassedAsValueThenFullStringIsPrinted) { char testedLongString[maxPrintfOutputLength]; memset(testedLongString, 'a', maxPrintfOutputLength - 5); testedLongString[maxPrintfOutputLength - 5] = '\0'; auto stringIndex = injectFormatString("%s"); storeData(stringIndex); auto inputIndex = injectFormatString(testedLongString); injectStringValue(inputIndex); char actualOutput[maxPrintfOutputLength]; printFormatter->printKernelOutput([&actualOutput](char *str) { strncpy_s(actualOutput, maxPrintfOutputLength, str, maxPrintfOutputLength); }); EXPECT_STREQ(testedLongString, actualOutput); } TEST_F(PrintFormatterTest, 
GivenTooLongStringWhenPrintedThenOutputIsTruncated) { std::unique_ptr testedLongString(new char[PrintFormatter::maxSinglePrintStringLength + 1024]); memset(testedLongString.get(), 'a', PrintFormatter::maxSinglePrintStringLength + 1024 - 1); testedLongString[PrintFormatter::maxSinglePrintStringLength + 1024 - 1] = '\0'; auto stringIndex = injectFormatString(testedLongString.get()); storeData(stringIndex); std::unique_ptr actualOutput(new char[PrintFormatter::maxSinglePrintStringLength + 1024]); printFormatter->printKernelOutput([&actualOutput](char *str) { size_t length = strnlen_s(str, PrintFormatter::maxSinglePrintStringLength + 1024); strncpy_s(actualOutput.get(), PrintFormatter::maxSinglePrintStringLength + 1024, str, length); }); auto testedLength = strnlen_s(testedLongString.get(), PrintFormatter::maxSinglePrintStringLength + 1024); auto actualLength = strnlen_s(actualOutput.get(), PrintFormatter::maxSinglePrintStringLength + 1024); EXPECT_GT(testedLength, actualLength); } TEST_F(PrintFormatterTest, GivenNullTokenWhenPrintingThenNullIsInserted) { auto stringIndex = injectFormatString("%s"); storeData(stringIndex); storeData(PRINTF_DATA_TYPE::VECTOR_INT); storeData(0); char actualOutput[maxPrintfOutputLength]; printFormatter->printKernelOutput([&actualOutput](char *str) { strncpy_s(actualOutput, maxPrintfOutputLength, str, maxPrintfOutputLength); }); EXPECT_STREQ("(null)", actualOutput); } // ----------------------- Vector channel count --------------------------------- TEST_F(PrintFormatterTest, GivenVector2WhenPrintingThenAllValuesAreInserted) { int channelCount = 2; auto stringIndex = injectFormatString("%v2d"); storeData(stringIndex); storeData(PRINTF_DATA_TYPE::VECTOR_INT); // channel count storeData(channelCount); // channel values for (int i = 0; i < channelCount; i++) storeData(i + 1); char actualOutput[maxPrintfOutputLength]; printFormatter->printKernelOutput([&actualOutput](char *str) { strncpy_s(actualOutput, maxPrintfOutputLength, str, 
maxPrintfOutputLength); }); EXPECT_STREQ("1,2", actualOutput); } TEST_F(PrintFormatterTest, GivenVector4WhenPrintingThenAllValuesAreInserted) { int channelCount = 4; auto stringIndex = injectFormatString("%v4d"); storeData(stringIndex); storeData(PRINTF_DATA_TYPE::VECTOR_INT); // channel count storeData(channelCount); // channel values for (int i = 0; i < channelCount; i++) storeData(i + 1); char actualOutput[maxPrintfOutputLength]; printFormatter->printKernelOutput([&actualOutput](char *str) { strncpy_s(actualOutput, maxPrintfOutputLength, str, maxPrintfOutputLength); }); EXPECT_STREQ("1,2,3,4", actualOutput); } TEST_F(PrintFormatterTest, GivenVector8WhenPrintingThenAllValuesAreInserted) { int channelCount = 8; auto stringIndex = injectFormatString("%v8d"); storeData(stringIndex); storeData(PRINTF_DATA_TYPE::VECTOR_INT); // channel count storeData(channelCount); // channel values for (int i = 0; i < channelCount; i++) storeData(i + 1); char actualOutput[maxPrintfOutputLength]; printFormatter->printKernelOutput([&actualOutput](char *str) { strncpy_s(actualOutput, maxPrintfOutputLength, str, maxPrintfOutputLength); }); EXPECT_STREQ("1,2,3,4,5,6,7,8", actualOutput); } TEST_F(PrintFormatterTest, GivenVector16WhenPrintingThenAllValuesAreInserted) { int channelCount = 16; auto stringIndex = injectFormatString("%v16d"); storeData(stringIndex); storeData(PRINTF_DATA_TYPE::VECTOR_INT); // channel count storeData(channelCount); // channel values for (int i = 0; i < channelCount; i++) storeData(i + 1); char actualOutput[maxPrintfOutputLength]; printFormatter->printKernelOutput([&actualOutput](char *str) { strncpy_s(actualOutput, maxPrintfOutputLength, str, maxPrintfOutputLength); }); EXPECT_STREQ("1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16", actualOutput); } // ------------------- vector types ---------------------------- TEST_F(PrintFormatterTest, GivenVectorOfBytesWhenPrintingThenAllValuesAreInserted) { int channelCount = 2; auto stringIndex = injectFormatString("%v2hhd"); 
storeData(stringIndex); storeData(PRINTF_DATA_TYPE::VECTOR_BYTE); // channel count storeData(channelCount); storeData(1); storeData(2); char actualOutput[maxPrintfOutputLength]; printFormatter->printKernelOutput([&actualOutput](char *str) { strncpy_s(actualOutput, maxPrintfOutputLength, str, maxPrintfOutputLength); }); EXPECT_STREQ("1,2", actualOutput); } TEST_F(PrintFormatterTest, GivenVectorOfShortsWhenPrintingThenAllValuesAreInserted) { int channelCount = 2; auto stringIndex = injectFormatString("%v2hd"); storeData(stringIndex); storeData(PRINTF_DATA_TYPE::VECTOR_SHORT); // channel count storeData(channelCount); storeData(1); storeData(2); char actualOutput[maxPrintfOutputLength]; printFormatter->printKernelOutput([&actualOutput](char *str) { strncpy_s(actualOutput, maxPrintfOutputLength, str, maxPrintfOutputLength); }); EXPECT_STREQ("1,2", actualOutput); } TEST_F(PrintFormatterTest, GivenVectorOfIntsWhenPrintingThenAllValuesAreInserted) { int channelCount = 2; auto stringIndex = injectFormatString("%v2d"); storeData(stringIndex); storeData(PRINTF_DATA_TYPE::VECTOR_INT); // channel count storeData(channelCount); storeData(1); storeData(2); char actualOutput[maxPrintfOutputLength]; printFormatter->printKernelOutput([&actualOutput](char *str) { strncpy_s(actualOutput, maxPrintfOutputLength, str, maxPrintfOutputLength); }); EXPECT_STREQ("1,2", actualOutput); } TEST_F(PrintFormatterTest, GivenSpecialVectorWhenPrintingThenAllValuesAreInserted) { int channelCount = 2; auto stringIndex = injectFormatString("%v2hld"); storeData(stringIndex); storeData(PRINTF_DATA_TYPE::VECTOR_INT); // channel count storeData(channelCount); storeData(1); storeData(2); char actualOutput[maxPrintfOutputLength]; printFormatter->printKernelOutput([&actualOutput](char *str) { strncpy_s(actualOutput, maxPrintfOutputLength, str, maxPrintfOutputLength); }); EXPECT_STREQ("1,2", actualOutput); } TEST_F(PrintFormatterTest, GivenVectorOfLongsWhenPrintingThenAllValuesAreInserted) { int channelCount = 
2; auto stringIndex = injectFormatString("%v2lld"); storeData(stringIndex); storeData(PRINTF_DATA_TYPE::VECTOR_LONG); // channel count storeData(channelCount); storeData(1); storeData(2); char actualOutput[maxPrintfOutputLength]; printFormatter->printKernelOutput([&actualOutput](char *str) { strncpy_s(actualOutput, maxPrintfOutputLength, str, maxPrintfOutputLength); }); EXPECT_STREQ("1,2", actualOutput); } TEST_F(PrintFormatterTest, GivenVectorOfFloatsWhenPrintingThenAllValuesAreInserted) { int channelCount = 2; auto stringIndex = injectFormatString("%v2f"); storeData(stringIndex); storeData(PRINTF_DATA_TYPE::VECTOR_FLOAT); // channel count storeData(channelCount); storeData(1.0); storeData(2.0); char actualOutput[maxPrintfOutputLength]; printFormatter->printKernelOutput([&actualOutput](char *str) { strncpy_s(actualOutput, maxPrintfOutputLength, str, maxPrintfOutputLength); }); EXPECT_STREQ("1.000000,2.000000", actualOutput); } TEST_F(PrintFormatterTest, GivenVectorOfDoublesWhenPrintingThenAllValuesAreInserted) { int channelCount = 2; auto stringIndex = injectFormatString("%v2f"); storeData(stringIndex); storeData(PRINTF_DATA_TYPE::VECTOR_DOUBLE); // channel count storeData(channelCount); storeData(1.0); storeData(2.0); char actualOutput[maxPrintfOutputLength]; printFormatter->printKernelOutput([&actualOutput](char *str) { strncpy_s(actualOutput, maxPrintfOutputLength, str, maxPrintfOutputLength); }); EXPECT_STREQ("1.000000,2.000000", actualOutput); } TEST_F(PrintFormatterTest, GivenPointerWhenPrintingThenValueIsInserted) { auto stringIndex = injectFormatString("%p"); storeData(stringIndex); int temp; storeData(PRINTF_DATA_TYPE::POINTER); // channel count storeData(reinterpret_cast(&temp)); // on 32bit configurations add extra 4 bytes when storing pointers, IGC always stores pointers on 8 bytes if (is32bit) { uint32_t padding = 0; storeData(padding); } char actualOutput[maxPrintfOutputLength]; char referenceOutput[maxPrintfOutputLength]; snprintf(referenceOutput, 
sizeof(referenceOutput), "%p", reinterpret_cast(&temp)); printFormatter->printKernelOutput([&actualOutput](char *str) { strncpy_s(actualOutput, maxPrintfOutputLength, str, maxPrintfOutputLength); }); EXPECT_STREQ(referenceOutput, actualOutput); } TEST_F(PrintFormatterTest, GivenPointerWith32BitKernelWhenPrintingThen32BitPointerIsPrinted) { printFormatter.reset(new PrintFormatter(static_cast(data->getUnderlyingBuffer()), printfBufferSize, true, &kernelInfo->kernelDescriptor.kernelMetadata.printfStringsMap)); auto stringIndex = injectFormatString("%p"); storeData(stringIndex); kernelInfo->kernelDescriptor.kernelAttributes.gpuPointerSize = 4; storeData(PRINTF_DATA_TYPE::POINTER); // store pointer uint32_t addressValue = 0; storeData(addressValue); void *pointer = nullptr; // store non zero padding uint32_t padding = 0xdeadbeef; storeData(padding); char actualOutput[maxPrintfOutputLength]; char referenceOutput[maxPrintfOutputLength]; snprintf(referenceOutput, sizeof(referenceOutput), "%p", pointer); printFormatter->printKernelOutput([&actualOutput](char *str) { strncpy_s(actualOutput, maxPrintfOutputLength, str, maxPrintfOutputLength); }); EXPECT_STREQ(referenceOutput, actualOutput); } TEST_F(PrintFormatterTest, Given2ByteVectorsWhenPrintingThenDataBufferParsedProperly) { int channelCount = 4; auto stringIndex = injectFormatString("%v4hhd %v4hhd"); storeData(stringIndex); storeData(PRINTF_DATA_TYPE::VECTOR_BYTE); // channel count storeData(channelCount); // channel values for (int i = 0; i < channelCount; i++) storeData(static_cast(i + 1)); // filler, should not be printed for (int i = 0; i < 12; i++) storeData(static_cast(0)); storeData(PRINTF_DATA_TYPE::VECTOR_BYTE); // channel count storeData(channelCount); // channel values for (int i = 0; i < channelCount; i++) storeData(static_cast(i + 1)); // filler, should not be printed for (int i = 0; i < 12; i++) storeData(static_cast(0)); char actualOutput[maxPrintfOutputLength]; 
printFormatter->printKernelOutput([&actualOutput](char *str) { strncpy_s(actualOutput, maxPrintfOutputLength, str, maxPrintfOutputLength); }); EXPECT_STREQ("1,2,3,4 1,2,3,4", actualOutput); } TEST_F(PrintFormatterTest, GivenEmptyBufferWhenPrintingThenFailSafely) { char actualOutput[maxPrintfOutputLength]; actualOutput[0] = 0; printFormatter->printKernelOutput([&actualOutput](char *str) { strncpy_s(actualOutput, maxPrintfOutputLength, str, maxPrintfOutputLength); }); EXPECT_STREQ("", actualOutput); } TEST_F(PrintFormatterTest, GivenNoStringMapAndBufferWithFormatStringThenItIsPrintedProperly) { printFormatter.reset(new PrintFormatter(static_cast(data->getUnderlyingBuffer()), printfBufferSize, true)); const char *formatString = "test string"; storeData(formatString); char output[maxPrintfOutputLength]; printFormatter->printKernelOutput([&output](char *str) { strncpy_s(output, maxPrintfOutputLength, str, maxPrintfOutputLength); }); EXPECT_STREQ(formatString, output); } TEST_F(PrintFormatterTest, GivenNoStringMapAndBufferWithFormatStringAnd2StringsThenDataIsParsedAndPrintedProperly) { printFormatter.reset(new PrintFormatter(static_cast(data->getUnderlyingBuffer()), printfBufferSize, true)); const char *formatString = "%s %s"; storeData(formatString); const char *string1 = "str1"; storeData(PRINTF_DATA_TYPE::POINTER); storeData(string1); const char *string2 = "str2"; storeData(PRINTF_DATA_TYPE::POINTER); storeData(string2); const char *expectedOutput = "str1 str2"; char output[maxPrintfOutputLength]; printFormatter->printKernelOutput([&output](char *str) { strncpy_s(output, maxPrintfOutputLength, str, maxPrintfOutputLength); }); EXPECT_STREQ(expectedOutput, output); } TEST(printToSTDOUTTest, GivenStringWhenPrintingToStdoutThenOutputOccurs) { testing::internal::CaptureStdout(); printToSTDOUT("test"); std::string output = testing::internal::GetCapturedStdout(); EXPECT_STREQ("test", output.c_str()); } TEST(simpleSprintf, 
GivenEmptyFormatStringWhenSimpleSprintfIsCalledThenBailOutWith0) { char out[1024] = {7, 0}; auto ret = simple_sprintf(out, sizeof(out), "", 3.0f); EXPECT_EQ(0U, ret); EXPECT_EQ(0, out[0]); EXPECT_EQ(0, out[1]); } compute-runtime-22.14.22890/opencl/test/unit_test/program/process_debug_data_tests.cpp000066400000000000000000000104721422164147700310450ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "opencl/test/unit_test/program/program_tests.h" #include "program_debug_data.h" #include using namespace iOpenCL; using namespace NEO; TEST_F(ProgramTests, GivenProgramWithDebugDataForTwoKernelsWhenPorcessedThenDebugDataIsSetInKernelInfos) { const char kernelName1[] = "kernel1"; const char kernelName2[] = "kernel2"; uint32_t kernelNameSize = static_cast(sizeof(kernelName1)); uint32_t genIsaSize = 8; uint32_t visaSize = 8; size_t debugDataSize = sizeof(SProgramDebugDataHeaderIGC) + 2 * (sizeof(SKernelDebugDataHeaderIGC) + kernelNameSize + genIsaSize + visaSize); std::unique_ptr debugData(new char[debugDataSize]); auto kernelInfo1 = new KernelInfo(); kernelInfo1->kernelDescriptor.kernelMetadata.kernelName = kernelName1; auto kernelInfo2 = new KernelInfo(); kernelInfo2->kernelDescriptor.kernelMetadata.kernelName = kernelName2; auto program = std::make_unique(toClDeviceVector(*pClDevice)); SProgramDebugDataHeaderIGC *programDebugHeader = reinterpret_cast(debugData.get()); programDebugHeader->NumberOfKernels = 2; SKernelDebugDataHeaderIGC *kernelDebugHeader = reinterpret_cast(ptrOffset(programDebugHeader, sizeof(SProgramDebugDataHeaderIGC))); kernelDebugHeader->KernelNameSize = kernelNameSize; kernelDebugHeader->SizeGenIsaDbgInBytes = genIsaSize; kernelDebugHeader->SizeVisaDbgInBytes = visaSize; char *kernelName = reinterpret_cast(ptrOffset(kernelDebugHeader, sizeof(SKernelDebugDataHeaderIGC))); 
memcpy_s(kernelName, kernelNameSize, kernelName1, kernelNameSize); char *vIsa1 = (ptrOffset(kernelName, kernelNameSize)); memset(vIsa1, 10, visaSize); char *genIsa1 = (ptrOffset(vIsa1, visaSize)); memset(genIsa1, 20, genIsaSize); kernelDebugHeader = reinterpret_cast(ptrOffset(vIsa1, genIsaSize + visaSize)); kernelDebugHeader->KernelNameSize = kernelNameSize; kernelDebugHeader->SizeGenIsaDbgInBytes = genIsaSize; kernelDebugHeader->SizeVisaDbgInBytes = visaSize; kernelName = reinterpret_cast(ptrOffset(kernelDebugHeader, sizeof(SKernelDebugDataHeaderIGC))); memcpy_s(kernelName, kernelNameSize, kernelName2, kernelNameSize); char *vIsa2 = (ptrOffset(kernelName, kernelNameSize)); memset(vIsa2, 10, visaSize); char *genIsa2 = (ptrOffset(vIsa2, visaSize)); memset(genIsa2, 20, genIsaSize); program->buildInfos[rootDeviceIndex].debugData = makeCopy(debugData.get(), debugDataSize); program->buildInfos[rootDeviceIndex].debugDataSize = debugDataSize; program->addKernelInfo(kernelInfo1, rootDeviceIndex); program->addKernelInfo(kernelInfo2, rootDeviceIndex); program->processDebugData(rootDeviceIndex); EXPECT_EQ(genIsaSize, kernelInfo1->debugData.genIsaSize); EXPECT_EQ(visaSize, kernelInfo1->debugData.vIsaSize); EXPECT_EQ(ptrDiff(vIsa1, debugData.get()), ptrDiff(kernelInfo1->debugData.vIsa, program->getDebugData(rootDeviceIndex))); EXPECT_EQ(ptrDiff(genIsa1, debugData.get()), ptrDiff(kernelInfo1->debugData.genIsa, program->getDebugData(rootDeviceIndex))); EXPECT_EQ(genIsaSize, kernelInfo2->debugData.genIsaSize); EXPECT_EQ(visaSize, kernelInfo2->debugData.vIsaSize); EXPECT_EQ(ptrDiff(vIsa2, debugData.get()), ptrDiff(kernelInfo2->debugData.vIsa, program->getDebugData(rootDeviceIndex))); EXPECT_EQ(ptrDiff(genIsa2, debugData.get()), ptrDiff(kernelInfo2->debugData.genIsa, program->getDebugData(rootDeviceIndex))); } TEST_F(ProgramTests, GivenProgramWithoutDebugDataWhenPorcessedThenDebugDataIsNotSetInKernelInfo) { const char kernelName1[] = "kernel1"; auto kernelInfo1 = new KernelInfo(); 
kernelInfo1->kernelDescriptor.kernelMetadata.kernelName = kernelName1; auto program = std::make_unique(toClDeviceVector(*pClDevice)); program->addKernelInfo(kernelInfo1, rootDeviceIndex); program->processDebugData(rootDeviceIndex); EXPECT_EQ(0u, kernelInfo1->debugData.genIsaSize); EXPECT_EQ(0u, kernelInfo1->debugData.vIsaSize); EXPECT_EQ(nullptr, program->getDebugData(rootDeviceIndex)); } compute-runtime-22.14.22890/opencl/test/unit_test/program/process_elf_binary_tests.cpp000066400000000000000000000265751422164147700311130ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/device/device.h" #include "shared/source/device_binary_format/elf/elf.h" #include "shared/source/device_binary_format/elf/elf_decoder.h" #include "shared/source/device_binary_format/elf/ocl_elf.h" #include "shared/source/helpers/file_io.h" #include "shared/source/helpers/string.h" #include "shared/test/common/helpers/test_files.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/unit_test/device_binary_format/patchtokens_tests.h" #include "shared/test/unit_test/helpers/gtest_helpers.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "compiler_options.h" #include "gtest/gtest.h" #include using namespace NEO; class ProcessElfBinaryTests : public ::testing::Test { public: void SetUp() override { device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr, rootDeviceIndex)); program = std::make_unique(nullptr, false, toClDeviceVector(*device)); } std::unique_ptr program; std::unique_ptr device; const uint32_t rootDeviceIndex = 1; }; TEST_F(ProcessElfBinaryTests, GivenNullWhenCreatingProgramFromBinaryThenInvalidBinaryErrorIsReturned) { cl_int retVal = program->createProgramFromBinary(nullptr, 0, *device); EXPECT_EQ(CL_INVALID_BINARY, retVal); } TEST_F(ProcessElfBinaryTests, 
GivenInvalidBinaryWhenCreatingProgramFromBinaryThenInvalidBinaryErrorIsReturned) { char pBinary[] = "thisistotallyinvalid\0"; size_t binarySize = strnlen_s(pBinary, 21); cl_int retVal = program->createProgramFromBinary(pBinary, binarySize, *device); EXPECT_EQ(CL_INVALID_BINARY, retVal); } TEST_F(ProcessElfBinaryTests, GivenValidBinaryWhenCreatingProgramFromBinaryThenSuccessIsReturned) { std::string filePath; retrieveBinaryKernelFilename(filePath, "CopyBuffer_simd16_", ".bin"); size_t binarySize = 0; auto pBinary = loadDataFromFile(filePath.c_str(), binarySize); cl_int retVal = program->createProgramFromBinary(pBinary.get(), binarySize, *device); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0, memcmp(pBinary.get(), program->buildInfos[rootDeviceIndex].packedDeviceBinary.get(), binarySize)); } TEST_F(ProcessElfBinaryTests, GivenValidSpirBinaryWhenCreatingProgramFromBinaryThenSuccessIsReturned) { //clCreateProgramWithIL => SPIR-V stored as source code const uint32_t spirvBinary[2] = {0x03022307, 0x07230203}; size_t spirvBinarySize = sizeof(spirvBinary); //clCompileProgram => SPIR-V stored as IR binary program->isSpirV = true; program->irBinary = makeCopy(spirvBinary, spirvBinarySize); program->irBinarySize = spirvBinarySize; program->deviceBuildInfos[device.get()].programBinaryType = CL_PROGRAM_BINARY_TYPE_LIBRARY; EXPECT_NE(nullptr, program->irBinary); EXPECT_NE(0u, program->irBinarySize); EXPECT_TRUE(program->getIsSpirV()); //clGetProgramInfo => SPIR-V stored as ELF binary cl_int retVal = program->packDeviceBinary(*device); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, program->buildInfos[rootDeviceIndex].packedDeviceBinary); EXPECT_NE(0u, program->buildInfos[rootDeviceIndex].packedDeviceBinarySize); //use ELF reader to parse and validate ELF binary std::string decodeErrors; std::string decodeWarnings; auto elf = NEO::Elf::decodeElf(ArrayRef(reinterpret_cast(program->buildInfos[rootDeviceIndex].packedDeviceBinary.get()), 
program->buildInfos[rootDeviceIndex].packedDeviceBinarySize), decodeErrors, decodeWarnings); auto header = elf.elfFileHeader; ASSERT_NE(nullptr, header); //check if ELF binary contains section SECTION_HEADER_TYPE_SPIRV bool hasSpirvSection = false; for (const auto &elfSectionHeader : elf.sectionHeaders) { if (elfSectionHeader.header->type == NEO::Elf::SHT_OPENCL_SPIRV) { hasSpirvSection = true; break; } } EXPECT_TRUE(hasSpirvSection); //clCreateProgramWithBinary => new program should recognize SPIR-V binary program->isSpirV = false; auto elfBinary = makeCopy(program->buildInfos[rootDeviceIndex].packedDeviceBinary.get(), program->buildInfos[rootDeviceIndex].packedDeviceBinarySize); retVal = program->createProgramFromBinary(elfBinary.get(), program->buildInfos[rootDeviceIndex].packedDeviceBinarySize, *device); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(program->getIsSpirV()); } unsigned int BinaryTypeValues[] = { CL_PROGRAM_BINARY_TYPE_EXECUTABLE, CL_PROGRAM_BINARY_TYPE_LIBRARY, CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT}; class ProcessElfBinaryTestsWithBinaryType : public ::testing::TestWithParam { public: void SetUp() override { device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr, rootDeviceIndex)); program = std::make_unique(nullptr, false, toClDeviceVector(*device)); } std::unique_ptr program; std::unique_ptr device; const uint32_t rootDeviceIndex = 1; }; TEST_P(ProcessElfBinaryTestsWithBinaryType, GivenBinaryTypeWhenResolveProgramThenProgramIsProperlyResolved) { std::string filePath; retrieveBinaryKernelFilename(filePath, "CopyBuffer_simd16_", ".bin"); size_t binarySize = 0; auto pBinary = loadDataFromFile(filePath.c_str(), binarySize); cl_int retVal = program->createProgramFromBinary(pBinary.get(), binarySize, *device); auto options = program->options; auto genBinary = makeCopy(program->buildInfos[rootDeviceIndex].unpackedDeviceBinary.get(), program->buildInfos[rootDeviceIndex].unpackedDeviceBinarySize); auto genBinarySize = 
program->buildInfos[rootDeviceIndex].unpackedDeviceBinarySize; auto irBinary = makeCopy(program->irBinary.get(), program->irBinarySize); auto irBinarySize = program->irBinarySize; EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(binarySize, program->buildInfos[rootDeviceIndex].packedDeviceBinarySize); EXPECT_EQ(0, memcmp(pBinary.get(), program->buildInfos[rootDeviceIndex].packedDeviceBinary.get(), binarySize)); // delete program's elf reference to force a resolve program->buildInfos[rootDeviceIndex].packedDeviceBinary.reset(); program->buildInfos[rootDeviceIndex].packedDeviceBinarySize = 0U; program->deviceBuildInfos[device.get()].programBinaryType = GetParam(); retVal = program->packDeviceBinary(*device); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, program->buildInfos[rootDeviceIndex].packedDeviceBinary); std::string decodeErrors; std::string decodeWarnings; auto elf = NEO::Elf::decodeElf(ArrayRef(reinterpret_cast(program->buildInfos[rootDeviceIndex].packedDeviceBinary.get()), program->buildInfos[rootDeviceIndex].packedDeviceBinarySize), decodeErrors, decodeWarnings); ASSERT_NE(nullptr, elf.elfFileHeader); ArrayRef decodedIr; ArrayRef decodedDeviceBinary; ArrayRef decodedOptions; for (auto §ion : elf.sectionHeaders) { switch (section.header->type) { default: break; case NEO::Elf::SHT_OPENCL_LLVM_BINARY: decodedIr = section.data; break; case NEO::Elf::SHT_OPENCL_SPIRV: decodedIr = section.data; break; case NEO::Elf::SHT_OPENCL_DEV_BINARY: decodedDeviceBinary = section.data; break; case NEO::Elf::SHT_OPENCL_OPTIONS: decodedDeviceBinary = section.data; break; } } ASSERT_EQ(options.size(), decodedOptions.size()); ASSERT_EQ(genBinarySize, decodedDeviceBinary.size()); ASSERT_EQ(irBinarySize, decodedIr.size()); EXPECT_EQ(0, memcmp(genBinary.get(), decodedDeviceBinary.begin(), genBinarySize)); EXPECT_EQ(0, memcmp(irBinary.get(), decodedIr.begin(), irBinarySize)); } INSTANTIATE_TEST_CASE_P(ResolveBinaryTests, ProcessElfBinaryTestsWithBinaryType, 
::testing::ValuesIn(BinaryTypeValues)); TEST_F(ProcessElfBinaryTests, GivenMultipleCallsWhenCreatingProgramFromBinaryThenEachProgramIsCorrect) { std::string filePath; retrieveBinaryKernelFilename(filePath, "CopyBuffer_simd16_", ".bin"); size_t binarySize = 0; auto pBinary = loadDataFromFile(filePath.c_str(), binarySize); cl_int retVal = program->createProgramFromBinary(pBinary.get(), binarySize, *device); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0, memcmp(pBinary.get(), program->buildInfos[rootDeviceIndex].packedDeviceBinary.get(), binarySize)); std::string filePath2; retrieveBinaryKernelFilename(filePath2, "simple_arg_int_", ".bin"); pBinary = loadDataFromFile(filePath2.c_str(), binarySize); retVal = program->createProgramFromBinary(pBinary.get(), binarySize, *device); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0, memcmp(pBinary.get(), program->buildInfos[rootDeviceIndex].packedDeviceBinary.get(), binarySize)); } TEST_F(ProcessElfBinaryTests, GivenEmptyBuildOptionsWhenCreatingProgramFromBinaryThenSuccessIsReturned) { std::string filePath; retrieveBinaryKernelFilename(filePath, "simple_kernels_", ".bin"); size_t binarySize = 0; auto pBinary = loadDataFromFile(filePath.c_str(), binarySize); cl_int retVal = program->createProgramFromBinary(pBinary.get(), binarySize, *device); EXPECT_EQ(CL_SUCCESS, retVal); const auto &options = program->getOptions(); size_t optionsSize = strlen(options.c_str()) + 1; EXPECT_EQ(0, memcmp("", options.c_str(), optionsSize)); } TEST_F(ProcessElfBinaryTests, GivenNonEmptyBuildOptionsWhenCreatingProgramFromBinaryThenSuccessIsReturned) { std::string filePath; retrieveBinaryKernelFilename(filePath, "simple_kernels_opts_", ".bin"); size_t binarySize = 0; auto pBinary = loadDataFromFile(filePath.c_str(), binarySize); cl_int retVal = program->createProgramFromBinary(pBinary.get(), binarySize, *device); EXPECT_EQ(CL_SUCCESS, retVal); const auto &options = program->getOptions(); std::string buildOptionsNotEmpty = 
CompilerOptions::concatenate(CompilerOptions::optDisable, "-DDEF_WAS_SPECIFIED=1");
    EXPECT_TRUE(hasSubstr(options, buildOptionsNotEmpty));
}

// Wraps a patch-token program into an OpenCL executable ELF twice: first
// unmodified (creation must succeed), then with the header version decremented
// by one (creation must fail with CL_INVALID_BINARY).
TEST_F(ProcessElfBinaryTests, GivenBinaryWhenIncompatiblePatchtokenVerionThenProramCreationFails) {
    PatchTokensTestData::ValidEmptyProgram programTokens;
    {
        NEO::Elf::ElfEncoder<> elfEncoder;
        elfEncoder.getElfFileHeader().type = NEO::Elf::ET_OPENCL_EXECUTABLE;
        elfEncoder.appendSection(NEO::Elf::SHT_OPENCL_DEV_BINARY, NEO::Elf::SectionNamesOpenCl::deviceBinary, programTokens.storage);
        auto elfBinary = elfEncoder.encode();
        cl_int retVal = program->createProgramFromBinary(elfBinary.data(), elfBinary.size(), *device);
        EXPECT_EQ(CL_SUCCESS, retVal);
    }

    {
        // Same payload, but with a version that no longer matches.
        programTokens.headerMutable->Version -= 1;
        NEO::Elf::ElfEncoder<> elfEncoder;
        elfEncoder.getElfFileHeader().type = NEO::Elf::ET_OPENCL_EXECUTABLE;
        elfEncoder.appendSection(NEO::Elf::SHT_OPENCL_DEV_BINARY, NEO::Elf::SectionNamesOpenCl::deviceBinary, programTokens.storage);
        auto elfBinary = elfEncoder.encode();
        cl_int retVal = program->createProgramFromBinary(elfBinary.data(), elfBinary.size(), *device);
        EXPECT_EQ(CL_INVALID_BINARY, retVal);
    }
}
// NOTE(review): the next line looks like tar-archive header residue that precedes
// process_spir_binary_tests.cpp; it is not C++ source.
compute-runtime-22.14.22890/opencl/test/unit_test/program/process_spir_binary_tests.cpp000066400000000000000000000042201422164147700313010ustar00rootroot00000000000000/*
 * Copyright (C) 2018-2021 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "shared/source/device/device.h"
#include "shared/source/helpers/string.h"

#include "opencl/test/unit_test/mocks/mock_cl_device.h"
#include "opencl/test/unit_test/mocks/mock_program.h"

#include "gtest/gtest.h"

using namespace NEO;

// Fixture for processSpirBinary() tests: creates a device wrapping a new
// MockDevice and a program bound to it before every test.
class ProcessSpirBinaryTests : public ::testing::Test {
  public:
    void SetUp() override {
        device = std::make_unique(new MockDevice());
        program = std::make_unique(toClDeviceVector(*device));
    }

    std::unique_ptr device;
    std::unique_ptr program;
};

TEST_F(ProcessSpirBinaryTests, GivenNullBinaryWhenProcessingSpirBinaryThenSourceCodeIsEmpty) {
    auto retVal =
program->processSpirBinary(nullptr, 0, false);

    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(true, program->sourceCode.empty());
}

// The size passed in is stored as irBinarySize even though it is smaller than
// the buffer handed over.
TEST_F(ProcessSpirBinaryTests, GivenInvalidSizeBinaryWhenProcessingSpirBinaryThenIrBinarSizeIsSetToPassedValue) {
    char pBinary[] = "somebinary\0";
    size_t binarySize = 1;

    auto retVal = program->processSpirBinary(pBinary, binarySize, false);

    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(binarySize, program->irBinarySize);
}

// The IR binary kept by the program must have the same bytes and size as the input.
TEST_F(ProcessSpirBinaryTests, WhenProcessingSpirBinaryThenIrBinaryIsSetCorrectly) {
    char pBinary[] = "somebinary\0";
    size_t binarySize = strnlen_s(pBinary, 11);

    auto retVal = program->processSpirBinary(pBinary, binarySize, false);

    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(0, memcmp(pBinary, program->irBinary.get(), program->irBinarySize));
    EXPECT_EQ(binarySize, program->irBinarySize);

    // Verify no build log is available
    std::string buildLog = program->getBuildLog(0);
    EXPECT_TRUE(buildLog.empty());
}

// getIsSpirV() must follow the flag passed as the last argument of processSpirBinary().
TEST_F(ProcessSpirBinaryTests, WhenProcessingSpirBinaryThenIsSpirvIsSetBasedonPassedValue) {
    const uint32_t pBinary[2] = {0x03022307, 0x07230203};
    size_t binarySize = sizeof(pBinary);

    program->processSpirBinary(pBinary, binarySize, false);
    EXPECT_FALSE(program->getIsSpirV());

    program->processSpirBinary(pBinary, binarySize, true);
    EXPECT_TRUE(program->getIsSpirV());
}
// NOTE(review): the next line looks like tar-archive header residue that precedes
// program_data_tests.cpp; it is not C++ source.
compute-runtime-22.14.22890/opencl/test/unit_test/program/program_data_tests.cpp000066400000000000000000001201011422164147700276570ustar00rootroot00000000000000/*
 * Copyright (C) 2018-2022 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "shared/source/helpers/string.h"
#include "shared/source/memory_manager/allocations_list.h"
#include "shared/source/memory_manager/graphics_allocation.h"
#include "shared/source/memory_manager/unified_memory_manager.h"
#include "shared/source/program/program_info_from_patchtokens.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/mocks/mock_csr.h"
#include "shared/test/common/mocks/mock_execution_environment.h"
#include "shared/test/common/mocks/mock_memory_manager.h"
#include "shared/test/common/test_macros/test.h"
#include "shared/test/unit_test/compiler_interface/linker_mock.h"
#include "shared/test/unit_test/device_binary_format/patchtokens_tests.h"
#include "shared/test/unit_test/helpers/gtest_helpers.h"

#include "opencl/source/platform/platform.h"
#include "opencl/source/program/program.h"
#include "opencl/test/unit_test/mocks/mock_buffer.h"
#include "opencl/test/unit_test/mocks/mock_cl_device.h"
#include "opencl/test/unit_test/mocks/mock_program.h"
#include "opencl/test/unit_test/program/program_with_source.h"

using namespace NEO;
using namespace iOpenCL;

// Inline data used to initialize the constant / global surfaces in the tests below.
static const char constValue[] = "11223344";
static const char globalValue[] = "55667788";

// Fixture combining the platform, context and program fixtures. SetUp creates a
// context on the platform's first device and a program from CopyBuffer_simd16.cl;
// the helpers prepare a patch list that buildAndDecodeProgramPatchList() then
// hands to the program.
class ProgramDataTestBase : public testing::Test,
                            public ContextFixture,
                            public PlatformFixture,
                            public ProgramFixture {

    using ContextFixture::SetUp;
    using PlatformFixture::SetUp;

  public:
    ProgramDataTestBase() {
        memset(&programBinaryHeader, 0x00, sizeof(SProgramBinaryHeader));
        pCurPtr = nullptr;
        pProgramPatchList = nullptr;
        programPatchListSize = 0;
    }

    void buildAndDecodeProgramPatchList();

    void SetUp() override {
        PlatformFixture::SetUp();
        pClDevice = pPlatform->getClDevice(0);
        rootDeviceIndex = pClDevice->getRootDeviceIndex();
        cl_device_id device = pClDevice;

        ContextFixture::SetUp(1, &device);
        ProgramFixture::SetUp();

        CreateProgramWithSource(
            pContext,
            "CopyBuffer_simd16.cl");
    }

    // Tears the fixtures down in the reverse order of SetUp.
    void TearDown() override {
        knownSource.reset();
        ProgramFixture::TearDown();
        ContextFixture::TearDown();
        PlatformFixture::TearDown();
    }

    // Builds a patch list holding one constant-memory-surface token followed by
    // constValue (terminator included) and returns the size of that inline data.
    size_t setupConstantAllocation() {
        size_t constSize = strlen(constValue) + 1;

        EXPECT_EQ(nullptr, pProgram->getConstantSurface(pContext->getDevice(0)->getRootDeviceIndex()));

        SPatchAllocateConstantMemorySurfaceProgramBinaryInfo allocateConstMemorySurface;
        allocateConstMemorySurface.Token = PATCH_TOKEN_ALLOCATE_CONSTANT_MEMORY_SURFACE_PROGRAM_BINARY_INFO;
allocateConstMemorySurface.Size = static_cast(sizeof(SPatchAllocateConstantMemorySurfaceProgramBinaryInfo));

        allocateConstMemorySurface.ConstantBufferIndex = 0;
        allocateConstMemorySurface.InlineDataSize = static_cast(constSize);

        // Token structure first, inline data right behind it.
        pAllocateConstMemorySurface.reset(new cl_char[allocateConstMemorySurface.Size + constSize]);

        memcpy_s(pAllocateConstMemorySurface.get(),
                 sizeof(SPatchAllocateConstantMemorySurfaceProgramBinaryInfo),
                 &allocateConstMemorySurface,
                 sizeof(SPatchAllocateConstantMemorySurfaceProgramBinaryInfo));

        memcpy_s((cl_char *)pAllocateConstMemorySurface.get() + sizeof(allocateConstMemorySurface), constSize, constValue, constSize);

        pProgramPatchList = (void *)pAllocateConstMemorySurface.get();
        programPatchListSize = static_cast(allocateConstMemorySurface.Size + constSize);
        return constSize;
    }

    // Builds a patch list holding one global-memory-surface token followed by
    // globalValue (terminator included) and returns the size of that inline data.
    // Also expects that the program has no global surface yet.
    size_t setupGlobalAllocation() {
        size_t globalSize = strlen(globalValue) + 1;

        EXPECT_EQ(nullptr, pProgram->getGlobalSurface(pContext->getDevice(0)->getRootDeviceIndex()));

        SPatchAllocateGlobalMemorySurfaceProgramBinaryInfo allocateGlobalMemorySurface;
        allocateGlobalMemorySurface.Token = PATCH_TOKEN_ALLOCATE_GLOBAL_MEMORY_SURFACE_PROGRAM_BINARY_INFO;
        allocateGlobalMemorySurface.Size = static_cast(sizeof(SPatchAllocateGlobalMemorySurfaceProgramBinaryInfo));

        allocateGlobalMemorySurface.GlobalBufferIndex = 0;
        allocateGlobalMemorySurface.InlineDataSize = static_cast(globalSize);

        // Token structure first, inline data right behind it.
        pAllocateGlobalMemorySurface.reset(new cl_char[allocateGlobalMemorySurface.Size + globalSize]);

        memcpy_s(pAllocateGlobalMemorySurface.get(),
                 sizeof(SPatchAllocateGlobalMemorySurfaceProgramBinaryInfo),
                 &allocateGlobalMemorySurface,
                 sizeof(SPatchAllocateGlobalMemorySurfaceProgramBinaryInfo));

        memcpy_s((cl_char *)pAllocateGlobalMemorySurface.get() + sizeof(allocateGlobalMemorySurface), globalSize, globalValue, globalSize);

        pProgramPatchList = pAllocateGlobalMemorySurface.get();
        programPatchListSize = static_cast(allocateGlobalMemorySurface.Size + globalSize);
        return globalSize;
    }

    std::unique_ptr
pAllocateConstMemorySurface; std::unique_ptr pAllocateGlobalMemorySurface; char *pCurPtr; SProgramBinaryHeader programBinaryHeader; void *pProgramPatchList; uint32_t programPatchListSize; cl_int patchlistDecodeErrorCode = 0; bool allowDecodeFailure = false; ClDevice *pClDevice = nullptr; uint32_t rootDeviceIndex; }; void ProgramDataTestBase::buildAndDecodeProgramPatchList() { size_t headerSize = sizeof(SProgramBinaryHeader); cl_int error = CL_SUCCESS; programBinaryHeader.Magic = 0x494E5443; programBinaryHeader.Version = CURRENT_ICBE_VERSION; programBinaryHeader.Device = defaultHwInfo->platform.eRenderCoreFamily; programBinaryHeader.GPUPointerSizeInBytes = 8; programBinaryHeader.NumberOfKernels = 0; programBinaryHeader.PatchListSize = programPatchListSize; char *pProgramData = new char[headerSize + programBinaryHeader.PatchListSize]; ASSERT_NE(nullptr, pProgramData); pCurPtr = pProgramData; // program header memset(pCurPtr, 0, sizeof(SProgramBinaryHeader)); *(SProgramBinaryHeader *)pCurPtr = programBinaryHeader; pCurPtr += sizeof(SProgramBinaryHeader); // patch list memcpy_s(pCurPtr, programPatchListSize, pProgramPatchList, programPatchListSize); pCurPtr += programPatchListSize; auto rootDeviceIndex = pPlatform->getClDevice(0)->getRootDeviceIndex(); //as we use mock compiler in unit test, replace the genBinary here. 
pProgram->buildInfos[rootDeviceIndex].unpackedDeviceBinary = makeCopy(pProgramData, headerSize + programBinaryHeader.PatchListSize); pProgram->buildInfos[rootDeviceIndex].unpackedDeviceBinarySize = headerSize + programBinaryHeader.PatchListSize; error = pProgram->processGenBinary(*pClDevice); patchlistDecodeErrorCode = error; if (allowDecodeFailure == false) { EXPECT_EQ(CL_SUCCESS, error); } delete[] pProgramData; } using ProgramDataTest = ProgramDataTestBase; TEST_F(ProgramDataTest, GivenEmptyProgramBinaryHeaderWhenBuildingAndDecodingThenSucessIsReturned) { buildAndDecodeProgramPatchList(); } TEST_F(ProgramDataTest, WhenAllocatingConstantMemorySurfaceThenUnderlyingBufferIsSetCorrectly) { auto constSize = setupConstantAllocation(); buildAndDecodeProgramPatchList(); EXPECT_NE(nullptr, pProgram->getConstantSurface(pContext->getDevice(0)->getRootDeviceIndex())); EXPECT_EQ(0, memcmp(constValue, pProgram->getConstantSurface(pContext->getDevice(0)->getRootDeviceIndex())->getUnderlyingBuffer(), constSize)); } TEST_F(ProgramDataTest, givenProgramWhenAllocatingConstantMemorySurfaceThenProperDeviceBitfieldIsPassed) { auto executionEnvironment = pClDevice->getExecutionEnvironment(); auto memoryManager = new MockMemoryManager(*executionEnvironment); std::unique_ptr memoryManagerBackup(memoryManager); std::swap(memoryManagerBackup, executionEnvironment->memoryManager); EXPECT_NE(pClDevice->getDeviceBitfield(), memoryManager->recentlyPassedDeviceBitfield); setupConstantAllocation(); buildAndDecodeProgramPatchList(); EXPECT_NE(nullptr, pProgram->getConstantSurface(pContext->getDevice(0)->getRootDeviceIndex())); EXPECT_EQ(pClDevice->getDeviceBitfield(), memoryManager->recentlyPassedDeviceBitfield); std::swap(memoryManagerBackup, executionEnvironment->memoryManager); } TEST_F(ProgramDataTest, whenGlobalConstantsAreExportedThenAllocateSurfacesAsSvm) { if (this->pContext->getSVMAllocsManager() == nullptr) { return; } char constantData[128] = {}; ProgramInfo programInfo; 
programInfo.globalConstants.initData = constantData;
    programInfo.globalConstants.size = sizeof(constantData);
    std::unique_ptr> mockLinkerInput = std::make_unique>();
    mockLinkerInput->traits.exportsGlobalConstants = true;
    programInfo.linkerInput = std::move(mockLinkerInput);
    this->pProgram->processProgramInfo(programInfo, *pClDevice);

    // Exported constants: the surface's GPU address must be known to the SVM manager.
    ASSERT_NE(nullptr, pProgram->getConstantSurface(pContext->getDevice(0)->getRootDeviceIndex()));
    EXPECT_NE(nullptr, this->pContext->getSVMAllocsManager()->getSVMAlloc(reinterpret_cast(pProgram->getConstantSurface(pContext->getDevice(0)->getRootDeviceIndex())->getGpuAddress())));
}

// Constants that are not exported: the surface is created, but its GPU address
// must not be registered with the SVM manager. Skipped without an SVM manager.
TEST_F(ProgramDataTest, whenGlobalConstantsAreNotExportedThenAllocateSurfacesAsNonSvm) {
    if (this->pContext->getSVMAllocsManager() == nullptr) {
        return;
    }

    char constantData[128] = {};
    ProgramInfo programInfo;
    programInfo.globalConstants.initData = constantData;
    programInfo.globalConstants.size = sizeof(constantData);
    std::unique_ptr> mockLinkerInput = std::make_unique>();
    mockLinkerInput->traits.exportsGlobalConstants = false;
    programInfo.linkerInput = std::move(mockLinkerInput);
    this->pProgram->processProgramInfo(programInfo, *pClDevice);

    ASSERT_NE(nullptr, pProgram->getConstantSurface(pContext->getDevice(0)->getRootDeviceIndex()));
    EXPECT_EQ(nullptr, this->pContext->getSVMAllocsManager()->getSVMAlloc(reinterpret_cast(
                           pProgram->getConstantSurface(pContext->getDevice(0)->getRootDeviceIndex())->getGpuAddress())));
}

// Exported constants, but the program's context is cleared while the program
// info is processed. Skipped without an SVM manager.
TEST_F(ProgramDataTest, whenGlobalConstantsAreExportedButContextUnavailableThenAllocateSurfacesAsNonSvm) {
    if (this->pContext->getSVMAllocsManager() == nullptr) {
        return;
    }

    char constantData[128] = {};
    ProgramInfo programInfo;
    programInfo.globalConstants.initData = constantData;
    programInfo.globalConstants.size = sizeof(constantData);
    std::unique_ptr> mockLinkerInput = std::make_unique>();
    mockLinkerInput->traits.exportsGlobalConstants = true;
    programInfo.linkerInput = std::move(mockLinkerInput);

    pProgram->context = nullptr;
this->pProgram->processProgramInfo(programInfo, *pClDevice);
    // Restore the context that was cleared before processing.
    pProgram->context = pContext;

    ASSERT_NE(nullptr, pProgram->getConstantSurface(pContext->getDevice(0)->getRootDeviceIndex()));
    EXPECT_EQ(nullptr, this->pContext->getSVMAllocsManager()->getSVMAlloc(reinterpret_cast(
                           pProgram->getConstantSurface(pContext->getDevice(0)->getRootDeviceIndex())->getGpuAddress())));
}

// Exported global variables: the global surface's GPU address must be known to
// the SVM manager. Skipped without an SVM manager.
TEST_F(ProgramDataTest, whenGlobalVariablesAreExportedThenAllocateSurfacesAsSvm) {
    if (this->pContext->getSVMAllocsManager() == nullptr) {
        return;
    }
    char globalData[128] = {};
    ProgramInfo programInfo;
    programInfo.globalVariables.initData = globalData;
    programInfo.globalVariables.size = sizeof(globalData);
    std::unique_ptr> mockLinkerInput = std::make_unique>();
    mockLinkerInput->traits.exportsGlobalVariables = true;
    programInfo.linkerInput = std::move(mockLinkerInput);
    this->pProgram->processProgramInfo(programInfo, *pClDevice);

    ASSERT_NE(nullptr, pProgram->getGlobalSurface(pContext->getDevice(0)->getRootDeviceIndex()));
    EXPECT_NE(nullptr, this->pContext->getSVMAllocsManager()->getSVMAlloc(reinterpret_cast(pProgram->getGlobalSurface(pContext->getDevice(0)->getRootDeviceIndex())->getGpuAddress())));
}

// Exported global variables, but the program's context is cleared while the
// program info is processed: the surface must not be registered with the SVM
// manager. Skipped without an SVM manager.
TEST_F(ProgramDataTest, whenGlobalVariablesAreExportedButContextUnavailableThenAllocateSurfacesAsNonSvm) {
    if (this->pContext->getSVMAllocsManager() == nullptr) {
        return;
    }

    char globalData[128] = {};
    ProgramInfo programInfo;
    programInfo.globalVariables.initData = globalData;
    programInfo.globalVariables.size = sizeof(globalData);
    std::unique_ptr> mockLinkerInput = std::make_unique>();
    mockLinkerInput->traits.exportsGlobalVariables = true;
    programInfo.linkerInput = std::move(mockLinkerInput);

    pProgram->context = nullptr;

    this->pProgram->processProgramInfo(programInfo, *pClDevice);

    // Restore the context that was cleared before processing.
    pProgram->context = pContext;

    ASSERT_NE(nullptr, pProgram->getGlobalSurface(pContext->getDevice(0)->getRootDeviceIndex()));
    EXPECT_EQ(nullptr,
this->pContext->getSVMAllocsManager()->getSVMAlloc(reinterpret_cast(pProgram->getGlobalSurface(pContext->getDevice(0)->getRootDeviceIndex())->getGpuAddress())));
}

// Global variables that are not exported: the surface is created, but its GPU
// address must not be registered with the SVM manager. Skipped without an SVM manager.
TEST_F(ProgramDataTest, whenGlobalVariablesAreNotExportedThenAllocateSurfacesAsNonSvm) {
    if (this->pContext->getSVMAllocsManager() == nullptr) {
        return;
    }

    char globalData[128] = {};
    ProgramInfo programInfo;
    programInfo.globalVariables.initData = globalData;
    programInfo.globalVariables.size = sizeof(globalData);
    std::unique_ptr> mockLinkerInput = std::make_unique>();
    mockLinkerInput->traits.exportsGlobalVariables = false;
    programInfo.linkerInput = std::move(mockLinkerInput);
    this->pProgram->processProgramInfo(programInfo, *pClDevice);

    ASSERT_NE(nullptr, pProgram->getGlobalSurface(pContext->getDevice(0)->getRootDeviceIndex()));
    EXPECT_EQ(nullptr, this->pContext->getSVMAllocsManager()->getSVMAlloc(reinterpret_cast(pProgram->getGlobalSurface(pContext->getDevice(0)->getRootDeviceIndex())->getGpuAddress())));
}

// Gives the constant surface a task count one above the current tag value and
// then deletes the program: the surface must show up at the head of the
// command stream receiver's temporary allocation list.
TEST_F(ProgramDataTest, givenConstantAllocationThatIsInUseByGpuWhenProgramIsBeingDestroyedThenItIsAddedToTemporaryAllocationList) {

    setupConstantAllocation();

    buildAndDecodeProgramPatchList();

    auto &csr = *pPlatform->getClDevice(0)->getDefaultEngine().commandStreamReceiver;
    auto tagAddress = csr.getTagAddress();
    auto constantSurface = pProgram->getConstantSurface(pContext->getDevice(0)->getRootDeviceIndex());
    constantSurface->updateTaskCount(*tagAddress + 1, csr.getOsContext().getContextId());

    EXPECT_TRUE(csr.getTemporaryAllocations().peekIsEmpty());
    delete pProgram;
    pProgram = nullptr;
    EXPECT_FALSE(csr.getTemporaryAllocations().peekIsEmpty());
    EXPECT_EQ(constantSurface, csr.getTemporaryAllocations().peekHead());
}

// Same scenario as the previous test, for the global surface.
TEST_F(ProgramDataTest, givenGlobalAllocationThatIsInUseByGpuWhenProgramIsBeingDestroyedThenItIsAddedToTemporaryAllocationList) {
    setupGlobalAllocation();

    buildAndDecodeProgramPatchList();

    auto &csr = *pPlatform->getClDevice(0)->getDefaultEngine().commandStreamReceiver;
    auto tagAddress =
csr.getTagAddress();
    auto globalSurface = pProgram->getGlobalSurface(pContext->getDevice(0)->getRootDeviceIndex());
    globalSurface->updateTaskCount(*tagAddress + 1, csr.getOsContext().getContextId());

    EXPECT_TRUE(csr.getTemporaryAllocations().peekIsEmpty());
    delete pProgram;
    pProgram = nullptr;
    EXPECT_FALSE(csr.getTemporaryAllocations().peekIsEmpty());
    EXPECT_EQ(globalSurface, csr.getTemporaryAllocations().peekHead());
}

// With 32-bit allocations forced on the memory manager, the constant surface
// must hold the inline data and, when is64bit is true, be a 32-bit allocation.
TEST_F(ProgramDataTest, GivenDeviceForcing32BitMessagesWhenConstAllocationIsPresentInProgramBinariesThen32BitStorageIsAllocated) {
    auto constSize = setupConstantAllocation();
    this->pContext->getDevice(0)->getMemoryManager()->setForce32BitAllocations(true);

    buildAndDecodeProgramPatchList();

    EXPECT_NE(nullptr, pProgram->getConstantSurface(pContext->getDevice(0)->getRootDeviceIndex()));
    EXPECT_EQ(0, memcmp(constValue, pProgram->getConstantSurface(pContext->getDevice(0)->getRootDeviceIndex())->getUnderlyingBuffer(), constSize));

    if constexpr (is64bit) {
        EXPECT_TRUE(pProgram->getConstantSurface(pContext->getDevice(0)->getRootDeviceIndex())->is32BitAllocation());
    }
}

// After decoding a global-surface token, the program's global surface must
// exist and hold the inline data.
TEST_F(ProgramDataTest, WhenAllocatingGlobalMemorySurfaceThenUnderlyingBufferIsSetCorrectly) {
    auto globalSize = setupGlobalAllocation();
    buildAndDecodeProgramPatchList();
    EXPECT_NE(nullptr, pProgram->getGlobalSurface(pContext->getDevice(0)->getRootDeviceIndex()));
    EXPECT_EQ(0, memcmp(globalValue, pProgram->getGlobalSurface(pContext->getDevice(0)->getRootDeviceIndex())->getUnderlyingBuffer(), globalSize));
}

// Swaps in a MockMemoryManager and checks that allocating the global surface
// passes the device's bitfield to it.
TEST_F(ProgramDataTest, givenProgramWhenAllocatingGlobalMemorySurfaceThenProperDeviceBitfieldIsPassed) {
    auto executionEnvironment = pClDevice->getExecutionEnvironment();
    auto memoryManager = new MockMemoryManager(*executionEnvironment);

    std::unique_ptr memoryManagerBackup(memoryManager);
    std::swap(memoryManagerBackup, executionEnvironment->memoryManager);
    EXPECT_NE(pClDevice->getDeviceBitfield(), memoryManager->recentlyPassedDeviceBitfield);

    setupGlobalAllocation();
buildAndDecodeProgramPatchList(); EXPECT_NE(nullptr, pProgram->getGlobalSurface(pContext->getDevice(0)->getRootDeviceIndex())); EXPECT_EQ(pClDevice->getDeviceBitfield(), memoryManager->recentlyPassedDeviceBitfield); std::swap(memoryManagerBackup, executionEnvironment->memoryManager); } TEST_F(ProgramDataTest, Given32BitDeviceWhenGlobalMemorySurfaceIsPresentThenItHas32BitStorage) { char globalValue[] = "55667788"; size_t globalSize = strlen(globalValue) + 1; this->pContext->getDevice(0)->getMemoryManager()->setForce32BitAllocations(true); EXPECT_EQ(nullptr, pProgram->getGlobalSurface(pContext->getDevice(0)->getRootDeviceIndex())); SPatchAllocateGlobalMemorySurfaceProgramBinaryInfo allocateGlobalMemorySurface; allocateGlobalMemorySurface.Token = PATCH_TOKEN_ALLOCATE_GLOBAL_MEMORY_SURFACE_PROGRAM_BINARY_INFO; allocateGlobalMemorySurface.Size = static_cast(sizeof(SPatchAllocateGlobalMemorySurfaceProgramBinaryInfo)); allocateGlobalMemorySurface.GlobalBufferIndex = 0; allocateGlobalMemorySurface.InlineDataSize = static_cast(globalSize); cl_char *pAllocateGlobalMemorySurface = new cl_char[allocateGlobalMemorySurface.Size + globalSize]; memcpy_s(pAllocateGlobalMemorySurface, sizeof(SPatchAllocateGlobalMemorySurfaceProgramBinaryInfo), &allocateGlobalMemorySurface, sizeof(SPatchAllocateGlobalMemorySurfaceProgramBinaryInfo)); memcpy_s((cl_char *)pAllocateGlobalMemorySurface + sizeof(allocateGlobalMemorySurface), globalSize, globalValue, globalSize); pProgramPatchList = (void *)pAllocateGlobalMemorySurface; programPatchListSize = static_cast(allocateGlobalMemorySurface.Size + globalSize); buildAndDecodeProgramPatchList(); EXPECT_NE(nullptr, pProgram->getGlobalSurface(pContext->getDevice(0)->getRootDeviceIndex())); EXPECT_EQ(0, memcmp(globalValue, pProgram->getGlobalSurface(pContext->getDevice(0)->getRootDeviceIndex())->getUnderlyingBuffer(), globalSize)); if constexpr (is64bit) { 
EXPECT_TRUE(pProgram->getGlobalSurface(pContext->getDevice(0)->getRootDeviceIndex())->is32BitAllocation()); } delete[] pAllocateGlobalMemorySurface; } TEST(ProgramScopeMetadataTest, WhenPatchingGlobalSurfaceThenPickProperSourceBuffer) { MockExecutionEnvironment execEnv; execEnv.incRefInternal(); MockClDevice device{new MockDevice(&execEnv, 0)}; execEnv.memoryManager = std::make_unique(); PatchTokensTestData::ValidProgramWithMixedGlobalVarAndConstSurfacesAndPointers decodedProgram; decodedProgram.globalPointerMutable->GlobalPointerOffset = 0U; decodedProgram.constantPointerMutable->ConstantPointerOffset = 0U; memset(decodedProgram.globalSurfMutable + 1, 0U, sizeof(uintptr_t)); memset(decodedProgram.constSurfMutable + 1, 0U, sizeof(uintptr_t)); ProgramInfo programInfo; MockProgram program(toClDeviceVector(device)); NEO::populateProgramInfo(programInfo, decodedProgram); program.processProgramInfo(programInfo, device); auto &buildInfo = program.buildInfos[device.getRootDeviceIndex()]; ASSERT_NE(nullptr, buildInfo.globalSurface); ASSERT_NE(nullptr, buildInfo.constantSurface); ASSERT_NE(nullptr, buildInfo.globalSurface->getUnderlyingBuffer()); ASSERT_NE(nullptr, buildInfo.constantSurface->getUnderlyingBuffer()); EXPECT_EQ(static_cast(buildInfo.globalSurface->getGpuAddressToPatch()), *reinterpret_cast(buildInfo.constantSurface->getUnderlyingBuffer())); EXPECT_EQ(static_cast(buildInfo.constantSurface->getGpuAddressToPatch()), *reinterpret_cast(buildInfo.globalSurface->getUnderlyingBuffer())); } TEST_F(ProgramDataTest, GivenProgramWith32bitPointerOptWhenProgramScopeConstantBufferPatchTokensAreReadThenConstantPointerOffsetIsPatchedWith32bitPointer) { CreateProgramWithSource(pContext, "CopyBuffer_simd16.cl"); ASSERT_NE(nullptr, pProgram); MockProgram *prog = pProgram; // simulate case when constant surface was not allocated EXPECT_EQ(nullptr, prog->getConstantSurface(pContext->getDevice(0)->getRootDeviceIndex())); ProgramInfo programInfo; 
programInfo.prepareLinkerInputStorage(); NEO::LinkerInput::RelocationInfo relocInfo; relocInfo.relocationSegment = NEO::SegmentType::GlobalConstants; relocInfo.offset = 0U; relocInfo.type = NEO::LinkerInput::RelocationInfo::Type::Address; relocInfo.symbolName = "GlobalConstantPointer"; NEO::SymbolInfo symbol = {}; symbol.offset = 0U; symbol.size = 8U; symbol.segment = NEO::SegmentType::GlobalConstants; programInfo.linkerInput->addSymbol("GlobalConstantPointer", symbol); programInfo.linkerInput->addDataRelocationInfo(relocInfo); programInfo.linkerInput->setPointerSize(LinkerInput::Traits::PointerSize::Ptr32bit); MockBuffer constantSurface; ASSERT_LT(8U, constantSurface.getSize()); prog->setConstantSurface(&constantSurface.mockGfxAllocation); constantSurface.mockGfxAllocation.set32BitAllocation(true); uint32_t *constantSurfaceStorage = reinterpret_cast(constantSurface.getCpuAddress()); uint32_t sentinel = 0x17192329U; constantSurfaceStorage[0] = 0U; constantSurfaceStorage[1] = sentinel; programInfo.globalConstants.initData = constantSurface.mockGfxAllocation.getUnderlyingBuffer(); pProgram->setLinkerInput(pClDevice->getRootDeviceIndex(), std::move(programInfo.linkerInput)); pProgram->linkBinary(&pClDevice->getDevice(), programInfo.globalConstants.initData, programInfo.globalVariables.initData, {}, prog->externalFunctions); uint32_t expectedAddr = static_cast(constantSurface.getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddressToPatch()); EXPECT_EQ(expectedAddr, constantSurfaceStorage[0]); EXPECT_EQ(sentinel, constantSurfaceStorage[1]); constantSurface.mockGfxAllocation.set32BitAllocation(false); prog->setConstantSurface(nullptr); } TEST_F(ProgramDataTest, GivenProgramWith32bitPointerOptWhenProgramScopeGlobalPointerPatchTokensAreReadThenGlobalPointerOffsetIsPatchedWith32bitPointer) { CreateProgramWithSource(pContext, "CopyBuffer_simd16.cl"); ASSERT_NE(nullptr, pProgram); MockProgram *prog = pProgram; // simulate case when constant surface was not 
allocated EXPECT_EQ(nullptr, prog->getConstantSurface(pContext->getDevice(0)->getRootDeviceIndex())); ProgramInfo programInfo; programInfo.prepareLinkerInputStorage(); NEO::LinkerInput::RelocationInfo relocInfo; relocInfo.offset = 0U; relocInfo.type = NEO::LinkerInput::RelocationInfo::Type::Address; relocInfo.relocationSegment = NEO::SegmentType::GlobalVariables; relocInfo.symbolName = "GlobalVariablePointer"; NEO::SymbolInfo symbol = {}; symbol.offset = 0U; symbol.size = 8U; symbol.segment = NEO::SegmentType::GlobalVariables; programInfo.linkerInput->addSymbol("GlobalVariablePointer", symbol); programInfo.linkerInput->addDataRelocationInfo(relocInfo); programInfo.linkerInput->setPointerSize(LinkerInput::Traits::PointerSize::Ptr32bit); MockBuffer globalSurface; ASSERT_LT(8U, globalSurface.getSize()); prog->setGlobalSurface(&globalSurface.mockGfxAllocation); globalSurface.mockGfxAllocation.set32BitAllocation(true); uint32_t *globalSurfaceStorage = reinterpret_cast(globalSurface.getCpuAddress()); uint32_t sentinel = 0x17192329U; globalSurfaceStorage[0] = 0U; globalSurfaceStorage[1] = sentinel; programInfo.globalVariables.initData = globalSurface.mockGfxAllocation.getUnderlyingBuffer(); pProgram->setLinkerInput(pClDevice->getRootDeviceIndex(), std::move(programInfo.linkerInput)); pProgram->linkBinary(&pClDevice->getDevice(), programInfo.globalConstants.initData, programInfo.globalVariables.initData, {}, prog->externalFunctions); uint32_t expectedAddr = static_cast(globalSurface.getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddressToPatch()); EXPECT_EQ(expectedAddr, globalSurfaceStorage[0]); EXPECT_EQ(sentinel, globalSurfaceStorage[1]); globalSurface.mockGfxAllocation.set32BitAllocation(false); prog->setGlobalSurface(nullptr); } TEST_F(ProgramDataTest, givenSymbolTablePatchTokenThenLinkerInputIsCreated) { SPatchFunctionTableInfo token; token.Token = PATCH_TOKEN_PROGRAM_SYMBOL_TABLE; token.Size = static_cast(sizeof(SPatchFunctionTableInfo)); 
token.NumEntries = 0; pProgramPatchList = &token; programPatchListSize = token.Size; buildAndDecodeProgramPatchList(); EXPECT_NE(nullptr, pProgram->getLinkerInput(pContext->getDevice(0)->getRootDeviceIndex())); } TEST(ProgramLinkBinaryTest, whenLinkerInputEmptyThenLinkSuccessful) { auto linkerInput = std::make_unique>(); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); MockProgram program{nullptr, false, toClDeviceVector(*device)}; program.setLinkerInput(device->getRootDeviceIndex(), std::move(linkerInput)); auto ret = program.linkBinary(&device->getDevice(), nullptr, nullptr, {}, program.externalFunctions); EXPECT_EQ(CL_SUCCESS, ret); } TEST(ProgramLinkBinaryTest, whenLinkerUnresolvedExternalThenLinkFailedAndBuildLogAvailable) { auto linkerInput = std::make_unique>(); NEO::LinkerInput::RelocationInfo relocation = {}; relocation.symbolName = "A"; relocation.offset = 0; linkerInput->relocations.push_back(NEO::LinkerInput::Relocations{relocation}); linkerInput->traits.requiresPatchingOfInstructionSegments = true; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); auto rootDeviceIndex = device->getRootDeviceIndex(); MockProgram program{nullptr, false, toClDeviceVector(*device)}; KernelInfo kernelInfo = {}; kernelInfo.kernelDescriptor.kernelMetadata.kernelName = "onlyKernel"; std::vector kernelHeap; kernelHeap.resize(32, 7); kernelInfo.heapInfo.pKernelHeap = kernelHeap.data(); kernelInfo.heapInfo.KernelHeapSize = static_cast(kernelHeap.size()); program.getKernelInfoArray(rootDeviceIndex).push_back(&kernelInfo); program.setLinkerInput(rootDeviceIndex, std::move(linkerInput)); std::string buildLog = program.getBuildLog(device->getRootDeviceIndex()); EXPECT_TRUE(buildLog.empty()); auto ret = program.linkBinary(&device->getDevice(), nullptr, nullptr, {}, program.externalFunctions); EXPECT_NE(CL_SUCCESS, ret); program.getKernelInfoArray(rootDeviceIndex).clear(); buildLog = 
program.getBuildLog(rootDeviceIndex); EXPECT_FALSE(buildLog.empty()); Linker::UnresolvedExternals expectedUnresolvedExternals; expectedUnresolvedExternals.push_back(Linker::UnresolvedExternal{relocation, 0, false}); auto expectedError = constructLinkerErrorMessage(expectedUnresolvedExternals, std::vector{"kernel : " + kernelInfo.kernelDescriptor.kernelMetadata.kernelName}); EXPECT_TRUE(hasSubstr(buildLog, expectedError)); } TEST_F(ProgramDataTest, whenLinkerInputValidThenIsaIsProperlyPatched) { auto linkerInput = std::make_unique>(); linkerInput->symbols["A"] = NEO::SymbolInfo{4U, 4U, NEO::SegmentType::GlobalVariables}; linkerInput->symbols["B"] = NEO::SymbolInfo{8U, 4U, NEO::SegmentType::GlobalConstants}; linkerInput->symbols["C"] = NEO::SymbolInfo{16U, 4U, NEO::SegmentType::Instructions}; auto relocationType = NEO::LinkerInput::RelocationInfo::Type::Address; linkerInput->relocations.push_back({NEO::LinkerInput::RelocationInfo{"A", 8U, relocationType}, NEO::LinkerInput::RelocationInfo{"B", 16U, relocationType}, NEO::LinkerInput::RelocationInfo{"C", 24U, relocationType}}); linkerInput->traits.requiresPatchingOfInstructionSegments = true; linkerInput->exportedFunctionsSegmentId = 0; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); MockProgram program{nullptr, false, toClDeviceVector(*device)}; auto &buildInfo = program.buildInfos[device->getRootDeviceIndex()]; KernelInfo kernelInfo = {}; kernelInfo.kernelDescriptor.kernelMetadata.kernelName = "onlyKernel"; std::vector kernelHeap; kernelHeap.resize(32, 7); kernelInfo.heapInfo.pKernelHeap = kernelHeap.data(); kernelInfo.heapInfo.KernelHeapSize = static_cast(kernelHeap.size()); MockGraphicsAllocation kernelIsa(kernelHeap.data(), kernelHeap.size()); kernelInfo.kernelAllocation = &kernelIsa; program.getKernelInfoArray(rootDeviceIndex).push_back(&kernelInfo); program.setLinkerInput(device->getRootDeviceIndex(), std::move(linkerInput)); buildInfo.exportedFunctionsSurface = 
kernelInfo.kernelAllocation; std::vector globalVariablesBuffer; globalVariablesBuffer.resize(32, 7); std::vector globalConstantsBuffer; globalConstantsBuffer.resize(32, 7); std::vector globalVariablesInitData{32, 0}; std::vector globalConstantsInitData{32, 0}; buildInfo.globalSurface = new MockGraphicsAllocation(globalVariablesBuffer.data(), globalVariablesBuffer.size()); buildInfo.constantSurface = new MockGraphicsAllocation(globalConstantsBuffer.data(), globalConstantsBuffer.size()); auto ret = program.linkBinary(&pClDevice->getDevice(), globalConstantsInitData.data(), globalVariablesInitData.data(), {}, program.externalFunctions); EXPECT_EQ(CL_SUCCESS, ret); linkerInput.reset(static_cast *>(buildInfo.linkerInput.release())); for (size_t i = 0; i < linkerInput->relocations.size(); ++i) { auto expectedPatch = buildInfo.globalSurface->getGpuAddress() + linkerInput->symbols[linkerInput->relocations[0][0].symbolName].offset; auto relocationAddress = kernelHeap.data() + linkerInput->relocations[0][0].offset; EXPECT_EQ(static_cast(expectedPatch), *reinterpret_cast(relocationAddress)) << i; } program.getKernelInfoArray(rootDeviceIndex).clear(); delete buildInfo.globalSurface; buildInfo.globalSurface = nullptr; delete buildInfo.constantSurface; buildInfo.constantSurface = nullptr; } TEST_F(ProgramDataTest, whenRelocationsAreNotNeededThenIsaIsPreserved) { auto linkerInput = std::make_unique>(); linkerInput->symbols["A"] = NEO::SymbolInfo{4U, 4U, NEO::SegmentType::GlobalVariables}; linkerInput->symbols["B"] = NEO::SymbolInfo{8U, 4U, NEO::SegmentType::GlobalConstants}; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); MockProgram program{nullptr, false, toClDeviceVector(*device)}; auto &buildInfo = program.buildInfos[device->getRootDeviceIndex()]; KernelInfo kernelInfo = {}; kernelInfo.kernelDescriptor.kernelMetadata.kernelName = "onlyKernel"; std::vector kernelHeapData; kernelHeapData.resize(32, 7); std::vector 
kernelHeap(kernelHeapData.begin(), kernelHeapData.end()); kernelInfo.heapInfo.pKernelHeap = kernelHeap.data(); kernelInfo.heapInfo.KernelHeapSize = static_cast(kernelHeap.size()); MockGraphicsAllocation kernelIsa(kernelHeap.data(), kernelHeap.size()); kernelInfo.kernelAllocation = &kernelIsa; program.getKernelInfoArray(rootDeviceIndex).push_back(&kernelInfo); program.setLinkerInput(rootDeviceIndex, std::move(linkerInput)); std::vector globalVariablesBuffer; globalVariablesBuffer.resize(32, 7); std::vector globalConstantsBuffer; globalConstantsBuffer.resize(32, 7); std::vector globalVariablesInitData{32, 0}; std::vector globalConstantsInitData{32, 0}; buildInfo.globalSurface = new MockGraphicsAllocation(globalVariablesBuffer.data(), globalVariablesBuffer.size()); buildInfo.constantSurface = new MockGraphicsAllocation(globalConstantsBuffer.data(), globalConstantsBuffer.size()); auto ret = program.linkBinary(&pClDevice->getDevice(), globalConstantsInitData.data(), globalVariablesInitData.data(), {}, program.externalFunctions); EXPECT_EQ(CL_SUCCESS, ret); EXPECT_EQ(kernelHeapData, kernelHeap); program.getKernelInfoArray(rootDeviceIndex).clear(); delete buildInfo.globalSurface; buildInfo.globalSurface = nullptr; delete buildInfo.constantSurface; buildInfo.constantSurface = nullptr; } TEST(ProgramStringSectionTest, WhenConstStringBufferIsPresentThenUseItForLinking) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); auto rootDeviceIndex = device->getRootDeviceIndex(); MockProgram program{nullptr, false, toClDeviceVector(*device)}; uint8_t kernelHeapData[64] = {}; MockGraphicsAllocation kernelIsa(kernelHeapData, 64); KernelInfo kernelInfo = {}; kernelInfo.kernelDescriptor.kernelMetadata.kernelName = "onlyKernel"; kernelInfo.heapInfo.pKernelHeap = kernelHeapData; kernelInfo.heapInfo.KernelHeapSize = 64; kernelInfo.kernelAllocation = &kernelIsa; program.getKernelInfoArray(rootDeviceIndex).push_back(&kernelInfo); auto 
linkerInput = std::make_unique>(); linkerInput->relocations.push_back({{".str", 0x8, LinkerInput::RelocationInfo::Type::Address, SegmentType::Instructions}}); linkerInput->symbols.insert({".str", {0x0, 0x8, SegmentType::GlobalStrings}}); linkerInput->traits.requiresPatchingOfInstructionSegments = true; program.setLinkerInput(rootDeviceIndex, std::move(linkerInput)); auto isaCpuPtr = reinterpret_cast(kernelInfo.getGraphicsAllocation()->getUnderlyingBuffer()); auto patchAddr = ptrOffset(isaCpuPtr, 0x8); const char constStringData[] = "Hello World!\n"; auto stringsAddr = reinterpret_cast(constStringData); auto ret = program.linkBinary(&device->getDevice(), nullptr, nullptr, {constStringData, sizeof(constStringData)}, program.externalFunctions); EXPECT_EQ(CL_SUCCESS, ret); EXPECT_EQ(static_cast(stringsAddr), *reinterpret_cast(patchAddr)); program.getKernelInfoArray(rootDeviceIndex).clear(); } TEST(ProgramImplicitArgsTest, givenImplicitRelocationAndStackCallsThenKernelRequiresImplicitArgs) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); auto rootDeviceIndex = device->getRootDeviceIndex(); MockProgram program{nullptr, false, toClDeviceVector(*device)}; KernelInfo kernelInfo = {}; kernelInfo.kernelDescriptor.kernelMetadata.kernelName = "onlyKernel"; kernelInfo.kernelDescriptor.kernelAttributes.flags.useStackCalls = true; uint8_t kernelHeapData[64] = {}; kernelInfo.heapInfo.pKernelHeap = kernelHeapData; kernelInfo.heapInfo.KernelHeapSize = 64; MockGraphicsAllocation kernelIsa(kernelHeapData, 64); kernelInfo.kernelAllocation = &kernelIsa; program.getKernelInfoArray(rootDeviceIndex).push_back(&kernelInfo); auto linkerInput = std::make_unique>(); linkerInput->relocations.push_back({{implicitArgsRelocationSymbolNames[0], 0x8, LinkerInput::RelocationInfo::Type::AddressLow, SegmentType::Instructions}}); linkerInput->traits.requiresPatchingOfInstructionSegments = true; program.setLinkerInput(rootDeviceIndex, 
std::move(linkerInput)); auto ret = program.linkBinary(&device->getDevice(), nullptr, nullptr, {}, program.externalFunctions); EXPECT_EQ(CL_SUCCESS, ret); EXPECT_TRUE(kernelInfo.kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs); program.getKernelInfoArray(rootDeviceIndex).clear(); } TEST(ProgramImplicitArgsTest, givenImplicitRelocationAndEnabledDebuggerThenKernelRequiresImplicitArgs) { if (!defaultHwInfo->capabilityTable.debuggerSupported) { GTEST_SKIP(); } DebugManagerStateRestore restorer; DebugManager.flags.EnableMockSourceLevelDebugger.set(1); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); EXPECT_NE(nullptr, device->getDebugger()); auto rootDeviceIndex = device->getRootDeviceIndex(); MockProgram program{nullptr, false, toClDeviceVector(*device)}; KernelInfo kernelInfo = {}; kernelInfo.kernelDescriptor.kernelMetadata.kernelName = "onlyKernel"; kernelInfo.kernelDescriptor.kernelAttributes.flags.useStackCalls = false; uint8_t kernelHeapData[64] = {}; kernelInfo.heapInfo.pKernelHeap = kernelHeapData; kernelInfo.heapInfo.KernelHeapSize = 64; MockGraphicsAllocation kernelIsa(kernelHeapData, 64); kernelInfo.kernelAllocation = &kernelIsa; program.getKernelInfoArray(rootDeviceIndex).push_back(&kernelInfo); auto linkerInput = std::make_unique>(); linkerInput->relocations.push_back({{implicitArgsRelocationSymbolNames[0], 0x8, LinkerInput::RelocationInfo::Type::AddressLow, SegmentType::Instructions}}); linkerInput->traits.requiresPatchingOfInstructionSegments = true; program.setLinkerInput(rootDeviceIndex, std::move(linkerInput)); auto ret = program.linkBinary(&device->getDevice(), nullptr, nullptr, {}, program.externalFunctions); EXPECT_EQ(CL_SUCCESS, ret); EXPECT_TRUE(kernelInfo.kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs); program.getKernelInfoArray(rootDeviceIndex).clear(); } TEST(ProgramImplicitArgsTest, 
givenImplicitRelocationAndNoStackCallsAndDisabledDebuggerThenKernelDoesntRequireImplicitArgs) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); EXPECT_EQ(nullptr, device->getDebugger()); auto rootDeviceIndex = device->getRootDeviceIndex(); MockProgram program{nullptr, false, toClDeviceVector(*device)}; KernelInfo kernelInfo = {}; kernelInfo.kernelDescriptor.kernelMetadata.kernelName = "onlyKernel"; kernelInfo.kernelDescriptor.kernelAttributes.flags.useStackCalls = false; uint8_t kernelHeapData[64] = {}; kernelInfo.heapInfo.pKernelHeap = kernelHeapData; kernelInfo.heapInfo.KernelHeapSize = 64; MockGraphicsAllocation kernelIsa(kernelHeapData, 64); kernelInfo.kernelAllocation = &kernelIsa; program.getKernelInfoArray(rootDeviceIndex).push_back(&kernelInfo); auto linkerInput = std::make_unique>(); linkerInput->relocations.push_back({{implicitArgsRelocationSymbolNames[0], 0x8, LinkerInput::RelocationInfo::Type::AddressLow, SegmentType::Instructions}}); linkerInput->traits.requiresPatchingOfInstructionSegments = true; program.setLinkerInput(rootDeviceIndex, std::move(linkerInput)); auto ret = program.linkBinary(&device->getDevice(), nullptr, nullptr, {}, program.externalFunctions); EXPECT_EQ(CL_SUCCESS, ret); EXPECT_FALSE(kernelInfo.kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs); program.getKernelInfoArray(rootDeviceIndex).clear(); } compute-runtime-22.14.22890/opencl/test/unit_test/program/program_from_binary.h000066400000000000000000000054701422164147700275130ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/built_ins/built_ins.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/fixtures/context_fixture.h" #include "opencl/test/unit_test/fixtures/program_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include namespace NEO { 
//////////////////////////////////////////////////////////////////////////////// // ProgramFromBinaryTest Test Fixture // Used to test the Program class //////////////////////////////////////////////////////////////////////////////// struct ProgramFromBinaryFixture : public ClDeviceFixture, public ContextFixture, public ProgramFixture, public testing::Test { using ContextFixture::SetUp; void SetUp() override { ProgramFromBinaryFixture::SetUp("CopyBuffer_simd32", "CopyBuffer"); } void SetUp(const char *binaryFileName, const char *kernelName) { this->binaryFileName = binaryFileName; this->kernelName = kernelName; ClDeviceFixture::SetUp(); cl_device_id device = pClDevice; ContextFixture::SetUp(1, &device); ProgramFixture::SetUp(); if (options.size()) CreateProgramFromBinary(pContext, pContext->getDevices(), binaryFileName, options); else CreateProgramFromBinary(pContext, pContext->getDevices(), binaryFileName); } void TearDown() override { knownSource.reset(); ProgramFixture::TearDown(); ContextFixture::TearDown(); ClDeviceFixture::TearDown(); } void setOptions(std::string &optionsIn) { options = optionsIn; } const char *binaryFileName = nullptr; const char *kernelName = nullptr; cl_int retVal = CL_SUCCESS; std::string options; }; //////////////////////////////////////////////////////////////////////////////// // ProgramSimpleFixture Test Fixture // Used to test the Program class, but not using parameters //////////////////////////////////////////////////////////////////////////////// class ProgramSimpleFixture : public ClDeviceFixture, public ContextFixture, public ProgramFixture { using ContextFixture::SetUp; public: void SetUp() override { ClDeviceFixture::SetUp(); cl_device_id device = pClDevice; ContextFixture::SetUp(1, &device); ProgramFixture::SetUp(); } void TearDown() override { knownSource.reset(); ProgramFixture::TearDown(); ContextFixture::TearDown(); ClDeviceFixture::TearDown(); } protected: cl_int retVal = CL_SUCCESS; }; } // namespace NEO 
compute-runtime-22.14.22890/opencl/test/unit_test/program/program_nonuniform.cpp000066400000000000000000000244361422164147700277340ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver_hw.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/hash.h" #include "shared/source/helpers/ptr_math.h" #include "shared/test/common/libult/ult_command_stream_receiver.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/kernel/kernel.h" #include "opencl/test/unit_test/command_queue/command_queue_fixture.h" #include "opencl/test/unit_test/fixtures/context_fixture.h" #include "opencl/test/unit_test/fixtures/platform_fixture.h" #include "opencl/test/unit_test/fixtures/program_fixture.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "opencl/test/unit_test/program/program_from_binary.h" #include "opencl/test/unit_test/program/program_with_source.h" #include "opencl/test/unit_test/test_macros/test_checks_ocl.h" #include "gtest/gtest.h" #include "program_tests.h" #include #include #include using namespace NEO; class MyMockProgram : public MockProgram { public: MyMockProgram() : MockProgram(toClDeviceVector(*(new MockClDevice(new MockDevice())))), device(this->clDevices[0]) {} private: std::unique_ptr device; }; TEST(ProgramNonUniform, GivenNoBuildOptionsWhenUpdatingAllowNonUniformThenNonUniformNotAllowed) { MyMockProgram pm; EXPECT_FALSE(pm.getAllowNonUniform()); EXPECT_EQ(12u, pm.getProgramOptionVersion()); pm.setBuildOptions(nullptr); pm.updateNonUniformFlag(); EXPECT_FALSE(pm.getAllowNonUniform()); EXPECT_EQ(12u, pm.getProgramOptionVersion()); } TEST(ProgramNonUniform, GivenBuildOptionsCl12WhenUpdatingAllowNonUniformThenNonUniformNotAllowed) { MyMockProgram pm; EXPECT_FALSE(pm.getAllowNonUniform()); EXPECT_EQ(12u, 
pm.getProgramOptionVersion()); pm.setBuildOptions("-cl-std=CL1.2"); pm.updateNonUniformFlag(); EXPECT_FALSE(pm.getAllowNonUniform()); EXPECT_EQ(12u, pm.getProgramOptionVersion()); } TEST(ProgramNonUniform, GivenBuildOptionsCl20WhenUpdatingAllowNonUniformThenNonUniformAllowed) { MyMockProgram pm; EXPECT_FALSE(pm.getAllowNonUniform()); EXPECT_EQ(12u, pm.getProgramOptionVersion()); pm.setBuildOptions("-cl-std=CL2.0"); pm.updateNonUniformFlag(); EXPECT_TRUE(pm.getAllowNonUniform()); EXPECT_EQ(20u, pm.getProgramOptionVersion()); } TEST(ProgramNonUniform, GivenBuildOptionsCl21WhenUpdatingAllowNonUniformThenNonUniformAllowed) { MyMockProgram pm; EXPECT_FALSE(pm.getAllowNonUniform()); EXPECT_EQ(12u, pm.getProgramOptionVersion()); pm.setBuildOptions("-cl-std=CL2.1"); pm.updateNonUniformFlag(); EXPECT_TRUE(pm.getAllowNonUniform()); EXPECT_EQ(21u, pm.getProgramOptionVersion()); } TEST(ProgramNonUniform, GivenBuildOptionsCl20AndUniformFlagWhenUpdatingAllowNonUniformThenNonUniformNotAllowed) { MyMockProgram pm; EXPECT_FALSE(pm.getAllowNonUniform()); EXPECT_EQ(12u, pm.getProgramOptionVersion()); pm.setBuildOptions("-cl-std=CL2.0 -cl-uniform-work-group-size"); pm.updateNonUniformFlag(); EXPECT_FALSE(pm.getAllowNonUniform()); EXPECT_EQ(20u, pm.getProgramOptionVersion()); } TEST(ProgramNonUniform, GivenBuildOptionsCl21AndUniformFlagWhenUpdatingAllowNonUniformThenNonUniformNotAllowed) { MyMockProgram pm; EXPECT_FALSE(pm.getAllowNonUniform()); EXPECT_EQ(12u, pm.getProgramOptionVersion()); pm.setBuildOptions("-cl-std=CL2.1 -cl-uniform-work-group-size"); pm.updateNonUniformFlag(); EXPECT_FALSE(pm.getAllowNonUniform()); EXPECT_EQ(21u, pm.getProgramOptionVersion()); } TEST(KernelNonUniform, WhenSettingAllowNonUniformThenGettingAllowNonUniformReturnsCorrectValue) { KernelInfo kernelInfo; MockClDevice device{new MockDevice()}; MockProgram program(toClDeviceVector(device)); struct KernelMock : Kernel { KernelMock(Program *program, KernelInfo &kernelInfos, ClDevice &clDeviceArg) : 
Kernel(program, kernelInfos, clDeviceArg) { } }; KernelMock k{&program, kernelInfo, device}; program.setAllowNonUniform(false); EXPECT_FALSE(k.getAllowNonUniform()); program.setAllowNonUniform(true); EXPECT_TRUE(k.getAllowNonUniform()); program.setAllowNonUniform(false); EXPECT_FALSE(k.getAllowNonUniform()); } TEST(ProgramNonUniform, WhenSettingAllowNonUniformThenGettingAllowNonUniformReturnsCorrectValue) { MockClDevice device{new MockDevice()}; auto deviceVector = toClDeviceVector(device); MockProgram program(deviceVector); MockProgram program1(deviceVector); MockProgram program2(deviceVector); const MockProgram *inputPrograms[] = {&program1, &program2}; cl_uint numInputPrograms = 2; program1.setAllowNonUniform(false); program2.setAllowNonUniform(false); program.updateNonUniformFlag((const Program **)inputPrograms, numInputPrograms); EXPECT_FALSE(program.getAllowNonUniform()); program1.setAllowNonUniform(false); program2.setAllowNonUniform(true); program.updateNonUniformFlag((const Program **)inputPrograms, numInputPrograms); EXPECT_FALSE(program.getAllowNonUniform()); program1.setAllowNonUniform(true); program2.setAllowNonUniform(false); program.updateNonUniformFlag((const Program **)inputPrograms, numInputPrograms); EXPECT_FALSE(program.getAllowNonUniform()); program1.setAllowNonUniform(true); program2.setAllowNonUniform(true); program.updateNonUniformFlag((const Program **)inputPrograms, numInputPrograms); EXPECT_TRUE(program.getAllowNonUniform()); } class ProgramNonUniformTest : public ContextFixture, public PlatformFixture, public ProgramFixture, public CommandQueueHwFixture, public testing::Test { using ContextFixture::SetUp; using PlatformFixture::SetUp; protected: ProgramNonUniformTest() { } void SetUp() override { PlatformFixture::SetUp(); device = pPlatform->getClDevice(0); rootDeviceIndex = pPlatform->getClDevice(0)->getRootDeviceIndex(); ContextFixture::SetUp(1, &device); ProgramFixture::SetUp(); CommandQueueHwFixture::SetUp(pPlatform->getClDevice(0), 
0); } void TearDown() override { CommandQueueHwFixture::TearDown(); ProgramFixture::TearDown(); ContextFixture::TearDown(); PlatformFixture::TearDown(); } cl_device_id device; uint32_t rootDeviceIndex; cl_int retVal = CL_SUCCESS; }; TEST_F(ProgramNonUniformTest, GivenCl21WhenExecutingKernelWithNonUniformThenEnqueueSucceeds) { REQUIRE_OCL_21_OR_SKIP(defaultHwInfo); CreateProgramFromBinary(pContext, pContext->getDevices(), "kernel_data_param"); auto mockProgram = pProgram; ASSERT_NE(nullptr, mockProgram); mockProgram->setBuildOptions("-cl-std=CL2.1"); retVal = mockProgram->build( mockProgram->getDevices(), nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); auto pKernelInfo = mockProgram->Program::getKernelInfo("test_get_local_size", rootDeviceIndex); EXPECT_NE(nullptr, pKernelInfo); // create a kernel auto pKernel = Kernel::create(mockProgram, *pKernelInfo, *pPlatform->getClDevice(0), &retVal); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, pKernel); size_t globalWorkSize[3] = {12, 12, 12}; size_t localWorkSize[3] = {11, 12, 1}; retVal = pCmdQ->enqueueKernel( pKernel, 3, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); delete pKernel; } TEST_F(ProgramNonUniformTest, GivenCl20WhenExecutingKernelWithNonUniformThenEnqueueSucceeds) { REQUIRE_OCL_21_OR_SKIP(defaultHwInfo); CreateProgramFromBinary(pContext, pContext->getDevices(), "kernel_data_param"); auto mockProgram = pProgram; ASSERT_NE(nullptr, mockProgram); mockProgram->setBuildOptions("-cl-std=CL2.0"); retVal = mockProgram->build( mockProgram->getDevices(), nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); auto pKernelInfo = mockProgram->Program::getKernelInfo("test_get_local_size", rootDeviceIndex); EXPECT_NE(nullptr, pKernelInfo); // create a kernel auto pKernel = Kernel::create(mockProgram, *pKernelInfo, *pPlatform->getClDevice(0), &retVal); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, pKernel); size_t globalWorkSize[3] = {12, 12, 12}; size_t localWorkSize[3] = {11, 
12, 1}; retVal = pCmdQ->enqueueKernel( pKernel, 3, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); delete pKernel; } TEST_F(ProgramNonUniformTest, GivenCl12WhenExecutingKernelWithNonUniformThenInvalidWorkGroupSizeIsReturned) { CreateProgramFromBinary(pContext, pContext->getDevices(), "kernel_data_param"); auto mockProgram = pProgram; ASSERT_NE(nullptr, mockProgram); mockProgram->setBuildOptions("-cl-std=CL1.2"); retVal = mockProgram->build( mockProgram->getDevices(), nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); auto pKernelInfo = mockProgram->Program::getKernelInfo("test_get_local_size", rootDeviceIndex); EXPECT_NE(nullptr, pKernelInfo); // create a kernel auto pKernel = Kernel::create(mockProgram, *pKernelInfo, *pPlatform->getClDevice(0), &retVal); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, pKernel); size_t globalWorkSize[3] = {12, 12, 12}; size_t localWorkSize[3] = {11, 12, 12}; retVal = pCmdQ->enqueueKernel( pKernel, 3, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_WORK_GROUP_SIZE, retVal); delete pKernel; } compute-runtime-22.14.22890/opencl/test/unit_test/program/program_spec_constants_tests.cpp000066400000000000000000000072161422164147700320070ustar00rootroot00000000000000/* * Copyright (C) 2019-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/compiler_interface/compiler_interface.h" #include "shared/source/compiler_interface/compiler_interface.inl" #include "shared/test/common/mocks/mock_cif.h" #include "shared/test/common/mocks/mock_compilers.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "gtest/gtest.h" #include using namespace NEO; struct UpdateSpecConstantsTest : public ::testing::Test { void SetUp() override { mockProgram.reset(new MockProgram(toClDeviceVector(device))); mockProgram->specConstantsIds.reset(new MockCIFBuffer()); 
mockProgram->specConstantsSizes.reset(new MockCIFBuffer()); mockProgram->specConstantsIds->PushBackRawCopy(id1); mockProgram->specConstantsIds->PushBackRawCopy(id2); mockProgram->specConstantsIds->PushBackRawCopy(id3); uint32_t size1 = sizeof(char), size2 = sizeof(uint16_t), size3 = sizeof(int); mockProgram->specConstantsSizes->PushBackRawCopy(size1); mockProgram->specConstantsSizes->PushBackRawCopy(size2); mockProgram->specConstantsSizes->PushBackRawCopy(size3); mockProgram->specConstantsValues.insert({id1, static_cast(val1)}); mockProgram->specConstantsValues.insert({id2, static_cast(val2)}); mockProgram->specConstantsValues.insert({id3, static_cast(val3)}); values = &mockProgram->specConstantsValues; EXPECT_EQ(val1, static_cast(values->at(id1))); EXPECT_EQ(val2, static_cast(values->at(id2))); EXPECT_EQ(val3, static_cast(values->at(id3))); } MockClDevice device{new MockDevice()}; std::unique_ptr mockProgram; uint32_t id1 = 1u; uint32_t id2 = 2u; uint32_t id3 = 3u; char val1 = 5; uint16_t val2 = 50; int val3 = 500; specConstValuesMap *values; }; TEST_F(UpdateSpecConstantsTest, givenNewSpecConstValueWhenUpdateSpecializationConstantThenProperValueIsCopiedAndUpdated) { int newSpecConstVal3 = 5000; auto ret = mockProgram->updateSpecializationConstant(3, sizeof(int), &newSpecConstVal3); EXPECT_EQ(CL_SUCCESS, ret); EXPECT_EQ(val1, static_cast(values->at(id1))); EXPECT_EQ(val2, static_cast(values->at(id2))); EXPECT_EQ(newSpecConstVal3, static_cast(values->at(id3))); newSpecConstVal3 = 50000; EXPECT_NE(newSpecConstVal3, static_cast(values->at(id3))); ret = mockProgram->updateSpecializationConstant(3, sizeof(int), &newSpecConstVal3); EXPECT_EQ(CL_SUCCESS, ret); EXPECT_EQ(newSpecConstVal3, static_cast(values->at(id3))); } TEST_F(UpdateSpecConstantsTest, givenNewSpecConstValueWithUnproperSizeWhenUpdateSpecializationConstantThenErrorIsReturned) { int newSpecConstVal3 = 5000; auto ret = mockProgram->updateSpecializationConstant(3, 10 * sizeof(int), &newSpecConstVal3); 
EXPECT_EQ(CL_INVALID_VALUE, ret); EXPECT_EQ(val1, static_cast(values->at(id1))); EXPECT_EQ(val2, static_cast(values->at(id2))); EXPECT_EQ(val3, static_cast(values->at(id3))); } TEST_F(UpdateSpecConstantsTest, givenNewSpecConstValueWithUnproperIdAndSizeWhenUpdateSpecializationConstantThenErrorIsReturned) { int newSpecConstVal3 = 5000; auto ret = mockProgram->updateSpecializationConstant(4, sizeof(int), &newSpecConstVal3); EXPECT_EQ(CL_INVALID_SPEC_ID, ret); EXPECT_EQ(val1, static_cast(values->at(id1))); EXPECT_EQ(val2, static_cast(values->at(id2))); EXPECT_EQ(val3, static_cast(values->at(id3))); } compute-runtime-22.14.22890/opencl/test/unit_test/program/program_tests.cpp000066400000000000000000004247751422164147700267160ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/program/program_tests.h" #include "shared/source/command_stream/command_stream_receiver_hw.h" #include "shared/source/compiler_interface/compiler_warnings/compiler_warnings.h" #include "shared/source/compiler_interface/intermediate_representations.h" #include "shared/source/device_binary_format/elf/elf_decoder.h" #include "shared/source/device_binary_format/elf/ocl_elf.h" #include "shared/source/device_binary_format/patchtokens_decoder.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/hash.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/ptr_math.h" #include "shared/source/helpers/string.h" #include "shared/source/memory_manager/allocations_list.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "shared/source/memory_manager/surface.h" #include "shared/source/os_interface/os_context.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/kernel_binary_helper.h" #include "shared/test/common/libult/global_environment.h" 
#include "shared/test/common/libult/ult_command_stream_receiver.h" #include "shared/test/common/mocks/mock_allocation_properties.h" #include "shared/test/common/mocks/mock_compiler_interface.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/device_binary_format/patchtokens_tests.h" #include "shared/test/unit_test/device_binary_format/zebin_tests.h" #include "shared/test/unit_test/helpers/gtest_helpers.h" #include "shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/source/gtpin/gtpin_notify.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/kernel/kernel.h" #include "opencl/source/program/create.inl" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/fixtures/multi_root_device_fixture.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "opencl/test/unit_test/program/program_from_binary.h" #include "opencl/test/unit_test/program/program_with_source.h" #include "opencl/test/unit_test/test_macros/test_checks_ocl.h" #include "compiler_options.h" #include "gtest/gtest.h" #include #include #include #include using namespace NEO; void ProgramTests::SetUp() { ClDeviceFixture::SetUp(); cl_device_id device = pClDevice; ContextFixture::SetUp(1, &device); } void ProgramTests::TearDown() { ContextFixture::TearDown(); ClDeviceFixture::TearDown(); } class NoCompilerInterfaceRootDeviceEnvironment : public RootDeviceEnvironment { public: NoCompilerInterfaceRootDeviceEnvironment(ExecutionEnvironment &executionEnvironment) : RootDeviceEnvironment(executionEnvironment) { *hwInfo = *defaultHwInfo; } CompilerInterface *getCompilerInterface() override { return nullptr; } bool initAilConfiguration() override { return true; } }; class FailingGenBinaryProgram : public MockProgram { 
public: using MockProgram::MockProgram; cl_int processGenBinary(const ClDevice &clDevice) override { return CL_INVALID_BINARY; } }; class SucceedingGenBinaryProgram : public MockProgram { public: using MockProgram::MockProgram; cl_int processGenBinary(const ClDevice &clDevice) override { return CL_SUCCESS; } }; using ProgramFromBinaryTest = ProgramFromBinaryFixture; TEST_F(ProgramFromBinaryTest, WhenBuildingProgramThenSuccessIsReturned) { retVal = pProgram->build( pProgram->getDevices(), nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(ProgramFromBinaryTest, WhenGettingProgramContextInfoThenCorrectContextIsReturned) { cl_context contextRet = reinterpret_cast(static_cast(0xdeaddead)); size_t paramValueSizeRet = 0; retVal = pProgram->getInfo( CL_PROGRAM_CONTEXT, sizeof(cl_context), &contextRet, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pContext, contextRet); EXPECT_EQ(sizeof(cl_context), paramValueSizeRet); } TEST_F(ProgramFromBinaryTest, GivenNonNullParamValueWhenGettingProgramBinaryInfoThenCorrectBinaryIsReturned) { size_t paramValueSize = sizeof(unsigned char **); size_t paramValueSizeRet = 0; auto testBinary = std::make_unique(knownSourceSize); retVal = pProgram->getInfo( CL_PROGRAM_BINARIES, paramValueSize, &testBinary, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(paramValueSize, paramValueSizeRet); EXPECT_STREQ((const char *)knownSource.get(), (const char *)testBinary.get()); } TEST_F(ProgramFromBinaryTest, GivenNullParamValueWhenGettingProgramBinaryInfoThenSuccessIsReturned) { size_t paramValueSize = sizeof(unsigned char **); size_t paramValueSizeRet = 0; retVal = pProgram->getInfo( CL_PROGRAM_BINARIES, 0, nullptr, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(paramValueSize, paramValueSizeRet); } TEST_F(ProgramFromBinaryTest, GivenNonNullParamValueAndParamValueSizeZeroWhenGettingProgramBinaryInfoThenInvalidValueErrorIsReturned) { size_t paramValueSizeRet = 0; auto testBinary = std::make_unique(knownSourceSize); 
retVal = pProgram->getInfo( CL_PROGRAM_BINARIES, 0, &testBinary, ¶mValueSizeRet); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(ProgramFromBinaryTest, GivenInvalidParametersWhenGettingProgramInfoThenValueSizeRetIsNotUpdated) { size_t paramValueSizeRet = 0x1234; auto testBinary = std::make_unique(knownSourceSize); retVal = pProgram->getInfo( CL_PROGRAM_BINARIES, 0, &testBinary, ¶mValueSizeRet); EXPECT_EQ(CL_INVALID_VALUE, retVal); EXPECT_EQ(0x1234u, paramValueSizeRet); } TEST_F(ProgramFromBinaryTest, GivenInvalidParamWhenGettingProgramBinaryInfoThenInvalidValueErrorIsReturned) { size_t paramValueSizeRet = 0; auto testBinary = std::make_unique(knownSourceSize); retVal = pProgram->getInfo( CL_PROGRAM_BUILD_STATUS, 0, nullptr, ¶mValueSizeRet); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(ProgramFromBinaryTest, WhenGettingBinarySizesThenCorrectSizesAreReturned) { size_t paramValueSize = sizeof(size_t *); size_t paramValue[1]; size_t paramValueSizeRet = 0; retVal = pProgram->getInfo( CL_PROGRAM_BINARY_SIZES, paramValueSize, paramValue, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(knownSourceSize, paramValue[0]); EXPECT_EQ(paramValueSize, paramValueSizeRet); } TEST_F(ProgramFromBinaryTest, GivenProgramWithOneKernelWhenGettingNumKernelsThenOneIsReturned) { size_t paramValue = 0; size_t paramValueSize = sizeof(paramValue); size_t paramValueSizeRet = 0; retVal = pProgram->build( pProgram->getDevices(), nullptr, false); ASSERT_EQ(CL_SUCCESS, retVal); retVal = pProgram->getInfo( CL_PROGRAM_NUM_KERNELS, paramValueSize, ¶mValue, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, paramValue); EXPECT_EQ(paramValueSize, paramValueSizeRet); } TEST_F(ProgramFromBinaryTest, GivenProgramWithNoExecutableCodeWhenGettingNumKernelsThenInvalidProgramExecutableErrorIsReturned) { size_t paramValue = 0; size_t paramValueSize = sizeof(paramValue); size_t paramValueSizeRet = 0; CreateProgramFromBinary(pContext, pContext->getDevices(), binaryFileName); MockProgram *p = 
pProgram; p->setBuildStatus(CL_BUILD_NONE); retVal = pProgram->getInfo( CL_PROGRAM_NUM_KERNELS, paramValueSize, ¶mValue, ¶mValueSizeRet); EXPECT_EQ(CL_INVALID_PROGRAM_EXECUTABLE, retVal); } TEST_F(ProgramFromBinaryTest, WhenGettingKernelNamesThenCorrectNameIsReturned) { size_t paramValueSize = sizeof(size_t *); size_t paramValueSizeRet = 0; retVal = pProgram->build( pProgram->getDevices(), nullptr, false); ASSERT_EQ(CL_SUCCESS, retVal); // get info successfully about required sizes for kernel names retVal = pProgram->getInfo( CL_PROGRAM_KERNEL_NAMES, 0, nullptr, ¶mValueSizeRet); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(0u, paramValueSizeRet); // get info successfully about kernel names auto paramValue = std::make_unique(paramValueSizeRet); paramValueSize = paramValueSizeRet; ASSERT_NE(paramValue, nullptr); size_t expectedKernelsStringSize = strlen(kernelName) + 1; retVal = pProgram->getInfo( CL_PROGRAM_KERNEL_NAMES, paramValueSize, paramValue.get(), ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_STREQ(kernelName, (char *)paramValue.get()); EXPECT_EQ(expectedKernelsStringSize, paramValueSizeRet); } TEST_F(ProgramFromBinaryTest, GivenProgramWithNoExecutableCodeWhenGettingKernelNamesThenInvalidProgramExecutableErrorIsReturned) { size_t paramValueSize = sizeof(size_t *); size_t paramValueSizeRet = 0; CreateProgramFromBinary(pContext, pContext->getDevices(), binaryFileName); MockProgram *p = pProgram; p->setBuildStatus(CL_BUILD_NONE); retVal = pProgram->getInfo( CL_PROGRAM_KERNEL_NAMES, paramValueSize, nullptr, ¶mValueSizeRet); EXPECT_EQ(CL_INVALID_PROGRAM_EXECUTABLE, retVal); } TEST_F(ProgramFromBinaryTest, WhenGettingProgramScopeGlobalCtorsAndDtorsPresentInfoThenCorrectValueIsReturned) { cl_uint paramRet = 0; cl_uint expectedParam = CL_FALSE; size_t paramSizeRet = 0; retVal = pProgram->getInfo( CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT, sizeof(cl_uint), ¶mRet, ¶mSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(cl_uint), paramSizeRet); 
EXPECT_EQ(expectedParam, paramRet); retVal = pProgram->getInfo( CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT, sizeof(cl_uint), ¶mRet, ¶mSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(cl_uint), paramSizeRet); EXPECT_EQ(expectedParam, paramRet); } TEST_F(ProgramFromBinaryTest, GivenNullDeviceWhenGettingBuildStatusThenBuildNoneIsReturned) { cl_device_id device = pClDevice; cl_build_status buildStatus = 0; size_t paramValueSize = sizeof(buildStatus); size_t paramValueSizeRet = 0; retVal = pProgram->getBuildInfo( device, CL_PROGRAM_BUILD_STATUS, paramValueSize, &buildStatus, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(paramValueSize, paramValueSizeRet); EXPECT_EQ(CL_BUILD_NONE, buildStatus); } TEST_F(ProgramFromBinaryTest, GivenInvalidParametersWhenGettingBuildInfoThenValueSizeRetIsNotUpdated) { cl_device_id device = pClDevice; cl_build_status buildStatus = 0; size_t paramValueSize = sizeof(buildStatus); size_t paramValueSizeRet = 0x1234; retVal = pProgram->getBuildInfo( device, 0, paramValueSize, &buildStatus, ¶mValueSizeRet); EXPECT_EQ(CL_INVALID_VALUE, retVal); EXPECT_EQ(0x1234u, paramValueSizeRet); } TEST_F(ProgramFromBinaryTest, GivenDefaultDeviceWhenGettingBuildOptionsThenBuildOptionsAreEmpty) { cl_device_id device = pClDevice; size_t paramValueSizeRet = 0u; size_t paramValueSize = 0u; retVal = pProgram->getBuildInfo( device, CL_PROGRAM_BUILD_OPTIONS, 0, nullptr, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(paramValueSizeRet, 0u); auto paramValue = std::make_unique(paramValueSizeRet); paramValueSize = paramValueSizeRet; retVal = pProgram->getBuildInfo( device, CL_PROGRAM_BUILD_OPTIONS, paramValueSize, paramValue.get(), ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_STREQ("", (char *)paramValue.get()); } TEST_F(ProgramFromBinaryTest, GivenDefaultDeviceWhenGettingLogThenLogEmpty) { cl_device_id device = pClDevice; size_t paramValueSizeRet = 0u; size_t paramValueSize = 0u; retVal = pProgram->getBuildInfo( device, 
CL_PROGRAM_BUILD_LOG, 0, nullptr, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(paramValueSizeRet, 0u); auto paramValue = std::make_unique(paramValueSizeRet); paramValueSize = paramValueSizeRet; retVal = pProgram->getBuildInfo( device, CL_PROGRAM_BUILD_LOG, paramValueSize, paramValue.get(), ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_STREQ("", (char *)paramValue.get()); } TEST_F(ProgramFromBinaryTest, GivenLogEntriesWhenGetBuildLogThenLogIsApended) { cl_device_id device = pClDevice; size_t paramValueSizeRet = 0u; size_t paramValueSize = 0u; retVal = pProgram->getBuildInfo( device, CL_PROGRAM_BUILD_LOG, 0, nullptr, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(paramValueSizeRet, 0u); auto paramValue = std::make_unique(paramValueSizeRet); paramValueSize = paramValueSizeRet; retVal = pProgram->getBuildInfo( device, CL_PROGRAM_BUILD_LOG, paramValueSize, paramValue.get(), ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_STREQ("", (char *)paramValue.get()); // Add more text to the log pProgram->updateBuildLog(pClDevice->getRootDeviceIndex(), "testing", 8); pProgram->updateBuildLog(pClDevice->getRootDeviceIndex(), "several", 8); retVal = pProgram->getBuildInfo( device, CL_PROGRAM_BUILD_LOG, 0, nullptr, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_GE(paramValueSizeRet, 16u); paramValue = std::make_unique(paramValueSizeRet); paramValueSize = paramValueSizeRet; retVal = pProgram->getBuildInfo( device, CL_PROGRAM_BUILD_LOG, paramValueSize, paramValue.get(), ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, strstr(paramValue.get(), "testing")); const char *paramValueContinued = strstr(paramValue.get(), "testing") + 7; ASSERT_NE(nullptr, strstr(paramValueContinued, "several")); } TEST_F(ProgramFromBinaryTest, GivenNullParamValueWhenGettingProgramBinaryTypeThenParamValueSizeIsReturned) { cl_device_id device = pClDevice; size_t paramValueSizeRet = 0u; size_t paramValueSize = 0u; retVal = pProgram->getBuildInfo( 
device, CL_PROGRAM_BINARY_TYPE, paramValueSize, nullptr, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(paramValueSizeRet, 0u); } TEST_F(ProgramFromBinaryTest, WhenGettingProgramBinaryTypeThenCorrectProgramTypeIsReturned) { cl_device_id device = pClDevice; cl_program_binary_type programType = 0; char *paramValue = (char *)&programType; size_t paramValueSizeRet = 0u; size_t paramValueSize = 0u; retVal = pProgram->getBuildInfo( device, CL_PROGRAM_BINARY_TYPE, paramValueSize, nullptr, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(paramValueSizeRet, 0u); paramValueSize = paramValueSizeRet; retVal = pProgram->getBuildInfo( device, CL_PROGRAM_BINARY_TYPE, paramValueSize, paramValue, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ((cl_program_binary_type)CL_PROGRAM_BINARY_TYPE_EXECUTABLE, programType); } TEST_F(ProgramFromBinaryTest, GivenInvalidParamWhenGettingBuildInfoThenInvalidValueErrorIsReturned) { cl_device_id device = pClDevice; size_t paramValueSizeRet = 0u; retVal = pProgram->getBuildInfo( device, CL_PROGRAM_KERNEL_NAMES, 0, nullptr, ¶mValueSizeRet); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(ProgramFromBinaryTest, GivenGlobalVariableTotalSizeSetWhenGettingBuildGlobalVariableTotalSizeThenCorrectSizeIsReturned) { cl_device_id device = pClDevice; size_t globalVarSize = 22; size_t paramValueSize = sizeof(globalVarSize); size_t paramValueSizeRet = 0; char *paramValue = (char *)&globalVarSize; // get build info as is retVal = pProgram->getBuildInfo( device, CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE, paramValueSize, paramValue, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(paramValueSizeRet, sizeof(globalVarSize)); EXPECT_EQ(globalVarSize, 0u); // Set GlobalVariableTotalSize as 1024 CreateProgramFromBinary(pContext, pContext->getDevices(), binaryFileName); MockProgram *p = pProgram; ProgramInfo programInfo; char constantData[1024] = {}; programInfo.globalVariables.initData = constantData; 
programInfo.globalVariables.size = sizeof(constantData); p->processProgramInfo(programInfo, *pClDevice); // get build info once again retVal = pProgram->getBuildInfo( device, CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE, paramValueSize, paramValue, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(paramValueSizeRet, sizeof(globalVarSize)); if (castToObject(pClDevice)->areOcl21FeaturesEnabled()) { EXPECT_EQ(globalVarSize, 1024u); } else { EXPECT_EQ(globalVarSize, 0u); } } TEST_F(ProgramFromBinaryTest, givenProgramWhenItIsBeingBuildThenItContainsGraphicsAllocationInKernelInfo) { pProgram->build(pProgram->getDevices(), nullptr, true); auto kernelInfo = pProgram->getKernelInfo(size_t(0), rootDeviceIndex); auto graphicsAllocation = kernelInfo->getGraphicsAllocation(); ASSERT_NE(nullptr, graphicsAllocation); EXPECT_TRUE(graphicsAllocation->is32BitAllocation()); auto &hwHelper = NEO::HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily); size_t isaPadding = hwHelper.getPaddingForISAAllocation(); EXPECT_EQ(graphicsAllocation->getUnderlyingBufferSize(), kernelInfo->heapInfo.KernelHeapSize + isaPadding); auto kernelIsa = graphicsAllocation->getUnderlyingBuffer(); EXPECT_NE(kernelInfo->heapInfo.pKernelHeap, kernelIsa); EXPECT_EQ(0, memcmp(kernelIsa, kernelInfo->heapInfo.pKernelHeap, kernelInfo->heapInfo.KernelHeapSize)); auto rootDeviceIndex = graphicsAllocation->getRootDeviceIndex(); EXPECT_EQ(GmmHelper::decanonize(graphicsAllocation->getGpuBaseAddress()), pDevice->getMemoryManager()->getInternalHeapBaseAddress(rootDeviceIndex, graphicsAllocation->isAllocatedInLocalMemoryPool())); } TEST_F(ProgramFromBinaryTest, whenProgramIsBeingRebuildThenOutdatedGlobalBuffersAreFreed) { pProgram->build(pProgram->getDevices(), nullptr, true); EXPECT_EQ(nullptr, pProgram->buildInfos[pClDevice->getRootDeviceIndex()].constantSurface); EXPECT_EQ(nullptr, pProgram->buildInfos[pClDevice->getRootDeviceIndex()].globalSurface); 
pProgram->buildInfos[pClDevice->getRootDeviceIndex()].constantSurface = new MockGraphicsAllocation(); pProgram->processGenBinary(*pClDevice); EXPECT_EQ(nullptr, pProgram->buildInfos[pClDevice->getRootDeviceIndex()].constantSurface); EXPECT_EQ(nullptr, pProgram->buildInfos[pClDevice->getRootDeviceIndex()].globalSurface); pProgram->buildInfos[pClDevice->getRootDeviceIndex()].globalSurface = new MockGraphicsAllocation(); pProgram->processGenBinary(*pClDevice); EXPECT_EQ(nullptr, pProgram->buildInfos[pClDevice->getRootDeviceIndex()].constantSurface); EXPECT_EQ(nullptr, pProgram->buildInfos[pClDevice->getRootDeviceIndex()].globalSurface); } TEST_F(ProgramFromBinaryTest, givenProgramWhenCleanKernelInfoIsCalledThenKernelAllocationIsFreed) { pProgram->build(pProgram->getDevices(), nullptr, true); EXPECT_EQ(1u, pProgram->getNumKernels()); for (auto i = 0u; i < pProgram->buildInfos.size(); i++) { pProgram->cleanCurrentKernelInfo(i); } EXPECT_EQ(0u, pProgram->getNumKernels()); } TEST_F(ProgramFromBinaryTest, givenReuseKernelBinariesWhenCleanCurrentKernelInfoThenDecreaseAllocationReuseCounter) { DebugManagerStateRestore restorer; DebugManager.flags.ReuseKernelBinaries.set(1); pProgram->build(pProgram->getDevices(), nullptr, true); auto &kernelAllocMap = pProgram->peekExecutionEnvironment().memoryManager->getKernelAllocationMap(); auto kernelName = pProgram->buildInfos[0].kernelInfoArray[0]->kernelDescriptor.kernelMetadata.kernelName; auto kernelAllocations = kernelAllocMap.find(kernelName); kernelAllocations->second.reuseCounter = 2u; EXPECT_EQ(1u, pProgram->getNumKernels()); for (auto i = 0u; i < pProgram->buildInfos.size(); i++) { pProgram->cleanCurrentKernelInfo(i); } EXPECT_EQ(0u, pProgram->getNumKernels()); EXPECT_EQ(1u, kernelAllocations->second.reuseCounter); pProgram->peekExecutionEnvironment().memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(kernelAllocations->second.kernelAllocation); } TEST_F(ProgramFromBinaryTest, 
givenReuseKernelBinariesWhenCleanCurrentKernelInfoAndCounterEqualsZeroThenFreeAllocation) { DebugManagerStateRestore restorer; DebugManager.flags.ReuseKernelBinaries.set(1); pProgram->build(pProgram->getDevices(), nullptr, true); auto &kernelAllocMap = pProgram->peekExecutionEnvironment().memoryManager->getKernelAllocationMap(); EXPECT_EQ(1u, pProgram->getNumKernels()); for (auto i = 0u; i < pProgram->buildInfos.size(); i++) { pProgram->cleanCurrentKernelInfo(i); } EXPECT_EQ(0u, pProgram->getNumKernels()); EXPECT_EQ(0u, kernelAllocMap.size()); } HWTEST_F(ProgramFromBinaryTest, givenProgramWhenCleanCurrentKernelInfoIsCalledButGpuIsNotYetDoneThenKernelAllocationIsPutOnDeferredFreeListAndCsrRegistersCacheFlush) { auto &csr = pDevice->getGpgpuCommandStreamReceiver(); EXPECT_TRUE(csr.getTemporaryAllocations().peekIsEmpty()); pProgram->build(pProgram->getDevices(), nullptr, true); auto kernelAllocation = pProgram->getKernelInfo(static_cast(0u), rootDeviceIndex)->getGraphicsAllocation(); kernelAllocation->updateTaskCount(100, csr.getOsContext().getContextId()); *csr.getTagAddress() = 0; pProgram->cleanCurrentKernelInfo(rootDeviceIndex); EXPECT_FALSE(csr.getTemporaryAllocations().peekIsEmpty()); EXPECT_EQ(csr.getTemporaryAllocations().peekHead(), kernelAllocation); EXPECT_TRUE(this->pDevice->getUltCommandStreamReceiver().requiresInstructionCacheFlush); } HWTEST_F(ProgramFromBinaryTest, givenIsaAllocationUsedByMultipleCsrsWhenItIsDeletedThenItRegistersCacheFlushInEveryCsrThatUsedIt) { auto &csr0 = this->pDevice->getUltCommandStreamReceiverFromIndex(0u); auto &csr1 = this->pDevice->getUltCommandStreamReceiverFromIndex(1u); pProgram->build(pProgram->getDevices(), nullptr, true); auto kernelAllocation = pProgram->getKernelInfo(static_cast(0u), rootDeviceIndex)->getGraphicsAllocation(); csr0.makeResident(*kernelAllocation); csr1.makeResident(*kernelAllocation); csr0.processResidency(csr0.getResidencyAllocations(), 0u); csr1.processResidency(csr1.getResidencyAllocations(), 0u); 
csr0.makeNonResident(*kernelAllocation); csr1.makeNonResident(*kernelAllocation); EXPECT_FALSE(csr0.requiresInstructionCacheFlush); EXPECT_FALSE(csr1.requiresInstructionCacheFlush); pProgram->cleanCurrentKernelInfo(rootDeviceIndex); EXPECT_TRUE(csr0.requiresInstructionCacheFlush); EXPECT_TRUE(csr1.requiresInstructionCacheFlush); } TEST_F(ProgramFromSourceTest, GivenSpecificParamatersWhenBuildingProgramThenSuccessOrCorrectErrorCodeIsReturned) { KernelBinaryHelper kbHelper(binaryFileName, true); auto device = pPlatform->getClDevice(0); CreateProgramWithSource( pContext, sourceFileName); // Order of following microtests is important - do not change. // Add new microtests at end. auto pMockProgram = pProgram; // fail build - another build is already in progress pMockProgram->setBuildStatus(CL_BUILD_IN_PROGRESS); retVal = pProgram->build(pProgram->getDevices(), nullptr, false); EXPECT_EQ(CL_INVALID_OPERATION, retVal); pMockProgram->setBuildStatus(CL_BUILD_NONE); // fail build - CompilerInterface cannot be obtained auto executionEnvironment = device->getExecutionEnvironment(); std::unique_ptr rootDeviceEnvironment = std::make_unique(*executionEnvironment); std::swap(rootDeviceEnvironment, executionEnvironment->rootDeviceEnvironments[device->getRootDeviceIndex()]); auto p2 = std::make_unique(toClDeviceVector(*device)); retVal = p2->build(p2->getDevices(), nullptr, false); EXPECT_EQ(CL_OUT_OF_HOST_MEMORY, retVal); p2.reset(nullptr); std::swap(rootDeviceEnvironment, executionEnvironment->rootDeviceEnvironments[device->getRootDeviceIndex()]); // fail build - any build error (here caused by specifying unrecognized option) retVal = pProgram->build(pProgram->getDevices(), "-invalid-option", false); EXPECT_EQ(CL_BUILD_PROGRAM_FAILURE, retVal); // fail build - linked code is corrupted and cannot be postprocessed auto p3 = std::make_unique(toClDeviceVector(*device)); std::string testFile; size_t sourceSize; testFile.append(clFiles); testFile.append("CopyBuffer_simd16.cl"); // 
source file auto pSourceBuffer = loadDataFromFile(testFile.c_str(), sourceSize); EXPECT_NE(0u, sourceSize); EXPECT_NE(nullptr, pSourceBuffer); p3->sourceCode = pSourceBuffer.get(); p3->createdFrom = Program::CreatedFrom::SOURCE; retVal = p3->build(p3->getDevices(), nullptr, false); EXPECT_EQ(CL_INVALID_BINARY, retVal); p3.reset(nullptr); // build successfully - build kernel and write it to Kernel Cache pMockProgram->clearOptions(); std::string receivedInternalOptions; auto debugVars = NEO::getFclDebugVars(); debugVars.receivedInternalOptionsOutput = &receivedInternalOptions; gEnvironment->fclPushDebugVars(debugVars); retVal = pProgram->build(pProgram->getDevices(), nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(CompilerOptions::contains(receivedInternalOptions, pPlatform->getClDevice(0)->peekCompilerExtensions())) << receivedInternalOptions; gEnvironment->fclPopDebugVars(); // get build log size_t param_value_size_ret = 0u; retVal = pProgram->getBuildInfo( device, CL_PROGRAM_BUILD_LOG, 0, nullptr, ¶m_value_size_ret); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(param_value_size_ret, 0u); // get build log when the log does not exist pMockProgram->clearLog(device->getRootDeviceIndex()); retVal = pProgram->getBuildInfo( device, CL_PROGRAM_BUILD_LOG, 0, nullptr, ¶m_value_size_ret); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(param_value_size_ret, 0u); // build successfully - build kernel but do not write it to Kernel Cache (kernel is already in the Cache) pMockProgram->setBuildStatus(CL_BUILD_NONE); retVal = pProgram->build(pProgram->getDevices(), nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); // build successfully - kernel is already in Kernel Cache, do not build and take it from Cache retVal = pProgram->build(pProgram->getDevices(), nullptr, true); EXPECT_EQ(CL_SUCCESS, retVal); // fail build - code to be build does not exist pMockProgram->sourceCode = ""; // set source code as non-existent (invalid) pMockProgram->createdFrom = Program::CreatedFrom::SOURCE; 
pMockProgram->setBuildStatus(CL_BUILD_NONE); pMockProgram->setCreatedFromBinary(false); retVal = pProgram->build(pProgram->getDevices(), nullptr, false); EXPECT_EQ(CL_INVALID_PROGRAM, retVal); } TEST_F(ProgramFromSourceTest, GivenDuplicateOptionsWhenCreatingWithSourceThenBuildSucceeds) { KernelBinaryHelper kbHelper(binaryFileName, false); retVal = pProgram->build(pProgram->getDevices(), nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); retVal = pProgram->build(pProgram->getDevices(), CompilerOptions::fastRelaxedMath.data(), false); EXPECT_EQ(CL_SUCCESS, retVal); retVal = pProgram->build(pProgram->getDevices(), CompilerOptions::fastRelaxedMath.data(), false); EXPECT_EQ(CL_SUCCESS, retVal); retVal = pProgram->build(pProgram->getDevices(), CompilerOptions::finiteMathOnly.data(), false); EXPECT_EQ(CL_SUCCESS, retVal); retVal = pProgram->build(pProgram->getDevices(), nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(ProgramFromSourceTest, WhenBuildingProgramThenFeaturesAndExtraExtensionsAreNotAdded) { auto cip = new MockCompilerInterfaceCaptureBuildOptions(); auto pClDevice = pContext->getDevice(0); pClDevice->getExecutionEnvironment()->rootDeviceEnvironments[pClDevice->getRootDeviceIndex()]->compilerInterface.reset(cip); auto extensionsOption = static_cast(devices[0])->peekCompilerExtensions(); auto extensionsWithFeaturesOption = static_cast(devices[0])->peekCompilerExtensionsWithFeatures(); EXPECT_FALSE(hasSubstr(cip->buildInternalOptions, extensionsOption)); EXPECT_FALSE(hasSubstr(cip->buildInternalOptions, extensionsWithFeaturesOption)); EXPECT_FALSE(hasSubstr(cip->buildInternalOptions, std::string{"+cl_khr_3d_image_writes "})); retVal = pProgram->build(pProgram->getDevices(), nullptr, false); EXPECT_TRUE(hasSubstr(cip->buildInternalOptions, extensionsOption)); EXPECT_FALSE(hasSubstr(cip->buildInternalOptions, extensionsWithFeaturesOption)); EXPECT_FALSE(hasSubstr(cip->buildInternalOptions, std::string{"+cl_khr_3d_image_writes "})); } 
TEST_F(ProgramFromSourceTest, WhenBuildingProgramWithOpenClC20ThenExtraExtensionsAreAdded) { auto cip = new MockCompilerInterfaceCaptureBuildOptions(); auto pClDevice = pContext->getDevice(0); pClDevice->getExecutionEnvironment()->rootDeviceEnvironments[pClDevice->getRootDeviceIndex()]->compilerInterface.reset(cip); auto pProgram = std::make_unique(toClDeviceVector(*pClDevice)); pProgram->sourceCode = "__kernel mock() {}"; pProgram->createdFrom = Program::CreatedFrom::SOURCE; MockProgram::getInternalOptionsCalled = 0; auto extensionsOption = static_cast(devices[0])->peekCompilerExtensions(); auto extensionsWithFeaturesOption = static_cast(devices[0])->peekCompilerExtensionsWithFeatures(); EXPECT_FALSE(hasSubstr(cip->buildInternalOptions, std::string{"+cl_khr_3d_image_writes "})); retVal = pProgram->build(pProgram->getDevices(), "-cl-std=CL2.0", false); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(hasSubstr(cip->buildInternalOptions, std::string{"+cl_khr_3d_image_writes "})); EXPECT_EQ(1, MockProgram::getInternalOptionsCalled); } TEST_F(ProgramFromSourceTest, WhenBuildingProgramWithOpenClC30ThenFeaturesAreAdded) { auto cip = new MockCompilerInterfaceCaptureBuildOptions(); auto pClDevice = pContext->getDevice(0); pClDevice->getExecutionEnvironment()->rootDeviceEnvironments[pClDevice->getRootDeviceIndex()]->compilerInterface.reset(cip); auto pProgram = std::make_unique(toClDeviceVector(*pClDevice)); pProgram->sourceCode = "__kernel mock() {}"; pProgram->createdFrom = Program::CreatedFrom::SOURCE; MockProgram::getInternalOptionsCalled = 0; auto extensionsOption = static_cast(devices[0])->peekCompilerExtensions(); auto extensionsWithFeaturesOption = static_cast(devices[0])->peekCompilerExtensionsWithFeatures(); EXPECT_FALSE(hasSubstr(cip->buildInternalOptions, extensionsOption)); EXPECT_FALSE(hasSubstr(cip->buildInternalOptions, extensionsWithFeaturesOption)); retVal = pProgram->build(pProgram->getDevices(), "-cl-std=CL3.0", false); EXPECT_EQ(CL_SUCCESS, retVal); 
EXPECT_FALSE(hasSubstr(cip->buildInternalOptions, extensionsOption)); EXPECT_TRUE(hasSubstr(cip->buildInternalOptions, extensionsWithFeaturesOption)); EXPECT_EQ(1, MockProgram::getInternalOptionsCalled); } TEST_F(ProgramFromSourceTest, WhenBuildingProgramWithOpenClC30ThenFeaturesAreAddedOnlyOnce) { auto cip = new MockCompilerInterfaceCaptureBuildOptions(); auto pClDevice = pContext->getDevice(0); pClDevice->getExecutionEnvironment()->rootDeviceEnvironments[pClDevice->getRootDeviceIndex()]->compilerInterface.reset(cip); auto pProgram = std::make_unique(toClDeviceVector(*pClDevice)); pProgram->sourceCode = "__kernel mock() {}"; pProgram->createdFrom = Program::CreatedFrom::SOURCE; retVal = pProgram->build(pProgram->getDevices(), "-cl-std=CL3.0", false); EXPECT_EQ(CL_SUCCESS, retVal); retVal = pProgram->build(pProgram->getDevices(), "-cl-std=CL3.0", false); EXPECT_EQ(CL_SUCCESS, retVal); auto extensionsWithFeaturesOption = pClDevice->peekCompilerExtensionsWithFeatures(); auto &internalOptions = cip->buildInternalOptions; auto pos = internalOptions.find(extensionsWithFeaturesOption); EXPECT_NE(std::string::npos, pos); pos = internalOptions.find(extensionsWithFeaturesOption, pos + 1); EXPECT_EQ(std::string::npos, pos); } TEST_F(ProgramFromSourceTest, WhenCompilingProgramThenFeaturesAndExtraExtensionsAreNotAdded) { auto pCompilerInterface = new MockCompilerInterfaceCaptureBuildOptions(); auto pClDevice = static_cast(devices[0]); pClDevice->getExecutionEnvironment()->rootDeviceEnvironments[pClDevice->getRootDeviceIndex()]->compilerInterface.reset(pCompilerInterface); auto extensionsOption = pClDevice->peekCompilerExtensions(); auto extensionsWithFeaturesOption = pClDevice->peekCompilerExtensionsWithFeatures(); EXPECT_FALSE(hasSubstr(pCompilerInterface->buildInternalOptions, extensionsOption)); EXPECT_FALSE(hasSubstr(pCompilerInterface->buildInternalOptions, extensionsWithFeaturesOption)); EXPECT_FALSE(hasSubstr(pCompilerInterface->buildInternalOptions, 
std::string{"+cl_khr_3d_image_writes "})); MockProgram::getInternalOptionsCalled = 0; retVal = pProgram->compile(pProgram->getDevices(), nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(hasSubstr(pCompilerInterface->buildInternalOptions, extensionsOption)); EXPECT_FALSE(hasSubstr(pCompilerInterface->buildInternalOptions, extensionsWithFeaturesOption)); EXPECT_FALSE(hasSubstr(pCompilerInterface->buildInternalOptions, std::string{"+cl_khr_3d_image_writes "})); EXPECT_EQ(1, MockProgram::getInternalOptionsCalled); } TEST_F(ProgramFromSourceTest, WhenCompilingProgramWithOpenClC20ThenExtraExtensionsAreAdded) { auto pCompilerInterface = new MockCompilerInterfaceCaptureBuildOptions(); auto pClDevice = static_cast(devices[0]); pClDevice->getExecutionEnvironment()->rootDeviceEnvironments[pClDevice->getRootDeviceIndex()]->compilerInterface.reset(pCompilerInterface); auto extensionsOption = pClDevice->peekCompilerExtensions(); auto extensionsWithFeaturesOption = pClDevice->peekCompilerExtensionsWithFeatures(); EXPECT_FALSE(hasSubstr(pCompilerInterface->buildInternalOptions, std::string{"+cl_khr_3d_image_writes "})); MockProgram::getInternalOptionsCalled = 0; retVal = pProgram->compile(pProgram->getDevices(), "-cl-std=CL2.0", 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(hasSubstr(pCompilerInterface->buildInternalOptions, std::string{"+cl_khr_3d_image_writes "})); EXPECT_EQ(1, MockProgram::getInternalOptionsCalled); } TEST_F(ProgramFromSourceTest, WhenCompilingProgramWithOpenClC30ThenFeaturesAreAdded) { auto pCompilerInterface = new MockCompilerInterfaceCaptureBuildOptions(); auto pClDevice = pContext->getDevice(0); pClDevice->getExecutionEnvironment()->rootDeviceEnvironments[pClDevice->getRootDeviceIndex()]->compilerInterface.reset(pCompilerInterface); auto pProgram = std::make_unique(toClDeviceVector(*pClDevice)); pProgram->sourceCode = "__kernel mock() {}"; pProgram->createdFrom = Program::CreatedFrom::SOURCE; auto extensionsOption 
= pClDevice->peekCompilerExtensions(); auto extensionsWithFeaturesOption = pClDevice->peekCompilerExtensionsWithFeatures(); EXPECT_FALSE(hasSubstr(pCompilerInterface->buildInternalOptions, extensionsOption)); EXPECT_FALSE(hasSubstr(pCompilerInterface->buildInternalOptions, extensionsWithFeaturesOption)); retVal = pProgram->compile(pProgram->getDevices(), "-cl-std=CL3.0", 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_FALSE(hasSubstr(pCompilerInterface->buildInternalOptions, extensionsOption)); EXPECT_TRUE(hasSubstr(pCompilerInterface->buildInternalOptions, extensionsWithFeaturesOption)); } class Callback { public: Callback() { this->oldCallback = MemoryManagement::deleteCallback; MemoryManagement::deleteCallback = thisCallback; } ~Callback() { MemoryManagement::deleteCallback = this->oldCallback; } static void watch(const void *p) { watchList[p] = 0u; } static void unwatch(const void *p) { EXPECT_GT(watchList[p], 0u); watchList.erase(p); } private: void (*oldCallback)(void *); static void thisCallback(void *p) { if (watchList.find(p) != watchList.end()) watchList[p]++; } static std::map watchList; }; std::map Callback::watchList; TEST_F(ProgramFromSourceTest, GivenDifferentCommpilerOptionsWhenBuildingProgramThenKernelHashesAreDifferent) { KernelBinaryHelper kbHelper(binaryFileName, true); auto rootDeviceIndex = pContext->getDevice(0)->getRootDeviceIndex(); CreateProgramWithSource( pContext, sourceFileName); Callback callback; retVal = pProgram->build(pProgram->getDevices(), nullptr, true); EXPECT_EQ(CL_SUCCESS, retVal); auto hash1 = pProgram->getCachedFileName(); auto kernel1 = pProgram->getKernelInfo("CopyBuffer", rootDeviceIndex); Callback::watch(kernel1); EXPECT_NE(nullptr, kernel1); retVal = pProgram->build(pProgram->getDevices(), CompilerOptions::fastRelaxedMath.data(), true); EXPECT_EQ(CL_SUCCESS, retVal); auto hash2 = pProgram->getCachedFileName(); auto kernel2 = pProgram->getKernelInfo("CopyBuffer", rootDeviceIndex); EXPECT_NE(nullptr, 
kernel2); EXPECT_NE(hash1, hash2); Callback::unwatch(kernel1); Callback::watch(kernel2); retVal = pProgram->build(pProgram->getDevices(), CompilerOptions::finiteMathOnly.data(), true); EXPECT_EQ(CL_SUCCESS, retVal); auto hash3 = pProgram->getCachedFileName(); auto kernel3 = pProgram->getKernelInfo("CopyBuffer", rootDeviceIndex); EXPECT_NE(nullptr, kernel3); EXPECT_NE(hash1, hash3); EXPECT_NE(hash2, hash3); Callback::unwatch(kernel2); Callback::watch(kernel3); pProgram->createdFrom = NEO::Program::CreatedFrom::BINARY; pProgram->setIrBinary(new char[16], true); pProgram->setIrBinarySize(16, true); retVal = pProgram->build(pProgram->getDevices(), nullptr, true); EXPECT_EQ(CL_SUCCESS, retVal); auto hash4 = pProgram->getCachedFileName(); auto kernel4 = pProgram->getKernelInfo("CopyBuffer", rootDeviceIndex); EXPECT_NE(nullptr, kernel4); EXPECT_EQ(hash3, hash4); Callback::unwatch(kernel3); Callback::watch(kernel4); pProgram->createdFrom = NEO::Program::CreatedFrom::SOURCE; retVal = pProgram->build(pProgram->getDevices(), nullptr, true); EXPECT_EQ(CL_SUCCESS, retVal); auto hash5 = pProgram->getCachedFileName(); auto kernel5 = pProgram->getKernelInfo("CopyBuffer", rootDeviceIndex); EXPECT_NE(nullptr, kernel5); EXPECT_EQ(hash1, hash5); Callback::unwatch(kernel4); } TEST_F(ProgramFromSourceTest, GivenEmptyProgramWhenCreatingProgramThenInvalidValueErrorIsReturned) { auto p = Program::create(pContext, 0, nullptr, nullptr, retVal); EXPECT_EQ(CL_INVALID_VALUE, retVal); EXPECT_EQ(nullptr, p); delete p; } TEST_F(ProgramFromSourceTest, GivenSpecificParamatersWhenCompilingProgramThenSuccessOrCorrectErrorCodeIsReturned) { CreateProgramWithSource( pContext, sourceFileName); cl_program inputHeaders; const char *headerIncludeNames = ""; cl_program nullprogram = nullptr; cl_program invprogram = (cl_program)pContext; // Order of following microtests is important - do not change. // Add new microtests at end. 
// invalid compile parameters: combinations of numInputHeaders==0 & inputHeaders & headerIncludeNames retVal = pProgram->compile(pProgram->getDevices(), nullptr, 0, &inputHeaders, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); retVal = pProgram->compile(pProgram->getDevices(), nullptr, 0, nullptr, &headerIncludeNames); EXPECT_EQ(CL_INVALID_VALUE, retVal); // invalid compile parameters: combinations of numInputHeaders!=0 & inputHeaders & headerIncludeNames retVal = pProgram->compile(pProgram->getDevices(), nullptr, 1, &inputHeaders, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); retVal = pProgram->compile(pProgram->getDevices(), nullptr, 1, nullptr, &headerIncludeNames); EXPECT_EQ(CL_INVALID_VALUE, retVal); // fail compilation - another compilation is already in progress pProgram->setBuildStatus(CL_BUILD_IN_PROGRESS); retVal = pProgram->compile(pProgram->getDevices(), nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_OPERATION, retVal); pProgram->setBuildStatus(CL_BUILD_NONE); // invalid compile parameters: invalid header Program object==nullptr retVal = pProgram->compile(pProgram->getDevices(), nullptr, 1, &nullprogram, &headerIncludeNames); EXPECT_EQ(CL_INVALID_PROGRAM, retVal); // invalid compile parameters: invalid header Program object==non Program object retVal = pProgram->compile(pProgram->getDevices(), nullptr, 1, &invprogram, &headerIncludeNames); EXPECT_EQ(CL_INVALID_PROGRAM, retVal); // compile successfully kernel with header std::string testFile; size_t sourceSize; MockProgram *p3; // header Program object testFile.append(clFiles); testFile.append("CopyBuffer_simd16.cl"); // header source file auto pSourceBuffer = loadDataFromFile(testFile.c_str(), sourceSize); EXPECT_NE(0u, sourceSize); EXPECT_NE(nullptr, pSourceBuffer); const char *sources[1] = {pSourceBuffer.get()}; p3 = Program::create(pContext, 1, sources, &sourceSize, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, p3); inputHeaders = p3; retVal = 
pProgram->compile(pProgram->getDevices(), nullptr, 1, &inputHeaders, &headerIncludeNames); EXPECT_EQ(CL_SUCCESS, retVal); // fail compilation of kernel with header - header is invalid p3->sourceCode = ""; // set header source code as non-existent (invalid) retVal = p3->compile(p3->getDevices(), nullptr, 1, &inputHeaders, &headerIncludeNames); EXPECT_EQ(CL_INVALID_PROGRAM, retVal); delete p3; // fail compilation - CompilerInterface cannot be obtained auto device = pContext->getDevice(0); auto executionEnvironment = device->getExecutionEnvironment(); std::unique_ptr rootDeviceEnvironment = std::make_unique(*executionEnvironment); std::swap(rootDeviceEnvironment, executionEnvironment->rootDeviceEnvironments[device->getRootDeviceIndex()]); auto p2 = std::make_unique(toClDeviceVector(*device)); retVal = p2->compile(p2->getDevices(), nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_OUT_OF_HOST_MEMORY, retVal); p2.reset(nullptr); std::swap(rootDeviceEnvironment, executionEnvironment->rootDeviceEnvironments[device->getRootDeviceIndex()]); // fail compilation - any compilation error (here caused by specifying unrecognized option) retVal = pProgram->compile(pProgram->getDevices(), "-invalid-option", 0, nullptr, nullptr); EXPECT_EQ(CL_COMPILE_PROGRAM_FAILURE, retVal); // compile successfully retVal = pProgram->compile(pProgram->getDevices(), nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(ProgramFromSourceTest, GivenFlagsWhenCompilingProgramThenBuildOptionsHaveBeenApplied) { auto cip = new MockCompilerInterfaceCaptureBuildOptions(); auto pDevice = pContext->getDevice(0); pDevice->getExecutionEnvironment()->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]->compilerInterface.reset(cip); auto program = std::make_unique(toClDeviceVector(*pDevice)); program->sourceCode = "__kernel mock() {}"; // Ask to build created program without NEO::CompilerOptions::gtpinRera and NEO::CompilerOptions::greaterThan4gbBuffersRequired flags. 
cl_int retVal = program->compile(pProgram->getDevices(), CompilerOptions::fastRelaxedMath.data(), 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); // Check build options that were applied EXPECT_TRUE(CompilerOptions::contains(cip->buildOptions, CompilerOptions::fastRelaxedMath)) << cip->buildOptions; EXPECT_FALSE(CompilerOptions::contains(cip->buildInternalOptions, CompilerOptions::gtpinRera)) << cip->buildInternalOptions; if (!pDevice->areSharedSystemAllocationsAllowed()) { EXPECT_FALSE(CompilerOptions::contains(cip->buildInternalOptions, CompilerOptions::greaterThan4gbBuffersRequired)) << cip->buildInternalOptions; } EXPECT_TRUE(CompilerOptions::contains(cip->buildInternalOptions, pPlatform->getClDevice(0)->peekCompilerExtensions())) << cip->buildInternalOptions; // Ask to build created program with NEO::CompilerOptions::gtpinRera and NEO::CompilerOptions::greaterThan4gbBuffersRequired flags. cip->buildOptions.clear(); cip->buildInternalOptions.clear(); auto options = CompilerOptions::concatenate(CompilerOptions::greaterThan4gbBuffersRequired, CompilerOptions::gtpinRera, CompilerOptions::finiteMathOnly); retVal = program->compile(pProgram->getDevices(), options.c_str(), 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); // Check build options that were applied EXPECT_FALSE(CompilerOptions::contains(cip->buildOptions, CompilerOptions::fastRelaxedMath)) << cip->buildOptions; EXPECT_TRUE(CompilerOptions::contains(cip->buildOptions, CompilerOptions::finiteMathOnly)) << cip->buildOptions; EXPECT_TRUE(CompilerOptions::contains(cip->buildInternalOptions, CompilerOptions::gtpinRera)) << cip->buildInternalOptions; EXPECT_TRUE(CompilerOptions::contains(cip->buildInternalOptions, CompilerOptions::greaterThan4gbBuffersRequired)) << cip->buildInternalOptions; EXPECT_TRUE(CompilerOptions::contains(cip->buildInternalOptions, pPlatform->getClDevice(0)->peekCompilerExtensions())) << cip->buildInternalOptions; } TEST_F(ProgramTests, 
GivenFlagsWhenLinkingProgramThenBuildOptionsHaveBeenApplied) { auto cip = new MockCompilerInterfaceCaptureBuildOptions(); auto pProgram = std::make_unique(toClDeviceVector(*pClDevice)); pProgram->sourceCode = "__kernel mock() {}"; pProgram->createdFrom = Program::CreatedFrom::SOURCE; MockProgram::getInternalOptionsCalled = 0; cl_program program = pProgram.get(); // compile successfully a kernel to be linked later cl_int retVal = pProgram->compile(pProgram->getDevices(), nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1, MockProgram::getInternalOptionsCalled); // Ask to link created program with NEO::CompilerOptions::gtpinRera and NEO::CompilerOptions::greaterThan4gbBuffersRequired flags. auto options = CompilerOptions::concatenate(CompilerOptions::greaterThan4gbBuffersRequired, CompilerOptions::gtpinRera, CompilerOptions::finiteMathOnly); pDevice->getExecutionEnvironment()->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]->compilerInterface.reset(cip); retVal = pProgram->link(pProgram->getDevices(), options.c_str(), 1, &program); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(2, MockProgram::getInternalOptionsCalled); // Check build options that were applied EXPECT_FALSE(CompilerOptions::contains(cip->buildOptions, CompilerOptions::fastRelaxedMath)) << cip->buildOptions; EXPECT_TRUE(CompilerOptions::contains(cip->buildOptions, CompilerOptions::finiteMathOnly)) << cip->buildOptions; EXPECT_TRUE(CompilerOptions::contains(cip->buildInternalOptions, CompilerOptions::gtpinRera)) << cip->buildInternalOptions; EXPECT_TRUE(CompilerOptions::contains(cip->buildInternalOptions, CompilerOptions::greaterThan4gbBuffersRequired)) << cip->buildInternalOptions; } TEST_F(ProgramFromSourceTest, GivenAdvancedOptionsWhenCreatingProgramThenSuccessIsReturned) { std::string testFile; size_t sourceSize = 0; Program *p; testFile.append(clFiles); testFile.append("CopyBuffer_simd16.cl"); auto pSourceBuffer = loadDataFromFile(testFile.c_str(), sourceSize); const char 
*sources[1] = {pSourceBuffer.get()}; EXPECT_NE(nullptr, pSourceBuffer); // According to spec: If lengths is NULL, all strings in the strings argument are considered null-terminated. p = Program::create(pContext, 1, sources, nullptr, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, p); delete p; // According to spec: If an element in lengths is zero, its accompanying string is null-terminated. p = Program::create(pContext, 1, sources, &sourceSize, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, p); delete p; std::stringstream dataStream(pSourceBuffer.get()); std::string line; std::vector lines; while (std::getline(dataStream, line, '\n')) { char *ptr = new char[line.length() + 1](); strcpy_s(ptr, line.length() + 1, line.c_str()); lines.push_back(ptr); } // Work on array of strings p = Program::create(pContext, 1, &lines[0], nullptr, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, p); delete p; std::vector sizes; for (auto ptr : lines) sizes.push_back(strlen(ptr)); sizes[sizes.size() / 2] = 0; p = Program::create(pContext, (cl_uint)sizes.size(), &lines[0], &sizes[0], retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, p); delete p; for (auto ptr : lines) delete[] ptr; } TEST_F(ProgramFromSourceTest, GivenSpecificParamatersWhenLinkingProgramThenSuccessOrCorrectErrorCodeIsReturned) { CreateProgramWithSource( pContext, sourceFileName); cl_program program = pProgram; cl_program nullprogram = nullptr; cl_program invprogram = (cl_program)pContext; // Order of following microtests is important - do not change. // Add new microtests at end. 
// invalid link parameters: combinations of numInputPrograms & inputPrograms retVal = pProgram->link(pProgram->getDevices(), nullptr, 0, &program); EXPECT_EQ(CL_INVALID_VALUE, retVal); retVal = pProgram->link(pProgram->getDevices(), nullptr, 1, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); // fail linking - another linking is already in progress pProgram->setBuildStatus(CL_BUILD_IN_PROGRESS); retVal = pProgram->link(pProgram->getDevices(), nullptr, 1, &program); EXPECT_EQ(CL_INVALID_OPERATION, retVal); pProgram->setBuildStatus(CL_BUILD_NONE); // invalid link parameters: invalid Program object==nullptr retVal = pProgram->link(pProgram->getDevices(), nullptr, 1, &nullprogram); EXPECT_EQ(CL_INVALID_PROGRAM, retVal); // invalid link parameters: invalid Program object==non Program object retVal = pProgram->link(pProgram->getDevices(), nullptr, 1, &invprogram); EXPECT_EQ(CL_INVALID_PROGRAM, retVal); // compile successfully a kernel to be linked later retVal = pProgram->compile(pProgram->getDevices(), nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); // fail linking - code to be linked does not exist bool isSpirvTmp = pProgram->getIsSpirV(); char *pIrBin = pProgram->irBinary.get(); pProgram->irBinary.release(); size_t irBinSize = pProgram->irBinarySize; pProgram->setIrBinary(nullptr, false); retVal = pProgram->link(pProgram->getDevices(), nullptr, 1, &program); EXPECT_EQ(CL_INVALID_PROGRAM, retVal); pProgram->setIrBinary(pIrBin, isSpirvTmp); // fail linking - size of code to be linked is == 0 pProgram->setIrBinarySize(0, isSpirvTmp); retVal = pProgram->link(pProgram->getDevices(), nullptr, 1, &program); EXPECT_EQ(CL_INVALID_PROGRAM, retVal); pProgram->setIrBinarySize(irBinSize, isSpirvTmp); // fail linking - any link error (here caused by specifying unrecognized option) retVal = pProgram->link(pProgram->getDevices(), "-invalid-option", 1, &program); EXPECT_EQ(CL_LINK_PROGRAM_FAILURE, retVal); // fail linking - linked code is corrupted and cannot be 
postprocessed auto p2 = std::make_unique(pProgram->getDevices()); retVal = p2->link(p2->getDevices(), nullptr, 1, &program); EXPECT_EQ(CL_INVALID_BINARY, retVal); p2.reset(nullptr); // link successfully retVal = pProgram->link(pProgram->getDevices(), nullptr, 1, &program); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(ProgramFromSourceTest, GivenInvalidOptionsWhenCreatingLibraryThenCorrectErrorIsReturned) { cl_program program = pProgram; // Order of following microtests is important - do not change. // Add new microtests at end. // compile successfully a kernel to be later used to create library retVal = pProgram->compile(pProgram->getDevices(), nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); // create library successfully retVal = pProgram->link(pProgram->getDevices(), CompilerOptions::createLibrary.data(), 1, &program); EXPECT_EQ(CL_SUCCESS, retVal); // fail library creation - any link error (here caused by specifying unrecognized option) retVal = pProgram->link(pProgram->getDevices(), CompilerOptions::concatenate(CompilerOptions::createLibrary, "-invalid-option").c_str(), 1, &program); EXPECT_EQ(CL_LINK_PROGRAM_FAILURE, retVal); auto device = pContext->getDevice(0); auto executionEnvironment = device->getExecutionEnvironment(); std::unique_ptr rootDeviceEnvironment = std::make_unique(*executionEnvironment); std::swap(rootDeviceEnvironment, executionEnvironment->rootDeviceEnvironments[device->getRootDeviceIndex()]); auto failingProgram = std::make_unique(toClDeviceVector(*device)); // fail library creation - CompilerInterface cannot be obtained retVal = failingProgram->link(failingProgram->getDevices(), CompilerOptions::createLibrary.data(), 1, &program); EXPECT_EQ(CL_OUT_OF_HOST_MEMORY, retVal); std::swap(rootDeviceEnvironment, executionEnvironment->rootDeviceEnvironments[device->getRootDeviceIndex()]); } class PatchTokenFromBinaryTest : public ProgramSimpleFixture { public: void SetUp() override { ProgramSimpleFixture::SetUp(); } void TearDown() 
override { ProgramSimpleFixture::TearDown(); } }; using PatchTokenTests = Test; template class CommandStreamReceiverMock : public UltCommandStreamReceiver { using BaseClass = UltCommandStreamReceiver; using BaseClass::BaseClass; public: void makeResident(GraphicsAllocation &graphicsAllocation) override { residency[graphicsAllocation.getUnderlyingBuffer()] = graphicsAllocation.getUnderlyingBufferSize(); CommandStreamReceiver::makeResident(graphicsAllocation); } void makeNonResident(GraphicsAllocation &graphicsAllocation) override { residency.erase(graphicsAllocation.getUnderlyingBuffer()); CommandStreamReceiver::makeNonResident(graphicsAllocation); } std::map residency; }; HWTEST_F(PatchTokenTests, givenKernelRequiringConstantAllocationWhenMakeResidentIsCalledThenConstantAllocationIsMadeResident) { CreateProgramFromBinary(pContext, pContext->getDevices(), "test_constant_memory"); ASSERT_NE(nullptr, pProgram); retVal = pProgram->build( pProgram->getDevices(), nullptr, false); ASSERT_EQ(CL_SUCCESS, retVal); auto pKernelInfo = pProgram->getKernelInfo("test", rootDeviceIndex); ASSERT_NE(nullptr, pProgram->getConstantSurface(pClDevice->getRootDeviceIndex())); uint32_t expected_values[] = {0xabcd5432u, 0xaabb5533u}; uint32_t *constBuff = reinterpret_cast(pProgram->getConstantSurface(pClDevice->getRootDeviceIndex())->getUnderlyingBuffer()); EXPECT_EQ(expected_values[0], constBuff[0]); EXPECT_EQ(expected_values[1], constBuff[1]); std::unique_ptr pKernel(Kernel::create(pProgram, *pKernelInfo, *pClDevice, &retVal)); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, pKernel); auto pCommandStreamReceiver = new CommandStreamReceiverMock(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); ASSERT_NE(nullptr, pCommandStreamReceiver); pDevice->resetCommandStreamReceiver(pCommandStreamReceiver); pCommandStreamReceiver->residency.clear(); pKernel->makeResident(*pCommandStreamReceiver); EXPECT_EQ(2u, pCommandStreamReceiver->residency.size()); 
auto &residencyVector = pCommandStreamReceiver->getResidencyAllocations(); // we expect kernel ISA here and constant allocation auto kernelIsa = pKernel->getKernelInfo().getGraphicsAllocation(); auto constantAllocation = pProgram->getConstantSurface(pDevice->getRootDeviceIndex()); auto element = std::find(residencyVector.begin(), residencyVector.end(), kernelIsa); EXPECT_NE(residencyVector.end(), element); element = std::find(residencyVector.begin(), residencyVector.end(), constantAllocation); EXPECT_NE(residencyVector.end(), element); auto crossThreadData = pKernel->getCrossThreadData(); uint32_t *constBuffGpuAddr = reinterpret_cast(pProgram->getConstantSurface(pContext->getDevice(0)->getRootDeviceIndex())->getGpuAddressToPatch()); uintptr_t *pDst = reinterpret_cast(crossThreadData + pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.globalConstantsSurfaceAddress.stateless); EXPECT_EQ(*pDst, reinterpret_cast(constBuffGpuAddr)); pCommandStreamReceiver->makeSurfacePackNonResident(pCommandStreamReceiver->getResidencyAllocations()); EXPECT_EQ(0u, pCommandStreamReceiver->residency.size()); std::vector surfaces; pKernel->getResidency(surfaces); EXPECT_EQ(2u, surfaces.size()); for (Surface *surface : surfaces) { delete surface; } } TEST_F(PatchTokenTests, WhenBuildingProgramThenGwsIsSet) { CreateProgramFromBinary(pContext, pContext->getDevices(), "kernel_data_param"); ASSERT_NE(nullptr, pProgram); retVal = pProgram->build( pProgram->getDevices(), nullptr, false); ASSERT_EQ(CL_SUCCESS, retVal); auto pKernelInfo = pProgram->getKernelInfo("test", rootDeviceIndex); ASSERT_NE(static_cast(-1), pKernelInfo->kernelDescriptor.payloadMappings.dispatchTraits.globalWorkSize[0]); ASSERT_NE(static_cast(-1), pKernelInfo->kernelDescriptor.payloadMappings.dispatchTraits.globalWorkSize[1]); ASSERT_NE(static_cast(-1), pKernelInfo->kernelDescriptor.payloadMappings.dispatchTraits.globalWorkSize[2]); } TEST_F(PatchTokenTests, WhenBuildingProgramThenLwsIsSet) { 
CreateProgramFromBinary(pContext, pContext->getDevices(), "kernel_data_param"); ASSERT_NE(nullptr, pProgram); retVal = pProgram->build( pProgram->getDevices(), nullptr, false); ASSERT_EQ(CL_SUCCESS, retVal); auto pKernelInfo = pProgram->getKernelInfo("test", rootDeviceIndex); ASSERT_NE(static_cast(-1), pKernelInfo->kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[0]); ASSERT_NE(static_cast(-1), pKernelInfo->kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[1]); ASSERT_NE(static_cast(-1), pKernelInfo->kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[2]); pKernelInfo = pProgram->getKernelInfo("test_get_local_size", rootDeviceIndex); ASSERT_NE(static_cast(-1), pKernelInfo->kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[0]); ASSERT_NE(static_cast(-1), pKernelInfo->kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[1]); ASSERT_NE(static_cast(-1), pKernelInfo->kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize[2]); ASSERT_NE(static_cast(-1), pKernelInfo->kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize2[0]); ASSERT_NE(static_cast(-1), pKernelInfo->kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize2[1]); ASSERT_NE(static_cast(-1), pKernelInfo->kernelDescriptor.payloadMappings.dispatchTraits.localWorkSize2[2]); } TEST_F(PatchTokenTests, WhenBuildingProgramThenConstantKernelArgsAreAvailable) { // PATCH_TOKEN_STATELESS_CONSTANT_MEMORY_OBJECT_KERNEL_ARGUMENT CreateProgramFromBinary(pContext, pContext->getDevices(), "test_basic_constant"); ASSERT_NE(nullptr, pProgram); retVal = pProgram->build( pProgram->getDevices(), nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); auto pKernelInfo = pProgram->getKernelInfo("constant_kernel", rootDeviceIndex); ASSERT_NE(nullptr, pKernelInfo); auto pKernel = Kernel::create( pProgram, *pKernelInfo, *pClDevice, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, pKernel); uint32_t numArgs; retVal = pKernel->getInfo(CL_KERNEL_NUM_ARGS, 
sizeof(numArgs), &numArgs, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(3u, numArgs); uint32_t sizeOfPtr = sizeof(void *); EXPECT_EQ(pKernelInfo->getArgDescriptorAt(0).as().pointerSize, sizeOfPtr); EXPECT_EQ(pKernelInfo->getArgDescriptorAt(1).as().pointerSize, sizeOfPtr); delete pKernel; } TEST_F(PatchTokenTests, GivenVmeKernelWhenBuildingKernelThenArgAvailable) { if (!pDevice->getHardwareInfo().capabilityTable.supportsVme) { GTEST_SKIP(); } // PATCH_TOKEN_INLINE_VME_SAMPLER_INFO token indicates a VME kernel. CreateProgramFromBinary(pContext, pContext->getDevices(), "vme_kernels"); ASSERT_NE(nullptr, pProgram); retVal = pProgram->build( pProgram->getDevices(), nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); auto pKernelInfo = pProgram->getKernelInfo("device_side_block_motion_estimate_intel", rootDeviceIndex); ASSERT_NE(nullptr, pKernelInfo); EXPECT_EQ(true, pKernelInfo->kernelDescriptor.kernelAttributes.flags.usesVme); auto pKernel = Kernel::create( pProgram, *pKernelInfo, *pClDevice, &retVal); ASSERT_NE(nullptr, pKernel); delete pKernel; } class ProgramPatchTokenFromBinaryTest : public ProgramSimpleFixture { public: void SetUp() override { ProgramSimpleFixture::SetUp(); } void TearDown() override { ProgramSimpleFixture::TearDown(); } }; typedef Test ProgramPatchTokenTests; TEST(ProgramFromBinaryTests, givenBinaryWithInvalidICBEThenErrorIsReturned) { cl_int retVal = CL_INVALID_BINARY; SProgramBinaryHeader binHeader; memset(&binHeader, 0, sizeof(binHeader)); binHeader.Magic = iOpenCL::MAGIC_CL; binHeader.Version = iOpenCL::CURRENT_ICBE_VERSION - 3; binHeader.Device = defaultHwInfo->platform.eRenderCoreFamily; binHeader.GPUPointerSizeInBytes = 8; binHeader.NumberOfKernels = 0; binHeader.SteppingId = 0; binHeader.PatchListSize = 0; size_t binSize = sizeof(SProgramBinaryHeader); { const unsigned char *binaries[1] = {reinterpret_cast(&binHeader)}; MockContext context; std::unique_ptr pProgram(Program::create(&context, context.getDevices(), &binSize, binaries, 
nullptr, retVal)); EXPECT_EQ(nullptr, pProgram.get()); EXPECT_EQ(CL_INVALID_BINARY, retVal); } { // whatever method we choose CL_INVALID_BINARY is always returned auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr, mockRootDeviceIndex)); std::unique_ptr pProgram(Program::createBuiltInFromGenBinary(nullptr, toClDeviceVector(*device), &binHeader, binSize, &retVal)); ASSERT_NE(nullptr, pProgram.get()); EXPECT_EQ(CL_SUCCESS, retVal); retVal = pProgram->processGenBinary(*device); EXPECT_EQ(CL_INVALID_BINARY, retVal); } } TEST(ProgramFromBinaryTests, givenEmptyProgramThenErrorIsReturned) { cl_int retVal = CL_INVALID_BINARY; SProgramBinaryHeader binHeader; memset(&binHeader, 0, sizeof(binHeader)); binHeader.Magic = iOpenCL::MAGIC_CL; binHeader.Version = iOpenCL::CURRENT_ICBE_VERSION; binHeader.Device = defaultHwInfo->platform.eRenderCoreFamily; binHeader.GPUPointerSizeInBytes = 8; binHeader.NumberOfKernels = 0; binHeader.SteppingId = 0; binHeader.PatchListSize = 0; size_t binSize = sizeof(SProgramBinaryHeader); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr, mockRootDeviceIndex)); std::unique_ptr pProgram(MockProgram::createBuiltInFromGenBinary(nullptr, toClDeviceVector(*device), &binHeader, binSize, &retVal)); ASSERT_NE(nullptr, pProgram.get()); EXPECT_EQ(CL_SUCCESS, retVal); auto rootDeviceIndex = mockRootDeviceIndex; pProgram->buildInfos[rootDeviceIndex].unpackedDeviceBinary.reset(nullptr); retVal = pProgram->processGenBinary(*device); EXPECT_EQ(CL_INVALID_BINARY, retVal); } using ProgramWithDebugSymbolsTests = Test; TEST_F(ProgramWithDebugSymbolsTests, GivenProgramCreatedWithDashGOptionWhenGettingProgramBinariesThenDebugDataIsIncluded) { CreateProgramFromBinary(pContext, pContext->getDevices(), "CopyBuffer_simd16", "-g"); ASSERT_NE(nullptr, pProgram); retVal = pProgram->build( pProgram->getDevices(), "-g", false); EXPECT_EQ(CL_SUCCESS, retVal); size_t paramValueSize = sizeof(size_t); size_t 
paramValueSizeRet = 0; size_t size = 0; pProgram->buildInfos[rootDeviceIndex].packedDeviceBinary.reset(); pProgram->buildInfos[rootDeviceIndex].packedDeviceBinarySize = 0U; retVal = pProgram->packDeviceBinary(*pClDevice); retVal = pProgram->getInfo( CL_PROGRAM_BINARY_SIZES, paramValueSize, &size, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); auto testBinary = std::make_unique(size); retVal = pProgram->getInfo( CL_PROGRAM_BINARIES, paramValueSize, &testBinary, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); ArrayRef archive(reinterpret_cast(testBinary.get()), size); auto productAbbreviation = hardwarePrefix[pDevice->getHardwareInfo().platform.eProductFamily]; TargetDevice targetDevice = NEO::targetDeviceFromHwInfo(pDevice->getHardwareInfo()); std::string decodeErrors; std::string decodeWarnings; auto singleDeviceBinary = unpackSingleDeviceBinary(archive, ConstStringRef(productAbbreviation, strlen(productAbbreviation)), targetDevice, decodeErrors, decodeWarnings); EXPECT_FALSE(singleDeviceBinary.debugData.empty()); } TEST_F(ProgramTests, WhenProgramIsCreatedThenCorrectOclVersionIsInOptions) { DebugManagerStateRestore restorer; DebugManager.flags.DisableStatelessToStatefulOptimization.set(false); MockProgram program(pContext, false, toClDeviceVector(*pClDevice)); auto internalOptions = program.getInternalOptions(); if (pClDevice->getEnabledClVersion() == 30) { EXPECT_TRUE(CompilerOptions::contains(internalOptions, "-ocl-version=300")) << internalOptions; } else if (pClDevice->getEnabledClVersion() == 21) { EXPECT_TRUE(CompilerOptions::contains(internalOptions, "-ocl-version=210")) << internalOptions; } else { EXPECT_TRUE(CompilerOptions::contains(internalOptions, "-ocl-version=120")) << internalOptions; } } TEST_F(ProgramTests, GivenForcedClVersionWhenProgramIsCreatedThenCorrectOclOptionIsPresent) { std::pair testedValues[] = { {0, "-ocl-version=120"}, {12, "-ocl-version=120"}, {21, "-ocl-version=210"}, {30, "-ocl-version=300"}}; for (auto &testedValue : testedValues) { 
pClDevice->enabledClVersion = testedValue.first; MockProgram program{pContext, false, toClDeviceVector(*pClDevice)}; auto internalOptions = program.getInternalOptions(); EXPECT_TRUE(CompilerOptions::contains(internalOptions, testedValue.second)); } } TEST_F(ProgramTests, GivenStatelessToStatefulIsDisabledWhenProgramIsCreatedThenGreaterThan4gbBuffersRequiredOptionIsSet) { DebugManagerStateRestore restorer; DebugManager.flags.DisableStatelessToStatefulOptimization.set(true); MockProgram program(pContext, false, toClDeviceVector(*pClDevice)); auto internalOptions = program.getInternalOptions(); EXPECT_TRUE(CompilerOptions::contains(internalOptions, NEO::CompilerOptions::greaterThan4gbBuffersRequired)); } TEST_F(ProgramTests, WhenCreatingProgramThenBindlessIsEnabledOnlyIfDebugFlagIsEnabled) { using namespace testing; DebugManagerStateRestore restorer; { DebugManager.flags.UseBindlessMode.set(0); MockProgram programNoBindless(pContext, false, toClDeviceVector(*pClDevice)); auto internalOptionsNoBindless = programNoBindless.getInternalOptions(); EXPECT_FALSE(CompilerOptions::contains(internalOptionsNoBindless, CompilerOptions::bindlessMode)) << internalOptionsNoBindless; } { DebugManager.flags.UseBindlessMode.set(1); MockProgram programBindless(pContext, false, toClDeviceVector(*pClDevice)); auto internalOptionsBindless = programBindless.getInternalOptions(); EXPECT_TRUE(CompilerOptions::contains(internalOptionsBindless, CompilerOptions::bindlessMode)) << internalOptionsBindless; } } TEST_F(ProgramTests, givenDeviceThatSupportsSharedSystemMemoryAllocationWhenProgramIsCompiledThenItForcesStatelessCompilation) { pClDevice->deviceInfo.sharedSystemMemCapabilities = CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL | CL_UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL | CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS_INTEL | CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS_INTEL; pClDevice->getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable.sharedSystemMemCapabilities = 1; 
MockProgram program(pContext, false, toClDeviceVector(*pClDevice)); auto internalOptions = program.getInternalOptions(); EXPECT_TRUE(CompilerOptions::contains(internalOptions.c_str(), CompilerOptions::greaterThan4gbBuffersRequired)) << internalOptions; } TEST_F(ProgramTests, GivenForce32BitAddressessWhenProgramIsCreatedThenGreaterThan4gbBuffersRequiredIsCorrectlySet) { DebugManagerStateRestore dbgRestorer; cl_int retVal = CL_DEVICE_NOT_FOUND; DebugManager.flags.DisableStatelessToStatefulOptimization.set(false); if (pDevice) { const_cast(&pDevice->getDeviceInfo())->force32BitAddressess = true; MockProgram program(pContext, false, toClDeviceVector(*pClDevice)); auto internalOptions = program.getInternalOptions(); if (pDevice->areSharedSystemAllocationsAllowed()) { EXPECT_TRUE(CompilerOptions::contains(internalOptions, CompilerOptions::greaterThan4gbBuffersRequired)) << internalOptions; } else { EXPECT_FALSE(CompilerOptions::contains(internalOptions, NEO::CompilerOptions::greaterThan4gbBuffersRequired)) << internalOptions; } } else { EXPECT_NE(CL_DEVICE_NOT_FOUND, retVal); } } TEST_F(ProgramTests, Given32bitSupportWhenProgramIsCreatedThenGreaterThan4gbBuffersRequiredIsCorrectlySet) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.DisableStatelessToStatefulOptimization.set(false); std::unique_ptr program{Program::createBuiltInFromSource("", pContext, pContext->getDevices(), nullptr)}; auto internalOptions = program->getInternalOptions(); if ((false == pDevice->areSharedSystemAllocationsAllowed()) && (false == is32bit)) { EXPECT_FALSE(CompilerOptions::contains(internalOptions, NEO::CompilerOptions::greaterThan4gbBuffersRequired)) << internalOptions; } else { EXPECT_TRUE(CompilerOptions::contains(internalOptions, NEO::CompilerOptions::greaterThan4gbBuffersRequired)) << internalOptions; } } TEST_F(ProgramTests, GivenStatelessToStatefulIsDisabledWhenProgramIsCreatedThenGreaterThan4gbBuffersRequiredIsCorrectlySet) { DebugManagerStateRestore dbgRestorer; 
DebugManager.flags.DisableStatelessToStatefulOptimization.set(true); std::unique_ptr program{Program::createBuiltInFromSource("", pContext, pContext->getDevices(), nullptr)}; auto internalOptions = program->getInternalOptions(); EXPECT_TRUE(CompilerOptions::contains(internalOptions, NEO::CompilerOptions::greaterThan4gbBuffersRequired)) << internalOptions; } TEST_F(ProgramTests, givenProgramWhenItIsCompiledThenItAlwaysHavePreserveVec3TypeInternalOptionSet) { std::unique_ptr program(Program::createBuiltInFromSource("", pContext, pContext->getDevices(), nullptr)); auto internalOptions = program->getInternalOptions(); EXPECT_TRUE(CompilerOptions::contains(internalOptions, CompilerOptions::preserveVec3Type)) << internalOptions; } TEST_F(ProgramTests, Force32BitAddressessWhenProgramIsCreatedThenGreaterThan4gbBuffersRequiredIsCorrectlySet) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.DisableStatelessToStatefulOptimization.set(false); const_cast(&pDevice->getDeviceInfo())->force32BitAddressess = true; std::unique_ptr program{Program::createBuiltInFromSource("", pContext, pContext->getDevices(), nullptr)}; auto internalOptions = program->getInternalOptions(); if (is32bit) { EXPECT_TRUE(CompilerOptions::contains(internalOptions, CompilerOptions::greaterThan4gbBuffersRequired)) << internalOptions; } else { if (false == pDevice->areSharedSystemAllocationsAllowed()) { EXPECT_FALSE(CompilerOptions::contains(internalOptions, NEO::CompilerOptions::greaterThan4gbBuffersRequired)) << internalOptions; } else { EXPECT_TRUE(CompilerOptions::contains(internalOptions, NEO::CompilerOptions::greaterThan4gbBuffersRequired)) << internalOptions; } } } TEST_F(ProgramTests, GivenStatelessToStatefulBufferOffsetOptimizationWhenProgramIsCreatedThenBufferOffsetArgIsSet) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.EnableStatelessToStatefulBufferOffsetOpt.set(1); cl_int errorCode = CL_SUCCESS; const char programSource[] = "program"; const char *programPointer = 
programSource; const char **programSources = reinterpret_cast(&programPointer); size_t length = sizeof(programSource); std::unique_ptr program(Program::create(pContext, 1u, programSources, &length, errorCode)); auto internalOptions = program->getInternalOptions(); EXPECT_TRUE(CompilerOptions::contains(internalOptions, CompilerOptions::hasBufferOffsetArg)) << internalOptions; } TEST_F(ProgramTests, givenStatelessToStatefullOptimizationOffWHenProgramIsCreatedThenOptimizationStringIsNotPresent) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.EnableStatelessToStatefulBufferOffsetOpt.set(0); cl_int errorCode = CL_SUCCESS; const char programSource[] = "program"; const char *programPointer = programSource; const char **programSources = reinterpret_cast(&programPointer); size_t length = sizeof(programSource); std::unique_ptr program(Program::create(pContext, 1u, programSources, &length, errorCode)); auto internalOptions = program->getInternalOptions(); EXPECT_FALSE(CompilerOptions::contains(internalOptions, CompilerOptions::hasBufferOffsetArg)) << internalOptions; } TEST_F(ProgramTests, GivenContextWhenCreateProgramThenIncrementContextRefCount) { auto initialApiRefCount = pContext->getReference(); auto initialInternalRefCount = pContext->getRefInternalCount(); MockProgram *program = new MockProgram(pContext, false, pContext->getDevices()); EXPECT_EQ(pContext->getReference(), initialApiRefCount); EXPECT_EQ(pContext->getRefInternalCount(), initialInternalRefCount + 1); program->release(); EXPECT_EQ(pContext->getReference(), initialApiRefCount); EXPECT_EQ(pContext->getRefInternalCount(), initialInternalRefCount); } TEST_F(ProgramTests, GivenContextWhenCreateProgramFromSourceThenIncrementContextRefCount) { auto initialApiRefCount = pContext->getReference(); auto initialInternalRefCount = pContext->getRefInternalCount(); auto tempProgram = new Program(nullptr, false, pContext->getDevices()); EXPECT_FALSE(tempProgram->getIsBuiltIn()); auto program = new 
Program(pContext, false, pContext->getDevices()); EXPECT_FALSE(program->getIsBuiltIn()); EXPECT_EQ(pContext->getReference(), initialApiRefCount); EXPECT_EQ(pContext->getRefInternalCount(), initialInternalRefCount + 1); program->release(); EXPECT_EQ(pContext->getReference(), initialApiRefCount); EXPECT_EQ(pContext->getRefInternalCount(), initialInternalRefCount); tempProgram->release(); EXPECT_EQ(pContext->getReference(), initialApiRefCount); EXPECT_EQ(pContext->getRefInternalCount(), initialInternalRefCount); } TEST_F(ProgramTests, GivenContextWhenCreateBuiltInProgramFromSourceThenDontIncrementContextRefCount) { auto initialApiRefCount = pContext->getReference(); auto initialInternalRefCount = pContext->getRefInternalCount(); auto tempProgram = new Program(nullptr, true, pContext->getDevices()); EXPECT_TRUE(tempProgram->getIsBuiltIn()); auto program = new Program(pContext, true, pContext->getDevices()); EXPECT_TRUE(program->getIsBuiltIn()); EXPECT_EQ(pContext->getReference(), initialApiRefCount); EXPECT_EQ(pContext->getRefInternalCount(), initialInternalRefCount); program->release(); EXPECT_EQ(pContext->getReference(), initialApiRefCount); EXPECT_EQ(pContext->getRefInternalCount(), initialInternalRefCount); tempProgram->release(); EXPECT_EQ(pContext->getReference(), initialApiRefCount); EXPECT_EQ(pContext->getRefInternalCount(), initialInternalRefCount); } TEST_F(ProgramTests, WhenBuildingProgramThenPointerToProgramIsReturned) { cl_int retVal = CL_DEVICE_NOT_FOUND; Program *pProgram = Program::createBuiltInFromSource("", pContext, pContext->getDevices(), &retVal); EXPECT_NE(nullptr, pProgram); EXPECT_EQ(CL_SUCCESS, retVal); delete pProgram; pProgram = Program::createBuiltInFromSource("", pContext, pContext->getDevices(), nullptr); EXPECT_NE(nullptr, pProgram); delete pProgram; } TEST_F(ProgramTests, GivenNullBinaryWhenCreatingProgramFromGenBinaryThenInvalidValueErrorIsReturned) { cl_int retVal = CL_SUCCESS; Program *pProgram = 
Program::createBuiltInFromGenBinary(pContext, pContext->getDevices(), nullptr, 0, &retVal);
    EXPECT_EQ(nullptr, pProgram);
    EXPECT_NE(CL_SUCCESS, retVal);
}

// A non-empty binary handed to createBuiltInFromGenBinary must yield a built-in
// program reported as an executable with a successful build status.
TEST_F(ProgramTests, WhenCreatingProgramFromGenBinaryThenSuccessIsReturned) {
    cl_int retVal = CL_INVALID_BINARY;
    char binary[10] = {1, 2, 3, 4, 5, 6, 7, 8, 9, '\0'};
    size_t size = 10;

    std::unique_ptr<Program> pProgram(Program::createBuiltInFromGenBinary(pContext, pContext->getDevices(), binary, size, &retVal));
    // Fatal assertion: everything below dereferences the program.
    ASSERT_NE(nullptr, pProgram);
    EXPECT_EQ(CL_SUCCESS, retVal);

    EXPECT_EQ(static_cast<uint32_t>(CL_PROGRAM_BINARY_TYPE_EXECUTABLE), static_cast<uint32_t>(pProgram->getProgramBinaryType(pClDevice)));
    EXPECT_TRUE(pProgram->getIsBuiltIn());

    cl_device_id deviceId = pContext->getDevice(0);
    // Start from a value different from CL_BUILD_SUCCESS (which is 0) so the
    // check below cannot pass when the query leaves `status` untouched.
    cl_build_status status = CL_BUILD_NONE;
    EXPECT_EQ(CL_SUCCESS, pProgram->getBuildInfo(deviceId, CL_PROGRAM_BUILD_STATUS, sizeof(cl_build_status), &status, nullptr));
    EXPECT_EQ(CL_BUILD_SUCCESS, status);
}

// Same scenario as above, but the optional error-code out-parameter is nullptr.
TEST_F(ProgramTests, GivenRetValNullPointerWhenCreatingProgramFromGenBinaryThenSuccessIsReturned) {
    char binary[10] = {1, 2, 3, 4, 5, 6, 7, 8, 9, '\0'};
    size_t size = 10;

    std::unique_ptr<Program> pProgram(Program::createBuiltInFromGenBinary(pContext, pContext->getDevices(), binary, size, nullptr));
    // Fatal assertion: everything below dereferences the program.
    ASSERT_NE(nullptr, pProgram);

    EXPECT_EQ(static_cast<uint32_t>(CL_PROGRAM_BINARY_TYPE_EXECUTABLE), static_cast<uint32_t>(pProgram->getProgramBinaryType(pClDevice)));

    cl_device_id deviceId = pContext->getDevice(0);
    // See the note in the previous test: 0 is CL_BUILD_SUCCESS, so seed with a
    // different value.
    cl_build_status status = CL_BUILD_NONE;
    EXPECT_EQ(CL_SUCCESS, pProgram->getBuildInfo(deviceId, CL_PROGRAM_BUILD_STATUS, sizeof(cl_build_status), &status, nullptr));
    EXPECT_EQ(CL_BUILD_SUCCESS, status);
}

TEST_F(ProgramTests, GivenNullContextWhenCreatingProgramFromGenBinaryThenSuccessIsReturned) {
    cl_int retVal = CL_INVALID_BINARY;
    char binary[10] = {1, 2, 3, 4, 5, 6, 7, 8, 9, '\0'};
    size_t size = 10;
    Program *pProgram = Program::createBuiltInFromGenBinary(nullptr, toClDeviceVector(*pClDevice), binary, size, &retVal);
    EXPECT_NE(nullptr, pProgram);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ((uint32_t)CL_PROGRAM_BINARY_TYPE_EXECUTABLE,
(uint32_t)pProgram->getProgramBinaryType(pClDevice)); cl_build_status status = 0; pProgram->getBuildInfo(pClDevice, CL_PROGRAM_BUILD_STATUS, sizeof(cl_build_status), &status, nullptr); EXPECT_EQ(CL_BUILD_SUCCESS, status); delete pProgram; } TEST_F(ProgramTests, givenValidZebinPrepareLinkerInput) { ZebinTestData::ValidEmptyProgram zebin; const std::string validZeInfo = std::string("version :\'") + toString(zeInfoDecoderVersion) + R"===(' kernels: - name : some_kernel execution_env : simd_size : 8 )==="; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr, mockRootDeviceIndex)); { auto program = std::make_unique(nullptr, false, toClDeviceVector(*pClDevice)); program->buildInfos[rootDeviceIndex].unpackedDeviceBinary = makeCopy(zebin.storage.data(), zebin.storage.size()); program->buildInfos[rootDeviceIndex].unpackedDeviceBinarySize = zebin.storage.size(); auto retVal = program->processGenBinary(*pClDevice); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, program->buildInfos[rootDeviceIndex].linkerInput.get()); } { zebin.removeSection(NEO::Elf::SHT_ZEBIN::SHT_ZEBIN_ZEINFO, NEO::Elf::SectionsNamesZebin::zeInfo); zebin.appendSection(NEO::Elf::SHT_ZEBIN::SHT_ZEBIN_ZEINFO, NEO::Elf::SectionsNamesZebin::zeInfo, ArrayRef::fromAny(validZeInfo.data(), validZeInfo.size())); zebin.appendSection(NEO::Elf::SHT_PROGBITS, NEO::Elf::SectionsNamesZebin::textPrefix.str() + "some_kernel", {}); auto program = std::make_unique(nullptr, false, toClDeviceVector(*pClDevice)); program->buildInfos[rootDeviceIndex].unpackedDeviceBinary = makeCopy(zebin.storage.data(), zebin.storage.size()); program->buildInfos[rootDeviceIndex].unpackedDeviceBinarySize = zebin.storage.size(); auto retVal = program->processGenBinary(*pClDevice); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, program->buildInfos[rootDeviceIndex].linkerInput.get()); } } TEST_F(ProgramTests, givenProgramFromGenBinaryWhenSLMSizeIsBiggerThenDeviceLimitThenReturnError) { 
PatchTokensTestData::ValidProgramWithKernelUsingSlm patchtokensProgram; patchtokensProgram.slmMutable->TotalInlineLocalMemorySize = static_cast(pDevice->getDeviceInfo().localMemSize * 2); patchtokensProgram.recalcTokPtr(); auto program = std::make_unique(nullptr, false, toClDeviceVector(*pClDevice)); program->buildInfos[rootDeviceIndex].unpackedDeviceBinary = makeCopy(patchtokensProgram.storage.data(), patchtokensProgram.storage.size()); program->buildInfos[rootDeviceIndex].unpackedDeviceBinarySize = patchtokensProgram.storage.size(); auto retVal = program->processGenBinary(*pClDevice); EXPECT_EQ(CL_OUT_OF_RESOURCES, retVal); } TEST_F(ProgramTests, givenExistingConstantSurfacesWhenProcessGenBinaryThenCleanupTheSurfaceOnlyForSpecificDevice) { PatchTokensTestData::ValidProgramWithKernelUsingSlm patchtokensProgram; auto program = std::make_unique(nullptr, false, toClDeviceVector(*pClDevice)); program->buildInfos.resize(2); program->buildInfos[0].constantSurface = pDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties({rootDeviceIndex, MemoryConstants::cacheLineSize, AllocationType::CONSTANT_SURFACE, pDevice->getDeviceBitfield()}); program->buildInfos[1].constantSurface = pDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties({rootDeviceIndex, MemoryConstants::cacheLineSize, AllocationType::CONSTANT_SURFACE, pDevice->getDeviceBitfield()}); program->buildInfos[rootDeviceIndex].unpackedDeviceBinary = makeCopy(patchtokensProgram.storage.data(), patchtokensProgram.storage.size()); program->buildInfos[rootDeviceIndex].unpackedDeviceBinarySize = patchtokensProgram.storage.size(); auto constantSurface0 = program->buildInfos[0].constantSurface; EXPECT_NE(nullptr, constantSurface0); auto constantSurface1 = program->buildInfos[1].constantSurface; EXPECT_NE(nullptr, constantSurface1); auto retVal = program->processGenBinary(*pClDevice); EXPECT_EQ(nullptr, program->buildInfos[0].constantSurface); EXPECT_EQ(constantSurface1, 
program->buildInfos[1].constantSurface); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(ProgramTests, givenExistingGlobalSurfacesWhenProcessGenBinaryThenCleanupTheSurfaceOnlyForSpecificDevice) { PatchTokensTestData::ValidProgramWithKernelUsingSlm patchtokensProgram; auto program = std::make_unique(nullptr, false, toClDeviceVector(*pClDevice)); program->buildInfos.resize(2); program->buildInfos[0].globalSurface = pDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties({rootDeviceIndex, MemoryConstants::cacheLineSize, AllocationType::GLOBAL_SURFACE, pDevice->getDeviceBitfield()}); program->buildInfos[1].globalSurface = pDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties({rootDeviceIndex, MemoryConstants::cacheLineSize, AllocationType::GLOBAL_SURFACE, pDevice->getDeviceBitfield()}); program->buildInfos[rootDeviceIndex].unpackedDeviceBinary = makeCopy(patchtokensProgram.storage.data(), patchtokensProgram.storage.size()); program->buildInfos[rootDeviceIndex].unpackedDeviceBinarySize = patchtokensProgram.storage.size(); auto globalSurface0 = program->buildInfos[0].globalSurface; EXPECT_NE(nullptr, globalSurface0); auto globalSurface1 = program->buildInfos[1].globalSurface; EXPECT_NE(nullptr, globalSurface1); auto retVal = program->processGenBinary(*pClDevice); EXPECT_EQ(nullptr, program->buildInfos[0].globalSurface); EXPECT_EQ(globalSurface1, program->buildInfos[1].globalSurface); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(ProgramTests, GivenNoCompilerInterfaceRootDeviceEnvironmentWhenRebuildingBinaryThenOutOfHostMemoryErrorIsReturned) { auto pDevice = pContext->getDevice(0); auto executionEnvironment = pDevice->getExecutionEnvironment(); std::unique_ptr rootDeviceEnvironment = std::make_unique(*executionEnvironment); rootDeviceEnvironment->setHwInfo(&pDevice->getHardwareInfo()); std::swap(rootDeviceEnvironment, executionEnvironment->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]); auto program = std::make_unique(toClDeviceVector(*pDevice)); 
EXPECT_NE(nullptr, program); // Load a binary program file std::string filePath; retrieveBinaryKernelFilename(filePath, "CopyBuffer_simd16_", ".bin"); size_t binarySize = 0; auto pBinary = loadDataFromFile(filePath.c_str(), binarySize); EXPECT_NE(0u, binarySize); // Create program from loaded binary cl_int retVal = program->createProgramFromBinary(pBinary.get(), binarySize, *pClDevice); EXPECT_EQ(CL_SUCCESS, retVal); // Ask to rebuild program from its IR binary - it should fail (no Compiler Interface) retVal = program->rebuildProgramFromIr(); EXPECT_EQ(CL_OUT_OF_HOST_MEMORY, retVal); std::swap(rootDeviceEnvironment, executionEnvironment->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]); } TEST_F(ProgramTests, GivenGtpinReraFlagWhenBuildingProgramThenCorrectOptionsAreSet) { auto cip = new MockCompilerInterfaceCaptureBuildOptions(); auto pDevice = pContext->getDevice(0); pDevice->getExecutionEnvironment()->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]->compilerInterface.reset(cip); auto program = std::make_unique(toClDeviceVector(*pDevice)); program->sourceCode = "__kernel mock() {}"; program->createdFrom = Program::CreatedFrom::SOURCE; // Ask to build created program without NEO::CompilerOptions::gtpinRera flag. cl_int retVal = program->build(program->getDevices(), CompilerOptions::fastRelaxedMath.data(), false); EXPECT_EQ(CL_SUCCESS, retVal); // Check build options that were applied EXPECT_TRUE(CompilerOptions::contains(cip->buildOptions, CompilerOptions::fastRelaxedMath)) << cip->buildOptions; EXPECT_FALSE(CompilerOptions::contains(cip->buildOptions, CompilerOptions::gtpinRera)) << cip->buildInternalOptions; // Ask to build created program with NEO::CompilerOptions::gtpinRera flag. 
cip->buildOptions.clear(); cip->buildInternalOptions.clear(); retVal = program->build(program->getDevices(), CompilerOptions::concatenate(CompilerOptions::gtpinRera, CompilerOptions::finiteMathOnly).c_str(), false); EXPECT_EQ(CL_SUCCESS, retVal); // Check build options that were applied EXPECT_FALSE(CompilerOptions::contains(cip->buildOptions, CompilerOptions::fastRelaxedMath)) << cip->buildOptions; EXPECT_TRUE(CompilerOptions::contains(cip->buildOptions, CompilerOptions::finiteMathOnly)) << cip->buildOptions; EXPECT_TRUE(CompilerOptions::contains(cip->buildInternalOptions, CompilerOptions::gtpinRera)) << cip->buildInternalOptions; } TEST_F(ProgramTests, GivenFailingGenBinaryProgramWhenRebuildingBinaryThenInvalidBinaryErrorIsReturned) { cl_int retVal; auto program = std::make_unique(toClDeviceVector(*pClDevice)); EXPECT_NE(nullptr, program); // Load a binary program file std::string filePath; retrieveBinaryKernelFilename(filePath, "CopyBuffer_simd16_", ".bin"); size_t binarySize = 0; auto pBinary = loadDataFromFile(filePath.c_str(), binarySize); EXPECT_NE(0u, binarySize); // Create program from loaded binary retVal = program->createProgramFromBinary(pBinary.get(), binarySize, *pClDevice); EXPECT_EQ(CL_SUCCESS, retVal); // Ask to rebuild program from its IR binary - it should fail (simulated invalid binary) retVal = program->rebuildProgramFromIr(); EXPECT_EQ(CL_INVALID_BINARY, retVal); } class Program32BitTests : public ProgramTests { public: void SetUp() override { DebugManager.flags.Force32bitAddressing.set(true); ProgramTests::SetUp(); } void TearDown() override { ProgramTests::TearDown(); DebugManager.flags.Force32bitAddressing.set(false); } }; TEST_F(Program32BitTests, givenDeviceWithForce32BitAddressingOnWhenBuiltinIsCreatedThenNoFlagsArePassedAsInternalOptions) { MockProgram program(toClDeviceVector(*pClDevice)); auto internalOptions = program.getInternalOptions(); EXPECT_TRUE(hasSubstr(internalOptions, std::string(""))); } TEST_F(Program32BitTests, 
givenDeviceWithForce32BitAddressingOnWhenProgramIsCreatedThen32bitFlagIsPassedAsInternalOption) { MockProgram program(pContext, false, toClDeviceVector(*pClDevice)); auto internalOptions = program.getInternalOptions(); std::string s1 = internalOptions; size_t pos = s1.find(NEO::CompilerOptions::arch32bit.data()); if constexpr (is64bit) { EXPECT_NE(pos, std::string::npos); } else { EXPECT_EQ(pos, std::string::npos); } } HWTEST_F(ProgramTests, givenNewProgramThenStatelessToStatefulBufferOffsetOptimizationIsMatchingThePlatformEnablingStatus) { MockProgram program(pContext, false, toClDeviceVector(*pClDevice)); auto internalOptions = program.getInternalOptions(); if (HwHelperHw::get().isStatelesToStatefullWithOffsetSupported()) { EXPECT_TRUE(CompilerOptions::contains(internalOptions, CompilerOptions::hasBufferOffsetArg)); } else { EXPECT_FALSE(CompilerOptions::contains(internalOptions, CompilerOptions::hasBufferOffsetArg)); } } TEST(ProgramTest, givenImagesSupportedWhenCreatingProgramThenInternalOptionsAreCorrectlyInitialized) { VariableBackup supportsImagesCapability{&defaultHwInfo->capabilityTable.supportsImages}; for (auto areImagesSupported : ::testing::Bool()) { supportsImagesCapability = areImagesSupported; UltClDeviceFactory clDeviceFactory{1, 0}; MockContext context{clDeviceFactory.rootDevices[0]}; MockProgram program(&context, false, toClDeviceVector(*clDeviceFactory.rootDevices[0])); auto internalOptions = program.getInternalOptions(); EXPECT_EQ(areImagesSupported, CompilerOptions::contains(internalOptions, CompilerOptions::enableImageSupport)); } } template struct CreateProgramFromBinaryMock : public MockProgram { using MockProgram::MockProgram; cl_int createProgramFromBinary(const void *pBinary, size_t binarySize, ClDevice &clDevice) override { this->irBinary.reset(new char[binarySize]); this->irBinarySize = binarySize; this->isSpirV = spirv; memcpy_s(this->irBinary.get(), binarySize, pBinary, binarySize); return ErrCodeToReturn; } }; TEST_F(ProgramTests, 
GivenFailedBinaryWhenCreatingFromIlThenInvalidBinaryErrorIsReturned) {
    // Only the first word is set; the remaining 15 words are zero-initialized.
    const uint32_t notSpirv[16] = {0xDEADBEEF};
    cl_int retVal = CL_SUCCESS;
    auto prog = Program::createFromIL>(pContext, reinterpret_cast(notSpirv), sizeof(notSpirv), retVal);
    // No program object is expected and the error code must be overwritten.
    EXPECT_EQ(nullptr, prog);
    EXPECT_EQ(CL_INVALID_BINARY, retVal);
}

// Creating a program from a 16-word IL buffer is expected to return a program
// object together with CL_SUCCESS.
TEST_F(ProgramTests, GivenSuccessfullyBuiltBinaryWhenCreatingFromIlThenValidProgramIsReturned) {
    const uint32_t spirv[16] = {0x03022307};
    cl_int retVal = CL_SUCCESS;
    auto prog = Program::createFromIL>(pContext, reinterpret_cast(spirv), sizeof(spirv), retVal);
    // Fatal assertion: prog is released below.
    ASSERT_NE(nullptr, prog);
    EXPECT_EQ(CL_SUCCESS, retVal);
    prog->release();
}

// A program created from IL is compiled while debug variables with
// forceBuildFailure set are pushed through fclPushDebugVars; compile() is
// still expected to return CL_SUCCESS.
TEST_F(ProgramTests, givenProgramCreatedFromILWhenCompileIsCalledThenReuseTheILInsteadOfCallingCompilerInterface) {
    const uint32_t spirv[16] = {0x03022307};
    cl_int errCode = 0;
    auto pProgram = Program::createFromIL(pContext, reinterpret_cast(spirv), sizeof(spirv), errCode);
    ASSERT_NE(nullptr, pProgram);
    // The variables are fetched with getIgcDebugVars() and pushed with
    // fclPushDebugVars().
    auto debugVars = NEO::getIgcDebugVars();
    debugVars.forceBuildFailure = true;
    gEnvironment->fclPushDebugVars(debugVars);
    auto compilerErr = pProgram->compile(pProgram->getDevices(), nullptr, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, compilerErr);
    // Undo the push above before releasing the program.
    gEnvironment->fclPopDebugVars();
    pProgram->release();
}

// Same compile scenario as the previous test, but the program is created
// through Program::create from a one-element binaries/lengths pair that points
// at the same 16-word buffer.
TEST_F(ProgramTests, givenProgramCreatedFromIntermediateBinaryRepresentationWhenCompileIsCalledThenReuseTheILInsteadOfCallingCompilerInterface) {
    const uint32_t spirv[16] = {0x03022307};
    cl_int errCode = 0;
    size_t lengths = sizeof(spirv);
    const unsigned char *binaries[1] = {reinterpret_cast(spirv)};
    auto pProgram = Program::create(pContext, pContext->getDevices(), &lengths, binaries, nullptr, errCode);
    ASSERT_NE(nullptr, pProgram);
    auto debugVars = NEO::getIgcDebugVars();
    debugVars.forceBuildFailure = true;
    gEnvironment->fclPushDebugVars(debugVars);
    auto compilerErr = pProgram->compile(pProgram->getDevices(), nullptr, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, compilerErr);
    // Undo the push above before releasing the program.
    gEnvironment->fclPopDebugVars();
    pProgram->release();
}
// A null IL pointer must be rejected: no program, CL_INVALID_BINARY reported.
TEST_F(ProgramTests, GivenIlIsNullptrWhenCreatingFromIlThenInvalidBinaryErrorIsReturned) {
    cl_int retVal = CL_SUCCESS;
    auto prog = Program::createFromIL>(pContext, nullptr, 16, retVal);
    EXPECT_EQ(nullptr, prog);
    EXPECT_EQ(CL_INVALID_BINARY, retVal);
}

// A zero IL length must be rejected: no program, CL_INVALID_BINARY reported.
TEST_F(ProgramTests, GivenIlSizeZeroWhenCreatingFromIlThenInvalidBinaryErrorIsReturned) {
    const uint32_t spirv[16] = {0x03022307};
    cl_int retVal = CL_SUCCESS;
    auto prog = Program::createFromIL>(pContext, reinterpret_cast(spirv), 0, retVal);
    EXPECT_EQ(nullptr, prog);
    EXPECT_EQ(CL_INVALID_BINARY, retVal);
}

// getIsSpirV() is expected to be true for the buffer starting with 0x03022307
// and false for the buffer starting with 'B', 'C', 0xc0, 0xde.
TEST_F(ProgramTests, WhenCreatingFromIlThenIsSpirvIsSetCorrectly) {
    const uint32_t spirv[16] = {0x03022307};
    cl_int retVal = CL_SUCCESS;
    auto prog = Program::createFromIL(pContext, reinterpret_cast(spirv), sizeof(spirv), retVal);
    // Fatal assertion: prog is dereferenced right below.
    ASSERT_NE(nullptr, prog);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_TRUE(prog->getIsSpirV());
    prog->release();

    const char llvmBc[16] = {'B', 'C', '\xc0', '\xde'};
    prog = Program::createFromIL(pContext, reinterpret_cast(llvmBc), sizeof(llvmBc), retVal);
    // Fatal assertion: prog is dereferenced right below. The first program
    // has already been released at this point, so an early return leaks nothing.
    ASSERT_NE(nullptr, prog);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_FALSE(prog->getIsSpirV());
    prog->release();
}

// Buffer shared by the isValidLlvmBinary tests below.
static const uint8_t llvmBinary[] = "BC\xc0\xde ";

TEST(isValidLlvmBinary, whenLlvmMagicWasFoundThenBinaryIsValidLLvm) {
    EXPECT_TRUE(NEO::isLlvmBitcode(llvmBinary));
}

TEST(isValidLlvmBinary, whenBinaryIsNullptrThenBinaryIsNotValidLLvm) {
    EXPECT_FALSE(NEO::isLlvmBitcode(ArrayRef()));
}

TEST(isValidLlvmBinary, whenBinaryIsShorterThanLllvMagicThenBinaryIsNotValidLLvm) {
    // Only the first two bytes of the buffer are exposed.
    EXPECT_FALSE(NEO::isLlvmBitcode(ArrayRef(llvmBinary, 2)));
}

TEST(isValidLlvmBinary, whenBinaryDoesNotContainLllvMagicThenBinaryIsNotValidLLvm) {
    const uint8_t notLlvmBinary[] = "ABCDEFGHIJKLMNO";
    EXPECT_FALSE(NEO::isLlvmBitcode(notLlvmBinary));
}

// Buffers shared by the isValidSpirvBinary tests: the second one holds the
// first word of the first one with its bytes in reverse order.
const uint32_t spirv[16] = {0x03022307};
const uint32_t spirvInvEndianes[16] = {0x07230203};

TEST(isValidSpirvBinary, whenSpirvMagicWasFoundThenBinaryIsValidSpirv) {
    EXPECT_TRUE(NEO::isSpirVBitcode(ArrayRef(reinterpret_cast(&spirv),
sizeof(spirv)))); EXPECT_TRUE(NEO::isSpirVBitcode(ArrayRef(reinterpret_cast(&spirvInvEndianes), sizeof(spirvInvEndianes)))); } TEST(isValidSpirvBinary, whenBinaryIsNullptrThenBinaryIsNotValidLLvm) { EXPECT_FALSE(NEO::isSpirVBitcode(ArrayRef())); } TEST(isValidSpirvBinary, whenBinaryIsShorterThanLllvMagicThenBinaryIsNotValidLLvm) { EXPECT_FALSE(NEO::isSpirVBitcode(ArrayRef(reinterpret_cast(&spirvInvEndianes), 2))); } TEST(isValidSpirvBinary, whenBinaryDoesNotContainLllvMagicThenBinaryIsNotValidLLvm) { const uint8_t notSpirvBinary[] = "ABCDEFGHIJKLMNO"; EXPECT_FALSE(NEO::isSpirVBitcode(notSpirvBinary)); } TEST_F(ProgramTests, WhenLinkingTwoValidSpirvProgramsThenValidProgramIsReturned) { const uint32_t spirv[16] = {0x03022307}; cl_int errCode = CL_SUCCESS; auto node1 = Program::createFromIL>(pContext, reinterpret_cast(spirv), sizeof(spirv), errCode); ASSERT_NE(nullptr, node1); EXPECT_EQ(CL_SUCCESS, errCode); auto node2 = Program::createFromIL>(pContext, reinterpret_cast(spirv), sizeof(spirv), errCode); ASSERT_NE(nullptr, node2); EXPECT_EQ(CL_SUCCESS, errCode); auto prog = Program::createFromIL>(pContext, reinterpret_cast(spirv), sizeof(spirv), errCode); ASSERT_NE(nullptr, prog); EXPECT_EQ(CL_SUCCESS, errCode); cl_program linkNodes[] = {node1, node2}; errCode = prog->link(prog->getDevices(), nullptr, 2, linkNodes); EXPECT_EQ(CL_SUCCESS, errCode); prog->release(); node2->release(); node1->release(); } TEST(ProgramDestructionTests, givenProgramUsingDeviceWhenItIsDestroyedAfterPlatfromCleanupThenItIsCleanedUpProperly) { initPlatform(); auto device = platform()->getClDevice(0); MockContext *context = new MockContext(device, false); MockProgram *pProgram = new MockProgram(context, false, toClDeviceVector(*device)); auto globalAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), MemoryConstants::pageSize}); pProgram->setGlobalSurface(globalAllocation); platformsImpl->clear(); EXPECT_EQ(1, 
device->getRefInternalCount()); EXPECT_EQ(1, pProgram->getRefInternalCount()); context->decRefInternal(); pProgram->decRefInternal(); } TEST_F(ProgramTests, givenProgramWithSpirvWhenRebuildProgramIsCalledThenSpirvPathIsTaken) { auto compilerInterface = new MockCompilerInterface(); auto compilerMain = new MockCIFMain(); compilerInterface->setFclMain(compilerMain); compilerMain->Retain(); compilerInterface->setIgcMain(compilerMain); compilerMain->setDefaultCreatorFunc(NEO::MockIgcOclDeviceCtx::Create); compilerMain->setDefaultCreatorFunc(NEO::MockFclOclDeviceCtx::Create); pDevice->getExecutionEnvironment()->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]->compilerInterface.reset(compilerInterface); std::string receivedInput; MockCompilerDebugVars debugVars = {}; debugVars.receivedInput = &receivedInput; debugVars.forceBuildFailure = true; gEnvironment->igcPushDebugVars(debugVars); std::unique_ptr igcDebugVarsAutoPop{&gEnvironment, [](void *) { gEnvironment->igcPopDebugVars(); }}; auto program = clUniquePtr(new MockProgram(toClDeviceVector(*pClDevice))); uint32_t spirv[16] = {0x03022307, 0x23471113, 0x17192329}; program->irBinary = makeCopy(spirv, sizeof(spirv)); program->irBinarySize = sizeof(spirv); program->isSpirV = true; auto buildRet = program->rebuildProgramFromIr(); EXPECT_NE(CL_SUCCESS, buildRet); ASSERT_EQ(sizeof(spirv), receivedInput.size()); EXPECT_EQ(0, memcmp(spirv, receivedInput.c_str(), receivedInput.size())); ASSERT_EQ(1U, compilerInterface->requestedTranslationCtxs.size()); EXPECT_EQ(IGC::CodeType::spirV, compilerInterface->requestedTranslationCtxs[0].first); EXPECT_EQ(IGC::CodeType::oclGenBin, compilerInterface->requestedTranslationCtxs[0].second); } TEST_F(ProgramTests, givenProgramWithSpirvWhenRebuildIsCalledThenRebuildWarningIsIssued) { const auto program{clUniquePtr(new MockProgram(toClDeviceVector(*pClDevice)))}; uint32_t spirv[16] = {0x03022307, 0x23471113, 0x17192329}; program->irBinary = makeCopy(spirv, sizeof(spirv)); 
program->irBinarySize = sizeof(spirv); program->isSpirV = true; const auto buildResult{program->rebuildProgramFromIr()}; ASSERT_EQ(CL_SUCCESS, buildResult); const std::string buildLog{program->getBuildLog(pClDevice->getRootDeviceIndex())}; const auto containsWarning{buildLog.find(CompilerWarnings::recompiledFromIr.data()) != std::string::npos}; EXPECT_TRUE(containsWarning); } TEST_F(ProgramTests, givenProgramWithSpirvWhenRebuildIsCalledButSuppressFlagIsEnabledThenRebuildWarningIsNotIssued) { const auto program{clUniquePtr(new MockProgram(toClDeviceVector(*pClDevice)))}; uint32_t spirv[16] = {0x03022307, 0x23471113, 0x17192329}; program->irBinary = makeCopy(spirv, sizeof(spirv)); program->irBinarySize = sizeof(spirv); program->isSpirV = true; const auto buildOptions{CompilerOptions::noRecompiledFromIr}; program->setBuildOptions(buildOptions.data()); const auto buildResult{program->rebuildProgramFromIr()}; ASSERT_EQ(CL_SUCCESS, buildResult); const std::string buildLog{program->getBuildLog(pClDevice->getRootDeviceIndex())}; const auto containsWarning{buildLog.find(CompilerWarnings::recompiledFromIr.data()) != std::string::npos}; EXPECT_FALSE(containsWarning); } TEST_F(ProgramTests, givenProgramWithSpirvWhenRecompileIsCalledThenRebuildWarningIsIssued) { const auto program{clUniquePtr(new MockProgram(toClDeviceVector(*pClDevice)))}; uint32_t spirv[16] = {0x03022307, 0x23471113, 0x17192329}; program->irBinary = makeCopy(spirv, sizeof(spirv)); program->irBinarySize = sizeof(spirv); program->isSpirV = true; const auto compileResult{program->recompile()}; ASSERT_EQ(CL_SUCCESS, compileResult); const std::string buildLog{program->getBuildLog(pClDevice->getRootDeviceIndex())}; const auto containsWarning{buildLog.find(CompilerWarnings::recompiledFromIr.data()) != std::string::npos}; EXPECT_TRUE(containsWarning); } TEST_F(ProgramTests, givenProgramWithSpirvWhenRecompileIsCalledButSuppressFlagIsEnabledThenRebuildWarningIsNotIssued) { const auto program{clUniquePtr(new 
MockProgram(toClDeviceVector(*pClDevice)))}; uint32_t spirv[16] = {0x03022307, 0x23471113, 0x17192329}; program->irBinary = makeCopy(spirv, sizeof(spirv)); program->irBinarySize = sizeof(spirv); program->isSpirV = true; const auto buildOptions{CompilerOptions::noRecompiledFromIr}; program->setBuildOptions(buildOptions.data()); const auto compileResult{program->recompile()}; ASSERT_EQ(CL_SUCCESS, compileResult); const std::string buildLog{program->getBuildLog(pClDevice->getRootDeviceIndex())}; const auto containsWarning{buildLog.find(CompilerWarnings::recompiledFromIr.data()) != std::string::npos}; EXPECT_FALSE(containsWarning); } TEST_F(ProgramTests, whenRebuildingProgramThenStoreDeviceBinaryProperly) { auto compilerInterface = new MockCompilerInterface(); pDevice->getExecutionEnvironment()->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]->compilerInterface.reset(compilerInterface); auto compilerMain = new MockCIFMain(); compilerInterface->setIgcMain(compilerMain); compilerMain->setDefaultCreatorFunc(NEO::MockIgcOclDeviceCtx::Create); MockCompilerDebugVars debugVars = {}; char binaryToReturn[] = "abcdfghijklmnop"; debugVars.binaryToReturn = binaryToReturn; debugVars.binaryToReturnSize = sizeof(binaryToReturn); gEnvironment->igcPushDebugVars(debugVars); std::unique_ptr igcDebugVarsAutoPop{&gEnvironment, [](void *) { gEnvironment->igcPopDebugVars(); }}; auto program = clUniquePtr(new MockProgram(toClDeviceVector(*pClDevice))); uint32_t ir[16] = {0x03022307, 0x23471113, 0x17192329}; program->irBinary = makeCopy(ir, sizeof(ir)); program->irBinarySize = sizeof(ir); EXPECT_EQ(nullptr, program->buildInfos[rootDeviceIndex].unpackedDeviceBinary); EXPECT_EQ(0U, program->buildInfos[rootDeviceIndex].unpackedDeviceBinarySize); program->rebuildProgramFromIr(); ASSERT_NE(nullptr, program->buildInfos[rootDeviceIndex].unpackedDeviceBinary); ASSERT_EQ(sizeof(binaryToReturn), program->buildInfos[rootDeviceIndex].unpackedDeviceBinarySize); EXPECT_EQ(0, memcmp(binaryToReturn, 
program->buildInfos[rootDeviceIndex].unpackedDeviceBinary.get(), program->buildInfos[rootDeviceIndex].unpackedDeviceBinarySize));
}

// gtpinRera passed as the only build option must come back as the extracted
// internal options string.
TEST_F(ProgramTests, givenProgramWhenInternalOptionsArePassedThenTheyAreAddedToProgramInternalOptions) {
    MockProgram mockProgram(toClDeviceVector(*pClDevice));

    std::string options = NEO::CompilerOptions::gtpinRera.str();
    std::string extracted;
    mockProgram.extractInternalOptions(options, extracted);

    EXPECT_STREQ(extracted.c_str(), NEO::CompilerOptions::gtpinRera.data());
}

// An option string that is not a known internal option must leave the
// extracted internal options empty.
TEST_F(ProgramTests, givenProgramWhenUnknownInternalOptionsArePassedThenTheyAreNotAddedToProgramInternalOptions) {
    MockProgram mockProgram(toClDeviceVector(*pClDevice));

    std::string options("-unknown-internal-options-123");
    std::string extracted;
    mockProgram.extractInternalOptions(options, extracted);

    EXPECT_EQ(0u, extracted.length());
}

// Two internal options interleaved with "###" tokens: only the two internal
// options, concatenated, are expected in the output.
TEST_F(ProgramTests, givenProgramWhenAllInternalOptionsArePassedMixedWithUnknownInputThenTheyAreParsedCorrectly) {
    MockProgram mockProgram(toClDeviceVector(*pClDevice));

    const std::string expected = CompilerOptions::concatenate(CompilerOptions::gtpinRera, CompilerOptions::greaterThan4gbBuffersRequired);
    std::string options = CompilerOptions::concatenate("###", CompilerOptions::gtpinRera, "###", CompilerOptions::greaterThan4gbBuffersRequired, "###");
    std::string extracted;
    mockProgram.extractInternalOptions(options, extracted);

    EXPECT_EQ(expected, extracted);
}

TEST_F(ProgramTests, givenProgramWhenInternalOptionsArePassedWithValidValuesThenTheyAreAddedToProgramInternalOptions) {
    MockProgram program(toClDeviceVector(*pClDevice));
    program.isFlagOptionOverride = false;
    program.isOptionValueValidOverride = true;
    std::string buildOptions = CompilerOptions::concatenate(CompilerOptions::gtpinRera, "someValue");
    std::string internalOptions;
    program.extractInternalOptions(buildOptions, internalOptions);
    EXPECT_EQ(buildOptions,
internalOptions) << internalOptions; } TEST_F(ProgramTests, givenProgramWhenInternalOptionsArePassedWithInvalidValuesThenTheyAreNotAddedToProgramInternalOptions) { MockProgram program(toClDeviceVector(*pClDevice)); program.isFlagOptionOverride = false; std::string buildOptions = CompilerOptions::concatenate(CompilerOptions::gtpinRera, "someValue"); std::string expectedOutput = ""; std::string internalOptions; program.extractInternalOptions(buildOptions, internalOptions); EXPECT_EQ(expectedOutput, internalOptions); program.isOptionValueValidOverride = true; buildOptions = std::string(CompilerOptions::gtpinRera); internalOptions.erase(); program.extractInternalOptions(buildOptions, internalOptions); EXPECT_EQ(expectedOutput, internalOptions); } TEST_F(ProgramTests, GivenInjectInternalBuildOptionsWhenBuildingProgramThenInternalOptionsWereAppended) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.InjectInternalBuildOptions.set("-abc"); auto cip = new MockCompilerInterfaceCaptureBuildOptions(); auto pDevice = pContext->getDevice(0); pDevice->getExecutionEnvironment()->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]->compilerInterface.reset(cip); auto program = std::make_unique(toClDeviceVector(*pDevice)); program->sourceCode = "__kernel mock() {}"; program->createdFrom = Program::CreatedFrom::SOURCE; cl_int retVal = program->build(program->getDevices(), "", false); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(CompilerOptions::contains(cip->buildInternalOptions, "-abc")) << cip->buildInternalOptions; } TEST_F(ProgramTests, GivenInjectInternalBuildOptionsWhenBuildingBuiltInProgramThenInternalOptionsAreNotAppended) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.InjectInternalBuildOptions.set("-abc"); auto cip = new MockCompilerInterfaceCaptureBuildOptions(); auto pDevice = pContext->getDevice(0); pDevice->getExecutionEnvironment()->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]->compilerInterface.reset(cip); auto program = 
std::make_unique(toClDeviceVector(*pDevice)); program->sourceCode = "__kernel mock() {}"; program->createdFrom = Program::CreatedFrom::SOURCE; program->isBuiltIn = true; cl_int retVal = program->build(program->getDevices(), "", false); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_FALSE(CompilerOptions::contains(cip->buildInternalOptions, "-abc")) << cip->buildInternalOptions; } TEST_F(ProgramTests, GivenInjectInternalBuildOptionsWhenCompilingProgramThenInternalOptionsWereAppended) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.InjectInternalBuildOptions.set("-abc"); auto cip = new MockCompilerInterfaceCaptureBuildOptions(); auto pDevice = pContext->getDevice(0); pDevice->getExecutionEnvironment()->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]->compilerInterface.reset(cip); auto program = std::make_unique(toClDeviceVector(*pDevice)); program->sourceCode = "__kernel mock() {}"; program->createdFrom = Program::CreatedFrom::SOURCE; cl_int retVal = program->compile(program->getDevices(), nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(CompilerOptions::contains(cip->buildInternalOptions, "-abc")) << cip->buildInternalOptions; } TEST_F(ProgramTests, GivenInjectInternalBuildOptionsWhenCompilingBuiltInProgramThenInternalOptionsAreNotAppended) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.InjectInternalBuildOptions.set("-abc"); auto cip = new MockCompilerInterfaceCaptureBuildOptions(); auto pDevice = pContext->getDevice(0); pDevice->getExecutionEnvironment()->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]->compilerInterface.reset(cip); auto program = std::make_unique(toClDeviceVector(*pDevice)); program->sourceCode = "__kernel mock() {}"; program->createdFrom = Program::CreatedFrom::SOURCE; program->isBuiltIn = true; cl_int retVal = program->compile(program->getDevices(), nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_FALSE(CompilerOptions::contains(cip->buildInternalOptions, "-abc")) << 
cip->buildInternalOptions;
}

// MockProgram variant that counts calls to applyAdditionalOptions() and then
// forwards each call to the MockProgram implementation.
class AdditionalOptionsMockProgram : public MockProgram {
  public:
    using MockProgram::MockProgram;
    void applyAdditionalOptions(std::string &internalOptions) override {
        applyAdditionalOptionsCalled++;
        MockProgram::applyAdditionalOptions(internalOptions);
    }
    // Number of applyAdditionalOptions() invocations seen so far.
    uint32_t applyAdditionalOptionsCalled = 0;
};

// One build() call must run applyAdditionalOptions() exactly once.
TEST_F(ProgramTests, givenProgramWhenBuiltThenAdditionalOptionsAreApplied) {
    AdditionalOptionsMockProgram program(toClDeviceVector(*pClDevice));
    program.build(program.getDevices(), nullptr, false);
    EXPECT_EQ(1u, program.applyAdditionalOptionsCalled);
}

// With the RebuildPrecompiledKernels debug flag set, createProgramFromBinary()
// on a program made by createBuiltInFromGenBinary() still returns CL_SUCCESS,
// but both the packed and the unpacked device binary stay empty.
TEST(CreateProgramFromBinaryTests, givenBinaryProgramBuiltInWhenKernelRebulildIsForcedThenDeviceBinaryIsNotUsed) {
    // Restores the debug flags when the test scope ends.
    DebugManagerStateRestore dbgRestorer;
    DebugManager.flags.RebuildPrecompiledKernels.set(true);
    // Start from an error code so a success result has to be written by the callee.
    cl_int retVal = CL_INVALID_BINARY;

    PatchTokensTestData::ValidEmptyProgram programTokens;

    auto clDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr));
    std::unique_ptr pProgram(Program::createBuiltInFromGenBinary(nullptr, toClDeviceVector(*clDevice), programTokens.storage.data(), programTokens.storage.size(), &retVal));
    ASSERT_NE(nullptr, pProgram.get());
    EXPECT_EQ(CL_SUCCESS, retVal);

    auto rootDeviceIndex = clDevice->getRootDeviceIndex();
    retVal = pProgram->createProgramFromBinary(programTokens.storage.data(), programTokens.storage.size(), *clDevice);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(nullptr, pProgram->buildInfos[rootDeviceIndex].unpackedDeviceBinary.get());
    EXPECT_EQ(0U, pProgram->buildInfos[rootDeviceIndex].unpackedDeviceBinarySize);
    EXPECT_EQ(nullptr, pProgram->buildInfos[rootDeviceIndex].packedDeviceBinary);
    EXPECT_EQ(0U, pProgram->buildInfos[rootDeviceIndex].packedDeviceBinarySize);
}

TEST(CreateProgramFromBinaryTests, givenBinaryProgramBuiltInWhenKernelRebulildIsForcedThenRebuildWarningIsEnabled) {
    DebugManagerStateRestore dbgRestorer{};
    DebugManager.flags.RebuildPrecompiledKernels.set(true);
    PatchTokensTestData::ValidEmptyProgram programTokens;
cl_int retVal{CL_INVALID_BINARY}; const auto clDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); std::unique_ptr pProgram(Program::createBuiltInFromGenBinary(nullptr, toClDeviceVector(*clDevice), programTokens.storage.data(), programTokens.storage.size(), &retVal)); ASSERT_NE(nullptr, pProgram.get()); ASSERT_EQ(CL_SUCCESS, retVal); retVal = pProgram->createProgramFromBinary(programTokens.storage.data(), programTokens.storage.size(), *clDevice); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_TRUE(pProgram->shouldWarnAboutRebuild); } TEST(CreateProgramFromBinaryTests, givenBinaryProgramNotBuiltInWhenBuiltInKernelRebulildIsForcedThenDeviceBinaryIsUsed) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.RebuildPrecompiledKernels.set(true); cl_int retVal = CL_INVALID_BINARY; PatchTokensTestData::ValidEmptyProgram programTokens; const unsigned char *binaries[] = {programTokens.storage.data()}; size_t lengths[] = {programTokens.storage.size()}; auto clDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); std::unique_ptr pProgram(Program::create( nullptr, toClDeviceVector(*clDevice), lengths, binaries, nullptr, retVal)); ASSERT_NE(nullptr, pProgram.get()); EXPECT_EQ(CL_SUCCESS, retVal); auto rootDeviceIndex = clDevice->getRootDeviceIndex(); EXPECT_NE(nullptr, pProgram->buildInfos[rootDeviceIndex].unpackedDeviceBinary.get()); EXPECT_LT(0U, pProgram->buildInfos[rootDeviceIndex].unpackedDeviceBinarySize); EXPECT_NE(nullptr, pProgram->buildInfos[rootDeviceIndex].packedDeviceBinary); EXPECT_LT(0U, pProgram->buildInfos[rootDeviceIndex].packedDeviceBinarySize); } TEST(CreateProgramFromBinaryTests, givenBinaryProgramWhenKernelRebulildIsNotForcedThenDeviceBinaryIsUsed) { cl_int retVal = CL_INVALID_BINARY; PatchTokensTestData::ValidEmptyProgram programTokens; auto clDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); std::unique_ptr pProgram(Program::createBuiltInFromGenBinary(nullptr, 
toClDeviceVector(*clDevice), programTokens.storage.data(), programTokens.storage.size(), &retVal));
    ASSERT_NE(nullptr, pProgram.get());
    EXPECT_EQ(CL_SUCCESS, retVal);

    auto rootDeviceIndex = clDevice->getRootDeviceIndex();
    retVal = pProgram->createProgramFromBinary(programTokens.storage.data(), programTokens.storage.size(), *clDevice);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_NE(nullptr, reinterpret_cast(pProgram->buildInfos[rootDeviceIndex].unpackedDeviceBinary.get()));
    EXPECT_EQ(programTokens.storage.size(), pProgram->buildInfos[rootDeviceIndex].unpackedDeviceBinarySize);
    EXPECT_NE(nullptr, reinterpret_cast(pProgram->buildInfos[rootDeviceIndex].packedDeviceBinary.get()));
    EXPECT_EQ(programTokens.storage.size(), pProgram->buildInfos[rootDeviceIndex].packedDeviceBinarySize);
}

// MockProgram whose updateSpecializationConstant() ignores its arguments and
// always reports CL_SUCCESS.
struct SpecializationConstantProgramMock : public MockProgram {
    using MockProgram::MockProgram;
    cl_int updateSpecializationConstant(cl_uint specId, size_t specSize, const void *specValue) override {
        return CL_SUCCESS;
    }
};

// CompilerInterface stub for getSpecConstantsInfo(): counts the calls, remembers
// the start of the SPIR-V input it was given, and returns a configurable code.
struct SpecializationConstantCompilerInterfaceMock : public CompilerInterface {
    // Code handed back by getSpecConstantsInfo(); Success until returnError() is called.
    TranslationOutput::ErrorCode retVal = TranslationOutput::ErrorCode::Success;
    // Number of getSpecConstantsInfo() calls.
    int counter = 0;
    // srcSpirV.begin() from the most recent getSpecConstantsInfo() call.
    const char *spirV = nullptr;
    TranslationOutput::ErrorCode getSpecConstantsInfo(const NEO::Device &device, ArrayRef srcSpirV, SpecConstantInfo &output) override {
        counter++;
        spirV = srcSpirV.begin();
        return retVal;
    }
    // Makes every later getSpecConstantsInfo() call return CompilationFailure.
    void returnError() { retVal = TranslationOutput::ErrorCode::CompilationFailure; }
};

// RootDeviceEnvironment that installs a SpecializationConstantCompilerInterfaceMock
// in its constructor, returns the stored compiler interface as-is from
// getCompilerInterface(), and reports success from initAilConfiguration().
struct SpecializationConstantRootDeviceEnvironemnt : public RootDeviceEnvironment {
    SpecializationConstantRootDeviceEnvironemnt(ExecutionEnvironment &executionEnvironment) : RootDeviceEnvironment(executionEnvironment) {
        compilerInterface.reset(new SpecializationConstantCompilerInterfaceMock());
    }
    CompilerInterface *getCompilerInterface() override {
        return compilerInterface.get();
    }

    bool initAilConfiguration() override {
        return true;
    }
};

// Fixture: a MockClDevice whose root device environment 0 uses a
// SpecializationConstantCompilerInterfaceMock, plus a SPIR-V flagged
// SpecializationConstantProgramMock created for that device.
struct setProgramSpecializationConstantTests : public ::testing::Test {
    setProgramSpecializationConstantTests() : device(new MockDevice()) {}
    void SetUp() override {
        mockCompiler = new SpecializationConstantCompilerInterfaceMock();
        auto rootDeviceEnvironment = device.getExecutionEnvironment()->rootDeviceEnvironments[0].get();
        // The pointer is handed to compilerInterface.reset(); mockCompiler is kept
        // only as an observer for the tests.
        rootDeviceEnvironment->compilerInterface.reset(mockCompiler);
        mockProgram.reset(new SpecializationConstantProgramMock(toClDeviceVector(device)));
        mockProgram->isSpirV = true;

        // Preconditions every test relies on.
        EXPECT_FALSE(mockProgram->areSpecializationConstantsInitialized);
        EXPECT_EQ(0, mockCompiler->counter);
    }

    SpecializationConstantCompilerInterfaceMock *mockCompiler = nullptr;
    std::unique_ptr mockProgram;
    MockClDevice device;

    // Value whose address is passed as the specialization constant payload.
    int specValue = 1;
};

// The first setProgramSpecializationConstant() call queries the compiler once,
// marks the constants as initialized, and passes the program's irBinary as input.
TEST_F(setProgramSpecializationConstantTests, whenSetProgramSpecializationConstantThenBinarySourceIsUsed) {
    auto retVal = mockProgram->setProgramSpecializationConstant(1, sizeof(int), &specValue);

    EXPECT_EQ(1, mockCompiler->counter);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_TRUE(mockProgram->areSpecializationConstantsInitialized);
    EXPECT_EQ(mockProgram->irBinary.get(), mockCompiler->spirV);
}

// A second call must not query the compiler again: the counter stays at 1.
TEST_F(setProgramSpecializationConstantTests, whenSetProgramSpecializationConstantMultipleTimesThenSpecializationConstantsAreInitializedOnce) {
    auto retVal = mockProgram->setProgramSpecializationConstant(1, sizeof(int), &specValue);

    EXPECT_EQ(1, mockCompiler->counter);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_TRUE(mockProgram->areSpecializationConstantsInitialized);

    retVal = mockProgram->setProgramSpecializationConstant(1, sizeof(int), &specValue);

    EXPECT_EQ(1, mockCompiler->counter);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_TRUE(mockProgram->areSpecializationConstantsInitialized);
}

TEST_F(setProgramSpecializationConstantTests, givenInvalidGetSpecConstantsInfoReturnValueWhenSetProgramSpecializationConstantThenErrorIsReturned) {
    mockCompiler->returnError();

    auto retVal = mockProgram->setProgramSpecializationConstant(1, sizeof(int), &specValue);

    EXPECT_EQ(1, mockCompiler->counter);
    EXPECT_EQ(CL_INVALID_VALUE, retVal);
EXPECT_FALSE(mockProgram->areSpecializationConstantsInitialized);
}

// Replaces root device environment 0 of a fresh MockExecutionEnvironment and
// expects setProgramSpecializationConstant() to return CL_OUT_OF_HOST_MEMORY.
// NOTE(review): the template argument of make_unique is missing in this text,
// so the concrete environment type cannot be read from here.
TEST(setProgramSpecializationConstantTest, givenUninitializedCompilerinterfaceWhenSetProgramSpecializationConstantThenErrorIsReturned) {
    auto executionEnvironment = new MockExecutionEnvironment();
    executionEnvironment->rootDeviceEnvironments[0] = std::make_unique(*executionEnvironment);
    executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get());
    MockClDevice mockDevice(new MockDevice{executionEnvironment, 0});
    SpecializationConstantProgramMock mockProgram(toClDeviceVector(mockDevice));

    mockProgram.isSpirV = true;
    int specValue = 1;

    auto retVal = mockProgram.setProgramSpecializationConstant(1, sizeof(int), &specValue);
    EXPECT_EQ(CL_OUT_OF_HOST_MEMORY, retVal);
}

using ProgramBinTest = Test;

// With the PrintProgramBinaryProcessingTime flag set, the captured stdout must
// begin with the 14 characters "Elapsed time: " and the build must succeed.
TEST_F(ProgramBinTest, givenPrintProgramBinaryProcessingTimeSetWhenBuildProgramThenProcessingTimeIsPrinted) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.PrintProgramBinaryProcessingTime.set(true);
    testing::internal::CaptureStdout();

    CreateProgramFromBinary(pContext, pContext->getDevices(), "kernel_data_param");

    auto retVal = pProgram->build(
        pProgram->getDevices(),
        nullptr,
        false);

    auto output = testing::internal::GetCapturedStdout();
    // compare() returns 0 on a match, so EXPECT_FALSE asserts that the prefix matches.
    EXPECT_FALSE(output.compare(0, 14, "Elapsed time: "));
    EXPECT_EQ(CL_SUCCESS, retVal);
}

// Scoped helper. The constructor fills mockDebugData with the byte pattern
// 0, 1, 2, ... and registers it in the IGC debug vars as the debug data to
// return; the destructor clears that registration again.
// Copy and move construction are deleted.
struct DebugDataGuard {
    DebugDataGuard(const DebugDataGuard &) = delete;
    DebugDataGuard(DebugDataGuard &&) = delete;

    DebugDataGuard() {
        for (size_t n = 0; n < sizeof(mockDebugData); n++) {
            mockDebugData[n] = (char)n;
        }

        // getIgcDebugVars() hands back a copy that is modified and written
        // back through setIgcDebugVars().
        auto vars = NEO::getIgcDebugVars();
        vars.debugDataToReturn = mockDebugData;
        vars.debugDataToReturnSize = sizeof(mockDebugData);
        NEO::setIgcDebugVars(vars);
    }

    ~DebugDataGuard() {
        auto vars = NEO::getIgcDebugVars();
        vars.debugDataToReturn = nullptr;
        vars.debugDataToReturnSize = 0;
        NEO::setIgcDebugVars(vars);
    }

    // Backing storage for the registered debug data (32 bytes).
    char mockDebugData[32];
};

TEST_F(ProgramBinTest, GivenBuildWithDebugDataThenBuildDataAvailableViaGetInfo) {
    DebugDataGuard debugDataGuard;
const char *sourceCode = "__kernel void\nCB(\n__global unsigned int* src, __global unsigned int* dst)\n{\nint id = (int)get_global_id(0);\ndst[id] = src[id];\n}\n"; pProgram = Program::create( pContext, 1, &sourceCode, &knownSourceSize, retVal); retVal = pProgram->build(pProgram->getDevices(), nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); // Verify size_t debugDataSize = 0; retVal = pProgram->getInfo(CL_PROGRAM_DEBUG_INFO_SIZES_INTEL, sizeof(debugDataSize), &debugDataSize, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); std::unique_ptr debugData{new char[debugDataSize]}; for (size_t n = 0; n < sizeof(debugData); n++) { debugData[n] = 0; } char *pDebugData = &debugData[0]; size_t retData = 0; bool isOK = true; retVal = pProgram->getInfo(CL_PROGRAM_DEBUG_INFO_INTEL, 1, &pDebugData, &retData); EXPECT_EQ(CL_INVALID_VALUE, retVal); retVal = pProgram->getInfo(CL_PROGRAM_DEBUG_INFO_INTEL, debugDataSize, &pDebugData, &retData); EXPECT_EQ(CL_SUCCESS, retVal); cl_uint numDevices; retVal = clGetProgramInfo(pProgram, CL_PROGRAM_NUM_DEVICES, sizeof(numDevices), &numDevices, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(numDevices * sizeof(debugData), retData); // Check integrity of returned debug data for (size_t n = 0; n < debugDataSize; n++) { if (debugData[n] != (char)n) { isOK = false; break; } } EXPECT_TRUE(isOK); for (size_t n = debugDataSize; n < sizeof(debugData); n++) { if (debugData[n] != (char)0) { isOK = false; break; } } EXPECT_TRUE(isOK); retData = 0; retVal = pProgram->getInfo(CL_PROGRAM_DEBUG_INFO_INTEL, debugDataSize, nullptr, &retData); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(numDevices * sizeof(debugData), retData); } TEST_F(ProgramBinTest, givenNoDebugDataAvailableThenDebugDataIsNotAvailableViaGetInfo) { const char *sourceCode = "__kernel void\nCB(\n__global unsigned int* src, __global unsigned int* dst)\n{\nint id = (int)get_global_id(0);\ndst[id] = src[id];\n}\n"; pProgram = Program::create( pContext, 1, &sourceCode, &knownSourceSize, retVal); 
EXPECT_EQ(0u, pProgram->buildInfos[rootDeviceIndex].debugDataSize); EXPECT_EQ(nullptr, pProgram->buildInfos[rootDeviceIndex].debugData); size_t debugDataSize = 0; retVal = pProgram->getInfo(CL_PROGRAM_DEBUG_INFO_SIZES_INTEL, sizeof(debugDataSize), &debugDataSize, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, debugDataSize); cl_uint numDevices; retVal = clGetProgramInfo(pProgram, CL_PROGRAM_NUM_DEVICES, sizeof(numDevices), &numDevices, nullptr); debugDataSize = numDevices * sizeof(void **); std::unique_ptr debugData{new char[debugDataSize]}; for (size_t n = 0; n < sizeof(debugData); n++) { debugData[n] = 0; } char *pDebugData = &debugData[0]; size_t retData = 0; retVal = pProgram->getInfo(CL_PROGRAM_DEBUG_INFO_INTEL, debugDataSize, &pDebugData, &retData); EXPECT_EQ(CL_SUCCESS, retVal); for (size_t n = 0; n < sizeof(debugData); n++) { EXPECT_EQ(0, debugData[n]); } } TEST_F(ProgramBinTest, GivenDebugDataAvailableWhenLinkingProgramThenDebugDataIsStoredInProgram) { DebugDataGuard debugDataGuard; const char *sourceCode = "__kernel void\nCB(\n__global unsigned int* src, __global unsigned int* dst)\n{\nint id = (int)get_global_id(0);\ndst[id] = src[id];\n}\n"; pProgram = Program::create( pContext, 1, &sourceCode, &knownSourceSize, retVal); retVal = pProgram->compile(pProgram->getDevices(), nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); cl_program programToLink = pProgram; retVal = pProgram->link(pProgram->getDevices(), nullptr, 1, &programToLink); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, pProgram->getDebugData(rootDeviceIndex)); } using ProgramMultiRootDeviceTests = MultiRootDeviceFixture; TEST_F(ProgramMultiRootDeviceTests, WhenProgramIsCreatedThenBuildInfosVectorIsProperlyResized) { { ClDeviceVector deviceVector; deviceVector.push_back(device1); deviceVector.push_back(device2); EXPECT_EQ(1u, deviceVector[0]->getRootDeviceIndex()); auto program = std::make_unique(context.get(), false, deviceVector); EXPECT_EQ(3u, 
program->buildInfos.size()); } { ClDeviceVector deviceVector; deviceVector.push_back(device2); deviceVector.push_back(device1); EXPECT_EQ(2u, deviceVector[0]->getRootDeviceIndex()); auto program = std::make_unique(context.get(), false, deviceVector); EXPECT_EQ(3u, program->buildInfos.size()); } } class MockCompilerInterfaceWithGtpinParam : public CompilerInterface { public: TranslationOutput::ErrorCode link( const NEO::Device &device, const TranslationInput &input, TranslationOutput &output) override { gtpinInfoPassed = input.GTPinInput; return CompilerInterface::link(device, input, output); } void *gtpinInfoPassed; }; TEST_F(ProgramBinTest, GivenSourceKernelWhenLinkingProgramThenGtpinInitInfoIsPassed) { void *pIgcInitPtr = reinterpret_cast(0x1234); gtpinSetIgcInit(pIgcInitPtr); const char *sourceCode = "__kernel void\nCB(\n__global unsigned int* src, __global unsigned int* dst)\n{\nint id = (int)get_global_id(0);\ndst[id] = src[id];\n}\n"; pProgram = Program::create( pContext, 1, &sourceCode, &knownSourceSize, retVal); std::unique_ptr mockCompilerInterface(new MockCompilerInterfaceWithGtpinParam); retVal = pProgram->compile(pProgram->getDevices(), nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); pDevice->getExecutionEnvironment()->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]->compilerInterface.reset(mockCompilerInterface.get()); cl_program programToLink = pProgram; retVal = pProgram->link(pProgram->getDevices(), nullptr, 1, &programToLink); EXPECT_EQ(pIgcInitPtr, mockCompilerInterface->gtpinInfoPassed); mockCompilerInterface.release(); } TEST(ProgramReplaceDeviceBinary, GivenBinaryZebinThenUseAsBothPackedAndUnpackedBinaryContainer) { ZebinTestData::ValidEmptyProgram zebin; std::unique_ptr src = makeCopy(zebin.storage.data(), zebin.storage.size()); MockContext context; auto device = context.getDevice(0); auto rootDeviceIndex = device->getRootDeviceIndex(); MockProgram program{&context, false, toClDeviceVector(*device)}; 
program.replaceDeviceBinary(std::move(src), zebin.storage.size(), rootDeviceIndex);
    ASSERT_EQ(zebin.storage.size(), program.buildInfos[rootDeviceIndex].packedDeviceBinarySize);
    ASSERT_EQ(zebin.storage.size(), program.buildInfos[rootDeviceIndex].unpackedDeviceBinarySize);
    ASSERT_NE(nullptr, program.buildInfos[rootDeviceIndex].packedDeviceBinary);
    ASSERT_NE(nullptr, program.buildInfos[rootDeviceIndex].unpackedDeviceBinary);
    EXPECT_EQ(0, memcmp(program.buildInfos[rootDeviceIndex].packedDeviceBinary.get(), zebin.storage.data(), program.buildInfos[rootDeviceIndex].packedDeviceBinarySize));
    EXPECT_EQ(0, memcmp(program.buildInfos[rootDeviceIndex].unpackedDeviceBinary.get(), zebin.storage.data(), program.buildInfos[rootDeviceIndex].unpackedDeviceBinarySize));
}

// A null callback is valid only when the user data pointer is null as well.
TEST(ProgramCallbackTest, whenFunctionIsNullptrThenUserDataNeedsToBeNullptr) {
    void *userData = nullptr;
    EXPECT_TRUE(Program::isValidCallback(nullptr, nullptr));
    EXPECT_FALSE(Program::isValidCallback(nullptr, &userData));
}

// Program callback used by the tests below: writes `true` through userData.
// The invoking test passes the address of a bool as userData.
void CL_CALLBACK callbackFuncProgram(
    cl_program program,
    void *userData) {
    *reinterpret_cast(userData) = true;
}

// A non-null callback is valid with or without user data.
TEST(ProgramCallbackTest, whenFunctionIsNotNullptrThenUserDataDoesntMatter) {
    void *userData = nullptr;
    EXPECT_TRUE(Program::isValidCallback(callbackFuncProgram, nullptr));
    EXPECT_TRUE(Program::isValidCallback(callbackFuncProgram, &userData));
}

// invokeCallback() must call the supplied function with the supplied user
// data; the trailing call with null arguments is made with no expectation attached.
TEST(ProgramCallbackTest, whenInvokeCallbackIsCalledThenFunctionIsProperlyInvoked) {
    bool functionCalled = false;
    MockContext context;
    MockProgram program{&context, false, context.getDevices()};
    program.invokeCallback(callbackFuncProgram, &functionCalled);

    EXPECT_TRUE(functionCalled);

    program.invokeCallback(nullptr, nullptr);
}

TEST(BuildProgramTest, givenMultiDeviceProgramWhenBuildingThenStoreAndProcessBinaryOnlyOncePerRootDevice) {
    MockProgram *pProgram = nullptr;
    std::unique_ptr pSource = nullptr;
    size_t sourceSize = 0;
    std::string testFile;

    KernelBinaryHelper kbHelper("CopyBuffer_simd16");

    testFile.append(clFiles);
testFile.append("CopyBuffer_simd16.cl"); pSource = loadDataFromFile( testFile.c_str(), sourceSize); ASSERT_NE(0u, sourceSize); ASSERT_NE(nullptr, pSource); const char *sources[1] = {pSource.get()}; MockUnrestrictiveContextMultiGPU context; cl_int retVal = CL_INVALID_PROGRAM; pProgram = Program::create( &context, 1, sources, &sourceSize, retVal); EXPECT_NE(nullptr, pProgram); ASSERT_EQ(CL_SUCCESS, retVal); cl_build_status buildStatus; for (const auto &device : context.getDevices()) { retVal = clGetProgramBuildInfo(pProgram, device, CL_PROGRAM_BUILD_STATUS, sizeof(buildStatus), &buildStatus, NULL); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(CL_BUILD_NONE, buildStatus); } retVal = clBuildProgram( pProgram, 0, nullptr, nullptr, nullptr, nullptr); for (auto &rootDeviceIndex : context.getRootDeviceIndices()) { EXPECT_EQ(1, pProgram->replaceDeviceBinaryCalledPerRootDevice[rootDeviceIndex]); EXPECT_EQ(1, pProgram->processGenBinaryCalledPerRootDevice[rootDeviceIndex]); } ASSERT_EQ(CL_SUCCESS, retVal); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(BuildProgramTest, givenMultiDeviceProgramWhenBuildingThenStoreKernelInfoPerEachRootDevice) { MockProgram *pProgram = nullptr; std::unique_ptr pSource = nullptr; size_t sourceSize = 0; std::string testFile; KernelBinaryHelper kbHelper("CopyBuffer_simd16"); testFile.append(clFiles); testFile.append("CopyBuffer_simd16.cl"); pSource = loadDataFromFile( testFile.c_str(), sourceSize); ASSERT_NE(0u, sourceSize); ASSERT_NE(nullptr, pSource); const char *sources[1] = {pSource.get()}; MockUnrestrictiveContextMultiGPU context; cl_int retVal = CL_INVALID_PROGRAM; pProgram = Program::create( &context, 1, sources, &sourceSize, retVal); EXPECT_NE(nullptr, pProgram); ASSERT_EQ(CL_SUCCESS, retVal); cl_build_status buildStatus; for (const auto &device : context.getDevices()) { retVal = clGetProgramBuildInfo(pProgram, device, CL_PROGRAM_BUILD_STATUS, sizeof(buildStatus), &buildStatus, NULL); EXPECT_EQ(CL_SUCCESS, retVal); 
EXPECT_EQ(CL_BUILD_NONE, buildStatus); } retVal = clBuildProgram( pProgram, 0, nullptr, nullptr, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); for (auto &rootDeviceIndex : context.getRootDeviceIndices()) { EXPECT_LT(0u, pProgram->getNumKernels()); for (auto i = 0u; i < pProgram->getNumKernels(); i++) { EXPECT_NE(nullptr, pProgram->getKernelInfo(i, rootDeviceIndex)); } } retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(ProgramTest, whenProgramIsBuiltAsAnExecutableForAtLeastOneDeviceThenIsBuiltMethodReturnsTrue) { MockSpecializedContext context; MockProgram program(&context, false, context.getDevices()); EXPECT_FALSE(program.isBuilt()); program.deviceBuildInfos[context.getDevice(0)].buildStatus = CL_BUILD_SUCCESS; program.deviceBuildInfos[context.getDevice(0)].programBinaryType = CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT; program.deviceBuildInfos[context.getDevice(1)].buildStatus = CL_BUILD_ERROR; EXPECT_FALSE(program.isBuilt()); program.deviceBuildInfos[context.getDevice(0)].buildStatus = CL_BUILD_SUCCESS; program.deviceBuildInfos[context.getDevice(0)].programBinaryType = CL_PROGRAM_BINARY_TYPE_EXECUTABLE; EXPECT_TRUE(program.isBuilt()); } TEST(ProgramTest, givenUnlockedProgramWhenRetainForKernelIsCalledThenProgramIsLocked) { MockSpecializedContext context; MockProgram program(&context, false, context.getDevices()); EXPECT_FALSE(program.isLocked()); program.retainForKernel(); EXPECT_TRUE(program.isLocked()); } TEST(ProgramTest, givenLockedProgramWhenReleasingForKernelIsCalledForEachRetainThenProgramIsUnlocked) { MockSpecializedContext context; MockProgram program(&context, false, context.getDevices()); EXPECT_FALSE(program.isLocked()); program.retainForKernel(); EXPECT_TRUE(program.isLocked()); program.retainForKernel(); EXPECT_TRUE(program.isLocked()); program.releaseForKernel(); EXPECT_TRUE(program.isLocked()); program.releaseForKernel(); EXPECT_FALSE(program.isLocked()); } TEST_F(ProgramTests, 
givenValidZebinWithKernelCallingExternalFunctionThenUpdateKernelsBarrierCount) { ZebinTestData::ZebinWithExternalFunctionsInfo zebin; auto program = std::make_unique(nullptr, false, toClDeviceVector(*pClDevice)); program->buildInfos[rootDeviceIndex].unpackedDeviceBinary = makeCopy(zebin.storage.data(), zebin.storage.size()); program->buildInfos[rootDeviceIndex].unpackedDeviceBinarySize = zebin.storage.size(); auto retVal = program->processGenBinary(*pClDevice); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(2U, program->buildInfos[rootDeviceIndex].kernelInfoArray.size()); auto &kernelInfo = program->buildInfos[rootDeviceIndex].kernelInfoArray[0]; EXPECT_EQ(zebin.barrierCount, kernelInfo->kernelDescriptor.kernelAttributes.barrierCount); } compute-runtime-22.14.22890/opencl/test/unit_test/program/program_tests.h000066400000000000000000000010171422164147700263370ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/fixtures/context_fixture.h" #include "gtest/gtest.h" #include class ProgramTests : public NEO::ClDeviceFixture, public ::testing::Test, public NEO::ContextFixture { using NEO::ContextFixture::SetUp; public: void SetUp() override; void TearDown() override; }; compute-runtime-22.14.22890/opencl/test/unit_test/program/program_with_kernel_debug_tests.cpp000066400000000000000000000417221422164147700324420ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/kernel_binary_helper.h" #include "shared/test/common/helpers/kernel_filename_helper.h" #include "shared/test/common/libult/global_environment.h" #include "shared/test/common/mocks/mock_source_level_debugger.h" #include "shared/test/common/test_macros/test.h" #include 
"shared/test/unit_test/helpers/gtest_helpers.h" #include "opencl/test/unit_test/fixtures/program_fixture.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "opencl/test/unit_test/program/program_from_binary.h" #include "opencl/test/unit_test/program/program_tests.h" #include "compiler_options.h" #include "gtest/gtest.h" #include #include #include #include using namespace NEO; TEST_F(ProgramTests, givenDeafultProgramObjectWhenKernelDebugEnabledIsQueriedThenFalseIsReturned) { MockProgram program(pContext, false, toClDeviceVector(*pClDevice)); EXPECT_FALSE(program.isKernelDebugEnabled()); } TEST_F(ProgramTests, givenProgramObjectWhenEnableKernelDebugIsCalledThenProgramHasKernelDebugEnabled) { MockProgram program(pContext, false, toClDeviceVector(*pClDevice)); program.enableKernelDebug(); EXPECT_TRUE(program.isKernelDebugEnabled()); } TEST(ProgramFromBinary, givenBinaryWithDebugDataWhenCreatingProgramFromBinaryThenDebugDataIsAvailable) { if (!defaultHwInfo->capabilityTable.debuggerSupported) { GTEST_SKIP(); } std::string filePath; retrieveBinaryKernelFilename(filePath, "-cl-kernel-debug-enable_", ".bin"); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); auto program = std::make_unique(toClDeviceVector(*device)); program->enableKernelDebug(); size_t binarySize = 0; auto pBinary = loadDataFromFile(filePath.c_str(), binarySize); cl_int retVal = program->createProgramFromBinary(pBinary.get(), binarySize, *device); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, program->getDebugData(device->getRootDeviceIndex())); EXPECT_NE(0u, program->getDebugDataSize(device->getRootDeviceIndex())); } class ProgramWithKernelDebuggingTest : public ProgramFixture, public ::testing::Test { public: void SetUp() override { pDevice = static_cast(&mockContext.getDevice(0)->getDevice()); if (!pDevice->getHardwareInfo().capabilityTable.debuggerSupported) { GTEST_SKIP(); } std::string filename; std::string 
kernelOption(CompilerOptions::debugKernelEnable); KernelFilenameHelper::getKernelFilenameFromInternalOption(kernelOption, filename); kbHelper = std::make_unique(filename, false); CreateProgramWithSource( &mockContext, "copybuffer.cl"); pProgram->enableKernelDebug(); } void TearDown() override { ProgramFixture::TearDown(); } std::unique_ptr kbHelper; MockUnrestrictiveContext mockContext; MockDevice *pDevice = nullptr; }; TEST_F(ProgramWithKernelDebuggingTest, givenEnabledKernelDebugWhenProgramIsCompiledThenInternalOptionsIncludeDebugFlag) { std::string receivedInternalOptions; auto debugVars = NEO::getFclDebugVars(); debugVars.receivedInternalOptionsOutput = &receivedInternalOptions; gEnvironment->fclPushDebugVars(debugVars); cl_int retVal = pProgram->compile(pProgram->getDevices(), nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(CompilerOptions::contains(receivedInternalOptions, CompilerOptions::debugKernelEnable)) << receivedInternalOptions; gEnvironment->fclPopDebugVars(); } TEST_F(ProgramWithKernelDebuggingTest, givenEnabledKernelDebugWhenProgramIsCompiledThenInternalOptionsIncludeDashGFlag) { cl_int retVal = pProgram->compile(pProgram->getDevices(), nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(hasSubstr(pProgram->getOptions(), "-g")); } TEST_F(ProgramWithKernelDebuggingTest, givenEnabledKernelDebugAndOptDisabledWhenProgramIsCompiledThenOptionsIncludeClOptDisableFlag) { MockActiveSourceLevelDebugger *sourceLevelDebugger = new MockActiveSourceLevelDebugger; sourceLevelDebugger->isOptDisabled = true; pDevice->executionEnvironment->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]->debugger.reset(sourceLevelDebugger); cl_int retVal = pProgram->compile(pProgram->getDevices(), nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(hasSubstr(pProgram->getOptions(), CompilerOptions::optDisable.data())); } TEST_F(ProgramWithKernelDebuggingTest, 
GivenDebugVarDebuggerOptDisableZeroWhenOptDisableIsTrueFromDebuggerThenOptDisableIsNotAdded) { DebugManagerStateRestore dgbRestorer; NEO::DebugManager.flags.DebuggerOptDisable.set(0); MockActiveSourceLevelDebugger *sourceLevelDebugger = new MockActiveSourceLevelDebugger; sourceLevelDebugger->isOptDisabled = true; pDevice->executionEnvironment->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]->debugger.reset(sourceLevelDebugger); cl_int retVal = pProgram->compile(pProgram->getDevices(), nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_FALSE(hasSubstr(pProgram->getOptions(), CompilerOptions::optDisable.data())); } TEST_F(ProgramWithKernelDebuggingTest, GivenDebugVarDebuggerOptDisableOneWhenOptDisableIsFalseFromDebuggerThenOptDisableIsAdded) { DebugManagerStateRestore dgbRestorer; NEO::DebugManager.flags.DebuggerOptDisable.set(1); MockActiveSourceLevelDebugger *sourceLevelDebugger = new MockActiveSourceLevelDebugger; sourceLevelDebugger->isOptDisabled = false; pDevice->executionEnvironment->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]->debugger.reset(sourceLevelDebugger); cl_int retVal = pProgram->compile(pProgram->getDevices(), nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(hasSubstr(pProgram->getOptions(), CompilerOptions::optDisable.data())); } TEST_F(ProgramWithKernelDebuggingTest, givenEnabledKernelDebugWhenProgramIsCompiledThenOptionsStartsWithDashSFilename) { MockActiveSourceLevelDebugger *sourceLevelDebugger = new MockActiveSourceLevelDebugger; sourceLevelDebugger->sourceCodeFilename = "debugFileName"; pDevice->executionEnvironment->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]->debugger.reset(sourceLevelDebugger); cl_int retVal = pProgram->compile(pProgram->getDevices(), nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(startsWith(pProgram->getOptions(), "-s debugFileName")); } TEST_F(ProgramWithKernelDebuggingTest, 
givenEnabledKernelDebugWhenProgramIsCompiledWithCmCOptionThenDashSFilenameIsNotPrepended) { MockActiveSourceLevelDebugger *sourceLevelDebugger = new MockActiveSourceLevelDebugger; sourceLevelDebugger->sourceCodeFilename = "debugFileName"; pDevice->executionEnvironment->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]->debugger.reset(sourceLevelDebugger); char options[] = "-cmc -cl-opt-disable"; cl_int retVal = pProgram->compile(pProgram->getDevices(), options, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_FALSE(startsWith(pProgram->getOptions(), "-s debugFileName")); EXPECT_TRUE(hasSubstr(pProgram->getOptions(), CompilerOptions::optDisable.data())); } TEST_F(ProgramWithKernelDebuggingTest, givenEnabledKernelDebugWhenProgramIsBuiltThenInternalOptionsIncludeDebugFlag) { std::string receivedInternalOptions; auto debugVars = NEO::getFclDebugVars(); debugVars.receivedInternalOptionsOutput = &receivedInternalOptions; gEnvironment->fclPushDebugVars(debugVars); cl_int retVal = pProgram->build(pProgram->getDevices(), nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(CompilerOptions::contains(receivedInternalOptions, CompilerOptions::debugKernelEnable)) << receivedInternalOptions; gEnvironment->fclPopDebugVars(); } TEST_F(ProgramWithKernelDebuggingTest, givenEnabledKernelDebugWhenProgramIsBuiltThenOptionsIncludeDashGFlag) { cl_int retVal = pProgram->build(pProgram->getDevices(), nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(hasSubstr(pProgram->getOptions(), "-g")); } TEST_F(ProgramWithKernelDebuggingTest, givenEnabledKernelDebugAndOptDisabledWhenProgramIsBuiltThenOptionsIncludeClOptDisableFlag) { MockActiveSourceLevelDebugger *sourceLevelDebugger = new MockActiveSourceLevelDebugger; sourceLevelDebugger->isOptDisabled = true; pDevice->executionEnvironment->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]->debugger.reset(sourceLevelDebugger); cl_int retVal = pProgram->build(pProgram->getDevices(), nullptr, false); 
EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(hasSubstr(pProgram->getOptions(), CompilerOptions::optDisable.data())); } TEST_F(ProgramWithKernelDebuggingTest, givenEnabledKernelDebugWhenProgramIsBuiltThenOptionsStartsWithDashSFilename) { MockActiveSourceLevelDebugger *sourceLevelDebugger = new MockActiveSourceLevelDebugger; sourceLevelDebugger->sourceCodeFilename = "debugFileName"; pDevice->executionEnvironment->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]->debugger.reset(sourceLevelDebugger); cl_int retVal = pProgram->build(pProgram->getDevices(), nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(startsWith(pProgram->getOptions(), "-s debugFileName")); } TEST_F(ProgramWithKernelDebuggingTest, givenEnabledKernelDebugWhenProgramIsBuiltWithCmCOptionThenDashSFilenameIsNotPrepended) { MockActiveSourceLevelDebugger *sourceLevelDebugger = new MockActiveSourceLevelDebugger; sourceLevelDebugger->sourceCodeFilename = "debugFileName"; pDevice->executionEnvironment->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]->debugger.reset(sourceLevelDebugger); char options[] = "-cmc -cl-opt-disable"; cl_int retVal = pProgram->build(pProgram->getDevices(), options, false); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_FALSE(startsWith(pProgram->getOptions(), "-s debugFileName")); } TEST_F(ProgramWithKernelDebuggingTest, givenEnabledKernelDebugWhenProgramIsLinkedThenKernelDebugOptionsAreAppended) { MockActiveSourceLevelDebugger *sourceLevelDebugger = new MockActiveSourceLevelDebugger; pDevice->executionEnvironment->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]->debugger.reset(sourceLevelDebugger); cl_int retVal = pProgram->compile(pProgram->getDevices(), nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); auto program = std::unique_ptr(new MockProgramAppendKernelDebugOptions(&mockContext, false, mockContext.getDevices())); program->enableKernelDebug(); cl_program clProgramToLink = pProgram; retVal = program->link(pProgram->getDevices(), nullptr, 1, 
&clProgramToLink); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(static_cast(mockContext.getRootDeviceIndices().size()), program->appendKernelDebugOptionsCalled); } TEST_F(ProgramWithKernelDebuggingTest, givenEnabledKernelDebugWhenProgramIsBuiltThenDebuggerIsNotifiedWithKernelDebugData) { const size_t rootDeviceIndicesSize = mockContext.getRootDeviceIndices().size(); std::vector sourceLevelDebugger(rootDeviceIndicesSize, nullptr); size_t i = 0; for (auto &rootDeviceIndex : mockContext.getRootDeviceIndices()) { sourceLevelDebugger[i] = new MockSourceLevelDebugger(nullptr); sourceLevelDebugger[i]->setActive(true); pDevice->executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->debugger.reset(sourceLevelDebugger[i]); i++; } cl_int retVal = pProgram->build(pProgram->getDevices(), nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); for (auto &el : sourceLevelDebugger) { EXPECT_EQ(1u, el->isOptimizationDisabledCalled); EXPECT_EQ(false, el->isOptimizationDisabledResult); EXPECT_EQ(1u, el->notifySourceCodeCalled); EXPECT_EQ(false, el->notifySourceCodeResult); EXPECT_EQ(1u, el->notifyKernelDebugDataCalled); } } TEST_F(ProgramWithKernelDebuggingTest, givenEnabledKernelDebugWhenProgramIsLinkedThenDebuggerIsNotifiedWithKernelDebugData) { const size_t rootDeviceIndicesSize = mockContext.getRootDeviceIndices().size(); std::vector sourceLevelDebugger(rootDeviceIndicesSize, nullptr); size_t i = 0; for (auto &rootDeviceIndex : mockContext.getRootDeviceIndices()) { sourceLevelDebugger[i] = new MockSourceLevelDebugger(nullptr); sourceLevelDebugger[i]->setActive(true); pDevice->executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->debugger.reset(sourceLevelDebugger[i]); i++; } cl_int retVal = pProgram->compile(pProgram->getDevices(), nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); cl_program program = pProgram; retVal = pProgram->link(pProgram->getDevices(), nullptr, 1, &program); EXPECT_EQ(CL_SUCCESS, retVal); for (auto &el : sourceLevelDebugger) { EXPECT_EQ(2u, 
el->isOptimizationDisabledCalled); EXPECT_EQ(false, el->isOptimizationDisabledResult); EXPECT_EQ(1u, el->notifySourceCodeCalled); EXPECT_EQ(false, el->notifySourceCodeResult); EXPECT_EQ(1u, el->notifyKernelDebugDataCalled); } } TEST_F(ProgramWithKernelDebuggingTest, givenProgramWithKernelDebugEnabledWhenBuiltThenPatchTokenAllocateSipSurfaceHasSizeGreaterThanZero) { auto retVal = pProgram->build(pProgram->getDevices(), CompilerOptions::debugKernelEnable.data(), false); EXPECT_EQ(CL_SUCCESS, retVal); auto kernelInfo = pProgram->getKernelInfo("CopyBuffer", pDevice->getRootDeviceIndex()); EXPECT_NE(0u, kernelInfo->kernelDescriptor.kernelAttributes.perThreadSystemThreadSurfaceSize); } TEST_F(ProgramWithKernelDebuggingTest, givenGtpinInitializedWhenCreatingProgramFromBinaryThenDebugDataIsAvailable) { bool gtpinInitializedBackup = NEO::isGTPinInitialized; NEO::isGTPinInitialized = true; auto retVal = pProgram->build(pProgram->getDevices(), CompilerOptions::debugKernelEnable.data(), false); EXPECT_EQ(CL_SUCCESS, retVal); auto kernelInfo = pProgram->getKernelInfo("CopyBuffer", pDevice->getRootDeviceIndex()); EXPECT_NE(kernelInfo->debugData.vIsa, nullptr); EXPECT_NE(0u, kernelInfo->debugData.vIsaSize); NEO::isGTPinInitialized = gtpinInitializedBackup; } TEST_F(ProgramWithKernelDebuggingTest, givenGtpinNotInitializedWhenCreatingProgramFromBinaryThenDebugDataINullptr) { bool gtpinInitializedBackup = NEO::isGTPinInitialized; NEO::isGTPinInitialized = false; pProgram->kernelDebugEnabled = false; auto retVal = pProgram->build(pProgram->getDevices(), CompilerOptions::debugKernelEnable.data(), false); EXPECT_EQ(CL_SUCCESS, retVal); auto kernelInfo = pProgram->getKernelInfo("CopyBuffer", pDevice->getRootDeviceIndex()); EXPECT_EQ(kernelInfo->debugData.vIsa, nullptr); EXPECT_EQ(0u, kernelInfo->debugData.vIsaSize); NEO::isGTPinInitialized = gtpinInitializedBackup; } TEST_F(ProgramWithKernelDebuggingTest, givenKernelDebugEnabledWhenProgramIsBuiltThenDebugDataIsStored) { auto retVal = 
pProgram->build(pProgram->getDevices(), nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); auto debugData = pProgram->getDebugData(pDevice->getRootDeviceIndex()); EXPECT_NE(nullptr, debugData); EXPECT_NE(0u, pProgram->getDebugDataSize(pDevice->getRootDeviceIndex())); } TEST_F(ProgramWithKernelDebuggingTest, givenProgramWithKernelDebugEnabledWhenProcessDebugDataIsCalledThenKernelInfosAreFilledWithDebugData) { auto retVal = pProgram->build(pProgram->getDevices(), nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); pProgram->processDebugData(pDevice->getRootDeviceIndex()); auto kernelInfo = pProgram->getKernelInfo("CopyBuffer", pDevice->getRootDeviceIndex()); EXPECT_NE(0u, kernelInfo->debugData.vIsaSize); EXPECT_NE(nullptr, kernelInfo->debugData.vIsa); } TEST_F(ProgramWithKernelDebuggingTest, givenProgramWithNonZebinaryFormatAndKernelDebugEnabledWhenProgramIsBuiltThenProcessDebugDataIsCalledAndDebuggerNotified) { MockSourceLevelDebugger *sourceLevelDebugger = new MockSourceLevelDebugger; pDevice->executionEnvironment->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]->debugger.reset(sourceLevelDebugger); pProgram->enableKernelDebug(); cl_int retVal = pProgram->build(pProgram->getDevices(), nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_FALSE(pProgram->wasCreateDebugZebinCalled); EXPECT_TRUE(pProgram->wasProcessDebugDataCalled); EXPECT_EQ(1u, sourceLevelDebugger->notifyKernelDebugDataCalled); }compute-runtime-22.14.22890/opencl/test/unit_test/program/program_with_source.h000066400000000000000000000040721422164147700275340ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/compiler_interface/compiler_interface.h" #include "shared/test/common/fixtures/memory_management_fixture.h" #include "shared/test/common/helpers/kernel_binary_helper.h" #include "opencl/test/unit_test/fixtures/context_fixture.h" #include "opencl/test/unit_test/fixtures/platform_fixture.h" #include 
"opencl/test/unit_test/fixtures/program_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" namespace NEO { // ProgramFromSource Test Fixture // Used to test the Program class //////////////////////////////////////////////////////////////////////////////// class ProgramFromSourceTest : public ContextFixture, public PlatformFixture, public ProgramFixture, public testing::TestWithParam> { using ContextFixture::SetUp; using PlatformFixture::SetUp; protected: void SetUp() override { sourceFileName = "CopyBuffer_simd16.cl"; binaryFileName = "CopyBuffer_simd16"; kernelName = "CopyBuffer"; kbHelper = new KernelBinaryHelper(binaryFileName); PlatformFixture::SetUp(); cl_device_id device = pPlatform->getClDevice(0); rootDeviceIndex = pPlatform->getClDevice(0)->getRootDeviceIndex(); ContextFixture::SetUp(1, &device); ProgramFixture::SetUp(); CreateProgramWithSource( pContext, sourceFileName); } void TearDown() override { knownSource.reset(); ProgramFixture::TearDown(); ContextFixture::TearDown(); PlatformFixture::TearDown(); delete kbHelper; } KernelBinaryHelper *kbHelper = nullptr; const char *sourceFileName = nullptr; const char *binaryFileName = nullptr; const char *kernelName = nullptr; cl_int retVal = CL_SUCCESS; uint32_t rootDeviceIndex = std::numeric_limits::max(); }; } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/program/program_with_zebin.cpp000066400000000000000000000036721422164147700277030ustar00rootroot00000000000000/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/program/program_with_zebin.h" #include "shared/test/unit_test/device_binary_format/zebin_tests.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" using namespace NEO; void ProgramWithZebinFixture::SetUp() { ProgramTests::SetUp(); program = std::make_unique(toClDeviceVector(*pClDevice)); } void ProgramWithZebinFixture::TearDown() { program->setGlobalSurface(nullptr); 
program->setConstantSurface(nullptr); program->getKernelInfoArray(rootDeviceIndex).clear(); ProgramTests::TearDown(); } void ProgramWithZebinFixture::addEmptyZebin(NEO::MockProgram *program) { auto zebin = ZebinTestData::ValidEmptyProgram(); program->buildInfos[rootDeviceIndex].unpackedDeviceBinarySize = zebin.storage.size(); program->buildInfos[rootDeviceIndex].unpackedDeviceBinary.reset(new char[zebin.storage.size()]); memcpy_s(program->buildInfos[rootDeviceIndex].unpackedDeviceBinary.get(), program->buildInfos[rootDeviceIndex].unpackedDeviceBinarySize, zebin.storage.data(), zebin.storage.size()); } void ProgramWithZebinFixture::populateProgramWithSegments(NEO::MockProgram *program) { kernelInfo = std::make_unique(); kernelInfo->kernelDescriptor.kernelMetadata.kernelName = kernelName; mockAlloc = std::make_unique(); kernelInfo->kernelAllocation = mockAlloc.get(); program->addKernelInfo(kernelInfo.get(), rootDeviceIndex); globalSurface = std::make_unique(); constantSurface = std::make_unique(); program->setGlobalSurface(&globalSurface->mockGfxAllocation); program->setConstantSurface(&constantSurface->mockGfxAllocation); program->buildInfos[rootDeviceIndex].constStringSectionData.initData = &strings; program->buildInfos[rootDeviceIndex].constStringSectionData.size = sizeof(strings); }compute-runtime-22.14.22890/opencl/test/unit_test/program/program_with_zebin.h000066400000000000000000000015151422164147700273420ustar00rootroot00000000000000/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/test/unit_test/mocks/mock_program.h" #include "opencl/test/unit_test/program/program_tests.h" using namespace NEO; class MockBuffer; class ProgramWithZebinFixture : public ProgramTests { public: std::unique_ptr program; std::unique_ptr kernelInfo; std::unique_ptr mockAlloc; std::unique_ptr globalSurface; std::unique_ptr constantSurface; const char strings[12] = "Hello olleH"; const char kernelName[8] = "kernel1"; void 
SetUp() override; void TearDown() override; void addEmptyZebin(MockProgram *program); void populateProgramWithSegments(MockProgram *program); ~ProgramWithZebinFixture() = default; };compute-runtime-22.14.22890/opencl/test/unit_test/program/program_with_zebin_tests.cpp000066400000000000000000000155111422164147700311200ustar00rootroot00000000000000/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/mocks/mock_source_level_debugger.h" #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/program/program_with_zebin.h" #include using namespace NEO; TEST_F(ProgramWithZebinFixture, givenNoZebinThenSegmentsAreEmpty) { auto segments = program->getZebinSegments(pClDevice->getRootDeviceIndex()); EXPECT_EQ(std::numeric_limits::max(), segments.constData.address); EXPECT_EQ(0ULL, segments.constData.size); EXPECT_EQ(std::numeric_limits::max(), segments.varData.address); EXPECT_EQ(0ULL, segments.varData.size); EXPECT_EQ(std::numeric_limits::max(), segments.stringData.address); EXPECT_EQ(0ULL, segments.stringData.size); EXPECT_TRUE(segments.nameToSegMap.empty()); } TEST_F(ProgramWithZebinFixture, givenZebinSegmentsThenSegmentsArePopulated) { populateProgramWithSegments(program.get()); auto segments = program->getZebinSegments(rootDeviceIndex); auto checkGPUSeg = [](NEO::GraphicsAllocation *alloc, NEO::Debug::Segments::Segment segment) { EXPECT_EQ(static_cast(alloc->getGpuAddress()), segment.address); EXPECT_EQ(static_cast(alloc->getUnderlyingBufferSize()), segment.size); }; checkGPUSeg(program->buildInfos[rootDeviceIndex].constantSurface, segments.constData); checkGPUSeg(program->buildInfos[rootDeviceIndex].globalSurface, segments.varData); checkGPUSeg(program->getKernelInfoArray(rootDeviceIndex)[0]->getGraphicsAllocation(), segments.nameToSegMap["kernel1"]); 
EXPECT_EQ(reinterpret_cast(program->buildInfos[rootDeviceIndex].constStringSectionData.initData), segments.stringData.address); EXPECT_EQ(reinterpret_cast(program->buildInfos[rootDeviceIndex].constStringSectionData.initData), strings); EXPECT_EQ(program->buildInfos[rootDeviceIndex].constStringSectionData.size, sizeof(strings)); } TEST_F(ProgramWithZebinFixture, givenNonEmptyDebugDataThenDebugZebinIsNotCreated) { addEmptyZebin(program.get()); populateProgramWithSegments(program.get()); program->buildInfos[rootDeviceIndex].debugDataSize = 8u; program->buildInfos[rootDeviceIndex].debugData.reset(nullptr); program->createDebugZebin(rootDeviceIndex); EXPECT_EQ(nullptr, program->buildInfos[rootDeviceIndex].debugData.get()); } TEST_F(ProgramWithZebinFixture, givenEmptyDebugDataThenDebugZebinIsCreatedAndStoredInDebugData) { addEmptyZebin(program.get()); populateProgramWithSegments(program.get()); program->buildInfos[rootDeviceIndex].debugDataSize = 0u; program->buildInfos[rootDeviceIndex].debugData.reset(nullptr); program->createDebugZebin(rootDeviceIndex); EXPECT_NE(nullptr, program->buildInfos[rootDeviceIndex].debugData.get()); } TEST_F(ProgramWithZebinFixture, givenEmptyDebugDataAndZebinBinaryFormatThenCreateDebugZebinAndReturnOnGetInfo) { addEmptyZebin(program.get()); populateProgramWithSegments(program.get()); program->buildInfos[rootDeviceIndex].debugDataSize = 0u; program->buildInfos[rootDeviceIndex].debugData.reset(nullptr); EXPECT_FALSE(program->wasCreateDebugZebinCalled); auto retVal = CL_INVALID_VALUE; size_t debugDataSize = 0; retVal = program->getInfo(CL_PROGRAM_DEBUG_INFO_SIZES_INTEL, sizeof(debugDataSize), &debugDataSize, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, program->buildInfos[rootDeviceIndex].debugData); EXPECT_TRUE(program->wasCreateDebugZebinCalled); program->wasCreateDebugZebinCalled = false; retVal = program->getInfo(CL_PROGRAM_DEBUG_INFO_SIZES_INTEL, sizeof(debugDataSize), &debugDataSize, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); 
EXPECT_FALSE(program->wasCreateDebugZebinCalled); std::unique_ptr debugData = std::make_unique(debugDataSize); for (size_t n = 0; n < sizeof(debugData); n++) { debugData[n] = 0; } char *pDebugData = &debugData[0]; size_t retData = 0; retVal = program->getInfo(CL_PROGRAM_DEBUG_INFO_INTEL, debugDataSize, &pDebugData, &retData); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_FALSE(program->wasCreateDebugZebinCalled); program->buildInfos[rootDeviceIndex].debugDataSize = 0u; program->buildInfos[rootDeviceIndex].debugData.reset(nullptr); std::unique_ptr debugData2 = std::make_unique(debugDataSize); for (size_t n = 0; n < sizeof(debugData2); n++) { debugData2[n] = 0; } char *pDebugData2 = &debugData2[0]; size_t retData2 = 0; retVal = program->getInfo(CL_PROGRAM_DEBUG_INFO_INTEL, debugDataSize, &pDebugData2, &retData2); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(program->wasCreateDebugZebinCalled); cl_uint numDevices; retVal = clGetProgramInfo(program.get(), CL_PROGRAM_NUM_DEVICES, sizeof(numDevices), &numDevices, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(numDevices * sizeof(debugData), retData); } TEST_F(ProgramWithZebinFixture, givenZebinFormatAndDebuggerNotAvailableWhenNotifyingDebuggerThenCreateDebugZebinIsCalled) { pClDevice->getExecutionEnvironment()->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]->debugger.reset(nullptr); addEmptyZebin(program.get()); populateProgramWithSegments(program.get()); auto &buildInfo = program->buildInfos[rootDeviceIndex]; buildInfo.debugDataSize = 0u; buildInfo.debugData.reset(nullptr); for (auto &device : program->getDevices()) { program->notifyDebuggerWithDebugData(device); } EXPECT_TRUE(program->wasCreateDebugZebinCalled); EXPECT_FALSE(program->wasProcessDebugDataCalled); EXPECT_NE(nullptr, program->buildInfos[rootDeviceIndex].debugData); EXPECT_GT(program->buildInfos[rootDeviceIndex].debugDataSize, 0u); } TEST_F(ProgramWithZebinFixture, 
givenZebinFormatAndDebuggerAvailableWhenNotifyingDebuggerThenCreateDebugZebinIsCalledAndDebuggerNotified) { MockSourceLevelDebugger *sourceLevelDebugger = new MockSourceLevelDebugger; sourceLevelDebugger->setActive(true); pClDevice->getExecutionEnvironment()->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]->debugger.reset(sourceLevelDebugger); addEmptyZebin(program.get()); populateProgramWithSegments(program.get()); auto &buildInfo = program->buildInfos[rootDeviceIndex]; buildInfo.debugDataSize = 0u; buildInfo.debugData.reset(nullptr); for (auto &device : program->getDevices()) { program->notifyDebuggerWithDebugData(device); } EXPECT_TRUE(program->wasCreateDebugZebinCalled); EXPECT_FALSE(program->wasProcessDebugDataCalled); EXPECT_NE(nullptr, program->buildInfos[rootDeviceIndex].debugData); EXPECT_GT(program->buildInfos[rootDeviceIndex].debugDataSize, 0u); EXPECT_EQ(1u, sourceLevelDebugger->notifyKernelDebugDataCalled); }compute-runtime-22.14.22890/opencl/test/unit_test/sampler/000077500000000000000000000000001422164147700232725ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/sampler/CMakeLists.txt000066400000000000000000000010621422164147700260310ustar00rootroot00000000000000# # Copyright (C) 2018-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_sampler ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/get_sampler_info_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sampler_set_arg_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sampler_tests.cpp ) if(TESTS_XEHP_AND_LATER) list(APPEND IGDRCL_SRCS_tests_sampler ${CMAKE_CURRENT_SOURCE_DIR}/sampler_tests_xehp_and_later.cpp ) endif() target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_sampler}) add_subdirectories() compute-runtime-22.14.22890/opencl/test/unit_test/sampler/get_sampler_info_tests.cpp000066400000000000000000000044171422164147700305430ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * 
SPDX-License-Identifier: MIT * */ #include "opencl/source/sampler/sampler.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "gtest/gtest.h" using namespace NEO; TEST(GetSamplerInfo, GivenInvalidFlagsWhenGettingSamplerInfoThenInvalidValueErrorIsReturnedAndValueSizeRetIsNotUpdated) { MockContext context; auto retVal = CL_INVALID_VALUE; auto normalizedCoords = CL_TRUE; auto addressingMode = CL_ADDRESS_MIRRORED_REPEAT; auto filterMode = CL_FILTER_NEAREST; auto sampler = Sampler::create(&context, normalizedCoords, addressingMode, filterMode, retVal); size_t valueSizeRet = 0x1234; retVal = sampler->getInfo(0, 0, nullptr, &valueSizeRet); EXPECT_EQ(CL_INVALID_VALUE, retVal); EXPECT_EQ(0x1234u, valueSizeRet); delete sampler; } struct GetSamplerInfo : public ::testing::TestWithParam { void SetUp() override { param = GetParam(); } cl_sampler_info param; }; TEST_P(GetSamplerInfo, GivenValidParamWhenGettingInfoThenSuccessIsReturned) { MockContext context; auto retVal = CL_INVALID_VALUE; auto normalizedCoords = CL_TRUE; auto addressingMode = CL_ADDRESS_MIRRORED_REPEAT; auto filterMode = CL_FILTER_NEAREST; auto sampler = Sampler::create(&context, normalizedCoords, addressingMode, filterMode, CL_FILTER_NEAREST, 2.0f, 3.0f, retVal); size_t sizeReturned = 0; retVal = sampler->getInfo(param, 0, nullptr, &sizeReturned); ASSERT_EQ(CL_SUCCESS, retVal) << " param = " << param; ASSERT_NE(0u, sizeReturned); auto *object = new char[sizeReturned]; retVal = sampler->getInfo(param, sizeReturned, object, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); delete[] object; delete sampler; } // Define new command types to run the parameterized tests cl_sampler_info samplerInfoParams[] = { CL_SAMPLER_REFERENCE_COUNT, CL_SAMPLER_CONTEXT, CL_SAMPLER_NORMALIZED_COORDS, CL_SAMPLER_ADDRESSING_MODE, CL_SAMPLER_FILTER_MODE, CL_SAMPLER_MIP_FILTER_MODE, CL_SAMPLER_LOD_MIN, CL_SAMPLER_LOD_MAX}; INSTANTIATE_TEST_CASE_P( Sampler_, GetSamplerInfo, testing::ValuesIn(samplerInfoParams)); 
compute-runtime-22.14.22890/opencl/test/unit_test/sampler/sampler_set_arg_tests.cpp000066400000000000000000000510321422164147700303700ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/ptr_math.h" #include "shared/source/utilities/numeric.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/helpers/sampler_helpers.h" #include "opencl/source/kernel/kernel.h" #include "opencl/source/sampler/sampler.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_program.h" using namespace NEO; namespace NEO { class Surface; }; class SamplerSetArgFixture : public ClDeviceFixture { public: SamplerSetArgFixture() { memset(&kernelHeader, 0, sizeof(kernelHeader)); } protected: void SetUp() { ClDeviceFixture::SetUp(); pKernelInfo = std::make_unique(); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1; pKernelInfo->heapInfo.pDsh = samplerStateHeap; pKernelInfo->heapInfo.DynamicStateHeapSize = sizeof(samplerStateHeap); // setup kernel arg offsets pKernelInfo->addArgSampler(0, 0x40, 0x8, 0x10, 0x4); pKernelInfo->addExtendedDeviceSideEnqueueDescriptor(0, 0x0); pKernelInfo->addArgSampler(1, 0x40); program = std::make_unique(toClDeviceVector(*pClDevice)); retVal = CL_INVALID_VALUE; pMultiDeviceKernel = MultiDeviceKernel::create(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), &retVal); pKernel = static_cast(pMultiDeviceKernel->getKernel(rootDeviceIndex)); ASSERT_NE(nullptr, pKernel); ASSERT_EQ(CL_SUCCESS, retVal); pKernel->setKernelArgHandler(0, &Kernel::setArgSampler); pKernel->setKernelArgHandler(1, &Kernel::setArgSampler); uint32_t crossThreadData[crossThreadDataSize] = {}; 
pKernel->setCrossThreadData(crossThreadData, sizeof(crossThreadData)); context = new MockContext(pClDevice); retVal = CL_INVALID_VALUE; } void TearDown() { delete pMultiDeviceKernel; delete sampler; delete context; ClDeviceFixture::TearDown(); } bool crossThreadDataUnchanged() { for (uint32_t i = 0; i < crossThreadDataSize; i++) { if (pKernel->mockCrossThreadData[i] != 0u) { return false; } } return true; } void createSampler() { sampler = Sampler::create( context, CL_TRUE, CL_ADDRESS_MIRRORED_REPEAT, CL_FILTER_NEAREST, retVal); } static const uint32_t crossThreadDataSize = 0x40; cl_int retVal = CL_SUCCESS; std::unique_ptr program; MockKernel *pKernel = nullptr; MultiDeviceKernel *pMultiDeviceKernel = nullptr; SKernelBinaryHeaderCommon kernelHeader; std::unique_ptr pKernelInfo; char samplerStateHeap[0x80]; MockContext *context; Sampler *sampler = nullptr; }; typedef Test SamplerSetArgTest; HWTEST_F(SamplerSetArgTest, WhenSettingKernelArgSamplerThenSamplerStatesAreCorrect) { typedef typename FamilyType::SAMPLER_STATE SAMPLER_STATE; createSampler(); cl_sampler samplerObj = sampler; retVal = clSetKernelArg( pMultiDeviceKernel, 0, sizeof(samplerObj), &samplerObj); ASSERT_EQ(CL_SUCCESS, retVal); auto samplerState = reinterpret_cast( ptrOffset(pKernel->getDynamicStateHeap(), pKernelInfo->argAsSmp(0).bindful)); EXPECT_EQ(static_cast(CL_TRUE), static_cast(!samplerState->getNonNormalizedCoordinateEnable())); EXPECT_EQ(SAMPLER_STATE::TEXTURE_COORDINATE_MODE_MIRROR, samplerState->getTcxAddressControlMode()); EXPECT_EQ(SAMPLER_STATE::TEXTURE_COORDINATE_MODE_MIRROR, samplerState->getTcyAddressControlMode()); EXPECT_EQ(SAMPLER_STATE::TEXTURE_COORDINATE_MODE_MIRROR, samplerState->getTczAddressControlMode()); EXPECT_EQ(SAMPLER_STATE::MIN_MODE_FILTER_NEAREST, samplerState->getMinModeFilter()); EXPECT_EQ(SAMPLER_STATE::MAG_MODE_FILTER_NEAREST, samplerState->getMagModeFilter()); EXPECT_EQ(SAMPLER_STATE::MIP_MODE_FILTER_NEAREST, samplerState->getMipModeFilter()); 
EXPECT_EQ(SAMPLER_STATE::LOD_PRECLAMP_MODE::LOD_PRECLAMP_MODE_OGL, samplerState->getLodPreclampMode()); std::vector surfaces; pKernel->getResidency(surfaces); EXPECT_EQ(0u, surfaces.size()); } HWTEST_F(SamplerSetArgTest, WhenGettingKernelArgThenSamplerIsReturned) { createSampler(); cl_sampler samplerObj = sampler; retVal = pKernel->setArg( 0, sizeof(samplerObj), &samplerObj); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(samplerObj, pKernel->getKernelArg(0)); } HWTEST_F(SamplerSetArgTest, GivenSamplerObjectWhenSetKernelArgIsCalledThenIncreaseSamplerRefcount) { cl_sampler samplerObj = Sampler::create( context, CL_TRUE, CL_ADDRESS_MIRRORED_REPEAT, CL_FILTER_NEAREST, retVal); auto pSampler = castToObject(samplerObj); auto refCountBefore = pSampler->getRefInternalCount(); retVal = pKernel->setArg( 0, sizeof(samplerObj), &samplerObj); ASSERT_EQ(CL_SUCCESS, retVal); auto refCountAfter = pSampler->getRefInternalCount(); EXPECT_EQ(refCountBefore + 1, refCountAfter); retVal = clReleaseSampler(samplerObj); ASSERT_EQ(CL_SUCCESS, retVal); } HWTEST_F(SamplerSetArgTest, GivenSamplerObjectWhenSetKernelArgIsCalledThenSamplerObjectSurvivesClReleaseSampler) { cl_sampler samplerObj = Sampler::create( context, CL_TRUE, CL_ADDRESS_MIRRORED_REPEAT, CL_FILTER_NEAREST, retVal); auto pSampler = castToObject(samplerObj); auto refCountBefore = pSampler->getRefInternalCount(); retVal = pKernel->setArg( 0, sizeof(samplerObj), &samplerObj); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clReleaseSampler(samplerObj); ASSERT_EQ(CL_SUCCESS, retVal); auto refCountAfter = pSampler->getRefInternalCount(); EXPECT_EQ(refCountBefore, refCountAfter); } HWTEST_F(SamplerSetArgTest, GivenSamplerObjectWhenSetKernelArgIsCalledAndKernelIsDeletedThenRefCountIsUnchanged) { auto myKernel = std::make_unique(program.get(), *pKernelInfo, *pClDevice); ASSERT_NE(nullptr, myKernel.get()); ASSERT_EQ(CL_SUCCESS, myKernel->initialize()); myKernel->setKernelArgHandler(0, &Kernel::setArgSampler); myKernel->setKernelArgHandler(1, 
&Kernel::setArgSampler); uint32_t crossThreadData[crossThreadDataSize] = {}; myKernel->setCrossThreadData(crossThreadData, sizeof(crossThreadData)); cl_sampler samplerObj = Sampler::create( context, CL_TRUE, CL_ADDRESS_MIRRORED_REPEAT, CL_FILTER_NEAREST, retVal); auto pSampler = castToObject(samplerObj); auto refCountBefore = pSampler->getRefInternalCount(); retVal = myKernel->setArg( 0, sizeof(samplerObj), &samplerObj); ASSERT_EQ(CL_SUCCESS, retVal); myKernel.reset(); auto refCountAfter = pSampler->getRefInternalCount(); EXPECT_EQ(refCountBefore, refCountAfter); retVal = clReleaseSampler(samplerObj); ASSERT_EQ(CL_SUCCESS, retVal); } HWTEST_F(SamplerSetArgTest, GivenNewSamplerObjectWhensSetKernelArgIsCalledThenDecreaseOldSamplerRefcount) { cl_sampler samplerObj = Sampler::create( context, CL_TRUE, CL_ADDRESS_MIRRORED_REPEAT, CL_FILTER_NEAREST, retVal); auto pSampler = castToObject(samplerObj); retVal = pKernel->setArg( 0, sizeof(samplerObj), &samplerObj); ASSERT_EQ(CL_SUCCESS, retVal); auto refCountBefore = pSampler->getRefInternalCount(); cl_sampler samplerObj2 = Sampler::create( context, CL_TRUE, CL_ADDRESS_MIRRORED_REPEAT, CL_FILTER_NEAREST, retVal); retVal = pKernel->setArg( 0, sizeof(samplerObj2), &samplerObj2); ASSERT_EQ(CL_SUCCESS, retVal); auto refCountAfter = pSampler->getRefInternalCount(); EXPECT_EQ(refCountBefore - 1, refCountAfter); retVal = clReleaseSampler(samplerObj); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clReleaseSampler(samplerObj2); ASSERT_EQ(CL_SUCCESS, retVal); } HWTEST_F(SamplerSetArgTest, GivenIncorrentSamplerObjectWhenSetKernelArgSamplerIsCalledThenLeaveRefcountAsIs) { auto notSamplerObj = std::unique_ptr(ImageHelper::create(context)); auto pNotSampler = castToObject(notSamplerObj.get()); auto refCountBefore = pNotSampler->getRefInternalCount(); retVal = pKernel->setArgSampler( 0, sizeof(notSamplerObj.get()), notSamplerObj.get()); auto refCountAfter = pNotSampler->getRefInternalCount(); EXPECT_EQ(refCountBefore, refCountAfter); } 
HWTEST_F(SamplerSetArgTest, GivenFilteringNearestAndAddressingClampWhenSettingKernelArgumentThenConstantBufferIsSet) { sampler = Sampler::create( context, CL_TRUE, CL_ADDRESS_CLAMP, CL_FILTER_NEAREST, retVal); cl_sampler samplerObj = sampler; retVal = pKernel->setArg( 0, sizeof(samplerObj), &samplerObj); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(samplerObj, pKernel->getKernelArg(0)); auto crossThreadData = reinterpret_cast(pKernel->getCrossThreadData()); auto snapWaCrossThreadData = ptrOffset(crossThreadData, 0x4); unsigned int snapWaValue = 0xffffffff; unsigned int objectId = SAMPLER_OBJECT_ID_SHIFT + pKernelInfo->argAsSmp(0).bindful; EXPECT_EQ(snapWaValue, *snapWaCrossThreadData); EXPECT_EQ(objectId, *crossThreadData); } HWTEST_F(SamplerSetArgTest, GivenKernelWithoutObjIdOffsetWhenSettingArgThenObjIdNotPatched) { sampler = Sampler::create( context, CL_TRUE, CL_ADDRESS_CLAMP, CL_FILTER_NEAREST, retVal); cl_sampler samplerObj = sampler; retVal = pKernel->setArg( 1, sizeof(samplerObj), &samplerObj); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(samplerObj, pKernel->getKernelArg(1)); EXPECT_TRUE(crossThreadDataUnchanged()); } HWTEST_F(SamplerSetArgTest, GivenNullWhenSettingKernelArgThenInvalidSamplerErrorIsReturned) { createSampler(); cl_sampler samplerObj = sampler; retVal = pKernel->setArg( 0, sizeof(samplerObj), nullptr); ASSERT_EQ(CL_INVALID_SAMPLER, retVal); } HWTEST_F(SamplerSetArgTest, GivenInvalidSamplerWhenSettingKernelArgThenInvalidSamplerErrorIsReturned) { createSampler(); cl_sampler samplerObj = sampler; const void *notASampler = reinterpret_cast(pKernel); retVal = pKernel->setArg( 0, sizeof(samplerObj), notASampler); ASSERT_EQ(CL_INVALID_SAMPLER, retVal); } TEST_F(SamplerSetArgTest, givenSamplerTypeStrAndIsSamplerTrueWhenInitializeKernelThenKernelArgumentsTypeIsSamplerObj) { pKernelInfo->addExtendedMetadata(0, "", "sampler*"); pKernelInfo->addExtendedMetadata(1, "", "sampler"); auto pMockKernell = std::make_unique(program.get(), *pKernelInfo, *pClDevice); 
ASSERT_EQ(CL_SUCCESS, pMockKernell->initialize()); EXPECT_EQ(pMockKernell->getKernelArguments()[0].type, MockKernel::SAMPLER_OBJ); EXPECT_EQ(pMockKernell->getKernelArguments()[1].type, MockKernel::SAMPLER_OBJ); } TEST_F(SamplerSetArgTest, givenSamplerTypeStrAndAndIsSamplerFalseWhenInitializeKernelThenKernelArgumentsTypeIsNotSamplerObj) { pKernelInfo->kernelDescriptor.payloadMappings.explicitArgs.clear(); pKernelInfo->addArgBuffer(0); pKernelInfo->addArgBuffer(1); pKernelInfo->addExtendedMetadata(0, "", "sampler*"); pKernelInfo->addExtendedMetadata(1, "", "sampler"); auto pMockKernell = std::make_unique(program.get(), *pKernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, pMockKernell->initialize()); EXPECT_NE(pMockKernell->getKernelArguments()[0].type, MockKernel::SAMPLER_OBJ); EXPECT_NE(pMockKernell->getKernelArguments()[1].type, MockKernel::SAMPLER_OBJ); } //////////////////////////////////////////////////////////////////////////////// struct NormalizedTest : public SamplerSetArgFixture, public ::testing::TestWithParam { void SetUp() override { SamplerSetArgFixture::SetUp(); } void TearDown() override { SamplerSetArgFixture::TearDown(); } }; HWTEST_P(NormalizedTest, WhenSettingKernelArgSamplerThenCoordsAreCorrect) { typedef typename FamilyType::SAMPLER_STATE SAMPLER_STATE; auto normalizedCoordinates = GetParam(); sampler = Sampler::create( context, normalizedCoordinates, CL_ADDRESS_MIRRORED_REPEAT, CL_FILTER_NEAREST, retVal); cl_sampler samplerObj = sampler; retVal = pKernel->setArg( 0, sizeof(samplerObj), &samplerObj); ASSERT_EQ(CL_SUCCESS, retVal); auto samplerState = reinterpret_cast( ptrOffset(pKernel->getDynamicStateHeap(), pKernelInfo->argAsSmp(0).bindful)); EXPECT_EQ(normalizedCoordinates, static_cast(!samplerState->getNonNormalizedCoordinateEnable())); auto crossThreadData = reinterpret_cast(pKernel->getCrossThreadData()); auto normalizedCoordsAddress = ptrOffset(crossThreadData, 0x10); unsigned int normalizedCoordsValue = 
GetNormCoordsEnum(normalizedCoordinates); EXPECT_EQ(normalizedCoordsValue, *normalizedCoordsAddress); } cl_bool normalizedCoordinatesCases[] = { CL_FALSE, CL_TRUE}; INSTANTIATE_TEST_CASE_P(SamplerSetArg, NormalizedTest, ::testing::ValuesIn(normalizedCoordinatesCases)); //////////////////////////////////////////////////////////////////////////////// struct AddressingModeTest : public SamplerSetArgFixture, public ::testing::TestWithParam { void SetUp() override { SamplerSetArgFixture::SetUp(); } void TearDown() override { SamplerSetArgFixture::TearDown(); } }; HWTEST_P(AddressingModeTest, WhenSettingKernelArgSamplerThenModesAreCorrect) { typedef typename FamilyType::SAMPLER_STATE SAMPLER_STATE; auto addressingMode = GetParam(); sampler = Sampler::create( context, CL_TRUE, addressingMode, CL_FILTER_NEAREST, retVal); cl_sampler samplerObj = sampler; retVal = pKernel->setArg( 0, sizeof(samplerObj), &samplerObj); ASSERT_EQ(CL_SUCCESS, retVal); auto samplerState = reinterpret_cast( ptrOffset(pKernel->getDynamicStateHeap(), pKernelInfo->argAsSmp(0).bindful)); auto expectedModeX = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_MIRROR; auto expectedModeY = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_MIRROR; auto expectedModeZ = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_MIRROR; // clang-format off switch (addressingMode) { case CL_ADDRESS_NONE: case CL_ADDRESS_CLAMP: expectedModeX = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_CLAMP_BORDER; expectedModeY = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_CLAMP_BORDER; expectedModeZ = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_CLAMP_BORDER; break; case CL_ADDRESS_CLAMP_TO_EDGE: expectedModeX = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_CLAMP; expectedModeY = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_CLAMP; expectedModeZ = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_CLAMP; break; case CL_ADDRESS_MIRRORED_REPEAT: expectedModeX = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_MIRROR; expectedModeY = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_MIRROR; expectedModeZ = 
SAMPLER_STATE::TEXTURE_COORDINATE_MODE_MIRROR; break; case CL_ADDRESS_REPEAT: expectedModeX = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_WRAP; expectedModeY = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_WRAP; expectedModeZ = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_WRAP; break; } // clang-format on EXPECT_EQ(expectedModeX, samplerState->getTcxAddressControlMode()); EXPECT_EQ(expectedModeY, samplerState->getTcyAddressControlMode()); EXPECT_EQ(expectedModeZ, samplerState->getTczAddressControlMode()); auto crossThreadData = reinterpret_cast(pKernel->getCrossThreadData()); auto addressingModeAddress = ptrOffset(crossThreadData, 0x8); unsigned int addresingValue = GetAddrModeEnum(addressingMode); EXPECT_EQ(addresingValue, *addressingModeAddress); } cl_addressing_mode addressingModeCases[] = { CL_ADDRESS_NONE, CL_ADDRESS_CLAMP_TO_EDGE, CL_ADDRESS_CLAMP, CL_ADDRESS_REPEAT, CL_ADDRESS_MIRRORED_REPEAT}; INSTANTIATE_TEST_CASE_P(SamplerSetArg, AddressingModeTest, ::testing::ValuesIn(addressingModeCases)); HWTEST_F(SamplerSetArgTest, GivenMipmapsWhenSettingKernelArgSamplerThenLodAreCorrect) { typedef typename FamilyType::SAMPLER_STATE SAMPLER_STATE; FixedU4D8 minLod = 2.0f; FixedU4D8 maxLod = 3.0f; sampler = Sampler::create( context, CL_TRUE, CL_ADDRESS_NONE, CL_FILTER_NEAREST, CL_FILTER_LINEAR, minLod.asFloat(), maxLod.asFloat(), retVal); cl_sampler samplerObj = sampler; retVal = pKernel->setArg( 0, sizeof(samplerObj), &samplerObj); ASSERT_EQ(CL_SUCCESS, retVal); auto samplerState = reinterpret_cast( ptrOffset(pKernel->getDynamicStateHeap(), pKernelInfo->argAsSmp(0).bindful)); EXPECT_EQ(FamilyType::SAMPLER_STATE::MIP_MODE_FILTER_LINEAR, samplerState->getMipModeFilter()); EXPECT_EQ(minLod.getRawAccess(), samplerState->getMinLod()); EXPECT_EQ(maxLod.getRawAccess(), samplerState->getMaxLod()); } //////////////////////////////////////////////////////////////////////////////// struct FilterModeTest : public SamplerSetArgFixture, public ::testing::TestWithParam { void SetUp() override { 
SamplerSetArgFixture::SetUp(); } void TearDown() override { SamplerSetArgFixture::TearDown(); } }; HWTEST_P(FilterModeTest, WhenSettingKernelArgSamplerThenFiltersAreCorrect) { typedef typename FamilyType::SAMPLER_STATE SAMPLER_STATE; auto filterMode = GetParam(); sampler = Sampler::create( context, CL_TRUE, CL_ADDRESS_NONE, filterMode, retVal); auto samplerState = reinterpret_cast( ptrOffset(pKernel->getDynamicStateHeap(), pKernelInfo->argAsSmp(0).bindful)); sampler->setArg(const_cast(samplerState), *defaultHwInfo); if (CL_FILTER_NEAREST == filterMode) { EXPECT_EQ(SAMPLER_STATE::MIN_MODE_FILTER_NEAREST, samplerState->getMinModeFilter()); EXPECT_EQ(SAMPLER_STATE::MAG_MODE_FILTER_NEAREST, samplerState->getMagModeFilter()); EXPECT_EQ(SAMPLER_STATE::MIP_MODE_FILTER_NEAREST, samplerState->getMipModeFilter()); EXPECT_FALSE(samplerState->getUAddressMinFilterRoundingEnable()); EXPECT_FALSE(samplerState->getUAddressMagFilterRoundingEnable()); EXPECT_FALSE(samplerState->getVAddressMinFilterRoundingEnable()); EXPECT_FALSE(samplerState->getVAddressMagFilterRoundingEnable()); EXPECT_FALSE(samplerState->getRAddressMagFilterRoundingEnable()); EXPECT_FALSE(samplerState->getRAddressMinFilterRoundingEnable()); } else { EXPECT_EQ(SAMPLER_STATE::MIN_MODE_FILTER_LINEAR, samplerState->getMinModeFilter()); EXPECT_EQ(SAMPLER_STATE::MAG_MODE_FILTER_LINEAR, samplerState->getMagModeFilter()); EXPECT_EQ(SAMPLER_STATE::MIP_MODE_FILTER_NEAREST, samplerState->getMipModeFilter()); EXPECT_TRUE(samplerState->getUAddressMinFilterRoundingEnable()); EXPECT_TRUE(samplerState->getUAddressMagFilterRoundingEnable()); EXPECT_TRUE(samplerState->getVAddressMinFilterRoundingEnable()); EXPECT_TRUE(samplerState->getVAddressMagFilterRoundingEnable()); EXPECT_TRUE(samplerState->getRAddressMagFilterRoundingEnable()); EXPECT_TRUE(samplerState->getRAddressMinFilterRoundingEnable()); } } cl_filter_mode filterModeCase[] = { CL_FILTER_NEAREST, CL_FILTER_LINEAR}; INSTANTIATE_TEST_CASE_P(SamplerSetArg, FilterModeTest, 
::testing::ValuesIn(filterModeCase)); compute-runtime-22.14.22890/opencl/test/unit_test/sampler/sampler_tests.cpp000066400000000000000000000113061422164147700266640ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/sampler/sampler.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_sampler.h" #include "gtest/gtest.h" #include "patch_list.h" #include using namespace NEO; struct CreateSampler : public ::testing::TestWithParam< std::tuple> { CreateSampler() { } void SetUp() override { std::tie(normalizedCoords, addressingMode, filterMode) = GetParam(); context = new MockContext(); } void TearDown() override { delete context; } MockContext *context; cl_int retVal = CL_INVALID_VALUE; cl_bool normalizedCoords; cl_addressing_mode addressingMode; cl_filter_mode filterMode; }; TEST_P(CreateSampler, WhenSamplerIsCreatedThenSuccessIsReturned) { auto sampler = Sampler::create( context, normalizedCoords, addressingMode, filterMode, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, sampler); delete sampler; } TEST_P(CreateSampler, GivenModeWhenSamplerIsCreatedThenParamsAreSetCorrectly) { auto sampler = new MockSampler( context, normalizedCoords, addressingMode, filterMode); ASSERT_NE(nullptr, sampler); EXPECT_EQ(context, sampler->getContext()); EXPECT_EQ(normalizedCoords, sampler->getNormalizedCoordinates()); EXPECT_EQ(addressingMode, sampler->getAddressingMode()); EXPECT_EQ(filterMode, sampler->getFilterMode()); //check for SnapWA bool snapWaNeeded = addressingMode == CL_ADDRESS_CLAMP && filterMode == CL_FILTER_NEAREST; auto snapWaValue = snapWaNeeded ? 
iOpenCL::CONSTANT_REGISTER_BOOLEAN_TRUE : iOpenCL::CONSTANT_REGISTER_BOOLEAN_FALSE; EXPECT_EQ(snapWaValue, sampler->getSnapWaValue()); delete sampler; } static cl_bool normalizedCoordModes[] = { CL_FALSE, CL_TRUE}; static cl_addressing_mode addressingModes[] = { CL_ADDRESS_MIRRORED_REPEAT, CL_ADDRESS_REPEAT, CL_ADDRESS_CLAMP_TO_EDGE, CL_ADDRESS_CLAMP, CL_ADDRESS_NONE}; static cl_filter_mode filterModes[] = { CL_FILTER_NEAREST, CL_FILTER_LINEAR}; INSTANTIATE_TEST_CASE_P(Sampler, CreateSampler, ::testing::Combine( ::testing::ValuesIn(normalizedCoordModes), ::testing::ValuesIn(addressingModes), ::testing::ValuesIn(filterModes))); typedef ::testing::TestWithParam> TransformableSamplerTest; TEST_P(TransformableSamplerTest, givenSamplerWhenHasProperParametersThenIsTransformable) { bool expectedRetVal; bool retVal; cl_bool normalizedCoords; cl_addressing_mode addressingMode; cl_filter_mode filterMode; std::tie(normalizedCoords, addressingMode, filterMode) = GetParam(); expectedRetVal = addressingMode == CL_ADDRESS_CLAMP_TO_EDGE && filterMode == CL_FILTER_NEAREST && normalizedCoords == CL_FALSE; MockSampler sampler(nullptr, normalizedCoords, addressingMode, filterMode); retVal = sampler.isTransformable(); EXPECT_EQ(expectedRetVal, retVal); } INSTANTIATE_TEST_CASE_P(Sampler, TransformableSamplerTest, ::testing::Combine( ::testing::ValuesIn(normalizedCoordModes), ::testing::ValuesIn(addressingModes), ::testing::ValuesIn(filterModes))); TEST(castToSamplerTest, GivenGenericPointerWhichHoldsSamplerObjectWhenCastToSamplerIsCalledThenCastWithSuccess) { cl_int retVal; auto context = std::make_unique(); cl_sampler clSampler = Sampler::create( context.get(), CL_TRUE, CL_ADDRESS_MIRRORED_REPEAT, CL_FILTER_NEAREST, retVal); ASSERT_EQ(CL_SUCCESS, retVal); auto ptr = reinterpret_cast(clSampler); auto sampler = castToObject(ptr); EXPECT_NE(nullptr, sampler); clReleaseSampler(clSampler); } TEST(castToSamplerTest, 
GivenGenericPointerWhichDoestNotHoldSamplerObjectWhenCastToSamplerIsCalledThenCastWithAFailure) { auto context = std::make_unique(); auto notSamplerObj = std::unique_ptr(ImageHelper::create(context.get())); void *ptr = notSamplerObj.get(); auto notSampler = castToObject(ptr); EXPECT_EQ(nullptr, notSampler); } compute-runtime-22.14.22890/opencl/test/unit_test/sampler/sampler_tests_xehp_and_later.cpp000066400000000000000000000013321422164147700317170ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include using namespace NEO; using XeHPAndLaterSamplerTest = Test; HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterSamplerTest, GivenDefaultThenLowQualityFilterIsDisabled) { using SAMPLER_STATE = typename FamilyType::SAMPLER_STATE; auto state = FamilyType::cmdInitSamplerState; EXPECT_EQ(SAMPLER_STATE::LOW_QUALITY_FILTER_DISABLE, state.getLowQualityFilter()); } compute-runtime-22.14.22890/opencl/test/unit_test/scenarios/000077500000000000000000000000001422164147700236155ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/scenarios/CMakeLists.txt000066400000000000000000000006301422164147700263540ustar00rootroot00000000000000# # Copyright (C) 2018-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_scenarios ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/blocked_enqueue_barrier_scenario_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/blocked_enqueue_with_callback_scenario_tests.cpp ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_scenarios}) add_subdirectories() 
blocked_enqueue_barrier_scenario_tests.cpp000066400000000000000000000035341422164147700342140ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/scenarios/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/source/event/user_event.h" #include "opencl/test/unit_test/fixtures/scenario_test_fixture.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "gtest/gtest.h" using namespace NEO; typedef ScenarioTest BarrierScenarioTest; HWTEST_F(BarrierScenarioTest, givenBlockedEnqueueBarrierOnOOQWhenUserEventIsUnblockedThenNextEnqueuesAreNotBlocked) { cl_command_queue clCommandQ = nullptr; cl_queue_properties properties[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0}; auto mockCmdQ = clUniquePtr(new MockCommandQueueHw(context, pPlatform->getClDevice(0), properties)); clCommandQ = mockCmdQ.get(); cl_kernel clKernel = kernelInternals->mockMultiDeviceKernel; size_t offset[] = {0, 0, 0}; size_t gws[] = {1, 1, 1}; cl_int retVal = CL_SUCCESS; cl_int success = CL_SUCCESS; UserEvent *userEvent = new UserEvent(context); cl_event eventBlocking = userEvent; retVal = clEnqueueBarrierWithWaitList(clCommandQ, 1, &eventBlocking, nullptr); EXPECT_EQ(success, retVal); EXPECT_EQ(CompletionStamp::notReady, mockCmdQ->taskLevel); EXPECT_NE(nullptr, mockCmdQ->virtualEvent); clSetUserEventStatus(eventBlocking, CL_COMPLETE); userEvent->release(); mockCmdQ->isQueueBlocked(); EXPECT_NE(CompletionStamp::notReady, mockCmdQ->taskLevel); EXPECT_EQ(nullptr, mockCmdQ->virtualEvent); retVal = clEnqueueNDRangeKernel(clCommandQ, clKernel, 1, offset, gws, nullptr, 0, nullptr, nullptr); EXPECT_EQ(success, retVal); retVal = clFinish(clCommandQ); EXPECT_EQ(success, retVal); } 
blocked_enqueue_with_callback_scenario_tests.cpp000066400000000000000000000101571422164147700353540ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/scenarios/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/source/event/async_events_handler.h" #include "opencl/source/event/user_event.h" #include "opencl/test/unit_test/fixtures/scenario_test_fixture.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "gtest/gtest.h" using namespace NEO; struct CallbackData { cl_kernel kernel; cl_command_queue queue; bool callbackCalled = false; UserEvent *signalCallbackDoneEvent = nullptr; }; void CL_CALLBACK callback(cl_event event, cl_int status, void *data) { CallbackData *callbackData = (CallbackData *)data; size_t offset[] = {0, 0, 0}; size_t gws[] = {1, 1, 1}; clEnqueueNDRangeKernel(callbackData->queue, callbackData->kernel, 1, offset, gws, nullptr, 0, nullptr, nullptr); clFinish(callbackData->queue); callbackData->callbackCalled = true; if (callbackData->signalCallbackDoneEvent) { cl_event callbackEvent = callbackData->signalCallbackDoneEvent; clSetUserEventStatus(callbackEvent, CL_COMPLETE); // No need to reatin and release this synchronization event //clReleaseEvent(callbackEvent); } } TEST_F(ScenarioTest, givenAsyncHandlerDisabledAndUserEventBlockingEnqueueAndOutputEventWithCallbackWhenUserEventIsSetCompleteThenCallbackIsExecuted) { DebugManager.flags.EnableAsyncEventsHandler.set(false); cl_command_queue clCommandQ = nullptr; cl_queue_properties properties = 0; cl_kernel clKernel = kernelInternals->mockMultiDeviceKernel; size_t offset[] = {0, 0, 0}; size_t gws[] = {1, 1, 1}; cl_int retVal = CL_SUCCESS; cl_int success = CL_SUCCESS; UserEvent *userEvent = new UserEvent(context); cl_event eventBlocking = userEvent; cl_event eventOut = nullptr; clCommandQ = 
clCreateCommandQueue(context, devices[0], properties, &retVal); retVal = clEnqueueNDRangeKernel(clCommandQ, clKernel, 1, offset, gws, nullptr, 1, &eventBlocking, &eventOut); EXPECT_EQ(success, retVal); ASSERT_NE(nullptr, eventOut); CallbackData data; data.kernel = clKernel; data.queue = clCommandQ; data.callbackCalled = false; clSetEventCallback(eventOut, CL_COMPLETE, callback, &data); EXPECT_FALSE(data.callbackCalled); clSetUserEventStatus(eventBlocking, CL_COMPLETE); userEvent->release(); clWaitForEvents(1, &eventOut); EXPECT_TRUE(data.callbackCalled); clReleaseEvent(eventOut); clReleaseCommandQueue(clCommandQ); } TEST_F(ScenarioTest, givenAsyncHandlerEnabledAndUserEventBlockingEnqueueAndOutputEventWithCallbackWhenUserEventIsSetCompleteThenCallbackIsExecuted) { DebugManager.flags.EnableAsyncEventsHandler.set(true); cl_command_queue clCommandQ = nullptr; cl_queue_properties properties = 0; cl_kernel clKernel = kernelInternals->mockMultiDeviceKernel; size_t offset[] = {0, 0, 0}; size_t gws[] = {1, 1, 1}; cl_int retVal = CL_SUCCESS; cl_int success = CL_SUCCESS; UserEvent *userEvent = new UserEvent(context); cl_event eventBlocking = userEvent; cl_event eventOut = nullptr; clCommandQ = clCreateCommandQueue(context, devices[0], properties, &retVal); retVal = clEnqueueNDRangeKernel(clCommandQ, clKernel, 1, offset, gws, nullptr, 1, &eventBlocking, &eventOut); EXPECT_EQ(success, retVal); ASSERT_NE(nullptr, eventOut); CallbackData data; data.kernel = clKernel; data.queue = clCommandQ; data.callbackCalled = false; data.signalCallbackDoneEvent = new UserEvent(context); cl_event callbackEvent = data.signalCallbackDoneEvent; clSetEventCallback(eventOut, CL_COMPLETE, callback, &data); EXPECT_FALSE(data.callbackCalled); clSetUserEventStatus(eventBlocking, CL_COMPLETE); userEvent->release(); clWaitForEvents(1, &eventOut); clWaitForEvents(1, &callbackEvent); EXPECT_TRUE(data.callbackCalled); data.signalCallbackDoneEvent->release(); clReleaseEvent(eventOut); 
clReleaseCommandQueue(clCommandQ); context->getAsyncEventsHandler().closeThread(); } compute-runtime-22.14.22890/opencl/test/unit_test/scenarios/windows/000077500000000000000000000000001422164147700253075ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/scenarios/windows/CMakeLists.txt000066400000000000000000000005411422164147700300470ustar00rootroot00000000000000# # Copyright (C) 2019-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_scenarios_windows ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_read_write_buffer_scenarios_windows_tests.cpp ) if(WIN32) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_scenarios_windows}) endif() enqueue_read_write_buffer_scenarios_windows_tests.cpp000066400000000000000000000132411422164147700402040ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/scenarios/windows/* * Copyright (C) 2019-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/compiler_hw_info_config.h" #include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/source/os_interface/os_interface.h" #include "shared/source/os_interface/windows/wddm_device_command_stream.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/os_interface/windows/mock_wddm_memory_manager.h" #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "opencl/test/unit_test/helpers/cl_execution_environment_helper.h" #include "opencl/test/unit_test/helpers/cl_hw_parse.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" using namespace NEO; struct EnqueueBufferWindowsTest : public ClHardwareParse, public ::testing::Test { EnqueueBufferWindowsTest(void) : buffer(nullptr) { } void SetUp() 
override { DebugManager.flags.EnableBlitterForEnqueueOperations.set(0); executionEnvironment = getClExecutionEnvironmentImpl(hwInfo, 1); } void TearDown() override { buffer.reset(nullptr); } template void initializeFixture() { EnvironmentWithCsrWrapper environment; environment.setCsrType>(); memoryManager = new MockWddmMemoryManager(*executionEnvironment); executionEnvironment->memoryManager.reset(memoryManager); device = std::make_unique(Device::create(executionEnvironment, rootDeviceIndex)); context = std::make_unique(device.get()); const size_t bufferMisalignment = 1; const size_t bufferSize = 16; bufferMemory = std::make_unique(alignUp(bufferSize + bufferMisalignment, sizeof(uint32_t))); cl_int retVal = 0; buffer.reset(Buffer::create(context.get(), CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, bufferSize, reinterpret_cast(bufferMemory.get()) + bufferMisalignment, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); } protected: DebugManagerStateRestore restore; HardwareInfo hardwareInfo; HardwareInfo *hwInfo = nullptr; ExecutionEnvironment *executionEnvironment; cl_queue_properties properties = {}; std::unique_ptr bufferMemory; std::unique_ptr device; std::unique_ptr context; std::unique_ptr buffer; const uint32_t rootDeviceIndex = 0u; MockWddmMemoryManager *memoryManager = nullptr; }; HWTEST_F(EnqueueBufferWindowsTest, givenMisalignedHostPtrWhenEnqueueReadBufferCalledThenStateBaseAddressAddressIsAlignedAndMatchesKernelDispatchInfoParams) { if (executionEnvironment->memoryManager.get()->isLimitedGPU(0)) { GTEST_SKIP(); } initializeFixture(); const auto &compilerHwInfoConfig = *CompilerHwInfoConfig::get(defaultHwInfo->platform.eProductFamily); if (compilerHwInfoConfig.isForceToStatelessRequired()) { GTEST_SKIP(); } auto cmdQ = std::make_unique>(context.get(), device.get(), &properties); char *misalignedPtr = reinterpret_cast(device->getMemoryManager()->getAlignedMallocRestrictions()->minAddress + 1); buffer->forceDisallowCPUCopy = true; auto retVal = 
cmdQ->enqueueReadBuffer(buffer.get(), CL_FALSE, 0, 4, misalignedPtr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(0, cmdQ->lastEnqueuedKernels.size()); Kernel *kernel = cmdQ->lastEnqueuedKernels[0]; auto hostPtrAllocation = cmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage()->getTemporaryAllocations().peekHead(); while (hostPtrAllocation != nullptr) { if (hostPtrAllocation->getUnderlyingBuffer() == misalignedPtr) { break; } hostPtrAllocation = hostPtrAllocation->next; } ASSERT_NE(nullptr, hostPtrAllocation); uint64_t gpuVa = hostPtrAllocation->getGpuAddress(); cmdQ->finish(); parseCommands(*cmdQ); auto &kernelInfo = kernel->getKernelInfo(); if (hwInfo->capabilityTable.gpuAddressSpace == MemoryConstants::max48BitAddress) { const auto &surfaceStateDst = getSurfaceState(&cmdQ->getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0), 1); const auto &arg1AsPtr = kernelInfo.getArgDescriptorAt(1).as(); if (arg1AsPtr.pointerSize == sizeof(uint64_t)) { auto pKernelArg = (uint64_t *)(kernel->getCrossThreadData() + arg1AsPtr.stateless); EXPECT_EQ(alignDown(gpuVa, 4), static_cast(*pKernelArg)); EXPECT_EQ(*pKernelArg, surfaceStateDst.getSurfaceBaseAddress()); } else if (arg1AsPtr.pointerSize == sizeof(uint32_t)) { auto pKernelArg = (uint32_t *)(kernel->getCrossThreadData() + arg1AsPtr.stateless); EXPECT_EQ(alignDown(gpuVa, 4), static_cast(*pKernelArg)); EXPECT_EQ(static_cast(*pKernelArg), surfaceStateDst.getSurfaceBaseAddress()); } } auto arg3AsVal = kernelInfo.getArgDescriptorAt(3).as(); EXPECT_EQ(sizeof(uint32_t), arg3AsVal.elements[0].size); auto dstOffset = (uint32_t *)(kernel->getCrossThreadData() + arg3AsVal.elements[0].offset); EXPECT_EQ(ptrDiff(misalignedPtr, alignDown(misalignedPtr, 4)), *dstOffset); } 
compute-runtime-22.14.22890/opencl/test/unit_test/sharings/000077500000000000000000000000001422164147700234455ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/sharings/CMakeLists.txt000066400000000000000000000005461422164147700262120ustar00rootroot00000000000000# # Copyright (C) 2018-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_sharings ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/sharing_factory_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sharing_tests.cpp ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_sharings}) add_subdirectories() compute-runtime-22.14.22890/opencl/test/unit_test/sharings/d3d/000077500000000000000000000000001422164147700241175ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/sharings/d3d/CMakeLists.txt000066400000000000000000000004771422164147700266670ustar00rootroot00000000000000# # Copyright (C) 2018-2021 Intel Corporation # # SPDX-License-Identifier: MIT # if(WIN32) set(IGDRCL_SRCS_tests_sharings_d3d ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/context_d3d_tests.cpp ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_sharings_d3d}) endif() compute-runtime-22.14.22890/opencl/test/unit_test/sharings/d3d/context_d3d_tests.cpp000066400000000000000000000205711422164147700302700ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/unit_test/helpers/gtest_helpers.h" #include "opencl/source/os_interface/windows/d3d_sharing_functions.h" #include "opencl/source/sharings/d3d/cl_d3d_api.h" #include "opencl/test/unit_test/api/cl_api_tests.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "gtest/gtest.h" using namespace NEO; TEST(D3DContextTest, WhenContextIsCreatedThenSharingIsNotPresent) { MockContext context; EXPECT_EQ(nullptr, context.getSharing>()); EXPECT_EQ(nullptr, context.getSharing>()); 
EXPECT_EQ(nullptr, context.getSharing>()); } TEST(D3DContextTest, givenDispatchTableThenItContainsValidEntries) { sharingFactory.fillGlobalDispatchTable(); MockContext context; EXPECT_EQ(&clGetDeviceIDsFromDX9INTEL, context.dispatch.crtDispatch->clGetDeviceIDsFromDX9INTEL); EXPECT_EQ(&clCreateFromDX9MediaSurfaceINTEL, context.dispatch.crtDispatch->clCreateFromDX9MediaSurfaceINTEL); EXPECT_EQ(&clEnqueueAcquireDX9ObjectsINTEL, context.dispatch.crtDispatch->clEnqueueAcquireDX9ObjectsINTEL); EXPECT_EQ(&clEnqueueReleaseDX9ObjectsINTEL, context.dispatch.crtDispatch->clEnqueueReleaseDX9ObjectsINTEL); } struct clIntelSharingFormatQueryDX9 : public api_tests { std::vector supportedNonPlanarFormats; std::vector supportedPlanarFormats; std::vector supportedPlane1Formats; std::vector supportedPlane2Formats; std::vector retrievedFormats; cl_uint numImageFormats; void SetUp() override { api_tests::SetUp(); supportedNonPlanarFormats = {D3DFMT_R32F, D3DFMT_R16F, D3DFMT_L16, D3DFMT_A8, D3DFMT_L8, D3DFMT_G32R32F, D3DFMT_G16R16F, D3DFMT_G16R16, D3DFMT_A8L8, D3DFMT_A32B32G32R32F, D3DFMT_A16B16G16R16F, D3DFMT_A16B16G16R16, D3DFMT_A8B8G8R8, D3DFMT_X8B8G8R8, D3DFMT_A8R8G8B8, D3DFMT_X8R8G8B8}; supportedPlanarFormats = {D3DFMT_YUY2, D3DFMT_UYVY, static_cast(MAKEFOURCC('N', 'V', '1', '2')), static_cast(MAKEFOURCC('Y', 'V', '1', '2')), static_cast(MAKEFOURCC('Y', 'V', 'Y', 'U')), static_cast(MAKEFOURCC('V', 'Y', 'U', 'Y'))}; supportedPlane1Formats = {static_cast(MAKEFOURCC('N', 'V', '1', '2')), static_cast(MAKEFOURCC('Y', 'V', '1', '2'))}; supportedPlane2Formats = {static_cast(MAKEFOURCC('Y', 'V', '1', '2'))}; retrievedFormats.assign(supportedNonPlanarFormats.size() + supportedPlanarFormats.size(), D3DFMT_UNKNOWN); } void TearDown() override { api_tests::TearDown(); } }; namespace ULT { TEST_F(clIntelSharingFormatQueryDX9, givenInvalidContextWhenMediaSurfaceFormatsRequestedThenInvalidContextError) { retVal = clGetSupportedDX9MediaSurfaceFormatsINTEL(nullptr, CL_MEM_READ_WRITE, 
CL_MEM_OBJECT_IMAGE2D, 0, static_cast(retrievedFormats.size()), &retrievedFormats[0], &numImageFormats); EXPECT_EQ(CL_INVALID_CONTEXT, retVal); } TEST_F(clIntelSharingFormatQueryDX9, givenInvalidFlagsWhenMediaSurfaceFormatsRequestedThenInvalidValueError) { retVal = clGetSupportedDX9MediaSurfaceFormatsINTEL( pContext, 0, CL_MEM_OBJECT_IMAGE2D, 0, static_cast(retrievedFormats.size()), &retrievedFormats[0], &numImageFormats); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(clIntelSharingFormatQueryDX9, givenInvalidImageTypeWhenMediaSurfaceFormatsRequestedThenInvalidValueError) { retVal = clGetSupportedDX9MediaSurfaceFormatsINTEL(pContext, CL_MEM_READ_WRITE, 0, 0, static_cast(retrievedFormats.size()), &retrievedFormats[0], &numImageFormats); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(clIntelSharingFormatQueryDX9, givenValidParametersWhenRequestedMediaSurfaceFormatsBelowMaximumThenExceedingFormatAreaRemainsUntouched) { for (cl_uint i = 0; i <= static_cast(retrievedFormats.size()); ++i) { retVal = clGetSupportedDX9MediaSurfaceFormatsINTEL(pContext, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 0, i, &retrievedFormats[0], &numImageFormats); EXPECT_EQ(retVal, CL_SUCCESS); for (cl_uint j = i; j < retrievedFormats.size(); ++j) { EXPECT_EQ(retrievedFormats[j], D3DFMT_UNKNOWN); } } } TEST_F(clIntelSharingFormatQueryDX9, givenValidParametersWhenRequestingMediaSurfaceFormatsForPlane0ThenAllKnownFormatsAreIncludedInTheResult) { retVal = clGetSupportedDX9MediaSurfaceFormatsINTEL(pContext, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 0, static_cast(retrievedFormats.size()), &retrievedFormats[0], &numImageFormats); EXPECT_EQ(retVal, CL_SUCCESS); EXPECT_EQ(supportedNonPlanarFormats.size() + supportedPlanarFormats.size(), numImageFormats); for (auto format : supportedNonPlanarFormats) { auto found = std::find(retrievedFormats.begin(), retrievedFormats.end(), format); EXPECT_NE(found, retrievedFormats.end()); } for (auto format : supportedPlanarFormats) { auto found = 
std::find(retrievedFormats.begin(), retrievedFormats.end(), format); EXPECT_NE(found, retrievedFormats.end()); } } TEST_F(clIntelSharingFormatQueryDX9, givenValidParametersWhenRequestingMediaSurfaceFormatsForPlane1ThenOnlyPlanarFormatsAreIncludedInTheResult) { retVal = clGetSupportedDX9MediaSurfaceFormatsINTEL(pContext, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 1, static_cast(retrievedFormats.size()), &retrievedFormats[0], &numImageFormats); ASSERT_EQ(retVal, CL_SUCCESS); EXPECT_EQ(supportedPlane1Formats.size(), numImageFormats); for (auto format : supportedPlane1Formats) { auto found = std::find(retrievedFormats.begin(), retrievedFormats.end(), format); EXPECT_NE(found, retrievedFormats.end()); } for (auto format : supportedNonPlanarFormats) { auto found = std::find(retrievedFormats.begin(), retrievedFormats.end(), format); EXPECT_EQ(found, retrievedFormats.end()); } } TEST_F(clIntelSharingFormatQueryDX9, givenValidParametersWhenRequestingMediaSurfaceFormatsForPlane2ThenOnlyYV12FormatIsIncludedInTheResult) { retVal = clGetSupportedDX9MediaSurfaceFormatsINTEL(pContext, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 2, static_cast(retrievedFormats.size()), &retrievedFormats[0], &numImageFormats); ASSERT_EQ(retVal, CL_SUCCESS); EXPECT_EQ(supportedPlane2Formats.size(), numImageFormats); for (auto format : supportedPlane2Formats) { auto found = std::find(retrievedFormats.begin(), retrievedFormats.end(), format); EXPECT_NE(found, retrievedFormats.end()); } for (auto format : supportedNonPlanarFormats) { auto found = std::find(retrievedFormats.begin(), retrievedFormats.end(), format); EXPECT_EQ(found, retrievedFormats.end()); } } TEST_F(clIntelSharingFormatQueryDX9, givenValidParametersWhenRequestingMediaSurfaceFormatsForPlaneGraterThan2ThenZeroNumFormatsIsReturned) { retVal = clGetSupportedDX9MediaSurfaceFormatsINTEL(pContext, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 3, 0, nullptr, &numImageFormats); EXPECT_EQ(retVal, CL_SUCCESS); EXPECT_EQ(0u, numImageFormats); } } // 
namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/sharings/gl/000077500000000000000000000000001422164147700240475ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/sharings/gl/CMakeLists.txt000066400000000000000000000004601422164147700266070ustar00rootroot00000000000000# # Copyright (C) 2018-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_sharings_gl ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/gl_dll_helper.h ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_sharings_gl}) add_subdirectories() compute-runtime-22.14.22890/opencl/test/unit_test/sharings/gl/gl_dll_helper.h000066400000000000000000000066501422164147700270230ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/debug_helpers.h" #include "shared/source/os_interface/os_library.h" #include "opencl/extensions/public/cl_gl_private_intel.h" #include "GL/gl.h" #include namespace Os { extern const char *openglDllName; } namespace NEO { struct GLMockReturnedValues; using GLString = void (*)(const char *, unsigned int); using GLSharedOCLContext = void (*)(GLboolean); using glBoolean = GLboolean (*)(); using Void = void (*)(const char *); using Int = int (*)(const char *); using BufferParam = void (*)(CL_GL_BUFFER_INFO); using TextureParam = void (*)(CL_GL_RESOURCE_INFO); using BuffInfo = CL_GL_BUFFER_INFO (*)(); using TextureInfo = CL_GL_RESOURCE_INFO (*)(); using GLMockValue = GLMockReturnedValues (*)(); using setGLMockValue = void (*)(GLMockReturnedValues); struct GlDllHelper { public: GlDllHelper() { glDllLoad.reset(OsLibrary::load(Os::openglDllName)); if (glDllLoad) { glSetString = (*glDllLoad)["glSetString"]; UNRECOVERABLE_IF(glSetString == nullptr); glSetStringi = (*glDllLoad)["glSetStringi"]; UNRECOVERABLE_IF(glSetStringi == nullptr); setGLSetSharedOCLContextStateReturnedValue = 
(*glDllLoad)["setGLSetSharedOCLContextStateReturnedValue"]; UNRECOVERABLE_IF(setGLSetSharedOCLContextStateReturnedValue == nullptr); getGLSetSharedOCLContextStateReturnedValue = (*glDllLoad)["getGLSetSharedOCLContextStateReturnedValue"]; UNRECOVERABLE_IF(getGLSetSharedOCLContextStateReturnedValue == nullptr); resetParam = (*glDllLoad)["resetParam"]; UNRECOVERABLE_IF(resetParam == nullptr); getParam = (*glDllLoad)["getParam"]; UNRECOVERABLE_IF(getParam == nullptr); loadBuffer = (*glDllLoad)["loadBuffer"]; UNRECOVERABLE_IF(loadBuffer == nullptr); getBufferInfo = (*glDllLoad)["getBufferInfo"]; UNRECOVERABLE_IF(getBufferInfo == nullptr); getTextureInfo = (*glDllLoad)["getTextureInfo"]; UNRECOVERABLE_IF(getTextureInfo == nullptr); Void memParam = (*glDllLoad)["memParam"]; UNRECOVERABLE_IF(memParam == nullptr); loadTexture = (*glDllLoad)["loadTexture"]; UNRECOVERABLE_IF(loadTexture == nullptr); getGlMockReturnedValues = (*glDllLoad)["getGlMockReturnedValues"]; UNRECOVERABLE_IF(getGlMockReturnedValues == nullptr); setGlMockReturnedValues = (*glDllLoad)["setGlMockReturnedValues"]; UNRECOVERABLE_IF(setGlMockReturnedValues == nullptr); } } ~GlDllHelper() { if (glDllLoad) { glSetString("Intel", GL_VENDOR); glSetString("4.0", GL_VERSION); glSetStringi("GL_OES_framebuffer_object", 0); glSetStringi("GL_EXT_framebuffer_object", 1); } } GLString glSetString; GLString glSetStringi; GLSharedOCLContext setGLSetSharedOCLContextStateReturnedValue; glBoolean getGLSetSharedOCLContextStateReturnedValue; Void resetParam; Int getParam; BufferParam loadBuffer; BuffInfo getBufferInfo; TextureInfo getTextureInfo; TextureParam loadTexture; GLMockValue getGlMockReturnedValues; setGLMockValue setGlMockReturnedValues; private: std::unique_ptr glDllLoad; }; } // namespace NEO 
compute-runtime-22.14.22890/opencl/test/unit_test/sharings/gl/windows/000077500000000000000000000000001422164147700255415ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/sharings/gl/windows/CMakeLists.txt000066400000000000000000000014451422164147700303050ustar00rootroot00000000000000# # Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT # if(WIN32) set(IGDRCL_SRCS_tests_sharings_gl_windows ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/gl_arb_sync_event_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gl_create_from_texture_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gl_library_name.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gl_os_sharing_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gl_reused_buffers_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gl_sharing_enable_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gl_sharing_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gl_texture_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gl_types_tests.cpp ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_sharings_gl_windows}) endif() compute-runtime-22.14.22890/opencl/test/unit_test/sharings/gl/windows/gl_arb_sync_event_tests.cpp000066400000000000000000000352621422164147700331620ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/os_interface/os_interface.h" #include "shared/test/common/mocks/mock_csr.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/context/context.h" #include "opencl/source/event/user_event.h" #include "opencl/source/platform/platform.h" #include "opencl/source/sharings/gl/gl_arb_sync_event.h" #include "opencl/source/sharings/sharing.h" #include "opencl/test/unit_test/mocks/gl/windows/mock_gl_arb_sync_event_windows.h" #include "opencl/test/unit_test/mocks/gl/windows/mock_gl_sharing_windows.h" #include 
"opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_event.h" #include "opencl/test/unit_test/mocks/mock_platform.h" using namespace NEO; TEST(GlArbSyncEvent, whenCreateArbSyncEventNameIsCalledMultipleTimesThenEachCallReturnsUniqueName) { char *name1 = NEO::createArbSyncEventName(); EXPECT_NE(nullptr, name1); EXPECT_STRNE("", name1); char *name2 = NEO::createArbSyncEventName(); EXPECT_NE(nullptr, name2); EXPECT_STRNE("", name2); char *name3 = NEO::createArbSyncEventName(); EXPECT_NE(nullptr, name3); EXPECT_STRNE("", name3); EXPECT_STRNE(name1, name2); EXPECT_STRNE(name1, name3); EXPECT_STRNE(name2, name3); NEO::destroyArbSyncEventName(name1); NEO::destroyArbSyncEventName(name2); NEO::destroyArbSyncEventName(name3); } template inline void glArbSyncObjectWaitServerMock(NEO::OSInterface &osInterface, CL_GL_SYNC_INFO &glSyncInfo) { glSyncInfo.waitCalled = SignalWaited; } struct MockBaseEvent : Event { using Event::Event; bool wasUpdated = false; void updateExecutionStatus() override { Event::updateExecutionStatus(); wasUpdated = true; } }; struct GlArbSyncEventTest : public ::testing::Test { GlArbSyncEventTest(void) { } void SetUp() override { executionEnvironment = platform()->peekExecutionEnvironment(); executionEnvironment->memoryManager = std::make_unique(*executionEnvironment); device = std::make_unique(MockDevice::create(executionEnvironment, 0u)); auto mockCsr = new MockCommandStreamReceiver(*executionEnvironment, 0, device->getDeviceBitfield()); device->resetCommandStreamReceiver(mockCsr); ctx.reset(new MockContext); cmdQ.reset(new MockCommandQueue(ctx.get(), device.get(), nullptr, false)); sharing = new GlSharingFunctionsMock(); ctx->setSharingFunctions(sharing); sharing->pfnGlArbSyncObjectCleanup = glArbSyncObjectCleanupMockDoNothing; sharing->pfnGlArbSyncObjectSetup = mockGlArbSyncObjectSetup; 
sharing->pfnGlArbSyncObjectSignal = glArbSyncObjectSignalMockDoNothing; sharing->pfnGlArbSyncObjectWaitServer = glArbSyncObjectWaitServerMock; osInterface = new OSInterface; executionEnvironment->rootDeviceEnvironments[0]->osInterface.reset(osInterface); } void TearDown() override { if (baseEvent) { triggerEvent->setStatus(-1); baseEvent->release(); triggerEvent->release(); } } template T *createArbEventMock() { T *ret = new T(*ctx); ret->osInterface = osInterface; ret->baseEvent = getBaseEvent(); baseEvent->incRefInternal(); baseEvent->addChild(*ret); return ret; } MockBaseEvent *getBaseEvent() { if (baseEvent == nullptr) { triggerEvent = new UserEvent(ctx.get()); baseEvent = new MockBaseEvent(cmdQ.get(), CL_COMMAND_RELEASE_GL_OBJECTS, CompletionStamp::notReady, CompletionStamp::notReady); triggerEvent->addChild(*baseEvent); } return baseEvent; } void failSyncObjectCreation() { sharing->pfnGlArbSyncObjectSetup = mockGlArbSyncObjectSetup; } void setWaitCalledFlagOnServerWait() { sharing->pfnGlArbSyncObjectWaitServer = glArbSyncObjectWaitServerMock; } std::unique_ptr device; std::unique_ptr ctx; std::unique_ptr cmdQ; OSInterface *osInterface = nullptr; Event *triggerEvent = nullptr; MockBaseEvent *baseEvent = nullptr; GlSharingFunctionsMock *sharing = nullptr; ExecutionEnvironment *executionEnvironment = nullptr; }; TEST_F(GlArbSyncEventTest, whenGlArbEventIsCreatedThenBaseEventObjectIsConstructedWithProperContextAndCommandType) { auto *syncEv = createArbEventMock>(); EXPECT_EQ(static_cast(CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR), syncEv->getCommandType()); EXPECT_EQ(ctx.get(), syncEv->getContext()); EXPECT_NE(nullptr, syncEv->glSyncInfo); syncEv->release(); } TEST_F(GlArbSyncEventTest, whenGetSyncInfoisCalledThenEventsSyncInfoIsReturned) { auto *syncEv = createArbEventMock>(); EXPECT_NE(nullptr, syncEv->glSyncInfo); EXPECT_EQ(syncEv->glSyncInfo.get(), syncEv->getSyncInfo()); syncEv->release(); } TEST_F(GlArbSyncEventTest, 
whenSetBaseEventIsCalledThenProperMembersOfParentEventAreCopiedToSyncEventAndReferenceCountersAreUpdated) { ASSERT_NE(nullptr, getBaseEvent()->getCommandQueue()); EXPECT_EQ(2, getBaseEvent()->getRefInternalCount()); EXPECT_EQ(2, getBaseEvent()->getCommandQueue()->getRefInternalCount()); EXPECT_FALSE(getBaseEvent()->peekHasChildEvents()); auto *syncEv = new DummyArbEvent(*ctx); EXPECT_EQ(nullptr, syncEv->baseEvent); EXPECT_EQ(nullptr, syncEv->osInterface); EXPECT_EQ(nullptr, syncEv->getCommandQueue()); syncEv->useBaseSetEvent = true; bool ret = syncEv->setBaseEvent(*getBaseEvent()); EXPECT_TRUE(ret); EXPECT_TRUE(getBaseEvent()->peekHasChildEvents()); EXPECT_EQ(getBaseEvent(), syncEv->baseEvent); EXPECT_EQ(getBaseEvent()->getCommandQueue(), syncEv->getCommandQueue()); EXPECT_EQ(syncEv->getCommandQueue()->getGpgpuCommandStreamReceiver().getOSInterface(), syncEv->osInterface); EXPECT_EQ(3, getBaseEvent()->getRefInternalCount()); EXPECT_EQ(3, getBaseEvent()->getCommandQueue()->getRefInternalCount()); EXPECT_TRUE(getBaseEvent()->peekHasChildEvents()); syncEv->release(); } TEST_F(GlArbSyncEventTest, whenSetBaseEventIsCalledButGlArbSyncObjectCreationFailsThenOperationIsAborted) { ASSERT_NE(nullptr, getBaseEvent()->getCommandQueue()); EXPECT_EQ(2, getBaseEvent()->getRefInternalCount()); EXPECT_EQ(2, getBaseEvent()->getCommandQueue()->getRefInternalCount()); EXPECT_FALSE(getBaseEvent()->peekHasChildEvents()); auto *syncEv = new DummyArbEvent(*ctx); EXPECT_EQ(nullptr, syncEv->baseEvent); EXPECT_EQ(nullptr, syncEv->osInterface); EXPECT_EQ(nullptr, syncEv->getCommandQueue()); syncEv->useBaseSetEvent = true; failSyncObjectCreation(); bool ret = syncEv->setBaseEvent(*getBaseEvent()); EXPECT_FALSE(ret); EXPECT_EQ(2, getBaseEvent()->getRefInternalCount()); EXPECT_EQ(2, getBaseEvent()->getCommandQueue()->getRefInternalCount()); EXPECT_FALSE(getBaseEvent()->peekHasChildEvents()); EXPECT_EQ(nullptr, syncEv->baseEvent); EXPECT_EQ(nullptr, syncEv->osInterface); EXPECT_EQ(nullptr, 
syncEv->getCommandQueue()); syncEv->osInterface = this->osInterface; syncEv->baseEvent = getBaseEvent(); getBaseEvent()->incRefInternal(); syncEv->release(); } TEST_F(GlArbSyncEventTest, whenGlArbSyncEventGetsUnblockedByTerminatedBaseEventThenSyncObjectDoesntGetSignalled) { auto *syncEv = createArbEventMock>(); triggerEvent->setStatus(-1); EXPECT_FALSE(syncEv->getSyncInfo()->waitCalled); syncEv->release(); } TEST_F(GlArbSyncEventTest, whenGlArbSyncEventGetsUnblockedByQueuedBaseEventThenSyncObjectDoesntGetSignalled) { auto *syncEv = createArbEventMock>(); syncEv->unblockEventBy(*this->baseEvent, 0, CL_QUEUED); EXPECT_FALSE(syncEv->getSyncInfo()->waitCalled); syncEv->release(); } TEST_F(GlArbSyncEventTest, whenGlArbSyncEventGetsUnblockedBySubmittedOrCompletedEventThenSyncObjectGetsSignalled) { setWaitCalledFlagOnServerWait(); auto *syncEv = createArbEventMock>(); triggerEvent->setStatus(CL_COMPLETE); EXPECT_TRUE(syncEv->getSyncInfo()->waitCalled); syncEv->release(); } TEST_F(GlArbSyncEventTest, whenGlArbSyncEventIsCreatedFromBaseEventWithoutValidContextThenCreationFails) { Event *baseEvent = new Event(nullptr, CL_COMMAND_RELEASE_GL_OBJECTS, CompletionStamp::notReady, CompletionStamp::notReady); auto *arbEvent = GlArbSyncEvent::create(*baseEvent); EXPECT_EQ(nullptr, arbEvent); baseEvent->release(); } TEST_F(GlArbSyncEventTest, whenGlArbSyncEventIsCreatedAndSetEventFailsThenCreationFails) { failSyncObjectCreation(); auto *arbEvent = GlArbSyncEvent::create(*this->getBaseEvent()); EXPECT_EQ(nullptr, arbEvent); } TEST_F(GlArbSyncEventTest, whenGlArbSyncEventIsCreatedThenBaseEventIsProperlySet) { auto *arbEvent = GlArbSyncEvent::create(*this->getBaseEvent()); EXPECT_NE(nullptr, arbEvent); EXPECT_TRUE(this->baseEvent->peekHasChildEvents()); EXPECT_EQ(arbEvent, this->baseEvent->peekChildEvents()->ref); arbEvent->release(); } TEST_F(GlArbSyncEventTest, whenClEnqueueMarkerWithSyncObjectINTELIsCalledThenInvalidOperationErrorCodeIsReturned) { cl_command_queue queue = 
static_cast(this->cmdQ.get()); auto ret = clEnqueueMarkerWithSyncObjectINTEL(queue, nullptr, nullptr); EXPECT_EQ(CL_INVALID_OPERATION, ret); } TEST_F(GlArbSyncEventTest, whenClGetCLObjectInfoINTELIsCalledThenInvalidOperationErrorCodeIsReturned) { cl_mem mem = {}; auto ret = clGetCLObjectInfoINTEL(mem, nullptr); EXPECT_EQ(CL_INVALID_OPERATION, ret); } TEST_F(GlArbSyncEventTest, givenNullSynInfoParameterWhenClGetCLEventInfoINTELIsCalledThenInvalidArgValueErrorCodeIsReturned) { cl_event ev = getBaseEvent(); cl_context ctxRet = {}; auto ret = clGetCLEventInfoINTEL(ev, nullptr, &ctxRet); EXPECT_EQ(CL_INVALID_ARG_VALUE, ret); } TEST_F(GlArbSyncEventTest, givenNullContextParameterWhenClGetCLEventInfoINTELIsCalledThenInvalidArgValueErrorCodeIsReturned) { cl_event ev = getBaseEvent(); CL_GL_SYNC_INFO *synInfoRet = nullptr; auto ret = clGetCLEventInfoINTEL(ev, &synInfoRet, nullptr); EXPECT_EQ(CL_INVALID_ARG_VALUE, ret); } TEST_F(GlArbSyncEventTest, givenUnknownEventWhenclGetCLEventInfoINTELIsCalledThenInvalidEventErrorCodeIsReturned) { auto deadEvent = new MockEvent(nullptr, 0, 0, 0); deadEvent->magic = Event::deadMagic; cl_event unknownEvent = deadEvent; CL_GL_SYNC_INFO *synInfoRet = nullptr; cl_context ctxRet = {}; auto ret = clGetCLEventInfoINTEL(unknownEvent, &synInfoRet, &ctxRet); EXPECT_EQ(CL_INVALID_EVENT, ret); deadEvent->release(); } TEST_F(GlArbSyncEventTest, givenEventWithCommandDifferentThanReleaseGlObjectsWhenClGetCLEventInfoINTELIsCalledThenValidContextIsReturned) { getBaseEvent(); cl_event ev = triggerEvent; CL_GL_SYNC_INFO *synInfoRet = reinterpret_cast(static_cast(0xFF)); cl_context ctxRet = {}; auto ret = clGetCLEventInfoINTEL(ev, &synInfoRet, &ctxRet); EXPECT_EQ(CL_SUCCESS, ret); EXPECT_EQ(nullptr, synInfoRet); EXPECT_EQ(ctxRet, ctx.get()); } TEST_F(GlArbSyncEventTest, givenDisabledSharingWhenClGetCLEventInfoINTELIsCalledThenInvalidOperationErrorCodeIsReturned) { getBaseEvent(); cl_event ev = baseEvent; CL_GL_SYNC_INFO *synInfoRet = 
reinterpret_cast(static_cast(0xFF)); cl_context ctxRet = {}; auto sharing = ctx->getSharing(); ctx->sharingFunctions[sharing->getId()] = nullptr; auto ret = clGetCLEventInfoINTEL(ev, &synInfoRet, &ctxRet); ctx->setSharingFunctions(new GlSharingFunctionsMock()); EXPECT_EQ(CL_INVALID_OPERATION, ret); } TEST_F(GlArbSyncEventTest, givenCallToClGetCLEventInfoINTELWhenGetOrCreateGlArbSyncFailsThenOutOfMemoryErrorCodeIsReturned) { getBaseEvent(); cl_event ev = this->baseEvent; CL_GL_SYNC_INFO *synInfoRet = reinterpret_cast(static_cast(0xFF)); cl_context ctxRet = {}; sharing->pfnGlArbSyncObjectSetup = mockGlArbSyncObjectSetup; auto ret = clGetCLEventInfoINTEL(ev, &synInfoRet, &ctxRet); EXPECT_EQ(CL_OUT_OF_RESOURCES, ret); } TEST_F(GlArbSyncEventTest, givenCallToClGetCLEventInfoINTELWhenFunctionSucceedsThenEventsGetUpdatedAndValidContextAndSyncInfoAreReturned) { auto *arbEvent = GlArbSyncEvent::create(*this->getBaseEvent()); this->sharing->glArbEventMapping[this->baseEvent] = arbEvent; cl_event ev = this->baseEvent; CL_GL_SYNC_INFO *synInfoRet = reinterpret_cast(static_cast(0xFF)); cl_context ctxRet = {}; EXPECT_FALSE(this->baseEvent->wasUpdated); auto ret = clGetCLEventInfoINTEL(ev, &synInfoRet, &ctxRet); EXPECT_TRUE(this->baseEvent->wasUpdated); EXPECT_EQ(CL_SUCCESS, ret); EXPECT_EQ(ctx.get(), ctxRet); EXPECT_EQ(arbEvent->getSyncInfo(), synInfoRet); arbEvent->release(); } TEST_F(GlArbSyncEventTest, givenUnknownEventWhenClReleaseGlSharedEventINTELIsCalledThenInvalidEventErrorCodeIsReturned) { auto deadEvent = new MockEvent(nullptr, 0, 0, 0); deadEvent->magic = Event::deadMagic; cl_event unknownEvent = deadEvent; auto ret = clReleaseGlSharedEventINTEL(unknownEvent); EXPECT_EQ(CL_INVALID_EVENT, ret); deadEvent->release(); } TEST_F(GlArbSyncEventTest, givenEventWithoutArbSyncWhenClReleaseGlSharedEventINTELIsCalledThenThisEventsRefcountIsDecreased) { this->getBaseEvent(); triggerEvent->retain(); EXPECT_EQ(2, triggerEvent->getRefInternalCount()); cl_event ev = triggerEvent; 
auto ret = clReleaseGlSharedEventINTEL(ev); EXPECT_EQ(CL_SUCCESS, ret); EXPECT_EQ(1, triggerEvent->getRefInternalCount()); } TEST_F(GlArbSyncEventTest, givenEventWithArbSyncWhenClReleaseGlSharedEventINTELIsCalledThenThisEventsAndArbSyncsRefcountsAreDecreased) { auto *arbEvent = GlArbSyncEvent::create(*this->getBaseEvent()); baseEvent->retain(); arbEvent->retain(); this->sharing->glArbEventMapping[baseEvent] = arbEvent; EXPECT_EQ(4, baseEvent->getRefInternalCount()); EXPECT_EQ(3, arbEvent->getRefInternalCount()); cl_event ev = baseEvent; auto ret = clReleaseGlSharedEventINTEL(ev); EXPECT_EQ(CL_SUCCESS, ret); EXPECT_EQ(3, baseEvent->getRefInternalCount()); EXPECT_EQ(2, arbEvent->getRefInternalCount()); arbEvent->release(); } gl_create_from_texture_tests.cpp000066400000000000000000000345651422164147700341550ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/sharings/gl/windows/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/get_info.h" #include "shared/test/common/libult/ult_command_stream_receiver.h" #include "shared/test/common/mocks/mock_execution_environment.h" #include "shared/test/common/mocks/mock_gmm.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/helpers/gmm_types_converter.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/sharings/gl/gl_texture.h" #include "opencl/test/unit_test/mocks/gl/windows/mock_gl_sharing_windows.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "gtest/gtest.h" namespace NEO { class CreateFromGlTexture : public ::testing::Test { public: // temp solution - we need to query size from GMM: class TempMM : public OsAgnosticMemoryManager { public: TempMM() : OsAgnosticMemoryManager(*(new MockExecutionEnvironment(defaultHwInfo.get()))) { mockExecutionEnvironment.reset(&executionEnvironment); } GraphicsAllocation 
*createGraphicsAllocationFromSharedHandle(osHandle handle, const AllocationProperties &properties, bool requireSpecificBitness, bool isHostIpcAllocation) override { auto alloc = OsAgnosticMemoryManager::createGraphicsAllocationFromSharedHandle(handle, properties, requireSpecificBitness, isHostIpcAllocation); if (handle == CreateFromGlTexture::mcsHandle) { alloc->setDefaultGmm(forceMcsGmm); } else { alloc->setDefaultGmm(forceGmm); } return alloc; } size_t forceAllocationSize; Gmm *forceGmm = nullptr; Gmm *forceMcsGmm = nullptr; std::unique_ptr mockExecutionEnvironment; }; void SetUp() override { imgDesc = {}; imgInfo = {}; clContext.setSharingFunctions(glSharing->sharingFunctions.release()); clContext.memoryManager = &tempMM; } void TearDown() override { gmm.release(); mcsGmm.release(); } void updateImgInfoAndForceGmm() { imgInfo = MockGmm::initImgInfo(imgDesc, 0, nullptr); gmm = MockGmm::queryImgParams(clContext.getDevice(0)->getGmmClientContext(), imgInfo, false); tempMM.forceAllocationSize = imgInfo.size; tempMM.forceGmm = gmm.get(); if (glSharing->m_textureInfoOutput.globalShareHandleMCS != 0) { ImageDescriptor mcsImgDesc = {}; mcsImgDesc.imageHeight = 128; mcsImgDesc.imageRowPitch = 256; mcsImgDesc.imageWidth = 128; mcsImgDesc.imageType = ImageType::Image2D; auto mcsImgInfo = MockGmm::initImgInfo(mcsImgDesc, 0, nullptr); mcsGmm = MockGmm::queryImgParams(clContext.getDevice(0)->getGmmClientContext(), mcsImgInfo, false); tempMM.forceMcsGmm = mcsGmm.get(); } } ImageDescriptor imgDesc; ImageInfo imgInfo = {}; std::unique_ptr gmm; std::unique_ptr mcsGmm; TempMM tempMM; MockContext clContext; std::unique_ptr glSharing = std::make_unique(); cl_int retVal; static const unsigned int mcsHandle = 0xFF; }; class CreateFromGlTextureTestsWithParams : public CreateFromGlTexture, public ::testing::WithParamInterface { }; class CreateFromGlTextureTests : public CreateFromGlTexture { }; INSTANTIATE_TEST_CASE_P( CreateFromGlTextureTestsWithParams, 
CreateFromGlTextureTestsWithParams, testing::ValuesIn(glTextureTargets::supportedTargets)); TEST_P(CreateFromGlTextureTestsWithParams, givenAllTextureSpecificParamsWhenCreateIsCalledThenFillImageDescription) { unsigned int target = GetParam(); unsigned int baseTarget = GlTexture::getBaseTargetType(target); imgDesc.imageType = Image::convertType(GlTexture::getClMemObjectType(target)); imgDesc.imageWidth = 5; if (target == GL_TEXTURE_1D_ARRAY || target == GL_TEXTURE_2D_ARRAY || target == GL_TEXTURE_2D_MULTISAMPLE_ARRAY) { imgDesc.imageArraySize = 5; } if (target == GL_TEXTURE_2D || target == GL_TEXTURE_RECTANGLE || target == GL_TEXTURE_2D_ARRAY || target == GL_TEXTURE_3D || target == GL_RENDERBUFFER_EXT || baseTarget == GL_TEXTURE_CUBE_MAP_ARB || target == GL_TEXTURE_2D_MULTISAMPLE || target == GL_TEXTURE_2D_MULTISAMPLE_ARRAY) { imgDesc.imageHeight = 5; } if (target == GL_TEXTURE_3D) { imgDesc.imageDepth = 5; } if (target == GL_TEXTURE_BUFFER) { // size and width for texture buffer are queried from textureInfo - not from gmm glSharing->m_textureInfoOutput.textureBufferWidth = 64; glSharing->m_textureInfoOutput.textureBufferSize = 1024; glSharing->uploadDataToTextureInfo(); } if (target == GL_TEXTURE_2D_MULTISAMPLE || target == GL_TEXTURE_2D_MULTISAMPLE_ARRAY) { imgDesc.numSamples = 16; glSharing->m_textureInfoOutput.numberOfSamples = 16; glSharing->m_textureInfoOutput.globalShareHandleMCS = CreateFromGlTexture::mcsHandle; glSharing->uploadDataToTextureInfo(); } updateImgInfoAndForceGmm(); auto glImage = GlTexture::createSharedGlTexture(&clContext, (cl_mem_flags)0, target, 0, 0, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); if (target == GL_RENDERBUFFER_EXT) { EXPECT_EQ(1, glSharing->dllParam->getParam("GLAcquireSharedRenderBufferCalled")); } else { EXPECT_EQ(1, glSharing->dllParam->getParam("GLAcquireSharedTextureCalled")); } EXPECT_EQ(GmmTypesConverter::getCubeFaceIndex(target), glImage->getCubeFaceIndex()); auto glTexture = 
reinterpret_cast(glImage->peekSharingHandler()); EXPECT_EQ(glTexture->getTarget(), target); EXPECT_EQ(glImage->getImageDesc().image_type, Image::convertType(imgDesc.imageType)); if (target == GL_TEXTURE_BUFFER) { EXPECT_EQ(glImage->getImageDesc().image_width, static_cast(glTexture->getTextureInfo()->textureBufferWidth)); EXPECT_EQ(glImage->getImageDesc().image_row_pitch, static_cast(glTexture->getTextureInfo()->textureBufferSize)); } else { EXPECT_EQ(glImage->getImageDesc().image_width, gmm->gmmResourceInfo->getBaseWidth()); size_t slicePitch = glImage->getHostPtrSlicePitch(); size_t rowPitch = glImage->getHostPtrRowPitch(); EXPECT_EQ(glImage->getImageDesc().image_row_pitch, rowPitch); EXPECT_EQ(glImage->getImageDesc().image_slice_pitch, slicePitch); size_t gmmRowPitch = gmm->gmmResourceInfo->getRenderPitch(); if (gmmRowPitch == 0) { size_t alignedWidth = alignUp(glImage->getImageDesc().image_width, gmm->gmmResourceInfo->getHAlign()); size_t bpp = gmm->gmmResourceInfo->getBitsPerPixel() >> 3; EXPECT_EQ(glImage->getImageDesc().image_row_pitch, alignedWidth * bpp); } else { EXPECT_EQ(glImage->getImageDesc().image_row_pitch, gmmRowPitch); } size_t ImageInfoRowPitch = 0; retVal = clGetImageInfo(glImage, CL_IMAGE_ROW_PITCH, sizeof(size_t), &ImageInfoRowPitch, NULL); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(rowPitch, ImageInfoRowPitch); size_t ImageInfoSlicePitch = 0; slicePitch *= !(glImage->getImageDesc().image_type == CL_MEM_OBJECT_IMAGE2D || glImage->getImageDesc().image_type == CL_MEM_OBJECT_IMAGE1D || glImage->getImageDesc().image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER); retVal = clGetImageInfo(glImage, CL_IMAGE_SLICE_PITCH, sizeof(size_t), &ImageInfoSlicePitch, NULL); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(slicePitch, ImageInfoSlicePitch); } EXPECT_EQ(glImage->getImageDesc().image_height, gmm->gmmResourceInfo->getBaseHeight()); EXPECT_EQ(glImage->getImageDesc().image_array_size, gmm->gmmResourceInfo->getArraySize()); if (target == GL_TEXTURE_3D) { 
EXPECT_EQ(glImage->getImageDesc().image_depth, gmm->gmmResourceInfo->getBaseDepth()); } else { EXPECT_EQ(glImage->getImageDesc().image_depth, 0u); } if (imgDesc.imageArraySize > 1 || imgDesc.imageDepth > 1) { GMM_REQ_OFFSET_INFO GMMReqInfo = {}; GMMReqInfo.ArrayIndex = imgDesc.imageArraySize > 1 ? 1 : 0; GMMReqInfo.Slice = imgDesc.imageDepth > 1 ? 1 : 0; GMMReqInfo.ReqLock = 1; gmm->gmmResourceInfo->getOffset(GMMReqInfo); size_t expectedSlicePitch = GMMReqInfo.Lock.Offset; EXPECT_EQ(glImage->getImageDesc().image_slice_pitch, expectedSlicePitch); } else { EXPECT_EQ(glImage->getImageDesc().image_slice_pitch, imgInfo.size); } EXPECT_EQ(glImage->getQPitch(), gmm->queryQPitch(gmm->gmmResourceInfo->getResourceType())); // gmm returns 1 by default - OCL requires 0 uint32_t numSamples = static_cast(gmm->gmmResourceInfo->getNumSamples()); auto expectedNumSamples = getValidParam(numSamples, 0u, 1u); EXPECT_EQ(expectedNumSamples, glImage->getImageDesc().num_samples); if (target == GL_TEXTURE_2D_MULTISAMPLE || target == GL_TEXTURE_2D_MULTISAMPLE_ARRAY) { EXPECT_NE(nullptr, glImage->getMcsAllocation()); EXPECT_EQ(getValidParam(static_cast(mcsGmm->gmmResourceInfo->getRenderPitch() / 128)), glImage->getMcsSurfaceInfo().pitch); EXPECT_EQ(static_cast(mcsGmm->gmmResourceInfo->getQPitch()), glImage->getMcsSurfaceInfo().qPitch); EXPECT_EQ(GmmTypesConverter::getRenderMultisamplesCount(static_cast(gmm->gmmResourceInfo->getNumSamples())), glImage->getMcsSurfaceInfo().multisampleCount); } delete glImage; } TEST_P(CreateFromGlTextureTestsWithParams, givenArrayTextureTargetAndArraySizeEqualOneWhenCreateIsCalledThenSlicePitchAndSizeAreEqual) { unsigned int target = GetParam(); // only array targets if (target == GL_TEXTURE_1D_ARRAY || target == GL_TEXTURE_2D_ARRAY) { imgDesc.imageType = Image::convertType(GlTexture::getClMemObjectType(target)); imgDesc.imageWidth = 5; if (target == GL_TEXTURE_2D_ARRAY) { imgDesc.imageHeight = 5; } imgDesc.imageArraySize = 1; updateImgInfoAndForceGmm(); auto 
glImage = GlTexture::createSharedGlTexture(&clContext, (cl_mem_flags)0, target, 0, 0, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(glImage->getImageDesc().image_slice_pitch, imgInfo.size); delete glImage; } } TEST_P(CreateFromGlTextureTestsWithParams, givenZeroRowPitchFromGmmWhenCreatingTextureThenComputeIt) { unsigned int target = GL_TEXTURE_2D; imgDesc.imageType = Image::convertType(GlTexture::getClMemObjectType(target)); imgDesc.imageWidth = 5; imgDesc.imageHeight = 5; imgDesc.imageArraySize = 1; updateImgInfoAndForceGmm(); auto mockResInfo = static_cast(gmm->gmmResourceInfo.get()); mockResInfo->overrideReturnedRenderPitch(0u); auto alignedWidth = alignUp(imgDesc.imageWidth, gmm->gmmResourceInfo->getHAlign()); auto expectedRowPitch = alignedWidth * (gmm->gmmResourceInfo->getBitsPerPixel() >> 3); auto glImage = std::unique_ptr(GlTexture::createSharedGlTexture(&clContext, (cl_mem_flags)0, target, 0, 0, &retVal)); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(imgInfo.size, glImage->getImageDesc().image_slice_pitch); EXPECT_EQ(expectedRowPitch, glImage->getImageDesc().image_row_pitch); } TEST_F(CreateFromGlTextureTests, GivenGlTextureTargetAndMipLevelNegativeWhenCreateIsCalledThenMipMappedImageIsCreated) { unsigned int target = GL_TEXTURE_3D; cl_GLint miplevel = -1; imgDesc.imageType = Image::convertType(GlTexture::getClMemObjectType(target)); imgDesc.imageHeight = 13; imgDesc.imageWidth = 15; imgDesc.imageDepth = 7; updateImgInfoAndForceGmm(); auto glImage = std::unique_ptr(GlTexture::createSharedGlTexture(&clContext, 0u, target, miplevel, 0, &retVal)); EXPECT_EQ(CL_SUCCESS, retVal); size_t actualHeight = 0; size_t actualWidth = 0; size_t actualDepth = 0; glImage->getImageInfo(CL_IMAGE_HEIGHT, sizeof(size_t), &actualHeight, nullptr); glImage->getImageInfo(CL_IMAGE_WIDTH, sizeof(size_t), &actualWidth, nullptr); glImage->getImageInfo(CL_IMAGE_DEPTH, sizeof(size_t), &actualDepth, nullptr); EXPECT_EQ(13u, actualHeight); EXPECT_EQ(15u, actualWidth); EXPECT_EQ(7u, 
actualDepth); EXPECT_EQ(gmm->gmmResourceInfo->getMaxLod() + 1, glImage->getImageDesc().num_mip_levels); EXPECT_EQ(glImage->peekBaseMipLevel(), 0); } TEST_F(CreateFromGlTextureTests, GivenGlTextureTargetAndMipLevelNonNegativeWhenCreateIsCalledThenImageFromChosenMipLevelIsCreated) { unsigned int target = GL_TEXTURE_3D; cl_GLint miplevel = 2; imgDesc.imageType = Image::convertType(GlTexture::getClMemObjectType(target)); imgDesc.imageHeight = 13; imgDesc.imageWidth = 15; imgDesc.imageDepth = 7; updateImgInfoAndForceGmm(); auto glImage = std::unique_ptr(GlTexture::createSharedGlTexture(&clContext, 0u, target, miplevel, 0, &retVal)); EXPECT_EQ(CL_SUCCESS, retVal); size_t actualHeight = 0; size_t actualWidth = 0; size_t actualDepth = 0; glImage->getImageInfo(CL_IMAGE_HEIGHT, sizeof(size_t), &actualHeight, nullptr); glImage->getImageInfo(CL_IMAGE_WIDTH, sizeof(size_t), &actualWidth, nullptr); glImage->getImageInfo(CL_IMAGE_DEPTH, sizeof(size_t), &actualDepth, nullptr); EXPECT_EQ(3u, actualHeight); EXPECT_EQ(3u, actualWidth); EXPECT_EQ(1u, actualDepth); EXPECT_GE(1u, glImage->getImageDesc().num_mip_levels); EXPECT_EQ(glImage->peekBaseMipLevel(), 2); } TEST_F(CreateFromGlTextureTests, GivenGlTextureWhenCreateIsCalledThenAllocationTypeIsSharedImage) { unsigned int target = GL_TEXTURE_3D; cl_GLint miplevel = 2; imgDesc.imageType = Image::convertType(GlTexture::getClMemObjectType(target)); imgDesc.imageHeight = 13; imgDesc.imageWidth = 15; imgDesc.imageDepth = 7; updateImgInfoAndForceGmm(); auto glImage = std::unique_ptr(GlTexture::createSharedGlTexture(&clContext, 0u, target, miplevel, 0, &retVal)); EXPECT_EQ(CL_SUCCESS, retVal); auto graphicsAllocation = glImage->getGraphicsAllocation(clContext.getDevice(0)->getRootDeviceIndex()); ASSERT_NE(nullptr, graphicsAllocation); EXPECT_EQ(AllocationType::SHARED_IMAGE, graphicsAllocation->getAllocationType()); } } // namespace NEO 
compute-runtime-22.14.22890/opencl/test/unit_test/sharings/gl/windows/gl_library_name.cpp000066400000000000000000000003441422164147700313740ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include // above is workaround for copyright checker issue namespace Os { const char *openglDllName = "mock_opengl32.dll"; } compute-runtime-22.14.22890/opencl/test/unit_test/sharings/gl/windows/gl_os_sharing_tests.cpp000066400000000000000000000442551422164147700323170ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/helpers/timestamp_packet.h" #include "shared/source/os_interface/os_interface.h" #include "shared/source/os_interface/windows/os_context_win.h" #include "shared/source/os_interface/windows/wddm/wddm.h" #include "shared/source/os_interface/windows/wddm_memory_operations_handler.h" #include "shared/test/common/helpers/engine_descriptor_helper.h" #include "shared/test/common/mocks/mock_execution_environment.h" #include "shared/test/common/os_interface/windows/mock_sys_calls.h" #include "shared/test/common/os_interface/windows/wddm_fixture.h" #include "opencl/extensions/public/cl_gl_private_intel.h" #include "opencl/source/sharings/gl/gl_arb_sync_event.h" #include "opencl/source/sharings/gl/windows/gl_sharing_windows.h" #include "opencl/test/unit_test/mocks/gl/windows/mock_gl_sharing_windows.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "gtest/gtest.h" #include using namespace NEO; struct MockOSInterface : OSInterface { static HANDLE createEvent(LPSECURITY_ATTRIBUTES lpEventAttributes, BOOL bManualReset, BOOL bInitialState, LPCSTR lpName, void *data) { MockOSInterface *self = reinterpret_cast(data); if (self->eventNum++ == self->failEventNum) { return INVALID_HANDLE; } return handleValue; } static BOOL closeHandle(HANDLE 
hObject, void *data) { MockOSInterface *self = reinterpret_cast(data); ++self->closedEventsCount; return (reinterpret_cast(dummyHandle) == hObject) ? TRUE : FALSE; } int eventNum = 1; int failEventNum = 0; int closedEventsCount = 0; }; TEST(glSharingBasicTest, GivenSharingFunctionsWhenItIsConstructedThenBackupContextIsCreated) { GLType GLHDCType = CL_WGL_HDC_KHR; GLContext GLHGLRCHandle = 0; GLDisplay GLHDCHandle = 0; int32_t expectedContextAttrs[3] = {0}; GlDllHelper dllHelper; auto glSharingFunctions = new GlSharingFunctionsMock(GLHDCType, GLHGLRCHandle, GLHGLRCHandle, GLHDCHandle); EXPECT_EQ(1, dllHelper.getParam("WGLCreateContextCalled")); EXPECT_EQ(1, dllHelper.getParam("WGLShareListsCalled")); EXPECT_EQ(0, EGLChooseConfigCalled); EXPECT_EQ(0, EGLCreateContextCalled); EXPECT_EQ(0, GlxChooseFBConfigCalled); EXPECT_EQ(0, GlxQueryContextCalled); EXPECT_EQ(0, GlxCreateNewContextCalled); EXPECT_EQ(0, GlxIsDirectCalled); EXPECT_EQ(0, eglBkpContextParams.configAttrs); EXPECT_EQ(0, eglBkpContextParams.numConfigs); EXPECT_TRUE(glSharingFunctions->getBackupContextHandle() != 0); EXPECT_TRUE(memcmp(eglBkpContextParams.contextAttrs, expectedContextAttrs, 3 * sizeof(int32_t)) == 0); EXPECT_EQ(0, glxBkpContextParams.FBConfigAttrs); EXPECT_EQ(0, glxBkpContextParams.queryAttribute); EXPECT_EQ(0, glxBkpContextParams.renderType); delete glSharingFunctions; EXPECT_EQ(1, dllHelper.getParam("WGLDeleteContextCalled")); EXPECT_EQ(1, dllHelper.getParam("GLDeleteContextCalled")); } struct GlArbSyncEventOsTest : public ::testing::Test { void SetUp() override { rootDeviceEnvironment = std::make_unique(executionEnvironment); sharing.GLContextHandle = 0x2cU; sharing.GLDeviceHandle = 0x3cU; wddm = new WddmMock(*rootDeviceEnvironment); rootDeviceEnvironment->osInterface = std::make_unique(); osInterface = rootDeviceEnvironment->osInterface.get(); osInterface->setDriverModel(std::unique_ptr(wddm)); gdi = new MockGdi(); wddm->resetGdi(gdi); } MockExecutionEnvironment executionEnvironment; 
std::unique_ptr rootDeviceEnvironment; GlSharingFunctionsMock sharing; MockGdi *gdi = nullptr; WddmMock *wddm = nullptr; CL_GL_SYNC_INFO syncInfo = {}; OSInterface *osInterface = nullptr; }; TEST_F(GlArbSyncEventOsTest, WhenCreateSynchronizationObjectSucceedsThenAllHAndlesAreValid) { struct CreateSyncObjectMock { static int &getHandle() { static int handle = 1; return handle; } static void reset() { getHandle() = 1; } static NTSTATUS __stdcall createSynchObject(D3DKMT_CREATESYNCHRONIZATIONOBJECT *pData) { if (pData == nullptr) { return STATUS_INVALID_PARAMETER; } EXPECT_NE(NULL, pData->hDevice); EXPECT_EQ(D3DDDI_SEMAPHORE, pData->Info.Type); EXPECT_EQ(32, pData->Info.Semaphore.MaxCount); EXPECT_EQ(0, pData->Info.Semaphore.InitialCount); pData->hSyncObject = getHandle()++; return STATUS_SUCCESS; } static NTSTATUS __stdcall createSynchObject2(D3DKMT_CREATESYNCHRONIZATIONOBJECT2 *pData) { if (pData == nullptr) { return STATUS_INVALID_PARAMETER; } EXPECT_NE(NULL, pData->hDevice); EXPECT_EQ(D3DDDI_CPU_NOTIFICATION, pData->Info.Type); EXPECT_NE(nullptr, pData->Info.CPUNotification.Event); pData->hSyncObject = getHandle()++; return STATUS_SUCCESS; } }; CreateSyncObjectMock::reset(); wddm->init(); gdi->createSynchronizationObject.mFunc = CreateSyncObjectMock::createSynchObject; gdi->createSynchronizationObject2.mFunc = CreateSyncObjectMock::createSynchObject2; auto ret = setupArbSyncObject(sharing, *osInterface, syncInfo); EXPECT_TRUE(ret); EXPECT_EQ(1U, syncInfo.serverSynchronizationObject); EXPECT_EQ(2U, syncInfo.clientSynchronizationObject); EXPECT_EQ(3U, syncInfo.submissionSynchronizationObject); EXPECT_EQ(sharing.GLContextHandle, syncInfo.hContextToBlock); EXPECT_NE(nullptr, syncInfo.event); EXPECT_NE(nullptr, syncInfo.eventName); EXPECT_NE(nullptr, syncInfo.submissionEvent); EXPECT_NE(nullptr, syncInfo.submissionEventName); EXPECT_FALSE(syncInfo.waitCalled); cleanupArbSyncObject(*osInterface, &syncInfo); } TEST_F(GlArbSyncEventOsTest, 
GivenNewGlSyncInfoWhenCreateSynchronizationObjectFailsThenSetupArbSyncObjectFails) { struct CreateSyncObjectMock { static int &getHandle() { static int handle = 1; return handle; } static int &getFailHandleId() { static int failHandleId = 0; return failHandleId; } static void reset() { getHandle() = 1; getFailHandleId() = 0; } static NTSTATUS __stdcall createSynchObject(D3DKMT_CREATESYNCHRONIZATIONOBJECT *pData) { auto newHandle = getHandle()++; if (newHandle == getFailHandleId()) { return STATUS_INVALID_PARAMETER; } return STATUS_SUCCESS; } static NTSTATUS __stdcall createSynchObject2(D3DKMT_CREATESYNCHRONIZATIONOBJECT2 *pData) { auto newHandle = getHandle()++; if (newHandle == getFailHandleId()) { return STATUS_INVALID_PARAMETER; } return STATUS_SUCCESS; } }; CreateSyncObjectMock::reset(); wddm->init(); gdi->createSynchronizationObject.mFunc = CreateSyncObjectMock::createSynchObject; gdi->createSynchronizationObject2.mFunc = CreateSyncObjectMock::createSynchObject2; CreateSyncObjectMock::getFailHandleId() = CreateSyncObjectMock::getHandle(); int failuresCount = 0; auto ret = setupArbSyncObject(sharing, *osInterface, syncInfo); while (false == ret) { ++failuresCount; CreateSyncObjectMock::getHandle() = 1; ++CreateSyncObjectMock::getFailHandleId(); ret = setupArbSyncObject(sharing, *osInterface, syncInfo); } EXPECT_EQ(3, failuresCount); cleanupArbSyncObject(*osInterface, &syncInfo); } TEST_F(GlArbSyncEventOsTest, GivenNewGlSyncInfoWhenCreateEventFailsThenSetupArbSyncObjectFails) { auto rootDeviceEnvironment = platform()->peekExecutionEnvironment()->rootDeviceEnvironments[0].get(); rootDeviceEnvironment->osInterface = std::make_unique(); MockOSInterface mockOsInterface; auto createEventMock = changeSysCallMock(mockCreateEventClb, mockCreateEventClbData, MockOSInterface::createEvent, &mockOsInterface); auto wddm = new WddmMock(*rootDeviceEnvironment); auto gdi = new MockGdi(); wddm->resetGdi(gdi); wddm->init(); mockOsInterface.setDriverModel(std::unique_ptr(wddm)); 
mockOsInterface.failEventNum = mockOsInterface.eventNum; int failuresCount = 0; auto ret = setupArbSyncObject(sharing, mockOsInterface, syncInfo); while (false == ret) { ++failuresCount; mockOsInterface.eventNum = 1; ++mockOsInterface.failEventNum; ret = setupArbSyncObject(sharing, mockOsInterface, syncInfo); } EXPECT_EQ(2, failuresCount); cleanupArbSyncObject(mockOsInterface, &syncInfo); } TEST_F(GlArbSyncEventOsTest, GivenInvalidGlSyncInfoWhenCleanupArbSyncObjectIsCalledThenDestructorsOfSyncOrEventsAreNotInvoked) { struct DestroySyncObjectMock { static NTSTATUS __stdcall destroySynchObject(_In_ CONST D3DKMT_DESTROYSYNCHRONIZATIONOBJECT *sync) { EXPECT_FALSE(true); return STATUS_INVALID_PARAMETER; } }; auto rootDeviceEnvironment = platform()->peekExecutionEnvironment()->rootDeviceEnvironments[0].get(); rootDeviceEnvironment->osInterface = std::make_unique(); auto wddm = new WddmMock(*rootDeviceEnvironment); auto gdi = new MockGdi(); wddm->resetGdi(gdi); wddm->init(); MockOSInterface mockOsInterface; auto closeHandleMock = changeSysCallMock(mockCloseHandleClb, mockCloseHandleClbData, MockOSInterface::closeHandle, &mockOsInterface); mockOsInterface.setDriverModel(std::unique_ptr(wddm)); gdi->destroySynchronizationObject = DestroySyncObjectMock::destroySynchObject; cleanupArbSyncObject(mockOsInterface, nullptr); EXPECT_EQ(0, mockOsInterface.closedEventsCount); } TEST_F(GlArbSyncEventOsTest, GivenValidGlSyncInfoWhenCleanupArbSyncObjectIsCalledThenProperCountOfDestructorsOfSyncAndEventsIsNotInvoked) { struct CreateDestroySyncObjectMock { static int &getDestroyCounter() { static int counter = 0; return counter; } static NTSTATUS __stdcall destroySynchObject(_In_ CONST D3DKMT_DESTROYSYNCHRONIZATIONOBJECT *sync) { ++getDestroyCounter(); return STATUS_SUCCESS; } static void reset() { getDestroyCounter() = 0; } }; auto rootDeviceEnvironment = platform()->peekExecutionEnvironment()->rootDeviceEnvironments[0].get(); rootDeviceEnvironment->osInterface = std::make_unique(); 
auto wddm = new WddmMock(*rootDeviceEnvironment); auto gdi = new MockGdi(); wddm->resetGdi(gdi); wddm->init(); MockOSInterface mockOsInterface; auto closeHandleMock = changeSysCallMock(mockCloseHandleClb, mockCloseHandleClbData, MockOSInterface::closeHandle, &mockOsInterface); mockOsInterface.setDriverModel(std::unique_ptr(wddm)); CreateDestroySyncObjectMock::reset(); gdi->destroySynchronizationObject = CreateDestroySyncObjectMock::destroySynchObject; auto ret = setupArbSyncObject(sharing, mockOsInterface, syncInfo); EXPECT_TRUE(ret); syncInfo.serverSynchronizationObject = 0x5cU; syncInfo.clientSynchronizationObject = 0x7cU; syncInfo.submissionSynchronizationObject = 0x13cU; cleanupArbSyncObject(mockOsInterface, &syncInfo); EXPECT_EQ(2, mockOsInterface.closedEventsCount); EXPECT_EQ(3, CreateDestroySyncObjectMock::getDestroyCounter()); } TEST_F(GlArbSyncEventOsTest, GivenCallToSignalArbSyncObjectWhenSignalSynchronizationObjectForServerClientSyncFailsThenSubmissionSyncDoesNotGetSignalled) { struct FailSignalSyncObjectMock { static NTSTATUS __stdcall signal(_In_ CONST D3DKMT_SIGNALSYNCHRONIZATIONOBJECT *obj) { EXPECT_NE(nullptr, obj); if (obj == nullptr) { return STATUS_INVALID_PARAMETER; } EXPECT_EQ(2, obj->ObjectCount); EXPECT_EQ(getExpectedSynchHandle0(), obj->ObjectHandleArray[0]); EXPECT_EQ(getExpectedSynchHandle1(), obj->ObjectHandleArray[1]); EXPECT_EQ(0, obj->Flags.SignalAtSubmission); EXPECT_EQ(getExpectedContextHandle(), obj->hContext); return STATUS_INVALID_PARAMETER; } static D3DKMT_HANDLE &getExpectedSynchHandle0() { static D3DKMT_HANDLE handle = INVALID_HANDLE; return handle; } static D3DKMT_HANDLE &getExpectedSynchHandle1() { static D3DKMT_HANDLE handle = INVALID_HANDLE; return handle; } static D3DKMT_HANDLE &getExpectedContextHandle() { static D3DKMT_HANDLE handle = INVALID_HANDLE; return handle; } static void reset() { getExpectedSynchHandle0() = INVALID_HANDLE; getExpectedSynchHandle1() = INVALID_HANDLE; getExpectedContextHandle() = INVALID_HANDLE; } 
}; FailSignalSyncObjectMock::reset(); auto preemptionMode = PreemptionHelper::getDefaultPreemptionMode(*defaultHwInfo); wddm->init(); OsContextWin osContext(*wddm, 0u, EngineDescriptorHelper::getDefaultDescriptor(HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*defaultHwInfo)[0], preemptionMode)); CL_GL_SYNC_INFO syncInfo = {}; syncInfo.serverSynchronizationObject = 0x5cU; syncInfo.clientSynchronizationObject = 0x6cU; gdi->signalSynchronizationObject.mFunc = FailSignalSyncObjectMock::signal; FailSignalSyncObjectMock::getExpectedContextHandle() = osContext.getWddmContextHandle(); FailSignalSyncObjectMock::getExpectedSynchHandle0() = syncInfo.serverSynchronizationObject; FailSignalSyncObjectMock::getExpectedSynchHandle1() = syncInfo.clientSynchronizationObject; signalArbSyncObject(osContext, syncInfo); } TEST_F(GlArbSyncEventOsTest, GivenCallToSignalArbSyncObjectWhenSignalSynchronizationObjectForServerClientSyncSucceedsThenSubmissionSyncGetsSignalledAsWell) { struct FailSignalSyncObjectMock { static NTSTATUS __stdcall signal(_In_ CONST D3DKMT_SIGNALSYNCHRONIZATIONOBJECT *obj) { EXPECT_NE(nullptr, obj); if (obj == nullptr) { return STATUS_INVALID_PARAMETER; } // validating only second call to signal if (getCounter()++ != 1) { return STATUS_SUCCESS; } EXPECT_EQ(1, obj->ObjectCount); EXPECT_EQ(getExpectedSynchHandle0(), obj->ObjectHandleArray[0]); EXPECT_EQ(1, obj->Flags.SignalAtSubmission); EXPECT_EQ(getExpectedContextHandle(), obj->hContext); return STATUS_SUCCESS; } static D3DKMT_HANDLE &getExpectedSynchHandle0() { static D3DKMT_HANDLE handle = INVALID_HANDLE; return handle; } static int &getCounter() { static int counter = 0; return counter; } static D3DKMT_HANDLE &getExpectedContextHandle() { static D3DKMT_HANDLE handle = INVALID_HANDLE; return handle; } static void reset() { getExpectedSynchHandle0() = INVALID_HANDLE; getCounter() = 0; getExpectedContextHandle() = INVALID_HANDLE; } }; FailSignalSyncObjectMock::reset(); auto 
preemptionMode = PreemptionHelper::getDefaultPreemptionMode(*defaultHwInfo); wddm->init(); OsContextWin osContext(*wddm, 0u, EngineDescriptorHelper::getDefaultDescriptor(HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*defaultHwInfo)[0], preemptionMode)); CL_GL_SYNC_INFO syncInfo = {}; syncInfo.submissionSynchronizationObject = 0x7cU; gdi->signalSynchronizationObject.mFunc = FailSignalSyncObjectMock::signal; FailSignalSyncObjectMock::getExpectedContextHandle() = osContext.getWddmContextHandle(); FailSignalSyncObjectMock::getExpectedSynchHandle0() = syncInfo.submissionSynchronizationObject; signalArbSyncObject(osContext, syncInfo); } TEST_F(GlArbSyncEventOsTest, GivenCallToServerWaitForArbSyncObjectWhenWaitForSynchronizationObjectFailsThenWaitFlagDoesNotGetSet) { struct FailWaitSyncObjectMock { static NTSTATUS __stdcall waitForSynchObject(_In_ CONST_FROM_WDK_10_0_18328_0 D3DKMT_WAITFORSYNCHRONIZATIONOBJECT *waitData) { EXPECT_NE(nullptr, waitData); if (waitData == nullptr) { return STATUS_INVALID_PARAMETER; } EXPECT_EQ(1, waitData->ObjectCount); EXPECT_EQ(getExpectedSynchHandle0(), waitData->ObjectHandleArray[0]); EXPECT_EQ(getExpectedContextHandle(), waitData->hContext); return STATUS_INVALID_PARAMETER; } static D3DKMT_HANDLE &getExpectedSynchHandle0() { static D3DKMT_HANDLE handle = INVALID_HANDLE; return handle; } static D3DKMT_HANDLE &getExpectedContextHandle() { static D3DKMT_HANDLE handle = INVALID_HANDLE; return handle; } static void reset() { getExpectedSynchHandle0() = INVALID_HANDLE; getExpectedContextHandle() = INVALID_HANDLE; } }; FailWaitSyncObjectMock::reset(); CL_GL_SYNC_INFO syncInfo = {}; syncInfo.hContextToBlock = 0x4cU; FailWaitSyncObjectMock::getExpectedSynchHandle0() = syncInfo.serverSynchronizationObject; FailWaitSyncObjectMock::getExpectedContextHandle() = syncInfo.hContextToBlock; gdi->waitForSynchronizationObject.mFunc = FailWaitSyncObjectMock::waitForSynchObject; EXPECT_FALSE(syncInfo.waitCalled); 
serverWaitForArbSyncObject(*osInterface, syncInfo); EXPECT_FALSE(syncInfo.waitCalled); } TEST_F(GlArbSyncEventOsTest, GivenCallToServerWaitForArbSyncObjectWhenWaitForSynchronizationObjectSucceedsThenWaitFlagGetsSet) { CL_GL_SYNC_INFO syncInfo = {}; syncInfo.serverSynchronizationObject = 0x7cU; EXPECT_FALSE(syncInfo.waitCalled); serverWaitForArbSyncObject(*osInterface, syncInfo); EXPECT_TRUE(syncInfo.waitCalled); } compute-runtime-22.14.22890/opencl/test/unit_test/sharings/gl/windows/gl_reused_buffers_tests.cpp000066400000000000000000000246321422164147700331630ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gmm_helper/gmm.h" #include "shared/source/gmm_helper/resource_info.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/sharings/gl/gl_buffer.h" #include "opencl/test/unit_test/mocks/gl/windows/mock_gl_sharing_windows.h" #include "opencl/test/unit_test/mocks/mock_context.h" using namespace NEO; struct GlReusedBufferTests : public ::testing::Test { void SetUp() override { glSharingFunctions = new GlSharingFunctionsMock(); context.setSharingFunctions(glSharingFunctions); rootDeviceIndex = context.getDevice(0)->getRootDeviceIndex(); graphicsAllocationsForGlBufferReuse = &glSharingFunctions->graphicsAllocationsForGlBufferReuse; } GlSharingFunctionsMock *glSharingFunctions = nullptr; uint32_t rootDeviceIndex = 0; MockContext context; std::vector> *graphicsAllocationsForGlBufferReuse = nullptr; unsigned int bufferId1 = 5; unsigned int bufferId2 = 7; cl_int retVal = CL_SUCCESS; }; class FailingMemoryManager : public MockMemoryManager { public: GraphicsAllocation *createGraphicsAllocationFromSharedHandle(osHandle handle, const AllocationProperties &properties, bool requireSpecificBitness, bool isHostIpcAllocation) 
override { return nullptr; } }; TEST_F(GlReusedBufferTests, givenMultipleBuffersWithTheSameIdWhenCreatedThenReuseGraphicsAllocation) { std::unique_ptr glBuffers[10]; // first 5 with bufferId1, next 5 with bufferId2 for (size_t i = 0; i < 10; i++) { glBuffers[i].reset(GlBuffer::createSharedGlBuffer(&context, CL_MEM_READ_WRITE, (i < 5 ? bufferId1 : bufferId2), &retVal)); EXPECT_NE(nullptr, glBuffers[i].get()); EXPECT_NE(nullptr, glBuffers[i]->getGraphicsAllocation(rootDeviceIndex)); } EXPECT_EQ(2u, graphicsAllocationsForGlBufferReuse->size()); EXPECT_EQ(bufferId1, graphicsAllocationsForGlBufferReuse->at(0).first); EXPECT_EQ(bufferId2, graphicsAllocationsForGlBufferReuse->at(1).first); auto storedGraphicsAllocation1 = graphicsAllocationsForGlBufferReuse->at(0).second; auto storedGraphicsAllocation2 = graphicsAllocationsForGlBufferReuse->at(1).second; EXPECT_EQ(5u, storedGraphicsAllocation1->peekReuseCount()); EXPECT_EQ(5u, storedGraphicsAllocation2->peekReuseCount()); for (size_t i = 0; i < 10; i++) { EXPECT_EQ(i < 5 ? 
storedGraphicsAllocation1 : storedGraphicsAllocation2, glBuffers[i]->getGraphicsAllocation(rootDeviceIndex)); } } TEST_F(GlReusedBufferTests, givenMultipleBuffersWithReusedAllocationWhenReleasingThenClearVectorByLastObject) { std::unique_ptr glBuffer1(GlBuffer::createSharedGlBuffer(&context, CL_MEM_READ_WRITE, bufferId1, &retVal)); std::unique_ptr glBuffer2(GlBuffer::createSharedGlBuffer(&context, CL_MEM_READ_WRITE, bufferId1, &retVal)); EXPECT_EQ(1u, graphicsAllocationsForGlBufferReuse->size()); EXPECT_EQ(2u, graphicsAllocationsForGlBufferReuse->at(0).second->peekReuseCount()); glBuffer1.reset(nullptr); EXPECT_EQ(1u, graphicsAllocationsForGlBufferReuse->size()); EXPECT_EQ(1u, graphicsAllocationsForGlBufferReuse->at(0).second->peekReuseCount()); glBuffer2.reset(nullptr); EXPECT_EQ(0u, graphicsAllocationsForGlBufferReuse->size()); } TEST_F(GlReusedBufferTests, givenMultipleBuffersWithReusedAllocationWhenCreatingThenReuseGmmResourceToo) { std::unique_ptr glBuffer1(GlBuffer::createSharedGlBuffer(&context, CL_MEM_READ_WRITE, bufferId1, &retVal)); glBuffer1->getGraphicsAllocation(rootDeviceIndex)->setDefaultGmm(new Gmm(context.getDevice(0)->getGmmClientContext(), (void *)0x100, 1, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true)); std::unique_ptr glBuffer2(GlBuffer::createSharedGlBuffer(&context, CL_MEM_READ_WRITE, bufferId1, &retVal)); EXPECT_EQ(glBuffer1->getGraphicsAllocation(rootDeviceIndex)->getDefaultGmm()->gmmResourceInfo->peekHandle(), glBuffer2->getGraphicsAllocation(rootDeviceIndex)->getDefaultGmm()->gmmResourceInfo->peekHandle()); } TEST_F(GlReusedBufferTests, givenGlobalShareHandleChangedWhenAcquiringSharedBufferThenChangeGraphicsAllocation) { GlDllHelper dllParam; CL_GL_BUFFER_INFO bufferInfoOutput = dllParam.getBufferInfo(); bufferInfoOutput.globalShareHandle = 40; dllParam.loadBuffer(bufferInfoOutput); auto clBuffer = std::unique_ptr(GlBuffer::createSharedGlBuffer(&context, CL_MEM_READ_WRITE, bufferId1, &retVal)); auto glBuffer = 
clBuffer->peekSharingHandler(); auto oldGraphicsAllocation = clBuffer->getGraphicsAllocation(rootDeviceIndex); ASSERT_EQ(40, oldGraphicsAllocation->peekSharedHandle()); bufferInfoOutput.globalShareHandle = 41; dllParam.loadBuffer(bufferInfoOutput); glBuffer->acquire(clBuffer.get(), rootDeviceIndex); auto newGraphicsAllocation = clBuffer->getGraphicsAllocation(rootDeviceIndex); EXPECT_NE(oldGraphicsAllocation, newGraphicsAllocation); EXPECT_EQ(41, newGraphicsAllocation->peekSharedHandle()); glBuffer->release(clBuffer.get(), rootDeviceIndex); } TEST_F(GlReusedBufferTests, givenGlobalShareHandleDidNotChangeWhenAcquiringSharedBufferThenDontDynamicallyAllocateBufferInfo) { class MyGlBuffer : public GlBuffer { public: MyGlBuffer(GLSharingFunctions *sharingFunctions, unsigned int glObjectId) : GlBuffer(sharingFunctions, glObjectId) {} protected: void resolveGraphicsAllocationChange(osHandle currentSharedHandle, UpdateData *updateData) override { EXPECT_EQ(nullptr, updateData->updateData); GlBuffer::resolveGraphicsAllocationChange(currentSharedHandle, updateData); } }; GlDllHelper dllParam; CL_GL_BUFFER_INFO bufferInfoOutput = dllParam.getBufferInfo(); bufferInfoOutput.globalShareHandle = 40; dllParam.loadBuffer(bufferInfoOutput); auto clBuffer = std::unique_ptr(GlBuffer::createSharedGlBuffer(&context, CL_MEM_READ_WRITE, bufferId1, &retVal)); auto glBuffer = new MyGlBuffer(context.getSharing(), bufferId1); clBuffer->setSharingHandler(glBuffer); glBuffer->acquire(clBuffer.get(), rootDeviceIndex); glBuffer->release(clBuffer.get(), rootDeviceIndex); } TEST_F(GlReusedBufferTests, givenGlobalShareHandleChangedWhenAcquiringSharedBufferThenDynamicallyAllocateBufferInfo) { class MyGlBuffer : public GlBuffer { public: MyGlBuffer(GLSharingFunctions *sharingFunctions, unsigned int glObjectId) : GlBuffer(sharingFunctions, glObjectId) {} protected: void resolveGraphicsAllocationChange(osHandle currentSharedHandle, UpdateData *updateData) override { EXPECT_NE(nullptr, 
updateData->updateData); GlBuffer::resolveGraphicsAllocationChange(currentSharedHandle, updateData); } }; GlDllHelper dllParam; CL_GL_BUFFER_INFO bufferInfoOutput = dllParam.getBufferInfo(); bufferInfoOutput.globalShareHandle = 40; dllParam.loadBuffer(bufferInfoOutput); auto clBuffer = std::unique_ptr(GlBuffer::createSharedGlBuffer(&context, CL_MEM_READ_WRITE, bufferId1, &retVal)); auto glBuffer = new MyGlBuffer(context.getSharing(), bufferId1); clBuffer->setSharingHandler(glBuffer); bufferInfoOutput.globalShareHandle = 41; dllParam.loadBuffer(bufferInfoOutput); glBuffer->acquire(clBuffer.get(), rootDeviceIndex); glBuffer->release(clBuffer.get(), rootDeviceIndex); } TEST_F(GlReusedBufferTests, givenMultipleBuffersAndGlobalShareHandleChangedWhenAcquiringSharedBufferThenDeleteOldGfxAllocationFromReuseVector) { GlDllHelper dllParam; CL_GL_BUFFER_INFO bufferInfoOutput = dllParam.getBufferInfo(); bufferInfoOutput.globalShareHandle = 40; dllParam.loadBuffer(bufferInfoOutput); auto clBuffer1 = std::unique_ptr(GlBuffer::createSharedGlBuffer(&context, CL_MEM_READ_WRITE, bufferId1, &retVal)); auto clBuffer2 = std::unique_ptr(GlBuffer::createSharedGlBuffer(&context, CL_MEM_READ_WRITE, bufferId1, &retVal)); auto graphicsAllocation1 = clBuffer1->getGraphicsAllocation(rootDeviceIndex); auto graphicsAllocation2 = clBuffer2->getGraphicsAllocation(rootDeviceIndex); ASSERT_EQ(graphicsAllocation1, graphicsAllocation2); ASSERT_EQ(2, graphicsAllocation1->peekReuseCount()); ASSERT_EQ(1, graphicsAllocationsForGlBufferReuse->size()); bufferInfoOutput.globalShareHandle = 41; dllParam.loadBuffer(bufferInfoOutput); clBuffer1->peekSharingHandler()->acquire(clBuffer1.get(), rootDeviceIndex); auto newGraphicsAllocation = clBuffer1->getGraphicsAllocation(rootDeviceIndex); EXPECT_EQ(1, graphicsAllocationsForGlBufferReuse->size()); EXPECT_EQ(newGraphicsAllocation, graphicsAllocationsForGlBufferReuse->at(0).second); clBuffer2->peekSharingHandler()->acquire(clBuffer2.get(), rootDeviceIndex); 
EXPECT_EQ(clBuffer2->getGraphicsAllocation(rootDeviceIndex), newGraphicsAllocation); EXPECT_EQ(1, graphicsAllocationsForGlBufferReuse->size()); EXPECT_EQ(newGraphicsAllocation, graphicsAllocationsForGlBufferReuse->at(0).second); clBuffer1->peekSharingHandler()->release(clBuffer1.get(), rootDeviceIndex); clBuffer2->peekSharingHandler()->release(clBuffer2.get(), rootDeviceIndex); } TEST_F(GlReusedBufferTests, givenGraphicsAllocationCreationReturnsNullptrWhenAcquiringGlBufferThenReturnOutOfResourcesAndNullifyAllocation) { auto suceedingMemoryManager = context.getMemoryManager(); auto failingMemoryManager = std::unique_ptr(new FailingMemoryManager()); GlDllHelper dllParam; CL_GL_BUFFER_INFO bufferInfoOutput = dllParam.getBufferInfo(); bufferInfoOutput.globalShareHandle = 40; dllParam.loadBuffer(bufferInfoOutput); auto clBuffer = std::unique_ptr(GlBuffer::createSharedGlBuffer(&context, CL_MEM_READ_WRITE, bufferId1, &retVal)); auto glBuffer = clBuffer->peekSharingHandler(); bufferInfoOutput.globalShareHandle = 41; dllParam.loadBuffer(bufferInfoOutput); context.memoryManager = failingMemoryManager.get(); auto result = glBuffer->acquire(clBuffer.get(), rootDeviceIndex); EXPECT_EQ(CL_OUT_OF_RESOURCES, result); EXPECT_EQ(nullptr, clBuffer->getGraphicsAllocation(rootDeviceIndex)); context.memoryManager = suceedingMemoryManager; } compute-runtime-22.14.22890/opencl/test/unit_test/sharings/gl/windows/gl_sharing_enable_tests.cpp000066400000000000000000000150111422164147700331100ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/fixtures/memory_management_fixture.h" #include "opencl/source/sharings/gl/windows/win_enable_gl.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "gtest/gtest.h" using namespace NEO; class GlSharingEnablerTests : public ::testing::Test { public: void SetUp() override { factory.reset(new GlSharingBuilderFactory()); ASSERT_NE(nullptr, factory.get()); 
} std::unique_ptr factory; }; TEST_F(GlSharingEnablerTests, givenGlFactoryWhenAskedThenExtensionsAreReturned) { auto ext = factory->getExtensions(nullptr); EXPECT_GT(ext.length(), 0u); EXPECT_STRNE("", ext.c_str()); } TEST_F(GlSharingEnablerTests, givenGlFactoryWhenAskedThenGlobalIcdIsConfigured) { class IcdRestore { public: IcdRestore() { icdSnapshot = icdGlobalDispatchTable; } ~IcdRestore() { icdGlobalDispatchTable = icdSnapshot; } decltype(icdGlobalDispatchTable) icdSnapshot; }; // we play with global table, so first save state then restore it with use of RAII IcdRestore icdRestore; // clear ICD table icdGlobalDispatchTable.clCreateFromGLBuffer = nullptr; icdGlobalDispatchTable.clCreateFromGLTexture = nullptr; icdGlobalDispatchTable.clCreateFromGLTexture2D = nullptr; icdGlobalDispatchTable.clCreateFromGLTexture3D = nullptr; icdGlobalDispatchTable.clCreateFromGLRenderbuffer = nullptr; icdGlobalDispatchTable.clGetGLObjectInfo = nullptr; icdGlobalDispatchTable.clGetGLTextureInfo = nullptr; icdGlobalDispatchTable.clEnqueueAcquireGLObjects = nullptr; icdGlobalDispatchTable.clEnqueueReleaseGLObjects = nullptr; icdGlobalDispatchTable.clCreateEventFromGLsyncKHR = nullptr; icdGlobalDispatchTable.clGetGLContextInfoKHR = nullptr; factory->fillGlobalDispatchTable(); EXPECT_NE(nullptr, icdGlobalDispatchTable.clCreateFromGLBuffer); EXPECT_NE(nullptr, icdGlobalDispatchTable.clCreateFromGLTexture); EXPECT_NE(nullptr, icdGlobalDispatchTable.clCreateFromGLTexture2D); EXPECT_NE(nullptr, icdGlobalDispatchTable.clCreateFromGLTexture3D); EXPECT_NE(nullptr, icdGlobalDispatchTable.clCreateFromGLRenderbuffer); EXPECT_NE(nullptr, icdGlobalDispatchTable.clGetGLObjectInfo); EXPECT_NE(nullptr, icdGlobalDispatchTable.clGetGLTextureInfo); EXPECT_NE(nullptr, icdGlobalDispatchTable.clEnqueueAcquireGLObjects); EXPECT_NE(nullptr, icdGlobalDispatchTable.clEnqueueReleaseGLObjects); EXPECT_NE(nullptr, icdGlobalDispatchTable.clCreateEventFromGLsyncKHR); EXPECT_NE(nullptr, 
icdGlobalDispatchTable.clGetGLContextInfoKHR); } TEST_F(GlSharingEnablerTests, givenGlFactoryWhenAskedThenBuilderIsCreated) { auto builder = factory->createContextBuilder(); EXPECT_NE(nullptr, builder); } TEST_F(GlSharingEnablerTests, givenGlBuilderWhenUnknownPropertyThenFalseIsReturned) { auto builder = factory->createContextBuilder(); ASSERT_NE(nullptr, builder); cl_context_properties property = CL_CONTEXT_PLATFORM; cl_context_properties value; auto res = builder->processProperties(property, value); EXPECT_FALSE(res); } TEST_F(GlSharingEnablerTests, givenGlBuilderWhenInvalidPropertyThenFalseIsReturned) { auto builder = factory->createContextBuilder(); ASSERT_NE(nullptr, builder); cl_context_properties property = CL_CGL_SHAREGROUP_KHR; cl_context_properties value; auto res = builder->processProperties(property, value); EXPECT_FALSE(res); } TEST_F(GlSharingEnablerTests, givenGlBuilderWhenValidPropertyThenTrueIsReturned) { cl_context_properties props[] = {CL_GL_CONTEXT_KHR, CL_WGL_HDC_KHR, CL_GLX_DISPLAY_KHR, CL_EGL_DISPLAY_KHR}; for (auto currProperty : props) { auto builder = factory->createContextBuilder(); ASSERT_NE(nullptr, builder); cl_context_properties property = currProperty; cl_context_properties value = 0x10000; auto res = builder->processProperties(property, value); EXPECT_TRUE(res); // repeat to check if we don't allocate twice auto prevAllocations = MemoryManagement::numAllocations.load(); res = builder->processProperties(property, value); EXPECT_TRUE(res); auto currAllocations = MemoryManagement::numAllocations.load(); EXPECT_EQ(prevAllocations, currAllocations); } } TEST_F(GlSharingEnablerTests, givenGlBuilderWhenNoPropertiesThenFinalizerReturnsTrue) { auto builder = factory->createContextBuilder(); ASSERT_NE(nullptr, builder); MockContext context; int32_t errcodeRet = CL_SUCCESS; auto res = builder->finalizeProperties(context, errcodeRet); EXPECT_TRUE(res); EXPECT_EQ(CL_SUCCESS, errcodeRet); } TEST_F(GlSharingEnablerTests, 
givenGlBuilderWhenInvalidPropertiesThenFinalizerReturnsTrue) { auto builder = factory->createContextBuilder(); ASSERT_NE(nullptr, builder); cl_context_properties property = CL_CONTEXT_PLATFORM; cl_context_properties value; auto res = builder->processProperties(property, value); EXPECT_FALSE(res); MockContext context; int32_t errcodeRet = CL_SUCCESS; res = builder->finalizeProperties(context, errcodeRet); EXPECT_TRUE(res); EXPECT_EQ(CL_SUCCESS, errcodeRet); } TEST_F(GlSharingEnablerTests, givenGlBuilderWhenNullHandleThenFinalizerReturnsTrueAndNoSharingRegistered) { auto builder = factory->createContextBuilder(); ASSERT_NE(nullptr, builder); cl_context_properties property = CL_GL_CONTEXT_KHR; cl_context_properties value = 0x0; auto res = builder->processProperties(property, value); EXPECT_TRUE(res); MockContext context; int32_t errcodeRet = CL_SUCCESS; res = builder->finalizeProperties(context, errcodeRet); EXPECT_TRUE(res); EXPECT_EQ(CL_SUCCESS, errcodeRet); auto sharing = context.getSharing(); EXPECT_EQ(nullptr, sharing); } TEST_F(GlSharingEnablerTests, givenGlBuilderWhenHandleThenFinalizerReturnsTrueAndSharingIsRegistered) { auto builder = factory->createContextBuilder(); ASSERT_NE(nullptr, builder); cl_context_properties property = CL_GL_CONTEXT_KHR; cl_context_properties value = 0x1000; auto res = builder->processProperties(property, value); EXPECT_TRUE(res); MockContext context; int32_t errcodeRet = CL_SUCCESS; res = builder->finalizeProperties(context, errcodeRet); EXPECT_TRUE(res); EXPECT_EQ(CL_SUCCESS, errcodeRet); auto sharing = context.getSharing(); EXPECT_NE(nullptr, sharing); } compute-runtime-22.14.22890/opencl/test/unit_test/sharings/gl/windows/gl_sharing_tests.cpp000066400000000000000000001671761422164147700316260ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/device/device.h" #include "shared/source/gmm_helper/gmm.h" #include 
"shared/source/helpers/array_count.h" #include "shared/source/os_interface/os_interface.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/libult/ult_command_stream_receiver.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/mocks/mock_gmm.h" #include "shared/test/common/mocks/mock_gmm_resource_info.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/event/user_event.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/sharings/gl/cl_gl_api_intel.h" #include "opencl/source/sharings/gl/gl_arb_sync_event.h" #include "opencl/source/sharings/gl/gl_buffer.h" #include "opencl/source/sharings/gl/gl_context_guard.h" #include "opencl/source/sharings/gl/gl_sync_event.h" #include "opencl/source/sharings/gl/gl_texture.h" #include "opencl/source/sharings/gl/windows/gl_sharing_windows.h" #include "opencl/source/sharings/sharing.h" #include "opencl/test/unit_test/mocks/gl/windows/mock_gl_arb_sync_event_windows.h" #include "opencl/test/unit_test/mocks/gl/windows/mock_gl_sharing_windows.h" #include "opencl/test/unit_test/mocks/mock_async_event_handler.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_event.h" #include "gl_types.h" using namespace NEO; bool MockGLSharingFunctions::SharingEnabled = false; class glSharingTests : public ::testing::Test { public: void SetUp() override { rootDeviceIndex = context.getDevice(0)->getRootDeviceIndex(); mockGlSharingFunctions = mockGlSharing->sharingFunctions.release(); context.setSharingFunctions(mockGlSharingFunctions); mockGlSharing->m_bufferInfoOutput.globalShareHandle = bufferId; 
mockGlSharing->m_bufferInfoOutput.bufferSize = 4096u; mockGlSharing->uploadDataToBufferInfo(); } uint32_t rootDeviceIndex; MockContext context; std::unique_ptr mockGlSharing = std::make_unique(); GlSharingFunctionsMock *mockGlSharingFunctions; unsigned int bufferId = 1u; }; TEST_F(glSharingTests, givenGlMockWhenItIsCreatedThenNonZeroObjectIsReturned) { EXPECT_NE(nullptr, &mockGlSharing); EXPECT_NE(nullptr, &mockGlSharing->m_clGlResourceInfo); EXPECT_NE(nullptr, &mockGlSharing->m_glClResourceInfo); } TEST_F(glSharingTests, givenGLSharingFunctionsWhenAskedForIdThenClGlSharingIdIsReturned) { auto v = SharingType::CLGL_SHARING; EXPECT_EQ(v, mockGlSharingFunctions->getId()); } TEST_F(glSharingTests, givenMockGlWhenGlBufferIsCreatedThenMemObjectHasGlHandler) { auto retVal = CL_SUCCESS; auto glBuffer = GlBuffer::createSharedGlBuffer(&context, CL_MEM_READ_WRITE, bufferId, &retVal); EXPECT_NE(nullptr, glBuffer); EXPECT_NE(nullptr, glBuffer->getGraphicsAllocation(rootDeviceIndex)); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(4096u, glBuffer->getGraphicsAllocation(rootDeviceIndex)->getUnderlyingBufferSize()); EXPECT_EQ(1, mockGlSharing->dllParam->getParam("GLAcquireSharedBufferCalled")); EXPECT_EQ(bufferId, mockGlSharing->dllParam->getBufferInfo().bufferName); EXPECT_EQ(4096u, glBuffer->getSize()); size_t flagsExpected = CL_MEM_READ_WRITE; EXPECT_EQ(flagsExpected, glBuffer->getFlags()); auto handler = glBuffer->peekSharingHandler(); ASSERT_NE(nullptr, handler); auto glHandler = static_cast(handler); EXPECT_EQ(glHandler->peekFunctionsHandler(), mockGlSharingFunctions); delete glBuffer; } class FailingMemoryManager : public MockMemoryManager { public: GraphicsAllocation *createGraphicsAllocationFromSharedHandle(osHandle handle, const AllocationProperties &properties, bool requireSpecificBitness, bool isHostIpcAllocation) override { return nullptr; } }; TEST_F(glSharingTests, givenMockGlWhenGlBufferIsCreatedFromWrongHandleThenErrorAndNoBufferIsReturned) { auto tempMemoryManager = 
context.getMemoryManager(); auto memoryManager = std::unique_ptr(new FailingMemoryManager()); context.memoryManager = memoryManager.get(); auto retVal = CL_SUCCESS; auto glBuffer = GlBuffer::createSharedGlBuffer(&context, CL_MEM_READ_WRITE, 0, &retVal); EXPECT_EQ(nullptr, glBuffer); EXPECT_EQ(CL_INVALID_GL_OBJECT, retVal); context.memoryManager = tempMemoryManager; } TEST_F(glSharingTests, givenContextWhenClCreateFromGlBufferIsCalledThenBufferIsReturned) { auto retVal = CL_SUCCESS; auto glBuffer = clCreateFromGLBuffer(&context, 0, bufferId, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, glBuffer); retVal = clReleaseMemObject(glBuffer); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(glSharingTests, givenContextWithoutSharingWhenClCreateFromGlBufferIsCalledThenErrorIsReturned) { context.resetSharingFunctions(CLGL_SHARING); auto retVal = CL_SUCCESS; auto glBuffer = clCreateFromGLBuffer(&context, 0, bufferId, &retVal); ASSERT_EQ(CL_INVALID_CONTEXT, retVal); ASSERT_EQ(nullptr, glBuffer); } GLboolean OSAPI mockGLAcquireSharedBuffer(GLDisplay, GLContext, GLContext, GLvoid *pResourceInfo) { return GL_FALSE; }; TEST_F(glSharingTests, givenContextWithSharingWhenClCreateFromGlBufferIsCalledWithIncorrectThenErrorIsReturned) { mockGlSharingFunctions->setGLAcquireSharedBufferMock(mockGLAcquireSharedBuffer); auto retVal = CL_SUCCESS; auto glBuffer = clCreateFromGLBuffer(&context, 0, bufferId, &retVal); ASSERT_EQ(CL_INVALID_GL_OBJECT, retVal); ASSERT_EQ(nullptr, glBuffer); } TEST_F(glSharingTests, givenContextAnd32BitAddressingWhenClCreateFromGlBufferIsCalledThenBufferIsReturned) { auto flagToRestore = DebugManager.flags.Force32bitAddressing.get(); DebugManager.flags.Force32bitAddressing.set(true); auto retVal = CL_SUCCESS; auto glBuffer = clCreateFromGLBuffer(&context, 0, bufferId, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, glBuffer); EXPECT_TRUE(castToObject(glBuffer)->getGraphicsAllocation(rootDeviceIndex)->is32BitAllocation()); retVal = 
clReleaseMemObject(glBuffer); EXPECT_EQ(CL_SUCCESS, retVal); DebugManager.flags.Force32bitAddressing.set(flagToRestore); } TEST_F(glSharingTests, givenGlClBufferWhenAskedForCLGLGetInfoThenIdAndTypeIsReturned) { auto retVal = CL_SUCCESS; auto glBuffer = clCreateFromGLBuffer(&context, 0, bufferId, &retVal); EXPECT_EQ(1, mockGlSharing->dllParam->getParam("GLAcquireSharedBufferCalled")); cl_gl_object_type objectType = 0u; cl_GLuint objectId = 0u; retVal = clGetGLObjectInfo(glBuffer, &objectType, &objectId); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(objectType, (cl_gl_object_type)CL_GL_OBJECT_BUFFER); EXPECT_EQ(objectId, bufferId); retVal = clGetGLObjectInfo(glBuffer, &objectType, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(objectType, (cl_gl_object_type)CL_GL_OBJECT_BUFFER); retVal = clGetGLObjectInfo(glBuffer, nullptr, &objectId); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(objectId, bufferId); retVal = clGetGLObjectInfo(glBuffer, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(glBuffer); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(glSharingTests, givenClBufferWhenAskedForCLGLGetInfoThenErrorIsReturned) { auto retVal = CL_SUCCESS; auto glBuffer = clCreateBuffer(&context, 0, 1, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); cl_gl_object_type objectType = 0u; cl_GLuint objectId = 0u; retVal = clGetGLObjectInfo(glBuffer, &objectType, &objectId); EXPECT_EQ(CL_INVALID_GL_OBJECT, retVal); retVal = clReleaseMemObject(glBuffer); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(glSharingTests, givenClGLBufferWhenItIsAcquiredThenAcuqireCountIsIncremented) { auto retVal = CL_SUCCESS; auto glBuffer = clCreateFromGLBuffer(&context, 0, bufferId, &retVal); EXPECT_EQ(1, mockGlSharing->dllParam->getParam("GLAcquireSharedBufferCalled")); auto memObject = castToObject(glBuffer); EXPECT_FALSE(memObject->isMemObjZeroCopy()); EXPECT_FALSE(memObject->isReadWriteOnCpuAllowed(context.getDevice(0)->getDevice())); auto currentGraphicsAllocation = 
memObject->getGraphicsAllocation(rootDeviceIndex); memObject->peekSharingHandler()->acquire(memObject, rootDeviceIndex); EXPECT_EQ(2, mockGlSharing->dllParam->getParam("GLAcquireSharedBufferCalled")); EXPECT_EQ(1, mockGlSharing->dllParam->getParam("GLGetCurrentContextCalled")); auto currentGraphicsAllocation2 = memObject->getGraphicsAllocation(rootDeviceIndex); EXPECT_EQ(currentGraphicsAllocation2, currentGraphicsAllocation); retVal = clReleaseMemObject(glBuffer); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(glSharingTests, givenClGLBufferWhenItIsAcquiredTwiceThenAcuqireIsNotCalled) { auto retVal = CL_SUCCESS; auto glBuffer = clCreateFromGLBuffer(&context, 0, bufferId, &retVal); auto memObject = castToObject(glBuffer); memObject->peekSharingHandler()->acquire(memObject, context.getDevice(0)->getRootDeviceIndex()); EXPECT_EQ(2, mockGlSharing->dllParam->getParam("GLAcquireSharedBufferCalled")); EXPECT_EQ(1, mockGlSharing->dllParam->getParam("GLGetCurrentContextCalled")); memObject->peekSharingHandler()->acquire(memObject, context.getDevice(0)->getRootDeviceIndex()); EXPECT_EQ(2, mockGlSharing->dllParam->getParam("GLAcquireSharedBufferCalled")); EXPECT_EQ(1, mockGlSharing->dllParam->getParam("GLGetCurrentContextCalled")); memObject->peekSharingHandler()->release(memObject, context.getDevice(0)->getRootDeviceIndex()); memObject->peekSharingHandler()->release(memObject, context.getDevice(0)->getRootDeviceIndex()); EXPECT_EQ(1, mockGlSharing->dllParam->getParam("GLGetCurrentContextCalled")); retVal = clReleaseMemObject(glBuffer); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(glSharingTests, givenClGLBufferWhenItIsCreatedAndGmmIsAvailableThenItIsUsedInGraphicsAllocation) { void *ptr = (void *)0x1000; auto rootDeviceIndex = context.getDevice(0)->getRootDeviceIndex(); auto gmm = new Gmm(context.getDevice(0)->getGmmClientContext(), ptr, 4096u, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true); mockGlSharing->m_bufferInfoOutput.pGmmResInfo = gmm->gmmResourceInfo->peekGmmResourceInfo(); 
mockGlSharing->uploadDataToBufferInfo(); auto retVal = CL_SUCCESS; auto glBuffer = clCreateFromGLBuffer(&context, 0, bufferId, &retVal); auto memObject = castToObject(glBuffer); auto graphicsAllocation = memObject->getGraphicsAllocation(rootDeviceIndex); ASSERT_NE(nullptr, graphicsAllocation->getDefaultGmm()); EXPECT_NE(nullptr, graphicsAllocation->getDefaultGmm()->gmmResourceInfo->peekHandle()); retVal = clReleaseMemObject(glBuffer); EXPECT_EQ(CL_SUCCESS, retVal); delete gmm; } TEST_F(glSharingTests, givenClGLBufferWhenItIsAcquiredTwiceAfterReleaseThenAcuqireIsIncremented) { auto retVal = CL_SUCCESS; auto glBuffer = clCreateFromGLBuffer(&context, 0, bufferId, &retVal); auto memObject = castToObject(glBuffer); memObject->peekSharingHandler()->acquire(memObject, context.getDevice(0)->getRootDeviceIndex()); EXPECT_EQ(2, mockGlSharing->dllParam->getParam("GLAcquireSharedBufferCalled")); EXPECT_EQ(1, mockGlSharing->dllParam->getParam("GLGetCurrentContextCalled")); memObject->peekSharingHandler()->release(memObject, context.getDevice(0)->getRootDeviceIndex()); memObject->peekSharingHandler()->acquire(memObject, context.getDevice(0)->getRootDeviceIndex()); EXPECT_EQ(3, mockGlSharing->dllParam->getParam("GLAcquireSharedBufferCalled")); EXPECT_EQ(2, mockGlSharing->dllParam->getParam("GLGetCurrentContextCalled")); retVal = clReleaseMemObject(glBuffer); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(glSharingTests, givenClGLBufferWhenItIsAcquireCountIsDecrementedToZeroThenCallReleaseFunction) { std::unique_ptr buffer(GlBuffer::createSharedGlBuffer(&context, CL_MEM_READ_WRITE, bufferId, nullptr)); auto sharingHandler = buffer->peekSharingHandler(); sharingHandler->acquire(buffer.get(), context.getDevice(0)->getRootDeviceIndex()); sharingHandler->acquire(buffer.get(), context.getDevice(0)->getRootDeviceIndex()); sharingHandler->release(buffer.get(), context.getDevice(0)->getRootDeviceIndex()); EXPECT_EQ(0, mockGlSharing->dllParam->getParam("GLReleaseSharedBufferCalled")); 
sharingHandler->release(buffer.get(), context.getDevice(0)->getRootDeviceIndex()); EXPECT_EQ(1, mockGlSharing->dllParam->getParam("GLReleaseSharedBufferCalled")); EXPECT_EQ(bufferId, mockGlSharing->dllParam->getBufferInfo().bufferName); } TEST_F(glSharingTests, givenClGLBufferWhenItIsAcquiredWithDifferentOffsetThenGraphicsAllocationContainsLatestOffsetValue) { auto retVal = CL_SUCCESS; auto rootDeviceIndex = context.getDevice(0)->getRootDeviceIndex(); auto glBuffer = clCreateFromGLBuffer(&context, 0, bufferId, &retVal); auto memObject = castToObject(glBuffer); auto graphicsAddress = memObject->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress(); mockGlSharing->m_bufferInfoOutput.bufferOffset = 50u; mockGlSharing->uploadDataToBufferInfo(); memObject->peekSharingHandler()->acquire(memObject, rootDeviceIndex); auto offsetedGraphicsAddress = memObject->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress(); EXPECT_EQ(offsetedGraphicsAddress, graphicsAddress + mockGlSharing->m_bufferInfoOutput.bufferOffset); retVal = clReleaseMemObject(glBuffer); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(glSharingTests, givenHwCommandQueueWhenAcquireIsCalledThenAcquireCountIsIncremented) { auto retVal = CL_SUCCESS; auto commandQueue = clCreateCommandQueue(&context, context.getDevice(0), 0, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); auto glBuffer = clCreateFromGLBuffer(&context, 0, bufferId, &retVal); auto buffer = castToObject(glBuffer); EXPECT_EQ(0u, buffer->acquireCount); EXPECT_EQ(1, mockGlSharing->dllParam->getParam("GLAcquireSharedBufferCalled")); retVal = clEnqueueAcquireGLObjects(commandQueue, 1, &glBuffer, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(2, mockGlSharing->dllParam->getParam("GLAcquireSharedBufferCalled")); EXPECT_EQ(1u, buffer->acquireCount); retVal = clEnqueueReleaseGLObjects(commandQueue, 1, &glBuffer, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, buffer->acquireCount); retVal = clEnqueueAcquireGLObjects(commandQueue, 1, 
&glBuffer, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(3, mockGlSharing->dllParam->getParam("GLAcquireSharedBufferCalled")); EXPECT_EQ(1u, buffer->acquireCount); retVal = clReleaseCommandQueue(commandQueue); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(glBuffer); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(glSharingTests, givenHwCommandQueueWhenAcquireIsCalledWithIncorrectWaitlistThenReturnError) { auto retVal = CL_SUCCESS; auto commandQueue = clCreateCommandQueue(&context, context.getDevice(0), 0, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); auto glBuffer = clCreateFromGLBuffer(&context, 0, bufferId, &retVal); auto buffer = castToObject(glBuffer); EXPECT_EQ(0u, buffer->acquireCount); EXPECT_EQ(1, mockGlSharing->dllParam->getParam("GLAcquireSharedBufferCalled")); retVal = clEnqueueAcquireGLObjects(commandQueue, 0, &glBuffer, 1, nullptr, nullptr); EXPECT_EQ(CL_INVALID_EVENT_WAIT_LIST, retVal); retVal = clReleaseCommandQueue(commandQueue); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(glBuffer); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(glSharingTests, givenEnabledAsyncEventsHandlerWhenAcquireGlObjectsIsCalledWithIncompleteExternallySynchronizedEventThenItIsAddedToAsyncEventsHandler) { std::unique_ptr dbgRestore; DebugManager.flags.EnableAsyncEventsHandler.set(true); auto handler = new MockHandler(false); context.getAsyncEventsHandlerUniquePtr().reset(handler); struct ExternallySynchronizedEvent : Event { ExternallySynchronizedEvent(Context *ctx) : Event(ctx, nullptr, 0, 0, 0) { } bool isExternallySynchronized() const override { return true; } void updateExecutionStatus() override { ++updateCount; if (complete) { transitionExecutionStatus(CL_COMPLETE); } } bool complete = false; uint32_t updateCount = 0; }; auto *event = new ExternallySynchronizedEvent(&context); cl_event clEvent = static_cast(event); auto commandQueue = clCreateCommandQueue(&context, context.getDevice(0), 0, nullptr); auto glBuffer = clCreateFromGLBuffer(&context, 
0, bufferId, nullptr); EXPECT_EQ(CL_SUCCESS, clEnqueueAcquireGLObjects(commandQueue, 1, &glBuffer, 1, &clEvent, nullptr)); EXPECT_EQ(CL_SUCCESS, clReleaseCommandQueue(commandQueue)); EXPECT_EQ(CL_SUCCESS, clReleaseMemObject(glBuffer)); EXPECT_LT(CL_SUCCESS, event->peekExecutionStatus()); EXPECT_FALSE(handler->peekIsRegisterListEmpty()); uint32_t updateCount = event->updateCount; handler->process(); EXPECT_LT(updateCount, event->updateCount); updateCount = event->updateCount; handler->process(); EXPECT_LT(updateCount, event->updateCount); updateCount = event->updateCount; event->complete = true; handler->process(); EXPECT_LE(updateCount, event->updateCount); updateCount = event->updateCount; handler->process(); EXPECT_EQ(updateCount, event->updateCount); event->release(); } TEST_F(glSharingTests, givenDisabledAsyncEventsHandlerWhenAcquireGlObjectsIsCalledWithIncompleteExternallySynchronizedEventThenItIsNotAddedToAsyncEventsHandler) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableAsyncEventsHandler.set(false); auto handler = new MockHandler(false); context.getAsyncEventsHandlerUniquePtr().reset(handler); struct ExternallySynchronizedEvent : Event { ExternallySynchronizedEvent() : Event(nullptr, 0, 0, 0) { } bool isExternallySynchronized() const override { return true; } }; auto *event = new ExternallySynchronizedEvent; cl_event clEvent = static_cast(event); auto commandQueue = clCreateCommandQueue(&context, context.getDevice(0), 0, nullptr); auto glBuffer = clCreateFromGLBuffer(&context, 0, bufferId, nullptr); EXPECT_EQ(CL_SUCCESS, clEnqueueAcquireGLObjects(commandQueue, 1, &glBuffer, 1, &clEvent, nullptr)); EXPECT_EQ(CL_SUCCESS, clReleaseCommandQueue(commandQueue)); EXPECT_EQ(CL_SUCCESS, clReleaseMemObject(glBuffer)); EXPECT_LT(CL_SUCCESS, event->peekExecutionStatus()); EXPECT_TRUE(handler->peekIsRegisterListEmpty()); event->release(); } TEST_F(glSharingTests, 
givenEnabledAsyncEventsHandlerWhenAcquireGlObjectsIsCalledWithIncompleteButNotExternallySynchronizedEventThenItIsNotAddedToAsyncEventsHandler) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableAsyncEventsHandler.set(false); auto handler = new MockHandler(false); context.getAsyncEventsHandlerUniquePtr().reset(handler); auto *event = new UserEvent; cl_event clEvent = static_cast(event); auto commandQueue = clCreateCommandQueue(&context, context.getDevice(0), 0, nullptr); auto glBuffer = clCreateFromGLBuffer(&context, 0, bufferId, nullptr); EXPECT_EQ(CL_SUCCESS, clEnqueueAcquireGLObjects(commandQueue, 1, &glBuffer, 1, &clEvent, nullptr)); EXPECT_EQ(CL_SUCCESS, clReleaseCommandQueue(commandQueue)); EXPECT_EQ(CL_SUCCESS, clReleaseMemObject(glBuffer)); EXPECT_LT(CL_SUCCESS, event->peekExecutionStatus()); EXPECT_TRUE(handler->peekIsRegisterListEmpty()); event->release(); } TEST_F(glSharingTests, givenHwCommandQueueWhenReleaseIsCalledWithIncorrectWaitlistThenReturnError) { auto retVal = CL_SUCCESS; auto commandQueue = clCreateCommandQueue(&context, context.getDevice(0), 0, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); auto glBuffer = clCreateFromGLBuffer(&context, 0, bufferId, &retVal); auto buffer = castToObject(glBuffer); EXPECT_EQ(0u, buffer->acquireCount); EXPECT_EQ(1, mockGlSharing->dllParam->getParam("GLAcquireSharedBufferCalled")); retVal = clEnqueueAcquireGLObjects(commandQueue, 1, &glBuffer, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clEnqueueReleaseGLObjects(commandQueue, 1, &glBuffer, 1, nullptr, nullptr); EXPECT_EQ(CL_INVALID_EVENT_WAIT_LIST, retVal); retVal = clEnqueueReleaseGLObjects(commandQueue, 1, &glBuffer, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseCommandQueue(commandQueue); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(glBuffer); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(glSharingTests, givenContextWithoutSharingWhenAcquireIsCalledThenErrorIsReturned) { auto retVal = CL_SUCCESS; auto 
commandQueue = clCreateCommandQueue(&context, context.getDevice(0), 0, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); auto glBuffer = clCreateFromGLBuffer(&context, 0, bufferId, &retVal); auto buffer = castToObject(glBuffer); EXPECT_EQ(0u, buffer->acquireCount); context.releaseSharingFunctions(CLGL_SHARING); EXPECT_EQ(1, mockGlSharing->dllParam->getParam("GLAcquireSharedBufferCalled")); retVal = clEnqueueAcquireGLObjects(commandQueue, 1, &glBuffer, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_CONTEXT, retVal); context.setSharingFunctions(mockGlSharingFunctions); retVal = clReleaseCommandQueue(commandQueue); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(glBuffer); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(glSharingTests, givenContextWithoutSharingWhenReleaseIsCalledThenErrorIsReturned) { auto retVal = CL_SUCCESS; auto commandQueue = clCreateCommandQueue(&context, context.getDevice(0), 0, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); auto glBuffer = clCreateFromGLBuffer(&context, 0, bufferId, &retVal); auto buffer = castToObject(glBuffer); EXPECT_EQ(0u, buffer->acquireCount); EXPECT_EQ(1, mockGlSharing->dllParam->getParam("GLAcquireSharedBufferCalled")); retVal = clEnqueueAcquireGLObjects(commandQueue, 1, &glBuffer, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(2, mockGlSharing->dllParam->getParam("GLAcquireSharedBufferCalled")); EXPECT_EQ(1u, buffer->acquireCount); context.releaseSharingFunctions(CLGL_SHARING); retVal = clEnqueueReleaseGLObjects(commandQueue, 1, &glBuffer, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_CONTEXT, retVal); context.setSharingFunctions(mockGlSharingFunctions); retVal = clReleaseCommandQueue(commandQueue); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(glBuffer); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(glSharingTests, givenHwCommandQueueWhenAcquireAndReleaseCallsAreMadeWithEventsThenProperCmdTypeIsReturned) { cl_event retEvent; auto retVal = CL_SUCCESS; auto commandQueue = clCreateCommandQueue(&context, 
context.getDevice(0), 0, &retVal); auto glBuffer = clCreateFromGLBuffer(&context, 0, bufferId, &retVal); retVal = clEnqueueAcquireGLObjects(commandQueue, 1, &glBuffer, 0, nullptr, &retEvent); EXPECT_EQ(CL_SUCCESS, retVal); cl_command_type cmdType = 0; size_t sizeReturned = 0; retVal = clGetEventInfo(retEvent, CL_EVENT_COMMAND_TYPE, sizeof(cmdType), &cmdType, &sizeReturned); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(static_cast(CL_COMMAND_ACQUIRE_GL_OBJECTS), cmdType); EXPECT_EQ(sizeof(cl_command_type), sizeReturned); retVal = clReleaseEvent(retEvent); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clEnqueueReleaseGLObjects(commandQueue, 1, &glBuffer, 0, nullptr, &retEvent); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clGetEventInfo(retEvent, CL_EVENT_COMMAND_TYPE, sizeof(cmdType), &cmdType, &sizeReturned); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(static_cast(CL_COMMAND_RELEASE_GL_OBJECTS), cmdType); EXPECT_EQ(sizeof(cl_command_type), sizeReturned); retVal = clReleaseEvent(retEvent); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseCommandQueue(commandQueue); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(glBuffer); EXPECT_EQ(CL_SUCCESS, retVal); } HWTEST_F(glSharingTests, givenCommandQueueWhenReleaseGlObjectIsCalledThenFinishIsCalled) { MockCommandQueueHw mockCmdQueue(&context, context.getDevice(0), nullptr); auto glBuffer = clCreateFromGLBuffer(&context, 0, bufferId, nullptr); EXPECT_EQ(CL_SUCCESS, clEnqueueAcquireGLObjects(&mockCmdQueue, 1, &glBuffer, 0, nullptr, nullptr)); mockCmdQueue.taskCount = 5u; EXPECT_EQ(CL_SUCCESS, clEnqueueReleaseGLObjects(&mockCmdQueue, 1, &glBuffer, 0, nullptr, nullptr)); EXPECT_EQ(5u, mockCmdQueue.latestTaskCountWaited); clReleaseMemObject(glBuffer); } TEST_F(glSharingTests, givenMockGLWhenFunctionsAreCalledThenCallsAreReceived) { auto ptrToStruct = &mockGlSharing->m_clGlResourceInfo; auto glDisplay = (GLDisplay)1; auto glContext = (GLContext)1; mockGlSharing->overrideGetCurrentValues(glContext, glDisplay); EXPECT_EQ(1u, 
mockGlSharingFunctions->setSharedOCLContextState()); EXPECT_EQ(1u, mockGlSharingFunctions->acquireSharedBufferINTEL(ptrToStruct)); EXPECT_EQ(1u, mockGlSharingFunctions->acquireSharedRenderBuffer(ptrToStruct)); EXPECT_EQ(1u, mockGlSharingFunctions->acquireSharedTexture(ptrToStruct)); EXPECT_EQ(1u, mockGlSharingFunctions->releaseSharedBufferINTEL(ptrToStruct)); EXPECT_EQ(1u, mockGlSharingFunctions->releaseSharedRenderBuffer(ptrToStruct)); EXPECT_EQ(1u, mockGlSharingFunctions->releaseSharedTexture(ptrToStruct)); EXPECT_EQ(glContext, mockGlSharingFunctions->getCurrentContext()); EXPECT_EQ(glDisplay, mockGlSharingFunctions->getCurrentDisplay()); EXPECT_EQ(1u, mockGlSharingFunctions->makeCurrent(glContext, glDisplay)); EXPECT_EQ(1, mockGlSharing->dllParam->getGLSetSharedOCLContextStateReturnedValue()); EXPECT_EQ(1, mockGlSharing->dllParam->getParam("GLAcquireSharedBufferCalled")); EXPECT_EQ(1, mockGlSharing->dllParam->getParam("GLAcquireSharedRenderBufferCalled")); EXPECT_EQ(1, mockGlSharing->dllParam->getParam("GLAcquireSharedTextureCalled")); EXPECT_EQ(1, mockGlSharing->dllParam->getParam("GLReleaseSharedBufferCalled")); EXPECT_EQ(1, mockGlSharing->dllParam->getParam("GLReleaseSharedRenderBufferCalled")); EXPECT_EQ(1, mockGlSharing->dllParam->getParam("GLReleaseSharedTextureCalled")); EXPECT_EQ(1, mockGlSharing->dllParam->getParam("GLGetCurrentContextCalled")); EXPECT_EQ(1, mockGlSharing->dllParam->getParam("GLGetCurrentDisplayCalled")); EXPECT_EQ(1, mockGlSharing->dllParam->getParam("GLMakeCurrentCalled")); } TEST(glSharingBasicTest, GivenSharingFunctionsWhenItIsConstructedThenOglContextFunctionIsCalled) { GLType GLHDCType = 0; GLContext GLHGLRCHandle = 0; GLDisplay GLHDCHandle = 0; GlDllHelper getDllParam; GlSharingFunctionsMock glSharingFunctions(GLHDCType, GLHGLRCHandle, GLHGLRCHandle, GLHDCHandle); EXPECT_EQ(1, getDllParam.getGLSetSharedOCLContextStateReturnedValue()); } TEST(glSharingBasicTest, 
givenInvalidExtensionNameWhenCheckGLExtensionSupportedThenReturnFalse) { MockGLSharingFunctions glSharingFunctions; const unsigned char invalidExtension[] = "InvalidExtensionName"; bool RetVal = glSharingFunctions.isOpenGlExtensionSupported(invalidExtension); EXPECT_FALSE(RetVal); } TEST(glSharingBasicTest, givenglGetIntegervIsNullWhenCheckGLExtensionSupportedThenReturnFalse) { MockGLSharingFunctions glSharingFunctions; glSharingFunctions.glGetIntegerv = nullptr; const unsigned char invalidExtension[] = "InvalidExtensionName"; bool RetVal = glSharingFunctions.isOpenGlExtensionSupported(invalidExtension); EXPECT_FALSE(RetVal); } TEST(glSharingBasicTest, givenValidExtensionNameWhenCheckGLExtensionSupportedThenReturnTrue) { MockGLSharingFunctions glSharingFunctions; const unsigned char supportGLOES[] = "GL_OES_framebuffer_object"; bool RetVal = glSharingFunctions.isOpenGlExtensionSupported(supportGLOES); EXPECT_TRUE(RetVal); } TEST(glSharingBasicTest, givenWhenCheckGLSharingSupportedThenReturnTrue) { MockGLSharingFunctions glSharingFunctions; bool RetVal = glSharingFunctions.isOpenGlSharingSupported(); EXPECT_TRUE(RetVal); } TEST(glSharingBasicTest, givenVendorisNullWhenCheckGLSharingSupportedThenReturnFalse) { auto invalidGetStringFcn = [](GLenum name) { return (const GLubyte *)""; }; MockGLSharingFunctions glSharingFunctions; glSharingFunctions.glGetString = invalidGetStringFcn; bool RetVal = glSharingFunctions.isOpenGlSharingSupported(); EXPECT_FALSE(RetVal); } TEST(glSharingBasicTest, givenVersionisNullWhenCheckGLSharingSupportedThenReturnFalse) { MockGLSharingFunctions glSharingFunctions; glSharingFunctions.dllParam->glSetString("", GL_VERSION); // version returns null bool RetVal = glSharingFunctions.isOpenGlSharingSupported(); EXPECT_FALSE(RetVal); glSharingFunctions.dllParam->glSetString("Int..", GL_VENDOR); RetVal = glSharingFunctions.isOpenGlSharingSupported(); EXPECT_FALSE(RetVal); } TEST(glSharingBasicTest, 
givenVersionisGlesWhenCheckGLSharingSupportedThenReturnFalse) { MockGLSharingFunctions glSharingFunctions; glSharingFunctions.dllParam->glSetString("OpenGL ES", GL_VERSION); bool RetVal = glSharingFunctions.isOpenGlSharingSupported(); EXPECT_TRUE(RetVal); glSharingFunctions.dllParam->glSetString("OpenGL ES 1.", GL_VERSION); RetVal = glSharingFunctions.isOpenGlSharingSupported(); EXPECT_TRUE(RetVal); glSharingFunctions.dllParam->glSetString("2.0", GL_VERSION); RetVal = glSharingFunctions.isOpenGlSharingSupported(); EXPECT_TRUE(RetVal); glSharingFunctions.dllParam->glSetStringi("GL_EXT_framebuffer_o...", 1); RetVal = glSharingFunctions.isOpenGlSharingSupported(); EXPECT_FALSE(RetVal); glSharingFunctions.dllParam->glSetStringi("GL_EXT_framebuffer_object", 1); RetVal = glSharingFunctions.isOpenGlSharingSupported(); EXPECT_TRUE(RetVal); glSharingFunctions.dllParam->glSetString("OpenGL ES 1.", GL_VERSION); glSharingFunctions.dllParam->glSetStringi("GL_OES_framebuffer_o...", 0); RetVal = glSharingFunctions.isOpenGlSharingSupported(); EXPECT_FALSE(RetVal); } TEST(glSharingBasicTest, givensetSharedOCLContextStateWhenCallThenCorrectValue) { MockGLSharingFunctions glSharingFunctions; glSharingFunctions.dllParam->setGLSetSharedOCLContextStateReturnedValue(0u); EXPECT_EQ(0u, glSharingFunctions.setSharedOCLContextState()); glSharingFunctions.dllParam->setGLSetSharedOCLContextStateReturnedValue(1u); EXPECT_EQ(1u, glSharingFunctions.setSharedOCLContextState()); } TEST(glSharingBasicTest, givenGlSharingFunctionsWhenItIsConstructedThenFunctionsAreLoaded) { GLType GLHDCType = 0; GLContext GLHGLRCHandle = 0; GLDisplay GLHDCHandle = 0; GlSharingFunctionsMock glSharingFunctions(GLHDCType, GLHGLRCHandle, GLHGLRCHandle, GLHDCHandle); EXPECT_NE(nullptr, glSharingFunctions.GLGetCurrentContext); EXPECT_NE(nullptr, glSharingFunctions.GLGetCurrentDisplay); EXPECT_NE(nullptr, glSharingFunctions.glGetString); EXPECT_NE(nullptr, glSharingFunctions.glGetIntegerv); EXPECT_NE(nullptr, 
glSharingFunctions.pfnWglCreateContext); EXPECT_NE(nullptr, glSharingFunctions.pfnWglDeleteContext); EXPECT_NE(nullptr, glSharingFunctions.pfnWglShareLists); EXPECT_NE(nullptr, glSharingFunctions.wglMakeCurrent); EXPECT_NE(nullptr, glSharingFunctions.GLSetSharedOCLContextState); EXPECT_NE(nullptr, glSharingFunctions.GLAcquireSharedBuffer); EXPECT_NE(nullptr, glSharingFunctions.GLReleaseSharedBuffer); EXPECT_NE(nullptr, glSharingFunctions.GLAcquireSharedRenderBuffer); EXPECT_NE(nullptr, glSharingFunctions.GLReleaseSharedRenderBuffer); EXPECT_NE(nullptr, glSharingFunctions.GLAcquireSharedTexture); EXPECT_NE(nullptr, glSharingFunctions.GLReleaseSharedTexture); EXPECT_NE(nullptr, glSharingFunctions.GLRetainSync); EXPECT_NE(nullptr, glSharingFunctions.GLReleaseSync); EXPECT_NE(nullptr, glSharingFunctions.GLGetSynciv); EXPECT_NE(nullptr, glSharingFunctions.glGetStringi); } TEST(glSharingBasicTest, givenNumEntriesLowerThanSupportedFormatsWhenGettingSupportedFormatsThenOnlyNumEntiresAreReturned) { MockGLSharingFunctions glSharingFunctions; cl_mem_flags flags = CL_MEM_READ_WRITE; cl_mem_object_type image_type = CL_MEM_OBJECT_IMAGE2D; cl_uint numImageFormats = 0; cl_GLenum glFormats[3] = {}; auto retVal = glSharingFunctions.getSupportedFormats(flags, image_type, 1, glFormats, &numImageFormats); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(static_cast(GlSharing::glToCLFormats.size()), numImageFormats); EXPECT_NE(0u, glFormats[0]); EXPECT_EQ(0u, glFormats[1]); EXPECT_EQ(0u, glFormats[2]); } TEST(glSharingBasicTest, givenCorrectFlagsWhenGettingSupportedFormatsThenCorrectListIsReturned) { MockGLSharingFunctions glSharingFunctions; cl_mem_flags flags[] = {CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY, CL_MEM_READ_WRITE, CL_MEM_KERNEL_READ_AND_WRITE}; cl_mem_object_type image_type = CL_MEM_OBJECT_IMAGE2D; cl_GLenum glFormats[3] = {}; cl_uint numImageFormats = 0; for (auto flag : flags) { auto result = glSharingFunctions.getSupportedFormats(flag, image_type, arrayCount(glFormats), glFormats, 
&numImageFormats); EXPECT_EQ(CL_SUCCESS, result); EXPECT_EQ(static_cast(GlSharing::glToCLFormats.size()), numImageFormats); for (uint32_t formatIndex = 0; formatIndex < arrayCount(glFormats); formatIndex++) { EXPECT_NE(GlSharing::glToCLFormats.end(), GlSharing::glToCLFormats.find(glFormats[formatIndex])); } } } TEST(glSharingBasicTest, givenSupportedImageTypesWhenGettingSupportedFormatsThenCorrectListIsReturned) { MockGLSharingFunctions glSharingFunctions; cl_mem_flags flags = CL_MEM_READ_WRITE; cl_mem_object_type image_types[] = {CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE2D, CL_MEM_OBJECT_IMAGE3D, CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE1D_BUFFER, CL_MEM_OBJECT_IMAGE2D_ARRAY}; cl_GLenum glFormats[3] = {}; cl_uint numImageFormats = 0; for (auto image_type : image_types) { auto result = glSharingFunctions.getSupportedFormats(flags, image_type, arrayCount(glFormats), glFormats, &numImageFormats); EXPECT_EQ(CL_SUCCESS, result); EXPECT_EQ(static_cast(GlSharing::glToCLFormats.size()), numImageFormats); for (auto glFormat : glFormats) { EXPECT_NE(GlSharing::glToCLFormats.end(), GlSharing::glToCLFormats.find(glFormat)); } } } TEST(glSharingBasicTest, givenZeroNumEntriesWhenGettingSupportedFormatsThenNumFormatsIsReturned) { MockGLSharingFunctions glSharingFunctions; cl_mem_flags flags = CL_MEM_READ_WRITE; cl_mem_object_type image_type = CL_MEM_OBJECT_IMAGE2D; cl_uint numImageFormats = 0; auto result = glSharingFunctions.getSupportedFormats(flags, image_type, 0, nullptr, &numImageFormats); EXPECT_EQ(CL_SUCCESS, result); EXPECT_EQ(static_cast(GlSharing::glToCLFormats.size()), numImageFormats); } TEST(glSharingBasicTest, givenNullNumImageFormatsWhenGettingSupportedFormatsThenNumFormatsIsNotDereferenced) { MockGLSharingFunctions glSharingFunctions; cl_mem_flags flags = CL_MEM_READ_WRITE; cl_mem_object_type image_type = CL_MEM_OBJECT_IMAGE2D; auto result = glSharingFunctions.getSupportedFormats(flags, image_type, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, result); } 
// getSupportedFormats is queried with a memory object type that is not an
// image type; the test expects CL_INVALID_VALUE and a reported format count of 0.
TEST(glSharingBasicTest, givenInvalidImageTypeWhenGettingSupportedFormatsThenIvalidValueErrorIsReturned) {
    MockGLSharingFunctions glSharingFunctions;
    cl_mem_flags flags = CL_MEM_READ_WRITE;
    cl_mem_object_type image_type = CL_MEM_OBJECT_PIPE; // the invalid input under test
    cl_GLenum glFormats[3] = {};
    cl_uint numImageFormats = 0;

    auto result = glSharingFunctions.getSupportedFormats(flags, image_type, arrayCount(glFormats), glFormats, &numImageFormats);

    EXPECT_EQ(CL_INVALID_VALUE, result);
    EXPECT_EQ(0u, numImageFormats);
}

// Same query with a valid image type but CL_MEM_NO_ACCESS_INTEL as the flags;
// the test expects CL_INVALID_VALUE and a reported format count of 0.
TEST(glSharingBasicTest, givenInvalidFlagsWhenGettingSupportedFormatsThenIvalidValueErrorIsReturned) {
    MockGLSharingFunctions glSharingFunctions;
    cl_mem_flags flags = CL_MEM_NO_ACCESS_INTEL; // the invalid input under test
    cl_mem_object_type image_type = CL_MEM_OBJECT_IMAGE2D;
    cl_GLenum glFormats[3] = {};
    cl_uint numImageFormats = 0;

    auto result = glSharingFunctions.getSupportedFormats(flags, image_type, arrayCount(glFormats), glFormats, &numImageFormats);

    EXPECT_EQ(CL_INVALID_VALUE, result);
    EXPECT_EQ(0u, numImageFormats);
}

// Creates a 1D-buffer image on top of a GL-shared buffer and checks that the
// image reports the same sharing handler as the buffer it was created from.
TEST_F(glSharingTests, givenContextWhenCreateFromSharedBufferThenSharedImageIsReturned) {
    auto retVal = CL_SUCCESS;

    auto glBuffer = clCreateFromGLBuffer(&context, 0, bufferId, &retVal);
    ASSERT_EQ(CL_SUCCESS, retVal);
    ASSERT_NE(nullptr, glBuffer);
    // NOTE(review): castToObject appears without an explicit template argument
    // in this view of the file - confirm against the checked-in source.
    auto parentBuffer = castToObject(glBuffer);

    // Force image support on so clCreateImage is not rejected by the capability table.
    auto hardwareInfo = context.getDevice(0)->getRootDeviceEnvironment().getMutableHardwareInfo();
    hardwareInfo->capabilityTable.supportsImages = true;

    cl_image_format format = {CL_RGBA, CL_FLOAT};
    // The last initializer hands the GL buffer to the image descriptor as its backing memory object.
    cl_image_desc image_desc = {CL_MEM_OBJECT_IMAGE1D_BUFFER, 1, 1, 1, 1, 0, 0, 0, 0, {glBuffer}};
    cl_mem image = clCreateImage(&context, CL_MEM_READ_WRITE, &format, &image_desc, nullptr, &retVal);
    ASSERT_EQ(CL_SUCCESS, retVal);
    // Guard the freshly created image (the original re-checked glBuffer here,
    // leaving a null image to be dereferenced below).
    ASSERT_NE(nullptr, image);

    auto childImage = castToObject(image);
    EXPECT_EQ(parentBuffer->peekSharingHandler(), childImage->peekSharingHandler());

    // Release the image first, then the buffer it was created from.
    retVal = clReleaseMemObject(image);
    EXPECT_EQ(CL_SUCCESS, retVal);
    retVal = clReleaseMemObject(glBuffer);
    EXPECT_EQ(CL_SUCCESS, retVal);
}
// Maps a GL-shared buffer (which has no CPU pointer) at a non-zero offset and
// unmaps it, expecting each operation to bump the queue's task count by one and
// the map bookkeeping to reflect the requested offset and size.
TEST_F(glSharingTests, givenClGLBufferWhenMapAndUnmapBufferIsCalledThenCopyOnGpu) {
    auto retVal = CL_SUCCESS;
    auto glBuffer = clCreateFromGLBuffer(&context, 0, bufferId, &retVal);
    // The buffer object is dereferenced immediately, so creation failure must be fatal.
    ASSERT_EQ(CL_SUCCESS, retVal);
    auto buffer = castToObject<Buffer>(glBuffer);
    ASSERT_NE(nullptr, buffer);
    EXPECT_EQ(buffer->getCpuAddressForMemoryTransfer(), nullptr); // no cpu ptr

    auto gfxAllocation = buffer->getGraphicsAllocation(rootDeviceIndex);
    auto pClDevice = context.getDevice(0);
    // Install a MockGmm in every gmm slot of the allocation.
    for (auto handleId = 0u; handleId < gfxAllocation->getNumGmms(); handleId++) {
        gfxAllocation->setGmm(new MockGmm(pClDevice->getGmmClientContext()), handleId);
    }

    auto commandQueue = CommandQueue::create(&context, pClDevice, 0, false, retVal);
    ASSERT_EQ(CL_SUCCESS, retVal);

    size_t offset = 1;
    auto taskCount = commandQueue->taskCount;
    auto mappedPtr = clEnqueueMapBuffer(commandQueue, glBuffer, CL_TRUE, CL_MAP_WRITE, offset, (buffer->getSize() - offset),
                                        0, nullptr, nullptr, &retVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(taskCount + 1, commandQueue->taskCount);

    MapInfo mapInfo;
    EXPECT_TRUE(buffer->findMappedPtr(mappedPtr, mapInfo));
    EXPECT_EQ(mappedPtr, ptrOffset(buffer->getAllocatedMapPtr(), offset));
    EXPECT_EQ(mapInfo.size[0], buffer->getSize() - offset);
    EXPECT_EQ(mapInfo.offset[0], offset);

    retVal = commandQueue->enqueueUnmapMemObject(buffer, mappedPtr, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(taskCount + 2, commandQueue->taskCount);

    EXPECT_FALSE(buffer->findMappedPtr(mappedPtr, mapInfo)); // delete in destructor

    retVal = clReleaseMemObject(glBuffer);
    EXPECT_EQ(CL_SUCCESS, retVal);

    delete commandQueue;
}

// clCreateFromGLBuffer must still return a buffer when the caller passes no
// errcode_ret pointer.
TEST_F(glSharingTests, givenClGLBufferWhenretValIsNotPassedToCreateFunctionThenBufferIsCreated) {
    auto glBuffer = clCreateFromGLBuffer(&context, 0, bufferId, nullptr);
    ASSERT_NE(nullptr, glBuffer);
    auto retVal = clReleaseMemObject(glBuffer);
    EXPECT_EQ(CL_SUCCESS, retVal);
}

// Maps and unmaps the same GL-shared buffer twice and expects the second map to
// hand back the same pointer as the first.
TEST_F(glSharingTests, givenClGLBufferWhenMapAndUnmapBufferIsCalledTwiceThenReuseStorage) {
    auto retVal = CL_SUCCESS;
    auto glBuffer = clCreateFromGLBuffer(&context, 0, bufferId, &retVal);
    // The buffer object is dereferenced immediately, so creation failure must be fatal.
    ASSERT_EQ(CL_SUCCESS, retVal);
    auto buffer = castToObject<Buffer>(glBuffer);
    ASSERT_NE(nullptr, buffer);
    EXPECT_EQ(buffer->getCpuAddressForMemoryTransfer(), nullptr); // no cpu ptr

    auto gfxAllocation = buffer->getGraphicsAllocation(rootDeviceIndex);
    auto pClDevice = context.getDevice(0);
    // Install a MockGmm in every gmm slot of the allocation.
    for (auto handleId = 0u; handleId < gfxAllocation->getNumGmms(); handleId++) {
        gfxAllocation->setGmm(new MockGmm(pClDevice->getGmmClientContext()), handleId);
    }

    auto commandQueue = CommandQueue::create(&context, pClDevice, 0, false, retVal);
    ASSERT_EQ(CL_SUCCESS, retVal);

    auto mappedPtr = clEnqueueMapBuffer(commandQueue, glBuffer, CL_TRUE, CL_MAP_READ, 0, buffer->getSize(),
                                        0, nullptr, nullptr, &retVal);
    EXPECT_EQ(CL_SUCCESS, retVal);

    retVal = clEnqueueUnmapMemObject(commandQueue, glBuffer, mappedPtr, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    auto mappedPtr2 = clEnqueueMapBuffer(commandQueue, glBuffer, CL_TRUE, CL_MAP_READ, 0, buffer->getSize(),
                                         0, nullptr, nullptr, &retVal);
    EXPECT_EQ(CL_SUCCESS, retVal);

    EXPECT_EQ(mappedPtr, mappedPtr2);

    // Unmap with the pointer returned by the second map, so the call stays valid
    // even if the (non-fatal) equality expectation above fails.
    retVal = clEnqueueUnmapMemObject(commandQueue, glBuffer, mappedPtr2, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    retVal = clReleaseMemObject(glBuffer);
    EXPECT_EQ(CL_SUCCESS, retVal);

    delete commandQueue;
}

// A null context must be rejected with CL_INVALID_CONTEXT and no event returned.
TEST(APIclCreateEventFromGLsyncKHR, givenInvalidContexWhenCreateThenReturnError) {
    cl_int retVal = CL_SUCCESS;
    cl_GLsync sync = {0};

    auto event = clCreateEventFromGLsyncKHR(nullptr, sync, &retVal);
    EXPECT_EQ(CL_INVALID_CONTEXT, retVal);
    EXPECT_EQ(nullptr, event);
}

// After the CLGL sharing functions are reset on the context, event creation
// from a GL sync must fail with CL_INVALID_CONTEXT.
TEST_F(glSharingTests, givenContextWithoutSharingWhenCreateEventFromGLThenErrorIsReturned) {
    context.resetSharingFunctions(CLGL_SHARING);
    cl_int retVal = CL_SUCCESS;
    cl_GLsync sync = {0};
    auto event = clCreateEventFromGLsyncKHR(&context, sync, &retVal);
    EXPECT_EQ(CL_INVALID_CONTEXT, retVal);
    EXPECT_EQ(nullptr, event);
}

// Checks the GLMakeCurrent call count around GLContextGuard when the current GL
// context is first the sharing context and then the backup context.
TEST(glSharingContextSwitch, givenContextOrBkpContextHandleAsCurrentWhenSwitchAttemptedThenDontMakeSwitch) {
    GLType type = 0;
    auto context = (GLContext)1;
    auto display = (GLDisplay)2;
    auto bkpContext = (GLContext)3;
    MockGlSharing mockGlSharing(type, context, bkpContext, display);

    // Current context == sharing context: no make-current call is expected.
    mockGlSharing.overrideGetCurrentValues(context, display);
    {
        GLContextGuard guard(*mockGlSharing.sharingFunctions);

        EXPECT_TRUE(mockGlSharing.dllParam->getGlMockReturnedValues().currentContext == context);
        EXPECT_TRUE(mockGlSharing.dllParam->getGlMockReturnedValues().currentDisplay == display);
    }

    EXPECT_EQ(0, mockGlSharing.dllParam->getParam("GLMakeCurrentCalled"));
    EXPECT_EQ(1, mockGlSharing.dllParam->getParam("GLGetCurrentContextCalled"));
    EXPECT_EQ(1, mockGlSharing.dllParam->getParam("GLGetCurrentDisplayCalled"));

    // Current context == backup context: nothing on construction, one call on destruction.
    mockGlSharing.overrideGetCurrentValues(bkpContext, display);
    {
        GLContextGuard guard(*mockGlSharing.sharingFunctions);

        EXPECT_EQ(0, mockGlSharing.dllParam->getParam("GLMakeCurrentCalled"));
        EXPECT_TRUE(mockGlSharing.dllParam->getGlMockReturnedValues().currentContext == bkpContext);
    }

    EXPECT_EQ(1, mockGlSharing.dllParam->getParam("GLMakeCurrentCalled")); // destructor
    EXPECT_TRUE(mockGlSharing.dllParam->getGlMockReturnedValues().madeCurrentContext == bkpContext);
}

// With an unrelated context current and no make-current failures, the guard is
// expected to switch to the sharing context and restore the unrelated one on exit.
TEST(glSharingContextSwitch, givenUnknownCurrentContextAndNoFailsOnCallWhenSwitchAttemptedThenMakeSwitchToCtxHandle) {
    GLType type = 0;
    auto context = (GLContext)1;
    auto display = (GLDisplay)2;
    auto bkpContext = (GLContext)3;
    auto unknownContext = (GLContext)4;
    MockGlSharing mockGlSharing(type, context, bkpContext, display);

    mockGlSharing.overrideGetCurrentValues(unknownContext, display, false);
    {
        GLContextGuard guard(*mockGlSharing.sharingFunctions);

        EXPECT_TRUE(mockGlSharing.dllParam->getGlMockReturnedValues().currentContext == unknownContext);
        EXPECT_EQ(1, mockGlSharing.dllParam->getParam("GLMakeCurrentCalled"));
        EXPECT_TRUE(mockGlSharing.dllParam->getGlMockReturnedValues().madeCurrentContext == context);
    }
    EXPECT_EQ(2, mockGlSharing.dllParam->getParam("GLMakeCurrentCalled")); // destructor
    EXPECT_TRUE(mockGlSharing.dllParam->getGlMockReturnedValues().madeCurrentContext == unknownContext);
}
TEST(glSharingContextSwitch, givenUnknownCurrentContextAndOneFailOnCallWhenSwitchAttemptedThenMakeSwitchToBkpCtxHandle) { GLType type = 0; auto context = (GLContext)1; auto display = (GLDisplay)2; auto bkpContext = (GLContext)3; auto unknownContext = (GLContext)4; MockGlSharing mockGlSharing(type, context, bkpContext, display); mockGlSharing.overrideGetCurrentValues(unknownContext, display, true, 1); { GLContextGuard guard(*mockGlSharing.sharingFunctions); EXPECT_TRUE(mockGlSharing.dllParam->getGlMockReturnedValues().currentContext == unknownContext); EXPECT_EQ(2, mockGlSharing.dllParam->getParam("GLMakeCurrentCalled")); EXPECT_TRUE(mockGlSharing.dllParam->getGlMockReturnedValues().madeCurrentContext == bkpContext); } EXPECT_EQ(3, mockGlSharing.dllParam->getParam("GLMakeCurrentCalled")); // destructor EXPECT_TRUE(mockGlSharing.dllParam->getGlMockReturnedValues().madeCurrentContext == unknownContext); } TEST(glSharingContextSwitch, givenUnknownCurrentContextAndMultipleFailOnCallWhenSwitchAttemptedThenMakeSwitchToBkpCtxHandleUntilSuccess) { GLType type = 0; auto context = (GLContext)1; auto display = (GLDisplay)2; auto bkpContext = (GLContext)3; auto unknownContext = (GLContext)4; MockGlSharing mockGlSharing(type, context, bkpContext, display); mockGlSharing.overrideGetCurrentValues(unknownContext, display, true, 5); { GLContextGuard guard(*mockGlSharing.sharingFunctions); EXPECT_TRUE(mockGlSharing.dllParam->getGlMockReturnedValues().currentContext == unknownContext); EXPECT_EQ(6, mockGlSharing.dllParam->getParam("GLMakeCurrentCalled")); EXPECT_TRUE(mockGlSharing.dllParam->getGlMockReturnedValues().madeCurrentContext == bkpContext); } EXPECT_EQ(7, mockGlSharing.dllParam->getParam("GLMakeCurrentCalled")); // destructor EXPECT_TRUE(mockGlSharing.dllParam->getGlMockReturnedValues().madeCurrentContext == unknownContext); } TEST(glSharingContextSwitch, givenZeroCurrentContextWhenSwitchAttemptedThenMakeSwitchToBkpCtxHandle) { GLType type = 0; auto context = (GLContext)1; 
auto display = (GLDisplay)2; auto bkpContext = (GLContext)3; auto zeroContext = (GLContext)0; MockGlSharing mockGlSharing(type, context, bkpContext, display); mockGlSharing.overrideGetCurrentValues(zeroContext, display, false); { GLContextGuard guard(*mockGlSharing.sharingFunctions); EXPECT_TRUE(mockGlSharing.dllParam->getGlMockReturnedValues().currentContext == zeroContext); EXPECT_EQ(1, mockGlSharing.dllParam->getParam("GLMakeCurrentCalled")); EXPECT_TRUE(mockGlSharing.dllParam->getGlMockReturnedValues().madeCurrentContext == bkpContext); } EXPECT_EQ(2, mockGlSharing.dllParam->getParam("GLMakeCurrentCalled")); // destructor EXPECT_TRUE(mockGlSharing.dllParam->getGlMockReturnedValues().madeCurrentContext == zeroContext); } TEST(glSharingContextSwitch, givenSharingFunctionsWhenGlDeleteContextIsNotPresentThenItIsNotCalled) { auto glSharingFunctions = new GLSharingFunctionsWindows(); GlDllHelper dllParam; auto currentGlDeleteContextCalledCount = dllParam.getParam("GLDeleteContextCalled"); delete glSharingFunctions; EXPECT_EQ(currentGlDeleteContextCalledCount, dllParam.getParam("GLDeleteContextCalled")); } HWTEST_F(glSharingTests, givenSyncObjectWhenCreateEventIsCalledThenCreateGLSyncObj) { cl_int retVal = CL_SUCCESS; GLsync glSync = {0}; auto event = clCreateEventFromGLsyncKHR(&context, glSync, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, event); auto &csr = reinterpret_cast(context.getDevice(0))->getUltCommandStreamReceiver(); csr.taskLevel = 123; auto eventObj = castToObject(event); EXPECT_TRUE(eventObj->getCommandType() == CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR); EXPECT_TRUE(eventObj->peekExecutionStatus() == CL_SUBMITTED); EXPECT_EQ(CompletionStamp::notReady, eventObj->taskLevel); EXPECT_EQ(CompletionStamp::notReady, eventObj->getTaskLevel()); EXPECT_EQ(1, mockGlSharing->dllParam->getParam("GLRetainSyncCalled")); eventObj->setStatus(CL_COMPLETE); EXPECT_EQ(0u, eventObj->getTaskLevel()); clReleaseEvent(event); EXPECT_EQ(1, 
mockGlSharing->dllParam->getParam("GLReleaseSyncCalled")); } HWTEST_F(glSharingTests, givenEventCreatedFromFenceObjectWhenItIsPassedToAcquireThenItsStatusIsUpdated) { GLsync glSync = {0}; auto retVal = CL_SUCCESS; auto event = clCreateEventFromGLsyncKHR(&context, glSync, &retVal); auto glBuffer = clCreateFromGLBuffer(&context, 0, bufferId, &retVal); auto commandQueue = clCreateCommandQueue(&context, context.getDevice(0), 0, &retVal); mockGlSharing->setGetSyncivReturnValue(GL_SIGNALED); auto neoEvent = castToObject(event); EXPECT_FALSE(neoEvent->isReadyForSubmission()); retVal = clEnqueueAcquireGLObjects(commandQueue, 1, &glBuffer, 1, &event, nullptr); EXPECT_TRUE(neoEvent->isReadyForSubmission()); EXPECT_EQ(CL_SUCCESS, retVal); clReleaseCommandQueue(commandQueue); clReleaseMemObject(glBuffer); clReleaseEvent(event); } TEST_F(glSharingTests, GivenGlSyncEventThenReportsAsExternallySynchronized) { GLsync glSync = {0}; auto syncEvent = GlSyncEvent::create(context, glSync, nullptr); ASSERT_NE(nullptr, syncEvent); EXPECT_TRUE(syncEvent->isExternallySynchronized()); syncEvent->release(); } TEST_F(glSharingTests, givenSyncEventWhenUpdateExecutionStatusIsCalledThenGLGetSyncivCalled) { GLsync glSync = {0}; auto syncEvent = GlSyncEvent::create(context, glSync, nullptr); ASSERT_NE(nullptr, syncEvent); mockGlSharing->setGetSyncivReturnValue(GL_UNSIGNALED); syncEvent->updateExecutionStatus(); EXPECT_EQ(1, mockGlSharing->dllParam->getParam("GLGetSyncivCalled")); EXPECT_TRUE(syncEvent->updateEventAndReturnCurrentStatus() == CL_SUBMITTED); EXPECT_EQ(2, mockGlSharing->dllParam->getParam("GLGetSyncivCalled")); // updateExecutionStatus called in peekExecutionStatus mockGlSharing->setGetSyncivReturnValue(GL_SIGNALED); syncEvent->updateExecutionStatus(); EXPECT_EQ(3, mockGlSharing->dllParam->getParam("GLGetSyncivCalled")); EXPECT_TRUE(syncEvent->peekExecutionStatus() == CL_COMPLETE); delete syncEvent; } TEST_F(glSharingTests, givenContextWhenEmptySharingTableEmptyThenReturnsNullptr) { 
MockContext context; context.clearSharingFunctions(); GLSharingFunctions *sharingF = context.getSharing(); EXPECT_EQ(sharingF, nullptr); } TEST_F(glSharingTests, givenUnknownBaseEventWhenGetGlArbSyncEventIsCalledThenNullptrIsReturned) { auto *sharing = context.getSharing(); ASSERT_NE(nullptr, sharing); auto event = new MockEvent(); MockContext context; EXPECT_EQ(nullptr, sharing->getGlArbSyncEvent(*event)); event->release(); } TEST_F(glSharingTests, givenKnownBaseEventWhenGetGlArbSyncEventIsCalledThenProperArbEventIsReturned) { auto *sharing = static_cast(context.getSharing()); ASSERT_NE(nullptr, sharing); auto baseEvent = new MockEvent; auto arbSyncEvent = reinterpret_cast(0x1c); sharing->glArbEventMapping[baseEvent] = arbSyncEvent; EXPECT_EQ(arbSyncEvent, sharing->getGlArbSyncEvent(*baseEvent)); baseEvent->release(); } TEST_F(glSharingTests, givenKnownBaseEventWhenRemoveGlArbSyncEventMappingIsCalledThenProperArbEventIsRemovedFromMap) { auto *sharing = static_cast(context.getSharing()); ASSERT_NE(nullptr, sharing); auto baseEvent = new MockEvent; auto arbSyncEvent = reinterpret_cast(0x1c); sharing->glArbEventMapping[baseEvent] = arbSyncEvent; EXPECT_NE(sharing->glArbEventMapping.end(), sharing->glArbEventMapping.find(baseEvent)); sharing->removeGlArbSyncEventMapping(*baseEvent); EXPECT_EQ(sharing->glArbEventMapping.end(), sharing->glArbEventMapping.find(baseEvent)); baseEvent->release(); } TEST_F(glSharingTests, givenUnknownBaseEventWhenRemoveGlArbSyncEventMappingIsCalledThenProperArbEventIsRemovedFromMap) { auto *sharing = static_cast(context.getSharing()); ASSERT_NE(nullptr, sharing); auto baseEvent = new MockEvent; auto unknownBaseEvent = new MockEvent; auto arbSyncEvent = reinterpret_cast(0x1c); sharing->glArbEventMapping[baseEvent] = arbSyncEvent; EXPECT_NE(sharing->glArbEventMapping.end(), sharing->glArbEventMapping.find(baseEvent)); EXPECT_EQ(sharing->glArbEventMapping.end(), sharing->glArbEventMapping.find(unknownBaseEvent)); 
sharing->removeGlArbSyncEventMapping(*unknownBaseEvent); EXPECT_NE(sharing->glArbEventMapping.end(), sharing->glArbEventMapping.find(baseEvent)); EXPECT_EQ(sharing->glArbEventMapping.end(), sharing->glArbEventMapping.find(unknownBaseEvent)); unknownBaseEvent->release(); baseEvent->release(); } TEST_F(glSharingTests, givenUnknownBaseEventWhenGetOrCreateGlArbSyncEventIsCalledThenNewArbEventIsReturned) { auto *sharing = static_cast(context.getSharing()); sharing->pfnGlArbSyncObjectCleanup = glArbSyncObjectCleanupMockDoNothing; ASSERT_NE(nullptr, sharing); auto commandQueue = clCreateCommandQueue(&context, context.getDevice(0), 0, nullptr); ASSERT_NE(nullptr, commandQueue); auto baseEvent = new Event(castToObjectOrAbort(commandQueue), CL_COMMAND_RELEASE_GL_OBJECTS, -1, -1); auto syncEv = sharing->getOrCreateGlArbSyncEvent>(*baseEvent); ASSERT_NE(nullptr, syncEv); EXPECT_NE(nullptr, syncEv->getSyncInfo()); std::unique_ptr osInterface{new OSInterface}; static_cast *>(syncEv)->osInterface = osInterface.get(); syncEv->release(); clReleaseCommandQueue(commandQueue); } TEST_F(glSharingTests, givenKnownBaseEventWhenGetOrCreateGlArbSyncEventIsCalledThenOldArbEventIsReused) { auto *sharing = static_cast(context.getSharing()); sharing->pfnGlArbSyncObjectCleanup = glArbSyncObjectCleanupMockDoNothing; ASSERT_NE(nullptr, sharing); auto commandQueue = clCreateCommandQueue(&context, context.getDevice(0), 0, nullptr); ASSERT_NE(nullptr, commandQueue); auto baseEvent = new Event(castToObjectOrAbort(commandQueue), CL_COMMAND_RELEASE_GL_OBJECTS, -1, -1); auto syncEv = sharing->getOrCreateGlArbSyncEvent>(*baseEvent); ASSERT_NE(nullptr, syncEv); EXPECT_EQ(syncEv, sharing->getOrCreateGlArbSyncEvent>(*baseEvent)); std::unique_ptr osInterface{new OSInterface}; static_cast *>(syncEv)->osInterface = osInterface.get(); syncEv->release(); clReleaseCommandQueue(commandQueue); } TEST_F(glSharingTests, WhenArbSyncEventCreationFailsThenGetOrCreateGlArbSyncEventReturnsNull) { auto *sharing = 
static_cast(context.getSharing()); ASSERT_NE(nullptr, sharing); auto commandQueue = clCreateCommandQueue(&context, context.getDevice(0), 0, nullptr); ASSERT_NE(nullptr, commandQueue); auto baseEvent = new Event(castToObjectOrAbort(commandQueue), CL_COMMAND_RELEASE_GL_OBJECTS, -1, -1); auto syncEv = sharing->getOrCreateGlArbSyncEvent>(*baseEvent); EXPECT_EQ(nullptr, syncEv); baseEvent->release(); clReleaseCommandQueue(commandQueue); } TEST_F(glSharingTests, whenGetGlDeviceHandleIsCalledThenProperHandleIsReturned) { auto *sharing = static_cast(context.getSharing()); ASSERT_NE(nullptr, sharing); sharing->GLDeviceHandle = 0x2c; EXPECT_EQ(0x2cU, sharing->getGLDeviceHandle()); } TEST_F(glSharingTests, whenGetGlContextHandleIsCalledThenProperHandleIsReturned) { auto *sharing = static_cast(context.getSharing()); ASSERT_NE(nullptr, sharing); sharing->GLContextHandle = 0x2c; EXPECT_EQ(0x2cU, sharing->getGLContextHandle()); } TEST_F(glSharingTests, givenClGLBufferWhenCreatedThenSharedBufferAllocatoinTypeIsSet) { std::unique_ptr buffer(GlBuffer::createSharedGlBuffer(&context, CL_MEM_READ_WRITE, bufferId, nullptr)); ASSERT_NE(nullptr, buffer->getGraphicsAllocation(rootDeviceIndex)); EXPECT_EQ(AllocationType::SHARED_BUFFER, buffer->getGraphicsAllocation(rootDeviceIndex)->getAllocationType()); } using clGetSupportedGLTextureFormatsINTELTests = glSharingTests; TEST_F(clGetSupportedGLTextureFormatsINTELTests, givenContextWithoutGlSharingWhenGettingFormatsThenInvalidContextErrorIsReturned) { MockContext context; auto retVal = clGetSupportedGLTextureFormatsINTEL(&context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_CONTEXT, retVal); } TEST_F(clGetSupportedGLTextureFormatsINTELTests, givenValidInputsWhenGettingFormatsThenSuccesAndValidFormatsAreReturned) { cl_uint numFormats = 0; cl_GLenum glFormats[2] = {}; auto glFormatsCount = static_cast(arrayCount(glFormats)); auto retVal = clGetSupportedGLTextureFormatsINTEL(&context, CL_MEM_READ_WRITE, 
CL_MEM_OBJECT_IMAGE2D, glFormatsCount, glFormats, &numFormats); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(0u, numFormats); for (uint32_t i = 0; i < glFormatsCount; i++) { EXPECT_NE(GlSharing::glToCLFormats.end(), GlSharing::glToCLFormats.find(glFormats[i])); } } TEST(GlSharingAdapterLuid, whenInitializingGlSharingThenProperAdapterLuidIsObtained) { GlDllHelper dllParam; dllParam.resetParam("glGetLuidCalled"); { dllParam.resetParam("glGetLuidFuncAvailable"); MockGLSharingFunctions glSharing; LUID expectedLuid{}; expectedLuid.HighPart = 0x1d2e; expectedLuid.LowPart = 0x3f4a; EXPECT_EQ(0, dllParam.getParam("glGetLuidCalled")); auto luid = glSharing.getAdapterLuid(reinterpret_cast(0x1)); EXPECT_EQ(1, dllParam.getParam("glGetLuidCalled")); EXPECT_EQ(expectedLuid.HighPart, luid.HighPart); EXPECT_EQ(expectedLuid.LowPart, luid.LowPart); dllParam.resetParam("glGetLuidCalled"); } { dllParam.resetParam("glGetLuidFuncAvailable"); MockGLSharingFunctions glSharing; LUID expectedLuid{}; expectedLuid.HighPart = 0x5d2e; expectedLuid.LowPart = 0x3f4a; EXPECT_EQ(0, dllParam.getParam("glGetLuidCalled")); auto luid = glSharing.getAdapterLuid(reinterpret_cast(0x2)); EXPECT_EQ(1, dllParam.getParam("glGetLuidCalled")); EXPECT_EQ(expectedLuid.HighPart, luid.HighPart); EXPECT_EQ(expectedLuid.LowPart, luid.LowPart); dllParam.resetParam("glGetLuidCalled"); } { dllParam.resetParam("glGetLuidFuncNotAvailable"); MockGLSharingFunctions glSharing; EXPECT_EQ(0, dllParam.getParam("glGetLuidCalled")); auto luid = glSharing.getAdapterLuid(reinterpret_cast(0x1)); EXPECT_EQ(0, dllParam.getParam("glGetLuidCalled")); EXPECT_EQ(0u, luid.HighPart); EXPECT_EQ(0u, luid.LowPart); } dllParam.resetParam("glGetLuidFuncAvailable"); } compute-runtime-22.14.22890/opencl/test/unit_test/sharings/gl/windows/gl_texture_tests.cpp000066400000000000000000000703431422164147700316600ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include 
"shared/source/os_interface/hw_info_config.h" #include "shared/test/common/libult/ult_command_stream_receiver.h" #include "shared/test/common/mocks/mock_gmm.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "opencl/source/helpers/gmm_types_converter.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/platform/platform.h" #include "opencl/source/sharings/gl/gl_texture.h" #include "opencl/test/unit_test/mocks/gl/windows/mock_gl_sharing_windows.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "gtest/gtest.h" using namespace NEO; class GlSharingTextureTests : public ::testing::Test { public: // temp solution - we need to query size from GMM: class TempMM : public MockMemoryManager { public: using MockMemoryManager::MockMemoryManager; GraphicsAllocation *createGraphicsAllocationFromSharedHandle(osHandle handle, const AllocationProperties &properties, bool requireSpecificBitness, bool isHostIpcAllocation) override { auto alloc = OsAgnosticMemoryManager::createGraphicsAllocationFromSharedHandle(handle, properties, requireSpecificBitness, isHostIpcAllocation); if (useForcedGmm) { alloc->setDefaultGmm(forceGmm.get()); } return alloc; } void freeGraphicsMemoryImpl(GraphicsAllocation *gfxAllocation) override { if (useForcedGmm) { forceGmm.release(); } OsAgnosticMemoryManager::freeGraphicsMemoryImpl(gfxAllocation); } bool mapAuxGpuVA(GraphicsAllocation *graphicsAllocation) override { mapAuxGpuVACalled++; return false; } uint32_t mapAuxGpuVACalled = 0u; size_t forceAllocationSize; std::unique_ptr forceGmm; bool useForcedGmm = true; }; void SetUp() override { executionEnvironment = platform()->peekExecutionEnvironment(); imgDesc = {}; imgDesc.imageType = ImageType::Image1D; imgDesc.imageWidth = 10; auto imgInfo = MockGmm::initImgInfo(imgDesc, 0, nullptr); tempMM = new TempMM(*executionEnvironment); 
executionEnvironment->memoryManager.reset(tempMM); device = std::make_unique(MockDevice::create(executionEnvironment, 0)); clContext = std::make_unique(device.get()); mockGlSharingFunctions = glSharing->sharingFunctions.release(); clContext->setSharingFunctions(mockGlSharingFunctions); tempMM->forceGmm = MockGmm::queryImgParams(device->getGmmClientContext(), imgInfo, false); tempMM->forceAllocationSize = textureSize; textureSize = imgInfo.size; textureId = 1; } void setUnifiedAuxSurf() { tempMM->useForcedGmm = true; auto mockGmmResInfo = static_cast(tempMM->forceGmm->gmmResourceInfo.get()); mockGmmResInfo->setUnifiedAuxTranslationCapable(); } ExecutionEnvironment *executionEnvironment; ImageDescriptor imgDesc; TempMM *tempMM; std::unique_ptr device; std::unique_ptr clContext; std::unique_ptr glSharing = std::make_unique(); GlSharingFunctionsMock *mockGlSharingFunctions; size_t textureSize; unsigned int textureId; }; TEST_F(GlSharingTextureTests, givenMockGlWhen1dGlTextureIsCreatedThenMemObjectHasGlHandler) { cl_int retVal = CL_INVALID_VALUE; glSharing->uploadDataToTextureInfo(textureId); auto glTexture = GlTexture::createSharedGlTexture(clContext.get(), (cl_mem_flags)0, GL_TEXTURE_1D, 0, textureId, &retVal); ASSERT_NE(nullptr, glTexture); auto graphicsAllocation = glTexture->getGraphicsAllocation(device->getRootDeviceIndex()); EXPECT_NE(nullptr, graphicsAllocation); EXPECT_EQ(textureSize, graphicsAllocation->getUnderlyingBufferSize()); EXPECT_EQ(1, glSharing->dllParam->getParam("GLAcquireSharedTextureCalled")); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(textureId, glSharing->dllParam->getTextureInfo().name); //input auto handler = glTexture->peekSharingHandler(); ASSERT_NE(nullptr, handler); auto glHandler = static_cast(handler); EXPECT_EQ(glHandler->peekFunctionsHandler(), mockGlSharingFunctions); delete glTexture; } class FailingMemoryManager : public MockMemoryManager { public: GraphicsAllocation *createGraphicsAllocationFromSharedHandle(osHandle handle, const 
AllocationProperties &properties, bool requireSpecificBitness, bool isHostIpcAllocation) override { return nullptr; } }; TEST_F(GlSharingTextureTests, givenMockGlWhenGlTextureIsCreatedFromWrongHandleThenErrorAndNoTextureIsReturned) { auto tempMemoryManager = clContext->getMemoryManager(); tempMM->useForcedGmm = false; auto memoryManager = std::unique_ptr(new FailingMemoryManager()); clContext->memoryManager = memoryManager.get(); auto retVal = CL_SUCCESS; auto glTexture = GlTexture::createSharedGlTexture(clContext.get(), (cl_mem_flags)0, GL_TEXTURE_1D, 0, textureId, &retVal); EXPECT_EQ(nullptr, glTexture); EXPECT_EQ(CL_INVALID_GL_OBJECT, retVal); clContext->memoryManager = tempMemoryManager; } GLboolean OSAPI mockGLAcquireSharedTexture(GLDisplay, GLContext, GLContext, GLvoid *pResourceInfo) { auto pTextureInfo = (CL_GL_RESOURCE_INFO *)pResourceInfo; GlDllHelper dllParam; pTextureInfo->globalShareHandle = dllParam.getTextureInfo().globalShareHandle; pTextureInfo->globalShareHandleMCS = dllParam.getTextureInfo().globalShareHandleMCS; if (pTextureInfo->target == GL_TEXTURE_BUFFER) { // size and width for texture buffer are queried from textureInfo - not from gmm pTextureInfo->textureBufferSize = dllParam.getTextureInfo().textureBufferSize; pTextureInfo->textureBufferWidth = dllParam.getTextureInfo().textureBufferWidth; } pTextureInfo->pGmmResInfo = dllParam.getTextureInfo().pGmmResInfo; pTextureInfo->glInternalFormat = 99999; pTextureInfo->glHWFormat = dllParam.getTextureInfo().glHWFormat; pTextureInfo->textureBufferOffset = dllParam.getTextureInfo().textureBufferOffset; dllParam.loadTexture(*pTextureInfo); return (GLboolean)1; }; TEST_F(GlSharingTextureTests, givenMockGlWhenGlTextureIsCreatedFromIncorrectFormatThenErrorAndNoTextureIsReturned) { mockGlSharingFunctions->setGLAcquireSharedTextureMock(mockGLAcquireSharedTexture); auto retVal = CL_SUCCESS; auto glTexture = GlTexture::createSharedGlTexture(clContext.get(), (cl_mem_flags)0, GL_TEXTURE_1D, 0, textureId, 
&retVal); EXPECT_EQ(nullptr, glTexture); EXPECT_EQ(CL_INVALID_GL_OBJECT, retVal); } TEST_F(GlSharingTextureTests, givenMockGlWhenRenderBufferTextureIsCreatedThenMemObjectHasGlHandler) { cl_int retVal = CL_INVALID_VALUE; glSharing->uploadDataToTextureInfo(textureId); auto glTexture = GlTexture::createSharedGlTexture(clContext.get(), (cl_mem_flags)0, GL_RENDERBUFFER_EXT, 0, textureId, &retVal); ASSERT_NE(nullptr, glTexture); auto graphicsAllocation = glTexture->getGraphicsAllocation(device->getRootDeviceIndex()); EXPECT_NE(nullptr, graphicsAllocation); EXPECT_EQ(textureSize, graphicsAllocation->getUnderlyingBufferSize()); EXPECT_EQ(1, glSharing->dllParam->getParam("GLAcquireSharedRenderBufferCalled")); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(textureId, glSharing->dllParam->getTextureInfo().name); auto handler = glTexture->peekSharingHandler(); ASSERT_NE(nullptr, handler); auto glHandler = static_cast(handler); EXPECT_EQ(glHandler->peekFunctionsHandler(), mockGlSharingFunctions); delete glTexture; } TEST_F(GlSharingTextureTests, givenGmmResourceAsInputWhenTextureIsCreatedThenItHasGmmSet) { cl_int retVal = CL_INVALID_VALUE; glSharing->m_textureInfoOutput.globalShareHandle = textureId; glSharing->m_textureInfoOutput.pGmmResInfo = this->tempMM->forceGmm->gmmResourceInfo->peekGmmResourceInfo(); this->tempMM->useForcedGmm = false; glSharing->m_textureInfoOutput.pGmmResInfo = this->tempMM->forceGmm->gmmResourceInfo->peekGmmResourceInfo(); glSharing->uploadDataToTextureInfo(); auto glTexture = GlTexture::createSharedGlTexture(clContext.get(), (cl_mem_flags)0, GL_TEXTURE_1D, 0, textureId, &retVal); ASSERT_NE(nullptr, glTexture); auto graphicsAllocation = glTexture->getGraphicsAllocation(device->getRootDeviceIndex()); EXPECT_NE(nullptr, graphicsAllocation); ASSERT_NE(nullptr, graphicsAllocation->getDefaultGmm()); ASSERT_NE(nullptr, graphicsAllocation->getDefaultGmm()->gmmResourceInfo->peekHandle()); delete glTexture; } TEST_F(GlSharingTextureTests, 
givenDifferentHwFormatWhenSurfaceFormatInfoIsSetThenOverwrite) { cl_int retVal = CL_INVALID_VALUE; cl_image_format imageFormat = {}; GlTexture::setClImageFormat(GL_DEPTH32F_STENCIL8, imageFormat); auto format = Image::getSurfaceFormatFromTable(CL_MEM_READ_ONLY, &imageFormat, defaultHwInfo->capabilityTable.supportsOcl21Features); ASSERT_NE(format, nullptr); auto newHwFormat = 217u; EXPECT_TRUE(format->surfaceFormat.GenxSurfaceFormat != newHwFormat); glSharing->m_textureInfoOutput.glHWFormat = newHwFormat; glSharing->m_textureInfoOutput.glInternalFormat = GL_DEPTH32F_STENCIL8; glSharing->uploadDataToTextureInfo(); auto glTexture = GlTexture::createSharedGlTexture(clContext.get(), CL_MEM_READ_ONLY, GL_TEXTURE_2D, 0, textureId, &retVal); ASSERT_NE(nullptr, glTexture); EXPECT_TRUE(newHwFormat == glTexture->getSurfaceFormatInfo().surfaceFormat.GenxSurfaceFormat); delete glTexture; } TEST_F(GlSharingTextureTests, givenGLRGB10FormatWhenSharedGlTextureIsCreatedThenItHasCorrectGenxSurfaceFormatAssigned) { cl_int retVal = CL_INVALID_VALUE; glSharing->m_textureInfoOutput.glInternalFormat = GL_RGB10; glSharing->m_textureInfoOutput.glHWFormat = GFX3DSTATE_SURFACEFORMAT_R16G16B16X16_UNORM; glSharing->uploadDataToTextureInfo(); std::unique_ptr glTexture(GlTexture::createSharedGlTexture(clContext.get(), CL_MEM_READ_ONLY, GL_TEXTURE_2D, 0, textureId, &retVal)); ASSERT_NE(nullptr, glTexture); EXPECT_EQ(glTexture->getSurfaceFormatInfo().surfaceFormat.GenxSurfaceFormat, GFX3DSTATE_SURFACEFORMAT_R16G16B16A16_UNORM); } TEST_F(GlSharingTextureTests, givenContextAnd1dTextureWhenClCreateFromGlTextureIsCalledThenImageIsReturned) { cl_int retVal = CL_INVALID_GL_OBJECT; auto glImage = clCreateFromGLTexture(clContext.get(), 0, GL_TEXTURE_1D, 0, textureId, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, glImage); retVal = clReleaseMemObject(glImage); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(GlSharingTextureTests, 
givenContextWithoutSharingAnd1dTextureWhenClCreateFromGlTextureIsCalledThenErrorIsReturned) { tempMM->useForcedGmm = false; clContext->resetSharingFunctions(CLGL_SHARING); cl_int retVal = CL_INVALID_GL_OBJECT; auto glImage = clCreateFromGLTexture(clContext.get(), 0, GL_TEXTURE_1D, 0, textureId, &retVal); ASSERT_EQ(CL_INVALID_CONTEXT, retVal); ASSERT_EQ(nullptr, glImage); } TEST_F(GlSharingTextureTests, givenContextAndRenderBufferTextureWhenClCreateFromGlTextureIsCalledThenImageIsReturned) { cl_int retVal = CL_INVALID_GL_OBJECT; auto glImage = clCreateFromGLRenderbuffer(clContext.get(), 0, textureId, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, glImage); retVal = clReleaseMemObject(glImage); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(GlSharingTextureTests, givenContextWithoutSharingAndRenderBufferTextureWhenClCreateFromGlTextureIsCalledThenErrorIsReturned) { tempMM->useForcedGmm = false; clContext->resetSharingFunctions(CLGL_SHARING); cl_int retVal = CL_INVALID_GL_OBJECT; auto glImage = clCreateFromGLRenderbuffer(clContext.get(), 0, textureId, &retVal); ASSERT_EQ(CL_INVALID_CONTEXT, retVal); ASSERT_EQ(nullptr, glImage); } TEST_F(GlSharingTextureTests, givenGlCl1dTextureWhenAskedForCLGLGetInfoThenIdAndTypeIsReturned) { auto retVal = CL_SUCCESS; auto glImage = clCreateFromGLTexture(clContext.get(), 0, GL_TEXTURE_1D, 0, textureId, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, glImage); cl_gl_object_type objectType = 0u; cl_GLuint objectId = 0u; retVal = clGetGLObjectInfo(glImage, &objectType, &objectId); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(objectType, (cl_gl_object_type)CL_GL_OBJECT_TEXTURE1D); EXPECT_EQ(objectId, textureId); retVal = clReleaseMemObject(glImage); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(GlSharingTextureTests, givenHwCommandQueueAndGlTextureWhenItIsCreatedWithClCreateFromGlTexture2dThenImageObjectIsReturned) { auto retVal = CL_SUCCESS; auto glImage = clCreateFromGLTexture2D(clContext.get(), 0, GL_TEXTURE_2D, 0, textureId, 
&retVal); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, glImage); cl_gl_object_type objectType = 0u; cl_GLuint objectId = 0u; retVal = clGetGLObjectInfo(glImage, &objectType, &objectId); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(objectType, (cl_gl_object_type)CL_GL_OBJECT_TEXTURE2D); EXPECT_EQ(objectId, textureId); retVal = clReleaseMemObject(glImage); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(GlSharingTextureTests, givenContextWithoutSharingAndGlTextureWhenItIsCreatedWithClCreateFromGlTexture2dThenErrorIsReturned) { tempMM->useForcedGmm = false; clContext->resetSharingFunctions(CLGL_SHARING); auto retVal = CL_SUCCESS; auto glImage = clCreateFromGLTexture2D(clContext.get(), 0, GL_TEXTURE_2D, 0, textureId, &retVal); ASSERT_EQ(CL_INVALID_CONTEXT, retVal); ASSERT_EQ(nullptr, glImage); } TEST_F(GlSharingTextureTests, givenHwCommandQueueAndGlTextureWhenItIsCreatedWithClCreateFromGlTexture3dThenImageObjectIsReturned) { auto retVal = CL_SUCCESS; auto glImage = clCreateFromGLTexture3D(clContext.get(), 0, GL_TEXTURE_3D, 0, textureId, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, glImage); cl_gl_object_type objectType = 0u; cl_GLuint objectId = 0u; retVal = clGetGLObjectInfo(glImage, &objectType, &objectId); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(objectType, (cl_gl_object_type)CL_GL_OBJECT_TEXTURE3D); EXPECT_EQ(objectId, textureId); retVal = clReleaseMemObject(glImage); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(GlSharingTextureTests, givenContextWithoutSharingAndGlTextureWhenItIsCreatedWithClCreateFromGlTexture3dThenErrorIsReturned) { tempMM->useForcedGmm = false; clContext->resetSharingFunctions(CLGL_SHARING); auto retVal = CL_SUCCESS; auto glImage = clCreateFromGLTexture3D(clContext.get(), 0, GL_TEXTURE_3D, 0, textureId, &retVal); ASSERT_EQ(CL_INVALID_CONTEXT, retVal); ASSERT_EQ(nullptr, glImage); } TEST_F(GlSharingTextureTests, givenHwCommandQueueAndGlTextureWhenAcquireIsCalledThenAcquireCountIsIncremented) { glSharing->uploadDataToTextureInfo(textureId); 
auto retVal = CL_SUCCESS; auto commandQueue = clCreateCommandQueue(clContext.get(), clContext->getDevice(0), 0, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); auto glImage = clCreateFromGLTexture(clContext.get(), 0, GL_TEXTURE_1D, 0, textureId, &retVal); EXPECT_EQ(1, glSharing->dllParam->getParam("GLAcquireSharedTextureCalled")); retVal = clEnqueueAcquireGLObjects(commandQueue, 1, &glImage, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(2, glSharing->dllParam->getParam("GLAcquireSharedTextureCalled")); retVal = clEnqueueReleaseGLObjects(commandQueue, 1, &glImage, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clEnqueueAcquireGLObjects(commandQueue, 1, &glImage, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(3, glSharing->dllParam->getParam("GLAcquireSharedTextureCalled")); retVal = clReleaseCommandQueue(commandQueue); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(glImage); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(GlSharingTextureTests, GivenGlTextureThenBufferOffsetIsCorrect) { glSharing->uploadDataToTextureInfo(textureId); auto rootDeviceIndex = clContext->getDevice(0)->getRootDeviceIndex(); auto retVal = CL_SUCCESS; auto commandQueue = clCreateCommandQueue(clContext.get(), clContext->getDevice(0), 0, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); auto glImage = clCreateFromGLTexture(clContext.get(), 0, GL_TEXTURE_1D, 0, textureId, &retVal); EXPECT_NE(glImage, nullptr); retVal = clEnqueueAcquireGLObjects(commandQueue, 1, &glImage, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); auto memObj = castToObject(glImage); EXPECT_NE(memObj, nullptr); EXPECT_EQ(memObj->getGraphicsAllocation(rootDeviceIndex)->getAllocationOffset(), 0u); retVal = clEnqueueReleaseGLObjects(commandQueue, 1, &glImage, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); glSharing->uploadTextureBufferOffsetToTextureInfo(0x660); retVal = clEnqueueAcquireGLObjects(commandQueue, 1, &glImage, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); 
memObj = castToObject(glImage); EXPECT_NE(memObj, nullptr); EXPECT_EQ(memObj->getGraphicsAllocation(rootDeviceIndex)->getAllocationOffset(), 0x660u); retVal = clEnqueueReleaseGLObjects(commandQueue, 1, &glImage, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseCommandQueue(commandQueue); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(glImage); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(GlSharingTextureTests, givenHwCommandQueueAndGlRenderBufferWhenAcquireIsCalledThenAcquireCountIsIncremented) { glSharing->uploadDataToTextureInfo(textureId); auto retVal = CL_SUCCESS; auto commandQueue = clCreateCommandQueue(clContext.get(), clContext->getDevice(0), 0, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); auto glImage = clCreateFromGLRenderbuffer(clContext.get(), 0, textureId, &retVal); EXPECT_EQ(1, glSharing->dllParam->getParam("GLAcquireSharedRenderBufferCalled")); retVal = clEnqueueAcquireGLObjects(commandQueue, 1, &glImage, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(2, glSharing->dllParam->getParam("GLAcquireSharedRenderBufferCalled")); retVal = clEnqueueReleaseGLObjects(commandQueue, 1, &glImage, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clEnqueueAcquireGLObjects(commandQueue, 1, &glImage, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(3, glSharing->dllParam->getParam("GLAcquireSharedRenderBufferCalled")); retVal = clReleaseCommandQueue(commandQueue); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(glImage); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(GlSharingTextureTests, givenSharedGlTextureWhenItIsAcquireCountIsDecrementedToZeroThenCallReleaseFunction) { std::unique_ptr image(GlTexture::createSharedGlTexture(clContext.get(), CL_MEM_READ_ONLY, GL_TEXTURE_2D, 0, textureId, nullptr)); auto sharingHandler = image->peekSharingHandler(); sharingHandler->acquire(image.get(), clContext->getDevice(0)->getRootDeviceIndex()); sharingHandler->acquire(image.get(), 
clContext->getDevice(0)->getRootDeviceIndex()); sharingHandler->release(image.get(), clContext->getDevice(0)->getRootDeviceIndex()); EXPECT_EQ(0, glSharing->dllParam->getParam("GLReleaseSharedTextureCalled")); sharingHandler->release(image.get(), clContext->getDevice(0)->getRootDeviceIndex()); EXPECT_EQ(1, glSharing->dllParam->getParam("GLReleaseSharedTextureCalled")); EXPECT_EQ(0, glSharing->dllParam->getParam("GLReleaseSharedRenderBufferCalled")); EXPECT_EQ(textureId, glSharing->dllParam->getTextureInfo().name); } TEST_F(GlSharingTextureTests, givenSharedRenderBufferWhenItIsAcquireCountIsDecrementedToZeroThenCallReleaseFunction) { std::unique_ptr image(GlTexture::createSharedGlTexture(clContext.get(), CL_MEM_READ_WRITE, GL_RENDERBUFFER_EXT, 0, textureId, nullptr)); auto sharingHandler = image->peekSharingHandler(); sharingHandler->acquire(image.get(), clContext->getDevice(0)->getRootDeviceIndex()); sharingHandler->acquire(image.get(), clContext->getDevice(0)->getRootDeviceIndex()); sharingHandler->release(image.get(), clContext->getDevice(0)->getRootDeviceIndex()); EXPECT_EQ(0, glSharing->dllParam->getParam("GLReleaseSharedRenderBufferCalled")); sharingHandler->release(image.get(), clContext->getDevice(0)->getRootDeviceIndex()); EXPECT_EQ(1, glSharing->dllParam->getParam("GLReleaseSharedRenderBufferCalled")); EXPECT_EQ(0, glSharing->dllParam->getParam("GLReleaseSharedTextureCalled")); EXPECT_EQ(textureId, glSharing->dllParam->getTextureInfo().name); } TEST_F(GlSharingTextureTests, givenMultisampleTextureWithMoreThanOneSampleWhenAskedForNumSamplesThenReturnCorrectValue) { GLsizei expectedNumSamples = 2; glSharing->m_textureInfoOutput.numberOfSamples = expectedNumSamples; glSharing->uploadDataToTextureInfo(); std::unique_ptr image(GlTexture::createSharedGlTexture(clContext.get(), CL_MEM_READ_WRITE, GL_TEXTURE_2D_MULTISAMPLE, 0, textureId, nullptr)); GLsizei numSamples = 0; size_t retSize = 0; auto retVal = clGetGLTextureInfo(image.get(), CL_GL_NUM_SAMPLES, 
sizeof(GLsizei), &numSamples, &retSize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(expectedNumSamples, numSamples); EXPECT_EQ(sizeof(GLsizei), retSize); } TEST_F(GlSharingTextureTests, givenTextureWithOneSampleWhenAskedForNumSamplesThenReturnZero) { glSharing->m_textureInfoOutput.numberOfSamples = 1; glSharing->uploadDataToTextureInfo(); std::unique_ptr image(GlTexture::createSharedGlTexture(clContext.get(), CL_MEM_READ_WRITE, GL_TEXTURE_2D_MULTISAMPLE, 0, textureId, nullptr)); GLenum numSamples = 0; size_t retSize = 0; auto retVal = clGetGLTextureInfo(image.get(), CL_GL_NUM_SAMPLES, sizeof(GLsizei), &numSamples, &retSize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0, numSamples); EXPECT_EQ(sizeof(GLsizei), retSize); } TEST_F(GlSharingTextureTests, givenTextureWithZeroSamplesWhenAskedForNumSamplesThenReturnZero) { glSharing->m_textureInfoOutput.numberOfSamples = 0; glSharing->uploadDataToTextureInfo(); std::unique_ptr image(GlTexture::createSharedGlTexture(clContext.get(), CL_MEM_READ_WRITE, GL_TEXTURE_2D_MULTISAMPLE, 0, textureId, nullptr)); GLenum numSamples = 0; size_t retSize = 0; auto retVal = clGetGLTextureInfo(image.get(), CL_GL_NUM_SAMPLES, sizeof(GLsizei), &numSamples, &retSize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0, numSamples); EXPECT_EQ(sizeof(GLsizei), retSize); } TEST_F(GlSharingTextureTests, givenMockGlWhenGlTextureIsCreatedFromFormatNotIncludedInSurfaceFormatsThenErrorAndNoTextureIsReturned) { cl_int retVal = CL_SUCCESS; auto textureInfoOutput = std::make_unique(); textureInfoOutput->glInternalFormat = GL_SRGB8_ALPHA8; glSharing->dllParam->loadTexture(*textureInfoOutput); auto glTexture = GlTexture::createSharedGlTexture(clContext.get(), CL_MEM_WRITE_ONLY, GL_SRGB8_ALPHA8, 0, textureId, &retVal); EXPECT_EQ(nullptr, glTexture); EXPECT_EQ(CL_INVALID_GL_OBJECT, retVal); } TEST_F(GlSharingTextureTests, givenMockGlWhenGlTextureIsCreatedWithUnifiedAuxSurfThenMapAuxGpuVaIsCalled) { CL_GL_RESOURCE_INFO textureInfoToReturn = {}; 
textureInfoToReturn.isAuxEnabled = GL_TRUE; glSharing->dllParam->loadTexture(textureInfoToReturn); cl_int retVal = CL_SUCCESS; setUnifiedAuxSurf(); EXPECT_EQ(0u, tempMM->mapAuxGpuVACalled); auto glTexture = std::unique_ptr(GlTexture::createSharedGlTexture(clContext.get(), CL_MEM_WRITE_ONLY, GL_SRGB8_ALPHA8, 0, textureId, &retVal)); const auto &hwInfo = clContext->getDevice(0)->getHardwareInfo(); const auto &hwInfoConfig = *HwInfoConfig::get(hwInfo.platform.eProductFamily); uint32_t expectedMapAuxGpuVaCalls = hwInfoConfig.isPageTableManagerSupported(hwInfo) ? 1 : 0; EXPECT_EQ(expectedMapAuxGpuVaCalls, tempMM->mapAuxGpuVACalled); } TEST_F(GlSharingTextureTests, givenAuxDisabledAndUnifiedAuxCapableWhenGlTextureIsCreatedThenAllocationIsTreatedAsUncompressed) { CL_GL_RESOURCE_INFO textureInfoToReturn = {}; textureInfoToReturn.isAuxEnabled = GL_FALSE; glSharing->dllParam->loadTexture(textureInfoToReturn); cl_int retVal = CL_SUCCESS; setUnifiedAuxSurf(); ASSERT_EQ(0u, tempMM->mapAuxGpuVACalled); auto glTexture = std::unique_ptr(GlTexture::createSharedGlTexture(clContext.get(), CL_MEM_WRITE_ONLY, GL_SRGB8_ALPHA8, 0, textureId, &retVal)); EXPECT_EQ(0u, tempMM->mapAuxGpuVACalled); auto graphicsAllocation = glTexture->getGraphicsAllocation(device->getRootDeviceIndex()); EXPECT_FALSE(graphicsAllocation->getDefaultGmm()->isCompressionEnabled); } class GetGlTextureInfoTests : public GlSharingTextureTests, public ::testing::WithParamInterface { }; INSTANTIATE_TEST_CASE_P( GetGlTextureInfoTests, GetGlTextureInfoTests, testing::ValuesIn(glTextureTargets::supportedTargets)); TEST_P(GetGlTextureInfoTests, givenGlTextureWhenAskedForCLGetGLTextureInfoThenReturnValidInfo) { auto retVal = CL_SUCCESS; GLenum expectedTarget = GetParam(); GLint mipLevel = 1u; auto glImage = clCreateFromGLTexture(clContext.get(), 0, expectedTarget, mipLevel, textureId, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, glImage); auto pMemObj = castToObject(glImage); auto glTextureObj = (GlTexture 
*)pMemObj->peekSharingHandler(); GLenum textureTarget = 0u; size_t retSize = 0; retVal = clGetGLTextureInfo(glImage, CL_GL_TEXTURE_TARGET, sizeof(GLenum), &textureTarget, &retSize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(expectedTarget, textureTarget); EXPECT_EQ(sizeof(GLenum), retSize); retVal = clGetGLTextureInfo(glImage, CL_GL_MIPMAP_LEVEL, sizeof(GLenum), &mipLevel, &retSize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(glTextureObj->getMiplevel(), mipLevel); EXPECT_EQ(sizeof(GLint), retSize); retVal = clGetGLTextureInfo(glImage, CL_INVALID_VALUE, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); auto image = castToObject(glImage); EXPECT_EQ(mipLevel, image->peekBaseMipLevel()); retVal = clReleaseMemObject(glImage); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_P(GetGlTextureInfoTests, givenApiTargetTypeWhenAskedForBaseTypeThenConvertOnlyCubeMaps) { tempMM->useForcedGmm = false; auto apiTarget = GetParam(); unsigned int expectedBaseType; switch (apiTarget) { case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: case GL_TEXTURE_CUBE_MAP_POSITIVE_X: case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: expectedBaseType = GL_TEXTURE_CUBE_MAP_ARB; break; default: expectedBaseType = apiTarget; break; } EXPECT_EQ(GlTexture::getBaseTargetType(apiTarget), expectedBaseType); } TEST_P(GetGlTextureInfoTests, givenApiTargetTypeWhenAskedForGmmCubeFaceIndexThenReturnValidOnlyForCubeType) { tempMM->useForcedGmm = false; auto apiTarget = GetParam(); auto gmmCubeFaceIndex = static_cast(GmmTypesConverter::getCubeFaceIndex(apiTarget)); switch (apiTarget) { case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: EXPECT_EQ(gmmCubeFaceIndex, static_cast(__GMM_CUBE_FACE_NEG_X)); break; case GL_TEXTURE_CUBE_MAP_POSITIVE_X: EXPECT_EQ(gmmCubeFaceIndex, static_cast(__GMM_CUBE_FACE_POS_X)); break; case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: EXPECT_EQ(gmmCubeFaceIndex, static_cast(__GMM_CUBE_FACE_NEG_Y)); break; case 
GL_TEXTURE_CUBE_MAP_POSITIVE_Y: EXPECT_EQ(gmmCubeFaceIndex, static_cast(__GMM_CUBE_FACE_POS_Y)); break; case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: EXPECT_EQ(gmmCubeFaceIndex, static_cast(__GMM_CUBE_FACE_NEG_Z)); break; case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: EXPECT_EQ(gmmCubeFaceIndex, static_cast(__GMM_CUBE_FACE_POS_Z)); break; default: EXPECT_EQ(gmmCubeFaceIndex, static_cast(__GMM_NO_CUBE_MAP)); break; } } compute-runtime-22.14.22890/opencl/test/unit_test/sharings/gl/windows/gl_types_tests.cpp000066400000000000000000000143401422164147700313170ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/sharings/gl/gl_texture.h" #include "config.h" #include "gtest/gtest.h" namespace NEO { namespace glTypes { static const std::tuple allImageFormats[] = { // input, output, output std::make_tuple(GL_RGBA8, CL_UNORM_INT8, CL_RGBA), std::make_tuple(GL_RGBA8I, CL_SIGNED_INT8, CL_RGBA), std::make_tuple(GL_RGBA16, CL_UNORM_INT16, CL_RGBA), std::make_tuple(GL_RGBA16I, CL_SIGNED_INT16, CL_RGBA), std::make_tuple(GL_RGBA32I, CL_SIGNED_INT32, CL_RGBA), std::make_tuple(GL_RGBA8UI, CL_UNSIGNED_INT8, CL_RGBA), std::make_tuple(GL_RGBA16UI, CL_UNSIGNED_INT16, CL_RGBA), std::make_tuple(GL_RGBA32UI, CL_UNSIGNED_INT32, CL_RGBA), std::make_tuple(GL_RGBA16F, CL_HALF_FLOAT, CL_RGBA), std::make_tuple(GL_RGBA32F, CL_FLOAT, CL_RGBA), std::make_tuple(GL_RGBA, CL_UNORM_INT8, CL_RGBA), std::make_tuple(GL_RGBA8_SNORM, CL_SNORM_INT8, CL_RGBA), std::make_tuple(GL_RGBA16_SNORM, CL_SNORM_INT16, CL_RGBA), std::make_tuple(GL_BGRA, CL_UNORM_INT8, CL_BGRA), std::make_tuple(GL_R8, CL_UNORM_INT8, CL_R), std::make_tuple(GL_R8_SNORM, CL_SNORM_INT8, CL_R), std::make_tuple(GL_R16, CL_UNORM_INT16, CL_R), std::make_tuple(GL_R16_SNORM, CL_SNORM_INT16, CL_R), std::make_tuple(GL_R16F, CL_HALF_FLOAT, CL_R), std::make_tuple(GL_R32F, CL_FLOAT, CL_R), std::make_tuple(GL_R8I, CL_SIGNED_INT8, CL_R), std::make_tuple(GL_R16I, CL_SIGNED_INT16, CL_R), 
std::make_tuple(GL_R32I, CL_SIGNED_INT32, CL_R), std::make_tuple(GL_R8UI, CL_UNSIGNED_INT8, CL_R), std::make_tuple(GL_R16UI, CL_UNSIGNED_INT16, CL_R), std::make_tuple(GL_R32UI, CL_UNSIGNED_INT32, CL_R), std::make_tuple(GL_DEPTH_COMPONENT32F, CL_FLOAT, CL_DEPTH), std::make_tuple(GL_DEPTH_COMPONENT16, CL_UNORM_INT16, CL_DEPTH), std::make_tuple(GL_DEPTH24_STENCIL8, CL_UNORM_INT24, CL_DEPTH_STENCIL), std::make_tuple(GL_DEPTH32F_STENCIL8, CL_FLOAT, CL_DEPTH_STENCIL), std::make_tuple(GL_SRGB8_ALPHA8, CL_UNORM_INT8, CL_sRGBA), std::make_tuple(GL_RG8, CL_UNORM_INT8, CL_RG), std::make_tuple(GL_RG8_SNORM, CL_SNORM_INT8, CL_RG), std::make_tuple(GL_RG16, CL_UNORM_INT16, CL_RG), std::make_tuple(GL_RG16_SNORM, CL_SNORM_INT16, CL_RG), std::make_tuple(GL_RG16F, CL_HALF_FLOAT, CL_RG), std::make_tuple(GL_RG32F, CL_FLOAT, CL_RG), std::make_tuple(GL_RG8I, CL_SIGNED_INT8, CL_RG), std::make_tuple(GL_RG16I, CL_SIGNED_INT16, CL_RG), std::make_tuple(GL_RG32I, CL_SIGNED_INT32, CL_RG), std::make_tuple(GL_RG8UI, CL_UNSIGNED_INT8, CL_RG), std::make_tuple(GL_RG16UI, CL_UNSIGNED_INT16, CL_RG), std::make_tuple(GL_RG32UI, CL_UNSIGNED_INT32, CL_RG), std::make_tuple(GL_RGB10, CL_UNORM_INT16, CL_RGBA), std::make_tuple(CL_INVALID_VALUE, 0, 0)}; static const std::tuple allObjTypes[] = { // input, output, output std::make_tuple(GL_TEXTURE_1D, CL_GL_OBJECT_TEXTURE1D, CL_MEM_OBJECT_IMAGE1D), std::make_tuple(GL_TEXTURE_1D_ARRAY, CL_GL_OBJECT_TEXTURE1D_ARRAY, CL_MEM_OBJECT_IMAGE1D_ARRAY), std::make_tuple(GL_TEXTURE_2D, CL_GL_OBJECT_TEXTURE2D, CL_MEM_OBJECT_IMAGE2D), std::make_tuple(GL_TEXTURE_RECTANGLE, CL_GL_OBJECT_TEXTURE2D, CL_MEM_OBJECT_IMAGE2D), std::make_tuple(GL_TEXTURE_CUBE_MAP_NEGATIVE_X, CL_GL_OBJECT_TEXTURE2D, CL_MEM_OBJECT_IMAGE2D), std::make_tuple(GL_TEXTURE_CUBE_MAP_POSITIVE_X, CL_GL_OBJECT_TEXTURE2D, CL_MEM_OBJECT_IMAGE2D), std::make_tuple(GL_TEXTURE_CUBE_MAP_NEGATIVE_Y, CL_GL_OBJECT_TEXTURE2D, CL_MEM_OBJECT_IMAGE2D), std::make_tuple(GL_TEXTURE_CUBE_MAP_POSITIVE_Y, CL_GL_OBJECT_TEXTURE2D, 
CL_MEM_OBJECT_IMAGE2D), std::make_tuple(GL_TEXTURE_CUBE_MAP_NEGATIVE_Z, CL_GL_OBJECT_TEXTURE2D, CL_MEM_OBJECT_IMAGE2D), std::make_tuple(GL_TEXTURE_CUBE_MAP_POSITIVE_Z, CL_GL_OBJECT_TEXTURE2D, CL_MEM_OBJECT_IMAGE2D), std::make_tuple(GL_TEXTURE_2D_MULTISAMPLE, CL_GL_OBJECT_TEXTURE2D, CL_MEM_OBJECT_IMAGE2D), std::make_tuple(GL_TEXTURE_2D_ARRAY, CL_GL_OBJECT_TEXTURE2D_ARRAY, CL_MEM_OBJECT_IMAGE2D_ARRAY), std::make_tuple(GL_TEXTURE_2D_MULTISAMPLE_ARRAY, CL_GL_OBJECT_TEXTURE2D_ARRAY, CL_MEM_OBJECT_IMAGE2D_ARRAY), std::make_tuple(GL_TEXTURE_3D, CL_GL_OBJECT_TEXTURE3D, CL_MEM_OBJECT_IMAGE3D), std::make_tuple(GL_TEXTURE_BUFFER, CL_GL_OBJECT_TEXTURE_BUFFER, CL_MEM_OBJECT_IMAGE1D_BUFFER), std::make_tuple(GL_RENDERBUFFER_EXT, CL_GL_OBJECT_RENDERBUFFER, CL_MEM_OBJECT_IMAGE2D), std::make_tuple(CL_INVALID_VALUE, 0, 0)}; } // namespace glTypes struct GlClImageFormatTests : public ::testing::WithParamInterface>, public ::testing::Test {}; INSTANTIATE_TEST_CASE_P(GlClImageFormatTests, GlClImageFormatTests, testing::ValuesIn(glTypes::allImageFormats)); TEST_P(GlClImageFormatTests, WhenSettingClImageFormatThenValidFormatIsSet) { cl_image_format imgFormat = {}; auto glFormat = std::get<0>(GetParam()); auto expectedClChannelType = static_cast(std::get<1>(GetParam())); auto expectedClChannelOrder = static_cast(std::get<2>(GetParam())); GlTexture::setClImageFormat(glFormat, imgFormat); EXPECT_EQ(imgFormat.image_channel_data_type, expectedClChannelType); EXPECT_EQ(imgFormat.image_channel_order, expectedClChannelOrder); } struct GlClObjTypesTests : public ::testing::WithParamInterface>, public ::testing::Test {}; INSTANTIATE_TEST_CASE_P(GlClObjTypesTests, GlClObjTypesTests, testing::ValuesIn(glTypes::allObjTypes)); TEST_P(GlClObjTypesTests, WhenConvertingTypeThenTypeIsSetCorrectly) { auto glType = static_cast(std::get<0>(GetParam())); auto expectedClGlObjType = static_cast(std::get<1>(GetParam())); auto expectedClMemObjType = static_cast(std::get<2>(GetParam())); auto clGlObjType = 
GlTexture::getClGlObjectType(glType); auto clMemObjType = GlTexture::getClMemObjectType(glType); EXPECT_EQ(expectedClGlObjType, clGlObjType); EXPECT_EQ(clMemObjType, expectedClMemObjType); } } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/sharings/sharing_factory_tests.cpp000066400000000000000000000244721422164147700305660ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/compiler_interface/oclc_extensions.h" #include "shared/source/device/device.h" #include "shared/source/helpers/string.h" #include "shared/test/common/fixtures/memory_management_fixture.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/unit_test/helpers/gtest_helpers.h" #include "opencl/source/context/context.h" #include "opencl/source/platform/platform.h" #include "opencl/source/sharings/sharing.h" #include "opencl/source/sharings/sharing_factory.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "opencl/test/unit_test/mocks/mock_sharing_factory.h" #include "gtest/gtest.h" using namespace NEO; class SharingFactoryStateRestore : public SharingFactory { public: SharingFactoryStateRestore() { memcpy_s(savedState, sizeof(savedState), sharingContextBuilder, sizeof(sharingContextBuilder)); } ~SharingFactoryStateRestore() { memcpy_s(sharingContextBuilder, sizeof(sharingContextBuilder), savedState, sizeof(savedState)); } void clearCurrentState() { for (auto &builder : sharingContextBuilder) { builder = nullptr; } } template void registerSharing(SharingType type) { auto object = std::make_unique(); sharingContextBuilder[type] = object.get(); sharings.push_back(std::move(object)); } template Sharing *getSharing(); 
protected: decltype(SharingFactory::sharingContextBuilder) savedState; std::vector> sharings; }; class TestedSharingBuilderFactory : public SharingBuilderFactory { public: std::unique_ptr createContextBuilder() override { return nullptr; } std::string getExtensions(DriverInfo *driverInfo) override { return extension; }; void fillGlobalDispatchTable() override { invocationCount++; }; void *getExtensionFunctionAddress(const std::string &functionName) override { if (functionName == "someFunction") invocationCount++; return nullptr; } static const std::string extension; uint32_t invocationCount = 0u; }; const std::string TestedSharingBuilderFactory::extension("--extensions--"); template <> TestedSharingBuilderFactory *SharingFactoryStateRestore::getSharing() { return reinterpret_cast(sharingContextBuilder[SharingType::CLGL_SHARING]); } void dummyHandler() { } const cl_context_properties mockContextPassFinalize = 1; const cl_context_properties mockContextFailFinalize = 2; const cl_context_properties clContextPropertyMock = 0x2000; class MockSharingContextBuilder : public SharingContextBuilder { cl_context_properties value; public: bool processProperties(cl_context_properties &propertyType, cl_context_properties &propertyValue) override; bool finalizeProperties(Context &context, int32_t &errcodeRet) override; }; bool MockSharingContextBuilder::processProperties(cl_context_properties &propertyType, cl_context_properties &propertyValue) { if (propertyType == clContextPropertyMock) { if (propertyValue) { value = propertyValue; return true; } } return false; } class VASharingFunctionsMock : public SharingFunctions { public: static const uint32_t sharingId = VA_SHARING; uint32_t getId() const override { return sharingId; } }; struct VAMockSharingContextBuilder : public MockSharingContextBuilder { bool finalizeProperties(Context &context, int32_t &errcodeRet) override; }; bool VAMockSharingContextBuilder::finalizeProperties(Context &context, int32_t &errcodeRet) { auto 
&mockContext = static_cast(context); mockContext.registerSharingWithId(new VASharingFunctionsMock(), VA_SHARING); return true; } bool MockSharingContextBuilder::finalizeProperties(Context &context, int32_t &errcodeRet) { if (value == mockContextPassFinalize) { return true; } else { return false; } } class MockSharingBuilderFactory : public TestedSharingBuilderFactory { public: std::unique_ptr createContextBuilder() override { return std::unique_ptr(new MockSharingContextBuilder()); } void *getExtensionFunctionAddress(const std::string &functionName) override { if (functionName == "dummyHandler") { return reinterpret_cast(dummyHandler); } else { return nullptr; } } }; class VAMockSharingBuilderFactory : public TestedSharingBuilderFactory { public: std::unique_ptr createContextBuilder() override { return std::unique_ptr(new VAMockSharingContextBuilder()); } }; TEST(SharingFactoryTests, givenFactoryWithEmptyTableWhenAskedForExtensionThenEmptyStringIsReturned) { SharingFactoryStateRestore stateRestore; stateRestore.clearCurrentState(); auto ext = stateRestore.getExtensions(nullptr); EXPECT_EQ(0u, ext.length()); EXPECT_STREQ("", ext.c_str()); } TEST(SharingFactoryTests, givenFactoryWithSharingWhenAskedForExtensionThenStringIsReturned) { SharingFactoryStateRestore stateRestore; stateRestore.clearCurrentState(); stateRestore.registerSharing(SharingType::CLGL_SHARING); auto ext = stateRestore.getExtensions(nullptr); EXPECT_LE(TestedSharingBuilderFactory::extension.length(), ext.length()); EXPECT_TRUE(hasSubstr(ext, TestedSharingBuilderFactory::extension)); } TEST(SharingFactoryTests, givenFactoryWithSharingWhenDispatchFillRequestedThenMethodsAreInvoked) { SharingFactoryStateRestore stateRestore; stateRestore.clearCurrentState(); stateRestore.registerSharing(SharingType::CLGL_SHARING); auto sharing = stateRestore.getSharing(); ASSERT_EQ(0u, sharing->invocationCount); stateRestore.fillGlobalDispatchTable(); EXPECT_EQ(1u, sharing->invocationCount); } TEST(SharingFactoryTests, 
givenFactoryWithSharingWhenAskedThenAddressIsReturned) { SharingFactoryStateRestore stateRestore; stateRestore.clearCurrentState(); stateRestore.registerSharing(SharingType::CLGL_SHARING); auto sharing = stateRestore.getSharing(); ASSERT_EQ(0u, sharing->invocationCount); auto ptr = stateRestore.getExtensionFunctionAddress("someFunction"); EXPECT_EQ(nullptr, ptr); EXPECT_EQ(1u, sharing->invocationCount); } TEST(SharingFactoryTests, givenMockFactoryWithSharingWhenAskedThenAddressIsReturned) { SharingFactoryStateRestore stateRestore; stateRestore.clearCurrentState(); stateRestore.registerSharing(SharingType::CLGL_SHARING); auto ptr = stateRestore.getExtensionFunctionAddress("dummyHandler"); EXPECT_EQ(reinterpret_cast(dummyHandler), ptr); ptr = clGetExtensionFunctionAddress("dummyHandler"); EXPECT_EQ(reinterpret_cast(dummyHandler), ptr); } TEST(Context, givenMockSharingBuilderWhenContextWithInvalidPropertiesThenContextCreateShouldFail) { SharingFactoryStateRestore stateRestore; stateRestore.clearCurrentState(); stateRestore.registerSharing(SharingType::CLGL_SHARING); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); cl_device_id clDevice = device.get(); auto deviceVector = ClDeviceVector(&clDevice, 1); cl_int retVal; cl_platform_id platformId[] = {platform()}; cl_context_properties validProperties[5] = {CL_CONTEXT_PLATFORM, (cl_context_properties)platformId[0], clContextPropertyMock, mockContextPassFinalize, 0}; cl_context_properties invalidProperties[5] = {CL_CONTEXT_PLATFORM, (cl_context_properties)platformId[0], clContextPropertyMock, 0, 0}; cl_context_properties invalidPropertiesFailFinalize[5] = {CL_CONTEXT_PLATFORM, (cl_context_properties)platformId[0], clContextPropertyMock, mockContextFailFinalize, 0}; std::unique_ptr context; context.reset(Context::create(invalidProperties, deviceVector, nullptr, nullptr, retVal)); EXPECT_EQ(nullptr, context.get()); context.reset(Context::create(invalidPropertiesFailFinalize, 
deviceVector, nullptr, nullptr, retVal)); EXPECT_EQ(nullptr, context.get()); context.reset(Context::create(validProperties, deviceVector, nullptr, nullptr, retVal)); EXPECT_NE(nullptr, context.get()); }; TEST(SharingFactoryTests, givenDisabledFormatQueryAndFactoryWithSharingWhenAskedForExtensionThenFormatQueryExtensionIsNotReturned) { DebugManagerStateRestore restorer; DebugManager.flags.EnableFormatQuery.set(false); SharingFactoryStateRestore stateRestore; stateRestore.clearCurrentState(); stateRestore.registerSharing(SharingType::CLGL_SHARING); auto extensionsList = sharingFactory.getExtensions(nullptr); EXPECT_FALSE(hasSubstr(extensionsList, Extensions::sharingFormatQuery)); } TEST(SharingFactoryTests, givenEnabledFormatQueryAndFactoryWithSharingWhenAskedForExtensionThenFormatQueryExtensionIsReturned) { DebugManagerStateRestore restorer; DebugManager.flags.EnableFormatQuery.set(true); SharingFactoryStateRestore stateRestore; stateRestore.clearCurrentState(); stateRestore.registerSharing(SharingType::CLGL_SHARING); auto extensionsList = sharingFactory.getExtensions(nullptr); EXPECT_TRUE(hasSubstr(extensionsList, Extensions::sharingFormatQuery)); } TEST(SharingFactoryTests, givenEnabledFormatQueryAndFactoryWithNoSharingsWhenAskedForExtensionThenNoExtensionIsReturned) { DebugManagerStateRestore restorer; DebugManager.flags.EnableFormatQuery.set(true); SharingFactoryStateRestore sharingFactory; sharingFactory.clearCurrentState(); auto extensionsList = sharingFactory.getExtensions(nullptr); EXPECT_FALSE(hasSubstr(extensionsList, Extensions::sharingFormatQuery)); } compute-runtime-22.14.22890/opencl/test/unit_test/sharings/sharing_tests.cpp000066400000000000000000000120761422164147700270340ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "opencl/source/cl_device/cl_device.h" #include 
"opencl/source/helpers/cl_memory_properties_helpers.h" #include "opencl/source/mem_obj/mem_obj.h" #include "opencl/source/sharings/sharing.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "gtest/gtest.h" using namespace NEO; TEST(sharingHandler, givenBasicSharingHandlerWhenSynchronizeObjectThenErrorIsReturned) { struct SH : SharingHandler { int synchronizeHandlerMock(UpdateData &updateData) { return synchronizeHandler(updateData); } } sharingHandler; const uint32_t rootDeviceIndex = 1u; UpdateData updateData{rootDeviceIndex}; sharingHandler.synchronizeHandlerMock(updateData); EXPECT_EQ(SynchronizeStatus::SYNCHRONIZE_ERROR, updateData.synchronizationStatus); size_t paramSize = 0; void *paramValue = nullptr; // there is no default implementation. parameters should be unchanged. sharingHandler.getMemObjectInfo(paramSize, paramValue); EXPECT_EQ(paramSize, 0u); EXPECT_EQ(paramValue, nullptr); } TEST(sharingHandler, givenMemObjWhenAcquireIncrementCounterThenReleaseShouldDecrementIt) { char buffer[64]; MockContext context; MockGraphicsAllocation *mockAllocation = new MockGraphicsAllocation(buffer, sizeof(buffer)); std::unique_ptr memObj( new MemObj(&context, CL_MEM_OBJECT_BUFFER, ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_USE_HOST_PTR, 0, 0, &context.getDevice(0)->getDevice()), CL_MEM_USE_HOST_PTR, 0, sizeof(buffer), buffer, buffer, GraphicsAllocationHelper::toMultiGraphicsAllocation(mockAllocation), true, false, false)); struct MockSharingHandler : SharingHandler { using SharingHandler::acquireCount; void synchronizeObject(UpdateData &updateData) override { updateData.synchronizationStatus = ACQUIRE_SUCCESFUL; } } sharingHandler; EXPECT_EQ(0u, sharingHandler.acquireCount); sharingHandler.acquire(memObj.get(), mockAllocation->getRootDeviceIndex()); EXPECT_EQ(1u, sharingHandler.acquireCount); sharingHandler.release(memObj.get(), mockAllocation->getRootDeviceIndex()); EXPECT_EQ(0u, sharingHandler.acquireCount); } TEST(sharingHandler, 
givenMemObjWhenAcquireTwoTimesThenReleaseShouldBeCalledTwoTimesToReleaseObject) { char buffer[64]; MockContext context; MockGraphicsAllocation *mockAllocation = new MockGraphicsAllocation(buffer, sizeof(buffer)); std::unique_ptr memObj( new MemObj(&context, CL_MEM_OBJECT_BUFFER, ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_USE_HOST_PTR, 0, 0, &context.getDevice(0)->getDevice()), CL_MEM_USE_HOST_PTR, 0, sizeof(buffer), buffer, buffer, GraphicsAllocationHelper::toMultiGraphicsAllocation(mockAllocation), true, false, false)); struct MockSharingHandler : SharingHandler { using SharingHandler::acquireCount; void synchronizeObject(UpdateData &updateData) override { updateData.synchronizationStatus = ACQUIRE_SUCCESFUL; } void releaseResource(MemObj *memObject, uint32_t rootDeviceIndex) override { releaseCount++; }; int releaseCount = 0; } sharingHandler; EXPECT_EQ(0u, sharingHandler.acquireCount); sharingHandler.acquire(memObj.get(), mockAllocation->getRootDeviceIndex()); EXPECT_EQ(1u, sharingHandler.acquireCount); sharingHandler.acquire(memObj.get(), mockAllocation->getRootDeviceIndex()); EXPECT_EQ(2u, sharingHandler.acquireCount); sharingHandler.release(memObj.get(), mockAllocation->getRootDeviceIndex()); EXPECT_EQ(1u, sharingHandler.acquireCount); EXPECT_EQ(0, sharingHandler.releaseCount); sharingHandler.release(memObj.get(), mockAllocation->getRootDeviceIndex()); EXPECT_EQ(0u, sharingHandler.acquireCount); EXPECT_EQ(1, sharingHandler.releaseCount); } TEST(sharingHandler, givenSharingHandlerWhenValidateUpdateDataIsCalledWithNonNullInputThenAbortIsNotCalled) { class MockSharingHandler : SharingHandler { public: using SharingHandler::validateUpdateData; }; MockSharingHandler sharingHandler; const uint32_t rootDeviceIndex = 1u; UpdateData updateData{rootDeviceIndex}; sharingHandler.validateUpdateData(updateData); } TEST(sharingHandler, givenSharingHandlerWhenAcquiringThenReturnErrorCode) { SharingHandler sharingHandler; MockContext context; 
MockGraphicsAllocation *graphicsAllocation = new MockGraphicsAllocation(nullptr, 0); MemObj memObj(&context, CL_MEM_OBJECT_BUFFER, ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_USE_HOST_PTR, 0, 0, &context.getDevice(0)->getDevice()), CL_MEM_USE_HOST_PTR, 0, 1, nullptr, nullptr, GraphicsAllocationHelper::toMultiGraphicsAllocation(graphicsAllocation), true, false, false); auto result = sharingHandler.acquire(&memObj, graphicsAllocation->getRootDeviceIndex()); EXPECT_NE(CL_SUCCESS, result); } compute-runtime-22.14.22890/opencl/test/unit_test/sharings/unified/000077500000000000000000000000001422164147700250705ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/sharings/unified/CMakeLists.txt000066400000000000000000000012371422164147700276330ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_sharings_unified ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/unified_sharing_buffer_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/unified_sharing_fixtures.h ${CMAKE_CURRENT_SOURCE_DIR}/unified_sharing_image_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/unified_sharing_mocks.h ${CMAKE_CURRENT_SOURCE_DIR}/unified_sharing_tests.cpp ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_sharings_unified}) set_property(GLOBAL PROPERTY IGDRCL_SRCS_tests_sharings_unified ${IGDRCL_SRCS_tests_sharings_unified}) add_subdirectories() compute-runtime-22.14.22890/opencl/test/unit_test/sharings/unified/unified_sharing_buffer_tests.cpp000066400000000000000000000063551422164147700335160ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/sharings/unified/unified_buffer.h" #include "opencl/test/unit_test/sharings/unified/unified_sharing_fixtures.h" #include "opencl/test/unit_test/sharings/unified/unified_sharing_mocks.h" using namespace NEO; 
using UnifiedSharingBufferTestsWithMemoryManager = UnifiedSharingFixture; using UnifiedSharingBufferTestsWithInvalidMemoryManager = UnifiedSharingFixture; TEST_F(UnifiedSharingBufferTestsWithMemoryManager, givenUnifiedBufferThenItCanBeAcquiredAndReleased) { cl_mem_flags flags{}; UnifiedSharingMemoryDescription desc{}; desc.handle = reinterpret_cast(0x1234); desc.type = UnifiedSharingHandleType::Win32Nt; cl_int retVal{}; auto buffer = std::unique_ptr(UnifiedBuffer::createSharedUnifiedBuffer(context.get(), flags, desc, &retVal)); ASSERT_EQ(CL_SUCCESS, retVal); UnifiedSharingFunctions sharingFunctions; MockUnifiedBuffer *sharingHandler = new MockUnifiedBuffer(&sharingFunctions, desc.type); buffer->setSharingHandler(sharingHandler); ASSERT_EQ(0u, sharingHandler->acquireCount); ASSERT_EQ(CL_SUCCESS, sharingHandler->acquire(buffer.get(), context->getDevice(0)->getRootDeviceIndex())); EXPECT_EQ(1u, sharingHandler->acquireCount); ASSERT_EQ(CL_SUCCESS, sharingHandler->acquire(buffer.get(), context->getDevice(0)->getRootDeviceIndex())); EXPECT_EQ(2u, sharingHandler->acquireCount); sharingHandler->release(buffer.get(), context->getDevice(0)->getRootDeviceIndex()); EXPECT_EQ(1u, sharingHandler->acquireCount); sharingHandler->release(buffer.get(), context->getDevice(0)->getRootDeviceIndex()); EXPECT_EQ(0u, sharingHandler->acquireCount); } TEST_F(UnifiedSharingBufferTestsWithInvalidMemoryManager, givenValidContextAndAllocationFailsWhenCreatingBufferFromSharedHandleThenReturnInvalidMemObject) { cl_mem_flags flags{}; UnifiedSharingMemoryDescription desc{}; desc.handle = reinterpret_cast(0x1234); desc.type = UnifiedSharingHandleType::Win32Nt; cl_int retVal{}; auto buffer = std::unique_ptr(UnifiedBuffer::createSharedUnifiedBuffer(context.get(), flags, desc, &retVal)); ASSERT_EQ(CL_INVALID_MEM_OBJECT, retVal); } TEST_F(UnifiedSharingBufferTestsWithMemoryManager, givenUnsupportedHandleTypeWhenCreatingBufferFromSharedHandleThenReturnInvalidMemObject) { cl_mem_flags flags{}; cl_int 
retVal{}; UnifiedSharingMemoryDescription desc{}; desc.handle = reinterpret_cast(0x1234); auto buffer = std::unique_ptr(UnifiedBuffer::createSharedUnifiedBuffer(context.get(), flags, desc, &retVal)); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } TEST_F(UnifiedSharingBufferTestsWithMemoryManager, givenValidContextAndMemoryManagerWhenCreatingBufferFromSharedHandleThenReturnSuccess) { cl_mem_flags flags{}; UnifiedSharingMemoryDescription desc{}; desc.handle = reinterpret_cast(0x1234); desc.type = UnifiedSharingHandleType::Win32Nt; cl_int retVal{}; auto buffer = std::unique_ptr(UnifiedBuffer::createSharedUnifiedBuffer(context.get(), flags, desc, &retVal)); ASSERT_EQ(CL_SUCCESS, retVal); } compute-runtime-22.14.22890/opencl/test/unit_test/sharings/unified/unified_sharing_fixtures.h000066400000000000000000000075051422164147700323370ustar00rootroot00000000000000/* * Copyright (C) 2019-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/helpers/variable_backup.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/mocks/mock_gmm.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/sharings/unified/unified_sharing_types.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_context.h" namespace NEO { template struct UnifiedSharingContextFixture : ::testing::Test { void SetUp() override { device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); cl_device_id deviceId = device.get(); deviceVector = std::make_unique(&deviceId, 1); if (validContext) { context = createValidContext(); } else { context = createInvalidContext(); } } std::unique_ptr createContext(const cl_context_properties *contextProperties) { cl_int retVal{}; auto context = std::unique_ptr(Context::create(contextProperties, *deviceVector, nullptr, nullptr, retVal)); EXPECT_EQ(CL_SUCCESS, 
retVal); return context; } std::unique_ptr createValidContext() { const cl_context_properties contextProperties[] = { static_cast(UnifiedSharingContextType::DeviceHandle), 0, CL_CONTEXT_INTEROP_USER_SYNC, 1, 0}; return createContext(contextProperties); } std::unique_ptr createInvalidContext() { return createContext(nullptr); } std::unique_ptr device; std::unique_ptr deviceVector; std::unique_ptr context; }; template struct UnifiedSharingMockMemoryManager : MockMemoryManager { using MockMemoryManager::MockMemoryManager; GraphicsAllocation *createGraphicsAllocationFromNTHandle(void *handle, uint32_t rootDeviceIndex, AllocationType allocType) override { if (!validMemoryManager) { return nullptr; } auto graphicsAllocation = createMemoryAllocation(AllocationType::INTERNAL_HOST_MEMORY, nullptr, reinterpret_cast(1), 1, 4096u, reinterpret_cast(handle), MemoryPool::SystemCpuInaccessible, rootDeviceIndex, false, false, false); graphicsAllocation->setSharedHandle(static_cast(reinterpret_cast(handle))); graphicsAllocation->set32BitAllocation(false); graphicsAllocation->setDefaultGmm(new MockGmm(executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getGmmClientContext())); return graphicsAllocation; } }; template struct UnifiedSharingFixture : UnifiedSharingContextFixture { void SetUp() override { UnifiedSharingContextFixture::SetUp(); this->memoryManager = std::make_unique>(*this->device->getExecutionEnvironment()); this->memoryManagerBackup = std::make_unique>(&this->context->memoryManager, this->memoryManager.get()); } void TearDown() override { UnifiedSharingContextFixture::TearDown(); } std::unique_ptr> memoryManager; std::unique_ptr> memoryManagerBackup; }; } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/sharings/unified/unified_sharing_image_tests.cpp000066400000000000000000000203011422164147700333120ustar00rootroot00000000000000/* * Copyright (C) 2019-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include 
"shared/source/gmm_helper/gmm.h" #include "shared/test/common/mocks/mock_gmm_resource_info.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/sharings/unified/unified_image.h" #include "opencl/test/unit_test/os_interface/raii_hw_info_config.h" #include "opencl/test/unit_test/sharings/unified/unified_sharing_fixtures.h" #include "opencl/test/unit_test/sharings/unified/unified_sharing_mocks.h" using namespace NEO; using UnifiedSharingImageTestsWithMemoryManager = UnifiedSharingFixture; using UnifiedSharingImageTestsWithInvalidMemoryManager = UnifiedSharingFixture; static UnifiedSharingMemoryDescription getValidUnifiedSharingDesc() { UnifiedSharingMemoryDescription desc{}; desc.handle = reinterpret_cast(0x1234); desc.type = UnifiedSharingHandleType::Win32Nt; return desc; } static cl_image_format getValidImageFormat() { cl_image_format format{}; format.image_channel_data_type = CL_UNORM_INT8; format.image_channel_order = CL_RGBA; return format; } static cl_image_desc getValidImageDesc() { cl_image_desc imageDesc{}; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_width = 128; imageDesc.image_height = 128; imageDesc.image_depth = 1; imageDesc.image_array_size = 1; imageDesc.image_row_pitch = 256; imageDesc.image_slice_pitch = 0u; imageDesc.num_mip_levels = 1; imageDesc.num_samples = 0; imageDesc.buffer = nullptr; return imageDesc; } TEST_F(UnifiedSharingImageTestsWithInvalidMemoryManager, givenValidContextAndAllocationFailsWhenCreatingImageFromSharedHandleThenReturnInvalidMemObject) { cl_mem_flags flags{}; cl_int retVal{}; const auto format = getValidImageFormat(); const auto imageDesc = getValidImageDesc(); auto image = std::unique_ptr(UnifiedImage::createSharedUnifiedImage(context.get(), flags, getValidUnifiedSharingDesc(), &format, &imageDesc, &retVal)); ASSERT_EQ(CL_INVALID_MEM_OBJECT, retVal); } TEST_F(UnifiedSharingImageTestsWithMemoryManager, givenUnsupportedHandleTypeWhenCreatingImageFromSharedHandleThenReturnInvalidMemObject) { 
cl_mem_flags flags{}; cl_int retVal{}; UnifiedSharingMemoryDescription desc{}; desc.handle = reinterpret_cast(0x1234); const auto format = getValidImageFormat(); const auto imageDesc = getValidImageDesc(); auto image = std::unique_ptr(UnifiedImage::createSharedUnifiedImage(context.get(), flags, desc, &format, &imageDesc, &retVal)); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } TEST_F(UnifiedSharingImageTestsWithMemoryManager, givenValidContextAndMemoryManagerWhenCreatingImageFromSharedHandleThenReturnSuccess) { cl_mem_flags flags{}; cl_int retVal{}; const auto format = getValidImageFormat(); const auto imageDesc = getValidImageDesc(); auto image = std::unique_ptr(UnifiedImage::createSharedUnifiedImage(context.get(), flags, getValidUnifiedSharingDesc(), &format, &imageDesc, &retVal)); ASSERT_EQ(CL_SUCCESS, retVal); } TEST_F(UnifiedSharingImageTestsWithMemoryManager, givenPassedFormatWhenCreatingUnifiedImageThenFormatIsCorrectlySetInImageObject) { cl_image_format format{}; format.image_channel_data_type = CL_HALF_FLOAT; format.image_channel_order = CL_RG; cl_mem_flags flags{}; cl_int retVal{}; const auto imageDesc = getValidImageDesc(); auto image = std::unique_ptr(UnifiedImage::createSharedUnifiedImage(context.get(), flags, getValidUnifiedSharingDesc(), &format, &imageDesc, &retVal)); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(GMM_FORMAT_R16G16_FLOAT_TYPE, image->getSurfaceFormatInfo().surfaceFormat.GMMSurfaceFormat); EXPECT_EQ(GFX3DSTATE_SURFACEFORMAT_R16G16_FLOAT, image->getSurfaceFormatInfo().surfaceFormat.GenxSurfaceFormat); } template class MockHwInfoConfig : public HwInfoConfigHw { public: bool isPageTableManagerSupported(const HardwareInfo &hwInfo) const override { return pageTableManagerSupported; } }; struct MemoryManagerReturningCompressedAllocations : UnifiedSharingMockMemoryManager { GraphicsAllocation *createGraphicsAllocationFromNTHandle(void *handle, uint32_t rootDeviceIndex, AllocationType allocType) override { auto allocation = 
UnifiedSharingMockMemoryManager::createGraphicsAllocationFromNTHandle(handle, rootDeviceIndex, AllocationType::SHARED_IMAGE); auto gmm = allocation->getDefaultGmm(); auto mockGmmResourceInfo = std::make_unique(gmm->gmmResourceInfo->peekGmmResourceInfo()); mockGmmResourceInfo->setUnifiedAuxTranslationCapable(); gmm->gmmResourceInfo = std::move(mockGmmResourceInfo); return allocation; } bool mapAuxGpuVA(GraphicsAllocation *graphicsAllocation) override { calledMapAuxGpuVA++; return resultOfMapAuxGpuVA; } unsigned int calledMapAuxGpuVA{}; bool resultOfMapAuxGpuVA{}; }; HWTEST_F(UnifiedSharingImageTestsWithMemoryManager, givenCompressedImageAndNoPageTableManagerWhenCreatingUnifiedImageThenSetCorrespondingFieldInGmmAndDoNotUsePageTableManager) { MemoryManagerReturningCompressedAllocations memoryManager{}; VariableBackup memoryManagerBackup{&this->context->memoryManager, &memoryManager}; using HwInfoConfigNotSupportingPageTableManager = MockHwInfoConfig; RAIIHwInfoConfigFactory hwInfoConfigBackup{this->context->getDevice(0)->getHardwareInfo().platform.eProductFamily}; cl_mem_flags flags{}; cl_int retVal{}; const auto format = getValidImageFormat(); const auto imageDesc = getValidImageDesc(); auto image = std::unique_ptr(UnifiedImage::createSharedUnifiedImage(context.get(), flags, getValidUnifiedSharingDesc(), &format, &imageDesc, &retVal)); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(image->getGraphicsAllocation(device->getRootDeviceIndex())->getDefaultGmm()->isCompressionEnabled); EXPECT_EQ(0u, memoryManager.calledMapAuxGpuVA); } HWTEST_F(UnifiedSharingImageTestsWithMemoryManager, givenCompressedImageAndPageTableManagerWhenCreatingUnifiedImageThenSetCorrespondingFieldInGmmBasedOnAuxGpuVaMappingResult) { MemoryManagerReturningCompressedAllocations memoryManager{}; VariableBackup memoryManagerBackup{&this->context->memoryManager, &memoryManager}; using HwInfoConfigNotSupportingPageTableManager = MockHwInfoConfig; RAIIHwInfoConfigFactory 
hwInfoConfigBackup{this->context->getDevice(0)->getHardwareInfo().platform.eProductFamily}; cl_mem_flags flags{}; cl_int retVal{}; const auto format = getValidImageFormat(); const auto imageDesc = getValidImageDesc(); memoryManager.resultOfMapAuxGpuVA = true; auto image = std::unique_ptr(UnifiedImage::createSharedUnifiedImage(context.get(), flags, getValidUnifiedSharingDesc(), &format, &imageDesc, &retVal)); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(memoryManager.resultOfMapAuxGpuVA, image->getGraphicsAllocation(device->getRootDeviceIndex())->getDefaultGmm()->isCompressionEnabled); EXPECT_EQ(1u, memoryManager.calledMapAuxGpuVA); memoryManager.resultOfMapAuxGpuVA = false; image = std::unique_ptr(UnifiedImage::createSharedUnifiedImage(context.get(), flags, getValidUnifiedSharingDesc(), &format, &imageDesc, &retVal)); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(memoryManager.resultOfMapAuxGpuVA, image->getGraphicsAllocation(device->getRootDeviceIndex())->getDefaultGmm()->isCompressionEnabled); EXPECT_EQ(2u, memoryManager.calledMapAuxGpuVA); } compute-runtime-22.14.22890/opencl/test/unit_test/sharings/unified/unified_sharing_mocks.h000066400000000000000000000005071422164147700315750ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/sharings/unified/unified_buffer.h" namespace NEO { struct MockUnifiedBuffer : UnifiedBuffer { using UnifiedBuffer::acquireCount; using UnifiedBuffer::UnifiedBuffer; }; } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/sharings/unified/unified_sharing_tests.cpp000066400000000000000000000260061422164147700321600ustar00rootroot00000000000000/* * Copyright (C) 2019-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/helpers/variable_backup.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include 
"shared/test/common/test_macros/test.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/sharings/unified/enable_unified.h" #include "opencl/source/sharings/unified/unified_buffer.h" #include "opencl/source/sharings/unified/unified_sharing.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/sharings/unified/unified_sharing_fixtures.h" using namespace NEO; TEST(UnifiedSharingTests, givenContextCreatedWithExternalDeviceHandlePropertyWhenGettingUnifiedSharingThenReturnIt) { MockClDevice device{MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())}; cl_device_id deviceId = &device; ClDeviceVector allDevs(&deviceId, 1); cl_int retVal{}; const cl_context_properties context_props[] = { static_cast(UnifiedSharingContextType::DeviceHandle), 0, CL_CONTEXT_INTEROP_USER_SYNC, 1, 0}; auto context = std::unique_ptr(Context::create(context_props, allDevs, nullptr, nullptr, retVal)); auto sharingFunctions = context->getSharing(); EXPECT_NE(nullptr, sharingFunctions); } struct MockUnifiedSharingContextBuilder : UnifiedSharingContextBuilder { using UnifiedSharingContextBuilder::contextData; }; TEST(UnifiedSharingTests, givenExternalDeviceHandleWhenProcessingBySharingContextBuilderThenResultIsTrue) { MockUnifiedSharingContextBuilder builder{}; cl_context_properties propertyType = static_cast(UnifiedSharingContextType::DeviceHandle); cl_context_properties propertyValue = 0x1234; bool result = builder.processProperties(propertyType, propertyValue); EXPECT_TRUE(result); EXPECT_NE(nullptr, builder.contextData); } TEST(UnifiedSharingTests, givenExternalDeviceGroupHandleWhenProcessingBySharingContextBuilderThenResultIsTrue) { MockUnifiedSharingContextBuilder builder{}; cl_context_properties propertyType = static_cast(UnifiedSharingContextType::DeviceGroup); cl_context_properties propertyValue = 0x1234; bool result = builder.processProperties(propertyType, 
propertyValue); EXPECT_TRUE(result); EXPECT_NE(nullptr, builder.contextData); } TEST(UnifiedSharingTests, givenExternalDeviceGroupHandleWhenProcessingBySharingContextBuilderThenReturnSuccess) { MockUnifiedSharingContextBuilder builder{}; cl_context_properties propertyType = CL_CONTEXT_PLATFORM; cl_context_properties propertyValue = 0x1234; bool result = builder.processProperties(propertyType, propertyValue); EXPECT_FALSE(result); EXPECT_EQ(nullptr, builder.contextData); } TEST(UnifiedSharingTests, givenContextWithUserSyncWhenFinalizingPropertiesBySharingContextBuilderThenRegisterSharingInContextAndClearContextData) { MockUnifiedSharingContextBuilder builder{}; builder.contextData = std::make_unique(); MockContext context{}; context.setInteropUserSyncEnabled(true); cl_int retVal{}; bool result = builder.finalizeProperties(context, retVal); EXPECT_TRUE(result); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, context.sharingFunctions[UnifiedSharingFunctions::sharingId]); EXPECT_EQ(nullptr, builder.contextData); } TEST(UnifiedSharingTests, givenContextWithoutUserSyncWhenFinalizingPropertiesBySharingContextBuilderThenDoNotRegisterSharingInContextAndClearContextData) { MockUnifiedSharingContextBuilder builder{}; builder.contextData = std::make_unique(); MockContext context{}; context.setInteropUserSyncEnabled(false); cl_int retVal{}; bool result = builder.finalizeProperties(context, retVal); EXPECT_TRUE(result); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(nullptr, context.sharingFunctions[UnifiedSharingFunctions::sharingId]); EXPECT_EQ(nullptr, builder.contextData); } TEST(UnifiedSharingTests, givenBuilderWithoutContextDataWhenFinalizingPropertiesBySharingContextBuilderThenDoNotRegisterSharingInContext) { MockUnifiedSharingContextBuilder builder{}; MockContext context{}; cl_int retVal{}; bool result = builder.finalizeProperties(context, retVal); EXPECT_TRUE(result); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(nullptr, 
context.sharingFunctions[UnifiedSharingFunctions::sharingId]); EXPECT_EQ(nullptr, builder.contextData); } TEST(UnifiedSharingTests, givenSharingHandlerThenItReturnsCorrectValues) { UnifiedSharingFunctions sharingFunctions; EXPECT_EQ(UnifiedSharingFunctions::sharingId, sharingFunctions.getId()); UnifiedSharing sharingHandler{&sharingFunctions, UnifiedSharingHandleType::LinuxFd}; EXPECT_EQ(&sharingFunctions, sharingHandler.peekFunctionsHandler()); EXPECT_EQ(UnifiedSharingHandleType::LinuxFd, sharingHandler.getExternalMemoryType()); } using UnifiedSharingTestsWithMemoryManager = UnifiedSharingFixture; TEST_F(UnifiedSharingTestsWithMemoryManager, givenUnifiedSharingHandlerWhenAcquiringAndReleasingThenMethodsAreCalledAppropriately) { struct MockSharingHandler : UnifiedSharing { using UnifiedSharing::UnifiedSharing; unsigned int synchronizeObjectCalled = 0u; unsigned int releaseResourceCalled = 0u; void synchronizeObject(UpdateData &updateData) override { UnifiedSharing::synchronizeObject(updateData); synchronizeObjectCalled++; } void releaseResource(MemObj *memObject, uint32_t rootDeviceIndex) override { UnifiedSharing::releaseResource(memObject, rootDeviceIndex); releaseResourceCalled++; }; }; cl_mem_flags flags{}; UnifiedSharingMemoryDescription desc{}; desc.handle = reinterpret_cast(0x1234); desc.type = UnifiedSharingHandleType::Win32Nt; cl_int retVal{}; auto buffer = std::unique_ptr(UnifiedBuffer::createSharedUnifiedBuffer(context.get(), flags, desc, &retVal)); ASSERT_EQ(CL_SUCCESS, retVal); UnifiedSharingFunctions sharingFunctions; MockSharingHandler *sharingHandler = new MockSharingHandler(&sharingFunctions, desc.type); buffer->setSharingHandler(sharingHandler); ASSERT_EQ(0u, sharingHandler->synchronizeObjectCalled); ASSERT_EQ(CL_SUCCESS, sharingHandler->acquire(buffer.get(), context->getDevice(0)->getRootDeviceIndex())); EXPECT_EQ(1u, sharingHandler->synchronizeObjectCalled); ASSERT_EQ(CL_SUCCESS, sharingHandler->acquire(buffer.get(), 
context->getDevice(0)->getRootDeviceIndex())); EXPECT_EQ(1u, sharingHandler->synchronizeObjectCalled); ASSERT_EQ(0u, sharingHandler->releaseResourceCalled); sharingHandler->release(buffer.get(), context->getDevice(0)->getRootDeviceIndex()); EXPECT_EQ(0u, sharingHandler->releaseResourceCalled); sharingHandler->release(buffer.get(), context->getDevice(0)->getRootDeviceIndex()); EXPECT_EQ(1u, sharingHandler->releaseResourceCalled); } struct UnifiedSharingCreateAllocationTests : UnifiedSharingTestsWithMemoryManager { struct MemoryManagerCheckingAllocationMethod : MockMemoryManager { using MockMemoryManager::MockMemoryManager; GraphicsAllocation *createGraphicsAllocationFromNTHandle(void *handle, uint32_t rootDeviceIndex, AllocationType allocType) override { this->createFromNTHandleCalled = true; this->handle = toOsHandle(handle); return nullptr; } GraphicsAllocation *createGraphicsAllocationFromSharedHandle(osHandle handle, const AllocationProperties &properties, bool requireSpecificBitness, bool isHostIpcAllocation) override { this->createFromSharedHandleCalled = true; this->handle = handle; this->properties = std::make_unique(properties); return nullptr; } bool createFromNTHandleCalled = false; bool createFromSharedHandleCalled = false; osHandle handle; std::unique_ptr properties; }; struct MockSharingHandler : UnifiedSharing { using UnifiedSharing::createGraphicsAllocation; }; void SetUp() override { UnifiedSharingTestsWithMemoryManager::SetUp(); this->memoryManager = std::make_unique(); this->memoryManagerBackup = std::make_unique>(&this->context->memoryManager, this->memoryManager.get()); } std::unique_ptr memoryManager; std::unique_ptr> memoryManagerBackup; }; TEST_F(UnifiedSharingCreateAllocationTests, givenWindowsNtHandleWhenCreateGraphicsAllocationIsCalledThenUseNtHandleMethod) { UnifiedSharingMemoryDescription desc{}; desc.handle = reinterpret_cast(0x1234); desc.type = UnifiedSharingHandleType::Win32Nt; AllocationType allocationType = 
AllocationType::SHARED_IMAGE; MockSharingHandler::createGraphicsAllocation(this->context.get(), desc, allocationType); EXPECT_TRUE(memoryManager->createFromNTHandleCalled); EXPECT_FALSE(memoryManager->createFromSharedHandleCalled); EXPECT_EQ(toOsHandle(desc.handle), memoryManager->handle); } TEST_F(UnifiedSharingCreateAllocationTests, givenWindowsSharedHandleWhenCreateGraphicsAllocationIsCalledThenUseSharedHandleMethod) { UnifiedSharingMemoryDescription desc{}; desc.handle = reinterpret_cast(0x1234); desc.type = UnifiedSharingHandleType::Win32Shared; AllocationType allocationType = AllocationType::SHARED_IMAGE; MockSharingHandler::createGraphicsAllocation(this->context.get(), desc, allocationType); EXPECT_FALSE(memoryManager->createFromNTHandleCalled); EXPECT_TRUE(memoryManager->createFromSharedHandleCalled); EXPECT_EQ(toOsHandle(desc.handle), memoryManager->handle); const AllocationProperties expectedProperties{0u, false, 0u, allocationType, false, {}}; EXPECT_EQ(expectedProperties.allFlags, memoryManager->properties->allFlags); } TEST_F(UnifiedSharingCreateAllocationTests, givenLinuxSharedHandleWhenCreateGraphicsAllocationIsCalledThenUseSharedHandleMethod) { UnifiedSharingMemoryDescription desc{}; desc.handle = reinterpret_cast(0x1234); desc.type = UnifiedSharingHandleType::LinuxFd; AllocationType allocationType = AllocationType::SHARED_IMAGE; MockSharingHandler::createGraphicsAllocation(this->context.get(), desc, allocationType); EXPECT_FALSE(memoryManager->createFromNTHandleCalled); EXPECT_TRUE(memoryManager->createFromSharedHandleCalled); EXPECT_EQ(toOsHandle(desc.handle), memoryManager->handle); const AllocationProperties expectedProperties{0u, false, 0u, allocationType, false, {}}; EXPECT_EQ(expectedProperties.allFlags, memoryManager->properties->allFlags); } 
compute-runtime-22.14.22890/opencl/test/unit_test/sharings/va/000077500000000000000000000000001422164147700240535ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/sharings/va/CMakeLists.txt000066400000000000000000000023261422164147700266160ustar00rootroot00000000000000# # Copyright (C) 2018-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_sharings_va ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/cl_create_from_va_media_surface_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cl_enqueue_acquire_va_media_surfaces_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cl_enqueue_release_va_media_surfaces_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_extension_function_address_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/context_va_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_va_image_arg_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_va_sharing.h ${CMAKE_CURRENT_SOURCE_DIR}/va_base_object_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/va_device_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/va_sharing_linux_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/va_sharing_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/va_sharing_factory_tests.cpp ) set_property(GLOBAL PROPERTY IGDRCL_SRCS_tests_sharings_va ${IGDRCL_SRCS_tests_sharings_va}) if(NEO__LIBVA_FOUND) list(APPEND IGDRCL_SRCS_tests_sharings_va ${CMAKE_CURRENT_SOURCE_DIR}/va_sharing_enable_tests.cpp) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_sharings_va}) endif() add_subdirectories() cl_create_from_va_media_surface_tests.cpp000066400000000000000000000011031422164147700342160ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/sharings/va/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/api/cl_api_tests.h" #include using namespace NEO; typedef api_tests clCreateFromVaMediaSurfaceTests; namespace ULT { TEST_F(clCreateFromVaMediaSurfaceTests, givenNullContextWhenCreateIsCalledThenErrorIsReturned) { auto 
memObj = clCreateFromVA_APIMediaSurfaceINTEL(nullptr, CL_MEM_READ_WRITE, nullptr, 0, &retVal); EXPECT_EQ(nullptr, memObj); EXPECT_EQ(CL_INVALID_CONTEXT, retVal); } } // namespace ULT cl_enqueue_acquire_va_media_surfaces_tests.cpp000066400000000000000000000011001422164147700352700ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/sharings/va/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/api/cl_api_tests.h" #include using namespace NEO; typedef api_tests clEnqueueAcquireVaMediaSurfacesTests; namespace ULT { TEST_F(clEnqueueAcquireVaMediaSurfacesTests, givenNullCommandQueueWhenAcquireIsCalledThenInvalidCommandQueueIsReturned) { retVal = clEnqueueAcquireVA_APIMediaSurfacesINTEL(nullptr, 0, nullptr, 0, nullptr, nullptr); EXPECT_EQ(retVal, CL_INVALID_COMMAND_QUEUE); } } // namespace ULT cl_enqueue_release_va_media_surfaces_tests.cpp000066400000000000000000000011071422164147700352660ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/sharings/va/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/api/cl_api_tests.h" #include using namespace NEO; typedef api_tests clEnqueueReleaseVaMediaSurfacesTests; namespace ULT { TEST_F(clEnqueueReleaseVaMediaSurfacesTests, givenNullCommandQueueWhenReleaseObjectsIsCalledThenInvalidCommandQueueIsReturned) { retVal = clEnqueueReleaseVA_APIMediaSurfacesINTEL(nullptr, 0, nullptr, 0, nullptr, nullptr); EXPECT_EQ(retVal, CL_INVALID_COMMAND_QUEUE); } } // namespace ULT cl_get_extension_function_address_tests.cpp000066400000000000000000000045771422164147700347020ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/sharings/va/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "opencl/source/sharings/va/cl_va_api.h" #include 
"opencl/test/unit_test/api/cl_api_tests.h" using namespace NEO; typedef api_tests clGetExtensionFunctionAddressTests; namespace ULT { TEST_F(clGetExtensionFunctionAddressTests, GivenClCreateFromVaMediaSurfaceIntelWhenGettingFunctionAddressThenCorrectPointerReturned) { auto retVal = clGetExtensionFunctionAddress("clCreateFromVA_APIMediaSurfaceINTEL"); EXPECT_EQ(retVal, reinterpret_cast(clCreateFromVA_APIMediaSurfaceINTEL)); } TEST_F(clGetExtensionFunctionAddressTests, GivenClEnqueueAcquireVaApiMediaSurfacesIntelWhenGettingFunctionAddressThenCorrectPointerReturned) { auto retVal = clGetExtensionFunctionAddress("clEnqueueAcquireVA_APIMediaSurfacesINTEL"); EXPECT_EQ(retVal, reinterpret_cast(clEnqueueAcquireVA_APIMediaSurfacesINTEL)); } TEST_F(clGetExtensionFunctionAddressTests, GivenClEnqueueReleaseVaApiMediaSurfacesIntelWhenGettingFunctionAddressThenCorrectPointerReturned) { auto retVal = clGetExtensionFunctionAddress("clEnqueueReleaseVA_APIMediaSurfacesINTEL"); EXPECT_EQ(retVal, reinterpret_cast(clEnqueueReleaseVA_APIMediaSurfacesINTEL)); } TEST_F(clGetExtensionFunctionAddressTests, GivenClGetDeviceIDsFromVaApiMediaAdapterIntelWhenGettingFunctionAddressThenCorrectPointerReturned) { auto retVal = clGetExtensionFunctionAddress("clGetDeviceIDsFromVA_APIMediaAdapterINTEL"); EXPECT_EQ(retVal, reinterpret_cast(clGetDeviceIDsFromVA_APIMediaAdapterINTEL)); } TEST_F(clGetExtensionFunctionAddressTests, givenEnabledFormatQueryWhenGettingFuncionAddressThenCorrectAddressIsReturned) { DebugManagerStateRestore restorer; DebugManager.flags.EnableFormatQuery.set(true); auto retVal = clGetExtensionFunctionAddress("clGetSupportedVA_APIMediaSurfaceFormatsINTEL"); EXPECT_EQ(retVal, reinterpret_cast(clGetSupportedVA_APIMediaSurfaceFormatsINTEL)); } TEST_F(clGetExtensionFunctionAddressTests, givenDisabledFormatQueryWhenGettingFuncionAddressThenNullptrIsReturned) { DebugManagerStateRestore restorer; DebugManager.flags.EnableFormatQuery.set(false); auto retVal = 
clGetExtensionFunctionAddress("clGetSupportedVA_APIMediaSurfaceFormatsINTEL"); EXPECT_EQ(retVal, nullptr); } } // namespace ULT compute-runtime-22.14.22890/opencl/test/unit_test/sharings/va/context_va_tests.cpp000066400000000000000000000043501422164147700301550ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "opencl/source/sharings/va/va_sharing.h" #include "opencl/test/unit_test/fixtures/platform_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "gtest/gtest.h" using namespace NEO; struct VAContextTest : public PlatformFixture, public ::testing::Test { using PlatformFixture::SetUp; VAContextTest() { } void SetUp() override { PlatformFixture::SetUp(); cl_platform_id platform = pPlatform; properties = new cl_context_properties[3]; properties[0] = CL_CONTEXT_PLATFORM; properties[1] = (cl_context_properties)platform; properties[2] = 0; context = Context::create(properties, ClDeviceVector(devices, num_devices), nullptr, nullptr, retVal); ASSERT_NE(nullptr, context); } void TearDown() override { delete[] properties; delete context; PlatformFixture::TearDown(); } cl_int retVal = CL_SUCCESS; Context *context = nullptr; cl_context_properties *properties = nullptr; }; TEST_F(VAContextTest, GivenDefaultThenSharingNotPresent) { ASSERT_EQ(context->getSharing(), nullptr); } TEST_F(VAContextTest, GivenVaContextParamWhenCreateContextThenReturnError) { cl_device_id deviceID = devices[0]; auto pPlatform = NEO::platform(); cl_platform_id pid[1]; pid[0] = pPlatform; DebugManagerStateRestore dbgRestorer; DebugManager.flags.EnableVaLibCalls.set(false); // avoid libva calls on initialization cl_context_properties validProperties[5] = {CL_CONTEXT_PLATFORM, (cl_context_properties)pid[0], CL_CONTEXT_VA_API_DISPLAY_INTEL, 0x10000, 0}; cl_int retVal = CL_SUCCESS; auto ctx = 
Context::create(validProperties, ClDeviceVector(&deviceID, 1), nullptr, nullptr, retVal); // not supported by default // use MockVaSharing to test va-sharing functionality EXPECT_EQ(CL_INVALID_VA_API_MEDIA_ADAPTER_INTEL, retVal); EXPECT_EQ(nullptr, ctx); } compute-runtime-22.14.22890/opencl/test/unit_test/sharings/va/kernel_va_image_arg_tests.cpp000066400000000000000000000024261422164147700317460ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/source/sharings/va/va_surface.h" #include "opencl/test/unit_test/fixtures/kernel_arg_fixture.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "opencl/test/unit_test/sharings/va/mock_va_sharing.h" #include "gtest/gtest.h" using namespace NEO; TEST_F(KernelImageArgTest, givenSharedImageWhenSetArgIsCalledThenReportSharedObjUsage) { MockVaSharing vaSharing; VASurfaceID vaSurfaceId = 0u; vaSharing.updateAcquiredHandle(1u); std::unique_ptr sharedImage(VASurface::createSharedVaSurface(context.get(), &vaSharing.sharingFunctions, CL_MEM_READ_WRITE, 0, &vaSurfaceId, 0, nullptr)); auto sharedMem = static_cast(sharedImage.get()); auto nonSharedMem = static_cast(image.get()); EXPECT_FALSE(pKernel->isUsingSharedObjArgs()); this->pKernel->setArg(0, sizeof(cl_mem *), &nonSharedMem); EXPECT_FALSE(pKernel->isUsingSharedObjArgs()); this->pKernel->setArg(0, sizeof(cl_mem *), &sharedMem); EXPECT_TRUE(pKernel->isUsingSharedObjArgs()); } compute-runtime-22.14.22890/opencl/test/unit_test/sharings/va/mock_va_sharing.h000066400000000000000000000122721422164147700273620ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/aligned_memory.h" #include "opencl/source/sharings/va/va_sharing.h" #include #include namespace NEO { class VASharingFunctionsMock : 
public VASharingFunctions { public: using VASharingFunctions::mutex; using VASharingFunctions::supported2PlaneFormats; using VASharingFunctions::supported3PlaneFormats; VAImage mockVaImage = {}; int32_t derivedImageFormatFourCC = VA_FOURCC_NV12; int32_t derivedImageFormatBpp = 8; uint16_t derivedImageHeight = 256; uint16_t derivedImageWidth = 256; VAStatus queryImageFormatsReturnStatus = VA_STATUS_SUCCESS; VAStatus syncSurfaceReturnStatus = VA_STATUS_SUCCESS; bool isValidDisplayCalled = false; bool deriveImageCalled = false; bool destroyImageCalled = false; bool syncSurfaceCalled = false; bool extGetSurfaceHandleCalled = false; bool exportSurfaceHandleCalled = false; VASurfaceID syncedSurfaceID = 0; osHandle acquiredVaHandle = 0; bool haveExportSurfaceHandle = false; uint32_t receivedSurfaceMemType = 0; uint32_t receivedSurfaceFlags = 0; VADRMPRIMESurfaceDescriptor mockVaSurfaceDesc{ VA_FOURCC_NV12, 256, 256, 1, {{8, 98304, I915_FORMAT_MOD_Y_TILED}, {}, {}, {}}, 2, { {DRM_FORMAT_R8, 1, {}, {0, 0, 0, 0}, {256, 0, 0, 0}}, {DRM_FORMAT_GR88, 1, {}, {65536, 0, 0, 0}, {256, 0, 0, 0}}, {0, 0, {}, {0, 0, 0, 0}, {0, 0, 0, 0}}, {0, 0, {}, {0, 0, 0, 0}, {0, 0, 0, 0}}, }}; VASharingFunctionsMock(VADisplay vaDisplay) : VASharingFunctions(vaDisplay) {} VASharingFunctionsMock() : VASharingFunctionsMock(nullptr){}; VAStatus deriveImage(VASurfaceID vaSurface, VAImage *vaImage) override { deriveImageCalled = true; uint32_t pitch; vaImage->height = derivedImageHeight; vaImage->width = derivedImageWidth; pitch = alignUp(derivedImageWidth, 128); vaImage->offsets[1] = alignUp(vaImage->height, 32) * pitch; vaImage->offsets[2] = vaImage->offsets[1] + 1; vaImage->pitches[0] = pitch; vaImage->pitches[1] = pitch; vaImage->pitches[2] = pitch; vaImage->format.fourcc = derivedImageFormatFourCC; vaImage->format.bits_per_pixel = derivedImageFormatBpp; mockVaImage.width = vaImage->width; mockVaImage.height = vaImage->height; return VA_STATUS_SUCCESS; } bool isValidVaDisplay() override { 
isValidDisplayCalled = true; return 1; } VAStatus destroyImage(VAImageID vaImageId) override { destroyImageCalled = true; return VA_STATUS_SUCCESS; } VAStatus extGetSurfaceHandle(VASurfaceID *vaSurface, unsigned int *handleId) override { extGetSurfaceHandleCalled = true; *handleId = acquiredVaHandle; return VA_STATUS_SUCCESS; } VAStatus exportSurfaceHandle(VASurfaceID vaSurface, uint32_t memType, uint32_t flags, void *descriptor) override { exportSurfaceHandleCalled = true; receivedSurfaceMemType = memType; receivedSurfaceFlags = flags; if (haveExportSurfaceHandle) { if (memType != VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2) { return VA_STATUS_ERROR_UNSUPPORTED_MEMORY_TYPE; } *(static_cast(descriptor)) = mockVaSurfaceDesc; return VA_STATUS_SUCCESS; } return VA_STATUS_ERROR_UNIMPLEMENTED; } VAStatus syncSurface(VASurfaceID vaSurface) override { syncSurfaceCalled = true; syncedSurfaceID = vaSurface; return syncSurfaceReturnStatus; } VAStatus queryImageFormats(VADisplay vaDisplay, VAImageFormat *formatList, int *numFormats) override { if (queryImageFormatsReturnStatus != VA_STATUS_SUCCESS) { return queryImageFormatsReturnStatus; } if (numFormats) { *numFormats = 4; } if (formatList) { formatList[0].fourcc = VA_FOURCC_NV12; formatList[0].bits_per_pixel = 12; formatList[0].byte_order = VA_LSB_FIRST; formatList[1].fourcc = VA_FOURCC_P010; formatList[1].bits_per_pixel = 10; formatList[1].byte_order = VA_LSB_FIRST; formatList[2].fourcc = VA_FOURCC_P016; formatList[2].bits_per_pixel = 16; formatList[2].byte_order = VA_LSB_FIRST; formatList[3].fourcc = VA_FOURCC_RGBP; formatList[3].bits_per_pixel = 8; formatList[3].byte_order = VA_LSB_FIRST; } return VA_STATUS_SUCCESS; } int maxNumImageFormats(VADisplay vaDisplay) override { return 4; } }; class MockVaSharing { public: void updateAcquiredHandle() { sharingFunctions.acquiredVaHandle = sharingHandle; } void updateAcquiredHandle(unsigned int handle) { sharingHandle = handle; sharingFunctions.acquiredVaHandle = sharingHandle; } 
VASharingFunctionsMock sharingFunctions; osHandle sharingHandle = 0; }; } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/sharings/va/va_base_object_tests.cpp000066400000000000000000000037471422164147700307420ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/accelerators/intel_accelerator.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/helpers/base_object.h" #include "opencl/source/sharings/sharing_factory.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "gtest/gtest.h" namespace NEO { template struct VABaseObjectTests : public ::testing::Test { void SetUp() override { } void TearDown() override { } }; typedef ::testing::Types< MockPlatform, IntelAccelerator, //Context, //Program, //Kernel, //Sampler //others... MockCommandQueue> BaseObjectTypes; TYPED_TEST_CASE(VABaseObjectTests, BaseObjectTypes); TYPED_TEST(VABaseObjectTests, GivenCommonRuntimeThenDispatchTableAtFirstPointerInObject) { TypeParam objectDrv; // Automatic downcasting to _cl_type *. 
typename TypeParam::BaseType *objectCL = &objectDrv; sharingFactory.fillGlobalDispatchTable(); // Common runtime casts to generic type assuming // the dispatch table is the first ptr in the structure auto genericObject = reinterpret_cast(objectCL); EXPECT_EQ(reinterpret_cast(clCreateFromVA_APIMediaSurfaceINTEL), genericObject->dispatch.crtDispatch->clCreateFromVA_APIMediaSurfaceINTEL); EXPECT_EQ(reinterpret_cast(clEnqueueAcquireVA_APIMediaSurfacesINTEL), genericObject->dispatch.crtDispatch->clEnqueueAcquireVA_APIMediaSurfacesINTEL); EXPECT_EQ(reinterpret_cast(clEnqueueReleaseVA_APIMediaSurfacesINTEL), genericObject->dispatch.crtDispatch->clEnqueueReleaseVA_APIMediaSurfacesINTEL); EXPECT_EQ(reinterpret_cast(clGetDeviceIDsFromVA_APIMediaAdapterINTEL), genericObject->dispatch.crtDispatch->clGetDeviceIDsFromVA_APIMediaAdapterINTEL); } } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/sharings/va/va_device_tests.cpp000066400000000000000000000001321422164147700277220ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ compute-runtime-22.14.22890/opencl/test/unit_test/sharings/va/va_sharing_enable_tests.cpp000066400000000000000000000162751422164147700314430ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/string.h" #include "shared/source/os_interface/driver_info.h" #include "shared/test/common/fixtures/memory_management_fixture.h" #include "shared/test/common/helpers/variable_backup.h" #include "opencl/source/sharings/va/enable_va.h" #include "opencl/source/sharings/va/va_sharing_functions.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "gtest/gtest.h" using namespace NEO; static int vaDisplayIsValidRet = 1; extern "C" int vaDisplayIsValid(VADisplay vaDisplay) { return vaDisplayIsValidRet; } class MockDriverInfo : public DriverInfo { public: MockDriverInfo(bool imageSupport) : 
imageSupport(imageSupport) {} bool getImageSupport() override { return imageSupport; }; bool imageSupport = true; }; class VaSharingEnablerTests : public MemoryManagementFixture, public ::testing::Test { public: void SetUp() override { MemoryManagementFixture::SetUp(); factory.reset(new VaSharingBuilderFactory()); ASSERT_NE(nullptr, factory.get()); } void TearDown() override { factory.reset(nullptr); MemoryManagementFixture::TearDown(); } std::unique_ptr factory; }; TEST_F(VaSharingEnablerTests, givenVaFactoryWhenImagesUnsupportedOrLibVaUnavailableThenNoExtensionIsReturned) { // hijack dlopen function VariableBackup> bkp(&VASharingFunctions::fdlopen); bkp = [&](const char *filename, int flag) -> void * { // no libva in system return nullptr; }; for (bool imagesSupported : {false, true}) { MockDriverInfo driverInfo(imagesSupported); auto ext = factory->getExtensions(&driverInfo); EXPECT_EQ(0u, ext.length()); EXPECT_STREQ("", ext.c_str()); } } TEST_F(VaSharingEnablerTests, givenVaFactoryWhenImagesSupportedAndLibVaAvailableThenExtensionStringIsReturned) { VariableBackup> bkpOpen(&VASharingFunctions::fdlopen); bkpOpen = [&](const char *filename, int flag) -> void * { return this; }; VariableBackup> bkpClose(&VASharingFunctions::fdlclose); bkpClose = [&](void *handle) -> int { return 0; }; VariableBackup> bkpSym(&VASharingFunctions::fdlsym); bkpSym = [&](void *handle, const char *symbol) -> void * { return nullptr; }; for (bool imagesSupported : {false, true}) { MockDriverInfo driverInfo(imagesSupported); auto ext = factory->getExtensions(&driverInfo); EXPECT_STREQ(imagesSupported ? 
"cl_intel_va_api_media_sharing " : "", ext.c_str()); } } TEST_F(VaSharingEnablerTests, givenVaFactoryWhenAskedThenGlobalIcdIsConfigured) { class CrtRestore { public: CrtRestore() { crtSnapshot = crtGlobalDispatchTable; } ~CrtRestore() { crtGlobalDispatchTable = crtSnapshot; } decltype(crtGlobalDispatchTable) crtSnapshot; }; // we play with global table, so first save state then restore it with use of RAII CrtRestore crtRestore; crtGlobalDispatchTable.clCreateFromVA_APIMediaSurfaceINTEL = nullptr; crtGlobalDispatchTable.clEnqueueReleaseVA_APIMediaSurfacesINTEL = nullptr; crtGlobalDispatchTable.clEnqueueAcquireVA_APIMediaSurfacesINTEL = nullptr; crtGlobalDispatchTable.clGetDeviceIDsFromVA_APIMediaAdapterINTEL = nullptr; factory->fillGlobalDispatchTable(); EXPECT_NE(nullptr, crtGlobalDispatchTable.clCreateFromVA_APIMediaSurfaceINTEL); EXPECT_NE(nullptr, crtGlobalDispatchTable.clEnqueueAcquireVA_APIMediaSurfacesINTEL); EXPECT_NE(nullptr, crtGlobalDispatchTable.clEnqueueAcquireVA_APIMediaSurfacesINTEL); EXPECT_NE(nullptr, crtGlobalDispatchTable.clGetDeviceIDsFromVA_APIMediaAdapterINTEL); } TEST_F(VaSharingEnablerTests, givenVaFactoryWhenAskedThenBuilderIsCreated) { auto builder = factory->createContextBuilder(); EXPECT_NE(nullptr, builder); } TEST_F(VaSharingEnablerTests, givenVaBuilderWhenUnknownPropertyThenFalseIsReturned) { auto builder = factory->createContextBuilder(); ASSERT_NE(nullptr, builder); cl_context_properties property = CL_CONTEXT_PLATFORM; cl_context_properties value; auto res = builder->processProperties(property, value); EXPECT_FALSE(res); } TEST_F(VaSharingEnablerTests, givenVaBuilderWhenValidPropertyThenTrueIsReturned) { auto builder = factory->createContextBuilder(); ASSERT_NE(nullptr, builder); cl_context_properties property = CL_CONTEXT_VA_API_DISPLAY_INTEL; cl_context_properties value = 0x1243; auto res = builder->processProperties(property, value); EXPECT_TRUE(res); //repeat to check if we don't allocate twice auto prevAllocations = 
MemoryManagement::numAllocations.load(); res = builder->processProperties(property, value); EXPECT_TRUE(res); auto currAllocations = MemoryManagement::numAllocations.load(); EXPECT_EQ(prevAllocations, currAllocations); } TEST_F(VaSharingEnablerTests, givenVaBuilderWhenNoPropertiesThenFinalizerReturnsTrue) { auto builder = factory->createContextBuilder(); ASSERT_NE(nullptr, builder); MockContext context; int32_t errcodeRet = CL_SUCCESS; auto res = builder->finalizeProperties(context, errcodeRet); EXPECT_TRUE(res); EXPECT_EQ(CL_SUCCESS, errcodeRet); } TEST_F(VaSharingEnablerTests, givenVaBuilderWhenInvalidPropertiesThenFinalizerReturnsTrue) { auto builder = factory->createContextBuilder(); ASSERT_NE(nullptr, builder); cl_context_properties property = CL_CONTEXT_PLATFORM; cl_context_properties value; auto res = builder->processProperties(property, value); EXPECT_FALSE(res); MockContext context; int32_t errcodeRet = CL_SUCCESS; res = builder->finalizeProperties(context, errcodeRet); EXPECT_TRUE(res); EXPECT_EQ(CL_SUCCESS, errcodeRet); } TEST_F(VaSharingEnablerTests, givenVaBuilderWhenValidPropertyButInvalidDisplayThenFinalizerReturnsFalseAndErrcode) { auto builder = factory->createContextBuilder(); ASSERT_NE(nullptr, builder); vaDisplayIsValidRet = 0; cl_context_properties property = CL_CONTEXT_VA_API_DISPLAY_INTEL; cl_context_properties value = 0x10000; auto res = builder->processProperties(property, value); EXPECT_TRUE(res); MockContext context; int32_t errcodeRet = CL_SUCCESS; res = builder->finalizeProperties(context, errcodeRet); EXPECT_FALSE(res); EXPECT_EQ(CL_INVALID_VA_API_MEDIA_ADAPTER_INTEL, errcodeRet); } TEST_F(VaSharingEnablerTests, givenVaBuilderWhenValidPropertyButValidDisplayThenFinalizerReturnsTrue) { auto builder = factory->createContextBuilder(); ASSERT_NE(nullptr, builder); vaDisplayIsValidRet = 1; cl_context_properties property = CL_CONTEXT_VA_API_DISPLAY_INTEL; cl_context_properties value = 0x10000; auto res = builder->processProperties(property, 
value); EXPECT_TRUE(res); MockContext context; int32_t errcodeRet = CL_SUCCESS; res = builder->finalizeProperties(context, errcodeRet); EXPECT_TRUE(res); EXPECT_EQ(CL_SUCCESS, errcodeRet); } compute-runtime-22.14.22890/opencl/test/unit_test/sharings/va/va_sharing_factory_tests.cpp000066400000000000000000000016211422164147700316510ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/sharings/va/enable_va.h" #include "gtest/gtest.h" using namespace NEO; class TestVaSharingBuilderFactory : public VaSharingBuilderFactory { public: void *getExtensionFunctionAddressExtra(const std::string &functionName) override { if (functionName == "someFunction") { invocationCount++; return reinterpret_cast(0x1234); } return nullptr; } uint32_t invocationCount = 0u; }; TEST(SharingFactoryTests, givenVaFactoryWithSharingExtraWhenAskedThenAddressIsReturned) { TestVaSharingBuilderFactory sharing; ASSERT_EQ(0u, sharing.invocationCount); auto ptr = sharing.getExtensionFunctionAddress("someFunction"); EXPECT_NE(nullptr, ptr); EXPECT_EQ(1u, sharing.invocationCount); }compute-runtime-22.14.22890/opencl/test/unit_test/sharings/va/va_sharing_linux_tests.cpp000066400000000000000000000161521422164147700313460ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/variable_backup.h" #include "opencl/source/sharings/va/va_sharing_functions.h" #include "opencl/test/unit_test/sharings/va/mock_va_sharing.h" #include "gtest/gtest.h" #include #include using namespace NEO; class VASharingFunctionsTested : public VASharingFunctions { public: VASharingFunctionsTested() : VASharingFunctions(nullptr) {} bool wereFunctionsAssigned() const { return vaDisplayIsValidPFN != nullptr && vaDeriveImagePFN != nullptr && vaDestroyImagePFN != nullptr && vaSyncSurfacePFN != nullptr 
&& vaGetLibFuncPFN != nullptr && vaExtGetSurfaceHandlePFN != nullptr; } bool wereFunctionsAssignedNull() const { return vaDisplayIsValidPFN == nullptr && vaDeriveImagePFN == nullptr && vaDestroyImagePFN == nullptr && vaSyncSurfacePFN == nullptr && vaGetLibFuncPFN == nullptr && vaExtGetSurfaceHandlePFN == nullptr; } }; TEST(VASharingFunctions, GivenInitFunctionsWhenDLOpenFailsThenFunctionsAreNull) { VariableBackup dlopenBackup(&VASharingFunctions::fdlopen); VariableBackup dlsymBackup(&VASharingFunctions::fdlsym); VariableBackup dlcloseBackup(&VASharingFunctions::fdlclose); VASharingFunctions::fdlopen = [&](const char *filename, int flag) -> void * { return nullptr; }; VASharingFunctions::fdlsym = [&](void *handle, const char *symbol) -> void * { return nullptr; }; VASharingFunctions::fdlclose = [&](void *handle) -> int { return 0; }; VASharingFunctionsTested functions; EXPECT_TRUE(functions.wereFunctionsAssignedNull()); } void *GetLibFunc(VADisplay vaDisplay, const char *func) { return reinterpret_cast(uintptr_t(0xdeadbeef)); } TEST(VASharingFunctions, GivenInitFunctionsWhenDLOpenSuccedsThenFunctionsAreNotNull) { VariableBackup dlopenBackup(&VASharingFunctions::fdlopen); VariableBackup dlsymBackup(&VASharingFunctions::fdlsym); VariableBackup dlcloseBackup(&VASharingFunctions::fdlclose); std::unique_ptr valib(new uint32_t); ASSERT_NE(nullptr, valib.get()); VASharingFunctions::fdlopen = [&](const char *filename, int flag) -> void * { return valib.get(); }; VASharingFunctions::fdlsym = [&](void *handle, const char *symbol) -> void * { return (void *)GetLibFunc; }; VASharingFunctions::fdlclose = [&](void *handle) -> int { return 0; }; VASharingFunctionsTested functions; EXPECT_TRUE(functions.wereFunctionsAssigned()); } TEST(VASharingFunctions, GivenInitFunctionsWhenEnableVaLibCallsThenFunctionsAreAssigned) { DebugManagerStateRestore restorer; VariableBackup dlopenBackup(&VASharingFunctions::fdlopen); VariableBackup dlsymBackup(&VASharingFunctions::fdlsym); 
VariableBackup dlcloseBackup(&VASharingFunctions::fdlclose); std::unique_ptr valib(new uint32_t); ASSERT_NE(nullptr, valib.get()); VASharingFunctions::fdlopen = [&](const char *filename, int flag) -> void * { return valib.get(); }; VASharingFunctions::fdlsym = [&](void *handle, const char *symbol) -> void * { return (void *)GetLibFunc; }; VASharingFunctions::fdlclose = [&](void *handle) -> int { return 0; }; EXPECT_EQ(-1, DebugManager.flags.EnableVaLibCalls.get()); VASharingFunctionsTested functionsWithDefaultVaLibCalls; EXPECT_TRUE(functionsWithDefaultVaLibCalls.wereFunctionsAssigned()); DebugManager.flags.EnableVaLibCalls.set(1); VASharingFunctionsTested functionsWithEnabledVaLibCalls; EXPECT_TRUE(functionsWithEnabledVaLibCalls.wereFunctionsAssigned()); } TEST(VASharingFunctions, GivenFunctionsWhenNoLibvaThenDlcloseNotCalled) { VariableBackup dlopenBackup(&VASharingFunctions::fdlopen); VariableBackup dlsymBackup(&VASharingFunctions::fdlsym); VariableBackup dlcloseBackup(&VASharingFunctions::fdlclose); uint32_t closeCalls = 0; VASharingFunctions::fdlopen = [&](const char *filename, int flag) -> void * { return nullptr; }; VASharingFunctions::fdlsym = [&](void *handle, const char *symbol) -> void * { return nullptr; }; VASharingFunctions::fdlclose = [&](void *handle) -> int { closeCalls++; return 0; }; { // we need this to properly track closeCalls VASharingFunctionsTested functions; } EXPECT_EQ(0u, closeCalls); } TEST(VASharingFunctions, GivenFunctionsWhenLibvaLoadedThenDlcloseIsCalled) { VariableBackup dlopenBackup(&VASharingFunctions::fdlopen); VariableBackup dlsymBackup(&VASharingFunctions::fdlsym); VariableBackup dlcloseBackup(&VASharingFunctions::fdlclose); std::unique_ptr valib(new uint32_t); ASSERT_NE(nullptr, valib.get()); uint32_t closeCalls = 0; VASharingFunctions::fdlopen = [&](const char *filename, int flag) -> void * { return valib.get(); }; VASharingFunctions::fdlsym = [&](void *handle, const char *symbol) -> void * { return nullptr; }; 
VASharingFunctions::fdlclose = [&](void *handle) -> int { if (handle == valib.get()) { closeCalls++; } return 0; }; { // we need this to properly track closeCalls VASharingFunctionsTested functions; } EXPECT_EQ(1u, closeCalls); } TEST(VASharingFunctions, givenEnabledExtendedVaFormatsWhenQueryingSupportedFormatsThenAllSupportedFormatsAreStored) { DebugManagerStateRestore restore; DebugManager.flags.EnableExtendedVaFormats.set(true); VASharingFunctionsMock sharingFunctions; sharingFunctions.querySupportedVaImageFormats(VADisplay(1)); EXPECT_EQ(3u, sharingFunctions.supported2PlaneFormats.size()); EXPECT_EQ(1u, sharingFunctions.supported3PlaneFormats.size()); size_t allFormatsFound = 0; for (const auto &supported2PlaneFormat : sharingFunctions.supported2PlaneFormats) { if (supported2PlaneFormat.fourcc == VA_FOURCC_NV12 || supported2PlaneFormat.fourcc == VA_FOURCC_P010 || supported2PlaneFormat.fourcc == VA_FOURCC_P016) { allFormatsFound++; } } for (const auto &supported3PlaneFormat : sharingFunctions.supported3PlaneFormats) { if (supported3PlaneFormat.fourcc == VA_FOURCC_RGBP) { allFormatsFound++; } } EXPECT_EQ(4u, allFormatsFound); } compute-runtime-22.14.22890/opencl/test/unit_test/sharings/va/va_sharing_tests.cpp000066400000000000000000001725711422164147700301370ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/device/device.h" #include "shared/source/gmm_helper/gmm.h" #include "shared/source/helpers/array_count.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "shared/source/os_interface/linux/drm_neo.h" #include "shared/source/os_interface/os_interface.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/ult_hw_config.h" #include "shared/test/common/helpers/variable_backup.h" #include "shared/test/common/libult/create_command_stream.h" #include "shared/test/common/libult/linux/drm_mock.h" #include 
"shared/test/common/libult/ult_command_stream_receiver.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/api/api.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/platform/platform.h" #include "opencl/source/sharings/va/cl_va_api.h" #include "opencl/source/sharings/va/va_device.h" #include "opencl/source/sharings/va/va_sharing.h" #include "opencl/source/sharings/va/va_surface.h" #include "opencl/test/unit_test/fixtures/platform_fixture.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "opencl/test/unit_test/sharings/va/mock_va_sharing.h" #include "gtest/gtest.h" #include using namespace NEO; class VaSharingTests : public ::testing::Test, public PlatformFixture { public: void SetUp() override { rootDeviceIndex = context.getDevice(0)->getRootDeviceIndex(); PlatformFixture::SetUp(); vaSharing = new MockVaSharing; context.setSharingFunctions(&vaSharing->sharingFunctions); vaSharing->sharingFunctions.querySupportedVaImageFormats(VADisplay(1)); vaSharing->updateAcquiredHandle(sharingHandle); sharedImg = nullptr; sharedClMem = nullptr; } void TearDown() override { if (sharedImg) { delete sharedImg; } context.releaseSharingFunctions(SharingType::VA_SHARING); delete vaSharing; PlatformFixture::TearDown(); } void updateAcquiredHandle(unsigned int handle) { sharingHandle = handle; vaSharing->updateAcquiredHandle(sharingHandle); } void createMediaSurface(cl_uint plane = 0, cl_mem_flags flags = CL_MEM_READ_WRITE) { sharedClMem = clCreateFromVA_APIMediaSurfaceINTEL(&context, flags, &vaSurfaceId, plane, &errCode); ASSERT_NE(nullptr, sharedClMem); EXPECT_EQ(CL_SUCCESS, errCode); sharedImg = castToObject(sharedClMem); ASSERT_NE(sharedImg, nullptr); } uint32_t rootDeviceIndex = 0; Image *sharedImg = nullptr; cl_mem sharedClMem = nullptr; MockContext context{}; MockVaSharing *vaSharing = nullptr; 
VASurfaceID vaSurfaceId = 0u; VAImage vaImage = {}; cl_int errCode = -1; unsigned int sharingHandle = 1u; }; TEST(VaSharingTest, givenVASharingFunctionsObjectWhenFunctionsAreCalledThenCallsAreRedirectedToVaFunctionPointers) { unsigned int handle = 0u; VASurfaceID vaSurfaceId = 0u; VAImage vaImage = {}; VADRMPRIMESurfaceDescriptor vaDrmPrimeSurfaceDesc = {}; class VASharingFunctionsGlobalFunctionPointersMock : public VASharingFunctions { public: VASharingFunctionsGlobalFunctionPointersMock() : VASharingFunctions(nullptr) { initMembers(); } bool vaDisplayIsValidCalled = false; bool vaDeriveImageCalled = false; bool vaDestroyImageCalled = false; bool vaSyncSurfaceCalled = false; bool vaGetLibFuncCalled = false; bool vaExtGetSurfaceHandleCalled = false; bool vaExportSurfaceHandleCalled = false; bool vaQueryImageFormatsCalled = false; bool vaMaxNumImageFormatsCalled = false; void initMembers() { vaDisplayIsValidPFN = mockVaDisplayIsValid; vaDeriveImagePFN = mockVaDeriveImage; vaDestroyImagePFN = mockVaDestroyImage; vaSyncSurfacePFN = mockVaSyncSurface; vaGetLibFuncPFN = mockVaGetLibFunc; vaExtGetSurfaceHandlePFN = mockExtGetSurfaceHandle; vaExportSurfaceHandlePFN = mockExportSurfaceHandle; vaQueryImageFormatsPFN = mockVaQueryImageFormats; vaMaxNumImageFormatsPFN = mockVaMaxNumImageFormats; } static VASharingFunctionsGlobalFunctionPointersMock *getInstance(bool release) { static VASharingFunctionsGlobalFunctionPointersMock *vaSharingFunctions = nullptr; if (!vaSharingFunctions) { vaSharingFunctions = new VASharingFunctionsGlobalFunctionPointersMock; } else if (release) { delete vaSharingFunctions; vaSharingFunctions = nullptr; } return vaSharingFunctions; } static int mockVaDisplayIsValid(VADisplay vaDisplay) { getInstance(false)->vaDisplayIsValidCalled = true; return 1; }; static VAStatus mockVaDeriveImage(VADisplay vaDisplay, VASurfaceID vaSurface, VAImage *vaImage) { getInstance(false)->vaDeriveImageCalled = true; return VA_STATUS_SUCCESS; }; static VAStatus 
mockVaDestroyImage(VADisplay vaDisplay, VAImageID vaImageId) { getInstance(false)->vaDestroyImageCalled = true; return VA_STATUS_SUCCESS; }; static VAStatus mockVaSyncSurface(VADisplay vaDisplay, VASurfaceID vaSurface) { getInstance(false)->vaSyncSurfaceCalled = true; return VA_STATUS_SUCCESS; }; static void *mockVaGetLibFunc(VADisplay vaDisplay, const char *func) { getInstance(false)->vaGetLibFuncCalled = true; return nullptr; }; static VAStatus mockExtGetSurfaceHandle(VADisplay vaDisplay, VASurfaceID *vaSurface, unsigned int *handleId) { getInstance(false)->vaExtGetSurfaceHandleCalled = true; return VA_STATUS_SUCCESS; }; static VAStatus mockExportSurfaceHandle(VADisplay vaDisplay, VASurfaceID vaSurface, uint32_t memType, uint32_t flags, void *descriptor) { getInstance(false)->vaExportSurfaceHandleCalled = true; return VA_STATUS_SUCCESS; }; static VAStatus mockVaQueryImageFormats(VADisplay vaDisplay, VAImageFormat *formatList, int *numFormats) { getInstance(false)->vaQueryImageFormatsCalled = true; return VA_STATUS_SUCCESS; }; static int mockVaMaxNumImageFormats(VADisplay vaDisplay) { getInstance(false)->vaMaxNumImageFormatsCalled = true; return 0; }; }; auto vaSharingFunctions = VASharingFunctionsGlobalFunctionPointersMock::getInstance(false); EXPECT_TRUE(vaSharingFunctions->isValidVaDisplay()); EXPECT_EQ(0, vaSharingFunctions->deriveImage(vaSurfaceId, &vaImage)); EXPECT_EQ(0, vaSharingFunctions->destroyImage(vaImage.image_id)); EXPECT_EQ(0, vaSharingFunctions->syncSurface(vaSurfaceId)); EXPECT_TRUE(nullptr == vaSharingFunctions->getLibFunc("funcName")); EXPECT_EQ(0, vaSharingFunctions->extGetSurfaceHandle(&vaSurfaceId, &handle)); EXPECT_EQ(0, vaSharingFunctions->exportSurfaceHandle(vaSurfaceId, VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2, 0, &vaDrmPrimeSurfaceDesc)); int numFormats = 0; EXPECT_EQ(0, vaSharingFunctions->queryImageFormats(VADisplay(1), nullptr, &numFormats)); EXPECT_EQ(0, vaSharingFunctions->maxNumImageFormats(VADisplay(1))); EXPECT_EQ(0u, handle); 
EXPECT_TRUE(VASharingFunctionsGlobalFunctionPointersMock::getInstance(false)->vaDisplayIsValidCalled); EXPECT_TRUE(VASharingFunctionsGlobalFunctionPointersMock::getInstance(false)->vaDeriveImageCalled); EXPECT_TRUE(VASharingFunctionsGlobalFunctionPointersMock::getInstance(false)->vaDestroyImageCalled); EXPECT_TRUE(VASharingFunctionsGlobalFunctionPointersMock::getInstance(false)->vaSyncSurfaceCalled); EXPECT_TRUE(VASharingFunctionsGlobalFunctionPointersMock::getInstance(false)->vaGetLibFuncCalled); EXPECT_TRUE(VASharingFunctionsGlobalFunctionPointersMock::getInstance(false)->vaExtGetSurfaceHandleCalled); EXPECT_TRUE(VASharingFunctionsGlobalFunctionPointersMock::getInstance(false)->vaExportSurfaceHandleCalled); EXPECT_TRUE(VASharingFunctionsGlobalFunctionPointersMock::getInstance(false)->vaQueryImageFormatsCalled); EXPECT_TRUE(VASharingFunctionsGlobalFunctionPointersMock::getInstance(false)->vaMaxNumImageFormatsCalled); VASharingFunctionsGlobalFunctionPointersMock::getInstance(true); } TEST_F(VaSharingTests, givenMockVaWithExportSurfaceHandlerWhenVaSurfaceIsCreatedThenCallHandlerWithDrmPrime2ToGetSurfaceFormatsInDescriptor) { vaSharing->sharingFunctions.haveExportSurfaceHandle = true; for (int plane = 0; plane < 2; plane++) { auto vaSurface = std::unique_ptr(VASurface::createSharedVaSurface( &context, &vaSharing->sharingFunctions, CL_MEM_READ_WRITE, 0, &vaSurfaceId, plane, &errCode)); ASSERT_NE(nullptr, vaSurface); auto handler = vaSurface->peekSharingHandler(); ASSERT_NE(nullptr, handler); auto vaHandler = static_cast(handler); EXPECT_EQ(vaHandler->peekFunctionsHandler(), &vaSharing->sharingFunctions); auto &sharingFunctions = vaSharing->sharingFunctions; EXPECT_FALSE(sharingFunctions.deriveImageCalled); EXPECT_FALSE(sharingFunctions.destroyImageCalled); EXPECT_TRUE(sharingFunctions.exportSurfaceHandleCalled); EXPECT_EQ(static_cast(VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2), sharingFunctions.receivedSurfaceMemType); EXPECT_EQ(static_cast(VA_EXPORT_SURFACE_READ_WRITE | 
VA_EXPORT_SURFACE_SEPARATE_LAYERS), sharingFunctions.receivedSurfaceFlags); if (plane == 0) { EXPECT_EQ(256u, vaSurface->getImageDesc().image_width); EXPECT_EQ(256u, vaSurface->getImageDesc().image_height); } if (plane == 1) { EXPECT_EQ(128u, vaSurface->getImageDesc().image_width); EXPECT_EQ(128u, vaSurface->getImageDesc().image_height); SurfaceOffsets surfaceOffsets; vaSurface->getSurfaceOffsets(surfaceOffsets); auto vaSurfaceDesc = sharingFunctions.mockVaSurfaceDesc; EXPECT_EQ(vaSurfaceDesc.layers[1].offset[0], surfaceOffsets.offset); EXPECT_EQ(0u, surfaceOffsets.xOffset); EXPECT_EQ(0u, surfaceOffsets.yOffset); EXPECT_EQ(vaSurfaceDesc.layers[1].offset[0] / vaSurfaceDesc.layers[1].pitch[0], surfaceOffsets.yOffsetForUVplane); } EXPECT_TRUE(vaSurface->isTiledAllocation()); EXPECT_EQ(8u, vaSurface->getGraphicsAllocation(rootDeviceIndex)->peekSharedHandle()); } } TEST_F(VaSharingTests, givenMockVaWhenVaSurfaceIsCreatedThenMemObjectHasVaHandler) { auto vaSurface = VASurface::createSharedVaSurface(&context, &vaSharing->sharingFunctions, CL_MEM_READ_WRITE, 0, &vaSurfaceId, 0, &errCode); EXPECT_NE(nullptr, vaSurface); auto graphicsAllocation = vaSurface->getGraphicsAllocation(rootDeviceIndex); EXPECT_NE(nullptr, graphicsAllocation); EXPECT_EQ(4096u, graphicsAllocation->getUnderlyingBufferSize()); EXPECT_EQ(1u, graphicsAllocation->peekSharedHandle()); EXPECT_EQ(4096u, vaSurface->getSize()); auto handler = vaSurface->peekSharingHandler(); ASSERT_NE(nullptr, handler); auto vaHandler = static_cast(handler); EXPECT_EQ(vaHandler->peekFunctionsHandler(), &vaSharing->sharingFunctions); EXPECT_EQ(1u, vaSharing->sharingFunctions.acquiredVaHandle); EXPECT_TRUE(vaSharing->sharingFunctions.deriveImageCalled); EXPECT_TRUE(vaSharing->sharingFunctions.destroyImageCalled); EXPECT_TRUE(vaSharing->sharingFunctions.extGetSurfaceHandleCalled); size_t paramSize = 0; void *paramValue = nullptr; handler->getMemObjectInfo(paramSize, paramValue); EXPECT_EQ(sizeof(VASurfaceID *), paramSize); 
VASurfaceID **paramSurfaceId = reinterpret_cast(paramValue); EXPECT_EQ(vaSurfaceId, **paramSurfaceId); delete vaSurface; } TEST_F(VaSharingTests, givenInvalidPlaneWhenVaSurfaceIsCreatedAndNotRGBPThenUnrecoverableIsCalled) { EXPECT_THROW(VASurface::createSharedVaSurface(&context, &vaSharing->sharingFunctions, CL_MEM_READ_WRITE, 0, &vaSurfaceId, 2, &errCode), std::exception); } TEST_F(VaSharingTests, givenInvalidPlaneWhenVaSurfaceIsCreatedThenUnrecoverableIsCalled) { EXPECT_THROW(VASurface::createSharedVaSurface(&context, &vaSharing->sharingFunctions, CL_MEM_READ_WRITE, 0, &vaSurfaceId, 3, &errCode), std::exception); } TEST_F(VaSharingTests, givenInvalidPlaneInputWhenVaSurfaceIsCreatedThenInvalidValueErrorIsReturned) { sharedClMem = clCreateFromVA_APIMediaSurfaceINTEL(&context, CL_MEM_READ_WRITE, &vaSurfaceId, 2, &errCode); EXPECT_EQ(nullptr, sharedClMem); EXPECT_EQ(CL_INVALID_VALUE, errCode); } TEST_F(VaSharingTests, givenValidPlaneInputWhenVaSurfaceIsCreatedAndDebugFlagEnabledThenCLSuccessIsReturned) { DebugManagerStateRestore restore; DebugManager.flags.EnableExtendedVaFormats.set(true); vaSharing->sharingFunctions.mockVaSurfaceDesc.fourcc = VA_FOURCC_RGBP; vaSharing->sharingFunctions.mockVaSurfaceDesc.objects[1] = {8, 98304, I915_FORMAT_MOD_Y_TILED}; vaSharing->sharingFunctions.mockVaSurfaceDesc.objects[2] = {8, 98304, I915_FORMAT_MOD_Y_TILED}; vaSharing->sharingFunctions.mockVaSurfaceDesc.num_layers = 3; vaSharing->sharingFunctions.mockVaSurfaceDesc.layers[1] = {DRM_FORMAT_R8, 1, {}, {0, 0, 0, 0}, {256, 0, 0, 0}}; vaSharing->sharingFunctions.mockVaSurfaceDesc.layers[2] = {DRM_FORMAT_R8, 1, {}, {0, 0, 0, 0}, {256, 0, 0, 0}}; vaSharing->sharingFunctions.derivedImageFormatBpp = 8; vaSharing->sharingFunctions.derivedImageFormatFourCC = VA_FOURCC_RGBP; sharedClMem = clCreateFromVA_APIMediaSurfaceINTEL(&context, CL_MEM_READ_WRITE, &vaSurfaceId, 2, &errCode); EXPECT_NE(nullptr, sharedClMem); EXPECT_EQ(CL_SUCCESS, errCode); errCode = clReleaseMemObject(sharedClMem); 
EXPECT_EQ(CL_SUCCESS, errCode);
}

// Creates a plane-1 shared VA surface from a mock whose derived image is 272x528 and checks
// that the reported surface offsets are based on the width aligned up to 128 and the height
// aligned up to 32: yOffsetForUVplane equals the aligned height and offset equals
// alignedWidth * alignedHeight.
TEST_F(VaSharingTests, givenMockVaWhenVaSurfaceIsCreatedWithNotAlignedWidthAndHeightThenSurfaceOffsetsUseAlignedValues) {
    vaSharing->sharingFunctions.derivedImageWidth = 256 + 16;
    vaSharing->sharingFunctions.derivedImageHeight = 512 + 16;
    auto vaSurface = VASurface::createSharedVaSurface(&context, &vaSharing->sharingFunctions, CL_MEM_READ_WRITE, 0, &vaSurfaceId, 1, &errCode);
    // Fatal assertion: vaSurface is dereferenced by every statement that follows.
    ASSERT_NE(nullptr, vaSurface);

    auto graphicsAllocation = vaSurface->getGraphicsAllocation(rootDeviceIndex);
    // Fatal assertion: graphicsAllocation is dereferenced on the next lines.
    ASSERT_NE(nullptr, graphicsAllocation);
    EXPECT_EQ(4096u, graphicsAllocation->getUnderlyingBufferSize());
    EXPECT_EQ(1u, graphicsAllocation->peekSharedHandle());

    EXPECT_EQ(4096u, vaSurface->getSize());

    auto handler = vaSurface->peekSharingHandler();
    ASSERT_NE(nullptr, handler);

    // NOTE(review): this cast has no target type in the text as it stands (the template
    // argument appears to have been lost) - confirm against the upstream file.
    auto vaHandler = static_cast(handler);
    EXPECT_EQ(vaHandler->peekFunctionsHandler(), &vaSharing->sharingFunctions);

    EXPECT_EQ(1u, vaSharing->sharingFunctions.acquiredVaHandle);

    EXPECT_TRUE(vaSharing->sharingFunctions.deriveImageCalled);
    EXPECT_TRUE(vaSharing->sharingFunctions.destroyImageCalled);
    EXPECT_TRUE(vaSharing->sharingFunctions.extGetSurfaceHandleCalled);

    SurfaceOffsets surfaceOffsets;
    uint16_t alignedWidth = alignUp(vaSharing->sharingFunctions.derivedImageWidth, 128);
    uint16_t alignedHeight = alignUp(vaSharing->sharingFunctions.derivedImageHeight, 32);
    // Widen one operand first so the product is computed in 64 bits instead of in int.
    uint64_t alignedOffset = static_cast<uint64_t>(alignedWidth) * alignedHeight;

    vaSurface->getSurfaceOffsets(surfaceOffsets);
    EXPECT_EQ(alignedHeight, surfaceOffsets.yOffsetForUVplane);
    EXPECT_EQ(alignedOffset, surfaceOffsets.offset);
    EXPECT_EQ(0u, surfaceOffsets.yOffset);
    EXPECT_EQ(0u, surfaceOffsets.xOffset);

    delete vaSurface;
}

// The public creation entry point succeeds for plane 0 with the fixture's context and
// returns a non-null cl_mem that can be released.
TEST_F(VaSharingTests, givenContextWhenClCreateFromVaApiMediaSurfaceIsCalledThenSurfaceIsReturned) {
    sharedClMem = clCreateFromVA_APIMediaSurfaceINTEL(&context, CL_MEM_READ_WRITE, &vaSurfaceId, 0, &errCode);
    ASSERT_EQ(CL_SUCCESS, errCode);
    ASSERT_NE(nullptr, sharedClMem);

    errCode = clReleaseMemObject(sharedClMem);
EXPECT_EQ(CL_SUCCESS, errCode);
}

// Acquires the same surface twice through its sharing handler. The surface handle is
// queried on the first acquire; after the flag is cleared, the second acquire must not
// query it again.
TEST_F(VaSharingTests, givenVASurfaceWhenItIsAcquiredTwiceThenAcquireIsNotCalled) {
    createMediaSurface();

    sharedImg->peekSharingHandler()->acquire(sharedImg, context.getDevice(0)->getRootDeviceIndex());
    EXPECT_TRUE(vaSharing->sharingFunctions.extGetSurfaceHandleCalled);

    vaSharing->sharingFunctions.extGetSurfaceHandleCalled = false;
    sharedImg->peekSharingHandler()->acquire(sharedImg, context.getDevice(0)->getRootDeviceIndex());
    EXPECT_FALSE(vaSharing->sharingFunctions.extGetSurfaceHandleCalled);
}

// Same expectation through the command-queue API: after surface creation has queried the
// handle once, neither the first enqueue-acquire nor an acquire following a release
// queries the surface handle again.
TEST_F(VaSharingTests, givenHwCommandQueueWhenEnqueueAcquireIsCalledMultipleTimesThenSharingFunctionAcquireIsNotCalledMultipleTimes) {
    auto commandQueue = clCreateCommandQueue(&context, context.getDevice(0), 0, &errCode);
    ASSERT_EQ(CL_SUCCESS, errCode);

    createMediaSurface();

    EXPECT_TRUE(vaSharing->sharingFunctions.extGetSurfaceHandleCalled);
    // Clear the flag so that only calls made from here on are observed.
    vaSharing->sharingFunctions.extGetSurfaceHandleCalled = false;

    errCode = clEnqueueAcquireVA_APIMediaSurfacesINTEL(commandQueue, 1, &sharedClMem, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, errCode);
    EXPECT_FALSE(vaSharing->sharingFunctions.extGetSurfaceHandleCalled);

    errCode = clEnqueueReleaseVA_APIMediaSurfacesINTEL(commandQueue, 1, &sharedClMem, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, errCode);

    errCode = clEnqueueAcquireVA_APIMediaSurfacesINTEL(commandQueue, 1, &sharedClMem, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, errCode);
    EXPECT_FALSE(vaSharing->sharingFunctions.extGetSurfaceHandleCalled);

    errCode = clReleaseCommandQueue(commandQueue);
    EXPECT_EQ(CL_SUCCESS, errCode);
}

// Enqueues acquire and release with an output event and checks that each event reports the
// matching VA-API command type through clGetEventInfo(CL_EVENT_COMMAND_TYPE).
TEST_F(VaSharingTests, givenHwCommandQueueWhenAcquireAndReleaseCallsAreMadeWithEventsThenProperCmdTypeIsReturned) {
    cl_event retEvent = nullptr;
    cl_command_type cmdType = 0;
    size_t sizeReturned = 0;

    createMediaSurface();

    // NOTE(review): the error code written by clCreateCommandQueue is overwritten by the
    // next call without being checked.
    auto commandQueue = clCreateCommandQueue(&context, context.getDevice(0), 0, &errCode);
    errCode = clEnqueueAcquireVA_APIMediaSurfacesINTEL(commandQueue, 1, &sharedClMem, 0, nullptr, &retEvent);
EXPECT_EQ(CL_SUCCESS, errCode); ASSERT_NE(retEvent, nullptr); errCode = clGetEventInfo(retEvent, CL_EVENT_COMMAND_TYPE, sizeof(cmdType), &cmdType, &sizeReturned); ASSERT_EQ(CL_SUCCESS, errCode); EXPECT_EQ(static_cast(CL_COMMAND_ACQUIRE_VA_API_MEDIA_SURFACES_INTEL), cmdType); EXPECT_EQ(sizeof(cl_command_type), sizeReturned); errCode = clReleaseEvent(retEvent); EXPECT_EQ(CL_SUCCESS, errCode); errCode = clEnqueueReleaseVA_APIMediaSurfacesINTEL(commandQueue, 1, &sharedClMem, 0, nullptr, &retEvent); EXPECT_EQ(CL_SUCCESS, errCode); errCode = clGetEventInfo(retEvent, CL_EVENT_COMMAND_TYPE, sizeof(cmdType), &cmdType, &sizeReturned); ASSERT_EQ(CL_SUCCESS, errCode); EXPECT_EQ(static_cast(CL_COMMAND_RELEASE_VA_API_MEDIA_SURFACES_INTEL), cmdType); EXPECT_EQ(sizeof(cl_command_type), sizeReturned); errCode = clReleaseEvent(retEvent); EXPECT_EQ(CL_SUCCESS, errCode); errCode = clReleaseCommandQueue(commandQueue); EXPECT_EQ(CL_SUCCESS, errCode); } TEST_F(VaSharingTests, givenVaMediaSurfaceWhenGetMemObjectInfoIsCalledThenSurfaceIdIsReturned) { vaSurfaceId = 1u; createMediaSurface(); VASurfaceID *retVaSurfaceId = nullptr; size_t retSize = 0; vaSurfaceId = 0; errCode = clGetMemObjectInfo(sharedClMem, CL_MEM_VA_API_MEDIA_SURFACE_INTEL, sizeof(VASurfaceID *), &retVaSurfaceId, &retSize); EXPECT_EQ(CL_SUCCESS, errCode); EXPECT_EQ(sizeof(VASurfaceID *), retSize); EXPECT_EQ(1u, *retVaSurfaceId); } TEST_F(VaSharingTests, givenVaMediaSurfaceWhenGetImageInfoIsCalledThenPlaneIsReturned) { cl_uint plane = 1u; createMediaSurface(plane); cl_uint retPlane = 0u; size_t retSize = 0; errCode = clGetImageInfo(sharedClMem, CL_IMAGE_VA_API_PLANE_INTEL, sizeof(cl_uint), &retPlane, &retSize); EXPECT_EQ(CL_SUCCESS, errCode); EXPECT_EQ(sizeof(cl_uint), retSize); EXPECT_EQ(plane, retPlane); } TEST_F(VaSharingTests, givenPlaneWhenCreateSurfaceIsCalledThenSetPlaneFields) { cl_uint planes[2] = {0, 1}; updateAcquiredHandle(2); for (int i = 0; i < 2; i++) { createMediaSurface(planes[i]); 
EXPECT_TRUE(sharedImg->getSurfaceFormatInfo().OCLImageFormat.image_channel_data_type == CL_UNORM_INT8);

        EXPECT_EQ(planes[i], sharedImg->getMediaPlaneType());
        if (planes[i] == 0u) {
            EXPECT_TRUE(sharedImg->getSurfaceFormatInfo().OCLImageFormat.image_channel_order == CL_R);
        } else if (planes[i] == 1) {
            EXPECT_TRUE(sharedImg->getSurfaceFormatInfo().OCLImageFormat.image_channel_order == CL_RG);
        }

        delete sharedImg;
        sharedImg = nullptr;
    }
}

// Creates a plane-0 surface and checks the resulting image object: no backing buffer, zero
// array size / depth / slice pitch, width and height taken from the mock VA image, 2D image
// type, non-zero row pitch, READ_WRITE flags, no cube face, and a graphics allocation that
// carries the fixture's sharing handle.
TEST_F(VaSharingTests, givenSimpleParamsWhenCreateSurfaceIsCalledThenSetImgObject) {
    updateAcquiredHandle(2);

    createMediaSurface(0u);

    EXPECT_TRUE(sharedImg->getImageDesc().buffer == nullptr);
    EXPECT_EQ(0u, sharedImg->getImageDesc().image_array_size);
    EXPECT_EQ(0u, sharedImg->getImageDesc().image_depth);
    // NOTE(review): the two casts below have no target type in the text as it stands (the
    // template argument appears to have been lost) - confirm against the upstream file.
    EXPECT_EQ(vaSharing->sharingFunctions.mockVaImage.height, static_cast(sharedImg->getImageDesc().image_height));
    EXPECT_EQ(vaSharing->sharingFunctions.mockVaImage.width, static_cast(sharedImg->getImageDesc().image_width));
    EXPECT_TRUE(CL_MEM_OBJECT_IMAGE2D == sharedImg->getImageDesc().image_type);
    EXPECT_EQ(0u, sharedImg->getImageDesc().image_slice_pitch);
    EXPECT_NE(0u, sharedImg->getImageDesc().image_row_pitch);
    EXPECT_EQ(0u, sharedImg->getHostPtrSlicePitch());
    EXPECT_NE(0u, sharedImg->getHostPtrRowPitch());
    EXPECT_TRUE(sharedImg->getFlags() == CL_MEM_READ_WRITE);
    EXPECT_TRUE(sharedImg->getCubeFaceIndex() == __GMM_NO_CUBE_MAP);

    EXPECT_EQ(vaSharing->sharingHandle, sharedImg->getGraphicsAllocation(rootDeviceIndex)->peekSharedHandle());
}

// With interop user sync disabled on the context, acquiring the surface must call
// syncSurface on the sharing functions, return CL_SUCCESS, and pass the surface id that was
// current when the surface was created (1), not the value the fixture holds afterwards (0).
TEST_F(VaSharingTests, givenNonInteropUserSyncContextWhenAcquireIsCalledThenSyncSurface) {
    context.setInteropUserSyncEnabled(false);

    vaSurfaceId = 1u;
    createMediaSurface();
    vaSurfaceId = 0u;

    // NOTE(review): castToObject has no template argument in the text as it stands -
    // confirm against the upstream file.
    auto memObj = castToObject(sharedClMem);

    EXPECT_FALSE(vaSharing->sharingFunctions.syncSurfaceCalled);
    auto ret = memObj->peekSharingHandler()->acquire(sharedImg, context.getDevice(0)->getRootDeviceIndex());
    EXPECT_TRUE(vaSharing->sharingFunctions.syncSurfaceCalled);
    EXPECT_EQ(CL_SUCCESS, ret);
    EXPECT_EQ(1u,
vaSharing->sharingFunctions.syncedSurfaceID);
}

// With interop user sync enabled on the context, acquiring the surface must not call
// syncSurface on the sharing functions.
TEST_F(VaSharingTests, givenInteropUserSyncContextWhenAcquireIsCalledThenDontSyncSurface) {
    context.setInteropUserSyncEnabled(true);

    createMediaSurface();

    EXPECT_FALSE(vaSharing->sharingFunctions.syncSurfaceCalled);
    sharedImg->peekSharingHandler()->acquire(sharedImg, context.getDevice(0)->getRootDeviceIndex());
    EXPECT_FALSE(vaSharing->sharingFunctions.syncSurfaceCalled);
}

// When the mocked syncSurface reports VA_STATUS_ERROR_INVALID_SURFACE, acquire() is
// expected to return CL_OUT_OF_RESOURCES.
TEST_F(VaSharingTests, whenSyncSurfaceFailedThenReturnOutOfResource) {
    vaSharing->sharingFunctions.syncSurfaceReturnStatus = VA_STATUS_ERROR_INVALID_SURFACE;
    createMediaSurface();

    auto ret = sharedImg->peekSharingHandler()->acquire(sharedImg, context.getDevice(0)->getRootDeviceIndex());
    EXPECT_EQ(CL_OUT_OF_RESOURCES, ret);
}

// Creates a surface for plane 0 and for plane 1 and checks the image dimensions reported by
// clGetImageInfo: 256x256 for plane 0 and 128x128 for plane 1.
TEST_F(VaSharingTests, givenYuvPlaneWhenCreateIsCalledThenChangeWidthAndHeight) {
    // Each trailing comment sits at the end of its own line so that it cannot swallow the
    // code that follows it.
    cl_uint planeTypes[] = {
        0, // Y
        1  // U
    };

    context.setInteropUserSyncEnabled(true);

    for (int i = 0; i < 2; i++) {
        createMediaSurface(planeTypes[i]);

        // Both dimensions share the same expected value for a given plane.
        const size_t expectedDimension = (planeTypes[i] == 1) ? 128u : 256u;

        // Initialised so that a failed query is compared against a defined value.
        size_t retParam = 0;

        errCode = clGetImageInfo(sharedClMem, CL_IMAGE_WIDTH, sizeof(size_t), &retParam, nullptr);
        EXPECT_EQ(CL_SUCCESS, errCode);
        EXPECT_EQ(expectedDimension, retParam);

        errCode = clGetImageInfo(sharedClMem, CL_IMAGE_HEIGHT, sizeof(size_t), &retParam, nullptr);
        EXPECT_EQ(CL_SUCCESS, errCode);
        EXPECT_EQ(expectedDimension, retParam);

        delete sharedImg;
        sharedImg = nullptr;
    }
}

// A context whose sharing functions have been cleared returns nullptr from getSharing().
TEST_F(VaSharingTests, givenContextWhenSharingTableEmptyThenReturnsNullptr) {
    // Local context; intentionally shadows the fixture member of the same name.
    MockContext context;
    context.clearSharingFunctions();
    // NOTE(review): getSharing has no template argument in the text as it stands - confirm
    // against the upstream file.
    VASharingFunctions *sharingF = context.getSharing();
    EXPECT_EQ(sharingF, nullptr);
}

// Passing a null platform to clGetDeviceIDsFromVA_APIMediaAdapterINTEL yields
// CL_INVALID_PLATFORM and leaves the device outputs at zero.
// NOTE(review): the test name says "ThenReturnFirstDevice", which does not match these
// assertions.
TEST_F(VaSharingTests, givenInValidPlatformWhenGetDeviceIdsFromVaApiMediaAdapterCalledThenReturnFirstDevice) {
    cl_device_id devices = 0;
    cl_uint numDevices = 0;

    auto errCode = clGetDeviceIDsFromVA_APIMediaAdapterINTEL(nullptr, 0u, nullptr, 0u, 1, &devices, &numDevices);
    EXPECT_EQ(CL_INVALID_PLATFORM, errCode);
EXPECT_EQ(0u, numDevices);
    EXPECT_EQ(0u, devices);
}

// P010 derived image (16 bpp), plane 0: the image is expected to use CL_UNORM_INT16 / CL_R
// with GMM surface format R16_UNORM, while the allocation's GMM resource format is P010.
TEST_F(VaSharingTests, givenP010FormatWhenCreatingSharedVaSurfaceForPlane0ThenCorrectFormatIsUsedByImageAndGMM) {
    vaSharing->sharingFunctions.derivedImageFormatBpp = 16;
    vaSharing->sharingFunctions.derivedImageFormatFourCC = VA_FOURCC_P010;

    // NOTE(review): std::unique_ptr and the static_casts below have no template argument in
    // the text as it stands - confirm against the upstream file.
    auto vaSurface = std::unique_ptr(VASurface::createSharedVaSurface(&context, &vaSharing->sharingFunctions, CL_MEM_READ_WRITE, 0, &vaSurfaceId, 0, &errCode));
    auto graphicsAllocation = vaSurface->getGraphicsAllocation(rootDeviceIndex);
    EXPECT_EQ(static_cast(CL_UNORM_INT16), vaSurface->getImageFormat().image_channel_data_type);
    EXPECT_EQ(static_cast(CL_R), vaSurface->getImageFormat().image_channel_order);
    EXPECT_EQ(GMM_RESOURCE_FORMAT::GMM_FORMAT_R16_UNORM, vaSurface->getSurfaceFormatInfo().surfaceFormat.GMMSurfaceFormat);
    EXPECT_EQ(GMM_RESOURCE_FORMAT::GMM_FORMAT_P010, graphicsAllocation->getDefaultGmm()->resourceParams.Format);
    EXPECT_EQ(CL_SUCCESS, errCode);
}

// P010 derived image (16 bpp), plane 1: the image is expected to use CL_UNORM_INT16 / CL_RG
// with GMM surface format R16G16_UNORM, while the allocation's GMM resource format is P010.
TEST_F(VaSharingTests, givenP010FormatWhenCreatingSharedVaSurfaceForPlane1ThenCorrectFormatIsUsedByImageAndGMM) {
    vaSharing->sharingFunctions.derivedImageFormatBpp = 16;
    vaSharing->sharingFunctions.derivedImageFormatFourCC = VA_FOURCC_P010;

    auto vaSurface = std::unique_ptr(VASurface::createSharedVaSurface(&context, &vaSharing->sharingFunctions, CL_MEM_READ_WRITE, 0, &vaSurfaceId, 1, &errCode));
    auto graphicsAllocation = vaSurface->getGraphicsAllocation(rootDeviceIndex);
    EXPECT_EQ(static_cast(CL_UNORM_INT16), vaSurface->getImageFormat().image_channel_data_type);
    EXPECT_EQ(static_cast(CL_RG), vaSurface->getImageFormat().image_channel_order);
    EXPECT_EQ(GMM_RESOURCE_FORMAT::GMM_FORMAT_R16G16_UNORM, vaSurface->getSurfaceFormatInfo().surfaceFormat.GMMSurfaceFormat);
    EXPECT_EQ(GMM_RESOURCE_FORMAT::GMM_FORMAT_P010, graphicsAllocation->getDefaultGmm()->resourceParams.Format);
    EXPECT_EQ(CL_SUCCESS, errCode);
}

// P016 derived image (16 bpp), plane 0: the image is expected to use CL_UNORM_INT16 / CL_R
// with GMM surface format R16_UNORM, while the allocation's GMM resource format is P016.
TEST_F(VaSharingTests, givenP016FormatWhenCreatingSharedVaSurfaceForPlane0ThenCorrectFormatIsUsedByImageAndGMM) {
vaSharing->sharingFunctions.derivedImageFormatBpp = 16;
    vaSharing->sharingFunctions.derivedImageFormatFourCC = VA_FOURCC_P016;

    auto vaSurface = std::unique_ptr(VASurface::createSharedVaSurface(&context, &vaSharing->sharingFunctions, CL_MEM_READ_WRITE, 0, &vaSurfaceId, 0, &errCode));
    auto graphicsAllocation = vaSurface->getGraphicsAllocation(rootDeviceIndex);
    EXPECT_EQ(static_cast(CL_UNORM_INT16), vaSurface->getImageFormat().image_channel_data_type);
    EXPECT_EQ(static_cast(CL_R), vaSurface->getImageFormat().image_channel_order);
    EXPECT_EQ(GMM_RESOURCE_FORMAT::GMM_FORMAT_R16_UNORM, vaSurface->getSurfaceFormatInfo().surfaceFormat.GMMSurfaceFormat);
    EXPECT_EQ(GMM_RESOURCE_FORMAT::GMM_FORMAT_P016, graphicsAllocation->getDefaultGmm()->resourceParams.Format);
    EXPECT_EQ(CL_SUCCESS, errCode);
}

// P016 derived image (16 bpp), plane 1: the image is expected to use CL_UNORM_INT16 / CL_RG
// with GMM surface format R16G16_UNORM, while the allocation's GMM resource format is P016.
TEST_F(VaSharingTests, givenP016FormatWhenCreatingSharedVaSurfaceForPlane1ThenCorrectFormatIsUsedByImageAndGMM) {
    vaSharing->sharingFunctions.derivedImageFormatBpp = 16;
    vaSharing->sharingFunctions.derivedImageFormatFourCC = VA_FOURCC_P016;

    // NOTE(review): std::unique_ptr and the static_casts below have no template argument in
    // the text as it stands - confirm against the upstream file.
    auto vaSurface = std::unique_ptr(VASurface::createSharedVaSurface(&context, &vaSharing->sharingFunctions, CL_MEM_READ_WRITE, 0, &vaSurfaceId, 1, &errCode));
    auto graphicsAllocation = vaSurface->getGraphicsAllocation(rootDeviceIndex);
    EXPECT_EQ(static_cast(CL_UNORM_INT16), vaSurface->getImageFormat().image_channel_data_type);
    EXPECT_EQ(static_cast(CL_RG), vaSurface->getImageFormat().image_channel_order);
    EXPECT_EQ(GMM_RESOURCE_FORMAT::GMM_FORMAT_R16G16_UNORM, vaSurface->getSurfaceFormatInfo().surfaceFormat.GMMSurfaceFormat);
    EXPECT_EQ(GMM_RESOURCE_FORMAT::GMM_FORMAT_P016, graphicsAllocation->getDefaultGmm()->resourceParams.Format);
    EXPECT_EQ(CL_SUCCESS, errCode);
}

// RGBP with EnableExtendedVaFormats set, plane 0: the image is expected to use
// CL_UNORM_INT8 / CL_R with GMM surface format R8_UNORM, while the allocation's GMM
// resource format is RGBP.
TEST_F(VaSharingTests, givenEnabledExtendedVaFormatsAndRGBPFormatWhenCreatingSharedVaSurfaceForPlane0ThenCorrectFormatIsUsedByImageAndGMM) {
    DebugManagerStateRestore restore;
    DebugManager.flags.EnableExtendedVaFormats.set(true);

    vaSharing->sharingFunctions.mockVaSurfaceDesc.fourcc =
VA_FOURCC_RGBP;
    vaSharing->sharingFunctions.mockVaSurfaceDesc.objects[1] = {8, 98304, I915_FORMAT_MOD_Y_TILED};
    vaSharing->sharingFunctions.mockVaSurfaceDesc.objects[2] = {8, 98304, I915_FORMAT_MOD_Y_TILED};
    vaSharing->sharingFunctions.mockVaSurfaceDesc.num_layers = 3;
    vaSharing->sharingFunctions.mockVaSurfaceDesc.layers[1] = {DRM_FORMAT_R8, 1, {}, {0, 0, 0, 0}, {256, 0, 0, 0}};
    vaSharing->sharingFunctions.mockVaSurfaceDesc.layers[2] = {DRM_FORMAT_R8, 1, {}, {0, 0, 0, 0}, {256, 0, 0, 0}};
    vaSharing->sharingFunctions.derivedImageFormatBpp = 8;
    vaSharing->sharingFunctions.derivedImageFormatFourCC = VA_FOURCC_RGBP;

    auto vaSurface = std::unique_ptr(VASurface::createSharedVaSurface(&context, &vaSharing->sharingFunctions, CL_MEM_READ_WRITE, 0, &vaSurfaceId, 0, &errCode));
    auto graphicsAllocation = vaSurface->getGraphicsAllocation(rootDeviceIndex);
    EXPECT_EQ(static_cast(CL_UNORM_INT8), vaSurface->getImageFormat().image_channel_data_type);
    EXPECT_EQ(static_cast(CL_R), vaSurface->getImageFormat().image_channel_order);
    EXPECT_EQ(GMM_RESOURCE_FORMAT::GMM_FORMAT_R8_UNORM, vaSurface->getSurfaceFormatInfo().surfaceFormat.GMMSurfaceFormat);
    EXPECT_EQ(GMM_RESOURCE_FORMAT::GMM_FORMAT_RGBP, graphicsAllocation->getDefaultGmm()->resourceParams.Format);
    EXPECT_EQ(CL_SUCCESS, errCode);
}

// RGBP with EnableExtendedVaFormats set, plane 1: the image is expected to use
// CL_UNORM_INT8 / CL_R with GMM surface format R8_UNORM, while the allocation's GMM
// resource format is RGBP. The mock surface descriptor is set up with three layers.
TEST_F(VaSharingTests, givenEnabledExtendedVaFormatsAndRGBPFormatWhenCreatingSharedVaSurfaceForPlane1ThenCorrectFormatIsUsedByImageAndGMM) {
    DebugManagerStateRestore restore;
    DebugManager.flags.EnableExtendedVaFormats.set(true);

    vaSharing->sharingFunctions.mockVaSurfaceDesc.fourcc = VA_FOURCC_RGBP;
    vaSharing->sharingFunctions.mockVaSurfaceDesc.objects[1] = {8, 98304, I915_FORMAT_MOD_Y_TILED};
    vaSharing->sharingFunctions.mockVaSurfaceDesc.objects[2] = {8, 98304, I915_FORMAT_MOD_Y_TILED};
    vaSharing->sharingFunctions.mockVaSurfaceDesc.num_layers = 3;
    vaSharing->sharingFunctions.mockVaSurfaceDesc.layers[1] = {DRM_FORMAT_R8, 1, {}, {0, 0, 0, 0}, {256, 0, 0, 0}};
vaSharing->sharingFunctions.mockVaSurfaceDesc.layers[2] = {DRM_FORMAT_R8, 1, {}, {0, 0, 0, 0}, {256, 0, 0, 0}};
    vaSharing->sharingFunctions.derivedImageFormatBpp = 8;
    vaSharing->sharingFunctions.derivedImageFormatFourCC = VA_FOURCC_RGBP;

    auto vaSurface = std::unique_ptr(VASurface::createSharedVaSurface(&context, &vaSharing->sharingFunctions, CL_MEM_READ_WRITE, 0, &vaSurfaceId, 1, &errCode));
    auto graphicsAllocation = vaSurface->getGraphicsAllocation(rootDeviceIndex);
    EXPECT_EQ(static_cast(CL_UNORM_INT8), vaSurface->getImageFormat().image_channel_data_type);
    EXPECT_EQ(static_cast(CL_R), vaSurface->getImageFormat().image_channel_order);
    EXPECT_EQ(GMM_RESOURCE_FORMAT::GMM_FORMAT_R8_UNORM, vaSurface->getSurfaceFormatInfo().surfaceFormat.GMMSurfaceFormat);
    EXPECT_EQ(GMM_RESOURCE_FORMAT::GMM_FORMAT_RGBP, graphicsAllocation->getDefaultGmm()->resourceParams.Format);
    EXPECT_EQ(CL_SUCCESS, errCode);
}

// RGBP with EnableExtendedVaFormats set, plane 2: the image is expected to use
// CL_UNORM_INT8 / CL_R with GMM surface format R8_UNORM, while the allocation's GMM
// resource format is RGBP. The mock surface descriptor is set up with three layers.
TEST_F(VaSharingTests, givenEnabledExtendedVaFormatsAndRGBPFormatWhenCreatingSharedVaSurfaceForPlane2ThenCorrectFormatIsUsedByImageAndGMM) {
    DebugManagerStateRestore restore;
    DebugManager.flags.EnableExtendedVaFormats.set(true);

    vaSharing->sharingFunctions.mockVaSurfaceDesc.fourcc = VA_FOURCC_RGBP;
    vaSharing->sharingFunctions.mockVaSurfaceDesc.objects[1] = {8, 98304, I915_FORMAT_MOD_Y_TILED};
    vaSharing->sharingFunctions.mockVaSurfaceDesc.objects[2] = {8, 98304, I915_FORMAT_MOD_Y_TILED};
    vaSharing->sharingFunctions.mockVaSurfaceDesc.num_layers = 3;
    vaSharing->sharingFunctions.mockVaSurfaceDesc.layers[1] = {DRM_FORMAT_R8, 1, {}, {0, 0, 0, 0}, {256, 0, 0, 0}};
    vaSharing->sharingFunctions.mockVaSurfaceDesc.layers[2] = {DRM_FORMAT_R8, 1, {}, {0, 0, 0, 0}, {256, 0, 0, 0}};
    vaSharing->sharingFunctions.derivedImageFormatBpp = 8;
    vaSharing->sharingFunctions.derivedImageFormatFourCC = VA_FOURCC_RGBP;

    // NOTE(review): std::unique_ptr has no template argument in the text as it stands -
    // confirm against the upstream file.
    auto vaSurface = std::unique_ptr(VASurface::createSharedVaSurface(&context, &vaSharing->sharingFunctions, CL_MEM_READ_WRITE, 0, &vaSurfaceId, 2, &errCode));
    auto graphicsAllocation =
vaSurface->getGraphicsAllocation(rootDeviceIndex);
    EXPECT_EQ(static_cast(CL_UNORM_INT8), vaSurface->getImageFormat().image_channel_data_type);
    EXPECT_EQ(static_cast(CL_R), vaSurface->getImageFormat().image_channel_order);
    EXPECT_EQ(GMM_RESOURCE_FORMAT::GMM_FORMAT_R8_UNORM, vaSurface->getSurfaceFormatInfo().surfaceFormat.GMMSurfaceFormat);
    EXPECT_EQ(GMM_RESOURCE_FORMAT::GMM_FORMAT_RGBP, graphicsAllocation->getDefaultGmm()->resourceParams.Format);
    EXPECT_EQ(CL_SUCCESS, errCode);
}

// With haveExportSurfaceHandle set on the mock and an RGBP descriptor with three layers,
// creates a surface for each of planes 0..2 and checks that the export-surface-handle path
// is used (no derive/destroy image), which memory type and flags were passed to it, and
// that the image size, surface offsets and shared handle come from the mocked descriptor.
TEST_F(VaSharingTests, givenMockVaWithExportSurfaceHandlerAndRGBPWhenVaSurfaceIsCreatedThenCallHandlerWithDrmPrime2ToGetSurfaceFormatsInDescriptor) {
    DebugManagerStateRestore restore;
    DebugManager.flags.EnableExtendedVaFormats.set(true);

    vaSharing->sharingFunctions.haveExportSurfaceHandle = true;

    vaSharing->sharingFunctions.mockVaSurfaceDesc.fourcc = VA_FOURCC_RGBP;
    vaSharing->sharingFunctions.mockVaSurfaceDesc.objects[1] = {8, 98304, I915_FORMAT_MOD_Y_TILED};
    vaSharing->sharingFunctions.mockVaSurfaceDesc.objects[2] = {8, 98304, I915_FORMAT_MOD_Y_TILED};
    vaSharing->sharingFunctions.mockVaSurfaceDesc.num_layers = 3;
    vaSharing->sharingFunctions.mockVaSurfaceDesc.layers[1] = {DRM_FORMAT_R8, 1, {}, {0, 0, 0, 0}, {256, 0, 0, 0}};
    vaSharing->sharingFunctions.mockVaSurfaceDesc.layers[2] = {DRM_FORMAT_R8, 1, {}, {0, 0, 0, 0}, {256, 0, 0, 0}};
    vaSharing->sharingFunctions.derivedImageFormatBpp = 8;
    vaSharing->sharingFunctions.derivedImageFormatFourCC = VA_FOURCC_RGBP;

    for (int plane = 0; plane < 3; plane++) {
        // NOTE(review): std::unique_ptr and static_cast have no template argument in the
        // text as it stands - confirm against the upstream file.
        auto vaSurface = std::unique_ptr(VASurface::createSharedVaSurface(
            &context, &vaSharing->sharingFunctions, CL_MEM_READ_WRITE, 0, &vaSurfaceId, plane, &errCode));
        ASSERT_NE(nullptr, vaSurface);

        auto graphicsAllocation = vaSurface->getGraphicsAllocation(rootDeviceIndex);

        auto handler = vaSurface->peekSharingHandler();
        ASSERT_NE(nullptr, handler);

        auto vaHandler = static_cast(handler);
        EXPECT_EQ(vaHandler->peekFunctionsHandler(), &vaSharing->sharingFunctions);

        auto &sharingFunctions =
vaSharing->sharingFunctions;
        EXPECT_FALSE(sharingFunctions.deriveImageCalled);
        EXPECT_FALSE(sharingFunctions.destroyImageCalled);

        EXPECT_TRUE(sharingFunctions.exportSurfaceHandleCalled);

        EXPECT_EQ(static_cast(VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2), sharingFunctions.receivedSurfaceMemType);
        EXPECT_EQ(static_cast(VA_EXPORT_SURFACE_READ_WRITE | VA_EXPORT_SURFACE_SEPARATE_LAYERS), sharingFunctions.receivedSurfaceFlags);

        EXPECT_EQ(256u, vaSurface->getImageDesc().image_width);
        EXPECT_EQ(256u, vaSurface->getImageDesc().image_height);

        if (plane != 0) {
            SurfaceOffsets surfaceOffsets;
            vaSurface->getSurfaceOffsets(surfaceOffsets);
            auto vaSurfaceDesc = sharingFunctions.mockVaSurfaceDesc;
            EXPECT_EQ(vaSurfaceDesc.layers[plane].offset[0], surfaceOffsets.offset);
            EXPECT_EQ(0u, surfaceOffsets.xOffset);
            EXPECT_EQ(0u, surfaceOffsets.yOffset);
            EXPECT_EQ(vaSurfaceDesc.layers[plane].offset[0] / vaSurfaceDesc.layers[plane].pitch[0], surfaceOffsets.yOffsetForUVplane);
        }

        EXPECT_TRUE(vaSurface->isTiledAllocation());
        EXPECT_EQ(8u, graphicsAllocation->peekSharedHandle());
    }
}

// NV12 derived image (12 bpp) with EnableExtendedVaFormats set, plane 0: the image is
// expected to use CL_UNORM_INT8 / CL_R with GMM surface format R8_UNORM, while the
// allocation's GMM resource format is NV12.
TEST_F(VaSharingTests, givenEnabledExtendedVaFormatsAndNV12FormatWhenCreatingSharedVaSurfaceForPlane0ThenCorrectFormatIsUsedByImageAndGMM) {
    DebugManagerStateRestore restore;
    DebugManager.flags.EnableExtendedVaFormats.set(true);

    vaSharing->sharingFunctions.derivedImageFormatBpp = 12;
    vaSharing->sharingFunctions.derivedImageFormatFourCC = VA_FOURCC_NV12;

    // NOTE(review): std::unique_ptr and the static_casts below have no template argument in
    // the text as it stands - confirm against the upstream file.
    auto vaSurface = std::unique_ptr(VASurface::createSharedVaSurface(&context, &vaSharing->sharingFunctions, CL_MEM_READ_WRITE, 0, &vaSurfaceId, 0, &errCode));
    auto graphicsAllocation = vaSurface->getGraphicsAllocation(rootDeviceIndex);
    EXPECT_EQ(static_cast(CL_UNORM_INT8), vaSurface->getImageFormat().image_channel_data_type);
    EXPECT_EQ(static_cast(CL_R), vaSurface->getImageFormat().image_channel_order);
    EXPECT_EQ(GMM_RESOURCE_FORMAT::GMM_FORMAT_R8_UNORM, vaSurface->getSurfaceFormatInfo().surfaceFormat.GMMSurfaceFormat);
    EXPECT_EQ(GMM_RESOURCE_FORMAT::GMM_FORMAT_NV12,
graphicsAllocation->getDefaultGmm()->resourceParams.Format);
    EXPECT_EQ(CL_SUCCESS, errCode);
}

// The API-level tests below reuse the VaSharingTests fixture under a separate suite name.
using ApiVaSharingTests = VaSharingTests;

// For every combination of READ_ONLY / WRITE_ONLY / READ_WRITE flags and plane 0 / 1, the
// format query for 2D images is expected to succeed and to return three formats in the
// order NV12, P010, P016.
TEST_F(ApiVaSharingTests, givenSupportedImageTypeWhenGettingSupportedVAApiFormatsThenCorrectListIsReturned) {
    cl_mem_flags flags[] = {CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY, CL_MEM_READ_WRITE};
    cl_mem_object_type image_type = CL_MEM_OBJECT_IMAGE2D;
    VAImageFormat vaApiFormats[10] = {};
    cl_uint numImageFormats = 0;

    // NOTE(review): the container declaration and the make_unique calls have no template
    // arguments in the text as it stands (they appear to have been lost) - confirm against
    // the upstream file.
    std::vector> supportedFormats;
    supportedFormats.push_back(std::make_unique(VAImageFormat{VA_FOURCC_NV12, VA_LSB_FIRST, 0, 0, 0, 0, 0, 0}));
    supportedFormats.push_back(std::make_unique(VAImageFormat{VA_FOURCC_P010, VA_LSB_FIRST, 0, 0, 0, 0, 0, 0}));
    supportedFormats.push_back(std::make_unique(VAImageFormat{VA_FOURCC_P016, VA_LSB_FIRST, 0, 0, 0, 0, 0, 0}));

    for (auto flag : flags) {

        for (auto plane : {0, 1}) {

            cl_int result = clGetSupportedVA_APIMediaSurfaceFormatsINTEL(
                &context, flag, image_type, plane, arrayCount(vaApiFormats), vaApiFormats, &numImageFormats);

            EXPECT_EQ(CL_SUCCESS, result);
            EXPECT_EQ(3u, numImageFormats);
            // Compare the returned fourcc codes with the expected list, position by position.
            int i = 0;
            for (auto &format : supportedFormats) {
                EXPECT_EQ(format->fourcc, vaApiFormats[i++].fourcc);
            }
        }
    }
}

// With zero entries and a null output array the query still succeeds and reports the number
// of available formats (3) for plane 0 and plane 1.
TEST_F(ApiVaSharingTests, givenZeroNumEntriesWhenGettingSupportedVAApiFormatsThenNumFormatsIsReturned) {
    cl_mem_flags flags = CL_MEM_READ_WRITE;
    cl_mem_object_type image_type = CL_MEM_OBJECT_IMAGE2D;
    cl_uint numImageFormats = 0;

    for (auto plane : {0, 1}) {

        cl_int result = clGetSupportedVA_APIMediaSurfaceFormatsINTEL(
            &context, flags, image_type, plane, 0, nullptr, &numImageFormats);

        EXPECT_EQ(CL_SUCCESS, result);
        EXPECT_EQ(3u, numImageFormats);
    }
}

// Passing nullptr for both the output array and the format-count pointer must be accepted;
// the call is expected to return CL_SUCCESS.
TEST_F(ApiVaSharingTests, givenNullNumImageFormatsWhenGettingSupportedVAApiFormatsThenNumFormatsIsNotDereferenced) {
    cl_mem_flags flags = CL_MEM_READ_WRITE;
    cl_mem_object_type image_type = CL_MEM_OBJECT_IMAGE2D;

    cl_int result = clGetSupportedVA_APIMediaSurfaceFormatsINTEL(
        &context, flags, image_type, 0, 0, nullptr, nullptr);

    EXPECT_EQ(CL_SUCCESS,
result);
}

// For an image type other than IMAGE2D (here IMAGE3D) the query succeeds but reports zero
// formats.
TEST_F(ApiVaSharingTests, givenOtherThanImage2DImageTypeWhenGettingSupportedVAApiFormatsThenSuccessAndZeroFormatsAreReturned) {
    cl_mem_flags flags = CL_MEM_KERNEL_READ_AND_WRITE;
    cl_mem_object_type image_type = CL_MEM_OBJECT_IMAGE3D;
    VAImageFormat vaApiFormats[10] = {};
    cl_uint numImageFormats = 0;

    cl_int result = clGetSupportedVA_APIMediaSurfaceFormatsINTEL(
        &context, flags, image_type, 0, arrayCount(vaApiFormats), vaApiFormats, &numImageFormats);

    EXPECT_EQ(CL_SUCCESS, result);
    EXPECT_EQ(0u, numImageFormats);
}

// CL_MEM_NO_ACCESS_INTEL is rejected by the format query: CL_INVALID_VALUE is returned and
// the format count stays at zero.
TEST_F(ApiVaSharingTests, givenInvalidFlagsWhenGettingSupportedVAApiFormatsThenIvalidValueErrorIsReturned) {
    cl_mem_flags flags = CL_MEM_NO_ACCESS_INTEL;
    cl_mem_object_type image_type = CL_MEM_OBJECT_IMAGE2D;
    VAImageFormat vaApiFormats[10] = {};
    cl_uint numImageFormats = 0;

    cl_int result = clGetSupportedVA_APIMediaSurfaceFormatsINTEL(
        &context, flags, image_type, 0, arrayCount(vaApiFormats), vaApiFormats, &numImageFormats);

    EXPECT_EQ(CL_INVALID_VALUE, result);
    EXPECT_EQ(0u, numImageFormats);
}

// A freshly constructed MockContext (no VA sharing set up by this test) is rejected by the
// format query: CL_INVALID_CONTEXT is returned and the format count stays at zero.
TEST_F(ApiVaSharingTests, givenInvalidContextWhenGettingSupportedVAApiFormatsThenIvalidContextErrorIsReturned) {
    cl_mem_flags flags = CL_MEM_READ_WRITE;
    cl_mem_object_type image_type = CL_MEM_OBJECT_IMAGE2D;
    VAImageFormat vaApiFormats[10] = {};
    cl_uint numImageFormats = 0;

    MockContext contextWihtoutVASharing;
    cl_int result = clGetSupportedVA_APIMediaSurfaceFormatsINTEL(
        &contextWihtoutVASharing, flags, image_type, 0, arrayCount(vaApiFormats), vaApiFormats, &numImageFormats);

    EXPECT_EQ(CL_INVALID_CONTEXT, result);
    EXPECT_EQ(0u, numImageFormats);
}

// VASurface::validate() accepts READ_ONLY, WRITE_ONLY and READ_WRITE flags for plane 0 and
// plane 1.
TEST(VaSurface, givenValidPlaneAndFlagsWhenValidatingInputsThenTrueIsReturned) {
    for (cl_uint plane = 0; plane <= 1; plane++) {
        EXPECT_TRUE(VASurface::validate(CL_MEM_READ_ONLY, plane));
        EXPECT_TRUE(VASurface::validate(CL_MEM_WRITE_ONLY, plane));
        EXPECT_TRUE(VASurface::validate(CL_MEM_READ_WRITE, plane));
    }
}

// VASurface::validate() rejects plane index 2 and the CL_MEM_USE_HOST_PTR flag.
// NOTE(review): the test name says "ThenTrueIsReturned" although the assertions expect
// false.
TEST(VaSurface, givenInValidPlaneOrFlagsWhenValidatingInputsThenTrueIsReturned) {
    cl_uint plane = 2;
EXPECT_FALSE(VASurface::validate(CL_MEM_READ_ONLY, plane));
    EXPECT_FALSE(VASurface::validate(CL_MEM_USE_HOST_PTR, 0));
}

// VA_FOURCC_NV11 is reported as unsupported by the two-plane check, by the three-plane
// check (with EnableExtendedVaFormats set), and has no extended surface format info.
TEST(VaSurface, givenNotSupportedVaFormatsWhenCheckingIfSupportedThenFalseIsReturned) {
    EXPECT_FALSE(VASurface::isSupportedFourCCTwoPlaneFormat(VA_FOURCC_NV11));

    DebugManagerStateRestore restore;
    DebugManager.flags.EnableExtendedVaFormats.set(true);
    EXPECT_FALSE(VASurface::isSupportedFourCCThreePlaneFormat(VA_FOURCC_NV11));
    EXPECT_EQ(nullptr, VASurface::getExtendedSurfaceFormatInfo(VA_FOURCC_NV11));
}

// When the mocked image-format query reports VA_STATUS_ERROR_INVALID_VALUE, both supported
// format lists stay empty after querySupportedVaImageFormats().
TEST(VaSharingFunctions, givenErrorReturnedFromVaLibWhenQuerySupportedVaImageFormatsThenSupportedFormatsAreNotSet) {
    VASharingFunctionsMock sharingFunctions;
    sharingFunctions.queryImageFormatsReturnStatus = VA_STATUS_ERROR_INVALID_VALUE;

    sharingFunctions.querySupportedVaImageFormats(VADisplay(1));

    EXPECT_EQ(0u, sharingFunctions.supported2PlaneFormats.size());
    EXPECT_EQ(0u, sharingFunctions.supported3PlaneFormats.size());
}

// With both supported format lists empty, getSupportedFormats() reports zero formats.
TEST(VaSharingFunctions, givenNoSupportedFormatsWhenQuerySupportedVaImageFormatsThenSupportedFormatsAreNotSet) {
    VASharingFunctionsMock sharingFunctions;
    EXPECT_EQ(0u, sharingFunctions.supported2PlaneFormats.size());
    EXPECT_EQ(0u, sharingFunctions.supported3PlaneFormats.size());
    cl_mem_flags flags = CL_MEM_READ_WRITE;
    cl_mem_object_type image_type = CL_MEM_OBJECT_IMAGE2D;
    cl_uint numImageFormats = 0;
    VAImageFormat vaApiFormats[10] = {};

    sharingFunctions.getSupportedFormats(
        flags, image_type, 0, 10, vaApiFormats, &numImageFormats);

    EXPECT_EQ(0u, numImageFormats);
}

// With two supported two-plane formats but only one requested entry, getSupportedFormats()
// reports a count of 2 yet writes only the first format (NV12) into the output array.
TEST(VaSharingFunctions, givenNumEntriesLowerThanSupportedFormatsWhenGettingSupportedFormatsThenOnlyNumEntiresAreReturned) {
    VASharingFunctionsMock sharingFunctions;
    VAImageFormat imageFormat = {VA_FOURCC_NV12, 1, 12};
    sharingFunctions.supported2PlaneFormats.emplace_back(imageFormat);
    imageFormat.fourcc = VA_FOURCC_P010;
    sharingFunctions.supported2PlaneFormats.emplace_back(imageFormat);

    EXPECT_EQ(2u, sharingFunctions.supported2PlaneFormats.size());

    cl_mem_flags flags =
CL_MEM_READ_WRITE;
    cl_mem_object_type image_type = CL_MEM_OBJECT_IMAGE2D;
    cl_uint numImageFormats = 0;
    VAImageFormat vaApiFormats[3] = {};

    sharingFunctions.getSupportedFormats(
        flags, image_type, 0, 1, vaApiFormats, &numImageFormats);

    EXPECT_EQ(2u, numImageFormats);
    EXPECT_EQ(static_cast(VA_FOURCC_NV12), vaApiFormats[0].fourcc);
    EXPECT_EQ(0u, vaApiFormats[1].fourcc);
    EXPECT_EQ(0u, vaApiFormats[2].fourcc);
}

// Runs acquire + release on a mock queue once with interop user sync disabled and once
// with it enabled, and checks that finish() is called on the queue only when interop user
// sync is disabled.
TEST_F(VaSharingTests, givenInteropUserSyncIsNotSpecifiedDuringContextCreationWhenEnqueueReleaseVAIsCalledThenAllWorkAlreadySubmittedShouldCompleteExecution) {
    // Mock queue whose finish() only records that it was called.
    struct MockCommandQueueToTestFinish : MockCommandQueue {
        MockCommandQueueToTestFinish(Context *context, ClDevice *device, const cl_queue_properties *props)
            : MockCommandQueue(context, device, props, false) {
        }
        cl_int finish() override {
            finishCalled = true;
            return CL_SUCCESS;
        }
        bool finishCalled = false;
    };

    MockContext mockContext;
    MockCommandQueueToTestFinish mockCommandQueue(&mockContext, mockContext.getDevice(0), 0);

    createMediaSurface();

    for (bool specifyInteropUseSync : {false, true}) {
        mockContext.setInteropUserSyncEnabled(specifyInteropUseSync);
        // Reset between iterations so each setting is observed independently.
        mockCommandQueue.finishCalled = false;

        errCode = clEnqueueAcquireVA_APIMediaSurfacesINTEL(&mockCommandQueue, 1, &sharedClMem, 0, nullptr, nullptr);
        EXPECT_EQ(CL_SUCCESS, errCode);

        errCode = clEnqueueReleaseVA_APIMediaSurfacesINTEL(&mockCommandQueue, 1, &sharedClMem, 0, nullptr, nullptr);
        EXPECT_EQ(CL_SUCCESS, errCode);

        EXPECT_EQ(!specifyInteropUseSync, mockCommandQueue.finishCalled);
    }
}

// Plane 2 with an empty three-plane format list: getSupportedFormats() leaves the output
// array untouched (fourcc of the first entry stays 0).
TEST_F(VaSharingTests, givenPlaneArgumentEquals2WithEmptySupported3PlaneFormatsVectorThentNoFormatIsReturned) {
    VASharingFunctionsMock sharingFunctions;
    EXPECT_EQ(sharingFunctions.supported3PlaneFormats.size(), 0u);

    cl_mem_flags flags = CL_MEM_READ_WRITE;
    cl_mem_object_type image_type = CL_MEM_OBJECT_IMAGE2D;
    cl_uint numImageFormats = 4;
    VAImageFormat vaApiFormats[4] = {};

    sharingFunctions.getSupportedFormats(
        flags, image_type, 2, 1, vaApiFormats, &numImageFormats);
EXPECT_EQ(0u, vaApiFormats[0].fourcc);
}

// Plane 3 is outside the handled range: even with one two-plane and one three-plane format
// registered, getSupportedFormats() leaves the output array untouched.
TEST_F(VaSharingTests, givenPlaneArgumentGreaterThan2ThenNoFormatIsReturned) {
    VASharingFunctionsMock sharingFunctions;
    EXPECT_EQ(sharingFunctions.supported2PlaneFormats.size(), 0u);
    EXPECT_EQ(sharingFunctions.supported3PlaneFormats.size(), 0u);
    VAImageFormat imageFormat = {VA_FOURCC_RGBP, 1, 12};
    sharingFunctions.supported3PlaneFormats.emplace_back(imageFormat);
    imageFormat = {VA_FOURCC_NV12, 1, 12};
    sharingFunctions.supported2PlaneFormats.emplace_back(imageFormat);

    cl_mem_flags flags = CL_MEM_READ_WRITE;
    cl_mem_object_type image_type = CL_MEM_OBJECT_IMAGE2D;
    cl_uint numImageFormats = 2;
    VAImageFormat vaApiFormats[2] = {};

    sharingFunctions.getSupportedFormats(
        flags, image_type, 3, 1, vaApiFormats, &numImageFormats);

    EXPECT_EQ(0u, vaApiFormats[0].fourcc);
    EXPECT_EQ(0u, vaApiFormats[1].fourcc);
}

// Plane 2 with RGBP registered as the only three-plane format: getSupportedFormats() writes
// RGBP as the first output entry.
TEST_F(VaSharingTests, givenPlaneArgumentEquals2ThenOnlyRGBPFormatIsReturned) {
    VASharingFunctionsMock sharingFunctions;
    EXPECT_EQ(sharingFunctions.supported2PlaneFormats.size(), 0u);
    EXPECT_EQ(sharingFunctions.supported3PlaneFormats.size(), 0u);
    VAImageFormat imageFormat = {VA_FOURCC_RGBP, 1, 12};
    sharingFunctions.supported3PlaneFormats.emplace_back(imageFormat);

    cl_mem_flags flags = CL_MEM_READ_WRITE;
    cl_mem_object_type image_type = CL_MEM_OBJECT_IMAGE2D;
    cl_uint numImageFormats = 1;
    VAImageFormat vaApiFormats[3] = {};

    sharingFunctions.getSupportedFormats(
        flags, image_type, 2, 1, vaApiFormats, &numImageFormats);

    // NOTE(review): this cast has no target type in the text as it stands (the template
    // argument appears to have been lost) - confirm against the upstream file.
    EXPECT_EQ(static_cast(VA_FOURCC_RGBP), vaApiFormats[0].fourcc);
}

// Plane 0 in two situations that must both leave the output array untouched: first with
// both supported format lists empty, then with formats registered but a null output array
// passed to getSupportedFormats().
TEST_F(VaSharingTests, givenPlaneArgumentLessThan2WithProperFormatsAndEmptySupportedFormatsVectorsThenNoFormatIsReturned) {
    VASharingFunctionsMock sharingFunctions;
    EXPECT_EQ(sharingFunctions.supported2PlaneFormats.size(), 0u);
    EXPECT_EQ(sharingFunctions.supported3PlaneFormats.size(), 0u);

    cl_mem_flags flags = CL_MEM_READ_WRITE;
    cl_mem_object_type image_type = CL_MEM_OBJECT_IMAGE2D;
    cl_uint numImageFormats = 1;
    VAImageFormat vaApiFormats[3] = {};
sharingFunctions.getSupportedFormats(
        flags, image_type, 0, 1, vaApiFormats, &numImageFormats);

    EXPECT_EQ(0u, vaApiFormats[0].fourcc);

    VAImageFormat imageFormat = {VA_FOURCC_NV12, 1, 12};
    sharingFunctions.supported2PlaneFormats.emplace_back(imageFormat);
    sharingFunctions.supported3PlaneFormats.emplace_back(imageFormat);

    sharingFunctions.getSupportedFormats(
        flags, image_type, 0, 1, nullptr, &numImageFormats);

    EXPECT_EQ(0u, vaApiFormats[0].fourcc);
}

// Plane 0 with three two-plane formats and one three-plane format registered and four
// output entries available: getSupportedFormats() writes the two-plane formats first
// (NV12, P010, P016) followed by the three-plane format (RGBP).
TEST_F(VaSharingTests, givenPlaneArgumentLessThan2WithProperFormatsAndSupportedFormatsVectorsThenAll2And3PlaneFormatsAreReturned) {
    VASharingFunctionsMock sharingFunctions;
    EXPECT_EQ(sharingFunctions.supported2PlaneFormats.size(), 0u);
    EXPECT_EQ(sharingFunctions.supported3PlaneFormats.size(), 0u);

    cl_mem_flags flags = CL_MEM_READ_WRITE;
    cl_mem_object_type image_type = CL_MEM_OBJECT_IMAGE2D;
    cl_uint numImageFormats = 4;
    VAImageFormat vaApiFormats[4] = {};

    sharingFunctions.supported2PlaneFormats.push_back(VAImageFormat{VA_FOURCC_NV12, VA_LSB_FIRST, 0, 0, 0, 0, 0, 0});
    sharingFunctions.supported2PlaneFormats.push_back(VAImageFormat{VA_FOURCC_P010, VA_LSB_FIRST, 0, 0, 0, 0, 0, 0});
    sharingFunctions.supported2PlaneFormats.push_back(VAImageFormat{VA_FOURCC_P016, VA_LSB_FIRST, 0, 0, 0, 0, 0, 0});
    sharingFunctions.supported3PlaneFormats.push_back(VAImageFormat{VA_FOURCC_RGBP, VA_LSB_FIRST, 0, 0, 0, 0, 0, 0});

    sharingFunctions.getSupportedFormats(
        flags, image_type, 0, 4, vaApiFormats, &numImageFormats);

    // NOTE(review): these casts have no target type in the text as it stands (the template
    // argument appears to have been lost) - confirm against the upstream file.
    EXPECT_EQ(static_cast(VA_FOURCC_NV12), vaApiFormats[0].fourcc);
    EXPECT_EQ(static_cast(VA_FOURCC_P010), vaApiFormats[1].fourcc);
    EXPECT_EQ(static_cast(VA_FOURCC_P016), vaApiFormats[2].fourcc);
    EXPECT_EQ(static_cast(VA_FOURCC_RGBP), vaApiFormats[3].fourcc);
}

// Plane 0 with only a three-plane format (RGBP) registered: getSupportedFormats() writes
// RGBP as the first output entry and leaves the remaining entries untouched.
TEST_F(VaSharingTests, givenPlaneArgumentLessThan2WithProperFormatsAndOnly3PlaneSupportedFormatsVectorThen3PlaneFormatIsReturned) {
    VASharingFunctionsMock sharingFunctions;
    EXPECT_EQ(sharingFunctions.supported2PlaneFormats.size(), 0u);

    cl_mem_flags flags = CL_MEM_READ_WRITE;
cl_mem_object_type image_type = CL_MEM_OBJECT_IMAGE2D; cl_uint numImageFormats = 4; VAImageFormat vaApiFormats[4] = {}; sharingFunctions.supported3PlaneFormats.push_back(VAImageFormat{VA_FOURCC_RGBP, VA_LSB_FIRST, 0, 0, 0, 0, 0, 0}); EXPECT_EQ(sharingFunctions.supported2PlaneFormats.size(), 0u); sharingFunctions.getSupportedFormats( flags, image_type, 0, 4, vaApiFormats, &numImageFormats); EXPECT_EQ(static_cast(VA_FOURCC_RGBP), vaApiFormats[0].fourcc); EXPECT_EQ(0u, vaApiFormats[1].fourcc); EXPECT_EQ(0u, vaApiFormats[2].fourcc); EXPECT_EQ(0u, vaApiFormats[3].fourcc); } TEST_F(VaSharingTests, givenPlaneArgumentLessThan2WithProperFormatsAndOnly2PlaneSupportedFormatsVectorThen2PlaneFormatsAreReturned) { VASharingFunctionsMock sharingFunctions; EXPECT_EQ(sharingFunctions.supported2PlaneFormats.size(), 0u); cl_mem_flags flags = CL_MEM_READ_WRITE; cl_mem_object_type image_type = CL_MEM_OBJECT_IMAGE2D; cl_uint numImageFormats = 4; VAImageFormat vaApiFormats[4] = {}; sharingFunctions.supported2PlaneFormats.push_back(VAImageFormat{VA_FOURCC_NV12, VA_LSB_FIRST, 0, 0, 0, 0, 0, 0}); sharingFunctions.supported2PlaneFormats.push_back(VAImageFormat{VA_FOURCC_P010, VA_LSB_FIRST, 0, 0, 0, 0, 0, 0}); sharingFunctions.supported2PlaneFormats.push_back(VAImageFormat{VA_FOURCC_P016, VA_LSB_FIRST, 0, 0, 0, 0, 0, 0}); EXPECT_EQ(sharingFunctions.supported3PlaneFormats.size(), 0u); sharingFunctions.getSupportedFormats( flags, image_type, 0, 4, vaApiFormats, &numImageFormats); EXPECT_EQ(static_cast(VA_FOURCC_NV12), vaApiFormats[0].fourcc); EXPECT_EQ(static_cast(VA_FOURCC_P010), vaApiFormats[1].fourcc); EXPECT_EQ(static_cast(VA_FOURCC_P016), vaApiFormats[2].fourcc); EXPECT_EQ(0u, vaApiFormats[3].fourcc); } TEST_F(VaSharingTests, givenPlaneArgumentEquals2WithoutNoProperFormatsThenReturn) { VASharingFunctionsMock sharingFunctions; EXPECT_EQ(sharingFunctions.supported2PlaneFormats.size(), 0u); EXPECT_EQ(sharingFunctions.supported3PlaneFormats.size(), 0u); cl_mem_flags flags = CL_MEM_READ_WRITE; 
cl_mem_object_type image_type = CL_MEM_OBJECT_IMAGE2D; cl_uint numImageFormats = 1; sharingFunctions.supported3PlaneFormats.push_back(VAImageFormat{VA_FOURCC_RGBP, VA_LSB_FIRST, 0, 0, 0, 0, 0, 0}); cl_int result = sharingFunctions.getSupportedFormats( flags, image_type, 2, 4, nullptr, &numImageFormats); EXPECT_EQ(result, CL_SUCCESS); } class VaDeviceTests : public Test { public: VaDeviceTests() { ultHwConfig.useMockedPrepareDeviceEnvironmentsFunc = false; } VariableBackup backup{&ultHwConfig}; }; TEST_F(VaDeviceTests, givenVADeviceWhenGetDeviceFromVAIsCalledThenRootDeviceIsReturned) { auto vaDisplay = std::make_unique(); vaDisplay->vadpy_magic = 0x56414430; auto contextPtr = std::make_unique(); auto drmState = std::make_unique(); vaDisplay->pDriverContext = contextPtr.get(); contextPtr->drm_state = drmState.get(); *static_cast(contextPtr->drm_state) = 1; auto device = pPlatform->getClDevice(0); NEO::Device *neoDevice = &device->getDevice(); auto mockDrm = static_cast(neoDevice->getRootDeviceEnvironment().osInterface->getDriverModel()->as()); mockDrm->setPciPath("0000:00:02.0"); VADevice vaDevice{}; auto clDevice = vaDevice.getDeviceFromVA(pPlatform, vaDisplay.get()); EXPECT_NE(clDevice, nullptr); } TEST_F(VaDeviceTests, givenVADeviceAndInvalidPciPathOfClDeviceWhenGetDeviceFromVAIsCalledThenNullptrIsReturned) { auto vaDisplay = std::make_unique(); vaDisplay->vadpy_magic = 0x56414430; auto contextPtr = std::make_unique(); auto drmState = std::make_unique(); vaDisplay->pDriverContext = contextPtr.get(); contextPtr->drm_state = drmState.get(); *static_cast(contextPtr->drm_state) = 1; auto device = pPlatform->getClDevice(0); NEO::Device *neoDevice = &device->getDevice(); auto mockDrm = static_cast(neoDevice->getRootDeviceEnvironment().osInterface->getDriverModel()->as()); mockDrm->setPciPath("00:00.0"); VADevice vaDevice{}; auto clDevice = vaDevice.getDeviceFromVA(pPlatform, vaDisplay.get()); EXPECT_EQ(clDevice, nullptr); } TEST_F(VaDeviceTests, 
givenVADeviceAndInvalidFDWhenGetDeviceFromVAIsCalledThenNullptrIsReturned) { auto vaDisplay = std::make_unique(); vaDisplay->vadpy_magic = 0x56414430; auto contextPtr = std::make_unique(); auto drmState = std::make_unique(); vaDisplay->pDriverContext = contextPtr.get(); contextPtr->drm_state = drmState.get(); *static_cast(contextPtr->drm_state) = 0; VADevice vaDevice{}; auto clDevice = vaDevice.getDeviceFromVA(pPlatform, vaDisplay.get()); EXPECT_EQ(clDevice, nullptr); } TEST_F(VaDeviceTests, givenVADeviceAndInvalidMagicNumberWhenGetDeviceFromVAIsCalledThenUnrecoverableIsCalled) { auto vaDisplay = std::make_unique(); vaDisplay->vadpy_magic = 0x0; VADevice vaDevice{}; EXPECT_ANY_THROW(vaDevice.getDeviceFromVA(pPlatform, vaDisplay.get())); } TEST_F(VaDeviceTests, givenVADeviceAndNegativeFdWhenGetDeviceFromVAIsCalledThenUnrecoverableIsCalled) { auto vaDisplay = std::make_unique(); vaDisplay->vadpy_magic = 0x56414430; auto contextPtr = std::make_unique(); auto drmState = std::make_unique(); vaDisplay->pDriverContext = contextPtr.get(); contextPtr->drm_state = drmState.get(); *static_cast(contextPtr->drm_state) = -1; VADevice vaDevice{}; EXPECT_ANY_THROW(vaDevice.getDeviceFromVA(pPlatform, vaDisplay.get())); } namespace NEO { namespace SysCalls { extern bool makeFakeDevicePath; extern bool allowFakeDevicePath; } // namespace SysCalls } // namespace NEO TEST_F(VaDeviceTests, givenVADeviceAndFakeDevicePathWhenGetDeviceFromVAIsCalledThenNullptrIsReturned) { VariableBackup makeFakePathBackup(&SysCalls::makeFakeDevicePath, true); auto vaDisplay = std::make_unique(); vaDisplay->vadpy_magic = 0x56414430; auto contextPtr = std::make_unique(); auto drmState = std::make_unique(); vaDisplay->pDriverContext = contextPtr.get(); contextPtr->drm_state = drmState.get(); *static_cast(contextPtr->drm_state) = 1; VADevice vaDevice{}; auto clDevice = vaDevice.getDeviceFromVA(pPlatform, vaDisplay.get()); EXPECT_EQ(clDevice, nullptr); } TEST_F(VaDeviceTests, 
givenVADeviceAndAbsolutePathWhenGetDeviceFromVAIsCalledThenNullptrIsReturned) { VariableBackup makeFakePathBackup(&SysCalls::makeFakeDevicePath, true); VariableBackup allowFakeDevicePathBackup(&SysCalls::allowFakeDevicePath, true); auto vaDisplay = std::make_unique(); vaDisplay->vadpy_magic = 0x56414430; auto contextPtr = std::make_unique(); auto drmState = std::make_unique(); vaDisplay->pDriverContext = contextPtr.get(); contextPtr->drm_state = drmState.get(); *static_cast(contextPtr->drm_state) = 1; VADevice vaDevice{}; auto clDevice = vaDevice.getDeviceFromVA(pPlatform, vaDisplay.get()); EXPECT_EQ(clDevice, nullptr); } TEST_F(VaDeviceTests, givenValidPlatformWithInvalidVaDisplayWhenGetDeviceIdsFromVaApiMediaAdapterCalledThenReturnNullptrAndZeroDevices) { cl_device_id devices = 0; cl_uint numDevices = 0; cl_platform_id platformId = pPlatform; auto vaDisplay = std::make_unique(); vaDisplay->vadpy_magic = 0x56414430; auto contextPtr = std::make_unique(); auto drmState = std::make_unique(); vaDisplay->pDriverContext = contextPtr.get(); contextPtr->drm_state = drmState.get(); *static_cast(contextPtr->drm_state) = 1; auto device = pPlatform->getClDevice(0); NEO::Device *neoDevice = &device->getDevice(); auto mockDrm = static_cast(neoDevice->getRootDeviceEnvironment().osInterface->getDriverModel()->as()); mockDrm->setPciPath("00:00.0"); auto errCode = clGetDeviceIDsFromVA_APIMediaAdapterINTEL(platformId, 0u, vaDisplay.get(), 0u, 1, &devices, &numDevices); EXPECT_EQ(CL_DEVICE_NOT_FOUND, errCode); EXPECT_EQ(0u, numDevices); EXPECT_EQ(nullptr, devices); } TEST_F(VaDeviceTests, givenValidPlatformWhenGetDeviceIdsFromVaApiMediaAdapterCalledThenReturnFirstDevice) { cl_device_id devices = 0; cl_uint numDevices = 0; cl_platform_id platformId = pPlatform; auto vaDisplay = std::make_unique(); vaDisplay->vadpy_magic = 0x56414430; auto contextPtr = std::make_unique(); auto drmState = std::make_unique(); vaDisplay->pDriverContext = contextPtr.get(); contextPtr->drm_state = 
drmState.get(); *static_cast(contextPtr->drm_state) = 1; auto device = pPlatform->getClDevice(0); NEO::Device *neoDevice = &device->getDevice(); auto mockDrm = static_cast(neoDevice->getRootDeviceEnvironment().osInterface->getDriverModel()->as()); mockDrm->setPciPath("0000:00:02.0"); auto errCode = clGetDeviceIDsFromVA_APIMediaAdapterINTEL(platformId, 0u, vaDisplay.get(), 0u, 1, &devices, &numDevices); EXPECT_EQ(CL_SUCCESS, errCode); EXPECT_EQ(1u, numDevices); EXPECT_EQ(pPlatform->getClDevice(0), devices); } compute-runtime-22.14.22890/opencl/test/unit_test/sku_info/000077500000000000000000000000001422164147700234445ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/sku_info/CMakeLists.txt000066400000000000000000000010001422164147700261730ustar00rootroot00000000000000# # Copyright (C) 2018-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_sku_info ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/sku_info_base_reference.h ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}sku_info_transfer_tests.cpp ) if(WIN32) list(APPEND IGDRCL_SRCS_tests_sku_info ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}sku_info_receiver_tests.cpp ) endif() target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_sku_info}) compute-runtime-22.14.22890/opencl/test/unit_test/sku_info/sku_info_base_reference.h000066400000000000000000000157121422164147700304500ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/gmm_helper/gmm_lib.h" #include "sku_info.h" namespace NEO { struct SkuInfoBaseReference { static void fillReferenceFtrForTransfer(_SKU_FEATURE_TABLE &refFtrTable) { memset(&refFtrTable, 0, sizeof(refFtrTable)); refFtrTable.FtrStandardMipTailFormat = 1; refFtrTable.FtrULT = 1; refFtrTable.FtrEDram = 1; refFtrTable.FtrFrameBufferLLC = 1; refFtrTable.FtrCrystalwell = 1; refFtrTable.FtrDisplayEngineS3d = 1; 
refFtrTable.FtrTileY = 1; refFtrTable.FtrDisplayYTiling = 1; refFtrTable.FtrFbc = 1; refFtrTable.FtrLCIA = 1; refFtrTable.FtrIA32eGfxPTEs = 1; refFtrTable.FtrWddm2GpuMmu = 1; refFtrTable.FtrWddm2_1_64kbPages = 1; refFtrTable.FtrTranslationTable = 1; refFtrTable.FtrUserModeTranslationTable = 1; refFtrTable.FtrWddm2Svm = 1; refFtrTable.FtrLLCBypass = 1; refFtrTable.FtrE2ECompression = 1; refFtrTable.FtrLinearCCS = 1; refFtrTable.FtrCCSRing = 1; refFtrTable.FtrCCSNode = 1; refFtrTable.FtrMemTypeMocsDeferPAT = 1; refFtrTable.FtrLocalMemory = 1; refFtrTable.FtrLocalMemoryAllows4KB = 1; refFtrTable.FtrSVM = 1; refFtrTable.FtrFlatPhysCCS = 1; refFtrTable.FtrMultiTileArch = 1; refFtrTable.FtrCCSMultiInstance = 1; refFtrTable.FtrPpgtt64KBWalkOptimization = 1; refFtrTable.FtrUnified3DMediaCompressionFormats = 1; refFtrTable.Ftr57bGPUAddressing = 1; } static void fillReferenceWaForTransfer(_WA_TABLE &refWaTable) { memset(&refWaTable, 0, sizeof(refWaTable)); refWaTable.WaFbcLinearSurfaceStride = 1; refWaTable.WaDisableEdramForDisplayRT = 1; refWaTable.WaEncryptedEdramOnlyPartials = 1; refWaTable.WaLosslessCompressionSurfaceStride = 1; refWaTable.WaRestrictPitch128KB = 1; refWaTable.WaLimit128BMediaCompr = 1; refWaTable.WaUntypedBufferCompression = 1; refWaTable.WaAuxTable16KGranular = 1; refWaTable.WaAuxTable64KGranular = 1; } static void fillReferenceFtrToReceive(FeatureTable &refFtrTable) { refFtrTable = {}; refFtrTable.flags.ftrDesktop = true; refFtrTable.flags.ftrChannelSwizzlingXOREnabled = true; refFtrTable.flags.ftrIVBM0M1Platform = true; refFtrTable.flags.ftrSGTPVSKUStrapPresent = true; refFtrTable.flags.ftr5Slice = true; refFtrTable.flags.ftrGpGpuMidBatchPreempt = true; refFtrTable.flags.ftrGpGpuThreadGroupLevelPreempt = true; refFtrTable.flags.ftrGpGpuMidThreadLevelPreempt = true; refFtrTable.flags.ftrIoMmuPageFaulting = true; refFtrTable.flags.ftrWddm2Svm = true; refFtrTable.flags.ftrPooledEuEnabled = true; refFtrTable.flags.ftrResourceStreamer = true; 
refFtrTable.flags.ftrPPGTT = true; refFtrTable.flags.ftrSVM = true; refFtrTable.flags.ftrEDram = true; refFtrTable.flags.ftrL3IACoherency = true; refFtrTable.flags.ftrIA32eGfxPTEs = true; refFtrTable.flags.ftr3dMidBatchPreempt = true; refFtrTable.flags.ftr3dObjectLevelPreempt = true; refFtrTable.flags.ftrPerCtxtPreemptionGranularityControl = true; refFtrTable.flags.ftrTileY = true; refFtrTable.flags.ftrDisplayYTiling = true; refFtrTable.flags.ftrTranslationTable = true; refFtrTable.flags.ftrUserModeTranslationTable = true; refFtrTable.flags.ftrEnableGuC = true; refFtrTable.flags.ftrFbc = true; refFtrTable.flags.ftrFbc2AddressTranslation = true; refFtrTable.flags.ftrFbcBlitterTracking = true; refFtrTable.flags.ftrFbcCpuTracking = true; refFtrTable.flags.ftrULT = true; refFtrTable.flags.ftrLCIA = true; refFtrTable.flags.ftrGttCacheInvalidation = true; refFtrTable.flags.ftrTileMappedResource = true; refFtrTable.flags.ftrAstcHdr2D = true; refFtrTable.flags.ftrAstcLdr2D = true; refFtrTable.flags.ftrStandardMipTailFormat = true; refFtrTable.flags.ftrFrameBufferLLC = true; refFtrTable.flags.ftrCrystalwell = true; refFtrTable.flags.ftrLLCBypass = true; refFtrTable.flags.ftrDisplayEngineS3d = true; refFtrTable.flags.ftrWddm2GpuMmu = true; refFtrTable.flags.ftrWddm2_1_64kbPages = true; refFtrTable.flags.ftrKmdDaf = true; refFtrTable.flags.ftrSimulationMode = true; refFtrTable.flags.ftrE2ECompression = true; refFtrTable.flags.ftrLinearCCS = true; refFtrTable.flags.ftrCCSRing = true; refFtrTable.flags.ftrCCSNode = true; refFtrTable.flags.ftrRcsNode = true; refFtrTable.flags.ftrMemTypeMocsDeferPAT = true; refFtrTable.flags.ftrLocalMemory = true; refFtrTable.flags.ftrLocalMemoryAllows4KB = true; refFtrTable.flags.ftrFlatPhysCCS = true; refFtrTable.flags.ftrMultiTileArch = true; refFtrTable.flags.ftrCCSMultiInstance = true; refFtrTable.flags.ftrPpgtt64KBWalkOptimization = true; refFtrTable.flags.ftrUnified3DMediaCompressionFormats = true; refFtrTable.flags.ftr57bGPUAddressing = 
true; } static void fillReferenceWaToReceive(WorkaroundTable &refWaTable) { refWaTable = {}; refWaTable.flags.waDoNotUseMIReportPerfCount = true; refWaTable.flags.waEnablePreemptionGranularityControlByUMD = true; refWaTable.flags.waSendMIFLUSHBeforeVFE = true; refWaTable.flags.waReportPerfCountUseGlobalContextID = true; refWaTable.flags.waDisableLSQCROPERFforOCL = true; refWaTable.flags.waMsaa8xTileYDepthPitchAlignment = true; refWaTable.flags.waLosslessCompressionSurfaceStride = true; refWaTable.flags.waFbcLinearSurfaceStride = true; refWaTable.flags.wa4kAlignUVOffsetNV12LinearSurface = true; refWaTable.flags.waEncryptedEdramOnlyPartials = true; refWaTable.flags.waDisableEdramForDisplayRT = true; refWaTable.flags.waForcePcBbFullCfgRestore = true; refWaTable.flags.waCompressedResourceRequiresConstVA21 = true; refWaTable.flags.waDisablePerCtxtPreemptionGranularityControl = true; refWaTable.flags.waLLCCachingUnsupported = true; refWaTable.flags.waUseVAlign16OnTileXYBpp816 = true; refWaTable.flags.waModifyVFEStateAfterGPGPUPreemption = true; refWaTable.flags.waCSRUncachable = true; refWaTable.flags.waSamplerCacheFlushBetweenRedescribedSurfaceReads = true; refWaTable.flags.waRestrictPitch128KB = true; refWaTable.flags.waLimit128BMediaCompr = true; refWaTable.flags.waUntypedBufferCompression = true; refWaTable.flags.waAuxTable16KGranular = true; refWaTable.flags.waDisableFusedThreadScheduling = true; refWaTable.flags.waAuxTable64KGranular = true; } }; // namespace SkuInfoBaseReference } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/sku_info/sku_info_receiver_tests.cpp000066400000000000000000000060631422164147700311000ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/sku_info/operations/windows/sku_info_receiver.h" #include "opencl/test/unit_test/sku_info/sku_info_base_reference.h" #include "gtest/gtest.h" using namespace NEO; inline bool operator==(const 
FeatureTable &lhs, const FeatureTable &rhs) { return lhs.ftrBcsInfo == rhs.ftrBcsInfo && lhs.packed == rhs.packed; } TEST(SkuInfoReceiverTest, givenAdapterInfoWhenReceivingThenUpdateFtrTable) { FeatureTable refFeatureTable = {}; FeatureTable requestedFeatureTable = {}; ADAPTER_INFO adapterInfo = {}; memset(&adapterInfo.SkuTable, ~0, sizeof(adapterInfo.SkuTable)); EXPECT_EQ(1lu, requestedFeatureTable.ftrBcsInfo.to_ulong()); SkuInfoReceiver::receiveFtrTableFromAdapterInfo(&requestedFeatureTable, &adapterInfo); SkuInfoBaseReference::fillReferenceFtrToReceive(refFeatureTable); EXPECT_EQ(1lu, requestedFeatureTable.ftrBcsInfo.to_ulong()); EXPECT_TRUE(refFeatureTable == requestedFeatureTable); refFeatureTable.flags.ftr3dMidBatchPreempt = false; requestedFeatureTable.flags.ftr3dMidBatchPreempt = true; EXPECT_FALSE(refFeatureTable == requestedFeatureTable); } TEST(SkuInfoReceiverTest, givenFeatureTableWhenDifferentDataThenEqualityOperatorReturnsCorrectScore) { FeatureTable refFeatureTable = {}; FeatureTable requestedFeatureTable = {}; refFeatureTable.ftrBcsInfo = 1; requestedFeatureTable.ftrBcsInfo = 0; EXPECT_FALSE(refFeatureTable == requestedFeatureTable); refFeatureTable.ftrBcsInfo = 0; requestedFeatureTable.ftrBcsInfo = 1; EXPECT_FALSE(refFeatureTable == requestedFeatureTable); refFeatureTable.ftrBcsInfo = 1; requestedFeatureTable.ftrBcsInfo = 1; refFeatureTable.packed[0] = 1u; requestedFeatureTable.packed[0] = 0; EXPECT_FALSE(refFeatureTable == requestedFeatureTable); refFeatureTable.packed[0] = 0; requestedFeatureTable.packed[0] = 1; EXPECT_FALSE(refFeatureTable == requestedFeatureTable); refFeatureTable.packed[0] = 0; requestedFeatureTable.packed[0] = 0; refFeatureTable.packed[1] = 0; requestedFeatureTable.packed[1] = 1; EXPECT_FALSE(refFeatureTable == requestedFeatureTable); refFeatureTable.packed[0] = 0; requestedFeatureTable.packed[0] = 0; refFeatureTable.packed[1] = 1; requestedFeatureTable.packed[1] = 0; EXPECT_FALSE(refFeatureTable == requestedFeatureTable); 
refFeatureTable.packed[1] = 1; requestedFeatureTable.packed[1] = 1; EXPECT_TRUE(refFeatureTable == requestedFeatureTable); } TEST(SkuInfoReceiverTest, givenAdapterInfoWhenReceivingThenUpdateWaTable) { WorkaroundTable refWaTable = {}; WorkaroundTable requestedWaTable = {}; ADAPTER_INFO adapterInfo = {}; memset(&adapterInfo.WaTable, ~0, sizeof(adapterInfo.WaTable)); SkuInfoReceiver::receiveWaTableFromAdapterInfo(&requestedWaTable, &adapterInfo); SkuInfoBaseReference::fillReferenceWaToReceive(refWaTable); EXPECT_TRUE(memcmp(&requestedWaTable, &refWaTable, sizeof(WorkaroundTable)) == 0); } compute-runtime-22.14.22890/opencl/test/unit_test/sku_info/sku_info_transfer_tests.cpp000066400000000000000000000025301422164147700311130ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/sku_info/operations/sku_info_transfer.h" #include "opencl/test/unit_test/sku_info/sku_info_base_reference.h" #include "gtest/gtest.h" using namespace NEO; TEST(SkuInfoTransferTest, givenFeatureTableWhenFillingStructureForGmmThenCopyOnlySelectedValues) { _SKU_FEATURE_TABLE requestedFtrTable = {}; _SKU_FEATURE_TABLE refFtrTable = {}; FeatureTable featureTable; for (auto &e : featureTable.packed) { e = std::numeric_limits::max(); } SkuInfoTransfer::transferFtrTableForGmm(&requestedFtrTable, &featureTable); SkuInfoBaseReference::fillReferenceFtrForTransfer(refFtrTable); EXPECT_TRUE(memcmp(&requestedFtrTable, &refFtrTable, sizeof(_SKU_FEATURE_TABLE)) == 0); } TEST(SkuInfoTransferTest, givenWaTableWhenFillingStructureForGmmThenCopyOnlySelectedValues) { _WA_TABLE requestedWaTable = {}; _WA_TABLE refWaTable = {}; WorkaroundTable waTable; refWaTable = {}; for (auto &e : waTable.packed) { e = std::numeric_limits::max(); } SkuInfoTransfer::transferWaTableForGmm(&requestedWaTable, &waTable); SkuInfoBaseReference::fillReferenceWaForTransfer(refWaTable); EXPECT_TRUE(memcmp(&requestedWaTable, &refWaTable, sizeof(_WA_TABLE)) == 
0); } compute-runtime-22.14.22890/opencl/test/unit_test/source_level_debugger/000077500000000000000000000000001422164147700261625ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/source_level_debugger/CMakeLists.txt000066400000000000000000000013231422164147700307210ustar00rootroot00000000000000# # Copyright (C) 2018-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_source_level_debugger ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/source_level_debugger_device_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/source_level_debugger_csr_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/source_level_debugger_csr_tests.h ${CMAKE_CURRENT_SOURCE_DIR}/source_level_debugger_tests.cpp ) get_property(NEO_CORE_TESTS_SOURCE_LEVEL_DEBUGGER GLOBAL PROPERTY NEO_CORE_TESTS_SOURCE_LEVEL_DEBUGGER) list(APPEND IGDRCL_SRCS_tests_source_level_debugger ${NEO_CORE_TESTS_SOURCE_LEVEL_DEBUGGER}) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_source_level_debugger}) source_level_debugger_csr_tests.cpp000066400000000000000000000164501422164147700352410ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/source_level_debugger/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/source_level_debugger/source_level_debugger_csr_tests.h" #include "shared/source/source_level_debugger/source_level_debugger.h" #include "shared/test/common/cmd_parse/hw_parse.h" #include "shared/test/common/helpers/dispatch_flags_helper.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "shared/test/common/mocks/mock_os_library.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/command_queue/command_queue_hw.h" #include HWTEST_F(CommandStreamReceiverWithActiveDebuggerTest, 
givenCsrWithActiveDebuggerAndDisabledPreemptionWhenFlushTaskIsCalledThenSipKernelIsMadeResident) { auto mockCsr = createCSR(); auto sipType = SipKernel::getSipKernelType(device->getDevice()); SipKernel::initSipKernel(sipType, device->getDevice()); CommandQueueHw commandQueue(nullptr, device.get(), 0, false); auto &commandStream = commandQueue.getCS(4096u); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); void *buffer = alignedMalloc(MemoryConstants::pageSize, MemoryConstants::pageSize64k); std::unique_ptr allocation(new MockGraphicsAllocation(buffer, MemoryConstants::pageSize)); std::unique_ptr heap(new IndirectHeap(allocation.get())); auto &baseDevice = device->getDevice(); mockCsr->flushTask(commandStream, 0, heap.get(), heap.get(), heap.get(), 0, dispatchFlags, baseDevice); auto sipAllocation = SipKernel::getSipKernel(baseDevice).getSipAllocation(); bool found = false; for (auto allocation : mockCsr->copyOfAllocations) { if (allocation == sipAllocation) { found = true; break; } } EXPECT_TRUE(found); alignedFree(buffer); } HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverWithActiveDebuggerTest, givenCsrWithActiveDebuggerAndDisabledPreemptionWhenFlushTaskIsCalledThenStateSipCmdIsProgrammed) { using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; using STATE_SIP = typename FamilyType::STATE_SIP; auto mockCsr = createCSR(); if (device->getHardwareInfo().capabilityTable.defaultPreemptionMode == PreemptionMode::MidThread) { CommandQueueHw commandQueue(nullptr, device.get(), 0, false); auto &commandStream = commandQueue.getCS(4096u); auto &preambleStream = mockCsr->getCS(0); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); void *buffer = alignedMalloc(MemoryConstants::pageSize, MemoryConstants::pageSize64k); std::unique_ptr allocation(new MockGraphicsAllocation(buffer, MemoryConstants::pageSize)); std::unique_ptr heap(new IndirectHeap(allocation.get())); auto &baseDevice = 
device->getDevice(); mockCsr->flushTask(commandStream, 0, heap.get(), heap.get(), heap.get(), 0, dispatchFlags, baseDevice); auto sipAllocation = SipKernel::getSipKernel(baseDevice).getSipAllocation(); HardwareParse hwParser; hwParser.parseCommands(preambleStream); auto itorStateBaseAddr = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); auto itorStateSip = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); ASSERT_NE(hwParser.cmdList.end(), itorStateBaseAddr); ASSERT_NE(hwParser.cmdList.end(), itorStateSip); STATE_BASE_ADDRESS *sba = (STATE_BASE_ADDRESS *)*itorStateBaseAddr; STATE_SIP *stateSipCmd = (STATE_SIP *)*itorStateSip; EXPECT_LT(reinterpret_cast(sba), reinterpret_cast(stateSipCmd)); auto sipAddress = stateSipCmd->getSystemInstructionPointer(); EXPECT_EQ(sipAllocation->getGpuAddressToPatch(), sipAddress); alignedFree(buffer); } } HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverWithActiveDebuggerTest, givenCsrWithActiveDebuggerAndWhenFlushTaskIsCalledThenAlwaysProgramStateBaseAddressAndSip) { using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; using STATE_SIP = typename FamilyType::STATE_SIP; auto mockCsr = createCSR(); if (device->getHardwareInfo().capabilityTable.defaultPreemptionMode == PreemptionMode::MidThread) { mockCsr->overrideDispatchPolicy(DispatchMode::ImmediateDispatch); CommandQueueHw commandQueue(nullptr, device.get(), 0, false); auto &commandStream = commandQueue.getCS(4096u); auto &preambleStream = mockCsr->getCS(0); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); void *buffer = alignedMalloc(MemoryConstants::pageSize, MemoryConstants::pageSize64k); std::unique_ptr allocation(new MockGraphicsAllocation(buffer, MemoryConstants::pageSize)); std::unique_ptr heap(new IndirectHeap(allocation.get())); auto &baseDevice = device->getDevice(); mockCsr->flushTask(commandStream, 0, heap.get(), heap.get(), heap.get(), 0, dispatchFlags, baseDevice); mockCsr->flushBatchedSubmissions(); 
mockCsr->flushTask(commandStream, 0, heap.get(), heap.get(), heap.get(), 0, dispatchFlags, baseDevice); auto sipAllocation = SipKernel::getSipKernel(baseDevice).getSipAllocation(); HardwareParse hwParser; hwParser.parseCommands(preambleStream); auto itorStateBaseAddr = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); auto itorStateSip = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); ASSERT_NE(hwParser.cmdList.end(), itorStateBaseAddr); ASSERT_NE(hwParser.cmdList.end(), itorStateSip); auto itorStateBaseAddr2 = find(std::next(itorStateBaseAddr), hwParser.cmdList.end()); auto itorStateSip2 = find(std::next(itorStateSip), hwParser.cmdList.end()); ASSERT_NE(hwParser.cmdList.end(), itorStateBaseAddr2); ASSERT_NE(hwParser.cmdList.end(), itorStateSip2); STATE_BASE_ADDRESS *sba = (STATE_BASE_ADDRESS *)*itorStateBaseAddr2; STATE_SIP *stateSipCmd = (STATE_SIP *)*itorStateSip2; EXPECT_LT(reinterpret_cast(sba), reinterpret_cast(stateSipCmd)); auto sipAddress = stateSipCmd->getSystemInstructionPointer(); EXPECT_EQ(sipAllocation->getGpuAddressToPatch(), sipAddress); alignedFree(buffer); } } source_level_debugger_csr_tests.h000066400000000000000000000034751422164147700347110ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/source_level_debugger/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/helpers/execution_environment_helper.h" #include "shared/test/common/mocks/mock_builtins.h" #include "shared/test/common/mocks/mock_csr.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "shared/test/common/mocks/mock_os_library.h" #include "shared/test/common/mocks/mock_sip.h" #include "shared/test/common/mocks/mock_source_level_debugger.h" #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include class CommandStreamReceiverWithActiveDebuggerTest : public ::testing::Test { protected: template auto createCSR() { 
hwInfo = nullptr; EnvironmentWithCsrWrapper environment; environment.setCsrType>(); executionEnvironment = getExecutionEnvironmentImpl(hwInfo, 1); hwInfo->capabilityTable = defaultHwInfo->capabilityTable; hwInfo->capabilityTable.debuggerSupported = true; auto mockMemoryManager = new MockMemoryManager(*executionEnvironment); executionEnvironment->memoryManager.reset(mockMemoryManager); executionEnvironment->rootDeviceEnvironments[0]->debugger.reset(new MockActiveSourceLevelDebugger(new MockOsLibrary)); device = std::make_unique(Device::create(executionEnvironment, 0)); device->setSourceLevelDebuggerActive(true); return static_cast *>(device->getDefaultEngine().commandStreamReceiver); } void TearDown() override { device->setSourceLevelDebuggerActive(false); } std::unique_ptr device; ExecutionEnvironment *executionEnvironment = nullptr; HardwareInfo *hwInfo = nullptr; }; source_level_debugger_device_tests.cpp000066400000000000000000000033611422164147700357060ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/source_level_debugger/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/source_level_debugger/source_level_debugger.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/mocks/mock_os_library.h" #include "shared/test/common/mocks/mock_source_level_debugger.h" #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_platform.h" using PreambleTest = ::testing::Test; using namespace NEO; TEST(DeviceWithSourceLevelDebugger, givenDeviceWithSourceLevelDebuggerActiveWhenDeviceIsDestructedThenSourceLevelDebuggerIsNotified) { ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); auto mock = new MockSourceLevelDebugger(new MockOsLibrary); 
executionEnvironment->rootDeviceEnvironments[0]->debugger.reset(mock); { auto device = std::make_unique(MockDevice::create(executionEnvironment, 0u)); EXPECT_EQ(0u, mock->notifyDeviceDestructionCalled); } EXPECT_EQ(1u, mock->notifyDeviceDestructionCalled); } TEST(DeviceWithSourceLevelDebugger, givenDeviceWithSourceLevelDebuggerActiveWhenDeviceIsCreatedThenPreemptionIsDisabled) { ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); executionEnvironment->rootDeviceEnvironments[0]->debugger.reset(new MockActiveSourceLevelDebugger(new MockOsLibrary)); auto device = std::unique_ptr(MockDevice::create(executionEnvironment, 0u)); EXPECT_EQ(PreemptionMode::Disabled, device->getPreemptionMode()); } source_level_debugger_tests.cpp000066400000000000000000001107661422164147700343770ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/source_level_debugger/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_container/cmdcontainer.h" #include "shared/source/device/device.h" #include "shared/source/helpers/file_io.h" #include "shared/source/os_interface/os_interface.h" #include "shared/source/program/kernel_info.h" #include "shared/source/source_level_debugger/source_level_debugger.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/execution_environment_helper.h" #include "shared/test/common/helpers/ult_hw_config.h" #include "shared/test/common/helpers/variable_backup.h" #include "shared/test/common/libult/source_level_debugger_library.h" #include "shared/test/common/mocks/mock_gmm_helper.h" #include "shared/test/common/mocks/mock_source_level_debugger.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include #include using namespace NEO; using 
std::string; using std::unique_ptr; class DebuggerLibraryRestorer { public: DebuggerLibraryRestorer() { restoreActiveState = DebuggerLibrary::getDebuggerActive(); restoreAvailableState = DebuggerLibrary::getLibraryAvailable(); } ~DebuggerLibraryRestorer() { DebuggerLibrary::clearDebuggerLibraryInterceptor(); DebuggerLibrary::setDebuggerActive(restoreActiveState); DebuggerLibrary::setLibraryAvailable(restoreAvailableState); } bool restoreActiveState = false; bool restoreAvailableState = false; }; TEST(SourceLevelDebugger, whenSourceLevelDebuggerIsCreatedThenLegacyModeIsTrue) { DebuggerLibraryRestorer restorer; DebuggerLibrary::setLibraryAvailable(true); MockSourceLevelDebugger debugger; EXPECT_TRUE(debugger.isLegacy()); } TEST(SourceLevelDebugger, givenPlatformWhenItIsCreatedThenSourceLevelDebuggerIsCreatedInExecutionEnvironment) { DebuggerLibraryRestorer restorer; if (defaultHwInfo->capabilityTable.debuggerSupported) { DebuggerLibrary::setLibraryAvailable(true); DebuggerLibrary::setDebuggerActive(true); auto executionEnvironment = new ExecutionEnvironment(); MockPlatform platform(*executionEnvironment); platform.initializeWithNewDevices(); EXPECT_NE(nullptr, executionEnvironment->rootDeviceEnvironments[0]->debugger); } } TEST(SourceLevelDebugger, givenPlatformWhenSourceLevelDebuggerIsCreatedThenRuntimeCapabilityHasFusedEusDisabled) { DebuggerLibraryRestorer restorer; if (defaultHwInfo->capabilityTable.debuggerSupported) { DebuggerLibrary::setLibraryAvailable(true); DebuggerLibrary::setDebuggerActive(true); auto executionEnvironment = new ExecutionEnvironment(); MockPlatform platform(*executionEnvironment); platform.initializeWithNewDevices(); ASSERT_NE(nullptr, executionEnvironment->rootDeviceEnvironments[0]->debugger); EXPECT_FALSE(executionEnvironment->rootDeviceEnvironments[0]->getHardwareInfo()->capabilityTable.fusedEuEnabled); } } TEST(SourceLevelDebugger, givenPlatformWhenInitializingSourceLevelDebuggerFailsThenRuntimeCapabilityFusedEusAreNotModified) { 
DebuggerLibraryRestorer restorer; if (defaultHwInfo->capabilityTable.debuggerSupported) { DebuggerLibraryInterceptor interceptor; DebuggerLibrary::setLibraryAvailable(true); DebuggerLibrary::setDebuggerActive(true); interceptor.initRetVal = -1; DebuggerLibrary::injectDebuggerLibraryInterceptor(&interceptor); auto executionEnvironment = new ExecutionEnvironment(); MockPlatform platform(*executionEnvironment); platform.initializeWithNewDevices(); bool defaultValue = defaultHwInfo->capabilityTable.fusedEuEnabled; ASSERT_NE(nullptr, executionEnvironment->rootDeviceEnvironments[0]->debugger); EXPECT_EQ(defaultValue, executionEnvironment->rootDeviceEnvironments[0]->getHardwareInfo()->capabilityTable.fusedEuEnabled); } } TEST(SourceLevelDebugger, givenNoKernelDebuggerLibraryWhenSourceLevelDebuggerIsCreatedThenLibraryIsNotLoaded) { DebuggerLibraryRestorer restorer; DebuggerLibrary::setLibraryAvailable(false); MockSourceLevelDebugger debugger; EXPECT_EQ(nullptr, debugger.debuggerLibrary.get()); } TEST(SourceLevelDebugger, givenKernelDebuggerLibraryAvailableWhenSourceLevelDebuggerIsConstructedThenLibraryIsLoaded) { DebuggerLibraryRestorer restorer; DebuggerLibrary::setLibraryAvailable(true); MockSourceLevelDebugger debugger; EXPECT_NE(nullptr, debugger.debuggerLibrary.get()); } TEST(SourceLevelDebugger, givenKernelDebuggerLibraryAvailableWhenIsDebuggerActiveIsCalledThenFalseIsReturned) { DebuggerLibraryRestorer restorer; DebuggerLibrary::setLibraryAvailable(true); MockSourceLevelDebugger debugger; bool active = debugger.isDebuggerActive(); EXPECT_FALSE(active); } TEST(SourceLevelDebugger, givenKernelDebuggerLibraryActiveWhenIsDebuggerActiveIsCalledThenTrueIsReturned) { DebuggerLibraryRestorer restorer; DebuggerLibrary::setLibraryAvailable(true); DebuggerLibrary::setDebuggerActive(true); MockSourceLevelDebugger debugger; bool active = debugger.isDebuggerActive(); EXPECT_TRUE(active); } TEST(SourceLevelDebugger, 
givenKernelDebuggerLibraryNotAvailableWhenIsDebuggerActiveIsCalledThenFalseIsReturned) { DebuggerLibraryRestorer restorer; DebuggerLibrary::setLibraryAvailable(false); MockSourceLevelDebugger debugger; bool active = debugger.isDebuggerActive(); EXPECT_FALSE(active); } TEST(SourceLevelDebugger, givenKernelDebuggerLibraryActiveWhenNotifySourceCodeIsCalledThenDebuggerLibraryFunctionIsCalled) { DebuggerLibraryRestorer restorer; DebuggerLibraryInterceptor interceptor; DebuggerLibrary::setLibraryAvailable(true); DebuggerLibrary::setDebuggerActive(true); DebuggerLibrary::injectDebuggerLibraryInterceptor(&interceptor); MockSourceLevelDebugger debugger; GfxDbgSourceCode argOut; char fileName[] = "filename"; argOut.sourceName = fileName; argOut.sourceNameMaxLen = sizeof(fileName); interceptor.sourceCodeArgOut = &argOut; const char source[] = "sourceCode"; string file; debugger.callBaseNotifySourceCode = true; debugger.notifySourceCode(source, sizeof(source), file); EXPECT_TRUE(interceptor.sourceCodeCalled); EXPECT_EQ(reinterpret_cast(static_cast(MockSourceLevelDebugger::mockDeviceHandle)), interceptor.sourceCodeArgIn.hDevice); EXPECT_EQ(source, interceptor.sourceCodeArgIn.sourceCode); EXPECT_EQ(sizeof(source), interceptor.sourceCodeArgIn.sourceCodeSize); EXPECT_NE(nullptr, interceptor.sourceCodeArgIn.sourceName); EXPECT_NE(0u, interceptor.sourceCodeArgIn.sourceNameMaxLen); EXPECT_STREQ(fileName, file.c_str()); } TEST(SourceLevelDebugger, givenKernelDebuggerLibraryNotActiveWhenNotifySourceCodeIsCalledThenDebuggerLibraryFunctionIsNotCalled) { DebuggerLibraryRestorer restorer; DebuggerLibraryInterceptor interceptor; DebuggerLibrary::setLibraryAvailable(true); DebuggerLibrary::setDebuggerActive(false); DebuggerLibrary::injectDebuggerLibraryInterceptor(&interceptor); MockSourceLevelDebugger debugger; debugger.setActive(false); const char source[] = "sourceCode"; string file; debugger.callBaseNotifySourceCode = true; debugger.notifySourceCode(source, sizeof(source), file); 
EXPECT_FALSE(interceptor.sourceCodeCalled); } TEST(SourceLevelDebugger, givenKernelDebuggerLibraryActiveWhenNotifyNewDeviceIsCalledThenDebuggerLibraryFunctionIsCalled) { DebuggerLibraryRestorer restorer; DebuggerLibraryInterceptor interceptor; DebuggerLibrary::setLibraryAvailable(true); DebuggerLibrary::setDebuggerActive(true); DebuggerLibrary::injectDebuggerLibraryInterceptor(&interceptor); MockSourceLevelDebugger debugger; debugger.callBaseNotifyNewDevice = true; debugger.notifyNewDevice(4); EXPECT_TRUE(interceptor.newDeviceCalled); EXPECT_EQ(reinterpret_cast(static_cast(4u)), interceptor.newDeviceArgIn.dh); EXPECT_EQ(4u, debugger.deviceHandle); } TEST(SourceLevelDebugger, givenKernelDebuggerLibraryNotActiveWhenNotifyNewDeviceIsCalledThenDebuggerLibraryFunctionIsNotCalled) { DebuggerLibraryRestorer restorer; DebuggerLibraryInterceptor interceptor; DebuggerLibrary::setLibraryAvailable(true); DebuggerLibrary::setDebuggerActive(false); DebuggerLibrary::injectDebuggerLibraryInterceptor(&interceptor); MockSourceLevelDebugger debugger; debugger.callBaseNotifyNewDevice = true; debugger.setActive(false); debugger.notifyNewDevice(4); EXPECT_FALSE(interceptor.newDeviceCalled); } TEST(SourceLevelDebugger, givenKernelDebuggerLibraryActiveWhenIsOptimizationDisabledIsCalledThenDebuggerLibraryFunctionIsCalled) { DebuggerLibraryRestorer restorer; DebuggerLibraryInterceptor interceptor; DebuggerLibrary::setLibraryAvailable(true); DebuggerLibrary::setDebuggerActive(true); DebuggerLibrary::injectDebuggerLibraryInterceptor(&interceptor); MockSourceLevelDebugger debugger; debugger.callBaseIsOptimizationDisabled = true; bool isOptDisabled = debugger.isOptimizationDisabled(); EXPECT_FALSE(isOptDisabled); EXPECT_TRUE(interceptor.optionCalled); EXPECT_EQ(GfxDbgOptionNames::DBG_OPTION_IS_OPTIMIZATION_DISABLED, interceptor.optionArgIn.optionName); EXPECT_NE(nullptr, interceptor.optionArgIn.value); EXPECT_LT(0u, interceptor.optionArgIn.valueLen); } TEST(SourceLevelDebugger, 
givenKernelDebuggerLibraryNotActiveWhenIsOptimizationDisabledIsCalledThenDebuggerLibraryFunctionIsNotCalled) { DebuggerLibraryRestorer restorer; DebuggerLibraryInterceptor interceptor; DebuggerLibrary::setLibraryAvailable(true); DebuggerLibrary::injectDebuggerLibraryInterceptor(&interceptor); MockSourceLevelDebugger debugger; debugger.setActive(false); debugger.callBaseIsOptimizationDisabled = true; bool isOptDisabled = debugger.isOptimizationDisabled(); EXPECT_FALSE(isOptDisabled); EXPECT_FALSE(interceptor.optionCalled); } TEST(SourceLevelDebugger, givenActiveDebuggerWhenGetDebuggerOptionReturnsZeroThenIsOptimizationDisabledReturnsFalse) { DebuggerLibraryRestorer restorer; DebuggerLibraryInterceptor interceptor; DebuggerLibrary::setLibraryAvailable(true); DebuggerLibrary::setDebuggerActive(true); DebuggerLibrary::injectDebuggerLibraryInterceptor(&interceptor); char value = '1'; GfxDbgOption optionArgOut; interceptor.optionArgOut = &optionArgOut; interceptor.optionArgOut->value = &value; interceptor.optionArgOut->valueLen = sizeof(value); interceptor.optionRetVal = 0; MockSourceLevelDebugger debugger; debugger.callBaseIsOptimizationDisabled = true; bool isOptDisabled = debugger.isOptimizationDisabled(); EXPECT_FALSE(isOptDisabled); } TEST(SourceLevelDebugger, givenActiveDebuggerAndOptDisabledWhenGetDebuggerOptionReturnsNonZeroAndOneInValueThenIsOptimizationDisabledReturnsTrue) { DebuggerLibraryRestorer restorer; DebuggerLibraryInterceptor interceptor; DebuggerLibrary::setLibraryAvailable(true); DebuggerLibrary::setDebuggerActive(true); DebuggerLibrary::injectDebuggerLibraryInterceptor(&interceptor); char value[2] = {'1', 0}; GfxDbgOption optionArgOut; interceptor.optionArgOut = &optionArgOut; interceptor.optionArgOut->value = value; interceptor.optionArgOut->valueLen = sizeof(value); interceptor.optionRetVal = 1; MockSourceLevelDebugger debugger; debugger.callBaseIsOptimizationDisabled = true; bool isOptDisabled = debugger.isOptimizationDisabled(); 
EXPECT_TRUE(isOptDisabled); } TEST(SourceLevelDebugger, givenActiveDebuggerAndOptDisabledWhenGetDebuggerOptionReturnsNonZeroAndZeroInValueThenIsOptimizationDisabledReturnsFalse) { DebuggerLibraryRestorer restorer; DebuggerLibraryInterceptor interceptor; DebuggerLibrary::setLibraryAvailable(true); DebuggerLibrary::setDebuggerActive(true); DebuggerLibrary::injectDebuggerLibraryInterceptor(&interceptor); char value = '0'; GfxDbgOption optionArgOut; interceptor.optionArgOut = &optionArgOut; interceptor.optionArgOut->value = &value; interceptor.optionArgOut->valueLen = sizeof(value); interceptor.optionRetVal = 1; MockSourceLevelDebugger debugger; debugger.callBaseIsOptimizationDisabled = true; bool isOptDisabled = debugger.isOptimizationDisabled(); EXPECT_FALSE(isOptDisabled); } TEST(SourceLevelDebugger, givenKernelDebuggerLibraryActiveWhenNotifyKernelDebugDataIsCalledThenDebuggerLibraryFunctionIsCalled) { DebuggerLibraryRestorer restorer; DebuggerLibraryInterceptor interceptor; DebuggerLibrary::setLibraryAvailable(true); DebuggerLibrary::setDebuggerActive(true); DebuggerLibrary::injectDebuggerLibraryInterceptor(&interceptor); MockSourceLevelDebugger debugger; char isa[8]; char dbgIsa[10]; char visa[12]; KernelInfo info; info.debugData.genIsa = dbgIsa; info.debugData.vIsa = visa; info.debugData.genIsaSize = sizeof(dbgIsa); info.debugData.vIsaSize = sizeof(visa); info.kernelDescriptor.kernelMetadata.kernelName = "debugKernel"; info.heapInfo.KernelHeapSize = sizeof(isa); info.heapInfo.pKernelHeap = isa; debugger.callBaseNotifyKernelDebugData = true; debugger.notifyKernelDebugData(&info.debugData, info.kernelDescriptor.kernelMetadata.kernelName, info.heapInfo.pKernelHeap, info.heapInfo.KernelHeapSize); EXPECT_TRUE(interceptor.kernelDebugDataCalled); EXPECT_EQ(static_cast(IGFXDBG_CURRENT_VERSION), interceptor.kernelDebugDataArgIn.version); EXPECT_EQ(reinterpret_cast(static_cast(MockSourceLevelDebugger::mockDeviceHandle)), interceptor.kernelDebugDataArgIn.hDevice); 
EXPECT_EQ(reinterpret_cast(0), interceptor.kernelDebugDataArgIn.hProgram); EXPECT_EQ(dbgIsa, interceptor.kernelDebugDataArgIn.dbgGenIsaBuffer); EXPECT_EQ(sizeof(dbgIsa), interceptor.kernelDebugDataArgIn.dbgGenIsaSize); EXPECT_EQ(visa, interceptor.kernelDebugDataArgIn.dbgVisaBuffer); EXPECT_EQ(sizeof(visa), interceptor.kernelDebugDataArgIn.dbgVisaSize); EXPECT_EQ(info.heapInfo.KernelHeapSize, interceptor.kernelDebugDataArgIn.KernelBinSize); EXPECT_EQ(isa, interceptor.kernelDebugDataArgIn.kernelBinBuffer); EXPECT_STREQ(info.kernelDescriptor.kernelMetadata.kernelName.c_str(), interceptor.kernelDebugDataArgIn.kernelName); } TEST(SourceLevelDebugger, givenKernelDebuggerLibraryActiveWhenNullptrDebugDataIsPassedToNotifyThenDebuggerNotifiedWithNullPointersAndZeroSizes) { DebuggerLibraryRestorer restorer; DebuggerLibraryInterceptor interceptor; DebuggerLibrary::setLibraryAvailable(true); DebuggerLibrary::setDebuggerActive(true); DebuggerLibrary::injectDebuggerLibraryInterceptor(&interceptor); MockSourceLevelDebugger debugger; char isa[8]; KernelInfo info; info.kernelDescriptor.kernelMetadata.kernelName = "debugKernel"; info.heapInfo.KernelHeapSize = sizeof(isa); info.heapInfo.pKernelHeap = isa; debugger.callBaseNotifyKernelDebugData = true; debugger.notifyKernelDebugData(nullptr, info.kernelDescriptor.kernelMetadata.kernelName, info.heapInfo.pKernelHeap, info.heapInfo.KernelHeapSize); EXPECT_TRUE(interceptor.kernelDebugDataCalled); EXPECT_EQ(static_cast(IGFXDBG_CURRENT_VERSION), interceptor.kernelDebugDataArgIn.version); EXPECT_EQ(reinterpret_cast(static_cast(MockSourceLevelDebugger::mockDeviceHandle)), interceptor.kernelDebugDataArgIn.hDevice); EXPECT_EQ(reinterpret_cast(0), interceptor.kernelDebugDataArgIn.hProgram); EXPECT_EQ(nullptr, interceptor.kernelDebugDataArgIn.dbgGenIsaBuffer); EXPECT_EQ(0u, interceptor.kernelDebugDataArgIn.dbgGenIsaSize); EXPECT_EQ(nullptr, interceptor.kernelDebugDataArgIn.dbgVisaBuffer); EXPECT_EQ(0u, 
interceptor.kernelDebugDataArgIn.dbgVisaSize); EXPECT_EQ(info.heapInfo.KernelHeapSize, interceptor.kernelDebugDataArgIn.KernelBinSize); EXPECT_EQ(isa, interceptor.kernelDebugDataArgIn.kernelBinBuffer); EXPECT_STREQ(info.kernelDescriptor.kernelMetadata.kernelName.c_str(), interceptor.kernelDebugDataArgIn.kernelName); } TEST(SourceLevelDebugger, givenNoVisaWhenNotifyKernelDebugDataIsCalledThenDebuggerLibraryFunctionIsCalledWithIsa) { DebuggerLibraryRestorer restorer; DebuggerLibraryInterceptor interceptor; DebuggerLibrary::setLibraryAvailable(true); DebuggerLibrary::setDebuggerActive(true); DebuggerLibrary::injectDebuggerLibraryInterceptor(&interceptor); MockSourceLevelDebugger debugger; char isa[8]; char dbgIsa[10]; KernelInfo info; info.debugData.genIsa = dbgIsa; info.debugData.vIsa = nullptr; info.debugData.genIsaSize = sizeof(dbgIsa); info.debugData.vIsaSize = 0; info.kernelDescriptor.kernelMetadata.kernelName = "debugKernel"; info.heapInfo.KernelHeapSize = sizeof(isa); info.heapInfo.pKernelHeap = isa; debugger.callBaseNotifyKernelDebugData = true; debugger.notifyKernelDebugData(&info.debugData, info.kernelDescriptor.kernelMetadata.kernelName, info.heapInfo.pKernelHeap, info.heapInfo.KernelHeapSize); EXPECT_TRUE(interceptor.kernelDebugDataCalled); EXPECT_EQ(isa, interceptor.kernelDebugDataArgIn.kernelBinBuffer); } TEST(SourceLevelDebugger, givenNoGenIsaWhenNotifyKernelDebugDataIsCalledThenDebuggerLibraryFunctionIsCalledWithIsa) { DebuggerLibraryRestorer restorer; DebuggerLibraryInterceptor interceptor; DebuggerLibrary::setLibraryAvailable(true); DebuggerLibrary::setDebuggerActive(true); DebuggerLibrary::injectDebuggerLibraryInterceptor(&interceptor); MockSourceLevelDebugger debugger; char isa[8]; char visa[12]; KernelInfo info; info.debugData.genIsa = nullptr; info.debugData.vIsa = visa; info.debugData.genIsaSize = 0; info.debugData.vIsaSize = sizeof(visa); info.kernelDescriptor.kernelMetadata.kernelName = "debugKernel"; info.heapInfo.KernelHeapSize = 
sizeof(isa); info.heapInfo.pKernelHeap = isa; debugger.callBaseNotifyKernelDebugData = true; debugger.notifyKernelDebugData(&info.debugData, info.kernelDescriptor.kernelMetadata.kernelName, isa, sizeof(isa)); EXPECT_TRUE(interceptor.kernelDebugDataCalled); EXPECT_EQ(isa, interceptor.kernelDebugDataArgIn.kernelBinBuffer); } TEST(SourceLevelDebugger, givenKernelDebuggerLibraryNotActiveWhenNotifyKernelDebugDataIsCalledThenDebuggerLibraryFunctionIsNotCalled) { DebuggerLibraryRestorer restorer; DebuggerLibraryInterceptor interceptor; DebuggerLibrary::setLibraryAvailable(true); DebuggerLibrary::setDebuggerActive(false); DebuggerLibrary::injectDebuggerLibraryInterceptor(&interceptor); MockSourceLevelDebugger debugger; debugger.setActive(false); KernelInfo info; debugger.callBaseNotifyKernelDebugData = true; debugger.notifyKernelDebugData(&info.debugData, info.kernelDescriptor.kernelMetadata.kernelName, nullptr, 0); EXPECT_FALSE(interceptor.kernelDebugDataCalled); } TEST(SourceLevelDebugger, givenKernelDebuggerLibraryActiveWhenInitializeIsCalledWithLocalMemoryUsageFalseThenDebuggerFunctionIsCalledWithCorrectArg) { DebuggerLibraryRestorer restorer; DebuggerLibraryInterceptor interceptor; DebuggerLibrary::setLibraryAvailable(true); DebuggerLibrary::setDebuggerActive(true); DebuggerLibrary::injectDebuggerLibraryInterceptor(&interceptor); MockSourceLevelDebugger debugger; debugger.callBaseInitialize = true; debugger.initialize(false); EXPECT_TRUE(interceptor.initCalled); EXPECT_FALSE(interceptor.targetCapsArgIn.supportsLocalMemory); } TEST(SourceLevelDebugger, givenKernelDebuggerLibraryActiveWhenInitializeReturnsErrorThenIsActiveIsSetToFalse) { DebuggerLibraryRestorer restorer; DebuggerLibraryInterceptor interceptor; DebuggerLibrary::setLibraryAvailable(true); DebuggerLibrary::setDebuggerActive(true); DebuggerLibrary::injectDebuggerLibraryInterceptor(&interceptor); MockSourceLevelDebugger debugger; interceptor.initRetVal = IgfxdbgRetVal::IGFXDBG_FAILURE; 
debugger.callBaseInitialize = true; debugger.initialize(false); EXPECT_TRUE(interceptor.initCalled); EXPECT_FALSE(debugger.isDebuggerActive()); } TEST(SourceLevelDebugger, givenKernelDebuggerLibraryActiveWhenInitializeIsCalledWithLocalMemoryUsageTrueThenDebuggerFunctionIsCalledWithCorrectArg) { DebuggerLibraryRestorer restorer; DebuggerLibraryInterceptor interceptor; DebuggerLibrary::setLibraryAvailable(true); DebuggerLibrary::setDebuggerActive(true); DebuggerLibrary::injectDebuggerLibraryInterceptor(&interceptor); MockSourceLevelDebugger debugger; debugger.callBaseInitialize = true; debugger.initialize(true); EXPECT_TRUE(interceptor.initCalled); EXPECT_TRUE(interceptor.targetCapsArgIn.supportsLocalMemory); } TEST(SourceLevelDebugger, givenKernelDebuggerLibraryNotActiveWhenInitializeIsCalledThenDebuggerFunctionIsNotCalled) { DebuggerLibraryRestorer restorer; DebuggerLibraryInterceptor interceptor; DebuggerLibrary::setLibraryAvailable(true); DebuggerLibrary::setDebuggerActive(false); DebuggerLibrary::injectDebuggerLibraryInterceptor(&interceptor); MockSourceLevelDebugger debugger; debugger.callBaseInitialize = true; debugger.initialize(false); EXPECT_FALSE(interceptor.initCalled); } TEST(SourceLevelDebugger, givenKernelDebuggerLibraryActiveWhenDeviceIsConstructedThenDebuggerIsInitialized) { DebuggerLibraryRestorer restorer; if (defaultHwInfo->capabilityTable.debuggerSupported) { DebuggerLibraryInterceptor interceptor; DebuggerLibrary::setLibraryAvailable(true); DebuggerLibrary::setDebuggerActive(true); DebuggerLibrary::injectDebuggerLibraryInterceptor(&interceptor); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); EXPECT_TRUE(interceptor.initCalled); } } TEST(SourceLevelDebugger, givenKernelDebuggerLibraryActiveWhenDeviceImplIsCreatedThenDebuggerIsNotified) { DebuggerLibraryRestorer restorer; if (defaultHwInfo->capabilityTable.debuggerSupported) { DebuggerLibraryInterceptor interceptor; 
DebuggerLibrary::setLibraryAvailable(true); DebuggerLibrary::setDebuggerActive(true); DebuggerLibrary::injectDebuggerLibraryInterceptor(&interceptor); unique_ptr device(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); unique_ptr pClDevice(new MockClDevice{device.get()}); EXPECT_TRUE(interceptor.newDeviceCalled); uint32_t deviceHandleExpected = device->getGpgpuCommandStreamReceiver().getOSInterface() != nullptr ? device->getGpgpuCommandStreamReceiver().getOSInterface()->getDriverModel()->getDeviceHandle() : 0; EXPECT_EQ(reinterpret_cast(static_cast(deviceHandleExpected)), interceptor.newDeviceArgIn.dh); pClDevice.reset(); device.release(); } } TEST(SourceLevelDebugger, givenKernelDebuggerLibraryActiveWhenDeviceImplIsCreatedWithOsCsrThenDebuggerIsNotifiedWithCorrectDeviceHandle) { DebuggerLibraryRestorer restorer; if (defaultHwInfo->capabilityTable.debuggerSupported) { DebuggerLibraryInterceptor interceptor; DebuggerLibrary::setLibraryAvailable(true); DebuggerLibrary::setDebuggerActive(true); DebuggerLibrary::injectDebuggerLibraryInterceptor(&interceptor); VariableBackup backup(&ultHwConfig); ultHwConfig.useHwCsr = true; HardwareInfo *hwInfo = nullptr; ExecutionEnvironment *executionEnvironment = getExecutionEnvironmentImpl(hwInfo, 1); hwInfo->capabilityTable.instrumentationEnabled = true; unique_ptr device(Device::create(executionEnvironment, 0)); unique_ptr pClDevice(new MockClDevice{device.get()}); ASSERT_NE(nullptr, device->getGpgpuCommandStreamReceiver().getOSInterface()); EXPECT_TRUE(interceptor.newDeviceCalled); uint32_t deviceHandleExpected = device->getGpgpuCommandStreamReceiver().getOSInterface()->getDriverModel()->getDeviceHandle(); EXPECT_EQ(reinterpret_cast(static_cast(deviceHandleExpected)), interceptor.newDeviceArgIn.dh); device.release(); } } TEST(SourceLevelDebugger, givenKernelDebuggerLibraryNotActiveWhenDeviceIsCreatedThenDebuggerIsNotCreatedInitializedAndNotNotified) { DebuggerLibraryRestorer restorer; 
DebuggerLibraryInterceptor interceptor; DebuggerLibrary::setLibraryAvailable(true); DebuggerLibrary::setDebuggerActive(false); DebuggerLibrary::injectDebuggerLibraryInterceptor(&interceptor); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); EXPECT_EQ(nullptr, device->getDebugger()); EXPECT_FALSE(interceptor.initCalled); EXPECT_FALSE(interceptor.newDeviceCalled); } TEST(SourceLevelDebugger, givenKernelDebuggerLibraryNotActiveWhenGettingSourceLevelDebuggerThenNullptrIsReturned) { DebuggerLibraryRestorer restorer; DebuggerLibraryInterceptor interceptor; DebuggerLibrary::setLibraryAvailable(true); DebuggerLibrary::setDebuggerActive(false); DebuggerLibrary::injectDebuggerLibraryInterceptor(&interceptor); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); EXPECT_EQ(nullptr, device->getSourceLevelDebugger()); } TEST(SourceLevelDebugger, givenDeviceWithDebuggerActiveSetWhenSourceLevelDebuggerIsNotCreatedThenNotificationsAreNotCalled) { DebuggerLibraryRestorer restorer; DebuggerLibraryInterceptor interceptor; DebuggerLibrary::setLibraryAvailable(false); DebuggerLibrary::setDebuggerActive(false); DebuggerLibrary::injectDebuggerLibraryInterceptor(&interceptor); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); EXPECT_TRUE(device->isDebuggerActive()); EXPECT_EQ(nullptr, device->getDebugger()); EXPECT_FALSE(interceptor.newDeviceCalled); EXPECT_FALSE(interceptor.deviceDestructionCalled); } TEST(SourceLevelDebugger, givenTwoRootDevicesWhenSecondIsCreatedThenCreatingNewSourceLevelDebugger) { DebuggerLibraryRestorer restorer; if (defaultHwInfo->capabilityTable.debuggerSupported) { DebuggerLibraryInterceptor interceptor; DebuggerLibrary::setLibraryAvailable(true); DebuggerLibrary::setDebuggerActive(true); DebuggerLibrary::injectDebuggerLibraryInterceptor(&interceptor); ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); 
executionEnvironment->prepareRootDeviceEnvironments(2); for (auto i = 0u; i < executionEnvironment->rootDeviceEnvironments.size(); i++) { executionEnvironment->rootDeviceEnvironments[i]->setHwInfo(defaultHwInfo.get()); } auto device1 = std::make_unique(Device::create(executionEnvironment, 0u)); EXPECT_NE(nullptr, executionEnvironment->memoryManager); EXPECT_TRUE(interceptor.initCalled); interceptor.initCalled = false; auto device2 = std::make_unique(Device::create(executionEnvironment, 1u)); EXPECT_NE(nullptr, executionEnvironment->memoryManager); EXPECT_TRUE(interceptor.initCalled); } } TEST(SourceLevelDebugger, givenMultipleRootDevicesWhenCreatedThenUseDedicatedSourceLevelDebugger) { DebuggerLibraryRestorer restorer; if (defaultHwInfo->capabilityTable.debuggerSupported) { DebuggerLibrary::setLibraryAvailable(true); DebuggerLibrary::setDebuggerActive(true); ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); executionEnvironment->prepareRootDeviceEnvironments(2); for (auto i = 0u; i < executionEnvironment->rootDeviceEnvironments.size(); i++) { executionEnvironment->rootDeviceEnvironments[i]->setHwInfo(defaultHwInfo.get()); } auto device1 = std::make_unique(Device::create(executionEnvironment, 0u)); auto sourceLevelDebugger = device1->getDebugger(); auto device2 = std::make_unique(Device::create(executionEnvironment, 1u)); EXPECT_NE(sourceLevelDebugger, device2->getDebugger()); } } TEST(SourceLevelDebugger, whenCaptureSBACalledThenNoCommandsAreAddedToStream) { ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); auto device = std::unique_ptr(Device::create(executionEnvironment, 0u)); MockSourceLevelDebugger debugger; CommandContainer container; container.initialize(device.get(), nullptr, true); NEO::Debugger::SbaAddresses sbaAddresses = {}; debugger.captureStateBaseAddress(container, sbaAddresses); EXPECT_EQ(0u, container.getCommandStream()->getUsed()); } TEST(SourceLevelDebugger, 
givenEnableMockSourceLevelDebuggerWhenInitializingExecEnvThenActiveDebuggerWithEmptyInterfaceIsCreated) { if (!defaultHwInfo->capabilityTable.debuggerSupported) { GTEST_SKIP_("Source Level Debugger not supported"); } DebugManagerStateRestore stateRestore; DebuggerLibraryRestorer restorer; DebuggerLibrary::setLibraryAvailable(false); DebugManager.flags.EnableMockSourceLevelDebugger.set(1); auto executionEnvironment = new ExecutionEnvironment(); MockPlatform platform(*executionEnvironment); platform.initializeWithNewDevices(); auto debugger = static_cast(executionEnvironment->rootDeviceEnvironments[0]->debugger.get()); ASSERT_NE(nullptr, debugger); EXPECT_TRUE(debugger->isDebuggerActive()); EXPECT_FALSE(debugger->initialize(false)); debugger->notifyNewDevice(4); EXPECT_TRUE(debugger->isOptimizationDisabled()); const char source[] = "sourceCode"; string file; debugger->notifySourceCode(source, sizeof(source), file); char isa[8]; char dbgIsa[10]; char visa[12]; KernelInfo info; info.debugData.genIsa = dbgIsa; info.debugData.vIsa = visa; info.debugData.genIsaSize = sizeof(dbgIsa); info.debugData.vIsaSize = sizeof(visa); info.kernelDescriptor.kernelMetadata.kernelName = "debugKernel"; info.heapInfo.KernelHeapSize = sizeof(isa); info.heapInfo.pKernelHeap = isa; debugger->notifyKernelDebugData(&info.debugData, info.kernelDescriptor.kernelMetadata.kernelName, info.heapInfo.pKernelHeap, info.heapInfo.KernelHeapSize); debugger->notifyKernelDebugData(nullptr, info.kernelDescriptor.kernelMetadata.kernelName, info.heapInfo.pKernelHeap, info.heapInfo.KernelHeapSize); debugger->notifyKernelDebugData(nullptr, info.kernelDescriptor.kernelMetadata.kernelName, nullptr, 0); EXPECT_TRUE(debugger->notifyDeviceDestruction()); } TEST(SourceLevelDebugger, givenMode1InEnableMockSourceLevelDebuggerWhenDebuggerCreatedThenIsOptimizationDisabledReturnsTrue) { if (!defaultHwInfo->capabilityTable.debuggerSupported) { GTEST_SKIP_("Source Level Debugger not supported"); } DebugManagerStateRestore 
stateRestore; DebuggerLibraryRestorer restorer; DebuggerLibrary::setLibraryAvailable(false); DebugManager.flags.EnableMockSourceLevelDebugger.set(1); auto sld = std::unique_ptr(SourceLevelDebugger::create()); EXPECT_TRUE(sld->isOptimizationDisabled()); } TEST(SourceLevelDebugger, givenMode2InEnableMockSourceLevelDebuggerWhenDebuggerCreatedThenIsOptimizationDisabledReturnsFalse) { if (!defaultHwInfo->capabilityTable.debuggerSupported) { GTEST_SKIP_("Source Level Debugger not supported"); } DebugManagerStateRestore stateRestore; DebuggerLibraryRestorer restorer; DebuggerLibrary::setLibraryAvailable(false); DebugManager.flags.EnableMockSourceLevelDebugger.set(2); auto sld = std::unique_ptr(SourceLevelDebugger::create()); EXPECT_FALSE(sld->isOptimizationDisabled()); } TEST(SourceLevelDebugger, givenDebugVarDumpElfWhenNotifyKernelDebugDataIsCalledThenElfFileIsCreated) { DebugManagerStateRestore stateRestore; DebugManager.flags.DebuggerLogBitmask.set(NEO::DebugVariables::DEBUGGER_LOG_BITMASK::DUMP_ELF); DebuggerLibraryRestorer restorer; DebuggerLibraryInterceptor interceptor; DebuggerLibrary::setLibraryAvailable(true); DebuggerLibrary::setDebuggerActive(true); DebuggerLibrary::injectDebuggerLibraryInterceptor(&interceptor); MockSourceLevelDebugger debugger; char isa[8]; char dbgIsa[10]; char visa[12]; KernelInfo info; info.debugData.genIsa = dbgIsa; info.debugData.vIsa = visa; info.debugData.genIsaSize = sizeof(dbgIsa); info.debugData.vIsaSize = sizeof(visa); info.kernelDescriptor.kernelMetadata.kernelName = "debugKernel"; info.heapInfo.KernelHeapSize = sizeof(isa); info.heapInfo.pKernelHeap = isa; std::string fileName = info.kernelDescriptor.kernelMetadata.kernelName + ".elf"; EXPECT_FALSE(fileExists(fileName)); debugger.callBaseNotifyKernelDebugData = true; debugger.notifyKernelDebugData(&info.debugData, info.kernelDescriptor.kernelMetadata.kernelName, info.heapInfo.pKernelHeap, info.heapInfo.KernelHeapSize); EXPECT_TRUE(fileExists(fileName)); 
std::remove(fileName.c_str()); } TEST(SourceLevelDebugger, givenDebugVarDumpElfWhenElfFileExistsWhileNotifyingDebugDataThenSuffixIsAppendedToFileName) { DebugManagerStateRestore stateRestore; DebugManager.flags.DebuggerLogBitmask.set(NEO::DebugVariables::DEBUGGER_LOG_BITMASK::DUMP_ELF); DebuggerLibraryRestorer restorer; DebuggerLibraryInterceptor interceptor; DebuggerLibrary::setLibraryAvailable(true); DebuggerLibrary::setDebuggerActive(true); DebuggerLibrary::injectDebuggerLibraryInterceptor(&interceptor); MockSourceLevelDebugger debugger; char isa[8]; char dbgIsa[10]; char visa[12]; KernelInfo info; info.debugData.genIsa = dbgIsa; info.debugData.vIsa = visa; info.debugData.genIsaSize = sizeof(dbgIsa); info.debugData.vIsaSize = sizeof(visa); info.kernelDescriptor.kernelMetadata.kernelName = "debugKernel"; info.heapInfo.KernelHeapSize = sizeof(isa); info.heapInfo.pKernelHeap = isa; std::string fileName = info.kernelDescriptor.kernelMetadata.kernelName + ".elf"; char data[4]; writeDataToFile(fileName.c_str(), data, 4); EXPECT_TRUE(fileExists(fileName)); std::string fileName2 = info.kernelDescriptor.kernelMetadata.kernelName + "_0.elf"; debugger.callBaseNotifyKernelDebugData = true; debugger.notifyKernelDebugData(&info.debugData, info.kernelDescriptor.kernelMetadata.kernelName, info.heapInfo.pKernelHeap, info.heapInfo.KernelHeapSize); EXPECT_TRUE(fileExists(fileName2)); std::remove(fileName.c_str()); std::remove(fileName2.c_str()); } TEST(SourceLevelDebugger, givenDebuggerLibraryAvailableAndExperimentalEnableSourceLevelDebuggerThenDebuggerIsCreated) { DebugManagerStateRestore stateRestore; DebuggerLibraryRestorer restorer; DebuggerLibrary::setDebuggerActive(true); DebuggerLibrary::setLibraryAvailable(true); DebugManager.flags.ExperimentalEnableSourceLevelDebugger.set(1); auto hwInfo = *defaultHwInfo; auto debugger = std::unique_ptr(Debugger::create(&hwInfo)); ASSERT_NE(nullptr, debugger.get()); EXPECT_TRUE(debugger->isLegacy()); } using LegacyDebuggerTest = 
::testing::Test; using NotXeHPOrDG2 = AreNotGfxCores; HWTEST2_F(LegacyDebuggerTest, givenNotAtsOrDg2AndDebugIsActiveThenDisableL3CacheInGmmHelperIsNotSet, NotXeHPOrDG2) { DebugManagerStateRestore stateRestore; DebugManager.flags.EnableMockSourceLevelDebugger.set(1); auto executionEnvironment = new ExecutionEnvironment(); MockPlatform platform(*executionEnvironment); platform.initializeWithNewDevices(); EXPECT_FALSE(static_cast(platform.getClDevice(0)->getDevice().getGmmHelper())->allResourcesUncached); } using ATSOrDG2 = IsWithinGfxCore; HWTEST2_F(LegacyDebuggerTest, givenAtsOrDg2AndDebugIsActiveThenDisableL3CacheInGmmHelperIsSet, ATSOrDG2) { DebugManagerStateRestore stateRestore; DebugManager.flags.EnableMockSourceLevelDebugger.set(1); auto executionEnvironment = new ExecutionEnvironment(); MockPlatform platform(*executionEnvironment); platform.initializeWithNewDevices(); EXPECT_TRUE(static_cast(platform.getClDevice(0)->getDevice().getGmmHelper())->allResourcesUncached); } compute-runtime-22.14.22890/opencl/test/unit_test/test_dynamic_lib/000077500000000000000000000000001422164147700251405ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/test_dynamic_lib/CMakeLists.txt000066400000000000000000000007101422164147700276760ustar00rootroot00000000000000# # Copyright (C) 2018-2021 Intel Corporation # # SPDX-License-Identifier: MIT # project(test_dynamic_lib) if(WIN32) set(OS_SUFFIX win) else() set(OS_SUFFIX lin) endif() add_library(test_dynamic_lib SHARED test_dynamic_lib_${OS_SUFFIX}.cpp) create_project_source_tree(test_dynamic_lib) set_target_properties(test_dynamic_lib PROPERTIES FOLDER "test mocks") set_property(TARGET test_dynamic_lib APPEND_STRING PROPERTY COMPILE_FLAGS ${ASAN_FLAGS}) compute-runtime-22.14.22890/opencl/test/unit_test/test_dynamic_lib/test_dynamic_lib_lin.cpp000066400000000000000000000002571422164147700320230ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ 
extern "C" __attribute__((visibility("default"))) void testDynamicLibraryFunc() { } compute-runtime-22.14.22890/opencl/test/unit_test/test_dynamic_lib/test_dynamic_lib_win.cpp000066400000000000000000000002361422164147700320330ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ extern "C" __declspec(dllexport) void testDynamicLibraryFunc() { } compute-runtime-22.14.22890/opencl/test/unit_test/test_files/000077500000000000000000000000001422164147700237705ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/test_files/CopyBuffer_simd16.cl000066400000000000000000000004441422164147700275410ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ __attribute__((intel_reqd_sub_group_size(16))) __kernel void CopyBuffer( __global unsigned int* src, __global unsigned int* dst ) { int id = (int)get_global_id(0); dst[id] = src[id]; } compute-runtime-22.14.22890/opencl/test/unit_test/test_files/CopyBuffer_simd8.cl000066400000000000000000000004431422164147700274610ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ __attribute__((intel_reqd_sub_group_size(8))) __kernel void CopyBuffer( __global unsigned int* src, __global unsigned int* dst ) { int id = (int)get_global_id(0); dst[id] = src[id]; } compute-runtime-22.14.22890/opencl/test/unit_test/test_files/binary_with_zeroes000066400000000000000000000001001422164147700276100ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/test_files/copy_buffer_to_image.cl000066400000000000000000000013151422164147700304570ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable __kernel void CopyBufferToImage3d(__global uchar *src, __write_only image3d_t output, int srcOffset, int4 dstOffset, uint2 Pitch) { 
const uint x = get_global_id(0); const uint y = get_global_id(1); const uint z = get_global_id(2); int4 dstCoord = (int4)(x, y, z, 0) + dstOffset; uint LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y); write_imageui(output, dstCoord, (uint4)(*(src + LOffset + x), 0, 0, 1)); }compute-runtime-22.14.22890/opencl/test/unit_test/test_files/copybuffer.cl000066400000000000000000000004211422164147700264510ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ __kernel void CopyBuffer(__global unsigned int *src, __global unsigned int *dst) { int id = (int)get_global_id(0); dst[id] = lgamma((float)src[id]); dst[id] = src[id]; } compute-runtime-22.14.22890/opencl/test/unit_test/test_files/copybuffer_with_header.cl000066400000000000000000000004171422164147700310210ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "simple_header.h" __kernel void CopyBuffer( __global unsigned int *src, __global unsigned int *dst) { int id = (int)get_global_id(0); dst[id] = src[id]; } compute-runtime-22.14.22890/opencl/test/unit_test/test_files/emptykernel.cl000066400000000000000000000002341422164147700266460ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ /* No character after the last "}" */ __kernel void empty() { }compute-runtime-22.14.22890/opencl/test/unit_test/test_files/igdrcl.config000066400000000000000000000317261422164147700264340ustar00rootroot00000000000000TbxServer = 127.0.0.1 ProductFamilyOverride = unk HardwareInfoOverride = default ForceCompilerUsePlatform = unk AUBDumpBufferFormat = unk AUBDumpImageFormat = unk AUBDumpCaptureFileName = unk AUBDumpFilterKernelName = unk AUBDumpToggleFileName = unk OverrideGdiPath = unk AubDumpAddMmioRegistersList = unk ZE_AFFINITY_MASK = default ZE_ENABLE_PCI_ID_DEVICE_ORDER = 0 AUBDumpFilterNamedKernelStartIdx = 0 AUBDumpFilterNamedKernelEndIdx = -1 
AUBDumpSubCaptureMode = 0 AUBDumpFilterKernelStartIdx = 0 AUBDumpFilterKernelEndIdx = -1 AUBDumpToggleCaptureOnOff = 0 AubDumpOverrideMmioRegister = 0 AubDumpOverrideMmioRegisterValue = 0 SetCommandStreamReceiver = -1 TbxPort = 4321 TbxFrontdoorMode = 0 FlattenBatchBufferForAUBDump = 0 AddPatchInfoCommentsForAUBDump = 0 UseAubStream = 1 AUBDumpAllocsOnEnqueueReadOnly = 0 AUBDumpAllocsOnEnqueueSVMMemcpyOnly = 0 AUBDumpForceAllToLocalMemory = 0 GenerateAubFilePerProcessId = 0 EnableSWTags = 0 DumpSWTagsBXML = 0 ForceDeviceId = unk FilterDeviceId = unk FilterBdfPath = unk LoadBinarySipFromFile = unk InjectInternalBuildOptions = unk OverrideCsrAllocationSize = -1 ForceL1Caching = -1 UseKmdMigration = -1 EnableExperimentalCommandBuffer = 0 OverrideStatelessMocsIndex = -1 OverrideMocsIndexForScratchSpace = -1 CFEFusedEUDispatch = -1 ForceAuxTranslationMode = -1 OverrideGpuAddressSpace = -1 OverrideMaxWorkgroupSize = -1 EnableFlushTaskSubmission = -1 DoCpuCopyOnReadBuffer = -1 DoCpuCopyOnWriteBuffer = -1 PauseOnEnqueue = -1 EnableDebugBreak = 1 FlushAllCaches = 0 MakeEachEnqueueBlocking = 0 DisableResourceRecycling = 0 TrackParentEvents = 0 RebuildPrecompiledKernels = 0 LoopAtDriverInit = 0 DoNotRegisterTrimCallback = 0 OverrideInvalidEngineWithDefault = 0 EnableKernelTunning = -1 ForceAuxTranslationEnabled = -1 DisableTimestampPacketOptimizations = 0 DisableCachingForStatefulBufferAccess = 0 PrintDebugSettings = 0 PrintDebugMessages = 0 DumpKernels = 0 DumpKernelArgs = 0 LogApiCalls = 0 LogPatchTokens = 0 LogTaskCounts = 0 LogAlignedAllocations = 0 LogAllocationMemoryPool = 0 LogMemoryObject = 0 ResidencyDebugEnable = 0 EventsDebugEnable = 0 EventsTrackerEnable = 0 PrintLWSSizes = 0 PrintDispatchParameters = 0 PrintProgramBinaryProcessingTime = 0 PrintRelocations = 0 PrintTimestampPacketContents = 0 WddmResidencyLogger = 0 PrintBOCreateDestroyResult = 0 PrintBOBindingResult = 0 PrintDriverDiagnostics = -1 PrintDeviceAndEngineIdOnSubmission = 0 EnableDirectSubmission = -1 
DirectSubmissionBufferPlacement = -1 DirectSubmissionSemaphorePlacement = -1 DirectSubmissionBufferAddressing = -1 DirectSubmissionSemaphoreAddressing = -1 DirectSubmissionDisableCpuCacheFlush = -1 DirectSubmissionEnableDebugBuffer = 0 DirectSubmissionDiagnosticExecutionCount = 30 DirectSubmissionNewResourceTlbFlush = -1 DirectSubmissionDisableCacheFlush = -1 DirectSubmissionDisableMonitorFence = -1 DirectSubmissionPrintBuffers = 0 USMEvictAfterMigration = 0 EnableDirectSubmissionController = -1 DirectSubmissionControllerTimeout = -1 UseVmBind = -1 PassBoundBOToExec = -1 EnableNullHardware = 0 ForceLinearImages = 0 ForceSLML3Config = 0 Force32bitAddressing = 0 ForceCsrFlushing = 0 ForceCsrReprogramming = 0 OmitTimestampPacketDependencies = 0 DisableStatelessToStatefulOptimization = 0 DisableConcurrentBlockExecution = 0 UseNoRingFlushesKmdMode = 1 DisableZeroCopyForUseHostPtr = 0 DisableZeroCopyForBuffers = 0 DisableDcFlushInEpilogue = 0 EnableBOMmapCreate = -1 EnableHostPtrTracking = -1 EnableNV12 = 1 EnablePackedYuv = 1 EnableDeferredDeleter = 1 EnableAsyncDestroyAllocations = 1 EnableAsyncEventsHandler = 1 EnableForcePin = 1 EnableGemCloseWorker = -1 EnableHostPtrValidation = -1 EnableComputeWorkSizeND = 1 EnableMultiRootDeviceContexts = 1 EnableComputeWorkSizeSquared = 0 EnableVaLibCalls = -1 EnableExtendedVaFormats = 0 AddClGlSharing = -1 EnableFormatQuery = 1 EnableFreeMemory = 0 ForceSamplerLowFilteringPrecision = 0 MakeAllBuffersResident = 0 EnableIntelVme = -1 EnableIntelAdvancedVme = -1 EnableBlitterOperationsSupport = -1 EnableBlitterForEnqueueOperations = -1 EnableBlitterForEnqueueImageOperations = -1 EnableCacheFlushAfterWalker = -1 EnableLocalMemory = -1 EnableStatelessToStatefulBufferOffsetOpt = -1 CreateMultipleRootDevices = 0 CreateMultipleSubDevices = 0 LimitAmountOfReturnedDevices = 0 Enable64kbpages = -1 OverrideEnableKmdNotify = -1 OverrideKmdNotifyDelayMicroseconds = -1 OverrideEnableQuickKmdSleep = -1 OverrideQuickKmdSleepDelayMicroseconds = 
-1 OverrideEnableQuickKmdSleepForSporadicWaits = -1 OverrideDelayQuickKmdSleepForSporadicWaitsMicroseconds = -1 OverrideEnableQuickKmdSleepForDirectSubmission = -1 OverrideDelayQuickKmdSleepForDirectSubmissionMicroseconds = -1 PowerSavingMode = 0 CsrDispatchMode = 0 OverrideDefaultFP64Settings = -1 RenderCompressedImagesEnabled = -1 RenderCompressedBuffersEnabled = -1 EnableSharedSystemUsmSupport = -1 EnablePassInlineData = -1 ForceFineGrainedSVMSupport = -1 ForcePipeSupport = -1 ForceSystemMemoryPlacement = 0 ForceNonSystemMemoryPlacement = 0 DisableIndirectAccess = -1 ForceOCLVersion = 0 ForceOCL21FeaturesSupport = -1 ForcePreemptionMode = -1 UsmInitialPlacement = -1 ForceKernelPreemptionMode = -1 NodeOrdinal = -1 OverrideThreadArbitrationPolicy = -1 OverrideAubDeviceId = -1 EnableTimestampPacket = -1 AllocateSharedAllocationsWithCpuAndGpuStorage = -1 UseMaxSimdSizeToDeduceMaxWorkgroupSize = 0 ReturnRawGpuTimestamps = 0 EnableDeviceBasedTimestamps = 0 MaxHwThreadsPercent = 0 MinHwThreadsUnoccupied = 0 LimitBlitterMaxWidth = -1 LimitBlitterMaxHeight = -1 PostBlitCommand = -1 UseCommandBufferHeaderSizeForWddmQueueSubmission = 1 OverridePreemptionSurfaceSizeInMb = -1 OverrideLeastOccupiedBank = -1 UseAsyncDrmExec = -1 EnableMultiStorageResources = -1 MultiStorageGranularity = -1 MultiStoragePolicy = -1; PrintExecutionBuffer = 0 PrintBOsForSubmit = 0 PauseOnBlitCopy = -1 ForceImplicitFlush = 0 ForcePipeControlPriorToWalker = 0 OverrideRevision = -1 ForceCacheFlushForBcs = -1 ForceGpgpuSubmissionForBcsEnqueue = -1 ForceSemaphoreDelayBetweenWaits = -1 ForceLocalMemoryAccessMode = -1 ZebinAppendElws = 0 ZebinIgnoreIcbeVersion = 0 LogWaitingForCompletion = 0 ForceUserptrAlignment = -1 ForceCommandBufferAlignment = -1 ForceDefaultHeapSize = -1 UseExternalAllocatorForSshAndDsh = 0 DirectSubmissionDrmContext = -1 DirectSubmissionOverrideBlitterSupport = -1 DirectSubmissionOverrideRenderSupport = -1 DirectSubmissionOverrideComputeSupport = -1 EnableUsmCompression = -1 
PerformImplicitFlushEveryEnqueueCount = -1 PerformImplicitFlushForNewResource = -1 PerformImplicitFlushForIdleGpu = -1 ProvideVerboseImplicitFlush = false PauseOnGpuMode = -1 PrintTagAllocationAddress = 0 DoNotFlushCaches = false UseBindlessMode = -1 MediaVfeStateMaxSubSlices = -1 PrintBlitDispatchDetails = 0 EnableMockSourceLevelDebugger = 0 EnableHostPointerImport = -1 EnableHostUsmSupport = -1 ForceBtpPrefetchMode = -1 OverrideProfilingTimerResolution = -1 PrintIoctlTimes = 0 PrintIoctlEntries = 0 PrintUmdSharedMigration = 0 UpdateTaskCountFromWait = -1 EnableTimestampWait = -1 PreferCopyEngineForCopyBufferToBuffer = -1 EnableStaticPartitioning = -1 DisableDeepBind = 0 GpuScratchRegWriteAfterWalker = -1 GpuScratchRegWriteRegisterData = 0 GpuScratchRegWriteRegisterOffset = 0 UseBindlessDebugSip = 0 OverrideTimestampEvents= -1 OverrideSlmAllocationSize = -1 OverrideSlmSize = -1 UseCyclesPerSecondTimer = 0 PrintOsContextInitializations = 0 WaitLoopCount = -1 DebuggerLogBitmask = 0 GTPinAllocateBufferInSharedMemory = -1 DeferOsContextInitialization = -1 DebuggerOptDisable = -1 DebuggerForceSbaTrackingMode = -1 ExperimentalEnableCustomLocalMemoryAlignment = 0 AlignLocalMemoryVaTo2MB = -1 EngineInstancedSubDevices = 0 OverrideTimestampPacketSize = -1 CFEComputeOverdispatchDisable = -1 CFEWeightedDispatchModeDisable = -1 CFESingleSliceDispatchCCSMode = -1 CleanStateInPreamble = 0 CFENumberOfWalkers = -1 CFEMaximumNumberOfThreads = -1 CFEOverDispatchControl = -1 CFELargeGRFThreadAdjustDisable = -1 SynchronizeWalkerInWparidMode = -1 EnableWalkerPartition = -1 OverrideNumComputeUnitsForScratch = -1 ForceThreadGroupDispatchSize = -1 ForceStatelessL1CachingPolicy = -1 ForceMemoryBankIndexOverride = -1 SynchronizeWithSemaphores = -1 UseCrossAtomicSynchronization = -1 EnableStatelessCompression = -1 EnableMultiTileCompression = -1 EnablePrivateScratchSlot1 = -1 DisablePipeControlPrecedingPostSyncCommand = -1 UseClearColorAllocationForBlitter = false 
OverrideMultiStoragePlacement = -1 MultiTileIsaPlacement = -1 FormatForStatelessCompressionWithUnifiedMemory = 0xF ForceMultiGpuPartialWritesInComputeMode = -1 ForceMultiGpuPartialWrites = -1 ForceMultiGpuAtomicsInComputeMode = -1 ForceMultiGpuAtomics = -1 ForceBufferCompressionFormat = -1 ExperimentalSetWalkerPartitionCount = 0 EnableStatelessCompressionWithUnifiedMemory = 0 EnableMultiGpuAtomicsOptimization = 1 EnableHwGenerationLocalIds = -1 WalkerPartitionPreferHighestDimension = -1 SetMinimalPartitionSize = -1 OverrideBlitterTargetMemory = -1 OverrideBlitterMocs = -1 GlobalSequencerFlushOnCopyEngine = false ForceCompressionDisabledForCompressedBlitCopies = -1 ExperimentalSetWalkerPartitionType = -1 UseImmDataWriteModeOnPostSyncOperation = 0 OverridePostSyncMocs = -1 EnableImmediateVmBindExt = -1 ForceExecutionTile = -1 DisableCachingForHeaps = 0 OverrideTimestampPacketSize = -1 ClDeviceGlobalMemSizeAvailablePercent = -1 DebugApiUsed = 0 ForceHostPointerImport = -1 OverrideMaxWorkGroupCount = -1 UseUmKmDataTranslator = 0 EnableUserFenceForCompletionWait = -1 EnableUserFenceUseCtxId = -1 EnableResourceTags = 0 SetKmdWaitTimeout = -1 OverrideNotifyEnableForTagUpdatePostSync = -1 OverrideUseKmdWaitFunction = -1 EnableCacheFlushAfterWalkerForAllQueues = -1 Force32BitDriverSupport = -1 EnableCmdQRoundRobindEngineAssign = -1 CmdQRoundRobindEngineAssignBitfield = -1 CmdQRoundRobindEngineAssignNTo1 = -1 EnableCmdQRoundRobindBcsEngineAssign = -1 EnableCmdQRoundRobindBcsEngineAssignLimit = -1 EnableCmdQRoundRobindBcsEngineAssignStartingValue = -1 ForceBCSForInternalCopyEngine = -1 OverrideCmdQueueSynchronousMode = -1 UseAtomicsForSelfCleanupSection = -1 HBMSizePerTileInGigabytes = 0 OverrideSystolicPipelineSelect = -1 OverrideSystolicInComputeWalker = -1 OverrideKernelSizeLimitForSmallDispatch = -1 SkipFlushingEventsOnGetStatusCalls = 0 AllowUnrestrictedSize = 0 ForceDefaultThreadArbitrationPolicyIfNotSpecified = 0 DoNotFreeResources = 0 OverrideGmmResourceUsageField = 
-1 LogAllocationType = 0 LogAllocationStdout = 0 ProgramExtendedPipeControlPriorToNonPipelinedStateCommand = -1 ProgramWalkerPartitionSelfCleanup = -1 WparidRegisterProgramming = -1 UsePipeControlAfterPartitionedWalker = -1 OverrideBufferSuitableForRenderCompression = -1 AllowMixingRegularAndCooperativeKernels = 0 AllowPatchingVfeStateInCommandLists = 0 PrintMemoryRegionSizes = 0 OverrideDrmRegion = -1 AllowSingleTileEngineInstancedSubDevices = 0 BinaryCacheTrace = false OverrideL1CacheControlInSurfaceState = -1 OverrideL1CacheControlInSurfaceStateForScratchSpace = -1 OverridePreferredSlmAllocationSizePerDss = -1 ForceL3PrefetchForComputeWalker = -1 ForceZPassAsyncComputeThreadLimit = -1 ForcePixelAsyncComputeThreadLimit = -1 ToggleHintKernelDisableCompression = -1 EnableImplicitScaling = -1 DecompressInL3ForImage2dFromBuffer = -1 CFEComputeDispatchAllWalkerEnable = -1 ComputeDispatchAllWalkerEnableInComputeWalker = -1 EnableMemoryPrefetch = -1 ForceCsStallForStatePrefetch = -1 ProgramGlobalFenceAsMiMemFenceCommandInCommandStream = -1 ProgramGlobalFenceAsPostSyncOperationInComputeWalker = -1 ProgramGlobalFenceAsKernelInstructionInEUKernel = -1 DoNotReportTile1BscWaActive = -1 ForceTile0PlacementForTile1ResourcesWaActive = -1 ClosEnabled = -1 ClosNumCacheWays = -1 EngineUsageHint = -1 AddStatePrefetchCmdToMemoryPrefetchAPI = -1 UpdateCrossThreadDataSize = 0 ForceBcsEngineIndex = -1 ResolveDependenciesViaPipeControls = -1 EnableDrmCompletionFence = -1 UseDrmCompletionFenceForAllAllocations = -1 ExperimentalEnableSourceLevelDebugger = 0 Force2dImageAsArray = -1 ForceExtendedBufferSize = -1 ForceExtendedUSMBufferSize = -1 MakeIndirectAllocationsResidentAsPack = -1 MakeEachAllocationResident = -1 AssignBCSAtEnqueue = -1 ReuseKernelBinaries = -1 EnableChipsetUniqueUUID = -1 ForceSimdMessageSizeInWalker = -1 UseNewQueryTopoIoctl = 1 DisableGpuHangDetection = 0 EnableRecoverablePageFaults = -1 EnableImplicitMigrationOnFaultableHardware = -1 UseDrmVirtualEnginesForCcs = -1 
UseDrmVirtualEnginesForBcs = -1 LimitEngineCountForVirtualBcs = -1 LimitEngineCountForVirtualCcs = -1 ForceRunAloneContext = -1 AppendMemoryPrefetchForKmdMigratedSharedAllocations = -1 CreateContextWithAccessCounters = -1 AccessCountersTrigger = -1 AccessCountersGranularity = -1 OverridePatIndex = -1 UseTileMemoryBankInVirtualMemoryCreation = -1 DisableScratchPages = 0 ForceAllResourcesUncached = 0 ForcePreParserEnabledForMiArbCheck = -1 BatchBufferStartPrepatchingWaEnabled = -1 SetVmAdviseAtomicAttribute = -1 DirectSubmissionForceLocalMemoryStorageMode = -1 EnableRingSwitchTagUpdateWa = -1 DirectSubmissionReadBackCommandBuffer = -1 DirectSubmissionReadBackRingBuffer = -1 ReadBackCommandBufferAllocation = -1 PrintImageBlitBlockCopyCmdDetails = 0 DirectSubmissionInsertExtraMiMemFenceCommands = -1 DirectSubmissionInsertSfenceInstructionPriorToSubmission = -1 compute-runtime-22.14.22890/opencl/test/unit_test/test_files/igdrcl_string.config000066400000000000000000000001011422164147700300010ustar00rootroot00000000000000StringTestKey = TestValue IntTestKey = 123 IntTestKeyHex = 0xABCDcompute-runtime-22.14.22890/opencl/test/unit_test/test_files/indirect_access_kernel.cl000066400000000000000000000004221422164147700307700ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ __kernel void testIndirect(__global long* buf) { size_t gid = get_global_id(0); if (gid == 0) { __global char* val = (__global char*)buf[0]; *val = 1; } } indirect_access_kernel_internal_options.txt000066400000000000000000000002011422164147700345740ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/test_files/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ -cl-intel-greater-than-4GB-buffer-required compute-runtime-22.14.22890/opencl/test/unit_test/test_files/kernel_data_param.cl000066400000000000000000000005461422164147700277460ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 
Intel Corporation * * SPDX-License-Identifier: MIT * */ __kernel void test(__global int *dst) { int tid = get_global_id(0); int n = get_global_size(0); dst[tid] = n; }; __kernel void test_get_local_size(__global int *dst) { int tid = get_global_id(0); int n = get_local_size(0); dst[tid] = n; }; compute-runtime-22.14.22890/opencl/test/unit_test/test_files/kernel_for_specific_device.skl000066400000000000000000000000611422164147700320120ustar00rootroot00000000000000__kernel void simple_device_specific_kernel() { }compute-runtime-22.14.22890/opencl/test/unit_test/test_files/kernel_for_specific_device_options.txt000066400000000000000000000001461422164147700336170ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ -cl-opt-disable compute-runtime-22.14.22890/opencl/test/unit_test/test_files/kernel_num_args.cl000066400000000000000000000003111422164147700274560ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ __kernel void test(__global float *argGlobal, __read_only image3d_t argImg3D, __constant float *argConst) { } 
compute-runtime-22.14.22890/opencl/test/unit_test/test_files/linux/000077500000000000000000000000001422164147700251275ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/test_files/linux/devices/000077500000000000000000000000001422164147700265515ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/test_files/linux/devices/device/000077500000000000000000000000001422164147700300105ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/test_files/linux/devices/device/drm/000077500000000000000000000000001422164147700305725ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/test_files/linux/devices/device/drm/card1/000077500000000000000000000000001422164147700315645ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/test_files/linux/devices/device/drm/card1/gt/000077500000000000000000000000001422164147700321765ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/test_files/linux/devices/device/drm/card1/gt/gt0/000077500000000000000000000000001422164147700326705ustar00rootroot00000000000000rps_max_freq_mhz000066400000000000000000000000041422164147700360720ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/test_files/linux/devices/device/drm/card1/gt/gt03000compute-runtime-22.14.22890/opencl/test/unit_test/test_files/linux/devices/device/drm/card1/gt/gt1/000077500000000000000000000000001422164147700326715ustar00rootroot00000000000000rps_max_freq_mhz000066400000000000000000000000041422164147700360730ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/test_files/linux/devices/device/drm/card1/gt/gt14000gt_max_freq_mhz000066400000000000000000000000041422164147700345740ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/test_files/linux/devices/device/drm/card12000prelim_uapi_version000066400000000000000000000000041422164147700354750ustar00ro
otroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/test_files/linux/devices/device/drm/card12.0 compute-runtime-22.14.22890/opencl/test/unit_test/test_files/linux/proc/000077500000000000000000000000001422164147700260725ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/test_files/linux/proc/self/000077500000000000000000000000001422164147700270235ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/test_files/linux/proc/self/maps000066400000000000000000000031641422164147700277120ustar00rootroot00000000000000564fcd1fa000-564fcd202000 r-xp 00000000 08:03 3670041 /bin/cat 564fcd401000-564fcd402000 r--p 00007000 08:03 3670041 /bin/cat 564fcd402000-564fcd403000 rw-p 00008000 08:03 3670041 /bin/cat 564fcdf40000-564fcdf61000 rw-p 00000000 00:00 0 [heap] 7fded3d79000-7fded4879000 r--p 00000000 08:03 3938831 /usr/lib/locale/locale-archive 7fded4879000-7fded4a60000 r-xp 00000000 08:03 4199137 /lib/x86_64-linux-gnu/libc-2.27.so 7fded4a60000-7fded4c60000 ---p 001e7000 08:03 4199137 /lib/x86_64-linux-gnu/libc-2.27.so 7fded4c60000-7fded4c64000 r--p 001e7000 08:03 4199137 /lib/x86_64-linux-gnu/libc-2.27.so 7fded4c64000-7fded4c66000 rw-p 001eb000 08:03 4199137 /lib/x86_64-linux-gnu/libc-2.27.so 7fded4c66000-7fded4c6a000 rw-p 00000000 00:00 0 7fded4c6a000-7fded4c91000 r-xp 00000000 08:03 4199109 /lib/x86_64-linux-gnu/ld-2.27.so 7fded4e54000-7fded4e78000 rw-p 00000000 00:00 0 7fded4e91000-7fded4e92000 r--p 00027000 08:03 4199109 /lib/x86_64-linux-gnu/ld-2.27.so 7fded4e92000-7fded4e93000 rw-p 00028000 08:03 4199109 /lib/x86_64-linux-gnu/ld-2.27.so 7fded4e93000-7fded4e94000 rw-p 00000000 00:00 0 7ffd6dfa2000-7ffd6dfc3000 rw-p 00000000 00:00 0 [stack] 7ffd6dfe8000-7ffd6dfeb000 r--p 00000000 00:00 0 [vvar] 7ffd6dfeb000-7ffd6dfec000 r-xp 00000000 00:00 0 [vdso] ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0 [vsyscall] 
compute-runtime-22.14.22890/opencl/test/unit_test/test_files/media_kernels_backend.cl000066400000000000000000001164051422164147700305700ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ // VME KERNELS __kernel __attribute__((reqd_work_group_size(16, 1, 1))) void block_motion_estimate_intel(sampler_t accelerator, __read_only image2d_t srcImg, __read_only image2d_t refImg, __global short2 *prediction_motion_vector_buffer, __global short2 *motion_vector_buffer, __global ushort *residuals, int height, int width, int stride) { __local uint dst[64]; __local ushort *dist = (__local ushort *)&dst[8 * 5]; int sid_0 = stride * get_group_id(0); int gid_0 = sid_0 / height; int gid_1 = sid_0 % height; for (int sid = sid_0; sid < sid_0 + stride && gid_0 < width && gid_1 < height; sid++, gid_0 = sid / height, gid_1 = sid % height) { int2 srcCoord = 0; int2 refCoord = 0; srcCoord.x = gid_0 * 16 + get_global_offset(0); srcCoord.y = gid_1 * 16 + get_global_offset(1); short2 predMV = 0; #ifndef HW_NULL_CHECK if (prediction_motion_vector_buffer != NULL) #endif { predMV = prediction_motion_vector_buffer[gid_0 + gid_1 * width]; refCoord.x = predMV.x / 4; refCoord.y = predMV.y / 4; refCoord.y = refCoord.y & 0xFFFE; } { intel_work_group_vme_mb_query(dst, srcCoord, refCoord, srcImg, refImg, accelerator); } barrier(CLK_LOCAL_MEM_FENCE); // Write Out Result // 4x4 if (intel_get_accelerator_mb_block_type(accelerator) == 0x2) { int x = get_local_id(0) % 4; int y = get_local_id(0) / 4; int index = (gid_0 * 4 + x) + (gid_1 * 4 + y) * width * 4; short2 val = as_short2(dst[8 + (y * 4 + x) * 2]); motion_vector_buffer[index] = val; #ifndef HW_NULL_CHECK if (residuals != NULL) #endif { residuals[index] = dist[y * 4 + x]; } } // 8x8 if (intel_get_accelerator_mb_block_type(accelerator) == 0x1) { if (get_local_id(0) < 4) { int x = get_local_id(0) % 2; int y = get_local_id(0) / 2; int index = (gid_0 * 2 + x) + (gid_1 * 2 + y) * width * 2; 
short2 val = as_short2(dst[8 + (y * 2 + x) * 8]); motion_vector_buffer[index] = val; #ifndef HW_NULL_CHECK if (residuals != NULL) #endif { residuals[index] = dist[(y * 2 + x) * 4]; } } } // 16x16 if (intel_get_accelerator_mb_block_type(accelerator) == 0x0) { if (get_local_id(0) == 0) { int index = gid_0 + gid_1 * width; short2 val = as_short2(dst[8]); motion_vector_buffer[index] = val; #ifndef HW_NULL_CHECK if (residuals != NULL) #endif { residuals[index] = dist[0]; } } } } } __kernel __attribute__((reqd_work_group_size(16, 1, 1))) void block_advanced_motion_estimate_check_intel( sampler_t accelerator, __read_only image2d_t srcImg, __read_only image2d_t refImg, uint flags, uint skip_block_type, uint search_cost_penalty, uint search_cost_precision, __global short2 *count_motion_vector_buffer, __global short2 *predictors_buffer, __global short2 *skip_motion_vector_buffer, __global short2 *motion_vector_buffer, __global char *intra_search_predictor_modes, __global ushort *residuals, __global ushort *skip_residuals, __global ushort *intra_residuals, __read_only image2d_t intraSrcImg, int height, int width, int stride) { __local uint dstSearch[64]; // 8 GRFs __local uint dstSkipIntra[64 + 24]; // 11 GRFs (8 for inter, 3 for intra) __local ushort *distSearch = (__local ushort *)&dstSearch[8 * 5]; // distortion in the 6th GRF // Initialize the MV cost table: // MV Cost in U4U4 format: // No cost : 0, 0, 0, 0, 0, 0, 0, 0 // Low Cost : 1, 4, 5, 9, 10, 12, 14, 15 // Normal Cost: 5, 26, 29, 43, 45, 47, 57, 57 // High Cost : 29, 61, 72, 78, 88, 89, 91, 92 uint2 MVCostTable; if (search_cost_penalty == 1) { MVCostTable.s0 = 0x09050401; MVCostTable.s1 = 0x0F0E0C0A; } else if (search_cost_penalty == 2) { MVCostTable.s0 = 0x2B1D1A05; MVCostTable.s1 = 0x39392F2D; } else if (search_cost_penalty == 3) { MVCostTable.s0 = 0x4E483D1D; MVCostTable.s1 = 0x5C5B5958; } else { MVCostTable.s0 = 0; MVCostTable.s1 = 0; } uint MVCostPrecision = ((uint)search_cost_precision) << 16; // Frame is 
divided into rows * columns of MBs. // One h/w thread per WG. // One WG processes 'row' MBs - one row per iteration and one MB per row. // Number of WGs (or h/w threads) is number of columns MBs // Each iteration processes the MB in a row - gid_0 is the MB id in a row and // gid_1 is the row offset. int sid_0 = stride * get_group_id(0); int gid_0 = sid_0 / height; int gid_1 = sid_0 % height; for (int sid = sid_0; sid < sid_0 + stride && gid_0 < width && gid_1 < height; sid++, gid_0 = sid / height, gid_1 = sid % height) { int2 srcCoord; srcCoord.x = gid_0 * 16 + get_global_offset(0); // 16 pixels wide MBs (globally scalar) srcCoord.y = gid_1 * 16 + get_global_offset(1); // 16 pixels tall MBs (globally scalar) uint curMB = gid_0 + gid_1 * width; // current MB id short2 count = count_motion_vector_buffer[curMB]; int countPredMVs = count.x; if (countPredMVs != 0) { uint offset = curMB * 8; // 8 predictors per MB offset += get_local_id(0) % 8; // 16 work-items access 8 MVs for MB // one predictor for MB per SIMD channel // Reduce predictors from Q-pixel to integer precision. int2 predMV = 0; if (get_local_id(0) < countPredMVs) { predMV = convert_int2(predictors_buffer[offset]); // one MV per work-item predMV.x /= 4; predMV.y /= 4; predMV.y &= 0xFFFE; } // Do up to 8 IMEs, get the best MVs and their distortions, and optionally // a FBR of the best MVs. // Finally the results are written out to SLM. intel_work_group_vme_mb_multi_query_8( dstSearch, // best search MV and its distortions into SLM countPredMVs, // count of predictor MVs (globally scalar - value range // 1 to 8) MVCostPrecision, // MV cost precision MVCostTable, // MV cost table srcCoord, // MB 2-D offset (globally scalar) predMV, // predictor MVs (up to 8 distinct MVs for SIMD16 thread) srcImg, // source refImg, // reference accelerator); // vme object } int doIntra = (flags & 0x2) != 0; int intraEdges = 0; if (doIntra) { // Enable all edges by default. 
intraEdges = 0x3C; // If this is a left-edge MB, then disable left edges. if ((gid_0 == 0) & (get_global_offset(0) == 0)) { intraEdges &= 0x14; } // If this is a right edge MB then disable right edges. if (gid_0 == width - 1) { intraEdges &= 0x38; } // If this is a top-edge MB, then disable top edges. if ((gid_1 == 0) & (get_global_offset(1) == 0)) { intraEdges &= 0x20; } // Set bit6=bit5. intraEdges |= ((intraEdges & 0x20) << 1); intraEdges <<= 8; } int countSkipMVs = count.y; if (countSkipMVs != 0 || doIntra == true) { uint offset = curMB * 8; // 8 sets of skip check MVs per MB offset += (get_local_id(0) % 8); // 16 work-items access 8 sets of MVs for MB // one set of skip MV per SIMD channel // Do up to 8 skip checks and get the distortions for each of them. // Finally the results are written out to SLM. if ((skip_block_type == 0x0) | ((doIntra) & (countSkipMVs == 0))) { int skipMVs = 0; if (get_local_id(0) < countSkipMVs) { __global int *skip1_motion_vector_buffer = (__global int *)skip_motion_vector_buffer; skipMVs = skip1_motion_vector_buffer[offset]; // one packed MV for one // work-item } intel_work_group_vme_mb_multi_check_16x16( dstSkipIntra, // distortions into SLM countSkipMVs, // count of skip check MVs (value range 0 to 8) doIntra, // compute intra modes intraEdges, // intra edges to use srcCoord, // MB 2-D offset (globally scalar) skipMVs, // skip check MVs (up to 8 sets of skip check MVs for // SIMD16 thread) srcImg, // source refImg, // reference intraSrcImg, // intra source accelerator); } if ((skip_block_type == 0x1) & (countSkipMVs > 0)) { int4 skipMVs = 0; if (get_local_id(0) < countSkipMVs) { __global int4 *skip4_motion_vector_buffer = (__global int4 *)(skip_motion_vector_buffer); skipMVs = skip4_motion_vector_buffer[offset]; // four component MVs // per work-item } intel_work_group_vme_mb_multi_check_8x8( dstSkipIntra, // distortions into SLM countSkipMVs, // count of skip check MVs per MB (value range 0 to 8) doIntra, // compute intra modes 
intraEdges, // intra edges to use srcCoord, // MB 2-D offset (globally scalar) skipMVs, // skip check MVs (up to 8 ets of skip check MVs for SIMD16 // thread) srcImg, // source refImg, // reference intraSrcImg, // intra source accelerator); } } barrier(CLK_LOCAL_MEM_FENCE); // Write Out motion estimation result: // Result format // Hierarchical row-major layout // i.e. row-major of blocks MVs in MBs, and row-major of 8 sets of // MVs/distortion in blocks if (countPredMVs != 0) { // 4x4 if (intel_get_accelerator_mb_block_type(accelerator) == 0x2) { int index = (gid_0 * 16 + get_local_id(0)) + (gid_1 * 16 * width); // 1. 16 work-items enabled. // 2. Work-items gather fwd MVs in strided dword locations 0, 2, .., 30 // (interleaved // fwd/bdw MVs) with constant offset 8 (control data size) from SLM // into contiguous // short2 locations 0, 1, .., 15 of global buffer // search_motion_vector_buffer with // offset index. // 3. Work-items gather contiguous ushort locations 0, 1, .., 15 from // distSearch into // contiguous ushort locations 0, 1, .., 15 of search_residuals with // offset index. short2 val = as_short2(dstSearch[8 + get_local_id(0) * 2]); motion_vector_buffer[index] = val; #ifndef HW_NULL_CHECK if (residuals != NULL) #endif { residuals[index] = distSearch[get_local_id(0)]; } } // 8x8 else if (intel_get_accelerator_mb_block_type(accelerator) == 0x1) { // Only 1st 4 work-item are needed. if (get_local_id(0) < 4) { int index = (gid_0 * 4 + get_local_id(0)) + (gid_1 * 4 * width); // 1. 4 work-items enabled. // 2. Work-items gather fw MVs in strided dword locations 0, 8, 16, 24 // (interleaved // fwd/bdw MVs) with constant offset 8 from SLM into contiguous // short2 locations // 0, 1, .., 15 of global buffer search_motion_vector_buffer with // offset index. // 3. Work-items gather strided ushort locations 0, 4, 8, 12 from // distSearch into // contiguous ushort locations 0, 1, .., 15 of search_residuals // with offset index. 
short2 val = as_short2(dstSearch[8 + get_local_id(0) * 4 * 2]); motion_vector_buffer[index] = val; #ifndef HW_NULL_CHECK if (residuals != NULL) #endif { residuals[index] = distSearch[get_local_id(0) * 4]; } } } // 16x16 else if (intel_get_accelerator_mb_block_type(accelerator) == 0x0) { // One 1st work is needed. if (get_local_id(0) == 0) { int index = gid_0 + gid_1 * width; // 1. 1 work-item enabled. // 2. Work-item gathers fwd MV in dword location 0 with constant // offset 8 from // SLM into short2 locations 0 of global buffer // search_motion_vector_buffer. // 3. Work-item gathers ushort location 0 from distSearch into ushort // location 0 of search_residuals with offset index. short2 val = as_short2(dstSearch[8]); motion_vector_buffer[index] = val; #ifndef HW_NULL_CHECK if (residuals != NULL) #endif { residuals[index] = distSearch[0]; } } } } // Write out motion skip check result: // Result format // Hierarchical row-major layout // i.e. row-major of blocks in MBs, and row-major of 8 sets of // distortions in blocks if (countSkipMVs != 0) { if (skip_block_type == 0x0) { // Copy out 8 (1 component) sets of distortion values. int index = (gid_0 * 8) + (get_local_id(0)) + (gid_1 * 8 * width); if (get_local_id(0) < countSkipMVs) { __local ushort *distSkip = (__local ushort *)&dstSkipIntra[0]; // 1. Up to 8 work-items are enabled. // 2. The work-item gathers distSkip locations 0, 16*1, .., 16*7 and // copies them to contiguous skip_residual locations 0, 1, 2, .., // 7. skip_residuals[index] = distSkip[get_local_id(0) * 16]; } } else { // Copy out 8 (4 component) sets of distortion values. int index = (gid_0 * 8 * 4) + (get_local_id(0)) + (gid_1 * 8 * 4 * width); __local ushort *distSkip = (__local ushort *)&dstSkipIntra[0]; if (get_local_id(0) < countSkipMVs * 4) { // 1. Up to 16 work-items are enabled. // 2. The work-item gathers distSkip locations 0, 4*1, .., 4*31 and // copies them to contiguous skip_residual locations 0, 1, 2, .., // 31. 
skip_residuals[index] = distSkip[get_local_id(0) * 4]; skip_residuals[index + 16] = distSkip[(get_local_id(0) + 16) * 4]; } } } // Write out intra search result: if (doIntra) { int index_low = (gid_0 * 22) + (get_local_id(0) * 2) + (gid_1 * 22 * width); int index_high = (gid_0 * 22) + (get_local_id(0) * 2) + 1 + (gid_1 * 22 * width); // Write out the 4x4 intra modes if (get_local_id(0) < 8) { __local char *dstIntra_4x4 = (__local char *)(&dstSkipIntra[64 + 16 + 4]); char value = dstIntra_4x4[get_local_id(0)]; char value_low = (value)&0xf; char value_high = (value >> 4) & 0xf; intra_search_predictor_modes[index_low + 5] = value_low; intra_search_predictor_modes[index_high + 5] = value_high; } // Write out the 8x8 intra modes if (get_local_id(0) < 4) { __local char *dstIntra_8x8 = (__local char *)(&dstSkipIntra[64 + 8 + 4]); char value = dstIntra_8x8[get_local_id(0) * 2]; char value_low = (value)&0xf; int index = (gid_0 * 22) + (get_local_id(0)) + (gid_1 * 22 * width); intra_search_predictor_modes[index + 1] = value_low; } // Write out the 16x16 intra modes if (get_local_id(0) < 1) { __local char *dstIntra_16x16 = (__local char *)(&dstSkipIntra[64 + 0 + 4]); char value = dstIntra_16x16[get_local_id(0)]; char value_low = (value)&0xf; intra_search_predictor_modes[index_low] = value_low; } // Get the intra residuals. 
#ifndef HW_NULL_CHECK if (intra_residuals != NULL) #endif { int index = (gid_0 * 4) + (gid_1 * 4 * width); if (get_local_id(0) < 1) { __local ushort *distIntra_4x4 = (__local ushort *)(&dstSkipIntra[64 + 16 + 3]); __local ushort *distIntra_8x8 = (__local ushort *)(&dstSkipIntra[64 + 8 + 3]); __local ushort *distIntra_16x16 = (__local ushort *)(&dstSkipIntra[64 + 0 + 3]); intra_residuals[index + 2] = distIntra_4x4[0]; intra_residuals[index + 1] = distIntra_8x8[0]; intra_residuals[index + 0] = distIntra_16x16[0]; } } } } } /************************************************************************************************* Built-in kernel: block_advanced_motion_estimate_bidirectional_check_intel Description: 1. Do motion estimation with 0 to 4 predictor MVs using 0 to 4 (integer motion estimation) IMEs per macro-block, calculating the best search MVs per specified (16x16, 8x8, 4x4) luma block with lowest distortion from amongst the 0 to 4 IME results, and optionally do (fractional bi-directional refinement) FBR on the best IME search results to refine the best search results. The best search (FBR if done, or IME) MVs and their distortions are returned. 2. Do undirectional or bidirectional skip (zero search) checks with 0 to 4 sets of skip check MVs for (16x16, 8x8) luma blocks using 0 to 4 (skip and intra check) SICs and return the distortions associated with the input sets of skip check MVs per specified luma block. 4x4 blocks are not supported by h/w for skip checks. 3. Do intra-prediction for (16x16, 8x8, 4x4) luma blocks and (8x8) chroma blocks using 3 SICs and returning the predictor modes and their associated distortions. Intra-prediction is done for all block sizes. Support for 8x8 chroma blocks cannot be enabled until NV image formats are supported in OCL. 
**************************************************************************************************/ __kernel __attribute__((reqd_work_group_size(16, 1, 1))) void block_advanced_motion_estimate_bidirectional_check_intel( sampler_t accelerator, __read_only image2d_t srcImg, __read_only image2d_t refImg, __read_only image2d_t src_check_image, __read_only image2d_t ref0_check_image, __read_only image2d_t ref1_check_image, uint flags, uint search_cost_penalty, uint search_cost_precision, short2 count_global, uchar bidir_weight, __global short2 *count_motion_vector_buffer, __global short2 *prediction_motion_vector_buffer, __global char *skip_input_mode_buffer, __global short2 *skip_motion_vector_buffer, __global short2 *search_motion_vector_buffer, __global char *intra_search_predictor_modes, __global ushort *search_residuals, __global ushort *skip_residuals, __global ushort *intra_residuals, __read_only image2d_t intraSrcImg, int height, int width, int stride) { __local uint dstSearch[64]; // 8 GRFs __local uint dstSkipIntra[32 + 24]; // 7 GRFs (4 for inter, 3 for intra) // distortion in the 6th GRF __local ushort *distSearch = (__local ushort *)&dstSearch[8 * 5]; // Initialize the MV cost table: // MV Cost in U4U4 format: // No cost : 0, 0, 0, 0, 0, 0, 0, 0 // Low Cost : 1, 4, 5, 9, 10, 12, 14, 15 // Normal Cost: 5, 26, 29, 43, 45, 47, 57, 57 // High Cost : 29, 61, 72, 78, 88, 89, 91, 92 uint2 MVCostTable; if (search_cost_penalty == 1) { MVCostTable.s0 = 0x09050401; MVCostTable.s1 = 0x0F0E0C0A; } else if (search_cost_penalty == 2) { MVCostTable.s0 = 0x2B1D1A05; MVCostTable.s1 = 0x39392F2D; } else if (search_cost_penalty == 3) { MVCostTable.s0 = 0x4E483D1D; MVCostTable.s1 = 0x5C5B5958; } else { MVCostTable.s0 = 0; MVCostTable.s1 = 0; } uint MVCostPrecision = ((uint)search_cost_precision) << 16; // Frame is divided into rows * columns of MBs. // One h/w thread per WG. // One WG processes "row" MBs - one row per iteration and one MB per row. 
// Number of WGs (or h/w threads) is number of columns MBs.Each iteration // processes the MB in a row - gid_0 is the MB id in a row and gid_1 is the // row offset. int sid_0 = stride * get_group_id(0); int gid_0 = sid_0 / height; int gid_1 = sid_0 % height; for (int sid = sid_0; sid < sid_0 + stride && gid_0 < width && gid_1 < height; sid++, gid_0 = sid / height, gid_1 = sid % height) { int2 srcCoord; srcCoord.x = gid_0 * 16 + get_global_offset(0); // 16 pixels wide MBs (globally scalar) srcCoord.y = gid_1 * 16 + get_global_offset(1); // 16 pixels tall MBs (globally scalar) uint curMB = gid_0 + gid_1 * width; // current MB id short2 count; // If either the search or skip vector counts are per-MB, then we need to // read in // the count motion vector buffer. if ((count_global.s0 == -1) | (count_global.s1 == -1)) { count = count_motion_vector_buffer[curMB]; } // If either the search or skip vector counts are per-frame, we need to use // those. if (count_global.s0 >= 0) { count.s0 = count_global.s0; } if (count_global.s1 >= 0) { count.s1 = count_global.s1; } int countPredMVs = count.x; if (countPredMVs != 0) { uint offset = curMB * 4; // 4 predictors per MB offset += get_local_id(0) % 4; // 16 work-items access 4 MVs for MB // one predictor for MB per SIMD channel // Reduce predictors from Q-pixel to integer precision. int2 predMV = 0; if (get_local_id(0) < countPredMVs) { // one MV per work-item predMV = convert_int2(prediction_motion_vector_buffer[offset]); // Predictors are input in QP resolution. Convert that to integer // resolution. predMV.x /= 4; predMV.y /= 4; predMV.y &= 0xFFFFFFFE; } // Do up to 4 IMEs, get the best MVs and their distortions, and optionally // a FBR of // the best MVs. Finally the results are written out to SLM. 
intel_work_group_vme_mb_multi_query_4( dstSearch, // best search MV and its distortions into SLM countPredMVs, // count of predictor MVs (globally scalar - value range // 1 to 4) MVCostPrecision, // MV cost precision MVCostTable, // MV cost table srcCoord, // MB 2-D offset (globally scalar) predMV, // predictor MVs (up to 4 distinct MVs for SIMD16 thread) srcImg, // source refImg, // reference accelerator); // vme object } int doIntra = ((flags & 0x2) != 0); int intraEdges = 0; if (doIntra) { // Enable all edges by default. intraEdges = 0x3C; // If this is a left-edge MB, then disable left edges. if ((gid_0 == 0) & (get_global_offset(0) == 0)) { intraEdges &= 0x14; } // If this is a right edge MB then disable right edges. if (gid_0 == width - 1) { intraEdges &= 0x38; } // If this is a top-edge MB, then disable top edges. if ((gid_1 == 0) & (get_global_offset(1) == 0)) { intraEdges &= 0x20; } // Set bit6=bit5. intraEdges |= ((intraEdges & 0x20) << 1); intraEdges <<= 8; } int skip_block_type_8x8 = flags & 0x4; int countSkipMVs = count.y; if (countSkipMVs != 0 || doIntra == true) { // one set of skip MV per SIMD channel // Do up to 4 skip checks and get the distortions for each of them. // Finally the results are written out to SLM. 
if ((skip_block_type_8x8 == 0) | ((doIntra) & (countSkipMVs == 0))) { // 16x16: uint offset = curMB * 4 * 2; // 4 sets of skip check MVs per MB int skipMV = 0; if (get_local_id(0) < countSkipMVs * 2) // need 2 values per MV { offset += (get_local_id(0)); // 16 work-items access 4 sets of MVs for MB __global int *skip1_motion_vector_buffer = (__global int *)skip_motion_vector_buffer; skipMV = skip1_motion_vector_buffer[offset]; // one MV per work-item } uchar skipMode = 0; if (get_local_id(0) < countSkipMVs) { skipMode = skip_input_mode_buffer[curMB]; if (skipMode == 0) { skipMode = 1; } if (skipMode > 3) { skipMode = 3; } } intel_work_group_vme_mb_multi_bidir_check_16x16( dstSkipIntra, // distortions into SLM countSkipMVs, // count of skip check MVs (globally scalar - value // range 1 to 4) doIntra, // compute intra modes intraEdges, // intra edges to use srcCoord, // MB 2-D offset (globally scalar) bidir_weight, // bidirectional weight skipMode, // skip modes skipMV, // skip check MVs (up to 4 distinct sets of skip check MVs // for SIMD16 thread) src_check_image, // source ref0_check_image, // reference fwd ref1_check_image, // reference bwd intraSrcImg, // intra source accelerator); // vme object } else { // 8x8: uint offset = curMB * 4 * 8; // 4 sets of skip check MVs, 16 shorts (8 ints) each per MB int2 skipMVs = 0; if (get_local_id(0) < countSkipMVs * 8) // need 8 values per MV { offset += (get_local_id(0)); // 16 work-items access 4 sets of MVs for MB __global int *skip1_motion_vector_buffer = (__global int *)(skip_motion_vector_buffer); skipMVs.x = skip1_motion_vector_buffer[offset]; // four component MVs // per work-item skipMVs.y = skip1_motion_vector_buffer[offset + 16]; } uchar skipModes = 0; if (get_local_id(0) < countSkipMVs) { skipModes = skip_input_mode_buffer[curMB]; } intel_work_group_vme_mb_multi_bidir_check_8x8( dstSkipIntra, // distortions into SLM countSkipMVs, // count of skip check MVs per MB (globally scalar - // value range 1 to 4) doIntra, 
// compute intra modes intraEdges, // intra edges to use srcCoord, // MB 2-D offset (globally scalar) bidir_weight, // bidirectional weight skipModes, // skip modes skipMVs, // skip check MVs (up to 4 distinct sets of skip check MVs // for SIMD16 thread) src_check_image, // source ref0_check_image, // reference fwd ref1_check_image, // reference bwd intraSrcImg, // intra source accelerator); // vme object } } barrier(CLK_LOCAL_MEM_FENCE); // Write Out motion estimation result: // Result format // Hierarchical row-major layout // i.e. row-major of blocks MVs in MBs, and row-major of 4 sets of // MVs/distortion in blocks if (countPredMVs != 0) { // 4x4 if (intel_get_accelerator_mb_block_type(accelerator) == 0x2) { int index = (gid_0 * 16 + get_local_id(0)) + (gid_1 * 16 * width); // 1. 16 work-items enabled. // 2. Work-items gather fwd MVs in strided dword locations 0, 2, .., 30 // (interleaved // fwd/bdw MVs) with constant offset 8 (control data size) from SLM // into contiguous // short2 locations 0, 1, .., 15 of global buffer // search_motion_vector_buffer with // offset index. // 3. Work-items gather contiguous ushort locations 0, 1, .., 15 from // distSearch into // contiguous ushort locations 0, 1, .., 15 of search_residuals with // offset index. short2 val = as_short2(dstSearch[8 + get_local_id(0) * 2]); search_motion_vector_buffer[index] = val; #ifndef HW_NULL_CHECK if (search_residuals != NULL) #endif { search_residuals[index] = distSearch[get_local_id(0)]; } } // 8x8 else if (intel_get_accelerator_mb_block_type(accelerator) == 0x1) { // Only 1st 4 work-item are needed. if (get_local_id(0) < 4) { int index = (gid_0 * 4 + get_local_id(0)) + (gid_1 * 4 * width); // 1. 4 work-items enabled. // 2. Work-items gather fw MVs in strided dword locations 0, 8, 16, 24 // (interleaved // fwd/bdw MVs) with constant offset 8 from SLM into contiguous // short2 locations // 0, 1, .., 15 of global buffer search_motion_vector_buffer with // offset index. // 3. 
Work-items gather strided ushort locations 0, 4, 8, 12 from // distSearch into // contiguous ushort locations 0, 1, .., 15 of search_residuals // with offset index. short2 val = as_short2(dstSearch[8 + get_local_id(0) * 4 * 2]); search_motion_vector_buffer[index] = val; #ifndef HW_NULL_CHECK if (search_residuals != NULL) #endif { search_residuals[index] = distSearch[get_local_id(0) * 4]; } } } // 16x16 else if (intel_get_accelerator_mb_block_type(accelerator) == 0x0) { // One 1st work is needed. if (get_local_id(0) == 0) { int index = gid_0 + gid_1 * width; // 1. 1 work-item enabled. // 2. Work-item gathers fwd MV in dword location 0 with constant // offset 8 from // SLM into short2 locations 0 of global buffer // search_motion_vector_buffer. // 3. Work-item gathers ushort location 0 from distSearch into ushort // location 0 of search_residuals with offset index. short2 val = as_short2(dstSearch[8]); search_motion_vector_buffer[index] = val; #ifndef HW_NULL_CHECK if (search_residuals != NULL) #endif { search_residuals[index] = distSearch[0]; } } } } // Write out motion skip check result: // Result format // Hierarchical row-major layout // i.e. row-major of blocks in MBs, and row-major of 8 sets of // distortions in blocks if (countSkipMVs != 0) { if (skip_block_type_8x8 == false) { // Copy out 4 (1 component) sets of distortion values. int index = (gid_0 * 4) + (get_local_id(0)) + (gid_1 * 4 * width); if (get_local_id(0) < countSkipMVs) { // 1. Up to 4 work-items are enabled. // 2. The work-item gathers distSkip locations 0, 16*1, .., 16*7 and // copies them to contiguous skip_residual locations 0, 1, 2, .., // 7. __local ushort *distSkip = (__local ushort *)&dstSkipIntra[0]; skip_residuals[index] = distSkip[get_local_id(0) * 16]; } } else { // Copy out 4 (4 component) sets of distortion values. int index = (gid_0 * 4 * 4) + (get_local_id(0)) + (gid_1 * 4 * 4 * width); if (get_local_id(0) < countSkipMVs * 4) { // 1. Up to 16 work-items are enabled. // 2. 
The work-item gathers distSkip locations 0, 4*1, .., 4*15 and // copies them to contiguous skip_residual locations 0, 1, 2, .., // 15. __local ushort *distSkip = (__local ushort *)&dstSkipIntra[0]; skip_residuals[index] = distSkip[get_local_id(0) * 4]; } } } // Write out intra search result: if (doIntra) { // Write out the 4x4 intra modes if (get_local_id(0) < 8) { __local char *dstIntra_4x4 = (__local char *)(&dstSkipIntra[32 + 16 + 4]); char value = dstIntra_4x4[get_local_id(0)]; char value_low = (value)&0xf; char value_high = (value >> 4) & 0xf; int index_low = (gid_0 * 22) + (get_local_id(0) * 2) + (gid_1 * 22 * width); int index_high = (gid_0 * 22) + (get_local_id(0) * 2) + 1 + (gid_1 * 22 * width); intra_search_predictor_modes[index_low + 5] = value_low; intra_search_predictor_modes[index_high + 5] = value_high; } // Write out the 8x8 intra modes if (get_local_id(0) < 4) { __local char *dstIntra_8x8 = (__local char *)(&dstSkipIntra[32 + 8 + 4]); char value = dstIntra_8x8[get_local_id(0) * 2]; char value_low = (value)&0xf; int index = (gid_0 * 22) + (get_local_id(0)) + (gid_1 * 22 * width); intra_search_predictor_modes[index + 1] = value_low; } // Write out the 16x16 intra modes if (get_local_id(0) < 1) { __local char *dstIntra_16x16 = (__local char *)(&dstSkipIntra[32 + 0 + 4]); char value = dstIntra_16x16[0]; char value_low = (value)&0xf; int index = (gid_0 * 22) + (gid_1 * 22 * width); intra_search_predictor_modes[index] = value_low; } // Get the intra residuals. 
#ifndef HW_NULL_CHECK if (intra_residuals != NULL) #endif { int index = (gid_0 * 4) + (gid_1 * 4 * width); if (get_local_id(0) < 1) { __local ushort *distIntra_4x4 = (__local ushort *)(&dstSkipIntra[32 + 16 + 3]); __local ushort *distIntra_8x8 = (__local ushort *)(&dstSkipIntra[32 + 8 + 3]); __local ushort *distIntra_16x16 = (__local ushort *)(&dstSkipIntra[32 + 0 + 3]); intra_residuals[index + 2] = distIntra_4x4[0]; intra_residuals[index + 1] = distIntra_8x8[0]; intra_residuals[index + 0] = distIntra_16x16[0]; } } } } } // VEBOX KERNELS: __kernel void ve_enhance_intel(sampler_t accelerator, int flags, __read_only image2d_t current_input, __write_only image2d_t current_output) { } __kernel void ve_dn_enhance_intel(sampler_t accelerator, int flags, __read_only image2d_t ref_input, __read_only image2d_t current_input, __write_only image2d_t current_output) { } __kernel void ve_dn_di_enhance_intel(sampler_t accelerator, int flags, __read_only image2d_t current_input, __read_only image2d_t ref_input, __write_only image2d_t current_output, __write_only image2d_t ref_output, __write_only image2d_t dndi_output) { } compute-runtime-22.14.22890/opencl/test/unit_test/test_files/media_kernels_backend_options.txt000066400000000000000000000004571422164147700325630ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ -D cl_intel_device_side_advanced_vme_enable -D cl_intel_device_side_avc_vme_enable -D cl_intel_device_side_vme_enable -D cl_intel_media_block_io -cl-unsafe-math-optimizations -cl-mad-enable -cl-fast-relaxed-math compute-runtime-22.14.22890/opencl/test/unit_test/test_files/media_kernels_frontend.cl000066400000000000000000000057441422164147700310230ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ // VME KERNELS __kernel __attribute__((reqd_work_group_size(16, 1, 1))) void block_motion_estimate_intel(sampler_t accelerator, __read_only image2d_t 
srcImg, __read_only image2d_t refImg, __global short2 *prediction_motion_vector_buffer, __global short2 *motion_vector_buffer, __global ushort *residuals) { } __kernel __attribute__((reqd_work_group_size(16, 1, 1))) void block_advanced_motion_estimate_check_intel( sampler_t accelerator, __read_only image2d_t srcImg, __read_only image2d_t refImg, uint flags, uint skip_block_type, uint search_cost_penalty, uint search_cost_precision, __global short2 *count_motion_vector_buffer, __global short2 *predictors_buffer, __global short2 *skip_motion_vector_buffer, __global short2 *motion_vector_buffer, __global char *intra_search_predictor_modes, __global ushort *residuals, __global ushort *skip_residuals, __global ushort *intra_residuals) { } __kernel __attribute__((reqd_work_group_size(16, 1, 1))) void block_advanced_motion_estimate_bidirectional_check_intel( sampler_t accelerator, __read_only image2d_t srcImg, __read_only image2d_t refImg, __read_only image2d_t src_check_image, __read_only image2d_t ref0_check_image, __read_only image2d_t ref1_check_image, uint flags, uint search_cost_penalty, uint search_cost_precision, short2 count_global, uchar bidir_weight, __global short2 *count_motion_vector_buffer, __global short2 *prediction_motion_vector_buffer, __global char *skip_input_mode_buffer, __global short2 *skip_motion_vector_buffer, __global short2 *search_motion_vector_buffer, __global char *intra_search_predictor_modes, __global ushort *search_residuals, __global ushort *skip_residuals, __global ushort *intra_residuals) { } // VEBOX KERNELS: __kernel void ve_enhance_intel(sampler_t accelerator, int flags, __read_only image2d_t current_input, __write_only image2d_t current_output) { } __kernel void ve_dn_enhance_intel(sampler_t accelerator, int flags, __read_only image2d_t ref_input, __read_only image2d_t current_input, __write_only image2d_t current_output) { } __kernel void ve_dn_di_enhance_intel(sampler_t accelerator, int flags, __read_only image2d_t current_input, 
__read_only image2d_t ref_input, __write_only image2d_t current_output, __write_only image2d_t ref_output, __write_only image2d_t dndi_output) { } compute-runtime-22.14.22890/opencl/test/unit_test/test_files/media_kernels_frontend_options.txt000066400000000000000000000004571422164147700330130ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ -D cl_intel_device_side_advanced_vme_enable -D cl_intel_device_side_avc_vme_enable -D cl_intel_device_side_vme_enable -D cl_intel_media_block_io -cl-unsafe-math-optimizations -cl-mad-enable -cl-fast-relaxed-math compute-runtime-22.14.22890/opencl/test/unit_test/test_files/patch_list.h000066400000000000000000000173061422164147700263020ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ // clang-format off #pragma once #pragma pack( push, 1 ) #include const uint32_t MAGIC_CL = 0x494E5443; struct SProgramBinaryHeader { uint32_t Magic; uint32_t Version; uint32_t Device; uint32_t GPUPointerSizeInBytes; uint32_t NumberOfKernels; uint32_t SteppingId; uint32_t PatchListSize; }; static_assert( sizeof( SProgramBinaryHeader ) == 28 , "The size of SProgramBinaryHeader is not what is expected" ); struct SKernelBinaryHeader { uint32_t CheckSum; uint64_t ShaderHashCode; uint32_t KernelNameSize; uint32_t PatchListSize; }; static_assert( sizeof( SKernelBinaryHeader ) == 20 , "The size of SKernelBinaryHeader is not what is expected" ); struct SKernelBinaryHeaderCommon : SKernelBinaryHeader { uint32_t KernelHeapSize; uint32_t GeneralStateHeapSize; uint32_t DynamicStateHeapSize; uint32_t SurfaceStateHeapSize; uint32_t KernelUnpaddedSize; }; static_assert( sizeof( SKernelBinaryHeaderCommon ) == ( 20 + sizeof( SKernelBinaryHeader ) ) , "The size of SKernelBinaryHeaderCommon is not what is expected" ); enum PATCH_TOKEN { PATCH_TOKEN_UNKNOWN, // 0 - (Unused) PATCH_TOKEN_MEDIA_STATE_POINTERS, // 1 - (Unused) 
PATCH_TOKEN_STATE_SIP, // 2 @SPatchStateSIP@ PATCH_TOKEN_CS_URB_STATE, // 3 - (Unused) PATCH_TOKEN_CONSTANT_BUFFER, // 4 - (Unused) PATCH_TOKEN_SAMPLER_STATE_ARRAY, // 5 @SPatchSamplerStateArray@ PATCH_TOKEN_INTERFACE_DESCRIPTOR, // 6 - (Unused) PATCH_TOKEN_VFE_STATE, // 7 - (Unused) PATCH_TOKEN_BINDING_TABLE_STATE, // 8 @SPatchBindingTableState@ PATCH_TOKEN_ALLOCATE_SCRATCH_SURFACE, // 9 - (Unused) PATCH_TOKEN_ALLOCATE_SIP_SURFACE, // 10 @SPatchAllocateSystemThreadSurface@ PATCH_TOKEN_GLOBAL_MEMORY_OBJECT_KERNEL_ARGUMENT, // 11 @SPatchGlobalMemoryObjectKernelArgument@ - OpenCL PATCH_TOKEN_IMAGE_MEMORY_OBJECT_KERNEL_ARGUMENT, // 12 @SPatchImageMemoryObjectKernelArgument@ - OpenCL PATCH_TOKEN_CONSTANT_MEMORY_OBJECT_KERNEL_ARGUMENT, // 13 - (Unused) - OpenCL PATCH_TOKEN_ALLOCATE_SURFACE_WITH_INITIALIZATION, // 14 - (Unused) PATCH_TOKEN_ALLOCATE_LOCAL_SURFACE, // 15 @SPatchAllocateLocalSurface@ PATCH_TOKEN_SAMPLER_KERNEL_ARGUMENT, // 16 @SPatchSamplerKernelArgument@ - OpenCL PATCH_TOKEN_DATA_PARAMETER_BUFFER, // 17 @SPatchDataParameterBuffer@ - OpenCL PATCH_TOKEN_MEDIA_VFE_STATE, // 18 @SPatchMediaVFEState@ PATCH_TOKEN_MEDIA_INTERFACE_DESCRIPTOR_LOAD, // 19 @SPatchMediaInterfaceDescriptorLoad@ PATCH_TOKEN_MEDIA_CURBE_LOAD, // 20 - (Unused) PATCH_TOKEN_INTERFACE_DESCRIPTOR_DATA, // 21 @SPatchInterfaceDescriptorData@ PATCH_TOKEN_THREAD_PAYLOAD, // 22 @SPatchThreadPayload@ PATCH_TOKEN_EXECUTION_ENVIRONMENT, // 23 @SPatchExecutionEnvironment@ PATCH_TOKEN_ALLOCATE_PRIVATE_MEMORY, // 24 - (Unused) PATCH_TOKEN_DATA_PARAMETER_STREAM, // 25 @SPatchDataParameterStream PATCH_TOKEN_KERNEL_ARGUMENT_INFO, // 26 @SPatchKernelArgumentInfo@ - OpenCL PATCH_TOKEN_KERNEL_ATTRIBUTES_INFO, // 27 @SPatchKernelAttributesInfo@ - OpenCL PATCH_TOKEN_STRING, // 28 @SPatchString@ - OpenCL PATCH_TOKEN_ALLOCATE_PRINTF_SURFACE, // 29 - (Unused) - OpenCL PATCH_TOKEN_STATELESS_GLOBAL_MEMORY_OBJECT_KERNEL_ARGUMENT, // 30 @SPatchStatelessGlobalMemoryObjectKernelArgument@ - OpenCL 
PATCH_TOKEN_STATELESS_CONSTANT_MEMORY_OBJECT_KERNEL_ARGUMENT,//31 @SPatchStatelessConstantMemoryObjectKernelArgument@ - OpenCL PATCH_TOKEN_ALLOCATE_STATELESS_SURFACE_WITH_INITIALIZATION, // 32 - (Unused) PATCH_TOKEN_ALLOCATE_STATELESS_PRINTF_SURFACE, // 33 @SPatchAllocateStatelessPrintfSurface@ PATCH_TOKEN_CB_MAPPING, // 34 - (Unused) PATCH_TOKEN_CB2CR_GATHER_TABLE, // 35 - (Unused) PATCH_TOKEN_ALLOCATE_STATELESS_EVENT_POOL_SURFACE, // 36 @SPatchAllocateStatelessEventPoolSurface@ PATCH_TOKEN_NULL_SURFACE_LOCATION, // 37 - (Unused) PATCH_TOKEN_ALLOCATE_STATELESS_PRIVATE_MEMORY, // 38 @SPatchAllocateStatelessPrivateSurface@ PATCH_TOKEN_ALLOCATE_CONSTANT_MEMORY_SURFACE_WITH_INITIALIZATION, // 39 - (Unused) PATCH_TOKEN_ALLOCATE_GLOBAL_MEMORY_SURFACE_WITH_INITIALIZATION, // 40 - (Unused) PATCH_TOKEN_ALLOCATE_GLOBAL_MEMORY_SURFACE_PROGRAM_BINARY_INFO, // 41 @SPatchAllocateGlobalMemorySurfaceProgramBinaryInfo@ PATCH_TOKEN_ALLOCATE_CONSTANT_MEMORY_SURFACE_PROGRAM_BINARY_INFO, // 42 @SPatchAllocateConstantMemorySurfaceProgramBinaryInfo@ PATCH_TOKEN_ALLOCATE_STATELESS_GLOBAL_MEMORY_SURFACE_WITH_INITIALIZATION, // 43 @SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization@ PATCH_TOKEN_ALLOCATE_STATELESS_CONSTANT_MEMORY_SURFACE_WITH_INITIALIZATION, // 44 @SPatchAllocateStatelessConstantMemorySurfaceWithInitialization@ PATCH_TOKEN_ALLOCATE_STATELESS_DEFAULT_DEVICE_QUEUE_SURFACE, // 45 @SPatchAllocateStatelessDefaultDeviceQueueSurface@ PATCH_TOKEN_STATELESS_DEVICE_QUEUE_KERNEL_ARGUMENT, // 46 @SPatchStatelessDeviceQueueKernelArgument@ PATCH_TOKEN_GLOBAL_POINTER_PROGRAM_BINARY_INFO, // 47 @SPatchGlobalPointerProgramBinaryInfo@ PATCH_TOKEN_CONSTANT_POINTER_PROGRAM_BINARY_INFO, // 48 @SPatchConstantPointerProgramBinaryInfo@ PATCH_TOKEN_CONSTRUCTOR_DESTRUCTOR_KERNEL_PROGRAM_BINARY_INFO, // 49 - (Unused) PATCH_TOKEN_INLINE_VME_SAMPLER_INFO, // 50 - (Unused) PATCH_TOKEN_GTPIN_FREE_GRF_INFO, // 51 @SPatchGtpinFreeGRFInfo@ PATCH_TOKEN_GTPIN_INFO, NUM_PATCH_TOKENS }; struct 
SPatchItemHeader { uint32_t Token; uint32_t Size; }; struct SPatchDataParameterBuffer : SPatchItemHeader { uint32_t Type; uint32_t ArgumentNumber; uint32_t Offset; uint32_t DataSize; uint32_t SourceOffset; uint32_t LocationIndex; uint32_t LocationIndex2; uint32_t IsEmulationArgument; }; struct SPatchMediaInterfaceDescriptorLoad : SPatchItemHeader { uint32_t InterfaceDescriptorDataOffset; }; static_assert( sizeof( SPatchMediaInterfaceDescriptorLoad ) == ( 4 + sizeof( SPatchItemHeader ) ) , "The size of SPatchMediaInterfaceDescriptorLoad is not what is expected" ); struct SPatchStateSIP : SPatchItemHeader { uint32_t SystemKernelOffset; }; struct SPatchSamplerStateArray : SPatchItemHeader { uint32_t Offset; uint32_t Count; uint32_t BorderColorOffset; }; struct SPatchAllocateConstantMemorySurfaceProgramBinaryInfo : SPatchItemHeader { uint32_t ConstantBufferIndex; uint32_t InlineDataSize; }; static_assert( sizeof( SPatchAllocateConstantMemorySurfaceProgramBinaryInfo ) == ( 8 + sizeof( SPatchItemHeader ) ) , "The size of SPatchAllocateConstantMemorySurfaceProgramBinaryInfo is not what is expected" ); #pragma pack( pop ) // clang-format oncompute-runtime-22.14.22890/opencl/test/unit_test/test_files/printf.cl000066400000000000000000000003501422164147700256100ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ __kernel void test() { printf("OpenCL\n"); } __kernel void test_printf_number(__global uint* in) { printf("in[0] = %d\n", in[0]); } compute-runtime-22.14.22890/opencl/test/unit_test/test_files/required_work_group.cl000066400000000000000000000007551422164147700304150ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ __kernel __attribute__((reqd_work_group_size(8, 2, 2))) void CopyBuffer( __global unsigned int *src, __global unsigned int *dst) { int id = (int)get_global_id(0); dst[id] = src[id]; } __kernel __attribute__((reqd_work_group_size(1, 1, 
1))) void CopyBuffer2( __global unsigned int *src, __global unsigned int *dst) { int id = (int)get_global_id(0); dst[id] = src[id]; } compute-runtime-22.14.22890/opencl/test/unit_test/test_files/shouldfail.cl000066400000000000000000000003501422164147700264400ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ __kernel void shouldfail(global ushort *dst) { // idx and dummy are not defined, compiler should fail the build. dst[idx] = dummy; } compute-runtime-22.14.22890/opencl/test/unit_test/test_files/shouldfail_internal_options.txt000066400000000000000000000001651422164147700323340ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ -shouldfailInternalOptionscompute-runtime-22.14.22890/opencl/test/unit_test/test_files/shouldfail_options.txt000066400000000000000000000001551422164147700304370ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ -shouldfailOptionscompute-runtime-22.14.22890/opencl/test/unit_test/test_files/simple_arg_int.cl000066400000000000000000000003121422164147700273000ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ __kernel void SimpleArg(int src, __global int *dst) { int id = (int)get_global_id(0); dst[id] = src; } compute-runtime-22.14.22890/opencl/test/unit_test/test_files/simple_header.h000066400000000000000000000002741422164147700267450ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once extern __kernel void AddBuffer( __global float *src, __global float *dst); compute-runtime-22.14.22890/opencl/test/unit_test/test_files/simple_kernels.cl000066400000000000000000000065351422164147700273350ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma OPENCL EXTENSION 
cl_khr_global_int32_base_atomics : enable __kernel void simple_kernel_0( const uint arg0, const float arg1, __global uint *dst) { uint idx = get_global_id(0); uint data = arg0 + (uint)arg1; dst[idx] = data; } __kernel void simple_kernel_1( __global const uint *src, const uint arg1, __global uint *dst) { uint idx = get_global_id(0); dst[idx] = src[idx] + arg1; } __kernel void simple_kernel_2( const uint arg0, __global uint *dst) { uint idx = get_global_id(0); dst[idx] = arg0; } __kernel void simple_kernel_3( __global uint *dst) { dst[get_global_id(0)] = 0; } __kernel void simple_kernel_4() { } __kernel void simple_kernel_5(__global uint *dst) { //first uint holds the total work item count atomic_inc(dst); uint groupIdX = get_group_id(0); uint groupIdY = get_group_id(1); uint groupIdZ = get_group_id(2); uint groupCountX = get_num_groups(0); uint groupCountY = get_num_groups(1); uint groupCountZ = get_num_groups(2); __global uint *groupCounters = dst + 1; //store current group position in 3D array uint destination = groupIdZ * groupCountY * groupCountX + groupIdY * groupCountX + groupIdX; atomic_inc(&groupCounters[destination]); } #define SIMPLE_KERNEL_6_ARRAY_SIZE 32 __kernel void simple_kernel_6(__global uint *dst, __constant uint2 *src, uint scalar, uint maxIterations, uint maxIterations2) { __private uint2 array[SIMPLE_KERNEL_6_ARRAY_SIZE]; __private uint2 sum; __private size_t gid = get_global_id(0); __private size_t lid = get_local_id(0); __private uint multi = 1; if (lid == 1024) { multi = 4; } sum = (uint2)(0, 0); for (int i = 0; i < maxIterations; ++i) { array[i] = src[i] + (uint2)(i * multi, i * multi + scalar); } for (int i = 0; i < maxIterations2; ++i) { sum.x = array[i].x + sum.x; sum.y = array[i].y + sum.y; } vstore2(sum, gid, dst); } typedef long16 TYPE; __attribute__((reqd_work_group_size(32, 1, 1))) // force LWS to 32 __attribute__((intel_reqd_sub_group_size(16))) // force SIMD to 16 __kernel void simple_kernel_7(__global int *resIdx, global TYPE 
*src, global TYPE *dst) { size_t lid = get_local_id(0); size_t gid = get_global_id(0); TYPE res1 = src[gid * 3]; TYPE res2 = src[gid * 3 + 1]; TYPE res3 = src[gid * 3 + 2]; __local TYPE locMem[32]; locMem[lid] = res1; barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_GLOBAL_MEM_FENCE); TYPE res = (locMem[resIdx[gid]] * res3) * res2 + res1; dst[gid] = res; } __kernel void simple_kernel_8(__global uint *dst, uint incrementationsCount) { uint groupIdX = get_group_id(0); uint groupIdY = get_group_id(1); uint groupIdZ = get_group_id(2); uint groupCountX = get_num_groups(0); uint groupCountY = get_num_groups(1); uint groupCountZ = get_num_groups(2); uint destination = groupIdZ * groupCountY * groupCountX + groupIdY * groupCountX + groupIdX; for (uint i = 0; i < incrementationsCount; i++) { dst[destination]++; } } __kernel void simple_kernel_9(__global uint *dst) { uint offset = get_max_sub_group_size() * get_sub_group_id(); dst[get_sub_group_local_id() + offset] = get_local_id(0); } compute-runtime-22.14.22890/opencl/test/unit_test/test_files/simple_kernels_opts.cl000066400000000000000000000004401422164147700303670ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ __kernel void SimpleArg(int src, __global int *dst) { int id = (int)get_global_id(0); #ifdef DEF_WAS_SPECIFIED int val = 1; #else // fail to compile #endif dst[id] = src + val; } compute-runtime-22.14.22890/opencl/test/unit_test/test_files/simple_kernels_opts_options.txt000066400000000000000000000004001422164147700323570ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ -cl-opt-disable -DDEF_WAS_SPECIFIED=1 -DARGS=", const __global int *arg1, float arg2, const __global int *arg3, float arg4" compute-runtime-22.14.22890/opencl/test/unit_test/test_files/simple_nonuniform.cl000066400000000000000000000010251422164147700300510ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation 
* * SPDX-License-Identifier: MIT * */ __kernel void simpleNonUniform(int atomicOffset, __global volatile int *dst) { int id = (int)(get_global_id(2) * (get_global_size(1) * get_global_size(0)) + get_global_id(1) * get_global_size(0) + get_global_id(0)); dst[id] = id; __global volatile atomic_int *atomic_dst = ( __global volatile atomic_int * )dst; atomic_fetch_add_explicit( &atomic_dst[atomicOffset], 1 , memory_order_relaxed, memory_scope_all_svm_devices ); }compute-runtime-22.14.22890/opencl/test/unit_test/test_files/stateful_copy_buffer.cl000066400000000000000000000003521422164147700305220ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ __kernel void StatefulCopyBuffer( const __global uchar* src, __global uchar* dst) { uint id = get_global_id(0); dst[id] = src[id]; }compute-runtime-22.14.22890/opencl/test/unit_test/test_files/stateful_copy_buffer_ocloc_options.txt000066400000000000000000000001461422164147700336760ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ -force_stos_opt compute-runtime-22.14.22890/opencl/test/unit_test/test_files/stateless_copy_buffer.cl000066400000000000000000000003541422164147700307040ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ __kernel void StatelessCopyBuffer( const __global uchar* src, __global uchar* dst) { uint id = get_global_id(0); dst[id] = src[id]; } stateless_copy_buffer_internal_options.txt000066400000000000000000000002011422164147700345040ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/test_files/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ -cl-intel-greater-than-4GB-buffer-required compute-runtime-22.14.22890/opencl/test/unit_test/test_files/stateless_kernel.cl000066400000000000000000000003071422164147700276570ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel 
Corporation * * SPDX-License-Identifier: MIT * */ __kernel void statelessKernel(__global uchar* src) { uint tid = get_global_id(0); src[tid] = 0xCD; }compute-runtime-22.14.22890/opencl/test/unit_test/test_files/test_basic_constant.cl000066400000000000000000000004651422164147700303460ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ __kernel void constant_kernel(__global float *out, __constant float *tmpF, __constant int *tmpI) { int tid = get_global_id(0); float ftmp = tmpF[tid]; float Itmp = tmpI[tid]; out[tid] = ftmp * Itmp; } test_basic_kernel_memory_alignment_private.cl000066400000000000000000000010101422164147700350610ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/test_files/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ __kernel void test(__global ulong *results) { __private char mem0[3]; __private char2 mem2[3]; __private char3 mem3[3]; __private char4 mem4[3]; __private char8 mem8[3]; __private char16 mem16[3]; results[0] = (ulong)&mem0[0]; results[1] = (ulong)&mem2[0]; results[2] = (ulong)&mem3[0]; results[3] = (ulong)&mem4[0]; results[4] = (ulong)&mem8[0]; results[5] = (ulong)&mem16[0]; } compute-runtime-22.14.22890/opencl/test/unit_test/test_files/test_constant_memory.cl000066400000000000000000000005361422164147700305740ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ __constant uint constant_a[2] = {0xabcd5432u, 0xaabb5533u}; __kernel void test(__global uint *in, __global uint *out) { int i = get_global_id(0); int j = get_global_id(0) % (sizeof(constant_a) / sizeof(constant_a[0])); out[i] = constant_a[j]; } compute-runtime-22.14.22890/opencl/test/unit_test/test_files/valid_kernel.cl000066400000000000000000000004211422164147700267440ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ __kernel void 
CopyBuffer(__global unsigned int *src, __global unsigned int *dst) { int id = (int)get_global_id(0); dst[id] = lgamma((float)src[id]); dst[id] = src[id]; } compute-runtime-22.14.22890/opencl/test/unit_test/test_files/valid_kernel_ocloc_options.txt000066400000000000000000000001541422164147700321220ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ -invalid_ocloc_option compute-runtime-22.14.22890/opencl/test/unit_test/test_files/vme_kernels.cl000066400000000000000000000115401422164147700266230ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ /*************************************************************************************************\ Device-side basic vme kernel: device_side_block_motion_estimate_intel Description: Computes motion vectors by comparing a 2d image source with a 2d reference image, producing a vector field motion vectors. The algorithm searches the best match of each macroblock pixel block in the source image by searching an image region in the reference image, centered on the coordinates of that pixel macroblock in the source image (optionally offset by the prediction motion vectors). This kernel optionally takes a vector field of motion vector predictors via the prediction_motion_vector_image kernel argument. The kernel also optionally returns a vector field of per-macroblock pixel-block information records. Each record contains the best-match distortion (SAD) value and additional search result information. 
This kernel needs to be compiled with following compiler option: " -D cl_intel_device_side_avc_vme_enable " \*************************************************************************************************/ __kernel __attribute__((reqd_work_group_size(16, 1, 1))) void device_side_block_motion_estimate_intel(__read_only image2d_t srcImg, __read_only image2d_t refImg, __global short2 *prediction_motion_vector_buffer, __global short2 *motion_vector_buffer, __global ushort *residuals_buffer, __global uchar2 *shapes_buffer, int iterations, int partitionMask) { int gid_0 = get_group_id(0); int gid_1 = 0; sampler_t vme_samp = 0; for (int i = 0; i < iterations; i++, gid_1++) { ushort2 srcCoord = 0; short2 refCoord = 0; short2 predMV = 0; srcCoord.x = gid_0 * 16 + get_global_offset(0); srcCoord.y = gid_1 * 16 + get_global_offset(1); if (prediction_motion_vector_buffer != NULL) { predMV = prediction_motion_vector_buffer[gid_0 + gid_1 * get_num_groups(0)]; refCoord.x = predMV.x / 4; refCoord.y = predMV.y / 4; refCoord.y = refCoord.y & 0xFFFE; } uchar partition_mask = (uchar)partitionMask; uchar sad_adjustment = CLK_AVC_ME_SAD_ADJUST_MODE_NONE_INTEL; uchar pixel_mode = CLK_AVC_ME_SUBPIXEL_MODE_QPEL_INTEL; intel_sub_group_avc_ime_payload_t payload = intel_sub_group_avc_ime_initialize(srcCoord, partition_mask, sad_adjustment); payload = intel_sub_group_avc_ime_set_single_reference(refCoord, CLK_AVC_ME_SEARCH_WINDOW_16x12_RADIUS_INTEL, payload); intel_sub_group_avc_ime_result_t result = intel_sub_group_avc_ime_evaluate_with_single_reference(srcImg, refImg, vme_samp, payload); // Process Results long mvs = intel_sub_group_avc_ime_get_motion_vectors(result); ushort sads = intel_sub_group_avc_ime_get_inter_distortions(result); uchar major_shape = intel_sub_group_avc_ime_get_inter_major_shape(result); uchar minor_shapes = intel_sub_group_avc_ime_get_inter_minor_shapes(result); uchar2 shapes = {major_shape, minor_shapes}; uchar directions = 
intel_sub_group_avc_ime_get_inter_directions(result); // Perform FME for non-Integer Pixel mode if (pixel_mode != CLK_AVC_ME_SUBPIXEL_MODE_INTEGER_INTEL) { intel_sub_group_avc_ref_payload_t payload = intel_sub_group_avc_fme_initialize(srcCoord, mvs, major_shape, minor_shapes, directions, pixel_mode, sad_adjustment); intel_sub_group_avc_ref_result_t result = intel_sub_group_avc_ref_evaluate_with_single_reference(srcImg, refImg, vme_samp, payload); mvs = intel_sub_group_avc_ref_get_motion_vectors(result); sads = intel_sub_group_avc_ref_get_inter_distortions(result); } // Write Out Result if ((get_local_id(0) % 4) == 0) { int x = get_local_id(0) % 4; int y = get_local_id(0) / 4; int width = get_image_width(srcImg); int index = (gid_0 * 4 + x) + (gid_1 * width / 4 + y); int2 bi_mvs = as_int2(mvs); motion_vector_buffer[index] = as_short2(bi_mvs.s0); if (residuals_buffer != NULL) { residuals_buffer[index] = sads; } shapes_buffer[gid_0 + gid_1 * get_num_groups(0)] = shapes; } } } __kernel void non_vme_kernel(__global unsigned int *src, __global unsigned int *dst) { int id = (int)get_global_id(0); dst[id] = lgamma((float)src[id]); dst[id] = src[id]; } compute-runtime-22.14.22890/opencl/test/unit_test/test_files/vme_kernels_options.txt000066400000000000000000000002171422164147700306160ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ -D cl_intel_device_side_vme_enable -D HW_NULL_CHECK compute-runtime-22.14.22890/opencl/test/unit_test/test_macros/000077500000000000000000000000001422164147700241525ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/test_macros/CMakeLists.txt000066400000000000000000000011361422164147700267130ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_test_macros ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/test_checks_ocl.cpp 
${CMAKE_CURRENT_SOURCE_DIR}/test_checks_ocl.h ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_test_macros}) get_property(NEO_CORE_test_macros GLOBAL PROPERTY NEO_CORE_test_macros) list(APPEND IGDRCL_SRCS_test_macros ${NEO_CORE_test_macros} ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_test_macros}) set_property(GLOBAL PROPERTY IGDRCL_SRCS_test_macros ${IGDRCL_SRCS_test_macros}) compute-runtime-22.14.22890/opencl/test/unit_test/test_macros/test_checks_ocl.cpp000066400000000000000000000026711422164147700300200ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/test_macros/test_checks_ocl.h" #include "shared/source/device/device_info.h" #include "shared/source/helpers/hw_helper.h" #include "shared/test/common/helpers/default_hw_info.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/context/context.h" #include "opencl/source/helpers/cl_hw_helper.h" #include "opencl/source/kernel/kernel.h" using namespace NEO; bool TestChecks::supportsSvm(const ClDevice *pClDevice) { return supportsSvm(&pClDevice->getDevice()); } bool TestChecks::supportsImages(const Context *pContext) { return pContext->getDevice(0)->getSharedDeviceInfo().imageSupport; } bool TestChecks::supportsOcl21(const std::unique_ptr &pHardwareInfo) { return (pHardwareInfo->capabilityTable.supportsOcl21Features && pHardwareInfo->capabilityTable.supportsPipes && pHardwareInfo->capabilityTable.supportsIndependentForwardProgress); } bool TestChecks::supportsAuxResolves() { KernelInfo kernelInfo{}; kernelInfo.kernelDescriptor.payloadMappings.explicitArgs.resize(1); kernelInfo.kernelDescriptor.payloadMappings.explicitArgs[0].as(true).accessedUsingStatelessAddressingMode = true; auto &clHwHelper = ClHwHelper::get(defaultHwInfo->platform.eRenderCoreFamily); return clHwHelper.requiresAuxResolves(kernelInfo, *defaultHwInfo); } 
compute-runtime-22.14.22890/opencl/test/unit_test/test_macros/test_checks_ocl.h000066400000000000000000000024001422164147700274530ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include namespace NEO { class ClDevice; class Context; struct HardwareInfo; namespace TestChecks { bool supportsSvm(const ClDevice *pClDevice); bool supportsImages(const Context *pContext); bool supportsOcl21(const std::unique_ptr &pHardwareInfo); bool supportsPipes(const ClDevice *pClDevice); bool supportsAuxResolves(); } // namespace TestChecks } // namespace NEO #include "shared/test/common/test_macros/test_checks_shared.h" #define REQUIRE_IMAGE_SUPPORT_OR_SKIP(param) \ auto hwInfo = castToObject(param)->getDevice(0)->getHardwareInfo(); \ if (!hwInfo.capabilityTable.supportsImages) { \ GTEST_SKIP(); \ } #define REQUIRE_OCL_21_OR_SKIP(param) \ if (NEO::TestChecks::supportsOcl21(param) == false) { \ GTEST_SKIP(); \ } #define REQUIRE_AUX_RESOLVES() \ if (NEO::TestChecks::supportsAuxResolves() == false) { \ GTEST_SKIP(); \ } compute-runtime-22.14.22890/opencl/test/unit_test/ult_config_listener.cpp000066400000000000000000000022401422164147700263670ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/ult_config_listener.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/test/common/helpers/default_hw_info.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/mocks/mock_platform.h" void NEO::UltConfigListener::OnTestStart(const ::testing::TestInfo &testInfo) { BaseUltConfigListener::OnTestStart(testInfo); auto executionEnvironment = constructPlatform()->peekExecutionEnvironment(); executionEnvironment->prepareRootDeviceEnvironments(1); 
executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); executionEnvironment->calculateMaxOsContextCount(); executionEnvironment->rootDeviceEnvironments[0]->initGmm(); } void NEO::UltConfigListener::OnTestEnd(const ::testing::TestInfo &testInfo) { // Clear global platform that it shouldn't be reused between tests platformsImpl->clear(); MemoryManager::maxOsContextCount = 0u; BaseUltConfigListener::OnTestEnd(testInfo); } compute-runtime-22.14.22890/opencl/test/unit_test/ult_config_listener.h000066400000000000000000000006071422164147700260410ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/test/unit_test/base_ult_config_listener.h" namespace NEO { class UltConfigListener : public BaseUltConfigListener { private: void OnTestStart(const ::testing::TestInfo &) override; void OnTestEnd(const ::testing::TestInfo &) override; }; } // namespace NEO compute-runtime-22.14.22890/opencl/test/unit_test/user_settings_32.user000066400000000000000000000025431422164147700257350ustar00rootroot00000000000000 --gtest_filter=* --gtest_catch_exceptions=0 --enable_default_listener --disable_pagefaulting_tests $(TargetDir) WindowsLocalDebugger --gtest_filter=* --gtest_catch_exceptions=0 --enable_default_listener $(TargetDir) WindowsLocalDebugger --gtest_filter=* --gtest_catch_exceptions=0 --enable_default_listener $(TargetDir) WindowsLocalDebugger compute-runtime-22.14.22890/opencl/test/unit_test/user_settings_64.user000066400000000000000000000025351422164147700257430ustar00rootroot00000000000000 --gtest_filter=* --gtest_catch_exceptions=0 --enable_default_listener --disable_pagefaulting_tests $(TargetDir) WindowsLocalDebugger --gtest_filter=* --gtest_catch_exceptions=0 --enable_default_listener $(TargetDir) WindowsLocalDebugger --gtest_filter=* --gtest_catch_exceptions=0 --enable_default_listener $(TargetDir) WindowsLocalDebugger 
compute-runtime-22.14.22890/opencl/test/unit_test/utilities/000077500000000000000000000000001422164147700236425ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/utilities/.clang-tidy000066400000000000000000000027331422164147700257030ustar00rootroot00000000000000--- Checks: 'clang-diagnostic-*,clang-analyzer-*,google-default-arguments,modernize-use-override,modernize-use-default-member-init,-clang-analyzer-alpha*,readability-identifier-naming,-clang-analyzer-core.UndefinedBinaryOperatorResult' # WarningsAsErrors: '.*' HeaderFilterRegex: '^((?!^third_party\/).+)\.(h|hpp|inl)$' AnalyzeTemporaryDtors: false CheckOptions: - key: google-readability-braces-around-statements.ShortStatementLines value: '1' - key: google-readability-function-size.StatementThreshold value: '800' - key: google-readability-namespace-comments.ShortNamespaceLines value: '10' - key: google-readability-namespace-comments.SpacesBeforeComments value: '2' - key: readability-identifier-naming.ParameterCase value: camelBack - key: modernize-loop-convert.MaxCopySize value: '16' - key: modernize-loop-convert.MinConfidence value: reasonable - key: modernize-loop-convert.NamingStyle value: CamelCase - key: modernize-pass-by-value.IncludeStyle value: llvm - key: modernize-replace-auto-ptr.IncludeStyle value: llvm - key: modernize-use-nullptr.NullMacros value: 'NULL' - key: modernize-use-default-member-init.UseAssignment value: '1' ... 
compute-runtime-22.14.22890/opencl/test/unit_test/utilities/CMakeLists.txt000066400000000000000000000011251422164147700264010ustar00rootroot00000000000000# # Copyright (C) 2018-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_utilities ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}debug_file_reader_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/debug_file_reader_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/debug_settings_reader_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/file_logger_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/file_logger_tests.h ${CMAKE_CURRENT_SOURCE_DIR}/tag_allocator_tests.cpp ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_utilities}) compute-runtime-22.14.22890/opencl/test/unit_test/utilities/debug_file_reader_tests.cpp000066400000000000000000000024671422164147700312100ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/utilities/debug_file_reader_tests.inl" using namespace NEO; TEST(SettingsFileReader, givenTestFileWithDefaultValuesWhenTheyAreQueriedThenDefaultValuesMatch) { // Use test settings file std::unique_ptr reader = std::unique_ptr(new TestSettingsFileReader(TestSettingsFileReader::testPath)); ASSERT_NE(nullptr, reader); size_t debugVariableCount = 0; bool variableFound = false; bool compareSuccessful = false; #define DECLARE_DEBUG_VARIABLE(dataType, variableName, defaultValue, description) \ variableFound = reader->hasSetting(#variableName); \ EXPECT_TRUE(variableFound) << #variableName; \ compareSuccessful = (defaultValue == reader->getSetting(#variableName, defaultValue)); \ EXPECT_TRUE(compareSuccessful) << #variableName; \ debugVariableCount++; #include "shared/source/debug_settings/release_variables.inl" #include "debug_variables.inl" #undef DECLARE_DEBUG_VARIABLE size_t mapCount = reader->getStringSettingsCount(); EXPECT_EQ(mapCount, debugVariableCount); } 
compute-runtime-22.14.22890/opencl/test/unit_test/utilities/debug_file_reader_tests.inl000066400000000000000000000225071422164147700312050ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/file_io.h" #include "shared/source/utilities/debug_file_reader.h" #include "shared/test/common/test_macros/test.h" #include "gtest/gtest.h" #include #include using namespace NEO; class TestSettingsFileReader : public SettingsFileReader { public: using SettingsFileReader::parseStream; TestSettingsFileReader(const char *filePath = nullptr) : SettingsFileReader(filePath) { } ~TestSettingsFileReader() override { } bool hasSetting(const char *settingName) { std::map::iterator it = settingStringMap.find(std::string(settingName)); return (it != settingStringMap.end()); } size_t getStringSettingsCount() { return settingStringMap.size(); } static const char *testPath; static const char *stringTestPath; }; const char *TestSettingsFileReader::testPath = "./test_files/igdrcl.config"; const char *TestSettingsFileReader::stringTestPath = "./test_files/igdrcl_string.config"; TEST(SettingsFileReader, GivenFilesDoesNotExistWhenCreatingFileReaderThenCreationSucceeds) { bool settingsFileExists = fileExists(SettingsReader::settingsFileName); // if settings file exists, remove it if (settingsFileExists) { remove(SettingsReader::settingsFileName); } // Use current location for file read auto reader = std::make_unique(); ASSERT_NE(nullptr, reader); EXPECT_EQ(0u, reader->getStringSettingsCount()); } TEST(SettingsFileReader, WhenGettingSettingThenCorrectStringValueIsReturned) { // Use test settings file auto reader = std::make_unique(TestSettingsFileReader::stringTestPath); ASSERT_NE(nullptr, reader); std::string retValue; // StringTestKey is defined in file: unit_tests\helpers\test_debug_variables.inl std::string returnedStringValue = reader->getSetting("StringTestKey", retValue); // "Test Value" is a value 
that should be read from file defined in stringTestPath member EXPECT_STREQ(returnedStringValue.c_str(), "TestValue"); #define DECLARE_DEBUG_VARIABLE(dataType, variableName, defaultValue, description) \ { \ dataType defaultData = defaultValue; \ dataType tempData = reader->getSetting(#variableName, defaultData); \ if (tempData == defaultData) { \ EXPECT_TRUE(true); \ } \ } #include "shared/test/unit_test/helpers/test_debug_variables.inl" #undef DECLARE_DEBUG_VARIABLE } TEST(SettingsFileReader, givenDebugFileSettingInWhichStringIsFollowedByIntegerWhenItIsParsedThenProperValuesAreObtained) { auto reader = std::make_unique(TestSettingsFileReader::stringTestPath); ASSERT_NE(nullptr, reader.get()); int32_t retValue = 0; int32_t returnedIntValue = reader->getSetting("IntTestKey", retValue); EXPECT_EQ(123, returnedIntValue); int32_t returnedIntValueHex = reader->getSetting("IntTestKeyHex", 0); EXPECT_EQ(0xABCD, returnedIntValueHex); std::string retValueString; std::string returnedStringValue = reader->getSetting("StringTestKey", retValueString); EXPECT_STREQ(returnedStringValue.c_str(), "TestValue"); } TEST(SettingsFileReader, GivenSettingNotInFileWhenGettingSettingThenProvidedDefaultIsReturned) { // Use test settings file auto reader = std::make_unique(TestSettingsFileReader::testPath); ASSERT_NE(nullptr, reader); bool defaultBoolValue = false; bool returnedBoolValue = reader->getSetting("BoolSettingNotExistingInFile", defaultBoolValue); EXPECT_EQ(defaultBoolValue, returnedBoolValue); int32_t defaultIntValue = 123; int32_t returnedIntValue = reader->getSetting("IntSettingNotExistingInFile", defaultIntValue); EXPECT_EQ(defaultIntValue, returnedIntValue); std::string defaultStringValue = "ABCD"; std::string returnedStringValue = reader->getSetting("StringSettingNotExistingInFile", defaultStringValue); EXPECT_EQ(defaultStringValue, returnedStringValue); } TEST(SettingsFileReader, WhenGettingAppSpecificLocationThenCorrectLocationIsReturned) { std::unique_ptr reader(new 
TestSettingsFileReader(TestSettingsFileReader::testPath)); std::string appSpecific = "cl_cache_dir"; EXPECT_EQ(appSpecific, reader->appSpecificLocation(appSpecific)); } TEST(SettingsFileReader, givenHexNumbersSemiColonSeparatedListInInputStreamWhenParsingThenCorrectStringValueIsStored) { auto reader = std::make_unique(); ASSERT_NE(nullptr, reader); //No settings should be parsed initially EXPECT_EQ(0u, reader->getStringSettingsCount()); std::stringstream inputLineWithSemiColonList("KeyName = 0x1234;0x5555"); reader->parseStream(inputLineWithSemiColonList); std::string defaultStringValue = "FailedToParse"; std::string returnedStringValue = reader->getSetting("KeyName", defaultStringValue); EXPECT_STREQ("0x1234;0x5555", returnedStringValue.c_str()); } TEST(SettingsFileReader, given64bitKeyValueWhenGetSettingThenValueIsCorrect) { auto reader = std::make_unique(); ASSERT_NE(nullptr, reader); EXPECT_EQ(0u, reader->getStringSettingsCount()); std::stringstream inputLine("Example64BitKey = -18764712120594"); reader->parseStream(inputLine); int64_t defaultValue = 0; int64_t returnedValue = reader->getSetting("Example64BitKey", defaultValue); EXPECT_EQ(-18764712120594, returnedValue); } TEST(SettingsFileReader, givenKeyValueWithoutSpacesWhenGetSettingThenValueIsCorrect) { auto reader = std::make_unique(); ASSERT_NE(nullptr, reader); EXPECT_EQ(0u, reader->getStringSettingsCount()); std::stringstream inputLine("SomeKey=12"); reader->parseStream(inputLine); int64_t returnedValue = reader->getSetting("SomeKey", 0); EXPECT_EQ(1u, reader->getStringSettingsCount()); EXPECT_EQ(12, returnedValue); } TEST(SettingsFileReader, givenKeyValueWithAdditionalWhitespaceCharactersWhenGetSettingThenValueIsCorrect) { auto reader = std::make_unique(); ASSERT_NE(nullptr, reader); EXPECT_EQ(0u, reader->getStringSettingsCount()); std::stringstream inputLine("\t \t SomeKey\t \t =\t \t 12\t \t "); reader->parseStream(inputLine); int64_t returnedValue = reader->getSetting("SomeKey", 0); EXPECT_EQ(1u, 
reader->getStringSettingsCount()); EXPECT_EQ(12, returnedValue); } TEST(SettingsFileReader, givenKeyValueWithAdditionalCharactersWhenGetSettingThenValueIsIncorrect) { { auto reader = std::make_unique(); ASSERT_NE(nullptr, reader); EXPECT_EQ(0u, reader->getStringSettingsCount()); std::stringstream inputLine("Some Key = 12"); reader->parseStream(inputLine); EXPECT_EQ(0u, reader->getStringSettingsCount()); } { auto reader = std::make_unique(); ASSERT_NE(nullptr, reader); EXPECT_EQ(0u, reader->getStringSettingsCount()); std::stringstream inputLine("SomeKey = 1 2"); reader->parseStream(inputLine); EXPECT_EQ(0u, reader->getStringSettingsCount()); } } TEST(SettingsFileReader, givenMultipleKeysWhenGetSettingThenInvalidKeysAreSkipped) { auto reader = std::make_unique(); ASSERT_NE(nullptr, reader); EXPECT_EQ(0u, reader->getStringSettingsCount()); std::string testFile; testFile.append("InvalidKey1 = 1 2\n"); testFile.append("ValidKey1 = 12\n"); testFile.append("InvalidKey2 = - 1\n"); testFile.append("ValidKey2 = 128\n"); std::stringstream inputFile(testFile); reader->parseStream(inputFile); EXPECT_EQ(2u, reader->getStringSettingsCount()); EXPECT_EQ(0, reader->getSetting("InvalidKey1", 0)); EXPECT_EQ(0, reader->getSetting("InvalidKey2", 0)); EXPECT_EQ(12, reader->getSetting("ValidKey1", 0)); EXPECT_EQ(128, reader->getSetting("ValidKey2", 0)); } TEST(SettingsFileReader, givenNoKeyOrNoValueWhenGetSettingThenExceptionIsNotThrown) { { auto reader = std::make_unique(); ASSERT_NE(nullptr, reader); EXPECT_EQ(0u, reader->getStringSettingsCount()); std::stringstream inputLine("= 12"); EXPECT_NO_THROW(reader->parseStream(inputLine)); EXPECT_EQ(0u, reader->getStringSettingsCount()); } { auto reader = std::make_unique(); ASSERT_NE(nullptr, reader); EXPECT_EQ(0u, reader->getStringSettingsCount()); std::stringstream inputLine("SomeKey ="); EXPECT_NO_THROW(reader->parseStream(inputLine)); EXPECT_EQ(0u, reader->getStringSettingsCount()); } { auto reader = std::make_unique(); 
ASSERT_NE(nullptr, reader); EXPECT_EQ(0u, reader->getStringSettingsCount()); std::stringstream inputLine("="); EXPECT_NO_THROW(reader->parseStream(inputLine)); EXPECT_EQ(0u, reader->getStringSettingsCount()); } } compute-runtime-22.14.22890/opencl/test/unit_test/utilities/debug_settings_reader_tests.cpp000066400000000000000000000076261422164147700321330ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/helpers/file_io.h" #include "shared/source/utilities/debug_settings_reader.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/os_interface/ocl_reg_path.h" #include "gtest/gtest.h" #include #include using namespace NEO; class MockSettingsReader : public SettingsReader { public: std::string getSetting(const char *settingName, const std::string &value) override { return value; } bool getSetting(const char *settingName, bool defaultValue) override { return defaultValue; }; int64_t getSetting(const char *settingName, int64_t defaultValue) override { return defaultValue; }; int32_t getSetting(const char *settingName, int32_t defaultValue) override { return defaultValue; }; const char *appSpecificLocation(const std::string &name) override { return name.c_str(); }; }; TEST(SettingsReader, WhenCreatingSettingsReaderThenReaderIsCreated) { SettingsReader *reader = SettingsReader::create(oclRegPath); EXPECT_NE(nullptr, reader); delete reader; } TEST(SettingsReader, GivenNoSettingsFileWhenCreatingSettingsReaderThenOsReaderIsCreated) { remove(SettingsReader::settingsFileName); auto fileReader = std::unique_ptr(SettingsReader::createFileReader()); EXPECT_EQ(nullptr, fileReader.get()); auto osReader = std::unique_ptr(SettingsReader::create(oclRegPath)); EXPECT_NE(nullptr, osReader.get()); } TEST(SettingsReader, GivenSettingsFileExistsWhenCreatingSettingsReaderThenReaderIsCreated) { bool settingsFileExists = 
fileExists(SettingsReader::settingsFileName); if (!settingsFileExists) { const char data[] = "ProductFamilyOverride = test"; writeDataToFile(SettingsReader::settingsFileName, &data, sizeof(data)); } auto reader = std::unique_ptr(SettingsReader::create(oclRegPath)); EXPECT_NE(nullptr, reader.get()); std::string defaultValue("unk"); EXPECT_STREQ("test", reader->getSetting("ProductFamilyOverride", defaultValue).c_str()); std::remove(SettingsReader::settingsFileName); } TEST(SettingsReader, WhenCreatingFileReaderThenReaderIsCreated) { bool settingsFileExists = fileExists(SettingsReader::settingsFileName); if (!settingsFileExists) { char data = 0; writeDataToFile(SettingsReader::settingsFileName, &data, 0); } SettingsReader *reader = SettingsReader::createFileReader(); EXPECT_NE(nullptr, reader); if (!settingsFileExists) { remove(SettingsReader::settingsFileName); } delete reader; } TEST(SettingsReader, WhenCreatingOsReaderThenReaderIsCreated) { SettingsReader *reader = SettingsReader::createOsReader(false, oclRegPath); EXPECT_NE(nullptr, reader); delete reader; } TEST(SettingsReader, GivenRegKeyWhenCreatingOsReaderThenReaderIsCreated) { std::string regKey = oclRegPath; std::unique_ptr reader(SettingsReader::createOsReader(false, regKey)); EXPECT_NE(nullptr, reader); } TEST(SettingsReader, GivenTrueWhenPrintingDebugStringThenPrintsToOutput) { int i = 4; testing::internal::CaptureStdout(); PRINT_DEBUG_STRING(true, stdout, "testing error %d", i); std::string output = testing::internal::GetCapturedStdout(); EXPECT_STRNE(output.c_str(), ""); } TEST(SettingsReader, GivenFalseWhenPrintingDebugStringThenNoOutput) { int i = 4; testing::internal::CaptureStdout(); PRINT_DEBUG_STRING(false, stderr, "Error String %d", i); std::string output = testing::internal::GetCapturedStdout(); EXPECT_STREQ(output.c_str(), ""); } TEST(SettingsReader, givenNonExistingEnvVarWhenGettingEnvThenNullptrIsReturned) { MockSettingsReader reader; auto value = reader.getenv("ThisEnvVarDoesNotExist"); 
EXPECT_EQ(nullptr, value); } compute-runtime-22.14.22890/opencl/test/unit_test/utilities/file_logger_tests.cpp000066400000000000000000001216061422164147700300540ustar00rootroot00000000000000/* * Copyright (C) 2019-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/utilities/file_logger_tests.h" #include "shared/source/utilities/logger.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/unit_test/helpers/gtest_helpers.h" #include "shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_mdi.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include #include #include #include using namespace NEO; TEST(FileLogger, WhenFileLoggerIsCreatedThenItIsEnabled) { DebugVariables flags; FullyEnabledFileLogger fileLogger(std::string(""), flags); EXPECT_TRUE(fileLogger.enabled()); } TEST(FileLogger, GivenFileLoggerWhenSettingFileNameThenCorrectFilenameIsSet) { DebugVariables flags; FullyEnabledFileLogger fileLogger(std::string(""), flags); fileLogger.setLogFileName("new_filename"); EXPECT_STREQ("new_filename", fileLogger.getLogFileName()); } TEST(FileLogger, GivenEnabledDebugFunctinalityWhenLoggingApiCallsThenDumpToFile) { DebugVariables flags; flags.LogApiCalls.set(true); FullyEnabledFileLogger fileLogger(std::string("test.log"), flags); fileLogger.logApiCall("searchString", true, 0); fileLogger.logApiCall("searchString2", false, 0); fileLogger.logInputs("searchString3", "any"); fileLogger.logInputs("searchString3", "any", "and more"); fileLogger.log(false, "searchString4"); // Log file not created EXPECT_TRUE(fileLogger.wasFileCreated(fileLogger.getLogFileName())); if 
(fileLogger.wasFileCreated(fileLogger.getLogFileName())) { auto str = fileLogger.getFileString(fileLogger.getLogFileName()); EXPECT_TRUE(str.find("searchString") != std::string::npos); EXPECT_TRUE(str.find("searchString2") != std::string::npos); EXPECT_TRUE(str.find("searchString3") != std::string::npos); EXPECT_FALSE(str.find("searchString4") != std::string::npos); } fileLogger.log(true, "searchString4"); if (fileLogger.wasFileCreated(fileLogger.getLogFileName())) { auto str = fileLogger.getFileString(fileLogger.getLogFileName()); EXPECT_TRUE(str.find("searchString4") != std::string::npos); } } TEST(FileLogger, GivenDisabledDebugFunctinalityWhenLoggingApiCallsThenFileIsNotCreated) { DebugVariables flags; flags.LogApiCalls.set(true); FullyDisabledFileLogger fileLogger(std::string(" "), flags); // Log file not created bool logFileCreated = fileExists(fileLogger.getLogFileName()); EXPECT_FALSE(logFileCreated); fileLogger.logApiCall("searchString", true, 0); fileLogger.logApiCall("searchString2", false, 0); fileLogger.logInputs("searchString3", "any"); fileLogger.logInputs("searchString3", "any", "and more"); fileLogger.log(false, "searchString4"); EXPECT_FALSE(fileLogger.wasFileCreated(fileLogger.getLogFileName())); } TEST(FileLogger, GivenIncorrectFilenameFileWhenLoggingApiCallsThenFileIsNotCreated) { DebugVariables flags; flags.LogApiCalls.set(true); FullyEnabledFileLogger fileLogger(std::string("test.log"), flags); fileLogger.useRealFiles(true); fileLogger.writeToFile("", "", 0, std::ios_base::in | std::ios_base::out); EXPECT_FALSE(fileLogger.wasFileCreated(fileLogger.getLogFileName())); } TEST(FileLogger, GivenCorrectFilenameFileWhenLoggingApiCallsThenFileIsCreated) { std::string testFile = "testfile"; DebugVariables flags; FullyEnabledFileLogger fileLogger(testFile, flags); fileLogger.useRealFiles(true); fileLogger.writeToFile(testFile, "test", 4, std::fstream::out); EXPECT_TRUE(fileExists(testFile)); if (fileExists(testFile)) { std::remove(testFile.c_str()); } 
} TEST(FileLogger, GivenSameFileNameWhenCreatingNewInstanceThenOldFileIsRemoved) { std::string testFile = "testfile"; DebugVariables flags; flags.LogApiCalls.set(true); FullyEnabledFileLogger fileLogger(testFile, flags); fileLogger.useRealFiles(true); fileLogger.writeToFile(fileLogger.getLogFileName(), "test", 4, std::fstream::out); EXPECT_TRUE(fileExists(fileLogger.getLogFileName())); FullyEnabledFileLogger fileLogger2(testFile, flags); EXPECT_FALSE(fileExists(fileLogger.getLogFileName())); } TEST(FileLogger, GivenFlagIsFalseWhenLoggingThenOnlyCustomLogsAreDumped) { std::string testFile = "testfile"; DebugVariables flags; flags.LogApiCalls.set(false); FullyEnabledFileLogger fileLogger(testFile, flags); // Log file not created bool logFileCreated = fileExists(fileLogger.getLogFileName()); EXPECT_FALSE(logFileCreated); fileLogger.logApiCall("searchString", true, 0); fileLogger.logApiCall("searchString2", false, 0); fileLogger.logInputs("searchString3", "any"); fileLogger.logInputs("searchString3", "any", "and more"); fileLogger.log(false, "searchString4"); if (fileLogger.wasFileCreated(fileLogger.getLogFileName())) { auto str = fileLogger.getFileString(fileLogger.getLogFileName()); EXPECT_FALSE(str.find("searchString\n") != std::string::npos); EXPECT_FALSE(str.find("searchString2\n") != std::string::npos); EXPECT_FALSE(str.find("searchString3") != std::string::npos); EXPECT_FALSE(str.find("searchString4") != std::string::npos); } // Log still works fileLogger.log(true, "searchString4"); if (fileLogger.wasFileCreated(fileLogger.getLogFileName())) { auto str = fileLogger.getFileString(fileLogger.getLogFileName()); EXPECT_TRUE(str.find("searchString4") != std::string::npos); } } TEST(FileLogger, WhenGettingInputThenCorrectValueIsReturned) { std::string testFile = "testfile"; DebugVariables flags; FullyEnabledFileLogger fileLogger(testFile, flags); // getInput returns 0 size_t input = 5; size_t output = fileLogger.getInput(&input, 0); EXPECT_EQ(input, output); } 
TEST(FileLogger, GivenNullInputWhenGettingInputThenZeroIsReturned) { std::string testFile = "testfile"; DebugVariables flags; FullyEnabledFileLogger fileLogger(testFile, flags); // getInput returns 0 size_t output = fileLogger.getInput(nullptr, 2); EXPECT_EQ(0u, output); } TEST(FileLogger, WhenGettingSizesThenCorrectValueIsReturned) { std::string testFile = "testfile"; DebugVariables flags; flags.LogApiCalls.set(true); FullyEnabledFileLogger fileLogger(testFile, flags); // getSizes returns string uintptr_t input[3] = {1, 2, 3}; std::string lwsSizes = fileLogger.getSizes(input, 3, true); std::string gwsSizes = fileLogger.getSizes(input, 3, false); std::string lwsExpected = "localWorkSize[0]: \t1\nlocalWorkSize[1]: \t2\nlocalWorkSize[2]: \t3\n"; std::string gwsExpected = "globalWorkSize[0]: \t1\nglobalWorkSize[1]: \t2\nglobalWorkSize[2]: \t3\n"; EXPECT_EQ(lwsExpected, lwsSizes); EXPECT_EQ(gwsExpected, gwsSizes); } TEST(FileLogger, GivenNullInputWhenGettingSizesThenZeroIsReturned) { std::string testFile = "testfile"; DebugVariables flags; flags.LogApiCalls.set(true); FullyEnabledFileLogger fileLogger(testFile, flags); // getSizes returns string std::string lwsSizes = fileLogger.getSizes(nullptr, 3, true); std::string gwsSizes = fileLogger.getSizes(nullptr, 3, false); EXPECT_EQ(0u, lwsSizes.size()); EXPECT_EQ(0u, gwsSizes.size()); } TEST(FileLogger, GivenDisabledDebugFunctionalityWhenGettingSizesThenEmptyStringIsReturned) { std::string testFile = "testfile"; DebugVariables flags; flags.LogApiCalls.set(true); FullyDisabledFileLogger fileLogger(testFile, flags); uintptr_t input[3] = {1, 2, 3}; std::string lwsSizes = fileLogger.getSizes(input, 3, true); std::string gwsSizes = fileLogger.getSizes(input, 3, false); EXPECT_EQ(0u, lwsSizes.size()); EXPECT_EQ(0u, gwsSizes.size()); } TEST(FileLogger, WhenGettingEventsThenCorrectValueIsReturned) { std::string testFile = "testfile"; DebugVariables flags; flags.LogApiCalls.set(true); FullyEnabledFileLogger fileLogger(testFile, 
flags); FullyEnabledClFileLogger clFileLogger(fileLogger, flags); // getEvents returns string uintptr_t event = 8; uintptr_t *input[3] = {&event, &event, &event}; std::string eventsString = clFileLogger.getEvents((uintptr_t *)input, 2); EXPECT_NE(0u, eventsString.size()); } TEST(FileLogger, GivenNullInputWhenGettingEventsThenZeroIsReturned) { std::string testFile = "testfile"; DebugVariables flags; FullyEnabledFileLogger fileLogger(testFile, flags); FullyEnabledClFileLogger clFileLogger(fileLogger, flags); // getEvents returns 0 sized string std::string event = clFileLogger.getEvents(nullptr, 2); EXPECT_EQ(0u, event.size()); } TEST(FileLogger, GivenLoggerWithDebugFunctionalityWhenGetMemObjectsIsCalledThenCorrectStringIsReturned) { std::string testFile = "testfile"; DebugVariables flags; flags.LogApiCalls.set(true); FullyEnabledFileLogger fileLogger(testFile, flags); FullyEnabledClFileLogger clFileLogger(fileLogger, flags); MockBuffer buffer; MemObj *memoryObject = &buffer; cl_mem clMem = memoryObject; cl_mem clMemObjects[] = {clMem, clMem}; cl_uint numObjects = 2; std::string memObjectString = clFileLogger.getMemObjects(reinterpret_cast(clMemObjects), numObjects); EXPECT_NE(0u, memObjectString.size()); std::stringstream output; output << "cl_mem " << clMem << ", MemObj " << memoryObject; EXPECT_TRUE(hasSubstr(memObjectString, output.str())); } TEST(FileLogger, GivenDebugFunctionalityWhenGetMemObjectsIsCalledWithNullptrThenStringIsEmpty) { std::string testFile = "testfile"; DebugVariables flags; FullyEnabledFileLogger fileLogger(testFile, flags); FullyEnabledClFileLogger clFileLogger(fileLogger, flags); std::string memObjectString = clFileLogger.getMemObjects(nullptr, 2); EXPECT_EQ(0u, memObjectString.size()); } TEST(FileLogger, GiveDisabledDebugFunctionalityWhenGetMemObjectsIsCalledThenCallReturnsImmediately) { std::string testFile = "testfile"; DebugVariables flags; FullyDisabledFileLogger fileLogger(testFile, flags); FullyDisabledClFileLogger 
clFileLogger(fileLogger, flags); std::string memObjectString = clFileLogger.getMemObjects(nullptr, 2); EXPECT_EQ(0u, memObjectString.size()); } TEST(FileLogger, WhenDumpingKernelThenFileIsCreated) { std::string testFile = "testfile"; DebugVariables flags; flags.DumpKernels.set(true); FullyEnabledFileLogger fileLogger(testFile, flags); std::string kernelDumpFile = "testDumpKernel"; // test kernel dumping fileLogger.dumpKernel(kernelDumpFile, "kernel source here"); EXPECT_TRUE(fileLogger.wasFileCreated(kernelDumpFile.append(".txt"))); } TEST(FileLogger, GivenDisabledDebugFunctionalityWhenDumpingKernelThenFileIsNotCreated) { std::string testFile = "testfile"; DebugVariables flags; flags.DumpKernels.set(false); std::string kernelDumpFile = "testDumpKernel"; FullyEnabledFileLogger fileLogger(testFile, flags); // test kernel dumping fileLogger.dumpKernel(kernelDumpFile, "kernel source here"); EXPECT_FALSE(fileLogger.wasFileCreated(kernelDumpFile.append(".txt"))); } TEST(FileLogger, WhenDumpingBinaryFileThenFileIsCreated) { std::string testFile = "testfile"; DebugVariables flags; flags.DumpKernels.set(true); FullyEnabledFileLogger fileLogger(testFile, flags); std::string programDumpFile = "programBinary.bin"; size_t length = 4; unsigned char binary[4]; const unsigned char *ptrBinary = binary; fileLogger.dumpBinaryProgram(1, &length, &ptrBinary); EXPECT_TRUE(fileLogger.wasFileCreated(programDumpFile)); } TEST(FileLogger, GivenNullPointerWhenDumpingBinaryFileThenFileIsNotCreated) { std::string testFile = "testfile"; DebugVariables flags; flags.DumpKernels.set(true); FullyEnabledFileLogger fileLogger(testFile, flags); std::string programDumpFile = "programBinary.bin"; size_t length = 4; fileLogger.dumpBinaryProgram(1, &length, nullptr); EXPECT_FALSE(fileLogger.wasFileCreated(programDumpFile)); } TEST(FileLogger, GivenNullMdiWhenDumpingKernelsThenFileIsNotCreated) { std::string testFile = "testfile"; DebugVariables flags; flags.DumpKernelArgs.set(true); FullyEnabledFileLogger 
fileLogger(testFile, flags); FullyEnabledClFileLogger clFileLogger(fileLogger, flags); clFileLogger.dumpKernelArgs(nullptr); EXPECT_EQ(fileLogger.createdFilesCount(), 0); } TEST(FileLogger, GivenDebugFunctionalityWhenDebugFlagIsDisabledThenDoNotDumpKernelArgsForMdi) { auto kernelInfo = std::make_unique(); kernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockProgram program(toClDeviceVector(*device)); auto kernel = std::unique_ptr(new MockKernel(&program, *kernelInfo, *device)); auto multiDispatchInfo = std::unique_ptr(new MockMultiDispatchInfo(device.get(), kernel.get())); kernelInfo->addArgImmediate(0, 32, 32); size_t crossThreadDataSize = 64; auto crossThreadData = std::unique_ptr(new uint8_t[crossThreadDataSize]); kernel->setCrossThreadData(crossThreadData.get(), static_cast(crossThreadDataSize)); std::string testFile = "testfile"; DebugVariables flags; flags.DumpKernelArgs.set(false); FullyEnabledFileLogger fileLogger(testFile, flags); FullyEnabledClFileLogger clFileLogger(fileLogger, flags); clFileLogger.dumpKernelArgs(multiDispatchInfo.get()); // check if file was created std::string expectedFile = "_arg_0_immediate_size_32_flags_0.bin"; EXPECT_FALSE(fileLogger.wasFileCreated(expectedFile)); // no files should be created EXPECT_EQ(fileLogger.createdFilesCount(), 0); } TEST(FileLogger, GivenMdiWhenDumpingKernelArgsThenFileIsCreated) { auto kernelInfo = std::make_unique(); kernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockProgram program(toClDeviceVector(*device)); auto kernel = std::unique_ptr(new MockKernel(&program, *kernelInfo, *device)); auto multiDispatchInfo = std::unique_ptr(new MockMultiDispatchInfo(device.get(), kernel.get())); kernelInfo->addArgImmediate(0, 32, 32); size_t crossThreadDataSize = 64; auto crossThreadData = std::unique_ptr(new 
uint8_t[crossThreadDataSize]); kernel->setCrossThreadData(crossThreadData.get(), static_cast(crossThreadDataSize)); std::string testFile = "testfile"; DebugVariables flags; flags.DumpKernelArgs.set(true); FullyEnabledFileLogger fileLogger(testFile, flags); FullyEnabledClFileLogger clFileLogger(fileLogger, flags); clFileLogger.dumpKernelArgs(multiDispatchInfo.get()); // check if file was created std::string expectedFile = "_arg_0_immediate_size_32_flags_0.bin"; EXPECT_TRUE(fileLogger.wasFileCreated(expectedFile)); // file should be created EXPECT_EQ(fileLogger.createdFilesCount(), 1); } TEST(FileLogger, GivenNullWhenDumpingKernelArgsThenFileIsNotCreated) { std::string testFile = "testfile"; DebugVariables flags; flags.DumpKernelArgs.set(true); FullyEnabledFileLogger fileLogger(testFile, flags); FullyEnabledClFileLogger clFileLogger(fileLogger, flags); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); auto multiDispatchInfo = std::unique_ptr(new MockMultiDispatchInfo(device.get(), nullptr)); clFileLogger.dumpKernelArgs(multiDispatchInfo.get()); EXPECT_EQ(fileLogger.createdFilesCount(), 0); } TEST(FileLogger, GivenEmptyKernelWhenDumpingKernelArgsThenFileIsNotCreated) { auto kernelInfo = std::make_unique(); kernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockProgram program(toClDeviceVector(*device)); auto kernel = std::unique_ptr(new MockKernel(&program, *kernelInfo, *device)); auto multiDispatchInfo = std::unique_ptr(new MockMultiDispatchInfo(device.get(), kernel.get())); std::string testFile = "testfile"; DebugVariables flags; flags.DumpKernelArgs.set(true); FullyEnabledFileLogger fileLogger(testFile, flags); FullyEnabledClFileLogger clFileLogger(fileLogger, flags); clFileLogger.dumpKernelArgs(multiDispatchInfo.get()); EXPECT_EQ(fileLogger.createdFilesCount(), 0); } TEST(FileLogger, 
GivenImmediateWhenDumpingKernelArgsThenFileIsCreated) { auto kernelInfo = std::make_unique(); kernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockProgram program(toClDeviceVector(*device)); auto kernel = std::unique_ptr(new MockKernel(&program, *kernelInfo, *device)); auto multiDispatchInfo = std::unique_ptr(new MockMultiDispatchInfo(device.get(), kernel.get())); kernelInfo->addArgImmediate(0, 32, 32); size_t crossThreadDataSize = 64; auto crossThreadData = std::unique_ptr(new uint8_t[crossThreadDataSize]); kernel->setCrossThreadData(crossThreadData.get(), static_cast(crossThreadDataSize)); std::string testFile = "testfile"; DebugVariables flags; flags.DumpKernelArgs.set(true); FullyEnabledFileLogger fileLogger(testFile, flags); FullyEnabledClFileLogger clFileLogger(fileLogger, flags); clFileLogger.dumpKernelArgs(multiDispatchInfo.get()); // check if file was created EXPECT_TRUE(fileLogger.wasFileCreated("_arg_0_immediate_size_32_flags_0.bin")); // no files should be created for local buffer EXPECT_EQ(fileLogger.createdFilesCount(), 1); } TEST(FileLogger, GivenImmediateZeroSizeWhenDumpingKernelArgsThenFileIsNotCreated) { auto kernelInfo = std::make_unique(); kernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockProgram program(toClDeviceVector(*device)); auto kernel = std::unique_ptr(new MockKernel(&program, *kernelInfo, *device)); auto multiDispatchInfo = std::unique_ptr(new MockMultiDispatchInfo(device.get(), kernel.get())); kernelInfo->addArgImmediate(0, 0, 32); size_t crossThreadDataSize = sizeof(64); auto crossThreadData = std::unique_ptr(new uint8_t[crossThreadDataSize]); kernel->setCrossThreadData(crossThreadData.get(), static_cast(crossThreadDataSize)); std::string testFile = "testfile"; DebugVariables flags; flags.DumpKernelArgs.set(true); FullyEnabledFileLogger 
fileLogger(testFile, flags); FullyEnabledClFileLogger clFileLogger(fileLogger, flags); clFileLogger.dumpKernelArgs(multiDispatchInfo.get()); // no files should be created for zero size EXPECT_EQ(fileLogger.createdFilesCount(), 0); } TEST(FileLogger, GivenLocalBufferWhenDumpingKernelArgsThenFileIsNotCreated) { auto kernelInfo = std::make_unique(); kernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockProgram program(toClDeviceVector(*device)); auto kernel = std::unique_ptr(new MockKernel(&program, *kernelInfo, *device)); auto multiDispatchInfo = std::unique_ptr(new MockMultiDispatchInfo(device.get(), kernel.get())); kernelInfo->addArgBuffer(0); kernelInfo->setAddressQualifier(0, KernelArgMetadata::AddrLocal); std::string testFile = "testfile"; DebugVariables flags; flags.DumpKernelArgs.set(true); FullyEnabledFileLogger fileLogger(testFile, flags); FullyEnabledClFileLogger clFileLogger(fileLogger, flags); clFileLogger.dumpKernelArgs(multiDispatchInfo.get()); // no files should be created for local buffer EXPECT_EQ(fileLogger.createdFilesCount(), 0); } TEST(FileLogger, GivenBufferNotSetWhenDumpingKernelArgsThenFileIsNotCreated) { auto kernelInfo = std::make_unique(); kernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); auto context = clUniquePtr(new MockContext(device.get())); auto program = clUniquePtr(new MockProgram(context.get(), false, toClDeviceVector(*device))); auto kernel = std::make_unique(program.get(), *kernelInfo, *device); auto multiDispatchInfo = std::unique_ptr(new MockMultiDispatchInfo(device.get(), kernel.get())); kernelInfo->addArgBuffer(0); kernelInfo->addExtendedMetadata(0, "", "uint8 *buffer"); kernel->initialize(); size_t crossThreadDataSize = sizeof(void *); auto crossThreadData = std::unique_ptr(new uint8_t[crossThreadDataSize]); 
kernel->setCrossThreadData(crossThreadData.get(), static_cast(crossThreadDataSize)); std::string testFile = "testfile"; DebugVariables flags; flags.DumpKernelArgs.set(true); FullyEnabledFileLogger fileLogger(testFile, flags); FullyEnabledClFileLogger clFileLogger(fileLogger, flags); clFileLogger.dumpKernelArgs(multiDispatchInfo.get()); // no files should be created for local buffer EXPECT_EQ(fileLogger.createdFilesCount(), 0); } TEST(FileLogger, GivenBufferWhenDumpingKernelArgsThenFileIsCreated) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); auto context = clUniquePtr(new MockContext(device.get())); auto buffer = BufferHelper<>::create(context.get()); cl_mem clObj = buffer; auto kernelInfo = std::make_unique(); kernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1; auto program = clUniquePtr(new MockProgram(context.get(), false, toClDeviceVector(*device))); auto kernel = std::make_unique(program.get(), *kernelInfo, *device); auto multiDispatchInfo = std::unique_ptr(new MockMultiDispatchInfo(device.get(), kernel.get())); kernelInfo->addArgBuffer(0); kernelInfo->addExtendedMetadata(0, "", "uint8 *buffer"); kernel->initialize(); size_t crossThreadDataSize = sizeof(void *); auto crossThreadData = std::unique_ptr(new uint8_t[crossThreadDataSize]); kernel->setCrossThreadData(crossThreadData.get(), static_cast(crossThreadDataSize)); kernel->setArg(0, clObj); std::string testFile = "testfile"; DebugVariables flags; flags.DumpKernelArgs.set(true); FullyEnabledFileLogger fileLogger(testFile, flags); FullyEnabledClFileLogger clFileLogger(fileLogger, flags); clFileLogger.dumpKernelArgs(multiDispatchInfo.get()); buffer->release(); // check if file was created EXPECT_TRUE(fileLogger.wasFileCreated("_arg_0_buffer_size_16_flags_1.bin")); // no files should be created for local buffer EXPECT_EQ(fileLogger.createdFilesCount(), 1); } TEST(FileLogger, GivenSamplerWhenDumpingKernelArgsThenFileIsNotCreated) { auto kernelInfo = 
std::make_unique(); kernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); auto context = clUniquePtr(new MockContext(device.get())); auto program = clUniquePtr(new MockProgram(context.get(), false, toClDeviceVector(*device))); auto kernel = std::make_unique(program.get(), *kernelInfo, *device); auto multiDispatchInfo = std::unique_ptr(new MockMultiDispatchInfo(device.get(), kernel.get())); kernelInfo->addArgSampler(0); kernelInfo->addExtendedMetadata(0, "", "sampler test"); kernel->initialize(); std::string testFile = "testfile"; DebugVariables flags; flags.DumpKernelArgs.set(true); FullyEnabledFileLogger fileLogger(testFile, flags); FullyEnabledClFileLogger clFileLogger(fileLogger, flags); clFileLogger.dumpKernelArgs(multiDispatchInfo.get()); // no files should be created for sampler arg EXPECT_EQ(fileLogger.createdFilesCount(), 0); } TEST(FileLogger, GivenImageNotSetWhenDumpingKernelArgsThenFileIsNotCreated) { auto kernelInfo = std::make_unique(); kernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); auto context = clUniquePtr(new MockContext(device.get())); auto program = clUniquePtr(new MockProgram(context.get(), false, toClDeviceVector(*device))); auto kernel = std::make_unique(program.get(), *kernelInfo, *device); auto multiDispatchInfo = std::unique_ptr(new MockMultiDispatchInfo(device.get(), kernel.get())); char surfaceStateHeap[0x80]; kernelInfo->heapInfo.pSsh = surfaceStateHeap; kernelInfo->heapInfo.SurfaceStateHeapSize = sizeof(surfaceStateHeap); kernelInfo->addArgImage(0); kernelInfo->argAsImg(0).metadataPayload.imgWidth = 0x4; kernelInfo->addExtendedMetadata(0, "", "image2d buffer"); kernel->initialize(); size_t crossThreadDataSize = sizeof(void *); auto crossThreadData = std::unique_ptr(new uint8_t[crossThreadDataSize]); 
kernel->setCrossThreadData(crossThreadData.get(), static_cast(crossThreadDataSize)); std::string testFile = "testfile"; DebugVariables flags; flags.DumpKernelArgs.set(true); FullyEnabledFileLogger fileLogger(testFile, flags); FullyEnabledClFileLogger clFileLogger(fileLogger, flags); clFileLogger.dumpKernelArgs(multiDispatchInfo.get()); // no files should be created for local buffer EXPECT_EQ(fileLogger.createdFilesCount(), 0); } TEST(FileLogger, GivenNullsWhenDumpingKernelArgsThenFileIsNotCreated) { std::string testFile = "testfile"; DebugVariables flags; flags.DumpKernels.set(true); FullyEnabledFileLogger fileLogger(testFile, flags); size_t length = 1; unsigned char binary[4]; const unsigned char *ptrBinary = binary; fileLogger.dumpBinaryProgram(1, nullptr, nullptr); fileLogger.dumpBinaryProgram(1, nullptr, &ptrBinary); fileLogger.dumpBinaryProgram(1, &length, nullptr); length = 0; fileLogger.dumpBinaryProgram(1, &length, &ptrBinary); fileLogger.dumpBinaryProgram(1, &length, nullptr); EXPECT_EQ(fileLogger.createdFilesCount(), 0); } TEST(FileLogger, GivenDisabledDebugFunctionalityWhenLoggingThenDumpingDoesNotOccur) { std::string path = "."; std::vector files = Directory::getFiles(path); size_t initialNumberOfFiles = files.size(); std::string testFile = "testfile"; DebugVariables flags; flags.DumpKernels.set(true); flags.LogApiCalls.set(true); flags.DumpKernelArgs.set(true); FullyDisabledFileLogger fileLogger(testFile, flags); FullyDisabledClFileLogger clFileLogger(fileLogger, flags); // Should not be enabled without debug functionality EXPECT_FALSE(fileLogger.enabled()); // Log file not created bool logFileCreated = fileExists(fileLogger.getLogFileName()); EXPECT_FALSE(logFileCreated); // test kernel dumping bool kernelDumpCreated = false; std::string kernelDumpFile = "testDumpKernel"; fileLogger.dumpKernel(kernelDumpFile, "kernel source here"); kernelDumpCreated = fileExists(kernelDumpFile.append(".txt")); EXPECT_FALSE(kernelDumpCreated); // test api logging 
fileLogger.logApiCall(__FUNCTION__, true, 0); logFileCreated = fileExists(fileLogger.getLogFileName()); EXPECT_FALSE(logFileCreated); // getInput returns 0 size_t input = 5; size_t output = fileLogger.getInput(&input, 0); EXPECT_EQ(0u, output); // getEvents returns 0-size string std::string event = clFileLogger.getEvents(&input, 0); EXPECT_EQ(0u, event.size()); // getSizes returns 0-size string std::string lwsSizes = fileLogger.getSizes(&input, 0, true); std::string gwsSizes = fileLogger.getSizes(&input, 0, false); EXPECT_EQ(0u, lwsSizes.size()); EXPECT_EQ(0u, gwsSizes.size()); // no programDump file std::string programDumpFile = "programBinary.bin"; size_t length = 4; unsigned char binary[4]; const unsigned char *ptrBinary = binary; fileLogger.dumpBinaryProgram(1, &length, &ptrBinary); EXPECT_FALSE(fileLogger.wasFileCreated(programDumpFile)); auto kernelInfo = std::make_unique(); kernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); auto context = clUniquePtr(new MockContext(device.get())); auto program = clUniquePtr(new MockProgram(context.get(), false, toClDeviceVector(*device))); auto kernel = std::make_unique(program.get(), *kernelInfo, *device); auto multiDispatchInfo = std::unique_ptr(new MockMultiDispatchInfo(device.get(), kernel.get())); kernelInfo->addArgBuffer(0); kernelInfo->addExtendedMetadata(0, "", "uint8 *buffer"); kernel->initialize(); size_t crossThreadDataSize = sizeof(void *); auto crossThreadData = std::unique_ptr(new uint8_t[crossThreadDataSize]); kernel->setCrossThreadData(crossThreadData.get(), static_cast(crossThreadDataSize)); kernel->setArg(0, nullptr); clFileLogger.dumpKernelArgs(multiDispatchInfo.get()); // test api input logging fileLogger.logInputs("Arg name", "value"); fileLogger.logInputs("int", 5); logFileCreated = fileExists(fileLogger.getLogFileName()); EXPECT_FALSE(logFileCreated); // check Log fileLogger.log(true, "string to be 
logged"); logFileCreated = fileExists(fileLogger.getLogFileName()); EXPECT_FALSE(logFileCreated); files = Directory::getFiles(path); size_t finalNumberOfFiles = files.size(); EXPECT_EQ(initialNumberOfFiles, finalNumberOfFiles); } TEST(LoggerApiEnterWrapper, GivenDebugFunctionalityEnabledWhenApiWrapperIsCreatedThenEntryIsLogged) { const char *name = "function"; int error = 0; { auto debugApiWrapper = std::make_unique>(name, nullptr); EXPECT_TRUE(debugApiWrapper->loggedEnter); } { auto debugApiWrapper2 = std::make_unique>(name, &error); EXPECT_TRUE(debugApiWrapper2->loggedEnter); } } TEST(LoggerApiEnterWrapper, GivenDebugFunctionalityDisabledWhenApiWrapperIsCreatedThenEntryIsNotLogged) { const char *name = "function"; int error = 0; { auto debugApiWrapper = std::make_unique>(name, nullptr); EXPECT_FALSE(debugApiWrapper->loggedEnter); } { auto debugApiWrapper2 = std::make_unique>(name, &error); EXPECT_FALSE(debugApiWrapper2->loggedEnter); } } TEST(FileLogger, WhenConvertingInfoPointerToStringThenCorrectStringIsReturned) { std::string testFile = "testfile"; DebugVariables flags; FullyEnabledFileLogger fileLogger(testFile, flags); uint64_t value64bit = 64; std::string string64bit = fileLogger.infoPointerToString(&value64bit, sizeof(uint64_t)); uint32_t value32bit = 32; std::string string32bit = fileLogger.infoPointerToString(&value32bit, sizeof(uint32_t)); uint8_t value8bit = 0; std::string string8bit = fileLogger.infoPointerToString(&value8bit, sizeof(uint8_t)); EXPECT_STREQ("64", string64bit.c_str()); EXPECT_STREQ("32", string32bit.c_str()); EXPECT_STREQ("0", string8bit.c_str()); std::string stringNonValue = fileLogger.infoPointerToString(nullptr, 56); EXPECT_STREQ("", stringNonValue.c_str()); char valueChar = 0; stringNonValue = fileLogger.infoPointerToString(&valueChar, 56); EXPECT_STREQ("", stringNonValue.c_str()); } TEST(FileLogger, givenDisabledDebugFunctionalityWhenLogLazyEvaluateArgsIsCalledThenCallToLambdaIsDropped) { std::string testFile = "testfile"; 
DebugVariables flags; FullyDisabledFileLogger fileLogger(testFile, flags); bool wasCalled = false; fileLogger.logLazyEvaluateArgs(true, [&] { wasCalled = true; }); EXPECT_FALSE(wasCalled); } TEST(FileLogger, givenDisabledPredicateWhenLogLazyEvaluateArgsIsCalledThenCallToLambdaIsDropped) { std::string testFile = "testfile"; DebugVariables flags; FullyEnabledFileLogger fileLogger(testFile, flags); bool wasCalled = false; fileLogger.logLazyEvaluateArgs(false, [&] { wasCalled = true; }); EXPECT_FALSE(wasCalled); } TEST(FileLogger, givenEnabledPredicateWhenLogLazyEvaluateArgsIsCalledThenCallToLambdaIsExecuted) { std::string testFile = "testfile"; DebugVariables flags; FullyEnabledFileLogger fileLogger(testFile, flags); bool wasCalled = false; fileLogger.logLazyEvaluateArgs(true, [&] { wasCalled = true; }); EXPECT_TRUE(wasCalled); } struct DummyEvaluator { DummyEvaluator(bool &wasCalled) { wasCalled = true; } operator const char *() { return ""; } }; TEST(FileLogger, givenDisabledPredicateWhenDbgLogLazyEvaluateArgsIsCalledThenInputParametersAreNotEvaluated) { std::string testFile = "testfile"; DebugVariables flags; flags.LogApiCalls.set(false); FullyEnabledFileLogger fileLogger(testFile, flags); bool wasCalled = false; DBG_LOG_LAZY_EVALUATE_ARGS(fileLogger, false, log, true, DummyEvaluator(wasCalled)); EXPECT_FALSE(wasCalled); } TEST(FileLogger, givenEnabledPredicateWhenDbgLogLazyEvaluateArgsIsCalledThenInputParametersAreEvaluated) { std::string testFile = "testfile"; DebugVariables flags; flags.LogApiCalls.set(true); FullyEnabledFileLogger fileLogger(testFile, flags); bool wasCalled = false; DBG_LOG_LAZY_EVALUATE_ARGS(fileLogger, true, log, true, DummyEvaluator(wasCalled)); EXPECT_TRUE(wasCalled); } TEST(FileLogger, whenDisabledThenDebugFunctionalityIsNotAvailableAtCompileTime) { std::string testFile = "testfile"; DebugVariables flags; FileLogger fileLogger(testFile, flags); static_assert(false == fileLogger.enabled(), ""); } TEST(FileLogger, 
whenFullyEnabledThenAllDebugFunctionalityIsAvailableAtCompileTime) { std::string testFile = "testfile"; DebugVariables flags; FileLogger fileLogger(testFile, flags); static_assert(true == fileLogger.enabled(), ""); } struct AllocationTypeTestCase { AllocationType type; const char *str; }; AllocationTypeTestCase allocationTypeValues[] = { {AllocationType::BUFFER, "BUFFER"}, {AllocationType::BUFFER_HOST_MEMORY, "BUFFER_HOST_MEMORY"}, {AllocationType::COMMAND_BUFFER, "COMMAND_BUFFER"}, {AllocationType::CONSTANT_SURFACE, "CONSTANT_SURFACE"}, {AllocationType::EXTERNAL_HOST_PTR, "EXTERNAL_HOST_PTR"}, {AllocationType::FILL_PATTERN, "FILL_PATTERN"}, {AllocationType::GLOBAL_SURFACE, "GLOBAL_SURFACE"}, {AllocationType::IMAGE, "IMAGE"}, {AllocationType::INDIRECT_OBJECT_HEAP, "INDIRECT_OBJECT_HEAP"}, {AllocationType::INSTRUCTION_HEAP, "INSTRUCTION_HEAP"}, {AllocationType::INTERNAL_HEAP, "INTERNAL_HEAP"}, {AllocationType::INTERNAL_HOST_MEMORY, "INTERNAL_HOST_MEMORY"}, {AllocationType::KERNEL_ISA, "KERNEL_ISA"}, {AllocationType::KERNEL_ISA_INTERNAL, "KERNEL_ISA_INTERNAL"}, {AllocationType::LINEAR_STREAM, "LINEAR_STREAM"}, {AllocationType::MAP_ALLOCATION, "MAP_ALLOCATION"}, {AllocationType::MCS, "MCS"}, {AllocationType::PIPE, "PIPE"}, {AllocationType::PREEMPTION, "PREEMPTION"}, {AllocationType::PRINTF_SURFACE, "PRINTF_SURFACE"}, {AllocationType::PRIVATE_SURFACE, "PRIVATE_SURFACE"}, {AllocationType::PROFILING_TAG_BUFFER, "PROFILING_TAG_BUFFER"}, {AllocationType::SCRATCH_SURFACE, "SCRATCH_SURFACE"}, {AllocationType::WORK_PARTITION_SURFACE, "WORK_PARTITION_SURFACE"}, {AllocationType::SHARED_BUFFER, "SHARED_BUFFER"}, {AllocationType::SHARED_CONTEXT_IMAGE, "SHARED_CONTEXT_IMAGE"}, {AllocationType::SHARED_IMAGE, "SHARED_IMAGE"}, {AllocationType::SHARED_RESOURCE_COPY, "SHARED_RESOURCE_COPY"}, {AllocationType::SURFACE_STATE_HEAP, "SURFACE_STATE_HEAP"}, {AllocationType::SVM_CPU, "SVM_CPU"}, {AllocationType::SVM_GPU, "SVM_GPU"}, {AllocationType::SVM_ZERO_COPY, "SVM_ZERO_COPY"}, 
{AllocationType::TAG_BUFFER, "TAG_BUFFER"}, {AllocationType::GLOBAL_FENCE, "GLOBAL_FENCE"}, {AllocationType::TIMESTAMP_PACKET_TAG_BUFFER, "TIMESTAMP_PACKET_TAG_BUFFER"}, {AllocationType::UNKNOWN, "UNKNOWN"}, {AllocationType::WRITE_COMBINED, "WRITE_COMBINED"}, {AllocationType::DEBUG_CONTEXT_SAVE_AREA, "DEBUG_CONTEXT_SAVE_AREA"}, {AllocationType::DEBUG_SBA_TRACKING_BUFFER, "DEBUG_SBA_TRACKING_BUFFER"}, {AllocationType::DEBUG_MODULE_AREA, "DEBUG_MODULE_AREA"}, {AllocationType::SW_TAG_BUFFER, "SW_TAG_BUFFER"}}; class AllocationTypeLogging : public ::testing::TestWithParam {}; TEST_P(AllocationTypeLogging, givenGraphicsAllocationTypeWhenConvertingToStringThenCorrectStringIsReturned) { std::string testFile = "testfile"; DebugVariables flags; FullyEnabledFileLogger fileLogger(testFile, flags); auto input = GetParam(); GraphicsAllocation graphicsAllocation(0, input.type, nullptr, 0ull, 0ull, 0, MemoryPool::MemoryNull); auto result = getAllocationTypeString(&graphicsAllocation); EXPECT_STREQ(result, input.str); } INSTANTIATE_TEST_CASE_P(AllAllocationTypes, AllocationTypeLogging, ::testing::ValuesIn(allocationTypeValues)); TEST(AllocationTypeLoggingSingle, givenGraphicsAllocationTypeWhenConvertingToStringIllegalValueThenILLEGAL_VALUEIsReturned) { std::string testFile = "testfile"; DebugVariables flags; FullyEnabledFileLogger fileLogger(testFile, flags); GraphicsAllocation graphicsAllocation(0, static_cast(999), nullptr, 0ull, 0ull, 0, MemoryPool::MemoryNull); auto result = getAllocationTypeString(&graphicsAllocation); EXPECT_STREQ(result, "ILLEGAL_VALUE"); } TEST(AllocationTypeLoggingSingle, givenAllocationTypeWhenConvertingToStringThenSupportAll) { std::string testFile = "testfile"; DebugVariables flags; FullyEnabledFileLogger fileLogger(testFile, flags); GraphicsAllocation graphicsAllocation(0, AllocationType::UNKNOWN, nullptr, 0ull, 0ull, 0, MemoryPool::MemoryNull); for (uint32_t i = 0; i < static_cast(AllocationType::COUNT); i++) { 
graphicsAllocation.setAllocationType(static_cast(i)); auto result = getAllocationTypeString(&graphicsAllocation); EXPECT_STRNE(result, "ILLEGAL_VALUE"); } } TEST(AllocationTypeLoggingSingle, givenDebugVariableToCaptureAllocationTypeWhenFunctionIsCalledThenProperAllocationTypeIsPrinted) { std::string testFile = "testfile"; DebugVariables flags; flags.LogAllocationType.set(1); FullyEnabledFileLogger fileLogger(testFile, flags); GraphicsAllocation graphicsAllocation(0, AllocationType::COMMAND_BUFFER, nullptr, 0ull, 0ull, 0, MemoryPool::MemoryNull); testing::internal::CaptureStdout(); fileLogger.logAllocation(&graphicsAllocation); std::string output = testing::internal::GetCapturedStdout(); std::string expectedOutput = "Created Graphics Allocation of type COMMAND_BUFFER\n"; EXPECT_STREQ(output.c_str(), expectedOutput.c_str()); } TEST(AllocationTypeLoggingSingle, givenLogAllocationTypeWhenLoggingAllocationThenTypeIsLoggedToFile) { std::string testFile = "testfile"; DebugVariables flags; flags.LogAllocationType.set(1); FullyEnabledFileLogger fileLogger(testFile, flags); GraphicsAllocation graphicsAllocation(0, AllocationType::COMMAND_BUFFER, nullptr, 0ull, 0ull, 0, MemoryPool::MemoryNull); // Log file not created bool logFileCreated = fileExists(fileLogger.getLogFileName()); EXPECT_FALSE(logFileCreated); testing::internal::CaptureStdout(); fileLogger.logAllocation(&graphicsAllocation); std::string output = testing::internal::GetCapturedStdout(); std::string expectedOutput = "Created Graphics Allocation of type COMMAND_BUFFER\n"; EXPECT_STREQ(output.c_str(), expectedOutput.c_str()); if (fileLogger.wasFileCreated(fileLogger.getLogFileName())) { auto str = fileLogger.getFileString(fileLogger.getLogFileName()); EXPECT_TRUE(str.find("AllocationType: ") != std::string::npos); } else { EXPECT_FALSE(true); } } compute-runtime-22.14.22890/opencl/test/unit_test/utilities/file_logger_tests.h000066400000000000000000000043011422164147700275110ustar00rootroot00000000000000/* * 
Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/file_io.h" #include "shared/source/helpers/string_helpers.h" #include "shared/source/utilities/directory.h" #include "shared/source/utilities/logger.h" #include "opencl/source/utilities/cl_logger.h" #include template class TestFileLogger : public NEO::FileLogger { public: using NEO::FileLogger::FileLogger; ~TestFileLogger() { std::remove(NEO::FileLogger::logFileName.c_str()); } void useRealFiles(bool value) { mockFileSystem = !value; } void writeToFile(std::string filename, const char *str, size_t length, std::ios_base::openmode mode) override { savedFiles[filename] << std::string(str, str + length); if (mockFileSystem == false) { NEO::FileLogger::writeToFile(filename, str, length, mode); } }; int32_t createdFilesCount() { return static_cast(savedFiles.size()); } bool wasFileCreated(std::string filename) { return savedFiles.find(filename) != savedFiles.end(); } std::string getFileString(std::string filename) { return savedFiles[filename].str(); } protected: bool mockFileSystem = true; std::map savedFiles; }; using FullyEnabledFileLogger = TestFileLogger; using FullyDisabledFileLogger = TestFileLogger; using FullyEnabledClFileLogger = NEO::ClFileLogger; using FullyDisabledClFileLogger = NEO::ClFileLogger; template class TestLoggerApiEnterWrapper : public NEO::LoggerApiEnterWrapper { public: TestLoggerApiEnterWrapper(const char *functionName, int *errCode) : NEO::LoggerApiEnterWrapper(functionName, errCode), loggedEnter(false) { if (DebugFunctionality) { loggedEnter = true; } } bool loggedEnter; }; compute-runtime-22.14.22890/opencl/test/unit_test/utilities/tag_allocator_tests.cpp000066400000000000000000000554721422164147700304200ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/timestamp_packet.h" #include "shared/source/utilities/tag_allocator.h" 
#include "shared/test/common/fixtures/memory_allocator_fixture.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/engine_descriptor_helper.h" #include "shared/test/common/libult/ult_command_stream_receiver.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "shared/test/common/mocks/ult_device_factory.h" #include "shared/test/common/test_macros/test.h" #include "gtest/gtest.h" #include using namespace NEO; struct TagAllocatorTest : public Test { class MockTimestampPackets32 : public TimestampPackets { public: void setTagToReadyState() { initialize(); uint32_t zeros[4] = {}; for (uint32_t i = 0; i < TimestampPacketSizeControl::preferredPacketCount; i++) { assignDataToAllTimestamps(i, zeros); } } void setToNonReadyState() { packets[0].contextEnd = 1; } }; void SetUp() override { DebugManager.flags.CreateMultipleSubDevices.set(4); MemoryAllocatorFixture::SetUp(); } const DeviceBitfield deviceBitfield{0xf}; DebugManagerStateRestore restorer; }; struct TimeStamps { void initialize() { start = 1; end = 2; } static constexpr AllocationType getAllocationType() { return AllocationType::PROFILING_TAG_BUFFER; } static constexpr TagNodeType getTagNodeType() { return TagNodeType::HwTimeStamps; } uint64_t getContextStartValue(uint32_t packetIndex) const { return start; } uint64_t getGlobalStartValue(uint32_t packetIndex) const { return start; } uint64_t getContextEndValue(uint32_t packetIndex) const { return end; } uint64_t getGlobalEndValue(uint32_t packetIndex) const { return end; } void const *getContextEndAddress(uint32_t packetIndex) const { return &end; } uint64_t start; uint64_t end; uint64_t ContextCompleteTS; uint64_t GlobalEndTS; }; template class MockTagAllocator : public TagAllocator { using BaseClass = TagAllocator; using TagNodeT = TagNode; public: using BaseClass::deferredTags; using BaseClass::doNotReleaseNodes; using BaseClass::freeTags; using BaseClass::gfxAllocations; using 
BaseClass::populateFreeTags; using BaseClass::releaseDeferredTags; using BaseClass::returnTagToDeferredPool; using BaseClass::rootDeviceIndices; using BaseClass::TagAllocator; using BaseClass::usedTags; using BaseClass::TagAllocatorBase::cleanUpResources; MockTagAllocator(uint32_t rootDeviceIndex, MemoryManager *memoryManager, size_t tagCount, size_t tagAlignment, size_t tagSize, bool doNotReleaseNodes, DeviceBitfield deviceBitfield) : BaseClass(std::vector{rootDeviceIndex}, memoryManager, tagCount, tagAlignment, tagSize, doNotReleaseNodes, deviceBitfield) { } MockTagAllocator(MemoryManager *memMngr, size_t tagCount, size_t tagAlignment, bool disableCompletionCheck, DeviceBitfield deviceBitfield) : MockTagAllocator(0, memMngr, tagCount, tagAlignment, sizeof(TagType), disableCompletionCheck, deviceBitfield) { } MockTagAllocator(MemoryManager *memMngr, size_t tagCount, size_t tagAlignment, DeviceBitfield deviceBitfield) : MockTagAllocator(memMngr, tagCount, tagAlignment, false, deviceBitfield) { } GraphicsAllocation *getGraphicsAllocation(size_t id = 0) { return this->gfxAllocations[id]->getDefaultGraphicsAllocation(); } TagNodeT *getFreeTagsHead() { return this->freeTags.peekHead(); } TagNodeT *getUsedTagsHead() { return this->usedTags.peekHead(); } size_t getGraphicsAllocationsCount() { return this->gfxAllocations.size(); } size_t getTagPoolCount() { return this->tagPoolMemory.size(); } }; TEST_F(TagAllocatorTest, givenTagNodeTypeWhenCopyingOrMovingThenDisallow) { EXPECT_FALSE(std::is_move_constructible>::value); EXPECT_FALSE(std::is_copy_constructible>::value); } TEST_F(TagAllocatorTest, WhenTagAllocatorIsCreatedThenItIsCorrectlyInitialized) { MockTagAllocator tagAllocator(memoryManager, 100, 64, deviceBitfield); ASSERT_NE(nullptr, tagAllocator.getGraphicsAllocation()); ASSERT_NE(nullptr, tagAllocator.getFreeTagsHead()); EXPECT_EQ(nullptr, tagAllocator.getUsedTagsHead()); void *gfxMemory = tagAllocator.getGraphicsAllocation()->getUnderlyingBuffer(); void *head = 
reinterpret_cast(tagAllocator.getFreeTagsHead()->tagForCpuAccess); EXPECT_EQ(gfxMemory, head); } TEST_F(TagAllocatorTest, WhenGettingAndReturningTagThenFreeAndUsedListsAreUpdated) { MockTagAllocator tagAllocator(memoryManager, 10, 16, deviceBitfield); ASSERT_NE(nullptr, tagAllocator.getGraphicsAllocation()); ASSERT_NE(nullptr, tagAllocator.getFreeTagsHead()); EXPECT_EQ(nullptr, tagAllocator.getUsedTagsHead()); auto tagNode = static_cast *>(tagAllocator.getTag()); EXPECT_NE(nullptr, tagNode); IDList> &freeList = tagAllocator.freeTags; IDList> &usedList = tagAllocator.usedTags; bool isFoundOnUsedList = usedList.peekContains(*tagNode); bool isFoundOnFreeList = freeList.peekContains(*tagNode); EXPECT_FALSE(isFoundOnFreeList); EXPECT_TRUE(isFoundOnUsedList); tagAllocator.returnTag(tagNode); isFoundOnUsedList = usedList.peekContains(*tagNode); isFoundOnFreeList = freeList.peekContains(*tagNode); EXPECT_TRUE(isFoundOnFreeList); EXPECT_FALSE(isFoundOnUsedList); } TEST_F(TagAllocatorTest, WhenTagAllocatorIsCreatedThenItPopulatesTagsWithProperDeviceBitfield) { size_t alignment = 64; EXPECT_NE(deviceBitfield, memoryManager->recentlyPassedDeviceBitfield); MockTagAllocator tagAllocator(memoryManager, 10, alignment, deviceBitfield); EXPECT_EQ(deviceBitfield, memoryManager->recentlyPassedDeviceBitfield); } TEST_F(TagAllocatorTest, WhenTagIsAllocatedThenItIsAligned) { size_t alignment = 64; MockTagAllocator tagAllocator(memoryManager, 10, alignment, deviceBitfield); ASSERT_NE(nullptr, tagAllocator.getFreeTagsHead()); TagNode *tagNode = static_cast *>(tagAllocator.getTag()); ASSERT_NE(nullptr, tagNode); EXPECT_EQ(0u, (uintptr_t)tagNode->tagForCpuAccess % alignment); tagAllocator.returnTag(tagNode); } TEST_F(TagAllocatorTest, givenTagAllocatorWhenAllNodesWereUsedThenCreateNewGraphicsAllocation) { // Big alignment to force only 4 tags size_t alignment = 1024; MockTagAllocator tagAllocator(memoryManager, 4, alignment, deviceBitfield); ASSERT_NE(nullptr, 
tagAllocator.getFreeTagsHead()); TagNode *tagNodes[4]; for (size_t i = 0; i < 4; i++) { tagNodes[i] = static_cast *>(tagAllocator.getTag()); EXPECT_NE(nullptr, tagNodes[i]); } EXPECT_EQ(1u, tagAllocator.getGraphicsAllocationsCount()); EXPECT_EQ(1u, tagAllocator.getTagPoolCount()); TagNode *tagNode = static_cast *>(tagAllocator.getTag()); EXPECT_NE(nullptr, tagNode); EXPECT_EQ(2u, tagAllocator.getGraphicsAllocationsCount()); EXPECT_EQ(2u, tagAllocator.getTagPoolCount()); } TEST_F(TagAllocatorTest, givenInputTagCountWhenCreatingAllocatorThenRequestedNumberOfNodesIsCreated) { class MyMockMemoryManager : public MockMemoryManager { public: using MockMemoryManager::MockMemoryManager; GraphicsAllocation *allocateGraphicsMemoryWithAlignment(const AllocationData &allocationData) override { return new MemoryAllocation(0, TimestampPackets::getAllocationType(), nullptr, nullptr, 0, MemoryConstants::pageSize, 1, MemoryPool::System4KBPages, false, false, MemoryManager::maxOsContextCount); } }; auto mockMemoryManager = std::make_unique(true, true, *executionEnvironment); const size_t tagsCount = 3; MockTagAllocator> tagAllocator(mockMemoryManager.get(), tagsCount, 1, deviceBitfield); size_t nodesFound = 0; auto head = tagAllocator.freeTags.peekHead(); while (head) { nodesFound++; head = head->next; } EXPECT_EQ(tagsCount, nodesFound); } TEST_F(TagAllocatorTest, GivenSpecificOrderWhenReturningTagsThenFreeListIsUpdatedCorrectly) { // Big alignment to force only 4 tags size_t alignment = 1024; MockTagAllocator tagAllocator(memoryManager, 4, alignment, deviceBitfield); ASSERT_NE(nullptr, tagAllocator.getFreeTagsHead()); TagNode *tagNodes[4]; for (int i = 0; i < 4; i++) { tagNodes[i] = static_cast *>(tagAllocator.getTag()); EXPECT_NE(nullptr, tagNodes[i]); } EXPECT_EQ(1u, tagAllocator.getGraphicsAllocationsCount()); EXPECT_EQ(1u, tagAllocator.getTagPoolCount()); TagNode *tagNode2 = static_cast *>(tagAllocator.getTag()); EXPECT_NE(nullptr, tagNode2); EXPECT_EQ(2u, 
tagAllocator.getGraphicsAllocationsCount()); EXPECT_EQ(2u, tagAllocator.getTagPoolCount()); IDList> &freeList = tagAllocator.freeTags; bool isFoundOnFreeList = freeList.peekContains(*tagNodes[0]); EXPECT_FALSE(isFoundOnFreeList); tagAllocator.returnTag(tagNodes[2]); isFoundOnFreeList = freeList.peekContains(*tagNodes[2]); EXPECT_TRUE(isFoundOnFreeList); EXPECT_NE(nullptr, tagAllocator.getFreeTagsHead()); tagAllocator.returnTag(tagNodes[3]); isFoundOnFreeList = freeList.peekContains(*tagNodes[3]); EXPECT_TRUE(isFoundOnFreeList); tagAllocator.returnTag(tagNodes[1]); isFoundOnFreeList = freeList.peekContains(*tagNodes[1]); EXPECT_TRUE(isFoundOnFreeList); isFoundOnFreeList = freeList.peekContains(*tagNodes[0]); EXPECT_FALSE(isFoundOnFreeList); tagAllocator.returnTag(tagNodes[0]); } TEST_F(TagAllocatorTest, WhenGettingTagsFromTwoPoolsThenTagsAreDifferent) { // Big alignment to force only 1 tag size_t alignment = 4096; MockTagAllocator tagAllocator(memoryManager, 1, alignment, deviceBitfield); ASSERT_NE(nullptr, tagAllocator.getFreeTagsHead()); TagNode *tagNode1, *tagNode2; tagNode1 = static_cast *>(tagAllocator.getTag()); ASSERT_NE(nullptr, tagNode1); tagNode2 = static_cast *>(tagAllocator.getTag()); ASSERT_NE(nullptr, tagNode2); EXPECT_EQ(2u, tagAllocator.getGraphicsAllocationsCount()); EXPECT_EQ(2u, tagAllocator.getTagPoolCount()); EXPECT_NE(tagNode1->getBaseGraphicsAllocation(), tagNode2->getBaseGraphicsAllocation()); tagAllocator.returnTag(tagNode1); tagAllocator.returnTag(tagNode2); } TEST_F(TagAllocatorTest, WhenCleaningUpResourcesThenAllResourcesAreReleased) { // Big alignment to force only 1 tag size_t alignment = 4096; MockTagAllocator tagAllocator(memoryManager, 1, alignment, deviceBitfield); TagNode *tagNode1, *tagNode2; // Allocate first Pool tagNode1 = static_cast *>(tagAllocator.getTag()); EXPECT_NE(nullptr, tagNode1); // Allocate second Pool tagNode2 = static_cast *>(tagAllocator.getTag()); ASSERT_NE(nullptr, tagNode2); // Two pools should have different 
gfxAllocations EXPECT_NE(tagNode1->getBaseGraphicsAllocation(), tagNode2->getBaseGraphicsAllocation()); // Return tags tagAllocator.returnTag(tagNode1); tagAllocator.returnTag(tagNode2); // Should cleanup all resources tagAllocator.cleanUpResources(); EXPECT_EQ(0u, tagAllocator.getGraphicsAllocationsCount()); } TEST_F(TagAllocatorTest, whenNewTagIsTakenThenItIsInitialized) { MockTagAllocator tagAllocator(memoryManager, 1, 2, deviceBitfield); tagAllocator.getFreeTagsHead()->tagForCpuAccess->start = 3; tagAllocator.getFreeTagsHead()->tagForCpuAccess->end = 4; tagAllocator.getFreeTagsHead()->setProfilingCapable(false); auto node = static_cast *>(tagAllocator.getTag()); EXPECT_EQ(1u, node->tagForCpuAccess->start); EXPECT_EQ(2u, node->tagForCpuAccess->end); EXPECT_TRUE(node->isProfilingCapable()); } TEST_F(TagAllocatorTest, givenMultipleReferencesOnTagWhenReleasingThenReturnWhenAllRefCountsAreReleased) { MockTagAllocator tagAllocator(memoryManager, 2, 1, deviceBitfield); auto tag = tagAllocator.getTag(); EXPECT_NE(nullptr, tagAllocator.getUsedTagsHead()); tagAllocator.returnTag(tag); EXPECT_EQ(nullptr, tagAllocator.getUsedTagsHead()); // only 1 reference tag = tagAllocator.getTag(); tag->incRefCount(); EXPECT_NE(nullptr, tagAllocator.getUsedTagsHead()); tagAllocator.returnTag(tag); EXPECT_NE(nullptr, tagAllocator.getUsedTagsHead()); // 1 reference left tagAllocator.returnTag(tag); EXPECT_EQ(nullptr, tagAllocator.getUsedTagsHead()); } TEST_F(TagAllocatorTest, givenNotReadyTagWhenReturnedThenMoveToFreeList) { MockTagAllocator tagAllocator(memoryManager, 1, 1, deviceBitfield); auto node = static_cast *>(tagAllocator.getTag()); node->tagForCpuAccess->setToNonReadyState(); EXPECT_TRUE(tagAllocator.deferredTags.peekIsEmpty()); tagAllocator.returnTag(node); EXPECT_TRUE(tagAllocator.deferredTags.peekIsEmpty()); EXPECT_FALSE(tagAllocator.freeTags.peekIsEmpty()); } TEST_F(TagAllocatorTest, givenTagNodeWhenCompletionCheckIsDisabledThenStatusIsMarkedAsNotReady) { MockTagAllocator 
tagAllocator(memoryManager, 1, 1, deviceBitfield); EXPECT_FALSE(tagAllocator.doNotReleaseNodes); auto node = tagAllocator.getTag(); EXPECT_TRUE(node->canBeReleased()); node->setDoNotReleaseNodes(true); EXPECT_FALSE(node->canBeReleased()); tagAllocator.returnTag(node); EXPECT_FALSE(tagAllocator.deferredTags.peekIsEmpty()); EXPECT_TRUE(tagAllocator.freeTags.peekIsEmpty()); tagAllocator.releaseDeferredTags(); EXPECT_FALSE(tagAllocator.deferredTags.peekIsEmpty()); EXPECT_TRUE(tagAllocator.freeTags.peekIsEmpty()); } TEST_F(TagAllocatorTest, givenTagAllocatorWhenDisabledCompletionCheckThenNodeInheritsItsState) { MockTagAllocator tagAllocator(memoryManager, 1, 1, true, deviceBitfield); EXPECT_TRUE(tagAllocator.doNotReleaseNodes); auto node = tagAllocator.getTag(); EXPECT_FALSE(node->canBeReleased()); node->setDoNotReleaseNodes(false); EXPECT_TRUE(node->canBeReleased()); tagAllocator.returnTag(node); EXPECT_TRUE(tagAllocator.deferredTags.peekIsEmpty()); EXPECT_FALSE(tagAllocator.freeTags.peekIsEmpty()); } TEST_F(TagAllocatorTest, givenReadyTagWhenReturnedThenMoveToFreeList) { MockTagAllocator tagAllocator(memoryManager, 1, 1, deviceBitfield); auto node = static_cast *>(tagAllocator.getTag()); node->tagForCpuAccess->setTagToReadyState(); EXPECT_TRUE(tagAllocator.deferredTags.peekIsEmpty()); tagAllocator.returnTag(node); EXPECT_TRUE(tagAllocator.deferredTags.peekIsEmpty()); EXPECT_FALSE(tagAllocator.freeTags.peekIsEmpty()); } TEST_F(TagAllocatorTest, givenEmptyFreeListWhenAskingForNewTagThenTryToReleaseDeferredListFirst) { MockTagAllocator tagAllocator(memoryManager, 1, 1, deviceBitfield); auto node = static_cast *>(tagAllocator.getTag()); tagAllocator.returnTagToDeferredPool(node); EXPECT_TRUE(tagAllocator.freeTags.peekIsEmpty()); node = static_cast *>(tagAllocator.getTag()); EXPECT_NE(nullptr, node); EXPECT_TRUE(tagAllocator.freeTags.peekIsEmpty()); // empty again - new pool wasnt allocated } TEST_F(TagAllocatorTest, 
givenTagAllocatorWhenGraphicsAllocationIsCreatedThenSetValidllocationType) { MockTagAllocator> timestampPacketAllocator(mockRootDeviceIndex, memoryManager, 1, 1, sizeof(TimestampPackets), false, mockDeviceBitfield); MockTagAllocator hwTimeStampsAllocator(mockRootDeviceIndex, memoryManager, 1, 1, sizeof(HwTimeStamps), false, mockDeviceBitfield); MockTagAllocator hwPerfCounterAllocator(mockRootDeviceIndex, memoryManager, 1, 1, sizeof(HwPerfCounter), false, mockDeviceBitfield); auto timestampPacketTag = timestampPacketAllocator.getTag(); auto hwTimeStampsTag = hwTimeStampsAllocator.getTag(); auto hwPerfCounterTag = hwPerfCounterAllocator.getTag(); EXPECT_EQ(AllocationType::TIMESTAMP_PACKET_TAG_BUFFER, timestampPacketTag->getBaseGraphicsAllocation()->getAllocationType()); EXPECT_EQ(AllocationType::PROFILING_TAG_BUFFER, hwTimeStampsTag->getBaseGraphicsAllocation()->getAllocationType()); EXPECT_EQ(AllocationType::PROFILING_TAG_BUFFER, hwPerfCounterTag->getBaseGraphicsAllocation()->getAllocationType()); } TEST_F(TagAllocatorTest, givenMultipleRootDevicesWhenPopulatingTagsThenCreateMultiGraphicsAllocation) { constexpr uint32_t maxRootDeviceIndex = 4; auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(maxRootDeviceIndex + 1); for (auto i = 0u; i < executionEnvironment->rootDeviceEnvironments.size(); i++) { executionEnvironment->rootDeviceEnvironments[i]->setHwInfo(defaultHwInfo.get()); } auto testMemoryManager = new MockMemoryManager(false, false, *executionEnvironment); executionEnvironment->memoryManager.reset(testMemoryManager); const std::set indices = {0, 2, maxRootDeviceIndex}; const std::vector indicesVector = {indices.begin(), indices.end()}; MockTagAllocator> timestampPacketAllocator(indicesVector, testMemoryManager, 1, 1, sizeof(TimestampPackets), false, mockDeviceBitfield); EXPECT_EQ(1u, timestampPacketAllocator.getGraphicsAllocationsCount()); auto multiGraphicsAllocation = 
timestampPacketAllocator.gfxAllocations[0].get(); for (uint32_t i = 0; i <= maxRootDeviceIndex; i++) { if (indices.find(i) != indices.end()) { EXPECT_NE(nullptr, multiGraphicsAllocation->getGraphicsAllocation(i)); } else { EXPECT_EQ(nullptr, multiGraphicsAllocation->getGraphicsAllocation(i)); } } } HWTEST_F(TagAllocatorTest, givenMultipleRootDevicesWhenCallingMakeResidentThenUseCorrectRootDeviceIndex) { constexpr uint32_t maxRootDeviceIndex = 1; auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(maxRootDeviceIndex + 1); for (auto i = 0u; i < executionEnvironment->rootDeviceEnvironments.size(); i++) { executionEnvironment->rootDeviceEnvironments[i]->setHwInfo(defaultHwInfo.get()); } auto testMemoryManager = new MockMemoryManager(false, false, *executionEnvironment); executionEnvironment->memoryManager.reset(testMemoryManager); const std::vector indicesVector = {0, 1}; MockTagAllocator> timestampPacketAllocator(indicesVector, testMemoryManager, 1, 1, sizeof(TimestampPackets), false, mockDeviceBitfield); EXPECT_EQ(1u, timestampPacketAllocator.getGraphicsAllocationsCount()); auto multiGraphicsAllocation = timestampPacketAllocator.gfxAllocations[0].get(); auto rootCsr0 = std::unique_ptr>(static_cast *>(createCommandStream(*executionEnvironment, 0, 1))); auto osContext0 = testMemoryManager->createAndRegisterOsContext(rootCsr0.get(), EngineDescriptorHelper::getDefaultDescriptor({aub_stream::EngineType::ENGINE_BCS, EngineUsage::Regular}, true)); rootCsr0->setupContext(*osContext0); auto rootCsr1 = std::unique_ptr>(static_cast *>(createCommandStream(*executionEnvironment, 1, 1))); auto osContext1 = testMemoryManager->createAndRegisterOsContext(rootCsr1.get(), EngineDescriptorHelper::getDefaultDescriptor({aub_stream::EngineType::ENGINE_BCS, EngineUsage::Regular}, true)); rootCsr1->setupContext(*osContext1); rootCsr0->storeMakeResidentAllocations = true; rootCsr1->storeMakeResidentAllocations = true; 
rootCsr0->makeResident(*multiGraphicsAllocation); EXPECT_TRUE(rootCsr0->isMadeResident(multiGraphicsAllocation->getGraphicsAllocation(0))); EXPECT_FALSE(rootCsr0->isMadeResident(multiGraphicsAllocation->getGraphicsAllocation(1))); rootCsr1->makeResident(*multiGraphicsAllocation); EXPECT_FALSE(rootCsr1->isMadeResident(multiGraphicsAllocation->getGraphicsAllocation(0))); EXPECT_TRUE(rootCsr1->isMadeResident(multiGraphicsAllocation->getGraphicsAllocation(1))); } TEST_F(TagAllocatorTest, givenNotSupportedTagTypeWhenCallingMethodThenAbortOrReturnInitialValue) { { TagNode perfCounterNode = {}; EXPECT_ANY_THROW(perfCounterNode.getGlobalStartOffset()); EXPECT_ANY_THROW(perfCounterNode.getContextStartOffset()); EXPECT_ANY_THROW(perfCounterNode.getContextEndOffset()); EXPECT_ANY_THROW(perfCounterNode.getGlobalEndOffset()); EXPECT_ANY_THROW(perfCounterNode.getContextStartValue(0)); EXPECT_ANY_THROW(perfCounterNode.getGlobalStartValue(0)); EXPECT_ANY_THROW(perfCounterNode.getContextEndValue(0)); EXPECT_ANY_THROW(perfCounterNode.getGlobalEndValue(0)); EXPECT_ANY_THROW(perfCounterNode.getContextEndAddress(0)); EXPECT_ANY_THROW(perfCounterNode.getContextCompleteRef()); EXPECT_ANY_THROW(perfCounterNode.getGlobalEndRef()); EXPECT_ANY_THROW(perfCounterNode.getSinglePacketSize()); EXPECT_ANY_THROW(perfCounterNode.assignDataToAllTimestamps(0, nullptr)); } { TagNode hwTimestampNode = {}; EXPECT_ANY_THROW(hwTimestampNode.getGlobalStartOffset()); EXPECT_ANY_THROW(hwTimestampNode.getContextStartOffset()); EXPECT_ANY_THROW(hwTimestampNode.getContextEndOffset()); EXPECT_ANY_THROW(hwTimestampNode.getGlobalEndOffset()); EXPECT_ANY_THROW(hwTimestampNode.getSinglePacketSize()); EXPECT_ANY_THROW(hwTimestampNode.assignDataToAllTimestamps(0, nullptr)); EXPECT_ANY_THROW(hwTimestampNode.getQueryHandleRef()); } { TagNode> timestampPacketsNode = {}; EXPECT_ANY_THROW(timestampPacketsNode.getContextCompleteRef()); EXPECT_ANY_THROW(timestampPacketsNode.getGlobalEndRef()); 
EXPECT_ANY_THROW(timestampPacketsNode.getQueryHandleRef()); } } compute-runtime-22.14.22890/opencl/test/unit_test/windows/000077500000000000000000000000001422164147700233215ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/windows/CMakeLists.txt000066400000000000000000000060671422164147700260720ustar00rootroot00000000000000# # Copyright (C) 2018-2021 Intel Corporation # # SPDX-License-Identifier: MIT # if(WIN32) project(igdrcl_windows_dll_tests) set(NEO_IGDRCL_WINDOWS_DLL_TESTS_TARGET_OBJECTS $ $ $ $ $ $ ) add_executable(igdrcl_windows_dll_tests ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}get_devices_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/os_interface_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/wddm_create_tests.cpp ${NEO_IGDRCL_WINDOWS_DLL_TESTS_TARGET_OBJECTS} ${NEO_SHARED_DIRECTORY}/dll/direct_submission_controller_enabled.cpp ${NEO_SHARED_DIRECTORY}/dll/get_devices.cpp ${NEO_SHARED_DIRECTORY}/dll/windows/os_interface.cpp ${NEO_SHARED_DIRECTORY}/dll/create_command_stream.cpp ${NEO_SHARED_DIRECTORY}/os_interface/windows/wddm/wddm_create.cpp ${NEO_SHARED_TEST_DIRECTORY}/common/mocks/mock_wddm.cpp ${NEO_SHARED_TEST_DIRECTORY}/common/os_interface/windows/create_wddm_memory_manager.cpp ${NEO_SHARED_TEST_DIRECTORY}/common/os_interface/windows/mock_environment_variables.cpp ${NEO_SHARED_TEST_DIRECTORY}/common/os_interface/windows/options.cpp ${NEO_SHARED_TEST_DIRECTORY}/common/os_interface/windows/sys_calls.cpp ${NEO_SHARED_TEST_DIRECTORY}/common/os_interface/windows/wddm_calls.cpp ${NEO_SHARED_TEST_DIRECTORY}/common/test_macros/test_checks_shared.cpp ${NEO_SOURCE_DIR}/opencl/test/unit_test/test_macros/test_checks_ocl.cpp ) target_link_libraries(igdrcl_windows_dll_tests ${NEO_MOCKABLE_LIB_NAME} ${NEO_SHARED_MOCKABLE_LIB_NAME} igdrcl_mocks gmock-gtest ${NEO_EXTRA_LIBS}) target_include_directories(igdrcl_windows_dll_tests PRIVATE ${NEO_SHARED_TEST_DIRECTORY}/common/test_configuration/unit_tests 
${NEO_SHARED_TEST_DIRECTORY}/common/test_macros/header${BRANCH_DIR_SUFFIX} ${NEO_SOURCE_DIR}/opencl/test/unit_test/gen_common${BRANCH_DIR_SUFFIX} ) create_project_source_tree(igdrcl_windows_dll_tests) add_custom_target(run_windows_dll_tests ALL DEPENDS unit_tests igdrcl_windows_dll_tests) add_custom_command( TARGET run_windows_dll_tests POST_BUILD COMMAND WORKING_DIRECTORY ${TargetDir} COMMAND echo Target Directory is: ${TargetDir} COMMAND echo Running Windows dll tests COMMAND igdrcl_windows_dll_tests ) add_dependencies(run_unit_tests run_windows_dll_tests) set_target_properties(igdrcl_windows_dll_tests PROPERTIES FOLDER ${OPENCL_TEST_PROJECTS_FOLDER}) set_target_properties(run_windows_dll_tests PROPERTIES FOLDER ${OPENCL_TEST_PROJECTS_FOLDER}) endif() compute-runtime-22.14.22890/opencl/test/unit_test/windows/get_devices_tests.cpp000066400000000000000000000053721422164147700275370ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/helpers/constants.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/os_interface/device_factory.h" #include "shared/source/os_interface/hw_info_config.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/default_hw_info.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/common/test_macros/test_checks_shared.h" namespace NEO { bool prepareDeviceEnvironments(ExecutionEnvironment &executionEnvironment); using PrepareDeviceEnvironmentsTests = ::testing::Test; HWTEST_F(PrepareDeviceEnvironmentsTests, WhenPrepareDeviceEnvironmentsIsCalledThenSuccessIsReturned) { ExecutionEnvironment executionEnvironment; auto returnValue = DeviceFactory::prepareDeviceEnvironments(executionEnvironment); EXPECT_EQ(true, returnValue); } 
HWTEST_F(PrepareDeviceEnvironmentsTests, whenPrepareDeviceEnvironmentsIsCalledThenGmmIsBeingInitializedAfterFillingHwInfo) { ExecutionEnvironment executionEnvironment; executionEnvironment.prepareRootDeviceEnvironments(1u); auto hwInfo = executionEnvironment.rootDeviceEnvironments[0]->getMutableHardwareInfo(); hwInfo->platform.eProductFamily = PRODUCT_FAMILY::IGFX_UNKNOWN; hwInfo->platform.ePCHProductFamily = PCH_PRODUCT_FAMILY::PCH_UNKNOWN; EXPECT_EQ(nullptr, executionEnvironment.rootDeviceEnvironments[0]->getGmmHelper()); auto returnValue = DeviceFactory::prepareDeviceEnvironments(executionEnvironment); EXPECT_TRUE(returnValue); EXPECT_NE(nullptr, executionEnvironment.rootDeviceEnvironments[0]->getGmmHelper()); } HWTEST_F(PrepareDeviceEnvironmentsTests, givenRcsAndCcsNotSupportedWhenInitializingThenReturnFalse) { REQUIRE_64BIT_OR_SKIP(); NEO::ExecutionEnvironment executionEnviornment; HardwareInfo hwInfo = *defaultHwInfo; auto hwInfoConfig = HwInfoConfig::get(hwInfo.platform.eProductFamily); hwInfoConfig->configureHardwareCustom(&hwInfo, nullptr); bool expectedValue = false; if (hwInfo.featureTable.flags.ftrRcsNode || hwInfo.featureTable.flags.ftrCCSNode) { expectedValue = true; } EXPECT_EQ(expectedValue, NEO::prepareDeviceEnvironments(executionEnviornment)); } HWTEST_F(PrepareDeviceEnvironmentsTests, Given32bitApplicationWhenDebugKeyIsSetThenSupportIsReported) { NEO::ExecutionEnvironment executionEnviornment; DebugManagerStateRestore restorer; DebugManager.flags.Force32BitDriverSupport.set(true); EXPECT_TRUE(NEO::prepareDeviceEnvironments(executionEnviornment)); } } // namespace NEOcompute-runtime-22.14.22890/opencl/test/unit_test/windows/os_interface_tests.cpp000066400000000000000000000036731422164147700277210ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/constants.h" #include "shared/source/os_interface/os_interface.h" #include 
"shared/test/common/mocks/mock_execution_environment.h" #include "shared/test/common/mocks/mock_wddm.h" #include "shared/test/common/test_macros/test.h" using namespace NEO; TEST(osInterfaceTests, GivenDefaultOsInterfaceThenLocalMemoryEnabled) { EXPECT_TRUE(OSInterface::osEnableLocalMemory); } TEST(osInterfaceTests, whenOsInterfaceSetupGmmInputArgsThenArgsAreSet) { MockExecutionEnvironment executionEnvironment; RootDeviceEnvironment rootDeviceEnvironment(executionEnvironment); auto wddm = new WddmMock(rootDeviceEnvironment); EXPECT_EQ(nullptr, rootDeviceEnvironment.osInterface.get()); wddm->init(); EXPECT_NE(nullptr, rootDeviceEnvironment.osInterface.get()); wddm->deviceRegistryPath = "registryPath"; auto expectedRegistryPath = wddm->deviceRegistryPath.c_str(); auto &adapterBDF = wddm->adapterBDF; uint32_t bus = 0x12; adapterBDF.Bus = bus; uint32_t device = 0x34; adapterBDF.Device = device; uint32_t function = 0x56; adapterBDF.Function = function; auto adapterBDFretrieved = wddm->getAdapterBDF(); EXPECT_EQ(bus, adapterBDFretrieved.Bus); EXPECT_EQ(device, adapterBDFretrieved.Device); EXPECT_EQ(function, adapterBDFretrieved.Function); GMM_INIT_IN_ARGS gmmInputArgs = {}; EXPECT_NE(0, memcmp(&wddm->getAdapterBDF(), &gmmInputArgs.stAdapterBDF, sizeof(ADAPTER_BDF))); EXPECT_STRNE(expectedRegistryPath, gmmInputArgs.DeviceRegistryPath); rootDeviceEnvironment.osInterface->getDriverModel()->setGmmInputArgs(&gmmInputArgs); EXPECT_EQ(0, memcmp(&wddm->getAdapterBDF(), &gmmInputArgs.stAdapterBDF, sizeof(ADAPTER_BDF))); EXPECT_EQ(GMM_CLIENT::GMM_OCL_VISTA, gmmInputArgs.ClientType); EXPECT_STREQ(expectedRegistryPath, gmmInputArgs.DeviceRegistryPath); } compute-runtime-22.14.22890/opencl/test/unit_test/windows/wddm_create_tests.cpp000066400000000000000000000017571422164147700275370ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/execution_environment/root_device_environment.h" #include 
"shared/source/helpers/hw_info.h" #include "shared/source/os_interface/os_interface.h" #include "shared/source/os_interface/windows/wddm/wddm.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/mocks/mock_execution_environment.h" #include "shared/test/common/test_macros/test.h" #include using namespace NEO; TEST(wddmCreateTests, givenInputVersionWhenCreatingThenCreateRequestedObject) { MockExecutionEnvironment executionEnvironment; RootDeviceEnvironment rootDeviceEnvironment(executionEnvironment); auto hwDeviceIds = OSInterface::discoverDevices(executionEnvironment); std::unique_ptr wddm(Wddm::createWddm(std::unique_ptr(hwDeviceIds[0].release()->as()), rootDeviceEnvironment)); EXPECT_NE(nullptr, wddm); } compute-runtime-22.14.22890/opencl/test/unit_test/xe_hp_core/000077500000000000000000000000001422164147700237425ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/xe_hp_core/CMakeLists.txt000066400000000000000000000015671422164147700265130ustar00rootroot00000000000000# # Copyright (C) 2021-2022 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_XE_HP_CORE) set(IGDRCL_SRCS_tests_xe_hp_core_excludes ${CMAKE_CURRENT_SOURCE_DIR}/excludes_ocl_xe_hp_core.cpp ) set_property(GLOBAL APPEND PROPERTY IGDRCL_SRCS_tests_excludes ${IGDRCL_SRCS_tests_xe_hp_core_excludes}) set(IGDRCL_SRCS_tests_xe_hp_core ${IGDRCL_SRCS_tests_xe_hp_core_excludes} ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/copy_engine_tests_xe_hp_core.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_device_caps_xe_hp_core.cpp ${CMAKE_CURRENT_SOURCE_DIR}/hw_helper_tests_xe_hp_core.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_platform_caps_xe_hp_core.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_sample_xe_hp_core.cpp ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_xe_hp_core}) add_subdirectories() endif() 
compute-runtime-22.14.22890/opencl/test/unit_test/xe_hp_core/copy_engine_tests_xe_hp_core.cpp000066400000000000000000001177151422164147700323760ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gmm_helper/client_context/gmm_client_context.h" #include "shared/source/gmm_helper/gmm.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/timestamp_packet.h" #include "shared/source/os_interface/hw_info_config.h" #include "shared/test/common/cmd_parse/hw_parse.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/libult/ult_command_stream_receiver.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/mocks/mock_gmm.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_context.h" using namespace NEO; struct BlitXE_HP_CORETests : public ::testing::Test { void SetUp() override { if (is32bit) { GTEST_SKIP(); } DebugManager.flags.RenderCompressedBuffersEnabled.set(true); DebugManager.flags.EnableLocalMemory.set(true); HardwareInfo hwInfo = *defaultHwInfo; hwInfo.featureTable.ftrBcsInfo = 1; hwInfo.capabilityTable.blitterOperationsSupported = true; clDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); } uint32_t flushBcsTask(CommandStreamReceiver *csr, const BlitProperties &blitProperties, bool blocking, Device &device) { BlitPropertiesContainer blitPropertiesContainer; blitPropertiesContainer.push_back(blitProperties); return csr->flushBcsTask(blitPropertiesContainer, blocking, false, device); } std::unique_ptr clDevice; TimestampPacketContainer timestampPacketContainer; CsrDependencies csrDependencies; DebugManagerStateRestore debugRestorer; 
}; XE_HP_CORE_TEST_F(BlitXE_HP_CORETests, givenCompressedBufferWhenProgrammingBltCommandThenSetCompressionFields) { using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT; auto csr = static_cast *>(clDevice->getEngine(aub_stream::EngineType::ENGINE_BCS, EngineUsage::Regular).commandStreamReceiver); MockContext context(clDevice.get()); cl_int retVal = CL_SUCCESS; auto bufferCompressed = clUniquePtr(Buffer::create(&context, CL_MEM_READ_WRITE, 2048, nullptr, retVal)); bufferCompressed->getGraphicsAllocation(clDevice->getRootDeviceIndex())->getDefaultGmm()->isCompressionEnabled = true; auto bufferNotCompressed = clUniquePtr(Buffer::create(&context, CL_MEM_READ_WRITE, 2048, nullptr, retVal)); bufferNotCompressed->getGraphicsAllocation(clDevice->getRootDeviceIndex())->getDefaultGmm()->isCompressionEnabled = false; auto gmmHelper = clDevice->getExecutionEnvironment()->rootDeviceEnvironments[0]->getGmmHelper(); uint32_t compressionFormat = gmmHelper->getClientContext()->getSurfaceStateCompressionFormat(GMM_RESOURCE_FORMAT::GMM_FORMAT_GENERIC_8BIT); { auto blitProperties = BlitProperties::constructPropertiesForCopy(bufferNotCompressed->getGraphicsAllocation(clDevice->getRootDeviceIndex()), bufferCompressed->getGraphicsAllocation(clDevice->getRootDeviceIndex()), 0, 0, {2048, 1, 1}, 0, 0, 0, 0, csr->getClearColorAllocation()); flushBcsTask(csr, blitProperties, true, clDevice->getDevice()); HardwareParse hwParser; hwParser.parseCommands(csr->commandStream); auto bltCmd = genCmdCast(*(hwParser.cmdList.begin())); EXPECT_NE(nullptr, bltCmd); EXPECT_EQ(bltCmd->getDestinationCompressionEnable(), XY_COPY_BLT::COMPRESSION_ENABLE::COMPRESSION_ENABLE_COMPRESSION_DISABLE); EXPECT_EQ(bltCmd->getDestinationAuxiliarysurfacemode(), XY_COPY_BLT::AUXILIARY_SURFACE_MODE_AUX_NONE); EXPECT_EQ(bltCmd->getDestinationCompressionFormat(), 0u); EXPECT_EQ(bltCmd->getSourceCompressionEnable(), XY_COPY_BLT::COMPRESSION_ENABLE::COMPRESSION_ENABLE_COMPRESSION_ENABLE); 
EXPECT_EQ(bltCmd->getSourceAuxiliarysurfacemode(), XY_COPY_BLT::AUXILIARY_SURFACE_MODE_AUX_CCS_E); EXPECT_EQ(bltCmd->getSourceCompressionFormat(), compressionFormat); } { auto offset = csr->commandStream.getUsed(); auto blitProperties = BlitProperties::constructPropertiesForCopy(bufferCompressed->getGraphicsAllocation(clDevice->getRootDeviceIndex()), bufferNotCompressed->getGraphicsAllocation(clDevice->getRootDeviceIndex()), 0, 0, {2048, 1, 1}, 0, 0, 0, 0, csr->getClearColorAllocation()); flushBcsTask(csr, blitProperties, true, clDevice->getDevice()); HardwareParse hwParser; hwParser.parseCommands(csr->commandStream, offset); auto bltCmd = genCmdCast(*(hwParser.cmdList.begin())); EXPECT_NE(nullptr, bltCmd); EXPECT_EQ(bltCmd->getDestinationCompressionEnable(), XY_COPY_BLT::COMPRESSION_ENABLE::COMPRESSION_ENABLE_COMPRESSION_ENABLE); EXPECT_EQ(bltCmd->getDestinationAuxiliarysurfacemode(), XY_COPY_BLT::AUXILIARY_SURFACE_MODE_AUX_CCS_E); EXPECT_EQ(bltCmd->getDestinationCompressionFormat(), compressionFormat); EXPECT_EQ(bltCmd->getSourceCompressionEnable(), XY_COPY_BLT::COMPRESSION_ENABLE::COMPRESSION_ENABLE_COMPRESSION_DISABLE); EXPECT_EQ(bltCmd->getSourceAuxiliarysurfacemode(), XY_COPY_BLT::AUXILIARY_SURFACE_MODE_AUX_NONE); EXPECT_EQ(bltCmd->getSourceCompressionFormat(), 0u); } } XE_HP_CORE_TEST_F(BlitXE_HP_CORETests, givenDebugFlagSetWhenCompressionEnabledThenForceCompressionFormat) { using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT; uint32_t compressionFormat = 3; DebugManager.flags.ForceBufferCompressionFormat.set(compressionFormat); auto csr = static_cast *>(clDevice->getEngine(aub_stream::EngineType::ENGINE_BCS, EngineUsage::Regular).commandStreamReceiver); MockContext context(clDevice.get()); cl_int retVal = CL_SUCCESS; auto bufferCompressed = clUniquePtr(Buffer::create(&context, CL_MEM_READ_WRITE, 2048, nullptr, retVal)); bufferCompressed->getGraphicsAllocation(clDevice->getRootDeviceIndex())->getDefaultGmm()->isCompressionEnabled = true; auto 
bufferNotCompressed = clUniquePtr(Buffer::create(&context, CL_MEM_READ_WRITE, 2048, nullptr, retVal)); bufferNotCompressed->getGraphicsAllocation(clDevice->getRootDeviceIndex())->getDefaultGmm()->isCompressionEnabled = false; { auto blitProperties = BlitProperties::constructPropertiesForCopy(bufferNotCompressed->getGraphicsAllocation(clDevice->getRootDeviceIndex()), bufferCompressed->getGraphicsAllocation(clDevice->getRootDeviceIndex()), 0, 0, {2048, 1, 1}, 0, 0, 0, 0, csr->getClearColorAllocation()); flushBcsTask(csr, blitProperties, true, clDevice->getDevice()); HardwareParse hwParser; hwParser.parseCommands(csr->commandStream); auto bltCmd = genCmdCast(*(hwParser.cmdList.begin())); EXPECT_NE(nullptr, bltCmd); EXPECT_EQ(bltCmd->getDestinationCompressionEnable(), XY_COPY_BLT::COMPRESSION_ENABLE::COMPRESSION_ENABLE_COMPRESSION_DISABLE); EXPECT_EQ(bltCmd->getDestinationAuxiliarysurfacemode(), XY_COPY_BLT::AUXILIARY_SURFACE_MODE_AUX_NONE); EXPECT_EQ(bltCmd->getDestinationCompressionFormat(), 0u); EXPECT_EQ(bltCmd->getSourceCompressionEnable(), XY_COPY_BLT::COMPRESSION_ENABLE::COMPRESSION_ENABLE_COMPRESSION_ENABLE); EXPECT_EQ(bltCmd->getSourceAuxiliarysurfacemode(), XY_COPY_BLT::AUXILIARY_SURFACE_MODE_AUX_CCS_E); EXPECT_EQ(bltCmd->getSourceCompressionFormat(), compressionFormat); } { auto offset = csr->commandStream.getUsed(); auto blitProperties = BlitProperties::constructPropertiesForCopy(bufferCompressed->getGraphicsAllocation(clDevice->getRootDeviceIndex()), bufferNotCompressed->getGraphicsAllocation(clDevice->getRootDeviceIndex()), 0, 0, {2048, 1, 1}, 0, 0, 0, 0, csr->getClearColorAllocation()); flushBcsTask(csr, blitProperties, true, clDevice->getDevice()); HardwareParse hwParser; hwParser.parseCommands(csr->commandStream, offset); auto bltCmd = genCmdCast(*(hwParser.cmdList.begin())); EXPECT_NE(nullptr, bltCmd); EXPECT_EQ(bltCmd->getDestinationCompressionEnable(), XY_COPY_BLT::COMPRESSION_ENABLE::COMPRESSION_ENABLE_COMPRESSION_ENABLE); 
EXPECT_EQ(bltCmd->getDestinationAuxiliarysurfacemode(), XY_COPY_BLT::AUXILIARY_SURFACE_MODE_AUX_CCS_E); EXPECT_EQ(bltCmd->getDestinationCompressionFormat(), compressionFormat); EXPECT_EQ(bltCmd->getSourceCompressionEnable(), XY_COPY_BLT::COMPRESSION_ENABLE::COMPRESSION_ENABLE_COMPRESSION_DISABLE); EXPECT_EQ(bltCmd->getSourceAuxiliarysurfacemode(), XY_COPY_BLT::AUXILIARY_SURFACE_MODE_AUX_NONE); EXPECT_EQ(bltCmd->getSourceCompressionFormat(), 0u); } } XE_HP_CORE_TEST_F(BlitXE_HP_CORETests, givenBufferWhenProgrammingBltCommandThenSetMocs) { using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT; auto &bcsEngine = clDevice->getEngine(aub_stream::EngineType::ENGINE_BCS, EngineUsage::Regular); auto csr = static_cast *>(bcsEngine.commandStreamReceiver); MockContext context(clDevice.get()); cl_int retVal = CL_SUCCESS; auto buffer = clUniquePtr(Buffer::create(&context, CL_MEM_READ_WRITE, 1, nullptr, retVal)); auto blitProperties = BlitProperties::constructPropertiesForCopy(buffer->getGraphicsAllocation(clDevice->getRootDeviceIndex()), buffer->getGraphicsAllocation(clDevice->getRootDeviceIndex()), 0, 0, {1, 1, 1}, 0, 0, 0, 0, csr->getClearColorAllocation()); flushBcsTask(csr, blitProperties, true, clDevice->getDevice()); HardwareParse hwParser; hwParser.parseCommands(csr->commandStream); auto bltCmd = genCmdCast(*(hwParser.cmdList.begin())); EXPECT_NE(nullptr, bltCmd); auto mocs = clDevice->getRootDeviceEnvironment().getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED); EXPECT_EQ(mocs, bltCmd->getDestinationMOCS()); EXPECT_EQ(mocs, bltCmd->getSourceMOCS()); } XE_HP_CORE_TEST_F(BlitXE_HP_CORETests, givenBufferWhenProgrammingBltCommandThenSetMocsToValueOfDebugKey) { DebugManagerStateRestore restorer; DebugManager.flags.OverrideBlitterMocs.set(0u); using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT; auto &bcsEngine = clDevice->getEngine(aub_stream::EngineType::ENGINE_BCS, EngineUsage::Regular); auto csr = static_cast *>(bcsEngine.commandStreamReceiver); 
MockContext context(clDevice.get()); cl_int retVal = CL_SUCCESS; auto buffer = clUniquePtr(Buffer::create(&context, CL_MEM_READ_WRITE, 1, nullptr, retVal)); auto blitProperties = BlitProperties::constructPropertiesForCopy(buffer->getGraphicsAllocation(clDevice->getRootDeviceIndex()), buffer->getGraphicsAllocation(clDevice->getRootDeviceIndex()), 0, 0, {1, 1, 1}, 0, 0, 0, 0, csr->getClearColorAllocation()); flushBcsTask(csr, blitProperties, true, clDevice->getDevice()); HardwareParse hwParser; hwParser.parseCommands(csr->commandStream); auto bltCmd = genCmdCast(*(hwParser.cmdList.begin())); EXPECT_NE(nullptr, bltCmd); EXPECT_EQ(0u, bltCmd->getDestinationMOCS()); EXPECT_EQ(0u, bltCmd->getSourceMOCS()); } XE_HP_CORE_TEST_F(BlitXE_HP_CORETests, givenCompressedBufferWhenResolveBlitIsCalledThenProgramSpecialOperationMode) { using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT; auto &bcsEngine = clDevice->getEngine(aub_stream::EngineType::ENGINE_BCS, EngineUsage::Regular); auto csr = static_cast *>(bcsEngine.commandStreamReceiver); MockContext context(clDevice.get()); cl_int retVal = CL_SUCCESS; auto buffer = clUniquePtr(Buffer::create(&context, CL_MEM_READ_WRITE, 2048, nullptr, retVal)); auto blitProperties = BlitProperties::constructPropertiesForAuxTranslation(AuxTranslationDirection::AuxToNonAux, buffer->getGraphicsAllocation(clDevice->getRootDeviceIndex()), csr->getClearColorAllocation()); flushBcsTask(csr, blitProperties, false, clDevice->getDevice()); HardwareParse hwParser; hwParser.parseCommands(csr->commandStream); auto bltCmd = genCmdCast(*(hwParser.cmdList.begin())); EXPECT_NE(nullptr, bltCmd); EXPECT_EQ(XY_COPY_BLT::SPECIAL_MODE_OF_OPERATION::SPECIAL_MODE_OF_OPERATION_FULL_RESOLVE, bltCmd->getSpecialModeofOperation()); } XE_HP_CORE_TEST_F(BlitXE_HP_CORETests, givenCompressedBufferWhenNonAuxToAuxBlitIsCalledThenDontProgramSourceCompression) { using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT; auto &bcsEngine = 
clDevice->getEngine(aub_stream::EngineType::ENGINE_BCS, EngineUsage::Regular); auto csr = static_cast *>(bcsEngine.commandStreamReceiver); MockContext context(clDevice.get()); cl_int retVal = CL_SUCCESS; auto buffer = clUniquePtr(Buffer::create(&context, CL_MEM_READ_WRITE, 2048, nullptr, retVal)); auto blitProperties = BlitProperties::constructPropertiesForAuxTranslation(AuxTranslationDirection::NonAuxToAux, buffer->getGraphicsAllocation(clDevice->getRootDeviceIndex()), csr->getClearColorAllocation()); flushBcsTask(csr, blitProperties, false, clDevice->getDevice()); HardwareParse hwParser; hwParser.parseCommands(csr->commandStream); auto bltCmd = genCmdCast(*(hwParser.cmdList.begin())); EXPECT_NE(nullptr, bltCmd); EXPECT_EQ(XY_COPY_BLT::COMPRESSION_ENABLE::COMPRESSION_ENABLE_COMPRESSION_DISABLE, bltCmd->getSourceCompressionEnable()); } XE_HP_CORE_TEST_F(BlitXE_HP_CORETests, given2dBlitCommandWhenDispatchingThenSetValidSurfaceType) { using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT; auto &bcsEngine = clDevice->getEngine(aub_stream::EngineType::ENGINE_BCS, EngineUsage::Regular); auto csr = static_cast *>(bcsEngine.commandStreamReceiver); MockContext context(clDevice.get()); cl_int retVal = CL_SUCCESS; auto buffer = clUniquePtr(Buffer::create(&context, CL_MEM_READ_WRITE, 1, nullptr, retVal)); auto allocation = buffer->getGraphicsAllocation(clDevice->getRootDeviceIndex()); size_t offset = 0; { // 1D auto blitProperties = BlitProperties::constructPropertiesForCopy(allocation, allocation, 0, 0, {BlitterConstants::maxBlitWidth - 1, 1, 1}, 0, 0, 0, 0, csr->getClearColorAllocation()); flushBcsTask(csr, blitProperties, false, clDevice->getDevice()); HardwareParse hwParser; hwParser.parseCommands(csr->commandStream); auto bltCmd = genCmdCast(*(hwParser.cmdList.begin())); EXPECT_NE(nullptr, bltCmd); EXPECT_EQ(XY_COPY_BLT::SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_1D, bltCmd->getDestinationSurfaceType()); EXPECT_EQ(XY_COPY_BLT::SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_1D, 
bltCmd->getSourceSurfaceType()); EXPECT_EQ(bltCmd->getSourceSurfaceWidth(), bltCmd->getDestinationX2CoordinateRight()); EXPECT_EQ(bltCmd->getSourceSurfaceHeight(), bltCmd->getDestinationY2CoordinateBottom()); EXPECT_EQ(bltCmd->getDestinationSurfaceWidth(), bltCmd->getDestinationX2CoordinateRight()); EXPECT_EQ(bltCmd->getDestinationSurfaceHeight(), bltCmd->getDestinationY2CoordinateBottom()); offset = csr->commandStream.getUsed(); } { // 2D auto blitProperties = BlitProperties::constructPropertiesForCopy(allocation, allocation, 0, 0, {(2 * BlitterConstants::maxBlitWidth) + 1, 1, 1}, 0, 0, 0, 0, csr->getClearColorAllocation()); flushBcsTask(csr, blitProperties, false, clDevice->getDevice()); HardwareParse hwParser; hwParser.parseCommands(csr->commandStream, offset); auto bltCmd = genCmdCast(*(hwParser.cmdList.begin())); EXPECT_NE(nullptr, bltCmd); EXPECT_EQ(XY_COPY_BLT::SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_2D, bltCmd->getDestinationSurfaceType()); EXPECT_EQ(XY_COPY_BLT::SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_2D, bltCmd->getSourceSurfaceType()); EXPECT_EQ(bltCmd->getSourceSurfaceWidth(), bltCmd->getDestinationX2CoordinateRight()); EXPECT_EQ(bltCmd->getSourceSurfaceHeight(), bltCmd->getDestinationY2CoordinateBottom()); EXPECT_EQ(bltCmd->getDestinationSurfaceWidth(), bltCmd->getDestinationX2CoordinateRight()); EXPECT_EQ(bltCmd->getDestinationSurfaceHeight(), bltCmd->getDestinationY2CoordinateBottom()); } } XE_HP_CORE_TEST_F(BlitXE_HP_CORETests, givenBufferWhenProgrammingBltCommandThenSetTargetMemory) { using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT; auto csr = static_cast *>(clDevice->getEngine(aub_stream::EngineType::ENGINE_BCS, EngineUsage::Regular).commandStreamReceiver); MockContext context(clDevice.get()); cl_int retVal = CL_SUCCESS; auto bufferInSystemPool = clUniquePtr(Buffer::create(&context, CL_MEM_FORCE_HOST_MEMORY_INTEL, 2048, nullptr, retVal)); 
EXPECT_TRUE(MemoryPool::isSystemMemoryPool(bufferInSystemPool->getGraphicsAllocation(clDevice->getRootDeviceIndex())->getMemoryPool())); auto bufferInLocalPool = clUniquePtr(Buffer::create(&context, CL_MEM_READ_WRITE, 2048, nullptr, retVal)); EXPECT_FALSE(MemoryPool::isSystemMemoryPool(bufferInLocalPool->getGraphicsAllocation(clDevice->getRootDeviceIndex())->getMemoryPool())); { auto blitProperties = BlitProperties::constructPropertiesForCopy(bufferInSystemPool->getGraphicsAllocation(clDevice->getRootDeviceIndex()), bufferInLocalPool->getGraphicsAllocation(clDevice->getRootDeviceIndex()), 0, 0, {2048, 1, 1}, 0, 0, 0, 0, csr->getClearColorAllocation()); flushBcsTask(csr, blitProperties, true, clDevice->getDevice()); HardwareParse hwParser; hwParser.parseCommands(csr->commandStream); auto bltCmd = genCmdCast(*(hwParser.cmdList.begin())); EXPECT_NE(nullptr, bltCmd); EXPECT_EQ(bltCmd->getSourceTargetMemory(), XY_COPY_BLT::TARGET_MEMORY::TARGET_MEMORY_LOCAL_MEM); EXPECT_EQ(bltCmd->getDestinationTargetMemory(), XY_COPY_BLT::TARGET_MEMORY::TARGET_MEMORY_SYSTEM_MEM); } { auto offset = csr->commandStream.getUsed(); auto blitProperties = BlitProperties::constructPropertiesForCopy(bufferInLocalPool->getGraphicsAllocation(clDevice->getRootDeviceIndex()), bufferInSystemPool->getGraphicsAllocation(clDevice->getRootDeviceIndex()), 0, 0, {2048, 1, 1}, 0, 0, 0, 0, csr->getClearColorAllocation()); flushBcsTask(csr, blitProperties, true, clDevice->getDevice()); HardwareParse hwParser; hwParser.parseCommands(csr->commandStream, offset); auto bltCmd = genCmdCast(*(hwParser.cmdList.begin())); EXPECT_NE(nullptr, bltCmd); EXPECT_EQ(bltCmd->getDestinationTargetMemory(), XY_COPY_BLT::TARGET_MEMORY::TARGET_MEMORY_LOCAL_MEM); EXPECT_EQ(bltCmd->getSourceTargetMemory(), XY_COPY_BLT::TARGET_MEMORY::TARGET_MEMORY_SYSTEM_MEM); } } XE_HP_CORE_TEST_F(BlitXE_HP_CORETests, givenBufferWhenProgrammingBltCommandThenSetTargetMemoryInCpuAccesingLocalMemoryMode) { DebugManagerStateRestore restorer; 
DebugManager.flags.ForceLocalMemoryAccessMode.set(1); using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT; PLATFORM platform = clDevice->getHardwareInfo().platform; const auto &hwInfoConfig = *HwInfoConfig::get(platform.eProductFamily); const bool isXeHPRev0 = (platform.eProductFamily == IGFX_XE_HP_SDV) && (hwInfoConfig.getSteppingFromHwRevId(clDevice->getHardwareInfo()) < REVISION_B); auto csr = static_cast *>(clDevice->getEngine(aub_stream::EngineType::ENGINE_BCS, EngineUsage::Regular).commandStreamReceiver); MockContext context(clDevice.get()); cl_int retVal = CL_SUCCESS; auto bufferInSystemPool = clUniquePtr(Buffer::create(&context, CL_MEM_FORCE_HOST_MEMORY_INTEL, 2048, nullptr, retVal)); EXPECT_TRUE(MemoryPool::isSystemMemoryPool(bufferInSystemPool->getGraphicsAllocation(clDevice->getRootDeviceIndex())->getMemoryPool())); auto bufferInLocalPool = clUniquePtr(Buffer::create(&context, CL_MEM_READ_WRITE, 2048, nullptr, retVal)); EXPECT_FALSE(MemoryPool::isSystemMemoryPool(bufferInLocalPool->getGraphicsAllocation(clDevice->getRootDeviceIndex())->getMemoryPool())); { auto blitProperties = BlitProperties::constructPropertiesForCopy(bufferInSystemPool->getGraphicsAllocation(clDevice->getRootDeviceIndex()), bufferInLocalPool->getGraphicsAllocation(clDevice->getRootDeviceIndex()), 0, 0, {2048, 1, 1}, 0, 0, 0, 0, csr->getClearColorAllocation()); flushBcsTask(csr, blitProperties, true, clDevice->getDevice()); HardwareParse hwParser; hwParser.parseCommands(csr->commandStream); auto bltCmd = genCmdCast(*(hwParser.cmdList.begin())); EXPECT_NE(nullptr, bltCmd); if (isXeHPRev0) { EXPECT_EQ(bltCmd->getSourceTargetMemory(), XY_COPY_BLT::TARGET_MEMORY::TARGET_MEMORY_SYSTEM_MEM); } else { EXPECT_EQ(bltCmd->getSourceTargetMemory(), XY_COPY_BLT::TARGET_MEMORY::TARGET_MEMORY_LOCAL_MEM); } EXPECT_EQ(bltCmd->getDestinationTargetMemory(), XY_COPY_BLT::TARGET_MEMORY::TARGET_MEMORY_SYSTEM_MEM); } { auto offset = csr->commandStream.getUsed(); auto blitProperties = 
BlitProperties::constructPropertiesForCopy(bufferInLocalPool->getGraphicsAllocation(clDevice->getRootDeviceIndex()), bufferInSystemPool->getGraphicsAllocation(clDevice->getRootDeviceIndex()), 0, 0, {2048, 1, 1}, 0, 0, 0, 0, csr->getClearColorAllocation()); flushBcsTask(csr, blitProperties, true, clDevice->getDevice()); HardwareParse hwParser; hwParser.parseCommands(csr->commandStream, offset); auto bltCmd = genCmdCast(*(hwParser.cmdList.begin())); EXPECT_NE(nullptr, bltCmd); if (isXeHPRev0) { EXPECT_EQ(bltCmd->getDestinationTargetMemory(), XY_COPY_BLT::TARGET_MEMORY::TARGET_MEMORY_SYSTEM_MEM); } else { EXPECT_EQ(bltCmd->getDestinationTargetMemory(), XY_COPY_BLT::TARGET_MEMORY::TARGET_MEMORY_LOCAL_MEM); } EXPECT_EQ(bltCmd->getSourceTargetMemory(), XY_COPY_BLT::TARGET_MEMORY::TARGET_MEMORY_SYSTEM_MEM); } } XE_HP_CORE_TEST_F(BlitXE_HP_CORETests, givenBufferWhenProgrammingBltCommandThenSetTargetMemoryToSystemWhenDebugKeyPresent) { DebugManagerStateRestore restorer; using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT; auto csr = static_cast *>(clDevice->getEngine(aub_stream::EngineType::ENGINE_BCS, EngineUsage::Regular).commandStreamReceiver); MockContext context(clDevice.get()); cl_int retVal = CL_SUCCESS; auto bufferInSystemPool = clUniquePtr(Buffer::create(&context, CL_MEM_FORCE_HOST_MEMORY_INTEL, 2048, nullptr, retVal)); EXPECT_TRUE(MemoryPool::isSystemMemoryPool(bufferInSystemPool->getGraphicsAllocation(clDevice->getRootDeviceIndex())->getMemoryPool())); auto bufferInLocalPool = clUniquePtr(Buffer::create(&context, CL_MEM_READ_WRITE, 2048, nullptr, retVal)); EXPECT_FALSE(MemoryPool::isSystemMemoryPool(bufferInLocalPool->getGraphicsAllocation(clDevice->getRootDeviceIndex())->getMemoryPool())); DebugManager.flags.OverrideBlitterTargetMemory.set(0u); { auto blitProperties = BlitProperties::constructPropertiesForCopy(bufferInSystemPool->getGraphicsAllocation(clDevice->getRootDeviceIndex()), bufferInLocalPool->getGraphicsAllocation(clDevice->getRootDeviceIndex()), 0, 
0, {2048, 1, 1}, 0, 0, 0, 0, csr->getClearColorAllocation()); flushBcsTask(csr, blitProperties, true, clDevice->getDevice()); HardwareParse hwParser; hwParser.parseCommands(csr->commandStream); auto bltCmd = genCmdCast(*(hwParser.cmdList.begin())); EXPECT_NE(nullptr, bltCmd); EXPECT_EQ(bltCmd->getDestinationTargetMemory(), XY_COPY_BLT::TARGET_MEMORY::TARGET_MEMORY_SYSTEM_MEM); EXPECT_EQ(bltCmd->getSourceTargetMemory(), XY_COPY_BLT::TARGET_MEMORY::TARGET_MEMORY_SYSTEM_MEM); } DebugManager.flags.OverrideBlitterTargetMemory.set(1u); { auto offset = csr->commandStream.getUsed(); auto blitProperties = BlitProperties::constructPropertiesForCopy(bufferInLocalPool->getGraphicsAllocation(clDevice->getRootDeviceIndex()), bufferInSystemPool->getGraphicsAllocation(clDevice->getRootDeviceIndex()), 0, 0, {2048, 1, 1}, 0, 0, 0, 0, csr->getClearColorAllocation()); flushBcsTask(csr, blitProperties, true, clDevice->getDevice()); HardwareParse hwParser; hwParser.parseCommands(csr->commandStream, offset); auto bltCmd = genCmdCast(*(hwParser.cmdList.begin())); EXPECT_NE(nullptr, bltCmd); EXPECT_EQ(bltCmd->getDestinationTargetMemory(), XY_COPY_BLT::TARGET_MEMORY::TARGET_MEMORY_LOCAL_MEM); EXPECT_EQ(bltCmd->getSourceTargetMemory(), XY_COPY_BLT::TARGET_MEMORY::TARGET_MEMORY_LOCAL_MEM); } DebugManager.flags.OverrideBlitterTargetMemory.set(2u); { auto offset = csr->commandStream.getUsed(); auto blitProperties = BlitProperties::constructPropertiesForCopy(bufferInLocalPool->getGraphicsAllocation(clDevice->getRootDeviceIndex()), bufferInSystemPool->getGraphicsAllocation(clDevice->getRootDeviceIndex()), 0, 0, {2048, 1, 1}, 0, 0, 0, 0, csr->getClearColorAllocation()); flushBcsTask(csr, blitProperties, true, clDevice->getDevice()); HardwareParse hwParser; hwParser.parseCommands(csr->commandStream, offset); auto bltCmd = genCmdCast(*(hwParser.cmdList.begin())); EXPECT_NE(nullptr, bltCmd); EXPECT_EQ(bltCmd->getDestinationTargetMemory(), XY_COPY_BLT::TARGET_MEMORY::TARGET_MEMORY_LOCAL_MEM); 
EXPECT_EQ(bltCmd->getSourceTargetMemory(), XY_COPY_BLT::TARGET_MEMORY::TARGET_MEMORY_SYSTEM_MEM); } } XE_HP_CORE_TEST_F(BlitXE_HP_CORETests, givenBufferWhenProgrammingBltCommandAndRevisionB0ThenSetTargetMemory) { using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT; HardwareInfo *hwInfo = clDevice->getRootDeviceEnvironment().getMutableHardwareInfo(); const auto &hwInfoConfig = *HwInfoConfig::get(hwInfo->platform.eProductFamily); hwInfo->platform.usRevId = hwInfoConfig.getHwRevIdFromStepping(REVISION_B, *hwInfo); auto csr = static_cast *>(clDevice->getEngine(aub_stream::EngineType::ENGINE_BCS, EngineUsage::Regular).commandStreamReceiver); MockContext context(clDevice.get()); cl_int retVal = CL_SUCCESS; auto bufferInSystemPool = clUniquePtr(Buffer::create(&context, CL_MEM_FORCE_HOST_MEMORY_INTEL, 2048, nullptr, retVal)); EXPECT_TRUE(MemoryPool::isSystemMemoryPool(bufferInSystemPool->getGraphicsAllocation(clDevice->getRootDeviceIndex())->getMemoryPool())); auto bufferInLocalPool = clUniquePtr(Buffer::create(&context, CL_MEM_READ_WRITE, 2048, nullptr, retVal)); EXPECT_FALSE(MemoryPool::isSystemMemoryPool(bufferInLocalPool->getGraphicsAllocation(clDevice->getRootDeviceIndex())->getMemoryPool())); { auto blitProperties = BlitProperties::constructPropertiesForCopy(bufferInSystemPool->getGraphicsAllocation(clDevice->getRootDeviceIndex()), bufferInLocalPool->getGraphicsAllocation(clDevice->getRootDeviceIndex()), 0, 0, {2048, 1, 1}, 0, 0, 0, 0, csr->getClearColorAllocation()); flushBcsTask(csr, blitProperties, true, clDevice->getDevice()); HardwareParse hwParser; hwParser.parseCommands(csr->commandStream); auto bltCmd = genCmdCast(*(hwParser.cmdList.begin())); EXPECT_NE(nullptr, bltCmd); EXPECT_EQ(bltCmd->getSourceTargetMemory(), XY_COPY_BLT::TARGET_MEMORY::TARGET_MEMORY_LOCAL_MEM); EXPECT_EQ(bltCmd->getDestinationTargetMemory(), XY_COPY_BLT::TARGET_MEMORY::TARGET_MEMORY_SYSTEM_MEM); } { auto offset = csr->commandStream.getUsed(); auto blitProperties = 
BlitProperties::constructPropertiesForCopy(bufferInLocalPool->getGraphicsAllocation(clDevice->getRootDeviceIndex()), bufferInSystemPool->getGraphicsAllocation(clDevice->getRootDeviceIndex()), 0, 0, {2048, 1, 1}, 0, 0, 0, 0, csr->getClearColorAllocation()); flushBcsTask(csr, blitProperties, true, clDevice->getDevice()); HardwareParse hwParser; hwParser.parseCommands(csr->commandStream, offset); auto bltCmd = genCmdCast(*(hwParser.cmdList.begin())); EXPECT_NE(nullptr, bltCmd); EXPECT_EQ(bltCmd->getDestinationTargetMemory(), XY_COPY_BLT::TARGET_MEMORY::TARGET_MEMORY_LOCAL_MEM); EXPECT_EQ(bltCmd->getSourceTargetMemory(), XY_COPY_BLT::TARGET_MEMORY::TARGET_MEMORY_SYSTEM_MEM); } } XE_HP_CORE_TEST_F(BlitXE_HP_CORETests, givenDebugFlagSetWhenCompressionIsUsedThenForceCompressionEnableFields) { using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT; auto blitCmd = FamilyType::cmdInitXyCopyBlt; blitCmd.setDestinationX2CoordinateRight(1); blitCmd.setDestinationY2CoordinateBottom(1); auto gmm = std::make_unique(clDevice->getGmmClientContext()); gmm->isCompressionEnabled = true; MockGraphicsAllocation mockAllocation(0, AllocationType::INTERNAL_HOST_MEMORY, reinterpret_cast(0x1234), 0x1000, 0, sizeof(uint32_t), MemoryPool::System4KBPages, MemoryManager::maxOsContextCount); mockAllocation.setGmm(gmm.get(), 0); BlitProperties properties = {}; properties.srcAllocation = &mockAllocation; properties.dstAllocation = &mockAllocation; properties.clearColorAllocation = &mockAllocation; { DebugManager.flags.ForceCompressionDisabledForCompressedBlitCopies.set(1); BlitCommandsHelper::appendBlitCommandsForBuffer(properties, blitCmd, *clDevice->getExecutionEnvironment()->rootDeviceEnvironments[clDevice->getRootDeviceIndex()]); EXPECT_EQ(blitCmd.getDestinationCompressionEnable(), XY_COPY_BLT::COMPRESSION_ENABLE::COMPRESSION_ENABLE_COMPRESSION_ENABLE); EXPECT_EQ(blitCmd.getDestinationAuxiliarysurfacemode(), XY_COPY_BLT::AUXILIARY_SURFACE_MODE_AUX_CCS_E); 
EXPECT_EQ(blitCmd.getSourceCompressionEnable(), XY_COPY_BLT::COMPRESSION_ENABLE::COMPRESSION_ENABLE_COMPRESSION_ENABLE); EXPECT_EQ(blitCmd.getSourceAuxiliarysurfacemode(), XY_COPY_BLT::AUXILIARY_SURFACE_MODE_AUX_CCS_E); } { DebugManager.flags.ForceCompressionDisabledForCompressedBlitCopies.set(0); BlitCommandsHelper::appendBlitCommandsForBuffer(properties, blitCmd, *clDevice->getExecutionEnvironment()->rootDeviceEnvironments[clDevice->getRootDeviceIndex()]); EXPECT_EQ(blitCmd.getDestinationCompressionEnable(), XY_COPY_BLT::COMPRESSION_ENABLE::COMPRESSION_ENABLE_COMPRESSION_DISABLE); EXPECT_EQ(blitCmd.getDestinationAuxiliarysurfacemode(), XY_COPY_BLT::AUXILIARY_SURFACE_MODE_AUX_CCS_E); EXPECT_EQ(blitCmd.getSourceCompressionEnable(), XY_COPY_BLT::COMPRESSION_ENABLE::COMPRESSION_ENABLE_COMPRESSION_DISABLE); EXPECT_EQ(blitCmd.getSourceAuxiliarysurfacemode(), XY_COPY_BLT::AUXILIARY_SURFACE_MODE_AUX_CCS_E); } } XE_HP_CORE_TEST_F(BlitXE_HP_CORETests, givenDebugFlagForClearColorNotSetWhenProgrammingBlitCommandForBuffersThenClearColorAddressIsNotProgrammed) { using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT; DebugManagerStateRestore restore; DebugManager.flags.UseClearColorAllocationForBlitter.set(false); auto blitCmd = FamilyType::cmdInitXyCopyBlt; blitCmd.setDestinationX2CoordinateRight(1); blitCmd.setDestinationY2CoordinateBottom(1); MockGraphicsAllocation mockAllocation; BlitProperties properties = {}; properties.srcAllocation = &mockAllocation; properties.dstAllocation = &mockAllocation; properties.clearColorAllocation = &mockAllocation; BlitCommandsHelper::appendBlitCommandsForBuffer(properties, blitCmd, *clDevice->getExecutionEnvironment()->rootDeviceEnvironments[clDevice->getRootDeviceIndex()]); EXPECT_EQ(blitCmd.getSourceClearValueEnable(), XY_COPY_BLT::CLEAR_VALUE_ENABLE::CLEAR_VALUE_ENABLE_DISABLE); EXPECT_EQ(0u, blitCmd.getSourceClearAddressLow()); EXPECT_EQ(0u, blitCmd.getSourceClearAddressHigh()); EXPECT_EQ(blitCmd.getDestinationClearValueEnable(), 
XY_COPY_BLT::CLEAR_VALUE_ENABLE::CLEAR_VALUE_ENABLE_DISABLE); EXPECT_EQ(0u, blitCmd.getDestinationClearAddressLow()); EXPECT_EQ(0u, blitCmd.getDestinationClearAddressHigh()); } XE_HP_CORE_TEST_F(BlitXE_HP_CORETests, givenDebugFlagForClearColorSetWhenProgrammingBlitCommandForBuffersThenClearColorAddressIsProgrammed) { using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT; DebugManagerStateRestore restore; DebugManager.flags.UseClearColorAllocationForBlitter.set(true); auto blitCmd = FamilyType::cmdInitXyCopyBlt; blitCmd.setDestinationX2CoordinateRight(1); blitCmd.setDestinationY2CoordinateBottom(1); MockGraphicsAllocation mockAllocation; BlitProperties properties = {}; properties.srcAllocation = &mockAllocation; properties.dstAllocation = &mockAllocation; properties.clearColorAllocation = &mockAllocation; BlitCommandsHelper::appendBlitCommandsForBuffer(properties, blitCmd, *clDevice->getExecutionEnvironment()->rootDeviceEnvironments[clDevice->getRootDeviceIndex()]); auto addressLow = static_cast(mockAllocation.getGpuAddress() & 0xFFFFFFFFULL); auto addressHigh = static_cast(mockAllocation.getGpuAddress() >> 32); EXPECT_EQ(blitCmd.getSourceClearValueEnable(), XY_COPY_BLT::CLEAR_VALUE_ENABLE::CLEAR_VALUE_ENABLE_ENABLE); EXPECT_EQ(addressLow, blitCmd.getSourceClearAddressLow()); EXPECT_EQ(addressHigh, blitCmd.getSourceClearAddressHigh()); EXPECT_EQ(blitCmd.getDestinationClearValueEnable(), XY_COPY_BLT::CLEAR_VALUE_ENABLE::CLEAR_VALUE_ENABLE_ENABLE); EXPECT_EQ(addressLow, blitCmd.getDestinationClearAddressLow()); EXPECT_EQ(addressHigh, blitCmd.getDestinationClearAddressHigh()); } XE_HP_CORE_TEST_F(BlitXE_HP_CORETests, givenDebugFlagForClearColorNotSetWhenProgrammingBlitCommandForImagesThenClearColorAddressIsNotProgrammed) { using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT; DebugManagerStateRestore restore; DebugManager.flags.UseClearColorAllocationForBlitter.set(false); auto blitCmd = FamilyType::cmdInitXyCopyBlt; MockGraphicsAllocation mockAllocation; BlitProperties 
properties = {}; properties.srcSize = {1, 1, 1}; properties.dstSize = {1, 1, 1}; properties.srcAllocation = &mockAllocation; properties.dstAllocation = &mockAllocation; properties.clearColorAllocation = &mockAllocation; uint32_t srcSlicePitch = 0u; uint32_t dstSlicePitch = 0u; BlitCommandsHelper::appendBlitCommandsForImages(properties, blitCmd, *clDevice->getExecutionEnvironment()->rootDeviceEnvironments[clDevice->getRootDeviceIndex()], srcSlicePitch, dstSlicePitch); EXPECT_EQ(blitCmd.getSourceClearValueEnable(), XY_COPY_BLT::CLEAR_VALUE_ENABLE::CLEAR_VALUE_ENABLE_DISABLE); EXPECT_EQ(0u, blitCmd.getSourceClearAddressLow()); EXPECT_EQ(0u, blitCmd.getSourceClearAddressHigh()); EXPECT_EQ(blitCmd.getDestinationClearValueEnable(), XY_COPY_BLT::CLEAR_VALUE_ENABLE::CLEAR_VALUE_ENABLE_DISABLE); EXPECT_EQ(0u, blitCmd.getDestinationClearAddressLow()); EXPECT_EQ(0u, blitCmd.getDestinationClearAddressHigh()); } XE_HP_CORE_TEST_F(BlitXE_HP_CORETests, givenDebugFlagForClearColorSetWhenProgrammingBlitCommandForImagesThenClearColorAddressIsProgrammed) { using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT; DebugManagerStateRestore restore; DebugManager.flags.UseClearColorAllocationForBlitter.set(true); auto blitCmd = FamilyType::cmdInitXyCopyBlt; MockGraphicsAllocation mockAllocation; BlitProperties properties = {}; properties.srcSize = {1, 1, 1}; properties.dstSize = {1, 1, 1}; properties.srcAllocation = &mockAllocation; properties.dstAllocation = &mockAllocation; properties.clearColorAllocation = &mockAllocation; uint32_t srcSlicePitch = 0; uint32_t dstSlicePitch = 0; BlitCommandsHelper::appendBlitCommandsForImages(properties, blitCmd, *clDevice->getExecutionEnvironment()->rootDeviceEnvironments[clDevice->getRootDeviceIndex()], srcSlicePitch, dstSlicePitch); auto addressLow = static_cast(mockAllocation.getGpuAddress() & 0xFFFFFFFFULL); auto addressHigh = static_cast(mockAllocation.getGpuAddress() >> 32); EXPECT_EQ(blitCmd.getSourceClearValueEnable(), 
XY_COPY_BLT::CLEAR_VALUE_ENABLE::CLEAR_VALUE_ENABLE_ENABLE); EXPECT_EQ(addressLow, blitCmd.getSourceClearAddressLow()); EXPECT_EQ(addressHigh, blitCmd.getSourceClearAddressHigh()); EXPECT_EQ(blitCmd.getDestinationClearValueEnable(), XY_COPY_BLT::CLEAR_VALUE_ENABLE::CLEAR_VALUE_ENABLE_ENABLE); EXPECT_EQ(addressLow, blitCmd.getDestinationClearAddressLow()); EXPECT_EQ(addressHigh, blitCmd.getDestinationClearAddressHigh()); } XE_HP_CORE_TEST_F(BlitXE_HP_CORETests, givenCommandQueueWhenAskingForCacheFlushOnBcsThenReturnTrue) { auto clDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); MockContext context(clDevice.get()); cl_int retVal = CL_SUCCESS; auto cmdQ = std::unique_ptr(CommandQueue::create(&context, clDevice.get(), nullptr, false, retVal)); auto pHwQ = static_cast *>(cmdQ.get()); EXPECT_TRUE(pHwQ->isCacheFlushForBcsRequired()); } compute-runtime-22.14.22890/opencl/test/unit_test/xe_hp_core/excludes_ocl_xe_hp_core.cpp000066400000000000000000000032201422164147700313070ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" HWTEST_EXCLUDE_PRODUCT(BufferSetSurfaceTests, givenAlignedCacheableReadOnlyBufferThenChoseOclBufferPolicy, IGFX_XE_HP_CORE); HWTEST_EXCLUDE_PRODUCT(BufferSetSurfaceTests, givenBufferSetSurfaceThatMemoryIsUnalignedToCachelineButReadOnlyThenL3CacheShouldBeStillOn, IGFX_XE_HP_CORE); HWTEST_EXCLUDE_PRODUCT(HwInfoConfigTest, WhenAllowCompressionIsCalledThenTrueIsReturned, IGFX_XE_HP_CORE); HWTEST_EXCLUDE_PRODUCT(HwInfoConfigTest, givenHardwareInfoWhenCallingIsMaxThreadsForWorkgroupWARequiredThenFalseIsReturned, IGFX_XE_HP_CORE); HWTEST_EXCLUDE_PRODUCT(HwInfoConfigTest, whenCallingGetDeviceMemoryNameThenDdrIsReturned, IGFX_XE_HP_CORE); HWTEST_EXCLUDE_PRODUCT(HwInfoConfigTest, givenHwInfoConfigWhenAskedIfExtraParametersAreInvalidThenFalseIsReturned, IGFX_XE_HP_CORE); 
HWTEST_EXCLUDE_PRODUCT(HwInfoConfigTest, givenHwInfoConfigWhenAskedIfTile64With3DSurfaceOnBCSIsSupportedThenTrueIsReturned, IGFX_XE_HP_CORE); HWTEST_EXCLUDE_PRODUCT(HwInfoConfigTest, givenHwInfoConfigWhenAskedIfBlitterForImagesIsSupportedThenFalseIsReturned, IGFX_XE_HP_CORE); HWTEST_EXCLUDE_PRODUCT(HwInfoConfigTest, givenHwInfoConfigWhenAskedIfPipeControlPriorToNonPipelinedStateCommandsWARequiredThenFalseIsReturned, IGFX_XE_HP_CORE); HWTEST_EXCLUDE_PRODUCT(HwHelperTest, whenGettingDefaultRevisionIdThenCorrectValueIsReturned, IGFX_XE_HP_CORE); HWTEST_EXCLUDE_PRODUCT(CommandStreamReceiverFlushTaskXeHPAndLaterTests, givenProgramExtendedPipeControlPriorToNonPipelinedStateCommandEnabledAndStateSipWhenItIsRequiredThenThereIsPipeControlPriorToIt, IGFX_XE_HP_CORE); compute-runtime-22.14.22890/opencl/test/unit_test/xe_hp_core/hw_helper_tests_xe_hp_core.cpp000066400000000000000000000420201422164147700320360ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/hw_info_config.h" #include "shared/source/program/kernel_info.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "shared/test/common/cmd_parse/hw_parse.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/hw_helper_tests.h" #include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "opencl/source/helpers/cl_hw_helper.h" #include "opencl/test/unit_test/mocks/mock_cl_hw_helper.h" using HwHelperTestXE_HP_CORE = HwHelperTest; XE_HP_CORE_TEST_F(HwHelperTestXE_HP_CORE, givenGenHelperWhenKernelArgumentIsNotPureStatefulThenRequireNonAuxMode) { auto &clHwHelper = ClHwHelper::get(renderCoreFamily); for (auto isPureStateful : {false, true}) { ArgDescPointer argAsPtr{}; argAsPtr.accessedUsingStatelessAddressingMode = !isPureStateful; EXPECT_EQ(!argAsPtr.isPureStateful(), 
clHwHelper.requiresNonAuxMode(argAsPtr, *defaultHwInfo)); } } XE_HP_CORE_TEST_F(HwHelperTestXE_HP_CORE, givenGenHelperWhenEnableStatelessCompressionThenDontRequireNonAuxMode) { DebugManagerStateRestore restore; DebugManager.flags.EnableStatelessCompression.set(1); auto &clHwHelper = ClHwHelper::get(renderCoreFamily); for (auto isPureStateful : {false, true}) { ArgDescPointer argAsPtr{}; argAsPtr.accessedUsingStatelessAddressingMode = !isPureStateful; EXPECT_FALSE(clHwHelper.requiresNonAuxMode(argAsPtr, *defaultHwInfo)); } } XE_HP_CORE_TEST_F(HwHelperTestXE_HP_CORE, givenXE_HP_COREThenAuxTranslationIsRequired) { auto &clHwHelper = ClHwHelper::get(renderCoreFamily); for (auto isPureStateful : {false, true}) { KernelInfo kernelInfo{}; kernelInfo.kernelDescriptor.payloadMappings.explicitArgs.resize(1); kernelInfo.kernelDescriptor.payloadMappings.explicitArgs[0].as(true).accessedUsingStatelessAddressingMode = !isPureStateful; EXPECT_EQ(!isPureStateful, clHwHelper.requiresAuxResolves(kernelInfo, *defaultHwInfo)); } } XE_HP_CORE_TEST_F(HwHelperTestXE_HP_CORE, givenXE_HP_COREWhenEnableStatelessCompressionThenAuxTranslationIsNotRequired) { DebugManagerStateRestore restore; DebugManager.flags.EnableStatelessCompression.set(1); auto &clHwHelper = ClHwHelper::get(renderCoreFamily); KernelInfo kernelInfo{}; EXPECT_FALSE(clHwHelper.requiresAuxResolves(kernelInfo, *defaultHwInfo)); } XE_HP_CORE_TEST_F(HwHelperTestXE_HP_CORE, givenDifferentBufferSizesWhenEnableStatelessCompressionThenEveryBufferSizeIsSuitableForCompression) { DebugManagerStateRestore restore; DebugManager.flags.EnableStatelessCompression.set(1); auto &helper = HwHelper::get(renderCoreFamily); const size_t sizesToCheck[] = {1, 128, 256, 1024, 2048}; for (size_t size : sizesToCheck) { EXPECT_TRUE(helper.isBufferSizeSuitableForCompression(size, *defaultHwInfo)); } } XE_HP_CORE_TEST_F(HwHelperTestXE_HP_CORE, 
givenStatelessCompressionEnabledWhenSetExtraAllocationDataThenDontRequireCpuAccessNorMakeResourceLocableForCompressedAllocations) { DebugManagerStateRestore restore; DebugManager.flags.EnableStatelessCompression.set(1); HardwareInfo hwInfo = *defaultHwInfo; auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); for (auto allocType : {AllocationType::CONSTANT_SURFACE, AllocationType::GLOBAL_SURFACE, AllocationType::PRINTF_SURFACE}) { AllocationData allocData; AllocationProperties allocProperties(mockRootDeviceIndex, true, allocType, mockDeviceBitfield); hwHelper.setExtraAllocationData(allocData, allocProperties, hwInfo); EXPECT_FALSE(allocData.flags.requiresCpuAccess); EXPECT_FALSE(allocData.storageInfo.isLockable); } } XE_HP_CORE_TEST_F(HwHelperTestXE_HP_CORE, givenRevisionEnumAndPlatformFamilyTypeThenProperValueForIsWorkaroundRequiredIsReturned) { uint32_t steppings[] = { REVISION_A0, REVISION_A1, REVISION_C, REVISION_D, CommonConstants::invalidStepping, }; const auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); const auto &hwInfoConfig = *HwInfoConfig::get(hardwareInfo.platform.eProductFamily); for (auto stepping : steppings) { hardwareInfo.platform.usRevId = hwInfoConfig.getHwRevIdFromStepping(stepping, hardwareInfo); if (hardwareInfo.platform.eProductFamily == IGFX_XE_HP_SDV) { if (stepping == REVISION_A0) { EXPECT_TRUE(hwHelper.isWorkaroundRequired(REVISION_A0, REVISION_B, hardwareInfo)); EXPECT_TRUE(hwHelper.isWorkaroundRequired(REVISION_A0, REVISION_A1, hardwareInfo)); EXPECT_FALSE(hwHelper.isWorkaroundRequired(REVISION_B, REVISION_A0, hardwareInfo)); } else if (stepping == REVISION_A1) { EXPECT_FALSE(hwHelper.isWorkaroundRequired(REVISION_A0, REVISION_A1, hardwareInfo)); } else if (stepping == REVISION_C || stepping == REVISION_D) { //undefined EXPECT_FALSE(hwHelper.isWorkaroundRequired(REVISION_A0, REVISION_D, hardwareInfo)); } } else { EXPECT_FALSE(hwHelper.isWorkaroundRequired(REVISION_A0, REVISION_D, hardwareInfo)); } 
} } XE_HP_CORE_TEST_F(HwHelperTestXE_HP_CORE, givenRevisionEnumThenProperMaxThreadsForWorkgroupIsReturned) { const auto &hwInfoConfig = *HwInfoConfig::get(hardwareInfo.platform.eProductFamily); hardwareInfo.platform.usRevId = hwInfoConfig.getHwRevIdFromStepping(REVISION_A0, hardwareInfo); EXPECT_EQ(64u, hwInfoConfig.getMaxThreadsForWorkgroupInDSSOrSS(hardwareInfo, 64u, 64u)); hardwareInfo.platform.usRevId = hwInfoConfig.getHwRevIdFromStepping(REVISION_B, hardwareInfo); uint32_t numThreadsPerEU = hardwareInfo.gtSystemInfo.ThreadCount / hardwareInfo.gtSystemInfo.EUCount; EXPECT_EQ(64u * numThreadsPerEU, hwInfoConfig.getMaxThreadsForWorkgroupInDSSOrSS(hardwareInfo, 64u, 64u)); } XE_HP_CORE_TEST_F(HwHelperTestXE_HP_CORE, givenDisablePipeControlFlagIsDefaultWhenLocalMemoryIsEnabledThenReturnFalseAndDoNotProgramPipeControl) { hardwareInfo.featureTable.flags.ftrLocalMemory = true; EXPECT_FALSE(MemorySynchronizationCommands::isPipeControlWArequired(hardwareInfo)); constexpr size_t bufferSize = 128u; uint8_t buffer[bufferSize]; LinearStream cmdStream(buffer, bufferSize); MemorySynchronizationCommands::addPipeControlWA(cmdStream, 0x1000, hardwareInfo); EXPECT_EQ(0u, cmdStream.getUsed()); } XE_HP_CORE_TEST_F(HwHelperTestXE_HP_CORE, givenDisablePipeControlFlagIsDisabledWhenLocalMemoryIsEnabledThenReturnFalseAndDoNotProgramPipeControl) { DebugManagerStateRestore restore; DebugManager.flags.DisablePipeControlPrecedingPostSyncCommand.set(0); hardwareInfo.featureTable.flags.ftrLocalMemory = true; EXPECT_FALSE(MemorySynchronizationCommands::isPipeControlWArequired(hardwareInfo)); constexpr size_t bufferSize = 128u; uint8_t buffer[bufferSize]; LinearStream cmdStream(buffer, bufferSize); MemorySynchronizationCommands::addPipeControlWA(cmdStream, 0x1000, hardwareInfo); EXPECT_EQ(0u, cmdStream.getUsed()); } XE_HP_CORE_TEST_F(HwHelperTestXE_HP_CORE, givenDisablePipeControlFlagIsEnabledWhenLocalMemoryIsEnabledThenReturnTrueAndProgramPipeControl) { using PIPE_CONTROL = typename 
FamilyType::PIPE_CONTROL; DebugManagerStateRestore restore; DebugManager.flags.DisablePipeControlPrecedingPostSyncCommand.set(1); hardwareInfo.featureTable.flags.ftrLocalMemory = true; EXPECT_TRUE(MemorySynchronizationCommands::isPipeControlWArequired(hardwareInfo)); constexpr size_t bufferSize = 128u; uint8_t buffer[bufferSize]; LinearStream cmdStream(buffer, bufferSize); MemorySynchronizationCommands::addPipeControlWA(cmdStream, 0x1000, hardwareInfo); EXPECT_EQ(sizeof(PIPE_CONTROL), cmdStream.getUsed()); } XE_HP_CORE_TEST_F(HwHelperTestXE_HP_CORE, givenDisablePipeControlFlagIsEnabledWhenLocalMemoryIsDisabledThenReturnTrueAndDoNotProgramPipeControl) { DebugManagerStateRestore restore; DebugManager.flags.DisablePipeControlPrecedingPostSyncCommand.set(1); hardwareInfo.featureTable.flags.ftrLocalMemory = false; EXPECT_FALSE(MemorySynchronizationCommands::isPipeControlWArequired(hardwareInfo)); constexpr size_t bufferSize = 128u; uint8_t buffer[bufferSize]; LinearStream cmdStream(buffer, bufferSize); MemorySynchronizationCommands::addPipeControlWA(cmdStream, 0x1000, hardwareInfo); EXPECT_EQ(0u, cmdStream.getUsed()); } using HwInfoConfigTestXE_HP_CORE = ::testing::Test; XE_HP_CORE_TEST_F(HwInfoConfigTestXE_HP_CORE, givenDebugVariableSetWhenConfigureIsCalledThenSetupBlitterOperationsSupportedFlag) { DebugManagerStateRestore restore; auto hwInfoConfig = HwInfoConfig::get(productFamily); HardwareInfo hwInfo = *defaultHwInfo; DebugManager.flags.EnableBlitterOperationsSupport.set(0); hwInfoConfig->configureHardwareCustom(&hwInfo, nullptr); EXPECT_FALSE(hwInfo.capabilityTable.blitterOperationsSupported); DebugManager.flags.EnableBlitterOperationsSupport.set(1); hwInfoConfig->configureHardwareCustom(&hwInfo, nullptr); EXPECT_TRUE(hwInfo.capabilityTable.blitterOperationsSupported); } XE_HP_CORE_TEST_F(HwInfoConfigTestXE_HP_CORE, givenMultitileConfigWhenConfiguringHwInfoThenEnableBlitter) { auto hwInfoConfig = HwInfoConfig::get(productFamily); HardwareInfo hwInfo = 
*defaultHwInfo; for (uint32_t tileCount = 0; tileCount <= 4; tileCount++) { hwInfo.gtSystemInfo.MultiTileArchInfo.TileCount = tileCount; hwInfoConfig->configureHardwareCustom(&hwInfo, nullptr); EXPECT_EQ(true, hwInfo.capabilityTable.blitterOperationsSupported); } } using LriHelperTestsXE_HP_CORE = ::testing::Test; XE_HP_CORE_TEST_F(LriHelperTestsXE_HP_CORE, whenProgrammingLriCommandThenExpectMmioRemapEnableCorrectlySet) { using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; std::unique_ptr buffer(new uint8_t[128]); LinearStream stream(buffer.get(), 128); uint32_t address = 0x8888; uint32_t data = 0x1234; auto expectedLri = FamilyType::cmdInitLoadRegisterImm; EXPECT_FALSE(expectedLri.getMmioRemapEnable()); expectedLri.setRegisterOffset(address); expectedLri.setDataDword(data); expectedLri.setMmioRemapEnable(true); LriHelper::program(&stream, address, data, true); MI_LOAD_REGISTER_IMM *lri = genCmdCast(buffer.get()); ASSERT_NE(nullptr, lri); EXPECT_EQ(sizeof(MI_LOAD_REGISTER_IMM), stream.getUsed()); EXPECT_EQ(lri, stream.getCpuBase()); EXPECT_TRUE(memcmp(lri, &expectedLri, sizeof(MI_LOAD_REGISTER_IMM)) == 0); } XE_HP_CORE_TEST_F(HwHelperTestXE_HP_CORE, GivenVariousValuesWhenAlignSlmSizeIsCalledThenCorrectValueIsReturned) { EXPECT_EQ(0u, HwHelperHw::get().alignSlmSize(0)); EXPECT_EQ(1024u, HwHelperHw::get().alignSlmSize(1)); EXPECT_EQ(1024u, HwHelperHw::get().alignSlmSize(1024)); EXPECT_EQ(2048u, HwHelperHw::get().alignSlmSize(1025)); EXPECT_EQ(2048u, HwHelperHw::get().alignSlmSize(2048)); EXPECT_EQ(4096u, HwHelperHw::get().alignSlmSize(2049)); EXPECT_EQ(4096u, HwHelperHw::get().alignSlmSize(4096)); EXPECT_EQ(8192u, HwHelperHw::get().alignSlmSize(4097)); EXPECT_EQ(8192u, HwHelperHw::get().alignSlmSize(8192)); EXPECT_EQ(16384u, HwHelperHw::get().alignSlmSize(8193)); EXPECT_EQ(16384u, HwHelperHw::get().alignSlmSize(16384)); EXPECT_EQ(32768u, HwHelperHw::get().alignSlmSize(16385)); EXPECT_EQ(32768u, HwHelperHw::get().alignSlmSize(32768)); 
EXPECT_EQ(65536u, HwHelperHw::get().alignSlmSize(32769)); EXPECT_EQ(65536u, HwHelperHw::get().alignSlmSize(65536)); } XE_HP_CORE_TEST_F(HwHelperTestXE_HP_CORE, givenHwHelperWhenGettingThreadsPerEUConfigsThenCorrectConfigsAreReturned) { auto &helper = HwHelper::get(pDevice->getHardwareInfo().platform.eRenderCoreFamily); EXPECT_NE(nullptr, &helper); auto &configs = helper.getThreadsPerEUConfigs(); EXPECT_EQ(2U, configs.size()); EXPECT_EQ(4U, configs[0]); EXPECT_EQ(8U, configs[1]); } XE_HP_CORE_TEST_F(HwHelperTestXE_HP_CORE, WhenGettingDeviceIpVersionThenMakeCorrectDeviceIpVersion) { EXPECT_EQ(ClHwHelperMock::makeDeviceIpVersion(12, 5, 1), ClHwHelper::get(renderCoreFamily).getDeviceIpVersion(*defaultHwInfo)); } XE_HP_CORE_TEST_F(HwHelperTestXE_HP_CORE, whenGettingDefaultRevisionThenB0IsReturned) { EXPECT_EQ(HwInfoConfigHw::get()->getHwRevIdFromStepping(REVISION_B, *defaultHwInfo), HwHelperHw::get().getDefaultRevisionId(*defaultHwInfo)); } XE_HP_CORE_TEST_F(HwHelperTestXE_HP_CORE, givenDebugFlagAndLocalMemoryIsNotAvailableWhenProgrammingPostSyncPipeControlThenExpectNotAddingWaPipeControl) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION; DebugManagerStateRestore restore; DebugManager.flags.DisablePipeControlPrecedingPostSyncCommand.set(1); constexpr size_t bufferSize = 256u; uint8_t buffer[bufferSize]; LinearStream cmdStream(buffer, bufferSize); HardwareInfo hardwareInfo = *defaultHwInfo; hardwareInfo.featureTable.flags.ftrLocalMemory = false; PipeControlArgs args; uint64_t gpuAddress = 0xABC0; uint64_t immediateValue = 0x10; MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation(cmdStream, POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, gpuAddress, immediateValue, hardwareInfo, args); EXPECT_EQ(sizeof(PIPE_CONTROL), cmdStream.getUsed()); HardwareParse hwParser; hwParser.parsePipeControl = true; hwParser.parseCommands(cmdStream, 0); 
hwParser.findHardwareCommands(); ASSERT_EQ(1u, hwParser.pipeControlList.size()); auto pipeControl = reinterpret_cast(*hwParser.pipeControlList.begin()); EXPECT_EQ(gpuAddress, UnitTestHelper::getPipeControlPostSyncAddress(*pipeControl)); EXPECT_EQ(immediateValue, pipeControl->getImmediateData()); } XE_HP_CORE_TEST_F(HwHelperTestXE_HP_CORE, givenDebugFlagAndLocalMemoryIsAvailableWhenProgrammingPostSyncPipeControlThenExpectAddingWaPipeControl) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION; DebugManagerStateRestore restore; DebugManager.flags.DisablePipeControlPrecedingPostSyncCommand.set(1); constexpr size_t bufferSize = 256u; uint8_t buffer[bufferSize]; LinearStream cmdStream(buffer, bufferSize); HardwareInfo hardwareInfo = *defaultHwInfo; hardwareInfo.featureTable.flags.ftrLocalMemory = true; PipeControlArgs args; uint64_t gpuAddress = 0xABC0; uint64_t immediateValue = 0x10; MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation(cmdStream, POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, gpuAddress, immediateValue, hardwareInfo, args); EXPECT_EQ(sizeof(PIPE_CONTROL) * 2, cmdStream.getUsed()); HardwareParse hwParser; hwParser.parsePipeControl = true; hwParser.parseCommands(cmdStream, 0); hwParser.findHardwareCommands(); ASSERT_EQ(2u, hwParser.pipeControlList.size()); auto pipeControlItor = hwParser.pipeControlList.begin(); auto pipeControl = reinterpret_cast(*pipeControlItor); constexpr uint64_t zeroGpuAddress = 0; constexpr uint64_t zeroImmediateValue = 0; EXPECT_EQ(zeroGpuAddress, UnitTestHelper::getPipeControlPostSyncAddress(*pipeControl)); EXPECT_EQ(zeroImmediateValue, pipeControl->getImmediateData()); pipeControlItor++; pipeControl = reinterpret_cast(*pipeControlItor); EXPECT_EQ(gpuAddress, UnitTestHelper::getPipeControlPostSyncAddress(*pipeControl)); EXPECT_EQ(immediateValue, pipeControl->getImmediateData()); } 
compute-runtime-22.14.22890/opencl/test/unit_test/xe_hp_core/test_device_caps_xe_hp_core.cpp000066400000000000000000000210721422164147700321470ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/hw_helper.h" #include "shared/source/os_interface/hw_info_config.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/helpers/gtest_helpers.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" using namespace NEO; typedef Test XE_HP_COREDeviceCaps; HWCMDTEST_F(IGFX_XE_HP_CORE, XE_HP_COREDeviceCaps, givenKernelWhenCanTransformImagesIsCalledThenReturnsTrue) { MockKernelWithInternals mockKernel(*pClDevice); auto retVal = mockKernel.mockKernel->Kernel::canTransformImages(); EXPECT_FALSE(retVal); } HWCMDTEST_F(IGFX_XE_HP_CORE, XE_HP_COREDeviceCaps, givenKernelThatDoesStatelessWritesWhenItIsCreatedThenItHasProperFieldSet) { MockKernelWithInternals mockKernel(*pClDevice); mockKernel.mockKernel->initialize(); bool statelessWritesEmitted = mockKernel.mockKernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.usesStatelessWrites; EXPECT_EQ(statelessWritesEmitted, mockKernel.mockKernel->areStatelessWritesUsed()); } XE_HP_CORE_TEST_F(XE_HP_COREDeviceCaps, givenXE_HP_COREWhenCheckFtrSupportsInteger64BitAtomicsThenReturnTrue) { EXPECT_TRUE(pDevice->getHardwareInfo().capabilityTable.ftrSupportsInteger64BitAtomics); } XE_HP_CORE_TEST_F(XE_HP_COREDeviceCaps, givenXE_HP_COREWhenCheckingImageSupportThenReturnTrue) { EXPECT_TRUE(pDevice->getHardwareInfo().capabilityTable.supportsImages); } XE_HP_CORE_TEST_F(XE_HP_COREDeviceCaps, givenXE_HP_COREWhenCheckingCoherencySupportThenReturnFalse) { EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.ftrSupportsCoherency); } XE_HP_CORE_TEST_F(XE_HP_COREDeviceCaps, givenXE_HP_COREWhenCheckExtensionsThenDeviceDoesNotReportClKhrSubgroupsExtension) { const 
auto &caps = pClDevice->getDeviceInfo(); EXPECT_FALSE(hasSubstr(caps.deviceExtensions, std::string("cl_khr_subgroups"))); } XE_HP_CORE_TEST_F(XE_HP_COREDeviceCaps, givenXE_HP_COREWhenDeviceCapsInitializedThenAddXE_HP_COREExtensions) { const auto &caps = pClDevice->getDeviceInfo(); EXPECT_TRUE(hasSubstr(caps.deviceExtensions, std::string("cl_intel_dot_accumulate"))); EXPECT_TRUE(hasSubstr(caps.deviceExtensions, std::string("cl_intel_subgroup_local_block_io"))); EXPECT_TRUE(hasSubstr(caps.deviceExtensions, std::string("cl_intel_subgroup_matrix_multiply_accumulate"))); EXPECT_TRUE(hasSubstr(caps.deviceExtensions, std::string("cl_intel_subgroup_split_matrix_multiply_accumulate"))); } XE_HP_CORE_TEST_F(XE_HP_COREDeviceCaps, givenXE_HP_COREWhenCheckingCapsThenDeviceDoesNotSupportIndependentForwardProgress) { const auto &caps = pClDevice->getDeviceInfo(); EXPECT_FALSE(caps.independentForwardProgress); } XE_HP_CORE_TEST_F(XE_HP_COREDeviceCaps, givenEnabledFtrPooledEuAndA0SteppingWhenCalculatingMaxEuPerSSThenDontIgnoreEuCountPerPoolMin) { HardwareInfo myHwInfo = *defaultHwInfo; GT_SYSTEM_INFO &mySysInfo = myHwInfo.gtSystemInfo; FeatureTable &mySkuTable = myHwInfo.featureTable; PLATFORM &myPlatform = myHwInfo.platform; const auto &hwInfoConfig = *HwInfoConfig::get(myPlatform.eProductFamily); mySysInfo.EUCount = 20; mySysInfo.EuCountPerPoolMin = 99999; mySkuTable.flags.ftrPooledEuEnabled = 1; myPlatform.usRevId = hwInfoConfig.getHwRevIdFromStepping(REVISION_A0, myHwInfo); auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(&myHwInfo)); auto expectedMaxWGS = mySysInfo.EuCountPerPoolMin * (mySysInfo.ThreadCount / mySysInfo.EUCount) * 8; expectedMaxWGS = std::min(Math::prevPowerOfTwo(expectedMaxWGS), 512u); EXPECT_EQ(expectedMaxWGS, device->getDeviceInfo().maxWorkGroupSize); } XE_HP_CORE_TEST_F(XE_HP_COREDeviceCaps, givenDeviceThatHasHighNumberOfExecutionUnitsAndA0SteppingWhenMaxWorkgroupSizeIsComputedThenItIsLimitedTo512) { HardwareInfo myHwInfo = 
*defaultHwInfo; GT_SYSTEM_INFO &mySysInfo = myHwInfo.gtSystemInfo; PLATFORM &myPlatform = myHwInfo.platform; const auto &hwInfoConfig = *HwInfoConfig::get(myPlatform.eProductFamily); mySysInfo.EUCount = 32; mySysInfo.SubSliceCount = 2; mySysInfo.DualSubSliceCount = 2; mySysInfo.ThreadCount = 32 * 8; myPlatform.usRevId = hwInfoConfig.getHwRevIdFromStepping(REVISION_A0, myHwInfo); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&myHwInfo)); EXPECT_EQ(512u, device->sharedDeviceInfo.maxWorkGroupSize); EXPECT_EQ(device->sharedDeviceInfo.maxWorkGroupSize / 8, device->getDeviceInfo().maxNumOfSubGroups); } XE_HP_CORE_TEST_F(XE_HP_COREDeviceCaps, givenEnabledFtrPooledEuAndNotA0SteppingWhenCalculatingMaxEuPerSSThenDontIgnoreEuCountPerPoolMin) { HardwareInfo myHwInfo = *defaultHwInfo; GT_SYSTEM_INFO &mySysInfo = myHwInfo.gtSystemInfo; FeatureTable &mySkuTable = myHwInfo.featureTable; PLATFORM &myPlatform = myHwInfo.platform; const auto &hwInfoConfig = *HwInfoConfig::get(myPlatform.eProductFamily); mySysInfo.EUCount = 20; mySysInfo.EuCountPerPoolMin = 99999; mySkuTable.flags.ftrPooledEuEnabled = 1; myPlatform.usRevId = hwInfoConfig.getHwRevIdFromStepping(REVISION_B, myHwInfo); auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(&myHwInfo)); auto expectedMaxWGS = mySysInfo.EuCountPerPoolMin * (mySysInfo.ThreadCount / mySysInfo.EUCount) * 8; expectedMaxWGS = std::min(Math::prevPowerOfTwo(expectedMaxWGS), 1024u); EXPECT_EQ(expectedMaxWGS, device->getDeviceInfo().maxWorkGroupSize); } XE_HP_CORE_TEST_F(XE_HP_COREDeviceCaps, givenDeviceThatHasHighNumberOfExecutionUnitsAndNotA0SteppingWhenMaxWorkgroupSizeIsComputedThenItIsLimitedTo1024) { HardwareInfo myHwInfo = *defaultHwInfo; GT_SYSTEM_INFO &mySysInfo = myHwInfo.gtSystemInfo; PLATFORM &myPlatform = myHwInfo.platform; const auto &hwInfoConfig = *HwInfoConfig::get(myPlatform.eProductFamily); mySysInfo.EUCount = 32; mySysInfo.SubSliceCount = 2; mySysInfo.DualSubSliceCount = 2; 
mySysInfo.ThreadCount = 32 * 8; // 128 threads per subslice, in simd 8 gives 1024 myPlatform.usRevId = hwInfoConfig.getHwRevIdFromStepping(REVISION_B, myHwInfo); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&myHwInfo)); EXPECT_EQ(1024u, device->sharedDeviceInfo.maxWorkGroupSize); EXPECT_EQ(device->sharedDeviceInfo.maxWorkGroupSize / 8, device->getDeviceInfo().maxNumOfSubGroups); } XE_HP_CORE_TEST_F(XE_HP_COREDeviceCaps, givenHwInfoWhenRequestedComputeUnitsUsedForScratchAndMaxSubSlicesSupportedIsSmallerThanMinMaxSubSlicesSupportedThenReturnValidValue) { HardwareInfo hwInfo = *defaultHwInfo; GT_SYSTEM_INFO &testSysInfo = hwInfo.gtSystemInfo; testSysInfo.MaxSubSlicesSupported = 24; auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); uint32_t minMaxSubSlicesSupported = 32; uint32_t minCalculation = minMaxSubSlicesSupported * hwInfo.gtSystemInfo.MaxEuPerSubSlice * hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount; EXPECT_LE(testSysInfo.MaxSubSlicesSupported, minMaxSubSlicesSupported); EXPECT_EQ(minCalculation, hwHelper.getComputeUnitsUsedForScratch(&hwInfo)); } XE_HP_CORE_TEST_F(XE_HP_COREDeviceCaps, givenHwInfoWhenRequestedComputeUnitsUsedForScratchAndMaxSubSlicesSupportedIsGreaterThanMinMaxSubSlicesSupportedThenReturnValidValue) { HardwareInfo hwInfo = *defaultHwInfo; GT_SYSTEM_INFO &testSysInfo = hwInfo.gtSystemInfo; testSysInfo.MaxSubSlicesSupported = 40; auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); uint32_t minMaxSubSlicesSupported = 32; uint32_t minCalculation = minMaxSubSlicesSupported * hwInfo.gtSystemInfo.MaxEuPerSubSlice * hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount; uint32_t properCalculation = testSysInfo.MaxSubSlicesSupported * hwInfo.gtSystemInfo.MaxEuPerSubSlice * hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount; EXPECT_GT(testSysInfo.MaxSubSlicesSupported, minMaxSubSlicesSupported); EXPECT_GT(properCalculation, minCalculation); 
EXPECT_EQ(properCalculation, hwHelper.getComputeUnitsUsedForScratch(&hwInfo)); } HWTEST_EXCLUDE_PRODUCT(DeviceGetCapsTest, givenEnabledFtrPooledEuWhenCalculatingMaxEuPerSSThenDontIgnoreEuCountPerPoolMin, IGFX_XE_HP_CORE); HWTEST_EXCLUDE_PRODUCT(DeviceGetCapsTest, givenDeviceThatHasHighNumberOfExecutionUnitsWhenMaxWorkgroupSizeIsComputedItIsLimitedTo1024, IGFX_XE_HP_CORE); compute-runtime-22.14.22890/opencl/test/unit_test/xe_hp_core/test_platform_caps_xe_hp_core.cpp000066400000000000000000000012271422164147700325340ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/platform_fixture.h" using namespace NEO; struct XE_HP_COREPlatformCaps : public PlatformFixture, public ::testing::Test { void SetUp() override { PlatformFixture::SetUp(); } void TearDown() override { PlatformFixture::TearDown(); } }; HWTEST2_F(XE_HP_COREPlatformCaps, givenXeHPSkusThenItSupportFP64, IsXEHP) { const auto &caps = pPlatform->getPlatformInfo(); EXPECT_NE(std::string::npos, caps.extensions.find(std::string("cl_khr_fp64"))); }compute-runtime-22.14.22890/opencl/test/unit_test/xe_hp_core/test_sample_xe_hp_core.cpp000066400000000000000000000006611422164147700311640ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" using namespace NEO; typedef Test XE_HP_COREOnlyTest; XE_HP_CORE_TEST_F(XE_HP_COREOnlyTest, WhenGettingRenderCoreFamilyThenOnlyXeHpCoreIsReturned) { EXPECT_EQ(IGFX_XE_HP_CORE, pDevice->getRenderCoreFamily()); } 
compute-runtime-22.14.22890/opencl/test/unit_test/xe_hp_core/xehp/000077500000000000000000000000001422164147700247065ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/xe_hp_core/xehp/CMakeLists.txt000066400000000000000000000027351422164147700274550ustar00rootroot00000000000000# # Copyright (C) 2021-2022 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_XE_HP_SDV) set(IGDRCL_SRCS_tests_xe_hp_core_xehp ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/cache_flush_tests_xehp.inl ${CMAKE_CURRENT_SOURCE_DIR}/get_device_info_xehp.inl ${CMAKE_CURRENT_SOURCE_DIR}/hw_helper_tests_xehp.inl ${CMAKE_CURRENT_SOURCE_DIR}/hw_info_tests_xehp.inl ${CMAKE_CURRENT_SOURCE_DIR}/memory_manager_tests_xehp.inl ${CMAKE_CURRENT_SOURCE_DIR}/sampler_tests_xehp.inl ${CMAKE_CURRENT_SOURCE_DIR}/source_level_debugger_csr_tests_xehp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_buffer_xe_hp_sdv.inl ${CMAKE_CURRENT_SOURCE_DIR}/test_command_stream_receiver_xehp.inl ${CMAKE_CURRENT_SOURCE_DIR}/test_device_caps_xehp.inl ${CMAKE_CURRENT_SOURCE_DIR}/test_hw_info_config_xehp.inl ${CMAKE_CURRENT_SOURCE_DIR}/test_image_xe_hp_sdv.inl ${CMAKE_CURRENT_SOURCE_DIR}/test_local_work_size_xehp.inl ${CMAKE_CURRENT_SOURCE_DIR}/test_sub_devices_xehp.inl ${CMAKE_CURRENT_SOURCE_DIR}/test_preamble_xehp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_platform_caps_xehp.inl ${CMAKE_CURRENT_SOURCE_DIR}/test_wrapper_xehp.cpp ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_xe_hp_core_xehp}) add_subdirectories() neo_copy_test_files_with_revision(copy_test_files_xe_hp_sdv_4 xe_hp_sdv 4) add_dependencies(copy_test_files_per_product copy_test_files_xe_hp_sdv_4) endif() compute-runtime-22.14.22890/opencl/test/unit_test/xe_hp_core/xehp/cache_flush_tests_xehp.inl000066400000000000000000000126441422164147700321330ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include 
"shared/test/common/mocks/mock_allocation_properties.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/test/unit_test/kernel/cache_flush_xehp_and_later_tests.inl" #include "opencl/test/unit_test/mocks/mock_command_queue.h" using namespace NEO; using GivenCacheFlushAfterWalkerEnabledWhenSvmAllocationsSetAsCacheFlushRequiringThenExpectCacheFlushCommandXEHP = GivenCacheFlushAfterWalkerEnabledWhenSvmAllocationsSetAsCacheFlushRequiringThenExpectCacheFlushCommand; XEHPTEST_F(GivenCacheFlushAfterWalkerEnabledWhenSvmAllocationsSetAsCacheFlushRequiringThenExpectCacheFlushCommandXEHP, I) { TestBodyImpl(); } using GivenCacheFlushAfterWalkerEnabledWhenKernelArgIsSetAsCacheFlushRequiredThenExpectCacheFlushCommandXEHP = GivenCacheFlushAfterWalkerEnabledWhenKernelArgIsSetAsCacheFlushRequiredThenExpectCacheFlushCommand; XEHPTEST_F(GivenCacheFlushAfterWalkerEnabledWhenKernelArgIsSetAsCacheFlushRequiredThenExpectCacheFlushCommandXEHP, I) { TestBodyImpl(); } using GivenCacheFlushAfterWalkerEnabledWhenProgramGlobalSurfacePresentThenExpectCacheFlushCommandXEHP = GivenCacheFlushAfterWalkerEnabledWhenProgramGlobalSurfacePresentThenExpectCacheFlushCommand; XEHPTEST_F(GivenCacheFlushAfterWalkerEnabledWhenProgramGlobalSurfacePresentThenExpectCacheFlushCommandXEHP, I) { TestBodyImpl(); } using GivenCacheFlushAfterWalkerEnabledWhenProgramGlobalSurfacePresentAndPostSyncRequiredThenExpectProperCacheFlushCommandXEHP = GivenCacheFlushAfterWalkerEnabledWhenProgramGlobalSurfacePresentAndPostSyncRequiredThenExpectProperCacheFlushCommand; XEHPTEST_F(GivenCacheFlushAfterWalkerEnabledWhenProgramGlobalSurfacePresentAndPostSyncRequiredThenExpectProperCacheFlushCommandXEHP, I) { TestBodyImpl(); } using GivenCacheFlushAfterWalkerEnabledWhenAllocationRequiresCacheFlushThenFlushCommandPresentAfterWalkerXEHP = GivenCacheFlushAfterWalkerEnabledWhenAllocationRequiresCacheFlushThenFlushCommandPresentAfterWalker; 
XEHPTEST_F(GivenCacheFlushAfterWalkerEnabledWhenAllocationRequiresCacheFlushThenFlushCommandPresentAfterWalkerXEHP, I) { TestBodyImpl(); } using GivenCacheFlushAfterWalkerDisabledWhenAllocationRequiresCacheFlushThenFlushCommandNotPresentAfterWalkerXEHP = GivenCacheFlushAfterWalkerDisabledWhenAllocationRequiresCacheFlushThenFlushCommandNotPresentAfterWalker; XEHPTEST_F(GivenCacheFlushAfterWalkerDisabledWhenAllocationRequiresCacheFlushThenFlushCommandNotPresentAfterWalkerXEHP, I) { TestBodyImpl(); } using GivenCacheFlushAfterWalkerAndTimestampPacketsEnabledWhenAllocationRequiresCacheFlushThenFlushCommandPresentAfterWalkerXEHP = GivenCacheFlushAfterWalkerAndTimestampPacketsEnabledWhenAllocationRequiresCacheFlushThenFlushCommandPresentAfterWalker; XEHPTEST_F(GivenCacheFlushAfterWalkerAndTimestampPacketsEnabledWhenAllocationRequiresCacheFlushThenFlushCommandPresentAfterWalkerXEHP, I) { TestBodyImpl(); } using GivenCacheFlushAfterWalkerEnabledWhenSvmAllocationsSetAsCacheFlushRequiringThenExpectCorrectCommandSizeXEHP = GivenCacheFlushAfterWalkerEnabledWhenSvmAllocationsSetAsCacheFlushRequiringThenExpectCorrectCommandSize; XEHPTEST_F(GivenCacheFlushAfterWalkerEnabledWhenSvmAllocationsSetAsCacheFlushRequiringThenExpectCorrectCommandSizeXEHP, I) { TestBodyImpl(); } using GivenCacheFlushAfterWalkerEnabledWhenMoreThan126AllocationRangesRequiresCacheFlushThenAtLeatsTwoFlushCommandPresentAfterWalkerXEHP = GivenCacheFlushAfterWalkerEnabledWhenMoreThan126AllocationRangesRequiresCacheFlushThenAtLeatsTwoFlushCommandPresentAfterWalker; XEHPTEST_F(GivenCacheFlushAfterWalkerEnabledWhenMoreThan126AllocationRangesRequiresCacheFlushThenAtLeatsTwoFlushCommandPresentAfterWalkerXEHP, I) { TestBodyImpl(); } using GivenCacheFlushAfterWalkerEnabledWhen126AllocationRangesRequiresCacheFlushThenExpectOneFlushXEHP = GivenCacheFlushAfterWalkerEnabledWhen126AllocationRangesRequiresCacheFlushThenExpectOneFlush; 
XEHPTEST_F(GivenCacheFlushAfterWalkerEnabledWhen126AllocationRangesRequiresCacheFlushThenExpectOneFlushXEHP, I) { TestBodyImpl(); } using GivenCacheFlushAfterWalkerAndTimestampPacketsEnabledWhenMoreThan126AllocationRangesRequiresCacheFlushThenExpectFlushWithOutPostSyncAndThenWithPostSyncXEHP = GivenCacheFlushAfterWalkerAndTimestampPacketsEnabledWhenMoreThan126AllocationRangesRequiresCacheFlushThenExpectFlushWithOutPostSyncAndThenWithPostSync; XEHPTEST_F(GivenCacheFlushAfterWalkerAndTimestampPacketsEnabledWhenMoreThan126AllocationRangesRequiresCacheFlushThenExpectFlushWithOutPostSyncAndThenWithPostSyncXEHP, I) { TestBodyImpl(); } using CommandQueueHwCacheFlushTest = ::testing::Test; XEHPTEST_F(CommandQueueHwCacheFlushTest, givenHwCommandQueueForSpecificTileThenCacheFlushAfterWalkerIsNeeded) { UltClDeviceFactory clDeviceFactory{1, 2}; cl_device_id devices[] = {clDeviceFactory.subDevices[0], clDeviceFactory.subDevices[1]}; MockContext context{ClDeviceVector{devices, 2}}; { MockCommandQueueHw commandQueue{&context, clDeviceFactory.rootDevices[0], nullptr}; EXPECT_FALSE(commandQueue.getRequiresCacheFlushAfterWalker()); } { MockCommandQueueHw commandQueue{&context, clDeviceFactory.subDevices[0], nullptr}; EXPECT_TRUE(commandQueue.getRequiresCacheFlushAfterWalker()); } } compute-runtime-22.14.22890/opencl/test/unit_test/xe_hp_core/xehp/get_device_info_xehp.inl000066400000000000000000000070341422164147700315530ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/hw_info_config.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/device_info_fixture.h" using namespace NEO; HWTEST_EXCLUDE_PRODUCT(GetDeviceInfoMemCapabilitiesTest, GivenValidParametersWhenGetDeviceInfoIsCalledForXE_HP_COREThenClSuccessIsReturned, IGFX_XE_HP_SDV); XEHPTEST_F(GetDeviceInfoMemCapabilitiesTest, 
GivenValidParametersWhenGetDeviceInfoIsCalledForXEHPThenClSuccessIsReturned) { std::vector params = { {CL_DEVICE_DEVICE_MEM_CAPABILITIES_INTEL, (CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL | CL_UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL)}, {CL_DEVICE_SINGLE_DEVICE_SHARED_MEM_CAPABILITIES_INTEL, (CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL | CL_UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL)}, {CL_DEVICE_CROSS_DEVICE_SHARED_MEM_CAPABILITIES_INTEL, (CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL | CL_UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL)}, {CL_DEVICE_SHARED_SYSTEM_MEM_CAPABILITIES_INTEL, 0}}; check(params); } XEHPTEST_F(GetDeviceInfoMemCapabilitiesTest, GivenA0ThenUsmHostMemSupportIsEnabledByDefault) { std::vector params = {{CL_DEVICE_HOST_MEM_CAPABILITIES_INTEL, CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL}}; check(params); } XEHPTEST_F(GetDeviceInfoMemCapabilitiesTest, GivenDebugVariableIsEnabledThenUsmHostMemSupportIsEnabled) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.EnableHostUsmSupport.set(1); std::vector params = {{CL_DEVICE_HOST_MEM_CAPABILITIES_INTEL, CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL}}; check(params); } XEHPTEST_F(GetDeviceInfoMemCapabilitiesTest, GivenA0AndDebugVariableIsDisabledThenUsmHostMemSupportIsDisabled) { VariableBackup backupHwInfo(defaultHwInfo.get()); const auto &hwInfoConfig = *HwInfoConfig::get(defaultHwInfo->platform.eProductFamily); defaultHwInfo->platform.usRevId = hwInfoConfig.getHwRevIdFromStepping(REVISION_A0, *defaultHwInfo); DebugManagerStateRestore dbgRestorer; DebugManager.flags.EnableHostUsmSupport.set(0); std::vector params = {{CL_DEVICE_HOST_MEM_CAPABILITIES_INTEL, 0}}; std::vector enabledParams = {{CL_DEVICE_HOST_MEM_CAPABILITIES_INTEL, CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL}}; check(params); DebugManager.flags.ForceLocalMemoryAccessMode.set(1); check(params); DebugManager.flags.EnableHostUsmSupport.set(1); check(enabledParams); DebugManager.flags.EnableHostUsmSupport.set(-1); check(params); } XEHPTEST_F(GetDeviceInfoMemCapabilitiesTest, 
GivenB0ThenUsmHostMemSupportIsSetCorrectly) { const auto &hwInfoConfig = *HwInfoConfig::get(defaultHwInfo->platform.eProductFamily); VariableBackup backupHwInfo(defaultHwInfo.get()); defaultHwInfo->platform.usRevId = hwInfoConfig.getHwRevIdFromStepping(REVISION_B, *defaultHwInfo); std::vector params = {{CL_DEVICE_HOST_MEM_CAPABILITIES_INTEL, CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL}}; std::vector disabledParameters = {{CL_DEVICE_HOST_MEM_CAPABILITIES_INTEL, 0u}}; check(params); { DebugManagerStateRestore dbgRestorer; DebugManager.flags.EnableHostUsmSupport.set(0); check(disabledParameters); } { DebugManagerStateRestore dbgRestorer; DebugManager.flags.EnableHostUsmSupport.set(1); check(params); } } compute-runtime-22.14.22890/opencl/test/unit_test/xe_hp_core/xehp/hw_helper_tests_xehp.inl000066400000000000000000000115401422164147700316360ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/helpers/hw_helper_tests.h" using HwHelperTestsXeHP = HwHelperTest; HWTEST_EXCLUDE_PRODUCT(HwHelperTest, WhenIsBankOverrideRequiredIsCalledThenFalseIsReturned, IGFX_XE_HP_SDV); XEHPTEST_F(HwHelperTestsXeHP, givenXEHPWhenIsBankOverrideRequiredIsCalledThenCorrectValueIsReturned) { DebugManagerStateRestore restore; auto &helper = HwHelper::get(renderCoreFamily); const auto &hwInfoConfig = *HwInfoConfig::get(productFamily); auto hwInfo = *defaultHwInfo; hwInfo.gtSystemInfo.MultiTileArchInfo.IsValid = true; { hwInfo.gtSystemInfo.MultiTileArchInfo.TileCount = 4; hwInfo.platform.usRevId = hwInfoConfig.getHwRevIdFromStepping(REVISION_A0, hwInfo); EXPECT_TRUE(helper.isBankOverrideRequired(hwInfo)); } { hwInfo.gtSystemInfo.MultiTileArchInfo.TileCount = 4; hwInfo.platform.usRevId = hwInfoConfig.getHwRevIdFromStepping(REVISION_B, hwInfo); EXPECT_FALSE(helper.isBankOverrideRequired(hwInfo)); } { hwInfo.gtSystemInfo.MultiTileArchInfo.TileCount = 2; hwInfo.platform.usRevId = 
hwInfoConfig.getHwRevIdFromStepping(REVISION_A0, hwInfo); EXPECT_FALSE(helper.isBankOverrideRequired(hwInfo)); } { DebugManager.flags.ForceMemoryBankIndexOverride.set(1); hwInfo.gtSystemInfo.MultiTileArchInfo.TileCount = 1; hwInfo.platform.usRevId = hwInfoConfig.getHwRevIdFromStepping(REVISION_A0, hwInfo); EXPECT_TRUE(helper.isBankOverrideRequired(hwInfo)); } { DebugManager.flags.ForceMemoryBankIndexOverride.set(0); hwInfo.gtSystemInfo.MultiTileArchInfo.TileCount = 4; hwInfo.platform.usRevId = hwInfoConfig.getHwRevIdFromStepping(REVISION_A0, hwInfo); EXPECT_FALSE(helper.isBankOverrideRequired(hwInfo)); } } XEHPTEST_F(HwHelperTestsXeHP, givenRcsDisabledWhenGetGpgpuEnginesCalledThenDontSetRcs) { HardwareInfo hwInfo = *defaultHwInfo; hwInfo.featureTable.flags.ftrCCSNode = true; hwInfo.featureTable.ftrBcsInfo = 1; hwInfo.featureTable.flags.ftrRcsNode = false; hwInfo.capabilityTable.blitterOperationsSupported = true; hwInfo.capabilityTable.defaultEngineType = aub_stream::ENGINE_CCS; hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled = 4; auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); EXPECT_EQ(8u, device->allEngines.size()); auto &engines = HwHelperHw::get().getGpgpuEngineInstances(hwInfo); EXPECT_EQ(8u, engines.size()); EXPECT_EQ(aub_stream::ENGINE_CCS, engines[0].first); EXPECT_EQ(aub_stream::ENGINE_CCS1, engines[1].first); EXPECT_EQ(aub_stream::ENGINE_CCS2, engines[2].first); EXPECT_EQ(aub_stream::ENGINE_CCS3, engines[3].first); EXPECT_EQ(hwInfo.capabilityTable.defaultEngineType, engines[4].first); // low priority EXPECT_EQ(hwInfo.capabilityTable.defaultEngineType, engines[5].first); // internal EXPECT_EQ(aub_stream::ENGINE_BCS, engines[6].first); EXPECT_EQ(aub_stream::ENGINE_BCS, engines[7].first); } XEHPTEST_F(HwHelperTestsXeHP, givenRcsDisabledButDebugVariableSetWhenGetGpgpuEnginesCalledThenSetRcs) { HardwareInfo hwInfo = *defaultHwInfo; hwInfo.featureTable.flags.ftrCCSNode = true; hwInfo.featureTable.ftrBcsInfo = 1; 
hwInfo.featureTable.flags.ftrRcsNode = false; hwInfo.capabilityTable.blitterOperationsSupported = true; hwInfo.capabilityTable.defaultEngineType = aub_stream::ENGINE_CCS; hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled = 4; DebugManagerStateRestore restore; DebugManager.flags.NodeOrdinal.set(static_cast(aub_stream::EngineType::ENGINE_RCS)); auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); EXPECT_EQ(9u, device->allEngines.size()); auto &engines = HwHelperHw::get().getGpgpuEngineInstances(hwInfo); EXPECT_EQ(9u, engines.size()); EXPECT_EQ(aub_stream::ENGINE_CCS, engines[0].first); EXPECT_EQ(aub_stream::ENGINE_CCS1, engines[1].first); EXPECT_EQ(aub_stream::ENGINE_CCS2, engines[2].first); EXPECT_EQ(aub_stream::ENGINE_CCS3, engines[3].first); EXPECT_EQ(aub_stream::ENGINE_RCS, engines[4].first); EXPECT_EQ(aub_stream::ENGINE_RCS, engines[5].first); // low priority EXPECT_EQ(aub_stream::ENGINE_RCS, engines[6].first); // internal EXPECT_EQ(aub_stream::ENGINE_BCS, engines[7].first); EXPECT_EQ(aub_stream::ENGINE_BCS, engines[8].first); } XEHPTEST_F(HwHelperTestsXeHP, GivenVariousValuesWhenComputeSlmSizeIsCalledThenCorrectValueIsReturned) { auto &hwInfo = pDevice->getHardwareInfo(); for (auto &testInput : computeSlmValuesXeHPAndLaterTestsInput) { EXPECT_EQ(testInput.expected, HwHelperHw::get().computeSlmValues(hwInfo, testInput.slmSize)); } } compute-runtime-22.14.22890/opencl/test/unit_test/xe_hp_core/xehp/hw_info_tests_xehp.inl000066400000000000000000000061351422164147700313160ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/hw_info.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/default_hw_info.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/helpers/gtest_helpers.h" using namespace NEO; using XeHPHwInfoTest = ::testing::Test; 
XEHPTEST_F(XeHPHwInfoTest, whenSetupHardwareInfoWithSetupFeatureTableFlagTrueOrFalseIsCalledThenFeatureTableHasCorrectValues) { HardwareInfo hwInfo = *defaultHwInfo; FeatureTable &featureTable = hwInfo.featureTable; GT_SYSTEM_INFO >SystemInfo = hwInfo.gtSystemInfo; EXPECT_FALSE(featureTable.flags.ftrLocalMemory); EXPECT_FALSE(featureTable.flags.ftrFlatPhysCCS); EXPECT_FALSE(featureTable.flags.ftrLinearCCS); EXPECT_FALSE(featureTable.flags.ftrE2ECompression); EXPECT_FALSE(featureTable.flags.ftrCCSNode); EXPECT_FALSE(featureTable.flags.ftrCCSRing); EXPECT_FALSE(featureTable.flags.ftrMultiTileArch); EXPECT_FALSE(featureTable.flags.ftrCCSMultiInstance); EXPECT_FALSE(featureTable.flags.ftrLinearCCS); EXPECT_FALSE(gtSystemInfo.IsL3HashModeEnabled); EXPECT_FALSE(gtSystemInfo.IsDynamicallyPopulated); EXPECT_EQ(8u, gtSystemInfo.CsrSizeInMb); XE_HP_SDV_CONFIG::setupHardwareInfo(&hwInfo, false); EXPECT_FALSE(featureTable.flags.ftrLocalMemory); EXPECT_FALSE(featureTable.flags.ftrFlatPhysCCS); EXPECT_FALSE(featureTable.flags.ftrLinearCCS); EXPECT_FALSE(featureTable.flags.ftrE2ECompression); EXPECT_FALSE(featureTable.flags.ftrCCSNode); EXPECT_FALSE(featureTable.flags.ftrCCSRing); EXPECT_FALSE(featureTable.flags.ftrMultiTileArch); EXPECT_FALSE(featureTable.flags.ftrCCSMultiInstance); EXPECT_FALSE(featureTable.flags.ftrLinearCCS); EXPECT_FALSE(gtSystemInfo.IsL3HashModeEnabled); EXPECT_FALSE(gtSystemInfo.IsDynamicallyPopulated); EXPECT_EQ(8u, gtSystemInfo.CsrSizeInMb); XE_HP_SDV_CONFIG::setupHardwareInfo(&hwInfo, true); EXPECT_TRUE(featureTable.flags.ftrLocalMemory); EXPECT_TRUE(featureTable.flags.ftrFlatPhysCCS); EXPECT_TRUE(featureTable.flags.ftrLinearCCS); EXPECT_TRUE(featureTable.flags.ftrE2ECompression); EXPECT_TRUE(featureTable.flags.ftrCCSNode); EXPECT_TRUE(featureTable.flags.ftrCCSRing); EXPECT_TRUE(featureTable.flags.ftrMultiTileArch); EXPECT_TRUE(featureTable.flags.ftrCCSMultiInstance); EXPECT_TRUE(featureTable.flags.ftrLinearCCS); 
EXPECT_FALSE(gtSystemInfo.IsL3HashModeEnabled); EXPECT_FALSE(gtSystemInfo.IsDynamicallyPopulated); EXPECT_EQ(8u, gtSystemInfo.CsrSizeInMb); } XEHPTEST_F(XeHPHwInfoTest, givenAlreadyInitializedHwInfoWhenSetupCalledThenDontOverride) { HardwareInfo hwInfo = *defaultHwInfo; hwInfo.gtSystemInfo.SliceCount = 0; XE_HP_SDV_CONFIG::setupHardwareInfo(&hwInfo, false); EXPECT_NE(0u, hwInfo.gtSystemInfo.SliceCount); auto expectedValue = ++hwInfo.gtSystemInfo.SliceCount; XE_HP_SDV_CONFIG::setupHardwareInfo(&hwInfo, false); EXPECT_EQ(expectedValue, hwInfo.gtSystemInfo.SliceCount); }compute-runtime-22.14.22890/opencl/test/unit_test/xe_hp_core/xehp/linux/000077500000000000000000000000001422164147700260455ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/xe_hp_core/xehp/linux/CMakeLists.txt000066400000000000000000000005421422164147700306060ustar00rootroot00000000000000# # Copyright (C) 2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_xe_hp_core_xehp_linux ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/hw_info_config_tests_xehp.cpp ) if(UNIX) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_xe_hp_core_xehp_linux}) add_subdirectories() endif() compute-runtime-22.14.22890/opencl/test/unit_test/xe_hp_core/xehp/linux/dll/000077500000000000000000000000001422164147700266205ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/xe_hp_core/xehp/linux/dll/CMakeLists.txt000066400000000000000000000005051422164147700313600ustar00rootroot00000000000000# # Copyright (C) 2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_linux_dll_tests_xe_hp_core_xehp ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/device_id_tests_xehp.cpp ) target_sources(igdrcl_linux_dll_tests PRIVATE ${IGDRCL_SRCS_linux_dll_tests_xe_hp_core_xehp}) 
compute-runtime-22.14.22890/opencl/test/unit_test/xe_hp_core/xehp/linux/dll/device_id_tests_xehp.cpp000066400000000000000000000046451422164147700335160ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/fixtures/linux/device_id_fixture.h" using namespace NEO; TEST_F(DeviceIdTests, GivenXeHpSdvSupportedDeviceIdThenConfigIsCorrect) { std::array expectedDescriptors = {{{0x0201, &XE_HP_SDV_CONFIG::hwInfo, &XE_HP_SDV_CONFIG::setupHardwareInfo}, {0x0202, &XE_HP_SDV_CONFIG::hwInfo, &XE_HP_SDV_CONFIG::setupHardwareInfo}, {0x0203, &XE_HP_SDV_CONFIG::hwInfo, &XE_HP_SDV_CONFIG::setupHardwareInfo}, {0x0204, &XE_HP_SDV_CONFIG::hwInfo, &XE_HP_SDV_CONFIG::setupHardwareInfo}, {0x0205, &XE_HP_SDV_CONFIG::hwInfo, &XE_HP_SDV_CONFIG::setupHardwareInfo}, {0x0206, &XE_HP_SDV_CONFIG::hwInfo, &XE_HP_SDV_CONFIG::setupHardwareInfo}, {0x0207, &XE_HP_SDV_CONFIG::hwInfo, &XE_HP_SDV_CONFIG::setupHardwareInfo}, {0x0208, &XE_HP_SDV_CONFIG::hwInfo, &XE_HP_SDV_CONFIG::setupHardwareInfo}, {0x0209, &XE_HP_SDV_CONFIG::hwInfo, &XE_HP_SDV_CONFIG::setupHardwareInfo}, {0x020A, &XE_HP_SDV_CONFIG::hwInfo, &XE_HP_SDV_CONFIG::setupHardwareInfo}, {0x020B, &XE_HP_SDV_CONFIG::hwInfo, &XE_HP_SDV_CONFIG::setupHardwareInfo}, {0x020C, &XE_HP_SDV_CONFIG::hwInfo, &XE_HP_SDV_CONFIG::setupHardwareInfo}, {0x020D, &XE_HP_SDV_CONFIG::hwInfo, &XE_HP_SDV_CONFIG::setupHardwareInfo}, {0x020E, &XE_HP_SDV_CONFIG::hwInfo, &XE_HP_SDV_CONFIG::setupHardwareInfo}, {0x020F, &XE_HP_SDV_CONFIG::hwInfo, &XE_HP_SDV_CONFIG::setupHardwareInfo}, {0x0210, &XE_HP_SDV_CONFIG::hwInfo, &XE_HP_SDV_CONFIG::setupHardwareInfo}}}; testImpl(expectedDescriptors); } hw_info_config_tests_xehp.cpp000066400000000000000000000031361422164147700337210ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/xe_hp_core/xehp/linux/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include 
"shared/source/os_interface/os_interface.h" #include "shared/test/common/helpers/default_hw_info.h" #include "shared/test/common/libult/linux/drm_mock.h" #include "shared/test/unit_test/helpers/gtest_helpers.h" #include "opencl/test/unit_test/os_interface/linux/hw_info_config_linux_tests.h" using namespace NEO; struct HwInfoConfigTestLinuxXeHp : HwInfoConfigTestLinux { void SetUp() override { HwInfoConfigTestLinux::SetUp(); drm = new DrmMock(*executionEnvironment->rootDeviceEnvironments[0]); osInterface->setDriverModel(std::unique_ptr(drm)); drm->storedDeviceID = 0x0201; } }; XEHPTEST_F(HwInfoConfigTestLinuxXeHp, WhenConfiguringHwInfoThenZeroIsReturned) { auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); } XEHPTEST_F(HwInfoConfigTestLinuxXeHp, GivenXeHpSdvWhenConfigureHardwareCustomThenKmdNotifyIsEnabled) { HwInfoConfig *hwInfoConfig = HwInfoConfig::get(productFamily); hwInfoConfig->configureHardwareCustom(&pInHwInfo, osInterface); EXPECT_TRUE(pInHwInfo.capabilityTable.kmdNotifyProperties.enableKmdNotify); EXPECT_EQ(150ll, pInHwInfo.capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds); EXPECT_TRUE(pInHwInfo.capabilityTable.kmdNotifyProperties.enableQuickKmdSleepForDirectSubmission); EXPECT_EQ(20ll, pInHwInfo.capabilityTable.kmdNotifyProperties.delayQuickKmdSleepForDirectSubmissionMicroseconds); } compute-runtime-22.14.22890/opencl/test/unit_test/xe_hp_core/xehp/memory_manager_tests_xehp.inl000066400000000000000000000020201422164147700326540ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/mocks/mock_execution_environment.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "shared/test/common/test_macros/test.h" using namespace NEO; using MemoryManagerTestsXeHP = ::testing::Test; 
XEHPTEST_F(MemoryManagerTestsXeHP, givenEnabledLocalMemoryWhenLinearStreamIsAllocatedInPreferredPoolThenLocalMemoryPoolIsUsed) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MockMemoryManager memoryManager(false, true, executionEnvironment); auto allocation = memoryManager.allocateGraphicsMemoryInPreferredPool({mockRootDeviceIndex, MemoryConstants::pageSize, AllocationType::LINEAR_STREAM, mockDeviceBitfield}, nullptr); EXPECT_EQ(MemoryPool::LocalMemory, allocation->getMemoryPool()); EXPECT_TRUE(memoryManager.allocationInDevicePoolCreated); memoryManager.freeGraphicsMemory(allocation); } compute-runtime-22.14.22890/opencl/test/unit_test/xe_hp_core/xehp/sampler_tests_xehp.inl000066400000000000000000000033741422164147700313320ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/os_interface/hw_info_config.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include using namespace NEO; typedef Test XeHPSamplerTest; XEHPTEST_F(XeHPSamplerTest, givenXeHPSamplerWhenUsingDefaultFilteringAndAppendSamplerStateParamsThenDisableLowQualityFilter) { EXPECT_FALSE(DebugManager.flags.ForceSamplerLowFilteringPrecision.get()); typedef typename FamilyType::SAMPLER_STATE SAMPLER_STATE; auto state = FamilyType::cmdInitSamplerState; EXPECT_EQ(SAMPLER_STATE::LOW_QUALITY_FILTER_DISABLE, state.getLowQualityFilter()); HwInfoConfig::get(defaultHwInfo->platform.eProductFamily)->adjustSamplerState(&state, *defaultHwInfo); EXPECT_EQ(SAMPLER_STATE::LOW_QUALITY_FILTER_DISABLE, state.getLowQualityFilter()); } XEHPTEST_F(XeHPSamplerTest, givenXeHPSamplerWhenForcingLowQualityFilteringAndAppendSamplerStateParamsThenEnableLowQualityFilter) { DebugManagerStateRestore dbgRestore; 
DebugManager.flags.ForceSamplerLowFilteringPrecision.set(true); EXPECT_TRUE(DebugManager.flags.ForceSamplerLowFilteringPrecision.get()); typedef typename FamilyType::SAMPLER_STATE SAMPLER_STATE; auto state = FamilyType::cmdInitSamplerState; EXPECT_EQ(SAMPLER_STATE::LOW_QUALITY_FILTER_DISABLE, state.getLowQualityFilter()); HwInfoConfig::get(defaultHwInfo->platform.eProductFamily)->adjustSamplerState(&state, *defaultHwInfo); EXPECT_EQ(SAMPLER_STATE::LOW_QUALITY_FILTER_ENABLE, state.getLowQualityFilter()); } source_level_debugger_csr_tests_xehp.cpp000066400000000000000000000125551422164147700350130ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/xe_hp_core/xehp/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/hw_info_config.h" #include "shared/source/source_level_debugger/source_level_debugger.h" #include "shared/test/common/cmd_parse/hw_parse.h" #include "shared/test/common/helpers/dispatch_flags_helper.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "opencl/test/unit_test/source_level_debugger/source_level_debugger_csr_tests.h" #include using CommandStreamReceiverWithActiveDebuggerXehpTest = CommandStreamReceiverWithActiveDebuggerTest; XEHPTEST_F(CommandStreamReceiverWithActiveDebuggerXehpTest, GivenASteppingAndActiveDebuggerAndWhenFlushTaskIsCalledThenAlwaysProgramStateBaseAddressAndGlobalSip) { using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END; using MI_NOOP = typename FamilyType::MI_NOOP; hwInfo = 
platform()->peekExecutionEnvironment()->rootDeviceEnvironments[0]->getMutableHardwareInfo(); const auto &hwInfoConfig = *HwInfoConfig::get(hwInfo->platform.eProductFamily); hwInfo->platform.usRevId = hwInfoConfig.getHwRevIdFromStepping(REVID::REVISION_A0, *hwInfo); auto mockCsr = createCSR(); mockCsr->overrideDispatchPolicy(DispatchMode::ImmediateDispatch); CommandQueueHw commandQueue(nullptr, device.get(), 0, false); auto &commandStream = commandQueue.getCS(4096u); auto &csrStream = mockCsr->getCS(0); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); void *buffer = alignedMalloc(MemoryConstants::pageSize, MemoryConstants::pageSize64k); std::unique_ptr allocation(new MockGraphicsAllocation(buffer, MemoryConstants::pageSize)); std::unique_ptr heap(new IndirectHeap(allocation.get())); auto &neoDevice = device->getDevice(); mockCsr->flushTask(commandStream, 0, heap.get(), heap.get(), heap.get(), 0, dispatchFlags, neoDevice); mockCsr->flushBatchedSubmissions(); auto noops = reinterpret_cast(commandStream.getSpace(8 * sizeof(MI_NOOP))); for (int i = 0; i < 8; i++) { noops[i] = FamilyType::cmdInitNoop; } mockCsr->flushTask(commandStream, 0, heap.get(), heap.get(), heap.get(), 0, dispatchFlags, neoDevice); auto sipAllocation = SipKernel::getSipKernel(neoDevice).getSipAllocation(); HardwareParse hwParser; hwParser.parseCommands(csrStream); hwParser.parseCommands(commandStream); auto itorStateBaseAddr = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); auto itorBbEnd = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); auto itorStateBaseAddr2 = find(std::next(itorStateBaseAddr), hwParser.cmdList.end()); ASSERT_NE(hwParser.cmdList.end(), itorStateBaseAddr); ASSERT_NE(hwParser.cmdList.end(), itorStateBaseAddr2); auto itorGlobalSip1 = findMmio(itorStateBaseAddr, itorBbEnd, GlobalSipRegister::registerOffset); auto itorGlobalSip2 = findMmio(std::next(itorGlobalSip1), itorBbEnd, GlobalSipRegister::registerOffset); auto itorGlobalSip3 = 
findMmio(itorBbEnd, hwParser.cmdList.end(), GlobalSipRegister::registerOffset); auto itorGlobalSip4 = findMmio(std::next(itorGlobalSip3), hwParser.cmdList.end(), GlobalSipRegister::registerOffset); ASSERT_NE(hwParser.cmdList.end(), itorGlobalSip1); ASSERT_NE(hwParser.cmdList.end(), itorGlobalSip2); ASSERT_NE(hwParser.cmdList.end(), itorGlobalSip3); ASSERT_NE(hwParser.cmdList.end(), itorGlobalSip4); EXPECT_NE(itorGlobalSip1, itorGlobalSip2); auto expectedSipPosition = --itorBbEnd; EXPECT_EQ(expectedSipPosition, itorGlobalSip2); auto itorBbEnd2 = find(itorGlobalSip3, hwParser.cmdList.end()); expectedSipPosition = --itorBbEnd2; EXPECT_EQ(expectedSipPosition, itorGlobalSip4); MI_LOAD_REGISTER_IMM *globalSip = genCmdCast(*itorGlobalSip1); auto sipAddress = globalSip->getDataDword(); EXPECT_EQ(sipAllocation->getGpuAddressToPatch(), sipAddress & 0xfffffff8); globalSip = genCmdCast(*itorGlobalSip2); auto sipAddress2 = globalSip->getDataDword(); EXPECT_EQ(0u, sipAddress2); globalSip = genCmdCast(*itorGlobalSip3); sipAddress = globalSip->getDataDword(); EXPECT_EQ(sipAllocation->getGpuAddressToPatch(), sipAddress & 0xfffffff8); globalSip = genCmdCast(*itorGlobalSip4); sipAddress2 = globalSip->getDataDword(); EXPECT_EQ(0u, sipAddress2); alignedFree(buffer); } compute-runtime-22.14.22890/opencl/test/unit_test/xe_hp_core/xehp/test_buffer_xe_hp_sdv.inl000066400000000000000000000331121422164147700317610ustar00rootroot00000000000000/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_container/implicit_scaling.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_platform.h" using XeHpSdvBufferTests = ::testing::Test; 
// Tests of the two RENDER_SURFACE_STATE bits DisableSupportForMultiGpuAtomics
// and DisableSupportForMultiGpuPartialWrites as programmed by
// Buffer::setArgStateful() / Buffer::setSurfaceState(). Both bits are set in
// FamilyType::cmdInitRenderSurfaceState (asserted in several tests below).
// NOTE(review): std::unique_ptr, WithParamInterface and static_cast appear
// without template arguments below; they look lost in extraction - confirm.

// Single tile (CreateMultipleSubDevices=1, no generic sub-devices), writable
// buffer, last two setArgStateful arguments false: both bits are cleared by
// the test and expected to be set again afterwards.
XEHPTEST_F(XeHpSdvBufferTests, givenContextTypeDefaultWhenBufferIsWritableAndOnlyOneTileIsAvailableThenRemainFlagsToTrue) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.CreateMultipleSubDevices.set(1);
    initPlatform();
    EXPECT_EQ(0u, platform()->getClDevice(0)->getNumGenericSubDevices());
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
    MockContext context(platform()->getClDevice(0));
    context.contextType = ContextType::CONTEXT_TYPE_DEFAULT;

    size_t size = 0x1000;
    auto retVal = CL_SUCCESS;
    auto buffer = std::unique_ptr(
        Buffer::create(
            &context,
            CL_MEM_READ_WRITE,
            size,
            nullptr,
            retVal));
    EXPECT_EQ(CL_SUCCESS, retVal);

    RENDER_SURFACE_STATE surfaceState = FamilyType::cmdInitRenderSurfaceState;
    EXPECT_TRUE(surfaceState.getDisableSupportForMultiGpuAtomics());
    EXPECT_TRUE(surfaceState.getDisableSupportForMultiGpuPartialWrites());

    surfaceState.setDisableSupportForMultiGpuAtomics(false);
    surfaceState.setDisableSupportForMultiGpuPartialWrites(false);
    buffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice(), false, false);

    EXPECT_TRUE(surfaceState.getDisableSupportForMultiGpuAtomics());
    EXPECT_TRUE(surfaceState.getDisableSupportForMultiGpuPartialWrites());
}

// Four sub-devices, default context, writable buffer, last two
// setArgStateful arguments true: both bits are expected to be cleared.
XEHPTEST_F(XeHpSdvBufferTests, givenContextTypeDefaultWhenBufferIsWritableThenFlipPartialFlagsToFalse) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.CreateMultipleSubDevices.set(4);
    initPlatform();

    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
    MockContext context(platform()->getClDevice(0));
    context.contextType = ContextType::CONTEXT_TYPE_DEFAULT;

    size_t size = 0x1000;
    auto retVal = CL_SUCCESS;
    auto buffer = std::unique_ptr(
        Buffer::create(
            &context,
            CL_MEM_READ_WRITE,
            size,
            nullptr,
            retVal));
    EXPECT_EQ(CL_SUCCESS, retVal);

    RENDER_SURFACE_STATE surfaceState = FamilyType::cmdInitRenderSurfaceState;
    EXPECT_TRUE(surfaceState.getDisableSupportForMultiGpuAtomics());
    EXPECT_TRUE(surfaceState.getDisableSupportForMultiGpuPartialWrites());

    buffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice(), true, true);

    EXPECT_FALSE(surfaceState.getDisableSupportForMultiGpuAtomics());
    EXPECT_FALSE(surfaceState.getDisableSupportForMultiGpuPartialWrites());
}

// Same as the previous test with CONTEXT_TYPE_UNRESTRICTIVE: both bits are
// expected to be cleared.
XEHPTEST_F(XeHpSdvBufferTests, givenContextTypeUnrestrictiveWhenBufferIsWritableThenFlipPartialFlagsToFalse) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.CreateMultipleSubDevices.set(4);
    initPlatform();

    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
    MockContext context(platform()->getClDevice(0));
    context.contextType = ContextType::CONTEXT_TYPE_UNRESTRICTIVE;

    size_t size = 0x1000;
    auto retVal = CL_SUCCESS;
    auto buffer = std::unique_ptr(
        Buffer::create(
            &context,
            CL_MEM_READ_WRITE,
            size,
            nullptr,
            retVal));
    EXPECT_EQ(CL_SUCCESS, retVal);

    RENDER_SURFACE_STATE surfaceState = FamilyType::cmdInitRenderSurfaceState;
    EXPECT_TRUE(surfaceState.getDisableSupportForMultiGpuAtomics());
    EXPECT_TRUE(surfaceState.getDisableSupportForMultiGpuPartialWrites());

    buffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice(), true, true);

    EXPECT_FALSE(surfaceState.getDisableSupportForMultiGpuAtomics());
    EXPECT_FALSE(surfaceState.getDisableSupportForMultiGpuPartialWrites());
}

// Read-only buffer in a default context: both bits are cleared by the test
// and expected to be set again by setArgStateful().
XEHPTEST_F(XeHpSdvBufferTests, givenContextTypeDefaultWhenBufferIsNotWritableThenRemainPartialFlagsToTrue) {
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
    MockContext context;
    context.contextType = ContextType::CONTEXT_TYPE_DEFAULT;

    size_t size = 0x1000;
    auto retVal = CL_SUCCESS;

    auto buffer = std::unique_ptr(Buffer::create(
        &context,
        CL_MEM_READ_ONLY,
        size,
        nullptr,
        retVal));
    EXPECT_EQ(CL_SUCCESS, retVal);

    RENDER_SURFACE_STATE surfaceState = FamilyType::cmdInitRenderSurfaceState;
    EXPECT_TRUE(surfaceState.getDisableSupportForMultiGpuAtomics());
    EXPECT_TRUE(surfaceState.getDisableSupportForMultiGpuPartialWrites());

    surfaceState.setDisableSupportForMultiGpuAtomics(false);
    surfaceState.setDisableSupportForMultiGpuPartialWrites(false);
    buffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice(), true, false);

    EXPECT_TRUE(surfaceState.getDisableSupportForMultiGpuAtomics());
    EXPECT_TRUE(surfaceState.getDisableSupportForMultiGpuPartialWrites());
}

// Writable buffer in a CONTEXT_TYPE_SPECIALIZED context: both bits are
// cleared by the test and expected to be set again by setArgStateful().
XEHPTEST_F(XeHpSdvBufferTests, givenContextTypeSpecializedWhenBufferIsWritableThenRemainPartialFlagsToTrue) {
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
    MockContext context;
    context.contextType = ContextType::CONTEXT_TYPE_SPECIALIZED;

    size_t size = 0x1000;
    auto retVal = CL_SUCCESS;

    auto buffer = std::unique_ptr(Buffer::create(
        &context,
        CL_MEM_READ_WRITE,
        size,
        nullptr,
        retVal));
    EXPECT_EQ(CL_SUCCESS, retVal);

    RENDER_SURFACE_STATE surfaceState = FamilyType::cmdInitRenderSurfaceState;
    EXPECT_TRUE(surfaceState.getDisableSupportForMultiGpuAtomics());
    EXPECT_TRUE(surfaceState.getDisableSupportForMultiGpuPartialWrites());

    surfaceState.setDisableSupportForMultiGpuAtomics(false);
    surfaceState.setDisableSupportForMultiGpuPartialWrites(false);
    buffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice(), false, false);

    EXPECT_TRUE(surfaceState.getDisableSupportForMultiGpuAtomics());
    EXPECT_TRUE(surfaceState.getDisableSupportForMultiGpuPartialWrites());
}

// ForceMultiGpuAtomics / ForceMultiGpuPartialWrites set to 0 and then 1: the
// two surface-state bits are expected to read back the forced value.
XEHPTEST_F(XeHpSdvBufferTests, givenDebugFlagForMultiTileSupportWhenSurfaceStateIsSetThenValuesMatch) {
    DebugManagerStateRestore restore;
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
    MockContext context;
    context.contextType = ContextType::CONTEXT_TYPE_SPECIALIZED;

    size_t size = 0x1000;
    auto retVal = CL_SUCCESS;

    auto buffer = std::unique_ptr(Buffer::create(
        &context,
        CL_MEM_READ_WRITE,
        size,
        nullptr,
        retVal));
    EXPECT_EQ(CL_SUCCESS, retVal);

    RENDER_SURFACE_STATE surfaceState = FamilyType::cmdInitRenderSurfaceState;

    DebugManager.flags.ForceMultiGpuAtomics.set(0);
    DebugManager.flags.ForceMultiGpuPartialWrites.set(0);

    buffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice(), false, false);

    EXPECT_EQ(0u, surfaceState.getDisableSupportForMultiGpuAtomics());
    EXPECT_EQ(0u, surfaceState.getDisableSupportForMultiGpuPartialWrites());

    DebugManager.flags.ForceMultiGpuAtomics.set(1);
    DebugManager.flags.ForceMultiGpuPartialWrites.set(1);

    buffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice(), false, false);

    EXPECT_EQ(1u, surfaceState.getDisableSupportForMultiGpuAtomics());
    EXPECT_EQ(1u, surfaceState.getDisableSupportForMultiGpuPartialWrites());
}

// Buffer::setSurfaceState() called with a raw pointer and a null graphics
// allocation: both bits are cleared by the test and expected to be set again.
XEHPTEST_F(XeHpSdvBufferTests, givenNullContextWhenBufferAllocationIsNullThenRemainPartialFlagsToTrue) {
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
    RENDER_SURFACE_STATE surfaceState = FamilyType::cmdInitRenderSurfaceState;

    auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()));

    auto size = MemoryConstants::pageSize;
    auto ptr = alignedMalloc(size, MemoryConstants::pageSize);

    surfaceState.setDisableSupportForMultiGpuAtomics(false);
    surfaceState.setDisableSupportForMultiGpuPartialWrites(false);
    Buffer::setSurfaceState(device.get(), &surfaceState, false, false, size, ptr, 0, nullptr, 0, 0, false, false);

    EXPECT_TRUE(surfaceState.getDisableSupportForMultiGpuAtomics());
    EXPECT_TRUE(surfaceState.getDisableSupportForMultiGpuPartialWrites());

    alignedFree(ptr);
}

// Parameterized fixture. The tests below unpack five values from GetParam():
// number of available devices, buffer flags, useGlobalAtomics,
// areMultipleSubDevicesInContext and enableMultiGpuAtomicsOptimization.
struct MultiGpuGlobalAtomicsBufferTest : public XeHpSdvBufferTests,
                                         public ::testing::WithParamInterface> {
};

// setArgStateful() variant. The atomics bit is expected to be the negation
// of `enabled`, where `enabled` is the implicit-scaling state, or, when the
// optimization flag is on,
// useGlobalAtomics && (implicitScaling || areMultipleSubDevicesInContext).
XEHPTEST_P(MultiGpuGlobalAtomicsBufferTest, givenSetArgStatefulCalledThenDisableSupportForMultiGpuAtomicsIsSetCorrectly) {
    unsigned int numAvailableDevices, bufferFlags;
    bool useGlobalAtomics, areMultipleSubDevicesInContext, enableMultiGpuAtomicsOptimization;
    std::tie(numAvailableDevices, bufferFlags, useGlobalAtomics, areMultipleSubDevicesInContext, enableMultiGpuAtomicsOptimization) = GetParam();

    DebugManagerStateRestore restorer;
    DebugManager.flags.CreateMultipleSubDevices.set(numAvailableDevices);
    DebugManager.flags.EnableMultiGpuAtomicsOptimization.set(enableMultiGpuAtomicsOptimization);
    initPlatform();

    // A single device is reported as having no generic sub-devices.
    if (numAvailableDevices == 1) {
        EXPECT_EQ(0u, platform()->getClDevice(0)->getNumGenericSubDevices());
    } else {
        EXPECT_EQ(numAvailableDevices, platform()->getClDevice(0)->getNumGenericSubDevices());
    }
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
    MockContext context(platform()->getClDevice(0));
    context.contextType = ContextType::CONTEXT_TYPE_DEFAULT;

    size_t size = 0x1000;
    auto retVal = CL_SUCCESS;
    auto buffer = std::unique_ptr(
        Buffer::create(
            &context,
            bufferFlags,
            size,
            nullptr,
            retVal));
    EXPECT_EQ(CL_SUCCESS, retVal);

    RENDER_SURFACE_STATE surfaceState = FamilyType::cmdInitRenderSurfaceState;
    EXPECT_TRUE(surfaceState.getDisableSupportForMultiGpuAtomics());

    surfaceState.setDisableSupportForMultiGpuAtomics(false);
    buffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice(), useGlobalAtomics, areMultipleSubDevicesInContext);

    DeviceBitfield deviceBitfield{static_cast(maxNBitValue(numAvailableDevices))};
    bool implicitScaling = ImplicitScalingHelper::isImplicitScalingEnabled(deviceBitfield, true);
    bool enabled = implicitScaling;

    if (enableMultiGpuAtomicsOptimization) {
        enabled = useGlobalAtomics && (enabled || areMultipleSubDevicesInContext);
    }

    EXPECT_EQ(!enabled, surfaceState.getDisableSupportForMultiGpuAtomics());
}

// Buffer::setSurfaceState() variant of the previous test, driven through a
// MockGraphicsAllocation whose writable flag is set when bufferFlags equals
// CL_MEM_READ_WRITE. The expectation is computed the same way.
XEHPTEST_P(MultiGpuGlobalAtomicsBufferTest, givenSetSurfaceStateCalledThenDisableSupportForMultiGpuAtomicsIsSetCorrectly) {
    unsigned int numAvailableDevices, bufferFlags;
    bool useGlobalAtomics, areMultipleSubDevicesInContext, enableMultiGpuAtomicsOptimization;
    std::tie(numAvailableDevices, bufferFlags, useGlobalAtomics, areMultipleSubDevicesInContext, enableMultiGpuAtomicsOptimization) = GetParam();

    DebugManagerStateRestore restorer;
    DebugManager.flags.CreateMultipleSubDevices.set(numAvailableDevices);
    DebugManager.flags.EnableMultiGpuAtomicsOptimization.set(enableMultiGpuAtomicsOptimization);
    initPlatform();
    if (numAvailableDevices == 1) {
        EXPECT_EQ(0u, platform()->getClDevice(0)->getNumGenericSubDevices());
    } else {
        EXPECT_EQ(numAvailableDevices, platform()->getClDevice(0)->getNumGenericSubDevices());
    }

    auto size = MemoryConstants::pageSize;
    auto ptr = alignedMalloc(size, MemoryConstants::pageSize);
    MockGraphicsAllocation gfxAllocation(ptr, size);
    gfxAllocation.setMemObjectsAllocationWithWritableFlags(bufferFlags == CL_MEM_READ_WRITE);

    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
    RENDER_SURFACE_STATE surfaceState = FamilyType::cmdInitRenderSurfaceState;
    EXPECT_TRUE(surfaceState.getDisableSupportForMultiGpuAtomics());

    surfaceState.setDisableSupportForMultiGpuAtomics(false);
    Buffer::setSurfaceState(&platform()->getClDevice(0)->getDevice(), &surfaceState, false, false, 0, nullptr, 0, &gfxAllocation, bufferFlags, 0, useGlobalAtomics, areMultipleSubDevicesInContext);

    DeviceBitfield deviceBitfield{static_cast(maxNBitValue(numAvailableDevices))};
    bool implicitScaling = ImplicitScalingHelper::isImplicitScalingEnabled(deviceBitfield, true);
    bool enabled = implicitScaling;

    if (enableMultiGpuAtomicsOptimization) {
        enabled = useGlobalAtomics && (enabled || areMultipleSubDevicesInContext);
    }

    EXPECT_EQ(!enabled, surfaceState.getDisableSupportForMultiGpuAtomics());

    alignedFree(ptr);
}

// Value sets for the first two test parameters.
static unsigned int numAvailableDevices[] = {1, 2};
static unsigned int bufferFlags[] = {CL_MEM_READ_ONLY, CL_MEM_READ_WRITE};

INSTANTIATE_TEST_CASE_P(MultiGpuGlobalAtomicsBufferTest,
                        MultiGpuGlobalAtomicsBufferTest,
                        ::testing::Combine(
                            ::testing::ValuesIn(numAvailableDevices),
                            ::testing::ValuesIn(bufferFlags),
                            ::testing::Bool(),
                            ::testing::Bool(),
::testing::Bool()));test_command_stream_receiver_xehp.inl000066400000000000000000000154001422164147700342730ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/xe_hp_core/xehp/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/cmd_parse/hw_parse.h" #include "shared/test/common/mocks/mock_csr.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "gtest/gtest.h" using namespace NEO; class CommandStreamReceiverHwTestWithLocalMemory : public ClDeviceFixture, public HardwareParse, public ::testing::Test { public: void SetUp() override { dbgRestore = std::make_unique(); DebugManager.flags.EnableLocalMemory.set(1); ClDeviceFixture::SetUp(); HardwareParse::SetUp(); } void TearDown() override { HardwareParse::TearDown(); ClDeviceFixture::TearDown(); } private: std::unique_ptr dbgRestore; }; XEHPTEST_F(CommandStreamReceiverHwTestWithLocalMemory, givenUseClearColorAllocationForBlitterIsNotSetWhenCallingGetClearColorAllocationThenClearAllocationIsNotCreated) { DebugManagerStateRestore restore; DebugManager.flags.UseClearColorAllocationForBlitter.set(false); MockCsrHw commandStreamReceiver(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); commandStreamReceiver.setupContext(pDevice->getGpgpuCommandStreamReceiver().getOsContext()); auto gfxAllocation = commandStreamReceiver.getClearColorAllocation(); EXPECT_EQ(nullptr, gfxAllocation); } XEHPTEST_F(CommandStreamReceiverHwTestWithLocalMemory, givenUseClearColorAllocationForBlitterIsSetWhenCallingGetClearColorAllocationThenClearAllocationIsCreated) { DebugManagerStateRestore restore; DebugManager.flags.UseClearColorAllocationForBlitter.set(true); MockCsrHw commandStreamReceiver(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); 
commandStreamReceiver.setupContext(pDevice->getGpgpuCommandStreamReceiver().getOsContext()); auto gfxAllocation = commandStreamReceiver.getClearColorAllocation(); ASSERT_NE(nullptr, gfxAllocation); EXPECT_TRUE(gfxAllocation->storageInfo.readOnlyMultiStorage); } XEHPTEST_F(CommandStreamReceiverHwTestWithLocalMemory, givenUseClearColorAllocationForBlitterIsSetWhenClearColorAllocationIsAlreadyCreatedThenCallingGetClearColorAllocationReturnsAlreadyCreatedAllocation) { DebugManagerStateRestore restore; DebugManager.flags.UseClearColorAllocationForBlitter.set(true); MockCsrHw commandStreamReceiver(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); commandStreamReceiver.setupContext(pDevice->getGpgpuCommandStreamReceiver().getOsContext()); auto mockAllocation = std::make_unique(); auto expectedResult = mockAllocation.get(); commandStreamReceiver.clearColorAllocation = mockAllocation.release(); auto gfxAllocation = commandStreamReceiver.getClearColorAllocation(); EXPECT_EQ(expectedResult, gfxAllocation); } template struct MockCsrHwWithRace : public MockCsrHw { MockCsrHwWithRace() = delete; MockCsrHwWithRace(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield) : MockCsrHw(executionEnvironment, rootDeviceIndex, deviceBitfield) { mockGraphicsAllocation.reset(new MockGraphicsAllocation()); } std::unique_lock obtainUniqueOwnership() override { if (raceLost) { this->clearColorAllocation = mockGraphicsAllocation.release(); } return MockCsrHw::obtainUniqueOwnership(); } bool raceLost = false; std::unique_ptr mockGraphicsAllocation; }; XEHPTEST_F(CommandStreamReceiverHwTestWithLocalMemory, givenUseClearColorAllocationForBlitterIsSetWhenCallingGetClearColorAllocationAndRaceIsWonThenClearAllocationIsCreatedInCurrentThread) { DebugManagerStateRestore restore; DebugManager.flags.UseClearColorAllocationForBlitter.set(true); MockCsrHwWithRace 
commandStreamReceiver(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); commandStreamReceiver.setupContext(pDevice->getGpgpuCommandStreamReceiver().getOsContext()); auto gfxAllocation = commandStreamReceiver.getClearColorAllocation(); EXPECT_EQ(commandStreamReceiver.clearColorAllocation, gfxAllocation); EXPECT_NE(commandStreamReceiver.mockGraphicsAllocation.get(), gfxAllocation); } XEHPTEST_F(CommandStreamReceiverHwTestWithLocalMemory, givenUseClearColorAllocationForBlitterIsSetWhenCallingGetClearColorAllocationAndRaceIsLostThenClearAllocationIsNotCreatedInCurrentThread) { DebugManagerStateRestore restore; DebugManager.flags.UseClearColorAllocationForBlitter.set(true); MockCsrHwWithRace commandStreamReceiver(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); commandStreamReceiver.setupContext(pDevice->getGpgpuCommandStreamReceiver().getOsContext()); commandStreamReceiver.raceLost = true; auto expectedClearColorAllocation = commandStreamReceiver.mockGraphicsAllocation.get(); auto gfxAllocation = commandStreamReceiver.getClearColorAllocation(); EXPECT_EQ(commandStreamReceiver.clearColorAllocation, gfxAllocation); EXPECT_EQ(expectedClearColorAllocation, gfxAllocation); } XEHPTEST_F(CommandStreamReceiverHwTestWithLocalMemory, givenEnableStatelessCompressionWhenCallingGetMemoryCompressionStateThenReturnCorrectValue) { DebugManagerStateRestore restore; CommandStreamReceiverHw commandStreamReceiver(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); DebugManager.flags.EnableStatelessCompression.set(0); for (bool auxTranslationRequired : {false, true}) { auto memoryCompressionState = commandStreamReceiver.getMemoryCompressionState(auxTranslationRequired, pDevice->getHardwareInfo()); EXPECT_EQ(MemoryCompressionState::NotApplicable, memoryCompressionState); } DebugManager.flags.EnableStatelessCompression.set(1); for (bool auxTranslationRequired : 
{false, true}) { auto memoryCompressionState = commandStreamReceiver.getMemoryCompressionState(auxTranslationRequired, pDevice->getHardwareInfo()); if (auxTranslationRequired) { EXPECT_EQ(MemoryCompressionState::Disabled, memoryCompressionState); } else { EXPECT_EQ(MemoryCompressionState::Enabled, memoryCompressionState); } } } compute-runtime-22.14.22890/opencl/test/unit_test/xe_hp_core/xehp/test_device_caps_xehp.inl000066400000000000000000000024111422164147700317400ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/hw_helper.h" #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "gtest/gtest.h" using namespace NEO; typedef Test XeHPUsDeviceIdTest; XEHPTEST_F(XeHPUsDeviceIdTest, WhenGettingHardwareInfoThenProductFamilyIsXeHpSdv) { EXPECT_EQ(IGFX_XE_HP_SDV, pDevice->getHardwareInfo().platform.eProductFamily); } XEHPTEST_F(XeHPUsDeviceIdTest, givenXeHPWhenCheckftr64KBpagesThenTrue) { EXPECT_TRUE(pDevice->getHardwareInfo().capabilityTable.ftr64KBpages); } XEHPTEST_F(XeHPUsDeviceIdTest, WheCheckingIsSimulationThenFalseIsReturned) { EXPECT_FALSE(pDevice->isSimulation()); } XEHPTEST_F(XeHPUsDeviceIdTest, givenXeHPSkusThenItSupportCorrectlyRoundedDivSqrtBit) { EXPECT_TRUE(pClDevice->getHardwareInfo().capabilityTable.ftrSupports64BitMath); cl_device_fp_config actual = pClDevice->getDeviceInfo().singleFpConfig & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT; EXPECT_NE(0ull, actual); } XEHPTEST_F(XeHPUsDeviceIdTest, givenXeHPWhenCheckSupportCacheFlushAfterWalkerThenTrue) { EXPECT_TRUE(pDevice->getHardwareInfo().capabilityTable.supportCacheFlushAfterWalker); } compute-runtime-22.14.22890/opencl/test/unit_test/xe_hp_core/xehp/test_hw_info_config_xehp.inl000066400000000000000000000144551422164147700324640ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include 
"shared/source/os_interface/hw_info_config.h" #include "shared/test/common/helpers/hw_helper_tests.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/helpers/gtest_helpers.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "gtest/gtest.h" using namespace NEO; using XeHPHwInfoConfig = ::testing::Test; XEHPTEST_F(XeHPHwInfoConfig, givenConfigStringWhenSettingUpHardwareThenThrow) { HardwareInfo hwInfo = *defaultHwInfo; GT_SYSTEM_INFO >SystemInfo = hwInfo.gtSystemInfo; uint64_t config = 0xdeadbeef; gtSystemInfo = {0}; EXPECT_ANY_THROW(hardwareInfoSetup[productFamily](&hwInfo, false, config)); EXPECT_EQ(0u, gtSystemInfo.SliceCount); EXPECT_EQ(0u, gtSystemInfo.SubSliceCount); EXPECT_EQ(0u, gtSystemInfo.DualSubSliceCount); EXPECT_EQ(0u, gtSystemInfo.EUCount); } XEHPTEST_F(XeHPHwInfoConfig, givenXeHPMultiConfigWhenConfigureHardwareCustomIsCalledThenCapabilityTableIsSetProperly) { auto hwInfoConfig = HwInfoConfig::get(productFamily); HardwareInfo hwInfo = *defaultHwInfo; hwInfo.featureTable.flags.ftrE2ECompression = true; hwInfo.gtSystemInfo.EUCount = 256u; hwInfoConfig->configureHardwareCustom(&hwInfo, nullptr); EXPECT_FALSE(hwInfo.capabilityTable.ftrRenderCompressedBuffers); EXPECT_FALSE(hwInfo.capabilityTable.ftrRenderCompressedImages); hwInfo.gtSystemInfo.EUCount = 512u; hwInfoConfig->configureHardwareCustom(&hwInfo, nullptr); EXPECT_TRUE(hwInfo.capabilityTable.ftrRenderCompressedBuffers); EXPECT_TRUE(hwInfo.capabilityTable.ftrRenderCompressedImages); } XEHPTEST_F(XeHPHwInfoConfig, givenXeHPWhenConfiguringThenDisableRcs) { auto hwInfoConfig = HwInfoConfig::get(productFamily); HardwareInfo hwInfo = *defaultHwInfo; hwInfoConfig->configureHardwareCustom(&hwInfo, nullptr); EXPECT_FALSE(hwInfo.featureTable.flags.ftrRcsNode); } XEHPTEST_F(XeHPHwInfoConfig, 
givenDebugVariableSetWhenConfiguringThenEnableRcs) { DebugManagerStateRestore restore; DebugManager.flags.NodeOrdinal.set(static_cast(aub_stream::EngineType::ENGINE_RCS)); auto hwInfoConfig = HwInfoConfig::get(productFamily); HardwareInfo hwInfo = *defaultHwInfo; hwInfoConfig->configureHardwareCustom(&hwInfo, nullptr); EXPECT_TRUE(hwInfo.featureTable.flags.ftrRcsNode); } XEHPTEST_F(XeHPHwInfoConfig, givenXeHpWhenCallingGetDeviceMemoryNameThenHbmIsReturned) { auto hwInfoConfig = HwInfoConfig::get(productFamily); auto deviceMemoryName = hwInfoConfig->getDeviceMemoryName(); EXPECT_TRUE(hasSubstr(deviceMemoryName, std::string("HBM"))); } XEHPTEST_F(XeHPHwInfoConfig, givenA0OrA1SteppingWhenAskingIfExtraParametersAreInvalidThenReturnTrue) { auto hwInfoConfig = HwInfoConfig::get(productFamily); std::array, 4> revisions = { {{REVISION_A0, true}, {REVISION_A1, true}, {REVISION_B, false}, {REVISION_C, false}}}; for (const auto &[revision, paramBool] : revisions) { auto hwInfo = *defaultHwInfo; hwInfo.platform.usRevId = hwInfoConfig->getHwRevIdFromStepping(revision, hwInfo); hwInfoConfig->configureHardwareCustom(&hwInfo, nullptr); EXPECT_EQ(paramBool, hwInfoConfig->extraParametersInvalid(hwInfo)); } } using XeHPHwHelperTest = HwHelperTest; XEHPTEST_F(XeHPHwHelperTest, givenXeHPMultiConfigWhenAllowCompressionIsCalledThenCorrectValueIsReturned) { auto hwInfoConfig = HwInfoConfig::get(productFamily); HardwareInfo hwInfo = *defaultHwInfo; hwInfo.gtSystemInfo.EUCount = 512u; EXPECT_TRUE(hwInfoConfig->allowCompression(hwInfo)); hwInfo.gtSystemInfo.EUCount = 256u; EXPECT_FALSE(hwInfoConfig->allowCompression(hwInfo)); } XEHPTEST_F(XeHPHwInfoConfig, givenHwInfoConfigWhenAdditionalKernelExecInfoSupportCheckedThenCorrectValueIsReturned) { const auto &hwInfoConfig = *HwInfoConfig::get(productFamily); auto hwInfo = *defaultHwInfo; hwInfo.platform.usRevId = hwInfoConfig.getHwRevIdFromStepping(REVISION_A0, hwInfo); EXPECT_FALSE(hwInfoConfig.isDisableOverdispatchAvailable(hwInfo)); 
hwInfo.platform.usRevId = hwInfoConfig.getHwRevIdFromStepping(REVISION_B, hwInfo); EXPECT_TRUE(hwInfoConfig.isDisableOverdispatchAvailable(hwInfo)); } XEHPTEST_F(XeHPHwInfoConfig, givenHwInfoConfigWithMultipleCSSWhenIsPipeControlPriorToNonPipelinedStateCommandsWARequiredIsCalledThenTrueIsReturned) { const auto &hwInfoConfig = *HwInfoConfig::get(productFamily); auto hwInfo = *defaultHwInfo; hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled = 2; auto isRcs = false; const auto &[isBasicWARequired, isExtendedWARequired] = hwInfoConfig.isPipeControlPriorToNonPipelinedStateCommandsWARequired(hwInfo, isRcs); EXPECT_TRUE(isExtendedWARequired); EXPECT_TRUE(isBasicWARequired); } XEHPTEST_F(XeHPHwInfoConfig, givenProgramExtendedPipeControlPriorToNonPipelinedStateCommandEnabledWhenIsPipeControlPriorToNonPipelinedStateCommandsWARequiredIsCalledThenTrueIsReturned) { DebugManagerStateRestore restorer; DebugManager.flags.ProgramExtendedPipeControlPriorToNonPipelinedStateCommand.set(true); const auto &hwInfoConfig = *HwInfoConfig::get(productFamily); auto hwInfo = *defaultHwInfo; auto isRcs = false; const auto &[isBasicWARequired, isExtendedWARequired] = hwInfoConfig.isPipeControlPriorToNonPipelinedStateCommandsWARequired(hwInfo, isRcs); EXPECT_TRUE(isExtendedWARequired); EXPECT_TRUE(isBasicWARequired); } XEHPTEST_F(XeHPHwInfoConfig, givenProgramExtendedPipeControlPriorToNonPipelinedStateCommandDisabledWhenIsPipeControlPriorToNonPipelinedStateCommandsWARequiredIsCalledThenFalseIsReturned) { DebugManagerStateRestore restorer; DebugManager.flags.ProgramExtendedPipeControlPriorToNonPipelinedStateCommand.set(0); const auto &hwInfoConfig = *HwInfoConfig::get(productFamily); auto hwInfo = *defaultHwInfo; auto isRcs = false; const auto &[isBasicWARequired, isExtendedWARequired] = hwInfoConfig.isPipeControlPriorToNonPipelinedStateCommandsWARequired(hwInfo, isRcs); EXPECT_FALSE(isExtendedWARequired); EXPECT_TRUE(isBasicWARequired); } 
compute-runtime-22.14.22890/opencl/test/unit_test/xe_hp_core/xehp/test_image_xe_hp_sdv.inl000066400000000000000000000371471422164147700316060ustar00rootroot00000000000000/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/helpers/cl_memory_properties_helpers.h" #include "opencl/source/mem_obj/image.h" #include "opencl/test/unit_test/mem_obj/image_compression_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_platform.h" using XeHpSdvImageTests = ::testing::Test; XEHPTEST_F(XeHpSdvImageTests, givenContextTypeDefaultWhenImageIsWritableAndOnlyOneTileIsAvailableThenRemainFlagsToTrue) { DebugManagerStateRestore restorer; DebugManager.flags.CreateMultipleSubDevices.set(1); initPlatform(); EXPECT_EQ(0u, platform()->getClDevice(0)->getNumGenericSubDevices()); using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; MockContext context(platform()->getClDevice(0)); context.contextType = ContextType::CONTEXT_TYPE_DEFAULT; cl_int retVal = CL_SUCCESS; cl_image_format imageFormat = {}; cl_image_desc imageDesc = {}; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_RGBA; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_height = 128; imageDesc.image_width = 256; auto surfaceFormat = Image::getSurfaceFormatFromTable( CL_MEM_READ_WRITE, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); auto image = std::unique_ptr(Image::create( &context, ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_READ_WRITE, 0, 0, &context.getDevice(0)->getDevice()), CL_MEM_READ_WRITE, 0, surfaceFormat, &imageDesc, 
NULL, retVal)); auto imageHw = static_cast *>(image.get()); RENDER_SURFACE_STATE surfaceState = FamilyType::cmdInitRenderSurfaceState; EXPECT_TRUE(surfaceState.getDisableSupportForMultiGpuAtomics()); EXPECT_TRUE(surfaceState.getDisableSupportForMultiGpuPartialWrites()); surfaceState.setDisableSupportForMultiGpuAtomics(false); surfaceState.setDisableSupportForMultiGpuPartialWrites(false); imageHw->appendSurfaceStateParams(&surfaceState, context.getDevice(0)->getRootDeviceIndex(), true); EXPECT_TRUE(surfaceState.getDisableSupportForMultiGpuAtomics()); EXPECT_TRUE(surfaceState.getDisableSupportForMultiGpuPartialWrites()); } XEHPTEST_F(XeHpSdvImageTests, givenContextTypeDefaultWhenImageIsWritableThenFlipPartialFlagsToFalse) { DebugManagerStateRestore restorer; DebugManager.flags.CreateMultipleSubDevices.set(4); initPlatform(); using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; MockContext context(platform()->getClDevice(0)); context.contextType = ContextType::CONTEXT_TYPE_DEFAULT; cl_int retVal = CL_SUCCESS; cl_image_format imageFormat = {}; cl_image_desc imageDesc = {}; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_RGBA; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_height = 128; imageDesc.image_width = 256; auto surfaceFormat = Image::getSurfaceFormatFromTable( CL_MEM_READ_WRITE, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); auto image = std::unique_ptr(Image::create( &context, ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_READ_WRITE, 0, 0, &context.getDevice(0)->getDevice()), CL_MEM_READ_WRITE, 0, surfaceFormat, &imageDesc, NULL, retVal)); auto imageHw = static_cast *>(image.get()); RENDER_SURFACE_STATE surfaceState = FamilyType::cmdInitRenderSurfaceState; EXPECT_TRUE(surfaceState.getDisableSupportForMultiGpuAtomics()); EXPECT_TRUE(surfaceState.getDisableSupportForMultiGpuPartialWrites()); 
imageHw->appendSurfaceStateParams(&surfaceState, context.getDevice(0)->getRootDeviceIndex(), true); EXPECT_FALSE(surfaceState.getDisableSupportForMultiGpuAtomics()); EXPECT_FALSE(surfaceState.getDisableSupportForMultiGpuPartialWrites()); } XEHPTEST_F(XeHpSdvImageTests, givenDebugFlagForMultiTileSupportWhenSurfaceStateIsProgrammedThenItHasDesiredValues) { DebugManagerStateRestore restorer; using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; MockContext context; context.contextType = ContextType::CONTEXT_TYPE_DEFAULT; cl_int retVal = CL_SUCCESS; cl_image_format imageFormat = {}; cl_image_desc imageDesc = {}; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_RGBA; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_height = 128; imageDesc.image_width = 256; auto surfaceFormat = Image::getSurfaceFormatFromTable( CL_MEM_READ_ONLY, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); auto image = std::unique_ptr(Image::create( &context, ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_READ_WRITE, 0, 0, &context.getDevice(0)->getDevice()), CL_MEM_READ_WRITE, 0, surfaceFormat, &imageDesc, NULL, retVal)); auto imageHw = static_cast *>(image.get()); RENDER_SURFACE_STATE surfaceState = FamilyType::cmdInitRenderSurfaceState; DebugManager.flags.ForceMultiGpuAtomics.set(0); DebugManager.flags.ForceMultiGpuPartialWrites.set(0); imageHw->appendSurfaceStateParams(&surfaceState, context.getDevice(0)->getRootDeviceIndex(), true); EXPECT_EQ(0u, surfaceState.getDisableSupportForMultiGpuAtomics()); EXPECT_EQ(0u, surfaceState.getDisableSupportForMultiGpuPartialWrites()); DebugManager.flags.ForceMultiGpuAtomics.set(1); DebugManager.flags.ForceMultiGpuPartialWrites.set(1); imageHw->appendSurfaceStateParams(&surfaceState, context.getDevice(0)->getRootDeviceIndex(), true); EXPECT_EQ(1u, surfaceState.getDisableSupportForMultiGpuAtomics()); EXPECT_EQ(1u, 
surfaceState.getDisableSupportForMultiGpuPartialWrites()); } XEHPTEST_F(XeHpSdvImageTests, givenContextTypeUnrestrictiveWhenImageIsWritableThenFlipPartialFlagsToFalse) { DebugManagerStateRestore restorer; DebugManager.flags.CreateMultipleSubDevices.set(4); initPlatform(); using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; MockContext context(platform()->getClDevice(0)); context.contextType = ContextType::CONTEXT_TYPE_UNRESTRICTIVE; cl_int retVal = CL_SUCCESS; cl_image_format imageFormat = {}; cl_image_desc imageDesc = {}; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_RGBA; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_height = 128; imageDesc.image_width = 256; auto surfaceFormat = Image::getSurfaceFormatFromTable( CL_MEM_READ_WRITE, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); auto image = std::unique_ptr(Image::create( &context, ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_READ_WRITE, 0, 0, &context.getDevice(0)->getDevice()), CL_MEM_READ_WRITE, 0, surfaceFormat, &imageDesc, NULL, retVal)); auto imageHw = static_cast *>(image.get()); RENDER_SURFACE_STATE surfaceState = FamilyType::cmdInitRenderSurfaceState; EXPECT_TRUE(surfaceState.getDisableSupportForMultiGpuAtomics()); EXPECT_TRUE(surfaceState.getDisableSupportForMultiGpuPartialWrites()); imageHw->appendSurfaceStateParams(&surfaceState, context.getDevice(0)->getRootDeviceIndex(), true); EXPECT_FALSE(surfaceState.getDisableSupportForMultiGpuAtomics()); EXPECT_FALSE(surfaceState.getDisableSupportForMultiGpuPartialWrites()); } XEHPTEST_F(XeHpSdvImageTests, givenContextTypeDefaultWhenImageIsNotWritableThenRemainPartialFlagsToTrue) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; MockContext context; context.contextType = ContextType::CONTEXT_TYPE_DEFAULT; cl_int retVal = CL_SUCCESS; cl_image_format imageFormat = {}; cl_image_desc imageDesc = {}; 
imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_RGBA; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_height = 128; imageDesc.image_width = 256; auto surfaceFormat = Image::getSurfaceFormatFromTable( CL_MEM_READ_ONLY, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); auto image = std::unique_ptr(Image::create( &context, ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_READ_WRITE, 0, 0, &context.getDevice(0)->getDevice()), CL_MEM_READ_WRITE, 0, surfaceFormat, &imageDesc, NULL, retVal)); auto imageHw = static_cast *>(image.get()); RENDER_SURFACE_STATE surfaceState = FamilyType::cmdInitRenderSurfaceState; EXPECT_TRUE(surfaceState.getDisableSupportForMultiGpuAtomics()); EXPECT_TRUE(surfaceState.getDisableSupportForMultiGpuPartialWrites()); surfaceState.setDisableSupportForMultiGpuAtomics(false); surfaceState.setDisableSupportForMultiGpuPartialWrites(false); imageHw->appendSurfaceStateParams(&surfaceState, context.getDevice(0)->getRootDeviceIndex(), true); EXPECT_TRUE(surfaceState.getDisableSupportForMultiGpuAtomics()); EXPECT_TRUE(surfaceState.getDisableSupportForMultiGpuPartialWrites()); } XEHPTEST_F(XeHpSdvImageTests, givenContextTypeSpecializedWhenImageIsWritableThenRemainPartialFlagsToTrue) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; MockContext context; context.contextType = ContextType::CONTEXT_TYPE_SPECIALIZED; cl_int retVal = CL_SUCCESS; cl_image_format imageFormat = {}; cl_image_desc imageDesc = {}; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_RGBA; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_height = 128; imageDesc.image_width = 256; auto surfaceFormat = Image::getSurfaceFormatFromTable( CL_MEM_READ_WRITE, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); auto image = std::unique_ptr(Image::create( &context, 
ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_READ_WRITE, 0, 0, &context.getDevice(0)->getDevice()), CL_MEM_READ_WRITE, 0, surfaceFormat, &imageDesc, NULL, retVal)); auto imageHw = static_cast *>(image.get()); RENDER_SURFACE_STATE surfaceState = FamilyType::cmdInitRenderSurfaceState; EXPECT_TRUE(surfaceState.getDisableSupportForMultiGpuAtomics()); EXPECT_TRUE(surfaceState.getDisableSupportForMultiGpuPartialWrites()); surfaceState.setDisableSupportForMultiGpuAtomics(false); surfaceState.setDisableSupportForMultiGpuPartialWrites(false); imageHw->appendSurfaceStateParams(&surfaceState, context.getDevice(0)->getRootDeviceIndex(), true); EXPECT_TRUE(surfaceState.getDisableSupportForMultiGpuAtomics()); EXPECT_TRUE(surfaceState.getDisableSupportForMultiGpuPartialWrites()); } struct MultiGpuGlobalAtomicsImageTest : public XeHpSdvImageTests, public ::testing::WithParamInterface> { }; XEHPTEST_P(MultiGpuGlobalAtomicsImageTest, givenAppendSurfaceStateParamCalledThenDisableSupportForMultiGpuAtomicsIsSetCorrectly) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; unsigned int numAvailableSubDevices, memFlags; bool useGlobalAtomics, enableMultiGpuAtomicsOptimization; std::tie(numAvailableSubDevices, memFlags, useGlobalAtomics, enableMultiGpuAtomicsOptimization) = GetParam(); DebugManagerStateRestore restorer; DebugManager.flags.EnableMultiGpuAtomicsOptimization.set(enableMultiGpuAtomicsOptimization); UltClDeviceFactory deviceFactory{1, 2}; ClDeviceVector deviceVector; for (auto i = 0u; i < numAvailableSubDevices; i++) { deviceVector.push_back(deviceFactory.subDevices[i]); } if (deviceVector.empty()) { deviceVector.push_back(deviceFactory.rootDevices[0]); } MockContext context(deviceVector); cl_int retVal = CL_SUCCESS; cl_image_format imageFormat = {}; cl_image_desc imageDesc = {}; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_RGBA; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_height = 
128; imageDesc.image_width = 256; auto surfaceFormat = Image::getSurfaceFormatFromTable( memFlags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); auto image = std::unique_ptr(Image::create( &context, ClMemoryPropertiesHelper::createMemoryProperties(memFlags, 0, 0, &context.getDevice(0)->getDevice()), memFlags, 0, surfaceFormat, &imageDesc, NULL, retVal)); auto imageHw = static_cast *>(image.get()); RENDER_SURFACE_STATE surfaceState = FamilyType::cmdInitRenderSurfaceState; EXPECT_TRUE(surfaceState.getDisableSupportForMultiGpuAtomics()); surfaceState.setDisableSupportForMultiGpuAtomics(false); surfaceState.setDisableSupportForMultiGpuPartialWrites(false); imageHw->appendSurfaceStateParams(&surfaceState, context.getDevice(0)->getRootDeviceIndex(), useGlobalAtomics); bool enableGlobalAtomics = numAvailableSubDevices != 1u; if (enableMultiGpuAtomicsOptimization) { enableGlobalAtomics &= useGlobalAtomics; } EXPECT_EQ(!enableGlobalAtomics, surfaceState.getDisableSupportForMultiGpuAtomics()); } static unsigned int numAvailableSubDevicesForMultiGpuGlobalAtomicsImageTest[] = {0, 1, 2}; static unsigned int memFlags[] = {CL_MEM_READ_ONLY, CL_MEM_READ_WRITE}; INSTANTIATE_TEST_CASE_P(MultiGpuGlobalAtomicsImageTest, MultiGpuGlobalAtomicsImageTest, ::testing::Combine( ::testing::ValuesIn(numAvailableSubDevicesForMultiGpuGlobalAtomicsImageTest), ::testing::ValuesIn(memFlags), ::testing::Bool(), ::testing::Bool())); XEHPTEST_F(ImageCompressionTests, givenXeHpCoreAndRedescribableFormatWhenCreatingAllocationThenDoNotPreferCompression) { MockContext context{}; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_width = 5; imageDesc.image_height = 5; auto surfaceFormat = Image::getSurfaceFormatFromTable( flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); auto image = std::unique_ptr(Image::create( mockContext.get(), ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, 
&context.getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal)); ASSERT_NE(nullptr, image); EXPECT_EQ(UnitTestHelper::tiledImagesSupported, myMemoryManager->capturedPreferCompressed); imageFormat.image_channel_order = CL_RG; surfaceFormat = Image::getSurfaceFormatFromTable( flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); image = std::unique_ptr(Image::create( mockContext.get(), ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context.getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal)); ASSERT_NE(nullptr, image); EXPECT_TRUE(myMemoryManager->capturedPreferCompressed); } compute-runtime-22.14.22890/opencl/test/unit_test/xe_hp_core/xehp/test_local_work_size_xehp.inl000066400000000000000000000064771422164147700327010ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/command_queue/gpgpu_walker.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" using namespace NEO; using XeHPComputeWorkgroupSizeTest = Test; XEHPTEST_F(XeHPComputeWorkgroupSizeTest, giveXeHpA0WhenKernelIsaIsBelowThresholdAndThereAreNoImageBarriersAndSlmThenSmallWorkgorupSizeIsSelected) { auto program = std::make_unique(toClDeviceVector(*pClDevice)); MockKernelWithInternals mockKernel(*pClDevice); auto &kernel = *mockKernel.mockKernel; kernel.initialize(); Vec3 elws{0, 0, 0}; Vec3 gws{1024, 1, 1}; Vec3 offset{0, 0, 0}; DispatchInfo dispatchInfo{pClDevice, &kernel, 1, gws, elws, offset}; auto maxWgSize = pClDevice->getSharedDeviceInfo().maxWorkGroupSize; auto &hwInfo = *pDevice->getRootDeviceEnvironment().getMutableHardwareInfo(); const auto &hwInfoConfig = 
*HwInfoConfig::get(hwInfo.platform.eProductFamily); hwInfo.platform.usRevId = hwInfoConfig.getHwRevIdFromStepping(REVISION_A0, hwInfo); { auto expectedLws = computeWorkgroupSize(dispatchInfo); EXPECT_EQ(64u, expectedLws.x * expectedLws.y * expectedLws.z); EXPECT_EQ(64u, expectedLws.x); } mockKernel.mockKernel->slmTotalSize = 1000u; { auto expectedLws = computeWorkgroupSize(dispatchInfo); EXPECT_EQ(maxWgSize, expectedLws.x * expectedLws.y * expectedLws.z); EXPECT_EQ(maxWgSize, expectedLws.x); } mockKernel.mockKernel->slmTotalSize = 0u; mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.barrierCount = 1u; { auto expectedLws = computeWorkgroupSize(dispatchInfo); EXPECT_EQ(maxWgSize, expectedLws.x * expectedLws.y * expectedLws.z); EXPECT_EQ(maxWgSize, expectedLws.x); } mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.barrierCount = 0u; //on B0 algorithm is disabled hwInfo.platform.usRevId = hwInfoConfig.getHwRevIdFromStepping(REVISION_B, hwInfo); { auto expectedLws = computeWorkgroupSize(dispatchInfo); EXPECT_EQ(maxWgSize, expectedLws.x * expectedLws.y * expectedLws.z); EXPECT_EQ(maxWgSize, expectedLws.x); } } XEHPTEST_F(XeHPComputeWorkgroupSizeTest, givenSmallKernelAndGwsThatIsNotDivisableBySmallerLimitWhenLwsIsComputedThenBigWorgkroupIsSelected) { auto program = std::make_unique(toClDeviceVector(*pClDevice)); MockKernelWithInternals mockKernel(*pClDevice); auto &kernel = *mockKernel.mockKernel; kernel.initialize(); Vec3 elws{0, 0, 0}; Vec3 gws{636056, 1, 1}; Vec3 offset{0, 0, 0}; DispatchInfo dispatchInfo{pClDevice, &kernel, 1, gws, elws, offset}; auto maxWgSize = pClDevice->getSharedDeviceInfo().maxWorkGroupSize; { auto calculatedLws = computeWorkgroupSize(dispatchInfo); auto expectedLws = (maxWgSize < 344) ? 
8u : 344u; EXPECT_EQ(expectedLws, calculatedLws.x * calculatedLws.y * calculatedLws.z); EXPECT_EQ(expectedLws, calculatedLws.x); } } compute-runtime-22.14.22890/opencl/test/unit_test/xe_hp_core/xehp/test_platform_caps_xehp.inl000066400000000000000000000012201422164147700323220ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/platform_fixture.h" using namespace NEO; struct XE_HP_COREPlatformCaps : public PlatformFixture, public ::testing::Test { void SetUp() override { PlatformFixture::SetUp(); } void TearDown() override { PlatformFixture::TearDown(); } }; XEHPTEST_F(XE_HP_COREPlatformCaps, givenXeHPSkusThenItSupportFP64) { const auto &caps = pPlatform->getPlatformInfo(); EXPECT_NE(std::string::npos, caps.extensions.find(std::string("cl_khr_fp64"))); }compute-runtime-22.14.22890/opencl/test/unit_test/xe_hp_core/xehp/test_preamble_xehp.cpp000066400000000000000000000140621422164147700312670ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/stream_properties.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/preamble/preamble_fixture.h" #include "opencl/source/helpers/hardware_commands_helper.h" using namespace NEO; using PreambleCfeState = PreambleFixture; HWTEST2_F(PreambleCfeState, givenXehpAndFlagCFEWeightedDispatchModeDisableSetFalseWhenCallingProgramVFEStateThenFieldWeightedDispatchModeDisableAreNotSet, IsXEHP) { using CFE_STATE = typename FamilyType::CFE_STATE; DebugManagerStateRestore dbgRestore; DebugManager.flags.CFEWeightedDispatchModeDisable.set(false); auto pVfeCmd = PreambleHelper::getSpaceForVfeState(&linearStream, *defaultHwInfo, 
EngineGroupType::RenderCompute); StreamProperties streamProperties{}; streamProperties.frontEndState.setProperties(false, false, false, false, *defaultHwInfo); PreambleHelper::programVfeState(pVfeCmd, *defaultHwInfo, 0u, 0, 0, streamProperties); parseCommands(linearStream); auto cfeStateIt = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), cfeStateIt); auto cfeState = reinterpret_cast(*cfeStateIt); EXPECT_FALSE(cfeState->getWeightedDispatchModeDisable()); } HWTEST2_F(PreambleCfeState, givenXehpAndFlagCFEWeightedDispatchModeDisableSetTrueWhenCallingProgramVFEStateThenFieldWeightedDispatchModeDisableAreSet, IsXEHP) { using CFE_STATE = typename FamilyType::CFE_STATE; DebugManagerStateRestore dbgRestore; DebugManager.flags.CFEWeightedDispatchModeDisable.set(true); auto pVfeCmd = PreambleHelper::getSpaceForVfeState(&linearStream, *defaultHwInfo, EngineGroupType::RenderCompute); StreamProperties streamProperties{}; streamProperties.frontEndState.setProperties(false, false, false, false, *defaultHwInfo); PreambleHelper::programVfeState(pVfeCmd, *defaultHwInfo, 0u, 0, 0, streamProperties); parseCommands(linearStream); auto cfeStateIt = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), cfeStateIt); auto cfeState = reinterpret_cast(*cfeStateIt); EXPECT_TRUE(cfeState->getWeightedDispatchModeDisable()); } HWTEST2_F(PreambleCfeState, givenXehpAndFlagCFEComputeOverdispatchDisableSetFalseWhenCallingProgramVFEStateThenFieldComputeOverdispatchDisableAreNotSet, IsXEHP) { using CFE_STATE = typename FamilyType::CFE_STATE; DebugManagerStateRestore dbgRestore; DebugManager.flags.CFEComputeOverdispatchDisable.set(false); auto pVfeCmd = PreambleHelper::getSpaceForVfeState(&linearStream, *defaultHwInfo, EngineGroupType::RenderCompute); StreamProperties streamProperties{}; streamProperties.frontEndState.setProperties(false, false, false, false, *defaultHwInfo); PreambleHelper::programVfeState(pVfeCmd, *defaultHwInfo, 0u, 0, 0, streamProperties); 
parseCommands(linearStream); auto cfeStateIt = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), cfeStateIt); auto cfeState = reinterpret_cast(*cfeStateIt); EXPECT_FALSE(cfeState->getComputeOverdispatchDisable()); } HWTEST2_F(PreambleCfeState, givenXehpAndFlagCFEComputeOverdispatchDisableSetTrueWhenCallingProgramVFEStateThenFieldComputeOverdispatchDisableAreSet, IsXEHP) { using CFE_STATE = typename FamilyType::CFE_STATE; DebugManagerStateRestore dbgRestore; DebugManager.flags.CFEComputeOverdispatchDisable.set(true); auto pVfeCmd = PreambleHelper::getSpaceForVfeState(&linearStream, *defaultHwInfo, EngineGroupType::RenderCompute); StreamProperties streamProperties{}; streamProperties.frontEndState.setProperties(false, false, false, false, *defaultHwInfo); PreambleHelper::programVfeState(pVfeCmd, *defaultHwInfo, 0u, 0, 0, streamProperties); parseCommands(linearStream); auto cfeStateIt = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), cfeStateIt); auto cfeState = reinterpret_cast(*cfeStateIt); EXPECT_TRUE(cfeState->getComputeOverdispatchDisable()); } HWTEST2_F(PreambleCfeState, givenXehpAndDisabledFusedEuWhenCfeStateProgrammedThenFusedEuDispatchSetToTrue, IsXEHP) { using CFE_STATE = typename FamilyType::CFE_STATE; auto hwInfo = *defaultHwInfo; hwInfo.capabilityTable.fusedEuEnabled = false; auto pVfeCmd = PreambleHelper::getSpaceForVfeState(&linearStream, hwInfo, EngineGroupType::RenderCompute); StreamProperties streamProperties{}; streamProperties.frontEndState.setProperties(false, false, false, false, hwInfo); PreambleHelper::programVfeState(pVfeCmd, hwInfo, 0u, 0, 0, streamProperties); parseCommands(linearStream); auto cfeStateIt = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), cfeStateIt); auto cfeState = reinterpret_cast(*cfeStateIt); EXPECT_TRUE(cfeState->getFusedEuDispatch()); } HWTEST2_F(PreambleCfeState, givenXehpAndEnabledFusedEuWhenCfeStateProgrammedThenFusedEuDispatchSetToFalse, IsXEHP) { using CFE_STATE = typename 
FamilyType::CFE_STATE; auto hwInfo = *defaultHwInfo; hwInfo.capabilityTable.fusedEuEnabled = true; auto pVfeCmd = PreambleHelper::getSpaceForVfeState(&linearStream, hwInfo, EngineGroupType::RenderCompute); StreamProperties streamProperties{}; streamProperties.frontEndState.setProperties(false, false, false, false, hwInfo); PreambleHelper::programVfeState(pVfeCmd, hwInfo, 0u, 0, 0, streamProperties); parseCommands(linearStream); auto cfeStateIt = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), cfeStateIt); auto cfeState = reinterpret_cast(*cfeStateIt); EXPECT_FALSE(cfeState->getFusedEuDispatch()); }compute-runtime-22.14.22890/opencl/test/unit_test/xe_hp_core/xehp/test_sub_devices_xehp.inl000066400000000000000000000046421422164147700317760ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/hw_info_config.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" using namespace NEO; using XeHPUsDeviceIdTest = Test; HWTEST_EXCLUDE_PRODUCT(SubDeviceTests, givenCCSEngineWhenCallingGetDefaultEngineWithWaThenTheSameEngineIsReturned, IGFX_XE_HP_SDV); XEHPTEST_F(XeHPUsDeviceIdTest, givenRevisionAWhenCreatingEngineWithSubdevicesThenEngineTypeIsSetToCCS) { DebugManagerStateRestore restorer; DebugManager.flags.CreateMultipleSubDevices.set(2); auto executionEnvironment = new MockExecutionEnvironment; MockDevice device(executionEnvironment, 0); EXPECT_EQ(0u, device.allEngines.size()); device.createSubDevices(); device.createEngines(); EXPECT_EQ(2u, device.getNumGenericSubDevices()); auto hwInfo = device.getRootDeviceEnvironment().getMutableHardwareInfo(); const auto &hwInfoConfig = *HwInfoConfig::get(hwInfo->platform.eProductFamily); 
hwInfo->platform.usRevId = hwInfoConfig.getHwRevIdFromStepping(REVISION_A0, *hwInfo); device.createEngines(); auto engines = device.getAllEngines(); for (auto engine : engines) { EXPECT_EQ(aub_stream::ENGINE_CCS, engine.osContext->getEngineType()); } } XEHPTEST_F(XeHPUsDeviceIdTest, givenRevisionBWhenCreatingEngineWithSubdevicesThenEngineTypeIsSetToCCS) { DebugManagerStateRestore restorer; DebugManager.flags.CreateMultipleSubDevices.set(2); auto executionEnvironment = new MockExecutionEnvironment; MockDevice device(executionEnvironment, 0); EXPECT_EQ(0u, device.allEngines.size()); device.createSubDevices(); device.createEngines(); EXPECT_EQ(2u, device.getNumGenericSubDevices()); auto hwInfo = device.getRootDeviceEnvironment().getMutableHardwareInfo(); const auto &hwInfoConfig = *HwInfoConfig::get(hwInfo->platform.eProductFamily); hwInfo->platform.usRevId = hwInfoConfig.getHwRevIdFromStepping(REVISION_B, *hwInfo); device.createEngines(); auto engines = device.getAllEngines(); for (auto engine : engines) { EXPECT_EQ(aub_stream::ENGINE_CCS, engine.osContext->getEngineType()); } } compute-runtime-22.14.22890/opencl/test/unit_test/xe_hp_core/xehp/test_wrapper_xehp.cpp000066400000000000000000000011621422164147700311550ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "cache_flush_tests_xehp.inl" #include "get_device_info_xehp.inl" #include "hw_helper_tests_xehp.inl" #include "hw_info_tests_xehp.inl" #include "memory_manager_tests_xehp.inl" #include "sampler_tests_xehp.inl" #include "test_buffer_xe_hp_sdv.inl" #include "test_command_stream_receiver_xehp.inl" #include "test_device_caps_xehp.inl" #include "test_hw_info_config_xehp.inl" #include "test_image_xe_hp_sdv.inl" #include "test_local_work_size_xehp.inl" #include "test_platform_caps_xehp.inl" #include "test_sub_devices_xehp.inl" 
compute-runtime-22.14.22890/opencl/test/unit_test/xe_hpc_core/000077500000000000000000000000001422164147700241055ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/xe_hpc_core/CMakeLists.txt000066400000000000000000000025021422164147700266440ustar00rootroot00000000000000# # Copyright (C) 2021-2022 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_XE_HPC_CORE) set(IGDRCL_SRCS_tests_xe_hpc_core_excludes ${CMAKE_CURRENT_SOURCE_DIR}/excludes_ocl_xe_hpc_core.cpp ) set_property(GLOBAL APPEND PROPERTY IGDRCL_SRCS_tests_excludes ${IGDRCL_SRCS_tests_xe_hpc_core_excludes}) set(IGDRCL_SRCS_tests_xe_hpc_core ${IGDRCL_SRCS_tests_xe_hpc_core_excludes} ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/built_in_xe_hpc_core_tests_ocl.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_cmds_programming_xe_hpc_core.cpp ${CMAKE_CURRENT_SOURCE_DIR}/copy_engine_tests_xe_hpc_core.cpp ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_hw_tests_xe_hpc_core.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_tests_xe_hpc_core.cpp ${CMAKE_CURRENT_SOURCE_DIR}/hw_helper_tests_xe_hpc_core.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_device_caps_xe_hpc_core.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_platform_caps_xe_hpc_core.cpp ${CMAKE_CURRENT_SOURCE_DIR}/api_tests_xe_hpc_core.cpp ) get_property(NEO_CORE_TESTS_XE_HPC_CORE GLOBAL PROPERTY NEO_CORE_TESTS_XE_HPC_CORE) list(APPEND IGDRCL_SRCS_tests_xe_hpc_core ${NEO_CORE_TESTS_XE_HPC_CORE}) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_xe_hpc_core}) add_subdirectories() endif() compute-runtime-22.14.22890/opencl/test/unit_test/xe_hpc_core/api_tests_xe_hpc_core.cpp000066400000000000000000000025121422164147700311420ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/hw_info_config.h" #include "opencl/test/unit_test/api/cl_api_tests.h" using namespace NEO; using EnqueueKernelTestGenXeHpcCore = api_tests; 
XE_HPC_CORETEST_F(EnqueueKernelTestGenXeHpcCore, givenCommandQueueWithCCCSEngineAndRevisionBWhenCallingClEnqueueNDCountKernelINTELThenInvalidCommandQueueIsReturned) { size_t workgroupCount[3] = {2, 1, 1}; size_t localWorkSize[3] = {256, 1, 1}; cl_int retVal = CL_SUCCESS; auto &hwInfo = *pDevice->getRootDeviceEnvironment().getMutableHardwareInfo(); auto &hwConfig = *HwInfoConfig::get(hwInfo.platform.eProductFamily); hwInfo.platform.usRevId = hwConfig.getHwRevIdFromStepping(REVISION_B, hwInfo); pProgram->mockKernelInfo.kernelDescriptor.kernelAttributes.flags.usesSyncBuffer = true; pCommandQueue->getGpgpuEngine().osContext = pCommandQueue->getDevice().getEngine(aub_stream::ENGINE_CCCS, EngineUsage::Regular).osContext; retVal = pMultiDeviceKernel->setKernelExecutionType(CL_KERNEL_EXEC_INFO_CONCURRENT_TYPE_INTEL); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clEnqueueNDCountKernelINTEL(pCommandQueue, pMultiDeviceKernel, 1, nullptr, workgroupCount, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } compute-runtime-22.14.22890/opencl/test/unit_test/xe_hpc_core/built_in_xe_hpc_core_tests_ocl.cpp000066400000000000000000000032231422164147700330330ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/fixtures/device_fixture.h" #include "shared/test/common/mocks/mock_builtinslib.h" #include "shared/test/common/test_macros/test.h" using namespace NEO; using XeHpcCoreBuiltInTests = Test; XE_HPC_CORETEST_F(XeHpcCoreBuiltInTests, GivenBuiltinTypeBinaryWhenGettingBuiltinResourceForNotRegisteredRevisionThenBinaryBuiltinIsNotAvailable) { pDevice->getRootDeviceEnvironment().getMutableHardwareInfo()->platform.usRevId += 0xdead; auto mockBuiltinsLib = std::unique_ptr(new MockBuiltinsLib()); const std::array builtinTypes{EBuiltInOps::CopyBufferToBuffer, EBuiltInOps::CopyBufferRect, EBuiltInOps::FillBuffer, EBuiltInOps::CopyBufferToImage3d, EBuiltInOps::CopyImage3dToBuffer, 
EBuiltInOps::CopyImageToImage1d, EBuiltInOps::CopyImageToImage2d, EBuiltInOps::CopyImageToImage3d, EBuiltInOps::FillImage1d, EBuiltInOps::FillImage2d, EBuiltInOps::FillImage3d}; for (auto &builtinType : builtinTypes) { EXPECT_EQ(0u, mockBuiltinsLib->getBuiltinResource(builtinType, BuiltinCode::ECodeType::Binary, *pDevice).size()); } } command_stream_receiver_hw_tests_xe_hpc_core.cpp000066400000000000000000000355131422164147700356740ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/xe_hpc_core/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/linear_stream.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/mocks/mock_timestamp_container.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/command_queue/gpgpu_walker.h" #include "opencl/source/command_queue/hardware_interface.h" #include "opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_mdi.h" using namespace NEO; struct MemorySynchronizationViaMiSemaphoreWaitTest : public UltCommandStreamReceiverTest { void SetUp() override { DebugManager.flags.EnableLocalMemory.set(1); DebugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(0); DebugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(0); DebugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(0); UltCommandStreamReceiverTest::SetUp(); } DebugManagerStateRestore restore; }; XE_HPC_CORETEST_F(MemorySynchronizationViaMiSemaphoreWaitTest, givenMemorySynchronizationViaMiSemaphoreWaitWhenProgramEnginePrologueIsCalledThenNoCommandIsProgrammed) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); 
EXPECT_FALSE(commandStreamReceiver.isEnginePrologueSent); auto requiredSize = commandStreamReceiver.getCmdSizeForPrologue(); EXPECT_EQ(0u, requiredSize); StackVec buffer(requiredSize); LinearStream cmdStream(buffer.begin(), buffer.size()); commandStreamReceiver.programEnginePrologue(cmdStream); EXPECT_TRUE(commandStreamReceiver.isEnginePrologueSent); HardwareParse hwParser; hwParser.parseCommands(cmdStream); EXPECT_EQ(0u, hwParser.cmdList.size()); } struct SystemMemoryFenceInDisabledConfigurationTest : public UltCommandStreamReceiverTest { void SetUp() override { DebugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(0); DebugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(0); DebugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(0); UltCommandStreamReceiverTest::SetUp(); } DebugManagerStateRestore restore; }; XE_HPC_CORETEST_F(SystemMemoryFenceInDisabledConfigurationTest, givenNoSystemMemoryFenceWhenEnqueueKernelIsCalledThenDontGenerateFenceCommands) { using STATE_SYSTEM_MEM_FENCE_ADDRESS = typename FamilyType::STATE_SYSTEM_MEM_FENCE_ADDRESS; using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER; using MI_MEM_FENCE = typename FamilyType::MI_MEM_FENCE; MockKernelWithInternals kernel(*pClDevice); MockContext context(pClDevice); MockCommandQueueHw commandQueue(&context, pClDevice, nullptr); size_t globalWorkSize[3] = {1, 1, 1}; commandQueue.enqueueKernel(kernel, 1, nullptr, globalWorkSize, nullptr, 0, nullptr, nullptr); ClHardwareParse hwParser; hwParser.parseCommands(commandQueue); auto itorSystemMemFenceAddress = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); EXPECT_EQ(hwParser.cmdList.end(), itorSystemMemFenceAddress); auto itorComputeWalker = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); ASSERT_NE(hwParser.cmdList.end(), itorComputeWalker); auto walkerCmd = genCmdCast(*itorComputeWalker); auto &postSyncData = walkerCmd->getPostSync(); 
EXPECT_FALSE(postSyncData.getSystemMemoryFenceRequest()); auto itorMiMemFence = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); EXPECT_EQ(hwParser.cmdList.end(), itorMiMemFence); } struct SystemMemoryFenceViaMiMemFenceTest : public UltCommandStreamReceiverTest { void SetUp() override { DebugManager.flags.EnableLocalMemory.set(1); DebugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(1); UltCommandStreamReceiverTest::SetUp(); } DebugManagerStateRestore restore; }; XE_HPC_CORETEST_F(SystemMemoryFenceViaMiMemFenceTest, givenCommadStreamReceiverWhenProgramEnginePrologueIsCalledThenIsEnginePrologueSentIsSetToTrue) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); EXPECT_FALSE(commandStreamReceiver.isEnginePrologueSent); auto requiredSize = commandStreamReceiver.getCmdSizeForPrologue(); StackVec buffer(requiredSize); LinearStream cmdStream(buffer.begin(), buffer.size()); commandStreamReceiver.programEnginePrologue(cmdStream); EXPECT_TRUE(commandStreamReceiver.isEnginePrologueSent); } XE_HPC_CORETEST_F(SystemMemoryFenceViaMiMemFenceTest, givenIsEnginePrologueSentIsSetToTrueWhenGetRequiredCmdStreamSizeIsCalledThenSizeForEnginePrologueIsNotIncluded) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); auto dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); EXPECT_FALSE(commandStreamReceiver.isEnginePrologueSent); auto sizeForEnginePrologue = commandStreamReceiver.getCmdSizeForPrologue(); auto sizeWhenEnginePrologueIsNotSent = commandStreamReceiver.getRequiredCmdStreamSize(dispatchFlags, *pDevice); commandStreamReceiver.isEnginePrologueSent = true; auto sizeWhenEnginePrologueIsSent = commandStreamReceiver.getRequiredCmdStreamSize(dispatchFlags, *pDevice); EXPECT_EQ(sizeForEnginePrologue, sizeWhenEnginePrologueIsNotSent - sizeWhenEnginePrologueIsSent); } XE_HPC_CORETEST_F(SystemMemoryFenceViaMiMemFenceTest, 
givenSystemMemoryFenceGeneratedAsMiFenceCommandInCommandStreamWhenBlitBufferIsCalledThenSystemMemFenceAddressIsProgrammed) { using STATE_SYSTEM_MEM_FENCE_ADDRESS = typename FamilyType::STATE_SYSTEM_MEM_FENCE_ADDRESS; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); auto &cmdStream = commandStreamReceiver.getCS(0); EXPECT_FALSE(commandStreamReceiver.isEnginePrologueSent); BlitPropertiesContainer blitPropertiesContainer; commandStreamReceiver.flushBcsTask(blitPropertiesContainer, false, false, *pDevice); EXPECT_TRUE(commandStreamReceiver.isEnginePrologueSent); HardwareParse hwParser; hwParser.parseCommands(cmdStream); auto itorSystemMemFenceAddress = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); ASSERT_NE(hwParser.cmdList.end(), itorSystemMemFenceAddress); auto systemMemFenceAddressCmd = genCmdCast(*itorSystemMemFenceAddress); EXPECT_EQ(commandStreamReceiver.globalFenceAllocation->getGpuAddress(), systemMemFenceAddressCmd->getSystemMemoryFenceAddress()); } struct SystemMemoryFenceViaComputeWalkerTest : public UltCommandStreamReceiverTest { void SetUp() override { DebugManager.flags.EnableLocalMemory.set(1); DebugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(0); DebugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(1); DebugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(0); UltCommandStreamReceiverTest::SetUp(); } DebugManagerStateRestore restore; }; XE_HPC_CORETEST_F(SystemMemoryFenceViaComputeWalkerTest, givenSystemMemoryFenceGeneratedAsPostSyncOperationInComputeWalkerWhenProgramEnginePrologueIsCalledThenSystemMemFenceAddressIsProgrammed) { using STATE_SYSTEM_MEM_FENCE_ADDRESS = typename FamilyType::STATE_SYSTEM_MEM_FENCE_ADDRESS; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); EXPECT_FALSE(commandStreamReceiver.isEnginePrologueSent); auto requiredSize = commandStreamReceiver.getCmdSizeForPrologue(); StackVec buffer(requiredSize); LinearStream 
cmdStream(buffer.begin(), buffer.size()); commandStreamReceiver.programEnginePrologue(cmdStream); EXPECT_TRUE(commandStreamReceiver.isEnginePrologueSent); HardwareParse hwParser; hwParser.parseCommands(cmdStream); auto itorSystemMemFenceAddress = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); ASSERT_NE(hwParser.cmdList.end(), itorSystemMemFenceAddress); auto systemMemFenceAddressCmd = genCmdCast(*itorSystemMemFenceAddress); EXPECT_EQ(commandStreamReceiver.globalFenceAllocation->getGpuAddress(), systemMemFenceAddressCmd->getSystemMemoryFenceAddress()); } XE_HPC_CORETEST_F(SystemMemoryFenceViaComputeWalkerTest, givenSystemMemoryFenceGeneratedAsPostSyncOperationInComputeWalkerWhenDispatchWalkerIsCalledThenSystemMemoryFenceRequestInPostSyncDataIsProgrammed) { using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER; MockKernelWithInternals kernel(*pClDevice); MockMultiDispatchInfo multiDispatchInfo(pClDevice, kernel.mockKernel); MockContext context(pClDevice); MockCommandQueue commandQueue(&context, pClDevice, nullptr, false); auto &cmdStream = commandQueue.getCS(0); MockTimestampPacketContainer timestampPacket(*pClDevice->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1); HardwareInterface::dispatchWalker( commandQueue, multiDispatchInfo, CsrDependencies(), nullptr, nullptr, nullptr, nullptr, ×tampPacket, CL_COMMAND_NDRANGE_KERNEL); HardwareParse hwParser; hwParser.parseCommands(cmdStream); auto itorComputeWalker = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); ASSERT_NE(hwParser.cmdList.end(), itorComputeWalker); auto walkerCmd = genCmdCast(*itorComputeWalker); auto &postSyncData = walkerCmd->getPostSync(); EXPECT_TRUE(postSyncData.getSystemMemoryFenceRequest()); } struct SystemMemoryFenceViaKernelInstructionTest : public UltCommandStreamReceiverTest { void SetUp() override { DebugManager.flags.EnableLocalMemory.set(1); DebugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(0); 
DebugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(0); DebugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(1); UltCommandStreamReceiverTest::SetUp(); } DebugManagerStateRestore restore; }; XE_HPC_CORETEST_F(SystemMemoryFenceViaKernelInstructionTest, givenSystemMemoryFenceGeneratedAsKernelInstructionInKernelCodeWhenProgramEnginePrologueIsCalledThenSystemMemFenceAddressIsProgrammed) { using STATE_SYSTEM_MEM_FENCE_ADDRESS = typename FamilyType::STATE_SYSTEM_MEM_FENCE_ADDRESS; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); EXPECT_FALSE(commandStreamReceiver.isEnginePrologueSent); auto requiredSize = commandStreamReceiver.getCmdSizeForPrologue(); StackVec buffer(requiredSize); LinearStream cmdStream(buffer.begin(), buffer.size()); commandStreamReceiver.programEnginePrologue(cmdStream); EXPECT_TRUE(commandStreamReceiver.isEnginePrologueSent); HardwareParse hwParser; hwParser.parseCommands(cmdStream); auto itorSystemMemFenceAddress = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); ASSERT_NE(hwParser.cmdList.end(), itorSystemMemFenceAddress); auto systemMemFenceAddressCmd = genCmdCast(*itorSystemMemFenceAddress); EXPECT_EQ(commandStreamReceiver.globalFenceAllocation->getGpuAddress(), systemMemFenceAddressCmd->getSystemMemoryFenceAddress()); } struct SystemMemoryFenceInDefaultConfigurationTest : public UltCommandStreamReceiverTest { void SetUp() override { DebugManager.flags.EnableLocalMemory.set(1); DebugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(-1); DebugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(-1); DebugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(-1); UltCommandStreamReceiverTest::SetUp(); } DebugManagerStateRestore restore; }; XE_HPC_CORETEST_F(SystemMemoryFenceInDefaultConfigurationTest, whenEnqueueKernelIsCalledThenFenceCommandsCanBeGenerated) { using STATE_SYSTEM_MEM_FENCE_ADDRESS = typename 
FamilyType::STATE_SYSTEM_MEM_FENCE_ADDRESS; using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER; using MI_MEM_FENCE = typename FamilyType::MI_MEM_FENCE; VariableBackup revisionId(&defaultHwInfo->platform.usRevId); if (defaultHwInfo->platform.eProductFamily != IGFX_PVC) { GTEST_SKIP(); } unsigned short revisions[] = {0x0, 0x3}; for (auto revision : revisions) { revisionId = revision; UltClDeviceFactory ultClDeviceFactory{1, 0}; auto isPvcXlA0Stepping = (revision == 0x0); auto &clDevice = *ultClDeviceFactory.rootDevices[0]; MockKernelWithInternals kernel(clDevice); MockContext context(&clDevice); MockCommandQueueHw commandQueue(&context, &clDevice, nullptr); auto &commandStreamReceiver = clDevice.getUltCommandStreamReceiver(); size_t globalWorkSize[3] = {1, 1, 1}; commandQueue.enqueueKernel(kernel, 1, nullptr, globalWorkSize, nullptr, 0, nullptr, nullptr); ClHardwareParse hwParser; hwParser.parseCommands(commandQueue); auto itorSystemMemFenceAddress = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); ASSERT_NE(hwParser.cmdList.end(), itorSystemMemFenceAddress); auto systemMemFenceAddressCmd = genCmdCast(*itorSystemMemFenceAddress); EXPECT_EQ(commandStreamReceiver.globalFenceAllocation->getGpuAddress(), systemMemFenceAddressCmd->getSystemMemoryFenceAddress()); auto itorComputeWalker = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); ASSERT_NE(hwParser.cmdList.end(), itorComputeWalker); auto walkerCmd = genCmdCast(*itorComputeWalker); auto &postSyncData = walkerCmd->getPostSync(); auto itorMiMemFence = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); if (isPvcXlA0Stepping) { EXPECT_FALSE(postSyncData.getSystemMemoryFenceRequest()); EXPECT_EQ(hwParser.cmdList.end(), itorMiMemFence); } else { EXPECT_TRUE(postSyncData.getSystemMemoryFenceRequest()); ASSERT_NE(hwParser.cmdList.end(), itorMiMemFence); auto fenceCmd = genCmdCast(*itorMiMemFence); ASSERT_NE(nullptr, fenceCmd); EXPECT_EQ(MI_MEM_FENCE::FENCE_TYPE::FENCE_TYPE_RELEASE, 
fenceCmd->getAFenceType()); } } } compute-runtime-22.14.22890/opencl/test/unit_test/xe_hpc_core/copy_engine_tests_xe_hpc_core.cpp000066400000000000000000000327561422164147700327050ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/aub_command_stream_receiver.h" #include "shared/source/gmm_helper/gmm.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/timestamp_packet.h" #include "shared/test/common/cmd_parse/hw_parse.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/libult/ult_command_stream_receiver.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_context.h" using namespace NEO; struct BlitXeHpcCoreTests : public ::testing::Test { void SetUp() override { if (is32bit) { GTEST_SKIP(); } DebugManager.flags.RenderCompressedBuffersEnabled.set(true); DebugManager.flags.EnableLocalMemory.set(true); HardwareInfo hwInfo = *defaultHwInfo; hwInfo.capabilityTable.blitterOperationsSupported = true; clDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); } uint32_t flushBcsTask(CommandStreamReceiver *csr, const BlitProperties &blitProperties, bool blocking, Device &device) { BlitPropertiesContainer blitPropertiesContainer; blitPropertiesContainer.push_back(blitProperties); return csr->flushBcsTask(blitPropertiesContainer, blocking, false, device); } std::unique_ptr clDevice; TimestampPacketContainer timestampPacketContainer; CsrDependencies csrDependencies; DebugManagerStateRestore debugRestorer; 
}; XE_HPC_CORETEST_F(BlitXeHpcCoreTests, givenCompressedBufferWhenProgrammingBltCommandThenSetCompressionFields) { using MEM_COPY = typename FamilyType::MEM_COPY; auto csr = static_cast *>(clDevice->getEngine(aub_stream::EngineType::ENGINE_BCS, EngineUsage::Regular).commandStreamReceiver); MockContext context(clDevice.get()); cl_int retVal = CL_SUCCESS; auto bufferCompressed = clUniquePtr(Buffer::create(&context, CL_MEM_READ_WRITE, 2048, nullptr, retVal)); bufferCompressed->getGraphicsAllocation(clDevice->getRootDeviceIndex())->getDefaultGmm()->isCompressionEnabled = true; auto bufferNotCompressed = clUniquePtr(Buffer::create(&context, CL_MEM_READ_WRITE, 2048, nullptr, retVal)); bufferNotCompressed->getGraphicsAllocation(clDevice->getRootDeviceIndex())->getDefaultGmm()->isCompressionEnabled = false; MockGraphicsAllocation clearColorAlloc; { auto blitProperties = BlitProperties::constructPropertiesForCopy(bufferNotCompressed->getGraphicsAllocation(clDevice->getRootDeviceIndex()), bufferCompressed->getGraphicsAllocation(clDevice->getRootDeviceIndex()), 0, 0, {2048, 1, 1}, 0, 0, 0, 0, &clearColorAlloc); flushBcsTask(csr, blitProperties, true, clDevice->getDevice()); HardwareParse hwParser; hwParser.parseCommands(csr->commandStream); auto itorBltCmd = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); ASSERT_NE(hwParser.cmdList.end(), itorBltCmd); MEM_COPY *bltCmd = (MEM_COPY *)*itorBltCmd; EXPECT_EQ(bltCmd->getDestinationCompressionEnable(), MEM_COPY::DESTINATION_COMPRESSION_ENABLE::DESTINATION_COMPRESSION_ENABLE_DISABLE); EXPECT_EQ(bltCmd->getDestinationCompressible(), MEM_COPY::DESTINATION_COMPRESSIBLE::DESTINATION_COMPRESSIBLE_NOT_COMPRESSIBLE); EXPECT_EQ(bltCmd->getSourceCompressible(), MEM_COPY::SOURCE_COMPRESSIBLE::SOURCE_COMPRESSIBLE_COMPRESSIBLE); } { auto offset = csr->commandStream.getUsed(); auto blitProperties = BlitProperties::constructPropertiesForCopy(bufferCompressed->getGraphicsAllocation(clDevice->getRootDeviceIndex()), 
bufferNotCompressed->getGraphicsAllocation(clDevice->getRootDeviceIndex()), 0, 0, {2048, 1, 1}, 0, 0, 0, 0, &clearColorAlloc); flushBcsTask(csr, blitProperties, true, clDevice->getDevice()); HardwareParse hwParser; hwParser.parseCommands(csr->commandStream, offset); auto bltCmd = genCmdCast(*(hwParser.cmdList.begin())); EXPECT_NE(nullptr, bltCmd); EXPECT_EQ(bltCmd->getDestinationCompressionEnable(), MEM_COPY::DESTINATION_COMPRESSION_ENABLE::DESTINATION_COMPRESSION_ENABLE_ENABLE); EXPECT_EQ(bltCmd->getDestinationCompressible(), MEM_COPY::DESTINATION_COMPRESSIBLE::DESTINATION_COMPRESSIBLE_COMPRESSIBLE); EXPECT_EQ(bltCmd->getSourceCompressible(), MEM_COPY::SOURCE_COMPRESSIBLE::SOURCE_COMPRESSIBLE_NOT_COMPRESSIBLE); } } XE_HPC_CORETEST_F(BlitXeHpcCoreTests, givenBufferWhenProgrammingBltCommandThenSetMocs) { using MEM_COPY = typename FamilyType::MEM_COPY; auto &bcsEngine = clDevice->getEngine(aub_stream::EngineType::ENGINE_BCS, EngineUsage::Regular); auto csr = static_cast *>(bcsEngine.commandStreamReceiver); MockContext context(clDevice.get()); MockGraphicsAllocation clearColorAlloc; cl_int retVal = CL_SUCCESS; auto buffer = clUniquePtr(Buffer::create(&context, CL_MEM_READ_WRITE, 1, nullptr, retVal)); auto blitProperties = BlitProperties::constructPropertiesForCopy(buffer->getGraphicsAllocation(clDevice->getRootDeviceIndex()), buffer->getGraphicsAllocation(clDevice->getRootDeviceIndex()), 0, 0, {1, 1, 1}, 0, 0, 0, 0, &clearColorAlloc); flushBcsTask(csr, blitProperties, true, clDevice->getDevice()); HardwareParse hwParser; hwParser.parseCommands(csr->commandStream); auto itorBltCmd = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); ASSERT_NE(hwParser.cmdList.end(), itorBltCmd); MEM_COPY *bltCmd = (MEM_COPY *)*itorBltCmd; auto mocsL3enabled = 0x10u; EXPECT_EQ(mocsL3enabled, bltCmd->getDestinationMOCS()); EXPECT_EQ(mocsL3enabled, bltCmd->getSourceMOCS()); } XE_HPC_CORETEST_F(BlitXeHpcCoreTests, givenBufferWhenProgrammingBltCommandThenSetMocsToValueOfDebugKey) { 
DebugManagerStateRestore restorer; DebugManager.flags.OverrideBlitterMocs.set(0u); using MEM_COPY = typename FamilyType::MEM_COPY; MockGraphicsAllocation clearColorAlloc; auto &bcsEngine = clDevice->getEngine(aub_stream::EngineType::ENGINE_BCS, EngineUsage::Regular); auto csr = static_cast *>(bcsEngine.commandStreamReceiver); MockContext context(clDevice.get()); cl_int retVal = CL_SUCCESS; auto buffer = clUniquePtr(Buffer::create(&context, CL_MEM_READ_WRITE, 1, nullptr, retVal)); auto blitProperties = BlitProperties::constructPropertiesForCopy(buffer->getGraphicsAllocation(clDevice->getRootDeviceIndex()), buffer->getGraphicsAllocation(clDevice->getRootDeviceIndex()), 0, 0, {1, 1, 1}, 0, 0, 0, 0, &clearColorAlloc); flushBcsTask(csr, blitProperties, true, clDevice->getDevice()); HardwareParse hwParser; hwParser.parseCommands(csr->commandStream); auto itorBltCmd = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); ASSERT_NE(hwParser.cmdList.end(), itorBltCmd); MEM_COPY *bltCmd = (MEM_COPY *)*itorBltCmd; EXPECT_EQ(0u, bltCmd->getDestinationMOCS()); EXPECT_EQ(0u, bltCmd->getSourceMOCS()); } XE_HPC_CORETEST_F(BlitXeHpcCoreTests, givenCompressedBufferWhenResolveBlitIsCalledThenProgramSpecialOperationMode) { using MEM_COPY = typename FamilyType::MEM_COPY; auto &bcsEngine = clDevice->getEngine(aub_stream::EngineType::ENGINE_BCS, EngineUsage::Regular); auto csr = static_cast *>(bcsEngine.commandStreamReceiver); MockContext context(clDevice.get()); MockGraphicsAllocation clearColorAlloc; cl_int retVal = CL_SUCCESS; auto buffer = clUniquePtr(Buffer::create(&context, CL_MEM_READ_WRITE, 2048, nullptr, retVal)); auto blitProperties = BlitProperties::constructPropertiesForAuxTranslation(AuxTranslationDirection::AuxToNonAux, buffer->getGraphicsAllocation(clDevice->getRootDeviceIndex()), &clearColorAlloc); flushBcsTask(csr, blitProperties, false, clDevice->getDevice()); HardwareParse hwParser; hwParser.parseCommands(csr->commandStream); auto itorBltCmd = 
find(hwParser.cmdList.begin(), hwParser.cmdList.end()); ASSERT_NE(hwParser.cmdList.end(), itorBltCmd); MEM_COPY *bltCmd = (MEM_COPY *)*itorBltCmd; EXPECT_EQ(bltCmd->getDestinationCompressionEnable(), MEM_COPY::DESTINATION_COMPRESSION_ENABLE::DESTINATION_COMPRESSION_ENABLE_DISABLE); EXPECT_EQ(bltCmd->getDestinationCompressible(), MEM_COPY::DESTINATION_COMPRESSIBLE::DESTINATION_COMPRESSIBLE_COMPRESSIBLE); EXPECT_EQ(bltCmd->getSourceCompressible(), MEM_COPY::SOURCE_COMPRESSIBLE::SOURCE_COMPRESSIBLE_COMPRESSIBLE); } XE_HPC_CORETEST_F(BlitXeHpcCoreTests, given2dBlitCommandWhenDispatchingThenSetValidSurfaceType) { using MEM_COPY = typename FamilyType::MEM_COPY; auto &bcsEngine = clDevice->getEngine(aub_stream::EngineType::ENGINE_BCS, EngineUsage::Regular); auto csr = static_cast *>(bcsEngine.commandStreamReceiver); MockContext context(clDevice.get()); cl_int retVal = CL_SUCCESS; auto buffer = clUniquePtr(Buffer::create(&context, CL_MEM_READ_WRITE, 1, nullptr, retVal)); auto allocation = buffer->getGraphicsAllocation(clDevice->getRootDeviceIndex()); MockGraphicsAllocation clearColorAlloc; size_t offset = 0; { // 1D auto blitProperties = BlitProperties::constructPropertiesForCopy(allocation, allocation, 0, 0, {BlitterConstants::maxBlitWidth - 1, 1, 1}, 0, 0, 0, 0, &clearColorAlloc); flushBcsTask(csr, blitProperties, false, clDevice->getDevice()); HardwareParse hwParser; hwParser.parseCommands(csr->commandStream); auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); ASSERT_NE(hwParser.cmdList.end(), cmdIterator); auto bltCmd = genCmdCast(*cmdIterator); EXPECT_NE(nullptr, bltCmd); EXPECT_EQ(MEM_COPY::COPY_TYPE::COPY_TYPE_LINEAR_COPY, bltCmd->getCopyType()); offset = csr->commandStream.getUsed(); MockGraphicsAllocation clearColorAlloc; } { // 2D auto blitProperties = BlitProperties::constructPropertiesForCopy(allocation, allocation, 0, 0, {(2 * BlitterConstants::maxBlitWidth) + 1, 1, 1}, 0, 0, 0, 0, &clearColorAlloc); flushBcsTask(csr, blitProperties, 
false, clDevice->getDevice()); HardwareParse hwParser; hwParser.parseCommands(csr->commandStream, offset); auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); ASSERT_NE(hwParser.cmdList.end(), cmdIterator); auto bltCmd = genCmdCast(*cmdIterator); EXPECT_NE(nullptr, bltCmd); EXPECT_EQ(MEM_COPY::COPY_TYPE::COPY_TYPE_MATRIX_COPY, bltCmd->getCopyType()); } } HWTEST_EXCLUDE_PRODUCT(CommandQueueHwTest, givenCommandQueueWhenAskingForCacheFlushOnBcsThenReturnTrue, IGFX_XE_HPC_CORE); using XeHpcCoreCopyEngineTests = ::testing::Test; XE_HPC_CORETEST_F(XeHpcCoreCopyEngineTests, givenCommandQueueWhenAskingForCacheFlushOnBcsThenReturnFalse) { auto clDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); MockContext context(clDevice.get()); cl_int retVal = CL_SUCCESS; auto commandQueue = std::unique_ptr(CommandQueue::create(&context, clDevice.get(), nullptr, false, retVal)); auto commandQueueHw = static_cast *>(commandQueue.get()); EXPECT_FALSE(commandQueueHw->isCacheFlushForBcsRequired()); } XE_HPC_CORETEST_F(XeHpcCoreCopyEngineTests, givenDebugFlagSetWhenCheckingBcsCacheFlushRequirementThenReturnCorrectValueForGen12p8) { DebugManagerStateRestore restorer; auto clDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); MockContext context(clDevice.get()); cl_int retVal = CL_SUCCESS; auto commandQueue = std::unique_ptr(CommandQueue::create(&context, clDevice.get(), nullptr, false, retVal)); auto commandQueueHw = static_cast *>(commandQueue.get()); DebugManager.flags.ForceCacheFlushForBcs.set(0); EXPECT_FALSE(commandQueueHw->isCacheFlushForBcsRequired()); DebugManager.flags.ForceCacheFlushForBcs.set(1); EXPECT_TRUE(commandQueueHw->isCacheFlushForBcsRequired()); } compute-runtime-22.14.22890/opencl/test/unit_test/xe_hpc_core/enqueue_tests_xe_hpc_core.cpp000066400000000000000000000172521422164147700320470ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * 
SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/hw_helper.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "shared/test/common/cmd_parse/hw_parse.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/command_queue/hardware_interface.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" using namespace NEO; struct EnqueueFixtureXeHpcCore : public ::testing::Test { void SetUp() override { DebugManager.flags.EnableMemoryPrefetch.set(1); clDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get(), mockRootDeviceIndex)); context = std::make_unique(clDevice.get()); mockKernel = std::make_unique(*clDevice, context.get()); mockKernel->kernelInfo.createKernelAllocation(clDevice->getDevice(), false); dispatchInfo = {clDevice.get(), mockKernel->mockKernel, 1, 0, 0, 0}; } void TearDown() override { clDevice->getMemoryManager()->freeGraphicsMemory(mockKernel->kernelInfo.getGraphicsAllocation()); } template std::unique_ptr> createCommandQueue() { return std::make_unique>(context.get(), clDevice.get(), nullptr); } DebugManagerStateRestore restore; std::unique_ptr clDevice; std::unique_ptr context; std::unique_ptr mockKernel; DispatchInfo dispatchInfo; }; using MemoryPrefetchTestsXeHpcCore = EnqueueFixtureXeHpcCore; XE_HPC_CORETEST_F(MemoryPrefetchTestsXeHpcCore, givenKernelWhenWalkerIsProgrammedThenPrefetchIsaBeforeWalker) { using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER; using STATE_PREFETCH = typename FamilyType::STATE_PREFETCH; auto commandQueue = createCommandQueue(); auto &commandStream = commandQueue->getCS(1024); auto &heap 
= commandQueue->getIndirectHeap(IndirectHeap::Type::DYNAMIC_STATE, 1); size_t workSize[] = {1, 1, 1}; Vec3 wgInfo = {1, 1, 1}; uint32_t iddIndex = 0; mockKernel->kernelInfo.heapInfo.KernelHeapSize = 1; HardwareInterface::programWalker(commandStream, *mockKernel->mockKernel, *commandQueue, nullptr, heap, heap, heap, workSize, workSize, PreemptionMode::Disabled, 0, iddIndex, dispatchInfo, 0, wgInfo, wgInfo); HardwareParse hwParse; hwParse.parseCommands(commandStream, 0); auto itorWalker = find(hwParse.cmdList.begin(), hwParse.cmdList.end()); EXPECT_NE(hwParse.cmdList.end(), itorWalker); auto itorStatePrefetch = find(hwParse.cmdList.begin(), itorWalker); EXPECT_NE(itorWalker, itorStatePrefetch); auto statePrefetchCmd = genCmdCast(*itorStatePrefetch); EXPECT_NE(nullptr, statePrefetchCmd); EXPECT_EQ(mockKernel->kernelInfo.getGraphicsAllocation()->getGpuAddress(), statePrefetchCmd->getAddress()); EXPECT_TRUE(statePrefetchCmd->getKernelInstructionPrefetch()); } XE_HPC_CORETEST_F(MemoryPrefetchTestsXeHpcCore, givenPrefetchEnabledWhenEstimatingCommandsSizeThenAddStatePrefetch) { auto commandQueue = createCommandQueue(); size_t numPipeControls = MemorySynchronizationCommands::isPipeControlWArequired(clDevice->getHardwareInfo()) ? 
2 : 1; size_t expected = sizeof(typename FamilyType::COMPUTE_WALKER) + (sizeof(typename FamilyType::PIPE_CONTROL) * numPipeControls) + HardwareCommandsHelper::getSizeRequiredCS() + EncodeMemoryPrefetch::getSizeForMemoryPrefetch(mockKernel->kernelInfo.heapInfo.KernelHeapSize); EXPECT_EQ(expected, EnqueueOperation::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *commandQueue, mockKernel->mockKernel, {})); } using ProgramWalkerTestsXeHpcCore = EnqueueFixtureXeHpcCore; XE_HPC_CORETEST_F(ProgramWalkerTestsXeHpcCore, givenDebugVariableSetWhenProgrammingWalkerThenSetL3Prefetch) { using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER; auto commandQueue = createCommandQueue(); auto &commandStream = commandQueue->getCS(1024); auto &heap = commandQueue->getIndirectHeap(IndirectHeap::Type::DYNAMIC_STATE, 1); size_t workSize[] = {1, 1, 1}; Vec3 wgInfo = {1, 1, 1}; uint32_t iddIndex = 0; size_t commandsOffset = 0; { // default HardwareInterface::programWalker(commandStream, *mockKernel->mockKernel, *commandQueue, nullptr, heap, heap, heap, workSize, workSize, PreemptionMode::Disabled, 0, iddIndex, dispatchInfo, 0, wgInfo, wgInfo); HardwareParse hwParse; hwParse.parseCommands(commandStream, 0); auto itorWalker = find(hwParse.cmdList.begin(), hwParse.cmdList.end()); EXPECT_NE(hwParse.cmdList.end(), itorWalker); auto walkerCmd = genCmdCast(*itorWalker); EXPECT_NE(nullptr, walkerCmd); EXPECT_FALSE(walkerCmd->getL3PrefetchDisable()); } { // debug flag == 1 commandsOffset = commandStream.getUsed(); DebugManager.flags.ForceL3PrefetchForComputeWalker.set(1); HardwareInterface::programWalker(commandStream, *mockKernel->mockKernel, *commandQueue, nullptr, heap, heap, heap, workSize, workSize, PreemptionMode::Disabled, 0, iddIndex, dispatchInfo, 0, wgInfo, wgInfo); HardwareParse hwParse; hwParse.parseCommands(commandStream, commandsOffset); auto itorWalker = find(hwParse.cmdList.begin(), hwParse.cmdList.end()); EXPECT_NE(hwParse.cmdList.end(), itorWalker); auto walkerCmd = 
genCmdCast(*itorWalker); EXPECT_NE(nullptr, walkerCmd); EXPECT_FALSE(walkerCmd->getL3PrefetchDisable()); } { // debug flag == 0 commandsOffset = commandStream.getUsed(); DebugManager.flags.ForceL3PrefetchForComputeWalker.set(0); HardwareInterface::programWalker(commandStream, *mockKernel->mockKernel, *commandQueue, nullptr, heap, heap, heap, workSize, workSize, PreemptionMode::Disabled, 0, iddIndex, dispatchInfo, 0, wgInfo, wgInfo); HardwareParse hwParse; hwParse.parseCommands(commandStream, commandsOffset); auto itorWalker = find(hwParse.cmdList.begin(), hwParse.cmdList.end()); EXPECT_NE(hwParse.cmdList.end(), itorWalker); auto walkerCmd = genCmdCast(*itorWalker); EXPECT_NE(nullptr, walkerCmd); EXPECT_TRUE(walkerCmd->getL3PrefetchDisable()); } } compute-runtime-22.14.22890/opencl/test/unit_test/xe_hpc_core/excludes_ocl_xe_hpc_core.cpp000066400000000000000000000172601422164147700316260ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" HWTEST_EXCLUDE_PRODUCT(BufferSetSurfaceTests, givenAlignedCacheableReadOnlyBufferThenChoseOclBufferPolicy, IGFX_XE_HPC_CORE); HWTEST_EXCLUDE_PRODUCT(BufferSetSurfaceTests, givenBufferSetSurfaceThatMemoryIsUnalignedToCachelineButReadOnlyThenL3CacheShouldBeStillOn, IGFX_XE_HPC_CORE); HWTEST_EXCLUDE_PRODUCT(HwHelperTestXeHPAndLater, givenVariousCachesRequestProperMOCSIndexesAreBeingReturned, IGFX_XE_HPC_CORE); HWTEST_EXCLUDE_PRODUCT(PipeControlHelperTestsXeHPAndLater, WhenAddingPipeControlWAThenCorrectCommandsAreProgrammed, IGFX_XE_HPC_CORE); HWTEST_EXCLUDE_PRODUCT(QueueFamilyNameTest, givenRcsWhenGettingQueueFamilyNameThenReturnProperValue, IGFX_XE_HPC_CORE); HWTEST_EXCLUDE_PRODUCT(HwInfoConfigTest, givenVariousValuesWhenConvertingHwRevIdAndSteppingThenConversionIsCorrect, IGFX_XE_HPC_CORE); HWTEST_EXCLUDE_PRODUCT(PipeControlHelperTests, givenHwHelperwhenAskingForDcFlushThenReturnTrue, IGFX_XE_HPC_CORE); 
HWTEST_EXCLUDE_PRODUCT(MultiRootDeviceCommandStreamReceiverBufferTests, givenMultipleEventInMultiRootDeviceEnvironmentWhenTheyArePassedToEnqueueWithSubmissionThenCsIsWaitingForEventsFromPreviousDevices, IGFX_XE_HPC_CORE); HWTEST_EXCLUDE_PRODUCT(EnqueueCopyBufferToImageStatelessTest, givenBigBufferWhenCopyingBufferToImageStatelessThenSuccessIsReturned, IGFX_XE_HPC_CORE); HWTEST_EXCLUDE_PRODUCT(EnqueueCopyImageToBufferHwStatelessTest, givenBigBufferWhenCopyingImageToBufferStatelessThenSuccessIsReturned, IGFX_XE_HPC_CORE); HWTEST_EXCLUDE_PRODUCT(BuiltInTests, givenBigOffsetAndSizeWhenBuilderCopyImageToBufferStatelessIsUsedThenParamsAreCorrect, IGFX_XE_HPC_CORE); HWTEST_EXCLUDE_PRODUCT(DrmMemoryManagerTest, givenDrmAllocationWithHostPtrWhenItIsCreatedWithIncorrectCacheRegionThenReturnNull, IGFX_XE_HPC_CORE); HWTEST_EXCLUDE_PRODUCT(DrmMemoryManagerTest, givenDrmAllocationWithWithAlignmentFromUserptrWhenItIsCreatedWithIncorrectCacheRegionThenReturnNull, IGFX_XE_HPC_CORE); HWTEST_EXCLUDE_PRODUCT(ClDeviceHelperTests, givenDeviceWithoutClosBasedCacheReservationSupportWhenQueryingNumCacheClosDeviceInfoThenReturnZeroCacheClosRegions, IGFX_XE_HPC_CORE); HWTEST_EXCLUDE_PRODUCT(HwHelperTest, whenGettingNumberOfCacheRegionsThenReturnZero, IGFX_XE_HPC_CORE); HWTEST_EXCLUDE_PRODUCT(HwHelperTest, givenHwHelperWhenGettingISAPaddingThenCorrectValueIsReturned, IGFX_XE_HPC_CORE); HWTEST_EXCLUDE_PRODUCT(LocalWorkSizeTest, givenDispatchInfoWhenWorkSizeInfoIsCreatedThenTestEuFusionFtr, IGFX_XE_HPC_CORE); HWTEST_EXCLUDE_PRODUCT(HwInfoConfigTest, whenCallingGetDeviceMemoryNameThenDdrIsReturned, IGFX_XE_HPC_CORE); HWTEST_EXCLUDE_PRODUCT(HwInfoConfigTest, givenHardwareInfoWhenCallingIsMaxThreadsForWorkgroupWARequiredThenFalseIsReturned, IGFX_XE_HPC_CORE); HWTEST_EXCLUDE_PRODUCT(HwInfoConfigTest, givenHwInfoConfigWhenAskedIfPipeControlPriorToNonPipelinedStateCommandsWARequiredThenFalseIsReturned, IGFX_XE_HPC_CORE); HWTEST_EXCLUDE_PRODUCT(CommandStreamReceiverFlushTaskDg2AndLaterTests, 
givenProgramExtendedPipeControlPriorToNonPipelinedStateCommandEnabledWhenPerDssBackedBufferThenThereIsPipeControlPriorToIt_MatcherIsRTCapable, IGFX_XE_HPC_CORE); HWTEST_EXCLUDE_PRODUCT(CommandStreamReceiverHwTestDg2AndLater, givenGen12AndLaterWhenRayTracingEnabledThenCommandIsAddedToBatchBuffer_MatcherIsRTCapable, IGFX_XE_HPC_CORE); HWTEST_EXCLUDE_PRODUCT(CommandStreamReceiverHwTestDg2AndLater, givenGen12AndLaterWhenRayTracingEnabledButAlreadySentThenCommandIsNotAddedToBatchBuffer_MatcherIsRTCapable, IGFX_XE_HPC_CORE); HWTEST_EXCLUDE_PRODUCT(HwInfoConfigTest, givenHwInfoConfigWhenAskedIfTile64With3DSurfaceOnBCSIsSupportedThenTrueIsReturned, IGFX_XE_HPC_CORE); HWTEST_EXCLUDE_PRODUCT(HardwareCommandsTest, GivenVariousValuesWhenAlignSlmSizeIsCalledThenCorrectValueIsReturned, IGFX_XE_HPC_CORE); HWTEST_EXCLUDE_PRODUCT(HardwareCommandsTest, GivenVariousValuesWhenComputeSlmSizeIsCalledThenCorrectValueIsReturned, IGFX_XE_HPC_CORE); HWTEST_EXCLUDE_PRODUCT(XeHPAndLaterDeviceCapsTests, givenHwInfoWhenSlmSizeIsRequiredThenReturnCorrectValue, IGFX_XE_HPC_CORE); HWTEST_EXCLUDE_PRODUCT(DeviceGetCapsTest, givenDeviceWhenAskingForSubGroupSizesThenReturnCorrectValues, IGFX_XE_HPC_CORE); HWTEST_EXCLUDE_PRODUCT(HwHelperTestXeHPAndLater, givenAllFlagsSetWhenGetGpgpuEnginesThenReturnThreeRcsEnginesFourCcsEnginesAndOneBcsEngine, IGFX_XE_HPC_CORE); HWTEST_EXCLUDE_PRODUCT(HwHelperTestXeHPAndLater, givenCcsDisabledWhenGetGpgpuEnginesThenReturnRcsAndOneBcsEngine, IGFX_XE_HPC_CORE); HWTEST_EXCLUDE_PRODUCT(HwHelperTestXeHPAndLater, givenBcsDisabledWhenGetGpgpuEnginesThenReturnThreeRcsEnginesFourCcsEngines, IGFX_XE_HPC_CORE); HWTEST_EXCLUDE_PRODUCT(HwHelperTestXeHPAndLater, givenCcsDisabledAndNumberOfCcsEnabledWhenGetGpgpuEnginesThenReturnRcsAndOneBcsEngine, IGFX_XE_HPC_CORE); HWTEST_EXCLUDE_PRODUCT(HwHelperTest, givenDefaultHwHelperHwWhenMinimalSIMDSizeIsQueriedThen8IsReturned, IGFX_XE_HPC_CORE); HWTEST_EXCLUDE_PRODUCT(PipeControlHelperTestsXeHPAndLater, 
WhenGettingSizeForAdditionalSynchronizationThenCorrectValueIsReturned, IGFX_XE_HPC_CORE); HWTEST_EXCLUDE_PRODUCT(HwHelperTest, whenQueryingMaxNumSamplersThenReturnSixteen, IGFX_XE_HPC_CORE); HWTEST_EXCLUDE_PRODUCT(MemoryManagerGetAlloctionDataTests, givenCommandBufferAllocationTypeWhenGetAllocationDataIsCalledThenSystemMemoryIsRequested, IGFX_XE_HPC_CORE); HWTEST_EXCLUDE_PRODUCT(MultiDeviceStorageInfoTest, givenSingleTileCsrWhenAllocatingCsrSpecificAllocationsThenStoreThemInSystemMemory, IGFX_XE_HPC_CORE); HWTEST_EXCLUDE_PRODUCT(HwHelperTest, givenKernelInfoWhenCheckingRequiresAuxResolvesThenCorrectValuesAreReturned, IGFX_XE_HPC_CORE); HWTEST_EXCLUDE_PRODUCT(HwHelperTest, givenHwHelperWhenGettingIfRevisionSpecificBinaryBuiltinIsRequiredThenFalseIsReturned, IGFX_XE_HPC_CORE); HWTEST_EXCLUDE_PRODUCT(BuiltInSharedTest, GivenBuiltinTypeBinaryWhenGettingBuiltinResourceForNotRegisteredRevisionThenBuiltinFromDefaultRevisionIsTaken, IGFX_XE_HPC_CORE); HWTEST_EXCLUDE_PRODUCT(EnqueueReadBufferRectStatefulTest, WhenReadingBufferRectStatefulThenSuccessIsReturned, IGFX_XE_HPC_CORE); HWTEST_EXCLUDE_PRODUCT(EnqueueCopyBufferRectStateful, GivenValidParametersWhenCopyingBufferRectStatefulThenSuccessIsReturned, IGFX_XE_HPC_CORE); HWTEST_EXCLUDE_PRODUCT(EnqueueSvmMemCopyHwTest, givenEnqueueSVMMemCopyWhenUsingCopyBufferToBufferStatefulBuilderThenSuccessIsReturned, IGFX_XE_HPC_CORE); HWTEST_EXCLUDE_PRODUCT(EnqueueSvmMemFillHwTest, givenEnqueueSVMMemFillWhenUsingCopyBufferToBufferStatefulBuilderThenSuccessIsReturned, IGFX_XE_HPC_CORE); HWTEST_EXCLUDE_PRODUCT(EnqueueFillBufferStatefullTest, givenBuffersWhenFillingBufferStatefullThenSuccessIsReturned, IGFX_XE_HPC_CORE); HWTEST_EXCLUDE_PRODUCT(EnqueueCopyBufferStatefulTest, givenBuffersWhenCopyingBufferStatefulThenSuccessIsReturned, IGFX_XE_HPC_CORE); HWTEST_EXCLUDE_PRODUCT(EnqueueWriteBufferStatefulTest, WhenWritingBufferStatefulThenSuccessIsReturned, IGFX_XE_HPC_CORE); HWTEST_EXCLUDE_PRODUCT(EnqueueReadBufferStatefulTest, 
WhenReadingBufferStatefulThenSuccessIsReturned, IGFX_XE_HPC_CORE); HWTEST_EXCLUDE_PRODUCT(EnqueueWriteBufferRectStatefulTest, WhenWritingBufferRectStatefulThenSuccessIsReturned, IGFX_XE_HPC_CORE); HWTEST_EXCLUDE_PRODUCT(CommandStreamReceiverFlushTaskTests, givenOverrideThreadArbitrationPolicyDebugVariableSetWhenFlushingThenRequestRequiredMode, IGFX_XE_HPC_CORE); HWTEST_EXCLUDE_PRODUCT(XeHPAndLaterAubCommandStreamReceiverWithoutFixtureTests, GivenCopyHostPtrAndHostNoAccessAndReadOnlyFlagsWhenAllocatingBufferThenAllocationIsCopiedToEveryTile, IGFX_XE_HPC_CORE); HWTEST_EXCLUDE_PRODUCT(GetAllocationDataTestHw, givenRingBufferAllocationWhenGetAllocationDataIsCalledThenItHasProperFieldsSet, IGFX_XE_HPC_CORE); HWTEST_EXCLUDE_PRODUCT(GetAllocationDataTestHw, givenSemaphoreBufferAllocationWhenGetAllocationDataIsCalledThenItHasProperFieldsSet, IGFX_XE_HPC_CORE); compute-runtime-22.14.22890/opencl/test/unit_test/xe_hpc_core/hw_helper_tests_xe_hpc_core.cpp000066400000000000000000001360601422164147700323540ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/engine_node_helper.h" #include "shared/source/os_interface/hw_info_config.h" #include "shared/test/common/cmd_parse/hw_parse.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/hw_helper_tests.h" #include "shared/test/common/helpers/ult_hw_config.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "opencl/source/helpers/cl_hw_helper.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_cl_hw_helper.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_platform.h" using 
HwHelperTestsXeHpcCore = Test; XE_HPC_CORETEST_F(HwHelperTestsXeHpcCore, givenPvcThenAuxTranslationIsNotRequired) { auto &clHwHelper = ClHwHelper::get(renderCoreFamily); KernelInfo kernelInfo{}; EXPECT_FALSE(clHwHelper.requiresAuxResolves(kernelInfo, *defaultHwInfo)); } XE_HPC_CORETEST_F(HwHelperTestsXeHpcCore, givenHwHelperwhenAskingForDcFlushThenReturnFalse) { EXPECT_FALSE(MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo)); } XE_HPC_CORETEST_F(HwHelperTestsXeHpcCore, givenCommandBufferAllocationTypeWhenGetAllocationDataIsCalledThenLocalMemoryIsRequested) { AllocationData allocData; AllocationProperties properties(mockRootDeviceIndex, true, 10, AllocationType::COMMAND_BUFFER, false, mockDeviceBitfield); MockMemoryManager mockMemoryManager; mockMemoryManager.getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_FALSE(allocData.flags.useSystemMemory); } XE_HPC_CORETEST_F(HwHelperTestsXeHpcCore, givenSingleTileCsrWhenAllocatingCsrSpecificAllocationsThenStoreThemInProperMemoryPool) { const uint32_t numDevices = 4u; const uint32_t tileIndex = 2u; const DeviceBitfield singleTileMask{static_cast(1u << tileIndex)}; DebugManagerStateRestore restore; VariableBackup backup{&ultHwConfig}; ultHwConfig.useMockedPrepareDeviceEnvironmentsFunc = false; DebugManager.flags.CreateMultipleSubDevices.set(numDevices); DebugManager.flags.EnableLocalMemory.set(true); initPlatform(); auto clDevice = platform()->getClDevice(0); auto hwInfo = clDevice->getExecutionEnvironment()->rootDeviceEnvironments[0]->getMutableHardwareInfo(); hwInfo->platform.usRevId = 0b111000; // not BD A0 auto commandStreamReceiver = clDevice->getSubDevice(tileIndex)->getDefaultEngine().commandStreamReceiver; auto &heap = commandStreamReceiver->getIndirectHeap(IndirectHeap::Type::INDIRECT_OBJECT, MemoryConstants::pageSize64k); auto heapAllocation = heap.getGraphicsAllocation(); if (commandStreamReceiver->canUse4GbHeaps) { 
EXPECT_EQ(AllocationType::INTERNAL_HEAP, heapAllocation->getAllocationType()); } else { EXPECT_EQ(AllocationType::LINEAR_STREAM, heapAllocation->getAllocationType()); } EXPECT_EQ(singleTileMask, heapAllocation->storageInfo.memoryBanks); commandStreamReceiver->ensureCommandBufferAllocation(heap, heap.getAvailableSpace() + 1, 0u); auto commandBufferAllocation = heap.getGraphicsAllocation(); EXPECT_EQ(AllocationType::COMMAND_BUFFER, commandBufferAllocation->getAllocationType()); EXPECT_NE(heapAllocation, commandBufferAllocation); EXPECT_EQ(commandBufferAllocation->getMemoryPool(), MemoryPool::LocalMemory); } XE_HPC_CORETEST_F(HwHelperTestsXeHpcCore, givenMultiTileCsrWhenAllocatingCsrSpecificAllocationsThenStoreThemInLocalMemoryPool) { const uint32_t numDevices = 4u; const DeviceBitfield tile0Mask{0x1}; DebugManagerStateRestore restore; VariableBackup backup{&ultHwConfig}; ultHwConfig.useMockedPrepareDeviceEnvironmentsFunc = false; DebugManager.flags.CreateMultipleSubDevices.set(numDevices); DebugManager.flags.EnableLocalMemory.set(true); DebugManager.flags.OverrideLeastOccupiedBank.set(0u); initPlatform(); auto clDevice = platform()->getClDevice(0); auto hwInfo = clDevice->getExecutionEnvironment()->rootDeviceEnvironments[0]->getMutableHardwareInfo(); hwInfo->platform.usRevId = 0b111000; // not BD A0 auto commandStreamReceiver = clDevice->getDefaultEngine().commandStreamReceiver; auto &heap = commandStreamReceiver->getIndirectHeap(IndirectHeap::Type::INDIRECT_OBJECT, MemoryConstants::pageSize64k); auto heapAllocation = heap.getGraphicsAllocation(); if (commandStreamReceiver->canUse4GbHeaps) { EXPECT_EQ(AllocationType::INTERNAL_HEAP, heapAllocation->getAllocationType()); } else { EXPECT_EQ(AllocationType::LINEAR_STREAM, heapAllocation->getAllocationType()); } EXPECT_EQ(tile0Mask, heapAllocation->storageInfo.memoryBanks); commandStreamReceiver->ensureCommandBufferAllocation(heap, heap.getAvailableSpace() + 1, 0u); auto commandBufferAllocation = 
heap.getGraphicsAllocation(); EXPECT_EQ(AllocationType::COMMAND_BUFFER, commandBufferAllocation->getAllocationType()); EXPECT_NE(heapAllocation, commandBufferAllocation); EXPECT_EQ(commandBufferAllocation->getMemoryPool(), MemoryPool::LocalMemory); } XE_HPC_CORETEST_F(HwHelperTestsXeHpcCore, givenSingleTileBdA0CsrWhenAllocatingCsrSpecificAllocationsThenStoreThemInProperMemoryPool) { const uint32_t numDevices = 4u; const uint32_t tileIndex = 2u; const DeviceBitfield tile0Mask = 1; DebugManagerStateRestore restore; VariableBackup backup{&ultHwConfig}; ultHwConfig.useMockedPrepareDeviceEnvironmentsFunc = false; DebugManager.flags.CreateMultipleSubDevices.set(numDevices); DebugManager.flags.EnableLocalMemory.set(true); initPlatform(); auto clDevice = platform()->getClDevice(0); auto hwInfo = clDevice->getExecutionEnvironment()->rootDeviceEnvironments[0]->getMutableHardwareInfo(); hwInfo->platform.usRevId = 0; // BD A0 auto commandStreamReceiver = clDevice->getSubDevice(tileIndex)->getDefaultEngine().commandStreamReceiver; auto &heap = commandStreamReceiver->getIndirectHeap(IndirectHeap::Type::INDIRECT_OBJECT, MemoryConstants::pageSize64k); auto heapAllocation = heap.getGraphicsAllocation(); if (commandStreamReceiver->canUse4GbHeaps) { EXPECT_EQ(AllocationType::INTERNAL_HEAP, heapAllocation->getAllocationType()); } else { EXPECT_EQ(AllocationType::LINEAR_STREAM, heapAllocation->getAllocationType()); } EXPECT_EQ(tile0Mask, heapAllocation->storageInfo.memoryBanks); commandStreamReceiver->ensureCommandBufferAllocation(heap, heap.getAvailableSpace() + 1, 0u); auto commandBufferAllocation = heap.getGraphicsAllocation(); EXPECT_EQ(AllocationType::COMMAND_BUFFER, commandBufferAllocation->getAllocationType()); EXPECT_NE(heapAllocation, commandBufferAllocation); EXPECT_EQ(commandBufferAllocation->getMemoryPool(), MemoryPool::LocalMemory); } XE_HPC_CORETEST_F(HwHelperTestsXeHpcCore, givenPvcWhenAskedForMinimialSimdThen16IsReturned) { auto &helper = 
HwHelper::get(renderCoreFamily); EXPECT_EQ(16u, helper.getMinimalSIMDSize()); } XE_HPC_CORETEST_F(HwHelperTestsXeHpcCore, whenQueryingMaxNumSamplersThenReturnZero) { auto &helper = HwHelper::get(renderCoreFamily); EXPECT_EQ(0u, helper.getMaxNumSamplers()); } XE_HPC_CORETEST_F(HwHelperTestsXeHpcCore, givenRevisionEnumAndPlatformFamilyTypeThenProperValueForIsWorkaroundRequiredIsReturned) { uint32_t steppings[] = { REVISION_A0, REVISION_B, REVISION_C, REVISION_D, CommonConstants::invalidStepping, }; if (hardwareInfo.platform.eProductFamily != IGFX_PVC) { GTEST_SKIP(); } const auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); const auto &hwInfoConfig = *HwInfoConfig::get(hardwareInfo.platform.eProductFamily); for (auto stepping : steppings) { hardwareInfo.platform.usRevId = hwInfoConfig.getHwRevIdFromStepping(stepping, hardwareInfo); if (stepping == REVISION_A0) { EXPECT_TRUE(hwHelper.isWorkaroundRequired(REVISION_A0, REVISION_B, hardwareInfo)); EXPECT_TRUE(hwHelper.isWorkaroundRequired(REVISION_A0, REVISION_C, hardwareInfo)); EXPECT_FALSE(hwHelper.isWorkaroundRequired(REVISION_B, REVISION_C, hardwareInfo)); } else if (stepping == REVISION_B) { EXPECT_FALSE(hwHelper.isWorkaroundRequired(REVISION_A0, REVISION_B, hardwareInfo)); EXPECT_TRUE(hwHelper.isWorkaroundRequired(REVISION_A0, REVISION_C, hardwareInfo)); EXPECT_TRUE(hwHelper.isWorkaroundRequired(REVISION_B, REVISION_C, hardwareInfo)); } else { EXPECT_FALSE(hwHelper.isWorkaroundRequired(REVISION_A0, REVISION_B, hardwareInfo)); EXPECT_FALSE(hwHelper.isWorkaroundRequired(REVISION_A0, REVISION_C, hardwareInfo)); EXPECT_FALSE(hwHelper.isWorkaroundRequired(REVISION_B, REVISION_C, hardwareInfo)); } EXPECT_FALSE(hwHelper.isWorkaroundRequired(REVISION_B, REVISION_A0, hardwareInfo)); EXPECT_FALSE(hwHelper.isWorkaroundRequired(REVISION_C, REVISION_A0, hardwareInfo)); EXPECT_FALSE(hwHelper.isWorkaroundRequired(REVISION_C, REVISION_B, hardwareInfo)); EXPECT_FALSE(hwHelper.isWorkaroundRequired(REVISION_A0, 
REVISION_D, hardwareInfo)); EXPECT_FALSE(hwHelper.isWorkaroundRequired(REVISION_D, REVISION_A0, hardwareInfo)); EXPECT_FALSE(hwHelper.isWorkaroundRequired(REVISION_A0, REVISION_A1, hardwareInfo)); EXPECT_FALSE(hwHelper.isWorkaroundRequired(REVISION_A1, REVISION_A0, hardwareInfo)); } } XE_HPC_CORETEST_F(HwHelperTestsXeHpcCore, whenGetGpgpuEnginesThenReturnTwoCccsEnginesAndFourCcsEnginesAndLinkCopyEngines) { const size_t numEngines = 17; HardwareInfo hwInfo = *defaultHwInfo; hwInfo.featureTable.flags.ftrCCSNode = true; hwInfo.featureTable.ftrBcsInfo = maxNBitValue(9); hwInfo.capabilityTable.blitterOperationsSupported = true; hwInfo.capabilityTable.defaultEngineType = aub_stream::ENGINE_CCS; hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled = 4; auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); EXPECT_EQ(numEngines, device->allEngines.size()); auto &engines = HwHelperHw::get().getGpgpuEngineInstances(device->getHardwareInfo()); EXPECT_EQ(numEngines, engines.size()); struct EnginePropertiesMap { aub_stream::EngineType engineType; bool isCcs; bool isBcs; }; const std::array enginePropertiesMap = {{ {aub_stream::ENGINE_CCS, true, false}, {aub_stream::ENGINE_CCS1, true, false}, {aub_stream::ENGINE_CCS2, true, false}, {aub_stream::ENGINE_CCS3, true, false}, {aub_stream::ENGINE_CCCS, false, false}, {aub_stream::ENGINE_CCS, true, false}, {aub_stream::ENGINE_CCS, true, false}, {aub_stream::ENGINE_BCS, false, true}, {aub_stream::ENGINE_BCS, false, true}, {aub_stream::ENGINE_BCS1, false, true}, {aub_stream::ENGINE_BCS2, false, true}, {aub_stream::ENGINE_BCS3, false, true}, {aub_stream::ENGINE_BCS4, false, true}, {aub_stream::ENGINE_BCS5, false, true}, {aub_stream::ENGINE_BCS6, false, true}, {aub_stream::ENGINE_BCS7, false, true}, {aub_stream::ENGINE_BCS8, false, true}, }}; for (size_t i = 0; i < numEngines; i++) { EXPECT_EQ(enginePropertiesMap[i].engineType, engines[i].first); EXPECT_EQ(enginePropertiesMap[i].isCcs, 
EngineHelpers::isCcs(enginePropertiesMap[i].engineType)); EXPECT_EQ(enginePropertiesMap[i].isBcs, EngineHelpers::isBcs(enginePropertiesMap[i].engineType)); } } XE_HPC_CORETEST_F(HwHelperTestsXeHpcCore, whenGetGpgpuEnginesThenReturnTwoCccsEnginesAndFourCcsEnginesAndEightLinkCopyEngines) { const size_t numEngines = 17; HardwareInfo hwInfo = *defaultHwInfo; hwInfo.featureTable.flags.ftrCCSNode = true; hwInfo.featureTable.ftrBcsInfo = maxNBitValue(9); hwInfo.capabilityTable.blitterOperationsSupported = true; hwInfo.capabilityTable.defaultEngineType = aub_stream::ENGINE_CCS; hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled = 4; auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); EXPECT_EQ(numEngines, device->allEngines.size()); auto &engines = HwHelperHw::get().getGpgpuEngineInstances(device->getHardwareInfo()); EXPECT_EQ(numEngines, engines.size()); struct EnginePropertiesMap { aub_stream::EngineType engineType; bool isCcs; bool isBcs; }; const std::array enginePropertiesMap = {{ {aub_stream::ENGINE_CCS, true, false}, {aub_stream::ENGINE_CCS1, true, false}, {aub_stream::ENGINE_CCS2, true, false}, {aub_stream::ENGINE_CCS3, true, false}, {aub_stream::ENGINE_CCCS, false, false}, {aub_stream::ENGINE_CCS, true, false}, {aub_stream::ENGINE_CCS, true, false}, {aub_stream::ENGINE_BCS, false, true}, {aub_stream::ENGINE_BCS, false, true}, {aub_stream::ENGINE_BCS1, false, true}, {aub_stream::ENGINE_BCS2, false, true}, {aub_stream::ENGINE_BCS3, false, true}, {aub_stream::ENGINE_BCS4, false, true}, {aub_stream::ENGINE_BCS5, false, true}, {aub_stream::ENGINE_BCS6, false, true}, {aub_stream::ENGINE_BCS7, false, true}, {aub_stream::ENGINE_BCS8, false, true}, }}; for (size_t i = 0; i < numEngines; i++) { EXPECT_EQ(enginePropertiesMap[i].engineType, engines[i].first); EXPECT_EQ(enginePropertiesMap[i].isCcs, EngineHelpers::isCcs(enginePropertiesMap[i].engineType)); EXPECT_EQ(enginePropertiesMap[i].isBcs, 
EngineHelpers::isBcs(enginePropertiesMap[i].engineType)); } } XE_HPC_CORETEST_F(HwHelperTestsXeHpcCore, givenCccsAsDefaultEngineWhenGetEnginesCalledThenChangeDefaultEngine) { const size_t numEngines = 17; HardwareInfo hwInfo = *defaultHwInfo; hwInfo.featureTable.flags.ftrCCSNode = true; hwInfo.featureTable.ftrBcsInfo = maxNBitValue(9); hwInfo.capabilityTable.blitterOperationsSupported = true; hwInfo.capabilityTable.defaultEngineType = aub_stream::ENGINE_CCCS; hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled = 4; auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); EXPECT_EQ(numEngines, device->allEngines.size()); auto &engines = HwHelperHw::get().getGpgpuEngineInstances(device->getHardwareInfo()); EXPECT_EQ(numEngines, engines.size()); struct EnginePropertiesMap { aub_stream::EngineType engineType; bool isCcs; bool isBcs; }; const std::array enginePropertiesMap = {{ {aub_stream::ENGINE_CCS, true, false}, {aub_stream::ENGINE_CCS1, true, false}, {aub_stream::ENGINE_CCS2, true, false}, {aub_stream::ENGINE_CCS3, true, false}, {aub_stream::ENGINE_CCCS, false, false}, {aub_stream::ENGINE_CCCS, false, false}, {aub_stream::ENGINE_CCCS, false, false}, {aub_stream::ENGINE_BCS, false, true}, {aub_stream::ENGINE_BCS, false, true}, {aub_stream::ENGINE_BCS1, false, true}, {aub_stream::ENGINE_BCS2, false, true}, {aub_stream::ENGINE_BCS3, false, true}, {aub_stream::ENGINE_BCS4, false, true}, {aub_stream::ENGINE_BCS5, false, true}, {aub_stream::ENGINE_BCS6, false, true}, {aub_stream::ENGINE_BCS7, false, true}, {aub_stream::ENGINE_BCS8, false, true}, }}; for (size_t i = 0; i < numEngines; i++) { EXPECT_EQ(enginePropertiesMap[i].engineType, engines[i].first); EXPECT_EQ(enginePropertiesMap[i].isCcs, EngineHelpers::isCcs(enginePropertiesMap[i].engineType)); EXPECT_EQ(enginePropertiesMap[i].isBcs, EngineHelpers::isBcs(enginePropertiesMap[i].engineType)); } } XE_HPC_CORETEST_F(HwHelperTestsXeHpcCore, 
givenOneCcsEnabledWhenGetEnginesCalledThenCreateOnlyOneCcs) { const size_t numEngines = 14; HardwareInfo hwInfo = *defaultHwInfo; hwInfo.featureTable.flags.ftrCCSNode = true; hwInfo.featureTable.ftrBcsInfo = maxNBitValue(9); hwInfo.capabilityTable.blitterOperationsSupported = true; hwInfo.capabilityTable.defaultEngineType = aub_stream::ENGINE_CCS; hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled = 1; auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); EXPECT_EQ(numEngines, device->allEngines.size()); auto &engines = HwHelperHw::get().getGpgpuEngineInstances(device->getHardwareInfo()); EXPECT_EQ(numEngines, engines.size()); struct EnginePropertiesMap { aub_stream::EngineType engineType; bool isCcs; bool isBcs; }; const std::array enginePropertiesMap = {{ {aub_stream::ENGINE_CCS, true, false}, {aub_stream::ENGINE_CCCS, false, false}, {aub_stream::ENGINE_CCS, true, false}, {aub_stream::ENGINE_CCS, true, false}, {aub_stream::ENGINE_BCS, false, true}, {aub_stream::ENGINE_BCS, false, true}, {aub_stream::ENGINE_BCS1, false, true}, {aub_stream::ENGINE_BCS2, false, true}, {aub_stream::ENGINE_BCS3, false, true}, {aub_stream::ENGINE_BCS4, false, true}, {aub_stream::ENGINE_BCS5, false, true}, {aub_stream::ENGINE_BCS6, false, true}, {aub_stream::ENGINE_BCS7, false, true}, {aub_stream::ENGINE_BCS8, false, true}, }}; for (size_t i = 0; i < numEngines; i++) { EXPECT_EQ(enginePropertiesMap[i].engineType, engines[i].first); EXPECT_EQ(enginePropertiesMap[i].isCcs, EngineHelpers::isCcs(enginePropertiesMap[i].engineType)); EXPECT_EQ(enginePropertiesMap[i].isBcs, EngineHelpers::isBcs(enginePropertiesMap[i].engineType)); } } XE_HPC_CORETEST_F(HwHelperTestsXeHpcCore, givenNotAllCopyEnginesWhenSettingEngineTableThenDontAddUnsupported) { const size_t numEngines = 9; HardwareInfo hwInfo = *defaultHwInfo; hwInfo.featureTable.flags.ftrCCSNode = true; hwInfo.featureTable.ftrBcsInfo = maxNBitValue(9); hwInfo.featureTable.ftrBcsInfo.set(0, false); 
hwInfo.featureTable.ftrBcsInfo.set(2, false); hwInfo.featureTable.ftrBcsInfo.set(7, false); hwInfo.featureTable.ftrBcsInfo.set(8, false); hwInfo.capabilityTable.blitterOperationsSupported = true; hwInfo.capabilityTable.defaultEngineType = aub_stream::ENGINE_CCS; hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled = 1; auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); EXPECT_EQ(numEngines, device->allEngines.size()); auto &engines = HwHelperHw::get().getGpgpuEngineInstances(device->getHardwareInfo()); EXPECT_EQ(numEngines, engines.size()); struct EnginePropertiesMap { aub_stream::EngineType engineType; bool isCcs; bool isBcs; }; const std::array enginePropertiesMap = {{ {aub_stream::ENGINE_CCS, true, false}, {aub_stream::ENGINE_CCCS, false, false}, {aub_stream::ENGINE_CCS, true, false}, {aub_stream::ENGINE_CCS, true, false}, {aub_stream::ENGINE_BCS1, false, true}, {aub_stream::ENGINE_BCS3, false, true}, {aub_stream::ENGINE_BCS4, false, true}, {aub_stream::ENGINE_BCS5, false, true}, {aub_stream::ENGINE_BCS6, false, true}, }}; for (size_t i = 0; i < numEngines; i++) { EXPECT_EQ(enginePropertiesMap[i].engineType, engines[i].first); EXPECT_EQ(enginePropertiesMap[i].isCcs, EngineHelpers::isCcs(enginePropertiesMap[i].engineType)); EXPECT_EQ(enginePropertiesMap[i].isBcs, EngineHelpers::isBcs(enginePropertiesMap[i].engineType)); } } XE_HPC_CORETEST_F(HwHelperTestsXeHpcCore, givenOneBcsEnabledWhenGetEnginesCalledThenCreateOnlyOneBcs) { const size_t numEngines = 9; HardwareInfo hwInfo = *defaultHwInfo; hwInfo.featureTable.flags.ftrCCSNode = true; hwInfo.featureTable.ftrBcsInfo = 1; hwInfo.capabilityTable.blitterOperationsSupported = true; hwInfo.capabilityTable.defaultEngineType = aub_stream::ENGINE_CCS; hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled = 4; auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); EXPECT_EQ(numEngines, device->allEngines.size()); auto &engines = 
HwHelperHw::get().getGpgpuEngineInstances(device->getHardwareInfo()); EXPECT_EQ(numEngines, engines.size()); struct EnginePropertiesMap { aub_stream::EngineType engineType; bool isCcs; bool isBcs; }; const std::array enginePropertiesMap = {{ {aub_stream::ENGINE_CCS, true, false}, {aub_stream::ENGINE_CCS1, true, false}, {aub_stream::ENGINE_CCS2, true, false}, {aub_stream::ENGINE_CCS3, true, false}, {aub_stream::ENGINE_CCCS, false, false}, {aub_stream::ENGINE_CCS, true, false}, {aub_stream::ENGINE_CCS, true, false}, {aub_stream::ENGINE_BCS, false, true}, {aub_stream::ENGINE_BCS, false, true}, }}; for (size_t i = 0; i < numEngines; i++) { EXPECT_EQ(enginePropertiesMap[i].engineType, engines[i].first); EXPECT_EQ(enginePropertiesMap[i].isCcs, EngineHelpers::isCcs(enginePropertiesMap[i].engineType)); EXPECT_EQ(enginePropertiesMap[i].isBcs, EngineHelpers::isBcs(enginePropertiesMap[i].engineType)); } } XE_HPC_CORETEST_F(HwHelperTestsXeHpcCore, givenBcsDisabledWhenGetEnginesCalledThenDontCreateAnyBcs) { const size_t numEngines = 7; HardwareInfo hwInfo = *defaultHwInfo; hwInfo.featureTable.flags.ftrCCSNode = true; hwInfo.featureTable.ftrBcsInfo = 0; hwInfo.capabilityTable.defaultEngineType = aub_stream::ENGINE_CCS; hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled = 4; auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); EXPECT_EQ(numEngines, device->allEngines.size()); auto &engines = HwHelperHw::get().getGpgpuEngineInstances(device->getHardwareInfo()); EXPECT_EQ(numEngines, engines.size()); struct EnginePropertiesMap { aub_stream::EngineType engineType; bool isCcs; bool isBcs; }; const std::array enginePropertiesMap = {{ {aub_stream::ENGINE_CCS, true, false}, {aub_stream::ENGINE_CCS1, true, false}, {aub_stream::ENGINE_CCS2, true, false}, {aub_stream::ENGINE_CCS3, true, false}, {aub_stream::ENGINE_CCCS, false, false}, {aub_stream::ENGINE_CCS, true, false}, {aub_stream::ENGINE_CCS, true, false}, }}; for (size_t i = 0; i < numEngines; i++) { 
EXPECT_EQ(enginePropertiesMap[i].engineType, engines[i].first); EXPECT_EQ(enginePropertiesMap[i].isCcs, EngineHelpers::isCcs(enginePropertiesMap[i].engineType)); EXPECT_EQ(enginePropertiesMap[i].isBcs, EngineHelpers::isBcs(enginePropertiesMap[i].engineType)); } } XE_HPC_CORETEST_F(HwHelperTestsXeHpcCore, givenCcsDisabledAndNumberOfCcsEnabledWhenGetGpgpuEnginesThenReturnCccsEngines) { HardwareInfo hwInfo = *defaultHwInfo; hwInfo.featureTable.flags.ftrCCSNode = false; hwInfo.featureTable.ftrBcsInfo = 0; hwInfo.capabilityTable.defaultEngineType = aub_stream::ENGINE_CCCS; hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled = 4; auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); EXPECT_EQ(3u, device->allEngines.size()); auto &engines = HwHelperHw::get().getGpgpuEngineInstances(hwInfo); EXPECT_EQ(3u, engines.size()); EXPECT_EQ(aub_stream::ENGINE_CCCS, engines[0].first); EXPECT_EQ(aub_stream::ENGINE_CCCS, engines[1].first); EXPECT_EQ(aub_stream::ENGINE_CCCS, engines[2].first); } XE_HPC_CORETEST_F(HwHelperTestsXeHpcCore, givenCcsDisabledWhenGetGpgpuEnginesThenReturnCccsEngines) { HardwareInfo hwInfo = *defaultHwInfo; hwInfo.featureTable.flags.ftrCCSNode = false; hwInfo.featureTable.ftrBcsInfo = 0; hwInfo.capabilityTable.defaultEngineType = aub_stream::ENGINE_CCCS; hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled = 0; auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); EXPECT_EQ(3u, device->allEngines.size()); auto &engines = HwHelperHw::get().getGpgpuEngineInstances(hwInfo); EXPECT_EQ(3u, engines.size()); EXPECT_EQ(aub_stream::ENGINE_CCCS, engines[0].first); EXPECT_EQ(aub_stream::ENGINE_CCCS, engines[1].first); EXPECT_EQ(aub_stream::ENGINE_CCCS, engines[2].first); } XE_HPC_CORETEST_F(HwHelperTestsXeHpcCore, whenNonBcsEngineIsVerifiedThenReturnFalse) { EXPECT_FALSE(EngineHelpers::isBcs(static_cast(aub_stream::ENGINE_BCS8 + 1))); } XE_HPC_CORETEST_F(HwHelperTestsXeHpcCore, 
whenPipecontrolWaIsProgrammedThenFlushL1Cache) { DebugManagerStateRestore restorer; DebugManager.flags.DisablePipeControlPrecedingPostSyncCommand.set(1); using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; uint32_t buffer[64] = {}; LinearStream cmdStream(buffer, sizeof(buffer)); uint64_t gpuAddress = 0x1234; MemorySynchronizationCommands::addPipeControlWA(cmdStream, gpuAddress, *defaultHwInfo); auto pipeControl = genCmdCast(buffer); ASSERT_NE(nullptr, pipeControl); EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); EXPECT_TRUE(pipeControl->getHdcPipelineFlush()); EXPECT_TRUE(pipeControl->getUnTypedDataPortCacheFlush()); } XE_HPC_CORETEST_F(HwHelperTestsXeHpcCore, givenHwHelperWhenAskedIfFenceAllocationRequiredThenReturnCorrectValue) { DebugManagerStateRestore dbgRestore; auto hwInfo = *defaultHwInfo; auto &helper = HwHelper::get(renderCoreFamily); DebugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(-1); DebugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(-1); DebugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(-1); EXPECT_TRUE(helper.isFenceAllocationRequired(hwInfo)); DebugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(0); DebugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(0); DebugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(0); EXPECT_FALSE(helper.isFenceAllocationRequired(hwInfo)); DebugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(1); DebugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(0); DebugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(0); EXPECT_TRUE(helper.isFenceAllocationRequired(hwInfo)); DebugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(0); DebugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(1); DebugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(0); 
EXPECT_TRUE(helper.isFenceAllocationRequired(hwInfo)); DebugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(0); DebugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(0); DebugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(1); EXPECT_TRUE(helper.isFenceAllocationRequired(hwInfo)); } XE_HPC_CORETEST_F(HwHelperTestsXeHpcCore, givenDefaultMemorySynchronizationCommandsWhenGettingSizeForAdditionalSynchronizationThenCorrectValueIsReturned) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; if (hardwareInfo.platform.eProductFamily != IGFX_PVC) { GTEST_SKIP(); } EXPECT_EQ(sizeof(MI_SEMAPHORE_WAIT), MemorySynchronizationCommands::getSizeForAdditonalSynchronization(*defaultHwInfo)); } XE_HPC_CORETEST_F(HwHelperTestsXeHpcCore, givenDebugMemorySynchronizationCommandsWhenGettingSizeForAdditionalSynchronizationThenCorrectValueIsReturned) { DebugManagerStateRestore restorer; DebugManager.flags.DisablePipeControlPrecedingPostSyncCommand.set(1); using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; if (hardwareInfo.platform.eProductFamily != IGFX_PVC) { GTEST_SKIP(); } EXPECT_EQ(2 * sizeof(MI_SEMAPHORE_WAIT), MemorySynchronizationCommands::getSizeForAdditonalSynchronization(*defaultHwInfo)); } XE_HPC_CORETEST_F(HwHelperTestsXeHpcCore, givenDontProgramGlobalFenceAsMiMemFenceCommandInCommandStreamWhenGettingSizeForAdditionalSynchronizationThenCorrectValueIsReturned) { DebugManagerStateRestore debugRestorer; DebugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(0); using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; auto hardwareInfo = *defaultHwInfo; EXPECT_EQ(sizeof(MI_SEMAPHORE_WAIT), MemorySynchronizationCommands::getSizeForAdditonalSynchronization(hardwareInfo)); } XE_HPC_CORETEST_F(HwHelperTestsXeHpcCore, givenProgramGlobalFenceAsMiMemFenceCommandInCommandStreamWhenGettingSizeForAdditionalSynchronizationThenCorrectValueIsReturned) { 
DebugManagerStateRestore debugRestorer; DebugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(1); using MI_MEM_FENCE = typename FamilyType::MI_MEM_FENCE; auto hardwareInfo = *defaultHwInfo; EXPECT_EQ(sizeof(MI_MEM_FENCE), MemorySynchronizationCommands::getSizeForAdditonalSynchronization(hardwareInfo)); } XE_HPC_CORETEST_F(HwHelperTestsXeHpcCore, givenMemorySynchronizationCommandsWhenAddingSynchronizationThenCorrectMethodIsUsed) { using MI_MEM_FENCE = typename FamilyType::MI_MEM_FENCE; using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; struct { unsigned short revisionId; int32_t programGlobalFenceAsMiMemFenceCommandInCommandStream; bool expectMiSemaphoreWait; } testInputs[] = { {0x0, -1, true}, {0x3, -1, false}, {0x0, 0, true}, {0x3, 0, true}, {0x0, 1, false}, {0x3, 1, false}, }; DebugManagerStateRestore debugRestorer; auto hardwareInfo = *defaultHwInfo; if (hardwareInfo.platform.eProductFamily != IGFX_PVC) { GTEST_SKIP(); } hardwareInfo.featureTable.flags.ftrLocalMemory = true; uint8_t buffer[128] = {}; uint64_t gpuAddress = 0x12345678; for (auto &testInput : testInputs) { hardwareInfo.platform.usRevId = testInput.revisionId; DebugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set( testInput.programGlobalFenceAsMiMemFenceCommandInCommandStream); LinearStream commandStream(buffer, 128); auto synchronizationSize = MemorySynchronizationCommands::getSizeForSingleAdditionalSynchronization(hardwareInfo); MemorySynchronizationCommands::addAdditionalSynchronization(commandStream, gpuAddress, false, hardwareInfo); HardwareParse hwParser; hwParser.parseCommands(commandStream); EXPECT_EQ(1u, hwParser.cmdList.size()); if (testInput.expectMiSemaphoreWait) { EXPECT_EQ(sizeof(MI_SEMAPHORE_WAIT), synchronizationSize); auto semaphoreCmd = genCmdCast(*hwParser.cmdList.begin()); ASSERT_NE(nullptr, semaphoreCmd); EXPECT_EQ(static_cast(-2), semaphoreCmd->getSemaphoreDataDword()); EXPECT_EQ(gpuAddress, 
semaphoreCmd->getSemaphoreGraphicsAddress()); EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD, semaphoreCmd->getCompareOperation()); } else { EXPECT_EQ(sizeof(MI_MEM_FENCE), synchronizationSize); auto fenceCmd = genCmdCast(*hwParser.cmdList.begin()); ASSERT_NE(nullptr, fenceCmd); EXPECT_EQ(MI_MEM_FENCE::FENCE_TYPE::FENCE_TYPE_RELEASE, fenceCmd->getAFenceType()); } } } XE_HPC_CORETEST_F(HwHelperTestsXeHpcCore, givenHwHelperWhenGettingThreadsPerEUConfigsThenCorrectConfigsAreReturned) { auto &helper = HwHelper::get(pDevice->getHardwareInfo().platform.eRenderCoreFamily); EXPECT_NE(nullptr, &helper); auto &configs = helper.getThreadsPerEUConfigs(); EXPECT_EQ(2U, configs.size()); EXPECT_EQ(4U, configs[0]); EXPECT_EQ(8U, configs[1]); } XE_HPC_CORETEST_F(HwHelperTestsXeHpcCore, givenDefaultHwHelperHwWhenGettingIsBlitCopyRequiredForLocalMemoryThenFalseIsReturned) { auto &helper = HwHelper::get(renderCoreFamily); MockGraphicsAllocation allocation; allocation.overrideMemoryPool(MemoryPool::LocalMemory); allocation.setAllocationType(AllocationType::BUFFER_HOST_MEMORY); EXPECT_FALSE(helper.isBlitCopyRequiredForLocalMemory(*defaultHwInfo, allocation)); } XE_HPC_CORETEST_F(HwHelperTestsXeHpcCore, givenNonTile0AccessWhenGettingIsBlitCopyRequiredForLocalMemoryThenTrueIsReturned) { auto &helper = HwHelper::get(renderCoreFamily); HardwareInfo hwInfo = *defaultHwInfo; hwInfo.capabilityTable.blitterOperationsSupported = true; MockGraphicsAllocation graphicsAllocation; graphicsAllocation.setAllocationType(AllocationType::BUFFER_HOST_MEMORY); EXPECT_TRUE(GraphicsAllocation::isLockable(graphicsAllocation.getAllocationType())); graphicsAllocation.overrideMemoryPool(MemoryPool::LocalMemory); hwInfo.platform.usRevId = FamilyType::pvcBaseDieA0Masked; graphicsAllocation.storageInfo.cloningOfPageTables = false; graphicsAllocation.storageInfo.memoryBanks = 0b11; EXPECT_TRUE(helper.isBlitCopyRequiredForLocalMemory(hwInfo, graphicsAllocation)); 
graphicsAllocation.storageInfo.memoryBanks = 0b10; EXPECT_TRUE(helper.isBlitCopyRequiredForLocalMemory(hwInfo, graphicsAllocation)); { VariableBackup revisionId{&hwInfo.platform.usRevId}; revisionId = FamilyType::pvcBaseDieA0Masked ^ FamilyType::pvcBaseDieRevMask; EXPECT_FALSE(helper.isBlitCopyRequiredForLocalMemory(hwInfo, graphicsAllocation)); } { VariableBackup cloningOfPageTables{&graphicsAllocation.storageInfo.cloningOfPageTables}; cloningOfPageTables = true; EXPECT_TRUE(helper.isBlitCopyRequiredForLocalMemory(hwInfo, graphicsAllocation)); } { VariableBackup memoryBanks{&graphicsAllocation.storageInfo.memoryBanks}; memoryBanks = 0b1; EXPECT_FALSE(helper.isBlitCopyRequiredForLocalMemory(hwInfo, graphicsAllocation)); } } XE_HPC_CORETEST_F(HwHelperTestsXeHpcCore, givenCCCSEngineAndRevisionBWhenCallingIsCooperativeDispatchSupportedThenFalseIsReturned) { auto &helper = HwHelper::get(renderCoreFamily); auto context = new NEO::MockContext(pClDevice); auto commandQueue = reinterpret_cast(new MockCommandQueueHw(context, pClDevice, 0)); auto engineGroupType = helper.getEngineGroupType(commandQueue->getGpgpuEngine().getEngineType(), commandQueue->getGpgpuEngine().getEngineUsage(), hardwareInfo); auto retVal = helper.isCooperativeDispatchSupported(engineGroupType, hardwareInfo); EXPECT_TRUE(retVal); auto &hwConfig = *HwInfoConfig::get(hardwareInfo.platform.eProductFamily); hardwareInfo.platform.usRevId = hwConfig.getHwRevIdFromStepping(REVISION_B, hardwareInfo); retVal = helper.isCooperativeDispatchSupported(engineGroupType, hardwareInfo); EXPECT_FALSE(retVal); commandQueue->release(); context->decRefInternal(); } XE_HPC_CORETEST_F(HwHelperTestsXeHpcCore, givenCCSEngineWhenCallingIsCooperativeDispatchSupportedThenTrueIsReturned) { auto &helper = HwHelper::get(renderCoreFamily); auto hwInfo = *defaultHwInfo; uint64_t hwInfoConfig = defaultHardwareInfoConfigTable[productFamily]; hardwareInfoSetup[productFamily](&hwInfo, true, hwInfoConfig); auto device = 
MockDevice::createWithNewExecutionEnvironment(&hwInfo); ASSERT_NE(nullptr, device); auto clDevice = new MockClDevice{device}; ASSERT_NE(nullptr, clDevice); auto context = new NEO::MockContext(clDevice); auto commandQueue = reinterpret_cast(new MockCommandQueueHw(context, clDevice, 0)); auto engineGroupType = helper.getEngineGroupType(commandQueue->getGpgpuEngine().getEngineType(), commandQueue->getGpgpuEngine().getEngineUsage(), hardwareInfo); auto retVal = helper.isCooperativeDispatchSupported(engineGroupType, hwInfo); ASSERT_TRUE(retVal); commandQueue->release(); context->decRefInternal(); delete clDevice; } using HwInfoConfigTestXeHpcCore = ::testing::Test; XE_HPC_CORETEST_F(HwInfoConfigTestXeHpcCore, givenDebugVariableSetWhenConfigureIsCalledThenSetupBlitterOperationsSupportedFlag) { DebugManagerStateRestore restore; auto hwInfoConfig = HwInfoConfig::get(productFamily); HardwareInfo hwInfo = *defaultHwInfo; DebugManager.flags.EnableBlitterOperationsSupport.set(0); hwInfoConfig->configureHardwareCustom(&hwInfo, nullptr); EXPECT_FALSE(hwInfo.capabilityTable.blitterOperationsSupported); DebugManager.flags.EnableBlitterOperationsSupport.set(1); hwInfoConfig->configureHardwareCustom(&hwInfo, nullptr); EXPECT_TRUE(hwInfo.capabilityTable.blitterOperationsSupported); } XE_HPC_CORETEST_F(HwInfoConfigTestXeHpcCore, givenMultitileConfigWhenConfiguringHwInfoThenEnableBlitter) { auto hwInfoConfig = HwInfoConfig::get(productFamily); HardwareInfo hwInfo = *defaultHwInfo; for (uint32_t tileCount = 0; tileCount <= 4; tileCount++) { hwInfo.gtSystemInfo.MultiTileArchInfo.TileCount = tileCount; hwInfoConfig->configureHardwareCustom(&hwInfo, nullptr); EXPECT_EQ(true, hwInfo.capabilityTable.blitterOperationsSupported); } } using LriHelperTestsXeHpcCore = ::testing::Test; XE_HPC_CORETEST_F(LriHelperTestsXeHpcCore, whenProgrammingLriCommandThenExpectMmioRemapEnableCorrectlySet) { using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; std::unique_ptr buffer(new 
uint8_t[128]); LinearStream stream(buffer.get(), 128); uint32_t address = 0x8888; uint32_t data = 0x1234; auto expectedLri = FamilyType::cmdInitLoadRegisterImm; EXPECT_FALSE(expectedLri.getMmioRemapEnable()); expectedLri.setRegisterOffset(address); expectedLri.setDataDword(data); expectedLri.setMmioRemapEnable(true); LriHelper::program(&stream, address, data, true); MI_LOAD_REGISTER_IMM *lri = genCmdCast(buffer.get()); ASSERT_NE(nullptr, lri); EXPECT_EQ(sizeof(MI_LOAD_REGISTER_IMM), stream.getUsed()); EXPECT_EQ(lri, stream.getCpuBase()); EXPECT_TRUE(memcmp(lri, &expectedLri, sizeof(MI_LOAD_REGISTER_IMM)) == 0); } XE_HPC_CORETEST_F(HwHelperTestsXeHpcCore, givenCccsDisabledWhenGetGpgpuEnginesCalledThenDontSetCccs) { HardwareInfo hwInfo = *defaultHwInfo; hwInfo.featureTable.flags.ftrCCSNode = true; hwInfo.featureTable.ftrBcsInfo = 1; hwInfo.featureTable.flags.ftrRcsNode = false; hwInfo.capabilityTable.blitterOperationsSupported = true; hwInfo.capabilityTable.defaultEngineType = aub_stream::ENGINE_CCS; hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled = 4; auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); EXPECT_EQ(8u, device->allEngines.size()); auto &engines = HwHelperHw::get().getGpgpuEngineInstances(hwInfo); EXPECT_EQ(8u, engines.size()); EXPECT_EQ(aub_stream::ENGINE_CCS, engines[0].first); EXPECT_EQ(aub_stream::ENGINE_CCS1, engines[1].first); EXPECT_EQ(aub_stream::ENGINE_CCS2, engines[2].first); EXPECT_EQ(aub_stream::ENGINE_CCS3, engines[3].first); EXPECT_EQ(hwInfo.capabilityTable.defaultEngineType, engines[4].first); // low priority EXPECT_EQ(hwInfo.capabilityTable.defaultEngineType, engines[5].first); // internal EXPECT_EQ(aub_stream::ENGINE_BCS, engines[6].first); EXPECT_EQ(aub_stream::ENGINE_BCS, engines[7].first); } XE_HPC_CORETEST_F(HwHelperTestsXeHpcCore, givenCccsDisabledButDebugVariableSetWhenGetGpgpuEnginesCalledThenSetCccs) { HardwareInfo hwInfo = *defaultHwInfo; hwInfo.featureTable.flags.ftrCCSNode = true; 
hwInfo.featureTable.ftrBcsInfo = 1; hwInfo.featureTable.flags.ftrRcsNode = false; hwInfo.capabilityTable.blitterOperationsSupported = true; hwInfo.capabilityTable.defaultEngineType = aub_stream::ENGINE_CCS; hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled = 4; DebugManagerStateRestore restore; DebugManager.flags.NodeOrdinal.set(static_cast(aub_stream::EngineType::ENGINE_CCCS)); auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); EXPECT_EQ(9u, device->allEngines.size()); auto &engines = HwHelperHw::get().getGpgpuEngineInstances(hwInfo); EXPECT_EQ(9u, engines.size()); EXPECT_EQ(aub_stream::ENGINE_CCS, engines[0].first); EXPECT_EQ(aub_stream::ENGINE_CCS1, engines[1].first); EXPECT_EQ(aub_stream::ENGINE_CCS2, engines[2].first); EXPECT_EQ(aub_stream::ENGINE_CCS3, engines[3].first); EXPECT_EQ(aub_stream::ENGINE_CCCS, engines[4].first); EXPECT_EQ(aub_stream::ENGINE_CCCS, engines[5].first); // low priority EXPECT_EQ(aub_stream::ENGINE_CCCS, engines[6].first); // internal EXPECT_EQ(aub_stream::ENGINE_BCS, engines[7].first); // internal EXPECT_EQ(aub_stream::ENGINE_BCS, engines[8].first); } XE_HPC_CORETEST_F(HwHelperTestsXeHpcCore, WhenCheckingSipWAThenFalseIsReturned) { EXPECT_FALSE(HwHelper::get(renderCoreFamily).isSipWANeeded(*defaultHwInfo)); } XE_HPC_CORETEST_F(HwHelperTestsXeHpcCore, WhenCheckingPreferenceForBlitterForLocalToLocalTransfersThenReturnFalse) { EXPECT_FALSE(ClHwHelper::get(renderCoreFamily).preferBlitterForLocalToLocalTransfers()); } XE_HPC_CORETEST_F(HwHelperTestsXeHpcCore, givenBdA0WhenBcsSubDeviceSupportIsCheckedThenReturnFalse) { DebugManagerStateRestore restore; HardwareInfo hwInfo = *defaultHwInfo; auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); constexpr uint8_t bdRev[4] = {0, 0b111001, 0b101001, 0b000101}; for (int32_t debugFlag : {-1, 0, 1}) { DebugManager.flags.DoNotReportTile1BscWaActive.set(debugFlag); for (uint64_t subDevice = 0; subDevice < 4; subDevice++) { for (auto rev : bdRev) { 
hwInfo.platform.usRevId = rev; for (uint32_t engineType = 0; engineType < static_cast(aub_stream::EngineType::NUM_ENGINES); engineType++) { auto engineTypeT = static_cast(engineType); bool result = hwHelper.isSubDeviceEngineSupported(hwInfo, DeviceBitfield(1llu << subDevice), engineTypeT); bool affectedEngine = ((subDevice == 1) && (aub_stream::ENGINE_BCS == engineTypeT || aub_stream::ENGINE_BCS1 == engineTypeT || aub_stream::ENGINE_BCS3 == engineTypeT)); bool isBdA0 = ((rev & FamilyType::pvcBaseDieRevMask) == FamilyType::pvcBaseDieA0Masked); bool applyWa = affectedEngine; applyWa &= isBdA0 || (debugFlag == 1); applyWa &= (debugFlag != 0); EXPECT_EQ(!applyWa, result); } } } } } XE_HPC_CORETEST_F(HwHelperTestsXeHpcCore, givenBdA0WhenAllocatingOnNonTileZeroThenForceTile0) { DebugManagerStateRestore restore; HardwareInfo hwInfo = *defaultHwInfo; auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); constexpr uint8_t bdRev[4] = {0, 0b111001, 0b101001, 0b000101}; constexpr DeviceBitfield originalTileMasks[4] = {0b1, 0b11, 0b10, 0b1011}; constexpr DeviceBitfield tile0Mask = 1; constexpr DeviceBitfield allTilesMask = 0b1111; const AllocationProperties allocProperties(0, 1, AllocationType::UNKNOWN, allTilesMask); for (int32_t debugFlag : {-1, 0, 1}) { DebugManager.flags.ForceTile0PlacementForTile1ResourcesWaActive.set(debugFlag); for (auto rev : bdRev) { hwInfo.platform.usRevId = rev; bool isBdA0 = ((hwInfo.platform.usRevId & FamilyType::pvcBaseDieRevMask) == FamilyType::pvcBaseDieA0Masked); for (auto originalMask : originalTileMasks) { AllocationData allocData; allocData.flags.requiresCpuAccess = true; allocData.storageInfo.memoryBanks = originalMask; hwHelper.setExtraAllocationData(allocData, allocProperties, hwInfo); bool applyWa = (isBdA0 || (debugFlag == 1)); applyWa &= (debugFlag != 0); if (applyWa) { EXPECT_EQ(tile0Mask, allocData.storageInfo.memoryBanks); } else { EXPECT_EQ(originalMask, allocData.storageInfo.memoryBanks); } } } } } 
// Checks that setExtraAllocationData() leaves flags.useSystemMemory cleared for
// COMMAND_BUFFER allocations, both when the properties carry a single-tile
// bitfield (0b0100) and when they carry the all-tiles bitfield (0b1111).
XE_HPC_CORETEST_F(HwHelperTestsXeHpcCore, givenCommandBufferAllocationWhenSetExtraAllocationDataThenUseSystemLocalMemoryOnlyForImplicitScalingCommandBuffers) {
    HardwareInfo hwInfo = *defaultHwInfo;
    auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily);

    constexpr DeviceBitfield oneTile = 0b0100;
    constexpr DeviceBitfield everyTile = 0b1111;
    const AllocationProperties oneTileProperties(0, 1, AllocationType::COMMAND_BUFFER, oneTile);
    const AllocationProperties everyTileProperties(0, 1, AllocationType::COMMAND_BUFFER, everyTile);

    AllocationData allocData;
    allocData.flags.useSystemMemory = false;

    // The same AllocationData instance is reused for both calls, single tile first.
    const AllocationProperties *propertiesInOrder[] = {&oneTileProperties, &everyTileProperties};
    for (const auto *properties : propertiesInOrder) {
        hwHelper.setExtraAllocationData(allocData, *properties, hwInfo);
        EXPECT_FALSE(allocData.flags.useSystemMemory);
    }
}

// Expects the device IP version reported for the default hardware info to equal
// ClHwHelperMock::makeDeviceIpVersion(12, 8, 1).
XE_HPC_CORETEST_F(HwHelperTestsXeHpcCore, WhenGettingDeviceIpVersionThenMakeCorrectDeviceIpVersion) {
    const auto expectedIpVersion = ClHwHelperMock::makeDeviceIpVersion(12, 8, 1);
    const auto reportedIpVersion = ClHwHelper::get(renderCoreFamily).getDeviceIpVersion(*defaultHwInfo);
    EXPECT_EQ(expectedIpVersion, reportedIpVersion);
}

// PVC only (skipped for any other product family): for every revision id in the
// table, getComputeUnitsUsedForScratch() must equal
// MaxSubSlicesSupported * MaxEuPerSubSlice scaled by the listed ratio
// (8 for revisions 0x0 and 0x1, 16 for 0x3, 0x5, 0x6 and 0x7).
XE_HPC_CORETEST_F(HwHelperTestsXeHpcCore, GivenRevisionIdWhenGetComputeUnitsUsedForScratchThenReturnValidValue) {
    auto &helper = HwHelper::get(renderCoreFamily);
    auto hwInfo = *defaultHwInfo;
    // EUCount is doubled on the local copy; the expectation below is built from
    // MaxSubSlicesSupported and MaxEuPerSubSlice, not from EUCount.
    hwInfo.gtSystemInfo.EUCount *= 2;

    if (hwInfo.platform.eProductFamily != IGFX_PVC) {
        GTEST_SKIP();
    }

    const auto &gtInfo = hwInfo.gtSystemInfo;
    const uint32_t baseUnits = gtInfo.MaxSubSlicesSupported * gtInfo.MaxEuPerSubSlice;

    struct {
        unsigned short revId;
        uint32_t expectedRatio;
    } revisionCases[] = {
        {0x0, 8},
        {0x1, 8},
        {0x3, 16},
        {0x5, 16},
        {0x6, 16},
        {0x7, 16},
    };

    for (const auto &revisionCase : revisionCases) {
        hwInfo.platform.usRevId = revisionCase.revId;
        const uint32_t expectedUnits = baseUnits * revisionCase.expectedRatio;
        EXPECT_EQ(expectedUnits, helper.getComputeUnitsUsedForScratch(&hwInfo));
    }
}
compute-runtime-22.14.22890/opencl/test/unit_test/xe_hpc_core/pvc/000077500000000000000000000000001422164147700246755ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/xe_hpc_core/pvc/CMakeLists.txt000066400000000000000000000016571422164147700274460ustar00rootroot00000000000000# # Copyright (C) 2021-2022 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_PVC) set(IGDRCL_SRCS_tests_xe_hpc_core_pvc ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_hw_tests_pvc.cpp ${CMAKE_CURRENT_SOURCE_DIR}/engine_node_helper_tests_pvc.cpp ${CMAKE_CURRENT_SOURCE_DIR}/get_device_info_pvc.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sampler_tests_pvc.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_device_caps_pvc.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_hw_info_config_pvc.cpp ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_xe_hpc_core_pvc}) add_subdirectories() neo_copy_test_files_with_revision(copy_test_files_pvc_0 pvc 0) neo_copy_test_files_with_revision(copy_test_files_pvc_3 pvc 3) add_dependencies(copy_test_files_per_product copy_test_files_pvc_0) add_dependencies(copy_test_files_per_product copy_test_files_pvc_3) endif() command_stream_receiver_hw_tests_pvc.cpp000066400000000000000000000344521422164147700347770ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/xe_hpc_core/pvc/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver_hw.h" #include "shared/source/command_stream/thread_arbitration_policy.h" #include "shared/source/gmm_helper/client_context/gmm_client_context.h" #include "shared/source/gmm_helper/resource_info.h" #include "shared/source/helpers/blit_commands_helper.h" #include "shared/source/helpers/engine_node_helper.h" #include "shared/source/helpers/preamble.h" #include "shared/source/os_interface/device_factory.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" 
#include "shared/test/common/helpers/variable_backup.h" #include "shared/test/common/libult/ult_aub_command_stream_receiver.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/source/helpers/cl_memory_properties_helpers.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/fixtures/multi_root_device_fixture.h" #include "opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_event.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "opencl/test/unit_test/test_macros/test_checks_ocl.h" using namespace NEO; using PvcCommandStreamReceiverFlushTaskTests = UltCommandStreamReceiverTest; PVCTEST_F(PvcCommandStreamReceiverFlushTaskTests, givenOverrideThreadArbitrationPolicyDebugVariableSetForPvcWhenFlushingThenRequestRequiredMode) { using STATE_COMPUTE_MODE = typename FamilyType::STATE_COMPUTE_MODE; DebugManagerStateRestore restore; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); DebugManager.flags.OverrideThreadArbitrationPolicy.set(ThreadArbitrationPolicy::RoundRobin); EXPECT_EQ(-1, commandStreamReceiver.streamProperties.stateComputeMode.threadArbitrationPolicy.value); flushTask(commandStreamReceiver); EXPECT_EQ(ThreadArbitrationPolicy::RoundRobin, commandStreamReceiver.streamProperties.stateComputeMode.threadArbitrationPolicy.value); } PVCTEST_F(PvcCommandStreamReceiverFlushTaskTests, givenNotExistPolicyWhenFlushingThenDefaultPolicyIsProgrammed) { using STATE_COMPUTE_MODE = typename FamilyType::STATE_COMPUTE_MODE; char buff[1024] = {0}; LinearStream stream(buff, 1024); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); DispatchFlags 
dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); int32_t notExistPolicy = -2; flushTaskFlags.threadArbitrationPolicy = notExistPolicy; flushTask(commandStreamReceiver); EXPECT_EQ(notExistPolicy, commandStreamReceiver.streamProperties.stateComputeMode.threadArbitrationPolicy.value); } PVCTEST_F(PvcCommandStreamReceiverFlushTaskTests, givenRevisionBAndAboveWhenLastSpecialPipelineSelectModeIsTrueAndFlushTaskIsCalledThenDontReprogramPipelineSelect) { auto hwInfo = pDevice->getRootDeviceEnvironment().getMutableHardwareInfo(); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); flushTaskFlags.pipelineSelectArgs.specialPipelineSelectMode = true; flushTaskFlags.pipelineSelectArgs.mediaSamplerRequired = false; struct { unsigned short revId; bool expectedValue; } testInputs[] = { {0x0, true}, {0x1, true}, {0x3, true}, {0x5, false}, {0x6, false}, {0x7, false}, }; for (auto &testInput : testInputs) { hwInfo->platform.usRevId = testInput.revId; commandStreamReceiver.isPreambleSent = true; commandStreamReceiver.lastMediaSamplerConfig = false; flushTask(commandStreamReceiver); EXPECT_EQ(testInput.expectedValue, commandStreamReceiver.lastSpecialPipelineSelectMode); commandStreamReceiver.lastSpecialPipelineSelectMode = false; } } struct PVcBcsTests : public UltCommandStreamReceiverTest { void SetUp() override { DebugManager.flags.EnableLocalMemory.set(true); UltCommandStreamReceiverTest::SetUp(); context = std::make_unique(pClDevice); } void TearDown() override { context.reset(); UltCommandStreamReceiverTest::TearDown(); } DebugManagerStateRestore restore; std::unique_ptr context; cl_int retVal = CL_SUCCESS; }; PVCTEST_F(PVcBcsTests, givenCompressibleBuffersWhenStatefulCompressionIsEnabledThenProgramBlitterWithStatefulCompressionSettings) { DebugManager.flags.RenderCompressedBuffersEnabled.set(1); using MEM_COPY = typename FamilyType::MEM_COPY; char buff[1024] = {0}; LinearStream stream(buff, 1024); MockGraphicsAllocation clearColorAlloc; auto 
srcBuffer = clUniquePtr(Buffer::create(context.get(), {}, MemoryConstants::pageSize64k, nullptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); auto dstBuffer = clUniquePtr(Buffer::create(context.get(), {}, MemoryConstants::pageSize64k, nullptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); auto srcAllocation = srcBuffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex()); EXPECT_TRUE(srcAllocation->getDefaultGmm()->isCompressionEnabled); auto dstAllocation = dstBuffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex()); EXPECT_TRUE(dstAllocation->getDefaultGmm()->isCompressionEnabled); auto blitProperties = BlitProperties::constructPropertiesForCopy(srcAllocation, dstAllocation, 0, 0, {BlitterConstants::maxBlitWidth - 1, 1, 1}, 0, 0, 0, 0, &clearColorAlloc); auto bltCmd = stream.getSpaceForCmd(); *bltCmd = FamilyType::cmdInitXyCopyBlt; platformsImpl->clear(); EXPECT_EQ(platform(), nullptr); const auto &rootDeviceEnvironment = context->getDevice(0)->getRootDeviceEnvironment(); BlitCommandsHelper::appendBlitCommandsForBuffer(blitProperties, *bltCmd, rootDeviceEnvironment); EXPECT_EQ(bltCmd->getDestinationCompressionEnable(), MEM_COPY::DESTINATION_COMPRESSION_ENABLE::DESTINATION_COMPRESSION_ENABLE_ENABLE); EXPECT_EQ(bltCmd->getDestinationCompressible(), MEM_COPY::DESTINATION_COMPRESSIBLE::DESTINATION_COMPRESSIBLE_COMPRESSIBLE); EXPECT_EQ(bltCmd->getSourceCompressible(), MEM_COPY::SOURCE_COMPRESSIBLE::SOURCE_COMPRESSIBLE_COMPRESSIBLE); auto resourceFormat = srcAllocation->getDefaultGmm()->gmmResourceInfo->getResourceFormat(); auto compressionFormat = rootDeviceEnvironment.getGmmClientContext()->getSurfaceStateCompressionFormat(resourceFormat); EXPECT_EQ(compressionFormat, bltCmd->getCompressionFormat()); } PVCTEST_F(PVcBcsTests, givenBufferInDeviceMemoryWhenStatelessCompressionIsEnabledThenBlitterForBufferUsesStatelessCompressedSettings) { using MEM_COPY = typename FamilyType::MEM_COPY; char buff[1024] = {0}; LinearStream stream(buff, 1024); MockGraphicsAllocation 
clearColorAlloc; auto buffer = clUniquePtr(Buffer::create(context.get(), {}, MemoryConstants::pageSize64k, nullptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); auto allocation = buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex()); EXPECT_TRUE(!MemoryPool::isSystemMemoryPool(allocation->getMemoryPool())); auto blitProperties = BlitProperties::constructPropertiesForCopy(allocation, allocation, 0, 0, {BlitterConstants::maxBlitWidth - 1, 1, 1}, 0, 0, 0, 0, &clearColorAlloc); auto bltCmd = stream.getSpaceForCmd(); *bltCmd = FamilyType::cmdInitXyCopyBlt; DebugManager.flags.EnableStatelessCompressionWithUnifiedMemory.set(true); platformsImpl->clear(); EXPECT_EQ(platform(), nullptr); BlitCommandsHelper::appendBlitCommandsForBuffer(blitProperties, *bltCmd, context->getDevice(0)->getRootDeviceEnvironment()); EXPECT_EQ(bltCmd->getDestinationCompressionEnable(), MEM_COPY::DESTINATION_COMPRESSION_ENABLE::DESTINATION_COMPRESSION_ENABLE_ENABLE); EXPECT_EQ(bltCmd->getDestinationCompressible(), MEM_COPY::DESTINATION_COMPRESSIBLE::DESTINATION_COMPRESSIBLE_COMPRESSIBLE); EXPECT_EQ(bltCmd->getSourceCompressible(), MEM_COPY::SOURCE_COMPRESSIBLE::SOURCE_COMPRESSIBLE_COMPRESSIBLE); EXPECT_EQ(static_cast(DebugManager.flags.FormatForStatelessCompressionWithUnifiedMemory.get()), bltCmd->getCompressionFormat()); } PVCTEST_F(PVcBcsTests, givenBufferInSystemMemoryWhenStatelessCompressionIsEnabledThenBlitterForBufferDoesntUseStatelessCompressedSettings) { using MEM_COPY = typename FamilyType::MEM_COPY; char buff[1024] = {0}; LinearStream stream(buff, 1024); MockGraphicsAllocation clearColorAlloc; auto buffer = clUniquePtr(Buffer::create(context.get(), CL_MEM_FORCE_HOST_MEMORY_INTEL, MemoryConstants::pageSize64k, nullptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); auto allocation = buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex()); EXPECT_TRUE(MemoryPool::isSystemMemoryPool(allocation->getMemoryPool())); auto blitProperties = 
BlitProperties::constructPropertiesForCopy(allocation, allocation, 0, 0, {BlitterConstants::maxBlitWidth - 1, 1, 1}, 0, 0, 0, 0, &clearColorAlloc); auto bltCmd = stream.getSpaceForCmd(); *bltCmd = FamilyType::cmdInitXyCopyBlt; DebugManager.flags.EnableStatelessCompressionWithUnifiedMemory.set(true); platformsImpl->clear(); EXPECT_EQ(platform(), nullptr); BlitCommandsHelper::appendBlitCommandsForBuffer(blitProperties, *bltCmd, context->getDevice(0)->getRootDeviceEnvironment()); EXPECT_EQ(bltCmd->getDestinationCompressionEnable(), MEM_COPY::DESTINATION_COMPRESSION_ENABLE::DESTINATION_COMPRESSION_ENABLE_DISABLE); EXPECT_EQ(bltCmd->getDestinationCompressible(), MEM_COPY::DESTINATION_COMPRESSIBLE::DESTINATION_COMPRESSIBLE_NOT_COMPRESSIBLE); EXPECT_EQ(bltCmd->getSourceCompressible(), MEM_COPY::SOURCE_COMPRESSIBLE::SOURCE_COMPRESSIBLE_NOT_COMPRESSIBLE); EXPECT_EQ(0u, bltCmd->getCompressionFormat()); } using PvcMultiRootDeviceCommandStreamReceiverBufferTests = MultiRootDeviceFixture; PVCTEST_F(PvcMultiRootDeviceCommandStreamReceiverBufferTests, givenMultipleEventInMultiRootDeviceEnvironmentOnPvcWhenTheyArePassedToEnqueueWithSubmissionThenCsIsWaitingForEventsFromPreviousDevices) { REQUIRE_SVM_OR_SKIP(device1); REQUIRE_SVM_OR_SKIP(device2); using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; cl_int retVal = 0; size_t offset = 0; size_t size = 1; auto pCmdQ1 = context.get()->getSpecialQueue(1u); auto pCmdQ2 = context.get()->getSpecialQueue(2u); std::unique_ptr program(Program::createBuiltInFromSource("FillBufferBytes", context.get(), context.get()->getDevices(), &retVal)); program->build(program->getDevices(), nullptr, false); std::unique_ptr kernel(Kernel::create(program.get(), program->getKernelInfoForKernel("FillBufferBytes"), *context.get()->getDevice(0), &retVal)); size_t svmSize = 4096; void *svmPtr = alignedMalloc(svmSize, MemoryConstants::pageSize); MockGraphicsAllocation svmAlloc(svmPtr, svmSize); Event event1(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 5, 15); 
Event event2(nullptr, CL_COMMAND_NDRANGE_KERNEL, 6, 16); Event event3(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 4, 20); Event event4(pCmdQ2, CL_COMMAND_NDRANGE_KERNEL, 3, 4); Event event5(pCmdQ2, CL_COMMAND_NDRANGE_KERNEL, 2, 7); UserEvent userEvent1(&pCmdQ1->getContext()); UserEvent userEvent2(&pCmdQ2->getContext()); userEvent1.setStatus(CL_COMPLETE); userEvent2.setStatus(CL_COMPLETE); cl_event eventWaitList[] = { &event1, &event2, &event3, &event4, &event5, &userEvent1, &userEvent2, }; cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]); { kernel->setSvmKernelExecInfo(&svmAlloc); retVal = pCmdQ1->enqueueKernel( kernel.get(), 1, &offset, &size, &size, numEventsInWaitList, eventWaitList, nullptr); HardwareParse csHwParser; csHwParser.parseCommands(pCmdQ1->getCS(0)); auto semaphores = findAll(csHwParser.cmdList.begin(), csHwParser.cmdList.end()); EXPECT_EQ(3u, semaphores.size()); auto semaphoreCmd0 = genCmdCast(*(semaphores[0])); EXPECT_EQ(4u, semaphoreCmd0->getSemaphoreDataDword()); EXPECT_EQ(reinterpret_cast(pCmdQ2->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd0->getSemaphoreGraphicsAddress()); auto semaphoreCmd1 = genCmdCast(*(semaphores[1])); EXPECT_EQ(7u, semaphoreCmd1->getSemaphoreDataDword()); EXPECT_EQ(reinterpret_cast(pCmdQ2->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd1->getSemaphoreGraphicsAddress()); } { kernel->setSvmKernelExecInfo(&svmAlloc); retVal = pCmdQ2->enqueueKernel( kernel.get(), 1, &offset, &size, &size, numEventsInWaitList, eventWaitList, nullptr); HardwareParse csHwParser; csHwParser.parseCommands(pCmdQ2->getCS(0)); auto semaphores = findAll(csHwParser.cmdList.begin(), csHwParser.cmdList.end()); EXPECT_EQ(3u, semaphores.size()); auto semaphoreCmd0 = genCmdCast(*(semaphores[0])); EXPECT_EQ(15u, semaphoreCmd0->getSemaphoreDataDword()); EXPECT_EQ(reinterpret_cast(pCmdQ1->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd0->getSemaphoreGraphicsAddress()); auto semaphoreCmd1 = 
genCmdCast(*(semaphores[1])); EXPECT_EQ(20u, semaphoreCmd1->getSemaphoreDataDword()); EXPECT_EQ(reinterpret_cast(pCmdQ1->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd1->getSemaphoreGraphicsAddress()); } alignedFree(svmPtr); } compute-runtime-22.14.22890/opencl/test/unit_test/xe_hpc_core/pvc/engine_node_helper_tests_pvc.cpp000066400000000000000000000053731422164147700333140ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/engine_node_helper.h" #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" using namespace NEO; using EngineNodeHelperPvcTests = ::Test; PVCTEST_F(EngineNodeHelperPvcTests, WhenGetBcsEngineTypeIsCalledForPVCThenCorrectBcsEngineIsReturned) { using namespace aub_stream; auto pHwInfo = pDevice->getRootDeviceEnvironment().getMutableHardwareInfo(); auto deviceBitfield = pDevice->getDeviceBitfield(); pHwInfo->featureTable.ftrBcsInfo = 1; auto &selectorCopyEngine = pDevice->getNearestGenericSubDevice(0)->getSelectorCopyEngine(); selectorCopyEngine.isMainUsed.store(true); EXPECT_EQ(ENGINE_BCS, EngineHelpers::getBcsEngineType(*pHwInfo, deviceBitfield, selectorCopyEngine, false)); pHwInfo->featureTable.ftrBcsInfo = 0b111; EXPECT_EQ(ENGINE_BCS2, EngineHelpers::getBcsEngineType(*pHwInfo, deviceBitfield, selectorCopyEngine, false)); EXPECT_EQ(ENGINE_BCS1, EngineHelpers::getBcsEngineType(*pHwInfo, deviceBitfield, selectorCopyEngine, false)); EXPECT_EQ(ENGINE_BCS2, EngineHelpers::getBcsEngineType(*pHwInfo, deviceBitfield, selectorCopyEngine, false)); EXPECT_EQ(ENGINE_BCS1, EngineHelpers::getBcsEngineType(*pHwInfo, deviceBitfield, selectorCopyEngine, false)); pHwInfo->featureTable.ftrBcsInfo = 0b11; EXPECT_EQ(ENGINE_BCS1, EngineHelpers::getBcsEngineType(*pHwInfo, deviceBitfield, selectorCopyEngine, false)); EXPECT_EQ(ENGINE_BCS1, EngineHelpers::getBcsEngineType(*pHwInfo, deviceBitfield, 
selectorCopyEngine, false)); pHwInfo->featureTable.ftrBcsInfo = 0b101; EXPECT_EQ(ENGINE_BCS2, EngineHelpers::getBcsEngineType(*pHwInfo, deviceBitfield, selectorCopyEngine, false)); EXPECT_EQ(ENGINE_BCS2, EngineHelpers::getBcsEngineType(*pHwInfo, deviceBitfield, selectorCopyEngine, false)); } PVCTEST_F(EngineNodeHelperPvcTests, givenPvcBaseDieA0AndTile1WhenGettingBcsEngineTypeThenDoNotUseBcs1) { using namespace aub_stream; auto pHwInfo = pDevice->getRootDeviceEnvironment().getMutableHardwareInfo(); pHwInfo->featureTable.ftrBcsInfo = 0b11111; auto deviceBitfield = 0b10; auto &selectorCopyEngine = pDevice->getNearestGenericSubDevice(0)->getSelectorCopyEngine(); EXPECT_EQ(ENGINE_BCS2, EngineHelpers::getBcsEngineType(*pHwInfo, deviceBitfield, selectorCopyEngine, true)); EXPECT_EQ(ENGINE_BCS4, EngineHelpers::getBcsEngineType(*pHwInfo, deviceBitfield, selectorCopyEngine, true)); EXPECT_EQ(ENGINE_BCS2, EngineHelpers::getBcsEngineType(*pHwInfo, deviceBitfield, selectorCopyEngine, true)); EXPECT_EQ(ENGINE_BCS4, EngineHelpers::getBcsEngineType(*pHwInfo, deviceBitfield, selectorCopyEngine, true)); } compute-runtime-22.14.22890/opencl/test/unit_test/xe_hpc_core/pvc/get_device_info_pvc.cpp000066400000000000000000000022611422164147700313630ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/device_info_fixture.h" using namespace NEO; HWTEST_EXCLUDE_PRODUCT(GetDeviceInfoMemCapabilitiesTest, GivenValidParametersWhenGetDeviceInfoIsCalledForXE_HP_COREThenClSuccessIsReturned, IGFX_XE_HPC_CORE); PVCTEST_F(GetDeviceInfoMemCapabilitiesTest, GivenValidParametersWhenGetDeviceInfoIsCalledForPVCThenClSuccessIsReturned) { std::vector params = { {CL_DEVICE_HOST_MEM_CAPABILITIES_INTEL, CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL}, {CL_DEVICE_DEVICE_MEM_CAPABILITIES_INTEL, (CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL | 
CL_UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL)}, {CL_DEVICE_SINGLE_DEVICE_SHARED_MEM_CAPABILITIES_INTEL, (CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL | CL_UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL)}, {CL_DEVICE_CROSS_DEVICE_SHARED_MEM_CAPABILITIES_INTEL, (CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL | CL_UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL)}, {CL_DEVICE_SHARED_SYSTEM_MEM_CAPABILITIES_INTEL, 0}}; check(params); } compute-runtime-22.14.22890/opencl/test/unit_test/xe_hpc_core/pvc/linux/000077500000000000000000000000001422164147700260345ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/xe_hpc_core/pvc/linux/CMakeLists.txt000066400000000000000000000005371422164147700306010ustar00rootroot00000000000000# # Copyright (C) 2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_xe_hp_core_pvc_linux ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/hw_info_config_tests_pvc.cpp ) if(UNIX) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_xe_hp_core_pvc_linux}) add_subdirectories() endif() compute-runtime-22.14.22890/opencl/test/unit_test/xe_hpc_core/pvc/linux/dll/000077500000000000000000000000001422164147700266075ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/xe_hpc_core/pvc/linux/dll/CMakeLists.txt000066400000000000000000000005311422164147700313460ustar00rootroot00000000000000# # Copyright (C) 2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_linux_dll_tests_xe_hpc_core_pvc ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/${BRANCH_DIR_SUFFIX}/device_id_tests_pvc.cpp ) target_sources(igdrcl_linux_dll_tests PRIVATE ${IGDRCL_SRCS_linux_dll_tests_xe_hpc_core_pvc}) compute-runtime-22.14.22890/opencl/test/unit_test/xe_hpc_core/pvc/linux/dll/device_id_tests_pvc.cpp000066400000000000000000000007621422164147700333250ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include 
"shared/test/common/fixtures/linux/device_id_fixture.h" using namespace NEO; TEST_F(DeviceIdTests, GivenPvcSupportedDeviceIdThenConfigIsCorrect) { std::array expectedDescriptors = {{ {0x0BD0, &PVC_CONFIG::hwInfo, &PVC_CONFIG::setupHardwareInfo}, {0x0BD5, &PVC_CONFIG::hwInfo, &PVC_CONFIG::setupHardwareInfo}, }}; testImpl(expectedDescriptors); } compute-runtime-22.14.22890/opencl/test/unit_test/xe_hpc_core/pvc/linux/hw_info_config_tests_pvc.cpp000066400000000000000000000046101422164147700336110ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/os_interface.h" #include "shared/test/common/helpers/default_hw_info.h" #include "shared/test/common/libult/linux/drm_mock.h" #include "shared/test/unit_test/helpers/gtest_helpers.h" #include "opencl/test/unit_test/os_interface/linux/hw_info_config_linux_tests.h" using namespace NEO; struct HwInfoConfigTestLinuxPvc : HwInfoConfigTestLinux { void SetUp() override { HwInfoConfigTestLinux::SetUp(); drm = new DrmMock(*executionEnvironment->rootDeviceEnvironments[0]); osInterface->setDriverModel(std::unique_ptr(drm)); drm->storedDeviceID = 0x0BD0; } }; PVCTEST_F(HwInfoConfigTestLinuxPvc, WhenConfiguringHwInfoThenZeroIsReturned) { auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); } PVCTEST_F(HwInfoConfigTestLinuxPvc, given57bAddressSpaceWhenConfiguringHwInfoThenSetFtrFlag) { auto hwInfoConfig = HwInfoConfig::get(productFamily); outHwInfo.featureTable.flags.ftr57bGPUAddressing = false; outHwInfo.platform.eRenderCoreFamily = defaultHwInfo->platform.eRenderCoreFamily; outHwInfo.capabilityTable.gpuAddressSpace = maxNBitValue(48); int ret = hwInfoConfig->configureHardwareCustom(&outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_FALSE(outHwInfo.featureTable.flags.ftr57bGPUAddressing); outHwInfo.capabilityTable.gpuAddressSpace = 
maxNBitValue(57); ret = hwInfoConfig->configureHardwareCustom(&outHwInfo, osInterface); EXPECT_EQ(0, ret); auto value = outHwInfo.featureTable.flags.ftr57bGPUAddressing; EXPECT_EQ(1u, value); } PVCTEST_F(HwInfoConfigTestLinuxPvc, GivenPvcWhenConfigureHardwareCustomThenKmdNotifyIsEnabled) { HwInfoConfig *hwInfoConfig = HwInfoConfig::get(productFamily); OSInterface osIface; hwInfoConfig->configureHardwareCustom(&pInHwInfo, &osIface); EXPECT_TRUE(pInHwInfo.capabilityTable.kmdNotifyProperties.enableKmdNotify); EXPECT_EQ(150ll, pInHwInfo.capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds); EXPECT_TRUE(pInHwInfo.capabilityTable.kmdNotifyProperties.enableQuickKmdSleepForDirectSubmission); EXPECT_EQ(20ll, pInHwInfo.capabilityTable.kmdNotifyProperties.delayQuickKmdSleepForDirectSubmissionMicroseconds); } compute-runtime-22.14.22890/opencl/test/unit_test/xe_hpc_core/pvc/sampler_tests_pvc.cpp000066400000000000000000000033711422164147700311420ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/os_interface/hw_info_config.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include using namespace NEO; typedef Test PvcSamplerTest; PVCTEST_F(PvcSamplerTest, givenPvcSamplerWhenUsingDefaultFilteringAndAppendSamplerStateParamsThenDisableLowQualityFilter) { EXPECT_FALSE(DebugManager.flags.ForceSamplerLowFilteringPrecision.get()); typedef typename FamilyType::SAMPLER_STATE SAMPLER_STATE; auto state = FamilyType::cmdInitSamplerState; EXPECT_EQ(SAMPLER_STATE::LOW_QUALITY_FILTER_DISABLE, state.getLowQualityFilter()); HwInfoConfig::get(defaultHwInfo->platform.eProductFamily)->adjustSamplerState(&state, *defaultHwInfo); EXPECT_EQ(SAMPLER_STATE::LOW_QUALITY_FILTER_DISABLE, state.getLowQualityFilter()); } 
PVCTEST_F(PvcSamplerTest, givenPvcSamplerWhenForcingLowQualityFilteringAndAppendSamplerStateParamsThenEnableLowQualityFilter) { DebugManagerStateRestore dbgRestore; DebugManager.flags.ForceSamplerLowFilteringPrecision.set(true); EXPECT_TRUE(DebugManager.flags.ForceSamplerLowFilteringPrecision.get()); typedef typename FamilyType::SAMPLER_STATE SAMPLER_STATE; auto state = FamilyType::cmdInitSamplerState; EXPECT_EQ(SAMPLER_STATE::LOW_QUALITY_FILTER_DISABLE, state.getLowQualityFilter()); HwInfoConfig::get(defaultHwInfo->platform.eProductFamily)->adjustSamplerState(&state, *defaultHwInfo); EXPECT_EQ(SAMPLER_STATE::LOW_QUALITY_FILTER_ENABLE, state.getLowQualityFilter()); } compute-runtime-22.14.22890/opencl/test/unit_test/xe_hpc_core/pvc/test_device_caps_pvc.cpp000066400000000000000000000035201422164147700315550ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/hw_helper.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/helpers/gtest_helpers.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "gtest/gtest.h" using namespace NEO; using PvcDeviceCapsTests = Test; PVCTEST_F(PvcDeviceCapsTests, givenPvcProductWhenCheckBlitterOperationsSupportThenReturnFalse) { EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.blitterOperationsSupported); } PVCTEST_F(PvcDeviceCapsTests, givenPvcProductWhenCheckingSldSupportThenReturnFalse) { EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.debuggerSupported); } PVCTEST_F(PvcDeviceCapsTests, givenPvcWhenAskingForCacheFlushAfterWalkerThenReturnFalse) { EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.supportCacheFlushAfterWalker); } PVCTEST_F(PvcDeviceCapsTests, givenPvcProductWhenCheckImagesSupportThenReturnFalse) { EXPECT_FALSE(PVC::hwInfo.capabilityTable.supportsImages); } PVCTEST_F(PvcDeviceCapsTests, givenPvcProductWhenDeviceCapsInitializedThenAddPvcExtensions) { const 
auto &dInfo = pClDevice->getDeviceInfo(); EXPECT_TRUE(hasSubstr(dInfo.deviceExtensions, std::string("cl_intel_create_buffer_with_properties"))); EXPECT_TRUE(hasSubstr(dInfo.deviceExtensions, std::string("cl_intel_dot_accumulate"))); EXPECT_TRUE(hasSubstr(dInfo.deviceExtensions, std::string("cl_intel_subgroup_local_block_io"))); EXPECT_TRUE(hasSubstr(dInfo.deviceExtensions, std::string("cl_intel_subgroup_matrix_multiply_accumulate_for_PVC"))); EXPECT_TRUE(hasSubstr(dInfo.deviceExtensions, std::string("cl_khr_subgroup_named_barrier"))); EXPECT_TRUE(hasSubstr(dInfo.deviceExtensions, std::string("cl_intel_subgroup_extended_block_read"))); } compute-runtime-22.14.22890/opencl/test/unit_test/xe_hpc_core/pvc/test_hw_info_config_pvc.cpp000066400000000000000000000154021422164147700322700ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/compiler_hw_info_config.h" #include "shared/source/helpers/constants.h" #include "shared/source/os_interface/hw_info_config.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/default_hw_info.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/helpers/gtest_helpers.h" #include "device_ids_configs_pvc.h" #include "gtest/gtest.h" using namespace NEO; using PvcHwInfoConfig = ::testing::Test; PVCTEST_F(PvcHwInfoConfig, givenErrorneousConfigStringThenThrow) { HardwareInfo hwInfo = *defaultHwInfo; GT_SYSTEM_INFO >SystemInfo = hwInfo.gtSystemInfo; uint64_t config = 0xdeadbeef; gtSystemInfo = {0}; EXPECT_ANY_THROW(hardwareInfoSetup[productFamily](&hwInfo, false, config)); EXPECT_EQ(0u, gtSystemInfo.SliceCount); EXPECT_EQ(0u, gtSystemInfo.SubSliceCount); EXPECT_EQ(0u, gtSystemInfo.DualSubSliceCount); EXPECT_EQ(0u, gtSystemInfo.EUCount); EXPECT_EQ(0u, gtSystemInfo.ThreadCount); } PVCTEST_F(PvcHwInfoConfig, givenPvcWhenCallingGetDeviceMemoryNameThenHbmIsReturned) { auto 
hwInfoConfig = HwInfoConfig::get(defaultHwInfo->platform.eProductFamily); auto deviceMemoryName = hwInfoConfig->getDeviceMemoryName(); EXPECT_TRUE(hasSubstr(deviceMemoryName, std::string("HBM"))); } PVCTEST_F(PvcHwInfoConfig, givenHwInfoConfigWhenAdditionalKernelExecInfoSupportCheckedThenCorrectValueIsReturned) { const auto &hwInfoConfig = *HwInfoConfig::get(productFamily); auto hwInfo = *defaultHwInfo; EXPECT_FALSE(hwInfoConfig.isDisableOverdispatchAvailable(hwInfo)); hwInfo.platform.usRevId = hwInfoConfig.getHwRevIdFromStepping(REVISION_B, hwInfo); EXPECT_TRUE(hwInfoConfig.isDisableOverdispatchAvailable(hwInfo)); } PVCTEST_F(PvcHwInfoConfig, givenHwInfoConfigWhenAskedIfPipeControlPriorToNonPipelinedStateCommandsWARequiredThenTrueIsReturned) { const auto &hwInfoConfig = *HwInfoConfig::get(productFamily); auto hwInfo = *defaultHwInfo; auto isRcs = false; const auto &[isBasicWARequired, isExtendedWARequired] = hwInfoConfig.isPipeControlPriorToNonPipelinedStateCommandsWARequired(hwInfo, isRcs); EXPECT_TRUE(isBasicWARequired); EXPECT_TRUE(isExtendedWARequired); } PVCTEST_F(PvcHwInfoConfig, givenPvcHwInfoConfigWhenCheckDirectSubmissionSupportedThenTrueIsReturned) { const auto &hwInfoConfig = *HwInfoConfig::get(productFamily); auto hwInfo = *defaultHwInfo; EXPECT_TRUE(hwInfoConfig.isDirectSubmissionSupported(hwInfo)); } PVCTEST_F(PvcHwInfoConfig, givenHwInfoConfigAndProgramExtendedPipeControlPriorToNonPipelinedStateCommandDisabledWhenAskedIfPipeControlPriorToNonPipelinedStateCommandsWARequiredThenFalseIsReturned) { DebugManagerStateRestore restore; DebugManager.flags.ProgramExtendedPipeControlPriorToNonPipelinedStateCommand.set(0); const auto &hwInfoConfig = *HwInfoConfig::get(productFamily); auto hwInfo = *defaultHwInfo; auto isRcs = false; const auto &[isBasicWARequired, isExtendedWARequired] = hwInfoConfig.isPipeControlPriorToNonPipelinedStateCommandsWARequired(hwInfo, isRcs); EXPECT_FALSE(isExtendedWARequired); EXPECT_TRUE(isBasicWARequired); } using 
CompilerHwInfoConfigHelperTestsPvc = ::testing::Test; PVCTEST_F(CompilerHwInfoConfigHelperTestsPvc, givenPvcWhenIsForceToStatelessRequiredIsCalledThenReturnsTrue) { EXPECT_TRUE(CompilerHwInfoConfig::get(productFamily)->isForceToStatelessRequired()); } using PvcHwInfo = ::testing::Test; PVCTEST_F(PvcHwInfo, givenPvcWhenConfiguringThenDisableCccs) { auto hwInfoConfig = HwInfoConfig::get(productFamily); HardwareInfo hwInfo = *defaultHwInfo; hwInfoConfig->configureHardwareCustom(&hwInfo, nullptr); EXPECT_FALSE(hwInfo.featureTable.flags.ftrRcsNode); } PVCTEST_F(PvcHwInfo, givenDebugVariableSetWhenConfiguringThenEnableCccs) { DebugManagerStateRestore restore; DebugManager.flags.NodeOrdinal.set(static_cast(aub_stream::EngineType::ENGINE_CCCS)); auto hwInfoConfig = HwInfoConfig::get(productFamily); HardwareInfo hwInfo = *defaultHwInfo; hwInfoConfig->configureHardwareCustom(&hwInfo, nullptr); EXPECT_TRUE(hwInfo.featureTable.flags.ftrRcsNode); } PVCTEST_F(PvcHwInfo, givenDeviceIdThenProperMaxThreadsForWorkgroupIsReturned) { HardwareInfo hwInfo = *defaultHwInfo; auto &hwInfoConfig = *HwInfoConfig::get(hwInfo.platform.eProductFamily); for (auto &deviceId : PVC_XL_IDS) { hwInfo.platform.usDeviceID = deviceId; EXPECT_EQ(64u, hwInfoConfig.getMaxThreadsForWorkgroupInDSSOrSS(hwInfo, 64u, 64u)); } for (auto &deviceId : PVC_XT_IDS) { hwInfo.platform.usDeviceID = deviceId; uint32_t numThreadsPerEU = hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount; EXPECT_EQ(64u * numThreadsPerEU, hwInfoConfig.getMaxThreadsForWorkgroupInDSSOrSS(hwInfo, 64u, 64u)); } } PVCTEST_F(PvcHwInfo, givenVariousValuesWhenConvertingHwRevIdAndSteppingThenConversionIsCorrect) { auto hwInfo = *defaultHwInfo; const auto &hwInfoConfig = *HwInfoConfig::get(hwInfo.platform.eProductFamily); std::vector deviceIds = PVC_XL_IDS; deviceIds.insert(deviceIds.end(), PVC_XT_IDS.begin(), PVC_XT_IDS.end()); for (uint32_t testValue = 0; testValue < 0xFF; testValue++) { for (auto deviceId : deviceIds) { 
hwInfo.platform.usDeviceID = deviceId; auto hwRevIdFromStepping = hwInfoConfig.getHwRevIdFromStepping(testValue, hwInfo); if (hwRevIdFromStepping != CommonConstants::invalidStepping) { hwInfo.platform.usRevId = hwRevIdFromStepping; EXPECT_EQ(testValue, hwInfoConfig.getSteppingFromHwRevId(hwInfo)); } } hwInfo.platform.usRevId = testValue; auto steppingFromHwRevId = hwInfoConfig.getSteppingFromHwRevId(hwInfo); if (steppingFromHwRevId != CommonConstants::invalidStepping) { bool anyMatchAfterConversionFromStepping = false; for (auto deviceId : deviceIds) { hwInfo.platform.usDeviceID = deviceId; auto hwRevId = hwInfoConfig.getHwRevIdFromStepping(steppingFromHwRevId, hwInfo); EXPECT_NE(CommonConstants::invalidStepping, hwRevId); // expect values to match. 0x1 and 0x0 translate to the same stepping so they are interpreted as a match too. if (((testValue & PVC::pvcSteppingBits) == (hwRevId & PVC::pvcSteppingBits)) || (((testValue & PVC::pvcSteppingBits) == 0x1) && ((hwRevId & PVC::pvcSteppingBits) == 0x0))) { anyMatchAfterConversionFromStepping = true; } } EXPECT_TRUE(anyMatchAfterConversionFromStepping); } } } compute-runtime-22.14.22890/opencl/test/unit_test/xe_hpc_core/test_cmds_programming_xe_hpc_core.cpp000066400000000000000000000157601422164147700335470ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gmm_helper/client_context/gmm_client_context.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/state_base_address.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" using namespace NEO; using CmdsProgrammingTestsXeHpcCore = UltCommandStreamReceiverTest; XE_HPC_CORETEST_F(CmdsProgrammingTestsXeHpcCore, 
givenL3ToL1DebugFlagWhenStatelessMocsIsProgrammedThenItHasL1CachingOn) { using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; DebugManagerStateRestore restore; DebugManager.flags.ForceL1Caching.set(1u); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); flushTask(commandStreamReceiver); HardwareParse hwParserCsr; hwParserCsr.parseCommands(commandStreamReceiver.commandStream, 0); hwParserCsr.findHardwareCommands(); ASSERT_NE(nullptr, hwParserCsr.cmdStateBaseAddress); auto stateBaseAddress = static_cast(hwParserCsr.cmdStateBaseAddress); auto actualL1CachePolocy = static_cast(stateBaseAddress->getL1CachePolicyL1CacheControl()); const uint8_t expectedL1CachePolicy = 0; EXPECT_EQ(expectedL1CachePolicy, actualL1CachePolocy); } XE_HPC_CORETEST_F(CmdsProgrammingTestsXeHpcCore, givenSpecificProductFamilyWhenAppendingSbaThenProgramWtL1CachePolicy) { auto memoryManager = pDevice->getExecutionEnvironment()->memoryManager.get(); AllocationProperties properties(pDevice->getRootDeviceIndex(), 1, AllocationType::BUFFER, pDevice->getDeviceBitfield()); auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(properties); IndirectHeap indirectHeap(allocation, 1); DispatchFlags flags = DispatchFlagsHelper::createDefaultDispatchFlags(); auto sbaCmd = FamilyType::cmdInitStateBaseAddress; StateBaseAddressHelper::appendStateBaseAddressParameters(&sbaCmd, &indirectHeap, true, 0, pDevice->getRootDeviceEnvironment().getGmmHelper(), false, MemoryCompressionState::NotApplicable, true, false, 1u); EXPECT_EQ(FamilyType::STATE_BASE_ADDRESS::L1_CACHE_POLICY_WBP, sbaCmd.getL1CachePolicyL1CacheControl()); memoryManager->freeGraphicsMemory(allocation); } XE_HPC_CORETEST_F(CmdsProgrammingTestsXeHpcCore, givenL1CachingOverrideWhenStateBaseAddressIsProgrammedThenItMatchesTheOverrideValue) { DebugManagerStateRestore restorer; DebugManager.flags.ForceStatelessL1CachingPolicy.set(0u); auto memoryManager = 
pDevice->getExecutionEnvironment()->memoryManager.get(); AllocationProperties properties(pDevice->getRootDeviceIndex(), 1, AllocationType::BUFFER, pDevice->getDeviceBitfield()); auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(properties); IndirectHeap indirectHeap(allocation, 1); DispatchFlags flags = DispatchFlagsHelper::createDefaultDispatchFlags(); auto sbaCmd = FamilyType::cmdInitStateBaseAddress; StateBaseAddressHelper::appendStateBaseAddressParameters(&sbaCmd, &indirectHeap, true, 0, pDevice->getRootDeviceEnvironment().getGmmHelper(), false, MemoryCompressionState::NotApplicable, true, false, 1u); EXPECT_EQ(0u, sbaCmd.getL1CachePolicyL1CacheControl()); DebugManager.flags.ForceStatelessL1CachingPolicy.set(1u); StateBaseAddressHelper::appendStateBaseAddressParameters(&sbaCmd, &indirectHeap, true, 0, pDevice->getRootDeviceEnvironment().getGmmHelper(), false, MemoryCompressionState::NotApplicable, true, false, 1u); EXPECT_EQ(1u, sbaCmd.getL1CachePolicyL1CacheControl()); memoryManager->freeGraphicsMemory(allocation); } XE_HPC_CORETEST_F(CmdsProgrammingTestsXeHpcCore, whenAppendingRssThenProgramWtL1CachePolicy) { auto memoryManager = pDevice->getExecutionEnvironment()->memoryManager.get(); size_t allocationSize = MemoryConstants::pageSize; AllocationProperties properties(pDevice->getRootDeviceIndex(), allocationSize, AllocationType::BUFFER, pDevice->getDeviceBitfield()); auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(properties); auto rssCmd = FamilyType::cmdInitRenderSurfaceState; MockContext context(pClDevice); auto multiGraphicsAllocation = MultiGraphicsAllocation(pClDevice->getRootDeviceIndex()); multiGraphicsAllocation.addAllocation(allocation); std::unique_ptr> buffer(static_cast *>( BufferHw::create(&context, {}, 0, 0, allocationSize, nullptr, nullptr, multiGraphicsAllocation, false, false, false))); NEO::EncodeSurfaceStateArgs args; args.outMemory = &rssCmd; args.graphicsAddress = allocation->getGpuAddress(); 
args.size = allocation->getUnderlyingBufferSize(); args.mocs = buffer->getMocsValue(false, false, pClDevice->getRootDeviceIndex()); args.numAvailableDevices = pClDevice->getNumGenericSubDevices(); args.allocation = allocation; args.gmmHelper = pClDevice->getGmmHelper(); args.areMultipleSubDevicesInContext = true; EncodeSurfaceState::encodeBuffer(args); EXPECT_EQ(FamilyType::RENDER_SURFACE_STATE::L1_CACHE_POLICY_WBP, rssCmd.getL1CachePolicyL1CacheControl()); } XE_HPC_CORETEST_F(CmdsProgrammingTestsXeHpcCore, givenAlignedCacheableReadOnlyBufferThenChoseOclBufferConstPolicy) { MockContext context; const auto size = MemoryConstants::pageSize; const auto ptr = (void *)alignedMalloc(size * 2, MemoryConstants::pageSize); const auto flags = CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY; auto retVal = CL_SUCCESS; auto buffer = std::unique_ptr(Buffer::create( &context, flags, size, ptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); typename FamilyType::RENDER_SURFACE_STATE surfaceState = {}; buffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice(), false, false); const auto expectedMocs = context.getDevice(0)->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST); const auto actualMocs = surfaceState.getMemoryObjectControlState(); EXPECT_EQ(expectedMocs, actualMocs); auto actualL1CachePolocy = static_cast(surfaceState.getL1CachePolicyL1CacheControl()); const uint8_t expectedL1CachePolicy = 0; EXPECT_EQ(expectedL1CachePolicy, actualL1CachePolocy); alignedFree(ptr); } compute-runtime-22.14.22890/opencl/test/unit_test/xe_hpc_core/test_device_caps_xe_hpc_core.cpp000066400000000000000000000056411422164147700324610ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/hw_helper.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/helpers/gtest_helpers.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include 
"opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/fixtures/device_info_fixture.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" using namespace NEO; typedef Test XeHpcCoreDeviceCaps; XE_HPC_CORETEST_F(XeHpcCoreDeviceCaps, givenXeHpcCoreWhenCheckFtrSupportsInteger64BitAtomicsThenReturnTrue) { EXPECT_TRUE(pDevice->getHardwareInfo().capabilityTable.ftrSupportsInteger64BitAtomics); } XE_HPC_CORETEST_F(XeHpcCoreDeviceCaps, givenXeHpcCoreWhenCheckingImageSupportThenReturnFalse) { EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.supportsImages); } XE_HPC_CORETEST_F(XeHpcCoreDeviceCaps, givenXeHpcCoreWhenCheckingMediaBlockSupportThenReturnFalse) { EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.supportsMediaBlock); } XE_HPC_CORETEST_F(XeHpcCoreDeviceCaps, givenXeHpcCoreWhenCheckingCoherencySupportThenReturnFalse) { EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.ftrSupportsCoherency); } XE_HPC_CORETEST_F(XeHpcCoreDeviceCaps, givenHwInfoWhenSlmSizeIsRequiredThenReturnCorrectValue) { EXPECT_EQ(128u, pDevice->getHardwareInfo().capabilityTable.slmSize); } XE_HPC_CORETEST_F(XeHpcCoreDeviceCaps, givenXeHpcCoreWhenCheckExtensionsThenDeviceDoesNotReportClKhrSubgroupsExtension) { const auto &caps = pClDevice->getDeviceInfo(); EXPECT_TRUE(hasSubstr(caps.deviceExtensions, std::string("cl_khr_subgroups"))); EXPECT_TRUE(hasSubstr(caps.deviceExtensions, std::string("cl_intel_subgroup_matrix_multiply_accumulate"))); EXPECT_TRUE(hasSubstr(caps.deviceExtensions, std::string("cl_intel_subgroup_split_matrix_multiply_accumulate"))); } XE_HPC_CORETEST_F(XeHpcCoreDeviceCaps, givenXeHpcCoreWhenCheckingCapsThenDeviceDoesNotSupportIndependentForwardProgress) { const auto &caps = pClDevice->getDeviceInfo(); EXPECT_TRUE(caps.independentForwardProgress); } XE_HPC_CORETEST_F(XeHpcCoreDeviceCaps, givenDeviceWhenAskingForSubGroupSizesThenReturnCorrectValues) { auto &hwHelper = 
HwHelper::get(pDevice->getHardwareInfo().platform.eRenderCoreFamily); auto deviceSubgroups = hwHelper.getDeviceSubGroupSizes(); EXPECT_EQ(2u, deviceSubgroups.size()); EXPECT_EQ(16u, deviceSubgroups[0]); EXPECT_EQ(32u, deviceSubgroups[1]); } using QueueFamilyNameTestXeHpcCore = QueueFamilyNameTest; XE_HPC_CORETEST_F(QueueFamilyNameTestXeHpcCore, givenCccsWhenGettingQueueFamilyNameThenReturnProperValue) { verify(EngineGroupType::RenderCompute, "cccs"); } XE_HPC_CORETEST_F(QueueFamilyNameTestXeHpcCore, givenLinkedBcsWhenGettingQueueFamilyNameThenReturnProperValue) { verify(EngineGroupType::LinkedCopy, "linked bcs"); } compute-runtime-22.14.22890/opencl/test/unit_test/xe_hpc_core/test_platform_caps_xe_hpc_core.cpp000066400000000000000000000012331422164147700330370ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/platform_fixture.h" using namespace NEO; struct XeHpcCorePlatformCaps : public PlatformFixture, public ::testing::Test { void SetUp() override { PlatformFixture::SetUp(); } void TearDown() override { PlatformFixture::TearDown(); } }; XE_HPC_CORETEST_F(XeHpcCorePlatformCaps, givenXeHpcSkusThenItSupportFP64) { const auto &caps = pPlatform->getPlatformInfo(); EXPECT_NE(std::string::npos, caps.extensions.find(std::string("cl_khr_fp64"))); }compute-runtime-22.14.22890/opencl/test/unit_test/xe_hpg_core/000077500000000000000000000000001422164147700241115ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/xe_hpg_core/CMakeLists.txt000066400000000000000000000021751422164147700266560ustar00rootroot00000000000000# # Copyright (C) 2021-2022 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_XE_HPG_CORE) set(IGDRCL_SRCS_tests_xe_hpg_core_excludes ${CMAKE_CURRENT_SOURCE_DIR}/excludes_ocl_xe_hpg_core.cpp ) set_property(GLOBAL APPEND PROPERTY IGDRCL_SRCS_tests_excludes 
${IGDRCL_SRCS_tests_xe_hpg_core_excludes}) set(IGDRCL_SRCS_tests_xe_hpg_core ${IGDRCL_SRCS_tests_xe_hpg_core_excludes} ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/cl_hw_helper_tests_xe_hpg_core.cpp ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_hw_tests_xe_hpg_core.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_cmds_programming_xe_hpg_core.cpp ${CMAKE_CURRENT_SOURCE_DIR}/copy_engine_tests_xe_hpg_core.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_device_caps_xe_hpg_core.cpp ${CMAKE_CURRENT_SOURCE_DIR}/image_tests_xe_hpg_core.cpp ) get_property(NEO_CORE_TESTS_XE_HPG_CORE GLOBAL PROPERTY NEO_CORE_TESTS_XE_HPG_CORE) list(APPEND IGDRCL_SRCS_tests_xe_hpg_core ${NEO_CORE_TESTS_XE_HPG_CORE}) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_xe_hpg_core}) add_subdirectories() endif() compute-runtime-22.14.22890/opencl/test/unit_test/xe_hpg_core/cl_hw_helper_tests_xe_hpg_core.cpp000066400000000000000000000114031422164147700330330ustar00rootroot00000000000000/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/program/kernel_info.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/default_hw_info.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/test/unit_test/mocks/mock_cl_hw_helper.h" #include "opencl/test/unit_test/mocks/mock_context.h" using ClHwHelperTestsXeHpgCore = ::testing::Test; using namespace NEO; XE_HPG_CORETEST_F(ClHwHelperTestsXeHpgCore, WhenGettingDeviceIpVersionThenMakeCorrectDeviceIpVersion) { if (defaultHwInfo->capabilityTable.isIntegratedDevice) { EXPECT_EQ(ClHwHelperMock::makeDeviceIpVersion(12, 7, 0), ClHwHelper::get(renderCoreFamily).getDeviceIpVersion(*defaultHwInfo)); } else { EXPECT_EQ(ClHwHelperMock::makeDeviceIpVersion(12, 7, 1), ClHwHelper::get(renderCoreFamily).getDeviceIpVersion(*defaultHwInfo)); } } XE_HPG_CORETEST_F(ClHwHelperTestsXeHpgCore, 
givenGenHelperWhenKernelArgumentIsNotPureStatefulThenRequireNonAuxMode) { auto &clHwHelper = ClHwHelper::get(renderCoreFamily); for (auto isPureStateful : ::testing::Bool()) { ArgDescPointer argAsPtr{}; argAsPtr.accessedUsingStatelessAddressingMode = !isPureStateful; EXPECT_EQ(!argAsPtr.isPureStateful(), clHwHelper.requiresNonAuxMode(argAsPtr, *defaultHwInfo)); } } XE_HPG_CORETEST_F(ClHwHelperTestsXeHpgCore, givenGenHelperWhenEnableStatelessCompressionThenDontRequireNonAuxMode) { DebugManagerStateRestore restore; DebugManager.flags.EnableStatelessCompression.set(1); auto &clHwHelper = ClHwHelper::get(renderCoreFamily); for (auto isPureStateful : ::testing::Bool()) { ArgDescPointer argAsPtr{}; argAsPtr.accessedUsingStatelessAddressingMode = !isPureStateful; EXPECT_FALSE(clHwHelper.requiresNonAuxMode(argAsPtr, *defaultHwInfo)); } } XE_HPG_CORETEST_F(ClHwHelperTestsXeHpgCore, givenGenHelperWhenCheckAuxTranslationThenAuxResolvesIsRequired) { auto &clHwHelper = ClHwHelper::get(renderCoreFamily); for (auto isPureStateful : ::testing::Bool()) { KernelInfo kernelInfo{}; kernelInfo.kernelDescriptor.payloadMappings.explicitArgs.resize(1); kernelInfo.kernelDescriptor.payloadMappings.explicitArgs[0].as(true).accessedUsingStatelessAddressingMode = !isPureStateful; EXPECT_EQ(!isPureStateful, clHwHelper.requiresAuxResolves(kernelInfo, *defaultHwInfo)); } } XE_HPG_CORETEST_F(ClHwHelperTestsXeHpgCore, givenGenHelperWhenEnableStatelessCompressionThenAuxTranslationIsNotRequired) { DebugManagerStateRestore restore; DebugManager.flags.EnableStatelessCompression.set(1); auto &clHwHelper = ClHwHelper::get(renderCoreFamily); KernelInfo kernelInfo{}; EXPECT_FALSE(clHwHelper.requiresAuxResolves(kernelInfo, *defaultHwInfo)); } XE_HPG_CORETEST_F(ClHwHelperTestsXeHpgCore, givenDifferentCLImageFormatsWhenCallingAllowImageCompressionThenCorrectValueReturned) { struct ImageFormatCompression { cl_image_format imageFormat; bool isCompressable; }; const std::vector imageFormats = { {{CL_LUMINANCE, 
CL_UNORM_INT8}, false}, {{CL_LUMINANCE, CL_UNORM_INT16}, false}, {{CL_LUMINANCE, CL_HALF_FLOAT}, false}, {{CL_LUMINANCE, CL_FLOAT}, false}, {{CL_INTENSITY, CL_UNORM_INT8}, false}, {{CL_INTENSITY, CL_UNORM_INT16}, false}, {{CL_INTENSITY, CL_HALF_FLOAT}, false}, {{CL_INTENSITY, CL_FLOAT}, false}, {{CL_A, CL_UNORM_INT16}, false}, {{CL_A, CL_HALF_FLOAT}, false}, {{CL_A, CL_FLOAT}, false}, {{CL_R, CL_UNSIGNED_INT8}, true}, {{CL_R, CL_UNSIGNED_INT16}, true}, {{CL_R, CL_UNSIGNED_INT32}, true}, {{CL_RG, CL_UNSIGNED_INT32}, true}, {{CL_RGBA, CL_UNSIGNED_INT32}, true}, {{CL_RGBA, CL_UNORM_INT8}, true}, {{CL_RGBA, CL_UNORM_INT16}, true}, {{CL_RGBA, CL_SIGNED_INT8}, true}, {{CL_RGBA, CL_SIGNED_INT16}, true}, {{CL_RGBA, CL_SIGNED_INT32}, true}, {{CL_RGBA, CL_UNSIGNED_INT8}, true}, {{CL_RGBA, CL_UNSIGNED_INT16}, true}, {{CL_RGBA, CL_UNSIGNED_INT32}, true}, {{CL_RGBA, CL_HALF_FLOAT}, true}, {{CL_RGBA, CL_FLOAT}, true}, {{CL_BGRA, CL_UNORM_INT8}, true}, {{CL_R, CL_FLOAT}, true}, {{CL_R, CL_UNORM_INT8}, true}, {{CL_R, CL_UNORM_INT16}, true}, }; MockContext context; auto &clHwHelper = ClHwHelper::get(context.getDevice(0)->getHardwareInfo().platform.eRenderCoreFamily); for (const auto &format : imageFormats) { bool result = clHwHelper.allowImageCompression(format.imageFormat); EXPECT_EQ(format.isCompressable, result); } } command_stream_receiver_hw_tests_xe_hpg_core.cpp000066400000000000000000000035271422164147700357040ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/xe_hpg_core/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h" using namespace NEO; class CommandStreamReceiverHwTestXeHpgCore : public ClDeviceFixture, public ::testing::Test { public: void SetUp() override { DebugManager.flags.EnableLocalMemory.set(1); 
ClDeviceFixture::SetUp(); } void TearDown() override { ClDeviceFixture::TearDown(); } private: DebugManagerStateRestore restorer; }; XE_HPG_CORETEST_F(CommandStreamReceiverHwTestXeHpgCore, givenEnableStatelessCompressionWhenCallingGetMemoryCompressionStateThenReturnCorrectValue) { DebugManagerStateRestore restore; CommandStreamReceiverHw commandStreamReceiver(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); DebugManager.flags.EnableStatelessCompression.set(0); for (bool auxTranslationRequired : {false, true}) { auto memoryCompressionState = commandStreamReceiver.getMemoryCompressionState(auxTranslationRequired, pDevice->getHardwareInfo()); EXPECT_EQ(MemoryCompressionState::NotApplicable, memoryCompressionState); } DebugManager.flags.EnableStatelessCompression.set(1); for (bool auxTranslationRequired : {false, true}) { auto memoryCompressionState = commandStreamReceiver.getMemoryCompressionState(auxTranslationRequired, pDevice->getHardwareInfo()); if (auxTranslationRequired) { EXPECT_EQ(MemoryCompressionState::Disabled, memoryCompressionState); } else { EXPECT_EQ(MemoryCompressionState::Enabled, memoryCompressionState); } } } compute-runtime-22.14.22890/opencl/test/unit_test/xe_hpg_core/copy_engine_tests_xe_hpg_core.cpp000066400000000000000000000513271422164147700327100ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gmm_helper/client_context/gmm_client_context.h" #include "shared/source/gmm_helper/gmm.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/timestamp_packet.h" #include "shared/source/os_interface/hw_info_config.h" #include "shared/test/common/cmd_parse/hw_parse.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/libult/ult_command_stream_receiver.h" #include "shared/test/common/mocks/mock_device.h" #include 
"shared/test/common/mocks/mock_graphics_allocation.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_context.h" using namespace NEO; struct BlitXeHpgCoreTests : public ::testing::Test { void SetUp() override { if (is32bit) { GTEST_SKIP(); } DebugManager.flags.RenderCompressedBuffersEnabled.set(true); DebugManager.flags.EnableLocalMemory.set(true); HardwareInfo hwInfo = *defaultHwInfo; hwInfo.capabilityTable.blitterOperationsSupported = true; clDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); } uint32_t flushBcsTask(CommandStreamReceiver *csr, const BlitProperties &blitProperties, bool blocking, Device &device) { BlitPropertiesContainer blitPropertiesContainer; blitPropertiesContainer.push_back(blitProperties); return csr->flushBcsTask(blitPropertiesContainer, blocking, false, device); } std::unique_ptr clDevice; TimestampPacketContainer timestampPacketContainer; CsrDependencies csrDependencies; DebugManagerStateRestore debugRestorer; }; XE_HPG_CORETEST_F(BlitXeHpgCoreTests, givenBufferWhenProgrammingBltCommandThenSetMocs) { using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT; auto &bcsEngine = clDevice->getEngine(aub_stream::EngineType::ENGINE_BCS, EngineUsage::Regular); auto csr = static_cast *>(bcsEngine.commandStreamReceiver); MockContext context(clDevice.get()); MockGraphicsAllocation clearColorAlloc; cl_int retVal = CL_SUCCESS; auto buffer = clUniquePtr(Buffer::create(&context, CL_MEM_READ_WRITE, 1, nullptr, retVal)); auto blitProperties = BlitProperties::constructPropertiesForCopy(buffer->getGraphicsAllocation(clDevice->getRootDeviceIndex()), buffer->getGraphicsAllocation(clDevice->getRootDeviceIndex()), 0, 0, {1, 1, 1}, 0, 0, 0, 0, &clearColorAlloc); flushBcsTask(csr, blitProperties, true, clDevice->getDevice()); 
HardwareParse hwParser; hwParser.parseCommands(csr->commandStream); auto bltCmd = genCmdCast(*(hwParser.cmdList.begin())); EXPECT_NE(nullptr, bltCmd); auto mocs = clDevice->getRootDeviceEnvironment().getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED); EXPECT_EQ(mocs, bltCmd->getDestinationMOCS()); EXPECT_EQ(mocs, bltCmd->getSourceMOCS()); } XE_HPG_CORETEST_F(BlitXeHpgCoreTests, givenBufferWhenProgrammingBltCommandThenSetMocsToValueOfDebugKey) { DebugManagerStateRestore restorer; DebugManager.flags.OverrideBlitterMocs.set(0u); using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT; auto &bcsEngine = clDevice->getEngine(aub_stream::EngineType::ENGINE_BCS, EngineUsage::Regular); auto csr = static_cast *>(bcsEngine.commandStreamReceiver); MockContext context(clDevice.get()); MockGraphicsAllocation clearColorAlloc; cl_int retVal = CL_SUCCESS; auto buffer = clUniquePtr(Buffer::create(&context, CL_MEM_READ_WRITE, 1, nullptr, retVal)); auto blitProperties = BlitProperties::constructPropertiesForCopy(buffer->getGraphicsAllocation(clDevice->getRootDeviceIndex()), buffer->getGraphicsAllocation(clDevice->getRootDeviceIndex()), 0, 0, {1, 1, 1}, 0, 0, 0, 0, &clearColorAlloc); flushBcsTask(csr, blitProperties, true, clDevice->getDevice()); HardwareParse hwParser; hwParser.parseCommands(csr->commandStream); auto bltCmd = genCmdCast(*(hwParser.cmdList.begin())); EXPECT_NE(nullptr, bltCmd); EXPECT_EQ(0u, bltCmd->getDestinationMOCS()); EXPECT_EQ(0u, bltCmd->getSourceMOCS()); } XE_HPG_CORETEST_F(BlitXeHpgCoreTests, given2dBlitCommandWhenDispatchingThenSetValidSurfaceType) { using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT; auto &bcsEngine = clDevice->getEngine(aub_stream::EngineType::ENGINE_BCS, EngineUsage::Regular); auto csr = static_cast *>(bcsEngine.commandStreamReceiver); MockContext context(clDevice.get()); cl_int retVal = CL_SUCCESS; auto buffer = clUniquePtr(Buffer::create(&context, CL_MEM_READ_WRITE, 1, nullptr, retVal)); auto allocation = 
buffer->getGraphicsAllocation(clDevice->getRootDeviceIndex()); MockGraphicsAllocation clearColorAlloc; size_t offset = 0; { // 1D auto blitProperties = BlitProperties::constructPropertiesForCopy(allocation, allocation, 0, 0, {BlitterConstants::maxBlitWidth - 1, 1, 1}, 0, 0, 0, 0, &clearColorAlloc); flushBcsTask(csr, blitProperties, false, clDevice->getDevice()); HardwareParse hwParser; hwParser.parseCommands(csr->commandStream); auto bltCmd = genCmdCast(*(hwParser.cmdList.begin())); EXPECT_NE(nullptr, bltCmd); EXPECT_EQ(XY_COPY_BLT::SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_1D, bltCmd->getDestinationSurfaceType()); EXPECT_EQ(XY_COPY_BLT::SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_1D, bltCmd->getSourceSurfaceType()); EXPECT_EQ(bltCmd->getSourceSurfaceWidth(), bltCmd->getDestinationX2CoordinateRight()); EXPECT_EQ(bltCmd->getSourceSurfaceHeight(), bltCmd->getDestinationY2CoordinateBottom()); EXPECT_EQ(bltCmd->getDestinationSurfaceWidth(), bltCmd->getDestinationX2CoordinateRight()); EXPECT_EQ(bltCmd->getDestinationSurfaceHeight(), bltCmd->getDestinationY2CoordinateBottom()); offset = csr->commandStream.getUsed(); } { // 2D auto blitProperties = BlitProperties::constructPropertiesForCopy(allocation, allocation, 0, 0, {(2 * BlitterConstants::maxBlitWidth) + 1, 1, 1}, 0, 0, 0, 0, &clearColorAlloc); flushBcsTask(csr, blitProperties, false, clDevice->getDevice()); HardwareParse hwParser; hwParser.parseCommands(csr->commandStream, offset); auto bltCmd = genCmdCast(*(hwParser.cmdList.begin())); EXPECT_NE(nullptr, bltCmd); EXPECT_EQ(XY_COPY_BLT::SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_2D, bltCmd->getDestinationSurfaceType()); EXPECT_EQ(XY_COPY_BLT::SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_2D, bltCmd->getSourceSurfaceType()); EXPECT_EQ(bltCmd->getSourceSurfaceWidth(), bltCmd->getDestinationX2CoordinateRight()); EXPECT_EQ(bltCmd->getSourceSurfaceHeight(), bltCmd->getDestinationY2CoordinateBottom()); EXPECT_EQ(bltCmd->getDestinationSurfaceWidth(), bltCmd->getDestinationX2CoordinateRight()); 
EXPECT_EQ(bltCmd->getDestinationSurfaceHeight(), bltCmd->getDestinationY2CoordinateBottom()); } } XE_HPG_CORETEST_F(BlitXeHpgCoreTests, givenBufferWhenProgrammingBltCommandThenSetTargetMemory) { using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT; auto csr = static_cast *>(clDevice->getEngine(aub_stream::EngineType::ENGINE_BCS, EngineUsage::Regular).commandStreamReceiver); MockContext context(clDevice.get()); MockGraphicsAllocation clearColorAlloc; cl_int retVal = CL_SUCCESS; auto bufferInSystemPool = clUniquePtr(Buffer::create(&context, CL_MEM_FORCE_HOST_MEMORY_INTEL, 2048, nullptr, retVal)); EXPECT_TRUE(MemoryPool::isSystemMemoryPool(bufferInSystemPool->getGraphicsAllocation(clDevice->getRootDeviceIndex())->getMemoryPool())); auto bufferInLocalPool = clUniquePtr(Buffer::create(&context, CL_MEM_READ_WRITE, 2048, nullptr, retVal)); EXPECT_FALSE(MemoryPool::isSystemMemoryPool(bufferInLocalPool->getGraphicsAllocation(clDevice->getRootDeviceIndex())->getMemoryPool())); { auto blitProperties = BlitProperties::constructPropertiesForCopy(bufferInSystemPool->getGraphicsAllocation(clDevice->getRootDeviceIndex()), bufferInLocalPool->getGraphicsAllocation(clDevice->getRootDeviceIndex()), 0, 0, {2048, 1, 1}, 0, 0, 0, 0, &clearColorAlloc); flushBcsTask(csr, blitProperties, true, clDevice->getDevice()); HardwareParse hwParser; hwParser.parseCommands(csr->commandStream); auto bltCmd = genCmdCast(*(hwParser.cmdList.begin())); EXPECT_NE(nullptr, bltCmd); EXPECT_EQ(bltCmd->getSourceTargetMemory(), XY_COPY_BLT::TARGET_MEMORY::TARGET_MEMORY_LOCAL_MEM); EXPECT_EQ(bltCmd->getDestinationTargetMemory(), XY_COPY_BLT::TARGET_MEMORY::TARGET_MEMORY_SYSTEM_MEM); } { auto offset = csr->commandStream.getUsed(); auto blitProperties = BlitProperties::constructPropertiesForCopy(bufferInLocalPool->getGraphicsAllocation(clDevice->getRootDeviceIndex()), bufferInSystemPool->getGraphicsAllocation(clDevice->getRootDeviceIndex()), 0, 0, {2048, 1, 1}, 0, 0, 0, 0, &clearColorAlloc); flushBcsTask(csr, 
blitProperties, true, clDevice->getDevice()); HardwareParse hwParser; hwParser.parseCommands(csr->commandStream, offset); auto bltCmd = genCmdCast(*(hwParser.cmdList.begin())); EXPECT_NE(nullptr, bltCmd); EXPECT_EQ(bltCmd->getDestinationTargetMemory(), XY_COPY_BLT::TARGET_MEMORY::TARGET_MEMORY_LOCAL_MEM); EXPECT_EQ(bltCmd->getSourceTargetMemory(), XY_COPY_BLT::TARGET_MEMORY::TARGET_MEMORY_SYSTEM_MEM); } } XE_HPG_CORETEST_F(BlitXeHpgCoreTests, givenBufferWhenProgrammingBltCommandThenSetTargetMemoryInCpuAccesingLocalMemoryMode) { DebugManagerStateRestore restorer; DebugManager.flags.ForceLocalMemoryAccessMode.set(1); using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT; auto csr = static_cast *>(clDevice->getEngine(aub_stream::EngineType::ENGINE_BCS, EngineUsage::Regular).commandStreamReceiver); MockContext context(clDevice.get()); MockGraphicsAllocation clearColorAlloc; cl_int retVal = CL_SUCCESS; auto bufferInSystemPool = clUniquePtr(Buffer::create(&context, CL_MEM_FORCE_HOST_MEMORY_INTEL, 2048, nullptr, retVal)); EXPECT_TRUE(MemoryPool::isSystemMemoryPool(bufferInSystemPool->getGraphicsAllocation(clDevice->getRootDeviceIndex())->getMemoryPool())); auto bufferInLocalPool = clUniquePtr(Buffer::create(&context, CL_MEM_READ_WRITE, 2048, nullptr, retVal)); EXPECT_FALSE(MemoryPool::isSystemMemoryPool(bufferInLocalPool->getGraphicsAllocation(clDevice->getRootDeviceIndex())->getMemoryPool())); { auto blitProperties = BlitProperties::constructPropertiesForCopy(bufferInSystemPool->getGraphicsAllocation(clDevice->getRootDeviceIndex()), bufferInLocalPool->getGraphicsAllocation(clDevice->getRootDeviceIndex()), 0, 0, {2048, 1, 1}, 0, 0, 0, 0, &clearColorAlloc); flushBcsTask(csr, blitProperties, true, clDevice->getDevice()); HardwareParse hwParser; hwParser.parseCommands(csr->commandStream); auto bltCmd = genCmdCast(*(hwParser.cmdList.begin())); EXPECT_NE(nullptr, bltCmd); EXPECT_EQ(bltCmd->getSourceTargetMemory(), XY_COPY_BLT::TARGET_MEMORY::TARGET_MEMORY_LOCAL_MEM); 
EXPECT_EQ(bltCmd->getDestinationTargetMemory(), XY_COPY_BLT::TARGET_MEMORY::TARGET_MEMORY_SYSTEM_MEM); } { auto offset = csr->commandStream.getUsed(); auto blitProperties = BlitProperties::constructPropertiesForCopy(bufferInLocalPool->getGraphicsAllocation(clDevice->getRootDeviceIndex()), bufferInSystemPool->getGraphicsAllocation(clDevice->getRootDeviceIndex()), 0, 0, {2048, 1, 1}, 0, 0, 0, 0, &clearColorAlloc); flushBcsTask(csr, blitProperties, true, clDevice->getDevice()); HardwareParse hwParser; hwParser.parseCommands(csr->commandStream, offset); auto bltCmd = genCmdCast(*(hwParser.cmdList.begin())); EXPECT_NE(nullptr, bltCmd); EXPECT_EQ(bltCmd->getDestinationTargetMemory(), XY_COPY_BLT::TARGET_MEMORY::TARGET_MEMORY_LOCAL_MEM); EXPECT_EQ(bltCmd->getSourceTargetMemory(), XY_COPY_BLT::TARGET_MEMORY::TARGET_MEMORY_SYSTEM_MEM); } } XE_HPG_CORETEST_F(BlitXeHpgCoreTests, givenBufferWhenProgrammingBltCommandThenSetTargetMemoryToSystemWhenDebugKeyPresent) { DebugManagerStateRestore restorer; using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT; auto csr = static_cast *>(clDevice->getEngine(aub_stream::EngineType::ENGINE_BCS, EngineUsage::Regular).commandStreamReceiver); MockContext context(clDevice.get()); MockGraphicsAllocation clearColorAlloc; cl_int retVal = CL_SUCCESS; auto bufferInSystemPool = clUniquePtr(Buffer::create(&context, CL_MEM_FORCE_HOST_MEMORY_INTEL, 2048, nullptr, retVal)); EXPECT_TRUE(MemoryPool::isSystemMemoryPool(bufferInSystemPool->getGraphicsAllocation(clDevice->getRootDeviceIndex())->getMemoryPool())); auto bufferInLocalPool = clUniquePtr(Buffer::create(&context, CL_MEM_READ_WRITE, 2048, nullptr, retVal)); EXPECT_FALSE(MemoryPool::isSystemMemoryPool(bufferInLocalPool->getGraphicsAllocation(clDevice->getRootDeviceIndex())->getMemoryPool())); DebugManager.flags.OverrideBlitterTargetMemory.set(0u); { auto blitProperties = BlitProperties::constructPropertiesForCopy(bufferInSystemPool->getGraphicsAllocation(clDevice->getRootDeviceIndex()), 
bufferInLocalPool->getGraphicsAllocation(clDevice->getRootDeviceIndex()), 0, 0, {2048, 1, 1}, 0, 0, 0, 0, &clearColorAlloc); flushBcsTask(csr, blitProperties, true, clDevice->getDevice()); HardwareParse hwParser; hwParser.parseCommands(csr->commandStream); auto bltCmd = genCmdCast(*(hwParser.cmdList.begin())); EXPECT_NE(nullptr, bltCmd); EXPECT_EQ(bltCmd->getDestinationTargetMemory(), XY_COPY_BLT::TARGET_MEMORY::TARGET_MEMORY_SYSTEM_MEM); EXPECT_EQ(bltCmd->getSourceTargetMemory(), XY_COPY_BLT::TARGET_MEMORY::TARGET_MEMORY_SYSTEM_MEM); } DebugManager.flags.OverrideBlitterTargetMemory.set(1u); { auto offset = csr->commandStream.getUsed(); auto blitProperties = BlitProperties::constructPropertiesForCopy(bufferInLocalPool->getGraphicsAllocation(clDevice->getRootDeviceIndex()), bufferInSystemPool->getGraphicsAllocation(clDevice->getRootDeviceIndex()), 0, 0, {2048, 1, 1}, 0, 0, 0, 0, &clearColorAlloc); flushBcsTask(csr, blitProperties, true, clDevice->getDevice()); HardwareParse hwParser; hwParser.parseCommands(csr->commandStream, offset); auto bltCmd = genCmdCast(*(hwParser.cmdList.begin())); EXPECT_NE(nullptr, bltCmd); EXPECT_EQ(bltCmd->getDestinationTargetMemory(), XY_COPY_BLT::TARGET_MEMORY::TARGET_MEMORY_LOCAL_MEM); EXPECT_EQ(bltCmd->getSourceTargetMemory(), XY_COPY_BLT::TARGET_MEMORY::TARGET_MEMORY_LOCAL_MEM); } DebugManager.flags.OverrideBlitterTargetMemory.set(2u); { auto offset = csr->commandStream.getUsed(); auto blitProperties = BlitProperties::constructPropertiesForCopy(bufferInLocalPool->getGraphicsAllocation(clDevice->getRootDeviceIndex()), bufferInSystemPool->getGraphicsAllocation(clDevice->getRootDeviceIndex()), 0, 0, {2048, 1, 1}, 0, 0, 0, 0, &clearColorAlloc); flushBcsTask(csr, blitProperties, true, clDevice->getDevice()); HardwareParse hwParser; hwParser.parseCommands(csr->commandStream, offset); auto bltCmd = genCmdCast(*(hwParser.cmdList.begin())); EXPECT_NE(nullptr, bltCmd); EXPECT_EQ(bltCmd->getDestinationTargetMemory(), 
XY_COPY_BLT::TARGET_MEMORY::TARGET_MEMORY_LOCAL_MEM); EXPECT_EQ(bltCmd->getSourceTargetMemory(), XY_COPY_BLT::TARGET_MEMORY::TARGET_MEMORY_SYSTEM_MEM); } } XE_HPG_CORETEST_F(BlitXeHpgCoreTests, givenBufferWhenProgrammingBltCommandAndRevisionB0ThenSetTargetMemory) { using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT; HardwareInfo *hwInfo = clDevice->getRootDeviceEnvironment().getMutableHardwareInfo(); const auto &hwInfoConfig = *HwInfoConfig::get(hwInfo->platform.eProductFamily); hwInfo->platform.usRevId = hwInfoConfig.getHwRevIdFromStepping(REVISION_B, *hwInfo); auto csr = static_cast *>(clDevice->getEngine(aub_stream::EngineType::ENGINE_BCS, EngineUsage::Regular).commandStreamReceiver); MockContext context(clDevice.get()); MockGraphicsAllocation clearColorAlloc; cl_int retVal = CL_SUCCESS; auto bufferInSystemPool = clUniquePtr(Buffer::create(&context, CL_MEM_FORCE_HOST_MEMORY_INTEL, 2048, nullptr, retVal)); EXPECT_TRUE(MemoryPool::isSystemMemoryPool(bufferInSystemPool->getGraphicsAllocation(clDevice->getRootDeviceIndex())->getMemoryPool())); auto bufferInLocalPool = clUniquePtr(Buffer::create(&context, CL_MEM_READ_WRITE, 2048, nullptr, retVal)); EXPECT_FALSE(MemoryPool::isSystemMemoryPool(bufferInLocalPool->getGraphicsAllocation(clDevice->getRootDeviceIndex())->getMemoryPool())); { auto blitProperties = BlitProperties::constructPropertiesForCopy(bufferInSystemPool->getGraphicsAllocation(clDevice->getRootDeviceIndex()), bufferInLocalPool->getGraphicsAllocation(clDevice->getRootDeviceIndex()), 0, 0, {2048, 1, 1}, 0, 0, 0, 0, &clearColorAlloc); flushBcsTask(csr, blitProperties, true, clDevice->getDevice()); HardwareParse hwParser; hwParser.parseCommands(csr->commandStream); auto bltCmd = genCmdCast(*(hwParser.cmdList.begin())); EXPECT_NE(nullptr, bltCmd); EXPECT_EQ(bltCmd->getSourceTargetMemory(), XY_COPY_BLT::TARGET_MEMORY::TARGET_MEMORY_LOCAL_MEM); EXPECT_EQ(bltCmd->getDestinationTargetMemory(), XY_COPY_BLT::TARGET_MEMORY::TARGET_MEMORY_SYSTEM_MEM); } { auto 
offset = csr->commandStream.getUsed(); auto blitProperties = BlitProperties::constructPropertiesForCopy(bufferInLocalPool->getGraphicsAllocation(clDevice->getRootDeviceIndex()), bufferInSystemPool->getGraphicsAllocation(clDevice->getRootDeviceIndex()), 0, 0, {2048, 1, 1}, 0, 0, 0, 0, &clearColorAlloc); flushBcsTask(csr, blitProperties, true, clDevice->getDevice()); HardwareParse hwParser; hwParser.parseCommands(csr->commandStream, offset); auto bltCmd = genCmdCast(*(hwParser.cmdList.begin())); EXPECT_NE(nullptr, bltCmd); EXPECT_EQ(bltCmd->getDestinationTargetMemory(), XY_COPY_BLT::TARGET_MEMORY::TARGET_MEMORY_LOCAL_MEM); EXPECT_EQ(bltCmd->getSourceTargetMemory(), XY_COPY_BLT::TARGET_MEMORY::TARGET_MEMORY_SYSTEM_MEM); } } compute-runtime-22.14.22890/opencl/test/unit_test/xe_hpg_core/dg2/000077500000000000000000000000001422164147700245655ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/xe_hpg_core/dg2/CMakeLists.txt000066400000000000000000000017541422164147700273340ustar00rootroot00000000000000# # Copyright (C) 2021-2022 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_DG2) set(IGDRCL_SRCS_tests_xe_hpg_core_dg2_excludes ${CMAKE_CURRENT_SOURCE_DIR}/excludes_ocl_dg2.cpp ) set_property(GLOBAL APPEND PROPERTY IGDRCL_SRCS_tests_excludes ${IGDRCL_SRCS_tests_xe_hpg_core_dg2_excludes}) set(IGDRCL_SRCS_tests_xe_hpg_core_dg2 ${IGDRCL_SRCS_tests_xe_hpg_core_dg2_excludes} ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/get_device_info_dg2.cpp ${CMAKE_CURRENT_SOURCE_DIR}/memory_manager_tests_dg2.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sampler_tests_dg2.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_cmds_programming_dg2.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_device_caps_dg2.cpp ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_xe_hpg_core_dg2}) add_subdirectories() neo_copy_test_files_with_revision(copy_test_files_dg2_0 dg2 0) add_dependencies(copy_test_files_per_product copy_test_files_dg2_0) endif() 
compute-runtime-22.14.22890/opencl/test/unit_test/xe_hpg_core/dg2/excludes_ocl_dg2.cpp000066400000000000000000000040451422164147700305010ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" HWTEST_EXCLUDE_PRODUCT(HwHelperTest, GivenZeroSlmSizeWhenComputeSlmSizeIsCalledThenCorrectValueIsReturned, IGFX_DG2); HWTEST_EXCLUDE_PRODUCT(HwInfoConfigTest, WhenAllowCompressionIsCalledThenTrueIsReturned, IGFX_DG2); HWTEST_EXCLUDE_PRODUCT(HwInfoConfigTest, givenHardwareInfoWhenCallingIsAdditionalStateBaseAddressWARequiredThenFalseIsReturned, IGFX_DG2); HWTEST_EXCLUDE_PRODUCT(HwInfoConfigTest, givenHardwareInfoWhenCallingIsMaxThreadsForWorkgroupWARequiredThenFalseIsReturned, IGFX_DG2); HWTEST_EXCLUDE_PRODUCT(HwInfoConfigTest, givenHwInfoConfigWhenAskedForDefaultEngineTypeAdjustmentThenFalseIsReturned, IGFX_DG2); HWTEST_EXCLUDE_PRODUCT(HwInfoConfigTest, givenHwInfoConfigWhenAskedIfAllocationSizeAdjustmentIsRequiredThenFalseIsReturned, IGFX_DG2); HWTEST_EXCLUDE_PRODUCT(HwInfoConfigTest, givenHwInfoConfigWhenAskedIfPrefetchDisablingIsRequiredThenFalseIsReturned, IGFX_DG2); HWTEST_EXCLUDE_PRODUCT(HwInfoConfigTest, givenHwInfoConfigWhenAskedIfPipeControlPriorToNonPipelinedStateCommandsWARequiredThenFalseIsReturned, IGFX_DG2); HWTEST_EXCLUDE_PRODUCT(HwInfoConfigTest, givenHwInfoConfigWhenAskedIfTile64With3DSurfaceOnBCSIsSupportedThenTrueIsReturned, IGFX_DG2); HWTEST_EXCLUDE_PRODUCT(HwInfoConfigTestXeHpAndLater, givenXeHPAndLaterPlatformWhenAskedIfTile64With3DSurfaceOnBCSIsSupportedThenFalseIsReturned, IGFX_DG2); HWTEST_EXCLUDE_PRODUCT(WddmMemoryManagerSimpleTest, givenLinearStreamWhenItIsAllocatedThenItIsInLocalMemoryHasCpuPointerAndHasStandardHeap64kbAsGpuAddress, IGFX_DG2); HWTEST_EXCLUDE_PRODUCT(HwInfoConfigTest, WhenAllowRenderCompressionIsCalledThenTrueIsReturned, IGFX_DG2); HWTEST_EXCLUDE_PRODUCT(HwInfoConfigTest, whenConvertingTimestampsToCsDomainThenNothingIsChanged, 
IGFX_DG2); HWTEST_EXCLUDE_PRODUCT(HwHelperTestXeHPAndLater, GiveCcsNodeThenDefaultEngineTypeIsCcs, IGFX_DG2); HWTEST_EXCLUDE_PRODUCT(XeHPAndLaterDeviceCapsTests, givenHwInfoWhenRequestedComputeUnitsUsedForScratchThenReturnValidValue, IGFX_DG2); compute-runtime-22.14.22890/opencl/test/unit_test/xe_hpg_core/dg2/get_device_info_dg2.cpp000066400000000000000000000022331422164147700311360ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/device_info_fixture.h" using namespace NEO; HWTEST_EXCLUDE_PRODUCT(GetDeviceInfoMemCapabilitiesTest, GivenValidParametersWhenGetDeviceInfoIsCalledForXE_HP_COREThenClSuccessIsReturned, IGFX_DG2); DG2TEST_F(GetDeviceInfoMemCapabilitiesTest, GivenValidParametersWhenGetDeviceInfoIsCalledForDG2ThenClSuccessIsReturned) { std::vector params = { {CL_DEVICE_HOST_MEM_CAPABILITIES_INTEL, CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL}, {CL_DEVICE_DEVICE_MEM_CAPABILITIES_INTEL, (CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL | CL_UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL)}, {CL_DEVICE_SINGLE_DEVICE_SHARED_MEM_CAPABILITIES_INTEL, (CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL | CL_UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL)}, {CL_DEVICE_CROSS_DEVICE_SHARED_MEM_CAPABILITIES_INTEL, (CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL | CL_UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL)}, {CL_DEVICE_SHARED_SYSTEM_MEM_CAPABILITIES_INTEL, 0}}; check(params); } compute-runtime-22.14.22890/opencl/test/unit_test/xe_hpg_core/dg2/linux/000077500000000000000000000000001422164147700257245ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/xe_hpg_core/dg2/linux/CMakeLists.txt000066400000000000000000000005461422164147700304710ustar00rootroot00000000000000# # Copyright (C) 2021-2022 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_xe_hpg_core_dg2_linux ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt 
${CMAKE_CURRENT_SOURCE_DIR}/hw_info_config_tests_dg2.cpp ) if(UNIX) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_xe_hpg_core_dg2_linux}) add_subdirectories() endif() compute-runtime-22.14.22890/opencl/test/unit_test/xe_hpg_core/dg2/linux/dll/000077500000000000000000000000001422164147700264775ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/xe_hpg_core/dg2/linux/dll/CMakeLists.txt000066400000000000000000000005311422164147700312360ustar00rootroot00000000000000# # Copyright (C) 2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_linux_dll_tests_xe_hpg_core_dg2 ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/${BRANCH_DIR_SUFFIX}/device_id_tests_dg2.cpp ) target_sources(igdrcl_linux_dll_tests PRIVATE ${IGDRCL_SRCS_linux_dll_tests_xe_hpg_core_dg2}) compute-runtime-22.14.22890/opencl/test/unit_test/xe_hpg_core/dg2/linux/dll/device_id_tests_dg2.cpp000066400000000000000000000033751422164147700331040ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/fixtures/linux/device_id_fixture.h" using namespace NEO; TEST_F(DeviceIdTests, GivenDg2SupportedDeviceIdThenDeviceDescriptorTableExists) { std::array expectedDescriptors = {{ {0x4F80, &DG2_CONFIG::hwInfo, &DG2_CONFIG::setupHardwareInfo}, {0x4F81, &DG2_CONFIG::hwInfo, &DG2_CONFIG::setupHardwareInfo}, {0x4F82, &DG2_CONFIG::hwInfo, &DG2_CONFIG::setupHardwareInfo}, {0x4F83, &DG2_CONFIG::hwInfo, &DG2_CONFIG::setupHardwareInfo}, {0x4F84, &DG2_CONFIG::hwInfo, &DG2_CONFIG::setupHardwareInfo}, {0x4F87, &DG2_CONFIG::hwInfo, &DG2_CONFIG::setupHardwareInfo}, {0x4F88, &DG2_CONFIG::hwInfo, &DG2_CONFIG::setupHardwareInfo}, {0x5690, &DG2_CONFIG::hwInfo, &DG2_CONFIG::setupHardwareInfo}, {0x5691, &DG2_CONFIG::hwInfo, &DG2_CONFIG::setupHardwareInfo}, {0x5692, &DG2_CONFIG::hwInfo, &DG2_CONFIG::setupHardwareInfo}, {0x5693, &DG2_CONFIG::hwInfo, &DG2_CONFIG::setupHardwareInfo}, 
{0x5694, &DG2_CONFIG::hwInfo, &DG2_CONFIG::setupHardwareInfo}, {0x5695, &DG2_CONFIG::hwInfo, &DG2_CONFIG::setupHardwareInfo}, {0x56A0, &DG2_CONFIG::hwInfo, &DG2_CONFIG::setupHardwareInfo}, {0x56A1, &DG2_CONFIG::hwInfo, &DG2_CONFIG::setupHardwareInfo}, {0x56A2, &DG2_CONFIG::hwInfo, &DG2_CONFIG::setupHardwareInfo}, {0x56A5, &DG2_CONFIG::hwInfo, &DG2_CONFIG::setupHardwareInfo}, {0x56A6, &DG2_CONFIG::hwInfo, &DG2_CONFIG::setupHardwareInfo}, {0x56C0, &DG2_CONFIG::hwInfo, &DG2_CONFIG::setupHardwareInfo}, {0x56C1, &DG2_CONFIG::hwInfo, &DG2_CONFIG::setupHardwareInfo}, }}; testImpl(expectedDescriptors); } compute-runtime-22.14.22890/opencl/test/unit_test/xe_hpg_core/dg2/linux/hw_info_config_tests_dg2.cpp000066400000000000000000000031531422164147700333660ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/os_interface.h" #include "shared/test/common/helpers/default_hw_info.h" #include "shared/test/common/libult/linux/drm_mock.h" #include "shared/test/unit_test/helpers/gtest_helpers.h" #include "opencl/test/unit_test/os_interface/linux/hw_info_config_linux_tests.h" using namespace NEO; struct HwInfoConfigTestLinuxDg2 : HwInfoConfigTestLinux { void SetUp() override { HwInfoConfigTestLinux::SetUp(); drm = new DrmMock(*executionEnvironment->rootDeviceEnvironments[0]); osInterface->setDriverModel(std::unique_ptr(drm)); drm->storedDeviceID = 0x1234; } }; DG2TEST_F(HwInfoConfigTestLinuxDg2, WhenConfiguringHwInfoThenZeroIsReturned) { auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfoDrm(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); } DG2TEST_F(HwInfoConfigTestLinuxDg2, GivenDg2WhenConfigureHardwareCustomThenKmdNotifyIsEnabled) { HwInfoConfig *hwInfoConfig = HwInfoConfig::get(productFamily); OSInterface osIface; hwInfoConfig->configureHardwareCustom(&pInHwInfo, &osIface); 
EXPECT_TRUE(pInHwInfo.capabilityTable.kmdNotifyProperties.enableKmdNotify); EXPECT_EQ(150ll, pInHwInfo.capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds); EXPECT_TRUE(pInHwInfo.capabilityTable.kmdNotifyProperties.enableQuickKmdSleepForDirectSubmission); EXPECT_EQ(20ll, pInHwInfo.capabilityTable.kmdNotifyProperties.delayQuickKmdSleepForDirectSubmissionMicroseconds); } compute-runtime-22.14.22890/opencl/test/unit_test/xe_hpg_core/dg2/memory_manager_tests_dg2.cpp000066400000000000000000000174321422164147700322600ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/mocks/mock_execution_environment.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "shared/test/common/test_macros/test.h" using namespace NEO; HWTEST_EXCLUDE_PRODUCT(GetAllocationDataTestHw, givenLinearStreamTypeWhenGetAllocationDataIsCalledThenSystemMemoryIsNotRequested, IGFX_DG2); HWTEST_EXCLUDE_PRODUCT(GetAllocationDataTestHw, givenLinearStreamWhenGetAllocationDataIsCalledThenSystemMemoryIsNotRequested, IGFX_DG2); HWTEST_EXCLUDE_PRODUCT(GetAllocationDataTestHw, givenKernelIsaTypeWhenGetAllocationDataIsCalledThenSystemMemoryIsNotRequested, IGFX_DG2); HWTEST_EXCLUDE_PRODUCT(GetAllocationDataTestHw, givenInternalHeapTypeWhenGetAllocationDataIsCalledThenSystemMemoryIsNotRequested, IGFX_DG2); HWTEST_EXCLUDE_PRODUCT(GetAllocationDataTestHw, givenRingBufferAllocationWhenGetAllocationDataIsCalledThenItHasProperFieldsSet, IGFX_DG2); HWTEST_EXCLUDE_PRODUCT(GetAllocationDataTestHw, givenSemaphoreBufferAllocationWhenGetAllocationDataIsCalledThenItHasProperFieldsSet, IGFX_DG2); HWTEST_EXCLUDE_PRODUCT(GetAllocationDataTestHw, givenPrintfAllocationWhenGetAllocationDataIsCalledThenDontUseSystemMemoryAndRequireCpuAccess, IGFX_DG2); HWTEST_EXCLUDE_PRODUCT(GetAllocationDataTestHw, 
givenGpuTimestampDeviceBufferTypeWhenGetAllocationDataIsCalledThenSystemMemoryIsNotRequested, IGFX_DG2); HWTEST_EXCLUDE_PRODUCT(GetAllocationDataTestHw, givenPrintfAllocationWhenGetAllocationDataIsCalledThenDontForceSystemMemoryAndRequireCpuAccess, IGFX_DG2); HWTEST_EXCLUDE_PRODUCT(GetAllocationDataTestHw, givenLinearStreamAllocationWhenGetAllocationDataIsCalledThenDontForceSystemMemoryAndRequireCpuAccess, IGFX_DG2); HWTEST_EXCLUDE_PRODUCT(GetAllocationDataTestHw, givenConstantSurfaceAllocationWhenGetAllocationDataIsCalledThenDontForceSystemMemoryAndRequireCpuAccess, IGFX_DG2); using MemoryManagerTestsDg2 = ::testing::Test; DG2TEST_F(MemoryManagerTestsDg2, givenEnabledLocalMemoryWhenLinearStreamIsAllocatedInPreferredPoolThenLocalMemoryPoolIsNotUsed) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MockMemoryManager memoryManager(false, true, executionEnvironment); auto allocation = memoryManager.allocateGraphicsMemoryInPreferredPool({mockRootDeviceIndex, MemoryConstants::pageSize, AllocationType::LINEAR_STREAM, mockDeviceBitfield}, nullptr); EXPECT_NE(MemoryPool::LocalMemory, allocation->getMemoryPool()); EXPECT_FALSE(memoryManager.allocationInDevicePoolCreated); memoryManager.freeGraphicsMemory(allocation); } DG2TEST_F(MemoryManagerTestsDg2, givenLinearStreamTypeWhenGetAllocationDataIsCalledThenSystemMemoryIsRequested) { AllocationData allocData; MockMemoryManager mockMemoryManager; AllocationProperties properties{mockRootDeviceIndex, 1, AllocationType::LINEAR_STREAM, mockDeviceBitfield}; mockMemoryManager.getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_TRUE(allocData.flags.useSystemMemory); EXPECT_TRUE(allocData.flags.requiresCpuAccess); } DG2TEST_F(MemoryManagerTestsDg2, givenLinearStreamWhenGetAllocationDataIsCalledThenSystemMemoryIsRequested) { AllocationData allocData; MockMemoryManager mockMemoryManager; AllocationProperties properties{mockRootDeviceIndex, 1, 
AllocationType::LINEAR_STREAM, mockDeviceBitfield}; mockMemoryManager.getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_TRUE(allocData.flags.useSystemMemory); EXPECT_TRUE(allocData.flags.requiresCpuAccess); } DG2TEST_F(MemoryManagerTestsDg2, givenEnabledLocalMemoryWhenAllocateInternalHeapInSystemPoolThenLocalMemoryPoolIsNotUsed) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MockMemoryManager memoryManager(false, true, executionEnvironment); auto allocation = memoryManager.allocateGraphicsMemoryInPreferredPool({mockRootDeviceIndex, MemoryConstants::pageSize, AllocationType::KERNEL_ISA, mockDeviceBitfield}, nullptr); EXPECT_NE(MemoryPool::LocalMemory, allocation->getMemoryPool()); EXPECT_FALSE(memoryManager.allocationInDevicePoolCreated); memoryManager.freeGraphicsMemory(allocation); } DG2TEST_F(MemoryManagerTestsDg2, givenKernelIsaTypeWhenGetAllocationDataIsCalledThenSystemMemoryIsRequested) { AllocationData allocData; MockMemoryManager mockMemoryManager; AllocationProperties properties{mockRootDeviceIndex, 1, AllocationType::KERNEL_ISA, mockDeviceBitfield}; mockMemoryManager.getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_TRUE(allocData.flags.useSystemMemory); } DG2TEST_F(MemoryManagerTestsDg2, givenRingBufferAllocationWhenGetAllocationDataIsCalledThenItHasProperFieldsSet) { AllocationData allocData; MockMemoryManager mockMemoryManager; AllocationProperties properties{0, 0x10000u, AllocationType::RING_BUFFER, 1}; mockMemoryManager.getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_TRUE(allocData.flags.useSystemMemory); EXPECT_TRUE(allocData.flags.allocateMemory); EXPECT_FALSE(allocData.flags.allow32Bit); EXPECT_FALSE(allocData.flags.allow64kbPages); EXPECT_EQ(0x10000u, allocData.size); EXPECT_EQ(nullptr, allocData.hostPtr); 
EXPECT_TRUE(allocData.flags.requiresCpuAccess); } DG2TEST_F(MemoryManagerTestsDg2, givenSemaphoreBufferAllocationWhenGetAllocationDataIsCalledThenItHasProperFieldsSet) { AllocationData allocData; MockMemoryManager mockMemoryManager; AllocationProperties properties{0, 0x1000u, AllocationType::SEMAPHORE_BUFFER, 1}; mockMemoryManager.getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_TRUE(allocData.flags.useSystemMemory); EXPECT_TRUE(allocData.flags.allocateMemory); EXPECT_FALSE(allocData.flags.allow32Bit); EXPECT_FALSE(allocData.flags.allow64kbPages); EXPECT_EQ(0x1000u, allocData.size); EXPECT_EQ(nullptr, allocData.hostPtr); EXPECT_TRUE(allocData.flags.requiresCpuAccess); } DG2TEST_F(MemoryManagerTestsDg2, givenConstantSurfaceTypeWhenGetAllocationDataIsCalledThenLocalMemoryIsRequestedWithoutCpuAccess) { AllocationData allocData; MockMemoryManager mockMemoryManager; AllocationProperties properties{mockRootDeviceIndex, 1, AllocationType::CONSTANT_SURFACE, mockDeviceBitfield}; mockMemoryManager.getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_FALSE(allocData.flags.useSystemMemory); EXPECT_FALSE(allocData.flags.requiresCpuAccess); } DG2TEST_F(MemoryManagerTestsDg2, givenPrintfAllocationWhenGetAllocationDataIsCalledThenUseSystemMemoryAndRequireCpuAccess) { AllocationData allocData; MockMemoryManager mockMemoryManager; AllocationProperties properties{mockRootDeviceIndex, 1, AllocationType::PRINTF_SURFACE, mockDeviceBitfield}; mockMemoryManager.getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_TRUE(allocData.flags.useSystemMemory); EXPECT_TRUE(allocData.flags.requiresCpuAccess); } DG2TEST_F(MemoryManagerTestsDg2, givenGpuTimestampTagBufferTypeWhenGetAllocationDataIsCalledThenSystemMemoryIsRequested) { AllocationData allocData; MockMemoryManager mockMemoryManager; 
AllocationProperties properties{mockRootDeviceIndex, 1, AllocationType::GPU_TIMESTAMP_DEVICE_BUFFER, mockDeviceBitfield}; mockMemoryManager.getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_TRUE(allocData.flags.useSystemMemory); } compute-runtime-22.14.22890/opencl/test/unit_test/xe_hpg_core/dg2/sampler_tests_dg2.cpp000066400000000000000000000126461422164147700307230ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/os_interface/hw_info_config.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/source/sampler/sampler.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include using namespace NEO; using SamplerTest = Test; HWTEST2_F(SamplerTest, givenDg2SamplerWhenUsingDefaultFilteringAndAppendSamplerStateParamsThenNotEnableLowQualityFilter, IsDG2) { EXPECT_FALSE(DebugManager.flags.ForceSamplerLowFilteringPrecision.get()); typedef typename FamilyType::SAMPLER_STATE SAMPLER_STATE; auto state = FamilyType::cmdInitSamplerState; EXPECT_EQ(SAMPLER_STATE::LOW_QUALITY_FILTER_DISABLE, state.getLowQualityFilter()); HwInfoConfig::get(defaultHwInfo->platform.eProductFamily)->adjustSamplerState(&state, *defaultHwInfo); EXPECT_EQ(SAMPLER_STATE::LOW_QUALITY_FILTER_DISABLE, state.getLowQualityFilter()); } HWTEST2_F(SamplerTest, givenDg2SamplerWhenForcingLowQualityFilteringAndAppendSamplerStateParamsThenEnableLowQualityFilter, IsDG2) { DebugManagerStateRestore dbgRestore; DebugManager.flags.ForceSamplerLowFilteringPrecision.set(true); EXPECT_TRUE(DebugManager.flags.ForceSamplerLowFilteringPrecision.get()); typedef typename FamilyType::SAMPLER_STATE 
SAMPLER_STATE; auto state = FamilyType::cmdInitSamplerState; EXPECT_EQ(SAMPLER_STATE::LOW_QUALITY_FILTER_DISABLE, state.getLowQualityFilter()); HwInfoConfig::get(defaultHwInfo->platform.eProductFamily)->adjustSamplerState(&state, *defaultHwInfo); EXPECT_EQ(SAMPLER_STATE::LOW_QUALITY_FILTER_ENABLE, state.getLowQualityFilter()); } HWTEST2_F(SamplerTest, givenDg2BelowC0WhenProgrammingSamplerForNearestFilterWithMirrorAddressThenRoundEnableForRDirectionIsEnabled, IsDG2) { using SAMPLER_STATE = typename FamilyType::SAMPLER_STATE; const auto &hwInfoConfig = *HwInfoConfig::get(defaultHwInfo->platform.eProductFamily); uint32_t revisions[] = {REVISION_A0, REVISION_B, REVISION_C}; for (auto &revision : revisions) { pDevice->getRootDeviceEnvironment().getMutableHardwareInfo()->platform.usRevId = hwInfoConfig.getHwRevIdFromStepping(revision, *defaultHwInfo); auto context = clUniquePtr(new MockContext()); auto sampler = clUniquePtr(new SamplerHw(context.get(), CL_FALSE, CL_ADDRESS_MIRRORED_REPEAT, CL_FILTER_NEAREST)); auto state = FamilyType::cmdInitSamplerState; EXPECT_FALSE(state.getRAddressMinFilterRoundingEnable()); EXPECT_FALSE(state.getRAddressMagFilterRoundingEnable()); sampler->setArg(&state, pDevice->getHardwareInfo()); if (REVISION_C == revision) { EXPECT_FALSE(state.getRAddressMinFilterRoundingEnable()); EXPECT_FALSE(state.getRAddressMagFilterRoundingEnable()); } else { EXPECT_TRUE(state.getRAddressMinFilterRoundingEnable()); EXPECT_TRUE(state.getRAddressMagFilterRoundingEnable()); } } } HWTEST2_F(SamplerTest, givenDg2BelowC0WhenProgrammingSamplerForNearestFilterWitouthMirrorAddressThenRoundEnableForRDirectionIsDisabled, IsDG2) { using SAMPLER_STATE = typename FamilyType::SAMPLER_STATE; const auto &hwInfoConfig = *HwInfoConfig::get(defaultHwInfo->platform.eProductFamily); uint32_t revisions[] = {REVISION_A0, REVISION_B, REVISION_C}; for (auto &revision : revisions) { pDevice->getRootDeviceEnvironment().getMutableHardwareInfo()->platform.usRevId = 
hwInfoConfig.getHwRevIdFromStepping(revision, *defaultHwInfo); auto context = clUniquePtr(new MockContext()); auto sampler = clUniquePtr(new SamplerHw(context.get(), CL_FALSE, CL_ADDRESS_NONE, CL_FILTER_NEAREST)); auto state = FamilyType::cmdInitSamplerState; EXPECT_FALSE(state.getRAddressMinFilterRoundingEnable()); EXPECT_FALSE(state.getRAddressMagFilterRoundingEnable()); sampler->setArg(&state, pDevice->getHardwareInfo()); EXPECT_FALSE(state.getRAddressMinFilterRoundingEnable()); EXPECT_FALSE(state.getRAddressMagFilterRoundingEnable()); } } HWTEST2_F(SamplerTest, givenDg2BelowC0WhenProgrammingSamplerForLinearFilterWithMirrorAddressThenRoundEnableForRDirectionIsEnabled, IsDG2) { using SAMPLER_STATE = typename FamilyType::SAMPLER_STATE; const auto &hwInfoConfig = *HwInfoConfig::get(defaultHwInfo->platform.eProductFamily); uint32_t revisions[] = {REVISION_A0, REVISION_B, REVISION_C}; for (auto &revision : revisions) { pDevice->getRootDeviceEnvironment().getMutableHardwareInfo()->platform.usRevId = hwInfoConfig.getHwRevIdFromStepping(revision, *defaultHwInfo); auto context = clUniquePtr(new MockContext()); auto sampler = clUniquePtr(new SamplerHw(context.get(), CL_FALSE, CL_ADDRESS_MIRRORED_REPEAT, CL_FILTER_LINEAR)); auto state = FamilyType::cmdInitSamplerState; EXPECT_FALSE(state.getRAddressMinFilterRoundingEnable()); EXPECT_FALSE(state.getRAddressMagFilterRoundingEnable()); sampler->setArg(&state, pDevice->getHardwareInfo()); EXPECT_TRUE(state.getRAddressMinFilterRoundingEnable()); EXPECT_TRUE(state.getRAddressMagFilterRoundingEnable()); } } compute-runtime-22.14.22890/opencl/test/unit_test/xe_hpg_core/dg2/test_cmds_programming_dg2.cpp000066400000000000000000000177041422164147700324250ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gmm_helper/client_context/gmm_client_context.h" #include "shared/source/gmm_helper/gmm_helper.h" #include 
"shared/source/helpers/state_base_address.h" #include "shared/source/os_interface/hw_info_config.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" using namespace NEO; using CmdsProgrammingTestsDg2 = UltCommandStreamReceiverTest; DG2TEST_F(CmdsProgrammingTestsDg2, givenL3ToL1DebugFlagWhenStatelessMocsIsProgrammedThenItHasL1CachingOn) { using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; DebugManagerStateRestore restore; DebugManager.flags.ForceL1Caching.set(1u); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); flushTask(commandStreamReceiver); HardwareParse hwParserCsr; hwParserCsr.parseCommands(commandStreamReceiver.commandStream, 0); hwParserCsr.findHardwareCommands(); ASSERT_NE(nullptr, hwParserCsr.cmdStateBaseAddress); auto stateBaseAddress = static_cast(hwParserCsr.cmdStateBaseAddress); auto actualL1CachePolocy = static_cast(stateBaseAddress->getL1CachePolicyL1CacheControl()); const uint8_t expectedL1CachePolicy = 0; EXPECT_EQ(expectedL1CachePolicy, actualL1CachePolocy); } DG2TEST_F(CmdsProgrammingTestsDg2, givenSpecificProductFamilyWhenAppendingSbaThenProgramWtL1CachePolicy) { auto memoryManager = pDevice->getExecutionEnvironment()->memoryManager.get(); AllocationProperties properties(pDevice->getRootDeviceIndex(), 1, AllocationType::BUFFER, pDevice->getDeviceBitfield()); auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(properties); IndirectHeap indirectHeap(allocation, 1); DispatchFlags flags = DispatchFlagsHelper::createDefaultDispatchFlags(); auto sbaCmd = FamilyType::cmdInitStateBaseAddress; StateBaseAddressHelper::appendStateBaseAddressParameters(&sbaCmd, &indirectHeap, true, 0, pDevice->getRootDeviceEnvironment().getGmmHelper(), false, 
MemoryCompressionState::NotApplicable, true, false, 1u); EXPECT_EQ(FamilyType::STATE_BASE_ADDRESS::L1_CACHE_POLICY_WBP, sbaCmd.getL1CachePolicyL1CacheControl()); memoryManager->freeGraphicsMemory(allocation); } DG2TEST_F(CmdsProgrammingTestsDg2, givenL1CachingOverrideWhenStateBaseAddressIsProgrammedThenItMatchesTheOverrideValue) { DebugManagerStateRestore restorer; DebugManager.flags.ForceStatelessL1CachingPolicy.set(0u); auto memoryManager = pDevice->getExecutionEnvironment()->memoryManager.get(); AllocationProperties properties(pDevice->getRootDeviceIndex(), 1, AllocationType::BUFFER, pDevice->getDeviceBitfield()); auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(properties); IndirectHeap indirectHeap(allocation, 1); DispatchFlags flags = DispatchFlagsHelper::createDefaultDispatchFlags(); auto sbaCmd = FamilyType::cmdInitStateBaseAddress; StateBaseAddressHelper::appendStateBaseAddressParameters(&sbaCmd, &indirectHeap, true, 0, pDevice->getRootDeviceEnvironment().getGmmHelper(), false, MemoryCompressionState::NotApplicable, true, false, 1u); EXPECT_EQ(0u, sbaCmd.getL1CachePolicyL1CacheControl()); DebugManager.flags.ForceStatelessL1CachingPolicy.set(1u); StateBaseAddressHelper::appendStateBaseAddressParameters(&sbaCmd, &indirectHeap, true, 0, pDevice->getRootDeviceEnvironment().getGmmHelper(), false, MemoryCompressionState::NotApplicable, true, false, 1u); EXPECT_EQ(1u, sbaCmd.getL1CachePolicyL1CacheControl()); memoryManager->freeGraphicsMemory(allocation); } DG2TEST_F(CmdsProgrammingTestsDg2, whenAppendingRssThenProgramWtL1CachePolicy) { auto memoryManager = pDevice->getExecutionEnvironment()->memoryManager.get(); size_t allocationSize = MemoryConstants::pageSize; AllocationProperties properties(pDevice->getRootDeviceIndex(), allocationSize, AllocationType::BUFFER, pDevice->getDeviceBitfield()); auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(properties); auto rssCmd = FamilyType::cmdInitRenderSurfaceState; MockContext 
context(pClDevice); auto multiGraphicsAllocation = MultiGraphicsAllocation(pClDevice->getRootDeviceIndex()); multiGraphicsAllocation.addAllocation(allocation); std::unique_ptr> buffer(static_cast *>( BufferHw::create(&context, {}, 0, 0, allocationSize, nullptr, nullptr, multiGraphicsAllocation, false, false, false))); NEO::EncodeSurfaceStateArgs args; args.outMemory = &rssCmd; args.graphicsAddress = allocation->getGpuAddress(); args.size = allocation->getUnderlyingBufferSize(); args.mocs = buffer->getMocsValue(false, false, pClDevice->getRootDeviceIndex()); args.numAvailableDevices = pClDevice->getNumGenericSubDevices(); args.allocation = allocation; args.gmmHelper = pClDevice->getGmmHelper(); args.areMultipleSubDevicesInContext = true; EncodeSurfaceState::encodeBuffer(args); EXPECT_EQ(FamilyType::RENDER_SURFACE_STATE::L1_CACHE_POLICY_WBP, rssCmd.getL1CachePolicyL1CacheControl()); } DG2TEST_F(CmdsProgrammingTestsDg2, givenAlignedCacheableReadOnlyBufferThenChoseOclBufferConstPolicy) { MockContext context; const auto size = MemoryConstants::pageSize; const auto ptr = (void *)alignedMalloc(size * 2, MemoryConstants::pageSize); const auto flags = CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY; auto retVal = CL_SUCCESS; auto buffer = std::unique_ptr(Buffer::create( &context, flags, size, ptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); typename FamilyType::RENDER_SURFACE_STATE surfaceState = {}; buffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice(), false, false); const auto expectedMocs = context.getDevice(0)->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST); const auto actualMocs = surfaceState.getMemoryObjectControlState(); EXPECT_EQ(expectedMocs, actualMocs); auto actualL1CachePolocy = static_cast(surfaceState.getL1CachePolicyL1CacheControl()); const uint8_t expectedL1CachePolicy = 0; EXPECT_EQ(expectedL1CachePolicy, actualL1CachePolocy); alignedFree(ptr); } DG2TEST_F(CmdsProgrammingTestsDg2, 
givenDG2WithBSteppingWhenFlushingTaskThenAdditionalStateBaseAddressCommandIsPresent) { auto &hwInfo = *pDevice->getRootDeviceEnvironment().getMutableHardwareInfo(); const auto &hwInfoConfig = *HwInfoConfig::get(hwInfo.platform.eProductFamily); hwInfo.platform.usRevId = hwInfoConfig.getHwRevIdFromStepping(REVISION_B, hwInfo); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); flushTask(commandStreamReceiver); EXPECT_GT(commandStreamReceiver.commandStream.getUsed(), 0u); parseCommands(commandStreamReceiver.commandStream, 0); auto stateBaseAddressItor = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), stateBaseAddressItor); stateBaseAddressItor++; stateBaseAddressItor = find(stateBaseAddressItor, cmdList.end()); EXPECT_NE(cmdList.end(), stateBaseAddressItor); } compute-runtime-22.14.22890/opencl/test/unit_test/xe_hpg_core/dg2/test_device_caps_dg2.cpp000066400000000000000000000064701422164147700313400ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/hw_helper.h" #include "shared/source/os_interface/hw_info_config.h" #include "shared/test/common/helpers/hw_helper_tests.h" #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "gtest/gtest.h" using namespace NEO; using Dg2UsDeviceIdTest = Test; DG2TEST_F(Dg2UsDeviceIdTest, givenDg2ProductWhenCheckBlitterOperationsSupportThenReturnFalse) { EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.blitterOperationsSupported); } DG2TEST_F(Dg2UsDeviceIdTest, givenDg2ProductWhenCheckFp64SupportThenReturnFalse) { EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.ftrSupportsFP64); } DG2TEST_F(Dg2UsDeviceIdTest, givenDeviceThatHasHighNumberOfExecutionUnitsAndA0SteppingWhenMaxWorkgroupSizeIsComputedThenItIsLimitedTo512) { HardwareInfo myHwInfo = *defaultHwInfo; GT_SYSTEM_INFO &mySysInfo = myHwInfo.gtSystemInfo; PLATFORM &myPlatform = 
myHwInfo.platform; const auto &hwInfoConfig = *HwInfoConfig::get(hardwareInfo.platform.eProductFamily); mySysInfo.EUCount = 32; mySysInfo.SubSliceCount = 2; mySysInfo.DualSubSliceCount = 2; mySysInfo.ThreadCount = 32 * 8; myPlatform.usRevId = hwInfoConfig.getHwRevIdFromStepping(REVISION_A0, hardwareInfo); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&myHwInfo)); EXPECT_EQ(512u, device->sharedDeviceInfo.maxWorkGroupSize); EXPECT_EQ(device->sharedDeviceInfo.maxWorkGroupSize / 8, device->getDeviceInfo().maxNumOfSubGroups); } DG2TEST_F(Dg2UsDeviceIdTest, givenEnabledFtrPooledEuAndA0SteppingWhenCalculatingMaxEuPerSSThenDontIgnoreEuCountPerPoolMin) { HardwareInfo myHwInfo = *defaultHwInfo; GT_SYSTEM_INFO &mySysInfo = myHwInfo.gtSystemInfo; FeatureTable &mySkuTable = myHwInfo.featureTable; PLATFORM &myPlatform = myHwInfo.platform; const auto &hwInfoConfig = *HwInfoConfig::get(hardwareInfo.platform.eProductFamily); mySysInfo.EUCount = 20; mySysInfo.EuCountPerPoolMin = 99999; mySkuTable.flags.ftrPooledEuEnabled = 1; myPlatform.usRevId = hwInfoConfig.getHwRevIdFromStepping(REVISION_A0, hardwareInfo); auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(&myHwInfo)); auto expectedMaxWGS = mySysInfo.EuCountPerPoolMin * (mySysInfo.ThreadCount / mySysInfo.EUCount) * 8; expectedMaxWGS = std::min(Math::prevPowerOfTwo(expectedMaxWGS), 512u); EXPECT_EQ(expectedMaxWGS, device->getDeviceInfo().maxWorkGroupSize); } DG2TEST_F(Dg2UsDeviceIdTest, givenRevisionEnumThenProperMaxThreadsForWorkgroupIsReturned) { const auto &hwInfoConfig = *HwInfoConfig::get(hardwareInfo.platform.eProductFamily); hardwareInfo.platform.usRevId = hwInfoConfig.getHwRevIdFromStepping(REVISION_A0, hardwareInfo); EXPECT_EQ(64u, hwInfoConfig.getMaxThreadsForWorkgroupInDSSOrSS(hardwareInfo, 64u, 64u)); hardwareInfo.platform.usRevId = hwInfoConfig.getHwRevIdFromStepping(REVISION_A1, hardwareInfo); uint32_t numThreadsPerEU = hardwareInfo.gtSystemInfo.ThreadCount / 
hardwareInfo.gtSystemInfo.EUCount; EXPECT_EQ(64u * numThreadsPerEU, hwInfoConfig.getMaxThreadsForWorkgroupInDSSOrSS(hardwareInfo, 64u, 64u)); } compute-runtime-22.14.22890/opencl/test/unit_test/xe_hpg_core/dg2/windows/000077500000000000000000000000001422164147700262575ustar00rootroot00000000000000compute-runtime-22.14.22890/opencl/test/unit_test/xe_hpg_core/dg2/windows/CMakeLists.txt000066400000000000000000000004561422164147700310240ustar00rootroot00000000000000# # Copyright (C) 2021-2022 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_xe_hpg_core_dg2_windows ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) if(WIN32) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_xe_hpg_core_dg2_windows}) add_subdirectories() endif() compute-runtime-22.14.22890/opencl/test/unit_test/xe_hpg_core/excludes_ocl_xe_hpg_core.cpp000066400000000000000000000021401422164147700316250ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" HWTEST_EXCLUDE_PRODUCT(BufferSetSurfaceTests, givenAlignedCacheableReadOnlyBufferThenChoseOclBufferPolicy, IGFX_XE_HPG_CORE); HWTEST_EXCLUDE_PRODUCT(BufferSetSurfaceTests, givenBufferSetSurfaceThatMemoryIsUnalignedToCachelineButReadOnlyThenL3CacheShouldBeStillOn, IGFX_XE_HPG_CORE); HWTEST_EXCLUDE_PRODUCT(XeHPAndLaterImageTests, givenCompressionWhenAppendingImageFromBufferThenTwoIsSetAsCompressionFormat, IGFX_XE_HPG_CORE); HWTEST_EXCLUDE_PRODUCT(XeHPAndLaterImageTests, givenImageFromBufferWhenSettingSurfaceStateThenPickCompressionFormatFromDebugVariable, IGFX_XE_HPG_CORE); HWTEST_EXCLUDE_PRODUCT(HwInfoConfigTest, givenSamplerStateWhenAdjustSamplerStateThenNothingIsChanged, IGFX_XE_HPG_CORE); HWTEST_EXCLUDE_PRODUCT(HwInfoConfigTest, givenHwInfoConfigWhenAskedIfBlitterForImagesIsSupportedThenFalseIsReturned, IGFX_XE_HPG_CORE); HWTEST_EXCLUDE_PRODUCT(HwInfoConfigTest, 
givenHwInfoConfigWhenAskedIfTile64With3DSurfaceOnBCSIsSupportedThenTrueIsReturned, IGFX_XE_HPG_CORE); compute-runtime-22.14.22890/opencl/test/unit_test/xe_hpg_core/image_tests_xe_hpg_core.cpp000066400000000000000000000067331422164147700314740ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/helpers/unit_test_helper.h" #include "opencl/source/helpers/cl_hw_helper.h" #include "opencl/source/helpers/cl_memory_properties_helpers.h" #include "opencl/source/mem_obj/image.h" #include "opencl/test/unit_test/mem_obj/image_compression_fixture.h" XE_HPG_CORETEST_F(ImageCompressionTests, GivenDifferentImageFormatsWhenCreatingImageThenCompressionIsCorrectlySet) { imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_width = 5; imageDesc.image_height = 5; flags = CL_MEM_READ_ONLY; MockContext context; struct ImageFormatCompression { cl_image_format imageFormat; bool isCompressable; }; const std::vector imageFormats = { {{CL_LUMINANCE, CL_UNORM_INT8}, false}, {{CL_LUMINANCE, CL_UNORM_INT16}, false}, {{CL_LUMINANCE, CL_HALF_FLOAT}, false}, {{CL_LUMINANCE, CL_FLOAT}, false}, {{CL_INTENSITY, CL_UNORM_INT8}, false}, {{CL_INTENSITY, CL_UNORM_INT16}, false}, {{CL_INTENSITY, CL_HALF_FLOAT}, false}, {{CL_INTENSITY, CL_FLOAT}, false}, {{CL_A, CL_UNORM_INT16}, false}, {{CL_A, CL_HALF_FLOAT}, false}, {{CL_A, CL_FLOAT}, false}, {{CL_R, CL_UNSIGNED_INT8}, true}, {{CL_R, CL_UNSIGNED_INT16}, true}, {{CL_R, CL_UNSIGNED_INT32}, true}, {{CL_RG, CL_UNSIGNED_INT32}, true}, {{CL_RGBA, CL_UNSIGNED_INT32}, true}}; for (const auto &format : imageFormats) { auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &format.imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); auto image = std::unique_ptr(Image::create( mockContext.get(), ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context.getDevice(0)->getDevice()), flags, 0, surfaceFormat, 
&imageDesc, nullptr, retVal)); ASSERT_NE(nullptr, image); EXPECT_TRUE(myMemoryManager->mockMethodCalled); EXPECT_EQ(format.isCompressable, myMemoryManager->capturedPreferCompressed); } } XE_HPG_CORETEST_F(ImageCompressionTests, givenRedescribableFormatWhenCreatingAllocationThenDoNotPreferCompression) { MockContext context{}; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_width = 5; imageDesc.image_height = 5; auto surfaceFormat = Image::getSurfaceFormatFromTable( flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); auto image = std::unique_ptr(Image::create( mockContext.get(), ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context.getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal)); ASSERT_NE(nullptr, image); EXPECT_EQ(UnitTestHelper::tiledImagesSupported, myMemoryManager->capturedPreferCompressed); imageFormat.image_channel_order = CL_RG; surfaceFormat = Image::getSurfaceFormatFromTable( flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); image = std::unique_ptr(Image::create( mockContext.get(), ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context.getDevice(0)->getDevice()), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal)); ASSERT_NE(nullptr, image); EXPECT_TRUE(myMemoryManager->capturedPreferCompressed); }compute-runtime-22.14.22890/opencl/test/unit_test/xe_hpg_core/test_cmds_programming_xe_hpg_core.cpp000066400000000000000000000530311422164147700335500ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gmm_helper/client_context/gmm_client_context.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/state_base_address.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/test_macros/test.h" #include 
"shared/test/unit_test/preamble/preamble_fixture.h" #include "opencl/source/helpers/cl_memory_properties_helpers.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/mem_obj/image.h" #include "opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" using namespace NEO; using CmdsProgrammingTestsXeHpgCore = UltCommandStreamReceiverTest; XE_HPG_CORETEST_F(CmdsProgrammingTestsXeHpgCore, givenL3ToL1DebugFlagWhenStatelessMocsIsProgrammedThenItHasL1CachingOn) { using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; DebugManagerStateRestore restore; DebugManager.flags.ForceL1Caching.set(1u); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); flushTask(commandStreamReceiver); HardwareParse hwParserCsr; hwParserCsr.parseCommands(commandStreamReceiver.commandStream, 0); hwParserCsr.findHardwareCommands(); ASSERT_NE(nullptr, hwParserCsr.cmdStateBaseAddress); auto stateBaseAddress = static_cast(hwParserCsr.cmdStateBaseAddress); auto actualL1CachePolocy = static_cast(stateBaseAddress->getL1CachePolicyL1CacheControl()); const uint8_t expectedL1CachePolicy = 0; EXPECT_EQ(expectedL1CachePolicy, actualL1CachePolocy); } XE_HPG_CORETEST_F(CmdsProgrammingTestsXeHpgCore, givenSpecificProductFamilyWhenAppendingSbaThenProgramWtL1CachePolicy) { auto memoryManager = pDevice->getExecutionEnvironment()->memoryManager.get(); AllocationProperties properties(pDevice->getRootDeviceIndex(), 1, AllocationType::BUFFER, pDevice->getDeviceBitfield()); auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(properties); IndirectHeap indirectHeap(allocation, 1); DispatchFlags flags = DispatchFlagsHelper::createDefaultDispatchFlags(); auto sbaCmd = FamilyType::cmdInitStateBaseAddress; StateBaseAddressHelper::appendStateBaseAddressParameters(&sbaCmd, &indirectHeap, true, 0, pDevice->getRootDeviceEnvironment().getGmmHelper(), false, MemoryCompressionState::NotApplicable, true, 
false, 1u); EXPECT_EQ(FamilyType::STATE_BASE_ADDRESS::L1_CACHE_POLICY_WBP, sbaCmd.getL1CachePolicyL1CacheControl()); memoryManager->freeGraphicsMemory(allocation); } XE_HPG_CORETEST_F(CmdsProgrammingTestsXeHpgCore, givenL1CachingOverrideWhenStateBaseAddressIsProgrammedThenItMatchesTheOverrideValue) { DebugManagerStateRestore restorer; DebugManager.flags.ForceStatelessL1CachingPolicy.set(0u); auto memoryManager = pDevice->getExecutionEnvironment()->memoryManager.get(); AllocationProperties properties(pDevice->getRootDeviceIndex(), 1, AllocationType::BUFFER, pDevice->getDeviceBitfield()); auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(properties); IndirectHeap indirectHeap(allocation, 1); DispatchFlags flags = DispatchFlagsHelper::createDefaultDispatchFlags(); auto sbaCmd = FamilyType::cmdInitStateBaseAddress; StateBaseAddressHelper::appendStateBaseAddressParameters(&sbaCmd, &indirectHeap, true, 0, pDevice->getRootDeviceEnvironment().getGmmHelper(), false, MemoryCompressionState::NotApplicable, true, false, 1u); EXPECT_EQ(0u, sbaCmd.getL1CachePolicyL1CacheControl()); DebugManager.flags.ForceStatelessL1CachingPolicy.set(1u); StateBaseAddressHelper::appendStateBaseAddressParameters(&sbaCmd, &indirectHeap, true, 0, pDevice->getRootDeviceEnvironment().getGmmHelper(), false, MemoryCompressionState::NotApplicable, true, false, 1u); EXPECT_EQ(1u, sbaCmd.getL1CachePolicyL1CacheControl()); memoryManager->freeGraphicsMemory(allocation); } XE_HPG_CORETEST_F(CmdsProgrammingTestsXeHpgCore, whenAppendingRssThenProgramWtL1CachePolicy) { auto memoryManager = pDevice->getExecutionEnvironment()->memoryManager.get(); size_t allocationSize = MemoryConstants::pageSize; AllocationProperties properties(pDevice->getRootDeviceIndex(), allocationSize, AllocationType::BUFFER, pDevice->getDeviceBitfield()); auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(properties); auto rssCmd = FamilyType::cmdInitRenderSurfaceState; MockContext context(pClDevice); 
auto multiGraphicsAllocation = MultiGraphicsAllocation(pClDevice->getRootDeviceIndex()); multiGraphicsAllocation.addAllocation(allocation); std::unique_ptr> buffer(static_cast *>( BufferHw::create(&context, {}, 0, 0, allocationSize, nullptr, nullptr, multiGraphicsAllocation, false, false, false))); NEO::EncodeSurfaceStateArgs args; args.outMemory = &rssCmd; args.graphicsAddress = allocation->getGpuAddress(); args.size = allocation->getUnderlyingBufferSize(); args.mocs = buffer->getMocsValue(false, false, pClDevice->getRootDeviceIndex()); args.numAvailableDevices = pClDevice->getNumGenericSubDevices(); args.allocation = allocation; args.gmmHelper = pClDevice->getGmmHelper(); args.areMultipleSubDevicesInContext = true; EncodeSurfaceState::encodeBuffer(args); EXPECT_EQ(FamilyType::RENDER_SURFACE_STATE::L1_CACHE_POLICY_WBP, rssCmd.getL1CachePolicyL1CacheControl()); } XE_HPG_CORETEST_F(CmdsProgrammingTestsXeHpgCore, givenAlignedCacheableReadOnlyBufferThenChoseOclBufferConstPolicy) { MockContext context; const auto size = MemoryConstants::pageSize; const auto ptr = (void *)alignedMalloc(size * 2, MemoryConstants::pageSize); const auto flags = CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY; auto retVal = CL_SUCCESS; auto buffer = std::unique_ptr(Buffer::create( &context, flags, size, ptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); typename FamilyType::RENDER_SURFACE_STATE surfaceState = {}; buffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice(), false, false); const auto expectedMocs = context.getDevice(0)->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST); const auto actualMocs = surfaceState.getMemoryObjectControlState(); EXPECT_EQ(expectedMocs, actualMocs); auto actualL1CachePolocy = static_cast(surfaceState.getL1CachePolicyL1CacheControl()); const uint8_t expectedL1CachePolicy = 0; EXPECT_EQ(expectedL1CachePolicy, actualL1CachePolocy); alignedFree(ptr); } XE_HPG_CORETEST_F(CmdsProgrammingTestsXeHpgCore, 
givenDecompressInL3ForImage2dFromBufferEnabledWhenProgrammingStateForImage2dFrom3dCompressedBufferThenCorrectFlagsAreSet) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using AUXILIARY_SURFACE_MODE = typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE; MockContext context; cl_int retVal = CL_SUCCESS; cl_image_format imageFormat = {}; cl_image_desc imageDesc = {}; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_R; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_height = 128; imageDesc.image_width = 256; imageDesc.mem_object = clCreateBuffer(&context, CL_MEM_READ_WRITE, imageDesc.image_height * imageDesc.image_width, nullptr, &retVal); auto gmm = new Gmm(context.getDevice(0)->getGmmHelper()->getClientContext(), nullptr, 1, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true); gmm->isCompressionEnabled = true; auto buffer = castToObject(imageDesc.mem_object); buffer->getGraphicsAllocation(0)->setGmm(gmm, 0); auto gmmResourceInfo = buffer->getMultiGraphicsAllocation().getDefaultGraphicsAllocation()->getDefaultGmm()->gmmResourceInfo.get(); auto bufferCompressionFormat = context.getDevice(0)->getGmmClientContext()->getSurfaceStateCompressionFormat(gmmResourceInfo->getResourceFormat()); auto surfaceFormat = Image::getSurfaceFormatFromTable(CL_MEM_READ_WRITE, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); auto image = std::unique_ptr(Image::create(&context, ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_READ_WRITE, 0, 0, &context.getDevice(0)->getDevice()), CL_MEM_READ_WRITE, 0, surfaceFormat, &imageDesc, NULL, retVal)); auto imageHw = static_cast *>(image.get()); { DebugManagerStateRestore restorer; DebugManager.flags.DecompressInL3ForImage2dFromBuffer.set(-1); uint32_t forcedCompressionFormat = 3; DebugManager.flags.ForceBufferCompressionFormat.set(forcedCompressionFormat); auto surfaceState = FamilyType::cmdInitRenderSurfaceState; 
imageHw->setImageArg(&surfaceState, false, 0, context.getDevice(0)->getRootDeviceIndex(), false); EXPECT_EQ(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE, surfaceState.getAuxiliarySurfaceMode()); EXPECT_EQ(1u, surfaceState.getDecompressInL3()); EXPECT_EQ(1u, surfaceState.getMemoryCompressionEnable()); EXPECT_EQ(RENDER_SURFACE_STATE::MEMORY_COMPRESSION_TYPE::MEMORY_COMPRESSION_TYPE_3D_COMPRESSION, surfaceState.getMemoryCompressionType()); EXPECT_EQ(forcedCompressionFormat, surfaceState.getCompressionFormat()); } { DebugManagerStateRestore restorer; DebugManager.flags.DecompressInL3ForImage2dFromBuffer.set(1); auto surfaceState = FamilyType::cmdInitRenderSurfaceState; imageHw->setImageArg(&surfaceState, false, 0, context.getDevice(0)->getRootDeviceIndex(), false); EXPECT_EQ(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE, surfaceState.getAuxiliarySurfaceMode()); EXPECT_EQ(1u, surfaceState.getDecompressInL3()); EXPECT_EQ(1u, surfaceState.getMemoryCompressionEnable()); EXPECT_EQ(RENDER_SURFACE_STATE::MEMORY_COMPRESSION_TYPE::MEMORY_COMPRESSION_TYPE_3D_COMPRESSION, surfaceState.getMemoryCompressionType()); EXPECT_EQ(bufferCompressionFormat, surfaceState.getCompressionFormat()); } { DebugManagerStateRestore restorer; DebugManager.flags.DecompressInL3ForImage2dFromBuffer.set(0); auto surfaceState = FamilyType::cmdInitRenderSurfaceState; imageHw->setImageArg(&surfaceState, false, 0, context.getDevice(0)->getRootDeviceIndex(), false); EXPECT_EQ(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_CCS_E, surfaceState.getAuxiliarySurfaceMode()); EXPECT_EQ(0u, surfaceState.getDecompressInL3()); EXPECT_EQ(0u, surfaceState.getMemoryCompressionEnable()); EXPECT_EQ(RENDER_SURFACE_STATE::MEMORY_COMPRESSION_TYPE::MEMORY_COMPRESSION_TYPE_MEDIA_COMPRESSION, surfaceState.getMemoryCompressionType()); EXPECT_EQ(bufferCompressionFormat, surfaceState.getCompressionFormat()); } clReleaseMemObject(imageDesc.mem_object); } XE_HPG_CORETEST_F(CmdsProgrammingTestsXeHpgCore, 
givenDecompressInL3ForImage2dFromBufferEnabledWhenProgrammingStateForImage1dFromCompressedBufferThenCorrectFlagsAreSet) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using AUXILIARY_SURFACE_MODE = typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE; MockContext context; cl_int retVal = CL_SUCCESS; cl_image_format imageFormat = {}; cl_image_desc imageDesc = {}; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_R; imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D_BUFFER; imageDesc.image_height = 1; imageDesc.image_width = 256; imageDesc.mem_object = clCreateBuffer(&context, CL_MEM_READ_WRITE, imageDesc.image_height * imageDesc.image_width, nullptr, &retVal); auto gmm = new Gmm(context.getDevice(0)->getGmmHelper()->getClientContext(), nullptr, 1, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true); gmm->isCompressionEnabled = true; auto buffer = castToObject(imageDesc.mem_object); buffer->getGraphicsAllocation(0)->setGmm(gmm, 0); auto surfaceFormat = Image::getSurfaceFormatFromTable(CL_MEM_READ_WRITE, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); auto image = std::unique_ptr(Image::create(&context, ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_READ_WRITE, 0, 0, &context.getDevice(0)->getDevice()), CL_MEM_READ_WRITE, 0, surfaceFormat, &imageDesc, NULL, retVal)); auto imageHw = static_cast *>(image.get()); for (auto &decompressInL3 : ::testing::Bool()) { DebugManagerStateRestore restorer; DebugManager.flags.DecompressInL3ForImage2dFromBuffer.set(decompressInL3); auto surfaceState = FamilyType::cmdInitRenderSurfaceState; imageHw->setImageArg(&surfaceState, false, 0, context.getDevice(0)->getRootDeviceIndex(), false); EXPECT_EQ(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_CCS_E, surfaceState.getAuxiliarySurfaceMode()); EXPECT_EQ(0u, surfaceState.getDecompressInL3()); EXPECT_EQ(0u, surfaceState.getMemoryCompressionEnable()); 
EXPECT_EQ(RENDER_SURFACE_STATE::MEMORY_COMPRESSION_TYPE::MEMORY_COMPRESSION_TYPE_MEDIA_COMPRESSION, surfaceState.getMemoryCompressionType()); } clReleaseMemObject(imageDesc.mem_object); } XE_HPG_CORETEST_F(CmdsProgrammingTestsXeHpgCore, givenDecompressInL3ForImage2dFromBufferEnabledWhenProgrammingStateForImage2dFromNotCompressedBufferThenCorrectFlagsAreSet) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using AUXILIARY_SURFACE_MODE = typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE; MockContext context; cl_int retVal = CL_SUCCESS; cl_image_format imageFormat = {}; cl_image_desc imageDesc = {}; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_R; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_height = 128; imageDesc.image_width = 256; imageDesc.mem_object = clCreateBuffer(&context, CL_MEM_READ_WRITE, imageDesc.image_height * imageDesc.image_width, nullptr, &retVal); auto gmm = new Gmm(context.getDevice(0)->getGmmHelper()->getClientContext(), nullptr, 1, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true); gmm->isCompressionEnabled = false; auto buffer = castToObject(imageDesc.mem_object); buffer->getGraphicsAllocation(0)->setGmm(gmm, 0); auto surfaceFormat = Image::getSurfaceFormatFromTable(CL_MEM_READ_WRITE, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); auto image = std::unique_ptr(Image::create(&context, ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_READ_WRITE, 0, 0, &context.getDevice(0)->getDevice()), CL_MEM_READ_WRITE, 0, surfaceFormat, &imageDesc, NULL, retVal)); auto imageHw = static_cast *>(image.get()); for (auto &decompressInL3 : ::testing::Bool()) { DebugManagerStateRestore restorer; DebugManager.flags.DecompressInL3ForImage2dFromBuffer.set(decompressInL3); auto surfaceState = FamilyType::cmdInitRenderSurfaceState; imageHw->setImageArg(&surfaceState, false, 0, context.getDevice(0)->getRootDeviceIndex(), false); 
EXPECT_EQ(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE, surfaceState.getAuxiliarySurfaceMode()); EXPECT_EQ(0u, surfaceState.getDecompressInL3()); EXPECT_EQ(0u, surfaceState.getMemoryCompressionEnable()); EXPECT_EQ(RENDER_SURFACE_STATE::MEMORY_COMPRESSION_TYPE::MEMORY_COMPRESSION_TYPE_MEDIA_COMPRESSION, surfaceState.getMemoryCompressionType()); } clReleaseMemObject(imageDesc.mem_object); } XE_HPG_CORETEST_F(CmdsProgrammingTestsXeHpgCore, givenDecompressInL3ForImage2dFromBufferEnabledWhenProgrammingStateForImage2dFromMediaCompressedBufferThenCorrectFlagsAreSet) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using AUXILIARY_SURFACE_MODE = typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE; MockContext context; cl_int retVal = CL_SUCCESS; cl_image_format imageFormat = {}; cl_image_desc imageDesc = {}; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_R; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_height = 128; imageDesc.image_width = 256; imageDesc.mem_object = clCreateBuffer(&context, CL_MEM_READ_WRITE, imageDesc.image_height * imageDesc.image_width, nullptr, &retVal); auto gmm = new Gmm(context.getDevice(0)->getGmmHelper()->getClientContext(), nullptr, 1, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true); gmm->isCompressionEnabled = true; gmm->gmmResourceInfo->getResourceFlags()->Info.MediaCompressed = true; auto buffer = castToObject(imageDesc.mem_object); buffer->getGraphicsAllocation(0)->setGmm(gmm, 0); auto surfaceFormat = Image::getSurfaceFormatFromTable(CL_MEM_READ_WRITE, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.supportsOcl21Features); auto image = std::unique_ptr(Image::create(&context, ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_READ_WRITE, 0, 0, &context.getDevice(0)->getDevice()), CL_MEM_READ_WRITE, 0, surfaceFormat, &imageDesc, NULL, retVal)); auto imageHw = static_cast *>(image.get()); for (auto &decompressInL3 : 
::testing::Bool()) { DebugManagerStateRestore restorer; DebugManager.flags.DecompressInL3ForImage2dFromBuffer.set(decompressInL3); auto surfaceState = FamilyType::cmdInitRenderSurfaceState; imageHw->setImageArg(&surfaceState, false, 0, context.getDevice(0)->getRootDeviceIndex(), false); EXPECT_EQ(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE, surfaceState.getAuxiliarySurfaceMode()); EXPECT_EQ(0u, surfaceState.getDecompressInL3()); EXPECT_EQ(1u, surfaceState.getMemoryCompressionEnable()); EXPECT_EQ(RENDER_SURFACE_STATE::MEMORY_COMPRESSION_TYPE::MEMORY_COMPRESSION_TYPE_MEDIA_COMPRESSION, surfaceState.getMemoryCompressionType()); } clReleaseMemObject(imageDesc.mem_object); } using PreambleCfeState = PreambleFixture; HWTEST2_F(PreambleCfeState, givenXehpAndDisabledFusedEuWhenCfeStateProgrammedThenFusedEuDispatchSetToTrue, IsXeHpgCore) { using CFE_STATE = typename FamilyType::CFE_STATE; auto hwInfo = *defaultHwInfo; hwInfo.capabilityTable.fusedEuEnabled = false; auto pVfeCmd = PreambleHelper::getSpaceForVfeState(&linearStream, hwInfo, EngineGroupType::RenderCompute); StreamProperties streamProperties{}; streamProperties.frontEndState.setProperties(false, false, false, false, hwInfo); PreambleHelper::programVfeState(pVfeCmd, hwInfo, 0u, 0, 0, streamProperties); parseCommands(linearStream); auto cfeStateIt = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), cfeStateIt); auto cfeState = reinterpret_cast(*cfeStateIt); EXPECT_TRUE(cfeState->getFusedEuDispatch()); } HWTEST2_F(PreambleCfeState, givenXehpEnabledFusedEuAndDisableFusedDispatchFromKernelWhenCfeStateProgrammedThenFusedEuDispatchSetToFalse, IsXeHpgCore) { using CFE_STATE = typename FamilyType::CFE_STATE; DebugManagerStateRestore dbgRestorer; DebugManager.flags.CFEFusedEUDispatch.set(0); auto hwInfo = *defaultHwInfo; hwInfo.capabilityTable.fusedEuEnabled = true; auto pVfeCmd = PreambleHelper::getSpaceForVfeState(&linearStream, hwInfo, EngineGroupType::RenderCompute); StreamProperties 
streamProperties{}; streamProperties.frontEndState.setProperties(false, true, false, false, hwInfo); PreambleHelper::programVfeState(pVfeCmd, hwInfo, 0u, 0, 0, streamProperties); parseCommands(linearStream); auto cfeStateIt = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), cfeStateIt); auto cfeState = reinterpret_cast(*cfeStateIt); EXPECT_FALSE(cfeState->getFusedEuDispatch()); } HWTEST2_F(PreambleCfeState, givenXehpAndEnabledFusedEuWhenCfeStateProgrammedThenFusedEuDispatchSetToFalse, IsXeHpgCore) { using CFE_STATE = typename FamilyType::CFE_STATE; auto hwInfo = *defaultHwInfo; hwInfo.capabilityTable.fusedEuEnabled = true; auto pVfeCmd = PreambleHelper::getSpaceForVfeState(&linearStream, hwInfo, EngineGroupType::RenderCompute); StreamProperties streamProperties{}; streamProperties.frontEndState.setProperties(false, false, false, false, hwInfo); PreambleHelper::programVfeState(pVfeCmd, hwInfo, 0u, 0, 0, streamProperties); parseCommands(linearStream); auto cfeStateIt = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), cfeStateIt); auto cfeState = reinterpret_cast(*cfeStateIt); EXPECT_FALSE(cfeState->getFusedEuDispatch()); } compute-runtime-22.14.22890/opencl/test/unit_test/xe_hpg_core/test_device_caps_xe_hpg_core.cpp000066400000000000000000000100711422164147700324620ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/hw_helper.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/helpers/gtest_helpers.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" using namespace NEO; typedef Test XeHpgCoreDeviceCaps; XE_HPG_CORETEST_F(XeHpgCoreDeviceCaps, givenXeHpgCoreWhenCheckFtrSupportsInteger64BitAtomicsThenReturnTrue) { EXPECT_TRUE(pDevice->getHardwareInfo().capabilityTable.ftrSupportsInteger64BitAtomics); } XE_HPG_CORETEST_F(XeHpgCoreDeviceCaps, givenXeHpgCoreWhenCheckingImageSupportThenReturnTrue) { 
EXPECT_TRUE(pDevice->getHardwareInfo().capabilityTable.supportsImages); } XE_HPG_CORETEST_F(XeHpgCoreDeviceCaps, givenXeHpgCoreWhenCheckingMediaBlockSupportThenReturnTrue) { EXPECT_TRUE(pDevice->getHardwareInfo().capabilityTable.supportsMediaBlock); } XE_HPG_CORETEST_F(XeHpgCoreDeviceCaps, givenXeHpgCoreWhenCheckingCoherencySupportThenReturnFalse) { EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.ftrSupportsCoherency); } XE_HPG_CORETEST_F(XeHpgCoreDeviceCaps, givenXeHpgCoreWhenCheckExtensionsThenDeviceDoesNotReportClKhrSubgroupsExtension) { const auto &caps = pClDevice->getDeviceInfo(); EXPECT_FALSE(hasSubstr(caps.deviceExtensions, std::string("cl_khr_subgroups"))); } XE_HPG_CORETEST_F(XeHpgCoreDeviceCaps, giveDeviceExtensionsWhenDeviceCapsInitializedThenAddProperExtensions) { const auto &caps = pClDevice->getDeviceInfo(); EXPECT_TRUE(hasSubstr(caps.deviceExtensions, std::string("cl_intel_create_buffer_with_properties"))); EXPECT_TRUE(hasSubstr(caps.deviceExtensions, std::string("cl_intel_dot_accumulate"))); EXPECT_TRUE(hasSubstr(caps.deviceExtensions, std::string("cl_intel_subgroup_local_block_io"))); EXPECT_TRUE(hasSubstr(caps.deviceExtensions, std::string("cl_intel_subgroup_matrix_multiply_accumulate"))); } XE_HPG_CORETEST_F(XeHpgCoreDeviceCaps, givenXeHpgCoreWhenCheckingCapsThenDeviceDoesNotSupportIndependentForwardProgress) { const auto &caps = pClDevice->getDeviceInfo(); EXPECT_FALSE(caps.independentForwardProgress); } XE_HPG_CORETEST_F(XeHpgCoreDeviceCaps, givenEnabledFtrPooledEuAndNotA0SteppingWhenCalculatingMaxEuPerSSThenDontIgnoreEuCountPerPoolMin) { HardwareInfo myHwInfo = *defaultHwInfo; GT_SYSTEM_INFO &mySysInfo = myHwInfo.gtSystemInfo; FeatureTable &mySkuTable = myHwInfo.featureTable; PLATFORM &myPlatform = myHwInfo.platform; mySysInfo.EUCount = 20; mySysInfo.EuCountPerPoolMin = 99999; mySkuTable.flags.ftrPooledEuEnabled = 1; myPlatform.usRevId = 0x4; auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(&myHwInfo)); auto 
expectedMaxWGS = mySysInfo.EuCountPerPoolMin * (mySysInfo.ThreadCount / mySysInfo.EUCount) * 8; expectedMaxWGS = std::min(Math::prevPowerOfTwo(expectedMaxWGS), 1024u); EXPECT_EQ(expectedMaxWGS, device->getDeviceInfo().maxWorkGroupSize); } XE_HPG_CORETEST_F(XeHpgCoreDeviceCaps, givenDeviceThatHasHighNumberOfExecutionUnitsAndNotA0SteppingWhenMaxWorkgroupSizeIsComputedThenItIsLimitedTo1024) { HardwareInfo myHwInfo = *defaultHwInfo; GT_SYSTEM_INFO &mySysInfo = myHwInfo.gtSystemInfo; PLATFORM &myPlatform = myHwInfo.platform; mySysInfo.EUCount = 32; mySysInfo.SubSliceCount = 2; mySysInfo.DualSubSliceCount = 2; mySysInfo.ThreadCount = 32 * 8; // 128 threads per subslice, in simd 8 gives 1024 myPlatform.usRevId = 0x4; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&myHwInfo)); EXPECT_EQ(1024u, device->sharedDeviceInfo.maxWorkGroupSize); EXPECT_EQ(device->sharedDeviceInfo.maxWorkGroupSize / 8, device->getDeviceInfo().maxNumOfSubGroups); } HWTEST_EXCLUDE_PRODUCT(DeviceGetCapsTest, givenEnabledFtrPooledEuWhenCalculatingMaxEuPerSSThenDontIgnoreEuCountPerPoolMin, IGFX_XE_HPG_CORE); HWTEST_EXCLUDE_PRODUCT(DeviceGetCapsTest, givenDeviceThatHasHighNumberOfExecutionUnitsWhenMaxWorkgroupSizeIsComputedItIsLimitedTo1024, IGFX_XE_HPG_CORE); compute-runtime-22.14.22890/opencl/test/unit_test/xe_hpg_core/test_sample_xe_hpg_core.cpp000066400000000000000000000006571422164147700315070ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" using namespace NEO; typedef Test XeHpgCoreOnlyTeset; XE_HPG_CORETEST_F(XeHpgCoreOnlyTeset, WhenGettingRenderCoreFamilyThenXeHpgCoreIsReturned) { EXPECT_EQ(IGFX_XE_HPG_CORE, pDevice->getRenderCoreFamily()); } compute-runtime-22.14.22890/os_release_info.cmake000066400000000000000000000133071422164147700215140ustar00rootroot00000000000000# # Copyright (C) 2018-2020 
Intel Corporation # # SPDX-License-Identifier: MIT # if(NOT DEFINED _os_release_info) set(_os_release_info TRUE) # os_release_info.cmake - Function to dump OS name and version # This file has no dependencies on other files (e.g., functions or definitions) # of the local cmake environment. # Set cmake policies for at least this level: cmake_minimum_required(VERSION 2.8.12) # Function get_os_release_info - Determine and return OS name and version # # Args: # 1. the name of a variable to receive os_name # 2. the name of a variable to receive os_version # # Return values: (Quotation marks are always stripped). # Upon failure, return values are null strings. # # Examples: # os_name os_version # -------------- ------- # clear-linux-os 21180 (Changes twice daily) # ubuntu 12.04 16.04 17.10 18.04 # fedora 27 # centos 6.9 7.4.1708 # # Potential sources are tried (in order of preference) until a # suitable one is found. # Implementation documentation: # # The potential sources, in order, are as follows. # - /etc/centos-release # Centos 7 also has /etc/os-release. File /etc/os-release is less # precise about the Centos version (e.g., "7" instead of "7.4.1708"). # For that reason, this file is checked first. # Examples: # CentOS release 6.9 (Final) # CentOS Linux release 7.4.1708 (Core) # - /usr/lib/os-release # Present for Clear Linux, modern Fedora, and Ubuntu since some time # between 14.04 and 16.04. The ID and VERSION_ID values are used. # Examples: # ID=clear-linux-os VERSION_ID=21180 # ID=fedora VERSION_ID=27 # ID=ubuntu VERSION_ID="14.04" # ID=ubuntu VERSION_ID="16.04" # ID="ubuntu" VERSION_ID="17.10" # - /etc/os-release - Same form as (sometimes a link to) /usr/lib/os-release # ID="Ubuntu" VERSION_ID="12.04" # ID="Ubuntu" VERSION_ID="14.04" # with a symbolic link: /etc/os-release -> ../usr/lib/os-release # ID="CentOS Linux" VERSION_ID="7" Also: ID_LIKE="rhel fedora" # - /etc/lsb-release # For Centos, not too meaningful. 
# Other "OS"s are more reasonable: # DISTRIB_ID=Ubuntu DISTRIB_RELEASE=12.04 # DISTRIB_ID=Ubuntu DISTRIB_RELEASE=14.04 # DISTRIB_ID=Ubuntu DISTRIB_RELEASE=17.10 function(get_os_release_info _vn_id _vn_version_id _vn_codename) set(_var_id "") set(_var_version_id "") set(_var_codename "") if("${_var_id}" STREQUAL "") set(file_path "/etc/centos-release") if(EXISTS "${file_path}") # Example: CentOS release 6.9 (Final) file(STRINGS "${file_path}" file_list LIMIT_COUNT 1) list(GET file_list 0 file_line) # Remove all parenthesized items. string(REGEX REPLACE "\\([^)]+\\)" "" file_line "${file_line}") # Extract start and end, discard optional "version" or "release" string(REGEX MATCH "^([A-Za-z0-9_]+)( +(version|release))? +(.*)$" _dummy "${file_line}") # 1 2 3 4 set(_var_id "${CMAKE_MATCH_1}") set(_var_version_id "${CMAKE_MATCH_4}") endif() endif() if("${_var_id}" STREQUAL "") if(EXISTS "/usr/lib/os-release") set(file_path "/usr/lib/os-release") elseif(EXISTS "/etc/os-release") set(file_path "/etc/os-release") else() set(file_path "") endif() if(NOT "${file_path}" STREQUAL "") file(STRINGS "${file_path}" data_list REGEX "^(ID|VERSION_ID|VERSION_CODENAME)=") # Look for lines like "ID="..." and VERSION_ID="..." foreach(_var ${data_list}) if("${_var}" MATCHES "^(ID)=(.*)$") set(_var_id "${CMAKE_MATCH_2}") elseif("${_var}" MATCHES "^(VERSION_ID)=(.*)$") set(_var_version_id "${CMAKE_MATCH_2}") elseif("${_var}" MATCHES "^(VERSION_CODENAME)=(.*)$") set(_var_codename "${CMAKE_MATCH_2}") endif() endforeach() endif() endif() if("${_var_id}" STREQUAL "") set(file_path "/etc/lsb-release") if(EXISTS "${file_path}") file(STRINGS "${file_path}" data_list REGEX "^(DISTRIB_ID|DISTRIB_RELEASE|DISTRIB_CODENAME)=") # Look for lines like "DISTRIB_ID="..." and DISTRIB_RELEASE="..." 
foreach(_var ${data_list}) if("${_var}" MATCHES "^(DISTRIB_ID)=(.*)$") set(_var_id "${CMAKE_MATCH_2}") elseif("${_var}" MATCHES "^(DISTRIB_RELEASE)=(.*)$") set(_var_version_id "${CMAKE_MATCH_2}") elseif("${_var}" MATCHES "^(DISTRIB_CODENAME)=(.*)$") set(_var_codename "${CMAKE_MATCH_2}") endif() endforeach() endif() endif() string(TOLOWER "${_var_id}" "_var_id") string(STRIP "${_var_id}" _var_id) string(STRIP "${_var_version_id}" _var_version_id) string(STRIP "${_var_codename}" _var_codename) # Remove any enclosing quotation marks string(REGEX REPLACE "^\"(.*)\"$" "\\1" _var_id "${_var_id}") string(REGEX REPLACE "^\"(.*)\"$" "\\1" _var_version_id "${_var_version_id}") string(REGEX REPLACE "^\"(.*)\"$" "\\1" _var_codename "${_var_codename}") if(NOT "${_vn_id}" STREQUAL "") set(${_vn_id} "${_var_id}" PARENT_SCOPE) endif() if(NOT "${_vn_version_id}" STREQUAL "") set(${_vn_version_id} "${_var_version_id}" PARENT_SCOPE) endif() if(NOT "${_vn_codename}" STREQUAL "") set(${_vn_codename} "${_var_codename}" PARENT_SCOPE) endif() endfunction() endif() compute-runtime-22.14.22890/package.cmake000066400000000000000000000257641422164147700177650ustar00rootroot00000000000000# # Copyright (C) 2018-2021 Intel Corporation # # SPDX-License-Identifier: MIT # if(CMAKE_SIZEOF_VOID_P EQUAL 8) set(CPACK_PACKAGE_ARCHITECTURE "x86_64") else() set(CPACK_PACKAGE_ARCHITECTURE "x86") endif() set(CPACK_PACKAGE_RELOCATABLE FALSE) set(CPACK_PACKAGE_NAME "intel") set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "Intel(R) Graphics Compute Runtime") set(CPACK_PACKAGE_VENDOR "Intel") if(NEO_BUILD_L0_PACKAGE) set(CPACK_PACKAGE_VERSION_MAJOR ${NEO_L0_VERSION_MAJOR}) set(CPACK_PACKAGE_VERSION_MINOR ${NEO_L0_VERSION_MINOR}) else() set(CPACK_PACKAGE_VERSION_MAJOR ${NEO_OCL_VERSION_MAJOR}) set(CPACK_PACKAGE_VERSION_MINOR ${NEO_OCL_VERSION_MINOR}) endif() set(CPACK_PACKAGE_VERSION_PATCH ${NEO_VERSION_BUILD}) if(UNIX) set(package_input_dir ${NEO_BINARY_DIR}/packageinput) set(package_output_dir 
${NEO_BINARY_DIR}/packages)

  # A single CPack run can describe only one of the two products.
  if(NEO_BUILD_OCL_PACKAGE AND NEO_BUILD_L0_PACKAGE)
    message(FATAL_ERROR "OpenCL and LevelZero packages cannot be created simultaneously")
  endif()

  # Fallback version numbers used in the package file names below.
  # NOTE(review): CPACK_PACKAGE_VERSION_MAJOR/MINOR/PATCH are computed from
  # these variables earlier in this file, before the fallbacks are applied, so
  # they stay empty when the variables are undefined - confirm callers always
  # define them.
  if(NOT DEFINED NEO_OCL_VERSION_MAJOR)
    set(NEO_OCL_VERSION_MAJOR 1)
  endif()
  if(NOT DEFINED NEO_OCL_VERSION_MINOR)
    set(NEO_OCL_VERSION_MINOR 0)
  endif()
  if(NOT DEFINED NEO_VERSION_BUILD)
    set(NEO_VERSION_BUILD 0)
  endif()

  include("os_release_info.cmake")

  get_os_release_info(os_name os_version os_codename)

  if(NOT DEFINED OCL_ICD_VENDORDIR)
    if("${os_name}" STREQUAL "clear-linux-os")
      # clear-linux-os distribution avoids /etc for distribution defaults.
      set(OCL_ICD_VENDORDIR "/usr/share/defaults/etc/OpenCL/vendors")
    else()
      set(OCL_ICD_VENDORDIR "/etc/OpenCL/vendors")
    endif()
  endif()

  if(NEO_BUILD_WITH_OCL)
    get_target_property(OCL_RUNTIME_LIB_NAME igdrcl_dll OUTPUT_NAME)
    # At install time: write the ICD file pointing at the runtime library and
    # generate executable postinst/postrm scripts that run ldconfig.
    install(
            CODE "file( WRITE ${NEO_BINARY_DIR}/intel.icd \"${CMAKE_INSTALL_FULL_LIBDIR}/intel-opencl/${CMAKE_SHARED_LIBRARY_PREFIX}${OCL_RUNTIME_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}\n\" )"
            CODE "file( WRITE ${NEO_BINARY_DIR}/tmp/postinst \"/sbin/ldconfig\n\" )"
            CODE "file( WRITE ${NEO_BINARY_DIR}/tmp/postrm \"/sbin/ldconfig\n\" )"
            CODE "file( COPY ${NEO_BINARY_DIR}/tmp/postinst DESTINATION ${NEO_BINARY_DIR} FILE_PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE )"
            CODE "file( COPY ${NEO_BINARY_DIR}/tmp/postrm DESTINATION ${NEO_BINARY_DIR} FILE_PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE )"
            COMPONENT opencl
    )
    install(FILES ${NEO_BINARY_DIR}/intel.icd DESTINATION ${OCL_ICD_VENDORDIR} COMPONENT opencl)
  endif()

  if(NEO_BUILD_DEBUG_SYMBOLS_PACKAGE)
    get_property(DEBUG_SYMBOL_FILES GLOBAL PROPERTY DEBUG_SYMBOL_FILES)
    install(FILES ${DEBUG_SYMBOL_FILES}
            DESTINATION ${DEBUG_SYMBOL_INSTALL_DIR}
            COMPONENT opencl-debuginfo
    )
    get_property(IGDRCL_SYMBOL_FILE GLOBAL PROPERTY IGDRCL_SYMBOL_FILE)
    install(FILES ${IGDRCL_SYMBOL_FILE}
            DESTINATION ${DEBUG_SYMBOL_INSTALL_DIR}/intel-opencl
            COMPONENT opencl-debuginfo
    )
  endif()

  if(NEO_CPACK_GENERATOR)
    set(CPACK_GENERATOR "${NEO_CPACK_GENERATOR}")
  else()
    # If no generator list is defined, build the native package type for the
    # current distro (TXZ archive when the distro is not recognized).
    if(EXISTS "/etc/debian_version")
      set(CPACK_GENERATOR "DEB")
    elseif(EXISTS "/etc/redhat-release")
      set(CPACK_GENERATOR "RPM")
    else()
      set(CPACK_GENERATOR "TXZ")
    endif()
  endif()

  set(CPACK_SET_DESTDIR TRUE)

  # Debian package metadata
  set(CPACK_DEBIAN_PACKAGE_ARCHITECTURE "amd64")
  set(CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "postinst;postrm")
  set(CPACK_DEBIAN_PACKAGE_HOMEPAGE "http://01.org/compute-runtime")
  set(CPACK_DEBIAN_PACKAGE_SHLIBDEPS ON)

  # RPM package metadata
  set(CPACK_RPM_COMPRESSION_TYPE "xz")
  set(CPACK_RPM_PACKAGE_ARCHITECTURE "x86_64")
  set(CPACK_RPM_PACKAGE_AUTOREQ OFF)
  set(CPACK_RPM_PACKAGE_DESCRIPTION "Intel OpenCL GPU driver")
  set(CPACK_RPM_PACKAGE_GROUP "System Environment/Libraries")
  set(CPACK_RPM_PACKAGE_LICENSE "MIT")
  set(CPACK_RPM_PACKAGE_RELEASE 1)
  set(CPACK_RPM_PACKAGE_RELEASE_DIST ON)
  set(CPACK_RPM_PACKAGE_URL "http://01.org/compute-runtime")
  set(CPACK_RPM_POST_INSTALL_SCRIPT_FILE "${NEO_BINARY_DIR}/postinst")
  set(CPACK_RPM_POST_UNINSTALL_SCRIPT_FILE "${NEO_BINARY_DIR}/postrm")

  set(CPACK_PACKAGE_INSTALL_DIRECTORY ${CMAKE_INSTALL_PREFIX})
  set(CPACK_PACKAGE_CONTACT "Intel Corporation")

  # One package per install component for every generator.
  set(CPACK_DEB_COMPONENT_INSTALL ON)
  set(CPACK_RPM_COMPONENT_INSTALL ON)
  set(CPACK_ARCHIVE_COMPONENT_INSTALL ON)

  if(NEO_BUILD_OCL_PACKAGE)
    get_property(CPACK_COMPONENTS_ALL GLOBAL PROPERTY NEO_OCL_COMPONENTS_LIST)
  endif()
  if(NEO_BUILD_L0_PACKAGE)
    get_property(CPACK_COMPONENTS_ALL GLOBAL PROPERTY NEO_L0_COMPONENTS_LIST)
  endif()

  set(CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION
      /etc/ld.so.conf.d
      /usr/local
      /usr/local/lib64
      /usr/local/bin
  )

  if(CMAKE_VERSION VERSION_GREATER 3.6 OR CMAKE_VERSION VERSION_EQUAL 3.6)
    set(CPACK_DEBIAN_OPENCL_FILE_NAME "intel-opencl_${NEO_OCL_VERSION_MAJOR}.${NEO_OCL_VERSION_MINOR}.${NEO_VERSION_BUILD}-1~${os_codename}_${CPACK_DEBIAN_PACKAGE_ARCHITECTURE}.deb")
set(CPACK_DEBIAN_OCLOC_FILE_NAME "intel-ocloc_${NEO_OCL_VERSION_MAJOR}.${NEO_OCL_VERSION_MINOR}.${NEO_VERSION_BUILD}-1~${os_codename}_${CPACK_DEBIAN_PACKAGE_ARCHITECTURE}.deb") set(CPACK_DEBIAN_LEVEL-ZERO-GPU_FILE_NAME "intel-level-zero-gpu_${NEO_L0_VERSION_MAJOR}.${NEO_L0_VERSION_MINOR}.${NEO_VERSION_BUILD}-1~${os_codename}_${CPACK_DEBIAN_PACKAGE_ARCHITECTURE}.deb") set(CPACK_DEBIAN_OPENCL-DEBUGINFO_FILE_NAME "intel-opencl-debuginfo_${NEO_OCL_VERSION_MAJOR}.${NEO_OCL_VERSION_MINOR}.${NEO_VERSION_BUILD}-1~${os_codename}_${CPACK_DEBIAN_PACKAGE_ARCHITECTURE}.deb") set(CPACK_RPM_OPENCL_FILE_NAME "intel-opencl-${NEO_OCL_VERSION_MAJOR}.${NEO_OCL_VERSION_MINOR}.${NEO_VERSION_BUILD}-${CPACK_RPM_PACKAGE_RELEASE}%{?dist}.${CPACK_RPM_PACKAGE_ARCHITECTURE}.rpm") set(CPACK_RPM_OCLOC_FILE_NAME "intel-ocloc-${NEO_OCL_VERSION_MAJOR}.${NEO_OCL_VERSION_MINOR}.${NEO_VERSION_BUILD}-${CPACK_RPM_PACKAGE_RELEASE}%{?dist}.${CPACK_RPM_PACKAGE_ARCHITECTURE}.rpm") set(CPACK_RPM_LEVEL-ZERO-GPU_FILE_NAME "intel-level-zero-gpu-${NEO_L0_VERSION_MAJOR}.${NEO_L0_VERSION_MINOR}.${NEO_VERSION_BUILD}-${CPACK_RPM_PACKAGE_RELEASE}%{?dist}.${CPACK_RPM_PACKAGE_ARCHITECTURE}.rpm") set(CPACK_RPM_OPENCL-DEBUGINFO_FILE_NAME "intel-opencl-debuginfo-${NEO_OCL_VERSION_MAJOR}.${NEO_OCL_VERSION_MINOR}.${NEO_VERSION_BUILD}-${CPACK_RPM_PACKAGE_RELEASE}%{?dist}.${CPACK_RPM_PACKAGE_ARCHITECTURE}.rpm") set(CPACK_ARCHIVE_OPENCL_FILE_NAME "intel-opencl-${NEO_OCL_VERSION_MAJOR}.${NEO_OCL_VERSION_MINOR}.${NEO_VERSION_BUILD}-${os_codename}-${CPACK_PACKAGE_ARCHITECTURE}") set(CPACK_ARCHIVE_OCLOC_FILE_NAME "intel-ocloc-${NEO_OCL_VERSION_MAJOR}.${NEO_OCL_VERSION_MINOR}.${NEO_VERSION_BUILD}-${os_codename}-${CPACK_PACKAGE_ARCHITECTURE}") set(CPACK_ARCHIVE_LEVEL-ZERO-GPU_FILE_NAME "intel-level-zero-gpu-${NEO_L0_VERSION_MAJOR}.${NEO_L0_VERSION_MINOR}.${NEO_VERSION_BUILD}-${os_codename}_${CPACK_PACKAGE_ARCHITECTURE}") set(CPACK_ARCHIVE_OPENCL-DEBUGINFO_FILE_NAME 
"intel-opencl-debuginfo-${NEO_OCL_VERSION_MAJOR}.${NEO_OCL_VERSION_MINOR}.${NEO_VERSION_BUILD}-${os_codename}-${CPACK_PACKAGE_ARCHITECTURE}") else() if(CPACK_GENERATOR STREQUAL "DEB") set(CPACK_PACKAGE_FILE_NAME "intel-compute-runtime_${NEO_OCL_VERSION_MAJOR}.${NEO_OCL_VERSION_MINOR}.${NEO_VERSION_BUILD}_${CPACK_DEBIAN_PACKAGE_ARCHITECTURE}") elseif(CPACK_GENERATOR STREQUAL "RPM") set(CPACK_PACKAGE_FILE_NAME "intel-compute-runtime-${NEO_OCL_VERSION_MAJOR}.${NEO_OCL_VERSION_MINOR}.${NEO_VERSION_BUILD}-${CPACK_RPM_PACKAGE_RELEASE}%{?dist}.${CPACK_RPM_PACKAGE_ARCHITECTURE}.rpm") else() set(CPACK_PACKAGE_FILE_NAME "intel-compute-runtime-${NEO_OCL_VERSION_MAJOR}.${NEO_OCL_VERSION_MINOR}.${NEO_VERSION_BUILD}-${CPACK_PACKAGE_ARCHITECTURE}") endif() endif() if(NEO__GMM_FOUND) list(APPEND _external_package_dependencies_debian "intel-gmmlib(=${NEO__GMM_VERSION})") list(APPEND _external_package_dependencies_rpm "intel-gmmlib = ${NEO__GMM_VERSION}") else() list(APPEND _external_package_dependencies_debian "intel-gmmlib") list(APPEND _external_package_dependencies_rpm "intel-gmmlib") endif() if(NEO__IGC_FOUND) list(APPEND _external_package_dependencies_debian "intel-igc-opencl(=${NEO__IGC_VERSION})") list(APPEND _external_package_dependencies_rpm "intel-igc-opencl = ${NEO__IGC_VERSION}") list(APPEND _igc_package_dependencies_debian "intel-igc-opencl(=${NEO__IGC_VERSION})") list(APPEND _igc_package_dependencies_rpm "intel-igc-opencl = ${NEO__IGC_VERSION}") else() list(APPEND _external_package_dependencies_debian "intel-igc-opencl") list(APPEND _external_package_dependencies_rpm "intel-igc-opencl") list(APPEND _igc_package_dependencies_debian "intel-igc-opencl") list(APPEND _igc_package_dependencies_rpm "intel-igc-opencl") endif() set(_external_package_dependencies_debian_level_zero_gpu "${_external_package_dependencies_debian}") set(_external_package_optionals_debian_level_zero_gpu "${_external_package_optionals_debian}") set(_external_package_dependencies_rpm_level_zero_gpu 
"${_external_package_dependencies_rpm}") set(_external_package_optionals_rpm_level_zero_gpu "${_external_package_optionals_rpm}") list(APPEND _external_package_optionals_debian_level_zero_gpu "level-zero") list(APPEND _external_package_optionals_rpm_level_zero_gpu "level-zero") if(PC_LIBXML_FOUND) list(APPEND _external_package_optionals_debian_level_zero_gpu "libxml2") list(APPEND _external_package_optionals_rpm_level_zero_gpu "libxml2") endif() string(REPLACE ";" ", " CPACK_DEBIAN_OPENCL_PACKAGE_DEPENDS "${_external_package_dependencies_debian}") string(REPLACE ";" ", " CPACK_DEBIAN_OCLOC_PACKAGE_DEPENDS "${_igc_package_dependencies_debian}") string(REPLACE ";" ", " CPACK_DEBIAN_LEVEL-ZERO-GPU_PACKAGE_DEPENDS "${_external_package_dependencies_debian_level_zero_gpu}") string(REPLACE ";" ", " CPACK_DEBIAN_LEVEL-ZERO-GPU_PACKAGE_RECOMMENDS "${_external_package_optionals_debian_level_zero_gpu}") string(REPLACE ";" ", " CPACK_RPM_OPENCL_PACKAGE_REQUIRES "${_external_package_dependencies_rpm}") string(REPLACE ";" ", " CPACK_RPM_OCLOC_PACKAGE_REQUIRES "${_igc_package_dependencies_rpm}") string(REPLACE ";" ", " CPACK_RPM_LEVEL-ZERO-GPU_PACKAGE_REQUIRES "${_external_package_dependencies_rpm_level_zero_gpu}") string(REPLACE ";" ", " CPACK_RPM_LEVEL-ZERO-GPU_PACKAGE_SUGGESTS "${_external_package_optionals_rpm_level_zero_gpu}") set(CPACK_DEBIAN_LEVEL-ZERO-GPU_PACKAGE_SUGGESTS "level-zero") set(CPACK_PROPERTIES_FILE "${CMAKE_CURRENT_SOURCE_DIR}/package_config.cmake") set(CPACK_LD_LIBRARY_PATH "${NEO__GMM_LIBRARY_PATH}") include(CPack) get_directory_property(__HAS_PARENT PARENT_DIRECTORY) if(__HAS_PARENT) set(NEO__COMPONENT_NAME "opencl" PARENT_SCOPE) endif() elseif(WIN32) set(CPACK_ARCHIVE_COMPONENT_INSTALL ON) set(CPACK_COMPONENTS_ALL ocloc) set(CPACK_ARCHIVE_OCLOC_FILE_NAME "ocloc-${NEO_OCL_VERSION_MAJOR}.${NEO_OCL_VERSION_MINOR}.${NEO_VERSION_BUILD}-${CPACK_PACKAGE_ARCHITECTURE}") include(CPack) endif() 
compute-runtime-22.14.22890/package_config.cmake000066400000000000000000000002001422164147700212640ustar00rootroot00000000000000#
# Copyright (C) 2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
#

# Sets LD_LIBRARY_PATH in the environment of the process that loads this file
# to the value of CPACK_LD_LIBRARY_PATH.
set(ENV{LD_LIBRARY_PATH} ${CPACK_LD_LIBRARY_PATH})
compute-runtime-22.14.22890/platforms.cmake000066400000000000000000000233731422164147700203730ustar00rootroot00000000000000#
# Copyright (C) 2018-2022 Intel Corporation
#
# SPDX-License-Identifier: MIT
#

# Number of per-core-type slots is MAX_CORE + 1 (see INIT_LIST).
set(MAX_CORE 64)
set(ALL_CORE_TYPES "")

include(${CMAKE_CURRENT_SOURCE_DIR}/cmake${BRANCH_DIR_SUFFIX}fill_core_types.cmake)

# Same core types in reverse order; used wherever the last listed core type
# should be considered first.
set(ALL_CORE_TYPES_REVERSED ${ALL_CORE_TYPES})
list(REVERSE ALL_CORE_TYPES_REVERSED)

# NOTE: everything below is a macro, not a function. Macros run in the
# caller's scope, so every variable they set (CORE_IDX, CORE_X_LIST, ...)
# stays visible to - and may be relied upon by - the caller.

# FIND_IDX_FOR_CORE_TYPE(<core-type> CORE_IDX)
# Stores the position of <core-type> within ALL_CORE_TYPES in CORE_IDX and
# aborts configuration when the core type is unknown. The result is written
# to the literal variable CORE_IDX, so callers must pass CORE_IDX as the
# second argument.
macro(FIND_IDX_FOR_CORE_TYPE CORE_TYPE CORE_IDX)
  list(FIND ALL_CORE_TYPES "${CORE_TYPE}" CORE_IDX)
  if(${CORE_IDX} EQUAL -1)
    message(FATAL_ERROR "No ${CORE_TYPE} allowed, exiting")
  endif()
endmacro()

# INIT_LIST(<list-type> <element-type>)
# Appends MAX_CORE + 1 placeholder entries (" ") to
# ALL_<element-type>_<list-type>, one slot per possible core index.
macro(INIT_LIST LIST_TYPE ELEMENT_TYPE)
  foreach(IT RANGE 0 ${MAX_CORE} 1)
    list(APPEND ALL_${ELEMENT_TYPE}_${LIST_TYPE} " ")
  endforeach()
endmacro()

# GET_LIST_FOR_CORE_TYPE(<list-type> <element-type> <core-idx> <out-list>)
# Reads slot <core-idx> of ALL_<element-type>_<list-type> (a comma-separated
# string) and stores it in <out-list> as a regular ';'-separated CMake list.
macro(GET_LIST_FOR_CORE_TYPE LIST_TYPE ELEMENT_TYPE CORE_IDX OUT_LIST)
  list(GET ALL_${ELEMENT_TYPE}_${LIST_TYPE} ${CORE_IDX} CORE_X_${LIST_TYPE})
  string(REPLACE "," ";" ${OUT_LIST} ${CORE_X_${LIST_TYPE}})
endmacro()

# ADD_ITEM_FOR_CORE_TYPE(<list-type> <element-type> <core-type> <item>)
# Appends <item> to the comma-separated slot that belongs to <core-type> in
# ALL_<element-type>_<list-type>.
macro(ADD_ITEM_FOR_CORE_TYPE LIST_TYPE ELEMENT_TYPE CORE_TYPE ITEM)
  FIND_IDX_FOR_CORE_TYPE(${CORE_TYPE} CORE_IDX)
  list(GET ALL_${ELEMENT_TYPE}_${LIST_TYPE} ${CORE_IDX} CORE_X_LIST)
  # Drop the " " placeholder so an untouched slot compares equal to "".
  string(REPLACE " " "" CORE_X_LIST ${CORE_X_LIST})
  if("${CORE_X_LIST}" STREQUAL "")
    set(CORE_X_LIST "${ITEM}")
  else()
    set(CORE_X_LIST "${CORE_X_LIST},${ITEM}")
  endif()
  # Replace the slot in place: remove the old entry, insert the new one.
  list(REMOVE_AT ALL_${ELEMENT_TYPE}_${LIST_TYPE} ${CORE_IDX})
  list(INSERT ALL_${ELEMENT_TYPE}_${LIST_TYPE} ${CORE_IDX} ${CORE_X_LIST})
endmacro()

# CORE_CONTAINS_PLATFORMS(<type> <core-type> <out-flag>)
# Sets <out-flag> to TRUE when the <type> "PLATFORMS" slot of <core-type>
# holds at least one platform, FALSE otherwise.
macro(CORE_CONTAINS_PLATFORMS TYPE CORE_TYPE OUT_FLAG)
  FIND_IDX_FOR_CORE_TYPE(${CORE_TYPE} CORE_IDX)
  GET_LIST_FOR_CORE_TYPE("PLATFORMS" ${TYPE} ${CORE_IDX} CORE_X_PLATFORMS)
  string(REPLACE " " "" CORE_X_PLATFORMS ${CORE_X_PLATFORMS})
  if("${CORE_X_PLATFORMS}" STREQUAL "")
set(${OUT_FLAG} FALSE)
  else()
    set(${OUT_FLAG} TRUE)
  endif()
endmacro()

# INIT_PRODUCTS_LIST(<type>)
# Seeds ALL_<type>_PRODUCT_FAMILY and ALL_<type>_PRODUCT_TO_PRODUCT_FAMILY
# with a " " placeholder entry (removed again by GET_AVAILABLE_PRODUCTS).
macro(INIT_PRODUCTS_LIST TYPE)
  list(APPEND ALL_${TYPE}_PRODUCT_FAMILY " ")
  list(APPEND ALL_${TYPE}_PRODUCT_TO_PRODUCT_FAMILY " ")
endmacro()

# ADD_PRODUCT(<type> <product> <item>)
# Records <item> as a product family and <product> as the product mapped to
# it; both lists are appended in lock-step so indices correspond.
macro(ADD_PRODUCT TYPE PRODUCT ITEM)
  list(APPEND ALL_${TYPE}_PRODUCT_FAMILY ${ITEM})
  list(APPEND ALL_${TYPE}_PRODUCT_TO_PRODUCT_FAMILY ${PRODUCT})
endmacro()

# GET_AVAILABLE_PRODUCTS(<type> <out-family-list> <out-default-family>)
# Stores all recorded <type> product families in <out-family-list>. When
# DEFAULT_<type>_PLATFORM is set, <out-default-family> receives the family
# recorded for that platform (fatal error if there is none); otherwise
# <out-default-family> is left unset.
macro(GET_AVAILABLE_PRODUCTS TYPE PRODUCT_FAMILY_LIST DEFAULT_PRODUCT_FAMILY)
  list(REMOVE_ITEM ALL_${TYPE}_PRODUCT_FAMILY " ")
  list(REMOVE_ITEM ALL_${TYPE}_PRODUCT_TO_PRODUCT_FAMILY " ")

  set(${PRODUCT_FAMILY_LIST} ${ALL_${TYPE}_PRODUCT_FAMILY})
  set(${DEFAULT_PRODUCT_FAMILY})

  if(NOT "${DEFAULT_${TYPE}_PLATFORM}" STREQUAL "")
    list(FIND ALL_${TYPE}_PRODUCT_TO_PRODUCT_FAMILY ${DEFAULT_${TYPE}_PLATFORM} INDEX)
    if(${INDEX} EQUAL -1)
      message(FATAL_ERROR "${DEFAULT_${TYPE}_PLATFORM} not found in product families.")
    endif()

    list(GET ALL_${TYPE}_PRODUCT_FAMILY ${INDEX} DEFAULT)
    set(${DEFAULT_PRODUCT_FAMILY} ${DEFAULT})
  endif()
endmacro()

# GET_AVAILABLE_PLATFORMS(<type> <flag-name> <out-str>)
# Builds <type>_PLATFORM_LIST (all platforms of every core type that has
# any), <type>_CORE_FLAGS_DEFINITONS (<flag-name>_<core> and
# <flag-name>_<platform> entries) and appends every platform name to the
# space-separated string <out-str>. Also picks DEFAULT_<type>_PLATFORM and
# DEFAULT_<type>_<core>_PLATFORM when they are not already set.
macro(GET_AVAILABLE_PLATFORMS TYPE FLAG_NAME OUT_STR)
  set(${TYPE}_PLATFORM_LIST)
  set(${TYPE}_CORE_FLAGS_DEFINITONS)
  # Honour PREFERRED_PLATFORM only if its <flag-name>_<platform> flag is on.
  if(NOT DEFAULT_${TYPE}_PLATFORM AND DEFINED PREFERRED_PLATFORM AND ${FLAG_NAME}_${PREFERRED_PLATFORM})
    set(DEFAULT_${TYPE}_PLATFORM ${PREFERRED_PLATFORM})
  endif()
  foreach(CORE_TYPE ${ALL_CORE_TYPES_REVERSED})
    CORE_CONTAINS_PLATFORMS(${TYPE} ${CORE_TYPE} COREX_HAS_PLATFORMS)
    if(${COREX_HAS_PLATFORMS})
      FIND_IDX_FOR_CORE_TYPE(${CORE_TYPE} CORE_IDX)
      list(APPEND ${TYPE}_CORE_FLAGS_DEFINITONS ${FLAG_NAME}_${CORE_TYPE})
      GET_LIST_FOR_CORE_TYPE("PLATFORMS" ${TYPE} ${CORE_IDX} ${TYPE}_COREX_PLATFORMS)
      list(APPEND ${TYPE}_PLATFORM_LIST ${${TYPE}_COREX_PLATFORMS})
      if(NOT DEFAULT_${TYPE}_PLATFORM)
        # Default to the first platform collected so far. The stray trailing
        # ${PLATFORM_IT} argument was dropped: list(GET) takes the output
        # variable last, so a non-empty PLATFORM_IT would have turned the
        # intended output variable into an index and broken the call.
        list(GET ${TYPE}_PLATFORM_LIST 0 DEFAULT_${TYPE}_PLATFORM)
      endif()
      if(NOT DEFAULT_${TYPE}_${CORE_TYPE}_PLATFORM)
        list(GET ${TYPE}_COREX_PLATFORMS 0 DEFAULT_${TYPE}_${CORE_TYPE}_PLATFORM)
      endif()
    endif()
  endforeach()
  foreach(PLATFORM_IT ${${TYPE}_PLATFORM_LIST})
set(${OUT_STR} "${${OUT_STR}} ${PLATFORM_IT}")
    list(APPEND ${TYPE}_CORE_FLAGS_DEFINITONS ${FLAG_NAME}_${PLATFORM_IT})
  endforeach()
endmacro()

# GET_PLATFORMS_FOR_CORE_TYPE(<type> <core-type> <out-list>)
# Stores the <type> platforms registered for <core-type> in <out-list>.
macro(GET_PLATFORMS_FOR_CORE_TYPE TYPE CORE_TYPE OUT_LIST)
  FIND_IDX_FOR_CORE_TYPE(${CORE_TYPE} CORE_IDX)
  GET_LIST_FOR_CORE_TYPE("PLATFORMS" ${TYPE} ${CORE_IDX} ${OUT_LIST})
endmacro()

# PLATFORM_HAS_2_0(<core-type> <platform-name> <out-flag>)
# Sets <out-flag> to TRUE when <platform-name> is registered in the
# "SUPPORTED_2_0" platform list of <core-type>, FALSE otherwise.
macro(PLATFORM_HAS_2_0 CORE_TYPE PLATFORM_NAME OUT_FLAG)
  FIND_IDX_FOR_CORE_TYPE(${CORE_TYPE} CORE_IDX)
  GET_LIST_FOR_CORE_TYPE("PLATFORMS" "SUPPORTED_2_0" ${CORE_IDX} CORE_X_PLATFORMS)
  list(FIND CORE_X_PLATFORMS ${PLATFORM_NAME} PLATFORM_EXISTS)
  if("${PLATFORM_EXISTS}" LESS 0)
    set(${OUT_FLAG} FALSE)
  else()
    set(${OUT_FLAG} TRUE)
  endif()
endmacro()

# PLATFORM_HAS_VME(<core-type> <platform-name> <out-flag>)
# Same as PLATFORM_HAS_2_0 but checks the "SUPPORTED_VME" platform list.
macro(PLATFORM_HAS_VME CORE_TYPE PLATFORM_NAME OUT_FLAG)
  FIND_IDX_FOR_CORE_TYPE(${CORE_TYPE} CORE_IDX)
  GET_LIST_FOR_CORE_TYPE("PLATFORMS" "SUPPORTED_VME" ${CORE_IDX} CORE_X_PLATFORMS)
  list(FIND CORE_X_PLATFORMS ${PLATFORM_NAME} PLATFORM_EXISTS)
  if("${PLATFORM_EXISTS}" LESS 0)
    set(${OUT_FLAG} FALSE)
  else()
    set(${OUT_FLAG} TRUE)
  endif()
endmacro()

# default flag for CoreX devices support
set(SUPPORT_GEN_DEFAULT TRUE CACHE BOOL "default value for SUPPORT_COREx")
# default flag for platform support
set(SUPPORT_PLATFORM_DEFAULT TRUE CACHE BOOL "default value for support platform")

# Define the hardware configurations we support and test
#
# SET_FLAGS_FOR(<core-type> <sku>...)
# Creates the SUPPORT_<core-type>/TESTS_<core-type> cache options and the
# matching SUPPORT_<sku>/TESTS_<sku> options for every listed SKU, and records
# the core type in ALL_SUPPORTED_CORE_FAMILIES / ALL_TESTED_CORE_FAMILIES
# when the respective option is enabled.
macro(SET_FLAGS_FOR CORE_TYPE)
  # Explicitly enabling a single SKU force-enables its core type (and the
  # core type's tests) even if they were switched off.
  foreach(SKU_NAME ${ARGN})
    if(SUPPORT_${SKU_NAME})
      if(NOT SUPPORT_${CORE_TYPE})
        message(STATUS "Auto-Enabling ${CORE_TYPE} support for ${SKU_NAME}")
        set(SUPPORT_${CORE_TYPE} TRUE CACHE BOOL "Support ${CORE_TYPE} devices" FORCE)
      endif()
      if(NOT TESTS_${CORE_TYPE})
        message(STATUS "Auto-Enabling ${CORE_TYPE} tests for ${SKU_NAME}")
        set(TESTS_${CORE_TYPE} TRUE CACHE BOOL "Build ULTs for ${CORE_TYPE} devices" FORCE)
      endif()
    endif()
  endforeach()

  # Without FORCE these only take effect when the cache entry does not exist
  # yet, so user-provided values win.
  set(SUPPORT_${CORE_TYPE} ${SUPPORT_GEN_DEFAULT} CACHE BOOL "Support ${CORE_TYPE} devices")
  set(TESTS_${CORE_TYPE} ${SUPPORT_${CORE_TYPE}} CACHE BOOL "Build ULTs for ${CORE_TYPE} devices")

  # Tests make no sense for an unsupported core type or when unit tests are
  # skipped globally.
  if(NOT SUPPORT_${CORE_TYPE} OR NEO_SKIP_UNIT_TESTS)
    set(TESTS_${CORE_TYPE} FALSE)
  endif()

  if(SUPPORT_${CORE_TYPE})
    list(APPEND ALL_SUPPORTED_CORE_FAMILIES ${CORE_TYPE})
    list(REMOVE_DUPLICATES ALL_SUPPORTED_CORE_FAMILIES)

    foreach(${CORE_TYPE}_PLATFORM ${ARGN})
      set(SUPPORT_${${CORE_TYPE}_PLATFORM} ${SUPPORT_PLATFORM_DEFAULT} CACHE BOOL "Support ${${CORE_TYPE}_PLATFORM}")
      if(TESTS_${CORE_TYPE})
        set(TESTS_${${CORE_TYPE}_PLATFORM} ${SUPPORT_${${CORE_TYPE}_PLATFORM}} CACHE BOOL "Build ULTs for ${${CORE_TYPE}_PLATFORM}")
      endif()

      if(NOT SUPPORT_${${CORE_TYPE}_PLATFORM} OR NOT TESTS_${CORE_TYPE} OR NEO_SKIP_UNIT_TESTS)
        set(TESTS_${${CORE_TYPE}_PLATFORM} FALSE)
      endif()
    endforeach()
  endif()

  if(TESTS_${CORE_TYPE})
    list(APPEND ALL_TESTED_CORE_FAMILIES ${CORE_TYPE})
    list(REMOVE_DUPLICATES ALL_TESTED_CORE_FAMILIES)
  endif()
endmacro()

# ADD_PLATFORM_FOR_CORE_TYPE(<list-type> <core-type> <platform-name> <platform-type>)
# Registers <platform-name> in the <list-type> "PLATFORMS" slot of
# <core-type>, records <platform-type> in PLATFORM_TYPES, sets the
# <core-type>_HAS_<platform-type> and <platform-name>_IS_<platform-type>
# flags, and remembers the first platform seen (lower-cased) as
# DEFAULT_<list-type>_<core-type>_<platform-type>_PLATFORM.
macro(ADD_PLATFORM_FOR_CORE_TYPE LIST_TYPE CORE_TYPE PLATFORM_NAME PLATFORM_TYPE)
  list(APPEND PLATFORM_TYPES ${PLATFORM_TYPE})
  list(REMOVE_DUPLICATES PLATFORM_TYPES)
  ADD_ITEM_FOR_CORE_TYPE("PLATFORMS" ${LIST_TYPE} ${CORE_TYPE} ${PLATFORM_NAME})
  set(${CORE_TYPE}_HAS_${PLATFORM_TYPE} TRUE)
  set(${PLATFORM_NAME}_IS_${PLATFORM_TYPE} TRUE)
  if(NOT DEFAULT_${LIST_TYPE}_${CORE_TYPE}_${PLATFORM_TYPE}_PLATFORM)
    string(TOLOWER ${PLATFORM_NAME} DEFAULT_${LIST_TYPE}_${CORE_TYPE}_${PLATFORM_TYPE}_PLATFORM)
  endif()
endmacro()

# Init lists
INIT_LIST("FAMILY_NAME" "TESTED")
INIT_LIST("PLATFORMS" "SUPPORTED")
INIT_LIST("PLATFORMS" "SUPPORTED_2_0")
INIT_LIST("PLATFORMS" "SUPPORTED_VME")
INIT_LIST("PLATFORMS" "SUPPORTED_IMAGES")
INIT_LIST("PLATFORMS" "SUPPORTED_AUX_TRANSLATION")
INIT_LIST("PLATFORMS" "TESTED")
INIT_PRODUCTS_LIST("TESTED")
INIT_PRODUCTS_LIST("SUPPORTED")

include(${CMAKE_CURRENT_SOURCE_DIR}/cmake${BRANCH_DIR_SUFFIX}setup_platform_flags.cmake)

# Get platform lists, flag definition and set default platforms
GET_AVAILABLE_PLATFORMS("SUPPORTED" "SUPPORT" ALL_AVAILABLE_SUPPORTED_PLATFORMS)
GET_AVAILABLE_PLATFORMS("TESTED" "TESTS" ALL_AVAILABLE_TESTED_PLATFORMS)
# Resolve the product-family lists and the default product family for the
# tested and the supported configuration.
# NOTE(review): both calls write ALL_PRODUCT_FAMILY_LIST, so the value from
# the "SUPPORTED" call is the one that remains - confirm this is intended.
GET_AVAILABLE_PRODUCTS("TESTED" ALL_PRODUCT_FAMILY_LIST DEFAULT_TESTED_PRODUCT_FAMILY)
GET_AVAILABLE_PRODUCTS("SUPPORTED" ALL_PRODUCT_FAMILY_LIST DEFAULT_SUPPORTED_PRODUCT_FAMILY)

# Output platforms
message(STATUS "All supported platforms: ${ALL_AVAILABLE_SUPPORTED_PLATFORMS}")
message(STATUS "All tested platforms: ${ALL_AVAILABLE_TESTED_PLATFORMS}")
message(STATUS "Default supported platform: ${DEFAULT_SUPPORTED_PLATFORM}")
message(STATUS "Default tested platform: ${DEFAULT_TESTED_PLATFORM}")

# Output families
message(STATUS "All supported core families: ${ALL_SUPPORTED_CORE_FAMILIES}")
message(STATUS "All tested core families: ${ALL_TESTED_CORE_FAMILIES}")

# The searched values are quoted so that an empty default still reaches the
# explicit FATAL_ERROR below instead of tripping list(FIND)'s argument check.
list(FIND SUPPORTED_PLATFORM_LIST "${DEFAULT_SUPPORTED_PLATFORM}" VALID_DEFAULT_SUPPORTED_PLATFORM)
if(VALID_DEFAULT_SUPPORTED_PLATFORM LESS 0)
  message(FATAL_ERROR "Not a valid supported platform: ${DEFAULT_SUPPORTED_PLATFORM}")
endif()

if(DEFAULT_TESTED_PLATFORM)
  list(FIND TESTED_PLATFORM_LIST "${DEFAULT_TESTED_PLATFORM}" VALID_DEFAULT_TESTED_PLATFORM)
  if(VALID_DEFAULT_TESTED_PLATFORM LESS 0)
    message(FATAL_ERROR "Not a valid tested platform: ${DEFAULT_TESTED_PLATFORM}")
  endif()
else()
  # No platform to test against: disable unit tests altogether.
  set(NEO_SKIP_UNIT_TESTS TRUE)
endif()

# Choose the default tested family name: the preferred one when it is known,
# otherwise the first non-placeholder entry walking the core types in
# reverse order.
if(NOT DEFAULT_TESTED_FAMILY_NAME)
  if(DEFINED PREFERRED_FAMILY_NAME)
    list(FIND ALL_TESTED_FAMILY_NAME "${PREFERRED_FAMILY_NAME}" CORE_IDX)
    if(${CORE_IDX} GREATER -1)
      set(DEFAULT_TESTED_FAMILY_NAME ${PREFERRED_FAMILY_NAME})
    endif()
  endif()
  # NOTE(review): this tests DEFINED while the outer check tests truthiness,
  # so a defined-but-empty DEFAULT_TESTED_FAMILY_NAME skips the fallback -
  # confirm that is intended.
  if(NOT DEFINED DEFAULT_TESTED_FAMILY_NAME)
    foreach(CORE_TYPE ${ALL_CORE_TYPES_REVERSED})
      FIND_IDX_FOR_CORE_TYPE(${CORE_TYPE} CORE_IDX)
      list(GET ALL_TESTED_FAMILY_NAME ${CORE_IDX} CORE_FAMILY_NAME)
      # " " is the placeholder written by INIT_LIST for unused slots.
      if(NOT "${CORE_FAMILY_NAME}" STREQUAL " ")
        set(DEFAULT_TESTED_FAMILY_NAME ${CORE_FAMILY_NAME})
        break()
      endif()
    endforeach()
  endif()
endif()
message(STATUS "Default tested family name: ${DEFAULT_TESTED_FAMILY_NAME}")
compute-runtime-22.14.22890/programmers-guide/000077500000000000000000000000001422164147700210035ustar00rootroot00000000000000compute-runtime-22.14.22890/programmers-guide/IMPLICIT_SCALING.md000066400000000000000000000106431422164147700237030ustar00rootroot00000000000000 # Implicit Scaling * [Overview](#Overview) * [Availability](#Availability) * [Debug Keys](#Debug-Keys) * [Limitations](#Limitations) # Overview Multi-tile devices, such as Xe HPC (PVC) and XeHP_SDV, contain smaller GPU devices called tiles. Each tile has its own dedicated set of resources. * Each tile has Execution Units (EUs) doing the actual computation work, which can be accessed using the available devices. * Similarly, copy engines (BCSs) may be present on each tile. * Each tile has HBM (High-Bandwidth Memory) local memory that is allocated directly on the chip. * Each tile has its own portion of L3 cache to speed up things. For a system like this, the following UMD (User Mode Driver) device handles are exposed: * Root device handle. * Sub-device handle for each tile. To manage the resources on those sub-devices, the UMD introduces two main development models: * *Implicit scaling* model, in which the application allocates and submits to the root device and the driver is responsible for distributing work and memory across tiles. * *Explicit scaling* model, in which the application is responsible for distributing work and memory across tiles using sub-device handles. When doing allocations in implicit scaling mode, the driver *colors* an allocation among the available tiles. Default coloring divides an allocation size evenly by the number of available tiles. Other policies include dividing the allocation in chunks of a given size, which are then interleaved on each tile. When scheduling a kernel for execution, the driver distributes the kernel workgroups among the available tiles. The default mechanism is called *Static Partitioning*, where the workgroups are evenly distributed among tiles. 
For instance, in a 2-tile system, half of the workgroups go to tile 0, and the other half to tile 1. The number of CCSs, or compute engines, currently available with implicit scaling on the root device is one. This is because with implicit scaling the driver automatically uses all the EUs available in the device, so no other CCSs are exposed. Even though only one CCS is exposed, multiple kernels submitted to the root device using implicit scaling may execute concurrently on PVC, depending on EU availability. On XeHP_SDV, they may be serialized. See the [Limitations](#Limitations) section below. Since implicit scaling is only done for EUs, which are associated only with kernels submitted to CCS, BCSs are currently not exposed and access to them is done through sub-device handles. # Availability * OpenCL has implicit scaling enabled by default on all platforms. * Level Zero has it enabled by default on Xe HPC (PVC) B and later steppings. # Debug Keys ## Implicit scaling enabling Implicit scaling can be enabled and disabled in Level Zero by setting the `EnableImplicitScaling` debug key to `1` or `0`, respectively. When implicit scaling is enabled, the root device is seen as a monolithic device, with internal resources being managed by the driver. When it is disabled, the root device has the same resources as tile 0, and kernel submissions and allocations to both the root device handle and tile 0 device handle have the same effect. In OpenCL, implicit scaling is always enabled. To not use implicit scaling with OpenCL, the affinity mask targeting one of the sub-devices may be used, e.g., `ZE_AFFINITY_MASK=0.0`. ## Coloring scheme By default, allocations are equally split among available tiles. Allocations can also be colored by chunks and interleaved on each tile (`MultiStoragePolicy=1` or `MultiStoragePolicy=2`, respectively) with a size set by `MultiStorageGranularity`, which must be equal to or greater than 64 kB, with 64 kB being the default. 
# Limitations ## XeHP_SDV For workloads with no coherent L3 caches among tiles, such as XeHP_SDV, the following considerations are made: * Partial writes and atomics are moved to global memory. This can be controlled with: * `ForceMultiGpuPartialWrites`: Set to `0` to handle partial writes on global memory (slow mode for multi-tile) and `1` to handle partial writes on L3 cache (fast mode for on tile). * `ForceMultiGpuAtomics`: Set to `0` to have global atomics (slow mode for multi-tile) and `1` to have atomics on L3 cache (fast mode for on tile). * Caches are flushed after every kernel. This can be disabled with `DoNotFlushCaches=1`. * Kernels are serialized to maintain functional correctness of split execution.compute-runtime-22.14.22890/programmers-guide/PROGRAMMERS_GUIDE.md000066400000000000000000000007701422164147700240440ustar00rootroot00000000000000 # Compute-Runtime Programmers and Optimization Guide ## Introduction This document provides the architectural design followed in the Intel(R) Graphics Compute Runtime for oneAPI Level Zero and OpenCL(TM) Driver. Implementation details and optimization guidelines are explained, as well as a description of the different features available for the different supported platforms. 
### [Implicit scaling](IMPLICIT_SCALING.md)compute-runtime-22.14.22890/scripts/000077500000000000000000000000001422164147700170415ustar00rootroot00000000000000compute-runtime-22.14.22890/scripts/driver-version.bat000066400000000000000000000030431422164147700225070ustar00rootroot00000000000000:: :: Copyright (C) 2019-2021 Intel Corporation :: :: SPDX-License-Identifier: MIT :: @ECHO OFF :: One parameter is expected IF NOT B"%~2"==B"" ( ECHO %0 called with no parameters, prints the version of the installed OpenCL driver ECHO %0 called with a single parameter containing expected version number, ECHO returns success ^(ERRORLEVEL=0^) if installed the specified driver version or newer ECHO returns failure ^(ERRORLEVEL=1^) if no driver or older than specified ECHO example: ECHO driver-version.bat 26.20.100.7158 EXIT /B 1 ) SET DriverVersion= FOR /F "tokens=3" %%D in ('WMIC path Win32_VideoController where AdapterCompatibility^="Intel Corporation" get AdapterCompatibility^, DriverVersion ^| findstr "Intel"') do ( set DriverVersion=%%D ) IF B"%DriverVersion%"==B"" ( ECHO No driver detected in the system EXIT /B 1 ) IF B"%~1"==B"" ( ECHO %DriverVersion% EXIT /B 1 ) FOR /F "delims=. tokens=1-4" %%A IN ("%DriverVersion%") DO ( SET d1=%%A SET d2=%%B SET d3=%%C SET d4=%%D ) FOR /F "delims=. 
tokens=1-4" %%A IN ("%~1") DO ( SET p1=%%A SET p2=%%B SET p3=%%C SET p4=%%D ) IF %d1% LSS %p1% GOTO FAIL IF %d1% GTR %p1% GOTO PASS IF %d2% LSS %p2% GOTO FAIL IF %d2% GTR %p2% GOTO PASS IF %d3% LSS %p3% GOTO FAIL IF %d3% GTR %p3% GOTO PASS IF %d4% LSS %p4% GOTO FAIL :PASS ECHO Driver %DriverVersion% is newer than or equal to referenced version passed from command line %1 EXIT /B 0 :FAIL ECHO Driver %DriverVersion% is older than referenced from command line %1 EXIT /B 1 compute-runtime-22.14.22890/scripts/driver-version.sh000077500000000000000000000037071422164147700223650ustar00rootroot00000000000000#!/bin/bash # # Copyright (C) 2019-2021 Intel Corporation # # SPDX-License-Identifier: MIT # check_deb() { DriverVer=$(dpkg -l 'intel-opencl' | awk '/ii intel-opencl / { print $3 }') if [ -z $DriverVer ] then DriverVer=$(dpkg -l 'intel-opencl-icd' | awk '/ii intel-opencl-icd / { print $3 }') fi } check_rpm() { DriverVer=$(rpm -q --queryformat '%{VERSION}' intel-opencl) if [ $? != 0 ] then DriverVer="" fi } check_pacman() { DriverVer=$(pacman -Q intel-compute-runtime | awk '{print $2}' | sed "s/-.*$//") } if [ -f /etc/os-release ] then source /etc/os-release if [ -z "${ID}" ] then echo "Unknown OS" exit 1 fi fi case "${ID}" in debian | ubuntu ) check_deb ;; fedora | centos | rhel) check_rpm ;; arch ) check_pacman ;; * ) echo "Unsupported OS: ${ID}" exit 1 ;; esac if [ -z $DriverVer ] then echo No driver detected in the system exit 1 fi if [ $# -eq 0 ] then echo $DriverVer exit 1 fi if [ $# -ne 1 ] || [ $1 == "-h" ] || [ $1 == "--help" ] then echo $0 called with no parameters, prints the version of the installed OpenCL driver echo $0 called with a single parameter containing expected version number, echo returns success \(0\) if installed the specified driver version or newer echo returns failure \(1\) if no driver or older than specified exit 1 fi if ! [[ $1 =~ ^[0-9]+\.[0-9]+\.[0-9]+.* ]]; then echo Invalid version format exit 1 fi TestedString=$(echo "$1" | awk -F. 
'{ printf("%d.%02d.%d\n", $1,$2,$3); }';) DriverStatus=$( echo -e "${DriverVer}\n${TestedString}" | sort -V -C -r ; echo $? ) if [ $DriverStatus -eq 1 ] then echo Driver $DriverVer is older than referenced version passed from command line ${TestedString} else echo Driver $DriverVer is newer than or equal to referenced version passed from command line ${TestedString} fi exit $DriverStatus compute-runtime-22.14.22890/scripts/neo_ww_calculator.py000077500000000000000000000021351422164147700231260ustar00rootroot00000000000000#!/usr/bin/env python3 # # Copyright (C) 2021 Intel Corporation # # SPDX-License-Identifier: MIT # import sys from datetime import datetime, timezone def convert_ww(epoch): dt = datetime.fromtimestamp(epoch, timezone.utc) # get some info from epoch yr = int(dt.strftime("%y")) doy = int(dt.strftime("%j")) # and day of week for Jan 1st dow1 = int(datetime(dt.year, 1, 1).strftime("%w")) # number of days in a year _is_leap = yr % 400 == 0 or (yr % 4 == 0 and yr % 100 != 0) _y_days = 366 if _is_leap else 365 _doy = doy - 1 + dow1 # shift day of year to simulate Jan 1st as Sunday _ww = int(_doy / 7) + 1 # get workweek _wd = int(_doy % 7) # get days of week _y_days = _y_days + dow1 # adjusted number of days in year _w_days = _y_days - _doy + _wd # numer of week days days to end of year if _w_days < 7: # last week has less than 7 days yr = yr + 1 _ww = 1 print("{:02d}.{:02d}".format(yr, _ww)) return 0 if __name__ == '__main__': sys.exit(convert_ww(int(sys.argv[1]))) compute-runtime-22.14.22890/scripts/packaging/000077500000000000000000000000001422164147700207655ustar00rootroot00000000000000compute-runtime-22.14.22890/scripts/packaging/functions.sh000077500000000000000000000007001422164147700233310ustar00rootroot00000000000000#!/bin/bash # # Copyright (C) 2021 Intel Corporation # # SPDX-License-Identifier: MIT # set -ex get_api_version() { API_VERSION="${API_VERSION:-}" API_VERSION_SRC="${API_VERSION_SRC:-}" API_DEB_MODEL_LINK="" API_RPM_MODEL_LINK="" if [ 
"${COMPONENT_MODEL}" != "ci" ]; then API_DEB_MODEL_LINK="~${COMPONENT_MODEL:-unknown}${BUILD_ID:-0}" API_RPM_MODEL_LINK=".${COMPONENT_MODEL:-unknown}${BUILD_ID:-0}" fi } compute-runtime-22.14.22890/scripts/packaging/l0_gpu_driver/000077500000000000000000000000001422164147700235265ustar00rootroot00000000000000compute-runtime-22.14.22890/scripts/packaging/l0_gpu_driver/build_l0_gpu_driver_deb.sh000077500000000000000000000112701422164147700306200ustar00rootroot00000000000000#!/usr/bin/env bash # # Copyright (C) 2021-2022 Intel Corporation # # SPDX-License-Identifier: MIT # set -ex DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" REPO_DIR="$( cd "$( dirname "${DIR}/../../../../" )" && pwd )" BUILD_DIR="${REPO_DIR}/../build_l0_gpu_driver" NEO_SKIP_UNIT_TESTS=${NEO_SKIP_UNIT_TESTS:-FALSE} BRANCH_SUFFIX="$( cat ${REPO_DIR}/.branch )" ENABLE_L0_GPU_DRIVER="${ENABLE_L0_GPU_DRIVER:-1}" if [ "${ENABLE_L0_GPU_DRIVER}" == "0" ]; then exit 0 fi LOG_CCACHE_STATS="${LOG_CCACHE_STATS:-0}" export BUILD_ID="${BUILD_ID:-1}" export CMAKE_BUILD_TYPE="${CMAKE_BUILD_TYPE:-Release}" source "${REPO_DIR}/scripts/packaging/${BRANCH_SUFFIX}/functions.sh" source "${REPO_DIR}/scripts/packaging/${BRANCH_SUFFIX}/l0_gpu_driver/l0_gpu_driver.sh" get_api_version # API_VERSION-API_VERSION_SRC and API_DEB_MODEL_LINK get_l0_gpu_driver_version # NEO_L0_VERSION_MAJOR.NEO_L0_VERSION_MINOR.NEO_L0_VERSION_PATCH if [ -z "${BRANCH_SUFFIX}" ]; then VERSION="${NEO_L0_VERSION_MAJOR}.${NEO_L0_VERSION_MINOR}.${NEO_L0_VERSION_PATCH}${API_DEB_MODEL_LINK}" else VERSION="${NEO_L0_VERSION_MAJOR}.${NEO_L0_VERSION_MINOR}.${NEO_L0_VERSION_PATCH}.${API_VERSION}-${API_VERSION_SRC}${API_DEB_MODEL_LINK}" fi PKG_VERSION=${VERSION} if [ "${CMAKE_BUILD_TYPE}" != "Release" ]; then PKG_VERSION="${PKG_VERSION}+$(echo "$CMAKE_BUILD_TYPE" | tr '[:upper:]' '[:lower:]')1" fi rm -rf $BUILD_DIR mkdir -p $BUILD_DIR/debian COPYRIGHT="${REPO_DIR}/scripts/packaging/${BRANCH_SUFFIX}/l0_gpu_driver/${OS_TYPE}/copyright" 
CONTROL="${REPO_DIR}/scripts/packaging/${BRANCH_SUFFIX}/l0_gpu_driver/${OS_TYPE}/control" SHLIBS="${REPO_DIR}/scripts/packaging/${BRANCH_SUFFIX}/l0_gpu_driver/${OS_TYPE}/shlibs.local" cp -pR ${REPO_DIR}/scripts/packaging/l0_gpu_driver/${OS_TYPE}/debian/* $BUILD_DIR/debian/ cp $COPYRIGHT $BUILD_DIR/debian/ cp $CONTROL $BUILD_DIR/debian/ if [ -f "${SHLIBS}" ]; then cp $SHLIBS $BUILD_DIR/debian/ fi LEVEL_ZERO_DEVEL_NAME=${LEVEL_ZERO_DEVEL_NAME:-level-zero-devel} LEVEL_ZERO_DEVEL_VERSION=$(apt-cache policy ${LEVEL_ZERO_DEVEL_NAME} | grep Installed | cut -f2- -d ':' | xargs) if [ ! -z "${LEVEL_ZERO_DEVEL_VERSION}" ]; then perl -pi -e "s/^ level-zero-devel(?=,|$)/ ${LEVEL_ZERO_DEVEL_NAME} (=$LEVEL_ZERO_DEVEL_VERSION)/" "$BUILD_DIR/debian/control" fi if [ -z "${BRANCH_SUFFIX}" ]; then GMM_VERSION=$(apt-cache policy intel-gmmlib | grep Installed | cut -f2- -d ':' | xargs) if [ ! -z "${GMM_VERSION}" ]; then perl -pi -e "s/^ intel-gmmlib(?=,|$)/ intel-gmmlib (=$GMM_VERSION)/" "$BUILD_DIR/debian/control" fi GMM_DEVEL_VERSION=$(apt-cache policy intel-gmmlib-devel | grep Installed | cut -f2- -d ':' | xargs) if [ ! -z "${GMM_DEVEL_VERSION}" ]; then perl -pi -e "s/^ intel-gmmlib-devel(?=,|$)/ intel-gmmlib-devel (=$GMM_DEVEL_VERSION)/" "$BUILD_DIR/debian/control" fi IGC_VERSION=$(apt-cache policy intel-igc-opencl | grep Installed | cut -f2- -d ':' | xargs) if [ ! -z "${IGC_VERSION}" ]; then perl -pi -e "s/^ intel-igc-opencl(?=,|$)/ intel-igc-opencl (=$IGC_VERSION)/" "$BUILD_DIR/debian/control" fi IGC_DEVEL_VERSION=$(apt-cache policy intel-igc-opencl-devel | grep Installed | cut -f2- -d ':' | xargs) if [ ! 
-z "${IGC_DEVEL_VERSION}" ]; then perl -pi -e "s/^ intel-igc-opencl-devel(?=,|$)/ intel-igc-opencl-devel (=$IGC_DEVEL_VERSION)/" "$BUILD_DIR/debian/control" fi fi # Update rules file with new version perl -pi -e "s/^ver = .*/ver = $NEO_L0_VERSION_PATCH/" $BUILD_DIR/debian/rules #needs a top level CMAKE file cat << EOF | tee $BUILD_DIR/CMakeLists.txt cmake_minimum_required (VERSION 3.2 FATAL_ERROR) project(neo) add_subdirectory($REPO_DIR neo) EOF ( cd $BUILD_DIR if [ "${LOG_CCACHE_STATS}" == "1" ]; then ccache -z fi export DEB_BUILD_OPTIONS="nodocs notest nocheck" export DH_VERBOSE=1 if [ "${CMAKE_BUILD_TYPE}" != "Release" ]; then export DH_INTERNAL_BUILDFLAGS=1 fi if [ "${ENABLE_ULT}" == "0" ]; then NEO_SKIP_UNIT_TESTS="TRUE" fi if [ "${TARGET_ARCH}" == "aarch64" ]; then NEO_SKIP_UNIT_TESTS="TRUE" export NEO_DISABLE_BUILTINS_COMPILATION="TRUE" fi export NEO_SKIP_UNIT_TESTS dch -v ${PKG_VERSION} -m "build $PKG_VERSION" dpkg-buildpackage -j`nproc --all` -us -uc -b -rfakeroot sudo dpkg -i --force-depends ../*.deb if [ "${LOG_CCACHE_STATS}" == "1" ]; then ccache -s ccache -s | grep 'cache hit rate' | cut -d ' ' -f 4- | xargs -I{} echo LevelZero GPU Driver {} >> $REPO_DIR/../output/logs/ccache.log fi ) mkdir -p ${REPO_DIR}/../output/dbgsym mv ${REPO_DIR}/../*.deb ${REPO_DIR}/../output/ find ${REPO_DIR}/.. 
-maxdepth 1 -name \*.ddeb -type f -print0 | xargs -0r mv -t ${REPO_DIR}/../output/dbgsym/ compute-runtime-22.14.22890/scripts/packaging/l0_gpu_driver/build_l0_gpu_driver_rpm.sh000077500000000000000000000072221422164147700306660ustar00rootroot00000000000000#!/usr/bin/env bash # # Copyright (C) 2021-2022 Intel Corporation # # SPDX-License-Identifier: MIT # set -ex DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" REPO_DIR="$( cd "$( dirname "${DIR}/../../../../" )" && pwd )" BUILD_DIR="${REPO_DIR}/../build_l0_gpu_driver" ENABLE_L0_GPU_DRIVER="${ENABLE_L0_GPU_DRIVER:-1}" if [ "${ENABLE_L0_GPU_DRIVER}" == "0" ]; then exit 0 fi BUILD_SRPM="${BUILD_SRPM:-1}" BUILD_RPM="${BUILD_RPM:-1}" SPEC_FILE="${SPEC_FILE:-${OS_TYPE}}" if [[ ${SPEC_FILE} == rhel* ]]; then SPEC_FILE=${SPEC_FILE%.*} fi export BUILD_ID="${BUILD_ID:-1}" export CMAKE_BUILD_TYPE="${CMAKE_BUILD_TYPE:-Release}" ( if [ "${BUILD_SRPM}" == "1" ]; then BRANCH_SUFFIX="$( cat ${REPO_DIR}/.branch )" PACKAGING_DIR="$REPO_DIR/scripts/packaging/l0_gpu_driver/${SPEC_FILE}" SPEC_SRC="$PACKAGING_DIR/SPECS/l0_gpu_driver.spec" SPEC="$BUILD_DIR/SPECS/l0_gpu_driver.spec" COPYRIGHT="${REPO_DIR}/scripts/packaging/${BRANCH_SUFFIX}/l0_gpu_driver/${SPEC_FILE}/copyright" build_args=() if [ "${CMAKE_BUILD_TYPE}" == "Debug" ]; then export CFLAGS=" " export CXXFLAGS=" " export FFLAGS=" " build_args+=(--define 'name_suffix -debug') fi source "${REPO_DIR}/scripts/packaging/${BRANCH_SUFFIX}/functions.sh" source "${REPO_DIR}/scripts/packaging/${BRANCH_SUFFIX}/l0_gpu_driver/l0_gpu_driver.sh" get_api_version # API_VERSION-API_VERSION_SRC and API_RPM_MODEL_LINK get_l0_gpu_driver_version # NEO_L0_VERSION_MAJOR.NEO_L0_VERSION_MINOR.NEO_L0_VERSION_PATCH VERSION="${NEO_L0_VERSION_MAJOR}.${NEO_L0_VERSION_MINOR}.${NEO_L0_VERSION_PATCH}.${API_VERSION}" RELEASE="${API_VERSION_SRC}${API_RPM_MODEL_LINK}" RELEASE_WITH_REGKEYS="${RELEASE_WITH_REGKEYS:-FALSE}" #setup rpm build tree rm -rf $BUILD_DIR mkdir -p 
$BUILD_DIR/{BUILD,BUILDROOT,RPMS,SOURCES,SPECS,SRPMS} tar -c -I 'xz -6 -T0' -f $BUILD_DIR/SOURCES/compute-runtime-$VERSION.tar.xz -C $REPO_DIR --transform "s,${REPO_DIR:1},compute-runtime-$VERSION," --exclude=.git\* $REPO_DIR cp $COPYRIGHT $BUILD_DIR/SOURCES/ cp $SPEC_SRC $BUILD_DIR/SPECS/ PATCH_SPEC="${REPO_DIR}/scripts/packaging/${BRANCH_SUFFIX}/patch_spec.sh" if [ -f "$PATCH_SPEC" ]; then source "$PATCH_SPEC" fi # Update spec file with new version perl -pi -e "s/^%global ver .*/%global ver ${VERSION}/" $SPEC perl -pi -e "s/^%global rel .*/%global rel ${RELEASE}/" $SPEC perl -pi -e "s/^%global NEO_RELEASE_WITH_REGKEYS .*/%global NEO_RELEASE_WITH_REGKEYS ${RELEASE_WITH_REGKEYS}/" $SPEC perl -pi -e "s/^%global build_id .*/%global build_id ${NEO_L0_VERSION_PATCH}/" $SPEC rpmbuild --define "_topdir $BUILD_DIR" -bs $SPEC --define 'build_type ${CMAKE_BUILD_TYPE}' "${build_args[@]}" mkdir -p ${REPO_DIR}/../output/SRPMS echo -n ${VERSION} > ${REPO_DIR}/../output/.l0_gpu.version cp -v $BUILD_DIR/SRPMS/*.rpm ${REPO_DIR}/../output/SRPMS/ fi ) if [ "${BUILD_RPM}" == "1" ]; then LOG_CCACHE_STATS="${LOG_CCACHE_STATS:-0}" rm -rf $BUILD_DIR mkdir -p $BUILD_DIR/{BUILD,BUILDROOT,RPMS,SOURCES,SPECS,SRPMS} build_args=() build_args+=(--define "_topdir $BUILD_DIR") VERSION=$(cat ${REPO_DIR}/../output/.l0_gpu.version) if [ "${LOG_CCACHE_STATS}" == "1" ]; then ccache -z fi rpmbuild --rebuild ${REPO_DIR}/../output/SRPMS/intel-level-zero-gpu-${VERSION}*.src.rpm "${build_args[@]}" if [ "${LOG_CCACHE_STATS}" == "1" ]; then ccache -s ccache -s | grep 'cache hit rate' | cut -d ' ' -f 4- | xargs -I{} echo LevelZero GPU Driver {} >> $REPO_DIR/../output/logs/ccache.log fi sudo rpm -Uvh --force $BUILD_DIR/RPMS/*/*.rpm cp $BUILD_DIR/RPMS/*/*.rpm $REPO_DIR/../output/ fi compute-runtime-22.14.22890/scripts/packaging/l0_gpu_driver/l0_gpu_driver.sh000077500000000000000000000024731422164147700266340ustar00rootroot00000000000000#!/usr/bin/env bash # # Copyright (C) 2021 Intel Corporation # # 
SPDX-License-Identifier: MIT # set -ex get_l0_gpu_driver_version() { __NEO_L0_VERSION_MAJOR_TMP=$(grep -m1 NEO_L0_VERSION_MAJOR ${REPO_DIR}/version.cmake | awk -F"MAJOR " '{ print $2 }' | awk -F")" '{ print $1 }') NEO_L0_VERSION_MAJOR="${NEO_L0_VERSION_MAJOR:-$__NEO_L0_VERSION_MAJOR_TMP}" unset __NEO_L0_VERSION_MAJOR_TMP __NEO_L0_VERSION_MINOR_TMP=$(grep -m1 NEO_L0_VERSION_MINOR ${REPO_DIR}/version.cmake | awk -F"MINOR " '{ print $2 }' | awk -F")" '{ print $1 }') NEO_L0_VERSION_MINOR="${NEO_L0_VERSION_MINOR:-$__NEO_L0_VERSION_MINOR_TMP}" unset __NEO_L0_VERSION_MINOR_TMP __NEO_TAG_TMP=$(git -C ${REPO_DIR} describe --abbrev=1 --tags | awk -F"." '{ nn=split($NF, nfa, "."); if(nn==2) {printf("%s-%s", nfa[1], nfa[2]);} else {print $NF;} }') NEO_TAG="${NEO_TAG:-$__NEO_TAG_TMP}" unset __NEO_TAG_TMP __NEO_L0_VERSION_PATCH_TMP=$(echo $NEO_TAG | awk -F '-' '{ print $1; }' | sed 's/^0*//') NEO_L0_VERSION_PATCH="${NEO_L0_VERSION_PATCH:-$__NEO_L0_VERSION_PATCH_TMP}" unset __NEO_L0_VERSION_PATCH_TMP __NEO_L0_VERSION_HOTFIX_TMP=$(echo $NEO_TAG | awk -F '-' '{ if(NF>1) {printf(".%s", $2);} }') NEO_L0_VERSION_HOTFIX="${NEO_L0_VERSION_HOTFIX:-$__NEO_L0_VERSION_HOTFIX_TMP}" unset __NEO_L0_VERSION_HOTFIX_TMP } compute-runtime-22.14.22890/scripts/packaging/l0_gpu_driver/rhel_8/000077500000000000000000000000001422164147700247075ustar00rootroot00000000000000compute-runtime-22.14.22890/scripts/packaging/l0_gpu_driver/rhel_8/SPECS/000077500000000000000000000000001422164147700255645ustar00rootroot00000000000000compute-runtime-22.14.22890/scripts/packaging/l0_gpu_driver/rhel_8/SPECS/l0_gpu_driver.spec000066400000000000000000000054051422164147700312050ustar00rootroot00000000000000#it's changed by external script %global ver xxx %global rel xxx %global build_id xxx %global NEO_RELEASE_WITH_REGKEYS FALSE %define _source_payload w5T16.xzdio %define _binary_payload w5T16.xzdio Name: intel-level-zero-gpu Version: %{ver} Release: %{rel}%{?dist} Summary: Intel(R) GPU Driver for oneAPI Level Zero. 
Group: System Environment/Libraries License: MIT URL: https://github.com/intel/compute-runtime Source0: %{url}/archive/%{version}/compute-runtime-%{version}.tar.xz Source1: copyright BuildRequires: make libva-devel gcc-c++ cmake BuildRequires: intel-gmmlib-devel BuildRequires: intel-igc-opencl-devel Requires: intel-gmmlib Requires: intel-igc-opencl %description Runtime library providing the ability to use Intel GPUs with the oneAPI Level Zero programming interface. Level Zero is the primary low-level interface for language and runtime libraries. Level Zero offers fine-grain control over accelerators capabilities, delivering a simplified and low-latency interface to hardware, and efficiently exposing hardware capabilities to applications. %define debug_package %{nil} %prep %autosetup -p1 -n compute-runtime-%{ver} %build mkdir build cd build %cmake .. \ -DNEO_VERSION_BUILD=%{build_id} \ -DCMAKE_BUILD_TYPE=Release \ -DNEO_BUILD_WITH_OCL=FALSE \ -DNEO_SKIP_UNIT_TESTS=1 \ -DNEO_ENABLE_i915_PRELIM_DETECTION=TRUE \ -DRELEASE_WITH_REGKEYS=%{NEO_RELEASE_WITH_REGKEYS} \ -DCMAKE_INSTALL_PREFIX=/usr \ -DL0_INSTALL_UDEV_RULES=1 \ -DUDEV_RULES_DIR=/etc/udev/rules.d/ %make_build %install cd build %make_install #Remove OpenCL files before installing rm -rf %{buildroot}%{_libdir}/intel-opencl/ rm -rf %{buildroot}%{_sysconfdir}/OpenCL/ rm -rf %{buildroot}%{_bindir}/ocloc rm -rf %{buildroot}%{_libdir}/libocloc.so rm -rf %{buildroot}%{_includedir}/ocloc_api.h #Remove debug files rm -f %{buildroot}/%{_libdir}/intel-opencl/libigdrcl.so.debug rm -f %{buildroot}/%{_libdir}/libocloc.so.debug rm -rf %{buildroot}/usr/lib/debug/ #insert license into package mkdir -p %{buildroot}/usr/share/doc/intel-level-zero-gpu/ cp -pR %{_sourcedir}/copyright %{buildroot}/usr/share/doc/intel-level-zero-gpu/. 
%files %defattr(-,root,root) %{_libdir}/libze_intel_gpu.so.* %{_sharedstatedir}/libze_intel_gpu/pci_bind_status_file %{_sharedstatedir}/libze_intel_gpu/wedged_file %{_sysconfdir}/udev/rules.d/99-drm_ze_intel_gpu.rules /usr/share/doc/intel-level-zero-gpu/copyright %config(noreplace) %doc %changelog * Mon Aug 10 2020 Spruit, Neil R - 1.0.17625 - Update to 1.0.17625 * Tue Apr 28 2020 Jacek Danecki - 0.8.16582-1 - Update to 0.8.16582 * Tue Mar 24 2020 Spruit, Neil R - 0.8.0 * Update to 0.8.0 * Fri Mar 13 2020 Pavel Androniychuk - 0.4.1 - Spec file init compute-runtime-22.14.22890/scripts/packaging/l0_gpu_driver/rhel_8/copyright000066400000000000000000000020741422164147700266450ustar00rootroot00000000000000--- MIT License Copyright (c) 2018-2021 Intel Corporation Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
compute-runtime-22.14.22890/scripts/packaging/l0_gpu_driver/sles_15.3/000077500000000000000000000000001422164147700251425ustar00rootroot00000000000000compute-runtime-22.14.22890/scripts/packaging/l0_gpu_driver/sles_15.3/SPECS/000077500000000000000000000000001422164147700260175ustar00rootroot00000000000000compute-runtime-22.14.22890/scripts/packaging/l0_gpu_driver/sles_15.3/SPECS/l0_gpu_driver.spec000066400000000000000000000056151422164147700314430ustar00rootroot00000000000000# spec file for package intel-level-zero-gpu #it's changed by external script %global ver xxx %global rel xxx %global build_id xxx %global NEO_RELEASE_WITH_REGKEYS FALSE %define gmmlib_sover 12 %define igc_sover 1 %if !0%{?build_type:1} %define build_type Release %endif %define _source_payload w5T16.xzdio %define _binary_payload w5T16.xzdio Name: intel-level-zero-gpu Version: %{ver} Release: %{rel}%{?dist} Summary: Intel(R) GPU Driver for oneAPI Level Zero. Group: System Environment/Libraries License: MIT URL: https://github.com/intel/compute-runtime Source0: %{url}/archive/%{version}/compute-runtime-%{version}.tar.xz Source1: copyright ExclusiveArch: x86_64 BuildRequires: cmake make gcc-c++ #BuildRequires: libva-devel BuildRequires: libigdgmm%{?name_suffix}-devel BuildRequires: libigdfcl%{?name_suffix}-devel Requires: libigc%{igc_sover}%{?name_suffix} Requires: libigdfcl%{igc_sover}%{?name_suffix} Requires: libigdgmm%{gmmlib_sover}%{?name_suffix} %description Runtime library providing the ability to use Intel GPUs with the oneAPI Level Zero programming interface. Level Zero is the primary low-level interface for language and runtime libraries. Level Zero offers fine-grain control over accelerators capabilities, delivering a simplified and low-latency interface to hardware, and efficiently exposing hardware capabilities to applications. %debug_package %prep %autosetup -p1 -n compute-runtime-%{version} %build %cmake .. 
\ -DNEO_VERSION_BUILD=%{build_id} \ -DCMAKE_BUILD_TYPE=%{build_type} \ -DCMAKE_INSTALL_PREFIX=/usr \ -DNEO_BUILD_WITH_OCL=FALSE \ -DNEO_SKIP_UNIT_TESTS=TRUE \ -DNEO_ENABLE_i915_PRELIM_DETECTION=TRUE \ -DRELEASE_WITH_REGKEYS=%{NEO_RELEASE_WITH_REGKEYS} \ -DL0_INSTALL_UDEV_RULES=1 \ -DUDEV_RULES_DIR=/etc/udev/rules.d/ \ -Wno-dev %make_build %install cd build %make_install #Remove OpenCL files before installing rm -rf %{buildroot}%{_libdir}/intel-opencl/ rm -rf %{buildroot}%{_sysconfdir}/OpenCL/ rm -rf %{buildroot}%{_bindir}/ocloc rm -rf %{buildroot}%{_libdir}/libocloc.so rm -rf %{buildroot}%{_includedir}/ocloc_api.h #Remove debug files rm -f %{buildroot}/%{_libdir}/intel-opencl/libigdrcl.so.debug rm -f %{buildroot}/%{_libdir}/libocloc.so.debug rm -rf %{buildroot}/usr/lib/debug/ #insert license into package mkdir -p %{buildroot}/usr/share/doc/intel-level-zero-gpu%{?name_suffix}/ cp -pR %{_sourcedir}/copyright %{buildroot}/usr/share/doc/intel-level-zero-gpu%{?name_suffix}/. %files -n intel-level-zero-gpu%{?name_suffix} %defattr(-,root,root) %{_libdir}/libze_intel_gpu.so.* %{_sharedstatedir}/libze_intel_gpu/pci_bind_status_file %{_sharedstatedir}/libze_intel_gpu/wedged_file %{_sysconfdir}/udev/rules.d/99-drm_ze_intel_gpu.rules /usr/share/doc/intel-level-zero-gpu%{?name_suffix}/copyright %config(noreplace) %doc %changelog * Mon Sep 13 2021 Compute-Runtime-Automation - Initial spec file for SLES 15.3 compute-runtime-22.14.22890/scripts/packaging/l0_gpu_driver/sles_15.3/copyright000066400000000000000000000020741422164147700271000ustar00rootroot00000000000000--- MIT License Copyright (c) 2018-2021 Intel Corporation Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the 
Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. compute-runtime-22.14.22890/scripts/packaging/l0_gpu_driver/ubuntu_20.04/000077500000000000000000000000001422164147700255735ustar00rootroot00000000000000compute-runtime-22.14.22890/scripts/packaging/l0_gpu_driver/ubuntu_20.04/control000066400000000000000000000016141422164147700272000ustar00rootroot00000000000000Source: intel-compute-runtime Section: libs Priority: optional Maintainer: Intel Graphics Team XSBC-Original-Maintainer: Debian OpenCL Maintainers Build-Depends: debhelper (>= 11), cmake, level-zero-devel, intel-igc-opencl-devel, intel-gmmlib-devel, # libva-dev, pkg-config Standards-Version: 4.3.0 Homepage: https://github.com/oneapi-src/level-zero Package: intel-level-zero-gpu Architecture: amd64 Depends: ${shlibs:Depends}, ${misc:Depends}, intel-igc-opencl, intel-gmmlib Description: Intel(R) Graphics Compute Runtime for oneAPI Level Zero. Level Zero is the primary low-level interface for language and runtime libraries. Level Zero offers fine-grain control over accelerators capabilities, delivering a simplified and low-latency interface to hardware, and efficiently exposing hardware capabilities to applications. 
compute-runtime-22.14.22890/scripts/packaging/l0_gpu_driver/ubuntu_20.04/copyright000066400000000000000000000026321422164147700275310ustar00rootroot00000000000000Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ Upstream-Name: level-zero Source: https://github.com/oneapi-src/level-zero Files: third_party/opencl_headers/* third_party/opengl_headers/GL/glext.h Copyright: 2008-2021 The Khronos Group Inc. 2008-2021 Intel Corporation License: MIT Files: * Copyright: 2018-2021 Intel Corporation License: MIT Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: . The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. . THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
compute-runtime-22.14.22890/scripts/packaging/l0_gpu_driver/ubuntu_20.04/debian/000077500000000000000000000000001422164147700270155ustar00rootroot00000000000000compute-runtime-22.14.22890/scripts/packaging/l0_gpu_driver/ubuntu_20.04/debian/changelog000066400000000000000000000005561422164147700306750ustar00rootroot00000000000000intel-compute-runtime (1.0) unstable; urgency=medium * Level Zero GPU Driver implemented based on v1.0 L0 Specification -- Neil Spruit Fri, 7 Aug 2020 12:26:37 +0300 intel-compute-runtime (0.4.0) unstable; urgency=medium * Ubuntu package init -- Pavel Androniychuk Fri, 14 Feb 2020 12:26:37 +0300 compute-runtime-22.14.22890/scripts/packaging/l0_gpu_driver/ubuntu_20.04/debian/compat000066400000000000000000000000031422164147700302140ustar00rootroot0000000000000011 compute-runtime-22.14.22890/scripts/packaging/l0_gpu_driver/ubuntu_20.04/debian/rules000077500000000000000000000023741422164147700301030ustar00rootroot00000000000000#!/usr/bin/make -f ver = xxx %: dh $@ --builddir build/ --buildsystem=cmake+ninja NEO_DISABLE_BUILTINS_COMPILATION ?= FALSE RELEASE_WITH_REGKEYS ?= FALSE IGDRCL_FORCE_USE_LIBVA ?= FALSE NEO_SKIP_UNIT_TESTS ?= FALSE NEO_ENABLE_i915_PRELIM_DETECTION ?= TRUE override_dh_auto_configure: dh_auto_configure -- ${NEO_BUILD_EXTRA_OPTS} \ -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \ -DNEO_VERSION_BUILD=$(ver) \ -DNEO_SKIP_UNIT_TESTS=${NEO_SKIP_UNIT_TESTS} \ -DNEO_ENABLE_i915_PRELIM_DETECTION=${NEO_ENABLE_i915_PRELIM_DETECTION} \ -DNEO_DISABLE_BUILTINS_COMPILATION=${NEO_DISABLE_BUILTINS_COMPILATION} \ -DNEO_BUILD_WITH_OCL=FALSE \ -DRELEASE_WITH_REGKEYS=${RELEASE_WITH_REGKEYS} \ -DIGDRCL_FORCE_USE_LIBVA=${IGDRCL_FORCE_USE_LIBVA} \ -DL0_INSTALL_UDEV_RULES=1 \ -DUDEV_RULES_DIR=/etc/udev/rules.d/ \ -Wno-dev override_dh_auto_build: ${BUILD_PREFIX} dh_auto_build override_dh_install: rm -rf debian/intel-level-zero-gpu/etc/ rm -rf debian/intel-level-zero-gpu/usr/lib/${DEB_TARGET_MULTIARCH}/intel-opencl/ rm -rf debian/intel-level-zero-gpu/usr/bin/ 
rm -rf debian/intel-level-zero-gpu/usr/include/ocloc_api.h rm -rf debian/intel-level-zero-gpu/usr/lib/${DEB_TARGET_MULTIARCH}/libocloc.so rm -rvf debian/intel-level-zero-gpu/usr/lib/debug dh_install compute-runtime-22.14.22890/scripts/packaging/l0_gpu_driver/ubuntu_20.04/debian/source/000077500000000000000000000000001422164147700303155ustar00rootroot00000000000000compute-runtime-22.14.22890/scripts/packaging/l0_gpu_driver/ubuntu_20.04/debian/source/format000066400000000000000000000000141422164147700315230ustar00rootroot000000000000003.0 (quilt) compute-runtime-22.14.22890/scripts/packaging/l0_gpu_driver/ubuntu_20.04/shlibs.local000066400000000000000000000000641422164147700300730ustar00rootroot00000000000000libigdgmm 11 intel-gmmlib libigdgmm 12 intel-gmmlib compute-runtime-22.14.22890/scripts/packaging/opencl/000077500000000000000000000000001422164147700222455ustar00rootroot00000000000000compute-runtime-22.14.22890/scripts/packaging/opencl/build_opencl_deb.sh000077500000000000000000000104431422164147700260570ustar00rootroot00000000000000#!/usr/bin/env bash # # Copyright (C) 2021-2022 Intel Corporation # # SPDX-License-Identifier: MIT # set -ex DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" REPO_DIR="$( cd "$( dirname "${DIR}/../../../../" )" && pwd )" BUILD_DIR="${REPO_DIR}/../build_opencl" NEO_SKIP_UNIT_TESTS=${NEO_SKIP_UNIT_TESTS:-FALSE} BRANCH_SUFFIX="$( cat ${REPO_DIR}/.branch )" ENABLE_OPENCL="${ENABLE_OPENCL:-1}" if [ "${ENABLE_OPENCL}" == "0" ]; then exit 0 fi LOG_CCACHE_STATS="${LOG_CCACHE_STATS:-0}" export BUILD_ID="${BUILD_ID:-1}" export CMAKE_BUILD_TYPE="${CMAKE_BUILD_TYPE:-Release}" export DO_NOT_RUN_AUB_TESTS="${DO_NOT_RUN_AUB_TESTS:-FALSE}" source "${REPO_DIR}/scripts/packaging/${BRANCH_SUFFIX}/functions.sh" source "${REPO_DIR}/scripts/packaging/${BRANCH_SUFFIX}/opencl/opencl.sh" get_api_version # API_VERSION-API_VERSION_SRC and API_DEB_MODEL_LINK get_opencl_version # NEO_OCL_VERSION_MAJOR.NEO_OCL_VERSION_MINOR.NEO_OCL_VERSION_BUILD export 
NEO_OCL_VERSION_MAJOR export NEO_OCL_VERSION_MINOR export NEO_OCL_VERSION_BUILD if [ -z "${BRANCH_SUFFIX}" ]; then VERSION="${NEO_OCL_VERSION_MAJOR}.${NEO_OCL_VERSION_MINOR}.${NEO_OCL_VERSION_BUILD}${API_DEB_MODEL_LINK}" else VERSION="1:${NEO_OCL_VERSION_MAJOR}.${NEO_OCL_VERSION_MINOR}.${NEO_OCL_VERSION_BUILD}.${API_VERSION}-${API_VERSION_SRC}${API_DEB_MODEL_LINK}" fi PKG_VERSION=${VERSION} if [ "${CMAKE_BUILD_TYPE}" != "Release" ]; then PKG_VERSION="${PKG_VERSION}+$(echo "$CMAKE_BUILD_TYPE" | tr '[:upper:]' '[:lower:]')1" fi rm -rf $BUILD_DIR mkdir -p $BUILD_DIR/debian COPYRIGHT="${REPO_DIR}/scripts/packaging/${BRANCH_SUFFIX}/opencl/${OS_TYPE}/copyright" CONTROL="${REPO_DIR}/scripts/packaging/${BRANCH_SUFFIX}/opencl/${OS_TYPE}/control" SHLIBS="${REPO_DIR}/scripts/packaging/${BRANCH_SUFFIX}/opencl/${OS_TYPE}/shlibs.local" cp -pR ${REPO_DIR}/scripts/packaging/opencl/${OS_TYPE}/debian/* $BUILD_DIR/debian/ cp $COPYRIGHT $BUILD_DIR/debian/ cp $CONTROL $BUILD_DIR/debian/ if [ -f "${SHLIBS}" ]; then cp $SHLIBS $BUILD_DIR/debian/ fi if [ -z "${BRANCH_SUFFIX}" ]; then GMM_VERSION=$(apt-cache policy intel-gmmlib | grep Installed | cut -f2- -d ':' | xargs) if [ ! -z "${GMM_VERSION}" ]; then perl -pi -e "s/^ intel-gmmlib(?=,|$)/ intel-gmmlib (=$GMM_VERSION)/" "$BUILD_DIR/debian/control" fi GMM_DEVEL_VERSION=$(apt-cache policy intel-gmmlib-devel | grep Installed | cut -f2- -d ':' | xargs) if [ ! -z "${GMM_DEVEL_VERSION}" ]; then perl -pi -e "s/^ intel-gmmlib-devel(?=,|$)/ intel-gmmlib-devel (=$GMM_DEVEL_VERSION)/" "$BUILD_DIR/debian/control" fi IGC_VERSION=$(apt-cache policy intel-igc-opencl | grep Installed | cut -f2- -d ':' | xargs) if [ ! -z "${IGC_VERSION}" ]; then perl -pi -e "s/^ intel-igc-opencl(?=,|$)/ intel-igc-opencl (=$IGC_VERSION)/" "$BUILD_DIR/debian/control" fi IGC_DEVEL_VERSION=$(apt-cache policy intel-igc-opencl-devel | grep Installed | cut -f2- -d ':' | xargs) if [ ! 
-z "${IGC_DEVEL_VERSION}" ]; then perl -pi -e "s/^ intel-igc-opencl-devel(?=,|$)/ intel-igc-opencl-devel (=$IGC_DEVEL_VERSION)/" "$BUILD_DIR/debian/control" fi fi #needs a top level CMAKE file cat << EOF | tee $BUILD_DIR/CMakeLists.txt cmake_minimum_required (VERSION 3.2 FATAL_ERROR) project(neo) add_subdirectory($REPO_DIR neo) EOF ( cd $BUILD_DIR if [ "${LOG_CCACHE_STATS}" == "1" ]; then ccache -z fi export DEB_BUILD_OPTIONS="nodocs notest nocheck" export DH_VERBOSE=1 if [ "${CMAKE_BUILD_TYPE}" != "Release" ]; then export DH_INTERNAL_BUILDFLAGS=1 fi if [ "${ENABLE_ULT}" == "0" ]; then NEO_SKIP_UNIT_TESTS="TRUE" fi if [ "${TARGET_ARCH}" == "aarch64" ]; then NEO_SKIP_UNIT_TESTS="TRUE" export NEO_DISABLE_BUILTINS_COMPILATION="TRUE" fi export NEO_SKIP_UNIT_TESTS dch -v ${PKG_VERSION} -m "build $PKG_VERSION" -b dpkg-buildpackage -j`nproc --all` -us -uc -b -rfakeroot sudo dpkg -i --force-depends ../*.deb if [ "${LOG_CCACHE_STATS}" == "1" ]; then ccache -s ccache -s | grep 'cache hit rate' | cut -d ' ' -f 4- | xargs -I{} echo OpenCL {} >> $REPO_DIR/../output/logs/ccache.log fi ) mkdir -p ${REPO_DIR}/../output/dbgsym mv ${REPO_DIR}/../*.deb ${REPO_DIR}/../output/ find ${REPO_DIR}/.. 
-maxdepth 1 -name \*.ddeb -type f -print0 | xargs -0r mv -t ${REPO_DIR}/../output/dbgsym/ compute-runtime-22.14.22890/scripts/packaging/opencl/build_opencl_rpm.sh000077500000000000000000000077301422164147700261300ustar00rootroot00000000000000#!/usr/bin/env bash # # Copyright (C) 2021-2022 Intel Corporation # # SPDX-License-Identifier: MIT # set -ex DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" REPO_DIR="$( cd "$( dirname "${DIR}/../../../../" )" && pwd )" BUILD_DIR="${REPO_DIR}/../build_opencl" ENABLE_OPENCL="${ENABLE_OPENCL:-1}" if [ "${ENABLE_OPENCL}" == "0" ]; then exit 0 fi BUILD_SRPM="${BUILD_SRPM:-1}" BUILD_RPM="${BUILD_RPM:-1}" SPEC_FILE="${SPEC_FILE:-${OS_TYPE}}" if [[ ${SPEC_FILE} == rhel* ]]; then SPEC_FILE=${SPEC_FILE%.*} fi export BUILD_ID="${BUILD_ID:-1}" export CMAKE_BUILD_TYPE="${CMAKE_BUILD_TYPE:-Release}" ( if [ "${BUILD_SRPM}" == "1" ]; then BRANCH_SUFFIX="$( cat ${REPO_DIR}/.branch )" PACKAGING_DIR="$REPO_DIR/scripts/packaging/opencl/${SPEC_FILE}" SPEC_SRC="$PACKAGING_DIR/SPECS/opencl.spec" SPEC="$BUILD_DIR/SPECS/opencl.spec" COPYRIGHT="${REPO_DIR}/scripts/packaging/${BRANCH_SUFFIX}/opencl/${SPEC_FILE}/copyright" build_args=() if [ "${CMAKE_BUILD_TYPE}" == "Debug" ]; then export CFLAGS=" " export CXXFLAGS=" " export FFLAGS=" " build_args+=(--define 'name_suffix -debug') fi source "${REPO_DIR}/scripts/packaging/${BRANCH_SUFFIX}/functions.sh" source "${REPO_DIR}/scripts/packaging/${BRANCH_SUFFIX}/opencl/opencl.sh" get_opencl_version # NEO_OCL_VERSION_MAJOR.NEO_OCL_VERSION_MINOR.NEO_OCL_VERSION_BUILD get_api_version # API_VERSION-API_VERSION_SRC and API_RPM_MODEL_LINK VERSION="${NEO_OCL_VERSION_MAJOR}.${NEO_OCL_VERSION_MINOR}.${NEO_OCL_VERSION_BUILD}.${API_VERSION}" RELEASE="${API_VERSION_SRC}${API_RPM_MODEL_LINK}" RELEASE_WITH_REGKEYS="${RELEASE_WITH_REGKEYS:-FALSE}" #setup rpm build tree rm -rf $BUILD_DIR mkdir -p $BUILD_DIR/{BUILD,BUILDROOT,RPMS,SOURCES,SPECS,SRPMS} tar -c -I 'xz -6 -T0' -f 
$BUILD_DIR/SOURCES/compute-runtime-$VERSION.tar.xz -C $REPO_DIR --transform "s,${REPO_DIR:1},compute-runtime-$VERSION," --exclude=.git\* $REPO_DIR cp $COPYRIGHT $BUILD_DIR/SOURCES/ cp $SPEC_SRC $BUILD_DIR/SPECS/ PATCH_SPEC="${REPO_DIR}/scripts/packaging/${BRANCH_SUFFIX}/patch_spec.sh" if [ -f "$PATCH_SPEC" ]; then source "$PATCH_SPEC" fi if [ -z "${BRANCH_SUFFIX}" ]; then sed -i '/^Epoch: /d' ${SPEC} fi # Update spec file with new version perl -pi -e "s/^%global NEO_OCL_VERSION_MAJOR .*/%global NEO_OCL_VERSION_MAJOR ${NEO_OCL_VERSION_MAJOR}/" $BUILD_DIR/SPECS/opencl.spec perl -pi -e "s/^%global NEO_OCL_VERSION_MINOR .*/%global NEO_OCL_VERSION_MINOR ${NEO_OCL_VERSION_MINOR}/" $BUILD_DIR/SPECS/opencl.spec perl -pi -e "s/^%global NEO_OCL_VERSION_BUILD .*/%global NEO_OCL_VERSION_BUILD ${NEO_OCL_VERSION_BUILD}/" $BUILD_DIR/SPECS/opencl.spec perl -pi -e "s/^%global NEO_RELEASE_WITH_REGKEYS .*/%global NEO_RELEASE_WITH_REGKEYS ${RELEASE_WITH_REGKEYS}/" $BUILD_DIR/SPECS/opencl.spec perl -pi -e "s/^%global rel .*/%global rel ${RELEASE}/" $SPEC perl -pi -e "s/^%global ver .*/%global ver ${VERSION}/" $SPEC rpmbuild --define "_topdir $BUILD_DIR" -bs $SPEC --define 'build_type ${CMAKE_BUILD_TYPE}' "${build_args[@]}" mkdir -p ${REPO_DIR}/../output/SRPMS echo -n ${VERSION} > ${REPO_DIR}/../output/.opencl.version cp -v $BUILD_DIR/SRPMS/*.rpm ${REPO_DIR}/../output/SRPMS/ fi ) if [ "${BUILD_RPM}" == "1" ]; then LOG_CCACHE_STATS="${LOG_CCACHE_STATS:-0}" rm -rf $BUILD_DIR mkdir -p $BUILD_DIR/{BUILD,BUILDROOT,RPMS,SOURCES,SPECS,SRPMS} build_args=() build_args+=(--define "_topdir $BUILD_DIR") VERSION=$(cat ${REPO_DIR}/../output/.opencl.version) if [ "${LOG_CCACHE_STATS}" == "1" ]; then ccache -z fi rpmbuild --rebuild ${REPO_DIR}/../output/SRPMS/intel-opencl-${VERSION}*.src.rpm "${build_args[@]}" if [ "${LOG_CCACHE_STATS}" == "1" ]; then ccache -s ccache -s | grep 'cache hit rate' | cut -d ' ' -f 4- | xargs -I{} echo OpenCL {} >> $REPO_DIR/../output/logs/ccache.log fi sudo rpm -Uvh 
--force $BUILD_DIR/RPMS/*/*.rpm cp $BUILD_DIR/RPMS/*/*.rpm $REPO_DIR/../output/ fi compute-runtime-22.14.22890/scripts/packaging/opencl/opencl.sh000077500000000000000000000025311422164147700240650ustar00rootroot00000000000000#!/usr/bin/env bash # # Copyright (C) 2021 Intel Corporation # # SPDX-License-Identifier: MIT # set -ex get_opencl_version() { commit_time=$(git -C ${REPO_DIR} show -s --format=%ct) commit_ww=($(${REPO_DIR}/scripts/neo_ww_calculator.py ${commit_time})) date_m=${commit_ww[1]} __NEO_OCL_VERSION_MAJOR_TMP=$(echo $commit_ww | awk -F '.' '{print $1;}') NEO_OCL_VERSION_MAJOR="${NEO_OCL_VERSION_MAJOR:-$__NEO_OCL_VERSION_MAJOR_TMP}" unset __NEO_OCL_VERSION_MAJOR_TMP __NEO_OCL_VERSION_MINOR_TMP=$(echo $commit_ww | awk -F '.' '{print $2;}') NEO_OCL_VERSION_MINOR="${NEO_OCL_VERSION_MINOR:-$__NEO_OCL_VERSION_MINOR_TMP}" unset __NEO_OCL_VERSION_MINOR_TMP __NEO_TAG_TMP=$(git -C ${REPO_DIR} describe --abbrev=1 --tags | awk -F"." '{ nn=split($NF, nfa, "."); if(nn==2) {printf("%s-%s", nfa[1], nfa[2]);} else {print $NF;} }') NEO_TAG="${NEO_TAG:-$__NEO_TAG_TMP}" unset __NEO_TAG_TMP __NEO_OCL_VERSION_BUILD_TMP=$(echo $NEO_TAG | awk -F '-' '{ print $1; }' | sed 's/^0*//') NEO_OCL_VERSION_BUILD="${NEO_OCL_VERSION_BUILD:-$__NEO_OCL_VERSION_BUILD_TMP}" unset __NEO_OCL_VERSION_BUILD_TMP __NEO_OCL_VERSION_HOTFIX_TMP=$(echo $NEO_TAG | awk -F '-' '{ if(NF>1) {printf(".%s", $2);} }') NEO_OCL_VERSION_HOTFIX="${NEO_OCL_VERSION_HOTFIX:-$__NEO_OCL_VERSION_HOTFIX_TMP}" unset __NEO_OCL_VERSION_HOTFIX_TMP } compute-runtime-22.14.22890/scripts/packaging/opencl/rhel_8/000077500000000000000000000000001422164147700234265ustar00rootroot00000000000000compute-runtime-22.14.22890/scripts/packaging/opencl/rhel_8/SPECS/000077500000000000000000000000001422164147700243035ustar00rootroot00000000000000compute-runtime-22.14.22890/scripts/packaging/opencl/rhel_8/SPECS/opencl.spec000066400000000000000000000061501422164147700264410ustar00rootroot00000000000000#it's changed by external script 
%global rel i1 %global ver xxx %global NEO_OCL_VERSION_MAJOR xxx %global NEO_OCL_VERSION_MINOR xxx %global NEO_OCL_VERSION_BUILD xxx %global NEO_RELEASE_WITH_REGKEYS FALSE %define _source_payload w5T16.xzdio %define _binary_payload w5T16.xzdio Name: intel-opencl Epoch: 1 Version: %{ver} Release: %{rel}%{?dist} Summary: Intel(R) Graphics Compute Runtime for OpenCL(TM) Group: System Environment/Libraries License: MIT URL: https://github.com/intel/compute-runtime Source0: %{url}/archive/%{version}/compute-runtime-%{version}.tar.xz Source1: copyright Requires: intel-gmmlib Requires: intel-igc-opencl BuildRequires: make libva-devel gcc-c++ cmake BuildRequires: intel-gmmlib-devel BuildRequires: intel-igc-opencl-devel %description Intel(R) Graphics Compute Runtime for OpenCL(TM) is a open source project to converge Intel's development efforts on OpenCL(TM) compute stacks supporting the GEN graphics hardware architecture. %package -n intel-ocloc Summary: ocloc package for opencl Requires: intel-igc-opencl %description -n intel-ocloc Intel(R) Graphics Compute Runtime for OpenCL(TM) is a open source project to converge Intel's development efforts on OpenCL(TM) compute stacks supporting the GEN graphics hardware architecture. %define debug_package %{nil} %prep %autosetup -p1 -n compute-runtime-%{ver} %build mkdir build cd build %cmake .. 
\ -DNEO_OCL_VERSION_MAJOR=%{NEO_OCL_VERSION_MAJOR} \ -DNEO_OCL_VERSION_MINOR=%{NEO_OCL_VERSION_MINOR} \ -DNEO_VERSION_BUILD=%{NEO_OCL_VERSION_BUILD} \ -DCMAKE_BUILD_TYPE=Release \ -DBUILD_WITH_L0=FALSE \ -DNEO_SKIP_UNIT_TESTS=TRUE \ -DNEO_ENABLE_i915_PRELIM_DETECTION=TRUE \ -DCMAKE_INSTALL_PREFIX=/usr \ -DRELEASE_WITH_REGKEYS=%{NEO_RELEASE_WITH_REGKEYS} %make_build %install cd build %make_install chmod +x %{buildroot}/%{_libdir}/intel-opencl/libigdrcl.so chmod +x %{buildroot}/%{_libdir}/libocloc.so rm -f %{buildroot}/%{_libdir}/intel-opencl/libigdrcl.so.debug rm -f %{buildroot}/%{_libdir}/libocloc.so.debug rm -rf %{buildroot}/usr/lib/debug/ #insert license into package mkdir -p %{buildroot}/usr/share/doc/intel-opencl/ cp -pR %{_sourcedir}/copyright %{buildroot}/usr/share/doc/intel-opencl/. mkdir -p %{buildroot}/usr/share/doc/intel-ocloc/ cp -pR %{_sourcedir}/copyright %{buildroot}/usr/share/doc/intel-ocloc/. %files %defattr(-,root,root) %{_libdir}/intel-opencl/libigdrcl.so %config(noreplace) /etc/OpenCL/vendors/intel.icd /usr/share/doc/intel-opencl/copyright %files -n intel-ocloc %defattr(-,root,root) %{_bindir}/ocloc %{_libdir}/libocloc.so %{_includedir}/ocloc_api.h %config(noreplace) /usr/share/doc/intel-ocloc/copyright %doc %changelog * Wed May 6 2020 Pavel Androniychuk - 20.17.16650 - Update spec files to pull version automatically. 
* Tue Apr 28 2020 Jacek Danecki - 20.16.16582-1 - Update to 20.16.16582 * Tue Mar 03 2020 Jacek Danecki - 20.08.15750-1 - Update to 20.08.15750 * Tue Jan 14 2020 Jacek Danecki - 20.01.15264-1 - Update to 20.01.15264 - Updated IGC - Updated gmmlib compute-runtime-22.14.22890/scripts/packaging/opencl/rhel_8/copyright000066400000000000000000000045371422164147700253720ustar00rootroot00000000000000--- MIT License Copyright (c) 2018-2021 Intel Corporation Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. --- Copyright (c) 2008-2015 The Khronos Group Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and/or associated documentation files (the "Materials"), to deal in the Materials without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Materials, and to permit persons to whom the Materials are furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Materials. MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. 
compute-runtime-22.14.22890/scripts/packaging/opencl/sles_15.3/000077500000000000000000000000001422164147700236615ustar00rootroot00000000000000compute-runtime-22.14.22890/scripts/packaging/opencl/sles_15.3/SPECS/000077500000000000000000000000001422164147700245365ustar00rootroot00000000000000compute-runtime-22.14.22890/scripts/packaging/opencl/sles_15.3/SPECS/opencl.spec000066400000000000000000000057241422164147700267020ustar00rootroot00000000000000# spec file for package intel-opencl #it's changed by external script %global rel i1 %global ver xxx %global NEO_OCL_VERSION_MAJOR xxx %global NEO_OCL_VERSION_MINOR xxx %global NEO_OCL_VERSION_BUILD xxx %global NEO_RELEASE_WITH_REGKEYS FALSE %define gmmlib_sover 12 %define igc_sover 1 %if !0%{?build_type:1} %define build_type Release %endif %define _source_payload w5T16.xzdio %define _binary_payload w5T16.xzdio Name: intel-opencl Epoch: 1 Version: %{ver} Release: %{rel}%{?dist} Summary: Intel(R) Graphics Compute Runtime for OpenCL(TM) License: MIT Group: System Environment/Libraries Url: https://github.com/intel/compute-runtime Source0: %{url}/archive/%{version}/compute-runtime-%{version}.tar.xz Source1: copyright ExclusiveArch: x86_64 BuildRequires: cmake make gcc-c++ #BuildRequires: libva-devel BuildRequires: libigdgmm%{?name_suffix}-devel BuildRequires: libigdfcl%{?name_suffix}-devel Requires: libigc%{igc_sover}%{?name_suffix} Requires: libigdfcl%{igc_sover}%{?name_suffix} Requires: libigdgmm%{gmmlib_sover}%{?name_suffix} %description Intel(R) Graphics Compute Runtime for OpenCL(TM) is a open source project to converge Intel's development efforts on OpenCL(TM) compute stacks supporting the GEN graphics hardware architecture. %package -n intel-ocloc%{?name_suffix} Summary: ocloc package for opencl %description -n intel-ocloc%{?name_suffix} %debug_package %prep %autosetup -p1 -n compute-runtime-%{version} %build %cmake .. 
\ -DNEO_OCL_VERSION_MAJOR=%{NEO_OCL_VERSION_MAJOR} \ -DNEO_OCL_VERSION_MINOR=%{NEO_OCL_VERSION_MINOR} \ -DNEO_VERSION_BUILD=%{NEO_OCL_VERSION_BUILD} \ -DCMAKE_BUILD_TYPE=%{build_type} \ -DBUILD_WITH_L0=FALSE \ -DCMAKE_INSTALL_PREFIX=/usr \ -DNEO_SKIP_UNIT_TESTS=TRUE \ -DNEO_ENABLE_i915_PRELIM_DETECTION=TRUE \ -DRELEASE_WITH_REGKEYS=%{NEO_RELEASE_WITH_REGKEYS} \ -Wno-dev %make_build %install cd build %make_install chmod +x %{buildroot}/%{_libdir}/intel-opencl/libigdrcl.so rm -f %{buildroot}/%{_libdir}/intel-opencl/libigdrcl.so.debug rm -f %{buildroot}/%{_libdir}/libocloc.so.debug rm -rf %{buildroot}/usr/lib/debug/ #insert license into package mkdir -p %{buildroot}/usr/share/doc/intel-opencl%{?name_suffix}/ cp -pR %{_sourcedir}/copyright %{buildroot}/usr/share/doc/intel-opencl%{?name_suffix}/. mkdir -p %{buildroot}/usr/share/doc/intel-ocloc%{?name_suffix}/ cp -pR %{_sourcedir}/copyright %{buildroot}/usr/share/doc/intel-ocloc%{?name_suffix}/. %files -n intel-opencl%{?name_suffix} %defattr(-,root,root) %{_sysconfdir}/OpenCL %{_libdir}/intel-opencl/libigdrcl.so /usr/share/doc/intel-opencl%{?name_suffix}/copyright %files -n intel-ocloc%{?name_suffix} %defattr(-,root,root) %{_bindir}/ocloc %{_libdir}/libocloc.so %{_includedir}/ocloc_api.h /usr/share/doc/intel-ocloc%{?name_suffix}/copyright %changelog * Mon Sep 13 2021 Compute-Runtime-Automation - Initial spec file for SLES 15.3 compute-runtime-22.14.22890/scripts/packaging/opencl/sles_15.3/copyright000066400000000000000000000045371422164147700256250ustar00rootroot00000000000000--- MIT License Copyright (c) 2018-2021 Intel Corporation Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, 
subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. --- Copyright (c) 2008-2015 The Khronos Group Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and/or associated documentation files (the "Materials"), to deal in the Materials without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Materials, and to permit persons to whom the Materials are furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Materials. MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. 
compute-runtime-22.14.22890/scripts/packaging/opencl/ubuntu_20.04/000077500000000000000000000000001422164147700243125ustar00rootroot00000000000000compute-runtime-22.14.22890/scripts/packaging/opencl/ubuntu_20.04/control000066400000000000000000000015651422164147700257240ustar00rootroot00000000000000Source: intel-compute-runtime Section: libs Priority: optional Maintainer: Intel Graphics Team XSBC-Original-Maintainer: Debian OpenCL Maintainers Build-Depends: debhelper (>= 11), cmake, intel-igc-opencl-devel, intel-gmmlib-devel, # libva-dev, pkg-config Standards-Version: 4.3.0 Homepage: https://github.com/intel/compute-runtime Package: intel-opencl-icd Architecture: amd64 arm64 Depends: ${shlibs:Depends}, ${misc:Depends}, intel-igc-opencl, intel-gmmlib, ocl-icd-libopencl1, Breaks: intel-opencl Replaces: intel-opencl Provides: opencl-icd Description: Intel graphics compute runtime for OpenCL The Intel(R) Graphics Compute Runtime for OpenCL(TM) is a open source project to converge Intel's development efforts on OpenCL(TM) compute stacks supporting the GEN graphics hardware architecture. compute-runtime-22.14.22890/scripts/packaging/opencl/ubuntu_20.04/copyright000066400000000000000000000117411422164147700262510ustar00rootroot00000000000000Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ Upstream-Name: compute-runtime Source: https://github.com/intel/compute-runtime Files: * Copyright: 2018-2020 Intel Corporation License: MIT Files: third_party/gtest/* Copyright: 2005-2008 Google Inc. License: BSD-3-clause Files: third_party/opencl_headers/* third_party/opengl_headers/GL/glext.h Copyright: 2008-2015 The Khronos Group Inc. 2008-2020 Intel Corporation License: MIT Files: third_party/opengl_headers/GL/gl.h Copyright: 1991-2000 Silicon Graphics, Inc. License: SGI Files: third_party/source_level_debugger/* Copyright: 2014-2016 Intel Corporation License: BSD-3-clause Files: third_party/uapi/* Copyright: 1999 Precision Insight, Inc. 
2000 VA Linux Systems, Inc. 2007 Dave Airlie 2007 Jakob Bornecrantz 2008 Red Hat Inc. 2003-2008 Tungsten Graphics, Inc. 2007-2008 Intel Corporation License: MIT Files: debian/* Copyright: 2019 Timo Aaltonen License: MIT License: MIT Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: . The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. . THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. License: BSD-3-clause Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: . * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. . 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. License: SGI License Applicability. Except to the extent portions of this file are made subject to an alternative license as permitted in the SGI Free Software License B, Version 1.1 (the "License"), the contents of this file are subject only to the provisions of the License. You may not use this file except in compliance with the License. You may obtain a copy of the License at Silicon Graphics, Inc., attn: Legal Services, 1600 Amphitheatre Parkway, Mountain View, CA 94043-1351, or at: . http://oss.sgi.com/projects/FreeB . Note that, as provided in the License, the Software is distributed on an "AS IS" basis, with ALL EXPRESS AND IMPLIED WARRANTIES AND CONDITIONS DISCLAIMED, INCLUDING, WITHOUT LIMITATION, ANY IMPLIED WARRANTIES AND CONDITIONS OF MERCHANTABILITY, SATISFACTORY QUALITY, FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT. . Original Code. The Original Code is: OpenGL Sample Implementation, Version 1.2.1, released January 26, 2000, developed by Silicon Graphics, Inc. The Original Code is Copyright (c) 1991-2000 Silicon Graphics, Inc. Copyright in any portions created by third parties is as indicated elsewhere herein. All Rights Reserved. . 
Additional Notice Provisions: This software was created using the OpenGL(R) version 1.2.1 Sample Implementation published by SGI, but has not been independently verified as being compliant with the OpenGL(R) version 1.2.1 Specification. compute-runtime-22.14.22890/scripts/packaging/opencl/ubuntu_20.04/debian/000077500000000000000000000000001422164147700255345ustar00rootroot00000000000000compute-runtime-22.14.22890/scripts/packaging/opencl/ubuntu_20.04/debian/changelog000066400000000000000000000002601422164147700274040ustar00rootroot00000000000000intel-compute-runtime (19.29.13530-1) unstable; urgency=medium * Initial release (Closes: #926706) -- Timo Aaltonen Thu, 01 Aug 2019 12:26:37 +0300 compute-runtime-22.14.22890/scripts/packaging/opencl/ubuntu_20.04/debian/compat000066400000000000000000000000031422164147700267330ustar00rootroot0000000000000011 compute-runtime-22.14.22890/scripts/packaging/opencl/ubuntu_20.04/debian/rules000077500000000000000000000020361422164147700266150ustar00rootroot00000000000000#!/usr/bin/make -f %: dh $@ --builddir build/ --buildsystem=cmake+ninja NEO_DISABLE_BUILTINS_COMPILATION ?= FALSE RELEASE_WITH_REGKEYS ?= FALSE IGDRCL_FORCE_USE_LIBVA ?= FALSE NEO_SKIP_UNIT_TESTS ?= FALSE NEO_ENABLE_i915_PRELIM_DETECTION ?= TRUE override_dh_auto_configure: dh_auto_configure -- ${NEO_BUILD_EXTRA_OPTS} \ -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \ -DNEO_OCL_VERSION_MAJOR=${NEO_OCL_VERSION_MAJOR} \ -DNEO_OCL_VERSION_MINOR=${NEO_OCL_VERSION_MINOR} \ -DNEO_VERSION_BUILD=${NEO_OCL_VERSION_BUILD} \ -DDO_NOT_RUN_AUB_TESTS=${DO_NOT_RUN_AUB_TESTS} \ -DNEO_SKIP_UNIT_TESTS=${NEO_SKIP_UNIT_TESTS} \ -DNEO_ENABLE_i915_PRELIM_DETECTION=${NEO_ENABLE_i915_PRELIM_DETECTION} \ -DNEO_DISABLE_BUILTINS_COMPILATION=${NEO_DISABLE_BUILTINS_COMPILATION} \ -DBUILD_WITH_L0=FALSE \ -DRELEASE_WITH_REGKEYS=${RELEASE_WITH_REGKEYS} \ -DIGDRCL_FORCE_USE_LIBVA=${IGDRCL_FORCE_USE_LIBVA} \ -Wno-dev override_dh_auto_build: ${BUILD_PREFIX} dh_auto_build override_dh_install: rm -rvf 
debian/intel-opencl-icd/usr/lib/debug dh_install compute-runtime-22.14.22890/scripts/packaging/opencl/ubuntu_20.04/debian/source/000077500000000000000000000000001422164147700270345ustar00rootroot00000000000000compute-runtime-22.14.22890/scripts/packaging/opencl/ubuntu_20.04/debian/source/format000066400000000000000000000000141422164147700302420ustar00rootroot000000000000003.0 (quilt) compute-runtime-22.14.22890/scripts/packaging/opencl/ubuntu_20.04/shlibs.local000066400000000000000000000000641422164147700266120ustar00rootroot00000000000000libigdgmm 11 intel-gmmlib libigdgmm 12 intel-gmmlib compute-runtime-22.14.22890/scripts/tests/000077500000000000000000000000001422164147700202035ustar00rootroot00000000000000compute-runtime-22.14.22890/scripts/tests/copyright/000077500000000000000000000000001422164147700222135ustar00rootroot00000000000000compute-runtime-22.14.22890/scripts/tests/copyright/out/000077500000000000000000000000001422164147700230225ustar00rootroot00000000000000compute-runtime-22.14.22890/scripts/tests/copyright/out/file1.cpp000066400000000000000000000001641422164147700245270ustar00rootroot00000000000000/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ /* * No copyright at all */ compute-runtime-22.14.22890/scripts/tests/copyright/out/file1.sh000066400000000000000000000001431422164147700243540ustar00rootroot00000000000000#!/bin/bash # # Copyright (C) 2022 Intel Corporation # # SPDX-License-Identifier: MIT # echo 123 compute-runtime-22.14.22890/scripts/tests/copyright/out/file2.cpp000066400000000000000000000001321422164147700245230ustar00rootroot00000000000000/* * Copyright (C) 2017-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ compute-runtime-22.14.22890/scripts/tests/copyright/out/file2.sh000066400000000000000000000001211422164147700243510ustar00rootroot00000000000000# # Copyright (C) 2017-2022 Intel Corporation # # SPDX-License-Identifier: MIT # 
compute-runtime-22.14.22890/scripts/tests/copyright/out/file3.cpp000066400000000000000000000002011422164147700245210ustar00rootroot00000000000000/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ // // This comment shouldn't be removed // compute-runtime-22.14.22890/scripts/tests/copyright/out/file3.sh000066400000000000000000000001651422164147700243620ustar00rootroot00000000000000# # Copyright (C) 2022 Intel Corporation # # SPDX-License-Identifier: MIT # # # This comment shouldn't be removed # compute-runtime-22.14.22890/scripts/tests/copyright/out/file4.cpp000066400000000000000000000002211422164147700245240ustar00rootroot00000000000000/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ /* * No copyright at all */ #include "file.h" class C; compute-runtime-22.14.22890/scripts/tests/copyright/out/file4.sh000066400000000000000000000001761422164147700243650ustar00rootroot00000000000000# # Copyright (C) 2022 Intel Corporation # # SPDX-License-Identifier: MIT # # # No copyright at all # echo "file.h" exit 1 compute-runtime-22.14.22890/scripts/tests/copyright/out/file5.cpp000066400000000000000000000001321422164147700245260ustar00rootroot00000000000000/* * Copyright (C) 2012-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ compute-runtime-22.14.22890/scripts/tests/copyright/out/file6.cpp000066400000000000000000000002101422164147700245240ustar00rootroot00000000000000/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once // header file with # in first line compute-runtime-22.14.22890/scripts/tests/copyright/out/file7.cpp000066400000000000000000000002161422164147700245330ustar00rootroot00000000000000/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include // header file with # in first line compute-runtime-22.14.22890/scripts/verify.bat000066400000000000000000000006751422164147700210450ustar00rootroot00000000000000:: :: Copyright (C) 
2020-2021 Intel Corporation :: :: SPDX-License-Identifier: MIT :: @where appverif @if not "%ERRORLEVEL%"=="0" ( @echo No appverif command. cmd /c exit /b 0 set testError=0 goto end ) appverif.exe -enable Exceptions Handles Heaps Leak Locks Memory Threadpool TLS DirtyStacks -for %1 %* set testError=%errorlevel% echo App Verifier returned: %testError% appverif.exe -disable * -for * > nul :end exit /b %testError% compute-runtime-22.14.22890/shared/000077500000000000000000000000001422164147700166205ustar00rootroot00000000000000compute-runtime-22.14.22890/shared/CMakeLists.txt000066400000000000000000000005341422164147700213620ustar00rootroot00000000000000# # Copyright (C) 2021 Intel Corporation # # SPDX-License-Identifier: MIT # add_subdirectory(source) add_subdirectory(generate_cpp_array) add_subdirectory(test/unit_test) set(TOOLS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/tools${BRANCH_DIR_SUFFIX}") if(EXISTS ${TOOLS_DIR} AND EXISTS ${TOOLS_DIR}/CMakeLists.txt) add_subdirectory(${TOOLS_DIR}) endif() compute-runtime-22.14.22890/shared/generate_cpp_array/000077500000000000000000000000001422164147700224525ustar00rootroot00000000000000compute-runtime-22.14.22890/shared/generate_cpp_array/CMakeLists.txt000066400000000000000000000006331422164147700252140ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(SHARED_PROJECTS_FOLDER "neo shared") set(CPP_GENERATE_TOOL_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/source/generate_cpp_array.cpp ) add_executable(cpp_generate_tool "${CPP_GENERATE_TOOL_SOURCES}") set_target_properties(cpp_generate_tool PROPERTIES FOLDER "${SHARED_PROJECTS_FOLDER}") compute-runtime-22.14.22890/shared/generate_cpp_array/source/000077500000000000000000000000001422164147700237525ustar00rootroot00000000000000compute-runtime-22.14.22890/shared/generate_cpp_array/source/generate_cpp_array.cpp000066400000000000000000000114151422164147700303120ustar00rootroot00000000000000/* * 
Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include #include #include #include #include constexpr int MIN_ARG_COUNT = 7; static void show_usage(std::string name) { std::cerr << "Usage " << name << " - ALL BUT -p, --platform MUST BE SPECIFIED\n" << "Options :\n" << "\t -f, --file\t\tA file which content will be parsed into a uint32_t array in a .cpp file\n" << "\t -o, --out\t\t.Cpp output file name\n" << "\t -p, --platform\t\tOPTIONAL - Family name with type\n" << "\t -a, --array\t\tName of an uin32_t type array containing parsed input file" << std::endl; } std::string parseToCharArray(std::unique_ptr &binary, size_t size, std::string &builtinName, std::string &platform, std::string revisionId, bool isSpirV) { std::ostringstream out; out << "#include \n"; out << "#include \n\n"; out << "size_t " << builtinName << "BinarySize_" << platform << "_" << revisionId << " = " << size << ";\n"; out << "uint32_t " << builtinName << "Binary_" << platform << "_" << revisionId << "[" << (size + 3) / 4 << "] = {" << std::endl << " "; uint32_t *binaryUint = reinterpret_cast(binary.get()); for (size_t i = 0; i < (size + 3) / 4; i++) { if (i != 0) { out << ", "; if (i % 8 == 0) { out << std::endl << " "; } } if (i < size / 4) { out << "0x" << std::hex << std::setw(8) << std::setfill('0') << binaryUint[i]; } else { uint32_t lastBytes = size & 0x3; uint32_t lastUint = 0; uint8_t *pLastUint = (uint8_t *)&lastUint; for (uint32_t j = 0; j < lastBytes; j++) { pLastUint[sizeof(uint32_t) - 1 - j] = binary.get()[i * 4 + j]; } out << "0x" << std::hex << std::setw(8) << std::setfill('0') << lastUint; } } out << "};" << std::endl; out << std::endl << "#include \"shared/source/built_ins/registry/built_ins_registry.h\"\n" << std::endl; out << "namespace NEO {" << std::endl; out << "static RegisterEmbeddedResource register" << builtinName; isSpirV ? out << "Ir(" : out << "Bin("; out << std::endl; out << " \""; platform != "" ? 
out << platform << "_" << revisionId << "_" << builtinName : out << builtinName; isSpirV ? out << ".builtin_kernel.bc\"," : out << ".builtin_kernel.bin\","; out << std::endl; out << " (const char *)" << builtinName << "Binary_" << platform << "_" << revisionId << "," << std::endl; out << " " << builtinName << "BinarySize_" << platform << "_" << revisionId << ");" << std::endl; out << "}" << std::endl; return out.str(); } int main(int argc, char *argv[]) { if (argc < MIN_ARG_COUNT) { show_usage(argv[0]); return 1; } std::string fileName; std::string cppOutputName; std::string arrayName; std::string platform = ""; std::string revisionId = "0"; size_t size = 0; std::fstream inputFile; bool isSpirV; for (int i = 1; i < argc; i++) { std::string arg = argv[i]; if ((arg == "-f") || (arg == "--file")) { fileName = argv[++i]; } else if ((arg == "-o") || (arg == "--output")) { cppOutputName = argv[++i]; } else if ((arg == "-a") || (arg == "--array")) { arrayName = argv[++i]; } else if ((arg == "-p") || (arg == "--platform")) { platform = argv[++i]; } else if ((arg == "-r") || (arg == "--revision_id")) { revisionId = argv[++i]; } else { return 1; } } if (fileName.empty() || cppOutputName.empty() || arrayName.empty()) { std::cerr << "All three: fileName, cppOutputName and arrayName must be specified!" << std::endl; return 1; } inputFile.open(fileName.c_str(), std::ios::in | std::ios::binary | std::ios::ate); if (inputFile.is_open()) { size = static_cast(inputFile.tellg()); std::unique_ptr memblock = std::make_unique(size); inputFile.clear(); inputFile.seekg(0, std::ios::beg); inputFile.read(reinterpret_cast(memblock.get()), size); inputFile.close(); isSpirV = fileName.find(".spv") != std::string::npos; std::string cpp = parseToCharArray(memblock, size, arrayName, platform, revisionId, isSpirV); std::fstream(cppOutputName.c_str(), std::ios::out | std::ios::binary).write(cpp.c_str(), cpp.size()); } else { std::cerr << "File cannot be opened!" 
<< std::endl; return 1; } return 0; } compute-runtime-22.14.22890/shared/offline_compiler/000077500000000000000000000000001422164147700221345ustar00rootroot00000000000000compute-runtime-22.14.22890/shared/offline_compiler/CMakeLists.txt000066400000000000000000000002441422164147700246740ustar00rootroot00000000000000# # Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT # add_subdirectory(source) set(CLOC_LIB_SRCS_LIB ${CLOC_LIB_SRCS_LIB} PARENT_SCOPE) compute-runtime-22.14.22890/shared/offline_compiler/source/000077500000000000000000000000001422164147700234345ustar00rootroot00000000000000compute-runtime-22.14.22890/shared/offline_compiler/source/CMakeLists.txt000066400000000000000000000267141422164147700262060ustar00rootroot00000000000000# # Copyright (C) 2020-2022 Intel Corporation # # SPDX-License-Identifier: MIT # project(${OCLOC_NAME}_lib) set(OCLOC_NAME "ocloc") set(OCLOC_FOLDER_NAME "offline_compiler") set(CLOC_LIB_SRCS_LIB ${NEO_SHARED_DIRECTORY}/compiler_interface/compiler_options/compiler_options_base.cpp ${NEO_SHARED_DIRECTORY}/compiler_interface/create_main.cpp ${NEO_SHARED_DIRECTORY}/device_binary_format/ar/ar.h ${NEO_SHARED_DIRECTORY}/device_binary_format/ar/ar_decoder.cpp ${NEO_SHARED_DIRECTORY}/device_binary_format/ar/ar_decoder.h ${NEO_SHARED_DIRECTORY}/device_binary_format/ar/ar_encoder.cpp ${NEO_SHARED_DIRECTORY}/device_binary_format/ar/ar_encoder.h ${NEO_SHARED_DIRECTORY}/device_binary_format/elf/elf.h ${NEO_SHARED_DIRECTORY}/device_binary_format/elf/elf_decoder.cpp ${NEO_SHARED_DIRECTORY}/device_binary_format/elf/elf_decoder.h ${NEO_SHARED_DIRECTORY}/device_binary_format/elf/elf_encoder.cpp ${NEO_SHARED_DIRECTORY}/device_binary_format/elf/elf_encoder.h ${NEO_SHARED_DIRECTORY}/device_binary_format/elf/ocl_elf.h ${NEO_SHARED_DIRECTORY}/dll/devices${BRANCH_DIR_SUFFIX}devices.inl ${NEO_SHARED_DIRECTORY}/dll/devices${BRANCH_DIR_SUFFIX}devices_additional.inl ${NEO_SHARED_DIRECTORY}/dll/devices/devices_base.inl 
${NEO_SHARED_DIRECTORY}/dll/devices${BRANCH_DIR_SUFFIX}/product_config.inl ${NEO_SHARED_DIRECTORY}/dll/devices/product_config_base.inl ${NEO_SHARED_DIRECTORY}/dll/devices${BRANCH_DIR_SUFFIX}platforms.h ${NEO_SHARED_DIRECTORY}/helpers/abort.cpp ${NEO_SHARED_DIRECTORY}/helpers/compiler_hw_info_config.h ${NEO_SHARED_DIRECTORY}/helpers/compiler_hw_info_config.cpp ${NEO_SHARED_DIRECTORY}/helpers/compiler_hw_info_config_base.inl ${NEO_SHARED_DIRECTORY}/helpers/compiler_hw_info_config_bdw_and_later.inl ${NEO_SHARED_DIRECTORY}/helpers/compiler_options_parser.cpp ${NEO_SHARED_DIRECTORY}/helpers/compiler_options_parser.h ${NEO_SHARED_DIRECTORY}/helpers/debug_helpers.cpp ${NEO_SHARED_DIRECTORY}/helpers/file_io.cpp ${NEO_SHARED_DIRECTORY}/helpers/hw_info.cpp ${NEO_SHARED_DIRECTORY}/helpers/hw_info.h ${NEO_SHARED_DIRECTORY}/helpers${BRANCH_DIR_SUFFIX}hw_info_extended.cpp ${NEO_SHARED_DIRECTORY}/kernel${BRANCH_DIR_SUFFIX}kernel_descriptor.cpp ${NEO_SHARED_DIRECTORY}/kernel${BRANCH_DIR_SUFFIX}kernel_descriptor.h ${NEO_SHARED_DIRECTORY}/os_interface/os_library.h ${NEO_SHARED_DIRECTORY}/compiler_interface/oclc_extensions.cpp ${NEO_SHARED_DIRECTORY}/compiler_interface/oclc_extensions.h ${OCLOC_DIRECTORY}/source/decoder/binary_decoder.cpp ${OCLOC_DIRECTORY}/source/decoder/binary_decoder.h ${OCLOC_DIRECTORY}/source/decoder/binary_encoder.cpp ${OCLOC_DIRECTORY}/source/decoder/binary_encoder.h ${OCLOC_DIRECTORY}/source/decoder/helper.cpp ${OCLOC_DIRECTORY}/source/decoder/helper.h ${OCLOC_DIRECTORY}/source/decoder/iga_wrapper.h ${OCLOC_DIRECTORY}/source/decoder/translate_platform_base.h ${OCLOC_DIRECTORY}/source/multi_command.cpp ${OCLOC_DIRECTORY}/source/multi_command.h ${OCLOC_DIRECTORY}/source/ocloc_api.cpp ${OCLOC_DIRECTORY}/source/ocloc_api.h ${OCLOC_DIRECTORY}/source/ocloc_arg_helper.cpp ${OCLOC_DIRECTORY}/source/ocloc_arg_helper.h ${OCLOC_DIRECTORY}/source/ocloc_error_code.h ${OCLOC_DIRECTORY}/source/ocloc_fatbinary.cpp ${OCLOC_DIRECTORY}/source/ocloc_fatbinary.h 
${OCLOC_DIRECTORY}/source/ocloc_validator.cpp ${OCLOC_DIRECTORY}/source/ocloc_validator.h ${OCLOC_DIRECTORY}/source/offline_compiler.cpp ${OCLOC_DIRECTORY}/source/offline_compiler.h ${OCLOC_DIRECTORY}/source/offline_compiler_helper.cpp ${OCLOC_DIRECTORY}/source/offline_compiler_options.cpp ${OCLOC_DIRECTORY}/source/offline_linker.cpp ${OCLOC_DIRECTORY}/source/offline_linker.h ${OCLOC_DIRECTORY}/source/queries.h ${OCLOC_DIRECTORY}/source/utilities/get_git_version_info.h ${OCLOC_DIRECTORY}/source/utilities/get_git_version_info.cpp ${NEO_SOURCE_DIR}/shared/source/device_binary_format/device_binary_format_zebin.cpp ${NEO_SOURCE_DIR}/shared/source/device_binary_format/zebin_decoder.cpp ${NEO_SOURCE_DIR}/shared/source/device_binary_format/yaml/yaml_parser.cpp ) if(${IGA_HEADERS_AVAILABLE}) set(CLOC_LIB_SRCS_LIB ${CLOC_LIB_SRCS_LIB} ${OCLOC_DIRECTORY}/source/decoder/iga_wrapper.cpp ${OCLOC_DIRECTORY}/source/decoder${BRANCH_DIR_SUFFIX}translate_platform.cpp ) else() set(CLOC_LIB_SRCS_LIB ${CLOC_LIB_SRCS_LIB} ${OCLOC_DIRECTORY}/source/decoder/iga_stubs.cpp ) endif() if(WIN32) list(APPEND CLOC_LIB_SRCS_LIB ${NEO_SHARED_DIRECTORY}/dll/windows/options_windows.cpp ${NEO_SHARED_DIRECTORY}/os_interface/windows/os_library_win.cpp ${NEO_SHARED_DIRECTORY}/os_interface/windows/os_library_win.h ) else() list(APPEND CLOC_LIB_SRCS_LIB ${NEO_SHARED_DIRECTORY}/dll/linux/options_linux.cpp ${NEO_SHARED_DIRECTORY}/os_interface/linux/os_library_linux.cpp ${NEO_SHARED_DIRECTORY}/os_interface/linux/os_library_linux.h ${NEO_SHARED_DIRECTORY}/os_interface/linux/sys_calls_linux.cpp ${OCLOC_DIRECTORY}/source/linux/os_library_ocloc_helper.cpp ) endif() string(REPLACE ";" "," ALL_SUPPORTED_PRODUCT_FAMILIES "${ALL_SUPPORTED_PRODUCT_FAMILY}") set(CLOC_LIB_LIB_FLAGS_DEFINITIONS -DCIF_HEADERS_ONLY_BUILD -DALL_SUPPORTED_PRODUCT_FAMILIES=${ALL_SUPPORTED_PRODUCT_FAMILIES} ) set(NEO_SHARED_COREX_CPP_FILES hw_info ) macro(macro_for_each_platform) foreach(BRANCH_DIR ${BRANCH_DIR_LIST}) foreach(BRANCH 
${BRANCH_DIR_LIST}) foreach(SRC_FILE ${NEO_SOURCE_DIR}/shared/source${BRANCH}${CORE_TYPE_LOWER}/definitions${BRANCH_DIR_SUFFIX}hw_info_setup_${PLATFORM_IT_LOWER}.inl ${NEO_SOURCE_DIR}/shared/source${BRANCH}${CORE_TYPE_LOWER}${BRANCH_DIR}hw_info_${PLATFORM_IT_LOWER}.cpp ${NEO_SOURCE_DIR}/shared/source${BRANCH}${CORE_TYPE_LOWER}/compiler_hw_info_config_${PLATFORM_IT_LOWER}.inl ) if(EXISTS ${SRC_FILE}) list(APPEND CLOC_LIB_SRCS_LIB ${SRC_FILE}) endif() endforeach() if(WIN32) set(SRC_FILE ${NEO_SOURCE_DIR}/shared/source${BRANCH}${CORE_TYPE_LOWER}${BRANCH_DIR}windows/hw_info_extra_${PLATFORM_IT_LOWER}.cpp) if(EXISTS ${SRC_FILE}) list(APPEND CLOC_LIB_SRCS_LIB ${SRC_FILE}) endif() else() set(SRC_FILE ${NEO_SOURCE_DIR}/shared/source${BRANCH}${CORE_TYPE_LOWER}${BRANCH_DIR}linux/hw_info_extra_${PLATFORM_IT_LOWER}.cpp) if(EXISTS ${SRC_FILE}) list(APPEND CLOC_LIB_SRCS_LIB ${SRC_FILE}) endif() endif() endforeach() endforeach() endmacro() macro(macro_for_each_core_type) foreach(SRC_IT ${NEO_SHARED_COREX_CPP_FILES}) foreach(BRANCH_DIR ${BRANCH_DIR_LIST}) foreach(BRANCH ${BRANCH_DIR_LIST}) set(SRC_FILE ${NEO_SOURCE_DIR}/shared/source${BRANCH}${CORE_TYPE_LOWER}${BRANCH_DIR}${SRC_IT}) if(EXISTS ${SRC_FILE}_${CORE_TYPE_LOWER}.cpp) list(APPEND CLOC_LIB_SRCS_LIB ${SRC_FILE}_${CORE_TYPE_LOWER}.cpp) endif() endforeach() endforeach() endforeach() apply_macro_for_each_platform() endmacro() apply_macro_for_each_core_type("SUPPORTED") set(CLOC_LIB_SRCS ${CLOC_LIB_SRCS_LIB} ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) add_library(${OCLOC_NAME}_lib SHARED ${CLOC_LIB_SRCS}) add_subdirectories() create_project_source_tree(${OCLOC_NAME}_lib) set_target_properties(${OCLOC_NAME}_lib PROPERTIES FOLDER ${OCLOC_FOLDER_NAME}) set(CLOC_LIB_INCLUDES ${ENGINE_NODE_DIR} ${IGC_OCL_ADAPTOR_DIR} ${CIF_BASE_DIR} ${NEO__GMM_INCLUDE_DIR} ${KHRONOS_HEADERS_DIR} ${NEO__IGC_INCLUDE_DIR} ) target_include_directories(${OCLOC_NAME}_lib BEFORE PRIVATE ${CLOC_LIB_INCLUDES}) target_include_directories(${OCLOC_NAME}_lib 
BEFORE PRIVATE ${IGA_INCLUDE_DIR}) target_compile_definitions(${OCLOC_NAME}_lib PUBLIC ${CLOC_LIB_LIB_FLAGS_DEFINITIONS} ${SUPPORTED_CORE_FLAGS_DEFINITONS} DEFAULT_PLATFORM=${DEFAULT_SUPPORTED_PLATFORM} IGA_LIBRARY_NAME=${CMAKE_SHARED_LIBRARY_PREFIX}${IGA_LIBRARY_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX} ) target_compile_definitions(${OCLOC_NAME}_lib PUBLIC ${NEO__IGC_COMPILE_DEFINITIONS}) if(MSVC) target_link_libraries(${OCLOC_NAME}_lib dbghelp) endif() if(UNIX) target_link_libraries(${OCLOC_NAME}_lib dl pthread) endif() set(CLOC_LIB_SRCS_LIB ${CLOC_LIB_SRCS_LIB} PARENT_SCOPE) set(OCLOC_SRCS ${OCLOC_DIRECTORY}/source/ocloc_api.h ${OCLOC_DIRECTORY}/source/main.cpp ) add_executable(${OCLOC_NAME} ${OCLOC_SRCS}) target_link_libraries(${OCLOC_NAME} ${OCLOC_NAME}_lib) create_project_source_tree(${OCLOC_NAME}) set_target_properties(${OCLOC_NAME} PROPERTIES FOLDER ${OCLOC_FOLDER_NAME}) if(MSVC) if(CMAKE_SIZEOF_VOID_P EQUAL 4) set(OCLOC_BITNESS_SUFFIX 32) elseif(CMAKE_SIZEOF_VOID_P EQUAL 8) set(OCLOC_BITNESS_SUFFIX 64) endif() else() set(OCLOC_BITNESS_SUFFIX "") endif() if(UNIX) if(NEO_BUILD_DEBUG_SYMBOLS_PACKAGE) get_filename_component(lib_file_name $ NAME_WE) set(symbols_file_name ${lib_file_name}.debug) set(debug_symbols_target_name "${STRIP_SYMBOLS_TARGET}_${OCLOC_NAME}_lib") add_custom_target(${debug_symbols_target_name} COMMAND sh -c "objcopy --only-keep-debug ${lib_file_name} ${symbols_file_name}" COMMAND sh -c "strip -g ${lib_file_name}" COMMAND sh -c "objcopy --add-gnu-debuglink=${symbols_file_name} ${lib_file_name}" ) add_dependencies(${debug_symbols_target_name} ${OCLOC_NAME}_lib) add_dependencies(${STRIP_SYMBOLS_TARGET} ${debug_symbols_target_name}) set_property(GLOBAL APPEND PROPERTY DEBUG_SYMBOL_FILES "${symbols_file_name}") endif() set_property(GLOBAL APPEND PROPERTY NEO_OCL_COMPONENTS_LIST ${OCLOC_NAME}) install(FILES $ DESTINATION ${CMAKE_INSTALL_BINDIR} PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE COMPONENT 
${OCLOC_NAME} ) install(FILES $ DESTINATION ${CMAKE_INSTALL_LIBDIR} PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE COMPONENT ${OCLOC_NAME} ) install(FILES ${OCLOC_DIRECTORY}/source/ocloc_api.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} PERMISSIONS OWNER_READ OWNER_WRITE GROUP_READ WORLD_READ COMPONENT ${OCLOC_NAME} ) elseif(WIN32) if(NEO_WINDOWS_INSTALL) install(TARGETS ${OCLOC_NAME} DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT ocloc ) install(TARGETS ${OCLOC_NAME}_lib DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT ocloc ) install(FILES ${OCLOC_DIRECTORY}/source/ocloc_api.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} COMPONENT ocloc ) endif() endif() set(OCLOC_OUTPUT_NAME "${OCLOC_NAME}${OCLOC_BITNESS_SUFFIX}") set_target_properties(${OCLOC_NAME}_lib PROPERTIES OUTPUT_NAME ${OCLOC_OUTPUT_NAME}) add_custom_target(copy_compiler_files DEPENDS ${NEO__IGC_TARGETS}) set_target_properties(copy_compiler_files PROPERTIES FOLDER ${OCLOC_FOLDER_NAME}) if(WIN32) foreach(TARGET_tmp ${NEO__IGC_TARGETS}) add_custom_command( TARGET copy_compiler_files PRE_BUILD COMMAND ${CMAKE_COMMAND} -E make_directory $ COMMAND ${CMAKE_COMMAND} -E copy_if_different $ $ ) endforeach() endif() compute-runtime-22.14.22890/shared/offline_compiler/source/decoder/000077500000000000000000000000001422164147700250415ustar00rootroot00000000000000compute-runtime-22.14.22890/shared/offline_compiler/source/decoder/binary_decoder.cpp000066400000000000000000000535531422164147700305310ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/offline_compiler/source/decoder/binary_decoder.h" #include "shared/offline_compiler/source/decoder/helper.h" #include "shared/offline_compiler/source/offline_compiler.h" #include "shared/source/device_binary_format/device_binary_formats.h" #include "shared/source/device_binary_format/elf/elf_decoder.h" #include 
"shared/source/device_binary_format/elf/ocl_elf.h" #include "shared/source/helpers/file_io.h" #include "shared/source/helpers/ptr_math.h" #include #include #include #ifdef _WIN32 #include #define MakeDirectory _mkdir #else #include #define MakeDirectory(dir) mkdir(dir, 0777) #endif template T readUnaligned(const void *ptr) { T retVal = 0; const uint8_t *tmp1 = reinterpret_cast(ptr); uint8_t *tmp2 = reinterpret_cast(&retVal); for (uint8_t i = 0; i < sizeof(T); ++i) { *(tmp2++) = *(tmp1++); } return retVal; } int BinaryDecoder::decode() { parseTokens(); std::stringstream ptmFile; auto devBinPtr = getDevBinary(); if (devBinPtr == nullptr) { argHelper->printf("Error! Device Binary section was not found.\n"); exit(1); } return processBinary(devBinPtr, ptmFile); } void BinaryDecoder::dumpField(const void *&binaryPtr, const PTField &field, std::ostream &ptmFile) { ptmFile << '\t' << static_cast(field.size) << ' '; switch (field.size) { case 1: { auto val = readUnaligned(binaryPtr); ptmFile << field.name << " " << +val << '\n'; break; } case 2: { auto val = readUnaligned(binaryPtr); ptmFile << field.name << " " << val << '\n'; break; } case 4: { auto val = readUnaligned(binaryPtr); ptmFile << field.name << " " << val << '\n'; break; } case 8: { auto val = readUnaligned(binaryPtr); ptmFile << field.name << " " << val << '\n'; break; } default: argHelper->printf("Error! 
Unknown size.\n"); exit(1); } binaryPtr = ptrOffset(binaryPtr, field.size); } template bool isPatchtokensBinary(const ContainerT &data) { static constexpr NEO::ConstStringRef intcMagic = "CTNI"; auto binaryMagicLen = std::min(intcMagic.size(), data.size()); NEO::ConstStringRef binaryMagic(reinterpret_cast(&*data.begin()), binaryMagicLen); return intcMagic == binaryMagic; } const void *BinaryDecoder::getDevBinary() { binary = argHelper->readBinaryFile(binaryFile); const void *data = nullptr; if (isPatchtokensBinary(binary)) { return binary.data(); } std::string decoderErrors; std::string decoderWarnings; auto input = ArrayRef(reinterpret_cast(binary.data()), binary.size()); auto elf = NEO::Elf::decodeElf(input, decoderErrors, decoderWarnings); for (const auto §ionHeader : elf.sectionHeaders) { //Finding right section auto sectionData = ArrayRef(reinterpret_cast(sectionHeader.data.begin()), sectionHeader.data.size()); switch (sectionHeader.header->type) { case NEO::Elf::SHT_OPENCL_LLVM_BINARY: { argHelper->saveOutput(pathToDump + "llvm.bin", sectionData.begin(), sectionData.size()); break; } case NEO::Elf::SHT_OPENCL_SPIRV: { argHelper->saveOutput(pathToDump + "spirv.bin", sectionData.begin(), sectionData.size()); break; } case NEO::Elf::SHT_OPENCL_OPTIONS: { argHelper->saveOutput(pathToDump + "build.bin", sectionData.begin(), sectionData.size()); break; } case NEO::Elf::SHT_OPENCL_DEV_BINARY: { data = sectionData.begin(); break; } default: break; } } return data; } uint8_t BinaryDecoder::getSize(const std::string &typeStr) { if (typeStr == "uint8_t") { return 1; } else if (typeStr == "uint16_t") { return 2; } else if (typeStr == "uint32_t") { return 4; } else if (typeStr == "uint64_t") { return 8; } else { argHelper->printf("Unhandled type : %s\n", typeStr.c_str()); exit(1); } } std::vector BinaryDecoder::loadPatchList() { if (argHelper->hasHeaders()) { return argHelper->headersToVectorOfStrings(); } else { std::vector patchList; if (pathToPatch.empty()) { 
argHelper->printf("Path to patch list not provided - using defaults, skipping patchokens as undefined.\n"); patchList = { "struct SProgramBinaryHeader", "{", " uint32_t Magic;", " uint32_t Version;", " uint32_t Device;", " uint32_t GPUPointerSizeInBytes;", " uint32_t NumberOfKernels;", " uint32_t SteppingId;", " uint32_t PatchListSize;", "};", "", "struct SKernelBinaryHeader", "{", " uint32_t CheckSum;", " uint64_t ShaderHashCode;", " uint32_t KernelNameSize;", " uint32_t PatchListSize;", "};", "", "struct SKernelBinaryHeaderCommon :", " SKernelBinaryHeader", "{", " uint32_t KernelHeapSize;", " uint32_t GeneralStateHeapSize;", " uint32_t DynamicStateHeapSize;", " uint32_t SurfaceStateHeapSize;", " uint32_t KernelUnpaddedSize;", "};", "", "enum PATCH_TOKEN", "{", " PATCH_TOKEN_ALLOCATE_GLOBAL_MEMORY_SURFACE_PROGRAM_BINARY_INFO, // 41 @SPatchAllocateGlobalMemorySurfaceProgramBinaryInfo@", " PATCH_TOKEN_ALLOCATE_CONSTANT_MEMORY_SURFACE_PROGRAM_BINARY_INFO, // 42 @SPatchAllocateConstantMemorySurfaceProgramBinaryInfo@", "};", "struct SPatchAllocateGlobalMemorySurfaceProgramBinaryInfo :", " SPatchItemHeader", "{", " uint32_t Type;", " uint32_t GlobalBufferIndex;", " uint32_t InlineDataSize;", "};", "struct SPatchAllocateConstantMemorySurfaceProgramBinaryInfo :", " SPatchItemHeader", "{", " uint32_t ConstantBufferIndex;", " uint32_t InlineDataSize;", "};", }; } else { readFileToVectorOfStrings(patchList, pathToPatch + "patch_list.h", true); readFileToVectorOfStrings(patchList, pathToPatch + "patch_shared.h", true); readFileToVectorOfStrings(patchList, pathToPatch + "patch_g7.h", true); readFileToVectorOfStrings(patchList, pathToPatch + "patch_g8.h", true); readFileToVectorOfStrings(patchList, pathToPatch + "patch_g9.h", true); readFileToVectorOfStrings(patchList, pathToPatch + "patch_g10.h", true); } return patchList; } } void BinaryDecoder::parseTokens() { //Creating patchlist definitions auto patchList = loadPatchList(); size_t pos = findPos(patchList, "struct 
SProgramBinaryHeader"); if (pos == patchList.size()) { argHelper->printf("While parsing patchtoken definitions: couldn't find SProgramBinaryHeader."); exit(1); } pos = findPos(patchList, "enum PATCH_TOKEN"); if (pos == patchList.size()) { argHelper->printf("While parsing patchtoken definitions: couldn't find enum PATCH_TOKEN."); exit(1); } pos = findPos(patchList, "struct SKernelBinaryHeader"); if (pos == patchList.size()) { argHelper->printf("While parsing patchtoken definitions: couldn't find SKernelBinaryHeader."); exit(1); } pos = findPos(patchList, "struct SKernelBinaryHeaderCommon :"); if (pos == patchList.size()) { argHelper->printf("While parsing patchtoken definitions: couldn't find SKernelBinaryHeaderCommon."); exit(1); } // Reading all Patch Tokens and according structs size_t patchTokenEnumPos = findPos(patchList, "enum PATCH_TOKEN"); if (patchTokenEnumPos == patchList.size()) { exit(1); } for (auto i = patchTokenEnumPos + 1; i < patchList.size(); ++i) { if (patchList[i].find("};") != std::string::npos) { break; } else if (patchList[i].find("PATCH_TOKEN") == std::string::npos) { continue; } else if (patchList[i].find("@") == std::string::npos) { continue; } size_t patchTokenNoStartPos, patchTokenNoEndPos; patchTokenNoStartPos = patchList[i].find('/') + 3; patchTokenNoEndPos = patchList[i].find(' ', patchTokenNoStartPos); std::stringstream patchTokenNoStream(patchList[i].substr(patchTokenNoStartPos, patchTokenNoEndPos - patchTokenNoStartPos)); int patchNo; patchTokenNoStream >> patchNo; auto patchTokenPtr = std::make_unique(); size_t nameStartPos, nameEndPos; nameStartPos = patchList[i].find("PATCH_TOKEN"); nameEndPos = patchList[i].find(',', nameStartPos); patchTokenPtr->name = patchList[i].substr(nameStartPos, nameEndPos - nameStartPos); nameStartPos = patchList[i].find("@"); nameEndPos = patchList[i].find('@', nameStartPos + 1); if (nameEndPos == std::string::npos) { continue; } std::string structName = "struct " + patchList[i].substr(nameStartPos + 
1, nameEndPos - nameStartPos - 1) + " :"; size_t structPos = findPos(patchList, structName); if (structPos == patchList.size()) { continue; } patchTokenPtr->size = readStructFields(patchList, structPos + 1, patchTokenPtr->fields); patchTokens[static_cast(patchNo)] = std::move(patchTokenPtr); } //Finding and reading Program Binary Header size_t structPos = findPos(patchList, "struct SProgramBinaryHeader") + 1; programHeader.size = readStructFields(patchList, structPos, programHeader.fields); //Finding and reading Kernel Binary Header structPos = findPos(patchList, "struct SKernelBinaryHeader") + 1; kernelHeader.size = readStructFields(patchList, structPos, kernelHeader.fields); structPos = findPos(patchList, "struct SKernelBinaryHeaderCommon :") + 1; kernelHeader.size += readStructFields(patchList, structPos, kernelHeader.fields); } void BinaryDecoder::printHelp() { argHelper->printf(R"===(Disassembles Intel Compute GPU device binary files. Output of such operation is a set of files that can be later used to reassemble back a valid Intel Compute GPU device binary (using ocloc 'asm' command). This set of files contains: Program-scope data : - spirv.bin (optional) - spirV representation of the program from which the input binary was generated - build.bin - build options that were used when generating the input binary - PTM.txt - 'patch tokens' describing program-scope and kernel-scope metadata about the input binary Kernel-scope data ( is replaced by corresponding kernel's name): - _DynamicStateHeap.bin - initial DynamicStateHeap (binary file) - _SurfaceStateHeap.bin - initial SurfaceStateHeap (binary file) - _KernelHeap.asm - list of instructions describing the kernel function (text file) Usage: ocloc disasm -file [-patch ] [-dump ] [-device ] [-ignore_isa_padding] -file Input file to be disassembled. This file should be an Intel Compute GPU device binary. -patch Optional path to the directory containing patchtoken definitions (patchlist.h, etc.) 
as defined in intel-graphics-compiler (IGC) repo, IGC subdirectory : IGC/AdaptorOCL/ocl_igc_shared/executable_format By default (when patchtokens_dir is not provided) patchtokens won't be decoded. -dump Optional path for files representing decoded binary. Default is './dump'. -device Optional target device of input binary can be: %s By default ocloc will pick base device within a generation - i.e. both skl and kbl will fallback to skl. If specific product (e.g. kbl) is needed, provide it as device_type. -ignore_isa_padding Ignores Kernel Heap padding - Kernel Heap binary will be saved without padding. --help Print this usage message. Examples: Disassemble Intel Compute GPU device binary ocloc disasm -file source_file_Gen9core.bin )===", NEO::getDevicesTypes().c_str()); } int BinaryDecoder::processBinary(const void *&ptr, std::ostream &ptmFile) { ptmFile << "ProgramBinaryHeader:\n"; uint32_t numberOfKernels = 0, patchListSize = 0, device = 0; for (const auto &v : programHeader.fields) { if (v.name == "NumberOfKernels") { numberOfKernels = readUnaligned(ptr); } else if (v.name == "PatchListSize") { patchListSize = readUnaligned(ptr); } else if (v.name == "Device") { device = readUnaligned(ptr); } dumpField(ptr, v, ptmFile); } if (numberOfKernels == 0) { argHelper->printf("Warning! 
Number of Kernels is 0.\n"); } readPatchTokens(ptr, patchListSize, ptmFile); iga->setGfxCore(static_cast(device)); //Reading Kernels for (uint32_t i = 0; i < numberOfKernels; ++i) { ptmFile << "Kernel #" << i << '\n'; processKernel(ptr, ptmFile); } argHelper->saveOutput(pathToDump + "PTM.txt", ptmFile); return 0; } void BinaryDecoder::processKernel(const void *&ptr, std::ostream &ptmFile) { uint32_t KernelNameSize = 0, KernelPatchListSize = 0, KernelHeapSize = 0, KernelHeapUnpaddedSize = 0, GeneralStateHeapSize = 0, DynamicStateHeapSize = 0, SurfaceStateHeapSize = 0; ptmFile << "KernelBinaryHeader:\n"; for (const auto &v : kernelHeader.fields) { if (v.name == "PatchListSize") KernelPatchListSize = readUnaligned(ptr); else if (v.name == "KernelNameSize") KernelNameSize = readUnaligned(ptr); else if (v.name == "KernelHeapSize") KernelHeapSize = readUnaligned(ptr); else if (v.name == "KernelUnpaddedSize") KernelHeapUnpaddedSize = readUnaligned(ptr); else if (v.name == "GeneralStateHeapSize") GeneralStateHeapSize = readUnaligned(ptr); else if (v.name == "DynamicStateHeapSize") DynamicStateHeapSize = readUnaligned(ptr); else if (v.name == "SurfaceStateHeapSize") SurfaceStateHeapSize = readUnaligned(ptr); dumpField(ptr, v, ptmFile); } if (KernelNameSize == 0) { argHelper->printf("Error! 
KernelNameSize was 0.\n"); exit(1); } ptmFile << "\tKernelName "; std::string kernelName(static_cast(ptr), 0, KernelNameSize); ptmFile << kernelName << '\n'; ptr = ptrOffset(ptr, KernelNameSize); std::string fileName = pathToDump + kernelName + "_KernelHeap"; argHelper->printf("Trying to disassemble %s.krn\n", kernelName.c_str()); std::string disassembledKernel; if (iga->tryDisassembleGenISA(ptr, KernelHeapUnpaddedSize, disassembledKernel)) { argHelper->saveOutput(fileName + ".asm", disassembledKernel.data(), disassembledKernel.size()); } else { if (ignoreIsaPadding) { argHelper->saveOutput(fileName + ".dat", ptr, KernelHeapUnpaddedSize); } else { argHelper->saveOutput(fileName + ".dat", ptr, KernelHeapSize); } } ptr = ptrOffset(ptr, KernelHeapSize); if (GeneralStateHeapSize != 0) { argHelper->printf("Warning! GeneralStateHeapSize wasn't 0.\n"); fileName = pathToDump + kernelName + "_GeneralStateHeap.bin"; argHelper->saveOutput(fileName, ptr, DynamicStateHeapSize); ptr = ptrOffset(ptr, GeneralStateHeapSize); } fileName = pathToDump + kernelName + "_DynamicStateHeap.bin"; argHelper->saveOutput(fileName, ptr, DynamicStateHeapSize); ptr = ptrOffset(ptr, DynamicStateHeapSize); fileName = pathToDump + kernelName + "_SurfaceStateHeap.bin"; argHelper->saveOutput(fileName, ptr, SurfaceStateHeapSize); ptr = ptrOffset(ptr, SurfaceStateHeapSize); if (KernelPatchListSize == 0) { argHelper->printf("Warning! 
Kernel's patch list size was 0.\n"); } readPatchTokens(ptr, KernelPatchListSize, ptmFile); } void BinaryDecoder::readPatchTokens(const void *&patchListPtr, uint32_t patchListSize, std::ostream &ptmFile) { auto endPatchListPtr = ptrOffset(patchListPtr, patchListSize); while (patchListPtr != endPatchListPtr) { auto patchTokenPtr = patchListPtr; auto token = readUnaligned(patchTokenPtr); patchTokenPtr = ptrOffset(patchTokenPtr, sizeof(uint32_t)); auto Size = readUnaligned(patchTokenPtr); patchTokenPtr = ptrOffset(patchTokenPtr, sizeof(uint32_t)); if (patchTokens.count(token) > 0) { ptmFile << patchTokens[(token)]->name << ":\n"; } else { ptmFile << "Unidentified PatchToken:\n"; } ptmFile << '\t' << "4 Token " << token << '\n'; ptmFile << '\t' << "4 Size " << Size << '\n'; if (patchTokens.count(token) > 0) { uint32_t fieldsSize = 0; for (const auto &v : patchTokens[(token)]->fields) { if ((fieldsSize += static_cast(v.size)) > (Size - sizeof(uint32_t) * 2)) { break; } if (v.name == "InlineDataSize") { // Because InlineData field value is not added to PT size auto inlineDataSize = readUnaligned(patchTokenPtr); patchListPtr = ptrOffset(patchListPtr, inlineDataSize); } dumpField(patchTokenPtr, v, ptmFile); } } patchListPtr = ptrOffset(patchListPtr, Size); if (patchListPtr > patchTokenPtr) { ptmFile << "\tHex"; const uint8_t *byte = reinterpret_cast(patchTokenPtr); while (ptrDiff(patchListPtr, patchTokenPtr) != 0) { ptmFile << ' ' << std::hex << +*(byte++); patchTokenPtr = ptrOffset(patchTokenPtr, sizeof(uint8_t)); } ptmFile << std::dec << '\n'; } } } uint32_t BinaryDecoder::readStructFields(const std::vector &patchList, const size_t &structPos, std::vector &fields) { std::string typeStr, fieldName; uint8_t size; uint32_t fullSize = 0; size_t f1, f2; for (auto i = structPos; i < patchList.size(); ++i) { if (patchList[i].find("};") != std::string::npos) { break; } else if (patchList[i].find("int") == std::string::npos) { continue; } f1 = patchList[i].find_first_not_of(' '); 
f2 = patchList[i].find(' ', f1 + 1); typeStr = patchList[i].substr(f1, f2 - f1); size = getSize(typeStr); f1 = patchList[i].find_first_not_of(' ', f2); f2 = patchList[i].find(';'); fieldName = patchList[i].substr(f1, f2 - f1); fields.push_back(PTField{size, fieldName}); fullSize += size; } return fullSize; } int BinaryDecoder::validateInput(const std::vector &args) { for (size_t argIndex = 2; argIndex < args.size(); ++argIndex) { const auto &currArg = args[argIndex]; const bool hasMoreArgs = (argIndex + 1 < args.size()); if ("-file" == currArg && hasMoreArgs) { binaryFile = args[++argIndex]; } else if ("-device" == currArg && hasMoreArgs) { iga->setProductFamily(getProductFamilyFromDeviceName(args[++argIndex])); } else if ("-patch" == currArg && hasMoreArgs) { pathToPatch = args[++argIndex]; addSlash(pathToPatch); } else if ("-dump" == currArg && hasMoreArgs) { pathToDump = args[++argIndex]; addSlash(pathToDump); } else if ("--help" == currArg) { showHelp = true; return 0; } else if ("-ignore_isa_padding" == currArg) { ignoreIsaPadding = true; } else if ("-q" == currArg) { argHelper->getPrinterRef() = MessagePrinter(true); iga->setMessagePrinter(argHelper->getPrinterRef()); } else { argHelper->printf("Unknown argument %s\n", currArg.c_str()); return -1; } } if (false == iga->isKnownPlatform()) { argHelper->printf("Warning : missing or invalid -device parameter - results may be inacurate\n"); } if (!argHelper->outputEnabled()) { if (pathToDump.empty()) { argHelper->printf("Warning : Path to dump folder not specificed - using ./dump as default.\n"); pathToDump = std::string("dump/"); } MakeDirectory(pathToDump.c_str()); } return 0; } compute-runtime-22.14.22890/shared/offline_compiler/source/decoder/binary_decoder.h000066400000000000000000000037651422164147700301760ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/offline_compiler/source/decoder/helper.h" #include 
"shared/offline_compiler/source/decoder/iga_wrapper.h" #include "shared/offline_compiler/source/ocloc_arg_helper.h" #include #include #include #include struct PTField { uint8_t size = 0U; std::string name; }; struct BinaryHeader { std::vector fields; uint32_t size = 0U; }; struct PatchToken : BinaryHeader { std::string name; }; using PTMap = std::unordered_map>; class BinaryDecoder { public: BinaryDecoder(const std::string &file, const std::string &patch, const std::string &dump) : binaryFile(file), pathToPatch(patch), pathToDump(dump){}; BinaryDecoder(OclocArgHelper *helper) : argHelper(helper), iga(new IgaWrapper) { iga->setMessagePrinter(argHelper->getPrinterRef()); }; int decode(); int validateInput(const std::vector &args); bool showHelp = false; void printHelp(); protected: OclocArgHelper *argHelper = nullptr; bool ignoreIsaPadding = false; BinaryHeader programHeader, kernelHeader; std::vector binary; std::unique_ptr iga; PTMap patchTokens; std::string binaryFile, pathToPatch, pathToDump; void dumpField(const void *&binaryPtr, const PTField &field, std::ostream &ptmFile); uint8_t getSize(const std::string &typeStr); const void *getDevBinary(); std::vector loadPatchList(); void parseTokens(); int processBinary(const void *&ptr, std::ostream &ptmFile); void processKernel(const void *&ptr, std::ostream &ptmFile); void readPatchTokens(const void *&patchListPtr, uint32_t patchListSize, std::ostream &ptmFile); uint32_t readStructFields(const std::vector &patchList, const size_t &structPos, std::vector &fields); }; compute-runtime-22.14.22890/shared/offline_compiler/source/decoder/binary_encoder.cpp000066400000000000000000000407461422164147700305430ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "binary_encoder.h" #include "shared/offline_compiler/source/offline_compiler.h" #include "shared/source/device_binary_format/elf/elf_encoder.h" #include 
"shared/source/device_binary_format/elf/ocl_elf.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/file_io.h" #include "shared/source/helpers/hash.h" #include "CL/cl.h" #include "helper.h" #include #include #include #include void BinaryEncoder::calculatePatchListSizes(std::vector &ptmFile) { size_t patchListPos = 0; for (size_t i = 0; i < ptmFile.size(); ++i) { if (ptmFile[i].find("PatchListSize") != std::string::npos) { patchListPos = i; } else if (ptmFile[i].find("PATCH_TOKEN") != std::string::npos) { uint32_t calcSize = 0; i++; while (i < ptmFile.size() && ptmFile[i].find("Kernel #") == std::string::npos) { if (ptmFile[i].find(':') == std::string::npos) { if (ptmFile[i].find("Hex") != std::string::npos) { calcSize += static_cast(std::count(ptmFile[i].begin(), ptmFile[i].end(), ' ')); } else { calcSize += std::atoi(&ptmFile[i][1]); } } i++; } uint32_t size = static_cast(std::stoul(ptmFile[patchListPos].substr(ptmFile[patchListPos].find_last_of(' ') + 1))); if (size != calcSize) { argHelper->printf("Warning! Calculated PatchListSize ( %u ) differs from file ( %u ) - changing it. 
Line %d\n", calcSize, size, static_cast(patchListPos + 1)); ptmFile[patchListPos] = ptmFile[patchListPos].substr(0, ptmFile[patchListPos].find_last_of(' ') + 1); ptmFile[patchListPos] += std::to_string(calcSize); } } } } bool BinaryEncoder::copyBinaryToBinary(const std::string &srcFileName, std::ostream &outBinary, uint32_t *binaryLength) { if (!argHelper->fileExists(srcFileName)) { return false; } auto binary = argHelper->readBinaryFile(srcFileName); auto length = binary.size(); outBinary.write(binary.data(), length); if (binaryLength) { *binaryLength = static_cast(length); } return true; } int BinaryEncoder::createElf(std::stringstream &deviceBinary) { NEO::Elf::ElfEncoder ElfEncoder; ElfEncoder.getElfFileHeader().type = NEO::Elf::ET_OPENCL_EXECUTABLE; //Build Options if (argHelper->fileExists(pathToDump + "build.bin")) { auto binary = argHelper->readBinaryFile(pathToDump + "build.bin"); ElfEncoder.appendSection(NEO::Elf::SHT_OPENCL_OPTIONS, "BuildOptions", ArrayRef(reinterpret_cast(binary.data()), binary.size())); } else { argHelper->printf("Warning! Missing build section.\n"); } //LLVM or SPIRV if (argHelper->fileExists(pathToDump + "llvm.bin")) { auto binary = argHelper->readBinaryFile(pathToDump + "llvm.bin"); ElfEncoder.appendSection(NEO::Elf::SHT_OPENCL_LLVM_BINARY, "Intel(R) OpenCL LLVM Object", ArrayRef(reinterpret_cast(binary.data()), binary.size())); } else if (argHelper->fileExists(pathToDump + "spirv.bin")) { auto binary = argHelper->readBinaryFile(pathToDump + "spirv.bin"); ElfEncoder.appendSection(NEO::Elf::SHT_OPENCL_SPIRV, "SPIRV Object", ArrayRef(reinterpret_cast(binary.data()), binary.size())); } else { argHelper->printf("Warning! 
Missing llvm/spirv section.\n"); } //Device Binary auto deviceBinaryStr = deviceBinary.str(); std::vector binary(deviceBinaryStr.begin(), deviceBinaryStr.end()); ElfEncoder.appendSection(NEO::Elf::SHT_OPENCL_DEV_BINARY, "Intel(R) OpenCL Device Binary", ArrayRef(reinterpret_cast(binary.data()), binary.size())); //Resolve Elf Binary auto elfBinary = ElfEncoder.encode(); argHelper->saveOutput(elfName, elfBinary.data(), elfBinary.size()); return 0; } void BinaryEncoder::printHelp() { argHelper->printf(R"===(Assembles Intel Compute GPU device binary from input files. It's expected that input files were previously generated by 'ocloc disasm' command or are compatible with 'ocloc disasm' output (especially in terms of file naming scheme). See 'ocloc disasm --help' for additional info. Usage: ocloc asm -out [-dump ] [-device ] [-ignore_isa_padding] -out Filename for newly assembled binary. -dump Path to the input directory containing disassembled binary (as disassembled by ocloc's disasm command). Default is './dump'. -device Optional target device of output binary can be: %s By default ocloc will pick base device within a generation - i.e. both skl and kbl will fallback to skl. If specific product (e.g. kbl) is needed, provide it as device_type. -ignore_isa_padding Ignores Kernel Heap padding - padding will not be added to Kernel Heap binary. --help Print this usage message. Examples: Assemble to Intel Compute GPU device binary ocloc asm -out reassembled.bin )===", NEO::getDevicesTypes().c_str()); } int BinaryEncoder::encode() { std::vector ptmFile; if (!argHelper->fileExists(pathToDump + "PTM.txt")) { argHelper->printf("Error! 
Couldn't find PTM.txt"); return -1; } argHelper->readFileToVectorOfStrings(pathToDump + "PTM.txt", ptmFile); calculatePatchListSizes(ptmFile); std::stringstream deviceBinary; //(pathToDump + "device_binary.bin", std::ios::binary); int retVal = processBinary(ptmFile, deviceBinary); argHelper->saveOutput(pathToDump + "device_binary.bin", deviceBinary.str().c_str(), deviceBinary.str().length()); if (retVal != 0) { return retVal; } retVal = createElf(deviceBinary); return retVal; } int BinaryEncoder::processBinary(const std::vector &ptmFileLines, std::ostream &deviceBinary) { if (false == iga->isKnownPlatform()) { auto deviceMarker = findPos(ptmFileLines, "Device"); if (deviceMarker != ptmFileLines.size()) { std::stringstream ss(ptmFileLines[deviceMarker]); ss.ignore(32, ' '); ss.ignore(32, ' '); uint32_t gfxCore = 0; ss >> gfxCore; iga->setGfxCore(static_cast(gfxCore)); } } size_t i = 0; while (i < ptmFileLines.size()) { if (ptmFileLines[i].find("Kernel #") != std::string::npos) { if (processKernel(++i, ptmFileLines, deviceBinary)) { argHelper->printf("Warning while processing kernel!\n"); return -1; } } else if (writeDeviceBinary(ptmFileLines[i++], deviceBinary)) { argHelper->printf("Error while writing to binary!\n"); return -1; } } return 0; } void BinaryEncoder::addPadding(std::ostream &out, size_t numBytes) { for (size_t i = 0; i < numBytes; ++i) { const char nullByte = 0; out.write(&nullByte, 1U); } } int BinaryEncoder::processKernel(size_t &line, const std::vector &ptmFileLines, std::ostream &deviceBinary) { auto kernelInfoBeginMarker = line; auto kernelInfoEndMarker = ptmFileLines.size(); auto kernelNameMarker = ptmFileLines.size(); auto kernelPatchtokensMarker = ptmFileLines.size(); std::stringstream kernelBlob; // Normally these are added by the compiler, need to take or of them when reassembling constexpr size_t isaPaddingSizeInBytes = 128; constexpr uint32_t kernelHeapAlignmentInBytes = 64; uint32_t kernelNameSizeInBinary = 0; std::string kernelName; // 
Scan PTM lines for kernel info while (line < ptmFileLines.size()) { if (ptmFileLines[line].find("KernelName ") != std::string::npos) { kernelName = std::string(ptmFileLines[line], ptmFileLines[line].find(' ') + 1); kernelNameMarker = line; kernelPatchtokensMarker = kernelNameMarker + 1; // patchtokens come after name } else if (ptmFileLines[line].find("KernelNameSize") != std::string::npos) { std::stringstream ss(ptmFileLines[line]); ss.ignore(32, ' '); ss.ignore(32, ' '); ss >> kernelNameSizeInBinary; } else if (ptmFileLines[line].find("Kernel #") != std::string::npos) { kernelInfoEndMarker = line; break; } ++line; } // Write KernelName and padding kernelBlob.write(kernelName.c_str(), kernelName.size()); addPadding(kernelBlob, kernelNameSizeInBinary - kernelName.size()); // Write KernelHeap and padding uint32_t kernelHeapSizeUnpadded = 0U; bool heapsCopiedSuccesfully = true; // Use .asm if available, fallback to .dat if (argHelper->fileExists(pathToDump + kernelName + "_KernelHeap.asm")) { auto kernelAsAsm = argHelper->readBinaryFile(pathToDump + kernelName + "_KernelHeap.asm"); std::string kernelAsBinary; argHelper->printf("Trying to assemble %s.asm\n", kernelName.c_str()); if (false == iga->tryAssembleGenISA(std::string(kernelAsAsm.begin(), kernelAsAsm.end()), kernelAsBinary)) { argHelper->printf("Error : Could not assemble : %s\n", kernelName.c_str()); return -1; } kernelHeapSizeUnpadded = static_cast(kernelAsBinary.size()); kernelBlob.write(kernelAsBinary.data(), kernelAsBinary.size()); } else { heapsCopiedSuccesfully = copyBinaryToBinary(pathToDump + kernelName + "_KernelHeap.dat", kernelBlob, &kernelHeapSizeUnpadded); } uint32_t kernelHeapSize = 0U; // Adding padding and alignment if (ignoreIsaPadding) { kernelHeapSize = kernelHeapSizeUnpadded; } else { addPadding(kernelBlob, isaPaddingSizeInBytes); const uint32_t kernelHeapPaddedSize = kernelHeapSizeUnpadded + isaPaddingSizeInBytes; kernelHeapSize = alignUp(kernelHeapPaddedSize, kernelHeapAlignmentInBytes); 
addPadding(kernelBlob, kernelHeapSize - kernelHeapPaddedSize); } // Write GeneralStateHeap, DynamicStateHeap, SurfaceStateHeap if (argHelper->fileExists(pathToDump + kernelName + "_GeneralStateHeap.bin")) { heapsCopiedSuccesfully = heapsCopiedSuccesfully && copyBinaryToBinary(pathToDump + kernelName + "_GeneralStateHeap.bin", kernelBlob); } heapsCopiedSuccesfully = heapsCopiedSuccesfully && copyBinaryToBinary(pathToDump + kernelName + "_DynamicStateHeap.bin", kernelBlob); heapsCopiedSuccesfully = heapsCopiedSuccesfully && copyBinaryToBinary(pathToDump + kernelName + "_SurfaceStateHeap.bin", kernelBlob); if (false == heapsCopiedSuccesfully) { return -1; } // Write kernel patchtokens for (size_t i = kernelPatchtokensMarker; i < kernelInfoEndMarker; ++i) { if (writeDeviceBinary(ptmFileLines[i], kernelBlob)) { argHelper->printf("Error while writing to binary.\n"); return -1; } } auto kernelBlobData = kernelBlob.str(); uint64_t hashValue = NEO::Hash::hash(reinterpret_cast(kernelBlobData.data()), kernelBlobData.size()); uint32_t calcCheckSum = hashValue & 0xFFFFFFFF; // Add kernel header for (size_t i = kernelInfoBeginMarker; i < kernelNameMarker; ++i) { if (ptmFileLines[i].find("CheckSum") != std::string::npos) { static_assert(std::is_same::value, ""); deviceBinary.write(reinterpret_cast(&calcCheckSum), sizeof(uint32_t)); } else if (ptmFileLines[i].find("KernelHeapSize") != std::string::npos) { static_assert(sizeof(kernelHeapSize) == sizeof(uint32_t), ""); deviceBinary.write(reinterpret_cast(&kernelHeapSize), sizeof(uint32_t)); } else if (ptmFileLines[i].find("KernelUnpaddedSize") != std::string::npos) { static_assert(sizeof(kernelHeapSizeUnpadded) == sizeof(uint32_t), ""); deviceBinary.write(reinterpret_cast(&kernelHeapSizeUnpadded), sizeof(uint32_t)); } else { if (writeDeviceBinary(ptmFileLines[i], deviceBinary)) { argHelper->printf("Error while writing to binary.\n"); return -1; } } } // Add kernel blob after the header deviceBinary.write(kernelBlobData.c_str(), 
kernelBlobData.size()); return 0; } int BinaryEncoder::validateInput(const std::vector &args) { for (size_t argIndex = 2; argIndex < args.size(); ++argIndex) { const auto &currArg = args[argIndex]; const bool hasMoreArgs = (argIndex + 1 < args.size()); if ("-dump" == currArg && hasMoreArgs) { pathToDump = args[++argIndex]; addSlash(pathToDump); } else if ("-device" == currArg && hasMoreArgs) { iga->setProductFamily(getProductFamilyFromDeviceName(args[++argIndex])); } else if ("-out" == currArg && hasMoreArgs) { elfName = args[++argIndex]; } else if ("--help" == currArg) { showHelp = true; return 0; } else if ("-ignore_isa_padding" == currArg) { ignoreIsaPadding = true; } else if ("-q" == currArg) { argHelper->getPrinterRef() = MessagePrinter(true); iga->setMessagePrinter(argHelper->getPrinterRef()); } else { argHelper->printf("Unknown argument %s\n", currArg.c_str()); return -1; } } if (pathToDump.empty()) { if (!argHelper->outputEnabled()) { argHelper->printf("Warning : Path to dump folder not specificed - using ./dump as default.\n"); pathToDump = "dump"; addSlash(pathToDump); } } if (elfName.find(".bin") == std::string::npos) { argHelper->printf(".bin extension is expected for binary file.\n"); return -1; } if (false == iga->isKnownPlatform()) { argHelper->printf("Warning : missing or invalid -device parameter - results may be inacurate\n"); } return 0; } template void BinaryEncoder::write(std::stringstream &in, std::ostream &deviceBinary) { T val; in >> val; deviceBinary.write(reinterpret_cast(&val), sizeof(T)); } template <> void BinaryEncoder::write(std::stringstream &in, std::ostream &deviceBinary) { uint8_t val; uint16_t help; in >> help; val = static_cast(help); deviceBinary.write(reinterpret_cast(&val), sizeof(uint8_t)); } template void BinaryEncoder::write(std::stringstream &in, std::ostream &deviceBinary); template void BinaryEncoder::write(std::stringstream &in, std::ostream &deviceBinary); template void BinaryEncoder::write(std::stringstream &in, 
std::ostream &deviceBinary); int BinaryEncoder::writeDeviceBinary(const std::string &line, std::ostream &deviceBinary) { if (line.find(':') != std::string::npos) { return 0; } else if (line.find("Hex") != std::string::npos) { std::stringstream ss(line); ss.ignore(32, ' '); uint16_t tmp; uint8_t byte; while (!ss.eof()) { ss >> std::hex >> tmp; byte = static_cast(tmp); deviceBinary.write(reinterpret_cast(&byte), sizeof(uint8_t)); } } else { std::stringstream ss(line); uint16_t size; std::string name; ss >> size; ss >> name; switch (size) { case 1: write(ss, deviceBinary); break; case 2: write(ss, deviceBinary); break; case 4: write(ss, deviceBinary); break; case 8: write(ss, deviceBinary); break; default: argHelper->printf("Unknown size in line: %s\n", line.c_str()); return -1; } } return 0; } compute-runtime-22.14.22890/shared/offline_compiler/source/decoder/binary_encoder.h000066400000000000000000000032421422164147700301760ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/offline_compiler/source/ocloc_arg_helper.h" #include "helper.h" #include "iga_wrapper.h" #include #include #include class BinaryEncoder { public: BinaryEncoder(const std::string &dump, const std::string &elf) : pathToDump(dump), elfName(elf){}; BinaryEncoder(OclocArgHelper *helper) : argHelper(helper), iga(new IgaWrapper) { iga->setMessagePrinter(argHelper->getPrinterRef()); } int encode(); int validateInput(const std::vector &args); bool showHelp = false; void printHelp(); protected: OclocArgHelper *argHelper = nullptr; bool ignoreIsaPadding = false; std::string pathToDump, elfName; std::unique_ptr iga; void calculatePatchListSizes(std::vector &ptmFile); MOCKABLE_VIRTUAL bool copyBinaryToBinary(const std::string &srcFileName, std::ostream &outBinary, uint32_t *binaryLength); bool copyBinaryToBinary(const std::string &srcFileName, std::ostream &outBinary) { return copyBinaryToBinary(srcFileName, 
outBinary, nullptr); } int createElf(std::stringstream &deviceBinary); int processBinary(const std::vector &ptmFile, std::ostream &deviceBinary); int processKernel(size_t &i, const std::vector &ptmFileLines, std::ostream &deviceBinary); template void write(std::stringstream &in, std::ostream &deviceBinary); int writeDeviceBinary(const std::string &line, std::ostream &deviceBinary); void addPadding(std::ostream &out, size_t numBytes); }; compute-runtime-22.14.22890/shared/offline_compiler/source/decoder/helper.cpp000066400000000000000000000050271422164147700270300ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "helper.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/os_interface/os_inc_base.h" #include "shared/source/os_interface/os_library.h" #include "igfxfmid.h" #include #include void addSlash(std::string &path) { if (!path.empty()) { auto lastChar = *path.rbegin(); if ((lastChar != '/') && (lastChar != '\\')) { path.append("/"); } } } std::vector readBinaryFile(const std::string &fileName) { std::ifstream file(fileName, std::ios_base::binary); if (file.good()) { size_t length; file.seekg(0, file.end); length = static_cast(file.tellg()); file.seekg(0, file.beg); std::vector binary(length); file.read(binary.data(), length); return binary; } else { printf("Error! 
Couldn't open %s\n", fileName.c_str()); exit(1); } } void readFileToVectorOfStrings(std::vector &lines, const std::string &fileName, bool replaceTabs) { std::ifstream file(fileName); if (file.good()) { if (replaceTabs) { for (std::string line; std::getline(file, line);) { std::replace_if( line.begin(), line.end(), [](auto c) { return c == '\t'; }, ' '); lines.push_back(std::move(line)); } } else { for (std::string line; std::getline(file, line);) { lines.push_back(std::move(line)); } } } } size_t findPos(const std::vector &lines, const std::string &whatToFind) { for (size_t i = 0; i < lines.size(); ++i) { auto it = lines[i].find(whatToFind); if (it != std::string::npos) { if (it + whatToFind.size() == lines[i].size()) { return i; } char delimiter = lines[i][it + whatToFind.size()]; if ((' ' == delimiter) || ('\t' == delimiter) || ('\n' == delimiter) || ('\r' == delimiter)) { return i; } } } return lines.size(); } PRODUCT_FAMILY getProductFamilyFromDeviceName(const std::string &deviceName) { for (unsigned int productId = 0; productId < IGFX_MAX_PRODUCT; ++productId) { if (NEO::hardwarePrefix[productId] != nullptr && deviceName == NEO::hardwarePrefix[productId]) { return static_cast(productId); } } return IGFX_UNKNOWN; } compute-runtime-22.14.22890/shared/offline_compiler/source/decoder/helper.h000066400000000000000000000035311422164147700264730ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/os_interface/os_library.h" #include "igfxfmid.h" #include #include #include #include #include void addSlash(std::string &path); std::vector readBinaryFile(const std::string &fileName); void readFileToVectorOfStrings(std::vector &lines, const std::string &fileName, bool replaceTabs = false); size_t findPos(const std::vector &lines, const std::string &whatToFind); PRODUCT_FAMILY getProductFamilyFromDeviceName(const std::string &deviceName); class MessagePrinter { public: 
MessagePrinter() = default; MessagePrinter(bool suppressMessages) : suppressMessages(suppressMessages) {} void printf(const char *message) { if (!suppressMessages) { ::printf("%s", message); } ss << std::string(message); } template void printf(const char *format, Args... args) { if (!suppressMessages) { ::printf(format, std::forward(args)...); } ss << stringFormat(format, std::forward(args)...); } const std::stringstream &getLog() { return ss; } bool isSuppressed() const { return suppressMessages; } private: template std::string stringFormat(const std::string &format, Args... args) { std::string outputString; size_t size = static_cast(snprintf(nullptr, 0, format.c_str(), args...) + 1); if (size <= 0) { return outputString; } outputString.resize(size); snprintf(&*outputString.begin(), size, format.c_str(), args...); return outputString.c_str(); } std::stringstream ss; bool suppressMessages = false; }; compute-runtime-22.14.22890/shared/offline_compiler/source/decoder/iga_stubs.cpp000066400000000000000000000016541422164147700275330ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "helper.h" #include "iga_wrapper.h" struct IgaWrapper::Impl { }; IgaWrapper::IgaWrapper() = default; IgaWrapper::~IgaWrapper() = default; bool IgaWrapper::tryDisassembleGenISA(const void *kernelPtr, uint32_t kernelSize, std::string &out) { messagePrinter->printf("Warning: ocloc built without support for IGA - kernel binaries won't be disassembled.\n"); return false; } bool IgaWrapper::tryAssembleGenISA(const std::string &inAsm, std::string &outBinary) { messagePrinter->printf("Warning: ocloc built without support for IGA - kernel binaries won't be assembled.\n"); return false; } bool IgaWrapper::tryLoadIga() { return false; } void IgaWrapper::setGfxCore(GFXCORE_FAMILY core) { } void IgaWrapper::setProductFamily(PRODUCT_FAMILY product) { } bool IgaWrapper::isKnownPlatform() const { return false; } 
compute-runtime-22.14.22890/shared/offline_compiler/source/decoder/iga_wrapper.cpp000066400000000000000000000142301422164147700300450ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "iga_wrapper.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/os_interface/os_inc_base.h" #include "shared/source/os_interface/os_library.h" #include "helper.h" #include "igfxfmid.h" #include "translate_platform_base.h" #include struct IgaLibrary { pIGAAssemble assemble = nullptr; pIGAContextCreate contextCreate = nullptr; pIGAContextGetErrors contextGetErrors = nullptr; pIGAContextGetWarnings contextGetWarnings = nullptr; pIGAContextRelease contextRelease = nullptr; pIGADisassemble disassemble = nullptr; pIGAStatusToString statusToString = nullptr; iga_context_options_t OptsContext = {}; std::unique_ptr library; bool isLoaded() { return library != nullptr; } }; struct IgaWrapper::Impl { iga_gen_t igaGen = IGA_GEN_INVALID; IgaLibrary igaLib; void loadIga() { IgaLibrary iga; iga.OptsContext.cb = sizeof(igaLib.OptsContext); iga.OptsContext.gen = igaGen; #define STR2(X) #X #define STR(X) STR2(X) iga.library.reset(NEO::OsLibrary::load(STR(IGA_LIBRARY_NAME))); if (iga.library == nullptr) { return; } #define LOAD_OR_ERROR(MEMBER, FUNC_NAME) \ if (nullptr == (iga.MEMBER = reinterpret_cast(iga.library->getProcAddress(FUNC_NAME)))) { \ printf("Warning : Couldn't find %s in %s\n", FUNC_NAME, STR(IGA_LIBRARY_NAME)); \ return; \ } LOAD_OR_ERROR(assemble, IGA_ASSEMBLE_STR); LOAD_OR_ERROR(contextCreate, IGA_CONTEXT_CREATE_STR); LOAD_OR_ERROR(contextGetErrors, IGA_CONTEXT_GET_ERRORS_STR); LOAD_OR_ERROR(contextGetWarnings, IGA_CONTEXT_GET_WARNINGS_STR); LOAD_OR_ERROR(contextRelease, IGA_CONTEXT_RELEASE_STR); LOAD_OR_ERROR(disassemble, IGA_DISASSEMBLE_STR); LOAD_OR_ERROR(statusToString, IGA_STATUS_TO_STRING_STR); #undef LOAD_OR_ERROR #undef STR #undef STR2 this->igaLib = std::move(iga); } }; 
IgaWrapper::IgaWrapper() : pimpl(std::make_unique()) { } IgaWrapper::~IgaWrapper() = default; bool IgaWrapper::tryDisassembleGenISA(const void *kernelPtr, uint32_t kernelSize, std::string &out) { if (false == tryLoadIga()) { messagePrinter->printf("Warning: couldn't load iga - kernel binaries won't be disassembled.\n"); return false; } iga_context_t context; iga_disassemble_options_t disassembleOptions = IGA_DISASSEMBLE_OPTIONS_INIT(); iga_status_t stat; stat = pimpl->igaLib.contextCreate(&pimpl->igaLib.OptsContext, &context); if (stat != 0) { messagePrinter->printf("Error while creating IGA Context! Error msg: %s", pimpl->igaLib.statusToString(stat)); return false; } char kernelText = '\0'; char *pKernelText = &kernelText; stat = pimpl->igaLib.disassemble(context, &disassembleOptions, kernelPtr, kernelSize, nullptr, nullptr, &pKernelText); if (stat != 0) { messagePrinter->printf("Error while disassembling with IGA!\nStatus msg: %s\n", pimpl->igaLib.statusToString(stat)); const iga_diagnostic_t *errors; uint32_t size = 100; pimpl->igaLib.contextGetErrors(context, &errors, &size); if (errors != nullptr) { messagePrinter->printf("Errors: %s\n", errors->message); } pimpl->igaLib.contextRelease(context); return false; } const iga_diagnostic_t *warnings; uint32_t warningsSize = 100; pimpl->igaLib.contextGetWarnings(context, &warnings, &warningsSize); if (warningsSize > 0 && warnings != nullptr) { messagePrinter->printf("Warnings: %s\n", warnings->message); } out = pKernelText; pimpl->igaLib.contextRelease(context); return true; } bool IgaWrapper::tryAssembleGenISA(const std::string &inAsm, std::string &outBinary) { if (false == tryLoadIga()) { messagePrinter->printf("Warning: couldn't load iga - kernel binaries won't be assembled.\n"); return false; } iga_context_t context; iga_status_t stat; iga_assemble_options_t assembleOptions = IGA_ASSEMBLE_OPTIONS_INIT(); stat = pimpl->igaLib.contextCreate(&pimpl->igaLib.OptsContext, &context); if (stat != 0) { 
messagePrinter->printf("Error while creating IGA Context! Error msg: %s", pimpl->igaLib.statusToString(stat)); return false; } uint32_t size = 0; void *pOutput = nullptr; stat = pimpl->igaLib.assemble(context, &assembleOptions, inAsm.c_str(), &pOutput, &size); if (stat != 0) { messagePrinter->printf("Error while assembling with IGA!\nStatus msg: %s\n", pimpl->igaLib.statusToString(stat)); const iga_diagnostic_t *errors; uint32_t size = 100; pimpl->igaLib.contextGetErrors(context, &errors, &size); if (errors != nullptr) { messagePrinter->printf("Errors: %s\n", errors->message); } pimpl->igaLib.contextRelease(context); return false; } const iga_diagnostic_t *warnings; uint32_t context_size; pimpl->igaLib.contextGetWarnings(context, &warnings, &context_size); if (context_size > 0 && warnings != nullptr) { messagePrinter->printf("Warnings: %s\n", warnings->message); } outBinary.assign(reinterpret_cast(pOutput), reinterpret_cast(pOutput) + size); pimpl->igaLib.contextRelease(context); return true; } bool IgaWrapper::tryLoadIga() { if (false == pimpl->igaLib.isLoaded()) { pimpl->loadIga(); } return pimpl->igaLib.isLoaded(); } void IgaWrapper::setGfxCore(GFXCORE_FAMILY core) { if (pimpl->igaGen == IGA_GEN_INVALID) { pimpl->igaGen = translateToIgaGen(core); } } void IgaWrapper::setProductFamily(PRODUCT_FAMILY product) { if (pimpl->igaGen == IGA_GEN_INVALID) { pimpl->igaGen = translateToIgaGen(product); } } bool IgaWrapper::isKnownPlatform() const { return pimpl->igaGen != IGA_GEN_INVALID; } compute-runtime-22.14.22890/shared/offline_compiler/source/decoder/iga_wrapper.h000066400000000000000000000021261422164147700275130ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "igfxfmid.h" #include #include class MessagePrinter; struct IgaWrapper { IgaWrapper(); MOCKABLE_VIRTUAL ~IgaWrapper(); IgaWrapper(IgaWrapper &) = delete; IgaWrapper(const IgaWrapper &&) = delete; IgaWrapper &operator=(const 
IgaWrapper &) = delete; IgaWrapper &operator=(IgaWrapper &&) = delete; MOCKABLE_VIRTUAL bool tryDisassembleGenISA(const void *kernelPtr, uint32_t kernelSize, std::string &out); MOCKABLE_VIRTUAL bool tryAssembleGenISA(const std::string &inAsm, std::string &outBinary); MOCKABLE_VIRTUAL void setGfxCore(GFXCORE_FAMILY core); MOCKABLE_VIRTUAL void setProductFamily(PRODUCT_FAMILY product); MOCKABLE_VIRTUAL bool isKnownPlatform() const; void setMessagePrinter(MessagePrinter &messagePrinter) { this->messagePrinter = &messagePrinter; } protected: MOCKABLE_VIRTUAL bool tryLoadIga(); struct Impl; std::unique_ptr pimpl; MessagePrinter *messagePrinter = nullptr; }; compute-runtime-22.14.22890/shared/offline_compiler/source/decoder/translate_platform.cpp000066400000000000000000000006011422164147700314430ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/offline_compiler/source/decoder/translate_platform_base.h" iga_gen_t translateToIgaGen(PRODUCT_FAMILY productFamily) { return translateToIgaGenBase(productFamily); } iga_gen_t translateToIgaGen(GFXCORE_FAMILY coreFamily) { return translateToIgaGenBase(coreFamily); } compute-runtime-22.14.22890/shared/offline_compiler/source/decoder/translate_platform_base.h000066400000000000000000000022271422164147700321100ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "igad.h" #include "igfxfmid.h" inline iga_gen_t translateToIgaGenBase(PRODUCT_FAMILY productFamily) { switch (productFamily) { default: return IGA_GEN_INVALID; case IGFX_BROADWELL: return IGA_GEN8; case IGFX_CHERRYVIEW: return IGA_GEN8lp; case IGFX_SKYLAKE: return IGA_GEN9; case IGFX_BROXTON: return IGA_GEN9lp; case IGFX_KABYLAKE: return IGA_GEN9p5; case IGFX_COFFEELAKE: return IGA_GEN9p5; case IGFX_ICELAKE: return IGA_GEN11; case IGFX_ICELAKE_LP: return IGA_GEN11; } } inline iga_gen_t 
translateToIgaGenBase(GFXCORE_FAMILY coreFamily) { switch (coreFamily) { default: return IGA_GEN_INVALID; case IGFX_GEN8_CORE: return IGA_GEN8; case IGFX_GEN9_CORE: return IGA_GEN9; case IGFX_GEN11_CORE: return IGA_GEN11; case IGFX_GEN11LP_CORE: return IGA_GEN11; } } iga_gen_t translateToIgaGen(PRODUCT_FAMILY productFamily); iga_gen_t translateToIgaGen(GFXCORE_FAMILY coreFamily); compute-runtime-22.14.22890/shared/offline_compiler/source/linux/000077500000000000000000000000001422164147700245735ustar00rootroot00000000000000compute-runtime-22.14.22890/shared/offline_compiler/source/linux/os_library_ocloc_helper.cpp000066400000000000000000000004151422164147700321620ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/linux/os_library_linux.h" namespace NEO { namespace Linux { void adjustLibraryFlags(int &dlopenFlag) { } } // namespace Linux } // namespace NEO compute-runtime-22.14.22890/shared/offline_compiler/source/main.cpp000066400000000000000000000005471422164147700250720ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "ocloc_api.h" int main(int argc, const char *argv[]) { return oclocInvoke(argc, argv, 0, nullptr, nullptr, nullptr, 0, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr); } compute-runtime-22.14.22890/shared/offline_compiler/source/multi_command.cpp000066400000000000000000000162401422164147700267730ustar00rootroot00000000000000/* * Copyright (C) 2019-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/offline_compiler/source/multi_command.h" #include "shared/offline_compiler/source/ocloc_error_code.h" #include "shared/offline_compiler/source/ocloc_fatbinary.h" #include "shared/source/utilities/const_stringref.h" #include namespace NEO { int MultiCommand::singleBuild(const std::vector &args) { int retVal = OclocErrorCode::SUCCESS; if 
(requestedFatBinary(args, argHelper)) { retVal = buildFatBinary(args, argHelper); } else { std::unique_ptr pCompiler{OfflineCompiler::create(args.size(), args, true, retVal, argHelper)}; if (retVal == OclocErrorCode::SUCCESS) { retVal = buildWithSafetyGuard(pCompiler.get()); std::string &buildLog = pCompiler->getBuildLog(); if (buildLog.empty() == false) { argHelper->printf("%s\n", buildLog.c_str()); } } outFileName += ".bin"; } if (retVal == OclocErrorCode::SUCCESS) { if (!quiet) argHelper->printf("Build succeeded.\n"); } else { argHelper->printf("Build failed with error code: %d\n", retVal); } if (retVal == OclocErrorCode::SUCCESS) { outputFile << getCurrentDirectoryOwn(outDirForBuilds) + outFileName; } else { outputFile << "Unsuccesful build"; } outputFile << '\n'; return retVal; } MultiCommand *MultiCommand::create(const std::vector &args, int &retVal, OclocArgHelper *helper) { retVal = OclocErrorCode::SUCCESS; auto pMultiCommand = new MultiCommand(); if (pMultiCommand) { pMultiCommand->argHelper = helper; retVal = pMultiCommand->initialize(args); } if (retVal != OclocErrorCode::SUCCESS) { delete pMultiCommand; pMultiCommand = nullptr; } return pMultiCommand; } void MultiCommand::addAdditionalOptionsToSingleCommandLine(std::vector &singleLineWithArguments, size_t buildId) { bool hasOutDir = false; bool hasOutName = false; for (const auto &arg : singleLineWithArguments) { if (ConstStringRef("-out_dir") == arg) { hasOutDir = true; } else if (ConstStringRef("-output") == arg) { hasOutName = true; } } if (!hasOutDir) { singleLineWithArguments.push_back("-out_dir"); outDirForBuilds = OfflineCompiler::getFileNameTrunk(pathToCommandFile); singleLineWithArguments.push_back(outDirForBuilds); } if (!hasOutName) { singleLineWithArguments.push_back("-output"); outFileName = "build_no_" + std::to_string(buildId + 1); singleLineWithArguments.push_back(outFileName); } if (quiet) singleLineWithArguments.push_back("-q"); } int MultiCommand::initialize(const std::vector &args) { 
if (args[args.size() - 1] == "--help") { printHelp(); return -1; } for (size_t argIndex = 1; argIndex < args.size(); argIndex++) { const auto &currArg = args[argIndex]; const bool hasMoreArgs = (argIndex + 1 < args.size()); if (hasMoreArgs && ConstStringRef("multi") == currArg) { pathToCommandFile = args[++argIndex]; } else if (hasMoreArgs && ConstStringRef("-output_file_list") == currArg) { outputFileList = args[++argIndex]; } else if (ConstStringRef("-q") == currArg) { quiet = true; } else { argHelper->printf("Invalid option (arg %zu): %s\n", argIndex, currArg.c_str()); printHelp(); return OclocErrorCode::INVALID_COMMAND_LINE; } } //save file with builds arguments to vector of strings, line by line if (argHelper->fileExists(pathToCommandFile)) { argHelper->readFileToVectorOfStrings(pathToCommandFile, lines); if (lines.empty()) { argHelper->printf("Command file was empty.\n"); return OclocErrorCode::INVALID_FILE; } } else { argHelper->printf("Could not find/open file with builds argument.s\n"); return OclocErrorCode::INVALID_FILE; } runBuilds(args[0]); if (outputFileList != "") { argHelper->saveOutput(outputFileList, outputFile); } return showResults(); } void MultiCommand::runBuilds(const std::string &argZero) { for (size_t i = 0; i < lines.size(); ++i) { std::vector args = {argZero}; int retVal = splitLineInSeparateArgs(args, lines[i], i); if (retVal != OclocErrorCode::SUCCESS) { retValues.push_back(retVal); continue; } if (!quiet) { argHelper->printf("Command numer %zu: \n", i + 1); } addAdditionalOptionsToSingleCommandLine(args, i); retVal = singleBuild(args); retValues.push_back(retVal); } } void MultiCommand::printHelp() { argHelper->printf(R"===(Compiles multiple files using a config file. Usage: ocloc multi Input file containing a list of arguments for subsequent ocloc invocations. Expected format of each line inside such file is: '-file -device [compile_options]'. See 'ocloc compile --help' for available compile_options. 
Results of subsequent compilations will be dumped into a directory with name indentical file_name's base name. -output_file_list Name of optional file containing paths to outputs .bin files )==="); } int MultiCommand::splitLineInSeparateArgs(std::vector &qargs, const std::string &commandsLine, size_t numberOfBuild) { size_t start, end, argLen; for (size_t i = 0; i < commandsLine.length(); ++i) { const char &currChar = commandsLine[i]; if ('\"' == currChar) { start = i + 1; end = commandsLine.find('\"', start); } else if ('\'' == currChar) { start = i + 1; end = commandsLine.find('\'', start); } else if (' ' == currChar) { continue; } else { start = i; end = commandsLine.find(" ", start); end = (end == std::string::npos) ? commandsLine.length() : end; } if (end == std::string::npos) { argHelper->printf("One of the quotes is open in build number %zu\n", numberOfBuild + 1); return OclocErrorCode::INVALID_FILE; } argLen = end - start; i = end; qargs.push_back(commandsLine.substr(start, argLen)); } return OclocErrorCode::SUCCESS; } int MultiCommand::showResults() { int retValue = OclocErrorCode::SUCCESS; int indexRetVal = 0; for (int retVal : retValues) { retValue |= retVal; if (!quiet) { if (retVal != OclocErrorCode::SUCCESS) { argHelper->printf("Build command %d: failed. 
Error code: %d\n", indexRetVal, retVal); } else { argHelper->printf("Build command %d: successful\n", indexRetVal); } } indexRetVal++; } return retValue; } } // namespace NEO compute-runtime-22.14.22890/shared/offline_compiler/source/multi_command.h000066400000000000000000000031611422164147700264360ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/offline_compiler/source/decoder/binary_decoder.h" #include "shared/offline_compiler/source/decoder/binary_encoder.h" #include "shared/offline_compiler/source/offline_compiler.h" #include "shared/offline_compiler/source/utilities/get_current_dir.h" #include "shared/offline_compiler/source/utilities/safety_caller.h" #include "shared/source/os_interface/os_library.h" #include #include #include namespace NEO { class MultiCommand { public: MultiCommand &operator=(const MultiCommand &) = delete; MultiCommand(const MultiCommand &) = delete; MOCKABLE_VIRTUAL ~MultiCommand() = default; static MultiCommand *create(const std::vector &args, int &retVal, OclocArgHelper *helper); std::string outDirForBuilds; std::string outputFileList; protected: MultiCommand() = default; int initialize(const std::vector &args); int splitLineInSeparateArgs(std::vector &qargs, const std::string &command, size_t numberOfBuild); int showResults(); int singleBuild(const std::vector &args); void addAdditionalOptionsToSingleCommandLine(std::vector &, size_t buildId); void printHelp(); void runBuilds(const std::string &argZero); OclocArgHelper *argHelper = nullptr; std::vector retValues; std::vector lines; std::string outFileName; std::string pathToCommandFile; std::stringstream outputFile; bool quiet = false; }; } // namespace NEO compute-runtime-22.14.22890/shared/offline_compiler/source/ocloc_api.cpp000066400000000000000000000207361422164147700261000ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ 
#include "ocloc_api.h"

#include "shared/offline_compiler/source/decoder/binary_decoder.h"
#include "shared/offline_compiler/source/decoder/binary_encoder.h"
#include "shared/offline_compiler/source/multi_command.h"
#include "shared/offline_compiler/source/ocloc_error_code.h"
#include "shared/offline_compiler/source/ocloc_fatbinary.h"
#include "shared/offline_compiler/source/ocloc_validator.h"
#include "shared/offline_compiler/source/offline_compiler.h"
#include "shared/offline_compiler/source/offline_linker.h"

// NOTE(review): the system header name was lost in this copy of the file;
// the ones below are those required by the code in this translation unit.
#include <cstdio>
#include <exception>
#include <memory>
#include <string>
#include <vector>

using namespace NEO;

// Top-level help text printed for "-h", "--help" or when no arguments are given.
// NOTE(review): the text is reproduced exactly as found; its original line
// breaks and any "<...>" placeholders appear to have been lost - confirm against upstream.
const char *help = R"===(ocloc is a tool for managing Intel Compute GPU device binary format. It can be used for generation (as part of 'compile' command) as well as manipulation (decoding/modifying - as part of 'disasm'/'asm' commands) of such binary files. Intel Compute GPU device binary is a format used by Intel Compute GPU runtime (aka NEO). Intel Compute GPU runtime will return this binary format when queried using clGetProgramInfo(..., CL_PROGRAM_BINARIES, ...). It will also honor this format as input to clCreateProgramWithBinary function call. ocloc does not require Intel GPU device to be present in the system nor does it depend on Intel Compute GPU runtime driver to be installed. It does however rely on the same set of compilers (IGC, common_clang) as the runtime driver. Usage: ocloc [--help] [] Available commands are listed below. Use 'ocloc --help' to get help about specific command. Commands: compile Compiles input to Intel Compute GPU device binary. link Links several IR files. disasm Disassembles Intel Compute GPU device binary. asm Assembles Intel Compute GPU device binary. multi Compiles multiple files using a config file. validate Validates Intel Compute GPU device binary. query Extracts versioning info. Default command (when none provided) is 'compile'. Examples: Compile file to Intel Compute GPU device binary (out = source_file_Gen9core.bin) ocloc -file source_file.cl -device skl Link two SPIR-V files.
ocloc link -file sample1.spv -file sample2.spv -out_format LLVM_BC -out samples_merged.llvm_bc Disassemble Intel Compute GPU device binary ocloc disasm -file source_file_Gen9core.bin Assemble to Intel Compute GPU device binary (after above disasm) ocloc asm -out reassembled.bin Validate Intel Compute GPU device binary ocloc validate -file source_file_Gen9core.bin Extract driver version ocloc query OCL_DRIVER_VERSION )===";

extern "C" {

// Prints the command line to stdout. The argument that follows an option for
// which helper->areQuotesRequired() returns true is wrapped in double quotes.
void printOclocCmdLine(unsigned int numArgs, const char *argv[], std::unique_ptr<OclocArgHelper> &helper) {
    printf("Command was:");
    bool useQuotes = false;
    for (auto i = 0u; i < numArgs; ++i) {
        const char *currArg = argv[i];
        if (useQuotes) {
            printf(" \"%s\"", currArg);
            useQuotes = false;
        } else {
            printf(" %s", currArg);
            useQuotes = helper->areQuotesRequired(currArg);
        }
    }
    printf("\n");
}

// Prints the (internal) options the compiler read from file, if any.
// A null pCompiler is accepted and ignored.
void printOclocOptionsReadFromFile(OfflineCompiler *pCompiler) {
    if (pCompiler) {
        std::string options = pCompiler->getOptionsReadFromFile();
        if (options != "") {
            printf("Compiling options read from file were:\n%s\n", options.c_str());
        }

        std::string internalOptions = pCompiler->getInternalOptionsReadFromFile();
        if (internalOptions != "") {
            printf("Internal options read from file were:\n%s\n", internalOptions.c_str());
        }
    }
}

// Entry point of the ocloc library. Dispatches on the first argument after the
// program name: help, disasm, asm, multi, fat binary build, validate, query,
// link; anything else is handled as a regular compilation.
// Returns the result code of the selected command, or -1 when a std::exception
// escapes from it.
int oclocInvoke(unsigned int numArgs, const char *argv[],
                const uint32_t numSources, const uint8_t **dataSources, const uint64_t *lenSources, const char **nameSources,
                const uint32_t numInputHeaders, const uint8_t **dataInputHeaders, const uint64_t *lenInputHeaders, const char **nameInputHeaders,
                uint32_t *numOutputs, uint8_t ***dataOutputs, uint64_t **lenOutputs, char ***nameOutputs) {
    auto helper = std::make_unique<OclocArgHelper>(
        numSources, dataSources, lenSources, nameSources,
        numInputHeaders, dataInputHeaders, lenInputHeaders, nameInputHeaders,
        numOutputs, dataOutputs, lenOutputs, nameOutputs);
    std::vector<std::string> allArgs;
    if (numArgs > 1) {
        allArgs.assign(argv, argv + numArgs);
    }

    try {
        if (numArgs == 1 || (numArgs > 1 && (ConstStringRef("-h") == allArgs[1] || ConstStringRef("--help") == allArgs[1]))) {
            helper->printf("%s", help);
            return OclocErrorCode::SUCCESS;
        } else if (numArgs > 1 && ConstStringRef("disasm") == allArgs[1]) {
            BinaryDecoder disasm(helper.get());
            int retVal = disasm.validateInput(allArgs);

            if (disasm.showHelp) {
                disasm.printHelp();
                return retVal;
            }

            if (retVal == 0) {
                return disasm.decode();
            } else {
                return retVal;
            }
        } else if (numArgs > 1 && ConstStringRef("asm") == allArgs[1]) {
            BinaryEncoder assembler(helper.get());
            int retVal = assembler.validateInput(allArgs);

            if (assembler.showHelp) {
                assembler.printHelp();
                return retVal;
            }

            if (retVal == 0) {
                return assembler.encode();
            } else {
                return retVal;
            }
        } else if (numArgs > 1 && ConstStringRef("multi") == allArgs[1]) {
            int retValue = OclocErrorCode::SUCCESS;
            // The object is only needed for its creation side effects; the
            // result is reported through retValue.
            std::unique_ptr<MultiCommand> pMulti{(MultiCommand::create(allArgs, retValue, helper.get()))};
            return retValue;
        } else if (requestedFatBinary(allArgs, helper.get())) {
            return buildFatBinary(allArgs, helper.get());
        } else if (numArgs > 1 && ConstStringRef("validate") == allArgs[1]) {
            return NEO::Ocloc::validate(allArgs, helper.get());
        } else if (numArgs > 1 && ConstStringRef("query") == allArgs[1]) {
            return OfflineCompiler::query(numArgs, allArgs, helper.get());
        } else if (numArgs > 1 && ConstStringRef("link") == allArgs[1]) {
            int createResult{OclocErrorCode::SUCCESS};
            const auto linker{OfflineLinker::create(numArgs, allArgs, createResult, helper.get())};
            const auto linkingResult{linkWithSafetyGuard(linker.get())};

            const auto buildLog = linker->getBuildLog();
            if (!buildLog.empty()) {
                helper->printf("%s\n", buildLog.c_str());
            }

            if (createResult == OclocErrorCode::SUCCESS && linkingResult == OclocErrorCode::SUCCESS) {
                helper->printf("Linker execution has succeeded!\n");
            }

            return createResult | linkingResult;
        } else {
            int retVal = OclocErrorCode::SUCCESS;

            std::unique_ptr<OfflineCompiler> pCompiler{OfflineCompiler::create(numArgs, allArgs, true, retVal, helper.get())};
            if (retVal == OclocErrorCode::SUCCESS) {
                retVal = buildWithSafetyGuard(pCompiler.get());

                std::string buildLog = pCompiler->getBuildLog();
                if (buildLog.empty() == false) {
                    helper->printf("%s\n", buildLog.c_str());
                }

                if (retVal == OclocErrorCode::SUCCESS) {
                    if (!pCompiler->isQuiet())
                        helper->printf("Build succeeded.\n");
                } else {
                    helper->printf("Build failed with error code: %d\n", retVal);
                }
            }

            if (retVal != OclocErrorCode::SUCCESS) {
                // Help the user reproduce the failure.
                printOclocOptionsReadFromFile(pCompiler.get());
                printOclocCmdLine(numArgs, argv, helper);
            }
            return retVal;
        }
    } catch (const std::exception &e) {
        helper->printf("%s\n", e.what());
        printOclocCmdLine(numArgs, argv, helper);
        return -1;
    }
    return -1;
}

// Releases the arrays handed out by oclocInvoke. Null arguments are tolerated
// (oclocInvoke itself accepts a null numOutputs, in which case nothing was
// allocated). Always returns 0.
int oclocFreeOutput(uint32_t *numOutputs, uint8_t ***dataOutputs, uint64_t **lenOutputs, char ***nameOutputs) {
    const uint32_t count = (numOutputs != nullptr) ? *numOutputs : 0u;
    uint8_t **data = (dataOutputs != nullptr) ? *dataOutputs : nullptr;
    char **names = (nameOutputs != nullptr) ? *nameOutputs : nullptr;

    for (uint32_t i = 0; i < count; i++) {
        if (data != nullptr) {
            delete[] data[i];
        }
        if (names != nullptr) {
            delete[] names[i];
        }
    }
    delete[] data;
    if (lenOutputs != nullptr) {
        delete[](*lenOutputs);
    }
    delete[] names;
    return 0;
}

// Returns the API version implemented by this library.
int oclocVersion() {
    return static_cast<int>(ocloc_version_t::OCLOC_VERSION_CURRENT);
}
}
compute-runtime-22.14.22890/shared/offline_compiler/source/ocloc_api.h000066400000000000000000000075061422164147700255450ustar00rootroot00000000000000
/*
 * Copyright (C) 2020-2021 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#ifndef _OCLOC_API_H
#define _OCLOC_API_H

#if defined(__cplusplus)
#pragma once
#endif

// NOTE(review): the system header name was lost in this copy of the file;
// <cstdint> provides the fixed-width integer types used below.
#include <cstdint>

#ifndef OCLOC_MAKE_VERSION
/// Generates ocloc API versions
#define OCLOC_MAKE_VERSION(_major, _minor) (((_major) << 16) | ((_minor)&0x0000ffff))
#endif // OCLOC_MAKE_VERSION

typedef enum _ocloc_version_t {
    OCLOC_VERSION_1_0 = OCLOC_MAKE_VERSION(1, 0),     ///< version 1.0
    OCLOC_VERSION_CURRENT = OCLOC_MAKE_VERSION(1, 0), ///< latest known version
    OCLOC_VERSION_FORCE_UINT32 = 0x7fffffff
} ocloc_version_t;

#ifdef _WIN32
#define SIGNATURE __declspec(dllexport) int __cdecl
#else
#define SIGNATURE int
#endif

extern "C" {

/// Invokes ocloc API using C interface.
Supported commands match /// the functionality of ocloc executable (check ocloc's "help" /// for reference : shared/offline_compiler/source/ocloc_api.cpp). /// /// numArgs and argv params represent the command line. /// Remaining params represent I/O. /// Output params should be freed using oclocFreeOutput when /// no longer needed. /// List and names of outputs match outputs of ocloc executable. /// /// \param numArgs is the number of arguments to pass to ocloc /// /// \param argv is an array of arguments to be passed to ocloc /// /// \param numSources is the number of in-memory representations /// of source files to be passed to ocloc /// /// \param dataSources is an array of in-memory representations /// of source files to be passed to ocloc /// /// \param lenSources is an array of sizes of in-memory representations /// of source files passed to ocloc as dataSources /// /// \param nameSources is an array of names of in-memory representations /// of source files passed to ocloc as dataSources /// /// \param numInputHeaders is the number of in-memory representations /// of header files to be passed to ocloc /// /// \param dataInputHeaders is an array of in-memory representations /// of header files to be passed to ocloc /// /// \param lenInputHeaders is an array of sizes of in-memory representations /// of header files passed to ocloc as dataInputHeaders /// /// \param nameInputHeaders is an array of names of in-memory representations /// of header files passed to ocloc as dataInputHeaders /// /// \param numOutputs returns the number of outputs /// /// \param dataOutputs returns an array of in-memory representations /// of output files /// /// \param lenOutputs returns an array of sizes of in-memory representations /// of output files /// /// \param nameOutputs returns an array of names of in-memory representations /// of output files. Special name stdout.log describes output that contains /// messages generated by ocloc (e.g. 
compiler errors/warnings) /// /// \returns 0 on succes. Returns non-0 in case of failure. SIGNATURE oclocInvoke(unsigned int numArgs, const char *argv[], const uint32_t numSources, const uint8_t **dataSources, const uint64_t *lenSources, const char **nameSources, const uint32_t numInputHeaders, const uint8_t **dataInputHeaders, const uint64_t *lenInputHeaders, const char **nameInputHeaders, uint32_t *numOutputs, uint8_t ***dataOutputs, uint64_t **lenOutputs, char ***nameOutputs); /// Frees results of oclocInvoke /// /// \param numOutputs is number of outputs as returned by oclocInvoke /// /// \param dataOutputs is array of outputs as returned by oclocInvoke /// /// \param lenOutputs is array of sizes of outputs as returned by oclocInvoke /// /// \param nameOutputs is array of names of outputs as returned by oclocInvoke /// /// \returns 0 on succes. Returns non-0 in case of failure. SIGNATURE oclocFreeOutput(uint32_t *numOutputs, uint8_t ***dataOutputs, uint64_t **lenOutputs, char ***nameOutputs); /// Returns the current version of oclock SIGNATURE oclocVersion(); } #endif //_OCLOC_API_H compute-runtime-22.14.22890/shared/offline_compiler/source/ocloc_arg_helper.cpp000066400000000000000000000333321422164147700274330ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "ocloc_arg_helper.h" #include "shared/source/helpers/file_io.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/helpers/string.h" #include "hw_cmds.h" #include #include #include void Source::toVectorOfStrings(std::vector &lines, bool replaceTabs) { std::string line; const char *file = reinterpret_cast(data); while (*file != '\0') { if (replaceTabs && *file == '\t') { line += ' '; } else if (*file == '\n') { if (!line.empty()) { lines.push_back(line); line = ""; } } else { line += *file; } file++; } if (!line.empty()) { lines.push_back(std::move(line)); } } Output::Output(const std::string &name, const void *data, 
const size_t &size) : name(name), size(size) { this->data = new uint8_t[size]; memcpy_s(reinterpret_cast(this->data), this->size, data, size); }; OclocArgHelper::OclocArgHelper(const uint32_t numSources, const uint8_t **dataSources, const uint64_t *lenSources, const char **nameSources, const uint32_t numInputHeaders, const uint8_t **dataInputHeaders, const uint64_t *lenInputHeaders, const char **nameInputHeaders, uint32_t *numOutputs, uint8_t ***dataOutputs, uint64_t **lenOutputs, char ***nameOutputs) : numOutputs(numOutputs), nameOutputs(nameOutputs), dataOutputs(dataOutputs), lenOutputs(lenOutputs), hasOutput(numOutputs != nullptr), messagePrinter(hasOutput), deviceProductTable({ #define NAMEDDEVICE(devId, product, ignored_devName) {devId, NEO::hardwarePrefix[NEO::product::hwInfo.platform.eProductFamily]}, #define DEVICE(devId, product) {devId, NEO::hardwarePrefix[NEO::product::hwInfo.platform.eProductFamily]}, #include "devices.inl" #undef DEVICE #undef NAMEDDEVICE {0u, std::string("")}}), deviceMap({ #define DEVICE_CONFIG_IDS_AND_REVISION(product, productConfig, deviceIds, revision_id) {product, &NEO::productConfig::hwInfo, &NEO::deviceIds, NEO::productConfig::setupFeatureAndWorkaroundTable, revision_id}, #define DEVICE_CONFIG_IDS(product, productConfig, deviceIds) {product, &NEO::productConfig::hwInfo, &NEO::deviceIds, NEO::productConfig::setupFeatureAndWorkaroundTable, NEO::productConfig::hwInfo.platform.usRevId}, #define DEVICE_CONFIG(product, productConfig) {product, &NEO::productConfig::hwInfo, nullptr, NEO::productConfig::setupFeatureAndWorkaroundTable, NEO::productConfig::hwInfo.platform.usRevId}, #include "product_config.inl" #undef DEVICE_CONFIG #undef DEVICE_CONFIG_IDS #undef DEVICE_CONFIG_IDS_AND_REVISION }) { for (uint32_t i = 0; i < numSources; ++i) { inputs.push_back(Source(dataSources[i], static_cast(lenSources[i]), nameSources[i])); } for (uint32_t i = 0; i < numInputHeaders; ++i) { headers.push_back(Source(dataInputHeaders[i], 
static_cast(lenInputHeaders[i]), nameInputHeaders[i])); } for (unsigned int family = 0; family < IGFX_MAX_CORE; ++family) { if (NEO::familyName[family] == nullptr) { continue; } insertGenNames(static_cast(family)); } std::sort(deviceMap.begin(), deviceMap.end(), compareConfigs); deviceMap.erase(std::unique(deviceMap.begin(), deviceMap.end(), isDuplicateConfig), deviceMap.end()); } OclocArgHelper::OclocArgHelper() : OclocArgHelper(0, nullptr, nullptr, nullptr, 0, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr) {} OclocArgHelper::~OclocArgHelper() { if (outputEnabled()) { saveOutput(oclocStdoutLogName, messagePrinter.getLog()); moveOutputs(); } } bool OclocArgHelper::fileExists(const std::string &filename) const { return sourceFileExists(filename) || ::fileExists(filename); } void OclocArgHelper::moveOutputs() { *numOutputs = static_cast(outputs.size()); *nameOutputs = new char *[outputs.size()]; *dataOutputs = new uint8_t *[outputs.size()]; *lenOutputs = new uint64_t[outputs.size()]; for (size_t i = 0; i < outputs.size(); ++i) { size_t size = outputs[i]->name.length() + 1; (*nameOutputs)[i] = new char[size]; strncpy_s((*nameOutputs)[i], size, outputs[i]->name.c_str(), outputs[i]->name.length() + 1); (*dataOutputs)[i] = outputs[i]->data; (*lenOutputs)[i] = outputs[i]->size; } } Source *OclocArgHelper::findSourceFile(const std::string &filename) { for (auto &source : inputs) { if (filename == source.name) { return &source; } } return nullptr; } bool OclocArgHelper::sourceFileExists(const std::string &filename) const { for (auto &input : inputs) { if (filename == input.name) { return true; } } return false; } std::vector OclocArgHelper::headersToVectorOfStrings() { std::vector lines; for (auto &header : headers) { header.toVectorOfStrings(lines, true); } return lines; } void OclocArgHelper::readFileToVectorOfStrings(const std::string &filename, std::vector &lines) { if (Source *s = findSourceFile(filename)) { s->toVectorOfStrings(lines); } else { 
::readFileToVectorOfStrings(lines, filename); } } std::vector OclocArgHelper::readBinaryFile(const std::string &filename) { if (Source *s = findSourceFile(filename)) { return s->toBinaryVector(); } else { return ::readBinaryFile(filename); } } std::unique_ptr OclocArgHelper::loadDataFromFile(const std::string &filename, size_t &retSize) { if (Source *s = findSourceFile(filename)) { auto size = s->length; std::unique_ptr ret(new char[size]()); memcpy_s(ret.get(), size, s->data, s->length); retSize = s->length; return ret; } else { return ::loadDataFromFile(filename.c_str(), retSize); } } void OclocArgHelper::setDeviceInfoForFatbinaryTarget(const DeviceMapping &device) { deviceForFatbinary.hwInfo = device.hwInfo; deviceForFatbinary.setupFeatureAndWorkaroundTable = device.setupFeatureAndWorkaroundTable; deviceForFatbinary.revId = device.revId; deviceForFatbinary.deviceIds = device.deviceIds; } void OclocArgHelper::setHwInfoForFatbinaryTarget(NEO::HardwareInfo &hwInfo) { hwInfo = *deviceForFatbinary.hwInfo; deviceForFatbinary.setupFeatureAndWorkaroundTable(&hwInfo); hwInfo.platform.usRevId = deviceForFatbinary.revId; if (deviceForFatbinary.deviceIds) { hwInfo.platform.usDeviceID = deviceForFatbinary.deviceIds->front(); } } bool OclocArgHelper::getHwInfoForProductConfig(uint32_t config, NEO::HardwareInfo &hwInfo) { bool retVal = false; if (config == UNKNOWN_ISA) { return retVal; } for (auto &deviceConfig : deviceMap) { if (deviceConfig.config == config) { hwInfo = *deviceConfig.hwInfo; deviceConfig.setupFeatureAndWorkaroundTable(&hwInfo); hwInfo.platform.usRevId = deviceConfig.revId; if (deviceConfig.deviceIds) { hwInfo.platform.usDeviceID = deviceConfig.deviceIds->front(); } retVal = true; return retVal; } } return retVal; } void OclocArgHelper::getProductConfigsForGfxCoreFamily(GFXCORE_FAMILY core, std::vector &out) { for (auto &deviceConfig : deviceMap) { if (deviceConfig.hwInfo->platform.eRenderCoreFamily == core) { out.push_back(deviceConfig); } } } void 
OclocArgHelper::saveOutput(const std::string &filename, const void *pData, const size_t &dataSize) { if (outputEnabled()) { addOutput(filename, pData, dataSize); } else { writeDataToFile(filename.c_str(), pData, dataSize); } } void OclocArgHelper::saveOutput(const std::string &filename, const std::ostream &stream) { std::stringstream ss; ss << stream.rdbuf(); if (outputEnabled()) { addOutput(filename, ss.str().c_str(), ss.str().length()); } else { std::ofstream file(filename); file << ss.str(); } } std::string OclocArgHelper::returnProductNameForDevice(unsigned short deviceId) { std::string res = ""; for (int i = 0; deviceProductTable[i].deviceId != 0; i++) { if (deviceProductTable[i].deviceId == deviceId) { res = deviceProductTable[i].product; } } return res; } std::vector &OclocArgHelper::getAllSupportedDeviceConfigs() { return deviceMap; } const std::string OclocArgHelper::parseProductConfigFromValue(PRODUCT_CONFIG config) { auto configValue = static_cast(config); std::stringstream stringConfig; uint32_t major = (configValue & 0xff0000) >> 16; uint32_t minor = (configValue & 0x00ff00) >> 8; uint32_t revision = configValue & 0x0000ff; stringConfig << major << "." << minor << "." 
<< revision; return stringConfig.str(); } std::vector OclocArgHelper::getAllSupportedProductConfigs() { std::vector allConfigs; for (auto &deviceConfig : deviceMap) { allConfigs.push_back(deviceConfig.config); } std::sort(allConfigs.begin(), allConfigs.end()); return allConfigs; } int OclocArgHelper::parseProductConfigFromString(const std::string &device, size_t begin, size_t end) { if (begin == end) { return CONFIG_STATUS::MISMATCHED_VALUE; } if (end == std::string::npos) { if (!std::all_of(device.begin() + begin, device.end(), (::isdigit))) { return CONFIG_STATUS::MISMATCHED_VALUE; } return std::stoi(device.substr(begin, device.size() - begin)); } else { if (!std::all_of(device.begin() + begin, device.begin() + end, (::isdigit))) { return CONFIG_STATUS::MISMATCHED_VALUE; } return std::stoi(device.substr(begin, end - begin)); } } std::vector OclocArgHelper::getMajorMinorRevision(const std::string &device) { std::vector numeration; auto major_pos = device.find("."); auto major = parseProductConfigFromString(device, 0, major_pos); if (major == CONFIG_STATUS::MISMATCHED_VALUE) { return {}; } numeration.push_back(major); if (major_pos == std::string::npos) { return numeration; } auto minor_pos = device.find(".", ++major_pos); auto minor = parseProductConfigFromString(device, major_pos, minor_pos); if (minor == CONFIG_STATUS::MISMATCHED_VALUE) { return {}; } numeration.push_back(minor); if (minor_pos == std::string::npos) { return numeration; } auto revision = parseProductConfigFromString(device, minor_pos + 1, device.size()); if (revision == CONFIG_STATUS::MISMATCHED_VALUE) { return {}; } numeration.push_back(revision); return numeration; } uint32_t OclocArgHelper::getProductConfig(std::vector &numeration) { uint32_t config = 0x0; config = numeration.at(0) << 16; if (numeration.size() > 1) { config |= (numeration.at(1) << 8); } if (numeration.size() > 2) { config |= numeration.at(2); } return config; } uint32_t OclocArgHelper::getMaskForConfig(std::vector &numeration) 
{ uint32_t mask = 0xffffff; if (numeration.size() == 1) { mask = 0xff0000; } else if (numeration.size() == 2) { mask = 0xffff00; } return mask; } bool OclocArgHelper::isGen(const std::string &device) { std::string buf(device); std::transform(buf.begin(), buf.end(), buf.begin(), ::tolower); auto it = genIGFXMap.find(buf); return it == genIGFXMap.end() ? false : true; } unsigned int OclocArgHelper::returnIGFXforGen(const std::string &device) { std::string buf(device); std::transform(buf.begin(), buf.end(), buf.begin(), ::tolower); auto it = genIGFXMap.find(buf); if (it == genIGFXMap.end()) return 0; return it->second; } bool OclocArgHelper::areQuotesRequired(const std::string_view &argName) { return argName == "-options" || argName == "-internal_options"; } PRODUCT_CONFIG OclocArgHelper::findConfigMatch(const std::string &device, bool firstAppearance) { auto numeration = getMajorMinorRevision(device); if (numeration.empty()) { return PRODUCT_CONFIG::UNKNOWN_ISA; } std::vector allMatchedConfigs; std::vector allConfigs = getAllSupportedProductConfigs(); auto configValue = getProductConfig(numeration); uint32_t mask = getMaskForConfig(numeration); if (!firstAppearance) { // find last appearance std::reverse(allConfigs.begin(), allConfigs.end()); } for (auto &productConfig : allConfigs) { uint32_t value = static_cast(productConfig) & mask; if (value == configValue) { return productConfig; } } return PRODUCT_CONFIG::UNKNOWN_ISA; } void OclocArgHelper::insertGenNames(GFXCORE_FAMILY family) { std::string genName = NEO::familyName[family]; std::transform(genName.begin(), genName.end(), genName.begin(), ::tolower); genIGFXMap.insert({genName, family}); auto findCore = genName.find("_core"); if (findCore != std::string::npos) { genName = genName.substr(0, findCore); genIGFXMap.insert({genName, family}); } auto findUnderline = genName.find("_"); if (findUnderline != std::string::npos) { genName.erase(std::remove(genName.begin(), genName.end(), '_'), genName.end()); 
genIGFXMap.insert({genName, family}); } } compute-runtime-22.14.22890/shared/offline_compiler/source/ocloc_arg_helper.h000066400000000000000000000132061422164147700270760ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/offline_compiler/source/decoder/helper.h" #include "shared/source/helpers/hw_info.h" #include "device_ids_configs.h" #include "hw_cmds.h" #include "platforms.h" #include #include #include #include #include #include #pragma once static constexpr auto *oclocStdoutLogName = "stdout.log"; struct Source { const uint8_t *data; const size_t length; const char *name; Source(const uint8_t *data, const size_t length, const char *name) : data(data), length(length), name(name){}; void toVectorOfStrings(std::vector &lines, bool replaceTabs = false); inline std::vector toBinaryVector() { return std::vector(data, data + length); }; }; struct Output { std::string name; uint8_t *data; const size_t size; Output(const std::string &name, const void *data, const size_t &size); }; struct DeviceProduct { unsigned short deviceId; std::string product; }; struct DeviceMapping { PRODUCT_CONFIG config; const NEO::HardwareInfo *hwInfo; const std::vector *deviceIds; void (*setupFeatureAndWorkaroundTable)(NEO::HardwareInfo *hwInfo); unsigned int revId; bool operator==(const DeviceMapping &rhs) { return config == rhs.config && hwInfo == rhs.hwInfo && setupFeatureAndWorkaroundTable == rhs.setupFeatureAndWorkaroundTable && revId == rhs.revId; } }; class OclocArgHelper { protected: std::vector inputs, headers; std::vector outputs; uint32_t *numOutputs = nullptr; char ***nameOutputs = nullptr; uint8_t ***dataOutputs = nullptr; uint64_t **lenOutputs = nullptr; bool hasOutput = false; MessagePrinter messagePrinter; const std::vector deviceProductTable; std::vector deviceMap; DeviceMapping deviceForFatbinary; std::map genIGFXMap; bool fatBinary = false; void moveOutputs(); Source *findSourceFile(const 
std::string &filename); bool sourceFileExists(const std::string &filename) const; inline void addOutput(const std::string &filename, const void *data, const size_t &size) { outputs.push_back(new Output(filename, data, size)); } static bool compareConfigs(DeviceMapping deviceMap0, DeviceMapping deviceMap1) { return deviceMap0.config < deviceMap1.config; } static bool isDuplicateConfig(DeviceMapping deviceMap0, DeviceMapping deviceMap1) { return deviceMap0.config == deviceMap1.config; } public: OclocArgHelper(); OclocArgHelper(const uint32_t numSources, const uint8_t **dataSources, const uint64_t *lenSources, const char **nameSources, const uint32_t numInputHeaders, const uint8_t **dataInputHeaders, const uint64_t *lenInputHeaders, const char **nameInputHeaders, uint32_t *numOutputs, uint8_t ***dataOutputs, uint64_t **lenOutputs, char ***nameOutputs); virtual ~OclocArgHelper(); enum CONFIG_STATUS { MISMATCHED_VALUE = -1, }; MOCKABLE_VIRTUAL bool fileExists(const std::string &filename) const; int parseProductConfigFromString(const std::string &device, size_t begin, size_t end); const std::string parseProductConfigFromValue(PRODUCT_CONFIG config); bool getHwInfoForProductConfig(uint32_t config, NEO::HardwareInfo &hwInfo); void getProductConfigsForGfxCoreFamily(GFXCORE_FAMILY core, std::vector &out); void setDeviceInfoForFatbinaryTarget(const DeviceMapping &device); void setHwInfoForFatbinaryTarget(NEO::HardwareInfo &hwInfo); std::vector getAllSupportedProductConfigs(); std::vector &getAllSupportedDeviceConfigs(); std::vector getMajorMinorRevision(const std::string &device); uint32_t getProductConfig(std::vector &numeration); uint32_t getMaskForConfig(std::vector &numeration); PRODUCT_CONFIG findConfigMatch(const std::string &device, bool firstAppearance); void insertGenNames(GFXCORE_FAMILY family); std::vector headersToVectorOfStrings(); MOCKABLE_VIRTUAL void readFileToVectorOfStrings(const std::string &filename, std::vector &lines); MOCKABLE_VIRTUAL std::vector 
readBinaryFile(const std::string &filename); MOCKABLE_VIRTUAL std::unique_ptr loadDataFromFile(const std::string &filename, size_t &retSize); bool outputEnabled() const { return hasOutput; } bool hasHeaders() const { return headers.size() > 0; } const std::vector &getHeaders() const { return headers; } void setFatbinary(bool isFatBinary) { this->fatBinary = isFatBinary; } bool isFatbinary() { return fatBinary; } MOCKABLE_VIRTUAL void saveOutput(const std::string &filename, const void *pData, const size_t &dataSize); void saveOutput(const std::string &filename, const std::ostream &stream); MessagePrinter &getPrinterRef() { return messagePrinter; } void printf(const char *message) { messagePrinter.printf(message); } template void printf(const char *format, Args... args) { messagePrinter.printf(format, std::forward(args)...); } std::string returnProductNameForDevice(unsigned short deviceId); bool isGen(const std::string &device); unsigned int returnIGFXforGen(const std::string &device); bool areQuotesRequired(const std::string_view &argName); }; compute-runtime-22.14.22890/shared/offline_compiler/source/ocloc_error_code.h000066400000000000000000000006211422164147700271060ustar00rootroot00000000000000/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once namespace NEO::OclocErrorCode { enum { SUCCESS = 0, OUT_OF_HOST_MEMORY = -6, BUILD_PROGRAM_FAILURE = -11, INVALID_DEVICE = -33, INVALID_PROGRAM = -44, INVALID_COMMAND_LINE = -5150, INVALID_FILE = -5151, COMPILATION_CRASH = -5152, }; } // namespace NEO::OclocErrorCodecompute-runtime-22.14.22890/shared/offline_compiler/source/ocloc_fatbinary.cpp000066400000000000000000000506531422164147700273070ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/offline_compiler/source/ocloc_fatbinary.h" #include "shared/offline_compiler/source/ocloc_error_code.h" #include 
"shared/offline_compiler/source/utilities/safety_caller.h" #include "shared/source/compiler_interface/intermediate_representations.h" #include "shared/source/device_binary_format/elf/elf_encoder.h" #include "shared/source/device_binary_format/elf/ocl_elf.h" #include "shared/source/helpers/file_io.h" #include "shared/source/helpers/hw_info.h" #include "igfxfmid.h" #include #include #include namespace NEO { std::vector getAllMatchedConfigs(const std::string device, OclocArgHelper *argHelper) { std::vector allMatchedConfigs; auto numeration = argHelper->getMajorMinorRevision(device); if (numeration.empty()) { return {}; } auto config = argHelper->getProductConfig(numeration); std::vector allConfigs = argHelper->getAllSupportedProductConfigs(); uint32_t mask = argHelper->getMaskForConfig(numeration); for (auto &productConfig : allConfigs) { auto prod = static_cast(productConfig) & mask; if (config == prod) { allMatchedConfigs.push_back(productConfig); } } return allMatchedConfigs; } bool requestedFatBinary(const std::vector &args, OclocArgHelper *helper) { for (size_t argIndex = 1; argIndex < args.size(); argIndex++) { const auto &currArg = args[argIndex]; const bool hasMoreArgs = (argIndex + 1 < args.size()); if ((ConstStringRef("-device") == currArg) && hasMoreArgs) { ConstStringRef deviceArg(args[argIndex + 1]); auto products = getAllMatchedConfigs(deviceArg.str(), helper); if (products.size() > 1) { return true; } return deviceArg.contains("*") || deviceArg.contains("-") || deviceArg.contains(",") || helper->isGen(deviceArg.str()); } } return false; } std::vector getAllSupportedTargetPlatforms() { return std::vector{ALL_SUPPORTED_PRODUCT_FAMILIES}; } std::vector toProductNames(const std::vector &productIds) { std::vector ret; for (auto prodId : productIds) { ret.push_back(ConstStringRef(hardwarePrefix[prodId], strlen(hardwarePrefix[prodId]))); } return ret; } PRODUCT_FAMILY asProductId(ConstStringRef product, const std::vector &allSupportedPlatforms) { for (auto 
&family : allSupportedPlatforms) { if (product == hardwarePrefix[family]) { return family; } } return IGFX_UNKNOWN; } std::vector getProductConfigsForOpenRange(ConstStringRef openRange, OclocArgHelper *argHelper, bool rangeTo) { std::vector requestedConfigs; std::vector allSupportedDeviceConfigs = argHelper->getAllSupportedDeviceConfigs(); if (argHelper->isGen(openRange.str())) { std::vector coreIdList; auto coreId = argHelper->returnIGFXforGen(openRange.str()); coreIdList.push_back(static_cast(coreId)); if (rangeTo) { auto coreId = coreIdList.back(); unsigned int coreIt = IGFX_UNKNOWN_CORE; ++coreIt; while (coreIt <= static_cast(coreId)) { argHelper->getProductConfigsForGfxCoreFamily(static_cast(coreIt), requestedConfigs); ++coreIt; } } else { unsigned int coreIt = coreIdList.front(); while (coreIt < static_cast(IGFX_MAX_CORE)) { argHelper->getProductConfigsForGfxCoreFamily(static_cast(coreIt), requestedConfigs); ++coreIt; } } } else { auto productConfig = argHelper->findConfigMatch(openRange.str(), !rangeTo); if (productConfig == PRODUCT_CONFIG::UNKNOWN_ISA) { argHelper->printf("Unknown device : %s\n", openRange.str().c_str()); return {}; } auto configIt = std::find_if(allSupportedDeviceConfigs.begin(), allSupportedDeviceConfigs.end(), [&cf = productConfig](const DeviceMapping &c) -> bool { return cf == c.config; }); if (rangeTo) { for (auto &deviceConfig : allSupportedDeviceConfigs) { if (deviceConfig.config <= productConfig) { requestedConfigs.push_back(deviceConfig); } } } else { requestedConfigs.insert(requestedConfigs.end(), configIt, allSupportedDeviceConfigs.end()); } } return requestedConfigs; } std::vector getProductConfigsForClosedRange(ConstStringRef rangeFrom, ConstStringRef rangeTo, OclocArgHelper *argHelper) { std::vector requestedConfigs; std::vector allSupportedDeviceConfigs = argHelper->getAllSupportedDeviceConfigs(); if (argHelper->isGen(rangeFrom.str())) { if (false == argHelper->isGen(rangeTo.str())) { argHelper->printf("Ranges mixing configs 
and architecture is not supported, should be architectureFrom-architectureTo or configFrom-configTo\n"); return {}; } auto coreFrom = argHelper->returnIGFXforGen(rangeFrom.str()); auto coreTo = argHelper->returnIGFXforGen(rangeTo.str()); if (static_cast(coreFrom) > static_cast(coreTo)) { std::swap(coreFrom, coreTo); } while (coreFrom <= coreTo) { argHelper->getProductConfigsForGfxCoreFamily(static_cast(coreFrom), requestedConfigs); coreFrom = static_cast(static_cast(coreFrom) + 1); } } else { auto configFrom = argHelper->findConfigMatch(rangeFrom.str(), true); if (configFrom == PRODUCT_CONFIG::UNKNOWN_ISA) { argHelper->printf("Unknown device range : %s\n", rangeFrom.str().c_str()); return {}; } auto configTo = argHelper->findConfigMatch(rangeTo.str(), false); if (configTo == PRODUCT_CONFIG::UNKNOWN_ISA) { argHelper->printf("Unknown device range : %s\n", rangeTo.str().c_str()); return {}; } if (configFrom > configTo) { configFrom = argHelper->findConfigMatch(rangeTo.str(), true); configTo = argHelper->findConfigMatch(rangeFrom.str(), false); } for (auto &deviceConfig : allSupportedDeviceConfigs) { if (deviceConfig.config >= configFrom && deviceConfig.config <= configTo) { requestedConfigs.push_back(deviceConfig); } } } return requestedConfigs; } std::vector getPlatformsForClosedRange(ConstStringRef rangeFrom, ConstStringRef rangeTo, PRODUCT_FAMILY platformFrom, OclocArgHelper *argHelper) { std::vector requestedPlatforms; std::vector allSupportedPlatforms = getAllSupportedTargetPlatforms(); auto platformTo = asProductId(rangeTo, allSupportedPlatforms); if (IGFX_UNKNOWN == platformTo) { argHelper->printf("Unknown device : %s\n", rangeTo.str().c_str()); return {}; } if (platformFrom > platformTo) { std::swap(platformFrom, platformTo); } auto from = std::find(allSupportedPlatforms.begin(), allSupportedPlatforms.end(), platformFrom); auto to = std::find(allSupportedPlatforms.begin(), allSupportedPlatforms.end(), platformTo) + 1; 
requestedPlatforms.insert(requestedPlatforms.end(), from, to); return toProductNames(requestedPlatforms); } std::vector getPlatformsForOpenRange(ConstStringRef openRange, PRODUCT_FAMILY prodId, OclocArgHelper *argHelper, bool rangeTo) { std::vector requestedPlatforms; std::vector allSupportedPlatforms = getAllSupportedTargetPlatforms(); auto prodIt = std::find(allSupportedPlatforms.begin(), allSupportedPlatforms.end(), prodId); assert(prodIt != allSupportedPlatforms.end()); if (rangeTo) { requestedPlatforms.insert(requestedPlatforms.end(), allSupportedPlatforms.begin(), prodIt + 1); } else { requestedPlatforms.insert(requestedPlatforms.end(), prodIt, allSupportedPlatforms.end()); } return toProductNames(requestedPlatforms); } std::vector getProductConfigsForSpecificTargets(CompilerOptions::TokenizedString targets, OclocArgHelper *argHelper) { std::vector requestedConfigs; std::vector allSupportedDeviceConfigs = argHelper->getAllSupportedDeviceConfigs(); for (auto &target : targets) { if (argHelper->isGen(target.str())) { auto coreId = argHelper->returnIGFXforGen(target.str()); argHelper->getProductConfigsForGfxCoreFamily(static_cast(coreId), requestedConfigs); } else { auto configFirstEl = argHelper->findConfigMatch(target.str(), true); if (configFirstEl == PRODUCT_CONFIG::UNKNOWN_ISA) { argHelper->printf("Unknown device range : %s\n", target.str().c_str()); return {}; } auto configLastEl = argHelper->findConfigMatch(target.str(), false); for (auto &deviceConfig : allSupportedDeviceConfigs) { if (deviceConfig.config >= configFirstEl && deviceConfig.config <= configLastEl) { requestedConfigs.push_back(deviceConfig); } } } } return requestedConfigs; } std::vector getPlatformsForSpecificTargets(CompilerOptions::TokenizedString targets, OclocArgHelper *argHelper) { std::vector requestedPlatforms; std::vector allSupportedPlatforms = getAllSupportedTargetPlatforms(); for (auto &target : targets) { auto prodId = asProductId(target, allSupportedPlatforms); if (IGFX_UNKNOWN 
== prodId) { argHelper->printf("Unknown device : %s\n", target.str().c_str()); return {}; } requestedPlatforms.push_back(prodId); } return toProductNames(requestedPlatforms); } bool isDeviceWithPlatformAbbreviation(ConstStringRef deviceArg, OclocArgHelper *argHelper) { std::vector allSupportedPlatforms = getAllSupportedTargetPlatforms(); PRODUCT_FAMILY prodId = IGFX_UNKNOWN; auto sets = CompilerOptions::tokenize(deviceArg, ','); if (sets[0].contains("-")) { auto range = CompilerOptions::tokenize(deviceArg, '-'); prodId = asProductId(range[0], allSupportedPlatforms); } else { prodId = asProductId(sets[0], allSupportedPlatforms); } return prodId != IGFX_UNKNOWN; } std::vector getTargetPlatformsForFatbinary(ConstStringRef deviceArg, OclocArgHelper *argHelper) { std::vector allSupportedPlatforms = getAllSupportedTargetPlatforms(); std::vector retVal; auto sets = CompilerOptions::tokenize(deviceArg, ','); if (sets[0].contains("-")) { auto range = CompilerOptions::tokenize(deviceArg, '-'); if (range.size() > 2) { argHelper->printf("Invalid range : %s - should be from-to or -to or from-\n", sets[0].str().c_str()); return {}; } auto prodId = asProductId(range[0], allSupportedPlatforms); if (range.size() == 1) { bool rangeTo = ('-' == sets[0][0]); retVal = getPlatformsForOpenRange(range[0], prodId, argHelper, rangeTo); } else { retVal = getPlatformsForClosedRange(range[0], range[1], prodId, argHelper); } } else { retVal = getPlatformsForSpecificTargets(sets, argHelper); } return retVal; } std::vector getTargetConfigsForFatbinary(ConstStringRef deviceArg, OclocArgHelper *argHelper) { if (deviceArg == "*") { return argHelper->getAllSupportedDeviceConfigs(); } std::vector retVal; auto sets = CompilerOptions::tokenize(deviceArg, ','); if (sets[0].contains("-")) { auto range = CompilerOptions::tokenize(deviceArg, '-'); if (range.size() > 2) { argHelper->printf("Invalid range : %s - should be from-to or -to or from-\n", sets[0].str().c_str()); return {}; } if (range.size() == 1) 
{ bool rangeTo = ('-' == sets[0][0]); retVal = getProductConfigsForOpenRange(range[0], argHelper, rangeTo); } else { retVal = getProductConfigsForClosedRange(range[0], range[1], argHelper); } } else { retVal = getProductConfigsForSpecificTargets(sets, argHelper); } return retVal; } int buildFatBinaryForTarget(int retVal, const std::vector &argsCopy, std::string pointerSize, Ar::ArEncoder &fatbinary, OfflineCompiler *pCompiler, OclocArgHelper *argHelper, const std::string &deviceConfig) { std::string product = hardwarePrefix[pCompiler->getHardwareInfo().platform.eProductFamily]; auto stepping = pCompiler->getHardwareInfo().platform.usRevId; if (retVal == 0) { retVal = buildWithSafetyGuard(pCompiler); std::string buildLog = pCompiler->getBuildLog(); if (buildLog.empty() == false) { argHelper->printf("%s\n", buildLog.c_str()); } if (retVal == 0) { if (!pCompiler->isQuiet()) argHelper->printf("Build succeeded for : %s.\n", deviceConfig.c_str()); } else { argHelper->printf("Build failed for : %s with error code: %d\n", deviceConfig.c_str(), retVal); argHelper->printf("Command was:"); for (const auto &arg : argsCopy) argHelper->printf(" %s", arg.c_str()); argHelper->printf("\n"); } } if (retVal) { return retVal; } fatbinary.appendFileEntry(pointerSize + "." + product + "." + std::to_string(stepping), pCompiler->getPackedDeviceBinaryOutput()); return retVal; } int buildFatBinary(const std::vector &args, OclocArgHelper *argHelper) { std::string pointerSizeInBits = (sizeof(void *) == 4) ? 
"32" : "64"; size_t deviceArgIndex = -1; std::string inputFileName = ""; std::string outputFileName = ""; std::string outputDirectory = ""; bool spirvInput = false; bool excludeIr = false; std::vector argsCopy(args); for (size_t argIndex = 1; argIndex < args.size(); argIndex++) { const auto &currArg = args[argIndex]; const bool hasMoreArgs = (argIndex + 1 < args.size()); if ((ConstStringRef("-device") == currArg) && hasMoreArgs) { deviceArgIndex = argIndex + 1; ++argIndex; } else if ((CompilerOptions::arch32bit == currArg) || (ConstStringRef("-32") == currArg)) { pointerSizeInBits = "32"; } else if ((CompilerOptions::arch64bit == currArg) || (ConstStringRef("-64") == currArg)) { pointerSizeInBits = "64"; } else if ((ConstStringRef("-file") == currArg) && hasMoreArgs) { inputFileName = args[argIndex + 1]; ++argIndex; } else if ((ConstStringRef("-output") == currArg) && hasMoreArgs) { outputFileName = args[argIndex + 1]; ++argIndex; } else if ((ConstStringRef("-out_dir") == currArg) && hasMoreArgs) { outputDirectory = args[argIndex + 1]; ++argIndex; } else if (ConstStringRef("-exclude_ir") == currArg) { excludeIr = true; } else if (ConstStringRef("-spirv_input") == currArg) { spirvInput = true; } } const bool shouldPreserveGenericIr = spirvInput && !excludeIr; if (shouldPreserveGenericIr) { argsCopy.push_back("-exclude_ir"); } Ar::ArEncoder fatbinary(true); if (isDeviceWithPlatformAbbreviation(ConstStringRef(args[deviceArgIndex]), argHelper)) { std::vector targetPlatforms; targetPlatforms = getTargetPlatformsForFatbinary(ConstStringRef(args[deviceArgIndex]), argHelper); if (targetPlatforms.empty()) { argHelper->printf("Failed to parse target devices from : %s\n", args[deviceArgIndex].c_str()); return 1; } for (auto &targetPlatform : targetPlatforms) { int retVal = 0; argsCopy[deviceArgIndex] = targetPlatform.str(); std::unique_ptr pCompiler{OfflineCompiler::create(argsCopy.size(), argsCopy, false, retVal, argHelper)}; if (OclocErrorCode::SUCCESS != retVal) { 
argHelper->printf("Error! Couldn't create OfflineCompiler. Exiting.\n"); return retVal; } std::string product = hardwarePrefix[pCompiler->getHardwareInfo().platform.eProductFamily]; auto stepping = pCompiler->getHardwareInfo().platform.usRevId; auto targetPlatforms = product + "." + std::to_string(stepping); retVal = buildFatBinaryForTarget(retVal, argsCopy, pointerSizeInBits, fatbinary, pCompiler.get(), argHelper, targetPlatforms); if (retVal) { return retVal; } } } else { std::vector targetConfigs; targetConfigs = getTargetConfigsForFatbinary(ConstStringRef(args[deviceArgIndex]), argHelper); if (targetConfigs.empty()) { argHelper->printf("Failed to parse target devices from : %s\n", args[deviceArgIndex].c_str()); return 1; } for (auto &targetConfig : targetConfigs) { int retVal = 0; argHelper->setFatbinary(true); argHelper->setDeviceInfoForFatbinaryTarget(targetConfig); std::unique_ptr pCompiler{OfflineCompiler::create(argsCopy.size(), argsCopy, false, retVal, argHelper)}; if (OclocErrorCode::SUCCESS != retVal) { argHelper->printf("Error! Couldn't create OfflineCompiler. Exiting.\n"); return retVal; } auto targetConfigStr = argHelper->parseProductConfigFromValue(targetConfig.config); retVal = buildFatBinaryForTarget(retVal, argsCopy, pointerSizeInBits, fatbinary, pCompiler.get(), argHelper, targetConfigStr); if (retVal) { return retVal; } } } if (shouldPreserveGenericIr) { const auto errorCode = appendGenericIr(fatbinary, inputFileName, argHelper); if (errorCode != OclocErrorCode::SUCCESS) { argHelper->printf("Error! 
Couldn't append generic IR file!\n"); return errorCode; } } auto fatbinaryData = fatbinary.encode(); std::string fatbinaryFileName = outputFileName; if (outputFileName.empty() && (false == inputFileName.empty())) { fatbinaryFileName = OfflineCompiler::getFileNameTrunk(inputFileName) + ".ar"; } if (false == outputDirectory.empty()) { fatbinaryFileName = outputDirectory + "/" + outputFileName; } argHelper->saveOutput(fatbinaryFileName, fatbinaryData.data(), fatbinaryData.size()); return 0; } int appendGenericIr(Ar::ArEncoder &fatbinary, const std::string &inputFile, OclocArgHelper *argHelper) { std::size_t fileSize = 0; std::unique_ptr fileContents = argHelper->loadDataFromFile(inputFile, fileSize); if (fileSize == 0) { argHelper->printf("Error! Couldn't read input file!\n"); return OclocErrorCode::INVALID_FILE; } const auto ir = ArrayRef::fromAny(fileContents.get(), fileSize); if (!isSpirVBitcode(ir)) { argHelper->printf("Error! Input file is not in supported generic IR format! " "Currently supported format is SPIR-V.\n"); return OclocErrorCode::INVALID_FILE; } const auto encodedElf = createEncodedElfWithSpirv(ir); ArrayRef genericIrFile{encodedElf.data(), encodedElf.size()}; fatbinary.appendFileEntry("generic_ir", genericIrFile); return OclocErrorCode::SUCCESS; } std::vector createEncodedElfWithSpirv(const ArrayRef &spirv) { using namespace NEO::Elf; ElfEncoder elfEncoder; elfEncoder.getElfFileHeader().type = ET_OPENCL_OBJECTS; elfEncoder.appendSection(SHT_OPENCL_SPIRV, SectionNamesOpenCl::spirvObject, spirv); return elfEncoder.encode(); } } // namespace NEOcompute-runtime-22.14.22890/shared/offline_compiler/source/ocloc_fatbinary.h000066400000000000000000000056201422164147700267460ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/offline_compiler/source/offline_compiler.h" #include "shared/source/device_binary_format/ar/ar_encoder.h" #include 
"shared/source/utilities/const_stringref.h" #include "compiler_options.h" #include "igfxfmid.h" #include #include #include namespace NEO { bool requestedFatBinary(const std::vector &args, OclocArgHelper *helper); inline bool requestedFatBinary(int argc, const char *argv[], OclocArgHelper *helper) { std::vector args; args.assign(argv, argv + argc); return requestedFatBinary(args, helper); } int buildFatBinary(const std::vector &args, OclocArgHelper *argHelper); inline int buildFatBinary(int argc, const char *argv[], OclocArgHelper *argHelper) { std::vector args; args.assign(argv, argv + argc); return buildFatBinary(args, argHelper); } bool isDeviceWithPlatformAbbreviation(ConstStringRef deviceArg, OclocArgHelper *argHelper); std::vector getAllSupportedTargetPlatforms(); std::vector getAllMatchedConfigs(const std::string device, OclocArgHelper *argHelper); std::vector getTargetConfigsForFatbinary(ConstStringRef deviceArg, OclocArgHelper *argHelper); std::vector getTargetPlatformsForFatbinary(ConstStringRef deviceArg, OclocArgHelper *argHelper); std::vector getProductConfigsForOpenRange(ConstStringRef openRange, OclocArgHelper *argHelper, bool rangeTo); std::vector getProductConfigsForClosedRange(ConstStringRef rangeFrom, ConstStringRef rangeTo, OclocArgHelper *argHelper); std::vector getPlatformsForClosedRange(ConstStringRef rangeFrom, ConstStringRef rangeTo, PRODUCT_FAMILY platformFrom, OclocArgHelper *argHelper); std::vector getPlatformsForOpenRange(ConstStringRef openRange, PRODUCT_FAMILY prodId, OclocArgHelper *argHelper, bool rangeTo); std::vector getProductConfigsForSpecificTargets(CompilerOptions::TokenizedString targets, OclocArgHelper *argHelper); std::vector getPlatformsForSpecificTargets(CompilerOptions::TokenizedString targets, OclocArgHelper *argHelper); std::vector toProductNames(const std::vector &productIds); PRODUCT_FAMILY asProductId(ConstStringRef product, const std::vector &allSupportedPlatforms); int buildFatBinaryForTarget(int retVal, const 
std::vector &argsCopy, std::string pointerSize, Ar::ArEncoder &fatbinary, OfflineCompiler *pCompiler, OclocArgHelper *argHelper, const std::string &deviceConfig); int appendGenericIr(Ar::ArEncoder &fatbinary, const std::string &inputFile, OclocArgHelper *argHelper); std::vector createEncodedElfWithSpirv(const ArrayRef &spirv); } // namespace NEO compute-runtime-22.14.22890/shared/offline_compiler/source/ocloc_validator.cpp000066400000000000000000000071751422164147700273160ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/offline_compiler/source/ocloc_validator.h" #include "shared/offline_compiler/source/ocloc_arg_helper.h" #include "shared/source/device_binary_format/device_binary_formats.h" #include "shared/source/program/kernel_info.h" #include "shared/source/program/program_info.h" namespace NEO { ProgramInfo::~ProgramInfo() { for (auto &kernelInfo : kernelInfos) { delete kernelInfo; } kernelInfos.clear(); } KernelInfo::~KernelInfo() { delete[] crossThreadData; } namespace Ocloc { int validate(const std::vector &args, OclocArgHelper *argHelper) { NEO::ProgramInfo programInfo; NEO::SingleDeviceBinary deviceBinary; std::string errors; std::string warnings; UNRECOVERABLE_IF(nullptr == argHelper) std::string fileName; for (uint32_t i = 0; i < args.size(); ++i) { if (args.size() > (i + 1) && (ConstStringRef("-file") == args[i])) { fileName = args[i + 1]; } } if (fileName.empty()) { argHelper->printf("Error : Mandatory argument -file is missing.\n"); return -1; } if (false == argHelper->fileExists(fileName)) { argHelper->printf("Error : Input file missing : %s\n", fileName.c_str()); return -1; } auto fileData = argHelper->readBinaryFile(fileName); argHelper->printf("Validating : %s (%d bytes).\n", fileName.c_str(), fileData.size()); deviceBinary.deviceBinary = deviceBinary.deviceBinary.fromAny(fileData.data(), fileData.size()); if (false == 
NEO::isDeviceBinaryFormat(deviceBinary.deviceBinary)) { argHelper->printf("Input is not a Zebin file (not elf or wrong elf object file type)\n", errors.c_str()); return -2; } auto decodeResult = NEO::decodeSingleDeviceBinary(programInfo, deviceBinary, errors, warnings); if (false == warnings.empty()) { argHelper->printf("Validator detected potential problems :\n%s\n", warnings.c_str()); } if (false == errors.empty()) { argHelper->printf("Validator detected errors :\n%s\n", errors.c_str()); } argHelper->printf("Binary is %s (%s).\n", ((NEO::DecodeError::Success == decodeResult) ? "VALID" : "INVALID"), NEO::asString(decodeResult)); if (NEO::DecodeError::Success == decodeResult) { argHelper->printf("Statistics : \n"); if (0 != programInfo.globalVariables.size) { argHelper->printf("Binary contains global variables section of size : %d.\n", programInfo.globalVariables.size); } if (0 != programInfo.globalConstants.size) { argHelper->printf("Binary contains global constants section of size : %d.\n", programInfo.globalConstants.size); } argHelper->printf("Binary contains %d kernels.\n", programInfo.kernelInfos.size()); for (size_t i = 0U; i < programInfo.kernelInfos.size(); ++i) { const auto &kernelDescriptor = programInfo.kernelInfos[i]->kernelDescriptor; argHelper->printf("\nKernel #%d named %s:\n", static_cast(i), kernelDescriptor.kernelMetadata.kernelName.c_str()); argHelper->printf(" * Number of binding table entries %d:\n", kernelDescriptor.payloadMappings.bindingTable.numEntries); argHelper->printf(" * Cross-thread data size %d:\n", kernelDescriptor.kernelAttributes.crossThreadDataSize); argHelper->printf(" * Per-thread data size %d:\n", kernelDescriptor.kernelAttributes.perThreadDataSize); } } return static_cast(decodeResult); } } // namespace Ocloc } // namespace NEO compute-runtime-22.14.22890/shared/offline_compiler/source/ocloc_validator.h000066400000000000000000000004501422164147700267500ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * 
* SPDX-License-Identifier: MIT * */ #pragma once #include #include class OclocArgHelper; namespace NEO { namespace Ocloc { int validate(const std::vector &args, OclocArgHelper *argHelper); } } // namespace NEO compute-runtime-22.14.22890/shared/offline_compiler/source/ocloc_wrapper.cpp000066400000000000000000000066431422164147700270100ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "ocloc_wrapper.h" #include "shared/source/os_interface/os_library.h" #include #include typedef int (*pOclocInvoke)( unsigned int numArgs, const char *argv[], const uint32_t numSources, const uint8_t **dataSources, const uint64_t *lenSources, const char **nameSources, const uint32_t numInputHeaders, const uint8_t **dataInputHeaders, const uint64_t *lenInputHeaders, const char **nameInputHeaders, uint32_t *numOutputs, uint8_t ***dataOutputs, uint64_t **lenOutputs, char ***nameOutputs); typedef int (*pOclocFreeOutput)( uint32_t *numOutputs, uint8_t ***dataOutputs, uint64_t **lenOutputs, char ***nameOutputs); struct OclocLibrary { pOclocInvoke invoke = nullptr; pOclocFreeOutput freeOutput = nullptr; std::unique_ptr library; bool isLoaded() { return library != nullptr; } }; OclocWrapper::OclocWrapper() : pImpl(std::make_unique()){}; OclocWrapper::~OclocWrapper() = default; struct OclocWrapper::Impl { OclocLibrary oclocLib; void loadOcloc() { OclocLibrary ocloc; std::string oclocLibName = OCLOC_LIB_NAME; ocloc.library.reset(NEO::OsLibrary::load(oclocLibName)); if (nullptr == (ocloc.invoke = reinterpret_cast(ocloc.library->getProcAddress("oclocInvoke")))) { std::cout << "Error! Couldn't find OclocInvoke function.\n"; return; } if (nullptr == (ocloc.freeOutput = reinterpret_cast(ocloc.library->getProcAddress("oclocFreeOutput")))) { std::cout << "Error! 
Couldn't find OclocFreeOutput function.\n"; return; } this->oclocLib = std::move(ocloc); } }; int OclocWrapper::invokeOcloc(unsigned int numArgs, const char *argv[]) { return invokeOcloc(numArgs, argv, 0, nullptr, nullptr, nullptr, 0, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr); } int OclocWrapper::invokeOcloc(unsigned int numArgs, const char *argv[], const uint32_t numSources, const uint8_t **dataSources, const uint64_t *lenSources, const char **nameSources, const uint32_t numInputHeaders, const uint8_t **dataInputHeaders, const uint64_t *lenInputHeaders, const char **nameInputHeaders, uint32_t *numOutputs, uint8_t ***dataOutputs, uint64_t **lenOutputs, char ***nameOutputs) { if (false == tryLoadOcloc()) { std::cout << "Error! Ocloc Library couldn't be loaded.\n"; return -1; } return pImpl->oclocLib.invoke(numArgs, argv, numSources, dataSources, lenSources, nameSources, numInputHeaders, dataInputHeaders, lenInputHeaders, nameInputHeaders, numOutputs, dataOutputs, lenOutputs, nameOutputs); } int OclocWrapper::freeOutput(uint32_t *numOutputs, uint8_t ***dataOutputs, uint64_t **lenOutputs, char ***nameOutputs) { if (false == tryLoadOcloc()) { std::cout << "Error! 
Ocloc Library couldn't be loaded.\n"; return -1; } return pImpl->oclocLib.freeOutput(numOutputs, dataOutputs, lenOutputs, nameOutputs); } bool OclocWrapper::tryLoadOcloc() { if (false == pImpl->oclocLib.isLoaded()) { pImpl->loadOcloc(); } return pImpl->oclocLib.isLoaded(); } compute-runtime-22.14.22890/shared/offline_compiler/source/ocloc_wrapper.h000066400000000000000000000021661422164147700264510ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include #include #include struct OclocWrapper { public: OclocWrapper(); ~OclocWrapper(); OclocWrapper(OclocWrapper &) = delete; OclocWrapper(const OclocWrapper &&) = delete; OclocWrapper &operator=(const OclocWrapper &) = delete; OclocWrapper &operator=(OclocWrapper &&) = delete; int invokeOcloc(unsigned int numArgs, const char *argv[]); int invokeOcloc(unsigned int numArgs, const char *argv[], const uint32_t numSources, const uint8_t **dataSources, const uint64_t *lenSources, const char **nameSources, const uint32_t numInputHeaders, const uint8_t **dataInputHeaders, const uint64_t *lenInputHeaders, const char **nameInputHeaders, uint32_t *numOutputs, uint8_t ***dataOutputs, uint64_t **lenOutputs, char ***nameOutputs); int freeOutput(uint32_t *numOutputs, uint8_t ***dataOutputs, uint64_t **lenOutputs, char ***nameOutputs); protected: bool tryLoadOcloc(); struct Impl; std::unique_ptr pImpl; }; compute-runtime-22.14.22890/shared/offline_compiler/source/offline_compiler.cpp000066400000000000000000001455011422164147700274620ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "offline_compiler.h" #include "shared/offline_compiler/source/ocloc_error_code.h" #include "shared/offline_compiler/source/queries.h" #include "shared/offline_compiler/source/utilities/get_git_version_info.h" #include "shared/source/compiler_interface/intermediate_representations.h" #include 
"shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/device_binary_format/device_binary_formats.h" #include "shared/source/device_binary_format/elf/elf_encoder.h" #include "shared/source/device_binary_format/elf/ocl_elf.h" #include "shared/source/helpers/compiler_hw_info_config.h" #include "shared/source/helpers/compiler_options_parser.h" #include "shared/source/helpers/debug_helpers.h" #include "shared/source/helpers/file_io.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/helpers/string.h" #include "shared/source/helpers/validators.h" #include "shared/source/os_interface/os_inc_base.h" #include "shared/source/os_interface/os_library.h" #include "cif/common/cif_main.h" #include "cif/helpers/error.h" #include "cif/import/library_api.h" #include "compiler_options.h" #include "igfxfmid.h" #include "ocl_igc_interface/code_type.h" #include "ocl_igc_interface/fcl_ocl_device_ctx.h" #include "ocl_igc_interface/igc_ocl_device_ctx.h" #include "ocl_igc_interface/platform_helper.h" #include #include #include #include #ifdef _WIN32 #include #define MakeDirectory _mkdir #define GetCurrentWorkingDirectory _getcwd #else #include #define MakeDirectory(dir) mkdir(dir, 0777) #define GetCurrentWorkingDirectory getcwd #endif using namespace NEO::OclocErrorCode; namespace NEO { CIF::CIFMain *createMainNoSanitize(CIF::CreateCIFMainFunc_t createFunc); std::string convertToPascalCase(const std::string &inString) { std::string outString; bool capitalize = true; for (unsigned int i = 0; i < inString.length(); i++) { if (isalpha(inString[i]) && capitalize == true) { outString += toupper(inString[i]); capitalize = false; } else if (inString[i] == '_') { capitalize = true; } else { outString += inString[i]; } } return outString; } OfflineCompiler::OfflineCompiler() = default; OfflineCompiler::~OfflineCompiler() { pBuildInfo.reset(); delete[] irBinary; delete[] genBinary; delete[] debugDataBinary; } 
OfflineCompiler *OfflineCompiler::create(size_t numArgs, const std::vector &allArgs, bool dumpFiles, int &retVal, OclocArgHelper *helper) { retVal = SUCCESS; auto pOffCompiler = new OfflineCompiler(); if (pOffCompiler) { pOffCompiler->argHelper = helper; retVal = pOffCompiler->initialize(numArgs, allArgs, dumpFiles); } if (retVal != SUCCESS) { delete pOffCompiler; pOffCompiler = nullptr; } return pOffCompiler; } void printQueryHelp(OclocArgHelper *helper) { helper->printf(OfflineCompiler::queryHelp.data()); } int OfflineCompiler::query(size_t numArgs, const std::vector &allArgs, OclocArgHelper *helper) { if (allArgs.size() != 3) { helper->printf("Error: Invalid command line. Expected ocloc query "); return INVALID_COMMAND_LINE; } auto retVal = SUCCESS; auto &arg = allArgs[2]; if (Queries::queryNeoRevision == arg) { auto revision = NEO::getRevision(); helper->saveOutput(Queries::queryNeoRevision.data(), revision.c_str(), revision.size() + 1); } else if (Queries::queryOCLDriverVersion == arg) { auto driverVersion = NEO::getOclDriverVersion(); helper->saveOutput(Queries::queryOCLDriverVersion.data(), driverVersion.c_str(), driverVersion.size() + 1); } else if ("--help" == arg) { printQueryHelp(helper); } else { helper->printf("Error: Invalid command line. Uknown argument %s.", arg.c_str()); retVal = INVALID_COMMAND_LINE; } return retVal; } struct OfflineCompiler::buildInfo { std::unique_ptr> fclOptions; std::unique_ptr> fclInternalOptions; std::unique_ptr> fclOutput; IGC::CodeType::CodeType_t intermediateRepresentation; }; int OfflineCompiler::buildIrBinary() { int retVal = SUCCESS; UNRECOVERABLE_IF(fclDeviceCtx == nullptr); pBuildInfo->intermediateRepresentation = useLlvmText ? IGC::CodeType::llvmLl : (useLlvmBc ? 
IGC::CodeType::llvmBc : preferredIntermediateRepresentation); //sourceCode.size() returns the number of characters without null terminated char CIF::RAII::UPtr_t fclSrc = nullptr; pBuildInfo->fclOptions = CIF::Builtins::CreateConstBuffer(fclMain.get(), options.c_str(), options.size()); pBuildInfo->fclInternalOptions = CIF::Builtins::CreateConstBuffer(fclMain.get(), internalOptions.c_str(), internalOptions.size()); auto err = CIF::Builtins::CreateConstBuffer(fclMain.get(), nullptr, 0); auto srcType = IGC::CodeType::undefined; std::vector tempSrcStorage; if (this->argHelper->hasHeaders()) { srcType = IGC::CodeType::elf; NEO::Elf::ElfEncoder<> elfEncoder(true, true, 1U); elfEncoder.getElfFileHeader().type = NEO::Elf::ET_OPENCL_SOURCE; elfEncoder.appendSection(NEO::Elf::SHT_OPENCL_SOURCE, "CLMain", sourceCode); for (const auto &header : this->argHelper->getHeaders()) { ArrayRef headerData(header.data, header.length); ConstStringRef headerName = header.name; elfEncoder.appendSection(NEO::Elf::SHT_OPENCL_HEADER, headerName, headerData); } tempSrcStorage = elfEncoder.encode(); fclSrc = CIF::Builtins::CreateConstBuffer(fclMain.get(), tempSrcStorage.data(), tempSrcStorage.size()); } else { srcType = IGC::CodeType::oclC; fclSrc = CIF::Builtins::CreateConstBuffer(fclMain.get(), sourceCode.c_str(), sourceCode.size() + 1); } auto fclTranslationCtx = fclDeviceCtx->CreateTranslationCtx(srcType, pBuildInfo->intermediateRepresentation, err.get()); if (true == NEO::areNotNullptr(err->GetMemory())) { updateBuildLog(err->GetMemory(), err->GetSizeRaw()); retVal = BUILD_PROGRAM_FAILURE; return retVal; } if (false == NEO::areNotNullptr(fclSrc.get(), pBuildInfo->fclOptions.get(), pBuildInfo->fclInternalOptions.get(), fclTranslationCtx.get())) { retVal = OUT_OF_HOST_MEMORY; return retVal; } pBuildInfo->fclOutput = fclTranslationCtx->Translate(fclSrc.get(), pBuildInfo->fclOptions.get(), pBuildInfo->fclInternalOptions.get(), nullptr, 0); if (pBuildInfo->fclOutput == nullptr) { retVal = 
OUT_OF_HOST_MEMORY; return retVal; } UNRECOVERABLE_IF(pBuildInfo->fclOutput->GetBuildLog() == nullptr); UNRECOVERABLE_IF(pBuildInfo->fclOutput->GetOutput() == nullptr); if (pBuildInfo->fclOutput->Successful() == false) { updateBuildLog(pBuildInfo->fclOutput->GetBuildLog()->GetMemory(), pBuildInfo->fclOutput->GetBuildLog()->GetSizeRaw()); retVal = BUILD_PROGRAM_FAILURE; return retVal; } storeBinary(irBinary, irBinarySize, pBuildInfo->fclOutput->GetOutput()->GetMemory(), pBuildInfo->fclOutput->GetOutput()->GetSizeRaw()); isSpirV = pBuildInfo->intermediateRepresentation == IGC::CodeType::spirV; updateBuildLog(pBuildInfo->fclOutput->GetBuildLog()->GetMemory(), pBuildInfo->fclOutput->GetBuildLog()->GetSizeRaw()); return retVal; } std::string OfflineCompiler::validateInputType(const std::string &input, bool isLlvm, bool isSpirv) { auto asBitcode = ArrayRef::fromAny(input.data(), input.size()); if (isSpirv) { if (NEO::isSpirVBitcode(asBitcode)) { return ""; } return "Warning : file does not look like spirv bitcode (wrong magic numbers)"; } if (isLlvm) { if (NEO::isLlvmBitcode(asBitcode)) { return ""; } return "Warning : file does not look like llvm bitcode (wrong magic numbers)"; } if (NEO::isSpirVBitcode(asBitcode)) { return "Warning : file looks like spirv bitcode (based on magic numbers) - please make sure proper CLI flags are present"; } if (NEO::isLlvmBitcode(asBitcode)) { return "Warning : file looks like llvm bitcode (based on magic numbers) - please make sure proper CLI flags are present"; } return ""; } int OfflineCompiler::buildSourceCode() { int retVal = SUCCESS; do { if (sourceCode.empty()) { retVal = INVALID_PROGRAM; break; } UNRECOVERABLE_IF(igcDeviceCtx == nullptr); auto inputTypeWarnings = validateInputType(sourceCode, inputFileLlvm, inputFileSpirV); this->argHelper->printf(inputTypeWarnings.c_str()); CIF::RAII::UPtr_t igcOutput; bool inputIsIntermediateRepresentation = inputFileLlvm || inputFileSpirV; if (false == inputIsIntermediateRepresentation) { 
retVal = buildIrBinary(); if (retVal != SUCCESS) break; auto igcTranslationCtx = igcDeviceCtx->CreateTranslationCtx(pBuildInfo->intermediateRepresentation, IGC::CodeType::oclGenBin); igcOutput = igcTranslationCtx->Translate(pBuildInfo->fclOutput->GetOutput(), pBuildInfo->fclOptions.get(), pBuildInfo->fclInternalOptions.get(), nullptr, 0); } else { storeBinary(irBinary, irBinarySize, sourceCode.c_str(), sourceCode.size()); isSpirV = inputFileSpirV; auto igcSrc = CIF::Builtins::CreateConstBuffer(igcMain.get(), sourceCode.c_str(), sourceCode.size()); auto igcOptions = CIF::Builtins::CreateConstBuffer(igcMain.get(), options.c_str(), options.size()); auto igcInternalOptions = CIF::Builtins::CreateConstBuffer(igcMain.get(), internalOptions.c_str(), internalOptions.size()); auto igcTranslationCtx = igcDeviceCtx->CreateTranslationCtx(inputFileSpirV ? IGC::CodeType::spirV : IGC::CodeType::llvmBc, IGC::CodeType::oclGenBin); igcOutput = igcTranslationCtx->Translate(igcSrc.get(), igcOptions.get(), igcInternalOptions.get(), nullptr, 0); } if (igcOutput == nullptr) { retVal = OUT_OF_HOST_MEMORY; break; } UNRECOVERABLE_IF(igcOutput->GetBuildLog() == nullptr); UNRECOVERABLE_IF(igcOutput->GetOutput() == nullptr); updateBuildLog(igcOutput->GetBuildLog()->GetMemory(), igcOutput->GetBuildLog()->GetSizeRaw()); if (igcOutput->GetOutput()->GetSizeRaw() != 0) { storeBinary(genBinary, genBinarySize, igcOutput->GetOutput()->GetMemory(), igcOutput->GetOutput()->GetSizeRaw()); } if (igcOutput->GetDebugData()->GetSizeRaw() != 0) { storeBinary(debugDataBinary, debugDataBinarySize, igcOutput->GetDebugData()->GetMemory(), igcOutput->GetDebugData()->GetSizeRaw()); } retVal = igcOutput->Successful() ? 
SUCCESS : BUILD_PROGRAM_FAILURE; } while (0); return retVal; } int OfflineCompiler::build() { int retVal = SUCCESS; if (isOnlySpirV()) { retVal = buildIrBinary(); } else { retVal = buildSourceCode(); } generateElfBinary(); if (dumpFiles) { writeOutAllFiles(); } return retVal; } void OfflineCompiler::updateBuildLog(const char *pErrorString, const size_t errorStringSize) { std::string errorString = (errorStringSize && pErrorString) ? std::string(pErrorString, pErrorString + errorStringSize) : ""; if (errorString[0] != '\0') { if (buildLog.empty()) { buildLog.assign(errorString.c_str()); } else { buildLog.append("\n"); buildLog.append(errorString.c_str()); } } } std::string &OfflineCompiler::getBuildLog() { return buildLog; } void OfflineCompiler::setFamilyType() { familyNameWithType.clear(); familyNameWithType.append(familyName[hwInfo.platform.eRenderCoreFamily]); familyNameWithType.append(hwInfo.capabilityTable.platformType); } int OfflineCompiler::initHardwareInfo(std::string deviceName) { int retVal = INVALID_DEVICE; if (deviceName.empty()) { return retVal; } if (argHelper->isFatbinary()) { argHelper->setHwInfoForFatbinaryTarget(hwInfo); setFamilyType(); retVal = SUCCESS; return retVal; } overridePlatformName(deviceName); std::transform(deviceName.begin(), deviceName.end(), deviceName.begin(), ::tolower); const char hexPrefix = 2; auto deviceId = -1; std::string product(""); auto numeration = argHelper->getMajorMinorRevision(deviceName); if (!numeration.empty()) { uint32_t productConfig = argHelper->getProductConfig(numeration); if (argHelper->getHwInfoForProductConfig(productConfig, hwInfo)) { deviceConfig = static_cast(productConfig); setFamilyType(); retVal = SUCCESS; return retVal; } argHelper->printf("Could not determine target based on product config: %s\n", deviceName.c_str()); return retVal; } else if (deviceName.find(".") != std::string::npos) { argHelper->printf("Could not determine target based on product config: %s\n", deviceName.c_str()); return 
retVal; } if (deviceName.substr(0, hexPrefix) == "0x" && std::all_of(deviceName.begin() + hexPrefix, deviceName.end(), (::isxdigit))) { deviceId = stoi(deviceName, 0, 16); product = argHelper->returnProductNameForDevice(deviceId); if (!product.empty()) { argHelper->printf("Auto-detected target based on %s device id: %s\n", deviceName.c_str(), product.c_str()); deviceName = product; } else { argHelper->printf("Could not determine target based on device id: %s\n", deviceName.c_str()); return retVal; } } for (unsigned int productId = 0; productId < IGFX_MAX_PRODUCT; ++productId) { if (hardwarePrefix[productId] && (0 == strcmp(deviceName.c_str(), hardwarePrefix[productId]))) { if (hardwareInfoTable[productId]) { hwInfo = *hardwareInfoTable[productId]; if (revisionId != -1) { hwInfo.platform.usRevId = revisionId; } if (deviceId != -1) { hwInfo.platform.usDeviceID = deviceId; } auto hwInfoConfig = defaultHardwareInfoConfigTable[hwInfo.platform.eProductFamily]; setHwInfoValuesFromConfig(hwInfoConfig, hwInfo); hardwareInfoSetup[hwInfo.platform.eProductFamily](&hwInfo, true, hwInfoConfig); setFamilyType(); retVal = SUCCESS; break; } } } return retVal; } std::string OfflineCompiler::getStringWithinDelimiters(const std::string &src) { size_t start = src.find("R\"===("); size_t stop = src.find(")===\""); DEBUG_BREAK_IF(std::string::npos == start); DEBUG_BREAK_IF(std::string::npos == stop); start += strlen("R\"===("); size_t size = stop - start; std::string dst(src, start, size + 1); dst[size] = '\0'; // put null char at the end return dst; } int OfflineCompiler::initialize(size_t numArgs, const std::vector &allArgs, bool dumpFiles) { this->dumpFiles = dumpFiles; int retVal = SUCCESS; const char *source = nullptr; std::unique_ptr sourceFromFile; size_t sourceFromFileSize = 0; this->pBuildInfo = std::make_unique(); retVal = parseCommandLine(numArgs, allArgs); if (showHelp) { printUsage(); return retVal; } else if (retVal != SUCCESS) { return retVal; } if (options.empty()) { // 
try to read options from file if not provided by commandline size_t ext_start = inputFile.find_last_of("."); if (ext_start != std::string::npos) { std::string oclocOptionsFileName = inputFile.substr(0, ext_start); oclocOptionsFileName.append("_ocloc_options.txt"); std::string oclocOptionsFromFile; bool oclocOptionsRead = readOptionsFromFile(oclocOptionsFromFile, oclocOptionsFileName, argHelper); if (oclocOptionsRead) { argHelper->printf("Building with ocloc options:\n%s\n", oclocOptionsFromFile.c_str()); std::istringstream iss(allArgs[0] + " " + oclocOptionsFromFile); std::vector tokens{ std::istream_iterator{iss}, std::istream_iterator{}}; retVal = parseCommandLine(tokens.size(), tokens); if (retVal != SUCCESS) { if (isQuiet()) { printf("Failed with ocloc options from file:\n%s\n", oclocOptionsFromFile.c_str()); } return retVal; } } std::string optionsFileName = inputFile.substr(0, ext_start); optionsFileName.append("_options.txt"); bool optionsRead = readOptionsFromFile(options, optionsFileName, argHelper); if (optionsRead) { optionsReadFromFile = std::string(options); } if (optionsRead && !isQuiet()) { argHelper->printf("Building with options:\n%s\n", options.c_str()); } std::string internalOptionsFileName = inputFile.substr(0, ext_start); internalOptionsFileName.append("_internal_options.txt"); std::string internalOptionsFromFile; bool internalOptionsRead = readOptionsFromFile(internalOptionsFromFile, internalOptionsFileName, argHelper); if (internalOptionsRead) { internalOptionsReadFromFile = std::string(internalOptionsFromFile); } if (internalOptionsRead && !isQuiet()) { argHelper->printf("Building with internal options:\n%s\n", internalOptionsFromFile.c_str()); } CompilerOptions::concatenateAppend(internalOptions, internalOptionsFromFile); } } retVal = deviceName.empty() ? 
SUCCESS : initHardwareInfo(deviceName.c_str()); if (retVal != SUCCESS) { argHelper->printf("Error: Cannot get HW Info for device %s.\n", deviceName.c_str()); return retVal; } if (CompilerOptions::contains(options, CompilerOptions::generateDebugInfo.str())) { if (hwInfo.platform.eRenderCoreFamily >= IGFX_GEN9_CORE) { internalOptions = CompilerOptions::concatenate(internalOptions, CompilerOptions::debugKernelEnable); } } if (deviceName.empty()) { internalOptions = CompilerOptions::concatenate("-ocl-version=300 -cl-ext=-all,+cl_khr_3d_image_writes", internalOptions); CompilerOptions::concatenateAppend(internalOptions, CompilerOptions::enableImageSupport); } else { appendExtensionsToInternalOptions(hwInfo, options, internalOptions); appendExtraInternalOptions(internalOptions); } parseDebugSettings(); // set up the device inside the program sourceFromFile = argHelper->loadDataFromFile(inputFile, sourceFromFileSize); if (sourceFromFileSize == 0) { retVal = INVALID_FILE; return retVal; } if (inputFileLlvm || inputFileSpirV) { // use the binary input "as is" sourceCode.assign(sourceFromFile.get(), sourceFromFileSize); } else { // for text input, we also accept files used as runtime builtins source = strstr((const char *)sourceFromFile.get(), "R\"===("); sourceCode = (source != nullptr) ? 
getStringWithinDelimiters(sourceFromFile.get()) : sourceFromFile.get(); } if ((inputFileSpirV == false) && (inputFileLlvm == false)) { auto fclLibFile = OsLibrary::load(Os::frontEndDllName); if (fclLibFile == nullptr) { argHelper->printf("Error: Failed to load %s\n", Os::frontEndDllName); return OUT_OF_HOST_MEMORY; } this->fclLib.reset(fclLibFile); if (this->fclLib == nullptr) { return OUT_OF_HOST_MEMORY; } auto fclCreateMain = reinterpret_cast(this->fclLib->getProcAddress(CIF::CreateCIFMainFuncName)); if (fclCreateMain == nullptr) { return OUT_OF_HOST_MEMORY; } this->fclMain = CIF::RAII::UPtr(createMainNoSanitize(fclCreateMain)); if (this->fclMain == nullptr) { return OUT_OF_HOST_MEMORY; } if (false == this->fclMain->IsCompatible()) { argHelper->printf("Incompatible interface in FCL : %s\n", CIF::InterfaceIdCoder::Dec(this->fclMain->FindIncompatible()).c_str()); DEBUG_BREAK_IF(true); return OUT_OF_HOST_MEMORY; } this->fclDeviceCtx = this->fclMain->CreateInterface(); if (this->fclDeviceCtx == nullptr) { return OUT_OF_HOST_MEMORY; } fclDeviceCtx->SetOclApiVersion(hwInfo.capabilityTable.clVersionSupport * 10); preferredIntermediateRepresentation = fclDeviceCtx->GetPreferredIntermediateRepresentation(); if (this->fclDeviceCtx->GetUnderlyingVersion() > 4U) { auto igcPlatform = fclDeviceCtx->GetPlatformHandle(); if (nullptr == igcPlatform) { return OUT_OF_HOST_MEMORY; } IGC::PlatformHelper::PopulateInterfaceWith(*igcPlatform, hwInfo.platform); } } else { if (!isQuiet()) { argHelper->printf("Compilation from IR - skipping loading of FCL\n"); } preferredIntermediateRepresentation = IGC::CodeType::spirV; } this->igcLib.reset(OsLibrary::load(Os::igcDllName)); if (this->igcLib == nullptr) { return OUT_OF_HOST_MEMORY; } auto igcCreateMain = reinterpret_cast(this->igcLib->getProcAddress(CIF::CreateCIFMainFuncName)); if (igcCreateMain == nullptr) { return OUT_OF_HOST_MEMORY; } this->igcMain = CIF::RAII::UPtr(createMainNoSanitize(igcCreateMain)); if (this->igcMain == nullptr) { 
return OUT_OF_HOST_MEMORY; } std::vector interfacesToIgnore = {IGC::OclGenBinaryBase::GetInterfaceId()}; if (false == this->igcMain->IsCompatible(&interfacesToIgnore)) { argHelper->printf("Incompatible interface in IGC : %s\n", CIF::InterfaceIdCoder::Dec(this->igcMain->FindIncompatible(&interfacesToIgnore)).c_str()); DEBUG_BREAK_IF(true); return OUT_OF_HOST_MEMORY; } CIF::Version_t verMin = 0, verMax = 0; if (false == this->igcMain->FindSupportedVersions(IGC::OclGenBinaryBase::GetInterfaceId(), verMin, verMax)) { argHelper->printf("Patchtoken interface is missing"); return OUT_OF_HOST_MEMORY; } this->igcDeviceCtx = this->igcMain->CreateInterface(); if (this->igcDeviceCtx == nullptr) { return OUT_OF_HOST_MEMORY; } this->igcDeviceCtx->SetProfilingTimerResolution(static_cast(hwInfo.capabilityTable.defaultProfilingTimerResolution)); auto igcPlatform = this->igcDeviceCtx->GetPlatformHandle(); auto igcGtSystemInfo = this->igcDeviceCtx->GetGTSystemInfoHandle(); auto igcFtrWa = this->igcDeviceCtx->GetIgcFeaturesAndWorkaroundsHandle(); if ((igcPlatform == nullptr) || (igcGtSystemInfo == nullptr) || (igcFtrWa == nullptr)) { return OUT_OF_HOST_MEMORY; } auto compilerHwInfoConfig = CompilerHwInfoConfig::get(hwInfo.platform.eProductFamily); auto copyHwInfo = hwInfo; if (compilerHwInfoConfig) { compilerHwInfoConfig->adjustHwInfoForIgc(copyHwInfo); } IGC::PlatformHelper::PopulateInterfaceWith(*igcPlatform.get(), copyHwInfo.platform); IGC::GtSysInfoHelper::PopulateInterfaceWith(*igcGtSystemInfo.get(), copyHwInfo.gtSystemInfo); // populate with features igcFtrWa.get()->SetFtrDesktop(hwInfo.featureTable.flags.ftrDesktop); igcFtrWa.get()->SetFtrChannelSwizzlingXOREnabled(hwInfo.featureTable.flags.ftrChannelSwizzlingXOREnabled); igcFtrWa.get()->SetFtrIVBM0M1Platform(hwInfo.featureTable.flags.ftrIVBM0M1Platform); igcFtrWa.get()->SetFtrSGTPVSKUStrapPresent(hwInfo.featureTable.flags.ftrSGTPVSKUStrapPresent); igcFtrWa.get()->SetFtr5Slice(hwInfo.featureTable.flags.ftr5Slice); if 
(compilerHwInfoConfig) { igcFtrWa.get()->SetFtrGpGpuMidThreadLevelPreempt(compilerHwInfoConfig->isMidThreadPreemptionSupported(hwInfo)); } igcFtrWa.get()->SetFtrIoMmuPageFaulting(hwInfo.featureTable.flags.ftrIoMmuPageFaulting); igcFtrWa.get()->SetFtrWddm2Svm(hwInfo.featureTable.flags.ftrWddm2Svm); igcFtrWa.get()->SetFtrPooledEuEnabled(hwInfo.featureTable.flags.ftrPooledEuEnabled); igcFtrWa.get()->SetFtrResourceStreamer(hwInfo.featureTable.flags.ftrResourceStreamer); return retVal; } int OfflineCompiler::parseCommandLine(size_t numArgs, const std::vector &argv) { int retVal = SUCCESS; bool compile32 = false; bool compile64 = false; if (numArgs < 2) { showHelp = true; return INVALID_COMMAND_LINE; } for (uint32_t argIndex = 1; argIndex < numArgs; argIndex++) { const auto &currArg = argv[argIndex]; const bool hasMoreArgs = (argIndex + 1 < numArgs); if ("compile" == currArg) { //skip it } else if (("-file" == currArg) && hasMoreArgs) { inputFile = argv[argIndex + 1]; argIndex++; } else if (("-output" == currArg) && hasMoreArgs) { outputFile = argv[argIndex + 1]; argIndex++; } else if ((CompilerOptions::arch32bit == currArg) || ("-32" == currArg)) { compile32 = true; CompilerOptions::concatenateAppend(internalOptions, CompilerOptions::arch32bit); } else if ((CompilerOptions::arch64bit == currArg) || ("-64" == currArg)) { compile64 = true; CompilerOptions::concatenateAppend(internalOptions, CompilerOptions::arch64bit); } else if (CompilerOptions::greaterThan4gbBuffersRequired == currArg) { CompilerOptions::concatenateAppend(internalOptions, CompilerOptions::greaterThan4gbBuffersRequired); } else if (("-device" == currArg) && hasMoreArgs) { deviceName = argv[argIndex + 1]; argIndex++; } else if ("-llvm_text" == currArg) { useLlvmText = true; } else if ("-llvm_bc" == currArg) { useLlvmBc = true; } else if ("-llvm_input" == currArg) { inputFileLlvm = true; } else if ("-spirv_input" == currArg) { inputFileSpirV = true; } else if ("-cpp_file" == currArg) { useCppFile = true; } 
else if ("-gen_file" == currArg) { useGenFile = true; } else if (("-options" == currArg) && hasMoreArgs) { options = argv[argIndex + 1]; argIndex++; } else if (("-internal_options" == currArg) && hasMoreArgs) { CompilerOptions::concatenateAppend(internalOptions, argv[argIndex + 1]); argIndex++; } else if ("-options_name" == currArg) { useOptionsSuffix = true; } else if ("-force_stos_opt" == currArg) { forceStatelessToStatefulOptimization = true; } else if (("-out_dir" == currArg) && hasMoreArgs) { outputDirectory = argv[argIndex + 1]; argIndex++; } else if ("-q" == currArg) { argHelper->getPrinterRef() = MessagePrinter(true); quiet = true; } else if ("-spv_only" == currArg) { onlySpirV = true; } else if ("-output_no_suffix" == currArg) { outputNoSuffix = true; } else if ("--help" == currArg) { showHelp = true; return SUCCESS; } else if (("-revision_id" == currArg) && hasMoreArgs) { revisionId = std::stoi(argv[argIndex + 1], nullptr, 0); argIndex++; } else if ("-exclude_ir" == currArg) { excludeIr = true; } else { argHelper->printf("Invalid option (arg %d): %s\n", argIndex, argv[argIndex].c_str()); retVal = INVALID_COMMAND_LINE; break; } } unifyExcludeIrFlags(); if (DebugManager.flags.OverrideRevision.get() != -1) { revisionId = static_cast(DebugManager.flags.OverrideRevision.get()); } if (retVal == SUCCESS) { if (compile32 && compile64) { argHelper->printf("Error: Cannot compile for 32-bit and 64-bit, please choose one.\n"); retVal |= INVALID_COMMAND_LINE; } if (deviceName.empty() && (false == onlySpirV)) { argHelper->printf("Error: Device name missing.\n"); retVal = INVALID_COMMAND_LINE; } if (inputFile.empty()) { argHelper->printf("Error: Input file name missing.\n"); retVal = INVALID_COMMAND_LINE; } else if (!argHelper->fileExists(inputFile)) { argHelper->printf("Error: Input file %s missing.\n", inputFile.c_str()); retVal = INVALID_FILE; } } return retVal; } void OfflineCompiler::unifyExcludeIrFlags() { const auto 
excludeIrFromZebin{internalOptions.find(CompilerOptions::excludeIrFromZebin.data()) != std::string::npos}; if (!excludeIr && excludeIrFromZebin) { excludeIr = true; } else if (excludeIr && !excludeIrFromZebin) { const std::string prefix{"-ze"}; CompilerOptions::concatenateAppend(internalOptions, prefix + CompilerOptions::excludeIrFromZebin.data()); } } void OfflineCompiler::setStatelessToStatefullBufferOffsetFlag() { bool isStatelessToStatefulBufferOffsetSupported = true; if (!deviceName.empty()) { const auto &compilerHwInfoConfig = *CompilerHwInfoConfig::get(hwInfo.platform.eProductFamily); isStatelessToStatefulBufferOffsetSupported = compilerHwInfoConfig.isStatelessToStatefulBufferOffsetSupported(); } if (DebugManager.flags.EnableStatelessToStatefulBufferOffsetOpt.get() != -1) { isStatelessToStatefulBufferOffsetSupported = DebugManager.flags.EnableStatelessToStatefulBufferOffsetOpt.get() != 0; } if (isStatelessToStatefulBufferOffsetSupported) { CompilerOptions::concatenateAppend(internalOptions, CompilerOptions::hasBufferOffsetArg); } } void OfflineCompiler::appendExtraInternalOptions(std::string &internalOptions) { const auto &compilerHwInfoConfig = *CompilerHwInfoConfig::get(hwInfo.platform.eProductFamily); if (compilerHwInfoConfig.isForceToStatelessRequired() && !forceStatelessToStatefulOptimization) { CompilerOptions::concatenateAppend(internalOptions, CompilerOptions::greaterThan4gbBuffersRequired); } if (compilerHwInfoConfig.isForceEmuInt32DivRemSPRequired()) { CompilerOptions::concatenateAppend(internalOptions, CompilerOptions::forceEmuInt32DivRemSP); } } void OfflineCompiler::parseDebugSettings() { setStatelessToStatefullBufferOffsetFlag(); } std::string OfflineCompiler::parseBinAsCharArray(uint8_t *binary, size_t size, std::string &fileName) { std::string builtinName = convertToPascalCase(fileName); std::ostringstream out; // Convert binary to cpp out << "#include \n"; out << "#include \n\n"; out << "size_t " << builtinName << "BinarySize_" << 
familyNameWithType << " = " << size << ";\n"; out << "uint32_t " << builtinName << "Binary_" << familyNameWithType << "[" << (size + 3) / 4 << "] = {" << std::endl << " "; uint32_t *binaryUint = (uint32_t *)binary; for (size_t i = 0; i < (size + 3) / 4; i++) { if (i != 0) { out << ", "; if (i % 8 == 0) { out << std::endl << " "; } } if (i < size / 4) { out << "0x" << std::hex << std::setw(8) << std::setfill('0') << binaryUint[i]; } else { uint32_t lastBytes = size & 0x3; uint32_t lastUint = 0; uint8_t *pLastUint = (uint8_t *)&lastUint; for (uint32_t j = 0; j < lastBytes; j++) { pLastUint[sizeof(uint32_t) - 1 - j] = binary[i * 4 + j]; } out << "0x" << std::hex << std::setw(8) << std::setfill('0') << lastUint; } } out << "};" << std::endl; out << std::endl << "#include \"shared/source/built_ins/registry/built_ins_registry.h\"\n" << std::endl; out << "namespace NEO {" << std::endl; out << "static RegisterEmbeddedResource register" << builtinName << "Bin(" << std::endl; out << " \"" << familyNameWithType << "_0_" << fileName.c_str() << ".builtin_kernel.bin\"," << std::endl; out << " (const char *)" << builtinName << "Binary_" << familyNameWithType << "," << std::endl; out << " " << builtinName << "BinarySize_" << familyNameWithType << ");" << std::endl; out << "}" << std::endl; return out.str(); } std::string OfflineCompiler::getFileNameTrunk(std::string &filePath) { size_t slashPos = filePath.find_last_of("\\/", filePath.size()) + 1; size_t extPos = filePath.find_last_of(".", filePath.size()); if (extPos == std::string::npos) { extPos = filePath.size(); } std::string fileTrunk = filePath.substr(slashPos, (extPos - slashPos)); return fileTrunk; } std::string getDevicesTypes() { std::list prefixes; for (int j = 0; j < IGFX_MAX_PRODUCT; j++) { if (hardwarePrefix[j] == nullptr) continue; prefixes.push_back(hardwarePrefix[j]); } std::ostringstream os; for (auto it = prefixes.begin(); it != prefixes.end(); it++) { if (it != prefixes.begin()) os << ", "; os << *it; } return 
os.str(); } std::string getDevicesFamilies() { std::list prefixes; for (unsigned int i = 0; i < IGFX_MAX_CORE; ++i) { if (familyName[i] == nullptr) continue; prefixes.push_back(familyName[i]); } std::ostringstream os; for (auto it = prefixes.begin(); it != prefixes.end(); it++) { if (it != prefixes.begin()) os << ", "; os << *it; } return os.str(); } std::string OfflineCompiler::getDevicesConfigs() { std::list configNum; auto allSupportedConfigs = argHelper->getAllSupportedProductConfigs(); for (auto &config : allSupportedConfigs) { auto numeration = argHelper->parseProductConfigFromValue(config); configNum.push_back(numeration); } std::ostringstream os; for (auto it = configNum.begin(); it != configNum.end(); it++) { if (it != configNum.begin()) os << ", "; os << *it; } return os.str(); } void OfflineCompiler::printUsage() { argHelper->printf(R"===(Compiles input file to Intel Compute GPU device binary (*.bin). Additionally, outputs intermediate representation (e.g. spirV). Different input and intermediate file formats are available. Usage: ocloc [compile] -file -device [-output ] [-out_dir ] [-options ] [-32|-64] [-internal_options ] [-llvm_text|-llvm_input|-spirv_input] [-options_name] [-q] [-cpp_file] [-output_no_suffix] [--help] -file The input file to be compiled (by default input source format is OpenCL C kernel language). -device Target device. can be: %s, %s or hexadecimal value with 0x prefix - can be single or multiple target devices. The [[.]] numbers: - family of graphics products, - can be omitted, then ocloc will compile for all of the matching devices. - can be omitted, then ocloc will compile for all of the . matching devices. The hexadecimal value represents device ID. If such value is provided, ocloc will try to match it with corresponding device type. For example, 0xFF20 device ID will be translated to tgllp. 
If multiple target devices are provided, ocloc will compile for each of these targets and will create a fatbinary archive that contains all of device binaries produced this way. Supported -device patterns examples: -device 0xFF20 ; will compile 1 target (tgllp) -device 12.0.7 ; will compile 1 target (dg1) -device 11 ; will compile the architecture (gen11) -device 9.0,11.0 ; will compile 2 targets (skl & icllp) -device 9.0-11.0 ; will compile all targets in range (inclusive) -device 9.0- ; will compile all targets newer/same as provided -device -9.0 ; will compile all targets older/same as provided -device * ; will compile all targets known to ocloc Deprecated notation that is still supported: can be: %s - can be single or multiple target devices. Supported -device patterns examples: -device skl ; will compile 1 target -device skl,icllp ; will compile 2 targets -device skl-icllp ; will compile all targets in range (inclusive) -device skl- ; will compile all targets newer/same as provided -device -skl ; will compile all targets older/same as provided -device gen9 ; will compile all targets matching the same gen -device gen9-gen11 ; will compile all targets in range (inclusive) -device gen9- ; will compile all targets newer/same as provided -device -gen9 ; will compile all targets older/same as provided -output Optional output file base name. Default is input file's base name. This base name will be used for all output files. Proper sufixes (describing file formats) will be added automatically. -out_dir Optional output directory. Default is current working directory. -options Optional OpenCL C compilation options as defined by OpenCL specification. Special options for Vector Compute: -vc-codegen compile from SPIRV -cmc compile from CM sources -32 Forces target architecture to 32-bit pointers. Default pointer size is inherited from ocloc's pointer size. This option is exclusive with -64. -64 Forces target architecture to 64-bit pointers. 
Default pointer size is inherited from ocloc's pointer size. This option is exclusive with -32. -internal_options Optional compiler internal options as defined by compilers used underneath. Check intel-graphics-compiler (IGC) project for details on available internal options. You also may provide explicit --help to inquire information about option, mentioned in -options -llvm_text Forces intermediate representation (IR) format to human-readable LLVM IR (.ll). This option affects only output files and should not be used in combination with '-llvm_input' option. Default IR is spirV. This option is exclusive with -spirv_input. This option is exclusive with -llvm_input. -llvm_input Indicates that input file is an llvm binary. Default is OpenCL C kernel language. This option is exclusive with -spirv_input. This option is exclusive with -llvm_text. -spirv_input Indicates that input file is a spirV binary. Default is OpenCL C kernel language format. This option is exclusive with -llvm_input. This option is exclusive with -llvm_text. -options_name Will add suffix to output files. This suffix will be generated based on input options (useful when rebuilding with different set of options so that results won't get overwritten). This suffix is added always as the last part of the filename (even after file's extension). It does not affect '--output' parameter and can be used along with it ('--output' parameter defines the base name - i.e. prefix). -force_stos_opt Will forcibly enable stateless to stateful optimization, i.e. skip "-cl-intel-greater-than-4GB-buffer-required". -q Will silence most of output messages. -spv_only Will generate only spirV file. -cpp_file Will generate c++ file with C-array containing Intel Compute device binary. -gen_file Will generate gen file. -output_no_suffix Prevents ocloc from adding family name suffix. --help Print this usage message. -revision_id Target stepping. Can be decimal or hexadecimal value. 
-exclude_ir Excludes IR from the output binary file. Examples : Compile file to Intel Compute GPU device binary (out = source_file_Gen9core.bin) ocloc -file source_file.cl -device skl )===", getDevicesConfigs().c_str(), NEO::getDevicesFamilies().c_str(), NEO::getDevicesTypes().c_str()); } void OfflineCompiler::storeBinary( char *&pDst, size_t &dstSize, const void *pSrc, const size_t srcSize) { dstSize = 0; DEBUG_BREAK_IF(!(pSrc && srcSize > 0)); delete[] pDst; pDst = new char[srcSize]; dstSize = static_cast(srcSize); memcpy_s(pDst, dstSize, pSrc, srcSize); } bool OfflineCompiler::generateElfBinary() { if (!genBinary || !genBinarySize) { return false; } // return "as is" if zebin format if (isDeviceBinaryFormat(ArrayRef(reinterpret_cast(genBinary), genBinarySize))) { this->elfBinary = std::vector(genBinary, genBinary + genBinarySize); return true; } SingleDeviceBinary binary = {}; binary.buildOptions = this->options; binary.intermediateRepresentation = ArrayRef(reinterpret_cast(this->irBinary), this->irBinarySize); binary.deviceBinary = ArrayRef(reinterpret_cast(this->genBinary), this->genBinarySize); binary.debugData = ArrayRef(reinterpret_cast(this->debugDataBinary), this->debugDataBinarySize); std::string packErrors; std::string packWarnings; using namespace NEO::Elf; ElfEncoder ElfEncoder; ElfEncoder.getElfFileHeader().type = ET_OPENCL_EXECUTABLE; if (binary.buildOptions.empty() == false) { ElfEncoder.appendSection(SHT_OPENCL_OPTIONS, SectionNamesOpenCl::buildOptions, ArrayRef(reinterpret_cast(binary.buildOptions.data()), binary.buildOptions.size())); } if (!binary.intermediateRepresentation.empty() && !excludeIr) { if (isSpirV) { ElfEncoder.appendSection(SHT_OPENCL_SPIRV, SectionNamesOpenCl::spirvObject, binary.intermediateRepresentation); } else { ElfEncoder.appendSection(SHT_OPENCL_LLVM_BINARY, SectionNamesOpenCl::llvmObject, binary.intermediateRepresentation); } } if (binary.debugData.empty() == false) { ElfEncoder.appendSection(SHT_OPENCL_DEV_DEBUG, 
SectionNamesOpenCl::deviceDebug, binary.debugData); } if (binary.deviceBinary.empty() == false) { ElfEncoder.appendSection(SHT_OPENCL_DEV_BINARY, SectionNamesOpenCl::deviceBinary, binary.deviceBinary); } this->elfBinary = ElfEncoder.encode(); return true; } void OfflineCompiler::writeOutAllFiles() { std::string fileBase; std::string fileTrunk = getFileNameTrunk(inputFile); if (outputNoSuffix) { if (outputFile.empty()) { fileBase = fileTrunk; } else { fileBase = outputFile; } } else { if (outputFile.empty()) { fileBase = fileTrunk + "_" + familyNameWithType; } else { fileBase = outputFile + "_" + familyNameWithType; } } if (outputDirectory != "") { std::list dirList; std::string tmp = outputDirectory; size_t pos = outputDirectory.size() + 1; do { dirList.push_back(tmp); pos = tmp.find_last_of("/\\", pos); tmp = tmp.substr(0, pos); } while (pos != std::string::npos); while (!dirList.empty()) { MakeDirectory(dirList.back().c_str()); dirList.pop_back(); } } if (irBinary && !inputFileSpirV) { std::string irOutputFileName = generateFilePathForIr(fileBase) + generateOptsSuffix(); argHelper->saveOutput(irOutputFileName, irBinary, irBinarySize); } if (genBinary) { std::string genOutputFile = generateFilePath(outputDirectory, fileBase, ".gen") + generateOptsSuffix(); argHelper->saveOutput(genOutputFile, genBinary, genBinarySize); if (useCppFile) { std::string cppOutputFile = generateFilePath(outputDirectory, fileBase, ".cpp"); std::string cpp = parseBinAsCharArray((uint8_t *)genBinary, genBinarySize, fileTrunk); argHelper->saveOutput(cppOutputFile, cpp.c_str(), cpp.size()); } } if (!elfBinary.empty()) { std::string elfOutputFile; if (outputNoSuffix) { elfOutputFile = generateFilePath(outputDirectory, fileBase, ""); } else { elfOutputFile = generateFilePath(outputDirectory, fileBase, ".bin") + generateOptsSuffix(); } argHelper->saveOutput( elfOutputFile, elfBinary.data(), elfBinary.size()); } if (debugDataBinary) { std::string debugOutputFile = 
generateFilePath(outputDirectory, fileBase, ".dbg") + generateOptsSuffix(); argHelper->saveOutput( debugOutputFile, debugDataBinary, debugDataBinarySize); } } bool OfflineCompiler::readOptionsFromFile(std::string &options, const std::string &file, OclocArgHelper *helper) { if (!helper->fileExists(file)) { return false; } size_t optionsSize = 0U; auto optionsFromFile = helper->loadDataFromFile(file, optionsSize); if (optionsSize > 0) { // Remove comment containing copyright header options = optionsFromFile.get(); size_t commentBegin = options.find("/*"); size_t commentEnd = options.rfind("*/"); if (commentBegin != std::string::npos && commentEnd != std::string::npos) { auto sizeToReplace = commentEnd - commentBegin + 2; options = options.replace(commentBegin, sizeToReplace, ""); size_t optionsBegin = options.find_first_not_of(" \t\n\r"); if (optionsBegin != std::string::npos) { options = options.substr(optionsBegin, options.length()); } } auto trimPos = options.find_last_not_of(" \n\r"); options = options.substr(0, trimPos + 1); } return true; } std::string generateFilePath(const std::string &directory, const std::string &fileNameBase, const char *extension) { UNRECOVERABLE_IF(extension == nullptr); if (directory.empty()) { return fileNameBase + extension; } bool hasTrailingSlash = (*directory.rbegin() == '/'); std::string ret; ret.reserve(directory.size() + (hasTrailingSlash ? 
0 : 1) + fileNameBase.size() + strlen(extension) + 1); ret.append(directory); if (false == hasTrailingSlash) { ret.append("/", 1); } ret.append(fileNameBase); ret.append(extension); return ret; } } // namespace NEO compute-runtime-22.14.22890/shared/offline_compiler/source/offline_compiler.h000066400000000000000000000132461422164147700271270ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/offline_compiler/source/ocloc_arg_helper.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/os_interface/os_library.h" #include "shared/source/utilities/arrayref.h" #include "shared/source/utilities/const_stringref.h" #include "cif/common/cif_main.h" #include "ocl_igc_interface/fcl_ocl_device_ctx.h" #include "ocl_igc_interface/igc_ocl_device_ctx.h" #include #include #include namespace NEO { struct HardwareInfo; class OsLibrary; std::string convertToPascalCase(const std::string &inString); std::string generateFilePath(const std::string &directory, const std::string &fileNameBase, const char *extension); std::string getDevicesTypes(); class OfflineCompiler { public: static int query(size_t numArgs, const std::vector &allArgs, OclocArgHelper *helper); static OfflineCompiler *create(size_t numArgs, const std::vector &allArgs, bool dumpFiles, int &retVal, OclocArgHelper *helper); MOCKABLE_VIRTUAL int build(); std::string &getBuildLog(); void printUsage(); std::string getDevicesConfigs(); static constexpr ConstStringRef queryHelp = "Depending on will generate file\n" "(with a name adequate to )\n" "containing either driver version or NEO revision hash.\n\n" "Usage: ocloc query \n\n" "Supported query options:\n" " OCL_DRIVER_VERSION ; returns driver version\n" " NEO_REVISION ; returns NEO revision hash\n\n" "Examples:\n" " Extract driver version\n" " ocloc query OCL_DRIVER_VERSION\n"; OfflineCompiler &operator=(const OfflineCompiler &) = delete; OfflineCompiler(const 
OfflineCompiler &) = delete; MOCKABLE_VIRTUAL ~OfflineCompiler(); bool isQuiet() const { return quiet; } bool isOnlySpirV() const { return onlySpirV; } std::string parseBinAsCharArray(uint8_t *binary, size_t size, std::string &fileName); static bool readOptionsFromFile(std::string &optionsOut, const std::string &file, OclocArgHelper *helper); ArrayRef getPackedDeviceBinaryOutput() { return this->elfBinary; } static std::string getFileNameTrunk(std::string &filePath); const HardwareInfo &getHardwareInfo() const { return hwInfo; } std::string getOptionsReadFromFile() const { return optionsReadFromFile; } std::string getInternalOptionsReadFromFile() const { return internalOptionsReadFromFile; } protected: OfflineCompiler(); void setFamilyType(); int initHardwareInfo(std::string deviceName); std::string getStringWithinDelimiters(const std::string &src); int initialize(size_t numArgs, const std::vector &allArgs, bool dumpFiles); int parseCommandLine(size_t numArgs, const std::vector &allArgs); void setStatelessToStatefullBufferOffsetFlag(); void appendExtraInternalOptions(std::string &internalOptions); void parseDebugSettings(); void storeBinary(char *&pDst, size_t &dstSize, const void *pSrc, const size_t srcSize); MOCKABLE_VIRTUAL int buildSourceCode(); MOCKABLE_VIRTUAL std::string validateInputType(const std::string &input, bool isLlvm, bool isSpirv); int buildIrBinary(); void updateBuildLog(const char *pErrorString, const size_t errorStringSize); MOCKABLE_VIRTUAL bool generateElfBinary(); std::string generateFilePathForIr(const std::string &fileNameBase) { const char *ext = (isSpirV) ? ".spv" : ".bc"; return generateFilePath(outputDirectory, fileNameBase, useLlvmText ? ".ll" : ext); } std::string generateOptsSuffix() { std::string suffix{useOptionsSuffix ? 
options : ""}; std::replace(suffix.begin(), suffix.end(), ' ', '_'); return suffix; } MOCKABLE_VIRTUAL void writeOutAllFiles(); void unifyExcludeIrFlags(); HardwareInfo hwInfo; PRODUCT_CONFIG deviceConfig = UNKNOWN_ISA; std::string deviceName; std::string familyNameWithType; std::string inputFile; std::string outputFile; std::string outputDirectory; std::string options; std::string internalOptions; std::string sourceCode; std::string buildLog; std::string optionsReadFromFile = ""; std::string internalOptionsReadFromFile = ""; bool dumpFiles = true; bool useLlvmText = false; bool useLlvmBc = false; bool useCppFile = false; bool useGenFile = false; bool useOptionsSuffix = false; bool quiet = false; bool onlySpirV = false; bool inputFileLlvm = false; bool inputFileSpirV = false; bool outputNoSuffix = false; bool forceStatelessToStatefulOptimization = false; bool isSpirV = false; bool showHelp = false; bool excludeIr = false; std::vector elfBinary; char *genBinary = nullptr; size_t genBinarySize = 0; char *irBinary = nullptr; size_t irBinarySize = 0; char *debugDataBinary = nullptr; size_t debugDataBinarySize = 0; struct buildInfo; std::unique_ptr pBuildInfo; std::unique_ptr igcLib = nullptr; CIF::RAII::UPtr_t igcMain = nullptr; CIF::RAII::UPtr_t igcDeviceCtx = nullptr; int revisionId = -1; std::unique_ptr fclLib = nullptr; CIF::RAII::UPtr_t fclMain = nullptr; CIF::RAII::UPtr_t fclDeviceCtx = nullptr; IGC::CodeType::CodeType_t preferredIntermediateRepresentation; OclocArgHelper *argHelper = nullptr; }; } // namespace NEO compute-runtime-22.14.22890/shared/offline_compiler/source/offline_compiler_helper.cpp000066400000000000000000000012401422164147700310100ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/utilities/debug_settings_reader_creator.h" namespace NEO { template 
DebugSettingsManager::DebugSettingsManager(const char *registryPath) { } template DebugSettingsManager::~DebugSettingsManager() { readerImpl.reset(); }; // Global Debug Settings Manager DebugSettingsManager DebugManager(""); } // namespace NEO compute-runtime-22.14.22890/shared/offline_compiler/source/offline_compiler_options.cpp000066400000000000000000000005041422164147700312260ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include #include namespace NEO { // AUB file folder location const char *folderAUB = "aub_out"; // Initial value for HW tag uint32_t initialHardwareTag = std::numeric_limits::max(); } // namespace NEO compute-runtime-22.14.22890/shared/offline_compiler/source/offline_linker.cpp000066400000000000000000000362521422164147700271360ustar00rootroot00000000000000/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "offline_linker.h" #include "shared/offline_compiler/source/ocloc_arg_helper.h" #include "shared/offline_compiler/source/ocloc_error_code.h" #include "shared/source/compiler_interface/intermediate_representations.h" #include "shared/source/device_binary_format/elf/elf_encoder.h" #include "shared/source/device_binary_format/elf/ocl_elf.h" #include "shared/source/helpers/compiler_hw_info_config.h" #include "shared/source/helpers/string.h" #include "shared/source/os_interface/os_inc_base.h" #include "shared/source/os_interface/os_library.h" #include "cif/common/cif_main.h" #include "ocl_igc_interface/platform_helper.h" #include #include namespace NEO { CIF::CIFMain *createMainNoSanitize(CIF::CreateCIFMainFunc_t createFunc); std::unique_ptr OfflineLinker::create(size_t argsCount, const std::vector &args, int &errorCode, OclocArgHelper *argHelper) { std::unique_ptr linker{new OfflineLinker{argHelper}}; errorCode = linker->initialize(argsCount, args); return linker; } OfflineLinker::OfflineLinker(OclocArgHelper *argHelper) : 
argHelper{argHelper}, operationMode{OperationMode::SKIP_EXECUTION}, outputFilename{"linker_output"}, outputFormat{IGC::CodeType::llvmBc} {} OfflineLinker::~OfflineLinker() = default; int OfflineLinker::initialize(size_t argsCount, const std::vector &args) { const auto parsingResult{parseCommand(argsCount, args)}; if (parsingResult != OclocErrorCode::SUCCESS) { return parsingResult; } // If a user requested help, then stop here. if (operationMode == OperationMode::SHOW_HELP) { return OclocErrorCode::SUCCESS; } const auto verificationResult{verifyLinkerCommand()}; if (verificationResult != OclocErrorCode::SUCCESS) { return verificationResult; } const auto loadingResult{loadInputFilesContent()}; if (loadingResult != OclocErrorCode::SUCCESS) { return loadingResult; } const auto hwInfoInitializationResult{initHardwareInfo()}; if (hwInfoInitializationResult != OclocErrorCode::SUCCESS) { return hwInfoInitializationResult; } const auto igcPreparationResult{prepareIgc()}; if (igcPreparationResult != OclocErrorCode::SUCCESS) { return igcPreparationResult; } operationMode = OperationMode::LINK_FILES; return OclocErrorCode::SUCCESS; } int OfflineLinker::parseCommand(size_t argsCount, const std::vector &args) { if (argsCount < 2u) { operationMode = OperationMode::SHOW_HELP; return OclocErrorCode::INVALID_COMMAND_LINE; } for (size_t argIndex = 1u; argIndex < argsCount; ++argIndex) { const auto ¤tArg{args[argIndex]}; const auto hasMoreArgs{argIndex + 1 < argsCount}; if (currentArg == "link") { continue; } else if ((currentArg == "-file") && hasMoreArgs) { inputFilenames.push_back(args[argIndex + 1]); ++argIndex; } else if (currentArg == "-out" && hasMoreArgs) { outputFilename = args[argIndex + 1]; ++argIndex; } else if ((currentArg == "-out_format") && hasMoreArgs) { outputFormat = parseOutputFormat(args[argIndex + 1]); ++argIndex; } else if ((currentArg == "-options") && hasMoreArgs) { options = args[argIndex + 1]; ++argIndex; } else if ((currentArg == "-internal_options") && 
hasMoreArgs) { internalOptions = args[argIndex + 1]; ++argIndex; } else if (currentArg == "--help") { operationMode = OperationMode::SHOW_HELP; return OclocErrorCode::SUCCESS; } else { argHelper->printf("Invalid option (arg %zd): %s\n", argIndex, currentArg.c_str()); return OclocErrorCode::INVALID_COMMAND_LINE; } } return OclocErrorCode::SUCCESS; } IGC::CodeType::CodeType_t OfflineLinker::parseOutputFormat(const std::string &outputFormatName) { constexpr static std::array supportedFormatNames = { std::pair{"ELF", IGC::CodeType::elf}, std::pair{"LLVM_BC", IGC::CodeType::llvmBc}}; for (const auto &[name, format] : supportedFormatNames) { if (name == outputFormatName) { return format; } } return IGC::CodeType::invalid; } int OfflineLinker::verifyLinkerCommand() { if (inputFilenames.empty()) { argHelper->printf("Error: Input name is missing! At least one input file is required!\n"); return OclocErrorCode::INVALID_COMMAND_LINE; } for (const auto &filename : inputFilenames) { if (filename.empty()) { argHelper->printf("Error: Empty filename cannot be used!\n"); return OclocErrorCode::INVALID_COMMAND_LINE; } if (!argHelper->fileExists(filename)) { argHelper->printf("Error: Input file %s missing.\n", filename.c_str()); return OclocErrorCode::INVALID_FILE; } } if (outputFormat == IGC::CodeType::invalid) { argHelper->printf("Error: Invalid output type!\n"); return OclocErrorCode::INVALID_COMMAND_LINE; } return OclocErrorCode::SUCCESS; } int OfflineLinker::loadInputFilesContent() { std::unique_ptr bytes{}; size_t size{}; IGC::CodeType::CodeType_t codeType{}; inputFilesContent.reserve(inputFilenames.size()); for (const auto &filename : inputFilenames) { size = 0; bytes = argHelper->loadDataFromFile(filename, size); if (size == 0) { argHelper->printf("Error: Cannot read input file: %s\n", filename.c_str()); return OclocErrorCode::INVALID_FILE; } codeType = detectCodeType(bytes.get(), size); if (codeType == IGC::CodeType::invalid) { argHelper->printf("Error: Unsupported format of 
input file: %s\n", filename.c_str()); return OclocErrorCode::INVALID_PROGRAM; } inputFilesContent.emplace_back(std::move(bytes), size, codeType); } return OclocErrorCode::SUCCESS; } IGC::CodeType::CodeType_t OfflineLinker::detectCodeType(char *bytes, size_t size) const { const auto bytesArray = ArrayRef::fromAny(bytes, size); if (isSpirVBitcode(bytesArray)) { return IGC::CodeType::spirV; } if (isLlvmBitcode(bytesArray)) { return IGC::CodeType::llvmBc; } return IGC::CodeType::invalid; } int OfflineLinker::initHardwareInfo() { // In spite of linking input files to intermediate representation instead of native binaries, // we have to initialize hardware info. Without that, initialization of IGC fails. // Therefore, we select the first valid hardware info entry and use it. const auto hwInfoTable{getHardwareInfoTable()}; for (auto productId = 0u; productId < hwInfoTable.size(); ++productId) { if (hwInfoTable[productId]) { hwInfo = *hwInfoTable[productId]; const auto hwInfoConfig = defaultHardwareInfoConfigTable[hwInfo.platform.eProductFamily]; setHwInfoValuesFromConfig(hwInfoConfig, hwInfo); hardwareInfoSetup[hwInfo.platform.eProductFamily](&hwInfo, true, hwInfoConfig); return OclocErrorCode::SUCCESS; } } argHelper->printf("Error! Cannot retrieve any valid hardware information!\n"); return OclocErrorCode::INVALID_DEVICE; } ArrayRef OfflineLinker::getHardwareInfoTable() const { return {hardwareInfoTable}; } int OfflineLinker::prepareIgc() { igcLib = loadIgcLibrary(); if (!igcLib) { argHelper->printf("Error! Loading of IGC library has failed! Filename: %s\n", Os::igcDllName); return OclocErrorCode::OUT_OF_HOST_MEMORY; } const auto igcCreateMainFunction = loadCreateIgcMainFunction(); if (!igcCreateMainFunction) { argHelper->printf("Error! Cannot load required functions from IGC library.\n"); return OclocErrorCode::OUT_OF_HOST_MEMORY; } igcMain = createIgcMain(igcCreateMainFunction); if (!igcMain) { argHelper->printf("Error! 
Cannot create IGC main component!\n"); return OclocErrorCode::OUT_OF_HOST_MEMORY; } igcDeviceCtx = createIgcDeviceContext(); if (!igcDeviceCtx) { argHelper->printf("Error! Cannot create IGC device context!\n"); return OclocErrorCode::OUT_OF_HOST_MEMORY; } const auto igcPlatform = getIgcPlatformHandle(); const auto igcGtSystemInfo = getGTSystemInfoHandle(); if (!igcPlatform || !igcGtSystemInfo) { argHelper->printf("Error! IGC device context has not been properly created!\n"); return OclocErrorCode::OUT_OF_HOST_MEMORY; } IGC::PlatformHelper::PopulateInterfaceWith(*igcPlatform.get(), hwInfo.platform); IGC::GtSysInfoHelper::PopulateInterfaceWith(*igcGtSystemInfo.get(), hwInfo.gtSystemInfo); return OclocErrorCode::SUCCESS; } std::unique_ptr OfflineLinker::loadIgcLibrary() const { return std::unique_ptr{OsLibrary::load(Os::igcDllName)}; } CIF::CreateCIFMainFunc_t OfflineLinker::loadCreateIgcMainFunction() const { return reinterpret_cast(igcLib->getProcAddress(CIF::CreateCIFMainFuncName)); } CIF::RAII::UPtr_t OfflineLinker::createIgcMain(CIF::CreateCIFMainFunc_t createMainFunction) const { return CIF::RAII::UPtr(createMainNoSanitize(createMainFunction)); } CIF::RAII::UPtr_t OfflineLinker::createIgcDeviceContext() const { return igcMain->CreateInterface(); } CIF::RAII::UPtr_t OfflineLinker::getIgcPlatformHandle() const { return igcDeviceCtx->GetPlatformHandle(); } CIF::RAII::UPtr_t OfflineLinker::getGTSystemInfoHandle() const { return igcDeviceCtx->GetGTSystemInfoHandle(); } int OfflineLinker::execute() { switch (operationMode) { case OperationMode::SHOW_HELP: return showHelp(); case OperationMode::LINK_FILES: return link(); case OperationMode::SKIP_EXECUTION: [[fallthrough]]; default: argHelper->printf("Error: Linker cannot be executed due to unsuccessful initialization!\n"); return OclocErrorCode::INVALID_COMMAND_LINE; } } int OfflineLinker::showHelp() { constexpr auto help{R"===(Links several IR files to selected output format (LLVM BC, ELF). 
Input files can be given in SPIR-V or LLVM BC. Usage: ocloc link [-file ]... -out [-out_format ] [-options ] [-internal_options ] [--help] -file The input file to be linked. Multiple files can be passed using repetition of this arguments. Please see examples below. -out Output filename. -out_format Output file format. Supported ones are ELF and LLVM_BC. When not specified, LLVM_BC is used. -options Optional OpenCL C compilation options as defined by OpenCL specification. -internal_options Optional compiler internal options as defined by compilers used underneath. Check intel-graphics-compiler (IGC) project for details on available internal options. You also may provide explicit --help to inquire information about option, mentioned in -options. --help Print this usage message. Examples: Link two SPIR-V files to LLVM BC output ocloc link -file first_file.spv -file second_file.spv -out linker_output.llvmbc Link two LLVM BC files to ELF output ocloc link -file first_file.llvmbc -file second_file.llvmbc -out_format ELF -out translated.elf )==="}; argHelper->printf(help); return OclocErrorCode::SUCCESS; } int OfflineLinker::link() { const auto encodedElfFile{createSingleInputFile()}; if (outputFormat == IGC::CodeType::elf) { argHelper->saveOutput(outputFilename, encodedElfFile.data(), encodedElfFile.size()); return OclocErrorCode::SUCCESS; } const auto [translationResult, translatedBitcode] = translateToOutputFormat(encodedElfFile); if (translationResult == OclocErrorCode::SUCCESS) { argHelper->saveOutput(outputFilename, translatedBitcode.data(), translatedBitcode.size()); } return translationResult; } std::vector OfflineLinker::createSingleInputFile() const { NEO::Elf::ElfEncoder<> elfEncoder{true, false, 1U}; elfEncoder.getElfFileHeader().type = Elf::ET_OPENCL_OBJECTS; for (const auto &[bytes, size, codeType] : inputFilesContent) { const auto isSpirv = codeType == IGC::CodeType::spirV; const auto sectionType = isSpirv ? 
Elf::SHT_OPENCL_SPIRV : Elf::SHT_OPENCL_LLVM_BINARY; const auto sectionName = isSpirv ? Elf::SectionNamesOpenCl::spirvObject : Elf::SectionNamesOpenCl::llvmObject; const auto bytesArray = ArrayRef::fromAny(bytes.get(), size); elfEncoder.appendSection(sectionType, sectionName, bytesArray); } return elfEncoder.encode(); } std::pair> OfflineLinker::translateToOutputFormat(const std::vector &elfInput) { auto igcSrc = CIF::Builtins::CreateConstBuffer(igcMain.get(), elfInput.data(), elfInput.size()); auto igcOptions = CIF::Builtins::CreateConstBuffer(igcMain.get(), options.c_str(), options.size()); auto igcInternalOptions = CIF::Builtins::CreateConstBuffer(igcMain.get(), internalOptions.c_str(), internalOptions.size()); auto igcTranslationCtx = igcDeviceCtx->CreateTranslationCtx(IGC::CodeType::elf, outputFormat); const auto tracingOptions{nullptr}; const auto tracingOptionsSize{0}; const auto igcOutput = igcTranslationCtx->Translate(igcSrc.get(), igcOptions.get(), igcInternalOptions.get(), tracingOptions, tracingOptionsSize); std::vector outputFileContent{}; if (!igcOutput) { argHelper->printf("Error: Translation has failed! IGC output is nullptr!\n"); return {OclocErrorCode::OUT_OF_HOST_MEMORY, std::move(outputFileContent)}; } if (igcOutput->GetOutput()->GetSizeRaw() != 0) { outputFileContent.resize(igcOutput->GetOutput()->GetSizeRaw()); memcpy_s(outputFileContent.data(), outputFileContent.size(), igcOutput->GetOutput()->GetMemory(), igcOutput->GetOutput()->GetSizeRaw()); } tryToStoreBuildLog(igcOutput->GetBuildLog()->GetMemory(), igcOutput->GetBuildLog()->GetSizeRaw()); const auto errorCode{igcOutput->Successful() ? OclocErrorCode::SUCCESS : OclocErrorCode::BUILD_PROGRAM_FAILURE}; if (errorCode != OclocErrorCode::SUCCESS) { argHelper->printf("Error: Translation has failed! 
IGC returned empty output.\n"); } return {errorCode, std::move(outputFileContent)}; } std::string OfflineLinker::getBuildLog() const { return buildLog; } void OfflineLinker::tryToStoreBuildLog(const char *buildLogRaw, size_t size) { if (buildLogRaw && size != 0) { buildLog = std::string{buildLogRaw, buildLogRaw + size}; } } } // namespace NEOcompute-runtime-22.14.22890/shared/offline_compiler/source/offline_linker.h000066400000000000000000000061561422164147700266030ustar00rootroot00000000000000/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/hw_info.h" #include "shared/source/utilities/arrayref.h" #include "cif/common/cif_main.h" #include "cif/import/library_api.h" #include "ocl_igc_interface/code_type.h" #include "ocl_igc_interface/igc_ocl_device_ctx.h" #include #include #include #include #include class OclocArgHelper; namespace NEO { class OsLibrary; class OfflineLinker { protected: enum class OperationMode { SKIP_EXECUTION = 0, SHOW_HELP = 1, LINK_FILES = 2, }; struct InputFileContent { InputFileContent(std::unique_ptr bytes, size_t size, IGC::CodeType::CodeType_t codeType) : bytes{std::move(bytes)}, size{size}, codeType{codeType} {} std::unique_ptr bytes{}; size_t size{}; IGC::CodeType::CodeType_t codeType{}; }; public: static std::unique_ptr create(size_t argsCount, const std::vector &args, int &errorCode, OclocArgHelper *argHelper); MOCKABLE_VIRTUAL ~OfflineLinker(); int execute(); std::string getBuildLog() const; protected: explicit OfflineLinker(OclocArgHelper *argHelper); int initialize(size_t argsCount, const std::vector &args); int parseCommand(size_t argsCount, const std::vector &args); IGC::CodeType::CodeType_t parseOutputFormat(const std::string &outputFormatName); int verifyLinkerCommand(); int loadInputFilesContent(); IGC::CodeType::CodeType_t detectCodeType(char *bytes, size_t size) const; int initHardwareInfo(); int prepareIgc(); int link(); int showHelp(); std::vector 
createSingleInputFile() const; std::pair> translateToOutputFormat(const std::vector &elfInput); void tryToStoreBuildLog(const char *buildLogRaw, size_t size); MOCKABLE_VIRTUAL ArrayRef getHardwareInfoTable() const; MOCKABLE_VIRTUAL std::unique_ptr loadIgcLibrary() const; MOCKABLE_VIRTUAL CIF::CreateCIFMainFunc_t loadCreateIgcMainFunction() const; MOCKABLE_VIRTUAL CIF::RAII::UPtr_t createIgcMain(CIF::CreateCIFMainFunc_t createMainFunction) const; MOCKABLE_VIRTUAL CIF::RAII::UPtr_t createIgcDeviceContext() const; MOCKABLE_VIRTUAL CIF::RAII::UPtr_t getIgcPlatformHandle() const; MOCKABLE_VIRTUAL CIF::RAII::UPtr_t getGTSystemInfoHandle() const; OclocArgHelper *argHelper{}; OperationMode operationMode{}; std::vector inputFilenames{}; std::vector inputFilesContent{}; std::string outputFilename{}; IGC::CodeType::CodeType_t outputFormat{}; std::string options{}; std::string internalOptions{}; std::unique_ptr igcLib{}; CIF::RAII::UPtr_t igcMain{}; CIF::RAII::UPtr_t igcDeviceCtx{}; HardwareInfo hwInfo{}; std::string buildLog{}; }; } // namespace NEOcompute-runtime-22.14.22890/shared/offline_compiler/source/queries.h000066400000000000000000000005541422164147700252660ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/utilities/const_stringref.h" namespace NEO { namespace Queries { static constexpr ConstStringRef queryNeoRevision = "NEO_REVISION"; static constexpr ConstStringRef queryOCLDriverVersion = "OCL_DRIVER_VERSION"; }; // namespace Queries } // namespace NEO compute-runtime-22.14.22890/shared/offline_compiler/source/utilities/000077500000000000000000000000001422164147700254475ustar00rootroot00000000000000compute-runtime-22.14.22890/shared/offline_compiler/source/utilities/CMakeLists.txt000066400000000000000000000015101422164147700302040ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(CLOC_LIB_SRCS_UTILITIES 
${CMAKE_CURRENT_SOURCE_DIR}/safety_caller.h ${CMAKE_CURRENT_SOURCE_DIR}/get_current_dir.h ) if(WIN32) list(APPEND CLOC_LIB_SRCS_UTILITIES ${CMAKE_CURRENT_SOURCE_DIR}/windows/safety_caller_windows.cpp ${CMAKE_CURRENT_SOURCE_DIR}/windows/safety_guard_windows.h ${CMAKE_CURRENT_SOURCE_DIR}/windows/seh_exception.cpp ${CMAKE_CURRENT_SOURCE_DIR}/windows/seh_exception.h ${CMAKE_CURRENT_SOURCE_DIR}/windows/get_current_dir_windows.cpp ) else() list(APPEND CLOC_LIB_SRCS_UTILITIES ${CMAKE_CURRENT_SOURCE_DIR}/linux/safety_caller_linux.cpp ${CMAKE_CURRENT_SOURCE_DIR}/linux/get_current_dir_linux.cpp ) endif() target_sources(ocloc_lib PRIVATE ${CLOC_LIB_SRCS_UTILITIES}) compute-runtime-22.14.22890/shared/offline_compiler/source/utilities/get_current_dir.h000066400000000000000000000002601422164147700307750ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once extern std::string getCurrentDirectoryOwn(std::string outDirForBuilds);compute-runtime-22.14.22890/shared/offline_compiler/source/utilities/get_git_version_info.cpp000066400000000000000000000010101422164147700323450ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "driver_version.h" #include #ifdef QTR #undef QTR #endif #ifdef TOSTR #undef TOSTR #endif #define QTR(a) #a #define TOSTR(b) QTR(b) namespace NEO { std::string getRevision() { #ifdef NEO_REVISION return NEO_REVISION; #else return ""; #endif } std::string getOclDriverVersion() { #ifdef NEO_OCL_DRIVER_VERSION return TOSTR(NEO_OCL_DRIVER_VERSION); #else return ""; #endif } } // namespace NEO compute-runtime-22.14.22890/shared/offline_compiler/source/utilities/get_git_version_info.h000066400000000000000000000003451422164147700320240ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include namespace NEO { extern std::string getRevision(); extern std::string 
getOclDriverVersion(); } // namespace NEO compute-runtime-22.14.22890/shared/offline_compiler/source/utilities/linux/000077500000000000000000000000001422164147700266065ustar00rootroot00000000000000compute-runtime-22.14.22890/shared/offline_compiler/source/utilities/linux/get_current_dir_linux.cpp000066400000000000000000000006051422164147700337110ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include #include std::string getCurrentDirectoryOwn(std::string outDirForBuilds) { char buf[256]; if (getcwd(buf, sizeof(buf)) != NULL) return std::string(buf) + "/" + outDirForBuilds + "/"; else return std::string("./") + outDirForBuilds + "/"; } compute-runtime-22.14.22890/shared/offline_compiler/source/utilities/linux/safety_caller_linux.cpp000066400000000000000000000016551422164147700333550ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/offline_compiler/source/ocloc_error_code.h" #include "shared/offline_compiler/source/offline_compiler.h" #include "shared/offline_compiler/source/offline_linker.h" #include "shared/offline_compiler/source/utilities/linux/safety_guard_linux.h" #include "shared/source/os_interface/os_library.h" using namespace NEO; int buildWithSafetyGuard(OfflineCompiler *compiler) { SafetyGuardLinux safetyGuard; int retVal = NEO::OclocErrorCode::COMPILATION_CRASH; return safetyGuard.call(compiler, &OfflineCompiler::build, retVal); } int linkWithSafetyGuard(OfflineLinker *linker) { SafetyGuardLinux safetyGuard{}; int returnValueOnCrash{NEO::OclocErrorCode::COMPILATION_CRASH}; return safetyGuard.call(linker, &OfflineLinker::execute, returnValueOnCrash); } compute-runtime-22.14.22890/shared/offline_compiler/source/utilities/linux/safety_guard_linux.h000066400000000000000000000037121422164147700326560ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ 
#pragma once #include "shared/source/helpers/abort.h" #include #include #include #include #include static jmp_buf jmpbuf; class SafetyGuardLinux { public: SafetyGuardLinux() { struct sigaction sigact; sigact.sa_sigaction = sigAction; sigact.sa_flags = SA_RESTART | SA_SIGINFO; sigaction(SIGSEGV, &sigact, &previousSigSegvAction); sigaction(SIGILL, &sigact, &previousSigIllvAction); } ~SafetyGuardLinux() { if (previousSigSegvAction.sa_sigaction) { sigaction(SIGSEGV, &previousSigSegvAction, NULL); } if (previousSigIllvAction.sa_sigaction) { sigaction(SIGILL, &previousSigIllvAction, NULL); } } static void sigAction(int sigNum, siginfo_t *info, void *ucontext) { const int callstackDepth = 30; void *addresses[callstackDepth]; char **callstack; int backtraceSize = 0; backtraceSize = backtrace(addresses, callstackDepth); callstack = backtrace_symbols(addresses, backtraceSize); for (int i = 0; i < backtraceSize; ++i) { printf("[%d]: %s\n", i, callstack[i]); } free(callstack); longjmp(jmpbuf, 1); } template T call(Object *object, Method method, T retValueOnCrash) { int jump = 0; jump = setjmp(jmpbuf); if (jump == 0) { return (object->*method)(); } else { if (onSigSegv) { onSigSegv(); } else { NEO::abortExecution(); } } return retValueOnCrash; } typedef void (*callbackFunction)(); callbackFunction onSigSegv = nullptr; struct sigaction previousSigSegvAction = {}; struct sigaction previousSigIllvAction = {}; }; compute-runtime-22.14.22890/shared/offline_compiler/source/utilities/safety_caller.h000066400000000000000000000004641422164147700304410ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once namespace NEO { class OfflineCompiler; class OfflineLinker; } // namespace NEO extern int buildWithSafetyGuard(NEO::OfflineCompiler *compiler); extern int linkWithSafetyGuard(NEO::OfflineLinker 
*linker);compute-runtime-22.14.22890/shared/offline_compiler/source/utilities/windows/000077500000000000000000000000001422164147700271415ustar00rootroot00000000000000get_current_dir_windows.cpp000066400000000000000000000004741422164147700345240ustar00rootroot00000000000000compute-runtime-22.14.22890/shared/offline_compiler/source/utilities/windows/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "windows.h" #include std::string getCurrentDirectoryOwn(std::string outDirForBuilds) { char buf[256]; GetCurrentDirectoryA(256, buf); return std::string(buf) + "\\" + outDirForBuilds + "\\"; } safety_caller_windows.cpp000066400000000000000000000016011422164147700341530ustar00rootroot00000000000000compute-runtime-22.14.22890/shared/offline_compiler/source/utilities/windows/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/offline_compiler/source/ocloc_error_code.h" #include "shared/offline_compiler/source/offline_compiler.h" #include "shared/offline_compiler/source/offline_linker.h" #include "shared/offline_compiler/source/utilities/windows/safety_guard_windows.h" using namespace NEO; int buildWithSafetyGuard(OfflineCompiler *compiler) { SafetyGuardWindows safetyGuard; int retVal = NEO::OclocErrorCode::COMPILATION_CRASH; return safetyGuard.call(compiler, &OfflineCompiler::build, retVal); } int linkWithSafetyGuard(OfflineLinker *linker) { SafetyGuardWindows safetyGuard{}; int returnValueOnCrash{NEO::OclocErrorCode::COMPILATION_CRASH}; return safetyGuard.call(linker, &OfflineLinker::execute, returnValueOnCrash); } compute-runtime-22.14.22890/shared/offline_compiler/source/utilities/windows/safety_guard_windows.h000066400000000000000000000016421422164147700335440ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/offline_compiler/source/utilities/windows/seh_exception.h" #include 
"shared/source/helpers/abort.h" #include static jmp_buf jmpbuf; class SafetyGuardWindows { public: template T call(Object *object, Method method, T retValueOnCrash) { int jump = 0; jump = setjmp(jmpbuf); if (jump == 0) { __try { return (object->*method)(); } __except (SehException::filter(GetExceptionCode(), GetExceptionInformation())) { if (onExcept) { onExcept(); } longjmp(jmpbuf, 1); } } return retValueOnCrash; } typedef void (*callbackFunction)(); callbackFunction onExcept = nullptr; }; compute-runtime-22.14.22890/shared/offline_compiler/source/utilities/windows/seh_exception.cpp000066400000000000000000000114421422164147700325040ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "seh_exception.h" #include "shared/source/os_interface/os_library.h" #include #include #pragma warning(push) #pragma warning(disable : 4091) #include #pragma warning(pop) #include #include #include std::string SehException::getExceptionDescription(unsigned int code) { switch (code) { case EXCEPTION_ACCESS_VIOLATION: return "Access violation"; case EXCEPTION_DATATYPE_MISALIGNMENT: return "Datatype misalignement"; case EXCEPTION_FLT_DIVIDE_BY_ZERO: case EXCEPTION_INT_DIVIDE_BY_ZERO: return "Divide by zero"; case EXCEPTION_STACK_OVERFLOW: return "Stack overflow"; default: break; } return "Unknown"; } int SehException::filter(unsigned int code, struct _EXCEPTION_POINTERS *ep) { printf("EXCEPTION: %s\n", SehException::getExceptionDescription(code).c_str()); if (code != EXCEPTION_STACK_OVERFLOW) { std::string callstack; SehException::getCallStack(code, ep, callstack); printf("Callstack:\n\n%s", callstack.c_str()); } return EXCEPTION_EXECUTE_HANDLER; } void SehException::getCallStack(unsigned int code, struct _EXCEPTION_POINTERS *ep, std::string &stack) { DWORD machine = 0; HANDLE hProcess = GetCurrentProcess(); HANDLE hThread = GetCurrentThread(); SYSTEM_INFO systemInfo; GetSystemInfo(&systemInfo); if 
(systemInfo.wProcessorArchitecture == PROCESSOR_ARCHITECTURE_INTEL) { machine = IMAGE_FILE_MACHINE_I386; } else if (systemInfo.wProcessorArchitecture == PROCESSOR_ARCHITECTURE_AMD64) { machine = IMAGE_FILE_MACHINE_AMD64; } else { stack = "invalid processor arch"; return; } stack.clear(); BOOL result = SymInitialize(hProcess, NULL, TRUE); if (result == FALSE) { return; } STACKFRAME64 stackFrame; memset(&stackFrame, 0, sizeof(STACKFRAME64)); const int nameSize = 255; char buffer[sizeof(IMAGEHLP_SYMBOL64) + (nameSize + 1) * sizeof(char)]; IMAGEHLP_SYMBOL64 *symbol = reinterpret_cast(buffer); symbol->MaxNameLength = nameSize; DWORD displacement = 0; DWORD64 displacement64 = 0; std::unique_ptr psApiLib(NEO::OsLibrary::load("psapi.dll")); auto getMappedFileName = reinterpret_cast(psApiLib->getProcAddress("GetMappedFileNameA")); size_t callstackCounter = 0; const size_t maxCallstackDepth = 1000; #ifdef _WIN64 stackFrame.AddrPC.Offset = ep->ContextRecord->Rip; stackFrame.AddrPC.Mode = AddrModeFlat; stackFrame.AddrStack.Offset = ep->ContextRecord->Rsp; stackFrame.AddrStack.Mode = AddrModeFlat; stackFrame.AddrFrame.Offset = ep->ContextRecord->Rbp; stackFrame.AddrFrame.Mode = AddrModeFlat; #else stackFrame.AddrPC.Offset = ep->ContextRecord->Eip; stackFrame.AddrPC.Mode = AddrModeFlat; stackFrame.AddrStack.Offset = ep->ContextRecord->Esp; stackFrame.AddrStack.Mode = AddrModeFlat; stackFrame.AddrFrame.Offset = ep->ContextRecord->Ebp; stackFrame.AddrFrame.Mode = AddrModeFlat; #endif while (callstackCounter < maxCallstackDepth) { symbol->Name[255] = '\0'; if (!StackWalk64(machine, hProcess, hThread, &stackFrame, ep->ContextRecord, nullptr, SymFunctionTableAccess64, SymGetModuleBase64, 0)) { break; } if (stackFrame.AddrFrame.Offset == 0) { break; } std::string lineInCode; std::string module; std::string symbolName; DWORD64 address = stackFrame.AddrPC.Offset; IMAGEHLP_LINE64 imageLine; imageLine.SizeOfStruct = sizeof(IMAGEHLP_LINE64); if (SymGetLineFromAddr64(hProcess, address, 
&displacement, &imageLine)) { lineInCode = imageLine.FileName; char filename[MAX_PATH + 1]; filename[MAX_PATH] = '\0'; if (getMappedFileName(hProcess, reinterpret_cast(imageLine.Address), filename, MAX_PATH)) { module = filename; } } if (SymGetSymFromAddr64(hProcess, address, &displacement64, symbol)) { symbolName = symbol->Name; } addLineToCallstack(stack, callstackCounter, module, lineInCode, symbolName); callstackCounter++; } } void SehException::addLineToCallstack(std::string &callstack, size_t counter, std::string &module, std::string &line, std::string &symbol) { callstack += "["; callstack += std::to_string(counter); callstack += "]: "; if (module.size()) { callstack += "Module:"; callstack += module + "\n\t"; } if (line.size()) { callstack += line + ":"; } callstack += symbol + "\n"; } compute-runtime-22.14.22890/shared/offline_compiler/source/utilities/windows/seh_exception.h000066400000000000000000000013021422164147700321430ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "excpt.h" #include "windows.h" #include class SehException { public: static std::string getExceptionDescription(unsigned int code); static void getCallStack(unsigned int code, struct _EXCEPTION_POINTERS *ep, std::string &stack); static int filter(unsigned int code, struct _EXCEPTION_POINTERS *ep); protected: static void addLineToCallstack(std::string &callstack, size_t counter, std::string &module, std::string &line, std::string &symbol); typedef DWORD(WINAPI *getMappedFileNameFunction)(HANDLE hProcess, LPVOID lpv, LPSTR lpFilename, DWORD nSize); }; compute-runtime-22.14.22890/shared/source/000077500000000000000000000000001422164147700201205ustar00rootroot00000000000000compute-runtime-22.14.22890/shared/source/CMakeLists.txt000066400000000000000000000201171422164147700226610ustar00rootroot00000000000000# # Copyright (C) 2019-2022 Intel Corporation # # SPDX-License-Identifier: MIT # 
set(SHARED_SOURCE_PROJECTS_FOLDER "neo shared") if(NOT DEFINED NEO_SHARED_RELEASE_LIB_NAME) MESSAGE(FATAL_ERROR "NEO Shared library name undefined!") endif() include(enable_cores.cmake) function(generate_shared_lib LIB_NAME MOCKABLE) add_library(${LIB_NAME} STATIC ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/enable_cores.cmake ${CORE_SOURCES} ) if(${MOCKABLE}) target_compile_definitions(${LIB_NAME} PUBLIC MOCKABLE_VIRTUAL=virtual DEFAULT_TEST_PLATFORM=${DEFAULT_TESTED_PLATFORM} DEFAULT_TEST_FAMILY_NAME=${DEFAULT_TESTED_FAMILY_NAME} ${TESTED_CORE_FLAGS_DEFINITONS}) else() target_compile_definitions(${LIB_NAME} PUBLIC MOCKABLE_VIRTUAL=) endif() list(APPEND LIB_FLAGS_DEFINITIONS ${SUPPORTED_CORE_FLAGS_DEFINITONS}) set_property(TARGET ${LIB_NAME} APPEND_STRING PROPERTY COMPILE_FLAGS ${ASAN_FLAGS} ${TSAN_FLAGS}) set_target_properties(${LIB_NAME} PROPERTIES FOLDER "${SHARED_SOURCE_PROJECTS_FOLDER}") target_compile_definitions(${LIB_NAME} PUBLIC PUBLIC GMM_LIB_DLL ${LIB_FLAGS_DEFINITIONS} DEFAULT_PLATFORM=${DEFAULT_SUPPORTED_PLATFORM} PRIVATE OGL=1) target_compile_definitions(${LIB_NAME} PUBLIC ${NEO_CORE_COMPILE_DEFS} ${NEO__IGC_COMPILE_DEFINITIONS}) target_include_directories(${LIB_NAME} PUBLIC ${KMDAF_HEADERS_DIR} ${ENGINE_NODE_DIR} ${NEO__GMM_INCLUDE_DIR} ${CIF_BASE_DIR} ${IGC_OCL_ADAPTOR_DIR} ${NEO__IGC_INCLUDE_DIR} ${KHRONOS_HEADERS_DIR} ${SOURCE_LEVEL_DEBUGGER_HEADERS_DIR} ${VISA_INCLUDE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/os_interface/create_command_stream_receiver_${DRIVER_MODEL} ${CMAKE_CURRENT_SOURCE_DIR}/direct_submission/create_direct_submission_${DRIVER_MODEL} ) if(WIN32 OR NOT DISABLE_WDDM_LINUX) target_include_directories(${LIB_NAME} PUBLIC ${WDK_INCLUDE_PATHS}) endif() if(WIN32) target_include_directories(${LIB_NAME} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/os_interface/windows ) else() target_include_directories(${LIB_NAME} PUBLIC ${I915_INCLUDES_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/os_interface/linux ) endif() 
create_project_source_tree(${LIB_NAME}) # Enable SSE4/AVX2 options for files that need them if(MSVC) set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/helpers/${NEO_TARGET_PROCESSOR}/local_id_gen_avx2.cpp PROPERTIES COMPILE_FLAGS /arch:AVX2) else() if(COMPILER_SUPPORTS_AVX2) set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/helpers/${NEO_TARGET_PROCESSOR}/local_id_gen_avx2.cpp PROPERTIES COMPILE_FLAGS -mavx2) endif() if(COMPILER_SUPPORTS_SSE42) set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/helpers/local_id_gen_sse4.cpp PROPERTIES COMPILE_FLAGS -msse4.2) endif() endif() if(DEFINED AUB_STREAM_PROJECT_NAME) target_link_libraries(${LIB_NAME} ${AUB_STREAM_PROJECT_NAME}) endif() endfunction() set(NEO_CORE_COMPILE_DEFS "") set(CORE_SOURCES ${CORE_SRCS_COREX_ALL_BASE}) add_subdirectories() if(WIN32) list(APPEND CORE_SOURCES ${CORE_SRCS_COREX_ALL_WINDOWS}) endif() if(WIN32 OR NOT DISABLE_WDDM_LINUX) list(APPEND CORE_SOURCES ${CORE_SRCS_COREX_ALL_WDDM}) endif() if(NOT WIN32) list(APPEND CORE_SOURCES ${CORE_SRCS_COREX_ALL_LINUX}) endif() macro(macro_for_each_core_type) append_sources_from_properties(CORE_SOURCES SHARED_SRCS_ADDITIONAL_FILES_${CORE_TYPE} ) endmacro() apply_macro_for_each_core_type("SUPPORTED") append_sources_from_properties(CORE_SOURCES NEO_CORE_AIL NEO_CORE_AUB NEO_CORE_AUB_MEM_DUMP NEO_CORE_BUILT_INS NEO_CORE_COMMANDS NEO_CORE_COMMAND_CONTAINER NEO_CORE_COMMAND_ENCODERS NEO_CORE_COMMAND_STREAM NEO_CORE_COMPILER_INTERFACE NEO_CORE_DEBUGGER NEO_CORE_DEBUG_SETTINGS NEO_CORE_DEVICE NEO_CORE_DIRECT_SUBMISSION NEO_CORE_DIRECT_SUBMISSION_DISPATCHERS NEO_CORE_EXECUTION_ENVIRONMENT NEO_CORE_GEN_COMMON NEO_CORE_GMM_HELPER NEO_CORE_HELPERS NEO_CORE_IMAGE NEO_CORE_INDIRECT_HEAP NEO_CORE_KERNEL NEO_CORE_MEMORY_MANAGER NEO_CORE_MEMORY_PROPERTIES NEO_CORE_OS_INTERFACE NEO_CORE_PAGE_FAULT_MANAGER NEO_CORE_PROGRAM NEO_CORE_SKU_INFO_BASE NEO_CORE_SRCS_BUILT_INS NEO_CORE_SRCS_BUILT_INS_OPS NEO_CORE_SRCS_BUILT_IN_KERNELS NEO_CORE_SRCS_SOURCE_LEVEL_DEBUGGER 
NEO_CORE_TBX NEO_CORE_UTILITIES NEO_DEVICE_BINARY_FORMAT NEO_UNIFIED_MEMORY ) if(WIN32) append_sources_from_properties(CORE_SOURCES NEO_CORE_GMM_HELPER_WINDOWS NEO_CORE_HELPERS_GMM_CALLBACKS_WINDOWS NEO_CORE_DIRECT_SUBMISSION_WINDOWS NEO_CORE_OS_INTERFACE_WINDOWS NEO_CORE_OS_INTERFACE_WDDM NEO_CORE_PAGE_FAULT_MANAGER_WINDOWS NEO_CORE_SKU_INFO_WINDOWS NEO_CORE_SRCS_HELPERS_WINDOWS NEO_CORE_UTILITIES_WINDOWS NEO_CORE_EXECUTION_ENVIRONMENT_WINDOWS NEO_CORE_AIL_WINDOWS ) else() append_sources_from_properties(CORE_SOURCES NEO_CORE_DIRECT_SUBMISSION_LINUX NEO_CORE_OS_INTERFACE_LINUX NEO_CORE_PAGE_FAULT_MANAGER_LINUX NEO_CORE_UTILITIES_LINUX NEO_CORE_EXECUTION_ENVIRONMENT_LINUX NEO_CORE_AIL_LINUX ) if(NOT DISABLE_WDDM_LINUX) append_sources_from_properties(CORE_SOURCES NEO_CORE_HELPERS_GMM_CALLBACKS_WINDOWS NEO_CORE_SRCS_HELPERS_WINDOWS NEO_CORE_GMM_HELPER_WINDOWS NEO_CORE_OS_INTERFACE_WDDM NEO_CORE_SKU_INFO_WINDOWS ) endif() endif() if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${BRANCH_TYPE}/core_sources.cmake) include(${CMAKE_CURRENT_SOURCE_DIR}/${BRANCH_TYPE}/core_sources.cmake) endif() if(NOT MSVC) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fpermissive -fPIC") endif() if(DONT_CARE_OF_VIRTUALS) generate_shared_lib(${NEO_SHARED_RELEASE_LIB_NAME} TRUE) else() generate_shared_lib(${NEO_SHARED_RELEASE_LIB_NAME} FALSE) if(NOT NEO_SKIP_UNIT_TESTS) generate_shared_lib(${NEO_SHARED_MOCKABLE_LIB_NAME} TRUE) endif() endif() set_property(GLOBAL APPEND PROPERTY NEO_CORE_SRCS_LINK ${CORE_SRCS_LINK}) get_property(NEO_SRCS_ENABLE_CORE GLOBAL PROPERTY NEO_SRCS_ENABLE_CORE) target_sources(ocloc_lib PRIVATE ${NEO_SRCS_ENABLE_CORE}) if(UNIX) set_property(GLOBAL APPEND PROPERTY NEO_CORE_SRCS_LINK ${CORE_SRCS_LINK_LINUX}) endif() compute-runtime-22.14.22890/shared/source/ail/000077500000000000000000000000001422164147700206655ustar00rootroot00000000000000compute-runtime-22.14.22890/shared/source/ail/CMakeLists.txt000066400000000000000000000005351422164147700234300ustar00rootroot00000000000000# # 
Copyright (C) 2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(NEO_CORE_AIL ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/ail_configuration.h ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}ail_configuration.cpp ) set_property(GLOBAL PROPERTY NEO_CORE_AIL ${NEO_CORE_AIL}) add_subdirectories() compute-runtime-22.14.22890/shared/source/ail/ail_configuration.cpp000066400000000000000000000024721422164147700250720ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/ail/ail_configuration.h" #include #include namespace NEO { /* * fp64 support is unavailable on some Intel GPUs, and the SW emulation in IGC should not be enabled by default. * For Blender, fp64 is not performance-critical - SW emulation is good enough for the application to be usable * (some versions would not function correctly without it). * */ std::map> applicationMap = {{"blender", {AILEnumeration::ENABLE_FP64}}}; AILConfiguration *ailConfigurationTable[IGFX_MAX_PRODUCT] = {}; AILConfiguration *AILConfiguration::get(PRODUCT_FAMILY productFamily) { return ailConfigurationTable[productFamily]; } void AILConfiguration::apply(RuntimeCapabilityTable &runtimeCapabilityTable) { auto search = applicationMap.find(processName); if (search != applicationMap.end()) { for (size_t i = 0; i < search->second.size(); ++i) { switch (search->second[i]) { case AILEnumeration::ENABLE_FP64: runtimeCapabilityTable.ftrSupportsFP64 = true; break; default: break; } } } applyExt(runtimeCapabilityTable); } } // namespace NEOcompute-runtime-22.14.22890/shared/source/ail/ail_configuration.h000066400000000000000000000035211422164147700245330ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/hw_info.h" #include #pragma once /* * AIL (Application Intelligence Layer) is a set of per-application controls that influence driver behavior. 
* The primary goal is to improve user experience and/or performance. * * AIL provides application detection mechanism based on running processes in the system. * Mechanism works on Windows and Linux, is flexible and easily extendable to new applications. * * E.g. AIL can detect running Blender application and enable fp64 emulation on hardware * that does not support native fp64. * * Disclaimer: we should never use this for benchmarking or conformance purposes - this would be cheating. * */ namespace NEO { enum class AILEnumeration : uint32_t { DISABLE_COMPRESSION, ENABLE_FP64, AIL_MAX_OPTIONS_COUNT }; class AILConfiguration { public: AILConfiguration() = default; MOCKABLE_VIRTUAL bool initProcessExecutableName(); static AILConfiguration *get(PRODUCT_FAMILY productFamily); virtual void apply(RuntimeCapabilityTable &runtimeCapabilityTable); protected: virtual void applyExt(RuntimeCapabilityTable &runtimeCapabilityTable) = 0; std::string processName; }; extern AILConfiguration *ailConfigurationTable[IGFX_MAX_PRODUCT]; template class AILConfigurationHw : public AILConfiguration { public: static AILConfigurationHw &get() { static AILConfigurationHw ailConfiguration; return ailConfiguration; } void applyExt(RuntimeCapabilityTable &runtimeCapabilityTable) override; }; template struct EnableAIL { EnableAIL() { ailConfigurationTable[product] = &AILConfigurationHw::get(); } }; } // namespace NEO compute-runtime-22.14.22890/shared/source/ail/gen12lp/000077500000000000000000000000001422164147700221355ustar00rootroot00000000000000compute-runtime-22.14.22890/shared/source/ail/gen12lp/adlp/000077500000000000000000000000001422164147700230555ustar00rootroot00000000000000compute-runtime-22.14.22890/shared/source/ail/gen12lp/adlp/ail_configuration_adlp.cpp000066400000000000000000000006721422164147700302620ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/ail/ail_configuration.h" #include namespace NEO 
{ static EnableAIL enableAILADLP; std::map> applicationMapADLP = {}; template <> inline void AILConfigurationHw::applyExt(RuntimeCapabilityTable &runtimeCapabilityTable) { } } // namespace NEO compute-runtime-22.14.22890/shared/source/ail/gen12lp/adls/000077500000000000000000000000001422164147700230605ustar00rootroot00000000000000compute-runtime-22.14.22890/shared/source/ail/gen12lp/adls/ail_configuration_adls.cpp000066400000000000000000000006721422164147700302700ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/ail/ail_configuration.h" #include namespace NEO { static EnableAIL enableAILADLS; std::map> applicationMapADLS = {}; template <> inline void AILConfigurationHw::applyExt(RuntimeCapabilityTable &runtimeCapabilityTable) { } } // namespace NEO compute-runtime-22.14.22890/shared/source/ail/gen12lp/dg1/000077500000000000000000000000001422164147700226105ustar00rootroot00000000000000compute-runtime-22.14.22890/shared/source/ail/gen12lp/dg1/ail_configuration_dg1.cpp000066400000000000000000000006501422164147700275440ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/ail/ail_configuration.h" #include namespace NEO { static EnableAIL enableAILDG1; std::map> applicationMapDG1 = {}; template <> inline void AILConfigurationHw::applyExt(RuntimeCapabilityTable &runtimeCapabilityTable) { } } // namespace NEO compute-runtime-22.14.22890/shared/source/ail/gen12lp/rkl/000077500000000000000000000000001422164147700227255ustar00rootroot00000000000000compute-runtime-22.14.22890/shared/source/ail/gen12lp/rkl/ail_configuration_rkl.cpp000066400000000000000000000006661422164147700300050ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/ail/ail_configuration.h" #include namespace NEO { static EnableAIL enableAILRKL; std::map> applicationMapRKL 
= {}; template <> inline void AILConfigurationHw::applyExt(RuntimeCapabilityTable &runtimeCapabilityTable) { } } // namespace NEO compute-runtime-22.14.22890/shared/source/ail/gen12lp/tgllp/000077500000000000000000000000001422164147700232575ustar00rootroot00000000000000compute-runtime-22.14.22890/shared/source/ail/gen12lp/tgllp/ail_configuration_tgllp.cpp000066400000000000000000000006761422164147700306720ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/ail/ail_configuration.h" #include namespace NEO { static EnableAIL enableAILTGLLP; std::map> applicationMapTGLLP = {}; template <> inline void AILConfigurationHw::applyExt(RuntimeCapabilityTable &runtimeCapabilityTable) { } } // namespace NEO compute-runtime-22.14.22890/shared/source/ail/linux/000077500000000000000000000000001422164147700220245ustar00rootroot00000000000000compute-runtime-22.14.22890/shared/source/ail/linux/CMakeLists.txt000066400000000000000000000004561422164147700245710ustar00rootroot00000000000000# # Copyright (C) 2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(NEO_CORE_AIL_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/ail_configuration_linux.cpp ) set_property(GLOBAL PROPERTY NEO_CORE_AIL_LINUX ${NEO_CORE_AIL_LINUX}) add_subdirectories() compute-runtime-22.14.22890/shared/source/ail/linux/ail_configuration_linux.cpp000066400000000000000000000013731422164147700274470ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/ail/ail_configuration.h" #include "shared/source/os_interface/linux/sys_calls.h" #include // Application detection is performed using the process name of the given application. 
namespace NEO { bool AILConfiguration::initProcessExecutableName() { char path[512] = {0}; int result = SysCalls::readlink("/proc/self/exe", path, sizeof(path) - 1); if (result == -1) { return false; } path[result] = '\0'; std::string_view pathView(path); auto lastPosition = pathView.find_last_of("/"); pathView.remove_prefix(lastPosition + 1u); processName = pathView; return true; } } // namespace NEOcompute-runtime-22.14.22890/shared/source/ail/windows/000077500000000000000000000000001422164147700223575ustar00rootroot00000000000000compute-runtime-22.14.22890/shared/source/ail/windows/CMakeLists.txt000066400000000000000000000004661422164147700251250ustar00rootroot00000000000000# # Copyright (C) 2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(NEO_CORE_AIL_WINDOWS ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/ail_configuration_windows.cpp ) set_property(GLOBAL PROPERTY NEO_CORE_AIL_WINDOWS ${NEO_CORE_AIL_WINDOWS}) add_subdirectories() compute-runtime-22.14.22890/shared/source/ail/windows/ail_configuration_windows.cpp000066400000000000000000000020031422164147700303240ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/ail/ail_configuration.h" #include "shared/source/os_interface/windows/sys_calls.h" // Application detection is performed using the process name of the given application. 
namespace NEO { bool AILConfiguration::initProcessExecutableName() { const DWORD length = MAX_PATH; WCHAR processFilenameW[length]; char processFilename[length] = ""; auto status = SysCalls::getModuleFileName(nullptr, processFilenameW, MAX_PATH); if (status != 0) { std::wcstombs(processFilename, processFilenameW, MAX_PATH); } std::string_view pathView(processFilename); auto lastPosition = pathView.find_last_of("\\"); pathView.remove_prefix(lastPosition + 1u); lastPosition = pathView.find(".exe"); if (lastPosition != std::string_view::npos) { pathView.remove_suffix(pathView.size() - lastPosition); } processName = pathView; return status; } } // namespace NEOcompute-runtime-22.14.22890/shared/source/ail/xe_hp_core/000077500000000000000000000000001422164147700230005ustar00rootroot00000000000000compute-runtime-22.14.22890/shared/source/ail/xe_hp_core/xe_hp_sdv/000077500000000000000000000000001422164147700247575ustar00rootroot00000000000000compute-runtime-22.14.22890/shared/source/ail/xe_hp_core/xe_hp_sdv/ail_configuration_xe_hp_sdv.cpp000066400000000000000000000006741422164147700332250ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/ail/ail_configuration.h" #include namespace NEO { static EnableAIL enableAILXEHPSDV; std::map> applicationMapXEHPSDV = {}; template <> inline void AILConfigurationHw::applyExt(RuntimeCapabilityTable &runtimeCapabilityTable) { } } // namespace NEO compute-runtime-22.14.22890/shared/source/ail/xe_hpc_core/000077500000000000000000000000001422164147700231435ustar00rootroot00000000000000compute-runtime-22.14.22890/shared/source/ail/xe_hpc_core/pvc/000077500000000000000000000000001422164147700237335ustar00rootroot00000000000000compute-runtime-22.14.22890/shared/source/ail/xe_hpc_core/pvc/ail_configuration_pvc.cpp000066400000000000000000000006501422164147700310040ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT 
* */ #include "shared/source/ail/ail_configuration.h" #include namespace NEO { static EnableAIL enableAILPVC; std::map> applicationMapPVC = {}; template <> inline void AILConfigurationHw::applyExt(RuntimeCapabilityTable &runtimeCapabilityTable) { } } // namespace NEO compute-runtime-22.14.22890/shared/source/ail/xe_hpg_core/000077500000000000000000000000001422164147700231475ustar00rootroot00000000000000compute-runtime-22.14.22890/shared/source/ail/xe_hpg_core/dg2/000077500000000000000000000000001422164147700236235ustar00rootroot00000000000000compute-runtime-22.14.22890/shared/source/ail/xe_hpg_core/dg2/ail_configuration_dg2.cpp000066400000000000000000000006501422164147700305600ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/ail/ail_configuration.h" #include namespace NEO { static EnableAIL enableAILDG2; std::map> applicationMapDG2 = {}; template <> inline void AILConfigurationHw::applyExt(RuntimeCapabilityTable &runtimeCapabilityTable) { } } // namespace NEO compute-runtime-22.14.22890/shared/source/aub/000077500000000000000000000000001422164147700206675ustar00rootroot00000000000000compute-runtime-22.14.22890/shared/source/aub/CMakeLists.txt000066400000000000000000000017361422164147700234360ustar00rootroot00000000000000# # Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(NEO_CORE_AUB ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/aub_center.cpp ${CMAKE_CURRENT_SOURCE_DIR}/aub_center.h ${CMAKE_CURRENT_SOURCE_DIR}/aub_helper.cpp ${CMAKE_CURRENT_SOURCE_DIR}/aub_helper.h ${CMAKE_CURRENT_SOURCE_DIR}/aub_helper_add_mmio.cpp ${CMAKE_CURRENT_SOURCE_DIR}/aub_helper_base.inl ${CMAKE_CURRENT_SOURCE_DIR}/aub_helper_bdw_and_later.inl ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}aub_helper_extra.cpp ${CMAKE_CURRENT_SOURCE_DIR}/aub_mapper_base.h ${CMAKE_CURRENT_SOURCE_DIR}/aub_stream_provider.h 
${CMAKE_CURRENT_SOURCE_DIR}/aub_subcapture.cpp ${CMAKE_CURRENT_SOURCE_DIR}/aub_subcapture.h ) if(SUPPORT_XEHP_AND_LATER) list(APPEND NEO_CORE_AUB ${CMAKE_CURRENT_SOURCE_DIR}/aub_helper_xehp_and_later.inl ) endif() set_property(GLOBAL PROPERTY NEO_CORE_AUB ${NEO_CORE_AUB}) add_subdirectories() compute-runtime-22.14.22890/shared/source/aub/aub_center.cpp000066400000000000000000000103041422164147700235000ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/aub/aub_center.h" #include "shared/source/aub/aub_helper.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/os_interface/hw_info_config.h" #include "third_party/aub_stream/headers/aub_manager.h" #include "third_party/aub_stream/headers/aubstream.h" namespace NEO { extern aub_stream::AubManager *createAubManager(uint32_t productFamily, uint32_t devicesCount, uint64_t memoryBankSize, uint32_t stepping, bool localMemorySupported, uint32_t streamMode, uint64_t gpuAddressSpace); AubCenter::AubCenter(const HardwareInfo *pHwInfo, const GmmHelper &gmmHelper, bool localMemoryEnabled, const std::string &aubFileName, CommandStreamReceiverType csrType) { if (DebugManager.flags.UseAubStream.get()) { auto devicesCount = HwHelper::getSubDevicesCount(pHwInfo); auto memoryBankSize = AubHelper::getPerTileLocalMemorySize(pHwInfo); CommandStreamReceiverType type = csrType; if (DebugManager.flags.SetCommandStreamReceiver.get() >= CommandStreamReceiverType::CSR_HW) { type = static_cast(DebugManager.flags.SetCommandStreamReceiver.get()); } aubStreamMode = getAubStreamMode(aubFileName, type); auto &hwHelper = HwHelper::get(pHwInfo->platform.eRenderCoreFamily); const auto &hwInfoConfig = *HwInfoConfig::get(pHwInfo->platform.eProductFamily); stepping = hwInfoConfig.getAubStreamSteppingFromHwRevId(*pHwInfo); aub_stream::MMIOList 
extraMmioList = hwHelper.getExtraMmioList(*pHwInfo, gmmHelper); aub_stream::MMIOList debugMmioList = AubHelper::getAdditionalMmioList(); extraMmioList.insert(extraMmioList.end(), debugMmioList.begin(), debugMmioList.end()); aub_stream::injectMMIOList(extraMmioList); AubHelper::setTbxConfiguration(); aubManager.reset(createAubManager(pHwInfo->platform.eProductFamily, devicesCount, memoryBankSize, stepping, localMemoryEnabled, aubStreamMode, pHwInfo->capabilityTable.gpuAddressSpace)); } addressMapper = std::make_unique(); streamProvider = std::make_unique(); subCaptureCommon = std::make_unique(); if (DebugManager.flags.AUBDumpSubCaptureMode.get()) { this->subCaptureCommon->subCaptureMode = static_cast(DebugManager.flags.AUBDumpSubCaptureMode.get()); this->subCaptureCommon->subCaptureFilter.dumpKernelStartIdx = static_cast(DebugManager.flags.AUBDumpFilterKernelStartIdx.get()); this->subCaptureCommon->subCaptureFilter.dumpKernelEndIdx = static_cast(DebugManager.flags.AUBDumpFilterKernelEndIdx.get()); this->subCaptureCommon->subCaptureFilter.dumpNamedKernelStartIdx = static_cast(DebugManager.flags.AUBDumpFilterNamedKernelStartIdx.get()); this->subCaptureCommon->subCaptureFilter.dumpNamedKernelEndIdx = static_cast(DebugManager.flags.AUBDumpFilterNamedKernelEndIdx.get()); if (DebugManager.flags.AUBDumpFilterKernelName.get() != "unk") { this->subCaptureCommon->subCaptureFilter.dumpKernelName = DebugManager.flags.AUBDumpFilterKernelName.get(); } } } AubCenter::AubCenter() { addressMapper = std::make_unique(); streamProvider = std::make_unique(); subCaptureCommon = std::make_unique(); } uint32_t AubCenter::getAubStreamMode(const std::string &aubFileName, uint32_t csrType) { uint32_t mode = aub_stream::mode::aubFile; switch (csrType) { case CommandStreamReceiverType::CSR_HW_WITH_AUB: case CommandStreamReceiverType::CSR_AUB: mode = aub_stream::mode::aubFile; break; case CommandStreamReceiverType::CSR_TBX: mode = aub_stream::mode::tbx; break; case 
CommandStreamReceiverType::CSR_TBX_WITH_AUB: mode = aub_stream::mode::aubFileAndTbx; break; default: break; } return mode; } } // namespace NEO compute-runtime-22.14.22890/shared/source/aub/aub_center.h000066400000000000000000000035661422164147700231610ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/aub/aub_stream_provider.h" #include "shared/source/aub/aub_subcapture.h" #include "shared/source/helpers/options.h" #include "shared/source/memory_manager/address_mapper.h" #include "shared/source/memory_manager/physical_address_allocator.h" #include "third_party/aub_stream/headers/aub_manager.h" namespace NEO { struct HardwareInfo; class GmmHelper; class AubCenter { public: AubCenter(const HardwareInfo *pHwInfo, const GmmHelper &gmmHelper, bool localMemoryEnabled, const std::string &aubFileName, CommandStreamReceiverType csrType); AubCenter(); virtual ~AubCenter() = default; void initPhysicalAddressAllocator(PhysicalAddressAllocator *pPhysicalAddressAllocator) { physicalAddressAllocator = std::unique_ptr(pPhysicalAddressAllocator); } PhysicalAddressAllocator *getPhysicalAddressAllocator() const { return physicalAddressAllocator.get(); } AddressMapper *getAddressMapper() const { return addressMapper.get(); } AubStreamProvider *getStreamProvider() const { return streamProvider.get(); } AubSubCaptureCommon *getSubCaptureCommon() const { return subCaptureCommon.get(); } aub_stream::AubManager *getAubManager() const { return aubManager.get(); } static uint32_t getAubStreamMode(const std::string &aubFileName, uint32_t csrType); protected: std::unique_ptr physicalAddressAllocator; std::unique_ptr addressMapper; std::unique_ptr streamProvider; std::unique_ptr subCaptureCommon; std::unique_ptr aubManager; uint32_t aubStreamMode = 0; uint32_t stepping = 0; }; } // namespace NEO 
compute-runtime-22.14.22890/shared/source/aub/aub_helper.cpp000066400000000000000000000030741422164147700235050ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/aub/aub_helper.h" #include "shared/source/aub_mem_dump/aub_mem_dump.h" #include "shared/source/aub_mem_dump/page_table_entry_bits.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/helpers/basic_math.h" #include "shared/source/helpers/constants.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/tbx/tbx_proto.h" #include "third_party/aub_stream/headers/aubstream.h" namespace NEO { uint64_t AubHelper::getTotalMemBankSize() { return 32ull * MemoryConstants::gigaByte; } int AubHelper::getMemTrace(uint64_t pdEntryBits) { if (pdEntryBits & BIT(PageTableEntry::localMemoryBit)) { return AubMemDump::AddressSpaceValues::TraceLocal; } return AubMemDump::AddressSpaceValues::TraceNonlocal; } uint64_t AubHelper::getPTEntryBits(uint64_t pdEntryBits) { pdEntryBits &= ~BIT(PageTableEntry::localMemoryBit); return pdEntryBits; } uint32_t AubHelper::getMemType(uint32_t addressSpace) { if (addressSpace == AubMemDump::AddressSpaceValues::TraceLocal) { return mem_types::MEM_TYPE_LOCALMEM; } return mem_types::MEM_TYPE_SYSTEM; } uint64_t AubHelper::getPerTileLocalMemorySize(const HardwareInfo *pHwInfo) { if (DebugManager.flags.HBMSizePerTileInGigabytes.get() > 0) { return DebugManager.flags.HBMSizePerTileInGigabytes.get() * MemoryConstants::gigaByte; } return getTotalMemBankSize() / HwHelper::getSubDevicesCount(pHwInfo); } } // namespace NEO compute-runtime-22.14.22890/shared/source/aub/aub_helper.h000066400000000000000000000053041422164147700231500ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/aub/aub_mapper_base.h" #include "shared/source/helpers/hw_info.h" #include 
"shared/source/helpers/non_copyable_or_moveable.h" #include "shared/source/memory_manager/graphics_allocation.h" namespace NEO { class AubHelper : public NonCopyableOrMovableClass { public: static bool isOneTimeAubWritableAllocationType(const AllocationType &type) { switch (type) { case AllocationType::PIPE: case AllocationType::CONSTANT_SURFACE: case AllocationType::GLOBAL_SURFACE: case AllocationType::KERNEL_ISA: case AllocationType::KERNEL_ISA_INTERNAL: case AllocationType::PRIVATE_SURFACE: case AllocationType::SCRATCH_SURFACE: case AllocationType::WORK_PARTITION_SURFACE: case AllocationType::BUFFER: case AllocationType::BUFFER_HOST_MEMORY: case AllocationType::IMAGE: case AllocationType::TIMESTAMP_PACKET_TAG_BUFFER: case AllocationType::EXTERNAL_HOST_PTR: case AllocationType::MAP_ALLOCATION: case AllocationType::SVM_GPU: return true; default: return false; } } static uint64_t getTotalMemBankSize(); static int getMemTrace(uint64_t pdEntryBits); static uint64_t getPTEntryBits(uint64_t pdEntryBits); static uint32_t getMemType(uint32_t addressSpace); static uint64_t getPerTileLocalMemorySize(const HardwareInfo *pHwInfo); static MMIOList getAdditionalMmioList(); static void setTbxConfiguration(); virtual int getDataHintForPml4Entry() const = 0; virtual int getDataHintForPdpEntry() const = 0; virtual int getDataHintForPdEntry() const = 0; virtual int getDataHintForPtEntry() const = 0; virtual int getMemTraceForPml4Entry() const = 0; virtual int getMemTraceForPdpEntry() const = 0; virtual int getMemTraceForPdEntry() const = 0; virtual int getMemTraceForPtEntry() const = 0; static MMIOList splitMMIORegisters(const std::string ®isters, char delimiter); }; template class AubHelperHw : public AubHelper { public: AubHelperHw(bool localMemoryEnabled) : localMemoryEnabled(localMemoryEnabled){}; int getDataHintForPml4Entry() const override; int getDataHintForPdpEntry() const override; int getDataHintForPdEntry() const override; int getDataHintForPtEntry() const override; int 
getMemTraceForPml4Entry() const override; int getMemTraceForPdpEntry() const override; int getMemTraceForPdEntry() const override; int getMemTraceForPtEntry() const override; protected: bool localMemoryEnabled; }; } // namespace NEO compute-runtime-22.14.22890/shared/source/aub/aub_helper_add_mmio.cpp000066400000000000000000000031531422164147700253340ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/aub/aub_helper.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "third_party/aub_stream/headers/aubstream.h" namespace NEO { MMIOList AubHelper::getAdditionalMmioList() { return splitMMIORegisters(DebugManager.flags.AubDumpAddMmioRegistersList.get(), ';'); } MMIOList AubHelper::splitMMIORegisters(const std::string ®isters, char delimiter) { MMIOList result; bool firstElementInPair = false; std::string token; uint32_t registerOffset = 0; uint32_t registerValue = 0; std::istringstream stream(""); for (std::string::const_iterator i = registers.begin();; i++) { if (i == registers.end() || *i == delimiter) { if (token.size() > 0) { stream.str(token); stream.clear(); firstElementInPair = !firstElementInPair; stream >> std::hex >> (firstElementInPair ? 
registerOffset : registerValue); if (stream.fail()) { result.clear(); break; } token.clear(); if (!firstElementInPair) { result.push_back(std::pair(registerOffset, registerValue)); registerValue = 0; registerOffset = 0; } } if (i == registers.end()) { break; } } else { token.push_back(*i); } } return result; } } // namespace NEO compute-runtime-22.14.22890/shared/source/aub/aub_helper_base.inl000066400000000000000000000022611422164147700244740ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/aub/aub_helper.h" #include "shared/source/aub_mem_dump/aub_mem_dump.h" namespace NEO { template int AubHelperHw::getMemTraceForPml4Entry() const { if (localMemoryEnabled) { return AubMemDump::AddressSpaceValues::TraceLocal; } return AubMemDump::AddressSpaceValues::TracePml4Entry; } template int AubHelperHw::getMemTraceForPdpEntry() const { if (localMemoryEnabled) { return AubMemDump::AddressSpaceValues::TraceLocal; } return AubMemDump::AddressSpaceValues::TracePhysicalPdpEntry; } template int AubHelperHw::getMemTraceForPdEntry() const { if (localMemoryEnabled) { return AubMemDump::AddressSpaceValues::TraceLocal; } return AubMemDump::AddressSpaceValues::TracePpgttPdEntry; } template int AubHelperHw::getMemTraceForPtEntry() const { if (localMemoryEnabled) { return AubMemDump::AddressSpaceValues::TraceLocal; } return AubMemDump::AddressSpaceValues::TracePpgttEntry; } } // namespace NEO compute-runtime-22.14.22890/shared/source/aub/aub_helper_bdw_and_later.inl000066400000000000000000000014101422164147700263420ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/aub/aub_helper_base.inl" namespace NEO { template int AubHelperHw::getDataHintForPml4Entry() const { return AubMemDump::DataTypeHintValues::TraceNotype; } template int AubHelperHw::getDataHintForPdpEntry() const { return 
AubMemDump::DataTypeHintValues::TraceNotype; } template int AubHelperHw::getDataHintForPdEntry() const { return AubMemDump::DataTypeHintValues::TraceNotype; } template int AubHelperHw::getDataHintForPtEntry() const { return AubMemDump::DataTypeHintValues::TraceNotype; } } // namespace NEO compute-runtime-22.14.22890/shared/source/aub/aub_helper_extra.cpp000066400000000000000000000011241422164147700247020ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/aub/aub_helper.h" #include "shared/source/aub_mem_dump/aub_mem_dump.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "third_party/aub_stream/headers/aubstream.h" namespace NEO { void AubHelper::setTbxConfiguration() { aub_stream::setTbxServerIp(DebugManager.flags.TbxServer.get()); aub_stream::setTbxServerPort(DebugManager.flags.TbxPort.get()); aub_stream::setTbxFrontdoorMode(DebugManager.flags.TbxFrontdoorMode.get()); } } // namespace NEO compute-runtime-22.14.22890/shared/source/aub/aub_helper_xehp_and_later.inl000066400000000000000000000022271422164147700265410ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/aub/aub_helper_base.inl" namespace NEO { template int AubHelperHw::getDataHintForPml4Entry() const { if (localMemoryEnabled) { return AubMemDump::DataTypeHintValues::TracePpgttLevel4; } return AubMemDump::DataTypeHintValues::TraceNotype; } template int AubHelperHw::getDataHintForPdpEntry() const { if (localMemoryEnabled) { return AubMemDump::DataTypeHintValues::TracePpgttLevel3; } return AubMemDump::DataTypeHintValues::TraceNotype; } template int AubHelperHw::getDataHintForPdEntry() const { if (localMemoryEnabled) { return AubMemDump::DataTypeHintValues::TracePpgttLevel2; } return AubMemDump::DataTypeHintValues::TraceNotype; } template int AubHelperHw::getDataHintForPtEntry() const { if (localMemoryEnabled) { return 
AubMemDump::DataTypeHintValues::TracePpgttLevel1; } return AubMemDump::DataTypeHintValues::TraceNotype; } } // namespace NEO compute-runtime-22.14.22890/shared/source/aub/aub_mapper_base.h000066400000000000000000000006351422164147700241510ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/aub_mem_dump/aub_mem_dump.h" #include "shared/source/helpers/completion_stamp.h" #include namespace NEO { template struct AUBFamilyMapper { }; using MMIOPair = std::pair; using MMIOList = std::vector; } // namespace NEO compute-runtime-22.14.22890/shared/source/aub/aub_stream_interface.cpp000066400000000000000000000010131422164147700255300ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/aub/aub_center.h" using namespace aub_stream; namespace NEO { AubManager *createAubManager(uint32_t gfxFamily, uint32_t devicesCount, uint64_t memoryBankSize, uint32_t stepping, bool localMemorySupported, uint32_t streamMode, uint64_t gpuAddressSpace) { return AubManager::create(gfxFamily, devicesCount, memoryBankSize, stepping, localMemorySupported, streamMode, gpuAddressSpace); } } // namespace NEO compute-runtime-22.14.22890/shared/source/aub/aub_stream_provider.h000066400000000000000000000010361422164147700250740ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/aub_mem_dump/aub_mem_dump.h" namespace NEO { class AubStreamProvider { public: virtual ~AubStreamProvider() = default; virtual AubMemDump::AubFileStream *getStream() = 0; }; class AubFileStreamProvider : public AubStreamProvider { public: AubMemDump::AubFileStream *getStream() override { return &stream; }; protected: AubMemDump::AubFileStream stream; }; } // namespace NEO 
compute-runtime-22.14.22890/shared/source/aub/aub_subcapture.cpp000066400000000000000000000135031422164147700244010ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/aub/aub_subcapture.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/helpers/debug_helpers.h" #include "shared/source/helpers/string.h" #include "shared/source/utilities/debug_settings_reader.h" namespace NEO { AubSubCaptureManager::AubSubCaptureManager(const std::string &fileName, AubSubCaptureCommon &subCaptureCommon, const char *regPath) : initialFileName(fileName), subCaptureCommon(subCaptureCommon) { settingsReader.reset(SettingsReader::createOsReader(true, regPath)); } AubSubCaptureManager::~AubSubCaptureManager() = default; bool AubSubCaptureManager::isSubCaptureEnabled() const { auto guard = this->lock(); return subCaptureIsActive || subCaptureWasActiveInPreviousEnqueue; } void AubSubCaptureManager::disableSubCapture() { auto guard = this->lock(); subCaptureIsActive = subCaptureWasActiveInPreviousEnqueue = false; }; AubSubCaptureStatus AubSubCaptureManager::checkAndActivateSubCapture(const std::string &kernelName) { if (kernelName.empty()) { return {false, false}; } auto guard = this->lock(); kernelCurrentIdx = subCaptureCommon.getKernelCurrentIndexAndIncrement(); subCaptureWasActiveInPreviousEnqueue = subCaptureIsActive; subCaptureIsActive = false; switch (subCaptureCommon.subCaptureMode) { case SubCaptureMode::Toggle: subCaptureIsActive = isSubCaptureToggleActive(); break; case SubCaptureMode::Filter: subCaptureIsActive = isSubCaptureFilterActive(kernelName); break; default: DEBUG_BREAK_IF(false); break; } return {subCaptureIsActive, subCaptureWasActiveInPreviousEnqueue}; } AubSubCaptureStatus AubSubCaptureManager::getSubCaptureStatus() const { auto guard = this->lock(); return {this->subCaptureIsActive, this->subCaptureWasActiveInPreviousEnqueue}; } const 
std::string &AubSubCaptureManager::getSubCaptureFileName(const std::string &kernelName) { auto guard = this->lock(); if (useToggleFileName) { currentFileName = getToggleFileName(); } if (currentFileName.empty()) { currentFileName = getAubCaptureFileName(); useToggleFileName = false; } switch (subCaptureCommon.subCaptureMode) { case SubCaptureMode::Filter: if (currentFileName.empty()) { currentFileName = generateFilterFileName(); useToggleFileName = false; } break; case SubCaptureMode::Toggle: if (currentFileName.empty()) { currentFileName = generateToggleFileName(kernelName); useToggleFileName = false; } break; default: DEBUG_BREAK_IF(false); break; } return currentFileName; } bool AubSubCaptureManager::isKernelIndexInSubCaptureRange(uint32_t kernelIdx, uint32_t rangeStartIdx, uint32_t rangeEndIdx) const { return ((rangeStartIdx <= kernelIdx) && (kernelIdx <= rangeEndIdx)); } bool AubSubCaptureManager::isSubCaptureToggleActive() const { return settingsReader->getSetting("AUBDumpToggleCaptureOnOff", false); } std::string AubSubCaptureManager::getToggleFileName() const { return settingsReader->getSetting("AUBDumpToggleFileName", std::string("")); } std::string AubSubCaptureManager::getAubCaptureFileName() const { if (DebugManager.flags.AUBDumpCaptureFileName.get() != "unk") { return DebugManager.flags.AUBDumpCaptureFileName.get(); } return {}; } std::string AubSubCaptureManager::generateFilterFileName() const { std::string baseFileName = initialFileName.substr(0, initialFileName.length() - strlen(".aub")); std::string filterFileName = baseFileName + "_filter"; filterFileName += "_from_" + std::to_string(subCaptureCommon.subCaptureFilter.dumpKernelStartIdx); filterFileName += "_to_" + std::to_string(subCaptureCommon.subCaptureFilter.dumpKernelEndIdx); if (!subCaptureCommon.subCaptureFilter.dumpKernelName.empty()) { filterFileName += "_" + subCaptureCommon.subCaptureFilter.dumpKernelName; filterFileName += "_from_" + 
std::to_string(subCaptureCommon.subCaptureFilter.dumpNamedKernelStartIdx); filterFileName += "_to_" + std::to_string(subCaptureCommon.subCaptureFilter.dumpNamedKernelEndIdx); } filterFileName += ".aub"; return filterFileName; } std::string AubSubCaptureManager::generateToggleFileName(const std::string &kernelName) const { std::string baseFileName = initialFileName.substr(0, initialFileName.length() - strlen(".aub")); std::string toggleFileName = baseFileName + "_toggle"; toggleFileName += "_from_" + std::to_string(kernelCurrentIdx); if (!kernelName.empty()) { toggleFileName += "_" + kernelName; } toggleFileName += ".aub"; return toggleFileName; } bool AubSubCaptureManager::isSubCaptureFilterActive(const std::string &kernelName) { auto subCaptureIsActive = false; if (subCaptureCommon.subCaptureFilter.dumpKernelName.empty()) { if (isKernelIndexInSubCaptureRange(kernelCurrentIdx, subCaptureCommon.subCaptureFilter.dumpKernelStartIdx, subCaptureCommon.subCaptureFilter.dumpKernelEndIdx)) { subCaptureIsActive = true; } } else { if (0 == kernelName.compare(subCaptureCommon.subCaptureFilter.dumpKernelName)) { kernelNameMatchesNum = subCaptureCommon.getKernelNameMatchesNumAndIncrement(); if (isKernelIndexInSubCaptureRange(kernelNameMatchesNum, subCaptureCommon.subCaptureFilter.dumpNamedKernelStartIdx, subCaptureCommon.subCaptureFilter.dumpNamedKernelEndIdx)) { subCaptureIsActive = true; } } } return subCaptureIsActive; } std::unique_lock AubSubCaptureManager::lock() const { return std::unique_lock{mutex}; } } // namespace NEO compute-runtime-22.14.22890/shared/source/aub/aub_subcapture.h000066400000000000000000000055751422164147700240600ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/aub_subcapture_status.h" #include #include #include #include namespace NEO { class SettingsReader; class AubSubCaptureCommon { public: enum class SubCaptureMode { Off = 0, 
//subcapture off Filter, //subcapture kernel specified by filter (static regkey) Toggle //toggle subcapture on/off (dynamic regkey) } subCaptureMode = SubCaptureMode::Off; struct SubCaptureFilter { std::string dumpKernelName = ""; uint32_t dumpNamedKernelStartIdx = 0; uint32_t dumpNamedKernelEndIdx = static_cast(-1); uint32_t dumpKernelStartIdx = 0; uint32_t dumpKernelEndIdx = static_cast(-1); } subCaptureFilter; inline uint32_t getKernelCurrentIndexAndIncrement() { return kernelCurrentIdx.fetch_add(1); } inline uint32_t getKernelNameMatchesNumAndIncrement() { return kernelNameMatchesNum.fetch_add(1); } protected: std::atomic kernelCurrentIdx{0}; std::atomic kernelNameMatchesNum{0}; }; class AubSubCaptureManager { public: using SubCaptureMode = AubSubCaptureCommon::SubCaptureMode; using SubCaptureFilter = AubSubCaptureCommon::SubCaptureFilter; inline bool isSubCaptureMode() const { return subCaptureCommon.subCaptureMode > SubCaptureMode::Off; } bool isSubCaptureEnabled() const; void disableSubCapture(); AubSubCaptureStatus checkAndActivateSubCapture(const std::string &kernelName); AubSubCaptureStatus getSubCaptureStatus() const; const std::string &getSubCaptureFileName(const std::string &kernelName); AubSubCaptureManager(const std::string &fileName, AubSubCaptureCommon &subCaptureCommon, const char *regPath); virtual ~AubSubCaptureManager(); protected: MOCKABLE_VIRTUAL bool isSubCaptureToggleActive() const; bool isSubCaptureFilterActive(const std::string &kernelName); MOCKABLE_VIRTUAL std::string getAubCaptureFileName() const; MOCKABLE_VIRTUAL std::string getToggleFileName() const; MOCKABLE_VIRTUAL std::string generateFilterFileName() const; MOCKABLE_VIRTUAL std::string generateToggleFileName(const std::string &kernelName) const; bool isKernelIndexInSubCaptureRange(uint32_t kernelIdx, uint32_t rangeStartIdx, uint32_t rangeEndIdx) const; MOCKABLE_VIRTUAL std::unique_lock lock() const; bool subCaptureIsActive = false; bool subCaptureWasActiveInPreviousEnqueue = 
false; uint32_t kernelCurrentIdx = 0; uint32_t kernelNameMatchesNum = 0; bool useToggleFileName = true; std::string initialFileName; std::string currentFileName; std::unique_ptr settingsReader; AubSubCaptureCommon &subCaptureCommon; mutable std::mutex mutex; }; } // namespace NEO compute-runtime-22.14.22890/shared/source/aub_mem_dump/000077500000000000000000000000001422164147700225525ustar00rootroot00000000000000compute-runtime-22.14.22890/shared/source/aub_mem_dump/CMakeLists.txt000066400000000000000000000021671422164147700253200ustar00rootroot00000000000000# # Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(NEO_CORE_AUB_MEM_DUMP ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/aub_alloc_dump.h ${CMAKE_CURRENT_SOURCE_DIR}/aub_alloc_dump.inl ${CMAKE_CURRENT_SOURCE_DIR}/aub_data.h ${CMAKE_CURRENT_SOURCE_DIR}/aub_header.h ${CMAKE_CURRENT_SOURCE_DIR}/aub_mem_dump.h ${CMAKE_CURRENT_SOURCE_DIR}/aub_mem_dump.cpp ${CMAKE_CURRENT_SOURCE_DIR}/aub_mem_dump.inl ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}context_flags.cpp ${CMAKE_CURRENT_SOURCE_DIR}/page_table_entry_bits.h ) if(NOT DEFINED AUB_STREAM_PROJECT_NAME) list(APPEND NEO_CORE_AUB_MEM_DUMP ${CMAKE_CURRENT_SOURCE_DIR}/aub_stream_stubs.cpp ) endif() if(SUPPORT_XEHP_AND_LATER) list(APPEND NEO_CORE_AUB_MEM_DUMP ${CMAKE_CURRENT_SOURCE_DIR}/aub_mem_dump_xehp_and_later.inl ) endif() if(SUPPORT_PVC_AND_LATER) list(APPEND NEO_CORE_AUB_MEM_DUMP ${CMAKE_CURRENT_SOURCE_DIR}/aub_mem_dump_pvc_and_later.inl ) endif() set_property(GLOBAL PROPERTY NEO_CORE_AUB_MEM_DUMP ${NEO_CORE_AUB_MEM_DUMP}) add_subdirectories() compute-runtime-22.14.22890/shared/source/aub_mem_dump/aub_alloc_dump.h000066400000000000000000000067611422164147700257030ustar00rootroot00000000000000/* * Copyright (C) 2019-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/aub_mem_dump/aub_mem_dump.h" #include "shared/source/debug_settings/debug_settings_manager.h" 
#include "shared/source/gmm_helper/resource_info.h" #include "shared/source/memory_manager/graphics_allocation.h" using namespace NEO; namespace aub_stream { struct SurfaceInfo; } namespace AubAllocDump { enum DumpFormat { NONE, BUFFER_BIN, BUFFER_TRE, IMAGE_BMP, IMAGE_TRE, }; inline bool isWritableBuffer(GraphicsAllocation &gfxAllocation) { return (gfxAllocation.getAllocationType() == AllocationType::BUFFER || gfxAllocation.getAllocationType() == AllocationType::BUFFER_HOST_MEMORY || gfxAllocation.getAllocationType() == AllocationType::EXTERNAL_HOST_PTR || gfxAllocation.getAllocationType() == AllocationType::MAP_ALLOCATION || gfxAllocation.getAllocationType() == AllocationType::SVM_GPU) && gfxAllocation.isMemObjectsAllocationWithWritableFlags(); } inline bool isWritableImage(GraphicsAllocation &gfxAllocation) { return (gfxAllocation.getAllocationType() == AllocationType::IMAGE) && gfxAllocation.isMemObjectsAllocationWithWritableFlags(); } inline DumpFormat getDumpFormat(GraphicsAllocation &gfxAllocation) { auto dumpBufferFormat = DebugManager.flags.AUBDumpBufferFormat.get(); auto dumpImageFormat = DebugManager.flags.AUBDumpImageFormat.get(); auto isDumpableBuffer = isWritableBuffer(gfxAllocation); auto isDumpableImage = isWritableImage(gfxAllocation); auto dumpFormat = DumpFormat::NONE; if (isDumpableBuffer) { if (0 == dumpBufferFormat.compare("BIN")) { dumpFormat = DumpFormat::BUFFER_BIN; } else if (0 == dumpBufferFormat.compare("TRE")) { dumpFormat = DumpFormat::BUFFER_TRE; } } else if (isDumpableImage) { if (0 == dumpImageFormat.compare("BMP")) { dumpFormat = DumpFormat::IMAGE_BMP; } else if (0 == dumpImageFormat.compare("TRE")) { dumpFormat = DumpFormat::IMAGE_TRE; } } return dumpFormat; } inline bool isBufferDumpFormat(DumpFormat dumpFormat) { return (AubAllocDump::DumpFormat::BUFFER_BIN == dumpFormat) || (dumpFormat == AubAllocDump::DumpFormat::BUFFER_TRE); } inline bool isImageDumpFormat(DumpFormat dumpFormat) { return (AubAllocDump::DumpFormat::IMAGE_BMP 
== dumpFormat) || (dumpFormat == AubAllocDump::DumpFormat::IMAGE_TRE); } template aub_stream::SurfaceInfo *getDumpSurfaceInfo(GraphicsAllocation &gfxAllocation, DumpFormat dumpFormat); template uint32_t getImageSurfaceTypeFromGmmResourceType(GMM_RESOURCE_TYPE gmmResourceType); template void dumpBufferInBinFormat(GraphicsAllocation &gfxAllocation, AubMemDump::AubFileStream *stream, uint32_t context); template void dumpImageInBmpFormat(GraphicsAllocation &gfxAllocation, AubMemDump::AubFileStream *stream, uint32_t context); template void dumpBufferInTreFormat(GraphicsAllocation &gfxAllocation, AubMemDump::AubFileStream *stream, uint32_t context); template void dumpImageInTreFormat(GraphicsAllocation &gfxAllocation, AubMemDump::AubFileStream *stream, uint32_t context); template void dumpAllocation(DumpFormat dumpFormat, GraphicsAllocation &gfxAllocation, AubMemDump::AubFileStream *stream, uint32_t context); } // namespace AubAllocDump compute-runtime-22.14.22890/shared/source/aub_mem_dump/aub_alloc_dump.inl000066400000000000000000000222161422164147700262270ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/aub_mem_dump/aub_alloc_dump.h" #include "shared/source/gmm_helper/gmm.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "third_party/aub_stream/headers/aubstream.h" using namespace NEO; using namespace aub_stream; namespace AubAllocDump { template uint32_t getImageSurfaceTypeFromGmmResourceType(GMM_RESOURCE_TYPE gmmResourceType) { using RENDER_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE; auto surfaceType = RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_NULL; switch (gmmResourceType) { case GMM_RESOURCE_TYPE::RESOURCE_1D: surfaceType = RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_1D; break; case GMM_RESOURCE_TYPE::RESOURCE_2D: surfaceType = RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_2D; break; case GMM_RESOURCE_TYPE::RESOURCE_3D: surfaceType = 
RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_3D; break; default: DEBUG_BREAK_IF(true); break; } return surfaceType; } template SurfaceInfo *getDumpSurfaceInfo(GraphicsAllocation &gfxAllocation, DumpFormat dumpFormat) { SurfaceInfo *surfaceInfo = nullptr; if (isBufferDumpFormat(dumpFormat)) { using RENDER_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE; using SURFACE_FORMAT = typename RENDER_SURFACE_STATE::SURFACE_FORMAT; surfaceInfo = new SurfaceInfo(); surfaceInfo->address = GmmHelper::decanonize(gfxAllocation.getGpuAddress()); surfaceInfo->width = static_cast(gfxAllocation.getUnderlyingBufferSize()); surfaceInfo->height = 1; surfaceInfo->pitch = static_cast(gfxAllocation.getUnderlyingBufferSize()); surfaceInfo->format = SURFACE_FORMAT::SURFACE_FORMAT_RAW; surfaceInfo->tilingType = RENDER_SURFACE_STATE::TILE_MODE_LINEAR; surfaceInfo->surftype = RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_BUFFER; surfaceInfo->compressed = gfxAllocation.isCompressionEnabled(); surfaceInfo->dumpType = (AubAllocDump::DumpFormat::BUFFER_TRE == dumpFormat) ? dumpType::tre : dumpType::bin; } else if (isImageDumpFormat(dumpFormat)) { auto gmm = gfxAllocation.getDefaultGmm(); if (gmm->gmmResourceInfo->getNumSamples() > 1) { return nullptr; } surfaceInfo = new SurfaceInfo(); surfaceInfo->address = GmmHelper::decanonize(gfxAllocation.getGpuAddress()); surfaceInfo->width = static_cast(gmm->gmmResourceInfo->getBaseWidth()); surfaceInfo->height = static_cast(gmm->gmmResourceInfo->getBaseHeight()); surfaceInfo->pitch = static_cast(gmm->gmmResourceInfo->getRenderPitch()); surfaceInfo->format = gmm->gmmResourceInfo->getResourceFormatSurfaceState(); surfaceInfo->tilingType = gmm->gmmResourceInfo->getTileModeSurfaceState(); surfaceInfo->surftype = getImageSurfaceTypeFromGmmResourceType(gmm->gmmResourceInfo->getResourceType()); surfaceInfo->compressed = gfxAllocation.isCompressionEnabled(); surfaceInfo->dumpType = (AubAllocDump::DumpFormat::IMAGE_TRE == dumpFormat) ? 
dumpType::tre : dumpType::bmp; } return surfaceInfo; } template void dumpBufferInBinFormat(GraphicsAllocation &gfxAllocation, AubMemDump::AubFileStream *stream, uint32_t context) { AubMemDump::AubCaptureBinaryDumpHD cmd; memset(&cmd, 0, sizeof(cmd)); cmd.Header.Type = 0x7; cmd.Header.Opcode = 0x1; cmd.Header.SubOp = 0x15; cmd.Header.DwordLength = ((sizeof(cmd) - sizeof(cmd.Header)) / sizeof(uint32_t)) - 1; cmd.setHeight(1); cmd.setWidth(gfxAllocation.getUnderlyingBufferSize()); cmd.setBaseAddr(gfxAllocation.getGpuAddress()); cmd.setPitch(gfxAllocation.getUnderlyingBufferSize()); cmd.GttType = 1; cmd.DirectoryHandle = context; stream->write(reinterpret_cast(&cmd), sizeof(cmd)); } template void dumpImageInBmpFormat(GraphicsAllocation &gfxAllocation, AubMemDump::AubFileStream *stream, uint32_t context) { auto gmm = gfxAllocation.getDefaultGmm(); AubMemDump::AubCmdDumpBmpHd cmd; memset(&cmd, 0, sizeof(cmd)); cmd.Header.Type = 0x7; cmd.Header.Opcode = 0x1; cmd.Header.SubOp = 0x44; cmd.Header.DwordLength = ((sizeof(cmd) - sizeof(cmd.Header)) / sizeof(uint32_t)) - 1; cmd.Xmin = 0; cmd.Ymin = 0; auto pitch = gmm->gmmResourceInfo->getRenderPitch(); auto bitsPerPixel = gmm->gmmResourceInfo->getBitsPerPixel(); auto pitchInPixels = static_cast(8 * pitch / bitsPerPixel); cmd.BufferPitch = pitchInPixels; cmd.BitsPerPixel = bitsPerPixel; cmd.Format = gmm->gmmResourceInfo->getResourceFormatSurfaceState(); cmd.Xsize = static_cast(gmm->gmmResourceInfo->getBaseWidth()); cmd.Ysize = static_cast(gmm->gmmResourceInfo->getBaseHeight()); cmd.setBaseAddr(gfxAllocation.getGpuAddress()); cmd.Secure = 0; cmd.UseFence = 0; auto flagInfo = gmm->gmmResourceInfo->getResourceFlags()->Info; cmd.TileOn = flagInfo.TiledW || flagInfo.TiledX || flagInfo.TiledY || flagInfo.TiledYf || flagInfo.TiledYs; cmd.WalkY = flagInfo.TiledY; cmd.UsePPGTT = 1; cmd.Use32BitDump = 1; // Dump out in 32bpp vs 24bpp cmd.UseFullFormat = 1; cmd.DirectoryHandle = context; stream->write(reinterpret_cast(&cmd), sizeof(cmd)); 
} template void dumpBufferInTreFormat(GraphicsAllocation &gfxAllocation, AubMemDump::AubFileStream *stream, uint32_t context) { using RENDER_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE; using SURFACE_FORMAT = typename RENDER_SURFACE_STATE::SURFACE_FORMAT; AubMemDump::CmdServicesMemTraceDumpCompress cmd; memset(&cmd, 0, sizeof(AubMemDump::CmdServicesMemTraceDumpCompress)); cmd.dwordCount = (sizeof(AubMemDump::CmdServicesMemTraceDumpCompress) - 1) / 4; cmd.instructionSubOpcode = 0x10; cmd.instructionOpcode = 0x2e; cmd.instructionType = 0x7; cmd.setSurfaceAddress(gfxAllocation.getGpuAddress()); cmd.surfaceWidth = static_cast(gfxAllocation.getUnderlyingBufferSize()); cmd.surfaceHeight = 1; cmd.surfacePitch = static_cast(gfxAllocation.getUnderlyingBufferSize()); cmd.surfaceFormat = SURFACE_FORMAT::SURFACE_FORMAT_RAW; cmd.dumpType = AubMemDump::CmdServicesMemTraceDumpCompress::DumpTypeValues::Tre; cmd.surfaceTilingType = RENDER_SURFACE_STATE::TILE_MODE_LINEAR; cmd.surfaceType = RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_BUFFER; cmd.algorithm = AubMemDump::CmdServicesMemTraceDumpCompress::AlgorithmValues::Uncompressed; cmd.gttType = 1; cmd.directoryHandle = context; stream->write(reinterpret_cast(&cmd), sizeof(cmd)); } template void dumpImageInTreFormat(GraphicsAllocation &gfxAllocation, AubMemDump::AubFileStream *stream, uint32_t context) { using RENDER_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE; auto gmm = gfxAllocation.getDefaultGmm(); if ((gmm->gmmResourceInfo->getNumSamples() > 1) || (gfxAllocation.isCompressionEnabled())) { DEBUG_BREAK_IF(true); //unsupported return; } auto surfaceType = getImageSurfaceTypeFromGmmResourceType(gmm->gmmResourceInfo->getResourceType()); AubMemDump::CmdServicesMemTraceDumpCompress cmd; memset(&cmd, 0, sizeof(AubMemDump::CmdServicesMemTraceDumpCompress)); cmd.dwordCount = (sizeof(AubMemDump::CmdServicesMemTraceDumpCompress) - 1) / 4; cmd.instructionSubOpcode = 0x10; cmd.instructionOpcode = 0x2e; 
cmd.instructionType = 0x7; cmd.setSurfaceAddress(gfxAllocation.getGpuAddress()); cmd.surfaceWidth = static_cast(gmm->gmmResourceInfo->getBaseWidth()); cmd.surfaceHeight = static_cast(gmm->gmmResourceInfo->getBaseHeight()); cmd.surfacePitch = static_cast(gmm->gmmResourceInfo->getRenderPitch()); cmd.surfaceFormat = gmm->gmmResourceInfo->getResourceFormatSurfaceState(); cmd.dumpType = AubMemDump::CmdServicesMemTraceDumpCompress::DumpTypeValues::Tre; cmd.surfaceTilingType = gmm->gmmResourceInfo->getTileModeSurfaceState(); cmd.surfaceType = surfaceType; cmd.algorithm = AubMemDump::CmdServicesMemTraceDumpCompress::AlgorithmValues::Uncompressed; cmd.gttType = 1; cmd.directoryHandle = context; stream->write(reinterpret_cast(&cmd), sizeof(cmd)); } template void dumpAllocation(DumpFormat dumpFormat, GraphicsAllocation &gfxAllocation, AubMemDump::AubFileStream *stream, uint32_t context) { switch (dumpFormat) { case DumpFormat::BUFFER_BIN: dumpBufferInBinFormat(gfxAllocation, stream, context); break; case DumpFormat::BUFFER_TRE: dumpBufferInTreFormat(gfxAllocation, stream, context); break; case DumpFormat::IMAGE_BMP: dumpImageInBmpFormat(gfxAllocation, stream, context); break; case DumpFormat::IMAGE_TRE: dumpImageInTreFormat(gfxAllocation, stream, context); break; default: break; } } } // namespace AubAllocDump compute-runtime-22.14.22890/shared/source/aub_mem_dump/aub_data.h000066400000000000000000000002741422164147700244660ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include struct AubGTTData { bool present; bool localMemory; }; compute-runtime-22.14.22890/shared/source/aub_mem_dump/aub_header.h000066400000000000000000000061631422164147700250100ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include #include #ifndef WIN32 #pragma pack(4) #else #pragma pack(push, 4) #endif inline void setMisalignedUint64(uint64_t 
*address, const uint64_t value) { uint32_t *addressBits = reinterpret_cast(address); addressBits[0] = static_cast(value); addressBits[1] = static_cast(value >> 32); } inline uint64_t getMisalignedUint64(const uint64_t *address) { const uint32_t *addressBits = reinterpret_cast(address); return static_cast(static_cast(addressBits[1]) << 32) | addressBits[0]; } struct AubCmdHdr { uint32_t DwordLength : 16, SubOp : 7, Opcode : 6, Type : 3; }; static_assert(4 == sizeof(AubCmdHdr), "Invalid size for AubCmdHdr"); struct AubCmdDumpBmpHd { AubCmdHdr Header; uint32_t Xmin; uint32_t Ymin; uint32_t BufferPitch; uint32_t BitsPerPixel : 8, Format : 8, Reserved_0 : 16; uint32_t Xsize; uint32_t Ysize; uint64_t BaseAddr; uint32_t Secure : 1, UseFence : 1, TileOn : 1, WalkY : 1, UsePPGTT : 1, Use32BitDump : 1, UseFullFormat : 1, Reserved_1 : 25; uint32_t DirectoryHandle; uint64_t getBaseAddr() const { return getMisalignedUint64(&this->BaseAddr); } void setBaseAddr(const uint64_t baseAddr) { setMisalignedUint64(&this->BaseAddr, baseAddr); } }; static_assert(44 == sizeof(AubCmdDumpBmpHd), "Invalid size for AubCmdDumpBmpHd"); struct AubPpgttContextCreate { AubCmdHdr Header; uint32_t Handle; uint32_t AdvancedContext : 1, SixtyFourBit : 1, Reserved_31_2 : 30; uint64_t PageDirPointer[4]; }; static_assert(44 == sizeof(AubPpgttContextCreate), "Invalid size for AubPpgttContextCreate"); struct AubCaptureBinaryDumpHD { AubCmdHdr Header; uint64_t BaseAddr; uint64_t Width; uint64_t Height; uint64_t Pitch; uint32_t SurfaceType : 4, GttType : 2, Reserved_31_6 : 26; uint32_t DirectoryHandle; uint32_t ReservedDW1; uint32_t ReservedDW2; char OutputFile[4]; uint64_t getBaseAddr() const { return getMisalignedUint64(&this->BaseAddr); } void setBaseAddr(const uint64_t baseAddr) { setMisalignedUint64(&this->BaseAddr, baseAddr); } uint64_t getWidth() const { return getMisalignedUint64(&this->Width); } void setWidth(const uint64_t width) { setMisalignedUint64(&this->Width, width); } uint64_t getHeight() 
const { return getMisalignedUint64(&this->Height); } void setHeight(const uint64_t height) { setMisalignedUint64(&this->Height, height); } uint64_t getPitch() const { return getMisalignedUint64(&this->Pitch); } void setPitch(const uint64_t pitch) { setMisalignedUint64(&this->Pitch, pitch); } }; static_assert(56 == sizeof(AubCaptureBinaryDumpHD), "Invalid size for AubCaptureBinaryDumpHD"); #ifndef WIN32 #pragma pack() #else #pragma pack(pop) #endif compute-runtime-22.14.22890/shared/source/aub_mem_dump/aub_mem_dump.cpp000066400000000000000000000207201422164147700257110ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/aub_mem_dump/aub_mem_dump.h" #include "shared/source/aub/aub_helper.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/helpers/debug_helpers.h" #include "shared/source/helpers/ptr_math.h" namespace AubMemDump { const uint64_t g_pageMask = ~(4096ull - 1); const size_t g_dwordCountMax = 65536; // Some page table constants used in virtualizing the page tables. 
// clang-format off // 32 bit page table traits const uint64_t PageTableTraits<32>::physicalMemory = 0; // 1ull <::numPTEntries = BIT(PageTableTraits<32>::addressingBits - PageTableTraits<32>::NUM_OFFSET_BITS); const uint64_t PageTableTraits<32>::sizePT = BIT(PageTableTraits<32>::addressingBits - PageTableTraits<32>::NUM_OFFSET_BITS) * sizeof(uint64_t); const uint64_t PageTableTraits<32>::ptBaseAddress = BIT(38); const uint64_t PageTableTraits<32>::numPDEntries = BIT(PageTableTraits<32>::addressingBits - PageTableTraits<32>::NUM_OFFSET_BITS - PageTableTraits<32>::NUM_PTE_BITS); const uint64_t PageTableTraits<32>::sizePD = BIT(PageTableTraits<32>::addressingBits - PageTableTraits<32>::NUM_OFFSET_BITS - PageTableTraits<32>::NUM_PTE_BITS) * sizeof(uint64_t); const uint64_t PageTableTraits<32>::pdBaseAddress = BIT(37); const uint64_t PageTableTraits<32>::numPDPEntries = BIT(PageTableTraits<32>::addressingBits - PageTableTraits<32>::NUM_OFFSET_BITS - PageTableTraits<32>::NUM_PTE_BITS - PageTableTraits<32>::NUM_PDE_BITS); const uint64_t PageTableTraits<32>::sizePDP = BIT(PageTableTraits<32>::addressingBits - PageTableTraits<32>::NUM_OFFSET_BITS - PageTableTraits<32>::NUM_PTE_BITS - PageTableTraits<32>::NUM_PDE_BITS) * sizeof(uint64_t); const uint64_t PageTableTraits<32>::pdpBaseAddress = BIT(36); // 48 bit page table traits const uint64_t PageTableTraits<48>::physicalMemory = 0; // 1ull <::numPTEntries = BIT(PageTableTraits<48>::addressingBits - PageTableTraits<48>::NUM_OFFSET_BITS); const uint64_t PageTableTraits<48>::sizePT = BIT(PageTableTraits<48>::addressingBits - PageTableTraits<48>::NUM_OFFSET_BITS) * sizeof(uint64_t); const uint64_t PageTableTraits<48>::ptBaseAddress = BIT(32); const uint64_t PageTableTraits<48>::numPDEntries = BIT(PageTableTraits<48>::addressingBits - PageTableTraits<48>::NUM_OFFSET_BITS - PageTableTraits<48>::NUM_PTE_BITS); const uint64_t PageTableTraits<48>::sizePD = BIT(PageTableTraits<48>::addressingBits - 
PageTableTraits<48>::NUM_OFFSET_BITS - PageTableTraits<48>::NUM_PTE_BITS) * sizeof(uint64_t); const uint64_t PageTableTraits<48>::pdBaseAddress = BIT(31); const uint64_t PageTableTraits<48>::numPDPEntries = BIT(PageTableTraits<48>::addressingBits - PageTableTraits<48>::NUM_OFFSET_BITS - PageTableTraits<48>::NUM_PTE_BITS - PageTableTraits<48>::NUM_PDE_BITS); const uint64_t PageTableTraits<48>::sizePDP = BIT(PageTableTraits<48>::addressingBits - PageTableTraits<48>::NUM_OFFSET_BITS - PageTableTraits<48>::NUM_PTE_BITS - PageTableTraits<48>::NUM_PDE_BITS) * sizeof(uint64_t); const uint64_t PageTableTraits<48>::pdpBaseAddress = BIT(30); const uint64_t PageTableTraits<48>::numPML4Entries = BIT(NUM_PML4_BITS); const uint64_t PageTableTraits<48>::sizePML4 = BIT(NUM_PML4_BITS) * sizeof(uint64_t); const uint64_t PageTableTraits<48>::pml4BaseAddress = BIT(29); // clang-format on void LrcaHelper::setRingTail(void *pLRCIn, uint32_t ringTail) const { auto pLRCA = ptrOffset(reinterpret_cast(pLRCIn), offsetContext + offsetRingRegisters + offsetRingTail); *pLRCA++ = AubMemDump::computeRegisterOffset(mmioBase, 0x2030); *pLRCA++ = ringTail; } void LrcaHelper::setRingHead(void *pLRCIn, uint32_t ringHead) const { auto pLRCA = ptrOffset(reinterpret_cast(pLRCIn), offsetContext + offsetRingRegisters + offsetRingHead); *pLRCA++ = AubMemDump::computeRegisterOffset(mmioBase, 0x2034); *pLRCA++ = ringHead; } void LrcaHelper::setRingBase(void *pLRCIn, uint32_t ringBase) const { auto pLRCA = ptrOffset(reinterpret_cast(pLRCIn), offsetContext + offsetRingRegisters + offsetRingBase); *pLRCA++ = AubMemDump::computeRegisterOffset(mmioBase, 0x2038); *pLRCA++ = ringBase; } void LrcaHelper::setRingCtrl(void *pLRCIn, uint32_t ringCtrl) const { auto pLRCA = ptrOffset(reinterpret_cast(pLRCIn), offsetContext + offsetRingRegisters + offsetRingCtrl); *pLRCA++ = AubMemDump::computeRegisterOffset(mmioBase, 0x203c); *pLRCA++ = ringCtrl; } void LrcaHelper::setPDP0(void *pLRCIn, uint64_t address) const { auto 
pLRCA = ptrOffset(reinterpret_cast(pLRCIn), offsetContext + offsetPageTableRegisters + offsetPDP0); *pLRCA++ = AubMemDump::computeRegisterOffset(mmioBase, 0x2274); *pLRCA++ = address >> 32; *pLRCA++ = AubMemDump::computeRegisterOffset(mmioBase, 0x2270); *pLRCA++ = address & 0xffffffff; } void LrcaHelper::setPDP1(void *pLRCIn, uint64_t address) const { auto pLRCA = ptrOffset(reinterpret_cast(pLRCIn), offsetContext + offsetPageTableRegisters + offsetPDP1); *pLRCA++ = AubMemDump::computeRegisterOffset(mmioBase, 0x227c); *pLRCA++ = address >> 32; *pLRCA++ = AubMemDump::computeRegisterOffset(mmioBase, 0x2278); *pLRCA++ = address & 0xffffffff; } void LrcaHelper::setPDP2(void *pLRCIn, uint64_t address) const { auto pLRCA = ptrOffset(reinterpret_cast(pLRCIn), offsetContext + offsetPageTableRegisters + offsetPDP2); *pLRCA++ = AubMemDump::computeRegisterOffset(mmioBase, 0x2284); *pLRCA++ = address >> 32; *pLRCA++ = AubMemDump::computeRegisterOffset(mmioBase, 0x2280); *pLRCA++ = address & 0xffffffff; } void LrcaHelper::setPDP3(void *pLRCIn, uint64_t address) const { auto pLRCA = ptrOffset(reinterpret_cast(pLRCIn), offsetContext + offsetPageTableRegisters + offsetPDP3); *pLRCA++ = AubMemDump::computeRegisterOffset(mmioBase, 0x228c); *pLRCA++ = address >> 32; *pLRCA++ = AubMemDump::computeRegisterOffset(mmioBase, 0x2288); *pLRCA++ = address & 0xffffffff; } void LrcaHelper::setPML4(void *pLRCIn, uint64_t address) const { setPDP0(pLRCIn, address); } void LrcaHelper::initialize(void *pLRCIn) const { auto pLRCABase = reinterpret_cast(pLRCIn); // Initialize to known but benign garbage for (size_t i = 0; i < sizeLRCA / sizeof(uint32_t); i++) { pLRCABase[i] = 0x1; } auto pLRCA = ptrOffset(pLRCABase, offsetContext); // Initialize the ring context of the LRCA auto pLRI = ptrOffset(pLRCA, offsetLRI0); auto numRegs = numRegsLRI0; *pLRI++ = 0x11001000 | (2 * numRegs - 1); uint32_t ctxSrCtlValue = 0x00010001; // Inhibit context-restore setContextSaveRestoreFlags(ctxSrCtlValue); while 
(numRegs-- > 0) { *pLRI++ = AubMemDump::computeRegisterOffset(mmioBase, 0x2244); // CTXT_SR_CTL *pLRI++ = ctxSrCtlValue; } // Initialize the other LRI DEBUG_BREAK_IF(offsetLRI1 != 0x21 * sizeof(uint32_t)); pLRI = ptrOffset(pLRCA, offsetLRI1); numRegs = numRegsLRI1; *pLRI++ = 0x11001000 | (2 * numRegs - 1); while (numRegs-- > 0) { *pLRI++ = AubMemDump::computeRegisterOffset(mmioBase, 0x20d8); // DEBUG *pLRI++ = 0x00200020; } DEBUG_BREAK_IF(offsetLRI2 != 0x41 * sizeof(uint32_t)); pLRI = ptrOffset(pLRCA, offsetLRI2); numRegs = numRegsLRI2; *pLRI++ = 0x11000000 | (2 * numRegs - 1); while (numRegs-- > 0) { *pLRI++ = AubMemDump::computeRegisterOffset(mmioBase, 0x2094); // NOP ID *pLRI++ = 0x00000000; } setRingHead(pLRCIn, 0); setRingTail(pLRCIn, 0); setRingBase(pLRCIn, 0); setRingCtrl(pLRCIn, 0); setPDP0(pLRCIn, 0); setPDP1(pLRCIn, 0); setPDP2(pLRCIn, 0); setPDP3(pLRCIn, 0); } void AubStream::writeMMIO(uint32_t offset, uint32_t value) { auto dbgOffset = NEO::DebugManager.flags.AubDumpOverrideMmioRegister.get(); if (dbgOffset > 0) { if (offset == static_cast(dbgOffset)) { offset = static_cast(dbgOffset); value = static_cast(NEO::DebugManager.flags.AubDumpOverrideMmioRegisterValue.get()); } } writeMMIOImpl(offset, value); } } // namespace AubMemDump compute-runtime-22.14.22890/shared/source/aub_mem_dump/aub_mem_dump.h000066400000000000000000000375321422164147700253670ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/aub_mem_dump/aub_data.h" #include #include #include namespace NEO { class AubHelper; } namespace AubMemDump { #include "aub_services.h" constexpr uint32_t rcsRegisterBase = 0x2000; #ifndef BIT #define BIT(x) (((uint64_t)1) << (x)) #endif inline uint32_t computeRegisterOffset(uint32_t mmioBase, uint32_t rcsRegisterOffset) { return mmioBase + rcsRegisterOffset - rcsRegisterBase; } template inline void setAddress(Cmd &cmd, uint64_t address) { cmd.address = address; 
} template <> inline void setAddress(CmdServicesMemTraceMemoryCompare &cmd, uint64_t address) { cmd.address = static_cast(address); cmd.addressHigh = static_cast(address >> 32); } union IAPageTableEntry { struct { uint64_t Present : 1; //[0] uint64_t Writable : 1; //[1] uint64_t UserSupervisor : 1; //[2] uint64_t PWT : 1; //[3] uint64_t PCD : 1; //[4] uint64_t Accessed : 1; //[5] uint64_t Dirty : 1; //[6] uint64_t PAT : 1; //[7] uint64_t Global : 1; //[8] uint64_t Reserved_9 : 1; //[9] uint64_t Reserved_10 : 1; //[10] uint64_t Reserved_11 : 1; //[11] uint64_t PhysicalAddress : 27; //[38:12] uint64_t Reserved_51_39 : 13; //[51:39] uint64_t Ignored : 11; //[62:52] uint64_t ExecuteDisable : 1; //[63] } pageConfig; uint32_t dwordData[2]; uint64_t uiData; }; union MiGttEntry { struct { uint64_t Present : 1; //[0] uint64_t LocalMemory : 1; //[1] uint64_t FunctionNumber : 10; //[11:2] uint64_t PhysicalAddress : 35; //[46:12] uint64_t Ignored : 17; //[63:47] } pageConfig; uint32_t dwordData[2]; uint64_t uiData; }; // Use the latest DeviceValues enumerations available typedef CmdServicesMemTraceVersion::DeviceValues DeviceValues; typedef CmdServicesMemTraceVersion::SteppingValues SteppingValues; typedef CmdServicesMemTraceMemoryWrite::AddressSpaceValues AddressSpaceValues; typedef CmdServicesMemTraceMemoryWrite::DataTypeHintValues DataTypeHintValues; typedef CmdServicesMemTraceMemoryDump::TilingValues TilingValues; typedef CmdServicesMemTraceMemoryWrite::RepeatMemoryValues RepeatMemoryValues; typedef CmdServicesMemTraceRegisterWrite::MessageSourceIdValues MessageSourceIdValues; typedef CmdServicesMemTraceRegisterWrite::RegisterSizeValues RegisterSizeValues; typedef CmdServicesMemTraceRegisterWrite::RegisterSpaceValues RegisterSpaceValues; typedef CmdServicesMemTraceMemoryPoll::DataSizeValues DataSizeValues; template struct Traits { typedef struct AubStream Stream; enum { addressingBits = addressingBitsIn, device = deviceIn }; }; struct AubStream { virtual void open(const 
char *filePath) = 0; virtual void close() = 0; virtual bool init(uint32_t stepping, uint32_t device) = 0; virtual void createContext(const AubPpgttContextCreate &cmd) {} virtual void writeMemory(uint64_t physAddress, const void *memory, size_t sizeToDumpThisIteration, uint32_t addressSpace, uint32_t hint) = 0; virtual void writeMemoryWriteHeader(uint64_t physAddress, size_t size, uint32_t addressSpace, uint32_t hint) = 0; virtual void writeMemoryWriteHeader(uint64_t physAddress, size_t size, uint32_t addressSpace) { writeMemoryWriteHeader(physAddress, size, addressSpace, CmdServicesMemTraceMemoryWrite::DataTypeHintValues::TraceNotype); } virtual void writePTE(uint64_t physAddress, uint64_t entry, uint32_t addressSpace) = 0; virtual void writeGTT(uint32_t offset, uint64_t entry) = 0; void writeMMIO(uint32_t offset, uint32_t value); virtual void registerPoll(uint32_t registerOffset, uint32_t mask, uint32_t value, bool pollNotEqual, uint32_t timeoutAction) = 0; virtual ~AubStream() = default; protected: virtual void writeMMIOImpl(uint32_t offset, uint32_t value) = 0; }; struct AubFileStream : public AubStream { void open(const char *filePath) override; void close() override; bool init(uint32_t stepping, uint32_t device) override; void createContext(const AubPpgttContextCreate &cmd) override; void writeMemory(uint64_t physAddress, const void *memory, size_t size, uint32_t addressSpace, uint32_t hint) override; void writeMemoryWriteHeader(uint64_t physAddress, size_t size, uint32_t addressSpace, uint32_t hint) override; void writePTE(uint64_t physAddress, uint64_t entry, uint32_t addressSpace) override; void writeGTT(uint32_t offset, uint64_t entry) override; void writeMMIOImpl(uint32_t offset, uint32_t value) override; void registerPoll(uint32_t registerOffset, uint32_t mask, uint32_t value, bool pollNotEqual, uint32_t timeoutAction) override; MOCKABLE_VIRTUAL bool isOpen() const { return fileHandle.is_open(); } MOCKABLE_VIRTUAL const std::string &getFileName() const { 
return fileName; } MOCKABLE_VIRTUAL void write(const char *data, size_t size); MOCKABLE_VIRTUAL void flush(); MOCKABLE_VIRTUAL void expectMMIO(uint32_t mmioRegister, uint32_t expectedValue); MOCKABLE_VIRTUAL void expectMemory(uint64_t physAddress, const void *memory, size_t size, uint32_t addressSpace, uint32_t compareOperation); MOCKABLE_VIRTUAL bool addComment(const char *message); MOCKABLE_VIRTUAL std::unique_lock lockStream(); std::ofstream fileHandle; std::string fileName; std::mutex mutex; }; template struct PageTableTraits { }; template <> struct PageTableTraits<32> { // clang-format off enum { addressingBits = 32, NUM_OFFSET_BITS = 12, NUM_PTE_BITS = 9, NUM_PDE_BITS = 9, NUM_PDP_BITS = addressingBits - NUM_PDE_BITS - NUM_PTE_BITS - NUM_OFFSET_BITS, }; static const uint64_t physicalMemory; static const uint64_t numPTEntries; static const uint64_t sizePT; static const uint64_t ptBaseAddress; static const uint64_t numPDEntries; static const uint64_t sizePD; static const uint64_t pdBaseAddress; static const uint64_t numPDPEntries; static const uint64_t sizePDP; static const uint64_t pdpBaseAddress; // clang-format on }; template <> struct PageTableTraits<48> { // clang-format off enum { addressingBits = 48, NUM_OFFSET_BITS = PageTableTraits<32>::NUM_OFFSET_BITS, NUM_PTE_BITS = PageTableTraits<32>::NUM_PTE_BITS, NUM_PDE_BITS = PageTableTraits<32>::NUM_PDE_BITS, NUM_PDP_BITS = PageTableTraits<32>::NUM_PDP_BITS, NUM_PML4_BITS = addressingBits - NUM_PDP_BITS - NUM_PDE_BITS - NUM_PTE_BITS - NUM_OFFSET_BITS }; static const uint64_t physicalMemory; static const uint64_t numPTEntries; static const uint64_t sizePT; static const uint64_t ptBaseAddress; static const uint64_t numPDEntries; static const uint64_t sizePD; static const uint64_t pdBaseAddress; static const uint64_t numPDPEntries; static const uint64_t sizePDP; static const uint64_t pdpBaseAddress; static const uint64_t numPML4Entries; static const uint64_t sizePML4; static const uint64_t pml4BaseAddress; // 
clang-format on }; template struct AubPageTableHelper { typedef AubMemDump::PageTableTraits PageTableTraits; enum { addressingBits = Traits::addressingBits }; static inline uint32_t ptrToGGTT(const void *memory) { return static_cast(reinterpret_cast(memory)); } static inline uintptr_t ptrToPPGTT(const void *memory) { return reinterpret_cast(memory); } static inline uint64_t getPTEAddress(uint64_t ptIndex) { return PageTableTraits::ptBaseAddress + ptIndex * sizeof(uint64_t); } static inline uint64_t getPDEAddress(uint64_t pdIndex) { return PageTableTraits::pdBaseAddress + pdIndex * sizeof(uint64_t); } static inline uint64_t getPDPAddress(uint64_t pdpIndex) { return PageTableTraits::pdpBaseAddress + pdpIndex * sizeof(uint64_t); } }; template struct AubPageTableHelper32 : public AubPageTableHelper, PageTableTraits<32> { typedef AubPageTableHelper BaseClass; static void createContext(typename Traits::Stream &stream, uint32_t context); static uint64_t reserveAddressPPGTT(typename Traits::Stream &stream, uintptr_t gfxAddress, size_t blockSize, uint64_t physAddress, uint64_t additionalBits, const NEO::AubHelper &aubHelper); static void fixupLRC(uint8_t *pLrc); }; template struct AubPageTableHelper64 : public AubPageTableHelper, PageTableTraits<48> { typedef AubPageTableHelper BaseClass; static inline uint64_t getPML4Address(uint64_t pml4Index) { return pml4BaseAddress + pml4Index * sizeof(uint64_t); } static void createContext(typename Traits::Stream &stream, uint32_t context); static uint64_t reserveAddressPPGTT(typename Traits::Stream &stream, uintptr_t gfxAddress, size_t blockSize, uint64_t physAddress, uint64_t additionalBits, const NEO::AubHelper &aubHelper); static void fixupLRC(uint8_t *pLrc); }; template struct AubDump : public std::conditional, AubPageTableHelper64>::type { using Traits = TraitsIn; using AddressType = typename std::conditional::type; using BaseHelper = typename std::conditional, AubPageTableHelper64>::type; using Stream = typename Traits::Stream; 
typedef union _MiContextDescriptorReg_ { struct { uint64_t Valid : 1; //[0] uint64_t ForcePageDirRestore : 1; //[1] uint64_t ForceRestore : 1; //[2] uint64_t Legacy : 1; //[3] uint64_t ADor64bitSupport : 1; //[4] Selects 64-bit PPGTT in Legacy mode uint64_t LlcCoherencySupport : 1; //[5] uint64_t FaultSupport : 2; //[7:6] uint64_t PrivilegeAccessOrPPGTT : 1; //[8] Selects PPGTT in Legacy mode uint64_t FunctionType : 3; //[11:9] uint64_t LogicalRingCtxAddress : 20; //[31:12] uint64_t ContextID : 32; //[63:32] } sData; uint32_t ulData[2]; uint64_t qwordData[2 / 2]; } MiContextDescriptorReg, *pMiContextDescriptorReg; // Write a block of memory to a given address space using an optional hint static void addMemoryWrite(Stream &stream, uint64_t addr, const void *memory, size_t blockSize, int addressSpace, int hint = DataTypeHintValues::TraceNotype); static uint64_t reserveAddressGGTT(Stream &stream, uint32_t addr, size_t size, uint64_t physStart, AubGTTData data); static uint64_t reserveAddressGGTT(Stream &stream, const void *memory, size_t size, uint64_t physStart, AubGTTData data); static void reserveAddressGGTTAndWriteMmeory(Stream &stream, uintptr_t gfxAddress, const void *memory, uint64_t physAddress, size_t size, size_t offset, uint64_t additionalBits, const NEO::AubHelper &aubHelper); static void setGttEntry(MiGttEntry &entry, uint64_t address, AubGTTData data); private: static uint64_t reserveAddress(Stream &stream, uint32_t addr, size_t size, unsigned int addressSpace /* = AddressSpaceValues::TraceGttEntry*/, uint64_t physStart, AubGTTData data); }; struct LrcaHelper { LrcaHelper(uint32_t base) : mmioBase(base) { } int aubHintLRCA = DataTypeHintValues::TraceNotype; int aubHintCommandBuffer = DataTypeHintValues::TraceCommandBuffer; int aubHintBatchBuffer = DataTypeHintValues::TraceBatchBuffer; const char *name = "XCS"; uint32_t mmioBase = 0; size_t sizeLRCA = 0x2000; uint32_t alignLRCA = 0x1000; uint32_t offsetContext = 0x1000; uint32_t offsetLRI0 = 0x01 * 
sizeof(uint32_t); uint32_t numRegsLRI0 = 14; uint32_t numNoops0 = 3; uint32_t offsetLRI1 = offsetLRI0 + (1 + numRegsLRI0 * 2 + numNoops0) * sizeof(uint32_t); //offsetLRI == 0x21 * sizeof(uint32_t); uint32_t numRegsLRI1 = 9; uint32_t numNoops1 = 13; uint32_t offsetLRI2 = offsetLRI1 + (1 + numRegsLRI1 * 2 + numNoops1) * sizeof(uint32_t); //offsetLR2 == 0x41 * sizeof(uint32_t); uint32_t numRegsLRI2 = 1; uint32_t offsetRingRegisters = offsetLRI0 + (3 * sizeof(uint32_t)); uint32_t offsetRingHead = 0x0 * sizeof(uint32_t); uint32_t offsetRingTail = 0x2 * sizeof(uint32_t); uint32_t offsetRingBase = 0x4 * sizeof(uint32_t); uint32_t offsetRingCtrl = 0x6 * sizeof(uint32_t); uint32_t offsetPageTableRegisters = offsetLRI1 + (3 * sizeof(uint32_t)); uint32_t offsetPDP0 = 0xc * sizeof(uint32_t); uint32_t offsetPDP1 = 0x8 * sizeof(uint32_t); uint32_t offsetPDP2 = 0x4 * sizeof(uint32_t); uint32_t offsetPDP3 = 0x0 * sizeof(uint32_t); void initialize(void *pLRCIn) const; void setRingHead(void *pLRCIn, uint32_t ringHead) const; void setRingTail(void *pLRCIn, uint32_t ringTail) const; void setRingBase(void *pLRCIn, uint32_t ringBase) const; void setRingCtrl(void *pLRCIn, uint32_t ringCtrl) const; void setPDP0(void *pLRCIn, uint64_t address) const; void setPDP1(void *pLRCIn, uint64_t address) const; void setPDP2(void *pLRCIn, uint64_t address) const; void setPDP3(void *pLRCIn, uint64_t address) const; void setPML4(void *pLRCIn, uint64_t address) const; MOCKABLE_VIRTUAL void setContextSaveRestoreFlags(uint32_t &value) const; }; struct LrcaHelperRcs : public LrcaHelper { LrcaHelperRcs(uint32_t base) : LrcaHelper(base) { aubHintLRCA = DataTypeHintValues::TraceLogicalRingContextRcs; aubHintCommandBuffer = DataTypeHintValues::TraceCommandBufferPrimary; aubHintBatchBuffer = DataTypeHintValues::TraceBatchBufferPrimary; sizeLRCA = 0x11000; name = "RCS"; } }; struct LrcaHelperBcs : public LrcaHelper { LrcaHelperBcs(uint32_t base) : LrcaHelper(base) { aubHintLRCA = 
DataTypeHintValues::TraceLogicalRingContextBcs; aubHintCommandBuffer = DataTypeHintValues::TraceCommandBufferBlt; aubHintBatchBuffer = DataTypeHintValues::TraceBatchBufferBlt; name = "BCS"; } }; struct LrcaHelperVcs : public LrcaHelper { LrcaHelperVcs(uint32_t base) : LrcaHelper(base) { aubHintLRCA = DataTypeHintValues::TraceLogicalRingContextVcs; aubHintCommandBuffer = DataTypeHintValues::TraceCommandBufferMfx; aubHintBatchBuffer = DataTypeHintValues::TraceBatchBufferMfx; name = "VCS"; } }; struct LrcaHelperVecs : public LrcaHelper { LrcaHelperVecs(uint32_t base) : LrcaHelper(base) { aubHintLRCA = DataTypeHintValues::TraceLogicalRingContextVecs; name = "VECS"; } }; struct LrcaHelperCcs : public LrcaHelper { LrcaHelperCcs(uint32_t base) : LrcaHelper(base) { aubHintLRCA = DataTypeHintValues::TraceLogicalRingContextCcs; name = "CCS"; } }; struct LrcaHelperLinkBcs : public LrcaHelperBcs { LrcaHelperLinkBcs(uint32_t base, uint32_t engineId) : LrcaHelperBcs(base) { std::string nameStr("BCS" + std::to_string(engineId)); name = nameStr.c_str(); } }; struct LrcaHelperCccs : public LrcaHelper { LrcaHelperCccs(uint32_t base) : LrcaHelper(base) { name = "CCCS"; } }; extern const uint64_t g_pageMask; extern const size_t g_dwordCountMax; } // namespace AubMemDumpcompute-runtime-22.14.22890/shared/source/aub_mem_dump/aub_mem_dump.inl000066400000000000000000000332101422164147700257070ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/aub/aub_helper.h" #include "shared/source/aub_mem_dump/aub_mem_dump.h" #include "shared/source/helpers/constants.h" #include "shared/source/helpers/debug_helpers.h" #include "shared/source/helpers/ptr_math.h" #include #include namespace AubMemDump { template void AubPageTableHelper32::fixupLRC(uint8_t *pLRC) { uint32_t pdAddress; pdAddress = BaseClass::getPDEAddress(0x600) >> 32; *(uint32_t *)(pLRC + 0x1094) = pdAddress; pdAddress = BaseClass::getPDEAddress(0x600) 
& 0xffffffff; *(uint32_t *)(pLRC + 0x109c) = pdAddress; pdAddress = BaseClass::getPDEAddress(0x400) >> 32; *(uint32_t *)(pLRC + 0x10a4) = pdAddress; pdAddress = BaseClass::getPDEAddress(0x400) & 0xffffffff; *(uint32_t *)(pLRC + 0x10ac) = pdAddress; pdAddress = BaseClass::getPDEAddress(0x200) >> 32; *(uint32_t *)(pLRC + 0x10b4) = pdAddress; pdAddress = BaseClass::getPDEAddress(0x200) & 0xffffffff; *(uint32_t *)(pLRC + 0x10bc) = pdAddress; pdAddress = BaseClass::getPDEAddress(0) >> 32; *(uint32_t *)(pLRC + 0x10c4) = pdAddress; pdAddress = BaseClass::getPDEAddress(0) & 0xffffffff; *(uint32_t *)(pLRC + 0x10cc) = pdAddress; } template void AubPageTableHelper64::fixupLRC(uint8_t *pLRC) { uint32_t pml4Address = getPML4Address(0) >> 32; *(uint32_t *)(pLRC + 0x10c4) = pml4Address; pml4Address = getPML4Address(0) & 0xffffffff; *(uint32_t *)(pLRC + 0x10cc) = pml4Address; } // Write a block of memory to a given address space using an optional hint template void AubDump::addMemoryWrite(typename Traits::Stream &stream, uint64_t addr, const void *memory, size_t sizeRemaining, int addressSpace, int hint) { // We can only dump a relatively small amount per CmdServicesMemTraceMemoryWrite auto sizeMemoryWriteHeader = sizeof(CmdServicesMemTraceMemoryWrite) - sizeof(CmdServicesMemTraceMemoryWrite::data); auto blockSizeMax = g_dwordCountMax * sizeof(uint32_t) - sizeMemoryWriteHeader; if (hint == CmdServicesMemTraceMemoryWrite::DataTypeHintValues::TraceLogicalRingContextRcs || hint == CmdServicesMemTraceMemoryWrite::DataTypeHintValues::TraceLogicalRingContextBcs || hint == CmdServicesMemTraceMemoryWrite::DataTypeHintValues::TraceLogicalRingContextVcs || hint == CmdServicesMemTraceMemoryWrite::DataTypeHintValues::TraceLogicalRingContextVecs || hint == CmdServicesMemTraceMemoryWrite::DataTypeHintValues::TraceLogicalRingContextCcs) { DEBUG_BREAK_IF(sizeRemaining <= 0x10cc); uint8_t *pLRC = reinterpret_cast(const_cast(memory)); BaseHelper::fixupLRC(pLRC); } // loop to dump all of the blocks 
while (sizeRemaining > 0) { auto sizeThisIteration = std::min(blockSizeMax, sizeRemaining); stream.writeMemory(addr, memory, sizeThisIteration, addressSpace, hint); sizeRemaining -= sizeThisIteration; memory = (uint8_t *)memory + sizeThisIteration; addr += sizeThisIteration; } } // Reserve memory in the GGTT. template uint64_t AubDump::reserveAddress(typename Traits::Stream &stream, uint32_t addr, size_t size, unsigned int addressSpace, uint64_t physStart, AubGTTData data) { auto startPage = addr & g_pageMask; auto endPage = (addr + size - 1) & g_pageMask; auto numPages = (uint32_t)(((endPage - startPage) / 4096) + 1); // Can only handle 16 bits of dwordCount. DEBUG_BREAK_IF(!(numPages > 0 && (numPages + 4) < 65536)); auto gttTableOffset = static_cast((((uint32_t)startPage) / 4096) * sizeof(MiGttEntry)); // Write header { typedef AubMemDump::CmdServicesMemTraceMemoryWrite CmdServicesMemTraceMemoryWrite; stream.writeMemoryWriteHeader(gttTableOffset, numPages * sizeof(AubMemDump::MiGttEntry), addressSpace, CmdServicesMemTraceMemoryWrite::DataTypeHintValues::TraceNotype); } uint64_t physAddress = physStart; while (startPage <= endPage) { MiGttEntry entry; setGttEntry(entry, physAddress, data); stream.writeGTT(gttTableOffset, entry.uiData); gttTableOffset += sizeof(entry); physAddress += 4096; startPage += 4096; } return physStart; } template uint64_t AubDump::reserveAddressGGTT(typename Traits::Stream &stream, uint32_t addr, size_t size, uint64_t physStart, AubGTTData data) { return AubDump::reserveAddress(stream, addr, size, AddressSpaceValues::TraceGttEntry, physStart, data); } template uint64_t AubDump::reserveAddressGGTT(typename Traits::Stream &stream, const void *memory, size_t size, uint64_t physStart, AubGTTData data) { auto gfxAddress = BaseHelper::ptrToGGTT(memory); return AubDump::reserveAddress(stream, gfxAddress, size, AddressSpaceValues::TraceGttEntry, physStart, data); } template void AubDump::reserveAddressGGTTAndWriteMmeory(typename Traits::Stream 
&stream, uintptr_t gfxAddress, const void *memory, uint64_t physAddress, size_t size, size_t offset, uint64_t additionalBits, const NEO::AubHelper &aubHelper) { auto vmAddr = (gfxAddress + offset) & ~(MemoryConstants::pageSize - 1); auto pAddr = physAddress & ~(MemoryConstants::pageSize - 1); AubDump::reserveAddressPPGTT(stream, vmAddr, MemoryConstants::pageSize, pAddr, additionalBits, aubHelper); int hint = NEO::AubHelper::getMemTrace(additionalBits); AubDump::addMemoryWrite(stream, physAddress, reinterpret_cast(reinterpret_cast(memory) + offset), size, hint); } template void AubDump::setGttEntry(MiGttEntry &entry, uint64_t address, AubGTTData data) { entry.uiData = 0; entry.pageConfig.PhysicalAddress = address / 4096; entry.pageConfig.Present = data.present; entry.pageConfig.LocalMemory = data.localMemory; } template uint64_t AubPageTableHelper32::reserveAddressPPGTT(typename Traits::Stream &stream, uintptr_t gfxAddress, size_t blockSize, uint64_t physAddress, uint64_t additionalBits, const NEO::AubHelper &aubHelper) { auto startAddress = gfxAddress; auto endAddress = gfxAddress + blockSize - 1; auto startPTE = startAddress >> 12; auto endPTE = endAddress >> 12; auto numPTEs = endPTE - startPTE + 1; auto startPDE = startPTE >> 9; auto endPDE = endPTE >> 9; auto numPDEs = endPDE - startPDE + 1; // Process the PD entries bool writePDE = true; if (writePDE) { auto startAddress = BaseClass::getPDEAddress(startPDE); auto addressSpace = aubHelper.getMemTraceForPdEntry(); auto hint = aubHelper.getDataHintForPdEntry(); stream.writeMemoryWriteHeader(startAddress, numPDEs * sizeof(uint64_t), addressSpace, hint); auto currPDE = startPDE; auto physPage = BaseClass::getPTEAddress(startPTE) & g_pageMask; while (currPDE <= endPDE) { auto pde = physPage | NEO::AubHelper::getPTEntryBits(additionalBits); stream.writePTE(startAddress, pde, addressSpace); startAddress += sizeof(pde); physPage += 4096; currPDE++; } } // Process the PT entries bool writePTE = true; if (writePTE) { 
auto startAddress = BaseClass::getPTEAddress(startPTE); auto addressSpace = aubHelper.getMemTraceForPtEntry(); auto hint = aubHelper.getDataHintForPtEntry(); stream.writeMemoryWriteHeader(startAddress, numPTEs * sizeof(uint64_t), addressSpace, hint); auto currPTE = startPTE; auto physPage = physAddress & g_pageMask; while (currPTE <= endPTE) { auto pte = physPage | additionalBits; stream.writePTE(startAddress, pte, addressSpace); startAddress += sizeof(pte); physPage += 4096; currPTE++; } } return physAddress; } template uint64_t AubPageTableHelper64::reserveAddressPPGTT(typename Traits::Stream &stream, uintptr_t gfxAddress, size_t blockSize, uint64_t physAddress, uint64_t additionalBits, const NEO::AubHelper &aubHelper) { auto startAddress = gfxAddress; auto endAddress = gfxAddress + blockSize - 1; auto startPTE = startAddress >> 12; auto endPTE = endAddress >> 12; auto numPTEs = endPTE - startPTE + 1; auto startPDE = startPTE >> 9; auto endPDE = endPTE >> 9; auto numPDEs = endPDE - startPDE + 1; auto startPDP = startPDE >> 9; auto endPDP = endPDE >> 9; auto numPDPs = endPDP - startPDP + 1; auto startPML4 = startPDP >> 9; auto endPML4 = endPDP >> 9; auto numPML4s = endPML4 - startPML4 + 1; // Process the PML4 entries bool writePML4 = true; if (writePML4) { auto startAddress = getPML4Address(startPML4); auto addressSpace = aubHelper.getMemTraceForPml4Entry(); auto hint = aubHelper.getDataHintForPml4Entry(); stream.writeMemoryWriteHeader(startAddress, numPML4s * sizeof(uint64_t), addressSpace, hint); auto currPML4 = startPML4; auto physPage = BaseClass::getPDPAddress(startPDP) & g_pageMask; while (currPML4 <= endPML4) { auto pml4 = physPage | NEO::AubHelper::getPTEntryBits(additionalBits); stream.writePTE(startAddress, pml4, addressSpace); startAddress += sizeof(pml4); physPage += 4096; currPML4++; } } // Process the PDP entries bool writePDPE = true; if (writePDPE) { auto startAddress = BaseClass::getPDPAddress(startPDP); auto addressSpace = 
aubHelper.getMemTraceForPdpEntry(); auto hint = aubHelper.getDataHintForPdpEntry(); stream.writeMemoryWriteHeader(startAddress, numPDPs * sizeof(uint64_t), addressSpace, hint); auto currPDP = startPDP; auto physPage = BaseClass::getPDEAddress(startPDE) & g_pageMask; while (currPDP <= endPDP) { auto pdp = physPage | NEO::AubHelper::getPTEntryBits(additionalBits); stream.writePTE(startAddress, pdp, addressSpace); startAddress += sizeof(pdp); physPage += 4096; currPDP++; } } // Process the PD entries bool writePDE = true; if (writePDE) { auto startAddress = BaseClass::getPDEAddress(startPDE); auto addressSpace = aubHelper.getMemTraceForPdEntry(); auto hint = aubHelper.getDataHintForPdEntry(); stream.writeMemoryWriteHeader(startAddress, numPDEs * sizeof(uint64_t), addressSpace, hint); auto currPDE = startPDE; auto physPage = BaseClass::getPTEAddress(startPTE) & g_pageMask; while (currPDE <= endPDE) { auto pde = physPage | NEO::AubHelper::getPTEntryBits(additionalBits); stream.writePTE(startAddress, pde, addressSpace); startAddress += sizeof(pde); physPage += 4096; currPDE++; } } // Process the PT entries bool writePTE = true; if (writePTE) { auto startAddress = BaseClass::getPTEAddress(startPTE); auto addressSpace = aubHelper.getMemTraceForPtEntry(); auto hint = aubHelper.getDataHintForPtEntry(); stream.writeMemoryWriteHeader(startAddress, numPTEs * sizeof(uint64_t), addressSpace, hint); auto currPTE = startPTE; auto physPage = physAddress & g_pageMask; while (currPTE <= endPTE) { auto pte = physPage | additionalBits; stream.writePTE(startAddress, pte, addressSpace); startAddress += sizeof(pte); physPage += 4096; currPTE++; } } return physAddress; } template void AubPageTableHelper32::createContext(typename Traits::Stream &stream, uint32_t context) { AubPpgttContextCreate cmd; memset(&cmd, 0, sizeof(cmd)); cmd.Header.Type = 0x7; cmd.Header.Opcode = 0x1; cmd.Header.SubOp = 0x14; cmd.Header.DwordLength = ((sizeof(cmd) - sizeof(cmd.Header)) / sizeof(uint32_t)) - 1; 
cmd.Handle = context; cmd.AdvancedContext = false; cmd.SixtyFourBit = 0; cmd.PageDirPointer[0] = BaseClass::getPDEAddress(0x000); cmd.PageDirPointer[1] = BaseClass::getPDEAddress(0x200); cmd.PageDirPointer[2] = BaseClass::getPDEAddress(0x400); cmd.PageDirPointer[3] = BaseClass::getPDEAddress(0x600); stream.createContext(cmd); } template void AubPageTableHelper64::createContext(typename Traits::Stream &stream, uint32_t context) { AubPpgttContextCreate cmd; memset(&cmd, 0, sizeof(cmd)); cmd.Header.Type = 0x7; cmd.Header.Opcode = 0x1; cmd.Header.SubOp = 0x14; cmd.Header.DwordLength = ((sizeof(cmd) - sizeof(cmd.Header)) / sizeof(uint32_t)) - 1; cmd.Handle = context; cmd.AdvancedContext = false; cmd.SixtyFourBit = 1; cmd.PageDirPointer[0] = getPML4Address(0); stream.createContext(cmd); } } // namespace AubMemDump compute-runtime-22.14.22890/shared/source/aub_mem_dump/aub_mem_dump_pvc_and_later.inl000066400000000000000000000325111422164147700305730ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/aub_mem_dump/aub_alloc_dump.inl" #include "shared/source/aub_mem_dump/aub_mem_dump.inl" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/helpers/array_count.h" #include "shared/source/helpers/completion_stamp.h" #include "shared/source/helpers/hw_helper.h" #include "aub_mapper.h" #include "config.h" #include "reg_configs_common.h" namespace AubMemDump { enum { device = deviceValue }; // Instantiate these common template implementations. 
template struct AubDump>; template struct AubDump>; template struct AubPageTableHelper32>; template struct AubPageTableHelper64>; } // namespace AubMemDump namespace NEO { static const AubMemDump::LrcaHelperRcs rcs(0x002000); static const AubMemDump::LrcaHelperBcs bcs(0x022000); static const AubMemDump::LrcaHelperVcs vcs(0x1c0000); static const AubMemDump::LrcaHelperVecs vecs(0x1c8000); static const AubMemDump::LrcaHelperCcs ccs(0x1a000); static const AubMemDump::LrcaHelperCcs ccs1(0x1c000); static const AubMemDump::LrcaHelperCcs ccs2(0x1e000); static const AubMemDump::LrcaHelperCcs ccs3(0x26000); static const AubMemDump::LrcaHelperCccs cccs(-1); static const AubMemDump::LrcaHelperLinkBcs linkBcs1(-1, 1); static const AubMemDump::LrcaHelperLinkBcs linkBcs2(-1, 2); static const AubMemDump::LrcaHelperLinkBcs linkBcs3(-1, 3); static const AubMemDump::LrcaHelperLinkBcs linkBcs4(-1, 4); static const AubMemDump::LrcaHelperLinkBcs linkBcs5(-1, 5); static const AubMemDump::LrcaHelperLinkBcs linkBcs6(-1, 6); static const AubMemDump::LrcaHelperLinkBcs linkBcs7(-1, 7); static const AubMemDump::LrcaHelperLinkBcs linkBcs8(-1, 8); const AubMemDump::LrcaHelper *const AUBFamilyMapper::csTraits[aub_stream::NUM_ENGINES] = { &rcs, &bcs, &vcs, &vecs, &ccs, &ccs1, &ccs2, &ccs3, &cccs, &linkBcs1, &linkBcs2, &linkBcs3, &linkBcs4, &linkBcs5, &linkBcs6, &linkBcs7, &linkBcs8}; const MMIOList AUBFamilyMapper::globalMMIO = { // GLOBAL_MOCS MMIOPair(0x00004000, 0x00000008), MMIOPair(0x00004004, 0x00000038), MMIOPair(0x00004008, 0x00000038), MMIOPair(0x0000400C, 0x00000008), MMIOPair(0x00004010, 0x00000018), MMIOPair(0x00004014, 0x00060038), MMIOPair(0x00004018, 0x00000000), MMIOPair(0x0000401C, 0x00000033), MMIOPair(0x00004020, 0x00060037), MMIOPair(0x00004024, 0x0000003B), MMIOPair(0x00004028, 0x00000032), MMIOPair(0x0000402C, 0x00000036), MMIOPair(0x00004030, 0x0000003A), MMIOPair(0x00004034, 0x00000033), MMIOPair(0x00004038, 0x00000037), MMIOPair(0x0000403C, 0x0000003B), 
MMIOPair(0x00004040, 0x00000030), MMIOPair(0x00004044, 0x00000034), MMIOPair(0x00004048, 0x00000038), MMIOPair(0x0000404C, 0x00000031), MMIOPair(0x00004050, 0x00000032), MMIOPair(0x00004054, 0x00000036), MMIOPair(0x00004058, 0x0000003A), MMIOPair(0x0000405C, 0x00000033), MMIOPair(0x00004060, 0x00000037), MMIOPair(0x00004064, 0x0000003B), MMIOPair(0x00004068, 0x00000032), MMIOPair(0x0000406C, 0x00000036), MMIOPair(0x00004070, 0x0000003A), MMIOPair(0x00004074, 0x00000033), MMIOPair(0x00004078, 0x00000037), MMIOPair(0x0000407C, 0x0000003B), MMIOPair(0x00004080, 0x00000030), MMIOPair(0x00004084, 0x00000034), MMIOPair(0x00004088, 0x00000038), MMIOPair(0x0000408C, 0x00000031), MMIOPair(0x00004090, 0x00000032), MMIOPair(0x00004094, 0x00000036), MMIOPair(0x00004098, 0x0000003A), MMIOPair(0x0000409C, 0x00000033), MMIOPair(0x000040A0, 0x00000037), MMIOPair(0x000040A4, 0x0000003B), MMIOPair(0x000040A8, 0x00000032), MMIOPair(0x000040AC, 0x00000036), MMIOPair(0x000040B0, 0x0000003A), MMIOPair(0x000040B4, 0x00000033), MMIOPair(0x000040B8, 0x00000037), MMIOPair(0x000040BC, 0x0000003B), MMIOPair(0x000040C0, 0x00000038), MMIOPair(0x000040C4, 0x00000034), MMIOPair(0x000040C8, 0x00000038), MMIOPair(0x000040CC, 0x00000031), MMIOPair(0x000040D0, 0x00000032), MMIOPair(0x000040D4, 0x00000036), MMIOPair(0x000040D8, 0x0000003A), MMIOPair(0x000040DC, 0x00000033), MMIOPair(0x000040E0, 0x00000037), MMIOPair(0x000040E4, 0x0000003B), MMIOPair(0x000040E8, 0x00000032), MMIOPair(0x000040EC, 0x00000036), MMIOPair(0x000040F0, 0x00000038), MMIOPair(0x000040F4, 0x00000038), MMIOPair(0x000040F8, 0x00000038), MMIOPair(0x000040FC, 0x00000038), // LNCF_MOCS MMIOPair(0x0000B020, 0x00300010), MMIOPair(0x0000B024, 0x00300010), MMIOPair(0x0000B028, 0x00300030), MMIOPair(0x0000B02C, 0x00000000), MMIOPair(0x0000B030, 0x0030001F), MMIOPair(0x0000B034, 0x00170013), MMIOPair(0x0000B038, 0x0000001F), MMIOPair(0x0000B03C, 0x00000000), MMIOPair(0x0000B040, 0x00100000), MMIOPair(0x0000B044, 0x00170013), 
MMIOPair(0x0000B048, 0x0010001F), MMIOPair(0x0000B04C, 0x00170013), MMIOPair(0x0000B050, 0x0030001F), MMIOPair(0x0000B054, 0x00170013), MMIOPair(0x0000B058, 0x0000001F), MMIOPair(0x0000B05C, 0x00000000), MMIOPair(0x0000B060, 0x00100000), MMIOPair(0x0000B064, 0x00170013), MMIOPair(0x0000B068, 0x0010001F), MMIOPair(0x0000B06C, 0x00170013), MMIOPair(0x0000B070, 0x0030001F), MMIOPair(0x0000B074, 0x00170013), MMIOPair(0x0000B078, 0x0000001F), MMIOPair(0x0000B07C, 0x00000000), MMIOPair(0x0000B080, 0x00300030), MMIOPair(0x0000B084, 0x00170013), MMIOPair(0x0000B088, 0x0010001F), MMIOPair(0x0000B08C, 0x00170013), MMIOPair(0x0000B090, 0x0030001F), MMIOPair(0x0000B094, 0x00170013), MMIOPair(0x0000B098, 0x00300010), MMIOPair(0x0000B09C, 0x00300010), //PAT_INDEX MMIOPair(0x00004100, 0x0000000), MMIOPair(0x00004104, 0x0000000), MMIOPair(0x00004108, 0x0000000), MMIOPair(0x0000410c, 0x0000000), MMIOPair(0x00004110, 0x0000000), MMIOPair(0x00004114, 0x0000000), MMIOPair(0x00004118, 0x0000000), MMIOPair(0x0000411c, 0x0000000), MMIOPair(0x00004b80, 0xffff1001), //GACB_PERF_CTRL_REG MMIOPair(0x00007000, 0xffff0000), //CACHE_MODE_0 MMIOPair(0x00007004, 0xffff0000), //CACHE_MODE_1 MMIOPair(0x000043F8, 0x00000000), //Gen12 (A-step) chicken bit for AuxT granularity MMIOPair(0x00009008, 0x00000200), //IDICR MMIOPair(0x0000900c, 0x00001b40), //SNPCR MMIOPair(0x0000b120, 0x14000002), //LTCDREG MMIOPair(0x00042080, 0x00000000), //CHICKEN_MISC_1 MMIOPair(0x000020D4, 0xFFFF0000), //Chicken bit for CSFE MMIOPair(0x0000B0A0, 0x00000000), //SCRATCH 2 for LNCF unit MMIOPair(0x000094D4, 0x00000000), //Slice unit Level Clock Gating Control // Capture Perf MMIO register programming MMIOPair(0x0000B004, 0x2FC0100B), //KM_ARBITER_CTRL_REG MMIOPair(0x0000B404, 0x00000160), //KM_GLOBAL_INVALIDATION_REG MMIOPair(0x00008708, 0x00000000), //KM_GEN12_IDI_CONTROL_REGISTER // Tiled Resources VA Translation Table L3 Pointer MMIOPair(0x00004410, 0xffffffff), //GEN12_TRTT_NULL_TILE_REG MMIOPair(0x00004414, 
0xfffffffe), //GEN12_TRTT_INVD_TILE_REG MMIOPair(0x00004404, 0x000000ff), //GEN12_TRTT_VA_MASKDATA_REG MMIOPair(0x00004408, 0x00000000), //LDWORD GMM_GEN12_TRTT_L3_POINTER MMIOPair(0x0000440C, 0x00000000), //UDWORD GMM_GEN12_TRTT_L3_POINTER MMIOPair(0x00004400, 0x00000001), //GEN12_TRTT_TABLE_CONTROL MMIOPair(0x00004DFC, 0x00000000), //GEN9_TR_CHICKEN_BIT_VECTOR }; static const MMIOList mmioListRCS = { MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x00002058), 0x00000000), //CTX_WA_PTR_RCSUNIT MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x000020a8), 0x00000000), //IMR MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x0000229c), 0xffff8280), //GFX_MODE MMIOPair(0x00002090, 0xffff0000), //CHICKEN_PWR_CTX_RASTER_1 MMIOPair(0x000020e0, 0xffff4000), //FF_SLICE_CS_CHICKEN1_RCSUNIT MMIOPair(0x000020e4, 0xffff0000), //FF_SLICE_CS_CHICKEN2_RCSUNIT MMIOPair(0x000020ec, 0xffff0051), //CS_DEBUG_MODE1 // FORCE_TO_NONPRIV MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x000024d0), 0x00007014), MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x000024d4), 0x0000e000), MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x000024d8), 0x0000e000), MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x000024dc), 0x0000e000), MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x000024e0), 0x0000e000), MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x000024e4), 0x0000e000), MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x000024e8), 0x0000e000), MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x000024ec), 0x0000e000), MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x000024f0), 0x0000e000), MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x000024f4), 0x0000e000), MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x000024f8), 0x0000e000), MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x000024fc), 0x0000e000), MMIOPair(0x00002580, 0xffff0005), //CS_CHICKEN1 
MMIOPair(0x0000e194, 0xffff0002), //CHICKEN_SAMPLER_2 MMIOPair(0x0000B134, 0xA0000000) //L3ALLOCREG }; static const MMIOList mmioListBCS = { MMIOPair(AubMemDump::computeRegisterOffset(bcs.mmioBase, 0x0000229c), 0xffff8280), //GFX_MODE }; static const MMIOList mmioListVCS = { MMIOPair(AubMemDump::computeRegisterOffset(vcs.mmioBase, 0x0000229c), 0xffff8280), //GFX_MODE }; static const MMIOList mmioListVECS = { MMIOPair(AubMemDump::computeRegisterOffset(vecs.mmioBase, 0x0000229c), 0xffff8280), //GFX_MODE }; static MMIOList mmioListCCSInstance(uint32_t mmioBase) { MMIOList mmioList; mmioList.push_back(MMIOPair(0x0000ce90, 0x00030003)); //GFX_MULT_CTXT_CTL - enable multi-context with 4CCS mmioList.push_back(MMIOPair(0x0000b170, 0x00030003)); //MULT_CTXT_CTL - enable multi-context with 4CCS mmioList.push_back(MMIOPair(0x00014800, 0xFFFF0001)); //RCU_MODE mmioList.push_back(MMIOPair(AubMemDump::computeRegisterOffset(mmioBase, 0x0000229c), 0xffff8280)); //GFX_MODE // FORCE_TO_NONPRIV mmioList.push_back(MMIOPair(AubMemDump::computeRegisterOffset(mmioBase, 0x000024d0), 0x0000e000)); mmioList.push_back(MMIOPair(AubMemDump::computeRegisterOffset(mmioBase, 0x000024d4), 0x0000e000)); mmioList.push_back(MMIOPair(AubMemDump::computeRegisterOffset(mmioBase, 0x000024d8), 0x0000e000)); mmioList.push_back(MMIOPair(AubMemDump::computeRegisterOffset(mmioBase, 0x000024dc), 0x0000e000)); mmioList.push_back(MMIOPair(AubMemDump::computeRegisterOffset(mmioBase, 0x000024e0), 0x0000e000)); mmioList.push_back(MMIOPair(AubMemDump::computeRegisterOffset(mmioBase, 0x000024e4), 0x0000e000)); mmioList.push_back(MMIOPair(AubMemDump::computeRegisterOffset(mmioBase, 0x000024e8), 0x0000e000)); mmioList.push_back(MMIOPair(AubMemDump::computeRegisterOffset(mmioBase, 0x000024ec), 0x0000e000)); mmioList.push_back(MMIOPair(AubMemDump::computeRegisterOffset(mmioBase, 0x000024f0), 0x0000e000)); mmioList.push_back(MMIOPair(AubMemDump::computeRegisterOffset(mmioBase, 0x000024f4), 0x0000e000)); 
mmioList.push_back(MMIOPair(AubMemDump::computeRegisterOffset(mmioBase, 0x000024f8), 0x0000e000)); mmioList.push_back(MMIOPair(AubMemDump::computeRegisterOffset(mmioBase, 0x000024fc), 0x0000e000)); mmioList.push_back(MMIOPair(0x0000B234, 0xA0000000)); //L3ALLOCREG_CCS0 return mmioList; }; static const MMIOList mmioListCCS = mmioListCCSInstance(ccs.mmioBase); static const MMIOList mmioListCCS1 = mmioListCCSInstance(ccs1.mmioBase); static const MMIOList mmioListCCS2 = mmioListCCSInstance(ccs2.mmioBase); static const MMIOList mmioListCCS3 = mmioListCCSInstance(ccs3.mmioBase); static const MMIOList mmioListCCCS = {}; static const MMIOList mmioListLinkBCS = {}; const MMIOList *AUBFamilyMapper::perEngineMMIO[aub_stream::NUM_ENGINES] = { &mmioListRCS, &mmioListBCS, &mmioListVCS, &mmioListVECS, &mmioListCCS, &mmioListCCS1, &mmioListCCS2, &mmioListCCS3, &mmioListCCCS, &mmioListLinkBCS, &mmioListLinkBCS, &mmioListLinkBCS, &mmioListLinkBCS, &mmioListLinkBCS, &mmioListLinkBCS, &mmioListLinkBCS, &mmioListLinkBCS}; } // namespace NEO namespace AubAllocDump { using namespace NEO; template SurfaceInfo *getDumpSurfaceInfo(GraphicsAllocation &gfxAllocation, DumpFormat dumpFormat); template uint32_t getImageSurfaceTypeFromGmmResourceType(GMM_RESOURCE_TYPE gmmResourceType); template void dumpBufferInBinFormat(GraphicsAllocation &gfxAllocation, AubMemDump::AubFileStream *stream, uint32_t context); template void dumpImageInBmpFormat(GraphicsAllocation &gfxAllocation, AubMemDump::AubFileStream *stream, uint32_t context); template void dumpBufferInTreFormat(GraphicsAllocation &gfxAllocation, AubMemDump::AubFileStream *stream, uint32_t context); template void dumpImageInTreFormat(GraphicsAllocation &gfxAllocation, AubMemDump::AubFileStream *stream, uint32_t context); template void dumpAllocation(DumpFormat dumpFormat, GraphicsAllocation &gfxAllocation, AubMemDump::AubFileStream *stream, uint32_t context); } // namespace AubAllocDump 
compute-runtime-22.14.22890/shared/source/aub_mem_dump/aub_mem_dump_xehp_and_later.inl000066400000000000000000000306341422164147700307530ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/aub_mem_dump/aub_alloc_dump.inl" #include "shared/source/aub_mem_dump/aub_mem_dump.inl" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/helpers/array_count.h" #include "shared/source/helpers/completion_stamp.h" #include "shared/source/helpers/hw_helper.h" #include "aub_mapper.h" #include "config.h" #include "reg_configs_common.h" namespace AubMemDump { enum { device = deviceValue }; // Instantiate these common template implementations. template struct AubDump>; template struct AubDump>; template struct AubPageTableHelper32>; template struct AubPageTableHelper64>; } // namespace AubMemDump namespace NEO { static const AubMemDump::LrcaHelperRcs rcs(0x002000); static const AubMemDump::LrcaHelperBcs bcs(0x022000); static const AubMemDump::LrcaHelperVcs vcs(0x1c0000); static const AubMemDump::LrcaHelperVecs vecs(0x1c8000); static const AubMemDump::LrcaHelperCcs ccs(0x1a000); static const AubMemDump::LrcaHelperCcs ccs1(0x1c000); static const AubMemDump::LrcaHelperCcs ccs2(0x1e000); static const AubMemDump::LrcaHelperCcs ccs3(0x26000); const AubMemDump::LrcaHelper *const AUBFamilyMapper::csTraits[aub_stream::NUM_ENGINES] = { &rcs, &bcs, &vcs, &vecs, &ccs, &ccs1, &ccs2, &ccs3}; const MMIOList AUBFamilyMapper::globalMMIO = { // GLOBAL_MOCS MMIOPair(0x00004000, 0x00000008), MMIOPair(0x00004004, 0x00000038), MMIOPair(0x00004008, 0x00000038), MMIOPair(0x0000400C, 0x00000008), MMIOPair(0x00004010, 0x00000018), MMIOPair(0x00004014, 0x00060038), MMIOPair(0x00004018, 0x00000000), MMIOPair(0x0000401C, 0x00000033), MMIOPair(0x00004020, 0x00060037), MMIOPair(0x00004024, 0x0000003B), MMIOPair(0x00004028, 0x00000032), MMIOPair(0x0000402C, 0x00000036), MMIOPair(0x00004030, 
0x0000003A), MMIOPair(0x00004034, 0x00000033), MMIOPair(0x00004038, 0x00000037), MMIOPair(0x0000403C, 0x0000003B), MMIOPair(0x00004040, 0x00000030), MMIOPair(0x00004044, 0x00000034), MMIOPair(0x00004048, 0x00000038), MMIOPair(0x0000404C, 0x00000031), MMIOPair(0x00004050, 0x00000032), MMIOPair(0x00004054, 0x00000036), MMIOPair(0x00004058, 0x0000003A), MMIOPair(0x0000405C, 0x00000033), MMIOPair(0x00004060, 0x00000037), MMIOPair(0x00004064, 0x0000003B), MMIOPair(0x00004068, 0x00000032), MMIOPair(0x0000406C, 0x00000036), MMIOPair(0x00004070, 0x0000003A), MMIOPair(0x00004074, 0x00000033), MMIOPair(0x00004078, 0x00000037), MMIOPair(0x0000407C, 0x0000003B), MMIOPair(0x00004080, 0x00000030), MMIOPair(0x00004084, 0x00000034), MMIOPair(0x00004088, 0x00000038), MMIOPair(0x0000408C, 0x00000031), MMIOPair(0x00004090, 0x00000032), MMIOPair(0x00004094, 0x00000036), MMIOPair(0x00004098, 0x0000003A), MMIOPair(0x0000409C, 0x00000033), MMIOPair(0x000040A0, 0x00000037), MMIOPair(0x000040A4, 0x0000003B), MMIOPair(0x000040A8, 0x00000032), MMIOPair(0x000040AC, 0x00000036), MMIOPair(0x000040B0, 0x0000003A), MMIOPair(0x000040B4, 0x00000033), MMIOPair(0x000040B8, 0x00000037), MMIOPair(0x000040BC, 0x0000003B), MMIOPair(0x000040C0, 0x00000038), MMIOPair(0x000040C4, 0x00000034), MMIOPair(0x000040C8, 0x00000038), MMIOPair(0x000040CC, 0x00000031), MMIOPair(0x000040D0, 0x00000032), MMIOPair(0x000040D4, 0x00000036), MMIOPair(0x000040D8, 0x0000003A), MMIOPair(0x000040DC, 0x00000033), MMIOPair(0x000040E0, 0x00000037), MMIOPair(0x000040E4, 0x0000003B), MMIOPair(0x000040E8, 0x00000032), MMIOPair(0x000040EC, 0x00000036), MMIOPair(0x000040F0, 0x00000038), MMIOPair(0x000040F4, 0x00000038), MMIOPair(0x000040F8, 0x00000038), MMIOPair(0x000040FC, 0x00000038), // LNCF_MOCS MMIOPair(0x0000B020, 0x00300010), MMIOPair(0x0000B024, 0x00300010), MMIOPair(0x0000B028, 0x00300030), MMIOPair(0x0000B02C, 0x00000000), MMIOPair(0x0000B030, 0x0030001F), MMIOPair(0x0000B034, 0x00170013), MMIOPair(0x0000B038, 0x0000001F), 
MMIOPair(0x0000B03C, 0x00000000), MMIOPair(0x0000B040, 0x00100000), MMIOPair(0x0000B044, 0x00170013), MMIOPair(0x0000B048, 0x0010001F), MMIOPair(0x0000B04C, 0x00170013), MMIOPair(0x0000B050, 0x0030001F), MMIOPair(0x0000B054, 0x00170013), MMIOPair(0x0000B058, 0x0000001F), MMIOPair(0x0000B05C, 0x00000000), MMIOPair(0x0000B060, 0x00100000), MMIOPair(0x0000B064, 0x00170013), MMIOPair(0x0000B068, 0x0010001F), MMIOPair(0x0000B06C, 0x00170013), MMIOPair(0x0000B070, 0x0030001F), MMIOPair(0x0000B074, 0x00170013), MMIOPair(0x0000B078, 0x0000001F), MMIOPair(0x0000B07C, 0x00000000), MMIOPair(0x0000B080, 0x00300030), MMIOPair(0x0000B084, 0x00170013), MMIOPair(0x0000B088, 0x0010001F), MMIOPair(0x0000B08C, 0x00170013), MMIOPair(0x0000B090, 0x0030001F), MMIOPair(0x0000B094, 0x00170013), MMIOPair(0x0000B098, 0x00300010), MMIOPair(0x0000B09C, 0x00300010), //PAT_INDEX MMIOPair(0x00004100, 0x0000000), MMIOPair(0x00004104, 0x0000000), MMIOPair(0x00004108, 0x0000000), MMIOPair(0x0000410c, 0x0000000), MMIOPair(0x00004110, 0x0000000), MMIOPair(0x00004114, 0x0000000), MMIOPair(0x00004118, 0x0000000), MMIOPair(0x0000411c, 0x0000000), MMIOPair(0x00004b80, 0xffff1001), //GACB_PERF_CTRL_REG MMIOPair(0x00007000, 0xffff0000), //CACHE_MODE_0 MMIOPair(0x00007004, 0xffff0000), //CACHE_MODE_1 MMIOPair(0x000043F8, 0x00000000), //Gen12 (A-step) chicken bit for AuxT granularity MMIOPair(0x00009008, 0x00000200), //IDICR MMIOPair(0x0000900c, 0x00001b40), //SNPCR MMIOPair(0x0000b120, 0x14000002), //LTCDREG MMIOPair(0x00042080, 0x00000000), //CHICKEN_MISC_1 MMIOPair(0x000020D4, 0xFFFF0000), //Chicken bit for CSFE MMIOPair(0x0000B0A0, 0x00000000), //SCRATCH 2 for LNCF unit MMIOPair(0x000094D4, 0x00000000), //Slice unit Level Clock Gating Control // Capture Perf MMIO register programming MMIOPair(0x0000B004, 0x2FC0100B), //KM_ARBITER_CTRL_REG MMIOPair(0x0000B404, 0x00000160), //KM_GLOBAL_INVALIDATION_REG MMIOPair(0x00008708, 0x00000000), //KM_GEN12_IDI_CONTROL_REGISTER // Tiled Resources VA Translation Table 
L3 Pointer MMIOPair(0x00004410, 0xffffffff), //GEN12_TRTT_NULL_TILE_REG MMIOPair(0x00004414, 0xfffffffe), //GEN12_TRTT_INVD_TILE_REG MMIOPair(0x00004404, 0x000000ff), //GEN12_TRTT_VA_MASKDATA_REG MMIOPair(0x00004408, 0x00000000), //LDWORD GMM_GEN12_TRTT_L3_POINTER MMIOPair(0x0000440C, 0x00000000), //UDWORD GMM_GEN12_TRTT_L3_POINTER MMIOPair(0x00004400, 0x00000001), //GEN12_TRTT_TABLE_CONTROL MMIOPair(0x00004DFC, 0x00000000), //GEN9_TR_CHICKEN_BIT_VECTOR }; static const MMIOList mmioListRCS = { MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x00002058), 0x00000000), //CTX_WA_PTR_RCSUNIT MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x000020a8), 0x00000000), //IMR MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x0000229c), 0xffff8280), //GFX_MODE MMIOPair(0x00002090, 0xffff0000), //CHICKEN_PWR_CTX_RASTER_1 MMIOPair(0x000020e0, 0xffff4000), //FF_SLICE_CS_CHICKEN1_RCSUNIT MMIOPair(0x000020e4, 0xffff0000), //FF_SLICE_CS_CHICKEN2_RCSUNIT MMIOPair(0x000020ec, 0xffff0051), //CS_DEBUG_MODE1 // FORCE_TO_NONPRIV MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x000024d0), 0x00007014), MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x000024d4), 0x0000e000), MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x000024d8), 0x0000e000), MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x000024dc), 0x0000e000), MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x000024e0), 0x0000e000), MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x000024e4), 0x0000e000), MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x000024e8), 0x0000e000), MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x000024ec), 0x0000e000), MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x000024f0), 0x0000e000), MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x000024f4), 0x0000e000), MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x000024f8), 0x0000e000), 
MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x000024fc), 0x0000e000), MMIOPair(0x00002580, 0xffff0005), //CS_CHICKEN1 MMIOPair(0x0000e194, 0xffff0002), //CHICKEN_SAMPLER_2 MMIOPair(0x0000B134, 0xA0000000) //L3ALLOCREG }; static const MMIOList mmioListBCS = { MMIOPair(AubMemDump::computeRegisterOffset(bcs.mmioBase, 0x0000229c), 0xffff8280), //GFX_MODE }; static const MMIOList mmioListVCS = { MMIOPair(AubMemDump::computeRegisterOffset(vcs.mmioBase, 0x0000229c), 0xffff8280), //GFX_MODE }; static const MMIOList mmioListVECS = { MMIOPair(AubMemDump::computeRegisterOffset(vecs.mmioBase, 0x0000229c), 0xffff8280), //GFX_MODE }; static MMIOList mmioListCCSInstance(uint32_t mmioBase) { MMIOList mmioList; mmioList.push_back(MMIOPair(0x0000ce90, 0x00030003)); //GFX_MULT_CTXT_CTL - enable multi-context with 4CCS mmioList.push_back(MMIOPair(0x0000b170, 0x00030003)); //MULT_CTXT_CTL - enable multi-context with 4CCS mmioList.push_back(MMIOPair(0x00014800, 0xFFFF0001)); //RCU_MODE mmioList.push_back(MMIOPair(AubMemDump::computeRegisterOffset(mmioBase, 0x0000229c), 0xffff8280)); //GFX_MODE // FORCE_TO_NONPRIV mmioList.push_back(MMIOPair(AubMemDump::computeRegisterOffset(mmioBase, 0x000024d0), 0x0000e000)); mmioList.push_back(MMIOPair(AubMemDump::computeRegisterOffset(mmioBase, 0x000024d4), 0x0000e000)); mmioList.push_back(MMIOPair(AubMemDump::computeRegisterOffset(mmioBase, 0x000024d8), 0x0000e000)); mmioList.push_back(MMIOPair(AubMemDump::computeRegisterOffset(mmioBase, 0x000024dc), 0x0000e000)); mmioList.push_back(MMIOPair(AubMemDump::computeRegisterOffset(mmioBase, 0x000024e0), 0x0000e000)); mmioList.push_back(MMIOPair(AubMemDump::computeRegisterOffset(mmioBase, 0x000024e4), 0x0000e000)); mmioList.push_back(MMIOPair(AubMemDump::computeRegisterOffset(mmioBase, 0x000024e8), 0x0000e000)); mmioList.push_back(MMIOPair(AubMemDump::computeRegisterOffset(mmioBase, 0x000024ec), 0x0000e000)); mmioList.push_back(MMIOPair(AubMemDump::computeRegisterOffset(mmioBase, 0x000024f0), 
0x0000e000)); mmioList.push_back(MMIOPair(AubMemDump::computeRegisterOffset(mmioBase, 0x000024f4), 0x0000e000)); mmioList.push_back(MMIOPair(AubMemDump::computeRegisterOffset(mmioBase, 0x000024f8), 0x0000e000)); mmioList.push_back(MMIOPair(AubMemDump::computeRegisterOffset(mmioBase, 0x000024fc), 0x0000e000)); mmioList.push_back(MMIOPair(0x0000B234, 0xA0000000)); //L3ALLOCREG_CCS0 return mmioList; }; static const MMIOList mmioListCCS = mmioListCCSInstance(ccs.mmioBase); static const MMIOList mmioListCCS1 = mmioListCCSInstance(ccs1.mmioBase); static const MMIOList mmioListCCS2 = mmioListCCSInstance(ccs2.mmioBase); static const MMIOList mmioListCCS3 = mmioListCCSInstance(ccs3.mmioBase); const MMIOList *AUBFamilyMapper::perEngineMMIO[aub_stream::NUM_ENGINES] = { &mmioListRCS, &mmioListBCS, &mmioListVCS, &mmioListVECS, &mmioListCCS, &mmioListCCS1, &mmioListCCS2, &mmioListCCS3}; } // namespace NEO namespace AubAllocDump { using namespace NEO; template SurfaceInfo *getDumpSurfaceInfo(GraphicsAllocation &gfxAllocation, DumpFormat dumpFormat); template uint32_t getImageSurfaceTypeFromGmmResourceType(GMM_RESOURCE_TYPE gmmResourceType); template void dumpBufferInBinFormat(GraphicsAllocation &gfxAllocation, AubMemDump::AubFileStream *stream, uint32_t context); template void dumpImageInBmpFormat(GraphicsAllocation &gfxAllocation, AubMemDump::AubFileStream *stream, uint32_t context); template void dumpBufferInTreFormat(GraphicsAllocation &gfxAllocation, AubMemDump::AubFileStream *stream, uint32_t context); template void dumpImageInTreFormat(GraphicsAllocation &gfxAllocation, AubMemDump::AubFileStream *stream, uint32_t context); template void dumpAllocation(DumpFormat dumpFormat, GraphicsAllocation &gfxAllocation, AubMemDump::AubFileStream *stream, uint32_t context); } // namespace AubAllocDump compute-runtime-22.14.22890/shared/source/aub_mem_dump/aub_stream_stubs.cpp000066400000000000000000000022601422164147700266200ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 
Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "third_party/aub_stream/headers/aub_manager.h" #include "third_party/aub_stream/headers/aubstream.h" namespace aub_stream_stubs { uint16_t tbxServerPort = 4321; std::string tbxServerIp = "127.0.0.1"; bool tbxFrontdoorMode = false; } // namespace aub_stream_stubs namespace aub_stream { AubManager *AubManager::create(uint32_t productFamily, uint32_t devicesCount, uint64_t memoryBankSizeInGB, uint32_t stepping, bool localMemorySupported, uint32_t streamMode, uint64_t gpuAddressSpace) { return nullptr; } extern "C" { void injectMMIOList(MMIOList mmioList){}; void setTbxServerPort(uint16_t port) { aub_stream_stubs::tbxServerPort = port; }; void setTbxServerIp(std::string server) { // better to avoid reassigning global variables which assume memory allocations since // we could step into false-positive memory leak detection with embedded leak check helper if (aub_stream_stubs::tbxServerIp != server) aub_stream_stubs::tbxServerIp = server; }; void setTbxFrontdoorMode(bool frontdoor) { aub_stream_stubs::tbxFrontdoorMode = frontdoor; } } } // namespace aub_stream compute-runtime-22.14.22890/shared/source/aub_mem_dump/context_flags.cpp000066400000000000000000000004221422164147700261140ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/aub_mem_dump//aub_mem_dump.h" namespace AubMemDump { void LrcaHelper::setContextSaveRestoreFlags(uint32_t &ctxSrCtlValue) const { } } // namespace AubMemDump compute-runtime-22.14.22890/shared/source/aub_mem_dump/definitions/000077500000000000000000000000001422164147700250655ustar00rootroot00000000000000compute-runtime-22.14.22890/shared/source/aub_mem_dump/definitions/CMakeLists.txt000066400000000000000000000005771422164147700276360ustar00rootroot00000000000000# # Copyright (C) 2021 Intel Corporation # # SPDX-License-Identifier: MIT # get_property(NEO_CORE_AUB_MEM_DUMP GLOBAL PROPERTY 
NEO_CORE_AUB_MEM_DUMP) list(APPEND NEO_CORE_AUB_MEM_DUMP ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/aub_services.h ) set_property(GLOBAL PROPERTY NEO_CORE_AUB_MEM_DUMP ${NEO_CORE_AUB_MEM_DUMP}) add_subdirectories() compute-runtime-22.14.22890/shared/source/aub_mem_dump/definitions/aub_services.h000066400000000000000000001163231422164147700277160ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/aub_mem_dump/aub_header.h" #include #ifndef WIN32 #pragma pack(4) #else #pragma pack(push, 4) #endif struct CmdServicesMemTraceVersion { union { AubCmdHdr Header; struct { uint32_t dwordCount : 16; uint32_t instructionSubOpcode : 7; uint32_t instructionOpcode : 6; uint32_t instructionType : 3; }; }; uint32_t memtraceFileVersion; struct { uint32_t metal : 3; uint32_t stepping : 5; uint32_t device : 8; uint32_t csxSwizzling : 2; uint32_t recordingMethod : 2; uint32_t pch : 8; uint32_t captureTool : 4; }; uint32_t primaryVersion; uint32_t secondaryVersion; char commandLine[4]; int32_t getCommandLineLength() const { return getPacketSize() - (5); } int32_t getPacketSize() const { return dwordCount + 1; } int32_t getLengthBias() const { return 1; } uint32_t getBaseLength() const { return 4; } bool matchesHeader() const { if (instructionType != 0x7) return false; if (instructionOpcode != 0x2e) return false; if (instructionSubOpcode != 0xe) return false; return true; } void setHeader() { instructionType = 0x7; instructionOpcode = 0x2e; instructionSubOpcode = 0xe; } static uint32_t type() { return 0x7; } static uint32_t opcode() { return 0x2e; } static uint32_t subOpcode() { return 0xe; } struct CaptureToolValues { enum { GenKmdCapture = 1, Aubload = 0, Amber = 3, Ghal3DUlt = 2, AubDump = 4 }; }; struct DeviceValues { enum { Blc = 2, Il = 5, Glk = 17, Skl = 12, Hsw = 9, Bxt = 14, Sbr = 6, Cnl = 15, Ivb = 7, Chv = 13, El = 4, Ctg = 3, Lrb2 = 8, Bwr = 0, Vlv = 
10, Cln = 1, Kbl = 16, Bdw = 11, Icllp = 19, Tgllp = 22, Cfl = 24, Lkf = 25, Ehl = 28, Dg1 = 30, Adls = 37, XeHP_SDV = 29, Adlp = 34, Dg2 = 36, Pvc = 39, }; }; struct RecordingMethodValues { enum { Phy = 1, Gfx = 0 }; }; struct CsxSwizzlingValues { enum { Disabled = 0, Enabled = 1 }; }; struct PchValues { enum { LynxPoint = 4, CougarPoint = 2, PantherPoint = 3, Default = 0, IbexPeak = 1 }; }; struct SteppingValues { enum { N = 13, O = 14, L = 11, M = 12, B = 1, C = 2, A = 0, F = 5, G = 6, D = 3, E = 4, Z = 25, X = 23, Y = 24, R = 17, S = 18, P = 15, Q = 16, V = 21, W = 22, T = 19, U = 20, J = 9, K = 10, H = 7, I = 8 }; }; }; struct CmdServicesMemTraceRegisterCompare { union { AubCmdHdr Header; struct { uint32_t dwordCount : 16; uint32_t instructionSubOpcode : 7; uint32_t instructionOpcode : 6; uint32_t instructionType : 3; }; }; uint32_t registerOffset; struct { uint32_t noReadExpect : 1; uint32_t : 15; uint32_t registerSize : 4; uint32_t : 8; uint32_t registerSpace : 4; }; uint32_t readMaskLow; uint32_t readMaskHigh; uint32_t data[1]; int32_t getDataLength() const { return getPacketSize() - (5); } int32_t getPacketSize() const { return dwordCount + 1; } int32_t getLengthBias() const { return 1; } uint32_t getBaseLength() const { return 4; } bool matchesHeader() const { if (instructionType != 0x7) return false; if (instructionOpcode != 0x2e) return false; if (instructionSubOpcode != 0x1) return false; return true; } void setHeader() { instructionType = 0x7; instructionOpcode = 0x2e; instructionSubOpcode = 0x1; } static uint32_t type() { return 0x7; } static uint32_t opcode() { return 0x2e; } static uint32_t subOpcode() { return 0x1; } struct RegisterSpaceValues { enum { MchBar = 1, Mmio = 0, VtdBar = 5, PciConfig = 2, IO = 4, AzaliaBar = 3 }; }; struct RegisterSizeValues { enum { Qword = 3, Dword = 2, Word = 1, Byte = 0 }; }; struct NoReadExpectValues { enum { ReadExpect = 0, ReadWithoutExpect = 1 }; }; }; struct CmdServicesMemTraceRegisterPoll { union { AubCmdHdr 
Header; struct { uint32_t dwordCount : 16; uint32_t instructionSubOpcode : 7; uint32_t instructionOpcode : 6; uint32_t instructionType : 3; }; }; uint32_t registerOffset; struct { uint32_t : 1; uint32_t timeoutAction : 1; uint32_t pollNotEqual : 1; uint32_t : 1; uint32_t operationType : 4; uint32_t : 8; uint32_t registerSize : 4; uint32_t : 8; uint32_t registerSpace : 4; }; uint32_t pollMaskLow; uint32_t pollMaskHigh; uint32_t data[1]; int32_t getDataLength() const { return getPacketSize() - (5); } int32_t getPacketSize() const { return dwordCount + 1; } int32_t getLengthBias() const { return 1; } uint32_t getBaseLength() const { return 4; } bool matchesHeader() const { if (instructionType != 0x7) return false; if (instructionOpcode != 0x2e) return false; if (instructionSubOpcode != 0x2) return false; return true; } void setHeader() { instructionType = 0x7; instructionOpcode = 0x2e; instructionSubOpcode = 0x2; } static uint32_t type() { return 0x7; } static uint32_t opcode() { return 0x2e; } static uint32_t subOpcode() { return 0x2; } struct OperationTypeValues { enum { Normal = 0, InterlacedCrc = 1 }; }; struct RegisterSpaceValues { enum { MchBar = 1, Mmio = 0, VtdBar = 5, PciConfig = 2, IO = 4, AzaliaBar = 3 }; }; struct TimeoutActionValues { enum { Abort = 0, Ignore = 1 }; }; struct RegisterSizeValues { enum { Qword = 3, Dword = 2, Word = 1, Byte = 0 }; }; }; struct CmdServicesMemTraceRegisterWrite { union { AubCmdHdr Header; struct { uint32_t dwordCount : 16; uint32_t instructionSubOpcode : 7; uint32_t instructionOpcode : 6; uint32_t instructionType : 3; }; }; uint32_t registerOffset; struct { uint32_t : 4; uint32_t messageSourceId : 4; uint32_t : 8; uint32_t registerSize : 4; uint32_t : 8; uint32_t registerSpace : 4; }; uint32_t writeMaskLow; uint32_t writeMaskHigh; uint32_t data[1]; int32_t getDataLength() const { return getPacketSize() - (5); } int32_t getPacketSize() const { return dwordCount + 1; } int32_t getLengthBias() const { return 1; } uint32_t 
getBaseLength() const { return 4; } bool matchesHeader() const { if (instructionType != 0x7) return false; if (instructionOpcode != 0x2e) return false; if (instructionSubOpcode != 0x3) return false; return true; } void setHeader() { instructionType = 0x7; instructionOpcode = 0x2e; instructionSubOpcode = 0x3; } static uint32_t type() { return 0x7; } static uint32_t opcode() { return 0x2e; } static uint32_t subOpcode() { return 0x3; } struct MessageSourceIdValues { enum { Workaround = 4, Gt = 2, Ia = 0, Me = 1, Pch = 3 }; }; struct RegisterSpaceValues { enum { MchBar = 1, Mmio = 0, VtdBar = 5, PciConfig = 2, IO = 4, AzaliaBar = 3 }; }; struct RegisterSizeValues { enum { Qword = 3, Dword = 2, Word = 1, Byte = 0 }; }; }; struct CmdServicesMemTraceMemoryCompare { union { AubCmdHdr Header; struct { uint32_t dwordCount : 16; uint32_t instructionSubOpcode : 7; uint32_t instructionOpcode : 6; uint32_t instructionType : 3; }; }; uint32_t address; uint32_t addressHigh; struct { uint32_t noReadExpect : 1; uint32_t repeatMemory : 1; uint32_t tiling : 2; uint32_t : 2; uint32_t crcCompare : 1; uint32_t compareOperation : 1; uint32_t : 12; uint32_t dataTypeHint : 8; uint32_t addressSpace : 4; }; uint32_t dataSizeInBytes; uint32_t data[1]; int32_t getDataLength() const { return getPacketSize() - (5); } int32_t getPacketSize() const { return dwordCount + 1; } int32_t getLengthBias() const { return 1; } uint32_t getBaseLength() const { return 4; } bool matchesHeader() const { if (instructionType != 0x7) return false; if (instructionOpcode != 0x2e) return false; if (instructionSubOpcode != 0x4) return false; return true; } void setHeader() { instructionType = 0x7; instructionOpcode = 0x2e; instructionSubOpcode = 0x4; } static uint32_t type() { return 0x7; } static uint32_t opcode() { return 0x2e; } static uint32_t subOpcode() { return 0x4; } struct RepeatMemoryValues { enum { NoRepeat = 0, Repeat = 1 }; }; struct DataTypeHintValues { enum { TraceInterfaceDescriptor = 29, 
TraceCommandBufferPrimary = 39, TraceRemap = 37, TraceVertexShaderState = 16, TraceSfViewport = 23, TraceMediaObjectIndirectData = 36, Trace1DMap = 10, TraceVolumeMap = 9, TraceVldState = 30, TraceBatchBufferPrimary = 42, TraceSamplerDefaultColor = 28, TraceClipViewport = 22, TraceStripsFansState = 19, TraceNotype = 0, TraceAudioLinkTable = 46, TraceGeometryShaderState = 17, TraceConstantBuffer = 11, TraceBatchBufferBlt = 43, TraceBinBuffer = 2, TraceIndexBuffer = 13, Trace2DMap = 6, TraceCubeMap = 7, TraceVfeState = 31, TraceDepthStencilState = 33, TraceBatchBufferMfx = 44, TraceRenderSurfaceState = 35, TraceWindowerIzState = 20, TraceCommandBufferMfx = 41, TraceBatchBuffer = 1, TraceCcViewport = 24, TraceColorCalcState = 21, TraceCommandBuffer = 38, TraceAudioData = 47, TraceSlowStateBuffer = 4, TraceAudioCommandBuffer = 45, TraceCommandBufferBlt = 40, TraceKernelInstructions = 26, TraceConstantUrbEntry = 12, TraceBlendState = 32, TraceIndirectStateBuffer = 8, TraceClipperState = 18, TraceSamplerState = 25, TraceBindingTableState = 34, TraceBinPointerList = 3, TraceVertexBufferState = 5, TraceScratchSpace = 27 }; }; /* The remaining nested structs only wrap anonymous enums of named constants; presumably each set belongs to the bitfield with the matching name - confirm against the packet spec. */ struct TilingValues { enum { NoTiling = 0, WTiling = 3, YTiling = 2, XTiling = 1 }; }; struct CrcCompareValues { enum { Crc = 1, NoCrc = 0 }; }; struct NoReadExpectValues { enum { ReadExpect = 0, ReadWithoutExpect = 1 }; }; struct AddressSpaceValues { enum { TraceGttEntry = 4, TraceNonapetureGttGfx = 7, TraceLocal = 1, TracePml4Entry = 10, TraceGttGfx = 0, TraceNonlocal = 2, TraceGttPdEntry = 3, TracePpgttEntry = 6, TracePpgttGfx = 5, TracePpgttPdEntry = 9, TracePhysicalPdpEntry = 8 }; }; struct CompareOperationValues { enum { CompareNotEqual = 1, CompareEqual = 0 }; }; }; /* Memory-poll packet. Header identifiers are type 0x7, opcode 0x2e, sub-opcode 0x5 (set and checked further down in this struct). The declaration starts with the header dword followed by address and addressHigh. */ struct CmdServicesMemTraceMemoryPoll { /* First dword, readable as AubCmdHdr or as individual bitfields. */ union { AubCmdHdr Header; struct { uint32_t dwordCount : 16; uint32_t instructionSubOpcode : 7; uint32_t instructionOpcode : 6; uint32_t instructionType : 3; }; }; uint32_t address; uint32_t addressHigh; /* Control dword begins here. */ struct { uint32_t pollNotEqual : 1; 
uint32_t : 1; uint32_t tiling : 2; uint32_t dataSize : 2; uint32_t : 2; uint32_t timeoutAction : 1; uint32_t : 11; uint32_t dataTypeHint : 8; uint32_t addressSpace : 4; }; uint32_t pollMaskLow; uint32_t pollMaskHigh; /* First element of the trailing payload. */ uint32_t data[1]; /* Packet size minus the 6 dwords that precede data[] (header, address, addressHigh, control dword, pollMaskLow, pollMaskHigh). */ int32_t getDataLength() const { return getPacketSize() - (6); } /* Header dwordCount field plus one. */ int32_t getPacketSize() const { return dwordCount + 1; } int32_t getLengthBias() const { return 1; } uint32_t getBaseLength() const { return 5; } /* True only when the header carries 0x7 / 0x2e / 0x5. */ bool matchesHeader() const { if (instructionType != 0x7) return false; if (instructionOpcode != 0x2e) return false; if (instructionSubOpcode != 0x5) return false; return true; } /* Writes the three identifier fields; dwordCount is not touched here. */ void setHeader() { instructionType = 0x7; instructionOpcode = 0x2e; instructionSubOpcode = 0x5; } static uint32_t type() { return 0x7; } static uint32_t opcode() { return 0x2e; } static uint32_t subOpcode() { return 0x5; } /* Enumerators are not listed in numeric order; every value is given explicitly. */ struct DataTypeHintValues { enum { TraceInterfaceDescriptor = 29, TraceCommandBufferPrimary = 39, TraceRemap = 37, TraceVertexShaderState = 16, TraceSfViewport = 23, TraceMediaObjectIndirectData = 36, Trace1DMap = 10, TraceVolumeMap = 9, TraceVldState = 30, TraceBatchBufferPrimary = 42, TraceSamplerDefaultColor = 28, TraceClipViewport = 22, TraceStripsFansState = 19, TraceNotype = 0, TraceAudioLinkTable = 46, TraceGeometryShaderState = 17, TraceConstantBuffer = 11, TraceBatchBufferBlt = 43, TraceBinBuffer = 2, TraceIndexBuffer = 13, Trace2DMap = 6, TraceCubeMap = 7, TraceVfeState = 31, TraceDepthStencilState = 33, TraceBatchBufferMfx = 44, TraceRenderSurfaceState = 35, TraceWindowerIzState = 20, TraceCommandBufferMfx = 41, TraceBatchBuffer = 1, TraceCcViewport = 24, TraceColorCalcState = 21, TraceCommandBuffer = 38, TraceAudioData = 47, TraceSlowStateBuffer = 4, TraceAudioCommandBuffer = 45, TraceCommandBufferBlt = 40, TraceKernelInstructions = 26, TraceConstantUrbEntry = 12, TraceBlendState = 32, TraceIndirectStateBuffer = 8, TraceClipperState = 18, TraceSamplerState = 25, TraceBindingTableState = 34, TraceBinPointerList = 3, 
TraceVertexBufferState = 5, TraceScratchSpace = 27 }; }; struct DataSizeValues { enum { Qword = 3, Dword = 2, Word = 1, Byte = 0 }; }; struct TilingValues { enum { NoTiling = 0, WTiling = 3, YTiling = 2, XTiling = 1 }; }; struct TimeoutActionValues { enum { Abort = 0, Ignore = 1 }; }; struct AddressSpaceValues { enum { TraceGttEntry = 4, TraceNonapetureGttGfx = 7, TraceLocal = 1, TracePml4Entry = 10, TraceGttGfx = 0, TraceNonlocal = 2, TraceGttPdEntry = 3, TracePpgttEntry = 6, TracePpgttGfx = 5, TracePpgttPdEntry = 9, TracePhysicalPdpEntry = 8 }; }; }; /* Memory-write packet. Header identifiers are type 0x7, opcode 0x2e, sub-opcode 0x6. Fixed part is five dwords: header, a 64-bit address (two dwords), one control bitfield dword, dataSizeInBytes; data[] follows. */ struct CmdServicesMemTraceMemoryWrite { /* First dword, readable as AubCmdHdr or as individual bitfields. */ union { AubCmdHdr Header; struct { uint32_t dwordCount : 16; uint32_t instructionSubOpcode : 7; uint32_t instructionOpcode : 6; uint32_t instructionType : 3; }; }; uint64_t address; /* Evaluates to 2: the address occupies dwords 1 through 2 of the packet. */ int32_t getAddressLength() const { return 2 - (1) + 1; } /* Control dword; the unnamed 16-bit member is padding (1 + 1 + 2 + 16 + 8 + 4 = 32 bits). */ struct { uint32_t frontDoorAccess : 1; uint32_t repeatMemory : 1; uint32_t tiling : 2; uint32_t : 16; uint32_t dataTypeHint : 8; uint32_t addressSpace : 4; }; uint32_t dataSizeInBytes; uint32_t data[1]; /* Packet size minus the 5 dwords that precede data[]. */ int32_t getDataLength() const { return getPacketSize() - (5); } /* Header dwordCount field plus one. */ int32_t getPacketSize() const { return dwordCount + 1; } int32_t getLengthBias() const { return 1; } uint32_t getBaseLength() const { return 4; } /* True only when the header carries 0x7 / 0x2e / 0x6. */ bool matchesHeader() const { if (instructionType != 0x7) return false; if (instructionOpcode != 0x2e) return false; if (instructionSubOpcode != 0x6) return false; return true; } void setHeader() { instructionType = 0x7; instructionOpcode = 0x2e; instructionSubOpcode = 0x6; } static uint32_t type() { return 0x7; } static uint32_t opcode() { return 0x2e; } static uint32_t subOpcode() { return 0x6; } struct RepeatMemoryValues { enum { NoRepeat = 0, Repeat = 1 }; }; /* Enumerators are not listed in numeric order; every value is given explicitly. */ struct DataTypeHintValues { enum { TraceVertexBufferState = 5, TraceCommandBufferPrimary = 39, TraceVertexShaderState = 16, TraceExtendedRootTableEntry = 52, TraceClipViewport = 22, Trace1DMap = 10, TraceBatchBufferPrimary = 42, TraceClipperState = 18, TraceLogicalRingContextVecs 
= 51, TraceRingContextVcs = 57, TraceLri = 59, TraceBlendState = 32, TraceBinBuffer = 2, TraceSlowStateBuffer = 4, TraceRemap = 37, TraceDepthStencilState = 33, TraceAudioData = 47, TraceDummyGgttEntry = 62, TraceWindowerIzState = 20, Trace2DMap = 6, TraceBindingTableState = 34, TraceGucProcessDescriptor = 60, TraceIndirectStateBuffer = 8, TraceConstantBuffer = 11, TraceMediaObjectIndirectData = 36, TraceStripsFansState = 19, TraceBatchBuffer = 1, TraceLogicalRingContextVcs = 50, TraceSfViewport = 23, TraceCommandBufferBlt = 40, TraceRingContextBcs = 56, TraceCcViewport = 24, TraceLogicalRingContextCcs = 64, TraceIndexBuffer = 13, TraceScratchSpace = 27, TraceGucContextDescriptor = 61, TraceBatchBufferMfx = 44, TraceCommandBufferMfx = 41, TraceBatchBufferBlt = 43, TraceSamplerState = 25, TraceRingContextRcs = 55, TraceAudioLinkTable = 46, TraceRenderSurfaceState = 35, TraceSamplerDefaultColor = 28, TraceVldState = 30, TraceVfeState = 31, TraceExtendedContextTableEntry = 53, TraceLogicalRingContextRcs = 48, TraceInterfaceDescriptor = 29, TraceConstantUrbEntry = 12, TraceCommandBuffer = 38, TracePasidTableEntry = 54, TraceBinPointerList = 3, TraceRingContextVecs = 58, TraceNotype = 0, TraceGeometryShaderState = 17, TraceAudioCommandBuffer = 45, TraceColorCalcState = 21, TraceKernelInstructions = 26, TraceVolumeMap = 9, TraceCubeMap = 7, TraceLogicalRingContextBcs = 49, TracePpgttLevel1 = 65, TracePpgttLevel2 = 66, TracePpgttLevel3 = 67, TracePpgttLevel4 = 68 }; }; struct TilingValues { enum { NoTiling = 0, WTiling = 3, YTiling = 2, XTiling = 1 }; }; struct AddressSpaceValues { enum { TraceGttEntry = 4, TraceNonapetureGttGfx = 7, TraceLocal = 1, TracePml4Entry = 10, TraceGttGfx = 0, TraceNonlocal = 2, TraceGttPdEntry = 3, TracePpgttEntry = 6, TracePpgttGfx = 5, TracePpgttPdEntry = 9, TracePowerContext = 11, TracePhysicalPdpEntry = 8 }; }; }; /* Discontiguous memory-write packet. Header identifiers are type 0x7, opcode 0x2e, sub-opcode 0xb (set and checked further down in this struct). The declaration begins with the header dword. */ struct CmdServicesMemTraceMemoryWriteDiscontiguous { /* First dword, readable as AubCmdHdr or as individual bitfields. */ union { AubCmdHdr Header; struct { uint32_t dwordCount : 16; uint32_t 
instructionSubOpcode : 7; uint32_t instructionOpcode : 6; uint32_t instructionType : 3; }; }; /* Control dword (1 + 1 + 2 + 16 + 8 + 4 = 32 bits); numberOfAddressDataPairs is 16 bits wide. */ struct { uint32_t frontDoorAccess : 1; uint32_t repeatMemory : 1; uint32_t tiling : 2; uint32_t numberOfAddressDataPairs : 16; uint32_t dataTypeHint : 8; uint32_t addressSpace : 4; }; /* 63 entries of a 64-bit address plus a 32-bit size, i.e. 3 dwords each when packed, 189 dwords in total (dwords 2 through 190). */ struct { uint64_t address; uint32_t dataSizeInBytes; } Dword_2_To_190[63]; /* Evaluates to 189. */ int32_t getDword2To190Length() const { return 190 - (2) + 1; } uint32_t data[1]; /* Packet size minus the 191 dwords that precede data[]. */ int32_t getDataLength() const { return getPacketSize() - (191); } /* Header dwordCount field plus one. */ int32_t getPacketSize() const { return dwordCount + 1; } int32_t getLengthBias() const { return 1; } uint32_t getBaseLength() const { return 190; } /* True only when the header carries 0x7 / 0x2e / 0xb. */ bool matchesHeader() const { if (instructionType != 0x7) return false; if (instructionOpcode != 0x2e) return false; if (instructionSubOpcode != 0xb) return false; return true; } void setHeader() { instructionType = 0x7; instructionOpcode = 0x2e; instructionSubOpcode = 0xb; } static uint32_t type() { return 0x7; } static uint32_t opcode() { return 0x2e; } static uint32_t subOpcode() { return 0xb; } struct RepeatMemoryValues { enum { NoRepeat = 0, Repeat = 1 }; }; /* Enumerators are not listed in numeric order; every value is given explicitly. */ struct DataTypeHintValues { enum { TraceVertexBufferState = 5, TraceCommandBufferPrimary = 39, TraceRingContextBcs = 56, TraceExtendedRootTableEntry = 52, TraceClipViewport = 22, Trace1DMap = 10, TraceBatchBufferPrimary = 42, TraceClipperState = 18, TraceRingContextVcs = 57, TraceVolumeMap = 9, TraceBlendState = 32, TraceSlowStateBuffer = 4, TraceRemap = 37, TraceDepthStencilState = 33, TraceAudioData = 47, TraceColorCalcState = 21, TraceWindowerIzState = 20, Trace2DMap = 6, TraceBindingTableState = 34, TraceIndirectStateBuffer = 8, TraceConstantBuffer = 11, TraceMediaObjectIndirectData = 36, TraceStripsFansState = 19, TraceBatchBuffer = 1, TraceSfViewport = 23, TraceCommandBufferBlt = 40, TraceBinBuffer = 2, TraceCcViewport = 24, TraceIndexBuffer = 13, TraceScratchSpace = 27, TraceLogicalRingContextVecs = 51, TraceBatchBufferMfx = 44, TraceCommandBufferMfx = 41, 
TraceBatchBufferBlt = 43, TraceSamplerState = 25, TraceRingContextRcs = 55, TraceAudioLinkTable = 46, TraceRenderSurfaceState = 35, TraceSamplerDefaultColor = 28, TraceVldState = 30, TraceVfeState = 31, TraceExtendedContextTableEntry = 53, TraceLogicalRingContextRcs = 48, TraceInterfaceDescriptor = 29, TraceConstantUrbEntry = 12, TraceCommandBuffer = 38, TraceVertexShaderState = 16, TraceBinPointerList = 3, TraceRingContextVecs = 58, TraceNotype = 0, TraceGeometryShaderState = 17, TraceAudioCommandBuffer = 45, TraceLogicalRingContextVcs = 50, TraceKernelInstructions = 26, TracePasidTableEntry = 54, TraceCubeMap = 7, TraceLogicalRingContextBcs = 49 }; }; struct TilingValues { enum { NoTiling = 0, WTiling = 3, YTiling = 2, XTiling = 1 }; }; struct AddressSpaceValues { enum { TraceGttEntry = 4, TraceNonapetureGttGfx = 7, TraceLocal = 1, TracePml4Entry = 10, TraceGttGfx = 0, TraceNonlocal = 2, TraceGttPdEntry = 3, TracePpgttEntry = 6, TracePpgttGfx = 5, TracePpgttPdEntry = 9, TracePowerContext = 11, TracePhysicalPdpEntry = 8 }; }; }; /* Frame-begin packet. Header identifiers are type 0x7, opcode 0x2e, sub-opcode 0x7. Two dwords in total: the header and one dword whose low 16 bits hold frameNumber. No variable-length payload is declared. */ struct CmdServicesMemTraceFrameBegin { /* First dword, readable as AubCmdHdr or as individual bitfields. */ union { AubCmdHdr Header; struct { uint32_t dwordCount : 16; uint32_t instructionSubOpcode : 7; uint32_t instructionOpcode : 6; uint32_t instructionType : 3; }; }; /* The unnamed 16-bit member pads the dword. */ struct { uint32_t frameNumber : 16; uint32_t : 16; }; /* Header dwordCount field plus one. */ int32_t getPacketSize() const { return dwordCount + 1; } int32_t getLengthBias() const { return 1; } uint32_t getBaseLength() const { return 1; } /* True only when the header carries 0x7 / 0x2e / 0x7. */ bool matchesHeader() const { if (instructionType != 0x7) return false; if (instructionOpcode != 0x2e) return false; if (instructionSubOpcode != 0x7) return false; return true; } /* Writes the three identifier fields; dwordCount is not touched here. */ void setHeader() { instructionType = 0x7; instructionOpcode = 0x2e; instructionSubOpcode = 0x7; } static uint32_t type() { return 0x7; } static uint32_t opcode() { return 0x2e; } static uint32_t subOpcode() { return 0x7; } }; /* Comment packet. Header identifiers are type 0x7, opcode 0x2e, sub-opcode 0x8 (set and checked further down in this struct). */ struct CmdServicesMemTraceComment { union { AubCmdHdr Header; struct { uint32_t dwordCount : 16; uint32_t instructionSubOpcode : 7; uint32_t 
instructionOpcode : 6; uint32_t instructionType : 3; }; }; /* Flag dword: two single-bit flags, the remaining 30 bits are padding. */ struct { uint32_t syncOnComment : 1; uint32_t syncOnSimulatorDisplay : 1; uint32_t : 30; }; /* Start of the trailing text; declared as 4 chars, i.e. one dword. */ char comment[4]; /* Packet size minus the 2 dwords (header and flag dword) that precede comment[]. */ int32_t getCommentLength() const { return getPacketSize() - (2); } /* Header dwordCount field plus one. */ int32_t getPacketSize() const { return dwordCount + 1; } int32_t getLengthBias() const { return 1; } uint32_t getBaseLength() const { return 1; } /* True only when the header carries 0x7 / 0x2e / 0x8. */ bool matchesHeader() const { if (instructionType != 0x7) return false; if (instructionOpcode != 0x2e) return false; if (instructionSubOpcode != 0x8) return false; return true; } void setHeader() { instructionType = 0x7; instructionOpcode = 0x2e; instructionSubOpcode = 0x8; } static uint32_t type() { return 0x7; } static uint32_t opcode() { return 0x2e; } static uint32_t subOpcode() { return 0x8; } }; /* Delay packet. Header identifiers are type 0x7, opcode 0x2e, sub-opcode 0x9. Two dwords in total: the header and `time`. The unit of `time` is not stated in this file. */ struct CmdServicesMemTraceDelay { /* First dword, readable as AubCmdHdr or as individual bitfields. */ union { AubCmdHdr Header; struct { uint32_t dwordCount : 16; uint32_t instructionSubOpcode : 7; uint32_t instructionOpcode : 6; uint32_t instructionType : 3; }; }; uint32_t time; /* Header dwordCount field plus one. */ int32_t getPacketSize() const { return dwordCount + 1; } int32_t getLengthBias() const { return 1; } uint32_t getBaseLength() const { return 1; } /* True only when the header carries 0x7 / 0x2e / 0x9. */ bool matchesHeader() const { if (instructionType != 0x7) return false; if (instructionOpcode != 0x2e) return false; if (instructionSubOpcode != 0x9) return false; return true; } /* Writes the three identifier fields; dwordCount is not touched here. */ void setHeader() { instructionType = 0x7; instructionOpcode = 0x2e; instructionSubOpcode = 0x9; } static uint32_t type() { return 0x7; } static uint32_t opcode() { return 0x2e; } static uint32_t subOpcode() { return 0x9; } }; /* Memory-dump packet. Header identifiers are type 0x7, opcode 0x2e, sub-opcode 0xa (set and checked further down in this struct). Seven fixed dwords are declared: header, physical address low/high, stride, width, height, one bitfield dword; filename[] follows. */ struct CmdServicesMemTraceMemoryDump { union { AubCmdHdr Header; struct { uint32_t dwordCount : 16; uint32_t instructionSubOpcode : 7; uint32_t instructionOpcode : 6; uint32_t instructionType : 3; }; }; uint32_t physicalAddressDwordLow; uint32_t physicalAddressDwordHigh; uint32_t stride; uint32_t width; uint32_t height; /* 2 + 2 + 2 + 26 = 32 bits; unnamed members are padding. */ struct { uint32_t addressSpace : 2; uint32_t : 2; uint32_t tiling : 2; uint32_t : 26; }; char filename[4]; int32_t 
getFilenameLength() const { return getPacketSize() - (7); } /* Header dwordCount field plus one. */ int32_t getPacketSize() const { return dwordCount + 1; } int32_t getLengthBias() const { return 1; } /* NOTE(review): returns 5 although getFilenameLength() above subtracts 7 fixed dwords - confirm the intended meaning of the base length against the packet spec. */ uint32_t getBaseLength() const { return 5; } /* True only when the header carries 0x7 / 0x2e / 0xa. */ bool matchesHeader() const { if (instructionType != 0x7) return false; if (instructionOpcode != 0x2e) return false; if (instructionSubOpcode != 0xa) return false; return true; } void setHeader() { instructionType = 0x7; instructionOpcode = 0x2e; instructionSubOpcode = 0xa; } static uint32_t type() { return 0x7; } static uint32_t opcode() { return 0x2e; } static uint32_t subOpcode() { return 0xa; } struct TilingValues { enum { NoTiling = 0, WTiling = 3, YTiling = 2, XTiling = 1 }; }; struct AddressSpaceValues { enum { TraceGttGfx = 0, TraceLocal = 1 }; }; }; /* Test-phase-marker packet. Header identifiers are type 0x7, opcode 0x2e, sub-opcode 0xc. Two dwords in total: the header and one bitfield dword. */ struct CmdServicesMemTraceTestPhaseMarker { /* First dword, readable as AubCmdHdr or as individual bitfields. */ union { AubCmdHdr Header; struct { uint32_t dwordCount : 16; uint32_t instructionSubOpcode : 7; uint32_t instructionOpcode : 6; uint32_t instructionType : 3; }; }; /* 12 + 4 + 16 = 32 bits; the unnamed member is padding. */ struct { uint32_t toolSpecificSubPhase : 12; uint32_t beginTestPhase : 4; uint32_t : 16; }; /* Header dwordCount field plus one. */ int32_t getPacketSize() const { return dwordCount + 1; } int32_t getLengthBias() const { return 1; } uint32_t getBaseLength() const { return 1; } /* True only when the header carries 0x7 / 0x2e / 0xc. */ bool matchesHeader() const { if (instructionType != 0x7) return false; if (instructionOpcode != 0x2e) return false; if (instructionSubOpcode != 0xc) return false; return true; } /* Writes the three identifier fields; dwordCount is not touched here. */ void setHeader() { instructionType = 0x7; instructionOpcode = 0x2e; instructionSubOpcode = 0xc; } static uint32_t type() { return 0x7; } static uint32_t opcode() { return 0x2e; } static uint32_t subOpcode() { return 0xc; } /* Named constants, presumably for the 4-bit beginTestPhase field - confirm. The values are not contiguous. */ struct BeginTestPhaseValues { enum { PollForTestCompletion = 8, SetupPhase = 2, DispatchPhase = 3, VerificationPhase = 10, MemoryInitializationPhase = 0, ExecutePhase = 4 }; }; }; /* Continuous-memory-region packet. Header identifiers are type 0x7, opcode 0x2e, sub-opcode 0xd (set and checked further down in this struct). */ struct CmdServicesMemTraceMemoryContinuousRegion { union { AubCmdHdr Header; struct { uint32_t dwordCount : 16; uint32_t instructionSubOpcode : 7; uint32_t instructionOpcode : 6; uint32_t 
instructionType : 3; }; }; uint64_t address; /* Evaluates to 2: the address occupies dwords 1 through 2. */ int32_t getAddressLength() const { return 2 - (1) + 1; } uint64_t regionSize; /* Evaluates to 2: regionSize occupies dwords 3 through 4. */ int32_t getRegionSizeLength() const { return 4 - (3) + 1; } /* Header dwordCount field plus one. */ int32_t getPacketSize() const { return dwordCount + 1; } int32_t getLengthBias() const { return 1; } uint32_t getBaseLength() const { return 4; } /* True only when the header carries 0x7 / 0x2e / 0xd. */ bool matchesHeader() const { if (instructionType != 0x7) return false; if (instructionOpcode != 0x2e) return false; if (instructionSubOpcode != 0xd) return false; return true; } void setHeader() { instructionType = 0x7; instructionOpcode = 0x2e; instructionSubOpcode = 0xd; } static uint32_t type() { return 0x7; } static uint32_t opcode() { return 0x2e; } static uint32_t subOpcode() { return 0xd; } }; /* Predicate packet. Header identifiers are type 0x7, opcode 0x2e, sub-opcode 0xf. Two dwords are declared: the header and one bitfield dword. */ struct CmdServicesMemTracePredicate { /* First dword, readable as AubCmdHdr or as individual bitfields. */ union { AubCmdHdr Header; struct { uint32_t dwordCount : 16; uint32_t instructionSubOpcode : 7; uint32_t instructionOpcode : 6; uint32_t instructionType : 3; }; }; /* 1 + 4 + 27 = 32 bits; the unnamed member is padding. */ struct { uint32_t predicateState : 1; uint32_t target : 4; uint32_t : 27; }; /* Header dwordCount field plus one. */ int32_t getPacketSize() const { return dwordCount + 1; } int32_t getLengthBias() const { return 1; } /* NOTE(review): returns 4 although only one dword follows the header in this struct; CmdServicesMemTraceFrameBegin, CmdServicesMemTraceDelay and CmdServicesMemTraceTestPhaseMarker, which have the same shape, return 1 - confirm against the packet spec. */ uint32_t getBaseLength() const { return 4; } /* True only when the header carries 0x7 / 0x2e / 0xf. */ bool matchesHeader() const { if (instructionType != 0x7) return false; if (instructionOpcode != 0x2e) return false; if (instructionSubOpcode != 0xf) return false; return true; } /* Writes the three identifier fields; dwordCount is not touched here. */ void setHeader() { instructionType = 0x7; instructionOpcode = 0x2e; instructionSubOpcode = 0xf; } static uint32_t type() { return 0x7; } static uint32_t opcode() { return 0x2e; } static uint32_t subOpcode() { return 0xf; } struct PredicateStateValues { enum { Disabled = 0, Enabled = 1 }; }; /* Named constants, presumably for the 4-bit target field - confirm. */ struct TargetValues { enum { Fpgarunlist = 8, Simulator = 0, Pipe = 1, Silicon = 4, Uncore = 6, Emulator = 3, Pipe2D = 7, Fpgamedia = 5, Pipegt = 2 }; }; }; /* Dump-compress packet. Header identifiers are type 0x7, opcode 0x2e, sub-opcode 0x10 (set and checked further down in this struct). */ struct CmdServicesMemTraceDumpCompress { union { AubCmdHdr Header; struct { uint32_t dwordCount : 16; uint32_t instructionSubOpcode : 7; uint32_t instructionOpcode : 6; uint32_t instructionType : 3; }; }; uint64_t 
surfaceAddress; /* surfaceAddress directly follows the 4-byte header; it is read and written through getMisalignedUint64 / setMisalignedUint64, which are defined outside this view - presumably to avoid a misaligned 64-bit access, confirm. */ uint64_t getSurfaceAddress() const { return getMisalignedUint64(&this->surfaceAddress); } void setSurfaceAddress(const uint64_t surfaceAddress) { setMisalignedUint64(&this->surfaceAddress, surfaceAddress); } /* Evaluates to 2 (dwords 1 through 2). */ int getSurfaceAddressLength() const { return 2 - (1) + 1; } uint32_t surfaceWidth; uint32_t surfaceHeight; uint32_t surfacePitch; /* 12 + 3 + 1 + 3 + 3 + 3 + 3 + 2 + 1 + 1 = 32 bits; unnamed members are padding. */ struct { uint32_t surfaceFormat : 12; uint32_t dumpType : 3; uint32_t : 1; uint32_t surfaceTilingType : 3; uint32_t : 3; uint32_t surfaceType : 3; uint32_t : 3; uint32_t tiledResourceMode : 2; uint32_t : 1; uint32_t useClearValue : 1; }; /* Unlike surfaceAddress, no accessor pair is declared for this 64-bit field. */ uint64_t auxSurfaceAddress; /* Evaluates to 2 (dwords 7 through 8). */ int getAuxSurfaceAddressLength() const { return 8 - (7) + 1; } uint32_t auxSurfaceWidth; uint32_t auxSurfaceHeight; uint32_t auxSurfacePitch; /* 17 + 4 + 3 + 8 = 32 bits. */ struct { uint32_t auxSurfaceQPitch : 17; uint32_t : 4; uint32_t auxSurfaceTilingType : 3; uint32_t : 8; }; /* 8 + 8 + 8 + 1 + 3 + 4 = 32 bits. */ struct { uint32_t blockWidth : 8; uint32_t blockHeight : 8; uint32_t blockDepth : 8; uint32_t mode : 1; uint32_t algorithm : 3; uint32_t : 4; }; uint32_t tileWidth; uint32_t tileHeight; uint32_t tileDepth; uint32_t clearColorRed; uint32_t clearColorGreen; uint32_t clearColorBlue; uint32_t clearColorAlpha; /* 2 + 1 + 29 = 32 bits. */ struct { uint32_t gttType : 2; uint32_t clearColorType : 1; uint32_t : 29; }; uint32_t directoryHandle; uint64_t clearColorAddress; /* Evaluates to 2 (dwords 23 through 24). */ int getClearColorAddressLength() const { return 24 - (23) + 1; } /* Header dwordCount field plus one. */ int32_t getPacketSize() const { return dwordCount + 1; } int32_t getLengthBias() const { return 1; } /* Returns 19 while 25 dwords are declared in this struct; the relation between the two is not explained in this file. */ uint32_t getBaseLength() const { return 19; } /* True only when the header carries 0x7 / 0x2e / 0x10. */ bool matchesHeader() const { if (instructionType != 0x7) return false; if (instructionOpcode != 0x2e) return false; if (instructionSubOpcode != 0x10) return false; return true; } void setHeader() { instructionType = 0x7; instructionOpcode = 0x2e; instructionSubOpcode = 0x10; } static uint32_t type() { return 0x7; } static uint32_t opcode() { return 0x2e; } static uint32_t subOpcode() { return 0x10; } struct GttTypeValues { enum { Ppgtt = 1, Ggtt = 0 }; }; struct 
SurfaceTilingTypeValues { enum { YmajorS = 4, Xmajor = 2, YmajorF = 5, Linear = 0, Wmajor = 1, Ymajor = 3 }; }; struct ModeValues { enum { Horizontal = 1, Vertical = 0 }; }; struct ClearColorTypeValues { enum { Immediate = 0, Address = 1 }; }; struct SurfaceTypeValues { enum { SurftypeCube = 3, SurftypeStrbuf = 5, SurftypeBuffer = 4, Surftype3D = 2, Surftype2D = 1, Surftype1D = 0, SurftypeNull = 6 }; }; struct AlgorithmValues { enum { Uncompressed = 4, Astc = 1, Lossless = 2, Media = 0, Msaa = 3 }; }; /* Same enumerators and values as SurfaceTilingTypeValues at the start of this line. */ struct AuxSurfaceTilingTypeValues { enum { YmajorS = 4, Xmajor = 2, YmajorF = 5, Linear = 0, Wmajor = 1, Ymajor = 3 }; }; struct DumpTypeValues { enum { Bin = 1, Png = 4, Bmp = 0, Bmp32 = 2, Tre = 3 }; }; struct TiledResourceModeValues { enum { TrmodeNone = 0, TrmodeYf = 1, TrmodeYs = 2 }; }; }; /* Undoes a packing directive issued before this view: pack() when WIN32 is not defined, pack(pop) otherwise. */ #ifndef WIN32 #pragma pack() #else #pragma pack(pop) #endif compute-runtime-22.14.22890/shared/source/aub_mem_dump/page_table_entry_bits.h000066400000000000000000000006251422164147700272530ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include #include /* Constants for page table entries. presentBit, writableBit, userSupervisorBit and localMemoryBit hold 0, 1, 2 and 11 - presumably bit positions rather than masks, confirm at the call sites. nonValidBits is the maximum value of a numeric type whose template argument is missing from this extract. */ namespace PageTableEntry { const uint32_t presentBit = 0; const uint32_t writableBit = 1; const uint32_t userSupervisorBit = 2; const uint32_t localMemoryBit = 11; const uint64_t nonValidBits = std::numeric_limits::max(); } // namespace PageTableEntry compute-runtime-22.14.22890/shared/source/built_ins/000077500000000000000000000000001422164147700221105ustar00rootroot00000000000000compute-runtime-22.14.22890/shared/source/built_ins/CMakeLists.txt000066400000000000000000000044321422164147700246530ustar00rootroot00000000000000# # Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(SHARED_BUILTINS_PROJECTS_FOLDER "built_ins") set(NEO_CORE_SRCS_BUILT_INS ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/built_ins_storage.cpp ${CMAKE_CURRENT_SOURCE_DIR}/built_ins.cpp 
${CMAKE_CURRENT_SOURCE_DIR}/built_ins.h ${CMAKE_CURRENT_SOURCE_DIR}/built_in_ops_base.h ${CMAKE_CURRENT_SOURCE_DIR}/sip.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sip.h ${CMAKE_CURRENT_SOURCE_DIR}/sip_kernel_type.h #[[ BRANCH_DIR_SUFFIX is expanded between the directory and the file name; its value is set outside this file. ]] ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}unknown_built_in_name.cpp ) add_subdirectory(builtinops) set_property(GLOBAL #[[ Publishes the source list as a global property of the same name. ]] PROPERTY NEO_CORE_SRCS_BUILT_INS ${NEO_CORE_SRCS_BUILT_INS}) set(NEO_CORE_SRCS_BUILT_IN_KERNELS #[[ One .builtin_kernel file per built-in kernel, all under kernels/. ]] ${CMAKE_CURRENT_SOURCE_DIR}/kernels/aux_translation.builtin_kernel ${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_buffer_rect.builtin_kernel ${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_buffer_rect_stateless.builtin_kernel ${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_buffer_to_buffer.builtin_kernel ${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_buffer_to_buffer_stateless.builtin_kernel ${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_buffer_to_image3d.builtin_kernel ${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_buffer_to_image3d_stateless.builtin_kernel ${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_kernel_timestamps.builtin_kernel ${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_image3d_to_buffer.builtin_kernel ${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_image3d_to_buffer_stateless.builtin_kernel ${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_image_to_image1d.builtin_kernel ${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_image_to_image2d.builtin_kernel ${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_image_to_image3d.builtin_kernel ${CMAKE_CURRENT_SOURCE_DIR}/kernels/fill_buffer.builtin_kernel ${CMAKE_CURRENT_SOURCE_DIR}/kernels/fill_buffer_stateless.builtin_kernel ${CMAKE_CURRENT_SOURCE_DIR}/kernels/fill_image1d.builtin_kernel ${CMAKE_CURRENT_SOURCE_DIR}/kernels/fill_image2d.builtin_kernel ${CMAKE_CURRENT_SOURCE_DIR}/kernels/fill_image3d.builtin_kernel ) set_property(GLOBAL PROPERTY NEO_CORE_SRCS_BUILT_IN_KERNELS ${NEO_CORE_SRCS_BUILT_IN_KERNELS}) if(NOT #[[ builtins_binary.cmake is included only when the target named by BUILTINS_BINARIES_BINDFUL_LIB_NAME does not exist yet. ]] (TARGET ${BUILTINS_BINARIES_BINDFUL_LIB_NAME})) include(builtins_binary.cmake) endif() 
compute-runtime-22.14.22890/shared/source/built_ins/built_in_ops_base.h000066400000000000000000000017361422164147700257500ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include namespace NEO { /* Numeric identifiers for the built-in operations. Type is uint32_t; ids run from 0 (AuxTranslation) to 17 (QueryKernelTimestamps). MaxBaseValue equals that highest id and COUNT is fixed at 64, which leaves ids 18 through 63 unassigned in this header. */ namespace EBuiltInOps { using Type = uint32_t; constexpr Type AuxTranslation{0}; constexpr Type CopyBufferToBuffer{1}; constexpr Type CopyBufferToBufferStateless{2}; constexpr Type CopyBufferRect{3}; constexpr Type CopyBufferRectStateless{4}; constexpr Type FillBuffer{5}; constexpr Type FillBufferStateless{6}; constexpr Type CopyBufferToImage3d{7}; constexpr Type CopyBufferToImage3dStateless{8}; constexpr Type CopyImage3dToBuffer{9}; constexpr Type CopyImage3dToBufferStateless{10}; constexpr Type CopyImageToImage1d{11}; constexpr Type CopyImageToImage2d{12}; constexpr Type CopyImageToImage3d{13}; constexpr Type FillImage1d{14}; constexpr Type FillImage2d{15}; constexpr Type FillImage3d{16}; constexpr Type QueryKernelTimestamps{17}; constexpr Type MaxBaseValue{17}; constexpr Type COUNT{64}; } // namespace EBuiltInOps } // namespace NEO compute-runtime-22.14.22890/shared/source/built_ins/built_ins.cpp000066400000000000000000000052401422164147700246050ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/built_ins/built_ins.h" #include "shared/source/built_ins/sip.h" #include "shared/source/compiler_interface/compiler_interface.h" #include "shared/source/device_binary_format/device_binary_formats.h" #include "shared/source/helpers/basic_math.h" #include "shared/source/helpers/debug_helpers.h" #include "shared/source/memory_manager/memory_manager.h" #include "compiler_options.h" #include #include namespace NEO { /* Creates the BuiltinsLib instance held by builtinsLib. */ BuiltIns::BuiltIns() { builtinsLib.reset(new BuiltinsLib()); } BuiltIns::~BuiltIns() = default; /* Returns the SipKernel stored for `type`, creating it on first request through std::call_once on the per-type flag. UNRECOVERABLE_IF guards an out-of-range type, a missing compiler interface, a failed or empty SIP binary, and a null result. */ const SipKernel &BuiltIns::getSipKernel(SipKernelType type, Device &device) { uint32_t kernelId = 
static_cast(type); /* kernelId indexes sipKernels, so it must be below SipKernelType::COUNT. */ UNRECOVERABLE_IF(kernelId >= static_cast(SipKernelType::COUNT)); /* Each slot is a pair: .first owns the SipKernel, .second is the flag passed to std::call_once. */ auto &sipBuiltIn = this->sipKernels[kernelId]; /* Builds the kernel: fetch the binary, allocate memory for it, copy it in, wrap it in a SipKernel. */ auto initializer = [&] { std::vector sipBinary; std::vector stateSaveAreaHeader; auto compilerInteface = device.getCompilerInterface(); UNRECOVERABLE_IF(compilerInteface == nullptr); auto ret = compilerInteface->getSipKernelBinary(device, type, sipBinary, stateSaveAreaHeader); UNRECOVERABLE_IF(ret != TranslationOutput::ErrorCode::Success); UNRECOVERABLE_IF(sipBinary.size() == 0); const auto allocType = AllocationType::KERNEL_ISA_INTERNAL; AllocationProperties properties = {device.getRootDeviceIndex(), sipBinary.size(), allocType, device.getDeviceBitfield()}; properties.flags.use32BitFrontWindow = false; auto sipAllocation = device.getMemoryManager()->allocateGraphicsMemoryWithProperties(properties); auto &hwInfo = device.getHardwareInfo(); auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); /* The binary is copied only when the allocation succeeded. NOTE(review): on failure the SipKernel below is still constructed with a null allocation - confirm callers tolerate that. */ if (sipAllocation) { MemoryTransferHelper::transferMemoryToAllocation(hwHelper.isBlitCopyRequiredForLocalMemory(hwInfo, *sipAllocation), device, sipAllocation, 0, sipBinary.data(), sipBinary.size()); } sipBuiltIn.first.reset(new SipKernel(type, sipAllocation, std::move(stateSaveAreaHeader))); }; /* Runs initializer at most once per slot, including under concurrent calls. */ std::call_once(sipBuiltIn.second, initializer); UNRECOVERABLE_IF(sipBuiltIn.first == nullptr); return *sipBuiltIn.first; } /* Passes the allocation of every SipKernel that was created to memoryManager->freeGraphicsMemory(). The SipKernel objects themselves are not reset here. */ void BuiltIns::freeSipKernels(MemoryManager *memoryManager) { for (auto &sipKernel : sipKernels) { if (sipKernel.first.get()) { memoryManager->freeGraphicsMemory(sipKernel.first->getSipAllocation()); } } } } // namespace NEO compute-runtime-22.14.22890/shared/source/built_ins/built_ins.h000066400000000000000000000116271422164147700242600ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/built_ins/sip_kernel_type.h" #include "shared/source/helpers/debug_helpers.h" #include "shared/source/helpers/non_copyable_or_moveable.h" 
#include "shared/source/helpers/vec.h" #include "built_in_ops.h" #include "compiler_options.h" #include #include #include #include #include #include #include #include #include namespace NEO { typedef std::vector BuiltinResourceT; class Device; class SipKernel; class MemoryManager; static constexpr ConstStringRef mediaKernelsBuildOptionsList[] = { "-D cl_intel_device_side_advanced_vme_enable", "-D cl_intel_device_side_avc_vme_enable", "-D cl_intel_device_side_vme_enable", "-D cl_intel_media_block_io", CompilerOptions::fastRelaxedMath}; static constexpr CompilerOptions::ConstConcatenation<> mediaKernelsBuildOptions{mediaKernelsBuildOptionsList}; BuiltinResourceT createBuiltinResource(const char *ptr, size_t size); BuiltinResourceT createBuiltinResource(const BuiltinResourceT &r); std::string createBuiltinResourceName(EBuiltInOps::Type builtin, const std::string &extension, const std::string &platformName = "", uint32_t deviceRevId = 0); std::string joinPath(const std::string &lhs, const std::string &rhs); const char *getBuiltinAsString(EBuiltInOps::Type builtin); const char *getUnknownBuiltinAsString(EBuiltInOps::Type builtin); const char *getAdditionalBuiltinAsString(EBuiltInOps::Type builtin); class Storage { public: Storage(const std::string &rootPath) : rootPath(rootPath) { } virtual ~Storage() = default; BuiltinResourceT load(const std::string &resourceName); protected: virtual BuiltinResourceT loadImpl(const std::string &fullResourceName) = 0; std::string rootPath; }; class FileStorage : public Storage { public: FileStorage(const std::string &rootPath = "") : Storage(rootPath) { } protected: BuiltinResourceT loadImpl(const std::string &fullResourceName) override; }; struct EmbeddedStorageRegistry { static EmbeddedStorageRegistry &getInstance() { static EmbeddedStorageRegistry gsr; return gsr; } void store(const std::string &name, BuiltinResourceT &&resource) { resources.emplace(name, BuiltinResourceT(std::move(resource))); } const BuiltinResourceT *get(const 
std::string &name) const; private: using ResourcesContainer = std::unordered_map; ResourcesContainer resources; }; class EmbeddedStorage : public Storage { public: EmbeddedStorage(const std::string &rootPath) : Storage(rootPath) { } protected: BuiltinResourceT loadImpl(const std::string &fullResourceName) override; }; struct BuiltinCode { enum class ECodeType { Any = 0, // for requesting "any" code available - priorities as below Binary = 1, // ISA - highest priority Intermediate = 2, // SPIR/LLVM - medium prioroty Source = 3, // OCL C - lowest priority COUNT, INVALID }; static const char *getExtension(ECodeType ct) { switch (ct) { default: return ""; case ECodeType::Binary: return ".bin"; case ECodeType::Intermediate: return ".bc"; case ECodeType::Source: return ".cl"; } } ECodeType type; BuiltinResourceT resource; Device *targetDevice; }; class BuiltinsLib { public: BuiltinsLib(); BuiltinCode getBuiltinCode(EBuiltInOps::Type builtin, BuiltinCode::ECodeType requestedCodeType, Device &device); protected: BuiltinResourceT getBuiltinResource(EBuiltInOps::Type builtin, BuiltinCode::ECodeType requestedCodeType, Device &device); using StoragesContainerT = std::vector>; StoragesContainerT allStorages; // sorted by priority allStorages[0] will be checked before allStorages[1], etc. 
std::mutex mutex; }; class BuiltIns { public: BuiltIns(); virtual ~BuiltIns(); MOCKABLE_VIRTUAL const SipKernel &getSipKernel(SipKernelType type, Device &device); MOCKABLE_VIRTUAL void freeSipKernels(MemoryManager *memoryManager); BuiltinsLib &getBuiltinsLib() { DEBUG_BREAK_IF(!builtinsLib.get()); return *builtinsLib; } void setCacheingEnableState(bool enableCacheing) { this->enableCacheing = enableCacheing; } bool isCacheingEnabled() const { return this->enableCacheing; } protected: // sip builtins std::pair, std::once_flag> sipKernels[static_cast(SipKernelType::COUNT)]; std::unique_ptr builtinsLib; bool enableCacheing = true; }; template class BuiltInOp; } // namespace NEO compute-runtime-22.14.22890/shared/source/built_ins/built_ins_storage.cpp000066400000000000000000000173521422164147700263400ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/built_ins/built_ins.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/device/device.h" #include "shared/source/helpers/api_specific_config.h" #include "shared/source/helpers/hw_helper.h" #include "os_inc.h" #include namespace NEO { const char *getBuiltinAsString(EBuiltInOps::Type builtin) { const char *builtinString = getAdditionalBuiltinAsString(builtin); if (builtinString) { return builtinString; } switch (builtin) { default: return getUnknownBuiltinAsString(builtin); case EBuiltInOps::AuxTranslation: return "aux_translation.builtin_kernel"; case EBuiltInOps::CopyBufferToBuffer: return "copy_buffer_to_buffer.builtin_kernel"; case EBuiltInOps::CopyBufferToBufferStateless: return "copy_buffer_to_buffer_stateless.builtin_kernel"; case EBuiltInOps::CopyBufferRect: return "copy_buffer_rect.builtin_kernel"; case EBuiltInOps::CopyBufferRectStateless: return "copy_buffer_rect_stateless.builtin_kernel"; case EBuiltInOps::FillBuffer: return "fill_buffer.builtin_kernel"; case 
EBuiltInOps::FillBufferStateless: return "fill_buffer_stateless.builtin_kernel"; case EBuiltInOps::CopyBufferToImage3d: return "copy_buffer_to_image3d.builtin_kernel"; case EBuiltInOps::CopyBufferToImage3dStateless: return "copy_buffer_to_image3d_stateless.builtin_kernel"; case EBuiltInOps::CopyImage3dToBuffer: return "copy_image3d_to_buffer.builtin_kernel"; case EBuiltInOps::CopyImage3dToBufferStateless: return "copy_image3d_to_buffer_stateless.builtin_kernel"; case EBuiltInOps::CopyImageToImage1d: return "copy_image_to_image1d.builtin_kernel"; case EBuiltInOps::CopyImageToImage2d: return "copy_image_to_image2d.builtin_kernel"; case EBuiltInOps::CopyImageToImage3d: return "copy_image_to_image3d.builtin_kernel"; case EBuiltInOps::FillImage1d: return "fill_image1d.builtin_kernel"; case EBuiltInOps::FillImage2d: return "fill_image2d.builtin_kernel"; case EBuiltInOps::FillImage3d: return "fill_image3d.builtin_kernel"; case EBuiltInOps::QueryKernelTimestamps: return "copy_kernel_timestamps.builtin_kernel"; }; } BuiltinResourceT createBuiltinResource(const char *ptr, size_t size) { return BuiltinResourceT(ptr, ptr + size); } BuiltinResourceT createBuiltinResource(const BuiltinResourceT &r) { return BuiltinResourceT(r); } std::string createBuiltinResourceName(EBuiltInOps::Type builtin, const std::string &extension, const std::string &platformName, uint32_t deviceRevId) { std::string ret; if (platformName.size() > 0) { ret = platformName; ret += "_" + std::to_string(deviceRevId); ret += "_"; } if (extension == ".bin") { ret += ApiSpecificConfig::getBindlessConfiguration() ? 
"bindless_" : "bindful_"; } ret += getBuiltinAsString(builtin); if (extension.size() > 0) { ret += extension; } return ret; } std::string joinPath(const std::string &lhs, const std::string &rhs) { if (lhs.size() == 0) { return rhs; } if (rhs.size() == 0) { return lhs; } if (*lhs.rbegin() == PATH_SEPARATOR) { return lhs + rhs; } return lhs + PATH_SEPARATOR + rhs; } std::string getDriverInstallationPath() { return ""; } BuiltinResourceT Storage::load(const std::string &resourceName) { return loadImpl(joinPath(rootPath, resourceName)); } BuiltinResourceT FileStorage::loadImpl(const std::string &fullResourceName) { BuiltinResourceT ret; std::ifstream f{fullResourceName, std::ios::in | std::ios::binary | std::ios::ate}; auto end = f.tellg(); f.seekg(0, std::ios::beg); auto beg = f.tellg(); auto s = end - beg; ret.resize(static_cast(s)); f.read(ret.data(), s); return ret; } const BuiltinResourceT *EmbeddedStorageRegistry::get(const std::string &name) const { auto it = resources.find(name); if (resources.end() == it) { return nullptr; } return &it->second; } BuiltinResourceT EmbeddedStorage::loadImpl(const std::string &fullResourceName) { auto *constResource = EmbeddedStorageRegistry::getInstance().get(fullResourceName); if (constResource == nullptr) { BuiltinResourceT ret; return ret; } return createBuiltinResource(*constResource); } BuiltinsLib::BuiltinsLib() { allStorages.push_back(std::unique_ptr(new EmbeddedStorage(""))); allStorages.push_back(std::unique_ptr(new FileStorage(getDriverInstallationPath()))); } BuiltinCode BuiltinsLib::getBuiltinCode(EBuiltInOps::Type builtin, BuiltinCode::ECodeType requestedCodeType, Device &device) { std::lock_guard lockRaii{mutex}; BuiltinResourceT bc; BuiltinCode::ECodeType usedCodetType = BuiltinCode::ECodeType::INVALID; if (requestedCodeType == BuiltinCode::ECodeType::Any) { uint32_t codeType = static_cast(BuiltinCode::ECodeType::Binary); if (DebugManager.flags.RebuildPrecompiledKernels.get()) { codeType = 
static_cast(BuiltinCode::ECodeType::Source); } for (uint32_t e = static_cast(BuiltinCode::ECodeType::COUNT); codeType != e; ++codeType) { bc = getBuiltinResource(builtin, static_cast(codeType), device); if (bc.size() > 0) { usedCodetType = static_cast(codeType); break; } } } else { bc = getBuiltinResource(builtin, requestedCodeType, device); usedCodetType = requestedCodeType; } BuiltinCode ret; std::swap(ret.resource, bc); ret.type = usedCodetType; ret.targetDevice = &device; return ret; } BuiltinResourceT BuiltinsLib::getBuiltinResource(EBuiltInOps::Type builtin, BuiltinCode::ECodeType requestedCodeType, Device &device) { BuiltinResourceT bc; auto &hwInfo = device.getHardwareInfo(); auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); std::string resourceNameGeneric = createBuiltinResourceName(builtin, BuiltinCode::getExtension(requestedCodeType)); std::string resourceNameForPlatformType = createBuiltinResourceName(builtin, BuiltinCode::getExtension(requestedCodeType), getFamilyNameWithType(hwInfo), hwHelper.getDefaultRevisionId(hwInfo)); std::string resourceNameForPlatformTypeAndStepping = createBuiltinResourceName(builtin, BuiltinCode::getExtension(requestedCodeType), getFamilyNameWithType(hwInfo), hwInfo.platform.usRevId); StackVec resourcesToLookup; resourcesToLookup.push_back(&resourceNameForPlatformTypeAndStepping); if (BuiltinCode::ECodeType::Binary != requestedCodeType || !hwHelper.isRevisionSpecificBinaryBuiltinRequired()) { resourcesToLookup.push_back(&resourceNameForPlatformType); resourcesToLookup.push_back(&resourceNameGeneric); } for (auto &rn : resourcesToLookup) { // first look for dedicated version, only fallback to generic one for (auto &s : allStorages) { UNRECOVERABLE_IF(!rn); bc = s.get()->load(*rn); if (bc.size() != 0) { return bc; } } } return bc; } } // namespace NEO 
compute-runtime-22.14.22890/shared/source/built_ins/builtinops/000077500000000000000000000000001422164147700243005ustar00rootroot00000000000000compute-runtime-22.14.22890/shared/source/built_ins/builtinops/CMakeLists.txt000066400000000000000000000005241422164147700270410ustar00rootroot00000000000000# # Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(NEO_CORE_SRCS_BUILT_INS_OPS ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}built_in_ops.h ) add_subdirectories() set_property(GLOBAL PROPERTY NEO_CORE_SRCS_BUILT_INS_OPS ${NEO_CORE_SRCS_BUILT_INS_OPS}) compute-runtime-22.14.22890/shared/source/built_ins/builtinops/built_in_ops.h000066400000000000000000000004071422164147700271400ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/built_ins/built_in_ops_base.h" namespace NEO { namespace EBuiltInOps { constexpr Type MaxCoreValue{MaxBaseValue}; } } // namespace NEOcompute-runtime-22.14.22890/shared/source/built_ins/builtins_binary.cmake000066400000000000000000000065571422164147700263240ustar00rootroot00000000000000# # Copyright (C) 2018-2022 Intel Corporation # # SPDX-License-Identifier: MIT # add_library(${BUILTINS_BINARIES_BINDFUL_LIB_NAME} OBJECT EXCLUDE_FROM_ALL builtins_binary.cmake) add_library(${BUILTINS_BINARIES_BINDLESS_LIB_NAME} OBJECT EXCLUDE_FROM_ALL builtins_binary.cmake) target_compile_definitions(${BUILTINS_BINARIES_BINDFUL_LIB_NAME} PUBLIC MOCKABLE_VIRTUAL=) target_compile_definitions(${BUILTINS_BINARIES_BINDLESS_LIB_NAME} PUBLIC MOCKABLE_VIRTUAL=) # Add builtins sources add_subdirectory(registry) list(APPEND BIND_MODES "bindful" "bindless" ) set(GENERATED_BUILTINS "copy_buffer_rect" "copy_buffer_to_buffer" "copy_kernel_timestamps" "fill_buffer" ) set(GENERATED_BUILTINS_AUX_TRANSLATION "aux_translation" ) set(GENERATED_BUILTINS_IMAGES "copy_buffer_to_image3d" "copy_image3d_to_buffer" 
"copy_image_to_image1d" "copy_image_to_image2d" "copy_image_to_image3d" "fill_image1d" "fill_image2d" "fill_image3d" ) set(GENERATED_BUILTINS_IMAGES_STATELESS "copy_buffer_to_image3d_stateless" "copy_image3d_to_buffer_stateless" ) set(GENERATED_BUILTINS_STATELESS "copy_buffer_to_buffer_stateless" "copy_buffer_rect_stateless" "fill_buffer_stateless" ) # Generate builtins cpps if(COMPILE_BUILT_INS) add_subdirectory(kernels) endif() foreach(MODE ${BIND_MODES}) get_property(GENERATED_BUILTINS_CPPS_${MODE} GLOBAL PROPERTY GENERATED_BUILTINS_CPPS_${MODE}) source_group("generated files\\${CORE_TYPE_LOWER}" FILES GENERATED_BUILTINS_CPPS_${MODE}) endforeach() if(COMPILE_BUILT_INS) target_sources(${BUILTINS_BINARIES_BINDFUL_LIB_NAME} PUBLIC ${GENERATED_BUILTINS_CPPS_bindful}) set_source_files_properties(${GENERATED_BUILTINS_CPPS_bindful} PROPERTIES GENERATED TRUE) endif() set_target_properties(${BUILTINS_BINARIES_BINDFUL_LIB_NAME} PROPERTIES LINKER_LANGUAGE CXX) set_target_properties(${BUILTINS_BINARIES_BINDFUL_LIB_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON) set_target_properties(${BUILTINS_BINARIES_BINDFUL_LIB_NAME} PROPERTIES FOLDER "${SHARED_SOURCE_PROJECTS_FOLDER}/${SHARED_BUILTINS_PROJECTS_FOLDER}") target_include_directories(${BUILTINS_BINARIES_BINDFUL_LIB_NAME} PRIVATE ${ENGINE_NODE_DIR} ${KHRONOS_HEADERS_DIR} ${KHRONOS_GL_HEADERS_DIR} ${NEO__GMM_INCLUDE_DIR} ${NEO__IGC_INCLUDE_DIR} ${THIRD_PARTY_DIR} ) if(COMPILE_BUILT_INS) target_sources(${BUILTINS_BINARIES_BINDLESS_LIB_NAME} PUBLIC ${GENERATED_BUILTINS_CPPS_bindless}) set_source_files_properties(${GENERATED_BUILTINS_CPPS_bindless} PROPERTIES GENERATED TRUE) endif() set_target_properties(${BUILTINS_BINARIES_BINDLESS_LIB_NAME} PROPERTIES LINKER_LANGUAGE CXX) set_target_properties(${BUILTINS_BINARIES_BINDLESS_LIB_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON) set_target_properties(${BUILTINS_BINARIES_BINDLESS_LIB_NAME} PROPERTIES FOLDER "${SHARED_SOURCE_PROJECTS_FOLDER}/${SHARED_BUILTINS_PROJECTS_FOLDER}") 
target_include_directories(${BUILTINS_BINARIES_BINDLESS_LIB_NAME} PRIVATE ${ENGINE_NODE_DIR} ${KHRONOS_HEADERS_DIR} ${KHRONOS_GL_HEADERS_DIR} ${NEO__GMM_INCLUDE_DIR} ${NEO__IGC_INCLUDE_DIR} ${THIRD_PARTY_DIR} ) compute-runtime-22.14.22890/shared/source/built_ins/kernels/000077500000000000000000000000001422164147700235535ustar00rootroot00000000000000compute-runtime-22.14.22890/shared/source/built_ins/kernels/CMakeLists.txt000066400000000000000000000302461422164147700263200ustar00rootroot00000000000000# # Copyright (C) 2018-2022 Intel Corporation # # SPDX-License-Identifier: MIT # add_custom_target(builtins) set_target_properties(builtins PROPERTIES FOLDER "${SHARED_SOURCE_PROJECTS_FOLDER}/${SHARED_BUILTINS_PROJECTS_FOLDER}") set(BUILTINS_OUTDIR_WITH_ARCH "${TargetDir}/built_ins/${NEO_ARCH}") add_dependencies(${BUILTINS_BINARIES_BINDFUL_LIB_NAME} builtins) add_dependencies(${BUILTINS_BINARIES_BINDLESS_LIB_NAME} builtins) add_subdirectories() set(GENERATED_BUILTINS ${GENERATED_BUILTINS} PARENT_SCOPE) set(GENERATED_BUILTINS_AUX_TRANSLATION ${GENERATED_BUILTINS_AUX_TRANSLATION} PARENT_SCOPE) set(GENERATED_BUILTINS_IMAGES ${GENERATED_BUILTINS_IMAGES} PARENT_SCOPE) set(GENERATED_BUILTINS_STATELESS ${GENERATED_BUILTINS_STATELESS} PARENT_SCOPE) set(BUILTIN_OPTIONS_STATELESS "-cl-intel-greater-than-4GB-buffer-required" ) set(bindless_OPTIONS -internal_options "-cl-intel-use-bindless-mode -cl-intel-use-bindless-advanced-mode" ) set(bindful_OPTIONS "" ) if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug") list(APPEND __ocloc__options__ "-D DEBUG") endif() set(BUILTINS_INCLUDE_DIR ${TargetDir} PARENT_SCOPE) set(BUILTIN_CPP "") function(get_bits_for_stateless core_type platform_type) # Force 32bits compiling on gen9lp for stateless builtins if((${core_type} STREQUAL "GEN9") AND (${platform_type} STREQUAL "LP")) set(BITS "32" PARENT_SCOPE) else() set(BITS ${NEO_BITS} PARENT_SCOPE) endif() endfunction() function(get_builtin_options core_type neo_arch) if("${neo_arch}" STREQUAL "x32") 
set(BUILTIN_OPTIONS "-cl-intel-greater-than-4GB-buffer-required" PARENT_SCOPE) elseif("${core_type}" STREQUAL "XE_HPC_CORE") set(BUILTIN_OPTIONS "" PARENT_SCOPE) else() set(BUILTIN_OPTIONS "-force_stos_opt" PARENT_SCOPE) endif() endfunction() # Define function for compiling built-ins (with ocloc) function(compile_builtin core_type platform_type builtin bits builtin_options mode) string(TOLOWER ${core_type} core_type_lower) get_family_name_with_type(${core_type} ${platform_type}) set(OUTPUTDIR "${BUILTINS_OUTDIR_WITH_ARCH}/${core_type_lower}") # get filename set(FILENAME ${builtin}) # get name of the file w/o extension get_filename_component(BASENAME ${builtin} NAME_WE) get_filename_component(absolute_filepath ${builtin} ABSOLUTE) set(OUTPUT_FILE_SPV ${OUTPUTDIR}/${mode}_${BASENAME}_${family_name_with_type}.spv ) list(APPEND __ocloc__options__ "-cl-kernel-arg-info") set(INTERNAL_OPTIONS "${${mode}_OPTIONS}") add_custom_command( OUTPUT ${OUTPUT_FILE_SPV} COMMAND ${ocloc_cmd_prefix} -q -file ${absolute_filepath} -spv_only -device ${DEFAULT_SUPPORTED_${core_type}_${platform_type}_PLATFORM} ${builtin_options} -${bits} -output ${mode}_${BASENAME} -out_dir ${OUTPUTDIR} ${INTERNAL_OPTIONS} -options "$" WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} DEPENDS ${builtin} ocloc copy_compiler_files ) foreach(REVISION_ID ${${platform_type}_${core_type}_REVISIONS}) set(OUTPUT_FILE_CPP ${OUTPUTDIR}/${mode}_${BASENAME}_${family_name_with_type}_${REVISION_ID}.cpp ) set(BINARY_OUTPUT "${OUTPUTDIR}/${mode}_${BASENAME}_${REVISION_ID}_${family_name_with_type}") if(NOT NEO_DISABLE_BUILTINS_COMPILATION) set(OUTPUT_FILES_BINARIES ${BINARY_OUTPUT}.gen ${BINARY_OUTPUT}.bin ) get_filename_component(absolute_filepath_spv ${OUTPUT_FILE_SPV} ABSOLUTE) add_custom_command( OUTPUT ${OUTPUT_FILES_BINARIES} COMMAND ${ocloc_cmd_prefix} -q -file ${absolute_filepath_spv} -spirv_input -device ${DEFAULT_SUPPORTED_${core_type}_${platform_type}_PLATFORM} ${builtin_options} -${bits} -output 
${mode}_${BASENAME}_${REVISION_ID} -out_dir ${OUTPUTDIR} -revision_id ${REVISION_ID} ${INTERNAL_OPTIONS} -options "$" WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} DEPENDS ${OUTPUT_FILE_SPV} ocloc copy_compiler_files ) add_custom_command( OUTPUT ${OUTPUT_FILE_CPP} COMMAND $ --file ${BINARY_OUTPUT}.gen --output ${OUTPUT_FILE_CPP} --array ${mode}_${BASENAME} --platform ${family_name_with_type} --revision_id ${REVISION_ID} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} DEPENDS ${OUTPUT_FILES_BINARIES} $ ) list(APPEND BUILTINS_COMMANDS "${OUTPUT_FILE_CPP}") else() foreach(_file_name "gen" "bin") set(_file_prebuilt "${NEO_KERNELS_BIN_DIR}/built_ins/${NEO_ARCH}/${core_type_lower}/${mode}_${BASENAME}_${REVISION_ID}_${family_name_with_type}.${_file_name}") if(EXISTS ${_file_prebuilt}) add_custom_command( OUTPUT ${BINARY_OUTPUT}.${_file_name} COMMAND ${CMAKE_COMMAND} -E make_directory ${OUTPUTDIR} COMMAND ${CMAKE_COMMAND} -E copy_if_different ${_file_prebuilt} ${OUTPUTDIR} ) endif() endforeach() set(_file_prebuilt "${NEO_KERNELS_BIN_DIR}/built_ins/${NEO_ARCH}/${core_type_lower}/${mode}_${BASENAME}_${family_name_with_type}_${REVISION_ID}.cpp") if(EXISTS ${_file_prebuilt}) add_custom_command( OUTPUT ${OUTPUT_FILE_CPP} COMMAND ${CMAKE_COMMAND} -E make_directory ${OUTPUTDIR} COMMAND ${CMAKE_COMMAND} -E copy_if_different ${_file_prebuilt} ${OUTPUTDIR} ) list(APPEND BUILTINS_COMMANDS "${OUTPUT_FILE_CPP}") endif() endif() endforeach() set(BUILTINS_COMMANDS ${BUILTINS_COMMANDS} PARENT_SCOPE) endfunction() function(generate_cpp_spirv builtin) get_filename_component(BASENAME ${builtin} NAME_WE) get_filename_component(DIR ${builtin} DIRECTORY) set(INPUT_FILENAME ${builtin}.builtin_kernel) get_filename_component(absolute_filepath ${INPUT_FILENAME} ABSOLUTE) set(OUTPUTDIR "${BUILTINS_OUTDIR_WITH_ARCH}/spirv/${DIR}") string(REPLACE "//" "/" OUTPUTDIR ${OUTPUTDIR}) set(GENERATED_SPV_INPUT ${OUTPUTDIR}/${BASENAME}.spv) set(OUTPUT_FILE_CPP ${OUTPUTDIR}/${BASENAME}.cpp ) if(NOT 
NEO_DISABLE_BUILTINS_COMPILATION) add_custom_command( OUTPUT ${GENERATED_SPV_INPUT} COMMAND ${ocloc_cmd_prefix} -q -spv_only -file ${absolute_filepath} -out_dir ${OUTPUTDIR} -output_no_suffix -options "-cl-kernel-arg-info" WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} DEPENDS ${INPUT_FILENAME} ocloc copy_compiler_files ) add_custom_command( OUTPUT ${OUTPUT_FILE_CPP} COMMAND $ --file ${GENERATED_SPV_INPUT} --output ${OUTPUT_FILE_CPP} --array ${BASENAME} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} DEPENDS ${GENERATED_SPV_INPUT} $ ) set(OUTPUT_LIST_CPP_FILES ${OUTPUT_LIST_CPP_FILES} ${OUTPUT_FILE_CPP} PARENT_SCOPE) else() set(_file_prebuilt "${NEO_KERNELS_BIN_DIR}/built_ins/${NEO_ARCH}/spirv/${DIR}/${BASENAME}.spv") if(EXISTS ${_file_prebuilt}) add_custom_command( OUTPUT ${GENERATED_SPV_INPUT} COMMAND ${CMAKE_COMMAND} -E make_directory ${OUTPUTDIR} COMMAND ${CMAKE_COMMAND} -E copy_if_different ${_file_prebuilt} ${OUTPUTDIR} ) endif() set(_file_prebuilt "${NEO_KERNELS_BIN_DIR}/built_ins/${NEO_ARCH}/spirv/${DIR}/${BASENAME}.cpp") if(EXISTS ${_file_prebuilt}) add_custom_command( OUTPUT ${OUTPUT_FILE_CPP} COMMAND ${CMAKE_COMMAND} -E make_directory ${OUTPUTDIR} COMMAND ${CMAKE_COMMAND} -E copy_if_different ${_file_prebuilt} ${OUTPUTDIR} ) set(OUTPUT_LIST_CPP_FILES ${OUTPUT_LIST_CPP_FILES} ${OUTPUT_FILE_CPP} PARENT_SCOPE) endif() endif() endfunction() macro(macro_for_each_core_type) foreach(PLATFORM_TYPE ${PLATFORM_TYPES}) if(${CORE_TYPE}_HAS_${PLATFORM_TYPE}) unset(IMAGE_SUPPORT) unset(AUX_TRANSLATION_SUPPORT) CORE_CONTAINS_PLATFORMS("SUPPORTED_IMAGES" ${CORE_TYPE} IMAGE_SUPPORT) CORE_CONTAINS_PLATFORMS("SUPPORTED_AUX_TRANSLATION" ${CORE_TYPE} AUX_TRANSLATION_SUPPORT) get_family_name_with_type(${CORE_TYPE} ${PLATFORM_TYPE}) string(TOLOWER ${PLATFORM_TYPE} PLATFORM_TYPE_LOWER) get_bits_for_stateless(${CORE_TYPE} ${PLATFORM_TYPE}) get_builtin_options(${CORE_TYPE} ${NEO_ARCH}) set(target_name builtins_${family_name_with_type}) add_custom_target(${target_name}) 
add_dependencies(builtins ${target_name}) set_target_properties(${target_name} PROPERTIES FOLDER "${SHARED_SOURCE_PROJECTS_FOLDER}/${SHARED_BUILTINS_PROJECTS_FOLDER}/${family_name_with_type}") foreach(MODE ${BIND_MODES}) unset(BUILTINS_COMMANDS) if((${MODE} STREQUAL "bindless") AND (NOT BUILD_WITH_L0 OR("${CORE_TYPE}" STREQUAL "GEN8"))) continue() endif() foreach(GENERATED_BUILTIN ${GENERATED_BUILTINS}) compile_builtin(${CORE_TYPE} ${PLATFORM_TYPE} ${GENERATED_BUILTIN}.builtin_kernel ${NEO_BITS} "${BUILTIN_OPTIONS}" ${MODE}) endforeach() foreach(GENERATED_BUILTIN_STATELESS ${GENERATED_BUILTINS_STATELESS}) compile_builtin(${CORE_TYPE} ${PLATFORM_TYPE} ${GENERATED_BUILTIN_STATELESS}.builtin_kernel ${BITS} "${BUILTIN_OPTIONS_STATELESS}" ${MODE}) endforeach() if(${IMAGE_SUPPORT}) foreach(GENERATED_BUILTINS_IMAGES ${GENERATED_BUILTINS_IMAGES}) compile_builtin(${CORE_TYPE} ${PLATFORM_TYPE} ${GENERATED_BUILTINS_IMAGES}.builtin_kernel ${NEO_BITS} "${BUILTIN_OPTIONS}" ${MODE}) endforeach() foreach(GENERATED_BUILTIN_IMAGES_STATELESS ${GENERATED_BUILTINS_IMAGES_STATELESS}) compile_builtin(${CORE_TYPE} ${PLATFORM_TYPE} ${GENERATED_BUILTIN_IMAGES_STATELESS}.builtin_kernel ${BITS} "${BUILTIN_OPTIONS_STATELESS}" ${MODE}) endforeach() endif() if(${AUX_TRANSLATION_SUPPORT}) foreach(GENERATED_BUILTIN_AUX_TRANSLATION ${GENERATED_BUILTINS_AUX_TRANSLATION}) compile_builtin(${CORE_TYPE} ${PLATFORM_TYPE} ${GENERATED_BUILTIN_AUX_TRANSLATION}.builtin_kernel ${NEO_BITS} "${BUILTIN_OPTIONS}" ${MODE}) endforeach() endif() get_property(GENERATED_BUILTINS_CPPS_${MODE} GLOBAL PROPERTY GENERATED_BUILTINS_CPPS_${MODE}) foreach(BUILTIN ${BUILTINS_COMMANDS}) list(APPEND GENERATED_BUILTINS_CPPS_${MODE} ${BUILTIN}) endforeach() set_property(GLOBAL PROPERTY GENERATED_BUILTINS_CPPS_${MODE} ${GENERATED_BUILTINS_CPPS_${MODE}}) add_custom_target(${target_name}_${MODE} DEPENDS ${BUILTINS_COMMANDS}) add_dependencies(${target_name} ${target_name}_${MODE}) set_target_properties(${target_name}_${MODE} 
PROPERTIES FOLDER "${SHARED_SOURCE_PROJECTS_FOLDER}/${SHARED_BUILTINS_PROJECTS_FOLDER}/${family_name_with_type}") endforeach() endif() endforeach() endmacro() file(MAKE_DIRECTORY "${BUILTINS_OUTDIR_WITH_ARCH}/spirv") foreach(builtin ${GENERATED_BUILTINS}) generate_cpp_spirv(${builtin}) endforeach() foreach(builtin_images ${GENERATED_BUILTINS_IMAGES}) generate_cpp_spirv(${builtin_images}) endforeach() foreach(builtin_aux_translation ${GENERATED_BUILTINS_AUX_TRANSLATION}) generate_cpp_spirv(${builtin_aux_translation}) endforeach() foreach(builtin_stateless ${GENERATED_BUILTINS_STATELESS}) generate_cpp_spirv(${builtin_stateless}) endforeach() foreach(builtin_images_stateless ${GENERATED_BUILTINS_IMAGES_STATELESS}) generate_cpp_spirv(${builtin_images_stateless}) endforeach() if(NOT "${OUTPUT_LIST_CPP_FILES}" STREQUAL "") add_library(${BUILTINS_SPIRV_LIB_NAME} OBJECT ${OUTPUT_LIST_CPP_FILES}) target_compile_definitions(${BUILTINS_SPIRV_LIB_NAME} PUBLIC MOCKABLE_VIRTUAL=) set_target_properties(${BUILTINS_SPIRV_LIB_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON FOLDER "${SHARED_SOURCE_PROJECTS_FOLDER}/${SHARED_BUILTINS_PROJECTS_FOLDER}" ) endif() apply_macro_for_each_core_type("SUPPORTED") compute-runtime-22.14.22890/shared/source/built_ins/kernels/aux_translation.builtin_kernel000066400000000000000000000004361422164147700317210ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ R"===( __kernel void fullCopy(__global const uint* src, __global uint* dst) { unsigned int gid = get_global_id(0); uint4 loaded = vload4(gid, src); vstore4(loaded, gid, dst); } )===" compute-runtime-22.14.22890/shared/source/built_ins/kernels/copy_buffer_rect.builtin_kernel000066400000000000000000000024111422164147700320210ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ R"===( ////////////////////////////////////////////////////////////////////////////// __kernel void 
CopyBufferRectBytes2d( __global const char* src, __global char* dst, uint4 SrcOrigin, uint4 DstOrigin, uint2 SrcPitch, uint2 DstPitch ) { int x = get_global_id(0); int y = get_global_id(1); uint LSrcOffset = x + SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch.x ); uint LDstOffset = x + DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch.x ); *( dst + LDstOffset ) = *( src + LSrcOffset ); } ////////////////////////////////////////////////////////////////////////////// __kernel void CopyBufferRectBytes3d( __global const char* src, __global char* dst, uint4 SrcOrigin, uint4 DstOrigin, uint2 SrcPitch, uint2 DstPitch ) { int x = get_global_id(0); int y = get_global_id(1); int z = get_global_id(2); uint LSrcOffset = x + SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch.x ) + ( ( z + SrcOrigin.z ) * SrcPitch.y ); uint LDstOffset = x + DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch.x ) + ( ( z + DstOrigin.z ) * DstPitch.y ); *( dst + LDstOffset ) = *( src + LSrcOffset ); } )===" copy_buffer_rect_stateless.builtin_kernel000066400000000000000000000024251422164147700340360ustar00rootroot00000000000000compute-runtime-22.14.22890/shared/source/built_ins/kernels/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ R"===( ////////////////////////////////////////////////////////////////////////////// __kernel void CopyBufferRectBytes2d( __global const char* src, __global char* dst, ulong4 SrcOrigin, ulong4 DstOrigin, ulong2 SrcPitch, ulong2 DstPitch ) { size_t x = get_global_id(0); size_t y = get_global_id(1); size_t LSrcOffset = x + SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch.x ); size_t LDstOffset = x + DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch.x ); *( dst + LDstOffset ) = *( src + LSrcOffset ); } ////////////////////////////////////////////////////////////////////////////// __kernel void CopyBufferRectBytes3d( __global const char* src, __global char* dst, ulong4 SrcOrigin, ulong4 DstOrigin, ulong2 SrcPitch, ulong2 DstPitch ) { size_t x = 
get_global_id(0); size_t y = get_global_id(1); size_t z = get_global_id(2); size_t LSrcOffset = x + SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch.x ) + ( ( z + SrcOrigin.z ) * SrcPitch.y ); size_t LDstOffset = x + DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch.x ) + ( ( z + DstOrigin.z ) * DstPitch.y ); *( dst + LDstOffset ) = *( src + LSrcOffset ); } )===" compute-runtime-22.14.22890/shared/source/built_ins/kernels/copy_buffer_to_buffer.builtin_kernel000066400000000000000000000067351422164147700330540ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ R"===( __kernel void CopyBufferToBufferBytes( const __global uchar* pSrc, __global uchar* pDst, uint srcOffsetInBytes, uint dstOffsetInBytes, uint bytesToRead ) { pSrc += ( srcOffsetInBytes + get_global_id(0) ); pDst += ( dstOffsetInBytes + get_global_id(0) ); pDst[ 0 ] = pSrc[ 0 ]; } __kernel void CopyBufferToBufferLeftLeftover( const __global uchar* pSrc, __global uchar* pDst, uint srcOffsetInBytes, uint dstOffsetInBytes) { unsigned int gid = get_global_id(0); pDst[ gid + dstOffsetInBytes ] = pSrc[ gid + srcOffsetInBytes ]; } __kernel void CopyBufferToBufferMiddle( const __global uint* pSrc, __global uint* pDst, uint srcOffsetInBytes, uint dstOffsetInBytes) { unsigned int gid = get_global_id(0); pDst += dstOffsetInBytes >> 2; pSrc += srcOffsetInBytes >> 2; uint4 loaded = vload4(gid, pSrc); vstore4(loaded, gid, pDst); } __kernel void CopyBufferToBufferMiddleMisaligned( __global const uint* pSrc, __global uint* pDst, uint srcOffsetInBytes, uint dstOffsetInBytes, uint misalignmentInBits) { const size_t gid = get_global_id(0); pDst += dstOffsetInBytes >> 2; pSrc += srcOffsetInBytes >> 2; const uint4 src0 = vload4(gid, pSrc); const uint4 src1 = vload4(gid + 1, pSrc); uint4 result; result.x = (src0.x >> misalignmentInBits) | (src0.y << (32 - misalignmentInBits)); result.y = (src0.y >> misalignmentInBits) | (src0.z << (32 - misalignmentInBits)); result.z = 
(src0.z >> misalignmentInBits) | (src0.w << (32 - misalignmentInBits)); result.w = (src0.w >> misalignmentInBits) | (src1.x << (32 - misalignmentInBits)); vstore4(result, gid, pDst); } __kernel void CopyBufferToBufferRightLeftover( const __global uchar* pSrc, __global uchar* pDst, uint srcOffsetInBytes, uint dstOffsetInBytes) { unsigned int gid = get_global_id(0); pDst[ gid + dstOffsetInBytes ] = pSrc[ gid + srcOffsetInBytes ]; } __kernel void copyBufferToBufferBytesSingle(__global uchar *dst, const __global uchar *src) { unsigned int gid = get_global_id(0); dst[gid] = (uchar)(src[gid]); } __kernel void CopyBufferToBufferSideRegion( __global uchar* pDst, const __global uchar* pSrc, unsigned int len, uint dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment uint srcSshOffset // Offset needed in case ptr has been adjusted for SSH alignment ) { unsigned int gid = get_global_id(0); __global uchar* pDstWithOffset = (__global uchar*)((__global uchar*)pDst + dstSshOffset); __global uchar* pSrcWithOffset = (__global uchar*)((__global uchar*)pSrc + srcSshOffset); if (gid < len) { pDstWithOffset[ gid ] = pSrcWithOffset[ gid ]; } } __kernel void CopyBufferToBufferMiddleRegion( __global uint* pDst, const __global uint* pSrc, unsigned int elems, uint dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment uint srcSshOffset // Offset needed in case ptr has been adjusted for SSH alignment ) { unsigned int gid = get_global_id(0); __global uint* pDstWithOffset = (__global uint*)((__global uchar*)pDst + dstSshOffset); __global uint* pSrcWithOffset = (__global uint*)((__global uchar*)pSrc + srcSshOffset); if (gid < elems) { uint4 loaded = vload4(gid, pSrcWithOffset); vstore4(loaded, gid, pDstWithOffset); } } )==="copy_buffer_to_buffer_stateless.builtin_kernel000066400000000000000000000064741422164147700350640ustar00rootroot00000000000000compute-runtime-22.14.22890/shared/source/built_ins/kernels/* * Copyright (C) 2019-2021 Intel 
Corporation * * SPDX-License-Identifier: MIT * */ R"===( __kernel void CopyBufferToBufferBytes( const __global uchar* pSrc, __global uchar* pDst, ulong srcOffsetInBytes, ulong dstOffsetInBytes, ulong bytesToRead ) { pSrc += ( srcOffsetInBytes + get_global_id(0) ); pDst += ( dstOffsetInBytes + get_global_id(0) ); pDst[ 0 ] = pSrc[ 0 ]; } __kernel void CopyBufferToBufferLeftLeftover( const __global uchar* pSrc, __global uchar* pDst, ulong srcOffsetInBytes, ulong dstOffsetInBytes) { size_t gid = get_global_id(0); pDst[ gid + dstOffsetInBytes ] = pSrc[ gid + srcOffsetInBytes ]; } __kernel void CopyBufferToBufferMiddle( const __global uint* pSrc, __global uint* pDst, ulong srcOffsetInBytes, ulong dstOffsetInBytes) { size_t gid = get_global_id(0); pDst += dstOffsetInBytes >> 2; pSrc += srcOffsetInBytes >> 2; uint4 loaded = vload4(gid, pSrc); vstore4(loaded, gid, pDst); } __kernel void CopyBufferToBufferMiddleMisaligned( __global const uint* pSrc, __global uint* pDst, ulong srcOffsetInBytes, ulong dstOffsetInBytes, uint misalignmentInBits) { const size_t gid = get_global_id(0); pDst += dstOffsetInBytes >> 2; pSrc += srcOffsetInBytes >> 2; const uint4 src0 = vload4(gid, pSrc); const uint4 src1 = vload4(gid + 1, pSrc); uint4 result; result.x = (src0.x >> misalignmentInBits) | (src0.y << (32 - misalignmentInBits)); result.y = (src0.y >> misalignmentInBits) | (src0.z << (32 - misalignmentInBits)); result.z = (src0.z >> misalignmentInBits) | (src0.w << (32 - misalignmentInBits)); result.w = (src0.w >> misalignmentInBits) | (src1.x << (32 - misalignmentInBits)); vstore4(result, gid, pDst); } __kernel void CopyBufferToBufferRightLeftover( const __global uchar* pSrc, __global uchar* pDst, ulong srcOffsetInBytes, ulong dstOffsetInBytes) { size_t gid = get_global_id(0); pDst[ gid + dstOffsetInBytes ] = pSrc[ gid + srcOffsetInBytes ]; } __kernel void copyBufferToBufferBytesSingle(__global uchar *dst, const __global uchar *src) { size_t gid = get_global_id(0); dst[gid] = 
(uchar)(src[gid]); } __kernel void CopyBufferToBufferSideRegion( __global uchar* pDst, const __global uchar* pSrc, ulong len, ulong dstSshOffset, ulong srcSshOffset ) { size_t gid = get_global_id(0); __global uchar* pDstWithOffset = (__global uchar*)((__global uchar*)pDst + dstSshOffset); __global uchar* pSrcWithOffset = (__global uchar*)((__global uchar*)pSrc + srcSshOffset); if (gid < len) { pDstWithOffset[ gid ] = pSrcWithOffset[ gid ]; } } __kernel void CopyBufferToBufferMiddleRegion( __global uint* pDst, const __global uint* pSrc, ulong elems, ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment ulong srcSshOffset // Offset needed in case ptr has been adjusted for SSH alignment ) { size_t gid = get_global_id(0); __global uint* pDstWithOffset = (__global uint*)((__global uchar*)pDst + dstSshOffset); __global uint* pSrcWithOffset = (__global uint*)((__global uchar*)pSrc + srcSshOffset); if (gid < elems) { uint4 loaded = vload4(gid, pSrcWithOffset); vstore4(loaded, gid, pDstWithOffset); } } )==="compute-runtime-22.14.22890/shared/source/built_ins/kernels/copy_buffer_to_image3d.builtin_kernel000066400000000000000000000147501422164147700331100ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ R"===( #pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable __kernel void CopyBufferToImage3dBytes(__global uchar *src, __write_only image3d_t output, int srcOffset, int4 dstOffset, uint2 Pitch) { const uint x = get_global_id(0); const uint y = get_global_id(1); const uint z = get_global_id(2); int4 dstCoord = (int4)(x, y, z, 0) + dstOffset; uint LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y); write_imageui(output, dstCoord, (uint4)(*(src + LOffset + x), 0, 0, 1)); } __kernel void CopyBufferToImage3d2Bytes(__global uchar *src, __write_only image3d_t output, int srcOffset, int4 dstOffset, uint2 Pitch) { const uint x = get_global_id(0); const uint y = get_global_id(1); const 
uint z = get_global_id(2); int4 dstCoord = (int4)(x, y, z, 0) + dstOffset; uint LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y); uint4 c = (uint4)(0, 0, 0, 1); if(( ulong )(src + srcOffset) & 0x00000001){ ushort upper = *((__global uchar*)(src + LOffset + x * 2 + 1)); ushort lower = *((__global uchar*)(src + LOffset + x * 2)); ushort combined = (upper << 8) | lower; c.x = (uint)combined; } else{ c.x = (uint)(*(__global ushort*)(src + LOffset + x * 2)); } write_imageui(output, dstCoord, c); } __kernel void CopyBufferToImage3d4Bytes(__global uchar *src, __write_only image3d_t output, int srcOffset, int4 dstOffset, uint2 Pitch) { const uint x = get_global_id(0); const uint y = get_global_id(1); const uint z = get_global_id(2); int4 dstCoord = (int4)(x, y, z, 0) + dstOffset; uint LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y); uint4 c = (uint4)(0, 0, 0, 1); if(( ulong )(src + srcOffset) & 0x00000003){ uint upper2 = *((__global uchar*)(src + LOffset + x * 4 + 3)); uint upper = *((__global uchar*)(src + LOffset + x * 4 + 2)); uint lower2 = *((__global uchar*)(src + LOffset + x * 4 + 1)); uint lower = *((__global uchar*)(src + LOffset + x * 4)); uint combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower; c.x = combined; } else{ c.x = (*(__global uint*)(src + LOffset + x * 4)); } write_imageui(output, dstCoord, c); } __kernel void CopyBufferToImage3d8Bytes(__global uchar *src, __write_only image3d_t output, int srcOffset, int4 dstOffset, uint2 Pitch) { const uint x = get_global_id(0); const uint y = get_global_id(1); const uint z = get_global_id(2); int4 dstCoord = (int4)(x, y, z, 0) + dstOffset; uint LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y); uint2 c = (uint2)(0, 0);//*((__global uint2*)(src + LOffset + x * 8)); if(( ulong )(src + srcOffset) & 0x00000007){ uint upper2 = *((__global uchar*)(src + LOffset + x * 8 + 3)); uint upper = *((__global uchar*)(src + LOffset + x * 8 + 2)); uint lower2 = *((__global uchar*)(src + LOffset + x * 8 + 1)); 
uint lower = *((__global uchar*)(src + LOffset + x * 8)); uint combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower; c.x = combined; upper2 = *((__global uchar*)(src + LOffset + x * 8 + 7)); upper = *((__global uchar*)(src + LOffset + x * 8 + 6)); lower2 = *((__global uchar*)(src + LOffset + x * 8 + 5)); lower = *((__global uchar*)(src + LOffset + x * 8 + 4)); combined = ((uint)upper2 << 24) | ((uint)upper << 16) | ((uint)lower2 << 8) | lower; c.y = combined; } else{ c = *((__global uint2*)(src + LOffset + x * 8)); } write_imageui(output, dstCoord, (uint4)(c.x, c.y, 0, 1)); } __kernel void CopyBufferToImage3d16Bytes(__global uchar *src, __write_only image3d_t output, int srcOffset, int4 dstOffset, uint2 Pitch) { const uint x = get_global_id(0); const uint y = get_global_id(1); const uint z = get_global_id(2); int4 dstCoord = (int4)(x, y, z, 0) + dstOffset; uint LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y); uint4 c = (uint4)(0, 0, 0, 0); if(( ulong )(src + srcOffset) & 0x0000000f){ uint upper2 = *((__global uchar*)(src + LOffset + x * 16 + 3)); uint upper = *((__global uchar*)(src + LOffset + x * 16 + 2)); uint lower2 = *((__global uchar*)(src + LOffset + x * 16 + 1)); uint lower = *((__global uchar*)(src + LOffset + x * 16)); uint combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower; c.x = combined; upper2 = *((__global uchar*)(src + LOffset + x * 16 + 7)); upper = *((__global uchar*)(src + LOffset + x * 16 + 6)); lower2 = *((__global uchar*)(src + LOffset + x * 16 + 5)); lower = *((__global uchar*)(src + LOffset + x * 16 + 4)); combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower; c.y = combined; upper2 = *((__global uchar*)(src + LOffset + x * 16 + 11)); upper = *((__global uchar*)(src + LOffset + x * 16 + 10)); lower2 = *((__global uchar*)(src + LOffset + x * 16 + 9)); lower = *((__global uchar*)(src + LOffset + x * 16 + 8)); combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower; c.z = combined; upper2 = 
*((__global uchar*)(src + LOffset + x * 16 + 15)); upper = *((__global uchar*)(src + LOffset + x * 16 + 14)); lower2 = *((__global uchar*)(src + LOffset + x * 16 + 13)); lower = *((__global uchar*)(src + LOffset + x * 16 + 12)); combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower; c.w = combined; } else{ c = *((__global uint4 *)(src + LOffset + x * 16)); } write_imageui(output, dstCoord, c); } )===" copy_buffer_to_image3d_stateless.builtin_kernel000066400000000000000000000147741422164147700351260ustar00rootroot00000000000000compute-runtime-22.14.22890/shared/source/built_ins/kernels/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ R"===( #pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable __kernel void CopyBufferToImage3dBytes(__global uchar *src, __write_only image3d_t output, ulong srcOffset, int4 dstOffset, ulong2 Pitch) { const uint x = get_global_id(0); const uint y = get_global_id(1); const uint z = get_global_id(2); int4 dstCoord = (int4)(x, y, z, 0) + dstOffset; ulong LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y); write_imageui(output, dstCoord, (uint4)(*(src + LOffset + x), 0, 0, 1)); } __kernel void CopyBufferToImage3d2Bytes(__global uchar *src, __write_only image3d_t output, ulong srcOffset, int4 dstOffset, ulong2 Pitch) { const uint x = get_global_id(0); const uint y = get_global_id(1); const uint z = get_global_id(2); int4 dstCoord = (int4)(x, y, z, 0) + dstOffset; ulong LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y); uint4 c = (uint4)(0, 0, 0, 1); if(( ulong )(src + srcOffset) & 0x00000001){ ushort upper = *((__global uchar*)(src + LOffset + x * 2 + 1)); ushort lower = *((__global uchar*)(src + LOffset + x * 2)); ushort combined = (upper << 8) | lower; c.x = (uint)combined; } else{ c.x = (uint)(*(__global ushort*)(src + LOffset + x * 2)); } write_imageui(output, dstCoord, c); } __kernel void CopyBufferToImage3d4Bytes(__global uchar *src, __write_only image3d_t output, ulong srcOffset, 
int4 dstOffset, ulong2 Pitch) { const uint x = get_global_id(0); const uint y = get_global_id(1); const uint z = get_global_id(2); int4 dstCoord = (int4)(x, y, z, 0) + dstOffset; ulong LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y); uint4 c = (uint4)(0, 0, 0, 1); if(( ulong )(src + srcOffset) & 0x00000003){ uint upper2 = *((__global uchar*)(src + LOffset + x * 4 + 3)); uint upper = *((__global uchar*)(src + LOffset + x * 4 + 2)); uint lower2 = *((__global uchar*)(src + LOffset + x * 4 + 1)); uint lower = *((__global uchar*)(src + LOffset + x * 4)); uint combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower; c.x = combined; } else{ c.x = (*(__global uint*)(src + LOffset + x * 4)); } write_imageui(output, dstCoord, c); } __kernel void CopyBufferToImage3d8Bytes(__global uchar *src, __write_only image3d_t output, ulong srcOffset, int4 dstOffset, ulong2 Pitch) { const uint x = get_global_id(0); const uint y = get_global_id(1); const uint z = get_global_id(2); int4 dstCoord = (int4)(x, y, z, 0) + dstOffset; ulong LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y); uint2 c = (uint2)(0, 0);//*((__global uint2*)(src + LOffset + x * 8)); if(( ulong )(src + srcOffset) & 0x00000007){ uint upper2 = *((__global uchar*)(src + LOffset + x * 8 + 3)); uint upper = *((__global uchar*)(src + LOffset + x * 8 + 2)); uint lower2 = *((__global uchar*)(src + LOffset + x * 8 + 1)); uint lower = *((__global uchar*)(src + LOffset + x * 8)); uint combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower; c.x = combined; upper2 = *((__global uchar*)(src + LOffset + x * 8 + 7)); upper = *((__global uchar*)(src + LOffset + x * 8 + 6)); lower2 = *((__global uchar*)(src + LOffset + x * 8 + 5)); lower = *((__global uchar*)(src + LOffset + x * 8 + 4)); combined = ((uint)upper2 << 24) | ((uint)upper << 16) | ((uint)lower2 << 8) | lower; c.y = combined; } else{ c = *((__global uint2*)(src + LOffset + x * 8)); } write_imageui(output, dstCoord, (uint4)(c.x, c.y, 0, 1)); } 
// Copies one 16-byte texel per work-item from a linear buffer into a 3D image.
//
//   src       - base of the source buffer (byte addressed)
//   output    - destination image, written with write_imageui
//   srcOffset - byte offset of the first texel inside src
//   dstOffset - texel offset added to the work-item id to form the image coordinate
//   Pitch     - Pitch.x is the byte stride between rows, Pitch.y between slices
//
// Work-item (x, y, z) reads the 16 bytes at
//   src + srcOffset + y * Pitch.x + z * Pitch.y + x * 16
// and writes them, interpreted as four little-endian uints, to
//   output[(x, y, z) + dstOffset].
__kernel void CopyBufferToImage3d16Bytes(__global uchar *src,
                                         __write_only image3d_t output,
                                         ulong srcOffset,
                                         int4 dstOffset,
                                         ulong2 Pitch) {
    const uint x = get_global_id(0);
    const uint y = get_global_id(1);
    const uint z = get_global_id(2);

    const int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
    const ulong LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);

    // Address of the texel this work-item actually reads.
    __global uchar *texel = src + LOffset + x * 16;

    uint4 c = (uint4)(0, 0, 0, 0);

    // The alignment test must look at the texel address itself, not only at
    // (src + srcOffset): a row/slice pitch that is not a multiple of 16 makes
    // individual texels misaligned even when the first one is aligned, and a
    // uint4 load through a misaligned pointer is undefined behaviour.
    if ((ulong)texel & 0x0000000f) {
        // Misaligned: assemble each 32-bit channel from single bytes, least
        // significant byte first.
        c.x = ((uint)texel[3] << 24) | ((uint)texel[2] << 16) | ((uint)texel[1] << 8) | (uint)texel[0];
        c.y = ((uint)texel[7] << 24) | ((uint)texel[6] << 16) | ((uint)texel[5] << 8) | (uint)texel[4];
        c.z = ((uint)texel[11] << 24) | ((uint)texel[10] << 16) | ((uint)texel[9] << 8) | (uint)texel[8];
        c.w = ((uint)texel[15] << 24) | ((uint)texel[14] << 16) | ((uint)texel[13] << 8) | (uint)texel[12];
    } else {
        // 16-byte aligned: a single vector load fetches all four channels.
        c = *((__global uint4 *)texel);
    }

    write_imageui(output, dstCoord, c);
}
)==="
compute-runtime-22.14.22890/shared/source/built_ins/kernels/copy_image3d_to_buffer.builtin_kernel000066400000000000000000000146521422164147700331110ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ R"===( __kernel void CopyImage3dToBufferBytes(__read_only image3d_t input, __global uchar *dst, int4 srcOffset, int dstOffset, uint2 Pitch) { const uint x = get_global_id(0); const uint y = get_global_id(1); const uint z = get_global_id(2); const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset; uint DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y); uint4 c = read_imageui(input, srcCoord); *(dst + DstOffset + x) = convert_uchar_sat(c.x); } __kernel void CopyImage3dToBuffer2Bytes(__read_only image3d_t input, __global uchar *dst, int4 srcOffset, int dstOffset, uint2 Pitch) { const uint x = get_global_id(0); const uint y = get_global_id(1); const uint z = get_global_id(2); const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset; uint DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y); uint4 c = read_imageui(input, srcCoord); if(( ulong )(dst + dstOffset) & 0x00000001){ *((__global uchar*)(dst + DstOffset + x * 2 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 2)) = convert_uchar_sat(c.x & 0xff); } else{ *((__global ushort*)(dst + DstOffset + x * 2)) = convert_ushort_sat(c.x); } } __kernel void CopyImage3dToBuffer4Bytes(__read_only image3d_t input, __global uchar *dst, int4 srcOffset, int dstOffset, uint2 Pitch) { const uint x = get_global_id(0); const uint y = get_global_id(1); const uint z = get_global_id(2); const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset; uint DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y); uint4 c = read_imageui(input, srcCoord); if(( ulong )(dst + dstOffset) & 0x00000003){ *((__global uchar*)(dst + DstOffset + x * 4 + 3)) = convert_uchar_sat((c.x >> 24 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 4 + 2)) = 
convert_uchar_sat((c.x >> 16 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 4 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 4)) = convert_uchar_sat(c.x & 0xff); } else{ *((__global uint*)(dst + DstOffset + x * 4)) = c.x; } } __kernel void CopyImage3dToBuffer8Bytes(__read_only image3d_t input, __global uchar *dst, int4 srcOffset, int dstOffset, uint2 Pitch) { const uint x = get_global_id(0); const uint y = get_global_id(1); const uint z = get_global_id(2); const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset; uint DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y); uint4 c = read_imageui(input, srcCoord); if(( ulong )(dst + dstOffset) & 0x00000007){ *((__global uchar*)(dst + DstOffset + x * 8 + 3)) = convert_uchar_sat((c.x >> 24 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 8 + 2)) = convert_uchar_sat((c.x >> 16 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 8 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 8)) = convert_uchar_sat(c.x & 0xff); *((__global uchar*)(dst + DstOffset + x * 8 + 7)) = convert_uchar_sat((c.y >> 24 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 8 + 6)) = convert_uchar_sat((c.y >> 16 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 8 + 5)) = convert_uchar_sat((c.y >> 8 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 8 + 4)) = convert_uchar_sat(c.y & 0xff); } else{ uint2 d = (uint2)(c.x,c.y); *((__global uint2*)(dst + DstOffset + x * 8)) = d; } } __kernel void CopyImage3dToBuffer16Bytes(__read_only image3d_t input, __global uchar *dst, int4 srcOffset, int dstOffset, uint2 Pitch) { const uint x = get_global_id(0); const uint y = get_global_id(1); const uint z = get_global_id(2); const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset; uint DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y); const uint4 c = read_imageui(input, srcCoord); if(( ulong )(dst + dstOffset) & 0x0000000f){ *((__global uchar*)(dst + DstOffset + x * 16 
+ 3)) = convert_uchar_sat((c.x >> 24 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 16 + 2)) = convert_uchar_sat((c.x >> 16 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 16 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 16)) = convert_uchar_sat(c.x & 0xff); *((__global uchar*)(dst + DstOffset + x * 16 + 7)) = convert_uchar_sat((c.y >> 24 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 16 + 6)) = convert_uchar_sat((c.y >> 16 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 16 + 5)) = convert_uchar_sat((c.y >> 8 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 16 + 4)) = convert_uchar_sat(c.y & 0xff); *((__global uchar*)(dst + DstOffset + x * 16 + 11)) = convert_uchar_sat((c.z >> 24 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 16 + 10)) = convert_uchar_sat((c.z >> 16 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 16 + 9)) = convert_uchar_sat((c.z >> 8 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 16 + 8)) = convert_uchar_sat(c.z & 0xff); *((__global uchar*)(dst + DstOffset + x * 16 + 15)) = convert_uchar_sat((c.w >> 24 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 16 + 14)) = convert_uchar_sat((c.w >> 16 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 16 + 13)) = convert_uchar_sat((c.w >> 8 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 16 + 12)) = convert_uchar_sat(c.w & 0xff); } else{ *(__global uint4*)(dst + DstOffset + x * 16) = c; } } )===" copy_image3d_to_buffer_stateless.builtin_kernel000066400000000000000000000146721422164147700351230ustar00rootroot00000000000000compute-runtime-22.14.22890/shared/source/built_ins/kernels/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ R"===( __kernel void CopyImage3dToBufferBytes(__read_only image3d_t input, __global uchar *dst, int4 srcOffset, ulong dstOffset, ulong2 Pitch) { const uint x = get_global_id(0); const uint y = get_global_id(1); const uint z = get_global_id(2); const int4 
srcCoord = (int4)(x, y, z, 0) + srcOffset; ulong DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y); uint4 c = read_imageui(input, srcCoord); *(dst + DstOffset + x) = convert_uchar_sat(c.x); } __kernel void CopyImage3dToBuffer2Bytes(__read_only image3d_t input, __global uchar *dst, int4 srcOffset, ulong dstOffset, ulong2 Pitch) { const uint x = get_global_id(0); const uint y = get_global_id(1); const uint z = get_global_id(2); const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset; ulong DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y); uint4 c = read_imageui(input, srcCoord); if(( ulong )(dst + dstOffset) & 0x00000001){ *((__global uchar*)(dst + DstOffset + x * 2 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 2)) = convert_uchar_sat(c.x & 0xff); } else{ *((__global ushort*)(dst + DstOffset + x * 2)) = convert_ushort_sat(c.x); } } __kernel void CopyImage3dToBuffer4Bytes(__read_only image3d_t input, __global uchar *dst, int4 srcOffset, ulong dstOffset, ulong2 Pitch) { const uint x = get_global_id(0); const uint y = get_global_id(1); const uint z = get_global_id(2); const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset; ulong DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y); uint4 c = read_imageui(input, srcCoord); if(( ulong )(dst + dstOffset) & 0x00000003){ *((__global uchar*)(dst + DstOffset + x * 4 + 3)) = convert_uchar_sat((c.x >> 24 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 4 + 2)) = convert_uchar_sat((c.x >> 16 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 4 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 4)) = convert_uchar_sat(c.x & 0xff); } else{ *((__global uint*)(dst + DstOffset + x * 4)) = c.x; } } __kernel void CopyImage3dToBuffer8Bytes(__read_only image3d_t input, __global uchar *dst, int4 srcOffset, ulong dstOffset, ulong2 Pitch) { const uint x = get_global_id(0); const uint y = get_global_id(1); const uint z = get_global_id(2); const int4 
srcCoord = (int4)(x, y, z, 0) + srcOffset; ulong DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y); uint4 c = read_imageui(input, srcCoord); if(( ulong )(dst + dstOffset) & 0x00000007){ *((__global uchar*)(dst + DstOffset + x * 8 + 3)) = convert_uchar_sat((c.x >> 24 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 8 + 2)) = convert_uchar_sat((c.x >> 16 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 8 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 8)) = convert_uchar_sat(c.x & 0xff); *((__global uchar*)(dst + DstOffset + x * 8 + 7)) = convert_uchar_sat((c.y >> 24 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 8 + 6)) = convert_uchar_sat((c.y >> 16 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 8 + 5)) = convert_uchar_sat((c.y >> 8 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 8 + 4)) = convert_uchar_sat(c.y & 0xff); } else{ uint2 d = (uint2)(c.x,c.y); *((__global uint2*)(dst + DstOffset + x * 8)) = d; } } __kernel void CopyImage3dToBuffer16Bytes(__read_only image3d_t input, __global uchar *dst, int4 srcOffset, ulong dstOffset, ulong2 Pitch) { const uint x = get_global_id(0); const uint y = get_global_id(1); const uint z = get_global_id(2); const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset; ulong DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y); const uint4 c = read_imageui(input, srcCoord); if(( ulong )(dst + dstOffset) & 0x0000000f){ *((__global uchar*)(dst + DstOffset + x * 16 + 3)) = convert_uchar_sat((c.x >> 24 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 16 + 2)) = convert_uchar_sat((c.x >> 16 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 16 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 16)) = convert_uchar_sat(c.x & 0xff); *((__global uchar*)(dst + DstOffset + x * 16 + 7)) = convert_uchar_sat((c.y >> 24 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 16 + 6)) = convert_uchar_sat((c.y >> 16 ) & 0xff); *((__global 
uchar*)(dst + DstOffset + x * 16 + 5)) = convert_uchar_sat((c.y >> 8 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 16 + 4)) = convert_uchar_sat(c.y & 0xff); *((__global uchar*)(dst + DstOffset + x * 16 + 11)) = convert_uchar_sat((c.z >> 24 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 16 + 10)) = convert_uchar_sat((c.z >> 16 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 16 + 9)) = convert_uchar_sat((c.z >> 8 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 16 + 8)) = convert_uchar_sat(c.z & 0xff); *((__global uchar*)(dst + DstOffset + x * 16 + 15)) = convert_uchar_sat((c.w >> 24 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 16 + 14)) = convert_uchar_sat((c.w >> 16 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 16 + 13)) = convert_uchar_sat((c.w >> 8 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 16 + 12)) = convert_uchar_sat(c.w & 0xff); } else{ *(__global uint4*)(dst + DstOffset + x * 16) = c; } } )===" compute-runtime-22.14.22890/shared/source/built_ins/kernels/copy_image_to_image1d.builtin_kernel000066400000000000000000000007161422164147700327140ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ R"===( __kernel void CopyImageToImage1d( __read_only image1d_t input, __write_only image1d_t output, int4 srcOffset, int4 dstOffset) { const int x = get_global_id(0); const int srcCoord = x + srcOffset.x; const int dstCoord = x + dstOffset.x; const uint4 c = read_imageui(input, srcCoord); write_imageui(output, dstCoord, c); } )===" compute-runtime-22.14.22890/shared/source/built_ins/kernels/copy_image_to_image2d.builtin_kernel000066400000000000000000000010641422164147700327120ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ R"===( __kernel void CopyImageToImage2d( __read_only image2d_t input, __write_only image2d_t output, int4 srcOffset, int4 dstOffset) { const int x = get_global_id(0); const int y = 
get_global_id(1); const int2 srcCoord = (int2)(x, y) + (int2)(srcOffset.x, srcOffset.y); const int2 dstCoord = (int2)(x, y) + (int2)(dstOffset.x, dstOffset.y); const uint4 c = read_imageui(input, srcCoord); write_imageui(output, dstCoord, c); } )===" compute-runtime-22.14.22890/shared/source/built_ins/kernels/copy_image_to_image3d.builtin_kernel000066400000000000000000000011601422164147700327100ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ R"===( #pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable __kernel void CopyImageToImage3d( __read_only image3d_t input, __write_only image3d_t output, int4 srcOffset, int4 dstOffset) { const int x = get_global_id(0); const int y = get_global_id(1); const int z = get_global_id(2); const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset; const int4 dstCoord = (int4)(x, y, z, 0) + dstOffset; const uint4 c = read_imageui(input, srcCoord); write_imageui(output, dstCoord, c); } )===" compute-runtime-22.14.22890/shared/source/built_ins/kernels/copy_kernel_timestamps.builtin_kernel000066400000000000000000000111611422164147700332630ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ R"===( void SetDstData(__global ulong* dst, uint currentOffset, ulong contextStart, ulong globalStart, ulong contextEnd, ulong globalEnd, uint useOnlyGlobalTimestamps) { dst[currentOffset] = globalStart; dst[currentOffset + 1] = globalEnd; if (useOnlyGlobalTimestamps != 0) { dst[currentOffset + 2] = globalStart; dst[currentOffset + 3] = globalEnd; } else { dst[currentOffset + 2] = contextStart; dst[currentOffset + 3] = contextEnd; } } ulong GetTimestampValue(ulong srcPtr, ulong timestampSizeInDw, uint index) { if(timestampSizeInDw == 1) { __global uint *src = (__global uint *) srcPtr; return src[index]; } else if(timestampSizeInDw == 2) { __global ulong *src = (__global ulong *) srcPtr; return src[index]; } return 0; } __kernel 
void QueryKernelTimestamps(__global ulong* srcEvents, __global ulong* dst, uint useOnlyGlobalTimestamps) { uint gid = get_global_id(0); uint currentOffset = gid * 4; dst[currentOffset] = 0; dst[currentOffset + 1] = 0; dst[currentOffset + 2] = 0; dst[currentOffset + 3] = 0; uint eventOffsetData = 3 * gid; ulong srcPtr = srcEvents[eventOffsetData]; ulong packetUsed = srcEvents[eventOffsetData + 1]; ulong timestampSizeInDw = srcEvents[eventOffsetData + 2]; ulong contextStart = GetTimestampValue(srcPtr, timestampSizeInDw, 0); ulong globalStart = GetTimestampValue(srcPtr, timestampSizeInDw, 1); ulong contextEnd = GetTimestampValue(srcPtr, timestampSizeInDw, 2); ulong globalEnd = GetTimestampValue(srcPtr, timestampSizeInDw, 3); if(packetUsed > 1) { for(uint i = 1; i < packetUsed; i++) { uint timestampsOffsets = 4 * i; if(contextStart > GetTimestampValue(srcPtr, timestampSizeInDw, timestampsOffsets)) { contextStart = GetTimestampValue(srcPtr, timestampSizeInDw, timestampsOffsets); } if(globalStart > GetTimestampValue(srcPtr, timestampSizeInDw, timestampsOffsets + 1)) { globalStart = GetTimestampValue(srcPtr, timestampSizeInDw, timestampsOffsets + 1); } if(contextEnd < GetTimestampValue(srcPtr, timestampSizeInDw, timestampsOffsets + 2)) { contextEnd = GetTimestampValue(srcPtr, timestampSizeInDw, timestampsOffsets + 2); } if(globalEnd < GetTimestampValue(srcPtr, timestampSizeInDw, timestampsOffsets + 3)) { globalEnd = GetTimestampValue(srcPtr, timestampSizeInDw, timestampsOffsets + 3); } } } SetDstData(dst, currentOffset, contextStart, globalStart, contextEnd, globalEnd, useOnlyGlobalTimestamps); } __kernel void QueryKernelTimestampsWithOffsets(__global ulong* srcEvents, __global ulong* dst, __global ulong *offsets, uint useOnlyGlobalTimestamps) { uint gid = get_global_id(0); uint currentOffset = offsets[gid] / 8; dst[currentOffset] = 0; dst[currentOffset + 1] = 0; dst[currentOffset + 2] = 0; dst[currentOffset + 3] = 0; uint eventOffsetData = 3 * gid; ulong srcPtr = 
srcEvents[eventOffsetData]; ulong packetUsed = srcEvents[eventOffsetData + 1]; ulong timestampSizeInDw = srcEvents[eventOffsetData + 2]; ulong contextStart = GetTimestampValue(srcPtr, timestampSizeInDw, 0); ulong globalStart = GetTimestampValue(srcPtr, timestampSizeInDw, 1); ulong contextEnd = GetTimestampValue(srcPtr, timestampSizeInDw, 2); ulong globalEnd = GetTimestampValue(srcPtr, timestampSizeInDw, 3); if(packetUsed > 1) { for(uint i = 1; i < packetUsed; i++) { uint timestampsOffsets = 4 * i; if(contextStart > GetTimestampValue(srcPtr, timestampSizeInDw, timestampsOffsets)) { contextStart = GetTimestampValue(srcPtr, timestampSizeInDw, timestampsOffsets); } if(globalStart > GetTimestampValue(srcPtr, timestampSizeInDw, timestampsOffsets + 1)) { globalStart = GetTimestampValue(srcPtr, timestampSizeInDw, timestampsOffsets + 1); } if(contextEnd < GetTimestampValue(srcPtr, timestampSizeInDw, timestampsOffsets + 2)) { contextEnd = GetTimestampValue(srcPtr, timestampSizeInDw, timestampsOffsets + 2); } if(globalEnd < GetTimestampValue(srcPtr, timestampSizeInDw, timestampsOffsets + 3)) { globalEnd = GetTimestampValue(srcPtr, timestampSizeInDw, timestampsOffsets + 3); } } } SetDstData(dst, currentOffset, contextStart, globalStart, contextEnd, globalEnd, useOnlyGlobalTimestamps); } )===" compute-runtime-22.14.22890/shared/source/built_ins/kernels/fill_buffer.builtin_kernel000066400000000000000000000040621422164147700307640ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ R"===( // assumption is local work size = pattern size __kernel void FillBufferBytes( __global uchar* pDst, uint dstOffsetInBytes, const __global uchar* pPattern ) { uint dstIndex = get_global_id(0) + dstOffsetInBytes; uint srcIndex = get_local_id(0); pDst[dstIndex] = pPattern[srcIndex]; } __kernel void FillBufferLeftLeftover( __global uchar* pDst, uint dstOffsetInBytes, const __global uchar* pPattern, const uint patternSizeInEls ) { uint gid 
= get_global_id(0); pDst[ gid + dstOffsetInBytes ] = pPattern[ gid & (patternSizeInEls - 1) ]; } __kernel void FillBufferMiddle( __global uchar* pDst, uint dstOffsetInBytes, const __global uint* pPattern, const uint patternSizeInEls ) { uint gid = get_global_id(0); ((__global uint*)(pDst + dstOffsetInBytes))[gid] = pPattern[ gid & (patternSizeInEls - 1) ]; } __kernel void FillBufferRightLeftover( __global uchar* pDst, uint dstOffsetInBytes, const __global uchar* pPattern, const uint patternSizeInEls ) { uint gid = get_global_id(0); pDst[ gid + dstOffsetInBytes ] = pPattern[ gid & (patternSizeInEls - 1) ]; } __kernel void FillBufferImmediate( __global uchar* ptr, uint dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment const uint value) { uint dstIndex = get_global_id(0); __global uchar* pDst = (__global uchar*)ptr + dstSshOffset; pDst[dstIndex] = value; } __kernel void FillBufferSSHOffset( __global uchar* ptr, uint dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment const __global uchar* pPattern, uint patternSshOffset // Offset needed in case pPattern has been adjusted for SSH alignment ) { uint dstIndex = get_global_id(0); uint srcIndex = get_local_id(0); __global uchar* pDst = (__global uchar*)ptr + dstSshOffset; __global uchar* pSrc = (__global uchar*)pPattern + patternSshOffset; pDst[dstIndex] = pSrc[srcIndex]; } )==="compute-runtime-22.14.22890/shared/source/built_ins/kernels/fill_buffer_stateless.builtin_kernel000066400000000000000000000041141422164147700330510ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ R"===( // assumption is local work size = pattern size __kernel void FillBufferBytes( __global uchar* pDst, ulong dstOffsetInBytes, const __global uchar* pPattern ) { size_t dstIndex = get_global_id(0) + dstOffsetInBytes; size_t srcIndex = get_local_id(0); pDst[dstIndex] = pPattern[srcIndex]; } __kernel void FillBufferLeftLeftover( 
__global uchar* pDst, ulong dstOffsetInBytes, const __global uchar* pPattern, const ulong patternSizeInEls ) { size_t gid = get_global_id(0); pDst[ gid + dstOffsetInBytes ] = pPattern[ gid & (patternSizeInEls - 1) ]; } __kernel void FillBufferMiddle( __global uchar* pDst, ulong dstOffsetInBytes, const __global uint* pPattern, const ulong patternSizeInEls ) { size_t gid = get_global_id(0); ((__global uint*)(pDst + dstOffsetInBytes))[gid] = pPattern[ gid & (patternSizeInEls - 1) ]; } __kernel void FillBufferRightLeftover( __global uchar* pDst, ulong dstOffsetInBytes, const __global uchar* pPattern, const ulong patternSizeInEls ) { size_t gid = get_global_id(0); pDst[ gid + dstOffsetInBytes ] = pPattern[ gid & (patternSizeInEls - 1) ]; } __kernel void FillBufferImmediate( __global uchar* ptr, ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment const uint value) { size_t dstIndex = get_global_id(0); __global uchar* pDst = (__global uchar*)ptr + dstSshOffset; pDst[dstIndex] = value; } __kernel void FillBufferSSHOffset( __global uchar* ptr, ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment const __global uchar* pPattern, ulong patternSshOffset // Offset needed in case pPattern has been adjusted for SSH alignment ) { size_t dstIndex = get_global_id(0); size_t srcIndex = get_local_id(0); __global uchar* pDst = (__global uchar*)ptr + dstSshOffset; __global uchar* pSrc = (__global uchar*)pPattern + patternSshOffset; pDst[dstIndex] = pSrc[srcIndex]; } )==="compute-runtime-22.14.22890/shared/source/built_ins/kernels/fill_image1d.builtin_kernel000066400000000000000000000005121422164147700310160ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ R"===( __kernel void FillImage1d( __write_only image1d_t output, uint4 color, int4 dstOffset) { const int x = get_global_id(0); const int dstCoord = x + dstOffset.x; write_imageui(output, dstCoord, color); } 
)===" compute-runtime-22.14.22890/shared/source/built_ins/kernels/fill_image2d.builtin_kernel000066400000000000000000000006171422164147700310250ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ R"===( __kernel void FillImage2d( __write_only image2d_t output, uint4 color, int4 dstOffset) { const int x = get_global_id(0); const int y = get_global_id(1); const int2 dstCoord = (int2)(x, y) + (int2)(dstOffset.x, dstOffset.y); write_imageui(output, dstCoord, color); } )===" compute-runtime-22.14.22890/shared/source/built_ins/kernels/fill_image3d.builtin_kernel000066400000000000000000000007341422164147700310260ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ R"===( #pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable __kernel void FillImage3d( __write_only image3d_t output, uint4 color, int4 dstOffset) { const int x = get_global_id(0); const int y = get_global_id(1); const int z = get_global_id(2); const int4 dstCoord = (int4)(x, y, z, 0) + dstOffset; write_imageui(output, dstCoord, color); } )===" compute-runtime-22.14.22890/shared/source/built_ins/registry/000077500000000000000000000000001422164147700237605ustar00rootroot00000000000000compute-runtime-22.14.22890/shared/source/built_ins/registry/CMakeLists.txt000066400000000000000000000015511422164147700265220ustar00rootroot00000000000000# # Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT # add_library(${BUILTINS_SOURCES_LIB_NAME} OBJECT EXCLUDE_FROM_ALL CMakeLists.txt built_ins_registry.h register_copy_kernels_source.cpp ) set_target_properties(${BUILTINS_SOURCES_LIB_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON) set_target_properties(${BUILTINS_SOURCES_LIB_NAME} PROPERTIES FOLDER "${SHARED_SOURCE_PROJECTS_FOLDER}/${SHARED_BUILTINS_PROJECTS_FOLDER}") target_include_directories(${BUILTINS_SOURCES_LIB_NAME} PRIVATE ${ENGINE_NODE_DIR} ${KHRONOS_HEADERS_DIR} 
${KHRONOS_GL_HEADERS_DIR} ${NEO__GMM_INCLUDE_DIR} ${NEO__IGC_INCLUDE_DIR} ${THIRD_PARTY_DIR} ) add_subdirectories() compute-runtime-22.14.22890/shared/source/built_ins/registry/built_ins_registry.h000066400000000000000000000012601422164147700300500ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/built_ins/built_ins.h" #include #include namespace NEO { struct RegisterEmbeddedResource { RegisterEmbeddedResource(const char *name, const char *resource, size_t resourceLength) { auto &storageRegistry = EmbeddedStorageRegistry::getInstance(); storageRegistry.store(name, createBuiltinResource(resource, resourceLength)); } RegisterEmbeddedResource(const char *name, std::string &&resource) : RegisterEmbeddedResource(name, resource.data(), resource.size() + 1) { } }; } // namespace NEO compute-runtime-22.14.22890/shared/source/built_ins/registry/register_copy_kernels_source.cpp000066400000000000000000000137731422164147700324600ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/built_ins/registry/built_ins_registry.h" #include namespace NEO { static RegisterEmbeddedResource registerCopyBufferToBufferSrc( createBuiltinResourceName( EBuiltInOps::CopyBufferToBuffer, BuiltinCode::getExtension(BuiltinCode::ECodeType::Source)) .c_str(), std::string( #include "shared/source/built_ins/kernels/copy_buffer_to_buffer.builtin_kernel" )); static RegisterEmbeddedResource registerCopyBufferToBufferStatelessSrc( createBuiltinResourceName( EBuiltInOps::CopyBufferToBufferStateless, BuiltinCode::getExtension(BuiltinCode::ECodeType::Source)) .c_str(), std::string( #include "shared/source/built_ins/kernels/copy_buffer_to_buffer_stateless.builtin_kernel" )); static RegisterEmbeddedResource registerCopyBufferRectSrc( createBuiltinResourceName( EBuiltInOps::CopyBufferRect, 
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source)) .c_str(), std::string( #include "shared/source/built_ins/kernels/copy_buffer_rect.builtin_kernel" )); static RegisterEmbeddedResource registerCopyBufferRectStatelessSrc( createBuiltinResourceName( EBuiltInOps::CopyBufferRectStateless, BuiltinCode::getExtension(BuiltinCode::ECodeType::Source)) .c_str(), std::string( #include "shared/source/built_ins/kernels/copy_buffer_rect_stateless.builtin_kernel" )); static RegisterEmbeddedResource registerFillBufferSrc( createBuiltinResourceName( EBuiltInOps::FillBuffer, BuiltinCode::getExtension(BuiltinCode::ECodeType::Source)) .c_str(), std::string( #include "shared/source/built_ins/kernels/fill_buffer.builtin_kernel" )); static RegisterEmbeddedResource registerFillBufferStatelessSrc( createBuiltinResourceName( EBuiltInOps::FillBufferStateless, BuiltinCode::getExtension(BuiltinCode::ECodeType::Source)) .c_str(), std::string( #include "shared/source/built_ins/kernels/fill_buffer_stateless.builtin_kernel" )); static RegisterEmbeddedResource registerCopyBufferToImage3dSrc( createBuiltinResourceName( EBuiltInOps::CopyBufferToImage3d, BuiltinCode::getExtension(BuiltinCode::ECodeType::Source)) .c_str(), std::string( #include "shared/source/built_ins/kernels/copy_buffer_to_image3d.builtin_kernel" )); static RegisterEmbeddedResource registerCopyBufferToImage3dStatelessSrc( createBuiltinResourceName( EBuiltInOps::CopyBufferToImage3dStateless, BuiltinCode::getExtension(BuiltinCode::ECodeType::Source)) .c_str(), std::string( #include "shared/source/built_ins/kernels/copy_buffer_to_image3d_stateless.builtin_kernel" )); static RegisterEmbeddedResource registerCopyImage3dToBufferSrc( createBuiltinResourceName( EBuiltInOps::CopyImage3dToBuffer, BuiltinCode::getExtension(BuiltinCode::ECodeType::Source)) .c_str(), std::string( #include "shared/source/built_ins/kernels/copy_image3d_to_buffer.builtin_kernel" )); static RegisterEmbeddedResource registerCopyImage3dToBufferStatelessSrc( 
createBuiltinResourceName( EBuiltInOps::CopyImage3dToBufferStateless, BuiltinCode::getExtension(BuiltinCode::ECodeType::Source)) .c_str(), std::string( #include "shared/source/built_ins/kernels/copy_image3d_to_buffer_stateless.builtin_kernel" )); static RegisterEmbeddedResource registerCopyImageToImage1dSrc( createBuiltinResourceName( EBuiltInOps::CopyImageToImage1d, BuiltinCode::getExtension(BuiltinCode::ECodeType::Source)) .c_str(), std::string( #include "shared/source/built_ins/kernels/copy_image_to_image1d.builtin_kernel" )); static RegisterEmbeddedResource registerCopyImageToImage2dSrc( createBuiltinResourceName( EBuiltInOps::CopyImageToImage2d, BuiltinCode::getExtension(BuiltinCode::ECodeType::Source)) .c_str(), std::string( #include "shared/source/built_ins/kernels/copy_image_to_image2d.builtin_kernel" )); static RegisterEmbeddedResource registerCopyImageToImage3dSrc( createBuiltinResourceName( EBuiltInOps::CopyImageToImage3d, BuiltinCode::getExtension(BuiltinCode::ECodeType::Source)) .c_str(), std::string( #include "shared/source/built_ins/kernels/copy_image_to_image3d.builtin_kernel" )); static RegisterEmbeddedResource registerFillImage1dSrc( createBuiltinResourceName( EBuiltInOps::FillImage1d, BuiltinCode::getExtension(BuiltinCode::ECodeType::Source)) .c_str(), std::string( #include "shared/source/built_ins/kernels/fill_image1d.builtin_kernel" )); static RegisterEmbeddedResource registerFillImage2dSrc( createBuiltinResourceName( EBuiltInOps::FillImage2d, BuiltinCode::getExtension(BuiltinCode::ECodeType::Source)) .c_str(), std::string( #include "shared/source/built_ins/kernels/fill_image2d.builtin_kernel" )); static RegisterEmbeddedResource registerFillImage3dSrc( createBuiltinResourceName( EBuiltInOps::FillImage3d, BuiltinCode::getExtension(BuiltinCode::ECodeType::Source)) .c_str(), std::string( #include "shared/source/built_ins/kernels/fill_image3d.builtin_kernel" )); static RegisterEmbeddedResource registerAuxTranslationSrc( createBuiltinResourceName( 
EBuiltInOps::AuxTranslation, BuiltinCode::getExtension(BuiltinCode::ECodeType::Source)) .c_str(), std::string( #include "shared/source/built_ins/kernels/aux_translation.builtin_kernel" )); static RegisterEmbeddedResource registerCopyKernelTimestampsSrc( createBuiltinResourceName( EBuiltInOps::FillImage3d, BuiltinCode::getExtension(BuiltinCode::ECodeType::Source)) .c_str(), std::string( #include "shared/source/built_ins/kernels/copy_kernel_timestamps.builtin_kernel" )); } // namespace NEO compute-runtime-22.14.22890/shared/source/built_ins/sip.cpp000066400000000000000000000243261422164147700234160ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/built_ins/sip.h" #include "shared/source/built_ins/built_ins.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/device/device.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/debug_helpers.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/ptr_math.h" #include "shared/source/helpers/string.h" #include "shared/source/memory_manager/allocation_properties.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/utilities/io_functions.h" #include "common/StateSaveAreaHeader.h" namespace NEO { SipClassType SipKernel::classType = SipClassType::Init; std::vector readFile(const std::string &fileName, size_t &retSize) { std::vector retBuf; FILE *fileDescriptor = nullptr; long int size = 0; size_t bytesRead = 0u; retSize = 0; fileDescriptor = IoFunctions::fopenPtr(fileName.c_str(), "rb"); if (fileDescriptor == NULL) { return retBuf; } IoFunctions::fseekPtr(fileDescriptor, 0, SEEK_END); size = IoFunctions::ftellPtr(fileDescriptor); IoFunctions::rewindPtr(fileDescriptor); 
retBuf.resize(size); bytesRead = IoFunctions::freadPtr(retBuf.data(), 1, size, fileDescriptor); IoFunctions::fclosePtr(fileDescriptor); if (static_cast(bytesRead) != size || bytesRead == 0u) { retBuf.clear(); } else { retSize = bytesRead; } return retBuf; } SipKernel::~SipKernel() = default; SipKernel::SipKernel(SipKernelType type, GraphicsAllocation *sipAlloc, std::vector ssah) : stateSaveAreaHeader(ssah), sipAllocation(sipAlloc), type(type) { } GraphicsAllocation *SipKernel::getSipAllocation() const { return sipAllocation; } const std::vector &SipKernel::getStateSaveAreaHeader() const { return stateSaveAreaHeader; } size_t SipKernel::getStateSaveAreaSize(Device *device) const { auto &hwInfo = device->getHardwareInfo(); auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); auto maxDbgSurfaceSize = hwHelper.getSipKernelMaxDbgSurfaceSize(hwInfo); auto stateSaveAreaHeader = getStateSaveAreaHeader(); if (stateSaveAreaHeader.empty()) { return maxDbgSurfaceSize; } if (strcmp(stateSaveAreaHeader.data(), "tssarea")) { return maxDbgSurfaceSize; } auto hdr = reinterpret_cast(stateSaveAreaHeader.data()); DEBUG_BREAK_IF(hdr->versionHeader.size * 8 != sizeof(SIP::StateSaveAreaHeader)); auto stateSaveAreaSize = hdr->regHeader.num_slices * hdr->regHeader.num_subslices_per_slice * hdr->regHeader.num_eus_per_subslice * hdr->regHeader.num_threads_per_eu * hdr->regHeader.state_save_size + hdr->versionHeader.size * 8 + hdr->regHeader.state_area_offset; return alignUp(stateSaveAreaSize, MemoryConstants::pageSize); } SipKernelType SipKernel::getSipKernelType(Device &device) { bool debuggingEnabled = device.getDebugger() != nullptr || device.isDebuggerActive(); return getSipKernelType(device, debuggingEnabled); } SipKernelType SipKernel::getSipKernelType(Device &device, bool debuggingEnabled) { auto &hwHelper = HwHelper::get(device.getHardwareInfo().platform.eRenderCoreFamily); return hwHelper.getSipKernelType(debuggingEnabled); } bool 
SipKernel::initBuiltinsSipKernel(SipKernelType type, Device &device) { device.getBuiltIns()->getSipKernel(type, device); return true; } bool SipKernel::initRawBinaryFromFileKernel(SipKernelType type, Device &device, std::string &fileName) { uint32_t sipIndex = static_cast(type); uint32_t rootDeviceIndex = device.getRootDeviceIndex(); if (device.getExecutionEnvironment()->rootDeviceEnvironments[rootDeviceIndex]->sipKernels[sipIndex].get() != nullptr) { return true; } size_t bytesRead = 0u; auto fileData = readFile(fileName, bytesRead); if (bytesRead) { void *alignedBuffer = alignedMalloc(bytesRead, MemoryConstants::pageSize); memcpy_s(alignedBuffer, bytesRead, fileData.data(), bytesRead); const auto allocType = AllocationType::KERNEL_ISA_INTERNAL; AllocationProperties properties = {rootDeviceIndex, bytesRead, allocType, device.getDeviceBitfield()}; properties.flags.use32BitFrontWindow = false; auto sipAllocation = device.getMemoryManager()->allocateGraphicsMemoryWithProperties(properties); if (sipAllocation == nullptr) { alignedFree(alignedBuffer); return false; } auto &hwInfo = device.getHardwareInfo(); auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); MemoryTransferHelper::transferMemoryToAllocation(hwHelper.isBlitCopyRequiredForLocalMemory(hwInfo, *sipAllocation), device, sipAllocation, 0, alignedBuffer, bytesRead); alignedFree(alignedBuffer); auto headerFilename = createHeaderFilename(fileName); std::vector stateSaveAreaHeader = readStateSaveAreaHeaderFromFile(headerFilename); device.getExecutionEnvironment()->rootDeviceEnvironments[rootDeviceIndex]->sipKernels[sipIndex] = std::make_unique(type, sipAllocation, std::move(stateSaveAreaHeader)); return true; } return false; } std::vector SipKernel::readStateSaveAreaHeaderFromFile(const std::string &fileName) { std::vector data; size_t bytesRead = 0u; data = readFile(fileName, bytesRead); return data; } std::string SipKernel::createHeaderFilename(const std::string &fileName) { std::string_view 
coreName(fileName); auto extensionPos = coreName.find('.'); std::string ext = ""; if (extensionPos != coreName.npos) { ext = coreName.substr(extensionPos, coreName.size() - extensionPos); coreName.remove_suffix(coreName.size() - extensionPos); } std::string headerFilename(coreName); headerFilename += "_header" + ext; return headerFilename; } bool SipKernel::initHexadecimalArraySipKernel(SipKernelType type, Device &device) { uint32_t sipIndex = static_cast(type); uint32_t rootDeviceIndex = device.getRootDeviceIndex(); auto sipKenel = device.getExecutionEnvironment()->rootDeviceEnvironments[rootDeviceIndex]->sipKernels[sipIndex].get(); if (sipKenel != nullptr) { return true; } uint32_t *sipKernelBinary = nullptr; size_t kernelBinarySize = 0u; auto &hwInfo = device.getHardwareInfo(); auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); hwHelper.setSipKernelData(sipKernelBinary, kernelBinarySize); const auto allocType = AllocationType::KERNEL_ISA_INTERNAL; AllocationProperties properties = {rootDeviceIndex, kernelBinarySize, allocType, device.getDeviceBitfield()}; properties.flags.use32BitFrontWindow = false; auto sipAllocation = device.getMemoryManager()->allocateGraphicsMemoryWithProperties(properties); if (sipAllocation == nullptr) { return false; } MemoryTransferHelper::transferMemoryToAllocation(hwHelper.isBlitCopyRequiredForLocalMemory(hwInfo, *sipAllocation), device, sipAllocation, 0, sipKernelBinary, kernelBinarySize); std::vector emptyStateSaveAreaHeader; device.getExecutionEnvironment()->rootDeviceEnvironments[rootDeviceIndex]->sipKernels[sipIndex] = std::make_unique(type, sipAllocation, std::move(emptyStateSaveAreaHeader)); return true; } void SipKernel::freeSipKernels(RootDeviceEnvironment *rootDeviceEnvironment, MemoryManager *memoryManager) { for (auto &sipKernel : rootDeviceEnvironment->sipKernels) { if (sipKernel.get()) { memoryManager->freeGraphicsMemory(sipKernel->getSipAllocation()); } } } void SipKernel::selectSipClassType(std::string 
&fileName, const HardwareInfo &hwInfo) { const std::string unknown("unk"); if (fileName.compare(unknown) == 0) { SipKernel::classType = HwHelper::get(hwInfo.platform.eRenderCoreFamily).isSipKernelAsHexadecimalArrayPreferred() ? SipClassType::HexadecimalHeaderFile : SipClassType::Builtins; } else { SipKernel::classType = SipClassType::RawBinaryFromFile; } } bool SipKernel::initSipKernelImpl(SipKernelType type, Device &device) { std::string fileName = DebugManager.flags.LoadBinarySipFromFile.get(); SipKernel::selectSipClassType(fileName, *device.getRootDeviceEnvironment().getHardwareInfo()); switch (SipKernel::classType) { case SipClassType::RawBinaryFromFile: return SipKernel::initRawBinaryFromFileKernel(type, device, fileName); case SipClassType::HexadecimalHeaderFile: return SipKernel::initHexadecimalArraySipKernel(type, device); default: return SipKernel::initBuiltinsSipKernel(type, device); } } const SipKernel &SipKernel::getSipKernelImpl(Device &device) { auto sipType = SipKernel::getSipKernelType(device); switch (SipKernel::classType) { case SipClassType::RawBinaryFromFile: case SipClassType::HexadecimalHeaderFile: return *device.getRootDeviceEnvironment().sipKernels[static_cast(sipType)].get(); default: return device.getBuiltIns()->getSipKernel(sipType, device); } } const SipKernel &SipKernel::getBindlessDebugSipKernel(Device &device) { auto debugSipType = SipKernelType::DbgBindless; SipKernel::initSipKernelImpl(debugSipType, device); switch (SipKernel::classType) { case SipClassType::RawBinaryFromFile: return *device.getRootDeviceEnvironment().sipKernels[static_cast(debugSipType)].get(); default: return device.getBuiltIns()->getSipKernel(debugSipType, device); } } } // namespace NEO compute-runtime-22.14.22890/shared/source/built_ins/sip.h000066400000000000000000000045721422164147700230640ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include 
"shared/source/built_ins/sip_kernel_type.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/program/program_info.h" #include #include namespace NEO { class Device; class GraphicsAllocation; class MemoryManager; struct RootDeviceEnvironment; class SipKernel { public: SipKernel(SipKernelType type, GraphicsAllocation *sipAlloc, std::vector ssah); SipKernel(const SipKernel &) = delete; SipKernel &operator=(const SipKernel &) = delete; SipKernel(SipKernel &&) = delete; SipKernel &operator=(SipKernel &&) = delete; virtual ~SipKernel(); SipKernelType getType() const { return type; } MOCKABLE_VIRTUAL GraphicsAllocation *getSipAllocation() const; MOCKABLE_VIRTUAL const std::vector &getStateSaveAreaHeader() const; MOCKABLE_VIRTUAL size_t getStateSaveAreaSize(Device *device) const; static bool initSipKernel(SipKernelType type, Device &device); static void freeSipKernels(RootDeviceEnvironment *rootDeviceEnvironment, MemoryManager *memoryManager); static const SipKernel &getSipKernel(Device &device); static const SipKernel &getBindlessDebugSipKernel(Device &device); static SipKernelType getSipKernelType(Device &device); static SipKernelType getSipKernelType(Device &device, bool debuggingEnable); static SipClassType classType; enum class COMMAND : uint32_t { RESUME, READY, SLM_READ, SLM_WRITE }; protected: static bool initSipKernelImpl(SipKernelType type, Device &device); static const SipKernel &getSipKernelImpl(Device &device); static bool initBuiltinsSipKernel(SipKernelType type, Device &device); static bool initRawBinaryFromFileKernel(SipKernelType type, Device &device, std::string &fileName); static std::vector readStateSaveAreaHeaderFromFile(const std::string &fileName); static std::string createHeaderFilename(const std::string &filename); static bool initHexadecimalArraySipKernel(SipKernelType type, Device &device); static void selectSipClassType(std::string &fileName, const HardwareInfo &hwInfo); const std::vector stateSaveAreaHeader; GraphicsAllocation 
*sipAllocation = nullptr; SipKernelType type = SipKernelType::COUNT; }; } // namespace NEO compute-runtime-22.14.22890/shared/source/built_ins/sip_init.cpp000066400000000000000000000006201422164147700244300ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/built_ins/sip.h" namespace NEO { bool SipKernel::initSipKernel(SipKernelType type, Device &device) { return SipKernel::initSipKernelImpl(type, device); } const SipKernel &SipKernel::getSipKernel(Device &device) { return SipKernel::getSipKernelImpl(device); } } // namespace NEO compute-runtime-22.14.22890/shared/source/built_ins/sip_kernel_type.h000066400000000000000000000006201422164147700254530ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include namespace NEO { enum class SipKernelType : std::uint32_t { Csr = 0, DbgCsr, DbgCsrLocal, DbgBindless, COUNT }; enum class SipClassType : std::uint32_t { Init = 0, Builtins, RawBinaryFromFile, HexadecimalHeaderFile }; } // namespace NEO compute-runtime-22.14.22890/shared/source/built_ins/unknown_built_in_name.cpp000066400000000000000000000004051422164147700271770ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/built_ins/built_ins.h" namespace NEO { const char *getUnknownBuiltinAsString(EBuiltInOps::Type builtin) { return "unknown"; } } // namespace NEO compute-runtime-22.14.22890/shared/source/command_container/000077500000000000000000000000001422164147700236005ustar00rootroot00000000000000compute-runtime-22.14.22890/shared/source/command_container/CMakeLists.txt000066400000000000000000000034551422164147700263470ustar00rootroot00000000000000# # Copyright (C) 2019-2022 Intel Corporation # # SPDX-License-Identifier: MIT # set(NEO_CORE_COMMAND_CONTAINER ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt 
${CMAKE_CURRENT_SOURCE_DIR}/cmdcontainer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cmdcontainer.h ${CMAKE_CURRENT_SOURCE_DIR}/command_encoder.h ${CMAKE_CURRENT_SOURCE_DIR}/command_encoder.inl ${CMAKE_CURRENT_SOURCE_DIR}/command_encoder_bdw_and_later.inl ${CMAKE_CURRENT_SOURCE_DIR}/command_encoder_tgllp_and_later.inl ${CMAKE_CURRENT_SOURCE_DIR}/encode_compute_mode_bdw_and_later.inl ${CMAKE_CURRENT_SOURCE_DIR}/encode_compute_mode_tgllp_and_later.inl ${CMAKE_CURRENT_SOURCE_DIR}/implicit_scaling.cpp ${CMAKE_CURRENT_SOURCE_DIR}/implicit_scaling.h ${CMAKE_CURRENT_SOURCE_DIR}/definitions/encode_surface_state_args_base.h ${CMAKE_CURRENT_SOURCE_DIR}/definitions${BRANCH_DIR_SUFFIX}encode_surface_state.inl ${CMAKE_CURRENT_SOURCE_DIR}/definitions${BRANCH_DIR_SUFFIX}encode_surface_state_args.h ) if(SUPPORT_XEHP_AND_LATER) list(APPEND NEO_CORE_COMMAND_CONTAINER ${CMAKE_CURRENT_SOURCE_DIR}/command_encoder_raytracing_xehp_and_later.inl ${CMAKE_CURRENT_SOURCE_DIR}/command_encoder_xehp_and_later.inl ${CMAKE_CURRENT_SOURCE_DIR}/implicit_scaling_xehp_and_later.inl ${CMAKE_CURRENT_SOURCE_DIR}/walker_partition_interface.h ${CMAKE_CURRENT_SOURCE_DIR}/walker_partition_xehp_and_later.h ) endif() if(SUPPORT_DG2_AND_LATER) list(APPEND NEO_CORE_COMMAND_CONTAINER ${CMAKE_CURRENT_SOURCE_DIR}/command_encoder_xe_hpg_core_and_later.inl ) endif() if(SUPPORT_PVC_AND_LATER) list(APPEND NEO_CORE_COMMAND_CONTAINER ${CMAKE_CURRENT_SOURCE_DIR}/memory_fence_encoder.h ) endif() set_property(GLOBAL PROPERTY NEO_CORE_COMMAND_CONTAINER ${NEO_CORE_COMMAND_CONTAINER}) add_subdirectories() compute-runtime-22.14.22890/shared/source/command_container/cmdcontainer.cpp000066400000000000000000000326631422164147700267640ustar00rootroot00000000000000/* * Copyright (C) 2019-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_container/cmdcontainer.h" #include "shared/source/command_container/command_encoder.h" #include "shared/source/command_stream/command_stream_receiver.h" #include 
"shared/source/command_stream/linear_stream.h" #include "shared/source/device/device.h" #include "shared/source/helpers/api_specific_config.h" #include "shared/source/helpers/debug_helpers.h" #include "shared/source/helpers/heap_helper.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/indirect_heap/indirect_heap.h" #include "shared/source/memory_manager/allocations_list.h" #include "shared/source/memory_manager/memory_manager.h" namespace NEO { CommandContainer::~CommandContainer() { if (!device) { DEBUG_BREAK_IF(device); return; } this->handleCmdBufferAllocations(0u); for (auto allocationIndirectHeap : allocationIndirectHeaps) { if (heapHelper) { heapHelper->storeHeapAllocation(allocationIndirectHeap); } } for (auto deallocation : deallocationContainer) { if (((deallocation->getAllocationType() == AllocationType::INTERNAL_HEAP) || (deallocation->getAllocationType() == AllocationType::LINEAR_STREAM))) { getHeapHelper()->storeHeapAllocation(deallocation); } } } CommandContainer::CommandContainer() { for (auto &indirectHeap : indirectHeaps) { indirectHeap = nullptr; } for (auto &allocationIndirectHeap : allocationIndirectHeaps) { allocationIndirectHeap = nullptr; } } CommandContainer::CommandContainer(uint32_t maxNumAggregatedIdds) : CommandContainer() { numIddsPerBlock = maxNumAggregatedIdds; } ErrorCode CommandContainer::initialize(Device *device, AllocationsList *reusableAllocationList, bool requireHeaps) { this->device = device; this->reusableAllocationList = reusableAllocationList; size_t alignedSize = alignUp(totalCmdBufferSize, MemoryConstants::pageSize64k); auto cmdBufferAllocation = this->obtainNextCommandBufferAllocation(); if (!cmdBufferAllocation) { return ErrorCode::OUT_OF_DEVICE_MEMORY; } cmdBufferAllocations.push_back(cmdBufferAllocation); const auto &hardwareInfo = device->getHardwareInfo(); auto &hwHelper = NEO::HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); commandStream = 
std::make_unique(cmdBufferAllocation->getUnderlyingBuffer(), alignedSize - cmdBufferReservedSize, this, hwHelper.getBatchBufferEndSize()); commandStream->replaceGraphicsAllocation(cmdBufferAllocation); if (!getFlushTaskUsedForImmediate()) { addToResidencyContainer(cmdBufferAllocation); } if (requireHeaps) { constexpr size_t heapSize = 65536u; heapHelper = std::unique_ptr(new HeapHelper(device->getMemoryManager(), device->getDefaultEngine().commandStreamReceiver->getInternalAllocationStorage(), device->getNumGenericSubDevices() > 1u)); for (uint32_t i = 0; i < IndirectHeap::Type::NUM_TYPES; i++) { if (NEO::ApiSpecificConfig::getBindlessConfiguration() && i != IndirectHeap::Type::INDIRECT_OBJECT) { continue; } if (!hardwareInfo.capabilityTable.supportsImages && IndirectHeap::Type::DYNAMIC_STATE == i) { continue; } allocationIndirectHeaps[i] = heapHelper->getHeapAllocation(i, heapSize, alignedSize, device->getRootDeviceIndex()); if (!allocationIndirectHeaps[i]) { return ErrorCode::OUT_OF_DEVICE_MEMORY; } residencyContainer.push_back(allocationIndirectHeaps[i]); bool requireInternalHeap = (IndirectHeap::Type::INDIRECT_OBJECT == i); indirectHeaps[i] = std::make_unique(allocationIndirectHeaps[i], requireInternalHeap); if (i == IndirectHeap::Type::SURFACE_STATE) { indirectHeaps[i]->getSpace(reservedSshSize); } } indirectObjectHeapBaseAddress = device->getMemoryManager()->getInternalHeapBaseAddress(device->getRootDeviceIndex(), allocationIndirectHeaps[IndirectHeap::Type::INDIRECT_OBJECT]->isAllocatedInLocalMemoryPool()); instructionHeapBaseAddress = device->getMemoryManager()->getInternalHeapBaseAddress(device->getRootDeviceIndex(), device->getMemoryManager()->isLocalMemoryUsedForIsa(device->getRootDeviceIndex())); iddBlock = nullptr; nextIddInBlock = this->getNumIddPerBlock(); } return ErrorCode::SUCCESS; } void CommandContainer::addToResidencyContainer(GraphicsAllocation *alloc) { if (alloc == nullptr) { return; } this->residencyContainer.push_back(alloc); } void 
CommandContainer::removeDuplicatesFromResidencyContainer() { std::sort(this->residencyContainer.begin(), this->residencyContainer.end()); this->residencyContainer.erase(std::unique(this->residencyContainer.begin(), this->residencyContainer.end()), this->residencyContainer.end()); } void CommandContainer::reset() { setDirtyStateForAllHeaps(true); slmSize = std::numeric_limits::max(); getResidencyContainer().clear(); getDeallocationContainer().clear(); sshAllocations.clear(); this->handleCmdBufferAllocations(1u); cmdBufferAllocations.erase(cmdBufferAllocations.begin() + 1, cmdBufferAllocations.end()); commandStream->replaceBuffer(cmdBufferAllocations[0]->getUnderlyingBuffer(), defaultListCmdBufferSize); commandStream->replaceGraphicsAllocation(cmdBufferAllocations[0]); addToResidencyContainer(commandStream->getGraphicsAllocation()); for (auto &indirectHeap : indirectHeaps) { if (indirectHeap != nullptr) { indirectHeap->replaceBuffer(indirectHeap->getCpuBase(), indirectHeap->getMaxAvailableSpace()); addToResidencyContainer(indirectHeap->getGraphicsAllocation()); } } if (indirectHeaps[IndirectHeap::Type::SURFACE_STATE] != nullptr) { indirectHeaps[IndirectHeap::Type::SURFACE_STATE]->getSpace(reservedSshSize); } iddBlock = nullptr; nextIddInBlock = this->getNumIddPerBlock(); lastPipelineSelectModeRequired = false; lastSentUseGlobalAtomics = false; } void *CommandContainer::getHeapSpaceAllowGrow(HeapType heapType, size_t size) { auto indirectHeap = getIndirectHeap(heapType); if (indirectHeap->getAvailableSpace() < size) { size_t newSize = indirectHeap->getUsed() + indirectHeap->getAvailableSpace(); newSize *= 2; newSize = std::max(newSize, indirectHeap->getAvailableSpace() + size); newSize = alignUp(newSize, MemoryConstants::pageSize); auto oldAlloc = getIndirectHeapAllocation(heapType); auto newAlloc = getHeapHelper()->getHeapAllocation(heapType, newSize, MemoryConstants::pageSize, device->getRootDeviceIndex()); UNRECOVERABLE_IF(!oldAlloc); UNRECOVERABLE_IF(!newAlloc); 
auto oldBase = indirectHeap->getHeapGpuBase(); indirectHeap->replaceGraphicsAllocation(newAlloc); indirectHeap->replaceBuffer(newAlloc->getUnderlyingBuffer(), newAlloc->getUnderlyingBufferSize()); auto newBase = indirectHeap->getHeapGpuBase(); getResidencyContainer().push_back(newAlloc); getDeallocationContainer().push_back(oldAlloc); setIndirectHeapAllocation(heapType, newAlloc); if (oldBase != newBase) { setHeapDirty(heapType); } } return indirectHeap->getSpace(size); } IndirectHeap *CommandContainer::getHeapWithRequiredSizeAndAlignment(HeapType heapType, size_t sizeRequired, size_t alignment) { auto indirectHeap = getIndirectHeap(heapType); auto sizeRequested = sizeRequired; auto heapBuffer = indirectHeap->getSpace(0); if (alignment && (heapBuffer != alignUp(heapBuffer, alignment))) { sizeRequested += alignment; } if (indirectHeap->getAvailableSpace() < sizeRequested) { size_t newSize = indirectHeap->getUsed() + indirectHeap->getAvailableSpace(); newSize = alignUp(newSize, MemoryConstants::pageSize); auto oldAlloc = getIndirectHeapAllocation(heapType); auto newAlloc = getHeapHelper()->getHeapAllocation(heapType, newSize, MemoryConstants::pageSize, device->getRootDeviceIndex()); UNRECOVERABLE_IF(!oldAlloc); UNRECOVERABLE_IF(!newAlloc); auto oldBase = indirectHeap->getHeapGpuBase(); indirectHeap->replaceGraphicsAllocation(newAlloc); indirectHeap->replaceBuffer(newAlloc->getUnderlyingBuffer(), newAlloc->getUnderlyingBufferSize()); auto newBase = indirectHeap->getHeapGpuBase(); getResidencyContainer().push_back(newAlloc); getDeallocationContainer().push_back(oldAlloc); setIndirectHeapAllocation(heapType, newAlloc); if (oldBase != newBase) { setHeapDirty(heapType); } if (heapType == HeapType::SURFACE_STATE) { indirectHeap->getSpace(reservedSshSize); sshAllocations.push_back(oldAlloc); } } if (alignment) { indirectHeap->align(alignment); } return indirectHeap; } void CommandContainer::handleCmdBufferAllocations(size_t startIndex) { for (size_t i = startIndex; i < 
cmdBufferAllocations.size(); i++) { if (this->reusableAllocationList) { this->device->getMemoryManager()->handleFenceCompletion(cmdBufferAllocations[i]); reusableAllocationList->pushFrontOne(*cmdBufferAllocations[i]); } else { this->device->getMemoryManager()->freeGraphicsMemory(cmdBufferAllocations[i]); } } } GraphicsAllocation *CommandContainer::obtainNextCommandBufferAllocation() { size_t alignedSize = alignUp(totalCmdBufferSize, MemoryConstants::pageSize64k); GraphicsAllocation *cmdBufferAllocation = nullptr; if (this->reusableAllocationList) { cmdBufferAllocation = this->reusableAllocationList->detachAllocation(alignedSize, nullptr, nullptr, AllocationType::COMMAND_BUFFER).release(); } if (!cmdBufferAllocation) { AllocationProperties properties{device->getRootDeviceIndex(), true /* allocateMemory*/, alignedSize, AllocationType::COMMAND_BUFFER, (device->getNumGenericSubDevices() > 1u) /* multiOsContextCapable */, false, device->getDeviceBitfield()}; cmdBufferAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties(properties); } return cmdBufferAllocation; } void CommandContainer::allocateNextCommandBuffer() { auto cmdBufferAllocation = this->obtainNextCommandBufferAllocation(); UNRECOVERABLE_IF(!cmdBufferAllocation); cmdBufferAllocations.push_back(cmdBufferAllocation); size_t alignedSize = alignUp(totalCmdBufferSize, MemoryConstants::pageSize64k); commandStream->replaceBuffer(cmdBufferAllocation->getUnderlyingBuffer(), alignedSize - cmdBufferReservedSize); commandStream->replaceGraphicsAllocation(cmdBufferAllocation); if (!getFlushTaskUsedForImmediate()) { addToResidencyContainer(cmdBufferAllocation); } } void CommandContainer::closeAndAllocateNextCommandBuffer() { auto &hwHelper = NEO::HwHelper::get(device->getHardwareInfo().platform.eRenderCoreFamily); auto bbEndSize = hwHelper.getBatchBufferEndSize(); auto ptr = commandStream->getSpace(0u); memcpy_s(ptr, bbEndSize, hwHelper.getBatchBufferEndReference(), bbEndSize); 
allocateNextCommandBuffer(); } void CommandContainer::prepareBindfulSsh() { if (ApiSpecificConfig::getBindlessConfiguration()) { if (allocationIndirectHeaps[IndirectHeap::Type::SURFACE_STATE] == nullptr) { size_t alignedSize = alignUp(totalCmdBufferSize, MemoryConstants::pageSize64k); constexpr size_t heapSize = 65536u; allocationIndirectHeaps[IndirectHeap::Type::SURFACE_STATE] = heapHelper->getHeapAllocation(IndirectHeap::Type::SURFACE_STATE, heapSize, alignedSize, device->getRootDeviceIndex()); UNRECOVERABLE_IF(!allocationIndirectHeaps[IndirectHeap::Type::SURFACE_STATE]); residencyContainer.push_back(allocationIndirectHeaps[IndirectHeap::Type::SURFACE_STATE]); indirectHeaps[IndirectHeap::Type::SURFACE_STATE] = std::make_unique(allocationIndirectHeaps[IndirectHeap::Type::SURFACE_STATE], false); indirectHeaps[IndirectHeap::Type::SURFACE_STATE]->getSpace(reservedSshSize); } setHeapDirty(IndirectHeap::Type::SURFACE_STATE); } } IndirectHeap *CommandContainer::getIndirectHeap(HeapType heapType) { return indirectHeaps[heapType].get(); } } // namespace NEO compute-runtime-22.14.22890/shared/source/command_container/cmdcontainer.h000066400000000000000000000113201422164147700264140ustar00rootroot00000000000000/* * Copyright (C) 2019-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/csr_definitions.h" #include "shared/source/helpers/heap_helper.h" #include "shared/source/helpers/non_copyable_or_moveable.h" #include "shared/source/indirect_heap/indirect_heap_type.h" #include #include #include #include namespace NEO { class Device; class GraphicsAllocation; class LinearStream; class AllocationsList; class IndirectHeap; using ResidencyContainer = std::vector; using CmdBufferContainer = std::vector; using HeapContainer = std::vector; using HeapType = IndirectHeapType; enum class ErrorCode { SUCCESS = 0, OUT_OF_DEVICE_MEMORY = 1 }; class CommandContainer : public NonCopyableOrMovableClass { public: static constexpr 
size_t defaultListCmdBufferSize = MemoryConstants::kiloByte * 256; static constexpr size_t cmdBufferReservedSize = MemoryConstants::cacheLineSize + CSRequirements::csOverfetchSize; static constexpr size_t totalCmdBufferSize = defaultListCmdBufferSize + cmdBufferReservedSize; CommandContainer(); CommandContainer(uint32_t maxNumAggregatedIdds); CmdBufferContainer &getCmdBufferAllocations() { return cmdBufferAllocations; } ResidencyContainer &getResidencyContainer() { return residencyContainer; } std::vector &getDeallocationContainer() { return deallocationContainer; } void addToResidencyContainer(GraphicsAllocation *alloc); void removeDuplicatesFromResidencyContainer(); LinearStream *getCommandStream() { return commandStream.get(); } IndirectHeap *getIndirectHeap(HeapType heapType); HeapHelper *getHeapHelper() { return heapHelper.get(); } GraphicsAllocation *getIndirectHeapAllocation(HeapType heapType) { return allocationIndirectHeaps[heapType]; } void setIndirectHeapAllocation(HeapType heapType, GraphicsAllocation *allocation) { allocationIndirectHeaps[heapType] = allocation; } uint64_t getInstructionHeapBaseAddress() const { return instructionHeapBaseAddress; } uint64_t getIndirectObjectHeapBaseAddress() const { return indirectObjectHeapBaseAddress; } void *getHeapSpaceAllowGrow(HeapType heapType, size_t size); ErrorCode initialize(Device *device, AllocationsList *reusableAllocationList, bool requireHeaps); void prepareBindfulSsh(); virtual ~CommandContainer(); uint32_t slmSize = std::numeric_limits::max(); uint32_t nextIddInBlock = 0; bool lastPipelineSelectModeRequired = false; bool lastSentUseGlobalAtomics = false; Device *getDevice() const { return device; } IndirectHeap *getHeapWithRequiredSizeAndAlignment(HeapType heapType, size_t sizeRequired, size_t alignment); void allocateNextCommandBuffer(); void closeAndAllocateNextCommandBuffer(); void handleCmdBufferAllocations(size_t startIndex); GraphicsAllocation *obtainNextCommandBufferAllocation(); void reset(); 
bool isHeapDirty(HeapType heapType) const { return (dirtyHeaps & (1u << heapType)); } bool isAnyHeapDirty() const { return dirtyHeaps != 0; } void setHeapDirty(HeapType heapType) { dirtyHeaps |= (1u << heapType); } void setDirtyStateForAllHeaps(bool dirty) { dirtyHeaps = dirty ? std::numeric_limits::max() : 0; } void setIddBlock(void *iddBlock) { this->iddBlock = iddBlock; } void *getIddBlock() { return iddBlock; } uint32_t getNumIddPerBlock() const { return numIddsPerBlock; } void setReservedSshSize(size_t reserveSize) { reservedSshSize = reserveSize; } HeapContainer sshAllocations; bool getFlushTaskUsedForImmediate() const { return isFlushTaskUsedForImmediate; } void setFlushTaskUsedForImmediate(bool flushTaskUsedForImmediate) { isFlushTaskUsedForImmediate = flushTaskUsedForImmediate; } protected: void *iddBlock = nullptr; Device *device = nullptr; AllocationsList *reusableAllocationList = nullptr; std::unique_ptr heapHelper; CmdBufferContainer cmdBufferAllocations; GraphicsAllocation *allocationIndirectHeaps[HeapType::NUM_TYPES] = {}; uint64_t instructionHeapBaseAddress = 0u; uint64_t indirectObjectHeapBaseAddress = 0u; uint32_t dirtyHeaps = std::numeric_limits::max(); uint32_t numIddsPerBlock = 64; size_t reservedSshSize = 0; std::unique_ptr commandStream; std::unique_ptr indirectHeaps[HeapType::NUM_TYPES]; ResidencyContainer residencyContainer; std::vector deallocationContainer; bool isFlushTaskUsedForImmediate = false; }; } // namespace NEO compute-runtime-22.14.22890/shared/source/command_container/command_encoder.h000066400000000000000000000521231422164147700270710ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_container/cmdcontainer.h" #include "shared/source/debugger/debugger.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/gmm_helper/gmm_lib.h" #include 
"shared/source/helpers/definitions/mi_flush_args.h" #include "shared/source/helpers/pipe_control_args.h" #include "shared/source/helpers/register_offsets.h" #include "shared/source/helpers/simd_helper.h" #include "shared/source/helpers/vec.h" #include "shared/source/kernel/dispatch_kernel_encoder_interface.h" #include "shared/source/kernel/kernel_arg_descriptor.h" #include "encode_surface_state_args.h" #include namespace NEO { class BindlessHeapsHelper; class GmmHelper; class IndirectHeap; class Gmm; struct HardwareInfo; struct StateComputeModeProperties; struct EncodeDispatchKernelArgs { uint64_t eventAddress = 0ull; Device *device = nullptr; DispatchKernelEncoderI *dispatchInterface = nullptr; const void *pThreadGroupDimensions = nullptr; PreemptionMode preemptionMode = PreemptionMode::Initial; uint32_t partitionCount = 0u; bool isIndirect = false; bool isPredicate = false; bool isTimestampEvent = false; bool L3FlushEnable = false; bool requiresUncachedMocs = false; bool useGlobalAtomics = false; bool isInternal = false; bool isCooperative = false; }; template struct EncodeDispatchKernel { using WALKER_TYPE = typename GfxFamily::WALKER_TYPE; using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA; using BINDING_TABLE_STATE = typename GfxFamily::BINDING_TABLE_STATE; static void encode(CommandContainer &container, EncodeDispatchKernelArgs &args); static void encodeAdditionalWalkerFields(const HardwareInfo &hwInfo, WALKER_TYPE &walkerCmd, KernelExecutionType kernelExecutionType); static void appendAdditionalIDDFields(INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, const HardwareInfo &hwInfo, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy); static void setGrfInfo(INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, uint32_t numGrf, const size_t &sizeCrossThreadData, const size_t &sizePerThreadData); static void *getInterfaceDescriptor(CommandContainer &container, uint32_t &iddOffset); static bool 
isRuntimeLocalIdsGenerationRequired(uint32_t activeChannels, const size_t *lws, std::array walkOrder, bool requireInputWalkOrder, uint32_t &requiredWalkOrder, uint32_t simd); static bool inlineDataProgrammingRequired(const KernelDescriptor &kernelDesc); static void encodeThreadData(WALKER_TYPE &walkerCmd, const uint32_t *startWorkGroup, const uint32_t *numWorkGroups, const uint32_t *workGroupSizes, uint32_t simd, uint32_t localIdDimensions, uint32_t threadsPerThreadGroup, uint32_t threadExecutionMask, bool localIdsGenerationByRuntime, bool inlineDataProgrammingRequired, bool isIndirect, uint32_t requiredWorkGroupOrder); static void programBarrierEnable(INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, uint32_t value, const HardwareInfo &hwInfo); static void adjustInterfaceDescriptorData(INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, const HardwareInfo &hwInfo); static void adjustBindingTablePrefetch(INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, uint32_t samplerCount, uint32_t bindingTableEntryCount); static void adjustTimestampPacket(WALKER_TYPE &walkerCmd, const HardwareInfo &hwInfo); static void setupPostSyncMocs(WALKER_TYPE &walkerCmd, const RootDeviceEnvironment &rootDeviceEnvironment); }; template struct EncodeStates { using BINDING_TABLE_STATE = typename GfxFamily::BINDING_TABLE_STATE; using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA; using SAMPLER_STATE = typename GfxFamily::SAMPLER_STATE; using SAMPLER_BORDER_COLOR_STATE = typename GfxFamily::SAMPLER_BORDER_COLOR_STATE; static const uint32_t alignIndirectStatePointer = MemoryConstants::cacheLineSize; static const size_t alignInterfaceDescriptorData = MemoryConstants::cacheLineSize; static uint32_t copySamplerState(IndirectHeap *dsh, uint32_t samplerStateOffset, uint32_t samplerCount, uint32_t borderColorOffset, const void *fnDynamicStateHeap, BindlessHeapsHelper *bindlessHeapHelper, const HardwareInfo &hwInfo); }; template struct EncodeMath { using MI_MATH_ALU_INST_INLINE = 
typename GfxFamily::MI_MATH_ALU_INST_INLINE; using MI_MATH = typename GfxFamily::MI_MATH; constexpr static size_t streamCommandSize = sizeof(MI_MATH) + sizeof(MI_MATH_ALU_INST_INLINE) * NUM_ALU_INST_FOR_READ_MODIFY_WRITE; static uint32_t *commandReserve(CommandContainer &container); static uint32_t *commandReserve(LinearStream &cmdStream); static void greaterThan(CommandContainer &container, AluRegisters firstOperandRegister, AluRegisters secondOperandRegister, AluRegisters finalResultRegister); static void addition(CommandContainer &container, AluRegisters firstOperandRegister, AluRegisters secondOperandRegister, AluRegisters finalResultRegister); static void addition(LinearStream &cmdStream, AluRegisters firstOperandRegister, AluRegisters secondOperandRegister, AluRegisters finalResultRegister); static void bitwiseAnd(CommandContainer &container, AluRegisters firstOperandRegister, AluRegisters secondOperandRegister, AluRegisters finalResultRegister); }; template struct EncodeMathMMIO { using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM; using MI_MATH_ALU_INST_INLINE = typename GfxFamily::MI_MATH_ALU_INST_INLINE; using MI_MATH = typename GfxFamily::MI_MATH; static const size_t size = sizeof(MI_STORE_REGISTER_MEM); static void encodeMulRegVal(CommandContainer &container, uint32_t offset, uint32_t val, uint64_t dstAddress); static void encodeGreaterThanPredicate(CommandContainer &container, uint64_t lhsVal, uint32_t rhsVal); static void encodeBitwiseAndVal(CommandContainer &container, uint32_t regOffset, uint32_t immVal, uint64_t dstAddress); static void encodeAlu(MI_MATH_ALU_INST_INLINE *pAluParam, AluRegisters srcA, AluRegisters srcB, AluRegisters op, AluRegisters dest, AluRegisters result); static void encodeAluSubStoreCarry(MI_MATH_ALU_INST_INLINE *pAluParam, AluRegisters regA, AluRegisters regB, AluRegisters finalResultRegister); static void encodeAluAdd(MI_MATH_ALU_INST_INLINE *pAluParam, AluRegisters firstOperandRegister, AluRegisters 
secondOperandRegister, AluRegisters finalResultRegister); static void encodeAluAnd(MI_MATH_ALU_INST_INLINE *pAluParam, AluRegisters firstOperandRegister, AluRegisters secondOperandRegister, AluRegisters finalResultRegister); }; template struct EncodeIndirectParams { using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM; using MI_LOAD_REGISTER_MEM = typename GfxFamily::MI_LOAD_REGISTER_MEM; using MI_LOAD_REGISTER_REG = typename GfxFamily::MI_LOAD_REGISTER_REG; using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM; using MI_MATH = typename GfxFamily::MI_MATH; using MI_MATH_ALU_INST_INLINE = typename GfxFamily::MI_MATH_ALU_INST_INLINE; static void encode(CommandContainer &container, uint64_t crossThreadDataGpuVa, DispatchKernelEncoderI *dispatchInterface, uint64_t implicitArgsGpuPtr); static void setGroupCountIndirect(CommandContainer &container, const NEO::CrossThreadDataOffset offsets[3], uint64_t crossThreadAddress); static void setWorkDimIndirect(CommandContainer &container, const NEO::CrossThreadDataOffset offset, uint64_t crossThreadAddress, const uint32_t *groupSize); static void setGlobalWorkSizeIndirect(CommandContainer &container, const NEO::CrossThreadDataOffset offsets[3], uint64_t crossThreadAddress, const uint32_t *lws); static size_t getCmdsSizeForSetWorkDimIndirect(const uint32_t *groupSize, bool misalignedPtr); }; template struct EncodeSetMMIO { using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM; using MI_LOAD_REGISTER_MEM = typename GfxFamily::MI_LOAD_REGISTER_MEM; using MI_LOAD_REGISTER_REG = typename GfxFamily::MI_LOAD_REGISTER_REG; static const size_t sizeIMM = sizeof(MI_LOAD_REGISTER_IMM); static const size_t sizeMEM = sizeof(MI_LOAD_REGISTER_MEM); static const size_t sizeREG = sizeof(MI_LOAD_REGISTER_REG); static void encodeIMM(CommandContainer &container, uint32_t offset, uint32_t data, bool remap); static void encodeMEM(CommandContainer &container, uint32_t offset, uint64_t address); static 
void encodeREG(CommandContainer &container, uint32_t dstOffset, uint32_t srcOffset); static void encodeIMM(LinearStream &cmdStream, uint32_t offset, uint32_t data, bool remap); static void encodeMEM(LinearStream &cmdStream, uint32_t offset, uint64_t address); static void encodeREG(LinearStream &cmdStream, uint32_t dstOffset, uint32_t srcOffset); static bool isRemapApplicable(uint32_t offset); static void remapOffset(MI_LOAD_REGISTER_MEM *pMiLoadReg); static void remapOffset(MI_LOAD_REGISTER_REG *pMiLoadReg); }; template struct EncodeL3State { static void encode(CommandContainer &container, bool enableSLM); }; template struct EncodeMediaInterfaceDescriptorLoad { using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA; static void encode(CommandContainer &container); }; template struct EncodeStateBaseAddress { using STATE_BASE_ADDRESS = typename GfxFamily::STATE_BASE_ADDRESS; static void encode(CommandContainer &container, STATE_BASE_ADDRESS &sbaCmd, bool multiOsContextCapable); static void encode(CommandContainer &container, STATE_BASE_ADDRESS &sbaCmd, uint32_t statelessMocsIndex, bool useGlobalAtomics, bool multiOsContextCapable); static void setIohAddressForDebugger(NEO::Debugger::SbaAddresses &sbaAddress, const STATE_BASE_ADDRESS &sbaCmd); static size_t getRequiredSizeForStateBaseAddress(Device &device, CommandContainer &container); }; template struct EncodeStoreMMIO { using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM; static const size_t size = sizeof(MI_STORE_REGISTER_MEM); static void encode(LinearStream &csr, uint32_t offset, uint64_t address); static void remapOffset(MI_STORE_REGISTER_MEM *pStoreRegMem); }; template struct AppendStoreMMIO { using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM; static void appendRemap(MI_STORE_REGISTER_MEM *cmd); }; template struct EncodeSurfaceState { using R_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE; using SURFACE_FORMAT = typename 
R_SURFACE_STATE::SURFACE_FORMAT; using AUXILIARY_SURFACE_MODE = typename R_SURFACE_STATE::AUXILIARY_SURFACE_MODE; using COHERENCY_TYPE = typename R_SURFACE_STATE::COHERENCY_TYPE; static void encodeBuffer(EncodeSurfaceStateArgs &args); static void encodeExtraBufferParams(EncodeSurfaceStateArgs &args); static void encodeImplicitScalingParams(const EncodeSurfaceStateArgs &args); static void encodeExtraCacheSettings(R_SURFACE_STATE *surfaceState, const HardwareInfo &hwInfo); static void appendBufferSurfaceState(EncodeSurfaceStateArgs &args); static constexpr uintptr_t getSurfaceBaseAddressAlignmentMask() { return ~(getSurfaceBaseAddressAlignment() - 1); } static constexpr uintptr_t getSurfaceBaseAddressAlignment() { return 4; } static void getSshAlignedPointer(uintptr_t &ptr, size_t &offset); static bool doBindingTablePrefetch(); static size_t pushBindingTableAndSurfaceStates(IndirectHeap &dstHeap, size_t bindingTableCount, const void *srcKernelSsh, size_t srcKernelSshSize, size_t numberOfBindingTableStates, size_t offsetOfBindingTable); static void appendImageCompressionParams(R_SURFACE_STATE *surfaceState, GraphicsAllocation *allocation, GmmHelper *gmmHelper, bool imageFromBuffer, GMM_YUV_PLANE_ENUM plane); static void setCoherencyType(R_SURFACE_STATE *surfaceState, COHERENCY_TYPE coherencyType); static void setBufferAuxParamsForCCS(R_SURFACE_STATE *surfaceState); static void setImageAuxParamsForCCS(R_SURFACE_STATE *surfaceState, Gmm *gmm); static bool isAuxModeEnabled(R_SURFACE_STATE *surfaceState, Gmm *gmm); static void setAuxParamsForMCSCCS(R_SURFACE_STATE *surfaceState); static void setClearColorParams(R_SURFACE_STATE *surfaceState, Gmm *gmm); static void setFlagsForMediaCompression(R_SURFACE_STATE *surfaceState, Gmm *gmm); static void disableCompressionFlags(R_SURFACE_STATE *surfaceState); static void appendParamsForImageFromBuffer(R_SURFACE_STATE *surfaceState); }; template struct EncodeComputeMode { static size_t getCmdSizeForComputeMode(const HardwareInfo 
&hwInfo, bool hasSharedHandles, bool isRcs); static void programComputeModeCommandWithSynchronization(LinearStream &csr, StateComputeModeProperties &properties, const PipelineSelectArgs &args, bool hasSharedHandles, const HardwareInfo &hwInfo, bool isRcs); static void programComputeModeCommand(LinearStream &csr, StateComputeModeProperties &properties, const HardwareInfo &hwInfo); static void adjustPipelineSelect(CommandContainer &container, const NEO::KernelDescriptor &kernelDescriptor); }; template struct EncodeWA { static void encodeAdditionalPipelineSelect(LinearStream &stream, const PipelineSelectArgs &args, bool is3DPipeline, const HardwareInfo &hwInfo, bool isRcs); static size_t getAdditionalPipelineSelectSize(Device &device); static void addPipeControlPriorToNonPipelinedStateCommand(LinearStream &commandStream, PipeControlArgs args, const HardwareInfo &hwInfo, bool isRcs); static void setAdditionalPipeControlFlagsForNonPipelineStateCommand(PipeControlArgs &args); static void addPipeControlBeforeStateBaseAddress(LinearStream &commandStream, const HardwareInfo &hwInfo, bool isRcs); }; template struct EncodeSempahore { using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT; using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION; static constexpr uint32_t invalidHardwareTag = -2; static void programMiSemaphoreWait(MI_SEMAPHORE_WAIT *cmd, uint64_t compareAddress, uint32_t compareData, COMPARE_OPERATION compareMode, bool registerPollMode); static void addMiSemaphoreWaitCommand(LinearStream &commandStream, uint64_t compareAddress, uint32_t compareData, COMPARE_OPERATION compareMode, bool registerPollMode); static void addMiSemaphoreWaitCommand(LinearStream &commandStream, uint64_t compareAddress, uint32_t compareData, COMPARE_OPERATION compareMode); static size_t getSizeMiSemaphoreWait(); }; template struct EncodeAtomic { using MI_ATOMIC = typename GfxFamily::MI_ATOMIC; using ATOMIC_OPCODES = typename 
GfxFamily::MI_ATOMIC::ATOMIC_OPCODES; using DATA_SIZE = typename GfxFamily::MI_ATOMIC::DATA_SIZE; static void programMiAtomic(LinearStream &commandStream, uint64_t writeAddress, ATOMIC_OPCODES opcode, DATA_SIZE dataSize, uint32_t returnDataControl, uint32_t csStall, uint32_t operand1dword0, uint32_t operand1dword1); static void programMiAtomic(MI_ATOMIC *atomic, uint64_t writeAddress, ATOMIC_OPCODES opcode, DATA_SIZE dataSize, uint32_t returnDataControl, uint32_t csStall, uint32_t operand1dword0, uint32_t operand1dword1); static void setMiAtomicAddress(MI_ATOMIC &atomic, uint64_t writeAddress); }; template struct EncodeBatchBufferStartOrEnd { using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START; using MI_BATCH_BUFFER_END = typename GfxFamily::MI_BATCH_BUFFER_END; static void programBatchBufferStart(LinearStream *commandStream, uint64_t address, bool secondLevel); static void programBatchBufferEnd(CommandContainer &container); }; template struct EncodeMiFlushDW { using MI_FLUSH_DW = typename GfxFamily::MI_FLUSH_DW; static void programMiFlushDw(LinearStream &commandStream, uint64_t immediateDataGpuAddress, uint64_t immediateData, MiFlushArgs &args, const HardwareInfo &hwInfo); static void programMiFlushDwWA(LinearStream &commandStream); static void appendMiFlushDw(MI_FLUSH_DW *miFlushDwCmd, const HardwareInfo &hwInfo); static size_t getMiFlushDwCmdSizeForDataWrite(); static size_t getMiFlushDwWaSize(); }; template struct EncodeMemoryPrefetch { static void programMemoryPrefetch(LinearStream &commandStream, const GraphicsAllocation &graphicsAllocation, uint32_t size, size_t offset, const HardwareInfo &hwInfo); static size_t getSizeForMemoryPrefetch(size_t size); }; template struct EncodeMiArbCheck { using MI_ARB_CHECK = typename GfxFamily::MI_ARB_CHECK; static void program(LinearStream &commandStream); static void adjust(MI_ARB_CHECK &miArbCheck); static size_t getCommandSize(); }; template struct EncodeEnableRayTracing { static void 
programEnableRayTracing(LinearStream &commandStream, GraphicsAllocation &backBuffer); static void append3dStateBtd(void *ptr3dStateBtd); }; template struct EncodeNoop { static void alignToCacheLine(LinearStream &commandStream); static void emitNoop(LinearStream &commandStream, size_t bytesToUpdate); }; template struct EncodeStoreMemory { using MI_STORE_DATA_IMM = typename GfxFamily::MI_STORE_DATA_IMM; static void programStoreDataImm(LinearStream &commandStream, uint64_t gpuAddress, uint32_t dataDword0, uint32_t dataDword1, bool storeQword, bool workloadPartitionOffset); static void programStoreDataImm(MI_STORE_DATA_IMM *cmdBuffer, uint64_t gpuAddress, uint32_t dataDword0, uint32_t dataDword1, bool storeQword, bool workloadPartitionOffset); static size_t getStoreDataImmSize() { return sizeof(MI_STORE_DATA_IMM); } }; } // namespace NEO compute-runtime-22.14.22890/shared/source/command_container/command_encoder.inl000066400000000000000000001254221422164147700274270ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_container/command_encoder.h" #include "shared/source/command_stream/linear_stream.h" #include "shared/source/device/device.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/gmm_helper/gmm.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/api_specific_config.h" #include "shared/source/helpers/bindless_heaps_helper.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/local_id_gen.h" #include "shared/source/helpers/preamble.h" #include "shared/source/helpers/register_offsets.h" #include "shared/source/helpers/simd_helper.h" #include "shared/source/helpers/string.h" #include "shared/source/image/image_surface_state.h" #include "shared/source/indirect_heap/indirect_heap.h" #include "shared/source/kernel/dispatch_kernel_encoder_interface.h" #include 
"shared/source/kernel/implicit_args.h" #include "shared/source/kernel/kernel_descriptor.h" #include "shared/source/os_interface/hw_info_config.h" #include "encode_surface_state.inl" #include namespace NEO { template uint32_t EncodeStates::copySamplerState(IndirectHeap *dsh, uint32_t samplerStateOffset, uint32_t samplerCount, uint32_t borderColorOffset, const void *fnDynamicStateHeap, BindlessHeapsHelper *bindlessHeapHelper, const HardwareInfo &hwInfo) { auto sizeSamplerState = sizeof(SAMPLER_STATE) * samplerCount; auto borderColorSize = samplerStateOffset - borderColorOffset; SAMPLER_STATE *dstSamplerState = nullptr; uint32_t samplerStateOffsetInDsh = 0; dsh->align(EncodeStates::alignIndirectStatePointer); uint32_t borderColorOffsetInDsh = 0; if (!ApiSpecificConfig::getBindlessConfiguration()) { borderColorOffsetInDsh = static_cast(dsh->getUsed()); auto borderColor = dsh->getSpace(borderColorSize); memcpy_s(borderColor, borderColorSize, ptrOffset(fnDynamicStateHeap, borderColorOffset), borderColorSize); dsh->align(INTERFACE_DESCRIPTOR_DATA::SAMPLERSTATEPOINTER_ALIGN_SIZE); samplerStateOffsetInDsh = static_cast(dsh->getUsed()); dstSamplerState = reinterpret_cast(dsh->getSpace(sizeSamplerState)); } else { auto borderColor = reinterpret_cast(ptrOffset(fnDynamicStateHeap, borderColorOffset)); if (borderColor->getBorderColorRed() != 0.0f || borderColor->getBorderColorGreen() != 0.0f || borderColor->getBorderColorBlue() != 0.0f || (borderColor->getBorderColorAlpha() != 0.0f && borderColor->getBorderColorAlpha() != 1.0f)) { UNRECOVERABLE_IF(true); } else if (borderColor->getBorderColorAlpha() == 0.0f) { borderColorOffsetInDsh = bindlessHeapHelper->getDefaultBorderColorOffset(); } else { borderColorOffsetInDsh = bindlessHeapHelper->getAlphaBorderColorOffset(); } dsh->align(INTERFACE_DESCRIPTOR_DATA::SAMPLERSTATEPOINTER_ALIGN_SIZE); auto samplerStateInDsh = bindlessHeapHelper->allocateSSInHeap(sizeSamplerState, nullptr, BindlessHeapsHelper::BindlesHeapType::GLOBAL_DSH); 
dstSamplerState = reinterpret_cast(samplerStateInDsh.ssPtr); samplerStateOffsetInDsh = static_cast(samplerStateInDsh.surfaceStateOffset); } auto srcSamplerState = reinterpret_cast(ptrOffset(fnDynamicStateHeap, samplerStateOffset)); SAMPLER_STATE state = {}; for (uint32_t i = 0; i < samplerCount; i++) { state = srcSamplerState[i]; state.setIndirectStatePointer(static_cast(borderColorOffsetInDsh)); HwInfoConfig::get(hwInfo.platform.eProductFamily)->adjustSamplerState(&state, hwInfo); dstSamplerState[i] = state; } return samplerStateOffsetInDsh; } // namespace NEO template void EncodeMathMMIO::encodeMulRegVal(CommandContainer &container, uint32_t offset, uint32_t val, uint64_t dstAddress) { int logLws = 0; int i = val; while (val >> logLws) { logLws++; } EncodeSetMMIO::encodeREG(container, CS_GPR_R0, offset); EncodeSetMMIO::encodeIMM(container, CS_GPR_R1, 0, true); i = 0; while (i < logLws) { if (val & (1 << i)) { EncodeMath::addition(container, AluRegisters::R_1, AluRegisters::R_0, AluRegisters::R_2); EncodeSetMMIO::encodeREG(container, CS_GPR_R1, CS_GPR_R2); } EncodeMath::addition(container, AluRegisters::R_0, AluRegisters::R_0, AluRegisters::R_2); EncodeSetMMIO::encodeREG(container, CS_GPR_R0, CS_GPR_R2); i++; } EncodeStoreMMIO::encode(*container.getCommandStream(), CS_GPR_R1, dstAddress); } /* * Compute *firstOperand > secondOperand and store the result in * MI_PREDICATE_RESULT where firstOperand is an device memory address. * * To calculate the "greater than" operation in the device, * (secondOperand - *firstOperand) is used, and if the carry flag register is * set, then (*firstOperand) is greater than secondOperand. 
*/
template
void EncodeMathMMIO::encodeGreaterThanPredicate(CommandContainer &container, uint64_t firstOperand, uint32_t secondOperand) {
    // R0 <- value loaded from memory at firstOperand, R1 <- secondOperand immediate
    EncodeSetMMIO::encodeMEM(container, CS_GPR_R0, firstOperand);
    EncodeSetMMIO::encodeIMM(container, CS_GPR_R1, secondOperand, true);

    /* CS_GPR_R* registers map to AluRegisters::R_* registers */
    EncodeMath::greaterThan(container, AluRegisters::R_0, AluRegisters::R_1, AluRegisters::R_2);

    // Copy the comparison result from R2 into the predicate result register
    EncodeSetMMIO::encodeREG(container, CS_PREDICATE_RESULT, CS_GPR_R2);
}

/*
 * Compute bitwise AND between a register value from regOffset and immVal
 * and store it into dstAddress.
 */
template
void EncodeMathMMIO::encodeBitwiseAndVal(CommandContainer &container, uint32_t regOffset, uint32_t immVal, uint64_t dstAddress) {
    // R0 <- register at regOffset, R1 <- immVal, R2 <- R0 & R1
    EncodeSetMMIO::encodeREG(container, CS_GPR_R0, regOffset);
    EncodeSetMMIO::encodeIMM(container, CS_GPR_R1, immVal, true);
    EncodeMath::bitwiseAnd(container, AluRegisters::R_0, AluRegisters::R_1, AluRegisters::R_2);
    // Write R2 out to dstAddress
    EncodeStoreMMIO::encode(*container.getCommandStream(), CS_GPR_R2, dstAddress);
}

/*
 * encodeAlu() performs operations that leave a state including the result of
 * an operation such as the carry flag, and the accu flag with subtraction and
 * addition result.
 *
 * Parameter "postOperationStateRegister" is the ALU register with the result
 * from the operation that the function caller is interested in obtaining.
 *
 * Parameter "finalResultRegister" is the final destination register where
 * data from "postOperationStateRegister" will be copied.
*/
template
void EncodeMathMMIO::encodeAlu(MI_MATH_ALU_INST_INLINE *pAluParam, AluRegisters srcA, AluRegisters srcB, AluRegisters op, AluRegisters finalResultRegister, AluRegisters postOperationStateRegister) {
    // Four ALU instructions are written back to back starting at pAluParam;
    // the caller must have reserved room for all of them.
    MI_MATH_ALU_INST_INLINE aluParam;

    // 1) LOAD SRCA <- srcA
    aluParam.DW0.Value = 0x0;
    aluParam.DW0.BitField.ALUOpcode = static_cast(AluRegisters::OPCODE_LOAD);
    aluParam.DW0.BitField.Operand1 = static_cast(AluRegisters::R_SRCA);
    aluParam.DW0.BitField.Operand2 = static_cast(srcA);
    *pAluParam = aluParam;
    pAluParam++;

    // 2) LOAD SRCB <- srcB
    aluParam.DW0.Value = 0x0;
    aluParam.DW0.BitField.ALUOpcode = static_cast(AluRegisters::OPCODE_LOAD);
    aluParam.DW0.BitField.Operand1 = static_cast(AluRegisters::R_SRCB);
    aluParam.DW0.BitField.Operand2 = static_cast(srcB);
    *pAluParam = aluParam;
    pAluParam++;

    // 3) The operation itself; both operand fields are programmed as zero
    /* Order of operation: Operand1 Operand2 */
    aluParam.DW0.Value = 0x0;
    aluParam.DW0.BitField.ALUOpcode = static_cast(op);
    aluParam.DW0.BitField.Operand1 = 0;
    aluParam.DW0.BitField.Operand2 = 0;
    *pAluParam = aluParam;
    pAluParam++;

    // 4) STORE finalResultRegister <- postOperationStateRegister
    aluParam.DW0.Value = 0x0;
    aluParam.DW0.BitField.ALUOpcode = static_cast(AluRegisters::OPCODE_STORE);
    aluParam.DW0.BitField.Operand1 = static_cast(finalResultRegister);
    aluParam.DW0.BitField.Operand2 = static_cast(postOperationStateRegister);
    *pAluParam = aluParam;
    pAluParam++;
}

/* Convenience overload: reserves the MI_MATH space in the container's command stream. */
template
uint32_t *EncodeMath::commandReserve(CommandContainer &container) {
    return commandReserve(*container.getCommandStream());
}

/*
 * Reserves space in cmdStream for one MI_MATH header followed by
 * NUM_ALU_INST_FOR_READ_MODIFY_WRITE ALU instructions and writes the header.
 *
 * Returns the reserved pointer advanced by one element past the header
 * (presumably the slot of the first ALU instruction - TODO confirm the
 * pointer's element type, which is not visible in this copy).
 */
template
uint32_t *EncodeMath::commandReserve(LinearStream &cmdStream) {
    size_t size = sizeof(MI_MATH) + sizeof(MI_MATH_ALU_INST_INLINE) * NUM_ALU_INST_FOR_READ_MODIFY_WRITE;

    auto cmd = reinterpret_cast(cmdStream.getSpace(size));
    MI_MATH mathBuffer;
    mathBuffer.DW0.Value = 0x0;
    mathBuffer.DW0.BitField.InstructionType = MI_MATH::COMMAND_TYPE_MI_COMMAND;
    mathBuffer.DW0.BitField.InstructionOpcode = MI_MATH::MI_COMMAND_OPCODE_MI_MATH;
    // DwordLength is programmed as the ALU instruction count minus one
    mathBuffer.DW0.BitField.DwordLength = NUM_ALU_INST_FOR_READ_MODIFY_WRITE - 1;
    *reinterpret_cast(cmd) = mathBuffer;
    cmd++;

    return cmd;
}

/* Encodes finalResultRegister = firstOperandRegister + secondOperandRegister (result taken from R_ACCU). */
template
void EncodeMathMMIO::encodeAluAdd(MI_MATH_ALU_INST_INLINE *pAluParam, AluRegisters firstOperandRegister, AluRegisters secondOperandRegister, AluRegisters finalResultRegister) {
    encodeAlu(pAluParam, firstOperandRegister, secondOperandRegister, AluRegisters::OPCODE_ADD, finalResultRegister, AluRegisters::R_ACCU);
}

/* Encodes regA - regB and stores the carry flag (R_CF), not the difference, into finalResultRegister. */
template
void EncodeMathMMIO::encodeAluSubStoreCarry(MI_MATH_ALU_INST_INLINE *pAluParam, AluRegisters regA, AluRegisters regB, AluRegisters finalResultRegister) {
    /* regB is subtracted from regA */
    encodeAlu(pAluParam, regA, regB, AluRegisters::OPCODE_SUB, finalResultRegister, AluRegisters::R_CF);
}

/* Encodes finalResultRegister = firstOperandRegister & secondOperandRegister (result taken from R_ACCU). */
template
void EncodeMathMMIO::encodeAluAnd(MI_MATH_ALU_INST_INLINE *pAluParam, AluRegisters firstOperandRegister, AluRegisters secondOperandRegister, AluRegisters finalResultRegister) {
    encodeAlu(pAluParam, firstOperandRegister, secondOperandRegister, AluRegisters::OPCODE_AND, finalResultRegister, AluRegisters::R_ACCU);
}

/*
 * greaterThan() tests if firstOperandRegister is greater than
 * secondOperandRegister.
*/
template
void EncodeMath::greaterThan(CommandContainer &container, AluRegisters firstOperandRegister, AluRegisters secondOperandRegister, AluRegisters finalResultRegister) {
    uint32_t *cmd = EncodeMath::commandReserve(container);

    /* firstOperandRegister will be subtracted from secondOperandRegister */
    // The operands are passed swapped so the carry flag of (second - first) ends up in finalResultRegister.
    EncodeMathMMIO::encodeAluSubStoreCarry(reinterpret_cast(cmd), secondOperandRegister, firstOperandRegister, finalResultRegister);
}

/* Reserves an MI_MATH in the container's command stream and encodes finalResultRegister = first + second. */
template
void EncodeMath::addition(CommandContainer &container, AluRegisters firstOperandRegister, AluRegisters secondOperandRegister, AluRegisters finalResultRegister) {
    uint32_t *cmd = EncodeMath::commandReserve(container);

    EncodeMathMMIO::encodeAluAdd(reinterpret_cast(cmd), firstOperandRegister, secondOperandRegister, finalResultRegister);
}

/* Same as above, but reserves the MI_MATH directly in cmdStream. */
template
void EncodeMath::addition(LinearStream &cmdStream, AluRegisters firstOperandRegister, AluRegisters secondOperandRegister, AluRegisters finalResultRegister) {
    uint32_t *cmd = EncodeMath::commandReserve(cmdStream);

    EncodeMathMMIO::encodeAluAdd(reinterpret_cast(cmd), firstOperandRegister, secondOperandRegister, finalResultRegister);
}

/* Reserves an MI_MATH in the container's command stream and encodes finalResultRegister = first & second. */
template
void EncodeMath::bitwiseAnd(CommandContainer &container, AluRegisters firstOperandRegister, AluRegisters secondOperandRegister, AluRegisters finalResultRegister) {
    uint32_t *cmd = EncodeMath::commandReserve(container);

    EncodeMathMMIO::encodeAluAnd(reinterpret_cast(cmd), firstOperandRegister, secondOperandRegister, finalResultRegister);
}

/* The three CommandContainer overloads below forward to the LinearStream overloads using the container's command stream. */
template
inline void EncodeSetMMIO::encodeIMM(CommandContainer &container, uint32_t offset, uint32_t data, bool remap) {
    EncodeSetMMIO::encodeIMM(*container.getCommandStream(), offset, data, remap);
}

template
inline void EncodeSetMMIO::encodeMEM(CommandContainer &container, uint32_t offset, uint64_t address) {
    EncodeSetMMIO::encodeMEM(*container.getCommandStream(), offset, address);
}

template
inline void EncodeSetMMIO::encodeREG(CommandContainer &container, uint32_t dstOffset, uint32_t srcOffset) {
    EncodeSetMMIO::encodeREG(*container.getCommandStream(), dstOffset, srcOffset);
}

/* Loads the immediate "data" into the register at "offset"; the programming itself is delegated to LriHelper. */
template
inline void EncodeSetMMIO::encodeIMM(LinearStream &cmdStream, uint32_t offset, uint32_t data, bool remap) {
    LriHelper::program(&cmdStream, offset, data, remap);
}

/* Appends an MI_LOAD_REGISTER_MEM that loads the register at "offset" from memory at "address". */
template
void EncodeSetMMIO::encodeMEM(LinearStream &cmdStream, uint32_t offset, uint64_t address) {
    MI_LOAD_REGISTER_MEM cmd = Family::cmdInitLoadRegisterMem;
    cmd.setRegisterAddress(offset);
    cmd.setMemoryAddress(address);
    remapOffset(&cmd);

    // The command is built locally and then copied into the stream in one assignment.
    auto buffer = cmdStream.getSpaceForCmd();
    *buffer = cmd;
}

/* Appends an MI_LOAD_REGISTER_REG that copies the register at srcOffset into the register at dstOffset. */
template
void EncodeSetMMIO::encodeREG(LinearStream &cmdStream, uint32_t dstOffset, uint32_t srcOffset) {
    MI_LOAD_REGISTER_REG cmd = Family::cmdInitLoadRegisterReg;
    cmd.setSourceRegisterAddress(srcOffset);
    cmd.setDestinationRegisterAddress(dstOffset);
    remapOffset(&cmd);
    auto buffer = cmdStream.getSpaceForCmd();
    *buffer = cmd;
}

/* Appends an MI_STORE_REGISTER_MEM that writes the register at "offset" to memory at "address". */
template
void EncodeStoreMMIO::encode(LinearStream &csr, uint32_t offset, uint64_t address) {
    MI_STORE_REGISTER_MEM cmd = Family::cmdInitStoreRegisterMem;
    cmd.setRegisterAddress(offset);
    cmd.setMemoryAddress(address);
    remapOffset(&cmd);
    auto buffer = csr.getSpaceForCmd();
    *buffer = cmd;
}

/*
 * Programs the surface state at args.outMemory as a RAW-format, linear buffer
 * surface of args.size bytes based at args.graphicsAddress. A zero address
 * yields a NULL surface type. Compression, the caching debug flag and the
 * family-specific hooks are applied on top of the base programming.
 */
template
void EncodeSurfaceState::encodeBuffer(EncodeSurfaceStateArgs &args) {
    auto surfaceState = reinterpret_cast(args.outMemory);
    UNRECOVERABLE_IF(!isAligned(args.size));

    // The buffer length minus one is split across the width/height/depth fields.
    SURFACE_STATE_BUFFER_LENGTH Length = {0};
    Length.Length = static_cast(args.size - 1);

    surfaceState->setWidth(Length.SurfaceState.Width + 1);
    surfaceState->setHeight(Length.SurfaceState.Height + 1);
    surfaceState->setDepth(Length.SurfaceState.Depth + 1);

    surfaceState->setSurfaceType((args.graphicsAddress != 0) ? R_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_BUFFER
                                                             : R_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_NULL);
    surfaceState->setSurfaceFormat(SURFACE_FORMAT::SURFACE_FORMAT_RAW);
    surfaceState->setSurfaceVerticalAlignment(R_SURFACE_STATE::SURFACE_VERTICAL_ALIGNMENT_VALIGN_4);
    surfaceState->setSurfaceHorizontalAlignment(R_SURFACE_STATE::SURFACE_HORIZONTAL_ALIGNMENT_HALIGN_DEFAULT);

    surfaceState->setTileMode(R_SURFACE_STATE::TILE_MODE_LINEAR);
    surfaceState->setVerticalLineStride(0);
    surfaceState->setVerticalLineStrideOffset(0);
    surfaceState->setMemoryObjectControlState(args.mocs);
    surfaceState->setSurfaceBaseAddress(args.graphicsAddress);

    surfaceState->setAuxiliarySurfaceMode(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE);

    setCoherencyType(surfaceState, args.cpuCoherent ? R_SURFACE_STATE::COHERENCY_TYPE_IA_COHERENT : R_SURFACE_STATE::COHERENCY_TYPE_GPU_COHERENT);

    // A missing allocation is treated as not compressed.
    auto compressionEnabled = args.allocation ? args.allocation->isCompressionEnabled() : false;
    if (compressionEnabled && !args.forceNonAuxMode) {
        // It's expected to not program pitch/qpitch/baseAddress for Aux surface in CCS scenarios
        setCoherencyType(surfaceState, R_SURFACE_STATE::COHERENCY_TYPE_GPU_COHERENT);
        setBufferAuxParamsForCCS(surfaceState);
    }

    // Debug override: replaces the MOCS programmed above.
    if (DebugManager.flags.DisableCachingForStatefulBufferAccess.get()) {
        surfaceState->setMemoryObjectControlState(args.gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED));
    }

    EncodeSurfaceState::encodeExtraBufferParams(args);

    EncodeSurfaceState::appendBufferSurfaceState(args);
}

/*
 * Rounds ptr down using the surface base address alignment mask and reports
 * the number of bytes dropped in offset (0 when ptr was already aligned).
 */
template
void EncodeSurfaceState::getSshAlignedPointer(uintptr_t &ptr, size_t &offset) {
    auto sshAlignmentMask = getSurfaceBaseAddressAlignmentMask();
    uintptr_t alignedPtr = ptr & sshAlignmentMask;

    offset = 0;
    if (ptr != alignedPtr) {
        offset = ptrDiff(ptr, alignedPtr);
        ptr = alignedPtr;
    }
}

// Returned binding table pointer is relative to given heap (which is assumed to be the Surface state base address)
// as required by the INTERFACE_DESCRIPTOR_DATA.
template size_t EncodeSurfaceState::pushBindingTableAndSurfaceStates(IndirectHeap &dstHeap, size_t bindingTableCount, const void *srcKernelSsh, size_t srcKernelSshSize, size_t numberOfBindingTableStates, size_t offsetOfBindingTable) { using BINDING_TABLE_STATE = typename Family::BINDING_TABLE_STATE; using INTERFACE_DESCRIPTOR_DATA = typename Family::INTERFACE_DESCRIPTOR_DATA; using RENDER_SURFACE_STATE = typename Family::RENDER_SURFACE_STATE; if (bindingTableCount == 0) { // according to compiler, kernel does not reference BTIs to stateful surfaces, so there's nothing to patch return 0; } size_t sshSize = srcKernelSshSize; DEBUG_BREAK_IF(srcKernelSsh == nullptr); auto srcSurfaceState = srcKernelSsh; // Allocate space for new ssh data auto dstSurfaceState = dstHeap.getSpace(sshSize); // Compiler sends BTI table that is already populated with surface state pointers relative to local SSH. // We may need to patch these pointers so that they are relative to surface state base address if (dstSurfaceState == dstHeap.getCpuBase()) { // nothing to patch, we're at the start of heap (which is assumed to be the surface state base address) // we need to simply copy the ssh (including BTIs from compiler) memcpy_s(dstSurfaceState, sshSize, srcSurfaceState, sshSize); return offsetOfBindingTable; } // We can copy-over the surface states, but BTIs will need to be patched memcpy_s(dstSurfaceState, sshSize, srcSurfaceState, offsetOfBindingTable); uint32_t surfaceStatesOffset = static_cast(ptrDiff(dstSurfaceState, dstHeap.getCpuBase())); // march over BTIs and offset the pointers based on surface state base address auto *dstBtiTableBase = reinterpret_cast(ptrOffset(dstSurfaceState, offsetOfBindingTable)); DEBUG_BREAK_IF(reinterpret_cast(dstBtiTableBase) % INTERFACE_DESCRIPTOR_DATA::BINDINGTABLEPOINTER_ALIGN_SIZE != 0); auto *srcBtiTableBase = reinterpret_cast(ptrOffset(srcSurfaceState, offsetOfBindingTable)); BINDING_TABLE_STATE bti = Family::cmdInitBindingTableState; for (uint32_t i = 
0, e = static_cast(numberOfBindingTableStates); i != e; ++i) { uint32_t localSurfaceStateOffset = srcBtiTableBase[i].getSurfaceStatePointer(); uint32_t offsetedSurfaceStateOffset = localSurfaceStateOffset + surfaceStatesOffset; bti.setSurfaceStatePointer(offsetedSurfaceStateOffset); // patch just the SurfaceStatePointer bits dstBtiTableBase[i] = bti; DEBUG_BREAK_IF(bti.getRawData(0) % sizeof(BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE) != 0); } return ptrDiff(dstBtiTableBase, dstHeap.getCpuBase()); } template inline void EncodeSurfaceState::encodeExtraCacheSettings(R_SURFACE_STATE *surfaceState, const HardwareInfo &hwInfo) {} template void EncodeSurfaceState::setImageAuxParamsForCCS(R_SURFACE_STATE *surfaceState, Gmm *gmm) { using AUXILIARY_SURFACE_MODE = typename Family::RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE; // Its expected to not program pitch/qpitch/baseAddress for Aux surface in CCS scenarios surfaceState->setAuxiliarySurfaceMode(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_CCS_E); setFlagsForMediaCompression(surfaceState, gmm); setClearColorParams(surfaceState, gmm); setUnifiedAuxBaseAddress(surfaceState, gmm); } template void EncodeSurfaceState::setBufferAuxParamsForCCS(R_SURFACE_STATE *surfaceState) { using AUXILIARY_SURFACE_MODE = typename R_SURFACE_STATE::AUXILIARY_SURFACE_MODE; surfaceState->setAuxiliarySurfaceMode(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_CCS_E); } template bool EncodeSurfaceState::isAuxModeEnabled(R_SURFACE_STATE *surfaceState, Gmm *gmm) { using AUXILIARY_SURFACE_MODE = typename R_SURFACE_STATE::AUXILIARY_SURFACE_MODE; return (surfaceState->getAuxiliarySurfaceMode() == AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_CCS_E); } template void EncodeSurfaceState::appendParamsForImageFromBuffer(R_SURFACE_STATE *surfaceState) { } template void EncodeSurfaceState::encodeImplicitScalingParams(const EncodeSurfaceStateArgs &args) {} template void *EncodeDispatchKernel::getInterfaceDescriptor(CommandContainer 
&container, uint32_t &iddOffset) {

    // Block exhausted: allocate a fresh block of getNumIddPerBlock() descriptors
    // (from the bindless GLOBAL_DSH heap or the DYNAMIC_STATE heap), restart the
    // slot counter and emit a media interface descriptor load.
    if (container.nextIddInBlock == container.getNumIddPerBlock()) {
        if (ApiSpecificConfig::getBindlessConfiguration()) {
            container.getDevice()->getBindlessHeapsHelper()->getHeap(BindlessHeapsHelper::BindlesHeapType::GLOBAL_DSH)->align(EncodeStates::alignInterfaceDescriptorData);
            container.setIddBlock(container.getDevice()->getBindlessHeapsHelper()->getSpaceInHeap(sizeof(INTERFACE_DESCRIPTOR_DATA) * container.getNumIddPerBlock(), BindlessHeapsHelper::BindlesHeapType::GLOBAL_DSH));
        } else {
            container.getIndirectHeap(HeapType::DYNAMIC_STATE)->align(EncodeStates::alignInterfaceDescriptorData);
            container.setIddBlock(container.getHeapSpaceAllowGrow(HeapType::DYNAMIC_STATE,
                                                                  sizeof(INTERFACE_DESCRIPTOR_DATA) * container.getNumIddPerBlock()));
        }
        container.nextIddInBlock = 0;

        EncodeMediaInterfaceDescriptorLoad::encode(container);
    }

    iddOffset = container.nextIddInBlock;
    auto interfaceDescriptorData = static_cast(container.getIddBlock());
    // Post-increment: return the current slot and advance to the next one.
    return &interfaceDescriptorData[container.nextIddInBlock++];
}

/*
 * Returns the kernel's passInlineData attribute unless the EnablePassInlineData
 * debug flag is set to 0, in which case the result is always false. A flag
 * value of -1 leaves the default (check the kernel attribute) in place.
 */
template
bool EncodeDispatchKernel::inlineDataProgrammingRequired(const KernelDescriptor &kernelDesc) {
    auto checkKernelForInlineData = true;
    if (DebugManager.flags.EnablePassInlineData.get() != -1) {
        checkKernelForInlineData = !!DebugManager.flags.EnablePassInlineData.get();
    }
    if (checkKernelForInlineData) {
        return kernelDesc.kernelAttributes.flags.passInlineData;
    }
    return false;
}

/* Default implementation: the walker command is left untouched. */
template
void EncodeDispatchKernel::adjustTimestampPacket(WALKER_TYPE &walkerCmd, const HardwareInfo &hwInfo) {}

/*
 * Emits the commands that fill in the indirect dispatch parameters (group
 * count, global work size and work dimension) in the cross-thread data at
 * crossThreadDataGpuVa and, when implicitArgsGpuPtr is non-zero, in the
 * ImplicitArgs structure at that address as well.
 */
template
void EncodeIndirectParams::encode(CommandContainer &container, uint64_t crossThreadDataGpuVa, DispatchKernelEncoderI *dispatchInterface, uint64_t implicitArgsGpuPtr) {
    const auto &kernelDescriptor = dispatchInterface->getKernelDescriptor();
    setGroupCountIndirect(container, kernelDescriptor.payloadMappings.dispatchTraits.numWorkGroups, crossThreadDataGpuVa);
    setGlobalWorkSizeIndirect(container, kernelDescriptor.payloadMappings.dispatchTraits.globalWorkSize, crossThreadDataGpuVa, dispatchInterface->getGroupSize());
    // A valid workDim offset in the cross-thread data must have its two low bits clear.
    UNRECOVERABLE_IF(NEO::isValidOffset(kernelDescriptor.payloadMappings.dispatchTraits.workDim) && (kernelDescriptor.payloadMappings.dispatchTraits.workDim & 0b11) != 0u);
    setWorkDimIndirect(container, kernelDescriptor.payloadMappings.dispatchTraits.workDim, crossThreadDataGpuVa, dispatchInterface->getGroupSize());
    if (implicitArgsGpuPtr) {
        // Same three parameters again, addressed by the ImplicitArgs field offsets.
        CrossThreadDataOffset groupCountOffset[] = {offsetof(ImplicitArgs, groupCountX), offsetof(ImplicitArgs, groupCountY), offsetof(ImplicitArgs, groupCountZ)};
        CrossThreadDataOffset globalSizeOffset[] = {offsetof(ImplicitArgs, globalSizeX), offsetof(ImplicitArgs, globalSizeY), offsetof(ImplicitArgs, globalSizeZ)};
        setGroupCountIndirect(container, groupCountOffset, implicitArgsGpuPtr);
        setGlobalWorkSizeIndirect(container, globalSizeOffset, implicitArgsGpuPtr, dispatchInterface->getGroupSize());
        setWorkDimIndirect(container, offsetof(ImplicitArgs, numWorkDim), implicitArgsGpuPtr, dispatchInterface->getGroupSize());
    }
}

/*
 * For each of the three dimensions with a defined offset, stores the
 * GPUGPU_DISPATCHDIM register of that dimension at crossThreadAddress + offset.
 */
template
void EncodeIndirectParams::setGroupCountIndirect(CommandContainer &container, const NEO::CrossThreadDataOffset offsets[3], uint64_t crossThreadAddress) {
    for (int i = 0; i < 3; ++i) {
        if (NEO::isUndefinedOffset(offsets[i])) {
            continue;
        }
        EncodeStoreMMIO::encode(*container.getCommandStream(), GPUGPU_DISPATCHDIM[i], ptrOffset(crossThreadAddress, offsets[i]));
    }
}

/*
 * Emits commands that compute the work dimension (1, 2 or 3) and store it at
 * crossThreadAddress + workDimOffset. Nothing is emitted for an invalid
 * offset. groupSize[1] and groupSize[2] are known on the host; the group
 * counts are read from the GPUGPU_DISPATCHDIM registers when the commands run.
 *
 * When the destination is not dword aligned (dstPtr & 0b11 != 0), the value is
 * shifted into the addressed byte of the dword.
 */
template
void EncodeIndirectParams::setWorkDimIndirect(CommandContainer &container, const NEO::CrossThreadDataOffset workDimOffset, uint64_t crossThreadAddress, const uint32_t *groupSize) {
    if (NEO::isValidOffset(workDimOffset)) {
        auto dstPtr = ptrOffset(crossThreadAddress, workDimOffset);
        constexpr uint32_t RESULT_REGISTER = CS_GPR_R0;
        constexpr AluRegisters RESULT_ALU_REGISTER = AluRegisters::R_0;
        // "offset" has all bits below the destination byte set (0 for an aligned dstPtr).
        const uint32_t offset = static_cast((1ull << 8 * (dstPtr & 0b11)) - 1);
        // memoryMask has every bit set except those of the destination byte.
        const uint32_t memoryMask = std::numeric_limits::max() - static_cast((1ull << 8 * ((dstPtr & 0b11) + 1)) - 1) + offset;

        /*
         * if ( groupSize[2] > 1 || groupCount[2] > 1 ) { workdim = 3 }
         * else if ( groupSize[1] + groupCount[1] > 2 ) { workdim = 2 }
         * else { workdim = 1 }
         */
        if (groupSize[2] > 1) {
            // NOTE(review): unlike the else branch, this path does not load and merge the other
            // bytes of the destination dword - confirm that is intended for a misaligned dstPtr.
            EncodeSetMMIO::encodeIMM(container, RESULT_REGISTER, 3 << (8 * (dstPtr & 0b11)), true);
        } else {
            // Several names below alias the same GPR; each is only live for part of the sequence.
            constexpr uint32_t GROUP_COUNT_2_REGISTER = CS_GPR_R1;
            constexpr AluRegisters GROUP_COUNT_2_ALU_REGISTER = AluRegisters::R_1;

            constexpr uint32_t GROUP_SIZE_1_REGISTER = CS_GPR_R0;
            constexpr AluRegisters GROUP_SIZE_1_ALU_REGISTER = AluRegisters::R_0;

            constexpr uint32_t GROUP_COUNT_1_REGISTER = CS_GPR_R1;
            constexpr AluRegisters GROUP_COUNT_1_ALU_REGISTER = AluRegisters::R_1;

            constexpr AluRegisters SUM_ALU_REGISTER = AluRegisters::R_0;

            constexpr AluRegisters WORK_DIM_EQ_3_ALU_REGISTER = AluRegisters::R_3;

            constexpr AluRegisters WORK_DIM_GE_2_ALU_REGISTER = AluRegisters::R_4;

            constexpr uint32_t CONSTANT_ONE_REGISTER = CS_GPR_R5;
            constexpr AluRegisters CONSTANT_ONE_ALU_REGISTER = AluRegisters::R_5;
            constexpr uint32_t CONSTANT_TWO_REGISTER = CS_GPR_R6;
            constexpr AluRegisters CONSTANT_TWO_ALU_REGISTER = AluRegisters::R_6;

            constexpr uint32_t BACKUP_REGISTER = CS_GPR_R7;
            constexpr AluRegisters BACKUP_ALU_REGISTER = AluRegisters::R_7;

            constexpr uint32_t MEMORY_MASK_REGISTER = CS_GPR_R8;
            constexpr AluRegisters MEMORY_MASK_ALU_REGISTER = AluRegisters::R_8;

            constexpr uint32_t OFFSET_REGISTER = CS_GPR_R8;
            constexpr AluRegisters OFFSET_ALU_REGISTER = AluRegisters::R_8;

            if (offset) {
                // BACKUP <- current memory contents with the destination byte cleared,
                // then R8 is reloaded with "offset" for the shifting steps below.
                EncodeSetMMIO::encodeMEM(container, BACKUP_REGISTER, dstPtr);
                EncodeSetMMIO::encodeIMM(container, MEMORY_MASK_REGISTER, memoryMask, true);
                EncodeMath::bitwiseAnd(container, MEMORY_MASK_ALU_REGISTER, BACKUP_ALU_REGISTER, BACKUP_ALU_REGISTER);
                EncodeSetMMIO::encodeIMM(container, OFFSET_REGISTER, offset, true);
            }

            EncodeSetMMIO::encodeIMM(container, CONSTANT_ONE_REGISTER, 1, true);
            EncodeSetMMIO::encodeIMM(container, CONSTANT_TWO_REGISTER, 2, true);

            // WORK_DIM_EQ_3 <- (groupCount[2] > 1), reduced to bit 0 by the AND with one
            EncodeSetMMIO::encodeREG(container, GROUP_COUNT_2_REGISTER, GPUGPU_DISPATCHDIM[2]);

            EncodeMath::greaterThan(container, GROUP_COUNT_2_ALU_REGISTER, CONSTANT_ONE_ALU_REGISTER, WORK_DIM_EQ_3_ALU_REGISTER);
            EncodeMath::bitwiseAnd(container, WORK_DIM_EQ_3_ALU_REGISTER, CONSTANT_ONE_ALU_REGISTER, WORK_DIM_EQ_3_ALU_REGISTER);

            // WORK_DIM_GE_2 <- (groupSize[1] + groupCount[1] + WORK_DIM_EQ_3 > 2), reduced to bit 0
            EncodeSetMMIO::encodeIMM(container, GROUP_SIZE_1_REGISTER, groupSize[1], true);
            EncodeSetMMIO::encodeREG(container, GROUP_COUNT_1_REGISTER, GPUGPU_DISPATCHDIM[1]);

            EncodeMath::addition(container, GROUP_SIZE_1_ALU_REGISTER, GROUP_COUNT_1_ALU_REGISTER, SUM_ALU_REGISTER);
            EncodeMath::addition(container, SUM_ALU_REGISTER, WORK_DIM_EQ_3_ALU_REGISTER, SUM_ALU_REGISTER);
            EncodeMath::greaterThan(container, SUM_ALU_REGISTER, CONSTANT_TWO_ALU_REGISTER, WORK_DIM_GE_2_ALU_REGISTER);
            EncodeMath::bitwiseAnd(container, WORK_DIM_GE_2_ALU_REGISTER, CONSTANT_ONE_ALU_REGISTER, WORK_DIM_GE_2_ALU_REGISTER);

            if (offset) {
                // Adding "offset" to a 0/1 value and masking with (1 + offset) moves
                // that bit up to the lowest bit of the destination byte.
                EncodeMath::addition(container, CONSTANT_ONE_ALU_REGISTER, OFFSET_ALU_REGISTER, CONSTANT_ONE_ALU_REGISTER);
                EncodeMath::addition(container, WORK_DIM_EQ_3_ALU_REGISTER, OFFSET_ALU_REGISTER, WORK_DIM_EQ_3_ALU_REGISTER);
                EncodeMath::bitwiseAnd(container, WORK_DIM_EQ_3_ALU_REGISTER, CONSTANT_ONE_ALU_REGISTER, WORK_DIM_EQ_3_ALU_REGISTER);
                EncodeMath::addition(container, WORK_DIM_GE_2_ALU_REGISTER, OFFSET_ALU_REGISTER, WORK_DIM_GE_2_ALU_REGISTER);
                EncodeMath::bitwiseAnd(container, WORK_DIM_GE_2_ALU_REGISTER, CONSTANT_ONE_ALU_REGISTER, WORK_DIM_GE_2_ALU_REGISTER);
            }

            // RESULT <- one + WORK_DIM_GE_2 + WORK_DIM_EQ_3
            EncodeSetMMIO::encodeREG(container, RESULT_REGISTER, CONSTANT_ONE_REGISTER);
            EncodeMath::addition(container, RESULT_ALU_REGISTER, WORK_DIM_GE_2_ALU_REGISTER, RESULT_ALU_REGISTER);
            EncodeMath::addition(container, RESULT_ALU_REGISTER, WORK_DIM_EQ_3_ALU_REGISTER, RESULT_ALU_REGISTER);

            if (offset) {
                // Merge the preserved neighbouring bytes back in.
                EncodeMath::addition(container, RESULT_ALU_REGISTER, BACKUP_ALU_REGISTER, RESULT_ALU_REGISTER);
            }
        }
        EncodeStoreMMIO::encode(*container.getCommandStream(), RESULT_REGISTER, dstPtr);
    }
}

/*
 * Programs the sampler count and binding table entry count of the interface
 * descriptor, or clears both when binding table prefetch is disabled.
 */
template
void
EncodeDispatchKernel::adjustBindingTablePrefetch(INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, uint32_t samplerCount, uint32_t bindingTableEntryCount) { auto enablePrefetch = EncodeSurfaceState::doBindingTablePrefetch(); if (DebugManager.flags.ForceBtpPrefetchMode.get() != -1) { enablePrefetch = static_cast(DebugManager.flags.ForceBtpPrefetchMode.get()); } if (enablePrefetch) { interfaceDescriptor.setSamplerCount(static_cast((samplerCount + 3) / 4)); interfaceDescriptor.setBindingTableEntryCount(std::min(bindingTableEntryCount, 31u)); } else { interfaceDescriptor.setSamplerCount(INTERFACE_DESCRIPTOR_DATA::SAMPLER_COUNT::SAMPLER_COUNT_NO_SAMPLERS_USED); interfaceDescriptor.setBindingTableEntryCount(0u); } } template void EncodeDispatchKernel::adjustInterfaceDescriptorData(INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, const HardwareInfo &hwInfo) {} template void EncodeIndirectParams::setGlobalWorkSizeIndirect(CommandContainer &container, const NEO::CrossThreadDataOffset offsets[3], uint64_t crossThreadAddress, const uint32_t *lws) { for (int i = 0; i < 3; ++i) { if (NEO::isUndefinedOffset(offsets[i])) { continue; } EncodeMathMMIO::encodeMulRegVal(container, GPUGPU_DISPATCHDIM[i], lws[i], ptrOffset(crossThreadAddress, offsets[i])); } } template inline size_t EncodeIndirectParams::getCmdsSizeForSetWorkDimIndirect(const uint32_t *groupSize, bool misaligedPtr) { constexpr uint32_t aluCmdSize = sizeof(MI_MATH) + sizeof(MI_MATH_ALU_INST_INLINE) * NUM_ALU_INST_FOR_READ_MODIFY_WRITE; auto requiredSize = sizeof(MI_STORE_REGISTER_MEM) + sizeof(MI_LOAD_REGISTER_IMM); UNRECOVERABLE_IF(!groupSize); if (groupSize[2] < 2) { requiredSize += 2 * sizeof(MI_LOAD_REGISTER_IMM) + 3 * sizeof(MI_LOAD_REGISTER_REG) + 8 * aluCmdSize; if (misaligedPtr) { requiredSize += 2 * sizeof(MI_LOAD_REGISTER_IMM) + sizeof(MI_LOAD_REGISTER_MEM) + 7 * aluCmdSize; } } return requiredSize; } template void EncodeSempahore::addMiSemaphoreWaitCommand(LinearStream &commandStream, uint64_t compareAddress, 
uint32_t compareData, COMPARE_OPERATION compareMode) { addMiSemaphoreWaitCommand(commandStream, compareAddress, compareData, compareMode, false); } template void EncodeSempahore::addMiSemaphoreWaitCommand(LinearStream &commandStream, uint64_t compareAddress, uint32_t compareData, COMPARE_OPERATION compareMode, bool registerPollMode) { auto semaphoreCommand = commandStream.getSpaceForCmd(); programMiSemaphoreWait(semaphoreCommand, compareAddress, compareData, compareMode, registerPollMode); } template inline size_t EncodeSempahore::getSizeMiSemaphoreWait() { return sizeof(MI_SEMAPHORE_WAIT); } template inline void EncodeAtomic::setMiAtomicAddress(MI_ATOMIC &atomic, uint64_t writeAddress) { atomic.setMemoryAddress(static_cast(writeAddress & 0x0000FFFFFFFFULL)); atomic.setMemoryAddressHigh(static_cast(writeAddress >> 32)); } template void EncodeAtomic::programMiAtomic(MI_ATOMIC *atomic, uint64_t writeAddress, ATOMIC_OPCODES opcode, DATA_SIZE dataSize, uint32_t returnDataControl, uint32_t csStall, uint32_t operand1dword0, uint32_t operand1dword1) { MI_ATOMIC cmd = Family::cmdInitAtomic; cmd.setAtomicOpcode(opcode); cmd.setDataSize(dataSize); EncodeAtomic::setMiAtomicAddress(cmd, writeAddress); cmd.setReturnDataControl(returnDataControl); cmd.setCsStall(csStall); if (opcode == ATOMIC_OPCODES::ATOMIC_4B_MOVE || opcode == ATOMIC_OPCODES::ATOMIC_8B_MOVE) { cmd.setDwordLength(MI_ATOMIC::DWORD_LENGTH::DWORD_LENGTH_INLINE_DATA_1); cmd.setInlineData(0x1); cmd.setOperand1DataDword0(operand1dword0); cmd.setOperand1DataDword1(operand1dword1); } *atomic = cmd; } template void EncodeAtomic::programMiAtomic(LinearStream &commandStream, uint64_t writeAddress, ATOMIC_OPCODES opcode, DATA_SIZE dataSize, uint32_t returnDataControl, uint32_t csStall, uint32_t operand1dword0, uint32_t operand1dword1) { auto miAtomic = commandStream.getSpaceForCmd(); EncodeAtomic::programMiAtomic(miAtomic, writeAddress, opcode, dataSize, returnDataControl, csStall, operand1dword0, operand1dword1); } 
template void EncodeBatchBufferStartOrEnd::programBatchBufferStart(LinearStream *commandStream, uint64_t address, bool secondLevel) { MI_BATCH_BUFFER_START cmd = Family::cmdInitBatchBufferStart; if (secondLevel) { cmd.setSecondLevelBatchBuffer(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH); } cmd.setAddressSpaceIndicator(MI_BATCH_BUFFER_START::ADDRESS_SPACE_INDICATOR_PPGTT); cmd.setBatchBufferStartAddress(address); auto buffer = commandStream->getSpaceForCmd(); *buffer = cmd; } template void EncodeBatchBufferStartOrEnd::programBatchBufferEnd(CommandContainer &container) { MI_BATCH_BUFFER_END cmd = Family::cmdInitBatchBufferEnd; auto buffer = container.getCommandStream()->getSpaceForCmd(); *buffer = cmd; } template void EncodeMiFlushDW::programMiFlushDw(LinearStream &commandStream, uint64_t immediateDataGpuAddress, uint64_t immediateData, MiFlushArgs &args, const HardwareInfo &hwInfo) { programMiFlushDwWA(commandStream); auto miFlushDwCmd = commandStream.getSpaceForCmd(); MI_FLUSH_DW miFlush = Family::cmdInitMiFlushDw; if (args.commandWithPostSync) { auto postSyncType = args.timeStampOperation ? 
MI_FLUSH_DW::POST_SYNC_OPERATION_WRITE_TIMESTAMP_REGISTER : MI_FLUSH_DW::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA_QWORD; miFlush.setPostSyncOperation(postSyncType); miFlush.setDestinationAddress(immediateDataGpuAddress); miFlush.setImmediateData(immediateData); } miFlush.setNotifyEnable(args.notifyEnable); miFlush.setTlbInvalidate(args.tlbFlush); appendMiFlushDw(&miFlush, hwInfo); *miFlushDwCmd = miFlush; } template size_t EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite() { return sizeof(typename Family::MI_FLUSH_DW) + EncodeMiFlushDW::getMiFlushDwWaSize(); } template inline void EncodeMemoryPrefetch::programMemoryPrefetch(LinearStream &commandStream, const GraphicsAllocation &graphicsAllocation, uint32_t size, size_t offset, const HardwareInfo &hwInfo) {} template inline size_t EncodeMemoryPrefetch::getSizeForMemoryPrefetch(size_t size) { return 0u; } template void EncodeMiArbCheck::program(LinearStream &commandStream) { MI_ARB_CHECK cmd = Family::cmdInitArbCheck; EncodeMiArbCheck::adjust(cmd); auto miArbCheckStream = commandStream.getSpaceForCmd(); *miArbCheckStream = cmd; } template inline size_t EncodeMiArbCheck::getCommandSize() { return sizeof(MI_ARB_CHECK); } template inline void EncodeNoop::alignToCacheLine(LinearStream &commandStream) { auto used = commandStream.getUsed(); auto alignment = MemoryConstants::cacheLineSize; auto partialCacheline = used & (alignment - 1); if (partialCacheline) { auto amountToPad = alignment - partialCacheline; auto pCmd = commandStream.getSpace(amountToPad); memset(pCmd, 0, amountToPad); } } template inline void EncodeNoop::emitNoop(LinearStream &commandStream, size_t bytesToUpdate) { if (bytesToUpdate) { auto ptr = commandStream.getSpace(bytesToUpdate); memset(ptr, 0, bytesToUpdate); } } template inline void EncodeStoreMemory::programStoreDataImm(LinearStream &commandStream, uint64_t gpuAddress, uint32_t dataDword0, uint32_t dataDword1, bool storeQword, bool workloadPartitionOffset) { auto miStoreDataImmBuffer = 
commandStream.getSpaceForCmd(); EncodeStoreMemory::programStoreDataImm(miStoreDataImmBuffer, gpuAddress, dataDword0, dataDword1, storeQword, workloadPartitionOffset); } template void EncodeEnableRayTracing::append3dStateBtd(void *ptr3dStateBtd) {} template inline void EncodeWA::setAdditionalPipeControlFlagsForNonPipelineStateCommand(PipeControlArgs &args) {} } // namespace NEO compute-runtime-22.14.22890/shared/source/command_container/command_encoder_bdw_and_later.inl000066400000000000000000000621771422164147700323030ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_container/command_encoder.h" #include "shared/source/command_stream/linear_stream.h" #include "shared/source/command_stream/preemption.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/api_specific_config.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/pipe_control_args.h" #include "shared/source/helpers/simd_helper.h" #include "shared/source/helpers/state_base_address.h" #include "shared/source/kernel/dispatch_kernel_encoder_interface.h" #include "shared/source/kernel/implicit_args.h" #include namespace NEO { template void EncodeDispatchKernel::setGrfInfo(INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, uint32_t numGrf, const size_t &sizeCrossThreadData, const size_t &sizePerThreadData) { auto grfSize = sizeof(typename Family::GRF); DEBUG_BREAK_IF((sizeCrossThreadData % grfSize) != 0); auto numGrfCrossThreadData = static_cast(sizeCrossThreadData / grfSize); DEBUG_BREAK_IF(numGrfCrossThreadData == 0); pInterfaceDescriptor->setCrossThreadConstantDataReadLength(numGrfCrossThreadData); DEBUG_BREAK_IF((sizePerThreadData % grfSize) != 0); auto numGrfPerThreadData = static_cast(sizePerThreadData / grfSize); // at least 1 GRF of perThreadData for each thread in a 
thread group when sizeCrossThreadData != 0 numGrfPerThreadData = std::max(numGrfPerThreadData, 1u); pInterfaceDescriptor->setConstantIndirectUrbEntryReadLength(numGrfPerThreadData); } template void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDispatchKernelArgs &args) { using MEDIA_STATE_FLUSH = typename Family::MEDIA_STATE_FLUSH; using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename Family::MEDIA_INTERFACE_DESCRIPTOR_LOAD; using MI_BATCH_BUFFER_END = typename Family::MI_BATCH_BUFFER_END; using STATE_BASE_ADDRESS = typename Family::STATE_BASE_ADDRESS; auto &kernelDescriptor = args.dispatchInterface->getKernelDescriptor(); auto sizeCrossThreadData = args.dispatchInterface->getCrossThreadDataSize(); auto sizePerThreadData = args.dispatchInterface->getPerThreadDataSize(); auto sizePerThreadDataForWholeGroup = args.dispatchInterface->getPerThreadDataSizeForWholeThreadGroup(); auto pImplicitArgs = args.dispatchInterface->getImplicitArgs(); const HardwareInfo &hwInfo = args.device->getHardwareInfo(); LinearStream *listCmdBufferStream = container.getCommandStream(); size_t sshOffset = 0; auto threadDims = static_cast(args.pThreadGroupDimensions); const Vec3 threadStartVec{0, 0, 0}; Vec3 threadDimsVec{0, 0, 0}; if (!args.isIndirect) { threadDimsVec = {threadDims[0], threadDims[1], threadDims[2]}; } WALKER_TYPE cmd = Family::cmdInitGpgpuWalker; auto idd = Family::cmdInitInterfaceDescriptorData; { auto alloc = args.dispatchInterface->getIsaAllocation(); UNRECOVERABLE_IF(nullptr == alloc); auto offset = alloc->getGpuAddressToPatch(); idd.setKernelStartPointer(offset); idd.setKernelStartPointerHigh(0u); } auto numThreadsPerThreadGroup = args.dispatchInterface->getNumThreadsPerThreadGroup(); idd.setNumberOfThreadsInGpgpuThreadGroup(numThreadsPerThreadGroup); EncodeDispatchKernel::programBarrierEnable(idd, kernelDescriptor.kernelAttributes.barrierCount, hwInfo); auto slmSize = static_cast( HwHelperHw::get().computeSlmValues(hwInfo, 
args.dispatchInterface->getSlmTotalSize())); idd.setSharedLocalMemorySize(slmSize); uint32_t bindingTableStateCount = kernelDescriptor.payloadMappings.bindingTable.numEntries; uint32_t bindingTablePointer = 0u; bool isBindlessKernel = kernelDescriptor.kernelAttributes.bufferAddressingMode == KernelDescriptor::BindlessAndStateless; if (!isBindlessKernel) { container.prepareBindfulSsh(); if (bindingTableStateCount > 0u) { auto ssh = container.getHeapWithRequiredSizeAndAlignment(HeapType::SURFACE_STATE, args.dispatchInterface->getSurfaceStateHeapDataSize(), BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE); sshOffset = ssh->getUsed(); bindingTablePointer = static_cast(EncodeSurfaceState::pushBindingTableAndSurfaceStates( *ssh, bindingTableStateCount, args.dispatchInterface->getSurfaceStateHeapData(), args.dispatchInterface->getSurfaceStateHeapDataSize(), bindingTableStateCount, kernelDescriptor.payloadMappings.bindingTable.tableOffset)); } } idd.setBindingTablePointer(bindingTablePointer); PreemptionHelper::programInterfaceDescriptorDataPreemption(&idd, args.preemptionMode); auto heap = ApiSpecificConfig::getBindlessConfiguration() ? 
args.device->getBindlessHeapsHelper()->getHeap(BindlessHeapsHelper::GLOBAL_DSH) : container.getIndirectHeap(HeapType::DYNAMIC_STATE); UNRECOVERABLE_IF(!heap); uint32_t samplerStateOffset = 0; uint32_t samplerCount = 0; if (kernelDescriptor.payloadMappings.samplerTable.numSamplers > 0) { samplerCount = kernelDescriptor.payloadMappings.samplerTable.numSamplers; samplerStateOffset = EncodeStates::copySamplerState(heap, kernelDescriptor.payloadMappings.samplerTable.tableOffset, kernelDescriptor.payloadMappings.samplerTable.numSamplers, kernelDescriptor.payloadMappings.samplerTable.borderColor, args.dispatchInterface->getDynamicStateHeapData(), args.device->getBindlessHeapsHelper(), hwInfo); } idd.setSamplerStatePointer(samplerStateOffset); if (!isBindlessKernel) { EncodeDispatchKernel::adjustBindingTablePrefetch(idd, samplerCount, bindingTableStateCount); } EncodeDispatchKernel::setGrfInfo(&idd, kernelDescriptor.kernelAttributes.numGrfRequired, sizeCrossThreadData, sizePerThreadData); uint32_t sizeThreadData = sizePerThreadDataForWholeGroup + sizeCrossThreadData; uint32_t sizeForImplicitArgsPatching = NEO::ImplicitArgsHelper::getSizeForImplicitArgsPatching(pImplicitArgs, kernelDescriptor, hwInfo); uint32_t iohRequiredSize = sizeThreadData + sizeForImplicitArgsPatching; uint64_t offsetThreadData = 0u; { auto heapIndirect = container.getIndirectHeap(HeapType::INDIRECT_OBJECT); UNRECOVERABLE_IF(!(heapIndirect)); heapIndirect->align(WALKER_TYPE::INDIRECTDATASTARTADDRESS_ALIGN_SIZE); auto ptr = container.getHeapSpaceAllowGrow(HeapType::INDIRECT_OBJECT, iohRequiredSize); UNRECOVERABLE_IF(!(ptr)); offsetThreadData = heapIndirect->getHeapGpuStartOffset() + static_cast(heapIndirect->getUsed() - sizeThreadData); uint64_t implicitArgsGpuVA = 0u; if (pImplicitArgs) { implicitArgsGpuVA = heapIndirect->getGraphicsAllocation()->getGpuAddress() + static_cast(heapIndirect->getUsed() - iohRequiredSize); auto implicitArgsCrossThreadPtr = 
ptrOffset(const_cast(reinterpret_cast(args.dispatchInterface->getCrossThreadData())), kernelDescriptor.payloadMappings.implicitArgs.implicitArgsBuffer); *implicitArgsCrossThreadPtr = implicitArgsGpuVA; ptr = NEO::ImplicitArgsHelper::patchImplicitArgs(ptr, *pImplicitArgs, kernelDescriptor, hwInfo, {}); } memcpy_s(ptr, sizeCrossThreadData, args.dispatchInterface->getCrossThreadData(), sizeCrossThreadData); if (args.isIndirect) { auto crossThreadDataGpuVA = heapIndirect->getGraphicsAllocation()->getGpuAddress() + heapIndirect->getUsed() - sizeThreadData; EncodeIndirectParams::encode(container, crossThreadDataGpuVA, args.dispatchInterface, implicitArgsGpuVA); } ptr = ptrOffset(ptr, sizeCrossThreadData); memcpy_s(ptr, sizePerThreadDataForWholeGroup, args.dispatchInterface->getPerThreadData(), sizePerThreadDataForWholeGroup); } auto slmSizeNew = args.dispatchInterface->getSlmTotalSize(); bool dirtyHeaps = container.isAnyHeapDirty(); bool flush = container.slmSize != slmSizeNew || dirtyHeaps || args.requiresUncachedMocs; if (flush) { PipeControlArgs syncArgs; syncArgs.dcFlushEnable = MemorySynchronizationCommands::getDcFlushEnable(true, hwInfo); if (dirtyHeaps) { syncArgs.hdcPipelineFlush = true; } MemorySynchronizationCommands::addPipeControl(*container.getCommandStream(), syncArgs); if (dirtyHeaps || args.requiresUncachedMocs) { STATE_BASE_ADDRESS sba; auto gmmHelper = container.getDevice()->getGmmHelper(); uint32_t statelessMocsIndex = args.requiresUncachedMocs ? 
(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED) >> 1) : (gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER) >> 1); EncodeStateBaseAddress::encode(container, sba, statelessMocsIndex, false, false); container.setDirtyStateForAllHeaps(false); args.requiresUncachedMocs = false; } if (container.slmSize != slmSizeNew) { EncodeL3State::encode(container, slmSizeNew != 0u); container.slmSize = slmSizeNew; if (container.nextIddInBlock != container.getNumIddPerBlock()) { EncodeMediaInterfaceDescriptorLoad::encode(container); } } } uint32_t numIDD = 0u; void *ptr = getInterfaceDescriptor(container, numIDD); memcpy_s(ptr, sizeof(idd), &idd, sizeof(idd)); cmd.setIndirectDataStartAddress(static_cast(offsetThreadData)); cmd.setIndirectDataLength(sizeThreadData); cmd.setInterfaceDescriptorOffset(numIDD); EncodeDispatchKernel::encodeThreadData(cmd, nullptr, threadDims, args.dispatchInterface->getGroupSize(), kernelDescriptor.kernelAttributes.simdSize, kernelDescriptor.kernelAttributes.numLocalIdChannels, args.dispatchInterface->getNumThreadsPerThreadGroup(), args.dispatchInterface->getThreadExecutionMask(), true, false, args.isIndirect, args.dispatchInterface->getRequiredWorkgroupOrder()); cmd.setPredicateEnable(args.isPredicate); if (ApiSpecificConfig::getBindlessConfiguration()) { container.getResidencyContainer().push_back(args.device->getBindlessHeapsHelper()->getHeap(NEO::BindlessHeapsHelper::BindlesHeapType::GLOBAL_DSH)->getGraphicsAllocation()); } EncodeDispatchKernel::adjustInterfaceDescriptorData(idd, hwInfo); PreemptionHelper::applyPreemptionWaCmdsBegin(listCmdBufferStream, *args.device); auto buffer = listCmdBufferStream->getSpace(sizeof(cmd)); *(decltype(cmd) *)buffer = cmd; PreemptionHelper::applyPreemptionWaCmdsEnd(listCmdBufferStream, *args.device); { auto mediaStateFlush = listCmdBufferStream->getSpace(sizeof(MEDIA_STATE_FLUSH)); *reinterpret_cast(mediaStateFlush) = Family::cmdInitMediaStateFlush; } args.partitionCount = 1; } template void 
EncodeMediaInterfaceDescriptorLoad::encode(CommandContainer &container) { using MEDIA_STATE_FLUSH = typename Family::MEDIA_STATE_FLUSH; using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename Family::MEDIA_INTERFACE_DESCRIPTOR_LOAD; auto heapBase = ApiSpecificConfig::getBindlessConfiguration() ? container.getDevice()->getBindlessHeapsHelper()->getHeap(BindlessHeapsHelper::GLOBAL_DSH)->getGraphicsAllocation()->getUnderlyingBuffer() : container.getIndirectHeap(HeapType::DYNAMIC_STATE)->getCpuBase(); auto mediaStateFlush = container.getCommandStream()->getSpaceForCmd(); *mediaStateFlush = Family::cmdInitMediaStateFlush; auto iddOffset = static_cast(ptrDiff(container.getIddBlock(), heapBase)); iddOffset += ApiSpecificConfig::getBindlessConfiguration() ? static_cast(container.getDevice()->getBindlessHeapsHelper()->getHeap(BindlessHeapsHelper::GLOBAL_DSH)->getGraphicsAllocation()->getGpuAddress() - container.getDevice()->getBindlessHeapsHelper()->getHeap(BindlessHeapsHelper::GLOBAL_DSH)->getGraphicsAllocation()->getGpuBaseAddress()) : 0; MEDIA_INTERFACE_DESCRIPTOR_LOAD cmd = Family::cmdInitMediaInterfaceDescriptorLoad; cmd.setInterfaceDescriptorDataStartAddress(iddOffset); cmd.setInterfaceDescriptorTotalLength(sizeof(INTERFACE_DESCRIPTOR_DATA) * container.getNumIddPerBlock()); auto buffer = container.getCommandStream()->getSpace(sizeof(cmd)); *(decltype(cmd) *)buffer = cmd; } template inline bool EncodeDispatchKernel::isRuntimeLocalIdsGenerationRequired(uint32_t activeChannels, const size_t *lws, std::array walkOrder, bool requireInputWalkOrder, uint32_t &requiredWalkOrder, uint32_t simd) { requiredWalkOrder = 0u; return true; } template void EncodeDispatchKernel::encodeThreadData(WALKER_TYPE &walkerCmd, const uint32_t *startWorkGroup, const uint32_t *numWorkGroups, const uint32_t *workGroupSizes, uint32_t simd, uint32_t localIdDimensions, uint32_t threadsPerThreadGroup, uint32_t threadExecutionMask, bool localIdsGenerationByRuntime, bool inlineDataProgrammingRequired, bool 
isIndirect, uint32_t requiredWorkGroupOrder) { if (isIndirect) { walkerCmd.setIndirectParameterEnable(true); } else { walkerCmd.setThreadGroupIdXDimension(static_cast(numWorkGroups[0])); walkerCmd.setThreadGroupIdYDimension(static_cast(numWorkGroups[1])); walkerCmd.setThreadGroupIdZDimension(static_cast(numWorkGroups[2])); } if (startWorkGroup) { walkerCmd.setThreadGroupIdStartingX(static_cast(startWorkGroup[0])); walkerCmd.setThreadGroupIdStartingY(static_cast(startWorkGroup[1])); walkerCmd.setThreadGroupIdStartingResumeZ(static_cast(startWorkGroup[2])); } walkerCmd.setSimdSize(getSimdConfig(simd)); auto localWorkSize = workGroupSizes[0] * workGroupSizes[1] * workGroupSizes[2]; if (threadsPerThreadGroup == 0) { threadsPerThreadGroup = static_cast(getThreadsPerWG(simd, localWorkSize)); } walkerCmd.setThreadWidthCounterMaximum(threadsPerThreadGroup); uint64_t executionMask = threadExecutionMask; if (executionMask == 0) { auto remainderSimdLanes = localWorkSize & (simd - 1); executionMask = maxNBitValue(remainderSimdLanes); if (!executionMask) executionMask = ~executionMask; } constexpr uint32_t maxDword = std::numeric_limits::max(); walkerCmd.setRightExecutionMask(static_cast(executionMask)); walkerCmd.setBottomExecutionMask(maxDword); } template void EncodeDispatchKernel::programBarrierEnable(INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, uint32_t value, const HardwareInfo &hwInfo) { interfaceDescriptor.setBarrierEnable(value); } template inline void EncodeDispatchKernel::encodeAdditionalWalkerFields(const HardwareInfo &hwInfo, WALKER_TYPE &walkerCmd, KernelExecutionType kernelExecutionType) {} template void EncodeDispatchKernel::appendAdditionalIDDFields(INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, const HardwareInfo &hwInfo, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy) {} template inline void EncodeComputeMode::adjustPipelineSelect(CommandContainer &container, const NEO::KernelDescriptor &kernelDescriptor) { } template 
void EncodeStateBaseAddress::setIohAddressForDebugger(NEO::Debugger::SbaAddresses &sbaAddress, const STATE_BASE_ADDRESS &sbaCmd) { sbaAddress.IndirectObjectBaseAddress = sbaCmd.getIndirectObjectBaseAddress(); } template void EncodeStateBaseAddress::encode(CommandContainer &container, STATE_BASE_ADDRESS &sbaCmd, bool multiOsContextCapable) { auto gmmHelper = container.getDevice()->getRootDeviceEnvironment().getGmmHelper(); uint32_t statelessMocsIndex = (gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER) >> 1); EncodeStateBaseAddress::encode(container, sbaCmd, statelessMocsIndex, false, multiOsContextCapable); } template void EncodeStateBaseAddress::encode(CommandContainer &container, STATE_BASE_ADDRESS &sbaCmd, uint32_t statelessMocsIndex, bool useGlobalAtomics, bool multiOsContextCapable) { auto &device = *container.getDevice(); auto &hwInfo = device.getHardwareInfo(); auto isRcs = device.getDefaultEngine().commandStreamReceiver->isRcs(); if (container.isAnyHeapDirty()) { EncodeWA::encodeAdditionalPipelineSelect(*container.getCommandStream(), {}, true, hwInfo, isRcs); } auto gmmHelper = device.getGmmHelper(); StateBaseAddressHelper::programStateBaseAddress( &sbaCmd, container.isHeapDirty(HeapType::DYNAMIC_STATE) ? container.getIndirectHeap(HeapType::DYNAMIC_STATE) : nullptr, container.isHeapDirty(HeapType::INDIRECT_OBJECT) ? container.getIndirectHeap(HeapType::INDIRECT_OBJECT) : nullptr, container.isHeapDirty(HeapType::SURFACE_STATE) ? 
container.getIndirectHeap(HeapType::SURFACE_STATE) : nullptr, 0, false, statelessMocsIndex, container.getIndirectObjectHeapBaseAddress(), container.getInstructionHeapBaseAddress(), 0, false, false, gmmHelper, false, MemoryCompressionState::NotApplicable, useGlobalAtomics, 1u); auto pCmd = reinterpret_cast(container.getCommandStream()->getSpace(sizeof(STATE_BASE_ADDRESS))); *pCmd = sbaCmd; EncodeWA::encodeAdditionalPipelineSelect(*container.getCommandStream(), {}, false, hwInfo, isRcs); } template size_t EncodeStateBaseAddress::getRequiredSizeForStateBaseAddress(Device &device, CommandContainer &container) { return sizeof(typename Family::STATE_BASE_ADDRESS) + 2 * EncodeWA::getAdditionalPipelineSelectSize(device); } template void EncodeL3State::encode(CommandContainer &container, bool enableSLM) { auto offset = L3CNTLRegisterOffset::registerOffset; auto data = PreambleHelper::getL3Config(container.getDevice()->getHardwareInfo(), enableSLM); EncodeSetMMIO::encodeIMM(container, offset, data, false); } template void EncodeMiFlushDW::appendMiFlushDw(MI_FLUSH_DW *miFlushDwCmd, const HardwareInfo &hwInfo) {} template void EncodeMiFlushDW::programMiFlushDwWA(LinearStream &commandStream) {} template size_t EncodeMiFlushDW::getMiFlushDwWaSize() { return 0; } template inline void EncodeWA::encodeAdditionalPipelineSelect(LinearStream &stream, const PipelineSelectArgs &args, bool is3DPipeline, const HardwareInfo &hwInfo, bool isRcs) {} template inline size_t EncodeWA::getAdditionalPipelineSelectSize(Device &device) { return 0; } template inline void EncodeWA::addPipeControlPriorToNonPipelinedStateCommand(LinearStream &commandStream, PipeControlArgs args, const HardwareInfo &hwInfo, bool isRcs) { MemorySynchronizationCommands::addPipeControl(commandStream, args); } template inline void EncodeWA::addPipeControlBeforeStateBaseAddress(LinearStream &commandStream, const HardwareInfo &hwInfo, bool isRcs) { PipeControlArgs args; args.dcFlushEnable = 
MemorySynchronizationCommands::getDcFlushEnable(true, hwInfo); args.textureCacheInvalidationEnable = true; NEO::EncodeWA::addPipeControlPriorToNonPipelinedStateCommand(commandStream, args, hwInfo, isRcs); } template inline void EncodeSurfaceState::encodeExtraBufferParams(EncodeSurfaceStateArgs &args) { auto surfaceState = reinterpret_cast(args.outMemory); encodeExtraCacheSettings(surfaceState, *args.gmmHelper->getHardwareInfo()); } template bool EncodeSurfaceState::doBindingTablePrefetch() { return true; } template inline void EncodeSurfaceState::setCoherencyType(R_SURFACE_STATE *surfaceState, COHERENCY_TYPE coherencyType) { surfaceState->setCoherencyType(coherencyType); } template void EncodeSempahore::programMiSemaphoreWait(MI_SEMAPHORE_WAIT *cmd, uint64_t compareAddress, uint32_t compareData, COMPARE_OPERATION compareMode, bool registerPollMode) { MI_SEMAPHORE_WAIT localCmd = Family::cmdInitMiSemaphoreWait; localCmd.setCompareOperation(compareMode); localCmd.setSemaphoreDataDword(compareData); localCmd.setSemaphoreGraphicsAddress(compareAddress); localCmd.setWaitMode(MI_SEMAPHORE_WAIT::WAIT_MODE::WAIT_MODE_POLLING_MODE); *cmd = localCmd; } template void EncodeEnableRayTracing::programEnableRayTracing(LinearStream &commandStream, GraphicsAllocation &backBuffer) { } template inline void EncodeStoreMemory::programStoreDataImm(MI_STORE_DATA_IMM *cmdBuffer, uint64_t gpuAddress, uint32_t dataDword0, uint32_t dataDword1, bool storeQword, bool workloadPartitionOffset) { MI_STORE_DATA_IMM storeDataImmediate = Family::cmdInitStoreDataImm; storeDataImmediate.setAddress(gpuAddress); storeDataImmediate.setStoreQword(storeQword); storeDataImmediate.setDataDword0(dataDword0); if (storeQword) { storeDataImmediate.setDataDword1(dataDword1); storeDataImmediate.setDwordLength(MI_STORE_DATA_IMM::DWORD_LENGTH::DWORD_LENGTH_STORE_QWORD); } else { storeDataImmediate.setDwordLength(MI_STORE_DATA_IMM::DWORD_LENGTH::DWORD_LENGTH_STORE_DWORD); } *cmdBuffer = storeDataImmediate; } template 
inline void EncodeMiArbCheck::adjust(MI_ARB_CHECK &miArbCheck) { } template void EncodeDispatchKernel::setupPostSyncMocs(WALKER_TYPE &walkerCmd, const RootDeviceEnvironment &rootDeviceEnvironment) {} } // namespace NEO command_encoder_raytracing_xehp_and_later.inl000066400000000000000000000006361422164147700346270ustar00rootroot00000000000000compute-runtime-22.14.22890/shared/source/command_container/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_container/command_encoder.h" #include "shared/source/helpers/hw_info.h" #include "hw_cmds.h" namespace NEO { template void EncodeEnableRayTracing::programEnableRayTracing(LinearStream &commandStream, GraphicsAllocation &backBuffer) { } } // namespace NEO compute-runtime-22.14.22890/shared/source/command_container/command_encoder_tgllp_and_later.inl000066400000000000000000000015551422164147700326420ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_container/command_encoder.h" #include "shared/source/command_stream/linear_stream.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/pipe_control_args.h" namespace NEO { template <> void EncodeWA::addPipeControlBeforeStateBaseAddress(LinearStream &commandStream, const HardwareInfo &hwInfo, bool isRcs) { PipeControlArgs args; args.dcFlushEnable = MemorySynchronizationCommands::getDcFlushEnable(true, hwInfo); args.textureCacheInvalidationEnable = true; args.hdcPipelineFlush = true; NEO::EncodeWA::addPipeControlPriorToNonPipelinedStateCommand(commandStream, args, hwInfo, isRcs); } } // namespace NEO command_encoder_xe_hpg_core_and_later.inl000066400000000000000000000027731422164147700337260ustar00rootroot00000000000000compute-runtime-22.14.22890/shared/source/command_container/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include 
"shared/source/command_container/command_encoder.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/hw_info.h" #include "hw_cmds.h" namespace NEO { template <> void EncodeSurfaceState::encodeExtraCacheSettings(R_SURFACE_STATE *surfaceState, const HardwareInfo &hwInfo) { surfaceState->setL1CachePolicyL1CacheControl(R_SURFACE_STATE::L1_CACHE_POLICY_WBP); if (DebugManager.flags.OverrideL1CacheControlInSurfaceState.get() != -1) { surfaceState->setL1CachePolicyL1CacheControl(static_cast(DebugManager.flags.OverrideL1CacheControlInSurfaceState.get())); } } template void EncodeEnableRayTracing::programEnableRayTracing(LinearStream &commandStream, GraphicsAllocation &backBuffer) { auto cmd = GfxFamily::cmd3dStateBtd; cmd.getBtdStateBody().setPerDssMemoryBackedBufferSize(static_cast(RayTracingHelper::getMemoryBackedFifoSizeToPatch())); cmd.getBtdStateBody().setMemoryBackedBufferBasePointer(backBuffer.getGpuAddress()); append3dStateBtd(&cmd); *commandStream.getSpaceForCmd() = cmd; } template <> inline void EncodeWA::setAdditionalPipeControlFlagsForNonPipelineStateCommand(PipeControlArgs &args) { args.unTypedDataPortCacheFlush = true; } } // namespace NEO compute-runtime-22.14.22890/shared/source/command_container/command_encoder_xehp_and_later.inl000066400000000000000000001060621422164147700324630ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_container/command_encoder.h" #include "shared/source/command_container/implicit_scaling.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/command_stream/linear_stream.h" #include "shared/source/command_stream/preemption.h" #include "shared/source/command_stream/stream_properties.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/execution_environment/execution_environment.h" #include 
"shared/source/gmm_helper/client_context/gmm_client_context.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/basic_math.h" #include "shared/source/helpers/constants.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/hw_walk_order.h" #include "shared/source/helpers/pipe_control_args.h" #include "shared/source/helpers/pipeline_select_helper.h" #include "shared/source/helpers/ray_tracing_helper.h" #include "shared/source/helpers/simd_helper.h" #include "shared/source/helpers/state_base_address.h" #include "shared/source/kernel/dispatch_kernel_encoder_interface.h" #include "shared/source/kernel/implicit_args.h" #include "shared/source/kernel/kernel_descriptor.h" #include "shared/source/os_interface/hw_info_config.h" #include namespace NEO { constexpr size_t TimestampDestinationAddressAlignment = 16; template void EncodeDispatchKernel::setGrfInfo(INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, uint32_t numGrf, const size_t &sizeCrossThreadData, const size_t &sizePerThreadData) {} template void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDispatchKernelArgs &args) { using SHARED_LOCAL_MEMORY_SIZE = typename Family::INTERFACE_DESCRIPTOR_DATA::SHARED_LOCAL_MEMORY_SIZE; using STATE_BASE_ADDRESS = typename Family::STATE_BASE_ADDRESS; using MI_BATCH_BUFFER_END = typename Family::MI_BATCH_BUFFER_END; using INLINE_DATA = typename Family::INLINE_DATA; const HardwareInfo &hwInfo = args.device->getHardwareInfo(); const auto &kernelDescriptor = args.dispatchInterface->getKernelDescriptor(); auto sizeCrossThreadData = args.dispatchInterface->getCrossThreadDataSize(); auto sizePerThreadData = args.dispatchInterface->getPerThreadDataSize(); auto sizePerThreadDataForWholeGroup = args.dispatchInterface->getPerThreadDataSizeForWholeThreadGroup(); auto pImplicitArgs = args.dispatchInterface->getImplicitArgs(); LinearStream *listCmdBufferStream = container.getCommandStream(); size_t sshOffset = 0; auto 
threadDims = static_cast(args.pThreadGroupDimensions); const Vec3 threadStartVec{0, 0, 0}; Vec3 threadDimsVec{0, 0, 0}; if (!args.isIndirect) { threadDimsVec = {threadDims[0], threadDims[1], threadDims[2]}; } bool specialModeRequired = kernelDescriptor.kernelAttributes.flags.usesSpecialPipelineSelectMode; if (PreambleHelper::isSpecialPipelineSelectModeChanged(container.lastPipelineSelectModeRequired, specialModeRequired, hwInfo)) { container.lastPipelineSelectModeRequired = specialModeRequired; EncodeComputeMode::adjustPipelineSelect(container, kernelDescriptor); } WALKER_TYPE walkerCmd = Family::cmdInitGpgpuWalker; auto &idd = walkerCmd.getInterfaceDescriptor(); EncodeDispatchKernel::setGrfInfo(&idd, kernelDescriptor.kernelAttributes.numGrfRequired, sizeCrossThreadData, sizePerThreadData); bool localIdsGenerationByRuntime = args.dispatchInterface->requiresGenerationOfLocalIdsByRuntime(); auto requiredWorkgroupOrder = args.dispatchInterface->getRequiredWorkgroupOrder(); bool inlineDataProgramming = EncodeDispatchKernel::inlineDataProgrammingRequired(kernelDescriptor); { auto alloc = args.dispatchInterface->getIsaAllocation(); UNRECOVERABLE_IF(nullptr == alloc); auto offset = alloc->getGpuAddressToPatch(); if (!localIdsGenerationByRuntime) { offset += kernelDescriptor.entryPoints.skipPerThreadDataLoad; } idd.setKernelStartPointer(offset); } auto threadsPerThreadGroup = args.dispatchInterface->getNumThreadsPerThreadGroup(); idd.setNumberOfThreadsInGpgpuThreadGroup(threadsPerThreadGroup); EncodeDispatchKernel::programBarrierEnable(idd, kernelDescriptor.kernelAttributes.barrierCount, hwInfo); auto slmSize = static_cast( HwHelperHw::get().computeSlmValues(hwInfo, args.dispatchInterface->getSlmTotalSize())); if (DebugManager.flags.OverrideSlmAllocationSize.get() != -1) { slmSize = static_cast(DebugManager.flags.OverrideSlmAllocationSize.get()); } idd.setSharedLocalMemorySize(slmSize); auto bindingTableStateCount = kernelDescriptor.payloadMappings.bindingTable.numEntries; 
uint32_t bindingTablePointer = 0u; if (kernelDescriptor.kernelAttributes.bufferAddressingMode == KernelDescriptor::BindfulAndStateless) { container.prepareBindfulSsh(); if (bindingTableStateCount > 0u) { auto ssh = container.getHeapWithRequiredSizeAndAlignment(HeapType::SURFACE_STATE, args.dispatchInterface->getSurfaceStateHeapDataSize(), BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE); sshOffset = ssh->getUsed(); bindingTablePointer = static_cast(EncodeSurfaceState::pushBindingTableAndSurfaceStates( *ssh, bindingTableStateCount, args.dispatchInterface->getSurfaceStateHeapData(), args.dispatchInterface->getSurfaceStateHeapDataSize(), bindingTableStateCount, kernelDescriptor.payloadMappings.bindingTable.tableOffset)); } } idd.setBindingTablePointer(bindingTablePointer); PreemptionHelper::programInterfaceDescriptorDataPreemption(&idd, args.preemptionMode); if constexpr (Family::supportsSampler) { auto heap = ApiSpecificConfig::getBindlessConfiguration() ? args.device->getBindlessHeapsHelper()->getHeap(BindlessHeapsHelper::GLOBAL_DSH) : container.getIndirectHeap(HeapType::DYNAMIC_STATE); UNRECOVERABLE_IF(!heap); uint32_t samplerStateOffset = 0; uint32_t samplerCount = 0; if (kernelDescriptor.payloadMappings.samplerTable.numSamplers > 0) { samplerCount = kernelDescriptor.payloadMappings.samplerTable.numSamplers; samplerStateOffset = EncodeStates::copySamplerState( heap, kernelDescriptor.payloadMappings.samplerTable.tableOffset, kernelDescriptor.payloadMappings.samplerTable.numSamplers, kernelDescriptor.payloadMappings.samplerTable.borderColor, args.dispatchInterface->getDynamicStateHeapData(), args.device->getBindlessHeapsHelper(), hwInfo); if (ApiSpecificConfig::getBindlessConfiguration()) { container.getResidencyContainer().push_back(args.device->getBindlessHeapsHelper()->getHeap(NEO::BindlessHeapsHelper::BindlesHeapType::GLOBAL_DSH)->getGraphicsAllocation()); } } idd.setSamplerStatePointer(samplerStateOffset); 
EncodeDispatchKernel::adjustBindingTablePrefetch(idd, samplerCount, bindingTableStateCount); } else { EncodeDispatchKernel::adjustBindingTablePrefetch(idd, 0u, bindingTableStateCount); } uint64_t offsetThreadData = 0u; const uint32_t inlineDataSize = sizeof(INLINE_DATA); auto crossThreadData = args.dispatchInterface->getCrossThreadData(); uint32_t inlineDataProgrammingOffset = 0u; if (inlineDataProgramming) { inlineDataProgrammingOffset = std::min(inlineDataSize, sizeCrossThreadData); auto dest = reinterpret_cast(walkerCmd.getInlineDataPointer()); memcpy_s(dest, inlineDataProgrammingOffset, crossThreadData, inlineDataProgrammingOffset); sizeCrossThreadData -= inlineDataProgrammingOffset; crossThreadData = ptrOffset(crossThreadData, inlineDataProgrammingOffset); inlineDataProgramming = inlineDataProgrammingOffset != 0; } uint32_t sizeThreadData = sizePerThreadDataForWholeGroup + sizeCrossThreadData; uint32_t sizeForImplicitArgsPatching = NEO::ImplicitArgsHelper::getSizeForImplicitArgsPatching(pImplicitArgs, kernelDescriptor, hwInfo); uint32_t iohRequiredSize = sizeThreadData + sizeForImplicitArgsPatching; { auto heap = container.getIndirectHeap(HeapType::INDIRECT_OBJECT); UNRECOVERABLE_IF(!heap); heap->align(WALKER_TYPE::INDIRECTDATASTARTADDRESS_ALIGN_SIZE); auto ptr = container.getHeapSpaceAllowGrow(HeapType::INDIRECT_OBJECT, iohRequiredSize); UNRECOVERABLE_IF(!ptr); offsetThreadData = (is64bit ? 
heap->getHeapGpuStartOffset() : heap->getHeapGpuBase()) + static_cast(heap->getUsed() - sizeThreadData); if (pImplicitArgs) { offsetThreadData -= sizeof(ImplicitArgs); pImplicitArgs->localIdTablePtr = heap->getGraphicsAllocation()->getGpuAddress() + heap->getUsed() - iohRequiredSize; ptr = NEO::ImplicitArgsHelper::patchImplicitArgs(ptr, *pImplicitArgs, kernelDescriptor, hwInfo, std::make_pair(localIdsGenerationByRuntime, requiredWorkgroupOrder)); } if (sizeCrossThreadData > 0) { memcpy_s(ptr, sizeCrossThreadData, crossThreadData, sizeCrossThreadData); } if (args.isIndirect) { auto gpuPtr = heap->getGraphicsAllocation()->getGpuAddress() + static_cast(heap->getUsed() - sizeThreadData - inlineDataProgrammingOffset); uint64_t implicitArgsGpuPtr = 0u; if (pImplicitArgs) { implicitArgsGpuPtr = gpuPtr + inlineDataProgrammingOffset - sizeof(ImplicitArgs); } EncodeIndirectParams::encode(container, gpuPtr, args.dispatchInterface, implicitArgsGpuPtr); } auto perThreadDataPtr = args.dispatchInterface->getPerThreadData(); if (perThreadDataPtr != nullptr) { ptr = ptrOffset(ptr, sizeCrossThreadData); memcpy_s(ptr, sizePerThreadDataForWholeGroup, perThreadDataPtr, sizePerThreadDataForWholeGroup); } } bool requiresGlobalAtomicsUpdate = false; if (args.partitionCount > 1) { requiresGlobalAtomicsUpdate = container.lastSentUseGlobalAtomics != args.useGlobalAtomics; container.lastSentUseGlobalAtomics = args.useGlobalAtomics; } if (container.isAnyHeapDirty() || args.requiresUncachedMocs || requiresGlobalAtomicsUpdate) { PipeControlArgs syncArgs; syncArgs.dcFlushEnable = MemorySynchronizationCommands::getDcFlushEnable(true, hwInfo); MemorySynchronizationCommands::addPipeControl(*container.getCommandStream(), syncArgs); STATE_BASE_ADDRESS sbaCmd; auto gmmHelper = container.getDevice()->getGmmHelper(); uint32_t statelessMocsIndex = args.requiresUncachedMocs ? 
(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED) >> 1) : (gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER) >> 1); EncodeStateBaseAddress::encode(container, sbaCmd, statelessMocsIndex, args.useGlobalAtomics, args.partitionCount > 1); container.setDirtyStateForAllHeaps(false); args.requiresUncachedMocs = false; } walkerCmd.setIndirectDataStartAddress(static_cast(offsetThreadData)); walkerCmd.setIndirectDataLength(sizeThreadData); EncodeDispatchKernel::encodeThreadData(walkerCmd, nullptr, threadDims, args.dispatchInterface->getGroupSize(), kernelDescriptor.kernelAttributes.simdSize, kernelDescriptor.kernelAttributes.numLocalIdChannels, args.dispatchInterface->getNumThreadsPerThreadGroup(), args.dispatchInterface->getThreadExecutionMask(), localIdsGenerationByRuntime, inlineDataProgramming, args.isIndirect, requiredWorkgroupOrder); using POSTSYNC_DATA = typename Family::POSTSYNC_DATA; auto &postSync = walkerCmd.getPostSync(); if (args.eventAddress != 0) { postSync.setDataportPipelineFlush(true); if (args.isTimestampEvent) { postSync.setOperation(POSTSYNC_DATA::OPERATION_WRITE_TIMESTAMP); } else { uint32_t STATE_SIGNALED = 0u; postSync.setOperation(POSTSYNC_DATA::OPERATION_WRITE_IMMEDIATE_DATA); postSync.setImmediateData(STATE_SIGNALED); } UNRECOVERABLE_IF(!(isAligned(args.eventAddress))); postSync.setDestinationAddress(args.eventAddress); EncodeDispatchKernel::setupPostSyncMocs(walkerCmd, args.device->getRootDeviceEnvironment()); EncodeDispatchKernel::adjustTimestampPacket(walkerCmd, hwInfo); } walkerCmd.setPredicateEnable(args.isPredicate); EncodeDispatchKernel::adjustInterfaceDescriptorData(idd, hwInfo); EncodeDispatchKernel::appendAdditionalIDDFields(&idd, hwInfo, threadsPerThreadGroup, args.dispatchInterface->getSlmTotalSize(), args.dispatchInterface->getSlmPolicy()); EncodeDispatchKernel::encodeAdditionalWalkerFields(hwInfo, walkerCmd, args.isCooperative ? 
KernelExecutionType::Concurrent : KernelExecutionType::Default); PreemptionHelper::applyPreemptionWaCmdsBegin(listCmdBufferStream, *args.device); if ((args.partitionCount > 1 && !args.isCooperative) && !args.isInternal) { const uint64_t workPartitionAllocationGpuVa = args.device->getDefaultEngine().commandStreamReceiver->getWorkPartitionAllocationGpuAddress(); if (args.eventAddress != 0) { postSync.setOperation(POSTSYNC_DATA::OPERATION_WRITE_TIMESTAMP); } ImplicitScalingDispatch::dispatchCommands(*listCmdBufferStream, walkerCmd, args.device->getDeviceBitfield(), args.partitionCount, true, true, false, workPartitionAllocationGpuVa, hwInfo); } else { args.partitionCount = 1; auto buffer = listCmdBufferStream->getSpace(sizeof(walkerCmd)); *(decltype(walkerCmd) *)buffer = walkerCmd; } PreemptionHelper::applyPreemptionWaCmdsEnd(listCmdBufferStream, *args.device); } template inline void EncodeDispatchKernel::setupPostSyncMocs(WALKER_TYPE &walkerCmd, const RootDeviceEnvironment &rootDeviceEnvironment) { auto &postSyncData = walkerCmd.getPostSync(); auto gmmHelper = rootDeviceEnvironment.getGmmHelper(); const auto &hwInfo = *rootDeviceEnvironment.getHardwareInfo(); if (MemorySynchronizationCommands::getDcFlushEnable(true, hwInfo)) { postSyncData.setMocs(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED)); } else { postSyncData.setMocs(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER)); } if (DebugManager.flags.OverridePostSyncMocs.get() != -1) { postSyncData.setMocs(DebugManager.flags.OverridePostSyncMocs.get()); } } template inline void EncodeDispatchKernel::encodeAdditionalWalkerFields(const HardwareInfo &hwInfo, WALKER_TYPE &walkerCmd, KernelExecutionType kernelExecutionType) { } template bool EncodeDispatchKernel::isRuntimeLocalIdsGenerationRequired(uint32_t activeChannels, const size_t *lws, std::array walkOrder, bool requireInputWalkOrder, uint32_t &requiredWalkOrder, uint32_t simd) { if (simd == 1) { return true; } bool 
hwGenerationOfLocalIdsEnabled = true; if (DebugManager.flags.EnableHwGenerationLocalIds.get() != -1) { hwGenerationOfLocalIdsEnabled = !!DebugManager.flags.EnableHwGenerationLocalIds.get(); } if (hwGenerationOfLocalIdsEnabled) { if (activeChannels == 0) { return false; } size_t totalLwsSize = 1u; for (auto dimension = 0u; dimension < activeChannels; dimension++) { totalLwsSize *= lws[dimension]; } if (totalLwsSize > 1024u) { return true; } //check if we need to follow kernel requirements if (requireInputWalkOrder) { for (uint32_t dimension = 0; dimension < activeChannels - 1; dimension++) { if (!Math::isPow2(lws[walkOrder[dimension]])) { return true; } } auto index = 0u; while (index < HwWalkOrderHelper::walkOrderPossibilties) { if (walkOrder[0] == HwWalkOrderHelper::compatibleDimensionOrders[index][0] && walkOrder[1] == HwWalkOrderHelper::compatibleDimensionOrders[index][1]) { break; }; index++; } DEBUG_BREAK_IF(index >= HwWalkOrderHelper::walkOrderPossibilties); requiredWalkOrder = index; return false; } //kernel doesn't specify any walk order requirements, check if we have any compatible for (uint32_t walkOrder = 0; walkOrder < HwWalkOrderHelper::walkOrderPossibilties; walkOrder++) { bool allDimensionsCompatible = true; for (uint32_t dimension = 0; dimension < activeChannels - 1; dimension++) { if (!Math::isPow2(lws[HwWalkOrderHelper::compatibleDimensionOrders[walkOrder][dimension]])) { allDimensionsCompatible = false; break; } } if (allDimensionsCompatible) { requiredWalkOrder = walkOrder; return false; } } } return true; } template void EncodeDispatchKernel::encodeThreadData(WALKER_TYPE &walkerCmd, const uint32_t *startWorkGroup, const uint32_t *numWorkGroups, const uint32_t *workGroupSizes, uint32_t simd, uint32_t localIdDimensions, uint32_t threadsPerThreadGroup, uint32_t threadExecutionMask, bool localIdsGenerationByRuntime, bool inlineDataProgrammingRequired, bool isIndirect, uint32_t requiredWorkGroupOrder) { if (isIndirect) { 
walkerCmd.setIndirectParameterEnable(true); } else { walkerCmd.setThreadGroupIdXDimension(static_cast(numWorkGroups[0])); walkerCmd.setThreadGroupIdYDimension(static_cast(numWorkGroups[1])); walkerCmd.setThreadGroupIdZDimension(static_cast(numWorkGroups[2])); } if (startWorkGroup) { walkerCmd.setThreadGroupIdStartingX(static_cast(startWorkGroup[0])); walkerCmd.setThreadGroupIdStartingY(static_cast(startWorkGroup[1])); walkerCmd.setThreadGroupIdStartingZ(static_cast(startWorkGroup[2])); } uint64_t executionMask = threadExecutionMask; if (executionMask == 0) { auto workGroupSize = workGroupSizes[0] * workGroupSizes[1] * workGroupSizes[2]; auto remainderSimdLanes = workGroupSize & (simd - 1); executionMask = maxNBitValue(remainderSimdLanes); if (!executionMask) { executionMask = maxNBitValue((simd == 1) ? 32 : simd); } } walkerCmd.setExecutionMask(static_cast(executionMask)); walkerCmd.setSimdSize(getSimdConfig(simd)); walkerCmd.setMessageSimd(walkerCmd.getSimdSize()); if (DebugManager.flags.ForceSimdMessageSizeInWalker.get() != -1) { walkerCmd.setMessageSimd(DebugManager.flags.ForceSimdMessageSizeInWalker.get()); } //1) cross-thread inline data will be put into R1, but if kernel uses local ids, then cross-thread should be put further back //so whenever local ids are driver or hw generated, reserve space by setting right values for emitLocalIds //2) Auto-generation of local ids should be possible, when in fact local ids are used if (!localIdsGenerationByRuntime && localIdDimensions > 0) { UNRECOVERABLE_IF(localIdDimensions != 3); uint32_t emitLocalIdsForDim = (1 << 0) | (1 << 1) | (1 << 2); walkerCmd.setEmitLocalId(emitLocalIdsForDim); walkerCmd.setLocalXMaximum(static_cast(workGroupSizes[0] - 1)); walkerCmd.setLocalYMaximum(static_cast(workGroupSizes[1] - 1)); walkerCmd.setLocalZMaximum(static_cast(workGroupSizes[2] - 1)); walkerCmd.setGenerateLocalId(1); walkerCmd.setWalkOrder(requiredWorkGroupOrder); } if (inlineDataProgrammingRequired == true) { 
walkerCmd.setEmitInlineParameter(1); } } template void EncodeStateBaseAddress::setIohAddressForDebugger(NEO::Debugger::SbaAddresses &sbaAddress, const STATE_BASE_ADDRESS &sbaCmd) { } template void EncodeStateBaseAddress::encode(CommandContainer &container, STATE_BASE_ADDRESS &sbaCmd, bool multiOsContextCapable) { auto gmmHelper = container.getDevice()->getRootDeviceEnvironment().getGmmHelper(); uint32_t statelessMocsIndex = (gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER) >> 1); EncodeStateBaseAddress::encode(container, sbaCmd, statelessMocsIndex, false, multiOsContextCapable); } template void EncodeStateBaseAddress::encode(CommandContainer &container, STATE_BASE_ADDRESS &sbaCmd, uint32_t statelessMocsIndex, bool useGlobalAtomics, bool multiOsContextCapable) { auto gmmHelper = container.getDevice()->getRootDeviceEnvironment().getGmmHelper(); StateBaseAddressHelper::programStateBaseAddress( &sbaCmd, container.isHeapDirty(HeapType::DYNAMIC_STATE) ? container.getIndirectHeap(HeapType::DYNAMIC_STATE) : nullptr, container.isHeapDirty(HeapType::INDIRECT_OBJECT) ? container.getIndirectHeap(HeapType::INDIRECT_OBJECT) : nullptr, container.isHeapDirty(HeapType::SURFACE_STATE) ? 
container.getIndirectHeap(HeapType::SURFACE_STATE) : nullptr, 0, true, statelessMocsIndex, container.getIndirectObjectHeapBaseAddress(), container.getInstructionHeapBaseAddress(), 0, true, false, gmmHelper, multiOsContextCapable, MemoryCompressionState::NotApplicable, useGlobalAtomics, 1u); auto pCmd = reinterpret_cast(container.getCommandStream()->getSpace(sizeof(STATE_BASE_ADDRESS))); *pCmd = sbaCmd; auto &hwInfo = container.getDevice()->getHardwareInfo(); auto &hwInfoConfig = *HwInfoConfig::get(hwInfo.platform.eProductFamily); if (hwInfoConfig.isAdditionalStateBaseAddressWARequired(hwInfo)) { pCmd = reinterpret_cast(container.getCommandStream()->getSpace(sizeof(STATE_BASE_ADDRESS))); *pCmd = sbaCmd; } if (container.isHeapDirty(HeapType::SURFACE_STATE)) { auto heap = container.getIndirectHeap(HeapType::SURFACE_STATE); auto cmd = Family::cmdInitStateBindingTablePoolAlloc; cmd.setBindingTablePoolBaseAddress(heap->getHeapGpuBase()); cmd.setBindingTablePoolBufferSize(heap->getHeapSizeInPages()); cmd.setSurfaceObjectControlStateIndexToMocsTables(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_STATE_HEAP_BUFFER)); auto buffer = container.getCommandStream()->getSpace(sizeof(cmd)); *(typename Family::_3DSTATE_BINDING_TABLE_POOL_ALLOC *)buffer = cmd; } } template size_t EncodeStateBaseAddress::getRequiredSizeForStateBaseAddress(Device &device, CommandContainer &container) { auto &hwInfo = device.getHardwareInfo(); auto &hwInfoConfig = *HwInfoConfig::get(hwInfo.platform.eProductFamily); size_t size = sizeof(typename Family::STATE_BASE_ADDRESS); if (hwInfoConfig.isAdditionalStateBaseAddressWARequired(hwInfo)) { size += sizeof(typename Family::STATE_BASE_ADDRESS); } if (container.isHeapDirty(HeapType::SURFACE_STATE)) { size += sizeof(typename Family::_3DSTATE_BINDING_TABLE_POOL_ALLOC); } return size; } template void EncodeComputeMode::programComputeModeCommand(LinearStream &csr, StateComputeModeProperties &properties, const HardwareInfo &hwInfo) { using STATE_COMPUTE_MODE = 
typename Family::STATE_COMPUTE_MODE; using FORCE_NON_COHERENT = typename STATE_COMPUTE_MODE::FORCE_NON_COHERENT; STATE_COMPUTE_MODE stateComputeMode = Family::cmdInitStateComputeMode; auto maskBits = stateComputeMode.getMaskBits(); FORCE_NON_COHERENT coherencyValue = (properties.isCoherencyRequired.value == 1) ? FORCE_NON_COHERENT::FORCE_NON_COHERENT_FORCE_DISABLED : FORCE_NON_COHERENT::FORCE_NON_COHERENT_FORCE_GPU_NON_COHERENT; stateComputeMode.setForceNonCoherent(coherencyValue); maskBits |= Family::stateComputeModeForceNonCoherentMask; stateComputeMode.setLargeGrfMode(properties.largeGrfMode.value == 1); maskBits |= Family::stateComputeModeLargeGrfModeMask; if (DebugManager.flags.ForceMultiGpuAtomics.get() != -1) { stateComputeMode.setForceDisableSupportForMultiGpuAtomics(!!DebugManager.flags.ForceMultiGpuAtomics.get()); maskBits |= Family::stateComputeModeForceDisableSupportMultiGpuAtomics; } if (DebugManager.flags.ForceMultiGpuPartialWrites.get() != -1) { stateComputeMode.setForceDisableSupportForMultiGpuPartialWrites(!!DebugManager.flags.ForceMultiGpuPartialWrites.get()); maskBits |= Family::stateComputeModeForceDisableSupportMultiGpuPartialWrites; } stateComputeMode.setMaskBits(maskBits); auto buffer = csr.getSpaceForCmd(); *buffer = stateComputeMode; } template void EncodeComputeMode::adjustPipelineSelect(CommandContainer &container, const NEO::KernelDescriptor &kernelDescriptor) { using PIPELINE_SELECT = typename Family::PIPELINE_SELECT; auto pipelineSelectCmd = Family::cmdInitPipelineSelect; auto isSpecialModeSelected = kernelDescriptor.kernelAttributes.flags.usesSpecialPipelineSelectMode; PreambleHelper::appendProgramPipelineSelect(&pipelineSelectCmd, isSpecialModeSelected, container.getDevice()->getHardwareInfo()); pipelineSelectCmd.setPipelineSelection(PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU); auto buffer = container.getCommandStream()->getSpace(sizeof(pipelineSelectCmd)); *(decltype(pipelineSelectCmd) *)buffer = pipelineSelectCmd; } template inline 
void EncodeMediaInterfaceDescriptorLoad::encode(CommandContainer &container) { } template void EncodeMiFlushDW::appendMiFlushDw(MI_FLUSH_DW *miFlushDwCmd, const HardwareInfo &hwInfo) { miFlushDwCmd->setFlushCcs(1); miFlushDwCmd->setFlushLlc(1); } template void EncodeMiFlushDW::programMiFlushDwWA(LinearStream &commandStream) { auto miFlushDwCmd = commandStream.getSpaceForCmd(); *miFlushDwCmd = Family::cmdInitMiFlushDw; } template size_t EncodeMiFlushDW::getMiFlushDwWaSize() { return sizeof(typename Family::MI_FLUSH_DW); } template bool EncodeSurfaceState::doBindingTablePrefetch() { return false; } template void EncodeSurfaceState::encodeExtraBufferParams(EncodeSurfaceStateArgs &args) { auto surfaceState = reinterpret_cast(args.outMemory); Gmm *gmm = args.allocation ? args.allocation->getDefaultGmm() : nullptr; uint32_t compressionFormat = 0; bool setConstCachePolicy = false; if (args.allocation && args.allocation->getAllocationType() == AllocationType::CONSTANT_SURFACE) { setConstCachePolicy = true; } if (surfaceState->getMemoryObjectControlState() == args.gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER) && DebugManager.flags.ForceL1Caching.get() != 0) { setConstCachePolicy = true; } if (setConstCachePolicy == true) { surfaceState->setMemoryObjectControlState(args.gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST)); } encodeExtraCacheSettings(surfaceState, *args.gmmHelper->getHardwareInfo()); encodeImplicitScalingParams(args); if (EncodeSurfaceState::isAuxModeEnabled(surfaceState, gmm)) { auto resourceFormat = gmm->gmmResourceInfo->getResourceFormat(); compressionFormat = args.gmmHelper->getClientContext()->getSurfaceStateCompressionFormat(resourceFormat); if (DebugManager.flags.ForceBufferCompressionFormat.get() != -1) { compressionFormat = DebugManager.flags.ForceBufferCompressionFormat.get(); } } if (DebugManager.flags.EnableStatelessCompressionWithUnifiedMemory.get()) { if (args.allocation && 
!MemoryPool::isSystemMemoryPool(args.allocation->getMemoryPool())) { setCoherencyType(surfaceState, R_SURFACE_STATE::COHERENCY_TYPE_GPU_COHERENT); setBufferAuxParamsForCCS(surfaceState); compressionFormat = DebugManager.flags.FormatForStatelessCompressionWithUnifiedMemory.get(); } } surfaceState->setCompressionFormat(compressionFormat); } template inline void EncodeSurfaceState::setCoherencyType(R_SURFACE_STATE *surfaceState, COHERENCY_TYPE coherencyType) { surfaceState->setCoherencyType(R_SURFACE_STATE::COHERENCY_TYPE_GPU_COHERENT); } template void EncodeSempahore::programMiSemaphoreWait(MI_SEMAPHORE_WAIT *cmd, uint64_t compareAddress, uint32_t compareData, COMPARE_OPERATION compareMode, bool registerPollMode) { MI_SEMAPHORE_WAIT localCmd = Family::cmdInitMiSemaphoreWait; localCmd.setCompareOperation(compareMode); localCmd.setSemaphoreDataDword(compareData); localCmd.setSemaphoreGraphicsAddress(compareAddress); localCmd.setWaitMode(MI_SEMAPHORE_WAIT::WAIT_MODE::WAIT_MODE_POLLING_MODE); localCmd.setRegisterPollMode(registerPollMode ? 
MI_SEMAPHORE_WAIT::REGISTER_POLL_MODE::REGISTER_POLL_MODE_REGISTER_POLL : MI_SEMAPHORE_WAIT::REGISTER_POLL_MODE::REGISTER_POLL_MODE_MEMORY_POLL); *cmd = localCmd; } template inline void EncodeWA::encodeAdditionalPipelineSelect(LinearStream &stream, const PipelineSelectArgs &args, bool is3DPipeline, const HardwareInfo &hwInfo, bool isRcs) {} template inline size_t EncodeWA::getAdditionalPipelineSelectSize(Device &device) { return 0u; } template inline void EncodeWA::addPipeControlPriorToNonPipelinedStateCommand(LinearStream &commandStream, PipeControlArgs args, const HardwareInfo &hwInfo, bool isRcs) { auto &hwInfoConfig = (*HwInfoConfig::get(hwInfo.platform.eProductFamily)); const auto &[isBasicWARequired, isExtendedWARequired] = hwInfoConfig.isPipeControlPriorToNonPipelinedStateCommandsWARequired(hwInfo, isRcs); if (isExtendedWARequired) { args.textureCacheInvalidationEnable = true; args.hdcPipelineFlush = true; args.amfsFlushEnable = true; args.instructionCacheInvalidateEnable = true; args.constantCacheInvalidationEnable = true; args.stateCacheInvalidationEnable = true; args.dcFlushEnable = false; NEO::EncodeWA::setAdditionalPipeControlFlagsForNonPipelineStateCommand(args); } else if (isBasicWARequired) { args.hdcPipelineFlush = true; NEO::EncodeWA::setAdditionalPipeControlFlagsForNonPipelineStateCommand(args); } MemorySynchronizationCommands::addPipeControl(commandStream, args); } template inline void EncodeStoreMemory::programStoreDataImm(MI_STORE_DATA_IMM *cmdBuffer, uint64_t gpuAddress, uint32_t dataDword0, uint32_t dataDword1, bool storeQword, bool workloadPartitionOffset) { MI_STORE_DATA_IMM storeDataImmediate = Family::cmdInitStoreDataImm; storeDataImmediate.setAddress(gpuAddress); storeDataImmediate.setStoreQword(storeQword); storeDataImmediate.setDataDword0(dataDword0); if (storeQword) { storeDataImmediate.setDataDword1(dataDword1); storeDataImmediate.setDwordLength(MI_STORE_DATA_IMM::DWORD_LENGTH::DWORD_LENGTH_STORE_QWORD); } else { 
storeDataImmediate.setDwordLength(MI_STORE_DATA_IMM::DWORD_LENGTH::DWORD_LENGTH_STORE_DWORD); } storeDataImmediate.setWorkloadPartitionIdOffsetEnable(workloadPartitionOffset); *cmdBuffer = storeDataImmediate; } template inline void EncodeMiArbCheck::adjust(MI_ARB_CHECK &miArbCheck) { if (DebugManager.flags.ForcePreParserEnabledForMiArbCheck.get() != -1) { miArbCheck.setPreParserDisable(!DebugManager.flags.ForcePreParserEnabledForMiArbCheck.get()); } } } // namespace NEO compute-runtime-22.14.22890/shared/source/command_container/definitions/000077500000000000000000000000001422164147700261135ustar00rootroot00000000000000compute-runtime-22.14.22890/shared/source/command_container/definitions/encode_surface_state.inl000066400000000000000000000005211422164147700327620ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_container/definitions/encode_surface_state_args_base.h" namespace NEO { template void EncodeSurfaceState::appendBufferSurfaceState(EncodeSurfaceStateArgs &args) { } } // namespace NEO compute-runtime-22.14.22890/shared/source/command_container/definitions/encode_surface_state_args.h000066400000000000000000000005101422164147700334410ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_container/definitions/encode_surface_state_args_base.h" namespace NEO { struct EncodeSurfaceStateArgs : EncodeSurfaceStateArgsBase { EncodeSurfaceStateArgs() = default; }; } // namespace NEO encode_surface_state_args_base.h000066400000000000000000000013571422164147700343660ustar00rootroot00000000000000compute-runtime-22.14.22890/shared/source/command_container/definitions/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include namespace NEO { class GmmHelper; class GraphicsAllocation; struct EncodeSurfaceStateArgsBase { uint64_t 
graphicsAddress = 0ull; size_t size = 0u; void *outMemory = nullptr; GraphicsAllocation *allocation = nullptr; GmmHelper *gmmHelper = nullptr; uint32_t numAvailableDevices = 0u; uint32_t mocs = 0u; bool cpuCoherent = false; bool forceNonAuxMode = false; bool isReadOnly = false; bool useGlobalAtomics = false; bool areMultipleSubDevicesInContext = false; bool implicitScaling = false; protected: EncodeSurfaceStateArgsBase() = default; }; } // namespace NEO compute-runtime-22.14.22890/shared/source/command_container/encode_compute_mode_bdw_and_later.inl000066400000000000000000000022641422164147700331520ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_container/command_encoder.h" #include "shared/source/command_stream/linear_stream.h" namespace NEO { template inline void EncodeComputeMode::programComputeModeCommandWithSynchronization( LinearStream &csr, StateComputeModeProperties &properties, const PipelineSelectArgs &args, bool hasSharedHandles, const HardwareInfo &hwInfo, bool isRcs) { EncodeComputeMode::programComputeModeCommand(csr, properties, hwInfo); } template inline void EncodeStoreMMIO::remapOffset(MI_STORE_REGISTER_MEM *pStoreRegMem) { } template inline void EncodeSetMMIO::remapOffset(MI_LOAD_REGISTER_MEM *pMiLoadReg) { } template inline void EncodeSetMMIO::remapOffset(MI_LOAD_REGISTER_REG *pMiLoadReg) { } template inline bool EncodeSetMMIO::isRemapApplicable(uint32_t offset) { return false; } template void EncodeSurfaceState::disableCompressionFlags(R_SURFACE_STATE *surfaceState) { } } // namespace NEO compute-runtime-22.14.22890/shared/source/command_container/encode_compute_mode_tgllp_and_later.inl000066400000000000000000000071511422164147700335200ustar00rootroot00000000000000/* * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_container/command_encoder.h" #include 
"shared/source/command_stream/linear_stream.h" namespace NEO { template size_t EncodeComputeMode::getCmdSizeForComputeMode(const HardwareInfo &hwInfo, bool hasSharedHandles, bool isRcs) { size_t size = 0; auto &hwInfoConfig = (*HwInfoConfig::get(hwInfo.platform.eProductFamily)); const auto &[isBasicWARequired, isExtendedWARequired] = hwInfoConfig.isPipeControlPriorToNonPipelinedStateCommandsWARequired(hwInfo, isRcs); std::ignore = isExtendedWARequired; if (isBasicWARequired) { size += sizeof(typename Family::PIPE_CONTROL); } size += sizeof(typename Family::STATE_COMPUTE_MODE); if (hasSharedHandles) { size += sizeof(typename Family::PIPE_CONTROL); } if (hwInfoConfig.is3DPipelineSelectWARequired() && isRcs) { size += (2 * PreambleHelper::getCmdSizeForPipelineSelect(hwInfo)); } return size; } template inline void EncodeComputeMode::programComputeModeCommandWithSynchronization( LinearStream &csr, StateComputeModeProperties &properties, const PipelineSelectArgs &args, bool hasSharedHandles, const HardwareInfo &hwInfo, bool isRcs) { using PIPE_CONTROL = typename Family::PIPE_CONTROL; NEO::EncodeWA::encodeAdditionalPipelineSelect(csr, args, true, hwInfo, isRcs); auto &hwInfoConfig = (*HwInfoConfig::get(hwInfo.platform.eProductFamily)); const auto &[isBasicWARequired, isExtendedWARequired] = hwInfoConfig.isPipeControlPriorToNonPipelinedStateCommandsWARequired(hwInfo, isRcs); std::ignore = isExtendedWARequired; if (isBasicWARequired) { PipeControlArgs args; args.dcFlushEnable = MemorySynchronizationCommands::getDcFlushEnable(true, hwInfo); NEO::EncodeWA::addPipeControlPriorToNonPipelinedStateCommand(csr, args, hwInfo, isRcs); } EncodeComputeMode::programComputeModeCommand(csr, properties, hwInfo); if (hasSharedHandles) { auto pc = csr.getSpaceForCmd(); *pc = Family::cmdInitPipeControl; } NEO::EncodeWA::encodeAdditionalPipelineSelect(csr, args, false, hwInfo, isRcs); } template inline void EncodeStoreMMIO::remapOffset(MI_STORE_REGISTER_MEM *pStoreRegMem) { 
pStoreRegMem->setMmioRemapEnable(true); } template void EncodeSetMMIO::remapOffset(MI_LOAD_REGISTER_MEM *pMiLoadReg) { if (isRemapApplicable(pMiLoadReg->getRegisterAddress())) { pMiLoadReg->setMmioRemapEnable(true); } } template void EncodeSetMMIO::remapOffset(MI_LOAD_REGISTER_REG *pMiLoadReg) { if (isRemapApplicable(pMiLoadReg->getSourceRegisterAddress())) { pMiLoadReg->setMmioRemapEnableSource(true); } if (isRemapApplicable(pMiLoadReg->getDestinationRegisterAddress())) { pMiLoadReg->setMmioRemapEnableDestination(true); } } template inline bool EncodeSetMMIO::isRemapApplicable(uint32_t offset) { return (0x2000 <= offset && offset <= 0x27ff) || (0x4200 <= offset && offset <= 0x420f) || (0x4400 <= offset && offset <= 0x441f); } template void EncodeSurfaceState::disableCompressionFlags(R_SURFACE_STATE *surfaceState) { surfaceState->setAuxiliarySurfaceMode(Family::RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE); surfaceState->setMemoryCompressionEnable(false); } } // namespace NEO compute-runtime-22.14.22890/shared/source/command_container/image_surface_state/000077500000000000000000000000001422164147700275725ustar00rootroot00000000000000compute-runtime-22.14.22890/shared/source/command_container/image_surface_state/CMakeLists.txt000066400000000000000000000010141422164147700323260ustar00rootroot00000000000000# # Copyright (C) 2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(NEO_CORE_COMMAND_CONTAINER_IMAGE_SURFACE_STATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/compression_params_bdw_and_later.inl ${CMAKE_CURRENT_SOURCE_DIR}/compression_params_tgllp_and_later.inl ${CMAKE_CURRENT_SOURCE_DIR}/compression_params_xehp_and_later.inl ) set_property(GLOBAL APPEND PROPERTY NEO_CORE_COMMAND_CONTAINER ${NEO_CORE_COMMAND_CONTAINER_IMAGE_SURFACE_STATE}) add_subdirectories() 
compression_params_bdw_and_later.inl000066400000000000000000000006371422164147700367760ustar00rootroot00000000000000compute-runtime-22.14.22890/shared/source/command_container/image_surface_state/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ namespace NEO { template void EncodeSurfaceState::appendImageCompressionParams(R_SURFACE_STATE *surfaceState, GraphicsAllocation *allocation, GmmHelper *gmmHelper, bool imageFromBuffer, GMM_YUV_PLANE_ENUM plane) { } } // namespace NEO compression_params_tgllp_and_later.inl000066400000000000000000000027621422164147700373450ustar00rootroot00000000000000compute-runtime-22.14.22890/shared/source/command_container/image_surface_state/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ namespace NEO { template void EncodeSurfaceState::setAuxParamsForMCSCCS(R_SURFACE_STATE *surfaceState) { surfaceState->setAuxiliarySurfaceMode(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_MCS_LCE); } template void EncodeSurfaceState::setClearColorParams(R_SURFACE_STATE *surfaceState, Gmm *gmm) { if (gmm->gmmResourceInfo->getResourceFlags()->Gpu.IndirectClearColor) { surfaceState->setClearValueAddressEnable(true); uint64_t clearColorAddress = GmmHelper::decanonize(surfaceState->getSurfaceBaseAddress() + gmm->gmmResourceInfo->getUnifiedAuxSurfaceOffset(GMM_UNIFIED_AUX_TYPE::GMM_AUX_CC)); surfaceState->setClearColorAddress(static_cast(clearColorAddress & 0xFFFFFFFFULL)); surfaceState->setClearColorAddressHigh(static_cast(clearColorAddress >> 32)); } } template void EncodeSurfaceState::setFlagsForMediaCompression(R_SURFACE_STATE *surfaceState, Gmm *gmm) { if (gmm->gmmResourceInfo->getResourceFlags()->Info.MediaCompressed) { surfaceState->setAuxiliarySurfaceMode(Family::RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE); surfaceState->setMemoryCompressionEnable(true); } else { surfaceState->setMemoryCompressionEnable(false); } } } // namespace NEO 
compression_params_xehp_and_later.inl000066400000000000000000000036651422164147700371720ustar00rootroot00000000000000compute-runtime-22.14.22890/shared/source/command_container/image_surface_state/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gmm_helper/client_context/gmm_client_context.h" #include "shared/source/gmm_helper/resource_info.h" namespace NEO { template void EncodeSurfaceState::appendImageCompressionParams(R_SURFACE_STATE *surfaceState, GraphicsAllocation *allocation, GmmHelper *gmmHelper, bool imageFromBuffer, GMM_YUV_PLANE_ENUM plane) { const auto ccsMode = R_SURFACE_STATE::AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_CCS_E; const auto mcsLceMode = R_SURFACE_STATE::AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_MCS_LCE; if ((ccsMode == surfaceState->getAuxiliarySurfaceMode() || mcsLceMode == surfaceState->getAuxiliarySurfaceMode() || surfaceState->getMemoryCompressionEnable())) { uint8_t compressionFormat; auto gmmResourceInfo = allocation->getDefaultGmm()->gmmResourceInfo.get(); if (gmmResourceInfo->getResourceFlags()->Info.MediaCompressed) { compressionFormat = gmmHelper->getClientContext()->getMediaSurfaceStateCompressionFormat(gmmResourceInfo->getResourceFormat()); if (plane == GMM_PLANE_Y) { compressionFormat &= 0xf; } else if ((plane == GMM_PLANE_U) || (plane == GMM_PLANE_V)) { compressionFormat |= 0x10; } } else { compressionFormat = gmmHelper->getClientContext()->getSurfaceStateCompressionFormat(gmmResourceInfo->getResourceFormat()); } if (imageFromBuffer) { if (DebugManager.flags.ForceBufferCompressionFormat.get() != -1) { compressionFormat = DebugManager.flags.ForceBufferCompressionFormat.get(); } appendParamsForImageFromBuffer(surfaceState); } surfaceState->setCompressionFormat(compressionFormat); } } } // namespace NEO 
compute-runtime-22.14.22890/shared/source/command_container/implicit_scaling.cpp000066400000000000000000000076301422164147700276240ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_container/implicit_scaling.h" #include "shared/source/command_container/walker_partition_interface.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/os_interface/os_interface.h" namespace NEO { bool ImplicitScalingHelper::isImplicitScalingEnabled(const DeviceBitfield &devices, bool preCondition) { bool apiSupport = ImplicitScaling::apiSupport; int32_t overrideEnableImplicitScaling = DebugManager.flags.EnableImplicitScaling.get(); if (overrideEnableImplicitScaling != -1) { apiSupport = !!overrideEnableImplicitScaling; preCondition = apiSupport; } bool partitionWalker = (devices.count() > 1u) && preCondition && apiSupport; if (DebugManager.flags.EnableWalkerPartition.get() != -1) { partitionWalker = !!DebugManager.flags.EnableWalkerPartition.get(); } //we can't do this without local memory partitionWalker &= OSInterface::osEnableLocalMemory; return partitionWalker; } bool ImplicitScalingHelper::isSynchronizeBeforeExecutionRequired() { auto synchronizeBeforeExecution = false; int overrideSynchronizeBeforeExecution = DebugManager.flags.SynchronizeWalkerInWparidMode.get(); if (overrideSynchronizeBeforeExecution != -1) { synchronizeBeforeExecution = !!overrideSynchronizeBeforeExecution; } return synchronizeBeforeExecution; } bool ImplicitScalingHelper::isSemaphoreProgrammingRequired() { auto semaphoreProgrammingRequired = false; int overrideSemaphoreProgrammingRequired = DebugManager.flags.SynchronizeWithSemaphores.get(); if (overrideSemaphoreProgrammingRequired != -1) { semaphoreProgrammingRequired = !!overrideSemaphoreProgrammingRequired; } return semaphoreProgrammingRequired; } bool ImplicitScalingHelper::isCrossTileAtomicRequired(bool 
defaultCrossTileRequirement) { auto crossTileAtomicSynchronization = defaultCrossTileRequirement; int overrideCrossTileAtomicSynchronization = DebugManager.flags.UseCrossAtomicSynchronization.get(); if (overrideCrossTileAtomicSynchronization != -1) { crossTileAtomicSynchronization = !!overrideCrossTileAtomicSynchronization; } return crossTileAtomicSynchronization; } bool ImplicitScalingHelper::isAtomicsUsedForSelfCleanup() { bool useAtomics = false; int overrideUseAtomics = DebugManager.flags.UseAtomicsForSelfCleanupSection.get(); if (overrideUseAtomics != -1) { useAtomics = !!(overrideUseAtomics); } return useAtomics; } bool ImplicitScalingHelper::isSelfCleanupRequired(const WalkerPartition::WalkerPartitionArgs &args, bool apiSelfCleanup) { bool defaultSelfCleanup = apiSelfCleanup && (args.crossTileAtomicSynchronization || args.synchronizeBeforeExecution || !args.staticPartitioning); int overrideProgramSelfCleanup = DebugManager.flags.ProgramWalkerPartitionSelfCleanup.get(); if (overrideProgramSelfCleanup != -1) { defaultSelfCleanup = !!(overrideProgramSelfCleanup); } return defaultSelfCleanup; } bool ImplicitScalingHelper::isWparidRegisterInitializationRequired() { bool initWparidRegister = false; int overrideInitWparidRegister = DebugManager.flags.WparidRegisterProgramming.get(); if (overrideInitWparidRegister != -1) { initWparidRegister = !!(overrideInitWparidRegister); } return initWparidRegister; } bool ImplicitScalingHelper::isPipeControlStallRequired(bool defaultEmitPipeControl) { int overrideUsePipeControl = DebugManager.flags.UsePipeControlAfterPartitionedWalker.get(); if (overrideUsePipeControl != -1) { defaultEmitPipeControl = !!(overrideUsePipeControl); } return defaultEmitPipeControl; } } // namespace NEO compute-runtime-22.14.22890/shared/source/command_container/implicit_scaling.h000066400000000000000000000066751422164147700273010ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma 
once #include "shared/source/helpers/common_types.h" #include "shared/source/helpers/vec.h" namespace WalkerPartition { struct WalkerPartitionArgs; } namespace NEO { struct HardwareInfo; class LinearStream; struct PipeControlArgs; namespace ImplicitScaling { extern bool apiSupport; } // namespace ImplicitScaling struct ImplicitScalingHelper { static bool isImplicitScalingEnabled(const DeviceBitfield &devices, bool preCondition); static bool isSemaphoreProgrammingRequired(); static bool isCrossTileAtomicRequired(bool defaultCrossTileRequirement); static bool isSynchronizeBeforeExecutionRequired(); static bool isAtomicsUsedForSelfCleanup(); static bool isSelfCleanupRequired(const WalkerPartition::WalkerPartitionArgs &args, bool apiSelfCleanup); static bool isWparidRegisterInitializationRequired(); static bool isPipeControlStallRequired(bool defaultEmitPipeControl); }; template struct ImplicitScalingDispatch { using WALKER_TYPE = typename GfxFamily::WALKER_TYPE; static size_t getSize(bool apiSelfCleanup, bool preferStaticPartitioning, const DeviceBitfield &devices, const Vec3 &groupStart, const Vec3 &groupCount); static void dispatchCommands(LinearStream &commandStream, WALKER_TYPE &walkerCmd, const DeviceBitfield &devices, uint32_t &partitionCount, bool useSecondaryBatchBuffer, bool apiSelfCleanup, bool usesImages, uint64_t workPartitionAllocationGpuVa, const HardwareInfo &hwInfo); static bool &getPipeControlStallRequired(); static size_t getBarrierSize(const HardwareInfo &hwInfo, bool apiSelfCleanup, bool usePostSync); static void dispatchBarrierCommands(LinearStream &commandStream, const DeviceBitfield &devices, PipeControlArgs &flushArgs, const HardwareInfo &hwInfo, uint64_t gpuAddress, uint64_t immediateData, bool apiSelfCleanup, bool useSecondaryBatchBuffer); static size_t getRegisterConfigurationSize(); static void dispatchRegisterConfiguration(LinearStream &commandStream, uint64_t workPartitionSurfaceAddress, uint32_t addressOffset); static size_t 
getOffsetRegisterSize(); static void dispatchOffsetRegister(LinearStream &commandStream, uint32_t addressOffset); static uint32_t getPostSyncOffset(); static bool platformSupportsImplicitScaling(const HardwareInfo &hwInfo); private: static bool pipeControlStallRequired; }; template struct PartitionRegisters { enum { wparidCCSOffset = 0x221C, addressOffsetCCSOffset = 0x23B4 }; }; } // namespace NEO compute-runtime-22.14.22890/shared/source/command_container/implicit_scaling_xehp_and_later.inl000066400000000000000000000323521422164147700326600ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_container/command_encoder.h" #include "shared/source/command_container/implicit_scaling.h" #include "shared/source/command_container/walker_partition_xehp_and_later.h" #include "shared/source/command_stream/linear_stream.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/memory_manager/graphics_allocation.h" namespace NEO { template WalkerPartition::WalkerPartitionArgs prepareWalkerPartitionArgs(uint64_t workPartitionAllocationGpuVa, uint32_t tileCount, uint32_t partitionCount, bool emitSelfCleanup, bool preferStaticPartitioning, bool staticPartitioning, bool useSecondaryBatchBuffer) { WalkerPartition::WalkerPartitionArgs args = {}; args.workPartitionAllocationGpuVa = workPartitionAllocationGpuVa; args.partitionCount = partitionCount; args.tileCount = tileCount; args.staticPartitioning = staticPartitioning; args.preferredStaticPartitioning = preferStaticPartitioning; args.useAtomicsForSelfCleanup = ImplicitScalingHelper::isAtomicsUsedForSelfCleanup(); args.initializeWparidRegister = ImplicitScalingHelper::isWparidRegisterInitializationRequired(); args.emitPipeControlStall = ImplicitScalingHelper::isPipeControlStallRequired(ImplicitScalingDispatch::getPipeControlStallRequired()); args.synchronizeBeforeExecution = 
ImplicitScalingHelper::isSynchronizeBeforeExecutionRequired(); args.crossTileAtomicSynchronization = ImplicitScalingHelper::isCrossTileAtomicRequired(args.emitPipeControlStall); args.semaphoreProgrammingRequired = ImplicitScalingHelper::isSemaphoreProgrammingRequired(); args.emitSelfCleanup = ImplicitScalingHelper::isSelfCleanupRequired(args, emitSelfCleanup); args.emitBatchBufferEnd = false; args.secondaryBatchBuffer = useSecondaryBatchBuffer; return args; } template size_t ImplicitScalingDispatch::getSize(bool apiSelfCleanup, bool preferStaticPartitioning, const DeviceBitfield &devices, const Vec3 &groupStart, const Vec3 &groupCount) { typename GfxFamily::COMPUTE_WALKER::PARTITION_TYPE partitionType{}; bool staticPartitioning = false; const uint32_t tileCount = static_cast(devices.count()); const uint32_t partitionCount = WalkerPartition::computePartitionCountAndPartitionType(tileCount, preferStaticPartitioning, groupStart, groupCount, {}, &partitionType, &staticPartitioning); UNRECOVERABLE_IF(staticPartitioning && (tileCount != partitionCount)); WalkerPartition::WalkerPartitionArgs args = prepareWalkerPartitionArgs(0u, tileCount, partitionCount, apiSelfCleanup, preferStaticPartitioning, staticPartitioning, false); return static_cast(WalkerPartition::estimateSpaceRequiredInCommandBuffer(args)); } template void ImplicitScalingDispatch::dispatchCommands(LinearStream &commandStream, WALKER_TYPE &walkerCmd, const DeviceBitfield &devices, uint32_t &partitionCount, bool useSecondaryBatchBuffer, bool apiSelfCleanup, bool usesImages, uint64_t workPartitionAllocationGpuVa, const HardwareInfo &hwInfo) { uint32_t totalProgrammedSize = 0u; const uint32_t tileCount = static_cast(devices.count()); const bool preferStaticPartitioning = workPartitionAllocationGpuVa != 0u; bool staticPartitioning = false; partitionCount = WalkerPartition::computePartitionCountAndSetPartitionType(&walkerCmd, tileCount, preferStaticPartitioning, usesImages, &staticPartitioning); 
WalkerPartition::WalkerPartitionArgs args = prepareWalkerPartitionArgs(workPartitionAllocationGpuVa, tileCount, partitionCount, apiSelfCleanup, preferStaticPartitioning, staticPartitioning, useSecondaryBatchBuffer); auto dispatchCommandsSize = getSize(apiSelfCleanup, preferStaticPartitioning, devices, {walkerCmd.getThreadGroupIdStartingX(), walkerCmd.getThreadGroupIdStartingY(), walkerCmd.getThreadGroupIdStartingZ()}, {walkerCmd.getThreadGroupIdXDimension(), walkerCmd.getThreadGroupIdYDimension(), walkerCmd.getThreadGroupIdZDimension()}); void *commandBuffer = commandStream.getSpace(dispatchCommandsSize); uint64_t cmdBufferGpuAddress = commandStream.getGraphicsAllocation()->getGpuAddress() + commandStream.getUsed() - dispatchCommandsSize; if (staticPartitioning) { UNRECOVERABLE_IF(tileCount != partitionCount); WalkerPartition::constructStaticallyPartitionedCommandBuffer(commandBuffer, cmdBufferGpuAddress, &walkerCmd, totalProgrammedSize, args, hwInfo); } else { if (DebugManager.flags.ExperimentalSetWalkerPartitionCount.get()) { partitionCount = DebugManager.flags.ExperimentalSetWalkerPartitionCount.get(); if (partitionCount == 1u) { walkerCmd.setPartitionType(GfxFamily::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_DISABLED); } args.partitionCount = partitionCount; } WalkerPartition::constructDynamicallyPartitionedCommandBuffer(commandBuffer, cmdBufferGpuAddress, &walkerCmd, totalProgrammedSize, args, hwInfo); } UNRECOVERABLE_IF(totalProgrammedSize != dispatchCommandsSize); } template bool &ImplicitScalingDispatch::getPipeControlStallRequired() { return ImplicitScalingDispatch::pipeControlStallRequired; } template size_t ImplicitScalingDispatch::getBarrierSize(const HardwareInfo &hwInfo, bool apiSelfCleanup, bool usePostSync) { WalkerPartition::WalkerPartitionArgs args = {}; args.emitSelfCleanup = apiSelfCleanup; args.useAtomicsForSelfCleanup = ImplicitScalingHelper::isAtomicsUsedForSelfCleanup(); args.usePostSync = usePostSync; return 
static_cast(WalkerPartition::estimateBarrierSpaceRequiredInCommandBuffer(args, hwInfo)); } template void ImplicitScalingDispatch::dispatchBarrierCommands(LinearStream &commandStream, const DeviceBitfield &devices, PipeControlArgs &flushArgs, const HardwareInfo &hwInfo, uint64_t gpuAddress, uint64_t immediateData, bool apiSelfCleanup, bool useSecondaryBatchBuffer) { uint32_t totalProgrammedSize = 0u; WalkerPartition::WalkerPartitionArgs args = {}; args.emitSelfCleanup = apiSelfCleanup; args.useAtomicsForSelfCleanup = ImplicitScalingHelper::isAtomicsUsedForSelfCleanup(); args.tileCount = static_cast(devices.count()); args.secondaryBatchBuffer = useSecondaryBatchBuffer; args.usePostSync = gpuAddress > 0; args.postSyncGpuAddress = gpuAddress; args.postSyncImmediateValue = immediateData; auto barrierCommandsSize = getBarrierSize(hwInfo, apiSelfCleanup, args.usePostSync); void *commandBuffer = commandStream.getSpace(barrierCommandsSize); uint64_t cmdBufferGpuAddress = commandStream.getGraphicsAllocation()->getGpuAddress() + commandStream.getUsed() - barrierCommandsSize; WalkerPartition::constructBarrierCommandBuffer(commandBuffer, cmdBufferGpuAddress, totalProgrammedSize, args, flushArgs, hwInfo); UNRECOVERABLE_IF(totalProgrammedSize != barrierCommandsSize); } template inline size_t ImplicitScalingDispatch::getRegisterConfigurationSize() { return EncodeSetMMIO::sizeMEM + getOffsetRegisterSize(); } template inline void ImplicitScalingDispatch::dispatchRegisterConfiguration(LinearStream &commandStream, uint64_t workPartitionSurfaceAddress, uint32_t addressOffset) { EncodeSetMMIO::encodeMEM(commandStream, PartitionRegisters::wparidCCSOffset, workPartitionSurfaceAddress); dispatchOffsetRegister(commandStream, addressOffset); } template inline size_t ImplicitScalingDispatch::getOffsetRegisterSize() { return EncodeSetMMIO::sizeIMM; } template inline void ImplicitScalingDispatch::dispatchOffsetRegister(LinearStream &commandStream, uint32_t addressOffset) { 
EncodeSetMMIO::encodeIMM(commandStream, PartitionRegisters::addressOffsetCCSOffset, addressOffset, true); } template inline uint32_t ImplicitScalingDispatch::getPostSyncOffset() { return static_cast(HwHelperHw::getSingleTimestampPacketSizeHw()); } template inline bool ImplicitScalingDispatch::platformSupportsImplicitScaling(const HardwareInfo &hwInfo) { return false; } } // namespace NEO compute-runtime-22.14.22890/shared/source/command_container/memory_fence_encoder.h000066400000000000000000000020041422164147700301140ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/linear_stream.h" #include "shared/source/memory_manager/graphics_allocation.h" namespace NEO { template struct EncodeMemoryFence { using STATE_SYSTEM_MEM_FENCE_ADDRESS = typename GfxFamily::STATE_SYSTEM_MEM_FENCE_ADDRESS; static size_t getSystemMemoryFenceSize() { return sizeof(STATE_SYSTEM_MEM_FENCE_ADDRESS); } static void encodeSystemMemoryFence(LinearStream &commandStream, const GraphicsAllocation *globalFenceAllocation) { auto stateSystemFenceAddressSpace = commandStream.getSpaceForCmd(); STATE_SYSTEM_MEM_FENCE_ADDRESS stateSystemFenceAddress = GfxFamily::cmdInitStateSystemMemFenceAddress; stateSystemFenceAddress.setSystemMemoryFenceAddress(globalFenceAllocation->getGpuAddress()); *stateSystemFenceAddressSpace = stateSystemFenceAddress; } }; } // namespace NEOcompute-runtime-22.14.22890/shared/source/command_container/walker_partition_interface.h000066400000000000000000000043041422164147700313500ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include #include namespace WalkerPartition { struct WalkerPartitionArgs { uint64_t workPartitionAllocationGpuVa = 0; uint64_t postSyncGpuAddress = 0; uint64_t postSyncImmediateValue = 0; uint32_t partitionCount = 0; uint32_t tileCount = 0; bool emitBatchBufferEnd = false; bool 
secondaryBatchBuffer = false; bool synchronizeBeforeExecution = false; bool crossTileAtomicSynchronization = false; bool semaphoreProgrammingRequired = false; bool staticPartitioning = false; bool emitSelfCleanup = false; bool useAtomicsForSelfCleanup = false; bool initializeWparidRegister = false; bool emitPipeControlStall = false; bool preferredStaticPartitioning = false; bool usePostSync = false; }; constexpr uint32_t wparidCCSOffset = 0x221C; constexpr uint32_t addressOffsetCCSOffset = 0x23B4; constexpr uint32_t predicationMaskCCSOffset = 0x21FC; constexpr uint32_t generalPurposeRegister0 = 0x2600; constexpr uint32_t generalPurposeRegister1 = 0x2608; constexpr uint32_t generalPurposeRegister2 = 0x2610; constexpr uint32_t generalPurposeRegister3 = 0x2618; constexpr uint32_t generalPurposeRegister4 = 0x2620; constexpr uint32_t generalPurposeRegister5 = 0x2628; constexpr uint32_t generalPurposeRegister6 = 0x2630; struct BatchBufferControlData { uint32_t partitionCount = 0u; uint32_t tileCount = 0u; uint32_t inTileCount = 0u; uint32_t finalSyncTileCount = 0u; }; constexpr size_t dynamicPartitioningFieldsForCleanupCount = sizeof(BatchBufferControlData) / sizeof(uint32_t) - 1; struct StaticPartitioningControlSection { uint32_t synchronizeBeforeWalkerCounter = 0; uint32_t synchronizeAfterWalkerCounter = 0; uint32_t finalSyncTileCounter = 0; }; constexpr size_t staticPartitioningFieldsForCleanupCount = sizeof(StaticPartitioningControlSection) / sizeof(uint32_t) - 1; struct BarrierControlSection { uint32_t crossTileSyncCount = 0u; uint32_t finalSyncTileCount = 0; }; constexpr size_t barrierControlSectionFieldsForCleanupCount = sizeof(BarrierControlSection) / sizeof(uint32_t) - 1; } // namespace WalkerPartition compute-runtime-22.14.22890/shared/source/command_container/walker_partition_xehp_and_later.h000066400000000000000000001251561422164147700323760ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma 
once #include "shared/source/command_container/command_encoder.h" #include "shared/source/command_container/walker_partition_interface.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/helpers/basic_math.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/helpers/pipe_control_args.h" #include "shared/source/helpers/ptr_math.h" #include #include namespace NEO { struct PipeControlArgs; } namespace WalkerPartition { template using COMPUTE_WALKER = typename GfxFamily::COMPUTE_WALKER; template using POSTSYNC_DATA = typename GfxFamily::POSTSYNC_DATA; template using BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START; template using BATCH_BUFFER_END = typename GfxFamily::MI_BATCH_BUFFER_END; template using LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM; template using LOAD_REGISTER_MEM = typename GfxFamily::MI_LOAD_REGISTER_MEM; template using MI_SET_PREDICATE = typename GfxFamily::MI_SET_PREDICATE; template using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT; template using MI_ATOMIC = typename GfxFamily::MI_ATOMIC; template using DATA_SIZE = typename GfxFamily::MI_ATOMIC::DATA_SIZE; template using LOAD_REGISTER_REG = typename GfxFamily::MI_LOAD_REGISTER_REG; template using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; template using MI_STORE_DATA_IMM = typename GfxFamily::MI_STORE_DATA_IMM; template using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION; template Command *putCommand(void *&inputAddress, uint32_t &totalBytesProgrammed) { totalBytesProgrammed += sizeof(Command); auto commandToReturn = reinterpret_cast(inputAddress); inputAddress = ptrOffset(inputAddress, sizeof(Command)); return commandToReturn; } template uint32_t computePartitionCountAndPartitionType(uint32_t preferredMinimalPartitionCount, bool preferStaticPartitioning, const Vec3 &groupStart, const Vec3 &groupCount, 
std::optional::PARTITION_TYPE> requestedPartitionType, typename COMPUTE_WALKER::PARTITION_TYPE *outSelectedPartitionType, bool *outSelectStaticPartitioning) { // For non uniform starting point, there is no support for partition in Hardware. Disable partitioning and select dynamic algorithm if (groupStart.x || groupStart.y || groupStart.z) { *outSelectedPartitionType = COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_DISABLED; *outSelectStaticPartitioning = false; return 1u; } size_t workgroupCount = 0u; bool disablePartitionForPartitionCountOne{}; if (NEO::DebugManager.flags.ExperimentalSetWalkerPartitionType.get() != -1) { requestedPartitionType = static_cast::PARTITION_TYPE>(NEO::DebugManager.flags.ExperimentalSetWalkerPartitionType.get()); } if (requestedPartitionType.has_value()) { switch (requestedPartitionType.value()) { case COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_X: workgroupCount = groupCount.x; break; case COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Y: workgroupCount = groupCount.y; break; case COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Z: workgroupCount = groupCount.z; break; default: UNRECOVERABLE_IF(true); } *outSelectedPartitionType = requestedPartitionType.value(); disablePartitionForPartitionCountOne = false; } else { const size_t maxDimension = std::max({groupCount.z, groupCount.y, groupCount.x}); auto goWithMaxAlgorithm = !preferStaticPartitioning; if (NEO::DebugManager.flags.WalkerPartitionPreferHighestDimension.get() != -1) { goWithMaxAlgorithm = !!!NEO::DebugManager.flags.WalkerPartitionPreferHighestDimension.get(); } //compute misaligned %, accept imbalance below threshold in favor of Z/Y/X distribution. 
const float minimalThreshold = 0.05f; float zImbalance = static_cast(groupCount.z - alignDown(groupCount.z, preferredMinimalPartitionCount)) / static_cast(groupCount.z); float yImbalance = static_cast(groupCount.y - alignDown(groupCount.y, preferredMinimalPartitionCount)) / static_cast(groupCount.y); //we first try with deepest dimension to see if we can partition there if (groupCount.z > 1 && (zImbalance <= minimalThreshold)) { *outSelectedPartitionType = COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Z; } else if (groupCount.y > 1 && (yImbalance < minimalThreshold)) { *outSelectedPartitionType = COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Y; } else if (groupCount.x % preferredMinimalPartitionCount == 0) { *outSelectedPartitionType = COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_X; } //if we are here then there is no dimension that results in even distribution, choose max dimension to minimize impact else { goWithMaxAlgorithm = true; } if (goWithMaxAlgorithm) { // default mode, select greatest dimension if (maxDimension == groupCount.x) { *outSelectedPartitionType = COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_X; } else if (maxDimension == groupCount.y) { *outSelectedPartitionType = COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Y; } else { *outSelectedPartitionType = COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Z; } } workgroupCount = maxDimension; disablePartitionForPartitionCountOne = true; } // Static partitioning - partition count == tile count *outSelectStaticPartitioning = preferStaticPartitioning; if (preferStaticPartitioning) { return preferredMinimalPartitionCount; } // Dynamic partitioning - compute optimal partition count size_t partitionCount = std::min(static_cast(16u), workgroupCount); partitionCount = Math::prevPowerOfTwo(partitionCount); if (NEO::DebugManager.flags.SetMinimalPartitionSize.get() != 0) { const auto workgroupPerPartitionThreshold = NEO::DebugManager.flags.SetMinimalPartitionSize.get() == -1 ? 
512u : static_cast(NEO::DebugManager.flags.SetMinimalPartitionSize.get()); preferredMinimalPartitionCount = std::max(2u, preferredMinimalPartitionCount); while (partitionCount > preferredMinimalPartitionCount) { auto workgroupsPerPartition = workgroupCount / partitionCount; if (workgroupsPerPartition >= workgroupPerPartitionThreshold) { break; } partitionCount = partitionCount / 2; } } if (partitionCount == 1u && disablePartitionForPartitionCountOne) { *outSelectedPartitionType = COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_DISABLED; } return static_cast(partitionCount); } template uint32_t computePartitionCountAndSetPartitionType(COMPUTE_WALKER *walker, uint32_t preferredMinimalPartitionCount, bool preferStaticPartitioning, bool usesImages, bool *outSelectStaticPartitioning) { const Vec3 groupStart = {walker->getThreadGroupIdStartingX(), walker->getThreadGroupIdStartingY(), walker->getThreadGroupIdStartingZ()}; const Vec3 groupCount = {walker->getThreadGroupIdXDimension(), walker->getThreadGroupIdYDimension(), walker->getThreadGroupIdZDimension()}; std::optional::PARTITION_TYPE> requestedPartitionType{}; if (usesImages) { requestedPartitionType = COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_X; } typename COMPUTE_WALKER::PARTITION_TYPE partitionType{}; const auto partitionCount = computePartitionCountAndPartitionType(preferredMinimalPartitionCount, preferStaticPartitioning, groupStart, groupCount, requestedPartitionType, &partitionType, outSelectStaticPartitioning); walker->setPartitionType(partitionType); return partitionCount; } template void programRegisterWithValue(void *&inputAddress, uint32_t registerOffset, uint32_t &totalBytesProgrammed, uint32_t registerValue) { auto loadRegisterImmediate = putCommand>(inputAddress, totalBytesProgrammed); LOAD_REGISTER_IMM cmd = GfxFamily::cmdInitLoadRegisterImm; cmd.setRegisterOffset(registerOffset); cmd.setDataDword(registerValue); cmd.setMmioRemapEnable(true); *loadRegisterImmediate = cmd; } template void 
programWaitForSemaphore(void *&inputAddress, uint32_t &totalBytesProgrammed, uint64_t gpuAddress, uint32_t semaphoreCompareValue, typename MI_SEMAPHORE_WAIT::COMPARE_OPERATION compareOperation) { auto semaphoreWait = putCommand>(inputAddress, totalBytesProgrammed); MI_SEMAPHORE_WAIT cmd = GfxFamily::cmdInitMiSemaphoreWait; cmd.setSemaphoreDataDword(semaphoreCompareValue); cmd.setSemaphoreGraphicsAddress(gpuAddress); cmd.setWaitMode(MI_SEMAPHORE_WAIT::WAIT_MODE::WAIT_MODE_POLLING_MODE); cmd.setCompareOperation(compareOperation); *semaphoreWait = cmd; } template bool programWparidMask(void *&inputAddress, uint32_t &totalBytesProgrammed, uint32_t partitionCount) { //currently only power of 2 values of partitionCount are being supported if (!Math::isPow2(partitionCount) || partitionCount > 16) { return false; } auto mask = 0xFFE0; auto fillValue = 0x10; auto count = partitionCount; while (count < 16) { fillValue |= (fillValue >> 1); count *= 2; } mask |= (mask | fillValue); programRegisterWithValue(inputAddress, predicationMaskCCSOffset, totalBytesProgrammed, mask); return true; } template void programWparidPredication(void *&inputAddress, uint32_t &totalBytesProgrammed, bool predicationEnabled) { auto miSetPredicate = putCommand>(inputAddress, totalBytesProgrammed); MI_SET_PREDICATE cmd = GfxFamily::cmdInitSetPredicate; if (predicationEnabled) { cmd.setPredicateEnableWparid(MI_SET_PREDICATE::PREDICATE_ENABLE_WPARID::PREDICATE_ENABLE_WPARID_NOOP_ON_NON_ZERO_VALUE); } else { cmd.setPredicateEnable(MI_SET_PREDICATE::PREDICATE_ENABLE::PREDICATE_ENABLE_PREDICATE_DISABLE); } *miSetPredicate = cmd; } template void programMiAtomic(void *&inputAddress, uint32_t &totalBytesProgrammed, uint64_t gpuAddress, bool requireReturnValue, typename MI_ATOMIC::ATOMIC_OPCODES atomicOpcode) { auto miAtomic = putCommand>(inputAddress, totalBytesProgrammed); NEO::EncodeAtomic::programMiAtomic(miAtomic, gpuAddress, atomicOpcode, DATA_SIZE::DATA_SIZE_DWORD, requireReturnValue, 
requireReturnValue, 0x0u, 0x0u); } template void programMiBatchBufferStart(void *&inputAddress, uint32_t &totalBytesProgrammed, uint64_t gpuAddress, bool predicationEnabled, bool secondary) { auto batchBufferStart = putCommand>(inputAddress, totalBytesProgrammed); BATCH_BUFFER_START cmd = GfxFamily::cmdInitBatchBufferStart; cmd.setSecondLevelBatchBuffer(static_cast::SECOND_LEVEL_BATCH_BUFFER>(secondary)); cmd.setAddressSpaceIndicator(BATCH_BUFFER_START::ADDRESS_SPACE_INDICATOR::ADDRESS_SPACE_INDICATOR_PPGTT); cmd.setPredicationEnable(predicationEnabled); cmd.setBatchBufferStartAddress(gpuAddress); *batchBufferStart = cmd; } template void programMiLoadRegisterReg(void *&inputAddress, uint32_t &totalBytesProgrammed, uint32_t sourceRegisterOffset, uint32_t destinationRegisterOffset) { auto loadRegisterReg = putCommand>(inputAddress, totalBytesProgrammed); LOAD_REGISTER_REG cmd = GfxFamily::cmdInitLoadRegisterReg; cmd.setMmioRemapEnableSource(true); cmd.setMmioRemapEnableDestination(true); cmd.setSourceRegisterAddress(sourceRegisterOffset); cmd.setDestinationRegisterAddress(destinationRegisterOffset); *loadRegisterReg = cmd; } template void programMiLoadRegisterMem(void *&inputAddress, uint32_t &totalBytesProgrammed, uint64_t gpuAddressToLoad, uint32_t destinationRegisterOffset) { auto loadRegisterReg = putCommand>(inputAddress, totalBytesProgrammed); LOAD_REGISTER_MEM cmd = GfxFamily::cmdInitLoadRegisterMem; cmd.setMmioRemapEnable(true); cmd.setMemoryAddress(gpuAddressToLoad); cmd.setRegisterAddress(destinationRegisterOffset); *loadRegisterReg = cmd; } template void programPipeControlCommand(void *&inputAddress, uint32_t &totalBytesProgrammed, NEO::PipeControlArgs &flushArgs) { auto pipeControl = putCommand>(inputAddress, totalBytesProgrammed); PIPE_CONTROL cmd = GfxFamily::cmdInitPipeControl; NEO::MemorySynchronizationCommands::setPipeControl(cmd, flushArgs); *pipeControl = cmd; } template void programPostSyncPipeControlCommand(void *&inputAddress, uint32_t 
&totalBytesProgrammed, WalkerPartitionArgs &args, NEO::PipeControlArgs &flushArgs, const NEO::HardwareInfo &hwInfo) { NEO::MemorySynchronizationCommands::setPipeControlAndProgramPostSyncOperation(inputAddress, POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, args.postSyncGpuAddress, args.postSyncImmediateValue, hwInfo, flushArgs); totalBytesProgrammed += static_cast(NEO::MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(hwInfo)); } template void programStoreMemImmediateDword(void *&inputAddress, uint32_t &totalBytesProgrammed, uint64_t gpuAddress, uint32_t data) { auto storeDataImmediate = putCommand>(inputAddress, totalBytesProgrammed); MI_STORE_DATA_IMM cmd = GfxFamily::cmdInitStoreDataImm; cmd.setAddress(gpuAddress); cmd.setStoreQword(false); cmd.setDwordLength(MI_STORE_DATA_IMM::DWORD_LENGTH::DWORD_LENGTH_STORE_DWORD); cmd.setDataDword0(static_cast(data)); *storeDataImmediate = cmd; } template uint64_t computeSelfCleanupSectionSize(bool useAtomicsForSelfCleanup) { if (useAtomicsForSelfCleanup) { return sizeof(MI_ATOMIC); } else { return sizeof(MI_STORE_DATA_IMM); } } template void programSelfCleanupSection(void *&inputAddress, uint32_t &totalBytesProgrammed, uint64_t address, bool useAtomicsForSelfCleanup) { if (useAtomicsForSelfCleanup) { programMiAtomic(inputAddress, totalBytesProgrammed, address, false, MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_MOVE); } else { programStoreMemImmediateDword(inputAddress, totalBytesProgrammed, address, 0u); } } template uint64_t computeTilesSynchronizationWithAtomicsSectionSize() { return sizeof(MI_ATOMIC) + sizeof(MI_SEMAPHORE_WAIT); } template void programTilesSynchronizationWithAtomics(void *¤tBatchBufferPointer, uint32_t &totalBytesProgrammed, uint64_t atomicAddress, uint32_t tileCount) { programMiAtomic(currentBatchBufferPointer, totalBytesProgrammed, atomicAddress, false, MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT); programWaitForSemaphore(currentBatchBufferPointer, 
totalBytesProgrammed, atomicAddress, tileCount, MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD); } template uint64_t computeSelfCleanupEndSectionSize(size_t fieldsForCleanupCount, bool useAtomicsForSelfCleanup) { return fieldsForCleanupCount * computeSelfCleanupSectionSize(useAtomicsForSelfCleanup) + 2 * computeTilesSynchronizationWithAtomicsSectionSize(); } template void programSelfCleanupEndSection(void *&inputAddress, uint32_t &totalBytesProgrammed, uint64_t finalSyncTileCountAddress, uint64_t baseAddressForCleanup, size_t fieldsForCleanupCount, uint32_t tileCount, bool useAtomicsForSelfCleanup) { // Synchronize tiles, so the fields are not cleared while still in use programTilesSynchronizationWithAtomics(inputAddress, totalBytesProgrammed, finalSyncTileCountAddress, tileCount); for (auto fieldIndex = 0u; fieldIndex < fieldsForCleanupCount; fieldIndex++) { const uint64_t addressForCleanup = baseAddressForCleanup + fieldIndex * sizeof(uint32_t); programSelfCleanupSection(inputAddress, totalBytesProgrammed, addressForCleanup, useAtomicsForSelfCleanup); } //this synchronization point ensures that all tiles finished zeroing and will fairly access control section atomic variables programTilesSynchronizationWithAtomics(inputAddress, totalBytesProgrammed, finalSyncTileCountAddress, 2 * tileCount); } template void programTilesSynchronizationWithPostSyncs(void *¤tBatchBufferPointer, uint32_t &totalBytesProgrammed, COMPUTE_WALKER *inputWalker, uint32_t partitionCount) { const auto postSyncAddress = inputWalker->getPostSync().getDestinationAddress() + 8llu; for (uint32_t partitionId = 0u; partitionId < partitionCount; partitionId++) { programWaitForSemaphore(currentBatchBufferPointer, totalBytesProgrammed, postSyncAddress + partitionId * 16llu, 1u, MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD); } } template uint64_t computeWalkerSectionSize() { return sizeof(BATCH_BUFFER_START) + sizeof(COMPUTE_WALKER); } 
template uint64_t computeControlSectionOffset(WalkerPartitionArgs &args) { uint64_t size = 0u; size += args.synchronizeBeforeExecution ? computeTilesSynchronizationWithAtomicsSectionSize() : 0; size += sizeof(LOAD_REGISTER_IMM); //predication mask size += sizeof(MI_ATOMIC); //current id for partition size += sizeof(LOAD_REGISTER_REG); //id into register size += sizeof(MI_SET_PREDICATE) * 2 + sizeof(BATCH_BUFFER_START) * 2; size += (args.semaphoreProgrammingRequired ? sizeof(MI_SEMAPHORE_WAIT) * args.partitionCount : 0u); size += computeWalkerSectionSize(); size += args.emitPipeControlStall ? sizeof(PIPE_CONTROL) : 0u; if (args.crossTileAtomicSynchronization || args.emitSelfCleanup) { size += computeTilesSynchronizationWithAtomicsSectionSize(); } if (args.emitSelfCleanup) { size += computeSelfCleanupSectionSize(args.useAtomicsForSelfCleanup); } size += args.preferredStaticPartitioning ? sizeof(LOAD_REGISTER_MEM) : 0u; return size; } template uint64_t computeWalkerSectionStart(WalkerPartitionArgs &args) { return computeControlSectionOffset(args) - computeWalkerSectionSize(); } template void programPartitionedWalker(void *&inputAddress, uint32_t &totalBytesProgrammed, COMPUTE_WALKER *inputWalker, uint32_t partitionCount) { auto computeWalker = putCommand>(inputAddress, totalBytesProgrammed); COMPUTE_WALKER cmd = *inputWalker; if (partitionCount > 1) { auto partitionType = inputWalker->getPartitionType(); assert(inputWalker->getThreadGroupIdStartingX() == 0u); assert(inputWalker->getThreadGroupIdStartingY() == 0u); assert(inputWalker->getThreadGroupIdStartingZ() == 0u); assert(partitionType != COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_DISABLED); cmd.setWorkloadPartitionEnable(true); auto workgroupCount = 0u; if (partitionType == COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_X) { workgroupCount = inputWalker->getThreadGroupIdXDimension(); } else if (partitionType == COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Y) { workgroupCount = 
inputWalker->getThreadGroupIdYDimension(); } else { workgroupCount = inputWalker->getThreadGroupIdZDimension(); } cmd.setPartitionSize((workgroupCount + partitionCount - 1u) / partitionCount); } *computeWalker = cmd; } /* SAMPLE COMMAND BUFFER STRUCTURE, birds eye view for 16 partitions, 4 tiles //inital setup section 1. MI_LOAD_REGISTER(PREDICATION_MASK, active partition mask ) //loop 1 - loop as long as there are partitions to be serviced 2. MI_ATOMIC_INC( ATOMIC LOCATION #31 within CMD buffer ) 3. MI_LOAD_REGISTER_REG ( ATOMIC RESULT -> WPARID ) 4. MI_SET_PREDICATE( WPARID MODE ) 5. BATCH_BUFFER_START( LOCATION #28 ) // this will not be executed if partition outside of active virtual partitions //loop 1 ends here, if we are here it means there are no more partitions 6. MI_SET_PREDICATE ( OFF ) //Walker synchronization section starts here, make sure that Walker is done 7, PIPE_CONTROL ( DC_FLUSH ) //wait for all post syncs to make sure whole work is done, caller needs to set them to 1. //now epilogue starts synchro all engines prior to coming back to RING, this will be once per command buffer to make sure that all engines actually passed via cmd buffer. //epilogue section, make sure every tile completed prior to continuing //This is cross-tile synchronization 24. ATOMIC_INC( LOCATION #31) 25. WAIT_FOR_SEMAPHORE ( LOCATION #31, LOWER THEN 4 ) // wait till all tiles hit atomic 26. PIPE_CONTROL ( TAG UPDATE ) (not implemented) 27. BATCH_BUFFER_STAT (LOCATION #32) // go to the very end //Walker section 28. COMPUTE_WALKER 29. BATCH BUFFER_START ( GO BACK TO #2) //Batch Buffer Control Data section, there are no real commands here but we have memory here //That will be updated via atomic operations. 30. uint32_t virtualPartitionID //atomic location 31. uint32_t completionTileID //all tiles needs to report completion 32. 
BATCH_BUFFER_END ( optional ) */ template void constructDynamicallyPartitionedCommandBuffer(void *cpuPointer, uint64_t gpuAddressOfAllocation, COMPUTE_WALKER *inputWalker, uint32_t &totalBytesProgrammed, WalkerPartitionArgs &args, const NEO::HardwareInfo &hwInfo) { totalBytesProgrammed = 0u; void *currentBatchBufferPointer = cpuPointer; auto controlSectionOffset = computeControlSectionOffset(args); if (args.synchronizeBeforeExecution) { auto tileAtomicAddress = gpuAddressOfAllocation + controlSectionOffset + offsetof(BatchBufferControlData, inTileCount); programTilesSynchronizationWithAtomics(currentBatchBufferPointer, totalBytesProgrammed, tileAtomicAddress, args.tileCount); } programWparidMask(currentBatchBufferPointer, totalBytesProgrammed, args.partitionCount); programMiAtomic(currentBatchBufferPointer, totalBytesProgrammed, gpuAddressOfAllocation + controlSectionOffset, true, MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT); //move atomic result to wparid programMiLoadRegisterReg(currentBatchBufferPointer, totalBytesProgrammed, generalPurposeRegister4, wparidCCSOffset); //enable predication basing on wparid value programWparidPredication(currentBatchBufferPointer, totalBytesProgrammed, true); programMiBatchBufferStart(currentBatchBufferPointer, totalBytesProgrammed, gpuAddressOfAllocation + computeWalkerSectionStart(args), true, args.secondaryBatchBuffer); //disable predication to not noop subsequent commands. 
programWparidPredication(currentBatchBufferPointer, totalBytesProgrammed, false); if (args.emitSelfCleanup) { const auto finalSyncTileCountField = gpuAddressOfAllocation + controlSectionOffset + offsetof(BatchBufferControlData, finalSyncTileCount); programSelfCleanupSection(currentBatchBufferPointer, totalBytesProgrammed, finalSyncTileCountField, args.useAtomicsForSelfCleanup); } if (args.emitPipeControlStall) { NEO::PipeControlArgs args; args.dcFlushEnable = NEO::MemorySynchronizationCommands::getDcFlushEnable(true, hwInfo); programPipeControlCommand(currentBatchBufferPointer, totalBytesProgrammed, args); } if (args.semaphoreProgrammingRequired) { auto postSyncAddress = inputWalker->getPostSync().getDestinationAddress() + 8llu; for (uint32_t partitionId = 0u; partitionId < args.partitionCount; partitionId++) { programWaitForSemaphore(currentBatchBufferPointer, totalBytesProgrammed, postSyncAddress + partitionId * 16llu, 1u, MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD); } } if (args.crossTileAtomicSynchronization || args.emitSelfCleanup) { auto tileAtomicAddress = gpuAddressOfAllocation + controlSectionOffset + offsetof(BatchBufferControlData, tileCount); programTilesSynchronizationWithAtomics(currentBatchBufferPointer, totalBytesProgrammed, tileAtomicAddress, args.tileCount); } if (args.preferredStaticPartitioning) { programMiLoadRegisterMem(currentBatchBufferPointer, totalBytesProgrammed, args.workPartitionAllocationGpuVa, wparidCCSOffset); } //this bb start goes to the end of partitioned command buffer programMiBatchBufferStart( currentBatchBufferPointer, totalBytesProgrammed, gpuAddressOfAllocation + controlSectionOffset + sizeof(BatchBufferControlData), false, args.secondaryBatchBuffer); //Walker section programPartitionedWalker(currentBatchBufferPointer, totalBytesProgrammed, inputWalker, args.partitionCount); programMiBatchBufferStart(currentBatchBufferPointer, totalBytesProgrammed, gpuAddressOfAllocation, false, 
args.secondaryBatchBuffer); auto controlSection = reinterpret_cast(ptrOffset(cpuPointer, static_cast(controlSectionOffset))); controlSection->partitionCount = 0u; controlSection->tileCount = 0u; controlSection->inTileCount = 0u; controlSection->finalSyncTileCount = 0u; totalBytesProgrammed += sizeof(BatchBufferControlData); currentBatchBufferPointer = ptrOffset(currentBatchBufferPointer, sizeof(BatchBufferControlData)); if (args.emitSelfCleanup) { const auto finalSyncTileCountAddress = gpuAddressOfAllocation + controlSectionOffset + offsetof(BatchBufferControlData, finalSyncTileCount); programSelfCleanupEndSection(currentBatchBufferPointer, totalBytesProgrammed, finalSyncTileCountAddress, gpuAddressOfAllocation + controlSectionOffset, dynamicPartitioningFieldsForCleanupCount, args.tileCount, args.useAtomicsForSelfCleanup); } if (args.emitBatchBufferEnd) { auto batchBufferEnd = putCommand>(currentBatchBufferPointer, totalBytesProgrammed); *batchBufferEnd = GfxFamily::cmdInitBatchBufferEnd; } } template bool isStartAndControlSectionRequired(WalkerPartitionArgs &args) { return args.synchronizeBeforeExecution || args.crossTileAtomicSynchronization || args.emitSelfCleanup; } template uint64_t computeStaticPartitioningControlSectionOffset(WalkerPartitionArgs &args) { const auto beforeExecutionSyncAtomicSize = args.synchronizeBeforeExecution ? computeTilesSynchronizationWithAtomicsSectionSize() : 0u; const auto afterExecutionSyncAtomicSize = (args.crossTileAtomicSynchronization || args.emitSelfCleanup) ? computeTilesSynchronizationWithAtomicsSectionSize() : 0u; const auto afterExecutionSyncPostSyncSize = args.semaphoreProgrammingRequired ? sizeof(MI_SEMAPHORE_WAIT) * args.partitionCount : 0u; const auto selfCleanupSectionSize = args.emitSelfCleanup ? computeSelfCleanupSectionSize(args.useAtomicsForSelfCleanup) : 0u; const auto wparidRegisterSize = args.initializeWparidRegister ? sizeof(LOAD_REGISTER_MEM) : 0u; const auto pipeControlSize = args.emitPipeControlStall ? 
sizeof(PIPE_CONTROL) : 0u; const auto bbStartSize = isStartAndControlSectionRequired(args) ? sizeof(BATCH_BUFFER_START) : 0u; return beforeExecutionSyncAtomicSize + wparidRegisterSize + pipeControlSize + sizeof(COMPUTE_WALKER) + selfCleanupSectionSize + afterExecutionSyncAtomicSize + afterExecutionSyncPostSyncSize + bbStartSize; } template void constructStaticallyPartitionedCommandBuffer(void *cpuPointer, uint64_t gpuAddressOfAllocation, COMPUTE_WALKER *inputWalker, uint32_t &totalBytesProgrammed, WalkerPartitionArgs &args, const NEO::HardwareInfo &hwInfo) { totalBytesProgrammed = 0u; void *currentBatchBufferPointer = cpuPointer; // Get address of the control section const auto controlSectionOffset = computeStaticPartitioningControlSectionOffset(args); const auto afterControlSectionOffset = controlSectionOffset + sizeof(StaticPartitioningControlSection); // Synchronize tiles before walker if (args.synchronizeBeforeExecution) { const auto atomicAddress = gpuAddressOfAllocation + controlSectionOffset + offsetof(StaticPartitioningControlSection, synchronizeBeforeWalkerCounter); programTilesSynchronizationWithAtomics(currentBatchBufferPointer, totalBytesProgrammed, atomicAddress, args.tileCount); } // Load partition ID to wparid register and execute walker if (args.initializeWparidRegister) { programMiLoadRegisterMem(currentBatchBufferPointer, totalBytesProgrammed, args.workPartitionAllocationGpuVa, wparidCCSOffset); } programPartitionedWalker(currentBatchBufferPointer, totalBytesProgrammed, inputWalker, args.partitionCount); // Prepare for cleanup section if (args.emitSelfCleanup) { const auto finalSyncTileCountField = gpuAddressOfAllocation + controlSectionOffset + offsetof(StaticPartitioningControlSection, finalSyncTileCounter); programSelfCleanupSection(currentBatchBufferPointer, totalBytesProgrammed, finalSyncTileCountField, args.useAtomicsForSelfCleanup); } if (args.emitPipeControlStall) { NEO::PipeControlArgs args; args.dcFlushEnable = 
NEO::MemorySynchronizationCommands::getDcFlushEnable(true, hwInfo); programPipeControlCommand(currentBatchBufferPointer, totalBytesProgrammed, args); } // Synchronize tiles after walker if (args.semaphoreProgrammingRequired) { programTilesSynchronizationWithPostSyncs(currentBatchBufferPointer, totalBytesProgrammed, inputWalker, args.partitionCount); } if (args.crossTileAtomicSynchronization || args.emitSelfCleanup) { const auto atomicAddress = gpuAddressOfAllocation + controlSectionOffset + offsetof(StaticPartitioningControlSection, synchronizeAfterWalkerCounter); programTilesSynchronizationWithAtomics(currentBatchBufferPointer, totalBytesProgrammed, atomicAddress, args.tileCount); } // Jump over the control section only when needed if (isStartAndControlSectionRequired(args)) { programMiBatchBufferStart(currentBatchBufferPointer, totalBytesProgrammed, gpuAddressOfAllocation + afterControlSectionOffset, false, args.secondaryBatchBuffer); // Control section DEBUG_BREAK_IF(totalBytesProgrammed != controlSectionOffset); StaticPartitioningControlSection *controlSection = putCommand(currentBatchBufferPointer, totalBytesProgrammed); controlSection->synchronizeBeforeWalkerCounter = 0u; controlSection->synchronizeAfterWalkerCounter = 0u; controlSection->finalSyncTileCounter = 0u; DEBUG_BREAK_IF(totalBytesProgrammed != afterControlSectionOffset); } // Cleanup section if (args.emitSelfCleanup) { const auto finalSyncTileCountAddress = gpuAddressOfAllocation + controlSectionOffset + offsetof(StaticPartitioningControlSection, finalSyncTileCounter); programSelfCleanupEndSection(currentBatchBufferPointer, totalBytesProgrammed, finalSyncTileCountAddress, gpuAddressOfAllocation + controlSectionOffset, staticPartitioningFieldsForCleanupCount, args.tileCount, args.useAtomicsForSelfCleanup); } } template uint64_t estimateSpaceRequiredInCommandBuffer(WalkerPartitionArgs &args) { uint64_t size = {}; if (args.staticPartitioning) { size += 
computeStaticPartitioningControlSectionOffset(args); size += isStartAndControlSectionRequired(args) ? sizeof(StaticPartitioningControlSection) : 0u; size += args.emitSelfCleanup ? computeSelfCleanupEndSectionSize(staticPartitioningFieldsForCleanupCount, args.useAtomicsForSelfCleanup) : 0u; } else { size += computeControlSectionOffset(args); size += sizeof(BatchBufferControlData); size += args.emitBatchBufferEnd ? sizeof(BATCH_BUFFER_END) : 0u; size += args.emitSelfCleanup ? computeSelfCleanupEndSectionSize(dynamicPartitioningFieldsForCleanupCount, args.useAtomicsForSelfCleanup) : 0u; } return size; } template uint64_t computeBarrierControlSectionOffset(WalkerPartitionArgs &args, const NEO::HardwareInfo &hwInfo) { uint64_t offset = 0u; if (args.emitSelfCleanup) { offset += computeSelfCleanupSectionSize(args.useAtomicsForSelfCleanup); } if (args.usePostSync) { offset += NEO::MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(hwInfo); } else { offset += sizeof(PIPE_CONTROL); } offset += (computeTilesSynchronizationWithAtomicsSectionSize() + sizeof(BATCH_BUFFER_START)); return offset; } template uint64_t estimateBarrierSpaceRequiredInCommandBuffer(WalkerPartitionArgs &args, const NEO::HardwareInfo &hwInfo) { uint64_t size = computeBarrierControlSectionOffset(args, hwInfo) + sizeof(BarrierControlSection); if (args.emitSelfCleanup) { size += computeSelfCleanupEndSectionSize(barrierControlSectionFieldsForCleanupCount, args.useAtomicsForSelfCleanup); } return size; } template void constructBarrierCommandBuffer(void *cpuPointer, uint64_t gpuAddressOfAllocation, uint32_t &totalBytesProgrammed, WalkerPartitionArgs &args, NEO::PipeControlArgs &flushArgs, const NEO::HardwareInfo &hwInfo) { void *currentBatchBufferPointer = cpuPointer; const auto controlSectionOffset = computeBarrierControlSectionOffset(args, hwInfo); const auto finalSyncTileCountField = gpuAddressOfAllocation + controlSectionOffset + offsetof(BarrierControlSection, finalSyncTileCount); if 
(args.emitSelfCleanup) { programSelfCleanupSection(currentBatchBufferPointer, totalBytesProgrammed, finalSyncTileCountField, args.useAtomicsForSelfCleanup); } if (args.usePostSync) { programPostSyncPipeControlCommand(currentBatchBufferPointer, totalBytesProgrammed, args, flushArgs, hwInfo); } else { programPipeControlCommand(currentBatchBufferPointer, totalBytesProgrammed, flushArgs); } const auto crossTileSyncCountField = gpuAddressOfAllocation + controlSectionOffset + offsetof(BarrierControlSection, crossTileSyncCount); programTilesSynchronizationWithAtomics(currentBatchBufferPointer, totalBytesProgrammed, crossTileSyncCountField, args.tileCount); const auto afterControlSectionOffset = controlSectionOffset + sizeof(BarrierControlSection); programMiBatchBufferStart(currentBatchBufferPointer, totalBytesProgrammed, gpuAddressOfAllocation + afterControlSectionOffset, false, args.secondaryBatchBuffer); DEBUG_BREAK_IF(totalBytesProgrammed != controlSectionOffset); BarrierControlSection *controlSection = putCommand(currentBatchBufferPointer, totalBytesProgrammed); controlSection->crossTileSyncCount = 0u; controlSection->finalSyncTileCount = 0u; DEBUG_BREAK_IF(totalBytesProgrammed != afterControlSectionOffset); if (args.emitSelfCleanup) { programSelfCleanupEndSection(currentBatchBufferPointer, totalBytesProgrammed, finalSyncTileCountField, gpuAddressOfAllocation + controlSectionOffset, barrierControlSectionFieldsForCleanupCount, args.tileCount, args.useAtomicsForSelfCleanup); } } } // namespace WalkerPartition compute-runtime-22.14.22890/shared/source/command_stream/000077500000000000000000000000001422164147700231115ustar00rootroot00000000000000compute-runtime-22.14.22890/shared/source/command_stream/CMakeLists.txt000066400000000000000000000105241422164147700256530ustar00rootroot00000000000000# # Copyright (C) 2019-2022 Intel Corporation # # SPDX-License-Identifier: MIT # set(NEO_CORE_COMMAND_STREAM ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt 
${CMAKE_CURRENT_SOURCE_DIR}/aub_command_stream_receiver.cpp ${CMAKE_CURRENT_SOURCE_DIR}/aub_command_stream_receiver.h ${CMAKE_CURRENT_SOURCE_DIR}/aub_command_stream_receiver_hw.h ${CMAKE_CURRENT_SOURCE_DIR}/aub_command_stream_receiver_hw_base.inl ${CMAKE_CURRENT_SOURCE_DIR}/aub_command_stream_receiver_hw_bdw_and_later.inl ${CMAKE_CURRENT_SOURCE_DIR}/aub_subcapture_status.h ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver.cpp ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver.h ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_hw.h ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_hw_base.inl ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_hw_bdw_and_later.inl ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_simulated_common_hw.h ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_simulated_common_hw_base.inl ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_simulated_common_hw_bdw_and_later.inl ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_simulated_hw.h ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_with_aub_dump.h ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_with_aub_dump.inl ${CMAKE_CURRENT_SOURCE_DIR}/create_command_stream_impl.cpp ${CMAKE_CURRENT_SOURCE_DIR}/create_command_stream_impl.h ${CMAKE_CURRENT_SOURCE_DIR}/csr_definitions.h ${CMAKE_CURRENT_SOURCE_DIR}/csr_deps.cpp ${CMAKE_CURRENT_SOURCE_DIR}/csr_deps.h ${CMAKE_CURRENT_SOURCE_DIR}/csr_properties_flags.h ${CMAKE_CURRENT_SOURCE_DIR}/definitions${BRANCH_DIR_SUFFIX}command_stream_receiver_hw_ext.inl ${CMAKE_CURRENT_SOURCE_DIR}/definitions${BRANCH_DIR_SUFFIX}stream_properties.inl ${CMAKE_CURRENT_SOURCE_DIR}/device_command_stream.h ${CMAKE_CURRENT_SOURCE_DIR}/experimental_command_buffer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/experimental_command_buffer.h ${CMAKE_CURRENT_SOURCE_DIR}/experimental_command_buffer.inl ${CMAKE_CURRENT_SOURCE_DIR}/linear_stream.cpp ${CMAKE_CURRENT_SOURCE_DIR}/linear_stream.h ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}per_dss_backed_buffer.cpp 
${CMAKE_CURRENT_SOURCE_DIR}/preemption.cpp ${CMAKE_CURRENT_SOURCE_DIR}/preemption.h ${CMAKE_CURRENT_SOURCE_DIR}/preemption.inl ${CMAKE_CURRENT_SOURCE_DIR}/preemption_mode.h ${CMAKE_CURRENT_SOURCE_DIR}/scratch_space_controller.cpp ${CMAKE_CURRENT_SOURCE_DIR}/scratch_space_controller.h ${CMAKE_CURRENT_SOURCE_DIR}/scratch_space_controller_base.cpp ${CMAKE_CURRENT_SOURCE_DIR}/scratch_space_controller_base.h ${CMAKE_CURRENT_SOURCE_DIR}/stream_properties.cpp ${CMAKE_CURRENT_SOURCE_DIR}/stream_properties.h ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}stream_properties_extra.cpp ${CMAKE_CURRENT_SOURCE_DIR}/stream_property.h ${CMAKE_CURRENT_SOURCE_DIR}/submission_status.h ${CMAKE_CURRENT_SOURCE_DIR}/submissions_aggregator.cpp ${CMAKE_CURRENT_SOURCE_DIR}/submissions_aggregator.h ${CMAKE_CURRENT_SOURCE_DIR}/tbx_command_stream_receiver.cpp ${CMAKE_CURRENT_SOURCE_DIR}/tbx_command_stream_receiver.h ${CMAKE_CURRENT_SOURCE_DIR}/tbx_command_stream_receiver_hw.h ${CMAKE_CURRENT_SOURCE_DIR}/tbx_command_stream_receiver_hw.inl ${CMAKE_CURRENT_SOURCE_DIR}/tbx_stream.cpp ${CMAKE_CURRENT_SOURCE_DIR}/thread_arbitration_policy.h ${CMAKE_CURRENT_SOURCE_DIR}/wait_status.h ) if(SUPPORT_XEHP_AND_LATER) list(APPEND NEO_CORE_COMMAND_STREAM ${CMAKE_CURRENT_SOURCE_DIR}/aub_command_stream_receiver_hw_xehp_and_later.inl ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_hw_xehp_and_later.inl ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_simulated_common_hw_xehp_and_later.inl ${CMAKE_CURRENT_SOURCE_DIR}/preemption_xehp_and_later.inl ${CMAKE_CURRENT_SOURCE_DIR}/scratch_space_controller_xehp_and_later.cpp ${CMAKE_CURRENT_SOURCE_DIR}/scratch_space_controller_xehp_and_later.h ${CMAKE_CURRENT_SOURCE_DIR}/tbx_command_stream_receiver_xehp_and_later.inl ) endif() if(SUPPORT_DG2_AND_LATER) list(APPEND NEO_CORE_COMMAND_STREAM ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_hw_dg2_and_later.inl ) endif() set_property(GLOBAL PROPERTY NEO_CORE_COMMAND_STREAM ${NEO_CORE_COMMAND_STREAM}) 
add_subdirectories() compute-runtime-22.14.22890/shared/source/command_stream/aub_command_stream_receiver.cpp000066400000000000000000000304571422164147700313320ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/aub_command_stream_receiver.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/helpers/debug_helpers.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/helpers/options.h" #include "shared/source/memory_manager/os_agnostic_memory_manager.h" #include "shared/source/os_interface/os_inc_base.h" #include "shared/source/os_interface/sys_calls_common.h" #include #include #include namespace NEO { AubCommandStreamReceiverCreateFunc aubCommandStreamReceiverFactory[IGFX_MAX_CORE] = {}; std::string AUBCommandStreamReceiver::createFullFilePath(const HardwareInfo &hwInfo, const std::string &filename, uint32_t rootDeviceIndex) { std::string hwPrefix = hardwarePrefix[hwInfo.platform.eProductFamily]; // Generate the full filename const auto >SystemInfo = hwInfo.gtSystemInfo; std::stringstream strfilename; auto subDevicesCount = HwHelper::getSubDevicesCount(&hwInfo); uint32_t subSlicesPerSlice = gtSystemInfo.SubSliceCount / gtSystemInfo.SliceCount; strfilename << hwPrefix << "_"; if (subDevicesCount > 1) { strfilename << subDevicesCount << "tx"; } std::stringstream strExtendedFileName; strExtendedFileName << filename; if (DebugManager.flags.GenerateAubFilePerProcessId.get()) { strExtendedFileName << "_PID_" << SysCalls::getProcessId(); } strfilename << gtSystemInfo.SliceCount << "x" << subSlicesPerSlice << "x" << gtSystemInfo.MaxEuPerSubSlice << "_" << rootDeviceIndex << "_" << strExtendedFileName.str() << ".aub"; // clean-up any fileName issues because of the file system incompatibilities auto 
fileName = strfilename.str(); for (char &i : fileName) { i = i == '/' ? '_' : i; } std::string filePath(folderAUB); filePath.append(Os::fileSeparator); filePath.append(fileName); return filePath; } CommandStreamReceiver *AUBCommandStreamReceiver::create(const std::string &baseName, bool standalone, ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield) { auto hwInfo = executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo(); std::string filePath = AUBCommandStreamReceiver::createFullFilePath(*hwInfo, baseName, rootDeviceIndex); if (DebugManager.flags.AUBDumpCaptureFileName.get() != "unk") { filePath.assign(DebugManager.flags.AUBDumpCaptureFileName.get()); } if (hwInfo->platform.eRenderCoreFamily >= IGFX_MAX_CORE) { DEBUG_BREAK_IF(!false); return nullptr; } auto pCreate = aubCommandStreamReceiverFactory[hwInfo->platform.eRenderCoreFamily]; return pCreate ? pCreate(filePath, standalone, executionEnvironment, rootDeviceIndex, deviceBitfield) : nullptr; } } // namespace NEO namespace AubMemDump { using CmdServicesMemTraceMemoryCompare = AubMemDump::CmdServicesMemTraceMemoryCompare; using CmdServicesMemTraceMemoryWrite = AubMemDump::CmdServicesMemTraceMemoryWrite; using CmdServicesMemTraceRegisterPoll = AubMemDump::CmdServicesMemTraceRegisterPoll; using CmdServicesMemTraceRegisterWrite = AubMemDump::CmdServicesMemTraceRegisterWrite; using CmdServicesMemTraceVersion = AubMemDump::CmdServicesMemTraceVersion; static auto sizeMemoryWriteHeader = sizeof(CmdServicesMemTraceMemoryWrite) - sizeof(CmdServicesMemTraceMemoryWrite::data); extern const size_t g_dwordCountMax; void AubFileStream::open(const char *filePath) { fileHandle.open(filePath, std::ofstream::binary); fileName.assign(filePath); } void AubFileStream::close() { fileHandle.close(); fileName.clear(); } void AubFileStream::write(const char *data, size_t size) { fileHandle.write(data, size); } void AubFileStream::flush() { fileHandle.flush(); 
} bool AubFileStream::init(uint32_t stepping, uint32_t device) { CmdServicesMemTraceVersion header = {}; header.setHeader(); header.dwordCount = (sizeof(header) / sizeof(uint32_t)) - 1; header.stepping = stepping; header.metal = 0; header.device = device; header.csxSwizzling = CmdServicesMemTraceVersion::CsxSwizzlingValues::Disabled; //Which recording method used: // Phys is required for GGTT memory to be written directly to phys vs through aperture. header.recordingMethod = CmdServicesMemTraceVersion::RecordingMethodValues::Phy; header.pch = CmdServicesMemTraceVersion::PchValues::Default; header.captureTool = CmdServicesMemTraceVersion::CaptureToolValues::GenKmdCapture; header.primaryVersion = 0; header.secondaryVersion = 0; header.commandLine[0] = 'N'; header.commandLine[1] = 'E'; header.commandLine[2] = 'O'; header.commandLine[3] = 0; write(reinterpret_cast(&header), sizeof(header)); return true; } void AubFileStream::writeMemory(uint64_t physAddress, const void *memory, size_t size, uint32_t addressSpace, uint32_t hint) { writeMemoryWriteHeader(physAddress, size, addressSpace, hint); // Copy the contents from source to destination. 
write(reinterpret_cast(memory), size); auto sizeRemainder = size % sizeof(uint32_t); if (sizeRemainder) { //if input size is not 4 byte aligned, write extra zeros to AUB uint32_t zero = 0; write(reinterpret_cast(&zero), sizeof(uint32_t) - sizeRemainder); } } void AubFileStream::writeMemoryWriteHeader(uint64_t physAddress, size_t size, uint32_t addressSpace, uint32_t hint) { CmdServicesMemTraceMemoryWrite header = {}; auto alignedBlockSize = (size + sizeof(uint32_t) - 1) & ~(sizeof(uint32_t) - 1); auto dwordCount = (sizeMemoryWriteHeader + alignedBlockSize) / sizeof(uint32_t); DEBUG_BREAK_IF(dwordCount > AubMemDump::g_dwordCountMax); header.setHeader(); header.dwordCount = static_cast(dwordCount - 1); header.address = physAddress; header.repeatMemory = CmdServicesMemTraceMemoryWrite::RepeatMemoryValues::NoRepeat; header.tiling = CmdServicesMemTraceMemoryWrite::TilingValues::NoTiling; header.dataTypeHint = hint; header.addressSpace = addressSpace; header.dataSizeInBytes = static_cast(size); write(reinterpret_cast(&header), sizeMemoryWriteHeader); } void AubFileStream::writeGTT(uint32_t gttOffset, uint64_t entry) { write(reinterpret_cast(&entry), sizeof(entry)); } void AubFileStream::writePTE(uint64_t physAddress, uint64_t entry, uint32_t addressSpace) { write(reinterpret_cast(&entry), sizeof(entry)); } void AubFileStream::writeMMIOImpl(uint32_t offset, uint32_t value) { CmdServicesMemTraceRegisterWrite header = {}; header.setHeader(); header.dwordCount = (sizeof(header) / sizeof(uint32_t)) - 1; header.registerOffset = offset; header.messageSourceId = MessageSourceIdValues::Ia; header.registerSize = RegisterSizeValues::Dword; header.registerSpace = RegisterSpaceValues::Mmio; header.writeMaskLow = 0xffffffff; header.writeMaskHigh = 0x00000000; header.data[0] = value; write(reinterpret_cast(&header), sizeof(header)); } void AubFileStream::registerPoll(uint32_t registerOffset, uint32_t mask, uint32_t value, bool pollNotEqual, uint32_t timeoutAction) { 
CmdServicesMemTraceRegisterPoll header = {}; header.setHeader(); header.registerOffset = registerOffset; header.timeoutAction = timeoutAction; header.pollNotEqual = pollNotEqual; header.operationType = CmdServicesMemTraceRegisterPoll::OperationTypeValues::Normal; header.registerSize = CmdServicesMemTraceRegisterPoll::RegisterSizeValues::Dword; header.registerSpace = CmdServicesMemTraceRegisterPoll::RegisterSpaceValues::Mmio; header.pollMaskLow = mask; header.data[0] = value; header.dwordCount = (sizeof(header) / sizeof(uint32_t)) - 1; write(reinterpret_cast(&header), sizeof(header)); }

// Writes one CmdServicesMemTraceRegisterCompare packet to the stream: a
// dword-sized MMIO register at offset mmioRegister, with expectedValue stored
// in data[0] and both read masks set to all ones.
// NOTE(review): the casts in this file have no target type in this copy (text
// between angle brackets appears to have been lost) - restore before compiling.
void AubFileStream::expectMMIO(uint32_t mmioRegister, uint32_t expectedValue) {
    using AubMemDump::CmdServicesMemTraceRegisterCompare;
    CmdServicesMemTraceRegisterCompare header;
    memset(&header, 0, sizeof(header));
    header.setHeader();
    header.data[0] = expectedValue;
    header.registerOffset = mmioRegister;
    header.noReadExpect = CmdServicesMemTraceRegisterCompare::NoReadExpectValues::ReadExpect;
    header.registerSize = CmdServicesMemTraceRegisterCompare::RegisterSizeValues::Dword;
    header.registerSpace = CmdServicesMemTraceRegisterCompare::RegisterSpaceValues::Mmio;
    header.readMaskLow = 0xffffffff;
    header.readMaskHigh = 0xffffffff;
    // dwordCount is the packet size in dwords minus one.
    header.dwordCount = (sizeof(header) / sizeof(uint32_t)) - 1;
    write(reinterpret_cast(&header), sizeof(header));
}

// Writes CmdServicesMemTraceMemoryCompare packets covering sizeRemaining bytes
// read from memory, addressed at physAddress in the given addressSpace and
// tagged with compareOperation.
// The data is split so that header plus payload of one packet never exceeds
// g_dwordCountMax dwords; every chunk is zero-padded up to a dword boundary.
// Nothing is written when sizeRemaining is 0.
void AubFileStream::expectMemory(uint64_t physAddress, const void *memory, size_t sizeRemaining, uint32_t addressSpace, uint32_t compareOperation) {
    using CmdServicesMemTraceMemoryCompare = AubMemDump::CmdServicesMemTraceMemoryCompare;
    CmdServicesMemTraceMemoryCompare header = {};
    header.setHeader();
    header.noReadExpect = CmdServicesMemTraceMemoryCompare::NoReadExpectValues::ReadExpect;
    header.repeatMemory = CmdServicesMemTraceMemoryCompare::RepeatMemoryValues::NoRepeat;
    header.tiling = CmdServicesMemTraceMemoryCompare::TilingValues::NoTiling;
    header.crcCompare = CmdServicesMemTraceMemoryCompare::CrcCompareValues::NoCrc;
    header.compareOperation = compareOperation;
    header.dataTypeHint = CmdServicesMemTraceMemoryCompare::DataTypeHintValues::TraceNotype;
    header.addressSpace = addressSpace;
    // The header written per chunk is the struct without its trailing data member.
    auto headerSize = sizeof(CmdServicesMemTraceMemoryCompare) - sizeof(CmdServicesMemTraceMemoryCompare::data);
    // Largest payload that still fits in one packet next to the header.
    auto blockSizeMax = g_dwordCountMax * sizeof(uint32_t) - headerSize;
    // We have to decompose memory into chunks that can be streamed per iteration
    while (sizeRemaining > 0) {
        AubMemDump::setAddress(header, physAddress);
        auto sizeThisIteration = std::min(sizeRemaining, blockSizeMax);
        // Round up to the number of dwords
        auto dwordCount = Math::divideAndRoundUp(headerSize + sizeThisIteration, sizeof(uint32_t));
        header.dwordCount = static_cast(dwordCount - 1);
        header.dataSizeInBytes = static_cast(sizeThisIteration);
        // Write the header
        write(reinterpret_cast(&header), headerSize);
        // Copy the contents from source to destination.
        write(reinterpret_cast(memory), sizeThisIteration);
        // Advance the source pointer and the target address past this chunk.
        sizeRemaining -= sizeThisIteration;
        memory = (uint8_t *)memory + sizeThisIteration;
        physAddress += sizeThisIteration;
        auto remainder = sizeThisIteration & (sizeof(uint32_t) - 1);
        if (remainder) {
            //if size is not 4 byte aligned, write extra zeros to AUB
            uint32_t zero = 0;
            write(reinterpret_cast(&zero), sizeof(uint32_t) - remainder);
        }
    }
}

// Writes the context-create command structure to the stream as raw bytes.
void AubFileStream::createContext(const AubPpgttContextCreate &cmd) {
    write(reinterpret_cast(&cmd), sizeof(cmd));
}

// Writes a CmdServicesMemTraceComment packet carrying message, including its
// terminating NUL, zero-padded to a dword boundary.
// message must not be null: strlen() is called on it unconditionally.
// Always returns true.
bool AubFileStream::addComment(const char *message) {
    using CmdServicesMemTraceComment = AubMemDump::CmdServicesMemTraceComment;
    CmdServicesMemTraceComment cmd = {};
    cmd.setHeader();
    cmd.syncOnComment = false;
    cmd.syncOnSimulatorDisplay = false;
    // Length includes the terminating NUL.
    auto messageLen = strlen(message) + 1;
    // Message length rounded up to whole dwords.
    auto dwordLen = ((messageLen + sizeof(uint32_t) - 1) & ~(sizeof(uint32_t) - 1)) / sizeof(uint32_t);
    cmd.dwordCount = static_cast(dwordLen + 1);
    // Only the part of the struct that precedes the comment member is written as header.
    write(reinterpret_cast(&cmd), sizeof(cmd) - sizeof(cmd.comment));
    write(message, messageLen);
    auto remainder = messageLen & (sizeof(uint32_t) - 1);
    if (remainder) {
        //if size is not 4 byte aligned, write extra zeros to AUB
        uint32_t zero = 0;
        write(reinterpret_cast(&zero), sizeof(uint32_t) - remainder);
    }
    return true;
}

// Returns a std::unique_lock constructed on this stream's mutex; callers keep
// the returned object alive for as long as they need the stream locked.
std::unique_lock AubFileStream::lockStream() {
    return std::unique_lock(mutex);
}
} // namespace AubMemDump
compute-runtime-22.14.22890/shared/source/command_stream/aub_command_stream_receiver.h000066400000000000000000000026071422164147700307730ustar00rootroot00000000000000
/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */
#pragma once
#include "shared/source/aub_mem_dump/aub_mem_dump.h"
#include "shared/source/helpers/common_types.h"
#include
namespace NEO {
struct HardwareInfo;
class CommandStreamReceiver;
class ExecutionEnvironment;

// Static entry points for creating an AUB command stream receiver and for
// building its output file path. Declarations only; the definitions are not
// part of this header.
struct AUBCommandStreamReceiver {
    static CommandStreamReceiver *create(const std::string &filename, bool standalone, ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield);
    static std::string createFullFilePath(const HardwareInfo &hwInfo, const std::string &filename, uint32_t rootDeviceIndex);
    using AubFileStream = AubMemDump::AubFileStream;
};

// Pointer to a function with the same parameter list and return type as
// AUBCommandStreamReceiver::create.
typedef CommandStreamReceiver *(*AubCommandStreamReceiverCreateFunc)(const std::string &fileName, bool standalone, ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield);
} // namespace NEO
compute-runtime-22.14.22890/shared/source/command_stream/aub_command_stream_receiver_hw.h000066400000000000000000000125731422164147700314740ustar00rootroot00000000000000
/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */
#pragma once
#include "shared/source/aub/aub_center.h"
#include "shared/source/command_stream/aub_command_stream_receiver.h"
#include "shared/source/command_stream/command_stream_receiver_simulated_hw.h"
#include "shared/source/command_stream/submission_status.h"
#include "shared/source/command_stream/wait_status.h"
#include "shared/source/helpers/array_count.h"
#include "shared/source/memory_manager/os_agnostic_memory_manager.h"
#include "shared/source/memory_manager/page_table.h"
#include "shared/source/memory_manager/physical_address_allocator.h"
#include "shared/source/utilities/spinlock.h"
#include "aub_mapper.h"
namespace NEO {
class AubSubCaptureManager;

// Command stream receiver that records work into an AUB stream; getType()
// reports CommandStreamReceiverType::CSR_AUB.
// NOTE(review): the template parameter list and the template arguments of the
// base class / member types are missing in this copy of the file (text between
// angle brackets appears to have been lost) - restore before compiling.
template class AUBCommandStreamReceiverHw : public CommandStreamReceiverSimulatedHw {
  protected:
    typedef CommandStreamReceiverSimulatedHw BaseClass;
    using AUB = typename AUBFamilyMapper::AUB;
    using ExternalAllocationsContainer = std::vector;
    using BaseClass::getParametersForWriteMemory;
    using BaseClass::osContext;

  public:
    using CommandStreamReceiverSimulatedCommonHw::initAdditionalMMIO;
    using CommandStreamReceiverSimulatedCommonHw::aubManager;
    using CommandStreamReceiverSimulatedCommonHw::hardwareContextController;
    using CommandStreamReceiverSimulatedCommonHw::engineInfo;
    using CommandStreamReceiverSimulatedCommonHw::stream;
    SubmissionStatus flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override;
    void processResidency(const ResidencyContainer &allocationsForResidency, uint32_t handleId) override;
    // Add / remove an entry of externalAllocations; removal matches on the
    // entry's first member (the GPU address).
    void makeResidentExternal(AllocationView &allocationView);
    void makeNonResidentExternal(uint64_t gpuAddress);
    // The inherited stream pointer, cast to the AUB file stream type.
    AubMemDump::AubFileStream *getAubStream() const { return static_cast(this->stream); }
    void writeMemory(uint64_t gpuAddress, void *cpuAddress, size_t size, uint32_t memoryBank, uint64_t entryBits) override;
    bool writeMemory(GraphicsAllocation &gfxAllocation) override;
    MOCKABLE_VIRTUAL bool writeMemory(AllocationView &allocationView);
    void writeMMIO(uint32_t offset, uint32_t value) override;
    void expectMMIO(uint32_t mmioRegister, uint32_t expectedValue);
    bool expectMemory(const void *gfxAddress, const void *srcAddress, size_t length, uint32_t compareOperation) override;
    AubSubCaptureStatus checkAndActivateAubSubCapture(const std::string &kernelName) override;
    void addAubComment(const char *message) override;
    // Family specific version
    MOCKABLE_VIRTUAL void submitBatchBufferAub(uint64_t batchBufferGpuAddress, const void *batchBuffer, size_t batchBufferSize, uint32_t memoryBank, uint64_t entryBits);
    void pollForCompletion() override;
    void pollForCompletionImpl() override;
    WaitStatus waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, QueueThrottle throttle) override;
    uint32_t getDumpHandle();
    MOCKABLE_VIRTUAL void addContextToken(uint32_t dumpHandle);
    void dumpAllocation(GraphicsAllocation &gfxAllocation) override;
    static CommandStreamReceiver *create(const std::string &fileName, bool standalone, ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield);
    AUBCommandStreamReceiverHw(const std::string &fileName, bool standalone, ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield);
    ~AUBCommandStreamReceiverHw() override;
    // Not copyable.
    AUBCommandStreamReceiverHw(const AUBCommandStreamReceiverHw &) = delete;
    AUBCommandStreamReceiverHw &operator=(const AUBCommandStreamReceiverHw &) = delete;
    // Output file management.
    MOCKABLE_VIRTUAL void openFile(const std::string &fileName);
    MOCKABLE_VIRTUAL bool reopenFile(const std::string &fileName);
    MOCKABLE_VIRTUAL void initFile(const std::string &fileName);
    MOCKABLE_VIRTUAL void closeFile();
    MOCKABLE_VIRTUAL bool isFileOpen() const;
    MOCKABLE_VIRTUAL const std::string getFileName();
    MOCKABLE_VIRTUAL void initializeEngine() override;
    std::unique_ptr subCaptureManager;
    uint32_t aubDeviceId;
    bool standalone;
    std::unique_ptr::type> ppgtt;
    std::unique_ptr ggtt;
    // remap CPU VA -> GGTT VA
    AddressMapper *gttRemap;
    MOCKABLE_VIRTUAL bool addPatchInfoComments();
    void addGUCStartMessage(uint64_t batchBufferAddress);
    uint32_t getGUCWorkQueueItemHeader();
    CommandStreamReceiverType getType() override {
        return CommandStreamReceiverType::CSR_AUB;
    }
    int getAddressSpaceFromPTEBits(uint64_t entryBits) const;

  protected:
    constexpr static uint32_t
getMaskAndValueForPollForCompletion();
    // When true, the next processResidency() marks every allocation as
    // AUB-writable before writing it, then resets the flag.
    bool dumpAubNonWritable = false;
    // Set by initializeEngine(); the writeMemory overloads refuse to run before that.
    bool isEngineInitialized = false;
    ExternalAllocationsContainer externalAllocations;
    // latestSentTaskCount as seen by the last pollForCompletionImpl(); lets
    // pollForCompletion() skip when nothing new was sent.
    uint32_t pollForCompletionTaskCount = 0u;
    // Held for the duration of pollForCompletion().
    SpinLock pollForCompletionLock;
};
} // namespace NEO
compute-runtime-22.14.22890/shared/source/command_stream/aub_command_stream_receiver_hw_base.inl000066400000000000000000001053201422164147700330120ustar00rootroot00000000000000
/* * Copyright (C) 2019-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */
#include "shared/source/aub/aub_helper.h"
#include "shared/source/aub/aub_stream_provider.h"
#include "shared/source/aub/aub_subcapture.h"
#include "shared/source/aub_mem_dump/aub_alloc_dump.h"
#include "shared/source/aub_mem_dump/aub_alloc_dump.inl"
#include "shared/source/aub_mem_dump/page_table_entry_bits.h"
#include "shared/source/command_stream/aub_command_stream_receiver_hw.h"
#include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/command_stream/wait_status.h"
#include "shared/source/debug_settings/debug_settings_manager.h"
#include "shared/source/execution_environment/execution_environment.h"
#include "shared/source/execution_environment/root_device_environment.h"
#include "shared/source/helpers/aligned_memory.h"
#include "shared/source/helpers/api_specific_config.h"
#include "shared/source/helpers/constants.h"
#include "shared/source/helpers/debug_helpers.h"
#include "shared/source/helpers/engine_node_helper.h"
#include "shared/source/helpers/hardware_context_controller.h"
#include "shared/source/helpers/hash.h"
#include "shared/source/helpers/neo_driver_version.h"
#include "shared/source/helpers/ptr_math.h"
#include "shared/source/helpers/string.h"
#include "shared/source/memory_manager/graphics_allocation.h"
#include "shared/source/memory_manager/memory_banks.h"
#include "shared/source/memory_manager/os_agnostic_memory_manager.h"
#include "shared/source/os_interface/hw_info_config.h"
#include "shared/source/os_interface/os_context.h"
#include "third_party/aub_stream/headers/aub_manager.h"
#include "third_party/aub_stream/headers/aubstream.h"
#include
#include
namespace NEO {

// Wires the receiver to the root device's AUB center: initialises the center,
// then takes the sub-capture common data, AUB manager, physical address
// allocator, address mapper and stream from it. Every one of these except the
// AUB manager is required (UNRECOVERABLE_IF on null).
// Also creates the PPGTT/GGTT page tables, selects the dispatch mode and the
// AUB device id (both overridable through debug flags) and sets defaultSshSize.
// NOTE(review): template parameter lists and template arguments are missing in
// this copy of the file (text between angle brackets appears to have been
// lost) - restore before compiling.
template AUBCommandStreamReceiverHw::AUBCommandStreamReceiverHw(const std::string &fileName, bool standalone, ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield)
    : BaseClass(executionEnvironment, rootDeviceIndex, deviceBitfield), standalone(standalone) {
    executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->initAubCenter(this->isLocalMemoryEnabled(), fileName, this->getType());
    auto aubCenter = executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->aubCenter.get();
    UNRECOVERABLE_IF(nullptr == aubCenter);
    auto subCaptureCommon = aubCenter->getSubCaptureCommon();
    UNRECOVERABLE_IF(nullptr == subCaptureCommon);
    subCaptureManager = std::make_unique(fileName, *subCaptureCommon, ApiSpecificConfig::getRegistryPath());
    // May be null; the methods of this class test aubManager before using it.
    aubManager = aubCenter->getAubManager();
    // The allocator lives in the AUB center; create it only if it does not exist yet.
    if (!aubCenter->getPhysicalAddressAllocator()) {
        aubCenter->initPhysicalAddressAllocator(this->createPhysicalAddressAllocator(&this->peekHwInfo()));
    }
    auto physicalAddressAllocator = aubCenter->getPhysicalAddressAllocator();
    UNRECOVERABLE_IF(nullptr == physicalAddressAllocator);
    ppgtt = std::make_unique::type>(physicalAddressAllocator);
    ggtt = std::make_unique(physicalAddressAllocator);
    gttRemap = aubCenter->getAddressMapper();
    UNRECOVERABLE_IF(nullptr == gttRemap);
    auto streamProvider = aubCenter->getStreamProvider();
    UNRECOVERABLE_IF(nullptr == streamProvider);
    stream = streamProvider->getStream();
    UNRECOVERABLE_IF(nullptr == stream);
    // Batched dispatch unless the CsrDispatchMode debug flag is non-zero.
    this->dispatchMode = DispatchMode::BatchedDispatch;
    if (DebugManager.flags.CsrDispatchMode.get()) {
        this->dispatchMode = (DispatchMode)DebugManager.flags.CsrDispatchMode.get();
    }
    // -1 means "no override": use the device id from the capability table.
    auto debugDeviceId = DebugManager.flags.OverrideAubDeviceId.get();
    this->aubDeviceId = debugDeviceId == -1 ? this->peekHwInfo().capabilityTable.aubDeviceId : static_cast(debugDeviceId);
    this->defaultSshSize = 64 * KB;
}

// Polls for completion if an OS context is attached, then frees the engine
// info registered with the GTT remapper.
template AUBCommandStreamReceiverHw::~AUBCommandStreamReceiverHw() {
    if (osContext) {
        pollForCompletion();
    }
    this->freeEngineInfo(*gttRemap);
}

// Runs initFile(fileName) while holding the stream lock.
template void AUBCommandStreamReceiverHw::openFile(const std::string &fileName) {
    auto streamLocked = getAubStream()->lockStream();
    initFile(fileName);
}

// Makes fileName the open output file, under the stream lock.
// If a file with a different name is open it is closed and the engine info is
// freed first. Returns true when initFile() was called, false when the file
// that is already open has the requested name and was left untouched.
template bool AUBCommandStreamReceiverHw::reopenFile(const std::string &fileName) {
    auto streamLocked = getAubStream()->lockStream();
    if (isFileOpen()) {
        if (fileName != getFileName()) {
            closeFile();
            this->freeEngineInfo(*gttRemap);
        }
    }
    if (!isFileOpen()) {
        initFile(fileName);
        return true;
    }
    return false;
}

// Opens the output file if it is not open yet.
// With an AUB manager: opens through the manager and adds a "driver version"
// comment. Without one: opens the legacy stream and writes the file header via
// stream->init(). A failed open is unrecoverable on both paths.
template void AUBCommandStreamReceiverHw::initFile(const std::string &fileName) {
    if (aubManager) {
        if (!aubManager->isOpen()) {
            aubManager->open(fileName);
            // This UNRECOVERABLE_IF most probably means you are not executing aub tests with correct current directory (containing aub_out folder)
            UNRECOVERABLE_IF(!aubManager->isOpen());
            std::ostringstream str;
            str << "driver version: " << driverVersion;
            aubManager->addComment(str.str().c_str());
        }
        return;
    }
    if (!getAubStream()->isOpen()) {
        // Open our file
        stream->open(fileName.c_str());
        if (!getAubStream()->isOpen()) {
            // This UNRECOVERABLE_IF most probably means you are not executing aub tests with correct current directory (containing aub_out folder)
            // try adding _aub
            UNRECOVERABLE_IF(true);
        }
        // Add the file header
        auto &hwInfo = this->peekHwInfo();
        const auto &hwInfoConfig = *HwInfoConfig::get(hwInfo.platform.eProductFamily);
        stream->init(hwInfoConfig.getAubStreamSteppingFromHwRevId(hwInfo), aubDeviceId);
    }
}

// Closes the AUB manager if there is one, otherwise the legacy stream.
template void AUBCommandStreamReceiverHw::closeFile() {
    aubManager ? aubManager->close() : stream->close();
}

// Reports the open state of the AUB manager if there is one, otherwise of the
// legacy stream.
template bool AUBCommandStreamReceiverHw::isFileOpen() const {
    return aubManager ?
aubManager->isOpen() : getAubStream()->isOpen();
}

// Returns the file name reported by the AUB manager if there is one, otherwise
// by the legacy stream.
// NOTE(review): template parameter lists and cast target types are missing in
// this copy of the file (text between angle brackets appears to have been
// lost) - restore before compiling.
template const std::string AUBCommandStreamReceiverHw::getFileName() {
    return aubManager ? aubManager->getFileName() : getAubStream()->getFileName();
}

// Prepares the engine for submissions, under the stream lock.
// Sets isEngineInitialized, then either delegates to hardwareContextController
// or, on the legacy path, does the one-time setup (skipped when engineInfo.pLRCA
// is already set): MMIO init, global HW status page, LRCA, ring buffer, ring
// registers inside the LRCA, LRCA dump, and finally the context token.
template void AUBCommandStreamReceiverHw::initializeEngine() {
    auto streamLocked = getAubStream()->lockStream();
    // Set before the early returns below, so it is true on every path.
    isEngineInitialized = true;
    if (hardwareContextController) {
        hardwareContextController->initialize();
        return;
    }
    auto csTraits = this->getCsTraits(osContext->getEngineType());
    // A non-null LRCA means the legacy setup below has already been done.
    if (engineInfo.pLRCA) {
        return;
    }
    this->initGlobalMMIO();
    this->initEngineMMIO();
    this->initAdditionalMMIO();
    // Write driver version
    {
        std::ostringstream str;
        str << "driver version: " << driverVersion;
        getAubStream()->addComment(str.str().c_str());
    }
    // Global HW Status Page
    {
        const size_t sizeHWSP = 0x1000;
        const size_t alignHWSP = 0x1000;
        // CPU allocation -> GGTT address (gttRemap) -> physical address (ggtt).
        engineInfo.pGlobalHWStatusPage = alignedMalloc(sizeHWSP, alignHWSP);
        engineInfo.ggttHWSP = gttRemap->map(engineInfo.pGlobalHWStatusPage, sizeHWSP);
        auto physHWSP = ggtt->map(engineInfo.ggttHWSP, sizeHWSP, this->getGTTBits(), this->getMemoryBankForGtt());
        // Write our GHWSP
        {
            std::ostringstream str;
            str << "ggtt: " << std::hex << std::showbase << engineInfo.ggttHWSP;
            getAubStream()->addComment(str.str().c_str());
        }
        AubGTTData data = {0};
        this->getGTTData(reinterpret_cast(physHWSP), data);
        AUB::reserveAddressGGTT(*stream, engineInfo.ggttHWSP, sizeHWSP, physHWSP, data);
        // Program the register at offset 0x2080 from the engine's MMIO base with the page's GGTT address.
        stream->writeMMIO(AubMemDump::computeRegisterOffset(csTraits.mmioBase, 0x2080), engineInfo.ggttHWSP);
    }
    // Allocate the LRCA
    const size_t sizeLRCA = csTraits.sizeLRCA;
    const size_t alignLRCA = csTraits.alignLRCA;
    auto pLRCABase = alignedMalloc(sizeLRCA, alignLRCA);
    engineInfo.pLRCA = pLRCABase;
    // Initialize the LRCA to a known state
    csTraits.initialize(pLRCABase);
    // Reserve the ring buffer
    engineInfo.sizeRingBuffer = 0x4 * 0x1000;
    {
        const size_t alignRingBuffer = 0x1000;
        engineInfo.pRingBuffer = alignedMalloc(engineInfo.sizeRingBuffer, alignRingBuffer);
        engineInfo.ggttRingBuffer = gttRemap->map(engineInfo.pRingBuffer, engineInfo.sizeRingBuffer);
        auto physRingBuffer = ggtt->map(engineInfo.ggttRingBuffer, engineInfo.sizeRingBuffer, this->getGTTBits(), this->getMemoryBankForGtt());
        {
            std::ostringstream str;
            str << "ggtt: " << std::hex << std::showbase << engineInfo.ggttRingBuffer;
            getAubStream()->addComment(str.str().c_str());
        }
        AubGTTData data = {0};
        this->getGTTData(reinterpret_cast(physRingBuffer), data);
        AUB::reserveAddressGGTT(*stream, engineInfo.ggttRingBuffer, engineInfo.sizeRingBuffer, physRingBuffer, data);
    }
    // Initialize the ring MMIO registers
    {
        uint32_t ringHead = 0x000;
        uint32_t ringTail = 0x000;
        auto ringBase = engineInfo.ggttRingBuffer;
        auto ringCtrl = (uint32_t)((engineInfo.sizeRingBuffer - 0x1000) | 1);
        // These values are stored into the LRCA image, not written as MMIO here.
        csTraits.setRingHead(pLRCABase, ringHead);
        csTraits.setRingTail(pLRCABase, ringTail);
        csTraits.setRingBase(pLRCABase, ringBase);
        csTraits.setRingCtrl(pLRCABase, ringCtrl);
    }
    // Write our LRCA
    {
        engineInfo.ggttLRCA = gttRemap->map(engineInfo.pLRCA, sizeLRCA);
        auto lrcAddressPhys = ggtt->map(engineInfo.ggttLRCA, sizeLRCA, this->getGTTBits(), this->getMemoryBankForGtt());
        {
            std::ostringstream str;
            str << "ggtt: " << std::hex << std::showbase << engineInfo.ggttLRCA;
            getAubStream()->addComment(str.str().c_str());
        }
        AubGTTData data = {0};
        this->getGTTData(reinterpret_cast(lrcAddressPhys), data);
        AUB::reserveAddressGGTT(*stream, engineInfo.ggttLRCA, sizeLRCA, lrcAddressPhys, data);
        AUB::addMemoryWrite(
            *stream,
            lrcAddressPhys,
            pLRCABase,
            sizeLRCA,
            this->getAddressSpace(csTraits.aubHintLRCA),
            csTraits.aubHintLRCA);
    }
    // Create a context to facilitate AUB dumping of memory using PPGTT
    addContextToken(getDumpHandle());
    DEBUG_BREAK_IF(!engineInfo.pLRCA);
}

// Factory: constructs the receiver and, unless the sub-capture manager is in
// sub-capture mode, opens fileName immediately. Ownership of the returned raw
// pointer passes to the caller (csr.release()).
template CommandStreamReceiver *AUBCommandStreamReceiverHw::create(const std::string &fileName, bool standalone, ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield) {
    auto csr = std::make_unique>(fileName, standalone, executionEnvironment, rootDeviceIndex, deviceBitfield);
    if (!csr->subCaptureManager->isSubCaptureMode()) {
        csr->openFile(fileName);
    }
    return csr.release();
}

// Records one batch buffer submission into the AUB stream and returns
// SubmissionStatus::SUCCESS on every path.
// In sub-capture mode with capture disabled nothing is recorded; a standalone
// receiver then only stores the latest sent task count to the tag address of
// each active partition.
// Otherwise: initialise the engine, optionally flatten the batch buffer
// (FlattenBatchBufferForAUBDump), write resident allocations, submit the
// batch buffer, update the tags when standalone, poll as required and flush
// the stream.
template SubmissionStatus AUBCommandStreamReceiverHw::flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) {
    if (subCaptureManager->isSubCaptureMode()) {
        if (!subCaptureManager->isSubCaptureEnabled()) {
            if (this->standalone) {
                // One tag per active partition, postSyncWriteOffset bytes apart.
                volatile uint32_t *pollAddress = this->tagAddress;
                for (uint32_t i = 0; i < this->activePartitions; i++) {
                    *pollAddress = this->peekLatestSentTaskCount();
                    pollAddress = ptrOffset(pollAddress, this->postSyncWriteOffset);
                }
            }
            return SubmissionStatus::SUCCESS;
        }
    }
    initializeEngine();
    // Write our batch buffer
    auto pBatchBuffer = ptrOffset(batchBuffer.commandBufferAllocation->getUnderlyingBuffer(), batchBuffer.startOffset);
    auto batchBufferGpuAddress = ptrOffset(batchBuffer.commandBufferAllocation->getGpuAddress(), batchBuffer.startOffset);
    auto currentOffset = batchBuffer.usedSize;
    DEBUG_BREAK_IF(currentOffset < batchBuffer.startOffset);
    auto sizeBatchBuffer = currentOffset - batchBuffer.startOffset;
    // Owns the flattened copy (if any); the deleter returns it to the memory
    // manager when this function exits.
    std::unique_ptr> flatBatchBuffer(
        nullptr, [&](GraphicsAllocation *ptr) { this->getMemoryManager()->freeGraphicsMemory(ptr); });
    if (DebugManager.flags.FlattenBatchBufferForAUBDump.get()) {
        flatBatchBuffer.reset(this->flatBatchBufferHelper->flattenBatchBuffer(this->rootDeviceIndex, batchBuffer, sizeBatchBuffer, this->dispatchMode, this->getOsContext().getDeviceBitfield()));
        if (flatBatchBuffer.get() != nullptr) {
            pBatchBuffer = flatBatchBuffer->getUnderlyingBuffer();
            batchBufferGpuAddress = flatBatchBuffer->getGpuAddress();
            // NOTE(review): batchBuffer is a reference parameter and is left pointing
            // at the flattened allocation, which is freed when this function
            // returns - confirm callers do not use commandBufferAllocation afterwards.
            batchBuffer.commandBufferAllocation = flatBatchBuffer.get();
        }
    }
    // The command buffer is written to the AUB together with the resident allocations.
    allocationsForResidency.push_back(batchBuffer.commandBufferAllocation);
    processResidency(allocationsForResidency, 0u);
    // The entry added above is removed again unless this is a standalone
    // receiver running without flattening.
    if (!this->standalone || DebugManager.flags.FlattenBatchBufferForAUBDump.get()) {
        allocationsForResidency.pop_back();
    }
    submitBatchBufferAub(batchBufferGpuAddress,
pBatchBuffer, sizeBatchBuffer, this->getMemoryBank(batchBuffer.commandBufferAllocation), this->getPPGTTAdditionalBits(batchBuffer.commandBufferAllocation));
    if (this->standalone) {
        // Store the latest sent task count to the tag of every active
        // partition; tags are postSyncWriteOffset bytes apart.
        volatile uint32_t *pollAddress = this->tagAddress;
        for (uint32_t i = 0; i < this->activePartitions; i++) {
            *pollAddress = this->peekLatestSentTaskCount();
            pollAddress = ptrOffset(pollAddress, this->postSyncWriteOffset);
        }
    }
    // In sub-capture mode each captured submission is polled and the capture
    // is switched off again.
    if (subCaptureManager->isSubCaptureMode()) {
        pollForCompletion();
        subCaptureManager->disableSubCapture();
    }
    if (DebugManager.flags.FlattenBatchBufferForAUBDump.get()) {
        pollForCompletion();
    }
    getAubStream()->flush();
    return SubmissionStatus::SUCCESS;
}

// Emits the collected patch info as two AUB comments and clears the
// collection.
// First comment: "PatchInfoData" followed by one ';'-separated hex record per
// entry. Second comment: "AllocationsList" followed by one
// "allocation;ppgtt->map(...) result" line per distinct non-zero source or
// target allocation.
// Returns false as soon as addComment() reports failure, true otherwise.
// NOTE(review): template arguments and cast target types are missing in this
// copy of the file (text between angle brackets appears to have been lost) -
// restore before compiling.
template bool AUBCommandStreamReceiverHw::addPatchInfoComments() {
    std::map allocationsMap;
    std::ostringstream str;
    str << "PatchInfoData" << std::endl;
    for (auto &patchInfoData : this->flatBatchBufferHelper->getPatchInfoCollection()) {
        str << std::hex << patchInfoData.sourceAllocation << ";";
        str << std::hex << patchInfoData.sourceAllocationOffset << ";";
        str << std::hex << patchInfoData.sourceType << ";";
        str << std::hex << patchInfoData.targetAllocation << ";";
        str << std::hex << patchInfoData.targetAllocationOffset << ";";
        str << std::hex << patchInfoData.targetType << ";";
        str << std::endl;
        // insert() keeps the first mapping when the same allocation shows up again.
        if (patchInfoData.sourceAllocation) {
            allocationsMap.insert(std::pair(patchInfoData.sourceAllocation, ppgtt->map(static_cast(patchInfoData.sourceAllocation), 1, 0, MemoryBanks::MainBank)));
        }
        if (patchInfoData.targetAllocation) {
            allocationsMap.insert(std::pair(patchInfoData.targetAllocation, ppgtt->map(static_cast(patchInfoData.targetAllocation), 1, 0, MemoryBanks::MainBank)));
        }
    }
    bool result = getAubStream()->addComment(str.str().c_str());
    // The collection is cleared whether or not the comment was written.
    this->flatBatchBufferHelper->getPatchInfoCollection().clear();
    if (!result) {
        return false;
    }
    std::ostringstream allocationStr;
    allocationStr << "AllocationsList" << std::endl;
    for (auto &element : allocationsMap) {
        allocationStr << std::hex << element.first << ";" << element.second << std::endl;
    }
    result = getAubStream()->addComment(allocationStr.str().c_str());
    if (!result) {
        return false;
    }
    return true;
}

// Records the submission of one batch buffer, under the stream lock.
// With a hardwareContextController the call is forwarded to it (only when
// batchBufferSize is non-zero) and nothing else happens.
// Legacy path: map the batch buffer into the PPGTT and dump its contents,
// optionally add GuC start message / patch info comments, append a
// MI_BATCH_BUFFER_START to the ring buffer, dump the new ring contents and the
// updated ring tail, then submit the context descriptor.
template void AUBCommandStreamReceiverHw::submitBatchBufferAub(uint64_t batchBufferGpuAddress, const void *batchBuffer, size_t batchBufferSize, uint32_t memoryBank, uint64_t entryBits) {
    auto streamLocked = getAubStream()->lockStream();
    if (hardwareContextController) {
        if (batchBufferSize) {
            hardwareContextController->submit(batchBufferGpuAddress, batchBuffer, batchBufferSize, memoryBank, MemoryConstants::pageSize64k, false);
        }
        return;
    }
    auto csTraits = this->getCsTraits(osContext->getEngineType());
    {
        {
            // The comment carries the CPU pointer of the batch buffer, not its GPU address.
            std::ostringstream str;
            str << "ppgtt: " << std::hex << std::showbase << batchBuffer;
            getAubStream()->addComment(str.str().c_str());
        }
        auto physBatchBuffer = ppgtt->map(static_cast(batchBufferGpuAddress), batchBufferSize, entryBits, memoryBank);
        AubHelperHw aubHelperHw(this->isLocalMemoryEnabled());
        AUB::reserveAddressPPGTT(*stream, static_cast(batchBufferGpuAddress), batchBufferSize, physBatchBuffer, entryBits, aubHelperHw);
        AUB::addMemoryWrite(
            *stream,
            physBatchBuffer,
            batchBuffer,
            batchBufferSize,
            this->getAddressSpace(AubMemDump::DataTypeHintValues::TraceBatchBufferPrimary),
            AubMemDump::DataTypeHintValues::TraceBatchBufferPrimary);
    }
    if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) {
        addGUCStartMessage(static_cast(reinterpret_cast(batchBuffer)));
        addPatchInfoComments();
    }
    // Add a batch buffer start to the ring buffer
    auto previousTail = engineInfo.tailRingBuffer;
    {
        typedef typename GfxFamily::MI_LOAD_REGISTER_IMM MI_LOAD_REGISTER_IMM;
        typedef typename GfxFamily::MI_BATCH_BUFFER_START MI_BATCH_BUFFER_START;
        typedef typename GfxFamily::MI_NOOP MI_NOOP;
        // CPU and GGTT views of the current ring tail.
        auto pTail = ptrOffset(engineInfo.pRingBuffer, engineInfo.tailRingBuffer);
        auto ggttTail = ptrOffset(engineInfo.ggttRingBuffer, engineInfo.tailRingBuffer);
        // Worst case for one submission: a BBS plus the first-submission LRI.
        auto sizeNeeded = sizeof(MI_BATCH_BUFFER_START) + sizeof(MI_LOAD_REGISTER_IMM);
        auto tailAlignment = sizeof(uint64_t);
sizeNeeded = alignUp(sizeNeeded, tailAlignment);
        if (engineInfo.tailRingBuffer + sizeNeeded >= engineInfo.sizeRingBuffer) {
            // Pad the remaining ring with NOOPs
            auto sizeToWrap = engineInfo.sizeRingBuffer - engineInfo.tailRingBuffer;
            memset(pTail, 0, sizeToWrap);
            // write remaining ring
            auto physDumpStart = ggtt->map(ggttTail, sizeToWrap, this->getGTTBits(), this->getMemoryBankForGtt());
            AUB::addMemoryWrite(
                *stream,
                physDumpStart,
                pTail,
                sizeToWrap,
                this->getAddressSpace(AubMemDump::DataTypeHintValues::TraceCommandBuffer),
                AubMemDump::DataTypeHintValues::TraceCommandBuffer);
            // Restart at the beginning of the ring. Because the branch below is
            // an else-if, no LRI is emitted after a wrap even though the tail is 0.
            previousTail = 0;
            engineInfo.tailRingBuffer = 0;
            pTail = engineInfo.pRingBuffer;
        } else if (engineInfo.tailRingBuffer == 0) {
            // Add a LRI if this is our first submission
            auto lri = GfxFamily::cmdInitLoadRegisterImm;
            lri.setRegisterOffset(AubMemDump::computeRegisterOffset(csTraits.mmioBase, 0x2244));
            lri.setDataDword(0x00010000);
            *(MI_LOAD_REGISTER_IMM *)pTail = lri;
            pTail = ((MI_LOAD_REGISTER_IMM *)pTail) + 1;
        }
        // Add our BBS
        auto bbs = GfxFamily::cmdInitBatchBufferStart;
        bbs.setBatchBufferStartAddress(static_cast(batchBufferGpuAddress));
        bbs.setAddressSpaceIndicator(MI_BATCH_BUFFER_START::ADDRESS_SPACE_INDICATOR_PPGTT);
        *(MI_BATCH_BUFFER_START *)pTail = bbs;
        pTail = ((MI_BATCH_BUFFER_START *)pTail) + 1;
        // Compute our new ring tail.
        engineInfo.tailRingBuffer = (uint32_t)ptrDiff(pTail, engineInfo.pRingBuffer);
        // Add NOOPs as needed as our tail needs to be aligned
        while (engineInfo.tailRingBuffer % tailAlignment) {
            *(MI_NOOP *)pTail = GfxFamily::cmdInitNoop;
            pTail = ((MI_NOOP *)pTail) + 1;
            engineInfo.tailRingBuffer = (uint32_t)ptrDiff(pTail, engineInfo.pRingBuffer);
        }
        UNRECOVERABLE_IF((engineInfo.tailRingBuffer % tailAlignment) != 0);
        // Only dump the new commands
        auto ggttDumpStart = ptrOffset(engineInfo.ggttRingBuffer, previousTail);
        auto dumpStart = ptrOffset(engineInfo.pRingBuffer, previousTail);
        auto dumpLength = engineInfo.tailRingBuffer - previousTail;
        // write ring
        {
            std::ostringstream str;
            str << "ggtt: " << std::hex << std::showbase << ggttDumpStart;
            getAubStream()->addComment(str.str().c_str());
        }
        auto physDumpStart = ggtt->map(ggttDumpStart, dumpLength, this->getGTTBits(), this->getMemoryBankForGtt());
        AUB::addMemoryWrite(
            *stream,
            physDumpStart,
            dumpStart,
            dumpLength,
            this->getAddressSpace(AubMemDump::DataTypeHintValues::TraceCommandBuffer),
            AubMemDump::DataTypeHintValues::TraceCommandBuffer);
        // update the ring mmio tail in the LRCA
        {
            std::ostringstream str;
            str << "ggtt: " << std::hex << std::showbase << engineInfo.ggttLRCA + 0x101c;
            getAubStream()->addComment(str.str().c_str());
        }
        // NOTE(review): only sizeof(tailRingBuffer) bytes at the LRCA base are
        // mapped here and 0x101c is then added to the physical address; this
        // presumably relies on the LRCA being physically contiguous - confirm.
        auto physLRCA = ggtt->map(engineInfo.ggttLRCA, sizeof(engineInfo.tailRingBuffer), this->getGTTBits(), this->getMemoryBankForGtt());
        AUB::addMemoryWrite(
            *stream,
            physLRCA + 0x101c,
            &engineInfo.tailRingBuffer,
            sizeof(engineInfo.tailRingBuffer),
            this->getAddressSpace(csTraits.aubHintLRCA));
        DEBUG_BREAK_IF(engineInfo.tailRingBuffer >= engineInfo.sizeRingBuffer);
    }
    // Submit our execlist by submitting to the execlist submit ports
    {
        typename AUB::MiContextDescriptorReg contextDescriptor = {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}};
        contextDescriptor.sData.Valid = true;
        contextDescriptor.sData.ForcePageDirRestore = false;
        contextDescriptor.sData.ForceRestore = false;
        contextDescriptor.sData.Legacy = true;
        contextDescriptor.sData.FaultSupport = 0;
        contextDescriptor.sData.PrivilegeAccessOrPPGTT = true;
        contextDescriptor.sData.ADor64bitSupport = AUB::Traits::addressingBits > 32;
        auto ggttLRCA = engineInfo.ggttLRCA;
        // The descriptor stores the LRCA address in units of 4096 bytes.
        contextDescriptor.sData.LogicalRingCtxAddress = ggttLRCA / 4096;
        contextDescriptor.sData.ContextID = 0;
        this->submitLRCA(contextDescriptor);
    }
}

// Calls pollForCompletionImpl() unless the latest sent task count has already
// been polled for. pollForCompletionLock is held for the whole call.
// NOTE(review): template parameter lists are missing in this copy of the file
// (text between angle brackets appears to have been lost) - restore before
// compiling.
template void AUBCommandStreamReceiverHw::pollForCompletion() {
    const auto lock = std::unique_lock{pollForCompletionLock};
    if (this->pollForCompletionTaskCount == this->latestSentTaskCount) {
        return;
    }
    pollForCompletionImpl();
}

// Records a poll for completion of the submitted work.
// The polled task count is updated first, so it advances even when the
// sub-capture check below returns without recording anything.
// With a hardwareContextController the poll is delegated to it; otherwise a
// register poll on the engine's register at offset 0x2234 is written to the
// stream, with the family-specific value used as both mask and expected value.
template void AUBCommandStreamReceiverHw::pollForCompletionImpl() {
    this->pollForCompletionTaskCount = this->latestSentTaskCount;
    if (subCaptureManager->isSubCaptureMode()) {
        if (!subCaptureManager->isSubCaptureEnabled()) {
            return;
        }
    }
    auto streamLocked = getAubStream()->lockStream();
    if (hardwareContextController) {
        hardwareContextController->pollForCompletion();
        return;
    }
    const auto mmioBase = this->getCsTraits(osContext->getEngineType()).mmioBase;
    const bool pollNotEqual = false;
    const uint32_t mask = getMaskAndValueForPollForCompletion();
    const uint32_t value = mask;
    stream->registerPoll(
        AubMemDump::computeRegisterOffset(mmioBase, 0x2234), //EXECLIST_STATUS
        mask,
        value,
        pollNotEqual,
        AubMemDump::CmdServicesMemTraceRegisterPoll::TimeoutActionValues::Abort);
}

// Runs the base-class wait, then polls for completion, and returns the status
// of the base-class wait.
template inline WaitStatus AUBCommandStreamReceiverHw::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, QueueThrottle throttle) {
    const auto result = CommandStreamReceiverSimulatedHw::waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, useQuickKmdSleep, throttle);
    pollForCompletion();
    return result;
}

// Appends allocationView to externalAllocations; no check for duplicates.
template void AUBCommandStreamReceiverHw::makeResidentExternal(AllocationView &allocationView) {
    externalAllocations.push_back(allocationView);
}

// Removes the first entry of externalAllocations whose first member equals
// gpuAddress; does nothing if there is none.
template void AUBCommandStreamReceiverHw::makeNonResidentExternal(uint64_t gpuAddress) {
    for (auto it =
externalAllocations.begin(); it != externalAllocations.end(); it++) {
        if (it->first == gpuAddress) {
            // Stop right after erasing: the iterator is no longer valid.
            externalAllocations.erase(it);
            break;
        }
    }
}

// Writes size bytes from cpuAddress into the AUB stream at PPGTT address
// gpuAddress: adds a descriptive comment, then walks the PPGTT pages covering
// the range and emits a reserve-and-write for each piece.
// Requires initializeEngine() to have run (UNRECOVERABLE_IF otherwise).
// NOTE(review): template parameter lists and cast target types are missing in
// this copy of the file (text between angle brackets appears to have been
// lost) - restore before compiling.
template void AUBCommandStreamReceiverHw::writeMemory(uint64_t gpuAddress, void *cpuAddress, size_t size, uint32_t memoryBank, uint64_t entryBits) {
    UNRECOVERABLE_IF(!isEngineInitialized);
    {
        std::ostringstream str;
        str << "ppgtt: " << std::hex << std::showbase << gpuAddress << " end address: " << gpuAddress + size << " cpu address: " << cpuAddress << " size: " << std::dec << size;
        getAubStream()->addComment(str.str().c_str());
    }
    AubHelperHw aubHelperHw(this->isLocalMemoryEnabled());
    // The lambda's size and entryBits parameters shadow the function's; inside
    // it they are the values supplied by the page walk for the current piece.
    PageWalker walker = [&](uint64_t physAddress, size_t size, size_t offset, uint64_t entryBits) {
        AUB::reserveAddressGGTTAndWriteMmeory(*stream, static_cast(gpuAddress), cpuAddress, physAddress, size, offset, entryBits, aubHelperHw);
    };
    ppgtt->pageWalk(static_cast(gpuAddress), size, 0, entryBits, walker, memoryBank);
}

// Writes the contents of gfxAllocation to the AUB.
// Returns false without writing when the allocation is not AUB-writable or
// getParametersForWriteMemory() fails; returns true otherwise.
// The write goes through the AUB manager if there is one, else through the
// legacy writeMemory overload; the stream lock is held only for the write.
// Allocation types reported by AubHelper::isOneTimeAubWritableAllocationType()
// are marked non-writable afterwards, so they are written only once.
// Requires initializeEngine() to have run (UNRECOVERABLE_IF otherwise).
template bool AUBCommandStreamReceiverHw::writeMemory(GraphicsAllocation &gfxAllocation) {
    UNRECOVERABLE_IF(!isEngineInitialized);
    if (!this->isAubWritable(gfxAllocation)) {
        return false;
    }
    // Remember whether the allocation was unlocked on entry; only then is it
    // unlocked again below.
    bool ownsLock = !gfxAllocation.isLocked();
    uint64_t gpuAddress;
    void *cpuAddress;
    size_t size;
    // NOTE(review): presumably getParametersForWriteMemory() may lock the
    // allocation; if it can do so and still return false, this early return
    // leaves it locked - confirm against the base class.
    if (!this->getParametersForWriteMemory(gfxAllocation, gpuAddress, cpuAddress, size)) {
        return false;
    }
    auto streamLocked = getAubStream()->lockStream();
    if (aubManager) {
        this->writeMemoryWithAubManager(gfxAllocation);
    } else {
        writeMemory(gpuAddress, cpuAddress, size, this->getMemoryBank(&gfxAllocation), this->getPPGTTAdditionalBits(&gfxAllocation));
    }
    streamLocked.unlock();
    if (gfxAllocation.isLocked() && ownsLock) {
        this->getMemoryManager()->unlockResource(&gfxAllocation);
    }
    if (AubHelper::isOneTimeAubWritableAllocationType(gfxAllocation.getAllocationType())) {
        this->setAubWritable(false, gfxAllocation);
    }
    return true;
}

// Wraps the view (first = address, second = size) in a temporary
// GraphicsAllocation of type UNKNOWN and writes it with the overload above.
template bool AUBCommandStreamReceiverHw::writeMemory(AllocationView &allocationView) {
    GraphicsAllocation gfxAllocation(this->rootDeviceIndex, AllocationType::UNKNOWN, reinterpret_cast(allocationView.first), allocationView.first, 0llu, allocationView.second, MemoryPool::MemoryNull, 0u);
    return writeMemory(gfxAllocation);
}

// Forwards the register write to hardwareContextController, under the stream
// lock.
// NOTE(review): nothing is written when hardwareContextController is null -
// confirm that dropping the write on the legacy stream path is intended.
template void AUBCommandStreamReceiverHw::writeMMIO(uint32_t offset, uint32_t value) {
    auto streamLocked = getAubStream()->lockStream();
    if (hardwareContextController) {
        hardwareContextController->writeMMIO(offset, value);
    }
}

// Records an MMIO expectation on the legacy stream. With a
// hardwareContextController the call is a no-op (see the comment below).
template void AUBCommandStreamReceiverHw::expectMMIO(uint32_t mmioRegister, uint32_t expectedValue) {
    if (hardwareContextController) {
        //Add support for expectMMIO to AubStream
        return;
    }
    this->getAubStream()->expectMMIO(mmioRegister, expectedValue);
}

// Records an expectation that length bytes at GPU address gfxAddress match the
// bytes at srcAddress under compareOperation. Polls for completion first and
// holds the stream lock while recording. Always returns true.
template bool AUBCommandStreamReceiverHw::expectMemory(const void *gfxAddress, const void *srcAddress, size_t length, uint32_t compareOperation) {
    pollForCompletion();
    auto streamLocked = getAubStream()->lockStream();
    // NOTE(review): unlike the other methods of this class there is no return
    // after delegating to hardwareContextController, so the page walk below
    // also runs and records on the legacy stream - confirm this is intended.
    if (hardwareContextController) {
        hardwareContextController->expectMemory(reinterpret_cast(gfxAddress), srcAddress, length, compareOperation);
    }
    // One expectMemory packet sequence per piece reported by the page walk;
    // offset locates the piece within the source buffer.
    PageWalker walker = [&](uint64_t physAddress, size_t size, size_t offset, uint64_t entryBits) {
        UNRECOVERABLE_IF(offset > length);
        this->getAubStream()->expectMemory(physAddress, ptrOffset(srcAddress, offset), size, this->getAddressSpaceFromPTEBits(entryBits), compareOperation);
    };
    this->ppgtt->pageWalk(reinterpret_cast(gfxAddress), length, 0, PageTableEntry::nonValidBits, walker, MemoryBanks::BankNotSpecified);
    return true;
}

// Writes all external allocations and all allocations in
// allocationsForResidency to the AUB and updates each listed allocation's
// residency task count to taskCount + 1 for this OS context.
// Does nothing in sub-capture mode while capture is disabled.
// handleId is not used in this implementation.
template void AUBCommandStreamReceiverHw::processResidency(const ResidencyContainer &allocationsForResidency, uint32_t handleId) {
    if (subCaptureManager->isSubCaptureMode()) {
        if (!subCaptureManager->isSubCaptureEnabled()) {
            return;
        }
    }
    for (auto &externalAllocation : externalAllocations) {
        if (!writeMemory(externalAllocation)) {
            // A failed write is only expected for zero-sized views.
            DEBUG_BREAK_IF(externalAllocation.second != 0);
        }
    }
    for (auto &gfxAllocation : allocationsForResidency) {
        // After a file reopen everything has to be written again, including
        // allocations already marked as non-writable.
        if (dumpAubNonWritable) {
            this->setAubWritable(true, *gfxAllocation);
        }
        if (!writeMemory(*gfxAllocation)) {
            // A failed write is only expected for empty or non-writable allocations.
            DEBUG_BREAK_IF(!((gfxAllocation->getUnderlyingBufferSize() == 0) || !this->isAubWritable(*gfxAllocation)));
        }
        gfxAllocation->updateResidencyTaskCount(this->taskCount + 1, this->osContext->getContextId());
    }
    dumpAubNonWritable = false;
}

// Dumps gfxAllocation to the AUB in the format chosen by
// AubAllocDump::getDumpFormat().
// Skipped when the allocation's bcsDumpOnly flag does not match whether this
// receiver's engine is a BCS engine. When either of the
// AUBDumpAllocsOnEnqueue*Only debug flags is set, only allocations marked
// dumpable are dumped, and the mark is cleared.
template void AUBCommandStreamReceiverHw::dumpAllocation(GraphicsAllocation &gfxAllocation) {
    bool isBcsCsr = EngineHelpers::isBcs(this->osContext->getEngineType());
    if (isBcsCsr != gfxAllocation.getAubInfo().bcsDumpOnly) {
        return;
    }
    if (DebugManager.flags.AUBDumpAllocsOnEnqueueReadOnly.get() || DebugManager.flags.AUBDumpAllocsOnEnqueueSVMMemcpyOnly.get()) {
        if (!gfxAllocation.isAllocDumpable()) {
            return;
        }
        gfxAllocation.setAllocDumpable(false, isBcsCsr);
    }
    auto dumpFormat = AubAllocDump::getDumpFormat(gfxAllocation);
    // Poll before taking the stream lock, and only when a dump format is selected.
    if (dumpFormat > AubAllocDump::DumpFormat::NONE) {
        pollForCompletion();
    }
    auto streamLocked = getAubStream()->lockStream();
    if (hardwareContextController) {
        // The surface info returned by getDumpSurfaceInfo() is owned here; it
        // may be null, in which case nothing is dumped.
        auto surfaceInfo = std::unique_ptr(AubAllocDump::getDumpSurfaceInfo(gfxAllocation, dumpFormat));
        if (nullptr != surfaceInfo) {
            hardwareContextController->dumpSurface(*surfaceInfo.get());
        }
        return;
    }
    AubAllocDump::dumpAllocation(dumpFormat, gfxAllocation, getAubStream(), getDumpHandle());
}

// Asks the sub-capture manager whether capture becomes active for kernelName
// and returns its status.
// When active, switches to the sub-capture file for that kernel; if the file
// was (re)opened, the next processResidency() re-dumps non-writable
// allocations. Standalone receivers additionally call programForAubSubCapture()
// with the previous and current activity state.
template AubSubCaptureStatus AUBCommandStreamReceiverHw::checkAndActivateAubSubCapture(const std::string &kernelName) {
    auto status = subCaptureManager->checkAndActivateSubCapture(kernelName);
    if (status.isActive) {
        auto &subCaptureFile = subCaptureManager->getSubCaptureFileName(kernelName);
        auto isReopened = reopenFile(subCaptureFile);
        if (isReopened) {
            dumpAubNonWritable = true;
        }
    }
    if (this->standalone) {
        this->programForAubSubCapture(status.wasActiveInPreviousEnqueue, status.isActive);
    }
    return status;
}

// Adds message as a comment, under the stream lock: through the AUB manager if
// there is one, otherwise through the legacy stream.
template void AUBCommandStreamReceiverHw::addAubComment(const char *message) {
    auto streamLocked = getAubStream()->lockStream();
    if (aubManager) {
        aubManager->addComment(message);
        return;
    }
    getAubStream()->addComment(message);
}

template
uint32_t AUBCommandStreamReceiverHw::getDumpHandle() { return hashPtrToU32(this); } template void AUBCommandStreamReceiverHw::addGUCStartMessage(uint64_t batchBufferAddress) { typedef typename GfxFamily::MI_BATCH_BUFFER_START MI_BATCH_BUFFER_START; auto bufferSize = sizeof(uint32_t) + sizeof(MI_BATCH_BUFFER_START); AubHelperHw aubHelperHw(this->isLocalMemoryEnabled()); std::unique_ptr> buffer(this->getMemoryManager()->alignedMallocWrapper(bufferSize, MemoryConstants::pageSize), [&](void *ptr) { this->getMemoryManager()->alignedFreeWrapper(ptr); }); LinearStream linearStream(buffer.get(), bufferSize); uint32_t *header = static_cast(linearStream.getSpace(sizeof(uint32_t))); *header = getGUCWorkQueueItemHeader(); MI_BATCH_BUFFER_START *miBatchBufferStartSpace = linearStream.getSpaceForCmd(); DEBUG_BREAK_IF(bufferSize != linearStream.getUsed()); auto miBatchBufferStart = GfxFamily::cmdInitBatchBufferStart; miBatchBufferStart.setBatchBufferStartAddress(AUB::ptrToPPGTT(buffer.get())); miBatchBufferStart.setAddressSpaceIndicator(MI_BATCH_BUFFER_START::ADDRESS_SPACE_INDICATOR_PPGTT); *miBatchBufferStartSpace = miBatchBufferStart; auto physBufferAddres = ppgtt->map(reinterpret_cast(buffer.get()), bufferSize, this->getPPGTTAdditionalBits(linearStream.getGraphicsAllocation()), MemoryBanks::MainBank); AUB::reserveAddressPPGTT(*stream, reinterpret_cast(buffer.get()), bufferSize, physBufferAddres, this->getPPGTTAdditionalBits(linearStream.getGraphicsAllocation()), aubHelperHw); AUB::addMemoryWrite( *stream, physBufferAddres, buffer.get(), bufferSize, this->getAddressSpace(AubMemDump::DataTypeHintValues::TraceNotype)); PatchInfoData patchInfoData(batchBufferAddress, 0u, PatchInfoAllocationType::Default, reinterpret_cast(buffer.get()), sizeof(uint32_t) + sizeof(MI_BATCH_BUFFER_START) - sizeof(uint64_t), PatchInfoAllocationType::GUCStartMessage); this->flatBatchBufferHelper->setPatchInfoData(patchInfoData); } } // namespace NEO 
aub_command_stream_receiver_hw_bdw_and_later.inl000066400000000000000000000017331422164147700346110ustar00rootroot00000000000000compute-runtime-22.14.22890/shared/source/command_stream/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/aub_command_stream_receiver_hw_base.inl" namespace NEO { template constexpr uint32_t AUBCommandStreamReceiverHw::getMaskAndValueForPollForCompletion() { return 0x100; } template void AUBCommandStreamReceiverHw::addContextToken(uint32_t dumpHandle) { // Some simulator versions don't support adding the context token. // This hook allows specialization for those that do. } template uint32_t AUBCommandStreamReceiverHw::getGUCWorkQueueItemHeader() { uint32_t GUCWorkQueueItemHeader = 0x00030001; return GUCWorkQueueItemHeader; } template int AUBCommandStreamReceiverHw::getAddressSpaceFromPTEBits(uint64_t entryBits) const { return AubMemDump::AddressSpaceValues::TraceNonlocal; } } // namespace NEO aub_command_stream_receiver_hw_xehp_and_later.inl000066400000000000000000000022261422164147700347770ustar00rootroot00000000000000compute-runtime-22.14.22890/shared/source/command_stream/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/aub_mem_dump/page_table_entry_bits.h" #include "shared/source/command_stream/aub_command_stream_receiver_hw_base.inl" #include "shared/source/helpers/engine_node_helper.h" namespace NEO { template constexpr uint32_t AUBCommandStreamReceiverHw::getMaskAndValueForPollForCompletion() { return 0x00008000; } template void AUBCommandStreamReceiverHw::addContextToken(uint32_t dumpHandle) { AUB::createContext(*stream, dumpHandle); } template uint32_t AUBCommandStreamReceiverHw::getGUCWorkQueueItemHeader() { if (EngineHelpers::isCcs(osContext->getEngineType())) { return 0x00030401; } return 0x00030001; } template int AUBCommandStreamReceiverHw::getAddressSpaceFromPTEBits(uint64_t entryBits) const { if 
(entryBits & BIT(PageTableEntry::localMemoryBit)) { return AubMemDump::AddressSpaceValues::TraceLocal; } return AubMemDump::AddressSpaceValues::TraceNonlocal; } } // namespace NEO compute-runtime-22.14.22890/shared/source/command_stream/aub_subcapture_status.h000066400000000000000000000003401422164147700276660ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once namespace NEO { struct AubSubCaptureStatus { bool isActive; bool wasActiveInPreviousEnqueue; }; } // namespace NEO compute-runtime-22.14.22890/shared/source/command_stream/command_stream_receiver.cpp000066400000000000000000001073331422164147700305010ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/built_ins/built_ins.h" #include "shared/source/command_container/implicit_scaling.h" #include "shared/source/command_stream/experimental_command_buffer.h" #include "shared/source/command_stream/preemption.h" #include "shared/source/command_stream/scratch_space_controller.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/device/device.h" #include "shared/source/direct_submission/direct_submission_controller.h" #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/gmm_helper/page_table_mngr.h" #include "shared/source/helpers/api_specific_config.h" #include "shared/source/helpers/array_count.h" #include "shared/source/helpers/cache_policy.h" #include "shared/source/helpers/flush_stamp.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/pause_on_gpu_properties.h" #include "shared/source/helpers/string.h" #include "shared/source/helpers/timestamp_packet.h" #include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/source/memory_manager/memory_manager.h" #include 
"shared/source/memory_manager/surface.h" #include "shared/source/os_interface/hw_info_config.h" #include "shared/source/os_interface/os_context.h" #include "shared/source/os_interface/os_interface.h" #include "shared/source/os_interface/sys_calls_common.h" #include "shared/source/utilities/cpuintrinsics.h" #include "shared/source/utilities/tag_allocator.h" #include "shared/source/utilities/wait_util.h" namespace NEO { // Global table of CommandStreamReceiver factories for HW and tests CommandStreamReceiverCreateFunc commandStreamReceiverFactory[2 * IGFX_MAX_CORE] = {}; CommandStreamReceiver::CommandStreamReceiver(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield) : executionEnvironment(executionEnvironment), rootDeviceIndex(rootDeviceIndex), deviceBitfield(deviceBitfield) { residencyAllocations.reserve(20); latestSentStatelessMocsConfig = CacheSettings::unknownMocs; submissionAggregator.reset(new SubmissionAggregator()); if (ApiSpecificConfig::getApiType() == ApiSpecificConfig::L0) { this->dispatchMode = DispatchMode::ImmediateDispatch; } if (DebugManager.flags.CsrDispatchMode.get()) { this->dispatchMode = (DispatchMode)DebugManager.flags.CsrDispatchMode.get(); } flushStamp.reset(new FlushStampTracker(true)); for (int i = 0; i < IndirectHeap::Type::NUM_TYPES; ++i) { indirectHeap[i] = nullptr; } internalAllocationStorage = std::make_unique(*this); const auto &hwInfo = peekHwInfo(); uint32_t subDeviceCount = static_cast(deviceBitfield.count()); bool platformImplicitScaling = HwHelper::get(hwInfo.platform.eRenderCoreFamily).platformSupportsImplicitScaling(hwInfo); if (NEO::ImplicitScalingHelper::isImplicitScalingEnabled(deviceBitfield, platformImplicitScaling) && subDeviceCount > 1 && DebugManager.flags.EnableStaticPartitioning.get() != 0) { this->activePartitions = subDeviceCount; this->staticWorkPartitioningEnabled = true; } } CommandStreamReceiver::~CommandStreamReceiver() { if (userPauseConfirmation) { { 
std::unique_lock lock{debugPauseStateLock}; *debugPauseStateAddress = DebugPauseState::terminate; } userPauseConfirmation->join(); } for (int i = 0; i < IndirectHeap::Type::NUM_TYPES; ++i) { if (indirectHeap[i] != nullptr) { auto allocation = indirectHeap[i]->getGraphicsAllocation(); if (allocation != nullptr) { internalAllocationStorage->storeAllocation(std::unique_ptr(allocation), REUSABLE_ALLOCATION); } delete indirectHeap[i]; } } cleanupResources(); internalAllocationStorage->cleanAllocationList(-1, REUSABLE_ALLOCATION); internalAllocationStorage->cleanAllocationList(-1, TEMPORARY_ALLOCATION); getMemoryManager()->unregisterEngineForCsr(this); } SubmissionStatus CommandStreamReceiver::submitBatchBuffer(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) { this->latestSentTaskCount = taskCount + 1; SubmissionStatus retVal = this->flush(batchBuffer, allocationsForResidency); if (!isUpdateTagFromWaitEnabled()) { this->latestFlushedTaskCount = taskCount + 1; } taskCount++; return retVal; } void CommandStreamReceiver::makeResident(MultiGraphicsAllocation &gfxAllocation) { makeResident(*gfxAllocation.getGraphicsAllocation(rootDeviceIndex)); } void CommandStreamReceiver::makeResident(GraphicsAllocation &gfxAllocation) { auto submissionTaskCount = this->taskCount + 1; if (gfxAllocation.isResidencyTaskCountBelow(submissionTaskCount, osContext->getContextId())) { auto pushAllocations = true; if (DebugManager.flags.MakeEachAllocationResident.get() != -1) { pushAllocations = !DebugManager.flags.MakeEachAllocationResident.get(); } if (pushAllocations) { this->getResidencyAllocations().push_back(&gfxAllocation); } checkForNewResources(submissionTaskCount, gfxAllocation.getTaskCount(osContext->getContextId()), gfxAllocation); gfxAllocation.updateTaskCount(submissionTaskCount, osContext->getContextId()); if (!gfxAllocation.isResident(osContext->getContextId())) { this->totalMemoryUsed += gfxAllocation.getUnderlyingBufferSize(); } } 
gfxAllocation.updateResidencyTaskCount(submissionTaskCount, osContext->getContextId()); } void CommandStreamReceiver::processEviction() { this->getEvictionAllocations().clear(); } void CommandStreamReceiver::makeNonResident(GraphicsAllocation &gfxAllocation) { if (gfxAllocation.isResident(osContext->getContextId())) { if (gfxAllocation.peekEvictable()) { this->getEvictionAllocations().push_back(&gfxAllocation); } else { gfxAllocation.setEvictable(true); } } if (!gfxAllocation.isAlwaysResident(this->osContext->getContextId())) { gfxAllocation.releaseResidencyInOsContext(this->osContext->getContextId()); } } void CommandStreamReceiver::makeSurfacePackNonResident(ResidencyContainer &allocationsForResidency) { for (auto &surface : allocationsForResidency) { this->makeNonResident(*surface); } allocationsForResidency.clear(); this->processEviction(); } void CommandStreamReceiver::makeResidentHostPtrAllocation(GraphicsAllocation *gfxAllocation) { makeResident(*gfxAllocation); } WaitStatus CommandStreamReceiver::waitForTaskCount(uint32_t requiredTaskCount) { auto address = getTagAddress(); if (address) { this->downloadTagAllocation(); return baseWaitFunction(address, WaitParams{false, false, 0}, requiredTaskCount); } return WaitStatus::Ready; } WaitStatus CommandStreamReceiver::waitForTaskCountAndCleanAllocationList(uint32_t requiredTaskCount, uint32_t allocationUsage) { WaitStatus waitStatus{WaitStatus::Ready}; auto &list = allocationUsage == TEMPORARY_ALLOCATION ? 
internalAllocationStorage->getTemporaryAllocations() : internalAllocationStorage->getAllocationsForReuse(); if (!list.peekIsEmpty()) { waitStatus = this->CommandStreamReceiver::waitForTaskCount(requiredTaskCount); } internalAllocationStorage->cleanAllocationList(requiredTaskCount, allocationUsage); return waitStatus; } WaitStatus CommandStreamReceiver::waitForTaskCountAndCleanTemporaryAllocationList(uint32_t requiredTaskCount) { return waitForTaskCountAndCleanAllocationList(requiredTaskCount, TEMPORARY_ALLOCATION); } void CommandStreamReceiver::ensureCommandBufferAllocation(LinearStream &commandStream, size_t minimumRequiredSize, size_t additionalAllocationSize) { if (commandStream.getAvailableSpace() >= minimumRequiredSize) { return; } auto alignment = MemoryConstants::pageSize64k; if (DebugManager.flags.ForceCommandBufferAlignment.get() != -1) { alignment = DebugManager.flags.ForceCommandBufferAlignment.get() * MemoryConstants::kiloByte; } const auto allocationSize = alignUp(minimumRequiredSize + additionalAllocationSize, alignment); constexpr static auto allocationType = AllocationType::COMMAND_BUFFER; auto allocation = this->getInternalAllocationStorage()->obtainReusableAllocation(allocationSize, allocationType).release(); if (allocation == nullptr) { const AllocationProperties commandStreamAllocationProperties{rootDeviceIndex, true, allocationSize, allocationType, isMultiOsContextCapable(), false, osContext->getDeviceBitfield()}; allocation = this->getMemoryManager()->allocateGraphicsMemoryWithProperties(commandStreamAllocationProperties); } DEBUG_BREAK_IF(allocation == nullptr); if (commandStream.getGraphicsAllocation() != nullptr) { getInternalAllocationStorage()->storeAllocation(std::unique_ptr(commandStream.getGraphicsAllocation()), REUSABLE_ALLOCATION); } commandStream.replaceBuffer(allocation->getUnderlyingBuffer(), allocationSize - additionalAllocationSize); commandStream.replaceGraphicsAllocation(allocation); } MemoryManager 
*CommandStreamReceiver::getMemoryManager() const { DEBUG_BREAK_IF(!executionEnvironment.memoryManager); return executionEnvironment.memoryManager.get(); } LinearStream &CommandStreamReceiver::getCS(size_t minRequiredSize) { constexpr static auto additionalAllocationSize = MemoryConstants::cacheLineSize + CSRequirements::csOverfetchSize; ensureCommandBufferAllocation(this->commandStream, minRequiredSize, additionalAllocationSize); return commandStream; } OSInterface *CommandStreamReceiver::getOSInterface() const { return executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->osInterface.get(); } uint64_t CommandStreamReceiver::getWorkPartitionAllocationGpuAddress() const { if (isStaticWorkPartitioningEnabled()) { return getWorkPartitionAllocation()->getGpuAddress(); } return 0; } bool CommandStreamReceiver::isRcs() const { return this->osContext->getEngineType() == aub_stream::ENGINE_RCS; } bool CommandStreamReceiver::skipResourceCleanup() const { return this->getOSInterface() && this->getOSInterface()->getDriverModel() && this->getOSInterface()->getDriverModel()->skipResourceCleanup(); } bool CommandStreamReceiver::isGpuHangDetected() const { if (DebugManager.flags.DisableGpuHangDetection.get()) { return false; } return this->osContext && this->getOSInterface() && this->getOSInterface()->getDriverModel() && this->getOSInterface()->getDriverModel()->isGpuHangDetected(*osContext); } void CommandStreamReceiver::cleanupResources() { if (this->skipResourceCleanup()) { return; } waitForTaskCountAndCleanAllocationList(this->latestFlushedTaskCount, TEMPORARY_ALLOCATION); waitForTaskCountAndCleanAllocationList(this->latestFlushedTaskCount, REUSABLE_ALLOCATION); if (debugSurface) { getMemoryManager()->freeGraphicsMemory(debugSurface); debugSurface = nullptr; } if (commandStream.getCpuBase()) { getMemoryManager()->freeGraphicsMemory(commandStream.getGraphicsAllocation()); commandStream.replaceGraphicsAllocation(nullptr); commandStream.replaceBuffer(nullptr, 0); } if 
(tagsMultiAllocation) { // Null tag address to prevent waiting for tag update when freeing it tagAllocation = nullptr; tagAddress = nullptr; DEBUG_BREAK_IF(tagAllocation != nullptr); DEBUG_BREAK_IF(tagAddress != nullptr); for (auto graphicsAllocation : tagsMultiAllocation->getGraphicsAllocations()) { getMemoryManager()->freeGraphicsMemory(graphicsAllocation); } delete tagsMultiAllocation; tagsMultiAllocation = nullptr; } if (globalFenceAllocation) { getMemoryManager()->freeGraphicsMemory(globalFenceAllocation); globalFenceAllocation = nullptr; } if (preemptionAllocation) { getMemoryManager()->freeGraphicsMemory(preemptionAllocation); preemptionAllocation = nullptr; } if (perDssBackedBuffer) { getMemoryManager()->freeGraphicsMemory(perDssBackedBuffer); perDssBackedBuffer = nullptr; } if (clearColorAllocation) { getMemoryManager()->freeGraphicsMemory(clearColorAllocation); clearColorAllocation = nullptr; } if (workPartitionAllocation) { getMemoryManager()->freeGraphicsMemory(workPartitionAllocation); workPartitionAllocation = nullptr; } } WaitStatus CommandStreamReceiver::waitForCompletionWithTimeout(const WaitParams ¶ms, uint32_t taskCountToWait) { uint32_t latestSentTaskCount = this->latestFlushedTaskCount; if (latestSentTaskCount < taskCountToWait) { if (!this->flushBatchedSubmissions()) { const auto isGpuHang{isGpuHangDetected()}; return isGpuHang ? 
WaitStatus::GpuHang : WaitStatus::NotReady; } } return baseWaitFunction(getTagAddress(), params, taskCountToWait); } WaitStatus CommandStreamReceiver::baseWaitFunction(volatile uint32_t *pollAddress, const WaitParams ¶ms, uint32_t taskCountToWait) { std::chrono::microseconds elapsedTimeSinceGpuHangCheck{0}; std::chrono::high_resolution_clock::time_point waitStartTime, lastHangCheckTime, currentTime; int64_t timeDiff = 0; uint32_t latestSentTaskCount = this->latestFlushedTaskCount; if (latestSentTaskCount < taskCountToWait) { this->flushTagUpdate(); } volatile uint32_t *partitionAddress = pollAddress; waitStartTime = std::chrono::high_resolution_clock::now(); lastHangCheckTime = waitStartTime; for (uint32_t i = 0; i < activePartitions; i++) { while (*partitionAddress < taskCountToWait && timeDiff <= params.waitTimeout) { if (!params.indefinitelyPoll && WaitUtils::waitFunction(partitionAddress, taskCountToWait)) { break; } currentTime = std::chrono::high_resolution_clock::now(); elapsedTimeSinceGpuHangCheck = std::chrono::duration_cast(currentTime - lastHangCheckTime); if (elapsedTimeSinceGpuHangCheck.count() >= gpuHangCheckPeriod.count()) { lastHangCheckTime = currentTime; if (isGpuHangDetected()) { return WaitStatus::GpuHang; } } if (params.enableTimeout) { timeDiff = std::chrono::duration_cast(currentTime - waitStartTime).count(); } } partitionAddress = ptrOffset(partitionAddress, this->postSyncWriteOffset); } partitionAddress = pollAddress; for (uint32_t i = 0; i < activePartitions; i++) { if (*partitionAddress < taskCountToWait) { return WaitStatus::NotReady; } partitionAddress = ptrOffset(partitionAddress, this->postSyncWriteOffset); } return WaitStatus::Ready; } void CommandStreamReceiver::setTagAllocation(GraphicsAllocation *allocation) { this->tagAllocation = allocation; UNRECOVERABLE_IF(allocation == nullptr); this->tagAddress = reinterpret_cast(allocation->getUnderlyingBuffer()); this->debugPauseStateAddress = reinterpret_cast( 
reinterpret_cast(allocation->getUnderlyingBuffer()) + debugPauseStateAddressOffset); } MultiGraphicsAllocation &CommandStreamReceiver::createTagsMultiAllocation() { std::vector rootDeviceIndices; if (ApiSpecificConfig::getApiType() == ApiSpecificConfig::L0) { rootDeviceIndices.push_back(rootDeviceIndex); } else { for (auto index = 0u; index < this->executionEnvironment.rootDeviceEnvironments.size(); index++) { if (this->executionEnvironment.rootDeviceEnvironments[index].get()->getHardwareInfo()->platform.eProductFamily == this->executionEnvironment.rootDeviceEnvironments[this->rootDeviceIndex].get()->getHardwareInfo()->platform.eProductFamily) { rootDeviceIndices.push_back(index); } } } auto maxRootDeviceIndex = *std::max_element(rootDeviceIndices.begin(), rootDeviceIndices.end(), std::less()); auto allocations = new MultiGraphicsAllocation(maxRootDeviceIndex); AllocationProperties unifiedMemoryProperties{rootDeviceIndices.at(0), MemoryConstants::pageSize, AllocationType::TAG_BUFFER, systemMemoryBitfield}; this->getMemoryManager()->createMultiGraphicsAllocationInSystemMemoryPool(rootDeviceIndices, unifiedMemoryProperties, *allocations); return *allocations; } FlushStamp CommandStreamReceiver::obtainCurrentFlushStamp() const { return flushStamp->peekStamp(); } void CommandStreamReceiver::setRequiredScratchSizes(uint32_t newRequiredScratchSize, uint32_t newRequiredPrivateScratchSize) { if (newRequiredScratchSize > requiredScratchSize) { requiredScratchSize = newRequiredScratchSize; } if (newRequiredPrivateScratchSize > requiredPrivateScratchSize) { requiredPrivateScratchSize = newRequiredPrivateScratchSize; } } GraphicsAllocation *CommandStreamReceiver::getScratchAllocation() { return scratchSpaceController->getScratchSpaceAllocation(); } void CommandStreamReceiver::initProgrammingFlags() { isPreambleSent = false; GSBAFor32BitProgrammed = false; bindingTableBaseAddressRequired = true; mediaVfeStateDirty = true; lastVmeSubslicesConfig = false; lastSentL3Config = 0; 
lastMediaSamplerConfig = -1; lastPreemptionMode = PreemptionMode::Initial; latestSentStatelessMocsConfig = 0; lastSentUseGlobalAtomics = false; } void CommandStreamReceiver::programForAubSubCapture(bool wasActiveInPreviousEnqueue, bool isActive) { if (!wasActiveInPreviousEnqueue && isActive) { // force CSR reprogramming upon subcapture activation this->initProgrammingFlags(); } if (wasActiveInPreviousEnqueue && !isActive) { // flush BB upon subcapture deactivation this->flushBatchedSubmissions(); } } ResidencyContainer &CommandStreamReceiver::getResidencyAllocations() { return this->residencyAllocations; } ResidencyContainer &CommandStreamReceiver::getEvictionAllocations() { return this->evictionAllocations; } AubSubCaptureStatus CommandStreamReceiver::checkAndActivateAubSubCapture(const std::string &kernelName) { return {false, false}; } void CommandStreamReceiver::addAubComment(const char *comment) {} void CommandStreamReceiver::downloadAllocation(GraphicsAllocation &gfxAllocation) { if (this->downloadAllocationImpl) { this->downloadAllocationImpl(gfxAllocation); } } void CommandStreamReceiver::startControllingDirectSubmissions() { auto controller = this->executionEnvironment.directSubmissionController.get(); if (controller) { controller->startControlling(); } } GraphicsAllocation *CommandStreamReceiver::allocateDebugSurface(size_t size) { UNRECOVERABLE_IF(debugSurface != nullptr); debugSurface = getMemoryManager()->allocateGraphicsMemoryWithProperties({rootDeviceIndex, size, AllocationType::INTERNAL_HOST_MEMORY, getOsContext().getDeviceBitfield()}); return debugSurface; } IndirectHeap &CommandStreamReceiver::getIndirectHeap(IndirectHeap::Type heapType, size_t minRequiredSize) { DEBUG_BREAK_IF(static_cast(heapType) >= arrayCount(indirectHeap)); auto &heap = indirectHeap[heapType]; GraphicsAllocation *heapMemory = nullptr; if (heap) heapMemory = heap->getGraphicsAllocation(); if (heap && heap->getAvailableSpace() < minRequiredSize && heapMemory) { 
internalAllocationStorage->storeAllocation(std::unique_ptr(heapMemory), REUSABLE_ALLOCATION); heapMemory = nullptr; } if (!heapMemory) { allocateHeapMemory(heapType, minRequiredSize, heap); } return *heap; } void CommandStreamReceiver::allocateHeapMemory(IndirectHeap::Type heapType, size_t minRequiredSize, IndirectHeap *&indirectHeap) { size_t reservedSize = 0; auto finalHeapSize = getDefaultHeapSize(); if (IndirectHeap::Type::SURFACE_STATE == heapType) { finalHeapSize = defaultSshSize; } bool requireInternalHeap = IndirectHeap::Type::INDIRECT_OBJECT == heapType ? canUse4GbHeaps : false; if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) { requireInternalHeap = false; } minRequiredSize += reservedSize; finalHeapSize = alignUp(std::max(finalHeapSize, minRequiredSize), MemoryConstants::pageSize); auto allocationType = AllocationType::LINEAR_STREAM; if (requireInternalHeap) { allocationType = AllocationType::INTERNAL_HEAP; } auto heapMemory = internalAllocationStorage->obtainReusableAllocation(finalHeapSize, allocationType).release(); if (!heapMemory) { heapMemory = getMemoryManager()->allocateGraphicsMemoryWithProperties({rootDeviceIndex, true, finalHeapSize, allocationType, isMultiOsContextCapable(), false, osContext->getDeviceBitfield()}); } else { finalHeapSize = std::max(heapMemory->getUnderlyingBufferSize(), finalHeapSize); } if (IndirectHeap::Type::SURFACE_STATE == heapType) { DEBUG_BREAK_IF(minRequiredSize > defaultSshSize - MemoryConstants::pageSize); finalHeapSize = defaultSshSize - MemoryConstants::pageSize; } if (indirectHeap) { indirectHeap->replaceBuffer(heapMemory->getUnderlyingBuffer(), finalHeapSize); indirectHeap->replaceGraphicsAllocation(heapMemory); } else { indirectHeap = new IndirectHeap(heapMemory, requireInternalHeap); indirectHeap->overrideMaxSize(finalHeapSize); } scratchSpaceController->reserveHeap(heapType, indirectHeap); } void CommandStreamReceiver::releaseIndirectHeap(IndirectHeap::Type heapType) { 
DEBUG_BREAK_IF(static_cast(heapType) >= arrayCount(indirectHeap)); auto &heap = indirectHeap[heapType]; if (heap) { auto heapMemory = heap->getGraphicsAllocation(); if (heapMemory != nullptr) internalAllocationStorage->storeAllocation(std::unique_ptr(heapMemory), REUSABLE_ALLOCATION); heap->replaceBuffer(nullptr, 0); heap->replaceGraphicsAllocation(nullptr); } } void CommandStreamReceiver::setExperimentalCmdBuffer(std::unique_ptr &&cmdBuffer) { experimentalCmdBuffer = std::move(cmdBuffer); } void *CommandStreamReceiver::asyncDebugBreakConfirmation(void *arg) { auto self = reinterpret_cast(arg); do { auto debugPauseStateValue = DebugPauseState::waitingForUserStartConfirmation; if (DebugManager.flags.PauseOnGpuMode.get() != PauseOnGpuProperties::PauseMode::AfterWorkload) { do { { std::unique_lock lock{self->debugPauseStateLock}; debugPauseStateValue = *self->debugPauseStateAddress; } if (debugPauseStateValue == DebugPauseState::terminate) { return nullptr; } std::this_thread::yield(); } while (debugPauseStateValue != DebugPauseState::waitingForUserStartConfirmation); std::cout << "Debug break: Press enter to start workload" << std::endl; self->debugConfirmationFunction(); debugPauseStateValue = DebugPauseState::hasUserStartConfirmation; { std::unique_lock lock{self->debugPauseStateLock}; *self->debugPauseStateAddress = debugPauseStateValue; } } if (DebugManager.flags.PauseOnGpuMode.get() != PauseOnGpuProperties::PauseMode::BeforeWorkload) { do { { std::unique_lock lock{self->debugPauseStateLock}; debugPauseStateValue = *self->debugPauseStateAddress; } if (debugPauseStateValue == DebugPauseState::terminate) { return nullptr; } std::this_thread::yield(); } while (debugPauseStateValue != DebugPauseState::waitingForUserEndConfirmation); std::cout << "Debug break: Workload ended, press enter to continue" << std::endl; self->debugConfirmationFunction(); { std::unique_lock lock{self->debugPauseStateLock}; *self->debugPauseStateAddress = 
DebugPauseState::hasUserEndConfirmation; } } } while (DebugManager.flags.PauseOnEnqueue.get() == PauseOnGpuProperties::DebugFlagValues::OnEachEnqueue || DebugManager.flags.PauseOnBlitCopy.get() == PauseOnGpuProperties::DebugFlagValues::OnEachEnqueue); return nullptr; } bool CommandStreamReceiver::initializeTagAllocation() { this->tagsMultiAllocation = &this->createTagsMultiAllocation(); auto tagAllocation = tagsMultiAllocation->getGraphicsAllocation(rootDeviceIndex); if (!tagAllocation) { return false; } this->setTagAllocation(tagAllocation); auto initValue = DebugManager.flags.EnableNullHardware.get() ? static_cast(-1) : initialHardwareTag; auto tagAddress = this->tagAddress; uint32_t subDevices = static_cast(this->deviceBitfield.count()); for (uint32_t i = 0; i < subDevices; i++) { *tagAddress = initValue; tagAddress = ptrOffset(tagAddress, this->postSyncWriteOffset); } *this->debugPauseStateAddress = DebugManager.flags.EnableNullHardware.get() ? DebugPauseState::disabled : DebugPauseState::waitingForFirstSemaphore; PRINT_DEBUG_STRING(DebugManager.flags.PrintTagAllocationAddress.get(), stdout, "\nCreated tag allocation %p for engine %u\n", this->tagAddress, static_cast(osContext->getEngineType())); if (DebugManager.flags.PauseOnEnqueue.get() != -1 || DebugManager.flags.PauseOnBlitCopy.get() != -1) { userPauseConfirmation = Thread::create(CommandStreamReceiver::asyncDebugBreakConfirmation, reinterpret_cast(this)); } return true; } bool CommandStreamReceiver::createWorkPartitionAllocation(const Device &device) { if (!staticWorkPartitioningEnabled) { return false; } UNRECOVERABLE_IF(device.getNumGenericSubDevices() < 2); AllocationProperties properties{this->rootDeviceIndex, true, 4096u, AllocationType::WORK_PARTITION_SURFACE, true, false, deviceBitfield}; this->workPartitionAllocation = getMemoryManager()->allocateGraphicsMemoryWithProperties(properties); if (this->workPartitionAllocation == nullptr) { return false; } uint32_t logicalId = 0; for (uint32_t 
deviceIndex = 0; deviceIndex < deviceBitfield.size(); deviceIndex++) { if (!deviceBitfield.test(deviceIndex)) { continue; } const uint32_t copySrc[2] = {logicalId++, deviceIndex}; DeviceBitfield copyBitfield{}; copyBitfield.set(deviceIndex); auto copySuccess = MemoryTransferHelper::transferMemoryToAllocationBanks(device, workPartitionAllocation, 0, copySrc, sizeof(copySrc), copyBitfield); if (!copySuccess) { return false; } } return true; } bool CommandStreamReceiver::createGlobalFenceAllocation() { auto hwInfo = executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo(); if (!HwHelper::get(hwInfo->platform.eRenderCoreFamily).isFenceAllocationRequired(*hwInfo)) { return true; } DEBUG_BREAK_IF(this->globalFenceAllocation != nullptr); this->globalFenceAllocation = getMemoryManager()->allocateGraphicsMemoryWithProperties({rootDeviceIndex, MemoryConstants::pageSize, AllocationType::GLOBAL_FENCE, osContext->getDeviceBitfield()}); return this->globalFenceAllocation != nullptr; } bool CommandStreamReceiver::createPreemptionAllocation() { auto hwInfo = executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo(); size_t preemptionSurfaceSize = hwInfo->capabilityTable.requiredPreemptionSurfaceSize; if (DebugManager.flags.OverrideCsrAllocationSize.get() > 0) { preemptionSurfaceSize = DebugManager.flags.OverrideCsrAllocationSize.get(); } AllocationProperties properties{rootDeviceIndex, true, preemptionSurfaceSize, AllocationType::PREEMPTION, isMultiOsContextCapable(), false, deviceBitfield}; properties.flags.uncacheable = hwInfo->workaroundTable.flags.waCSRUncachable; properties.alignment = HwHelper::get(hwInfo->platform.eRenderCoreFamily).getPreemptionAllocationAlignment(); this->preemptionAllocation = getMemoryManager()->allocateGraphicsMemoryWithProperties(properties); return this->preemptionAllocation != nullptr; } std::unique_lock CommandStreamReceiver::obtainUniqueOwnership() { return std::unique_lock(this->ownershipMutex); } 
std::unique_lock CommandStreamReceiver::obtainHostPtrSurfaceCreationLock() { return std::unique_lock(this->hostPtrSurfaceCreationMutex); } AllocationsList &CommandStreamReceiver::getTemporaryAllocations() { return internalAllocationStorage->getTemporaryAllocations(); } AllocationsList &CommandStreamReceiver::getAllocationsForReuse() { return internalAllocationStorage->getAllocationsForReuse(); } bool CommandStreamReceiver::createAllocationForHostSurface(HostPtrSurface &surface, bool requiresL3Flush) { std::unique_lock lock = this->obtainHostPtrSurfaceCreationLock(); auto allocation = internalAllocationStorage->obtainTemporaryAllocationWithPtr(surface.getSurfaceSize(), surface.getMemoryPointer(), AllocationType::EXTERNAL_HOST_PTR); if (allocation == nullptr) { auto memoryManager = getMemoryManager(); AllocationProperties properties{rootDeviceIndex, false, // allocateMemory surface.getSurfaceSize(), AllocationType::EXTERNAL_HOST_PTR, false, // isMultiStorageAllocation osContext->getDeviceBitfield()}; properties.flags.flushL3RequiredForRead = properties.flags.flushL3RequiredForWrite = requiresL3Flush; allocation.reset(memoryManager->allocateGraphicsMemoryWithProperties(properties, surface.getMemoryPointer())); if (allocation == nullptr && surface.peekIsPtrCopyAllowed()) { // Try with no host pointer allocation and copy allocation.reset(memoryManager->allocateInternalGraphicsMemoryWithHostCopy(rootDeviceIndex, internalAllocationStorage->getDeviceBitfield(), surface.getMemoryPointer(), surface.getSurfaceSize())); } } if (allocation == nullptr) { return false; } allocation->updateTaskCount(CompletionStamp::notReady, osContext->getContextId()); surface.setAllocation(allocation.get()); internalAllocationStorage->storeAllocation(std::move(allocation), TEMPORARY_ALLOCATION); return true; } TagAllocatorBase *CommandStreamReceiver::getEventTsAllocator() { if (profilingTimeStampAllocator.get() == nullptr) { std::vector rootDeviceIndices = {rootDeviceIndex}; 
profilingTimeStampAllocator = std::make_unique>(rootDeviceIndices, getMemoryManager(), getPreferredTagPoolSize(), MemoryConstants::cacheLineSize, sizeof(HwTimeStamps), false, osContext->getDeviceBitfield()); } return profilingTimeStampAllocator.get(); } TagAllocatorBase *CommandStreamReceiver::getEventPerfCountAllocator(const uint32_t tagSize) { if (perfCounterAllocator.get() == nullptr) { std::vector rootDeviceIndices = {rootDeviceIndex}; perfCounterAllocator = std::make_unique>( rootDeviceIndices, getMemoryManager(), getPreferredTagPoolSize(), MemoryConstants::cacheLineSize, tagSize, false, osContext->getDeviceBitfield()); } return perfCounterAllocator.get(); } size_t CommandStreamReceiver::getPreferredTagPoolSize() const { if (DebugManager.flags.DisableTimestampPacketOptimizations.get()) { return 1; } return 2048; } bool CommandStreamReceiver::expectMemory(const void *gfxAddress, const void *srcAddress, size_t length, uint32_t compareOperation) { auto isMemoryEqual = (memcmp(gfxAddress, srcAddress, length) == 0); auto isEqualMemoryExpected = (compareOperation == AubMemDump::CmdServicesMemTraceMemoryCompare::CompareOperationValues::CompareEqual); return (isMemoryEqual == isEqualMemoryExpected); } bool CommandStreamReceiver::needsPageTableManager() const { auto hwInfo = executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo(); if (pageTableManager.get() != nullptr) { return false; } return HwInfoConfig::get(hwInfo->platform.eProductFamily)->isPageTableManagerSupported(*hwInfo); } void CommandStreamReceiver::printDeviceIndex() { if (DebugManager.flags.PrintDeviceAndEngineIdOnSubmission.get()) { printf("%u: Submission to RootDevice Index: %u, Sub-Devices Mask: %lu, EngineId: %u (%s, %s)\n", SysCalls::getProcessId(), this->getRootDeviceIndex(), this->osContext->getDeviceBitfield().to_ulong(), this->osContext->getEngineType(), EngineHelpers::engineTypeToString(this->osContext->getEngineType()).c_str(), 
EngineHelpers::engineUsageToString(this->osContext->getEngineUsage()).c_str()); } } void CommandStreamReceiver::checkForNewResources(uint32_t submittedTaskCount, uint32_t allocationTaskCount, GraphicsAllocation &gfxAllocation) { if (useNewResourceImplicitFlush) { if (allocationTaskCount == GraphicsAllocation::objectNotUsed && !GraphicsAllocation::isIsaAllocationType(gfxAllocation.getAllocationType())) { newResources = true; if (DebugManager.flags.ProvideVerboseImplicitFlush.get()) { printf("New resource detected of type %llu\n", static_cast(gfxAllocation.getAllocationType())); } } } } bool CommandStreamReceiver::checkImplicitFlushForGpuIdle() { if (useGpuIdleImplicitFlush) { if (this->taskCount == *getTagAddress()) { return true; } } return false; } void CommandStreamReceiver::downloadTagAllocation() { if (this->getTagAllocation()) { this->downloadAllocation(*this->getTagAllocation()); } } bool CommandStreamReceiver::testTaskCountReady(volatile uint32_t *pollAddress, uint32_t taskCountToWait) { this->downloadTagAllocation(); for (uint32_t i = 0; i < activePartitions; i++) { if (!WaitUtils::waitFunction(pollAddress, taskCountToWait)) { return false; } pollAddress = ptrOffset(pollAddress, this->postSyncWriteOffset); } return true; } const HardwareInfo &CommandStreamReceiver::peekHwInfo() const { return *executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo(); } } // namespace NEO compute-runtime-22.14.22890/shared/source/command_stream/command_stream_receiver.h000066400000000000000000000452711422164147700301500ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/aub_subcapture_status.h" #include "shared/source/command_stream/csr_definitions.h" #include "shared/source/command_stream/csr_properties_flags.h" #include "shared/source/command_stream/linear_stream.h" #include "shared/source/command_stream/stream_properties.h" #include 
"shared/source/command_stream/submission_status.h" #include "shared/source/command_stream/submissions_aggregator.h" #include "shared/source/command_stream/thread_arbitration_policy.h" #include "shared/source/command_stream/wait_status.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/blit_commands_helper.h" #include "shared/source/helpers/common_types.h" #include "shared/source/helpers/completion_stamp.h" #include "shared/source/helpers/flat_batch_buffer_helper.h" #include "shared/source/helpers/options.h" #include "shared/source/helpers/pipe_control_args.h" #include "shared/source/indirect_heap/indirect_heap.h" #include "shared/source/kernel/grf_config.h" #include "shared/source/os_interface/os_thread.h" #include "shared/source/utilities/spinlock.h" #include #include #include namespace NEO { class AllocationsList; class Device; class ExecutionEnvironment; class ExperimentalCommandBuffer; class GmmPageTableMngr; class GraphicsAllocation; class HostPtrSurface; class IndirectHeap; class InternalAllocationStorage; class LinearStream; class MemoryManager; class MultiGraphicsAllocation; class OsContext; class OSInterface; class ScratchSpaceController; class HwPerfCounter; class HwTimeStamps; class TagAllocatorBase; template class TimestampPackets; template class TagAllocator; enum class DispatchMode { DeviceDefault = 0, //default for given device ImmediateDispatch, //everything is submitted to the HW immediately AdaptiveDispatch, //dispatching is handled to async thread, which combines batch buffers basing on load (not implemented) BatchedDispatchWithCounter, //dispatching is batched, after n commands there is implicit flush (not implemented) BatchedDispatch // dispatching is batched, explicit clFlush is required }; class CommandStreamReceiver { public: enum class SamplerCacheFlushState { samplerCacheFlushNotRequired, samplerCacheFlushBefore, //add sampler cache flush before Walker with redescribed image samplerCacheFlushAfter //add 
sampler cache flush after Walker with redescribed image }; using MutexType = std::recursive_mutex; CommandStreamReceiver(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield); virtual ~CommandStreamReceiver(); virtual SubmissionStatus flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) = 0; virtual CompletionStamp flushTask(LinearStream &commandStream, size_t commandStreamStart, const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh, uint32_t taskLevel, DispatchFlags &dispatchFlags, Device &device) = 0; virtual bool flushBatchedSubmissions() = 0; MOCKABLE_VIRTUAL SubmissionStatus submitBatchBuffer(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency); virtual void pollForCompletion() {} virtual void programHardwareContext(LinearStream &cmdStream) = 0; virtual size_t getCmdsSizeForHardwareContext() const = 0; void makeResident(MultiGraphicsAllocation &gfxAllocation); MOCKABLE_VIRTUAL void makeResident(GraphicsAllocation &gfxAllocation); virtual void makeNonResident(GraphicsAllocation &gfxAllocation); MOCKABLE_VIRTUAL void makeSurfacePackNonResident(ResidencyContainer &allocationsForResidency); virtual void processResidency(const ResidencyContainer &allocationsForResidency, uint32_t handleId) {} virtual void processEviction(); void makeResidentHostPtrAllocation(GraphicsAllocation *gfxAllocation); MOCKABLE_VIRTUAL void ensureCommandBufferAllocation(LinearStream &commandStream, size_t minimumRequiredSize, size_t additionalAllocationSize); MemoryManager *getMemoryManager() const; ResidencyContainer &getResidencyAllocations(); ResidencyContainer &getEvictionAllocations(); virtual GmmPageTableMngr *createPageTableManager() { return nullptr; } bool needsPageTableManager() const; MOCKABLE_VIRTUAL WaitStatus waitForTaskCount(uint32_t requiredTaskCount); WaitStatus waitForTaskCountAndCleanAllocationList(uint32_t requiredTaskCount, uint32_t allocationUsage); 
MOCKABLE_VIRTUAL WaitStatus waitForTaskCountAndCleanTemporaryAllocationList(uint32_t requiredTaskCount); LinearStream &getCS(size_t minRequiredSize = 1024u); OSInterface *getOSInterface() const; ExecutionEnvironment &peekExecutionEnvironment() const { return executionEnvironment; }; MOCKABLE_VIRTUAL void setTagAllocation(GraphicsAllocation *allocation); GraphicsAllocation *getTagAllocation() const { return tagAllocation; } MultiGraphicsAllocation *getTagsMultiAllocation() const { return tagsMultiAllocation; } MultiGraphicsAllocation &createTagsMultiAllocation(); volatile uint32_t *getTagAddress() const { return tagAddress; } uint64_t getDebugPauseStateGPUAddress() const { return tagAllocation->getGpuAddress() + debugPauseStateAddressOffset; } virtual bool waitForFlushStamp(FlushStamp &flushStampToWait) { return true; }; uint32_t peekTaskCount() const { return taskCount; } uint32_t peekTaskLevel() const { return taskLevel; } FlushStamp obtainCurrentFlushStamp() const; uint32_t peekLatestSentTaskCount() const { return latestSentTaskCount; } uint32_t peekLatestFlushedTaskCount() const { return latestFlushedTaskCount; } void enableNTo1SubmissionModel() { this->nTo1SubmissionModelEnabled = true; } bool isNTo1SubmissionModelEnabled() const { return this->nTo1SubmissionModelEnabled; } void overrideDispatchPolicy(DispatchMode overrideValue) { this->dispatchMode = overrideValue; } void setMediaVFEStateDirty(bool dirty) { mediaVfeStateDirty = dirty; } bool getMediaVFEStateDirty() { return mediaVfeStateDirty; } void setGSBAStateDirty(bool dirty) { GSBAStateDirty = dirty; } bool getGSBAStateDirty() { return GSBAStateDirty; } void setRequiredScratchSizes(uint32_t newRequiredScratchSize, uint32_t newRequiredPrivateScratchSize); GraphicsAllocation *getScratchAllocation(); GraphicsAllocation *getDebugSurfaceAllocation() const { return debugSurface; } GraphicsAllocation *allocateDebugSurface(size_t size); GraphicsAllocation *getPreemptionAllocation() const { return 
preemptionAllocation; } GraphicsAllocation *getGlobalFenceAllocation() const { return globalFenceAllocation; } GraphicsAllocation *getWorkPartitionAllocation() const { return workPartitionAllocation; } void requestStallingCommandsOnNextFlush() { stallingCommandsOnNextFlushRequired = true; } bool isStallingCommandsOnNextFlushRequired() const { return stallingCommandsOnNextFlushRequired; } virtual WaitStatus waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, QueueThrottle throttle) = 0; virtual WaitStatus waitForCompletionWithTimeout(const WaitParams ¶ms, uint32_t taskCountToWait); WaitStatus baseWaitFunction(volatile uint32_t *pollAddress, const WaitParams ¶ms, uint32_t taskCountToWait); MOCKABLE_VIRTUAL bool testTaskCountReady(volatile uint32_t *pollAddress, uint32_t taskCountToWait); virtual void downloadAllocations(){}; void setSamplerCacheFlushRequired(SamplerCacheFlushState value) { this->samplerCacheFlushRequired = value; } FlatBatchBufferHelper &getFlatBatchBufferHelper() const { return *flatBatchBufferHelper; } void overwriteFlatBatchBufferHelper(FlatBatchBufferHelper *newHelper) { flatBatchBufferHelper.reset(newHelper); } MOCKABLE_VIRTUAL void initProgrammingFlags(); virtual AubSubCaptureStatus checkAndActivateAubSubCapture(const std::string &kernelName); void programForAubSubCapture(bool wasActiveInPreviousEnqueue, bool isActive); virtual void addAubComment(const char *comment); IndirectHeap &getIndirectHeap(IndirectHeap::Type heapType, size_t minRequiredSize); void allocateHeapMemory(IndirectHeap::Type heapType, size_t minRequiredSize, IndirectHeap *&indirectHeap); void releaseIndirectHeap(IndirectHeap::Type heapType); virtual enum CommandStreamReceiverType getType() = 0; void setExperimentalCmdBuffer(std::unique_ptr &&cmdBuffer); bool initializeTagAllocation(); MOCKABLE_VIRTUAL bool createWorkPartitionAllocation(const Device &device); MOCKABLE_VIRTUAL bool createGlobalFenceAllocation(); 
MOCKABLE_VIRTUAL bool createPreemptionAllocation(); MOCKABLE_VIRTUAL bool createPerDssBackedBuffer(Device &device); MOCKABLE_VIRTUAL std::unique_lock obtainUniqueOwnership(); bool peekTimestampPacketWriteEnabled() const { return timestampPacketWriteEnabled; } bool isLatestTaskCountFlushed() { return this->peekLatestFlushedTaskCount() == this->peekTaskCount(); } size_t defaultSshSize = 0u; bool canUse4GbHeaps = true; AllocationsList &getTemporaryAllocations(); AllocationsList &getAllocationsForReuse(); InternalAllocationStorage *getInternalAllocationStorage() const { return internalAllocationStorage.get(); } MOCKABLE_VIRTUAL bool createAllocationForHostSurface(HostPtrSurface &surface, bool requiresL3Flush); virtual size_t getPreferredTagPoolSize() const; virtual void setupContext(OsContext &osContext) { this->osContext = &osContext; } OsContext &getOsContext() const { return *osContext; } TagAllocatorBase *getEventTsAllocator(); TagAllocatorBase *getEventPerfCountAllocator(const uint32_t tagSize); virtual TagAllocatorBase *getTimestampPacketAllocator() = 0; virtual bool expectMemory(const void *gfxAddress, const void *srcAddress, size_t length, uint32_t compareOperation); virtual bool isMultiOsContextCapable() const = 0; virtual MemoryCompressionState getMemoryCompressionState(bool auxTranslationRequired, const HardwareInfo &hwInfo) const = 0; void setLatestSentTaskCount(uint32_t latestSentTaskCount) { this->latestSentTaskCount = latestSentTaskCount; } void setLatestFlushedTaskCount(uint32_t latestFlushedTaskCount) { this->latestFlushedTaskCount = latestFlushedTaskCount; } virtual uint32_t flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) = 0; virtual void flushTagUpdate() = 0; virtual void flushNonKernelTask(GraphicsAllocation *eventAlloc, uint64_t immediateGpuAddress, uint64_t immediateData, PipeControlArgs &args, bool isWaitOnEvents, bool isStartOfDispatch, bool isEndOfDispatch) = 0; virtual 
void updateTagFromWait() = 0; virtual bool isUpdateTagFromWaitEnabled() = 0; ScratchSpaceController *getScratchSpaceController() const { return scratchSpaceController.get(); } void downloadAllocation(GraphicsAllocation &gfxAllocation); void registerInstructionCacheFlush() { auto mutex = obtainUniqueOwnership(); requiresInstructionCacheFlush = true; } bool isLocalMemoryEnabled() const { return localMemoryEnabled; } uint32_t getRootDeviceIndex() { return rootDeviceIndex; } void startControllingDirectSubmissions(); bool isAnyDirectSubmissionEnabled() { return this->isDirectSubmissionEnabled() || isBlitterDirectSubmissionEnabled(); } virtual bool initDirectSubmission(Device &device, OsContext &osContext) { return true; } virtual bool isDirectSubmissionEnabled() const { return false; } virtual bool isBlitterDirectSubmissionEnabled() const { return false; } virtual void stopDirectSubmission() {} bool isStaticWorkPartitioningEnabled() const { return staticWorkPartitioningEnabled; } uint64_t getWorkPartitionAllocationGpuAddress() const; MOCKABLE_VIRTUAL bool isRcs() const; virtual void initializeDefaultsForInternalEngine(){}; virtual GraphicsAllocation *getClearColorAllocation() = 0; virtual void postInitFlagsSetup() = 0; bool isUsedNotifyEnableForPostSync() const { return useNotifyEnableForPostSync; } NEO::StreamProperties &getStreamProperties() { return this->streamProperties; } inline void setActivePartitions(uint32_t newPartitionCount) { activePartitions = newPartitionCount; } inline uint32_t getActivePartitions() const { return activePartitions; } bool skipResourceCleanup() const; inline bool isProgramActivePartitionConfigRequired() const { return this->isDirectSubmissionEnabled() ? 
false : this->activePartitionsConfig != this->activePartitions; } std::unique_ptr pageTableManager; inline uint32_t getPostSyncWriteOffset() const { return postSyncWriteOffset; } inline bool isMultiTileOperationEnabled() const { return (activePartitions > 1) && staticWorkPartitioningEnabled; } virtual void programComputeBarrierCommand(LinearStream &cmdStream) = 0; virtual size_t getCmdsSizeForComputeBarrierCommand() const = 0; const HardwareInfo &peekHwInfo() const; MOCKABLE_VIRTUAL bool isGpuHangDetected() const; virtual uint64_t getCompletionAddress() { return 0; } virtual uint32_t getCompletionValue(const GraphicsAllocation &gfxAllocation) { return 0; } protected: void cleanupResources(); void printDeviceIndex(); void checkForNewResources(uint32_t submittedTaskCount, uint32_t allocationTaskCount, GraphicsAllocation &gfxAllocation); bool checkImplicitFlushForGpuIdle(); void downloadTagAllocation(); MOCKABLE_VIRTUAL std::unique_lock obtainHostPtrSurfaceCreationLock(); std::unique_ptr flushStamp; std::unique_ptr submissionAggregator; std::unique_ptr flatBatchBufferHelper; std::unique_ptr experimentalCmdBuffer; std::unique_ptr internalAllocationStorage; std::unique_ptr kmdNotifyHelper; std::unique_ptr scratchSpaceController; std::unique_ptr profilingTimeStampAllocator; std::unique_ptr perfCounterAllocator; std::unique_ptr timestampPacketAllocator; std::unique_ptr userPauseConfirmation; ResidencyContainer residencyAllocations; ResidencyContainer evictionAllocations; MutexType ownershipMutex; MutexType hostPtrSurfaceCreationMutex; ExecutionEnvironment &executionEnvironment; LinearStream commandStream; StreamProperties streamProperties{}; // offset for debug state is 1kbyte, tag writes can use multiple offsets for multiple partitions and each offset can vary per platform const uint64_t debugPauseStateAddressOffset = MemoryConstants::kiloByte; uint64_t totalMemoryUsed = 0u; volatile uint32_t *tagAddress = nullptr; volatile DebugPauseState *debugPauseStateAddress = 
nullptr; SpinLock debugPauseStateLock; static void *asyncDebugBreakConfirmation(void *arg); std::function debugConfirmationFunction = []() { std::cin.get(); }; std::function downloadAllocationImpl; GraphicsAllocation *tagAllocation = nullptr; GraphicsAllocation *globalFenceAllocation = nullptr; GraphicsAllocation *preemptionAllocation = nullptr; GraphicsAllocation *debugSurface = nullptr; GraphicsAllocation *perDssBackedBuffer = nullptr; GraphicsAllocation *clearColorAllocation = nullptr; GraphicsAllocation *workPartitionAllocation = nullptr; MultiGraphicsAllocation *tagsMultiAllocation = nullptr; IndirectHeap *indirectHeap[IndirectHeap::Type::NUM_TYPES]; OsContext *osContext = nullptr; // current taskLevel. Used for determining if a PIPE_CONTROL is needed. std::atomic taskLevel{0}; std::atomic latestSentTaskCount{0}; std::atomic latestFlushedTaskCount{0}; // taskCount - # of tasks submitted std::atomic taskCount{0}; DispatchMode dispatchMode = DispatchMode::ImmediateDispatch; SamplerCacheFlushState samplerCacheFlushRequired = SamplerCacheFlushState::samplerCacheFlushNotRequired; PreemptionMode lastPreemptionMode = PreemptionMode::Initial; std::chrono::microseconds gpuHangCheckPeriod{500'000}; uint32_t lastSentL3Config = 0; uint32_t latestSentStatelessMocsConfig = 0; uint64_t lastSentSliceCount = QueueSliceCount::defaultSliceCount; uint32_t requiredScratchSize = 0; uint32_t requiredPrivateScratchSize = 0; uint32_t lastAdditionalKernelExecInfo = AdditionalKernelExecInfo::NotSet; KernelExecutionType lastKernelExecutionType = KernelExecutionType::Default; MemoryCompressionState lastMemoryCompressionState = MemoryCompressionState::NotApplicable; uint32_t activePartitions = 1; uint32_t activePartitionsConfig = 1; uint32_t postSyncWriteOffset = 0; const uint32_t rootDeviceIndex; const DeviceBitfield deviceBitfield; int8_t lastMediaSamplerConfig = -1; bool isPreambleSent = false; bool isStateSipSent = false; bool isEnginePrologueSent = false; bool isPerDssBackedBufferSent 
= false; bool GSBAFor32BitProgrammed = false; bool GSBAStateDirty = true; bool bindingTableBaseAddressRequired = false; bool mediaVfeStateDirty = true; bool lastVmeSubslicesConfig = false; bool stallingCommandsOnNextFlushRequired = false; bool timestampPacketWriteEnabled = false; bool staticWorkPartitioningEnabled = false; bool nTo1SubmissionModelEnabled = false; bool lastSpecialPipelineSelectMode = false; bool requiresInstructionCacheFlush = false; bool localMemoryEnabled = false; bool pageTableManagerInitialized = false; bool useNewResourceImplicitFlush = false; bool newResources = false; bool useGpuIdleImplicitFlush = false; bool lastSentUseGlobalAtomics = false; bool useNotifyEnableForPostSync = false; }; typedef CommandStreamReceiver *(*CommandStreamReceiverCreateFunc)(bool withAubDump, ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield); } // namespace NEO compute-runtime-22.14.22890/shared/source/command_stream/command_stream_receiver_hw.h000066400000000000000000000230031422164147700306330ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/command_stream/submission_status.h" #include "shared/source/command_stream/wait_status.h" #include "shared/source/direct_submission/direct_submission_hw.h" #include "shared/source/direct_submission/dispatchers/blitter_dispatcher.h" #include "shared/source/direct_submission/dispatchers/render_dispatcher.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/helpers/dirty_state_helpers.h" #include "shared/source/helpers/hw_info.h" #include "hw_cmds.h" namespace NEO { template class DeviceCommandStreamReceiver; struct PipeControlArgs; template class CommandStreamReceiverHw : public CommandStreamReceiver { typedef typename GfxFamily::MI_BATCH_BUFFER_START 
MI_BATCH_BUFFER_START; typedef typename GfxFamily::PIPE_CONTROL PIPE_CONTROL; public: static CommandStreamReceiver *create(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield) { return new CommandStreamReceiverHw(executionEnvironment, rootDeviceIndex, deviceBitfield); } CommandStreamReceiverHw(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield); ~CommandStreamReceiverHw() override; SubmissionStatus flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override; CompletionStamp flushTask(LinearStream &commandStream, size_t commandStreamStart, const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh, uint32_t taskLevel, DispatchFlags &dispatchFlags, Device &device) override; void forcePipeControl(NEO::LinearStream &commandStreamCSR); bool flushBatchedSubmissions() override; void programHardwareContext(LinearStream &cmdStream) override; size_t getCmdsSizeForHardwareContext() const override; static void addBatchBufferEnd(LinearStream &commandStream, void **patchLocation); void programEndingCmd(LinearStream &commandStream, Device &device, void **patchLocation, bool directSubmissionEnabled); void addBatchBufferStart(MI_BATCH_BUFFER_START *commandBufferMemory, uint64_t startAddress, bool secondary); size_t getRequiredStateBaseAddressSize(const Device &device) const; size_t getRequiredCmdStreamSize(const DispatchFlags &dispatchFlags, Device &device); size_t getRequiredCmdStreamSizeAligned(const DispatchFlags &dispatchFlags, Device &device); size_t getRequiredCmdSizeForPreamble(Device &device) const; size_t getCmdSizeForPreemption(const DispatchFlags &dispatchFlags) const; size_t getCmdSizeForEpilogue(const DispatchFlags &dispatchFlags) const; size_t getCmdSizeForEpilogueCommands(const DispatchFlags &dispatchFlags) const; size_t getCmdSizeForL3Config() const; size_t getCmdSizeForPipelineSelect() const; size_t 
getCmdSizeForMediaSampler(bool mediaSamplerRequired) const; size_t getCmdSizeForEngineMode(const DispatchFlags &dispatchFlags) const; size_t getCmdSizeForPerDssBackedBuffer(const HardwareInfo &hwInfo); size_t getCmdSizeForActivePartitionConfig() const; size_t getCmdSizeForStallingCommands(const DispatchFlags &dispatchFlags) const; size_t getCmdSizeForStallingNoPostSyncCommands() const; size_t getCmdSizeForStallingPostSyncCommands() const; size_t getCmdSizeForComputeMode(); MOCKABLE_VIRTUAL bool hasSharedHandles(); bool isPipelineSelectAlreadyProgrammed() const; void programComputeMode(LinearStream &csr, DispatchFlags &dispatchFlags, const HardwareInfo &hwInfo); WaitStatus waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, QueueThrottle throttle) override; void collectStateBaseAddresPatchInfo( uint64_t commandBufferAddress, uint64_t commandOffset, const LinearStream *dsh, const LinearStream *ioh, const LinearStream *ssh, uint64_t generalStateBase); void collectStateBaseAddresIohPatchInfo(uint64_t commandBufferAddress, uint64_t commandOffset, const LinearStream &ioh); void resetKmdNotifyHelper(KmdNotifyHelper *newHelper); CommandStreamReceiverType getType() override { return CommandStreamReceiverType::CSR_HW; } uint32_t flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override; void flushTagUpdate() override; void flushNonKernelTask(GraphicsAllocation *eventAlloc, uint64_t immediateGpuAddress, uint64_t immediateData, PipeControlArgs &args, bool isWaitOnEvent, bool isStartOfDispatch, bool isEndOfDispatch) override; void flushMiFlushDW(); void flushMiFlushDW(GraphicsAllocation *eventAlloc, uint64_t immediateGpuAddress, uint64_t immediateData); void flushPipeControl(); void flushPipeControl(GraphicsAllocation *eventAlloc, uint64_t immediateGpuAddress, uint64_t immediateData, PipeControlArgs &args); void 
flushSemaphoreWait(GraphicsAllocation *eventAlloc, uint64_t immediateGpuAddress, uint64_t immediateData, PipeControlArgs &args, bool isStartOfDispatch, bool isEndOfDispatch); void flushSmallTask(LinearStream &commandStreamTask, size_t commandStreamStartTask); void flushHandler(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency); bool isUpdateTagFromWaitEnabled() override; void updateTagFromWait() override; bool isMultiOsContextCapable() const override; MemoryCompressionState getMemoryCompressionState(bool auxTranslationRequired, const HardwareInfo &hwInfo) const override; bool isDirectSubmissionEnabled() const override { return directSubmission.get() != nullptr; } bool isBlitterDirectSubmissionEnabled() const override { return blitterDirectSubmission.get() != nullptr; } void stopDirectSubmission() override; virtual bool isKmdWaitModeActive() { return true; } bool initDirectSubmission(Device &device, OsContext &osContext) override; GraphicsAllocation *getClearColorAllocation() override; TagAllocatorBase *getTimestampPacketAllocator() override; void postInitFlagsSetup() override; void programActivePartitionConfig(LinearStream &csr); void programComputeBarrierCommand(LinearStream &cmdStream) override { programStallingNoPostSyncCommandsForBarrier(cmdStream); } size_t getCmdsSizeForComputeBarrierCommand() const override { return getCmdSizeForStallingNoPostSyncCommands(); } protected: void programPreemption(LinearStream &csr, DispatchFlags &dispatchFlags); void programL3(LinearStream &csr, uint32_t &newL3Config); void programPreamble(LinearStream &csr, Device &device, uint32_t &newL3Config); void programPipelineSelect(LinearStream &csr, PipelineSelectArgs &pipelineSelectArgs); void programAdditionalStateBaseAddress(LinearStream &csr, typename GfxFamily::STATE_BASE_ADDRESS &cmd, Device &device); void programEpilogue(LinearStream &csr, Device &device, void **batchBufferEndLocation, DispatchFlags &dispatchFlags); void programEpliogueCommands(LinearStream 
&csr, const DispatchFlags &dispatchFlags); void programMediaSampler(LinearStream &csr, DispatchFlags &dispatchFlags); void programPerDssBackedBuffer(LinearStream &scr, Device &device, DispatchFlags &dispatchFlags); void programStateSip(LinearStream &cmdStream, Device &device); void programVFEState(LinearStream &csr, DispatchFlags &dispatchFlags, uint32_t maxFrontEndThreads); void programStallingCommandsForBarrier(LinearStream &cmdStream, DispatchFlags &dispatchFlags); void programStallingNoPostSyncCommandsForBarrier(LinearStream &cmdStream); void programStallingPostSyncCommandsForBarrier(LinearStream &cmdStream, TagNodeBase &tagNode); void programEngineModeCommands(LinearStream &csr, const DispatchFlags &dispatchFlags); void programEngineModeEpliogue(LinearStream &csr, const DispatchFlags &dispatchFlags); void programActivePartitionConfigFlushTask(LinearStream &csr); void programEnginePrologue(LinearStream &csr); size_t getCmdSizeForPrologue() const; void addClearSLMWorkAround(typename GfxFamily::PIPE_CONTROL *pCmd); void addPipeControlBeforeStateSip(LinearStream &commandStream, Device &device); void addPipeControlBefore3dState(LinearStream &commandStream, DispatchFlags &dispatchFlags); size_t getSshHeapSize(); bool are4GbHeapsAvailable() const; uint64_t getScratchPatchAddress(); void createScratchSpaceController(); bool detectInitProgrammingFlagsRequired(const DispatchFlags &dispatchFlags) const; bool checkPlatformSupportsNewResourceImplicitFlush() const; bool checkPlatformSupportsGpuIdleImplicitFlush() const; void configurePostSyncWriteOffset(); void unregisterDirectSubmissionFromController(); HeapDirtyState dshState; HeapDirtyState iohState; HeapDirtyState sshState; CsrSizeRequestFlags csrSizeRequestFlags = {}; bool wasSubmittedToSingleSubdevice = false; std::unique_ptr>> directSubmission; std::unique_ptr>> blitterDirectSubmission; size_t cmdStreamStart = 0; }; } // namespace NEO 
compute-runtime-22.14.22890/shared/source/command_stream/command_stream_receiver_hw_base.inl000066400000000000000000002157461422164147700322010ustar00rootroot00000000000000/* * Copyright (C) 2019-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_container/command_encoder.h" #include "shared/source/command_stream/command_stream_receiver_hw.h" #include "shared/source/command_stream/experimental_command_buffer.h" #include "shared/source/command_stream/linear_stream.h" #include "shared/source/command_stream/preemption.h" #include "shared/source/command_stream/scratch_space_controller_base.h" #include "shared/source/command_stream/stream_properties.h" #include "shared/source/command_stream/wait_status.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/device/device.h" #include "shared/source/direct_submission/direct_submission_controller.h" #include "shared/source/direct_submission/direct_submission_hw.h" #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/gmm_helper/page_table_mngr.h" #include "shared/source/helpers/blit_commands_helper.h" #include "shared/source/helpers/cache_policy.h" #include "shared/source/helpers/engine_node_helper.h" #include "shared/source/helpers/flat_batch_buffer_helper_hw.h" #include "shared/source/helpers/flush_stamp.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/pause_on_gpu_properties.h" #include "shared/source/helpers/preamble.h" #include "shared/source/helpers/ptr_math.h" #include "shared/source/helpers/state_base_address.h" #include "shared/source/helpers/timestamp_packet.h" #include "shared/source/indirect_heap/indirect_heap.h" #include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/os_interface/hw_info_config.h" #include "shared/source/os_interface/os_context.h" #include 
"shared/source/utilities/tag_allocator.h" #include "command_stream_receiver_hw_ext.inl" namespace NEO { template CommandStreamReceiverHw::~CommandStreamReceiverHw() { this->unregisterDirectSubmissionFromController(); } template CommandStreamReceiverHw::CommandStreamReceiverHw(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield) : CommandStreamReceiver(executionEnvironment, rootDeviceIndex, deviceBitfield) { const auto &hwInfo = peekHwInfo(); auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); localMemoryEnabled = hwHelper.getEnableLocalMemory(hwInfo); resetKmdNotifyHelper(new KmdNotifyHelper(&hwInfo.capabilityTable.kmdNotifyProperties)); if (DebugManager.flags.FlattenBatchBufferForAUBDump.get() || DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) { flatBatchBufferHelper.reset(new FlatBatchBufferHelperHw(executionEnvironment)); } defaultSshSize = getSshHeapSize(); canUse4GbHeaps = are4GbHeapsAvailable(); timestampPacketWriteEnabled = hwHelper.timestampPacketWriteSupported(); if (DebugManager.flags.EnableTimestampPacket.get() != -1) { timestampPacketWriteEnabled = !!DebugManager.flags.EnableTimestampPacket.get(); } createScratchSpaceController(); configurePostSyncWriteOffset(); } template SubmissionStatus CommandStreamReceiverHw::flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) { return SubmissionStatus::SUCCESS; } template inline void CommandStreamReceiverHw::addBatchBufferEnd(LinearStream &commandStream, void **patchLocation) { using MI_BATCH_BUFFER_END = typename GfxFamily::MI_BATCH_BUFFER_END; auto pCmd = commandStream.getSpaceForCmd(); *pCmd = GfxFamily::cmdInitBatchBufferEnd; if (patchLocation) { *patchLocation = pCmd; } } template inline void CommandStreamReceiverHw::programEndingCmd(LinearStream &commandStream, Device &device, void **patchLocation, bool directSubmissionEnabled) { if (directSubmissionEnabled) { uint64_t startAddress = 
commandStream.getGraphicsAllocation()->getGpuAddress() + commandStream.getUsed(); if (DebugManager.flags.BatchBufferStartPrepatchingWaEnabled.get() == 0) { startAddress = 0; } *patchLocation = commandStream.getSpace(sizeof(MI_BATCH_BUFFER_START)); auto bbStart = reinterpret_cast(*patchLocation); MI_BATCH_BUFFER_START cmd = {}; addBatchBufferStart(&cmd, startAddress, false); *bbStart = cmd; } else { if (!EngineHelpers::isBcs(osContext->getEngineType())) { PreemptionHelper::programStateSipEndWa(commandStream, device); } this->addBatchBufferEnd(commandStream, patchLocation); } } template inline void CommandStreamReceiverHw::addBatchBufferStart(MI_BATCH_BUFFER_START *commandBufferMemory, uint64_t startAddress, bool secondary) { MI_BATCH_BUFFER_START cmd = GfxFamily::cmdInitBatchBufferStart; cmd.setBatchBufferStartAddress(startAddress); cmd.setAddressSpaceIndicator(MI_BATCH_BUFFER_START::ADDRESS_SPACE_INDICATOR_PPGTT); if (secondary) { cmd.setSecondLevelBatchBuffer(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH); } if (DebugManager.flags.FlattenBatchBufferForAUBDump.get()) { flatBatchBufferHelper->registerBatchBufferStartAddress(reinterpret_cast(commandBufferMemory), startAddress); } *commandBufferMemory = cmd; } template inline size_t CommandStreamReceiverHw::getRequiredCmdSizeForPreamble(Device &device) const { size_t size = 0; if (mediaVfeStateDirty) { size += PreambleHelper::getVFECommandsSize(); } if (!this->isPreambleSent) { size += PreambleHelper::getAdditionalCommandsSize(device); } if (!this->isPreambleSent) { if (DebugManager.flags.ForceSemaphoreDelayBetweenWaits.get() > -1) { size += PreambleHelper::getSemaphoreDelayCommandSize(); } } return size; } template void CommandStreamReceiverHw::programHardwareContext(LinearStream &cmdStream) { programEnginePrologue(cmdStream); } template size_t CommandStreamReceiverHw::getCmdsSizeForHardwareContext() const { return getCmdSizeForPrologue(); } template CompletionStamp 
CommandStreamReceiverHw::flushTask( LinearStream &commandStreamTask, size_t commandStreamStartTask, const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh, uint32_t taskLevel, DispatchFlags &dispatchFlags, Device &device) { typedef typename GfxFamily::MI_BATCH_BUFFER_START MI_BATCH_BUFFER_START; typedef typename GfxFamily::MI_BATCH_BUFFER_END MI_BATCH_BUFFER_END; typedef typename GfxFamily::PIPE_CONTROL PIPE_CONTROL; typedef typename GfxFamily::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; DEBUG_BREAK_IF(&commandStreamTask == &commandStream); DEBUG_BREAK_IF(!(dispatchFlags.preemptionMode == PreemptionMode::Disabled ? device.getPreemptionMode() == PreemptionMode::Disabled : true)); DEBUG_BREAK_IF(taskLevel >= CompletionStamp::notReady); DBG_LOG(LogTaskCounts, __FUNCTION__, "Line: ", __LINE__, "taskLevel", taskLevel); auto levelClosed = false; bool implicitFlush = dispatchFlags.implicitFlush || dispatchFlags.blocking || DebugManager.flags.ForceImplicitFlush.get(); void *currentPipeControlForNooping = nullptr; void *epiloguePipeControlLocation = nullptr; bool csrFlush = this->wasSubmittedToSingleSubdevice != dispatchFlags.useSingleSubdevice; csrFlush |= DebugManager.flags.ForceCsrFlushing.get(); if (csrFlush) { flushBatchedSubmissions(); } if (detectInitProgrammingFlagsRequired(dispatchFlags)) { initProgrammingFlags(); } const auto &hwInfo = peekHwInfo(); auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); if (dispatchFlags.blocking || dispatchFlags.dcFlush || dispatchFlags.guardCommandBufferWithPipeControl) { if (this->dispatchMode == DispatchMode::ImmediateDispatch) { //for ImmediateDispatch we will send this right away, therefore this pipe control will close the level //for BatchedSubmissions it will be nooped and only last ppc in batch will be emitted. 
levelClosed = true; //if we guard with ppc, flush dc as well to speed up completion latency if (dispatchFlags.guardCommandBufferWithPipeControl) { const auto &hwInfoConfig = *NEO::HwInfoConfig::get(hwInfo.platform.eProductFamily); if (hwInfoConfig.isDcFlushAllowed()) { dispatchFlags.dcFlush = true; } } } epiloguePipeControlLocation = ptrOffset(commandStreamTask.getCpuBase(), commandStreamTask.getUsed()); if ((dispatchFlags.outOfOrderExecutionAllowed || timestampPacketWriteEnabled) && !dispatchFlags.dcFlush) { currentPipeControlForNooping = epiloguePipeControlLocation; } auto address = getTagAllocation()->getGpuAddress(); PipeControlArgs args; args.dcFlushEnable = MemorySynchronizationCommands::getDcFlushEnable(dispatchFlags.dcFlush, hwInfo); args.notifyEnable = isUsedNotifyEnableForPostSync(); args.tlbInvalidation |= dispatchFlags.memoryMigrationRequired; args.textureCacheInvalidationEnable |= dispatchFlags.textureCacheFlush; args.workloadPartitionOffset = isMultiTileOperationEnabled(); MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( commandStreamTask, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, address, taskCount + 1, hwInfo, args); DBG_LOG(LogTaskCounts, __FUNCTION__, "Line: ", __LINE__, "taskCount", peekTaskCount()); if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) { flatBatchBufferHelper->setPatchInfoData(PatchInfoData(address, 0u, PatchInfoAllocationType::TagAddress, commandStreamTask.getGraphicsAllocation()->getGpuAddress(), commandStreamTask.getUsed() - 2 * sizeof(uint64_t), PatchInfoAllocationType::Default)); flatBatchBufferHelper->setPatchInfoData(PatchInfoData(address, 0u, PatchInfoAllocationType::TagValue, commandStreamTask.getGraphicsAllocation()->getGpuAddress(), commandStreamTask.getUsed() - sizeof(uint64_t), PatchInfoAllocationType::Default)); } } this->latestSentTaskCount = taskCount + 1; if (DebugManager.flags.ForceSLML3Config.get()) { dispatchFlags.useSLM = true; } auto newL3Config = 
PreambleHelper::getL3Config(hwInfo, dispatchFlags.useSLM); auto isSpecialPipelineSelectModeChanged = PreambleHelper::isSpecialPipelineSelectModeChanged(lastSpecialPipelineSelectMode, dispatchFlags.pipelineSelectArgs.specialPipelineSelectMode, hwInfo); auto requiresCoherency = hwHelper.forceNonGpuCoherencyWA(dispatchFlags.requiresCoherency); this->streamProperties.stateComputeMode.setProperties(requiresCoherency, dispatchFlags.numGrfRequired, dispatchFlags.threadArbitrationPolicy, hwInfo); csrSizeRequestFlags.l3ConfigChanged = this->lastSentL3Config != newL3Config; csrSizeRequestFlags.preemptionRequestChanged = this->lastPreemptionMode != dispatchFlags.preemptionMode; csrSizeRequestFlags.mediaSamplerConfigChanged = this->lastMediaSamplerConfig != static_cast(dispatchFlags.pipelineSelectArgs.mediaSamplerRequired); csrSizeRequestFlags.specialPipelineSelectModeChanged = isSpecialPipelineSelectModeChanged; csrSizeRequestFlags.activePartitionsChanged = isProgramActivePartitionConfigRequired(); auto force32BitAllocations = getMemoryManager()->peekForce32BitAllocations(); bool stateBaseAddressDirty = false; bool checkVfeStateDirty = false; if (requiredScratchSize || requiredPrivateScratchSize) { scratchSpaceController->setRequiredScratchSpace(ssh->getCpuBase(), 0u, requiredScratchSize, requiredPrivateScratchSize, this->taskCount, *this->osContext, stateBaseAddressDirty, checkVfeStateDirty); if (checkVfeStateDirty) { setMediaVFEStateDirty(true); } if (scratchSpaceController->getScratchSpaceAllocation()) { makeResident(*scratchSpaceController->getScratchSpaceAllocation()); } if (scratchSpaceController->getPrivateScratchSpaceAllocation()) { makeResident(*scratchSpaceController->getPrivateScratchSpaceAllocation()); } } if (dispatchFlags.usePerDssBackedBuffer) { if (!perDssBackedBuffer) { createPerDssBackedBuffer(device); } makeResident(*perDssBackedBuffer); } if (dispatchFlags.additionalKernelExecInfo != AdditionalKernelExecInfo::NotApplicable && lastAdditionalKernelExecInfo 
!= dispatchFlags.additionalKernelExecInfo) { setMediaVFEStateDirty(true); } if (dispatchFlags.kernelExecutionType != KernelExecutionType::NotApplicable && lastKernelExecutionType != dispatchFlags.kernelExecutionType) { setMediaVFEStateDirty(true); } auto &commandStreamCSR = this->getCS(getRequiredCmdStreamSizeAligned(dispatchFlags, device)); auto commandStreamStartCSR = commandStreamCSR.getUsed(); TimestampPacketHelper::programCsrDependenciesForTimestampPacketContainer(commandStreamCSR, dispatchFlags.csrDependencies); TimestampPacketHelper::programCsrDependenciesForForTaskCountContainer(commandStreamCSR, dispatchFlags.csrDependencies); programActivePartitionConfigFlushTask(commandStreamCSR); programEngineModeCommands(commandStreamCSR, dispatchFlags); if (pageTableManager.get() && !pageTableManagerInitialized) { pageTableManagerInitialized = pageTableManager->initPageTableManagerRegisters(this); } programHardwareContext(commandStreamCSR); programComputeMode(commandStreamCSR, dispatchFlags, hwInfo); programPipelineSelect(commandStreamCSR, dispatchFlags.pipelineSelectArgs); programL3(commandStreamCSR, newL3Config); programPreamble(commandStreamCSR, device, newL3Config); programMediaSampler(commandStreamCSR, dispatchFlags); addPipeControlBefore3dState(commandStreamCSR, dispatchFlags); programPerDssBackedBuffer(commandStreamCSR, device, dispatchFlags); stateBaseAddressDirty |= ((GSBAFor32BitProgrammed ^ dispatchFlags.gsba32BitRequired) && force32BitAllocations); programVFEState(commandStreamCSR, dispatchFlags, device.getDeviceInfo().maxFrontEndThreads); programPreemption(commandStreamCSR, dispatchFlags); if (stallingCommandsOnNextFlushRequired) { programStallingCommandsForBarrier(commandStreamCSR, dispatchFlags); } const bool hasDsh = hwInfo.capabilityTable.supportsImages; bool dshDirty = hasDsh ? 
dshState.updateAndCheck(dsh) : false; bool iohDirty = iohState.updateAndCheck(ioh); bool sshDirty = sshState.updateAndCheck(ssh); auto isStateBaseAddressDirty = dshDirty || iohDirty || sshDirty || stateBaseAddressDirty; auto mocsIndex = latestSentStatelessMocsConfig; if (dispatchFlags.l3CacheSettings != L3CachingSettings::NotApplicable) { auto l3On = dispatchFlags.l3CacheSettings != L3CachingSettings::l3CacheOff; auto l1On = dispatchFlags.l3CacheSettings == L3CachingSettings::l3AndL1On; mocsIndex = hwHelper.getMocsIndex(*device.getGmmHelper(), l3On, l1On); } if (mocsIndex != latestSentStatelessMocsConfig) { isStateBaseAddressDirty = true; latestSentStatelessMocsConfig = mocsIndex; } if ((isMultiOsContextCapable() || dispatchFlags.areMultipleSubDevicesInContext) && (dispatchFlags.useGlobalAtomics != lastSentUseGlobalAtomics)) { isStateBaseAddressDirty = true; lastSentUseGlobalAtomics = dispatchFlags.useGlobalAtomics; } bool sourceLevelDebuggerActive = device.getSourceLevelDebugger() != nullptr ? 
true : false; auto memoryCompressionState = lastMemoryCompressionState; if (dispatchFlags.memoryCompressionState != MemoryCompressionState::NotApplicable) { memoryCompressionState = dispatchFlags.memoryCompressionState; } if (memoryCompressionState != lastMemoryCompressionState) { isStateBaseAddressDirty = true; lastMemoryCompressionState = memoryCompressionState; } //Reprogram state base address if required if (isStateBaseAddressDirty || sourceLevelDebuggerActive) { EncodeWA::addPipeControlBeforeStateBaseAddress(commandStreamCSR, hwInfo, isRcs()); EncodeWA::encodeAdditionalPipelineSelect(commandStreamCSR, dispatchFlags.pipelineSelectArgs, true, hwInfo, isRcs()); uint64_t newGSHbase = 0; GSBAFor32BitProgrammed = false; if (is64bit && scratchSpaceController->getScratchSpaceAllocation() && !force32BitAllocations) { newGSHbase = scratchSpaceController->calculateNewGSH(); } else if (is64bit && force32BitAllocations && dispatchFlags.gsba32BitRequired) { bool useLocalMemory = scratchSpaceController->getScratchSpaceAllocation() ? 
scratchSpaceController->getScratchSpaceAllocation()->isAllocatedInLocalMemoryPool() : false; newGSHbase = getMemoryManager()->getExternalHeapBaseAddress(rootDeviceIndex, useLocalMemory); GSBAFor32BitProgrammed = true; } auto stateBaseAddressCmdOffset = commandStreamCSR.getUsed(); auto pCmd = static_cast(commandStreamCSR.getSpace(sizeof(STATE_BASE_ADDRESS))); STATE_BASE_ADDRESS cmd; auto instructionHeapBaseAddress = getMemoryManager()->getInternalHeapBaseAddress(rootDeviceIndex, getMemoryManager()->isLocalMemoryUsedForIsa(rootDeviceIndex)); StateBaseAddressHelper::programStateBaseAddress( &cmd, dsh, ioh, ssh, newGSHbase, true, mocsIndex, getMemoryManager()->getInternalHeapBaseAddress(rootDeviceIndex, ioh->getGraphicsAllocation()->isAllocatedInLocalMemoryPool()), instructionHeapBaseAddress, 0, true, false, device.getGmmHelper(), isMultiOsContextCapable(), memoryCompressionState, dispatchFlags.useGlobalAtomics, dispatchFlags.areMultipleSubDevicesInContext); *pCmd = cmd; programAdditionalStateBaseAddress(commandStreamCSR, cmd, device); if (sshDirty) { bindingTableBaseAddressRequired = true; } if (bindingTableBaseAddressRequired) { StateBaseAddressHelper::programBindingTableBaseAddress(commandStreamCSR, *ssh, device.getGmmHelper()); bindingTableBaseAddressRequired = false; } EncodeWA::encodeAdditionalPipelineSelect(commandStreamCSR, dispatchFlags.pipelineSelectArgs, false, hwInfo, isRcs()); addPipeControlBeforeStateSip(commandStreamCSR, device); programStateSip(commandStreamCSR, device); if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) { collectStateBaseAddresPatchInfo(commandStream.getGraphicsAllocation()->getGpuAddress(), stateBaseAddressCmdOffset, dsh, ioh, ssh, newGSHbase); } } DBG_LOG(LogTaskCounts, __FUNCTION__, "Line: ", __LINE__, "this->taskLevel", (uint32_t)this->taskLevel); if (executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo()->workaroundTable.flags.waSamplerCacheFlushBetweenRedescribedSurfaceReads) { if 
(this->samplerCacheFlushRequired != SamplerCacheFlushState::samplerCacheFlushNotRequired) { PipeControlArgs args; args.textureCacheInvalidationEnable = true; MemorySynchronizationCommands::addPipeControl(commandStreamCSR, args); if (this->samplerCacheFlushRequired == SamplerCacheFlushState::samplerCacheFlushBefore) { this->samplerCacheFlushRequired = SamplerCacheFlushState::samplerCacheFlushAfter; } else { this->samplerCacheFlushRequired = SamplerCacheFlushState::samplerCacheFlushNotRequired; } } } if (experimentalCmdBuffer.get() != nullptr) { size_t startingOffset = experimentalCmdBuffer->programExperimentalCommandBuffer(); experimentalCmdBuffer->injectBufferStart(commandStreamCSR, startingOffset); } if (requiresInstructionCacheFlush) { PipeControlArgs args; args.instructionCacheInvalidateEnable = true; MemorySynchronizationCommands::addPipeControl(commandStreamCSR, args); requiresInstructionCacheFlush = false; } // Add a Pipe Control if we have a dependency on a previous walker to avoid concurrency issues. 
if (taskLevel > this->taskLevel) { auto programPipeControl = !timestampPacketWriteEnabled; if (DebugManager.flags.ResolveDependenciesViaPipeControls.get() == 1) { programPipeControl = true; } if (programPipeControl) { PipeControlArgs args; MemorySynchronizationCommands::addPipeControl(commandStreamCSR, args); } this->taskLevel = taskLevel; DBG_LOG(LogTaskCounts, __FUNCTION__, "Line: ", __LINE__, "this->taskCount", peekTaskCount()); } if (DebugManager.flags.ForcePipeControlPriorToWalker.get()) { forcePipeControl(commandStreamCSR); } if (hasDsh) { auto dshAllocation = dsh->getGraphicsAllocation(); this->makeResident(*dshAllocation); dshAllocation->setEvictable(false); } auto iohAllocation = ioh->getGraphicsAllocation(); auto sshAllocation = ssh->getGraphicsAllocation(); this->makeResident(*iohAllocation); this->makeResident(*sshAllocation); iohAllocation->setEvictable(false); this->makeResident(*tagAllocation); if (globalFenceAllocation) { makeResident(*globalFenceAllocation); } if (preemptionAllocation) { makeResident(*preemptionAllocation); } if (dispatchFlags.preemptionMode == PreemptionMode::MidThread || sourceLevelDebuggerActive) { makeResident(*SipKernel::getSipKernel(device).getSipAllocation()); if (debugSurface) { makeResident(*debugSurface); } } if (experimentalCmdBuffer.get() != nullptr) { experimentalCmdBuffer->makeResidentAllocations(); } if (workPartitionAllocation) { makeResident(*workPartitionAllocation); } // If the CSR has work in its CS, flush it before the task bool submitTask = commandStreamStartTask != commandStreamTask.getUsed(); bool submitCSR = (commandStreamStartCSR != commandStreamCSR.getUsed()) || this->isMultiOsContextCapable(); bool submitCommandStreamFromCsr = false; void *bbEndLocation = nullptr; auto bbEndPaddingSize = this->dispatchMode == DispatchMode::ImmediateDispatch ? 
0 : sizeof(MI_BATCH_BUFFER_START) - sizeof(MI_BATCH_BUFFER_END); size_t chainedBatchBufferStartOffset = 0; GraphicsAllocation *chainedBatchBuffer = nullptr; bool directSubmissionEnabled = isDirectSubmissionEnabled(); if (submitTask) { programEndingCmd(commandStreamTask, device, &bbEndLocation, directSubmissionEnabled); EncodeNoop::emitNoop(commandStreamTask, bbEndPaddingSize); EncodeNoop::alignToCacheLine(commandStreamTask); if (submitCSR) { chainedBatchBufferStartOffset = commandStreamCSR.getUsed(); chainedBatchBuffer = commandStreamTask.getGraphicsAllocation(); // Add MI_BATCH_BUFFER_START to chain from CSR -> Task auto pBBS = reinterpret_cast(commandStreamCSR.getSpace(sizeof(MI_BATCH_BUFFER_START))); addBatchBufferStart(pBBS, ptrOffset(commandStreamTask.getGraphicsAllocation()->getGpuAddress(), commandStreamStartTask), false); if (DebugManager.flags.FlattenBatchBufferForAUBDump.get()) { flatBatchBufferHelper->registerCommandChunk(commandStreamTask.getGraphicsAllocation()->getGpuAddress(), reinterpret_cast(commandStreamTask.getCpuBase()), commandStreamStartTask, static_cast(ptrDiff(bbEndLocation, commandStreamTask.getGraphicsAllocation()->getGpuAddress())) + sizeof(MI_BATCH_BUFFER_START)); } auto commandStreamAllocation = commandStreamTask.getGraphicsAllocation(); DEBUG_BREAK_IF(commandStreamAllocation == nullptr); this->makeResident(*commandStreamAllocation); EncodeNoop::alignToCacheLine(commandStreamCSR); submitCommandStreamFromCsr = true; } else if (dispatchFlags.epilogueRequired) { this->makeResident(*commandStreamCSR.getGraphicsAllocation()); } this->programEpilogue(commandStreamCSR, device, &bbEndLocation, dispatchFlags); } else if (submitCSR) { programEndingCmd(commandStreamCSR, device, &bbEndLocation, directSubmissionEnabled); EncodeNoop::emitNoop(commandStreamCSR, bbEndPaddingSize); EncodeNoop::alignToCacheLine(commandStreamCSR); DEBUG_BREAK_IF(commandStreamCSR.getUsed() > commandStreamCSR.getMaxAvailableSpace()); submitCommandStreamFromCsr = true; } 
size_t startOffset = submitCommandStreamFromCsr ? commandStreamStartCSR : commandStreamStartTask; auto &streamToSubmit = submitCommandStreamFromCsr ? commandStreamCSR : commandStreamTask; BatchBuffer batchBuffer{streamToSubmit.getGraphicsAllocation(), startOffset, chainedBatchBufferStartOffset, chainedBatchBuffer, dispatchFlags.requiresCoherency, dispatchFlags.lowPriority, dispatchFlags.throttle, dispatchFlags.sliceCount, streamToSubmit.getUsed(), &streamToSubmit, bbEndLocation, dispatchFlags.useSingleSubdevice}; streamToSubmit.getGraphicsAllocation()->updateTaskCount(this->taskCount + 1, this->osContext->getContextId()); streamToSubmit.getGraphicsAllocation()->updateResidencyTaskCount(this->taskCount + 1, this->osContext->getContextId()); if (submitCSR | submitTask) { if (this->dispatchMode == DispatchMode::ImmediateDispatch) { flushHandler(batchBuffer, this->getResidencyAllocations()); if (dispatchFlags.blocking || dispatchFlags.dcFlush || dispatchFlags.guardCommandBufferWithPipeControl) { this->latestFlushedTaskCount = this->taskCount + 1; } } else { auto commandBuffer = new CommandBuffer(device); commandBuffer->batchBuffer = batchBuffer; commandBuffer->surfaces.swap(this->getResidencyAllocations()); commandBuffer->batchBufferEndLocation = bbEndLocation; commandBuffer->taskCount = this->taskCount + 1; commandBuffer->flushStamp->replaceStampObject(dispatchFlags.flushStampReference); commandBuffer->pipeControlThatMayBeErasedLocation = currentPipeControlForNooping; commandBuffer->epiloguePipeControlLocation = epiloguePipeControlLocation; this->submissionAggregator->recordCommandBuffer(commandBuffer); } } else { this->makeSurfacePackNonResident(this->getResidencyAllocations()); } this->wasSubmittedToSingleSubdevice = dispatchFlags.useSingleSubdevice; //check if we are not over the budget, if we are do implicit flush if (getMemoryManager()->isMemoryBudgetExhausted()) { if (this->totalMemoryUsed >= device.getDeviceInfo().globalMemSize / 4) { implicitFlush = true; } } 
if (DebugManager.flags.PerformImplicitFlushEveryEnqueueCount.get() != -1) { if ((taskCount + 1) % DebugManager.flags.PerformImplicitFlushEveryEnqueueCount.get() == 0) { implicitFlush = true; } } if (this->newResources) { implicitFlush = true; this->newResources = false; } implicitFlush |= checkImplicitFlushForGpuIdle(); if (this->dispatchMode == DispatchMode::BatchedDispatch && implicitFlush) { this->flushBatchedSubmissions(); } ++taskCount; DBG_LOG(LogTaskCounts, __FUNCTION__, "Line: ", __LINE__, "taskCount", peekTaskCount()); DBG_LOG(LogTaskCounts, __FUNCTION__, "Line: ", __LINE__, "Current taskCount:", tagAddress ? *tagAddress : 0); CompletionStamp completionStamp = { taskCount, this->taskLevel, flushStamp->peekStamp()}; this->taskLevel += levelClosed ? 1 : 0; return completionStamp; } template void CommandStreamReceiverHw::forcePipeControl(NEO::LinearStream &commandStreamCSR) { PipeControlArgs args; MemorySynchronizationCommands::addPipeControlWithCSStallOnly(commandStreamCSR); MemorySynchronizationCommands::addPipeControl(commandStreamCSR, args); } template void CommandStreamReceiverHw::programComputeMode(LinearStream &stream, DispatchFlags &dispatchFlags, const HardwareInfo &hwInfo) { if (this->streamProperties.stateComputeMode.isDirty()) { EncodeComputeMode::programComputeModeCommandWithSynchronization( stream, this->streamProperties.stateComputeMode, dispatchFlags.pipelineSelectArgs, hasSharedHandles(), hwInfo, isRcs()); } } template inline void CommandStreamReceiverHw::programStallingCommandsForBarrier(LinearStream &cmdStream, DispatchFlags &dispatchFlags) { stallingCommandsOnNextFlushRequired = false; auto barrierTimestampPacketNodes = dispatchFlags.barrierTimestampPacketNodes; if (barrierTimestampPacketNodes && barrierTimestampPacketNodes->peekNodes().size() != 0) { programStallingPostSyncCommandsForBarrier(cmdStream, *barrierTimestampPacketNodes->peekNodes()[0]); barrierTimestampPacketNodes->makeResident(*this); } else { 
programStallingNoPostSyncCommandsForBarrier(cmdStream); } } template inline bool CommandStreamReceiverHw::flushBatchedSubmissions() { if (this->dispatchMode == DispatchMode::ImmediateDispatch) { return true; } typedef typename GfxFamily::MI_BATCH_BUFFER_START MI_BATCH_BUFFER_START; typedef typename GfxFamily::PIPE_CONTROL PIPE_CONTROL; std::unique_lock lockGuard(ownershipMutex); bool submitResult = true; auto &commandBufferList = this->submissionAggregator->peekCmdBufferList(); if (!commandBufferList.peekIsEmpty()) { const auto totalMemoryBudget = static_cast(commandBufferList.peekHead()->device.getDeviceInfo().globalMemSize / 2); ResidencyContainer surfacesForSubmit; ResourcePackage resourcePackage; const auto &hwInfo = peekHwInfo(); auto pipeControlLocationSize = MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(hwInfo); void *currentPipeControlForNooping = nullptr; void *epiloguePipeControlLocation = nullptr; while (!commandBufferList.peekIsEmpty()) { size_t totalUsedSize = 0u; this->submissionAggregator->aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget, osContext->getContextId()); auto primaryCmdBuffer = commandBufferList.removeFrontOne(); auto nextCommandBuffer = commandBufferList.peekHead(); auto currentBBendLocation = primaryCmdBuffer->batchBufferEndLocation; auto lastTaskCount = primaryCmdBuffer->taskCount; FlushStampUpdateHelper flushStampUpdateHelper; flushStampUpdateHelper.insert(primaryCmdBuffer->flushStamp->getStampReference()); currentPipeControlForNooping = primaryCmdBuffer->pipeControlThatMayBeErasedLocation; epiloguePipeControlLocation = primaryCmdBuffer->epiloguePipeControlLocation; if (DebugManager.flags.FlattenBatchBufferForAUBDump.get()) { flatBatchBufferHelper->registerCommandChunk(primaryCmdBuffer.get()->batchBuffer, sizeof(MI_BATCH_BUFFER_START)); } while (nextCommandBuffer && nextCommandBuffer->inspectionId == primaryCmdBuffer->inspectionId) { //noop pipe control if 
(currentPipeControlForNooping) { if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) { flatBatchBufferHelper->removePipeControlData(pipeControlLocationSize, currentPipeControlForNooping, hwInfo); } memset(currentPipeControlForNooping, 0, pipeControlLocationSize); } //obtain next candidate for nooping currentPipeControlForNooping = nextCommandBuffer->pipeControlThatMayBeErasedLocation; //track epilogue pipe control epiloguePipeControlLocation = nextCommandBuffer->epiloguePipeControlLocation; flushStampUpdateHelper.insert(nextCommandBuffer->flushStamp->getStampReference()); auto nextCommandBufferAddress = nextCommandBuffer->batchBuffer.commandBufferAllocation->getGpuAddress(); auto offsetedCommandBuffer = (uint64_t)ptrOffset(nextCommandBufferAddress, nextCommandBuffer->batchBuffer.startOffset); auto cpuAddressForCommandBufferDestination = ptrOffset(nextCommandBuffer->batchBuffer.commandBufferAllocation->getUnderlyingBuffer(), nextCommandBuffer->batchBuffer.startOffset); auto cpuAddressForCurrentCommandBufferEndingSection = alignUp(ptrOffset(currentBBendLocation, sizeof(MI_BATCH_BUFFER_START)), MemoryConstants::cacheLineSize); //if we point to exact same command buffer, then batch buffer start is not needed at all if (cpuAddressForCurrentCommandBufferEndingSection == cpuAddressForCommandBufferDestination) { memset(currentBBendLocation, 0u, ptrDiff(cpuAddressForCurrentCommandBufferEndingSection, currentBBendLocation)); } else { addBatchBufferStart((MI_BATCH_BUFFER_START *)currentBBendLocation, offsetedCommandBuffer, false); } if (DebugManager.flags.FlattenBatchBufferForAUBDump.get()) { flatBatchBufferHelper->registerCommandChunk(nextCommandBuffer->batchBuffer, sizeof(MI_BATCH_BUFFER_START)); } currentBBendLocation = nextCommandBuffer->batchBufferEndLocation; lastTaskCount = nextCommandBuffer->taskCount; nextCommandBuffer = nextCommandBuffer->next; commandBufferList.removeFrontOne(); } surfacesForSubmit.reserve(resourcePackage.size() + 1); for (auto &surface : 
resourcePackage) { surfacesForSubmit.push_back(surface); } //make sure we flush DC if needed if (epiloguePipeControlLocation && MemorySynchronizationCommands::getDcFlushEnable(true, hwInfo)) { auto emitDcFlush = true; if (DebugManager.flags.DisableDcFlushInEpilogue.get()) { emitDcFlush = false; } ((PIPE_CONTROL *)epiloguePipeControlLocation)->setDcFlushEnable(emitDcFlush); } primaryCmdBuffer->batchBuffer.endCmdPtr = currentBBendLocation; if (this->flush(primaryCmdBuffer->batchBuffer, surfacesForSubmit) != SubmissionStatus::SUCCESS) { submitResult = false; break; } //after flush task level is closed this->taskLevel++; flushStampUpdateHelper.updateAll(flushStamp->peekStamp()); if (!isUpdateTagFromWaitEnabled()) { this->latestFlushedTaskCount = lastTaskCount; } this->makeSurfacePackNonResident(surfacesForSubmit); resourcePackage.clear(); } this->totalMemoryUsed = 0; } return submitResult; } template size_t CommandStreamReceiverHw::getRequiredCmdStreamSizeAligned(const DispatchFlags &dispatchFlags, Device &device) { size_t size = getRequiredCmdStreamSize(dispatchFlags, device); return alignUp(size, MemoryConstants::cacheLineSize); } template size_t CommandStreamReceiverHw::getRequiredCmdStreamSize(const DispatchFlags &dispatchFlags, Device &device) { size_t size = getRequiredCmdSizeForPreamble(device); size += getRequiredStateBaseAddressSize(device); if (!this->isStateSipSent || device.isDebuggerActive()) { size += PreemptionHelper::getRequiredStateSipCmdSize(device, isRcs()); } size += MemorySynchronizationCommands::getSizeForSinglePipeControl(); size += sizeof(typename GfxFamily::MI_BATCH_BUFFER_START); size += getCmdSizeForL3Config(); if (this->streamProperties.stateComputeMode.isDirty()) { size += getCmdSizeForComputeMode(); } size += getCmdSizeForMediaSampler(dispatchFlags.pipelineSelectArgs.mediaSamplerRequired); size += getCmdSizeForPipelineSelect(); size += getCmdSizeForPreemption(dispatchFlags); if (dispatchFlags.usePerDssBackedBuffer && 
!isPerDssBackedBufferSent) { size += getCmdSizeForPerDssBackedBuffer(device.getHardwareInfo()); } size += getCmdSizeForEpilogue(dispatchFlags); size += getCmdsSizeForHardwareContext(); if (csrSizeRequestFlags.activePartitionsChanged) { size += getCmdSizeForActivePartitionConfig(); } if (executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo()->workaroundTable.flags.waSamplerCacheFlushBetweenRedescribedSurfaceReads) { if (this->samplerCacheFlushRequired != SamplerCacheFlushState::samplerCacheFlushNotRequired) { size += sizeof(typename GfxFamily::PIPE_CONTROL); } } if (experimentalCmdBuffer.get() != nullptr) { size += experimentalCmdBuffer->getRequiredInjectionSize(); } size += TimestampPacketHelper::getRequiredCmdStreamSize(dispatchFlags.csrDependencies); size += TimestampPacketHelper::getRequiredCmdStreamSizeForTaskCountContainer(dispatchFlags.csrDependencies); if (stallingCommandsOnNextFlushRequired) { size += getCmdSizeForStallingCommands(dispatchFlags); } if (requiresInstructionCacheFlush) { size += sizeof(typename GfxFamily::PIPE_CONTROL); } if (DebugManager.flags.ForcePipeControlPriorToWalker.get()) { size += 2 * sizeof(PIPE_CONTROL); } return size; } template inline size_t CommandStreamReceiverHw::getCmdSizeForPipelineSelect() const { size_t size = 0; if ((csrSizeRequestFlags.mediaSamplerConfigChanged || csrSizeRequestFlags.specialPipelineSelectModeChanged || !isPreambleSent) && !isPipelineSelectAlreadyProgrammed()) { size += PreambleHelper::getCmdSizeForPipelineSelect(peekHwInfo()); } return size; } template inline WaitStatus CommandStreamReceiverHw::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, QueueThrottle throttle) { const auto params = kmdNotifyHelper->obtainTimeoutParams(useQuickKmdSleep, *getTagAddress(), taskCountToWait, flushStampToWait, throttle, this->isKmdWaitModeActive(), this->isAnyDirectSubmissionEnabled()); 
PRINT_DEBUG_STRING(DebugManager.flags.LogWaitingForCompletion.get(), stdout, "\nWaiting for task count %u at location %p. Current value: %u\n", taskCountToWait, getTagAddress(), *getTagAddress()); auto status = waitForCompletionWithTimeout(params, taskCountToWait); if (status == WaitStatus::NotReady) { waitForFlushStamp(flushStampToWait); //now call blocking wait, this is to ensure that task count is reached status = waitForCompletionWithTimeout(WaitParams{false, false, 0}, taskCountToWait); } // If GPU hang occured, then propagate it to the caller. if (status == WaitStatus::GpuHang) { return status; } UNRECOVERABLE_IF(*getTagAddress() < taskCountToWait); if (kmdNotifyHelper->quickKmdSleepForSporadicWaitsEnabled()) { kmdNotifyHelper->updateLastWaitForCompletionTimestamp(); } PRINT_DEBUG_STRING(DebugManager.flags.LogWaitingForCompletion.get(), stdout, "\nWaiting completed. Current value: %u\n", *getTagAddress()); return WaitStatus::Ready; } template inline void CommandStreamReceiverHw::programPreemption(LinearStream &csr, DispatchFlags &dispatchFlags) { PreemptionHelper::programCmdStream(csr, dispatchFlags.preemptionMode, this->lastPreemptionMode, preemptionAllocation); this->lastPreemptionMode = dispatchFlags.preemptionMode; } template inline size_t CommandStreamReceiverHw::getCmdSizeForPreemption(const DispatchFlags &dispatchFlags) const { return PreemptionHelper::getRequiredCmdStreamSize(dispatchFlags.preemptionMode, this->lastPreemptionMode); } template inline void CommandStreamReceiverHw::programStateSip(LinearStream &cmdStream, Device &device) { if (!this->isStateSipSent || device.isDebuggerActive()) { PreemptionHelper::programStateSip(cmdStream, device); this->isStateSipSent = true; } } template inline void CommandStreamReceiverHw::programPreamble(LinearStream &csr, Device &device, uint32_t &newL3Config) { if (!this->isPreambleSent) { PreambleHelper::programPreamble(&csr, device, newL3Config, this->preemptionAllocation); this->isPreambleSent = true; 
this->lastSentL3Config = newL3Config; } } template inline void CommandStreamReceiverHw::programVFEState(LinearStream &csr, DispatchFlags &dispatchFlags, uint32_t maxFrontEndThreads) { if (mediaVfeStateDirty) { if (dispatchFlags.additionalKernelExecInfo != AdditionalKernelExecInfo::NotApplicable) { lastAdditionalKernelExecInfo = dispatchFlags.additionalKernelExecInfo; } if (dispatchFlags.kernelExecutionType != KernelExecutionType::NotApplicable) { lastKernelExecutionType = dispatchFlags.kernelExecutionType; } auto &hwInfo = peekHwInfo(); auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily); const auto &hwInfoConfig = *NEO::HwInfoConfig::get(hwInfo.platform.eProductFamily); auto engineGroupType = hwHelper.getEngineGroupType(getOsContext().getEngineType(), getOsContext().getEngineUsage(), hwInfo); auto pVfeState = PreambleHelper::getSpaceForVfeState(&csr, hwInfo, engineGroupType); auto disableOverdispatch = hwInfoConfig.isDisableOverdispatchAvailable(hwInfo) && (dispatchFlags.additionalKernelExecInfo != AdditionalKernelExecInfo::NotSet); streamProperties.frontEndState.setProperties(lastKernelExecutionType == KernelExecutionType::Concurrent, dispatchFlags.disableEUFusion, disableOverdispatch, osContext->isEngineInstanced(), hwInfo); PreambleHelper::programVfeState( pVfeState, hwInfo, requiredScratchSize, getScratchPatchAddress(), maxFrontEndThreads, streamProperties); auto commandOffset = PreambleHelper::getScratchSpaceAddressOffsetForVfeState(&csr, pVfeState); if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) { flatBatchBufferHelper->collectScratchSpacePatchInfo(getScratchPatchAddress(), commandOffset, csr); } setMediaVFEStateDirty(false); } } template void CommandStreamReceiverHw::programMediaSampler(LinearStream &commandStream, DispatchFlags &dispatchFlags) { } template size_t CommandStreamReceiverHw::getCmdSizeForMediaSampler(bool mediaSamplerRequired) const { return 0; } template void 
CommandStreamReceiverHw::collectStateBaseAddresPatchInfo( uint64_t baseAddress, uint64_t commandOffset, const LinearStream *dsh, const LinearStream *ioh, const LinearStream *ssh, uint64_t generalStateBase) { typedef typename GfxFamily::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; if constexpr (GfxFamily::supportsSampler) { PatchInfoData dynamicStatePatchInfo = {dsh->getGraphicsAllocation()->getGpuAddress(), 0u, PatchInfoAllocationType::DynamicStateHeap, baseAddress, commandOffset + STATE_BASE_ADDRESS::PATCH_CONSTANTS::DYNAMICSTATEBASEADDRESS_BYTEOFFSET, PatchInfoAllocationType::Default}; flatBatchBufferHelper->setPatchInfoData(dynamicStatePatchInfo); } PatchInfoData generalStatePatchInfo = {generalStateBase, 0u, PatchInfoAllocationType::GeneralStateHeap, baseAddress, commandOffset + STATE_BASE_ADDRESS::PATCH_CONSTANTS::GENERALSTATEBASEADDRESS_BYTEOFFSET, PatchInfoAllocationType::Default}; PatchInfoData surfaceStatePatchInfo = {ssh->getGraphicsAllocation()->getGpuAddress(), 0u, PatchInfoAllocationType::SurfaceStateHeap, baseAddress, commandOffset + STATE_BASE_ADDRESS::PATCH_CONSTANTS::SURFACESTATEBASEADDRESS_BYTEOFFSET, PatchInfoAllocationType::Default}; flatBatchBufferHelper->setPatchInfoData(generalStatePatchInfo); flatBatchBufferHelper->setPatchInfoData(surfaceStatePatchInfo); collectStateBaseAddresIohPatchInfo(baseAddress, commandOffset, *ioh); } template void CommandStreamReceiverHw::resetKmdNotifyHelper(KmdNotifyHelper *newHelper) { kmdNotifyHelper.reset(newHelper); kmdNotifyHelper->updateAcLineStatus(); if (kmdNotifyHelper->quickKmdSleepForSporadicWaitsEnabled()) { kmdNotifyHelper->updateLastWaitForCompletionTimestamp(); } } template void CommandStreamReceiverHw::addClearSLMWorkAround(typename GfxFamily::PIPE_CONTROL *pCmd) { } template uint64_t CommandStreamReceiverHw::getScratchPatchAddress() { return scratchSpaceController->getScratchPatchAddress(); } template bool CommandStreamReceiverHw::detectInitProgrammingFlagsRequired(const DispatchFlags &dispatchFlags) 
const { return DebugManager.flags.ForceCsrReprogramming.get(); } template inline void CommandStreamReceiverHw::unregisterDirectSubmissionFromController() { auto directSubmissionController = executionEnvironment.directSubmissionController.get(); if (directSubmissionController) { directSubmissionController->unregisterDirectSubmission(this); } } template uint32_t CommandStreamReceiverHw::flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) { using MI_BATCH_BUFFER_END = typename GfxFamily::MI_BATCH_BUFFER_END; using MI_FLUSH_DW = typename GfxFamily::MI_FLUSH_DW; auto lock = obtainUniqueOwnership(); bool blitterDirectSubmission = this->isBlitterDirectSubmissionEnabled(); auto debugPauseEnabled = PauseOnGpuProperties::featureEnabled(DebugManager.flags.PauseOnBlitCopy.get()); auto &commandStream = getCS(BlitCommandsHelper::estimateBlitCommandsSize(blitPropertiesContainer, profilingEnabled, debugPauseEnabled, blitterDirectSubmission, *this->executionEnvironment.rootDeviceEnvironments[this->rootDeviceIndex])); auto commandStreamStart = commandStream.getUsed(); auto newTaskCount = taskCount + 1; latestSentTaskCount = newTaskCount; getOsContext().ensureContextInitialized(); this->initDirectSubmission(device, getOsContext()); const auto &hwInfo = this->peekHwInfo(); if (PauseOnGpuProperties::pauseModeAllowed(DebugManager.flags.PauseOnBlitCopy.get(), taskCount, PauseOnGpuProperties::PauseMode::BeforeWorkload)) { BlitCommandsHelper::dispatchDebugPauseCommands(commandStream, getDebugPauseStateGPUAddress(), DebugPauseState::waitingForUserStartConfirmation, DebugPauseState::hasUserStartConfirmation, hwInfo); } programEnginePrologue(commandStream); if (pageTableManager.get() && !pageTableManagerInitialized) { pageTableManagerInitialized = pageTableManager->initPageTableManagerRegisters(this); } for (auto &blitProperties : blitPropertiesContainer) { 
TimestampPacketHelper::programCsrDependenciesForTimestampPacketContainer(commandStream, blitProperties.csrDependencies); TimestampPacketHelper::programCsrDependenciesForForTaskCountContainer(commandStream, blitProperties.csrDependencies); if (blitProperties.outputTimestampPacket && profilingEnabled) { BlitCommandsHelper::encodeProfilingStartMmios(commandStream, *blitProperties.outputTimestampPacket); } BlitCommandsHelper::dispatchBlitCommands(blitProperties, commandStream, *this->executionEnvironment.rootDeviceEnvironments[this->rootDeviceIndex]); if (blitProperties.outputTimestampPacket) { if (profilingEnabled) { MiFlushArgs args; EncodeMiFlushDW::programMiFlushDw(commandStream, 0llu, newTaskCount, args, hwInfo); BlitCommandsHelper::encodeProfilingEndMmios(commandStream, *blitProperties.outputTimestampPacket); } else { auto timestampPacketGpuAddress = TimestampPacketHelper::getContextEndGpuAddress(*blitProperties.outputTimestampPacket); MiFlushArgs args; args.commandWithPostSync = true; EncodeMiFlushDW::programMiFlushDw(commandStream, timestampPacketGpuAddress, 0, args, hwInfo); } makeResident(*blitProperties.outputTimestampPacket->getBaseGraphicsAllocation()); } blitProperties.csrDependencies.makeResident(*this); makeResident(*blitProperties.srcAllocation); makeResident(*blitProperties.dstAllocation); if (blitProperties.clearColorAllocation) { makeResident(*blitProperties.clearColorAllocation); } } BlitCommandsHelper::programGlobalSequencerFlush(commandStream); auto updateTag = !isUpdateTagFromWaitEnabled(); updateTag |= blocking; if (updateTag) { MemorySynchronizationCommands::addAdditionalSynchronization(commandStream, tagAllocation->getGpuAddress(), false, peekHwInfo()); MiFlushArgs args; args.commandWithPostSync = true; args.notifyEnable = isUsedNotifyEnableForPostSync(); EncodeMiFlushDW::programMiFlushDw(commandStream, tagAllocation->getGpuAddress(), newTaskCount, args, hwInfo); MemorySynchronizationCommands::addAdditionalSynchronization(commandStream, 
tagAllocation->getGpuAddress(), false, peekHwInfo()); } if (PauseOnGpuProperties::pauseModeAllowed(DebugManager.flags.PauseOnBlitCopy.get(), taskCount, PauseOnGpuProperties::PauseMode::AfterWorkload)) { BlitCommandsHelper::dispatchDebugPauseCommands(commandStream, getDebugPauseStateGPUAddress(), DebugPauseState::waitingForUserEndConfirmation, DebugPauseState::hasUserEndConfirmation, hwInfo); } void *endingCmdPtr = nullptr; programEndingCmd(commandStream, device, &endingCmdPtr, blitterDirectSubmission); EncodeNoop::alignToCacheLine(commandStream); makeResident(*tagAllocation); if (globalFenceAllocation) { makeResident(*globalFenceAllocation); } BatchBuffer batchBuffer{commandStream.getGraphicsAllocation(), commandStreamStart, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, commandStream.getUsed(), &commandStream, endingCmdPtr, false}; commandStream.getGraphicsAllocation()->updateTaskCount(newTaskCount, this->osContext->getContextId()); commandStream.getGraphicsAllocation()->updateResidencyTaskCount(newTaskCount, this->osContext->getContextId()); flush(batchBuffer, getResidencyAllocations()); makeSurfacePackNonResident(getResidencyAllocations()); if (updateTag) { latestFlushedTaskCount = newTaskCount; } taskCount = newTaskCount; auto flushStampToWait = flushStamp->peekStamp(); lock.unlock(); if (blocking) { waitForTaskCountWithKmdNotifyFallback(newTaskCount, flushStampToWait, false, QueueThrottle::MEDIUM); internalAllocationStorage->cleanAllocationList(newTaskCount, TEMPORARY_ALLOCATION); } return newTaskCount; } template inline void CommandStreamReceiverHw::flushTagUpdate() { if (this->osContext != nullptr) { if (EngineHelpers::isBcs(this->osContext->getEngineType())) { this->flushMiFlushDW(); } else { this->flushPipeControl(); } } } template void CommandStreamReceiverHw::flushNonKernelTask(GraphicsAllocation *eventAlloc, uint64_t immediateGpuAddress, uint64_t immediateData, PipeControlArgs &args, bool isWaitOnEvent, bool 
isStartOfDispatch, bool isEndOfDispatch) { if (isWaitOnEvent) { this->flushSemaphoreWait(eventAlloc, immediateGpuAddress, immediateData, args, isStartOfDispatch, isEndOfDispatch); } else { if (EngineHelpers::isBcs(this->osContext->getEngineType())) { this->flushMiFlushDW(eventAlloc, immediateGpuAddress, immediateData); } else { this->flushPipeControl(eventAlloc, immediateGpuAddress, immediateData, args); } } } template inline void CommandStreamReceiverHw::flushMiFlushDW() { auto lock = obtainUniqueOwnership(); auto &commandStream = getCS(EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite()); auto commandStreamStart = commandStream.getUsed(); const auto &hwInfo = this->peekHwInfo(); MiFlushArgs args; args.commandWithPostSync = true; args.notifyEnable = isUsedNotifyEnableForPostSync(); EncodeMiFlushDW::programMiFlushDw(commandStream, tagAllocation->getGpuAddress(), taskCount + 1, args, hwInfo); makeResident(*tagAllocation); this->flushSmallTask(commandStream, commandStreamStart); this->latestFlushedTaskCount = taskCount.load(); } template void CommandStreamReceiverHw::flushMiFlushDW(GraphicsAllocation *eventAlloc, uint64_t immediateGpuAddress, uint64_t immediateData) { auto lock = obtainUniqueOwnership(); auto &commandStream = getCS(EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite()); auto commandStreamStart = commandStream.getUsed(); programHardwareContext(commandStream); const auto &hwInfo = this->peekHwInfo(); MiFlushArgs args; if (eventAlloc) { args.commandWithPostSync = true; EncodeMiFlushDW::programMiFlushDw(commandStream, immediateGpuAddress, immediateData, args, hwInfo); makeResident(*eventAlloc); } else { EncodeMiFlushDW::programMiFlushDw(commandStream, 0, 0, args, hwInfo); } this->flushSmallTask(commandStream, commandStreamStart); } template void CommandStreamReceiverHw::flushPipeControl() { using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; auto lock = obtainUniqueOwnership(); const auto &hwInfo = peekHwInfo(); auto &commandStream = 
getCS(MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(hwInfo)); auto commandStreamStart = commandStream.getUsed(); PipeControlArgs args; args.dcFlushEnable = MemorySynchronizationCommands::getDcFlushEnable(true, hwInfo); args.notifyEnable = isUsedNotifyEnableForPostSync(); args.workloadPartitionOffset = isMultiTileOperationEnabled(); MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation(commandStream, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, getTagAllocation()->getGpuAddress(), taskCount + 1, hwInfo, args); makeResident(*tagAllocation); this->flushSmallTask(commandStream, commandStreamStart); this->latestFlushedTaskCount = taskCount.load(); } template void CommandStreamReceiverHw::flushPipeControl(GraphicsAllocation *eventAlloc, uint64_t immediateGpuAddress, uint64_t immediateData, PipeControlArgs &args) { using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; auto lock = obtainUniqueOwnership(); auto &commandStream = getCS(MemorySynchronizationCommands::getSizeForSinglePipeControl()); auto commandStreamStart = commandStream.getUsed(); programHardwareContext(commandStream); const auto &hwInfo = peekHwInfo(); if (eventAlloc) { MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation(commandStream, PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, immediateGpuAddress, immediateData, hwInfo, args); makeResident(*eventAlloc); } else { NEO::PipeControlArgs args; NEO::MemorySynchronizationCommands::addPipeControl(commandStream, args); } this->flushSmallTask(commandStream, commandStreamStart); } template void CommandStreamReceiverHw::flushSemaphoreWait(GraphicsAllocation *eventAlloc, uint64_t immediateGpuAddress, uint64_t immediateData, PipeControlArgs &args, bool isStartOfDispatch, bool isEndOfDispatch) { auto lock = obtainUniqueOwnership(); const auto &hwInfo = this->peekHwInfo(); if (isStartOfDispatch && args.dcFlushEnable) { if (this->osContext->getEngineType() == 
aub_stream::ENGINE_BCS) { LinearStream &commandStream = getCS(EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite()); cmdStreamStart = commandStream.getUsed(); MiFlushArgs args; EncodeMiFlushDW::programMiFlushDw(commandStream, 0, 0, args, hwInfo); } else { LinearStream &commandStream = getCS(MemorySynchronizationCommands::getSizeForSinglePipeControl()); cmdStreamStart = commandStream.getUsed(); NEO::MemorySynchronizationCommands::addPipeControl(commandStream, args); } } using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT; using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION; LinearStream &commandStream = getCS(NEO::EncodeSempahore::getSizeMiSemaphoreWait()); if (isStartOfDispatch && !args.dcFlushEnable) { cmdStreamStart = commandStream.getUsed(); } programHardwareContext(commandStream); NEO::EncodeSempahore::addMiSemaphoreWaitCommand(commandStream, immediateGpuAddress, static_cast(immediateData), MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD); makeResident(*eventAlloc); if (isEndOfDispatch) { this->flushSmallTask(commandStream, cmdStreamStart); cmdStreamStart = 0; } } template void CommandStreamReceiverHw::flushSmallTask(LinearStream &commandStreamTask, size_t commandStreamStartTask) { using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START; using MI_BATCH_BUFFER_END = typename GfxFamily::MI_BATCH_BUFFER_END; void *endingCmdPtr = nullptr; if (isAnyDirectSubmissionEnabled()) { endingCmdPtr = commandStreamTask.getSpace(0); EncodeBatchBufferStartOrEnd::programBatchBufferStart(&commandStreamTask, 0ull, false); } else { auto batchBufferEnd = reinterpret_cast(commandStreamTask.getSpace(sizeof(MI_BATCH_BUFFER_END))); *batchBufferEnd = GfxFamily::cmdInitBatchBufferEnd; } auto bytesToPad = sizeof(MI_BATCH_BUFFER_START) - sizeof(MI_BATCH_BUFFER_END); EncodeNoop::emitNoop(commandStreamTask, bytesToPad); EncodeNoop::alignToCacheLine(commandStreamTask); if (globalFenceAllocation) { 
makeResident(*globalFenceAllocation); } BatchBuffer batchBuffer{commandStreamTask.getGraphicsAllocation(), commandStreamStartTask, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, commandStreamTask.getUsed(), &commandStreamTask, endingCmdPtr, false}; this->latestSentTaskCount = taskCount + 1; flushHandler(batchBuffer, getResidencyAllocations()); taskCount++; } template inline void CommandStreamReceiverHw::flushHandler(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) { flush(batchBuffer, allocationsForResidency); makeSurfacePackNonResident(allocationsForResidency); } template inline bool CommandStreamReceiverHw::isUpdateTagFromWaitEnabled() { auto &hwHelper = HwHelper::get(peekHwInfo().platform.eRenderCoreFamily); auto enabled = hwHelper.isUpdateTaskCountFromWaitSupported(); enabled &= this->isAnyDirectSubmissionEnabled(); switch (DebugManager.flags.UpdateTaskCountFromWait.get()) { case 0: enabled = false; break; case 1: enabled = this->isDirectSubmissionEnabled(); break; case 2: enabled = this->isAnyDirectSubmissionEnabled(); break; case 3: enabled = true; break; } return enabled; } template inline void CommandStreamReceiverHw::updateTagFromWait() { if (isUpdateTagFromWaitEnabled()) { flushTagUpdate(); } } template inline void CommandStreamReceiverHw::programAdditionalStateBaseAddress(LinearStream &csr, typename GfxFamily::STATE_BASE_ADDRESS &cmd, Device &device) {} template inline MemoryCompressionState CommandStreamReceiverHw::getMemoryCompressionState(bool auxTranslationRequired, const HardwareInfo &hwInfo) const { return MemoryCompressionState::NotApplicable; } template inline bool CommandStreamReceiverHw::isPipelineSelectAlreadyProgrammed() const { const auto &hwInfoConfig = *HwInfoConfig::get(peekHwInfo().platform.eProductFamily); return this->streamProperties.stateComputeMode.isDirty() && hwInfoConfig.is3DPipelineSelectWARequired() && isRcs(); } template inline void 
CommandStreamReceiverHw::programEpilogue(LinearStream &csr, Device &device, void **batchBufferEndLocation, DispatchFlags &dispatchFlags) { if (dispatchFlags.epilogueRequired) { auto currentOffset = ptrDiff(csr.getSpace(0u), csr.getCpuBase()); auto gpuAddress = ptrOffset(csr.getGraphicsAllocation()->getGpuAddress(), currentOffset); addBatchBufferStart(reinterpret_cast(*batchBufferEndLocation), gpuAddress, false); this->programEpliogueCommands(csr, dispatchFlags); programEndingCmd(csr, device, batchBufferEndLocation, isDirectSubmissionEnabled()); EncodeNoop::alignToCacheLine(csr); } } template inline size_t CommandStreamReceiverHw::getCmdSizeForEpilogue(const DispatchFlags &dispatchFlags) const { if (dispatchFlags.epilogueRequired) { size_t terminateCmd = sizeof(typename GfxFamily::MI_BATCH_BUFFER_END); if (isDirectSubmissionEnabled()) { terminateCmd = sizeof(typename GfxFamily::MI_BATCH_BUFFER_START); } auto size = getCmdSizeForEpilogueCommands(dispatchFlags) + terminateCmd; return alignUp(size, MemoryConstants::cacheLineSize); } return 0u; } template inline void CommandStreamReceiverHw::programEnginePrologue(LinearStream &csr) { } template inline size_t CommandStreamReceiverHw::getCmdSizeForPrologue() const { return 0u; } template inline void CommandStreamReceiverHw::stopDirectSubmission() { if (EngineHelpers::isBcs(this->osContext->getEngineType())) { this->blitterDirectSubmission->stopRingBuffer(); } else { this->directSubmission->stopRingBuffer(); } } template inline bool CommandStreamReceiverHw::initDirectSubmission(Device &device, OsContext &osContext) { bool ret = true; bool submitOnInit = false; auto startDirect = osContext.isDirectSubmissionAvailable(device.getHardwareInfo(), submitOnInit); if (startDirect) { auto lock = this->obtainUniqueOwnership(); if (!this->isAnyDirectSubmissionEnabled()) { if (EngineHelpers::isBcs(osContext.getEngineType())) { blitterDirectSubmission = DirectSubmissionHw>::create(device, osContext); ret = 
blitterDirectSubmission->initialize(submitOnInit, this->isUsedNotifyEnableForPostSync()); } else { directSubmission = DirectSubmissionHw>::create(device, osContext); ret = directSubmission->initialize(submitOnInit, this->isUsedNotifyEnableForPostSync()); } auto directSubmissionController = executionEnvironment.initializeDirectSubmissionController(); if (directSubmissionController) { directSubmissionController->registerDirectSubmission(this); } if (this->isUpdateTagFromWaitEnabled()) { this->overrideDispatchPolicy(DispatchMode::ImmediateDispatch); } } osContext.setDirectSubmissionActive(); } return ret; } template TagAllocatorBase *CommandStreamReceiverHw::getTimestampPacketAllocator() { if (timestampPacketAllocator.get() == nullptr) { auto &hwHelper = HwHelper::get(peekHwInfo().platform.eRenderCoreFamily); const std::vector rootDeviceIndices = {rootDeviceIndex}; timestampPacketAllocator = hwHelper.createTimestampPacketAllocator(rootDeviceIndices, getMemoryManager(), getPreferredTagPoolSize(), getType(), osContext->getDeviceBitfield()); } return timestampPacketAllocator.get(); } template void CommandStreamReceiverHw::postInitFlagsSetup() { useNewResourceImplicitFlush = checkPlatformSupportsNewResourceImplicitFlush(); int32_t overrideNewResourceImplicitFlush = DebugManager.flags.PerformImplicitFlushForNewResource.get(); if (overrideNewResourceImplicitFlush != -1) { useNewResourceImplicitFlush = overrideNewResourceImplicitFlush == 0 ? false : true; } useGpuIdleImplicitFlush = checkPlatformSupportsGpuIdleImplicitFlush(); int32_t overrideGpuIdleImplicitFlush = DebugManager.flags.PerformImplicitFlushForIdleGpu.get(); if (overrideGpuIdleImplicitFlush != -1) { useGpuIdleImplicitFlush = overrideGpuIdleImplicitFlush == 0 ? 
false : true; } } template size_t CommandStreamReceiverHw::getCmdSizeForStallingCommands(const DispatchFlags &dispatchFlags) const { auto barrierTimestampPacketNodes = dispatchFlags.barrierTimestampPacketNodes; if (barrierTimestampPacketNodes && barrierTimestampPacketNodes->peekNodes().size() > 0) { return getCmdSizeForStallingPostSyncCommands(); } else { return getCmdSizeForStallingNoPostSyncCommands(); } } template inline void CommandStreamReceiverHw::programActivePartitionConfigFlushTask(LinearStream &csr) { if (csrSizeRequestFlags.activePartitionsChanged) { programActivePartitionConfig(csr); } } template bool CommandStreamReceiverHw::hasSharedHandles() { if (!csrSizeRequestFlags.hasSharedHandles) { for (const auto &allocation : this->getResidencyAllocations()) { if (allocation->peekSharedHandle()) { csrSizeRequestFlags.hasSharedHandles = true; break; } } } return csrSizeRequestFlags.hasSharedHandles; } template size_t CommandStreamReceiverHw::getCmdSizeForComputeMode() { return EncodeComputeMode::getCmdSizeForComputeMode(this->peekHwInfo(), hasSharedHandles(), isRcs()); } } // namespace NEO command_stream_receiver_hw_bdw_and_later.inl000066400000000000000000000155441422164147700337670ustar00rootroot00000000000000compute-runtime-22.14.22890/shared/source/command_stream/* * Copyright (C) 2019-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver_hw_base.inl" #include "shared/source/helpers/address_patch.h" #include "hw_cmds.h" namespace NEO { template size_t CommandStreamReceiverHw::getSshHeapSize() { return getDefaultHeapSize(); } template bool CommandStreamReceiverHw::are4GbHeapsAvailable() const { return true; } template inline void CommandStreamReceiverHw::programL3(LinearStream &csr, uint32_t &newL3Config) { typedef typename GfxFamily::PIPE_CONTROL PIPE_CONTROL; if (csrSizeRequestFlags.l3ConfigChanged && this->isPreambleSent) { // Add a PIPE_CONTROL w/ CS_stall auto pCmd = (PIPE_CONTROL 
*)csr.getSpace(sizeof(PIPE_CONTROL)); PIPE_CONTROL cmd = GfxFamily::cmdInitPipeControl; cmd.setCommandStreamerStallEnable(true); cmd.setDcFlushEnable(true); addClearSLMWorkAround(&cmd); *pCmd = cmd; PreambleHelper::programL3(&csr, newL3Config); this->lastSentL3Config = newL3Config; } } template size_t CommandStreamReceiverHw::getRequiredStateBaseAddressSize(const Device &device) const { using PIPELINE_SELECT = typename GfxFamily::PIPELINE_SELECT; size_t size = 0; const auto &hwInfoConfig = *HwInfoConfig::get(peekHwInfo().platform.eProductFamily); if (hwInfoConfig.is3DPipelineSelectWARequired()) { size += (2 * PreambleHelper::getCmdSizeForPipelineSelect(peekHwInfo())); } size += sizeof(typename GfxFamily::STATE_BASE_ADDRESS) + sizeof(PIPE_CONTROL); return size; } template inline size_t CommandStreamReceiverHw::getCmdSizeForL3Config() const { if (!this->isPreambleSent) { return sizeof(typename GfxFamily::MI_LOAD_REGISTER_IMM); } else if (csrSizeRequestFlags.l3ConfigChanged) { return sizeof(typename GfxFamily::MI_LOAD_REGISTER_IMM) + sizeof(typename GfxFamily::PIPE_CONTROL); } return 0; } template void CommandStreamReceiverHw::programPipelineSelect(LinearStream &commandStream, PipelineSelectArgs &pipelineSelectArgs) { if (csrSizeRequestFlags.mediaSamplerConfigChanged || !isPreambleSent) { if (!isPipelineSelectAlreadyProgrammed()) { PreambleHelper::programPipelineSelect(&commandStream, pipelineSelectArgs, peekHwInfo()); } this->lastMediaSamplerConfig = pipelineSelectArgs.mediaSamplerRequired; } } template void CommandStreamReceiverHw::createScratchSpaceController() { scratchSpaceController = std::make_unique(rootDeviceIndex, executionEnvironment, *internalAllocationStorage.get()); } template void CommandStreamReceiverHw::programEpliogueCommands(LinearStream &csr, const DispatchFlags &dispatchFlags) { this->programEngineModeEpliogue(csr, dispatchFlags); } template size_t CommandStreamReceiverHw::getCmdSizeForEpilogueCommands(const DispatchFlags &dispatchFlags) const { 
return this->getCmdSizeForEngineMode(dispatchFlags); } template bool CommandStreamReceiverHw::isMultiOsContextCapable() const { return false; } template inline void CommandStreamReceiverHw::addPipeControlBeforeStateSip(LinearStream &commandStream, Device &device) {} template inline void CommandStreamReceiverHw::addPipeControlBefore3dState(LinearStream &commandStream, DispatchFlags &dispatchFlags) {} template bool CommandStreamReceiverHw::checkPlatformSupportsNewResourceImplicitFlush() const { return false; } template bool CommandStreamReceiverHw::checkPlatformSupportsGpuIdleImplicitFlush() const { return false; } template GraphicsAllocation *CommandStreamReceiverHw::getClearColorAllocation() { return nullptr; } template void CommandStreamReceiverHw::programPerDssBackedBuffer(LinearStream &commandStream, Device &device, DispatchFlags &dispatchFlags) { } template size_t CommandStreamReceiverHw::getCmdSizeForPerDssBackedBuffer(const HardwareInfo &hwInfo) { return 0; } template void CommandStreamReceiverHw::collectStateBaseAddresIohPatchInfo(uint64_t commandBufferAddress, uint64_t commandOffset, const LinearStream &ioh) { using STATE_BASE_ADDRESS = typename GfxFamily::STATE_BASE_ADDRESS; PatchInfoData indirectObjectPatchInfo = {ioh.getGraphicsAllocation()->getGpuAddress(), 0u, PatchInfoAllocationType::IndirectObjectHeap, commandBufferAddress, commandOffset + STATE_BASE_ADDRESS::PATCH_CONSTANTS::INDIRECTOBJECTBASEADDRESS_BYTEOFFSET, PatchInfoAllocationType::Default}; flatBatchBufferHelper->setPatchInfoData(indirectObjectPatchInfo); } template size_t CommandStreamReceiverHw::getCmdSizeForActivePartitionConfig() const { return 0; } template inline void CommandStreamReceiverHw::programActivePartitionConfig(LinearStream &csr) { } template inline size_t CommandStreamReceiverHw::getCmdSizeForStallingNoPostSyncCommands() const { return sizeof(typename GfxFamily::PIPE_CONTROL); } template inline size_t CommandStreamReceiverHw::getCmdSizeForStallingPostSyncCommands() const { 
return MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(peekHwInfo()); } template inline void CommandStreamReceiverHw::programStallingNoPostSyncCommandsForBarrier(LinearStream &cmdStream) { PipeControlArgs args; MemorySynchronizationCommands::addPipeControl(cmdStream, args); } template inline void CommandStreamReceiverHw::programStallingPostSyncCommandsForBarrier(LinearStream &cmdStream, TagNodeBase &tagNode) { auto barrierTimestampPacketGpuAddress = TimestampPacketHelper::getContextEndGpuAddress(tagNode); const auto &hwInfo = peekHwInfo(); PipeControlArgs args; args.dcFlushEnable = MemorySynchronizationCommands::getDcFlushEnable(true, hwInfo); MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( cmdStream, PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, barrierTimestampPacketGpuAddress, 0, hwInfo, args); } template inline void CommandStreamReceiverHw::configurePostSyncWriteOffset() { } } // namespace NEO command_stream_receiver_hw_dg2_and_later.inl000066400000000000000000000053211422164147700336570ustar00rootroot00000000000000compute-runtime-22.14.22890/shared/source/command_stream/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_container/command_encoder.h" #include "shared/source/command_stream/command_stream_receiver_hw.h" #include "shared/source/command_stream/device_command_stream.h" #include "shared/source/helpers/ray_tracing_helper.h" #include "shared/source/os_interface/hw_info_config.h" namespace NEO { using _3DSTATE_BTD = typename Family::_3DSTATE_BTD; using _3DSTATE_BTD_BODY = typename Family::_3DSTATE_BTD_BODY; using PIPE_CONTROL = typename Family::PIPE_CONTROL; template <> void CommandStreamReceiverHw::programPerDssBackedBuffer(LinearStream &commandStream, Device &device, DispatchFlags &dispatchFlags) { if (dispatchFlags.usePerDssBackedBuffer && !isPerDssBackedBufferSent) { DEBUG_BREAK_IF(perDssBackedBuffer == 
nullptr); auto _3dStateBtd = commandStream.getSpaceForCmd<_3DSTATE_BTD>(); _3DSTATE_BTD cmd = Family::cmd3dStateBtd; cmd.getBtdStateBody().setPerDssMemoryBackedBufferSize(static_cast<_3DSTATE_BTD_BODY::PER_DSS_MEMORY_BACKED_BUFFER_SIZE>(RayTracingHelper::getMemoryBackedFifoSizeToPatch())); cmd.getBtdStateBody().setMemoryBackedBufferBasePointer(perDssBackedBuffer->getGpuAddress()); *_3dStateBtd = cmd; isPerDssBackedBufferSent = true; } } template <> size_t CommandStreamReceiverHw::getCmdSizeForPerDssBackedBuffer(const HardwareInfo &hwInfo) { size_t size = sizeof(_3DSTATE_BTD); auto hwInfoConfig = HwInfoConfig::get(hwInfo.platform.eProductFamily); const auto &[isBasicWARequired, isExtendedWARequired] = hwInfoConfig->isPipeControlPriorToNonPipelinedStateCommandsWARequired(hwInfo, isRcs()); std::ignore = isBasicWARequired; if (isExtendedWARequired) { size += sizeof(typename Family::PIPE_CONTROL); } return size; } template inline void CommandStreamReceiverHw::addPipeControlBefore3dState(LinearStream &commandStream, DispatchFlags &dispatchFlags) { auto &hwInfo = peekHwInfo(); auto hwInfoConfig = HwInfoConfig::get(hwInfo.platform.eProductFamily); const auto &[isBasicWARequired, isExtendedWARequired] = hwInfoConfig->isPipeControlPriorToNonPipelinedStateCommandsWARequired(hwInfo, isRcs()); std::ignore = isBasicWARequired; PipeControlArgs args; args.dcFlushEnable = MemorySynchronizationCommands::getDcFlushEnable(true, hwInfo); if (isExtendedWARequired && dispatchFlags.usePerDssBackedBuffer && !isPerDssBackedBufferSent) { DEBUG_BREAK_IF(perDssBackedBuffer == nullptr); NEO::EncodeWA::addPipeControlPriorToNonPipelinedStateCommand(commandStream, args, hwInfo, isRcs()); } } } // namespace NEO command_stream_receiver_hw_xehp_and_later.inl000066400000000000000000000234111422164147700341470ustar00rootroot00000000000000compute-runtime-22.14.22890/shared/source/command_stream/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include 
"shared/source/command_container/implicit_scaling.h" #include "shared/source/command_stream/command_stream_receiver_hw_base.inl" #include "shared/source/command_stream/device_command_stream.h" #include "shared/source/command_stream/scratch_space_controller_xehp_and_later.h" #include "shared/source/gmm_helper/gmm.h" #include "shared/source/helpers/preamble.h" #include "shared/source/kernel/grf_config.h" #include "shared/source/os_interface/hw_info_config.h" #include "shared/source/os_interface/os_interface.h" namespace NEO { template size_t CommandStreamReceiverHw::getSshHeapSize() { return 2 * MB; } template bool CommandStreamReceiverHw::are4GbHeapsAvailable() const { return is64bit; } template void CommandStreamReceiverHw::programL3(LinearStream &csr, uint32_t &newL3Config) {} template size_t CommandStreamReceiverHw::getRequiredStateBaseAddressSize(const Device &device) const { size_t size = sizeof(typename GfxFamily::STATE_BASE_ADDRESS); size += sizeof(typename GfxFamily::_3DSTATE_BINDING_TABLE_POOL_ALLOC); size += sizeof(PIPE_CONTROL); auto &hwInfo = *device.getRootDeviceEnvironment().getHardwareInfo(); auto &hwInfoConfig = *HwInfoConfig::get(hwInfo.platform.eProductFamily); if (hwInfoConfig.isAdditionalStateBaseAddressWARequired(hwInfo)) { size += sizeof(typename GfxFamily::STATE_BASE_ADDRESS); } return size; } template size_t CommandStreamReceiverHw::getCmdSizeForL3Config() const { return 0; } template void CommandStreamReceiverHw::programPipelineSelect(LinearStream &commandStream, PipelineSelectArgs &pipelineSelectArgs) { if (csrSizeRequestFlags.mediaSamplerConfigChanged || csrSizeRequestFlags.specialPipelineSelectModeChanged || !isPreambleSent) { PreambleHelper::programPipelineSelect(&commandStream, pipelineSelectArgs, peekHwInfo()); this->lastMediaSamplerConfig = pipelineSelectArgs.mediaSamplerRequired; this->lastSpecialPipelineSelectMode = pipelineSelectArgs.specialPipelineSelectMode; } } template void 
CommandStreamReceiverHw::createScratchSpaceController() { scratchSpaceController = std::make_unique(this->rootDeviceIndex, executionEnvironment, *internalAllocationStorage.get()); } template void CommandStreamReceiverHw::programEpliogueCommands(LinearStream &csr, const DispatchFlags &dispatchFlags) { this->programEngineModeEpliogue(csr, dispatchFlags); } template size_t CommandStreamReceiverHw::getCmdSizeForEpilogueCommands(const DispatchFlags &dispatchFlags) const { return this->getCmdSizeForEngineMode(dispatchFlags); } template bool CommandStreamReceiverHw::isMultiOsContextCapable() const { return deviceBitfield.count() > 1u; } template class ImplicitFlushSettings { public: static bool &getSettingForNewResource() { return defaultSettingForNewResource; } static bool &getSettingForGpuIdle() { return defaultSettingForGpuIdle; } private: static bool defaultSettingForNewResource; static bool defaultSettingForGpuIdle; }; template bool CommandStreamReceiverHw::checkPlatformSupportsNewResourceImplicitFlush() const { if (this->isMultiOsContextCapable()) { return false; } return ImplicitFlushSettings::getSettingForNewResource() ? getOSInterface()->newResourceImplicitFlush : false; } template bool CommandStreamReceiverHw::checkPlatformSupportsGpuIdleImplicitFlush() const { if (this->isMultiOsContextCapable() && !this->osContext->isDirectSubmissionActive()) { return false; } return ImplicitFlushSettings::getSettingForGpuIdle() ? 
getOSInterface()->gpuIdleImplicitFlush : false; } template GraphicsAllocation *CommandStreamReceiverHw::getClearColorAllocation() { return nullptr; } template void CommandStreamReceiverHw::collectStateBaseAddresIohPatchInfo(uint64_t commandBufferAddress, uint64_t commandOffset, const LinearStream &ioh) { } template inline size_t CommandStreamReceiverHw::getCmdSizeForActivePartitionConfig() const { if (this->staticWorkPartitioningEnabled) { return ImplicitScalingDispatch::getRegisterConfigurationSize(); } return 0; } template inline void CommandStreamReceiverHw::programActivePartitionConfig(LinearStream &csr) { if (this->staticWorkPartitioningEnabled) { uint64_t workPartitionAddress = getWorkPartitionAllocationGpuAddress(); ImplicitScalingDispatch::dispatchRegisterConfiguration(csr, workPartitionAddress, this->postSyncWriteOffset); } this->activePartitionsConfig = this->activePartitions; } template inline void CommandStreamReceiverHw::addPipeControlBeforeStateSip(LinearStream &commandStream, Device &device) { auto &hwInfo = peekHwInfo(); HwHelper &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); auto hwInfoConfig = HwInfoConfig::get(hwInfo.platform.eProductFamily); bool debuggingEnabled = device.getDebugger() != nullptr; PipeControlArgs args; args.dcFlushEnable = MemorySynchronizationCommands::getDcFlushEnable(true, hwInfo); const auto &[isBasicWARequired, isExtendedWARequired] = hwInfoConfig->isPipeControlPriorToNonPipelinedStateCommandsWARequired(hwInfo, isRcs()); std::ignore = isExtendedWARequired; if (isBasicWARequired && debuggingEnabled && !hwHelper.isSipWANeeded(hwInfo)) { NEO::EncodeWA::addPipeControlPriorToNonPipelinedStateCommand(commandStream, args, hwInfo, isRcs()); } } template inline size_t CommandStreamReceiverHw::getCmdSizeForStallingNoPostSyncCommands() const { if (isMultiTileOperationEnabled()) { return ImplicitScalingDispatch::getBarrierSize(peekHwInfo(), false, false); } else { return sizeof(typename GfxFamily::PIPE_CONTROL); } } 
template inline size_t CommandStreamReceiverHw::getCmdSizeForStallingPostSyncCommands() const { if (isMultiTileOperationEnabled()) { return ImplicitScalingDispatch::getBarrierSize(peekHwInfo(), false, true); } else { return MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(peekHwInfo()); } } template inline void CommandStreamReceiverHw::programStallingNoPostSyncCommandsForBarrier(LinearStream &cmdStream) { const auto &hwInfo = peekHwInfo(); PipeControlArgs args; if (isMultiTileOperationEnabled()) { ImplicitScalingDispatch::dispatchBarrierCommands(cmdStream, this->deviceBitfield, args, hwInfo, 0, 0, false, false); } else { MemorySynchronizationCommands::addPipeControl(cmdStream, args); } } template inline void CommandStreamReceiverHw::programStallingPostSyncCommandsForBarrier(LinearStream &cmdStream, TagNodeBase &tagNode) { auto barrierTimestampPacketGpuAddress = TimestampPacketHelper::getContextEndGpuAddress(tagNode); const auto &hwInfo = peekHwInfo(); PipeControlArgs args; args.dcFlushEnable = MemorySynchronizationCommands::getDcFlushEnable(true, hwInfo); if (isMultiTileOperationEnabled()) { args.workloadPartitionOffset = true; ImplicitScalingDispatch::dispatchBarrierCommands(cmdStream, this->deviceBitfield, args, hwInfo, barrierTimestampPacketGpuAddress, 0, false, false); tagNode.setPacketsUsed(this->activePartitions); } else { MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( cmdStream, PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, barrierTimestampPacketGpuAddress, 0, hwInfo, args); } } template inline void CommandStreamReceiverHw::configurePostSyncWriteOffset() { this->postSyncWriteOffset = ImplicitScalingDispatch::getPostSyncOffset(); } } // namespace NEO command_stream_receiver_simulated_common_hw.h000066400000000000000000000070651422164147700342050ustar00rootroot00000000000000compute-runtime-22.14.22890/shared/source/command_stream/* * Copyright (C) 2018-2021 Intel Corporation * * 
SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/command_stream_receiver_hw.h" #include "shared/source/memory_manager/memory_banks.h" #include "aub_mapper.h" #include "third_party/aub_stream/headers/hardware_context.h" namespace aub_stream { class AubManager; struct AubStream; } // namespace aub_stream namespace NEO { class AddressMapper; class GraphicsAllocation; class HardwareContextController; template class CommandStreamReceiverSimulatedCommonHw : public CommandStreamReceiverHw { protected: using CommandStreamReceiverHw::osContext; using AUB = typename AUBFamilyMapper::AUB; using MiContextDescriptorReg = typename AUB::MiContextDescriptorReg; bool getParametersForWriteMemory(GraphicsAllocation &graphicsAllocation, uint64_t &gpuAddress, void *&cpuAddress, size_t &size) const; void freeEngineInfo(AddressMapper >tRemap); MOCKABLE_VIRTUAL uint32_t getDeviceIndex() const; public: CommandStreamReceiverSimulatedCommonHw(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield); ~CommandStreamReceiverSimulatedCommonHw() override; uint64_t getGTTBits() const { return 0u; } void initGlobalMMIO(); void initAdditionalMMIO(); uint64_t getPPGTTAdditionalBits(GraphicsAllocation *gfxAllocation); void getGTTData(void *memory, AubGTTData &data); uint32_t getMemoryBankForGtt() const; static const AubMemDump::LrcaHelper &getCsTraits(aub_stream::EngineType engineType); void initEngineMMIO(); void submitLRCA(const MiContextDescriptorReg &contextDescriptor); void setupContext(OsContext &osContext) override; virtual bool expectMemoryEqual(void *gfxAddress, const void *srcAddress, size_t length); virtual bool expectMemoryNotEqual(void *gfxAddress, const void *srcAddress, size_t length); virtual bool expectMemoryCompressed(void *gfxAddress, const void *srcAddress, size_t length); virtual void pollForCompletionImpl(){}; virtual bool writeMemory(GraphicsAllocation &gfxAllocation) = 0; virtual void 
writeMemory(uint64_t gpuAddress, void *cpuAddress, size_t size, uint32_t memoryBank, uint64_t entryBits) = 0; virtual void writeMemoryWithAubManager(GraphicsAllocation &graphicsAllocation) = 0; virtual void writeMMIO(uint32_t offset, uint32_t value) = 0; virtual void setAubWritable(bool writable, GraphicsAllocation &graphicsAllocation) = 0; virtual bool isAubWritable(GraphicsAllocation &graphicsAllocation) const = 0; virtual void setTbxWritable(bool writable, GraphicsAllocation &graphicsAllocation) = 0; virtual bool isTbxWritable(GraphicsAllocation &graphicsAllocation) const = 0; virtual void dumpAllocation(GraphicsAllocation &gfxAllocation) = 0; virtual void initializeEngine() = 0; void makeNonResident(GraphicsAllocation &gfxAllocation) override; size_t getPreferredTagPoolSize() const override { return 1; } aub_stream::AubManager *aubManager = nullptr; std::unique_ptr hardwareContextController; struct EngineInfo { void *pLRCA; uint32_t ggttLRCA; void *pGlobalHWStatusPage; uint32_t ggttHWSP; void *pRingBuffer; uint32_t ggttRingBuffer; size_t sizeRingBuffer; uint32_t tailRingBuffer; } engineInfo = {}; AubMemDump::AubStream *stream; }; } // namespace NEO command_stream_receiver_simulated_common_hw_base.inl000066400000000000000000000124511422164147700355250ustar00rootroot00000000000000compute-runtime-22.14.22890/shared/source/command_stream/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/aub/aub_helper.h" #include "shared/source/aub_mem_dump/page_table_entry_bits.h" #include "shared/source/command_stream/command_stream_receiver_simulated_common_hw.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/gmm_helper/gmm.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/gmm_helper/resource_info.h" #include "shared/source/helpers/hardware_context_controller.h" #include "shared/source/memory_manager/address_mapper.h" #include 
"shared/source/memory_manager/memory_manager.h" #include "shared/source/os_interface/os_context.h" #include "third_party/aub_stream/headers/aub_manager.h" namespace NEO { template void CommandStreamReceiverSimulatedCommonHw::initAdditionalMMIO() { if (DebugManager.flags.AubDumpAddMmioRegistersList.get() != "unk") { auto mmioList = AubHelper::getAdditionalMmioList(); for (auto &mmioPair : mmioList) { stream->writeMMIO(mmioPair.first, mmioPair.second); } } } template void CommandStreamReceiverSimulatedCommonHw::setupContext(OsContext &osContext) { CommandStreamReceiverHw::setupContext(osContext); auto engineType = osContext.getEngineType(); uint32_t flags = 0; getCsTraits(engineType).setContextSaveRestoreFlags(flags); if (aubManager && !osContext.isLowPriority()) { hardwareContextController = std::make_unique(*aubManager, osContext, flags); } } template bool CommandStreamReceiverSimulatedCommonHw::getParametersForWriteMemory(GraphicsAllocation &graphicsAllocation, uint64_t &gpuAddress, void *&cpuAddress, size_t &size) const { cpuAddress = graphicsAllocation.getUnderlyingBuffer(); gpuAddress = GmmHelper::decanonize(graphicsAllocation.getGpuAddress()); size = graphicsAllocation.getUnderlyingBufferSize(); if (graphicsAllocation.isCompressionEnabled()) { size = graphicsAllocation.getDefaultGmm()->gmmResourceInfo->getSizeAllocation(); } if (size == 0) return false; if (cpuAddress == nullptr && graphicsAllocation.isAllocationLockable()) { cpuAddress = this->getMemoryManager()->lockResource(&graphicsAllocation); } return true; } template bool CommandStreamReceiverSimulatedCommonHw::expectMemoryEqual(void *gfxAddress, const void *srcAddress, size_t length) { return this->expectMemory(gfxAddress, srcAddress, length, AubMemDump::CmdServicesMemTraceMemoryCompare::CompareOperationValues::CompareEqual); } template bool CommandStreamReceiverSimulatedCommonHw::expectMemoryNotEqual(void *gfxAddress, const void *srcAddress, size_t length) { return this->expectMemory(gfxAddress, 
srcAddress, length, AubMemDump::CmdServicesMemTraceMemoryCompare::CompareOperationValues::CompareNotEqual); } template bool CommandStreamReceiverSimulatedCommonHw::expectMemoryCompressed(void *gfxAddress, const void *srcAddress, size_t length) { return this->expectMemory(gfxAddress, srcAddress, length, AubMemDump::CmdServicesMemTraceMemoryCompare::CompareOperationValues::CompareNotEqual); } template void CommandStreamReceiverSimulatedCommonHw::freeEngineInfo(AddressMapper >tRemap) { alignedFree(engineInfo.pLRCA); gttRemap.unmap(engineInfo.pLRCA); engineInfo.pLRCA = nullptr; alignedFree(engineInfo.pGlobalHWStatusPage); gttRemap.unmap(engineInfo.pGlobalHWStatusPage); engineInfo.pGlobalHWStatusPage = nullptr; alignedFree(engineInfo.pRingBuffer); gttRemap.unmap(engineInfo.pRingBuffer); engineInfo.pRingBuffer = nullptr; } template void CommandStreamReceiverSimulatedCommonHw::makeNonResident(GraphicsAllocation &gfxAllocation) { if (gfxAllocation.isResident(osContext->getContextId())) { dumpAllocation(gfxAllocation); this->getEvictionAllocations().push_back(&gfxAllocation); gfxAllocation.releaseResidencyInOsContext(this->osContext->getContextId()); } } template uint32_t CommandStreamReceiverSimulatedCommonHw::getDeviceIndex() const { return osContext->getDeviceBitfield().any() ? 
static_cast(Math::log2(static_cast(osContext->getDeviceBitfield().to_ulong()))) : 0u; } template CommandStreamReceiverSimulatedCommonHw::CommandStreamReceiverSimulatedCommonHw(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield) : CommandStreamReceiverHw(executionEnvironment, rootDeviceIndex, deviceBitfield) { this->useNewResourceImplicitFlush = false; this->useGpuIdleImplicitFlush = false; } template CommandStreamReceiverSimulatedCommonHw::~CommandStreamReceiverSimulatedCommonHw() = default; } // namespace NEO command_stream_receiver_simulated_common_hw_bdw_and_later.inl000066400000000000000000000043111422164147700373740ustar00rootroot00000000000000compute-runtime-22.14.22890/shared/source/command_stream/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver_simulated_common_hw_base.inl" namespace NEO { template void CommandStreamReceiverSimulatedCommonHw::initGlobalMMIO() { for (auto &mmioPair : AUBFamilyMapper::globalMMIO) { stream->writeMMIO(mmioPair.first, mmioPair.second); } } template uint64_t CommandStreamReceiverSimulatedCommonHw::getPPGTTAdditionalBits(GraphicsAllocation *gfxAllocation) { return BIT(PageTableEntry::presentBit) | BIT(PageTableEntry::writableBit) | BIT(PageTableEntry::userSupervisorBit); } template void CommandStreamReceiverSimulatedCommonHw::getGTTData(void *memory, AubGTTData &data) { data.present = true; data.localMemory = false; } template uint32_t CommandStreamReceiverSimulatedCommonHw::getMemoryBankForGtt() const { return MemoryBanks::getBank(getDeviceIndex()); } template const AubMemDump::LrcaHelper &CommandStreamReceiverSimulatedCommonHw::getCsTraits(aub_stream::EngineType engineType) { return *AUBFamilyMapper::csTraits[engineType]; } template void CommandStreamReceiverSimulatedCommonHw::initEngineMMIO() { auto mmioList = AUBFamilyMapper::perEngineMMIO[osContext->getEngineType()]; 
DEBUG_BREAK_IF(!mmioList); for (auto &mmioPair : *mmioList) { stream->writeMMIO(mmioPair.first, mmioPair.second); } } template void CommandStreamReceiverSimulatedCommonHw::submitLRCA(const MiContextDescriptorReg &contextDescriptor) { auto mmioBase = getCsTraits(osContext->getEngineType()).mmioBase; stream->writeMMIO(AubMemDump::computeRegisterOffset(mmioBase, 0x2230), 0); stream->writeMMIO(AubMemDump::computeRegisterOffset(mmioBase, 0x2230), 0); stream->writeMMIO(AubMemDump::computeRegisterOffset(mmioBase, 0x2230), contextDescriptor.ulData[1]); stream->writeMMIO(AubMemDump::computeRegisterOffset(mmioBase, 0x2230), contextDescriptor.ulData[0]); } } // namespace NEO command_stream_receiver_simulated_common_hw_xehp_and_later.inl000066400000000000000000000077551422164147700376030ustar00rootroot00000000000000compute-runtime-22.14.22890/shared/source/command_stream/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/aub_mem_dump/page_table_entry_bits.h" #include "shared/source/command_stream/command_stream_receiver_simulated_common_hw_base.inl" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/memory_manager/memory_banks.h" #include "shared/source/memory_manager/memory_pool.h" #include "shared/source/memory_manager/physical_address_allocator.h" namespace NEO { template void CommandStreamReceiverSimulatedCommonHw::initGlobalMMIO() { for (auto &mmioPair : AUBFamilyMapper::globalMMIO) { stream->writeMMIO(mmioPair.first, mmioPair.second); } if (this->localMemoryEnabled) { MMIOPair guCntl = {0x00101010, 0x00000080}; //GU_CNTL stream->writeMMIO(guCntl.first, guCntl.second); MMIOPair lmemCfg = {0x0000cf58, 0x80000000}; //LMEM_CFG stream->writeMMIO(lmemCfg.first, lmemCfg.second); MMIOPair tileAddrRange[] = {{0x00004900, 0x0001}, {0x00004904, 0x0001}, {0x00004908, 0x0001}, {0x0000490c, 0x0001}}; //XEHP_TILE_ADDR_RANGE const uint32_t numberOfTiles 
= 4; const uint32_t localMemorySizeGB = static_cast(AubHelper::getPerTileLocalMemorySize(&this->peekHwInfo()) / MemoryConstants::gigaByte); uint32_t localMemoryBaseAddressInGB = 0x0; for (uint32_t i = 0; i < numberOfTiles; i++) { tileAddrRange[i].second |= localMemoryBaseAddressInGB << 1; tileAddrRange[i].second |= localMemorySizeGB << 8; stream->writeMMIO(tileAddrRange[i].first, tileAddrRange[i].second); localMemoryBaseAddressInGB += localMemorySizeGB; } } } template uint64_t CommandStreamReceiverSimulatedCommonHw::getPPGTTAdditionalBits(GraphicsAllocation *gfxAllocation) { if (DebugManager.flags.AUBDumpForceAllToLocalMemory.get() || (gfxAllocation && gfxAllocation->getMemoryPool() == MemoryPool::LocalMemory)) { return BIT(PageTableEntry::presentBit) | BIT(PageTableEntry::writableBit) | BIT(PageTableEntry::localMemoryBit); } return BIT(PageTableEntry::presentBit) | BIT(PageTableEntry::writableBit); } template void CommandStreamReceiverSimulatedCommonHw::getGTTData(void *memory, AubGTTData &data) { data.present = true; data.localMemory = this->localMemoryEnabled; } template uint32_t CommandStreamReceiverSimulatedCommonHw::getMemoryBankForGtt() const { auto deviceIndex = getDeviceIndex(); if (this->localMemoryEnabled) { return MemoryBanks::getBankForLocalMemory(deviceIndex); } return MemoryBanks::getBank(deviceIndex); } template const AubMemDump::LrcaHelper &CommandStreamReceiverSimulatedCommonHw::getCsTraits(aub_stream::EngineType engineType) { return *AUBFamilyMapper::csTraits[engineType]; } template void CommandStreamReceiverSimulatedCommonHw::initEngineMMIO() { auto mmioList = AUBFamilyMapper::perEngineMMIO[osContext->getEngineType()]; DEBUG_BREAK_IF(!mmioList); for (auto &mmioPair : *mmioList) { stream->writeMMIO(mmioPair.first, mmioPair.second); } } template void CommandStreamReceiverSimulatedCommonHw::submitLRCA(const MiContextDescriptorReg &contextDescriptor) { auto mmioBase = getCsTraits(osContext->getEngineType()).mmioBase; 
stream->writeMMIO(AubMemDump::computeRegisterOffset(mmioBase, 0x2510), contextDescriptor.ulData[0]); stream->writeMMIO(AubMemDump::computeRegisterOffset(mmioBase, 0x2514), contextDescriptor.ulData[1]); // Load our new exec list stream->writeMMIO(AubMemDump::computeRegisterOffset(mmioBase, 0x2550), 1); } } // namespace NEO compute-runtime-22.14.22890/shared/source/command_stream/command_stream_receiver_simulated_hw.h000066400000000000000000000151521422164147700327100ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/aub/aub_helper.h" #include "shared/source/aub_mem_dump/aub_mem_dump.h" #include "shared/source/command_stream/command_stream_receiver_simulated_common_hw.h" #include "shared/source/helpers/debug_helpers.h" #include "shared/source/helpers/hardware_context_controller.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "shared/source/memory_manager/memory_banks.h" #include "shared/source/memory_manager/memory_pool.h" #include "shared/source/memory_manager/physical_address_allocator.h" #include "shared/source/os_interface/os_context.h" #include "third_party/aub_stream/headers/allocation_params.h" #include "third_party/aub_stream/headers/aub_manager.h" #include "third_party/aub_stream/headers/hardware_context.h" namespace NEO { class GraphicsAllocation; template class CommandStreamReceiverSimulatedHw : public CommandStreamReceiverSimulatedCommonHw { protected: using CommandStreamReceiverSimulatedCommonHw::CommandStreamReceiverSimulatedCommonHw; using CommandStreamReceiverSimulatedCommonHw::osContext; using CommandStreamReceiverSimulatedCommonHw::getDeviceIndex; using CommandStreamReceiverSimulatedCommonHw::aubManager; using CommandStreamReceiverSimulatedCommonHw::hardwareContextController; using CommandStreamReceiverSimulatedCommonHw::writeMemory; public: uint32_t 
getMemoryBank(GraphicsAllocation *allocation) const { if (aubManager) { return static_cast(getMemoryBanksBitfield(allocation).to_ulong()); } uint32_t deviceIndexChosen = allocation->storageInfo.memoryBanks.any() ? getDeviceIndexFromStorageInfo(allocation->storageInfo) : getDeviceIndex(); if (allocation->getMemoryPool() == MemoryPool::LocalMemory) { return MemoryBanks::getBankForLocalMemory(deviceIndexChosen); } return MemoryBanks::getBank(deviceIndexChosen); } static uint32_t getDeviceIndexFromStorageInfo(StorageInfo storageInfo) { uint32_t deviceIndex = 0; while (!storageInfo.memoryBanks.test(0)) { storageInfo.memoryBanks >>= 1; deviceIndex++; } return deviceIndex; } DeviceBitfield getMemoryBanksBitfield(GraphicsAllocation *allocation) const { if (allocation->getMemoryPool() == MemoryPool::LocalMemory) { if (allocation->storageInfo.memoryBanks.any()) { if (allocation->storageInfo.cloningOfPageTables || this->isMultiOsContextCapable()) { return allocation->storageInfo.memoryBanks; } } return this->osContext->getDeviceBitfield(); } return {}; } int getAddressSpace(int hint) { bool traceLocalAllowed = false; switch (hint) { case AubMemDump::DataTypeHintValues::TraceLogicalRingContextRcs: case AubMemDump::DataTypeHintValues::TraceLogicalRingContextCcs: case AubMemDump::DataTypeHintValues::TraceLogicalRingContextBcs: case AubMemDump::DataTypeHintValues::TraceLogicalRingContextVcs: case AubMemDump::DataTypeHintValues::TraceLogicalRingContextVecs: case AubMemDump::DataTypeHintValues::TraceCommandBuffer: traceLocalAllowed = true; break; default: break; } if ((traceLocalAllowed && this->localMemoryEnabled) || DebugManager.flags.AUBDumpForceAllToLocalMemory.get()) { return AubMemDump::AddressSpaceValues::TraceLocal; } return AubMemDump::AddressSpaceValues::TraceNonlocal; } PhysicalAddressAllocator *createPhysicalAddressAllocator(const HardwareInfo *hwInfo) { const auto bankSize = AubHelper::getPerTileLocalMemorySize(hwInfo); const auto devicesCount = 
HwHelper::getSubDevicesCount(hwInfo); return new PhysicalAddressAllocatorHw(bankSize, devicesCount); } void writeMemoryWithAubManager(GraphicsAllocation &graphicsAllocation) override { uint64_t gpuAddress; void *cpuAddress; size_t size; this->getParametersForWriteMemory(graphicsAllocation, gpuAddress, cpuAddress, size); int hint = graphicsAllocation.getAllocationType() == AllocationType::COMMAND_BUFFER ? AubMemDump::DataTypeHintValues::TraceBatchBuffer : AubMemDump::DataTypeHintValues::TraceNotype; aub_stream::AllocationParams allocationParams(gpuAddress, cpuAddress, size, this->getMemoryBank(&graphicsAllocation), hint, graphicsAllocation.getUsedPageSize()); allocationParams.additionalParams.compressionEnabled = graphicsAllocation.isCompressionEnabled(); if (graphicsAllocation.storageInfo.cloningOfPageTables || !graphicsAllocation.isAllocatedInLocalMemoryPool()) { aubManager->writeMemory2(allocationParams); } else { hardwareContextController->writeMemory(allocationParams); } } void setAubWritable(bool writable, GraphicsAllocation &graphicsAllocation) override { auto bank = getMemoryBank(&graphicsAllocation); if (bank == 0u || graphicsAllocation.storageInfo.cloningOfPageTables) { bank = GraphicsAllocation::defaultBank; } graphicsAllocation.setAubWritable(writable, bank); } bool isAubWritable(GraphicsAllocation &graphicsAllocation) const override { auto bank = getMemoryBank(&graphicsAllocation); if (bank == 0u || graphicsAllocation.storageInfo.cloningOfPageTables) { bank = GraphicsAllocation::defaultBank; } return graphicsAllocation.isAubWritable(bank); } void setTbxWritable(bool writable, GraphicsAllocation &graphicsAllocation) override { auto bank = getMemoryBank(&graphicsAllocation); if (bank == 0u || graphicsAllocation.storageInfo.cloningOfPageTables) { bank = GraphicsAllocation::defaultBank; } graphicsAllocation.setTbxWritable(writable, bank); } bool isTbxWritable(GraphicsAllocation &graphicsAllocation) const override { auto bank = 
getMemoryBank(&graphicsAllocation); if (bank == 0u || graphicsAllocation.storageInfo.cloningOfPageTables) { bank = GraphicsAllocation::defaultBank; } return graphicsAllocation.isTbxWritable(bank); } }; } // namespace NEO compute-runtime-22.14.22890/shared/source/command_stream/command_stream_receiver_with_aub_dump.h000066400000000000000000000040251422164147700330470ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/command_stream/wait_status.h" #include namespace NEO { template class CommandStreamReceiverWithAUBDump : public BaseCSR { protected: using BaseCSR::osContext; public: CommandStreamReceiverWithAUBDump(const std::string &baseName, ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield); CommandStreamReceiverWithAUBDump(const CommandStreamReceiverWithAUBDump &) = delete; CommandStreamReceiverWithAUBDump &operator=(const CommandStreamReceiverWithAUBDump &) = delete; SubmissionStatus flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override; void makeNonResident(GraphicsAllocation &gfxAllocation) override; AubSubCaptureStatus checkAndActivateAubSubCapture(const std::string &kernelName) override; void setupContext(OsContext &osContext) override; CommandStreamReceiverType getType() override { if (BaseCSR::getType() == CommandStreamReceiverType::CSR_TBX) { return CommandStreamReceiverType::CSR_TBX_WITH_AUB; } return CommandStreamReceiverType::CSR_HW_WITH_AUB; } WaitStatus waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, QueueThrottle throttle) override; size_t getPreferredTagPoolSize() const override { return 1; } void addAubComment(const char *comment) override; void pollForCompletion() override; bool expectMemory(const void *gfxAddress, const void 
*srcAddress, size_t length, uint32_t compareOperation) override; std::unique_ptr aubCSR; }; } // namespace NEO compute-runtime-22.14.22890/shared/source/command_stream/command_stream_receiver_with_aub_dump.inl000066400000000000000000000117561422164147700334130ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/aub/aub_center.h" #include "shared/source/command_stream/aub_command_stream_receiver.h" #include "shared/source/command_stream/command_stream_receiver_with_aub_dump.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/execution_environment/root_device_environment.h" namespace NEO { extern CommandStreamReceiverCreateFunc commandStreamReceiverFactory[2 * IGFX_MAX_CORE]; template CommandStreamReceiverWithAUBDump::CommandStreamReceiverWithAUBDump(const std::string &baseName, ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield) : BaseCSR(executionEnvironment, rootDeviceIndex, deviceBitfield) { bool isAubManager = executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->aubCenter && executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->aubCenter->getAubManager(); bool isTbxMode = CommandStreamReceiverType::CSR_TBX == BaseCSR::getType(); bool createAubCsr = (isAubManager && isTbxMode) ? 
false : true; if (createAubCsr) { aubCSR.reset(AUBCommandStreamReceiver::create(baseName, false, executionEnvironment, rootDeviceIndex, deviceBitfield)); UNRECOVERABLE_IF(!aubCSR->initializeTagAllocation()); uint32_t subDevices = static_cast(this->deviceBitfield.count()); auto tagAddressToInitialize = aubCSR->getTagAddress(); for (uint32_t i = 0; i < subDevices; i++) { *tagAddressToInitialize = std::numeric_limits::max(); tagAddressToInitialize = ptrOffset(tagAddressToInitialize, this->postSyncWriteOffset); } } } template SubmissionStatus CommandStreamReceiverWithAUBDump::flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) { if (aubCSR) { aubCSR->flush(batchBuffer, allocationsForResidency); aubCSR->setLatestSentTaskCount(BaseCSR::peekLatestSentTaskCount()); aubCSR->setLatestFlushedTaskCount(BaseCSR::peekLatestSentTaskCount()); } return BaseCSR::flush(batchBuffer, allocationsForResidency); } template void CommandStreamReceiverWithAUBDump::makeNonResident(GraphicsAllocation &gfxAllocation) { auto residencyTaskCount = gfxAllocation.getResidencyTaskCount(this->osContext->getContextId()); BaseCSR::makeNonResident(gfxAllocation); if (aubCSR) { gfxAllocation.updateResidencyTaskCount(residencyTaskCount, this->osContext->getContextId()); aubCSR->makeNonResident(gfxAllocation); } } template AubSubCaptureStatus CommandStreamReceiverWithAUBDump::checkAndActivateAubSubCapture(const std::string &kernelName) { auto status = BaseCSR::checkAndActivateAubSubCapture(kernelName); if (aubCSR) { status = aubCSR->checkAndActivateAubSubCapture(kernelName); } BaseCSR::programForAubSubCapture(status.wasActiveInPreviousEnqueue, status.isActive); return status; } template void CommandStreamReceiverWithAUBDump::setupContext(OsContext &osContext) { BaseCSR::setupContext(osContext); if (aubCSR) { aubCSR->setupContext(osContext); } } template WaitStatus CommandStreamReceiverWithAUBDump::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp 
flushStampToWait, bool useQuickKmdSleep, QueueThrottle throttle) { if (aubCSR) { aubCSR->waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, useQuickKmdSleep, throttle); } return BaseCSR::waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, useQuickKmdSleep, throttle); } template void CommandStreamReceiverWithAUBDump::addAubComment(const char *comment) { if (aubCSR) { aubCSR->addAubComment(comment); } BaseCSR::addAubComment(comment); } template void CommandStreamReceiverWithAUBDump::pollForCompletion() { if (aubCSR) { aubCSR->pollForCompletion(); } BaseCSR::pollForCompletion(); } template bool CommandStreamReceiverWithAUBDump::expectMemory(const void *gfxAddress, const void *srcAddress, size_t length, uint32_t compareOperation) { if (aubCSR) { [[maybe_unused]] auto result = aubCSR->expectMemory(gfxAddress, srcAddress, length, compareOperation); DEBUG_BREAK_IF(!result); } return BaseCSR::expectMemory(gfxAddress, srcAddress, length, compareOperation); } } // namespace NEO compute-runtime-22.14.22890/shared/source/command_stream/create_command_stream_impl.cpp000066400000000000000000000055541422164147700311630ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/aub_command_stream_receiver.h" #include "shared/source/command_stream/command_stream_receiver_with_aub_dump.h" #include "shared/source/command_stream/tbx_command_stream_receiver.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/helpers/api_specific_config.h" #include "shared/source/os_interface/device_factory.h" namespace NEO { extern CommandStreamReceiverCreateFunc commandStreamReceiverFactory[2 * IGFX_MAX_CORE]; CommandStreamReceiver *createCommandStreamImpl(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, const DeviceBitfield 
deviceBitfield) { auto funcCreate = commandStreamReceiverFactory[executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo()->platform.eRenderCoreFamily]; if (funcCreate == nullptr) { return nullptr; } CommandStreamReceiver *commandStreamReceiver = nullptr; int32_t csr = DebugManager.flags.SetCommandStreamReceiver.get(); if (csr < 0) { csr = CommandStreamReceiverType::CSR_HW; } switch (csr) { case CSR_HW: commandStreamReceiver = funcCreate(false, executionEnvironment, rootDeviceIndex, deviceBitfield); break; case CSR_AUB: commandStreamReceiver = AUBCommandStreamReceiver::create(ApiSpecificConfig::getName(), true, executionEnvironment, rootDeviceIndex, deviceBitfield); break; case CSR_TBX: commandStreamReceiver = TbxCommandStreamReceiver::create("", false, executionEnvironment, rootDeviceIndex, deviceBitfield); break; case CSR_HW_WITH_AUB: commandStreamReceiver = funcCreate(true, executionEnvironment, rootDeviceIndex, deviceBitfield); break; case CSR_TBX_WITH_AUB: commandStreamReceiver = TbxCommandStreamReceiver::create(ApiSpecificConfig::getName(), true, executionEnvironment, rootDeviceIndex, deviceBitfield); break; default: break; } return commandStreamReceiver; } bool prepareDeviceEnvironmentsImpl(ExecutionEnvironment &executionEnvironment) { if (DeviceFactory::isHwModeSelected()) { return DeviceFactory::prepareDeviceEnvironments(executionEnvironment); } return DeviceFactory::prepareDeviceEnvironmentsForProductFamilyOverride(executionEnvironment); } bool prepareDeviceEnvironmentImpl(ExecutionEnvironment &executionEnvironment, std::string &osPciPath, const uint32_t rootDeviceIndex) { if (DeviceFactory::isHwModeSelected()) { return DeviceFactory::prepareDeviceEnvironment(executionEnvironment, osPciPath, rootDeviceIndex); } return false; } } // namespace NEO compute-runtime-22.14.22890/shared/source/command_stream/create_command_stream_impl.h000066400000000000000000000013351422164147700306210ustar00rootroot00000000000000/* * Copyright (C) 
2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/command_stream_receiver.h" namespace NEO { class ExecutionEnvironment; extern CommandStreamReceiver *createCommandStreamImpl(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield); extern bool prepareDeviceEnvironmentsImpl(ExecutionEnvironment &executionEnvironment); extern bool prepareDeviceEnvironmentImpl(ExecutionEnvironment &executionEnvironment, std::string &osPciPath, const uint32_t rootDeviceIndex); } // namespace NEO compute-runtime-22.14.22890/shared/source/command_stream/csr_definitions.h000066400000000000000000000252751422164147700264570ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/csr_deps.h" #include "shared/source/command_stream/csr_properties_flags.h" #include "shared/source/command_stream/memory_compression_state.h" #include "shared/source/command_stream/queue_throttle.h" #include "shared/source/command_stream/thread_arbitration_policy.h" #include "shared/source/helpers/constants.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/helpers/pipeline_select_args.h" #include "shared/source/kernel/grf_config.h" #include "shared/source/kernel/kernel_execution_type.h" #include namespace NEO { struct FlushStampTrackingObj; namespace CSRequirements { //cleanup section usually contains 1-2 pipeControls BB end and place for BB start //that makes 16 * 2 + 4 + 8 = 40 bytes //then command buffer is aligned to cacheline that can take up to 63 bytes //to be sure everything fits minimal size is at 2 x cacheline. 
constexpr auto minCommandQueueCommandStreamSize = 2 * MemoryConstants::cacheLineSize; constexpr auto csOverfetchSize = MemoryConstants::pageSize; } // namespace CSRequirements namespace TimeoutControls { constexpr int64_t maxTimeout = std::numeric_limits::max(); } namespace QueueSliceCount { constexpr uint64_t defaultSliceCount = 0; } namespace L3CachingSettings { constexpr uint32_t l3CacheOn = 0u; constexpr uint32_t l3CacheOff = 1u; constexpr uint32_t l3AndL1On = 2u; constexpr uint32_t NotApplicable = 3u; } // namespace L3CachingSettings struct DispatchFlags { DispatchFlags() = delete; DispatchFlags(CsrDependencies csrDependenciesP, TimestampPacketContainer *barrierTimestampPacketNodesP, PipelineSelectArgs pipelineSelectArgsP, FlushStampTrackingObj *flushStampReferenceP, QueueThrottle throttleP, PreemptionMode preemptionModeP, uint32_t numGrfRequiredP, uint32_t l3CacheSettingsP, int32_t threadArbitrationPolicyP, uint32_t additionalKernelExecInfoP, KernelExecutionType kernelExecutionTypeP, MemoryCompressionState memoryCompressionStateP, uint64_t sliceCountP, bool blockingP, bool dcFlushP, bool useSLMP, bool guardCommandBufferWithPipeControlP, bool gsba32BitRequiredP, bool requiresCoherencyP, bool lowPriorityP, bool implicitFlushP, bool outOfOrderExecutionAllowedP, bool epilogueRequiredP, bool usePerDSSbackedBufferP, bool useSingleSubdeviceP, bool useGlobalAtomicsP, bool areMultipleSubDevicesInContextP, bool memoryMigrationRequiredP, bool textureCacheFlush) : csrDependencies(csrDependenciesP), barrierTimestampPacketNodes(barrierTimestampPacketNodesP), pipelineSelectArgs(pipelineSelectArgsP), flushStampReference(flushStampReferenceP), throttle(throttleP), preemptionMode(preemptionModeP), numGrfRequired(numGrfRequiredP), l3CacheSettings(l3CacheSettingsP), threadArbitrationPolicy(threadArbitrationPolicyP), additionalKernelExecInfo(additionalKernelExecInfoP), kernelExecutionType(kernelExecutionTypeP), memoryCompressionState(memoryCompressionStateP), 
sliceCount(sliceCountP), blocking(blockingP), dcFlush(dcFlushP), useSLM(useSLMP), guardCommandBufferWithPipeControl(guardCommandBufferWithPipeControlP), gsba32BitRequired(gsba32BitRequiredP), requiresCoherency(requiresCoherencyP), lowPriority(lowPriorityP), implicitFlush(implicitFlushP), outOfOrderExecutionAllowed(outOfOrderExecutionAllowedP), epilogueRequired(epilogueRequiredP), usePerDssBackedBuffer(usePerDSSbackedBufferP), useSingleSubdevice(useSingleSubdeviceP), useGlobalAtomics(useGlobalAtomicsP), areMultipleSubDevicesInContext(areMultipleSubDevicesInContextP), memoryMigrationRequired(memoryMigrationRequiredP), textureCacheFlush(textureCacheFlush){}; CsrDependencies csrDependencies; TimestampPacketContainer *barrierTimestampPacketNodes = nullptr; PipelineSelectArgs pipelineSelectArgs; FlushStampTrackingObj *flushStampReference = nullptr; QueueThrottle throttle = QueueThrottle::MEDIUM; PreemptionMode preemptionMode = PreemptionMode::Disabled; uint32_t numGrfRequired = GrfConfig::DefaultGrfNumber; uint32_t l3CacheSettings = L3CachingSettings::l3CacheOn; int32_t threadArbitrationPolicy = ThreadArbitrationPolicy::NotPresent; uint32_t additionalKernelExecInfo = AdditionalKernelExecInfo::NotApplicable; KernelExecutionType kernelExecutionType = KernelExecutionType::NotApplicable; MemoryCompressionState memoryCompressionState = MemoryCompressionState::NotApplicable; uint64_t sliceCount = QueueSliceCount::defaultSliceCount; uint64_t engineHints = 0; bool blocking = false; bool dcFlush = false; bool useSLM = false; bool guardCommandBufferWithPipeControl = false; bool gsba32BitRequired = false; bool requiresCoherency = false; bool lowPriority = false; bool implicitFlush = false; bool outOfOrderExecutionAllowed = false; bool epilogueRequired = false; bool usePerDssBackedBuffer = false; bool useSingleSubdevice = false; bool useGlobalAtomics = false; bool areMultipleSubDevicesInContext = false; bool memoryMigrationRequired = false; bool textureCacheFlush = false; bool 
disableEUFusion = false; }; struct CsrSizeRequestFlags { bool l3ConfigChanged = false; bool preemptionRequestChanged = false; bool mediaSamplerConfigChanged = false; bool hasSharedHandles = false; bool specialPipelineSelectModeChanged = false; bool activePartitionsChanged = false; }; } // namespace NEO compute-runtime-22.14.22890/shared/source/command_stream/csr_deps.cpp000066400000000000000000000007261422164147700254240ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/csr_deps.h" #include "shared/source/helpers/timestamp_packet.h" namespace NEO { void CsrDependencies::makeResident(CommandStreamReceiver &commandStreamReceiver) const { for (auto ×tampPacketContainer : timestampPacketContainer) { timestampPacketContainer->makeResident(commandStreamReceiver); } } } // namespace NEO compute-runtime-22.14.22890/shared/source/command_stream/csr_deps.h000066400000000000000000000011141422164147700250610ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/utilities/stackvec.h" namespace NEO { class TimestampPacketContainer; class CommandStreamReceiver; class CsrDependencies { public: enum class DependenciesType { OnCsr, OutOfCsr, All }; StackVec, 32> taskCountContainer; StackVec timestampPacketContainer; void makeResident(CommandStreamReceiver &commandStreamReceiver) const; }; } // namespace NEO compute-runtime-22.14.22890/shared/source/command_stream/csr_properties_flags.h000066400000000000000000000005411422164147700275010ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include namespace NEO { namespace AdditionalKernelExecInfo { constexpr uint32_t DisableOverdispatch = 0u; constexpr uint32_t NotSet = 1u; constexpr uint32_t NotApplicable = 2u; } // namespace AdditionalKernelExecInfo } // namespace 
NEOcompute-runtime-22.14.22890/shared/source/command_stream/definitions/000077500000000000000000000000001422164147700254245ustar00rootroot00000000000000command_stream_receiver_hw_ext.inl000066400000000000000000000014351422164147700343070ustar00rootroot00000000000000compute-runtime-22.14.22890/shared/source/command_stream/definitions/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/command_stream_receiver_hw.h" #include "shared/source/command_stream/csr_definitions.h" #include "shared/source/command_stream/linear_stream.h" namespace NEO { template void CommandStreamReceiverHw::programEngineModeCommands(LinearStream &csr, const DispatchFlags &dispatchFlags) { } template void CommandStreamReceiverHw::programEngineModeEpliogue(LinearStream &csr, const DispatchFlags &dispatchFlags) { } template size_t CommandStreamReceiverHw::getCmdSizeForEngineMode(const DispatchFlags &dispatchFlags) const { return 0u; } } // namespace NEO compute-runtime-22.14.22890/shared/source/command_stream/definitions/stream_properties.inl000066400000000000000000000026031422164147700317000ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/stream_property.h" namespace NEO { struct StateComputeModeProperties { StreamProperty isCoherencyRequired{}; StreamProperty largeGrfMode{}; StreamProperty zPassAsyncComputeThreadLimit{}; StreamProperty pixelAsyncComputeThreadLimit{}; StreamProperty threadArbitrationPolicy{}; void setProperties(bool requiresCoherency, uint32_t numGrfRequired, int32_t threadArbitrationPolicy, const HardwareInfo &hwInfo); void setProperties(const StateComputeModeProperties &properties); bool isDirty() const; protected: void clearIsDirty(); bool isDirtyExtra() const; void setPropertiesExtra(); void setPropertiesExtra(const StateComputeModeProperties &properties); void clearIsDirtyExtra(); }; 
struct FrontEndProperties { StreamProperty computeDispatchAllWalkerEnable{}; StreamProperty disableEUFusion{}; StreamProperty disableOverdispatch{}; StreamProperty singleSliceDispatchCcsMode{}; void setProperties(bool isCooperativeKernel, bool disableEUFusion, bool disableOverdispatch, int32_t engineInstancedDevice, const HardwareInfo &hwInfo); void setProperties(const FrontEndProperties &properties); bool isDirty() const; protected: void clearIsDirty(); }; } // namespace NEO compute-runtime-22.14.22890/shared/source/command_stream/device_command_stream.h000066400000000000000000000030331422164147700275710ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/command_stream_receiver_hw.h" namespace NEO { template CommandStreamReceiver *createDeviceCommandStreamReceiver(bool withAubDump, ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield); template class DeviceCommandStreamReceiver : public CommandStreamReceiverHw { typedef CommandStreamReceiverHw BaseClass; protected: DeviceCommandStreamReceiver(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield) : BaseClass(executionEnvironment, rootDeviceIndex, deviceBitfield) { } public: static CommandStreamReceiver *create(bool withAubDump, ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield) { return createDeviceCommandStreamReceiver(withAubDump, executionEnvironment, rootDeviceIndex, deviceBitfield); } }; } // namespace NEO compute-runtime-22.14.22890/shared/source/command_stream/experimental_command_buffer.cpp000066400000000000000000000073371422164147700313530ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/experimental_command_buffer.h" #include 
"shared/source/command_stream/command_stream_receiver.h" #include "shared/source/command_stream/linear_stream.h" #include "shared/source/helpers/constants.h" #include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/os_interface/os_context.h" #include #include namespace NEO { ExperimentalCommandBuffer::ExperimentalCommandBuffer(CommandStreamReceiver *csr, double profilingTimerResolution) : commandStreamReceiver(csr), currentStream(nullptr), timestampsOffset(0), experimentalAllocationOffset(0), defaultPrint(true), timerResolution(profilingTimerResolution) { auto rootDeviceIndex = csr->getRootDeviceIndex(); timestamps = csr->getMemoryManager()->allocateGraphicsMemoryWithProperties({rootDeviceIndex, MemoryConstants::pageSize, AllocationType::INTERNAL_HOST_MEMORY, csr->getOsContext().getDeviceBitfield()}); memset(timestamps->getUnderlyingBuffer(), 0, timestamps->getUnderlyingBufferSize()); experimentalAllocation = csr->getMemoryManager()->allocateGraphicsMemoryWithProperties({rootDeviceIndex, MemoryConstants::pageSize, AllocationType::INTERNAL_HOST_MEMORY, csr->getOsContext().getDeviceBitfield()}); memset(experimentalAllocation->getUnderlyingBuffer(), 0, experimentalAllocation->getUnderlyingBufferSize()); } ExperimentalCommandBuffer::~ExperimentalCommandBuffer() { auto timestamp = static_cast(timestamps->getUnderlyingBuffer()); for (uint32_t i = 0; i < timestampsOffset / (2 * sizeof(uint64_t)); i++) { auto stop = static_cast(*(timestamp + 1) * timerResolution); auto start = static_cast(*timestamp * timerResolution); auto delta = stop - start; PRINT_DEBUG_STRING(defaultPrint, stdout, "#%u: delta %llu start %llu stop %llu\n", i, delta, start, stop); timestamp += 2; } MemoryManager *memoryManager = commandStreamReceiver->getMemoryManager(); memoryManager->freeGraphicsMemory(timestamps); memoryManager->freeGraphicsMemory(experimentalAllocation); if (currentStream.get()) { 
memoryManager->freeGraphicsMemory(currentStream->getGraphicsAllocation()); currentStream->replaceGraphicsAllocation(nullptr); } } void ExperimentalCommandBuffer::getCS(size_t minRequiredSize) { if (!currentStream) { currentStream.reset(new LinearStream(nullptr)); } minRequiredSize += CSRequirements::minCommandQueueCommandStreamSize; constexpr static auto additionalAllocationSize = CSRequirements::minCommandQueueCommandStreamSize + CSRequirements::csOverfetchSize; commandStreamReceiver->ensureCommandBufferAllocation(*currentStream, minRequiredSize, additionalAllocationSize); } void ExperimentalCommandBuffer::makeResidentAllocations() { commandStreamReceiver->makeResident(*currentStream->getGraphicsAllocation()); commandStreamReceiver->makeResident(*timestamps); commandStreamReceiver->makeResident(*experimentalAllocation); } } // namespace NEO compute-runtime-22.14.22890/shared/source/command_stream/experimental_command_buffer.h000066400000000000000000000030021422164147700310010ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include #include namespace NEO { class CommandStreamReceiver; class GraphicsAllocation; class LinearStream; class MemoryManager; class ExperimentalCommandBuffer { public: virtual ~ExperimentalCommandBuffer(); ExperimentalCommandBuffer(CommandStreamReceiver *csr, double profilingTimerResolution); template void injectBufferStart(LinearStream &parentStream, size_t cmdBufferOffset); template size_t getRequiredInjectionSize() noexcept; template size_t programExperimentalCommandBuffer(); void makeResidentAllocations(); protected: template size_t getTotalExperimentalSize() noexcept; void getCS(size_t minRequiredSize); template void addTimeStampPipeControl(); template size_t getTimeStampPipeControlSize() noexcept; template void addExperimentalCommands(); template size_t getExperimentalCommandsSize() noexcept; CommandStreamReceiver *commandStreamReceiver; std::unique_ptr 
currentStream; GraphicsAllocation *timestamps; uint32_t timestampsOffset; GraphicsAllocation *experimentalAllocation; uint32_t experimentalAllocationOffset; bool defaultPrint; double timerResolution; }; } // namespace NEO compute-runtime-22.14.22890/shared/source/command_stream/experimental_command_buffer.inl000066400000000000000000000106361422164147700313470ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver_hw.h" #include "shared/source/command_stream/experimental_command_buffer.h" #include "shared/source/command_stream/linear_stream.h" #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/pipe_control_args.h" #include "shared/source/memory_manager/graphics_allocation.h" namespace NEO { template void ExperimentalCommandBuffer::injectBufferStart(LinearStream &parentStream, size_t cmdBufferOffset) { using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START; auto pCmd = parentStream.getSpaceForCmd(); auto commandStreamReceiverHw = static_cast *>(commandStreamReceiver); commandStreamReceiverHw->addBatchBufferStart(pCmd, currentStream->getGraphicsAllocation()->getGpuAddress() + cmdBufferOffset, true); } template size_t ExperimentalCommandBuffer::getRequiredInjectionSize() noexcept { using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START; return sizeof(MI_BATCH_BUFFER_START); } template size_t ExperimentalCommandBuffer::programExperimentalCommandBuffer() { using MI_BATCH_BUFFER_END = typename GfxFamily::MI_BATCH_BUFFER_END; getCS(getTotalExperimentalSize()); size_t returnOffset = currentStream->getUsed(); //begin timestamp addTimeStampPipeControl(); addExperimentalCommands(); //end timestamp addTimeStampPipeControl(); //end auto pCmd = currentStream->getSpaceForCmd(); *pCmd = GfxFamily::cmdInitBatchBufferEnd; return 
returnOffset; } template size_t ExperimentalCommandBuffer::getTotalExperimentalSize() noexcept { using MI_BATCH_BUFFER_END = typename GfxFamily::MI_BATCH_BUFFER_END; size_t size = sizeof(MI_BATCH_BUFFER_END) + getTimeStampPipeControlSize() + getExperimentalCommandsSize(); return size; } template size_t ExperimentalCommandBuffer::getTimeStampPipeControlSize() noexcept { using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; // Two P_C for timestamps return 2 * MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation( *commandStreamReceiver->peekExecutionEnvironment().rootDeviceEnvironments[commandStreamReceiver->getRootDeviceIndex()]->getHardwareInfo()); } template void ExperimentalCommandBuffer::addTimeStampPipeControl() { using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; uint64_t timeStampAddress = timestamps->getGpuAddress() + timestampsOffset; PipeControlArgs args; MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( *currentStream, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, timeStampAddress, 0llu, *commandStreamReceiver->peekExecutionEnvironment().rootDeviceEnvironments[commandStreamReceiver->getRootDeviceIndex()]->getHardwareInfo(), args); //moving to next chunk timestampsOffset += sizeof(uint64_t); DEBUG_BREAK_IF(timestamps->getUnderlyingBufferSize() < timestampsOffset); } template void ExperimentalCommandBuffer::addExperimentalCommands() { using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT; uint32_t *semaphoreData = reinterpret_cast(ptrOffset(experimentalAllocation->getUnderlyingBuffer(), experimentalAllocationOffset)); *semaphoreData = 1; uint64_t gpuAddr = experimentalAllocation->getGpuAddress() + experimentalAllocationOffset; auto semaphoreCmdSpace = currentStream->getSpaceForCmd(); auto semaphoreCmd = GfxFamily::cmdInitMiSemaphoreWait; semaphoreCmd.setCompareOperation(MI_SEMAPHORE_WAIT::COMPARE_OPERATION_SAD_EQUAL_SDD); semaphoreCmd.setSemaphoreDataDword(*semaphoreData); 
semaphoreCmd.setSemaphoreGraphicsAddress(gpuAddr); *semaphoreCmdSpace = semaphoreCmd; } template size_t ExperimentalCommandBuffer::getExperimentalCommandsSize() noexcept { using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT; return sizeof(MI_SEMAPHORE_WAIT); } } // namespace NEO compute-runtime-22.14.22890/shared/source/command_stream/linear_stream.cpp000066400000000000000000000025401422164147700264430ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/linear_stream.h" #include "shared/source/memory_manager/graphics_allocation.h" namespace NEO { LinearStream::LinearStream(GraphicsAllocation *gfxAllocation, void *buffer, size_t bufferSize) : sizeUsed(0), maxAvailableSpace(bufferSize), buffer(buffer), graphicsAllocation(gfxAllocation) { } LinearStream::LinearStream(void *buffer, size_t bufferSize) : LinearStream(nullptr, buffer, bufferSize) { } LinearStream::LinearStream(GraphicsAllocation *gfxAllocation) : sizeUsed(0), graphicsAllocation(gfxAllocation) { if (gfxAllocation) { maxAvailableSpace = gfxAllocation->getUnderlyingBufferSize(); buffer = gfxAllocation->getUnderlyingBuffer(); } else { maxAvailableSpace = 0; buffer = nullptr; } } LinearStream::LinearStream() : LinearStream(nullptr) { } LinearStream::LinearStream(void *buffer, size_t bufferSize, CommandContainer *cmdContainer, size_t batchBufferEndSize) : LinearStream(buffer, bufferSize) { this->cmdContainer = cmdContainer; this->batchBufferEndSize = batchBufferEndSize; } uint64_t LinearStream::getGpuBase() const { if (graphicsAllocation) { return graphicsAllocation->getGpuAddress(); } return gpuBase; } } // namespace NEO compute-runtime-22.14.22890/shared/source/command_stream/linear_stream.h000066400000000000000000000061431422164147700261130ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include 
"shared/source/command_container/cmdcontainer.h" #include "shared/source/helpers/debug_helpers.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/ptr_math.h" #include "shared/source/helpers/string.h" #include #include #include namespace NEO { class GraphicsAllocation; class LinearStream { public: virtual ~LinearStream() = default; LinearStream(); LinearStream(void *buffer, size_t bufferSize); LinearStream(GraphicsAllocation *buffer); LinearStream(GraphicsAllocation *gfxAllocation, void *buffer, size_t bufferSize); LinearStream(void *buffer, size_t bufferSize, CommandContainer *cmdContainer, size_t batchBufferEndSize); void *getCpuBase() const; void *getSpace(size_t size); size_t getMaxAvailableSpace() const; size_t getAvailableSpace() const; size_t getUsed() const; uint64_t getGpuBase() const; void setGpuBase(uint64_t); void overrideMaxSize(size_t newMaxSize); void replaceBuffer(void *buffer, size_t bufferSize); GraphicsAllocation *getGraphicsAllocation() const; void replaceGraphicsAllocation(GraphicsAllocation *gfxAllocation); template Cmd *getSpaceForCmd() { auto ptr = getSpace(sizeof(Cmd)); return reinterpret_cast(ptr); } protected: std::atomic sizeUsed; size_t maxAvailableSpace; void *buffer; GraphicsAllocation *graphicsAllocation; CommandContainer *cmdContainer = nullptr; size_t batchBufferEndSize = 0; uint64_t gpuBase = 0; }; inline void *LinearStream::getCpuBase() const { return buffer; } inline void LinearStream::setGpuBase(uint64_t gpuAddress) { gpuBase = gpuAddress; } inline void *LinearStream::getSpace(size_t size) { if (cmdContainer != nullptr && getAvailableSpace() < batchBufferEndSize + size) { UNRECOVERABLE_IF(sizeUsed + batchBufferEndSize > maxAvailableSpace); cmdContainer->closeAndAllocateNextCommandBuffer(); } UNRECOVERABLE_IF(sizeUsed + size > maxAvailableSpace); UNRECOVERABLE_IF(reinterpret_cast(buffer) <= 0); auto memory = ptrOffset(buffer, sizeUsed); sizeUsed += size; return memory; } inline size_t 
LinearStream::getMaxAvailableSpace() const { return maxAvailableSpace; } inline size_t LinearStream::getAvailableSpace() const { DEBUG_BREAK_IF(sizeUsed > maxAvailableSpace); return maxAvailableSpace - sizeUsed; } inline size_t LinearStream::getUsed() const { return sizeUsed; } inline void LinearStream::overrideMaxSize(size_t newMaxSize) { maxAvailableSpace = newMaxSize; } inline void LinearStream::replaceBuffer(void *buffer, size_t bufferSize) { this->buffer = buffer; maxAvailableSpace = bufferSize; sizeUsed = 0; } inline GraphicsAllocation *LinearStream::getGraphicsAllocation() const { return graphicsAllocation; } inline void LinearStream::replaceGraphicsAllocation(GraphicsAllocation *gfxAllocation) { graphicsAllocation = gfxAllocation; } } // namespace NEO compute-runtime-22.14.22890/shared/source/command_stream/memory_compression_state.h000066400000000000000000000003601422164147700304120ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once namespace NEO { enum class MemoryCompressionState { Disabled = 0x0u, Enabled = 0x1u, NotApplicable = 0x2u }; } // namespace NEO compute-runtime-22.14.22890/shared/source/command_stream/per_dss_backed_buffer.cpp000066400000000000000000000004341422164147700300770ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" namespace NEO { bool CommandStreamReceiver::createPerDssBackedBuffer(Device &device) { return true; } } // namespace NEO compute-runtime-22.14.22890/shared/source/command_stream/preemption.cpp000066400000000000000000000073471422164147700260120ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/preemption.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/device/device.h" #include 
"shared/source/helpers/string.h" #include "shared/source/kernel/kernel_descriptor.h" namespace NEO { bool PreemptionHelper::allowThreadGroupPreemption(const PreemptionFlags &flags) { if (flags.flags.disablePerCtxtPreemptionGranularityControl) { return false; } if (flags.flags.usesFencesForReadWriteImages && flags.flags.disableLSQCROPERFforOCL) { return false; } if (flags.flags.vmeKernel) { return false; } return true; } bool PreemptionHelper::allowMidThreadPreemption(const PreemptionFlags &flags) { return (flags.flags.disabledMidThreadPreemptionKernel == 0) && !(flags.flags.vmeKernel && !flags.flags.deviceSupportsVmePreemption); } PreemptionMode PreemptionHelper::taskPreemptionMode(PreemptionMode devicePreemptionMode, const PreemptionFlags &flags) { if (DebugManager.flags.ForceKernelPreemptionMode.get() != -1) { return static_cast(DebugManager.flags.ForceKernelPreemptionMode.get()); } if (devicePreemptionMode == PreemptionMode::Disabled) { return PreemptionMode::Disabled; } if (devicePreemptionMode >= PreemptionMode::MidThread && allowMidThreadPreemption(flags)) { return PreemptionMode::MidThread; } if (devicePreemptionMode >= PreemptionMode::ThreadGroup && allowThreadGroupPreemption(flags)) { return PreemptionMode::ThreadGroup; } return PreemptionMode::MidBatch; }; void PreemptionHelper::adjustDefaultPreemptionMode(RuntimeCapabilityTable &deviceCapabilities, bool allowMidThread, bool allowThreadGroup, bool allowMidBatch) { if (deviceCapabilities.defaultPreemptionMode >= PreemptionMode::MidThread && allowMidThread) { deviceCapabilities.defaultPreemptionMode = PreemptionMode::MidThread; } else if (deviceCapabilities.defaultPreemptionMode >= PreemptionMode::ThreadGroup && allowThreadGroup) { deviceCapabilities.defaultPreemptionMode = PreemptionMode::ThreadGroup; } else if (deviceCapabilities.defaultPreemptionMode >= PreemptionMode::MidBatch && allowMidBatch) { deviceCapabilities.defaultPreemptionMode = PreemptionMode::MidBatch; } else { 
deviceCapabilities.defaultPreemptionMode = PreemptionMode::Disabled; } } PreemptionMode PreemptionHelper::getDefaultPreemptionMode(const HardwareInfo &hwInfo) { return DebugManager.flags.ForcePreemptionMode.get() == -1 ? hwInfo.capabilityTable.defaultPreemptionMode : static_cast(DebugManager.flags.ForcePreemptionMode.get()); } PreemptionFlags PreemptionHelper::createPreemptionLevelFlags(Device &device, const KernelDescriptor *kernelDescriptor) { PreemptionFlags flags = {}; if (kernelDescriptor) { flags.flags.disabledMidThreadPreemptionKernel = kernelDescriptor->kernelAttributes.flags.requiresDisabledMidThreadPreemption; flags.flags.vmeKernel = kernelDescriptor->kernelAttributes.flags.usesVme; flags.flags.usesFencesForReadWriteImages = kernelDescriptor->kernelAttributes.flags.usesFencesForReadWriteImages; } flags.flags.deviceSupportsVmePreemption = device.getDeviceInfo().vmeAvcSupportsPreemption; flags.flags.disablePerCtxtPreemptionGranularityControl = device.getHardwareInfo().workaroundTable.flags.waDisablePerCtxtPreemptionGranularityControl; flags.flags.disableLSQCROPERFforOCL = device.getHardwareInfo().workaroundTable.flags.waDisableLSQCROPERFforOCL; return flags; } } // namespace NEO compute-runtime-22.14.22890/shared/source/command_stream/preemption.h000066400000000000000000000067271422164147700254600ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/command_stream/linear_stream.h" #include "shared/source/command_stream/preemption_mode.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/os_interface/hw_info_config.h" #include "sku_info.h" namespace NEO { class Device; class GraphicsAllocation; struct KernelDescriptor; struct PreemptionFlags { PreemptionFlags() { data = 0; } union { struct { uint32_t disabledMidThreadPreemptionKernel : 1; uint32_t vmeKernel : 1; uint32_t 
deviceSupportsVmePreemption : 1; uint32_t disablePerCtxtPreemptionGranularityControl : 1; uint32_t usesFencesForReadWriteImages : 1; uint32_t disableLSQCROPERFforOCL : 1; uint32_t reserved : 26; } flags; uint32_t data; }; }; class PreemptionHelper { public: template using INTERFACE_DESCRIPTOR_DATA = typename CmdFamily::INTERFACE_DESCRIPTOR_DATA; static PreemptionMode taskPreemptionMode(PreemptionMode devicePreemptionMode, const PreemptionFlags &flags); static bool allowThreadGroupPreemption(const PreemptionFlags &flags); static bool allowMidThreadPreemption(const PreemptionFlags &flags); static void adjustDefaultPreemptionMode(RuntimeCapabilityTable &deviceCapabilities, bool allowMidThread, bool allowThreadGroup, bool allowMidBatch); static PreemptionFlags createPreemptionLevelFlags(Device &device, const KernelDescriptor *kernelDescriptor); template static size_t getRequiredPreambleSize(const Device &device); template static size_t getRequiredStateSipCmdSize(Device &device, bool isRcs); template static void programCsrBaseAddress(LinearStream &preambleCmdStream, Device &device, const GraphicsAllocation *preemptionCsr); template static void programStateSip(LinearStream &preambleCmdStream, Device &device); template static void programStateSipEndWa(LinearStream &cmdStream, Device &device); template static size_t getRequiredCmdStreamSize(PreemptionMode newPreemptionMode, PreemptionMode oldPreemptionMode); template static void programCmdStream(LinearStream &cmdStream, PreemptionMode newPreemptionMode, PreemptionMode oldPreemptionMode, GraphicsAllocation *preemptionCsr); template static size_t getPreemptionWaCsSize(const Device &device); template static void applyPreemptionWaCmdsBegin(LinearStream *pCommandStream, const Device &device); template static void applyPreemptionWaCmdsEnd(LinearStream *pCommandStream, const Device &device); static PreemptionMode getDefaultPreemptionMode(const HardwareInfo &hwInfo); template static void 
programInterfaceDescriptorDataPreemption(INTERFACE_DESCRIPTOR_DATA *idd, PreemptionMode preemptionMode); }; template struct PreemptionConfig { static const uint32_t mmioAddress; static const uint32_t mask; static const uint32_t threadGroupVal; static const uint32_t cmdLevelVal; static const uint32_t midThreadVal; }; } // namespace NEO compute-runtime-22.14.22890/shared/source/command_stream/preemption.inl000066400000000000000000000124051422164147700260010ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/built_ins/sip.h" #include "shared/source/command_stream/preemption.h" #include "shared/source/device/device.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/pipe_control_args.h" #include "shared/source/helpers/preamble.h" #include "shared/source/memory_manager/graphics_allocation.h" namespace NEO { template void PreemptionHelper::programCsrBaseAddress(LinearStream &preambleCmdStream, Device &device, const GraphicsAllocation *preemptionCsr) { using GPGPU_CSR_BASE_ADDRESS = typename GfxFamily::GPGPU_CSR_BASE_ADDRESS; bool isMidThreadPreemption = device.getPreemptionMode() == PreemptionMode::MidThread; if (isMidThreadPreemption) { UNRECOVERABLE_IF(nullptr == preemptionCsr); auto csr = reinterpret_cast(preambleCmdStream.getSpace(sizeof(GPGPU_CSR_BASE_ADDRESS))); GPGPU_CSR_BASE_ADDRESS cmd = GfxFamily::cmdInitGpgpuCsrBaseAddress; cmd.setGpgpuCsrBaseAddress(preemptionCsr->getGpuAddressToPatch()); *csr = cmd; } } template void PreemptionHelper::programStateSip(LinearStream &preambleCmdStream, Device &device) { using STATE_SIP = typename GfxFamily::STATE_SIP; bool debuggingEnabled = device.getDebugger() != nullptr || device.isDebuggerActive(); bool isMidThreadPreemption = device.getPreemptionMode() == PreemptionMode::MidThread; if (isMidThreadPreemption || debuggingEnabled) { auto sipAllocation = SipKernel::getSipKernel(device).getSipAllocation(); auto sip 
= reinterpret_cast(preambleCmdStream.getSpace(sizeof(STATE_SIP))); STATE_SIP cmd = GfxFamily::cmdInitStateSip; cmd.setSystemInstructionPointer(sipAllocation->getGpuAddressToPatch()); *sip = cmd; } } template void PreemptionHelper::programStateSipEndWa(LinearStream &cmdStream, Device &device) {} template void PreemptionHelper::programCmdStream(LinearStream &cmdStream, PreemptionMode newPreemptionMode, PreemptionMode oldPreemptionMode, GraphicsAllocation *preemptionCsr) { if (oldPreemptionMode == newPreemptionMode) { return; } uint32_t regVal = 0; if (newPreemptionMode == PreemptionMode::MidThread) { regVal = PreemptionConfig::midThreadVal | PreemptionConfig::mask; } else if (newPreemptionMode == PreemptionMode::ThreadGroup) { regVal = PreemptionConfig::threadGroupVal | PreemptionConfig::mask; } else { regVal = PreemptionConfig::cmdLevelVal | PreemptionConfig::mask; } LriHelper::program(&cmdStream, PreemptionConfig::mmioAddress, regVal, true); } template size_t PreemptionHelper::getRequiredCmdStreamSize(PreemptionMode newPreemptionMode, PreemptionMode oldPreemptionMode) { if (newPreemptionMode == oldPreemptionMode) { return 0; } return sizeof(typename GfxFamily::MI_LOAD_REGISTER_IMM); } template size_t PreemptionHelper::getRequiredPreambleSize(const Device &device) { if (device.getPreemptionMode() == PreemptionMode::MidThread) { return sizeof(typename GfxFamily::GPGPU_CSR_BASE_ADDRESS); } return 0; } template size_t PreemptionHelper::getRequiredStateSipCmdSize(Device &device, bool isRcs) { size_t size = 0; bool isMidThreadPreemption = device.getPreemptionMode() == PreemptionMode::MidThread; bool debuggingEnabled = device.getDebugger() != nullptr || device.isDebuggerActive(); if (isMidThreadPreemption || debuggingEnabled) { size += sizeof(typename GfxFamily::STATE_SIP); } return size; } template size_t PreemptionHelper::getPreemptionWaCsSize(const Device &device) { return 0u; } template void PreemptionHelper::applyPreemptionWaCmdsBegin(LinearStream *pCommandStream, 
const Device &device) { } template void PreemptionHelper::applyPreemptionWaCmdsEnd(LinearStream *pCommandStream, const Device &device) { } template void PreemptionHelper::programInterfaceDescriptorDataPreemption(INTERFACE_DESCRIPTOR_DATA *idd, PreemptionMode preemptionMode) { using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA; if (preemptionMode == PreemptionMode::MidThread) { idd->setThreadPreemptionDisable(INTERFACE_DESCRIPTOR_DATA::THREAD_PREEMPTION_DISABLE_DISABLE); } else { idd->setThreadPreemptionDisable(INTERFACE_DESCRIPTOR_DATA::THREAD_PREEMPTION_DISABLE_ENABLE); } } template constexpr uint32_t PreemptionConfig::mmioAddress = 0x2580; template constexpr uint32_t PreemptionConfig::mask = ((1 << 1) | (1 << 2)) << 16; template constexpr uint32_t PreemptionConfig::threadGroupVal = (1 << 1); template constexpr uint32_t PreemptionConfig::cmdLevelVal = (1 << 2); template constexpr uint32_t PreemptionConfig::midThreadVal = 0; } // namespace NEO compute-runtime-22.14.22890/shared/source/command_stream/preemption_mode.h000066400000000000000000000005201422164147700264450ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include namespace NEO { enum PreemptionMode : uint32_t { // Keep in sync with ForcePreemptionMode debug variable Initial = 0, Disabled = 1, MidBatch, ThreadGroup, MidThread, }; } // namespace NEO compute-runtime-22.14.22890/shared/source/command_stream/preemption_xehp_and_later.inl000066400000000000000000000075411422164147700310430ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ template <> void PreemptionHelper::programCsrBaseAddress(LinearStream &preambleCmdStream, Device &device, const GraphicsAllocation *preemptionCsr) { } template <> void PreemptionHelper::programStateSip(LinearStream &preambleCmdStream, Device &device) { using STATE_SIP = typename GfxFamily::STATE_SIP; using 
MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM; auto &hwInfo = device.getHardwareInfo(); bool debuggingEnabled = device.getDebugger() != nullptr; if (debuggingEnabled) { HwHelper &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); auto sipAllocation = SipKernel::getSipKernel(device).getSipAllocation(); if (hwHelper.isSipWANeeded(hwInfo)) { auto mmio = reinterpret_cast(preambleCmdStream.getSpace(sizeof(MI_LOAD_REGISTER_IMM))); MI_LOAD_REGISTER_IMM cmd = GfxFamily::cmdInitLoadRegisterImm; UNRECOVERABLE_IF((sipAllocation->getGpuAddressToPatch() & uint64_t(0xffffffff00000000)) != 0); uint32_t globalSip = static_cast(sipAllocation->getGpuAddressToPatch() & uint32_t(-1)); globalSip &= 0xfffffff8; globalSip |= 1; cmd.setDataDword(globalSip); cmd.setRegisterOffset(GlobalSipRegister::registerOffset); *mmio = cmd; } else { auto sip = reinterpret_cast(preambleCmdStream.getSpace(sizeof(STATE_SIP))); STATE_SIP cmd = GfxFamily::cmdInitStateSip; cmd.setSystemInstructionPointer(sipAllocation->getGpuAddressToPatch()); *sip = cmd; } } } template <> void PreemptionHelper::programStateSipEndWa(LinearStream &cmdStream, Device &device) { using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM; bool debuggingEnabled = device.getDebugger() != nullptr; if (debuggingEnabled) { HwHelper &hwHelper = HwHelper::get(device.getHardwareInfo().platform.eRenderCoreFamily); if (hwHelper.isSipWANeeded(device.getHardwareInfo())) { NEO::PipeControlArgs args; NEO::MemorySynchronizationCommands::addPipeControl(cmdStream, args); auto mmio = reinterpret_cast(cmdStream.getSpace(sizeof(MI_LOAD_REGISTER_IMM))); MI_LOAD_REGISTER_IMM cmd = GfxFamily::cmdInitLoadRegisterImm; uint32_t globalSip = 0; cmd.setDataDword(globalSip); cmd.setRegisterOffset(GlobalSipRegister::registerOffset); *mmio = cmd; } } } template <> size_t PreemptionHelper::getRequiredPreambleSize(const Device &device) { return 0u; } template <> size_t PreemptionHelper::getRequiredStateSipCmdSize(Device 
&device, bool isRcs) { size_t size = 0; bool debuggingEnabled = device.getDebugger() != nullptr || device.isDebuggerActive(); auto &hwInfo = device.getHardwareInfo(); if (debuggingEnabled) { HwHelper &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); if (hwHelper.isSipWANeeded(hwInfo)) { size += sizeof(typename GfxFamily::PIPE_CONTROL); size += 2 * sizeof(typename GfxFamily::MI_LOAD_REGISTER_IMM); } else { auto hwInfoConfig = HwInfoConfig::get(hwInfo.platform.eProductFamily); const auto &[isBasicWARequired, isExtendedWARequired] = hwInfoConfig->isPipeControlPriorToNonPipelinedStateCommandsWARequired(hwInfo, isRcs); const auto isWARequired = isBasicWARequired || isExtendedWARequired; if (isWARequired) { size += sizeof(typename GfxFamily::PIPE_CONTROL); } size += sizeof(typename GfxFamily::STATE_SIP); } } return size; } compute-runtime-22.14.22890/shared/source/command_stream/queue_throttle.h000066400000000000000000000003331422164147700263320ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include namespace NEO { enum QueueThrottle : uint32_t { LOW, MEDIUM, HIGH }; } // namespace NEO compute-runtime-22.14.22890/shared/source/command_stream/scratch_space_controller.cpp000066400000000000000000000030401422164147700306570ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/scratch_space_controller.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/source/memory_manager/memory_manager.h" namespace NEO { ScratchSpaceController::ScratchSpaceController(uint32_t rootDeviceIndex, ExecutionEnvironment 
&environment, InternalAllocationStorage &allocationStorage) : rootDeviceIndex(rootDeviceIndex), executionEnvironment(environment), csrAllocationStorage(allocationStorage) { auto hwInfo = executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo(); auto &hwHelper = HwHelper::get(hwInfo->platform.eRenderCoreFamily); computeUnitsUsedForScratch = hwHelper.getComputeUnitsUsedForScratch(hwInfo); } ScratchSpaceController::~ScratchSpaceController() { if (scratchAllocation) { getMemoryManager()->freeGraphicsMemory(scratchAllocation); } if (privateScratchAllocation) { getMemoryManager()->freeGraphicsMemory(privateScratchAllocation); } } MemoryManager *ScratchSpaceController::getMemoryManager() const { UNRECOVERABLE_IF(executionEnvironment.memoryManager.get() == nullptr); return executionEnvironment.memoryManager.get(); } } // namespace NEO compute-runtime-22.14.22890/shared/source/command_stream/scratch_space_controller.h000066400000000000000000000072561422164147700303410ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/bindless_heaps_helper.h" #include "shared/source/indirect_heap/indirect_heap.h" #include #include namespace NEO { class Device; class ExecutionEnvironment; class GraphicsAllocation; class InternalAllocationStorage; class MemoryManager; struct HardwareInfo; class OsContext; class CommandStreamReceiver; namespace ScratchSpaceConstants { constexpr size_t scratchSpaceOffsetFor64Bit = 4096u; } using ResidencyContainer = std::vector; class ScratchSpaceController { public: ScratchSpaceController(uint32_t rootDeviceIndex, ExecutionEnvironment &environment, InternalAllocationStorage &allocationStorage); virtual ~ScratchSpaceController(); MOCKABLE_VIRTUAL GraphicsAllocation *getScratchSpaceAllocation() { return scratchAllocation; } GraphicsAllocation *getPrivateScratchSpaceAllocation() { return privateScratchAllocation; } virtual void 
setRequiredScratchSpace(void *sshBaseAddress, uint32_t scratchSlot, uint32_t requiredPerThreadScratchSize, uint32_t requiredPerThreadPrivateScratchSize, uint32_t currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, bool &vfeStateDirty) = 0; virtual uint64_t calculateNewGSH() = 0; virtual uint64_t getScratchPatchAddress() = 0; inline uint32_t getPerThreadScratchSpaceSize() { return static_cast(scratchSizeBytes / computeUnitsUsedForScratch); } inline uint32_t getPerThreadPrivateScratchSize() { return static_cast(privateScratchSizeBytes / computeUnitsUsedForScratch); } virtual void reserveHeap(IndirectHeap::Type heapType, IndirectHeap *&indirectHeap) = 0; virtual void programHeaps(HeapContainer &heapContainer, uint32_t scratchSlot, uint32_t requiredPerThreadScratchSize, uint32_t requiredPerThreadPrivateScratchSize, uint32_t currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, bool &vfeStateDirty) = 0; virtual void programBindlessSurfaceStateForScratch(BindlessHeapsHelper *heapsHelper, uint32_t requiredPerThreadScratchSize, uint32_t requiredPerThreadPrivateScratchSize, uint32_t currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, bool &vfeStateDirty, CommandStreamReceiver *csr) = 0; protected: MemoryManager *getMemoryManager() const; const uint32_t rootDeviceIndex; ExecutionEnvironment &executionEnvironment; GraphicsAllocation *scratchAllocation = nullptr; GraphicsAllocation *privateScratchAllocation = nullptr; InternalAllocationStorage &csrAllocationStorage; size_t scratchSizeBytes = 0; size_t privateScratchSizeBytes = 0; bool force32BitAllocation = false; uint32_t computeUnitsUsedForScratch = 0; }; } // namespace NEO compute-runtime-22.14.22890/shared/source/command_stream/scratch_space_controller_base.cpp000066400000000000000000000127561422164147700316670ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include 
"shared/source/command_stream/scratch_space_controller_base.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/constants.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/preamble.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/os_interface/os_context.h" namespace NEO { ScratchSpaceControllerBase::ScratchSpaceControllerBase(uint32_t rootDeviceIndex, ExecutionEnvironment &environment, InternalAllocationStorage &allocationStorage) : ScratchSpaceController(rootDeviceIndex, environment, allocationStorage) { } void ScratchSpaceControllerBase::setRequiredScratchSpace(void *sshBaseAddress, uint32_t scratchSlot, uint32_t requiredPerThreadScratchSize, uint32_t requiredPerThreadPrivateScratchSize, uint32_t currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, bool &vfeStateDirty) { size_t requiredScratchSizeInBytes = requiredPerThreadScratchSize * computeUnitsUsedForScratch; if (requiredScratchSizeInBytes && (scratchSizeBytes < requiredScratchSizeInBytes)) { if (scratchAllocation) { scratchAllocation->updateTaskCount(currentTaskCount, osContext.getContextId()); csrAllocationStorage.storeAllocation(std::unique_ptr(scratchAllocation), TEMPORARY_ALLOCATION); } scratchSizeBytes = requiredScratchSizeInBytes; createScratchSpaceAllocation(); vfeStateDirty = true; force32BitAllocation = getMemoryManager()->peekForce32BitAllocations(); if (is64bit && !force32BitAllocation) { stateBaseAddressDirty = true; } } } void ScratchSpaceControllerBase::createScratchSpaceAllocation() { scratchAllocation = getMemoryManager()->allocateGraphicsMemoryWithProperties({rootDeviceIndex, scratchSizeBytes, AllocationType::SCRATCH_SURFACE, this->csrAllocationStorage.getDeviceBitfield()}); 
UNRECOVERABLE_IF(scratchAllocation == nullptr); } uint64_t ScratchSpaceControllerBase::calculateNewGSH() { uint64_t gsh = 0; if (scratchAllocation) { gsh = scratchAllocation->getGpuAddress() - ScratchSpaceConstants::scratchSpaceOffsetFor64Bit; } return gsh; } uint64_t ScratchSpaceControllerBase::getScratchPatchAddress() { //for 32 bit scratch space pointer is being programmed in Media VFE State and is relative to 0 as General State Base Address //for 64 bit, scratch space pointer is being programmed as "General State Base Address - scratchSpaceOffsetFor64bit" // and "0 + scratchSpaceOffsetFor64bit" is being programmed in Media VFE state uint64_t scratchAddress = 0; if (scratchAllocation) { scratchAddress = scratchAllocation->getGpuAddressToPatch(); if (is64bit && !getMemoryManager()->peekForce32BitAllocations()) { //this is to avoid scractch allocation offset "0" scratchAddress = ScratchSpaceConstants::scratchSpaceOffsetFor64Bit; } } return scratchAddress; } void ScratchSpaceControllerBase::reserveHeap(IndirectHeap::Type heapType, IndirectHeap *&indirectHeap) { if (heapType == IndirectHeap::Type::SURFACE_STATE) { auto &hwHelper = HwHelper::get(executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo()->platform.eRenderCoreFamily); auto surfaceStateSize = hwHelper.getRenderSurfaceStateSize(); indirectHeap->getSpace(surfaceStateSize); } } void ScratchSpaceControllerBase::programHeaps(HeapContainer &heapContainer, uint32_t offset, uint32_t requiredPerThreadScratchSize, uint32_t requiredPerThreadPrivateScratchSize, uint32_t currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, bool &vfeStateDirty) { } void ScratchSpaceControllerBase::programBindlessSurfaceStateForScratch(BindlessHeapsHelper *heapsHelper, uint32_t requiredPerThreadScratchSize, uint32_t requiredPerThreadPrivateScratchSize, uint32_t currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, bool &vfeStateDirty, NEO::CommandStreamReceiver *csr) { } } // 
namespace NEO compute-runtime-22.14.22890/shared/source/command_stream/scratch_space_controller_base.h000066400000000000000000000042601422164147700313230ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/scratch_space_controller.h" namespace NEO { class ScratchSpaceControllerBase : public ScratchSpaceController { public: ScratchSpaceControllerBase(uint32_t rootDeviceIndex, ExecutionEnvironment &environment, InternalAllocationStorage &allocationStorage); void setRequiredScratchSpace(void *sshBaseAddress, uint32_t scratchSlot, uint32_t requiredPerThreadScratchSize, uint32_t requiredPerThreadPrivateScratchSize, uint32_t currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, bool &vfeStateDirty) override; uint64_t calculateNewGSH() override; uint64_t getScratchPatchAddress() override; void reserveHeap(IndirectHeap::Type heapType, IndirectHeap *&indirectHeap) override; void programHeaps(HeapContainer &heapContainer, uint32_t scratchSlot, uint32_t requiredPerThreadScratchSize, uint32_t requiredPerThreadPrivateScratchSize, uint32_t currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, bool &vfeStateDirty) override; void programBindlessSurfaceStateForScratch(BindlessHeapsHelper *heapsHelper, uint32_t requiredPerThreadScratchSize, uint32_t requiredPerThreadPrivateScratchSize, uint32_t currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, bool &vfeStateDirty, NEO::CommandStreamReceiver *csr) override; protected: void createScratchSpaceAllocation(); }; } // namespace NEO compute-runtime-22.14.22890/shared/source/command_stream/scratch_space_controller_xehp_and_later.cpp000066400000000000000000000273551422164147700337330ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/scratch_space_controller_xehp_and_later.h" #include 
"shared/source/command_stream/command_stream_receiver.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/api_specific_config.h" #include "shared/source/helpers/constants.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/os_interface/os_context.h" namespace NEO { ScratchSpaceControllerXeHPAndLater::ScratchSpaceControllerXeHPAndLater(uint32_t rootDeviceIndex, ExecutionEnvironment &environment, InternalAllocationStorage &allocationStorage) : ScratchSpaceController(rootDeviceIndex, environment, allocationStorage) { auto &hwHelper = HwHelper::get(environment.rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo()->platform.eRenderCoreFamily); singleSurfaceStateSize = hwHelper.getRenderSurfaceStateSize(); if (DebugManager.flags.EnablePrivateScratchSlot1.get() != -1) { privateScratchSpaceSupported = !!DebugManager.flags.EnablePrivateScratchSlot1.get(); } if (privateScratchSpaceSupported) { ScratchSpaceControllerXeHPAndLater::stateSlotsCount *= 2; } } void ScratchSpaceControllerXeHPAndLater::setNewSshPtr(void *newSsh, bool &cfeDirty, bool changeId) { if (surfaceStateHeap != newSsh) { surfaceStateHeap = static_cast(newSsh); if (scratchAllocation == nullptr) { cfeDirty = false; } else { if (changeId) { slotId = 0; } programSurfaceState(); cfeDirty = true; } } } void ScratchSpaceControllerXeHPAndLater::setRequiredScratchSpace(void *sshBaseAddress, uint32_t offset, uint32_t requiredPerThreadScratchSize, uint32_t requiredPerThreadPrivateScratchSize, uint32_t currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, 
bool &vfeStateDirty) { setNewSshPtr(sshBaseAddress, vfeStateDirty, offset == 0 ? true : false); bool scratchSurfaceDirty = false; prepareScratchAllocation(requiredPerThreadScratchSize, requiredPerThreadPrivateScratchSize, currentTaskCount, osContext, stateBaseAddressDirty, scratchSurfaceDirty, vfeStateDirty); if (scratchSurfaceDirty) { vfeStateDirty = true; updateSlots = true; programSurfaceState(); } } void ScratchSpaceControllerXeHPAndLater::programSurfaceState() { if (updateSlots) { slotId++; } UNRECOVERABLE_IF(slotId >= stateSlotsCount); UNRECOVERABLE_IF(scratchAllocation == nullptr && privateScratchAllocation == nullptr); void *surfaceStateForScratchAllocation = ptrOffset(static_cast(surfaceStateHeap), getOffsetToSurfaceState(slotId + sshOffset)); programSurfaceStateAtPtr(surfaceStateForScratchAllocation); } void ScratchSpaceControllerXeHPAndLater::programSurfaceStateAtPtr(void *surfaceStateForScratchAllocation) { auto &hwHelper = HwHelper::get(executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo()->platform.eRenderCoreFamily); uint64_t scratchAllocationAddress = 0u; if (scratchAllocation) { scratchAllocationAddress = scratchAllocation->getGpuAddress(); } hwHelper.setRenderSurfaceStateForBuffer(*executionEnvironment.rootDeviceEnvironments[rootDeviceIndex], surfaceStateForScratchAllocation, computeUnitsUsedForScratch, scratchAllocationAddress, 0, perThreadScratchSize, nullptr, false, scratchType, false, true); if (privateScratchSpaceSupported) { void *surfaceStateForPrivateScratchAllocation = ptrOffset(surfaceStateForScratchAllocation, singleSurfaceStateSize); uint64_t privateScratchAllocationAddress = 0u; if (privateScratchAllocation) { privateScratchAllocationAddress = privateScratchAllocation->getGpuAddress(); } hwHelper.setRenderSurfaceStateForBuffer(*executionEnvironment.rootDeviceEnvironments[rootDeviceIndex], surfaceStateForPrivateScratchAllocation, computeUnitsUsedForScratch, privateScratchAllocationAddress, 0, 
perThreadPrivateScratchSize, nullptr, false, scratchType, false, true); } } uint64_t ScratchSpaceControllerXeHPAndLater::calculateNewGSH() { return 0u; } uint64_t ScratchSpaceControllerXeHPAndLater::getScratchPatchAddress() { uint64_t scratchAddress = 0u; if (scratchAllocation || privateScratchAllocation) { if (ApiSpecificConfig::getBindlessConfiguration()) { scratchAddress = bindlessSS.surfaceStateOffset; } else { scratchAddress = static_cast(getOffsetToSurfaceState(slotId + sshOffset)); } } return scratchAddress; } size_t ScratchSpaceControllerXeHPAndLater::getOffsetToSurfaceState(uint32_t requiredSlotCount) const { auto offset = requiredSlotCount * singleSurfaceStateSize; if (privateScratchSpaceSupported) { offset *= 2; } return offset; } void ScratchSpaceControllerXeHPAndLater::reserveHeap(IndirectHeap::Type heapType, IndirectHeap *&indirectHeap) { if (heapType == IndirectHeap::Type::SURFACE_STATE) { indirectHeap->getSpace(getOffsetToSurfaceState(stateSlotsCount)); } } void ScratchSpaceControllerXeHPAndLater::programBindlessSurfaceStateForScratch(BindlessHeapsHelper *heapsHelper, uint32_t requiredPerThreadScratchSize, uint32_t requiredPerThreadPrivateScratchSize, uint32_t currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, bool &vfeStateDirty, NEO::CommandStreamReceiver *csr) { bool scratchSurfaceDirty = false; prepareScratchAllocation(requiredPerThreadScratchSize, requiredPerThreadPrivateScratchSize, currentTaskCount, osContext, stateBaseAddressDirty, scratchSurfaceDirty, vfeStateDirty); if (scratchSurfaceDirty) { bindlessSS = heapsHelper->allocateSSInHeap(singleSurfaceStateSize * (privateScratchSpaceSupported ? 
2 : 1), scratchAllocation, BindlessHeapsHelper::SCRATCH_SSH); programSurfaceStateAtPtr(bindlessSS.ssPtr); vfeStateDirty = true; } csr->makeResident(*bindlessSS.heapAllocation); } void ScratchSpaceControllerXeHPAndLater::prepareScratchAllocation(uint32_t requiredPerThreadScratchSize, uint32_t requiredPerThreadPrivateScratchSize, uint32_t currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, bool &scratchSurfaceDirty, bool &vfeStateDirty) { uint32_t requiredPerThreadScratchSizeAlignedUp = alignUp(requiredPerThreadScratchSize, 64); size_t requiredScratchSizeInBytes = requiredPerThreadScratchSizeAlignedUp * computeUnitsUsedForScratch; scratchSurfaceDirty = false; auto multiTileCapable = osContext.getNumSupportedDevices() > 1; if (scratchSizeBytes < requiredScratchSizeInBytes) { if (scratchAllocation) { scratchAllocation->updateTaskCount(currentTaskCount, osContext.getContextId()); csrAllocationStorage.storeAllocation(std::unique_ptr(scratchAllocation), TEMPORARY_ALLOCATION); } scratchSurfaceDirty = true; scratchSizeBytes = requiredScratchSizeInBytes; perThreadScratchSize = requiredPerThreadScratchSizeAlignedUp; AllocationProperties properties{this->rootDeviceIndex, true, scratchSizeBytes, AllocationType::SCRATCH_SURFACE, multiTileCapable, false, osContext.getDeviceBitfield()}; scratchAllocation = getMemoryManager()->allocateGraphicsMemoryWithProperties(properties); } if (privateScratchSpaceSupported) { uint32_t requiredPerThreadPrivateScratchSizeAlignedUp = alignUp(requiredPerThreadPrivateScratchSize, 64); size_t requiredPrivateScratchSizeInBytes = requiredPerThreadPrivateScratchSizeAlignedUp * computeUnitsUsedForScratch; if (privateScratchSizeBytes < requiredPrivateScratchSizeInBytes) { if (privateScratchAllocation) { privateScratchAllocation->updateTaskCount(currentTaskCount, osContext.getContextId()); csrAllocationStorage.storeAllocation(std::unique_ptr(privateScratchAllocation), TEMPORARY_ALLOCATION); } privateScratchSizeBytes = 
requiredPrivateScratchSizeInBytes; perThreadPrivateScratchSize = requiredPerThreadPrivateScratchSizeAlignedUp; scratchSurfaceDirty = true; AllocationProperties properties{this->rootDeviceIndex, true, privateScratchSizeBytes, AllocationType::PRIVATE_SURFACE, multiTileCapable, false, osContext.getDeviceBitfield()}; privateScratchAllocation = getMemoryManager()->allocateGraphicsMemoryWithProperties(properties); } } } void ScratchSpaceControllerXeHPAndLater::programHeaps(HeapContainer &heapContainer, uint32_t scratchSlot, uint32_t requiredPerThreadScratchSize, uint32_t requiredPerThreadPrivateScratchSize, uint32_t currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, bool &vfeStateDirty) { sshOffset = scratchSlot; updateSlots = false; setRequiredScratchSpace(heapContainer[0]->getUnderlyingBuffer(), sshOffset, requiredPerThreadScratchSize, requiredPerThreadPrivateScratchSize, currentTaskCount, osContext, stateBaseAddressDirty, vfeStateDirty); for (uint32_t i = 1; i < heapContainer.size(); ++i) { surfaceStateHeap = static_cast(heapContainer[i]->getUnderlyingBuffer()); updateSlots = false; programSurfaceState(); } updateSlots = true; } } // namespace NEO compute-runtime-22.14.22890/shared/source/command_stream/scratch_space_controller_xehp_and_later.h000066400000000000000000000067661422164147700334030ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/scratch_space_controller.h" #include #include namespace NEO { class ScratchSpaceControllerXeHPAndLater : public ScratchSpaceController { public: ScratchSpaceControllerXeHPAndLater(uint32_t rootDeviceIndex, ExecutionEnvironment &environment, InternalAllocationStorage &allocationStorage); void setNewSshPtr(void *newSsh, bool &cfeDirty, bool changeId); void setRequiredScratchSpace(void *sshBaseAddress, uint32_t scratchSlot, uint32_t requiredPerThreadScratchSize, uint32_t 
requiredPerThreadPrivateScratchSize, uint32_t currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, bool &vfeStateDirty) override; uint64_t calculateNewGSH() override; uint64_t getScratchPatchAddress() override; void reserveHeap(IndirectHeap::Type heapType, IndirectHeap *&indirectHeap) override; void programHeaps(HeapContainer &heapContainer, uint32_t scratchSlot, uint32_t requiredPerThreadScratchSize, uint32_t requiredPerThreadPrivateScratchSize, uint32_t currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, bool &vfeStateDirty) override; void programBindlessSurfaceStateForScratch(BindlessHeapsHelper *heapsHelper, uint32_t requiredPerThreadScratchSize, uint32_t requiredPerThreadPrivateScratchSize, uint32_t currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, bool &vfeStateDirty, NEO::CommandStreamReceiver *csr) override; protected: MOCKABLE_VIRTUAL void programSurfaceState(); MOCKABLE_VIRTUAL void programSurfaceStateAtPtr(void *surfaceStateForScratchAllocation); MOCKABLE_VIRTUAL void prepareScratchAllocation(uint32_t requiredPerThreadScratchSize, uint32_t requiredPerThreadPrivateScratchSize, uint32_t currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, bool &scratchSurfaceDirty, bool &vfeStateDirty); size_t getOffsetToSurfaceState(uint32_t requiredSlotCount) const; bool updateSlots = true; uint32_t stateSlotsCount = 16; static const uint32_t scratchType = 6; bool privateScratchSpaceSupported = true; char *surfaceStateHeap = nullptr; size_t singleSurfaceStateSize = 0; uint32_t slotId = 0; uint32_t perThreadScratchSize = 0; uint32_t perThreadPrivateScratchSize = 0; uint32_t sshOffset = 0; SurfaceStateInHeapInfo bindlessSS = {}; }; } // namespace NEO compute-runtime-22.14.22890/shared/source/command_stream/stream_properties.cpp000066400000000000000000000111241422164147700273630ustar00rootroot00000000000000/* * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include 
"shared/source/command_stream/stream_properties.h" #include "shared/source/command_stream/thread_arbitration_policy.h" #include "shared/source/kernel/grf_config.h" using namespace NEO; void StateComputeModeProperties::setProperties(bool requiresCoherency, uint32_t numGrfRequired, int32_t threadArbitrationPolicy, const HardwareInfo &hwInfo) { clearIsDirty(); int32_t isCoherencyRequired = (requiresCoherency ? 1 : 0); this->isCoherencyRequired.set(isCoherencyRequired); if (this->largeGrfMode.value == -1 || numGrfRequired != GrfConfig::NotApplicable) { int32_t largeGrfMode = (numGrfRequired == GrfConfig::LargeGrfNumber ? 1 : 0); this->largeGrfMode.set(largeGrfMode); } int32_t zPassAsyncComputeThreadLimit = -1; if (DebugManager.flags.ForceZPassAsyncComputeThreadLimit.get() != -1) { zPassAsyncComputeThreadLimit = DebugManager.flags.ForceZPassAsyncComputeThreadLimit.get(); } this->zPassAsyncComputeThreadLimit.set(zPassAsyncComputeThreadLimit); int32_t pixelAsyncComputeThreadLimit = -1; if (DebugManager.flags.ForcePixelAsyncComputeThreadLimit.get() != -1) { pixelAsyncComputeThreadLimit = DebugManager.flags.ForcePixelAsyncComputeThreadLimit.get(); } this->pixelAsyncComputeThreadLimit.set(pixelAsyncComputeThreadLimit); bool setDefaultThreadArbitrationPolicy = (threadArbitrationPolicy == ThreadArbitrationPolicy::NotPresent) && (NEO::DebugManager.flags.ForceDefaultThreadArbitrationPolicyIfNotSpecified.get() || (this->threadArbitrationPolicy.value == ThreadArbitrationPolicy::NotPresent)); if (setDefaultThreadArbitrationPolicy) { auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily); threadArbitrationPolicy = hwHelper.getDefaultThreadArbitrationPolicy(); } if (DebugManager.flags.OverrideThreadArbitrationPolicy.get() != -1) { threadArbitrationPolicy = DebugManager.flags.OverrideThreadArbitrationPolicy.get(); } this->threadArbitrationPolicy.set(threadArbitrationPolicy); setPropertiesExtra(); } void StateComputeModeProperties::setProperties(const 
StateComputeModeProperties &properties) { clearIsDirty(); isCoherencyRequired.set(properties.isCoherencyRequired.value); largeGrfMode.set(properties.largeGrfMode.value); zPassAsyncComputeThreadLimit.set(properties.zPassAsyncComputeThreadLimit.value); pixelAsyncComputeThreadLimit.set(properties.pixelAsyncComputeThreadLimit.value); threadArbitrationPolicy.set(properties.threadArbitrationPolicy.value); setPropertiesExtra(properties); } bool StateComputeModeProperties::isDirty() const { return isCoherencyRequired.isDirty || largeGrfMode.isDirty || zPassAsyncComputeThreadLimit.isDirty || pixelAsyncComputeThreadLimit.isDirty || threadArbitrationPolicy.isDirty || isDirtyExtra(); } void StateComputeModeProperties::clearIsDirty() { isCoherencyRequired.isDirty = false; largeGrfMode.isDirty = false; zPassAsyncComputeThreadLimit.isDirty = false; pixelAsyncComputeThreadLimit.isDirty = false; threadArbitrationPolicy.isDirty = false; clearIsDirtyExtra(); } void FrontEndProperties::setProperties(bool isCooperativeKernel, bool disableEUFusion, bool disableOverdispatch, int32_t engineInstancedDevice, const HardwareInfo &hwInfo) { clearIsDirty(); this->computeDispatchAllWalkerEnable.set(isCooperativeKernel); this->disableEUFusion.set(disableEUFusion); this->disableOverdispatch.set(disableOverdispatch); this->singleSliceDispatchCcsMode.set(engineInstancedDevice); } void FrontEndProperties::setProperties(const FrontEndProperties &properties) { clearIsDirty(); disableOverdispatch.set(properties.disableOverdispatch.value); disableEUFusion.set(properties.disableEUFusion.value); singleSliceDispatchCcsMode.set(properties.singleSliceDispatchCcsMode.value); computeDispatchAllWalkerEnable.set(properties.computeDispatchAllWalkerEnable.value); } bool FrontEndProperties::isDirty() const { return disableOverdispatch.isDirty || disableEUFusion.isDirty || singleSliceDispatchCcsMode.isDirty || computeDispatchAllWalkerEnable.isDirty; } void FrontEndProperties::clearIsDirty() { disableEUFusion.isDirty 
= false; disableOverdispatch.isDirty = false; singleSliceDispatchCcsMode.isDirty = false; computeDispatchAllWalkerEnable.isDirty = false; } compute-runtime-22.14.22890/shared/source/command_stream/stream_properties.h000066400000000000000000000005741422164147700270370ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/hw_info.h" #include "stream_properties.inl" namespace NEO { struct StreamProperties { StateComputeModeProperties stateComputeMode{}; FrontEndProperties frontEndState{}; }; } // namespace NEO compute-runtime-22.14.22890/shared/source/command_stream/stream_properties_extra.cpp000066400000000000000000000007161422164147700305730ustar00rootroot00000000000000/* * Copyright (C) 2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/stream_properties.h" using namespace NEO; void StateComputeModeProperties::setPropertiesExtra() { } void StateComputeModeProperties::setPropertiesExtra(const StateComputeModeProperties &properties) { } bool StateComputeModeProperties::isDirtyExtra() const { return false; } void StateComputeModeProperties::clearIsDirtyExtra() { } compute-runtime-22.14.22890/shared/source/command_stream/stream_property.h000066400000000000000000000006141422164147700265220ustar00rootroot00000000000000/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include namespace NEO { struct StreamProperty { int32_t value = -1; bool isDirty = false; void set(int32_t newValue) { if ((value != newValue) && (newValue != -1)) { value = newValue; isDirty = true; } } }; } // namespace NEO compute-runtime-22.14.22890/shared/source/command_stream/submission_status.h000066400000000000000000000004501422164147700270570ustar00rootroot00000000000000/* * Copyright (C) 2019-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once 
#include namespace NEO { enum class SubmissionStatus : uint32_t { SUCCESS = 0, FAILED, OUT_OF_MEMORY, UNSUPPORTED, DEVICE_UNINITIALIZED, }; } // namespace NEO compute-runtime-22.14.22890/shared/source/command_stream/submissions_aggregator.cpp000066400000000000000000000115561422164147700304050ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "submissions_aggregator.h" #include "shared/source/helpers/flush_stamp.h" #include "shared/source/memory_manager/graphics_allocation.h" void NEO::SubmissionAggregator::recordCommandBuffer(CommandBuffer *commandBuffer) { this->cmdBuffers.pushTailOne(*commandBuffer); } void NEO::SubmissionAggregator::aggregateCommandBuffers(ResourcePackage &resourcePackage, size_t &totalUsedSize, size_t totalMemoryBudget, uint32_t osContextId) { auto primaryCommandBuffer = this->cmdBuffers.peekHead(); auto currentInspection = this->inspectionId; if (!primaryCommandBuffer) { return; } auto primaryBatchGraphicsAllocation = primaryCommandBuffer->batchBuffer.commandBufferAllocation; this->inspectionId++; primaryCommandBuffer->inspectionId = currentInspection; //primary command buffers must fix to budget for (auto &graphicsAllocation : primaryCommandBuffer->surfaces) { if (graphicsAllocation->getInspectionId(osContextId) < currentInspection) { graphicsAllocation->setInspectionId(currentInspection, osContextId); resourcePackage.push_back(graphicsAllocation); totalUsedSize += graphicsAllocation->getUnderlyingBufferSize(); } } //check if we have anything for merge if (!primaryCommandBuffer->next) { return; } //check if next cmd buffer is compatible if (primaryCommandBuffer->next->batchBuffer.requiresCoherency != primaryCommandBuffer->batchBuffer.requiresCoherency) { return; } if (primaryCommandBuffer->next->batchBuffer.low_priority != primaryCommandBuffer->batchBuffer.low_priority) { return; } if (primaryCommandBuffer->next->batchBuffer.throttle != 
primaryCommandBuffer->batchBuffer.throttle) { return; } if (primaryCommandBuffer->next->batchBuffer.sliceCount != primaryCommandBuffer->batchBuffer.sliceCount) { return; } auto nextCommandBuffer = primaryCommandBuffer->next; ResourcePackage newResources; while (nextCommandBuffer) { size_t nextCommandBufferNewResourcesSize = 0; //evaluate if buffer fits for (auto &graphicsAllocation : nextCommandBuffer->surfaces) { if (graphicsAllocation == primaryBatchGraphicsAllocation) { continue; } if (graphicsAllocation->getInspectionId(osContextId) < currentInspection) { graphicsAllocation->setInspectionId(currentInspection, osContextId); newResources.push_back(graphicsAllocation); nextCommandBufferNewResourcesSize += graphicsAllocation->getUnderlyingBufferSize(); } } if (nextCommandBuffer->batchBuffer.commandBufferAllocation && (nextCommandBuffer->batchBuffer.commandBufferAllocation != primaryBatchGraphicsAllocation)) { if (nextCommandBuffer->batchBuffer.commandBufferAllocation->getInspectionId(osContextId) < currentInspection) { nextCommandBuffer->batchBuffer.commandBufferAllocation->setInspectionId(currentInspection, osContextId); newResources.push_back(nextCommandBuffer->batchBuffer.commandBufferAllocation); nextCommandBufferNewResourcesSize += nextCommandBuffer->batchBuffer.commandBufferAllocation->getUnderlyingBufferSize(); } } if (nextCommandBufferNewResourcesSize + totalUsedSize <= totalMemoryBudget) { auto currentNode = nextCommandBuffer; nextCommandBuffer = nextCommandBuffer->next; totalUsedSize += nextCommandBufferNewResourcesSize; currentNode->inspectionId = currentInspection; for (auto &newResource : newResources) { resourcePackage.push_back(newResource); } newResources.clear(); } else { break; } } } NEO::BatchBuffer::BatchBuffer(GraphicsAllocation *commandBufferAllocation, size_t startOffset, size_t chainedBatchBufferStartOffset, GraphicsAllocation *chainedBatchBuffer, bool requiresCoherency, bool lowPriority, QueueThrottle throttle, uint64_t sliceCount, size_t 
usedSize, LinearStream *stream, void *endCmdPtr, bool useSingleSubdevice) : commandBufferAllocation(commandBufferAllocation), startOffset(startOffset), chainedBatchBufferStartOffset(chainedBatchBufferStartOffset), chainedBatchBuffer(chainedBatchBuffer), requiresCoherency(requiresCoherency), low_priority(lowPriority), throttle(throttle), sliceCount(sliceCount), usedSize(usedSize), stream(stream), endCmdPtr(endCmdPtr), useSingleSubdevice(useSingleSubdevice) {} NEO::CommandBuffer::CommandBuffer(Device &device) : device(device) { flushStamp.reset(new FlushStampTracker(false)); } compute-runtime-22.14.22890/shared/source/command_stream/submissions_aggregator.h000066400000000000000000000050221422164147700300410ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/csr_definitions.h" #include "shared/source/command_stream/linear_stream.h" #include "shared/source/memory_manager/residency_container.h" #include "shared/source/utilities/idlist.h" #include "shared/source/utilities/stackvec.h" #include namespace NEO { class Device; class Event; class FlushStampTracker; class GraphicsAllocation; struct BatchBuffer { BatchBuffer(GraphicsAllocation *commandBufferAllocation, size_t startOffset, size_t chainedBatchBufferStartOffset, GraphicsAllocation *chainedBatchBuffer, bool requiresCoherency, bool lowPriority, QueueThrottle throttle, uint64_t sliceCount, size_t usedSize, LinearStream *stream, void *endCmdPtr, bool useSingleSubdevice); BatchBuffer() {} GraphicsAllocation *commandBufferAllocation = nullptr; size_t startOffset = 0u; size_t chainedBatchBufferStartOffset = 0u; GraphicsAllocation *chainedBatchBuffer = nullptr; bool requiresCoherency = false; bool low_priority = false; QueueThrottle throttle = QueueThrottle::MEDIUM; uint64_t sliceCount = QueueSliceCount::defaultSliceCount; size_t usedSize = 0u; //only used in drm csr in gem close worker active mode LinearStream 
*stream = nullptr; void *endCmdPtr = nullptr; bool useSingleSubdevice = false; }; struct CommandBuffer : public IDNode { CommandBuffer(Device &device); ResidencyContainer surfaces; BatchBuffer batchBuffer; void *batchBufferEndLocation = nullptr; uint32_t inspectionId = 0; uint32_t taskCount = 0u; void *pipeControlThatMayBeErasedLocation = nullptr; void *epiloguePipeControlLocation = nullptr; std::unique_ptr flushStamp; Device &device; }; struct CommandBufferList : public IDList {}; using ResourcePackage = StackVec; class SubmissionAggregator { public: void recordCommandBuffer(CommandBuffer *commandBuffer); void aggregateCommandBuffers(ResourcePackage &resourcePackage, size_t &totalUsedSize, size_t totalMemoryBudget, uint32_t osContextId); CommandBufferList &peekCmdBufferList() { return cmdBuffers; } protected: CommandBufferList cmdBuffers; uint32_t inspectionId = 1; }; } // namespace NEO compute-runtime-22.14.22890/shared/source/command_stream/tbx_command_stream_receiver.cpp000066400000000000000000000025251422164147700313530ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/tbx_command_stream_receiver.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/helpers/hw_info.h" #include namespace NEO { TbxCommandStreamReceiverCreateFunc tbxCommandStreamReceiverFactory[IGFX_MAX_CORE] = {}; CommandStreamReceiver *TbxCommandStreamReceiver::create(const std::string &baseName, bool withAubDump, ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield) { auto hwInfo = executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo(); if (hwInfo->platform.eRenderCoreFamily >= IGFX_MAX_CORE) { DEBUG_BREAK_IF(!false); return nullptr; } auto pCreate = 
tbxCommandStreamReceiverFactory[hwInfo->platform.eRenderCoreFamily]; return pCreate ? pCreate(baseName, withAubDump, executionEnvironment, rootDeviceIndex, deviceBitfield) : nullptr; } } // namespace NEO compute-runtime-22.14.22890/shared/source/command_stream/tbx_command_stream_receiver.h000066400000000000000000000043731422164147700310230ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/aub_mem_dump/aub_mem_dump.h" #include "shared/source/helpers/common_types.h" namespace NEO { class CommandStreamReceiver; class TbxSockets; class ExecutionEnvironment; class TbxStream : public AubMemDump::AubStream { protected: TbxSockets *socket = nullptr; public: TbxStream(); ~TbxStream() override; TbxStream(const TbxStream &) = delete; TbxStream &operator=(const TbxStream &) = delete; void open(const char *options) override; void close() override; bool init(uint32_t stepping, uint32_t device) override; void writeMemory(uint64_t physAddress, const void *memory, size_t size, uint32_t addressSpace, uint32_t hint) override; void writeMemoryWriteHeader(uint64_t physAddress, size_t size, uint32_t addressSpace, uint32_t hint) override; void writeGTT(uint32_t gttOffset, uint64_t entry) override; void writePTE(uint64_t physAddress, uint64_t entry, uint32_t addressSpace) override; void writeMMIOImpl(uint32_t offset, uint32_t value) override; void registerPoll(uint32_t registerOffset, uint32_t mask, uint32_t value, bool pollNotEqual, uint32_t timeoutAction) override; void readMemory(uint64_t physAddress, void *memory, size_t size); }; struct TbxCommandStreamReceiver { static CommandStreamReceiver *create(const std::string &baseName, bool withAubDump, ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield); using TbxStream = NEO::TbxStream; }; typedef CommandStreamReceiver *(*TbxCommandStreamReceiverCreateFunc)(const std::string &baseName, 
bool withAubDump, ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield); } // namespace NEO compute-runtime-22.14.22890/shared/source/command_stream/tbx_command_stream_receiver_hw.h000066400000000000000000000104041422164147700315110ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/command_stream_receiver_simulated_hw.h" #include "shared/source/command_stream/tbx_command_stream_receiver.h" #include "shared/source/command_stream/wait_status.h" #include "shared/source/memory_manager/address_mapper.h" #include "shared/source/memory_manager/os_agnostic_memory_manager.h" #include "shared/source/memory_manager/page_table.h" #include "aub_mapper.h" #include namespace NEO { class AubSubCaptureManager; class TbxStream; template class TbxCommandStreamReceiverHw : public CommandStreamReceiverSimulatedHw { protected: typedef CommandStreamReceiverSimulatedHw BaseClass; using AUB = typename AUBFamilyMapper::AUB; using BaseClass::getParametersForWriteMemory; using BaseClass::osContext; uint32_t getMaskAndValueForPollForCompletion() const; bool getpollNotEqualValueForPollForCompletion() const; void flushSubmissionsAndDownloadAllocations(uint32_t taskCount); public: using CommandStreamReceiverSimulatedCommonHw::initAdditionalMMIO; using CommandStreamReceiverSimulatedCommonHw::aubManager; using CommandStreamReceiverSimulatedCommonHw::hardwareContextController; using CommandStreamReceiverSimulatedCommonHw::engineInfo; using CommandStreamReceiverSimulatedCommonHw::stream; SubmissionStatus flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override; WaitStatus waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, QueueThrottle throttle) override; WaitStatus waitForCompletionWithTimeout(const WaitParams ¶ms, uint32_t taskCountToWait) 
override; void downloadAllocations() override; void downloadAllocationTbx(GraphicsAllocation &gfxAllocation); void processEviction() override; void processResidency(const ResidencyContainer &allocationsForResidency, uint32_t handleId) override; void writeMemory(uint64_t gpuAddress, void *cpuAddress, size_t size, uint32_t memoryBank, uint64_t entryBits) override; bool writeMemory(GraphicsAllocation &gfxAllocation) override; void writeMMIO(uint32_t offset, uint32_t value) override; bool expectMemory(const void *gfxAddress, const void *srcAddress, size_t length, uint32_t compareOperation) override; AubSubCaptureStatus checkAndActivateAubSubCapture(const std::string &kernelName) override; // Family specific version MOCKABLE_VIRTUAL void submitBatchBufferTbx(uint64_t batchBufferGpuAddress, const void *batchBuffer, size_t batchBufferSize, uint32_t memoryBank, uint64_t entryBits, bool overrideRingHead); void pollForCompletion() override; void dumpAllocation(GraphicsAllocation &gfxAllocation) override; static CommandStreamReceiver *create(const std::string &baseName, bool withAubDump, ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield); TbxCommandStreamReceiverHw(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield); ~TbxCommandStreamReceiverHw() override; void initializeEngine() override; MemoryManager *getMemoryManager() { return CommandStreamReceiver::getMemoryManager(); } TbxStream tbxStream; std::unique_ptr subCaptureManager; uint32_t aubDeviceId; bool streamInitialized = false; std::unique_ptr physicalAddressAllocator; std::unique_ptr::type> ppgtt; std::unique_ptr ggtt; // remap CPU VA -> GGTT VA AddressMapper gttRemap; std::set allocationsForDownload = {}; CommandStreamReceiverType getType() override { return CommandStreamReceiverType::CSR_TBX; } bool dumpTbxNonWritable = false; bool isEngineInitialized = false; }; } // namespace NEO 
compute-runtime-22.14.22890/shared/source/command_stream/tbx_command_stream_receiver_hw.inl000066400000000000000000000636131422164147700320560ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/aub/aub_center.h" #include "shared/source/aub/aub_helper.h" #include "shared/source/aub_mem_dump/aub_alloc_dump.h" #include "shared/source/aub_mem_dump/aub_alloc_dump.inl" #include "shared/source/aub_mem_dump/page_table_entry_bits.h" #include "shared/source/command_stream/aub_command_stream_receiver.h" #include "shared/source/command_stream/command_stream_receiver_with_aub_dump.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/api_specific_config.h" #include "shared/source/helpers/constants.h" #include "shared/source/helpers/debug_helpers.h" #include "shared/source/helpers/engine_node_helper.h" #include "shared/source/helpers/hardware_context_controller.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/populate_factory.h" #include "shared/source/helpers/ptr_math.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "shared/source/memory_manager/memory_banks.h" #include "shared/source/memory_manager/physical_address_allocator.h" #include "shared/source/os_interface/hw_info_config.h" #include "shared/source/os_interface/os_context.h" #include namespace NEO { template TbxCommandStreamReceiverHw::TbxCommandStreamReceiverHw(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield) : BaseClass(executionEnvironment, rootDeviceIndex, deviceBitfield) { physicalAddressAllocator.reset(this->createPhysicalAddressAllocator(&this->peekHwInfo())); 
executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->initAubCenter(this->localMemoryEnabled, "", this->getType()); auto aubCenter = executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->aubCenter.get(); UNRECOVERABLE_IF(nullptr == aubCenter); aubManager = aubCenter->getAubManager(); ppgtt = std::make_unique::type>(physicalAddressAllocator.get()); ggtt = std::make_unique(physicalAddressAllocator.get()); auto debugDeviceId = DebugManager.flags.OverrideAubDeviceId.get(); this->aubDeviceId = debugDeviceId == -1 ? this->peekHwInfo().capabilityTable.aubDeviceId : static_cast(debugDeviceId); this->stream = &tbxStream; this->downloadAllocationImpl = [this](GraphicsAllocation &graphicsAllocation) { this->downloadAllocationTbx(graphicsAllocation); }; } template TbxCommandStreamReceiverHw::~TbxCommandStreamReceiverHw() { this->downloadAllocationImpl = nullptr; if (streamInitialized) { tbxStream.close(); } this->freeEngineInfo(gttRemap); } template void TbxCommandStreamReceiverHw::initializeEngine() { isEngineInitialized = true; if (hardwareContextController) { hardwareContextController->initialize(); return; } auto csTraits = this->getCsTraits(osContext->getEngineType()); if (engineInfo.pLRCA) { return; } this->initGlobalMMIO(); this->initEngineMMIO(); this->initAdditionalMMIO(); // Global HW Status Page { const size_t sizeHWSP = 0x1000; const size_t alignHWSP = 0x1000; engineInfo.pGlobalHWStatusPage = alignedMalloc(sizeHWSP, alignHWSP); engineInfo.ggttHWSP = gttRemap.map(engineInfo.pGlobalHWStatusPage, sizeHWSP); auto physHWSP = ggtt->map(engineInfo.ggttHWSP, sizeHWSP, this->getGTTBits(), this->getMemoryBankForGtt()); // Write our GHWSP AubGTTData data = {0}; this->getGTTData(reinterpret_cast(physHWSP), data); AUB::reserveAddressGGTT(tbxStream, engineInfo.ggttHWSP, sizeHWSP, physHWSP, data); tbxStream.writeMMIO(AubMemDump::computeRegisterOffset(csTraits.mmioBase, 0x2080), engineInfo.ggttHWSP); } // Allocate the LRCA const size_t sizeLRCA = csTraits.sizeLRCA; 
const size_t alignLRCA = csTraits.alignLRCA; auto pLRCABase = alignedMalloc(sizeLRCA, alignLRCA); engineInfo.pLRCA = pLRCABase; // Initialize the LRCA to a known state csTraits.initialize(pLRCABase); // Reserve the RCS ring buffer engineInfo.sizeRingBuffer = 0x4 * 0x1000; { const size_t alignRCS = 0x1000; engineInfo.pRingBuffer = alignedMalloc(engineInfo.sizeRingBuffer, alignRCS); engineInfo.ggttRingBuffer = gttRemap.map(engineInfo.pRingBuffer, engineInfo.sizeRingBuffer); auto physRCS = ggtt->map(engineInfo.ggttRingBuffer, engineInfo.sizeRingBuffer, this->getGTTBits(), this->getMemoryBankForGtt()); AubGTTData data = {0}; this->getGTTData(reinterpret_cast(physRCS), data); AUB::reserveAddressGGTT(tbxStream, engineInfo.ggttRingBuffer, engineInfo.sizeRingBuffer, physRCS, data); } // Initialize the ring MMIO registers { uint32_t ringHead = 0x000; uint32_t ringTail = 0x000; auto ringBase = engineInfo.ggttRingBuffer; auto ringCtrl = (uint32_t)((engineInfo.sizeRingBuffer - 0x1000) | 1); csTraits.setRingHead(pLRCABase, ringHead); csTraits.setRingTail(pLRCABase, ringTail); csTraits.setRingBase(pLRCABase, ringBase); csTraits.setRingCtrl(pLRCABase, ringCtrl); } // Write our LRCA { engineInfo.ggttLRCA = gttRemap.map(engineInfo.pLRCA, sizeLRCA); auto lrcAddressPhys = ggtt->map(engineInfo.ggttLRCA, sizeLRCA, this->getGTTBits(), this->getMemoryBankForGtt()); AubGTTData data = {0}; this->getGTTData(reinterpret_cast(lrcAddressPhys), data); AUB::reserveAddressGGTT(tbxStream, engineInfo.ggttLRCA, sizeLRCA, lrcAddressPhys, data); AUB::addMemoryWrite( tbxStream, lrcAddressPhys, pLRCABase, sizeLRCA, this->getAddressSpace(csTraits.aubHintLRCA), csTraits.aubHintLRCA); } DEBUG_BREAK_IF(!engineInfo.pLRCA); } template CommandStreamReceiver *TbxCommandStreamReceiverHw::create(const std::string &baseName, bool withAubDump, ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield) { TbxCommandStreamReceiverHw *csr; auto &hwInfo = 
*(executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo()); auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); const auto &hwInfoConfig = *HwInfoConfig::get(hwInfo.platform.eProductFamily); if (withAubDump) { auto localMemoryEnabled = hwHelper.getEnableLocalMemory(hwInfo); auto fullName = AUBCommandStreamReceiver::createFullFilePath(hwInfo, baseName, rootDeviceIndex); if (DebugManager.flags.AUBDumpCaptureFileName.get() != "unk") { fullName.assign(DebugManager.flags.AUBDumpCaptureFileName.get()); } executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->initAubCenter(localMemoryEnabled, fullName, CommandStreamReceiverType::CSR_TBX_WITH_AUB); csr = new CommandStreamReceiverWithAUBDump>(baseName, executionEnvironment, rootDeviceIndex, deviceBitfield); auto aubCenter = executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->aubCenter.get(); UNRECOVERABLE_IF(nullptr == aubCenter); auto subCaptureCommon = aubCenter->getSubCaptureCommon(); UNRECOVERABLE_IF(nullptr == subCaptureCommon); if (subCaptureCommon->subCaptureMode > AubSubCaptureManager::SubCaptureMode::Off) { csr->subCaptureManager = std::make_unique(fullName, *subCaptureCommon, ApiSpecificConfig::getRegistryPath()); } if (csr->aubManager) { if (!csr->aubManager->isOpen()) { csr->aubManager->open(csr->subCaptureManager ? csr->subCaptureManager->getSubCaptureFileName("") : fullName); UNRECOVERABLE_IF(!csr->aubManager->isOpen()); } } } else { csr = new TbxCommandStreamReceiverHw(executionEnvironment, rootDeviceIndex, deviceBitfield); } if (!csr->aubManager) { // Open our stream csr->stream->open(nullptr); // Add the file header. 
bool streamInitialized = csr->stream->init(hwInfoConfig.getAubStreamSteppingFromHwRevId(hwInfo), csr->aubDeviceId); csr->streamInitialized = streamInitialized; } return csr; } template SubmissionStatus TbxCommandStreamReceiverHw::flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) { if (subCaptureManager) { if (aubManager) { aubManager->pause(false); } } initializeEngine(); // Write our batch buffer auto pBatchBuffer = ptrOffset(batchBuffer.commandBufferAllocation->getUnderlyingBuffer(), batchBuffer.startOffset); auto batchBufferGpuAddress = ptrOffset(batchBuffer.commandBufferAllocation->getGpuAddress(), batchBuffer.startOffset); auto currentOffset = batchBuffer.usedSize; DEBUG_BREAK_IF(currentOffset < batchBuffer.startOffset); auto sizeBatchBuffer = currentOffset - batchBuffer.startOffset; auto overrideRingHead = false; auto submissionTaskCount = this->taskCount + 1; allocationsForResidency.push_back(batchBuffer.commandBufferAllocation); batchBuffer.commandBufferAllocation->updateResidencyTaskCount(submissionTaskCount, this->osContext->getContextId()); batchBuffer.commandBufferAllocation->updateTaskCount(submissionTaskCount, osContext->getContextId()); // Write allocations for residency processResidency(allocationsForResidency, 0u); if (subCaptureManager) { if (aubManager) { auto status = subCaptureManager->getSubCaptureStatus(); if (!status.wasActiveInPreviousEnqueue && status.isActive) { overrideRingHead = true; } if (!status.wasActiveInPreviousEnqueue && !status.isActive) { aubManager->pause(true); } } } submitBatchBufferTbx( batchBufferGpuAddress, pBatchBuffer, sizeBatchBuffer, this->getMemoryBank(batchBuffer.commandBufferAllocation), this->getPPGTTAdditionalBits(batchBuffer.commandBufferAllocation), overrideRingHead); if (subCaptureManager) { pollForCompletion(); subCaptureManager->disableSubCapture(); } return SubmissionStatus::SUCCESS; } template void TbxCommandStreamReceiverHw::submitBatchBufferTbx(uint64_t batchBufferGpuAddress, 
const void *batchBuffer, size_t batchBufferSize, uint32_t memoryBank, uint64_t entryBits, bool overrideRingHead) { if (hardwareContextController) { if (batchBufferSize) { hardwareContextController->submit(batchBufferGpuAddress, batchBuffer, batchBufferSize, memoryBank, MemoryConstants::pageSize64k, overrideRingHead); } return; } auto csTraits = this->getCsTraits(osContext->getEngineType()); { auto physBatchBuffer = ppgtt->map(static_cast(batchBufferGpuAddress), batchBufferSize, entryBits, memoryBank); AubHelperHw aubHelperHw(this->localMemoryEnabled); AUB::reserveAddressPPGTT(tbxStream, static_cast(batchBufferGpuAddress), batchBufferSize, physBatchBuffer, entryBits, aubHelperHw); AUB::addMemoryWrite( tbxStream, physBatchBuffer, batchBuffer, batchBufferSize, this->getAddressSpace(AubMemDump::DataTypeHintValues::TraceBatchBufferPrimary), AubMemDump::DataTypeHintValues::TraceBatchBufferPrimary); } // Add a batch buffer start to the RCS auto previousTail = engineInfo.tailRingBuffer; { typedef typename GfxFamily::MI_LOAD_REGISTER_IMM MI_LOAD_REGISTER_IMM; typedef typename GfxFamily::MI_BATCH_BUFFER_START MI_BATCH_BUFFER_START; typedef typename GfxFamily::MI_NOOP MI_NOOP; auto pTail = ptrOffset(engineInfo.pRingBuffer, engineInfo.tailRingBuffer); auto ggttTail = ptrOffset(engineInfo.ggttRingBuffer, engineInfo.tailRingBuffer); auto sizeNeeded = sizeof(MI_BATCH_BUFFER_START) + sizeof(MI_NOOP) + sizeof(MI_LOAD_REGISTER_IMM); if (engineInfo.tailRingBuffer + sizeNeeded >= engineInfo.sizeRingBuffer) { // Pad the remaining ring with NOOPs auto sizeToWrap = engineInfo.sizeRingBuffer - engineInfo.tailRingBuffer; memset(pTail, 0, sizeToWrap); // write remaining ring auto physDumpStart = ggtt->map(ggttTail, sizeToWrap, this->getGTTBits(), this->getMemoryBankForGtt()); AUB::addMemoryWrite( tbxStream, physDumpStart, pTail, sizeToWrap, this->getAddressSpace(AubMemDump::DataTypeHintValues::TraceCommandBuffer), AubMemDump::DataTypeHintValues::TraceCommandBuffer); previousTail = 0; 
engineInfo.tailRingBuffer = 0; pTail = engineInfo.pRingBuffer; } else if (engineInfo.tailRingBuffer == 0) { // Add a LRI if this is our first submission auto lri = GfxFamily::cmdInitLoadRegisterImm; lri.setRegisterOffset(AubMemDump::computeRegisterOffset(csTraits.mmioBase, 0x2244)); lri.setDataDword(0x00010000); *(MI_LOAD_REGISTER_IMM *)pTail = lri; pTail = ((MI_LOAD_REGISTER_IMM *)pTail) + 1; } // Add our BBS auto bbs = GfxFamily::cmdInitBatchBufferStart; bbs.setBatchBufferStartAddress(static_cast(batchBufferGpuAddress)); bbs.setAddressSpaceIndicator(MI_BATCH_BUFFER_START::ADDRESS_SPACE_INDICATOR_PPGTT); *(MI_BATCH_BUFFER_START *)pTail = bbs; pTail = ((MI_BATCH_BUFFER_START *)pTail) + 1; // Add a NOOP as our tail needs to be aligned to a QWORD *(MI_NOOP *)pTail = GfxFamily::cmdInitNoop; pTail = ((MI_NOOP *)pTail) + 1; // Compute our new ring tail. engineInfo.tailRingBuffer = (uint32_t)ptrDiff(pTail, engineInfo.pRingBuffer); // Only dump the new commands auto ggttDumpStart = ptrOffset(engineInfo.ggttRingBuffer, previousTail); auto dumpStart = ptrOffset(engineInfo.pRingBuffer, previousTail); auto dumpLength = engineInfo.tailRingBuffer - previousTail; // write RCS auto physDumpStart = ggtt->map(ggttDumpStart, dumpLength, this->getGTTBits(), this->getMemoryBankForGtt()); AUB::addMemoryWrite( tbxStream, physDumpStart, dumpStart, dumpLength, this->getAddressSpace(AubMemDump::DataTypeHintValues::TraceCommandBuffer), AubMemDump::DataTypeHintValues::TraceCommandBuffer); // update the RCS mmio tail in the LRCA auto physLRCA = ggtt->map(engineInfo.ggttLRCA, sizeof(engineInfo.tailRingBuffer), this->getGTTBits(), this->getMemoryBankForGtt()); AUB::addMemoryWrite( tbxStream, physLRCA + 0x101c, &engineInfo.tailRingBuffer, sizeof(engineInfo.tailRingBuffer), this->getAddressSpace(AubMemDump::DataTypeHintValues::TraceNotype)); DEBUG_BREAK_IF(engineInfo.tailRingBuffer >= engineInfo.sizeRingBuffer); } // Submit our execlist by submitting to the execlist submit ports { typename 
AUB::MiContextDescriptorReg contextDescriptor = {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}; contextDescriptor.sData.Valid = true; contextDescriptor.sData.ForcePageDirRestore = false; contextDescriptor.sData.ForceRestore = false; contextDescriptor.sData.Legacy = true; contextDescriptor.sData.FaultSupport = 0; contextDescriptor.sData.PrivilegeAccessOrPPGTT = true; contextDescriptor.sData.ADor64bitSupport = AUB::Traits::addressingBits > 32; auto ggttLRCA = engineInfo.ggttLRCA; contextDescriptor.sData.LogicalRingCtxAddress = ggttLRCA / 4096; contextDescriptor.sData.ContextID = 0; this->submitLRCA(contextDescriptor); } } template void TbxCommandStreamReceiverHw::pollForCompletion() { if (hardwareContextController) { hardwareContextController->pollForCompletion(); return; } typedef typename AubMemDump::CmdServicesMemTraceRegisterPoll CmdServicesMemTraceRegisterPoll; auto mmioBase = this->getCsTraits(osContext->getEngineType()).mmioBase; bool pollNotEqual = getpollNotEqualValueForPollForCompletion(); uint32_t mask = getMaskAndValueForPollForCompletion(); uint32_t value = mask; tbxStream.registerPoll( AubMemDump::computeRegisterOffset(mmioBase, 0x2234), //EXECLIST_STATUS mask, value, pollNotEqual, CmdServicesMemTraceRegisterPoll::TimeoutActionValues::Abort); } template void TbxCommandStreamReceiverHw::writeMemory(uint64_t gpuAddress, void *cpuAddress, size_t size, uint32_t memoryBank, uint64_t entryBits) { UNRECOVERABLE_IF(!isEngineInitialized); AubHelperHw aubHelperHw(this->localMemoryEnabled); PageWalker walker = [&](uint64_t physAddress, size_t size, size_t offset, uint64_t entryBits) { AUB::reserveAddressGGTTAndWriteMmeory(tbxStream, static_cast(gpuAddress), cpuAddress, physAddress, size, offset, entryBits, aubHelperHw); }; ppgtt->pageWalk(static_cast(gpuAddress), size, 0, entryBits, walker, memoryBank); } template bool TbxCommandStreamReceiverHw::writeMemory(GraphicsAllocation &gfxAllocation) { UNRECOVERABLE_IF(!isEngineInitialized); if (!this->isTbxWritable(gfxAllocation)) 
{ return false; } uint64_t gpuAddress; void *cpuAddress; size_t size; if (!this->getParametersForWriteMemory(gfxAllocation, gpuAddress, cpuAddress, size)) { return false; } if (aubManager) { this->writeMemoryWithAubManager(gfxAllocation); } else { writeMemory(gpuAddress, cpuAddress, size, this->getMemoryBank(&gfxAllocation), this->getPPGTTAdditionalBits(&gfxAllocation)); } if (AubHelper::isOneTimeAubWritableAllocationType(gfxAllocation.getAllocationType())) { this->setTbxWritable(false, gfxAllocation); } return true; } template void TbxCommandStreamReceiverHw::writeMMIO(uint32_t offset, uint32_t value) { if (hardwareContextController) { hardwareContextController->writeMMIO(offset, value); } } template bool TbxCommandStreamReceiverHw::expectMemory(const void *gfxAddress, const void *srcAddress, size_t length, uint32_t compareOperation) { if (hardwareContextController) { auto readMemory = std::make_unique(length); //note: memory bank should not matter assuming that we call expect on the memory that was previously allocated hardwareContextController->readMemory((uint64_t)gfxAddress, readMemory.get(), length, this->getMemoryBankForGtt(), MemoryConstants::pageSize64k); auto isMemoryEqual = (memcmp(readMemory.get(), srcAddress, length) == 0); auto isEqualMemoryExpected = (compareOperation == AubMemDump::CmdServicesMemTraceMemoryCompare::CompareOperationValues::CompareEqual); return (isMemoryEqual == isEqualMemoryExpected); } return BaseClass::expectMemory(gfxAddress, srcAddress, length, compareOperation); } template void TbxCommandStreamReceiverHw::flushSubmissionsAndDownloadAllocations(uint32_t taskCountToWait) { this->flushBatchedSubmissions(); if (this->latestFlushedTaskCount < taskCountToWait) { this->flushTagUpdate(); } volatile uint32_t *pollAddress = this->getTagAddress(); for (uint32_t i = 0; i < this->activePartitions; i++) { while (*pollAddress < this->latestFlushedTaskCount) { this->downloadAllocation(*this->getTagAllocation()); } pollAddress = 
ptrOffset(pollAddress, this->postSyncWriteOffset); } auto lockCSR = this->obtainUniqueOwnership(); for (GraphicsAllocation *graphicsAllocation : this->allocationsForDownload) { this->downloadAllocation(*graphicsAllocation); } this->allocationsForDownload.clear(); } template WaitStatus TbxCommandStreamReceiverHw::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, QueueThrottle throttle) { flushSubmissionsAndDownloadAllocations(taskCountToWait); return BaseClass::waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, useQuickKmdSleep, throttle); } template WaitStatus TbxCommandStreamReceiverHw::waitForCompletionWithTimeout(const WaitParams ¶ms, uint32_t taskCountToWait) { flushSubmissionsAndDownloadAllocations(taskCountToWait); return BaseClass::waitForCompletionWithTimeout(params, taskCountToWait); } template void TbxCommandStreamReceiverHw::processEviction() { this->allocationsForDownload.insert(this->getEvictionAllocations().begin(), this->getEvictionAllocations().end()); BaseClass::processEviction(); } template void TbxCommandStreamReceiverHw::processResidency(const ResidencyContainer &allocationsForResidency, uint32_t handleId) { for (auto &gfxAllocation : allocationsForResidency) { if (dumpTbxNonWritable) { this->setTbxWritable(true, *gfxAllocation); } if (!writeMemory(*gfxAllocation)) { DEBUG_BREAK_IF(!((gfxAllocation->getUnderlyingBufferSize() == 0) || !this->isTbxWritable(*gfxAllocation))); } gfxAllocation->updateResidencyTaskCount(this->taskCount + 1, this->osContext->getContextId()); } dumpTbxNonWritable = false; } template void TbxCommandStreamReceiverHw::downloadAllocationTbx(GraphicsAllocation &gfxAllocation) { if (hardwareContextController) { hardwareContextController->readMemory(gfxAllocation.getGpuAddress(), gfxAllocation.getUnderlyingBuffer(), gfxAllocation.getUnderlyingBufferSize(), this->getMemoryBank(&gfxAllocation), MemoryConstants::pageSize64k); return; } auto 
cpuAddress = gfxAllocation.getUnderlyingBuffer(); auto gpuAddress = gfxAllocation.getGpuAddress(); auto length = gfxAllocation.getUnderlyingBufferSize(); if (length) { PageWalker walker = [&](uint64_t physAddress, size_t size, size_t offset, uint64_t entryBits) { DEBUG_BREAK_IF(offset > length); tbxStream.readMemory(physAddress, ptrOffset(cpuAddress, offset), size); }; ppgtt->pageWalk(static_cast(gpuAddress), length, 0, 0, walker, this->getMemoryBank(&gfxAllocation)); } } template void TbxCommandStreamReceiverHw::downloadAllocations() { volatile uint32_t *pollAddress = this->getTagAddress(); for (uint32_t i = 0; i < this->activePartitions; i++) { while (*pollAddress < this->latestFlushedTaskCount) { this->downloadAllocation(*this->getTagAllocation()); } pollAddress = ptrOffset(pollAddress, this->postSyncWriteOffset); } auto lockCSR = this->obtainUniqueOwnership(); for (GraphicsAllocation *graphicsAllocation : this->allocationsForDownload) { this->downloadAllocation(*graphicsAllocation); } this->allocationsForDownload.clear(); } template uint32_t TbxCommandStreamReceiverHw::getMaskAndValueForPollForCompletion() const { return 0x100; } template bool TbxCommandStreamReceiverHw::getpollNotEqualValueForPollForCompletion() const { return false; } template AubSubCaptureStatus TbxCommandStreamReceiverHw::checkAndActivateAubSubCapture(const std::string &kernelName) { if (!subCaptureManager) { return {false, false}; } auto status = subCaptureManager->checkAndActivateSubCapture(kernelName); if (status.isActive && !status.wasActiveInPreviousEnqueue) { dumpTbxNonWritable = true; } return status; } template void TbxCommandStreamReceiverHw::dumpAllocation(GraphicsAllocation &gfxAllocation) { if (!hardwareContextController) { return; } bool isBcsCsr = EngineHelpers::isBcs(this->osContext->getEngineType()); if (isBcsCsr != gfxAllocation.getAubInfo().bcsDumpOnly) { return; } if (DebugManager.flags.AUBDumpAllocsOnEnqueueReadOnly.get() || 
DebugManager.flags.AUBDumpAllocsOnEnqueueSVMMemcpyOnly.get()) { if (!gfxAllocation.isAllocDumpable()) { return; } gfxAllocation.setAllocDumpable(false, isBcsCsr); } auto dumpFormat = AubAllocDump::getDumpFormat(gfxAllocation); auto surfaceInfo = std::unique_ptr(AubAllocDump::getDumpSurfaceInfo(gfxAllocation, dumpFormat)); if (surfaceInfo) { hardwareContextController->pollForCompletion(); hardwareContextController->dumpSurface(*surfaceInfo.get()); } } } // namespace NEO tbx_command_stream_receiver_xehp_and_later.inl000066400000000000000000000005141422164147700343250ustar00rootroot00000000000000compute-runtime-22.14.22890/shared/source/command_stream/* * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ template <> uint32_t TbxCommandStreamReceiverHw::getMaskAndValueForPollForCompletion() const { return 0x80; } template <> bool TbxCommandStreamReceiverHw::getpollNotEqualValueForPollForCompletion() const { return true; } compute-runtime-22.14.22890/shared/source/command_stream/tbx_stream.cpp000066400000000000000000000041651422164147700257730ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/aub/aub_helper.h" #include "shared/source/command_stream/tbx_command_stream_receiver.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/helpers/debug_helpers.h" #include "shared/source/tbx/tbx_sockets.h" namespace NEO { TbxStream::TbxStream() { } TbxStream::~TbxStream() { delete socket; } void TbxStream::open(const char *options) { } void TbxStream::close() { DEBUG_BREAK_IF(!socket); socket->close(); } bool TbxStream::init(uint32_t stepping, uint32_t device) { socket = TbxSockets::create(); DEBUG_BREAK_IF(!socket); auto tbxServer = DebugManager.flags.TbxServer.get(); auto tbxPort = DebugManager.flags.TbxPort.get(); return socket->init(tbxServer, tbxPort); } void TbxStream::writeMemory(uint64_t addr, const void *memory, size_t size, 
uint32_t addressSpace, uint32_t hint) { uint32_t type = AubHelper::getMemType(addressSpace); socket->writeMemory(addr, memory, size, type); } void TbxStream::writeGTT(uint32_t gttOffset, uint64_t entry) { socket->writeGTT(gttOffset, entry); } void TbxStream::writePTE(uint64_t physAddress, uint64_t entry, uint32_t addressSpace) { uint32_t type = AubHelper::getMemType(addressSpace); socket->writeMemory(physAddress, &entry, sizeof(entry), type); } void TbxStream::writeMemoryWriteHeader(uint64_t physAddress, size_t size, uint32_t addressSpace, uint32_t hint) { } void TbxStream::writeMMIOImpl(uint32_t offset, uint32_t value) { socket->writeMMIO(offset, value); } void TbxStream::registerPoll(uint32_t registerOffset, uint32_t mask, uint32_t desiredValue, bool pollNotEqual, uint32_t timeoutAction) { bool matches = false; bool asyncMMIO = false; do { uint32_t value; socket->readMMIO(registerOffset, &value); matches = ((value & mask) == desiredValue); } while (matches == pollNotEqual && asyncMMIO); } void TbxStream::readMemory(uint64_t physAddress, void *memory, size_t size) { socket->readMemory(physAddress, memory, size); } } // namespace NEO compute-runtime-22.14.22890/shared/source/command_stream/thread_arbitration_policy.h000066400000000000000000000005361422164147700305120ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include namespace NEO { namespace ThreadArbitrationPolicy { enum : int32_t { AgeBased = 0, RoundRobin = 1, RoundRobinAfterDependency = 2, NotPresent = -1 }; } // namespace ThreadArbitrationPolicy } // namespace NEO compute-runtime-22.14.22890/shared/source/command_stream/wait_status.h000066400000000000000000000005461422164147700256360ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include namespace NEO { enum class WaitStatus { NotReady = 0, Ready = 1, GpuHang = 2, }; struct WaitParams { bool 
indefinitelyPoll = false; bool enableTimeout = false; int64_t waitTimeout = 0; }; } // namespace NEOcompute-runtime-22.14.22890/shared/source/commands/000077500000000000000000000000001422164147700217215ustar00rootroot00000000000000compute-runtime-22.14.22890/shared/source/commands/CMakeLists.txt000066400000000000000000000004251422164147700244620ustar00rootroot00000000000000# # Copyright (C) 2019-2021 Intel Corporation # # SPDX-License-Identifier: MIT # set(NEO_CORE_COMMANDS ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/bxml_generator_glue.h ) set_property(GLOBAL PROPERTY NEO_CORE_COMMANDS ${NEO_CORE_COMMANDS}) compute-runtime-22.14.22890/shared/source/commands/bxml_generator_glue.h000066400000000000000000000017301422164147700261170ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include #include // Macro helpers #ifndef STATIC_ASSERT #define STATIC_ASSERT(e) static_assert(e, #e) #endif #ifndef SIZE32 #define SIZE32(x) (sizeof(x) / sizeof(uint32_t)) #endif // SIZE32 /*****************************************************************************\ MACRO: BITFIELD_RANGE PURPOSE: Calculates the number of bits between the startbit and the endbit (0 based) \*****************************************************************************/ #ifndef BITFIELD_RANGE #define BITFIELD_RANGE(startbit, endbit) ((endbit) - (startbit) + 1) #endif /*****************************************************************************\ MACRO: BITFIELD_BIT PURPOSE: Definition declared for clarity when creating structs \*****************************************************************************/ #ifndef BITFIELD_BIT #define BITFIELD_BIT(bit) 1 #endif 
compute-runtime-22.14.22890/shared/source/compiler_interface/000077500000000000000000000000001422164147700237525ustar00rootroot00000000000000compute-runtime-22.14.22890/shared/source/compiler_interface/CMakeLists.txt000066400000000000000000000023061422164147700265130ustar00rootroot00000000000000# # Copyright (C) 2019-2022 Intel Corporation # # SPDX-License-Identifier: MIT # set(NEO_CORE_COMPILER_INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/compiler_cache.h ${CMAKE_CURRENT_SOURCE_DIR}/compiler_cache.cpp ${CMAKE_CURRENT_SOURCE_DIR}/compiler_interface.cpp ${CMAKE_CURRENT_SOURCE_DIR}/compiler_interface.h ${CMAKE_CURRENT_SOURCE_DIR}/compiler_interface.inl ${CMAKE_CURRENT_SOURCE_DIR}/create_main.cpp ${CMAKE_CURRENT_SOURCE_DIR}/default_cache_config.h ${CMAKE_CURRENT_SOURCE_DIR}/external_functions.cpp ${CMAKE_CURRENT_SOURCE_DIR}/external_functions.h ${CMAKE_CURRENT_SOURCE_DIR}/intermediate_representations.h ${CMAKE_CURRENT_SOURCE_DIR}/linker.h ${CMAKE_CURRENT_SOURCE_DIR}/linker.inl ${CMAKE_CURRENT_SOURCE_DIR}/linker.cpp ${CMAKE_CURRENT_SOURCE_DIR}/compiler_options/compiler_options_base.h ${CMAKE_CURRENT_SOURCE_DIR}/compiler_options/compiler_options_base.cpp ${CMAKE_CURRENT_SOURCE_DIR}/compiler_options${BRANCH_DIR_SUFFIX}compiler_options.h ${CMAKE_CURRENT_SOURCE_DIR}/compiler_warnings/compiler_warnings.h ) set_property(GLOBAL PROPERTY NEO_CORE_COMPILER_INTERFACE ${NEO_CORE_COMPILER_INTERFACE}) compute-runtime-22.14.22890/shared/source/compiler_interface/compiler_cache.cpp000066400000000000000000000132101422164147700274100ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/compiler_interface/compiler_cache.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/casts.h" #include "shared/source/helpers/file_io.h" #include "shared/source/helpers/hash.h" #include 
"shared/source/helpers/hw_info.h" #include "shared/source/utilities/debug_settings_reader.h" #include "shared/source/utilities/io_functions.h" #include "config.h" #include "os_inc.h" #include #include #include #include #include namespace NEO { std::mutex CompilerCache::cacheAccessMtx; const std::string CompilerCache::getCachedFileName(const HardwareInfo &hwInfo, const ArrayRef input, const ArrayRef options, const ArrayRef internalOptions) { Hash hash; hash.update("----", 4); hash.update(&*input.begin(), input.size()); hash.update("----", 4); hash.update(&*options.begin(), options.size()); hash.update("----", 4); hash.update(&*internalOptions.begin(), internalOptions.size()); hash.update("----", 4); hash.update(r_pod_cast(&hwInfo.platform), sizeof(hwInfo.platform)); hash.update("----", 4); hash.update(r_pod_cast(std::to_string(hwInfo.featureTable.asHash()).c_str()), std::to_string(hwInfo.featureTable.asHash()).length()); hash.update("----", 4); hash.update(r_pod_cast(std::to_string(hwInfo.workaroundTable.asHash()).c_str()), std::to_string(hwInfo.workaroundTable.asHash()).length()); auto res = hash.finish(); std::stringstream stream; stream << std::setfill('0') << std::setw(sizeof(res) * 2) << std::hex << res; if (DebugManager.flags.BinaryCacheTrace.get()) { std::string traceFilePath = config.cacheDir + PATH_SEPARATOR + stream.str() + ".trace"; std::string inputFilePath = config.cacheDir + PATH_SEPARATOR + stream.str() + ".input"; std::lock_guard lock(cacheAccessMtx); auto fp = NEO::IoFunctions::fopenPtr(traceFilePath.c_str(), "w"); if (fp) { NEO::IoFunctions::fprintf(fp, "---- input ----\n"); NEO::IoFunctions::fprintf(fp, "<%s>\n", inputFilePath.c_str()); NEO::IoFunctions::fprintf(fp, "---- options ----\n"); NEO::IoFunctions::fprintf(fp, "%s\n", &*options.begin()); NEO::IoFunctions::fprintf(fp, "---- internal options ----\n"); NEO::IoFunctions::fprintf(fp, "%s\n", &*internalOptions.begin()); NEO::IoFunctions::fprintf(fp, "---- platform ----\n"); 
NEO::IoFunctions::fprintf(fp, " eProductFamily=%d\n", hwInfo.platform.eProductFamily); NEO::IoFunctions::fprintf(fp, " ePCHProductFamily=%d\n", hwInfo.platform.ePCHProductFamily); NEO::IoFunctions::fprintf(fp, " eDisplayCoreFamily=%d\n", hwInfo.platform.eDisplayCoreFamily); NEO::IoFunctions::fprintf(fp, " eRenderCoreFamily=%d\n", hwInfo.platform.eRenderCoreFamily); NEO::IoFunctions::fprintf(fp, " ePlatformType=%d\n", hwInfo.platform.ePlatformType); NEO::IoFunctions::fprintf(fp, " usDeviceID=%d\n", hwInfo.platform.usDeviceID); NEO::IoFunctions::fprintf(fp, " usRevId=%d\n", hwInfo.platform.usRevId); NEO::IoFunctions::fprintf(fp, " usDeviceID_PCH=%d\n", hwInfo.platform.usDeviceID_PCH); NEO::IoFunctions::fprintf(fp, " usRevId_PCH=%d\n", hwInfo.platform.usRevId_PCH); NEO::IoFunctions::fprintf(fp, " eGTType=%d\n", hwInfo.platform.eGTType); NEO::IoFunctions::fprintf(fp, "---- feature table ----\n"); auto featureTable = r_pod_cast(&hwInfo.featureTable.packed); for (size_t idx = 0; idx < sizeof(hwInfo.featureTable.packed); idx++) { NEO::IoFunctions::fprintf(fp, "%02x.", (uint8_t)(featureTable[idx])); } NEO::IoFunctions::fprintf(fp, "\n"); NEO::IoFunctions::fprintf(fp, "---- workaround table ----\n"); auto workaroundTable = reinterpret_cast(&hwInfo.workaroundTable); for (size_t idx = 0; idx < sizeof(hwInfo.workaroundTable); idx++) { NEO::IoFunctions::fprintf(fp, "%02x.", (uint8_t)(workaroundTable[idx])); } NEO::IoFunctions::fprintf(fp, "\n"); NEO::IoFunctions::fclosePtr(fp); } fp = NEO::IoFunctions::fopenPtr(inputFilePath.c_str(), "w"); if (fp) { NEO::IoFunctions::fwritePtr(&*input.begin(), input.size(), 1, fp); NEO::IoFunctions::fclosePtr(fp); } } return stream.str(); } CompilerCache::CompilerCache(const CompilerCacheConfig &cacheConfig) : config(cacheConfig){}; bool CompilerCache::cacheBinary(const std::string kernelFileHash, const char *pBinary, uint32_t binarySize) { if (pBinary == nullptr || binarySize == 0) { return false; } std::string filePath = config.cacheDir + 
PATH_SEPARATOR + kernelFileHash + config.cacheFileExtension; std::lock_guard lock(cacheAccessMtx); return 0 != writeDataToFile(filePath.c_str(), pBinary, binarySize); } std::unique_ptr CompilerCache::loadCachedBinary(const std::string kernelFileHash, size_t &cachedBinarySize) { std::string filePath = config.cacheDir + PATH_SEPARATOR + kernelFileHash + config.cacheFileExtension; std::lock_guard lock(cacheAccessMtx); return loadDataFromFile(filePath.c_str(), cachedBinarySize); } } // namespace NEO compute-runtime-22.14.22890/shared/source/compiler_interface/compiler_cache.h000066400000000000000000000024201422164147700270560ustar00rootroot00000000000000/* * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/utilities/arrayref.h" #include #include #include #include #include namespace NEO { struct HardwareInfo; struct CompilerCacheConfig { bool enabled = true; std::string cacheFileExtension; std::string cacheDir; }; class CompilerCache { public: CompilerCache(const CompilerCacheConfig &config); virtual ~CompilerCache() = default; CompilerCache(const CompilerCache &) = delete; CompilerCache(CompilerCache &&) = delete; CompilerCache &operator=(const CompilerCache &) = delete; CompilerCache &operator=(CompilerCache &&) = delete; const std::string getCachedFileName(const HardwareInfo &hwInfo, ArrayRef input, ArrayRef options, ArrayRef internalOptions); MOCKABLE_VIRTUAL bool cacheBinary(const std::string kernelFileHash, const char *pBinary, uint32_t binarySize); MOCKABLE_VIRTUAL std::unique_ptr loadCachedBinary(const std::string kernelFileHash, size_t &cachedBinarySize); protected: static std::mutex cacheAccessMtx; CompilerCacheConfig config; }; } // namespace NEO compute-runtime-22.14.22890/shared/source/compiler_interface/compiler_interface.cpp000066400000000000000000000504321422164147700303140ustar00rootroot00000000000000/* * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ 
#include "shared/source/compiler_interface/compiler_interface.h" #include "shared/source/compiler_interface/compiler_cache.h" #include "shared/source/compiler_interface/compiler_interface.inl" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/device/device.h" #include "shared/source/helpers/compiler_hw_info_config.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/os_interface/os_inc_base.h" #include "cif/common/cif_main.h" #include "cif/helpers/error.h" #include "cif/import/library_api.h" #include "ocl_igc_interface/code_type.h" #include "ocl_igc_interface/fcl_ocl_device_ctx.h" #include "ocl_igc_interface/igc_ocl_device_ctx.h" #include "ocl_igc_interface/platform_helper.h" #include namespace NEO { SpinLock CompilerInterface::spinlock; enum CachingMode { None, Direct, PreProcess }; CompilerInterface::CompilerInterface() : cache() { } CompilerInterface::~CompilerInterface() = default; TranslationOutput::ErrorCode CompilerInterface::build( const NEO::Device &device, const TranslationInput &input, TranslationOutput &output) { if (false == isCompilerAvailable(input.srcType, input.outType)) { return TranslationOutput::ErrorCode::CompilerNotAvailable; } IGC::CodeType::CodeType_t srcCodeType = input.srcType; IGC::CodeType::CodeType_t intermediateCodeType = IGC::CodeType::undefined; if (input.preferredIntermediateType != IGC::CodeType::undefined) { intermediateCodeType = input.preferredIntermediateType; } CachingMode cachingMode = None; if (input.allowCaching) { if ((srcCodeType == IGC::CodeType::oclC) && (std::strstr(input.src.begin(), "#include") == nullptr)) { cachingMode = CachingMode::Direct; } else { cachingMode = CachingMode::PreProcess; } } std::string kernelFileHash; if (cachingMode == CachingMode::Direct) { kernelFileHash = cache->getCachedFileName(device.getHardwareInfo(), input.src, input.apiOptions, input.internalOptions); output.deviceBinary.mem = cache->loadCachedBinary(kernelFileHash, 
output.deviceBinary.size); if (output.deviceBinary.mem) { return TranslationOutput::ErrorCode::Success; } } auto inSrc = CIF::Builtins::CreateConstBuffer(igcMain.get(), input.src.begin(), input.src.size()); auto fclOptions = CIF::Builtins::CreateConstBuffer(igcMain.get(), input.apiOptions.begin(), input.apiOptions.size()); auto fclInternalOptions = CIF::Builtins::CreateConstBuffer(igcMain.get(), input.internalOptions.begin(), input.internalOptions.size()); auto idsBuffer = CIF::Builtins::CreateConstBuffer(igcMain.get(), nullptr, 0); auto valuesBuffer = CIF::Builtins::CreateConstBuffer(igcMain.get(), nullptr, 0); for (const auto &specConst : input.specializedValues) { idsBuffer->PushBackRawCopy(specConst.first); valuesBuffer->PushBackRawCopy(specConst.second); } CIF::RAII::UPtr_t intermediateRepresentation; if (srcCodeType == IGC::CodeType::oclC) { if (intermediateCodeType == IGC::CodeType::undefined) { intermediateCodeType = getPreferredIntermediateRepresentation(device); } auto fclTranslationCtx = createFclTranslationCtx(device, srcCodeType, intermediateCodeType); auto fclOutput = translate(fclTranslationCtx.get(), inSrc.get(), fclOptions.get(), fclInternalOptions.get()); if (fclOutput == nullptr) { return TranslationOutput::ErrorCode::UnknownError; } TranslationOutput::makeCopy(output.frontendCompilerLog, fclOutput->GetBuildLog()); if (fclOutput->Successful() == false) { return TranslationOutput::ErrorCode::BuildFailure; } output.intermediateCodeType = intermediateCodeType; TranslationOutput::makeCopy(output.intermediateRepresentation, fclOutput->GetOutput()); fclOutput->GetOutput()->Retain(); // will be used as input to compiler intermediateRepresentation.reset(fclOutput->GetOutput()); } else { inSrc->Retain(); // will be used as input to compiler directly intermediateRepresentation.reset(inSrc.get()); intermediateCodeType = srcCodeType; } if (cachingMode == CachingMode::PreProcess) { kernelFileHash = cache->getCachedFileName(device.getHardwareInfo(), 
ArrayRef(intermediateRepresentation->GetMemory(), intermediateRepresentation->GetSize()), input.apiOptions, input.internalOptions); output.deviceBinary.mem = cache->loadCachedBinary(kernelFileHash, output.deviceBinary.size); if (output.deviceBinary.mem) { return TranslationOutput::ErrorCode::Success; } } auto igcTranslationCtx = createIgcTranslationCtx(device, intermediateCodeType, IGC::CodeType::oclGenBin); auto igcOutput = translate(igcTranslationCtx.get(), intermediateRepresentation.get(), idsBuffer.get(), valuesBuffer.get(), fclOptions.get(), fclInternalOptions.get(), input.GTPinInput); if (igcOutput == nullptr) { return TranslationOutput::ErrorCode::UnknownError; } TranslationOutput::makeCopy(output.backendCompilerLog, igcOutput->GetBuildLog()); if (igcOutput->Successful() == false) { return TranslationOutput::ErrorCode::BuildFailure; } if (input.allowCaching) { cache->cacheBinary(kernelFileHash, igcOutput->GetOutput()->GetMemory(), static_cast(igcOutput->GetOutput()->GetSize())); } TranslationOutput::makeCopy(output.deviceBinary, igcOutput->GetOutput()); TranslationOutput::makeCopy(output.debugData, igcOutput->GetDebugData()); return TranslationOutput::ErrorCode::Success; } TranslationOutput::ErrorCode CompilerInterface::compile( const NEO::Device &device, const TranslationInput &input, TranslationOutput &output) { if ((IGC::CodeType::oclC != input.srcType) && (IGC::CodeType::elf != input.srcType)) { return TranslationOutput::ErrorCode::AlreadyCompiled; } if (false == isCompilerAvailable(input.srcType, input.outType)) { return TranslationOutput::ErrorCode::CompilerNotAvailable; } auto outType = input.outType; if (outType == IGC::CodeType::undefined) { outType = getPreferredIntermediateRepresentation(device); } auto fclSrc = CIF::Builtins::CreateConstBuffer(fclMain.get(), input.src.begin(), input.src.size()); auto fclOptions = CIF::Builtins::CreateConstBuffer(fclMain.get(), input.apiOptions.begin(), input.apiOptions.size()); auto fclInternalOptions = 
CIF::Builtins::CreateConstBuffer(fclMain.get(), input.internalOptions.begin(), input.internalOptions.size()); auto fclTranslationCtx = createFclTranslationCtx(device, input.srcType, outType); auto fclOutput = translate(fclTranslationCtx.get(), fclSrc.get(), fclOptions.get(), fclInternalOptions.get()); if (fclOutput == nullptr) { return TranslationOutput::ErrorCode::UnknownError; } TranslationOutput::makeCopy(output.frontendCompilerLog, fclOutput->GetBuildLog()); if (fclOutput->Successful() == false) { return TranslationOutput::ErrorCode::CompilationFailure; } output.intermediateCodeType = outType; TranslationOutput::makeCopy(output.intermediateRepresentation, fclOutput->GetOutput()); return TranslationOutput::ErrorCode::Success; } TranslationOutput::ErrorCode CompilerInterface::link( const NEO::Device &device, const TranslationInput &input, TranslationOutput &output) { if (false == isCompilerAvailable(input.srcType, input.outType)) { return TranslationOutput::ErrorCode::CompilerNotAvailable; } auto inSrc = CIF::Builtins::CreateConstBuffer(igcMain.get(), input.src.begin(), input.src.size()); auto igcOptions = CIF::Builtins::CreateConstBuffer(igcMain.get(), input.apiOptions.begin(), input.apiOptions.size()); auto igcInternalOptions = CIF::Builtins::CreateConstBuffer(igcMain.get(), input.internalOptions.begin(), input.internalOptions.size()); if (inSrc == nullptr) { return TranslationOutput::ErrorCode::UnknownError; } CIF::RAII::UPtr_t currOut; inSrc->Retain(); // shared with currSrc CIF::RAII::UPtr_t currSrc(inSrc.get()); IGC::CodeType::CodeType_t translationChain[] = {IGC::CodeType::elf, IGC::CodeType::oclGenBin}; constexpr size_t numTranslations = sizeof(translationChain) / sizeof(translationChain[0]); for (size_t ti = 1; ti < numTranslations; ti++) { IGC::CodeType::CodeType_t inType = translationChain[ti - 1]; IGC::CodeType::CodeType_t outType = translationChain[ti]; auto igcTranslationCtx = createIgcTranslationCtx(device, inType, outType); currOut = 
translate(igcTranslationCtx.get(), currSrc.get(), igcOptions.get(), igcInternalOptions.get(), input.GTPinInput); if (currOut == nullptr) { return TranslationOutput::ErrorCode::UnknownError; } if (currOut->Successful() == false) { TranslationOutput::makeCopy(output.backendCompilerLog, currOut->GetBuildLog()); return TranslationOutput::ErrorCode::LinkFailure; } currOut->GetOutput()->Retain(); // shared with currSrc currSrc.reset(currOut->GetOutput()); } TranslationOutput::makeCopy(output.backendCompilerLog, currOut->GetBuildLog()); TranslationOutput::makeCopy(output.deviceBinary, currOut->GetOutput()); TranslationOutput::makeCopy(output.debugData, currOut->GetDebugData()); return TranslationOutput::ErrorCode::Success; } TranslationOutput::ErrorCode CompilerInterface::getSpecConstantsInfo(const NEO::Device &device, ArrayRef srcSpirV, SpecConstantInfo &output) { if (false == isIgcAvailable()) { return TranslationOutput::ErrorCode::CompilerNotAvailable; } auto igcTranslationCtx = createIgcTranslationCtx(device, IGC::CodeType::spirV, IGC::CodeType::oclGenBin); auto inSrc = CIF::Builtins::CreateConstBuffer(igcMain.get(), srcSpirV.begin(), srcSpirV.size()); output.idsBuffer = CIF::Builtins::CreateConstBuffer(igcMain.get(), nullptr, 0); output.sizesBuffer = CIF::Builtins::CreateConstBuffer(igcMain.get(), nullptr, 0); auto retVal = getSpecConstantsInfoImpl(igcTranslationCtx.get(), inSrc.get(), output.idsBuffer.get(), output.sizesBuffer.get()); if (!retVal) { return TranslationOutput::ErrorCode::UnknownError; } return TranslationOutput::ErrorCode::Success; } TranslationOutput::ErrorCode CompilerInterface::createLibrary( NEO::Device &device, const TranslationInput &input, TranslationOutput &output) { if (false == isIgcAvailable()) { return TranslationOutput::ErrorCode::CompilerNotAvailable; } auto igcSrc = CIF::Builtins::CreateConstBuffer(igcMain.get(), input.src.begin(), input.src.size()); auto igcOptions = CIF::Builtins::CreateConstBuffer(igcMain.get(), 
input.apiOptions.begin(), input.apiOptions.size()); auto igcInternalOptions = CIF::Builtins::CreateConstBuffer(igcMain.get(), input.internalOptions.begin(), input.internalOptions.size()); auto intermediateRepresentation = IGC::CodeType::llvmBc; auto igcTranslationCtx = createIgcTranslationCtx(device, IGC::CodeType::elf, intermediateRepresentation); auto igcOutput = translate(igcTranslationCtx.get(), igcSrc.get(), igcOptions.get(), igcInternalOptions.get()); if (igcOutput == nullptr) { return TranslationOutput::ErrorCode::UnknownError; } TranslationOutput::makeCopy(output.backendCompilerLog, igcOutput->GetBuildLog()); if (igcOutput->Successful() == false) { return TranslationOutput::ErrorCode::LinkFailure; } output.intermediateCodeType = intermediateRepresentation; TranslationOutput::makeCopy(output.intermediateRepresentation, igcOutput->GetOutput()); return TranslationOutput::ErrorCode::Success; } TranslationOutput::ErrorCode CompilerInterface::getSipKernelBinary(NEO::Device &device, SipKernelType type, std::vector &retBinary, std::vector &stateSaveAreaHeader) { if (false == isIgcAvailable()) { return TranslationOutput::ErrorCode::CompilerNotAvailable; } bool bindlessSip = false; IGC::SystemRoutineType::SystemRoutineType_t typeOfSystemRoutine = IGC::SystemRoutineType::undefined; switch (type) { case SipKernelType::Csr: typeOfSystemRoutine = IGC::SystemRoutineType::contextSaveRestore; break; case SipKernelType::DbgCsr: typeOfSystemRoutine = IGC::SystemRoutineType::debug; break; case SipKernelType::DbgCsrLocal: typeOfSystemRoutine = IGC::SystemRoutineType::debugSlm; break; case SipKernelType::DbgBindless: typeOfSystemRoutine = IGC::SystemRoutineType::debug; bindlessSip = true; break; default: break; } auto deviceCtx = getIgcDeviceCtx(device); if (deviceCtx == nullptr) { return TranslationOutput::ErrorCode::UnknownError; } auto systemRoutineBuffer = igcMain.get()->CreateBuiltin(); auto stateSaveAreaBuffer = igcMain.get()->CreateBuiltin(); auto result = 
deviceCtx->GetSystemRoutine(typeOfSystemRoutine, bindlessSip, systemRoutineBuffer.get(), stateSaveAreaBuffer.get()); if (!result) { return TranslationOutput::ErrorCode::UnknownError; } retBinary.assign(systemRoutineBuffer->GetMemory(), systemRoutineBuffer->GetMemory() + systemRoutineBuffer->GetSizeRaw()); stateSaveAreaHeader.assign(stateSaveAreaBuffer->GetMemory(), stateSaveAreaBuffer->GetMemory() + stateSaveAreaBuffer->GetSizeRaw()); return TranslationOutput::ErrorCode::Success; } CIF::RAII::UPtr_t CompilerInterface::getIgcFeaturesAndWorkarounds(NEO::Device const &device) { return getIgcDeviceCtx(device)->GetIgcFeaturesAndWorkaroundsHandle(); } bool CompilerInterface::loadFcl() { return NEO::loadCompiler(Os::frontEndDllName, fclLib, fclMain); } bool CompilerInterface::loadIgc() { return NEO::loadCompiler(Os::igcDllName, igcLib, igcMain); } bool CompilerInterface::initialize(std::unique_ptr &&cache, bool requireFcl) { bool fclAvailable = requireFcl ? this->loadFcl() : false; bool igcAvailable = this->loadIgc(); this->cache.swap(cache); return this->cache && igcAvailable && (fclAvailable || (false == requireFcl)); } IGC::FclOclDeviceCtxTagOCL *CompilerInterface::getFclDeviceCtx(const Device &device) { auto ulock = this->lock(); auto it = fclDeviceContexts.find(&device); if (it != fclDeviceContexts.end()) { return it->second.get(); } if (fclMain == nullptr) { DEBUG_BREAK_IF(true); // compiler not available return nullptr; } auto newDeviceCtx = fclMain->CreateInterface(); if (newDeviceCtx == nullptr) { DEBUG_BREAK_IF(true); // could not create device context return nullptr; } newDeviceCtx->SetOclApiVersion(device.getHardwareInfo().capabilityTable.clVersionSupport * 10); if (newDeviceCtx->GetUnderlyingVersion() > 4U) { auto igcPlatform = newDeviceCtx->GetPlatformHandle(); if (nullptr == igcPlatform.get()) { DEBUG_BREAK_IF(true); // could not acquire handles to platform descriptor return nullptr; } const HardwareInfo *hwInfo = &device.getHardwareInfo(); 
IGC::PlatformHelper::PopulateInterfaceWith(*igcPlatform, hwInfo->platform); } fclDeviceContexts[&device] = std::move(newDeviceCtx); return fclDeviceContexts[&device].get(); } IGC::IgcOclDeviceCtxTagOCL *CompilerInterface::getIgcDeviceCtx(const Device &device) { auto ulock = this->lock(); auto it = igcDeviceContexts.find(&device); if (it != igcDeviceContexts.end()) { return it->second.get(); } if (igcMain == nullptr) { DEBUG_BREAK_IF(true); // compiler not available return nullptr; } auto newDeviceCtx = igcMain->CreateInterface(); if (newDeviceCtx == nullptr) { DEBUG_BREAK_IF(true); // could not create device context return nullptr; } newDeviceCtx->SetProfilingTimerResolution(static_cast(device.getDeviceInfo().outProfilingTimerResolution)); auto igcPlatform = newDeviceCtx->GetPlatformHandle(); auto igcGtSystemInfo = newDeviceCtx->GetGTSystemInfoHandle(); auto igcFtrWa = newDeviceCtx->GetIgcFeaturesAndWorkaroundsHandle(); if (false == NEO::areNotNullptr(igcPlatform.get(), igcGtSystemInfo.get(), igcFtrWa.get())) { DEBUG_BREAK_IF(true); // could not acquire handles to device descriptors return nullptr; } const HardwareInfo *hwInfo = &device.getHardwareInfo(); auto productFamily = DebugManager.flags.ForceCompilerUsePlatform.get(); if (productFamily != "unk") { getHwInfoForPlatformString(productFamily, hwInfo); } auto copyHwInfo = *hwInfo; const auto &compilerHwInfoConfig = *CompilerHwInfoConfig::get(copyHwInfo.platform.eProductFamily); compilerHwInfoConfig.adjustHwInfoForIgc(copyHwInfo); IGC::PlatformHelper::PopulateInterfaceWith(*igcPlatform, copyHwInfo.platform); IGC::GtSysInfoHelper::PopulateInterfaceWith(*igcGtSystemInfo, copyHwInfo.gtSystemInfo); igcFtrWa.get()->SetFtrDesktop(device.getHardwareInfo().featureTable.flags.ftrDesktop); igcFtrWa.get()->SetFtrChannelSwizzlingXOREnabled(device.getHardwareInfo().featureTable.flags.ftrChannelSwizzlingXOREnabled); igcFtrWa.get()->SetFtrIVBM0M1Platform(device.getHardwareInfo().featureTable.flags.ftrIVBM0M1Platform); 
igcFtrWa.get()->SetFtrSGTPVSKUStrapPresent(device.getHardwareInfo().featureTable.flags.ftrSGTPVSKUStrapPresent); igcFtrWa.get()->SetFtr5Slice(device.getHardwareInfo().featureTable.flags.ftr5Slice); igcFtrWa.get()->SetFtrGpGpuMidThreadLevelPreempt(CompilerHwInfoConfig::get(hwInfo->platform.eProductFamily)->isMidThreadPreemptionSupported(*hwInfo)); igcFtrWa.get()->SetFtrIoMmuPageFaulting(device.getHardwareInfo().featureTable.flags.ftrIoMmuPageFaulting); igcFtrWa.get()->SetFtrWddm2Svm(device.getHardwareInfo().featureTable.flags.ftrWddm2Svm); igcFtrWa.get()->SetFtrPooledEuEnabled(device.getHardwareInfo().featureTable.flags.ftrPooledEuEnabled); igcFtrWa.get()->SetFtrResourceStreamer(device.getHardwareInfo().featureTable.flags.ftrResourceStreamer); igcDeviceContexts[&device] = std::move(newDeviceCtx); return igcDeviceContexts[&device].get(); } IGC::CodeType::CodeType_t CompilerInterface::getPreferredIntermediateRepresentation(const Device &device) { return getFclDeviceCtx(device)->GetPreferredIntermediateRepresentation(); } CIF::RAII::UPtr_t CompilerInterface::createFclTranslationCtx(const Device &device, IGC::CodeType::CodeType_t inType, IGC::CodeType::CodeType_t outType) { auto deviceCtx = getFclDeviceCtx(device); if (deviceCtx == nullptr) { DEBUG_BREAK_IF(true); // could not create device context return nullptr; } if (fclBaseTranslationCtx == nullptr) { fclBaseTranslationCtx = deviceCtx->CreateTranslationCtx(inType, outType); } return deviceCtx->CreateTranslationCtx(inType, outType); } CIF::RAII::UPtr_t CompilerInterface::createIgcTranslationCtx(const Device &device, IGC::CodeType::CodeType_t inType, IGC::CodeType::CodeType_t outType) { auto deviceCtx = getIgcDeviceCtx(device); if (deviceCtx == nullptr) { DEBUG_BREAK_IF(true); // could not create device context return nullptr; } return deviceCtx->CreateTranslationCtx(inType, outType); } } // namespace NEO 
compute-runtime-22.14.22890/shared/source/compiler_interface/compiler_interface.h000066400000000000000000000173431422164147700277650ustar00rootroot00000000000000/*
 * Copyright (C) 2018-2021 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#pragma once
#include "shared/source/built_ins/sip.h"
#include "shared/source/compiler_interface/compiler_cache.h"
#include "shared/source/helpers/string.h"
#include "shared/source/os_interface/os_library.h"
#include "shared/source/utilities/arrayref.h"
#include "shared/source/utilities/spinlock.h"

#include "cif/common/cif_main.h"
#include "ocl_igc_interface/code_type.h"
#include "ocl_igc_interface/fcl_ocl_device_ctx.h"
#include "ocl_igc_interface/igc_ocl_device_ctx.h"

// NOTE(review): the header names of the two includes below, and the template argument /
// parameter lists throughout this file (e.g. `template static`, `std::unique_ptr mem`),
// appear to be missing - likely lost during extraction. Restore before building.
#include
#include

namespace NEO {
class Device;

using specConstValuesMap = std::unordered_map;

// Describes a single translation request handed to CompilerInterface.
struct TranslationInput {
    // Stores the three code types; every other member keeps its in-class default.
    // preferredIntermediateType defaults to IGC::CodeType::undefined when not supplied.
    TranslationInput(IGC::CodeType::CodeType_t srcType, IGC::CodeType::CodeType_t outType, IGC::CodeType::CodeType_t preferredIntermediateType = IGC::CodeType::undefined)
        : srcType(srcType), preferredIntermediateType(preferredIntermediateType), outType(outType) {
    }

    bool allowCaching = false;

    ArrayRef src;
    ArrayRef apiOptions;
    ArrayRef internalOptions;
    const char *tracingOptions = nullptr;
    uint32_t tracingOptionsCount = 0;

    IGC::CodeType::CodeType_t srcType = IGC::CodeType::invalid;
    IGC::CodeType::CodeType_t preferredIntermediateType = IGC::CodeType::invalid;
    IGC::CodeType::CodeType_t outType = IGC::CodeType::invalid;
    void *GTPinInput = nullptr;

    specConstValuesMap specializedValues;
};

// Collects the artifacts and logs produced by a translation request.
struct TranslationOutput {
    // Result codes returned by the CompilerInterface entry points.
    enum class ErrorCode {
        Success = 0,
        CompilerNotAvailable,
        CompilationFailure,
        BuildFailure,
        LinkFailure,
        AlreadyCompiled,
        UnknownError,
    };

    // An owned memory block together with its size.
    struct MemAndSize {
        std::unique_ptr mem;
        size_t size = 0;
    };

    IGC::CodeType::CodeType_t intermediateCodeType = IGC::CodeType::invalid;
    MemAndSize intermediateRepresentation;
    MemAndSize deviceBinary;
    MemAndSize debugData;
    std::string frontendCompilerLog;
    std::string backendCompilerLog;

    // Copies the contents of `src` into the container `dst` via dst.assign().
    // A null or zero-sized `src` clears `dst` instead.
    template static void makeCopy(ContainerT &dst, CIF::Builtins::BufferSimple *src) {
        if ((nullptr == src) || (src->GetSizeRaw() == 0)) {
            dst.clear();
            return;
        }
        dst.assign(src->GetMemory(), src->GetSize());
    }

    // Copies the contents of `src` into `dst` using the global ::makeCopy helper.
    // A null or zero-sized `src` releases dst.mem and sets dst.size to 0.
    static void makeCopy(MemAndSize &dst, CIF::Builtins::BufferSimple *src) {
        if ((nullptr == src) || (src->GetSizeRaw() == 0)) {
            dst.mem.reset();
            dst.size = 0U;
            return;
        }

        dst.size = src->GetSize();
        dst.mem = ::makeCopy(src->GetMemory(), src->GetSize());
    }
};

// Output of CompilerInterface::getSpecConstantsInfo: one buffer of ids, one of sizes.
struct SpecConstantInfo {
    CIF::RAII::UPtr_t idsBuffer;
    CIF::RAII::UPtr_t sizesBuffer;
};

// Entry point for build/compile/link requests, backed by the FCL and IGC libraries
// (see the fclLib/fclMain and igcLib/igcMain members). Copying and moving are deleted.
class CompilerInterface {
  public:
    CompilerInterface();
    CompilerInterface(const CompilerInterface &) = delete;
    CompilerInterface &operator=(const CompilerInterface &) = delete;
    CompilerInterface(CompilerInterface &&) = delete;
    CompilerInterface &operator=(CompilerInterface &&) = delete;
    virtual ~CompilerInterface();

    // Allocates a CompilerInterfaceT and initializes it with `cache` and `requireFcl`.
    // Returns the raw pointer (the caller takes ownership) or nullptr when initialize()
    // fails; in that case the instance is deleted here.
    template static CompilerInterfaceT *createInstance(std::unique_ptr &&cache, bool requireFcl) {
        auto instance = new CompilerInterfaceT();
        if (!instance->initialize(std::move(cache), requireFcl)) {
            delete instance;
            instance = nullptr;
        }
        return instance;
    }

    MOCKABLE_VIRTUAL TranslationOutput::ErrorCode build(const NEO::Device &device,
                                                        const TranslationInput &input,
                                                        TranslationOutput &output);

    MOCKABLE_VIRTUAL TranslationOutput::ErrorCode compile(const NEO::Device &device,
                                                          const TranslationInput &input,
                                                          TranslationOutput &output);

    MOCKABLE_VIRTUAL TranslationOutput::ErrorCode link(const NEO::Device &device,
                                                       const TranslationInput &input,
                                                       TranslationOutput &output);

    MOCKABLE_VIRTUAL TranslationOutput::ErrorCode getSpecConstantsInfo(const NEO::Device &device,
                                                                       ArrayRef srcSpirV, SpecConstantInfo &output);

    TranslationOutput::ErrorCode createLibrary(NEO::Device &device,
                                               const TranslationInput &input,
                                               TranslationOutput &output);

    MOCKABLE_VIRTUAL TranslationOutput::ErrorCode getSipKernelBinary(NEO::Device &device, SipKernelType type, std::vector &retBinary,
                                                                     std::vector &stateSaveAreaHeader);

    MOCKABLE_VIRTUAL CIF::RAII::UPtr_t getIgcFeaturesAndWorkarounds(const NEO::Device &device);

  protected:
    MOCKABLE_VIRTUAL bool initialize(std::unique_ptr &&cache, bool requireFcl);
    MOCKABLE_VIRTUAL bool loadFcl();
    MOCKABLE_VIRTUAL bool loadIgc();

    // One spinlock shared by all instances (static member); lock() wraps it in a unique_lock.
    static SpinLock spinlock;
    MOCKABLE_VIRTUAL std::unique_lock lock() {
        return std::unique_lock{spinlock};
    }
    std::unique_ptr cache = nullptr;

    using igcDevCtxUptr = CIF::RAII::UPtr_t;
    using fclDevCtxUptr = CIF::RAII::UPtr_t;

    // IGC side: library handle, CIF main object and the device-context map.
    std::unique_ptr igcLib;
    CIF::RAII::UPtr_t igcMain = nullptr;
    std::map igcDeviceContexts;

    // FCL side: library handle, CIF main object and the device-context map.
    std::unique_ptr fclLib;
    CIF::RAII::UPtr_t fclMain = nullptr;
    std::map fclDeviceContexts;
    CIF::RAII::UPtr_t fclBaseTranslationCtx = nullptr;

    MOCKABLE_VIRTUAL IGC::FclOclDeviceCtxTagOCL *getFclDeviceCtx(const Device &device);
    MOCKABLE_VIRTUAL IGC::IgcOclDeviceCtxTagOCL *getIgcDeviceCtx(const Device &device);
    MOCKABLE_VIRTUAL IGC::CodeType::CodeType_t getPreferredIntermediateRepresentation(const Device &device);

    MOCKABLE_VIRTUAL CIF::RAII::UPtr_t createFclTranslationCtx(const Device &device,
                                                               IGC::CodeType::CodeType_t inType,
                                                               IGC::CodeType::CodeType_t outType);
    MOCKABLE_VIRTUAL CIF::RAII::UPtr_t createIgcTranslationCtx(const Device &device,
                                                               IGC::CodeType::CodeType_t inType,
                                                               IGC::CodeType::CodeType_t outType);

    // True when the FCL main object is set.
    bool isFclAvailable() const {
        return (fclMain != nullptr);
    }

    // True when the IGC main object is set.
    bool isIgcAvailable() const {
        return (igcMain != nullptr);
    }

    // Reports whether every compiler component needed to translate `translationSrc`
    // into `translationDst` is available.
    bool isCompilerAvailable(IGC::CodeType::CodeType_t translationSrc, IGC::CodeType::CodeType_t translationDst) const {
        // FCL is needed only when the source is OpenCL C.
        bool requiresFcl = (IGC::CodeType::oclC == translationSrc);
        // IGC is needed for any non-OpenCL-C source, and for OpenCL C whenever the
        // destination is none of spirV / llvmBc / llvmLl.
        bool requiresIgc = (IGC::CodeType::oclC != translationSrc) || ((IGC::CodeType::spirV != translationDst) && (IGC::CodeType::llvmBc != translationDst) && (IGC::CodeType::llvmLl != translationDst));
        return (isFclAvailable() || (false == requiresFcl)) && (isIgcAvailable() || (false == requiresIgc));
    }
};
} // namespace NEO
compute-runtime-22.14.22890/shared/source/compiler_interface/compiler_interface.inl000066400000000000000000000116171422164147700303160ustar00rootroot00000000000000/* * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/helpers/validators.h" #include "shared/source/os_interface/os_library.h" #include "cif/builtins/memory/buffer/buffer.h" #include "cif/common/cif.h" #include "cif/import/library_api.h" #include "ocl_igc_interface/ocl_translation_output.h" namespace NEO { using CIFBuffer = CIF::Builtins::BufferSimple; template inline CIF::RAII::UPtr_t translate(TranslationCtx *tCtx, CIFBuffer *src, CIFBuffer *options, CIFBuffer *internalOptions) { if (false == NEO::areNotNullptr(tCtx, src, options, internalOptions)) { return nullptr; } auto ret = tCtx->Translate(src, options, internalOptions, nullptr, 0); if (ret == nullptr) { return nullptr; // assume OOM or internal error } if ((ret->GetOutput() == nullptr) || (ret->GetBuildLog() == nullptr) || (ret->GetDebugData() == nullptr)) { return nullptr; // assume OOM or internal error } return ret; } template inline CIF::RAII::UPtr_t translate(TranslationCtx *tCtx, CIFBuffer *src, CIFBuffer *options, CIFBuffer *internalOptions, void *gtpinInit) { if (false == NEO::areNotNullptr(tCtx, src, options, internalOptions)) { return nullptr; } auto ret = tCtx->Translate(src, options, internalOptions, nullptr, 0, gtpinInit); if (ret == nullptr) { return nullptr; // assume OOM or internal error } if ((ret->GetOutput() == nullptr) || (ret->GetBuildLog() == nullptr) || (ret->GetDebugData() == nullptr)) { return nullptr; // assume OOM or internal error } return ret; } template inline bool getSpecConstantsInfoImpl(TranslationCtx *tCtx, CIFBuffer *src, CIFBuffer *outSpecConstantsIds, CIFBuffer *outSpecConstantsSizes) { if (!NEO::areNotNullptr(tCtx, src, outSpecConstantsIds, outSpecConstantsSizes)) { return 
false; } return tCtx->GetSpecConstantsInfoImpl(src, outSpecConstantsIds, outSpecConstantsSizes); } template inline CIF::RAII::UPtr_t translate(TranslationCtx *tCtx, CIFBuffer *src, CIFBuffer *specConstantsIds, CIFBuffer *specConstantsValues, CIFBuffer *options, CIFBuffer *internalOptions, void *gtpinInit) { if (false == NEO::areNotNullptr(tCtx, src, options, internalOptions)) { return nullptr; } auto ret = tCtx->Translate(src, specConstantsIds, specConstantsValues, options, internalOptions, nullptr, 0, gtpinInit); if (ret == nullptr) { return nullptr; // assume OOM or internal error } if (!NEO::areNotNullptr(ret->GetOutput(), ret->GetBuildLog(), ret->GetDebugData())) { return nullptr; // assume OOM or internal error } return ret; } CIF::CIFMain *createMainNoSanitize(CIF::CreateCIFMainFunc_t createFunc); template